Merge tag 'clk-for-linus-3.16-part2' of git://git.linaro.org/people/mike.turquette/linux Pull more clock framework updates from Mike Turquette: "This contains the second half of the clk changes for 3.16. They are simply fixes and code refactoring for the OMAP clock drivers. The sunxi clock driver changes include splitting out the one mega-driver into several smaller pieces and adding support for the A31 SoC clocks" * tag 'clk-for-linus-3.16-part2' of git://git.linaro.org/people/mike.turquette/linux: (25 commits) clk: sunxi: document PRCM clock compatible strings clk: sunxi: add PRCM (Power/Reset/Clock Management) clks support clk: sun6i: Protect SDRAM gating bit clk: sun6i: Protect CPU clock clk: sunxi: Rework clock protection code clk: sunxi: Move the GMAC clock to a file of its own clk: sunxi: Move the 24M oscillator to a file of its own clk: sunxi: Remove calls to clk_put clk: sunxi: document new A31 USB clock compatible clk: sunxi: Implement A31 USB clock ARM: dts: OMAP5/DRA7: use omap5-mpu-dpll-clock capable of dealing with higher frequencies CLK: TI: dpll: support OMAP5 MPU DPLL that need special handling for higher frequencies ARM: OMAP5+: dpll: support Duty Cycle Correction(DCC) CLK: TI: clk-54xx: Set the rate for dpll_abe_m2x2_ck CLK: TI: Driver for DRA7 ATL (Audio Tracking Logic) dt:/bindings: DRA7 ATL (Audio Tracking Logic) clock bindings ARM: dts: dra7xx-clocks: Correct name for atl clkin3 clock CLK: TI: gate: add composite interface clock to OMAP2 only build ARM: OMAP2: clock: add DT boot support for cpufreq_ck CLK: TI: OMAP2: add clock init support ...

commit: dd1845af24a47b70cf84c29126698884f740ff9c [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Sun Jun 15 16:02:20 2014 -1000
committer: Linus Torvalds <torvalds@linux-foundation.org> Sun Jun 15 16:02:20 2014 -1000
tree: fa12809d854d18ba36a568c21d57ceff43617af1
parent: b55b39020289f225bf2455349ce1a67372a0baa9 [diff]
parent: b640a6037c9ecd1f0ad23a8e9b4ca5f5b4112508 [diff]
diff --git a/.gitignore b/.gitignore
index 42fa0d5..f4c0b09 100644
--- a/.gitignore
+++ b/.gitignore

@@ -22,7 +22,6 @@
 *.lst
 *.symtypes
 *.order
-modules.builtin
 *.elf
 *.bin
 *.gz
@@ -33,6 +32,8 @@
 *.lzo
 *.patch
 *.gcno
+modules.builtin
+Module.symvers
 
 #
 # Top-level generic files
@@ -44,7 +45,6 @@
 /vmlinuz
 /System.map
 /Module.markers
-/Module.symvers
 
 #
 # Debian directory (make deb-pkg)

diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
new file mode 100644
index 0000000..33c133e
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu

@@ -0,0 +1,25 @@
+What: 		/sys/devices/system/cpu/dscr_default
+Date:		13-May-2014
+KernelVersion:	v3.15.0
+Contact:
+Description:	Writes are equivalent to writing to
+		/sys/devices/system/cpu/cpuN/dscr on all CPUs.
+		Reads return the last written value or 0.
+		This value is not a global default: it is a way to set
+		all per-CPU defaults at the same time.
+Values:		64 bit unsigned integer (bit field)
+
+What: 		/sys/devices/system/cpu/cpu[0-9]+/dscr
+Date:		13-May-2014
+KernelVersion:	v3.15.0
+Contact:
+Description:	Default value for the Data Stream Control Register (DSCR) on
+		a CPU.
+		This default value is used when the kernel is executing and
+		for any process that has not set the DSCR itself.
+		If a process ever sets the DSCR (via direct access to the
+		SPR) that value will be persisted for that process and used
+		on any CPU where it executes (overriding the value described
+		here).
+		If set by a process it will be inherited by child processes.
+Values:		64 bit unsigned integer (bit field)

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index f1c5cc9..4c3efe4 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy

@@ -23,7 +23,7 @@
 				 [fowner]]
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
-			option:	[[appraise_type=]]
+			option:	[[appraise_type=]] [permit_directio]
 
 		base: 	func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index d922060..416c5d5 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net

@@ -169,6 +169,14 @@
 		"unknown", "notpresent", "down", "lowerlayerdown", "testing",
 		"dormant", "up".
 
+What:		/sys/class/net/<iface>/phys_port_id
+Date:		July 2013
+KernelVersion:	3.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the interface unique physical port identifier within
+		the NIC, as a string.
+
 What:		/sys/class/net/<iface>/speed
 Date:		October 2009
 KernelVersion:	2.6.33

diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
new file mode 100644
index 0000000..5cedf72d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm

@@ -0,0 +1,149 @@
+What:		/sys/class/net/<iface>/cdc_ncm/min_tx_pkt
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		The driver will pad NCM Transfer Blocks (NTBs) longer
+		than this to tx_max, allowing the device to receive
+		tx_max sized frames with no terminating short
+		packet. NTBs shorter than this limit are transmitted
+		as-is, without any padding, and are terminated with a
+		short USB packet.
+
+		Padding to tx_max allows the driver to transmit NTBs
+		back-to-back without any interleaving short USB
+		packets.  This reduces the number of short packet
+		interrupts in the device, and represents a tradeoff
+		between USB bus bandwidth and device DMA optimization.
+
+		Set to 0 to pad all frames. Set greater than tx_max to
+		disable all padding.
+
+What:		/sys/class/net/<iface>/cdc_ncm/rx_max
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		The maximum NTB size for RX.  Cannot exceed the
+		maximum value supported by the device. Must allow at
+		least one max sized datagram plus headers.
+
+		The actual limits are device dependent.  See
+		dwNtbInMaxSize.
+
+		Note: Some devices will silently ignore changes to
+		this value, resulting in oversized NTBs and
+		corresponding framing errors.
+
+What:		/sys/class/net/<iface>/cdc_ncm/tx_max
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		The maximum NTB size for TX.  Cannot exceed the
+		maximum value supported by the device.  Must allow at
+		least one max sized datagram plus headers.
+
+		The actual limits are device dependent.  See
+		dwNtbOutMaxSize.
+
+What:		/sys/class/net/<iface>/cdc_ncm/tx_timer_usecs
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Datagram aggregation timeout in µs. The driver will
+		wait up to 3 times this timeout for more datagrams to
+		aggregate before transmitting an NTB frame.
+
+		Valid range: 5 to 4000000
+
+		Set to 0 to disable aggregation.
+
+The following read-only attributes all represent fields of the
+structure defined in section 6.2.1 "GetNtbParameters" of "Universal
+Serial Bus Communications Class Subclass Specifications for Network
+Control Model Devices" (CDC NCM), Revision 1.0 (Errata 1), November
+24, 2010 from USB Implementers Forum, Inc.  The descriptions are
+quoted from table 6-3 of CDC NCM: "NTB Parameter Structure".
+
+What:		/sys/class/net/<iface>/cdc_ncm/bmNtbFormatsSupported
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Bit 0: 16-bit NTB supported (set to 1)
+		Bit 1: 32-bit NTB supported
+		Bits 2 – 15: reserved (reset to zero; must be ignored by host)
+
+What:		/sys/class/net/<iface>/cdc_ncm/dwNtbInMaxSize
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		IN NTB Maximum Size in bytes
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpInDivisor
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Divisor used for IN NTB Datagram payload alignment
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpInPayloadRemainder
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Remainder used to align input datagram payload within
+		the NTB: (Payload Offset) mod (wNdpInDivisor) =
+		wNdpInPayloadRemainder
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpInAlignment
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		NDP alignment modulus for NTBs on the IN pipe. Shall
+		be a power of 2, and shall be at least 4.
+
+What:		/sys/class/net/<iface>/cdc_ncm/dwNtbOutMaxSize
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		OUT NTB Maximum Size
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpOutDivisor
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		OUT NTB Datagram alignment modulus
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpOutPayloadRemainder
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Remainder used to align output datagram payload
+		offsets within the NTB: Padding, shall be transmitted
+		as zero by function, and ignored by host.  (Payload
+		Offset) mod (wNdpOutDivisor) = wNdpOutPayloadRemainder
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNdpOutAlignment
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		NDP alignment modulus for use in NTBs on the OUT
+		pipe. Shall be a power of 2, and shall be at least 4.
+
+What:		/sys/class/net/<iface>/cdc_ncm/wNtbOutMaxDatagrams
+Date:		May 2014
+KernelVersion:	3.16
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Maximum number of datagrams that the host may pack
+		into a single OUT NTB. Zero means that the device
+		imposes no limit.

diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
new file mode 100644
index 0000000..5e9aeb9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-queues

@@ -0,0 +1,79 @@
+What:		/sys/class/<iface>/queues/rx-<queue>/rps_cpus
+Date:		March 2010
+KernelVersion:	2.6.35
+Contact:	netdev@vger.kernel.org
+Description:
+		Mask of the CPU(s) currently enabled to participate in the
+		Receive Packet Steering packet processing flow for this
+		network device queue. Possible values depend on the number
+		of available CPU(s) in the system.
+
+What:		/sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+Date:		April 2010
+KernelVersion:	2.6.35
+Contact:	netdev@vger.kernel.org
+Description:
+		Number of Receive Packet Steering flows being currently
+		processed by this particular network device receive queue.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/tx_timeout
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of transmit timeout events seen by this
+		network interface transmit queue.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/xps_cpus
+Date:		November 2010
+KernelVersion:	2.6.38
+Contact:	netdev@vger.kernel.org
+Description:
+		Mask of the CPU(s) currently enabled to participate in the
+		Transmit Packet Steering packet processing flow for this
+		network device transmit queue. Possible values depend on the
+		number of available CPU(s) in the system.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the hold time in milliseconds to measure the slack
+		of this particular network device transmit queue.
+		Default value is 1000.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of bytes (objects) in flight on this
+		network device transmit queue.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the current limit of bytes allowed to be queued
+		on this network device transmit queue. This value is clamped
+		to be within the bounds defined by limit_max and limit_min.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the absolute maximum limit of bytes allowed to be
+		queued on this network device transmit queue. See
+		include/linux/dynamic_queue_limits.h for the default value.
+
+What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+Date:		November 2011
+KernelVersion:	3.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the absolute minimum limit of bytes allowed to be
+		queued on this network device transmit queue. Default value is
+		0.

diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
new file mode 100644
index 0000000..397118d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics

@@ -0,0 +1,201 @@
+What:		/sys/class/<iface>/statistics/collisions
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of collisions seen by this network device.
+		This value might not be relevant with all MAC layers.
+
+What:		/sys/class/<iface>/statistics/multicast
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of multicast packets received by this
+		network device.
+
+What:		/sys/class/<iface>/statistics/rx_bytes
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of bytes received by this network device.
+		See the network driver for the exact meaning of when this
+		value is incremented.
+
+What:		/sys/class/<iface>/statistics/rx_compressed
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of compressed packets received by this
+		network device. This value might only be relevant for interfaces
+		that support packet compression (e.g: PPP).
+
+What:		/sys/class/<iface>/statistics/rx_crc_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets received with a CRC (FCS) error
+		by this network device. Note that the specific meaning might
+		depend on the MAC layer used by the interface.
+
+What:		/sys/class/<iface>/statistics/rx_dropped
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets received by the network device
+		but dropped, that are not forwarded to the upper layers for
+		packet processing. See the network driver for the exact
+		meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_fifo_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of receive FIFO errors seen by this
+		network device. See the network driver for the exact
+		meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_frame_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of received frames with error, such as
+		alignment errors. Note that the specific meaning depends on
+		the MAC layer protocol used. See the network driver for
+		the exact meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_length_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets received with a length
+		error, oversized or undersized. See the network driver for the
+		exact meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_missed_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of received packets that have been missed
+		due to lack of capacity in the receive side. See the network
+		driver for the exact meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_over_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of received packets that are oversized
+		compared to what the network device is configured to accept
+		(e.g: larger than MTU). See the network driver for the exact
+		meaning of this value.
+
+What:		/sys/class/<iface>/statistics/rx_packets
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the total number of good packets received by this
+		network device.
+
+What:		/sys/class/<iface>/statistics/tx_aborted_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets that have been aborted
+		during transmission by a network device (e.g: because of
+		a medium collision). See the network driver for the exact
+		meaning of this value.
+
+What:		/sys/class/<iface>/statistics/tx_bytes
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of bytes transmitted by a network
+		device. See the network driver for the exact meaning of this
+		value, in particular whether this accounts for all successfully
+		transmitted packets or all packets that have been queued for
+		transmission.
+
+What:		/sys/class/<iface>/statistics/tx_carrier_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets that could not be transmitted
+		because of carrier errors (e.g: physical link down). See the
+		network driver for the exact meaning of this value.
+
+What:		/sys/class/<iface>/statistics/tx_compressed
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of transmitted compressed packets. Note
+		this might only be relevant for devices that support
+		compression (e.g: PPP).
+
+What:		/sys/class/<iface>/statistics/tx_dropped
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets dropped during transmission.
+		See the driver for the exact reasons as to why the packets were
+		dropped.
+
+What:		/sys/class/<iface>/statistics/tx_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets in error during transmission by
+		a network device. See the driver for the exact reasons as to
+		why the packets were dropped.
+
+What:		/sys/class/<iface>/statistics/tx_fifo_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets having caused a transmit
+		FIFO error. See the driver for the exact reasons as to why the
+		packets were dropped.
+
+What:		/sys/class/<iface>/statistics/tx_heartbeat_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets transmitted that have been
+		reported as heartbeat errors. See the driver for the exact
+		reasons as to why the packets were dropped.
+
+What:		/sys/class/<iface>/statistics/tx_packets
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets transmitted by a network
+		device. See the driver for whether this reports the number of all
+		attempted or successful transmissions.
+
+What:		/sys/class/<iface>/statistics/tx_window_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of packets not successfully transmitted
+		due to a window collision. The specific meaning depends on the
+		MAC layer used.  On Ethernet this is usually used to report
+		late collision errors.

diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
index 044b764..d9b9416 100644
--- a/Documentation/DocBook/80211.tmpl
+++ b/Documentation/DocBook/80211.tmpl

@@ -100,6 +100,7 @@
 !Finclude/net/cfg80211.h wdev_priv
 !Finclude/net/cfg80211.h ieee80211_iface_limit
 !Finclude/net/cfg80211.h ieee80211_iface_combination
+!Finclude/net/cfg80211.h cfg80211_check_combinations
       </chapter>
       <chapter>
       <title>Actions and configuration</title>

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index ba60d93..7df3134 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl

@@ -142,6 +142,12 @@
       to register it with the DRM subsystem.
     </para>
     <para>
+      Newer drivers that no longer require a <structname>drm_bus</structname>
+      structure can alternatively use the low-level device initialization and
+      registration functions such as <function>drm_dev_alloc()</function> and
+      <function>drm_dev_register()</function> directly.
+    </para>
+    <para>
       The <structname>drm_driver</structname> structure contains static
       information that describes the driver and features it supports, and
       pointers to methods that the DRM core will call to implement the DRM API.
@@ -282,6 +288,36 @@
       </sect3>
     </sect2>
     <sect2>
+      <title>Device Registration</title>
+      <para>
+        A number of functions are provided to help with device registration.
+        The functions deal with PCI, USB and platform devices, respectively.
+      </para>
+!Edrivers/gpu/drm/drm_pci.c
+!Edrivers/gpu/drm/drm_usb.c
+!Edrivers/gpu/drm/drm_platform.c
+      <para>
+        New drivers that no longer rely on the services provided by the
+        <structname>drm_bus</structname> structure can call the low-level
+        device registration functions directly. The
+        <function>drm_dev_alloc()</function> function can be used to allocate
+        and initialize a new <structname>drm_device</structname> structure.
+        Drivers will typically want to perform some additional setup on this
+        structure, such as allocating driver-specific data and storing a
+        pointer to it in the DRM device's <structfield>dev_private</structfield>
+        field. Drivers should also set the device's unique name using the
+        <function>drm_dev_set_unique()</function> function. After it has been
+        set up a device can be registered with the DRM subsystem by calling
+        <function>drm_dev_register()</function>. This will cause the device to
+        be exposed to userspace and will call the driver's
+        <structfield>.load()</structfield> implementation. When a device is
+        removed, the DRM device can safely be unregistered and freed by calling
+        <function>drm_dev_unregister()</function> followed by a call to
+        <function>drm_dev_unref()</function>.
+      </para>
+!Edrivers/gpu/drm/drm_stub.c
+    </sect2>
+    <sect2>
       <title>Driver Load</title>
       <para>
         The <methodname>load</methodname> method is the driver and device
@@ -342,21 +378,13 @@
         <sect4>
           <title>Managed IRQ Registration</title>
           <para>
-            Both the <function>drm_irq_install</function> and
-	    <function>drm_irq_uninstall</function> functions get the device IRQ by
-	    calling <function>drm_dev_to_irq</function>. This inline function will
-	    call a bus-specific operation to retrieve the IRQ number. For platform
-	    devices, <function>platform_get_irq</function>(..., 0) is used to
-	    retrieve the IRQ number.
-          </para>
-          <para>
             <function>drm_irq_install</function> starts by calling the
             <methodname>irq_preinstall</methodname> driver operation. The operation
             is optional and must make sure that the interrupt will not get fired by
             clearing all pending interrupt flags or disabling the interrupt.
           </para>
           <para>
-            The IRQ will then be requested by a call to
+            The passed-in IRQ will then be requested by a call to
             <function>request_irq</function>. If the DRIVER_IRQ_SHARED driver
             feature flag is set, a shared (IRQF_SHARED) IRQ handler will be
             requested.
@@ -1799,6 +1827,12 @@
       <title>KMS API Functions</title>
 !Edrivers/gpu/drm/drm_crtc.c
     </sect2>
+    <sect2>
+      <title>KMS Locking</title>
+!Pdrivers/gpu/drm/drm_modeset_lock.c kms locking
+!Iinclude/drm/drm_modeset_lock.h
+!Edrivers/gpu/drm/drm_modeset_lock.c
+    </sect2>
   </sect1>
 
   <!-- Internals: kms helper functions -->
@@ -1903,8 +1937,8 @@
           <para>
             The function filters out modes larger than
             <parameter>max_width</parameter> and <parameter>max_height</parameter>
-            if specified. It then calls the connector
-            <methodname>mode_valid</methodname> helper operation for  each mode in
+            if specified. It then calls the optional connector
+            <methodname>mode_valid</methodname> helper operation for each mode in
             the probed list to check whether the mode is valid for the connector.
           </para>
         </listitem>
@@ -2265,7 +2299,7 @@
           <para>
             Verify whether a mode is valid for the connector. Return MODE_OK for
             supported modes and one of the enum drm_mode_status values (MODE_*)
-            for unsupported modes. This operation is mandatory.
+            for unsupported modes. This operation is optional.
           </para>
           <para>
             As the mode rejection reason is currently not used beside for
@@ -2450,6 +2484,863 @@
       pointer to the target object, a pointer to the previously created property
       and an initial instance value.
     </para>
+    <sect2>
+	<title>Existing KMS Properties</title>
+	<para>
+	The following table gives a description of the DRM properties exposed by various
+	modules/drivers.
+	</para>
+	<table border="1" cellpadding="0" cellspacing="0">
+	<tbody>
+	<tr style="font-weight: bold;">
+	<td valign="top" >Owner Module/Drivers</td>
+	<td valign="top" >Group</td>
+	<td valign="top" >Property Name</td>
+	<td valign="top" >Type</td>
+	<td valign="top" >Property Values</td>
+	<td valign="top" >Object attached</td>
+	<td valign="top" >Description/Restrictions</td>
+	</tr>
+	<tr>
+	<td rowspan="20" valign="top" >DRM</td>
+	<td rowspan="2" valign="top" >Generic</td>
+	<td valign="top" >“EDID”</td>
+	<td valign="top" >BLOB | IMMUTABLE</td>
+	<td valign="top" >0</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >Contains id of edid blob ptr object.</td>
+	</tr>
+	<tr>
+	<td valign="top" >“DPMS”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ “On”, “Standby”, “Suspend”, “Off” }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >Contains DPMS operation mode value.</td>
+	</tr>
+	<tr>
+	<td rowspan="1" valign="top" >Plane</td>
+	<td valign="top" >“type”</td>
+	<td valign="top" >ENUM | IMMUTABLE</td>
+	<td valign="top" >{ "Overlay", "Primary", "Cursor" }</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >Plane type</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >DVI-I</td>
+	<td valign="top" >“subconnector”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ “Unknown”, “DVI-D”, “DVI-A” }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“select subconnector”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ “Automatic”, “DVI-D”, “DVI-A” }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="13" valign="top" >TV</td>
+	<td valign="top" >“subconnector”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "Unknown", "Composite", "SVIDEO", "Component", "SCART" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“select subconnector”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "Automatic", "Composite", "SVIDEO", "Component", "SCART" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“left margin”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“right margin”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“top margin”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“bottom margin”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“brightness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“contrast”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker reduction”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“overscan”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“saturation”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hue”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >Optional</td>
+	<td valign="top" >“scaling mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "None", "Full", "Center", "Full aspect" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“dirty”</td>
+	<td valign="top" >ENUM | IMMUTABLE</td>
+	<td valign="top" >{ "Off", "On", "Annotate" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="21" valign="top" >i915</td>
+	<td rowspan="3" valign="top" >Generic</td>
+	<td valign="top" >"Broadcast RGB"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "Automatic", "Full", "Limited 16:235" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“audio”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "force-dvi", "off", "auto", "on" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Standard name as in DRM</td>
+	<td valign="top" >Standard type as in DRM</td>
+	<td valign="top" >Standard value as in DRM</td>
+	<td valign="top" >Standard Object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="17" valign="top" >SDVO-TV</td>
+	<td valign="top" >“mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"left_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"right_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"top_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"bottom_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hpos”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“vpos”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“contrast”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“saturation”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hue”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“sharpness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter_adaptive”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter_2d”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“tv_chroma_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“tv_luma_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“dot_crawl”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >SDVO-TV/LVDS</td>
+	<td valign="top" >“brightness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="3" valign="top" >CDV gma-500</td>
+	<td rowspan="3" valign="top" >Generic</td>
+	<td valign="top" >"Broadcast RGB"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ “Full”, “Limited 16:235” }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“audio”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ “off”, “auto”, “on” }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Standard name as in DRM</td>
+	<td valign="top" >Standard type as in DRM</td>
+	<td valign="top" >Standard value as in DRM</td>
+	<td valign="top" >Standard Object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="20" valign="top" >Poulsbo</td>
+	<td rowspan="2" valign="top" >Generic</td>
+	<td valign="top" >“backlight”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=100</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Standard name as in DRM</td>
+	<td valign="top" >Standard type as in DRM</td>
+	<td valign="top" >Standard value as in DRM</td>
+	<td valign="top" >Standard Object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="17" valign="top" >SDVO-TV</td>
+	<td valign="top" >“mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"left_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"right_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"top_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"bottom_margin"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hpos”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“vpos”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“contrast”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“saturation”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hue”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“sharpness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter_adaptive”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“flicker_filter_2d”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“tv_chroma_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“tv_luma_filter”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“dot_crawl”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >SDVO-TV/LVDS</td>
+	<td valign="top" >“brightness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max= SDVO dependent</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="11" valign="top" >armada</td>
+	<td rowspan="2" valign="top" >CRTC</td>
+	<td valign="top" >"CSC_YUV"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "Auto" , "CCIR601", "CCIR709" }</td>
+	<td valign="top" >CRTC</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"CSC_RGB"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "Auto", "Computer system", "Studio" }</td>
+	<td valign="top" >CRTC</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="9" valign="top" >Overlay</td>
+	<td valign="top" >"colorkey"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0xffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey_min"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0xffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey_max"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0xffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey_val"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0xffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey_alpha"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0xffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey_mode"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "disabled", "Y component", "U component"
+	, "V component", "RGB", "R component", "G component", "B component" }</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"brightness"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=256 + 255</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"contrast"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0x7fff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"saturation"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0x7fff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >exynos</td>
+	<td valign="top" >CRTC</td>
+	<td valign="top" >“mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "normal", "blank" }</td>
+	<td valign="top" >CRTC</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Overlay</td>
+	<td valign="top" >“zpos”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=MAX_PLANE-1</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="3" valign="top" >i2c/ch7006_drv</td>
+	<td valign="top" >Generic</td>
+	<td valign="top" >“scale”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=2</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >TV</td>
+	<td valign="top" >Standard names as in DRM</td>
+	<td valign="top" >Standard types as in DRM</td>
+	<td valign="top" >Standard Values as in DRM</td>
+	<td valign="top" >Standard object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "PAL", "PAL-M", "PAL-N", "PAL-Nc"
+	, "PAL-60", "NTSC-M", "NTSC-J" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="16" valign="top" >nouveau</td>
+	<td rowspan="6" valign="top" >NV10 Overlay</td>
+	<td valign="top" >"colorkey"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0x01ffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“contrast”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=8192-1</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“brightness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1024</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“hue”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=359</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“saturation”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=8192-1</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“iturbt_709”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >Nv04 Overlay</td>
+	<td valign="top" >“colorkey”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0x01ffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“brightness”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1024</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="7" valign="top" >Display</td>
+	<td valign="top" >“dithering mode”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "auto", "off", "on" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“dithering depth”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "auto", "off", "on", "static 2x2", "dynamic 2x2", "temporal" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“underscan”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "auto", "6 bpc", "8 bpc" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“underscan hborder”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=128</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“underscan vborder”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=128</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“vibrant hue”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=180</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“color vibrance”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=200</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Generic</td>
+	<td valign="top" >Standard name as in DRM</td>
+	<td valign="top" >Standard type as in DRM</td>
+	<td valign="top" >Standard value as in DRM</td>
+	<td valign="top" >Standard Object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="2" valign="top" >omap</td>
+	<td rowspan="2" valign="top" >Generic</td>
+	<td valign="top" >“rotation”</td>
+	<td valign="top" >BITMASK</td>
+	<td valign="top" >{ 0, "rotate-0" },
+	{ 1, "rotate-90" },
+	{ 2, "rotate-180" },
+	{ 3, "rotate-270" },
+	{ 4, "reflect-x" },
+	{ 5, "reflect-y" }</td>
+	<td valign="top" >CRTC, Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >“zorder”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=3</td>
+	<td valign="top" >CRTC, Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >qxl</td>
+	<td valign="top" >Generic</td>
+	<td valign="top" >"hotplug_mode_update"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="10" valign="top" >radeon</td>
+	<td valign="top" >DVI-I</td>
+	<td valign="top" >“coherent”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >DAC enable load detect</td>
+	<td valign="top" >“load detection”</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=1</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >TV Standard</td>
+	<td valign="top" >"tv standard"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "ntsc", "pal", "pal-m", "pal-60", "ntsc-j"
+	, "scart-pal", "pal-cn", "secam" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >legacy TMDS PLL detect</td>
+	<td valign="top" >"tmds_pll"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "driver", "bios" }</td>
+	<td valign="top" >-</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="3" valign="top" >Underscan</td>
+	<td valign="top" >"underscan"</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "off", "on", "auto" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"underscan hborder"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=128</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"underscan vborder"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=128</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Audio</td>
+	<td valign="top" >“audio”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "off", "on", "auto" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >FMT Dithering</td>
+	<td valign="top" >“dither”</td>
+	<td valign="top" >ENUM</td>
+	<td valign="top" >{ "off", "on" }</td>
+	<td valign="top" >Connector</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >Generic</td>
+	<td valign="top" >Standard name as in DRM</td>
+	<td valign="top" >Standard type as in DRM</td>
+	<td valign="top" >Standard value as in DRM</td>
+	<td valign="top" >Standard Object as in DRM</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td rowspan="3" valign="top" >rcar-du</td>
+	<td rowspan="3" valign="top" >Generic</td>
+	<td valign="top" >"alpha"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=255</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"colorkey"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=0, Max=0x01ffffff</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	<tr>
+	<td valign="top" >"zpos"</td>
+	<td valign="top" >RANGE</td>
+	<td valign="top" >Min=1, Max=7</td>
+	<td valign="top" >Plane</td>
+	<td valign="top" >TBD</td>
+	</tr>
+	</tbody>
+	</table>
+    </sect2>
   </sect1>
 
   <!-- Internals: vertical blanking -->
@@ -2527,6 +3418,10 @@
       with a call to <function>drm_vblank_cleanup</function> in the driver
       <methodname>unload</methodname> operation handler.
     </para>
+    <sect2>
+      <title>Vertical Blanking and Interrupt Handling Functions Reference</title>
+!Edrivers/gpu/drm/drm_irq.c
+    </sect2>
   </sect1>
 
   <!-- Internals: open/close, file operations and ioctls -->
@@ -2869,17 +3764,16 @@
             <term>DRM_IOCTL_MODESET_CTL</term>
             <listitem>
               <para>
-                This should be called by application level drivers before and
-                after mode setting, since on many devices the vertical blank
-                counter is reset at that time.  Internally, the DRM snapshots
-                the last vblank count when the ioctl is called with the
-                _DRM_PRE_MODESET command, so that the counter won't go backwards
-                (which is dealt with when _DRM_POST_MODESET is used).
+		This was only used for user-mode-setting drivers around
+		modesetting changes to allow the kernel to update the vblank
+		interrupt after mode setting, since on many devices the vertical
+		blank counter is reset to 0 at some point during modeset. Modern
+		drivers should not call this any more since with kernel mode
+		setting it is a no-op.
               </para>
             </listitem>
           </varlistentry>
         </variablelist>
-<!--!Edrivers/char/drm/drm_irq.c-->
       </para>
     </sect1>
 
@@ -2942,6 +3836,96 @@
 	  probing, so those sections fully apply.
         </para>
       </sect2>
+      <sect2>
+        <title>DPIO</title>
+!Pdrivers/gpu/drm/i915/i915_reg.h DPIO
+	<table id="dpiox2">
+	  <title>Dual channel PHY (VLV/CHV)</title>
+	  <tgroup cols="8">
+	    <colspec colname="c0" />
+	    <colspec colname="c1" />
+	    <colspec colname="c2" />
+	    <colspec colname="c3" />
+	    <colspec colname="c4" />
+	    <colspec colname="c5" />
+	    <colspec colname="c6" />
+	    <colspec colname="c7" />
+	    <spanspec spanname="ch0" namest="c0" nameend="c3" />
+	    <spanspec spanname="ch1" namest="c4" nameend="c7" />
+	    <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" />
+	    <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" />
+	    <spanspec spanname="ch1pcs01" namest="c4" nameend="c5" />
+	    <spanspec spanname="ch1pcs23" namest="c6" nameend="c7" />
+	    <thead>
+	      <row>
+		<entry spanname="ch0">CH0</entry>
+		<entry spanname="ch1">CH1</entry>
+	      </row>
+	    </thead>
+	    <tbody valign="top" align="center">
+	      <row>
+		<entry spanname="ch0">CMN/PLL/REF</entry>
+		<entry spanname="ch1">CMN/PLL/REF</entry>
+	      </row>
+	      <row>
+		<entry spanname="ch0pcs01">PCS01</entry>
+		<entry spanname="ch0pcs23">PCS23</entry>
+		<entry spanname="ch1pcs01">PCS01</entry>
+		<entry spanname="ch1pcs23">PCS23</entry>
+	      </row>
+	      <row>
+		<entry>TX0</entry>
+		<entry>TX1</entry>
+		<entry>TX2</entry>
+		<entry>TX3</entry>
+		<entry>TX0</entry>
+		<entry>TX1</entry>
+		<entry>TX2</entry>
+		<entry>TX3</entry>
+	      </row>
+	      <row>
+		<entry spanname="ch0">DDI0</entry>
+		<entry spanname="ch1">DDI1</entry>
+	      </row>
+	    </tbody>
+	  </tgroup>
+	</table>
+	<table id="dpiox1">
+	  <title>Single channel PHY (CHV)</title>
+	  <tgroup cols="4">
+	    <colspec colname="c0" />
+	    <colspec colname="c1" />
+	    <colspec colname="c2" />
+	    <colspec colname="c3" />
+	    <spanspec spanname="ch0" namest="c0" nameend="c3" />
+	    <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" />
+	    <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" />
+	    <thead>
+	      <row>
+		<entry spanname="ch0">CH0</entry>
+	      </row>
+	    </thead>
+	    <tbody valign="top" align="center">
+	      <row>
+		<entry spanname="ch0">CMN/PLL/REF</entry>
+	      </row>
+	      <row>
+		<entry spanname="ch0pcs01">PCS01</entry>
+		<entry spanname="ch0pcs23">PCS23</entry>
+	      </row>
+	      <row>
+		<entry>TX0</entry>
+		<entry>TX1</entry>
+		<entry>TX2</entry>
+		<entry>TX3</entry>
+	      </row>
+	      <row>
+		<entry spanname="ch0">DDI2</entry>
+	      </row>
+	    </tbody>
+	  </tgroup>
+	</table>
+      </sect2>
     </sect1>
 
     <sect1>
@@ -2950,6 +3934,11 @@
 	This section covers all things related to the GEM implementation in the
 	i915 driver.
       </para>
+      <sect2>
+        <title>Batchbuffer Parsing</title>
+!Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
+!Idrivers/gpu/drm/i915/i915_cmd_parser.c
+      </sect2>
     </sect1>
   </chapter>
 </part>

diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S
index 4b486fe..6f3e4b7 100644
--- a/Documentation/EDID/1024x768.S
+++ b/Documentation/EDID/1024x768.S

@@ -36,7 +36,7 @@
 #define DPI 72
 #define VFREQ 60 /* Hz */
 #define TIMING_NAME "Linux XGA"
-#define ESTABLISHED_TIMINGS_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
+#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
 #define HSYNC_POL 0
 #define VSYNC_POL 0
 #define CRC 0x55

diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S
index a2799fe..bd9bef2 100644
--- a/Documentation/EDID/1280x1024.S
+++ b/Documentation/EDID/1280x1024.S

@@ -36,7 +36,7 @@
 #define DPI 72
 #define VFREQ 60 /* Hz */
 #define TIMING_NAME "Linux SXGA"
-#define ESTABLISHED_TIMINGS_BITS 0x00 /* none */
+/* No ESTABLISHED_TIMINGx_BITS */
 #define HSYNC_POL 1
 #define VSYNC_POL 1
 #define CRC 0xa0

diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S
index 0ded64c..a45101c 100644
--- a/Documentation/EDID/1600x1200.S
+++ b/Documentation/EDID/1600x1200.S

@@ -36,7 +36,7 @@
 #define DPI 72
 #define VFREQ 60 /* Hz */
 #define TIMING_NAME "Linux UXGA"
-#define ESTABLISHED_TIMINGS_BITS 0x00 /* none */
+/* No ESTABLISHED_TIMINGx_BITS */
 #define HSYNC_POL 1
 #define VSYNC_POL 1
 #define CRC 0x9d

diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S
index 96f67ca..b0d7c69 100644
--- a/Documentation/EDID/1680x1050.S
+++ b/Documentation/EDID/1680x1050.S

@@ -36,7 +36,7 @@
 #define DPI 96
 #define VFREQ 60 /* Hz */
 #define TIMING_NAME "Linux WSXGA"
-#define ESTABLISHED_TIMINGS_BITS 0x00 /* none */
+/* No ESTABLISHED_TIMINGx_BITS */
 #define HSYNC_POL 1
 #define VSYNC_POL 1
 #define CRC 0x26

diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S
index 36ed5d5..3084355e 100644
--- a/Documentation/EDID/1920x1080.S
+++ b/Documentation/EDID/1920x1080.S

@@ -36,7 +36,7 @@
 #define DPI 96
 #define VFREQ 60 /* Hz */
 #define TIMING_NAME "Linux FHD"
-#define ESTABLISHED_TIMINGS_BITS 0x00 /* none */
+/* No ESTABLISHED_TIMINGx_BITS */
 #define HSYNC_POL 1
 #define VSYNC_POL 1
 #define CRC 0x05

diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S
new file mode 100644
index 0000000..6644e26
--- /dev/null
+++ b/Documentation/EDID/800x600.S

@@ -0,0 +1,41 @@
+/*
+   800x600.S: EDID data set for standard 800x600 60 Hz monitor
+
+   Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+   Copyright (C) 2014 Linaro Limited
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 40000 /* kHz */
+#define XPIX 800
+#define YPIX 600
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 256
+#define YBLANK 28
+#define XOFFSET 40
+#define XPULSE 128
+#define YOFFSET (63+1)
+#define YPULSE (63+4)
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SVGA"
+#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0xc2
+
+#include "edid.S"

diff --git a/Documentation/EDID/HOWTO.txt b/Documentation/EDID/HOWTO.txt
index 7146db1..835db33 100644
--- a/Documentation/EDID/HOWTO.txt
+++ b/Documentation/EDID/HOWTO.txt

@@ -18,7 +18,7 @@
 individually prepared or corrected EDID data set in the /lib/firmware
 directory from where it is loaded via the firmware interface. The code
 (see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
-commonly used screen resolutions (1024x768, 1280x1024, 1600x1200,
+commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
 1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
 not contain code to create these data. In order to elucidate the origin
 of the built-in binary EDID blobs and to facilitate the creation of

diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S
index ea97ae2..7ac0327 100644
--- a/Documentation/EDID/edid.S
+++ b/Documentation/EDID/edid.S

@@ -33,6 +33,17 @@
 #define XY_RATIO_5_4	0b10
 #define XY_RATIO_16_9	0b11
 
+/* Provide defaults for the timing bits */
+#ifndef ESTABLISHED_TIMING1_BITS
+#define ESTABLISHED_TIMING1_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING2_BITS
+#define ESTABLISHED_TIMING2_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING3_BITS
+#define ESTABLISHED_TIMING3_BITS 0x00
+#endif
+
 #define mfgname2id(v1,v2,v3) \
 	((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f))
 #define swap16(v1) ((v1>>8)+((v1&0xff)<<8))
@@ -139,7 +150,7 @@
    Bit 2	640x480 @ 75 Hz
    Bit 1	800x600 @ 56 Hz
    Bit 0	800x600 @ 60 Hz */
-estbl_timing1:	.byte	0x00
+estbl_timing1:	.byte	ESTABLISHED_TIMING1_BITS
 
 /* Bit 7	800x600 @ 72 Hz
    Bit 6	800x600 @ 75 Hz
@@ -149,11 +160,11 @@
    Bit 2	1024x768 @ 72 Hz
    Bit 1	1024x768 @ 75 Hz
    Bit 0	1280x1024 @ 75 Hz */
-estbl_timing2:	.byte	ESTABLISHED_TIMINGS_BITS
+estbl_timing2:	.byte	ESTABLISHED_TIMING2_BITS
 
 /* Bit 7	1152x870 @ 75 Hz (Apple Macintosh II)
    Bits 6-0 	Other manufacturer-specific display mod */
-estbl_timing3:	.byte	0x00
+estbl_timing3:	.byte	ESTABLISHED_TIMING3_BITS
 
 /* Standard timing */
 /* X resolution, less 31, divided by 8 (256-2288 pixels) */

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index b3429ae..02ab997 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt

@@ -458,15 +458,11 @@
 
 5.1 force_empty
   memory.force_empty interface is provided to make cgroup's memory usage empty.
-  You can use this interface only when the cgroup has no tasks.
   When writing anything to this
 
   # echo 0 > memory.force_empty
 
-  Almost all pages tracked by this memory cgroup will be unmapped and freed.
-  Some pages cannot be freed because they are locked or in-use. Such pages are
-  moved to parent (if use_hierarchy==1) or root (if use_hierarchy==0) and this
-  cgroup will be empty.
+  the cgroup will be reclaimed and as many pages reclaimed as possible.
 
   The typical use case for this interface is before calling rmdir().
   Because rmdir() moves all pages to parent, some out-of-use page caches can be

diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt
new file mode 100644
index 0000000..324b182
--- /dev/null
+++ b/Documentation/cgroups/unified-hierarchy.txt

@@ -0,0 +1,359 @@
+
+Cgroup unified hierarchy
+
+April, 2014		Tejun Heo <tj@kernel.org>
+
+This document describes the changes made by unified hierarchy and
+their rationales.  It will eventually be merged into the main cgroup
+documentation.
+
+CONTENTS
+
+1. Background
+2. Basic Operation
+  2-1. Mounting
+  2-2. cgroup.subtree_control
+  2-3. cgroup.controllers
+3. Structural Constraints
+  3-1. Top-down
+  3-2. No internal tasks
+4. Other Changes
+  4-1. [Un]populated Notification
+  4-2. Other Core Changes
+  4-3. Per-Controller Changes
+    4-3-1. blkio
+    4-3-2. cpuset
+    4-3-3. memory
+5. Planned Changes
+  5-1. CAP for resource control
+
+
+1. Background
+
+cgroup allows an arbitrary number of hierarchies and each hierarchy
+can host any number of controllers.  While this seems to provide a
+high level of flexibility, it isn't quite useful in practice.
+
+For example, as there is only one instance of each controller, utility
+type controllers such as freezer which can be useful in all
+hierarchies can only be used in one.  The issue is exacerbated by the
+fact that controllers can't be moved around once hierarchies are
+populated.  Another issue is that all controllers bound to a hierarchy
+are forced to have exactly the same view of the hierarchy.  It isn't
+possible to vary the granularity depending on the specific controller.
+
+In practice, these issues heavily limit which controllers can be put
+on the same hierarchy and most configurations resort to putting each
+controller on its own hierarchy.  Only closely related ones, such as
+the cpu and cpuacct controllers, make sense to put on the same
+hierarchy.  This often means that userland ends up managing multiple
+similar hierarchies repeating the same steps on each hierarchy
+whenever a hierarchy management operation is necessary.
+
+Unfortunately, support for multiple hierarchies comes at a steep cost.
+Internal implementation in cgroup core proper is dazzlingly
+complicated but more importantly the support for multiple hierarchies
+restricts how cgroup is used in general and what controllers can do.
+
+There's no limit on how many hierarchies there may be, which means
+that a task's cgroup membership can't be described in finite length.
+The key may contain any varying number of entries and is unlimited in
+length, which makes it highly awkward to handle and leads to addition
+of controllers which exist only to identify membership, which in turn
+exacerbates the original problem.
+
+Also, as a controller can't have any expectation regarding what shape
+of hierarchies other controllers would be on, each controller has to
+assume that all other controllers are operating on completely
+orthogonal hierarchies.  This makes it impossible, or at least very
+cumbersome, for controllers to cooperate with each other.
+
+In most use cases, putting controllers on hierarchies which are
+completely orthogonal to each other isn't necessary.  What usually is
+called for is the ability to have differing levels of granularity
+depending on the specific controller.  In other words, hierarchy may
+be collapsed from leaf towards root when viewed from specific
+controllers.  For example, a given configuration might not care about
+how memory is distributed beyond a certain level while still wanting
+to control how CPU cycles are distributed.
+
+Unified hierarchy is the next version of cgroup interface.  It aims to
+address the aforementioned issues by having more structure while
+retaining enough flexibility for most use cases.  Various other
+general and controller-specific interface issues are also addressed in
+the process.
+
+
+2. Basic Operation
+
+2-1. Mounting
+
+Currently, unified hierarchy can be mounted with the following mount
+command.  Note that this is still under development and scheduled to
+change soon.
+
+ mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT
+
+All controllers which are not bound to other hierarchies are
+automatically bound to unified hierarchy and show up at the root of
+it.  Controllers which are enabled only in the root of unified
+hierarchy can be bound to other hierarchies at any time.  This allows
+mixing unified hierarchy with the traditional multiple hierarchies in
+a fully backward compatible way.
+
+
+2-2. cgroup.subtree_control
+
+All cgroups on unified hierarchy have a "cgroup.subtree_control" file
+which governs which controllers are enabled on the children of the
+cgroup.  Let's assume a hierarchy like the following.
+
+  root - A - B - C
+               \ D
+
+root's "cgroup.subtree_control" file determines which controllers are
+enabled on A.  A's on B.  B's on C and D.  This coincides with the
+fact that controllers on the immediate sub-level are used to
+distribute the resources of the parent.  In fact, it's natural to
+assume that resource control knobs of a child belong to its parent.
+Enabling a controller in a "cgroup.subtree_control" file declares that
+distribution of the respective resources of the cgroup will be
+controlled.  Note that this means that controller enable states are
+shared among siblings.
+
+When read, the file contains a space-separated list of currently
+enabled controllers.  A write to the file should contain a
+space-separated list of controllers with '+' or '-' prefixed (without
+the quotes).  Controllers prefixed with '+' are enabled and '-'
+disabled.  If a controller is listed multiple times, the last entry
+wins.  The specific operations are executed atomically - either all
+succeed or fail.
+
+
+2-3. cgroup.controllers
+
+Read-only "cgroup.controllers" file contains a space-separated list of
+controllers which can be enabled in the cgroup's
+"cgroup.subtree_control" file.
+
+In the root cgroup, this lists controllers which are not bound to
+other hierarchies and the content changes as controllers are bound to
+and unbound from other hierarchies.
+
+In non-root cgroups, the content of this file equals that of the
+parent's "cgroup.subtree_control" file as only controllers enabled
+from the parent can be used in its children.
+
+
+3. Structural Constraints
+
+3-1. Top-down
+
+As it doesn't make sense to nest control of an uncontrolled resource,
+all non-root "cgroup.subtree_control" files can only contain
+controllers which are enabled in the parent's "cgroup.subtree_control"
+file.  A controller can be enabled only if the parent has the
+controller enabled and a controller can't be disabled if one or more
+children have it enabled.
+
+
+3-2. No internal tasks
+
+One long-standing issue that cgroup faces is the competition between
+tasks belonging to the parent cgroup and its children cgroups.  This
+is inherently nasty as two different types of entities compete and
+there is no agreed-upon obvious way to handle it.  Different
+controllers are doing different things.
+
+The cpu controller considers tasks and cgroups as equivalents and maps
+nice levels to cgroup weights.  This works for some cases but falls
+flat when children should be allocated specific ratios of CPU cycles
+and the number of internal tasks fluctuates - the ratios constantly
+change as the number of competing entities fluctuates.  There also are
+other issues.  The mapping from nice level to weight isn't obvious or
+universal, and there are various other knobs which simply aren't
+available for tasks.
+
+The blkio controller implicitly creates a hidden leaf node for each
+cgroup to host the tasks.  The hidden leaf has its own copies of all
+the knobs with "leaf_" prefixed.  While this allows equivalent control
+over internal tasks, it's with serious drawbacks.  It always adds an
+extra layer of nesting which may not be necessary, makes the interface
+messy and significantly complicates the implementation.
+
+The memory controller currently doesn't have a way to control what
+happens between internal tasks and child cgroups and the behavior is
+not clearly defined.  There have been attempts to add ad-hoc behaviors
+and knobs to tailor the behavior to specific workloads.  Continuing
+this direction will lead to problems which will be extremely difficult
+to resolve in the long term.
+
+Multiple controllers struggle with internal tasks and came up with
+different ways to deal with it; unfortunately, all the approaches in
+use now are severely flawed and, furthermore, the widely different
+behaviors make cgroup as a whole highly inconsistent.
+
+It is clear that this is something which needs to be addressed from
+cgroup core proper in a uniform way so that controllers don't need to
+worry about it and cgroup as a whole shows a consistent and logical
+behavior.  To achieve that, unified hierarchy enforces the following
+structural constraint:
+
+ Except for the root, only cgroups which don't contain any task may
+ have controllers enabled in their "cgroup.subtree_control" files.
+
+Combined with other properties, this guarantees that, when a
+controller is looking at the part of the hierarchy which has it
+enabled, tasks are always only on the leaves.  This rules out
+situations where child cgroups compete against internal tasks of the
+parent.
+
+There are two things to note.  Firstly, the root cgroup is exempt from
+the restriction.  Root contains tasks and anonymous resource
+consumption which can't be associated with any other cgroup and
+requires special treatment from most controllers.  How resource
+consumption in the root cgroup is governed is up to each controller.
+
+Secondly, the restriction doesn't take effect if there is no enabled
+controller in the cgroup's "cgroup.subtree_control" file.  This is
+important as otherwise it wouldn't be possible to create children of a
+populated cgroup.  To control resource distribution of a cgroup, the
+cgroup must create children and transfer all its tasks to the children
+before enabling controllers in its "cgroup.subtree_control" file.
+
+
+4. Other Changes
+
+4-1. [Un]populated Notification
+
+cgroup users often need a way to determine when a cgroup's
+subhierarchy becomes empty so that it can be cleaned up.  cgroup
+currently provides release_agent for it; unfortunately, this mechanism
+is riddled with issues.
+
+- It delivers events by forking and execing a userland binary
+  specified as the release_agent.  This is a long deprecated method of
+  notification delivery.  It's extremely heavy, slow and cumbersome to
+  integrate with larger infrastructure.
+
+- There is a single monitoring point at the root.  There's no way to
+  delegate management of a subtree.
+
+- The event isn't recursive.  It triggers when a cgroup doesn't have
+  any tasks or child cgroups.  Events for internal nodes trigger only
+  after all children are removed.  This again makes it impossible to
+  delegate management of a subtree.
+
+- Events are filtered from the kernel side.  A "notify_on_release"
+  file is used to subscribe to or suppress release events.  This is
+  unnecessarily complicated and probably done this way because event
+  delivery itself was expensive.
+
+Unified hierarchy implements an interface file "cgroup.populated"
+which can be used to monitor whether the cgroup's subhierarchy has
+tasks in it or not.  Its value is 0 if there is no task in the cgroup
+and its descendants; otherwise, 1.  poll and [id]notify events are
+triggered when the value changes.
+
+This is significantly lighter and simpler and trivially allows
+delegating management of subhierarchy - subhierarchy monitoring can
+block further propagation simply by putting itself or another process
+in the subhierarchy and monitor events that it's interested in from
+there without interfering with monitoring higher in the tree.
+
+In unified hierarchy, the release_agent mechanism is no longer
+supported and the interface files "release_agent" and
+"notify_on_release" do not exist.
+
+
+4-2. Other Core Changes
+
+- None of the mount options is allowed.
+
+- remount is disallowed.
+
+- rename(2) is disallowed.
+
+- The "tasks" file is removed.  Everything should be at process
+  granularity.  Use the "cgroup.procs" file instead.
+
+- The "cgroup.procs" file is not sorted.  pids will be unique unless
+  they got recycled in-between reads.
+
+- The "cgroup.clone_children" file is removed.
+
+
+4-3. Per-Controller Changes
+
+4-3-1. blkio
+
+- blk-throttle becomes properly hierarchical.
+
+
+4-3-2. cpuset
+
+- Tasks are kept in empty cpusets after hotplug and take on the masks
+  of the nearest non-empty ancestor, instead of being moved to it.
+
+- A task can be moved into an empty cpuset, and again it takes on the
+  masks of the nearest non-empty ancestor.
+
+
+4-3-3. memory
+
+- use_hierarchy is on by default and the cgroup file for the flag is
+  not created.
+
+
+5. Planned Changes
+
+5-1. CAP for resource control
+
+Unified hierarchy will require one of the capabilities(7), which is
+yet to be decided, for all resource control related knobs.  Process
+organization operations - creation of sub-cgroups and migration of
+processes in sub-hierarchies may be delegated by changing the
+ownership and/or permissions on the cgroup directory and
+"cgroup.procs" interface file; however, all operations which affect
+resource control - writes to a "cgroup.subtree_control" file or any
+controller-specific knobs - will require an explicit CAP privilege.
+
+This, in part, is to prevent the cgroup interface from being
+inadvertently promoted to programmable API used by non-privileged
+binaries.  cgroup exposes various aspects of the system in ways which
+aren't properly abstracted for direct consumption by regular programs.
+This is an administration interface much closer to sysctl knobs than
+system calls.  Even the basic access model, being filesystem path
+based, isn't suitable for direct consumption.  There's no way to
+access "my cgroup" in a race-free way or make multiple operations
+atomic against migration to another cgroup.
+
+Another aspect is that, for better or for worse, the cgroup interface
+goes through far less scrutiny than regular interfaces for
+unprivileged userland.  The upside is that cgroup is able to expose
+useful features which may not be suitable for general consumption in a
+reasonable time frame.  It provides a relatively short path between
+internal details and userland-visible interface.  Of course, this
+shortcut comes with high risk.  We go through what we go through for
+general kernel APIs for good reasons.  It may end up leaking internal
+details in a way which can exert significant pain by locking the
+kernel into a contract that can't be maintained in a reasonable
+manner.
+
+Also, due to the specific nature, cgroup and its controllers don't
+tend to attract attention from a wide scope of developers.  cgroup's
+short history is already fraught with severely mis-designed
+interfaces, unnecessary commitments to and exposing of internal
+details, broken and dangerous implementations of various features.
+
+Keeping cgroup as an administration interface is both advantageous for
+its role and imperative given its nature.  Some of the cgroup features
+may make sense for unprivileged access.  If deemed justified, those
+must be further abstracted and implemented as a different interface,
+be it a system call or process-private filesystem, and survive through
+the scrutiny that any interface for general consumption is required to
+go through.
+
+Requiring CAP is not a complete solution but should serve as a
+significant deterrent against spraying cgroup usages in non-privileged
+programs.

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index b045fe5..14f4e63 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt

@@ -26,6 +26,7 @@
 1.4  target/target_index or setpolicy?
 1.5  target/target_index
 1.6  setpolicy
+1.7  get_intermediate and target_intermediate
 2.   Frequency Table Helpers
 
 
@@ -79,6 +80,10 @@
 				"struct freq_attr" which allow to
 				export values to sysfs.
 
+cpufreq_driver.get_intermediate
+and target_intermediate		Used to switch to stable frequency while
+				changing CPU frequency.
+
 
 1.2 Per-CPU Initialization
 --------------------------
@@ -151,7 +156,7 @@
 limits on their own. These shall use the ->setpolicy call
 
 
-1.4. target/target_index
+1.5. target/target_index
 -------------
 
 The target_index call has two arguments: struct cpufreq_policy *policy,
@@ -160,6 +165,9 @@
 The CPUfreq driver must set the new frequency when called here. The
 actual frequency must be determined by freq_table[index].frequency.
 
+It should always restore to earlier frequency (i.e. policy->restore_freq) in
+case of errors, even if we switched to intermediate frequency earlier.
+
 Deprecated:
 ----------
 The target call has three arguments: struct cpufreq_policy *policy,
@@ -179,7 +187,7 @@
 for details.
 
 
-1.5 setpolicy
+1.6 setpolicy
 ---------------
 
 The setpolicy call only takes a struct cpufreq_policy *policy as
@@ -190,6 +198,23 @@
 powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
 the reference implementation in drivers/cpufreq/longrun.c
 
+1.7 get_intermediate and target_intermediate
+--------------------------------------------
+
+Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
+
+get_intermediate should return a stable intermediate frequency the platform wants
+to switch to, and target_intermediate() should set the CPU to that frequency, before
+jumping to the frequency corresponding to 'index'. Core will take care of
+sending notifications and driver doesn't have to handle them in
+target_intermediate() or target_index().
+
+Drivers can return '0' from get_intermediate() in case they don't wish to switch
+to intermediate frequency for some target frequency. In that case core will
+directly call ->target_index().
+
+NOTE: ->target_index() should restore to policy->restore_freq in case of
+failures as core would send notifications for that.
 
 
 2. Frequency Table Helpers

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index 48b285f..c96d8dc 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt

@@ -4,10 +4,16 @@
 Each SATA controller should have its own node.
 
 Required properties:
-- compatible        : compatible list, one of "snps,spear-ahci",
-                      "snps,exynos5440-ahci", "ibm,476gtr-ahci",
-                      "allwinner,sun4i-a10-ahci", "fsl,imx53-ahci"
-                      "fsl,imx6q-ahci" or "snps,dwc-ahci"
+- compatible        : compatible string, one of:
+  - "allwinner,sun4i-a10-ahci"
+  - "fsl,imx53-ahci"
+  - "fsl,imx6q-ahci"
+  - "hisilicon,hisi-ahci"
+  - "ibm,476gtr-ahci"
+  - "marvell,armada-380-ahci"
+  - "snps,dwc-ahci"
+  - "snps,exynos5440-ahci"
+  - "snps,spear-ahci"
 - interrupts        : <interrupt mapping for SATA IRQ>
 - reg               : <registers mapping>
 

diff --git a/Documentation/devicetree/bindings/clock/corenet-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
similarity index 95%
rename from Documentation/devicetree/bindings/clock/corenet-clock.txt
rename to Documentation/devicetree/bindings/clock/qoriq-clock.txt
index 24711af..5666812 100644
--- a/Documentation/devicetree/bindings/clock/corenet-clock.txt
+++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt

@@ -7,6 +7,14 @@
 cores and peripheral IP blocks.
 Please refer to the Reference Manual for details.
 
+All references to "1.0" and "2.0" refer to the QorIQ chassis version to
+which the chip complies.
+
+Chassis Version		Example Chips
+---------------		-------------
+1.0			p4080, p5020, p5040
+2.0			t4240, b4860, t1040
+
 1. Clock Block Binding
 
 Required properties:
@@ -85,7 +93,7 @@
 			#clock-cells = <0>;
 			compatible = "fsl,qoriq-sysclk-1.0";
 			clock-output-names = "sysclk";
-		}
+		};
 
 		pll0: pll0@800 {
 			#clock-cells = <1>;

diff --git a/Documentation/devicetree/bindings/dma/mmp-dma.txt b/Documentation/devicetree/bindings/dma/mmp-dma.txt
index a4fa4ef..7a802f6 100644
--- a/Documentation/devicetree/bindings/dma/mmp-dma.txt
+++ b/Documentation/devicetree/bindings/dma/mmp-dma.txt

@@ -1,17 +1,20 @@
 * MARVELL MMP DMA controller
 
 Marvell Peripheral DMA Controller
-Used platfroms: pxa688, pxa910, pxa3xx, etc
+Used platforms: pxa688, pxa910, pxa3xx, etc
 
 Required properties:
 - compatible: Should be "marvell,pdma-1.0"
 - reg: Should contain DMA registers location and length.
 - interrupts: Either contain all of the per-channel DMA interrupts
 		or one irq for pdma device
-- #dma-channels: Number of DMA channels supported by the controller.
+
+Optional properties:
+- #dma-channels: Number of DMA channels supported by the controller (defaults
+  to 32 when not specified)
 
 "marvell,pdma-1.0"
-Used platfroms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
+Used platforms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
 
 Examples:
 
@@ -45,7 +48,7 @@
 
 
 Marvell Two Channel DMA Controller used specifically for audio
-Used platfroms: pxa688, pxa910
+Used platforms: pxa688, pxa910
 
 Required properties:
 - compatible: Should be "marvell,adma-1.0" or "marvell,pxa910-squ"

diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
new file mode 100644
index 0000000..1405ed0
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt

@@ -0,0 +1,75 @@
+The Xilinx AXI VDMA engine does transfers between memory and video devices.
+It can be configured to have one channel or two channels. If configured
+as two channels, one is to transmit to the video device and another is
+to receive from the video device.
+
+Required properties:
+- compatible: Should be "xlnx,axi-vdma-1.00.a"
+- #dma-cells: Should be <1>, see "dmas" property below
+- reg: Should contain VDMA registers location and length.
+- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
+- dma-channel child node: Should have at least one channel and can have up to
+	two channels per device. This node specifies the properties of each
+	DMA channel (see child node properties below).
+
+Optional properties:
+- xlnx,include-sg: Tells hardware is configured for Scatter-gather
+	mode.
+- xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
+	It takes following values:
+	{1}, flush both channels
+	{2}, flush mm2s channel
+	{3}, flush s2mm channel
+
+Required child node properties:
+- compatible: It should be either "xlnx,axi-vdma-mm2s-channel" or
+	"xlnx,axi-vdma-s2mm-channel".
+- interrupts: Should contain per channel VDMA interrupts.
+- xlnx,datawidth: Should contain the stream data width, take values
+	{32,64...1024}.
+
+Optional child node properties:
+- xlnx,include-dre: Tells hardware is configured for Data
+	Realignment Engine.
+- xlnx,genlock-mode: Tells Genlock synchronization is
+	enabled/disabled in hardware.
+
+Example:
+++++++++
+
+axi_vdma_0: axivdma@40030000 {
+	compatible = "xlnx,axi-vdma-1.00.a";
+	#dma-cells = <1>;
+	reg = < 0x40030000 0x10000 >;
+	xlnx,num-fstores = <0x8>;
+	xlnx,flush-fsync = <0x1>;
+	dma-channel@40030000 {
+		compatible = "xlnx,axi-vdma-mm2s-channel";
+		interrupts = < 0 54 4 >;
+		xlnx,datawidth = <0x40>;
+	} ;
+	dma-channel@40030030 {
+		compatible = "xlnx,axi-vdma-s2mm-channel";
+		interrupts = < 0 53 4 >;
+		xlnx,datawidth = <0x40>;
+	} ;
+} ;
+
+
+* DMA client
+
+Required properties:
+- dmas: a list of <[Video DMA device phandle] [Channel ID]> pairs,
+	where Channel ID is '0' for write/tx and '1' for read/rx
+	channel.
+- dma-names: a list of DMA channel names, one per "dmas" entry
+
+Example:
+++++++++
+
+vdmatest_0: vdmatest@0 {
+	compatible ="xlnx,axi-vdma-test-1.00.a";
+	dmas = <&axi_vdma_0 0
+		&axi_vdma_0 1>;
+	dma-names = "vdma0", "vdma1";
+} ;

diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
index efa8b84..b48f4ef 100644
--- a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt

@@ -136,6 +136,7 @@
   - compatible: "nvidia,tegra<chip>-hdmi"
   - reg: Physical base address and length of the controller's registers.
   - interrupts: The interrupt outputs from the controller.
+  - hdmi-supply: supply for the +5V HDMI connector pin
   - vdd-supply: regulator for supply voltage
   - pll-supply: regulator for PLL
   - clocks: Must contain an entry for each entry in clock-names.
@@ -180,6 +181,7 @@
     See ../reset/reset.txt for details.
   - reset-names: Must include the following entries:
     - dsi
+  - avdd-dsi-supply: phandle of a supply that powers the DSI controller
   - nvidia,mipi-calibrate: Should contain a phandle and a specifier specifying
     which pads are used by this DSI output and need to be calibrated. See also
     ../mipi/nvidia,tegra114-mipi.txt.

diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt
similarity index 100%
rename from Documentation/devicetree/bindings/gpio/gpio_keys.txt
rename to Documentation/devicetree/bindings/input/gpio-keys.txt


diff --git a/Documentation/devicetree/bindings/input/st-keyscan.txt b/Documentation/devicetree/bindings/input/st-keyscan.txt
new file mode 100644
index 0000000..51eb428
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/st-keyscan.txt

@@ -0,0 +1,60 @@
+* ST Keyscan controller Device Tree bindings
+
+The ST keyscan controller Device Tree binding is based on the
+matrix-keymap.
+
+Required properties:
+- compatible: "st,sti-keyscan"
+
+- reg: Register base address and size of st-keyscan controller.
+
+- interrupts: Interrupt number for the st-keyscan controller.
+
+- clocks: Must contain one entry, for the module clock.
+  See ../clock/clock-bindings.txt for details.
+
+- pinctrl: Should specify pin control groups used for this controller.
+  See ../pinctrl/pinctrl-bindings.txt for details.
+
+- linux,keymap: The keymap for keys as described in the binding document
+  devicetree/bindings/input/matrix-keymap.txt.
+
+- keypad,num-rows: Number of row lines connected to the keypad controller.
+
+- keypad,num-columns: Number of column lines connected to the keypad
+  controller.
+
+Optional property:
+- st,debounce_us: Debouncing interval time in microseconds
+
+Example:
+
+keyscan: keyscan@fe4b0000 {
+	compatible = "st,sti-keyscan";
+	reg = <0xfe4b0000 0x2000>;
+	interrupts = <GIC_SPI 212 IRQ_TYPE_NONE>;
+	clocks	= <&CLK_SYSIN>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_keyscan>;
+
+	keypad,num-rows = <4>;
+	keypad,num-columns = <4>;
+	st,debounce_us = <5000>;
+
+	linux,keymap = < MATRIX_KEY(0x00, 0x00, KEY_F13)
+			 MATRIX_KEY(0x00, 0x01, KEY_F9)
+			 MATRIX_KEY(0x00, 0x02, KEY_F5)
+			 MATRIX_KEY(0x00, 0x03, KEY_F1)
+			 MATRIX_KEY(0x01, 0x00, KEY_F14)
+			 MATRIX_KEY(0x01, 0x01, KEY_F10)
+			 MATRIX_KEY(0x01, 0x02, KEY_F6)
+			 MATRIX_KEY(0x01, 0x03, KEY_F2)
+			 MATRIX_KEY(0x02, 0x00, KEY_F15)
+			 MATRIX_KEY(0x02, 0x01, KEY_F11)
+			 MATRIX_KEY(0x02, 0x02, KEY_F7)
+			 MATRIX_KEY(0x02, 0x03, KEY_F3)
+			 MATRIX_KEY(0x03, 0x00, KEY_F16)
+			 MATRIX_KEY(0x03, 0x01, KEY_F12)
+			 MATRIX_KEY(0x03, 0x02, KEY_F8)
+			 MATRIX_KEY(0x03, 0x03, KEY_F4) >;
+	};

diff --git a/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt b/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt
new file mode 100644
index 0000000..aef5779
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt

@@ -0,0 +1,20 @@
+sun4i resistive touchscreen controller
+--------------------------------------
+
+Required properties:
+ - compatible: "allwinner,sun4i-a10-ts"
+ - reg: mmio address range of the chip
+ - interrupts: interrupt to which the chip is connected
+
+Optional properties:
+ - allwinner,ts-attached: boolean indicating that an actual touchscreen is
+			  attached to the controller
+
+Example:
+
+	rtp: rtp@01c25000 {
+		compatible = "allwinner,sun4i-a10-ts";
+		reg = <0x01c25000 0x100>;
+		interrupts = <29>;
+		allwinner,ts-attached;
+	};

diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
new file mode 100644
index 0000000..d8e0616
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt

@@ -0,0 +1,27 @@
+General Touchscreen Properties:
+
+Optional properties for Touchscreens:
+ - touchscreen-size-x		: horizontal resolution of touchscreen
+				  (in pixels)
+ - touchscreen-size-y		: vertical resolution of touchscreen
+				  (in pixels)
+ - touchscreen-max-pressure	: maximum reported pressure (arbitrary range
+				  dependent on the controller)
+ - touchscreen-fuzz-x		: horizontal noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-y		: vertical noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-pressure	: pressure noise value of the absolute input
+				  device (arbitrary range dependent on the
+				  controller)
+ - touchscreen-inverted-x	: X axis is inverted (boolean)
+ - touchscreen-inverted-y	: Y axis is inverted (boolean)
+
+Deprecated properties for Touchscreens:
+ - x-size			: deprecated name for touchscreen-size-x
+ - y-size			: deprecated name for touchscreen-size-y
+ - moving-threshold		: deprecated name for a combination of
+				  touchscreen-fuzz-x and touchscreen-fuzz-y
+ - contact-threshold		: deprecated name for touchscreen-fuzz-pressure
+ - x-invert			: deprecated name for touchscreen-inverted-x
+ - y-invert			: deprecated name for touchscreen-inverted-y

diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
new file mode 100644
index 0000000..4b641c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt

@@ -0,0 +1,42 @@
+* Texas Instruments tsc2005 touchscreen controller
+
+Required properties:
+ - compatible		      : "ti,tsc2005"
+ - reg			      : SPI device address
+ - spi-max-frequency	      : Maximal SPI speed
+ - interrupts		      : IRQ specifier
+ - reset-gpios		      : GPIO specifier
+ - vio-supply                 : Regulator specifier
+
+Optional properties:
+ - ti,x-plate-ohms	      : integer, resistance of the touchscreen's X plates
+				in ohm (defaults to 280)
+ - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after
+				the configured time (in milliseconds), the driver
+				will reset it. This is disabled by default.
+ - properties defined in touchscreen.txt
+
+Example:
+
+&mcspi1 {
+	tsc2005@0 {
+		compatible = "ti,tsc2005";
+		spi-max-frequency = <6000000>;
+		reg = <0>;
+
+		vio-supply = <&vio>;
+
+		reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+		interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+		touchscreen-fuzz-x = <4>;
+		touchscreen-fuzz-y = <7>;
+		touchscreen-fuzz-pressure = <2>;
+		touchscreen-size-x = <4096>;
+		touchscreen-size-y = <4096>;
+		touchscreen-max-pressure = <2048>;
+
+		ti,x-plate-ohms = <280>;
+		ti,esd-recovery-timeout-ms = <8000>;
+	};
+};

diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
index c55b8c0..1b66a41 100644
--- a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
+++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt

@@ -1,7 +1,13 @@
 Binding for TI/National Semiconductor LP55xx Led Drivers
 
 Required properties:
-- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" or "ti,lp8501"
+- compatible: one of
+	national,lp5521
+	national,lp5523
+	ti,lp55231
+	ti,lp5562
+	ti,lp8501
+
 - reg: I2C slave address
 - clock-mode: Input clock mode, (0: automode, 1: internal, 2: external)
 

diff --git a/Documentation/devicetree/bindings/leds/leds-pwm.txt b/Documentation/devicetree/bindings/leds/leds-pwm.txt
index 7297107..6c6583c 100644
--- a/Documentation/devicetree/bindings/leds/leds-pwm.txt
+++ b/Documentation/devicetree/bindings/leds/leds-pwm.txt

@@ -13,6 +13,8 @@
   For the pwms and pwm-names property please refer to:
   Documentation/devicetree/bindings/pwm/pwm.txt
 - max-brightness : Maximum brightness possible for the LED
+- active-low : (optional) For PWMs where the LED is wired to supply
+  rather than ground.
 - label :  (optional)
   see Documentation/devicetree/bindings/leds/common.txt
 - linux,default-trigger :  (optional)

diff --git a/Documentation/devicetree/bindings/mfd/bfticu.txt b/Documentation/devicetree/bindings/mfd/bfticu.txt
new file mode 100644
index 0000000..65c9077
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/bfticu.txt

@@ -0,0 +1,25 @@
+KEYMILE bfticu Chassis Management FPGA
+
+The bfticu is a multifunction device that manages the whole chassis.
+Its main functionality is to collect IRQs from the whole chassis and signal
+them to a single controller.
+
+Required properties:
+- compatible: "keymile,bfticu"
+- interrupt-controller: the bfticu FPGA is an interrupt controller
+- interrupts: the main IRQ line to signal the collected IRQs
+- #interrupt-cells : is 2 and their usage is compliant to the 2 cells variant
+  of Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+- interrupt-parent: the parent IRQ ctrl the main IRQ is connected to
+- reg: access on the parent local bus (chip select, offset in chip select, size)
+
+Example:
+
+	chassis-mgmt@3,0 {
+		compatible = "keymile,bfticu";
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		reg = <3 0 0x100>;
+		interrupt-parent = <&mpic>;
+		interrupts = <6 1 0 0>;
+	};

diff --git a/Documentation/devicetree/bindings/mfd/qriox.txt b/Documentation/devicetree/bindings/mfd/qriox.txt
new file mode 100644
index 0000000..f301e2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qriox.txt

@@ -0,0 +1,17 @@
+KEYMILE qrio Board Control CPLD
+
+The qrio is a multifunction device that controls the KEYMILE boards based on
+the kmp204x design.
+It consists of a reset controller, watchdog timer, LEDs, and 2 IRQ capable
+GPIO blocks.
+
+Required properties:
+- compatible: "keymile,qriox"
+- reg: access on the parent local bus (chip select, offset in chip select, size)
+
+Example:
+
+	board-control@1,0 {
+		compatible = "keymile,qriox";
+		reg = <1 0 0x80>;
+	};

diff --git a/Documentation/devicetree/bindings/mfd/twl4030-power.txt b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
index 8e15ec3..b9ee7b9 100644
--- a/Documentation/devicetree/bindings/mfd/twl4030-power.txt
+++ b/Documentation/devicetree/bindings/mfd/twl4030-power.txt

@@ -5,7 +5,22 @@
 binding only supports the complete shutdown of the system after poweroff.
 
 Required properties:
-- compatible : must be "ti,twl4030-power"
+- compatible : must be one of the following
+	"ti,twl4030-power"
+	"ti,twl4030-power-reset"
+	"ti,twl4030-power-idle"
+	"ti,twl4030-power-idle-osc-off"
+
+The use of ti,twl4030-power-reset is recommended at least on
+3530 that needs a special configuration for warm reset to work.
+
+When using ti,twl4030-power-idle, the TI recommended configuration
+for idle modes is loaded to the twl4030 PMIC.
+
+When using ti,twl4030-power-idle-osc-off, the TI recommended
+configuration is used with the external oscillator being shut
+down during off-idle. Note that this does not work on all boards
+depending on how the external oscillator is wired.
 
 Optional properties:
 - ti,use_poweroff: With this flag, the chip will initiates an ACTIVE-to-OFF or

diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 9dce540..3c18001 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt

@@ -38,6 +38,8 @@
 - mmc-highspeed-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
 - mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
 - mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
+- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
+- mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported
 
 *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
 polarity properties, we have to fix the meaning of the "normal" and "inverted"

diff --git a/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt
new file mode 100644
index 0000000..b638191
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt

@@ -0,0 +1,30 @@
+MOXA ART MMC Host Controller Interface
+
+  Inherits from mmc binding[1].
+
+  [1] Documentation/devicetree/bindings/mmc/mmc.txt
+
+Required properties:
+
+- compatible :	Must be "moxa,moxart-mmc" or "faraday,ftsdc010"
+- reg :		Should contain registers location and length
+- interrupts :	Should contain the interrupt number
+- clocks :	Should contain phandle for the clock feeding the MMC controller
+
+Optional properties:
+
+- dmas :	Should contain two DMA channels, line request number must be 5 for
+		both channels
+- dma-names :	Must be "tx", "rx"
+
+Example:
+
+	mmc: mmc@98e00000 {
+		compatible = "moxa,moxart-mmc";
+		reg = <0x98e00000 0x5C>;
+		interrupts = <5 0>;
+		clocks = <&clk_apb>;
+		dmas =  <&dma 5>,
+			<&dma 5>;
+		dma-names = "tx", "rx";
+	};

diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 8f3f133..2d4a725 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt

@@ -69,10 +69,6 @@
 
 * supports-highspeed: Enables support for high speed cards (up to 50MHz)
 
-* caps2-mmc-hs200-1_8v: Supports mmc HS200 SDR 1.8V mode
-
-* caps2-mmc-hs200-1_2v: Supports mmc HS200 SDR 1.2V mode
-
 * broken-cd: as documented in mmc core bindings.
 
 * vmmc-supply: The phandle to the regulator to use for vmmc.  If this is
@@ -103,7 +99,6 @@
 		clock-freq-min-max = <400000 200000000>;
 		num-slots = <1>;
 		supports-highspeed;
-		caps2-mmc-hs200-1_8v;
 		broken-cd;
 		fifo-depth = <0x80>;
 		card-detect-delay = <200>;

diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
new file mode 100644
index 0000000..8babdaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt

@@ -0,0 +1,33 @@
+* Renesas usdhi6rol0 SD/SDIO host controller
+
+Required properties:
+
+- compatible:	must be
+		"renesas,usdhi6rol0"
+- interrupts:	3 interrupts, named "card detect", "data" and "SDIO" must be
+		specified
+- clocks:	a clock binding for the IMCLK input
+
+Optional properties:
+
+- vmmc-supply:	a phandle of a regulator, supplying Vcc to the card
+- vqmmc-supply:	a phandle of a regulator, supplying VccQ to the card
+
+Additionally any standard mmc bindings from mmc.txt can be used.
+
+Example:
+
+sd0: sd@ab000000 {
+	compatible = "renesas,usdhi6rol0";
+	reg = <0xab000000 0x200>;
+	interrupts = <0 23 0x4
+		      0 24 0x4
+		      0 25 0x4>;
+	interrupt-names = "card detect", "data", "SDIO";
+	bus-width = <4>;
+	max-frequency = <50000000>;
+	cap-power-off-card;
+	clocks = <&imclk>;
+	vmmc-supply = <&vcc_sd0>;
+	vqmmc-supply = <&vccq_sd0>;
+};

diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
new file mode 100644
index 0000000..823d134
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt

@@ -0,0 +1,35 @@
+* Freescale Quad Serial Peripheral Interface(QuadSPI)
+
+Required properties:
+  - compatible : Should be "fsl,vf610-qspi"
+  - reg : the first contains the register location and length,
+          the second contains the memory mapping address and length
+  - reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory"
+  - interrupts : Should contain the interrupt for the device
+  - clocks : The clocks needed by the QuadSPI controller
+  - clock-names : the name of the clocks
+
+Optional properties:
+  - fsl,qspi-has-second-chip: The controller has two buses, bus A and bus B.
+                              Each bus can be connected with two NOR flashes.
+			      Most of the time, each bus only has one NOR flash
+			      connected, this is the default case.
+			      But if there are two NOR flashes connected to the
+			      bus, you should enable this property.
+			      (Please check the board's schematic.)
+
+Example:
+
+qspi0: quadspi@40044000 {
+	compatible = "fsl,vf610-qspi";
+	reg = <0x40044000 0x1000>, <0x20000000 0x10000000>;
+	reg-names = "QuadSPI", "QuadSPI-memory";
+	interrupts = <0 24 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&clks VF610_CLK_QSPI0_EN>,
+		<&clks VF610_CLK_QSPI0>;
+	clock-names = "qspi_en", "qspi";
+
+	flash0: s25fl128s@0 {
+		....
+	};
+};

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index eb05255..65f4f7c 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt

@@ -28,6 +28,8 @@
 		"ham1"		1-bit Hamming ecc code
 		"bch4"		4-bit BCH ecc code
 		"bch8"		8-bit BCH ecc code
+		"bch16"		16-bit BCH ECC code
+		Refer to "How to select correct ECC scheme for your device ?" below
 
  - ti,nand-xfer-type:		A string setting the data transfer type. One of:
 
@@ -90,3 +92,46 @@
 		};
 	};
 
+How to select correct ECC scheme for your device ?
+--------------------------------------------------
+Higher ECC scheme usually means better protection against bit-flips and
+increased system lifetime. However, selection of ECC scheme is dependent
+on various other factors, such as:
+
+(1) support of built in hardware engines.
+	Some legacy OMAP SoC do not have ELM hardware engine, so those SoC cannot
+	support ecc-schemes with hardware error-correction (BCHx_HW). However
+	such SoC can use ecc-schemes with software library for error-correction
+	(BCHx_HW_DETECTION_SW). The error correction capability with software
+	library remains equivalent to their hardware counter-part, but there is
+	slight CPU penalty when too many bit-flips are detected during reads.
+
+(2) Device parameters like OOBSIZE.
+	Other factor which governs the selection of ecc-scheme is oob-size.
+	Higher ECC schemes require more OOB/Spare area to store ECC syndrome,
+	so the device should have enough free bytes available in its OOB/Spare
+	area to accommodate ECC for entire page. In general following expression
+	helps in determining if given device can accommodate ECC syndrome:
+	"2 + (PAGESIZE / 512) * ECC_BYTES <= OOBSIZE"
+	where
+		OOBSIZE		number of bytes in OOB/spare area
+		PAGESIZE	number of bytes in main-area of device page
+		ECC_BYTES	number of ECC bytes generated to protect
+		                512 bytes of data, which is:
+				'3' for HAM1_xx ecc schemes
+				'7' for BCH4_xx ecc schemes
+				'14' for BCH8_xx ecc schemes
+				'26' for BCH16_xx ecc schemes
+
+	Example(a): For a device with PAGESIZE = 2048 and OOBSIZE = 64 and
+		trying to use BCH16 (ECC_BYTES=26) ecc-scheme.
+		Number of ECC bytes per page = (2 + (2048 / 512) * 26) = 106 B
+		which is greater than capacity of NAND device (OOBSIZE=64)
+		Hence, BCH16 cannot be supported on given device. But it can
+		probably use lower ecc-schemes like BCH8.
+
+	Example(b): For a device with PAGESIZE = 2048 and OOBSIZE = 128 and
+		trying to use BCH16 (ECC_BYTES=26) ecc-scheme.
+		Number of ECC bytes per page = (2 + (2048 / 512) * 26) = 106 B
+		which can be accommodated in the OOB/Spare area of this device
+		(OOBSIZE=128). So this device can use BCH16 ecc-scheme.

diff --git a/Documentation/devicetree/bindings/mtd/m25p80.txt b/Documentation/devicetree/bindings/mtd/m25p80.txt
index 6d3d576..4611aa8 100644
--- a/Documentation/devicetree/bindings/mtd/m25p80.txt
+++ b/Documentation/devicetree/bindings/mtd/m25p80.txt

@@ -5,8 +5,8 @@
   representing partitions.
 - compatible : Should be the manufacturer and the name of the chip. Bear in mind
                the DT binding is not Linux-only, but in case of Linux, see the
-               "m25p_ids" table in drivers/mtd/devices/m25p80.c for the list of
-               supported chips.
+               "spi_nor_ids" table in drivers/mtd/spi-nor/spi-nor.c for the list
+               of supported chips.
 - reg : Chip-Select number
 - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
 

diff --git a/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
index 86e0a56..de8b517 100644
--- a/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt

@@ -17,6 +17,14 @@
  - num-cs:			Number of chipselect lines to usw
  - nand-on-flash-bbt: 		boolean to enable on flash bbt option if
 				not present false
+ - nand-ecc-strength:           number of bits to correct per ECC step
+ - nand-ecc-step-size:          number of data bytes covered by a single ECC step
+
+The following ECC strength and step size are currently supported:
+
+ - nand-ecc-strength = <1>, nand-ecc-step-size = <512>
+ - nand-ecc-strength = <4>, nand-ecc-step-size = <512>
+ - nand-ecc-strength = <8>, nand-ecc-step-size = <512>
 
 Example:
 

diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt
new file mode 100644
index 0000000..d01ed63
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt

@@ -0,0 +1,17 @@
+* AMD 10GbE PHY driver (amd-xgbe-phy)
+
+Required properties:
+- compatible: Should be "amd,xgbe-phy-seattle-v1a" and
+  "ethernet-phy-ieee802.3-c45"
+- reg: Address and length of the register sets for the device
+   - SerDes Rx/Tx registers
+   - SerDes integration registers (1/2)
+   - SerDes integration registers (2/2)
+
+Example:
+	xgbe_phy@e1240800 {
+		compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45";
+		reg = <0 0xe1240800 0 0x00400>,
+		      <0 0xe1250000 0 0x00060>,
+		      <0 0xe1250080 0 0x00004>;
+	};

diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt
new file mode 100644
index 0000000..ea0c790
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt

@@ -0,0 +1,34 @@
+* AMD 10GbE driver (amd-xgbe)
+
+Required properties:
+- compatible: Should be "amd,xgbe-seattle-v1a"
+- reg: Address and length of the register sets for the device
+   - MAC registers
+   - PCS registers
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the amd-xgbe interrupt
+- clocks: Should be the DMA clock for the amd-xgbe device (used for
+  calculating the correct Rx interrupt watchdog timer value on a DMA
+  channel for coalescing)
+- clock-names: Should be the name of the DMA clock, "dma_clk"
+- phy-handle: See ethernet.txt file in the same directory
+- phy-mode: See ethernet.txt file in the same directory
+
+Optional properties:
+- mac-address: mac address to be assigned to the device. Can be overridden
+  by UEFI.
+
+Example:
+	xgbe@e0700000 {
+		compatible = "amd,xgbe-seattle-v1a";
+		reg = <0 0xe0700000 0 0x80000>,
+		      <0 0xe0780000 0 0x80000>;
+		interrupt-parent = <&gic>;
+		interrupts = <0 325 4>;
+		clocks = <&xgbe_clk>;
+		clock-names = "dma_clk";
+		phy-handle = <&phy>;
+		phy-mode = "xgmii";
+		mac-address = [ 02 a1 a2 a3 a4 a5 ];
+	};

diff --git a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt
index f2febb9..451fef2 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt
+++ b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt

@@ -24,7 +24,7 @@
 - fixed-link: When the GENET interface is connected to a MoCA hardware block or
   when operating in a RGMII to RGMII type of connection, or when the MDIO bus is
   voluntarily disabled, this property should be used to describe the "fixed link".
-  See Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for information on
+  See Documentation/devicetree/bindings/net/fixed-link.txt for information on
   the property specifics
 
 Required child nodes:

diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt
new file mode 100644
index 0000000..c183ea9
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt

@@ -0,0 +1,29 @@
+* Broadcom BCM7xxx Ethernet Systemport Controller (SYSTEMPORT)
+
+Required properties:
+- compatible: should be one of "brcm,systemport-v1.00" or "brcm,systemport"
+- reg: address and length of the register set for the device.
+- interrupts: interrupts for the device, first cell must be for the rx
+  interrupts, and the second cell should be for the transmit queues
+- local-mac-address: Ethernet MAC address (48 bits) of this adapter
+- phy-mode: Should be a string describing the PHY interface to the
+  Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt
+- fixed-link: see Documentation/devicetree/bindings/net/fixed-link.txt for
+  the property specific details
+
+Optional properties:
+- systemport,num-tier2-arb: number of tier 2 arbiters, an integer
+- systemport,num-tier1-arb: number of tier 1 arbiters, an integer
+- systemport,num-txq: number of HW transmit queues, an integer
+- systemport,num-rxq: number of HW receive queues, an integer
+
+Example:
+ethernet@f04a0000 {
+	compatible = "brcm,systemport-v1.00";
+	reg = <0xf04a0000 0x4650>;
+	local-mac-address = [ 00 11 22 33 44 55 ];
+	fixed-link = <0 1 1000 0 0>;
+	phy-mode = "gmii";
+	interrupts = <0x0 0x16 0x0>,
+		<0x0 0x17 0x0>;
+};

diff --git a/Documentation/devicetree/bindings/net/can/xilinx_can.txt b/Documentation/devicetree/bindings/net/can/xilinx_can.txt
new file mode 100644
index 0000000..fe38847
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/xilinx_can.txt

@@ -0,0 +1,44 @@
+Xilinx Axi CAN/Zynq CANPS controller Device Tree Bindings
+---------------------------------------------------------
+
+Required properties:
+- compatible		: Should be "xlnx,zynq-can-1.0" for Zynq CAN
+			  controllers and "xlnx,axi-can-1.00.a" for Axi CAN
+			  controllers.
+- reg			: Physical base address and size of the Axi CAN/Zynq
+			  CANPS registers map.
+- interrupts		: Property with a value describing the interrupt
+			  number.
+- interrupt-parent	: Must be core interrupt controller
+- clock-names		: List of input clock names - "can_clk", "pclk"
+			  (For CANPS), "can_clk" , "s_axi_aclk"(For AXI CAN)
+			  (See clock bindings for details).
+- clocks		: Clock phandles (see clock bindings for details).
+- tx-fifo-depth		: Can Tx fifo depth.
+- rx-fifo-depth		: Can Rx fifo depth.
+
+
+Example:
+
+For Zynq CANPS Dts file:
+	zynq_can_0: can@e0008000 {
+			compatible = "xlnx,zynq-can-1.0";
+			clocks = <&clkc 19>, <&clkc 36>;
+			clock-names = "can_clk", "pclk";
+			reg = <0xe0008000 0x1000>;
+			interrupts = <0 28 4>;
+			interrupt-parent = <&intc>;
+			tx-fifo-depth = <0x40>;
+			rx-fifo-depth = <0x40>;
+		};
+For Axi CAN Dts file:
+	axi_can_0: axi-can@40000000 {
+			compatible = "xlnx,axi-can-1.00.a";
+			clocks = <&clkc 0>, <&clkc 1>;
+			clock-names = "can_clk","s_axi_aclk" ;
+			reg = <0x40000000 0x10000>;
+			interrupt-parent = <&intc>;
+			interrupts = <0 59 1>;
+			tx-fifo-depth = <0x40>;
+			rx-fifo-depth = <0x40>;
+		};

diff --git a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt
index 7ff57a1..764c0c7 100644
--- a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt
+++ b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt

@@ -2,7 +2,9 @@
 -----------------------------------------------
 
 Required properties:
-- compatible		: Should be "ti,am3352-cpsw-phy-sel"
+- compatible		: Should be "ti,am3352-cpsw-phy-sel" for am335x platform and
+			  "ti,dra7xx-cpsw-phy-sel" for dra7xx platform
+			  "ti,am43xx-cpsw-phy-sel" for am43xx platform
 - reg			: physical base address and size of the cpsw
 			  registers map
 - reg-names		: names of the register map given in "reg" node

diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
new file mode 100644
index 0000000..82bf7e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt

@@ -0,0 +1,42 @@
+Fixed link Device Tree binding
+------------------------------
+
+Some Ethernet MACs have a "fixed link", and are not connected to a
+normal MDIO-managed PHY device. For those situations, a Device Tree
+binding allows to describe a "fixed link".
+
+Such a fixed link situation is described by creating a 'fixed-link'
+sub-node of the Ethernet MAC device node, with the following
+properties:
+
+* 'speed' (integer, mandatory), to indicate the link speed. Accepted
+  values are 10, 100 and 1000
+* 'full-duplex' (boolean, optional), to indicate that full duplex is
+  used. When absent, half duplex is assumed.
+* 'pause' (boolean, optional), to indicate that pause should be
+  enabled.
+* 'asym-pause' (boolean, optional), to indicate that asym_pause should
+  be enabled.
+
+Old, deprecated 'fixed-link' binding:
+
+* A 'fixed-link' property in the Ethernet MAC node, with 5 cells, of the
+  form <a b c d e> with the following accepted values:
+  - a: emulated PHY ID, choose any, but unique among all specified
+    fixed-links, from 0 to 31
+  - b: duplex configuration: 0 for half duplex, 1 for full duplex
+  - c: link speed in Mbits/sec, accepted values are: 10, 100 and 1000
+  - d: pause configuration: 0 for no pause, 1 for pause
+  - e: asymmetric pause configuration: 0 for no asymmetric pause, 1 for
+    asymmetric pause
+
+Example:
+
+ethernet@0 {
+	...
+	fixed-link {
+	      speed = <1000>;
+	      full-duplex;
+	};
+	...
+};

diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index 737cdef..be6ea89 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt

@@ -42,10 +42,7 @@
     interrupt.  For TSEC and eTSEC devices, the first interrupt is
     transmit, the second is receive, and the third is error.
   - phy-handle : See ethernet.txt file in the same directory.
-  - fixed-link : <a b c d e> where a is emulated phy id - choose any,
-    but unique to the all specified fixed-links, b is duplex - 0 half,
-    1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no
-    pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause.
+  - fixed-link : See fixed-link.txt in the same directory.
   - phy-connection-type : See ethernet.txt file in the same directory.
     This property is only really needed if the connection is of type
     "rgmii-id", as all other connection types are detected by hardware.

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
new file mode 100644
index 0000000..75d398b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt

@@ -0,0 +1,36 @@
+Hisilicon hix5hd2 gmac controller
+
+Required properties:
+- compatible: should be "hisilicon,hix5hd2-gmac".
+- reg: specifies base physical address(es) and size of the device registers.
+  The first region is the MAC register base and size.
+  The second region is external interface control register.
+- interrupts: should contain the MAC interrupt.
+- #address-cells: must be <1>.
+- #size-cells: must be <0>.
+- phy-mode: see ethernet.txt [1].
+- phy-handle: see ethernet.txt [1].
+- mac-address: see ethernet.txt [1].
+- clocks: clock phandle and specifier pair.
+
+- PHY subnode: inherits from phy binding [2]
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+	gmac0: ethernet@f9840000 {
+		compatible = "hisilicon,hix5hd2-gmac";
+		reg = <0xf9840000 0x1000>,<0xf984300c 0x4>;
+		interrupts = <0 71 4>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy-mode = "mii";
+		phy-handle = <&phy2>;
+		mac-address = [00 00 00 00 00 00];
+		clocks = <&clock HIX5HD2_MAC0_CLK>;
+
+		phy2: ethernet-phy@2 {
+			reg = <2>;
+		};
+	};

diff --git a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
new file mode 100644
index 0000000..d3bbdded
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt

@@ -0,0 +1,23 @@
+* AT86RF230 IEEE 802.15.4 *
+
+Required properties:
+  - compatible:		should be "atmel,at86rf230", "atmel,at86rf231",
+			"atmel,at86rf233" or "atmel,at86rf212"
+  - spi-max-frequency:	maximal bus speed, should be set to 7500000 depending
+			on sync or async operation mode
+  - reg:		the chipselect index
+  - interrupts:		the interrupt generated by the device
+
+Optional properties:
+  - reset-gpio:		GPIO spec for the rstn pin
+  - sleep-gpio:		GPIO spec for the slp_tr pin
+
+Example:
+
+	at86rf231@0 {
+		compatible = "atmel,at86rf231";
+		spi-max-frequency = <7500000>;
+		reg = <0>;
+		interrupts = <19 1>;
+		interrupt-parent = <&gpio3>;
+	};

diff --git a/Documentation/devicetree/bindings/net/micrel-ks8851.txt b/Documentation/devicetree/bindings/net/micrel-ks8851.txt
index d54d0cc..bbdf9a7 100644
--- a/Documentation/devicetree/bindings/net/micrel-ks8851.txt
+++ b/Documentation/devicetree/bindings/net/micrel-ks8851.txt

@@ -1,9 +1,18 @@
-Micrel KS8851 Ethernet mac
+Micrel KS8851 Ethernet mac (MLL)
 
 Required properties:
-- compatible = "micrel,ks8851-ml" of parallel interface
+- compatible = "micrel,ks8851-mll" of parallel interface
 - reg : 2 physical address and size of registers for data and command
 - interrupts : interrupt connection
 
+Micrel KS8851 Ethernet mac (SPI)
+
+Required properties:
+- compatible = "micrel,ks8851" or the deprecated "ks8851"
+- reg : chip select number
+- interrupts : interrupt connection
+
 Optional properties:
-- vdd-supply:	supply for Ethernet mac
+- vdd-supply: analog 3.3V supply for Ethernet mac
+- vdd-io-supply: digital 1.8V IO supply for Ethernet mac
+- reset-gpios: reset_n input pin

diff --git a/Documentation/devicetree/bindings/net/micrel-ksz9021.txt b/Documentation/devicetree/bindings/net/micrel-ksz9021.txt
deleted file mode 100644
index 997a63f..0000000
--- a/Documentation/devicetree/bindings/net/micrel-ksz9021.txt
+++ /dev/null

@@ -1,49 +0,0 @@
-Micrel KSZ9021 Gigabit Ethernet PHY
-
-Some boards require special tuning values, particularly when it comes to
-clock delays.  You can specify clock delay values by adding
-micrel-specific properties to an Ethernet OF device node.
-
-All skew control options are specified in picoseconds.  The minimum
-value is 0, and the maximum value is 3000.
-
-Optional properties:
- - rxc-skew-ps : Skew control of RXC pad
- - rxdv-skew-ps : Skew control of RX CTL pad
- - txc-skew-ps : Skew control of TXC pad
- - txen-skew-ps : Skew control of TX_CTL pad
- - rxd0-skew-ps : Skew control of RX data 0 pad
- - rxd1-skew-ps : Skew control of RX data 1 pad
- - rxd2-skew-ps : Skew control of RX data 2 pad
- - rxd3-skew-ps : Skew control of RX data 3 pad
- - txd0-skew-ps : Skew control of TX data 0 pad
- - txd1-skew-ps : Skew control of TX data 1 pad
- - txd2-skew-ps : Skew control of TX data 2 pad
- - txd3-skew-ps : Skew control of TX data 3 pad
-
-Examples:
-
-	/* Attach to an Ethernet device with autodetected PHY */
-	&enet {
-		rxc-skew-ps = <3000>;
-		rxdv-skew-ps = <0>;
-		txc-skew-ps = <3000>;
-		txen-skew-ps = <0>;
-		status = "okay";
-	};
-
-	/* Attach to an explicitly-specified PHY */
-	mdio {
-		phy0: ethernet-phy@0 {
-			rxc-skew-ps = <3000>;
-			rxdv-skew-ps = <0>;
-			txc-skew-ps = <3000>;
-			txen-skew-ps = <0>;
-			reg = <0>;
-		};
-	};
-	ethernet@70000 {
-		status = "okay";
-		phy = <&phy0>;
-		phy-mode = "rgmii-id";
-	};

diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
new file mode 100644
index 0000000..692076f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt

@@ -0,0 +1,83 @@
+Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY
+
+Some boards require special tuning values, particularly when it comes to
+clock delays. You can specify clock delay values by adding
+micrel-specific properties to an Ethernet OF device node.
+
+Note that these settings are applied after any phy-specific fixup from
+phy_fixup_list (see phy_init_hw() from drivers/net/phy/phy_device.c),
+and therefore may overwrite them.
+
+KSZ9021:
+
+  All skew control options are specified in picoseconds. The minimum
+  value is 0, the maximum value is 3000, and it is incremented by 200ps
+  steps.
+
+  Optional properties:
+
+    - rxc-skew-ps : Skew control of RXC pad
+    - rxdv-skew-ps : Skew control of RX CTL pad
+    - txc-skew-ps : Skew control of TXC pad
+    - txen-skew-ps : Skew control of TX CTL pad
+    - rxd0-skew-ps : Skew control of RX data 0 pad
+    - rxd1-skew-ps : Skew control of RX data 1 pad
+    - rxd2-skew-ps : Skew control of RX data 2 pad
+    - rxd3-skew-ps : Skew control of RX data 3 pad
+    - txd0-skew-ps : Skew control of TX data 0 pad
+    - txd1-skew-ps : Skew control of TX data 1 pad
+    - txd2-skew-ps : Skew control of TX data 2 pad
+    - txd3-skew-ps : Skew control of TX data 3 pad
+
+KSZ9031:
+
+  All skew control options are specified in picoseconds. The minimum
+  value is 0, and the maximum is property-dependent. The increment
+  step is 60ps.
+
+  Optional properties:
+
+    Maximum value of 1860:
+
+      - rxc-skew-ps : Skew control of RX clock pad
+      - txc-skew-ps : Skew control of TX clock pad
+
+    Maximum value of 900:
+
+      - rxdv-skew-ps : Skew control of RX CTL pad
+      - txen-skew-ps : Skew control of TX CTL pad
+      - rxd0-skew-ps : Skew control of RX data 0 pad
+      - rxd1-skew-ps : Skew control of RX data 1 pad
+      - rxd2-skew-ps : Skew control of RX data 2 pad
+      - rxd3-skew-ps : Skew control of RX data 3 pad
+      - txd0-skew-ps : Skew control of TX data 0 pad
+      - txd1-skew-ps : Skew control of TX data 1 pad
+      - txd2-skew-ps : Skew control of TX data 2 pad
+      - txd3-skew-ps : Skew control of TX data 3 pad
+
+Examples:
+
+	/* Attach to an Ethernet device with autodetected PHY */
+	&enet {
+		rxc-skew-ps = <3000>;
+		rxdv-skew-ps = <0>;
+		txc-skew-ps = <3000>;
+		txen-skew-ps = <0>;
+		status = "okay";
+	};
+
+	/* Attach to an explicitly-specified PHY */
+	mdio {
+		phy0: ethernet-phy@0 {
+			rxc-skew-ps = <3000>;
+			rxdv-skew-ps = <0>;
+			txc-skew-ps = <3000>;
+			txen-skew-ps = <0>;
+			reg = <0>;
+		};
+	};
+	ethernet@70000 {
+		status = "okay";
+		phy = <&phy0>;
+		phy-mode = "rgmii-id";
+	};

diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt
new file mode 100644
index 0000000..dab69f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt

@@ -0,0 +1,35 @@
+* NXP Semiconductors PN544 NFC Controller
+
+Required properties:
+- compatible: Should be "nxp,pn544-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- enable-gpios: Output GPIO pin used for enabling/disabling the PN544
+- firmware-gpios: Output GPIO pin used to enter firmware download mode
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pinctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with PN544 on I2C2):
+
+&i2c2 {
+
+	status = "okay";
+
+	pn544: pn544@28 {
+
+		compatible = "nxp,pn544-i2c";
+
+		reg = <0x28>;
+		clock-frequency = <400000>;
+
+		interrupt-parent = <&gpio1>;
+		interrupts = <17 GPIO_ACTIVE_HIGH>;
+
+		enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+		firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
+	};
+};

diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfca.txt b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt
new file mode 100644
index 0000000..e4faa2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt

@@ -0,0 +1,33 @@
+* STMicroelectronics SAS. ST21NFCA NFC Controller
+
+Required properties:
+- compatible: Should be "st,st21nfca_i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- enable-gpios: Output GPIO pin used for enabling/disabling the ST21NFCA
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pinctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBoard xM with ST21NFCA on I2C2):
+
+&i2c2 {
+
+	status = "okay";
+
+	st21nfca: st21nfca@1 {
+
+		compatible = "st,st21nfca_i2c";
+
+		reg = <0x01>;
+		clock-frequency = <400000>;
+
+		interrupt-parent = <&gpio5>;
+		interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
+
+		enable-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>;
+	};
+};

diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
index 8dd3ef7..1e43613 100644
--- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt

@@ -12,6 +12,7 @@
 Optional SoC Specific Properties:
 - pinctrl-names: Contains only one value - "default".
 - pintctrl-0: Specifies the pin control groups used for this controller.
+- autosuspend-delay: Specify autosuspend delay in milliseconds.
 
 Example (for ARM-based BeagleBone with TRF7970A on SPI1):
 
@@ -29,6 +30,7 @@
 		ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>,
 				  <&gpio2 5 GPIO_ACTIVE_LOW>;
 		vin-supply = <&ldo3_reg>;
+		autosuspend-delay = <30000>;
 		status = "okay";
 	};
 };

diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt
new file mode 100644
index 0000000..334eca2
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/via-rhine.txt

@@ -0,0 +1,17 @@
+* VIA Rhine 10/100 Network Controller
+
+Required properties:
+- compatible : Should be "via,vt8500-rhine" for integrated
+	Rhine controllers found in VIA VT8500, WonderMedia WM8950
+	and similar. These are listed as 1106:3106 rev. 0x84 on the
+	virtual PCI bus under vendor-provided kernels
+- reg : Address and length of the io space
+- interrupts : Should contain the controller interrupt line
+
+Examples:
+
+ethernet@d8004000 {
+	compatible = "via,vt8500-rhine";
+	reg = <0xd8004000 0x100>;
+	interrupts = <10>;
+};

diff --git a/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt b/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt
new file mode 100644
index 0000000..7443b7c
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt

@@ -0,0 +1,7 @@
+AU Optronics Corporation 13.3" WXGA (1366x768) TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,b133xtn01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

diff --git a/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt b/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt
new file mode 100644
index 0000000..4903d7b
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt

@@ -0,0 +1,7 @@
+Emerging Display Technology Corp. 5.7" VGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,et057090dhu"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

diff --git a/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt b/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt
new file mode 100644
index 0000000..20cb38e
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt

@@ -0,0 +1,10 @@
+Emerging Display Technology Corp. ET070080DH6 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,et070080dh6"
+
+This panel is the same as ETM0700G0DH6 except for the touchscreen.
+ET070080DH6 is the model with resistive touch.
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

diff --git a/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt b/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt
new file mode 100644
index 0000000..ee4b180
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt

@@ -0,0 +1,10 @@
+Emerging Display Technology Corp. ETM0700G0DH6 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,etm0700g0dh6"
+
+This panel is the same as ET070080DH6 except for the touchscreen.
+ETM0700G0DH6 is the model with capacitive multitouch.
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
index d6fae13..d0d15ee 100644
--- a/Documentation/devicetree/bindings/pci/designware-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt

@@ -1,15 +1,7 @@
 * Synopsys Designware PCIe interface
 
 Required properties:
-- compatible: should contain "snps,dw-pcie" to identify the
-	core, plus an identifier for the specific instance, such
-	as "samsung,exynos5440-pcie" or "fsl,imx6q-pcie".
-- reg: base addresses and lengths of the pcie controller,
-	the phy controller, additional register for the phy controller.
-- interrupts: interrupt values for level interrupt,
-	pulse interrupt, special interrupt.
-- clocks: from common clock binding: handle to pci clock.
-- clock-names: from common clock binding: should be "pcie" and "pcie_bus".
+- compatible: should contain "snps,dw-pcie" to identify the core.
 - #address-cells: set to <3>
 - #size-cells: set to <2>
 - device_type: set to "pci"
@@ -19,65 +11,11 @@
 	to define the mapping of the PCIe interface to interrupt
 	numbers.
 - num-lanes: number of lanes to use
+- clocks: Must contain an entry for each entry in clock-names.
+	See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+	- "pcie"
+	- "pcie_bus"
 
 Optional properties:
 - reset-gpio: gpio pin number of power good signal
-
-Optional properties for fsl,imx6q-pcie
-- power-on-gpio: gpio pin number of power-enable signal
-- wake-up-gpio: gpio pin number of incoming wakeup signal
-- disable-gpio: gpio pin number of outgoing rfkill/endpoint disable signal
-
-Example:
-
-SoC specific DT Entry:
-
-	pcie@290000 {
-		compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
-		reg = <0x290000 0x1000
-			0x270000 0x1000
-			0x271000 0x40>;
-		interrupts = <0 20 0>, <0 21 0>, <0 22 0>;
-		clocks = <&clock 28>, <&clock 27>;
-		clock-names = "pcie", "pcie_bus";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		device_type = "pci";
-		ranges = <0x00000800 0 0x40000000 0x40000000 0 0x00001000   /* configuration space */
-			  0x81000000 0 0	  0x40001000 0 0x00010000   /* downstream I/O */
-			  0x82000000 0 0x40011000 0x40011000 0 0x1ffef000>; /* non-prefetchable memory */
-		#interrupt-cells = <1>;
-		interrupt-map-mask = <0 0 0 0>;
-		interrupt-map = <0x0 0 &gic 53>;
-		num-lanes = <4>;
-	};
-
-	pcie@2a0000 {
-		compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
-		reg = <0x2a0000 0x1000
-			0x272000 0x1000
-			0x271040 0x40>;
-		interrupts = <0 23 0>, <0 24 0>, <0 25 0>;
-		clocks = <&clock 29>, <&clock 27>;
-		clock-names = "pcie", "pcie_bus";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		device_type = "pci";
-		ranges = <0x00000800 0 0x60000000 0x60000000 0 0x00001000   /* configuration space */
-			  0x81000000 0 0	  0x60001000 0 0x00010000   /* downstream I/O */
-			  0x82000000 0 0x60011000 0x60011000 0 0x1ffef000>; /* non-prefetchable memory */
-		#interrupt-cells = <1>;
-		interrupt-map-mask = <0 0 0 0>;
-		interrupt-map = <0x0 0 &gic 56>;
-		num-lanes = <4>;
-	};
-
-Board specific DT Entry:
-
-	pcie@290000 {
-		reset-gpio = <&pin_ctrl 5 0>;
-	};
-
-	pcie@2a0000 {
-		reset-gpio = <&pin_ctrl 22 0>;
-	};

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
new file mode 100644
index 0000000..9455fd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt

@@ -0,0 +1,38 @@
+* Freescale i.MX6 PCIe interface
+
+This PCIe host controller is based on the Synopsys Designware PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "fsl,imx6q-pcie"
+- reg: base address and length of the pcie controller
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+  entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+	- "msi": The interrupt that is asserted when an MSI is received
+- clock-names: Must include the following additional entries:
+	- "pcie_phy"
+
+Example:
+
+	pcie@0x01000000 {
+		compatible = "fsl,imx6q-pcie", "snps,dw-pcie";
+		reg = <0x01ffc000 0x4000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x00000800 0 0x01f00000 0x01f00000 0 0x00080000
+			  0x81000000 0 0          0x01f80000 0 0x00010000
+			  0x82000000 0 0x01000000 0x01000000 0 0x00f00000>;
+		num-lanes = <1>;
+		interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "msi";
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 0x7>;
+		interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+		                <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+		                <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+		                <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&clks 144>, <&clks 206>, <&clks 189>;
+		clock-names = "pcie", "pcie_bus", "pcie_phy";
+	};

diff --git a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
new file mode 100644
index 0000000..4f9d23d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt

@@ -0,0 +1,65 @@
+* Samsung Exynos 5440 PCIe interface
+
+This PCIe host controller is based on the Synopsys Designware PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "samsung,exynos5440-pcie"
+- reg: base addresses and lengths of the pcie controller,
+	the phy controller, additional register for the phy controller.
+- interrupts: A list of interrupt outputs for level interrupt,
+	pulse interrupt, special interrupt.
+
+Example:
+
+SoC specific DT Entry:
+
+	pcie@290000 {
+		compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
+		reg = <0x290000 0x1000
+			0x270000 0x1000
+			0x271000 0x40>;
+		interrupts = <0 20 0>, <0 21 0>, <0 22 0>;
+		clocks = <&clock 28>, <&clock 27>;
+		clock-names = "pcie", "pcie_bus";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x00000800 0 0x40000000 0x40000000 0 0x00001000   /* configuration space */
+			  0x81000000 0 0	  0x40001000 0 0x00010000   /* downstream I/O */
+			  0x82000000 0 0x40011000 0x40011000 0 0x1ffef000>; /* non-prefetchable memory */
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+		num-lanes = <4>;
+	};
+
+	pcie@2a0000 {
+		compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
+		reg = <0x2a0000 0x1000
+			0x272000 0x1000
+			0x271040 0x40>;
+		interrupts = <0 23 0>, <0 24 0>, <0 25 0>;
+		clocks = <&clock 29>, <&clock 27>;
+		clock-names = "pcie", "pcie_bus";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x00000800 0 0x60000000 0x60000000 0 0x00001000   /* configuration space */
+			  0x81000000 0 0	  0x60001000 0 0x00010000   /* downstream I/O */
+			  0x82000000 0 0x60011000 0x60011000 0 0x1ffef000>; /* non-prefetchable memory */
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+		num-lanes = <4>;
+	};
+
+Board specific DT Entry:
+
+	pcie@290000 {
+		reset-gpio = <&pin_ctrl 5 0>;
+	};
+
+	pcie@2a0000 {
+		reset-gpio = <&pin_ctrl 22 0>;
+	};

diff --git a/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt b/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt
new file mode 100644
index 0000000..db93921
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt

@@ -0,0 +1,54 @@
+
+IBM Akebono board device tree
+=============================
+
+The IBM Akebono board is a development board for the PPC476GTR SoC.
+
+0) The root node
+
+   Required properties:
+
+   - model : "ibm,akebono".
+   - compatible : "ibm,akebono" , "ibm,476gtr".
+
+1.a) The Secure Digital Host Controller Interface (SDHCI) node
+
+  Represent the Secure Digital Host Controller Interfaces.
+
+  Required properties:
+
+   - compatible : should be "ibm,476gtr-sdhci","generic-sdhci".
+   - reg : should contain the SDHCI registers location and length.
+   - interrupt-parent : a phandle for the interrupt controller.
+   - interrupts : should contain the SDHCI interrupt.
+
+1.b) The Advanced Host Controller Interface (AHCI) SATA node
+
+  Represents the advanced host controller SATA interface.
+
+  Required properties:
+
+   - compatible : should be "ibm,476gtr-ahci".
+   - reg : should contain the AHCI registers location and length.
+   - interrupt-parent : a phandle for the interrupt controller.
+   - interrupts : should contain the AHCI interrupt.
+
+1.c) The FPGA node
+
+  The Akebono board stores some board information such as the revision
+  number in an FPGA which is represented by this node.
+
+  Required properties:
+
+   - compatible : should be "ibm,akebono-fpga".
+   - reg : should contain the FPGA registers location and length.
+
+1.d) The AVR node
+
+  The Akebono board has an Atmel AVR microprocessor attached to the I2C
+  bus as a power controller for the board.
+
+  Required properties:
+
+   - compatible : should be "ibm,akebono-avr".
+   - reg : should contain the I2C bus address for the AVR.

diff --git a/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt b/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt
new file mode 100644
index 0000000..c737c83
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt

@@ -0,0 +1,19 @@
+
+ppc476gtr High Speed Serial Assist (HSTA) node
+==============================================
+
+The 476gtr SoC contains a high speed serial assist module attached
+between the plb4 and plb6 system buses to provide high speed data
+transfer between memory and system peripherals as well as support for
+PCI message signalled interrupts.
+
+Currently only the MSI support is used by Linux using the following
+device tree entries:
+
+Required properties:
+- compatible		: "ibm,476gtr-hsta-msi", "ibm,hsta-msi"
+- reg			: register mapping for the HSTA MSI space
+- interrupt-parent	: parent controller for mapping interrupts
+- interrupts		: ordered interrupt mapping for each MSI in the register
+			  space. The first interrupt should be associated with a
+			  register offset of 0x00, the second to 0x10, etc.

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/board.txt b/Documentation/devicetree/bindings/powerpc/fsl/board.txt
index 380914e..700dec4 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/board.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/board.txt

@@ -67,3 +67,20 @@
 			gpio-controller;
 		};
 	};
+
+* Freescale on-board FPGA connected on I2C bus
+
+Some Freescale boards like BSC9132QDS have an on-board FPGA connected on
+the I2C bus.
+
+Required properties:
+- compatible: Should be a board-specific string followed by a string
+  indicating the type of FPGA.  Example:
+	"fsl,<board>-fpga", "fsl,fpga-qixis-i2c"
+- reg: Should contain the address of the FPGA
+
+Example:
+	fpga: fpga@66 {
+		compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
+		reg = <0x66>;
+	};

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt b/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt
new file mode 100644
index 0000000..454da7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt

@@ -0,0 +1,46 @@
+Freescale CoreNet Coherency Fabric (CCF) Device Tree Binding
+
+DESCRIPTION
+
+The CoreNet coherency fabric is a fabric-oriented, connectivity infrastructure
+that enables the implementation of coherent, multicore systems.
+
+Required properties:
+
+- compatible: <string list>
+		fsl,corenet1-cf - CoreNet coherency fabric version 1.
+		Example chips: T4240, B4860
+
+		fsl,corenet2-cf - CoreNet coherency fabric version 2.
+		Example chips: P5040, P5020, P4080, P3041, P2041
+
+		fsl,corenet-cf - Used to represent the common registers
+		between CCF version 1 and CCF version 2.  This compatible
+		is retained for compatibility reasons, as it was already
+		used for both CCF version 1 chips and CCF version 2
+		chips.  It should be specified after either
+		"fsl,corenet1-cf" or "fsl,corenet2-cf".
+
+- reg: <prop-encoded-array>
+		A standard property. Represents the CCF registers.
+
+- interrupts: <prop-encoded-array>
+		Interrupt mapping for CCF error interrupt.
+
+- fsl,ccf-num-csdids: <u32>
+		Specifies the number of Coherency Subdomain ID Port Mapping
+		Registers that are supported by the CCF.
+
+- fsl,ccf-num-snoopids: <u32>
+		Specifies the number of Snoop ID Port Mapping Registers that
+		are supported by CCF.
+
+Example:
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt
index 922c30a..f8cd239 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt

@@ -20,3 +20,14 @@
 	a property named fsl,eref-[CAT], where [CAT] is the abbreviated category
 	name with all uppercase letters converted to lowercase, indicates that
 	the category is supported by the implementation.
+
+    - fsl,portid-mapping
+	Usage: optional
+	Value type: <u32>
+	Definition: The Coherency Subdomain ID Port Mapping Registers and
+	Snoop ID Port Mapping registers, which are part of the CoreNet
+	Coherency fabric (CCF), provide a CoreNet Coherency Subdomain
+	ID/CoreNet Snoop ID to cpu mapping functions.  Certain bits from
+	these registers should be set if the corresponding CPU should be
+	snooped.  This property defines a bitmask which selects the bit
+	that should be set if this cpu should be snooped.

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt b/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt
index 1f5e329..c2b2899 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt

@@ -34,6 +34,15 @@
 		  for legacy drivers.
 - interrupt-parent : <phandle>
 		  Phandle to interrupt controller
+- fsl,portid-mapping : <u32>
+		  The Coherency Subdomain ID Port Mapping Registers and
+		  Snoop ID Port Mapping registers, which are part of the
+		  CoreNet Coherency fabric (CCF), provide a CoreNet
+		  Coherency Subdomain ID/CoreNet Snoop ID to pamu mapping
+		  functions.  Certain bits from these registers should be
+		  set if PAMUs should be snooped.  This property defines
+		  a bitmask which selects the bits that should be set if
+		  PAMUs should be snooped.
 
 Child nodes:
 
@@ -88,6 +97,7 @@
 		compatible = "fsl,pamu-v1.0", "fsl,pamu";
 		reg = <0x20000 0x5000>;
 		ranges = <0 0x20000 0x5000>;
+		fsl,portid-mapping = <0xf80000>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		interrupts = <

diff --git a/Documentation/devicetree/bindings/pwm/bcm-kona-pwm.txt b/Documentation/devicetree/bindings/pwm/bcm-kona-pwm.txt
new file mode 100644
index 0000000..8eae9fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/bcm-kona-pwm.txt

@@ -0,0 +1,21 @@
+Broadcom Kona PWM controller device tree bindings
+
+This controller has 6 channels.
+
+Required Properties :
+- compatible: should contain "brcm,kona-pwm"
+- reg: physical base address and length of the controller's registers
+- clocks: phandle + clock specifier pair for the external clock
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a
+  description of the cells format.
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
+
+Example:
+
+pwm: pwm@3e01a000 {
+	compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
+	reg = <0x3e01a000 0xc4>;
+	clocks = <&pwm_clk>;
+	#pwm-cells = <3>;
+};

diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
index fff93d5..4cf0249 100644
--- a/Documentation/devicetree/bindings/thermal/armada-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt

@@ -1,11 +1,21 @@
-* Marvell Armada 370/XP thermal management
+* Marvell Armada 370/375/380/XP thermal management
 
 Required properties:
 
 - compatible:	Should be set to one of the following:
 		marvell,armada370-thermal
+		marvell,armada375-thermal
+		marvell,armada375-z1-thermal
+		marvell,armada380-thermal
 		marvell,armadaxp-thermal
 
+		Note: As the name suggests, "marvell,armada375-z1-thermal"
+		applies for the SoC Z1 stepping only. On such stepping
+		some quirks need to be done and the register offset differs
+		from the one in the A0 stepping.
+		The operating system may auto-detect the SoC stepping and
+		update the compatible and register offsets at runtime.
+
 - reg:		Device's register space.
 		Two entries are expected, see the examples below.
 		The first one is required for the sensor register;

diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
index 284f530..c949092 100644
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt

@@ -6,16 +6,35 @@
 	       "samsung,exynos4412-tmu"
 	       "samsung,exynos4210-tmu"
 	       "samsung,exynos5250-tmu"
+	       "samsung,exynos5260-tmu"
+	       "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
+	       "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
+			Exynos5420 (Must pass triminfo base and triminfo clock)
 	       "samsung,exynos5440-tmu"
 - interrupt-parent : The phandle for the interrupt controller
 - reg : Address range of the thermal registers. For soc's which has multiple
 	instances of TMU and some registers are shared across all TMU's like
 	interrupt related then 2 set of register has to supplied. First set
-	belongs	to each instance of TMU and second set belongs to common TMU
-	registers.
+	belongs	to register set of TMU instance and second set belongs to
+	registers shared with the TMU instance.
+
+  NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
+	channels 2, 3 and 4
+	Use "samsung,exynos5420-tmu-ext-triminfo" in cases where there is a
+	misplaced register, and also provide the clock to access that base.
+
+	TRIMINFO at 0x1006c000 contains data for TMU channel 3
+	TRIMINFO at 0x100a0000 contains data for TMU channel 4
+	TRIMINFO at 0x10068000 contains data for TMU channel 2
+
 - interrupts : Should contain interrupt for thermal system
-- clocks : The main clock for TMU device
+- clocks : The main clocks for TMU device
+	-- 1. operational clock for TMU channel
+	-- 2. optional clock to access the shared registers of TMU channel
 - clock-names : Thermal system clock name
+	-- "tmu_apbif" operational clock for current TMU channel
+	-- "tmu_triminfo_apbif" clock to access the shared triminfo register
+		for current TMU channel
 - vtmu-supply: This entry is optional and provides the regulator node supplying
 		voltage to TMU. If needed this entry can be placed inside
 		board/platform specific dts file.
@@ -43,6 +62,31 @@
 		clock-names = "tmu_apbif";
 	};
 
+Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register")
+	tmu_cpu2: tmu@10068000 {
+		compatible = "samsung,exynos5420-tmu-ext-triminfo";
+		reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+		interrupts = <0 184 0>;
+		clocks = <&clock 318>, <&clock 318>;
+		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+	};
+
+	tmu_cpu3: tmu@1006c000 {
+		compatible = "samsung,exynos5420-tmu-ext-triminfo";
+		reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
+		interrupts = <0 185 0>;
+		clocks = <&clock 318>, <&clock 319>;
+		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+	};
+
+	tmu_gpu: tmu@100a0000 {
+		compatible = "samsung,exynos5420-tmu-ext-triminfo";
+		reg = <0x100a0000 0x100>, <0x10068000 0x4>;
+		interrupts = <0 215 0>;
+		clocks = <&clock 319>, <&clock 318>;
+		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+	};
+
 Note: For multi-instance tmu each instance should have an alias correctly
 numbered in "aliases" node.
 

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 5261271..4d7f375 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -142,3 +142,4 @@
 xes	Extreme Engineering Solutions (X-ES)
 xlnx	Xilinx
 zyxel	ZyXEL Communications Corp.
+zarlink	Zarlink Semiconductor

diff --git a/Documentation/devicetree/bindings/video/exynos_dp.txt b/Documentation/devicetree/bindings/video/exynos_dp.txt
index 57ccdde..53dbccf 100644
--- a/Documentation/devicetree/bindings/video/exynos_dp.txt
+++ b/Documentation/devicetree/bindings/video/exynos_dp.txt

@@ -62,6 +62,10 @@
 	-hsync-active-high:
 		HSYNC polarity configuration.
 			High if defined, Low if not defined
+	-samsung,hpd-gpio:
+		Hotplug detect GPIO.
+			Indicates which GPIO should be used for hotplug
+			detection
 
 Example:
 

diff --git a/Documentation/devicetree/bindings/video/exynos_hdmi.txt b/Documentation/devicetree/bindings/video/exynos_hdmi.txt
index f9187a2..1fd8cf9 100644
--- a/Documentation/devicetree/bindings/video/exynos_hdmi.txt
+++ b/Documentation/devicetree/bindings/video/exynos_hdmi.txt

@@ -5,6 +5,7 @@
 	1) "samsung,exynos5-hdmi" <DEPRECATED>
 	2) "samsung,exynos4210-hdmi"
 	3) "samsung,exynos4212-hdmi"
+	4) "samsung,exynos5420-hdmi"
 - reg: physical base address of the hdmi and length of memory mapped
 	region.
 - interrupts: interrupt number to the cpu.
@@ -27,6 +28,7 @@
 	"hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy" and "mout_hdmi".
 - ddc: phandle to the hdmi ddc node
 - phy: phandle to the hdmi phy node
+- samsung,syscon-phandle: phandle for system controller node for PMU.
 
 Example:
 
@@ -37,4 +39,5 @@
 		hpd-gpio = <&gpx3 7 1>;
 		ddc = <&hdmi_ddc_node>;
 		phy = <&hdmi_phy_node>;
+		samsung,syscon-phandle = <&pmu_system_controller>;
 	};

diff --git a/Documentation/devicetree/bindings/watchdog/marvel.txt b/Documentation/devicetree/bindings/watchdog/marvel.txt
index de11eb4..97223fd 100644
--- a/Documentation/devicetree/bindings/watchdog/marvel.txt
+++ b/Documentation/devicetree/bindings/watchdog/marvel.txt

@@ -5,11 +5,18 @@
 - Compatibility : "marvell,orion-wdt"
 		  "marvell,armada-370-wdt"
 		  "marvell,armada-xp-wdt"
+		  "marvell,armada-375-wdt"
+		  "marvell,armada-380-wdt"
 
 - reg		: Should contain two entries: first one with the
 		  timer control address, second one with the
 		  rstout enable address.
 
+For "marvell,armada-375-wdt" and "marvell,armada-380-wdt":
+
+- reg		: A third entry is mandatory and should contain the
+                  shared mask/unmask RSTOUT address.
+
 Optional properties:
 
 - interrupts	: Contains the IRQ for watchdog expiration

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 8947255..1525e30 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt

@@ -318,3 +318,8 @@
   devm_gpiod_get_optional()
   devm_gpiod_get_index_optional()
   devm_gpiod_put()
+
+MDIO
+  devm_mdiobus_alloc()
+  devm_mdiobus_alloc_size()
+  devm_mdiobus_free()

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index eba7901..b18dd17 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking

@@ -196,8 +196,7 @@
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
 				unsigned long *);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
@@ -431,6 +430,8 @@
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 25311e11..51afba1 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt

@@ -461,11 +461,11 @@
   # of blocks in level #n = |
                             `- 4, Otherwise
 
-                             ,- 2^ (n + dir_level),
-			     |            if n < MAX_DIR_HASH_DEPTH / 2,
+                             ,- 2^(n + dir_level),
+			     |        if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
   # of buckets in level #n = |
-                             `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
-			                  Otherwise
+                             `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+			              Otherwise
 
 When F2FS finds a file name in a directory, at first a hash value of the file
 name is calculated. Then, F2FS scans the hash table in level #0 to find the

diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index b930ad0..c49cd7e 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt

@@ -176,7 +176,5 @@
   ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
   fail to live up to the promise we made in CREATE_SESSION fore channel
   negotiation.
-* No more than one read-like operation allowed per compound; encoding
-  replies that cross page boundaries (except for read data) not handled.
 
 See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 617f6d7..a1d0d7a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt

@@ -589,8 +589,7 @@
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
 	/* migrate the contents of a page to the specified target */
@@ -807,6 +806,8 @@
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -837,11 +838,15 @@
 
   read: called by read(2) and related system calls
 
-  aio_read: called by io_submit(2) and other asynchronous I/O operations
+  aio_read: vectored, possibly asynchronous read
+
+  read_iter: possibly asynchronous read with iov_iter as destination
 
   write: called by write(2) and related system calls
 
-  aio_write: called by io_submit(2) and other asynchronous I/O operations
+  aio_write: vectored, possibly asynchronous write
+
+  write_iter: possibly asynchronous write with iov_iter as source
 
   iterate: called when the VFS needs to read the directory contents
 

diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1
new file mode 100644
index 0000000..6b1e054
--- /dev/null
+++ b/Documentation/hwmon/shtc1

@@ -0,0 +1,43 @@
+Kernel driver shtc1
+===================
+
+Supported chips:
+  * Sensirion SHTC1
+    Prefix: 'shtc1'
+    Addresses scanned: none
+    Datasheet: http://www.sensirion.com/file/datasheet_shtc1
+
+  * Sensirion SHTW1
+    Prefix: 'shtw1'
+    Addresses scanned: none
+    Datasheet: Not publicly available
+
+Author:
+  Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHTC1 chip, a humidity and
+temperature sensor. Temperature is measured in degrees Celsius, relative
+humidity is expressed as a percentage. The driver can also be used for the
+SHTW1 chip, which has the same electrical interface.
+
+The device communicates with the I2C protocol. All sensors are set to I2C
+address 0x70. See Documentation/i2c/instantiating-devices for methods to
+instantiate the device.
+
+There are two options configurable by means of shtc1_platform_data:
+1. blocking (pull the I2C clock line down while performing the measurement) or
+   non-blocking mode. Blocking mode will guarantee the fastest result but
+   the I2C bus will be busy during that time. By default, non-blocking mode
+   is used. Make sure clock-stretching works properly on your device if you
+   want to use blocking mode.
+2. high or low accuracy. High accuracy is used by default and using it is
+   strongly recommended.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input

diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 69372fb..3fb39e0 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt

@@ -470,7 +470,7 @@
 
 	Sometimes, an external module uses exported symbols from
 	another external module. kbuild needs to have full knowledge of
-	all symbols to avoid spliitting out warnings about undefined
+	all symbols to avoid spitting out warnings about undefined
 	symbols. Three solutions exist for this situation.
 
 	NOTE: The method with a top-level kbuild file is recommended

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b9f6778..6eaa9cd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -1,27 +1,37 @@
                           Kernel Parameters
                           ~~~~~~~~~~~~~~~~~
 
-The following is a consolidated list of the kernel parameters as implemented
-(mostly) by the __setup() macro and sorted into English Dictionary order
-(defined as ignoring all punctuation and sorting digits before letters in a
-case insensitive manner), and with descriptions where known.
+The following is a consolidated list of the kernel parameters as
+implemented by the __setup(), core_param() and module_param() macros
+and sorted into English Dictionary order (defined as ignoring all
+punctuation and sorting digits before letters in a case insensitive
+manner), and with descriptions where known.
 
-Module parameters for loadable modules are specified only as the
-parameter name with optional '=' and value as appropriate, such as:
+The kernel parses parameters from the kernel command line up to "--";
+if it doesn't recognize a parameter and it doesn't contain a '.', the
+parameter gets passed to init: parameters with '=' go into init's
+environment, others are passed as command line arguments to init.
+Everything after "--" is passed as an argument to init.
 
-	modprobe usbcore blinkenlights=1
+Module parameters can be specified in two ways: via the kernel command
+line with a module name prefix, or via modprobe, e.g.:
 
-Module parameters for modules that are built into the kernel image
-are specified on the kernel command line with the module name plus
-'.' plus parameter name, with '=' and value if appropriate, such as:
+	(kernel command line) usbcore.blinkenlights=1
+	(modprobe command line) modprobe usbcore blinkenlights=1
 
-	usbcore.blinkenlights=1
+Parameters for modules which are built into the kernel need to be
+specified on the kernel command line.  modprobe looks through the
+kernel command line (/proc/cmdline) and collects module parameters
+when it loads a module, so the kernel command line can be used for
+loadable modules too.
 
 Hyphens (dashes) and underscores are equivalent in parameter names, so
 	log_buf_len=1M print-fatal-signals=1
 can also be entered as
 	log-buf-len=1M print_fatal_signals=1
 
+Double-quotes can be used to protect spaces in values, e.g.:
+	param="spaces in here"
 
 This document may not be entirely up to date and comprehensive. The command
 "modinfo -p ${modulename}" shows a current list of all parameters of a loadable

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 0cfb00f..4bbeca8 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt

@@ -22,8 +22,9 @@
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address, specifying a handler
+can trap at almost any kernel code address(*), specifying a handler
 routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code cannot be trapped, see 1.5 Blacklist)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -273,6 +274,19 @@
  or
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions where kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function into the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
+to specify a blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it, if the given address is in the blacklist.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following

diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/mtd/spi-nor.txt
new file mode 100644
index 0000000..548d630
--- /dev/null
+++ b/Documentation/mtd/spi-nor.txt

@@ -0,0 +1,62 @@
+                          SPI NOR framework
+               ============================================
+
+Part I - Why do we need this framework?
+---------------------------------------
+
+SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
+controller operates agnostic of the specific device attached. However, some
+controllers (such as Freescale's QuadSPI controller) cannot easily handle
+arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
+
+In particular, Freescale's QuadSPI controller must know the NOR commands to
+find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
+opcodes, addresses, or data payloads; a SPI controller simply knows to send or
+receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
+which the controller driver is aware of the opcodes, addressing, and other
+details of the SPI NOR protocol.
+
+Part II - How does the framework work?
+--------------------------------------
+
+This framework just adds a new layer between the MTD and the SPI bus driver.
+With this new layer, the SPI NOR controller driver does not depend on the
+m25p80 code anymore.
+
+   Before this framework, the layering looks like:
+
+                   MTD
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	        SPI NOR chip
+
+   After this framework, the layering looks like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	       SPI NOR chip
+
+  With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                fsl-quadSPI
+         ------------------------
+	       SPI NOR chip
+
+Part III - How can drivers use the framework?
+---------------------------------------------
+
+The main API is spi_nor_scan(). Before calling it, a driver should
+initialize the necessary fields of spi_nor{}. Please see
+drivers/mtd/spi-nor/spi-nor.c for details. Please also refer to fsl-quadspi.c
+when you want to write a new driver for a SPI NOR controller.

diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
index 1dfe62c..ee231ed 100644
--- a/Documentation/mutex-design.txt
+++ b/Documentation/mutex-design.txt

@@ -1,139 +1,157 @@
 Generic Mutex Subsystem
 
 started by Ingo Molnar <mingo@redhat.com>
+updated by Davidlohr Bueso <davidlohr@hp.com>
 
-  "Why on earth do we need a new mutex subsystem, and what's wrong
-   with semaphores?"
+What are mutexes?
+-----------------
 
-firstly, there's nothing wrong with semaphores. But if the simpler
-mutex semantics are sufficient for your code, then there are a couple
-of advantages of mutexes:
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
 
- - 'struct mutex' is smaller on most architectures: E.g. on x86,
-   'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
-   A smaller structure size means less RAM footprint, and better
-   CPU-cache utilization.
+[1] http://lwn.net/Articles/164802/
 
- - tighter code. On x86 i get the following .text sizes when
-   switching all mutex-alike semaphores in the kernel to the mutex
-   subsystem:
+Implementation
+--------------
 
-        text    data     bss     dec     hex filename
-     3280380  868188  396860 4545428  455b94 vmlinux-semaphore
-     3255329  865296  396732 4517357  44eded vmlinux-mutex
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three
+state atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
 
-   that's 25051 bytes of code saved, or a 0.76% win - off the hottest
-   codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
-   Smaller code means better icache footprint, which is one of the
-   major optimization goals in the Linux kernel currently.
+	  1: unlocked
+	  0: locked, no waiters
+   negative: locked, with potential waiters
 
- - the mutex subsystem is slightly faster and has better scalability for
-   contended workloads. On an 8-way x86 system, running a mutex-based
-   kernel and testing creat+unlink+close (of separate, per-task files)
-   in /tmp with 16 parallel tasks, the average number of ops/sec is:
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
 
-    Semaphores:                        Mutexes:
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
 
-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    8 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      34713          avg loops/sec:      84153
-    CPU utilization:    63%            CPU utilization:    22%
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:
 
-   i.e. in this workload, the mutex based kernel was 2.4 times faster
-   than the semaphore based kernel, _and_ it also had 2.8 times less CPU
-   utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
-   performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
-   performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
-   more efficient.)
-
-   the scalability difference is visible even on a 2-way P4 HT box:
-
-    Semaphores:                        Mutexes:
-
-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    4 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      127659         avg loops/sec:      181082
-    CPU utilization:    100%           CPU utilization:    34%
-
-   (the straight performance advantage of mutexes is 41%, the per-cycle
-    efficiency of mutexes is 4.1 times better.)
-
- - there are no fastpath tradeoffs, the mutex fastpath is just as tight
-   as the semaphore fastpath. On x86, the locking fastpath is 2
-   instructions:
-
-    c0377ccb <mutex_lock>:
-    c0377ccb:       f0 ff 08                lock decl (%eax)
-    c0377cce:       78 0e                   js     c0377cde <.text..lock.mutex>
-    c0377cd0:       c3                      ret
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
 
    the unlocking fastpath is equally tight:
 
-    c0377cd1 <mutex_unlock>:
-    c0377cd1:       f0 ff 00                lock incl (%eax)
-    c0377cd4:       7e 0f                   jle    c0377ce5 <.text..lock.mutex+0x7>
-    c0377cd6:       c3                      ret
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
 
- - 'struct mutex' semantics are well-defined and are enforced if
-   CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
-   virtually no debugging code or instrumentation. The mutex subsystem
-   checks and enforces the following rules:
 
-   * - only one task can hold the mutex at a time
-   * - only the owner can unlock the mutex
-   * - multiple unlocks are not permitted
-   * - recursive locking is not permitted
-   * - a mutex object must be initialized via the API
-   * - a mutex object must not be initialized via memset or copying
-   * - task may not exit with mutex held
-   * - memory areas where held locks reside must not be freed
-   * - held mutexes must not be reinitialized
-   * - mutexes may not be used in hardware or software interrupt
-   *   contexts such as tasklets and timers
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched). The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using MCS lock so that only
+     one spinner can compete for the mutex.
 
-   furthermore, there are also convenience features in the debugging
-   code:
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     for sleeping lock implementation. An important feature of the customized
+     MCS lock is that it has the extra property that spinners are able to exit
+     the MCS spinlock queue when they need to reschedule. This further helps
+     avoid situations where MCS spinners that need to reschedule would continue
+     waiting to spin on mutex owner, only to go directly to slowpath upon
+     obtaining the MCS lock.
 
-   * - uses symbolic names of mutexes, whenever they are printed in debug output
-   * - point-of-acquire tracking, symbolic lookup of function names
-   * - list of all locks held in the system, printout of them
-   * - owner tracking
-   * - detects self-recursing locks and prints out all relevant info
-   * - detects multi-task circular deadlocks and prints out all affected
-   *   locks and tasks (and only those tasks)
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG_DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+				       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);
 
 Disadvantages
 -------------
 
-The stricter mutex API means you cannot use mutexes the same way you
-can use semaphores: e.g. they cannot be used from an interrupt context,
-nor can they be unlocked from a different context that which acquired
-it. [ I'm not aware of any other (e.g. performance) disadvantages from
-using mutexes at the moment, please let me know if you find any. ]
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g. on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.
 
-Implementation of mutexes
--------------------------
+When to use mutexes
+-------------------
 
-'struct mutex' is the new mutex type, defined in include/linux/mutex.h and
-implemented in kernel/locking/mutex.c. It is a counter-based mutex with a
-spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for
-"locked" and negative numbers (usually -1) for "locked, potential waiters
-queued".
-
-the APIs of 'struct mutex' have been streamlined:
-
- DEFINE_MUTEX(name);
-
- mutex_init(mutex);
-
- void mutex_lock(struct mutex *lock);
- int  mutex_lock_interruptible(struct mutex *lock);
- int  mutex_trylock(struct mutex *lock);
- void mutex_unlock(struct mutex *lock);
- int  mutex_is_locked(struct mutex *lock);
- void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
- int  mutex_lock_interruptible_nested(struct mutex *lock,
-                                      unsigned int subclass);
- int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index a383c00..9c723ec 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt

@@ -585,13 +585,19 @@
 	balance-tlb or 5
 
 		Adaptive transmit load balancing: channel bonding that
-		does not require any special switch support.  The
-		outgoing traffic is distributed according to the
-		current load (computed relative to the speed) on each
-		slave.  Incoming traffic is received by the current
-		slave.  If the receiving slave fails, another slave
-		takes over the MAC address of the failed receiving
-		slave.
+		does not require any special switch support.
+
+		In tlb_dynamic_lb=1 mode, the outgoing traffic is
+		distributed according to the current load (computed
+		relative to the speed) on each slave.
+
+		In tlb_dynamic_lb=0 mode, load balancing based on
+		current load is disabled and the load is distributed
+		only using the hash distribution.
+
+		Incoming traffic is received by the current slave.
+		If the receiving slave fails, another slave takes over
+		the MAC address of the failed receiving slave.
 
 		Prerequisite:
 
@@ -736,6 +742,28 @@
 
 	This option was added for bonding version 3.6.0.
 
+tlb_dynamic_lb
+
+	Specifies if dynamic shuffling of flows is enabled in tlb
+	mode. The value has no effect on any other modes.
+
+	The default behavior of tlb mode is to shuffle active flows across
+	slaves based on the load in that interval. This gives nice lb
+	characteristics but can cause packet reordering. If re-ordering is
+	a concern use this variable to disable flow shuffling and rely on
+	load balancing provided solely by the hash distribution.
+	xmit_hash_policy can be used to select the appropriate hashing for
+	the setup.
+
+	The sysfs entry can be used to change the setting per bond device
+	and the initial value is derived from the module parameter. The
+	sysfs entry is allowed to be changed only if the bond device is
+	down.
+
+	The default value is "1", which enables flow shuffling, while value "0"
+	disables it. This option was added in bonding driver 3.7.1.
+
+
 updelay
 
 	Specifies the time, in milliseconds, to wait before enabling a
@@ -769,7 +797,7 @@
 xmit_hash_policy
 
 	Selects the transmit hash policy to use for slave selection in
-	balance-xor and 802.3ad modes.  Possible values are:
+	balance-xor, 802.3ad, and tlb modes.  Possible values are:
 
 	layer2
 

diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 4f7ae52..2236d6d 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt

@@ -469,6 +469,41 @@
   having this 'send only' use-case we may remove the receive list in the
   Kernel to save a little (really a very little!) CPU usage.
 
+  4.1.1.1 CAN filter usage optimisation
+
+  The CAN filters are processed in per-device filter lists at CAN frame
+  reception time. To reduce the number of checks that need to be performed
+  while walking through the filter lists the CAN core provides an optimized
+  filter handling when the filter subscription focusses on a single CAN ID.
+
+  For the possible 2048 SFF CAN identifiers the identifier is used as an index
+  to access the corresponding subscription list without any further checks.
+  For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
+  hash function to retrieve the EFF table index.
+
+  To benefit from the optimized filters for single CAN identifiers the
+  CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
+  with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
+  can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
+  subscribed. E.g. in the example from above
+
+    rfilter[0].can_id   = 0x123;
+    rfilter[0].can_mask = CAN_SFF_MASK;
+
+  both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
+
+  To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
+  filter has to be defined in this way to benefit from the optimized filters:
+
+    struct can_filter rfilter[2];
+
+    rfilter[0].can_id   = 0x123;
+    rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
+    rfilter[1].can_id   = 0x12345678 | CAN_EFF_FLAG;
+    rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
+
+    setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
   4.1.2 RAW socket option CAN_RAW_ERR_FILTER
 
   As described in chapter 3.4 the CAN interface driver can generate so

diff --git a/Documentation/networking/cdc_mbim.txt b/Documentation/networking/cdc_mbim.txt
new file mode 100644
index 0000000..a15ea60
--- /dev/null
+++ b/Documentation/networking/cdc_mbim.txt

@@ -0,0 +1,339 @@
+     cdc_mbim - Driver for CDC MBIM Mobile Broadband modems
+    ========================================================
+
+The cdc_mbim driver supports USB devices conforming to the "Universal
+Serial Bus Communications Class Subclass Specification for Mobile
+Broadband Interface Model" [1], which is a further development of
+"Universal Serial Bus Communications Class Subclass Specifications for
+Network Control Model Devices" [2] optimized for Mobile Broadband
+devices, aka "3G/LTE modems".
+
+
+Command Line Parameters
+=======================
+
+The cdc_mbim driver has no parameters of its own.  But the probing
+behaviour for NCM 1.0 backwards compatible MBIM functions (an
+"NCM/MBIM function" as defined in section 3.2 of [1]) is affected
+by a cdc_ncm driver parameter:
+
+prefer_mbim
+-----------
+Type:          Boolean
+Valid Range:   N/Y (0-1)
+Default Value: Y (MBIM is preferred)
+
+This parameter sets the system policy for NCM/MBIM functions.  Such
+functions will be handled by either the cdc_ncm driver or the cdc_mbim
+driver depending on the prefer_mbim setting.  Setting prefer_mbim=N
+makes the cdc_mbim driver ignore these functions and lets the cdc_ncm
+driver handle them instead.
+
+The parameter is writable, and can be changed at any time. A manual
+unbind/bind is required to make the change effective for NCM/MBIM
+functions bound to the "wrong" driver.
+
+
+Basic usage
+===========
+
+MBIM functions are inactive when unmanaged. The cdc_mbim driver only
+provides a userspace interface to the MBIM control channel, and will
+not participate in the management of the function. This implies that a
+userspace MBIM management application always is required to enable a
+MBIM function.
+
+Such userspace applications include, but are not limited to:
+ - mbimcli (included with the libmbim [3] library), and
+ - ModemManager [4]
+
+Establishing a MBIM IP session requires at least these actions by the
+management application:
+ - open the control channel
+ - configure network connection settings
+ - connect to network
+ - configure IP interface
+
+Management application development
+----------------------------------
+The driver <-> userspace interfaces are described below.  The MBIM
+control channel protocol is described in [1].
+
+
+MBIM control channel userspace ABI
+==================================
+
+/dev/cdc-wdmX character device
+------------------------------
+The driver creates a two-way pipe to the MBIM function control channel
+using the cdc-wdm driver as a subdriver.  The userspace end of the
+control channel pipe is a /dev/cdc-wdmX character device.
+
+The cdc_mbim driver does not process or police messages on the control
+channel.  The channel is fully delegated to the userspace management
+application.  It is therefore up to this application to ensure that it
+complies with all the control channel requirements in [1].
+
+The cdc-wdmX device is created as a child of the MBIM control
+interface USB device.  The character device associated with a specific
+MBIM function can be looked up using sysfs.  For example:
+
+ bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc
+ cdc-wdm0
+
+ bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev
+ 180:0
+
+
+USB configuration descriptors
+-----------------------------
+The wMaxControlMessage field of the CDC MBIM functional descriptor
+limits the maximum control message size. The management application is
+responsible for negotiating a control message size complying with the
+requirements in section 9.3.1 of [1], taking this descriptor field
+into consideration.
+
+The userspace application can access the CDC MBIM functional
+descriptor of a MBIM function using either of the two USB
+configuration descriptor kernel interfaces described in [6] or [7].
+
+See also the ioctl documentation below.
+
+
+Fragmentation
+-------------
+The userspace application is responsible for all control message
+fragmentation and defragmentation, as described in section 9.5 of [1].
+
+
+/dev/cdc-wdmX write()
+---------------------
+The MBIM control messages from the management application *must not*
+exceed the negotiated control message size.
+
+
+/dev/cdc-wdmX read()
+--------------------
+The management application *must* accept control messages of up to the
+negotiated control message size.
+
+
+/dev/cdc-wdmX ioctl()
+---------------------
+IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size
+This ioctl returns the wMaxControlMessage field of the CDC MBIM
+functional descriptor for MBIM devices.  This is intended as a
+convenience, eliminating the need to parse the USB descriptors from
+userspace.
+
+	#include <stdio.h>
+	#include <fcntl.h>
+	#include <sys/ioctl.h>
+	#include <linux/types.h>
+	#include <linux/usb/cdc-wdm.h>
+	int main()
+	{
+		__u16 max;
+		int fd = open("/dev/cdc-wdm0", O_RDWR);
+		if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max))
+			printf("wMaxControlMessage is %d\n", max);
+	}
+
+
+Custom device services
+----------------------
+The MBIM specification allows vendors to freely define additional
+services.  This is fully supported by the cdc_mbim driver.
+
+Support for new MBIM services, including vendor specified services, is
+implemented entirely in userspace, like the rest of the MBIM control
+protocol.
+
+New services should be registered in the MBIM Registry [5].
+
+
+
+MBIM data channel userspace ABI
+===============================
+
+wwanY network device
+--------------------
+The cdc_mbim driver represents the MBIM data channel as a single
+network device of the "wwan" type. This network device is initially
+mapped to MBIM IP session 0.
+
+
+Multiplexed IP sessions (IPS)
+-----------------------------
+MBIM allows multiplexing up to 256 IP sessions over a single USB data
+channel.  The cdc_mbim driver models such IP sessions as 802.1q VLAN
+subdevices of the master wwanY device, mapping MBIM IP session Z to
+VLAN ID Z for all values of Z greater than 0.
+
+The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure
+described in section 10.5.1 of [1].
+
+The userspace management application is responsible for adding new
+VLAN links prior to establishing MBIM IP sessions where the SessionId
+is greater than 0. These links can be added by using the normal VLAN
+kernel interfaces, either ioctl or netlink.
+
+For example, adding a link for a MBIM IP session with SessionId 3:
+
+  ip link add link wwan0 name wwan0.3 type vlan id 3
+
+The driver will automatically map the "wwan0.3" network device to MBIM
+IP session 3.
+
+
+Device Service Streams (DSS)
+----------------------------
+MBIM also allows up to 256 non-IP data streams to be multiplexed over
+the same shared USB data channel.  The cdc_mbim driver models these
+sessions as another set of 802.1q VLAN subdevices of the master wwanY
+device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values
+of A.
+
+The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO
+structure described in section 10.5.29 of [1].
+
+The DSS VLAN subdevices are used as a practical interface between the
+shared MBIM data channel and a MBIM DSS aware userspace application.
+It is not intended to be presented as-is to an end user. The
+assumption is that a userspace application initiating a DSS session
+also takes care of the necessary framing of the DSS data, presenting
+the stream to the end user in an appropriate way for the stream type.
+
+The network device ABI requires a dummy ethernet header for every DSS
+data frame being transported.  The contents of this header is
+arbitrary, with the following exceptions:
+ - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped
+ - RX frames will have the protocol field set to ETH_P_802_3 (but will
+   not be properly formatted 802.3 frames)
+ - RX frames will have the destination address set to the hardware
+   address of the master device
+
+The DSS supporting userspace management application is responsible for
+adding the dummy ethernet header on TX and stripping it on RX.
+
+This is a simple example using tools commonly available, exporting
+DssSessionId 5 as a pty character device pointed to by a /dev/nmea
+symlink:
+
+  ip link add link wwan0 name wwan0.dss5 type vlan id 261
+  ip link set dev wwan0.dss5 up
+  socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea
+
+This is only an example, most suitable for testing out a DSS
+service. Userspace applications supporting specific MBIM DSS services
+are expected to use the tools and programming interfaces required by
+that service.
+
+Note that adding VLAN links for DSS sessions is entirely optional.  A
+management application may instead choose to bind a packet socket
+directly to the master network device, using the received VLAN tags to
+map frames to the correct DSS session and adding 18 byte VLAN ethernet
+headers with the appropriate tag on TX.  In this case using a socket
+filter is recommended, matching only the DSS VLAN subset. This avoids
+unnecessary copying of unrelated IP session data to userspace.  For
+example:
+
+  static struct sock_filter dssfilter[] = {
+	/* use special negative offsets to get VLAN tag */
+	BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
+	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */
+
+	/* verify DSS VLAN range */
+	BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
+	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4),	/* 256 is first DSS VLAN */
+	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0),	/* 511 is last DSS VLAN */
+
+	/* verify ethertype */
+        BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1),
+
+        BPF_STMT(BPF_RET|BPF_K, (u_int)-1),	/* accept */
+        BPF_STMT(BPF_RET|BPF_K, 0),		/* ignore */
+  };
+
+
+
+Tagged IP session 0 VLAN
+------------------------
+As described above, MBIM IP session 0 is treated as special by the
+driver.  It is initially mapped to untagged frames on the wwanY
+network device.
+
+This mapping implies a few restrictions on multiplexed IPS and DSS
+sessions, which may not always be practical:
+ - no IPS or DSS session can use a frame size greater than the MTU on
+   IP session 0
+ - no IPS or DSS session can be in the up state unless the network
+   device representing IP session 0 also is up
+
+These problems can be avoided by optionally making the driver map IP
+session 0 to a VLAN subdevice, similar to all other IP sessions.  This
+behaviour is triggered by adding a VLAN link for the magic VLAN ID
+4094.  The driver will then immediately start mapping MBIM IP session
+0 to this VLAN, and will drop untagged frames on the master wwanY
+device.
+
+Tip: It might be less confusing to the end user to name this VLAN
+subdevice after the MBIM SessionID instead of the VLAN ID.  For
+example:
+
+  ip link add link wwan0 name wwan0.0 type vlan id 4094
+
+
+VLAN mapping
+------------
+
+Summarizing the cdc_mbim driver mapping described above, we have this
+relationship between VLAN tags on the wwanY network device and MBIM
+sessions on the shared USB data channel:
+
+  VLAN ID       MBIM type   MBIM SessionID           Notes
+  ---------------------------------------------------------
+  untagged      IPS         0                        a)
+  1 - 255       IPS         1 - 255 <VLANID>
+  256 - 511     DSS         0 - 255 <VLANID - 256>
+  512 - 4093                                         b)
+  4094          IPS         0                        c)
+
+    a) if no VLAN ID 4094 link exists, else dropped
+    b) unsupported VLAN range, unconditionally dropped
+    c) if a VLAN ID 4094 link exists, else dropped
+
+
+
+
+References
+==========
+
+[1] USB Implementers Forum, Inc. - "Universal Serial Bus
+      Communications Class Subclass Specification for Mobile Broadband
+      Interface Model", Revision 1.0 (Errata 1), May 1, 2013
+      - http://www.usb.org/developers/docs/devclass_docs/
+
+[2] USB Implementers Forum, Inc. - "Universal Serial Bus
+      Communications Class Subclass Specifications for Network Control
+      Model Devices", Revision 1.0 (Errata 1), November 24, 2010
+      - http://www.usb.org/developers/docs/devclass_docs/
+
+[3] libmbim - "a glib-based library for talking to WWAN modems and
+      devices which speak the Mobile Interface Broadband Model (MBIM)
+      protocol"
+      - http://www.freedesktop.org/wiki/Software/libmbim/
+
+[4] ModemManager - "a DBus-activated daemon which controls mobile
+      broadband (2G/3G/4G) devices and connections"
+      - http://www.freedesktop.org/wiki/Software/ModemManager/
+
+[5] "MBIM (Mobile Broadband Interface Model) Registry"
+       - http://compliance.usb.org/mbim/
+
+[6] "/proc/bus/usb filesystem output"
+       - Documentation/usb/proc_usb_info.txt
+
+[7] "/sys/bus/usb/devices/.../descriptors"
+       - Documentation/ABI/stable/sysfs-bus-usb

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index e3ba753..ee78eba 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt

@@ -281,6 +281,7 @@
   cpu                                   raw_smp_processor_id()
   vlan_tci                              vlan_tx_tag_get(skb)
   vlan_pr                               vlan_tx_tag_present(skb)
+  rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
 Examples for low-level BPF:
@@ -308,6 +309,18 @@
   ret #-1
   drop: ret #0
 
+** icmp random packet sampling, 1 in 4
+  ldh [12]
+  jne #0x800, drop
+  ldb [23]
+  jneq #1, drop
+  # get a random uint32 number
+  ld rand
+  mod #4
+  jneq #1, drop
+  ret #-1
+  drop: ret #0
+
 ** SECCOMP filter example:
 
   ld [4]                  /* offsetof(struct seccomp_data, arch) */
@@ -548,42 +561,43 @@
 
 BPF kernel internals
 --------------------
-Internally, for the kernel interpreter, a different BPF instruction set
+Internally, for the kernel interpreter, a different instruction set
 format with similar underlying principles from BPF described in previous
 paragraphs is being used. However, the instruction set format is modelled
 closer to the underlying architecture to mimic native instruction sets, so
-that a better performance can be achieved (more details later).
+that a better performance can be achieved (more details later). This new
+ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which
+originates from [e]xtended BPF is not the same as BPF extensions! While
+eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading'
+of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.)
 
 It is designed to be JITed with one to one mapping, which can also open up
-the possibility for GCC/LLVM compilers to generate optimized BPF code through
-a BPF backend that performs almost as fast as natively compiled code.
+the possibility for GCC/LLVM compilers to generate optimized eBPF code through
+an eBPF backend that performs almost as fast as natively compiled code.
 
 The new instruction set was originally designed with the possible goal in
-mind to write programs in "restricted C" and compile into BPF with a optional
+mind to write programs in "restricted C" and compile into eBPF with an optional
 GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with
-minimal performance overhead over two steps, that is, C -> BPF -> native code.
+minimal performance overhead over two steps, that is, C -> eBPF -> native code.
 
 Currently, the new format is being used for running user BPF programs, which
 includes seccomp BPF, classic socket filters, cls_bpf traffic classifier,
 team driver's classifier for its load-balancing mode, netfilter's xt_bpf
 extension, PTP dissector/classifier, and much more. They are all internally
 converted by the kernel into the new instruction set representation and run
-in the extended interpreter. For in-kernel handlers, this all works
-transparently by using sk_unattached_filter_create() for setting up the
-filter, resp. sk_unattached_filter_destroy() for destroying it. The macro
-SK_RUN_FILTER(filter, ctx) transparently invokes the right BPF function to
-run the filter. 'filter' is a pointer to struct sk_filter that we got from
-sk_unattached_filter_create(), and 'ctx' the given context (e.g. skb pointer).
-All constraints and restrictions from sk_chk_filter() apply before a
-conversion to the new layout is being done behind the scenes!
+in the eBPF interpreter. For in-kernel handlers, this all works transparently
+by using sk_unattached_filter_create() for setting up the filter, resp.
+sk_unattached_filter_destroy() for destroying it. The macro
+SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed
+code to run the filter. 'filter' is a pointer to struct sk_filter that we
+got from sk_unattached_filter_create(), and 'ctx' the given context (e.g.
+skb pointer). All constraints and restrictions from sk_chk_filter() apply
+before a conversion to the new layout is being done behind the scenes!
 
-Currently, for JITing, the user BPF format is being used and current BPF JIT
-compilers reused whenever possible. In other words, we do not (yet!) perform
-a JIT compilation in the new layout, however, future work will successively
-migrate traditional JIT compilers into the new instruction format as well, so
-that they will profit from the very same benefits. Thus, when speaking about
-JIT in the following, a JIT compiler (TBD) for the new instruction format is
-meant in this context.
+Currently, the classic BPF format is being used for JITing on most of the
+architectures. Only x86-64 performs JIT compilation from eBPF instruction set,
+however, future work will migrate other JIT compilers as well, so that they
+will profit from the very same benefits.
 
 Some core changes of the new internal format:
 
@@ -592,35 +606,35 @@
   The old format had two registers A and X, and a hidden frame pointer. The
   new layout extends this to be 10 internal registers and a read-only frame
   pointer. Since 64-bit CPUs are passing arguments to functions via registers
-  the number of args from BPF program to in-kernel function is restricted
+  the number of args from eBPF program to in-kernel function is restricted
   to 5 and one register is used to accept return value from an in-kernel
   function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
   sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
   registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
 
-  Therefore, BPF calling convention is defined as:
+  Therefore, eBPF calling convention is defined as:
 
-    * R0	- return value from in-kernel function
-    * R1 - R5	- arguments from BPF program to in-kernel function
+    * R0	- return value from in-kernel function, and exit value for eBPF program
+    * R1 - R5	- arguments from eBPF program to in-kernel function
     * R6 - R9	- callee saved registers that in-kernel function will preserve
     * R10	- read-only frame pointer to access stack
 
-  Thus, all BPF registers map one to one to HW registers on x86_64, aarch64,
-  etc, and BPF calling convention maps directly to ABIs used by the kernel on
+  Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
+  etc, and eBPF calling convention maps directly to ABIs used by the kernel on
   64-bit architectures.
 
   On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
   and may let more complex programs to be interpreted.
 
-  R0 - R5 are scratch registers and BPF program needs spill/fill them if
-  necessary across calls. Note that there is only one BPF program (== one BPF
-  main routine) and it cannot call other BPF functions, it can only call
-  predefined in-kernel functions, though.
+  R0 - R5 are scratch registers and eBPF program needs to spill/fill them if
+  necessary across calls. Note that there is only one eBPF program (== one
+  eBPF main routine) and it cannot call other eBPF functions, it can only
+  call predefined in-kernel functions, though.
 
 - Register width increases from 32-bit to 64-bit:
 
   Still, the semantics of the original 32-bit ALU operations are preserved
-  via 32-bit subregisters. All BPF registers are 64-bit with 32-bit lower
+  via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
   subregisters that zero-extend into 64-bit if they are being written to.
   That behavior maps directly to x86_64 and arm64 subregister definition, but
   makes other JITs more difficult.
@@ -631,8 +645,8 @@
 
   Operation is 64-bit, because on 64-bit architectures, pointers are also
   64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
-  so 32-bit BPF registers would otherwise require to define register-pair
-  ABI, thus, there won't be able to use a direct BPF register to HW register
+  so 32-bit eBPF registers would otherwise require to define register-pair
+  ABI, thus, there won't be able to use a direct eBPF register to HW register
   mapping and JIT would need to do combine/split/move operations for every
   register in and out of the function, which is complex, bug prone and slow.
   Another reason is the use of atomic 64-bit counters.
@@ -646,14 +660,145 @@
 - Introduces bpf_call insn and register passing convention for zero overhead
   calls from/to other kernel functions:
 
-  After a kernel function call, R1 - R5 are reset to unreadable and R0 has a
-  return type of the function. Since R6 - R9 are callee saved, their state is
-  preserved across the call.
+  Before an in-kernel function call, the internal BPF program needs to
+  place function arguments into R1 to R5 registers to satisfy calling
+  convention, then the interpreter will take them from registers and pass
+  to in-kernel function. If R1 - R5 registers are mapped to CPU registers
+  that are used for argument passing on given architecture, the JIT compiler
+  doesn't need to emit extra moves. Function arguments will be in the correct
+  registers and BPF_CALL instruction will be JITed as single 'call' HW
+  instruction. This calling convention was picked to cover common call
+  situations without performance penalty.
 
-Also in the new design, BPF is limited to 4096 insns, which means that any
+  After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
+  a return value of the function. Since R6 - R9 are callee saved, their state
+  is preserved across the call.
+
+  For example, consider three C functions:
+
+  u64 f1() { return (*_f2)(1); }
+  u64 f2(u64 a) { return f3(a + 1, a); }
+  u64 f3(u64 a, u64 b) { return a - b; }
+
+  GCC can compile f1, f3 into x86_64:
+
+  f1:
+    movl $1, %edi
+    movq _f2(%rip), %rax
+    jmp  *%rax
+  f3:
+    movq %rdi, %rax
+    subq %rsi, %rax
+    ret
+
+  Function f2 in eBPF may look like:
+
+  f2:
+    bpf_mov R2, R1
+    bpf_add R1, 1
+    bpf_call f3
+    bpf_exit
+
+  If f2 is JITed and the pointer stored to '_f2', the calls f1 -> f2 -> f3 and
+  returns will be seamless. Without JIT, __sk_run_filter() interpreter needs to
+  be used to call into f2.
+
+  For practical reasons all eBPF programs have only one argument 'ctx' which is
+  already placed into R1 (e.g. on __sk_run_filter() startup) and the programs
+  can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
+  are currently not supported, but these restrictions can be lifted if necessary
+  in the future.
+
+  On 64-bit architectures all registers map to HW registers one to one. For
+  example, x86_64 JIT compiler can map them as ...
+
+    R0 - rax
+    R1 - rdi
+    R2 - rsi
+    R3 - rdx
+    R4 - rcx
+    R5 - r8
+    R6 - rbx
+    R7 - r13
+    R8 - r14
+    R9 - r15
+    R10 - rbp
+
+  ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
+  and rbx, r12 - r15 are callee saved.
+
+  Then the following internal BPF pseudo-program:
+
+    bpf_mov R6, R1 /* save ctx */
+    bpf_mov R2, 2
+    bpf_mov R3, 3
+    bpf_mov R4, 4
+    bpf_mov R5, 5
+    bpf_call foo
+    bpf_mov R7, R0 /* save foo() return value */
+    bpf_mov R1, R6 /* restore ctx for next call */
+    bpf_mov R2, 6
+    bpf_mov R3, 7
+    bpf_mov R4, 8
+    bpf_mov R5, 9
+    bpf_call bar
+    bpf_add R0, R7
+    bpf_exit
+
+  After JIT to x86_64 may look like:
+
+    push %rbp
+    mov %rsp,%rbp
+    sub $0x228,%rsp
+    mov %rbx,-0x228(%rbp)
+    mov %r13,-0x220(%rbp)
+    mov %rdi,%rbx
+    mov $0x2,%esi
+    mov $0x3,%edx
+    mov $0x4,%ecx
+    mov $0x5,%r8d
+    callq foo
+    mov %rax,%r13
+    mov %rbx,%rdi
+    mov $0x6,%esi
+    mov $0x7,%edx
+    mov $0x8,%ecx
+    mov $0x9,%r8d
+    callq bar
+    add %r13,%rax
+    mov -0x228(%rbp),%rbx
+    mov -0x220(%rbp),%r13
+    leaveq
+    retq
+
+  Which is in this example equivalent in C to:
+
+    u64 bpf_filter(u64 ctx)
+    {
+        return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
+    }
+
+  In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
+  arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
+  registers and place their return value into '%rax' which is R0 in eBPF.
+  Prologue and epilogue are emitted by JIT and are implicit in the
+  interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
+  them across the calls as defined by calling convention.
+
+  For example the following program is invalid:
+
+    bpf_mov R1, 1
+    bpf_call foo
+    bpf_mov R0, R1
+    bpf_exit
+
+  After the call the registers R1-R5 contain junk values and cannot be read.
+  In the future an eBPF verifier can be used to validate internal BPF programs.
+
+Also in the new design, eBPF is limited to 4096 insns, which means that any
 program will terminate quickly and will only call a fixed number of kernel
 functions. Original BPF and the new format are two operand instructions,
-which helps to do one-to-one mapping between BPF insn and x86 insn during JIT.
+which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
 
 The input context pointer for invoking the interpreter function is generic,
 its content is defined by a specific use case. For seccomp register R1 points
@@ -661,7 +806,26 @@
 
 A program, that is translated internally consists of the following elements:
 
-  op:16, jt:8, jf:8, k:32    ==>    op:8, a_reg:4, x_reg:4, off:16, imm:32
+  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
+
+So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
+has room for new instructions. Some of them may use 16/24/32 byte encoding. New
+instructions must be multiple of 8 bytes to preserve backward compatibility.
+
+Internal BPF is a general purpose RISC instruction set. Not every register and
+every instruction are used during translation from original BPF to new format.
+For example, socket filters are not using 'exclusive add' instruction, but
+tracing filters may do to maintain counters of events, for example. Register R9
+is not used by socket filters either, but more complex filters may be running
+out of registers and would have to resort to spill/fill to stack.
+
+Internal BPF can be used as a generic assembler for last step performance
+optimizations, socket filters and seccomp are using it as assembler. Tracing
+filters may use it as assembler to generate code from kernel. In kernel usage
+may not be bounded by security considerations, since generated internal BPF code
+may be optimizing internal code path and not being exposed to the user space.
+Safety of internal BPF can come from a verifier (TBD). In such use cases as
+described, it may be used as safe instruction set.
 
 Just like the original BPF, the new format runs within a controlled environment,
 is deterministic and the kernel can easily prove that. The safety of the program
@@ -670,6 +834,181 @@
 descends all possible paths. It simulates execution of every insn and observes
 the state change of registers and stack.
 
+eBPF opcode encoding
+--------------------
+
+eBPF is reusing most of the opcode encoding from classic to simplify conversion
+of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
+field is divided into three parts:
+
+  +----------------+--------+--------------------+
+  |   4 bits       |  1 bit |   3 bits           |
+  | operation code | source | instruction class  |
+  +----------------+--------+--------------------+
+  (MSB)                                      (LSB)
+
+Three LSB bits store instruction class which is one of:
+
+  Classic BPF classes:    eBPF classes:
+
+  BPF_LD    0x00          BPF_LD    0x00
+  BPF_LDX   0x01          BPF_LDX   0x01
+  BPF_ST    0x02          BPF_ST    0x02
+  BPF_STX   0x03          BPF_STX   0x03
+  BPF_ALU   0x04          BPF_ALU   0x04
+  BPF_JMP   0x05          BPF_JMP   0x05
+  BPF_RET   0x06          [ class 6 unused, for future if needed ]
+  BPF_MISC  0x07          BPF_ALU64 0x07
+
+When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
+
+  BPF_K     0x00
+  BPF_X     0x08
+
+ * in classic BPF, this means:
+
+  BPF_SRC(code) == BPF_X - use register X as source operand
+  BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+ * in eBPF, this means:
+
+  BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
+  BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+... and four MSB bits store operation code.
+
+If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
+
+  BPF_ADD   0x00
+  BPF_SUB   0x10
+  BPF_MUL   0x20
+  BPF_DIV   0x30
+  BPF_OR    0x40
+  BPF_AND   0x50
+  BPF_LSH   0x60
+  BPF_RSH   0x70
+  BPF_NEG   0x80
+  BPF_MOD   0x90
+  BPF_XOR   0xa0
+  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
+  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
+  BPF_END   0xd0  /* eBPF only: endianness conversion */
+
+If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
+
+  BPF_JA    0x00
+  BPF_JEQ   0x10
+  BPF_JGT   0x20
+  BPF_JGE   0x30
+  BPF_JSET  0x40
+  BPF_JNE   0x50  /* eBPF only: jump != */
+  BPF_JSGT  0x60  /* eBPF only: signed '>' */
+  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
+  BPF_CALL  0x80  /* eBPF only: function call */
+  BPF_EXIT  0x90  /* eBPF only: function return */
+
+So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
+and eBPF. There are only two registers in classic BPF, so it means A += X.
+In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
+BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
+dst_reg = (u32) dst_reg ^ (u32) imm32 in eBPF.
+
+Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
+eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
+BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
+exactly the same operations as BPF_ALU, but with 64-bit wide operands
+instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
+dst_reg = dst_reg + src_reg
+
+Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
+operation. Classic BPF_RET | BPF_K means copy imm32 into return register
+and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
+in eBPF means function exit only. The eBPF program needs to store return
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
+unused and reserved for future use.
+
+For load and store instructions the 8-bit 'code' field is divided as:
+
+  +--------+--------+-------------------+
+  | 3 bits | 2 bits |   3 bits          |
+  |  mode  |  size  | instruction class |
+  +--------+--------+-------------------+
+  (MSB)                             (LSB)
+
+Size modifier is one of ...
+
+  BPF_W   0x00    /* word */
+  BPF_H   0x08    /* half word */
+  BPF_B   0x10    /* byte */
+  BPF_DW  0x18    /* eBPF only, double word */
+
+... which encodes size of load/store operation:
+
+ B  - 1 byte
+ H  - 2 byte
+ W  - 4 byte
+ DW - 8 byte (eBPF only)
+
+Mode modifier is one of:
+
+  BPF_IMM  0x00  /* classic BPF only, reserved in eBPF */
+  BPF_ABS  0x20
+  BPF_IND  0x40
+  BPF_MEM  0x60
+  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+
+eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
+(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+
+They had to be carried over from classic to have strong performance of
+socket filters running in eBPF interpreter. These instructions can only
+be used when interpreter context is a pointer to 'struct sk_buff' and
+have seven implicit operands. Register R6 is an implicit input that must
+contain pointer to sk_buff. Register R0 is an implicit output which contains
+the data fetched from the packet. Registers R1-R5 are scratch registers
+and must not be used to store the data across BPF_ABS | BPF_LD or
+BPF_IND | BPF_LD instructions.
+
+These instructions have implicit program exit condition as well. When
+eBPF program is trying to access the data beyond the packet boundary,
+the interpreter will abort the execution of the program. JIT compilers
+therefore must preserve this property. src_reg and imm32 fields are
+explicit inputs to these instructions.
+
+For example:
+
+  BPF_IND | BPF_W | BPF_LD means:
+
+    R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
+    and R1 - R5 were scratched.
+
+Unlike classic BPF instruction set, eBPF has generic load/store operations:
+
+BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
+BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
+BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
+BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
+BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
+
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
+2 byte atomic increments are not supported.
+
+Testing
+-------
+
+Next to the BPF toolchain, the kernel also ships a test module that contains
+various test cases for classic and internal BPF that can be executed against
+the BPF interpreter and JIT compiler. It can be found in lib/test_bpf.c and
+enabled via Kconfig:
+
+  CONFIG_TEST_BPF=m
+
+After the module has been built and installed, the test suite can be executed
+via insmod or modprobe against 'test_bpf' module. Results of the test cases
+including timings in nsec can be found in the kernel log (dmesg).
+
 Misc
 ----
 

diff --git a/Documentation/platform/x86-laptop-drivers.txt b/Documentation/platform/x86-laptop-drivers.txt
new file mode 100644
index 0000000..01facd2
--- /dev/null
+++ b/Documentation/platform/x86-laptop-drivers.txt

@@ -0,0 +1,18 @@
+compal-laptop
+=============
+List of supported hardware:
+
+by Compal:
+	Compal FL90/IFL90
+	Compal FL91/IFL91
+	Compal FL92/JFL92
+	Compal FT00/IFT00
+
+by Dell:
+	Dell Vostro 1200
+	Dell Mini 9 (Inspiron 910)
+	Dell Mini 10 (Inspiron 1010)
+	Dell Mini 10v (Inspiron 1011)
+	Dell Mini 1012 (Inspiron 1012)
+	Dell Inspiron 11z (Inspiron 1110)
+	Dell Mini 12 (Inspiron 1210)

diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
index e13dafc..2850df3 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.txt

@@ -1,6 +1,6 @@
 Interaction of Suspend code (S3) with the CPU hotplug infrastructure
 
-     (C) 2011 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+     (C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
 
 
 I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM

diff --git a/Documentation/powerpc/cpu_families.txt b/Documentation/powerpc/cpu_families.txt
new file mode 100644
index 0000000..fc08e22
--- /dev/null
+++ b/Documentation/powerpc/cpu_families.txt

@@ -0,0 +1,221 @@
+CPU Families
+============
+
+This document tries to summarise some of the different cpu families that exist
+and are supported by arch/powerpc.
+
+
+Book3S (aka sPAPR)
+------------------
+
+ - Hash MMU
+ - Mix of 32 & 64 bit
+
+   +--------------+                 +----------------+
+   |  Old POWER   | --------------> | RS64 (threads) |
+   +--------------+                 +----------------+
+          |
+          |
+          v
+   +--------------+                 +----------------+      +------+
+   |     601      | --------------> |      603       | ---> | e300 |
+   +--------------+                 +----------------+      +------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+      +-------+
+   |     604      |                 |    750 (G3)    | ---> | 750CX |
+   +--------------+                 +----------------+      +-------+
+          |                                 |                   |
+          |                                 |                   |
+          v                                 v                   v
+   +--------------+                 +----------------+      +-------+
+   | 620 (64 bit) |                 |      7400      |      | 750CL |
+   +--------------+                 +----------------+      +-------+
+          |                                 |                   |
+          |                                 |                   |
+          v                                 v                   v
+   +--------------+                 +----------------+      +-------+
+   |  POWER3/630  |                 |      7410      |      | 750FX |
+   +--------------+                 +----------------+      +-------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+
+   |   POWER3+    |                 |      7450      |
+   +--------------+                 +----------------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+
+   |    POWER4    |                 |      7455      |
+   +--------------+                 +----------------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+     +-------+   +----------------+
+   |   POWER4+    | --> |  970  |   |      7447      |
+   +--------------+     +-------+   +----------------+
+          |                 |               |
+          |                 |               |
+          v                 v               v
+   +--------------+     +-------+   +----------------+
+   |    POWER5    |     | 970FX |   |      7448      |
+   +--------------+     +-------+   +----------------+
+          |                 |               |
+          |                 |               |
+          v                 v               v
+   +--------------+     +-------+   +----------------+
+   |   POWER5+    |     | 970MP |   |      e600      |
+   +--------------+     +-------+   +----------------+
+          |
+          |
+          v
+   +--------------+
+   |   POWER5++   |
+   +--------------+
+          |
+          |
+          v
+   +--------------+       +-------+
+   |    POWER6    | <-?-> | Cell  |
+   +--------------+       +-------+
+          |
+          |
+          v
+   +--------------+
+   |    POWER7    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |   POWER7+    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |    POWER8    |
+   +--------------+
+
+
+   +---------------+
+   | PA6T (64 bit) |
+   +---------------+
+
+
+IBM BookE
+---------
+
+ - Software loaded TLB.
+ - All 32 bit
+
+   +--------------+
+   |     401      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     403      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     405      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     440      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+     +----------------+
+   |     450      | --> |      BG/P      |
+   +--------------+     +----------------+
+          |
+          |
+          v
+   +--------------+
+   |     460      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     476      |
+   +--------------+
+
+
+Motorola/Freescale 8xx
+----------------------
+
+ - Software loaded with hardware assist.
+ - All 32 bit
+
+   +-------------+
+   | MPC8xx Core |
+   +-------------+
+
+
+Freescale BookE
+---------------
+
+ - Software loaded TLB.
+ - e6500 adds HW loaded indirect TLB entries.
+ - Mix of 32 & 64 bit
+
+   +--------------+
+   |     e200     |
+   +--------------+
+
+
+   +--------------------------------+
+   |              e500              |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |             e500v2             |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |        e500mc (Book3e)         |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |          e5500 (64 bit)        |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   | e6500 (HW TLB) (Multithreaded) |
+   +--------------------------------+
+
+
+IBM A2 core
+-----------
+
+ - Book3E, software loaded TLB + HW loaded indirect TLB entries.
+ - 64 bit
+
+   +--------------+     +----------------+
+   |   A2 core    | --> |      WSP       |
+   +--------------+     +----------------+
+           |
+           |
+           v
+   +--------------+
+   |     BG/Q     |
+   +--------------+

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 93cb979..ca895fd 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt

@@ -19,7 +19,8 @@
 consumers to providers, as given in the following example:
 
 	static struct pwm_lookup board_pwm_lookup[] = {
-		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL),
+		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
+			   50000, PWM_POLARITY_NORMAL),
 	};
 
 	static void __init board_init(void)
@@ -97,6 +98,13 @@
 number of PWM devices provided by the chip and the chip-specific
 implementation of the supported PWM operations to the framework.
 
+When implementing polarity support in a PWM driver, make sure to respect the
+signal conventions in the PWM framework. By definition, normal polarity
+characterizes a signal that starts high for the duration of the duty cycle and
+goes low for the remainder of the period. Conversely, a signal with inversed
+polarity starts low for the duration of the duty cycle and goes high for the
+remainder of the period.
+
 Locking
 -------
 

diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx
index 5020b7b..52f0b43 100644
--- a/Documentation/scsi/LICENSE.qla2xxx
+++ b/Documentation/scsi/LICENSE.qla2xxx

@@ -1,4 +1,4 @@
-Copyright (c) 2003-2013 QLogic Corporation
+Copyright (c) 2003-2014 QLogic Corporation
 QLogic Linux FC-FCoE Driver
 
 This program includes a device driver for Linux 3.x.

diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt
index 5ea996f..b6ef7e9 100644
--- a/Documentation/security/Smack.txt
+++ b/Documentation/security/Smack.txt

@@ -204,6 +204,16 @@
 	these capabilities are effective at for processes with any
 	label. The value is set by writing the desired label to the
 	file or cleared by writing "-" to the file.
+ptrace
+	This is used to define the current ptrace policy
+	0 - default: this is the policy that relies on smack access rules.
+	    For the PTRACE_READ a subject needs to have a read access on
+	    object. For the PTRACE_ATTACH a read-write access is required.
+	1 - exact: this is the policy that limits PTRACE_ATTACH. Attach is
+	    only allowed when subject's and object's labels are equal.
+	    PTRACE_READ is not affected. Can be overridden with CAP_SYS_PTRACE.
+	2 - draconian: this policy behaves like the 'exact' above with an
+	    exception that it can't be overridden with CAP_SYS_PTRACE.
 revoke-subject
 	Writing a Smack label here sets the access to '-' for all access
 	rules with that subject label.

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bd36598..2479b2a 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt

@@ -2003,6 +2003,32 @@
   360.774530 |   1)   0.594 us    |                                          __phys_addr();
 
 
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is disabled by default.
+
+	hide: echo nofuncgraph-tail > trace_options
+	show: echo funcgraph-tail > trace_options
+
+  Example with nofuncgraph-tail (default):
+  0)               |      putname() {
+  0)               |        kmem_cache_free() {
+  0)   0.518 us    |          __phys_addr();
+  0)   1.757 us    |        }
+  0)   2.861 us    |      }
+
+  Example with funcgraph-tail:
+  0)               |      putname() {
+  0)               |        kmem_cache_free() {
+  0)   0.518 us    |          __phys_addr();
+  0)   1.757 us    |        } /* kmem_cache_free() */
+  0)   2.861 us    |      } /* putname() */
+
 You can put some comments on specific functions by using
 trace_printk() For example, if you want to put a comment inside
 the __might_sleep() function, you just have to include

diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
index 6b018b5..a3efac6 100644
--- a/Documentation/trace/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt

@@ -115,6 +115,30 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
 used to export the defined tracepoints.
 
+If you need to do a bit of work for a tracepoint parameter, and
+that work is only used for the tracepoint, that work can be encapsulated
+within an if statement with the following:
+
+	if (trace_foo_bar_enabled()) {
+		int i;
+		int tot = 0;
+
+		for (i = 0; i < count; i++)
+			tot += calculate_nuggets();
+
+		trace_foo_bar(tot);
+	}
+
+All trace_<tracepoint>() calls have a matching trace_<tracepoint>_enabled()
+function defined that returns true if the tracepoint is enabled and
+false otherwise. The trace_<tracepoint>() should always be within the
+block of the if (trace_<tracepoint>_enabled()) to prevent races between
+the tracepoint being enabled and the check being seen.
+
+The advantage of using the trace_<tracepoint>_enabled() is that it uses
+the static_key of the tracepoint to allow the if statement to be implemented
+with jump labels and avoid conditional branches.
+
 Note: The convenience macro TRACE_EVENT provides an alternative way to
       define tracepoints. Check http://lwn.net/Articles/379903,
       http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362

diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c
index 8587020..1dbb4b8 100644
--- a/Documentation/vDSO/parse_vdso.c
+++ b/Documentation/vDSO/parse_vdso.c

@@ -1,6 +1,6 @@
 /*
  * parse_vdso.c: Linux reference vDSO parser
- * Written by Andrew Lutomirski, 2011.
+ * Written by Andrew Lutomirski, 2011-2014.
  *
  * This code is meant to be linked in to various programs that run on Linux.
  * As such, it is available with as few restrictions as possible.  This file
@@ -11,13 +11,14 @@
  * it starts a program.  It works equally well in statically and dynamically
  * linked binaries.
  *
- * This code is tested on x86_64.  In principle it should work on any 64-bit
+ * This code is tested on x86.  In principle it should work on any
  * architecture that has a vDSO.
  */
 
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
+#include <limits.h>
 #include <elf.h>
 
 /*
@@ -45,11 +46,18 @@
 
 
 /* And here's the code. */
-
-#ifndef __x86_64__
-# error Not yet ported to non-x86_64 architectures
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+#  define ELF_BITS 64
+# else
+#  define ELF_BITS 32
+# endif
 #endif
 
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
 static struct vdso_info
 {
 	bool valid;
@@ -59,14 +67,14 @@
 	uintptr_t load_offset;  /* load_addr - recorded vaddr */
 
 	/* Symbol table */
-	Elf64_Sym *symtab;
+	ELF(Sym) *symtab;
 	const char *symstrings;
-	Elf64_Word *bucket, *chain;
-	Elf64_Word nbucket, nchain;
+	ELF(Word) *bucket, *chain;
+	ELF(Word) nbucket, nchain;
 
 	/* Version table */
-	Elf64_Versym *versym;
-	Elf64_Verdef *verdef;
+	ELF(Versym) *versym;
+	ELF(Verdef) *verdef;
 } vdso_info;
 
 /* Straight from the ELF specification. */
@@ -92,9 +100,14 @@
 
 	vdso_info.load_addr = base;
 
-	Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
-	Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
-	Elf64_Dyn *dyn = 0;
+	ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+	if (hdr->e_ident[EI_CLASS] !=
+	    (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+		return;  /* Wrong ELF class -- check ELF_BITS */
+	}
+
+	ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+	ELF(Dyn) *dyn = 0;
 
 	/*
 	 * We need two things from the segment table: the load offset
@@ -108,7 +121,7 @@
 				+ (uintptr_t)pt[i].p_offset
 				- (uintptr_t)pt[i].p_vaddr;
 		} else if (pt[i].p_type == PT_DYNAMIC) {
-			dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
+			dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
 		}
 	}
 
@@ -118,7 +131,7 @@
 	/*
 	 * Fish out the useful bits of the dynamic table.
 	 */
-	Elf64_Word *hash = 0;
+	ELF(Word) *hash = 0;
 	vdso_info.symstrings = 0;
 	vdso_info.symtab = 0;
 	vdso_info.versym = 0;
@@ -131,22 +144,22 @@
 				 + vdso_info.load_offset);
 			break;
 		case DT_SYMTAB:
-			vdso_info.symtab = (Elf64_Sym *)
+			vdso_info.symtab = (ELF(Sym) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_HASH:
-			hash = (Elf64_Word *)
+			hash = (ELF(Word) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_VERSYM:
-			vdso_info.versym = (Elf64_Versym *)
+			vdso_info.versym = (ELF(Versym) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_VERDEF:
-			vdso_info.verdef = (Elf64_Verdef *)
+			vdso_info.verdef = (ELF(Verdef) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
@@ -168,8 +181,8 @@
 	vdso_info.valid = true;
 }
 
-static bool vdso_match_version(Elf64_Versym ver,
-			       const char *name, Elf64_Word hash)
+static bool vdso_match_version(ELF(Versym) ver,
+			       const char *name, ELF(Word) hash)
 {
 	/*
 	 * This is a helper function to check if the version indexed by
@@ -188,7 +201,7 @@
 
 	/* First step: find the version definition */
 	ver &= 0x7fff;  /* Apparently bit 15 means "hidden" */
-	Elf64_Verdef *def = vdso_info.verdef;
+	ELF(Verdef) *def = vdso_info.verdef;
 	while(true) {
 		if ((def->vd_flags & VER_FLG_BASE) == 0
 		    && (def->vd_ndx & 0x7fff) == ver)
@@ -197,11 +210,11 @@
 		if (def->vd_next == 0)
 			return false;  /* No definition. */
 
-		def = (Elf64_Verdef *)((char *)def + def->vd_next);
+		def = (ELF(Verdef) *)((char *)def + def->vd_next);
 	}
 
 	/* Now figure out whether it matches. */
-	Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
+	ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
 	return def->vd_hash == hash
 		&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
 }
@@ -213,10 +226,10 @@
 		return 0;
 
 	ver_hash = elf_hash(version);
-	Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+	ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
 
 	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
-		Elf64_Sym *sym = &vdso_info.symtab[chain];
+		ELF(Sym) *sym = &vdso_info.symtab[chain];
 
 		/* Check for a defined global or weak function w/ right name. */
 		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
@@ -243,7 +256,7 @@
 
 void vdso_init_from_auxv(void *auxv)
 {
-	Elf64_auxv_t *elf_auxv = auxv;
+	ELF(auxv_t) *elf_auxv = auxv;
 	for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
 	{
 		if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {

diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 0000000..d462402
--- /dev/null
+++ b/Documentation/vDSO/vdso_standalone_test_x86.c

@@ -0,0 +1,128 @@
+/*
+ * vdso_standalone_test_x86.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ *      vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary.  On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a libc function... */
+int strcmp(const char *a, const char *b)
+{
+	/* This implementation is buggy: it never returns -1. */
+	while (*a || *b) {
+		if (*a != *b)
+			return 1;
+		if (*a == 0 || *b == 0)
+			return 1;
+		a++;
+		b++;
+	}
+
+	return 0;
+}
+
+/* ...and two syscalls.  This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+	long ret;
+#ifdef __x86_64__
+	asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+		      "D" (a0), "S" (a1), "d" (a2) :
+		      "cc", "memory", "rcx",
+		      "r8", "r9", "r10", "r11" );
+#else
+	asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+		      "b" (a0), "c" (a1), "d" (a2) :
+		      "cc", "memory" );
+#endif
+	return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+	return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+	x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, uint64_t n)
+{
+	while (n) {
+		*lastdig = (n % 10) + '0';
+		n /= 10;
+		lastdig--;
+	}
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+	/* Parse the stack */
+	long argc = (long)*stack;
+	stack += argc + 2;
+
+	/* Now we're pointing at the environment.  Skip it. */
+	while(*stack)
+		stack++;
+	stack++;
+
+	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
+	vdso_init_from_auxv((void *)stack);
+
+	/* Find gettimeofday. */
+	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+	if (!gtod)
+		linux_exit(1);
+
+	struct timeval tv;
+	long ret = gtod(&tv, 0);
+
+	if (ret == 0) {
+		char buf[] = "The time is                     .000000\n";
+		to_base10(buf + 31, tv.tv_sec);
+		to_base10(buf + 38, tv.tv_usec);
+		linux_write(1, buf, sizeof(buf) - 1);
+	} else {
+		linux_exit(ret);
+	}
+
+	linux_exit(0);
+}
+
+/*
+ * This is the real entry point.  It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+	".text\n"
+	".global _start\n"
+	".type _start,@function\n"
+	"_start:\n\t"
+#ifdef __x86_64__
+	"mov %rsp,%rdi\n\t"
+	"jmp c_main"
+#else
+	"push %esp\n\t"
+	"call c_main\n\t"
+	"int $3"
+#endif
+	);

diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c
index fff6334..8daeb7d 100644
--- a/Documentation/vDSO/vdso_test.c
+++ b/Documentation/vDSO/vdso_test.c

@@ -1,111 +1,52 @@
 /*
- * vdso_test.c: Sample code to test parse_vdso.c on x86_64
- * Copyright (c) 2011 Andy Lutomirski
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
  * Subject to the GNU General Public License, version 2
  *
- * You can amuse yourself by compiling with:
- * gcc -std=gnu99 -nostdlib
- *     -Os -fno-asynchronous-unwind-tables -flto
- *      vdso_test.c parse_vdso.c -o vdso_test
- * to generate a small binary with no dependencies at all.
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
  */
 
-#include <sys/syscall.h>
-#include <sys/time.h>
-#include <unistd.h>
 #include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
 
 extern void *vdso_sym(const char *version, const char *name);
 extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
 extern void vdso_init_from_auxv(void *auxv);
 
-/* We need a libc functions... */
-int strcmp(const char *a, const char *b)
+int main(int argc, char **argv)
 {
-	/* This implementation is buggy: it never returns -1. */
-	while (*a || *b) {
-		if (*a != *b)
-			return 1;
-		if (*a == 0 || *b == 0)
-			return 1;
-		a++;
-		b++;
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return 0;
 	}
 
-	return 0;
-}
-
-/* ...and two syscalls.  This is x86_64-specific. */
-static inline long linux_write(int fd, const void *data, size_t len)
-{
-
-	long ret;
-	asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write),
-		      "D" (fd), "S" (data), "d" (len) :
-		      "cc", "memory", "rcx",
-		      "r8", "r9", "r10", "r11" );
-	return ret;
-}
-
-static inline void linux_exit(int code)
-{
-	asm volatile ("syscall" : : "a" (__NR_exit), "D" (code));
-}
-
-void to_base10(char *lastdig, uint64_t n)
-{
-	while (n) {
-		*lastdig = (n % 10) + '0';
-		n /= 10;
-		lastdig--;
-	}
-}
-
-__attribute__((externally_visible)) void c_main(void **stack)
-{
-	/* Parse the stack */
-	long argc = (long)*stack;
-	stack += argc + 2;
-
-	/* Now we're pointing at the environment.  Skip it. */
-	while(*stack)
-		stack++;
-	stack++;
-
-	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
-	vdso_init_from_auxv((void *)stack);
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
 
 	/* Find gettimeofday. */
 	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
 	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
 
-	if (!gtod)
-		linux_exit(1);
+	if (!gtod) {
+		printf("Could not find __vdso_gettimeofday\n");
+		return 1;
+	}
 
 	struct timeval tv;
 	long ret = gtod(&tv, 0);
 
 	if (ret == 0) {
-		char buf[] = "The time is                     .000000\n";
-		to_base10(buf + 31, tv.tv_sec);
-		to_base10(buf + 38, tv.tv_usec);
-		linux_write(1, buf, sizeof(buf) - 1);
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
 	} else {
-		linux_exit(ret);
+		printf("__vdso_gettimeofday failed\n");
 	}
 
-	linux_exit(0);
+	return 0;
 }
-
-/*
- * This is the real entry point.  It passes the initial stack into
- * the C entry point.
- */
-asm (
-	".text\n"
-	".global _start\n"
-        ".type _start,@function\n"
-        "_start:\n\t"
-        "mov %rsp,%rdi\n\t"
-        "jmp c_main"
-	);

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b22565..055f952 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -604,6 +604,13 @@
 S:	Maintained
 F:	arch/x86/kernel/microcode_amd.c
 
+AMD XGBE DRIVER
+M:	Tom Lendacky <thomas.lendacky@amd.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/amd/xgbe/
+F:	drivers/net/phy/amd-xgbe-phy.c
+
 AMS (Apple Motion Sensor) DRIVER
 M:	Michael Hanselmann <linux-kernel@hansmi.ch>
 S:	Supported
@@ -1894,7 +1901,7 @@
 F:	drivers/net/ethernet/broadcom/bnx2_*
 
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Ariel Elior <ariele@broadcom.com>
+M:	Ariel Elior <ariel.elior@qlogic.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2x/
@@ -1974,6 +1981,12 @@
 F:	drivers/bcma/
 F:	include/linux/bcma/
 
+BROADCOM SYSTEMPORT ETHERNET DRIVER
+M:	Florian Fainelli <f.fainelli@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/broadcom/bcmsysport.*
+
 BROCADE BFA FC SCSI DRIVER
 M:	Anil Gurumurthy <anil.gurumurthy@qlogic.com>
 M:	Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
@@ -2230,9 +2243,8 @@
 CISCO VIC ETHERNET NIC DRIVER
 M:	Christian Benvenuti <benve@cisco.com>
 M:	Sujith Sankar <ssujith@cisco.com>
-M:	Govindarajulu Varadarajan <govindarajulu90@gmail.com>
+M:	Govindarajulu Varadarajan <_govind@gmx.com>
 M:	Neel Patel <neepatel@cisco.com>
-M:	Nishank Trivedi <nistrive@cisco.com>
 S:	Supported
 F:	drivers/net/ethernet/cisco/enic/
 
@@ -2384,16 +2396,35 @@
 S:	Maintained
 F:	drivers/connector/
 
-CONTROL GROUPS (CGROUPS)
+CONTROL GROUP (CGROUP)
 M:	Tejun Heo <tj@kernel.org>
 M:	Li Zefan <lizefan@huawei.com>
-L:	containers@lists.linux-foundation.org
 L:	cgroups@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
 S:	Maintained
+F:	Documentation/cgroups/
 F:	include/linux/cgroup*
 F:	kernel/cgroup*
-F:	mm/*cgroup*
+
+CONTROL GROUP - CPUSET
+M:	Li Zefan <lizefan@huawei.com>
+L:	cgroups@vger.kernel.org
+W:	http://www.bullopensource.org/cpuset/
+W:	http://oss.sgi.com/projects/cpusets/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
+S:	Maintained
+F:	Documentation/cgroups/cpusets.txt
+F:	include/linux/cpuset.h
+F:	kernel/cpuset.c
+
+CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
+M:	Johannes Weiner <hannes@cmpxchg.org>
+M:	Michal Hocko <mhocko@suse.cz>
+L:	cgroups@vger.kernel.org
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	mm/memcontrol.c
+F:	mm/page_cgroup.c
 
 CORETEMP HARDWARE MONITORING DRIVER
 M:	Fenghua Yu <fenghua.yu@intel.com>
@@ -2464,17 +2495,6 @@
 S:	Maintained
 F:	tools/power/cpupower/
 
-CPUSETS
-M:	Li Zefan <lizefan@huawei.com>
-L:	cgroups@vger.kernel.org
-W:	http://www.bullopensource.org/cpuset/
-W:	http://oss.sgi.com/projects/cpusets/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
-S:	Maintained
-F:	Documentation/cgroups/cpusets.txt
-F:	include/linux/cpuset.h
-F:	kernel/cpuset.c
-
 CRAMFS FILESYSTEM
 W:	http://sourceforge.net/projects/cramfs/
 S:	Orphan / Obsolete
@@ -2692,6 +2712,15 @@
 F:	Documentation/networking/decnet.txt
 F:	net/decnet/
 
+DECSTATION PLATFORM SUPPORT
+M:	"Maciej W. Rozycki" <macro@linux-mips.org>
+L:	linux-mips@linux-mips.org
+W:	http://www.linux-mips.org/wiki/DECstation
+S:	Maintained
+F:	arch/mips/dec/
+F:	arch/mips/include/asm/dec/
+F:	arch/mips/include/asm/mach-dec/
+
 DEFXX FDDI NETWORK DRIVER
 M:	"Maciej W. Rozycki" <macro@linux-mips.org>
 S:	Maintained
@@ -2935,6 +2964,7 @@
 T:	git git://people.freedesktop.org/~airlied/linux
 S:	Maintained
 F:	drivers/gpu/drm/
+F:	drivers/gpu/vga/
 F:	include/drm/
 F:	include/uapi/drm/
 
@@ -3775,7 +3805,8 @@
 F:	include/linux/fscache*.h
 
 F2FS FILE SYSTEM
-M:	Jaegeuk Kim <jaegeuk.kim@samsung.com>
+M:	Jaegeuk Kim <jaegeuk@kernel.org>
+M:	Changman Lee <cm224.lee@samsung.com>
 L:	linux-f2fs-devel@lists.sourceforge.net
 W:	http://en.wikipedia.org/wiki/F2FS
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
@@ -4373,7 +4404,7 @@
 F:	drivers/crypto/nx/
 
 IBM Power 842 compression accelerator
-M:	Robert Jennings <rcj@linux.vnet.ibm.com>
+M:	Nathan Fontenot <nfont@linux.vnet.ibm.com>
 S:	Supported
 F:	drivers/crypto/nx/nx-842.c
 F:	include/linux/nx842.h
@@ -4389,12 +4420,18 @@
 S:	Supported
 F:	drivers/net/ethernet/ibm/ibmveth.*
 
-IBM Power Virtual SCSI/FC Device Drivers
-M:	Robert Jennings <rcj@linux.vnet.ibm.com>
+IBM Power Virtual SCSI Device Drivers
+M:	Nathan Fontenot <nfont@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
-F:	drivers/scsi/ibmvscsi/
-X:	drivers/scsi/ibmvscsi/ibmvstgt.c
+F:	drivers/scsi/ibmvscsi/ibmvscsi*
+F:	drivers/scsi/ibmvscsi/viosrp.h
+
+IBM Power Virtual FC Device Drivers
+M:	Brian King <brking@linux.vnet.ibm.com>
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/ibmvscsi/ibmvfc*
 
 IBM ServeRAID RAID DRIVER
 P:	Jack Hammer
@@ -5757,17 +5794,6 @@
 F:	include/linux/vmalloc.h
 F:	mm/
 
-MEMORY RESOURCE CONTROLLER
-M:	Johannes Weiner <hannes@cmpxchg.org>
-M:	Michal Hocko <mhocko@suse.cz>
-M:	Balbir Singh <bsingharora@gmail.com>
-M:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
-L:	cgroups@vger.kernel.org
-L:	linux-mm@kvack.org
-S:	Maintained
-F:	mm/memcontrol.c
-F:	mm/page_cgroup.c
-
 MEMORY TECHNOLOGY DEVICES (MTD)
 M:	David Woodhouse <dwmw2@infradead.org>
 M:	Brian Norris <computersforpeace@gmail.com>
@@ -5961,6 +5987,7 @@
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-mmc@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
+T:	git git://git.linaro.org/people/ulf.hansson/mmc.git
 S:	Maintained
 F:	drivers/mmc/
 F:	include/linux/mmc/
@@ -6022,6 +6049,28 @@
 S:	Odd Fixes
 F:	fs/ncpfs/
 
+NCR 5380 SCSI DRIVERS
+M:	Finn Thain <fthain@telegraphics.com.au>
+M:	Michael Schmitz <schmitzmic@gmail.com>
+L:	linux-scsi@vger.kernel.org
+S:	Maintained
+F:	Documentation/scsi/g_NCR5380.txt
+F:	drivers/scsi/NCR5380.*
+F:	drivers/scsi/arm/cumana_1.c
+F:	drivers/scsi/arm/oak.c
+F:	drivers/scsi/atari_NCR5380.c
+F:	drivers/scsi/atari_scsi.*
+F:	drivers/scsi/dmx3191d.c
+F:	drivers/scsi/dtc.*
+F:	drivers/scsi/g_NCR5380.*
+F:	drivers/scsi/g_NCR5380_mmio.c
+F:	drivers/scsi/mac_scsi.*
+F:	drivers/scsi/pas16.*
+F:	drivers/scsi/sun3_NCR5380.c
+F:	drivers/scsi/sun3_scsi.*
+F:	drivers/scsi/sun3_scsi_vme.c
+F:	drivers/scsi/t128.*
+
 NCR DUAL 700 SCSI DRIVER (MICROCHANNEL)
 M:	"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
 L:	linux-scsi@vger.kernel.org
@@ -6131,6 +6180,7 @@
 F:	tools/net/
 F:	tools/testing/selftests/net/
 F:	lib/random32.c
+F:	lib/test_bpf.c
 
 NETWORKING [IPv4/IPv6]
 M:	"David S. Miller" <davem@davemloft.net>
@@ -9068,7 +9118,7 @@
 F:	include/uapi/linux/toshiba.h
 
 TMIO MMC DRIVER
-M:	Ian Molton <ian@mnementh.co.uk>
+M:	Ian Molton <ian.molton@codethink.co.uk>
 L:	linux-mmc@vger.kernel.org
 S:	Maintained
 F:	drivers/mmc/host/tmio_mmc*
@@ -9109,7 +9159,6 @@
 
 TRACING
 M:	Steven Rostedt <rostedt@goodmis.org>
-M:	Frederic Weisbecker <fweisbec@gmail.com>
 M:	Ingo Molnar <mingo@redhat.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Maintained

diff --git a/Makefile b/Makefile
index c761fb1..7680d7c 100644
--- a/Makefile
+++ b/Makefile

@@ -105,10 +105,6 @@
   KBUILD_OUTPUT := $(O)
 endif
 
-ifeq ("$(origin W)", "command line")
-  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
-endif
-
 # That's our default target when none is given on the command line
 PHONY := _all
 _all:
@@ -153,8 +149,18 @@
 _all: modules
 endif
 
-srctree		:= $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR))
-objtree		:= $(CURDIR)
+ifeq ($(KBUILD_SRC),)
+        # building in the source tree
+        srctree := .
+else
+        ifeq ($(KBUILD_SRC)/,$(dir $(CURDIR)))
+                # building in a subdirectory of the source tree
+                srctree := ..
+        else
+                srctree := $(KBUILD_SRC)
+        endif
+endif
+objtree		:= .
 src		:= $(srctree)
 obj		:= $(objtree)
 
@@ -166,7 +172,7 @@
 # SUBARCH tells the usermode build what the underlying arch is.  That is set
 # first, and if a usermode build is happening, the "ARCH=um" on the command
 # line overrides the setting of ARCH below.  If a native build is happening,
-# then ARCH is assigned, getting whatever value it gets normally, and 
+# then ARCH is assigned, getting whatever value it gets normally, and
 # SUBARCH is subsequently ignored.
 
 SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
@@ -259,18 +265,18 @@
 KBUILD_MODULES :=
 KBUILD_BUILTIN := 1
 
-#	If we have only "make modules", don't compile built-in objects.
-#	When we're building modules with modversions, we need to consider
-#	the built-in objects during the descend as well, in order to
-#	make sure the checksums are up to date before we record them.
+# If we have only "make modules", don't compile built-in objects.
+# When we're building modules with modversions, we need to consider
+# the built-in objects during the descend as well, in order to
+# make sure the checksums are up to date before we record them.
 
 ifeq ($(MAKECMDGOALS),modules)
   KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
 endif
 
-#	If we have "make <whatever> modules", compile modules
-#	in addition to whatever we do anyway.
-#	Just "make" or "make all" shall build modules as well
+# If we have "make <whatever> modules", compile modules
+# in addition to whatever we do anyway.
+# Just "make" or "make all" shall build modules as well
 
 ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
@@ -294,7 +300,7 @@
 #         cmd_cc_o_c       = $(CC) $(c_flags) -c -o $@ $<
 #
 # If $(quiet) is empty, the whole command will be printed.
-# If it is set to "quiet_", only the short version will be printed. 
+# If it is set to "quiet_", only the short version will be printed.
 # If it is set to "silent_", nothing will be printed at all, since
 # the variable $(silent_cmd_cc_o_c) doesn't exist.
 #
@@ -346,7 +352,6 @@
 include $(srctree)/scripts/Kbuild.include
 
 # Make variables (CC, etc...)
-
 AS		= $(CROSS_COMPILE)as
 LD		= $(CROSS_COMPILE)ld
 CC		= $(CROSS_COMPILE)gcc
@@ -395,8 +400,8 @@
 KBUILD_CFLAGS   := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		   -fno-strict-aliasing -fno-common \
 		   -Werror-implicit-function-declaration \
-		   -Wno-format-security \
-		   $(call cc-option,-fno-delete-null-pointer-checks,)
+		   -Wno-format-security
+
 KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
 KBUILD_AFLAGS   := -D__ASSEMBLY__
@@ -504,8 +509,16 @@
 # We're called with mixed targets (*config and build targets).
 # Handle them one by one.
 
-%:: FORCE
-	$(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@
+PHONY += $(MAKECMDGOALS) __build_one_by_one
+
+$(filter-out __build_one_by_one, $(MAKECMDGOALS)): __build_one_by_one
+	@:
+
+__build_one_by_one:
+	$(Q)set -e; \
+	for i in $(MAKECMDGOALS); do \
+		$(MAKE) -f $(srctree)/Makefile $$i; \
+	done
 
 else
 ifeq ($(config-targets),1)
@@ -520,11 +533,9 @@
 export KBUILD_DEFCONFIG KBUILD_KCONFIG
 
 config: scripts_basic outputmakefile FORCE
-	$(Q)mkdir -p include/linux include/config
 	$(Q)$(MAKE) $(build)=scripts/kconfig $@
 
 %config: scripts_basic outputmakefile FORCE
-	$(Q)mkdir -p include/linux include/config
 	$(Q)$(MAKE) $(build)=scripts/kconfig $@
 
 else
@@ -594,14 +605,16 @@
 # Defaults to vmlinux, but the arch makefile usually adds further targets
 all: vmlinux
 
+include $(srctree)/arch/$(SRCARCH)/Makefile
+
+KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
+
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS	+= -O2
 endif
 
-include $(srctree)/arch/$(SRCARCH)/Makefile
-
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
@@ -731,6 +744,8 @@
 	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
+include $(srctree)/scripts/Makefile.extrawarn
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 KBUILD_CPPFLAGS += $(KCPPFLAGS)
 KBUILD_AFLAGS += $(KAFLAGS)
@@ -775,10 +790,10 @@
 export MODLIB
 
 #
-#  INSTALL_MOD_STRIP, if defined, will cause modules to be
-#  stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
-#  the default option --strip-debug will be used.  Otherwise,
-#  INSTALL_MOD_STRIP value will be used as the options to the strip command.
+# INSTALL_MOD_STRIP, if defined, will cause modules to be
+# stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
+# the default option --strip-debug will be used.  Otherwise,
+# INSTALL_MOD_STRIP value will be used as the options to the strip command.
 
 ifdef INSTALL_MOD_STRIP
 ifeq ($(INSTALL_MOD_STRIP),1)
@@ -863,7 +878,7 @@
 endif
 	+$(call if_changed,link-vmlinux)
 
-# The actual objects are generated when descending, 
+# The actual objects are generated when descending,
 # make sure no implicit rule kicks in
 $(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
 
@@ -1021,11 +1036,11 @@
 
 all: modules
 
-#	Build modules
+# Build modules
 #
-#	A module can be listed more than once in obj-m resulting in
-#	duplicate lines in modules.order files.  Those are removed
-#	using awk while concatenating to the final file.
+# A module can be listed more than once in obj-m resulting in
+# duplicate lines in modules.order files.  Those are removed
+# using awk while concatenating to the final file.
 
 PHONY += modules
 modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin
@@ -1054,10 +1069,10 @@
 	@rm -rf $(MODLIB)/kernel
 	@rm -f $(MODLIB)/source
 	@mkdir -p $(MODLIB)/kernel
-	@ln -s $(srctree) $(MODLIB)/source
+	@ln -s `cd $(srctree) && /bin/pwd` $(MODLIB)/source
 	@if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
 		rm -f $(MODLIB)/build ; \
-		ln -s $(objtree) $(MODLIB)/build ; \
+		ln -s $(CURDIR) $(MODLIB)/build ; \
 	fi
 	@cp -f $(objtree)/modules.order $(MODLIB)/
 	@cp -f $(objtree)/modules.builtin $(MODLIB)/
@@ -1104,7 +1119,7 @@
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config usr/include include/generated          \
-                  arch/*/include/generated .tmp_objdiff
+		  arch/*/include/generated .tmp_objdiff
 MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \
 		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
 		  signing_key.priv signing_key.x509 x509.genkey		\
@@ -1478,7 +1493,7 @@
 	$(build)=$(build-dir) $(@:.ko=.o)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
 
-# FIXME Should go into a make.lib or something 
+# FIXME Should go into a make.lib or something
 # ===========================================================================
 
 quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN   $(wildcard $(rm-dirs)))

diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
index bcf662d..5bb2fda 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts

@@ -17,7 +17,7 @@
 	interrupt-parent = <&intc>;
 
 	chosen {
-		bootargs = "console=ttyARC0,115200n8";
+		bootargs = "console=ttyARC0,115200n8 earlyprintk=ttyARC0";
 	};
 
 	aliases {

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 2fd3162..c1d3d2d 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h

@@ -55,4 +55,31 @@
 
 #endif	/* !__ASSEMBLY__ */
 
+/* Instruction cache related Auxiliary registers */
+#define ARC_REG_IC_BCR		0x77	/* Build Config reg */
+#define ARC_REG_IC_IVIC		0x10
+#define ARC_REG_IC_CTRL		0x11
+#define ARC_REG_IC_IVIL		0x19
+#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4)
+#define ARC_REG_IC_PTAG		0x1E
+#endif
+
+/* Bit val in IC_CTRL */
+#define IC_CTRL_CACHE_DISABLE   0x1
+
+/* Data cache related Auxiliary registers */
+#define ARC_REG_DC_BCR		0x72	/* Build Config reg */
+#define ARC_REG_DC_IVDC		0x47
+#define ARC_REG_DC_CTRL		0x48
+#define ARC_REG_DC_IVDL		0x4A
+#define ARC_REG_DC_FLSH		0x4B
+#define ARC_REG_DC_FLDL		0x4C
+#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4)
+#define ARC_REG_DC_PTAG		0x5C
+#endif
+
+/* Bit val in DC_CTRL */
+#define DC_CTRL_INV_MODE_FLUSH  0x40
+#define DC_CTRL_FLUSH_STATUS    0x100
+
 #endif /* _ASM_CACHE_H */

diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 291a70d..fb4efb6 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h

@@ -19,8 +19,6 @@
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
-extern int get_hw_config_num_irq(void);
-
-void arc_local_timer_setup(unsigned int cpu);
+void arc_local_timer_setup(void);
 
 #endif

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 15334ab..d99f9b3 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h

@@ -18,7 +18,6 @@
 
 #ifndef __ASSEMBLY__
 
-#include <asm/arcregs.h>	/* for STATUS_E1_MASK et all */
 #include <asm/ptrace.h>
 
 /* Arch specific stuff which needs to be saved per task.
@@ -41,15 +40,13 @@
 /* Forward declaration, a strange C thing */
 struct task_struct;
 
-/*
- * Return saved PC of a blocked thread.
- */
+/* Return saved PC of a blocked thread  */
 unsigned long thread_saved_pc(struct task_struct *t);
 
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1)
 
-/* Free all resources held by a thread. */
+/* Free all resources held by a thread */
 #define release_thread(thread) do { } while (0)
 
 /* Prepare to copy thread state - unlazy all lazy status */
@@ -82,26 +79,8 @@
 #define KSTK_BLINK(tsk) KSTK_REG(tsk, 4)
 #define KSTK_FP(tsk)    KSTK_REG(tsk, 0)
 
-/*
- * Do necessary setup to start up a newly executed thread.
- *
- * E1,E2 so that Interrupts are enabled in user mode
- * L set, so Loop inhibited to begin with
- * lp_start and lp_end seeded with bogus non-zero values so to easily catch
- * the ARC700 sr to lp_start hardware bug
- */
-#define start_thread(_regs, _pc, _usp)				\
-do {								\
-	set_fs(USER_DS); /* reads from user space */		\
-	(_regs)->ret = (_pc);					\
-	/* Interrupts enabled in User Mode */			\
-	(_regs)->status32 = STATUS_U_MASK | STATUS_L_MASK	\
-		| STATUS_E1_MASK | STATUS_E2_MASK;		\
-	(_regs)->sp = (_usp);					\
-	/* bogus seed values for debugging */			\
-	(_regs)->lp_start = 0x10;				\
-	(_regs)->lp_end = 0x80;					\
-} while (0)
+extern void start_thread(struct pt_regs * regs, unsigned long pc,
+			 unsigned long usp);
 
 extern unsigned int get_wchan(struct task_struct *p);
 

diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index 18fefae..f50d02d 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild

@@ -2,11 +2,4 @@
 include include/uapi/asm-generic/Kbuild.asm
 header-y += elf.h
 header-y += page.h
-header-y += setup.h
-header-y += byteorder.h
 header-y += cachectl.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += swab.h
-header-y += unistd.h

diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 29b82ad..83a046a 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S

@@ -156,7 +156,7 @@
 int1_saved_reg:
 	.zero 4
 
-/* Each Interrupt level needs it's own scratch */
+/* Each Interrupt level needs its own scratch */
 #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
 
 ARCFP_DATA int2_saved_reg
@@ -473,7 +473,7 @@
 	lr  r0, [efa]
 	mov r1, sp
 
-	; Now that we have read EFA, its safe to do "fake" rtie
+	; Now that we have read EFA, it is safe to do "fake" rtie
 	;   and get out of CPU exception mode
 	FAKE_RET_FROM_EXCPN r11
 
@@ -678,9 +678,9 @@
 	brne r9, event_IRQ2, 149f
 
 	;------------------------------------------------------------------
-	; if L2 IRQ interrupted a L1 ISR,  we'd disbaled preemption earlier
-	; so that sched doesnt move to new task, causing L1 to be delayed
-	; undeterministically. Now that we've achieved that, lets reset
+	; if L2 IRQ interrupted an L1 ISR,  we'd disabled preemption earlier
+	; so that sched doesn't move to new task, causing L1 to be delayed
+	; non-deterministically. Now that we've achieved that, let's reset
 	; things to what they were, before returning from L2 context
 	;----------------------------------------------------------------
 
@@ -736,7 +736,7 @@
 	; put last task in scheduler queue
 	bl   @schedule_tail
 
-	; If kernel thread, jump to it's entry-point
+	; If kernel thread, jump to its entry-point
 	ld   r9, [sp, PT_status32]
 	brne r9, 0, 1f
 

diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 4ad0491..07a58f2 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S

@@ -12,10 +12,42 @@
  *      to skip certain things during boot on simulator
  */
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/entry.h>
-#include <linux/linkage.h>
 #include <asm/arcregs.h>
+#include <asm/cache.h>
+
+.macro CPU_EARLY_SETUP
+
+	; Setting up Vector Table (in case an exception happens in early boot)
+	sr	@_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+
+	; Disable I-cache/D-cache if kernel so configured
+	lr	r5, [ARC_REG_IC_BCR]
+	breq    r5, 0, 1f		; I$ doesn't exist
+	lr	r5, [ARC_REG_IC_CTRL]
+#ifdef CONFIG_ARC_HAS_ICACHE
+	bclr	r5, r5, 0		; 0 - Enable, 1 is Disable
+#else
+	bset	r5, r5, 0		; I$ exists, but is not used
+#endif
+	sr	r5, [ARC_REG_IC_CTRL]
+
+1:
+	lr	r5, [ARC_REG_DC_BCR]
+	breq    r5, 0, 1f		; D$ doesn't exist
+	lr	r5, [ARC_REG_DC_CTRL]
+	bclr	r5, r5, 6		; Invalidate (discard w/o wback)
+#ifdef CONFIG_ARC_HAS_DCACHE
+	bclr	r5, r5, 0		; Enable (+Inv)
+#else
+	bset	r5, r5, 0		; Disable (+Inv)
+#endif
+	sr	r5, [ARC_REG_DC_CTRL]
+
+1:
+.endm
 
 	.cpu A7
 
@@ -27,7 +59,7 @@
 	; Don't clobber r0-r2 yet. It might have bootloader provided info
 	;-------------------------------------------------------------------
 
-	sr	@_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+	CPU_EARLY_SETUP
 
 #ifdef CONFIG_SMP
 	; Ensure Boot (Master) proceeds. Others wait in platform dependent way
@@ -90,7 +122,7 @@
 
 first_lines_of_secondary:
 
-	sr	@_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+	CPU_EARLY_SETUP
 
 	; setup per-cpu idle task as "current" on this CPU
 	ld	r0, [@secondary_idle_tsk]

diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index a4b141e..7d653c0 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c

@@ -150,24 +150,6 @@
 	set_irq_regs(old_regs);
 }
 
-int get_hw_config_num_irq(void)
-{
-	uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
-
-	switch (val & 0x03) {
-	case 0:
-		return 16;
-	case 1:
-		return 32;
-	case 2:
-		return 8;
-	default:
-		return 0;
-	}
-
-	return 0;
-}
-
 /*
  * arch_local_irq_enable - Enable interrupts.
  *

diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 07a3a96..fdd8971 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c

@@ -151,6 +151,29 @@
 }
 
 /*
+ * Do necessary setup to start up a new user task
+ */
+void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
+{
+	set_fs(USER_DS); /* user space */
+
+	regs->sp = usp;
+	regs->ret = pc;
+
+	/*
+	 * [U]ser Mode bit set
+	 * [L] ZOL loop inhibited to begin with - cleared by an LP insn
+	 * Interrupts enabled
+	 */
+	regs->status32 = STATUS_U_MASK | STATUS_L_MASK |
+			 STATUS_E1_MASK | STATUS_E2_MASK;
+
+	/* bogus seed values for debugging */
+	regs->lp_start = 0x10;
+	regs->lp_end = 0x80;
+}
+
+/*
  * Some archs flush debug and FPU info here
  */
 void flush_thread(void)

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 40859e5..cf90b6f 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c

@@ -138,7 +138,7 @@
 	if (machine_desc->init_smp)
 		machine_desc->init_smp(smp_processor_id());
 
-	arc_local_timer_setup(cpu);
+	arc_local_timer_setup();
 
 	local_irq_enable();
 	preempt_disable();

diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 71c4252..36c2aa9 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c

@@ -219,12 +219,13 @@
 /*
  * Setup the local event timer for @cpu
  */
-void arc_local_timer_setup(unsigned int cpu)
+void arc_local_timer_setup()
 {
-	struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
+	struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
+	int cpu = smp_processor_id();
 
-	clk->cpumask = cpumask_of(cpu);
-	clockevents_config_and_register(clk, arc_get_core_freq(),
+	evt->cpumask = cpumask_of(cpu);
+	clockevents_config_and_register(evt, arc_get_core_freq(),
 					0, ARC_TIMER_MAX);
 
 	/*
@@ -261,7 +262,7 @@
 		clocksource_register_hz(&arc_counter, arc_get_core_freq());
 
 	/* sets up the periodic event timer */
-	arc_local_timer_setup(smp_processor_id());
+	arc_local_timer_setup();
 
 	if (machine_desc->init_time)
 		machine_desc->init_time();

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 89edf79..1f676c4 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c

@@ -73,33 +73,6 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
-/* Instruction cache related Auxiliary registers */
-#define ARC_REG_IC_BCR		0x77	/* Build Config reg */
-#define ARC_REG_IC_IVIC		0x10
-#define ARC_REG_IC_CTRL		0x11
-#define ARC_REG_IC_IVIL		0x19
-#if (CONFIG_ARC_MMU_VER > 2)
-#define ARC_REG_IC_PTAG		0x1E
-#endif
-
-/* Bit val in IC_CTRL */
-#define IC_CTRL_CACHE_DISABLE   0x1
-
-/* Data cache related Auxiliary registers */
-#define ARC_REG_DC_BCR		0x72	/* Build Config reg */
-#define ARC_REG_DC_IVDC		0x47
-#define ARC_REG_DC_CTRL		0x48
-#define ARC_REG_DC_IVDL		0x4A
-#define ARC_REG_DC_FLSH		0x4B
-#define ARC_REG_DC_FLDL		0x4C
-#if (CONFIG_ARC_MMU_VER > 2)
-#define ARC_REG_DC_PTAG		0x5C
-#endif
-
-/* Bit val in DC_CTRL */
-#define DC_CTRL_INV_MODE_FLUSH  0x40
-#define DC_CTRL_FLUSH_STATUS    0x100
-
 char *arc_cache_mumbojumbo(int c, char *buf, int len)
 {
 	int n = 0;
@@ -168,72 +141,43 @@
  */
 void arc_cache_init(void)
 {
-	unsigned int cpu = smp_processor_id();
-	struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
-	struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
-	unsigned int dcache_does_alias, temp;
+	unsigned int __maybe_unused cpu = smp_processor_id();
+	struct cpuinfo_arc_cache __maybe_unused *ic, __maybe_unused *dc;
 	char str[256];
 
 	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
 
-	if (!ic->ver)
-		goto chk_dc;
-
 #ifdef CONFIG_ARC_HAS_ICACHE
-	/* 1. Confirm some of I-cache params which Linux assumes */
-	if (ic->line_len != L1_CACHE_BYTES)
-		panic("Cache H/W doesn't match kernel Config");
+	ic = &cpuinfo_arc700[cpu].icache;
+	if (ic->ver) {
+		if (ic->line_len != L1_CACHE_BYTES)
+			panic("ICache line [%d] != kernel Config [%d]",
+			      ic->line_len, L1_CACHE_BYTES);
 
-	if (ic->ver != CONFIG_ARC_MMU_VER)
-		panic("Cache ver doesn't match MMU ver\n");
+		if (ic->ver != CONFIG_ARC_MMU_VER)
+			panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
+			      ic->ver, CONFIG_ARC_MMU_VER);
+	}
 #endif
 
-	/* Enable/disable I-Cache */
-	temp = read_aux_reg(ARC_REG_IC_CTRL);
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-	temp &= ~IC_CTRL_CACHE_DISABLE;
-#else
-	temp |= IC_CTRL_CACHE_DISABLE;
-#endif
-
-	write_aux_reg(ARC_REG_IC_CTRL, temp);
-
-chk_dc:
-	if (!dc->ver)
-		return;
-
 #ifdef CONFIG_ARC_HAS_DCACHE
-	if (dc->line_len != L1_CACHE_BYTES)
-		panic("Cache H/W doesn't match kernel Config");
+	dc = &cpuinfo_arc700[cpu].dcache;
+	if (dc->ver) {
+		unsigned int dcache_does_alias;
 
-	/* check for D-Cache aliasing */
-	dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE;
+		if (dc->line_len != L1_CACHE_BYTES)
+			panic("DCache line [%d] != kernel Config [%d]",
+			      dc->line_len, L1_CACHE_BYTES);
 
-	if (dcache_does_alias && !cache_is_vipt_aliasing())
-		panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-	else if (!dcache_does_alias && cache_is_vipt_aliasing())
-		panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+		/* check for D-Cache aliasing */
+		dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE;
+
+		if (dcache_does_alias && !cache_is_vipt_aliasing())
+			panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+		else if (!dcache_does_alias && cache_is_vipt_aliasing())
+			panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+	}
 #endif
-
-	/* Set the default Invalidate Mode to "simpy discard dirty lines"
-	 *  as this is more frequent then flush before invalidate
-	 * Ofcourse we toggle this default behviour when desired
-	 */
-	temp = read_aux_reg(ARC_REG_DC_CTRL);
-	temp &= ~DC_CTRL_INV_MODE_FLUSH;
-
-#ifdef CONFIG_ARC_HAS_DCACHE
-	/* Enable D-Cache: Clear Bit 0 */
-	write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
-#else
-	/* Flush D cache */
-	write_aux_reg(ARC_REG_DC_FLSH, 0x1);
-	/* Disable D cache */
-	write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
-#endif
-
-	return;
 }
 
 #define OP_INV		0x1
@@ -253,12 +197,16 @@
 
 	if (cacheop == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
+#if (CONFIG_ARC_MMU_VER > 2)
 		aux_tag = ARC_REG_IC_PTAG;
+#endif
 	}
 	else {
 		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
 		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+#if (CONFIG_ARC_MMU_VER > 2)
 		aux_tag = ARC_REG_DC_PTAG;
+#endif
 	}
 
 	/* Ensure we properly floor/ceil the non-line aligned/sized requests

diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig
index 33058aa..e27bb5c 100644
--- a/arch/arc/plat-arcfpga/Kconfig
+++ b/arch/arc/plat-arcfpga/Kconfig

@@ -48,36 +48,4 @@
 	help
 	  Baud rate for the ARC UART
 
-menuconfig ARC_HAS_BVCI_LAT_UNIT
-	bool "BVCI Bus Latency Unit"
-	depends on ARC_BOARD_ML509 || ARC_BOARD_ANGEL4
-	help
-	  IP to add artificial latency to BVCI Bus Based FPGA builds.
-	  The default latency (even worst case) for FPGA is non-realistic
-	  (~10 SDRAM, ~5 SSRAM).
-
-config BVCI_LAT_UNITS
-	hex "Latency Unit(s) Bitmap"
-	default "0x0"
-	depends on ARC_HAS_BVCI_LAT_UNIT
-	help
-	  There are multiple Latency Units corresponding to the many
-	  interfaces of the system bus arbiter (both CPU side as well as
-	  the peripheral side).
-	  To add latency to ALL memory transaction, choose Unit 0, otherwise
-	  for finer grainer - interface wise latency, specify a bitmap (1 bit
-	  per unit) of all units. e.g. 1,2,12 will be 0x1003
-
-	  Unit  0 - System Arb and Mem Controller
-	  Unit  1 - I$ and System Bus
-	  Unit  2 - D$ and System Bus
-	  ..
-	  Unit 12 - IDE Disk controller and System Bus
-
-config BVCI_LAT_CYCLES
-	int "Latency Value in cycles"
-	range 0 63
-	default "30"
-	depends on ARC_HAS_BVCI_LAT_UNIT
-
 endif

diff --git a/arch/arc/plat-arcfpga/Makefile b/arch/arc/plat-arcfpga/Makefile
index a44e22e..4d1bddc 100644
--- a/arch/arc/plat-arcfpga/Makefile
+++ b/arch/arc/plat-arcfpga/Makefile

@@ -9,4 +9,4 @@
 KBUILD_CFLAGS	+= -Iarch/arc/plat-arcfpga/include
 
 obj-y := platform.o irq.o
-obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_ISS_SMP_EXTN)		+= smp.o

diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
index 19b76b6..61c7e59 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-arcfpga/platform.c

@@ -22,59 +22,6 @@
 #include <plat/smp.h>
 #include <plat/irq.h>
 
-/*-----------------------BVCI Latency Unit -----------------------------*/
-
-#ifdef CONFIG_ARC_HAS_BVCI_LAT_UNIT
-
-int lat_cycles = CONFIG_BVCI_LAT_CYCLES;
-
-/* BVCI Bus Profiler: Latency Unit */
-static void __init setup_bvci_lat_unit(void)
-{
-#define MAX_BVCI_UNITS 12
-
-	unsigned int i;
-	unsigned int *base = (unsigned int *)BVCI_LAT_UNIT_BASE;
-	const unsigned long units_req = CONFIG_BVCI_LAT_UNITS;
-	const unsigned int REG_UNIT = 21;
-	const unsigned int REG_VAL = 22;
-
-	/*
-	 * There are multiple Latency Units corresponding to the many
-	 * interfaces of the system bus arbiter (both CPU side as well as
-	 * the peripheral side).
-	 *
-	 * Unit  0 - System Arb and Mem Controller - adds latency to all
-	 *	    memory trasactions
-	 * Unit  1 - I$ and System Bus
-	 * Unit  2 - D$ and System Bus
-	 * ..
-	 * Unit 12 - IDE Disk controller and System Bus
-	 *
-	 * The programmers model requires writing to lat_unit reg first
-	 * and then the latency value (cycles) to lat_value reg
-	 */
-
-	if (CONFIG_BVCI_LAT_UNITS == 0) {
-		writel(0, base + REG_UNIT);
-		writel(lat_cycles, base + REG_VAL);
-		pr_info("BVCI Latency for all Memory Transactions %d cycles\n",
-			lat_cycles);
-	} else {
-		for_each_set_bit(i, &units_req, MAX_BVCI_UNITS) {
-			writel(i + 1, base + REG_UNIT); /* loop is 0 based */
-			writel(lat_cycles, base + REG_VAL);
-			pr_info("BVCI Latency for Unit[%d] = %d cycles\n",
-				(i + 1), lat_cycles);
-		}
-	}
-}
-#else
-static void __init setup_bvci_lat_unit(void)
-{
-}
-#endif
-
 /*----------------------- Platform Devices -----------------------------*/
 
 #if IS_ENABLED(CONFIG_SERIAL_ARC)
@@ -132,16 +79,11 @@
 				   ARRAY_SIZE(fpga_early_devs));
 
 	/*
-	 * ARC console driver registers itself as an early platform driver
-	 * of class "earlyprintk".
-	 * Install it here, followed by probe of devices.
-	 * The installation here doesn't require earlyprintk in command line
-	 * To do so however, replace the lines below with
-	 *	parse_early_param();
-	 *	early_platform_driver_probe("earlyprintk", 1, 1);
-	 *						      ^^
+	 * ARC console driver registers (build time) as an early platform driver
+	 * of class "earlyprintk". However it needs explicit cmdline toggle
+	 * "earlyprintk=ttyARC0" to be successfully runtime registered.
+	 * Otherwise the early probe below fails to find the driver
 	 */
-	early_platform_driver_register_all("earlyprintk");
 	early_platform_driver_probe("earlyprintk", 1, 0);
 
 	/*
@@ -165,11 +107,9 @@
 {
 	pr_info("[plat-arcfpga]: registering early dev resources\n");
 
-	setup_bvci_lat_unit();
-
 	arc_fpga_serial_init();
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_ISS_SMP_EXTN
 	iss_model_init_early_smp();
 #endif
 }
@@ -211,7 +151,7 @@
 	.init_early	= plat_fpga_early_init,
 	.init_machine	= plat_fpga_populate_dev,
 	.init_irq	= plat_fpga_init_IRQ,
-#ifdef CONFIG_SMP
+#ifdef CONFIG_ISS_SMP_EXTN
 	.init_smp	= iss_model_init_smp,
 #endif
 MACHINE_END

diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c
index 8a12741..92bad91 100644
--- a/arch/arc/plat-arcfpga/smp.c
+++ b/arch/arc/plat-arcfpga/smp.c

@@ -42,6 +42,24 @@
 
 }
 
+/* Decode bits [1:0] of ARC_REG_VECBASE_BCR: 0 -> 16, 1 -> 32, 2 -> 8, else 0.
+ * Going by the helper's name, the result is the number of IRQs in hardware. */
+static inline int get_hw_config_num_irq(void)
+{
+	uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
+
+	switch (val & 0x03) {
+	case 0:
+		return 16;
+	case 1:
+		return 32;
+	case 2:
+		return 8;
+	default:
+		return 0;
+	}
+}
+
 /*
  * Any SMP specific init any CPU does when it comes up.
  * Here we setup the CPU to enable Inter-Processor-Interrupts

diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 9f53e82..4a4e02d 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi

@@ -662,6 +662,8 @@
 		mac: ethernet@4a100000 {
 			compatible = "ti,cpsw";
 			ti,hwmods = "cpgmac0";
+			clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>;
+			clock-names = "fck", "cpts";
 			cpdma_channels = <8>;
 			ale_entries = <1024>;
 			bd_ram_size = <0x2000>;

diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 794c73e..49fa596 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi

@@ -490,6 +490,8 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			ti,hwmods = "cpgmac0";
+			clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>;
+			clock-names = "fck", "cpts";
 			status = "disabled";
 			cpdma_channels = <8>;
 			ale_entries = <1024>;
@@ -857,6 +859,35 @@
 			ti,hwmods = "hdq1w";
 			status = "disabled";
 		};
+
+		dss: dss@4832a000 {
+			compatible = "ti,omap3-dss";
+			reg = <0x4832a000 0x200>;
+			status = "disabled";
+			ti,hwmods = "dss_core";
+			clocks = <&disp_clk>;
+			clock-names = "fck";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges;
+
+			dispc@4832a400 {
+				compatible = "ti,omap3-dispc";
+				reg = <0x4832a400 0x400>;
+				interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+				ti,hwmods = "dss_dispc";
+				clocks = <&disp_clk>;
+				clock-names = "fck";
+			};
+
+			rfbi: rfbi@4832a800 {
+				compatible = "ti,omap3-rfbi";
+				reg = <0x4832a800 0x100>;
+				ti,hwmods = "dss_rfbi";
+				clocks = <&disp_clk>;
+				clock-names = "fck";
+			};
+		};
 	};
 };
 

diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index c25d158..003766c 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts

@@ -19,6 +19,10 @@
 	model = "TI AM437x GP EVM";
 	compatible = "ti,am437x-gp-evm","ti,am4372","ti,am43";
 
+	aliases {
+		display0 = &lcd0;
+	};
+
 	vmmcsd_fixed: fixedregulator-sd {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmcsd_fixed";
@@ -64,6 +68,44 @@
 				0x02000069      /* LEFT */
 				0x0201006c>;      /* DOWN */
 		};
+
+	lcd0: display {
+		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		label = "lcd";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&lcd_pins>;
+
+		/*
+		 * SelLCDorHDMI, LOW to select HDMI. This is not really the
+		 * panel's enable GPIO, but we don't have HDMI driver support nor
+		 * support to switch between two displays, so using this gpio as
+		 * panel's enable should be safe.
+		 */
+		enable-gpios = <&gpio5 8 GPIO_ACTIVE_HIGH>;
+
+		panel-timing {
+			clock-frequency = <33000000>;
+			hactive = <800>;
+			vactive = <480>;
+			hfront-porch = <210>;
+			hback-porch = <16>;
+			hsync-len = <30>;
+			vback-porch = <10>;
+			vfront-porch = <22>;
+			vsync-len = <13>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+			de-active = <1>;
+			pixelclk-active = <1>;
+		};
+
+		port {
+			lcd_in: endpoint {
+				remote-endpoint = <&dpi_out>;
+			};
+		};
+	};
 };
 
 &am43xx_pinmux {
@@ -171,6 +213,47 @@
 			0x9c (PIN_OUTPUT | MUX_MODE0)		/* gpmc_be0n_cle.gpmc_be0n_cle */
 		>;
 	};
+
+	dss_pins: dss_pins {
+		pinctrl-single,pins = <
+			0x020 (PIN_OUTPUT_PULLUP | MUX_MODE1) /*gpmc ad 8 -> DSS DATA 23 */
+			0x024 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x028 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x02c (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x030 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x034 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x038 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+			0x03c (PIN_OUTPUT_PULLUP | MUX_MODE1) /*gpmc ad 15 -> DSS DATA 16 */
+			0x0a0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS DATA 0 */
+			0x0a4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0a8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0ac (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0b0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0b4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0b8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0bc (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0c0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0c4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0c8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0cc (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0d0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0d4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0d8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			0x0dc (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS DATA 15 */
+			0x0e0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS VSYNC */
+			0x0e4 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS HSYNC */
+			0x0e8 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS PCLK */
+			0x0ec (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS AC BIAS EN */
+
+		>;
+	};
+
+	lcd_pins: lcd_pins {
+		pinctrl-single,pins = <
+			/* GPIO 5_8 to select LCD / HDMI */
+			0x238 (PIN_OUTPUT_PULLUP | MUX_MODE7)
+		>;
+	};
 };
 
 &i2c0 {
@@ -359,3 +442,17 @@
 		};
 	};
 };
+
+&dss {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_pins>;
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <24>;	/* dss_pins muxes DSS DATA 0..23 */
+		};
+	};
+};

diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index ad362c5..19f1f7e 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts

@@ -19,6 +19,10 @@
 	model = "TI AM43x EPOS EVM";
 	compatible = "ti,am43x-epos-evm","ti,am4372","ti,am43";
 
+	aliases {
+		display0 = &lcd0;
+	};
+
 	vmmcsd_fixed: fixedregulator-sd {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmcsd_fixed";
@@ -27,6 +31,44 @@
 		enable-active-high;
 	};
 
+	lcd0: display {
+		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		label = "lcd";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&lcd_pins>;
+
+		/*
+		 * SelLCDorHDMI, LOW to select HDMI. This is not really the
+		 * panel's enable GPIO, but we don't have HDMI driver support nor
+		 * support to switch between two displays, so using this gpio as
+		 * panel's enable should be safe.
+		 */
+		enable-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
+
+		panel-timing {
+			clock-frequency = <33000000>;
+			hactive = <800>;
+			vactive = <480>;
+			hfront-porch = <210>;
+			hback-porch = <16>;
+			hsync-len = <30>;
+			vback-porch = <10>;
+			vfront-porch = <22>;
+			vsync-len = <13>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+			de-active = <1>;
+			pixelclk-active = <1>;
+		};
+
+		port {
+			lcd_in: endpoint {
+				remote-endpoint = <&dpi_out>;
+			};
+		};
+	};
+
 	am43xx_pinmux: pinmux@44e10800 {
 		cpsw_default: cpsw_default {
 			pinctrl-single,pins = <
@@ -161,6 +203,46 @@
 				0x234 (PIN_INPUT_PULLUP | MUX_MODE1)    /* cam1_wen.hdq_gpio */
 			>;
 		};
+
+		dss_pins: dss_pins {
+			pinctrl-single,pins = <
+				0x020 (PIN_OUTPUT_PULLUP | MUX_MODE1) /*gpmc ad 8 -> DSS DATA 23 */
+				0x024 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x028 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x02C (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x030 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x034 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x038 (PIN_OUTPUT_PULLUP | MUX_MODE1)
+				0x03C (PIN_OUTPUT_PULLUP | MUX_MODE1) /*gpmc ad 15 -> DSS DATA 16 */
+				0x0A0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS DATA 0 */
+				0x0A4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0A8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0AC (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0B0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0B4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0B8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0BC (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0C0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0C4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0C8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0CC (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0D0 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0D4 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0D8 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+				0x0DC (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS DATA 15 */
+				0x0E0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS VSYNC */
+				0x0E4 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS HSYNC */
+				0x0E8 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS PCLK */
+				0x0EC (PIN_OUTPUT_PULLUP | MUX_MODE0) /* DSS AC BIAS EN */
+			>;
+		};
+
+		lcd_pins: lcd_pins {
+			pinctrl-single,pins = <
+				/* GPMC CLK -> GPIO 2_1 to select LCD / HDMI */
+				0x08C (PIN_OUTPUT_PULLUP | MUX_MODE7)
+			>;
+		};
 	};
 
 	matrix_keypad: matrix_keypad@0 {
@@ -468,3 +550,17 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&hdq_pins>;
 };
+
+&dss {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_pins>;
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <24>;	/* dss_pins muxes DSS DATA 0..23 */
+		};
+	};
+};

diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts
index 25674fe..7e291e2 100644
--- a/arch/arm/boot/dts/armada-xp-matrix.dts
+++ b/arch/arm/boot/dts/armada-xp-matrix.dts

@@ -57,6 +57,10 @@
 			ethernet@30000 {
 				status = "okay";
 				phy-mode = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
 			};
 
 			pcie-controller {

diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index d9f99e7..c1414cb 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi

@@ -66,9 +66,21 @@
 		ranges = <0x0 0x0 0x0 0xc0000000>;
 		dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
 
+		pllctrl: pll-controller@02310000 {
+			compatible = "ti,keystone-pllctrl", "syscon";
+			reg = <0x02310000 0x200>;
+		};
+
+		devctrl: device-state-control@02620000 {
+			compatible = "ti,keystone-devctrl", "syscon";
+			reg = <0x02620000 0x1000>;
+		};
+
 		rstctrl: reset-controller {
 			compatible = "ti,keystone-reset";
-			reg = <0x023100e8 4>;	/* pll reset control reg */
+			ti,syscon-pll = <&pllctrl 0xe4>;
+			ti,syscon-dev = <&devctrl 0x328>;
+			ti,wdt-list = <0>;
 		};
 
 		/include/ "keystone-clocks.dtsi"

diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts
index 9cba94b..a8bd434 100644
--- a/arch/arm/boot/dts/omap3-evm-37xx.dts
+++ b/arch/arm/boot/dts/omap3-evm-37xx.dts

@@ -26,7 +26,44 @@
 	};
 };
 
+&dss {
+	pinctrl-names = "default";
+	pinctrl-0 = <
+		&dss_dpi_pins1
+		&dss_dpi_pins2
+	>;
+};
+
 &omap3_pmx_core {
+	dss_dpi_pins1: pinmux_dss_dpi_pins1 {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x20d4, PIN_OUTPUT | MUX_MODE0)   /* dss_pclk.dss_pclk */
+			OMAP3_CORE1_IOPAD(0x20d6, PIN_OUTPUT | MUX_MODE0)   /* dss_hsync.dss_hsync */
+			OMAP3_CORE1_IOPAD(0x20d8, PIN_OUTPUT | MUX_MODE0)   /* dss_vsync.dss_vsync */
+			OMAP3_CORE1_IOPAD(0x20da, PIN_OUTPUT | MUX_MODE0)   /* dss_acbias.dss_acbias */
+
+			OMAP3_CORE1_IOPAD(0x20e8, PIN_OUTPUT | MUX_MODE0)   /* dss_data6.dss_data6 */
+			OMAP3_CORE1_IOPAD(0x20ea, PIN_OUTPUT | MUX_MODE0)   /* dss_data7.dss_data7 */
+			OMAP3_CORE1_IOPAD(0x20ec, PIN_OUTPUT | MUX_MODE0)   /* dss_data8.dss_data8 */
+			OMAP3_CORE1_IOPAD(0x20ee, PIN_OUTPUT | MUX_MODE0)   /* dss_data9.dss_data9 */
+			OMAP3_CORE1_IOPAD(0x20f0, PIN_OUTPUT | MUX_MODE0)   /* dss_data10.dss_data10 */
+			OMAP3_CORE1_IOPAD(0x20f2, PIN_OUTPUT | MUX_MODE0)   /* dss_data11.dss_data11 */
+			OMAP3_CORE1_IOPAD(0x20f4, PIN_OUTPUT | MUX_MODE0)   /* dss_data12.dss_data12 */
+			OMAP3_CORE1_IOPAD(0x20f6, PIN_OUTPUT | MUX_MODE0)   /* dss_data13.dss_data13 */
+			OMAP3_CORE1_IOPAD(0x20f8, PIN_OUTPUT | MUX_MODE0)   /* dss_data14.dss_data14 */
+			OMAP3_CORE1_IOPAD(0x20fa, PIN_OUTPUT | MUX_MODE0)   /* dss_data15.dss_data15 */
+			OMAP3_CORE1_IOPAD(0x20fc, PIN_OUTPUT | MUX_MODE0)   /* dss_data16.dss_data16 */
+			OMAP3_CORE1_IOPAD(0x20fe, PIN_OUTPUT | MUX_MODE0)   /* dss_data17.dss_data17 */
+
+			OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE3)   /* dss_data18.dss_data0 */
+			OMAP3_CORE1_IOPAD(0x2102, PIN_OUTPUT | MUX_MODE3)   /* dss_data19.dss_data1 */
+			OMAP3_CORE1_IOPAD(0x2104, PIN_OUTPUT | MUX_MODE3)   /* dss_data20.dss_data2 */
+			OMAP3_CORE1_IOPAD(0x2106, PIN_OUTPUT | MUX_MODE3)   /* dss_data21.dss_data3 */
+			OMAP3_CORE1_IOPAD(0x2108, PIN_OUTPUT | MUX_MODE3)   /* dss_data22.dss_data4 */
+			OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE3)   /* dss_data23.dss_data5 */
+		>;
+	};
+
 	mmc1_pins: pinmux_mmc1_pins {
 		pinctrl-single,pins = <
 			0x114 (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* sdmmc1_clk.sdmmc1_clk */
@@ -75,6 +112,19 @@
 	};
 };
 
+&omap3_pmx_wkup {
+	dss_dpi_pins2: pinmux_dss_dpi_pins2 {
+		pinctrl-single,pins = <
+			0x0a (PIN_OUTPUT | MUX_MODE3)   /* sys_boot0.dss_data18 */
+			0x0c (PIN_OUTPUT | MUX_MODE3)   /* sys_boot1.dss_data19 */
+			0x10 (PIN_OUTPUT | MUX_MODE3)   /* sys_boot3.dss_data20 */
+			0x12 (PIN_OUTPUT | MUX_MODE3)   /* sys_boot4.dss_data21 */
+			0x14 (PIN_OUTPUT | MUX_MODE3)   /* sys_boot5.dss_data22 */
+			0x16 (PIN_OUTPUT | MUX_MODE3)   /* sys_boot6.dss_data23 */
+		>;
+	};
+};
+
 &mmc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins>;

diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi
index 3007e79..8ae8f00 100644
--- a/arch/arm/boot/dts/omap3-evm-common.dtsi
+++ b/arch/arm/boot/dts/omap3-evm-common.dtsi

@@ -44,6 +44,11 @@
 
 #include "twl4030.dtsi"
 #include "twl4030_omap3.dtsi"
+#include "omap3-panel-sharp-ls037v7dw01.dtsi"
+
+&backlight0 {
+	gpios = <&twl_gpio 18 GPIO_ACTIVE_LOW>;
+};
 
 &i2c2 {
 	clock-frequency = <400000>;
@@ -61,6 +66,27 @@
 	};
 };
 
+&lcd_3v3 {
+	gpio = <&gpio5 25 GPIO_ACTIVE_LOW>;	/* gpio153 */
+	enable-active-low;
+};
+
+&lcd0 {
+	enable-gpios = <&gpio5 24 GPIO_ACTIVE_HIGH>;	/* gpio152, lcd INI */
+	reset-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>;	/* gpio155, lcd RESB */
+	mode-gpios = <&gpio5 26 GPIO_ACTIVE_HIGH	/* gpio154, lcd MO */
+		      &gpio1 2 GPIO_ACTIVE_HIGH		/* gpio2, lcd LR */
+		      &gpio1 3 GPIO_ACTIVE_HIGH>;	/* gpio3, lcd UD */
+};
+
+&mcspi1 {
+	tsc2046@0 {
+		interrupt-parent = <&gpio6>;
+		interrupts = <15 0>;		/* gpio175 */
+		pendown-gpio = <&gpio6 15 0>;
+	};
+};
+
 &mmc1 {
 	vmmc-supply = <&vmmc1>;
 	vmmc_aux-supply = <&vsim>;

diff --git a/arch/arm/boot/dts/omap3-gta04.dts b/arch/arm/boot/dts/omap3-gta04.dts
index f8ad125..021311f 100644
--- a/arch/arm/boot/dts/omap3-gta04.dts
+++ b/arch/arm/boot/dts/omap3-gta04.dts

@@ -44,6 +44,36 @@
 		ti,mcbsp = <&mcbsp2>;
 		ti,codec = <&twl_audio>;
 	};
+
+	spi_lcd {
+		compatible = "spi-gpio";
+		#address-cells = <0x1>;
+		#size-cells = <0x0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&spi_gpio_pins>;
+
+		gpio-sck = <&gpio1 12 0>;
+		gpio-miso = <&gpio1 18 0>;
+		gpio-mosi = <&gpio1 20 0>;
+		cs-gpios = <&gpio1 19 0>;
+		num-chipselects = <1>;
+
+		/* lcd panel */
+		lcd: td028ttec1@0 {
+			compatible = "toppoly,td028ttec1";
+			reg = <0>;
+			spi-max-frequency = <100000>;
+			spi-cpol;
+			spi-cpha;
+
+			label = "lcd";
+			port {
+				lcd_in: endpoint {
+					remote-endpoint = <&dpi_out>;
+				};
+			};
+		};
+	};
 };
 
 &omap3_pmx_core {
@@ -78,6 +108,47 @@
 			0x11e (PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc1_dat3.sdmmc1_dat3 */
 		>;
 	};
+
+	dss_dpi_pins: pinmux_dss_dpi_pins {
+		pinctrl-single,pins = <
+			0x0a4 (PIN_OUTPUT | MUX_MODE0)   /* dss_pclk.dss_pclk */
+			0x0a6 (PIN_OUTPUT | MUX_MODE0)   /* dss_hsync.dss_hsync */
+			0x0a8 (PIN_OUTPUT | MUX_MODE0)   /* dss_vsync.dss_vsync */
+			0x0aa (PIN_OUTPUT | MUX_MODE0)   /* dss_acbias.dss_acbias */
+			0x0ac (PIN_OUTPUT | MUX_MODE0)   /* dss_data0.dss_data0 */
+			0x0ae (PIN_OUTPUT | MUX_MODE0)   /* dss_data1.dss_data1 */
+			0x0b0 (PIN_OUTPUT | MUX_MODE0)   /* dss_data2.dss_data2 */
+			0x0b2 (PIN_OUTPUT | MUX_MODE0)   /* dss_data3.dss_data3 */
+			0x0b4 (PIN_OUTPUT | MUX_MODE0)   /* dss_data4.dss_data4 */
+			0x0b6 (PIN_OUTPUT | MUX_MODE0)   /* dss_data5.dss_data5 */
+			0x0b8 (PIN_OUTPUT | MUX_MODE0)   /* dss_data6.dss_data6 */
+			0x0ba (PIN_OUTPUT | MUX_MODE0)   /* dss_data7.dss_data7 */
+			0x0bc (PIN_OUTPUT | MUX_MODE0)   /* dss_data8.dss_data8 */
+			0x0be (PIN_OUTPUT | MUX_MODE0)   /* dss_data9.dss_data9 */
+			0x0c0 (PIN_OUTPUT | MUX_MODE0)   /* dss_data10.dss_data10 */
+			0x0c2 (PIN_OUTPUT | MUX_MODE0)   /* dss_data11.dss_data11 */
+			0x0c4 (PIN_OUTPUT | MUX_MODE0)   /* dss_data12.dss_data12 */
+			0x0c6 (PIN_OUTPUT | MUX_MODE0)   /* dss_data13.dss_data13 */
+			0x0c8 (PIN_OUTPUT | MUX_MODE0)   /* dss_data14.dss_data14 */
+			0x0ca (PIN_OUTPUT | MUX_MODE0)   /* dss_data15.dss_data15 */
+			0x0cc (PIN_OUTPUT | MUX_MODE0)   /* dss_data16.dss_data16 */
+			0x0ce (PIN_OUTPUT | MUX_MODE0)   /* dss_data17.dss_data17 */
+			0x0d0 (PIN_OUTPUT | MUX_MODE0)   /* dss_data18.dss_data18 */
+			0x0d2 (PIN_OUTPUT | MUX_MODE0)   /* dss_data19.dss_data19 */
+			0x0d4 (PIN_OUTPUT | MUX_MODE0)   /* dss_data20.dss_data20 */
+			0x0d6 (PIN_OUTPUT | MUX_MODE0)   /* dss_data21.dss_data21 */
+			0x0d8 (PIN_OUTPUT | MUX_MODE0)   /* dss_data22.dss_data22 */
+			0x0da (PIN_OUTPUT | MUX_MODE0)   /* dss_data23.dss_data23 */
+		>;
+	};
+
+	spi_gpio_pins: spi_gpio_pinmux {
+		pinctrl-single,pins = <0x5a8 (PIN_OUTPUT | MUX_MODE4) /* clk */
+			0x5b6 (PIN_OUTPUT | MUX_MODE4) /* cs */
+			0x5b8 (PIN_OUTPUT | MUX_MODE4) /* tx */
+			0x5b4 (PIN_INPUT | MUX_MODE4) /* rx */
+		>;
+	};
 };
 
 &i2c1 {
@@ -219,3 +290,22 @@
 	regulator-min-microvolt = <2800000>;
 	regulator-max-microvolt = <3150000>;
 };
+
+/* Needed to power the DPI pins */
+&vpll2 {
+	regulator-always-on;
+};
+
+&dss {
+	pinctrl-names = "default";
+	pinctrl-0 = < &dss_dpi_pins >;
+
+	status = "okay";
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <24>;
+		};
+	};
+};

diff --git a/arch/arm/boot/dts/omap3-ldp.dts b/arch/arm/boot/dts/omap3-ldp.dts
index 476ff15..af272c1 100644
--- a/arch/arm/boot/dts/omap3-ldp.dts
+++ b/arch/arm/boot/dts/omap3-ldp.dts

@@ -164,6 +164,11 @@
 
 #include "twl4030.dtsi"
 #include "twl4030_omap3.dtsi"
+#include "omap3-panel-sharp-ls037v7dw01.dtsi"
+
+&backlight0 {
+	gpios = <&twl_gpio 7 GPIO_ACTIVE_HIGH>;
+};
 
 &i2c2 {
 	clock-frequency = <400000>;
@@ -173,6 +178,25 @@
 	clock-frequency = <400000>;
 };
 
+/* tps61130rsa enabled by twl4030 regen */
+&lcd_3v3 {
+	regulator-always-on;
+};
+
+&lcd0 {
+	enable-gpios = <&twl_gpio 15 GPIO_ACTIVE_HIGH>;	/* lcd INI */
+	reset-gpios = <&gpio2 23 GPIO_ACTIVE_HIGH>;	/* gpio55, lcd RESB */
+	mode-gpios = <&gpio2 24 GPIO_ACTIVE_HIGH>;	/* gpio56, lcd MO */
+};
+
+&mcspi1 {
+	tsc2046@0 {
+		interrupt-parent = <&gpio2>;
+		interrupts = <22 0>;		/* gpio54 */
+		pendown-gpio = <&gpio2 22 0>;
+	};
+};
+
 &mmc1 {
 	/* See 35xx errata 2.1.1.128 in SPRZ278F */
 	compatible = "ti,omap3-pre-es3-hsmmc";
@@ -251,8 +275,3 @@
 	/* Needed for ads7846 */
         regulator-name = "vcc";
 };
-
-&vpll2 {
-       /* Needed for DSS */
-       regulator-name = "vdds_dsi";
-};

diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 059a8ff..ae8ae3f 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts

@@ -651,9 +651,24 @@
 	 * Also... order in the device tree actually matters here.
 	 */
 	tsc2005@0 {
-		compatible = "tsc2005";
+		compatible = "ti,tsc2005";
 		spi-max-frequency = <6000000>;
 		reg = <0>;
+
+		vio-supply = <&vio>;
+
+		reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+		interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+		touchscreen-fuzz-x = <4>;
+		touchscreen-fuzz-y = <7>;
+		touchscreen-fuzz-pressure = <2>;
+		touchscreen-max-x = <4096>;
+		touchscreen-max-y = <4096>;
+		touchscreen-max-pressure = <2048>;
+
+		ti,x-plate-ohms = <280>;
+		ti,esd-recovery-timeout-ms = <8000>;
 	};
 
 	acx565akm@2 {

diff --git a/arch/arm/boot/dts/omap3-overo-alto35-common.dtsi b/arch/arm/boot/dts/omap3-overo-alto35-common.dtsi
index 19d6486..7aae8fb 100644
--- a/arch/arm/boot/dts/omap3-overo-alto35-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-alto35-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-lcd35.dtsi"
 
 #include <dt-bindings/input/input.h>
 

diff --git a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
index 19de6ff..17b82f8 100644
--- a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-lcd43.dtsi"
 
 #include <dt-bindings/input/input.h>
 

diff --git a/arch/arm/boot/dts/omap3-overo-common-dvi.dtsi b/arch/arm/boot/dts/omap3-overo-common-dvi.dtsi
new file mode 100644
index 0000000..802f704
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-overo-common-dvi.dtsi

@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2014 Florian Vaussard, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * DVI output for some Gumstix Overo boards (Tobi and Summit)
+ */
+
+&omap3_pmx_core {
+	dss_dpi_pins: pinmux_dss_dpi_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x20d4, PIN_OUTPUT | MUX_MODE0)	/* dss_pclk.dss_pclk */
+			OMAP3_CORE1_IOPAD(0x20d6, PIN_OUTPUT | MUX_MODE0)	/* dss_hsync.dss_hsync */
+			OMAP3_CORE1_IOPAD(0x20d8, PIN_OUTPUT | MUX_MODE0)	/* dss_vsync.dss_vsync */
+			OMAP3_CORE1_IOPAD(0x20da, PIN_OUTPUT | MUX_MODE0)	/* dss_acbias.dss_acbias */
+			OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE0)	/* dss_data0.dss_data0 */
+			OMAP3_CORE1_IOPAD(0x20de, PIN_OUTPUT | MUX_MODE0)	/* dss_data1.dss_data1 */
+			OMAP3_CORE1_IOPAD(0x20e0, PIN_OUTPUT | MUX_MODE0)	/* dss_data2.dss_data2 */
+			OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE0)	/* dss_data3.dss_data3 */
+			OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE0)	/* dss_data4.dss_data4 */
+			OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE0)	/* dss_data5.dss_data5 */
+			OMAP3_CORE1_IOPAD(0x20e8, PIN_OUTPUT | MUX_MODE0)	/* dss_data6.dss_data6 */
+			OMAP3_CORE1_IOPAD(0x20ea, PIN_OUTPUT | MUX_MODE0)	/* dss_data7.dss_data7 */
+			OMAP3_CORE1_IOPAD(0x20ec, PIN_OUTPUT | MUX_MODE0)	/* dss_data8.dss_data8 */
+			OMAP3_CORE1_IOPAD(0x20ee, PIN_OUTPUT | MUX_MODE0)	/* dss_data9.dss_data9 */
+			OMAP3_CORE1_IOPAD(0x20f0, PIN_OUTPUT | MUX_MODE0)	/* dss_data10.dss_data10 */
+			OMAP3_CORE1_IOPAD(0x20f2, PIN_OUTPUT | MUX_MODE0)	/* dss_data11.dss_data11 */
+			OMAP3_CORE1_IOPAD(0x20f4, PIN_OUTPUT | MUX_MODE0)	/* dss_data12.dss_data12 */
+			OMAP3_CORE1_IOPAD(0x20f6, PIN_OUTPUT | MUX_MODE0)	/* dss_data13.dss_data13 */
+			OMAP3_CORE1_IOPAD(0x20f8, PIN_OUTPUT | MUX_MODE0)	/* dss_data14.dss_data14 */
+			OMAP3_CORE1_IOPAD(0x20fa, PIN_OUTPUT | MUX_MODE0)	/* dss_data15.dss_data15 */
+			OMAP3_CORE1_IOPAD(0x20fc, PIN_OUTPUT | MUX_MODE0)	/* dss_data16.dss_data16 */
+			OMAP3_CORE1_IOPAD(0x20fe, PIN_OUTPUT | MUX_MODE0)	/* dss_data17.dss_data17 */
+			OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE0)	/* dss_data18.dss_data18 */
+			OMAP3_CORE1_IOPAD(0x2102, PIN_OUTPUT | MUX_MODE0)	/* dss_data19.dss_data19 */
+			OMAP3_CORE1_IOPAD(0x2104, PIN_OUTPUT | MUX_MODE0)	/* dss_data20.dss_data20 */
+			OMAP3_CORE1_IOPAD(0x2106, PIN_OUTPUT | MUX_MODE0)	/* dss_data21.dss_data21 */
+			OMAP3_CORE1_IOPAD(0x2108, PIN_OUTPUT | MUX_MODE0)	/* dss_data22.dss_data22 */
+			OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE0)	/* dss_data23.dss_data23 */
+		>;
+	};
+};
+
+/* Needed to power the DPI pins */
+&vpll2 {
+	regulator-always-on;
+};
+
+&dss {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_dpi_pins>;
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&tfp410_in>;
+			data-lines = <24>;	/* dss_data0..dss_data23 in dss_dpi_pins */
+		};
+	};
+};
+
+/ {
+	aliases {
+		display0 = &dvi0;
+	};
+
+	tfp410: encoder@0 {
+		compatible = "ti,tfp410";
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
+				tfp410_in: endpoint@0 {
+					remote-endpoint = <&dpi_out>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+
+				tfp410_out: endpoint@0 {
+					remote-endpoint = <&dvi_connector_in>;
+				};
+			};
+		};
+	};
+
+	dvi0: connector@0 {
+		compatible = "dvi-connector";
+		label = "dvi";
+
+		digital;
+		ddc-i2c-bus = <&i2c3>;
+
+		port {
+			dvi_connector_in: endpoint {
+				remote-endpoint = <&tfp410_out>;
+			};
+		};
+	};
+};
+

diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
new file mode 100644
index 0000000..233c69e
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi

@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2014 Florian Vaussard, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * 3.5'' LCD panel output for some Gumstix Overo boards (e.g. Alto35)
+ */
+
+&omap3_pmx_core {
+	dss_dpi_pins: pinmux_dss_dpi_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x20d4, PIN_OUTPUT | MUX_MODE0)	/* dss_pclk.dss_pclk */
+			OMAP3_CORE1_IOPAD(0x20d6, PIN_OUTPUT | MUX_MODE0)	/* dss_hsync.dss_hsync */
+			OMAP3_CORE1_IOPAD(0x20d8, PIN_OUTPUT | MUX_MODE0)	/* dss_vsync.dss_vsync */
+			OMAP3_CORE1_IOPAD(0x20da, PIN_OUTPUT | MUX_MODE0)	/* dss_acbias.dss_acbias */
+			OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE0)	/* dss_data0.dss_data0 */
+			OMAP3_CORE1_IOPAD(0x20de, PIN_OUTPUT | MUX_MODE0)	/* dss_data1.dss_data1 */
+			OMAP3_CORE1_IOPAD(0x20e0, PIN_OUTPUT | MUX_MODE0)	/* dss_data2.dss_data2 */
+			OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE0)	/* dss_data3.dss_data3 */
+			OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE0)	/* dss_data4.dss_data4 */
+			OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE0)	/* dss_data5.dss_data5 */
+			OMAP3_CORE1_IOPAD(0x20e8, PIN_OUTPUT | MUX_MODE0)	/* dss_data6.dss_data6 */
+			OMAP3_CORE1_IOPAD(0x20ea, PIN_OUTPUT | MUX_MODE0)	/* dss_data7.dss_data7 */
+			OMAP3_CORE1_IOPAD(0x20ec, PIN_OUTPUT | MUX_MODE0)	/* dss_data8.dss_data8 */
+			OMAP3_CORE1_IOPAD(0x20ee, PIN_OUTPUT | MUX_MODE0)	/* dss_data9.dss_data9 */
+			OMAP3_CORE1_IOPAD(0x20f0, PIN_OUTPUT | MUX_MODE0)	/* dss_data10.dss_data10 */
+			OMAP3_CORE1_IOPAD(0x20f2, PIN_OUTPUT | MUX_MODE0)	/* dss_data11.dss_data11 */
+			OMAP3_CORE1_IOPAD(0x20f4, PIN_OUTPUT | MUX_MODE0)	/* dss_data12.dss_data12 */
+			OMAP3_CORE1_IOPAD(0x20f6, PIN_OUTPUT | MUX_MODE0)	/* dss_data13.dss_data13 */
+			OMAP3_CORE1_IOPAD(0x20f8, PIN_OUTPUT | MUX_MODE0)	/* dss_data14.dss_data14 */
+			OMAP3_CORE1_IOPAD(0x20fa, PIN_OUTPUT | MUX_MODE0)	/* dss_data15.dss_data15 */
+			OMAP3_CORE1_IOPAD(0x20fc, PIN_OUTPUT | MUX_MODE0)	/* dss_data16.dss_data16 */
+			OMAP3_CORE1_IOPAD(0x20fe, PIN_OUTPUT | MUX_MODE0)	/* dss_data17.dss_data17 */
+			OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE0)	/* dss_data18.dss_data18 */
+			OMAP3_CORE1_IOPAD(0x2102, PIN_OUTPUT | MUX_MODE0)	/* dss_data19.dss_data19 */
+			OMAP3_CORE1_IOPAD(0x2104, PIN_OUTPUT | MUX_MODE0)	/* dss_data20.dss_data20 */
+			OMAP3_CORE1_IOPAD(0x2106, PIN_OUTPUT | MUX_MODE0)	/* dss_data21.dss_data21 */
+			OMAP3_CORE1_IOPAD(0x2108, PIN_OUTPUT | MUX_MODE0)	/* dss_data22.dss_data22 */
+			OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE0)	/* dss_data23.dss_data23 */
+		>;
+	};
+
+	lb035_pins: pinmux_lb035_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2174, PIN_OUTPUT | MUX_MODE4)	/* uart2_cts.gpio_144 */
+		>;
+	};
+
+	backlight_pins: pinmux_backlight_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE4)	/* uart2_rts.gpio_145 */
+		>;
+	};
+
+	mcspi1_pins: pinmux_mcspi1_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x21c8, PIN_INPUT | MUX_MODE0)	/* mcspi1_clk.mcspi1_clk */
+			OMAP3_CORE1_IOPAD(0x21ca, PIN_INPUT | MUX_MODE0)	/* mcspi1_simo.mcspi1_simo */
+			OMAP3_CORE1_IOPAD(0x21cc, PIN_INPUT | MUX_MODE0)	/* mcspi1_somi.mcspi1_somi */
+			OMAP3_CORE1_IOPAD(0x21ce, PIN_INPUT | MUX_MODE0)	/* mcspi1_cs0.mcspi1_cs0 */
+		>;
+	};
+
+	ads7846_pins: pinmux_ads7846_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2138, PIN_INPUT_PULLDOWN | MUX_MODE4)	/* csi2_dx1.gpio_114 */
+		>;
+	};
+};
+
+/* Needed to power the DPI pins */
+&vpll2 {
+	regulator-always-on;
+};
+
+&dss {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_dpi_pins>;
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <24>;	/* dss_data0..dss_data23 in dss_dpi_pins */
+		};
+	};
+};
+
+/ {
+	aliases {
+		display0 = &lcd0;
+	};
+
+	ads7846reg: ads7846-reg {
+		compatible = "regulator-fixed";
+		regulator-name = "ads7846-reg";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	backlight {
+		compatible = "gpio-backlight";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&backlight_pins>;
+		gpios = <&gpio5 17 GPIO_ACTIVE_HIGH>;		/* gpio_145 */
+
+		default-on;
+	};
+};
+
+&mcspi1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcspi1_pins>;
+
+	lcd0: display@0 {
+		compatible = "lgphilips,lb035q02";
+		label = "lcd";
+
+		reg = <1>;					/* CS1 */
+		spi-max-frequency = <10000000>;
+		spi-cpol;
+		spi-cpha;
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&lb035_pins>;
+		enable-gpios = <&gpio5 16 GPIO_ACTIVE_HIGH>;	/* gpio_144 */
+
+		port {
+			lcd_in: endpoint {
+				remote-endpoint = <&dpi_out>;
+			};
+		};
+	};
+
+	/* touch controller */
+	ads7846@0 {
+		pinctrl-names = "default";
+		pinctrl-0 = <&ads7846_pins>;
+
+		compatible = "ti,ads7846";
+		vcc-supply = <&ads7846reg>;
+
+		reg = <0>;				/* CS0 */
+		spi-max-frequency = <1500000>;
+
+		interrupt-parent = <&gpio4>;
+		interrupts = <18 0>;			/* gpio_114 */
+		pendown-gpio = <&gpio4 18 0>;
+
+		ti,x-min = /bits/ 16 <0x0>;
+		ti,x-max = /bits/ 16 <0x0fff>;
+		ti,y-min = /bits/ 16 <0x0>;
+		ti,y-max = /bits/ 16 <0x0fff>;
+		ti,x-plate-ohms = /bits/ 16 <180>;
+		ti,pressure-max = /bits/ 16 <255>;
+
+		linux,wakeup;
+	};
+};

diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
new file mode 100644
index 0000000..f5395b7
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi

@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2014 Florian Vaussard, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * 4.3'' LCD panel output for some Gumstix Overo boards (Gallop43, Chestnut43)
+ */
+
+&omap3_pmx_core {
+	dss_dpi_pins: pinmux_dss_dpi_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x20d4, PIN_OUTPUT | MUX_MODE0)	/* dss_pclk.dss_pclk */
+			OMAP3_CORE1_IOPAD(0x20d6, PIN_OUTPUT | MUX_MODE0)	/* dss_hsync.dss_hsync */
+			OMAP3_CORE1_IOPAD(0x20d8, PIN_OUTPUT | MUX_MODE0)	/* dss_vsync.dss_vsync */
+			OMAP3_CORE1_IOPAD(0x20da, PIN_OUTPUT | MUX_MODE0)	/* dss_acbias.dss_acbias */
+			OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE0)	/* dss_data0.dss_data0 */
+			OMAP3_CORE1_IOPAD(0x20de, PIN_OUTPUT | MUX_MODE0)	/* dss_data1.dss_data1 */
+			OMAP3_CORE1_IOPAD(0x20e0, PIN_OUTPUT | MUX_MODE0)	/* dss_data2.dss_data2 */
+			OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE0)	/* dss_data3.dss_data3 */
+			OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE0)	/* dss_data4.dss_data4 */
+			OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE0)	/* dss_data5.dss_data5 */
+			OMAP3_CORE1_IOPAD(0x20e8, PIN_OUTPUT | MUX_MODE0)	/* dss_data6.dss_data6 */
+			OMAP3_CORE1_IOPAD(0x20ea, PIN_OUTPUT | MUX_MODE0)	/* dss_data7.dss_data7 */
+			OMAP3_CORE1_IOPAD(0x20ec, PIN_OUTPUT | MUX_MODE0)	/* dss_data8.dss_data8 */
+			OMAP3_CORE1_IOPAD(0x20ee, PIN_OUTPUT | MUX_MODE0)	/* dss_data9.dss_data9 */
+			OMAP3_CORE1_IOPAD(0x20f0, PIN_OUTPUT | MUX_MODE0)	/* dss_data10.dss_data10 */
+			OMAP3_CORE1_IOPAD(0x20f2, PIN_OUTPUT | MUX_MODE0)	/* dss_data11.dss_data11 */
+			OMAP3_CORE1_IOPAD(0x20f4, PIN_OUTPUT | MUX_MODE0)	/* dss_data12.dss_data12 */
+			OMAP3_CORE1_IOPAD(0x20f6, PIN_OUTPUT | MUX_MODE0)	/* dss_data13.dss_data13 */
+			OMAP3_CORE1_IOPAD(0x20f8, PIN_OUTPUT | MUX_MODE0)	/* dss_data14.dss_data14 */
+			OMAP3_CORE1_IOPAD(0x20fa, PIN_OUTPUT | MUX_MODE0)	/* dss_data15.dss_data15 */
+			OMAP3_CORE1_IOPAD(0x20fc, PIN_OUTPUT | MUX_MODE0)	/* dss_data16.dss_data16 */
+			OMAP3_CORE1_IOPAD(0x20fe, PIN_OUTPUT | MUX_MODE0)	/* dss_data17.dss_data17 */
+			OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE0)	/* dss_data18.dss_data18 */
+			OMAP3_CORE1_IOPAD(0x2102, PIN_OUTPUT | MUX_MODE0)	/* dss_data19.dss_data19 */
+			OMAP3_CORE1_IOPAD(0x2104, PIN_OUTPUT | MUX_MODE0)	/* dss_data20.dss_data20 */
+			OMAP3_CORE1_IOPAD(0x2106, PIN_OUTPUT | MUX_MODE0)	/* dss_data21.dss_data21 */
+			OMAP3_CORE1_IOPAD(0x2108, PIN_OUTPUT | MUX_MODE0)	/* dss_data22.dss_data22 */
+			OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE0)	/* dss_data23.dss_data23 */
+		>;
+	};
+
+	lte430_pins: pinmux_lte430_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2174, PIN_OUTPUT | MUX_MODE4)	/* uart2_cts.gpio_144 */
+		>;
+	};
+
+	backlight_pins: pinmux_backlight_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE4)	/* uart2_rts.gpio_145 */
+		>;
+	};
+
+	mcspi1_pins: pinmux_mcspi1_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x21c8, PIN_INPUT | MUX_MODE0)	/* mcspi1_clk.mcspi1_clk */
+			OMAP3_CORE1_IOPAD(0x21ca, PIN_INPUT | MUX_MODE0)	/* mcspi1_simo.mcspi1_simo */
+			OMAP3_CORE1_IOPAD(0x21cc, PIN_INPUT | MUX_MODE0)	/* mcspi1_somi.mcspi1_somi */
+			OMAP3_CORE1_IOPAD(0x21ce, PIN_INPUT | MUX_MODE0)	/* mcspi1_cs0.mcspi1_cs0 */
+		>;
+	};
+
+	ads7846_pins: pinmux_ads7846_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x2138, PIN_INPUT_PULLDOWN | MUX_MODE4)	/* csi2_dx1.gpio_114 */
+		>;
+	};
+};
+
+/* Needed to power the DPI pins */
+&vpll2 {
+	regulator-always-on;
+};
+
+&dss {
+	status = "ok";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_dpi_pins>;
+
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <24>;
+		};
+	};
+};
+
+/ {
+	aliases {
+		display0 = &lcd0;
+	};
+
+	lcd0: display {
+		compatible = "samsung,lte430wq-f0c", "panel-dpi";
+		label = "lcd";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&lte430_pins>;
+		enable-gpios = <&gpio5 16 GPIO_ACTIVE_HIGH>;		/* gpio_144 */
+
+		port {
+			lcd_in: endpoint {
+				remote-endpoint = <&dpi_out>;
+			};
+		};
+
+		panel-timing {
+			clock-frequency = <9200000>;
+			hactive = <480>;
+			vactive = <272>;
+			hfront-porch = <8>;
+			hback-porch = <4>;
+			hsync-len = <41>;
+			vback-porch = <2>;
+			vfront-porch = <4>;
+			vsync-len = <10>;
+
+			hsync-active = <0>;
+			vsync-active = <0>;
+			de-active = <1>;
+			pixelclk-active = <1>;
+		};
+	};
+
+	ads7846reg: ads7846-reg {
+		compatible = "regulator-fixed";
+		regulator-name = "ads7846-reg";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	backlight {
+		compatible = "gpio-backlight";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&backlight_pins>;
+		gpios = <&gpio5 17 GPIO_ACTIVE_HIGH>;		/* gpio_145 */
+
+		default-on;
+	};
+};
+
+&mcspi1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcspi1_pins>;
+
+	/* touch controller */
+	ads7846@0 {
+		pinctrl-names = "default";
+		pinctrl-0 = <&ads7846_pins>;
+
+		compatible = "ti,ads7846";
+		vcc-supply = <&ads7846reg>;
+
+		reg = <0>;				/* CS0 */
+		spi-max-frequency = <1500000>;
+
+		interrupt-parent = <&gpio4>;
+		interrupts = <18 0>;			/* gpio_114 */
+		pendown-gpio = <&gpio4 18 0>;
+
+		ti,x-min = /bits/ 16 <0x0>;
+		ti,x-max = /bits/ 16 <0x0fff>;
+		ti,y-min = /bits/ 16 <0x0>;
+		ti,y-max = /bits/ 16 <0x0fff>;
+		ti,x-plate-ohms = /bits/ 16 <180>;
+		ti,pressure-max = /bits/ 16 <255>;
+
+		linux,wakeup;
+	};
+};
+

diff --git a/arch/arm/boot/dts/omap3-overo-gallop43-common.dtsi b/arch/arm/boot/dts/omap3-overo-gallop43-common.dtsi
index 5e848c2..49d2254 100644
--- a/arch/arm/boot/dts/omap3-overo-gallop43-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-gallop43-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-lcd43.dtsi"
 
 #include <dt-bindings/input/input.h>
 

diff --git a/arch/arm/boot/dts/omap3-overo-palo43-common.dtsi b/arch/arm/boot/dts/omap3-overo-palo43-common.dtsi
index abea232..087aedf 100644
--- a/arch/arm/boot/dts/omap3-overo-palo43-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-palo43-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-lcd43.dtsi"
 
 #include <dt-bindings/input/input.h>
 

diff --git a/arch/arm/boot/dts/omap3-overo-summit-common.dtsi b/arch/arm/boot/dts/omap3-overo-summit-common.dtsi
index 999d1cd..0ac97ba 100644
--- a/arch/arm/boot/dts/omap3-overo-summit-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-summit-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-dvi.dtsi"
 
 / {
 	leds {

diff --git a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
index 13df50b..9e24b6a 100644
--- a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi

@@ -11,6 +11,7 @@
  */
 
 #include "omap3-overo-common-peripherals.dtsi"
+#include "omap3-overo-common-dvi.dtsi"
 
 / {
 	leds {

diff --git a/arch/arm/boot/dts/omap3-panel-sharp-ls037v7dw01.dtsi b/arch/arm/boot/dts/omap3-panel-sharp-ls037v7dw01.dtsi
new file mode 100644
index 0000000..f4b1a61
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-panel-sharp-ls037v7dw01.dtsi

@@ -0,0 +1,71 @@
+/*
+ * Common file for omap dpi panels with QVGA and reset pins
+ *
+ * Note that the board specific DTS file needs to specify
+ * at minimum the GPIO enable-gpios for display, and
+ * gpios for gpio-backlight.
+ */
+
+/ {
+	aliases {
+		display0 = &lcd0;
+	};
+
+	backlight0: backlight {
+		compatible = "gpio-backlight";
+		default-on;
+	};
+
+	/* 3.3V GPIO controlled regulator for LCD_ENVDD */
+	lcd_3v3: regulator-lcd-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "lcd_3v3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		startup-delay-us = <70000>;
+	};
+
+	lcd0: display {
+		compatible = "sharp,ls037v7dw01";
+		label = "lcd";
+		power-supply = <&lcd_3v3>;
+
+		port {
+			lcd_in: endpoint {
+				remote-endpoint = <&dpi_out>;
+			};
+		};
+	};
+};
+
+/* Needed to power the DPI pins */
+&vpll2 {
+	regulator-always-on;
+};
+
+&dss {
+	status = "ok";
+	port {
+		dpi_out: endpoint {
+			remote-endpoint = <&lcd_in>;
+			data-lines = <18>;
+		};
+	};
+};
+
+&mcspi1 {
+	tsc2046@0 {
+		reg = <0>;			/* CS0 */
+		compatible = "ti,tsc2046";
+		spi-max-frequency = <1000000>;
+		vcc-supply = <&lcd_3v3>;
+		ti,x-min = /bits/ 16 <0>;
+		ti,x-max = /bits/ 16 <8000>;
+		ti,y-min = /bits/ 16 <0>;
+		ti,y-max = /bits/ 16 <4800>;
+		ti,x-plate-ohms = /bits/ 16 <40>;
+		ti,pressure-max = /bits/ 16 <255>;
+		ti,swap-xy;
+		linux,wakeup;
+	};
+};

diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts
index cd53a64..6dc84d9 100644
--- a/arch/arm/boot/dts/omap4-duovero-parlor.dts
+++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts

@@ -15,6 +15,10 @@
 	model = "OMAP4430 Gumstix Duovero on Parlor";
 	compatible = "gumstix,omap4-duovero-parlor", "gumstix,omap4-duovero", "ti,omap4430", "ti,omap4";
 
+	aliases {
+		display0 = &hdmi0;
+	};
+
 	leds {
 		compatible = "gpio-leds";
 		led0 {
@@ -35,6 +39,21 @@
 			gpio-key,wakeup;
 		};
 	};
+
+	hdmi0: connector {
+		compatible = "hdmi-connector";
+		label = "hdmi";
+
+		type = "d";
+
+		hpd-gpios = <&gpio2 31 GPIO_ACTIVE_HIGH>;	/* gpio_63 */
+
+		port {
+			hdmi_connector_in: endpoint {
+				remote-endpoint = <&hdmi_out>;
+			};
+		};
+	};
 };
 
 &omap4_pmx_core {
@@ -77,6 +96,15 @@
 			OMAP4_IOPAD(0x070, PIN_INPUT_PULLUP | MUX_MODE3)	/* gpmc_a24.gpio_48: amdix enabled */
 		>;
 	};
+
+	dss_hdmi_pins: pinmux_dss_hdmi_pins {
+		pinctrl-single,pins = <
+			OMAP4_IOPAD(0x098, PIN_INPUT | MUX_MODE3)		/* hdmi_hpd.gpio_63 */
+			OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+			OMAP4_IOPAD(0x09c, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_ddc_scl.hdmi_ddc_scl */
+			OMAP4_IOPAD(0x09e, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_ddc_sda.hdmi_ddc_sda */
+		>;
+	};
 };
 
 &i2c2 {
@@ -143,4 +171,20 @@
 	};
 };
 
+&dss {
+	status = "ok";
+};
+
+&hdmi {
+	status = "ok";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_hdmi_pins>;
+
+	port {
+		hdmi_out: endpoint {
+			remote-endpoint = <&hdmi_connector_in>;
+		};
+	};
+};
 

diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 43a587e..7e26d22 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi

@@ -922,6 +922,8 @@
 				ti,hwmods = "dss_hdmi";
 				clocks = <&dss_48mhz_clk>, <&dss_sys_clk>;
 				clock-names = "fck", "sys_clk";
+				dmas = <&sdma 76>;
+				dma-names = "audio_tx";
 			};
 		};
 	};

diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 3b99ec2..1e1b057 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts

@@ -20,6 +20,10 @@
 		reg = <0x80000000 0x7F000000>; /* 2032 MB */
 	};
 
+	aliases {
+		display0 = &hdmi0;
+	};
+
 	vmmcsd_fixed: fixedregulator-mmcsd {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmcsd_fixed";
@@ -51,6 +55,51 @@
 			default-state = "off";
 		};
 	};
+
+	tpd12s015: encoder {
+		compatible = "ti,tpd12s015";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&tpd12s015_pins>;
+
+		gpios = <&gpio9 0 GPIO_ACTIVE_HIGH>,	/* TCA6424A P01, CT CP HPD */
+			<&gpio9 1 GPIO_ACTIVE_HIGH>,	/* TCA6424A P00, LS OE */
+			<&gpio7 1 GPIO_ACTIVE_HIGH>;	/* GPIO 193, HPD */
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
+				tpd12s015_in: endpoint {
+					remote-endpoint = <&hdmi_out>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+
+				tpd12s015_out: endpoint {
+					remote-endpoint = <&hdmi_connector_in>;
+				};
+			};
+		};
+	};
+
+	hdmi0: connector {
+		compatible = "hdmi-connector";
+		label = "hdmi";
+
+		type = "b";
+
+		port {
+			hdmi_connector_in: endpoint {
+				remote-endpoint = <&tpd12s015_out>;
+			};
+		};
+	};
 };
 
 &omap5_pmx_core {
@@ -183,6 +232,19 @@
 		>;
 	};
 
+	dss_hdmi_pins: pinmux_dss_hdmi_pins {
+		pinctrl-single,pins = <
+			0x0fc (PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+			0x100 (PIN_INPUT | MUX_MODE0)	/* hdmi_ddc_scl.hdmi_ddc_scl */
+			0x102 (PIN_INPUT | MUX_MODE0)	/* hdmi_ddc_sda.hdmi_ddc_sda */
+		>;
+	};
+
+	tpd12s015_pins: pinmux_tpd12s015_pins {
+		pinctrl-single,pins = <
+			0x0fe (PIN_INPUT_PULLDOWN | MUX_MODE6)	/* hdmi_hpd.gpio7_193 */
+		>;
+	};
 };
 
 &omap5_pmx_wkup {
@@ -434,6 +496,13 @@
 	pinctrl-0 = <&i2c5_pins>;
 
 	clock-frequency = <400000>;
+
+	gpio9: gpio@22 {
+		compatible = "ti,tca6424";
+		reg = <0x22>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
 };
 
 &mcbsp3 {
@@ -491,3 +560,21 @@
 &cpu0 {
 	cpu0-supply = <&smps123_reg>;
 };
+
+&dss {
+	status = "ok";
+};
+
+&hdmi {
+	status = "ok";
+	vdda-supply = <&ldo4_reg>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&dss_hdmi_pins>;
+
+	port {
+		hdmi_out: endpoint {
+			remote-endpoint = <&tpd12s015_in>;
+		};
+	};
+};

diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index e58be57..3bfda16 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi

@@ -924,6 +924,68 @@
 			ti,hwmods = "sata";
 		};
 
+		dss: dss@58000000 {
+			compatible = "ti,omap5-dss";
+			reg = <0x58000000 0x80>;
+			status = "disabled";
+			ti,hwmods = "dss_core";
+			clocks = <&dss_dss_clk>;
+			clock-names = "fck";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges;
+
+			dispc@58001000 {
+				compatible = "ti,omap5-dispc";
+				reg = <0x58001000 0x1000>;
+				interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
+				ti,hwmods = "dss_dispc";
+				clocks = <&dss_dss_clk>;
+				clock-names = "fck";
+			};
+
+			dsi1: encoder@58004000 {
+				compatible = "ti,omap5-dsi";
+				reg = <0x58004000 0x200>,
+				      <0x58004200 0x40>,
+				      <0x58004300 0x40>;
+				reg-names = "proto", "phy", "pll";
+				interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+				ti,hwmods = "dss_dsi1";
+				clocks = <&dss_dss_clk>, <&dss_sys_clk>;
+				clock-names = "fck", "sys_clk";
+			};
+
+			dsi2: encoder@58009000 {
+				compatible = "ti,omap5-dsi";
+				reg = <0x58009000 0x200>,
+				      <0x58009200 0x40>,
+				      <0x58009300 0x40>;
+				reg-names = "proto", "phy", "pll";
+				interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+				ti,hwmods = "dss_dsi2";
+				clocks = <&dss_dss_clk>, <&dss_sys_clk>;
+				clock-names = "fck", "sys_clk";
+			};
+
+			hdmi: encoder@58040000 {
+				compatible = "ti,omap5-hdmi";
+				reg = <0x58040000 0x200>,
+				      <0x58040200 0x80>,
+				      <0x58040300 0x80>,
+				      <0x58060000 0x19000>;
+				reg-names = "wp", "pll", "phy", "core";
+				interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+				ti,hwmods = "dss_hdmi";
+				clocks = <&dss_48mhz_clk>, <&dss_sys_clk>;
+				clock-names = "fck", "sys_clk";
+				dmas = <&sdma 76>;
+				dma-names = "audio_tx";
+			};
+		};
 	};
 };
 

diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi
index 51d0e91..1929ad3 100644
--- a/arch/arm/boot/dts/vt8500.dtsi
+++ b/arch/arm/boot/dts/vt8500.dtsi

@@ -165,5 +165,11 @@
 			reg = <0xd8100000 0x10000>;
 			interrupts = <48>;
 		};
+
+		ethernet@d8004000 {
+			compatible = "via,vt8500-rhine";
+			reg = <0xd8004000 0x100>;
+			interrupts = <10>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi
index 7525982..b1c59a7 100644
--- a/arch/arm/boot/dts/wm8650.dtsi
+++ b/arch/arm/boot/dts/wm8650.dtsi

@@ -218,5 +218,11 @@
 			reg = <0xd8100000 0x10000>;
 			interrupts = <48>;
 		};
+
+		ethernet@d8004000 {
+			compatible = "via,vt8500-rhine";
+			reg = <0xd8004000 0x100>;
+			interrupts = <10>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/wm8850.dtsi b/arch/arm/boot/dts/wm8850.dtsi
index d98386d..8fbccfbe 100644
--- a/arch/arm/boot/dts/wm8850.dtsi
+++ b/arch/arm/boot/dts/wm8850.dtsi

@@ -298,5 +298,11 @@
 			bus-width = <4>;
 			sdon-inverted;
 		};
+
+		ethernet@d8004000 {
+			compatible = "via,vt8500-rhine";
+			reg = <0xd8004000 0x100>;
+			interrupts = <10>;
+		};
 	};
 };

diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig
index 5bae195..c1f5adc 100644
--- a/arch/arm/configs/integrator_defconfig
+++ b/arch/arm/configs/integrator_defconfig

@@ -73,7 +73,6 @@
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_PL030=y
-CONFIG_COMMON_CLK_DEBUG=y
 CONFIG_EXT2_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y

diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 095bb52..932ae40 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig

@@ -112,6 +112,7 @@
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_DAVINCI=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
 CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
@@ -131,6 +132,9 @@
 CONFIG_SPI_DAVINCI=y
 CONFIG_SPI_SPIDEV=y
 # CONFIG_HWMON is not set
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_KEYSTONE=y
 CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_CORE=y
 CONFIG_DAVINCI_WATCHDOG=y
@@ -145,7 +149,6 @@
 CONFIG_KEYSTONE_USB_PHY=y
 CONFIG_DMADEVICES=y
 CONFIG_TI_EDMA=y
-CONFIG_COMMON_CLK_DEBUG=y
 CONFIG_MEMORY=y
 CONFIG_TI_AEMIF=y
 CONFIG_EXT4_FS=y

diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig
index f052017..9e7a256 100644
--- a/arch/arm/configs/vt8500_v6_v7_defconfig
+++ b/arch/arm/configs/vt8500_v6_v7_defconfig

@@ -73,7 +73,6 @@
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_VT8500=y
 CONFIG_DMADEVICES=y
-CONFIG_COMMON_CLK_DEBUG=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PWM=y
 CONFIG_PWM_VT8500=y

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a6bc431..4238bcb 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c

@@ -410,7 +410,7 @@
 	 */
 	hwc->config_base	    |= (unsigned long)mapping;
 
-	if (!hwc->sample_period) {
+	if (!is_sampling_event(event)) {
 		/*
 		 * For non-sampling runs, limit the sample_period to half
 		 * of the counter width. That way, the new counter value

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index a71ae15..af9e35e 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c

@@ -126,8 +126,8 @@
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 	if (irqs < 1) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
+		printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
 	}
 
 	irq = platform_get_irq(pmu_device, 0);
@@ -191,6 +191,10 @@
 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu->reset)
 		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+
+	/* No usable IRQ (platform_get_irq() <= 0, i.e. errno or none): flag it */
+	if (platform_get_irq(cpu_pmu->plat_device, 0) <= 0)
+		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 }
 
 /*

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 3997c41..9d85318 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c

@@ -26,30 +26,30 @@
 #include <asm/topology.h>
 
 /*
- * cpu power scale management
+ * cpu capacity scale management
  */
 
 /*
- * cpu power table
+ * cpu capacity table
  * This per cpu data structure describes the relative capacity of each core.
  * On a heteregenous system, cores don't have the same computation capacity
- * and we reflect that difference in the cpu_power field so the scheduler can
- * take this difference into account during load balance. A per cpu structure
- * is preferred because each CPU updates its own cpu_power field during the
- * load balance except for idle cores. One idle core is selected to run the
- * rebalance_domains for all idle cores and the cpu_power can be updated
- * during this sequence.
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);
 
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return per_cpu(cpu_scale, cpu);
 }
 
-static void set_power_scale(unsigned int cpu, unsigned long power)
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
 {
-	per_cpu(cpu_scale, cpu) = power;
+	per_cpu(cpu_scale, cpu) = capacity;
 }
 
 #ifdef CONFIG_OF
@@ -62,11 +62,11 @@
  * Table of relative efficiency of each processors
  * The efficiency value must fit in 20bit and the final
  * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
  * in order to return at most 1 when DIV_ROUND_CLOSEST
  * is used to compute the capacity of a CPU.
  * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
  */
 static const struct cpu_efficiency table_efficiency[] = {
 	{"arm,cortex-a15", 3891},
@@ -83,9 +83,9 @@
  * Iterate all CPUs' descriptor in DT and compute the efficiency
  * (as per table_efficiency). Also calculate a middle efficiency
  * as close as possible to  (max{eff_i} - min{eff_i}) / 2
- * This is later used to scale the cpu_power field such that an
- * 'average' CPU is of middle power. Also see the comments near
- * table_efficiency[] and update_cpu_power().
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
  */
 static void __init parse_dt_topology(void)
 {
@@ -141,15 +141,15 @@
 	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
 	 * compute a middle_capacity factor that will ensure that the capacity
 	 * of an 'average' CPU of the system will be as close as possible to
-	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
 	 * constraint explained near table_efficiency[].
 	 */
 	if (4*max_capacity < (3*(max_capacity + min_capacity)))
 		middle_capacity = (min_capacity + max_capacity)
-				>> (SCHED_POWER_SHIFT+1);
+				>> (SCHED_CAPACITY_SHIFT+1);
 	else
 		middle_capacity = ((max_capacity / 3)
-				>> (SCHED_POWER_SHIFT-1)) + 1;
+				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
 
 }
 
@@ -158,20 +158,20 @@
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
  * function returns directly for SMP system.
  */
-static void update_cpu_power(unsigned int cpu)
+static void update_cpu_capacity(unsigned int cpu)
 {
 	if (!cpu_capacity(cpu))
 		return;
 
-	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
-		cpu, arch_scale_freq_power(NULL, cpu));
+	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+		cpu, arch_scale_freq_capacity(NULL, cpu));
 }
 
 #else
 static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
+static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
  /*
@@ -267,7 +267,7 @@
 
 	update_siblings_masks(cpuid);
 
-	update_cpu_power(cpuid);
+	update_cpu_capacity(cpuid);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
@@ -297,7 +297,7 @@
 {
 	unsigned int cpu;
 
-	/* init core mask and power*/
+	/* init core mask and capacity */
 	for_each_possible_cpu(cpu) {
 		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
 
@@ -307,7 +307,7 @@
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
 
-		set_power_scale(cpu, SCHED_POWER_SCALE);
+		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
 	}
 	smp_wmb();
 

diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index e0b9e1b..7f352de 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c

@@ -14,60 +14,100 @@
 #include <linux/init.h>
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/mach/map.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 #include <asm/smp_plat.h>
+#include <asm/memory.h>
+
+#include "memory.h"
 
 #include "keystone.h"
 
-#define PLL_RESET_WRITE_KEY_MASK		0xffff0000
-#define PLL_RESET_WRITE_KEY			0x5a69
-#define PLL_RESET				BIT(16)
+static struct notifier_block platform_nb;
+static unsigned long keystone_dma_pfn_offset __read_mostly;
 
-static void __iomem *keystone_rstctrl;
+static int keystone_platform_notifier(struct notifier_block *nb,
+				      unsigned long event, void *data)
+{
+	struct device *dev = data;
+
+	if (event != BUS_NOTIFY_ADD_DEVICE)
+		return NOTIFY_DONE;
+
+	if (!dev)
+		return NOTIFY_BAD;
+
+	if (!dev->of_node) {
+		dev->dma_pfn_offset = keystone_dma_pfn_offset;
+		dev_err(dev, "set dma_pfn_offset%08lx\n",
+			dev->dma_pfn_offset);
+	}
+	return NOTIFY_OK;
+}
 
 static void __init keystone_init(void)
 {
-	struct device_node *node;
-
-	node = of_find_compatible_node(NULL, NULL, "ti,keystone-reset");
-	if (WARN_ON(!node))
-		pr_warn("ti,keystone-reset node undefined\n");
-
-	keystone_rstctrl = of_iomap(node, 0);
-	if (WARN_ON(!keystone_rstctrl))
-		pr_warn("ti,keystone-reset iomap error\n");
-
 	keystone_pm_runtime_init();
+	if (platform_nb.notifier_call)
+		bus_register_notifier(&platform_bus_type, &platform_nb);
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
+static phys_addr_t keystone_virt_to_idmap(unsigned long x)
+{
+	return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
+}
+
+static void __init keystone_init_meminfo(void)
+{
+	bool lpae = IS_ENABLED(CONFIG_ARM_LPAE);
+	bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT);
+	phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START;
+	phys_addr_t mem_start, mem_end;
+
+	mem_start = memblock_start_of_DRAM();
+	mem_end = memblock_end_of_DRAM();
+
+	/* nothing to do if we are running out of the <32-bit space */
+	if (mem_start >= KEYSTONE_LOW_PHYS_START &&
+	    mem_end   <= KEYSTONE_LOW_PHYS_END)
+		return;
+
+	if (!lpae || !pvpatch) {
+		pr_crit("Enable %s%s%s to run outside 32-bit space\n",
+		      !lpae ? __stringify(CONFIG_ARM_LPAE) : "",
+		      (!lpae && !pvpatch) ? " and " : "",
+		      !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : "");
+	}
+
+	if (mem_start < KEYSTONE_HIGH_PHYS_START ||
+	    mem_end   > KEYSTONE_HIGH_PHYS_END) {
+		pr_crit("Invalid address space for memory (%08llx-%08llx)\n",
+		      (u64)mem_start, (u64)mem_end);
+	}
+
+	offset += KEYSTONE_HIGH_PHYS_START;
+	__pv_phys_pfn_offset = PFN_DOWN(offset);
+	__pv_offset = (offset - PAGE_OFFSET);
+
+	/* Populate the arch idmap hook */
+	arch_virt_to_idmap = keystone_virt_to_idmap;
+	platform_nb.notifier_call = keystone_platform_notifier;
+	keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
+						KEYSTONE_LOW_PHYS_START);
+
+	pr_info("Switching to high address space at 0x%llx\n", (u64)offset);
+}
+
 static const char *keystone_match[] __initconst = {
 	"ti,keystone",
 	NULL,
 };
 
-void keystone_restart(enum reboot_mode mode, const char *cmd)
-{
-	u32 val;
-
-	BUG_ON(!keystone_rstctrl);
-
-	/* Enable write access to RSTCTRL */
-	val = readl(keystone_rstctrl);
-	val &= PLL_RESET_WRITE_KEY_MASK;
-	val |= PLL_RESET_WRITE_KEY;
-	writel(val, keystone_rstctrl);
-
-	/* Reset the SOC */
-	val = readl(keystone_rstctrl);
-	val &= ~PLL_RESET;
-	writel(val, keystone_rstctrl);
-}
-
 DT_MACHINE_START(KEYSTONE, "Keystone")
 #if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
 	.dma_zone_size	= SZ_2G,
@@ -75,5 +115,5 @@
 	.smp		= smp_ops(keystone_smp_ops),
 	.init_machine	= keystone_init,
 	.dt_compat	= keystone_match,
-	.restart	= keystone_restart,
+	.init_meminfo   = keystone_init_meminfo,
 MACHINE_END

diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h
new file mode 100644
index 0000000..b854fb1
--- /dev/null
+++ b/arch/arm/mach-keystone/memory.h

@@ -0,0 +1,24 @@
+/*
+ * Copyright 2014 Texas Instruments, Inc.
+ *	Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#ifndef __MEMORY_H
+#define __MEMORY_H
+
+#define MAX_PHYSMEM_BITS	36
+#define SECTION_SIZE_BITS	34
+
+#define KEYSTONE_LOW_PHYS_START		0x80000000ULL
+#define KEYSTONE_LOW_PHYS_SIZE		0x80000000ULL /* 2G */
+#define KEYSTONE_LOW_PHYS_END		(KEYSTONE_LOW_PHYS_START + \
+					 KEYSTONE_LOW_PHYS_SIZE - 1)
+
+#define KEYSTONE_HIGH_PHYS_START	0x800000000ULL
+#define KEYSTONE_HIGH_PHYS_SIZE		0x400000000ULL	/* 16G */
+#define KEYSTONE_HIGH_PHYS_END		(KEYSTONE_HIGH_PHYS_START + \
+					 KEYSTONE_HIGH_PHYS_SIZE - 1)
+#endif /* __MEMORY_H */

diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5cf0683..5f46a7c 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c

@@ -17,13 +17,16 @@
 #include <linux/io.h>
 
 #include <asm/smp_plat.h>
+#include <asm/prom.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
 
 #include "keystone.h"
 
 static int keystone_smp_boot_secondary(unsigned int cpu,
 						struct task_struct *idle)
 {
-	unsigned long start = virt_to_phys(&secondary_startup);
+	unsigned long start = virt_to_idmap(&secondary_startup);
 	int error;
 
 	pr_debug("keystone-smp: booting cpu %d, vector %08lx\n",
@@ -36,6 +39,19 @@
 	return error;
 }
 
+#ifdef CONFIG_ARM_LPAE
+static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
+{
+	pgd_t *pgd0 = pgd_offset_k(0);
+	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+	local_flush_tlb_all();
+}
+#else
+static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
+{}
+#endif
+
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
+	.smp_secondary_init     = keystone_smp_secondary_initmem,
 };

diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 660bfc5..e2e5203 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c

@@ -60,7 +60,8 @@
 
 static struct pwm_lookup pwm_lookup[] = {
 	/* LEDB -> PMU_STAT */
-	PWM_LOOKUP("twl-pwmled", 1, "leds_pwm", "beagleboard::pmu_stat"),
+	PWM_LOOKUP("twl-pwmled", 1, "leds_pwm", "beagleboard::pmu_stat",
+		   7812500, PWM_POLARITY_NORMAL),
 };
 
 static struct led_pwm pwm_leds[] = {

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index e58609b..592ba0a 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c

@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/omap4-keypad.h>
 #include <linux/platform_data/mailbox-omap.h>
 
 #include <asm/mach-types.h>
@@ -29,7 +28,6 @@
 #include "iomap.h"
 #include "omap_hwmod.h"
 #include "omap_device.h"
-#include "omap4-keypad.h"
 
 #include "soc.h"
 #include "common.h"
@@ -255,37 +253,6 @@
 #endif
 }
 
-int __init omap4_keyboard_init(struct omap4_keypad_platform_data
-			*sdp4430_keypad_data, struct omap_board_data *bdata)
-{
-	struct platform_device *pdev;
-	struct omap_hwmod *oh;
-	struct omap4_keypad_platform_data *keypad_data;
-	unsigned int id = -1;
-	char *oh_name = "kbd";
-	char *name = "omap4-keypad";
-
-	oh = omap_hwmod_lookup(oh_name);
-	if (!oh) {
-		pr_err("Could not look up %s\n", oh_name);
-		return -ENODEV;
-	}
-
-	keypad_data = sdp4430_keypad_data;
-
-	pdev = omap_device_build(name, id, oh, keypad_data,
-				 sizeof(struct omap4_keypad_platform_data));
-
-	if (IS_ERR(pdev)) {
-		WARN(1, "Can't build omap_device for %s:%s.\n",
-						name, oh->name);
-		return PTR_ERR(pdev);
-	}
-	oh->mux = omap_hwmod_mux_init(bdata->pads, bdata->pads_cnt);
-
-	return 0;
-}
-
 #if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE)
 static inline void __init omap_init_mbox(void)
 {

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 852b19a..2c0c281 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c

@@ -68,6 +68,9 @@
 #define	GPMC_ECC_BCH_RESULT_1	0x244	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_2	0x248	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_3	0x24c	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_4	0x300	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_5	0x304	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_6	0x308	/* not available on OMAP2 */
 
 /* GPMC ECC control settings */
 #define GPMC_ECC_CTRL_ECCCLEAR		0x100
@@ -677,6 +680,12 @@
 					   GPMC_BCH_SIZE * i;
 		reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 +
 					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result4[i] = gpmc_base + GPMC_ECC_BCH_RESULT_4 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result5[i] = gpmc_base + GPMC_ECC_BCH_RESULT_5 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result6[i] = gpmc_base + GPMC_ECC_BCH_RESULT_6 +
+					   i * GPMC_BCH_SIZE;
 	}
 }
 
@@ -1412,6 +1421,12 @@
 		else
 			gpmc_nand_data->ecc_opt =
 				OMAP_ECC_BCH8_CODE_HW_DETECTION_SW;
+	else if (!strcmp(s, "bch16"))
+		if (gpmc_nand_data->elm_of_node)
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH16_CODE_HW;
+		else
+			pr_err("%s: BCH16 requires ELM support\n", __func__);
 	else
 		pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__);
 

diff --git a/arch/arm/mach-omap2/omap4-keypad.h b/arch/arm/mach-omap2/omap4-keypad.h
deleted file mode 100644
index 20de0d5..0000000
--- a/arch/arm/mach-omap2/omap4-keypad.h
+++ /dev/null

@@ -1,8 +0,0 @@
-#ifndef ARCH_ARM_PLAT_OMAP4_KEYPAD_H
-#define ARCH_ARM_PLAT_OMAP4_KEYPAD_H
-
-struct omap_board_data;
-
-extern int omap4_keyboard_init(struct omap4_keypad_platform_data *,
-				struct omap_board_data *);
-#endif

diff --git a/arch/arm/mach-orion5x/board-dt.c b/arch/arm/mach-orion5x/board-dt.c
index 35d418f..79f033b 100644
--- a/arch/arm/mach-orion5x/board-dt.c
+++ b/arch/arm/mach-orion5x/board-dt.c

@@ -45,7 +45,7 @@
 	orion5x_id(&dev, &rev, &dev_name);
 	printk(KERN_INFO "Orion ID: %s. TCLK=%d.\n", dev_name, orion5x_tclk);
 
-	BUG_ON(mvebu_mbus_dt_init());
+	BUG_ON(mvebu_mbus_dt_init(false));
 
 	/*
 	 * Setup Orion address map

diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index a7c30eb..c66ad4e 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c

@@ -574,7 +574,8 @@
 };
 
 static struct pwm_lookup hx4700_pwm_lookup[] = {
-	PWM_LOOKUP("pxa27x-pwm.1", 0, "pwm-backlight", NULL),
+	PWM_LOOKUP("pxa27x-pwm.1", 0, "pwm-backlight", NULL,
+		   30923, PWM_POLARITY_NORMAL),
 };
 
 /*

diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index 6c719ec..c1ce921 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c

@@ -234,14 +234,6 @@
 
 /* TSP */
 static struct mxt_platform_data qt602240_platform_data = {
-	.x_line		= 17,
-	.y_line		= 11,
-	.x_size		= 800,
-	.y_size		= 480,
-	.blen		= 0x21,
-	.threshold	= 0x28,
-	.voltage	= 2800000,              /* 2.8V */
-	.orient		= MXT_DIAGONAL,
 	.irqflags	= IRQF_TRIGGER_FALLING,
 };
 

diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 01f8110..30fcac7 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c

@@ -31,7 +31,7 @@
 #include <linux/gpio_keys.h>
 #include <linux/regulator/driver.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/pwm-renesas-tpu.h>
+#include <linux/pwm.h>
 #include <linux/pwm_backlight.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/gpio-regulator.h>
@@ -399,24 +399,16 @@
 	},
 };
 
-static struct tpu_pwm_platform_data pwm_device_data = {
-	.channels[2] = {
-		.polarity = PWM_POLARITY_INVERSED,
-	}
-};
-
 static struct platform_device pwm_device = {
 	.name = "renesas-tpu-pwm",
 	.id = -1,
-	.dev = {
-		.platform_data = &pwm_device_data,
-	},
 	.num_resources = ARRAY_SIZE(pwm_resources),
 	.resource = pwm_resources,
 };
 
 static struct pwm_lookup pwm_lookup[] = {
-	PWM_LOOKUP("renesas-tpu-pwm", 2, "pwm-backlight.0", NULL),
+	PWM_LOOKUP("renesas-tpu-pwm", 2, "pwm-backlight.0", NULL,
+		   33333, PWM_POLARITY_INVERSED),
 };
 
 /* LCDC and backlight */

diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c
index e4dec9f..9c6029b 100644
--- a/arch/arm/mach-tegra/board-paz00.c
+++ b/arch/arm/mach-tegra/board-paz00.c

@@ -23,9 +23,7 @@
 #include "board.h"
 
 static struct rfkill_gpio_platform_data wifi_rfkill_platform_data = {
-	.name		= "wifi_rfkill",
-	.reset_gpio	= 25, /* PD1 */
-	.shutdown_gpio	= 85, /* PK5 */
+	.name	= "wifi_rfkill",
 	.type	= RFKILL_TYPE_WLAN,
 };
 

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6f879c3..fb5503c 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c

@@ -136,7 +136,7 @@
 	u16 ret = 0;
 
 	if ((ctx->skf->len > 1) ||
-	    (ctx->skf->insns[0].code == BPF_S_RET_A))
+	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
 		ret |= 1 << r_A;
 
 #ifdef CONFIG_FRAME_POINTER
@@ -164,18 +164,10 @@
 static inline bool is_load_to_a(u16 inst)
 {
 	switch (inst) {
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_QUEUE:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		return true;
 	default:
 		return false;
@@ -215,7 +207,7 @@
 		emit(ARM_MOV_I(r_X, 0), ctx);
 
 	/* do not leak kernel data to userspace */
-	if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst)))
+	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
 		emit(ARM_MOV_I(r_A, 0), ctx);
 
 	/* stack space for the BPF_MEM words */
@@ -480,36 +472,39 @@
 	u32 k;
 
 	for (i = 0; i < prog->len; i++) {
+		u16 code;
+
 		inst = &(prog->insns[i]);
 		/* K as an immediate value operand */
 		k = inst->k;
+		code = bpf_anc_helper(inst);
 
 		/* compute offsets only in the fake pass */
 		if (ctx->target == NULL)
 			ctx->offsets[i] = ctx->idx * 4;
 
-		switch (inst->code) {
-		case BPF_S_LD_IMM:
+		switch (code) {
+		case BPF_LD | BPF_IMM:
 			emit_mov_i(r_A, k, ctx);
 			break;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			emit(ARM_LDR_I(r_A, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LD_MEM:
+		case BPF_LD | BPF_MEM:
 			/* A = scratch[k] */
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			load_order = 2;
 			goto load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			load_order = 1;
 			goto load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			load_order = 0;
 load:
 			/* the interpreter will deal with the negative K */
@@ -552,31 +547,31 @@
 			emit_err_ret(ARM_COND_NE, ctx);
 			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
 			break;
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			load_order = 2;
 			goto load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			load_order = 1;
 			goto load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			load_order = 0;
 load_ind:
 			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
 			goto load_common;
-		case BPF_S_LDX_IMM:
+		case BPF_LDX | BPF_IMM:
 			ctx->seen |= SEEN_X;
 			emit_mov_i(r_X, k, ctx);
 			break;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_X | SEEN_SKB;
 			emit(ARM_LDR_I(r_X, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LDX_MEM:
+		case BPF_LDX | BPF_MEM:
 			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			/* x = ((*(frame + k)) & 0xf) << 2; */
 			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
 			/* the interpreter should deal with the negative K */
@@ -606,113 +601,113 @@
 			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
 			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
 			break;
-		case BPF_S_ST:
+		case BPF_ST:
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_STX:
+		case BPF_STX:
 			update_on_xread(ctx);
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_ALU_ADD_K:
+		case BPF_ALU | BPF_ADD | BPF_K:
 			/* A += K */
 			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_ADD_X:
+		case BPF_ALU | BPF_ADD | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_SUB_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
 			/* A -= K */
 			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_SUB_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_MUL_K:
+		case BPF_ALU | BPF_MUL | BPF_K:
 			/* A *= K */
 			emit_mov_i(r_scratch, k, ctx);
 			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
 			break;
-		case BPF_S_ALU_MUL_X:
+		case BPF_ALU | BPF_MUL | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_MUL(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_DIV_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
 			if (k == 1)
 				break;
 			emit_mov_i(r_scratch, k, ctx);
 			emit_udiv(r_A, r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ALU_DIV_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_CMP_I(r_X, 0), ctx);
 			emit_err_ret(ARM_COND_EQ, ctx);
 			emit_udiv(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			/* A |= K */
 			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_XOR_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
 			/* A ^= K; */
 			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X:
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
 			/* A ^= X */
 			update_on_xread(ctx);
 			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			/* A &= K */
 			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSL_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_LSH_X:
+		case BPF_ALU | BPF_LSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_RSH_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSR_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_RSH_X:
+		case BPF_ALU | BPF_RSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			/* A = -A */
 			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
 			break;
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			/* pc += K */
 			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_K:
+		case BPF_JMP | BPF_JEQ | BPF_K:
 			/* pc += (A == K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_EQ;
 			goto cmp_imm;
-		case BPF_S_JMP_JGT_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
 			/* pc += (A > K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HI;
 			goto cmp_imm;
-		case BPF_S_JMP_JGE_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
 			/* pc += (A >= K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HS;
 cmp_imm:
@@ -731,22 +726,22 @@
 				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
 							     ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			/* pc += (A == X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_EQ;
 			goto cmp_x;
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			/* pc += (A > X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_HI;
 			goto cmp_x;
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			/* pc += (A >= X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_CS;
 cmp_x:
 			update_on_xread(ctx);
 			emit(ARM_CMP_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_JMP_JSET_K:
+		case BPF_JMP | BPF_JSET | BPF_K:
 			/* pc += (A & K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_NE;
 			/* not set iff all zeroes iff Z==1 iff EQ */
@@ -759,16 +754,16 @@
 				emit(ARM_TST_I(r_A, imm12), ctx);
 			}
 			goto cond_jump;
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			/* pc += (A & X) ? pc->jt : pc->jf */
 			update_on_xread(ctx);
 			condt  = ARM_COND_NE;
 			emit(ARM_TST_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
 			goto b_epilogue;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			if ((k == 0) && (ctx->ret0_fp_idx < 0))
 				ctx->ret0_fp_idx = i;
 			emit_mov_i(ARM_R0, k, ctx);
@@ -776,17 +771,17 @@
 			if (i != ctx->skf->len - 1)
 				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
 			break;
-		case BPF_S_MISC_TAX:
+		case BPF_MISC | BPF_TAX:
 			/* X = A */
 			ctx->seen |= SEEN_X;
 			emit(ARM_MOV_R(r_X, r_A), ctx);
 			break;
-		case BPF_S_MISC_TXA:
+		case BPF_MISC | BPF_TXA:
 			/* A = X */
 			update_on_xread(ctx);
 			emit(ARM_MOV_R(r_A, r_X), ctx);
 			break;
-		case BPF_S_ANC_PROTOCOL:
+		case BPF_ANC | SKF_AD_PROTOCOL:
 			/* A = ntohs(skb->protocol) */
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
@@ -795,7 +790,7 @@
 			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
 			emit_swap16(r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 			/* r_scratch = current_thread_info() */
 			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
 			/* A = current_thread_info()->cpu */
@@ -803,7 +798,7 @@
 			off = offsetof(struct thread_info, cpu);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
 			ctx->seen |= SEEN_SKB;
 			off = offsetof(struct sk_buff, dev);
@@ -817,30 +812,30 @@
 			off = offsetof(struct net_device, ifindex);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			off = offsetof(struct sk_buff, mark);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			off = offsetof(struct sk_buff, hash);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
-			if (inst->code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
 			else
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);

diff --git a/arch/arm/plat-samsung/dev-backlight.c b/arch/arm/plat-samsung/dev-backlight.c
index be4ad0b..2157c5b 100644
--- a/arch/arm/plat-samsung/dev-backlight.c
+++ b/arch/arm/plat-samsung/dev-backlight.c

@@ -124,8 +124,6 @@
 		samsung_bl_data->pwm_period_ns = bl_data->pwm_period_ns;
 	if (bl_data->enable_gpio >= 0)
 		samsung_bl_data->enable_gpio = bl_data->enable_gpio;
-	if (bl_data->enable_gpio_flags)
-		samsung_bl_data->enable_gpio_flags = bl_data->enable_gpio_flags;
 	if (bl_data->init)
 		samsung_bl_data->init = bl_data->init;
 	if (bl_data->notify)

diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig
index 1759fad..e66ba31 100644
--- a/arch/blackfin/configs/BF526-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig

@@ -53,7 +53,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -63,6 +62,7 @@
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set

diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
index 3577296..0207c58 100644
--- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig

@@ -58,7 +58,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
@@ -66,6 +65,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set

diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig
index 2e73a5d..99c131b 100644
--- a/arch/blackfin/configs/BF527-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig

@@ -57,7 +57,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
@@ -65,6 +64,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set

diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index f0a2ddf..38cb17d 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig

@@ -64,7 +64,6 @@
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -75,6 +74,7 @@
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_BF5XX=y
 # CONFIG_MTD_NAND_BF5XX_HWECC is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y

diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig
index 4ca39ab..a7e9bfd 100644
--- a/arch/blackfin/configs/BF609-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig

@@ -57,7 +57,6 @@
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -65,6 +64,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y

diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig
index 3853c47..f4a9200 100644
--- a/arch/blackfin/configs/BlackStamp_defconfig
+++ b/arch/blackfin/configs/BlackStamp_defconfig

@@ -45,7 +45,6 @@
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
@@ -53,7 +52,7 @@
 CONFIG_MTD_ROM=m
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
-# CONFIG_M25PXX_USE_FAST_READ is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=y
 CONFIG_BLK_DEV_RAM=y

diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig
index f754e49..0ff97d8 100644
--- a/arch/blackfin/configs/H8606_defconfig
+++ b/arch/blackfin/configs/H8606_defconfig

@@ -36,13 +36,12 @@
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
-# CONFIG_M25PXX_USE_FAST_READ is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_AT25=y

diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index 8d1e4c2..40e9c2b 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h

@@ -316,6 +316,8 @@
 }
 static inline void enable_dma(unsigned int channel)
 {
+	dma_ch[channel].regs->curr_x_count = 0;
+	dma_ch[channel].regs->curr_y_count = 0;
 	dma_ch[channel].regs->cfg |= DMAEN;
 }
 int set_dma_callback(unsigned int channel, irq_handler_t callback, void *data);

diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index d098929..6f4bac9 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c

@@ -17,6 +17,7 @@
 #if IS_ENABLED(CONFIG_USB_ISP1362_HCD)
 #include <linux/usb/isp1362.h>
 #endif
+#include <linux/gpio.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
 #include <asm/dma.h>

diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
index d2cfe45..dd29533 100644
--- a/arch/mips/Kbuild
+++ b/arch/mips/Kbuild

@@ -16,7 +16,7 @@
 
 obj-y += kernel/
 obj-y += mm/
-obj-y += math-emu/
+obj-y += net/
 
 ifdef CONFIG_KVM
 obj-y += kvm/

diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 6e23912..f5e18bf 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms

@@ -18,6 +18,7 @@
 platforms += mti-malta
 platforms += mti-sead3
 platforms += netlogic
+platforms += paravirt
 platforms += pmcs-msp71xx
 platforms += pnx833x
 platforms += ralink

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5e0014e..7a469ac 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -12,6 +12,7 @@
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_BPF_JIT if !CPU_MICROMIPS
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
@@ -50,6 +51,8 @@
 	select CLONE_BACKWARDS
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_CC_STACKPROTECTOR
+	select CPU_PM if CPU_IDLE
+	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 
 menu "Machine selection"
 
@@ -83,6 +86,7 @@
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	select SYS_SUPPORTS_ZBOOT_UART16550
 	select ARCH_REQUIRE_GPIOLIB
 	select VLYNQ
@@ -106,6 +110,7 @@
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	help
 	  Support for the Atheros AR71XX/AR724X/AR913X SoCs.
 
@@ -122,6 +127,7 @@
 	select NO_EXCEPT_FILL
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	select SYS_HAS_EARLY_PRINTK
 	select USE_GENERIC_EARLY_PRINTK_8250
 	help
@@ -168,9 +174,9 @@
 	bool "DECstations"
 	select BOOT_ELF32
 	select CEVT_DS1287
-	select CEVT_R4K
+	select CEVT_R4K if CPU_R4X00
 	select CSRC_IOASIC
-	select CSRC_R4K
+	select CSRC_R4K if CPU_R4X00
 	select CPU_DADDI_WORKAROUNDS if 64BIT
 	select CPU_R4000_WORKAROUNDS if 64BIT
 	select CPU_R4400_WORKAROUNDS if 64BIT
@@ -248,6 +254,7 @@
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_MIPS16
 	select SYS_SUPPORTS_MULTITHREADING
 	select SYS_HAS_EARLY_PRINTK
 	select ARCH_REQUIRE_GPIOLIB
@@ -330,6 +337,7 @@
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_MIPS_CMP
 	select SYS_SUPPORTS_MIPS_CPS
+	select SYS_SUPPORTS_MIPS16
 	select SYS_SUPPORTS_MULTITHREADING
 	select SYS_SUPPORTS_SMARTMIPS
 	select SYS_SUPPORTS_ZBOOT
@@ -361,6 +369,7 @@
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_SMARTMIPS
 	select SYS_SUPPORTS_MICROMIPS
+	select SYS_SUPPORTS_MIPS16
 	select USB_EHCI_BIG_ENDIAN_DESC
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select USE_OF
@@ -380,6 +389,7 @@
 	select CEVT_R4K
 	select CSRC_R4K
 	select SYS_HAS_CPU_VR41XX
+	select SYS_SUPPORTS_MIPS16
 	select ARCH_REQUIRE_GPIOLIB
 
 config NXP_STB220
@@ -407,6 +417,7 @@
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	select IRQ_CPU
 	select SERIAL_8250
 	select SERIAL_8250_CONSOLE
@@ -430,6 +441,7 @@
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	select SYS_HAS_EARLY_PRINTK
 	select HAVE_MACH_CLKDEV
 	select CLKDEV_LOOKUP
@@ -674,7 +686,6 @@
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
 	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select USE_GENERIC_EARLY_PRINTK_8250
 	help
 	  The SNI RM200/300/400 are MIPS-based machines manufactured by
 	  Siemens Nixdorf Informationssysteme (SNI), parent company of Pyramid
@@ -721,6 +732,11 @@
 	select ZONE_DMA32
 	select HOLES_IN_ZONE
 	select ARCH_REQUIRE_GPIOLIB
+	select LIBFDT
+	select USE_OF
+	select ARCH_SPARSEMEM_ENABLE
+	select SYS_SUPPORTS_SMP
+	select NR_CPUS_DEFAULT_16
 	help
 	  This option supports all of the Octeon reference boards from Cavium
 	  Networks. It builds a kernel that dynamically determines the Octeon
@@ -789,6 +805,25 @@
 	  This board is based on Netlogic XLP Processor.
 	  Say Y here if you have a XLP based board.
 
+config MIPS_PARAVIRT
+	bool "Para-Virtualized guest system"
+	select CEVT_R4K
+	select CSRC_R4K
+	select DMA_COHERENT
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_SMP
+	select NR_CPUS_DEFAULT_4
+	select SYS_HAS_EARLY_PRINTK
+	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_HAS_CPU_MIPS64_R2
+	select SYS_HAS_CPU_CAVIUM_OCTEON
+	select HW_HAS_PCI
+	select SWAP_IO_SPACE
+	help
+	  This option supports running Linux as a para-virtualized guest.
+
 endchoice
 
 source "arch/mips/alchemy/Kconfig"
@@ -809,6 +844,7 @@
 source "arch/mips/loongson/Kconfig"
 source "arch/mips/loongson1/Kconfig"
 source "arch/mips/netlogic/Kconfig"
+source "arch/mips/paravirt/Kconfig"
 
 endmenu
 
@@ -1059,6 +1095,7 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_MIPS16
 	select CPU_MIPSR2_IRQ_VI
 
 config SOC_PNX8335
@@ -1398,16 +1435,11 @@
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
-	select ARCH_SPARSEMEM_ENABLE
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
-	select SYS_SUPPORTS_SMP
-	select NR_CPUS_DEFAULT_16
 	select WEAK_ORDERING
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_HUGEPAGES
-	select LIBFDT
-	select USE_OF
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select MIPS_L1_CACHE_SHIFT_7
 	help
@@ -1659,6 +1691,12 @@
 config SYS_HAS_CPU_XLP
 	bool
 
+config MIPS_MALTA_PM
+	depends on MIPS_MALTA
+	depends on PCI
+	bool
+	default y
+
 #
 # CPU may reorder R->R, R->W, W->R, W->W
 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
@@ -1842,7 +1880,7 @@
 
 config CEVT_GIC
 	bool "Use GIC global counter for clock events"
-	depends on IRQ_GIC && !(MIPS_SEAD3 || MIPS_MT_SMTC)
+	depends on IRQ_GIC && !MIPS_SEAD3
 	help
 	  Use the GIC global counter for the clock events. The R4K clock
 	  event driver is always present, so if the platform ends up not
@@ -1895,19 +1933,8 @@
 	bool
 	default y if !(CPU_R3000 || CPU_R8000 || CPU_SB1 || CPU_TX39XX || CPU_CAVIUM_OCTEON)
 
-choice
-	prompt "MIPS MT options"
-
-config MIPS_MT_DISABLED
-	bool "Disable multithreading support"
-	help
-	  Use this option if your platform does not support the MT ASE
-	  which is hardware multithreading support. On systems without
-	  an MT-enabled processor, this will be the only option that is
-	  available in this menu.
-
 config MIPS_MT_SMP
-	bool "Use 1 TC on each available VPE for SMP"
+	bool "MIPS MT SMP support (1 TC on each available VPE)"
 	depends on SYS_SUPPORTS_MULTITHREADING
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
@@ -1926,26 +1953,6 @@
 	  Intel Hyperthreading feature. For further information go to
 	  <http://www.imgtec.com/mips/mips-multithreading.asp>.
 
-config MIPS_MT_SMTC
-	bool "Use all TCs on all VPEs for SMP (DEPRECATED)"
-	depends on CPU_MIPS32_R2
-	depends on SYS_SUPPORTS_MULTITHREADING
-	depends on !MIPS_CPS
-	select CPU_MIPSR2_IRQ_VI
-	select CPU_MIPSR2_IRQ_EI
-	select MIPS_MT
-	select SMP
-	select SMP_UP
-	select SYS_SUPPORTS_SMP
-	select NR_CPUS_DEFAULT_8
-	help
-	  This is a kernel model which is known as SMTC. This is
-	  supported on cores with the MT ASE and presents all TCs
-	  available on all VPEs to support SMP. For further
-	  information see <http://www.linux-mips.org/wiki/34K#SMTC>.
-
-endchoice
-
 config MIPS_MT
 	bool
 
@@ -1967,7 +1974,7 @@
 config MIPS_MT_FPAFF
 	bool "Dynamic FPU affinity for FP-intensive threads"
 	default y
-	depends on MIPS_MT_SMP || MIPS_MT_SMTC
+	depends on MIPS_MT_SMP
 
 config MIPS_VPE_LOADER
 	bool "VPE loader support."
@@ -1989,29 +1996,6 @@
 	default "y"
 	depends on MIPS_VPE_LOADER && !MIPS_CMP
 
-config MIPS_MT_SMTC_IM_BACKSTOP
-	bool "Use per-TC register bits as backstop for inhibited IM bits"
-	depends on MIPS_MT_SMTC
-	default n
-	help
-	  To support multiple TC microthreads acting as "CPUs" within
-	  a VPE, VPE-wide interrupt mask bits must be specially manipulated
-	  during interrupt handling. To support legacy drivers and interrupt
-	  controller management code, SMTC has a "backstop" to track and
-	  if necessary restore the interrupt mask. This has some performance
-	  impact on interrupt service overhead.
-
-config MIPS_MT_SMTC_IRQAFF
-	bool "Support IRQ affinity API"
-	depends on MIPS_MT_SMTC
-	default n
-	help
-	  Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
-	  for SMTC Linux kernel. Requires platform support, of which
-	  an example can be found in the MIPS kernel i8259 and Malta
-	  platform code.  Adds some overhead to interrupt dispatch, and
-	  should be used only if you know what you are doing.
-
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
 	depends on MIPS_VPE_LOADER
@@ -2039,7 +2023,7 @@
 
 config MIPS_CMP
 	bool "MIPS CMP framework support (DEPRECATED)"
-	depends on SYS_SUPPORTS_MIPS_CMP && !MIPS_MT_SMTC
+	depends on SYS_SUPPORTS_MIPS_CMP
 	select MIPS_GIC_IPI
 	select SYNC_R4K
 	select WEAK_ORDERING
@@ -2057,9 +2041,11 @@
 	depends on SYS_SUPPORTS_MIPS_CPS
 	select MIPS_CM
 	select MIPS_CPC
+	select MIPS_CPS_PM if HOTPLUG_CPU
 	select MIPS_GIC_IPI
 	select SMP
 	select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
+	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_SUPPORTS_SMP
 	select WEAK_ORDERING
 	help
@@ -2069,6 +2055,9 @@
 	  no external assistance. It is safe to enable this when hardware
 	  support is unavailable.
 
+config MIPS_CPS_PM
+	bool
+
 config MIPS_GIC_IPI
 	bool
 
@@ -2199,6 +2188,13 @@
 config SYS_SUPPORTS_MICROMIPS
 	bool
 
+config SYS_SUPPORTS_MIPS16
+	bool
+	help
+	  This option must be set if a kernel might be executed on a MIPS16-
+	  enabled CPU even if MIPS16 is not actually being used.  In other
+	  words, it makes the kernel MIPS16-tolerant.
+
 config CPU_SUPPORTS_MSA
 	bool
 
@@ -2239,7 +2235,7 @@
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS && !MIPS_MT_SMTC && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP)
+	depends on PERF_EVENTS && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP)
 	default y
 	help
 	  Enable hardware performance counter support for perf events. If
@@ -2297,8 +2293,8 @@
 	bool
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-64)"
-	range 2 64
+	int "Maximum number of CPUs (2-256)"
+	range 2 256
 	depends on SMP
 	default "4" if NR_CPUS_DEFAULT_4
 	default "8" if NR_CPUS_DEFAULT_8
@@ -2671,12 +2667,16 @@
 config MIPS_EXTERNAL_TIMER
 	bool
 
-if CPU_SUPPORTS_CPUFREQ && MIPS_EXTERNAL_TIMER
 menu "CPU Power Management"
+
+if CPU_SUPPORTS_CPUFREQ && MIPS_EXTERNAL_TIMER
 source "drivers/cpufreq/Kconfig"
-endmenu
 endif
 
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"

diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 25de292..3a2b775 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug

@@ -79,15 +79,6 @@
 
 	  Normally, you will choose 'N' here.
 
-config SMTC_IDLE_HOOK_DEBUG
-	bool "Enable additional debug checks before going into CPU idle loop"
-	depends on DEBUG_KERNEL && MIPS_MT_SMTC
-	help
-	  This option enables Enable additional debug checks before going into
-	  CPU idle loop.  For details on these checks, see
-	  arch/mips/kernel/smtc.c.  This debugging option result in significant
-	  overhead so should be disabled in production kernels.
-
 config SB1XXX_CORELIS
 	bool "Corelis Debugger"
 	depends on SIBYTE_SB1xxx_SOC

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 60a359c..a8521de 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile

@@ -120,7 +120,7 @@
 				   -fno-omit-frame-pointer
 
 ifeq ($(CONFIG_CPU_HAS_MSA),y)
-toolchain-msa			:= $(call cc-option-yn,-mhard-float -mfp64 -mmsa)
+toolchain-msa	:= $(call cc-option-yn,-mhard-float -mfp64 -Wa$(comma)-mmsa)
 cflags-$(toolchain-msa)		+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
 
@@ -251,6 +251,7 @@
 head-y := arch/mips/kernel/head.o
 
 libs-y			+= arch/mips/lib/
+libs-y			+= arch/mips/math-emu/
 
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/

diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c
index bd55136..3fb814b 100644
--- a/arch/mips/alchemy/board-xxs1500.c
+++ b/arch/mips/alchemy/board-xxs1500.c

@@ -49,7 +49,7 @@
 	prom_init_cmdline();
 
 	memsize_str = prom_getenv("memsize");
-	if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize))
+	if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
 		memsize = 0x04000000;
 
 	add_memory_region(0, memsize, BOOT_MEM_RAM);

diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 566a174..8267e3c 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c

@@ -67,6 +67,12 @@
 	case ALCHEMY_CPU_AU1500:
 	case ALCHEMY_CPU_AU1100:
 		coherentio = 0;
+		break;
+	case ALCHEMY_CPU_AU1200:
+		/* Au1200 AB USB does not support coherent memory */
+		if (0 == (read_c0_prid() & PRID_REV_MASK))
+			coherentio = 0;
+		break;
 	}
 
 	board_setup();	/* board specific setup */

diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c
index 2adc7ed..d193dbe 100644
--- a/arch/mips/alchemy/common/usb.c
+++ b/arch/mips/alchemy/common/usb.c

@@ -355,47 +355,25 @@
 	}
 }
 
-static inline int au1200_coherency_bug(void)
-{
-#if defined(CONFIG_DMA_COHERENT)
-	/* Au1200 AB USB does not support coherent memory */
-	if (!(read_c0_prid() & PRID_REV_MASK)) {
-		printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n");
-		printk(KERN_INFO "Au1200 USB: update your board or re-configure"
-				 " the kernel\n");
-		return -ENODEV;
-	}
-#endif
-	return 0;
-}
-
 static inline int au1200_usb_control(int block, int enable)
 {
 	void __iomem *base =
 			(void __iomem *)KSEG1ADDR(AU1200_USB_CTL_PHYS_ADDR);
-	int ret = 0;
 
 	switch (block) {
 	case ALCHEMY_USB_OHCI0:
-		ret = au1200_coherency_bug();
-		if (ret && enable)
-			goto out;
 		__au1200_ohci_control(base, enable);
 		break;
 	case ALCHEMY_USB_UDC0:
 		__au1200_udc_control(base, enable);
 		break;
 	case ALCHEMY_USB_EHCI0:
-		ret = au1200_coherency_bug();
-		if (ret && enable)
-			goto out;
 		__au1200_ehci_control(base, enable);
 		break;
 	default:
-		ret = -ENODEV;
+		return -ENODEV;
 	}
-out:
-	return ret;
+	return 0;
 }
 
 

diff --git a/arch/mips/alchemy/devboards/pm.c b/arch/mips/alchemy/devboards/pm.c
index b86bff3..61e90fe 100644
--- a/arch/mips/alchemy/devboards/pm.c
+++ b/arch/mips/alchemy/devboards/pm.c

@@ -158,7 +158,7 @@
 	int tmp;
 
 	if (ATTRCMP(timer_timeout)) {
-		tmp = strict_strtoul(instr, 0, &l);
+		tmp = kstrtoul(instr, 0, &l);
 		if (tmp)
 			return tmp;
 
@@ -181,7 +181,7 @@
 		}
 
 	} else if (ATTRCMP(wakemsk)) {
-		tmp = strict_strtoul(instr, 0, &l);
+		tmp = kstrtoul(instr, 0, &l);
 		if (tmp)
 			return tmp;
 

diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index 0af808d..1a03a2f 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c

@@ -69,15 +69,18 @@
 	 * BCM47XX uses 128MB for addressing the ram, if the system contains
 	 * less that that amount of ram it remaps the ram more often into the
 	 * available space.
-	 * Accessing memory after 128MB will cause an exception.
-	 * max contains the biggest possible address supported by the platform.
-	 * If the method wants to try something above we assume 128MB ram.
 	 */
-	off = (unsigned long)prom_init;
-	max = off | ((128 << 20) - 1);
-	for (mem = (1 << 20); mem < (128 << 20); mem += (1 << 20)) {
-		if ((off + mem) > max) {
-			mem = (128 << 20);
+
+	/* Physical address, without mapping to any kernel segment */
+	off = CPHYSADDR((unsigned long)prom_init);
+
+	/* Accessing memory after 128 MiB will cause an exception */
+	max = 128 << 20;
+
+	for (mem = 1 << 20; mem < max; mem += 1 << 20) {
+		/* Loop condition may be not enough, off may be over 1 MiB */
+		if (off + mem >= max) {
+			mem = max;
 			printk(KERN_DEBUG "assume 128MB RAM\n");
 			break;
 		}

diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index a8b5408..da4cdb1 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c

@@ -168,6 +168,7 @@
 static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom,
 					const char *prefix, bool fallback)
 {
+	nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback);

diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index 227705d..6028666 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig

@@ -10,6 +10,17 @@
 	  non-CN63XXP1 hardware, so it is recommended to select "n"
 	  unless it is known the workarounds are needed.
 
+config CAVIUM_OCTEON_CVMSEG_SIZE
+	int "Number of L1 cache lines reserved for CVMSEG memory"
+	range 0 54
+	default 1
+	help
+	  CVMSEG LM is a segment that accesses portions of the dcache as a
+	  local memory; the larger CVMSEG is, the smaller the cache is.
+	  This selects the size of CVMSEG LM, which is in cache blocks. The
+	  legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
+	  between zero and 6192 bytes).
+
 endif # CPU_CAVIUM_OCTEON
 
 if CAVIUM_OCTEON_SOC
@@ -23,17 +34,6 @@
 	  with this option to be run at the same time as one built without this
 	  option.
 
-config CAVIUM_OCTEON_CVMSEG_SIZE
-	int "Number of L1 cache lines reserved for CVMSEG memory"
-	range 0 54
-	default 1
-	help
-	  CVMSEG LM is a segment that accesses portions of the dcache as a
-	  local memory; the larger CVMSEG is, the smaller the cache is.
-	  This selects the size of CVMSEG LM, which is in cache blocks. The
-	  legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
-	  between zero and 6192 bytes).
-
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
 	default "y"
@@ -86,7 +86,6 @@
 	select IOMMU_HELPER
 	select NEED_SG_DMA_LENGTH
 
-
 config OCTEON_ILM
 	tristate "Module to measure interrupt latency using Octeon CIU Timer"
 	help

diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
index 8553ad5..7e5cf7a 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c

@@ -106,6 +106,158 @@
 EXPORT_SYMBOL_GPL(cvmx_helper_ports_on_interface);
 
 /**
+ * @INTERNAL
+ * Return interface mode for CN68xx.
+ */
+static cvmx_helper_interface_mode_t __cvmx_get_mode_cn68xx(int interface)
+{
+	union cvmx_mio_qlmx_cfg qlm_cfg;
+	switch (interface) {
+	case 0:
+		qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(0));
+		/* QLM is disabled when QLM SPD is 15. */
+		if (qlm_cfg.s.qlm_spd == 15)
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+		if (qlm_cfg.s.qlm_cfg == 2)
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		else if (qlm_cfg.s.qlm_cfg == 3)
+			return CVMX_HELPER_INTERFACE_MODE_XAUI;
+		else
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	case 2:
+	case 3:
+	case 4:
+		qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(interface));
+		/* QLM is disabled when QLM SPD is 15. */
+		if (qlm_cfg.s.qlm_spd == 15)
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+		if (qlm_cfg.s.qlm_cfg == 2)
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		else if (qlm_cfg.s.qlm_cfg == 3)
+			return CVMX_HELPER_INTERFACE_MODE_XAUI;
+		else
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	case 7:
+		qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(3));
+		/* QLM is disabled when QLM SPD is 15. */
+		if (qlm_cfg.s.qlm_spd == 15) {
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		} else if (qlm_cfg.s.qlm_cfg != 0) {
+			qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(1));
+			if (qlm_cfg.s.qlm_cfg != 0)
+				return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		}
+		return CVMX_HELPER_INTERFACE_MODE_NPI;
+	case 8:
+		return CVMX_HELPER_INTERFACE_MODE_LOOP;
+	default:
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	}
+}
+
+/**
+ * @INTERNAL
+ * Return interface mode for an Octeon II
+ */
+static cvmx_helper_interface_mode_t __cvmx_get_mode_octeon2(int interface)
+{
+	union cvmx_gmxx_inf_mode mode;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
+		return __cvmx_get_mode_cn68xx(interface);
+
+	if (interface == 2)
+		return CVMX_HELPER_INTERFACE_MODE_NPI;
+
+	if (interface == 3)
+		return CVMX_HELPER_INTERFACE_MODE_LOOP;
+
+	/* Only present in CN63XX & CN66XX Octeon model */
+	if ((OCTEON_IS_MODEL(OCTEON_CN63XX) &&
+	     (interface == 4 || interface == 5)) ||
+	    (OCTEON_IS_MODEL(OCTEON_CN66XX) &&
+	     interface >= 4 && interface <= 7)) {
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	}
+
+	if (OCTEON_IS_MODEL(OCTEON_CN66XX)) {
+		union cvmx_mio_qlmx_cfg mio_qlm_cfg;
+
+		/* QLM2 is SGMII0 and QLM1 is SGMII1 */
+		if (interface == 0)
+			mio_qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(2));
+		else if (interface == 1)
+			mio_qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(1));
+		else
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+		if (mio_qlm_cfg.s.qlm_spd == 15)
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+		if (mio_qlm_cfg.s.qlm_cfg == 9)
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		else if (mio_qlm_cfg.s.qlm_cfg == 11)
+			return CVMX_HELPER_INTERFACE_MODE_XAUI;
+		else
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	} else if (OCTEON_IS_MODEL(OCTEON_CN61XX)) {
+		union cvmx_mio_qlmx_cfg qlm_cfg;
+
+		if (interface == 0) {
+			qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(2));
+			if (qlm_cfg.s.qlm_cfg == 2)
+				return CVMX_HELPER_INTERFACE_MODE_SGMII;
+			else if (qlm_cfg.s.qlm_cfg == 3)
+				return CVMX_HELPER_INTERFACE_MODE_XAUI;
+			else
+				return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		} else if (interface == 1) {
+			qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(0));
+			if (qlm_cfg.s.qlm_cfg == 2)
+				return CVMX_HELPER_INTERFACE_MODE_SGMII;
+			else if (qlm_cfg.s.qlm_cfg == 3)
+				return CVMX_HELPER_INTERFACE_MODE_XAUI;
+			else
+				return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		}
+	} else if (OCTEON_IS_MODEL(OCTEON_CNF71XX)) {
+		if (interface == 0) {
+			union cvmx_mio_qlmx_cfg qlm_cfg;
+			qlm_cfg.u64 = cvmx_read_csr(CVMX_MIO_QLMX_CFG(0));
+			if (qlm_cfg.s.qlm_cfg == 2)
+				return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		}
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	}
+
+	if (interface == 1 && OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+	mode.u64 = cvmx_read_csr(CVMX_GMXX_INF_MODE(interface));
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		switch (mode.cn63xx.mode) {
+		case 0:
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		case 1:
+			return CVMX_HELPER_INTERFACE_MODE_XAUI;
+		default:
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		}
+	} else {
+		if (!mode.s.en)
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+		if (mode.s.type)
+			return CVMX_HELPER_INTERFACE_MODE_GMII;
+		else
+			return CVMX_HELPER_INTERFACE_MODE_RGMII;
+	}
+}
+
+/**
  * Get the operating mode of an interface. Depending on the Octeon
  * chip and configuration, this function returns an enumeration
  * of the type of packet I/O supported by an interface.
@@ -118,6 +270,20 @@
 cvmx_helper_interface_mode_t cvmx_helper_interface_get_mode(int interface)
 {
 	union cvmx_gmxx_inf_mode mode;
+
+	if (interface < 0 ||
+	    interface >= cvmx_helper_get_number_of_interfaces())
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+
+	/*
+	 * Octeon II models
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX))
+		return __cvmx_get_mode_octeon2(interface);
+
+	/*
+	 * Octeon and Octeon Plus models
+	 */
 	if (interface == 2)
 		return CVMX_HELPER_INTERFACE_MODE_NPI;
 

diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 3aa5b46..1b82ac6 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c

@@ -1260,11 +1260,13 @@
 	for (i = 0; i < 4; i++)
 		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 0, i + 40);
 
+	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI, 0, 45);
 	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_RML, 0, 46);
 	for (i = 0; i < 4; i++)
 		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_TIMER0, 0, i + 52);
 
 	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56);
+	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59);
 
 	/* CIU_1 */
 	for (i = 0; i < 16; i++)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index f1bec00..008e9c8 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c

@@ -729,17 +729,6 @@
 	octeon_write_lcd("Linux");
 #endif
 
-#ifdef CONFIG_CAVIUM_GDB
-	/*
-	 * When debugging the linux kernel, force the cores to enter
-	 * the debug exception handler to break in.
-	 */
-	if (octeon_get_boot_debug_flag()) {
-		cvmx_write_csr(CVMX_CIU_DINT, 1 << cvmx_get_core_num());
-		cvmx_read_csr(CVMX_CIU_DINT);
-	}
-#endif
-
 	octeon_setup_delays();
 
 	/*
@@ -779,12 +768,6 @@
 				MAX_MEMORY = 32ull << 30;
 			if (*p == '@')
 				RESERVE_LOW_MEM = memparse(p + 1, &p);
-		} else if (strcmp(arg, "ecc_verbose") == 0) {
-#ifdef CONFIG_CAVIUM_REPORT_SINGLE_BIT_ECC
-			__cvmx_interrupt_ecc_report_single_bit_errors = 1;
-			pr_notice("Reporting of single bit ECC errors is "
-				  "turned on\n");
-#endif
 #ifdef CONFIG_KEXEC
 		} else if (strncmp(arg, "crashkernel=", 12) == 0) {
 			crashk_size = memparse(arg+12, &p);

diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 67a078f..a7b3ae1 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c

@@ -218,15 +218,6 @@
  */
 static void octeon_smp_finish(void)
 {
-#ifdef CONFIG_CAVIUM_GDB
-	unsigned long tmp;
-	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
-	   to be not masked by this core so we know the signal is received by
-	   someone */
-	asm volatile ("dmfc0 %0, $22\n"
-		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
-#endif
-
 	octeon_user_io_init();
 
 	/* to generate the first CPU timer interrupt */
@@ -234,21 +225,6 @@
 	local_irq_enable();
 }
 
-/**
- * Hook for after all CPUs are online
- */
-static void octeon_cpus_done(void)
-{
-#ifdef CONFIG_CAVIUM_GDB
-	unsigned long tmp;
-	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
-	   to be not masked by this core so we know the signal is received by
-	   someone */
-	asm volatile ("dmfc0 %0, $22\n"
-		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
-#endif
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* State of each CPU. */
@@ -405,7 +381,6 @@
 	.send_ipi_mask		= octeon_send_ipi_mask,
 	.init_secondary		= octeon_init_secondary,
 	.smp_finish		= octeon_smp_finish,
-	.cpus_done		= octeon_cpus_done,
 	.boot_secondary		= octeon_boot_secondary,
 	.smp_setup		= octeon_smp_setup,
 	.prepare_cpus		= octeon_prepare_cpus,

diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig
index e3a3836..134879c 100644
--- a/arch/mips/configs/ath79_defconfig
+++ b/arch/mips/configs/ath79_defconfig

@@ -46,7 +46,6 @@
 CONFIG_MTD_REDBOOT_PARTS=y
 CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_JEDECPROBE=y
@@ -54,7 +53,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
-# CONFIG_M25PXX_USE_FAST_READ is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_PACKET_ENGINE is not set
 CONFIG_ATH_COMMON=m

diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index c99b6ee..a64b30b 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig

@@ -113,6 +113,7 @@
 CONFIG_MTD_NAND_ECC_BCH=y
 CONFIG_MTD_NAND_AU1550=y
 CONFIG_MTD_NAND_PLATFORM=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_EEPROM_AT24=y
 CONFIG_EEPROM_AT25=y
 CONFIG_SCSI_TGT=y

diff --git a/arch/mips/configs/maltasmtc_defconfig b/arch/mips/configs/maltasmtc_defconfig
deleted file mode 100644
index eb31644..0000000
--- a/arch/mips/configs/maltasmtc_defconfig
+++ /dev/null

@@ -1,196 +0,0 @@
-CONFIG_MIPS_MALTA=y
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_CPU_MIPS32_R2=y
-CONFIG_PAGE_SIZE_16KB=y
-CONFIG_MIPS_MT_SMTC=y
-# CONFIG_MIPS_MT_FPAFF is not set
-CONFIG_NR_CPUS=9
-CONFIG_HZ_48=y
-CONFIG_LOCALVERSION="smtc"
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PCI=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_NET_IPIP=m
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_ATALK=m
-CONFIG_DEV_APPLETALK=m
-CONFIG_IPDDP=m
-CONFIG_IPDDP_ENCAP=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
-# CONFIG_WIRELESS is not set
-CONFIG_DEVTMPFS=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_IDE=y
-# CONFIG_IDE_PROC_FS is not set
-# CONFIG_IDEPCI_PCIBUS_ORDER is not set
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_PIIX=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_NETDEVICES=y
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-CONFIG_PCNET32=y
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_NET_VENDOR_TOSHIBA is not set
-# CONFIG_NET_VENDOR_VIA is not set
-# CONFIG_WLAN is not set
-# CONFIG_VT is not set
-CONFIG_LEGACY_PTY_COUNT=16
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HW_RANDOM=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_FB=y
-CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_G=y
-CONFIG_USB=y
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
-CONFIG_USB_UHCI_HCD=y
-CONFIG_USB_STORAGE=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_TIMER=y
-CONFIG_LEDS_TRIGGER_IDE_DISK=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_LEDS_TRIGGER_BACKLIGHT=y
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_XFS_FS=y
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_QUOTA=y
-CONFIG_QFMT_V2=y
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=m
-CONFIG_CIFS_WEAK_PW_HASH=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_ISO8859_1=m
-# CONFIG_FTRACE is not set
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set

diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 10ef3be..f8a3231 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig

@@ -4,10 +4,9 @@
 CONFIG_PAGE_SIZE_16KB=y
 CONFIG_MIPS_MT_SMP=y
 CONFIG_SCHED_SMT=y
-CONFIG_MIPS_CMP=y
+CONFIG_MIPS_CPS=y
 CONFIG_NR_CPUS=8
 CONFIG_HZ_100=y
-CONFIG_LOCALVERSION="cmp"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y

diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 2d3002c..c83338a 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig

@@ -5,10 +5,9 @@
 CONFIG_PAGE_SIZE_16KB=y
 CONFIG_MIPS_MT_SMP=y
 CONFIG_SCHED_SMT=y
-CONFIG_MIPS_CMP=y
+CONFIG_MIPS_CPS=y
 CONFIG_NR_CPUS=8
 CONFIG_HZ_100=y
-CONFIG_LOCALVERSION="cmp"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y

diff --git a/arch/mips/configs/mips_paravirt_defconfig b/arch/mips/configs/mips_paravirt_defconfig
new file mode 100644
index 0000000..84cfcb4
--- /dev/null
+++ b/arch/mips/configs/mips_paravirt_defconfig

@@ -0,0 +1,103 @@
+CONFIG_MIPS_PARAVIRT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_SMP=y
+CONFIG_HZ_1000=y
+CONFIG_PREEMPT=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PCI=y
+CONFIG_MIPS32_COMPAT=y
+CONFIG_MIPS32_O32=y
+CONFIG_MIPS32_N32=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_PHYLIB=y
+CONFIG_MARVELL_PHY=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_BCM87XX_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_MMIO=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set

diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index d1741bc..d14ae2f 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig

@@ -81,7 +81,6 @@
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
@@ -89,6 +88,7 @@
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_EEPROM_93CX6=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y

diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index 56e6e2c..41bbffd 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c

@@ -23,6 +23,7 @@
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
 #include <asm/irq.h>
 #include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
@@ -748,6 +749,10 @@
 		cpu_fpu_mask = 0;
 		dec_interrupt[DEC_IRQ_FPU] = -1;
 	}
+	/* Free the halt interrupt unused on R4k systems.  */
+	if (current_cpu_type() == CPU_R4000SC ||
+	    current_cpu_type() == CPU_R4400SC)
+		dec_interrupt[DEC_IRQ_HALT] = -1;
 
 	/* Register board interrupts: FPU and cascade. */
 	if (dec_interrupt[DEC_IRQ_FPU] >= 0)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index b464b8b..935543f 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h

@@ -17,26 +17,8 @@
 #ifdef CONFIG_64BIT
 #include <asm/asmmacro-64.h>
 #endif
-#ifdef CONFIG_MIPS_MT_SMTC
-#include <asm/mipsmtregs.h>
-#endif
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	.macro	local_irq_enable reg=t0
-	mfc0	\reg, CP0_TCSTATUS
-	ori	\reg, \reg, TCSTATUS_IXMT
-	xori	\reg, \reg, TCSTATUS_IXMT
-	mtc0	\reg, CP0_TCSTATUS
-	_ehb
-	.endm
-
-	.macro	local_irq_disable reg=t0
-	mfc0	\reg, CP0_TCSTATUS
-	ori	\reg, \reg, TCSTATUS_IXMT
-	mtc0	\reg, CP0_TCSTATUS
-	_ehb
-	.endm
-#elif defined(CONFIG_CPU_MIPSR2)
+#ifdef CONFIG_CPU_MIPSR2
 	.macro	local_irq_enable reg=t0
 	ei
 	irq_enable_hazard
@@ -71,7 +53,7 @@
 	sw      \reg, TI_PRE_COUNT($28)
 #endif
 	.endm
-#endif /* CONFIG_MIPS_MT_SMTC */
+#endif /* CONFIG_CPU_MIPSR2 */
 
 	.macro	fpu_save_16even thread tmp=t0
 	cfc1	\tmp, fcr31
@@ -267,13 +249,35 @@
 	.set	pop
 	.endm
 #else
+
+#ifdef CONFIG_CPU_MICROMIPS
+#define CFC_MSA_INSN		0x587e0056
+#define CTC_MSA_INSN		0x583e0816
+#define LDD_MSA_INSN		0x58000837
+#define STD_MSA_INSN		0x5800083f
+#define COPY_UW_MSA_INSN	0x58f00056
+#define COPY_UD_MSA_INSN	0x58f80056
+#define INSERT_W_MSA_INSN	0x59300816
+#define INSERT_D_MSA_INSN	0x59380816
+#else
+#define CFC_MSA_INSN		0x787e0059
+#define CTC_MSA_INSN		0x783e0819
+#define LDD_MSA_INSN		0x78000823
+#define STD_MSA_INSN		0x78000827
+#define COPY_UW_MSA_INSN	0x78f00059
+#define COPY_UD_MSA_INSN	0x78f80059
+#define INSERT_W_MSA_INSN	0x79300819
+#define INSERT_D_MSA_INSN	0x79380819
+#endif
+
 	/*
 	 * Temporary until all toolchains in use include MSA support.
 	 */
 	.macro	cfcmsa	rd, cs
 	.set	push
 	.set	noat
-	.word	0x787e0059 | (\cs << 11)
+	.insn
+	.word	CFC_MSA_INSN | (\cs << 11)
 	move	\rd, $1
 	.set	pop
 	.endm
@@ -282,7 +286,7 @@
 	.set	push
 	.set	noat
 	move	$1, \rs
-	.word	0x783e0819 | (\cd << 6)
+	.word	CTC_MSA_INSN | (\cd << 6)
 	.set	pop
 	.endm
 
@@ -290,7 +294,7 @@
 	.set	push
 	.set	noat
 	add	$1, \base, \off
-	.word	0x78000823 | (\wd << 6)
+	.word	LDD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
 
@@ -298,14 +302,15 @@
 	.set	push
 	.set	noat
 	add	$1, \base, \off
-	.word	0x78000827 | (\wd << 6)
+	.word	STD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
 
 	.macro	copy_u_w	rd, ws, n
 	.set	push
 	.set	noat
-	.word	0x78f00059 | (\n << 16) | (\ws << 11)
+	.insn
+	.word	COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
 	/* move triggers an assembler bug... */
 	or	\rd, $1, zero
 	.set	pop
@@ -314,7 +319,8 @@
 	.macro	copy_u_d	rd, ws, n
 	.set	push
 	.set	noat
-	.word	0x78f80059 | (\n << 16) | (\ws << 11)
+	.insn
+	.word	COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
 	/* move triggers an assembler bug... */
 	or	\rd, $1, zero
 	.set	pop
@@ -325,7 +331,7 @@
 	.set	noat
 	/* move triggers an assembler bug... */
 	or	$1, \rs, zero
-	.word	0x79300819 | (\n << 16) | (\wd << 6)
+	.word	INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
 
@@ -334,7 +340,7 @@
 	.set	noat
 	/* move triggers an assembler bug... */
 	or	$1, \rs, zero
-	.word	0x79380819 | (\n << 16) | (\wd << 6)
+	.word	INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
 #endif

diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h
index e28a3e0..de781cf 100644
--- a/arch/mips/include/asm/branch.h
+++ b/arch/mips/include/asm/branch.h

@@ -8,6 +8,8 @@
 #ifndef _ASM_BRANCH_H
 #define _ASM_BRANCH_H
 
+#include <asm/cpu-features.h>
+#include <asm/mipsregs.h>
 #include <asm/ptrace.h>
 #include <asm/inst.h>
 
@@ -18,12 +20,40 @@
 extern int __microMIPS_compute_return_epc(struct pt_regs *regs);
 extern int __MIPS16e_compute_return_epc(struct pt_regs *regs);
 
+/*
+ * microMIPS bitfields
+ */
+#define MM_POOL32A_MINOR_MASK	0x3f
+#define MM_POOL32A_MINOR_SHIFT	0x6
+#define MM_MIPS32_COND_FC	0x30
+
+extern int __mm_isBranchInstr(struct pt_regs *regs,
+	struct mm_decoded_insn dec_insn, unsigned long *contpc);
+
+static inline int mm_isBranchInstr(struct pt_regs *regs,
+	struct mm_decoded_insn dec_insn, unsigned long *contpc)
+{
+	if (!cpu_has_mmips)
+		return 0;
+
+	return __mm_isBranchInstr(regs, dec_insn, contpc);
+}
 
 static inline int delay_slot(struct pt_regs *regs)
 {
 	return regs->cp0_cause & CAUSEF_BD;
 }
 
+static inline void clear_delay_slot(struct pt_regs *regs)
+{
+	regs->cp0_cause &= ~CAUSEF_BD;
+}
+
+static inline void set_delay_slot(struct pt_regs *regs)
+{
+	regs->cp0_cause |= CAUSEF_BD;
+}
+
 static inline unsigned long exception_epc(struct pt_regs *regs)
 {
 	if (likely(!delay_slot(regs)))

diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index 69468de..e08381a 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h

@@ -113,6 +113,12 @@
 
 extern void *kmap_coherent(struct page *page, unsigned long addr);
 extern void kunmap_coherent(void);
+extern void *kmap_noncoherent(struct page *page, unsigned long addr);
+
+static inline void kunmap_noncoherent(void)
+{
+	kunmap_coherent();
+}
 
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 static inline void flush_kernel_dcache_page(struct page *page)

diff --git a/arch/mips/include/asm/cmp.h b/arch/mips/include/asm/cmp.h
index 89a73fb..033d973 100644
--- a/arch/mips/include/asm/cmp.h
+++ b/arch/mips/include/asm/cmp.h

@@ -10,7 +10,6 @@
 extern void cmp_smp_finish(void);
 extern void cmp_boot_secondary(int cpu, struct task_struct *t);
 extern void cmp_init_secondary(void);
-extern void cmp_cpus_done(void);
 extern void cmp_prepare_cpus(unsigned int max_cpus);
 
 /* This is platform specific */

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index f56cc97..c7d8c99 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h

@@ -110,9 +110,15 @@
 #ifndef cpu_has_smartmips
 #define cpu_has_smartmips	(cpu_data[0].ases & MIPS_ASE_SMARTMIPS)
 #endif
+
 #ifndef cpu_has_rixi
-#define cpu_has_rixi		(cpu_data[0].options & MIPS_CPU_RIXI)
+# ifdef CONFIG_64BIT
+# define cpu_has_rixi		(cpu_data[0].options & MIPS_CPU_RIXI)
+# else /* CONFIG_32BIT */
+# define cpu_has_rixi		((cpu_data[0].options & MIPS_CPU_RIXI) && !cpu_has_64bits)
+# endif
 #endif
+
 #ifndef cpu_has_mmips
 # ifdef CONFIG_SYS_SUPPORTS_MICROMIPS
 #  define cpu_has_mmips		(cpu_data[0].options & MIPS_CPU_MICROMIPS)
@@ -120,6 +126,7 @@
 #  define cpu_has_mmips		0
 # endif
 #endif
+
 #ifndef cpu_has_vtag_icache
 #define cpu_has_vtag_icache	(cpu_data[0].icache.flags & MIPS_CACHE_VTAG)
 #endif
@@ -183,6 +190,17 @@
 /*
  * Shortcuts ...
  */
+#define cpu_has_mips_2_3_4_5	(cpu_has_mips_2 | cpu_has_mips_3_4_5)
+#define cpu_has_mips_3_4_5	(cpu_has_mips_3 | cpu_has_mips_4_5)
+#define cpu_has_mips_4_5	(cpu_has_mips_4 | cpu_has_mips_5)
+
+#define cpu_has_mips_2_3_4_5_r	(cpu_has_mips_2 | cpu_has_mips_3_4_5_r)
+#define cpu_has_mips_3_4_5_r	(cpu_has_mips_3 | cpu_has_mips_4_5_r)
+#define cpu_has_mips_4_5_r	(cpu_has_mips_4 | cpu_has_mips_5_r)
+#define cpu_has_mips_5_r	(cpu_has_mips_5 | cpu_has_mips_r)
+
+#define cpu_has_mips_4_5_r2	(cpu_has_mips_4_5 | cpu_has_mips_r2)
+
 #define cpu_has_mips32	(cpu_has_mips32r1 | cpu_has_mips32r2)
 #define cpu_has_mips64	(cpu_has_mips64r1 | cpu_has_mips64r2)
 #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1)

diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index ff2707a..47d5967 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h

@@ -65,18 +65,13 @@
 #ifdef CONFIG_64BIT
 	int			vmbits; /* Virtual memory size in bits */
 #endif
-#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_MIPS_MT_SMP
 	/*
-	 * In the MIPS MT "SMTC" model, each TC is considered
-	 * to be a "CPU" for the purposes of scheduling, but
-	 * exception resources, ASID spaces, etc, are common
-	 * to all TCs within the same VPE.
+	 * There is not necessarily a 1:1 mapping of VPE number to CPU number,
+	 * in particular on multi-core systems.
 	 */
 	int			vpe_id;	 /* Virtual Processor number */
 #endif
-#ifdef CONFIG_MIPS_MT_SMTC
-	int			tc_id;	 /* Thread Context number */
-#endif
 	void			*data;	/* Additional data */
 	unsigned int		watch_reg_count;   /* Number that exist */
 	unsigned int		watch_reg_use_cnt; /* Usable by ptrace */
@@ -117,7 +112,7 @@
 	unsigned long n;
 };
 
-#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_MIPS_MT_SMP
 # define cpu_vpe_id(cpuinfo)	((cpuinfo)->vpe_id)
 #else
 # define cpu_vpe_id(cpuinfo)	0

diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 72190613..b4e2bd8 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h

@@ -155,9 +155,6 @@
 	case CPU_RM7000:
 	case CPU_SR71000:
 #endif
-#ifdef CONFIG_SYS_HAS_CPU_RM9000
-	case CPU_RM9000:
-#endif
 #ifdef CONFIG_SYS_HAS_CPU_SB1
 	case CPU_SB1:
 	case CPU_SB1A:
@@ -166,6 +163,7 @@
 	case CPU_CAVIUM_OCTEON:
 	case CPU_CAVIUM_OCTEON_PLUS:
 	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
 #endif
 
 #if defined(CONFIG_SYS_HAS_CPU_BMIPS32_3300) || \

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 530eb8b..129d087 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h

@@ -201,6 +201,7 @@
 #define PRID_IMP_NETLOGIC_XLP3XX	0x1100
 #define PRID_IMP_NETLOGIC_XLP2XX	0x1200
 #define PRID_IMP_NETLOGIC_XLP9XX	0x1500
+#define PRID_IMP_NETLOGIC_XLP5XX	0x1300
 
 /*
  * Particular Revision values for bits 7:0 of the PRId register.
@@ -281,7 +282,7 @@
 	CPU_R4700, CPU_R5000, CPU_R5500, CPU_NEVADA, CPU_R5432, CPU_R10000,
 	CPU_R12000, CPU_R14000, CPU_VR41XX, CPU_VR4111, CPU_VR4121, CPU_VR4122,
 	CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000,
-	CPU_SR71000, CPU_RM9000, CPU_TX49XX,
+	CPU_SR71000, CPU_TX49XX,
 
 	/*
 	 * R8000 class processors

diff --git a/arch/mips/include/asm/dec/kn05.h b/arch/mips/include/asm/dec/kn05.h
index 56d22dc..8e14f67 100644
--- a/arch/mips/include/asm/dec/kn05.h
+++ b/arch/mips/include/asm/dec/kn05.h

@@ -49,12 +49,20 @@
 #define KN4K_RES_15	(15*IOASIC_SLOT_SIZE)	/* unused? */
 
 /*
+ * MB ASIC interrupt bits.
+ */
+#define KN4K_MB_INR_MB		4	/* ??? */
+#define KN4K_MB_INR_MT		3	/* memory, I/O bus read/write errors */
+#define KN4K_MB_INR_RES_2	2	/* unused */
+#define KN4K_MB_INR_RTC		1	/* RTC */
+#define KN4K_MB_INR_TC		0	/* I/O ASIC cascade */
+
+/*
  * Bits for the MB interrupt register.
  * The register appears read-only.
  */
-#define KN4K_MB_INT_TC		(1<<0)		/* TURBOchannel? */
-#define KN4K_MB_INT_RTC		(1<<1)		/* RTC? */
-#define KN4K_MB_INT_MT		(1<<3)		/* I/O ASIC cascade */
+#define KN4K_MB_INT_IRQ		(0x1f<<0)	/* CPU Int[4:0] status. */
+#define KN4K_MB_INT_IRQ_N(n)	(1<<(n))	/* Individual status bits. */
 
 /*
  * Bits for the MB control & status register.
@@ -70,6 +78,7 @@
 #define KN4K_MB_CSR_NC		(1<<14)		/* ??? */
 #define KN4K_MB_CSR_EE		(1<<15)		/* (bus) Exception Enable? */
 #define KN4K_MB_CSR_MSK		(0x1f<<16)	/* CPU Int[4:0] mask */
+#define KN4K_MB_CSR_MSK_N(n)	(1<<((n)+16))	/* Individual mask bits. */
 #define KN4K_MB_CSR_FW		(1<<21)		/* ??? */
 #define KN4K_MB_CSR_W		(1<<31)		/* ??? */
 

diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h
index 8c012af..6842ffa 100644
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h

@@ -48,11 +48,7 @@
 enum fixed_addresses {
 #define FIX_N_COLOURS 8
 	FIX_CMAP_BEGIN,
-#ifdef CONFIG_MIPS_MT_SMTC
-	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS * 2),
-#else
 	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * 2),
-#endif
 #ifdef CONFIG_HIGHMEM
 	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_BEGIN = FIX_CMAP_END + 1,

diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 4d86b72..a939574 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h

@@ -17,6 +17,7 @@
 #include <asm/mipsregs.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
+#include <asm/fpu_emulator.h>
 #include <asm/hazards.h>
 #include <asm/processor.h>
 #include <asm/current.h>
@@ -28,7 +29,6 @@
 struct sigcontext;
 struct sigcontext32;
 
-extern void fpu_emulator_init_fpu(void);
 extern void _init_fpu(void);
 extern void _save_fp(struct task_struct *);
 extern void _restore_fp(struct task_struct *);
@@ -156,15 +156,16 @@
 	int ret = 0;
 
 	preempt_disable();
+
 	if (cpu_has_fpu) {
 		ret = __own_fpu();
 		if (!ret)
 			_init_fpu();
-	} else {
+	} else
 		fpu_emulator_init_fpu();
-	}
 
 	preempt_enable();
+
 	return ret;
 }
 

diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 2abb587..0195745 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h

@@ -23,9 +23,12 @@
 #ifndef _ASM_FPU_EMULATOR_H
 #define _ASM_FPU_EMULATOR_H
 
+#include <linux/sched.h>
 #include <asm/break.h>
+#include <asm/thread_info.h>
 #include <asm/inst.h>
 #include <asm/local.h>
+#include <asm/processor.h>
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -36,6 +39,11 @@
 	local_t cp1ops;
 	local_t cp1xops;
 	local_t errors;
+	local_t ieee754_inexact;
+	local_t ieee754_underflow;
+	local_t ieee754_overflow;
+	local_t ieee754_zerodiv;
+	local_t ieee754_invalidop;
 };
 
 DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
@@ -71,4 +79,17 @@
  */
 #define BREAK_MATH (0x0000000d | (BRK_MEMU << 16))
 
+#define SIGNALLING_NAN 0x7ff800007ff80000LL
+
+static inline void fpu_emulator_init_fpu(void)
+{
+	struct task_struct *t = current;
+	int i;
+
+	t->thread.fpu.fcr31 = 0;
+
+	for (i = 0; i < 32; i++)
+		set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
+}
+
 #endif /* _ASM_FPU_EMULATOR_H */

diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h
index 0827166..10f6a99 100644
--- a/arch/mips/include/asm/gic.h
+++ b/arch/mips/include/asm/gic.h

@@ -380,6 +380,7 @@
 extern cycle_t gic_read_count(void);
 extern cycle_t gic_read_compare(void);
 extern void gic_write_compare(cycle_t cnt);
+extern void gic_write_cpu_compare(cycle_t cnt, int cpu);
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);

diff --git a/arch/mips/include/asm/gio_device.h b/arch/mips/include/asm/gio_device.h
index 0878701..4be1a57 100644
--- a/arch/mips/include/asm/gio_device.h
+++ b/arch/mips/include/asm/gio_device.h

@@ -50,7 +50,7 @@
 extern int gio_register_driver(struct gio_driver *);
 extern void gio_unregister_driver(struct gio_driver *);
 
-#define gio_get_drvdata(_dev)	     drv_get_drvdata(&(_dev)->dev)
-#define gio_set_drvdata(_dev, data)  drv_set_drvdata(&(_dev)->dev, (data))
+#define gio_get_drvdata(_dev)	     dev_get_drvdata(&(_dev)->dev)
+#define gio_set_drvdata(_dev, data)  dev_set_drvdata(&(_dev)->dev, (data))
 
 extern void gio_set_master(struct gio_device *);

diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h
index d192158..d9f932d 100644
--- a/arch/mips/include/asm/idle.h
+++ b/arch/mips/include/asm/idle.h

@@ -1,6 +1,7 @@
 #ifndef __ASM_IDLE_H
 #define __ASM_IDLE_H
 
+#include <linux/cpuidle.h>
 #include <linux/linkage.h>
 
 extern void (*cpu_wait)(void);
@@ -20,4 +21,17 @@
 	       addr < (unsigned long)__pastwait;
 }
 
+extern int mips_cpuidle_wait_enter(struct cpuidle_device *dev,
+				   struct cpuidle_driver *drv, int index);
+
+#define MIPS_CPUIDLE_WAIT_STATE {\
+	.enter			= mips_cpuidle_wait_enter,\
+	.exit_latency		= 1,\
+	.target_residency	= 1,\
+	.power_usage		= UINT_MAX,\
+	.flags			= CPUIDLE_FLAG_TIME_VALID,\
+	.name			= "wait",\
+	.desc			= "MIPS wait",\
+}
+
 #endif /* __ASM_IDLE_H  */

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 7bc2cdb..ae1f7b2 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h

@@ -26,104 +26,8 @@
 #define irq_canonicalize(irq) (irq)	/* Sane hardware, sane code ... */
 #endif
 
-#ifdef CONFIG_MIPS_MT_SMTC
-
-struct irqaction;
-
-extern unsigned long irq_hwmask[];
-extern int setup_irq_smtc(unsigned int irq, struct irqaction * new,
-			  unsigned long hwmask);
-
-static inline void smtc_im_ack_irq(unsigned int irq)
-{
-	if (irq_hwmask[irq] & ST0_IM)
-		set_c0_status(irq_hwmask[irq] & ST0_IM);
-}
-
-#else
-
-static inline void smtc_im_ack_irq(unsigned int irq)
-{
-}
-
-#endif /* CONFIG_MIPS_MT_SMTC */
-
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-#include <linux/cpumask.h>
-
-extern int plat_set_irq_affinity(struct irq_data *d,
-				 const struct cpumask *affinity, bool force);
-extern void smtc_forward_irq(struct irq_data *d);
-
-/*
- * IRQ affinity hook invoked at the beginning of interrupt dispatch
- * if option is enabled.
- *
- * Up through Linux 2.6.22 (at least) cpumask operations are very
- * inefficient on MIPS.	 Initial prototypes of SMTC IRQ affinity
- * used a "fast path" per-IRQ-descriptor cache of affinity information
- * to reduce latency.  As there is a project afoot to optimize the
- * cpumask implementations, this version is optimistically assuming
- * that cpumask.h macro overhead is reasonable during interrupt dispatch.
- */
-static inline int handle_on_other_cpu(unsigned int irq)
-{
-	struct irq_data *d = irq_get_irq_data(irq);
-
-	if (cpumask_test_cpu(smp_processor_id(), d->affinity))
-		return 0;
-	smtc_forward_irq(d);
-	return 1;
-}
-
-#else /* Not doing SMTC affinity */
-
-static inline int handle_on_other_cpu(unsigned int irq) { return 0; }
-
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
-
-#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
-
-static inline void smtc_im_backstop(unsigned int irq)
-{
-	if (irq_hwmask[irq] & 0x0000ff00)
-		write_c0_tccontext(read_c0_tccontext() &
-				   ~(irq_hwmask[irq] & 0x0000ff00));
-}
-
-/*
- * Clear interrupt mask handling "backstop" if irq_hwmask
- * entry so indicates. This implies that the ack() or end()
- * functions will take over re-enabling the low-level mask.
- * Otherwise it will be done on return from exception.
- */
-static inline int smtc_handle_on_other_cpu(unsigned int irq)
-{
-	int ret = handle_on_other_cpu(irq);
-
-	if (!ret)
-		smtc_im_backstop(irq);
-	return ret;
-}
-
-#else
-
-static inline void smtc_im_backstop(unsigned int irq) { }
-static inline int smtc_handle_on_other_cpu(unsigned int irq)
-{
-	return handle_on_other_cpu(irq);
-}
-
-#endif
-
 extern void do_IRQ(unsigned int irq);
 
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-
-extern void do_IRQ_no_affinity(unsigned int irq);
-
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
-
 extern void arch_init_irq(void);
 extern void spurious_interrupt(void);
 

diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index 45c0095..0fa5fdc 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h

@@ -17,7 +17,7 @@
 #include <linux/stringify.h>
 #include <asm/hazards.h>
 
-#if defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_CPU_MIPSR2
 
 static inline void arch_local_irq_disable(void)
 {
@@ -118,30 +118,15 @@
 unsigned long arch_local_irq_save(void);
 void arch_local_irq_restore(unsigned long flags);
 void __arch_local_irq_restore(unsigned long flags);
-#endif /* if defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_MIPS_MT_SMTC) */
-
-
-extern void smtc_ipi_replay(void);
+#endif /* CONFIG_CPU_MIPSR2 */
 
 static inline void arch_local_irq_enable(void)
 {
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC kernel needs to do a software replay of queued
-	 * IPIs, at the cost of call overhead on each local_irq_enable()
-	 */
-	smtc_ipi_replay();
-#endif
 	__asm__ __volatile__(
 	"	.set	push						\n"
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	$1, $2, 1	# SMTC - clear TCStatus.IXMT	\n"
-	"	ori	$1, 0x400					\n"
-	"	xori	$1, 0x400					\n"
-	"	mtc0	$1, $2, 1					\n"
-#elif defined(CONFIG_CPU_MIPSR2)
+#if   defined(CONFIG_CPU_MIPSR2)
 	"	ei							\n"
 #else
 	"	mfc0	$1,$12						\n"
@@ -163,11 +148,7 @@
 	asm __volatile__(
 	"	.set	push						\n"
 	"	.set	reorder						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	%[flags], $2, 1					\n"
-#else
 	"	mfc0	%[flags], $12					\n"
-#endif
 	"	.set	pop						\n"
 	: [flags] "=r" (flags));
 
@@ -177,14 +158,7 @@
 
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC model uses TCStatus.IXMT to disable interrupts for a thread/CPU
-	 */
-	return flags & 0x400;
-#else
 	return !(flags & 1);
-#endif
 }
 
 #endif /* #ifndef __ASSEMBLY__ */

diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
new file mode 100644
index 0000000..5a9aa91
--- /dev/null
+++ b/arch/mips/include/asm/kvm_para.h

@@ -0,0 +1,109 @@
+#ifndef _ASM_MIPS_KVM_PARA_H
+#define _ASM_MIPS_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+
+#define KVM_HYPERCALL ".word 0x42000028"
+
+/*
+ * Hypercalls for KVM.
+ *
+ * Hypercall number is passed in v0.
+ * Return value will be placed in v0.
+ * Up to 3 arguments are passed in a0, a1, and a2.
+ */
+static inline unsigned long kvm_hypercall0(unsigned long num)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+
+	n = num;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall1(unsigned long num,
+					unsigned long arg0)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+
+	n = num;
+	a0 = arg0;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall2(unsigned long num,
+					unsigned long arg0, unsigned long arg1)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+	register unsigned long a1 asm("a1");
+
+	n = num;
+	a0 = arg0;
+	a1 = arg1;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0), "r" (a1) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall3(unsigned long num,
+	unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+	register unsigned long a1 asm("a1");
+	register unsigned long a2 asm("a2");
+
+	n = num;
+	a0 = arg0;
+	a1 = arg1;
+	a2 = arg2;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0), "r" (a1), "r" (a2) : "memory"
+		);
+
+	return r;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+#ifdef CONFIG_MIPS_PARAVIRT
+static inline bool kvm_para_available(void)
+{
+	return true;
+}
+#else
+static inline bool kvm_para_available(void)
+{
+	return false;
+}
+#endif
+
+
+#endif /* _ASM_MIPS_KVM_PARA_H */

diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index 94ed063..cf80228 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h

@@ -22,7 +22,6 @@
 #define cpu_has_3k_cache	0
 #define cpu_has_4k_cache	0
 #define cpu_has_tx39_cache	0
-#define cpu_has_fpu		0
 #define cpu_has_counter		1
 #define cpu_has_watch		1
 #define cpu_has_divec		1

diff --git a/arch/mips/include/asm/mach-cavium-octeon/irq.h b/arch/mips/include/asm/mach-cavium-octeon/irq.h
index 60fc4c3..cceae32 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/irq.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h

@@ -35,6 +35,8 @@
 	OCTEON_IRQ_PCI_MSI2,
 	OCTEON_IRQ_PCI_MSI3,
 
+	OCTEON_IRQ_TWSI,
+	OCTEON_IRQ_TWSI2,
 	OCTEON_IRQ_RML,
 	OCTEON_IRQ_TIMER0,
 	OCTEON_IRQ_TIMER1,

diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
index 1bcb642..1dfe474 100644
--- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h

@@ -39,6 +39,10 @@
 #define cpu_has_nofpuex		0
 #define cpu_has_64bits		1
 
+#define cpu_has_mips_2		1
+#define cpu_has_mips_3		1
+#define cpu_has_mips_5		0
+
 #define cpu_has_mips32r1	0
 #define cpu_has_mips32r2	0
 #define cpu_has_mips64r1	0

diff --git a/arch/mips/include/asm/mach-malta/kernel-entry-init.h b/arch/mips/include/asm/mach-malta/kernel-entry-init.h
index 7c5e17a..77eeda7 100644
--- a/arch/mips/include/asm/mach-malta/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-malta/kernel-entry-init.h

@@ -80,36 +80,6 @@
 	.endm
 
 	.macro	kernel_entry_setup
-#ifdef CONFIG_MIPS_MT_SMTC
-	mfc0	t0, CP0_CONFIG
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 1
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 2
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 3
-	and	t0, 1<<2
-	bnez	t0, 0f
-9:
-	/* Assume we came from YAMON... */
-	PTR_LA	v0, 0x9fc00534	/* YAMON print */
-	lw	v0, (v0)
-	move	a0, zero
-	PTR_LA	a1, nonmt_processor
-	jal	v0
-
-	PTR_LA	v0, 0x9fc00520	/* YAMON exit */
-	lw	v0, (v0)
-	li	a0, 1
-	jal	v0
-
-1:	b	1b
-
-	__INITDATA
-nonmt_processor:
-	.asciz	"SMTC kernel requires the MT ASE to run\n"
-	__FINIT
-#endif
 
 #ifdef CONFIG_EVA
 	sync

diff --git a/arch/mips/include/asm/mach-malta/malta-pm.h b/arch/mips/include/asm/mach-malta/malta-pm.h
new file mode 100644
index 0000000..c2c2e20
--- /dev/null
+++ b/arch/mips/include/asm/mach-malta/malta-pm.h

@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_MIPS_MACH_MALTA_PM_H__
+#define __ASM_MIPS_MACH_MALTA_PM_H__
+
+#include <asm/mips-boards/piix4.h>
+
+#ifdef CONFIG_MIPS_MALTA_PM
+
+/**
+ * mips_pm_suspend - enter a suspend state
+ * @state: the state to enter, one of PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_*
+ *
+ * Enters a suspend state via the Malta's PIIX4. If the state to be entered
+ * is one which loses context (eg. SOFF) then this function will never
+ * return.
+ */
+extern int mips_pm_suspend(unsigned state);
+
+#else /* !CONFIG_MIPS_MALTA_PM */
+
+static inline int mips_pm_suspend(unsigned state)
+{
+	return -EINVAL;
+}
+
+#endif /* !CONFIG_MIPS_MALTA_PM */
+
+#endif /* __ASM_MIPS_MACH_MALTA_PM_H__ */

diff --git a/arch/mips/include/asm/mach-netlogic/topology.h b/arch/mips/include/asm/mach-netlogic/topology.h
index 0da99fa..ceeb1f5 100644
--- a/arch/mips/include/asm/mach-netlogic/topology.h
+++ b/arch/mips/include/asm/mach-netlogic/topology.h

@@ -10,10 +10,12 @@
 
 #include <asm/mach-netlogic/multi-node.h>
 
+#ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)	cpu_to_node(cpu)
 #define topology_core_id(cpu)	(cpu_logical_map(cpu) / NLM_THREADS_PER_CORE)
 #define topology_thread_cpumask(cpu)		(&cpu_sibling_map[cpu])
 #define topology_core_cpumask(cpu)	cpumask_of_node(cpu_to_node(cpu))
+#endif
 
 #include <asm-generic/topology.h>
 

diff --git a/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h
new file mode 100644
index 0000000..725e1ed
--- /dev/null
+++ b/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h

@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+#ifndef __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_4kex		1
+#define cpu_has_3k_cache	0
+#define cpu_has_tx39_cache	0
+#define cpu_has_counter		1
+#define cpu_has_llsc		1
+/*
+ * We disable LL/SC on non-SMP systems, as disabling interrupts for
+ * atomic access is faster than LL/SC.
+ */
+#ifdef CONFIG_SMP
+# define kernel_uses_llsc	1
+#else
+# define kernel_uses_llsc	0
+#endif
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define cpu_dcache_line_size()	128
+#define cpu_icache_line_size()	128
+#define cpu_has_octeon_cache	1
+#define cpu_has_4k_cache	0
+#else
+#define cpu_has_octeon_cache	0
+#define cpu_has_4k_cache	1
+#endif
+
+#endif /* __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H */

diff --git a/arch/mips/include/asm/mach-paravirt/irq.h b/arch/mips/include/asm/mach-paravirt/irq.h
new file mode 100644
index 0000000..9b4d35e
--- /dev/null
+++ b/arch/mips/include/asm/mach-paravirt/irq.h

@@ -0,0 +1,19 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+#ifndef __ASM_MACH_PARAVIRT_IRQ_H__
+#define  __ASM_MACH_PARAVIRT_IRQ_H__
+
+#define NR_IRQS 64
+#define MIPS_CPU_IRQ_BASE 1
+
+#define MIPS_IRQ_PCIA (MIPS_CPU_IRQ_BASE + 8)
+
+#define MIPS_IRQ_MBOX0 (MIPS_CPU_IRQ_BASE + 32)
+#define MIPS_IRQ_MBOX1 (MIPS_CPU_IRQ_BASE + 33)
+
+#endif /* __ASM_MACH_PARAVIRT_IRQ_H__ */

diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
new file mode 100644
index 0000000..2f82bfa
--- /dev/null
+++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h

@@ -0,0 +1,50 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc
+ */
+#ifndef __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H
+#define __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H
+
+#define CP0_EBASE $15, 1
+
+	.macro  kernel_entry_setup
+	mfc0	t0, CP0_EBASE
+	andi	t0, t0, 0x3ff		# CPUNum
+	beqz	t0, 1f
+	# CPUs other than zero goto smp_bootstrap
+	j	smp_bootstrap
+
+1:
+	.endm
+
+/*
+ * Do SMP slave processor setup necessary before we can safely execute
+ * C code.
+ */
+	.macro  smp_slave_setup
+	mfc0	t0, CP0_EBASE
+	andi	t0, t0, 0x3ff		# CPUNum
+	slti	t1, t0, NR_CPUS
+	bnez	t1, 1f
+2:
+	di
+	wait
+	b	2b			# Unknown CPU, loop forever.
+1:
+	PTR_LA	t1, paravirt_smp_sp
+	PTR_SLL	t0, PTR_SCALESHIFT
+	PTR_ADDU t1, t1, t0
+3:
+	PTR_L	sp, 0(t1)
+	beqz	sp, 3b			# Spin until told to proceed.
+
+	PTR_LA	t1, paravirt_smp_gp
+	PTR_ADDU t1, t1, t0
+	sync
+	PTR_L	gp, 0(t1)
+	.endm
+
+#endif /* __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H */

diff --git a/arch/mips/include/asm/mach-paravirt/war.h b/arch/mips/include/asm/mach-paravirt/war.h
new file mode 100644
index 0000000..36d3afb
--- /dev/null
+++ b/arch/mips/include/asm/mach-paravirt/war.h

@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2013 Cavium Networks <support@caviumnetworks.com>
+ */
+#ifndef __ASM_MIPS_MACH_PARAVIRT_WAR_H
+#define __ASM_MIPS_MACH_PARAVIRT_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_PARAVIRT_WAR_H */

diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h
index aa45e6a..fe1566f 100644
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h
+++ b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h

@@ -25,11 +25,7 @@
 #ifndef MSP_USB_H_
 #define MSP_USB_H_
 
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-#define NUM_USB_DEVS   2
-#else
 #define NUM_USB_DEVS   1
-#endif
 
 /* Register spaces for USB host 0 */
 #define MSP_USB0_MAB_START	(MSP_USB0_BASE + 0x0)

diff --git a/arch/mips/include/asm/mach-ralink/war.h b/arch/mips/include/asm/mach-ralink/war.h
index a7b712c..c074b5d 100644
--- a/arch/mips/include/asm/mach-ralink/war.h
+++ b/arch/mips/include/asm/mach-ralink/war.h

@@ -17,7 +17,6 @@
 #define MIPS4K_ICACHE_REFILL_WAR	0
 #define MIPS_CACHE_SYNC_WAR		0
 #define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define RM9000_CDEX_SMP_WAR		0
 #define ICACHE_REFILLS_WORKAROUND_WAR	0
 #define R10000_LLSC_WAR			0
 #define MIPS34K_MISSED_ITLB_WAR		0

diff --git a/arch/mips/include/asm/mach-sead3/kernel-entry-init.h b/arch/mips/include/asm/mach-sead3/kernel-entry-init.h
index 3dfbd8e..6cccd4d 100644
--- a/arch/mips/include/asm/mach-sead3/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-sead3/kernel-entry-init.h

@@ -10,37 +10,6 @@
 #define __ASM_MACH_MIPS_KERNEL_ENTRY_INIT_H
 
 	.macro	kernel_entry_setup
-#ifdef CONFIG_MIPS_MT_SMTC
-	mfc0	t0, CP0_CONFIG
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 1
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 2
-	bgez	t0, 9f
-	mfc0	t0, CP0_CONFIG, 3
-	and	t0, 1<<2
-	bnez	t0, 0f
-9 :
-	/* Assume we came from YAMON... */
-	PTR_LA	v0, 0x9fc00534	/* YAMON print */
-	lw	v0, (v0)
-	move	a0, zero
-	PTR_LA	a1, nonmt_processor
-	jal	v0
-
-	PTR_LA	v0, 0x9fc00520	/* YAMON exit */
-	lw	v0, (v0)
-	li	a0, 1
-	jal	v0
-
-1 :	b	1b
-
-	__INITDATA
-nonmt_processor :
-	.asciz	"SMTC kernel requires the MT ASE to run\n"
-	__FINIT
-0 :
-#endif
 	.endm
 
 /*

diff --git a/arch/mips/include/asm/mips-boards/piix4.h b/arch/mips/include/asm/mips-boards/piix4.h
index 9cf5404..9e340be 100644
--- a/arch/mips/include/asm/mips-boards/piix4.h
+++ b/arch/mips/include/asm/mips-boards/piix4.h

@@ -55,4 +55,16 @@
 #define PIIX4_FUNC3_PMREGMISC			0x80
 #define   PIIX4_FUNC3_PMREGMISC_EN			(1 << 0)
 
+/* Power Management IO Space */
+#define PIIX4_FUNC3IO_PMSTS			0x00
+#define   PIIX4_FUNC3IO_PMSTS_PWRBTN_STS		(1 << 8)
+#define PIIX4_FUNC3IO_PMCNTRL			0x04
+#define   PIIX4_FUNC3IO_PMCNTRL_SUS_EN			(1 << 13)
+#define   PIIX4_FUNC3IO_PMCNTRL_SUS_TYP			(0x7 << 10)
+#define   PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_SOFF		(0x0 << 10)
+#define   PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_STR		(0x1 << 10)
+
+/* Data for magic special PCI cycle */
+#define PIIX4_SUSPEND_MAGIC			0x00120002
+
 #endif /* __ASM_MIPS_BOARDS_PIIX4_H */

diff --git a/arch/mips/include/asm/mips-cpc.h b/arch/mips/include/asm/mips-cpc.h
index 988507e..e139a53 100644
--- a/arch/mips/include/asm/mips-cpc.h
+++ b/arch/mips/include/asm/mips-cpc.h

@@ -72,7 +72,12 @@
 #define MIPS_CPC_COCB_OFS	0x4000
 
 /* Macros to ease the creation of register access functions */
-#define BUILD_CPC_R_(name, off) \
+#define BUILD_CPC_R_(name, off)					\
+static inline u32 *addr_cpc_##name(void)			\
+{								\
+	return (u32 *)(mips_cpc_base + (off));			\
+}								\
+								\
 static inline u32 read_cpc_##name(void)				\
 {								\
 	return __raw_readl(mips_cpc_base + (off));		\
@@ -147,4 +152,31 @@
 #define CPC_Cx_OTHER_CORENUM_SHF		16
 #define CPC_Cx_OTHER_CORENUM_MSK		(_ULCAST_(0xff) << 16)
 
+#ifdef CONFIG_MIPS_CPC
+
+/**
+ * mips_cpc_lock_other - lock access to another core
+ * @core: the other core to be accessed
+ *
+ * Call before operating upon a core via the 'other' register region in
+ * order to prevent the region being moved during access. Must be followed
+ * by a call to mips_cpc_unlock_other.
+ */
+extern void mips_cpc_lock_other(unsigned int core);
+
+/**
+ * mips_cpc_unlock_other - unlock access to another core
+ *
+ * Call after operating upon another core via the 'other' register region.
+ * Must be called after mips_cpc_lock_other.
+ */
+extern void mips_cpc_unlock_other(void);
+
+#else /* !CONFIG_MIPS_CPC */
+
+static inline void mips_cpc_lock_other(unsigned int core) { }
+static inline void mips_cpc_unlock_other(void) { }
+
+#endif /* !CONFIG_MIPS_CPC */
+
 #endif /* __MIPS_ASM_MIPS_CPC_H__ */

diff --git a/arch/mips/include/asm/mips_mt.h b/arch/mips/include/asm/mips_mt.h
index a3df0c3..f6ba004 100644
--- a/arch/mips/include/asm/mips_mt.h
+++ b/arch/mips/include/asm/mips_mt.h

@@ -1,7 +1,6 @@
 /*
- * Definitions and decalrations for MIPS MT support
- * that are common between SMTC, VSMP, and/or AP/SP
- * kernel models.
+ * Definitions and declarations for MIPS MT support that are common between
+ * the VSMP and AP/SP kernel models.
  */
 #ifndef __ASM_MIPS_MT_H
 #define __ASM_MIPS_MT_H

diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index 6efa79a..5f8052c 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h

@@ -36,6 +36,8 @@
 
 #define read_c0_tcbind()		__read_32bit_c0_register($2, 2)
 
+#define write_c0_tchalt(val)		__write_32bit_c0_register($2, 4, val)
+
 #define read_c0_tccontext()		__read_32bit_c0_register($2, 5)
 #define write_c0_tccontext(val)		__write_32bit_c0_register($2, 5, val)
 

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 3e025b5..98e9754 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h

@@ -709,11 +709,18 @@
 #ifndef __ASSEMBLY__
 
 /*
- * Macros for handling the ISA mode bit for microMIPS.
+ * Macros for handling the ISA mode bit for MIPS16 and microMIPS.
  */
+#if defined(CONFIG_SYS_SUPPORTS_MIPS16) || \
+    defined(CONFIG_SYS_SUPPORTS_MICROMIPS)
 #define get_isa16_mode(x)		((x) & 0x1)
 #define msk_isa16_mode(x)		((x) & ~0x1)
 #define set_isa16_mode(x)		do { (x) |= 0x1; } while(0)
+#else
+#define get_isa16_mode(x)		0
+#define msk_isa16_mode(x)		(x)
+#define set_isa16_mode(x)		do { } while(0)
+#endif
 
 /*
  * microMIPS instructions can be 16-bit or 32-bit in length. This
@@ -1007,19 +1014,8 @@
 #define write_c0_compare3(val)	__write_32bit_c0_register($11, 7, val)
 
 #define read_c0_status()	__read_32bit_c0_register($12, 0)
-#ifdef CONFIG_MIPS_MT_SMTC
-#define write_c0_status(val)						\
-do {									\
-	__write_32bit_c0_register($12, 0, val);				\
-	__ehb();							\
-} while (0)
-#else
-/*
- * Legacy non-SMTC code, which may be hazardous
- * but which might not support EHB
- */
+
 #define write_c0_status(val)	__write_32bit_c0_register($12, 0, val)
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 #define read_c0_cause()		__read_32bit_c0_register($13, 0)
 #define write_c0_cause(val)	__write_32bit_c0_register($13, 0, val)
@@ -1743,11 +1739,6 @@
 /*
  * Manipulate bits in a c0 register.
  */
-#ifndef CONFIG_MIPS_MT_SMTC
-/*
- * SMTC Linux requires shutting-down microthread scheduling
- * during CP0 register read-modify-write sequences.
- */
 #define __BUILD_SET_C0(name)					\
 static inline unsigned int					\
 set_c0_##name(unsigned int set)					\
@@ -1786,121 +1777,6 @@
 	return res;						\
 }
 
-#else /* SMTC versions that manage MT scheduling */
-
-#include <linux/irqflags.h>
-
-/*
- * This is a duplicate of dmt() in mipsmtregs.h to avoid problems with
- * header file recursion.
- */
-static inline unsigned int __dmt(void)
-{
-	int res;
-
-	__asm__ __volatile__(
-	"	.set	push						\n"
-	"	.set	mips32r2					\n"
-	"	.set	noat						\n"
-	"	.word	0x41610BC1			# dmt $1	\n"
-	"	ehb							\n"
-	"	move	%0, $1						\n"
-	"	.set	pop						\n"
-	: "=r" (res));
-
-	instruction_hazard();
-
-	return res;
-}
-
-#define __VPECONTROL_TE_SHIFT	15
-#define __VPECONTROL_TE		(1UL << __VPECONTROL_TE_SHIFT)
-
-#define __EMT_ENABLE		__VPECONTROL_TE
-
-static inline void __emt(unsigned int previous)
-{
-	if ((previous & __EMT_ENABLE))
-		__asm__ __volatile__(
-		"	.set	mips32r2				\n"
-		"	.word	0x41600be1		# emt		\n"
-		"	ehb						\n"
-		"	.set	mips0					\n");
-}
-
-static inline void __ehb(void)
-{
-	__asm__ __volatile__(
-	"	.set	mips32r2					\n"
-	"	ehb							\n"		"	.set	mips0						\n");
-}
-
-/*
- * Note that local_irq_save/restore affect TC-specific IXMT state,
- * not Status.IE as in non-SMTC kernel.
- */
-
-#define __BUILD_SET_C0(name)					\
-static inline unsigned int					\
-set_c0_##name(unsigned int set)					\
-{								\
-	unsigned int res;					\
-	unsigned int new;					\
-	unsigned int omt;					\
-	unsigned long flags;					\
-								\
-	local_irq_save(flags);					\
-	omt = __dmt();						\
-	res = read_c0_##name();					\
-	new = res | set;					\
-	write_c0_##name(new);					\
-	__emt(omt);						\
-	local_irq_restore(flags);				\
-								\
-	return res;						\
-}								\
-								\
-static inline unsigned int					\
-clear_c0_##name(unsigned int clear)				\
-{								\
-	unsigned int res;					\
-	unsigned int new;					\
-	unsigned int omt;					\
-	unsigned long flags;					\
-								\
-	local_irq_save(flags);					\
-	omt = __dmt();						\
-	res = read_c0_##name();					\
-	new = res & ~clear;					\
-	write_c0_##name(new);					\
-	__emt(omt);						\
-	local_irq_restore(flags);				\
-								\
-	return res;						\
-}								\
-								\
-static inline unsigned int					\
-change_c0_##name(unsigned int change, unsigned int newbits)	\
-{								\
-	unsigned int res;					\
-	unsigned int new;					\
-	unsigned int omt;					\
-	unsigned long flags;					\
-								\
-	local_irq_save(flags);					\
-								\
-	omt = __dmt();						\
-	res = read_c0_##name();					\
-	new = res & ~change;					\
-	new |= (newbits & change);				\
-	write_c0_##name(new);					\
-	__emt(omt);						\
-	local_irq_restore(flags);				\
-								\
-	return res;						\
-}
-#endif
-
 __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
@@ -1916,6 +1792,15 @@
 __BUILD_SET_C0(brcm_config)
 __BUILD_SET_C0(brcm_mode)
 
+/*
+ * Return low 10 bits of ebase.
+ * Note that under KVM (MIPSVZ) this returns vcpu id.
+ */
+static inline unsigned int get_ebase_cpunum(void)
+{
+	return read_c0_ebase() & 0x3ff;
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_MIPSREGS_H */

diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index e277bba..2e373da 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h

@@ -18,10 +18,6 @@
 #include <asm/cacheflush.h>
 #include <asm/hazards.h>
 #include <asm/tlbflush.h>
-#ifdef CONFIG_MIPS_MT_SMTC
-#include <asm/mipsmtregs.h>
-#include <asm/smtc.h>
-#endif /* SMTC */
 #include <asm-generic/mm_hooks.h>
 
 #define TLBMISS_HANDLER_SETUP_PGD(pgd)					\
@@ -31,11 +27,15 @@
 } while (0)
 
 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT
+
+#define TLBMISS_HANDLER_RESTORE()					\
+	write_c0_xcontext((unsigned long) smp_processor_id() <<		\
+			  SMP_CPUID_REGSHIFT)
+
 #define TLBMISS_HANDLER_SETUP()						\
 	do {								\
 		TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);		\
-		write_c0_xcontext((unsigned long) smp_processor_id() <<	\
-						SMP_CPUID_REGSHIFT);	\
+		TLBMISS_HANDLER_RESTORE();				\
 	} while (0)
 
 #else /* !CONFIG_MIPS_PGD_C0_CONTEXT: using  pgd_current*/
@@ -47,9 +47,12 @@
  */
 extern unsigned long pgd_current[];
 
-#define TLBMISS_HANDLER_SETUP()						\
+#define TLBMISS_HANDLER_RESTORE()					\
 	write_c0_context((unsigned long) smp_processor_id() <<		\
-						SMP_CPUID_REGSHIFT);	\
+			 SMP_CPUID_REGSHIFT)
+
+#define TLBMISS_HANDLER_SETUP()						\
+	TLBMISS_HANDLER_RESTORE();					\
 	back_to_back_c0_hazard();					\
 	TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir)
 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT*/
@@ -63,13 +66,6 @@
 #define ASID_INC	0x10
 #define ASID_MASK	0xff0
 
-#elif defined(CONFIG_MIPS_MT_SMTC)
-
-#define ASID_INC	0x1
-extern unsigned long smtc_asid_mask;
-#define ASID_MASK	(smtc_asid_mask)
-#define HW_ASID_MASK	0xff
-/* End SMTC/34K debug hack */
 #else /* FIXME: not correct for R6000 */
 
 #define ASID_INC	0x1
@@ -92,7 +88,6 @@
 #define ASID_VERSION_MASK  ((unsigned long)~(ASID_MASK|(ASID_MASK-1)))
 #define ASID_FIRST_VERSION ((unsigned long)(~ASID_VERSION_MASK) + 1)
 
-#ifndef CONFIG_MIPS_MT_SMTC
 /* Normal, classic MIPS get_new_mmu_context */
 static inline void
 get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
@@ -115,12 +110,6 @@
 	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
 }
 
-#else /* CONFIG_MIPS_MT_SMTC */
-
-#define get_new_mmu_context(mm, cpu) smtc_get_new_mmu_context((mm), (cpu))
-
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 /*
  * Initialize the context related info for a new mm_struct
  * instance.
@@ -141,46 +130,12 @@
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned long flags;
-#ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long oldasid;
-	unsigned long mtflags;
-	int mytlb = (smtc_status & SMTC_TLB_SHARED) ? 0 : cpu_data[cpu].vpe_id;
 	local_irq_save(flags);
-	mtflags = dvpe();
-#else /* Not SMTC */
-	local_irq_save(flags);
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 	/* Check if our ASID is of an older version and thus invalid */
 	if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK)
 		get_new_mmu_context(next, cpu);
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * If the EntryHi ASID being replaced happens to be
-	 * the value flagged at ASID recycling time as having
-	 * an extended life, clear the bit showing it being
-	 * in use by this "CPU", and if that's the last bit,
-	 * free up the ASID value for use and flush any old
-	 * instances of it from the TLB.
-	 */
-	oldasid = (read_c0_entryhi() & ASID_MASK);
-	if(smtc_live_asid[mytlb][oldasid]) {
-		smtc_live_asid[mytlb][oldasid] &= ~(0x1 << cpu);
-		if(smtc_live_asid[mytlb][oldasid] == 0)
-			smtc_flush_tlb_asid(oldasid);
-	}
-	/*
-	 * Tread softly on EntryHi, and so long as we support
-	 * having ASID_MASK smaller than the hardware maximum,
-	 * make sure no "soft" bits become "hard"...
-	 */
-	write_c0_entryhi((read_c0_entryhi() & ~HW_ASID_MASK) |
-			 cpu_asid(cpu, next));
-	ehb(); /* Make sure it propagates to TCStatus */
-	evpe(mtflags);
-#else
 	write_c0_entryhi(cpu_asid(cpu, next));
-#endif /* CONFIG_MIPS_MT_SMTC */
 	TLBMISS_HANDLER_SETUP_PGD(next->pgd);
 
 	/*
@@ -213,34 +168,12 @@
 	unsigned long flags;
 	unsigned int cpu = smp_processor_id();
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long oldasid;
-	unsigned long mtflags;
-	int mytlb = (smtc_status & SMTC_TLB_SHARED) ? 0 : cpu_data[cpu].vpe_id;
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 	local_irq_save(flags);
 
 	/* Unconditionally get a new ASID.  */
 	get_new_mmu_context(next, cpu);
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/* See comments for similar code above */
-	mtflags = dvpe();
-	oldasid = read_c0_entryhi() & ASID_MASK;
-	if(smtc_live_asid[mytlb][oldasid]) {
-		smtc_live_asid[mytlb][oldasid] &= ~(0x1 << cpu);
-		if(smtc_live_asid[mytlb][oldasid] == 0)
-			 smtc_flush_tlb_asid(oldasid);
-	}
-	/* See comments for similar code above */
-	write_c0_entryhi((read_c0_entryhi() & ~HW_ASID_MASK) |
-			 cpu_asid(cpu, next));
-	ehb(); /* Make sure it propagates to TCStatus */
-	evpe(mtflags);
-#else
 	write_c0_entryhi(cpu_asid(cpu, next));
-#endif /* CONFIG_MIPS_MT_SMTC */
 	TLBMISS_HANDLER_SETUP_PGD(next->pgd);
 
 	/* mark mmu ownership change */
@@ -258,48 +191,15 @@
 drop_mmu_context(struct mm_struct *mm, unsigned cpu)
 {
 	unsigned long flags;
-#ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long oldasid;
-	/* Can't use spinlock because called from TLB flush within DVPE */
-	unsigned int prevvpe;
-	int mytlb = (smtc_status & SMTC_TLB_SHARED) ? 0 : cpu_data[cpu].vpe_id;
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 	local_irq_save(flags);
 
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))  {
 		get_new_mmu_context(mm, cpu);
-#ifdef CONFIG_MIPS_MT_SMTC
-		/* See comments for similar code above */
-		prevvpe = dvpe();
-		oldasid = (read_c0_entryhi() & ASID_MASK);
-		if (smtc_live_asid[mytlb][oldasid]) {
-			smtc_live_asid[mytlb][oldasid] &= ~(0x1 << cpu);
-			if(smtc_live_asid[mytlb][oldasid] == 0)
-				smtc_flush_tlb_asid(oldasid);
-		}
-		/* See comments for similar code above */
-		write_c0_entryhi((read_c0_entryhi() & ~HW_ASID_MASK)
-				| cpu_asid(cpu, mm));
-		ehb(); /* Make sure it propagates to TCStatus */
-		evpe(prevvpe);
-#else /* not CONFIG_MIPS_MT_SMTC */
 		write_c0_entryhi(cpu_asid(cpu, mm));
-#endif /* CONFIG_MIPS_MT_SMTC */
 	} else {
 		/* will get a new context next time */
-#ifndef CONFIG_MIPS_MT_SMTC
 		cpu_context(cpu, mm) = 0;
-#else /* SMTC */
-		int i;
-
-		/* SMTC shares the TLB (and ASIDs) across VPEs */
-		for_each_online_cpu(i) {
-		    if((smtc_status & SMTC_TLB_SHARED)
-		    || (cpu_data[i].vpe_id == cpu_data[cpu].vpe_id))
-			cpu_context(i, mm) = 0;
-		}
-#endif /* CONFIG_MIPS_MT_SMTC */
 	}
 	local_irq_restore(flags);
 }

diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index c2edae3..800fe57 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h

@@ -144,13 +144,7 @@
 #define MODULE_KERNEL_TYPE "64BIT "
 #endif
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define MODULE_KERNEL_SMTC "MT_SMTC "
-#else
-#define MODULE_KERNEL_SMTC ""
-#endif
-
 #define MODULE_ARCH_VERMAGIC \
-	MODULE_PROC_FAMILY MODULE_KERNEL_TYPE MODULE_KERNEL_SMTC
+	MODULE_PROC_FAMILY MODULE_KERNEL_TYPE
 
 #endif /* _ASM_MODULE_H */

diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index a2aba6c..538f6d4 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h

@@ -84,7 +84,7 @@
 	__asm__ __volatile__(					\
 	"	.set	push\n"					\
 	"	.set	msa\n"					\
-	"	cfcmsa	$" #cs ", %0\n"				\
+	"	ctcmsa	$" #cs ", %0\n"				\
 	"	.set	pop\n"					\
 	: : "r"(val));						\
 }
@@ -96,6 +96,13 @@
  * allow compilation with toolchains that do not support MSA. Once all
  * toolchains in use support MSA these can be removed.
  */
+#ifdef CONFIG_CPU_MICROMIPS
+#define CFC_MSA_INSN	0x587e0056
+#define CTC_MSA_INSN	0x583e0816
+#else
+#define CFC_MSA_INSN	0x787e0059
+#define CTC_MSA_INSN	0x783e0819
+#endif
 
 #define __BUILD_MSA_CTL_REG(name, cs)				\
 static inline unsigned int read_msa_##name(void)		\
@@ -104,7 +111,8 @@
 	__asm__ __volatile__(					\
 	"	.set	push\n"					\
 	"	.set	noat\n"					\
-	"	.word	0x787e0059 | (" #cs " << 11)\n"		\
+	"	.insn\n"					\
+	"	.word	%1 | (" #cs " << 11)\n"			\
 	"	move	%0, $1\n"				\
 	"	.set	pop\n"					\
-	: "=r"(reg));						\
+	: "=r"(reg) : "i"(CFC_MSA_INSN));			\
@@ -117,7 +125,8 @@
 	"	.set	push\n"					\
 	"	.set	noat\n"					\
 	"	move	$1, %0\n"				\
-	"	.word	0x783e0819 | (" #cs " << 6)\n"		\
+	"	.insn\n"					\
+	"	.word	%1 | (" #cs " << 6)\n"			\
 	"	.set	pop\n"					\
-	: : "r"(val));						\
+	: : "r"(val), "i"(CTC_MSA_INSN));			\
 }

diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
index de9aada..06f1f75 100644
--- a/arch/mips/include/asm/netlogic/mips-extns.h
+++ b/arch/mips/include/asm/netlogic/mips-extns.h

@@ -146,9 +146,10 @@
 
 static inline int nlm_nodeid(void)
 {
-	uint32_t prid = read_c0_prid();
+	uint32_t prid = read_c0_prid() & PRID_IMP_MASK;
 
-	if ((prid & 0xff00) == PRID_IMP_NETLOGIC_XLP9XX)
+	if ((prid == PRID_IMP_NETLOGIC_XLP9XX) ||
+			(prid == PRID_IMP_NETLOGIC_XLP5XX))
 		return (__read_32bit_c0_register($15, 1) >> 7) & 0x7;
 	else
 		return (__read_32bit_c0_register($15, 1) >> 5) & 0x3;

diff --git a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h
index 1f23dfa..805bfd2 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h

@@ -74,6 +74,8 @@
 #define XLP_IO_USB_OHCI2_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 2, 4)
 #define XLP_IO_USB_OHCI3_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 2, 5)
 
+#define XLP_IO_SATA_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 3, 2)
+
 /* XLP2xx has an updated USB block */
 #define XLP2XX_IO_USB_OFFSET(node, i)	XLP_HDR_OFFSET(node, 0, 4, i)
 #define XLP2XX_IO_USB_XHCI0_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 4, 1)
@@ -103,13 +105,11 @@
 #define XLP_IO_SYS_OFFSET(node)		XLP_HDR_OFFSET(node, 0, 6, 5)
 #define XLP_IO_JTAG_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 6, 6)
 
+/* Flash */
 #define XLP_IO_NOR_OFFSET(node)		XLP_HDR_OFFSET(node, 0, 7, 0)
 #define XLP_IO_NAND_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 7, 1)
 #define XLP_IO_SPI_OFFSET(node)		XLP_HDR_OFFSET(node, 0, 7, 2)
-/* SD flash */
-#define XLP_IO_SD_OFFSET(node)		XLP_HDR_OFFSET(node, 0, 7, 3)
-#define XLP_IO_MMC_OFFSET(node, slot)	\
-		((XLP_IO_SD_OFFSET(node))+(slot*0x100)+XLP_IO_PCI_HDRSZ)
+#define XLP_IO_MMC_OFFSET(node)		XLP_HDR_OFFSET(node, 0, 7, 3)
 
 /* Things have changed drastically in XLP 9XX */
 #define XLP9XX_HDR_OFFSET(n, d, f)	\
@@ -120,6 +120,8 @@
 #define XLP9XX_IO_UART_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 2, 2)
 #define XLP9XX_IO_SYS_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 0)
 #define XLP9XX_IO_FUSE_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 1)
+#define XLP9XX_IO_CLOCK_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 2)
+#define XLP9XX_IO_POWER_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 3)
 #define XLP9XX_IO_JTAG_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 4)
 
 #define XLP9XX_IO_PCIE_OFFSET(node, i)	XLP9XX_HDR_OFFSET(node, 1, i)
@@ -135,11 +137,11 @@
 /* XLP9XX on-chip SATA controller */
 #define XLP9XX_IO_SATA_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 3, 2)
 
+/* Flash */
 #define XLP9XX_IO_NOR_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 0)
 #define XLP9XX_IO_NAND_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 1)
 #define XLP9XX_IO_SPI_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 2)
-/* SD flash */
-#define XLP9XX_IO_MMCSD_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 3)
+#define XLP9XX_IO_MMC_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 3)
 
 /* PCI config header register id's */
 #define XLP_PCI_CFGREG0			0x00
@@ -186,8 +188,10 @@
 #define PCI_DEVICE_ID_NLM_NOR		0x1015
 #define PCI_DEVICE_ID_NLM_NAND		0x1016
 #define PCI_DEVICE_ID_NLM_MMC		0x1018
-#define PCI_DEVICE_ID_NLM_XHCI		0x101d
+#define PCI_DEVICE_ID_NLM_SATA		0x101A
+#define PCI_DEVICE_ID_NLM_XHCI		0x101D
 
+#define PCI_DEVICE_ID_XLP9XX_MMC	0x9018
 #define PCI_DEVICE_ID_XLP9XX_SATA	0x901A
 #define PCI_DEVICE_ID_XLP9XX_XHCI	0x901D
 

diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
index d4deb87..91540f4 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h

@@ -69,6 +69,20 @@
 #define PCIE_9XX_BYTE_SWAP_IO_BASE	0x25e
 #define PCIE_9XX_BYTE_SWAP_IO_LIM	0x25f
 
+#define PCIE_9XX_BRIDGE_MSIX_ADDR_BASE	0x264
+#define PCIE_9XX_BRIDGE_MSIX_ADDR_LIMIT	0x265
+#define PCIE_9XX_MSI_STATUS		0x283
+#define PCIE_9XX_MSI_EN			0x284
+/* 128 MSIX vectors available in 9xx */
+#define PCIE_9XX_MSIX_STATUS0		0x286
+#define PCIE_9XX_MSIX_STATUSX(n)	((n) + 0x286)
+#define PCIE_9XX_MSIX_VEC		0x296
+#define PCIE_9XX_MSIX_VECX(n)		((n) + 0x296)
+#define PCIE_9XX_INT_STATUS0		0x397
+#define PCIE_9XX_INT_STATUS1		0x398
+#define PCIE_9XX_INT_EN0		0x399
+#define PCIE_9XX_INT_EN1		0x39a
+
 /* other */
 #define PCIE_NLINKS			4
 

diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pic.h b/arch/mips/include/asm/netlogic/xlp-hal/pic.h
index f10bf3b..41cefe9 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/pic.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/pic.h

@@ -199,6 +199,10 @@
 #define PIC_IRT_PCIE_LINK_3_INDEX	81
 #define PIC_IRT_PCIE_LINK_INDEX(num)	((num) + PIC_IRT_PCIE_LINK_0_INDEX)
 
+#define PIC_9XX_IRT_PCIE_LINK_0_INDEX	191
+#define PIC_9XX_IRT_PCIE_LINK_INDEX(num) \
+				((num) + PIC_9XX_IRT_PCIE_LINK_0_INDEX)
+
 #define PIC_CLOCK_TIMER			7
 
 #if !defined(LOCORE) && !defined(__ASSEMBLY__)

diff --git a/arch/mips/include/asm/netlogic/xlp-hal/sys.h b/arch/mips/include/asm/netlogic/xlp-hal/sys.h
index d9b107f..bc7bddf 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/sys.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/sys.h

@@ -118,6 +118,10 @@
 #define SYS_SCRTCH3				0x4c
 
 /* PLL registers XLP2XX */
+#define SYS_CPU_PLL_CTRL0(core)			(0x1c0 + ((core) * 4))
+#define SYS_CPU_PLL_CTRL1(core)			(0x1c1 + ((core) * 4))
+#define SYS_CPU_PLL_CTRL2(core)			(0x1c2 + ((core) * 4))
+#define SYS_CPU_PLL_CTRL3(core)			(0x1c3 + ((core) * 4))
 #define SYS_PLL_CTRL0				0x240
 #define SYS_PLL_CTRL1				0x241
 #define SYS_PLL_CTRL2				0x242
@@ -147,6 +151,32 @@
 #define SYS_SYS_PLL_MEM_REQ			0x2a3
 #define SYS_PLL_MEM_STAT			0x2a4
 
+/* PLL registers XLP9XX */
+#define SYS_9XX_CPU_PLL_CTRL0(core)		(0xc0 + ((core) * 4))
+#define SYS_9XX_CPU_PLL_CTRL1(core)		(0xc1 + ((core) * 4))
+#define SYS_9XX_CPU_PLL_CTRL2(core)		(0xc2 + ((core) * 4))
+#define SYS_9XX_CPU_PLL_CTRL3(core)		(0xc3 + ((core) * 4))
+#define SYS_9XX_DMC_PLL_CTRL0			0x140
+#define SYS_9XX_DMC_PLL_CTRL1			0x141
+#define SYS_9XX_DMC_PLL_CTRL2			0x142
+#define SYS_9XX_DMC_PLL_CTRL3			0x143
+#define SYS_9XX_PLL_CTRL0			0x144
+#define SYS_9XX_PLL_CTRL1			0x145
+#define SYS_9XX_PLL_CTRL2			0x146
+#define SYS_9XX_PLL_CTRL3			0x147
+
+#define SYS_9XX_PLL_CTRL0_DEVX(x)		(0x148 + (x) * 4)
+#define SYS_9XX_PLL_CTRL1_DEVX(x)		(0x149 + (x) * 4)
+#define SYS_9XX_PLL_CTRL2_DEVX(x)		(0x14a + (x) * 4)
+#define SYS_9XX_PLL_CTRL3_DEVX(x)		(0x14b + (x) * 4)
+
+#define SYS_9XX_CPU_PLL_CHG_CTRL		0x188
+#define SYS_9XX_PLL_CHG_CTRL			0x189
+#define SYS_9XX_CLK_DEV_DIS			0x18a
+#define SYS_9XX_CLK_DEV_SEL			0x18b
+#define SYS_9XX_CLK_DEV_DIV			0x18d
+#define SYS_9XX_CLK_DEV_CHG			0x18f
+
 /* Registers changed on 9XX */
 #define SYS_9XX_POWER_ON_RESET_CFG		0x00
 #define SYS_9XX_CHIP_RESET			0x01
@@ -170,6 +200,11 @@
 #define nlm_get_fuse_regbase(node)	\
 			(nlm_get_fuse_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
+#define nlm_get_clock_pcibase(node)	\
+			nlm_pcicfg_base(XLP9XX_IO_CLOCK_OFFSET(node))
+#define nlm_get_clock_regbase(node)	\
+			(nlm_get_clock_pcibase(node) + XLP_IO_PCI_HDRSZ)
+
 unsigned int nlm_get_pic_frequency(int node);
 #endif
 #endif

diff --git a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
index 2b0c959..a862b93 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h

@@ -58,6 +58,10 @@
 #define PIC_I2C_1_IRQ			31
 #define PIC_I2C_2_IRQ			32
 #define PIC_I2C_3_IRQ			33
+#define PIC_SPI_IRQ			34
+#define PIC_NAND_IRQ			37
+#define PIC_SATA_IRQ			38
+#define PIC_GPIO_IRQ			39
 
 #define PIC_PCIE_LINK_MSI_IRQ_BASE	44	/* 44 - 47 MSI IRQ */
 #define PIC_PCIE_LINK_MSI_IRQ(i)	(44 + (i))
@@ -66,8 +70,9 @@
 #define PIC_PCIE_MSIX_IRQ_BASE		48	/* 48 - 51 MSI-X IRQ */
 #define PIC_PCIE_MSIX_IRQ(i)		(48 + (i))
 
-#define NLM_MSIX_VEC_BASE		96	/* 96 - 127 - MSIX mapped */
-#define NLM_MSI_VEC_BASE		128	/* 128 -255 - MSI mapped */
+/* XLP9xx and XLP8xx have 128 and 32 MSIX vectors respectively */
+#define NLM_MSIX_VEC_BASE		96	/* 96 - 223 - MSIX mapped */
+#define NLM_MSI_VEC_BASE		224	/* 224 -351 - MSI mapped */
 
 #define NLM_PIC_INDIRECT_VEC_BASE	512
 #define NLM_GPIO_VEC_BASE		768
@@ -95,17 +100,19 @@
 
 static inline int cpu_is_xlpii(void)
 {
-	int chip = read_c0_prid() & 0xff00;
+	int chip = read_c0_prid() & PRID_IMP_MASK;
 
 	return chip == PRID_IMP_NETLOGIC_XLP2XX ||
-		chip == PRID_IMP_NETLOGIC_XLP9XX;
+		chip == PRID_IMP_NETLOGIC_XLP9XX ||
+		chip == PRID_IMP_NETLOGIC_XLP5XX;
 }
 
 static inline int cpu_is_xlp9xx(void)
 {
-	int chip = read_c0_prid() & 0xff00;
+	int chip = read_c0_prid() & PRID_IMP_MASK;
 
-	return chip == PRID_IMP_NETLOGIC_XLP9XX;
+	return chip == PRID_IMP_NETLOGIC_XLP9XX ||
+		chip == PRID_IMP_NETLOGIC_XLP5XX;
 }
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_NLM_XLP_H */

diff --git a/arch/mips/include/asm/nile4.h b/arch/mips/include/asm/nile4.h
index 2e2436d..99e97f8 100644
--- a/arch/mips/include/asm/nile4.h
+++ b/arch/mips/include/asm/nile4.h

@@ -1,7 +1,7 @@
 /*
  *  asm-mips/nile4.h -- NEC Vrc-5074 Nile 4 definitions
  *
- *  Copyright (C) 2000 Geert Uytterhoeven <geert@sonycom.com>
+ *  Copyright (C) 2000 Geert Uytterhoeven <geert@linux-m68k.org>
  *		       Sony Software Development Center Europe (SDCE), Brussels
  *
  *  This file is based on the following documentation:

diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index f5d77b9..d781f9e 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h

@@ -211,7 +211,6 @@
 
 extern void octeon_write_lcd(const char *s);
 extern void octeon_check_cpu_bist(void);
-extern int octeon_get_boot_debug_flag(void);
 extern int octeon_get_boot_uart(void);
 
 struct uart_port;

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 008324d..539ddd1 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h

@@ -32,6 +32,8 @@
 				 _page_cachable_default)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _page_cachable_default)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
+				 _PAGE_GLOBAL | _CACHE_CACHABLE_NONCOHERENT)
 #define PAGE_USERIO	__pgprot(_PAGE_PRESENT | (cpu_has_rixi ? 0 : _PAGE_READ) | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_KERNEL_UNCACHED __pgprot(_PAGE_PRESENT | __READABLE | \

diff --git a/arch/mips/include/asm/pm-cps.h b/arch/mips/include/asm/pm-cps.h
new file mode 100644
index 0000000..625eda5
--- /dev/null
+++ b/arch/mips/include/asm/pm-cps.h

@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MIPS_ASM_PM_CPS_H__
+#define __MIPS_ASM_PM_CPS_H__
+
+/*
+ * The CM & CPC can only handle coherence & power control on a per-core basis,
+ * thus in an MT system the VPEs within each core are coupled and can only
+ * enter or exit states requiring CM or CPC assistance in unison.
+ */
+#ifdef CONFIG_MIPS_MT
+# define coupled_coherence cpu_has_mipsmt
+#else
+# define coupled_coherence 0
+#endif
+
+/* Enumeration of possible PM states */
+enum cps_pm_state {
+	CPS_PM_NC_WAIT,		/* MIPS wait instruction, non-coherent */
+	CPS_PM_CLOCK_GATED,	/* Core clock gated */
+	CPS_PM_POWER_GATED,	/* Core power gated */
+	CPS_PM_STATE_COUNT,
+};
+
+/**
+ * cps_pm_support_state - determine whether the system supports a PM state
+ * @state: the state to test for support
+ *
+ * Returns true if the system supports the given state, otherwise false.
+ */
+extern bool cps_pm_support_state(enum cps_pm_state state);
+
+/**
+ * cps_pm_enter_state - enter a PM state
+ * @state: the state to enter
+ *
+ * Enter the given PM state. If coupled_coherence is non-zero then it is
+ * expected that this function be called at approximately the same time on
+ * each coupled CPU. Returns 0 on successful entry & exit, otherwise -errno.
+ */
+extern int cps_pm_enter_state(enum cps_pm_state state);
+
+#endif /* __MIPS_ASM_PM_CPS_H__ */

diff --git a/arch/mips/include/asm/pm.h b/arch/mips/include/asm/pm.h
new file mode 100644
index 0000000..7c03469
--- /dev/null
+++ b/arch/mips/include/asm/pm.h

@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies Ltd
+ *
+ * This program is free software; you can redistribute	it and/or modify it
+ * under  the terms of	the GNU General	 Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * PM helper macros for CPU power off (e.g. Suspend-to-RAM).
+ */
+
+#ifndef __ASM_PM_H
+#define __ASM_PM_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+/* Save CPU state to stack for suspend to RAM */
+.macro SUSPEND_SAVE_REGS
+	subu	sp, PT_SIZE
+	/* Call preserved GPRs */
+	LONG_S	$16, PT_R16(sp)
+	LONG_S	$17, PT_R17(sp)
+	LONG_S	$18, PT_R18(sp)
+	LONG_S	$19, PT_R19(sp)
+	LONG_S	$20, PT_R20(sp)
+	LONG_S	$21, PT_R21(sp)
+	LONG_S	$22, PT_R22(sp)
+	LONG_S	$23, PT_R23(sp)
+	LONG_S	$28, PT_R28(sp)
+	LONG_S	$30, PT_R30(sp)
+	LONG_S	$31, PT_R31(sp)
+	/* A couple of CP0 registers with space in pt_regs */
+	mfc0	k0, CP0_STATUS
+	LONG_S	k0, PT_STATUS(sp)
+.endm
+
+/* Restore CPU state from stack after resume from RAM */
+.macro RESUME_RESTORE_REGS_RETURN
+	.set	push
+	.set	noreorder
+	/* A couple of CP0 registers with space in pt_regs */
+	LONG_L	k0, PT_STATUS(sp)
+	mtc0	k0, CP0_STATUS
+	/* Call preserved GPRs */
+	LONG_L	$16, PT_R16(sp)
+	LONG_L	$17, PT_R17(sp)
+	LONG_L	$18, PT_R18(sp)
+	LONG_L	$19, PT_R19(sp)
+	LONG_L	$20, PT_R20(sp)
+	LONG_L	$21, PT_R21(sp)
+	LONG_L	$22, PT_R22(sp)
+	LONG_L	$23, PT_R23(sp)
+	LONG_L	$28, PT_R28(sp)
+	LONG_L	$30, PT_R30(sp)
+	LONG_L	$31, PT_R31(sp)
+	/* Pop and return */
+	jr	ra
+	 addiu	sp, PT_SIZE
+	.set	pop
+.endm
+
+/* Get address of static suspend state into t1 */
+.macro LA_STATIC_SUSPEND
+	la	t1, mips_static_suspend_state
+.endm
+
+/* Save important CPU state for early restoration to global data */
+.macro SUSPEND_SAVE_STATIC
+#ifdef CONFIG_EVA
+	/*
+	 * Segment configuration is saved in global data where it can be easily
+	 * reloaded without depending on the segment configuration.
+	 */
+	mfc0	k0, CP0_PAGEMASK, 2	/* SegCtl0 */
+	LONG_S	k0, SSS_SEGCTL0(t1)
+	mfc0	k0, CP0_PAGEMASK, 3	/* SegCtl1 */
+	LONG_S	k0, SSS_SEGCTL1(t1)
+	mfc0	k0, CP0_PAGEMASK, 4	/* SegCtl2 */
+	LONG_S	k0, SSS_SEGCTL2(t1)
+#endif
+	/* save stack pointer (pointing to GPRs) */
+	LONG_S	sp, SSS_SP(t1)
+.endm
+
+/* Restore important CPU state early from global data */
+.macro RESUME_RESTORE_STATIC
+#ifdef CONFIG_EVA
+	/*
+	 * Segment configuration must be restored prior to any access to
+	 * allocated memory, as it may reside outside of the legacy kernel
+	 * segments.
+	 */
+	LONG_L	k0, SSS_SEGCTL0(t1)
+	mtc0	k0, CP0_PAGEMASK, 2	/* SegCtl0 */
+	LONG_L	k0, SSS_SEGCTL1(t1)
+	mtc0	k0, CP0_PAGEMASK, 3	/* SegCtl1 */
+	LONG_L	k0, SSS_SEGCTL2(t1)
+	mtc0	k0, CP0_PAGEMASK, 4	/* SegCtl2 */
+	tlbw_use_hazard
+#endif
+	/* restore stack pointer (pointing to GPRs) */
+	LONG_L	sp, SSS_SP(t1)
+.endm
+
+/* flush caches to make sure context has reached memory */
+.macro SUSPEND_CACHE_FLUSH
+	.extern	__wback_cache_all
+	.set	push
+	.set	noreorder
+	la	t1, __wback_cache_all
+	LONG_L	t0, 0(t1)
+	jalr	t0
+	 nop
+	.set	pop
+ .endm
+
+/* Save suspend state and flush data caches to RAM */
+.macro SUSPEND_SAVE
+	SUSPEND_SAVE_REGS
+	LA_STATIC_SUSPEND
+	SUSPEND_SAVE_STATIC
+	SUSPEND_CACHE_FLUSH
+.endm
+
+/* Restore saved state after resume from RAM and return */
+.macro RESUME_RESTORE_RETURN
+	LA_STATIC_SUSPEND
+	RESUME_RESTORE_STATIC
+	RESUME_RESTORE_REGS_RETURN
+.endm
+
+#else /* __ASSEMBLY__ */
+
+/**
+ * struct mips_static_suspend_state - Core saved CPU state across S2R.
+ * @segctl:	CP0 Segment control registers.
+ * @sp:		Stack frame where GP register context is saved.
+ *
+ * This structure contains minimal CPU state that must be saved in static kernel
+ * data in order to be able to restore the rest of the state. This includes
+ * segmentation configuration in the case of EVA being enabled, as they must be
+ * restored prior to any kmalloc'd memory being referenced (even the stack
+ * pointer).
+ */
+struct mips_static_suspend_state {
+#ifdef CONFIG_EVA
+	unsigned long segctl[3];
+#endif
+	unsigned long sp;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_PM_H */

diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index bf1ac8d3..7e6e682 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h

@@ -39,9 +39,6 @@
 	unsigned long cp0_badvaddr;
 	unsigned long cp0_cause;
 	unsigned long cp0_epc;
-#ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long cp0_tcstatus;
-#endif /* CONFIG_MIPS_MT_SMTC */
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 	unsigned long long mpl[3];	  /* MTM{0,1,2} */
 	unsigned long long mtp[3];	  /* MTP{0,1,2} */

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index ca64cbe..0b8bd28 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h

@@ -43,11 +43,10 @@
 	: "i" (op), "R" (*(unsigned char *)(addr)))
 
 #ifdef CONFIG_MIPS_MT
-/*
- * Temporary hacks for SMTC debug. Optionally force single-threaded
- * execution during I-cache flushes.
- */
 
+/*
+ * Optionally force single-threaded execution during I-cache flushes.
+ */
 #define PROTECT_CACHE_FLUSHES 1
 
 #ifdef PROTECT_CACHE_FLUSHES
@@ -524,6 +523,8 @@
 __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 64, )
 __BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64, )
 __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64, )
+__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 128, )
+__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 128, )
 __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128, )
 
 __BUILD_BLAST_CACHE(inv_d, dcache, Index_Writeback_Inv_D, Hit_Invalidate_D, 16, )

diff --git a/arch/mips/include/asm/sgi/ip22.h b/arch/mips/include/asm/sgi/ip22.h
index 8db1a35..87ec9ea 100644
--- a/arch/mips/include/asm/sgi/ip22.h
+++ b/arch/mips/include/asm/sgi/ip22.h

@@ -69,6 +69,8 @@
 #define SGI_EISA_IRQ	SGINT_LOCAL2 + 3	/* EISA interrupts */
 #define SGI_KEYBD_IRQ	SGINT_LOCAL2 + 4	/* keyboard */
 #define SGI_SERIAL_IRQ	SGINT_LOCAL2 + 5	/* onboard serial */
+#define SGI_GIOEXP0_IRQ	(SGINT_LOCAL2 + 6)	/* Indy GIO EXP0 */
+#define SGI_GIOEXP1_IRQ	(SGINT_LOCAL2 + 7)	/* Indy GIO EXP1 */
 
 #define ip22_is_fullhouse()	(sgioc->sysid & SGIOC_SYSID_FULLHOUSE)
 

diff --git a/arch/mips/include/asm/smp-cps.h b/arch/mips/include/asm/smp-cps.h
index d60d1a2..a06a08a 100644
--- a/arch/mips/include/asm/smp-cps.h
+++ b/arch/mips/include/asm/smp-cps.h

@@ -13,17 +13,28 @@
 
 #ifndef __ASSEMBLY__
 
-struct boot_config {
-	unsigned int core;
-	unsigned int vpe;
+struct vpe_boot_config {
 	unsigned long pc;
 	unsigned long sp;
 	unsigned long gp;
 };
 
-extern struct boot_config mips_cps_bootcfg;
+struct core_boot_config {
+	atomic_t vpe_mask;
+	struct vpe_boot_config *vpe_config;
+};
+
+extern struct core_boot_config *mips_cps_core_bootcfg;
 
 extern void mips_cps_core_entry(void);
+extern void mips_cps_core_init(void);
+
+extern struct vpe_boot_config *mips_cps_boot_vpes(void);
+
+extern bool mips_cps_smp_in_use(void);
+
+extern void mips_cps_pm_save(void);
+extern void mips_cps_pm_restore(void);
 
 #else /* __ASSEMBLY__ */
 

diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 73d35b1..6ba1fb8 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h

@@ -26,7 +26,6 @@
 	void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
 	void (*init_secondary)(void);
 	void (*smp_finish)(void);
-	void (*cpus_done)(void);
 	void (*boot_secondary)(int cpu, struct task_struct *idle);
 	void (*smp_setup)(void);
 	void (*prepare_cpus)(unsigned int max_cpus);

diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index efa02ac..b037334 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h

@@ -46,6 +46,9 @@
 
 extern volatile cpumask_t cpu_callin_map;
 
+/* Mask of CPUs which are currently definitely operating coherently */
+extern cpumask_t cpu_coherent_mask;
+
 extern void asmlinkage smp_bootstrap(void);
 
 /*

diff --git a/arch/mips/include/asm/smtc.h b/arch/mips/include/asm/smtc.h
deleted file mode 100644
index e56b439..0000000
--- a/arch/mips/include/asm/smtc.h
+++ /dev/null

@@ -1,78 +0,0 @@
-#ifndef _ASM_SMTC_MT_H
-#define _ASM_SMTC_MT_H
-
-/*
- * Definitions for SMTC multitasking on MIPS MT cores
- */
-
-#include <asm/mips_mt.h>
-#include <asm/smtc_ipi.h>
-
-/*
- * System-wide SMTC status information
- */
-
-extern unsigned int smtc_status;
-
-#define SMTC_TLB_SHARED 0x00000001
-#define SMTC_MTC_ACTIVE 0x00000002
-
-/*
- * TLB/ASID Management information
- */
-
-#define MAX_SMTC_TLBS 2
-#define MAX_SMTC_ASIDS 256
-#if NR_CPUS <= 8
-typedef char asiduse;
-#else
-#if NR_CPUS <= 16
-typedef short asiduse;
-#else
-typedef long asiduse;
-#endif
-#endif
-
-/*
- * VPE Management information
- */
-
-#define MAX_SMTC_VPES	MAX_SMTC_TLBS	/* FIXME: May not always be true. */
-
-extern asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
-
-struct mm_struct;
-struct task_struct;
-
-void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu);
-void self_ipi(struct smtc_ipi *);
-void smtc_flush_tlb_asid(unsigned long asid);
-extern int smtc_build_cpu_map(int startslot);
-extern void smtc_prepare_cpus(int cpus);
-extern void smtc_smp_finish(void);
-extern void smtc_boot_secondary(int cpu, struct task_struct *t);
-extern void smtc_cpus_done(void);
-extern void smtc_init_secondary(void);
-
-
-/*
- * Sharing the TLB between multiple VPEs means that the
- * "random" index selection function is not allowed to
- * select the current value of the Index register. To
- * avoid additional TLB pressure, the Index registers
- * are "parked" with an non-Valid value.
- */
-
-#define PARKED_INDEX	((unsigned int)0x80000000)
-
-/*
- * Define low-level interrupt mask for IPIs, if necessary.
- * By default, use SW interrupt 1, which requires no external
- * hardware support, but which works only for single-core
- * MIPS MT systems.
- */
-#ifndef MIPS_CPU_IPI_IRQ
-#define MIPS_CPU_IPI_IRQ 1
-#endif
-
-#endif /*  _ASM_SMTC_MT_H */

diff --git a/arch/mips/include/asm/smtc_ipi.h b/arch/mips/include/asm/smtc_ipi.h
deleted file mode 100644
index 15278db..0000000
--- a/arch/mips/include/asm/smtc_ipi.h
+++ /dev/null

@@ -1,129 +0,0 @@
-/*
- * Definitions used in MIPS MT SMTC "Interprocessor Interrupt" code.
- */
-#ifndef __ASM_SMTC_IPI_H
-#define __ASM_SMTC_IPI_H
-
-#include <linux/spinlock.h>
-
-//#define SMTC_IPI_DEBUG
-
-#ifdef SMTC_IPI_DEBUG
-#include <asm/mipsregs.h>
-#include <asm/mipsmtregs.h>
-#endif /* SMTC_IPI_DEBUG */
-
-/*
- * An IPI "message"
- */
-
-struct smtc_ipi {
-	struct smtc_ipi *flink;
-	int type;
-	void *arg;
-	int dest;
-#ifdef	SMTC_IPI_DEBUG
-	int sender;
-	long stamp;
-#endif /* SMTC_IPI_DEBUG */
-};
-
-/*
- * Defined IPI Types
- */
-
-#define LINUX_SMP_IPI 1
-#define SMTC_CLOCK_TICK 2
-#define IRQ_AFFINITY_IPI 3
-
-/*
- * A queue of IPI messages
- */
-
-struct smtc_ipi_q {
-	struct smtc_ipi *head;
-	spinlock_t lock;
-	struct smtc_ipi *tail;
-	int depth;
-	int resched_flag;	/* reschedule already queued */
-};
-
-static inline void smtc_ipi_nq(struct smtc_ipi_q *q, struct smtc_ipi *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	if (q->head == NULL)
-		q->head = q->tail = p;
-	else
-		q->tail->flink = p;
-	p->flink = NULL;
-	q->tail = p;
-	q->depth++;
-#ifdef	SMTC_IPI_DEBUG
-	p->sender = read_c0_tcbind();
-	p->stamp = read_c0_count();
-#endif /* SMTC_IPI_DEBUG */
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-static inline struct smtc_ipi *__smtc_ipi_dq(struct smtc_ipi_q *q)
-{
-	struct smtc_ipi *p;
-
-	if (q->head == NULL)
-		p = NULL;
-	else {
-		p = q->head;
-		q->head = q->head->flink;
-		q->depth--;
-		/* Arguably unnecessary, but leaves queue cleaner */
-		if (q->head == NULL)
-			q->tail = NULL;
-	}
-
-	return p;
-}
-
-static inline struct smtc_ipi *smtc_ipi_dq(struct smtc_ipi_q *q)
-{
-	unsigned long flags;
-	struct smtc_ipi *p;
-
-	spin_lock_irqsave(&q->lock, flags);
-	p = __smtc_ipi_dq(q);
-	spin_unlock_irqrestore(&q->lock, flags);
-
-	return p;
-}
-
-static inline void smtc_ipi_req(struct smtc_ipi_q *q, struct smtc_ipi *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	if (q->head == NULL) {
-		q->head = q->tail = p;
-		p->flink = NULL;
-	} else {
-		p->flink = q->head;
-		q->head = p;
-	}
-	q->depth++;
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-static inline int smtc_ipi_qdepth(struct smtc_ipi_q *q)
-{
-	unsigned long flags;
-	int retval;
-
-	spin_lock_irqsave(&q->lock, flags);
-	retval = q->depth;
-	spin_unlock_irqrestore(&q->lock, flags);
-	return retval;
-}
-
-extern void smtc_send_ipi(int cpu, int type, unsigned int action);
-
-#endif /* __ASM_SMTC_IPI_H */

diff --git a/arch/mips/include/asm/smtc_proc.h b/arch/mips/include/asm/smtc_proc.h
deleted file mode 100644
index 25da651..0000000
--- a/arch/mips/include/asm/smtc_proc.h
+++ /dev/null

@@ -1,23 +0,0 @@
-/*
- * Definitions for SMTC /proc entries
- * Copyright(C) 2005 MIPS Technologies Inc.
- */
-#ifndef __ASM_SMTC_PROC_H
-#define __ASM_SMTC_PROC_H
-
-/*
- * per-"CPU" statistics
- */
-
-struct smtc_cpu_proc {
-	unsigned long timerints;
-	unsigned long selfipis;
-};
-
-extern struct smtc_cpu_proc smtc_cpu_stats[NR_CPUS];
-
-/* Count of number of recoveries of "stolen" FPU access rights on 34K */
-
-extern atomic_t smtc_fpu_recoveries;
-
-#endif /* __ASM_SMTC_PROC_H */

diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index d301e10..b188c79 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h

@@ -19,22 +19,12 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
-/*
- * For SMTC kernel, global IE should be left set, and interrupts
- * controlled exclusively via IXMT.
- */
-#ifdef CONFIG_MIPS_MT_SMTC
-#define STATMASK 0x1e
-#elif defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 #define STATMASK 0x3f
 #else
 #define STATMASK 0x1f
 #endif
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#include <asm/mipsmtregs.h>
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 		.macro	SAVE_AT
 		.set	push
 		.set	noat
@@ -186,16 +176,6 @@
 		mfc0	v1, CP0_STATUS
 		LONG_S	$2, PT_R2(sp)
 		LONG_S	v1, PT_STATUS(sp)
-#ifdef CONFIG_MIPS_MT_SMTC
-		/*
-		 * Ideally, these instructions would be shuffled in
-		 * to cover the pipeline delay.
-		 */
-		.set	mips32
-		mfc0	k0, CP0_TCSTATUS
-		.set	mips0
-		LONG_S	k0, PT_TCSTATUS(sp)
-#endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_S	$4, PT_R4(sp)
 		mfc0	v1, CP0_CAUSE
 		LONG_S	$5, PT_R5(sp)
@@ -321,36 +301,6 @@
 		.set	push
 		.set	reorder
 		.set	noat
-#ifdef CONFIG_MIPS_MT_SMTC
-		.set	mips32r2
-		/*
-		 * We need to make sure the read-modify-write
-		 * of Status below isn't perturbed by an interrupt
-		 * or cross-TC access, so we need to do at least a DMT,
-		 * protected by an interrupt-inhibit. But setting IXMT
-		 * also creates a few-cycle window where an IPI could
-		 * be queued and not be detected before potentially
-		 * returning to a WAIT or user-mode loop. It must be
-		 * replayed.
-		 *
-		 * We're in the middle of a context switch, and
-		 * we can't dispatch it directly without trashing
-		 * some registers, so we'll try to detect this unlikely
-		 * case and program a software interrupt in the VPE,
-		 * as would be done for a cross-VPE IPI.  To accommodate
-		 * the handling of that case, we're doing a DVPE instead
-		 * of just a DMT here to protect against other threads.
-		 * This is a lot of cruft to cover a tiny window.
-		 * If you can find a better design, implement it!
-		 *
-		 */
-		mfc0	v0, CP0_TCSTATUS
-		ori	v0, TCSTATUS_IXMT
-		mtc0	v0, CP0_TCSTATUS
-		_ehb
-		DVPE	5				# dvpe a1
-		jal	mips_ihb
-#endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	a0, CP0_STATUS
 		ori	a0, STATMASK
 		xori	a0, STATMASK
@@ -362,59 +312,6 @@
 		and	v0, v1
 		or	v0, a0
 		mtc0	v0, CP0_STATUS
-#ifdef CONFIG_MIPS_MT_SMTC
-/*
- * Only after EXL/ERL have been restored to status can we
- * restore TCStatus.IXMT.
- */
-		LONG_L	v1, PT_TCSTATUS(sp)
-		_ehb
-		mfc0	a0, CP0_TCSTATUS
-		andi	v1, TCSTATUS_IXMT
-		bnez	v1, 0f
-
-/*
- * We'd like to detect any IPIs queued in the tiny window
- * above and request an software interrupt to service them
- * when we ERET.
- *
- * Computing the offset into the IPIQ array of the executing
- * TC's IPI queue in-line would be tedious.  We use part of
- * the TCContext register to hold 16 bits of offset that we
- * can add in-line to find the queue head.
- */
-		mfc0	v0, CP0_TCCONTEXT
-		la	a2, IPIQ
-		srl	v0, v0, 16
-		addu	a2, a2, v0
-		LONG_L	v0, 0(a2)
-		beqz	v0, 0f
-/*
- * If we have a queue, provoke dispatch within the VPE by setting C_SW1
- */
-		mfc0	v0, CP0_CAUSE
-		ori	v0, v0, C_SW1
-		mtc0	v0, CP0_CAUSE
-0:
-		/*
-		 * This test should really never branch but
-		 * let's be prudent here.  Having atomized
-		 * the shared register modifications, we can
-		 * now EVPE, and must do so before interrupts
-		 * are potentially re-enabled.
-		 */
-		andi	a1, a1, MVPCONTROL_EVP
-		beqz	a1, 1f
-		evpe
-1:
-		/* We know that TCStatua.IXMT should be set from above */
-		xori	a0, a0, TCSTATUS_IXMT
-		or	a0, a0, v1
-		mtc0	a0, CP0_TCSTATUS
-		_ehb
-
-		.set	mips0
-#endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_L	v1, PT_EPC(sp)
 		MTC0	v1, CP0_EPC
 		LONG_L	$31, PT_R31(sp)
@@ -467,33 +364,11 @@
  * Set cp0 enable bit as sign that we're running on the kernel stack
  */
 		.macro	CLI
-#if !defined(CONFIG_MIPS_MT_SMTC)
 		mfc0	t0, CP0_STATUS
 		li	t1, ST0_CU0 | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK
 		mtc0	t0, CP0_STATUS
-#else /* CONFIG_MIPS_MT_SMTC */
-		/*
-		 * For SMTC, we need to set privilege
-		 * and disable interrupts only for the
-		 * current TC, using the TCStatus register.
-		 */
-		mfc0	t0, CP0_TCSTATUS
-		/* Fortunately CU 0 is in the same place in both registers */
-		/* Set TCU0, TMX, TKSU (for later inversion) and IXMT */
-		li	t1, ST0_CU0 | 0x08001c00
-		or	t0, t1
-		/* Clear TKSU, leave IXMT */
-		xori	t0, 0x00001800
-		mtc0	t0, CP0_TCSTATUS
-		_ehb
-		/* We need to leave the global IE bit set, but clear EXL...*/
-		mfc0	t0, CP0_STATUS
-		ori	t0, ST0_EXL | ST0_ERL
-		xori	t0, ST0_EXL | ST0_ERL
-		mtc0	t0, CP0_STATUS
-#endif /* CONFIG_MIPS_MT_SMTC */
 		irq_disable_hazard
 		.endm
 
@@ -502,35 +377,11 @@
  * Set cp0 enable bit as sign that we're running on the kernel stack
  */
 		.macro	STI
-#if !defined(CONFIG_MIPS_MT_SMTC)
 		mfc0	t0, CP0_STATUS
 		li	t1, ST0_CU0 | STATMASK
 		or	t0, t1
 		xori	t0, STATMASK & ~1
 		mtc0	t0, CP0_STATUS
-#else /* CONFIG_MIPS_MT_SMTC */
-		/*
-		 * For SMTC, we need to set privilege
-		 * and enable interrupts only for the
-		 * current TC, using the TCStatus register.
-		 */
-		_ehb
-		mfc0	t0, CP0_TCSTATUS
-		/* Fortunately CU 0 is in the same place in both registers */
-		/* Set TCU0, TKSU (for later inversion) and IXMT */
-		li	t1, ST0_CU0 | 0x08001c00
-		or	t0, t1
-		/* Clear TKSU *and* IXMT */
-		xori	t0, 0x00001c00
-		mtc0	t0, CP0_TCSTATUS
-		_ehb
-		/* We need to leave the global IE bit set, but clear EXL...*/
-		mfc0	t0, CP0_STATUS
-		ori	t0, ST0_EXL
-		xori	t0, ST0_EXL
-		mtc0	t0, CP0_STATUS
-		/* irq_enable_hazard below should expand to EHB for 24K/34K cpus */
-#endif /* CONFIG_MIPS_MT_SMTC */
 		irq_enable_hazard
 		.endm
 
@@ -540,32 +391,6 @@
  * Set cp0 enable bit as sign that we're running on the kernel stack
  */
 		.macro	KMODE
-#ifdef CONFIG_MIPS_MT_SMTC
-		/*
-		 * This gets baroque in SMTC.  We want to
-		 * protect the non-atomic clearing of EXL
-		 * with DMT/EMT, but we don't want to take
-		 * an interrupt while DMT is still in effect.
-		 */
-
-		/* KMODE gets invoked from both reorder and noreorder code */
-		.set	push
-		.set	mips32r2
-		.set	noreorder
-		mfc0	v0, CP0_TCSTATUS
-		andi	v1, v0, TCSTATUS_IXMT
-		ori	v0, TCSTATUS_IXMT
-		mtc0	v0, CP0_TCSTATUS
-		_ehb
-		DMT	2				# dmt	v0
-		/*
-		 * We don't know a priori if ra is "live"
-		 */
-		move	t0, ra
-		jal	mips_ihb
-		nop	/* delay slot */
-		move	ra, t0
-#endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	t0, CP0_STATUS
 		li	t1, ST0_CU0 | (STATMASK & ~1)
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
@@ -576,25 +401,6 @@
 		or	t0, t1
 		xori	t0, STATMASK & ~1
 		mtc0	t0, CP0_STATUS
-#ifdef CONFIG_MIPS_MT_SMTC
-		_ehb
-		andi	v0, v0, VPECONTROL_TE
-		beqz	v0, 2f
-		nop	/* delay slot */
-		emt
-2:
-		mfc0	v0, CP0_TCSTATUS
-		/* Clear IXMT, then OR in previous value */
-		ori	v0, TCSTATUS_IXMT
-		xori	v0, TCSTATUS_IXMT
-		or	v0, v1, v0
-		mtc0	v0, CP0_TCSTATUS
-		/*
-		 * irq_disable_hazard below should expand to EHB
-		 * on 24K/34K CPUS
-		 */
-		.set pop
-#endif /* CONFIG_MIPS_MT_SMTC */
 		irq_disable_hazard
 		.endm
 

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d2d961d..7de8658 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h

@@ -159,11 +159,7 @@
  * We stash processor id into a COP0 register to retrieve it fast
  * at kernel exception entry.
  */
-#if defined(CONFIG_MIPS_MT_SMTC)
-#define SMP_CPUID_REG		2, 2	/* TCBIND */
-#define ASM_SMP_CPUID_REG	$2, 2
-#define SMP_CPUID_PTRSHIFT	19
-#elif defined(CONFIG_MIPS_PGD_C0_CONTEXT)
+#if   defined(CONFIG_MIPS_PGD_C0_CONTEXT)
 #define SMP_CPUID_REG		20, 0	/* XCONTEXT */
 #define ASM_SMP_CPUID_REG	$20
 #define SMP_CPUID_PTRSHIFT	48
@@ -179,13 +175,8 @@
 #define SMP_CPUID_REGSHIFT	(SMP_CPUID_PTRSHIFT + 2)
 #endif
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define ASM_CPUID_MFC0		mfc0
-#define UASM_i_CPUID_MFC0	uasm_i_mfc0
-#else
 #define ASM_CPUID_MFC0		MFC0
 #define UASM_i_CPUID_MFC0	UASM_i_MFC0
-#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */

diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index 24f534a..8f3047d 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h

@@ -52,14 +52,11 @@
  */
 extern unsigned int __weak get_c0_compare_int(void);
 extern int r4k_clockevent_init(void);
-extern int smtc_clockevent_init(void);
 extern int gic_clockevent_init(void);
 
 static inline int mips_clockevent_init(void)
 {
-#ifdef CONFIG_MIPS_MT_SMTC
-	return smtc_clockevent_init();
-#elif defined(CONFIG_CEVT_GIC)
+#if   defined(CONFIG_CEVT_GIC)
 	return (gic_clockevent_init() | r4k_clockevent_init());
 #elif defined(CONFIG_CEVT_R4K)
 	return r4k_clockevent_init();

diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index c542475..b05bb70 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h

@@ -4,12 +4,16 @@
  * for more details.
  *
  * Copyright (C) 1998, 1999, 2003 by Ralf Baechle
+ * Copyright (C) 2014 by Maciej W. Rozycki
  */
 #ifndef _ASM_TIMEX_H
 #define _ASM_TIMEX_H
 
 #ifdef __KERNEL__
 
+#include <linux/compiler.h>
+
+#include <asm/cpu.h>
 #include <asm/cpu-features.h>
 #include <asm/mipsregs.h>
 #include <asm/cpu-type.h>
@@ -45,30 +49,55 @@
  * However for now the implementaton of this function doesn't get these
  * fine details right.
  */
+static inline int can_use_mips_counter(unsigned int prid)
+{
+	int comp = (prid & PRID_COMP_MASK) != PRID_COMP_LEGACY;
+
+	if (__builtin_constant_p(cpu_has_counter) && !cpu_has_counter)
+		return 0;
+	else if (__builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r)
+		return 1;
+	else if (likely(!__builtin_constant_p(cpu_has_mips_r) && comp))
+		return 1;
+	/* Make sure we don't peek at cpu_data[0].options in the fast path! */
+	if (!__builtin_constant_p(cpu_has_counter))
+		asm volatile("" : "=m" (cpu_data[0].options));
+	if (likely(cpu_has_counter &&
+		   prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0))))
+		return 1;
+	else
+		return 0;
+}
+
 static inline cycles_t get_cycles(void)
 {
-	switch (boot_cpu_type()) {
-	case CPU_R4400PC:
-	case CPU_R4400SC:
-	case CPU_R4400MC:
-		if ((read_c0_prid() & 0xff) >= 0x0050)
-			return read_c0_count();
-		break;
-
-        case CPU_R4000PC:
-        case CPU_R4000SC:
-        case CPU_R4000MC:
-		break;
-
-	default:
-		if (cpu_has_counter)
-			return read_c0_count();
-		break;
-	}
-
-	return 0;	/* no usable counter */
+	if (can_use_mips_counter(read_c0_prid()))
+		return read_c0_count();
+	else
+		return 0;	/* no usable counter */
 }
 
+/*
+ * Like get_cycles - but where c0_count is not available we desperately
+ * use c0_random in an attempt to get at least a little bit of entropy.
+ *
+ * R6000 and R6000A have neither a count register nor a random register.
+ * That leaves no entropy source in the CPU itself.
+ */
+static inline unsigned long random_get_entropy(void)
+{
+	unsigned int prid = read_c0_prid();
+	unsigned int imp = prid & PRID_IMP_MASK;
+
+	if (can_use_mips_counter(prid))
+		return read_c0_count();
+	else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
+		return read_c0_random();
+	else
+		return 0;	/* no usable register */
+}
+#define random_get_entropy random_get_entropy
+
 #endif /* __KERNEL__ */
 
 #endif /*  _ASM_TIMEX_H */

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index c33a956..f8d63b3 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h

@@ -55,6 +55,9 @@
 #define Ip_u2u1u3(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
+#define Ip_u3u2u1(op)							\
+void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+
 #define Ip_u3u1u2(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
@@ -74,6 +77,9 @@
 #define Ip_u1u2(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
 
+#define Ip_u2u1(op)							\
+void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
+
 #define Ip_u1s2(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, signed int b)
 
@@ -99,6 +105,7 @@
 Ip_u3u1u2(_daddu);
 Ip_u2u1msbu3(_dins);
 Ip_u2u1msbu3(_dinsm);
+Ip_u1u2(_divu);
 Ip_u1u2u3(_dmfc0);
 Ip_u1u2u3(_dmtc0);
 Ip_u2u1u3(_drotr);
@@ -114,16 +121,22 @@
 Ip_u2u1msbu3(_ins);
 Ip_u1(_j);
 Ip_u1(_jal);
+Ip_u2u1(_jalr);
 Ip_u1(_jr);
+Ip_u2s3u1(_lb);
 Ip_u2s3u1(_ld);
 Ip_u3u1u2(_ldx);
+Ip_u2s3u1(_lh);
 Ip_u2s3u1(_ll);
 Ip_u2s3u1(_lld);
 Ip_u1s2(_lui);
 Ip_u2s3u1(_lw);
 Ip_u3u1u2(_lwx);
 Ip_u1u2u3(_mfc0);
+Ip_u1(_mfhi);
+Ip_u1(_mflo);
 Ip_u1u2u3(_mtc0);
+Ip_u3u1u2(_mul);
 Ip_u3u1u2(_or);
 Ip_u2u1u3(_ori);
 Ip_u2s3u1(_pref);
@@ -133,17 +146,25 @@
 Ip_u2s3u1(_scd);
 Ip_u2s3u1(_sd);
 Ip_u2u1u3(_sll);
+Ip_u3u2u1(_sllv);
+Ip_u2u1s3(_sltiu);
+Ip_u3u1u2(_sltu);
 Ip_u2u1u3(_sra);
 Ip_u2u1u3(_srl);
+Ip_u3u2u1(_srlv);
 Ip_u3u1u2(_subu);
 Ip_u2s3u1(_sw);
+Ip_u1(_sync);
 Ip_u1(_syscall);
 Ip_0(_tlbp);
 Ip_0(_tlbr);
 Ip_0(_tlbwi);
 Ip_0(_tlbwr);
+Ip_u1(_wait);
+Ip_u2u1(_wsbh);
 Ip_u3u1u2(_xor);
 Ip_u2u1u3(_xori);
+Ip_u2u1(_yield);
 
 
 /* Handle labels. */
@@ -264,6 +285,8 @@
 		   unsigned int bit, int lid);
 void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg,
 		   unsigned int bit, int lid);
+void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1,
+		 unsigned int r2, int lid);
 void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);

diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index be7196e..96fe739 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild

@@ -4,6 +4,7 @@
 generic-y += auxvec.h
 generic-y += ipcbuf.h
 
+header-y += bitfield.h
 header-y += bitsperlong.h
 header-y += break.h
 header-y += byteorder.h

diff --git a/arch/mips/include/uapi/asm/bitfield.h b/arch/mips/include/uapi/asm/bitfield.h
new file mode 100644
index 0000000..ad98613
--- /dev/null
+++ b/arch/mips/include/uapi/asm/bitfield.h

@@ -0,0 +1,29 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2014 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __UAPI_ASM_BITFIELD_H
+#define __UAPI_ASM_BITFIELD_H
+
+/*
+ * Damn ...  bitfields depend on byteorder :-(
+ */
+#ifdef __MIPSEB__
+#define __BITFIELD_FIELD(field, more)					\
+	field;								\
+	more
+
+#elif defined(__MIPSEL__)
+
+#define __BITFIELD_FIELD(field, more)					\
+	more								\
+	field;
+
+#else /* !defined (__MIPSEB__) && !defined (__MIPSEL__) */
+#error "MIPS but neither __MIPSEL__ nor __MIPSEB__?"
+#endif
+
+#endif /* __UAPI_ASM_BITFIELD_H */

diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 3125797..4b71602 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h

@@ -13,6 +13,8 @@
 #ifndef _UAPI_ASM_INST_H
 #define _UAPI_ASM_INST_H
 
+#include <asm/bitfield.h>
+
 /*
  * Major opcodes; before MIPS IV cop1x was called cop3.
  */
@@ -74,16 +76,17 @@
 enum spec3_op {
 	ext_op, dextm_op, dextu_op, dext_op,
 	ins_op, dinsm_op, dinsu_op, dins_op,
-	lx_op     = 0x0a, lwle_op   = 0x19,
-	lwre_op   = 0x1a, cachee_op = 0x1b,
-	sbe_op    = 0x1c, she_op    = 0x1d,
-	sce_op    = 0x1e, swe_op    = 0x1f,
-	bshfl_op  = 0x20, swle_op   = 0x21,
-	swre_op   = 0x22, prefe_op  = 0x23,
-	dbshfl_op = 0x24, lbue_op   = 0x28,
-	lhue_op   = 0x29, lbe_op    = 0x2c,
-	lhe_op    = 0x2d, lle_op    = 0x2e,
-	lwe_op    = 0x2f, rdhwr_op  = 0x3b
+	yield_op  = 0x09, lx_op     = 0x0a,
+	lwle_op   = 0x19, lwre_op   = 0x1a,
+	cachee_op = 0x1b, sbe_op    = 0x1c,
+	she_op    = 0x1d, sce_op    = 0x1e,
+	swe_op    = 0x1f, bshfl_op  = 0x20,
+	swle_op   = 0x21, swre_op   = 0x22,
+	prefe_op  = 0x23, dbshfl_op = 0x24,
+	lbue_op   = 0x28, lhue_op   = 0x29,
+	lbe_op    = 0x2c, lhe_op    = 0x2d,
+	lle_op    = 0x2e, lwe_op    = 0x2f,
+	rdhwr_op  = 0x3b
 };
 
 /*
@@ -125,7 +128,8 @@
 enum cop0_coi_func {
 	tlbr_op	      = 0x01, tlbwi_op	    = 0x02,
 	tlbwr_op      = 0x06, tlbp_op	    = 0x08,
-	rfe_op	      = 0x10, eret_op	    = 0x18
+	rfe_op	      = 0x10, eret_op	    = 0x18,
+	wait_op       = 0x20,
 };
 
 /*
@@ -202,6 +206,16 @@
 };
 
 /*
+ * BSHFL opcodes
+ */
+enum bshfl_func {
+	wsbh_op = 0x2,
+	dshd_op = 0x5,
+	seb_op  = 0x10,
+	seh_op  = 0x18,
+};
+
+/*
  * (microMIPS) Major opcodes.
  */
 enum mm_major_op {
@@ -244,17 +258,22 @@
 enum mm_32a_minor_op {
 	mm_sll32_op = 0x000,
 	mm_ins_op = 0x00c,
+	mm_sllv32_op = 0x010,
 	mm_ext_op = 0x02c,
 	mm_pool32axf_op = 0x03c,
 	mm_srl32_op = 0x040,
 	mm_sra_op = 0x080,
+	mm_srlv32_op = 0x090,
 	mm_rotr_op = 0x0c0,
 	mm_lwxs_op = 0x118,
 	mm_addu32_op = 0x150,
 	mm_subu32_op = 0x1d0,
+	mm_wsbh_op = 0x1ec,
+	mm_mul_op = 0x210,
 	mm_and_op = 0x250,
 	mm_or32_op = 0x290,
 	mm_xor32_op = 0x310,
+	mm_sltu_op = 0x390,
 };
 
 /*
@@ -294,15 +313,20 @@
 	mm_mfc0_op = 0x003,
 	mm_mtc0_op = 0x00b,
 	mm_tlbp_op = 0x00d,
+	mm_mfhi32_op = 0x035,
 	mm_jalr_op = 0x03c,
 	mm_tlbr_op = 0x04d,
+	mm_mflo32_op = 0x075,
 	mm_jalrhb_op = 0x07c,
 	mm_tlbwi_op = 0x08d,
 	mm_tlbwr_op = 0x0cd,
 	mm_jalrs_op = 0x13c,
 	mm_jalrshb_op = 0x17c,
+	mm_sync_op = 0x1ad,
 	mm_syscall_op = 0x22d,
+	mm_wait_op = 0x24d,
 	mm_eret_op = 0x3cd,
+	mm_divu_op = 0x5dc,
 };
 
 /*
@@ -480,24 +504,6 @@
  */
 #define MM_NOP16	0x0c00
 
-/*
- * Damn ...  bitfields depend from byteorder :-(
- */
-#ifdef __MIPSEB__
-#define __BITFIELD_FIELD(field, more)					\
-	field;								\
-	more
-
-#elif defined(__MIPSEL__)
-
-#define __BITFIELD_FIELD(field, more)					\
-	more								\
-	field;
-
-#else /* !defined (__MIPSEB__) && !defined (__MIPSEL__) */
-#error "MIPS but neither __MIPSEL__ nor __MIPSEB__?"
-#endif
-
 struct j_format {
 	__BITFIELD_FIELD(unsigned int opcode : 6, /* Jump format */
 	__BITFIELD_FIELD(unsigned int target : 26,

diff --git a/arch/mips/include/uapi/asm/kvm_para.h b/arch/mips/include/uapi/asm/kvm_para.h
index 14fab8f..7e16d7c 100644
--- a/arch/mips/include/uapi/asm/kvm_para.h
+++ b/arch/mips/include/uapi/asm/kvm_para.h

@@ -1 +1,5 @@
-#include <asm-generic/kvm_para.h>
+#ifndef _UAPI_ASM_MIPS_KVM_PARA_H
+#define _UAPI_ASM_MIPS_KVM_PARA_H
+
+
+#endif /* _UAPI_ASM_MIPS_KVM_PARA_H */

diff --git a/arch/mips/include/uapi/asm/types.h b/arch/mips/include/uapi/asm/types.h
index 7ac9d0b..f3dd9ff 100644
--- a/arch/mips/include/uapi/asm/types.h
+++ b/arch/mips/include/uapi/asm/types.h

@@ -14,9 +14,12 @@
 /*
  * We don't use int-l64.h for the kernel anymore but still use it for
  * userspace to avoid code changes.
+ *
+ * However, some user programs (e.g. perf) may not want this. They can
+ * define __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
  */
 #ifndef __KERNEL__
-# if _MIPS_SZLONG == 64
+# if _MIPS_SZLONG == 64 && !defined(__SANE_USERSPACE_TYPES__)
 #  include <asm-generic/int-l64.h>
 # else
 #  include <asm-generic/int-ll64.h>

diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 277dab3..008a2fe 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile

@@ -17,7 +17,6 @@
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
-obj-$(CONFIG_MIPS_MT_SMTC)	+= cevt-smtc.o
 obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GIC)		+= cevt-gic.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
@@ -42,7 +41,7 @@
 obj-$(CONFIG_CPU_R3000)		+= r2300_fpu.o r2300_switch.o
 obj-$(CONFIG_CPU_R6000)		+= r6000_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_TX39XX)	+= r2300_fpu.o r2300_switch.o
-obj-$(CONFIG_CPU_CAVIUM_OCTEON) += octeon_switch.o
+obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= r4k_fpu.o octeon_switch.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP_UP)		+= smp-up.o
@@ -50,7 +49,6 @@
 
 obj-$(CONFIG_MIPS_MT)		+= mips-mt.o
 obj-$(CONFIG_MIPS_MT_FPAFF)	+= mips-mt-fpaff.o
-obj-$(CONFIG_MIPS_MT_SMTC)	+= smtc.o smtc-asm.o smtc-proc.o
 obj-$(CONFIG_MIPS_MT_SMP)	+= smp-mt.o
 obj-$(CONFIG_MIPS_CMP)		+= smp-cmp.o
 obj-$(CONFIG_MIPS_CPS)		+= smp-cps.o cps-vec.o
@@ -107,6 +105,9 @@
 obj-$(CONFIG_MIPS_CM)		+= mips-cm.o
 obj-$(CONFIG_MIPS_CPC)		+= mips-cpc.o
 
+obj-$(CONFIG_CPU_PM)		+= pm.o
+obj-$(CONFIG_MIPS_CPS_PM)	+= pm-cps.o
+
 #
 # DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is not
 # safe to unconditionnaly use the assembler -mdsp / -mdspr2 switches

diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 0ea75c2..02f075d 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c

@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/kbuild.h>
 #include <linux/suspend.h>
+#include <asm/pm.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/smp-cps.h>
@@ -64,9 +65,6 @@
 	OFFSET(PT_BVADDR, pt_regs, cp0_badvaddr);
 	OFFSET(PT_STATUS, pt_regs, cp0_status);
 	OFFSET(PT_CAUSE, pt_regs, cp0_cause);
-#ifdef CONFIG_MIPS_MT_SMTC
-	OFFSET(PT_TCSTATUS, pt_regs, cp0_tcstatus);
-#endif /* CONFIG_MIPS_MT_SMTC */
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 	OFFSET(PT_MPL, pt_regs, mpl);
 	OFFSET(PT_MTP, pt_regs, mtp);
@@ -404,6 +402,20 @@
 }
 #endif
 
+#ifdef CONFIG_CPU_PM
+void output_pm_defines(void)
+{
+	COMMENT(" PM offsets. ");
+#ifdef CONFIG_EVA
+	OFFSET(SSS_SEGCTL0,	mips_static_suspend_state, segctl[0]);
+	OFFSET(SSS_SEGCTL1,	mips_static_suspend_state, segctl[1]);
+	OFFSET(SSS_SEGCTL2,	mips_static_suspend_state, segctl[2]);
+#endif
+	OFFSET(SSS_SP,		mips_static_suspend_state, sp);
+	BLANK();
+}
+#endif
+
 void output_kvm_defines(void)
 {
 	COMMENT(" KVM/MIPS Specfic offsets. ");
@@ -472,10 +484,14 @@
 void output_cps_defines(void)
 {
 	COMMENT(" MIPS CPS offsets. ");
-	OFFSET(BOOTCFG_CORE, boot_config, core);
-	OFFSET(BOOTCFG_VPE, boot_config, vpe);
-	OFFSET(BOOTCFG_PC, boot_config, pc);
-	OFFSET(BOOTCFG_SP, boot_config, sp);
-	OFFSET(BOOTCFG_GP, boot_config, gp);
+
+	OFFSET(COREBOOTCFG_VPEMASK, core_boot_config, vpe_mask);
+	OFFSET(COREBOOTCFG_VPECONFIG, core_boot_config, vpe_config);
+	DEFINE(COREBOOTCFG_SIZE, sizeof(struct core_boot_config));
+
+	OFFSET(VPEBOOTCFG_PC, vpe_boot_config, pc);
+	OFFSET(VPEBOOTCFG_SP, vpe_boot_config, sp);
+	OFFSET(VPEBOOTCFG_GP, vpe_boot_config, gp);
+	DEFINE(VPEBOOTCFG_SIZE, sizeof(struct vpe_boot_config));
 }
 #endif

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 76122ff..7b2df22 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c

@@ -48,6 +48,202 @@
 	return epc;
 }
 
+/* (microMIPS) Convert 16-bit register encoding to 32-bit register encoding. */
+static const unsigned int reg16to32map[8] = {16, 17, 2, 3, 4, 5, 6, 7};
+/* Emulate microMIPS branch dec_insn: set *contpc, return 1 (0 if no branch) */
+int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+		       unsigned long *contpc)
+{
+	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
+	int bc_false = 0;
+	unsigned int fcr31;
+	unsigned int bit;
+
+	if (!cpu_has_mmips)
+		return 0;
+
+	switch (insn.mm_i_format.opcode) {
+	case mm_pool32a_op:
+		if ((insn.mm_i_format.simmediate & MM_POOL32A_MINOR_MASK) ==
+		    mm_pool32axf_op) {
+			switch (insn.mm_i_format.simmediate >>
+				MM_POOL32A_MINOR_SHIFT) {
+			case mm_jalr_op:
+			case mm_jalrhb_op:
+			case mm_jalrs_op:
+			case mm_jalrshb_op:
+				if (insn.mm_i_format.rt != 0)	/* Not mm_jr */
+					regs->regs[insn.mm_i_format.rt] =
+						regs->cp0_epc +
+						dec_insn.pc_inc +
+						dec_insn.next_pc_inc;
+				*contpc = regs->regs[insn.mm_i_format.rs];
+				return 1;
+			}
+		}
+		break;
+	case mm_pool32i_op:
+		switch (insn.mm_i_format.rt) {
+		case mm_bltzals_op:
+		case mm_bltzal_op:
+			regs->regs[31] = regs->cp0_epc +
+				dec_insn.pc_inc +
+				dec_insn.next_pc_inc;
+			/* Fall through */
+		case mm_bltz_op:
+			if ((long)regs->regs[insn.mm_i_format.rs] < 0)
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.mm_i_format.simmediate << 1);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			return 1;
+		case mm_bgezals_op:
+		case mm_bgezal_op:
+			regs->regs[31] = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			/* Fall through */
+		case mm_bgez_op:
+			if ((long)regs->regs[insn.mm_i_format.rs] >= 0)
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.mm_i_format.simmediate << 1);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			return 1;
+		case mm_blez_op:
+			if ((long)regs->regs[insn.mm_i_format.rs] <= 0)
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.mm_i_format.simmediate << 1);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			return 1;
+		case mm_bgtz_op:	/* taken when rs > 0, the inverse of BLEZ */
+			if ((long)regs->regs[insn.mm_i_format.rs] > 0)
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.mm_i_format.simmediate << 1);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			return 1;
+		case mm_bc2f_op:
+		case mm_bc1f_op:
+			bc_false = 1;
+			/* Fall through */
+		case mm_bc2t_op:
+		case mm_bc1t_op:
+			preempt_disable();
+			if (is_fpu_owner())
+				asm volatile("cfc1\t%0,$31" : "=r" (fcr31));
+			else
+				fcr31 = current->thread.fpu.fcr31;
+			preempt_enable();
+
+			if (bc_false)	/* invert so one test serves both senses */
+				fcr31 = ~fcr31;
+
+			bit = (insn.mm_i_format.rs >> 2);  /* condition code number */
+			bit += (bit != 0);	/* cc 0 -> bit 23, cc 1..7 -> 25..31 */
+			bit += 23;
+			if (fcr31 & (1U << bit))	/* unsigned: bit may be 31 */
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.mm_i_format.simmediate << 1);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc + dec_insn.next_pc_inc;
+			return 1;
+		}
+		break;
+	case mm_pool16c_op:
+		switch (insn.mm_i_format.rt) {
+		case mm_jalr16_op:
+		case mm_jalrs16_op:
+			regs->regs[31] = regs->cp0_epc +
+				dec_insn.pc_inc + dec_insn.next_pc_inc;
+			/* Fall through */
+		case mm_jr16_op:
+			*contpc = regs->regs[insn.mm_i_format.rs];
+			return 1;
+		}
+		break;
+	case mm_beqz16_op:
+		if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] == 0)
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc +
+				(insn.mm_b1_format.simmediate << 1);
+		else
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc + dec_insn.next_pc_inc;
+		return 1;
+	case mm_bnez16_op:
+		if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] != 0)
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc +
+				(insn.mm_b1_format.simmediate << 1);
+		else
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc + dec_insn.next_pc_inc;
+		return 1;
+	case mm_b16_op:
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			 (insn.mm_b0_format.simmediate << 1);
+		return 1;
+	case mm_beq32_op:
+		if (regs->regs[insn.mm_i_format.rs] ==
+		    regs->regs[insn.mm_i_format.rt])
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc +
+				(insn.mm_i_format.simmediate << 1);
+		else
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc +
+				dec_insn.next_pc_inc;
+		return 1;
+	case mm_bne32_op:
+		if (regs->regs[insn.mm_i_format.rs] !=
+		    regs->regs[insn.mm_i_format.rt])
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc +
+				(insn.mm_i_format.simmediate << 1);
+		else
+			*contpc = regs->cp0_epc +
+				dec_insn.pc_inc + dec_insn.next_pc_inc;
+		return 1;
+	case mm_jalx32_op:
+		regs->regs[31] = regs->cp0_epc +
+			dec_insn.pc_inc + dec_insn.next_pc_inc;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc;
+		*contpc >>= 28;		/* keep the region bits above bit 27 */
+		*contpc <<= 28;
+		*contpc |= (insn.j_format.target << 2);
+		return 1;
+	case mm_jals32_op:
+	case mm_jal32_op:
+		regs->regs[31] = regs->cp0_epc +
+			dec_insn.pc_inc + dec_insn.next_pc_inc;
+		/* Fall through */
+	case mm_j32_op:
+		*contpc = regs->cp0_epc + dec_insn.pc_inc;
+		*contpc >>= 27;		/* keep the region bits above bit 26 */
+		*contpc <<= 27;
+		*contpc |= (insn.j_format.target << 1);
+		set_isa16_mode(*contpc);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  * Compute return address and emulate branch in microMIPS mode after an
  * exception only. It does not handle compact branches/jumps and cannot
@@ -366,7 +562,11 @@
 	case cop1_op:
 		preempt_disable();
 		if (is_fpu_owner())
-			asm volatile("cfc1\t%0,$31" : "=r" (fcr31));
+			asm volatile(
+				".set push\n"
+				"\t.set mips1\n"
+				"\tcfc1\t%0,$31\n"
+				"\t.set pop" : "=r" (fcr31));
 		else
 			fcr31 = current->thread.fpu.fcr31;
 		preempt_enable();

diff --git a/arch/mips/kernel/cevt-gic.c b/arch/mips/kernel/cevt-gic.c
index 594cbbf..6093716 100644
--- a/arch/mips/kernel/cevt-gic.c
+++ b/arch/mips/kernel/cevt-gic.c

@@ -26,7 +26,7 @@
 
 	cnt = gic_read_count();
 	cnt += (u64)delta;
-	gic_write_compare(cnt);
+	gic_write_cpu_compare(cnt, cpumask_first(evt->cpumask));
 	res = ((int)(gic_read_count() - cnt) >= 0) ? -ETIME : 0;
 	return res;
 }
@@ -73,7 +73,8 @@
 	cd = &per_cpu(gic_clockevent_device, cpu);
 
 	cd->name		= "MIPS GIC";
-	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_C3STOP;
 
 	clockevent_set_clock(cd, gic_frequency);
 

diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 50d3f5a..bc127e2 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c

@@ -12,17 +12,10 @@
 #include <linux/smp.h>
 #include <linux/irq.h>
 
-#include <asm/smtc_ipi.h>
 #include <asm/time.h>
 #include <asm/cevt-r4k.h>
 #include <asm/gic.h>
 
-/*
- * The SMTC Kernel for the 34K, 1004K, et. al. replaces several
- * of these routines with SMTC-specific variants.
- */
-
-#ifndef CONFIG_MIPS_MT_SMTC
 static int mips_next_event(unsigned long delta,
 			   struct clock_event_device *evt)
 {
@@ -36,8 +29,6 @@
 	return res;
 }
 
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 void mips_set_clock_mode(enum clock_event_mode mode,
 				struct clock_event_device *evt)
 {
@@ -47,7 +38,6 @@
 DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 int cp0_timer_irq_installed;
 
-#ifndef CONFIG_MIPS_MT_SMTC
 irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2;
@@ -72,9 +62,6 @@
 		/* Clear Count/Compare Interrupt */
 		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
-#ifdef CONFIG_CEVT_GIC
-		if (!gic_present)
-#endif
 		cd->event_handler(cd);
 	}
 
@@ -82,8 +69,6 @@
 	return IRQ_HANDLED;
 }
 
-#endif /* Not CONFIG_MIPS_MT_SMTC */
-
 struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
 	.flags = IRQF_PERCPU | IRQF_TIMER,
@@ -170,7 +155,6 @@
 	return 1;
 }
 
-#ifndef CONFIG_MIPS_MT_SMTC
 int r4k_clockevent_init(void)
 {
 	unsigned int cpu = smp_processor_id();
@@ -195,7 +179,9 @@
 	cd = &per_cpu(mips_clockevent_device, cpu);
 
 	cd->name		= "MIPS";
-	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_C3STOP |
+				  CLOCK_EVT_FEAT_PERCPU;
 
 	clockevent_set_clock(cd, mips_hpt_frequency);
 
@@ -210,9 +196,6 @@
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
 
-#ifdef CONFIG_CEVT_GIC
-	if (!gic_present)
-#endif
 	clockevents_register_device(cd);
 
 	if (cp0_timer_irq_installed)
@@ -225,4 +208,3 @@
 	return 0;
 }
 
-#endif /* Not CONFIG_MIPS_MT_SMTC */

diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
deleted file mode 100644
index b6cf0a6..0000000
--- a/arch/mips/kernel/cevt-smtc.c
+++ /dev/null

@@ -1,324 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2007 MIPS Technologies, Inc.
- * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
- * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
- */
-#include <linux/clockchips.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <linux/smp.h>
-#include <linux/irq.h>
-
-#include <asm/smtc_ipi.h>
-#include <asm/time.h>
-#include <asm/cevt-r4k.h>
-
-/*
- * Variant clock event timer support for SMTC on MIPS 34K, 1004K
- * or other MIPS MT cores.
- *
- * Notes on SMTC Support:
- *
- * SMTC has multiple microthread TCs pretending to be Linux CPUs.
- * But there's only one Count/Compare pair per VPE, and Compare
- * interrupts are taken opportunisitically by available TCs
- * bound to the VPE with the Count register.  The new timer
- * framework provides for global broadcasts, but we really
- * want VPE-level multicasts for best behavior. So instead
- * of invoking the high-level clock-event broadcast code,
- * this version of SMTC support uses the historical SMTC
- * multicast mechanisms "under the hood", appearing to the
- * generic clock layer as if the interrupts are per-CPU.
- *
- * The approach taken here is to maintain a set of NR_CPUS
- * virtual timers, and track which "CPU" needs to be alerted
- * at each event.
- *
- * It's unlikely that we'll see a MIPS MT core with more than
- * 2 VPEs, but we *know* that we won't need to handle more
- * VPEs than we have "CPUs".  So NCPUs arrays of NCPUs elements
- * is always going to be overkill, but always going to be enough.
- */
-
-unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
-static int smtc_nextinvpe[NR_CPUS];
-
-/*
- * Timestamps stored are absolute values to be programmed
- * into Count register.	 Valid timestamps will never be zero.
- * If a Zero Count value is actually calculated, it is converted
- * to be a 1, which will introduce 1 or two CPU cycles of error
- * roughly once every four billion events, which at 1000 HZ means
- * about once every 50 days.  If that's actually a problem, one
- * could alternate squashing 0 to 1 and to -1.
- */
-
-#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
-#define ISVALID(x) ((x) != 0L)
-
-/*
- * Time comparison is subtle, as it's really truncated
- * modular arithmetic.
- */
-
-#define IS_SOONER(a, b, reference) \
-    (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
-
-/*
- * CATCHUP_INCREMENT, used when the function falls behind the counter.
- * Could be an increasing function instead of a constant;
- */
-
-#define CATCHUP_INCREMENT 64
-
-static int mips_next_event(unsigned long delta,
-				struct clock_event_device *evt)
-{
-	unsigned long flags;
-	unsigned int mtflags;
-	unsigned long timestamp, reference, previous;
-	unsigned long nextcomp = 0L;
-	int vpe = current_cpu_data.vpe_id;
-	int cpu = smp_processor_id();
-	local_irq_save(flags);
-	mtflags = dmt();
-
-	/*
-	 * Maintain the per-TC virtual timer
-	 * and program the per-VPE shared Count register
-	 * as appropriate here...
-	 */
-	reference = (unsigned long)read_c0_count();
-	timestamp = MAKEVALID(reference + delta);
-	/*
-	 * To really model the clock, we have to catch the case
-	 * where the current next-in-VPE timestamp is the old
-	 * timestamp for the calling CPE, but the new value is
-	 * in fact later.  In that case, we have to do a full
-	 * scan and discover the new next-in-VPE CPU id and
-	 * timestamp.
-	 */
-	previous = smtc_nexttime[vpe][cpu];
-	if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
-	    && IS_SOONER(previous, timestamp, reference)) {
-		int i;
-		int soonest = cpu;
-
-		/*
-		 * Update timestamp array here, so that new
-		 * value gets considered along with those of
-		 * other virtual CPUs on the VPE.
-		 */
-		smtc_nexttime[vpe][cpu] = timestamp;
-		for_each_online_cpu(i) {
-			if (ISVALID(smtc_nexttime[vpe][i])
-			    && IS_SOONER(smtc_nexttime[vpe][i],
-				smtc_nexttime[vpe][soonest], reference)) {
-				    soonest = i;
-			}
-		}
-		smtc_nextinvpe[vpe] = soonest;
-		nextcomp = smtc_nexttime[vpe][soonest];
-	/*
-	 * Otherwise, we don't have to process the whole array rank,
-	 * we just have to see if the event horizon has gotten closer.
-	 */
-	} else {
-		if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
-		    IS_SOONER(timestamp,
-			smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
-			    smtc_nextinvpe[vpe] = cpu;
-			    nextcomp = timestamp;
-		}
-		/*
-		 * Since next-in-VPE may me the same as the executing
-		 * virtual CPU, we update the array *after* checking
-		 * its value.
-		 */
-		smtc_nexttime[vpe][cpu] = timestamp;
-	}
-
-	/*
-	 * It may be that, in fact, we don't need to update Compare,
-	 * but if we do, we want to make sure we didn't fall into
-	 * a crack just behind Count.
-	 */
-	if (ISVALID(nextcomp)) {
-		write_c0_compare(nextcomp);
-		ehb();
-		/*
-		 * We never return an error, we just make sure
-		 * that we trigger the handlers as quickly as
-		 * we can if we fell behind.
-		 */
-		while ((nextcomp - (unsigned long)read_c0_count())
-			> (unsigned long)LONG_MAX) {
-			nextcomp += CATCHUP_INCREMENT;
-			write_c0_compare(nextcomp);
-			ehb();
-		}
-	}
-	emt(mtflags);
-	local_irq_restore(flags);
-	return 0;
-}
-
-
-void smtc_distribute_timer(int vpe)
-{
-	unsigned long flags;
-	unsigned int mtflags;
-	int cpu;
-	struct clock_event_device *cd;
-	unsigned long nextstamp;
-	unsigned long reference;
-
-
-repeat:
-	nextstamp = 0L;
-	for_each_online_cpu(cpu) {
-	    /*
-	     * Find virtual CPUs within the current VPE who have
-	     * unserviced timer requests whose time is now past.
-	     */
-	    local_irq_save(flags);
-	    mtflags = dmt();
-	    if (cpu_data[cpu].vpe_id == vpe &&
-		ISVALID(smtc_nexttime[vpe][cpu])) {
-		reference = (unsigned long)read_c0_count();
-		if ((smtc_nexttime[vpe][cpu] - reference)
-			 > (unsigned long)LONG_MAX) {
-			    smtc_nexttime[vpe][cpu] = 0L;
-			    emt(mtflags);
-			    local_irq_restore(flags);
-			    /*
-			     * We don't send IPIs to ourself.
-			     */
-			    if (cpu != smp_processor_id()) {
-				smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-			    } else {
-				cd = &per_cpu(mips_clockevent_device, cpu);
-				cd->event_handler(cd);
-			    }
-		} else {
-			/* Local to VPE but Valid Time not yet reached. */
-			if (!ISVALID(nextstamp) ||
-			    IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
-			    reference)) {
-				smtc_nextinvpe[vpe] = cpu;
-				nextstamp = smtc_nexttime[vpe][cpu];
-			}
-			emt(mtflags);
-			local_irq_restore(flags);
-		}
-	    } else {
-		emt(mtflags);
-		local_irq_restore(flags);
-
-	    }
-	}
-	/* Reprogram for interrupt at next soonest timestamp for VPE */
-	if (ISVALID(nextstamp)) {
-		write_c0_compare(nextstamp);
-		ehb();
-		if ((nextstamp - (unsigned long)read_c0_count())
-			> (unsigned long)LONG_MAX)
-				goto repeat;
-	}
-}
-
-
-irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	/* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
-	handle_perf_irq(1);
-
-	if (read_c0_cause() & (1 << 30)) {
-		/* Clear Count/Compare Interrupt */
-		write_c0_compare(read_c0_compare());
-		smtc_distribute_timer(cpu_data[cpu].vpe_id);
-	}
-	return IRQ_HANDLED;
-}
-
-
-int smtc_clockevent_init(void)
-{
-	uint64_t mips_freq = mips_hpt_frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-	unsigned int irq;
-	int i;
-	int j;
-
-	if (!cpu_has_counter || !mips_hpt_frequency)
-		return -ENXIO;
-	if (cpu == 0) {
-		for (i = 0; i < num_possible_cpus(); i++) {
-			smtc_nextinvpe[i] = 0;
-			for (j = 0; j < num_possible_cpus(); j++)
-				smtc_nexttime[i][j] = 0L;
-		}
-		/*
-		 * SMTC also can't have the usablility test
-		 * run by secondary TCs once Compare is in use.
-		 */
-		if (!c0_compare_int_usable())
-			return -ENXIO;
-	}
-
-	/*
-	 * With vectored interrupts things are getting platform specific.
-	 * get_c0_compare_int is a hook to allow a platform to return the
-	 * interrupt number of it's liking.
-	 */
-	irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
-	if (get_c0_compare_int)
-		irq = get_c0_compare_int();
-
-	cd = &per_cpu(mips_clockevent_device, cpu);
-
-	cd->name		= "MIPS";
-	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
-
-	/* Calculate the min / max delta */
-	cd->mult	= div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 32;
-	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
-
-	cd->rating		= 300;
-	cd->irq			= irq;
-	cd->cpumask		= cpumask_of(cpu);
-	cd->set_next_event	= mips_next_event;
-	cd->set_mode		= mips_set_clock_mode;
-	cd->event_handler	= mips_event_handler;
-
-	clockevents_register_device(cd);
-
-	/*
-	 * On SMTC we only want to do the data structure
-	 * initialization and IRQ setup once.
-	 */
-	if (cpu)
-		return 0;
-	/*
-	 * And we need the hwmask associated with the c0_compare
-	 * vector to be initialized.
-	 */
-	irq_hwmask[irq] = (0x100 << cp0_compare_irq);
-	if (cp0_timer_irq_installed)
-		return 0;
-
-	cp0_timer_irq_installed = 1;
-
-	setup_irq(irq, &c0_compare_irqaction);
-
-	return 0;
-}

diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index f7a46db..6f4f739 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S

@@ -14,19 +14,43 @@
 #include <asm/asmmacro.h>
 #include <asm/cacheops.h>
 #include <asm/mipsregs.h>
+#include <asm/mipsmtregs.h>
+#include <asm/pm.h>
 
-#define GCR_CL_COHERENCE_OFS 0x2008
+#define GCR_CL_COHERENCE_OFS	0x2008
+#define GCR_CL_ID_OFS		0x2028
+
+.extern mips_cm_base
+
+.set noreorder
+
+	/*
+	 * Branch to nomt unless Config3.MT is set; dest is non-zero on fall
+	 * through. The final delay slot is the instruction after the macro.
+	 */
+	.macro	has_mt	dest, nomt
+	mfc0	\dest, CP0_CONFIG
+	bgez	\dest, \nomt		/* M bit (31) clear: no Config1 */
+	 mfc0	\dest, CP0_CONFIG, 1
+	bgez	\dest, \nomt		/* M bit (31) clear: no Config2 */
+	 mfc0	\dest, CP0_CONFIG, 2
+	bgez	\dest, \nomt		/* M bit (31) clear: no Config3 */
+	 mfc0	\dest, CP0_CONFIG, 3
+	andi	\dest, \dest, MIPS_CONF3_MT
+	beqz	\dest, \nomt		/* only this exit leaves dest zero */
+	.endm
 
 .section .text.cps-vec
 .balign 0x1000
-.set noreorder
 
 LEAF(mips_cps_core_entry)
 	/*
-	 * These first 8 bytes will be patched by cps_smp_setup to load the
-	 * base address of the CM GCRs into register v1.
+	 * These first 12 bytes will be patched by cps_smp_setup to load the
+	 * base address of the CM GCRs into register v1 and the CCA to use into
+	 * register s0.
 	 */
 	.quad	0
+	.word	0
 
 	/* Check whether we're here due to an NMI */
 	mfc0	k0, CP0_STATUS
@@ -117,10 +141,11 @@
 	 add	a0, a0, t0
 dcache_done:
 
-	/* Set Kseg0 cacheable, coherent, write-back, write-allocate */
+	/* Set Kseg0 CCA to that in s0 */
 	mfc0	t0, CP0_CONFIG
 	ori	t0, 0x7
-	xori	t0, 0x2
+	xori	t0, 0x7
+	or	t0, t0, s0
 	mtc0	t0, CP0_CONFIG
 	ehb
 
@@ -134,21 +159,24 @@
 	jr	t0
 	 nop
 
-1:	/* We're up, cached & coherent */
+	/*
+	 * We're up, cached & coherent. Perform any further required core-level
+	 * initialisation.
+	 */
+1:	jal	mips_cps_core_init
+	 nop
 
 	/*
-	 * TODO: We should check the VPE number we intended to boot here, and
-	 *       if non-zero we should start that VPE and stop this one. For
-	 *       the moment this doesn't matter since CPUs are brought up
-	 *       sequentially and in order, but once hotplug is implemented
-	 *       this will need revisiting.
+	 * Boot any other VPEs within this core that should be online, and
+	 * deactivate this VPE if it should be offline.
 	 */
+	jal	mips_cps_boot_vpes
+	 nop
 
 	/* Off we go! */
-	la	t0, mips_cps_bootcfg
-	lw	t1, BOOTCFG_PC(t0)
-	lw	gp, BOOTCFG_GP(t0)
-	lw	sp, BOOTCFG_SP(t0)
+	lw	t1, VPEBOOTCFG_PC(v0)
+	lw	gp, VPEBOOTCFG_GP(v0)
+	lw	sp, VPEBOOTCFG_SP(v0)
 	jr	t1
 	 nop
 	END(mips_cps_core_entry)
@@ -189,3 +217,271 @@
 	jr	k0
 	 nop
 	END(excep_ejtag)
+
+LEAF(mips_cps_core_init)
+#ifdef CONFIG_MIPS_MT
+	/* Core-level MT setup: skip it all unless the MT ASE is implemented */
+	has_mt	t0, 3f
+	 nop
+
+	.set	push
+	.set	mt
+
+	/* Only allow 1 TC per VPE to execute... */
+	dmt
+
+	/* ...and for the moment only 1 VPE */
+	dvpe
+	la	t1, 1f
+	jr.hb	t1
+	 nop
+
+	/* Enter VPE configuration state */
+1:	mfc0	t0, CP0_MVPCONTROL
+	ori	t0, t0, MVPCONTROL_VPC
+	mtc0	t0, CP0_MVPCONTROL
+
+	/* Retrieve the number of VPEs within the core */
+	mfc0	t0, CP0_MVPCONF0
+	srl	t0, t0, MVPCONF0_PVPE_SHIFT
+	andi	t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT)
+	addi	t7, t0, 1		/* t7 = PVPE field + 1 = VPE count */
+
+	/* If there's only 1, we're done */
+	beqz	t0, 2f
+	 nop
+
+	/* Loop through each VPE within this core */
+	li	t5, 1			/* t5 = loop index; index 0 is skipped */
+
+1:	/* Operate on the appropriate TC */
+	mtc0	t5, CP0_VPECONTROL	/* whole register written with t5 */
+	ehb
+
+	/* Bind TC to VPE (1:1 TC:VPE mapping) */
+	mttc0	t5, CP0_TCBIND
+
+	/* Set exclusive TC, non-active, master */
+	li	t0, VPECONF0_MVP
+	sll	t1, t5, VPECONF0_XTC_SHIFT
+	or	t0, t0, t1
+	mttc0	t0, CP0_VPECONF0
+
+	/* Set TC non-active, non-allocatable */
+	mttc0	zero, CP0_TCSTATUS
+
+	/* Set TC halted */
+	li	t0, TCHALT_H
+	mttc0	t0, CP0_TCHALT
+
+	/* Next VPE */
+	addi	t5, t5, 1
+	slt	t0, t5, t7		/* loop while t5 < VPE count */
+	bnez	t0, 1b
+	 nop
+
+	/* Leave VPE configuration state */
+2:	mfc0	t0, CP0_MVPCONTROL
+	xori	t0, t0, MVPCONTROL_VPC	/* toggles the VPC bit set above */
+	mtc0	t0, CP0_MVPCONTROL
+
+3:	.set	pop
+#endif /* CONFIG_MIPS_MT */
+	jr	ra
+	 nop
+	END(mips_cps_core_init)
+
+LEAF(mips_cps_boot_vpes)
+	/* Retrieve CM base address */
+	la	t0, mips_cm_base
+	lw	t0, 0(t0)
+
+	/* Calculate a pointer to this core's struct core_boot_config */
+	lw	t0, GCR_CL_ID_OFS(t0)
+	li	t1, COREBOOTCFG_SIZE
+	mul	t0, t0, t1
+	la	t1, mips_cps_core_bootcfg
+	lw	t1, 0(t1)
+	addu	t0, t0, t1
+
+	/* Calculate this VPE's ID. Default t9 to 0 for every no-MT exit */
+	li	t9, 0
+	has_mt	t6, 1f
+	 nop
+
+	/* Find the number of VPEs present in the core */
+	mfc0	t1, CP0_MVPCONF0
+	srl	t1, t1, MVPCONF0_PVPE_SHIFT
+	andi	t1, t1, MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT
+	addi	t1, t1, 1
+
+	/* Calculate a mask for the VPE ID from EBase.CPUNum */
+	clz	t1, t1
+	li	t2, 31
+	subu	t1, t2, t1		/* t1 = index of highest set bit */
+	li	t2, 1
+	sll	t1, t2, t1
+	addiu	t1, t1, -1		/* t1 = (1 << index) - 1 */
+
+	/* Retrieve the VPE ID from EBase.CPUNum */
+	mfc0	t9, $15, 1
+	and	t9, t9, t1
+
+1:	/* v0 = pointer to this VPE's struct vpe_boot_config (return value) */
+	li	t1, VPEBOOTCFG_SIZE
+	mul	v0, t9, t1
+	lw	t7, COREBOOTCFG_VPECONFIG(t0)
+	addu	v0, v0, t7
+
+#ifdef CONFIG_MIPS_MT
+
+	/* No MT: return. NOTE(review): only has_mt's beqz exit zeroes t6 */
+	bnez	t6, 1f
+	 nop
+	jr	ra
+	 nop
+
+	.set	push
+	.set	mt
+
+1:	/* Enter VPE configuration state */
+	dvpe
+	la	t1, 1f
+	jr.hb	t1
+	 nop
+1:	mfc0	t1, CP0_MVPCONTROL
+	ori	t1, t1, MVPCONTROL_VPC
+	mtc0	t1, CP0_MVPCONTROL
+	ehb
+
+	/* Loop through each VPE */
+	lw	t6, COREBOOTCFG_VPEMASK(t0)
+	move	t8, t6			/* t8 keeps the full mask for later */
+	li	t5, 0			/* t5 = index of the VPE being examined */
+
+	/* Check whether the VPE should be running. If not, skip it */
+1:	andi	t0, t6, 1
+	beqz	t0, 2f
+	 nop
+
+	/* Operate on the appropriate TC */
+	mfc0	t0, CP0_VPECONTROL
+	ori	t0, t0, VPECONTROL_TARGTC
+	xori	t0, t0, VPECONTROL_TARGTC	/* ori+xori clears the field */
+	or	t0, t0, t5
+	mtc0	t0, CP0_VPECONTROL
+	ehb
+
+	/* Skip the VPE if its TC is not halted */
+	mftc0	t0, CP0_TCHALT
+	beqz	t0, 2f
+	 nop
+
+	/* Calculate a pointer to the VPE's struct vpe_boot_config */
+	li	t0, VPEBOOTCFG_SIZE
+	mul	t0, t0, t5
+	addu	t0, t0, t7
+
+	/* Set the TC restart PC */
+	lw	t1, VPEBOOTCFG_PC(t0)
+	mttc0	t1, CP0_TCRESTART
+
+	/* Set the TC stack pointer */
+	lw	t1, VPEBOOTCFG_SP(t0)
+	mttgpr	t1, sp
+
+	/* Set the TC global pointer */
+	lw	t1, VPEBOOTCFG_GP(t0)
+	mttgpr	t1, gp
+
+	/* Copy config from this VPE */
+	mfc0	t0, CP0_CONFIG
+	mttc0	t0, CP0_CONFIG
+
+	/* Ensure no software interrupts are pending */
+	mttc0	zero, CP0_CAUSE
+	mttc0	zero, CP0_STATUS
+
+	/* Set TC active, not interrupt exempt */
+	mftc0	t0, CP0_TCSTATUS
+	li	t1, ~TCSTATUS_IXMT
+	and	t0, t0, t1
+	ori	t0, t0, TCSTATUS_A
+	mttc0	t0, CP0_TCSTATUS
+
+	/* Clear the TC halt bit */
+	mttc0	zero, CP0_TCHALT
+
+	/* Set VPE active */
+	mftc0	t0, CP0_VPECONF0
+	ori	t0, t0, VPECONF0_VPA
+	mttc0	t0, CP0_VPECONF0
+
+	/* Next VPE */
+2:	srl	t6, t6, 1
+	addi	t5, t5, 1
+	bnez	t6, 1b			/* stop once no mask bits remain */
+	 nop
+
+	/* Leave VPE configuration state */
+	mfc0	t1, CP0_MVPCONTROL
+	xori	t1, t1, MVPCONTROL_VPC	/* toggles the VPC bit set above */
+	mtc0	t1, CP0_MVPCONTROL
+	ehb
+	evpe
+
+	/* Check whether this VPE is meant to be running */
+	li	t0, 1
+	sll	t0, t0, t9
+	and	t0, t0, t8
+	bnez	t0, 2f
+	 nop
+
+	/* This VPE should be offline, halt the TC */
+	li	t0, TCHALT_H
+	mtc0	t0, CP0_TCHALT
+	la	t0, 1f
+1:	jr.hb	t0			/* jumps to itself */
+	 nop
+
+2:	.set	pop
+
+#endif /* CONFIG_MIPS_MT */
+
+	jr	ra
+	 nop
+	END(mips_cps_boot_vpes)
+
+#if defined(CONFIG_MIPS_CPS_PM) && defined(CONFIG_CPU_PM)
+
+	/* Calculate a pointer to this CPU's struct mips_static_suspend_state */
+	.macro	psstate	dest
+	.set	push
+	.set	noat			/* $1 (AT) is used as scratch below */
+	lw	$1, TI_CPU(gp)		/* $1 = word at offset TI_CPU from gp */
+	sll	$1, $1, LONGLOG		/* scale by 1 << LONGLOG */
+	la	\dest, __per_cpu_offset
+	addu	$1, $1, \dest
+	lw	$1, 0($1)		/* $1 = this CPU's __per_cpu_offset entry */
+	la	\dest, cps_cpu_state
+	addu	\dest, \dest, $1	/* dest = cps_cpu_state + that offset */
+	.set	pop
+	.endm
+
+LEAF(mips_cps_pm_save)
+	/* Save CPU state, then jump to the address held in v0 */
+	SUSPEND_SAVE_REGS
+	psstate	t1			/* t1 = this CPU's suspend state pointer */
+	SUSPEND_SAVE_STATIC
+	jr	v0			/* NOTE(review): v0 is set by the caller? */
+	 nop
+	END(mips_cps_pm_save)
+
+LEAF(mips_cps_pm_restore)
+	/* Restore CPU state; no explicit jr follows the final macro */
+	psstate	t1			/* t1 = this CPU's suspend state pointer */
+	RESUME_RESTORE_STATIC
+	RESUME_RESTORE_REGS_RETURN	/* presumably also returns - confirm */
+	END(mips_cps_pm_restore)
+
+#endif /* CONFIG_MIPS_CPS_PM && CONFIG_CPU_PM */

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6e8fb85..d74f957 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c

@@ -62,7 +62,7 @@
 	case CPU_34K:
 		/*
 		 * Erratum "RPS May Cause Incorrect Instruction Execution"
-		 * This code only handles VPE0, any SMP/SMTC/RTOS code
+		 * This code only handles VPE0, any SMP/RTOS code
 		 * making use of VPE1 will be responsable for that VPE.
 		 */
 		if ((c->processor_id & PRID_REV_MASK) <= PRID_REV_34K_V1_0_2)
@@ -423,7 +423,7 @@
 
 #ifndef CONFIG_MIPS_CPS
 	if (cpu_has_mips_r2) {
-		c->core = read_c0_ebase() & 0x3ff;
+		c->core = get_ebase_cpunum();
 		if (cpu_has_mipsmt)
 			c->core >>= fls(core_nvpes()) - 1;
 	}
@@ -684,21 +684,6 @@
 		 */
 		c->tlbsize = (read_c0_info() & (1 << 29)) ? 64 : 48;
 		break;
-	case PRID_IMP_RM9000:
-		c->cputype = CPU_RM9000;
-		__cpu_name[cpu] = "RM9000";
-		set_isa(c, MIPS_CPU_ISA_IV);
-		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
-			     MIPS_CPU_LLSC;
-		/*
-		 * Bit 29 in the info register of the RM9000
-		 * indicates if the TLB has 48 or 64 entries.
-		 *
-		 * 29	   1 =>	   64 entry JTLB
-		 *	   0 =>	   48 entry JTLB
-		 */
-		c->tlbsize = (read_c0_info() & (1 << 29)) ? 64 : 48;
-		break;
 	case PRID_IMP_R8000:
 		c->cputype = CPU_R8000;
 		__cpu_name[cpu] = "RM8000";
@@ -1041,6 +1026,7 @@
 	decode_configs(c);
 	/* JZRISC does not implement the CP0 counter. */
 	c->options &= ~MIPS_CPU_COUNTER;
+	BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter);
 	switch (c->processor_id & PRID_IMP_MASK) {
 	case PRID_IMP_JZRISC:
 		c->cputype = CPU_JZRISC;
@@ -1074,6 +1060,7 @@
 	switch (c->processor_id & PRID_IMP_MASK) {
 	case PRID_IMP_NETLOGIC_XLP2XX:
 	case PRID_IMP_NETLOGIC_XLP9XX:
+	case PRID_IMP_NETLOGIC_XLP5XX:
 		c->cputype = CPU_XLP;
 		__cpu_name[cpu] = "Broadcom XLPII";
 		break;

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e578685..4353d32 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S

@@ -16,9 +16,6 @@
 #include <asm/isadep.h>
 #include <asm/thread_info.h>
 #include <asm/war.h>
-#ifdef CONFIG_MIPS_MT_SMTC
-#include <asm/mipsmtregs.h>
-#endif
 
 #ifndef CONFIG_PREEMPT
 #define resume_kernel	restore_all
@@ -89,41 +86,6 @@
 	bnez	t0, syscall_exit_work
 
 restore_all:				# restore full frame
-#ifdef CONFIG_MIPS_MT_SMTC
-#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
-/* Re-arm any temporarily masked interrupts not explicitly "acked" */
-	mfc0	v0, CP0_TCSTATUS
-	ori	v1, v0, TCSTATUS_IXMT
-	mtc0	v1, CP0_TCSTATUS
-	andi	v0, TCSTATUS_IXMT
-	_ehb
-	mfc0	t0, CP0_TCCONTEXT
-	DMT	9				# dmt t1
-	jal	mips_ihb
-	mfc0	t2, CP0_STATUS
-	andi	t3, t0, 0xff00
-	or	t2, t2, t3
-	mtc0	t2, CP0_STATUS
-	_ehb
-	andi	t1, t1, VPECONTROL_TE
-	beqz	t1, 1f
-	EMT
-1:
-	mfc0	v1, CP0_TCSTATUS
-	/* We set IXMT above, XOR should clear it here */
-	xori	v1, v1, TCSTATUS_IXMT
-	or	v1, v0, v1
-	mtc0	v1, CP0_TCSTATUS
-	_ehb
-	xor	t0, t0, t3
-	mtc0	t0, CP0_TCCONTEXT
-#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
-/* Detect and execute deferred IPI "interrupts" */
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	jal	deferred_smtc_ipi
-	LONG_S	s0, TI_REGS($28)
-#endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP
 	RESTORE_AT

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index a9ce340..ac35e12 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S

@@ -21,20 +21,6 @@
 #include <asm/war.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define PANIC_PIC(msg)					\
-		.set	push;				\
-		.set	nomicromips;			\
-		.set	reorder;			\
-		PTR_LA	a0,8f;				\
-		.set	noat;				\
-		PTR_LA	AT, panic;			\
-		jr	AT;				\
-9:		b	9b;				\
-		.set	pop;				\
-		TEXT(msg)
-#endif
-
 	__INIT
 
 /*
@@ -251,15 +237,6 @@
 	SAVE_AT
 	.set	push
 	.set	noreorder
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * To keep from blindly blocking *all* interrupts
-	 * during service by SMTC kernel, we also want to
-	 * pass the IM value to be cleared.
-	 */
-FEXPORT(except_vec_vi_mori)
-	ori	a0, $0, 0
-#endif /* CONFIG_MIPS_MT_SMTC */
 	PTR_LA	v1, except_vec_vi_handler
 FEXPORT(except_vec_vi_lui)
 	lui	v0, 0		/* Patched */
@@ -277,37 +254,10 @@
 NESTED(except_vec_vi_handler, 0, sp)
 	SAVE_TEMP
 	SAVE_STATIC
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC has an interesting problem that interrupts are level-triggered,
-	 * and the CLI macro will clear EXL, potentially causing a duplicate
-	 * interrupt service invocation. So we need to clear the associated
-	 * IM bit of Status prior to doing CLI, and restore it after the
-	 * service routine has been invoked - we must assume that the
-	 * service routine will have cleared the state, and any active
-	 * level represents a new or otherwised unserviced event...
-	 */
-	mfc0	t1, CP0_STATUS
-	and	t0, a0, t1
-#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
-	mfc0	t2, CP0_TCCONTEXT
-	or	t2, t0, t2
-	mtc0	t2, CP0_TCCONTEXT
-#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
-	xor	t1, t1, t0
-	mtc0	t1, CP0_STATUS
-	_ehb
-#endif /* CONFIG_MIPS_MT_SMTC */
 	CLI
 #ifdef CONFIG_TRACE_IRQFLAGS
 	move	s0, v0
-#ifdef CONFIG_MIPS_MT_SMTC
-	move	s1, a0
-#endif
 	TRACE_IRQS_OFF
-#ifdef CONFIG_MIPS_MT_SMTC
-	move	a0, s1
-#endif
 	move	v0, s0
 #endif
 
@@ -496,9 +446,6 @@
 
 	.align	5
 	LEAF(handle_ri_rdhwr_vivt)
-#ifdef CONFIG_MIPS_MT_SMTC
-	PANIC_PIC("handle_ri_rdhwr_vivt called")
-#else
 	.set	push
 	.set	noat
 	.set	noreorder
@@ -517,7 +464,6 @@
 	.set	pop
 	bltz	k1, handle_ri	/* slow path */
 	/* fall thru */
-#endif
 	END(handle_ri_rdhwr_vivt)
 
 	LEAF(handle_ri_rdhwr)

diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index e712dcf..95afd66 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S

@@ -35,33 +35,12 @@
 	 */
 	.macro	setup_c0_status set clr
 	.set	push
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * For SMTC, we need to set privilege and disable interrupts only for
-	 * the current TC, using the TCStatus register.
-	 */
-	mfc0	t0, CP0_TCSTATUS
-	/* Fortunately CU 0 is in the same place in both registers */
-	/* Set TCU0, TMX, TKSU (for later inversion) and IXMT */
-	li	t1, ST0_CU0 | 0x08001c00
-	or	t0, t1
-	/* Clear TKSU, leave IXMT */
-	xori	t0, 0x00001800
-	mtc0	t0, CP0_TCSTATUS
-	_ehb
-	/* We need to leave the global IE bit set, but clear EXL...*/
-	mfc0	t0, CP0_STATUS
-	or	t0, ST0_CU0 | ST0_EXL | ST0_ERL | \set | \clr
-	xor	t0, ST0_EXL | ST0_ERL | \clr
-	mtc0	t0, CP0_STATUS
-#else
 	mfc0	t0, CP0_STATUS
 	or	t0, ST0_CU0|\set|0x1f|\clr
 	xor	t0, 0x1f|\clr
 	mtc0	t0, CP0_STATUS
 	.set	noreorder
 	sll	zero,3				# ehb
-#endif
 	.set	pop
 	.endm
 
@@ -115,24 +94,6 @@
 	jr	t0
 0:
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * In SMTC kernel, "CLI" is thread-specific, in TCStatus.
-	 * We still need to enable interrupts globally in Status,
-	 * and clear EXL/ERL.
-	 *
-	 * TCContext is used to track interrupt levels under
-	 * service in SMTC kernel. Clear for boot TC before
-	 * allowing any interrupts.
-	 */
-	mtc0	zero, CP0_TCCONTEXT
-
-	mfc0	t0, CP0_STATUS
-	ori	t0, t0, 0xff1f
-	xori	t0, t0, 0x001e
-	mtc0	t0, CP0_STATUS
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 	PTR_LA		t0, __bss_start		# clear .bss
 	LONG_S		zero, (t0)
 	PTR_LA		t1, __bss_stop - LONGSIZE
@@ -164,25 +125,8 @@
  * function after setting up the stack and gp registers.
  */
 NESTED(smp_bootstrap, 16, sp)
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * Read-modify-writes of Status must be atomic, and this
-	 * is one case where CLI is invoked without EXL being
-	 * necessarily set. The CLI and setup_c0_status will
-	 * in fact be redundant for all but the first TC of
-	 * each VPE being booted.
-	 */
-	DMT	10	# dmt t2 /* t0, t1 are used by CLI and setup_c0_status() */
-	jal	mips_ihb
-#endif /* CONFIG_MIPS_MT_SMTC */
 	smp_slave_setup
 	setup_c0_status_sec
-#ifdef CONFIG_MIPS_MT_SMTC
-	andi	t2, t2, VPECONTROL_TE
-	beqz	t2, 2f
-	EMT		# emt
-2:
-#endif /* CONFIG_MIPS_MT_SMTC */
 	j	start_secondary
 	END(smp_bootstrap)
 #endif /* CONFIG_SMP */

diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c
index 2b91fe8..50b3648 100644
--- a/arch/mips/kernel/i8259.c
+++ b/arch/mips/kernel/i8259.c

@@ -42,9 +42,6 @@
 	.irq_disable		= disable_8259A_irq,
 	.irq_unmask		= enable_8259A_irq,
 	.irq_mask_ack		= mask_and_ack_8259A,
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-	.irq_set_affinity	= plat_set_irq_affinity,
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
 };
 
 /*
@@ -180,7 +177,6 @@
 		outb(cached_master_mask, PIC_MASTER_IMR);
 		outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
 	}
-	smtc_im_ack_irq(irq);
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 	return;
 

diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 837ff27..09ce459 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c

@@ -224,29 +224,26 @@
 		   cpu_wait = r4k_wait;
 		 */
 		break;
-	case CPU_RM9000:
-		if ((c->processor_id & 0x00ff) >= 0x40)
-			cpu_wait = r4k_wait;
-		break;
 	default:
 		break;
 	}
 }
 
-static void smtc_idle_hook(void)
-{
-#ifdef CONFIG_MIPS_MT_SMTC
-	void smtc_idle_loop_hook(void);
-
-	smtc_idle_loop_hook();
-#endif
-}
-
 void arch_cpu_idle(void)
 {
-	smtc_idle_hook();
 	if (cpu_wait)
 		cpu_wait();
 	else
 		local_irq_enable();
 }
+
+#ifdef CONFIG_CPU_IDLE
+
+int mips_cpuidle_wait_enter(struct cpuidle_device *dev,
+			    struct cpuidle_driver *drv, int index)
+{
+	arch_cpu_idle();
+	return index;
+}
+
+#endif

diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 8520dad..88e4c32 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c

@@ -54,6 +54,21 @@
 				(int)(cnt & 0xffffffff));
 }
 
+void gic_write_cpu_compare(cycle_t cnt, int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	GICWRITE(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), cpu);
+	GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_COMPARE_HI),
+				(int)(cnt >> 32));
+	GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_COMPARE_LO),
+				(int)(cnt & 0xffffffff));
+
+	local_irq_restore(flags);
+}
+
 cycle_t gic_read_compare(void)
 {
 	unsigned int hi, lo;

diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index fab40f7..4858642 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c

@@ -53,13 +53,9 @@
  */
 static void level_mask_and_ack_msc_irq(struct irq_data *d)
 {
-	unsigned int irq = d->irq;
-
 	mask_msc_irq(d);
 	if (!cpu_has_veic)
 		MSCIC_WRITE(MSC01_IC_EOI, 0);
-	/* This actually needs to be a call into platform code */
-	smtc_im_ack_irq(irq);
 }
 
 /*
@@ -78,7 +74,6 @@
 		MSCIC_WRITE(MSC01_IC_SUP+irq*8, r | ~MSC01_IC_SUP_EDGE_BIT);
 		MSCIC_WRITE(MSC01_IC_SUP+irq*8, r);
 	}
-	smtc_im_ack_irq(irq);
 }
 
 /*

diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 1818da4..d2bfbc2 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c

@@ -73,7 +73,6 @@
  */
 void ack_bad_irq(unsigned int irq)
 {
-	smtc_im_ack_irq(irq);
 	printk("unexpected IRQ # %d\n", irq);
 }
 
@@ -142,23 +141,7 @@
 {
 	irq_enter();
 	check_stack_overflow();
-	if (!smtc_handle_on_other_cpu(irq))
-		generic_handle_irq(irq);
-	irq_exit();
-}
-
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-/*
- * To avoid inefficient and in some cases pathological re-checking of
- * IRQ affinity, we have this variant that skips the affinity check.
- */
-
-void __irq_entry do_IRQ_no_affinity(unsigned int irq)
-{
-	irq_enter();
-	smtc_im_backstop(irq);
 	generic_handle_irq(irq);
 	irq_exit();
 }
 
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */

diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index c9dc674..ba47360 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c

@@ -9,12 +9,18 @@
  */
 
 #include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
 
 #include <asm/mips-cm.h>
 #include <asm/mips-cpc.h>
 
 void __iomem *mips_cpc_base;
 
+static DEFINE_PER_CPU_ALIGNED(spinlock_t, cpc_core_lock);
+
+static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
+
 phys_t __weak mips_cpc_phys_base(void)
 {
 	u32 cpc_base;
@@ -39,6 +45,10 @@
 int mips_cpc_probe(void)
 {
 	phys_t addr;
+	unsigned cpu;
+
+	for_each_possible_cpu(cpu)
+		spin_lock_init(&per_cpu(cpc_core_lock, cpu));
 
 	addr = mips_cpc_phys_base();
 	if (!addr)
@@ -50,3 +60,21 @@
 
 	return 0;
 }
+
+void mips_cpc_lock_other(unsigned int core)
+{
+	unsigned curr_core;
+	preempt_disable();
+	curr_core = current_cpu_data.core;
+	spin_lock_irqsave(&per_cpu(cpc_core_lock, curr_core),
+			  per_cpu(cpc_core_lock_flags, curr_core));
+	write_cpc_cl_other(core << CPC_Cx_OTHER_CORENUM_SHF);
+}
+
+void mips_cpc_unlock_other(void)
+{
+	unsigned curr_core = current_cpu_data.core;
+	spin_unlock_irqrestore(&per_cpu(cpc_core_lock, curr_core),
+			       per_cpu(cpc_core_lock_flags, curr_core));
+	preempt_enable();
+}

diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index cb09862..362bb37 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c

@@ -1,5 +1,5 @@
 /*
- * General MIPS MT support routines, usable in AP/SP, SMVP, or SMTC kernels
+ * General MIPS MT support routines, usable in AP/SP and SMVP.
  * Copyright (C) 2005 Mips Technologies, Inc
  */
 #include <linux/cpu.h>

diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
index 6ded9bd..88b1ef5 100644
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c

@@ -1,5 +1,5 @@
 /*
- * General MIPS MT support routines, usable in AP/SP, SMVP, or SMTC kernels
+ * General MIPS MT support routines, usable in AP/SP and SMVP.
  * Copyright (C) 2005 Mips Technologies, Inc
  */
 
@@ -57,9 +57,6 @@
 	int tc;
 	unsigned long haltval;
 	unsigned long tcstatval;
-#ifdef CONFIG_MIPS_MT_SMTC
-	void smtc_soft_dump(void);
-#endif /* CONFIG_MIPT_MT_SMTC */
 
 	local_irq_save(flags);
 	vpflags = dvpe();
@@ -116,9 +113,6 @@
 		if (!haltval)
 			write_tc_c0_tchalt(0);
 	}
-#ifdef CONFIG_MIPS_MT_SMTC
-	smtc_soft_dump();
-#endif /* CONFIG_MIPT_MT_SMTC */
 	printk("===========================\n");
 	evpe(vpflags);
 	local_irq_restore(flags);
@@ -295,21 +289,11 @@
 
 void mt_cflush_lockdown(void)
 {
-#ifdef CONFIG_MIPS_MT_SMTC
-	void smtc_cflush_lockdown(void);
-
-	smtc_cflush_lockdown();
-#endif /* CONFIG_MIPS_MT_SMTC */
 	/* FILL IN VSMP and AP/SP VERSIONS HERE */
 }
 
 void mt_cflush_release(void)
 {
-#ifdef CONFIG_MIPS_MT_SMTC
-	void smtc_cflush_release(void);
-
-	smtc_cflush_release();
-#endif /* CONFIG_MIPS_MT_SMTC */
 	/* FILL IN VSMP and AP/SP VERSIONS HERE */
 }
 

diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index 029e002..f654768 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S

@@ -10,24 +10,12 @@
  * Copyright (C) 2000 MIPS Technologies, Inc.
  *    written by Carsten Langgaard, carstenl@mips.com
  */
-#include <asm/asm.h>
-#include <asm/cachectl.h>
-#include <asm/fpregdef.h>
-#include <asm/mipsregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable-bits.h>
-#include <asm/regdef.h>
-#include <asm/stackframe.h>
-#include <asm/thread_info.h>
 
-#include <asm/asmmacro.h>
-
-/*
- * Offset to the current process status flags, the first 32 bytes of the
- * stack are not used.
- */
-#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
-
+#define USE_ALTERNATE_RESUME_IMPL 1
+	.set push
+	.set arch=mips64r2
+#include "r4k_switch.S"
+	.set pop
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
  *		       struct thread_info *next_ti, int usedfpu)
@@ -40,6 +28,61 @@
 	cpu_save_nonscratch a0
 	LONG_S	ra, THREAD_REG31(a0)
 
+	/*
+	 * check if we need to save FPU registers
+	 */
+	PTR_L	t3, TASK_THREAD_INFO(a0)
+	LONG_L	t0, TI_FLAGS(t3)
+	li	t1, _TIF_USEDFPU
+	and	t2, t0, t1
+	beqz	t2, 1f
+	nor	t1, zero, t1
+
+	and	t0, t0, t1
+	LONG_S	t0, TI_FLAGS(t3)
+
+	/*
+	 * clear saved user stack CU1 bit
+	 */
+	LONG_L	t0, ST_OFF(t3)
+	li	t1, ~ST0_CU1
+	and	t0, t0, t1
+	LONG_S	t0, ST_OFF(t3)
+
+	.set push
+	.set arch=mips64r2
+	fpu_save_double a0 t0 t1		# c0_status passed in t0
+						# clobbers t1
+	.set pop
+1:
+
+	/* check if we need to save COP2 registers */
+	PTR_L	t2, TASK_THREAD_INFO(a0)
+	LONG_L	t0, ST_OFF(t2)
+	bbit0	t0, 30, 1f
+
+	/* Disable COP2 in the stored process state */
+	li	t1, ST0_CU2
+	xor	t0, t1
+	LONG_S	t0, ST_OFF(t2)
+
+	/* Enable COP2 so we can save it */
+	mfc0	t0, CP0_STATUS
+	or	t0, t1
+	mtc0	t0, CP0_STATUS
+
+	/* Save COP2 */
+	daddu	a0, THREAD_CP2
+	jal octeon_cop2_save
+	dsubu	a0, THREAD_CP2
+
+	/* Disable COP2 now that we are done */
+	mfc0	t0, CP0_STATUS
+	li	t1, ST0_CU2
+	xor	t0, t1
+	mtc0	t0, CP0_STATUS
+
+1:
 #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
 	/* Check if we need to store CVMSEG state */
 	mfc0	t0, $11,7	/* CvmMemCtl */
@@ -85,12 +128,7 @@
 	move	$28, a2
 	cpu_restore_nonscratch a1
 
-#if (_THREAD_SIZE - 32) < 0x8000
-	PTR_ADDIU	t0, $28, _THREAD_SIZE - 32
-#else
-	PTR_LI		t0, _THREAD_SIZE - 32
-	PTR_ADDU	t0, $28
-#endif
+	PTR_ADDU	t0, $28, _THREAD_SIZE - 32
 	set_saved_sp	t0, t1, t2
 
 	mfc0	t1, CP0_STATUS		/* Do we really need this? */

diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
new file mode 100644
index 0000000..5aa4c6f
--- /dev/null
+++ b/arch/mips/kernel/pm-cps.c

@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheops.h>
+#include <asm/idle.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/mipsmtregs.h>
+#include <asm/pm.h>
+#include <asm/pm-cps.h>
+#include <asm/smp-cps.h>
+#include <asm/uasm.h>
+
+/*
+ * cps_nc_entry_fn - type of a generated non-coherent state entry function
+ * @online: the count of online coupled VPEs
+ * @nc_ready_count: pointer to a non-coherent mapping of the core ready_count
+ *
+ * The code entering & exiting non-coherent states is generated at runtime
+ * using uasm, in order to ensure that the compiler cannot insert a stray
+ * memory access at an unfortunate time and to allow the generation of optimal
+ * core-specific code particularly for cache routines. If coupled_coherence
+ * is non-zero and this is the entry function for the CPS_PM_NC_WAIT state,
+ * returns the number of VPEs that were in the wait state at the point this
+ * VPE left it. Returns garbage if coupled_coherence is zero or this is not
+ * the entry function for CPS_PM_NC_WAIT.
+ */
+typedef unsigned (*cps_nc_entry_fn)(unsigned online, u32 *nc_ready_count);
+
+/*
+ * The entry point of the generated non-coherent idle state entry/exit
+ * functions. Actually per-core rather than per-CPU.
+ */
+static DEFINE_PER_CPU_READ_MOSTLY(cps_nc_entry_fn[CPS_PM_STATE_COUNT],
+				  nc_asm_enter);
+
+/* Bitmap indicating which states are supported by the system */
+DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT);
+
+/*
+ * Indicates the number of coupled VPEs ready to operate in a non-coherent
+ * state. Actually per-core rather than per-CPU.
+ */
+static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
+static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
+
+/* Indicates online CPUs coupled with the current CPU */
+static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
+
+/*
+ * Used to synchronize entry to deep idle states. Actually per-core rather
+ * than per-CPU.
+ */
+static DEFINE_PER_CPU_ALIGNED(atomic_t, pm_barrier);
+
+/* Saved CPU state across the CPS_PM_POWER_GATED state */
+DEFINE_PER_CPU_ALIGNED(struct mips_static_suspend_state, cps_cpu_state);
+
+/* A somewhat arbitrary number of labels & relocs for uasm */
+static struct uasm_label labels[32] __initdata;
+static struct uasm_reloc relocs[32] __initdata;
+
+/* CPU dependent sync types */
+static unsigned stype_intervention;
+static unsigned stype_memory;
+static unsigned stype_ordering;
+
+enum mips_reg {
+	zero, at, v0, v1, a0, a1, a2, a3,
+	t0, t1, t2, t3, t4, t5, t6, t7,
+	s0, s1, s2, s3, s4, s5, s6, s7,
+	t8, t9, k0, k1, gp, sp, fp, ra,
+};
+
+bool cps_pm_support_state(enum cps_pm_state state)
+{
+	return test_bit(state, state_support);
+}
+
+static void coupled_barrier(atomic_t *a, unsigned online)
+{
+	/*
+	 * This function is effectively the same as
+	 * cpuidle_coupled_parallel_barrier, which can't be used here since
+	 * there's no cpuidle device.
+	 */
+
+	if (!coupled_coherence)
+		return;
+
+	smp_mb__before_atomic_inc();
+	atomic_inc(a);
+
+	while (atomic_read(a) < online)
+		cpu_relax();
+
+	if (atomic_inc_return(a) == online * 2) {
+		atomic_set(a, 0);
+		return;
+	}
+
+	while (atomic_read(a) > online)
+		cpu_relax();
+}
+
+int cps_pm_enter_state(enum cps_pm_state state)
+{
+	unsigned cpu = smp_processor_id();
+	unsigned core = current_cpu_data.core;
+	unsigned online, left;
+	cpumask_t *coupled_mask = this_cpu_ptr(&online_coupled);
+	u32 *core_ready_count, *nc_core_ready_count;
+	void *nc_addr;
+	cps_nc_entry_fn entry;
+	struct core_boot_config *core_cfg;
+	struct vpe_boot_config *vpe_cfg;
+
+	/* Check that there is an entry function for this state */
+	entry = per_cpu(nc_asm_enter, core)[state];
+	if (!entry)
+		return -EINVAL;
+
+	/* Calculate which coupled CPUs (VPEs) are online */
+#ifdef CONFIG_MIPS_MT
+	if (cpu_online(cpu)) {
+		cpumask_and(coupled_mask, cpu_online_mask,
+			    &cpu_sibling_map[cpu]);
+		online = cpumask_weight(coupled_mask);
+		cpumask_clear_cpu(cpu, coupled_mask);
+	} else
+#endif
+	{
+		cpumask_clear(coupled_mask);
+		online = 1;
+	}
+
+	/* Setup the VPE to run mips_cps_pm_restore when started again */
+	if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
+		core_cfg = &mips_cps_core_bootcfg[core];
+		vpe_cfg = &core_cfg->vpe_config[current_cpu_data.vpe_id];
+		vpe_cfg->pc = (unsigned long)mips_cps_pm_restore;
+		vpe_cfg->gp = (unsigned long)current_thread_info();
+		vpe_cfg->sp = 0;
+	}
+
+	/* Indicate that this CPU might not be coherent */
+	cpumask_clear_cpu(cpu, &cpu_coherent_mask);
+	smp_mb__after_clear_bit();
+
+	/* Create a non-coherent mapping of the core ready_count */
+	core_ready_count = per_cpu(ready_count, core);
+	nc_addr = kmap_noncoherent(virt_to_page(core_ready_count),
+				   (unsigned long)core_ready_count);
+	nc_addr += ((unsigned long)core_ready_count & ~PAGE_MASK);
+	nc_core_ready_count = nc_addr;
+
+	/* Ensure ready_count is zero-initialised before the assembly runs */
+	ACCESS_ONCE(*nc_core_ready_count) = 0;
+	coupled_barrier(&per_cpu(pm_barrier, core), online);
+
+	/* Run the generated entry code */
+	left = entry(online, nc_core_ready_count);
+
+	/* Remove the non-coherent mapping of ready_count */
+	kunmap_noncoherent();
+
+	/* Indicate that this CPU is definitely coherent */
+	cpumask_set_cpu(cpu, &cpu_coherent_mask);
+
+	/*
+	 * If this VPE is the first to leave the non-coherent wait state then
+	 * it needs to wake up any coupled VPEs still running their wait
+	 * instruction so that they return to cpuidle, which can then complete
+	 * coordination between the coupled VPEs & provide the governor with
+	 * a chance to reflect on the length of time the VPEs were in the
+	 * idle state.
+	 */
+	if (coupled_coherence && (state == CPS_PM_NC_WAIT) && (left == online))
+		arch_send_call_function_ipi_mask(coupled_mask);
+
+	return 0;
+}
+
+static void __init cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
+					 struct uasm_reloc **pr,
+					 const struct cache_desc *cache,
+					 unsigned op, int lbl)
+{
+	unsigned cache_size = cache->ways << cache->waybit;
+	unsigned i;
+	const unsigned unroll_lines = 32;
+
+	/* If the cache isn't present this function has it easy */
+	if (cache->flags & MIPS_CACHE_NOT_PRESENT)
+		return;
+
+	/* Load base address */
+	UASM_i_LA(pp, t0, (long)CKSEG0);
+
+	/* Calculate end address */
+	if (cache_size < 0x8000)
+		uasm_i_addiu(pp, t1, t0, cache_size);
+	else
+		UASM_i_LA(pp, t1, (long)(CKSEG0 + cache_size));
+
+	/* Start of cache op loop */
+	uasm_build_label(pl, *pp, lbl);
+
+	/* Generate the cache ops */
+	for (i = 0; i < unroll_lines; i++)
+		uasm_i_cache(pp, op, i * cache->linesz, t0);
+
+	/* Update the base address */
+	uasm_i_addiu(pp, t0, t0, unroll_lines * cache->linesz);
+
+	/* Loop if we haven't reached the end address yet */
+	uasm_il_bne(pp, pr, t0, t1, lbl);
+	uasm_i_nop(pp);
+}
+
+static int __init cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
+				    struct uasm_reloc **pr,
+				    const struct cpuinfo_mips *cpu_info,
+				    int lbl)
+{
+	unsigned i, fsb_size = 8;
+	unsigned num_loads = (fsb_size * 3) / 2;
+	unsigned line_stride = 2;
+	unsigned line_size = cpu_info->dcache.linesz;
+	unsigned perf_counter, perf_event;
+	unsigned revision = cpu_info->processor_id & PRID_REV_MASK;
+
+	/*
+	 * Determine whether this CPU requires an FSB flush, and if so which
+	 * performance counter/event reflect stalls due to a full FSB.
+	 */
+	switch (__get_cpu_type(cpu_info->cputype)) {
+	case CPU_INTERAPTIV:
+		perf_counter = 1;
+		perf_event = 51;
+		break;
+
+	case CPU_PROAPTIV:
+		/* Newer proAptiv cores don't require this workaround */
+		if (revision >= PRID_REV_ENCODE_332(1, 1, 0))
+			return 0;
+
+		/* On older ones it's unavailable */
+		return -1;
+
+	/* CPUs which do not require the workaround */
+	case CPU_P5600:
+		return 0;
+
+	default:
+		WARN_ONCE(1, "pm-cps: FSB flush unsupported for this CPU\n");
+		return -1;
+	}
+
+	/*
+	 * Ensure that the fill/store buffer (FSB) is not holding the results
+	 * of a prefetch, since if it is then the CPC sequencer may become
+	 * stuck in the D3 (ClrBus) state whilst entering a low power state.
+	 */
+
+	/* Preserve perf counter setup */
+	uasm_i_mfc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+	uasm_i_mfc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */
+
+	/* Setup perf counter to count FSB full pipeline stalls */
+	uasm_i_addiu(pp, t0, zero, (perf_event << 5) | 0xf);
+	uasm_i_mtc0(pp, t0, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+	uasm_i_ehb(pp);
+	uasm_i_mtc0(pp, zero, 25, (perf_counter * 2) + 1); /* PerfCntN */
+	uasm_i_ehb(pp);
+
+	/* Base address for loads */
+	UASM_i_LA(pp, t0, (long)CKSEG0);
+
+	/* Start of clear loop */
+	uasm_build_label(pl, *pp, lbl);
+
+	/* Perform some loads to fill the FSB */
+	for (i = 0; i < num_loads; i++)
+		uasm_i_lw(pp, zero, i * line_size * line_stride, t0);
+
+	/*
+	 * Invalidate the new D-cache entries so that the cache will need
+	 * refilling (via the FSB) if the loop is executed again.
+	 */
+	for (i = 0; i < num_loads; i++) {
+		uasm_i_cache(pp, Hit_Invalidate_D,
+			     i * line_size * line_stride, t0);
+		uasm_i_cache(pp, Hit_Writeback_Inv_SD,
+			     i * line_size * line_stride, t0);
+	}
+
+	/* Completion barrier */
+	uasm_i_sync(pp, stype_memory);
+	uasm_i_ehb(pp);
+
+	/* Check whether the pipeline stalled due to the FSB being full */
+	uasm_i_mfc0(pp, t1, 25, (perf_counter * 2) + 1); /* PerfCntN */
+
+	/* Loop if it didn't */
+	uasm_il_beqz(pp, pr, t1, lbl);
+	uasm_i_nop(pp);
+
+	/* Restore perf counter 1. The count may well now be wrong... */
+	uasm_i_mtc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+	uasm_i_ehb(pp);
+	uasm_i_mtc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */
+	uasm_i_ehb(pp);
+
+	return 0;
+}
+
+static void __init cps_gen_set_top_bit(u32 **pp, struct uasm_label **pl,
+				       struct uasm_reloc **pr,
+				       unsigned r_addr, int lbl)
+{
+	uasm_i_lui(pp, t0, uasm_rel_hi(0x80000000));
+	uasm_build_label(pl, *pp, lbl);
+	uasm_i_ll(pp, t1, 0, r_addr);
+	uasm_i_or(pp, t1, t1, t0);
+	uasm_i_sc(pp, t1, 0, r_addr);
+	uasm_il_beqz(pp, pr, t1, lbl);
+	uasm_i_nop(pp);
+}
+
+static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
+{
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	u32 *buf, *p;
+	const unsigned r_online = a0;
+	const unsigned r_nc_count = a1;
+	const unsigned r_pcohctl = t7;
+	const unsigned max_instrs = 256;
+	unsigned cpc_cmd;
+	int err;
+	enum {
+		lbl_incready = 1,
+		lbl_poll_cont,
+		lbl_secondary_hang,
+		lbl_disable_coherence,
+		lbl_flush_fsb,
+		lbl_invicache,
+		lbl_flushdcache,
+		lbl_hang,
+		lbl_set_cont,
+		lbl_secondary_cont,
+		lbl_decready,
+	};
+
+	/* Allocate a buffer to hold the generated code */
+	p = buf = kcalloc(max_instrs, sizeof(u32), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	/* Clear labels & relocs ready for (re)use */
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
+		/*
+		 * Save CPU state. Note the non-standard calling convention
+		 * with the return address placed in v0 to avoid clobbering
+		 * the ra register before it is saved.
+		 */
+		UASM_i_LA(&p, t0, (long)mips_cps_pm_save);
+		uasm_i_jalr(&p, v0, t0);
+		uasm_i_nop(&p);
+	}
+
+	/*
+	 * Load addresses of required CM & CPC registers. This is done early
+	 * because they're needed in both the enable & disable coherence steps
+	 * but in the coupled case the enable step will only run on one VPE.
+	 */
+	UASM_i_LA(&p, r_pcohctl, (long)addr_gcr_cl_coherence());
+
+	if (coupled_coherence) {
+		/* Increment ready_count */
+		uasm_i_sync(&p, stype_ordering);
+		uasm_build_label(&l, p, lbl_incready);
+		uasm_i_ll(&p, t1, 0, r_nc_count);
+		uasm_i_addiu(&p, t2, t1, 1);
+		uasm_i_sc(&p, t2, 0, r_nc_count);
+		uasm_il_beqz(&p, &r, t2, lbl_incready);
+		uasm_i_addiu(&p, t1, t1, 1);
+
+		/* Ordering barrier */
+		uasm_i_sync(&p, stype_ordering);
+
+		/*
+		 * If this is the last VPE to become ready for non-coherence
+		 * then it should branch below.
+		 */
+		uasm_il_beq(&p, &r, t1, r_online, lbl_disable_coherence);
+		uasm_i_nop(&p);
+
+		if (state < CPS_PM_POWER_GATED) {
+			/*
+			 * Otherwise this is not the last VPE to become ready
+			 * for non-coherence. It needs to wait until coherence
+			 * has been disabled before proceeding, which it will do
+			 * by polling for the top bit of ready_count being set.
+			 */
+			uasm_i_addiu(&p, t1, zero, -1);
+			uasm_build_label(&l, p, lbl_poll_cont);
+			uasm_i_lw(&p, t0, 0, r_nc_count);
+			uasm_il_bltz(&p, &r, t0, lbl_secondary_cont);
+			uasm_i_ehb(&p);
+			uasm_i_yield(&p, zero, t1);
+			uasm_il_b(&p, &r, lbl_poll_cont);
+			uasm_i_nop(&p);
+		} else {
+			/*
+			 * The core will lose power & this VPE will not continue
+			 * so it can simply halt here.
+			 */
+			uasm_i_addiu(&p, t0, zero, TCHALT_H);
+			uasm_i_mtc0(&p, t0, 2, 4);
+			uasm_build_label(&l, p, lbl_secondary_hang);
+			uasm_il_b(&p, &r, lbl_secondary_hang);
+			uasm_i_nop(&p);
+		}
+	}
+
+	/*
+	 * This is the point of no return - this VPE will now proceed to
+	 * disable coherence. At this point we *must* be sure that no other
+	 * VPE within the core will interfere with the L1 dcache.
+	 */
+	uasm_build_label(&l, p, lbl_disable_coherence);
+
+	/* Invalidate the L1 icache */
+	cps_gen_cache_routine(&p, &l, &r, &cpu_data[cpu].icache,
+			      Index_Invalidate_I, lbl_invicache);
+
+	/* Writeback & invalidate the L1 dcache */
+	cps_gen_cache_routine(&p, &l, &r, &cpu_data[cpu].dcache,
+			      Index_Writeback_Inv_D, lbl_flushdcache);
+
+	/* Completion barrier */
+	uasm_i_sync(&p, stype_memory);
+	uasm_i_ehb(&p);
+
+	/*
+	 * Disable all but self interventions. The load from COHCTL is defined
+	 * by the interAptiv & proAptiv SUMs as ensuring that the operation
+	 * resulting from the preceding store is complete.
+	 */
+	uasm_i_addiu(&p, t0, zero, 1 << cpu_data[cpu].core);
+	uasm_i_sw(&p, t0, 0, r_pcohctl);
+	uasm_i_lw(&p, t0, 0, r_pcohctl);
+
+	/* Sync to ensure previous interventions are complete */
+	uasm_i_sync(&p, stype_intervention);
+	uasm_i_ehb(&p);
+
+	/* Disable coherence */
+	uasm_i_sw(&p, zero, 0, r_pcohctl);
+	uasm_i_lw(&p, t0, 0, r_pcohctl);
+
+	if (state >= CPS_PM_CLOCK_GATED) {
+		err = cps_gen_flush_fsb(&p, &l, &r, &cpu_data[cpu],
+					lbl_flush_fsb);
+		if (err)
+			goto out_err;
+
+		/* Determine the CPC command to issue */
+		switch (state) {
+		case CPS_PM_CLOCK_GATED:
+			cpc_cmd = CPC_Cx_CMD_CLOCKOFF;
+			break;
+		case CPS_PM_POWER_GATED:
+			cpc_cmd = CPC_Cx_CMD_PWRDOWN;
+			break;
+		default:
+			BUG();
+			goto out_err;
+		}
+
+		/* Issue the CPC command */
+		UASM_i_LA(&p, t0, (long)addr_cpc_cl_cmd());
+		uasm_i_addiu(&p, t1, zero, cpc_cmd);
+		uasm_i_sw(&p, t1, 0, t0);
+
+		if (state == CPS_PM_POWER_GATED) {
+			/* If anything goes wrong just hang */
+			uasm_build_label(&l, p, lbl_hang);
+			uasm_il_b(&p, &r, lbl_hang);
+			uasm_i_nop(&p);
+
+			/*
+			 * There's no point generating more code, the core is
+			 * powered down & if powered back up will run from the
+			 * reset vector not from here.
+			 */
+			goto gen_done;
+		}
+
+		/* Completion barrier */
+		uasm_i_sync(&p, stype_memory);
+		uasm_i_ehb(&p);
+	}
+
+	if (state == CPS_PM_NC_WAIT) {
+		/*
+		 * At this point it is safe for all VPEs to proceed with
+		 * execution. This VPE will set the top bit of ready_count
+		 * to indicate to the other VPEs that they may continue.
+		 */
+		if (coupled_coherence)
+			cps_gen_set_top_bit(&p, &l, &r, r_nc_count,
+					    lbl_set_cont);
+
+		/*
+		 * VPEs which did not disable coherence will continue
+		 * executing, after coherence has been disabled, from this
+		 * point.
+		 */
+		uasm_build_label(&l, p, lbl_secondary_cont);
+
+		/* Now perform our wait */
+		uasm_i_wait(&p, 0);
+	}
+
+	/*
+	 * Re-enable coherence. Note that for CPS_PM_NC_WAIT all coupled VPEs
+	 * will run this. The first will actually re-enable coherence & the
+	 * rest will just be performing a rather unusual nop.
+	 */
+	uasm_i_addiu(&p, t0, zero, CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK);
+	uasm_i_sw(&p, t0, 0, r_pcohctl);
+	uasm_i_lw(&p, t0, 0, r_pcohctl);
+
+	/* Completion barrier */
+	uasm_i_sync(&p, stype_memory);
+	uasm_i_ehb(&p);
+
+	if (coupled_coherence && (state == CPS_PM_NC_WAIT)) {
+		/* Decrement ready_count */
+		uasm_build_label(&l, p, lbl_decready);
+		uasm_i_sync(&p, stype_ordering);
+		uasm_i_ll(&p, t1, 0, r_nc_count);
+		uasm_i_addiu(&p, t2, t1, -1);
+		uasm_i_sc(&p, t2, 0, r_nc_count);
+		uasm_il_beqz(&p, &r, t2, lbl_decready);
+		uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
+
+		/* Ordering barrier */
+		uasm_i_sync(&p, stype_ordering);
+	}
+
+	if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) {
+		/*
+		 * At this point it is safe for all VPEs to proceed with
+		 * execution. This VPE will set the top bit of ready_count
+		 * to indicate to the other VPEs that they may continue.
+		 */
+		cps_gen_set_top_bit(&p, &l, &r, r_nc_count, lbl_set_cont);
+
+		/*
+		 * This core will be reliant upon another core sending a
+		 * power-up command to the CPC in order to resume operation.
+		 * Thus an arbitrary VPE can't trigger the core leaving the
+		 * idle state and the one that disables coherence might as well
+		 * be the one to re-enable it. The rest will continue from here
+		 * after that has been done.
+		 */
+		uasm_build_label(&l, p, lbl_secondary_cont);
+
+		/* Ordering barrier */
+		uasm_i_sync(&p, stype_ordering);
+	}
+
+	/* The core is coherent, time to return to C code */
+	uasm_i_jr(&p, ra);
+	uasm_i_nop(&p);
+
+gen_done:
+	/* Ensure the code didn't exceed the resources allocated for it */
+	BUG_ON((p - buf) > max_instrs);
+	BUG_ON((l - labels) > ARRAY_SIZE(labels));
+	BUG_ON((r - relocs) > ARRAY_SIZE(relocs));
+
+	/* Patch branch offsets */
+	uasm_resolve_relocs(relocs, labels);
+
+	/* Flush the icache */
+	local_flush_icache_range((unsigned long)buf, (unsigned long)p);
+
+	return buf;
+out_err:
+	kfree(buf);
+	return NULL;
+}
+
+static int __init cps_gen_core_entries(unsigned cpu)
+{
+	enum cps_pm_state state;
+	unsigned core = cpu_data[cpu].core;
+	unsigned dlinesz = cpu_data[cpu].dcache.linesz;
+	void *entry_fn, *core_rc;
+
+	for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
+		if (per_cpu(nc_asm_enter, core)[state])
+			continue;
+		if (!test_bit(state, state_support))
+			continue;
+
+		entry_fn = cps_gen_entry_code(cpu, state);
+		if (!entry_fn) {
+			pr_err("Failed to generate core %u state %u entry\n",
+			       core, state);
+			clear_bit(state, state_support);
+		}
+
+		per_cpu(nc_asm_enter, core)[state] = entry_fn;
+	}
+
+	if (!per_cpu(ready_count, core)) {
+		core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+		if (!core_rc) {
+			pr_err("Failed allocate core %u ready_count\n", core);
+			return -ENOMEM;
+		}
+		per_cpu(ready_count_alloc, core) = core_rc;
+
+		/* Ensure ready_count is aligned to a cacheline boundary */
+		core_rc += dlinesz - 1;
+		core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
+		per_cpu(ready_count, core) = core_rc;
+	}
+
+	return 0;
+}
+
+static int __init cps_pm_init(void)
+{
+	unsigned cpu;
+	int err;
+
+	/* Detect appropriate sync types for the system */
+	switch (current_cpu_data.cputype) {
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
+	case CPU_M5150:
+	case CPU_P5600:
+		stype_intervention = 0x2;
+		stype_memory = 0x3;
+		stype_ordering = 0x10;
+		break;
+
+	default:
+		pr_warn("Power management is using heavyweight sync 0\n");
+	}
+
+	/* A CM is required for all non-coherent states */
+	if (!mips_cm_present()) {
+		pr_warn("pm-cps: no CM, non-coherent states unavailable\n");
+		goto out;
+	}
+
+	/*
+	 * If interrupts were enabled whilst running a wait instruction on a
+	 * non-coherent core then the VPE may end up processing interrupts
+	 * whilst non-coherent. That would be bad.
+	 */
+	if (cpu_wait == r4k_wait_irqoff)
+		set_bit(CPS_PM_NC_WAIT, state_support);
+	else
+		pr_warn("pm-cps: non-coherent wait unavailable\n");
+
+	/* Detect whether a CPC is present */
+	if (mips_cpc_present()) {
+		/* Detect whether clock gating is implemented */
+		if (read_cpc_cl_stat_conf() & CPC_Cx_STAT_CONF_CLKGAT_IMPL_MSK)
+			set_bit(CPS_PM_CLOCK_GATED, state_support);
+		else
+			pr_warn("pm-cps: CPC does not support clock gating\n");
+
+		/* Power gating is available with CPS SMP & any CPC */
+		if (mips_cps_smp_in_use())
+			set_bit(CPS_PM_POWER_GATED, state_support);
+		else
+			pr_warn("pm-cps: CPS SMP not in use, power gating unavailable\n");
+	} else {
+		pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
+	}
+
+	for_each_present_cpu(cpu) {
+		err = cps_gen_core_entries(cpu);
+		if (err)
+			return err;
+	}
+out:
+	return 0;
+}
+arch_initcall(cps_pm_init);

diff --git a/arch/mips/kernel/pm.c b/arch/mips/kernel/pm.c
new file mode 100644
index 0000000..fefdf39
--- /dev/null
+++ b/arch/mips/kernel/pm.c

@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute	it and/or modify it
+ * under  the terms of	the GNU General	 Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * CPU PM notifiers for saving/restoring general CPU state.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/init.h>
+
+#include <asm/dsp.h>
+#include <asm/fpu.h>
+#include <asm/mmu_context.h>
+#include <asm/pm.h>
+#include <asm/watch.h>
+
+/* Used by PM helper macros in asm/pm.h */
+struct mips_static_suspend_state mips_static_suspend_state;
+
+/**
+ * mips_cpu_save() - Save general CPU state.
+ * Ensures that general CPU context is saved, notably FPU and DSP.
+ *
+ * Called from mips_pm_notifier() when a CPU_PM_ENTER event is received.
+ *
+ * Return: always 0; the caller treats a non-zero value as a failure.
+ */
+static int mips_cpu_save(void)
+{
+	/* Save FPU state */
+	lose_fpu(1);
+
+	/* Save DSP state */
+	save_dsp(current);
+
+	return 0;
+}
+
+/**
+ * mips_cpu_restore() - Restore general CPU state.
+ * Restores important CPU context.
+ *
+ * Rewrites the ASID in EntryHi (only when the current task has an mm), the
+ * DSP state of the current task, the UserLocal register (only when the CPU
+ * has one) and the watch registers. Called from mips_pm_notifier() on both
+ * CPU_PM_EXIT and CPU_PM_ENTER_FAILED.
+ */
+static void mips_cpu_restore(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/* Restore ASID */
+	if (current->mm)
+		write_c0_entryhi(cpu_asid(cpu, current->mm));
+
+	/* Restore DSP state */
+	restore_dsp(current);
+
+	/* Restore UserLocal */
+	if (cpu_has_userlocal)
+		write_c0_userlocal(current_thread_info()->tp_value);
+
+	/* Restore watch registers */
+	__restore_watch();
+}
+
+/**
+ * mips_pm_notifier() - Notifier for preserving general CPU context.
+ * @self:	Notifier block.
+ * @cmd:	CPU PM event.
+ * @v:		Private data (unused).
+ *
+ * This is called when a CPU power management event occurs, and is used to
+ * ensure that important CPU context is preserved across a CPU power down.
+ *
+ * CPU_PM_ENTER saves state via mips_cpu_save(); CPU_PM_EXIT and
+ * CPU_PM_ENTER_FAILED both restore it via mips_cpu_restore(). Any other event
+ * is ignored.
+ *
+ * Return: NOTIFY_STOP if mips_cpu_save() reports a failure, NOTIFY_OK
+ * otherwise (including for events that are ignored).
+ */
+static int mips_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			    void *v)
+{
+	int ret;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		ret = mips_cpu_save();
+		if (ret)
+			return NOTIFY_STOP;
+		break;
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		mips_cpu_restore();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/* Notifier block that routes CPU PM events to mips_pm_notifier() */
+static struct notifier_block mips_pm_notifier_block = {
+	.notifier_call = mips_pm_notifier,
+};
+
+/*
+ * mips_pm_init() - Register mips_pm_notifier_block for CPU PM events.
+ *
+ * Returns the result of cpu_pm_register_notifier().
+ */
+static int __init mips_pm_init(void)
+{
+	return cpu_pm_register_notifier(&mips_pm_notifier_block);
+}
+arch_initcall(mips_pm_init);

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 60e39dc..0a1ec0f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c

@@ -140,13 +140,6 @@
 	 */
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC restores TCStatus after Status, and the CU bits
-	 * are aliased there.
-	 */
-	childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
-#endif
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
 #ifdef CONFIG_MIPS_MT_FPAFF

diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index abacac7..81ca3f7 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S

@@ -28,6 +28,7 @@
  */
 #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
 
+#ifndef USE_ALTERNATE_RESUME_IMPL
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
  *		       struct thread_info *next_ti, s32 fp_save)
@@ -87,18 +88,6 @@
 
 	PTR_ADDU	t0, $28, _THREAD_SIZE - 32
 	set_saved_sp	t0, t1, t2
-#ifdef CONFIG_MIPS_MT_SMTC
-	/* Read-modify-writes of Status must be atomic on a VPE */
-	mfc0	t2, CP0_TCSTATUS
-	ori	t1, t2, TCSTATUS_IXMT
-	mtc0	t1, CP0_TCSTATUS
-	andi	t2, t2, TCSTATUS_IXMT
-	_ehb
-	DMT	8				# dmt	t0
-	move	t1,ra
-	jal	mips_ihb
-	move	ra,t1
-#endif /* CONFIG_MIPS_MT_SMTC */
 	mfc0	t1, CP0_STATUS		/* Do we really need this? */
 	li	a3, 0xff01
 	and	t1, a3
@@ -107,22 +96,12 @@
 	and	a2, a3
 	or	a2, t1
 	mtc0	a2, CP0_STATUS
-#ifdef CONFIG_MIPS_MT_SMTC
-	_ehb
-	andi	t0, t0, VPECONTROL_TE
-	beqz	t0, 1f
-	emt
-1:
-	mfc0	t1, CP0_TCSTATUS
-	xori	t1, t1, TCSTATUS_IXMT
-	or	t1, t1, t2
-	mtc0	t1, CP0_TCSTATUS
-	_ehb
-#endif /* CONFIG_MIPS_MT_SMTC */
 	move	v0, a0
 	jr	ra
 	END(resume)
 
+#endif /* USE_ALTERNATE_RESUME_IMPL */
+
 /*
  * Save a thread's fp context.
  */
@@ -176,19 +155,10 @@
 #define FPU_DEFAULT  0x00000000
 
 LEAF(_init_fpu)
-#ifdef CONFIG_MIPS_MT_SMTC
-	/* Rather than manipulate per-VPE Status, set per-TC bit in TCStatus */
-	mfc0	t0, CP0_TCSTATUS
-	/* Bit position is the same for Status, TCStatus */
-	li	t1, ST0_CU1
-	or	t0, t1
-	mtc0	t0, CP0_TCSTATUS
-#else /* Normal MIPS CU1 enable */
 	mfc0	t0, CP0_STATUS
 	li	t1, ST0_CU1
 	or	t0, t1
 	mtc0	t0, CP0_STATUS
-#endif /* CONFIG_MIPS_MT_SMTC */
 	enable_fpu_hazard
 
 	li	t1, FPU_DEFAULT

diff --git a/arch/mips/kernel/rtlx-mt.c b/arch/mips/kernel/rtlx-mt.c
index 9c1aca0..5a66b97 100644
--- a/arch/mips/kernel/rtlx-mt.c
+++ b/arch/mips/kernel/rtlx-mt.c

@@ -36,7 +36,6 @@
 	unsigned long flags;
 	int i;
 
-	/* Ought not to be strictly necessary for SMTC builds */
 	local_irq_save(flags);
 	vpeflags = dvpe();
 	set_c0_status(0x100 << MIPS_CPU_RTLX_IRQ);

diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index ea4c2dc..df9e2bd 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c

@@ -281,13 +281,6 @@
 }
 
 /*
- * Runs on CPU0 after all CPUs have been booted
- */
-static void bmips_cpus_done(void)
-{
-}
-
-/*
  * BMIPS5000 raceless IPIs
  *
  * Each CPU has two inbound SW IRQs which are independent of all other CPUs.
@@ -434,7 +427,6 @@
 	.boot_secondary		= bmips_boot_secondary,
 	.smp_finish		= bmips_smp_finish,
 	.init_secondary		= bmips_init_secondary,
-	.cpus_done		= bmips_cpus_done,
 	.send_ipi_single	= bmips43xx_send_ipi_single,
 	.send_ipi_mask		= bmips43xx_send_ipi_mask,
 #ifdef CONFIG_HOTPLUG_CPU
@@ -449,7 +441,6 @@
 	.boot_secondary		= bmips_boot_secondary,
 	.smp_finish		= bmips_smp_finish,
 	.init_secondary		= bmips_init_secondary,
-	.cpus_done		= bmips_cpus_done,
 	.send_ipi_single	= bmips5000_send_ipi_single,
 	.send_ipi_mask		= bmips5000_send_ipi_mask,
 #ifdef CONFIG_HOTPLUG_CPU

diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index 3ef55fb7..fc8a515 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c

@@ -49,14 +49,11 @@
 
 	/* Enable per-cpu interrupts: platform specific */
 
-#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_MIPS_MT_SMP
 	if (cpu_has_mipsmt)
 		c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) &
 			TCBIND_CURVPE;
 #endif
-#ifdef CONFIG_MIPS_MT_SMTC
-	c->tc_id  = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT;
-#endif
 }
 
 static void cmp_smp_finish(void)
@@ -75,11 +72,6 @@
 	local_irq_enable();
 }
 
-static void cmp_cpus_done(void)
-{
-	pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
-}
-
 /*
  * Setup the PC, SP, and GP of a secondary processor and start it running
  * smp_bootstrap is the place to resume from
@@ -135,10 +127,6 @@
 		unsigned int mvpconf0 = read_c0_mvpconf0();
 
 		nvpe = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-#elif defined(CONFIG_MIPS_MT_SMTC)
-		unsigned int mvpconf0 = read_c0_mvpconf0();
-
-		nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
 #endif
 		smp_num_siblings = nvpe;
 	}
@@ -165,7 +153,6 @@
 	.send_ipi_mask		= gic_send_ipi_mask,
 	.init_secondary		= cmp_init_secondary,
 	.smp_finish		= cmp_smp_finish,
-	.cpus_done		= cmp_cpus_done,
 	.boot_secondary		= cmp_boot_secondary,
 	.smp_setup		= cmp_smp_setup,
 	.prepare_cpus		= cmp_prepare_cpus,

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 536eec0..df0598d 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c

@@ -20,104 +20,43 @@
 #include <asm/mips-cpc.h>
 #include <asm/mips_mt.h>
 #include <asm/mipsregs.h>
+#include <asm/pm-cps.h>
 #include <asm/smp-cps.h>
 #include <asm/time.h>
 #include <asm/uasm.h>
 
 static DECLARE_BITMAP(core_power, NR_CPUS);
 
-struct boot_config mips_cps_bootcfg;
+struct core_boot_config *mips_cps_core_bootcfg;
 
-static void init_core(void)
+static unsigned core_vpe_count(unsigned core)
 {
-	unsigned int nvpes, t;
-	u32 mvpconf0, vpeconf0, vpecontrol, tcstatus, tcbind, status;
+	unsigned cfg;
 
-	if (!cpu_has_mipsmt)
-		return;
+	if (!config_enabled(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt)
+		return 1;
 
-	/* Enter VPE configuration state */
-	dvpe();
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	/* Retrieve the count of VPEs in this core */
-	mvpconf0 = read_c0_mvpconf0();
-	nvpes = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-	smp_num_siblings = nvpes;
-
-	for (t = 1; t < nvpes; t++) {
-		/* Use a 1:1 mapping of TC index to VPE index */
-		settc(t);
-
-		/* Bind 1 TC to this VPE */
-		tcbind = read_tc_c0_tcbind();
-		tcbind &= ~TCBIND_CURVPE;
-		tcbind |= t << TCBIND_CURVPE_SHIFT;
-		write_tc_c0_tcbind(tcbind);
-
-		/* Set exclusive TC, non-active, master */
-		vpeconf0 = read_vpe_c0_vpeconf0();
-		vpeconf0 &= ~(VPECONF0_XTC | VPECONF0_VPA);
-		vpeconf0 |= t << VPECONF0_XTC_SHIFT;
-		vpeconf0 |= VPECONF0_MVP;
-		write_vpe_c0_vpeconf0(vpeconf0);
-
-		/* Declare TC non-active, non-allocatable & interrupt exempt */
-		tcstatus = read_tc_c0_tcstatus();
-		tcstatus &= ~(TCSTATUS_A | TCSTATUS_DA);
-		tcstatus |= TCSTATUS_IXMT;
-		write_tc_c0_tcstatus(tcstatus);
-
-		/* Halt the TC */
-		write_tc_c0_tchalt(TCHALT_H);
-
-		/* Allow only 1 TC to execute */
-		vpecontrol = read_vpe_c0_vpecontrol();
-		vpecontrol &= ~VPECONTROL_TE;
-		write_vpe_c0_vpecontrol(vpecontrol);
-
-		/* Copy (most of) Status from VPE 0 */
-		status = read_c0_status();
-		status &= ~(ST0_IM | ST0_IE | ST0_KSU);
-		status |= ST0_CU0;
-		write_vpe_c0_status(status);
-
-		/* Copy Config from VPE 0 */
-		write_vpe_c0_config(read_c0_config());
-		write_vpe_c0_config7(read_c0_config7());
-
-		/* Ensure no software interrupts are pending */
-		write_vpe_c0_cause(0);
-
-		/* Sync Count */
-		write_vpe_c0_count(read_c0_count());
-	}
-
-	/* Leave VPE configuration state */
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+	write_gcr_cl_other(core << CM_GCR_Cx_OTHER_CORENUM_SHF);
+	cfg = read_gcr_co_config() & CM_GCR_Cx_CONFIG_PVPE_MSK;
+	return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1;
 }
 
 static void __init cps_smp_setup(void)
 {
 	unsigned int ncores, nvpes, core_vpes;
 	int c, v;
-	u32 core_cfg, *entry_code;
 
 	/* Detect & record VPE topology */
 	ncores = mips_cm_numcores();
 	pr_info("VPE topology ");
 	for (c = nvpes = 0; c < ncores; c++) {
-		if (cpu_has_mipsmt && config_enabled(CONFIG_MIPS_MT_SMP)) {
-			write_gcr_cl_other(c << CM_GCR_Cx_OTHER_CORENUM_SHF);
-			core_cfg = read_gcr_co_config();
-			core_vpes = ((core_cfg & CM_GCR_Cx_CONFIG_PVPE_MSK) >>
-				     CM_GCR_Cx_CONFIG_PVPE_SHF) + 1;
-		} else {
-			core_vpes = 1;
-		}
-
+		core_vpes = core_vpe_count(c);
 		pr_cont("%c%u", c ? ',' : '{', core_vpes);
 
+		/* Use the number of VPEs in core 0 for smp_num_siblings */
+		if (!c)
+			smp_num_siblings = core_vpes;
+
 		for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
 			cpu_data[nvpes + v].core = c;
 #ifdef CONFIG_MIPS_MT_SMP
@@ -137,19 +76,14 @@
 		__cpu_logical_map[v] = v;
 	}
 
+	/* Set a coherent default CCA (CWB) */
+	change_c0_config(CONF_CM_CMASK, 0x5);
+
 	/* Core 0 is powered up (we're running on it) */
 	bitmap_set(core_power, 0, 1);
 
-	/* Disable MT - we only want to run 1 TC per VPE */
-	if (cpu_has_mipsmt)
-		dmt();
-
 	/* Initialise core 0 */
-	init_core();
-
-	/* Patch the start of mips_cps_core_entry to provide the CM base */
-	entry_code = (u32 *)&mips_cps_core_entry;
-	UASM_i_LA(&entry_code, 3, (long)mips_cm_base);
+	mips_cps_core_init();
 
 	/* Make core 0 coherent with everything */
 	write_gcr_cl_coherence(0xff);
@@ -157,15 +91,99 @@
 
+/*
+ * cps_prepare_cpus() - Prepare for booting secondary CPUs.
+ * @max_cpus:	Not used by this implementation.
+ *
+ * Performs, in order:
+ *
+ *   - Reads the CCA from Config; if it is neither 0x4 (CWBE) nor 0x5 (CWB)
+ *     and there is more than one core, marks every CPU that is not in
+ *     core 0 as not present.
+ *   - Patches the start of mips_cps_core_entry so that it loads the CM base
+ *     address and the CCA.
+ *   - Allocates one core_boot_config per core and one VPE boot config per VPE
+ *     in each core, then marks the current VPE as booted in its core's
+ *     vpe_mask.
+ *
+ * If any allocation fails, everything allocated so far is freed,
+ * mips_cps_core_bootcfg is reset to NULL and every CPU other than CPU 0 is
+ * marked not present.
+ */
 static void __init cps_prepare_cpus(unsigned int max_cpus)
 {
+	unsigned ncores, core_vpes, c, cca;
+	bool cca_unsuitable;
+	u32 *entry_code;
+
 	mips_mt_set_cpuoptions();
+
+	/* Detect whether the CCA is unsuited to multi-core SMP */
+	cca = read_c0_config() & CONF_CM_CMASK;
+	switch (cca) {
+	case 0x4: /* CWBE */
+	case 0x5: /* CWB */
+		/* The CCA is coherent, multi-core is fine */
+		cca_unsuitable = false;
+		break;
+
+	default:
+		/* CCA is not coherent, multi-core is not usable */
+		cca_unsuitable = true;
+	}
+
+	/* Warn the user if the CCA prevents multi-core */
+	ncores = mips_cm_numcores();
+	if (cca_unsuitable && ncores > 1) {
+		pr_warn("Using only one core due to unsuitable CCA 0x%x\n",
+			cca);
+
+		for_each_present_cpu(c) {
+			if (cpu_data[c].core)
+				set_cpu_present(c, false);
+		}
+	}
+
+	/*
+	 * Patch the start of mips_cps_core_entry to provide:
+	 *
+	 * v0 = CM base address
+	 * s0 = kseg0 CCA
+	 *
+	 * NOTE(review): the cache writeback below is presumably needed because
+	 * boot_core() points the reset vector at the CKSEG1 alias of this
+	 * code, so a booting core would not see lines still held in this
+	 * CPU's cache - confirm.
+	 */
+	entry_code = (u32 *)&mips_cps_core_entry;
+	UASM_i_LA(&entry_code, 3, (long)mips_cm_base);
+	uasm_i_addiu(&entry_code, 16, 0, cca);
+	dma_cache_wback_inv((unsigned long)&mips_cps_core_entry,
+			    (void *)entry_code - (void *)&mips_cps_core_entry);
+
+	/* Allocate core boot configuration structs */
+	mips_cps_core_bootcfg = kcalloc(ncores, sizeof(*mips_cps_core_bootcfg),
+					GFP_KERNEL);
+	if (!mips_cps_core_bootcfg) {
+		pr_err("Failed to allocate boot config for %u cores\n", ncores);
+		goto err_out;
+	}
+
+	/* Allocate VPE boot configuration structs */
+	for (c = 0; c < ncores; c++) {
+		core_vpes = core_vpe_count(c);
+		mips_cps_core_bootcfg[c].vpe_config = kcalloc(core_vpes,
+				sizeof(*mips_cps_core_bootcfg[c].vpe_config),
+				GFP_KERNEL);
+		if (!mips_cps_core_bootcfg[c].vpe_config) {
+			pr_err("Failed to allocate %u VPE boot configs\n",
+			       core_vpes);
+			goto err_out;
+		}
+	}
+
+	/* Mark this CPU as booted */
+	atomic_set(&mips_cps_core_bootcfg[current_cpu_data.core].vpe_mask,
+		   1 << cpu_vpe_id(&current_cpu_data));
+
+	return;
+err_out:
+	/*
+	 * Clean up allocations. The array came from kcalloc() so vpe_config
+	 * pointers that were never allocated are NULL and safe to kfree().
+	 */
+	if (mips_cps_core_bootcfg) {
+		for (c = 0; c < ncores; c++)
+			kfree(mips_cps_core_bootcfg[c].vpe_config);
+		kfree(mips_cps_core_bootcfg);
+		mips_cps_core_bootcfg = NULL;
+	}
+
+	/* Effectively disable SMP by declaring CPUs not present */
+	for_each_possible_cpu(c) {
+		if (c == 0)
+			continue;
+		set_cpu_present(c, false);
+	}
 }
 
-static void boot_core(struct boot_config *cfg)
+static void boot_core(unsigned core)
 {
 	u32 access;
 
 	/* Select the appropriate core */
-	write_gcr_cl_other(cfg->core << CM_GCR_Cx_OTHER_CORENUM_SHF);
+	write_gcr_cl_other(core << CM_GCR_Cx_OTHER_CORENUM_SHF);
 
 	/* Set its reset vector */
 	write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
@@ -175,104 +193,74 @@
 
 	/* Ensure the core can access the GCRs */
 	access = read_gcr_access();
-	access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + cfg->core);
+	access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + core);
 	write_gcr_access(access);
 
-	/* Copy cfg */
-	mips_cps_bootcfg = *cfg;
-
 	if (mips_cpc_present()) {
-		/* Select the appropriate core */
-		write_cpc_cl_other(cfg->core << CPC_Cx_OTHER_CORENUM_SHF);
-
 		/* Reset the core */
+		mips_cpc_lock_other(core);
 		write_cpc_co_cmd(CPC_Cx_CMD_RESET);
+		mips_cpc_unlock_other();
 	} else {
 		/* Take the core out of reset */
 		write_gcr_co_reset_release(0);
 	}
 
 	/* The core is now powered up */
-	bitmap_set(core_power, cfg->core, 1);
+	bitmap_set(core_power, core, 1);
 }
 
-static void boot_vpe(void *info)
+static void remote_vpe_boot(void *dummy)
 {
-	struct boot_config *cfg = info;
-	u32 tcstatus, vpeconf0;
-
-	/* Enter VPE configuration state */
-	dvpe();
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	settc(cfg->vpe);
-
-	/* Set the TC restart PC */
-	write_tc_c0_tcrestart((unsigned long)&smp_bootstrap);
-
-	/* Activate the TC, allow interrupts */
-	tcstatus = read_tc_c0_tcstatus();
-	tcstatus &= ~TCSTATUS_IXMT;
-	tcstatus |= TCSTATUS_A;
-	write_tc_c0_tcstatus(tcstatus);
-
-	/* Clear the TC halt bit */
-	write_tc_c0_tchalt(0);
-
-	/* Activate the VPE */
-	vpeconf0 = read_vpe_c0_vpeconf0();
-	vpeconf0 |= VPECONF0_VPA;
-	write_vpe_c0_vpeconf0(vpeconf0);
-
-	/* Set the stack & global pointer registers */
-	write_tc_gpr_sp(cfg->sp);
-	write_tc_gpr_gp(cfg->gp);
-
-	/* Leave VPE configuration state */
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	/* Enable other VPEs to execute */
-	evpe(EVPE_ENABLE);
+	mips_cps_boot_vpes();
 }
 
 static void cps_boot_secondary(int cpu, struct task_struct *idle)
 {
-	struct boot_config cfg;
+	unsigned core = cpu_data[cpu].core;
+	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
+	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
 	unsigned int remote;
 	int err;
 
-	cfg.core = cpu_data[cpu].core;
-	cfg.vpe = cpu_vpe_id(&cpu_data[cpu]);
-	cfg.pc = (unsigned long)&smp_bootstrap;
-	cfg.sp = __KSTK_TOS(idle);
-	cfg.gp = (unsigned long)task_thread_info(idle);
+	vpe_cfg->pc = (unsigned long)&smp_bootstrap;
+	vpe_cfg->sp = __KSTK_TOS(idle);
+	vpe_cfg->gp = (unsigned long)task_thread_info(idle);
 
-	if (!test_bit(cfg.core, core_power)) {
+	atomic_or(1 << cpu_vpe_id(&cpu_data[cpu]), &core_cfg->vpe_mask);
+
+	preempt_disable();
+
+	if (!test_bit(core, core_power)) {
 		/* Boot a VPE on a powered down core */
-		boot_core(&cfg);
-		return;
+		boot_core(core);
+		goto out;
 	}
 
-	if (cfg.core != current_cpu_data.core) {
+	if (core != current_cpu_data.core) {
 		/* Boot a VPE on another powered up core */
 		for (remote = 0; remote < NR_CPUS; remote++) {
-			if (cpu_data[remote].core != cfg.core)
+			if (cpu_data[remote].core != core)
 				continue;
 			if (cpu_online(remote))
 				break;
 		}
 		BUG_ON(remote >= NR_CPUS);
 
-		err = smp_call_function_single(remote, boot_vpe, &cfg, 1);
+		err = smp_call_function_single(remote, remote_vpe_boot,
+					       NULL, 1);
 		if (err)
 			panic("Failed to call remote CPU\n");
-		return;
+		goto out;
 	}
 
 	BUG_ON(!cpu_has_mipsmt);
 
 	/* Boot a VPE on this core */
-	boot_vpe(&cfg);
+	mips_cps_boot_vpes();
+out:
+	preempt_enable();
 }
 
 static void cps_init_secondary(void)
@@ -281,10 +269,6 @@
 	if (cpu_has_mipsmt)
 		dmt();
 
-	/* TODO: revisit this assumption once hotplug is implemented */
-	if (cpu_vpe_id(&current_cpu_data) == 0)
-		init_core();
-
 	change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 |
 				 STATUSF_IP6 | STATUSF_IP7);
 }
@@ -302,10 +286,148 @@
 	local_irq_enable();
 }
 
-static void cps_cpus_done(void)
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * cps_cpu_disable() - Take the calling CPU out of service for hotplug.
+ *
+ * Clears this VPE's bit from its core's vpe_mask, marks the CPU offline and
+ * removes it from cpu_callin_map.
+ *
+ * Returns 0 on success, -EBUSY when called on CPU 0 and -EINVAL when the
+ * power gated state is not supported.
+ */
+static int cps_cpu_disable(void)
 {
+	unsigned cpu = smp_processor_id();
+	struct core_boot_config *core_cfg;
+
+	/* CPU 0 may not be offlined */
+	if (!cpu)
+		return -EBUSY;
+
+	if (!cps_pm_support_state(CPS_PM_POWER_GATED))
+		return -EINVAL;
+
+	/* Drop this VPE from the mask of VPEs booted within its core */
+	core_cfg = &mips_cps_core_bootcfg[current_cpu_data.core];
+	atomic_sub(1 << cpu_vpe_id(&current_cpu_data), &core_cfg->vpe_mask);
+	smp_mb__after_atomic_dec();
+	set_cpu_online(cpu, false);
+	cpu_clear(cpu, cpu_callin_map);
+
+	return 0;
 }
 
+/*
+ * State shared between play_dead(), which runs on the CPU going offline, and
+ * cps_cpu_die(), which waits for it.
+ */
+
+/* Completed by play_dead() once it has decided how the CPU will go down */
+static DECLARE_COMPLETION(cpu_death_chosen);
+/* An online CPU in the same core as the dying one; set for CPU_DEATH_HALT */
+static unsigned cpu_death_sibling;
+/* How the dying CPU goes down: halt its TC, or power down its core */
+static enum {
+	CPU_DEATH_HALT,
+	CPU_DEATH_POWER,
+} cpu_death;
+
+/*
+ * play_dead() - Take the calling CPU offline; does not return.
+ *
+ * Chooses between halting only this TC (when the CPU has MT and another
+ * online CPU shares its core) and power gating the whole core, records the
+ * choice in cpu_death and signals cpu_death_chosen for cps_cpu_die(), then
+ * carries the choice out. Panics if execution continues afterwards.
+ */
+void play_dead(void)
+{
+	unsigned cpu, core;
+
+	local_irq_disable();
+	idle_task_exit();
+	cpu = smp_processor_id();
+	/* Default to powering down the core unless a sibling is found below */
+	cpu_death = CPU_DEATH_POWER;
+
+	if (cpu_has_mipsmt) {
+		core = cpu_data[cpu].core;
+
+		/* Look for another online VPE within the core */
+		for_each_online_cpu(cpu_death_sibling) {
+			if (cpu_data[cpu_death_sibling].core != core)
+				continue;
+
+			/*
+			 * There is an online VPE within the core. Just halt
+			 * this TC and leave the core alone.
+			 */
+			cpu_death = CPU_DEATH_HALT;
+			break;
+		}
+	}
+
+	/* This CPU has chosen its way out */
+	complete(&cpu_death_chosen);
+
+	if (cpu_death == CPU_DEATH_HALT) {
+		/* Halt this TC */
+		write_c0_tchalt(TCHALT_H);
+		instruction_hazard();
+	} else {
+		/* Power down the core */
+		cps_pm_enter_state(CPS_PM_POWER_GATED);
+	}
+
+	/* This should never be reached */
+	panic("Failed to offline CPU %u", cpu);
+}
+
+/*
+ * wait_for_sibling_halt() - Spin until an offlined sibling's TC has halted.
+ * @ptr_cpu:	Number of the offlined CPU, passed as a pointer-sized integer
+ *		rather than a real pointer.
+ *
+ * Run via smp_call_function_single() on cpu_death_sibling, an online CPU in
+ * the same core as the offlined one, which polls the offlined CPU's TCHalt
+ * register until its H bit is set.
+ */
+static void wait_for_sibling_halt(void *ptr_cpu)
+{
+	/*
+	 * Convert via unsigned long: casting a pointer straight to unsigned
+	 * is a pointer-to-integer cast of a different size on 64-bit builds.
+	 */
+	unsigned cpu = (unsigned long)ptr_cpu;
+	unsigned vpe_id = cpu_data[cpu].vpe_id;
+	unsigned halted;
+	unsigned long flags;
+
+	do {
+		/*
+		 * Keep the target TC selection and the read together.
+		 * NOTE(review): settc() is given the VPE id, presumably
+		 * relying on a 1:1 TC to VPE mapping - confirm.
+		 */
+		local_irq_save(flags);
+		settc(vpe_id);
+		halted = read_tc_c0_tchalt();
+		local_irq_restore(flags);
+	} while (!(halted & TCHALT_H));
+}
+
+/*
+ * cps_cpu_die() - Wait for an offlined CPU to actually go down.
+ * @cpu:	The CPU being offlined.
+ *
+ * Waits up to 5 seconds for play_dead() on @cpu to signal which way it is
+ * going down, then waits for that to take effect: either the core reaching
+ * one of the CPC sequencer states checked below, or the CPU's TC halting.
+ * Returns early, after logging an error, if the CPU never signals.
+ */
+static void cps_cpu_die(unsigned int cpu)
+{
+	unsigned core = cpu_data[cpu].core;
+	unsigned stat;
+	int err;
+
+	/* Wait for the cpu to choose its way out */
+	if (!wait_for_completion_timeout(&cpu_death_chosen,
+					 msecs_to_jiffies(5000))) {
+		pr_err("CPU%u: didn't offline\n", cpu);
+		return;
+	}
+
+	/*
+	 * Now wait for the CPU to actually offline. Without doing this that
+	 * offlining may race with one or more of:
+	 *
+	 *   - Onlining the CPU again.
+	 *   - Powering down the core if another VPE within it is offlined.
+	 *   - A sibling VPE entering a non-coherent state.
+	 *
+	 * In the non-MT halt case (ie. infinite loop) the CPU is doing nothing
+	 * with which we could race, so do nothing.
+	 */
+	if (cpu_death == CPU_DEATH_POWER) {
+		/*
+		 * Wait for the core to enter a powered down or clock gated
+		 * state, the latter happening when a JTAG probe is connected
+		 * in which case the CPC will refuse to power down the core.
+		 */
+		do {
+			mips_cpc_lock_other(core);
+			stat = read_cpc_co_stat_conf();
+			stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
+			mips_cpc_unlock_other();
+		} while (stat != CPC_Cx_STAT_CONF_SEQSTATE_D0 &&
+			 stat != CPC_Cx_STAT_CONF_SEQSTATE_D2 &&
+			 stat != CPC_Cx_STAT_CONF_SEQSTATE_U2);
+
+		/* Indicate the core is powered off */
+		bitmap_clear(core_power, core, 1);
+	} else if (cpu_has_mipsmt) {
+		/*
+		 * Have a CPU with access to the offlined CPUs registers wait
+		 * for its TC to halt.
+		 *
+		 * The CPU number travels in the pointer argument; widen it
+		 * via unsigned long first so the integer-to-pointer cast is
+		 * between same-sized types on 64-bit builds too.
+		 */
+		err = smp_call_function_single(cpu_death_sibling,
+					       wait_for_sibling_halt,
+					       (void *)(unsigned long)cpu, 1);
+		if (err)
+			panic("Failed to call remote sibling CPU\n");
+	}
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
 static struct plat_smp_ops cps_smp_ops = {
 	.smp_setup		= cps_smp_setup,
 	.prepare_cpus		= cps_prepare_cpus,
@@ -314,9 +436,18 @@
 	.smp_finish		= cps_smp_finish,
 	.send_ipi_single	= gic_send_ipi_single,
 	.send_ipi_mask		= gic_send_ipi_mask,
-	.cpus_done		= cps_cpus_done,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= cps_cpu_disable,
+	.cpu_die		= cps_cpu_die,
+#endif
 };
 
+/*
+ * mips_cps_smp_in_use() - Report whether the CPS SMP implementation is active.
+ *
+ * Returns true when the registered mp_ops is cps_smp_ops, false otherwise.
+ */
+bool mips_cps_smp_in_use(void)
+{
+	extern struct plat_smp_ops *mp_ops;
+	return mp_ops == &cps_smp_ops;
+}
+
 int register_cps_smp_ops(void)
 {
 	if (!mips_cm_present()) {

diff --git a/arch/mips/kernel/smp-gic.c b/arch/mips/kernel/smp-gic.c
index 3bb1f92..3b21a96 100644
--- a/arch/mips/kernel/smp-gic.c
+++ b/arch/mips/kernel/smp-gic.c

@@ -15,12 +15,14 @@
 #include <linux/printk.h>
 
 #include <asm/gic.h>
+#include <asm/mips-cpc.h>
 #include <asm/smp-ops.h>
 
 void gic_send_ipi_single(int cpu, unsigned int action)
 {
 	unsigned long flags;
 	unsigned int intr;
+	unsigned int core = cpu_data[cpu].core;
 
 	pr_debug("CPU%d: %s cpu %d action %u status %08x\n",
 		 smp_processor_id(), __func__, cpu, action, read_c0_status());
@@ -41,6 +43,15 @@
 	}
 
 	gic_send_ipi(intr);
+
+	if (mips_cpc_present() && (core != current_cpu_data.core)) {
+		while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+			mips_cpc_lock_other(core);
+			write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
+			mips_cpc_unlock_other();
+		}
+	}
+
 	local_irq_restore(flags);
 }
 

diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index f8e1314..3babf6e 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c

@@ -183,10 +183,6 @@
 	local_irq_enable();
 }
 
-static void vsmp_cpus_done(void)
-{
-}
-
 /*
  * Setup the PC, SP, and GP of a secondary processor and start it
  * running!
@@ -287,7 +283,6 @@
 	.send_ipi_mask		= vsmp_send_ipi_mask,
 	.init_secondary		= vsmp_init_secondary,
 	.smp_finish		= vsmp_smp_finish,
-	.cpus_done		= vsmp_cpus_done,
 	.boot_secondary		= vsmp_boot_secondary,
 	.smp_setup		= vsmp_smp_setup,
 	.prepare_cpus		= vsmp_prepare_cpus,

diff --git a/arch/mips/kernel/smp-up.c b/arch/mips/kernel/smp-up.c
index 7fde3e4..17878d7 100644
--- a/arch/mips/kernel/smp-up.c
+++ b/arch/mips/kernel/smp-up.c

@@ -36,11 +36,6 @@
 {
 }
 
-/* Hook for after all CPUs are online */
-static void up_cpus_done(void)
-{
-}
-
 /*
  * Firmware CPU startup hook
  */
@@ -73,7 +68,6 @@
 	.send_ipi_mask		= up_send_ipi_mask,
 	.init_secondary		= up_init_secondary,
 	.smp_finish		= up_smp_finish,
-	.cpus_done		= up_cpus_done,
 	.boot_secondary		= up_boot_secondary,
 	.smp_setup		= up_smp_setup,
 	.prepare_cpus		= up_prepare_cpus,

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 0a022ee..9bad52e 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c

@@ -43,10 +43,6 @@
 #include <asm/time.h>
 #include <asm/setup.h>
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#include <asm/mipsmtregs.h>
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
 
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
@@ -66,6 +62,8 @@
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
+cpumask_t cpu_coherent_mask;
+
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
@@ -102,12 +100,6 @@
 {
 	unsigned int cpu;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/* Only do cpu_probe for first TC of CPU */
-	if ((read_c0_tcbind() & TCBIND_CURTC) != 0)
-		__cpu_name[smp_processor_id()] = __cpu_name[0];
-	else
-#endif /* CONFIG_MIPS_MT_SMTC */
 	cpu_probe();
 	cpu_report();
 	per_cpu_trap_init(false);
@@ -124,6 +116,7 @@
 	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
+	cpu_set(cpu, cpu_coherent_mask);
 	notify_cpu_starting(cpu);
 
 	set_cpu_online(cpu, true);
@@ -173,7 +166,6 @@
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-	mp_ops->cpus_done();
 }
 
 /* called from main before smp_init() */
@@ -186,6 +178,7 @@
 #ifndef CONFIG_HOTPLUG_CPU
 	init_cpu_present(cpu_possible_mask);
 #endif
+	cpumask_copy(&cpu_coherent_mask, cpu_possible_mask);
 }
 
 /* preload SMP state for boot cpu */
@@ -238,13 +231,10 @@
  *  o collapses to normal function call on UP kernels
  *  o collapses to normal function call on systems with a single shared
  *    primary cache.
- *  o CONFIG_MIPS_MT_SMTC currently implies there is only one physical core.
  */
 static inline void smp_on_other_tlbs(void (*func) (void *info), void *info)
 {
-#ifndef CONFIG_MIPS_MT_SMTC
 	smp_call_function(func, info, 1);
-#endif
 }
 
 static inline void smp_on_each_tlb(void (*func) (void *info), void *info)
@@ -404,3 +394,46 @@
 }
 EXPORT_SYMBOL(dump_send_ipi);
 #endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+
+static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
+static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+
+/*
+ * tick_broadcast() - Ask each CPU in @mask to handle a broadcast tick.
+ * @mask:	CPUs to notify.
+ *
+ * Bumps the per-CPU tick_broadcast_count of every target and queues that
+ * CPU's tick_broadcast_csd only when the count goes from 0 to 1, so the csd
+ * is not queued again until tick_broadcast_callee() has reset the count.
+ */
+void tick_broadcast(const struct cpumask *mask)
+{
+	atomic_t *count;
+	struct call_single_data *csd;
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		count = &per_cpu(tick_broadcast_count, cpu);
+		csd = &per_cpu(tick_broadcast_csd, cpu);
+
+		if (atomic_inc_return(count) == 1)
+			smp_call_function_single_async(cpu, csd);
+	}
+}
+
+/*
+ * tick_broadcast_callee() - Handle a broadcast tick on the receiving CPU.
+ * @info:	Unused.
+ *
+ * Calls tick_receive_broadcast() and then resets this CPU's
+ * tick_broadcast_count to 0, which lets tick_broadcast() queue the csd again.
+ */
+static void tick_broadcast_callee(void *info)
+{
+	int cpu = smp_processor_id();
+	tick_receive_broadcast();
+	atomic_set(&per_cpu(tick_broadcast_count, cpu), 0);
+}
+
+/*
+ * tick_broadcast_init() - Point every per-CPU tick_broadcast_csd at its
+ * handler.
+ *
+ * Sets the callback of the csd for each CPU index below NR_CPUS to
+ * tick_broadcast_callee(). Always returns 0.
+ */
+static int __init tick_broadcast_init(void)
+{
+	struct call_single_data *csd;
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		csd = &per_cpu(tick_broadcast_csd, cpu);
+		csd->func = tick_broadcast_callee;
+	}
+
+	return 0;
+}
+early_initcall(tick_broadcast_init);
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */

diff --git a/arch/mips/kernel/smtc-asm.S b/arch/mips/kernel/smtc-asm.S
deleted file mode 100644
index 2866863..0000000
--- a/arch/mips/kernel/smtc-asm.S
+++ /dev/null

@@ -1,133 +0,0 @@
-/*
- * Assembly Language Functions for MIPS MT SMTC support
- */
-
-/*
- * This file should be built into the kernel only if CONFIG_MIPS_MT_SMTC is set. */
-
-#include <asm/regdef.h>
-#include <asm/asmmacro.h>
-#include <asm/stackframe.h>
-#include <asm/irqflags.h>
-
-/*
- * "Software Interrupt" linkage.
- *
- * This is invoked when an "Interrupt" is sent from one TC to another,
- * where the TC to be interrupted is halted, has it's Restart address
- * and Status values saved by the "remote control" thread, then modified
- * to cause execution to begin here, in kenel mode. This code then
- * disguises the TC state as that of an exception and transfers
- * control to the general exception or vectored interrupt handler.
- */
-	.set noreorder
-
-/*
-The __smtc_ipi_vector would use k0 and k1 as temporaries and
-1) Set EXL (this is per-VPE, so this can't be done by proxy!)
-2) Restore the K/CU and IXMT bits to the pre "exception" state
-   (EXL means no interrupts and access to the kernel map).
-3) Set EPC to be the saved value of TCRestart.
-4) Jump to the exception handler entry point passed by the sender.
-
-CAN WE PROVE THAT WE WON'T DO THIS IF INTS DISABLED??
-*/
-
-/*
- * Reviled and slandered vision: Set EXL and restore K/CU/IXMT
- * state of pre-halt thread, then save everything and call
- * thought some function pointer to imaginary_exception, which
- * will parse a register value or memory message queue to
- * deliver things like interprocessor interrupts. On return
- * from that function, jump to the global ret_from_irq code
- * to invoke the scheduler and return as appropriate.
- */
-
-#define PT_PADSLOT4 (PT_R0-8)
-#define PT_PADSLOT5 (PT_R0-4)
-
-	.text
-	.align 5
-FEXPORT(__smtc_ipi_vector)
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-#endif
-	.set	noat
-	/* Disable thread scheduling to make Status update atomic */
-	DMT	27					# dmt	k1
-	_ehb
-	/* Set EXL */
-	mfc0	k0,CP0_STATUS
-	ori	k0,k0,ST0_EXL
-	mtc0	k0,CP0_STATUS
-	_ehb
-	/* Thread scheduling now inhibited by EXL. Restore TE state. */
-	andi	k1,k1,VPECONTROL_TE
-	beqz	k1,1f
-	emt
-1:
-	/*
-	 * The IPI sender has put some information on the anticipated
-	 * kernel stack frame.	If we were in user mode, this will be
-	 * built above the saved kernel SP.  If we were already in the
-	 * kernel, it will be built above the current CPU SP.
-	 *
-	 * Were we in kernel mode, as indicated by CU0?
-	 */
-	sll	k1,k0,3
-	.set noreorder
-	bltz	k1,2f
-	move	k1,sp
-	.set reorder
-	/*
-	 * If previously in user mode, set CU0 and use kernel stack.
-	 */
-	li	k1,ST0_CU0
-	or	k1,k1,k0
-	mtc0	k1,CP0_STATUS
-	_ehb
-	get_saved_sp
-	/* Interrupting TC will have pre-set values in slots in the new frame */
-2:	subu	k1,k1,PT_SIZE
-	/* Load TCStatus Value */
-	lw	k0,PT_TCSTATUS(k1)
-	/* Write it to TCStatus to restore CU/KSU/IXMT state */
-	mtc0	k0,$2,1
-	_ehb
-	lw	k0,PT_EPC(k1)
-	mtc0	k0,CP0_EPC
-	/* Save all will redundantly recompute the SP, but use it for now */
-	SAVE_ALL
-	CLI
-	TRACE_IRQS_OFF
-	/* Function to be invoked passed stack pad slot 5 */
-	lw	t0,PT_PADSLOT5(sp)
-	/* Argument from sender passed in stack pad slot 4 */
-	lw	a0,PT_PADSLOT4(sp)
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	PTR_LA	ra, ret_from_irq
-	jr	t0
-
-/*
- * Called from idle loop to provoke processing of queued IPIs
- * First IPI message in queue passed as argument.
- */
-
-LEAF(self_ipi)
-	/* Before anything else, block interrupts */
-	mfc0	t0,CP0_TCSTATUS
-	ori	t1,t0,TCSTATUS_IXMT
-	mtc0	t1,CP0_TCSTATUS
-	_ehb
-	/* We know we're in kernel mode, so prepare stack frame */
-	subu	t1,sp,PT_SIZE
-	sw	ra,PT_EPC(t1)
-	sw	a0,PT_PADSLOT4(t1)
-	la	t2,ipi_decode
-	sw	t2,PT_PADSLOT5(t1)
-	/* Save pre-disable value of TCStatus */
-	sw	t0,PT_TCSTATUS(t1)
-	j	__smtc_ipi_vector
-	nop
-END(self_ipi)

diff --git a/arch/mips/kernel/smtc-proc.c b/arch/mips/kernel/smtc-proc.c
deleted file mode 100644
index 38635a9..0000000
--- a/arch/mips/kernel/smtc-proc.c
+++ /dev/null

@@ -1,102 +0,0 @@
-/*
- * /proc hooks for SMTC kernel
- * Copyright (C) 2005 Mips Technologies, Inc
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-
-#include <asm/cpu.h>
-#include <asm/processor.h>
-#include <linux/atomic.h>
-#include <asm/hardirq.h>
-#include <asm/mmu_context.h>
-#include <asm/mipsregs.h>
-#include <asm/cacheflush.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-
-#include <asm/smtc_proc.h>
-
-/*
- * /proc diagnostic and statistics hooks
- */
-
-/*
- * Statistics gathered
- */
-unsigned long selfipis[NR_CPUS];
-
-struct smtc_cpu_proc smtc_cpu_stats[NR_CPUS];
-
-atomic_t smtc_fpu_recoveries;
-
-static int smtc_proc_show(struct seq_file *m, void *v)
-{
-	int i;
-	extern unsigned long ebase;
-
-	seq_printf(m, "SMTC Status Word: 0x%08x\n", smtc_status);
-	seq_printf(m, "Config7: 0x%08x\n", read_c0_config7());
-	seq_printf(m, "EBASE: 0x%08lx\n", ebase);
-	seq_printf(m, "Counter Interrupts taken per CPU (TC)\n");
-	for (i=0; i < NR_CPUS; i++)
-		seq_printf(m, "%d: %ld\n", i, smtc_cpu_stats[i].timerints);
-	seq_printf(m, "Self-IPIs by CPU:\n");
-	for(i = 0; i < NR_CPUS; i++)
-		seq_printf(m, "%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
-	seq_printf(m, "%d Recoveries of \"stolen\" FPU\n",
-		   atomic_read(&smtc_fpu_recoveries));
-	return 0;
-}
-
-static int smtc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smtc_proc_show, NULL);
-}
-
-static const struct file_operations smtc_proc_fops = {
-	.open		= smtc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void init_smtc_stats(void)
-{
-	int i;
-
-	for (i=0; i<NR_CPUS; i++) {
-		smtc_cpu_stats[i].timerints = 0;
-		smtc_cpu_stats[i].selfipis = 0;
-	}
-
-	atomic_set(&smtc_fpu_recoveries, 0);
-
-	proc_create("smtc", 0444, NULL, &smtc_proc_fops);
-}
-
-static int proc_cpuinfo_chain_call(struct notifier_block *nfb,
-	unsigned long action_unused, void *data)
-{
-	struct proc_cpuinfo_notifier_args *pcn = data;
-	struct seq_file *m = pcn->m;
-	unsigned long n = pcn->n;
-
-	if (!cpu_has_mipsmt)
-		return NOTIFY_OK;
-
-	seq_printf(m, "VPE\t\t\t: %d\n", cpu_data[n].vpe_id);
-	seq_printf(m, "TC\t\t\t: %d\n", cpu_data[n].tc_id);
-
-	return NOTIFY_OK;
-}
-
-static int __init proc_cpuinfo_notifier_init(void)
-{
-	return proc_cpuinfo_notifier(proc_cpuinfo_chain_call, 0);
-}
-
-subsys_initcall(proc_cpuinfo_notifier_init);

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
deleted file mode 100644
index c1681d6..0000000
--- a/arch/mips/kernel/smtc.c
+++ /dev/null

@@ -1,1528 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * Copyright (C) 2004 Mips Technologies, Inc
- * Copyright (C) 2008 Kevin D. Kissell
- */
-
-#include <linux/clockchips.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/ftrace.h>
-#include <linux/slab.h>
-
-#include <asm/cpu.h>
-#include <asm/processor.h>
-#include <linux/atomic.h>
-#include <asm/hardirq.h>
-#include <asm/hazards.h>
-#include <asm/irq.h>
-#include <asm/idle.h>
-#include <asm/mmu_context.h>
-#include <asm/mipsregs.h>
-#include <asm/cacheflush.h>
-#include <asm/time.h>
-#include <asm/addrspace.h>
-#include <asm/smtc.h>
-#include <asm/smtc_proc.h>
-#include <asm/setup.h>
-
-/*
- * SMTC Kernel needs to manipulate low-level CPU interrupt mask
- * in do_IRQ. These are passed in setup_irq_smtc() and stored
- * in this table.
- */
-unsigned long irq_hwmask[NR_IRQS];
-
-#define LOCK_MT_PRA() \
-	local_irq_save(flags); \
-	mtflags = dmt()
-
-#define UNLOCK_MT_PRA() \
-	emt(mtflags); \
-	local_irq_restore(flags)
-
-#define LOCK_CORE_PRA() \
-	local_irq_save(flags); \
-	mtflags = dvpe()
-
-#define UNLOCK_CORE_PRA() \
-	evpe(mtflags); \
-	local_irq_restore(flags)
-
-/*
- * Data structures purely associated with SMTC parallelism
- */
-
-
-/*
- * Table for tracking ASIDs whose lifetime is prolonged.
- */
-
-asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
-
-/*
- * Number of InterProcessor Interrupt (IPI) message buffers to allocate
- */
-
-#define IPIBUF_PER_CPU 4
-
-struct smtc_ipi_q IPIQ[NR_CPUS];
-static struct smtc_ipi_q freeIPIq;
-
-
-/*
- * Number of FPU contexts for each VPE
- */
-
-static int smtc_nconf1[MAX_SMTC_VPES];
-
-
-/* Forward declarations */
-
-void ipi_decode(struct smtc_ipi *);
-static void post_direct_ipi(int cpu, struct smtc_ipi *pipi);
-static void setup_cross_vpe_interrupts(unsigned int nvpe);
-void init_smtc_stats(void);
-
-/* Global SMTC Status */
-
-unsigned int smtc_status;
-
-/* Boot command line configuration overrides */
-
-static int vpe0limit;
-static int ipibuffers;
-static int nostlb;
-static int asidmask;
-unsigned long smtc_asid_mask = 0xff;
-
-static int __init vpe0tcs(char *str)
-{
-	get_option(&str, &vpe0limit);
-
-	return 1;
-}
-
-static int __init ipibufs(char *str)
-{
-	get_option(&str, &ipibuffers);
-	return 1;
-}
-
-static int __init stlb_disable(char *s)
-{
-	nostlb = 1;
-	return 1;
-}
-
-static int __init asidmask_set(char *str)
-{
-	get_option(&str, &asidmask);
-	switch (asidmask) {
-	case 0x1:
-	case 0x3:
-	case 0x7:
-	case 0xf:
-	case 0x1f:
-	case 0x3f:
-	case 0x7f:
-	case 0xff:
-		smtc_asid_mask = (unsigned long)asidmask;
-		break;
-	default:
-		printk("ILLEGAL ASID mask 0x%x from command line\n", asidmask);
-	}
-	return 1;
-}
-
-__setup("vpe0tcs=", vpe0tcs);
-__setup("ipibufs=", ipibufs);
-__setup("nostlb", stlb_disable);
-__setup("asidmask=", asidmask_set);
-
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
-
-static int hang_trig;
-
-static int __init hangtrig_enable(char *s)
-{
-	hang_trig = 1;
-	return 1;
-}
-
-
-__setup("hangtrig", hangtrig_enable);
-
-#define DEFAULT_BLOCKED_IPI_LIMIT 32
-
-static int timerq_limit = DEFAULT_BLOCKED_IPI_LIMIT;
-
-static int __init tintq(char *str)
-{
-	get_option(&str, &timerq_limit);
-	return 1;
-}
-
-__setup("tintq=", tintq);
-
-static int imstuckcount[MAX_SMTC_VPES][8];
-/* vpemask represents IM/IE bits of per-VPE Status registers, low-to-high */
-static int vpemask[MAX_SMTC_VPES][8] = {
-	{0, 0, 1, 0, 0, 0, 0, 1},
-	{0, 0, 0, 0, 0, 0, 0, 1}
-};
-int tcnoprog[NR_CPUS];
-static atomic_t idle_hook_initialized = ATOMIC_INIT(0);
-static int clock_hang_reported[NR_CPUS];
-
-#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
-
-/*
- * Configure shared TLB - VPC configuration bit must be set by caller
- */
-
-static void smtc_configure_tlb(void)
-{
-	int i, tlbsiz, vpes;
-	unsigned long mvpconf0;
-	unsigned long config1val;
-
-	/* Set up ASID preservation table */
-	for (vpes=0; vpes<MAX_SMTC_TLBS; vpes++) {
-	    for(i = 0; i < MAX_SMTC_ASIDS; i++) {
-		smtc_live_asid[vpes][i] = 0;
-	    }
-	}
-	mvpconf0 = read_c0_mvpconf0();
-
-	if ((vpes = ((mvpconf0 & MVPCONF0_PVPE)
-			>> MVPCONF0_PVPE_SHIFT) + 1) > 1) {
-	    /* If we have multiple VPEs, try to share the TLB */
-	    if ((mvpconf0 & MVPCONF0_TLBS) && !nostlb) {
-		/*
-		 * If TLB sizing is programmable, shared TLB
-		 * size is the total available complement.
-		 * Otherwise, we have to take the sum of all
-		 * static VPE TLB entries.
-		 */
-		if ((tlbsiz = ((mvpconf0 & MVPCONF0_PTLBE)
-				>> MVPCONF0_PTLBE_SHIFT)) == 0) {
-		    /*
-		     * If there's more than one VPE, there had better
-		     * be more than one TC, because we need one to bind
-		     * to each VPE in turn to be able to read
-		     * its configuration state!
-		     */
-		    settc(1);
-		    /* Stop the TC from doing anything foolish */
-		    write_tc_c0_tchalt(TCHALT_H);
-		    mips_ihb();
-		    /* No need to un-Halt - that happens later anyway */
-		    for (i=0; i < vpes; i++) {
-			write_tc_c0_tcbind(i);
-			/*
-			 * To be 100% sure we're really getting the right
-			 * information, we exit the configuration state
-			 * and do an IHB after each rebinding.
-			 */
-			write_c0_mvpcontrol(
-				read_c0_mvpcontrol() & ~ MVPCONTROL_VPC );
-			mips_ihb();
-			/*
-			 * Only count if the MMU Type indicated is TLB
-			 */
-			if (((read_vpe_c0_config() & MIPS_CONF_MT) >> 7) == 1) {
-				config1val = read_vpe_c0_config1();
-				tlbsiz += ((config1val >> 25) & 0x3f) + 1;
-			}
-
-			/* Put core back in configuration state */
-			write_c0_mvpcontrol(
-				read_c0_mvpcontrol() | MVPCONTROL_VPC );
-			mips_ihb();
-		    }
-		}
-		write_c0_mvpcontrol(read_c0_mvpcontrol() | MVPCONTROL_STLB);
-		ehb();
-
-		/*
-		 * Setup kernel data structures to use software total,
-		 * rather than read the per-VPE Config1 value. The values
-		 * for "CPU 0" gets copied to all the other CPUs as part
-		 * of their initialization in smtc_cpu_setup().
-		 */
-
-		/* MIPS32 limits TLB indices to 64 */
-		if (tlbsiz > 64)
-			tlbsiz = 64;
-		cpu_data[0].tlbsize = current_cpu_data.tlbsize = tlbsiz;
-		smtc_status |= SMTC_TLB_SHARED;
-		local_flush_tlb_all();
-
-		printk("TLB of %d entry pairs shared by %d VPEs\n",
-			tlbsiz, vpes);
-	    } else {
-		printk("WARNING: TLB Not Sharable on SMTC Boot!\n");
-	    }
-	}
-}
-
-
-/*
- * Incrementally build the CPU map out of constituent MIPS MT cores,
- * using the specified available VPEs and TCs.	Plaform code needs
- * to ensure that each MIPS MT core invokes this routine on reset,
- * one at a time(!).
- *
- * This version of the build_cpu_map and prepare_cpus routines assumes
- * that *all* TCs of a MIPS MT core will be used for Linux, and that
- * they will be spread across *all* available VPEs (to minimise the
- * loss of efficiency due to exception service serialization).
- * An improved version would pick up configuration information and
- * possibly leave some TCs/VPEs as "slave" processors.
- *
- * Use c0_MVPConf0 to find out how many TCs are available, setting up
- * cpu_possible_mask and the logical/physical mappings.
- */
-
-int __init smtc_build_cpu_map(int start_cpu_slot)
-{
-	int i, ntcs;
-
-	/*
-	 * The CPU map isn't actually used for anything at this point,
-	 * so it's not clear what else we should do apart from set
-	 * everything up so that "logical" = "physical".
-	 */
-	ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
-	for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
-		set_cpu_possible(i, true);
-		__cpu_number_map[i] = i;
-		__cpu_logical_map[i] = i;
-	}
-#ifdef CONFIG_MIPS_MT_FPAFF
-	/* Initialize map of CPUs with FPUs */
-	cpus_clear(mt_fpu_cpumask);
-#endif
-
-	/* One of those TC's is the one booting, and not a secondary... */
-	printk("%i available secondary CPU TC(s)\n", i - 1);
-
-	return i;
-}
-
-/*
- * Common setup before any secondaries are started
- * Make sure all CPUs are in a sensible state before we boot any of the
- * secondaries.
- *
- * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly
- * as possible across the available VPEs.
- */
-
-static void smtc_tc_setup(int vpe, int tc, int cpu)
-{
-	static int cp1contexts[MAX_SMTC_VPES];
-
-	/*
-	 * Make a local copy of the available FPU contexts in order
-	 * to keep track of TCs that can have one.
-	 */
-	if (tc == 1)
-	{
-		/*
-		 * FIXME: Multi-core SMTC hasn't been tested and the
-		 *	  maximum number of VPEs may change.
-		 */
-		cp1contexts[0] = smtc_nconf1[0] - 1;
-		cp1contexts[1] = smtc_nconf1[1];
-	}
-
-	settc(tc);
-	write_tc_c0_tchalt(TCHALT_H);
-	mips_ihb();
-	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
-			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
-			| TCSTATUS_A);
-	/*
-	 * TCContext gets an offset from the base of the IPIQ array
-	 * to be used in low-level code to detect the presence of
-	 * an active IPI queue.
-	 */
-	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
-
-	/* Bind TC to VPE. */
-	write_tc_c0_tcbind(vpe);
-
-	/* In general, all TCs should have the same cpu_data indications. */
-	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
-
-	/* Check to see if there is a FPU context available for this TC. */
-	if (!cp1contexts[vpe])
-		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
-	else
-		cp1contexts[vpe]--;
-
-	/* Store the TC and VPE into the cpu_data structure. */
-	cpu_data[cpu].vpe_id = vpe;
-	cpu_data[cpu].tc_id = tc;
-
-	/* FIXME: Multi-core SMTC hasn't been tested, but be prepared. */
-	cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
-}
-
-/*
- * Tweak to get Count registers synced as closely as possible. The
- * value seems good for 34K-class cores.
- */
-
-#define CP0_SKEW 8
-
-void smtc_prepare_cpus(int cpus)
-{
-	int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
-	unsigned long flags;
-	unsigned long val;
-	int nipi;
-	struct smtc_ipi *pipi;
-
-	/* disable interrupts so we can disable MT */
-	local_irq_save(flags);
-	/* disable MT so we can configure */
-	dvpe();
-	dmt();
-
-	spin_lock_init(&freeIPIq.lock);
-
-	/*
-	 * We probably don't have as many VPEs as we do SMP "CPUs",
-	 * but it's possible - and in any case we'll never use more!
-	 */
-	for (i=0; i<NR_CPUS; i++) {
-		IPIQ[i].head = IPIQ[i].tail = NULL;
-		spin_lock_init(&IPIQ[i].lock);
-		IPIQ[i].depth = 0;
-		IPIQ[i].resched_flag = 0; /* No reschedules queued initially */
-	}
-
-	/* cpu_data index starts at zero */
-	cpu = 0;
-	cpu_data[cpu].vpe_id = 0;
-	cpu_data[cpu].tc_id = 0;
-	cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
-	cpu++;
-
-	/* Report on boot-time options */
-	mips_mt_set_cpuoptions();
-	if (vpelimit > 0)
-		printk("Limit of %d VPEs set\n", vpelimit);
-	if (tclimit > 0)
-		printk("Limit of %d TCs set\n", tclimit);
-	if (nostlb) {
-		printk("Shared TLB Use Inhibited - UNSAFE for Multi-VPE Operation\n");
-	}
-	if (asidmask)
-		printk("ASID mask value override to 0x%x\n", asidmask);
-
-	/* Temporary */
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
-	if (hang_trig)
-		printk("Logic Analyser Trigger on suspected TC hang\n");
-#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
-
-	/* Put MVPE's into 'configuration state' */
-	write_c0_mvpcontrol( read_c0_mvpcontrol() | MVPCONTROL_VPC );
-
-	val = read_c0_mvpconf0();
-	nvpe = ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-	if (vpelimit > 0 && nvpe > vpelimit)
-		nvpe = vpelimit;
-	ntc = ((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
-	if (ntc > NR_CPUS)
-		ntc = NR_CPUS;
-	if (tclimit > 0 && ntc > tclimit)
-		ntc = tclimit;
-	slop = ntc % nvpe;
-	for (i = 0; i < nvpe; i++) {
-		tcpervpe[i] = ntc / nvpe;
-		if (slop) {
-			if((slop - i) > 0) tcpervpe[i]++;
-		}
-	}
-	/* Handle command line override for VPE0 */
-	if (vpe0limit > ntc) vpe0limit = ntc;
-	if (vpe0limit > 0) {
-		int slopslop;
-		if (vpe0limit < tcpervpe[0]) {
-		    /* Reducing TC count - distribute to others */
-		    slop = tcpervpe[0] - vpe0limit;
-		    slopslop = slop % (nvpe - 1);
-		    tcpervpe[0] = vpe0limit;
-		    for (i = 1; i < nvpe; i++) {
-			tcpervpe[i] += slop / (nvpe - 1);
-			if(slopslop && ((slopslop - (i - 1) > 0)))
-				tcpervpe[i]++;
-		    }
-		} else if (vpe0limit > tcpervpe[0]) {
-		    /* Increasing TC count - steal from others */
-		    slop = vpe0limit - tcpervpe[0];
-		    slopslop = slop % (nvpe - 1);
-		    tcpervpe[0] = vpe0limit;
-		    for (i = 1; i < nvpe; i++) {
-			tcpervpe[i] -= slop / (nvpe - 1);
-			if(slopslop && ((slopslop - (i - 1) > 0)))
-				tcpervpe[i]--;
-		    }
-		}
-	}
-
-	/* Set up shared TLB */
-	smtc_configure_tlb();
-
-	for (tc = 0, vpe = 0 ; (vpe < nvpe) && (tc < ntc) ; vpe++) {
-		/* Get number of CP1 contexts for each VPE. */
-		if (tc == 0)
-		{
-			/*
-			 * Do not call settc() for TC0 or the FPU context
-			 * value will be incorrect. Besides, we know that
-			 * we are TC0 anyway.
-			 */
-			smtc_nconf1[0] = ((read_vpe_c0_vpeconf1() &
-				VPECONF1_NCP1) >> VPECONF1_NCP1_SHIFT);
-			if (nvpe == 2)
-			{
-				settc(1);
-				smtc_nconf1[1] = ((read_vpe_c0_vpeconf1() &
-					VPECONF1_NCP1) >> VPECONF1_NCP1_SHIFT);
-				settc(0);
-			}
-		}
-		if (tcpervpe[vpe] == 0)
-			continue;
-		if (vpe != 0)
-			printk(", ");
-		printk("VPE %d: TC", vpe);
-		for (i = 0; i < tcpervpe[vpe]; i++) {
-			/*
-			 * TC 0 is bound to VPE 0 at reset,
-			 * and is presumably executing this
-			 * code.  Leave it alone!
-			 */
-			if (tc != 0) {
-				smtc_tc_setup(vpe, tc, cpu);
-				if (vpe != 0) {
-					/*
-					 * Set MVP bit (possibly again).  Do it
-					 * here to catch CPUs that have no TCs
-					 * bound to the VPE at reset.  In that
-					 * case, a TC must be bound to the VPE
-					 * before we can set VPEControl[MVP]
-					 */
-					write_vpe_c0_vpeconf0(
-						read_vpe_c0_vpeconf0() |
-						VPECONF0_MVP);
-				}
-				cpu++;
-			}
-			printk(" %d", tc);
-			tc++;
-		}
-		if (vpe != 0) {
-			/*
-			 * Allow this VPE to control others.
-			 */
-			write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() |
-					      VPECONF0_MVP);
-
-			/*
-			 * Clear any stale software interrupts from VPE's Cause
-			 */
-			write_vpe_c0_cause(0);
-
-			/*
-			 * Clear ERL/EXL of VPEs other than 0
-			 * and set restricted interrupt enable/mask.
-			 */
-			write_vpe_c0_status((read_vpe_c0_status()
-				& ~(ST0_BEV | ST0_ERL | ST0_EXL | ST0_IM))
-				| (STATUSF_IP0 | STATUSF_IP1 | STATUSF_IP7
-				| ST0_IE));
-			/*
-			 * set config to be the same as vpe0,
-			 *  particularly kseg0 coherency alg
-			 */
-			write_vpe_c0_config(read_c0_config());
-			/* Clear any pending timer interrupt */
-			write_vpe_c0_compare(0);
-			/* Propagate Config7 */
-			write_vpe_c0_config7(read_c0_config7());
-			write_vpe_c0_count(read_c0_count() + CP0_SKEW);
-			ehb();
-		}
-		/* enable multi-threading within VPE */
-		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
-		/* enable the VPE */
-		write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
-	}
-
-	/*
-	 * Pull any physically present but unused TCs out of circulation.
-	 */
-	while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
-		set_cpu_possible(tc, false);
-		set_cpu_present(tc, false);
-		tc++;
-	}
-
-	/* release config state */
-	write_c0_mvpcontrol( read_c0_mvpcontrol() & ~ MVPCONTROL_VPC );
-
-	printk("\n");
-
-	/* Set up coprocessor affinity CPU mask(s) */
-
-#ifdef CONFIG_MIPS_MT_FPAFF
-	for (tc = 0; tc < ntc; tc++) {
-		if (cpu_data[tc].options & MIPS_CPU_FPU)
-			cpu_set(tc, mt_fpu_cpumask);
-	}
-#endif
-
-	/* set up ipi interrupts... */
-
-	/* If we have multiple VPEs running, set up the cross-VPE interrupt */
-
-	setup_cross_vpe_interrupts(nvpe);
-
-	/* Set up queue of free IPI "messages". */
-	nipi = NR_CPUS * IPIBUF_PER_CPU;
-	if (ipibuffers > 0)
-		nipi = ipibuffers;
-
-	pipi = kmalloc(nipi *sizeof(struct smtc_ipi), GFP_KERNEL);
-	if (pipi == NULL)
-		panic("kmalloc of IPI message buffers failed");
-	else
-		printk("IPI buffer pool of %d buffers\n", nipi);
-	for (i = 0; i < nipi; i++) {
-		smtc_ipi_nq(&freeIPIq, pipi);
-		pipi++;
-	}
-
-	/* Arm multithreading and enable other VPEs - but all TCs are Halted */
-	emt(EMT_ENABLE);
-	evpe(EVPE_ENABLE);
-	local_irq_restore(flags);
-	/* Initialize SMTC /proc statistics/diagnostics */
-	init_smtc_stats();
-}
-
-
-/*
- * Setup the PC, SP, and GP of a secondary processor and start it
- * running!
- * smp_bootstrap is the place to resume from
- * __KSTK_TOS(idle) is apparently the stack pointer
- * (unsigned long)idle->thread_info the gp
- *
- */
-void smtc_boot_secondary(int cpu, struct task_struct *idle)
-{
-	extern u32 kernelsp[NR_CPUS];
-	unsigned long flags;
-	int mtflags;
-
-	LOCK_MT_PRA();
-	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		dvpe();
-	}
-	settc(cpu_data[cpu].tc_id);
-
-	/* pc */
-	write_tc_c0_tcrestart((unsigned long)&smp_bootstrap);
-
-	/* stack pointer */
-	kernelsp[cpu] = __KSTK_TOS(idle);
-	write_tc_gpr_sp(__KSTK_TOS(idle));
-
-	/* global pointer */
-	write_tc_gpr_gp((unsigned long)task_thread_info(idle));
-
-	smtc_status |= SMTC_MTC_ACTIVE;
-	write_tc_c0_tchalt(0);
-	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		evpe(EVPE_ENABLE);
-	}
-	UNLOCK_MT_PRA();
-}
-
-void smtc_init_secondary(void)
-{
-}
-
-void smtc_smp_finish(void)
-{
-	int cpu = smp_processor_id();
-
-	/*
-	 * Lowest-numbered CPU per VPE starts a clock tick.
-	 * Like per_cpu_trap_init() hack, this assumes that
-	 * SMTC init code assigns TCs consdecutively and
-	 * in ascending order across available VPEs.
-	 */
-	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
-		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
-
-	local_irq_enable();
-
-	printk("TC %d going on-line as CPU %d\n",
-		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
-}
-
-void smtc_cpus_done(void)
-{
-}
-
-/*
- * Support for SMTC-optimized driver IRQ registration
- */
-
-/*
- * SMTC Kernel needs to manipulate low-level CPU interrupt mask
- * in do_IRQ. These are passed in setup_irq_smtc() and stored
- * in this table.
- */
-
-int setup_irq_smtc(unsigned int irq, struct irqaction * new,
-			unsigned long hwmask)
-{
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
-	unsigned int vpe = current_cpu_data.vpe_id;
-
-	vpemask[vpe][irq - MIPS_CPU_IRQ_BASE] = 1;
-#endif
-	irq_hwmask[irq] = hwmask;
-
-	return setup_irq(irq, new);
-}
-
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-/*
- * Support for IRQ affinity to TCs
- */
-
-void smtc_set_irq_affinity(unsigned int irq, cpumask_t affinity)
-{
-	/*
-	 * If a "fast path" cache of quickly decodable affinity state
-	 * is maintained, this is where it gets done, on a call up
-	 * from the platform affinity code.
-	 */
-}
-
-void smtc_forward_irq(struct irq_data *d)
-{
-	unsigned int irq = d->irq;
-	int target;
-
-	/*
-	 * OK wise guy, now figure out how to get the IRQ
-	 * to be serviced on an authorized "CPU".
-	 *
-	 * Ideally, to handle the situation where an IRQ has multiple
-	 * eligible CPUS, we would maintain state per IRQ that would
-	 * allow a fair distribution of service requests.  Since the
-	 * expected use model is any-or-only-one, for simplicity
-	 * and efficiency, we just pick the easiest one to find.
-	 */
-
-	target = cpumask_first(d->affinity);
-
-	/*
-	 * We depend on the platform code to have correctly processed
-	 * IRQ affinity change requests to ensure that the IRQ affinity
-	 * mask has been purged of bits corresponding to nonexistent and
-	 * offline "CPUs", and to TCs bound to VPEs other than the VPE
-	 * connected to the physical interrupt input for the interrupt
-	 * in question.	 Otherwise we have a nasty problem with interrupt
-	 * mask management.  This is best handled in non-performance-critical
-	 * platform IRQ affinity setting code,	to minimize interrupt-time
-	 * checks.
-	 */
-
-	/* If no one is eligible, service locally */
-	if (target >= NR_CPUS)
-		do_IRQ_no_affinity(irq);
-	else
-		smtc_send_ipi(target, IRQ_AFFINITY_IPI, irq);
-}
-
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
-
-/*
- * IPI model for SMTC is tricky, because interrupts aren't TC-specific.
- * Within a VPE one TC can interrupt another by different approaches.
- * The easiest to get right would probably be to make all TCs except
- * the target IXMT and set a software interrupt, but an IXMT-based
- * scheme requires that a handler must run before a new IPI could
- * be sent, which would break the "broadcast" loops in MIPS MT.
- * A more gonzo approach within a VPE is to halt the TC, extract
- * its Restart, Status, and a couple of GPRs, and program the Restart
- * address to emulate an interrupt.
- *
- * Within a VPE, one can be confident that the target TC isn't in
- * a critical EXL state when halted, since the write to the Halt
- * register could not have issued on the writing thread if the
- * halting thread had EXL set. So k0 and k1 of the target TC
- * can be used by the injection code.  Across VPEs, one can't
- * be certain that the target TC isn't in a critical exception
- * state. So we try a two-step process of sending a software
- * interrupt to the target VPE, which either handles the event
- * itself (if it was the target) or injects the event within
- * the VPE.
- */
-
-static void smtc_ipi_qdump(void)
-{
-	int i;
-	struct smtc_ipi *temp;
-
-	for (i = 0; i < NR_CPUS ;i++) {
-		pr_info("IPIQ[%d]: head = 0x%x, tail = 0x%x, depth = %d\n",
-			i, (unsigned)IPIQ[i].head, (unsigned)IPIQ[i].tail,
-			IPIQ[i].depth);
-		temp = IPIQ[i].head;
-
-		while (temp != IPIQ[i].tail) {
-			pr_debug("%d %d %d: ", temp->type, temp->dest,
-			       (int)temp->arg);
-#ifdef	SMTC_IPI_DEBUG
-		    pr_debug("%u %lu\n", temp->sender, temp->stamp);
-#else
-		    pr_debug("\n");
-#endif
-		    temp = temp->flink;
-		}
-	}
-}
-
-/*
- * The standard atomic.h primitives don't quite do what we want
- * here: We need an atomic add-and-return-previous-value (which
- * could be done with atomic_add_return and a decrement) and an
- * atomic set/zero-and-return-previous-value (which can't really
- * be done with the atomic.h primitives). And since this is
- * MIPS MT, we can assume that we have LL/SC.
- */
-static inline int atomic_postincrement(atomic_t *v)
-{
-	unsigned long result;
-
-	unsigned long temp;
-
-	__asm__ __volatile__(
-	"1:	ll	%0, %2					\n"
-	"	addu	%1, %0, 1				\n"
-	"	sc	%1, %2					\n"
-	"	beqz	%1, 1b					\n"
-	__WEAK_LLSC_MB
-	: "=&r" (result), "=&r" (temp), "=m" (v->counter)
-	: "m" (v->counter)
-	: "memory");
-
-	return result;
-}
-
-void smtc_send_ipi(int cpu, int type, unsigned int action)
-{
-	int tcstatus;
-	struct smtc_ipi *pipi;
-	unsigned long flags;
-	int mtflags;
-	unsigned long tcrestart;
-	int set_resched_flag = (type == LINUX_SMP_IPI &&
-				action == SMP_RESCHEDULE_YOURSELF);
-
-	if (cpu == smp_processor_id()) {
-		printk("Cannot Send IPI to self!\n");
-		return;
-	}
-	if (set_resched_flag && IPIQ[cpu].resched_flag != 0)
-		return; /* There is a reschedule queued already */
-
-	/* Set up a descriptor, to be delivered either promptly or queued */
-	pipi = smtc_ipi_dq(&freeIPIq);
-	if (pipi == NULL) {
-		bust_spinlocks(1);
-		mips_mt_regdump(dvpe());
-		panic("IPI Msg. Buffers Depleted");
-	}
-	pipi->type = type;
-	pipi->arg = (void *)action;
-	pipi->dest = cpu;
-	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		/* If not on same VPE, enqueue and send cross-VPE interrupt */
-		IPIQ[cpu].resched_flag |= set_resched_flag;
-		smtc_ipi_nq(&IPIQ[cpu], pipi);
-		LOCK_CORE_PRA();
-		settc(cpu_data[cpu].tc_id);
-		write_vpe_c0_cause(read_vpe_c0_cause() | C_SW1);
-		UNLOCK_CORE_PRA();
-	} else {
-		/*
-		 * Not sufficient to do a LOCK_MT_PRA (dmt) here,
-		 * since ASID shootdown on the other VPE may
-		 * collide with this operation.
-		 */
-		LOCK_CORE_PRA();
-		settc(cpu_data[cpu].tc_id);
-		/* Halt the targeted TC */
-		write_tc_c0_tchalt(TCHALT_H);
-		mips_ihb();
-
-		/*
-		 * Inspect TCStatus - if IXMT is set, we have to queue
-		 * a message. Otherwise, we set up the "interrupt"
-		 * of the other TC
-		 */
-		tcstatus = read_tc_c0_tcstatus();
-
-		if ((tcstatus & TCSTATUS_IXMT) != 0) {
-			/*
-			 * If we're in the the irq-off version of the wait
-			 * loop, we need to force exit from the wait and
-			 * do a direct post of the IPI.
-			 */
-			if (cpu_wait == r4k_wait_irqoff) {
-				tcrestart = read_tc_c0_tcrestart();
-				if (address_is_in_r4k_wait_irqoff(tcrestart)) {
-					write_tc_c0_tcrestart(__pastwait);
-					tcstatus &= ~TCSTATUS_IXMT;
-					write_tc_c0_tcstatus(tcstatus);
-					goto postdirect;
-				}
-			}
-			/*
-			 * Otherwise we queue the message for the target TC
-			 * to pick up when he does a local_irq_restore()
-			 */
-			write_tc_c0_tchalt(0);
-			UNLOCK_CORE_PRA();
-			IPIQ[cpu].resched_flag |= set_resched_flag;
-			smtc_ipi_nq(&IPIQ[cpu], pipi);
-		} else {
-postdirect:
-			post_direct_ipi(cpu, pipi);
-			write_tc_c0_tchalt(0);
-			UNLOCK_CORE_PRA();
-		}
-	}
-}
-
-/*
- * Send IPI message to Halted TC, TargTC/TargVPE already having been set
- */
-static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
-{
-	struct pt_regs *kstack;
-	unsigned long tcstatus;
-	unsigned long tcrestart;
-	extern u32 kernelsp[NR_CPUS];
-	extern void __smtc_ipi_vector(void);
-//printk("%s: on %d for %d\n", __func__, smp_processor_id(), cpu);
-
-	/* Extract Status, EPC from halted TC */
-	tcstatus = read_tc_c0_tcstatus();
-	tcrestart = read_tc_c0_tcrestart();
-	/* If TCRestart indicates a WAIT instruction, advance the PC */
-	if ((tcrestart & 0x80000000)
-	    && ((*(unsigned int *)tcrestart & 0xfe00003f) == 0x42000020)) {
-		tcrestart += 4;
-	}
-	/*
-	 * Save on TC's future kernel stack
-	 *
-	 * CU bit of Status is indicator that TC was
-	 * already running on a kernel stack...
-	 */
-	if (tcstatus & ST0_CU0)	 {
-		/* Note that this "- 1" is pointer arithmetic */
-		kstack = ((struct pt_regs *)read_tc_gpr_sp()) - 1;
-	} else {
-		kstack = ((struct pt_regs *)kernelsp[cpu]) - 1;
-	}
-
-	kstack->cp0_epc = (long)tcrestart;
-	/* Save TCStatus */
-	kstack->cp0_tcstatus = tcstatus;
-	/* Pass token of operation to be performed kernel stack pad area */
-	kstack->pad0[4] = (unsigned long)pipi;
-	/* Pass address of function to be called likewise */
-	kstack->pad0[5] = (unsigned long)&ipi_decode;
-	/* Set interrupt exempt and kernel mode */
-	tcstatus |= TCSTATUS_IXMT;
-	tcstatus &= ~TCSTATUS_TKSU;
-	write_tc_c0_tcstatus(tcstatus);
-	ehb();
-	/* Set TC Restart address to be SMTC IPI vector */
-	write_tc_c0_tcrestart(__smtc_ipi_vector);
-}
-
-static void ipi_resched_interrupt(void)
-{
-	scheduler_ipi();
-}
-
-static void ipi_call_interrupt(void)
-{
-	/* Invoke generic function invocation code in smp.c */
-	smp_call_function_interrupt();
-}
-
-DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
-
-static void __irq_entry smtc_clock_tick_interrupt(void)
-{
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-	int irq = MIPS_CPU_IRQ_BASE + 1;
-
-	irq_enter();
-	kstat_incr_irq_this_cpu(irq);
-	cd = &per_cpu(mips_clockevent_device, cpu);
-	cd->event_handler(cd);
-	irq_exit();
-}
-
-void ipi_decode(struct smtc_ipi *pipi)
-{
-	void *arg_copy = pipi->arg;
-	int type_copy = pipi->type;
-
-	smtc_ipi_nq(&freeIPIq, pipi);
-
-	switch (type_copy) {
-	case SMTC_CLOCK_TICK:
-		smtc_clock_tick_interrupt();
-		break;
-
-	case LINUX_SMP_IPI:
-		switch ((int)arg_copy) {
-		case SMP_RESCHEDULE_YOURSELF:
-			ipi_resched_interrupt();
-			break;
-		case SMP_CALL_FUNCTION:
-			ipi_call_interrupt();
-			break;
-		default:
-			printk("Impossible SMTC IPI Argument %p\n", arg_copy);
-			break;
-		}
-		break;
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-	case IRQ_AFFINITY_IPI:
-		/*
-		 * Accept a "forwarded" interrupt that was initially
-		 * taken by a TC who doesn't have affinity for the IRQ.
-		 */
-		do_IRQ_no_affinity((int)arg_copy);
-		break;
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
-	default:
-		printk("Impossible SMTC IPI Type 0x%x\n", type_copy);
-		break;
-	}
-}
-
-/*
- * Similar to smtc_ipi_replay(), but invoked from context restore,
- * so it reuses the current exception frame rather than set up a
- * new one with self_ipi.
- */
-
-void deferred_smtc_ipi(void)
-{
-	int cpu = smp_processor_id();
-
-	/*
-	 * Test is not atomic, but much faster than a dequeue,
-	 * and the vast majority of invocations will have a null queue.
-	 * If irq_disabled when this was called, then any IPIs queued
-	 * after we test last will be taken on the next irq_enable/restore.
-	 * If interrupts were enabled, then any IPIs added after the
-	 * last test will be taken directly.
-	 */
-
-	while (IPIQ[cpu].head != NULL) {
-		struct smtc_ipi_q *q = &IPIQ[cpu];
-		struct smtc_ipi *pipi;
-		unsigned long flags;
-
-		/*
-		 * It may be possible we'll come in with interrupts
-		 * already enabled.
-		 */
-		local_irq_save(flags);
-		spin_lock(&q->lock);
-		pipi = __smtc_ipi_dq(q);
-		spin_unlock(&q->lock);
-		if (pipi != NULL) {
-			if (pipi->type == LINUX_SMP_IPI &&
-			    (int)pipi->arg == SMP_RESCHEDULE_YOURSELF)
-				IPIQ[cpu].resched_flag = 0;
-			ipi_decode(pipi);
-		}
-		/*
-		 * The use of the __raw_local restore isn't
-		 * as obviously necessary here as in smtc_ipi_replay(),
-		 * but it's more efficient, given that we're already
-		 * running down the IPI queue.
-		 */
-		__arch_local_irq_restore(flags);
-	}
-}
-
-/*
- * Cross-VPE interrupts in the SMTC prototype use "software interrupts"
- * set via cross-VPE MTTR manipulation of the Cause register. It would be
- * in some regards preferable to have external logic for "doorbell" hardware
- * interrupts.
- */
-
-static int cpu_ipi_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_IRQ;
-
-static irqreturn_t ipi_interrupt(int irq, void *dev_idm)
-{
-	int my_vpe = cpu_data[smp_processor_id()].vpe_id;
-	int my_tc = cpu_data[smp_processor_id()].tc_id;
-	int cpu;
-	struct smtc_ipi *pipi;
-	unsigned long tcstatus;
-	int sent;
-	unsigned long flags;
-	unsigned int mtflags;
-	unsigned int vpflags;
-
-	/*
-	 * So long as cross-VPE interrupts are done via
-	 * MFTR/MTTR read-modify-writes of Cause, we need
-	 * to stop other VPEs whenever the local VPE does
-	 * anything similar.
-	 */
-	local_irq_save(flags);
-	vpflags = dvpe();
-	clear_c0_cause(0x100 << MIPS_CPU_IPI_IRQ);
-	set_c0_status(0x100 << MIPS_CPU_IPI_IRQ);
-	irq_enable_hazard();
-	evpe(vpflags);
-	local_irq_restore(flags);
-
-	/*
-	 * Cross-VPE Interrupt handler: Try to directly deliver IPIs
-	 * queued for TCs on this VPE other than the current one.
-	 * Return-from-interrupt should cause us to drain the queue
-	 * for the current TC, so we ought not to have to do it explicitly here.
-	 */
-
-	for_each_online_cpu(cpu) {
-		if (cpu_data[cpu].vpe_id != my_vpe)
-			continue;
-
-		pipi = smtc_ipi_dq(&IPIQ[cpu]);
-		if (pipi != NULL) {
-			if (cpu_data[cpu].tc_id != my_tc) {
-				sent = 0;
-				LOCK_MT_PRA();
-				settc(cpu_data[cpu].tc_id);
-				write_tc_c0_tchalt(TCHALT_H);
-				mips_ihb();
-				tcstatus = read_tc_c0_tcstatus();
-				if ((tcstatus & TCSTATUS_IXMT) == 0) {
-					post_direct_ipi(cpu, pipi);
-					sent = 1;
-				}
-				write_tc_c0_tchalt(0);
-				UNLOCK_MT_PRA();
-				if (!sent) {
-					smtc_ipi_req(&IPIQ[cpu], pipi);
-				}
-			} else {
-				/*
-				 * ipi_decode() should be called
-				 * with interrupts off
-				 */
-				local_irq_save(flags);
-				if (pipi->type == LINUX_SMP_IPI &&
-				    (int)pipi->arg == SMP_RESCHEDULE_YOURSELF)
-					IPIQ[cpu].resched_flag = 0;
-				ipi_decode(pipi);
-				local_irq_restore(flags);
-			}
-		}
-	}
-
-	return IRQ_HANDLED;
-}
-
-static void ipi_irq_dispatch(void)
-{
-	do_IRQ(cpu_ipi_irq);
-}
-
-static struct irqaction irq_ipi = {
-	.handler	= ipi_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "SMTC_IPI"
-};
-
-static void setup_cross_vpe_interrupts(unsigned int nvpe)
-{
-	if (nvpe < 1)
-		return;
-
-	if (!cpu_has_vint)
-		panic("SMTC Kernel requires Vectored Interrupt support");
-
-	set_vi_handler(MIPS_CPU_IPI_IRQ, ipi_irq_dispatch);
-
-	setup_irq_smtc(cpu_ipi_irq, &irq_ipi, (0x100 << MIPS_CPU_IPI_IRQ));
-
-	irq_set_handler(cpu_ipi_irq, handle_percpu_irq);
-}
-
-/*
- * SMTC-specific hacks invoked from elsewhere in the kernel.
- */
-
- /*
-  * smtc_ipi_replay is called from raw_local_irq_restore
-  */
-
-void smtc_ipi_replay(void)
-{
-	unsigned int cpu = smp_processor_id();
-
-	/*
-	 * To the extent that we've ever turned interrupts off,
-	 * we may have accumulated deferred IPIs.  This is subtle.
-	 * we should be OK:  If we pick up something and dispatch
-	 * it here, that's great. If we see nothing, but concurrent
-	 * with this operation, another TC sends us an IPI, IXMT
-	 * is clear, and we'll handle it as a real pseudo-interrupt
-	 * and not a pseudo-pseudo interrupt.  The important thing
-	 * is to do the last check for queued message *after* the
-	 * re-enabling of interrupts.
-	 */
-	while (IPIQ[cpu].head != NULL) {
-		struct smtc_ipi_q *q = &IPIQ[cpu];
-		struct smtc_ipi *pipi;
-		unsigned long flags;
-
-		/*
-		 * It's just possible we'll come in with interrupts
-		 * already enabled.
-		 */
-		local_irq_save(flags);
-
-		spin_lock(&q->lock);
-		pipi = __smtc_ipi_dq(q);
-		spin_unlock(&q->lock);
-		/*
-		 ** But use a raw restore here to avoid recursion.
-		 */
-		__arch_local_irq_restore(flags);
-
-		if (pipi) {
-			self_ipi(pipi);
-			smtc_cpu_stats[cpu].selfipis++;
-		}
-	}
-}
-
-EXPORT_SYMBOL(smtc_ipi_replay);
-
-void smtc_idle_loop_hook(void)
-{
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
-	int im;
-	int flags;
-	int mtflags;
-	int bit;
-	int vpe;
-	int tc;
-	int hook_ntcs;
-	/*
-	 * printk within DMT-protected regions can deadlock,
-	 * so buffer diagnostic messages for later output.
-	 */
-	char *pdb_msg;
-	char id_ho_db_msg[768]; /* worst-case use should be less than 700 */
-
-	if (atomic_read(&idle_hook_initialized) == 0) { /* fast test */
-		if (atomic_add_return(1, &idle_hook_initialized) == 1) {
-			int mvpconf0;
-			/* Tedious stuff to just do once */
-			mvpconf0 = read_c0_mvpconf0();
-			hook_ntcs = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
-			if (hook_ntcs > NR_CPUS)
-				hook_ntcs = NR_CPUS;
-			for (tc = 0; tc < hook_ntcs; tc++) {
-				tcnoprog[tc] = 0;
-				clock_hang_reported[tc] = 0;
-			}
-			for (vpe = 0; vpe < 2; vpe++)
-				for (im = 0; im < 8; im++)
-					imstuckcount[vpe][im] = 0;
-			printk("Idle loop test hook initialized for %d TCs\n", hook_ntcs);
-			atomic_set(&idle_hook_initialized, 1000);
-		} else {
-			/* Someone else is initializing in parallel - let 'em finish */
-			while (atomic_read(&idle_hook_initialized) < 1000)
-				;
-		}
-	}
-
-	/* Have we stupidly left IXMT set somewhere? */
-	if (read_c0_tcstatus() & 0x400) {
-		write_c0_tcstatus(read_c0_tcstatus() & ~0x400);
-		ehb();
-		printk("Dangling IXMT in cpu_idle()\n");
-	}
-
-	/* Have we stupidly left an IM bit turned off? */
-#define IM_LIMIT 2000
-	local_irq_save(flags);
-	mtflags = dmt();
-	pdb_msg = &id_ho_db_msg[0];
-	im = read_c0_status();
-	vpe = current_cpu_data.vpe_id;
-	for (bit = 0; bit < 8; bit++) {
-		/*
-		 * In current prototype, I/O interrupts
-		 * are masked for VPE > 0
-		 */
-		if (vpemask[vpe][bit]) {
-			if (!(im & (0x100 << bit)))
-				imstuckcount[vpe][bit]++;
-			else
-				imstuckcount[vpe][bit] = 0;
-			if (imstuckcount[vpe][bit] > IM_LIMIT) {
-				set_c0_status(0x100 << bit);
-				ehb();
-				imstuckcount[vpe][bit] = 0;
-				pdb_msg += sprintf(pdb_msg,
-					"Dangling IM %d fixed for VPE %d\n", bit,
-					vpe);
-			}
-		}
-	}
-
-	emt(mtflags);
-	local_irq_restore(flags);
-	if (pdb_msg != &id_ho_db_msg[0])
-		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
-#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
-
-	smtc_ipi_replay();
-}
-
-void smtc_soft_dump(void)
-{
-	int i;
-
-	printk("Counter Interrupts taken per CPU (TC)\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %ld\n", i, smtc_cpu_stats[i].timerints);
-	}
-	printk("Self-IPI invocations:\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
-	}
-	smtc_ipi_qdump();
-	printk("%d Recoveries of \"stolen\" FPU\n",
-	       atomic_read(&smtc_fpu_recoveries));
-}
-
-
-/*
- * TLB management routines special to SMTC
- */
-
-void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
-{
-	unsigned long flags, mtflags, tcstat, prevhalt, asid;
-	int tlb, i;
-
-	/*
-	 * It would be nice to be able to use a spinlock here,
-	 * but this is invoked from within TLB flush routines
-	 * that protect themselves with DVPE, so if a lock is
-	 * held by another TC, it'll never be freed.
-	 *
-	 * DVPE/DMT must not be done with interrupts enabled,
-	 * so even so most callers will already have disabled
-	 * them, let's be really careful...
-	 */
-
-	local_irq_save(flags);
-	if (smtc_status & SMTC_TLB_SHARED) {
-		mtflags = dvpe();
-		tlb = 0;
-	} else {
-		mtflags = dmt();
-		tlb = cpu_data[cpu].vpe_id;
-	}
-	asid = asid_cache(cpu);
-
-	do {
-		if (!((asid += ASID_INC) & ASID_MASK) ) {
-			if (cpu_has_vtag_icache)
-				flush_icache_all();
-			/* Traverse all online CPUs (hack requires contiguous range) */
-			for_each_online_cpu(i) {
-				/*
-				 * We don't need to worry about our own CPU, nor those of
-				 * CPUs who don't share our TLB.
-				 */
-				if ((i != smp_processor_id()) &&
-				    ((smtc_status & SMTC_TLB_SHARED) ||
-				     (cpu_data[i].vpe_id == cpu_data[cpu].vpe_id))) {
-					settc(cpu_data[i].tc_id);
-					prevhalt = read_tc_c0_tchalt() & TCHALT_H;
-					if (!prevhalt) {
-						write_tc_c0_tchalt(TCHALT_H);
-						mips_ihb();
-					}
-					tcstat = read_tc_c0_tcstatus();
-					smtc_live_asid[tlb][(tcstat & ASID_MASK)] |= (asiduse)(0x1 << i);
-					if (!prevhalt)
-						write_tc_c0_tchalt(0);
-				}
-			}
-			if (!asid)		/* fix version if needed */
-				asid = ASID_FIRST_VERSION;
-			local_flush_tlb_all();	/* start new asid cycle */
-		}
-	} while (smtc_live_asid[tlb][(asid & ASID_MASK)]);
-
-	/*
-	 * SMTC shares the TLB within VPEs and possibly across all VPEs.
-	 */
-	for_each_online_cpu(i) {
-		if ((smtc_status & SMTC_TLB_SHARED) ||
-		    (cpu_data[i].vpe_id == cpu_data[cpu].vpe_id))
-			cpu_context(i, mm) = asid_cache(i) = asid;
-	}
-
-	if (smtc_status & SMTC_TLB_SHARED)
-		evpe(mtflags);
-	else
-		emt(mtflags);
-	local_irq_restore(flags);
-}
-
-/*
- * Invoked from macros defined in mmu_context.h
- * which must already have disabled interrupts
- * and done a DVPE or DMT as appropriate.
- */
-
-void smtc_flush_tlb_asid(unsigned long asid)
-{
-	int entry;
-	unsigned long ehi;
-
-	entry = read_c0_wired();
-
-	/* Traverse all non-wired entries */
-	while (entry < current_cpu_data.tlbsize) {
-		write_c0_index(entry);
-		ehb();
-		tlb_read();
-		ehb();
-		ehi = read_c0_entryhi();
-		if ((ehi & ASID_MASK) == asid) {
-		    /*
-		     * Invalidate only entries with specified ASID,
-		     * makiing sure all entries differ.
-		     */
-		    write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1)));
-		    write_c0_entrylo0(0);
-		    write_c0_entrylo1(0);
-		    mtc0_tlbw_hazard();
-		    tlb_write_indexed();
-		}
-		entry++;
-	}
-	write_c0_index(PARKED_INDEX);
-	tlbw_use_hazard();
-}
-
-/*
- * Support for single-threading cache flush operations.
- */
-
-static int halt_state_save[NR_CPUS];
-
-/*
- * To really, really be sure that nothing is being done
- * by other TCs, halt them all.	 This code assumes that
- * a DVPE has already been done, so while their Halted
- * state is theoretically architecturally unstable, in
- * practice, it's not going to change while we're looking
- * at it.
- */
-
-void smtc_cflush_lockdown(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (cpu != smp_processor_id()) {
-			settc(cpu_data[cpu].tc_id);
-			halt_state_save[cpu] = read_tc_c0_tchalt();
-			write_tc_c0_tchalt(TCHALT_H);
-		}
-	}
-	mips_ihb();
-}
-
-/* It would be cheating to change the cpu_online states during a flush! */
-
-void smtc_cflush_release(void)
-{
-	int cpu;
-
-	/*
-	 * Start with a hazard barrier to ensure
-	 * that all CACHE ops have played through.
-	 */
-	mips_ihb();
-
-	for_each_online_cpu(cpu) {
-		if (cpu != smp_processor_id()) {
-			settc(cpu_data[cpu].tc_id);
-			write_tc_c0_tchalt(halt_state_save[cpu]);
-		}
-	}
-	mips_ihb();
-}

diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index c24ad5f..2242bdd 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c

@@ -6,8 +6,6 @@
  * not have done anything significant (but they may have had interrupts
  * enabled briefly - prom_smp_finish() should not be responsible for enabling
  * interrupts...)
- *
- * FIXME: broken for SMTC
  */
 
 #include <linux/kernel.h>
@@ -33,14 +31,6 @@
 	unsigned long flags;
 	unsigned int initcount;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC needs to synchronise per VPE, not per CPU
-	 * ignore for now
-	 */
-	return;
-#endif
-
 	printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu);
 
 	local_irq_save(flags);
@@ -110,14 +100,6 @@
 	int i;
 	unsigned int initcount;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC needs to synchronise per VPE, not per CPU
-	 * ignore for now
-	 */
-	return;
-#endif
-
 	/*
 	 * Not every cpu is online at the time this gets called,
 	 * so we first wait for the master to say everyone is ready

diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index dcb8e5d..8d01709 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c

@@ -26,7 +26,6 @@
 #include <asm/cpu-features.h>
 #include <asm/cpu-type.h>
 #include <asm/div64.h>
-#include <asm/smtc_ipi.h>
 #include <asm/time.h>
 
 /*

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 8119ac2..51706d6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/context_tracking.h>
+#include <linux/cpu_pm.h>
 #include <linux/kexec.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -370,9 +371,6 @@
 {
 	static int die_counter;
 	int sig = SIGSEGV;
-#ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long dvpret;
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 	oops_enter();
 
@@ -382,13 +380,7 @@
 
 	console_verbose();
 	raw_spin_lock_irq(&die_lock);
-#ifdef CONFIG_MIPS_MT_SMTC
-	dvpret = dvpe();
-#endif /* CONFIG_MIPS_MT_SMTC */
 	bust_spinlocks(1);
-#ifdef CONFIG_MIPS_MT_SMTC
-	mips_mt_regdump(dvpret);
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
@@ -712,10 +704,12 @@
 		si.si_addr = fault_addr;
 		si.si_signo = sig;
 		if (sig == SIGSEGV) {
+			down_read(&current->mm->mmap_sem);
 			if (find_vma(current->mm, (unsigned long)fault_addr))
 				si.si_code = SEGV_ACCERR;
 			else
 				si.si_code = SEGV_MAPERR;
+			up_read(&current->mm->mmap_sem);
 		} else {
 			si.si_code = BUS_ADRERR;
 		}
@@ -1759,19 +1753,6 @@
 		extern char rollback_except_vec_vi;
 		char *vec_start = using_rollback_handler() ?
 			&rollback_except_vec_vi : &except_vec_vi;
-#ifdef CONFIG_MIPS_MT_SMTC
-		/*
-		 * We need to provide the SMTC vectored interrupt handler
-		 * not only with the address of the handler, but with the
-		 * Status.IM bit to be masked before going there.
-		 */
-		extern char except_vec_vi_mori;
-#if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
-		const int mori_offset = &except_vec_vi_mori - vec_start + 2;
-#else
-		const int mori_offset = &except_vec_vi_mori - vec_start;
-#endif
-#endif /* CONFIG_MIPS_MT_SMTC */
 #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
 		const int lui_offset = &except_vec_vi_lui - vec_start + 2;
 		const int ori_offset = &except_vec_vi_ori - vec_start + 2;
@@ -1795,12 +1776,6 @@
 #else
 				handler_len);
 #endif
-#ifdef CONFIG_MIPS_MT_SMTC
-		BUG_ON(n > 7);	/* Vector index %d exceeds SMTC maximum. */
-
-		h = (u16 *)(b + mori_offset);
-		*h = (0x100 << n);
-#endif /* CONFIG_MIPS_MT_SMTC */
 		h = (u16 *)(b + lui_offset);
 		*h = (handler >> 16) & 0xffff;
 		h = (u16 *)(b + ori_offset);
@@ -1865,32 +1840,16 @@
 }
 __setup("noulri", ulri_disable);
 
-void per_cpu_trap_init(bool is_boot_cpu)
+/* configure STATUS register */
+static void configure_status(void)
 {
-	unsigned int cpu = smp_processor_id();
-	unsigned int status_set = ST0_CU0;
-	unsigned int hwrena = cpu_hwrena_impl_bits;
-#ifdef CONFIG_MIPS_MT_SMTC
-	int secondaryTC = 0;
-	int bootTC = (cpu == 0);
-
-	/*
-	 * Only do per_cpu_trap_init() for first TC of Each VPE.
-	 * Note that this hack assumes that the SMTC init code
-	 * assigns TCs consecutively and in ascending order.
-	 */
-
-	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
-	    ((read_c0_tcbind() & TCBIND_CURVPE) == cpu_data[cpu - 1].vpe_id))
-		secondaryTC = 1;
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 	/*
 	 * Disable coprocessors and select 32-bit or 64-bit addressing
 	 * and the 16/32 or 32/32 FPR register model.  Reset the BEV
 	 * flag that some firmware may have left set and the TS bit (for
 	 * IP27).  Set XX for ISA IV code to work.
 	 */
+	unsigned int status_set = ST0_CU0;
 #ifdef CONFIG_64BIT
 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
 #endif
@@ -1901,6 +1860,12 @@
 
 	change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX,
 			 status_set);
+}
+
+/* configure HWRENA register */
+static void configure_hwrena(void)
+{
+	unsigned int hwrena = cpu_hwrena_impl_bits;
 
 	if (cpu_has_mips_r2)
 		hwrena |= 0x0000000f;
@@ -1910,11 +1875,10 @@
 
 	if (hwrena)
 		write_c0_hwrena(hwrena);
+}
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	if (!secondaryTC) {
-#endif /* CONFIG_MIPS_MT_SMTC */
-
+static void configure_exception_vector(void)
+{
 	if (cpu_has_veic || cpu_has_vint) {
 		unsigned long sr = set_c0_status(ST0_BEV);
 		write_c0_ebase(ebase);
@@ -1930,6 +1894,16 @@
 		} else
 			set_c0_cause(CAUSEF_IV);
 	}
+}
+
+void per_cpu_trap_init(bool is_boot_cpu)
+{
+	unsigned int cpu = smp_processor_id();
+
+	configure_status();
+	configure_hwrena();
+
+	configure_exception_vector();
 
 	/*
 	 * Before R2 both interrupt numbers were fixed to 7, so on R2 only:
@@ -1949,10 +1923,6 @@
 		cp0_perfcount_irq = -1;
 	}
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	}
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 	if (!cpu_data[cpu].asid_cache)
 		cpu_data[cpu].asid_cache = ASID_FIRST_VERSION;
 
@@ -1961,23 +1931,10 @@
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	if (bootTC) {
-#endif /* CONFIG_MIPS_MT_SMTC */
 		/* Boot CPU's cache setup in setup_arch(). */
 		if (!is_boot_cpu)
 			cpu_cache_init();
 		tlb_init();
-#ifdef CONFIG_MIPS_MT_SMTC
-	} else if (!secondaryTC) {
-		/*
-		 * First TC in non-boot VPE must do subset of tlb_init()
-		 * for MMU countrol registers.
-		 */
-		write_c0_pagemask(PM_DEFAULT_MASK);
-		write_c0_wired(0);
-	}
-#endif /* CONFIG_MIPS_MT_SMTC */
 	TLBMISS_HANDLER_SETUP();
 }
 
@@ -2185,3 +2142,32 @@
 
 	cu2_notifier(default_cu2_call, 0x80000000);	/* Run last  */
 }
+
+static int trap_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			    void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		configure_status();
+		configure_hwrena();
+		configure_exception_vector();
+
+		/* Restore register with CPU number for TLB handlers */
+		TLBMISS_HANDLER_RESTORE();
+
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block trap_pm_notifier_block = {
+	.notifier_call = trap_pm_notifier,
+};
+
+static int __init trap_pm_init(void)
+{
+	return cpu_pm_register_notifier(&trap_pm_notifier_block);
+}
+arch_initcall(trap_pm_init);

diff --git a/arch/mips/kernel/vpe-mt.c b/arch/mips/kernel/vpe-mt.c
index 949ae0e..2e003b1 100644
--- a/arch/mips/kernel/vpe-mt.c
+++ b/arch/mips/kernel/vpe-mt.c

@@ -127,9 +127,8 @@
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
 	/*
-	 * SMTC/SMVP kernels manage VPE enable independently,
-	 * but uniprocessor kernels need to turn it on, even
-	 * if that wasn't the pre-dvpe() state.
+	 * SMVP kernels manage VPE enable independently, but uniprocessor
+	 * kernels need to turn it on, even if that wasn't the pre-dvpe() state.
 	 */
 #ifdef CONFIG_SMP
 	evpe(vpeflags);
@@ -454,12 +453,11 @@
 
 			settc(tc);
 
-			/* Any TC that is bound to VPE0 gets left as is - in
-			 * case we are running SMTC on VPE0. A TC that is bound
-			 * to any other VPE gets bound to VPE0, ideally I'd like
-			 * to make it homeless but it doesn't appear to let me
-			 * bind a TC to a non-existent VPE. Which is perfectly
-			 * reasonable.
+			/*
+			 * A TC that is bound to any other VPE gets bound to
+			 * VPE0, ideally I'd like to make it homeless but it
+			 * doesn't appear to let me bind a TC to a non-existent
+			 * VPE. Which is perfectly reasonable.
 			 *
 			 * The (un)bound state is visible to an EJTAG probe so
 			 * may notify GDB...

diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 85685e1..030568a 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c

@@ -61,7 +61,7 @@
 /* we have a cascade of 8 irqs */
 #define MIPS_CPU_IRQ_CASCADE		8
 
-#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_MIPS_MT_SMP
 int gic_present;
 #endif
 
@@ -440,7 +440,7 @@
 	arch_init_ipiirq(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ, &irq_call);
 #endif
 
-#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
+#ifndef CONFIG_MIPS_MT_SMP
 	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
 		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
 #else

diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
index 705cfb7..21d27c6 100644
--- a/arch/mips/lib/delay.c
+++ b/arch/mips/lib/delay.c

@@ -11,7 +11,9 @@
 #include <linux/module.h>
 #include <linux/param.h>
 #include <linux/smp.h>
+#include <linux/stringify.h>
 
+#include <asm/asm.h>
 #include <asm/compiler.h>
 #include <asm/war.h>
 
@@ -27,11 +29,7 @@
 	"	.set	noreorder				\n"
 	"	.align	3					\n"
 	"1:	bnez	%0, 1b					\n"
-#if BITS_PER_LONG == 32
-	"	subu	%0, %1					\n"
-#else
-	"	dsubu	%0, %1					\n"
-#endif
+	"	 " __stringify(LONG_SUBU) "	%0, %1		\n"
 	"	.set	reorder					\n"
 	: "=r" (loops)
 	: GCC_DADDI_IMM_ASM() (1), "0" (loops));

diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index 6807f71..57bcdaf1 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c

@@ -15,7 +15,7 @@
 #include <linux/export.h>
 #include <linux/stringify.h>
 
-#if !defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT_SMTC)
+#ifndef CONFIG_CPU_MIPSR2
 
 /*
  * For cli() we have to insert nops to make sure that the new value
@@ -42,12 +42,7 @@
 	__asm__ __volatile__(
 	"	.set	push						\n"
 	"	.set	noat						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	$1, $2, 1					\n"
-	"	ori	$1, 0x400					\n"
-	"	.set	noreorder					\n"
-	"	mtc0	$1, $2, 1					\n"
-#elif defined(CONFIG_CPU_MIPSR2)
+#if   defined(CONFIG_CPU_MIPSR2)
 	/* see irqflags.h for inline function */
 #else
 	"	mfc0	$1,$12						\n"
@@ -77,13 +72,7 @@
 	"	.set	push						\n"
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	%[flags], $2, 1				\n"
-	"	ori	$1, %[flags], 0x400				\n"
-	"	.set	noreorder					\n"
-	"	mtc0	$1, $2, 1					\n"
-	"	andi	%[flags], %[flags], 0x400			\n"
-#elif defined(CONFIG_CPU_MIPSR2)
+#if   defined(CONFIG_CPU_MIPSR2)
 	/* see irqflags.h for inline function */
 #else
 	"	mfc0	%[flags], $12					\n"
@@ -108,29 +97,13 @@
 {
 	unsigned long __tmp1;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	/*
-	 * SMTC kernel needs to do a software replay of queued
-	 * IPIs, at the cost of branch and call overhead on each
-	 * local_irq_restore()
-	 */
-	if (unlikely(!(flags & 0x0400)))
-		smtc_ipi_replay();
-#endif
 	preempt_disable();
 
 	__asm__ __volatile__(
 	"	.set	push						\n"
 	"	.set	noreorder					\n"
 	"	.set	noat						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	$1, $2, 1					\n"
-	"	andi	%[flags], 0x400					\n"
-	"	ori	$1, 0x400					\n"
-	"	xori	$1, 0x400					\n"
-	"	or	%[flags], $1					\n"
-	"	mtc0	%[flags], $2, 1					\n"
-#elif defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU)
+#if   defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU)
 	/* see irqflags.h for inline function */
 #elif defined(CONFIG_CPU_MIPSR2)
 	/* see irqflags.h for inline function */
@@ -163,14 +136,7 @@
 	"	.set	push						\n"
 	"	.set	noreorder					\n"
 	"	.set	noat						\n"
-#ifdef CONFIG_MIPS_MT_SMTC
-	"	mfc0	$1, $2, 1					\n"
-	"	andi	%[flags], 0x400					\n"
-	"	ori	$1, 0x400					\n"
-	"	xori	$1, 0x400					\n"
-	"	or	%[flags], $1					\n"
-	"	mtc0	%[flags], $2, 1					\n"
-#elif defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU)
+#if   defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU)
 	/* see irqflags.h for inline function */
 #elif defined(CONFIG_CPU_MIPSR2)
 	/* see irqflags.h for inline function */
@@ -192,4 +158,4 @@
 }
 EXPORT_SYMBOL(__arch_local_irq_restore);
 
-#endif /* !defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT_SMTC) */
+#endif /* !CONFIG_CPU_MIPSR2 */

diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 603d79a..e6a86cc 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig

@@ -95,10 +95,11 @@
 
 config CS5536_MFGPT
 	bool "CS5536 MFGPT Timer"
-	depends on CS5536
+	depends on CS5536 && !HIGH_RES_TIMERS
 	select MIPS_EXTERNAL_TIMER
 	help
-	  This option enables the mfgpt0 timer of AMD CS5536.
+	  This option enables the mfgpt0 timer of AMD CS5536. With this timer
+	  switched on you can not use high resolution timers.
 
 	  If you want to enable the Loongson2 CPUFreq Driver, Please enable
 	  this option at first, otherwise, You will get wrong system time.

diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c
index c665fe1..1e88940 100644
--- a/arch/mips/loongson/loongson-3/smp.c
+++ b/arch/mips/loongson/loongson-3/smp.c

@@ -279,13 +279,6 @@
 	loongson3_ipi_write64(startargs[0], (void *)(ipi_mailbox_buf[cpu]+0x0));
 }
 
-/*
- * Final cleanup after all secondaries booted
- */
-static void __init loongson3_cpus_done(void)
-{
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 static int loongson3_cpu_disable(void)
@@ -432,7 +425,6 @@
 	.send_ipi_mask = loongson3_send_ipi_mask,
 	.init_secondary = loongson3_init_secondary,
 	.smp_finish = loongson3_smp_finish,
-	.cpus_done = loongson3_cpus_done,
 	.boot_secondary = loongson3_boot_secondary,
 	.smp_setup = loongson3_smp_setup,
 	.prepare_cpus = loongson3_prepare_cpus,

diff --git a/arch/mips/loongson1/Kconfig b/arch/mips/loongson1/Kconfig
index fbf75f6..e23c25d 100644
--- a/arch/mips/loongson1/Kconfig
+++ b/arch/mips/loongson1/Kconfig

@@ -14,6 +14,7 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
+	select SYS_SUPPORTS_MIPS16
 	select SYS_HAS_EARLY_PRINTK
 	select COMMON_CLK
 

diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index 121a848..619cfc1 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile

@@ -2,10 +2,12 @@
 # Makefile for the Linux/MIPS kernel FPU emulation.
 #
 
-obj-y	:= cp1emu.o ieee754m.o ieee754d.o ieee754dp.o ieee754sp.o ieee754.o \
-	   ieee754xcpt.o dp_frexp.o dp_modf.o dp_div.o dp_mul.o dp_sub.o \
-	   dp_add.o dp_fsp.o dp_cmp.o dp_logb.o dp_scalb.o dp_simple.o \
-	   dp_tint.o dp_fint.o dp_tlong.o dp_flong.o sp_frexp.o sp_modf.o \
-	   sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_logb.o \
-	   sp_scalb.o sp_simple.o sp_tint.o sp_fint.o sp_tlong.o sp_flong.o \
-	   dp_sqrt.o sp_sqrt.o kernel_linkage.o dsemul.o
+obj-y	+= cp1emu.o ieee754dp.o ieee754sp.o ieee754.o dp_div.o dp_mul.o \
+	   dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o dp_tint.o \
+	   dp_fint.o dp_tlong.o dp_flong.o sp_div.o sp_mul.o sp_sub.o \
+	   sp_add.o sp_fdp.o sp_cmp.o sp_simple.o sp_tint.o sp_fint.o \
+	   sp_tlong.o sp_flong.o dsemul.o
+
+lib-y	+= ieee754d.o dp_sqrt.o sp_sqrt.o
+
+obj-$(CONFIG_DEBUG_FS) += me-debugfs.o

diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 7b3c9ac..736c17a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c

@@ -1,5 +1,5 @@
 /*
- * cp1emu.c: a MIPS coprocessor 1 (fpu) instruction emulator
+ * cp1emu.c: a MIPS coprocessor 1 (FPU) instruction emulator
  *
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
@@ -18,61 +18,46 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  *
  * A complete emulator for MIPS coprocessor 1 instructions.  This is
  * required for #float(switch) or #float(trap), where it catches all
  * COP1 instructions via the "CoProcessor Unusable" exception.
  *
  * More surprisingly it is also required for #float(ieee), to help out
- * the hardware fpu at the boundaries of the IEEE-754 representation
+ * the hardware FPU at the boundaries of the IEEE-754 representation
  * (denormalised values, infinities, underflow, etc).  It is made
  * quite nasty because emulation of some non-COP1 instructions is
  * required, e.g. in branch delay slots.
  *
- * Note if you know that you won't have an fpu, then you'll get much
+ * Note if you know that you won't have an FPU, then you'll get much
  * better performance by compiling with -msoft-float!
  */
 #include <linux/sched.h>
-#include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/kconfig.h>
+#include <linux/percpu-defs.h>
 #include <linux/perf_event.h>
 
+#include <asm/branch.h>
 #include <asm/inst.h>
-#include <asm/bootinfo.h>
-#include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/signal.h>
-#include <asm/mipsregs.h>
+#include <asm/uaccess.h>
+
+#include <asm/processor.h>
 #include <asm/fpu_emulator.h>
 #include <asm/fpu.h>
-#include <asm/uaccess.h>
-#include <asm/branch.h>
 
 #include "ieee754.h"
 
-/* Strap kernel emulator for full MIPS IV emulation */
-
-#ifdef __mips
-#undef __mips
-#endif
-#define __mips 4
-
 /* Function which emulates a floating point instruction. */
 
 static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *,
 	mips_instruction);
 
-#if __mips >= 4 && __mips != 32
 static int fpux_emu(struct pt_regs *,
 	struct mips_fpu_struct *, mips_instruction, void *__user *);
-#endif
-
-/* Further private data for which no space exists in mips_fpu_struct */
-
-#ifdef CONFIG_DEBUG_FS
-DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
-#endif
 
 /* Control registers */
 
@@ -82,27 +67,6 @@
 /* Determine rounding mode from the RM bits of the FCSR */
 #define modeindex(v) ((v) & FPU_CSR_RM)
 
-/* microMIPS bitfields */
-#define MM_POOL32A_MINOR_MASK	0x3f
-#define MM_POOL32A_MINOR_SHIFT	0x6
-#define MM_MIPS32_COND_FC	0x30
-
-/* Convert Mips rounding mode (0..3) to IEEE library modes. */
-static const unsigned char ieee_rm[4] = {
-	[FPU_CSR_RN] = IEEE754_RN,
-	[FPU_CSR_RZ] = IEEE754_RZ,
-	[FPU_CSR_RU] = IEEE754_RU,
-	[FPU_CSR_RD] = IEEE754_RD,
-};
-/* Convert IEEE library modes to Mips rounding mode (0..3). */
-static const unsigned char mips_rm[4] = {
-	[IEEE754_RN] = FPU_CSR_RN,
-	[IEEE754_RZ] = FPU_CSR_RZ,
-	[IEEE754_RD] = FPU_CSR_RD,
-	[IEEE754_RU] = FPU_CSR_RU,
-};
-
-#if __mips >= 4
 /* convert condition code register number to csr bit */
 static const unsigned int fpucondbit[8] = {
 	FPU_CSR_COND0,
@@ -114,10 +78,6 @@
 	FPU_CSR_COND6,
 	FPU_CSR_COND7
 };
-#endif
-
-/* (microMIPS) Convert 16-bit register encoding to 32-bit register encoding. */
-static const unsigned int reg16to32map[8] = {16, 17, 2, 3, 4, 5, 6, 7};
 
 /* (microMIPS) Convert certain microMIPS instructions to MIPS32 format. */
 static const int sd_format[] = {16, 17, 0, 0, 0, 0, 0, 0};
@@ -466,199 +426,6 @@
 	return 0;
 }
 
-int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
-		     unsigned long *contpc)
-{
-	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
-	int bc_false = 0;
-	unsigned int fcr31;
-	unsigned int bit;
-
-	if (!cpu_has_mmips)
-		return 0;
-
-	switch (insn.mm_i_format.opcode) {
-	case mm_pool32a_op:
-		if ((insn.mm_i_format.simmediate & MM_POOL32A_MINOR_MASK) ==
-		    mm_pool32axf_op) {
-			switch (insn.mm_i_format.simmediate >>
-				MM_POOL32A_MINOR_SHIFT) {
-			case mm_jalr_op:
-			case mm_jalrhb_op:
-			case mm_jalrs_op:
-			case mm_jalrshb_op:
-				if (insn.mm_i_format.rt != 0)	/* Not mm_jr */
-					regs->regs[insn.mm_i_format.rt] =
-						regs->cp0_epc +
-						dec_insn.pc_inc +
-						dec_insn.next_pc_inc;
-				*contpc = regs->regs[insn.mm_i_format.rs];
-				return 1;
-			}
-		}
-		break;
-	case mm_pool32i_op:
-		switch (insn.mm_i_format.rt) {
-		case mm_bltzals_op:
-		case mm_bltzal_op:
-			regs->regs[31] = regs->cp0_epc +
-				dec_insn.pc_inc +
-				dec_insn.next_pc_inc;
-			/* Fall through */
-		case mm_bltz_op:
-			if ((long)regs->regs[insn.mm_i_format.rs] < 0)
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					(insn.mm_i_format.simmediate << 1);
-			else
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					dec_insn.next_pc_inc;
-			return 1;
-		case mm_bgezals_op:
-		case mm_bgezal_op:
-			regs->regs[31] = regs->cp0_epc +
-					dec_insn.pc_inc +
-					dec_insn.next_pc_inc;
-			/* Fall through */
-		case mm_bgez_op:
-			if ((long)regs->regs[insn.mm_i_format.rs] >= 0)
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					(insn.mm_i_format.simmediate << 1);
-			else
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					dec_insn.next_pc_inc;
-			return 1;
-		case mm_blez_op:
-			if ((long)regs->regs[insn.mm_i_format.rs] <= 0)
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					(insn.mm_i_format.simmediate << 1);
-			else
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					dec_insn.next_pc_inc;
-			return 1;
-		case mm_bgtz_op:
-			if ((long)regs->regs[insn.mm_i_format.rs] <= 0)
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					(insn.mm_i_format.simmediate << 1);
-			else
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					dec_insn.next_pc_inc;
-			return 1;
-		case mm_bc2f_op:
-		case mm_bc1f_op:
-			bc_false = 1;
-			/* Fall through */
-		case mm_bc2t_op:
-		case mm_bc1t_op:
-			preempt_disable();
-			if (is_fpu_owner())
-				asm volatile("cfc1\t%0,$31" : "=r" (fcr31));
-			else
-				fcr31 = current->thread.fpu.fcr31;
-			preempt_enable();
-
-			if (bc_false)
-				fcr31 = ~fcr31;
-
-			bit = (insn.mm_i_format.rs >> 2);
-			bit += (bit != 0);
-			bit += 23;
-			if (fcr31 & (1 << bit))
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc +
-					(insn.mm_i_format.simmediate << 1);
-			else
-				*contpc = regs->cp0_epc +
-					dec_insn.pc_inc + dec_insn.next_pc_inc;
-			return 1;
-		}
-		break;
-	case mm_pool16c_op:
-		switch (insn.mm_i_format.rt) {
-		case mm_jalr16_op:
-		case mm_jalrs16_op:
-			regs->regs[31] = regs->cp0_epc +
-				dec_insn.pc_inc + dec_insn.next_pc_inc;
-			/* Fall through */
-		case mm_jr16_op:
-			*contpc = regs->regs[insn.mm_i_format.rs];
-			return 1;
-		}
-		break;
-	case mm_beqz16_op:
-		if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] == 0)
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc +
-				(insn.mm_b1_format.simmediate << 1);
-		else
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc + dec_insn.next_pc_inc;
-		return 1;
-	case mm_bnez16_op:
-		if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] != 0)
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc +
-				(insn.mm_b1_format.simmediate << 1);
-		else
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc + dec_insn.next_pc_inc;
-		return 1;
-	case mm_b16_op:
-		*contpc = regs->cp0_epc + dec_insn.pc_inc +
-			 (insn.mm_b0_format.simmediate << 1);
-		return 1;
-	case mm_beq32_op:
-		if (regs->regs[insn.mm_i_format.rs] ==
-		    regs->regs[insn.mm_i_format.rt])
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc +
-				(insn.mm_i_format.simmediate << 1);
-		else
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc +
-				dec_insn.next_pc_inc;
-		return 1;
-	case mm_bne32_op:
-		if (regs->regs[insn.mm_i_format.rs] !=
-		    regs->regs[insn.mm_i_format.rt])
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc +
-				(insn.mm_i_format.simmediate << 1);
-		else
-			*contpc = regs->cp0_epc +
-				dec_insn.pc_inc + dec_insn.next_pc_inc;
-		return 1;
-	case mm_jalx32_op:
-		regs->regs[31] = regs->cp0_epc +
-			dec_insn.pc_inc + dec_insn.next_pc_inc;
-		*contpc = regs->cp0_epc + dec_insn.pc_inc;
-		*contpc >>= 28;
-		*contpc <<= 28;
-		*contpc |= (insn.j_format.target << 2);
-		return 1;
-	case mm_jals32_op:
-	case mm_jal32_op:
-		regs->regs[31] = regs->cp0_epc +
-			dec_insn.pc_inc + dec_insn.next_pc_inc;
-		/* Fall through */
-	case mm_j32_op:
-		*contpc = regs->cp0_epc + dec_insn.pc_inc;
-		*contpc >>= 27;
-		*contpc <<= 27;
-		*contpc |= (insn.j_format.target << 1);
-		set_isa16_mode(*contpc);
-		return 1;
-	}
-	return 0;
-}
-
 /*
  * Redundant with logic already in kernel/branch.c,
  * embedded in compute_return_epc.  At some point,
@@ -817,7 +584,11 @@
 		if (insn.i_format.rs == bc_op) {
 			preempt_disable();
 			if (is_fpu_owner())
-				asm volatile("cfc1\t%0,$31" : "=r" (fcr31));
+				asm volatile(
+					".set push\n"
+					"\t.set mips1\n"
+					"\tcfc1\t%0,$31\n"
+					"\t.set pop" : "=r" (fcr31));
 			else
 				fcr31 = current->thread.fpu.fcr31;
 			preempt_enable();
@@ -867,23 +638,25 @@
  */
 static inline int cop1_64bit(struct pt_regs *xcp)
 {
-#if defined(CONFIG_64BIT) && !defined(CONFIG_MIPS32_O32)
-	return 1;
-#elif defined(CONFIG_32BIT) && !defined(CONFIG_MIPS_O32_FP64_SUPPORT)
-	return 0;
-#else
+	if (config_enabled(CONFIG_64BIT) && !config_enabled(CONFIG_MIPS32_O32))
+		return 1;
+	else if (config_enabled(CONFIG_32BIT) &&
+		 !config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
+		return 0;
+
 	return !test_thread_flag(TIF_32BIT_FPREGS);
-#endif
 }
 
-#define SIFROMREG(si, x) do {						\
+#define SIFROMREG(si, x)						\
+do {									\
 	if (cop1_64bit(xcp))						\
 		(si) = get_fpr32(&ctx->fpr[x], 0);			\
 	else								\
 		(si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1);		\
 } while (0)
 
-#define SITOREG(si, x) do {						\
+#define SITOREG(si, x)							\
+do {									\
 	if (cop1_64bit(xcp)) {						\
 		unsigned i;						\
 		set_fpr32(&ctx->fpr[x], 0, si);				\
@@ -896,17 +669,19 @@
 
 #define SIFROMHREG(si, x)	((si) = get_fpr32(&ctx->fpr[x], 1))
 
-#define SITOHREG(si, x) do {						\
+#define SITOHREG(si, x)							\
+do {									\
 	unsigned i;							\
 	set_fpr32(&ctx->fpr[x], 1, si);					\
 	for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++)		\
 		set_fpr32(&ctx->fpr[x], i, 0);				\
 } while (0)
 
-#define DIFROMREG(di, x) \
+#define DIFROMREG(di, x)						\
 	((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
 
-#define DITOREG(di, x) do {						\
+#define DITOREG(di, x)							\
+do {									\
 	unsigned fpr, i;						\
 	fpr = (x) & ~(cop1_64bit(xcp) == 0);				\
 	set_fpr64(&ctx->fpr[fpr], 0, di);				\
@@ -927,23 +702,36 @@
 static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 		struct mm_decoded_insn dec_insn, void *__user *fault_addr)
 {
-	mips_instruction ir;
 	unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc;
-	unsigned int cond;
-	int pc_inc;
+	unsigned int cond, cbit;
+	mips_instruction ir;
+	int likely, pc_inc;
+	u32 __user *wva;
+	u64 __user *dva;
+	u32 value;
+	u32 wval;
+	u64 dval;
+	int sig;
+
+	/*
+	 * These are giving gcc a gentle hint about what to expect in
+	 * dec_insn in order to do better optimization.
+	 */
+	if (!cpu_has_mmips && dec_insn.micro_mips_mode)
+		unreachable();
 
 	/* XXX NEC Vr54xx bug workaround */
-	if (xcp->cp0_cause & CAUSEF_BD) {
+	if (delay_slot(xcp)) {
 		if (dec_insn.micro_mips_mode) {
 			if (!mm_isBranchInstr(xcp, dec_insn, &contpc))
-				xcp->cp0_cause &= ~CAUSEF_BD;
+				clear_delay_slot(xcp);
 		} else {
 			if (!isBranchInstr(xcp, dec_insn, &contpc))
-				xcp->cp0_cause &= ~CAUSEF_BD;
+				clear_delay_slot(xcp);
 		}
 	}
 
-	if (xcp->cp0_cause & CAUSEF_BD) {
+	if (delay_slot(xcp)) {
 		/*
 		 * The instruction to be emulated is in a branch delay slot
 		 * which means that we have to	emulate the branch instruction
@@ -985,96 +773,85 @@
 			return SIGILL;
 	}
 
-      emul:
+emul:
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
 	MIPS_FPU_EMU_INC_STATS(emulated);
 	switch (MIPSInst_OPCODE(ir)) {
-	case ldc1_op:{
-		u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
-			MIPSInst_SIMM(ir));
-		u64 val;
-
+	case ldc1_op:
+		dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
+				     MIPSInst_SIMM(ir));
 		MIPS_FPU_EMU_INC_STATS(loads);
 
-		if (!access_ok(VERIFY_READ, va, sizeof(u64))) {
+		if (!access_ok(VERIFY_READ, dva, sizeof(u64))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = dva;
 			return SIGBUS;
 		}
-		if (__get_user(val, va)) {
+		if (__get_user(dval, dva)) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = dva;
 			return SIGSEGV;
 		}
-		DITOREG(val, MIPSInst_RT(ir));
+		DITOREG(dval, MIPSInst_RT(ir));
 		break;
-	}
 
-	case sdc1_op:{
-		u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
-			MIPSInst_SIMM(ir));
-		u64 val;
-
+	case sdc1_op:
+		dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
+				      MIPSInst_SIMM(ir));
 		MIPS_FPU_EMU_INC_STATS(stores);
-		DIFROMREG(val, MIPSInst_RT(ir));
-		if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) {
+		DIFROMREG(dval, MIPSInst_RT(ir));
+		if (!access_ok(VERIFY_WRITE, dva, sizeof(u64))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = dva;
 			return SIGBUS;
 		}
-		if (__put_user(val, va)) {
+		if (__put_user(dval, dva)) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = dva;
 			return SIGSEGV;
 		}
 		break;
-	}
 
-	case lwc1_op:{
-		u32 __user *va = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] +
-			MIPSInst_SIMM(ir));
-		u32 val;
-
+	case lwc1_op:
+		wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] +
+				      MIPSInst_SIMM(ir));
 		MIPS_FPU_EMU_INC_STATS(loads);
-		if (!access_ok(VERIFY_READ, va, sizeof(u32))) {
+		if (!access_ok(VERIFY_READ, wva, sizeof(u32))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = wva;
 			return SIGBUS;
 		}
-		if (__get_user(val, va)) {
+		if (__get_user(wval, wva)) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = wva;
 			return SIGSEGV;
 		}
-		SITOREG(val, MIPSInst_RT(ir));
+		SITOREG(wval, MIPSInst_RT(ir));
 		break;
-	}
 
-	case swc1_op:{
-		u32 __user *va = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] +
-			MIPSInst_SIMM(ir));
-		u32 val;
-
+	case swc1_op:
+		wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] +
+				      MIPSInst_SIMM(ir));
 		MIPS_FPU_EMU_INC_STATS(stores);
-		SIFROMREG(val, MIPSInst_RT(ir));
-		if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) {
+		SIFROMREG(wval, MIPSInst_RT(ir));
+		if (!access_ok(VERIFY_WRITE, wva, sizeof(u32))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = wva;
 			return SIGBUS;
 		}
-		if (__put_user(val, va)) {
+		if (__put_user(wval, wva)) {
 			MIPS_FPU_EMU_INC_STATS(errors);
-			*fault_addr = va;
+			*fault_addr = wva;
 			return SIGSEGV;
 		}
 		break;
-	}
 
 	case cop1_op:
 		switch (MIPSInst_RS(ir)) {
-
-#if defined(__mips64)
 		case dmfc_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
+
 			/* copregister fs -> gpr[rt] */
 			if (MIPSInst_RT(ir) != 0) {
 				DIFROMREG(xcp->regs[MIPSInst_RT(ir)],
@@ -1083,10 +860,12 @@
 			break;
 
 		case dmtc_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
+
 			/* copregister fs <- rt */
 			DITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir));
 			break;
-#endif
 
 		case mfhc_op:
 			if (!cpu_has_mips_r2)
@@ -1120,19 +899,14 @@
 			SITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir));
 			break;
 
-		case cfc_op:{
+		case cfc_op:
 			/* cop control register rd -> gpr[rt] */
-			u32 value;
-
 			if (MIPSInst_RD(ir) == FPCREG_CSR) {
 				value = ctx->fcr31;
-				value = (value & ~FPU_CSR_RM) |
-					mips_rm[modeindex(value)];
-#ifdef CSRTRACE
-				printk("%p gpr[%d]<-csr=%08x\n",
-					(void *) (xcp->cp0_epc),
-					MIPSInst_RT(ir), value);
-#endif
+				value = (value & ~FPU_CSR_RM) | modeindex(value);
+				pr_debug("%p gpr[%d]<-csr=%08x\n",
+					 (void *) (xcp->cp0_epc),
+					 MIPSInst_RT(ir), value);
 			}
 			else if (MIPSInst_RD(ir) == FPCREG_RID)
 				value = 0;
@@ -1141,12 +915,9 @@
 			if (MIPSInst_RT(ir))
 				xcp->regs[MIPSInst_RT(ir)] = value;
 			break;
-		}
 
-		case ctc_op:{
+		case ctc_op:
 			/* copregister rd <- rt */
-			u32 value;
-
 			if (MIPSInst_RT(ir) == 0)
 				value = 0;
 			else
@@ -1155,37 +926,33 @@
 			/* we only have one writable control reg
 			 */
 			if (MIPSInst_RD(ir) == FPCREG_CSR) {
-#ifdef CSRTRACE
-				printk("%p gpr[%d]->csr=%08x\n",
-					(void *) (xcp->cp0_epc),
-					MIPSInst_RT(ir), value);
-#endif
+				pr_debug("%p gpr[%d]->csr=%08x\n",
+					 (void *) (xcp->cp0_epc),
+					 MIPSInst_RT(ir), value);
 
 				/*
 				 * Don't write reserved bits,
 				 * and convert to ieee library modes
 				 */
-				ctx->fcr31 = (value &
-						~(FPU_CSR_RSVD | FPU_CSR_RM)) |
-						ieee_rm[modeindex(value)];
+				ctx->fcr31 = (value & ~(FPU_CSR_RSVD | FPU_CSR_RM)) |
+					     modeindex(value);
 			}
 			if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
 				return SIGFPE;
 			}
 			break;
-		}
 
-		case bc_op:{
-			int likely = 0;
-
-			if (xcp->cp0_cause & CAUSEF_BD)
+		case bc_op:
+			if (delay_slot(xcp))
 				return SIGILL;
 
-#if __mips >= 4
-			cond = ctx->fcr31 & fpucondbit[MIPSInst_RT(ir) >> 2];
-#else
-			cond = ctx->fcr31 & FPU_CSR_COND;
-#endif
+			if (cpu_has_mips_4_5_r)
+				cbit = fpucondbit[MIPSInst_RT(ir) >> 2];
+			else
+				cbit = FPU_CSR_COND;
+			cond = ctx->fcr31 & cbit;
+
+			likely = 0;
 			switch (MIPSInst_RT(ir) & 3) {
 			case bcfl_op:
 				likely = 1;
@@ -1201,10 +968,10 @@
 				return SIGILL;
 			}
 
-			xcp->cp0_cause |= CAUSEF_BD;
+			set_delay_slot(xcp);
 			if (cond) {
-				/* branch taken: emulate dslot
-				 * instruction
+				/*
+				 * Branch taken: emulate dslot instruction
 				 */
 				xcp->cp0_epc += dec_insn.pc_inc;
 
@@ -1238,23 +1005,37 @@
 
 				switch (MIPSInst_OPCODE(ir)) {
 				case lwc1_op:
+					goto emul;
+
 				case swc1_op:
-#if (__mips >= 2 || defined(__mips64))
+					goto emul;
+
 				case ldc1_op:
 				case sdc1_op:
-#endif
-				case cop1_op:
-#if __mips >= 4 && __mips != 32
-				case cop1x_op:
-#endif
-					/* its one of ours */
+					if (cpu_has_mips_2_3_4_5 ||
+					    cpu_has_mips64)
+						goto emul;
+
+					return SIGILL;
 					goto emul;
-#if __mips >= 4
+
+				case cop1_op:
+					goto emul;
+
+				case cop1x_op:
+					if (cpu_has_mips_4_5 || cpu_has_mips64)
+						/* it's one of ours */
+						goto emul;
+
+					return SIGILL;
+
 				case spec_op:
+					if (!cpu_has_mips_4_5_r)
+						return SIGILL;
+
 					if (MIPSInst_FUNC(ir) == movc_op)
 						goto emul;
 					break;
-#endif
 				}
 
 				/*
@@ -1262,10 +1043,7 @@
 				 * instruction in the dslot
 				 */
 				return mips_dsemul(xcp, ir, contpc);
-			}
-			else {
-				/* branch not taken */
-				if (likely) {
+			} else if (likely) {	/* branch not taken */
 					/*
 					 * branch likely nullifies
 					 * dslot if not taken
@@ -1277,34 +1055,31 @@
 					 * dslot as normal insn
 					 */
 				}
-			}
 			break;
-		}
 
 		default:
 			if (!(MIPSInst_RS(ir) & 0x10))
 				return SIGILL;
-			{
-				int sig;
 
-				/* a real fpu computation instruction */
-				if ((sig = fpu_emu(xcp, ctx, ir)))
-					return sig;
-			}
+			/* a real fpu computation instruction */
+			if ((sig = fpu_emu(xcp, ctx, ir)))
+				return sig;
 		}
 		break;
 
-#if __mips >= 4 && __mips != 32
-	case cop1x_op:{
-		int sig = fpux_emu(xcp, ctx, ir, fault_addr);
+	case cop1x_op:
+		if (!cpu_has_mips_4_5 && !cpu_has_mips64)
+			return SIGILL;
+
+		sig = fpux_emu(xcp, ctx, ir, fault_addr);
 		if (sig)
 			return sig;
 		break;
-	}
-#endif
 
-#if __mips >= 4
 	case spec_op:
+		if (!cpu_has_mips_4_5_r)
+			return SIGILL;
+
 		if (MIPSInst_FUNC(ir) != movc_op)
 			return SIGILL;
 		cond = fpucondbit[MIPSInst_RT(ir) >> 2];
@@ -1312,8 +1087,6 @@
 			xcp->regs[MIPSInst_RD(ir)] =
 				xcp->regs[MIPSInst_RS(ir)];
 		break;
-#endif
-
 	default:
 sigill:
 		return SIGILL;
@@ -1321,7 +1094,7 @@
 
 	/* we did it !! */
 	xcp->cp0_epc = contpc;
-	xcp->cp0_cause &= ~CAUSEF_BD;
+	clear_delay_slot(xcp);
 
 	return 0;
 }
@@ -1342,44 +1115,42 @@
 };
 
 
-#if __mips >= 4 && __mips != 32
-
 /*
  * Additional MIPS4 instructions
  */
 
-#define DEF3OP(name, p, f1, f2, f3) \
-static ieee754##p fpemu_##p##_##name(ieee754##p r, ieee754##p s, \
-    ieee754##p t) \
-{ \
-	struct _ieee754_csr ieee754_csr_save; \
-	s = f1(s, t); \
-	ieee754_csr_save = ieee754_csr; \
-	s = f2(s, r); \
-	ieee754_csr_save.cx |= ieee754_csr.cx; \
-	ieee754_csr_save.sx |= ieee754_csr.sx; \
-	s = f3(s); \
-	ieee754_csr.cx |= ieee754_csr_save.cx; \
-	ieee754_csr.sx |= ieee754_csr_save.sx; \
-	return s; \
+#define DEF3OP(name, p, f1, f2, f3)					\
+static union ieee754##p fpemu_##p##_##name(union ieee754##p r,		\
+	union ieee754##p s, union ieee754##p t)				\
+{									\
+	struct _ieee754_csr ieee754_csr_save;				\
+	s = f1(s, t);							\
+	ieee754_csr_save = ieee754_csr;					\
+	s = f2(s, r);							\
+	ieee754_csr_save.cx |= ieee754_csr.cx;				\
+	ieee754_csr_save.sx |= ieee754_csr.sx;				\
+	s = f3(s);							\
+	ieee754_csr.cx |= ieee754_csr_save.cx;				\
+	ieee754_csr.sx |= ieee754_csr_save.sx;				\
+	return s;							\
 }
 
-static ieee754dp fpemu_dp_recip(ieee754dp d)
+static union ieee754dp fpemu_dp_recip(union ieee754dp d)
 {
 	return ieee754dp_div(ieee754dp_one(0), d);
 }
 
-static ieee754dp fpemu_dp_rsqrt(ieee754dp d)
+static union ieee754dp fpemu_dp_rsqrt(union ieee754dp d)
 {
 	return ieee754dp_div(ieee754dp_one(0), ieee754dp_sqrt(d));
 }
 
-static ieee754sp fpemu_sp_recip(ieee754sp s)
+static union ieee754sp fpemu_sp_recip(union ieee754sp s)
 {
 	return ieee754sp_div(ieee754sp_one(0), s);
 }
 
-static ieee754sp fpemu_sp_rsqrt(ieee754sp s)
+static union ieee754sp fpemu_sp_rsqrt(union ieee754sp s)
 {
 	return ieee754sp_div(ieee754sp_one(0), ieee754sp_sqrt(s));
 }
@@ -1403,8 +1174,8 @@
 	switch (MIPSInst_FMA_FFMT(ir)) {
 	case s_fmt:{		/* 0 */
 
-		ieee754sp(*handler) (ieee754sp, ieee754sp, ieee754sp);
-		ieee754sp fd, fr, fs, ft;
+		union ieee754sp(*handler) (union ieee754sp, union ieee754sp, union ieee754sp);
+		union ieee754sp fd, fr, fs, ft;
 		u32 __user *va;
 		u32 val;
 
@@ -1467,18 +1238,26 @@
 			SPTOREG(fd, MIPSInst_FD(ir));
 
 		      copcsr:
-			if (ieee754_cxtest(IEEE754_INEXACT))
+			if (ieee754_cxtest(IEEE754_INEXACT)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_inexact);
 				rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S;
-			if (ieee754_cxtest(IEEE754_UNDERFLOW))
+			}
+			if (ieee754_cxtest(IEEE754_UNDERFLOW)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_underflow);
 				rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S;
-			if (ieee754_cxtest(IEEE754_OVERFLOW))
+			}
+			if (ieee754_cxtest(IEEE754_OVERFLOW)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_overflow);
 				rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S;
-			if (ieee754_cxtest(IEEE754_INVALID_OPERATION))
+			}
+			if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_invalidop);
 				rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S;
+			}
 
 			ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr;
 			if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
-				/*printk ("SIGFPE: fpu csr = %08x\n",
+				/*printk ("SIGFPE: FPU csr = %08x\n",
 				   ctx->fcr31); */
 				return SIGFPE;
 			}
@@ -1492,8 +1271,8 @@
 	}
 
 	case d_fmt:{		/* 1 */
-		ieee754dp(*handler) (ieee754dp, ieee754dp, ieee754dp);
-		ieee754dp fd, fr, fs, ft;
+		union ieee754dp(*handler) (union ieee754dp, union ieee754dp, union ieee754dp);
+		union ieee754dp fd, fr, fs, ft;
 		u64 __user *va;
 		u64 val;
 
@@ -1574,7 +1353,6 @@
 
 	return 0;
 }
-#endif
 
 
 
@@ -1586,23 +1364,25 @@
 {
 	int rfmt;		/* resulting format */
 	unsigned rcsr = 0;	/* resulting csr */
+	unsigned int oldrm;
+	unsigned int cbit;
 	unsigned cond;
 	union {
-		ieee754dp d;
-		ieee754sp s;
+		union ieee754dp d;
+		union ieee754sp s;
 		int w;
-#ifdef __mips64
 		s64 l;
-#endif
 	} rv;			/* resulting value */
+	u64 bits;
 
 	MIPS_FPU_EMU_INC_STATS(cp1ops);
 	switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) {
-	case s_fmt:{		/* 0 */
+	case s_fmt: {		/* 0 */
 		union {
-			ieee754sp(*b) (ieee754sp, ieee754sp);
-			ieee754sp(*u) (ieee754sp);
+			union ieee754sp(*b) (union ieee754sp, union ieee754sp);
+			union ieee754sp(*u) (union ieee754sp);
 		} handler;
+		union ieee754sp fs, ft;
 
 		switch (MIPSInst_FUNC(ir)) {
 			/* binary ops */
@@ -1620,148 +1400,167 @@
 			goto scopbop;
 
 			/* unary  ops */
-#if __mips >= 2 || defined(__mips64)
 		case fsqrt_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			handler.u = ieee754sp_sqrt;
 			goto scopuop;
-#endif
-#if __mips >= 4 && __mips != 32
+
+		/*
+		 * Note that on some MIPS IV implementations such as the
+		 * R5000 and R8000 the FRSQRT and FRECIP instructions do not
+		 * achieve full IEEE-754 accuracy - however this emulator does.
+		 */
 		case frsqrt_op:
+			if (!cpu_has_mips_4_5_r2)
+				return SIGILL;
+
 			handler.u = fpemu_sp_rsqrt;
 			goto scopuop;
+
 		case frecip_op:
+			if (!cpu_has_mips_4_5_r2)
+				return SIGILL;
+
 			handler.u = fpemu_sp_recip;
 			goto scopuop;
-#endif
-#if __mips >= 4
+
 		case fmovc_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			cond = fpucondbit[MIPSInst_FT(ir) >> 2];
 			if (((ctx->fcr31 & cond) != 0) !=
 				((MIPSInst_FT(ir) & 1) != 0))
 				return 0;
 			SPFROMREG(rv.s, MIPSInst_FS(ir));
 			break;
+
 		case fmovz_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			if (xcp->regs[MIPSInst_FT(ir)] != 0)
 				return 0;
 			SPFROMREG(rv.s, MIPSInst_FS(ir));
 			break;
+
 		case fmovn_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			if (xcp->regs[MIPSInst_FT(ir)] == 0)
 				return 0;
 			SPFROMREG(rv.s, MIPSInst_FS(ir));
 			break;
-#endif
+
 		case fabs_op:
 			handler.u = ieee754sp_abs;
 			goto scopuop;
+
 		case fneg_op:
 			handler.u = ieee754sp_neg;
 			goto scopuop;
+
 		case fmov_op:
 			/* an easy one */
 			SPFROMREG(rv.s, MIPSInst_FS(ir));
 			goto copcsr;
 
 			/* binary op on handler */
-		      scopbop:
-			{
-				ieee754sp fs, ft;
+scopbop:
+			SPFROMREG(fs, MIPSInst_FS(ir));
+			SPFROMREG(ft, MIPSInst_FT(ir));
 
-				SPFROMREG(fs, MIPSInst_FS(ir));
-				SPFROMREG(ft, MIPSInst_FT(ir));
-
-				rv.s = (*handler.b) (fs, ft);
-				goto copcsr;
-			}
-		      scopuop:
-			{
-				ieee754sp fs;
-
-				SPFROMREG(fs, MIPSInst_FS(ir));
-				rv.s = (*handler.u) (fs);
-				goto copcsr;
-			}
-		      copcsr:
-			if (ieee754_cxtest(IEEE754_INEXACT))
+			rv.s = (*handler.b) (fs, ft);
+			goto copcsr;
+scopuop:
+			SPFROMREG(fs, MIPSInst_FS(ir));
+			rv.s = (*handler.u) (fs);
+			goto copcsr;
+copcsr:
+			if (ieee754_cxtest(IEEE754_INEXACT)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_inexact);
 				rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S;
-			if (ieee754_cxtest(IEEE754_UNDERFLOW))
+			}
+			if (ieee754_cxtest(IEEE754_UNDERFLOW)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_underflow);
 				rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S;
-			if (ieee754_cxtest(IEEE754_OVERFLOW))
+			}
+			if (ieee754_cxtest(IEEE754_OVERFLOW)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_overflow);
 				rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S;
-			if (ieee754_cxtest(IEEE754_ZERO_DIVIDE))
+			}
+			if (ieee754_cxtest(IEEE754_ZERO_DIVIDE)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_zerodiv);
 				rcsr |= FPU_CSR_DIV_X | FPU_CSR_DIV_S;
-			if (ieee754_cxtest(IEEE754_INVALID_OPERATION))
+			}
+			if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) {
+				MIPS_FPU_EMU_INC_STATS(ieee754_invalidop);
 				rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S;
+			}
 			break;
 
 			/* unary conv ops */
 		case fcvts_op:
 			return SIGILL;	/* not defined */
-		case fcvtd_op:{
-			ieee754sp fs;
 
+		case fcvtd_op:
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fsp(fs);
 			rfmt = d_fmt;
 			goto copcsr;
-		}
-		case fcvtw_op:{
-			ieee754sp fs;
 
+		case fcvtw_op:
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754sp_tint(fs);
 			rfmt = w_fmt;
 			goto copcsr;
-		}
 
-#if __mips >= 2 || defined(__mips64)
 		case fround_op:
 		case ftrunc_op:
 		case fceil_op:
-		case ffloor_op:{
-			unsigned int oldrm = ieee754_csr.rm;
-			ieee754sp fs;
+		case ffloor_op:
+			if (!cpu_has_mips_2_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
 
+			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = ieee_rm[modeindex(MIPSInst_FUNC(ir))];
+			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
 			rv.w = ieee754sp_tint(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = w_fmt;
 			goto copcsr;
-		}
-#endif /* __mips >= 2 */
 
-#if defined(__mips64)
-		case fcvtl_op:{
-			ieee754sp fs;
+		case fcvtl_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
 
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.l = ieee754sp_tlong(fs);
 			rfmt = l_fmt;
 			goto copcsr;
-		}
 
 		case froundl_op:
 		case ftruncl_op:
 		case fceill_op:
-		case ffloorl_op:{
-			unsigned int oldrm = ieee754_csr.rm;
-			ieee754sp fs;
+		case ffloorl_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
 
+			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = ieee_rm[modeindex(MIPSInst_FUNC(ir))];
+			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
 			rv.l = ieee754sp_tlong(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = l_fmt;
 			goto copcsr;
-		}
-#endif /* defined(__mips64) */
 
 		default:
 			if (MIPSInst_FUNC(ir) >= fcmp_op) {
 				unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op;
-				ieee754sp fs, ft;
+				union ieee754sp fs, ft;
 
 				SPFROMREG(fs, MIPSInst_FS(ir));
 				SPFROMREG(ft, MIPSInst_FT(ir));
@@ -1774,19 +1573,18 @@
 				else
 					goto copcsr;
 
-			}
-			else {
+			} else
 				return SIGILL;
-			}
 			break;
 		}
 		break;
 	}
 
-	case d_fmt:{
+	case d_fmt: {
+		union ieee754dp fs, ft;
 		union {
-			ieee754dp(*b) (ieee754dp, ieee754dp);
-			ieee754dp(*u) (ieee754dp);
+			union ieee754dp(*b) (union ieee754dp, union ieee754dp);
+			union ieee754dp(*u) (union ieee754dp);
 		} handler;
 
 		switch (MIPSInst_FUNC(ir)) {
@@ -1805,21 +1603,33 @@
 			goto dcopbop;
 
 			/* unary  ops */
-#if __mips >= 2 || defined(__mips64)
 		case fsqrt_op:
+			if (!cpu_has_mips_2_3_4_5_r)
+				return SIGILL;
+
 			handler.u = ieee754dp_sqrt;
 			goto dcopuop;
-#endif
-#if __mips >= 4 && __mips != 32
+		/*
+		 * Note that on some MIPS IV implementations such as the
+		 * R5000 and R8000 the FRSQRT and FRECIP instructions do not
+		 * achieve full IEEE-754 accuracy - however this emulator does.
+		 */
 		case frsqrt_op:
+			if (!cpu_has_mips_4_5_r2)
+				return SIGILL;
+
 			handler.u = fpemu_dp_rsqrt;
 			goto dcopuop;
 		case frecip_op:
+			if (!cpu_has_mips_4_5_r2)
+				return SIGILL;
+
 			handler.u = fpemu_dp_recip;
 			goto dcopuop;
-#endif
-#if __mips >= 4
 		case fmovc_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			cond = fpucondbit[MIPSInst_FT(ir) >> 2];
 			if (((ctx->fcr31 & cond) != 0) !=
 				((MIPSInst_FT(ir) & 1) != 0))
@@ -1827,16 +1637,21 @@
 			DPFROMREG(rv.d, MIPSInst_FS(ir));
 			break;
 		case fmovz_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			if (xcp->regs[MIPSInst_FT(ir)] != 0)
 				return 0;
 			DPFROMREG(rv.d, MIPSInst_FS(ir));
 			break;
 		case fmovn_op:
+			if (!cpu_has_mips_4_5_r)
+				return SIGILL;
+
 			if (xcp->regs[MIPSInst_FT(ir)] == 0)
 				return 0;
 			DPFROMREG(rv.d, MIPSInst_FS(ir));
 			break;
-#endif
 		case fabs_op:
 			handler.u = ieee754dp_abs;
 			goto dcopuop;
@@ -1851,91 +1666,78 @@
 			goto copcsr;
 
 			/* binary op on handler */
-		      dcopbop:{
-				ieee754dp fs, ft;
+dcopbop:
+			DPFROMREG(fs, MIPSInst_FS(ir));
+			DPFROMREG(ft, MIPSInst_FT(ir));
 
-				DPFROMREG(fs, MIPSInst_FS(ir));
-				DPFROMREG(ft, MIPSInst_FT(ir));
+			rv.d = (*handler.b) (fs, ft);
+			goto copcsr;
+dcopuop:
+			DPFROMREG(fs, MIPSInst_FS(ir));
+			rv.d = (*handler.u) (fs);
+			goto copcsr;
 
-				rv.d = (*handler.b) (fs, ft);
-				goto copcsr;
-			}
-		      dcopuop:{
-				ieee754dp fs;
-
-				DPFROMREG(fs, MIPSInst_FS(ir));
-				rv.d = (*handler.u) (fs);
-				goto copcsr;
-			}
-
-			/* unary conv ops */
-		case fcvts_op:{
-			ieee754dp fs;
-
+		/*
+		 * unary conv ops
+		 */
+		case fcvts_op:
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fdp(fs);
 			rfmt = s_fmt;
 			goto copcsr;
-		}
+
 		case fcvtd_op:
 			return SIGILL;	/* not defined */
 
-		case fcvtw_op:{
-			ieee754dp fs;
-
+		case fcvtw_op:
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754dp_tint(fs);	/* wrong */
 			rfmt = w_fmt;
 			goto copcsr;
-		}
 
-#if __mips >= 2 || defined(__mips64)
 		case fround_op:
 		case ftrunc_op:
 		case fceil_op:
-		case ffloor_op:{
-			unsigned int oldrm = ieee754_csr.rm;
-			ieee754dp fs;
+		case ffloor_op:
+			if (!cpu_has_mips_2_3_4_5_r)
+				return SIGILL;
 
+			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = ieee_rm[modeindex(MIPSInst_FUNC(ir))];
+			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
 			rv.w = ieee754dp_tint(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = w_fmt;
 			goto copcsr;
-		}
-#endif
 
-#if defined(__mips64)
-		case fcvtl_op:{
-			ieee754dp fs;
+		case fcvtl_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
 
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.l = ieee754dp_tlong(fs);
 			rfmt = l_fmt;
 			goto copcsr;
-		}
 
 		case froundl_op:
 		case ftruncl_op:
 		case fceill_op:
-		case ffloorl_op:{
-			unsigned int oldrm = ieee754_csr.rm;
-			ieee754dp fs;
+		case ffloorl_op:
+			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+				return SIGILL;
 
+			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = ieee_rm[modeindex(MIPSInst_FUNC(ir))];
+			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
 			rv.l = ieee754dp_tlong(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = l_fmt;
 			goto copcsr;
-		}
-#endif /* __mips >= 3 */
 
 		default:
 			if (MIPSInst_FUNC(ir) >= fcmp_op) {
 				unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op;
-				ieee754dp fs, ft;
+				union ieee754dp fs, ft;
 
 				DPFROMREG(fs, MIPSInst_FS(ir));
 				DPFROMREG(ft, MIPSInst_FT(ir));
@@ -1957,11 +1759,8 @@
 			break;
 		}
 		break;
-	}
 
-	case w_fmt:{
-		ieee754sp fs;
-
+	case w_fmt:
 		switch (MIPSInst_FUNC(ir)) {
 		case fcvts_op:
 			/* convert word to single precision real */
@@ -1981,9 +1780,11 @@
 		break;
 	}
 
-#if defined(__mips64)
-	case l_fmt:{
-		u64 bits;
+	case l_fmt:
+
+		if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			return SIGILL;
+
 		DIFROMREG(bits, MIPSInst_FS(ir));
 
 		switch (MIPSInst_FUNC(ir)) {
@@ -2001,8 +1802,6 @@
 			return SIGILL;
 		}
 		break;
-	}
-#endif
 
 	default:
 		return SIGILL;
@@ -2017,7 +1816,7 @@
 	 */
 	ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr;
 	if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
-		/*printk ("SIGFPE: fpu csr = %08x\n",ctx->fcr31); */
+		/*printk ("SIGFPE: FPU csr = %08x\n",ctx->fcr31); */
 		return SIGFPE;
 	}
 
@@ -2025,18 +1824,18 @@
 	 * Now we can safely write the result back to the register file.
 	 */
 	switch (rfmt) {
-	case -1:{
-#if __mips >= 4
-		cond = fpucondbit[MIPSInst_FD(ir) >> 2];
-#else
-		cond = FPU_CSR_COND;
-#endif
-		if (rv.w)
-			ctx->fcr31 |= cond;
+	case -1:
+
+		if (cpu_has_mips_4_5_r)
+			cbit = fpucondbit[MIPSInst_RT(ir) >> 2];
 		else
-			ctx->fcr31 &= ~cond;
+			cbit = FPU_CSR_COND;
+		if (rv.w)
+			ctx->fcr31 |= cbit;
+		else
+			ctx->fcr31 &= ~cbit;
 		break;
-	}
+
 	case d_fmt:
 		DPTOREG(rv.d, MIPSInst_FD(ir));
 		break;
@@ -2046,11 +1845,12 @@
 	case w_fmt:
 		SITOREG(rv.w, MIPSInst_FD(ir));
 		break;
-#if defined(__mips64)
 	case l_fmt:
+		if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			return SIGILL;
+
 		DITOREG(rv.l, MIPSInst_FD(ir));
 		break;
-#endif
 	default:
 		return SIGILL;
 	}
@@ -2138,11 +1938,7 @@
 			 * ieee754_csr.	 But ieee754_csr.rm is ieee
 			 * library modes. (not mips rounding mode)
 			 */
-			/* convert to ieee library modes */
-			ieee754_csr.rm = ieee_rm[ieee754_csr.rm];
 			sig = cop1Emulate(xcp, ctx, dec_insn, fault_addr);
-			/* revert to mips rounding mode */
-			ieee754_csr.rm = mips_rm[ieee754_csr.rm];
 		}
 
 		if (has_fpu)
@@ -2155,58 +1951,8 @@
 
 	/* SIGILL indicates a non-fpu instruction */
 	if (sig == SIGILL && xcp->cp0_epc != oldepc)
-		/* but if epc has advanced, then ignore it */
+		/* but if EPC has advanced, then ignore it */
 		sig = 0;
 
 	return sig;
 }
-
-#ifdef CONFIG_DEBUG_FS
-
-static int fpuemu_stat_get(void *data, u64 *val)
-{
-	int cpu;
-	unsigned long sum = 0;
-	for_each_online_cpu(cpu) {
-		struct mips_fpu_emulator_stats *ps;
-		local_t *pv;
-		ps = &per_cpu(fpuemustats, cpu);
-		pv = (void *)ps + (unsigned long)data;
-		sum += local_read(pv);
-	}
-	*val = sum;
-	return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
-
-extern struct dentry *mips_debugfs_dir;
-static int __init debugfs_fpuemu(void)
-{
-	struct dentry *d, *dir;
-
-	if (!mips_debugfs_dir)
-		return -ENODEV;
-	dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
-	if (!dir)
-		return -ENOMEM;
-
-#define FPU_STAT_CREATE(M)						\
-	do {								\
-		d = debugfs_create_file(#M , S_IRUGO, dir,		\
-			(void *)offsetof(struct mips_fpu_emulator_stats, M), \
-			&fops_fpuemu_stat);				\
-		if (!d)							\
-			return -ENOMEM;					\
-	} while (0)
-
-	FPU_STAT_CREATE(emulated);
-	FPU_STAT_CREATE(loads);
-	FPU_STAT_CREATE(stores);
-	FPU_STAT_CREATE(cp1ops);
-	FPU_STAT_CREATE(cp1xops);
-	FPU_STAT_CREATE(errors);
-
-	return 0;
-}
-__initcall(debugfs_fpuemu);
-#endif

diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c
index c57c8ad..7f64577 100644
--- a/arch/mips/math-emu/dp_add.c
+++ b/arch/mips/math-emu/dp_add.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,24 +16,22 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- *
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_add(ieee754dp x, ieee754dp y)
+union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
 {
+	int s;
+
 	COMPXDP;
 	COMPYDP;
 
 	EXPLODEXDP;
 	EXPLODEYDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXDP;
 	FLUSHYDP;
@@ -52,8 +48,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "add", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -69,14 +65,14 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 		if (xs == ys)
 			return x;
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_xcpt(ieee754dp_indef(), "add", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -88,15 +84,14 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return x;
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 		if (xs == ys)
 			return x;
 		else
-			return ieee754dp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
@@ -125,20 +120,24 @@
 	assert(xm & DP_HIDDEN_BIT);
 	assert(ym & DP_HIDDEN_BIT);
 
-	/* provide guard,round and stick bit space */
+	/*
+	 * Provide guard,round and stick bit space.
+	 */
 	xm <<= 3;
 	ym <<= 3;
 
 	if (xe > ye) {
-		/* have to shift y fraction right to align
+		/*
+		 * Have to shift y fraction right to align.
 		 */
-		int s = xe - ye;
+		s = xe - ye;
 		ym = XDPSRS(ym, s);
 		ye += s;
 	} else if (ye > xe) {
-		/* have to shift x fraction right to align
+		/*
+		 * Have to shift x fraction right to align.
 		 */
-		int s = ye - xe;
+		s = ye - xe;
 		xm = XDPSRS(xm, s);
 		xe += s;
 	}
@@ -146,14 +145,15 @@
 	assert(xe <= DP_EMAX);
 
 	if (xs == ys) {
-		/* generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in xm,xs,xe
+		/*
+		 * Generate 28 bit result of adding two 27 bit numbers
+		 * leaving result in xm, xs and xe.
 		 */
 		xm = xm + ym;
 		xe = xe;
 		xs = xs;
 
-		if (xm >> (DP_MBITS + 1 + 3)) { /* carry out */
+		if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */
 			xm = XDPSRS1(xm);
 			xe++;
 		}
@@ -168,15 +168,16 @@
 			xs = ys;
 		}
 		if (xm == 0)
-			return ieee754dp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
-		/* normalize to rounding precision */
-		while ((xm >> (DP_MBITS + 3)) == 0) {
+		/*
+		 * Normalize to rounding precision.
+		 */
+		while ((xm >> (DP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
-
 	}
-	DPNORMRET2(xs, xe, xm, "add", x, y);
+
+	return ieee754dp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/dp_cmp.c b/arch/mips/math-emu/dp_cmp.c
index 0f32486..30f95f6 100644
--- a/arch/mips/math-emu/dp_cmp.c
+++ b/arch/mips/math-emu/dp_cmp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,16 +16,16 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-int ieee754dp_cmp(ieee754dp x, ieee754dp y, int cmp, int sig)
+int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cmp, int sig)
 {
+	s64 vx;
+	s64 vy;
+
 	COMPXDP;
 	COMPYDP;
 
@@ -35,21 +33,21 @@
 	EXPLODEYDP;
 	FLUSHXDP;
 	FLUSHYDP;
-	CLEARCX;	/* Even clear inexact flag here */
+	ieee754_clearcx();	/* Even clear inexact flag here */
 
 	if (ieee754dp_isnan(x) || ieee754dp_isnan(y)) {
 		if (sig || xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
-			SETCX(IEEE754_INVALID_OPERATION);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
 		if (cmp & IEEE754_CUN)
 			return 1;
 		if (cmp & (IEEE754_CLT | IEEE754_CGT)) {
-			if (sig && SETANDTESTCX(IEEE754_INVALID_OPERATION))
-				return ieee754si_xcpt(0, "fcmpf", x);
+			if (sig && ieee754_setandtestcx(IEEE754_INVALID_OPERATION))
+				return 0;
 		}
 		return 0;
 	} else {
-		s64 vx = x.bits;
-		s64 vy = y.bits;
+		vx = x.bits;
+		vy = y.bits;
 
 		if (vx < 0)
 			vx = -vx ^ DP_SIGN_BIT;

diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c
index a1bce1b..bef0e55 100644
--- a/arch/mips/math-emu/dp_div.c
+++ b/arch/mips/math-emu/dp_div.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,24 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_div(ieee754dp x, ieee754dp y)
+union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y)
 {
+	u64 rm;
+	int re;
+	u64 bm;
+
 	COMPXDP;
 	COMPYDP;
 
 	EXPLODEXDP;
 	EXPLODEYDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXDP;
 	FLUSHYDP;
@@ -51,8 +50,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,12 +67,12 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_xcpt(ieee754dp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -85,17 +84,17 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return ieee754dp_inf(xs ^ ys);
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_xcpt(ieee754dp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		SETCX(IEEE754_ZERO_DIVIDE);
-		return ieee754dp_xcpt(ieee754dp_inf(xs ^ ys), "div", x, y);
+		ieee754_setcx(IEEE754_ZERO_DIVIDE);
+		return ieee754dp_inf(xs ^ ys);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
@@ -122,35 +121,34 @@
 	xm <<= 3;
 	ym <<= 3;
 
-	{
-		/* now the dirty work */
+	/* now the dirty work */
 
-		u64 rm = 0;
-		int re = xe - ye;
-		u64 bm;
+	rm = 0;
+	re = xe - ye;
 
-		for (bm = DP_MBIT(DP_MBITS + 2); bm; bm >>= 1) {
-			if (xm >= ym) {
-				xm -= ym;
-				rm |= bm;
-				if (xm == 0)
-					break;
-			}
-			xm <<= 1;
+	for (bm = DP_MBIT(DP_FBITS + 2); bm; bm >>= 1) {
+		if (xm >= ym) {
+			xm -= ym;
+			rm |= bm;
+			if (xm == 0)
+				break;
 		}
-		rm <<= 1;
-		if (xm)
-			rm |= 1;	/* have remainder, set sticky */
-
-		assert(rm);
-
-		/* normalise rm to rounding precision ?
-		 */
-		while ((rm >> (DP_MBITS + 3)) == 0) {
-			rm <<= 1;
-			re--;
-		}
-
-		DPNORMRET2(xs == ys ? 0 : 1, re, rm, "div", x, y);
+		xm <<= 1;
 	}
+
+	rm <<= 1;
+	if (xm)
+		rm |= 1;	/* have remainder, set sticky */
+
+	assert(rm);
+
+	/*
+	 * Normalise rm to rounding precision ?
+	 */
+	while ((rm >> (DP_FBITS + 3)) == 0) {
+		rm <<= 1;
+		re--;
+	}
+
+	return ieee754dp_format(xs == ys ? 0 : 1, re, rm);
 }

diff --git a/arch/mips/math-emu/dp_fint.c b/arch/mips/math-emu/dp_fint.c
index 8857128..10258f0 100644
--- a/arch/mips/math-emu/dp_fint.c
+++ b/arch/mips/math-emu/dp_fint.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,21 +16,18 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_fint(int x)
+union ieee754dp ieee754dp_fint(int x)
 {
 	u64 xm;
 	int xe;
 	int xs;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	if (x == 0)
 		return ieee754dp_zero(0);
@@ -51,29 +46,11 @@
 		xm = x;
 	}
 
-#if 1
 	/* normalize - result can never be inexact or overflow */
-	xe = DP_MBITS;
-	while ((xm >> DP_MBITS) == 0) {
+	xe = DP_FBITS;
+	while ((xm >> DP_FBITS) == 0) {
 		xm <<= 1;
 		xe--;
 	}
 	return builddp(xs, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
-#else
-	/* normalize */
-	xe = DP_MBITS + 3;
-	while ((xm >> (DP_MBITS + 3)) == 0) {
-		xm <<= 1;
-		xe--;
-	}
-	DPNORMRET1(xs, xe, xm, "fint", x);
-#endif
-}
-
-ieee754dp ieee754dp_funs(unsigned int u)
-{
-	if ((int) u < 0)
-		return ieee754dp_add(ieee754dp_1e31(),
-				     ieee754dp_fint(u & ~(1 << 31)));
-	return ieee754dp_fint(u);
 }

diff --git a/arch/mips/math-emu/dp_flong.c b/arch/mips/math-emu/dp_flong.c
index 14fc01e..a267c2e 100644
--- a/arch/mips/math-emu/dp_flong.c
+++ b/arch/mips/math-emu/dp_flong.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,21 +16,18 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_flong(s64 x)
+union ieee754dp ieee754dp_flong(s64 x)
 {
 	u64 xm;
 	int xe;
 	int xs;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	if (x == 0)
 		return ieee754dp_zero(0);
@@ -52,26 +47,19 @@
 	}
 
 	/* normalize */
-	xe = DP_MBITS + 3;
-	if (xm >> (DP_MBITS + 1 + 3)) {
+	xe = DP_FBITS + 3;
+	if (xm >> (DP_FBITS + 1 + 3)) {
 		/* shunt out overflow bits */
-		while (xm >> (DP_MBITS + 1 + 3)) {
+		while (xm >> (DP_FBITS + 1 + 3)) {
 			XDPSRSX1();
 		}
 	} else {
 		/* normalize in grs extended double precision */
-		while ((xm >> (DP_MBITS + 3)) == 0) {
+		while ((xm >> (DP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 	}
-	DPNORMRET1(xs, xe, xm, "dp_flong", x);
-}
 
-ieee754dp ieee754dp_fulong(u64 u)
-{
-	if ((s64) u < 0)
-		return ieee754dp_add(ieee754dp_1e63(),
-				     ieee754dp_flong(u & ~(1ULL << 63)));
-	return ieee754dp_flong(u);
+	return ieee754dp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/dp_frexp.c b/arch/mips/math-emu/dp_frexp.c
deleted file mode 100644
index cb15a5e..0000000
--- a/arch/mips/math-emu/dp_frexp.c
+++ /dev/null

@@ -1,52 +0,0 @@
-/* IEEE754 floating point arithmetic
- * double precision: common utilities
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754dp.h"
-
-/* close to ieeep754dp_logb
-*/
-ieee754dp ieee754dp_frexp(ieee754dp x, int *eptr)
-{
-	COMPXDP;
-	CLEARCX;
-	EXPLODEXDP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		*eptr = 0;
-		return x;
-	case IEEE754_CLASS_DNORM:
-		DPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	*eptr = xe + 1;
-	return builddp(xs, -1 + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
-}

diff --git a/arch/mips/math-emu/dp_fsp.c b/arch/mips/math-emu/dp_fsp.c
index daed683..ffb69c5 100644
--- a/arch/mips/math-emu/dp_fsp.c
+++ b/arch/mips/math-emu/dp_fsp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,56 +16,58 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
+#include "ieee754sp.h"
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_fsp(ieee754sp x)
+union ieee754dp ieee754dp_fsp(union ieee754sp x)
 {
 	COMPXSP;
 
 	EXPLODEXSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXSP;
 
 	switch (xc) {
 	case IEEE754_CLASS_SNAN:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "fsp");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
+
 	case IEEE754_CLASS_QNAN:
 		return ieee754dp_nanxcpt(builddp(xs,
 						 DP_EMAX + 1 + DP_EBIAS,
 						 ((u64) xm
-						  << (DP_MBITS -
-						      SP_MBITS))), "fsp",
-					 x);
+						  << (DP_FBITS -
+						      SP_FBITS))));
 	case IEEE754_CLASS_INF:
 		return ieee754dp_inf(xs);
+
 	case IEEE754_CLASS_ZERO:
 		return ieee754dp_zero(xs);
+
 	case IEEE754_CLASS_DNORM:
 		/* normalize */
-		while ((xm >> SP_MBITS) == 0) {
+		while ((xm >> SP_FBITS) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 		break;
+
 	case IEEE754_CLASS_NORM:
 		break;
 	}
 
-	/* CAN'T possibly overflow,underflow, or need rounding
+	/*
+	 * Can't possibly overflow,underflow, or need rounding
 	 */
 
 	/* drop the hidden bit */
 	xm &= ~SP_HIDDEN_BIT;
 
 	return builddp(xs, xe + DP_EBIAS,
-		       (u64) xm << (DP_MBITS - SP_MBITS));
+		       (u64) xm << (DP_FBITS - SP_FBITS));
 }

diff --git a/arch/mips/math-emu/dp_logb.c b/arch/mips/math-emu/dp_logb.c
deleted file mode 100644
index 151127e..0000000
--- a/arch/mips/math-emu/dp_logb.c
+++ /dev/null

@@ -1,53 +0,0 @@
-/* IEEE754 floating point arithmetic
- * double precision: common utilities
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754dp.h"
-
-ieee754dp ieee754dp_logb(ieee754dp x)
-{
-	COMPXDP;
-
-	CLEARCX;
-
-	EXPLODEXDP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-		return ieee754dp_nanxcpt(x, "logb", x);
-	case IEEE754_CLASS_QNAN:
-		return x;
-	case IEEE754_CLASS_INF:
-		return ieee754dp_inf(0);
-	case IEEE754_CLASS_ZERO:
-		return ieee754dp_inf(1);
-	case IEEE754_CLASS_DNORM:
-		DPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	return ieee754dp_fint(xe);
-}

diff --git a/arch/mips/math-emu/dp_modf.c b/arch/mips/math-emu/dp_modf.c
deleted file mode 100644
index b01f9cf..0000000
--- a/arch/mips/math-emu/dp_modf.c
+++ /dev/null

@@ -1,79 +0,0 @@
-/* IEEE754 floating point arithmetic
- * double precision: common utilities
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754dp.h"
-
-/* modf function is always exact for a finite number
-*/
-ieee754dp ieee754dp_modf(ieee754dp x, ieee754dp *ip)
-{
-	COMPXDP;
-
-	CLEARCX;
-
-	EXPLODEXDP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		*ip = x;
-		return x;
-	case IEEE754_CLASS_DNORM:
-		/* far to small */
-		*ip = ieee754dp_zero(xs);
-		return x;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	if (xe < 0) {
-		*ip = ieee754dp_zero(xs);
-		return x;
-	}
-	if (xe >= DP_MBITS) {
-		*ip = x;
-		return ieee754dp_zero(xs);
-	}
-	/* generate ipart mantissa by clearing bottom bits
-	 */
-	*ip = builddp(xs, xe + DP_EBIAS,
-		      ((xm >> (DP_MBITS - xe)) << (DP_MBITS - xe)) &
-		      ~DP_HIDDEN_BIT);
-
-	/* generate fpart mantissa by clearing top bits
-	 * and normalizing (must be able to normalize)
-	 */
-	xm = (xm << (64 - (DP_MBITS - xe))) >> (64 - (DP_MBITS - xe));
-	if (xm == 0)
-		return ieee754dp_zero(xs);
-
-	while ((xm >> DP_MBITS) == 0) {
-		xm <<= 1;
-		xe--;
-	}
-	return builddp(xs, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
-}

diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index 09175f4..d3acded 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,32 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_mul(ieee754dp x, ieee754dp y)
+union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
 {
+	int re;
+	int rs;
+	u64 rm;
+	unsigned lxm;
+	unsigned hxm;
+	unsigned lym;
+	unsigned hym;
+	u64 lrm;
+	u64 hrm;
+	u64 t;
+	u64 at;
+
 	COMPXDP;
 	COMPYDP;
 
 	EXPLODEXDP;
 	EXPLODEYDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXDP;
 	FLUSHYDP;
@@ -51,8 +58,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "mul", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,12 +75,13 @@
 		return x;
 
 
-		/* Infinity handling */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_xcpt(ieee754dp_indef(), "mul", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
@@ -107,70 +115,59 @@
 	/* rm = xm * ym, re = xe+ye basically */
 	assert(xm & DP_HIDDEN_BIT);
 	assert(ym & DP_HIDDEN_BIT);
-	{
-		int re = xe + ye;
-		int rs = xs ^ ys;
-		u64 rm;
 
-		/* shunt to top of word */
-		xm <<= 64 - (DP_MBITS + 1);
-		ym <<= 64 - (DP_MBITS + 1);
+	re = xe + ye;
+	rs = xs ^ ys;
 
-		/* multiply 32bits xm,ym to give high 32bits rm with stickness
-		 */
+	/* shunt to top of word */
+	xm <<= 64 - (DP_FBITS + 1);
+	ym <<= 64 - (DP_FBITS + 1);
 
-		/* 32 * 32 => 64 */
+	/*
+	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
+	 */
+
+	/* 32 * 32 => 64 */
 #define DPXMULT(x, y)	((u64)(x) * (u64)y)
 
-		{
-			unsigned lxm = xm;
-			unsigned hxm = xm >> 32;
-			unsigned lym = ym;
-			unsigned hym = ym >> 32;
-			u64 lrm;
-			u64 hrm;
+	lxm = xm;
+	hxm = xm >> 32;
+	lym = ym;
+	hym = ym >> 32;
 
-			lrm = DPXMULT(lxm, lym);
-			hrm = DPXMULT(hxm, hym);
+	lrm = DPXMULT(lxm, lym);
+	hrm = DPXMULT(hxm, hym);
 
-			{
-				u64 t = DPXMULT(lxm, hym);
-				{
-					u64 at =
-					    lrm + (t << 32);
-					hrm += at < lrm;
-					lrm = at;
-				}
-				hrm = hrm + (t >> 32);
-			}
+	t = DPXMULT(lxm, hym);
 
-			{
-				u64 t = DPXMULT(hxm, lym);
-				{
-					u64 at =
-					    lrm + (t << 32);
-					hrm += at < lrm;
-					lrm = at;
-				}
-				hrm = hrm + (t >> 32);
-			}
-			rm = hrm | (lrm != 0);
-		}
+	at = lrm + (t << 32);
+	hrm += at < lrm;
+	lrm = at;
 
-		/*
-		 * sticky shift down to normal rounding precision
-		 */
-		if ((s64) rm < 0) {
-			rm =
-			    (rm >> (64 - (DP_MBITS + 1 + 3))) |
-			    ((rm << (DP_MBITS + 1 + 3)) != 0);
+	hrm = hrm + (t >> 32);
+
+	t = DPXMULT(hxm, lym);
+
+	at = lrm + (t << 32);
+	hrm += at < lrm;
+	lrm = at;
+
+	hrm = hrm + (t >> 32);
+
+	rm = hrm | (lrm != 0);
+
+	/*
+	 * Sticky shift down to normal rounding precision.
+	 */
+	if ((s64) rm < 0) {
+		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
+		     ((rm << (DP_FBITS + 1 + 3)) != 0);
 			re++;
-		} else {
-			rm =
-			    (rm >> (64 - (DP_MBITS + 1 + 3 + 1))) |
-			    ((rm << (DP_MBITS + 1 + 3 + 1)) != 0);
-		}
-		assert(rm & (DP_HIDDEN_BIT << 3));
-		DPNORMRET2(rs, re, rm, "mul", x, y);
+	} else {
+		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
+		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
 	}
+	assert(rm & (DP_HIDDEN_BIT << 3));
+
+	return ieee754dp_format(rs, re, rm);
 }

diff --git a/arch/mips/math-emu/dp_scalb.c b/arch/mips/math-emu/dp_scalb.c
deleted file mode 100644
index 6f5df43..0000000
--- a/arch/mips/math-emu/dp_scalb.c
+++ /dev/null

@@ -1,57 +0,0 @@
-/* IEEE754 floating point arithmetic
- * double precision: common utilities
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754dp.h"
-
-ieee754dp ieee754dp_scalb(ieee754dp x, int n)
-{
-	COMPXDP;
-
-	CLEARCX;
-
-	EXPLODEXDP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-		return ieee754dp_nanxcpt(x, "scalb", x, n);
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		return x;
-	case IEEE754_CLASS_DNORM:
-		DPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	DPNORMRET2(xs, xe + n, xm << 3, "scalb", x, n);
-}
-
-
-ieee754dp ieee754dp_ldexp(ieee754dp x, int n)
-{
-	return ieee754dp_scalb(x, n);
-}

diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c
index 79ce267..bccbe90 100644
--- a/arch/mips/math-emu/dp_simple.c
+++ b/arch/mips/math-emu/dp_simple.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,33 +16,17 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-int ieee754dp_finite(ieee754dp x)
-{
-	return DPBEXP(x) != DP_EMAX + 1 + DP_EBIAS;
-}
-
-ieee754dp ieee754dp_copysign(ieee754dp x, ieee754dp y)
-{
-	CLEARCX;
-	DPSIGN(x) = DPSIGN(y);
-	return x;
-}
-
-
-ieee754dp ieee754dp_neg(ieee754dp x)
+union ieee754dp ieee754dp_neg(union ieee754dp x)
 {
 	COMPXDP;
 
 	EXPLODEXDP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXDP;
 
 	/*
@@ -55,30 +37,29 @@
 	DPSIGN(x) ^= 1;
 
 	if (xc == IEEE754_CLASS_SNAN) {
-		ieee754dp y = ieee754dp_indef();
-		SETCX(IEEE754_INVALID_OPERATION);
+		union ieee754dp y = ieee754dp_indef();
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		DPSIGN(y) = DPSIGN(x);
-		return ieee754dp_nanxcpt(y, "neg");
+		return ieee754dp_nanxcpt(y);
 	}
 
 	return x;
 }
 
-
-ieee754dp ieee754dp_abs(ieee754dp x)
+union ieee754dp ieee754dp_abs(union ieee754dp x)
 {
 	COMPXDP;
 
 	EXPLODEXDP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXDP;
 
 	/* Clear sign ALWAYS, irrespective of NaN */
 	DPSIGN(x) = 0;
 
 	if (xc == IEEE754_CLASS_SNAN) {
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "abs");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
 	}
 
 	return x;

diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c
index b874d60..041bbb61 100644
--- a/arch/mips/math-emu/dp_sqrt.c
+++ b/arch/mips/math-emu/dp_sqrt.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,12 +16,9 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
 static const unsigned table[] = {
@@ -34,44 +29,49 @@
 	1742, 661, 130
 };
 
-ieee754dp ieee754dp_sqrt(ieee754dp x)
+union ieee754dp ieee754dp_sqrt(union ieee754dp x)
 {
 	struct _ieee754_csr oldcsr;
-	ieee754dp y, z, t;
+	union ieee754dp y, z, t;
 	unsigned scalx, yh;
 	COMPXDP;
 
 	EXPLODEXDP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXDP;
 
 	/* x == INF or NAN? */
 	switch (xc) {
 	case IEEE754_CLASS_QNAN:
 		/* sqrt(Nan) = Nan */
-		return ieee754dp_nanxcpt(x, "sqrt");
+		return ieee754dp_nanxcpt(x);
+
 	case IEEE754_CLASS_SNAN:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
+
 	case IEEE754_CLASS_ZERO:
 		/* sqrt(0) = 0 */
 		return x;
+
 	case IEEE754_CLASS_INF:
 		if (xs) {
 			/* sqrt(-Inf) = Nan */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754dp_nanxcpt(ieee754dp_indef());
 		}
 		/* sqrt(+Inf) = Inf */
 		return x;
+
 	case IEEE754_CLASS_DNORM:
 		DPDNORMX;
 		/* fall through */
+
 	case IEEE754_CLASS_NORM:
 		if (xs) {
 			/* sqrt(-x) = Nan */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754dp_nanxcpt(ieee754dp_indef());
 		}
 		break;
 	}
@@ -80,7 +80,7 @@
 	oldcsr = ieee754_csr;
 	ieee754_csr.mx &= ~IEEE754_INEXACT;
 	ieee754_csr.sx &= ~IEEE754_INEXACT;
-	ieee754_csr.rm = IEEE754_RN;
+	ieee754_csr.rm = FPU_CSR_RN;
 
 	/* adjust exponent to prevent overflow */
 	scalx = 0;
@@ -110,19 +110,19 @@
 	/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
 	/* t=y*y; z=t;	pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
 	z = t = ieee754dp_mul(y, y);
-	t.parts.bexp += 0x001;
+	t.bexp += 0x001;
 	t = ieee754dp_add(t, z);
 	z = ieee754dp_mul(ieee754dp_sub(x, z), y);
 
 	/* t=z/(t+x) ;	pt[n0]+=0x00100000; y+=t; */
 	t = ieee754dp_div(z, ieee754dp_add(t, x));
-	t.parts.bexp += 0x001;
+	t.bexp += 0x001;
 	y = ieee754dp_add(y, t);
 
 	/* twiddle last bit to force y correctly rounded */
 
 	/* set RZ, clear INEX flag */
-	ieee754_csr.rm = IEEE754_RZ;
+	ieee754_csr.rm = FPU_CSR_RZ;
 	ieee754_csr.sx &= ~IEEE754_INEXACT;
 
 	/* t=x/y; ...chopped quotient, possibly inexact */
@@ -139,10 +139,10 @@
 		oldcsr.sx |= IEEE754_INEXACT;
 
 		switch (oldcsr.rm) {
-		case IEEE754_RP:
+		case FPU_CSR_RU:
 			y.bits += 1;
 			/* drop through */
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			t.bits += 1;
 			break;
 		}
@@ -155,7 +155,7 @@
 	}
 
 	/* py[n0]=py[n0]+scalx; ...scale back y */
-	y.parts.bexp += scalx;
+	y.bexp += scalx;
 
 	/* restore rounding mode, possibly set inexact */
 	ieee754_csr = oldcsr;

diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index 91e0a4b..7a17402 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,22 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-ieee754dp ieee754dp_sub(ieee754dp x, ieee754dp y)
+union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
 {
+	int s;
+
 	COMPXDP;
 	COMPYDP;
 
 	EXPLODEXDP;
 	EXPLODEYDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXDP;
 	FLUSHYDP;
@@ -51,8 +48,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef(), "sub", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_nanxcpt(ieee754dp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,14 +65,14 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 		if (xs != ys)
 			return x;
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754dp_xcpt(ieee754dp_indef(), "sub", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754dp_indef();
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
@@ -87,15 +84,14 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return x;
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 		if (xs != ys)
 			return x;
 		else
-			return ieee754dp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
@@ -136,15 +132,17 @@
 	ym <<= 3;
 
 	if (xe > ye) {
-		/* have to shift y fraction right to align
+		/*
+		 * Have to shift y fraction right to align
 		 */
-		int s = xe - ye;
+		s = xe - ye;
 		ym = XDPSRS(ym, s);
 		ye += s;
 	} else if (ye > xe) {
-		/* have to shift x fraction right to align
+		/*
+		 * Have to shift x fraction right to align
 		 */
-		int s = ye - xe;
+		s = ye - xe;
 		xm = XDPSRS(xm, s);
 		xe += s;
 	}
@@ -158,7 +156,7 @@
 		xe = xe;
 		xs = xs;
 
-		if (xm >> (DP_MBITS + 1 + 3)) { /* carry out */
+		if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */
 			xm = XDPSRS1(xm);	/* shift preserving sticky */
 			xe++;
 		}
@@ -173,7 +171,7 @@
 			xs = ys;
 		}
 		if (xm == 0) {
-			if (ieee754_csr.rm == IEEE754_RD)
+			if (ieee754_csr.rm == FPU_CSR_RD)
 				return ieee754dp_zero(1);	/* round negative inf. => sign = -1 */
 			else
 				return ieee754dp_zero(0);	/* other round modes   => sign = 1 */
@@ -181,10 +179,11 @@
 
 		/* normalize to rounding precision
 		 */
-		while ((xm >> (DP_MBITS + 3)) == 0) {
+		while ((xm >> (DP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 	}
-	DPNORMRET2(xs, xe, xm, "sub", x, y);
+
+	return ieee754dp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/dp_tint.c b/arch/mips/math-emu/dp_tint.c
index 0ebe859..6ffc336 100644
--- a/arch/mips/math-emu/dp_tint.c
+++ b/arch/mips/math-emu/dp_tint.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,20 +16,21 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
-#include <linux/kernel.h>
 #include "ieee754dp.h"
 
-int ieee754dp_tint(ieee754dp x)
+int ieee754dp_tint(union ieee754dp x)
 {
+	u64 residue;
+	int round;
+	int sticky;
+	int odd;
+
 	COMPXDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	EXPLODEXDP;
 	FLUSHXDP;
@@ -40,10 +39,12 @@
 	case IEEE754_CLASS_SNAN:
 	case IEEE754_CLASS_QNAN:
 	case IEEE754_CLASS_INF:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754si_xcpt(ieee754si_indef(), "dp_tint", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754si_indef();
+
 	case IEEE754_CLASS_ZERO:
 		return 0;
+
 	case IEEE754_CLASS_DNORM:
 	case IEEE754_CLASS_NORM:
 		break;
@@ -51,44 +52,39 @@
 	if (xe > 31) {
 		/* Set invalid. We will only use overflow for floating
 		   point overflow */
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754si_xcpt(ieee754si_indef(), "dp_tint", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754si_indef();
 	}
 	/* oh gawd */
-	if (xe > DP_MBITS) {
-		xm <<= xe - DP_MBITS;
-	} else if (xe < DP_MBITS) {
-		u64 residue;
-		int round;
-		int sticky;
-		int odd;
-
+	if (xe > DP_FBITS) {
+		xm <<= xe - DP_FBITS;
+	} else if (xe < DP_FBITS) {
 		if (xe < -1) {
 			residue = xm;
 			round = 0;
 			sticky = residue != 0;
 			xm = 0;
 		} else {
-			residue = xm << (64 - DP_MBITS + xe);
+			residue = xm << (64 - DP_FBITS + xe);
 			round = (residue >> 63) != 0;
 			sticky = (residue << 1) != 0;
-			xm >>= DP_MBITS - xe;
+			xm >>= DP_FBITS - xe;
 		}
 		/* Note: At this point upper 32 bits of xm are guaranteed
 		   to be zero */
 		odd = (xm & 0x1) != 0x0;
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			if (round && (sticky || odd))
 				xm++;
 			break;
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if ((round || sticky) && !xs)
 				xm++;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if ((round || sticky) && xs)
 				xm++;
 			break;
@@ -96,27 +92,14 @@
 		/* look for valid corner case 0x80000000 */
 		if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
 			/* This can happen after rounding */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754si_xcpt(ieee754si_indef(), "dp_tint", x);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754si_indef();
 		}
 		if (round || sticky)
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_INEXACT);
 	}
 	if (xs)
 		return -xm;
 	else
 		return xm;
 }
-
-
-unsigned int ieee754dp_tuns(ieee754dp x)
-{
-	ieee754dp hb = ieee754dp_1e31();
-
-	/* what if x < 0 ?? */
-	if (ieee754dp_lt(x, hb))
-		return (unsigned) ieee754dp_tint(x);
-
-	return (unsigned) ieee754dp_tint(ieee754dp_sub(x, hb)) |
-	    ((unsigned) 1 << 31);
-}

diff --git a/arch/mips/math-emu/dp_tlong.c b/arch/mips/math-emu/dp_tlong.c
index 133ce2b..9cdc145 100644
--- a/arch/mips/math-emu/dp_tlong.c
+++ b/arch/mips/math-emu/dp_tlong.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,19 +16,21 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754dp.h"
 
-s64 ieee754dp_tlong(ieee754dp x)
+s64 ieee754dp_tlong(union ieee754dp x)
 {
+	u64 residue;
+	int round;
+	int sticky;
+	int odd;
+
 	COMPXDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	EXPLODEXDP;
 	FLUSHXDP;
@@ -39,10 +39,12 @@
 	case IEEE754_CLASS_SNAN:
 	case IEEE754_CLASS_QNAN:
 	case IEEE754_CLASS_INF:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754di_xcpt(ieee754di_indef(), "dp_tlong", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754di_indef();
+
 	case IEEE754_CLASS_ZERO:
 		return 0;
+
 	case IEEE754_CLASS_DNORM:
 	case IEEE754_CLASS_NORM:
 		break;
@@ -53,18 +55,13 @@
 			return -0x8000000000000000LL;
 		/* Set invalid. We will only use overflow for floating
 		   point overflow */
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754di_xcpt(ieee754di_indef(), "dp_tlong", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754di_indef();
 	}
 	/* oh gawd */
-	if (xe > DP_MBITS) {
-		xm <<= xe - DP_MBITS;
-	} else if (xe < DP_MBITS) {
-		u64 residue;
-		int round;
-		int sticky;
-		int odd;
-
+	if (xe > DP_FBITS) {
+		xm <<= xe - DP_FBITS;
+	} else if (xe < DP_FBITS) {
 		if (xe < -1) {
 			residue = xm;
 			round = 0;
@@ -75,51 +72,38 @@
 			* so we do it in two steps. Be aware that xe
 			* may be -1 */
 			residue = xm << (xe + 1);
-			residue <<= 63 - DP_MBITS;
+			residue <<= 63 - DP_FBITS;
 			round = (residue >> 63) != 0;
 			sticky = (residue << 1) != 0;
-			xm >>= DP_MBITS - xe;
+			xm >>= DP_FBITS - xe;
 		}
 		odd = (xm & 0x1) != 0x0;
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			if (round && (sticky || odd))
 				xm++;
 			break;
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if ((round || sticky) && !xs)
 				xm++;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if ((round || sticky) && xs)
 				xm++;
 			break;
 		}
 		if ((xm >> 63) != 0) {
 			/* This can happen after rounding */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754di_xcpt(ieee754di_indef(), "dp_tlong", x);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754di_indef();
 		}
 		if (round || sticky)
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_INEXACT);
 	}
 	if (xs)
 		return -xm;
 	else
 		return xm;
 }
-
-
-u64 ieee754dp_tulong(ieee754dp x)
-{
-	ieee754dp hb = ieee754dp_1e63();
-
-	/* what if x < 0 ?? */
-	if (ieee754dp_lt(x, hb))
-		return (u64) ieee754dp_tlong(x);
-
-	return (u64) ieee754dp_tlong(ieee754dp_sub(x, hb)) |
-	    (1ULL << 63);
-}

diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 7ea622a..4f514f3 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c

@@ -1,30 +1,12 @@
-#include <linux/compiler.h>
-#include <linux/mm.h>
-#include <linux/signal.h>
-#include <linux/smp.h>
-
-#include <asm/asm.h>
-#include <asm/bootinfo.h>
-#include <asm/byteorder.h>
-#include <asm/cpu.h>
-#include <asm/inst.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
 #include <asm/branch.h>
-#include <asm/mipsregs.h>
 #include <asm/cacheflush.h>
-
 #include <asm/fpu_emulator.h>
+#include <asm/inst.h>
+#include <asm/mipsregs.h>
+#include <asm/uaccess.h>
 
 #include "ieee754.h"
 
-/* Strap kernel emulator for full MIPS IV emulation */
-
-#ifdef __mips
-#undef __mips
-#endif
-#define __mips 4
-
 /*
  * Emulate the arbritrary instruction ir at xcp->cp0_epc.  Required when
  * we have to emulate the instruction in a COP1 branch delay slot.  Do
@@ -59,13 +41,11 @@
 		(ir == 0)) {
 		/* NOP is easy */
 		regs->cp0_epc = cpc;
-		regs->cp0_cause &= ~CAUSEF_BD;
+		clear_delay_slot(regs);
 		return 0;
 	}
-#ifdef DSEMUL_TRACE
-	printk("dsemul %lx %lx\n", regs->cp0_epc, cpc);
 
-#endif
+	pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
 
 	/*
 	 * The strategy is to push the instruction onto the user stack
@@ -167,9 +147,8 @@
 	 * emulating the branch delay instruction.
 	 */
 
-#ifdef DSEMUL_TRACE
-	printk("dsemulret\n");
-#endif
+	pr_debug("dsemulret\n");
+
 	if (__get_user(epc, &fr->epc)) {		/* Saved EPC */
 		/* This is not a good situation to be in */
 		force_sig(SIGBUS, current);

diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c
index 0015cf1..53f1d22 100644
--- a/arch/mips/math-emu/ieee754.c
+++ b/arch/mips/math-emu/ieee754.c

@@ -10,8 +10,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -23,105 +21,69 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
+#include <linux/compiler.h>
 
-#include "ieee754int.h"
+#include "ieee754.h"
 #include "ieee754sp.h"
 #include "ieee754dp.h"
 
-#define DP_EBIAS	1023
-#define DP_EMIN		(-1022)
-#define DP_EMAX		1023
+/*
+ * Special constants
+ */
 
-#define SP_EBIAS	127
-#define SP_EMIN		(-126)
-#define SP_EMAX		127
-
-/* special constants
-*/
-
-
-#if (defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN) || defined(__MIPSEL__)
-#define SPSTR(s, b, m) {m, b, s}
-#define DPSTR(s, b, mh, ml) {ml, mh, b, s}
-#endif
-
-#ifdef __MIPSEB__
-#define SPSTR(s, b, m) {s, b, m}
-#define DPSTR(s, b, mh, ml) {s, b, mh, ml}
-#endif
-
-const struct ieee754dp_konst __ieee754dp_spcvals[] = {
-	DPSTR(0, DP_EMIN - 1 + DP_EBIAS, 0, 0), /* + zero   */
-	DPSTR(1, DP_EMIN - 1 + DP_EBIAS, 0, 0), /* - zero   */
-	DPSTR(0, DP_EBIAS, 0, 0),	/* + 1.0   */
-	DPSTR(1, DP_EBIAS, 0, 0),	/* - 1.0   */
-	DPSTR(0, 3 + DP_EBIAS, 0x40000, 0),	/* + 10.0   */
-	DPSTR(1, 3 + DP_EBIAS, 0x40000, 0),	/* - 10.0   */
-	DPSTR(0, DP_EMAX + 1 + DP_EBIAS, 0, 0), /* + infinity */
-	DPSTR(1, DP_EMAX + 1 + DP_EBIAS, 0, 0), /* - infinity */
-	DPSTR(0, DP_EMAX+1+DP_EBIAS, 0x7FFFF, 0xFFFFFFFF), /* + indef quiet Nan */
-	DPSTR(0, DP_EMAX + DP_EBIAS, 0xFFFFF, 0xFFFFFFFF),	/* + max */
-	DPSTR(1, DP_EMAX + DP_EBIAS, 0xFFFFF, 0xFFFFFFFF),	/* - max */
-	DPSTR(0, DP_EMIN + DP_EBIAS, 0, 0),	/* + min normal */
-	DPSTR(1, DP_EMIN + DP_EBIAS, 0, 0),	/* - min normal */
-	DPSTR(0, DP_EMIN - 1 + DP_EBIAS, 0, 1), /* + min denormal */
-	DPSTR(1, DP_EMIN - 1 + DP_EBIAS, 0, 1), /* - min denormal */
-	DPSTR(0, 31 + DP_EBIAS, 0, 0),	/* + 1.0e31 */
-	DPSTR(0, 63 + DP_EBIAS, 0, 0),	/* + 1.0e63 */
-};
-
-const struct ieee754sp_konst __ieee754sp_spcvals[] = {
-	SPSTR(0, SP_EMIN - 1 + SP_EBIAS, 0),	/* + zero   */
-	SPSTR(1, SP_EMIN - 1 + SP_EBIAS, 0),	/* - zero   */
-	SPSTR(0, SP_EBIAS, 0),	/* + 1.0   */
-	SPSTR(1, SP_EBIAS, 0),	/* - 1.0   */
-	SPSTR(0, 3 + SP_EBIAS, 0x200000),	/* + 10.0   */
-	SPSTR(1, 3 + SP_EBIAS, 0x200000),	/* - 10.0   */
-	SPSTR(0, SP_EMAX + 1 + SP_EBIAS, 0),	/* + infinity */
-	SPSTR(1, SP_EMAX + 1 + SP_EBIAS, 0),	/* - infinity */
-	SPSTR(0, SP_EMAX+1+SP_EBIAS, 0x3FFFFF),	    /* + indef quiet Nan  */
-	SPSTR(0, SP_EMAX + SP_EBIAS, 0x7FFFFF), /* + max normal */
-	SPSTR(1, SP_EMAX + SP_EBIAS, 0x7FFFFF), /* - max normal */
-	SPSTR(0, SP_EMIN + SP_EBIAS, 0),	/* + min normal */
-	SPSTR(1, SP_EMIN + SP_EBIAS, 0),	/* - min normal */
-	SPSTR(0, SP_EMIN - 1 + SP_EBIAS, 1),	/* + min denormal */
-	SPSTR(1, SP_EMIN - 1 + SP_EBIAS, 1),	/* - min denormal */
-	SPSTR(0, 31 + SP_EBIAS, 0),	/* + 1.0e31 */
-	SPSTR(0, 63 + SP_EBIAS, 0),	/* + 1.0e63 */
-};
-
-
-int ieee754si_xcpt(int r, const char *op, ...)
-{
-	struct ieee754xctx ax;
-
-	if (!TSTX())
-		return r;
-	ax.op = op;
-	ax.rt = IEEE754_RT_SI;
-	ax.rv.si = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.si;
+#define DPCNST(s, b, m)							\
+{									\
+	.sign	= (s),							\
+	.bexp	= (b) + DP_EBIAS,					\
+	.mant	= (m)							\
 }
 
-s64 ieee754di_xcpt(s64 r, const char *op, ...)
-{
-	struct ieee754xctx ax;
+const union ieee754dp __ieee754dp_spcvals[] = {
+	DPCNST(0, DP_EMIN - 1, 0x0000000000000ULL),	/* + zero   */
+	DPCNST(1, DP_EMIN - 1, 0x0000000000000ULL),	/* - zero   */
+	DPCNST(0, 0,	       0x0000000000000ULL),	/* + 1.0   */
+	DPCNST(1, 0,	       0x0000000000000ULL),	/* - 1.0   */
+	DPCNST(0, 3,           0x4000000000000ULL),	/* + 10.0   */
+	DPCNST(1, 3,           0x4000000000000ULL),	/* - 10.0   */
+	DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL),	/* + infinity */
+	DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL),	/* - infinity */
+	DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),	/* + indef quiet Nan */
+	DPCNST(0, DP_EMAX,     0xFFFFFFFFFFFFFULL),	/* + max */
+	DPCNST(1, DP_EMAX,     0xFFFFFFFFFFFFFULL),	/* - max */
+	DPCNST(0, DP_EMIN,     0x0000000000000ULL),	/* + min normal */
+	DPCNST(1, DP_EMIN,     0x0000000000000ULL),	/* - min normal */
+	DPCNST(0, DP_EMIN - 1, 0x0000000000001ULL),	/* + min denormal */
+	DPCNST(1, DP_EMIN - 1, 0x0000000000001ULL),	/* - min denormal */
+	DPCNST(0, 31,          0x0000000000000ULL),	/* + 1.0e31 */
+	DPCNST(0, 63,          0x0000000000000ULL),	/* + 1.0e63 */
+};
 
-	if (!TSTX())
-		return r;
-	ax.op = op;
-	ax.rt = IEEE754_RT_DI;
-	ax.rv.di = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.di;
+#define SPCNST(s, b, m)							\
+{									\
+	.sign	= (s),							\
+	.bexp	= (b) + SP_EBIAS,					\
+	.mant	= (m)							\
 }
+
+const union ieee754sp __ieee754sp_spcvals[] = {
+	SPCNST(0, SP_EMIN - 1, 0x000000),	/* + zero   */
+	SPCNST(1, SP_EMIN - 1, 0x000000),	/* - zero   */
+	SPCNST(0, 0,	       0x000000),	/* + 1.0   */
+	SPCNST(1, 0,	       0x000000),	/* - 1.0   */
+	SPCNST(0, 3,	       0x200000),	/* + 10.0   */
+	SPCNST(1, 3,	       0x200000),	/* - 10.0   */
+	SPCNST(0, SP_EMAX + 1, 0x000000),	/* + infinity */
+	SPCNST(1, SP_EMAX + 1, 0x000000),	/* - infinity */
+	SPCNST(0, SP_EMAX + 1, 0x3FFFFF),	/* + indef quiet Nan  */
+	SPCNST(0, SP_EMAX,     0x7FFFFF),	/* + max normal */
+	SPCNST(1, SP_EMAX,     0x7FFFFF),	/* - max normal */
+	SPCNST(0, SP_EMIN,     0x000000),	/* + min normal */
+	SPCNST(1, SP_EMIN,     0x000000),	/* - min normal */
+	SPCNST(0, SP_EMIN - 1, 0x000001),	/* + min denormal */
+	SPCNST(1, SP_EMIN - 1, 0x000001),	/* - min denormal */
+	SPCNST(0, 31,	       0x000000),	/* + 1.0e31 */
+	SPCNST(0, 63,	       0x000000),	/* + 1.0e63 */
+};

diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index 22796e0..43c4fb5 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h

@@ -13,7 +13,7 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  *
  *  Nov 7, 2000
  *  Modification to allow integration with Linux kernel
@@ -24,186 +24,93 @@
 #ifndef __ARCH_MIPS_MATH_EMU_IEEE754_H
 #define __ARCH_MIPS_MATH_EMU_IEEE754_H
 
+#include <linux/compiler.h>
 #include <asm/byteorder.h>
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <asm/bitfield.h>
 
-/*
- * Not very pretty, but the Linux kernel's normal va_list definition
- * does not allow it to be used as a structure element, as it is here.
- */
-#ifndef _STDARG_H
-#include <stdarg.h>
-#endif
-
-#ifdef __LITTLE_ENDIAN
-struct ieee754dp_konst {
-	unsigned mantlo:32;
-	unsigned manthi:20;
-	unsigned bexp:11;
-	unsigned sign:1;
-};
-struct ieee754sp_konst {
-	unsigned mant:23;
-	unsigned bexp:8;
-	unsigned sign:1;
-};
-
-typedef union _ieee754dp {
-	struct ieee754dp_konst oparts;
+union ieee754dp {
 	struct {
-		u64 mant:52;
-		unsigned int bexp:11;
-		unsigned int sign:1;
-	} parts;
+		__BITFIELD_FIELD(unsigned int sign:1,
+		__BITFIELD_FIELD(unsigned int bexp:11,
+		__BITFIELD_FIELD(u64 mant:52,
+		;)))
+	};
 	u64 bits;
-	double d;
-} ieee754dp;
-
-typedef union _ieee754sp {
-	struct ieee754sp_konst parts;
-	float f;
-	u32 bits;
-} ieee754sp;
-#endif
-
-#ifdef __BIG_ENDIAN
-struct ieee754dp_konst {
-	unsigned sign:1;
-	unsigned bexp:11;
-	unsigned manthi:20;
-	unsigned mantlo:32;
 };
 
-typedef union _ieee754dp {
-	struct ieee754dp_konst oparts;
+union ieee754sp {
 	struct {
-		unsigned int sign:1;
-		unsigned int bexp:11;
-		u64 mant:52;
-	} parts;
-	double d;
-	u64 bits;
-} ieee754dp;
-
-struct ieee754sp_konst {
-	unsigned sign:1;
-	unsigned bexp:8;
-	unsigned mant:23;
-};
-
-typedef union _ieee754sp {
-	struct ieee754sp_konst parts;
-	float f;
+		__BITFIELD_FIELD(unsigned sign:1,
+		__BITFIELD_FIELD(unsigned bexp:8,
+		__BITFIELD_FIELD(unsigned mant:23,
+		;)))
+	};
 	u32 bits;
-} ieee754sp;
-#endif
+};
 
 /*
  * single precision (often aka float)
 */
-int ieee754sp_finite(ieee754sp x);
-int ieee754sp_class(ieee754sp x);
+int ieee754sp_class(union ieee754sp x);
 
-ieee754sp ieee754sp_abs(ieee754sp x);
-ieee754sp ieee754sp_neg(ieee754sp x);
-ieee754sp ieee754sp_scalb(ieee754sp x, int);
-ieee754sp ieee754sp_logb(ieee754sp x);
+union ieee754sp ieee754sp_abs(union ieee754sp x);
+union ieee754sp ieee754sp_neg(union ieee754sp x);
 
-/* x with sign of y */
-ieee754sp ieee754sp_copysign(ieee754sp x, ieee754sp y);
+union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y);
+union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y);
+union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y);
+union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y);
 
-ieee754sp ieee754sp_add(ieee754sp x, ieee754sp y);
-ieee754sp ieee754sp_sub(ieee754sp x, ieee754sp y);
-ieee754sp ieee754sp_mul(ieee754sp x, ieee754sp y);
-ieee754sp ieee754sp_div(ieee754sp x, ieee754sp y);
+union ieee754sp ieee754sp_fint(int x);
+union ieee754sp ieee754sp_flong(s64 x);
+union ieee754sp ieee754sp_fdp(union ieee754dp x);
 
-ieee754sp ieee754sp_fint(int x);
-ieee754sp ieee754sp_funs(unsigned x);
-ieee754sp ieee754sp_flong(s64 x);
-ieee754sp ieee754sp_fulong(u64 x);
-ieee754sp ieee754sp_fdp(ieee754dp x);
+int ieee754sp_tint(union ieee754sp x);
+s64 ieee754sp_tlong(union ieee754sp x);
 
-int ieee754sp_tint(ieee754sp x);
-unsigned int ieee754sp_tuns(ieee754sp x);
-s64 ieee754sp_tlong(ieee754sp x);
-u64 ieee754sp_tulong(ieee754sp x);
+int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cop, int sig);
 
-int ieee754sp_cmp(ieee754sp x, ieee754sp y, int cop, int sig);
-/*
- * basic sp math
- */
-ieee754sp ieee754sp_modf(ieee754sp x, ieee754sp * ip);
-ieee754sp ieee754sp_frexp(ieee754sp x, int *exp);
-ieee754sp ieee754sp_ldexp(ieee754sp x, int exp);
-
-ieee754sp ieee754sp_ceil(ieee754sp x);
-ieee754sp ieee754sp_floor(ieee754sp x);
-ieee754sp ieee754sp_trunc(ieee754sp x);
-
-ieee754sp ieee754sp_sqrt(ieee754sp x);
+union ieee754sp ieee754sp_sqrt(union ieee754sp x);
 
 /*
  * double precision (often aka double)
 */
-int ieee754dp_finite(ieee754dp x);
-int ieee754dp_class(ieee754dp x);
+int ieee754dp_class(union ieee754dp x);
 
-/* x with sign of y */
-ieee754dp ieee754dp_copysign(ieee754dp x, ieee754dp y);
+union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y);
+union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y);
+union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y);
+union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y);
 
-ieee754dp ieee754dp_add(ieee754dp x, ieee754dp y);
-ieee754dp ieee754dp_sub(ieee754dp x, ieee754dp y);
-ieee754dp ieee754dp_mul(ieee754dp x, ieee754dp y);
-ieee754dp ieee754dp_div(ieee754dp x, ieee754dp y);
+union ieee754dp ieee754dp_abs(union ieee754dp x);
+union ieee754dp ieee754dp_neg(union ieee754dp x);
 
-ieee754dp ieee754dp_abs(ieee754dp x);
-ieee754dp ieee754dp_neg(ieee754dp x);
-ieee754dp ieee754dp_scalb(ieee754dp x, int);
+union ieee754dp ieee754dp_fint(int x);
+union ieee754dp ieee754dp_flong(s64 x);
+union ieee754dp ieee754dp_fsp(union ieee754sp x);
 
-/* return exponent as integer in floating point format
- */
-ieee754dp ieee754dp_logb(ieee754dp x);
+int ieee754dp_tint(union ieee754dp x);
+s64 ieee754dp_tlong(union ieee754dp x);
 
-ieee754dp ieee754dp_fint(int x);
-ieee754dp ieee754dp_funs(unsigned x);
-ieee754dp ieee754dp_flong(s64 x);
-ieee754dp ieee754dp_fulong(u64 x);
-ieee754dp ieee754dp_fsp(ieee754sp x);
+int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cop, int sig);
 
-ieee754dp ieee754dp_ceil(ieee754dp x);
-ieee754dp ieee754dp_floor(ieee754dp x);
-ieee754dp ieee754dp_trunc(ieee754dp x);
-
-int ieee754dp_tint(ieee754dp x);
-unsigned int ieee754dp_tuns(ieee754dp x);
-s64 ieee754dp_tlong(ieee754dp x);
-u64 ieee754dp_tulong(ieee754dp x);
-
-int ieee754dp_cmp(ieee754dp x, ieee754dp y, int cop, int sig);
-/*
- * basic sp math
- */
-ieee754dp ieee754dp_modf(ieee754dp x, ieee754dp * ip);
-ieee754dp ieee754dp_frexp(ieee754dp x, int *exp);
-ieee754dp ieee754dp_ldexp(ieee754dp x, int exp);
-
-ieee754dp ieee754dp_ceil(ieee754dp x);
-ieee754dp ieee754dp_floor(ieee754dp x);
-ieee754dp ieee754dp_trunc(ieee754dp x);
-
-ieee754dp ieee754dp_sqrt(ieee754dp x);
+union ieee754dp ieee754dp_sqrt(union ieee754dp x);
 
 
 
 /* 5 types of floating point number
 */
-#define IEEE754_CLASS_NORM	0x00
-#define IEEE754_CLASS_ZERO	0x01
-#define IEEE754_CLASS_DNORM	0x02
-#define IEEE754_CLASS_INF	0x03
-#define IEEE754_CLASS_SNAN	0x04
-#define IEEE754_CLASS_QNAN	0x05
+enum {
+	IEEE754_CLASS_NORM	= 0x00,
+	IEEE754_CLASS_ZERO	= 0x01,
+	IEEE754_CLASS_DNORM	= 0x02,
+	IEEE754_CLASS_INF	= 0x03,
+	IEEE754_CLASS_SNAN	= 0x04,
+	IEEE754_CLASS_QNAN	= 0x05,
+};
 
 /* exception numbers */
 #define IEEE754_INEXACT			0x01
@@ -219,114 +126,84 @@
 #define IEEE754_CGT	0x04
 #define IEEE754_CUN	0x08
 
-/* rounding mode
-*/
-#define IEEE754_RN	0	/* round to nearest */
-#define IEEE754_RZ	1	/* round toward zero  */
-#define IEEE754_RD	2	/* round toward -Infinity */
-#define IEEE754_RU	3	/* round toward +Infinity */
-
-/* other naming */
-#define IEEE754_RM	IEEE754_RD
-#define IEEE754_RP	IEEE754_RU
-
 /* "normal" comparisons
 */
-static inline int ieee754sp_eq(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_eq(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y, IEEE754_CEQ, 0);
 }
 
-static inline int ieee754sp_ne(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_ne(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y,
 			     IEEE754_CLT | IEEE754_CGT | IEEE754_CUN, 0);
 }
 
-static inline int ieee754sp_lt(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_lt(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y, IEEE754_CLT, 0);
 }
 
-static inline int ieee754sp_le(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_le(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y, IEEE754_CLT | IEEE754_CEQ, 0);
 }
 
-static inline int ieee754sp_gt(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_gt(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y, IEEE754_CGT, 0);
 }
 
 
-static inline int ieee754sp_ge(ieee754sp x, ieee754sp y)
+static inline int ieee754sp_ge(union ieee754sp x, union ieee754sp y)
 {
 	return ieee754sp_cmp(x, y, IEEE754_CGT | IEEE754_CEQ, 0);
 }
 
-static inline int ieee754dp_eq(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_eq(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y, IEEE754_CEQ, 0);
 }
 
-static inline int ieee754dp_ne(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_ne(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y,
 			     IEEE754_CLT | IEEE754_CGT | IEEE754_CUN, 0);
 }
 
-static inline int ieee754dp_lt(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_lt(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y, IEEE754_CLT, 0);
 }
 
-static inline int ieee754dp_le(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_le(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y, IEEE754_CLT | IEEE754_CEQ, 0);
 }
 
-static inline int ieee754dp_gt(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_gt(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y, IEEE754_CGT, 0);
 }
 
-static inline int ieee754dp_ge(ieee754dp x, ieee754dp y)
+static inline int ieee754dp_ge(union ieee754dp x, union ieee754dp y)
 {
 	return ieee754dp_cmp(x, y, IEEE754_CGT | IEEE754_CEQ, 0);
 }
 
-
-/*
- * Like strtod
- */
-ieee754dp ieee754dp_fstr(const char *s, char **endp);
-char *ieee754dp_tstr(ieee754dp x, int prec, int fmt, int af);
-
-
 /*
  * The control status register
  */
 struct _ieee754_csr {
-#ifdef __BIG_ENDIAN
-	unsigned pad0:7;
-	unsigned nod:1;		/* set 1 for no denormalised numbers */
-	unsigned c:1;		/* condition */
-	unsigned pad1:5;
-	unsigned cx:6;		/* exceptions this operation */
-	unsigned mx:5;		/* exception enable  mask */
-	unsigned sx:5;		/* exceptions total */
-	unsigned rm:2;		/* current rounding mode */
-#endif
-#ifdef __LITTLE_ENDIAN
-	unsigned rm:2;		/* current rounding mode */
-	unsigned sx:5;		/* exceptions total */
-	unsigned mx:5;		/* exception enable  mask */
-	unsigned cx:6;		/* exceptions this operation */
-	unsigned pad1:5;
-	unsigned c:1;		/* condition */
-	unsigned nod:1;		/* set 1 for no denormalised numbers */
-	unsigned pad0:7;
-#endif
+	__BITFIELD_FIELD(unsigned pad0:7,
+	__BITFIELD_FIELD(unsigned nod:1,	/* set 1 for no denormalised numbers */
+	__BITFIELD_FIELD(unsigned c:1,		/* condition */
+	__BITFIELD_FIELD(unsigned pad1:5,
+	__BITFIELD_FIELD(unsigned cx:6,		/* exceptions this operation */
+	__BITFIELD_FIELD(unsigned mx:5,		/* exception enable  mask */
+	__BITFIELD_FIELD(unsigned sx:5,		/* exceptions total */
+	__BITFIELD_FIELD(unsigned rm:2,		/* current rounding mode */
+	;))))))))
 };
 #define ieee754_csr (*(struct _ieee754_csr *)(&current->thread.fpu.fcr31))
 
@@ -377,8 +254,8 @@
 }
 
 /* debugging */
-ieee754sp ieee754sp_dump(char *s, ieee754sp x);
-ieee754dp ieee754dp_dump(char *s, ieee754dp x);
+union ieee754sp ieee754sp_dump(char *s, union ieee754sp x);
+union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
 
 #define IEEE754_SPCVAL_PZERO	0
 #define IEEE754_SPCVAL_NZERO	1
@@ -398,10 +275,10 @@
 #define IEEE754_SPCVAL_P1E31	15	/* + 1.0e31 */
 #define IEEE754_SPCVAL_P1E63	16	/* + 1.0e63 */
 
-extern const struct ieee754dp_konst __ieee754dp_spcvals[];
-extern const struct ieee754sp_konst __ieee754sp_spcvals[];
-#define ieee754dp_spcvals ((const ieee754dp *)__ieee754dp_spcvals)
-#define ieee754sp_spcvals ((const ieee754sp *)__ieee754sp_spcvals)
+extern const union ieee754dp __ieee754dp_spcvals[];
+extern const union ieee754sp __ieee754sp_spcvals[];
+#define ieee754dp_spcvals ((const union ieee754dp *)__ieee754dp_spcvals)
+#define ieee754sp_spcvals ((const union ieee754sp *)__ieee754sp_spcvals)
 
 /*
  * Return infinity with given sign
@@ -431,28 +308,15 @@
 /*
  * Indefinite integer value
  */
-#define ieee754si_indef()	INT_MAX
-#ifdef LONG_LONG_MAX
-#define ieee754di_indef()	LONG_LONG_MAX
-#else
-#define ieee754di_indef()	((s64)(~0ULL>>1))
-#endif
+static inline int ieee754si_indef(void)
+{
+	return INT_MAX;
+}
 
-/* IEEE exception context, passed to handler */
-struct ieee754xctx {
-	const char *op;		/* operation name */
-	int rt;			/* result type */
-	union {
-		ieee754sp sp;	/* single precision */
-		ieee754dp dp;	/* double precision */
-#ifdef IEEE854_XP
-		ieee754xp xp;	/* extended precision */
-#endif
-		int si;		/* standard signed integer (32bits) */
-		s64 di;		/* extended signed integer (64bits) */
-	} rv;			/* default result format implied by op */
-	va_list ap;
-};
+static inline s64 ieee754di_indef(void)
+{
+	return S64_MAX;
+}
 
 /* result types for xctx.rt */
 #define IEEE754_RT_SP	0
@@ -461,8 +325,6 @@
 #define IEEE754_RT_SI	3
 #define IEEE754_RT_DI	4
 
-extern void ieee754_xcpt(struct ieee754xctx *xcp);
-
 /* compat */
 #define ieee754dp_fix(x)	ieee754dp_tint(x)
 #define ieee754sp_fix(x)	ieee754sp_tint(x)

diff --git a/arch/mips/math-emu/ieee754d.c b/arch/mips/math-emu/ieee754d.c
index 9599bdd..a04e8a7 100644
--- a/arch/mips/math-emu/ieee754d.c
+++ b/arch/mips/math-emu/ieee754d.c

@@ -16,7 +16,7 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  *
  *  Nov 7, 2000
  *  Modified to build and operate in Linux kernel environment.
@@ -25,38 +25,13 @@
  *  Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
  */
 
-#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/printk.h>
 #include "ieee754.h"
+#include "ieee754sp.h"
+#include "ieee754dp.h"
 
-#define DP_EBIAS	1023
-#define DP_EMIN		(-1022)
-#define DP_EMAX		1023
-#define DP_FBITS	52
-
-#define SP_EBIAS	127
-#define SP_EMIN		(-126)
-#define SP_EMAX		127
-#define SP_FBITS	23
-
-#define DP_MBIT(x)	((u64)1 << (x))
-#define DP_HIDDEN_BIT	DP_MBIT(DP_FBITS)
-#define DP_SIGN_BIT	DP_MBIT(63)
-
-
-#define SP_MBIT(x)	((u32)1 << (x))
-#define SP_HIDDEN_BIT	SP_MBIT(SP_FBITS)
-#define SP_SIGN_BIT	SP_MBIT(31)
-
-
-#define SPSIGN(sp)	(sp.parts.sign)
-#define SPBEXP(sp)	(sp.parts.bexp)
-#define SPMANT(sp)	(sp.parts.mant)
-
-#define DPSIGN(dp)	(dp.parts.sign)
-#define DPBEXP(dp)	(dp.parts.bexp)
-#define DPMANT(dp)	(dp.parts.mant)
-
-ieee754dp ieee754dp_dump(char *m, ieee754dp x)
+union ieee754dp ieee754dp_dump(char *m, union ieee754dp x)
 {
 	int i;
 
@@ -96,7 +71,7 @@
 	return x;
 }
 
-ieee754sp ieee754sp_dump(char *m, ieee754sp x)
+union ieee754sp ieee754sp_dump(char *m, union ieee754sp x)
 {
 	int i;
 

diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index 068e56b..fd13467 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,104 +16,68 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
+#include <linux/compiler.h>
 
 #include "ieee754dp.h"
 
-int ieee754dp_class(ieee754dp x)
+int ieee754dp_class(union ieee754dp x)
 {
 	COMPXDP;
 	EXPLODEXDP;
 	return xc;
 }
 
-int ieee754dp_isnan(ieee754dp x)
+int ieee754dp_isnan(union ieee754dp x)
 {
 	return ieee754dp_class(x) >= IEEE754_CLASS_SNAN;
 }
 
-int ieee754dp_issnan(ieee754dp x)
+static inline int ieee754dp_issnan(union ieee754dp x)
 {
 	assert(ieee754dp_isnan(x));
-	return ((DPMANT(x) & DP_MBIT(DP_MBITS-1)) == DP_MBIT(DP_MBITS-1));
+	return ((DPMANT(x) & DP_MBIT(DP_FBITS-1)) == DP_MBIT(DP_FBITS-1));
 }
 
 
-ieee754dp ieee754dp_xcpt(ieee754dp r, const char *op, ...)
+union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
 {
-	struct ieee754xctx ax;
-	if (!TSTX())
-		return r;
-
-	ax.op = op;
-	ax.rt = IEEE754_RT_DP;
-	ax.rv.dp = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.dp;
-}
-
-ieee754dp ieee754dp_nanxcpt(ieee754dp r, const char *op, ...)
-{
-	struct ieee754xctx ax;
-
 	assert(ieee754dp_isnan(r));
 
 	if (!ieee754dp_issnan(r))	/* QNAN does not cause invalid op !! */
 		return r;
 
-	if (!SETANDTESTCX(IEEE754_INVALID_OPERATION)) {
+	if (!ieee754_setandtestcx(IEEE754_INVALID_OPERATION)) {
 		/* not enabled convert to a quiet NaN */
-		DPMANT(r) &= (~DP_MBIT(DP_MBITS-1));
+		DPMANT(r) &= (~DP_MBIT(DP_FBITS-1));
 		if (ieee754dp_isnan(r))
 			return r;
 		else
 			return ieee754dp_indef();
 	}
 
-	ax.op = op;
-	ax.rt = 0;
-	ax.rv.dp = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.dp;
+	return r;
 }
 
-ieee754dp ieee754dp_bestnan(ieee754dp x, ieee754dp y)
-{
-	assert(ieee754dp_isnan(x));
-	assert(ieee754dp_isnan(y));
-
-	if (DPMANT(x) > DPMANT(y))
-		return x;
-	else
-		return y;
-}
-
-
-static u64 get_rounding(int sn, u64 xm)
+static u64 ieee754dp_get_rounding(int sn, u64 xm)
 {
 	/* inexact must round of 3 bits
 	 */
 	if (xm & (DP_MBIT(3) - 1)) {
 		switch (ieee754_csr.rm) {
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			xm += 0x3 + ((xm >> 3) & 1);
 			/* xm += (xm&0x8)?0x4:0x3 */
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if (!sn)	/* ?? */
 				xm += 0x8;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if (sn) /* ?? */
 				xm += 0x8;
 			break;
@@ -130,11 +92,11 @@
  * xe is an unbiased exponent
  * xm is 3bit extended precision value.
  */
-ieee754dp ieee754dp_format(int sn, int xe, u64 xm)
+union ieee754dp ieee754dp_format(int sn, int xe, u64 xm)
 {
 	assert(xm);		/* we don't gen exact zeros (probably should) */
 
-	assert((xm >> (DP_MBITS + 1 + 3)) == 0);	/* no execess */
+	assert((xm >> (DP_FBITS + 1 + 3)) == 0);	/* no excess */
 	assert(xm & (DP_HIDDEN_BIT << 3));
 
 	if (xe < DP_EMIN) {
@@ -142,32 +104,32 @@
 		int es = DP_EMIN - xe;
 
 		if (ieee754_csr.nod) {
-			SETCX(IEEE754_UNDERFLOW);
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_INEXACT);
 
 			switch(ieee754_csr.rm) {
-			case IEEE754_RN:
-			case IEEE754_RZ:
+			case FPU_CSR_RN:
+			case FPU_CSR_RZ:
 				return ieee754dp_zero(sn);
-			case IEEE754_RU:    /* toward +Infinity */
-				if(sn == 0)
+			case FPU_CSR_RU:    /* toward +Infinity */
+				if (sn == 0)
 					return ieee754dp_min(0);
 				else
 					return ieee754dp_zero(1);
-			case IEEE754_RD:    /* toward -Infinity */
-				if(sn == 0)
+			case FPU_CSR_RD:    /* toward -Infinity */
+				if (sn == 0)
 					return ieee754dp_zero(0);
 				else
 					return ieee754dp_min(1);
 			}
 		}
 
-		if (xe == DP_EMIN - 1
-				&& get_rounding(sn, xm) >> (DP_MBITS + 1 + 3))
+		if (xe == DP_EMIN - 1 &&
+		    ieee754dp_get_rounding(sn, xm) >> (DP_FBITS + 1 + 3))
 		{
 			/* Not tiny after rounding */
-			SETCX(IEEE754_INEXACT);
-			xm = get_rounding(sn, xm);
+			ieee754_setcx(IEEE754_INEXACT);
+			xm = ieee754dp_get_rounding(sn, xm);
 			xm >>= 1;
 			/* Clear grs bits */
 			xm &= ~(DP_MBIT(3) - 1);
@@ -183,17 +145,17 @@
 		}
 	}
 	if (xm & (DP_MBIT(3) - 1)) {
-		SETCX(IEEE754_INEXACT);
+		ieee754_setcx(IEEE754_INEXACT);
 		if ((xm & (DP_HIDDEN_BIT << 3)) == 0) {
-			SETCX(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_UNDERFLOW);
 		}
 
 		/* inexact must round of 3 bits
 		 */
-		xm = get_rounding(sn, xm);
+		xm = ieee754dp_get_rounding(sn, xm);
 		/* adjust exponent for rounding add overflowing
 		 */
-		if (xm >> (DP_MBITS + 3 + 1)) {
+		if (xm >> (DP_FBITS + 3 + 1)) {
 			/* add causes mantissa overflow */
 			xm >>= 1;
 			xe++;
@@ -202,24 +164,24 @@
 	/* strip grs bits */
 	xm >>= 3;
 
-	assert((xm >> (DP_MBITS + 1)) == 0);	/* no execess */
+	assert((xm >> (DP_FBITS + 1)) == 0);	/* no excess */
 	assert(xe >= DP_EMIN);
 
 	if (xe > DP_EMAX) {
-		SETCX(IEEE754_OVERFLOW);
-		SETCX(IEEE754_INEXACT);
+		ieee754_setcx(IEEE754_OVERFLOW);
+		ieee754_setcx(IEEE754_INEXACT);
 		/* -O can be table indexed by (rm,sn) */
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			return ieee754dp_inf(sn);
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			return ieee754dp_max(sn);
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if (sn == 0)
 				return ieee754dp_inf(0);
 			else
 				return ieee754dp_max(1);
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if (sn == 0)
 				return ieee754dp_max(0);
 			else
@@ -232,10 +194,10 @@
 		/* we underflow (tiny/zero) */
 		assert(xe == DP_EMIN);
 		if (ieee754_csr.mx & IEEE754_UNDERFLOW)
-			SETCX(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_UNDERFLOW);
 		return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm);
 	} else {
-		assert((xm >> (DP_MBITS + 1)) == 0);	/* no execess */
+		assert((xm >> (DP_FBITS + 1)) == 0);	/* no excess */
 		assert(xm & DP_HIDDEN_BIT);
 
 		return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);

diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index f139c72..61fd6fd 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h

@@ -6,8 +6,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -19,64 +17,66 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
+#include <linux/compiler.h>
 
 #include "ieee754int.h"
 
 #define assert(expr) ((void)0)
 
+#define DP_EBIAS	1023
+#define DP_EMIN		(-1022)
+#define DP_EMAX		1023
+#define DP_FBITS	52
+#define DP_MBITS	52
+
+#define DP_MBIT(x)	((u64)1 << (x))
+#define DP_HIDDEN_BIT	DP_MBIT(DP_FBITS)
+#define DP_SIGN_BIT	DP_MBIT(63)
+
+#define DPSIGN(dp)	(dp.sign)
+#define DPBEXP(dp)	(dp.bexp)
+#define DPMANT(dp)	(dp.mant)
+
+static inline int ieee754dp_finite(union ieee754dp x)
+{
+	return DPBEXP(x) != DP_EMAX + 1 + DP_EBIAS;
+}
+
 /* 3bit extended double precision sticky right shift */
 #define XDPSRS(v,rs)	\
-  ((rs > (DP_MBITS+3))?1:((v) >> (rs)) | ((v) << (64-(rs)) != 0))
+	((rs > (DP_FBITS+3))?1:((v) >> (rs)) | ((v) << (64-(rs)) != 0))
 
 #define XDPSRSX1() \
-  (xe++, (xm = (xm >> 1) | (xm & 1)))
+	(xe++, (xm = (xm >> 1) | (xm & 1)))
 
 #define XDPSRS1(v)	\
-  (((v) >> 1) | ((v) & 1))
+	(((v) >> 1) | ((v) & 1))
 
 /* convert denormal to normalized with extended exponent */
 #define DPDNORMx(m,e) \
-  while( (m >> DP_MBITS) == 0) { m <<= 1; e--; }
+	while ((m >> DP_FBITS) == 0) { m <<= 1; e--; }
 #define DPDNORMX	DPDNORMx(xm, xe)
 #define DPDNORMY	DPDNORMx(ym, ye)
 
-static inline ieee754dp builddp(int s, int bx, u64 m)
+static inline union ieee754dp builddp(int s, int bx, u64 m)
 {
-	ieee754dp r;
+	union ieee754dp r;
 
 	assert((s) == 0 || (s) == 1);
 	assert((bx) >= DP_EMIN - 1 + DP_EBIAS
 	       && (bx) <= DP_EMAX + 1 + DP_EBIAS);
-	assert(((m) >> DP_MBITS) == 0);
+	assert(((m) >> DP_FBITS) == 0);
 
-	r.parts.sign = s;
-	r.parts.bexp = bx;
-	r.parts.mant = m;
+	r.sign = s;
+	r.bexp = bx;
+	r.mant = m;
+
 	return r;
 }
 
-extern int ieee754dp_isnan(ieee754dp);
-extern int ieee754dp_issnan(ieee754dp);
-extern int ieee754si_xcpt(int, const char *, ...);
-extern s64 ieee754di_xcpt(s64, const char *, ...);
-extern ieee754dp ieee754dp_xcpt(ieee754dp, const char *, ...);
-extern ieee754dp ieee754dp_nanxcpt(ieee754dp, const char *, ...);
-extern ieee754dp ieee754dp_bestnan(ieee754dp, ieee754dp);
-extern ieee754dp ieee754dp_format(int, int, u64);
-
-
-#define DPNORMRET2(s, e, m, name, a0, a1) \
-{ \
-    ieee754dp V = ieee754dp_format(s, e, m); \
-    if(TSTX()) \
-      return ieee754dp_xcpt(V, name, a0, a1); \
-    else \
-      return V; \
-}
-
-#define DPNORMRET1(s, e, m, name, a0)  DPNORMRET2(s, e, m, name, a0, a0)
+extern int ieee754dp_isnan(union ieee754dp);
+extern union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp);
+extern union ieee754dp ieee754dp_format(int, int, u64);

diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 4b6c6fb3..f0365bb 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h

@@ -6,8 +6,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -19,146 +17,125 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
-
+#ifndef __IEEE754INT_H
+#define __IEEE754INT_H
 
 #include "ieee754.h"
 
-#define DP_EBIAS	1023
-#define DP_EMIN		(-1022)
-#define DP_EMAX		1023
-#define DP_MBITS	52
-
-#define SP_EBIAS	127
-#define SP_EMIN		(-126)
-#define SP_EMAX		127
-#define SP_MBITS	23
-
-#define DP_MBIT(x)	((u64)1 << (x))
-#define DP_HIDDEN_BIT	DP_MBIT(DP_MBITS)
-#define DP_SIGN_BIT	DP_MBIT(63)
-
-#define SP_MBIT(x)	((u32)1 << (x))
-#define SP_HIDDEN_BIT	SP_MBIT(SP_MBITS)
-#define SP_SIGN_BIT	SP_MBIT(31)
-
-
-#define SPSIGN(sp)	(sp.parts.sign)
-#define SPBEXP(sp)	(sp.parts.bexp)
-#define SPMANT(sp)	(sp.parts.mant)
-
-#define DPSIGN(dp)	(dp.parts.sign)
-#define DPBEXP(dp)	(dp.parts.bexp)
-#define DPMANT(dp)	(dp.parts.mant)
-
 #define CLPAIR(x, y)	((x)*6+(y))
 
-#define CLEARCX \
-  (ieee754_csr.cx = 0)
+static inline void ieee754_clearcx(void)
+{
+	ieee754_csr.cx = 0;
+}
 
-#define SETCX(x) \
-  (ieee754_csr.cx |= (x), ieee754_csr.sx |= (x))
+static inline void ieee754_setcx(const unsigned int flags)
+{
+	ieee754_csr.cx |= flags;
+	ieee754_csr.sx |= flags;
+}
 
-#define SETANDTESTCX(x) \
-  (SETCX(x), ieee754_csr.mx & (x))
+static inline int ieee754_setandtestcx(const unsigned int x)
+{
+	ieee754_setcx(x);
 
-#define TSTX()	\
-	(ieee754_csr.cx & ieee754_csr.mx)
-
+	return ieee754_csr.mx & x;
+}
 
 #define COMPXSP \
-  unsigned xm; int xe; int xs __maybe_unused; int xc
+	unsigned xm; int xe; int xs __maybe_unused; int xc
 
 #define COMPYSP \
-  unsigned ym; int ye; int ys; int yc
+	unsigned ym; int ye; int ys; int yc
 
-#define EXPLODESP(v, vc, vs, ve, vm) \
-{\
-    vs = SPSIGN(v);\
-    ve = SPBEXP(v);\
-    vm = SPMANT(v);\
-    if(ve == SP_EMAX+1+SP_EBIAS){\
-	if(vm == 0)\
-	  vc = IEEE754_CLASS_INF;\
-	else if(vm & SP_MBIT(SP_MBITS-1)) \
-	  vc = IEEE754_CLASS_SNAN;\
-	else \
-	  vc = IEEE754_CLASS_QNAN;\
-    } else if(ve == SP_EMIN-1+SP_EBIAS) {\
-	if(vm) {\
-	    ve = SP_EMIN;\
-	    vc = IEEE754_CLASS_DNORM;\
-	} else\
-	  vc = IEEE754_CLASS_ZERO;\
-    } else {\
-	ve -= SP_EBIAS;\
-	vm |= SP_HIDDEN_BIT;\
-	vc = IEEE754_CLASS_NORM;\
-    }\
+#define EXPLODESP(v, vc, vs, ve, vm)					\
+{									\
+	vs = SPSIGN(v);							\
+	ve = SPBEXP(v);							\
+	vm = SPMANT(v);							\
+	if (ve == SP_EMAX+1+SP_EBIAS) {	/* NaN or infinity */		\
+		if (vm == 0)						\
+			vc = IEEE754_CLASS_INF;				\
+		else if (vm & SP_MBIT(SP_FBITS-1))			\
+			vc = IEEE754_CLASS_SNAN;			\
+		else							\
+			vc = IEEE754_CLASS_QNAN;			\
+	} else if (ve == SP_EMIN-1+SP_EBIAS) {	/* zero or denormal */	\
+		if (vm) {						\
+			ve = SP_EMIN;					\
+			vc = IEEE754_CLASS_DNORM;			\
+		} else							\
+			vc = IEEE754_CLASS_ZERO;			\
+	} else {	/* normalised number */				\
+		ve -= SP_EBIAS;						\
+		vm |= SP_HIDDEN_BIT;					\
+		vc = IEEE754_CLASS_NORM;				\
+	}								\
 }
 #define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm)
 #define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym)
 
 
 #define COMPXDP \
-u64 xm; int xe; int xs __maybe_unused; int xc
+	u64 xm; int xe; int xs __maybe_unused; int xc
 
 #define COMPYDP \
-u64 ym; int ye; int ys; int yc
+	u64 ym; int ye; int ys; int yc
 
-#define EXPLODEDP(v, vc, vs, ve, vm) \
-{\
-    vm = DPMANT(v);\
-    vs = DPSIGN(v);\
-    ve = DPBEXP(v);\
-    if(ve == DP_EMAX+1+DP_EBIAS){\
-	if(vm == 0)\
-	  vc = IEEE754_CLASS_INF;\
-	else if(vm & DP_MBIT(DP_MBITS-1)) \
-	  vc = IEEE754_CLASS_SNAN;\
-	else \
-	  vc = IEEE754_CLASS_QNAN;\
-    } else if(ve == DP_EMIN-1+DP_EBIAS) {\
-	if(vm) {\
-	    ve = DP_EMIN;\
-	    vc = IEEE754_CLASS_DNORM;\
-	} else\
-	  vc = IEEE754_CLASS_ZERO;\
-    } else {\
-	ve -= DP_EBIAS;\
-	vm |= DP_HIDDEN_BIT;\
-	vc = IEEE754_CLASS_NORM;\
-    }\
+#define EXPLODEDP(v, vc, vs, ve, vm)					\
+{									\
+	vm = DPMANT(v);							\
+	vs = DPSIGN(v);							\
+	ve = DPBEXP(v);							\
+	if (ve == DP_EMAX+1+DP_EBIAS) {	/* NaN or infinity */		\
+		if (vm == 0)						\
+			vc = IEEE754_CLASS_INF;				\
+		else if (vm & DP_MBIT(DP_FBITS-1))			\
+			vc = IEEE754_CLASS_SNAN;			\
+		else							\
+			vc = IEEE754_CLASS_QNAN;			\
+	} else if (ve == DP_EMIN-1+DP_EBIAS) {	/* zero or denormal */	\
+		if (vm) {						\
+			ve = DP_EMIN;					\
+			vc = IEEE754_CLASS_DNORM;			\
+		} else							\
+			vc = IEEE754_CLASS_ZERO;			\
+	} else {	/* normalised number */				\
+		ve -= DP_EBIAS;						\
+		vm |= DP_HIDDEN_BIT;					\
+		vc = IEEE754_CLASS_NORM;				\
+	}								\
 }
 #define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm)
 #define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym)
 
-#define FLUSHDP(v, vc, vs, ve, vm) \
-	if(vc==IEEE754_CLASS_DNORM) {\
-	    if(ieee754_csr.nod) {\
-		SETCX(IEEE754_INEXACT);\
-		vc = IEEE754_CLASS_ZERO;\
-		ve = DP_EMIN-1+DP_EBIAS;\
-		vm = 0;\
-		v = ieee754dp_zero(vs);\
-	    }\
+#define FLUSHDP(v, vc, vs, ve, vm)					\
+	if (vc==IEEE754_CLASS_DNORM) {					\
+		if (ieee754_csr.nod) {					\
+			ieee754_setcx(IEEE754_INEXACT);			\
+			vc = IEEE754_CLASS_ZERO;			\
+			ve = DP_EMIN-1+DP_EBIAS;			\
+			vm = 0;						\
+			v = ieee754dp_zero(vs);				\
+		}							\
 	}
 
-#define FLUSHSP(v, vc, vs, ve, vm) \
-	if(vc==IEEE754_CLASS_DNORM) {\
-	    if(ieee754_csr.nod) {\
-		SETCX(IEEE754_INEXACT);\
-		vc = IEEE754_CLASS_ZERO;\
-		ve = SP_EMIN-1+SP_EBIAS;\
-		vm = 0;\
-		v = ieee754sp_zero(vs);\
-	    }\
+#define FLUSHSP(v, vc, vs, ve, vm)					\
+	if (vc==IEEE754_CLASS_DNORM) {					\
+		if (ieee754_csr.nod) {					\
+			ieee754_setcx(IEEE754_INEXACT);			\
+			vc = IEEE754_CLASS_ZERO;			\
+			ve = SP_EMIN-1+SP_EBIAS;			\
+			vm = 0;						\
+			v = ieee754sp_zero(vs);				\
+		}							\
 	}
 
 #define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm)
 #define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym)
 #define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm)
 #define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym)
+
+#endif /* __IEEE754INT_H  */

diff --git a/arch/mips/math-emu/ieee754m.c b/arch/mips/math-emu/ieee754m.c
deleted file mode 100644
index 24190f3..0000000
--- a/arch/mips/math-emu/ieee754m.c
+++ /dev/null

@@ -1,55 +0,0 @@
-/*
- * floor, trunc, ceil
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754.h"
-
-ieee754dp ieee754dp_floor(ieee754dp x)
-{
-	ieee754dp i;
-
-	if (ieee754dp_lt(ieee754dp_modf(x, &i), ieee754dp_zero(0)))
-		return ieee754dp_sub(i, ieee754dp_one(0));
-	else
-		return i;
-}
-
-ieee754dp ieee754dp_ceil(ieee754dp x)
-{
-	ieee754dp i;
-
-	if (ieee754dp_gt(ieee754dp_modf(x, &i), ieee754dp_zero(0)))
-		return ieee754dp_add(i, ieee754dp_one(0));
-	else
-		return i;
-}
-
-ieee754dp ieee754dp_trunc(ieee754dp x)
-{
-	ieee754dp i;
-
-	(void) ieee754dp_modf(x, &i);
-	return i;
-}

diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index 15d1e36..d348efe 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,105 +16,68 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
+#include <linux/compiler.h>
 
 #include "ieee754sp.h"
 
-int ieee754sp_class(ieee754sp x)
+int ieee754sp_class(union ieee754sp x)
 {
 	COMPXSP;
 	EXPLODEXSP;
 	return xc;
 }
 
-int ieee754sp_isnan(ieee754sp x)
+int ieee754sp_isnan(union ieee754sp x)
 {
 	return ieee754sp_class(x) >= IEEE754_CLASS_SNAN;
 }
 
-int ieee754sp_issnan(ieee754sp x)
+static inline int ieee754sp_issnan(union ieee754sp x)
 {
 	assert(ieee754sp_isnan(x));
-	return (SPMANT(x) & SP_MBIT(SP_MBITS-1));
+	return (SPMANT(x) & SP_MBIT(SP_FBITS-1));
 }
 
 
-ieee754sp ieee754sp_xcpt(ieee754sp r, const char *op, ...)
+union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
 {
-	struct ieee754xctx ax;
-
-	if (!TSTX())
-		return r;
-
-	ax.op = op;
-	ax.rt = IEEE754_RT_SP;
-	ax.rv.sp = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.sp;
-}
-
-ieee754sp ieee754sp_nanxcpt(ieee754sp r, const char *op, ...)
-{
-	struct ieee754xctx ax;
-
 	assert(ieee754sp_isnan(r));
 
 	if (!ieee754sp_issnan(r))	/* QNAN does not cause invalid op !! */
 		return r;
 
-	if (!SETANDTESTCX(IEEE754_INVALID_OPERATION)) {
+	if (!ieee754_setandtestcx(IEEE754_INVALID_OPERATION)) {
 		/* not enabled convert to a quiet NaN */
-		SPMANT(r) &= (~SP_MBIT(SP_MBITS-1));
+		SPMANT(r) &= (~SP_MBIT(SP_FBITS-1));
 		if (ieee754sp_isnan(r))
 			return r;
 		else
 			return ieee754sp_indef();
 	}
 
-	ax.op = op;
-	ax.rt = 0;
-	ax.rv.sp = r;
-	va_start(ax.ap, op);
-	ieee754_xcpt(&ax);
-	va_end(ax.ap);
-	return ax.rv.sp;
+	return r;
 }
 
-ieee754sp ieee754sp_bestnan(ieee754sp x, ieee754sp y)
-{
-	assert(ieee754sp_isnan(x));
-	assert(ieee754sp_isnan(y));
-
-	if (SPMANT(x) > SPMANT(y))
-		return x;
-	else
-		return y;
-}
-
-
-static unsigned get_rounding(int sn, unsigned xm)
+static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
 {
 	/* inexact must round of 3 bits
 	 */
 	if (xm & (SP_MBIT(3) - 1)) {
 		switch (ieee754_csr.rm) {
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			xm += 0x3 + ((xm >> 3) & 1);
 			/* xm += (xm&0x8)?0x4:0x3 */
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if (!sn)	/* ?? */
 				xm += 0x8;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if (sn) /* ?? */
 				xm += 0x8;
 			break;
@@ -131,11 +92,11 @@
  * xe is an unbiased exponent
  * xm is 3bit extended precision value.
  */
-ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
+union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
 {
 	assert(xm);		/* we don't gen exact zeros (probably should) */
 
-	assert((xm >> (SP_MBITS + 1 + 3)) == 0);	/* no execess */
+	assert((xm >> (SP_FBITS + 1 + 3)) == 0);	/* no excess */
 	assert(xm & (SP_HIDDEN_BIT << 3));
 
 	if (xe < SP_EMIN) {
@@ -143,38 +104,37 @@
 		int es = SP_EMIN - xe;
 
 		if (ieee754_csr.nod) {
-			SETCX(IEEE754_UNDERFLOW);
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_INEXACT);
 
 			switch(ieee754_csr.rm) {
-			case IEEE754_RN:
-			case IEEE754_RZ:
+			case FPU_CSR_RN:
+			case FPU_CSR_RZ:
 				return ieee754sp_zero(sn);
-			case IEEE754_RU:      /* toward +Infinity */
-				if(sn == 0)
+			case FPU_CSR_RU:      /* toward +Infinity */
+				if (sn == 0)
 					return ieee754sp_min(0);
 				else
 					return ieee754sp_zero(1);
-			case IEEE754_RD:      /* toward -Infinity */
-				if(sn == 0)
+			case FPU_CSR_RD:      /* toward -Infinity */
+				if (sn == 0)
 					return ieee754sp_zero(0);
 				else
 					return ieee754sp_min(1);
 			}
 		}
 
-		if (xe == SP_EMIN - 1
-				&& get_rounding(sn, xm) >> (SP_MBITS + 1 + 3))
+		if (xe == SP_EMIN - 1 &&
+		    ieee754sp_get_rounding(sn, xm) >> (SP_FBITS + 1 + 3))
 		{
 			/* Not tiny after rounding */
-			SETCX(IEEE754_INEXACT);
-			xm = get_rounding(sn, xm);
+			ieee754_setcx(IEEE754_INEXACT);
+			xm = ieee754sp_get_rounding(sn, xm);
 			xm >>= 1;
 			/* Clear grs bits */
 			xm &= ~(SP_MBIT(3) - 1);
 			xe++;
-		}
-		else {
+		} else {
 			/* sticky right shift es bits
 			 */
 			SPXSRSXn(es);
@@ -183,17 +143,17 @@
 		}
 	}
 	if (xm & (SP_MBIT(3) - 1)) {
-		SETCX(IEEE754_INEXACT);
+		ieee754_setcx(IEEE754_INEXACT);
 		if ((xm & (SP_HIDDEN_BIT << 3)) == 0) {
-			SETCX(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_UNDERFLOW);
 		}
 
 		/* inexact must round of 3 bits
 		 */
-		xm = get_rounding(sn, xm);
+		xm = ieee754sp_get_rounding(sn, xm);
 		/* adjust exponent for rounding add overflowing
 		 */
-		if (xm >> (SP_MBITS + 1 + 3)) {
+		if (xm >> (SP_FBITS + 1 + 3)) {
 			/* add causes mantissa overflow */
 			xm >>= 1;
 			xe++;
@@ -202,24 +162,24 @@
 	/* strip grs bits */
 	xm >>= 3;
 
-	assert((xm >> (SP_MBITS + 1)) == 0);	/* no execess */
+	assert((xm >> (SP_FBITS + 1)) == 0);	/* no excess */
 	assert(xe >= SP_EMIN);
 
 	if (xe > SP_EMAX) {
-		SETCX(IEEE754_OVERFLOW);
-		SETCX(IEEE754_INEXACT);
+		ieee754_setcx(IEEE754_OVERFLOW);
+		ieee754_setcx(IEEE754_INEXACT);
 		/* -O can be table indexed by (rm,sn) */
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			return ieee754sp_inf(sn);
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			return ieee754sp_max(sn);
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if (sn == 0)
 				return ieee754sp_inf(0);
 			else
 				return ieee754sp_max(1);
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if (sn == 0)
 				return ieee754sp_max(0);
 			else
@@ -232,10 +192,10 @@
 		/* we underflow (tiny/zero) */
 		assert(xe == SP_EMIN);
 		if (ieee754_csr.mx & IEEE754_UNDERFLOW)
-			SETCX(IEEE754_UNDERFLOW);
+			ieee754_setcx(IEEE754_UNDERFLOW);
 		return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm);
 	} else {
-		assert((xm >> (SP_MBITS + 1)) == 0);	/* no execess */
+		assert((xm >> (SP_FBITS + 1)) == 0);	/* no excess */
 		assert(xm & SP_HIDDEN_BIT);
 
 		return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT);

diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 754fd54..ad268e3 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h

@@ -6,8 +6,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -19,70 +17,71 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
+#include <linux/compiler.h>
 
 #include "ieee754int.h"
 
 #define assert(expr) ((void)0)
 
+#define SP_EBIAS	127
+#define SP_EMIN		(-126)
+#define SP_EMAX		127
+#define SP_FBITS	23
+#define SP_MBITS	23
+
+#define SP_MBIT(x)	((u32)1 << (x))
+#define SP_HIDDEN_BIT	SP_MBIT(SP_FBITS)
+#define SP_SIGN_BIT	SP_MBIT(31)
+
+#define SPSIGN(sp)	(sp.sign)
+#define SPBEXP(sp)	(sp.bexp)
+#define SPMANT(sp)	(sp.mant)
+
+static inline int ieee754sp_finite(union ieee754sp x)
+{
+	return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
+}
+
 /* 3bit extended single precision sticky right shift */
-#define SPXSRSXn(rs) \
-  (xe += rs, \
-   xm = (rs > (SP_MBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0))
+#define SPXSRSXn(rs)							\
+	(xe += rs,							\
+	 xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0))
 
 #define SPXSRSX1() \
-  (xe++, (xm = (xm >> 1) | (xm & 1)))
+	(xe++, (xm = (xm >> 1) | (xm & 1)))
 
-#define SPXSRSYn(rs) \
-   (ye+=rs, \
-    ym = (rs > (SP_MBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0))
+#define SPXSRSYn(rs)								\
+	(ye+=rs,								\
+	 ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0))
 
 #define SPXSRSY1() \
-   (ye++, (ym = (ym >> 1) | (ym & 1)))
+	(ye++, (ym = (ym >> 1) | (ym & 1)))
 
 /* convert denormal to normalized with extended exponent */
 #define SPDNORMx(m,e) \
-  while( (m >> SP_MBITS) == 0) { m <<= 1; e--; }
+	while ((m >> SP_FBITS) == 0) { m <<= 1; e--; }
 #define SPDNORMX	SPDNORMx(xm, xe)
 #define SPDNORMY	SPDNORMx(ym, ye)
 
-static inline ieee754sp buildsp(int s, int bx, unsigned m)
+static inline union ieee754sp buildsp(int s, int bx, unsigned m)
 {
-	ieee754sp r;
+	union ieee754sp r;
 
 	assert((s) == 0 || (s) == 1);
 	assert((bx) >= SP_EMIN - 1 + SP_EBIAS
 	       && (bx) <= SP_EMAX + 1 + SP_EBIAS);
-	assert(((m) >> SP_MBITS) == 0);
+	assert(((m) >> SP_FBITS) == 0);
 
-	r.parts.sign = s;
-	r.parts.bexp = bx;
-	r.parts.mant = m;
+	r.sign = s;
+	r.bexp = bx;
+	r.mant = m;
 
 	return r;
 }
 
-extern int ieee754sp_isnan(ieee754sp);
-extern int ieee754sp_issnan(ieee754sp);
-extern int ieee754si_xcpt(int, const char *, ...);
-extern s64 ieee754di_xcpt(s64, const char *, ...);
-extern ieee754sp ieee754sp_xcpt(ieee754sp, const char *, ...);
-extern ieee754sp ieee754sp_nanxcpt(ieee754sp, const char *, ...);
-extern ieee754sp ieee754sp_bestnan(ieee754sp, ieee754sp);
-extern ieee754sp ieee754sp_format(int, int, unsigned);
-
-
-#define SPNORMRET2(s, e, m, name, a0, a1) \
-{ \
-    ieee754sp V = ieee754sp_format(s, e, m); \
-    if(TSTX()) \
-      return ieee754sp_xcpt(V, name, a0, a1); \
-    else \
-      return V; \
-}
-
-#define SPNORMRET1(s, e, m, name, a0)  SPNORMRET2(s, e, m, name, a0, a0)
+extern int ieee754sp_isnan(union ieee754sp);
+extern union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp);
+extern union ieee754sp ieee754sp_format(int, int, unsigned);

diff --git a/arch/mips/math-emu/ieee754xcpt.c b/arch/mips/math-emu/ieee754xcpt.c
deleted file mode 100644
index 9671671..0000000
--- a/arch/mips/math-emu/ieee754xcpt.c
+++ /dev/null

@@ -1,47 +0,0 @@
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-/**************************************************************************
- *  Nov 7, 2000
- *  Added preprocessor hacks to map to Linux kernel diagnostics.
- *
- *  Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
- *  Copyright (C) 2000 MIPS Technologies, Inc.	All rights reserved.
- *************************************************************************/
-
-#include <linux/kernel.h>
-#include "ieee754.h"
-
-/*
- * Very naff exception handler (you can plug in your own and
- * override this).
- */
-
-static const char *const rtnames[] = {
-	"sp", "dp", "xp", "si", "di"
-};
-
-void ieee754_xcpt(struct ieee754xctx *xcp)
-{
-	printk(KERN_DEBUG "floating point exception in \"%s\", type=%s\n",
-		xcp->op, rtnames[xcp->rt]);
-}

diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
deleted file mode 100644
index eb58a85..0000000
--- a/arch/mips/math-emu/kernel_linkage.c
+++ /dev/null

@@ -1,45 +0,0 @@
-/*
- *  Kevin D. Kissell, kevink@mips and Carsten Langgaard, carstenl@mips.com
- *  Copyright (C) 2000 MIPS Technologies, Inc.	All rights reserved.
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Routines corresponding to Linux kernel FP context
- * manipulation primitives for the Algorithmics MIPS
- * FPU Emulator
- */
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <asm/signal.h>
-#include <asm/uaccess.h>
-
-#include <asm/fpu.h>
-#include <asm/fpu_emulator.h>
-
-#define SIGNALLING_NAN 0x7ff800007ff80000LL
-
-void fpu_emulator_init_fpu(void)
-{
-	static int first = 1;
-	int i;
-
-	if (first) {
-		first = 0;
-		printk("Algorithmics/MIPS FPU Emulator v1.5\n");
-	}
-
-	current->thread.fpu.fcr31 = 0;
-	for (i = 0; i < 32; i++)
-		set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
-}

diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
new file mode 100644
index 0000000..becdd63
--- /dev/null
+++ b/arch/mips/math-emu/me-debugfs.c

@@ -0,0 +1,67 @@
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <asm/fpu_emulator.h>
+#include <asm/local.h>
+
+DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
+
+static int fpuemu_stat_get(void *data, u64 *val)
+{
+	int cpu;
+	unsigned long sum = 0;	/* running total across CPUs */
+
+	for_each_online_cpu(cpu) {	/* sum one counter over online CPUs */
+		struct mips_fpu_emulator_stats *ps;
+		local_t *pv;
+
+		ps = &per_cpu(fpuemustats, cpu);	/* this CPU's stats block */
+		pv = (void *)ps + (unsigned long)data;	/* data is a byte offset */
+		sum += local_read(pv);
+	}
+	*val = sum;	/* total is handed back through val */
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
+
+extern struct dentry *mips_debugfs_dir;
+static int __init debugfs_fpuemu(void)
+{
+	struct dentry *d, *dir;
+
+	if (!mips_debugfs_dir)	/* no parent debugfs directory to attach to */
+		return -ENODEV;
+	dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
+	if (!dir)	/* directory creation failed */
+		return -ENOMEM;
+
+#define FPU_EMU_STAT_OFFSET(m)						\
+	offsetof(struct mips_fpu_emulator_stats, m)
+
+#define FPU_STAT_CREATE(m)						\
+do {									\
+	d = debugfs_create_file(#m , S_IRUGO, dir,			\
+				(void *)FPU_EMU_STAT_OFFSET(m),		\
+				&fops_fpuemu_stat);			\
+	if (!d)								\
+		return -ENOMEM;						\
+} while (0)
+
+	FPU_STAT_CREATE(emulated);	/* macro returns -ENOMEM on failure */
+	FPU_STAT_CREATE(loads);
+	FPU_STAT_CREATE(stores);
+	FPU_STAT_CREATE(cp1ops);
+	FPU_STAT_CREATE(cp1xops);
+	FPU_STAT_CREATE(errors);
+	FPU_STAT_CREATE(ieee754_inexact);
+	FPU_STAT_CREATE(ieee754_underflow);
+	FPU_STAT_CREATE(ieee754_overflow);
+	FPU_STAT_CREATE(ieee754_zerodiv);
+	FPU_STAT_CREATE(ieee754_invalidop);
+
+	return 0;	/* every counter file was created */
+}
+__initcall(debugfs_fpuemu);

diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index c446e64..2d84d46 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,22 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_add(ieee754sp x, ieee754sp y)
+union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
 {
+	int s;
+
 	COMPXSP;
 	COMPYSP;
 
 	EXPLODEXSP;
 	EXPLODEYSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXSP;
 	FLUSHYSP;
@@ -51,8 +48,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "add", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,14 +65,14 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 		if (xs == ys)
 			return x;
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_xcpt(ieee754sp_indef(), "add", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -87,15 +84,14 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return x;
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 		if (xs == ys)
 			return x;
 		else
-			return ieee754sp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
@@ -108,6 +104,8 @@
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
 
+		/* FALL THROUGH */
+
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 		SPDNORMY;
 		break;
@@ -122,33 +120,38 @@
 	assert(xm & SP_HIDDEN_BIT);
 	assert(ym & SP_HIDDEN_BIT);
 
-	/* provide guard,round and stick bit space */
+	/*
+	 * Provide guard, round and sticky bit space.
+	 */
 	xm <<= 3;
 	ym <<= 3;
 
 	if (xe > ye) {
-		/* have to shift y fraction right to align
+		/*
+		 * Have to shift y fraction right to align.
 		 */
-		int s = xe - ye;
+		s = xe - ye;
 		SPXSRSYn(s);
 	} else if (ye > xe) {
-		/* have to shift x fraction right to align
+		/*
+		 * Have to shift x fraction right to align.
 		 */
-		int s = ye - xe;
+		s = ye - xe;
 		SPXSRSXn(s);
 	}
 	assert(xe == ye);
 	assert(xe <= SP_EMAX);
 
 	if (xs == ys) {
-		/* generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in xm,xs,xe
+		/*
+		 * Generate 28 bit result of adding two 27 bit numbers
+		 * leaving result in xm, xs and xe.
 		 */
 		xm = xm + ym;
 		xe = xe;
 		xs = xs;
 
-		if (xm >> (SP_MBITS + 1 + 3)) { /* carry out */
+		if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */
 			SPXSRSX1();
 		}
 	} else {
@@ -162,15 +165,16 @@
 			xs = ys;
 		}
 		if (xm == 0)
-			return ieee754sp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
-		/* normalize in extended single precision */
-		while ((xm >> (SP_MBITS + 3)) == 0) {
+		/*
+		 * Normalize in extended single precision
+		 */
+		while ((xm >> (SP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
-
 	}
-	SPNORMRET2(xs, xe, xm, "add", x, y);
+
+	return ieee754sp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/sp_cmp.c b/arch/mips/math-emu/sp_cmp.c
index 716cf37..addbccb 100644
--- a/arch/mips/math-emu/sp_cmp.c
+++ b/arch/mips/math-emu/sp_cmp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,16 +16,16 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-int ieee754sp_cmp(ieee754sp x, ieee754sp y, int cmp, int sig)
+int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cmp, int sig)
 {
+	int vx;
+	int vy;
+
 	COMPXSP;
 	COMPYSP;
 
@@ -35,21 +33,21 @@
 	EXPLODEYSP;
 	FLUSHXSP;
 	FLUSHYSP;
-	CLEARCX;	/* Even clear inexact flag here */
+	ieee754_clearcx();	/* Even clear inexact flag here */
 
 	if (ieee754sp_isnan(x) || ieee754sp_isnan(y)) {
 		if (sig || xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
-			SETCX(IEEE754_INVALID_OPERATION);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
 		if (cmp & IEEE754_CUN)
 			return 1;
 		if (cmp & (IEEE754_CLT | IEEE754_CGT)) {
-			if (sig && SETANDTESTCX(IEEE754_INVALID_OPERATION))
-				return ieee754si_xcpt(0, "fcmpf", x);
+			if (sig && ieee754_setandtestcx(IEEE754_INVALID_OPERATION))
+				return 0;
 		}
 		return 0;
 	} else {
-		int vx = x.bits;
-		int vy = y.bits;
+		vx = x.bits;
+		vy = y.bits;
 
 		if (vx < 0)
 			vx = -vx ^ SP_SIGN_BIT;

diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c
index d774792..721f317 100644
--- a/arch/mips/math-emu/sp_div.c
+++ b/arch/mips/math-emu/sp_div.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,24 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_div(ieee754sp x, ieee754sp y)
+union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
 {
+	unsigned rm;
+	int re;
+	unsigned bm;
+
 	COMPXSP;
 	COMPYSP;
 
 	EXPLODEXSP;
 	EXPLODEYSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXSP;
 	FLUSHYSP;
@@ -51,8 +50,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,12 +67,12 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_xcpt(ieee754sp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -85,17 +84,17 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return ieee754sp_inf(xs ^ ys);
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_xcpt(ieee754sp_indef(), "div", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		SETCX(IEEE754_ZERO_DIVIDE);
-		return ieee754sp_xcpt(ieee754sp_inf(xs ^ ys), "div", x, y);
+		ieee754_setcx(IEEE754_ZERO_DIVIDE);
+		return ieee754sp_inf(xs ^ ys);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
@@ -122,35 +121,33 @@
 	xm <<= 3;
 	ym <<= 3;
 
-	{
-		/* now the dirty work */
+	/* now the dirty work */
 
-		unsigned rm = 0;
-		int re = xe - ye;
-		unsigned bm;
+	rm = 0;
+	re = xe - ye;
 
-		for (bm = SP_MBIT(SP_MBITS + 2); bm; bm >>= 1) {
-			if (xm >= ym) {
-				xm -= ym;
-				rm |= bm;
-				if (xm == 0)
-					break;
-			}
-			xm <<= 1;
+	for (bm = SP_MBIT(SP_FBITS + 2); bm; bm >>= 1) {
+		if (xm >= ym) {
+			xm -= ym;
+			rm |= bm;
+			if (xm == 0)
+				break;
 		}
-		rm <<= 1;
-		if (xm)
-			rm |= 1;	/* have remainder, set sticky */
-
-		assert(rm);
-
-		/* normalise rm to rounding precision ?
-		 */
-		while ((rm >> (SP_MBITS + 3)) == 0) {
-			rm <<= 1;
-			re--;
-		}
-
-		SPNORMRET2(xs == ys ? 0 : 1, re, rm, "div", x, y);
+		xm <<= 1;
 	}
+
+	rm <<= 1;
+	if (xm)
+		rm |= 1;	/* have remainder, set sticky */
+
+	assert(rm);
+
+	/* normalise rm to rounding precision ?
+	 */
+	while ((rm >> (SP_FBITS + 3)) == 0) {
+		rm <<= 1;
+		re--;
+	}
+
+	return ieee754sp_format(xs == ys ? 0 : 1, re, rm);
 }

diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index e1515aa..1b266fb 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,59 +16,61 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
+#include "ieee754dp.h"
 
-ieee754sp ieee754sp_fdp(ieee754dp x)
+union ieee754sp ieee754sp_fdp(union ieee754dp x)
 {
+	u32 rm;
+
 	COMPXDP;
-	ieee754sp nan;
+	union ieee754sp nan;
 
 	EXPLODEXDP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXDP;
 
 	switch (xc) {
 	case IEEE754_CLASS_SNAN:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "fdp");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
+
 	case IEEE754_CLASS_QNAN:
 		nan = buildsp(xs, SP_EMAX + 1 + SP_EBIAS, (u32)
-				(xm >> (DP_MBITS - SP_MBITS)));
+				(xm >> (DP_FBITS - SP_FBITS)));
 		if (!ieee754sp_isnan(nan))
 			nan = ieee754sp_indef();
-		return ieee754sp_nanxcpt(nan, "fdp", x);
+		return ieee754sp_nanxcpt(nan);
+
 	case IEEE754_CLASS_INF:
 		return ieee754sp_inf(xs);
+
 	case IEEE754_CLASS_ZERO:
 		return ieee754sp_zero(xs);
+
 	case IEEE754_CLASS_DNORM:
 		/* can't possibly be sp representable */
-		SETCX(IEEE754_UNDERFLOW);
-		SETCX(IEEE754_INEXACT);
-		if ((ieee754_csr.rm == IEEE754_RU && !xs) ||
-				(ieee754_csr.rm == IEEE754_RD && xs))
-			return ieee754sp_xcpt(ieee754sp_mind(xs), "fdp", x);
-		return ieee754sp_xcpt(ieee754sp_zero(xs), "fdp", x);
+		ieee754_setcx(IEEE754_UNDERFLOW);
+		ieee754_setcx(IEEE754_INEXACT);
+		if ((ieee754_csr.rm == FPU_CSR_RU && !xs) ||
+				(ieee754_csr.rm == FPU_CSR_RD && xs))
+			return ieee754sp_mind(xs);
+		return ieee754sp_zero(xs);
+
 	case IEEE754_CLASS_NORM:
 		break;
 	}
 
-	{
-		u32 rm;
+	/*
+	 * Convert from DP_FBITS to SP_FBITS+3 with sticky right shift.
+	 */
+	rm = (xm >> (DP_FBITS - (SP_FBITS + 3))) |
+	     ((xm << (64 - (DP_FBITS - (SP_FBITS + 3)))) != 0);
 
-		/* convert from DP_MBITS to SP_MBITS+3 with sticky right shift
-		 */
-		rm = (xm >> (DP_MBITS - (SP_MBITS + 3))) |
-		    ((xm << (64 - (DP_MBITS - (SP_MBITS + 3)))) != 0);
-
-		SPNORMRET1(xs, xe, rm, "fdp", x);
-	}
+	return ieee754sp_format(xs, xe, rm);
 }

diff --git a/arch/mips/math-emu/sp_fint.c b/arch/mips/math-emu/sp_fint.c
index 9694d6c..d5d8495 100644
--- a/arch/mips/math-emu/sp_fint.c
+++ b/arch/mips/math-emu/sp_fint.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,21 +16,18 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_fint(int x)
+union ieee754sp ieee754sp_fint(int x)
 {
 	unsigned xm;
 	int xe;
 	int xs;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	if (x == 0)
 		return ieee754sp_zero(0);
@@ -50,30 +45,21 @@
 	} else {
 		xm = x;
 	}
-	xe = SP_MBITS + 3;
+	xe = SP_FBITS + 3;
 
-	if (xm >> (SP_MBITS + 1 + 3)) {
+	if (xm >> (SP_FBITS + 1 + 3)) {
 		/* shunt out overflow bits
 		 */
-		while (xm >> (SP_MBITS + 1 + 3)) {
+		while (xm >> (SP_FBITS + 1 + 3)) {
 			SPXSRSX1();
 		}
 	} else {
 		/* normalize in grs extended single precision
 		 */
-		while ((xm >> (SP_MBITS + 3)) == 0) {
+		while ((xm >> (SP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 	}
-	SPNORMRET1(xs, xe, xm, "fint", x);
-}
-
-
-ieee754sp ieee754sp_funs(unsigned int u)
-{
-	if ((int) u < 0)
-		return ieee754sp_add(ieee754sp_1e31(),
-				     ieee754sp_fint(u & ~(1 << 31)));
-	return ieee754sp_fint(u);
+	return ieee754sp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/sp_flong.c b/arch/mips/math-emu/sp_flong.c
index 16a651f..012e30c 100644
--- a/arch/mips/math-emu/sp_flong.c
+++ b/arch/mips/math-emu/sp_flong.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,21 +16,18 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_flong(s64 x)
+union ieee754sp ieee754sp_flong(s64 x)
 {
 	u64 xm;		/* <--- need 64-bit mantissa temp */
 	int xe;
 	int xs;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	if (x == 0)
 		return ieee754sp_zero(0);
@@ -50,29 +45,20 @@
 	} else {
 		xm = x;
 	}
-	xe = SP_MBITS + 3;
+	xe = SP_FBITS + 3;
 
-	if (xm >> (SP_MBITS + 1 + 3)) {
+	if (xm >> (SP_FBITS + 1 + 3)) {
 		/* shunt out overflow bits
 		 */
-		while (xm >> (SP_MBITS + 1 + 3)) {
+		while (xm >> (SP_FBITS + 1 + 3)) {
 			SPXSRSX1();
 		}
 	} else {
 		/* normalize in grs extended single precision */
-		while ((xm >> (SP_MBITS + 3)) == 0) {
+		while ((xm >> (SP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 	}
-	SPNORMRET1(xs, xe, xm, "sp_flong", x);
-}
-
-
-ieee754sp ieee754sp_fulong(u64 u)
-{
-	if ((s64) u < 0)
-		return ieee754sp_add(ieee754sp_1e63(),
-				     ieee754sp_flong(u & ~(1ULL << 63)));
-	return ieee754sp_flong(u);
+	return ieee754sp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/sp_frexp.c b/arch/mips/math-emu/sp_frexp.c
deleted file mode 100644
index 5bc993c..0000000
--- a/arch/mips/math-emu/sp_frexp.c
+++ /dev/null

@@ -1,52 +0,0 @@
-/* IEEE754 floating point arithmetic
- * single precision
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754sp.h"
-
-/* close to ieeep754sp_logb
-*/
-ieee754sp ieee754sp_frexp(ieee754sp x, int *eptr)
-{
-	COMPXSP;
-	CLEARCX;
-	EXPLODEXSP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		*eptr = 0;
-		return x;
-	case IEEE754_CLASS_DNORM:
-		SPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	*eptr = xe + 1;
-	return buildsp(xs, -1 + SP_EBIAS, xm & ~SP_HIDDEN_BIT);
-}

diff --git a/arch/mips/math-emu/sp_logb.c b/arch/mips/math-emu/sp_logb.c
deleted file mode 100644
index 9c14e0c..0000000
--- a/arch/mips/math-emu/sp_logb.c
+++ /dev/null

@@ -1,53 +0,0 @@
-/* IEEE754 floating point arithmetic
- * single precision
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754sp.h"
-
-ieee754sp ieee754sp_logb(ieee754sp x)
-{
-	COMPXSP;
-
-	CLEARCX;
-
-	EXPLODEXSP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-		return ieee754sp_nanxcpt(x, "logb", x);
-	case IEEE754_CLASS_QNAN:
-		return x;
-	case IEEE754_CLASS_INF:
-		return ieee754sp_inf(0);
-	case IEEE754_CLASS_ZERO:
-		return ieee754sp_inf(1);
-	case IEEE754_CLASS_DNORM:
-		SPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	return ieee754sp_fint(xe);
-}

diff --git a/arch/mips/math-emu/sp_modf.c b/arch/mips/math-emu/sp_modf.c
deleted file mode 100644
index 25a0fba..0000000
--- a/arch/mips/math-emu/sp_modf.c
+++ /dev/null

@@ -1,79 +0,0 @@
-/* IEEE754 floating point arithmetic
- * single precision
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754sp.h"
-
-/* modf function is always exact for a finite number
-*/
-ieee754sp ieee754sp_modf(ieee754sp x, ieee754sp *ip)
-{
-	COMPXSP;
-
-	CLEARCX;
-
-	EXPLODEXSP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		*ip = x;
-		return x;
-	case IEEE754_CLASS_DNORM:
-		/* far to small */
-		*ip = ieee754sp_zero(xs);
-		return x;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	if (xe < 0) {
-		*ip = ieee754sp_zero(xs);
-		return x;
-	}
-	if (xe >= SP_MBITS) {
-		*ip = x;
-		return ieee754sp_zero(xs);
-	}
-	/* generate ipart mantissa by clearing bottom bits
-	 */
-	*ip = buildsp(xs, xe + SP_EBIAS,
-		      ((xm >> (SP_MBITS - xe)) << (SP_MBITS - xe)) &
-		      ~SP_HIDDEN_BIT);
-
-	/* generate fpart mantissa by clearing top bits
-	 * and normalizing (must be able to normalize)
-	 */
-	xm = (xm << (32 - (SP_MBITS - xe))) >> (32 - (SP_MBITS - xe));
-	if (xm == 0)
-		return ieee754sp_zero(xs);
-
-	while ((xm >> SP_MBITS) == 0) {
-		xm <<= 1;
-		xe--;
-	}
-	return buildsp(xs, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT);
-}

diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c
index fa4675c..890c13a 100644
--- a/arch/mips/math-emu/sp_mul.c
+++ b/arch/mips/math-emu/sp_mul.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,32 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_mul(ieee754sp x, ieee754sp y)
+union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
 {
+	int re;
+	int rs;
+	unsigned rm;
+	unsigned short lxm;
+	unsigned short hxm;
+	unsigned short lym;
+	unsigned short hym;
+	unsigned lrm;
+	unsigned hrm;
+	unsigned t;
+	unsigned at;
+
 	COMPXSP;
 	COMPYSP;
 
 	EXPLODEXSP;
 	EXPLODEYSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXSP;
 	FLUSHYSP;
@@ -51,8 +58,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "mul", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,12 +75,13 @@
 		return x;
 
 
-		/* Infinity handling */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_xcpt(ieee754sp_indef(), "mul", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_indef();
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
@@ -108,63 +116,50 @@
 	assert(xm & SP_HIDDEN_BIT);
 	assert(ym & SP_HIDDEN_BIT);
 
-	{
-		int re = xe + ye;
-		int rs = xs ^ ys;
-		unsigned rm;
+	re = xe + ye;
+	rs = xs ^ ys;
 
-		/* shunt to top of word */
-		xm <<= 32 - (SP_MBITS + 1);
-		ym <<= 32 - (SP_MBITS + 1);
+	/* shunt to top of word */
+	xm <<= 32 - (SP_FBITS + 1);
+	ym <<= 32 - (SP_FBITS + 1);
 
-		/* multiply 32bits xm,ym to give high 32bits rm with stickness
-		 */
-		{
-			unsigned short lxm = xm & 0xffff;
-			unsigned short hxm = xm >> 16;
-			unsigned short lym = ym & 0xffff;
-			unsigned short hym = ym >> 16;
-			unsigned lrm;
-			unsigned hrm;
+	/*
+	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickiness.
+	 */
+	lxm = xm & 0xffff;
+	hxm = xm >> 16;
+	lym = ym & 0xffff;
+	hym = ym >> 16;
 
-			lrm = lxm * lym;	/* 16 * 16 => 32 */
-			hrm = hxm * hym;	/* 16 * 16 => 32 */
+	lrm = lxm * lym;	/* 16 * 16 => 32 */
+	hrm = hxm * hym;	/* 16 * 16 => 32 */
 
-			{
-				unsigned t = lxm * hym; /* 16 * 16 => 32 */
-				{
-					unsigned at = lrm + (t << 16);
-					hrm += at < lrm;
-					lrm = at;
-				}
-				hrm = hrm + (t >> 16);
-			}
+	t = lxm * hym; /* 16 * 16 => 32 */
+	at = lrm + (t << 16);
+	hrm += at < lrm;
+	lrm = at;
+	hrm = hrm + (t >> 16);
 
-			{
-				unsigned t = hxm * lym; /* 16 * 16 => 32 */
-				{
-					unsigned at = lrm + (t << 16);
-					hrm += at < lrm;
-					lrm = at;
-				}
-				hrm = hrm + (t >> 16);
-			}
-			rm = hrm | (lrm != 0);
-		}
+	t = hxm * lym; /* 16 * 16 => 32 */
+	at = lrm + (t << 16);
+	hrm += at < lrm;
+	lrm = at;
+	hrm = hrm + (t >> 16);
 
-		/*
-		 * sticky shift down to normal rounding precision
-		 */
-		if ((int) rm < 0) {
-			rm = (rm >> (32 - (SP_MBITS + 1 + 3))) |
-			    ((rm << (SP_MBITS + 1 + 3)) != 0);
-			re++;
-		} else {
-			rm = (rm >> (32 - (SP_MBITS + 1 + 3 + 1))) |
-			    ((rm << (SP_MBITS + 1 + 3 + 1)) != 0);
-		}
-		assert(rm & (SP_HIDDEN_BIT << 3));
+	rm = hrm | (lrm != 0);
 
-		SPNORMRET2(rs, re, rm, "mul", x, y);
+	/*
+	 * Sticky shift down to normal rounding precision.
+	 */
+	if ((int) rm < 0) {
+		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
+		    ((rm << (SP_FBITS + 1 + 3)) != 0);
+		re++;
+	} else {
+		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
+		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
 	}
+	assert(rm & (SP_HIDDEN_BIT << 3));
+
+	return ieee754sp_format(rs, re, rm);
 }

diff --git a/arch/mips/math-emu/sp_scalb.c b/arch/mips/math-emu/sp_scalb.c
deleted file mode 100644
index dd76196..0000000
--- a/arch/mips/math-emu/sp_scalb.c
+++ /dev/null

@@ -1,57 +0,0 @@
-/* IEEE754 floating point arithmetic
- * single precision
- */
-/*
- * MIPS floating point support
- * Copyright (C) 1994-2000 Algorithmics Ltd.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- */
-
-
-#include "ieee754sp.h"
-
-ieee754sp ieee754sp_scalb(ieee754sp x, int n)
-{
-	COMPXSP;
-
-	CLEARCX;
-
-	EXPLODEXSP;
-
-	switch (xc) {
-	case IEEE754_CLASS_SNAN:
-		return ieee754sp_nanxcpt(x, "scalb", x, n);
-	case IEEE754_CLASS_QNAN:
-	case IEEE754_CLASS_INF:
-	case IEEE754_CLASS_ZERO:
-		return x;
-	case IEEE754_CLASS_DNORM:
-		SPDNORMX;
-		break;
-	case IEEE754_CLASS_NORM:
-		break;
-	}
-	SPNORMRET2(xs, xe + n, xm << 3, "scalb", x, n);
-}
-
-
-ieee754sp ieee754sp_ldexp(ieee754sp x, int n)
-{
-	return ieee754sp_scalb(x, n);
-}

diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c
index ae4fcfa..f1ffaa9 100644
--- a/arch/mips/math-emu/sp_simple.c
+++ b/arch/mips/math-emu/sp_simple.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,33 +16,17 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-int ieee754sp_finite(ieee754sp x)
-{
-	return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
-}
-
-ieee754sp ieee754sp_copysign(ieee754sp x, ieee754sp y)
-{
-	CLEARCX;
-	SPSIGN(x) = SPSIGN(y);
-	return x;
-}
-
-
-ieee754sp ieee754sp_neg(ieee754sp x)
+union ieee754sp ieee754sp_neg(union ieee754sp x)
 {
 	COMPXSP;
 
 	EXPLODEXSP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXSP;
 
 	/*
@@ -55,30 +37,29 @@
 	SPSIGN(x) ^= 1;
 
 	if (xc == IEEE754_CLASS_SNAN) {
-		ieee754sp y = ieee754sp_indef();
-		SETCX(IEEE754_INVALID_OPERATION);
+		union ieee754sp y = ieee754sp_indef();
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		SPSIGN(y) = SPSIGN(x);
-		return ieee754sp_nanxcpt(y, "neg");
+		return ieee754sp_nanxcpt(y);
 	}
 
 	return x;
 }
 
-
-ieee754sp ieee754sp_abs(ieee754sp x)
+union ieee754sp ieee754sp_abs(union ieee754sp x)
 {
 	COMPXSP;
 
 	EXPLODEXSP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXSP;
 
 	/* Clear sign ALWAYS, irrespective of NaN */
 	SPSIGN(x) = 0;
 
 	if (xc == IEEE754_CLASS_SNAN) {
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "abs");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
 	}
 
 	return x;

diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c
index fed2017..b7c098a 100644
--- a/arch/mips/math-emu/sp_sqrt.c
+++ b/arch/mips/math-emu/sp_sqrt.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,15 +16,12 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_sqrt(ieee754sp x)
+union ieee754sp ieee754sp_sqrt(union ieee754sp x)
 {
 	int ix, s, q, m, t, i;
 	unsigned int r;
@@ -35,34 +30,38 @@
 	/* take care of Inf and NaN */
 
 	EXPLODEXSP;
-	CLEARCX;
+	ieee754_clearcx();
 	FLUSHXSP;
 
 	/* x == INF or NAN? */
 	switch (xc) {
 	case IEEE754_CLASS_QNAN:
 		/* sqrt(Nan) = Nan */
-		return ieee754sp_nanxcpt(x, "sqrt");
+		return ieee754sp_nanxcpt(x);
+
 	case IEEE754_CLASS_SNAN:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "sqrt");
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
+
 	case IEEE754_CLASS_ZERO:
 		/* sqrt(0) = 0 */
 		return x;
+
 	case IEEE754_CLASS_INF:
 		if (xs) {
 			/* sqrt(-Inf) = Nan */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754sp_nanxcpt(ieee754sp_indef(), "sqrt");
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754sp_nanxcpt(ieee754sp_indef());
 		}
 		/* sqrt(+Inf) = Inf */
 		return x;
+
 	case IEEE754_CLASS_DNORM:
 	case IEEE754_CLASS_NORM:
 		if (xs) {
 			/* sqrt(-x) = Nan */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754sp_nanxcpt(ieee754sp_indef(), "sqrt");
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754sp_nanxcpt(ieee754sp_indef());
 		}
 		break;
 	}
@@ -99,12 +98,12 @@
 	}
 
 	if (ix != 0) {
-		SETCX(IEEE754_INEXACT);
+		ieee754_setcx(IEEE754_INEXACT);
 		switch (ieee754_csr.rm) {
-		case IEEE754_RP:
+		case FPU_CSR_RU:
 			q += 2;
 			break;
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			q += (q & 1);
 			break;
 		}

diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index e595c6f..8592e49 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,23 +16,22 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
 
-ieee754sp ieee754sp_sub(ieee754sp x, ieee754sp y)
+union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
 {
+	int s;
+
 	COMPXSP;
 	COMPYSP;
 
 	EXPLODEXSP;
 	EXPLODEYSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	FLUSHXSP;
 	FLUSHYSP;
@@ -51,8 +48,8 @@
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef(), "sub", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_nanxcpt(ieee754sp_indef());
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -68,14 +65,14 @@
 		return x;
 
 
-		/* Infinity handling
-		 */
-
+	/*
+	 * Infinity handling
+	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 		if (xs != ys)
 			return x;
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754sp_xcpt(ieee754sp_indef(), "sub", x, y);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754sp_indef();
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
@@ -87,15 +84,14 @@
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 		return x;
 
-		/* Zero handling
-		 */
-
+	/*
+	 * Zero handling
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 		if (xs != ys)
 			return x;
 		else
-			return ieee754sp_zero(ieee754_csr.rm ==
-					      IEEE754_RD);
+			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
@@ -104,7 +100,7 @@
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
 		/* quick fix up */
-		DPSIGN(y) ^= 1;
+		SPSIGN(y) ^= 1;
 		return y;
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
@@ -133,14 +129,16 @@
 	ym <<= 3;
 
 	if (xe > ye) {
-		/* have to shift y fraction right to align
+		/*
+		 * have to shift y fraction right to align
 		 */
-		int s = xe - ye;
+		s = xe - ye;
 		SPXSRSYn(s);
 	} else if (ye > xe) {
-		/* have to shift x fraction right to align
+		/*
+		 * have to shift x fraction right to align
 		 */
-		int s = ye - xe;
+		s = ye - xe;
 		SPXSRSXn(s);
 	}
 	assert(xe == ye);
@@ -153,7 +151,7 @@
 		xe = xe;
 		xs = xs;
 
-		if (xm >> (SP_MBITS + 1 + 3)) { /* carry out */
+		if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */
 			SPXSRSX1();	/* shift preserving sticky */
 		}
 	} else {
@@ -167,17 +165,18 @@
 			xs = ys;
 		}
 		if (xm == 0) {
-			if (ieee754_csr.rm == IEEE754_RD)
+			if (ieee754_csr.rm == FPU_CSR_RD)
 				return ieee754sp_zero(1);	/* round negative inf. => sign = -1 */
 			else
 				return ieee754sp_zero(0);	/* other round modes   => sign = 1 */
 		}
 		/* normalize to rounding precision
 		 */
-		while ((xm >> (SP_MBITS + 3)) == 0) {
+		while ((xm >> (SP_FBITS + 3)) == 0) {
 			xm <<= 1;
 			xe--;
 		}
 	}
-	SPNORMRET2(xs, xe, xm, "sub", x, y);
+
+	return ieee754sp_format(xs, xe, xm);
 }

diff --git a/arch/mips/math-emu/sp_tint.c b/arch/mips/math-emu/sp_tint.c
index 0fe9acc..091299a 100644
--- a/arch/mips/math-emu/sp_tint.c
+++ b/arch/mips/math-emu/sp_tint.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,20 +16,21 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
-#include <linux/kernel.h>
 #include "ieee754sp.h"
 
-int ieee754sp_tint(ieee754sp x)
+int ieee754sp_tint(union ieee754sp x)
 {
+	u32 residue;
+	int round;
+	int sticky;
+	int odd;
+
 	COMPXSP;
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	EXPLODEXSP;
 	FLUSHXSP;
@@ -40,10 +39,12 @@
 	case IEEE754_CLASS_SNAN:
 	case IEEE754_CLASS_QNAN:
 	case IEEE754_CLASS_INF:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754si_xcpt(ieee754si_indef(), "sp_tint", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754si_indef();
+
 	case IEEE754_CLASS_ZERO:
 		return 0;
+
 	case IEEE754_CLASS_DNORM:
 	case IEEE754_CLASS_NORM:
 		break;
@@ -54,18 +55,13 @@
 			return -0x80000000;
 		/* Set invalid. We will only use overflow for floating
 		   point overflow */
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754si_xcpt(ieee754si_indef(), "sp_tint", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754si_indef();
 	}
 	/* oh gawd */
-	if (xe > SP_MBITS) {
-		xm <<= xe - SP_MBITS;
+	if (xe > SP_FBITS) {
+		xm <<= xe - SP_FBITS;
 	} else {
-		u32 residue;
-		int round;
-		int sticky;
-		int odd;
-
 		if (xe < -1) {
 			residue = xm;
 			round = 0;
@@ -76,51 +72,38 @@
 			* so we do it in two steps. Be aware that xe
 			* may be -1 */
 			residue = xm << (xe + 1);
-			residue <<= 31 - SP_MBITS;
+			residue <<= 31 - SP_FBITS;
 			round = (residue >> 31) != 0;
 			sticky = (residue << 1) != 0;
-			xm >>= SP_MBITS - xe;
+			xm >>= SP_FBITS - xe;
 		}
 		odd = (xm & 0x1) != 0x0;
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			if (round && (sticky || odd))
 				xm++;
 			break;
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if ((round || sticky) && !xs)
 				xm++;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if ((round || sticky) && xs)
 				xm++;
 			break;
 		}
 		if ((xm >> 31) != 0) {
 			/* This can happen after rounding */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754si_xcpt(ieee754si_indef(), "sp_tint", x);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754si_indef();
 		}
 		if (round || sticky)
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_INEXACT);
 	}
 	if (xs)
 		return -xm;
 	else
 		return xm;
 }
-
-
-unsigned int ieee754sp_tuns(ieee754sp x)
-{
-	ieee754sp hb = ieee754sp_1e31();
-
-	/* what if x < 0 ?? */
-	if (ieee754sp_lt(x, hb))
-		return (unsigned) ieee754sp_tint(x);
-
-	return (unsigned) ieee754sp_tint(ieee754sp_sub(x, hb)) |
-	    ((unsigned) 1 << 31);
-}

diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c
index d0ca6e2..9f3c742 100644
--- a/arch/mips/math-emu/sp_tlong.c
+++ b/arch/mips/math-emu/sp_tlong.c

@@ -5,8 +5,6 @@
  * MIPS floating point support
  * Copyright (C) 1994-2000 Algorithmics Ltd.
  *
- * ########################################################################
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -18,19 +16,22 @@
  *
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
+ *  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
  */
 
-
 #include "ieee754sp.h"
+#include "ieee754dp.h"
 
-s64 ieee754sp_tlong(ieee754sp x)
+s64 ieee754sp_tlong(union ieee754sp x)
 {
+	u32 residue;
+	int round;
+	int sticky;
+	int odd;
+
 	COMPXDP;		/* <-- need 64-bit mantissa tmp */
 
-	CLEARCX;
+	ieee754_clearcx();
 
 	EXPLODEXSP;
 	FLUSHXSP;
@@ -39,10 +40,12 @@
 	case IEEE754_CLASS_SNAN:
 	case IEEE754_CLASS_QNAN:
 	case IEEE754_CLASS_INF:
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754di_xcpt(ieee754di_indef(), "sp_tlong", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754di_indef();
+
 	case IEEE754_CLASS_ZERO:
 		return 0;
+
 	case IEEE754_CLASS_DNORM:
 	case IEEE754_CLASS_NORM:
 		break;
@@ -53,69 +56,51 @@
 			return -0x8000000000000000LL;
 		/* Set invalid. We will only use overflow for floating
 		   point overflow */
-		SETCX(IEEE754_INVALID_OPERATION);
-		return ieee754di_xcpt(ieee754di_indef(), "sp_tlong", x);
+		ieee754_setcx(IEEE754_INVALID_OPERATION);
+		return ieee754di_indef();
 	}
 	/* oh gawd */
-	if (xe > SP_MBITS) {
-		xm <<= xe - SP_MBITS;
-	} else if (xe < SP_MBITS) {
-		u32 residue;
-		int round;
-		int sticky;
-		int odd;
-
+	if (xe > SP_FBITS) {
+		xm <<= xe - SP_FBITS;
+	} else if (xe < SP_FBITS) {
 		if (xe < -1) {
 			residue = xm;
 			round = 0;
 			sticky = residue != 0;
 			xm = 0;
 		} else {
-			residue = xm << (32 - SP_MBITS + xe);
+			residue = xm << (32 - SP_FBITS + xe);
 			round = (residue >> 31) != 0;
 			sticky = (residue << 1) != 0;
-			xm >>= SP_MBITS - xe;
+			xm >>= SP_FBITS - xe;
 		}
 		odd = (xm & 0x1) != 0x0;
 		switch (ieee754_csr.rm) {
-		case IEEE754_RN:
+		case FPU_CSR_RN:
 			if (round && (sticky || odd))
 				xm++;
 			break;
-		case IEEE754_RZ:
+		case FPU_CSR_RZ:
 			break;
-		case IEEE754_RU:	/* toward +Infinity */
+		case FPU_CSR_RU:	/* toward +Infinity */
 			if ((round || sticky) && !xs)
 				xm++;
 			break;
-		case IEEE754_RD:	/* toward -Infinity */
+		case FPU_CSR_RD:	/* toward -Infinity */
 			if ((round || sticky) && xs)
 				xm++;
 			break;
 		}
 		if ((xm >> 63) != 0) {
 			/* This can happen after rounding */
-			SETCX(IEEE754_INVALID_OPERATION);
-			return ieee754di_xcpt(ieee754di_indef(), "sp_tlong", x);
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754di_indef();
 		}
 		if (round || sticky)
-			SETCX(IEEE754_INEXACT);
+			ieee754_setcx(IEEE754_INEXACT);
 	}
 	if (xs)
 		return -xm;
 	else
 		return xm;
 }
-
-
-u64 ieee754sp_tulong(ieee754sp x)
-{
-	ieee754sp hb = ieee754sp_1e63();
-
-	/* what if x < 0 ?? */
-	if (ieee754sp_lt(x, hb))
-		return (u64) ieee754sp_tlong(x);
-
-	return (u64) ieee754sp_tlong(ieee754sp_sub(x, hb)) |
-	    (1ULL << 63);
-}

diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index f41a5c5..05b1d7c 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c

@@ -137,8 +137,10 @@
 {
 	struct vm_area_struct *vma;
 
+	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
 	octeon_flush_icache_all_cores(vma);
+	up_read(&current->mm->mmap_sem);
 }
 
 

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 1c74a6a..f2e8302 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c

@@ -7,6 +7,7 @@
  * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
+#include <linux/cpu_pm.h>
 #include <linux/hardirq.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -50,7 +51,7 @@
 {
 	preempt_disable();
 
-#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
+#ifndef CONFIG_MIPS_MT_SMP
 	smp_call_function(func, info, 1);
 #endif
 	func(info);
@@ -105,22 +106,37 @@
 
 static inline void r4k_blast_dcache_page_dc64(unsigned long addr)
 {
-	R4600_HIT_CACHEOP_WAR_IMPL;
 	blast_dcache64_page(addr);
 }
 
+static inline void r4k_blast_dcache_page_dc128(unsigned long addr)
+{
+	blast_dcache128_page(addr);
+}
+
 static void r4k_blast_dcache_page_setup(void)
 {
 	unsigned long  dc_lsize = cpu_dcache_line_size();
 
-	if (dc_lsize == 0)
+	switch (dc_lsize) {
+	case 0:
 		r4k_blast_dcache_page = (void *)cache_noop;
-	else if (dc_lsize == 16)
+		break;
+	case 16:
 		r4k_blast_dcache_page = blast_dcache16_page;
-	else if (dc_lsize == 32)
+		break;
+	case 32:
 		r4k_blast_dcache_page = r4k_blast_dcache_page_dc32;
-	else if (dc_lsize == 64)
+		break;
+	case 64:
 		r4k_blast_dcache_page = r4k_blast_dcache_page_dc64;
+		break;
+	case 128:
+		r4k_blast_dcache_page = r4k_blast_dcache_page_dc128;
+		break;
+	default:
+		break;
+	}
 }
 
 #ifndef CONFIG_EVA
@@ -159,6 +175,8 @@
 		r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed;
 	else if (dc_lsize == 64)
 		r4k_blast_dcache_page_indexed = blast_dcache64_page_indexed;
+	else if (dc_lsize == 128)
+		r4k_blast_dcache_page_indexed = blast_dcache128_page_indexed;
 }
 
 void (* r4k_blast_dcache)(void);
@@ -176,6 +194,8 @@
 		r4k_blast_dcache = blast_dcache32;
 	else if (dc_lsize == 64)
 		r4k_blast_dcache = blast_dcache64;
+	else if (dc_lsize == 128)
+		r4k_blast_dcache = blast_dcache128;
 }
 
 /* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */
@@ -265,6 +285,8 @@
 		r4k_blast_icache_page = blast_icache32_page;
 	else if (ic_lsize == 64)
 		r4k_blast_icache_page = blast_icache64_page;
+	else if (ic_lsize == 128)
+		r4k_blast_icache_page = blast_icache128_page;
 }
 
 #ifndef CONFIG_EVA
@@ -338,6 +360,8 @@
 			r4k_blast_icache = blast_icache32;
 	} else if (ic_lsize == 64)
 		r4k_blast_icache = blast_icache64;
+	else if (ic_lsize == 128)
+		r4k_blast_icache = blast_icache128;
 }
 
 static void (* r4k_blast_scache_page)(unsigned long addr);
@@ -428,7 +452,7 @@
 
 static inline int has_valid_asid(const struct mm_struct *mm)
 {
-#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_MIPS_MT_SMP
 	int i;
 
 	for_each_online_cpu(i)
@@ -1094,6 +1118,21 @@
 		c->dcache.waybit = 0;
 		break;
 
+	case CPU_CAVIUM_OCTEON3:
+		/* For now lie about the number of ways. */
+		c->icache.linesz = 128;
+		c->icache.sets = 16;
+		c->icache.ways = 8;
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz;
+
+		c->dcache.linesz = 128;
+		c->dcache.ways = 8;
+		c->dcache.sets = 8;
+		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
 	default:
 		if (!(config & MIPS_CONF_M))
 			panic("Don't know how to probe P-caches on this cpu.");
@@ -1414,6 +1453,7 @@
 		loongson3_sc_init();
 		return;
 
+	case CPU_CAVIUM_OCTEON3:
 	case CPU_XLP:
 		/* don't need to worry about L2, fully coherent */
 		return;
@@ -1644,3 +1684,35 @@
 	coherency_setup();
 	board_cache_error_setup = r4k_cache_error_setup;
 }
+
+/*
+ * CPU PM notifier callback: re-run coherency_setup() on CPU_PM_ENTER_FAILED
+ * and CPU_PM_EXIT; every other event is ignored.  Always returns NOTIFY_OK.
+ * NOTE(review): presumably the coherency setup is lost across a low-power
+ * state - confirm against coherency_setup().
+ */
+static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			       void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		coherency_setup();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block r4k_cache_pm_notifier_block = {
+	.notifier_call = r4k_cache_pm_notifier,
+};
+
+/*
+ * Register the cache PM notifier; run via arch_initcall().  Kept static as
+ * no caller other than the initcall below is visible in this file.
+ */
+static int __init r4k_cache_init_pm(void)
+{
+	return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block);
+}
+arch_initcall(r4k_cache_init_pm);

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 4fc74c7..6e44133 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c

@@ -44,27 +44,6 @@
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
-/* Atomicity and interruptability */
-#ifdef CONFIG_MIPS_MT_SMTC
-
-#include <asm/mipsmtregs.h>
-
-#define ENTER_CRITICAL(flags) \
-	{ \
-	unsigned int mvpflags; \
-	local_irq_save(flags);\
-	mvpflags = dvpe()
-#define EXIT_CRITICAL(flags) \
-	evpe(mvpflags); \
-	local_irq_restore(flags); \
-	}
-#else
-
-#define ENTER_CRITICAL(flags) local_irq_save(flags)
-#define EXIT_CRITICAL(flags) local_irq_restore(flags)
-
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
  * when needed.	 This is necessary only on R4000 / R4400 SC and MC versions
@@ -100,21 +79,7 @@
 	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
 }
 
-#ifdef CONFIG_MIPS_MT_SMTC
-static pte_t *kmap_coherent_pte;
-static void __init kmap_coherent_init(void)
-{
-	unsigned long vaddr;
-
-	/* cache the first coherent kmap pte */
-	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
-	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
-}
-#else
-static inline void kmap_coherent_init(void) {}
-#endif
-
-void *kmap_coherent(struct page *page, unsigned long addr)
+static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr, flags, entrylo;
@@ -126,58 +91,48 @@
 
 	pagefault_disable();
 	idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
-#ifdef CONFIG_MIPS_MT_SMTC
-	idx += FIX_N_COLOURS * smp_processor_id() +
-		(in_interrupt() ? (FIX_N_COLOURS * NR_CPUS) : 0);
-#else
 	idx += in_interrupt() ? FIX_N_COLOURS : 0;
-#endif
 	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
-	pte = mk_pte(page, PAGE_KERNEL);
+	pte = mk_pte(page, prot);
 #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
 	entrylo = pte.pte_high;
 #else
 	entrylo = pte_to_entrylo(pte_val(pte));
 #endif
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	old_ctx = read_c0_entryhi();
 	write_c0_entryhi(vaddr & (PAGE_MASK << 1));
 	write_c0_entrylo0(entrylo);
 	write_c0_entrylo1(entrylo);
-#ifdef CONFIG_MIPS_MT_SMTC
-	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
-	/* preload TLB instead of local_flush_tlb_one() */
-	mtc0_tlbw_hazard();
-	tlb_probe();
-	tlb_probe_hazard();
-	tlbidx = read_c0_index();
-	mtc0_tlbw_hazard();
-	if (tlbidx < 0)
-		tlb_write_random();
-	else
-		tlb_write_indexed();
-#else
 	tlbidx = read_c0_wired();
 	write_c0_wired(tlbidx + 1);
 	write_c0_index(tlbidx);
 	mtc0_tlbw_hazard();
 	tlb_write_indexed();
-#endif
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 
 	return (void*) vaddr;
 }
 
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	return __kmap_pgprot(page, addr, PAGE_KERNEL);
+}
+
+void *kmap_noncoherent(struct page *page, unsigned long addr)
+{
+	return __kmap_pgprot(page, addr, PAGE_KERNEL_NC);
+}
+
 void kunmap_coherent(void)
 {
-#ifndef CONFIG_MIPS_MT_SMTC
 	unsigned int wired;
 	unsigned long flags, old_ctx;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	old_ctx = read_c0_entryhi();
 	wired = read_c0_wired() - 1;
 	write_c0_wired(wired);
@@ -189,8 +144,7 @@
 	tlb_write_indexed();
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
-	EXIT_CRITICAL(flags);
-#endif
+	local_irq_restore(flags);
 	pagefault_enable();
 }
 
@@ -256,7 +210,7 @@
 void __init fixrange_init(unsigned long start, unsigned long end,
 	pgd_t *pgd_base)
 {
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_MIPS_MT_SMTC)
+#ifdef CONFIG_HIGHMEM
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
@@ -327,8 +281,6 @@
 #ifdef CONFIG_HIGHMEM
 	kmap_init();
 #endif
-	kmap_coherent_init();
-
 #ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 #endif

diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index eeaf50f..3914e27 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c

@@ -8,6 +8,7 @@
  * Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
  */
+#include <linux/cpu_pm.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
@@ -25,28 +26,6 @@
 
 extern void build_tlb_refill_handler(void);
 
-/* Atomicity and interruptability */
-#ifdef CONFIG_MIPS_MT_SMTC
-
-#include <asm/smtc.h>
-#include <asm/mipsmtregs.h>
-
-#define ENTER_CRITICAL(flags) \
-	{ \
-	unsigned int mvpflags; \
-	local_irq_save(flags);\
-	mvpflags = dvpe()
-#define EXIT_CRITICAL(flags) \
-	evpe(mvpflags); \
-	local_irq_restore(flags); \
-	}
-#else
-
-#define ENTER_CRITICAL(flags) local_irq_save(flags)
-#define EXIT_CRITICAL(flags) local_irq_restore(flags)
-
-#endif /* CONFIG_MIPS_MT_SMTC */
-
 /*
  * LOONGSON2/3 has a 4 entry itlb which is a subset of dtlb,
  * unfortunately, itlb is not totally transparent to software.
@@ -75,7 +54,7 @@
 	unsigned long old_ctx;
 	int entry, ftlbhighset;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
 	write_c0_entrylo0(0);
@@ -112,7 +91,7 @@
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
 	flush_itlb();
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(local_flush_tlb_all);
 
@@ -142,7 +121,7 @@
 	if (cpu_context(cpu, mm) != 0) {
 		unsigned long size, flags;
 
-		ENTER_CRITICAL(flags);
+		local_irq_save(flags);
 		start = round_down(start, PAGE_SIZE << 1);
 		end = round_up(end, PAGE_SIZE << 1);
 		size = (end - start) >> (PAGE_SHIFT + 1);
@@ -176,7 +155,7 @@
 			drop_mmu_context(mm, cpu);
 		}
 		flush_itlb();
-		EXIT_CRITICAL(flags);
+		local_irq_restore(flags);
 	}
 }
 
@@ -184,7 +163,7 @@
 {
 	unsigned long size, flags;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	size = (size + 1) >> 1;
 	if (size <= (current_cpu_data.tlbsizeftlbsets ?
@@ -220,7 +199,7 @@
 		local_flush_tlb_all();
 	}
 	flush_itlb();
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 }
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
@@ -233,7 +212,7 @@
 
 		newpid = cpu_asid(cpu, vma->vm_mm);
 		page &= (PAGE_MASK << 1);
-		ENTER_CRITICAL(flags);
+		local_irq_save(flags);
 		oldpid = read_c0_entryhi();
 		write_c0_entryhi(page | newpid);
 		mtc0_tlbw_hazard();
@@ -253,7 +232,7 @@
 	finish:
 		write_c0_entryhi(oldpid);
 		flush_itlb_vm(vma);
-		EXIT_CRITICAL(flags);
+		local_irq_restore(flags);
 	}
 }
 
@@ -266,7 +245,7 @@
 	unsigned long flags;
 	int oldpid, idx;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	oldpid = read_c0_entryhi();
 	page &= (PAGE_MASK << 1);
 	write_c0_entryhi(page);
@@ -285,7 +264,7 @@
 	}
 	write_c0_entryhi(oldpid);
 	flush_itlb();
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 }
 
 /*
@@ -308,7 +287,7 @@
 	if (current->active_mm != vma->vm_mm)
 		return;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 
 	pid = read_c0_entryhi() & ASID_MASK;
 	address &= (PAGE_MASK << 1);
@@ -358,7 +337,7 @@
 	}
 	tlbw_use_hazard();
 	flush_itlb_vm(vma);
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 }
 
 void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
@@ -369,7 +348,7 @@
 	unsigned long old_pagemask;
 	unsigned long old_ctx;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
 	old_pagemask = read_c0_pagemask();
@@ -389,7 +368,7 @@
 	tlbw_use_hazard();	/* What is the hazard here? */
 	write_c0_pagemask(old_pagemask);
 	local_flush_tlb_all();
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -399,13 +378,13 @@
 	unsigned int mask;
 	unsigned long flags;
 
-	ENTER_CRITICAL(flags);
+	local_irq_save(flags);
 	write_c0_pagemask(PM_HUGE_MASK);
 	back_to_back_c0_hazard();
 	mask = read_c0_pagemask();
 	write_c0_pagemask(PM_DEFAULT_MASK);
 
-	EXIT_CRITICAL(flags);
+	local_irq_restore(flags);
 
 	return mask == PM_HUGE_MASK;
 }
@@ -421,7 +400,10 @@
 
 __setup("ntlb=", set_ntlb);
 
-void tlb_init(void)
+/*
+ * Configure TLB (for init or after a CPU has been powered off).
+ */
+static void r4k_tlb_configure(void)
 {
 	/*
 	 * You should never change this register:
@@ -453,6 +435,11 @@
 	local_flush_tlb_all();
 
 	/* Did I tell you that ARC SUCKS?  */
+}
+
+void tlb_init(void)
+{
+	r4k_tlb_configure();
 
 	if (ntlb) {
 		if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) {
@@ -466,3 +453,26 @@
 
 	build_tlb_refill_handler();
 }
+
+static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			       void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		r4k_tlb_configure();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block r4k_tlb_pm_notifier_block = {
+	.notifier_call = r4k_tlb_pm_notifier,
+};
+
+static int __init r4k_tlb_init_pm(void)
+{
+	return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block);
+}
+arch_initcall(r4k_tlb_init_pm);

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index f99ec587..e80e10b 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c

@@ -1256,7 +1256,7 @@
 	memset(relocs, 0, sizeof(relocs));
 	memset(final_handler, 0, sizeof(final_handler));
 
-	if ((scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) {
+	if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) {
 		htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
 							  scratch_reg);
 		vmalloc_mode = refill_scratch;

diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index b8d580c..775c280 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c

@@ -63,6 +63,7 @@
 	{ insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM },
 	{ insn_daddu, 0, 0 },
 	{ insn_daddiu, 0, 0 },
+	{ insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS },
 	{ insn_dmfc0, 0, 0 },
 	{ insn_dmtc0, 0, 0 },
 	{ insn_dsll, 0, 0 },
@@ -78,14 +79,20 @@
 	{ insn_ext, M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE },
 	{ insn_j, M(mm_j32_op, 0, 0, 0, 0, 0), JIMM },
 	{ insn_jal, M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM },
+	{ insn_jalr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS },
 	{ insn_jr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS },
+	{ insn_lb, M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
 	{ insn_ld, 0, 0 },
+	/* load halfword: target (RT), base (RS) and signed offset, as for lb/lw */
+	{ insn_lh, M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
 	{ insn_ll, M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM },
 	{ insn_lld, 0, 0 },
 	{ insn_lui, M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM },
 	{ insn_lw, M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
 	{ insn_mfc0, M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD },
+	{ insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS },
+	{ insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS },
 	{ insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD },
+	{ insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD },
 	{ insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD },
 	{ insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
 	{ insn_pref, M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM },
@@ -94,15 +101,22 @@
 	{ insn_scd, 0, 0 },
 	{ insn_sd, 0, 0 },
 	{ insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD },
+	{ insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD },
+	{ insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
+	{ insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD },
 	{ insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD },
 	{ insn_srl, M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD },
+	{ insn_srlv, M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD },
 	{ insn_rotr, M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD },
 	{ insn_subu, M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD },
 	{ insn_sw, M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
+	{ insn_sync, M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS },
 	{ insn_tlbp, M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0 },
 	{ insn_tlbr, M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0 },
 	{ insn_tlbwi, M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0 },
 	{ insn_tlbwr, M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0 },
+	{ insn_wait, M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM },
+	{ insn_wsbh, M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS },
 	{ insn_xor, M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD },
 	{ insn_xori, M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
 	{ insn_dins, 0, 0 },

diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 3abd609..38792c2 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c

@@ -67,6 +67,7 @@
 	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
 	{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
 	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
+	{ insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT },
 	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
 	{ insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
 	{ insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE },
@@ -82,17 +83,23 @@
 	{ insn_ins, M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE },
 	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM },
 	{ insn_jal,  M(jal_op, 0, 0, 0, 0, 0),	JIMM },
+	{ insn_jalr,  M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD },
 	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM },
 	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jr_op),  RS },
+	{ insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
+	/* load halfword: uses the lh major opcode */
+	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM },
 	{ insn_lw,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD },
 	{ insn_mfc0,  M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
+	{ insn_mfhi,  M(spec_op, 0, 0, 0, 0, mfhi_op), RD },
+	{ insn_mflo,  M(spec_op, 0, 0, 0, 0, mflo_op), RD },
 	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
+	{ insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
 	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM },
 	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD },
 	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
@@ -102,17 +109,25 @@
 	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE },
+	{ insn_sllv,  M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD },
+	{ insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
+	{ insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD },
 	{ insn_sra,  M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE },
 	{ insn_srl,  M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE },
+	{ insn_srlv,  M(spec_op, 0, 0, 0, 0, srlv_op),  RS | RT | RD },
 	{ insn_subu,  M(spec_op, 0, 0, 0, 0, subu_op),	RS | RT | RD },
 	{ insn_sw,  M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	{ insn_sync, M(spec_op, 0, 0, 0, 0, sync_op), RE },
 	{ insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM},
 	{ insn_tlbp,  M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0 },
 	{ insn_tlbr,  M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0 },
 	{ insn_tlbwi,  M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0 },
 	{ insn_tlbwr,  M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0 },
+	{ insn_wait, M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM },
+	{ insn_wsbh, M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD },
 	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
 	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD },
+	{ insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD },
 	{ insn_invalid, 0, 0 }
 };
 

diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index b9d14b6..0051580 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c

@@ -47,14 +47,16 @@
 	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
 	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
 	insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm,
-	insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
+	insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
 	insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret,
-	insn_ext, insn_ins, insn_j, insn_jal, insn_jr, insn_ld, insn_ldx,
-	insn_ll, insn_lld, insn_lui, insn_lw, insn_lwx, insn_mfc0, insn_mtc0,
+	insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb,
+	insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw,
+	insn_lwx, insn_mfc0, insn_mfhi, insn_mflo, insn_mtc0, insn_mul,
 	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd,
-	insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
-	insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_xor,
-	insn_xori,
+	insn_sd, insn_sll, insn_sllv, insn_sltiu, insn_sltu, insn_sra,
+	insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall,
+	insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh,
+	insn_xor, insn_xori, insn_yield,
 };
 
 struct insn {
@@ -144,6 +146,13 @@
 }							\
 UASM_EXPORT_SYMBOL(uasm_i##op);
 
+/*
+ * Define and export uasm_i<op>() using the Ip_u3u2u1() prototype. The three
+ * operands are handed to build_insn() in reverse order (c, b, a).
+ */
+#define I_u3u2u1(op)					\
+Ip_u3u2u1(op)						\
+{							\
+	build_insn(buf, insn##op, c, b, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
 #define I_u3u1u2(op)					\
 Ip_u3u1u2(op)						\
 {							\
@@ -200,6 +209,13 @@
 }							\
 UASM_EXPORT_SYMBOL(uasm_i##op);
 
+/*
+ * Define and export uasm_i<op>() for a two-operand instruction. The
+ * definition reuses the Ip_u1u2() prototype and passes the operands to
+ * build_insn() swapped (b, a).
+ */
+#define I_u2u1(op)					\
+Ip_u1u2(op)						\
+{							\
+	build_insn(buf, insn##op, b, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
 #define I_u1s2(op)					\
 Ip_u1s2(op)						\
 {							\
@@ -237,6 +253,7 @@
 I_u1u2u3(_dmtc0)
 I_u2u1s3(_daddiu)
 I_u3u1u2(_daddu)
+I_u1u2(_divu)
 I_u2u1u3(_dsll)
 I_u2u1u3(_dsll32)
 I_u2u1u3(_dsra)
@@ -250,14 +267,20 @@
 I_u2u1msbu3(_ins)
 I_u1(_j)
 I_u1(_jal)
+I_u2u1(_jalr)
 I_u1(_jr)
+I_u2s3u1(_lb)
 I_u2s3u1(_ld)
+I_u2s3u1(_lh)
 I_u2s3u1(_ll)
 I_u2s3u1(_lld)
 I_u1s2(_lui)
 I_u2s3u1(_lw)
 I_u1u2u3(_mfc0)
+I_u1(_mfhi)
+I_u1(_mflo)
 I_u1u2u3(_mtc0)
+I_u3u1u2(_mul)
 I_u2u1u3(_ori)
 I_u3u1u2(_or)
 I_0(_rfe)
@@ -265,17 +288,25 @@
 I_u2s3u1(_scd)
 I_u2s3u1(_sd)
 I_u2u1u3(_sll)
+I_u3u2u1(_sllv)
+I_u2u1s3(_sltiu)
+I_u3u1u2(_sltu)
 I_u2u1u3(_sra)
 I_u2u1u3(_srl)
+I_u3u2u1(_srlv)
 I_u2u1u3(_rotr)
 I_u3u1u2(_subu)
 I_u2s3u1(_sw)
+I_u1(_sync)
 I_0(_tlbp)
 I_0(_tlbr)
 I_0(_tlbwi)
 I_0(_tlbwr)
+I_u1(_wait);
+I_u2u1(_wsbh)
 I_u3u1u2(_xor)
 I_u2u1u3(_xori)
+I_u2u1(_yield)
 I_u2u1msbu3(_dins);
 I_u2u1msb32u3(_dinsm);
 I_u1(_syscall);
@@ -469,6 +500,14 @@
 }
 UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b));
 
+/*
+ * Emit a beq on r1/r2 that branches to label 'lid'. A pc16 relocation for
+ * the label is recorded against the current instruction slot and the beq
+ * itself is emitted with a zero offset (presumably filled in when the
+ * relocations are resolved).
+ */
+void ISAFUNC(uasm_il_beq)(u32 **p, struct uasm_reloc **r, unsigned int r1,
+			  unsigned int r2, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	ISAFUNC(uasm_i_beq)(p, r1, r2, 0);
+}
+UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beq));
+
 void ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {

diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index eae0ba3..b9510ea 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile

@@ -9,5 +9,4 @@
 				   malta-int.o malta-memory.o malta-platform.o \
 				   malta-reset.o malta-setup.o malta-time.o
 
-# FIXME FIXME FIXME
-obj-$(CONFIG_MIPS_MT_SMTC)	+= malta-smtc.o
+obj-$(CONFIG_MIPS_MALTA_PM)	+= malta-pm.o

diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 4f9e44d..0f60256 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c

@@ -116,8 +116,6 @@
 	return CPC_BASE_ADDR;
 }
 
-extern struct plat_smp_ops msmtc_smp_ops;
-
 void __init prom_init(void)
 {
 	mips_display_message("LINUX");
@@ -304,8 +302,4 @@
 		return;
 	if (!register_vsmp_smp_ops())
 		return;
-
-#ifdef CONFIG_MIPS_MT_SMTC
-	register_smp_ops(&msmtc_smp_ops);
-#endif
 }

diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index b71ee80..ecc2785 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c

@@ -504,28 +504,9 @@
 	} else if (cpu_has_vint) {
 		set_vi_handler(MIPSCPU_INT_I8259A, malta_hw0_irqdispatch);
 		set_vi_handler(MIPSCPU_INT_COREHI, corehi_irqdispatch);
-#ifdef CONFIG_MIPS_MT_SMTC
-		setup_irq_smtc(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_I8259A, &i8259irq,
-			(0x100 << MIPSCPU_INT_I8259A));
-		setup_irq_smtc(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI,
-			&corehi_irqaction, (0x100 << MIPSCPU_INT_COREHI));
-		/*
-		 * Temporary hack to ensure that the subsidiary device
-		 * interrupts coing in via the i8259A, but associated
-		 * with low IRQ numbers, will restore the Status.IM
-		 * value associated with the i8259A.
-		 */
-		{
-			int i;
-
-			for (i = 0; i < 16; i++)
-				irq_hwmask[i] = (0x100 << MIPSCPU_INT_I8259A);
-		}
-#else /* Not SMTC */
 		setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_I8259A, &i8259irq);
 		setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI,
 						&corehi_irqaction);
-#endif /* CONFIG_MIPS_MT_SMTC */
 	} else {
 		setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_I8259A, &i8259irq);
 		setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI,

diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c
index f2364e4..6d97730 100644
--- a/arch/mips/mti-malta/malta-memory.c
+++ b/arch/mips/mti-malta/malta-memory.c

@@ -26,8 +26,8 @@
 
 fw_memblock_t * __init fw_getmdesc(int eva)
 {
-	char *memsize_str, *ememsize_str __maybe_unused = NULL, *ptr;
-	unsigned long memsize = 0, ememsize __maybe_unused = 0;
+	char *memsize_str, *ememsize_str = NULL, *ptr;
+	unsigned long memsize = 0, ememsize = 0;
 	static char cmdline[COMMAND_LINE_SIZE] __initdata;
 	int tmp;
 

diff --git a/arch/mips/mti-malta/malta-pm.c b/arch/mips/mti-malta/malta-pm.c
new file mode 100644
index 0000000..c1e456c
--- /dev/null
+++ b/arch/mips/mti-malta/malta-pm.c

@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+
+#include <asm/mach-malta/malta-pm.h>
+
+static struct pci_bus *pm_pci_bus;
+static resource_size_t pm_io_offset;
+
+/*
+ * Ask the PIIX4 to enter the suspend type given by @state.
+ *
+ * Returns -ENODEV when the PCI bus or the PM I/O registers have not been
+ * located, otherwise 0 after the suspend sequence has been issued and the
+ * final delay has elapsed.
+ */
+int mips_pm_suspend(unsigned state)
+{
+	u16 pmsts;
+	int special_cycle_dev;
+
+	if (!pm_pci_bus || !pm_io_offset)
+		return -ENODEV;
+
+	/* Write the status back until the power button bit reads as clear */
+	pmsts = inw(pm_io_offset + PIIX4_FUNC3IO_PMSTS);
+	while (pmsts & PIIX4_FUNC3IO_PMSTS_PWRBTN_STS) {
+		outw(pmsts, pm_io_offset + PIIX4_FUNC3IO_PMSTS);
+		pmsts = inw(pm_io_offset + PIIX4_FUNC3IO_PMSTS);
+	}
+
+	/* Select the suspend type and enable entry to suspend */
+	outw(state | PIIX4_FUNC3IO_PMCNTRL_SUS_EN,
+	     pm_io_offset + PIIX4_FUNC3IO_PMCNTRL);
+
+	/* The special cycle must not follow too quickly or it is ignored */
+	mdelay(10);
+
+	/*
+	 * Suspend is only entered once the PIIX4 observes a special cycle
+	 * carrying the magic value on the PCI bus, so generate one.
+	 */
+	special_cycle_dev = PCI_DEVID(0, PCI_DEVFN(0x1f, 0x7));
+	pci_bus_write_config_dword(pm_pci_bus, special_cycle_dev, 0,
+				   PIIX4_SUSPEND_MAGIC);
+
+	/* Allow the system a second to power down */
+	mdelay(1000);
+
+	return 0;
+}
+
+/*
+ * Locate the PCI bus and the PIIX4 PM function, claim its I/O region and
+ * remember where its registers start so mips_pm_suspend() can use them.
+ *
+ * Returns 0 on success or -ENODEV if any step fails.
+ */
+static int __init malta_pm_setup(void)
+{
+	int io_region = PCI_BRIDGE_RESOURCES;
+	struct pci_dev *piix4;
+	int err, ret = 0;
+
+	/* A reference to the PCI bus is needed for the special cycle */
+	pm_pci_bus = pci_find_next_bus(NULL);
+	if (!pm_pci_bus) {
+		pr_warn("malta-pm: failed to find reference to PCI bus\n");
+		return -ENODEV;
+	}
+
+	/* Look up the PIIX4 PM device */
+	piix4 = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID,
+			       PCI_ANY_ID, NULL);
+	if (!piix4) {
+		pr_warn("malta-pm: failed to find PIIX4 PM\n");
+		return -ENODEV;
+	}
+
+	/* Claim the PIIX4 PM IO registers */
+	err = pci_request_region(piix4, io_region, "PIIX4 PM IO registers");
+	if (err) {
+		pr_warn("malta-pm: failed to request PM IO registers (%d)\n",
+			err);
+		ret = -ENODEV;
+		goto out_put;
+	}
+
+	/* Record the offset to the PIIX4 PM IO registers */
+	pm_io_offset = pci_resource_start(piix4, io_region);
+
+out_put:
+	pci_dev_put(piix4);
+	return ret;
+}
+
+late_initcall(malta_pm_setup);

diff --git a/arch/mips/mti-malta/malta-reset.c b/arch/mips/mti-malta/malta-reset.c
index d627d4b..2fd2cc2 100644
--- a/arch/mips/mti-malta/malta-reset.c
+++ b/arch/mips/mti-malta/malta-reset.c

@@ -10,6 +10,7 @@
 #include <linux/pm.h>
 
 #include <asm/reboot.h>
+#include <asm/mach-malta/malta-pm.h>
 
 #define SOFTRES_REG	0x1f000500
 #define GORESET		0x42
@@ -24,17 +25,22 @@
 
 static void mips_machine_halt(void)
 {
-	unsigned int __iomem *softres_reg =
-		ioremap(SOFTRES_REG, sizeof(unsigned int));
+	while (true);
+}
 
-	__raw_writel(GORESET, softres_reg);
+static void mips_machine_power_off(void)
+{
+	mips_pm_suspend(PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_SOFF);
+
+	pr_info("Failed to power down, resetting\n");
+	mips_machine_restart(NULL);
 }
 
 static int __init mips_reboot_setup(void)
 {
 	_machine_restart = mips_machine_restart;
 	_machine_halt = mips_machine_halt;
-	pm_power_off = mips_machine_halt;
+	pm_power_off = mips_machine_power_off;
 
 	return 0;
 }

diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index bf62151..db7c9e5 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c

@@ -77,11 +77,7 @@
 	return "MIPS Malta";
 }
 
-#if defined(CONFIG_MIPS_MT_SMTC)
-const char display_string[] = "	      SMTC LINUX ON MALTA	";
-#else
 const char display_string[] = "	       LINUX ON MALTA	    ";
-#endif /* CONFIG_MIPS_MT_SMTC */
 
 #ifdef CONFIG_BLK_DEV_FD
 static void __init fd_activate(void)

diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
deleted file mode 100644
index c484990..0000000
--- a/arch/mips/mti-malta/malta-smtc.c
+++ /dev/null

@@ -1,162 +0,0 @@
-/*
- * Malta Platform-specific hooks for SMP operation
- */
-#include <linux/irq.h>
-#include <linux/init.h>
-
-#include <asm/mipsregs.h>
-#include <asm/mipsmtregs.h>
-#include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
-
-/* VPE/SMP Prototype implements platform interfaces directly */
-
-/*
- * Cause the specified action to be performed on a targeted "CPU"
- */
-
-static void msmtc_send_ipi_single(int cpu, unsigned int action)
-{
-	/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
-	smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
-}
-
-static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action)
-{
-	unsigned int i;
-
-	for_each_cpu(i, mask)
-		msmtc_send_ipi_single(i, action);
-}
-
-/*
- * Post-config but pre-boot cleanup entry point
- */
-static void msmtc_init_secondary(void)
-{
-	int myvpe;
-
-	/* Don't enable Malta I/O interrupts (IP2) for secondary VPEs */
-	myvpe = read_c0_tcbind() & TCBIND_CURVPE;
-	if (myvpe != 0) {
-		/* Ideally, this should be done only once per VPE, but... */
-		clear_c0_status(ST0_IM);
-		set_c0_status((0x100 << cp0_compare_irq)
-				| (0x100 << MIPS_CPU_IPI_IRQ));
-		if (cp0_perfcount_irq >= 0)
-			set_c0_status(0x100 << cp0_perfcount_irq);
-	}
-
-	smtc_init_secondary();
-}
-
-/*
- * Platform "CPU" startup hook
- */
-static void msmtc_boot_secondary(int cpu, struct task_struct *idle)
-{
-	smtc_boot_secondary(cpu, idle);
-}
-
-/*
- * SMP initialization finalization entry point
- */
-static void msmtc_smp_finish(void)
-{
-	smtc_smp_finish();
-}
-
-/*
- * Hook for after all CPUs are online
- */
-
-static void msmtc_cpus_done(void)
-{
-}
-
-/*
- * Platform SMP pre-initialization
- *
- * As noted above, we can assume a single CPU for now
- * but it may be multithreaded.
- */
-
-static void __init msmtc_smp_setup(void)
-{
-	/*
-	 * we won't get the definitive value until
-	 * we've run smtc_prepare_cpus later, but
-	 * we would appear to need an upper bound now.
-	 */
-	smp_num_siblings = smtc_build_cpu_map(0);
-}
-
-static void __init msmtc_prepare_cpus(unsigned int max_cpus)
-{
-	smtc_prepare_cpus(max_cpus);
-}
-
-struct plat_smp_ops msmtc_smp_ops = {
-	.send_ipi_single	= msmtc_send_ipi_single,
-	.send_ipi_mask		= msmtc_send_ipi_mask,
-	.init_secondary		= msmtc_init_secondary,
-	.smp_finish		= msmtc_smp_finish,
-	.cpus_done		= msmtc_cpus_done,
-	.boot_secondary		= msmtc_boot_secondary,
-	.smp_setup		= msmtc_smp_setup,
-	.prepare_cpus		= msmtc_prepare_cpus,
-};
-
-#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-/*
- * IRQ affinity hook
- */
-
-
-int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
-			  bool force)
-{
-	cpumask_t tmask;
-	int cpu = 0;
-	void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
-
-	/*
-	 * On the legacy Malta development board, all I/O interrupts
-	 * are routed through the 8259 and combined in a single signal
-	 * to the CPU daughterboard, and on the CoreFPGA2/3 34K models,
-	 * that signal is brought to IP2 of both VPEs. To avoid racing
-	 * concurrent interrupt service events, IP2 is enabled only on
-	 * one VPE, by convention VPE0.	 So long as no bits are ever
-	 * cleared in the affinity mask, there will never be any
-	 * interrupt forwarding.  But as soon as a program or operator
-	 * sets affinity for one of the related IRQs, we need to make
-	 * sure that we don't ever try to forward across the VPE boundary,
-	 * at least not until we engineer a system where the interrupt
-	 * _ack() or _end() function can somehow know that it corresponds
-	 * to an interrupt taken on another VPE, and perform the appropriate
-	 * restoration of Status.IM state using MFTR/MTTR instead of the
-	 * normal local behavior. We also ensure that no attempt will
-	 * be made to forward to an offline "CPU".
-	 */
-
-	cpumask_copy(&tmask, affinity);
-	for_each_cpu(cpu, affinity) {
-		if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
-			cpu_clear(cpu, tmask);
-	}
-	cpumask_copy(d->affinity, &tmask);
-
-	if (cpus_empty(tmask))
-		/*
-		 * We could restore a default mask here, but the
-		 * runtime code can anyway deal with the null set
-		 */
-		printk(KERN_WARNING
-		       "IRQ affinity leaves no legal CPU for IRQ %d\n", d->irq);
-
-	/* Do any generic SMTC IRQ affinity setup */
-	smtc_set_irq_affinity(d->irq, tmask);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */

diff --git a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c b/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
index b921e5e..80fe194 100644
--- a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
+++ b/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c

@@ -312,16 +312,13 @@
 
 	pr_debug("i2c_platform_probe\n");
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		ret = -ENODEV;
-		goto out;
-	}
+	if (!r)
+		return -ENODEV;
 
-	priv = kzalloc(sizeof(struct i2c_platform_data), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct i2c_platform_data),
+			    GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
 	/* FIXME: need to allocate resource in PIC32 space */
 #if 0
@@ -330,10 +327,8 @@
 #else
 	priv->base = r->start;
 #endif
-	if (!priv->base) {
-		ret = -EBUSY;
-		goto out_mem;
-	}
+	if (!priv->base)
+		return -EBUSY;
 
 	priv->xfer_timeout = 200;
 	priv->ack_timeout = 200;
@@ -348,17 +343,13 @@
 	i2c_platform_setup(priv);
 
 	ret = i2c_add_numbered_adapter(&priv->adap);
-	if (ret == 0) {
-		platform_set_drvdata(pdev, priv);
-		return 0;
+	if (ret) {
+		i2c_platform_disable(priv);
+		return ret;
 	}
 
-	i2c_platform_disable(priv);
-
-out_mem:
-	kfree(priv);
-out:
-	return ret;
+	platform_set_drvdata(pdev, priv);
+	return 0;
 }
 
 static int i2c_platform_remove(struct platform_device *pdev)
@@ -369,7 +360,6 @@
 	platform_set_drvdata(pdev, NULL);
 	i2c_del_adapter(&priv->adap);
 	i2c_platform_disable(priv);
-	kfree(priv);
 	return 0;
 }
 

diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
new file mode 100644
index 0000000..ae74b3a
--- /dev/null
+++ b/arch/mips/net/Makefile

@@ -0,0 +1,3 @@
+# MIPS networking code
+
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
new file mode 100644
index 0000000..a67b975
--- /dev/null
+++ b/arch/mips/net/bpf_jit.c

@@ -0,0 +1,1399 @@
+/*
+ * Just-In-Time compiler for BPF filters on MIPS
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <linux/kconfig.h>
+#include <linux/moduleloader.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-features.h>
+#include <asm/uasm.h>
+
+#include "bpf_jit.h"
+
+/* ABI
+ *
+ * s0	1st scratch register
+ * s1	2nd scratch register
+ * s2	offset register
+ * s3	BPF register A
+ * s4	BPF register X
+ * s5	*skb
+ * s6	*scratch memory
+ *
+ * On entry (*bpf_func)(*skb, *filter)
+ * a0 = MIPS_R_A0 = skb;
+ * a1 = MIPS_R_A1 = filter;
+ *
+ * Stack
+ * ...
+ * M[15]
+ * M[14]
+ * M[13]
+ * ...
+ * M[0] <-- r_M
+ * saved reg k-1
+ * saved reg k-2
+ * ...
+ * saved reg 0 <-- r_sp
+ * <no argument area>
+ *
+ *                     Packet layout
+ *
+ * <--------------------- len ------------------------>
+ * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
+ * ----------------------------------------------------
+ * |                  skb->data                       |
+ * ----------------------------------------------------
+ */
+
+#define RSIZE	(sizeof(unsigned long))
+#define ptr typeof(unsigned long)
+
+/* ABI specific return values */
+#ifdef CONFIG_32BIT /* O32 */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define r_err	MIPS_R_V1
+#define r_val	MIPS_R_V0
+#else /* CONFIG_CPU_LITTLE_ENDIAN */
+#define r_err	MIPS_R_V0
+#define r_val	MIPS_R_V1
+#endif
+#else /* N64 */
+#define r_err	MIPS_R_V0
+#define r_val	MIPS_R_V0
+#endif
+
+#define r_ret	MIPS_R_V0
+
+/*
+ * Use 2 scratch registers to avoid pipeline interlocks.
+ * There is no overhead during epilogue and prologue since
+ * any of the $s0-$s6 registers will only be preserved if
+ * they are going to actually be used.
+ */
+#define r_s0		MIPS_R_S0 /* scratch reg 1 */
+#define r_s1		MIPS_R_S1 /* scratch reg 2 */
+#define r_off		MIPS_R_S2
+#define r_A		MIPS_R_S3
+#define r_X		MIPS_R_S4
+#define r_skb		MIPS_R_S5
+#define r_M		MIPS_R_S6
+#define r_tmp_imm	MIPS_R_T6 /* No need to preserve this */
+#define r_tmp		MIPS_R_T7 /* No need to preserve this */
+#define r_zero		MIPS_R_ZERO
+#define r_sp		MIPS_R_SP
+#define r_ra		MIPS_R_RA
+
+#define SCRATCH_OFF(k)		(4 * (k))
+
+/* JIT flags */
+#define SEEN_CALL		(1 << BPF_MEMWORDS)
+#define SEEN_SREG_SFT		(BPF_MEMWORDS + 1)
+#define SEEN_SREG_BASE		(1 << SEEN_SREG_SFT)
+#define SEEN_SREG(x)		(SEEN_SREG_BASE << (x))
+#define SEEN_S0			SEEN_SREG(0)
+#define SEEN_S1			SEEN_SREG(1)
+#define SEEN_OFF		SEEN_SREG(2)
+#define SEEN_A			SEEN_SREG(3)
+#define SEEN_X			SEEN_SREG(4)
+#define SEEN_SKB		SEEN_SREG(5)
+#define SEEN_MEM		SEEN_SREG(6)
+
+/* Arguments used by JIT */
+#define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */
+
+#define FLAG_NEED_X_RESET	(1 << 0)
+
+#define SBIT(x)			(1 << (x)) /* Signed version of BIT() */
+
+/**
+ * struct jit_ctx - JIT context
+ * @skf:		The sk_filter
+ * @prologue_bytes:	Number of bytes for prologue
+ * @idx:		Instruction index; the next slot of @target to be
+ *			written, advanced once per emitted instruction
+ * @flags:		JIT flags
+ * @offsets:		Instruction offsets
+ * @target:		Memory location for the compiled filter; while it is
+ *			NULL the emit helpers only advance @idx
+ */
+struct jit_ctx {
+	const struct sk_filter *skf;
+	unsigned int prologue_bytes;
+	u32 idx;
+	u32 flags;
+	u32 *offsets;
+	u32 *target;
+};
+
+
+/*
+ * If *k has no more than one bit set, replace it with ilog2(*k) and return
+ * 1 so the caller can shift right instead of dividing. Otherwise leave *k
+ * untouched and return 0.
+ */
+static inline int optimize_div(u32 *k)
+{
+	u32 divisor = *k;
+
+	if (divisor & (divisor - 1))
+		return 0;
+
+	/* power of 2 divides can be implemented with right shift */
+	*k = ilog2(divisor);
+	return 1;
+}
+
+/*
+ * Emit one instruction through uasm_i_<func>() if the JIT memory space has
+ * been allocated. The instruction index is advanced in either case, so a
+ * pass without a target buffer still counts the instructions.
+ *
+ * @ctx is fully parenthesised at every use so that any pointer expression
+ * may be passed.
+ */
+#define emit_instr(ctx, func, ...)			\
+do {							\
+	if ((ctx)->target != NULL) {			\
+		u32 *p = &(ctx)->target[(ctx)->idx];	\
+		uasm_i_##func(&p, ##__VA_ARGS__);	\
+	}						\
+	(ctx)->idx++;					\
+} while (0)
+
+/*
+ * Check an immediate against the signed 16-bit range.
+ *
+ * NOTE: despite the name, this returns true when imm does NOT fit in a
+ * signed 16-bit immediate (imm >= 2^15 or imm < -2^15); callers use a true
+ * result to fall back to loading the value into a register.
+ */
+static inline bool is_range16(s32 imm)
+{
+	return imm < -SBIT(15) || imm >= SBIT(15);
+}
+
+/* Emit "addu dst, src1, src2" */
+static inline void emit_addu(unsigned int dst, unsigned int src1,
+			     unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, addu, dst, src1, src2);
+}
+
+/* Emit a single nop */
+static inline void emit_nop(struct jit_ctx *ctx)
+{
+	emit_instr(ctx, nop);
+}
+
+/*
+ * Load a u32 immediate to a register.
+ *
+ * Values for which is_range16() returns true are built with lui (into
+ * r_tmp_imm) followed by ori and take two instruction slots; all other
+ * values take a single addiu from r_zero.
+ */
+static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
+{
+	const bool two_insns = is_range16(imm);
+
+	if (ctx->target != NULL) {
+		u32 *p = &ctx->target[ctx->idx];
+
+		if (two_insns) {
+			/* addiu can only handle s16 */
+			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
+			p = &ctx->target[ctx->idx + 1];
+			uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
+		} else {
+			uasm_i_addiu(&p, dst, r_zero, imm);
+		}
+	}
+
+	ctx->idx += two_insns ? 2 : 1;
+}
+
+/* Emit "or dst, src1, src2" */
+static inline void emit_or(unsigned int dst, unsigned int src1,
+			   unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, or, dst, src1, src2);
+}
+
+/*
+ * OR src with an immediate into dst. Immediates that fit in 16 bits use
+ * ori directly; larger ones are first loaded into r_tmp.
+ */
+static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
+			    struct jit_ctx *ctx)
+{
+	if (imm < BIT(16)) {
+		emit_instr(ctx, ori, dst, src, imm);
+		return;
+	}
+
+	emit_load_imm(r_tmp, imm, ctx);
+	emit_or(dst, src, r_tmp, ctx);
+}
+
+
+/* Emit "daddu dst, src1, src2" */
+static inline void emit_daddu(unsigned int dst, unsigned int src1,
+			      unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, daddu, dst, src1, src2);
+}
+
+/* Emit "daddiu dst, src, imm"; no range check is done on imm (see below) */
+static inline void emit_daddiu(unsigned int dst, unsigned int src,
+			       int imm, struct jit_ctx *ctx)
+{
+	/*
+	 * Only used for stack, so the imm is relatively small
+	 * and it fits in 15-bits
+	 */
+	emit_instr(ctx, daddiu, dst, src, imm);
+}
+
+/*
+ * Add an immediate to src into dst. When is_range16() reports that the
+ * immediate cannot be encoded, it is loaded into r_tmp and added with addu.
+ */
+static inline void emit_addiu(unsigned int dst, unsigned int src,
+			      u32 imm, struct jit_ctx *ctx)
+{
+	if (!is_range16(imm)) {
+		emit_instr(ctx, addiu, dst, src, imm);
+		return;
+	}
+
+	emit_load_imm(r_tmp, imm, ctx);
+	emit_addu(dst, r_tmp, src, ctx);
+}
+
+/* Emit "and dst, src1, src2" */
+static inline void emit_and(unsigned int dst, unsigned int src1,
+			    unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, and, dst, src1, src2);
+}
+
+/*
+ * AND src with an immediate into dst. Immediates that fit in a u16 use
+ * andi directly; larger ones go through r_tmp.
+ */
+static inline void emit_andi(unsigned int dst, unsigned int src,
+			     u32 imm, struct jit_ctx *ctx)
+{
+	if (imm < BIT(16)) {
+		emit_instr(ctx, andi, dst, src, imm);
+		return;
+	}
+
+	/* imm does not fit in u16, so load it to a register first */
+	emit_load_imm(r_tmp, imm, ctx);
+	emit_and(dst, src, r_tmp, ctx);
+}
+
+/* Emit "xor dst, src1, src2" */
+static inline void emit_xor(unsigned int dst, unsigned int src1,
+			    unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, xor, dst, src1, src2);
+}
+
+/*
+ * XOR src with an immediate into dst. Immediates that fit in a u16 use
+ * xori directly; larger ones go through r_tmp.
+ */
+static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
+{
+	if (imm < BIT(16)) {
+		emit_instr(ctx, xori, dst, src, imm);
+		return;
+	}
+
+	/* imm does not fit in u16, so load it to a register first */
+	emit_load_imm(r_tmp, imm, ctx);
+	emit_xor(dst, src, r_tmp, ctx);
+}
+
+/*
+ * Adjust the stack pointer by offset, using daddiu on 64-bit kernels and
+ * addiu otherwise.
+ */
+static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
+{
+	if (!config_enabled(CONFIG_64BIT)) {
+		emit_instr(ctx, addiu, r_sp, r_sp, offset);
+		return;
+	}
+
+	emit_instr(ctx, daddiu, r_sp, r_sp, offset);
+}
+
+/* Emit "subu dst, src1, src2" */
+static inline void emit_subu(unsigned int dst, unsigned int src1,
+			     unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, subu, dst, src1, src2);
+}
+
+/* Negate reg in place by subtracting it from r_zero */
+static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
+{
+	emit_subu(reg, r_zero, reg, ctx);
+}
+
+/* Emit "sllv" with the shift amount taken from register sa */
+static inline void emit_sllv(unsigned int dst, unsigned int src,
+			     unsigned int sa, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, sllv, dst, src, sa);
+}
+
+/* Emit "sll" by a constant shift amount; BUGs if sa does not fit in 5 bits */
+static inline void emit_sll(unsigned int dst, unsigned int src,
+			    unsigned int sa, struct jit_ctx *ctx)
+{
+	/* sa is 5-bits long */
+	BUG_ON(sa >= BIT(5));
+	emit_instr(ctx, sll, dst, src, sa);
+}
+
+/* Emit "srlv" with the shift amount taken from register sa */
+static inline void emit_srlv(unsigned int dst, unsigned int src,
+			     unsigned int sa, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, srlv, dst, src, sa);
+}
+
+/* Emit "srl" by a constant shift amount; BUGs if sa does not fit in 5 bits */
+static inline void emit_srl(unsigned int dst, unsigned int src,
+			    unsigned int sa, struct jit_ctx *ctx)
+{
+	/* sa is 5-bits long */
+	BUG_ON(sa >= BIT(5));
+	emit_instr(ctx, srl, dst, src, sa);
+}
+
+/* Emit "sltu dst, src1, src2" */
+static inline void emit_sltu(unsigned int dst, unsigned int src1,
+			     unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, sltu, dst, src1, src2);
+}
+
+/*
+ * Unsigned "set on less than" against an immediate. When is_range16()
+ * reports that the immediate cannot be encoded in 16 bits, it is loaded
+ * into r_tmp and compared with sltu instead.
+ */
+static inline void emit_sltiu(unsigned dst, unsigned int src,
+			      unsigned int imm, struct jit_ctx *ctx)
+{
+	if (!is_range16((s32)imm)) {
+		/* 16 bit immediate */
+		emit_instr(ctx, sltiu, dst, src, imm);
+		return;
+	}
+
+	emit_load_imm(r_tmp, imm, ctx);
+	emit_sltu(dst, src, r_tmp, ctx);
+}
+
+/* Store a register on the stack: sd on 64-bit kernels, sw otherwise */
+static inline void emit_store_stack_reg(ptr reg, ptr base,
+					unsigned int offset,
+					struct jit_ctx *ctx)
+{
+	if (!config_enabled(CONFIG_64BIT)) {
+		emit_instr(ctx, sw, reg, offset, base);
+		return;
+	}
+
+	emit_instr(ctx, sd, reg, offset, base);
+}
+
+/* Emit an sw instruction storing @reg at @offset from @base. */
+static inline void emit_store(ptr reg, ptr base, unsigned int offset,
+			      struct jit_ctx *ctx)
+{
+	emit_instr(ctx, sw, reg, offset, base);
+}
+
+/*
+ * Reload a full-width register from @offset off @base: ld on CONFIG_64BIT
+ * kernels, lw otherwise.  Counterpart of emit_store_stack_reg().
+ */
+static inline void emit_load_stack_reg(ptr reg, ptr base,
+				       unsigned int offset,
+				       struct jit_ctx *ctx)
+{
+	if (!config_enabled(CONFIG_64BIT)) {
+		emit_instr(ctx, lw, reg, offset, base);
+		return;
+	}
+
+	emit_instr(ctx, ld, reg, offset, base);
+}
+
+/* Emit an lw instruction loading @reg from @offset off @base. */
+static inline void emit_load(unsigned int reg, unsigned int base,
+			     unsigned int offset, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, lw, reg, offset, base);
+}
+
+/* Emit an lb instruction loading one byte into @reg from @offset off @base. */
+static inline void emit_load_byte(unsigned int reg, unsigned int base,
+				  unsigned int offset, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, lb, reg, offset, base);
+}
+
+/*
+ * Emit an lh instruction loading a halfword into @reg from @offset off
+ * @base.
+ * NOTE(review): lh is the signed halfword load; callers reading unsigned
+ * 16-bit fields should confirm the upper bits are masked where it matters.
+ */
+static inline void emit_half_load(unsigned int reg, unsigned int base,
+				  unsigned int offset, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, lh, reg, offset, base);
+}
+
+/* Emit a mul instruction with operands (dst, src1, src2). */
+static inline void emit_mul(unsigned int dst, unsigned int src1,
+			    unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, mul, dst, src1, src2);
+}
+
+/*
+ * Emit "dst = dst / src" (unsigned).
+ *
+ * Two instructions are generated: divu, followed by a move from the LO
+ * register, which is where divu leaves the quotient (HI receives the
+ * remainder).  When ctx->target is NULL only the instruction count is
+ * advanced, which is how the sizing pass measures the program.
+ */
+static inline void emit_div(unsigned int dst, unsigned int src,
+			    struct jit_ctx *ctx)
+{
+	if (ctx->target != NULL) {
+		u32 *p = &ctx->target[ctx->idx];
+		uasm_i_divu(&p, dst, src);
+		p = &ctx->target[ctx->idx + 1];
+		/* quotient lives in LO */
+		uasm_i_mflo(&p, dst);
+	}
+	ctx->idx += 2; /* 2 insts */
+}
+
+/*
+ * Emit "dst = dst % src" (unsigned).
+ *
+ * Two instructions are generated: divu, followed by a move from the HI
+ * register, which is where divu leaves the remainder (LO receives the
+ * quotient).  When ctx->target is NULL only the instruction count is
+ * advanced, which is how the sizing pass measures the program.
+ */
+static inline void emit_mod(unsigned int dst, unsigned int src,
+			    struct jit_ctx *ctx)
+{
+	if (ctx->target != NULL) {
+		u32 *p = &ctx->target[ctx->idx];
+		uasm_i_divu(&p, dst, src);
+		p = &ctx->target[ctx->idx + 1];
+		/* remainder lives in HI */
+		uasm_i_mfhi(&p, dst);
+	}
+	ctx->idx += 2; /* 2 insts */
+}
+
+/* Emit a dsll instruction with operands (dst, src, sa). */
+static inline void emit_dsll(unsigned int dst, unsigned int src,
+			     unsigned int sa, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, dsll, dst, src, sa);
+}
+
+/*
+ * Emit a dsrl32 instruction with operands (dst, src, sa).  build_body()
+ * uses it with sa == 0 to fetch the upper 32 bits of a 64-bit helper result.
+ */
+static inline void emit_dsrl32(unsigned int dst, unsigned int src,
+			       unsigned int sa, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, dsrl32, dst, src, sa);
+}
+
+/*
+ * Emit a wsbh instruction with operands (dst, src).  build_body() uses it
+ * for the little-endian byte swap of skb->protocol on R2 and later cores.
+ */
+static inline void emit_wsbh(unsigned int dst, unsigned int src,
+			     struct jit_ctx *ctx)
+{
+	emit_instr(ctx, wsbh, dst, src);
+}
+
+/*
+ * Load the address of a function into @reg.
+ *
+ * On 32-bit kernels this is a plain immediate load.  On 64-bit kernels the
+ * address is assembled in three steps: bits 63..32 are loaded first, then
+ * bits 31..16 and bits 15..0 are each shifted/OR-ed in, using r_tmp and
+ * r_tmp_imm as intermediates.
+ */
+static inline void emit_load_func(unsigned int reg, ptr imm,
+				  struct jit_ctx *ctx)
+{
+	if (!config_enabled(CONFIG_64BIT)) {
+		emit_load_imm(reg, imm, ctx);
+		return;
+	}
+
+	/* At this point imm is always 64-bit */
+	emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
+	/* make room for, then merge, bits 31..16 */
+	emit_dsll(r_tmp_imm, r_tmp, 16, ctx);
+	emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
+	/* make room for, then merge, bits 15..0 */
+	emit_dsll(r_tmp_imm, r_tmp, 16, ctx);
+	emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
+}
+
+/*
+ * Copy a full-width machine register: dst = src + r_zero, using daddu on
+ * CONFIG_64BIT kernels and addu otherwise.
+ */
+static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
+{
+	if (!config_enabled(CONFIG_64BIT)) {
+		emit_addu(dst, src, r_zero, ctx);
+		return;
+	}
+
+	emit_daddu(dst, src, r_zero, ctx);
+}
+
+/*
+ * Move to JIT (32-bit) register: always emitted as dst = src + r_zero via
+ * emit_addu(), regardless of the kernel's word size.
+ */
+static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
+{
+	emit_addu(dst, src, r_zero, ctx);
+}
+
+/*
+ * Compute the byte offset to use in a PC-relative branch to BPF
+ * instruction @tgt.
+ *
+ * During the sizing pass (ctx->target == NULL) the offsets table is still
+ * being filled in, so 0 is returned.
+ */
+static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
+{
+	u32 branch_pos;
+
+	if (ctx->target == NULL)
+		return 0;
+
+	/*
+	 * Only forward branches are generated, so tgt always lies after
+	 * the branch.  ctx->offsets[] was recorded without the prologue,
+	 * hence the position of the branch (ctx->idx) is rebased by
+	 * prologue_bytes before the two are compared.
+	 */
+	branch_pos = ctx->idx * 4 - ctx->prologue_bytes;
+
+	/*
+	 * Branch on MIPS:
+	 * I: target_offset <- sign_extend(offset)
+	 * I+1: PC += target_offset (delay slot)
+	 *
+	 * The offset is applied relative to the delay slot rather than to
+	 * the branch itself, so 4 more bytes are subtracted.
+	 */
+	return ctx->offsets[tgt] - branch_pos - 4;
+}
+
+/*
+ * Emit one branch instruction selected by @cond: beq for MIPS_COND_EQ, bne
+ * for MIPS_COND_NE and an unconditional b for MIPS_COND_ALL.  Any other
+ * condition only produces a warning and no instruction is written, although
+ * the slot is still counted.  @imm is the branch offset handed to the
+ * assembler helpers.
+ */
+static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
+			     unsigned int imm, struct jit_ctx *ctx)
+{
+	/* The sizing pass only counts the slot */
+	if (ctx->target != NULL) {
+		u32 *slot = &ctx->target[ctx->idx];
+
+		if (cond == MIPS_COND_EQ)
+			uasm_i_beq(&slot, reg1, reg2, imm);
+		else if (cond == MIPS_COND_NE)
+			uasm_i_bne(&slot, reg1, reg2, imm);
+		else if (cond == MIPS_COND_ALL)
+			uasm_i_b(&slot, imm);
+		else
+			pr_warn("%s: Unhandled branch conditional: %d\n",
+				__func__, cond);
+	}
+	ctx->idx++;
+}
+
+/* Emit an unconditional branch with offset @imm (MIPS_COND_ALL). */
+static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
+{
+	emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
+}
+
+/* Emit a jalr instruction with operands (link, reg). */
+static inline void emit_jalr(unsigned int link, unsigned int reg,
+			     struct jit_ctx *ctx)
+{
+	emit_instr(ctx, jalr, link, reg);
+}
+
+/* Emit a jr instruction jumping to the address held in @reg. */
+static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, jr, reg);
+}
+
+/*
+ * Round @num up to the stack alignment: 8 bytes on 32-bit kernels, 16 bytes
+ * on 64-bit kernels.  The result is returned as a u16.
+ */
+static inline u16 align_sp(unsigned int num)
+{
+	unsigned int mask;
+
+	/* Double word alignment for 32-bit, quadword for 64-bit */
+	if (config_enabled(CONFIG_64BIT))
+		mask = 16 - 1;
+	else
+		mask = 8 - 1;
+
+	return (num + mask) & ~mask;
+}
+
+/*
+ * Record that the filter reads X.  If X has not been marked as seen yet,
+ * FLAG_NEED_X_RESET is raised as well so that the prologue zeroes it.
+ * SEEN_X is always set on return.
+ */
+static inline void update_on_xread(struct jit_ctx *ctx)
+{
+	/* Already seen: nothing new to record */
+	if (ctx->flags & SEEN_X)
+		return;
+
+	ctx->flags |= FLAG_NEED_X_RESET | SEEN_X;
+}
+
+/*
+ * Tell whether opcode @inst is one of the listed loads into A.
+ * build_prologue() uses this to skip the initial clearing of A when the
+ * very first instruction is such a load.
+ */
+static bool is_load_to_a(u16 inst)
+{
+	bool loads_a = false;
+
+	switch (inst) {
+	case BPF_S_LD_W_LEN:
+	case BPF_S_LD_W_ABS:
+	case BPF_S_LD_H_ABS:
+	case BPF_S_LD_B_ABS:
+	case BPF_S_ANC_CPU:
+	case BPF_S_ANC_IFINDEX:
+	case BPF_S_ANC_MARK:
+	case BPF_S_ANC_PROTOCOL:
+	case BPF_S_ANC_RXHASH:
+	case BPF_S_ANC_VLAN_TAG:
+	case BPF_S_ANC_VLAN_TAG_PRESENT:
+	case BPF_S_ANC_QUEUE:
+		loads_a = true;
+		break;
+	default:
+		break;
+	}
+
+	return loads_a;
+}
+
+/*
+ * Emit the register-saving half of the prologue.
+ *
+ * The stack pointer is lowered by align_sp(@offset).  If the filter makes
+ * calls, a0/a1 are stored in the argument save area; then every s-register
+ * flagged in ctx->flags is stored from the bottom of the frame upwards,
+ * followed by $ra (again only if calls are made).  Finally r_M is pointed
+ * at the scratch memory that follows, when the filter uses it.
+ */
+static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
+{
+	const int frame = align_sp(offset);
+	u32 pending = ctx->flags >> SEEN_SREG_SFT;
+	int slot = 0;
+	int reg;
+
+	/* Adjust the stack pointer */
+	emit_stack_offset(-frame, ctx);
+
+	if (ctx->flags & SEEN_CALL) {
+		int args_off;
+
+		/* Argument save area */
+		if (config_enabled(CONFIG_64BIT))
+			/* Bottom of current frame */
+			args_off = frame - RSIZE;
+		else
+			/* Top of previous frame */
+			args_off = frame + RSIZE;
+		emit_store_stack_reg(MIPS_R_A0, r_sp, args_off, ctx);
+		emit_store_stack_reg(MIPS_R_A1, r_sp, args_off + RSIZE, ctx);
+	}
+
+	/* The s-register flags form a bitmap: bit n stands for $s<n> */
+	for (reg = 0; pending; reg++, pending >>= 1) {
+		if (!(pending & 0x1))
+			continue;
+		emit_store_stack_reg(MIPS_R_S0 + reg, r_sp, slot, ctx);
+		slot += RSIZE;
+	}
+
+	/* save return address */
+	if (ctx->flags & SEEN_CALL) {
+		emit_store_stack_reg(r_ra, r_sp, slot, ctx);
+		slot += RSIZE;
+	}
+
+	/* Setup r_M leaving the alignment gap if necessary */
+	if (ctx->flags & SEEN_MEM) {
+		if (slot % (RSIZE * 2))
+			slot += RSIZE;
+		emit_addiu(r_M, r_sp, slot, ctx);
+	}
+}
+
+/*
+ * Emit the register-restoring half of the epilogue; mirror image of
+ * save_bpf_jit_regs().
+ *
+ * a0/a1 (when calls are made), the flagged s-registers and $ra are reloaded
+ * from the slots they were saved in, then the stack pointer is raised by
+ * align_sp(@offset).
+ */
+static void restore_bpf_jit_regs(struct jit_ctx *ctx,
+				 unsigned int offset)
+{
+	const int frame = align_sp(offset);
+	u32 pending = ctx->flags >> SEEN_SREG_SFT;
+	int slot = 0;
+	int reg;
+
+	if (ctx->flags & SEEN_CALL) {
+		int args_off;
+
+		if (config_enabled(CONFIG_64BIT))
+			/* Bottom of current frame */
+			args_off = frame - RSIZE;
+		else
+			/* Top of previous frame */
+			args_off = frame + RSIZE;
+		emit_load_stack_reg(MIPS_R_A0, r_sp, args_off, ctx);
+		emit_load_stack_reg(MIPS_R_A1, r_sp, args_off + RSIZE, ctx);
+	}
+
+	/* The s-register flags form a bitmap: bit n stands for $s<n> */
+	for (reg = 0; pending; reg++, pending >>= 1) {
+		if (!(pending & 0x1))
+			continue;
+		emit_load_stack_reg(MIPS_R_S0 + reg, r_sp, slot, ctx);
+		slot += RSIZE;
+	}
+
+	/* restore return address */
+	if (ctx->flags & SEEN_CALL)
+		emit_load_stack_reg(r_ra, r_sp, slot, ctx);
+
+	/* Restore the sp and discard the scratch memory */
+	emit_stack_offset(frame, ctx);
+}
+
+/*
+ * Work out how much stack the generated code needs, from the features
+ * recorded in ctx->flags.  The value returned is one register slot (RSIZE)
+ * less than the accumulated total; callers round it with align_sp().
+ */
+static unsigned int get_stack_depth(struct jit_ctx *ctx)
+{
+	int depth;
+
+	/* One slot for every s* register that has to be preserved */
+	depth = hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;
+
+	/* Scratch memory: BPF_MEMWORDS words of 32 bits each */
+	if (ctx->flags & SEEN_MEM)
+		depth += 4 * BPF_MEMWORDS;
+
+	if (ctx->flags & SEEN_CALL) {
+		/*
+		 * The JIT code calls external functions with 2 arguments.
+		 * For o32 no argument space is allocated here: losing the
+		 * outgoing arguments across calls does not matter, and the
+		 * space for preserving the incoming ones is already
+		 * provided by the caller.  For n64 that space has to be
+		 * allocated by us.  In both cases $ra needs a slot.
+		 */
+		if (config_enabled(CONFIG_64BIT))
+			depth += (ARGS_USED_BY_JIT + 1) * RSIZE;
+		else
+			depth += RSIZE;
+	}
+
+	/*
+	 * Subtract the bytes for the last register since we only care about
+	 * the location on the stack pointer.
+	 */
+	return depth - RSIZE;
+}
+
+/*
+ * Emit the prologue: save registers, copy the skb pointer from a0 into
+ * r_skb when the filter uses it, and zero X and/or A where required.
+ */
+static void build_prologue(struct jit_ctx *ctx)
+{
+	const u16 first_inst = ctx->skf->insns[0].code;
+
+	/* Reserve the frame and save whatever the body clobbers */
+	save_bpf_jit_regs(ctx, get_stack_depth(ctx));
+
+	if (ctx->flags & SEEN_SKB)
+		emit_reg_move(r_skb, MIPS_R_A0, ctx);
+
+	if (ctx->flags & FLAG_NEED_X_RESET)
+		emit_jit_reg_move(r_X, r_zero, ctx);
+
+	/*
+	 * Do not leak kernel data to userspace: A is cleared unless the
+	 * first instruction is RET_K or one of the loads into A.
+	 */
+	if (first_inst == BPF_S_RET_K || is_load_to_a(first_inst))
+		return;
+
+	emit_jit_reg_move(r_A, r_zero, ctx);
+}
+
+/*
+ * Emit the epilogue: restore the saved registers, release the frame and
+ * return to the caller through $ra.
+ */
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	/* Same frame size as computed for the prologue */
+	restore_bpf_jit_regs(ctx, get_stack_depth(ctx));
+
+	/* Return, with a nop following the jump */
+	emit_jr(r_ra, ctx);
+	emit_nop(ctx);
+}
+
+/*
+ * Helper called from JITed code: copy one byte at @offset out of @skb.
+ * The skb_copy_bits() status is returned in the upper 32 bits and the byte
+ * in the lower 32 bits.
+ */
+static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
+{
+	u8 byte;
+	int err = skb_copy_bits(skb, offset, &byte, 1);
+
+	return ((u64)err << 32) | byte;
+}
+
+/*
+ * Helper called from JITed code: copy two bytes at @offset out of @skb.
+ * The skb_copy_bits() status is returned in the upper 32 bits and the
+ * value, converted with ntohs(), in the lower 32 bits.
+ */
+static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
+{
+	u16 half;
+	int err = skb_copy_bits(skb, offset, &half, 2);
+
+	return ((u64)err << 32) | ntohs(half);
+}
+
+/*
+ * Helper called from JITed code: copy four bytes at @offset out of @skb.
+ * The skb_copy_bits() status is returned in the upper 32 bits and the
+ * value, converted with ntohl(), in the lower 32 bits.
+ */
+static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
+{
+	u32 word;
+	int err = skb_copy_bits(skb, offset, &word, 4);
+
+	return ((u64)err << 32) | ntohl(word);
+}
+
+#define PKT_TYPE_MAX 7
+/*
+ * Locate the byte of struct sk_buff that holds the pkt_type bitfield.
+ *
+ * A zeroed probe skb gets all pkt_type bits set, and the structure is then
+ * scanned for the first byte equal to PKT_TYPE_MAX.  Returns that byte
+ * offset, or -1 (after a one-time error message) when no byte matches.
+ *
+ * NOTE(review): matching against PKT_TYPE_MAX assumes the pkt_type bits sit
+ * in the low bits of their byte -- confirm for big-endian bitfield layouts.
+ */
+static int pkt_type_offset(void)
+{
+	struct sk_buff skb_probe = {
+		.pkt_type = ~0,
+	};
+	char *bytes = (char *)&skb_probe;
+	unsigned int pos = 0;
+
+	while (pos < sizeof(struct sk_buff)) {
+		if (bytes[pos] == PKT_TYPE_MAX)
+			return pos;
+		pos++;
+	}
+
+	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
+	return -1;
+}
+
+/*
+ * Translate every instruction of the filter into MIPS code.
+ *
+ * The function is run twice by bpf_jit_compile().  On the first pass
+ * (ctx->target == NULL) nothing is written: the emit helpers only count
+ * instructions, ctx->flags collects what the filter needs and
+ * ctx->offsets[] records where each BPF instruction starts, including one
+ * extra entry for the position right after the last instruction (used as
+ * the branch target for "go to the epilogue").  On the second pass the
+ * code is written to ctx->target.
+ *
+ * Returns 0 on success and -1 when the filter cannot be JITed (unhandled
+ * opcode, or pkt_type could not be located).
+ */
+static int build_body(struct jit_ctx *ctx)
+{
+	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
+	const struct sk_filter *prog = ctx->skf;
+	const struct sock_filter *inst;
+	unsigned int i, off, load_order, condt;
+	u32 k, b_off __maybe_unused;
+	int pkt_off;
+
+	for (i = 0; i < prog->len; i++) {
+		inst = &(prog->insns[i]);
+		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
+			 __func__, inst->code, inst->jt, inst->jf, inst->k);
+		k = inst->k;
+
+		if (ctx->target == NULL)
+			ctx->offsets[i] = ctx->idx * 4;
+
+		switch (inst->code) {
+		case BPF_S_LD_IMM:
+			/* A <- k ==> li r_A, k */
+			ctx->flags |= SEEN_A;
+			emit_load_imm(r_A, k, ctx);
+			break;
+		case BPF_S_LD_W_LEN:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+			/* A <- len ==> lw r_A, offset(skb) */
+			ctx->flags |= SEEN_SKB | SEEN_A;
+			off = offsetof(struct sk_buff, len);
+			emit_load(r_A, r_skb, off, ctx);
+			break;
+		case BPF_S_LD_MEM:
+			/* A <- M[k] ==> lw r_A, offset(M) */
+			ctx->flags |= SEEN_MEM | SEEN_A;
+			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
+			break;
+		case BPF_S_LD_W_ABS:
+			/* A <- P[k:4] */
+			load_order = 2;
+			goto load;
+		case BPF_S_LD_H_ABS:
+			/* A <- P[k:2] */
+			load_order = 1;
+			goto load;
+		case BPF_S_LD_B_ABS:
+			/* A <- P[k:1] */
+			load_order = 0;
+load:
+			emit_load_imm(r_off, k, ctx);
+load_common:
+			ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
+				SEEN_SKB | SEEN_A;
+
+			emit_load_func(r_s0, (ptr)load_func[load_order],
+				      ctx);
+			emit_reg_move(MIPS_R_A0, r_skb, ctx);
+			emit_jalr(MIPS_R_RA, r_s0, ctx);
+			/* Load second argument to delay slot */
+			emit_reg_move(MIPS_R_A1, r_off, ctx);
+			/* Check the error value */
+			if (config_enabled(CONFIG_64BIT)) {
+				/* Get error code from the top 32-bits */
+				emit_dsrl32(r_s0, r_val, 0, ctx);
+				/* Branch to 3 instructions ahead */
+				emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
+					   ctx);
+			} else {
+				/* Branch to 3 instructions ahead */
+				emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
+					   ctx);
+			}
+			emit_nop(ctx);
+			/* We are good */
+			emit_b(b_imm(i + 1, ctx), ctx);
+			emit_jit_reg_move(r_A, r_val, ctx);
+			/* Return with error */
+			emit_b(b_imm(prog->len, ctx), ctx);
+			emit_reg_move(r_ret, r_zero, ctx);
+			break;
+		case BPF_S_LD_W_IND:
+			/* A <- P[X + k:4] */
+			load_order = 2;
+			goto load_ind;
+		case BPF_S_LD_H_IND:
+			/* A <- P[X + k:2] */
+			load_order = 1;
+			goto load_ind;
+		case BPF_S_LD_B_IND:
+			/* A <- P[X + k:1] */
+			load_order = 0;
+load_ind:
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_OFF | SEEN_X;
+			emit_addiu(r_off, r_X, k, ctx);
+			goto load_common;
+		case BPF_S_LDX_IMM:
+			/* X <- k */
+			ctx->flags |= SEEN_X;
+			emit_load_imm(r_X, k, ctx);
+			break;
+		case BPF_S_LDX_MEM:
+			/* X <- M[k] */
+			ctx->flags |= SEEN_X | SEEN_MEM;
+			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
+			break;
+		case BPF_S_LDX_W_LEN:
+			/* X <- len */
+			ctx->flags |= SEEN_X | SEEN_SKB;
+			off = offsetof(struct sk_buff, len);
+			emit_load(r_X, r_skb, off, ctx);
+			break;
+		case BPF_S_LDX_B_MSH:
+			/* X <- 4 * (P[k:1] & 0xf) */
+			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
+			/* Load offset to a1 */
+			emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
+			/*
+			 * This may emit two instructions so it may not fit
+			 * in the delay slot. So use a0 in the delay slot.
+			 */
+			emit_load_imm(MIPS_R_A1, k, ctx);
+			emit_jalr(MIPS_R_RA, r_s0, ctx);
+			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
+			/* Check the error value */
+			if (config_enabled(CONFIG_64BIT)) {
+				/* Top 32-bits of $v0 on 64-bit */
+				emit_dsrl32(r_s0, r_val, 0, ctx);
+				emit_bcond(MIPS_COND_NE, r_s0, r_zero,
+					   3 << 2, ctx);
+			} else {
+				emit_bcond(MIPS_COND_NE, r_err, r_zero,
+					   3 << 2, ctx);
+			}
+			/* No need for delay slot */
+			/* We are good */
+			/* X <- P[1:K] & 0xf */
+			emit_andi(r_X, r_val, 0xf, ctx);
+			/* X << 2 */
+			emit_b(b_imm(i + 1, ctx), ctx);
+			emit_sll(r_X, r_X, 2, ctx); /* delay slot */
+			/* Return with error */
+			emit_b(b_imm(prog->len, ctx), ctx);
+			emit_load_imm(r_ret, 0, ctx); /* delay slot */
+			break;
+		case BPF_S_ST:
+			/* M[k] <- A */
+			ctx->flags |= SEEN_MEM | SEEN_A;
+			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
+			break;
+		case BPF_S_STX:
+			/* M[k] <- X */
+			ctx->flags |= SEEN_MEM | SEEN_X;
+			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
+			break;
+		case BPF_S_ALU_ADD_K:
+			/* A += K */
+			ctx->flags |= SEEN_A;
+			emit_addiu(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ALU_ADD_X:
+			/* A += X */
+			ctx->flags |= SEEN_A | SEEN_X;
+			emit_addu(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_SUB_K:
+			/* A -= K */
+			ctx->flags |= SEEN_A;
+			emit_addiu(r_A, r_A, -k, ctx);
+			break;
+		case BPF_S_ALU_SUB_X:
+			/* A -= X */
+			ctx->flags |= SEEN_A | SEEN_X;
+			emit_subu(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_MUL_K:
+			/* A *= K */
+			/* Load K to scratch register before MUL */
+			ctx->flags |= SEEN_A | SEEN_S0;
+			emit_load_imm(r_s0, k, ctx);
+			emit_mul(r_A, r_A, r_s0, ctx);
+			break;
+		case BPF_S_ALU_MUL_X:
+			/* A *= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_A | SEEN_X;
+			emit_mul(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_DIV_K:
+			/* A /= k */
+			if (k == 1)
+				break;
+			if (optimize_div(&k)) {
+				ctx->flags |= SEEN_A;
+				emit_srl(r_A, r_A, k, ctx);
+				break;
+			}
+			ctx->flags |= SEEN_A | SEEN_S0;
+			emit_load_imm(r_s0, k, ctx);
+			emit_div(r_A, r_s0, ctx);
+			break;
+		case BPF_S_ALU_MOD_K:
+			/* A %= k */
+			if (!(k & (k - 1))) {
+				/*
+				 * k is a power of two, so the remainder is
+				 * A & (k - 1); for k == 1 the mask is 0 and
+				 * A becomes 0.  It is NOT always zero: the
+				 * dividend is unknown at JIT time.
+				 * NOTE(review): k is presumably never 0
+				 * here (rejected when the filter is
+				 * checked) -- confirm.
+				 */
+				ctx->flags |= SEEN_A;
+				emit_andi(r_A, r_A, k - 1, ctx);
+			} else {
+				ctx->flags |= SEEN_A | SEEN_S0;
+				emit_load_imm(r_s0, k, ctx);
+				emit_mod(r_A, r_s0, ctx);
+			}
+			break;
+		case BPF_S_ALU_DIV_X:
+			/* A /= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_X | SEEN_A;
+			/* Check if r_X is zero */
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
+				   b_imm(prog->len, ctx), ctx);
+			emit_load_imm(r_val, 0, ctx); /* delay slot */
+			emit_div(r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_MOD_X:
+			/* A %= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_X | SEEN_A;
+			/* Check if r_X is zero */
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
+				   b_imm(prog->len, ctx), ctx);
+			emit_load_imm(r_val, 0, ctx); /* delay slot */
+			emit_mod(r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_OR_K:
+			/* A |= K */
+			ctx->flags |= SEEN_A;
+			emit_ori(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ALU_OR_X:
+			/* A |= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_A;
+			/* register form: r_X is a register, not a constant */
+			emit_or(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_XOR_K:
+			/* A ^= k */
+			ctx->flags |= SEEN_A;
+			emit_xori(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ANC_ALU_XOR_X:
+		case BPF_S_ALU_XOR_X:
+			/* A ^= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_A;
+			emit_xor(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_AND_K:
+			/* A &= K */
+			ctx->flags |= SEEN_A;
+			emit_andi(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ALU_AND_X:
+			/* A &= X */
+			update_on_xread(ctx);
+			ctx->flags |= SEEN_A | SEEN_X;
+			emit_and(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_LSH_K:
+			/* A <<= K */
+			ctx->flags |= SEEN_A;
+			emit_sll(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ALU_LSH_X:
+			/* A <<= X */
+			ctx->flags |= SEEN_A | SEEN_X;
+			update_on_xread(ctx);
+			emit_sllv(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_RSH_K:
+			/* A >>= K */
+			ctx->flags |= SEEN_A;
+			emit_srl(r_A, r_A, k, ctx);
+			break;
+		case BPF_S_ALU_RSH_X:
+			/* A >>= X */
+			ctx->flags |= SEEN_A | SEEN_X;
+			update_on_xread(ctx);
+			emit_srlv(r_A, r_A, r_X, ctx);
+			break;
+		case BPF_S_ALU_NEG:
+			/* A = -A */
+			ctx->flags |= SEEN_A;
+			emit_neg(r_A, ctx);
+			break;
+		case BPF_S_JMP_JA:
+			/* pc += K */
+			emit_b(b_imm(i + k + 1, ctx), ctx);
+			emit_nop(ctx);
+			break;
+		case BPF_S_JMP_JEQ_K:
+			/* pc += ( A == K ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_EQ | MIPS_COND_K;
+			goto jmp_cmp;
+		case BPF_S_JMP_JEQ_X:
+			ctx->flags |= SEEN_X;
+			/* pc += ( A == X ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_EQ | MIPS_COND_X;
+			goto jmp_cmp;
+		case BPF_S_JMP_JGE_K:
+			/* pc += ( A >= K ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_GE | MIPS_COND_K;
+			goto jmp_cmp;
+		case BPF_S_JMP_JGE_X:
+			ctx->flags |= SEEN_X;
+			/* pc += ( A >= X ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_GE | MIPS_COND_X;
+			goto jmp_cmp;
+		case BPF_S_JMP_JGT_K:
+			/* pc += ( A > K ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_GT | MIPS_COND_K;
+			goto jmp_cmp;
+		case BPF_S_JMP_JGT_X:
+			ctx->flags |= SEEN_X;
+			/* pc += ( A > X ) ? pc->jt : pc->jf */
+			condt = MIPS_COND_GT | MIPS_COND_X;
+jmp_cmp:
+			/* Greater or Equal */
+			if ((condt & MIPS_COND_GE) ||
+			    (condt & MIPS_COND_GT)) {
+				if (condt & MIPS_COND_K) { /* K */
+					ctx->flags |= SEEN_S0 | SEEN_A;
+					emit_sltiu(r_s0, r_A, k, ctx);
+				} else { /* X */
+					ctx->flags |= SEEN_S0 | SEEN_A |
+						SEEN_X;
+					emit_sltu(r_s0, r_A, r_X, ctx);
+				}
+				/*
+				 * A < (K|X) ? r_s0 = 1 : r_s0 = 0.  Take
+				 * the false branch when r_s0 is non-zero;
+				 * emit_bcond() only implements EQ, NE and
+				 * ALL, so the test is expressed as NE.
+				 */
+				b_off = b_imm(i + inst->jf + 1, ctx);
+				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
+					   ctx);
+				emit_nop(ctx);
+				/* A > (K|X) ? scratch = 0 */
+				if (condt & MIPS_COND_GT) {
+					/* Checking for equality */
+					ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
+					if (condt & MIPS_COND_K)
+						emit_load_imm(r_s0, k, ctx);
+					else
+						emit_jit_reg_move(r_s0, r_X,
+								  ctx);
+					b_off = b_imm(i + inst->jf + 1, ctx);
+					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
+						   b_off, ctx);
+					emit_nop(ctx);
+					/* Finally, A > K|X */
+					b_off = b_imm(i + inst->jt + 1, ctx);
+					emit_b(b_off, ctx);
+					emit_nop(ctx);
+				} else {
+					/* A >= (K|X) so jump */
+					b_off = b_imm(i + inst->jt + 1, ctx);
+					emit_b(b_off, ctx);
+					emit_nop(ctx);
+				}
+			} else {
+				/* A == K|X */
+				if (condt & MIPS_COND_K) { /* K */
+					ctx->flags |= SEEN_S0 | SEEN_A;
+					emit_load_imm(r_s0, k, ctx);
+					/* jump true */
+					b_off = b_imm(i + inst->jt + 1, ctx);
+					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
+						   b_off, ctx);
+					emit_nop(ctx);
+					/* jump false */
+					b_off = b_imm(i + inst->jf + 1,
+						      ctx);
+					emit_bcond(MIPS_COND_NE, r_A, r_s0,
+						   b_off, ctx);
+					emit_nop(ctx);
+				} else { /* X */
+					/* jump true */
+					ctx->flags |= SEEN_A | SEEN_X;
+					b_off = b_imm(i + inst->jt + 1,
+						      ctx);
+					emit_bcond(MIPS_COND_EQ, r_A, r_X,
+						   b_off, ctx);
+					emit_nop(ctx);
+					/* jump false */
+					b_off = b_imm(i + inst->jf + 1, ctx);
+					emit_bcond(MIPS_COND_NE, r_A, r_X,
+						   b_off, ctx);
+					emit_nop(ctx);
+				}
+			}
+			break;
+		case BPF_S_JMP_JSET_K:
+			ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
+			/* pc += (A & K) ? pc -> jt : pc -> jf */
+			emit_load_imm(r_s1, k, ctx);
+			emit_and(r_s0, r_A, r_s1, ctx);
+			/* jump true */
+			b_off = b_imm(i + inst->jt + 1, ctx);
+			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
+			emit_nop(ctx);
+			/* jump false */
+			b_off = b_imm(i + inst->jf + 1, ctx);
+			emit_b(b_off, ctx);
+			emit_nop(ctx);
+			break;
+		case BPF_S_JMP_JSET_X:
+			ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
+			/* pc += (A & X) ? pc -> jt : pc -> jf */
+			emit_and(r_s0, r_A, r_X, ctx);
+			/* jump true */
+			b_off = b_imm(i + inst->jt + 1, ctx);
+			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
+			emit_nop(ctx);
+			/* jump false */
+			b_off = b_imm(i + inst->jf + 1, ctx);
+			emit_b(b_off, ctx);
+			emit_nop(ctx);
+			break;
+		case BPF_S_RET_A:
+			ctx->flags |= SEEN_A;
+			if (i != prog->len - 1)
+				/*
+				 * If this is not the last instruction
+				 * then jump to the epilogue
+				 */
+				emit_b(b_imm(prog->len, ctx), ctx);
+			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
+			break;
+		case BPF_S_RET_K:
+			/*
+			 * It can emit two instructions so it does not fit on
+			 * the delay slot.
+			 */
+			emit_load_imm(r_ret, k, ctx);
+			if (i != prog->len - 1) {
+				/*
+				 * If this is not the last instruction
+				 * then jump to the epilogue
+				 */
+				emit_b(b_imm(prog->len, ctx), ctx);
+				emit_nop(ctx);
+			}
+			break;
+		case BPF_S_MISC_TAX:
+			/* X = A */
+			ctx->flags |= SEEN_X | SEEN_A;
+			emit_jit_reg_move(r_X, r_A, ctx);
+			break;
+		case BPF_S_MISC_TXA:
+			/* A = X */
+			ctx->flags |= SEEN_A | SEEN_X;
+			update_on_xread(ctx);
+			emit_jit_reg_move(r_A, r_X, ctx);
+			break;
+		/* AUX */
+		case BPF_S_ANC_PROTOCOL:
+			/* A = ntohs(skb->protocol) */
+			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  protocol) != 2);
+			off = offsetof(struct sk_buff, protocol);
+			emit_half_load(r_A, r_skb, off, ctx);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			/* This needs little endian fixup */
+			if (cpu_has_mips_r2) {
+				/* R2 and later have the wsbh instruction */
+				emit_wsbh(r_A, r_A, ctx);
+			} else {
+				/* Get first byte */
+				emit_andi(r_tmp_imm, r_A, 0xff, ctx);
+				/* Shift it */
+				emit_sll(r_tmp, r_tmp_imm, 8, ctx);
+				/* Get second byte */
+				emit_srl(r_tmp_imm, r_A, 8, ctx);
+				emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
+				/* Put everything together in r_A */
+				emit_or(r_A, r_tmp, r_tmp_imm, ctx);
+			}
+#endif
+			break;
+		case BPF_S_ANC_CPU:
+			ctx->flags |= SEEN_A | SEEN_OFF;
+			/* A = current_thread_info()->cpu */
+			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
+						  cpu) != 4);
+			off = offsetof(struct thread_info, cpu);
+			/* $28/gp points to the thread_info struct */
+			emit_load(r_A, 28, off, ctx);
+			break;
+		case BPF_S_ANC_IFINDEX:
+			/* A = skb->dev->ifindex */
+			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
+			off = offsetof(struct sk_buff, dev);
+			emit_load(r_s0, r_skb, off, ctx);
+			/* error (0) in the delay slot */
+			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
+				   b_imm(prog->len, ctx), ctx);
+			emit_reg_move(r_ret, r_zero, ctx);
+			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+						  ifindex) != 4);
+			off = offsetof(struct net_device, ifindex);
+			emit_load(r_A, r_s0, off, ctx);
+			break;
+		case BPF_S_ANC_MARK:
+			ctx->flags |= SEEN_SKB | SEEN_A;
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+			off = offsetof(struct sk_buff, mark);
+			emit_load(r_A, r_skb, off, ctx);
+			break;
+		case BPF_S_ANC_RXHASH:
+			ctx->flags |= SEEN_SKB | SEEN_A;
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+			off = offsetof(struct sk_buff, hash);
+			emit_load(r_A, r_skb, off, ctx);
+			break;
+		case BPF_S_ANC_VLAN_TAG:
+		case BPF_S_ANC_VLAN_TAG_PRESENT:
+			ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  vlan_tci) != 2);
+			off = offsetof(struct sk_buff, vlan_tci);
+			emit_half_load(r_s0, r_skb, off, ctx);
+			/*
+			 * The masks are constants, so the immediate form
+			 * is required; emit_and() would treat them as
+			 * register numbers.
+			 */
+			if (inst->code == BPF_S_ANC_VLAN_TAG) {
+				/* A = vlan_tci without the "present" bit */
+				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT,
+					  ctx);
+			} else {
+				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
+				/* A = 1 if the bit is set, 0 otherwise */
+				emit_sltu(r_A, r_zero, r_A, ctx);
+			}
+			break;
+		case BPF_S_ANC_PKTTYPE:
+			/* r_skb is read and r_A written below */
+			ctx->flags |= SEEN_SKB | SEEN_A;
+			/* keep the result signed so that -1 is detected */
+			pkt_off = pkt_type_offset();
+
+			if (pkt_off < 0)
+				return -1;
+			emit_load_byte(r_tmp, r_skb, pkt_off, ctx);
+			/* Keep only the last 3 bits */
+			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
+			break;
+		case BPF_S_ANC_QUEUE:
+			ctx->flags |= SEEN_SKB | SEEN_A;
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  queue_mapping) != 2);
+			BUILD_BUG_ON(offsetof(struct sk_buff,
+					      queue_mapping) > 0xff);
+			off = offsetof(struct sk_buff, queue_mapping);
+			emit_half_load(r_A, r_skb, off, ctx);
+			break;
+		default:
+			pr_warn("%s: Unhandled opcode: 0x%02x\n", __FILE__,
+				inst->code);
+			return -1;
+		}
+	}
+
+	/* compute offsets only during the first pass */
+	if (ctx->target == NULL)
+		ctx->offsets[i] = ctx->idx * 4;
+
+	return 0;
+}
+
+/*
+ * JIT switch: bpf_jit_compile() does nothing while this is 0, and also
+ * dumps the generated code when it is greater than 1.
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * JIT-compile filter @fp, if the JIT is enabled.
+ *
+ * A first, dry pass over body, prologue and epilogue (ctx.target == NULL)
+ * collects the feature flags, the per-instruction offsets and the total
+ * instruction count.  Memory for the code is then allocated and a second
+ * pass writes prologue, body and epilogue into it.  On success
+ * fp->bpf_func points at the generated code and fp->jited is set; on any
+ * failure @fp is left untouched.
+ */
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	struct jit_ctx ctx;
+	unsigned int alloc_size, tmp_idx;
+
+	if (!bpf_jit_enable)
+		return;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	/*
+	 * One entry per instruction plus one for the position right after
+	 * the last instruction: build_body() stores offsets[fp->len] and
+	 * b_imm(prog->len, ...) reads it to branch to the epilogue.
+	 */
+	ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
+	if (ctx.offsets == NULL)
+		return;
+
+	ctx.skf = fp;
+
+	if (build_body(&ctx))
+		goto out;
+
+	tmp_idx = ctx.idx;
+	build_prologue(&ctx);
+	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
+	/* just to complete the ctx.idx count */
+	build_epilogue(&ctx);
+
+	alloc_size = 4 * ctx.idx;
+	ctx.target = module_alloc(alloc_size);
+	if (ctx.target == NULL)
+		goto out;
+
+	/* Clean it */
+	memset(ctx.target, 0, alloc_size);
+
+	ctx.idx = 0;
+
+	/* Generate the actual JIT code */
+	build_prologue(&ctx);
+	build_body(&ctx);
+	build_epilogue(&ctx);
+
+	/* Update the icache */
+	flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));
+
+	if (bpf_jit_enable > 1)
+		/* Dump JIT code */
+		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+
+	fp->bpf_func = (void *)ctx.target;
+	fp->jited = 1;
+
+out:
+	kfree(ctx.offsets);
+}
+
+/*
+ * Release filter @fp: the generated code is freed first when the filter
+ * was JITed, then the filter structure itself.
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->jited)
+		module_free(NULL, fp->bpf_func);
+	kfree(fp);
+}

diff --git a/arch/mips/net/bpf_jit.h b/arch/mips/net/bpf_jit.h
new file mode 100644
index 0000000..3a5751b
--- /dev/null
+++ b/arch/mips/net/bpf_jit.h

@@ -0,0 +1,44 @@
+/*
+ * Just-In-Time compiler for BPF filters on MIPS
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#ifndef BPF_JIT_MIPS_OP_H
+#define BPF_JIT_MIPS_OP_H
+
+/* Registers used by JIT (general purpose register numbers) */
+#define MIPS_R_ZERO	0
+#define MIPS_R_V0	2
+#define MIPS_R_V1	3
+#define MIPS_R_A0	4
+#define MIPS_R_A1	5
+#define MIPS_R_T6	14
+#define MIPS_R_T7	15
+#define MIPS_R_S0	16
+#define MIPS_R_S1	17
+#define MIPS_R_S2	18
+#define MIPS_R_S3	19
+#define MIPS_R_S4	20
+#define MIPS_R_S5	21
+#define MIPS_R_S6	22
+#define MIPS_R_S7	23
+#define MIPS_R_SP	29
+#define MIPS_R_RA	31
+
+/*
+ * Conditional codes.  Each one is a separate bit so that a comparison kind
+ * can be OR-ed with one of the operand selectors below.
+ */
+#define MIPS_COND_EQ	0x1
+#define MIPS_COND_GE	(0x1 << 1)
+#define MIPS_COND_GT	(0x1 << 2)
+#define MIPS_COND_NE	(0x1 << 3)
+#define MIPS_COND_ALL	(0x1 << 4)
+/* Conditionals on X register or K immediate */
+#define MIPS_COND_X	(0x1 << 5)
+#define MIPS_COND_K	(0x1 << 6)
+
+#endif /* BPF_JIT_MIPS_OP_H */

diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c
index 5afc4b7..c100b9a 100644
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c

@@ -203,6 +203,8 @@
 
 	xirq = nlm_irq_to_xirq(node, irq);
 	pic_data = irq_get_handler_data(xirq);
+	if (WARN_ON(!pic_data))
+		return;
 	pic_data->extra_ack = xack;
 }
 

diff --git a/arch/mips/netlogic/common/reset.S b/arch/mips/netlogic/common/reset.S
index b231fe1..701c4bc 100644
--- a/arch/mips/netlogic/common/reset.S
+++ b/arch/mips/netlogic/common/reset.S

@@ -35,6 +35,7 @@
 
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/cpu.h>
 #include <asm/cacheops.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
@@ -74,13 +75,25 @@
 .endm
 
 /*
+ * Allow access to physical mem >64G by enabling ELPA in PAGEGRAIN
+ * register. This is needed before going to C code since the SP can
+ * be in this region. Called from all HW threads.
+ */
+.macro xlp_early_mmu_init
+	mfc0	t0, CP0_PAGEMASK, 1
+	li	t1, (1 << 29)		/* ELPA bit */
+	or	t0, t1
+	mtc0	t0, CP0_PAGEMASK, 1
+.endm
+
+/*
  * L1D cache has to be flushed before enabling threads in XLP.
  * On XLP8xx/XLP3xx, we do a low level flush using processor control
  * registers. On XLPII CPUs, usual cache instructions work.
  */
 .macro	xlp_flush_l1_dcache
 	mfc0	t0, CP0_EBASE, 0
-	andi	t0, t0, 0xff00
+	andi	t0, t0, PRID_IMP_MASK
 	slt	t1, t0, 0x1200
 	beqz	t1, 15f
 	nop
@@ -159,11 +172,15 @@
 
 1:	/* Entry point on core wakeup */
 	mfc0	t0, CP0_EBASE, 0	/* processor ID */
-	andi	t0, 0xff00
+	andi	t0, PRID_IMP_MASK
 	li	t1, 0x1500		/* XLP 9xx */
 	beq	t0, t1, 2f		/* does not need to set coherent */
 	nop
 
+	li	t1, 0x1300		/* XLP 5xx */
+	beq	t0, t1, 2f		/* does not need to set coherent */
+	nop
+
 	/* set bit in SYS coherent register for the core */
 	mfc0	t0, CP0_EBASE, 1
 	mfc0	t1, CP0_EBASE, 1
@@ -197,6 +214,9 @@
 EXPORT(nlm_boot_siblings)
 	/* core L1D flush before enable threads */
 	xlp_flush_l1_dcache
+	/* save ra and sp, will be used later (only for boot cpu) */
+	dmtc0	ra, $22, 6
+	dmtc0	sp, $22, 7
 	/* Enable hw threads by writing to MAP_THREADMODE of the core */
 	li	t0, CKSEG1ADDR(RESET_DATA_PHYS)
 	lw	t1, BOOT_THREAD_MODE(t0)	/* t1 <- thread mode */
@@ -225,6 +245,8 @@
 #endif
 	mtc0	t1, CP0_STATUS
 
+	xlp_early_mmu_init
+
 	/* mark CPU ready */
 	li	t3, CKSEG1ADDR(RESET_DATA_PHYS)
 	ADDIU	t1, t3, BOOT_CPU_READY
@@ -238,14 +260,12 @@
 	nop
 
 	/*
-	 * For the boot CPU, we have to restore registers and
-	 * return
+	 * For the boot CPU, we have to restore ra and sp and return, rest
+	 * of the registers will be restored by the caller
 	 */
-4:	dmfc0	t0, $4, 2	/* restore SP from UserLocal */
-	li	t1, 0xfadebeef
-	dmtc0	t1, $4, 2	/* restore SP from UserLocal */
-	PTR_SUBU sp, t0, PT_SIZE
-	RESTORE_ALL
+4:
+	dmfc0	ra, $22, 6
+	dmfc0	sp, $22, 7
 	jr	ra
 	nop
 EXPORT(nlm_reset_entry_end)
@@ -253,6 +273,7 @@
 LEAF(nlm_init_boot_cpu)
 #ifdef CONFIG_CPU_XLP
 	xlp_config_lsu
+	xlp_early_mmu_init
 #endif
 	jr	ra
 	nop

diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index 6baae15..4fde7ac 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c

@@ -135,10 +135,6 @@
 	local_irq_enable();
 }
 
-void nlm_cpus_done(void)
-{
-}
-
 /*
  * Boot all other cpus in the system, initialize them, and bring them into
  * the boot function
@@ -198,7 +194,7 @@
 	cpumask_scnprintf(buf, ARRAY_SIZE(buf), cpu_possible_mask);
 	pr_info("Possible CPU mask: %s\n", buf);
 
-	/* check with the cores we have worken up */
+	/* check with the cores we have woken up */
 	for (ncore = 0, i = 0; i < NLM_NR_NODES; i++)
 		ncore += hweight32(nlm_get_node(i)->coremask);
 
@@ -213,6 +209,7 @@
 {
 	uint32_t core0_thr_mask, core_thr_mask;
 	int threadmode, i, j;
+	char buf[64];
 
 	core0_thr_mask = 0;
 	for (i = 0; i < NLM_THREADS_PER_CORE; i++)
@@ -247,8 +244,8 @@
 	return threadmode;
 
 unsupp:
-	panic("Unsupported CPU mask %lx",
-		(unsigned long)cpumask_bits(wakeup_mask)[0]);
+	cpumask_scnprintf(buf, ARRAY_SIZE(buf), wakeup_mask);
+	panic("Unsupported CPU mask %s", buf);
 	return 0;
 }
 
@@ -277,7 +274,6 @@
 	.send_ipi_mask		= nlm_send_ipi_mask,
 	.init_secondary		= nlm_init_secondary,
 	.smp_finish		= nlm_smp_finish,
-	.cpus_done		= nlm_cpus_done,
 	.boot_secondary		= nlm_boot_secondary,
 	.smp_setup		= nlm_smp_setup,
 	.prepare_cpus		= nlm_prepare_cpus,

diff --git a/arch/mips/netlogic/common/smpboot.S b/arch/mips/netlogic/common/smpboot.S
index 8597657..805355b 100644
--- a/arch/mips/netlogic/common/smpboot.S
+++ b/arch/mips/netlogic/common/smpboot.S

@@ -54,8 +54,9 @@
 	.set	noat
 	.set	arch=xlr		/* for mfcr/mtcr, XLR is sufficient */
 
-FEXPORT(xlp_boot_core0_siblings)	/* "Master" cpu starts from here */
-	dmtc0	sp, $4, 2		/* SP saved in UserLocal */
+/* Called by the boot cpu to wake up its sibling threads */
+NESTED(xlp_boot_core0_siblings, PT_SIZE, sp)
+	/* CPU register contents lost when enabling threads, save them first */
 	SAVE_ALL
 	sync
 	/* find the location to which nlm_boot_siblings was relocated */
@@ -65,9 +66,12 @@
 	dsubu	t2, t1
 	daddu	t2, t0
 	/* call it */
-	jr	t2
+	jalr	t2
 	nop
-	/* not reached */
+	RESTORE_ALL
+	jr	ra
+	nop
+END(xlp_boot_core0_siblings)
 
 NESTED(nlm_boot_secondary_cpus, 16, sp)
 	/* Initialize CP0 Status */

diff --git a/arch/mips/netlogic/common/time.c b/arch/mips/netlogic/common/time.c
index 13391b8..0c0a1a6 100644
--- a/arch/mips/netlogic/common/time.c
+++ b/arch/mips/netlogic/common/time.c

@@ -82,6 +82,7 @@
 static void nlm_init_pic_timer(void)
 {
 	uint64_t picbase = nlm_get_node(0)->picbase;
+	u32 picfreq;
 
 	nlm_pic_set_timer(picbase, PIC_CLOCK_TIMER, ~0ULL, 0, 0);
 	if (current_cpu_data.cputype == CPU_XLR) {
@@ -92,7 +93,9 @@
 		csrc_pic.read	= nlm_get_pic_timer;
 	}
 	csrc_pic.rating = 1000;
-	clocksource_register_hz(&csrc_pic, pic_timer_freq());
+	picfreq = pic_timer_freq();
+	clocksource_register_hz(&csrc_pic, picfreq);
+	pr_info("PIC clock source added, frequency %d\n", picfreq);
 }
 
 void __init plat_time_init(void)

diff --git a/arch/mips/netlogic/dts/xlp_gvp.dts b/arch/mips/netlogic/dts/xlp_gvp.dts
index 047d27f..bb4ecd1 100644
--- a/arch/mips/netlogic/dts/xlp_gvp.dts
+++ b/arch/mips/netlogic/dts/xlp_gvp.dts

@@ -26,11 +26,12 @@
 			interrupt-parent = <&pic>;
 			interrupts = <17>;
 		};
-		pic: pic@4000 {
-			interrupt-controller;
+		pic: pic@110000 {
+			compatible = "netlogic,xlp-pic";
 			#address-cells = <0>;
 			#interrupt-cells = <1>;
 			reg = <0 0x110000 0x200>;
+			interrupt-controller;
 		};
 
 		nor_flash@1,0 {

diff --git a/arch/mips/netlogic/xlp/Makefile b/arch/mips/netlogic/xlp/Makefile
index ed9a93c..be358a8 100644
--- a/arch/mips/netlogic/xlp/Makefile
+++ b/arch/mips/netlogic/xlp/Makefile

@@ -2,3 +2,5 @@
 obj-$(CONFIG_SMP)		+= wakeup.o
 obj-$(CONFIG_USB)		+= usb-init.o
 obj-$(CONFIG_USB)		+= usb-init-xlp2.o
+obj-$(CONFIG_SATA_AHCI)		+= ahci-init.o
+obj-$(CONFIG_SATA_AHCI)		+= ahci-init-xlp2.o

diff --git a/arch/mips/netlogic/xlp/ahci-init-xlp2.c b/arch/mips/netlogic/xlp/ahci-init-xlp2.c
new file mode 100644
index 0000000..c83dbf3
--- /dev/null
+++ b/arch/mips/netlogic/xlp/ahci-init-xlp2.c

@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2003-2014 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the Broadcom
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+#include <linux/pci_ids.h>
+#include <linux/nodemask.h>
+
+#include <asm/cpu.h>
+#include <asm/mipsregs.h>
+
+#include <asm/netlogic/common.h>
+#include <asm/netlogic/haldefs.h>
+#include <asm/netlogic/mips-extns.h>
+#include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/iomap.h>
+
+#define SATA_CTL		0x0
+#define SATA_STATUS		0x1 /* Status Reg */
+#define SATA_INT		0x2 /* Interrupt Reg */
+#define SATA_INT_MASK		0x3 /* Interrupt Mask Reg */
+#define SATA_BIU_TIMEOUT	0x4
+#define AXIWRSPERRLOG		0x5
+#define AXIRDSPERRLOG		0x6
+#define BiuTimeoutLow		0x7
+#define BiuTimeoutHi		0x8
+#define BiuSlvErLow		0x9
+#define BiuSlvErHi		0xa
+#define IO_CONFIG_SWAP_DIS	0xb
+#define CR_REG_TIMER		0xc
+#define CORE_ID			0xd
+#define AXI_SLAVE_OPT1		0xe
+#define PHY_MEM_ACCESS		0xf
+#define PHY0_CNTRL		0x10
+#define PHY0_STAT		0x11
+#define PHY0_RX_ALIGN		0x12
+#define PHY0_RX_EQ_LO		0x13
+#define PHY0_RX_EQ_HI		0x14
+#define PHY0_BIST_LOOP		0x15
+#define PHY1_CNTRL		0x16
+#define PHY1_STAT		0x17
+#define PHY1_RX_ALIGN		0x18
+#define PHY1_RX_EQ_LO		0x19
+#define PHY1_RX_EQ_HI		0x1a
+#define PHY1_BIST_LOOP		0x1b
+#define RdExBase		0x1c
+#define RdExLimit		0x1d
+#define CacheAllocBase		0x1e
+#define CacheAllocLimit		0x1f
+#define BiuSlaveCmdGstNum	0x20
+
+/* SATA_CTL bits */
+#define SATA_RST_N		BIT(0)  /* Active low reset sata_core phy */
+#define SataCtlReserve0		BIT(1)
+#define M_CSYSREQ		BIT(2)  /* AXI master low power, not used */
+#define S_CSYSREQ		BIT(3)  /* AXI slave low power, not used */
+#define P0_CP_DET		BIT(8)  /* Reserved, bring in from pad */
+#define P0_MP_SW		BIT(9)  /* Mech Switch */
+#define P0_DISABLE		BIT(10) /* disable p0 */
+#define P0_ACT_LED_EN		BIT(11) /* Active LED enable */
+#define P0_IRST_HARD_SYNTH	BIT(12) /* PHY hard synth reset */
+#define P0_IRST_HARD_TXRX	BIT(13) /* PHY lane hard reset */
+#define P0_IRST_POR		BIT(14) /* PHY power on reset */
+#define P0_IPDTXL		BIT(15) /* PHY Tx lane dis/power down */
+#define P0_IPDRXL		BIT(16) /* PHY Rx lane dis/power down */
+#define P0_IPDIPDMSYNTH		BIT(17) /* PHY synthesizer dis/power down */
+#define P0_CP_POD_EN		BIT(18) /* CP_POD enable */
+#define P0_AT_BYPASS		BIT(19) /* P0 address translation bypass */
+#define P1_CP_DET		BIT(20) /* Reserved, Cold Detect */
+#define P1_MP_SW		BIT(21) /* Mech Switch */
+#define P1_DISABLE		BIT(22) /* disable p1 */
+#define P1_ACT_LED_EN		BIT(23) /* Active LED enable */
+#define P1_IRST_HARD_SYNTH	BIT(24) /* PHY hard synth reset */
+#define P1_IRST_HARD_TXRX	BIT(25) /* PHY lane hard reset */
+#define P1_IRST_POR		BIT(26) /* PHY power on reset */
+#define P1_IPDTXL		BIT(27) /* PHY Tx lane dis/power down */
+#define P1_IPDRXL		BIT(28) /* PHY Rx lane dis/power down */
+#define P1_IPDIPDMSYNTH		BIT(29) /* PHY synthesizer dis/power down */
+#define P1_CP_POD_EN		BIT(30)
+#define P1_AT_BYPASS		BIT(31) /* P1 address translation bypass */
+
+/* Status register */
+#define M_CACTIVE		BIT(0)  /* m_cactive, not used */
+#define S_CACTIVE		BIT(1)  /* s_cactive, not used */
+#define P0_PHY_READY		BIT(8)  /* phy is ready */
+#define P0_CP_POD		BIT(9)  /* Cold PowerOn */
+#define P0_SLUMBER		BIT(10) /* power mode slumber */
+#define P0_PATIAL		BIT(11) /* power mode partial */
+#define P0_PHY_SIG_DET		BIT(12) /* phy signal detect */
+#define P0_PHY_CALI		BIT(13) /* phy calibration done */
+#define P1_PHY_READY		BIT(16) /* phy is ready */
+#define P1_CP_POD		BIT(17) /* Cold PowerOn */
+#define P1_SLUMBER		BIT(18) /* power mode slumber */
+#define P1_PATIAL		BIT(19) /* power mode partial */
+#define P1_PHY_SIG_DET		BIT(20) /* phy signal detect */
+#define P1_PHY_CALI		BIT(21) /* phy calibration done */
+
+/* SATA CR_REG_TIMER bits */
+#define CR_TIME_SCALE		(0x1000 << 0)
+
+/* SATA PHY specific registers start and end address */
+#define RXCDRCALFOSC0		0x0065
+#define CALDUTY			0x006e
+#define RXDPIF			0x8065
+#define PPMDRIFTMAX_HI		0x80A4
+
+#define nlm_read_sata_reg(b, r)		nlm_read_reg(b, r)
+#define nlm_write_sata_reg(b, r, v)	nlm_write_reg(b, r, v)
+#define nlm_get_sata_pcibase(node)	\
+		nlm_pcicfg_base(XLP9XX_IO_SATA_OFFSET(node))
+#define nlm_get_sata_regbase(node)	\
+		(nlm_get_sata_pcibase(node) + 0x100)
+
+/* SATA PHY config for register block 1 0x0065 .. 0x006e */
+static const u8 sata_phy_config1[]  = {
+	0xC9, 0xC9, 0x07, 0x07, 0x18, 0x18, 0x01, 0x01, 0x22, 0x00
+};
+
+/* SATA PHY config for register block 2 0x8065 .. 0x80A4 */
+static const u8 sata_phy_config2[]  = {
+	0xAA, 0x00, 0x4C, 0xC9, 0xC9, 0x07, 0x07, 0x18,
+	0x18, 0x05, 0x0C, 0x10, 0x00, 0x10, 0x00, 0xFF,
+	0xCF, 0xF7, 0xE1, 0xF5, 0xFD, 0xFD, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xE3, 0xE7, 0xDB, 0xF5, 0xFD, 0xFD,
+	0xF5, 0xF5, 0xFF, 0xFF, 0xE3, 0xE7, 0xDB, 0xF5,
+	0xFD, 0xFD, 0xF5, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5,
+	0x3F, 0x00, 0x32, 0x00, 0x03, 0x01, 0x05, 0x05,
+	0x04, 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0x04,
+};
+
+const int sata_phy_debug = 0;	/* set to verify PHY writes */
+
+static void sata_clear_glue_reg(u64 regbase, u32 off, u32 bit)
+{
+	u32 reg_val;
+
+	reg_val = nlm_read_sata_reg(regbase, off);
+	nlm_write_sata_reg(regbase, off, (reg_val & ~bit));
+}
+
+static void sata_set_glue_reg(u64 regbase, u32 off, u32 bit)
+{
+	u32 reg_val;
+
+	reg_val = nlm_read_sata_reg(regbase, off);
+	nlm_write_sata_reg(regbase, off, (reg_val | bit));
+}
+
+static void write_phy_reg(u64 regbase, u32 addr, u32 physel, u8 data)
+{
+	nlm_write_sata_reg(regbase, PHY_MEM_ACCESS,
+		(1u << 31) | (physel << 24) | (data << 16) | addr);
+	udelay(850);
+}
+
+static u8 read_phy_reg(u64 regbase, u32 addr, u32 physel)
+{
+	u32 val;
+
+	nlm_write_sata_reg(regbase, PHY_MEM_ACCESS,
+		(0 << 31) | (physel << 24) | (0 << 16) | addr);
+	udelay(850);
+	val = nlm_read_sata_reg(regbase, PHY_MEM_ACCESS);
+	return (val >> 16) & 0xff;
+}
+
+static void config_sata_phy(u64 regbase)
+{
+	u32 port, i, reg;
+
+	for (port = 0; port < 2; port++) {
+		for (i = 0, reg = RXCDRCALFOSC0; reg <= CALDUTY; reg++, i++)
+			write_phy_reg(regbase, reg, port, sata_phy_config1[i]);
+
+		for (i = 0, reg = RXDPIF; reg <= PPMDRIFTMAX_HI; reg++, i++)
+			write_phy_reg(regbase, reg, port, sata_phy_config2[i]);
+	}
+}
+
+static void check_phy_register(u64 regbase, u32 addr, u32 physel, u8 xdata)
+{
+	u8 data;
+
+	data = read_phy_reg(regbase, addr, physel);
+	pr_info("PHY read addr = 0x%x physel = %d data = 0x%x %s\n",
+		addr, physel, data, data == xdata ? "TRUE" : "FALSE");
+}
+
+static void verify_sata_phy_config(u64 regbase)
+{
+	u32 port, i, reg;
+
+	for (port = 0; port < 2; port++) {
+		for (i = 0, reg = RXCDRCALFOSC0; reg <= CALDUTY; reg++, i++)
+			check_phy_register(regbase, reg, port,
+						sata_phy_config1[i]);
+
+		for (i = 0, reg = RXDPIF; reg <= PPMDRIFTMAX_HI; reg++, i++)
+			check_phy_register(regbase, reg, port,
+						sata_phy_config2[i]);
+	}
+}
+
+static void nlm_sata_firmware_init(int node)
+{
+	u32 reg_val;
+	u64 regbase;
+	int n;
+
+	pr_info("Initializing XLP9XX On-chip AHCI...\n");
+	regbase = nlm_get_sata_regbase(node);
+
+	/* Reset port0 */
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IRST_POR);
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IRST_HARD_TXRX);
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IRST_HARD_SYNTH);
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IPDTXL);
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IPDRXL);
+	sata_clear_glue_reg(regbase, SATA_CTL, P0_IPDIPDMSYNTH);
+
+	/* port1 */
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IRST_POR);
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IRST_HARD_TXRX);
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IRST_HARD_SYNTH);
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IPDTXL);
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IPDRXL);
+	sata_clear_glue_reg(regbase, SATA_CTL, P1_IPDIPDMSYNTH);
+	udelay(300);
+
+	/* Set PHY */
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IPDTXL);
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IPDRXL);
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IPDIPDMSYNTH);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IPDTXL);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IPDRXL);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IPDIPDMSYNTH);
+
+	udelay(1000);
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IRST_POR);
+	udelay(1000);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IRST_POR);
+	udelay(1000);
+
+	/* setup PHY */
+	config_sata_phy(regbase);
+	if (sata_phy_debug)
+		verify_sata_phy_config(regbase);
+
+	udelay(1000);
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IRST_HARD_TXRX);
+	sata_set_glue_reg(regbase, SATA_CTL, P0_IRST_HARD_SYNTH);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IRST_HARD_TXRX);
+	sata_set_glue_reg(regbase, SATA_CTL, P1_IRST_HARD_SYNTH);
+	udelay(300);
+
+	/* Override reset in serial PHY mode */
+	sata_set_glue_reg(regbase, CR_REG_TIMER, CR_TIME_SCALE);
+	/* Set reset SATA */
+	sata_set_glue_reg(regbase, SATA_CTL, SATA_RST_N);
+	sata_set_glue_reg(regbase, SATA_CTL, M_CSYSREQ);
+	sata_set_glue_reg(regbase, SATA_CTL, S_CSYSREQ);
+
+	pr_debug("Waiting for PHYs to come up.\n");
+	n = 10000;
+	do {
+		reg_val = nlm_read_sata_reg(regbase, SATA_STATUS);
+		if ((reg_val & P1_PHY_READY) && (reg_val & P0_PHY_READY))
+			break;
+		udelay(10);
+	} while (--n > 0);
+
+	if (reg_val  & P0_PHY_READY)
+		pr_info("PHY0 is up.\n");
+	else
+		pr_info("PHY0 is down.\n");
+	if (reg_val  & P1_PHY_READY)
+		pr_info("PHY1 is up.\n");
+	else
+		pr_info("PHY1 is down.\n");
+
+	pr_info("XLP AHCI Init Done.\n");
+}
+
+static int __init nlm_ahci_init(void)
+{
+	int node;
+
+	if (!cpu_is_xlp9xx())
+		return 0;
+	for (node = 0; node < NLM_NR_NODES; node++)
+		if (nlm_node_present(node))
+			nlm_sata_firmware_init(node);
+	return 0;
+}
+
+static void nlm_sata_intr_ack(struct irq_data *data)
+{
+	u64 regbase;
+	u32 val;
+	int node;
+
+	node = data->irq / NLM_IRQS_PER_NODE;
+	regbase = nlm_get_sata_regbase(node);
+	val = nlm_read_sata_reg(regbase, SATA_INT);
+	sata_set_glue_reg(regbase, SATA_INT, val);
+}
+
+static void nlm_sata_fixup_bar(struct pci_dev *dev)
+{
+	dev->resource[5] = dev->resource[0];
+	memset(&dev->resource[0], 0, sizeof(dev->resource[0]));
+}
+
+static void nlm_sata_fixup_final(struct pci_dev *dev)
+{
+	u32 val;
+	u64 regbase;
+	int node;
+
+	/* Find end bridge function to find node */
+	node = xlp_socdev_to_node(dev);
+	regbase = nlm_get_sata_regbase(node);
+
+	/* clear pending interrupts and then enable them */
+	val = nlm_read_sata_reg(regbase, SATA_INT);
+	sata_set_glue_reg(regbase, SATA_INT, val);
+
+	/* Enable only the core interrupt */
+	sata_set_glue_reg(regbase, SATA_INT_MASK, 0x1);
+
+	dev->irq = nlm_irq_to_xirq(node, PIC_SATA_IRQ);
+	nlm_set_pic_extra_ack(node, PIC_SATA_IRQ, nlm_sata_intr_ack);
+}
+
+arch_initcall(nlm_ahci_init);
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_XLP9XX_SATA,
+		nlm_sata_fixup_bar);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_XLP9XX_SATA,
+		nlm_sata_fixup_final);

diff --git a/arch/mips/netlogic/xlp/ahci-init.c b/arch/mips/netlogic/xlp/ahci-init.c
new file mode 100644
index 0000000..a9d0fae
--- /dev/null
+++ b/arch/mips/netlogic/xlp/ahci-init.c

@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2003-2014 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the Broadcom
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+
+#include <asm/cpu.h>
+#include <asm/mipsregs.h>
+
+#include <asm/netlogic/haldefs.h>
+#include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/common.h>
+#include <asm/netlogic/xlp-hal/iomap.h>
+#include <asm/netlogic/mips-extns.h>
+
+#define SATA_CTL		0x0
+#define SATA_STATUS		0x1	/* Status Reg */
+#define SATA_INT		0x2	/* Interrupt Reg */
+#define SATA_INT_MASK		0x3	/* Interrupt Mask Reg */
+#define SATA_CR_REG_TIMER	0x4	/* PHY Control Timer Reg */
+#define SATA_CORE_ID		0x5	/* Core ID Reg */
+#define SATA_AXI_SLAVE_OPT1	0x6	/* AXI Slave Options Reg */
+#define SATA_PHY_LOS_LEV	0x7	/* PHY LOS Level Reg */
+#define SATA_PHY_MULTI		0x8	/* PHY Multiplier Reg */
+#define SATA_PHY_CLK_SEL	0x9	/* Clock Select Reg */
+#define SATA_PHY_AMP1_GEN1	0xa	/* PHY Transmit Amplitude Reg 1 */
+#define SATA_PHY_AMP1_GEN2	0xb	/* PHY Transmit Amplitude Reg 2 */
+#define SATA_PHY_AMP1_GEN3	0xc	/* PHY Transmit Amplitude Reg 3 */
+#define SATA_PHY_PRE1		0xd	/* PHY Transmit Preemphasis Reg 1 */
+#define SATA_PHY_PRE2		0xe	/* PHY Transmit Preemphasis Reg 2 */
+#define SATA_PHY_PRE3		0xf	/* PHY Transmit Preemphasis Reg 3 */
+#define SATA_SPDMODE		0x10	/* Speed Mode Reg */
+#define SATA_REFCLK		0x11	/* Reference Clock Control Reg */
+#define SATA_BYTE_SWAP_DIS	0x12	/* byte swap disable */
+
+/*SATA_CTL Bits */
+#define SATA_RST_N		BIT(0)
+#define PHY0_RESET_N		BIT(16)
+#define PHY1_RESET_N		BIT(17)
+#define PHY2_RESET_N		BIT(18)
+#define PHY3_RESET_N		BIT(19)
+#define M_CSYSREQ		BIT(2)
+#define S_CSYSREQ		BIT(3)
+
+/*SATA_STATUS Bits */
+#define P0_PHY_READY		BIT(4)
+#define P1_PHY_READY		BIT(5)
+#define P2_PHY_READY		BIT(6)
+#define P3_PHY_READY		BIT(7)
+
+#define nlm_read_sata_reg(b, r)		nlm_read_reg(b, r)
+#define nlm_write_sata_reg(b, r, v)	nlm_write_reg(b, r, v)
+#define nlm_get_sata_pcibase(node)	\
+		nlm_pcicfg_base(XLP_IO_SATA_OFFSET(node))
+/* SATA device specific configuration registers start at offset 0x900 */
+#define nlm_get_sata_regbase(node)	\
+		(nlm_get_sata_pcibase(node) + 0x900)
+
+static void sata_clear_glue_reg(uint64_t regbase, uint32_t off, uint32_t bit)
+{
+	uint32_t reg_val;
+
+	reg_val = nlm_read_sata_reg(regbase, off);
+	nlm_write_sata_reg(regbase, off, (reg_val & ~bit));
+}
+
+static void sata_set_glue_reg(uint64_t regbase, uint32_t off, uint32_t bit)
+{
+	uint32_t reg_val;
+
+	reg_val = nlm_read_sata_reg(regbase, off);
+	nlm_write_sata_reg(regbase, off, (reg_val | bit));
+}
+
+static void nlm_sata_firmware_init(int node)
+{
+	uint32_t reg_val;
+	uint64_t regbase;
+	int i;
+
+	pr_info("XLP AHCI Initialization started.\n");
+	regbase = nlm_get_sata_regbase(node);
+
+	/* Reset SATA */
+	sata_clear_glue_reg(regbase, SATA_CTL, SATA_RST_N);
+	/* Reset PHY */
+	sata_clear_glue_reg(regbase, SATA_CTL,
+			(PHY3_RESET_N | PHY2_RESET_N
+			 | PHY1_RESET_N | PHY0_RESET_N));
+
+	/* Set SATA */
+	sata_set_glue_reg(regbase, SATA_CTL, SATA_RST_N);
+	/* Set PHY */
+	sata_set_glue_reg(regbase, SATA_CTL,
+			(PHY3_RESET_N | PHY2_RESET_N
+			 | PHY1_RESET_N | PHY0_RESET_N));
+
+	pr_debug("Waiting for PHYs to come up.\n");
+	i = 0;
+	do {
+		reg_val = nlm_read_sata_reg(regbase, SATA_STATUS);
+		i++;
+	} while (((reg_val & 0xF0) != 0xF0) && (i < 10000));
+
+	for (i = 0; i < 4; i++) {
+		if (reg_val  & (P0_PHY_READY << i))
+			pr_info("PHY%d is up.\n", i);
+		else
+			pr_info("PHY%d is down.\n", i);
+	}
+
+	pr_info("XLP AHCI init done.\n");
+}
+
+/* XLP3XX only: match the PRId implementation field, then init node 0 SATA */
+static int __init nlm_ahci_init(void)
+{
+	int chip = read_c0_prid() & PRID_IMP_MASK;
+
+	if (chip == PRID_IMP_NETLOGIC_XLP3XX)
+		nlm_sata_firmware_init(0);
+	return 0;
+}
+
+static void nlm_sata_intr_ack(struct irq_data *data)
+{
+	uint32_t val = 0;
+	uint64_t regbase;
+
+	regbase = nlm_get_sata_regbase(nlm_nodeid());
+	val = nlm_read_sata_reg(regbase, SATA_INT);
+	sata_set_glue_reg(regbase, SATA_INT, val);
+}
+
+static void nlm_sata_fixup_bar(struct pci_dev *dev)
+{
+	/*
+	 * The AHCI resource is in BAR 0, move it to
+	 * BAR 5, where it is expected
+	 */
+	dev->resource[5] = dev->resource[0];
+	memset(&dev->resource[0], 0, sizeof(dev->resource[0]));
+}
+
+static void nlm_sata_fixup_final(struct pci_dev *dev)
+{
+	uint32_t val;
+	uint64_t regbase;
+	int node = 0; /* XLP3XX does not support multi-node */
+
+	regbase = nlm_get_sata_regbase(node);
+
+	/* clear pending interrupts and then enable them */
+	val = nlm_read_sata_reg(regbase, SATA_INT);
+	sata_set_glue_reg(regbase, SATA_INT, val);
+
+	/* Enable only the core interrupt in the mask register.
+	 * With all interrupt sources enabled, a flow of
+	 * spurious interrupts was observed, so the other
+	 * mask bits are left untouched here.
+	 */
+	sata_set_glue_reg(regbase, SATA_INT_MASK, 0x1);
+
+	dev->irq = PIC_SATA_IRQ;
+	nlm_set_pic_extra_ack(node, PIC_SATA_IRQ, nlm_sata_intr_ack);
+}
+
+arch_initcall(nlm_ahci_init);
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_NETLOGIC, PCI_DEVICE_ID_NLM_SATA,
+		nlm_sata_fixup_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_NETLOGIC, PCI_DEVICE_ID_NLM_SATA,
+		nlm_sata_fixup_final);

diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c
index bdde331..7cc4603 100644
--- a/arch/mips/netlogic/xlp/dt.c
+++ b/arch/mips/netlogic/xlp/dt.c

@@ -48,9 +48,10 @@
 void __init *xlp_dt_init(void *fdtp)
 {
 	if (!fdtp) {
-		switch (current_cpu_data.processor_id & 0xff00) {
+		switch (current_cpu_data.processor_id & PRID_IMP_MASK) {
 #ifdef CONFIG_DT_XLP_GVP
 		case PRID_IMP_NETLOGIC_XLP9XX:
+		case PRID_IMP_NETLOGIC_XLP5XX:
 			fdtp = __dtb_xlp_gvp_begin;
 			break;
 #endif

diff --git a/arch/mips/netlogic/xlp/nlm_hal.c b/arch/mips/netlogic/xlp/nlm_hal.c
index 997cd9e..bc24beb 100644
--- a/arch/mips/netlogic/xlp/nlm_hal.c
+++ b/arch/mips/netlogic/xlp/nlm_hal.c

@@ -54,6 +54,8 @@
 	struct nlm_soc_info *nodep;
 
 	nodep = nlm_get_node(node);
+	if (node == 0)
+		nodep->coremask = 1;	/* node 0, boot cpu */
 	nodep->sysbase = nlm_get_sys_regbase(node);
 	nodep->picbase = nlm_get_pic_regbase(node);
 	nodep->ebase = read_c0_ebase() & (~((1 << 12) - 1));
@@ -64,31 +66,39 @@
 	spin_lock_init(&nodep->piclock);
 }
 
-int nlm_irq_to_irt(int irq)
+static int xlp9xx_irq_to_irt(int irq)
+{
+	switch (irq) {
+	case PIC_GPIO_IRQ:
+		return 12;
+	case PIC_9XX_XHCI_0_IRQ:
+		return 114;
+	case PIC_9XX_XHCI_1_IRQ:
+		return 115;
+	case PIC_UART_0_IRQ:
+		return 133;
+	case PIC_UART_1_IRQ:
+		return 134;
+	case PIC_SATA_IRQ:
+		return 143;
+	case PIC_SPI_IRQ:
+		return 152;
+	case PIC_MMC_IRQ:
+		return 153;
+	case PIC_PCIE_LINK_LEGACY_IRQ(0):
+	case PIC_PCIE_LINK_LEGACY_IRQ(1):
+	case PIC_PCIE_LINK_LEGACY_IRQ(2):
+	case PIC_PCIE_LINK_LEGACY_IRQ(3):
+		return 191 + irq - PIC_PCIE_LINK_LEGACY_IRQ_BASE;
+	}
+	return -1;
+}
+
+static int xlp_irq_to_irt(int irq)
 {
 	uint64_t pcibase;
 	int devoff, irt;
 
-	/* bypass for 9xx */
-	if (cpu_is_xlp9xx()) {
-		switch (irq) {
-		case PIC_9XX_XHCI_0_IRQ:
-			return 114;
-		case PIC_9XX_XHCI_1_IRQ:
-			return 115;
-		case PIC_UART_0_IRQ:
-			return 133;
-		case PIC_UART_1_IRQ:
-			return 134;
-		case PIC_PCIE_LINK_LEGACY_IRQ(0):
-		case PIC_PCIE_LINK_LEGACY_IRQ(1):
-		case PIC_PCIE_LINK_LEGACY_IRQ(2):
-		case PIC_PCIE_LINK_LEGACY_IRQ(3):
-			return 191 + irq - PIC_PCIE_LINK_LEGACY_IRQ_BASE;
-		}
-		return -1;
-	}
-
 	devoff = 0;
 	switch (irq) {
 	case PIC_UART_0_IRQ:
@@ -98,7 +108,7 @@
 		devoff = XLP_IO_UART1_OFFSET(0);
 		break;
 	case PIC_MMC_IRQ:
-		devoff = XLP_IO_SD_OFFSET(0);
+		devoff = XLP_IO_MMC_OFFSET(0);
 		break;
 	case PIC_I2C_0_IRQ:	/* I2C will be fixed up */
 	case PIC_I2C_1_IRQ:
@@ -109,6 +119,18 @@
 		else
 			devoff = XLP_IO_I2C0_OFFSET(0);
 		break;
+	case PIC_SATA_IRQ:
+		devoff = XLP_IO_SATA_OFFSET(0);
+		break;
+	case PIC_GPIO_IRQ:
+		devoff = XLP_IO_GPIO_OFFSET(0);
+		break;
+	case PIC_NAND_IRQ:
+		devoff = XLP_IO_NAND_OFFSET(0);
+		break;
+	case PIC_SPI_IRQ:
+		devoff = XLP_IO_SPI_OFFSET(0);
+		break;
 	default:
 		if (cpu_is_xlpii()) {
 			switch (irq) {
@@ -164,61 +186,123 @@
 		/* HW bug, PCI IRT entries are bad on early silicon, fix */
 		irt = PIC_IRT_PCIE_LINK_INDEX(irq -
 					PIC_PCIE_LINK_LEGACY_IRQ_BASE);
-	} else if (irq >= PIC_PCIE_LINK_MSI_IRQ(0) &&
-			irq <= PIC_PCIE_LINK_MSI_IRQ(3)) {
-		irt = -2;
-	} else if (irq >= PIC_PCIE_MSIX_IRQ(0) &&
-			irq <= PIC_PCIE_MSIX_IRQ(3)) {
-		irt = -2;
 	} else {
 		irt = -1;
 	}
 	return irt;
 }
 
-unsigned int nlm_get_core_frequency(int node, int core)
+int nlm_irq_to_irt(int irq)
+{
+	/* return -2 for irqs without 1-1 mapping */
+	if (irq >= PIC_PCIE_LINK_MSI_IRQ(0) && irq <= PIC_PCIE_LINK_MSI_IRQ(3))
+		return -2;
+	if (irq >= PIC_PCIE_MSIX_IRQ(0) && irq <= PIC_PCIE_MSIX_IRQ(3))
+		return -2;
+
+	if (cpu_is_xlp9xx())
+		return xlp9xx_irq_to_irt(irq);
+	else
+		return xlp_irq_to_irt(irq);
+}
+
+static unsigned int nlm_xlp2_get_core_frequency(int node, int core)
+{
+	unsigned int pll_post_div, ctrl_val0, ctrl_val1, denom;
+	uint64_t num, sysbase, clockbase;
+
+	if (cpu_is_xlp9xx()) {
+		clockbase = nlm_get_clock_regbase(node);
+		ctrl_val0 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_CPU_PLL_CTRL0(core));
+		ctrl_val1 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_CPU_PLL_CTRL1(core));
+	} else {
+		sysbase = nlm_get_node(node)->sysbase;
+		ctrl_val0 = nlm_read_sys_reg(sysbase,
+						SYS_CPU_PLL_CTRL0(core));
+		ctrl_val1 = nlm_read_sys_reg(sysbase,
+						SYS_CPU_PLL_CTRL1(core));
+	}
+
+	/* Find PLL post divider value */
+	switch ((ctrl_val0 >> 24) & 0x7) {
+	case 1:
+		pll_post_div = 2;
+		break;
+	case 3:
+		pll_post_div = 4;
+		break;
+	case 7:
+		pll_post_div = 8;
+		break;
+	case 6:
+		pll_post_div = 16;
+		break;
+	case 0:
+	default:
+		pll_post_div = 1;
+		break;
+	}
+
+	num = 1000000ULL * (400 * 3 + 100 * (ctrl_val1 & 0x3f));
+	denom = 3 * pll_post_div;
+	do_div(num, denom);
+
+	return (unsigned int)num;
+}
+
+static unsigned int nlm_xlp_get_core_frequency(int node, int core)
 {
 	unsigned int pll_divf, pll_divr, dfs_div, ext_div;
 	unsigned int rstval, dfsval, denom;
 	uint64_t num, sysbase;
 
 	sysbase = nlm_get_node(node)->sysbase;
-	if (cpu_is_xlp9xx())
-		rstval = nlm_read_sys_reg(sysbase, SYS_9XX_POWER_ON_RESET_CFG);
-	else
-		rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG);
-	if (cpu_is_xlpii()) {
-		num = 1000000ULL * (400 * 3 + 100 * (rstval >> 26));
-		denom = 3;
-	} else {
-		dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE);
-		pll_divf = ((rstval >> 10) & 0x7f) + 1;
-		pll_divr = ((rstval >> 8)  & 0x3) + 1;
-		ext_div  = ((rstval >> 30) & 0x3) + 1;
-		dfs_div  = ((dfsval >> (core * 4)) & 0xf) + 1;
+	rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG);
+	dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE);
+	pll_divf = ((rstval >> 10) & 0x7f) + 1;
+	pll_divr = ((rstval >> 8)  & 0x3) + 1;
+	ext_div  = ((rstval >> 30) & 0x3) + 1;
+	dfs_div  = ((dfsval >> (core * 4)) & 0xf) + 1;
 
-		num = 800000000ULL * pll_divf;
-		denom = 3 * pll_divr * ext_div * dfs_div;
-	}
+	num = 800000000ULL * pll_divf;
+	denom = 3 * pll_divr * ext_div * dfs_div;
 	do_div(num, denom);
+
 	return (unsigned int)num;
 }
 
-/* Calculate Frequency to the PIC from PLL.
- * freq_out = ( ref_freq/2 * (6 + ctrl2[7:0]) + ctrl2[20:8]/2^13 ) /
- * ((2^ctrl0[7:5]) * Table(ctrl0[26:24]))
- */
-static unsigned int nlm_2xx_get_pic_frequency(int node)
+unsigned int nlm_get_core_frequency(int node, int core)
 {
-	u32 ctrl_val0, ctrl_val2, vco_post_div, pll_post_div;
+	if (cpu_is_xlpii())
+		return nlm_xlp2_get_core_frequency(node, core);
+	else
+		return nlm_xlp_get_core_frequency(node, core);
+}
+
+/*
+ * Calculate PIC frequency from PLL registers.
+ * freq_out = (ref_freq/2 * (6 + ctrl2[7:0]) + ctrl2[20:8]/2^13) /
+ * 		((2^ctrl0[7:5]) * Table(ctrl0[26:24]))
+ */
+static unsigned int nlm_xlp2_get_pic_frequency(int node)
+{
+	u32 ctrl_val0, ctrl_val2, vco_post_div, pll_post_div, cpu_xlp9xx;
 	u32 mdiv, fdiv, pll_out_freq_den, reg_select, ref_div, pic_div;
-	u64 ref_clk, sysbase, pll_out_freq_num, ref_clk_select;
+	u64 sysbase, pll_out_freq_num, ref_clk_select, clockbase, ref_clk;
 
 	sysbase = nlm_get_node(node)->sysbase;
+	clockbase = nlm_get_clock_regbase(node);
+	cpu_xlp9xx = cpu_is_xlp9xx();
 
 	/* Find ref_clk_base */
-	ref_clk_select =
-		(nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG) >> 18) & 0x3;
+	if (cpu_xlp9xx)
+		ref_clk_select = (nlm_read_sys_reg(sysbase,
+				SYS_9XX_POWER_ON_RESET_CFG) >> 18) & 0x3;
+	else
+		ref_clk_select = (nlm_read_sys_reg(sysbase,
+					SYS_POWER_ON_RESET_CFG) >> 18) & 0x3;
 	switch (ref_clk_select) {
 	case 0:
 		ref_clk = 200000000ULL;
@@ -239,30 +323,70 @@
 	}
 
 	/* Find the clock source PLL device for PIC */
-	reg_select = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_SEL) >> 22) & 0x3;
-	switch (reg_select) {
-	case 0:
-		ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0);
-		ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2);
-		break;
-	case 1:
-		ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(0));
-		ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(0));
-		break;
-	case 2:
-		ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(1));
-		ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(1));
-		break;
-	case 3:
-		ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(2));
-		ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(2));
-		break;
+	if (cpu_xlp9xx) {
+		reg_select = nlm_read_sys_reg(clockbase,
+				SYS_9XX_CLK_DEV_SEL) & 0x3;
+		switch (reg_select) {
+		case 0:
+			ctrl_val0 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL0);
+			ctrl_val2 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL2);
+			break;
+		case 1:
+			ctrl_val0 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL0_DEVX(0));
+			ctrl_val2 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL2_DEVX(0));
+			break;
+		case 2:
+			ctrl_val0 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL0_DEVX(1));
+			ctrl_val2 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL2_DEVX(1));
+			break;
+		case 3:
+			ctrl_val0 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL0_DEVX(2));
+			ctrl_val2 = nlm_read_sys_reg(clockbase,
+					SYS_9XX_PLL_CTRL2_DEVX(2));
+			break;
+		}
+	} else {
+		reg_select = (nlm_read_sys_reg(sysbase,
+					SYS_CLK_DEV_SEL) >> 22) & 0x3;
+		switch (reg_select) {
+		case 0:
+			ctrl_val0 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL0);
+			ctrl_val2 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL2);
+			break;
+		case 1:
+			ctrl_val0 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL0_DEVX(0));
+			ctrl_val2 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL2_DEVX(0));
+			break;
+		case 2:
+			ctrl_val0 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL0_DEVX(1));
+			ctrl_val2 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL2_DEVX(1));
+			break;
+		case 3:
+			ctrl_val0 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL0_DEVX(2));
+			ctrl_val2 = nlm_read_sys_reg(sysbase,
+					SYS_PLL_CTRL2_DEVX(2));
+			break;
+		}
 	}
 
 	vco_post_div = (ctrl_val0 >> 5) & 0x7;
 	pll_post_div = (ctrl_val0 >> 24) & 0x7;
 	mdiv = ctrl_val2 & 0xff;
-	fdiv = (ctrl_val2 >> 8) & 0xfff;
+	fdiv = (ctrl_val2 >> 8) & 0x1fff;
 
 	/* Find PLL post divider value */
 	switch (pll_post_div) {
@@ -292,7 +416,12 @@
 		do_div(pll_out_freq_num, pll_out_freq_den);
 
 	/* PIC post divider, which happens after PLL */
-	pic_div = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_DIV) >> 22) & 0x3;
+	if (cpu_xlp9xx)
+		pic_div = nlm_read_sys_reg(clockbase,
+				SYS_9XX_CLK_DEV_DIV) & 0x3;
+	else
+		pic_div = (nlm_read_sys_reg(sysbase,
+					SYS_CLK_DEV_DIV) >> 22) & 0x3;
 	do_div(pll_out_freq_num, 1 << pic_div);
 
 	return pll_out_freq_num;
@@ -300,12 +429,8 @@
 
 unsigned int nlm_get_pic_frequency(int node)
 {
-	/* TODO Has to calculate freq as like 2xx */
-	if (cpu_is_xlp9xx())
-		return 250000000;
-
 	if (cpu_is_xlpii())
-		return nlm_2xx_get_pic_frequency(node);
+		return nlm_xlp2_get_pic_frequency(node);
 	else
 		return 133333333;
 }

diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 8c60a2d..4fdd9fd 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c

@@ -121,8 +121,9 @@
 
 const char *get_system_type(void)
 {
-	switch (read_c0_prid() & 0xff00) {
+	switch (read_c0_prid() & PRID_IMP_MASK) {
 	case PRID_IMP_NETLOGIC_XLP9XX:
+	case PRID_IMP_NETLOGIC_XLP5XX:
 	case PRID_IMP_NETLOGIC_XLP2XX:
 		return "Broadcom XLPII Series";
 	default:

diff --git a/arch/mips/netlogic/xlp/wakeup.c b/arch/mips/netlogic/xlp/wakeup.c
index 9a92617..e5f44d2 100644
--- a/arch/mips/netlogic/xlp/wakeup.c
+++ b/arch/mips/netlogic/xlp/wakeup.c

@@ -135,11 +135,19 @@
 		if (cpu_is_xlp9xx()) {
 			fusebase = nlm_get_fuse_regbase(n);
 			fusemask = nlm_read_reg(fusebase, FUSE_9XX_DEVCFG6);
-			mask = 0xfffff;
+			switch (read_c0_prid() & PRID_IMP_MASK) {
+			case PRID_IMP_NETLOGIC_XLP5XX:
+				mask = 0xff;
+				break;
+			case PRID_IMP_NETLOGIC_XLP9XX:
+			default:
+				mask = 0xfffff;
+				break;
+			}
 		} else {
 			fusemask = nlm_read_sys_reg(nodep->sysbase,
 						SYS_EFUSE_DEVICE_CFG_STATUS0);
-			switch (read_c0_prid() & 0xff00) {
+			switch (read_c0_prid() & PRID_IMP_MASK) {
 			case PRID_IMP_NETLOGIC_XLP3XX:
 				mask = 0xf;
 				break;
@@ -159,10 +167,6 @@
 		 */
 		syscoremask = (1 << hweight32(~fusemask & mask)) - 1;
 
-		/* The boot cpu */
-		if (n == 0)
-			nodep->coremask = 1;
-
 		pr_info("Node %d - SYS/FUSE coremask %x\n", n, syscoremask);
 		for (core = 0; core < nlm_cores_per_node(); core++) {
 			/* we will be on node 0 core 0 */

diff --git a/arch/mips/paravirt/Kconfig b/arch/mips/paravirt/Kconfig
new file mode 100644
index 0000000..ecae586
--- /dev/null
+++ b/arch/mips/paravirt/Kconfig

@@ -0,0 +1,6 @@
+if MIPS_PARAVIRT
+
+config MIPS_PCI_VIRTIO
+	def_bool y
+
+endif #  MIPS_PARAVIRT

diff --git a/arch/mips/paravirt/Makefile b/arch/mips/paravirt/Makefile
new file mode 100644
index 0000000..5023af7
--- /dev/null
+++ b/arch/mips/paravirt/Makefile

@@ -0,0 +1,14 @@
+#
+# Makefile for MIPS para-virtualized specific kernel interface routines
+# under Linux.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2013 Cavium, Inc.
+#
+
+obj-y := setup.o serial.o paravirt-irq.o
+
+obj-$(CONFIG_SMP)		+= paravirt-smp.o

diff --git a/arch/mips/paravirt/Platform b/arch/mips/paravirt/Platform
new file mode 100644
index 0000000..7e76ef2
--- /dev/null
+++ b/arch/mips/paravirt/Platform

@@ -0,0 +1,8 @@
+#
+# Generic para-virtualized guest.
+#
+platform-$(CONFIG_MIPS_PARAVIRT)	+= paravirt/
+cflags-$(CONFIG_MIPS_PARAVIRT)		+=				\
+		-I$(srctree)/arch/mips/include/asm/mach-paravirt
+
+load-$(CONFIG_MIPS_PARAVIRT)	= 0xffffffff80010000

diff --git a/arch/mips/paravirt/paravirt-irq.c b/arch/mips/paravirt/paravirt-irq.c
new file mode 100644
index 0000000..8987b06
--- /dev/null
+++ b/arch/mips/paravirt/paravirt-irq.c

@@ -0,0 +1,368 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+
+#include <asm/io.h>
+
+#define MBOX_BITS_PER_CPU 2
+
+/*
+ * Translate a Linux CPU index into the CPU number used when addressing
+ * the interrupt controller.  SMP kernels go through cpu_logical_map();
+ * UP kernels ignore @cpu and return get_ebase_cpunum() for the running
+ * CPU.
+ */
+static int cpunum_for_cpu(int cpu)
+{
+#ifdef CONFIG_SMP
+	return cpu_logical_map(cpu);
+#else
+	return get_ebase_cpunum();
+#endif
+}
+
+/* State kept for one CPU core interrupt line. */
+struct core_chip_data {
+	struct mutex core_irq_mutex;	/* taken in irq_bus_lock, dropped in irq_bus_sync_unlock */
+	bool current_en;	/* enable state last applied on the CPUs */
+	bool desired_en;	/* enable state requested by irq_enable/irq_disable */
+	u8 bit;		/* line index; its status mask is 0x100 << bit */
+};
+
+/* One slot per core interrupt line, indexed by line number. */
+static struct core_chip_data irq_core_chip_data[8];
+
+/*
+ * Clear the line's mask bit (0x100 << bit) in c0_status and, for bits 0
+ * and 1, also clear the matching bit in c0_cause.
+ */
+static void irq_core_ack(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	unsigned int bit = cd->bit;
+
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	clear_c0_status(0x100 << bit);
+	/* The two user interrupts must be cleared manually. */
+	if (bit < 2)
+		clear_c0_cause(0x100 << bit);
+}
+
+/* Set the line's mask bit (0x100 << bit) in c0_status on the local CPU. */
+static void irq_core_eoi(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	set_c0_status(0x100 << cd->bit);
+}
+
+/*
+ * Apply cd->desired_en to the local CPU's c0_status for the line
+ * described by @arg (a struct irq_data pointer): set the mask bit when
+ * enabling, clear it when disabling.
+ */
+static void irq_core_set_enable_local(void *arg)
+{
+	struct irq_data *data = arg;
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	unsigned int mask = 0x100 << cd->bit;
+
+	/*
+	 * Interrupts are already disabled, so these are atomic.
+	 */
+	if (cd->desired_en)
+		set_c0_status(mask);
+	else
+		clear_c0_status(mask);
+
+}
+
+/* Record a disable request; no register is touched here. */
+static void irq_core_disable(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	cd->desired_en = false;
+}
+
+/* Record an enable request; no register is touched here. */
+static void irq_core_enable(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	cd->desired_en = true;
+}
+
+/* Take the line's mutex; released again in irq_core_bus_sync_unlock(). */
+static void irq_core_bus_lock(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&cd->core_irq_mutex);
+}
+
+/*
+ * If the requested enable state differs from the one last applied, run
+ * irq_core_set_enable_local() on every CPU (waiting for completion) and
+ * remember the new state.  Always drops the line's mutex.
+ */
+static void irq_core_bus_sync_unlock(struct irq_data *data)
+{
+	struct core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	if (cd->desired_en != cd->current_en) {
+		on_each_cpu(irq_core_set_enable_local, data, 1);
+		cd->current_en = cd->desired_en;
+	}
+
+	mutex_unlock(&cd->core_irq_mutex);
+}
+
+/*
+ * irq_chip for the CPU core interrupt lines.  Enable/disable only record
+ * the request; it is applied in irq_bus_sync_unlock.  CPU online/offline
+ * reuse the eoi/ack callbacks.
+ */
+static struct irq_chip irq_chip_core = {
+	.name = "Core",
+	.irq_enable = irq_core_enable,
+	.irq_disable = irq_core_disable,
+	.irq_ack = irq_core_ack,
+	.irq_eoi = irq_core_eoi,
+	.irq_bus_lock = irq_core_bus_lock,
+	.irq_bus_sync_unlock = irq_core_bus_sync_unlock,
+
+	.irq_cpu_online = irq_core_eoi,
+	.irq_cpu_offline = irq_core_ack,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+/*
+ * Clear all interrupt-mask bits and the two software-interrupt cause
+ * bits, initialise every irq_core_chip_data slot, and attach
+ * irq_chip_core with handle_percpu_irq to lines 0, 1, 5, 6 and 7.
+ * Lines 2-4 get their state initialised but no chip or handler here.
+ */
+static void __init irq_init_core(void)
+{
+	int i;
+	int irq;
+	struct core_chip_data *cd;
+
+	/* Start with a clean slate */
+	clear_c0_status(ST0_IM);
+	clear_c0_cause(CAUSEF_IP0 | CAUSEF_IP1);
+
+	for (i = 0; i < ARRAY_SIZE(irq_core_chip_data); i++) {
+		cd = irq_core_chip_data + i;
+		cd->current_en = false;
+		cd->desired_en = false;
+		cd->bit = i;
+		mutex_init(&cd->core_irq_mutex);
+
+		irq = MIPS_CPU_IRQ_BASE + i;
+
+		switch (i) {
+		case 0: /* SW0 */
+		case 1: /* SW1 */
+		case 5: /* IP5 */
+		case 6: /* IP6 */
+		case 7: /* IP7 */
+			irq_set_chip_data(irq, cd);
+			irq_set_chip_and_handler(irq, &irq_chip_core,
+						 handle_percpu_irq);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/* Mapped base of the interrupt controller; set up in irq_pci_init(). */
+static void __iomem *mips_irq_chip;
+/* Offset of the register irq_pci_init() reads the bit count from. */
+#define MIPS_IRQ_CHIP_NUM_BITS 0
+/* Offset of the first register bank; the banks follow at stride steps. */
+#define MIPS_IRQ_CHIP_REGS 8
+
+/* Offsets from mips_irq_chip, all computed in irq_pci_init(). */
+static int mips_irq_cpu_stride;
+static int mips_irq_chip_reg_raw;
+static int mips_irq_chip_reg_src;
+static int mips_irq_chip_reg_en;
+static int mips_irq_chip_reg_raw_w1s;
+static int mips_irq_chip_reg_raw_w1c;
+static int mips_irq_chip_reg_en_w1s;
+static int mips_irq_chip_reg_en_w1c;
+
+/* Write this interrupt's bit (1 << irq) to the en_w1s register. */
+static void irq_pci_enable(struct irq_data *data)
+{
+	void __iomem *reg = mips_irq_chip + mips_irq_chip_reg_en_w1s;
+
+	__raw_writel(1u << data->irq, reg);
+}
+
+/* Write this interrupt's bit (1 << irq) to the en_w1c register. */
+static void irq_pci_disable(struct irq_data *data)
+{
+	void __iomem *reg = mips_irq_chip + mips_irq_chip_reg_en_w1c;
+
+	__raw_writel(1u << data->irq, reg);
+}
+
+/* Intentionally empty: no register is written to ack a PCI interrupt. */
+static void irq_pci_ack(struct irq_data *data)
+{
+}
+
+/* Mask: same register write as irq_pci_disable (bit to en_w1c). */
+static void irq_pci_mask(struct irq_data *data)
+{
+	void __iomem *reg = mips_irq_chip + mips_irq_chip_reg_en_w1c;
+
+	__raw_writel(1u << data->irq, reg);
+}
+
+/* Unmask: same register write as irq_pci_enable (bit to en_w1s). */
+static void irq_pci_unmask(struct irq_data *data)
+{
+	void __iomem *reg = mips_irq_chip + mips_irq_chip_reg_en_w1s;
+
+	__raw_writel(1u << data->irq, reg);
+}
+
+/* irq_chip for the PCI interrupts; installed by irq_pci_init(). */
+static struct irq_chip irq_chip_pci = {
+	.name = "PCI",
+	.irq_enable = irq_pci_enable,
+	.irq_disable = irq_pci_disable,
+	.irq_ack = irq_pci_ack,
+	.irq_mask = irq_pci_mask,
+	.irq_unmask = irq_pci_unmask,
+};
+
+/*
+ * Write this mailbox's per-CPU bit to @base for every online CPU.
+ *
+ * @data: irq_data of a mailbox interrupt (MIPS_IRQ_MBOX0 + n).
+ * @base: register address for CPU number 0; each further CPU's copy is
+ *        mips_irq_cpu_stride further on.
+ */
+static void irq_mbox_all(struct irq_data *data,  void __iomem *base)
+{
+	int cpu;
+	unsigned int mbox = data->irq - MIPS_IRQ_MBOX0;
+	u32 mask;
+
+	WARN_ON(mbox >= MBOX_BITS_PER_CPU);
+
+	for_each_online_cpu(cpu) {
+		unsigned int cpuid = cpunum_for_cpu(cpu);
+		/* Unsigned constant so that bit 31 is not a signed shift. */
+		mask = 1u << (cpuid * MBOX_BITS_PER_CPU + mbox);
+		__raw_writel(mask, base + (cpuid * mips_irq_cpu_stride));
+	}
+}
+
+/* Enable the mailbox on all online CPUs via the second word of en_w1s. */
+static void irq_mbox_enable(struct irq_data *data)
+{
+	irq_mbox_all(data, mips_irq_chip + mips_irq_chip_reg_en_w1s + sizeof(u32));
+}
+
+/* Disable the mailbox on all online CPUs via the second word of en_w1c. */
+static void irq_mbox_disable(struct irq_data *data)
+{
+	irq_mbox_all(data, mips_irq_chip + mips_irq_chip_reg_en_w1c + sizeof(u32));
+}
+
+/*
+ * Ack a mailbox interrupt on the running CPU by writing its bit to the
+ * second 32-bit word of the raw_w1c register.
+ */
+static void irq_mbox_ack(struct irq_data *data)
+{
+	u32 mask;
+	unsigned int mbox = data->irq - MIPS_IRQ_MBOX0;
+
+	WARN_ON(mbox >= MBOX_BITS_PER_CPU);
+
+	/* Unsigned constant so that bit 31 is not a signed shift. */
+	mask = 1u << (get_ebase_cpunum() * MBOX_BITS_PER_CPU + mbox);
+	__raw_writel(mask, mips_irq_chip + mips_irq_chip_reg_raw_w1c + sizeof(u32));
+}
+
+/*
+ * Raise mailbox interrupts on @cpu.  @actions is a bitmask of at most
+ * MBOX_BITS_PER_CPU bits; it is shifted into that CPU's field and
+ * written to the second 32-bit word of the raw_w1s register.
+ */
+void irq_mbox_ipi(int cpu, unsigned int actions)
+{
+	unsigned int cpuid = cpunum_for_cpu(cpu);
+	u32 mask;
+
+	WARN_ON(actions >= (1 << MBOX_BITS_PER_CPU));
+
+	mask = actions << (cpuid * MBOX_BITS_PER_CPU);
+	__raw_writel(mask, mips_irq_chip + mips_irq_chip_reg_raw_w1s + sizeof(u32));
+}
+
+/*
+ * Write the running CPU's bit for this mailbox to its own copy of the
+ * register at @base (offset by cpuid * mips_irq_cpu_stride).
+ */
+static void irq_mbox_cpu_onoffline(struct irq_data *data,  void __iomem *base)
+{
+	unsigned int mbox = data->irq - MIPS_IRQ_MBOX0;
+	unsigned int cpuid = get_ebase_cpunum();
+	u32 mask;
+
+	WARN_ON(mbox >= MBOX_BITS_PER_CPU);
+
+	/* Unsigned constant so that bit 31 is not a signed shift. */
+	mask = 1u << (cpuid * MBOX_BITS_PER_CPU + mbox);
+	__raw_writel(mask, base + (cpuid * mips_irq_cpu_stride));
+
+}
+
+/* CPU coming online: enable its mailbox bit (second word of en_w1s). */
+static void irq_mbox_cpu_online(struct irq_data *data)
+{
+	irq_mbox_cpu_onoffline(data, mips_irq_chip + mips_irq_chip_reg_en_w1s + sizeof(u32));
+}
+
+/* CPU going offline: disable its mailbox bit (second word of en_w1c). */
+static void irq_mbox_cpu_offline(struct irq_data *data)
+{
+	irq_mbox_cpu_onoffline(data, mips_irq_chip + mips_irq_chip_reg_en_w1c + sizeof(u32));
+}
+
+/* irq_chip for the mailbox interrupts; installed by irq_pci_init(). */
+static struct irq_chip irq_chip_mbox = {
+	.name = "MBOX",
+	.irq_enable = irq_mbox_enable,
+	.irq_disable = irq_mbox_disable,
+	.irq_ack = irq_mbox_ack,
+	.irq_cpu_online = irq_mbox_cpu_online,
+	.irq_cpu_offline = irq_mbox_cpu_offline,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+/*
+ * Map the interrupt controller, derive the register-bank offsets from
+ * the bit count it reports, install the PCI and mailbox irq_chips, and
+ * set STATUSF_IP2 in c0_status.
+ */
+static void __init irq_pci_init(void)
+{
+	int i, stride;
+	u32 num_bits;
+
+	mips_irq_chip = ioremap(0x1e010000, 4096);
+
+	num_bits = __raw_readl(mips_irq_chip + MIPS_IRQ_CHIP_NUM_BITS);
+	/*
+	 * 8 for every started group of 64 bits.
+	 * NOTE(review): num_bits is unsigned, so a read of 0 would make
+	 * (num_bits - 1) wrap - confirm the device never reports 0.
+	 */
+	stride = 8 * (1 + ((num_bits - 1) / 64));
+
+
+	pr_notice("mips_irq_chip: %u bits, reg stride: %d\n", num_bits, stride);
+	/* Seven register banks laid out back to back, one stride apart. */
+	mips_irq_chip_reg_raw		= MIPS_IRQ_CHIP_REGS + 0 * stride;
+	mips_irq_chip_reg_raw_w1s	= MIPS_IRQ_CHIP_REGS + 1 * stride;
+	mips_irq_chip_reg_raw_w1c	= MIPS_IRQ_CHIP_REGS + 2 * stride;
+	mips_irq_chip_reg_src		= MIPS_IRQ_CHIP_REGS + 3 * stride;
+	mips_irq_chip_reg_en		= MIPS_IRQ_CHIP_REGS + 4 * stride;
+	mips_irq_chip_reg_en_w1s	= MIPS_IRQ_CHIP_REGS + 5 * stride;
+	mips_irq_chip_reg_en_w1c	= MIPS_IRQ_CHIP_REGS + 6 * stride;
+	mips_irq_cpu_stride		= stride * 4;
+
+	for (i = 0; i < 4; i++)
+		irq_set_chip_and_handler(i + MIPS_IRQ_PCIA, &irq_chip_pci, handle_level_irq);
+
+	for (i = 0; i < 2; i++)
+		irq_set_chip_and_handler(i + MIPS_IRQ_MBOX0, &irq_chip_mbox, handle_percpu_irq);
+
+
+	set_c0_status(STATUSF_IP2);
+}
+
+/*
+ * Dispatch one interrupt from the interrupt controller for the running
+ * CPU.  The first 32-bit word of this CPU's src register is checked
+ * first; if it is zero, the CPU's two mailbox bits in the second word
+ * are checked.  The lowest set bit wins; if nothing is set the interrupt
+ * is counted as spurious.
+ */
+static void irq_pci_dispatch(void)
+{
+	unsigned int cpuid = get_ebase_cpunum();
+	u32 en;
+
+	en = __raw_readl(mips_irq_chip + mips_irq_chip_reg_src +
+			(cpuid * mips_irq_cpu_stride));
+
+	if (!en) {
+		en = __raw_readl(mips_irq_chip + mips_irq_chip_reg_src + (cpuid * mips_irq_cpu_stride) + sizeof(u32));
+		/* Keep only this CPU's two mailbox bits. */
+		en = (en >> (2 * cpuid)) & 3;
+
+		if (!en)
+			spurious_interrupt();
+		else
+			do_IRQ(__ffs(en) + MIPS_IRQ_MBOX0);	/* MBOX type */
+	} else {
+		do_IRQ(__ffs(en));
+	}
+}
+
+
+/* Set up the core interrupt lines first, then the interrupt controller. */
+void __init arch_init_irq(void)
+{
+	irq_init_core();
+	irq_pci_init();
+}
+
+/*
+ * Top-level interrupt dispatch.  Picks the lowest-numbered line that is
+ * both pending in c0_cause and unmasked in c0_status.  Line 2 is handed
+ * to irq_pci_dispatch(); any other line goes straight to do_IRQ().
+ */
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM;
+	int ip;
+
+	if (unlikely(!pending)) {
+		spurious_interrupt();
+		return;
+	}
+
+	/* ffs() is 1-based; convert to a 0-based line number. */
+	ip = ffs(pending) - 1 - STATUSB_IP0;
+	if (ip == 2)
+		irq_pci_dispatch();
+	else
+		do_IRQ(MIPS_CPU_IRQ_BASE + ip);
+}

diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c
new file mode 100644
index 0000000..0164b0c
--- /dev/null
+++ b/arch/mips/paravirt/paravirt-smp.c

@@ -0,0 +1,143 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/mipsregs.h>
+#include <asm/setup.h>
+#include <asm/time.h>
+#include <asm/smp.h>
+
+/*
+ * Writing the sp releases the CPU, so writes must be ordered, gp
+ * first, then sp.
+ */
+unsigned long paravirt_smp_sp[NR_CPUS];
+unsigned long paravirt_smp_gp[NR_CPUS];
+
+static int numcpus = 1;
+
+/*
+ * Parse the "numcpus" early parameter into numcpus.
+ * Returns 0 on success, -EINVAL if no integer could be parsed or the
+ * value is outside 1 .. NR_CPUS - 1.
+ * NOTE(review): the ">= NR_CPUS" test also rejects NR_CPUS itself -
+ * confirm that this is intended.
+ */
+static int __init set_numcpus(char *str)
+{
+	int newval;
+
+	if (get_option(&str, &newval)) {
+		if (newval < 1 || newval >= NR_CPUS)
+			goto bad;
+		numcpus = newval;
+		return 0;
+	}
+bad:
+	return -EINVAL;
+}
+early_param("numcpus", set_numcpus);
+
+
+/*
+ * Build the possible/present CPU masks and the CPU number maps.
+ * CPU 0 is marked first and mapped to the boot CPU's number; then CPUs
+ * 0 .. numcpus - 1 are all marked possible and present with an identity
+ * mapping (which also rewrites the entries for index 0).
+ */
+static void paravirt_smp_setup(void)
+{
+	int id;
+	unsigned int cpunum = get_ebase_cpunum();
+
+	if (WARN_ON(cpunum >= NR_CPUS))
+		return;
+
+	/* The present CPUs are initially just the boot cpu (CPU 0). */
+	for (id = 0; id < NR_CPUS; id++) {
+		set_cpu_possible(id, id == 0);
+		set_cpu_present(id, id == 0);
+	}
+	__cpu_number_map[cpunum] = 0;
+	__cpu_logical_map[0] = cpunum;
+
+	for (id = 0; id < numcpus; id++) {
+		set_cpu_possible(id, true);
+		set_cpu_present(id, true);
+		__cpu_number_map[id] = id;
+		__cpu_logical_map[id] = id;
+	}
+}
+
+/* Local prototype for irq_mbox_ipi(); it is not declared in a header here. */
+void irq_mbox_ipi(int cpu, unsigned int actions);
+/* Send @action to one CPU by forwarding to irq_mbox_ipi(). */
+static void paravirt_send_ipi_single(int cpu, unsigned int action)
+{
+	irq_mbox_ipi(cpu, action);
+}
+
+/* Send @action to every CPU set in @mask, one IPI at a time. */
+static void paravirt_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+	unsigned int cpu;
+
+	/* for_each_cpu() takes the mask pointer; for_each_cpu_mask() is deprecated. */
+	for_each_cpu(cpu, mask)
+		paravirt_send_ipi_single(cpu, action);
+}
+
+/*
+ * Per-CPU setup for a secondary CPU: set ST0_BEV, write ebase, write
+ * back the value returned by set_c0_status() with STATUSF_IP2 added,
+ * then run the irq_cpu_online callbacks.
+ */
+static void paravirt_init_secondary(void)
+{
+	unsigned int sr;
+
+	sr = set_c0_status(ST0_BEV);
+	write_c0_ebase((u32)ebase);
+
+	sr |= STATUSF_IP2; /* Interrupt controller on IP2 */
+	write_c0_status(sr);
+
+	irq_cpu_online();
+}
+
+/* Program the first timer compare value, then enable local interrupts. */
+static void paravirt_smp_finish(void)
+{
+	/* to generate the first CPU timer interrupt */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+	local_irq_enable();
+}
+
+/*
+ * Publish the gp and sp values for @cpu.  gp is stored first, followed
+ * by a write barrier, so it is in place before the sp store.
+ */
+static void paravirt_boot_secondary(int cpu, struct task_struct *idle)
+{
+	paravirt_smp_gp[cpu] = (unsigned long)task_thread_info(idle);
+	smp_wmb();
+	paravirt_smp_sp[cpu] = __KSTK_TOS(idle);
+}
+
+/* Handler for MIPS_IRQ_MBOX0: runs scheduler_ipi(). */
+static irqreturn_t paravirt_reched_interrupt(int irq, void *dev_id)
+{
+	scheduler_ipi();
+	return IRQ_HANDLED;
+}
+
+/* Handler for MIPS_IRQ_MBOX1: runs smp_call_function_interrupt(). */
+static irqreturn_t paravirt_function_interrupt(int irq, void *dev_id)
+{
+	smp_call_function_interrupt();
+	return IRQ_HANDLED;
+}
+
+/*
+ * Register the two mailbox interrupt handlers (MBOX0 -> reschedule,
+ * MBOX1 -> SMP function call).  Panics if either request fails.
+ * @max_cpus is not used.
+ */
+static void paravirt_prepare_cpus(unsigned int max_cpus)
+{
+	if (request_irq(MIPS_IRQ_MBOX0, paravirt_reched_interrupt,
+			IRQF_PERCPU | IRQF_NO_THREAD, "Scheduler",
+			paravirt_reched_interrupt)) {
+		panic("Cannot request_irq for SchedulerIPI");
+	}
+	if (request_irq(MIPS_IRQ_MBOX1, paravirt_function_interrupt,
+			IRQF_PERCPU | IRQF_NO_THREAD, "SMP-Call",
+			paravirt_function_interrupt)) {
+		panic("Cannot request_irq for SMP-Call");
+	}
+}
+
+/* SMP operations table; registered from prom_init() in setup.c. */
+struct plat_smp_ops paravirt_smp_ops = {
+	.send_ipi_single	= paravirt_send_ipi_single,
+	.send_ipi_mask		= paravirt_send_ipi_mask,
+	.init_secondary		= paravirt_init_secondary,
+	.smp_finish		= paravirt_smp_finish,
+	.boot_secondary		= paravirt_boot_secondary,
+	.smp_setup		= paravirt_smp_setup,
+	.prepare_cpus		= paravirt_prepare_cpus,
+};

diff --git a/arch/mips/paravirt/serial.c b/arch/mips/paravirt/serial.c
new file mode 100644
index 0000000..02b665c
--- /dev/null
+++ b/arch/mips/paravirt/serial.c

@@ -0,0 +1,40 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/virtio_console.h>
+#include <linux/kvm_para.h>
+
+/*
+ * Emit one character to the boot console.
+ *
+ * The address of the by-value argument @c is passed to the
+ * KVM_HC_MIPS_CONSOLE_OUTPUT hypercall with a length of 1.
+ * The hypercall result is ignored; the function always returns 1.
+ */
+int prom_putchar(char c)
+{
+	kvm_hypercall3(KVM_HC_MIPS_CONSOLE_OUTPUT, 0 /*  port 0 */,
+		(unsigned long)&c, 1 /* len == 1 */);
+
+	return 1;
+}
+
+#ifdef CONFIG_VIRTIO_CONSOLE
+/*
+ * Early virtio console output: hand @count bytes at @buf to the
+ * KVM_HC_MIPS_CONSOLE_OUTPUT hypercall, using @vtermno as the port.
+ * The hypercall result is ignored; @count is always returned.
+ */
+static int paravirt_put_chars(u32 vtermno, const char *buf, int count)
+{
+	kvm_hypercall3(KVM_HC_MIPS_CONSOLE_OUTPUT, vtermno,
+		(unsigned long)buf, count);
+
+	return count;
+}
+
+/* Register paravirt_put_chars as the early virtio console writer. */
+static int __init paravirt_cons_init(void)
+{
+	virtio_cons_early_init(paravirt_put_chars);
+	return 0;
+}
+core_initcall(paravirt_cons_init);
+
+#endif

diff --git a/arch/mips/paravirt/setup.c b/arch/mips/paravirt/setup.c
new file mode 100644
index 0000000..cb8448b
--- /dev/null
+++ b/arch/mips/paravirt/setup.c

@@ -0,0 +1,67 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+
+#include <asm/reboot.h>
+#include <asm/bootinfo.h>
+#include <asm/smp-ops.h>
+#include <asm/time.h>
+
+extern struct plat_smp_ops paravirt_smp_ops;
+
+/* Return the fixed system type string for this platform. */
+const char *get_system_type(void)
+{
+	return "MIPS Para-Virtualized Guest";
+}
+
+/*
+ * Obtain the timer frequency with the KVM_HC_MIPS_GET_CLOCK_FREQ
+ * hypercall and preset loops-per-jiffy from it.
+ */
+void __init plat_time_init(void)
+{
+	mips_hpt_frequency = kvm_hypercall0(KVM_HC_MIPS_GET_CLOCK_FREQ);
+
+	preset_lpj = mips_hpt_frequency / (2 * HZ);
+}
+
+/* Halt hook: issue the KVM_HC_MIPS_EXIT_VM hypercall. */
+static void pv_machine_halt(void)
+{
+	kvm_hypercall0(KVM_HC_MIPS_EXIT_VM);
+}
+
+/*
+ * Early entry point for arch setup
+ *
+ * Sets the I/O port base, builds arcs_cmdline from the firmware
+ * argument vector (fw_arg0 = argc, fw_arg1 = argv), installs the halt
+ * hook and registers the SMP operations.
+ */
+void __init prom_init(void)
+{
+	int i;
+	int argc = fw_arg0;
+	char **argv = (char **)fw_arg1;
+
+#ifdef CONFIG_32BIT
+	set_io_port_base(KSEG1ADDR(0x1e000000));
+#else /* CONFIG_64BIT */
+	set_io_port_base(PHYS_TO_XKSEG_UNCACHED(0x1e000000));
+#endif
+
+	/* Join the arguments with single spaces, bounded by COMMAND_LINE_SIZE. */
+	for (i = 0; i < argc; i++) {
+		strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE);
+		if (i < argc - 1)
+			strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
+	}
+	_machine_halt = pv_machine_halt;
+	register_smp_ops(&paravirt_smp_ops);
+}
+
+/* Platform memory setup hook; intentionally empty. */
+void __init plat_mem_setup(void)
+{
+	/* Do nothing, the "mem=???" parser handles our memory. */
+}
+
+/* Intentionally empty: this function frees nothing. */
+void __init prom_free_prom_memory(void)
+{
+}

diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index d61138a..ff8a553 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile

@@ -21,7 +21,7 @@
 obj-$(CONFIG_MIPS_ALCHEMY)	+= pci-alchemy.o
 obj-$(CONFIG_SOC_AR71XX)	+= pci-ar71xx.o
 obj-$(CONFIG_PCI_AR724X)	+= pci-ar724x.o
-
+obj-$(CONFIG_MIPS_PCI_VIRTIO)	+= pci-virtio-guest.o
 #
 # These are still pretty much in the old state, watch, go blind.
 #

diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 2f9e52a..40e920c 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c

@@ -68,6 +68,7 @@
 {
 	unsigned char reg_val;
 	u32 reg_val32;
+	u16 reg_val16;
 	/* PIIX PIRQC[A:D] irq mappings */
 	static int piixirqmap[PIIX4_FUNC0_PIRQRC_IRQ_ROUTING_MAX] = {
 		0,  0,	0,  3,
@@ -107,6 +108,11 @@
 	pci_read_config_byte(pdev, PIIX4_FUNC0_SERIRQC, &reg_val);
 	reg_val |= PIIX4_FUNC0_SERIRQC_EN | PIIX4_FUNC0_SERIRQC_CONT;
 	pci_write_config_byte(pdev, PIIX4_FUNC0_SERIRQC, reg_val);
+
+	/* Enable response to special cycles */
+	pci_read_config_word(pdev, PCI_COMMAND, &reg_val16);
+	pci_write_config_word(pdev, PCI_COMMAND,
+			      reg_val16 | PCI_COMMAND_SPECIAL);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0,

diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c
index 2b91b0e..ab0c5d1 100644
--- a/arch/mips/pci/msi-octeon.c
+++ b/arch/mips/pci/msi-octeon.c

@@ -15,6 +15,7 @@
 #include <asm/octeon/cvmx-npi-defs.h>
 #include <asm/octeon/cvmx-pci-defs.h>
 #include <asm/octeon/cvmx-npei-defs.h>
+#include <asm/octeon/cvmx-sli-defs.h>
 #include <asm/octeon/cvmx-pexp-defs.h>
 #include <asm/octeon/pci-octeon.h>
 
@@ -162,6 +163,11 @@
 		msg.address_lo = (0 + CVMX_NPEI_PCIE_MSI_RCV) & 0xffffffff;
 		msg.address_hi = (0 + CVMX_NPEI_PCIE_MSI_RCV) >> 32;
 		break;
+	case OCTEON_DMA_BAR_TYPE_PCIE2:
+		/* When using PCIe2, Bar 0 is based at 0 */
+		msg.address_lo = (0 + CVMX_SLI_PCIE_MSI_RCV) & 0xffffffff;
+		msg.address_hi = (0 + CVMX_SLI_PCIE_MSI_RCV) >> 32;
+		break;
 	default:
 		panic("arch_setup_msi_irq: Invalid octeon_dma_bar_type");
 	}

diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c
index 3249685..fa374fe 100644
--- a/arch/mips/pci/msi-xlp.c
+++ b/arch/mips/pci/msi-xlp.c

@@ -56,8 +56,8 @@
 #include <asm/netlogic/xlp-hal/bridge.h>
 
 #define XLP_MSIVEC_PER_LINK	32
-#define XLP_MSIXVEC_TOTAL	32
-#define XLP_MSIXVEC_PER_LINK	8
+#define XLP_MSIXVEC_TOTAL	(cpu_is_xlp9xx() ? 128 : 32)
+#define XLP_MSIXVEC_PER_LINK	(cpu_is_xlp9xx() ? 32 : 8)
 
 /* 128 MSI irqs per node, mapped starting at NLM_MSI_VEC_BASE */
 static inline int nlm_link_msiirq(int link, int msivec)
@@ -65,35 +65,44 @@
 	return NLM_MSI_VEC_BASE + link * XLP_MSIVEC_PER_LINK + msivec;
 }
 
+/* get the link MSI vector from irq number */
 static inline int nlm_irq_msivec(int irq)
 {
-	return irq % XLP_MSIVEC_PER_LINK;
+	return (irq - NLM_MSI_VEC_BASE) % XLP_MSIVEC_PER_LINK;
 }
 
+/* get the link from the irq number */
 static inline int nlm_irq_msilink(int irq)
 {
-	return (irq % (XLP_MSIVEC_PER_LINK * PCIE_NLINKS)) /
-						XLP_MSIVEC_PER_LINK;
+	int total_msivec = XLP_MSIVEC_PER_LINK * PCIE_NLINKS;
+
+	return ((irq - NLM_MSI_VEC_BASE) % total_msivec) /
+		XLP_MSIVEC_PER_LINK;
 }
 
 /*
- * Only 32 MSI-X vectors are possible because there are only 32 PIC
- * interrupts for MSI. We split them statically and use 8 MSI-X vectors
- * per link - this keeps the allocation and lookup simple.
+ * For XLP 8xx/4xx/3xx/2xx, only 32 MSI-X vectors are possible because
+ * there are only 32 PIC interrupts for MSI. We split them statically
+ * and use 8 MSI-X vectors per link - this keeps the allocation and
+ * lookup simple.
+ * On XLP 9xx, there are 32 vectors per link, and the interrupts are
+ * not routed thru PIC, so we can use all 128 MSI-X vectors.
  */
 static inline int nlm_link_msixirq(int link, int bit)
 {
 	return NLM_MSIX_VEC_BASE + link * XLP_MSIXVEC_PER_LINK + bit;
 }
 
+/* get the MSI-X vector from irq number */
 static inline int nlm_irq_msixvec(int irq)
 {
-	return irq % XLP_MSIXVEC_TOTAL;  /* works when given xirq */
+	return (irq - NLM_MSIX_VEC_BASE) % XLP_MSIXVEC_TOTAL;
 }
 
-static inline int nlm_irq_msixlink(int irq)
+/* get the link from MSIX vec */
+static inline int nlm_irq_msixlink(int msixvec)
 {
-	return nlm_irq_msixvec(irq) / XLP_MSIXVEC_PER_LINK;
+	return msixvec / XLP_MSIXVEC_PER_LINK;
 }
 
 /*
@@ -129,7 +138,11 @@
 	vec = nlm_irq_msivec(d->irq);
 	spin_lock_irqsave(&md->msi_lock, flags);
 	md->msi_enabled_mask |= 1u << vec;
-	nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
+	if (cpu_is_xlp9xx())
+		nlm_write_reg(md->lnkbase, PCIE_9XX_MSI_EN,
+				md->msi_enabled_mask);
+	else
+		nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
 	spin_unlock_irqrestore(&md->msi_lock, flags);
 }
 
@@ -142,7 +155,11 @@
 	vec = nlm_irq_msivec(d->irq);
 	spin_lock_irqsave(&md->msi_lock, flags);
 	md->msi_enabled_mask &= ~(1u << vec);
-	nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
+	if (cpu_is_xlp9xx())
+		nlm_write_reg(md->lnkbase, PCIE_9XX_MSI_EN,
+				md->msi_enabled_mask);
+	else
+		nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
 	spin_unlock_irqrestore(&md->msi_lock, flags);
 }
 
@@ -156,11 +173,18 @@
 	xlp_msi_disable(d);
 
 	/* Ack MSI on bridge */
-	nlm_write_reg(md->lnkbase, PCIE_MSI_STATUS, 1u << vec);
+	if (cpu_is_xlp9xx())
+		nlm_write_reg(md->lnkbase, PCIE_9XX_MSI_STATUS, 1u << vec);
+	else
+		nlm_write_reg(md->lnkbase, PCIE_MSI_STATUS, 1u << vec);
 
 	/* Ack at eirr and PIC */
 	ack_c0_eirr(PIC_PCIE_LINK_MSI_IRQ(link));
-	nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_LINK_INDEX(link));
+	if (cpu_is_xlp9xx())
+		nlm_pic_ack(md->node->picbase,
+				PIC_9XX_IRT_PCIE_LINK_INDEX(link));
+	else
+		nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_LINK_INDEX(link));
 }
 
 static struct irq_chip xlp_msi_chip = {
@@ -172,30 +196,45 @@
 };
 
 /*
- * The MSI-X interrupt handling is different from MSI, there are 32
- * MSI-X interrupts generated by the PIC and each of these correspond
- * to a MSI-X vector (0-31) that can be assigned.
+ * XLP8XX/4XX/3XX/2XX:
+ * The MSI-X interrupt handling is different from MSI, there are 32 MSI-X
+ * interrupts generated by the PIC and each of these correspond to a MSI-X
+ * vector (0-31) that can be assigned.
  *
- * We divide the MSI-X vectors to 8 per link and do a per-link
- * allocation
+ * We divide the MSI-X vectors to 8 per link and do a per-link allocation
+ *
+ * XLP9XX:
+ * 32 MSI-X vectors are available per link, and the interrupts are not routed
+ * thru the PIC. PIC ack not needed.
  *
  * Enable and disable done using standard MSI functions.
  */
 static void xlp_msix_mask_ack(struct irq_data *d)
 {
-	struct xlp_msi_data *md = irq_data_get_irq_handler_data(d);
+	struct xlp_msi_data *md;
 	int link, msixvec;
+	uint32_t status_reg, bit;
 
 	msixvec = nlm_irq_msixvec(d->irq);
-	link = nlm_irq_msixlink(d->irq);
+	link = nlm_irq_msixlink(msixvec);
 	mask_msi_irq(d);
+	md = irq_data_get_irq_handler_data(d);
 
 	/* Ack MSI on bridge */
-	nlm_write_reg(md->lnkbase, PCIE_MSIX_STATUS, 1u << msixvec);
+	if (cpu_is_xlp9xx()) {
+		status_reg = PCIE_9XX_MSIX_STATUSX(link);
+		bit = msixvec % XLP_MSIXVEC_PER_LINK;
+	} else {
+		status_reg = PCIE_MSIX_STATUS;
+		bit = msixvec;
+	}
+	nlm_write_reg(md->lnkbase, status_reg, 1u << bit);
 
 	/* Ack at eirr and PIC */
 	ack_c0_eirr(PIC_PCIE_MSIX_IRQ(link));
-	nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_MSIX_INDEX(msixvec));
+	if (!cpu_is_xlp9xx())
+		nlm_pic_ack(md->node->picbase,
+				PIC_IRT_PCIE_MSIX_INDEX(msixvec));
 }
 
 static struct irq_chip xlp_msix_chip = {
@@ -219,10 +258,18 @@
 {
 	u32 val;
 
-	val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
-	if ((val & 0x200) == 0) {
-		val |= 0x200;		/* MSI Interrupt enable */
-		nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+	if (cpu_is_xlp9xx()) {
+		val = nlm_read_reg(lnkbase, PCIE_9XX_INT_EN0);
+		if ((val & 0x200) == 0) {
+			val |= 0x200;		/* MSI Interrupt enable */
+			nlm_write_reg(lnkbase, PCIE_9XX_INT_EN0, val);
+		}
+	} else {
+		val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
+		if ((val & 0x200) == 0) {
+			val |= 0x200;
+			nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+		}
 	}
 
 	val = nlm_read_reg(lnkbase, 0x1);	/* CMD */
@@ -269,9 +316,12 @@
 
 	spin_lock_irqsave(&md->msi_lock, flags);
 	if (md->msi_alloc_mask == 0) {
-		/* switch the link IRQ to MSI range */
 		xlp_config_link_msi(lnkbase, lirq, msiaddr);
-		irt = PIC_IRT_PCIE_LINK_INDEX(link);
+		/* switch the link IRQ to MSI range */
+		if (cpu_is_xlp9xx())
+			irt = PIC_9XX_IRT_PCIE_LINK_INDEX(link);
+		else
+			irt = PIC_IRT_PCIE_LINK_INDEX(link);
 		nlm_setup_pic_irq(node, lirq, lirq, irt);
 		nlm_pic_init_irt(nlm_get_node(node)->picbase, irt, lirq,
 				 node * nlm_threads_per_node(), 1 /*en */);
@@ -311,10 +361,19 @@
 		val |= 0x80000000U;
 		nlm_write_reg(lnkbase, 0x2C, val);
 	}
-	val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
-	if ((val & 0x200) == 0) {
-		val |= 0x200;		/* MSI Interrupt enable */
-		nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+
+	if (cpu_is_xlp9xx()) {
+		val = nlm_read_reg(lnkbase, PCIE_9XX_INT_EN0);
+		if ((val & 0x200) == 0) {
+			val |= 0x200;		/* MSI Interrupt enable */
+			nlm_write_reg(lnkbase, PCIE_9XX_INT_EN0, val);
+		}
+	} else {
+		val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
+		if ((val & 0x200) == 0) {
+			val |= 0x200;		/* MSI Interrupt enable */
+			nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+		}
 	}
 
 	val = nlm_read_reg(lnkbase, 0x1);	/* CMD */
@@ -329,10 +388,19 @@
 	val |= (1 << 8) | lirq;
 	nlm_write_pci_reg(lnkbase, 0xf, val);
 
-	/* MSI-X addresses */
-	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_BASE, msixaddr >> 8);
-	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_LIMIT,
-					(msixaddr + MSI_ADDR_SZ) >> 8);
+	if (cpu_is_xlp9xx()) {
+		/* MSI-X addresses */
+		nlm_write_reg(lnkbase, PCIE_9XX_BRIDGE_MSIX_ADDR_BASE,
+				msixaddr >> 8);
+		nlm_write_reg(lnkbase, PCIE_9XX_BRIDGE_MSIX_ADDR_LIMIT,
+				(msixaddr + MSI_ADDR_SZ) >> 8);
+	} else {
+		/* MSI-X addresses */
+		nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_BASE,
+				msixaddr >> 8);
+		nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_LIMIT,
+				(msixaddr + MSI_ADDR_SZ) >> 8);
+	}
 }
 
 /*
@@ -369,6 +437,7 @@
 
 	xirq += t;
 	msixvec = nlm_irq_msixvec(xirq);
+
 	msg.address_hi = msixaddr >> 32;
 	msg.address_lo = msixaddr & 0xffffffff;
 	msg.data = 0xc00 | msixvec;
@@ -409,7 +478,7 @@
 {
 	struct nlm_soc_info *nodep;
 	struct xlp_msi_data *md;
-	int irq, i, irt, msixvec;
+	int irq, i, irt, msixvec, val;
 
 	pr_info("[%d %d] Init node PCI IRT\n", node, link);
 	nodep = nlm_get_node(node);
@@ -430,19 +499,28 @@
 		irq_set_handler_data(i, md);
 	}
 
-	for (i = 0; i < XLP_MSIXVEC_PER_LINK; i++) {
-		/* Initialize MSI-X irts to generate one interrupt per link */
-		msixvec = link * XLP_MSIXVEC_PER_LINK + i;
-		irt = PIC_IRT_PCIE_MSIX_INDEX(msixvec);
-		nlm_pic_init_irt(nodep->picbase, irt, PIC_PCIE_MSIX_IRQ(link),
-			node * nlm_threads_per_node(), 1 /* enable */);
+	for (i = 0; i < XLP_MSIXVEC_PER_LINK ; i++) {
+		if (cpu_is_xlp9xx()) {
+			val = ((node * nlm_threads_per_node()) << 7 |
+				PIC_PCIE_MSIX_IRQ(link) << 1 | 0 << 0);
+			nlm_write_pcie_reg(md->lnkbase, PCIE_9XX_MSIX_VECX(i +
+					(link * XLP_MSIXVEC_PER_LINK)), val);
+		} else {
+			/* Initialize MSI-X irts to generate one interrupt
+			 * per link
+			 */
+			msixvec = link * XLP_MSIXVEC_PER_LINK + i;
+			irt = PIC_IRT_PCIE_MSIX_INDEX(msixvec);
+			nlm_pic_init_irt(nodep->picbase, irt,
+					PIC_PCIE_MSIX_IRQ(link),
+					node * nlm_threads_per_node(), 1);
+		}
 
 		/* Initialize MSI-X extended irq space for the link  */
 		irq = nlm_irq_to_xirq(node, nlm_link_msixirq(link, i));
 		irq_set_chip_and_handler(irq, &xlp_msix_chip, handle_level_irq);
 		irq_set_handler_data(irq, md);
 	}
-
 }
 
 void nlm_dispatch_msi(int node, int lirq)
@@ -454,7 +532,11 @@
 	link = lirq - PIC_PCIE_LINK_MSI_IRQ_BASE;
 	irqbase = nlm_irq_to_xirq(node, nlm_link_msiirq(link, 0));
 	md = irq_get_handler_data(irqbase);
-	status = nlm_read_reg(md->lnkbase, PCIE_MSI_STATUS) &
+	if (cpu_is_xlp9xx())
+		status = nlm_read_reg(md->lnkbase, PCIE_9XX_MSI_STATUS) &
+						md->msi_enabled_mask;
+	else
+		status = nlm_read_reg(md->lnkbase, PCIE_MSI_STATUS) &
 						md->msi_enabled_mask;
 	while (status) {
 		i = __ffs(status);
@@ -472,10 +554,14 @@
 	link = lirq - PIC_PCIE_MSIX_IRQ_BASE;
 	irqbase = nlm_irq_to_xirq(node, nlm_link_msixirq(link, 0));
 	md = irq_get_handler_data(irqbase);
-	status = nlm_read_reg(md->lnkbase, PCIE_MSIX_STATUS);
+	if (cpu_is_xlp9xx())
+		status = nlm_read_reg(md->lnkbase, PCIE_9XX_MSIX_STATUSX(link));
+	else
+		status = nlm_read_reg(md->lnkbase, PCIE_MSIX_STATUS);
 
 	/* narrow it down to the MSI-x vectors for our link */
-	status = (status >> (link * XLP_MSIXVEC_PER_LINK)) &
+	if (!cpu_is_xlp9xx())
+		status = (status >> (link * XLP_MSIXVEC_PER_LINK)) &
 			((1 << XLP_MSIXVEC_PER_LINK) - 1);
 
 	while (status) {

diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index 3d27800..50034f9 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c

@@ -7,7 +7,7 @@
  * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
  *
  * Much of the code is derived from the original DDB5074 port by
- * Geert Uytterhoeven <geert@sonycom.com>
+ * Geert Uytterhoeven <geert@linux-m68k.org>
  *
  * This program is free software; you can redistribute	it and/or modify it
  * under  the terms of	the GNU General	 Public License as published by the

diff --git a/arch/mips/pci/ops-tx3927.c b/arch/mips/pci/ops-tx3927.c
index 02d64f77..d35dc9c 100644
--- a/arch/mips/pci/ops-tx3927.c
+++ b/arch/mips/pci/ops-tx3927.c

@@ -11,7 +11,7 @@
  *     Define the pci_ops for TX3927.
  *
  * Much of the code is derived from the original DDB5074 port by
- * Geert Uytterhoeven <geert@sonycom.com>
+ * Geert Uytterhoeven <geert@linux-m68k.org>
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the

diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c
index 3d5df51..0e046d8 100644
--- a/arch/mips/pci/ops-tx4927.c
+++ b/arch/mips/pci/ops-tx4927.c

@@ -202,17 +202,20 @@
 	unsigned long val;
 
 	if (!strncmp(str, "trdyto=", 7)) {
-		if (strict_strtoul(str + 7, 0, &val) == 0)
+		u8 val = 0;
+		if (kstrtou8(str + 7, 0, &val) == 0)
 			tx4927_pci_opts.trdyto = val;
 		return NULL;
 	}
 	if (!strncmp(str, "retryto=", 8)) {
-		if (strict_strtoul(str + 8, 0, &val) == 0)
+		u8 val = 0;
+		if (kstrtou8(str + 8, 0, &val) == 0)
 			tx4927_pci_opts.retryto = val;
 		return NULL;
 	}
 	if (!strncmp(str, "gbwc=", 5)) {
-		if (strict_strtoul(str + 5, 0, &val) == 0)
+		u16 val;
+		if (kstrtou16(str + 5, 0, &val) == 0)
 			tx4927_pci_opts.gbwc = val;
 		return NULL;
 	}

diff --git a/arch/mips/pci/pci-virtio-guest.c b/arch/mips/pci/pci-virtio-guest.c
new file mode 100644
index 0000000..40a078b
--- /dev/null
+++ b/arch/mips/pci/pci-virtio-guest.c

@@ -0,0 +1,131 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include <uapi/asm/bitfield.h>
+#include <asm/byteorder.h>
+#include <asm/io.h>
+
+/*
+ * I/O ports used by the config accessors in this file: the address word
+ * selecting a register is written to PCI_CONFIG_ADDRESS, and the register
+ * contents are transferred through PCI_CONFIG_DATA.
+ */
+#define PCI_CONFIG_ADDRESS	0xcf8
+#define PCI_CONFIG_DATA		0xcfc
+
+/*
+ * The 32-bit word written to PCI_CONFIG_ADDRESS, viewed either as
+ * individual fields or as the raw value @w.  The bit positions of the
+ * fields are given in the per-field comments.
+ *
+ * NOTE(review): __BITFIELD_FIELD presumably orders the fields according to
+ * the bitfield endianness so the positions hold on both -- confirm against
+ * uapi/asm/bitfield.h.
+ */
+union pci_config_address {
+	struct {
+		__BITFIELD_FIELD(unsigned enable_bit	  : 1,	/* 31       */
+		__BITFIELD_FIELD(unsigned reserved	  : 7,	/* 30 .. 24 */
+		__BITFIELD_FIELD(unsigned bus_number	  : 8,	/* 23 .. 16 */
+		__BITFIELD_FIELD(unsigned devfn_number	  : 8,	/* 15 .. 8  */
+		__BITFIELD_FIELD(unsigned register_number : 8,	/* 7  .. 0  */
+		)))));
+	};
+	u32 w;	/* the same address word as a single 32-bit value */
+};
+
+/* No per-device platform setup is performed; always returns 0. */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
+
+/*
+ * Choose the interrupt for a device: the slot number and interrupt pin are
+ * added, reduced modulo 4, and used as an offset from MIPS_IRQ_PCIA.
+ * @dev is not consulted.
+ */
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	const int offset = (slot + pin) % 4;
+
+	return MIPS_IRQ_PCIA + offset;
+}
+
+/*
+ * Select a configuration register: build the address word for
+ * (@bus, @devfn, @reg) with the enable bit set and write it to the
+ * PCI_CONFIG_ADDRESS port.
+ */
+static void pci_virtio_guest_write_config_addr(struct pci_bus *bus,
+					unsigned int devfn, int reg)
+{
+	union pci_config_address addr;
+
+	/* Start from an all-zero word so the reserved bits stay clear. */
+	addr.w = 0;
+	addr.enable_bit = 1;
+	addr.bus_number = bus->number;
+	addr.devfn_number = devfn;
+	addr.register_number = reg;
+
+	outl(addr.w, PCI_CONFIG_ADDRESS);
+}
+
+/*
+ * Write @val to configuration register @reg of (@bus, @devfn).
+ *
+ * The register is selected through the address port first.  1- and 2-byte
+ * writes go to the data port offset by (reg & 3) and (reg & 2) respectively,
+ * 4-byte writes go to the data port itself; any other @size performs no
+ * data access.  PCIBIOS_SUCCESSFUL is returned in every case.
+ */
+static int pci_virtio_guest_write_config(struct pci_bus *bus,
+		unsigned int devfn, int reg, int size, u32 val)
+{
+	pci_virtio_guest_write_config_addr(bus, devfn, reg);
+
+	if (size == 1)
+		outb(val, PCI_CONFIG_DATA + (reg & 3));
+	else if (size == 2)
+		outw(val, PCI_CONFIG_DATA + (reg & 2));
+	else if (size == 4)
+		outl(val, PCI_CONFIG_DATA);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Read configuration register @reg of (@bus, @devfn) into *@val.
+ *
+ * The register is selected through the address port first.  1- and 2-byte
+ * reads come from the data port offset by (reg & 3) and (reg & 2)
+ * respectively, 4-byte reads from the data port itself.  For any other
+ * @size no read is issued and *@val is left untouched.
+ * PCIBIOS_SUCCESSFUL is returned in every case.
+ */
+static int pci_virtio_guest_read_config(struct pci_bus *bus, unsigned int devfn,
+					int reg, int size, u32 *val)
+{
+	pci_virtio_guest_write_config_addr(bus, devfn, reg);
+
+	if (size == 1)
+		*val = inb(PCI_CONFIG_DATA + (reg & 3));
+	else if (size == 2)
+		*val = inw(PCI_CONFIG_DATA + (reg & 2));
+	else if (size == 4)
+		*val = inl(PCI_CONFIG_DATA);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config-space accessors installed on pci_virtio_guest_controller. */
+static struct pci_ops pci_virtio_guest_ops = {
+	.read  = pci_virtio_guest_read_config,
+	.write = pci_virtio_guest_write_config,
+};
+
+/* Memory resource of the controller: 0x10000000 - 0x1dffffff. */
+static struct resource pci_virtio_guest_mem_resource = {
+	.name	= "Virtio MEM",
+	.start	= 0x10000000,
+	.end	= 0x1dffffff,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* I/O port resource of the controller: 0 - 0xffff. */
+static struct resource pci_virtio_guest_io_resource = {
+	.name	= "Virtio IO",
+	.start	= 0,
+	.end	= 0xffff,
+	.flags	= IORESOURCE_IO,
+};
+
+/*
+ * Controller description registered by pci_virtio_guest_setup(), which
+ * also fills in io_map_base before calling register_pci_controller().
+ */
+static struct pci_controller pci_virtio_guest_controller = {
+	.pci_ops = &pci_virtio_guest_ops,
+	.mem_resource = &pci_virtio_guest_mem_resource,
+	.io_resource = &pci_virtio_guest_io_resource,
+};
+
+/*
+ * Register the virtio guest PCI controller at arch_initcall time.
+ *
+ * PCI_PROBE_ONLY is set before registration, io_map_base is taken from
+ * mips_io_port_base, and the function always returns 0.
+ */
+static int __init pci_virtio_guest_setup(void)
+{
+	/* Informational trace only, so log at info rather than error level. */
+	pr_info("pci_virtio_guest_setup\n");
+
+	/* Virtio comes pre-assigned */
+	pci_set_flags(PCI_PROBE_ONLY);
+
+	pci_virtio_guest_controller.io_map_base = mips_io_port_base;
+	register_pci_controller(&pci_virtio_guest_controller);
+	return 0;
+}
+arch_initcall(pci_virtio_guest_setup);

diff --git a/arch/mips/pmcs-msp71xx/Makefile b/arch/mips/pmcs-msp71xx/Makefile
index 9201c8b..d4f7220 100644
--- a/arch/mips/pmcs-msp71xx/Makefile
+++ b/arch/mips/pmcs-msp71xx/Makefile

@@ -10,4 +10,3 @@
 obj-$(CONFIG_MSP_HAS_MAC) += msp_eth.o
 obj-$(CONFIG_MSP_HAS_USB) += msp_usb.o
 obj-$(CONFIG_MIPS_MT_SMP) += msp_smp.o
-obj-$(CONFIG_MIPS_MT_SMTC) += msp_smtc.o

diff --git a/arch/mips/pmcs-msp71xx/msp_eth.c b/arch/mips/pmcs-msp71xx/msp_eth.c
index c584df3..15679b4 100644
--- a/arch/mips/pmcs-msp71xx/msp_eth.c
+++ b/arch/mips/pmcs-msp71xx/msp_eth.c

@@ -38,73 +38,6 @@
 #define MSP_ETHERNET_GPIO1	15
 #define MSP_ETHERNET_GPIO2	16
 
-#ifdef CONFIG_MSP_HAS_TSMAC
-#define MSP_TSMAC_SIZE	0x10020
-#define MSP_TSMAC_ID	"pmc_tsmac"
-
-static struct resource msp_tsmac0_resources[] = {
-	[0] = {
-		.start	= MSP_MAC0_BASE,
-		.end	= MSP_MAC0_BASE + MSP_TSMAC_SIZE - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MSP_INT_MAC0,
-		.end	= MSP_INT_MAC0,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct resource msp_tsmac1_resources[] = {
-	[0] = {
-		.start	= MSP_MAC1_BASE,
-		.end	= MSP_MAC1_BASE + MSP_TSMAC_SIZE - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MSP_INT_MAC1,
-		.end	= MSP_INT_MAC1,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-static struct resource msp_tsmac2_resources[] = {
-	[0] = {
-		.start	= MSP_MAC2_BASE,
-		.end	= MSP_MAC2_BASE + MSP_TSMAC_SIZE - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MSP_INT_SAR,
-		.end	= MSP_INT_SAR,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-
-static struct platform_device tsmac_device[] = {
-	[0] = {
-		.name	= MSP_TSMAC_ID,
-		.id	= 0,
-		.num_resources = ARRAY_SIZE(msp_tsmac0_resources),
-		.resource = msp_tsmac0_resources,
-	},
-	[1] = {
-		.name	= MSP_TSMAC_ID,
-		.id	= 1,
-		.num_resources = ARRAY_SIZE(msp_tsmac1_resources),
-		.resource = msp_tsmac1_resources,
-	},
-	[2] = {
-		.name	= MSP_TSMAC_ID,
-		.id	= 2,
-		.num_resources = ARRAY_SIZE(msp_tsmac2_resources),
-		.resource = msp_tsmac2_resources,
-	},
-};
-#define msp_eth_devs	tsmac_device
-
-#else
-/* If it is not TSMAC assume MSP_ETH (100Mbps) */
 #define MSP_ETH_ID	"pmc_mspeth"
 #define MSP_ETH_SIZE	0xE0
 static struct resource msp_eth0_resources[] = {
@@ -152,7 +85,6 @@
 };
 #define msp_eth_devs	mspeth_device
 
-#endif
 int __init msp_eth_setup(void)
 {
 	int i, ret = 0;
@@ -161,14 +93,6 @@
 	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO0);
 	msp_gpio_pin_hi(MSP_ETHERNET_GPIO0);
 
-#ifdef CONFIG_MSP_HAS_TSMAC
-	/* 3 phys on boards with TSMAC */
-	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO1);
-	msp_gpio_pin_hi(MSP_ETHERNET_GPIO1);
-
-	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO2);
-	msp_gpio_pin_hi(MSP_ETHERNET_GPIO2);
-#endif
 	for (i = 0; i < ARRAY_SIZE(msp_eth_devs); i++) {
 		ret = platform_device_register(&msp_eth_devs[i]);
 		printk(KERN_INFO "device: %d, return value = %d\n", i, ret);

diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c b/arch/mips/pmcs-msp71xx/msp_irq.c
index 9da5619..941744a 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq.c

@@ -32,7 +32,7 @@
 
 /* vectored interrupt implementation */
 
-/* SW0/1 interrupts are used for SMP/SMTC */
+/* SW0/1 interrupts are used for SMP  */
 static inline void mac0_int_dispatch(void) { do_IRQ(MSP_INT_MAC0); }
 static inline void mac1_int_dispatch(void) { do_IRQ(MSP_INT_MAC1); }
 static inline void mac2_int_dispatch(void) { do_IRQ(MSP_INT_SAR); }
@@ -138,14 +138,6 @@
 	set_vi_handler(MSP_INT_SEC, sec_int_dispatch);
 #ifdef CONFIG_MIPS_MT_SMP
 	msp_vsmp_int_init();
-#elif defined CONFIG_MIPS_MT_SMTC
-	/*Set hwmask for all platform devices */
-	irq_hwmask[MSP_INT_MAC0] = C_IRQ0;
-	irq_hwmask[MSP_INT_MAC1] = C_IRQ1;
-	irq_hwmask[MSP_INT_USB] = C_IRQ2;
-	irq_hwmask[MSP_INT_SAR] = C_IRQ3;
-	irq_hwmask[MSP_INT_SEC] = C_IRQ5;
-
 #endif	/* CONFIG_MIPS_MT_SMP */
 #endif	/* CONFIG_MIPS_MT */
 	/* setup the cascaded interrupts */
@@ -153,8 +145,10 @@
 	setup_irq(MSP_INT_PER, &per_cascade_msp);
 
 #else
-	/* setup the 2nd-level SLP register based interrupt controller */
-	/* VSMP /SMTC support support is not enabled for SLP */
+	/*
+	 * Setup the 2nd-level SLP register based interrupt controller.
+	 * VSMP support is not enabled for SLP.
+	 */
 	msp_slp_irq_init();
 
 	/* setup the cascaded SLP/PER interrupts */

diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
index e49b499..b8df2f7 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq_cic.c

@@ -120,10 +120,9 @@
 	* hurt for the others
 	*/
 	*CIC_STS_REG = (1 << (d->irq - MSP_CIC_INTBASE));
-	smtc_im_ack_irq(d->irq);
 }
 
-/*Note: Limiting to VSMP . Not tested in SMTC */
+/* Note: Limiting to VSMP.  */
 
 #ifdef CONFIG_MIPS_MT_SMP
 static int msp_cic_irq_set_affinity(struct irq_data *d,
@@ -183,10 +182,6 @@
 	for (i = MSP_CIC_INTBASE ; i < MSP_CIC_INTBASE + 32 ; i++) {
 		irq_set_chip_and_handler(i, &msp_cic_irq_controller,
 					 handle_level_irq);
-#ifdef CONFIG_MIPS_MT_SMTC
-		/* Mask of CIC interrupt */
-		irq_hwmask[i] = C_IRQ4;
-#endif
 	}
 
 	/* Initialize the PER interrupt sub-system */

diff --git a/arch/mips/pmcs-msp71xx/msp_irq_per.c b/arch/mips/pmcs-msp71xx/msp_irq_per.c
index d1fd530..a111836 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq_per.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq_per.c

@@ -113,9 +113,6 @@
 	/* initialize all the IRQ descriptors */
 	for (i = MSP_PER_INTBASE; i < MSP_PER_INTBASE + 32; i++) {
 		irq_set_chip(i, &msp_per_irq_controller);
-#ifdef CONFIG_MIPS_MT_SMTC
-		irq_hwmask[i] = C_IRQ4;
-#endif
 	}
 }
 

diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c
index 7e98076..4f925e0 100644
--- a/arch/mips/pmcs-msp71xx/msp_setup.c
+++ b/arch/mips/pmcs-msp71xx/msp_setup.c

@@ -27,7 +27,6 @@
 #endif
 
 extern void msp_serial_setup(void);
-extern void pmctwiled_setup(void);
 
 #if defined(CONFIG_PMC_MSP7120_EVAL) || \
     defined(CONFIG_PMC_MSP7120_GW) || \
@@ -148,8 +147,6 @@
 	pm_power_off = msp_power_off;
 }
 
-extern struct plat_smp_ops msp_smtc_smp_ops;
-
 void __init prom_init(void)
 {
 	unsigned long family;
@@ -230,17 +227,5 @@
 	 */
 	msp_serial_setup();
 
-	if (register_vsmp_smp_ops()) {
-#ifdef CONFIG_MIPS_MT_SMTC
-		register_smp_ops(&msp_smtc_smp_ops);
-#endif
-	}
-
-#ifdef CONFIG_PMCTWILED
-	/*
-	 * Setup LED states before the subsys_initcall loads other
-	 * dependent drivers/modules.
-	 */
-	pmctwiled_setup();
-#endif
+	register_vsmp_smp_ops();
 }

diff --git a/arch/mips/pmcs-msp71xx/msp_smtc.c b/arch/mips/pmcs-msp71xx/msp_smtc.c
deleted file mode 100644
index 6b5607f..0000000
--- a/arch/mips/pmcs-msp71xx/msp_smtc.c
+++ /dev/null

@@ -1,104 +0,0 @@
-/*
- * MSP71xx Platform-specific hooks for SMP operation
- */
-#include <linux/irq.h>
-#include <linux/init.h>
-
-#include <asm/mipsmtregs.h>
-#include <asm/mipsregs.h>
-#include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
-
-/* VPE/SMP Prototype implements platform interfaces directly */
-
-/*
- * Cause the specified action to be performed on a targeted "CPU"
- */
-
-static void msp_smtc_send_ipi_single(int cpu, unsigned int action)
-{
-	/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
-	smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
-}
-
-static void msp_smtc_send_ipi_mask(const struct cpumask *mask,
-						unsigned int action)
-{
-	unsigned int i;
-
-	for_each_cpu(i, mask)
-		msp_smtc_send_ipi_single(i, action);
-}
-
-/*
- * Post-config but pre-boot cleanup entry point
- */
-static void msp_smtc_init_secondary(void)
-{
-	int myvpe;
-
-	/* Don't enable Malta I/O interrupts (IP2) for secondary VPEs */
-	myvpe = read_c0_tcbind() & TCBIND_CURVPE;
-	if (myvpe > 0)
-		change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
-				STATUSF_IP6 | STATUSF_IP7);
-	smtc_init_secondary();
-}
-
-/*
- * Platform "CPU" startup hook
- */
-static void msp_smtc_boot_secondary(int cpu, struct task_struct *idle)
-{
-	smtc_boot_secondary(cpu, idle);
-}
-
-/*
- * SMP initialization finalization entry point
- */
-static void msp_smtc_smp_finish(void)
-{
-	smtc_smp_finish();
-}
-
-/*
- * Hook for after all CPUs are online
- */
-
-static void msp_smtc_cpus_done(void)
-{
-}
-
-/*
- * Platform SMP pre-initialization
- *
- * As noted above, we can assume a single CPU for now
- * but it may be multithreaded.
- */
-
-static void __init msp_smtc_smp_setup(void)
-{
-	/*
-	 * we won't get the definitive value until
-	 * we've run smtc_prepare_cpus later, but
-	 */
-
-	if (read_c0_config3() & (1 << 2))
-		smp_num_siblings = smtc_build_cpu_map(0);
-}
-
-static void __init msp_smtc_prepare_cpus(unsigned int max_cpus)
-{
-	smtc_prepare_cpus(max_cpus);
-}
-
-struct plat_smp_ops msp_smtc_smp_ops = {
-	.send_ipi_single	= msp_smtc_send_ipi_single,
-	.send_ipi_mask		= msp_smtc_send_ipi_mask,
-	.init_secondary		= msp_smtc_init_secondary,
-	.smp_finish		= msp_smtc_smp_finish,
-	.cpus_done		= msp_smtc_cpus_done,
-	.boot_secondary		= msp_smtc_boot_secondary,
-	.smp_setup		= msp_smtc_smp_setup,
-	.prepare_cpus		= msp_smtc_prepare_cpus,
-};

diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c
index 4dab915..c87c5f8 100644
--- a/arch/mips/pmcs-msp71xx/msp_usb.c
+++ b/arch/mips/pmcs-msp71xx/msp_usb.c

@@ -75,47 +75,6 @@
 		.resource	= msp_usbhost0_resources,
 	},
 };
-
-/* MSP7140/MSP82XX has two USB2 hosts. */
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-static u64 msp_usbhost1_dma_mask = 0xffffffffUL;
-
-static struct resource msp_usbhost1_resources[] = {
-	[0] = { /* EHCI-HS operational and capabilities registers */
-		.start	= MSP_USB1_HS_START,
-		.end	= MSP_USB1_HS_END,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MSP_INT_USB,
-		.end	= MSP_INT_USB,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = { /* MSBus-to-AMBA bridge register space */
-		.start	= MSP_USB1_MAB_START,
-		.end	= MSP_USB1_MAB_END,
-		.flags	= IORESOURCE_MEM,
-	},
-	[3] = { /* Identification and general hardware parameters */
-		.start	= MSP_USB1_ID_START,
-		.end	= MSP_USB1_ID_END,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct mspusb_device msp_usbhost1_device = {
-	.dev	= {
-		.name	= "pmcmsp-ehci",
-		.id	= 1,
-		.dev	= {
-			.dma_mask = &msp_usbhost1_dma_mask,
-			.coherent_dma_mask = 0xffffffffUL,
-		},
-		.num_resources	= ARRAY_SIZE(msp_usbhost1_resources),
-		.resource	= msp_usbhost1_resources,
-	},
-};
-#endif /* CONFIG_MSP_HAS_DUAL_USB */
 #endif /* CONFIG_USB_EHCI_HCD */
 
 #if defined(CONFIG_USB_GADGET)
@@ -157,46 +116,6 @@
 		.resource	= msp_usbdev0_resources,
 	},
 };
-
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-static struct resource msp_usbdev1_resources[] = {
-	[0] = { /* EHCI-HS operational and capabilities registers */
-		.start	= MSP_USB1_HS_START,
-		.end	= MSP_USB1_HS_END,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MSP_INT_USB,
-		.end	= MSP_INT_USB,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = { /* MSBus-to-AMBA bridge register space */
-		.start	= MSP_USB1_MAB_START,
-		.end	= MSP_USB1_MAB_END,
-		.flags	= IORESOURCE_MEM,
-	},
-	[3] = { /* Identification and general hardware parameters */
-		.start	= MSP_USB1_ID_START,
-		.end	= MSP_USB1_ID_END,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-/* This may need to be converted to a mspusb_device, too. */
-static struct mspusb_device msp_usbdev1_device = {
-	.dev	= {
-		.name	= "msp71xx_udc",
-		.id	= 0,
-		.dev	= {
-			.dma_mask = &msp_usbdev_dma_mask,
-			.coherent_dma_mask = 0xffffffffUL,
-		},
-		.num_resources	= ARRAY_SIZE(msp_usbdev1_resources),
-		.resource	= msp_usbdev1_resources,
-	},
-};
-
-#endif /* CONFIG_MSP_HAS_DUAL_USB */
 #endif /* CONFIG_USB_GADGET */
 
 static int __init msp_usb_setup(void)
@@ -231,10 +150,6 @@
 #if defined(CONFIG_USB_EHCI_HCD)
 		msp_devs[0] = &msp_usbhost0_device.dev;
 		ppfinit("platform add USB HOST done %s.\n", msp_devs[0]->name);
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-		msp_devs[1] = &msp_usbhost1_device.dev;
-		ppfinit("platform add USB HOST done %s.\n", msp_devs[1]->name);
-#endif
 #else
 		ppfinit("%s: echi_hcd not supported\n", __FILE__);
 #endif	/* CONFIG_USB_EHCI_HCD */
@@ -244,11 +159,6 @@
 		msp_devs[0] = &msp_usbdev0_device.dev;
 		ppfinit("platform add USB DEVICE done %s.\n"
 					, msp_devs[0]->name);
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-		msp_devs[1] = &msp_usbdev1_device.dev;
-		ppfinit("platform add USB DEVICE done %s.\n"
-					, msp_devs[1]->name);
-#endif
 #else
 		ppfinit("%s: usb_gadget not supported\n", __FILE__);
 #endif	/* CONFIG_USB_GADGET */

diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index 2b7e837..b4b774b 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c

@@ -33,11 +33,6 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 
-#ifdef CONFIG_I2C_PNX0105
-/* Until i2c driver available in kernel.*/
-#include <linux/i2c-pnx0105.h>
-#endif
-
 #include <irq.h>
 #include <irq-mapping.h>
 #include <pnx833x.h>
@@ -134,70 +129,6 @@
 	.resource	= pnx833x_usb_ehci_resources,
 };
 
-#ifdef CONFIG_I2C_PNX0105
-static struct resource pnx833x_i2c0_resources[] = {
-	{
-		.start		= PNX833X_I2C0_PORTS_START,
-		.end		= PNX833X_I2C0_PORTS_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= PNX833X_PIC_I2C0_INT,
-		.end		= PNX833X_PIC_I2C0_INT,
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-static struct resource pnx833x_i2c1_resources[] = {
-	{
-		.start		= PNX833X_I2C1_PORTS_START,
-		.end		= PNX833X_I2C1_PORTS_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= PNX833X_PIC_I2C1_INT,
-		.end		= PNX833X_PIC_I2C1_INT,
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-static struct i2c_pnx0105_dev pnx833x_i2c_dev[] = {
-	{
-		.base = PNX833X_I2C0_PORTS_START,
-		.irq = -1, /* should be PNX833X_PIC_I2C0_INT but polling is faster */
-		.clock = 6,	/* 0 == 400 kHz, 4 == 100 kHz(Maximum HDMI), 6 = 50kHz(Preferred HDCP) */
-		.bus_addr = 0,	/* no slave support */
-	},
-	{
-		.base = PNX833X_I2C1_PORTS_START,
-		.irq = -1,	/* on high freq, polling is faster */
-		/*.irq = PNX833X_PIC_I2C1_INT,*/
-		.clock = 4,	/* 0 == 400 kHz, 4 == 100 kHz. 100 kHz seems a safe default for now */
-		.bus_addr = 0,	/* no slave support */
-	},
-};
-
-static struct platform_device pnx833x_i2c0_device = {
-	.name		= "i2c-pnx0105",
-	.id		= 0,
-	.dev = {
-		.platform_data = &pnx833x_i2c_dev[0],
-	},
-	.num_resources	= ARRAY_SIZE(pnx833x_i2c0_resources),
-	.resource	= pnx833x_i2c0_resources,
-};
-
-static struct platform_device pnx833x_i2c1_device = {
-	.name		= "i2c-pnx0105",
-	.id		= 1,
-	.dev = {
-		.platform_data = &pnx833x_i2c_dev[1],
-	},
-	.num_resources	= ARRAY_SIZE(pnx833x_i2c1_resources),
-	.resource	= pnx833x_i2c1_resources,
-};
-#endif
-
 static u64 ethernet_dmamask = DMA_BIT_MASK(32);
 
 static struct resource pnx833x_ethernet_resources[] = {
@@ -294,10 +225,6 @@
 static struct platform_device *pnx833x_platform_devices[] __initdata = {
 	&pnx833x_uart_device,
 	&pnx833x_usb_ehci_device,
-#ifdef CONFIG_I2C_PNX0105
-	&pnx833x_i2c0_device,
-	&pnx833x_i2c1_device,
-#endif
 	&pnx833x_ethernet_device,
 	&pnx833x_sata_device,
 	&pnx833x_flash_nand,

diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index ab0e379..8e52446 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c

@@ -19,6 +19,9 @@
 } gio_name_table[] = {
 	{ .name = "SGI Impact", .id = 0x10 },
 	{ .name = "Phobos G160", .id = 0x35 },
+	{ .name = "Phobos G130", .id = 0x36 },
+	{ .name = "Phobos G100", .id = 0x37 },
+	{ .name = "Set Engineering GFE", .id = 0x38 },
 	/* fake IDs */
 	{ .name = "SGI Newport", .id = 0x7e },
 	{ .name = "SGI GR2/GR3", .id = 0x7f },
@@ -293,7 +296,16 @@
 		 * data matches
 		 */
 		ptr8 = (void *)CKSEG1ADDR(addr + 3);
-		get_dbe(tmp8, ptr8);
+		if (get_dbe(tmp8, ptr8)) {
+			/*
+			 * 32bit access worked, but 8bit doesn't
+			 * so we don't see phantom reads on
+			 * a pipelined bus, but a real card which
+			 * doesn't support 8 bit reads
+			 */
+			*res = tmp32;
+			return 1;
+		}
 		ptr16 = (void *)CKSEG1ADDR(addr + 2);
 		get_dbe(tmp16, ptr16);
 		if (tmp8 == (tmp16 & 0xff) &&
@@ -324,7 +336,7 @@
 }
 
 
-static void ip22_check_gio(int slotno, unsigned long addr)
+static void ip22_check_gio(int slotno, unsigned long addr, int irq)
 {
 	const char *name = "Unknown";
 	struct gio_device *gio_dev;
@@ -338,9 +350,9 @@
 	else {
 		if (!ip22_gio_id(addr, &tmp)) {
 			/*
-			 * no GIO signature at start address of slot, but
-			 * Newport doesn't have one, so let's check usea
-			 * status register
+			 * no GIO signature at start address of slot
+			 * since Newport doesn't have one, we check if
+			 * user status register is readable
 			 */
 			if (ip22_gio_id(addr + NEWPORT_USTATUS_OFFS, &tmp))
 				tmp = 0x7e;
@@ -369,6 +381,7 @@
 		gio_dev->resource.start = addr;
 		gio_dev->resource.end = addr + 0x3fffff;
 		gio_dev->resource.flags = IORESOURCE_MEM;
+		gio_dev->irq = irq;
 		dev_set_name(&gio_dev->dev, "%d", slotno);
 		gio_device_register(gio_dev);
 	} else
@@ -408,16 +421,17 @@
 		request_resource(&iomem_resource, &gio_bus_resource);
 		printk(KERN_INFO "GIO: Probing bus...\n");
 
-		if (ip22_is_fullhouse() ||
-		    !get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1])) {
-			/* Indigo2 and ChallengeS */
-			ip22_check_gio(0, GIO_SLOT_GFX_BASE);
-			ip22_check_gio(1, GIO_SLOT_EXP0_BASE);
+		if (ip22_is_fullhouse()) {
+			/* Indigo2 */
+			ip22_check_gio(0, GIO_SLOT_GFX_BASE, SGI_GIO_1_IRQ);
+			ip22_check_gio(1, GIO_SLOT_EXP0_BASE, SGI_GIO_1_IRQ);
 		} else {
-			/* Indy */
-			ip22_check_gio(0, GIO_SLOT_GFX_BASE);
-			ip22_check_gio(1, GIO_SLOT_EXP0_BASE);
-			ip22_check_gio(2, GIO_SLOT_EXP1_BASE);
+			/* Indy/Challenge S */
+			if (get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1]))
+				ip22_check_gio(0, GIO_SLOT_GFX_BASE,
+					       SGI_GIO_0_IRQ);
+			ip22_check_gio(1, GIO_SLOT_EXP0_BASE, SGI_GIOEXP0_IRQ);
+			ip22_check_gio(2, GIO_SLOT_EXP1_BASE, SGI_GIOEXP1_IRQ);
 		}
 	} else
 		device_unregister(&gio_bus);

diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c
index 58b40ae..c66889f 100644
--- a/arch/mips/sgi-ip22/ip22-int.c
+++ b/arch/mips/sgi-ip22/ip22-int.c

@@ -119,9 +119,14 @@
 	} else
 		irq = lc0msk_to_irqnr[mask];
 
-	/* if irq == 0, then the interrupt has already been cleared */
+	/*
+	 * workaround for INT2 bug; if irq == 0, INT2 has seen a fifo full
+	 * irq, but failed to latch it into status register
+	 */
 	if (irq)
 		do_IRQ(irq);
+	else
+		do_IRQ(SGINT_LOCAL0 + 0);
 }
 
 static void indy_local1_irqdispatch(void)

diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index f4ea8aa..f9ae6a8 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c

@@ -186,10 +186,6 @@
 	local_irq_enable();
 }
 
-static void __init ip27_cpus_done(void)
-{
-}
-
 /*
  * Launch a slave into smp_bootstrap().	 It doesn't take an argument, and we
  * set sp to the kernel stack of the newly created idle process, gp to the proc
@@ -236,7 +232,6 @@
 	.send_ipi_mask		= ip27_send_ipi_mask,
 	.init_secondary		= ip27_init_secondary,
 	.smp_finish		= ip27_smp_finish,
-	.cpus_done		= ip27_cpus_done,
 	.boot_secondary		= ip27_boot_secondary,
 	.smp_setup		= ip27_smp_setup,
 	.prepare_cpus		= ip27_prepare_cpus,

diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index 59cfe26..373fbbc 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c

@@ -347,19 +347,8 @@
 	unsigned int cpu = smp_processor_id();
 	unsigned int pending;
 
-#ifdef CONFIG_SIBYTE_BCM1480_PROF
-	/* Set compare to count to silence count/compare timer interrupts */
-	write_c0_compare(read_c0_count());
-#endif
-
 	pending = read_c0_cause() & read_c0_status();
 
-#ifdef CONFIG_SIBYTE_BCM1480_PROF
-	if (pending & CAUSEF_IP7)	/* Cpu performance counter interrupt */
-		sbprof_cpu_intr();
-	else
-#endif
-
 	if (pending & CAUSEF_IP4)
 		do_IRQ(K_BCM1480_INT_TIMER_0 + cpu);
 #ifdef CONFIG_SMP

diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 70d9182..af7d44e 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c

@@ -115,13 +115,6 @@
 }
 
 /*
- * Final cleanup after all secondaries booted
- */
-static void bcm1480_cpus_done(void)
-{
-}
-
-/*
  * Setup the PC, SP, and GP of a secondary processor and start it
  * running!
  */
@@ -170,7 +163,6 @@
 	.send_ipi_mask		= bcm1480_send_ipi_mask,
 	.init_secondary		= bcm1480_init_secondary,
 	.smp_finish		= bcm1480_smp_finish,
-	.cpus_done		= bcm1480_cpus_done,
 	.boot_secondary		= bcm1480_boot_secondary,
 	.smp_setup		= bcm1480_smp_setup,
 	.prepare_cpus		= bcm1480_prepare_cpus,

diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index db97611..c0c4b3f 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c

@@ -103,13 +103,6 @@
 }
 
 /*
- * Final cleanup after all secondaries booted
- */
-static void sb1250_cpus_done(void)
-{
-}
-
-/*
  * Setup the PC, SP, and GP of a secondary processor and start it
  * running!
  */
@@ -158,7 +151,6 @@
 	.send_ipi_mask		= sb1250_send_ipi_mask,
 	.init_secondary		= sb1250_init_secondary,
 	.smp_finish		= sb1250_smp_finish,
-	.cpus_done		= sb1250_cpus_done,
 	.boot_secondary		= sb1250_boot_secondary,
 	.smp_setup		= sb1250_smp_setup,
 	.prepare_cpus		= sb1250_prepare_cpus,

diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 2b0b83c..dd2cf25 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c

@@ -309,8 +309,8 @@
 			txx9_board_vec = find_board_byname(str + 6);
 			continue;
 		} else if (strncmp(str, "masterclk=", 10) == 0) {
-			unsigned long val;
-			if (strict_strtoul(str + 10, 10, &val) == 0)
+			unsigned int val;
+			if (kstrtouint(str + 10, 10, &val) == 0)
 				txx9_master_clock = val;
 			continue;
 		} else if (strcmp(str, "icdisable") == 0) {

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e099899..bd6dd6e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig

@@ -453,6 +453,14 @@
 	default "4"
 	depends on NEED_MULTIPLE_NODES
 
+config USE_PERCPU_NUMA_NODE_ID
+	def_bool y
+	depends on NUMA
+
+config HAVE_MEMORYLESS_NODES
+	def_bool y
+	depends on NUMA
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on PPC64

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 21c9f30..790352f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug

@@ -235,11 +235,6 @@
 	  Select this to enable early debugging for Nintendo GameCube/Wii
 	  consoles via an external USB Gecko adapter.
 
-config PPC_EARLY_DEBUG_WSP
-	bool "Early debugging via WSP's internal UART"
-	depends on PPC_WSP
-	select PPC_UDBG_16550
-
 config PPC_EARLY_DEBUG_PS3GELIC
 	bool "Early debugging through the PS3 Ethernet port"
 	depends on PPC_PS3

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ce4c68a..5687e29 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile

@@ -113,8 +113,13 @@
 endif
 endif
 
-CFLAGS-$(CONFIG_PPC64)	:= -mtraceback=no -mcall-aixdesc
-CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
+CFLAGS-$(CONFIG_PPC64)	:= -mtraceback=no
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
+else
+CFLAGS-$(CONFIG_PPC64)	+= -mcall-aixdesc
+endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,-mminimal-toc)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
 CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)
@@ -153,7 +158,7 @@
 asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
 
 KBUILD_CPPFLAGS	+= -Iarch/$(ARCH) $(asinstr)
-KBUILD_AFLAGS	+= -Iarch/$(ARCH)
+KBUILD_AFLAGS	+= -Iarch/$(ARCH) $(AFLAGS-y)
 KBUILD_CFLAGS	+= -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
 CPP		= $(CC) -E $(KBUILD_CFLAGS)
 
@@ -161,6 +166,11 @@
 
 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
 
+ifeq ($(CONFIG_476FPE_ERR46),y)
+	KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
+		-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
+endif
+
 # No AltiVec or VSX instructions when building kernel
 KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
 KBUILD_CFLAGS += $(call cc-option,-mno-vsx)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index a1f8c7f..ccc25ed 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile

@@ -22,8 +22,14 @@
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
-		 -mbig-endian
+		 -isystem $(shell $(CROSS32CC) -print-file-name=include)
+ifdef CONFIG_PPC64_BOOT_WRAPPER
+BOOTCFLAGS	+= -m64
+endif
+ifdef CONFIG_CPU_BIG_ENDIAN
+BOOTCFLAGS	+= -mbig-endian
+endif
+
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
 ifdef CONFIG_DEBUG_INFO
@@ -47,6 +53,7 @@
 $(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405
 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405
 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405
+$(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405
 $(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405
 
 
@@ -86,6 +93,7 @@
 				cuboot-taishan.c cuboot-katmai.c \
 				cuboot-warp.c cuboot-yosemite.c \
 				treeboot-iss4xx.c treeboot-currituck.c \
+				treeboot-akebono.c \
 				simpleboot.c fixed-head.S virtex.c
 src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
 src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
@@ -99,6 +107,11 @@
 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
 src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
+src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
+src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
+src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
+src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S
+src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
@@ -137,7 +150,11 @@
 $(obj)/empty.c:
 	@touch $@
 
-$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds: $(obj)/%: $(srctree)/$(src)/%.S
+$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
+	$(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
+		-D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
+
+$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
 	@cp $< $@
 
 clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
@@ -235,6 +252,7 @@
 image-$(CONFIG_ISS4xx)			+= treeImage.iss4xx \
 					   treeImage.iss4xx-mpic
 image-$(CONFIG_CURRITUCK)			+= treeImage.currituck
+image-$(CONFIG_AKEBONO)			+= treeImage.akebono
 
 # Board ports in arch/powerpc/platform/8xx/Kconfig
 image-$(CONFIG_MPC86XADS)		+= cuImage.mpc866ads
@@ -315,8 +333,8 @@
 $(obj)/zImage.initrd.%: vmlinux $(wrapperbits)
 	$(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz)
 
-$(obj)/zImage.%: vmlinux $(wrapperbits)
-	$(call if_changed,wrap,$*)
+$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits)
+	$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
 
 # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
 $(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb

diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 349b553..9d9f6f3 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c

@@ -6,6 +6,8 @@
  *
  * Copyright 2000 Paul Mackerras.
  *
+ * Adapted for 64 bit little endian images by Andrew Tauferner.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -55,36 +57,61 @@
 
 #define ROUNDUP(len)	(((len) + 3) & ~3)
 
-unsigned char buf[512];
+unsigned char buf[1024];
+#define ELFDATA2LSB     1
+#define ELFDATA2MSB     2
+static int e_data = ELFDATA2MSB;
+#define ELFCLASS32      1
+#define ELFCLASS64      2
+static int e_class = ELFCLASS32;
 
 #define GET_16BE(off)	((buf[off] << 8) + (buf[(off)+1]))
-#define GET_32BE(off)	((GET_16BE(off) << 16) + GET_16BE((off)+2))
+#define GET_32BE(off)	(((unsigned int)GET_16BE(off) << 16U) + GET_16BE((off)+2U))
+#define GET_64BE(off)	((((unsigned long long)GET_32BE(off)) << 32ULL) + \
+			((unsigned long long)GET_32BE((off)+4ULL)))
+#define PUT_16BE(off, v)(buf[off] = ((v) >> 8) & 0xff, \
+			 buf[(off) + 1] = (v) & 0xff)
+#define PUT_32BE(off, v)(PUT_16BE((off), (v) >> 16L), PUT_16BE((off) + 2, (v)))
+#define PUT_64BE(off, v)((PUT_32BE((off), (unsigned long long)(v) >> 32), \
+			  PUT_32BE((off) + 4, (v))))
 
-#define PUT_16BE(off, v)	(buf[off] = ((v) >> 8) & 0xff, \
-				 buf[(off) + 1] = (v) & 0xff)
-#define PUT_32BE(off, v)	(PUT_16BE((off), (v) >> 16), \
-				 PUT_16BE((off) + 2, (v)))
+#define GET_16LE(off)	((buf[off]) + (buf[(off)+1] << 8))
+#define GET_32LE(off)	(GET_16LE(off) + ((unsigned int)GET_16LE((off)+2U) << 16U))
+#define GET_64LE(off)	((unsigned long long)GET_32LE(off) + \
+			(((unsigned long long)GET_32LE((off)+4ULL)) << 32ULL))
+#define PUT_16LE(off, v) (buf[off] = (v) & 0xff, \
+			  buf[(off) + 1] = ((v) >> 8) & 0xff)
+#define PUT_32LE(off, v) (PUT_16LE((off), (v)), PUT_16LE((off) + 2, (v) >> 16L))
+#define PUT_64LE(off, v) (PUT_32LE((off), (v)), PUT_32LE((off) + 4, (unsigned long long)(v) >> 32))
+
+#define GET_16(off)	(e_data == ELFDATA2MSB ? GET_16BE(off) : GET_16LE(off))
+#define GET_32(off)	(e_data == ELFDATA2MSB ? GET_32BE(off) : GET_32LE(off))
+#define GET_64(off)	(e_data == ELFDATA2MSB ? GET_64BE(off) : GET_64LE(off))
+#define PUT_16(off, v)	(e_data == ELFDATA2MSB ? PUT_16BE(off, v) : \
+			 PUT_16LE(off, v))
+#define PUT_32(off, v)  (e_data == ELFDATA2MSB ? PUT_32BE(off, v) : \
+			 PUT_32LE(off, v))
+#define PUT_64(off, v)  (e_data == ELFDATA2MSB ? PUT_64BE(off, v) : \
+			 PUT_64LE(off, v))
 
 /* Structure of an ELF file */
 #define E_IDENT		0	/* ELF header */
-#define	E_PHOFF		28
-#define E_PHENTSIZE	42
-#define E_PHNUM		44
-#define E_HSIZE		52	/* size of ELF header */
+#define	E_PHOFF		(e_class == ELFCLASS32 ? 28 : 32)
+#define E_PHENTSIZE	(e_class == ELFCLASS32 ? 42 : 54)
+#define E_PHNUM		(e_class == ELFCLASS32 ? 44 : 56)
+#define E_HSIZE		(e_class == ELFCLASS32 ? 52 : 64)
 
 #define EI_MAGIC	0	/* offsets in E_IDENT area */
 #define EI_CLASS	4
 #define EI_DATA		5
 
 #define PH_TYPE		0	/* ELF program header */
-#define PH_OFFSET	4
-#define PH_FILESZ	16
-#define PH_HSIZE	32	/* size of program header */
+#define PH_OFFSET	(e_class == ELFCLASS32 ? 4 : 8)
+#define PH_FILESZ	(e_class == ELFCLASS32 ? 16 : 32)
+#define PH_HSIZE	(e_class == ELFCLASS32 ? 32 : 56)
 
 #define PT_NOTE		4	/* Program header type = note */
 
-#define ELFCLASS32	1
-#define ELFDATA2MSB	2
 
 unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' };
 
@@ -92,8 +119,8 @@
 main(int ac, char **av)
 {
 	int fd, n, i;
-	int ph, ps, np;
-	int nnote, nnote2, ns;
+	unsigned long ph, ps, np;
+	long nnote, nnote2, ns;
 
 	if (ac != 2) {
 		fprintf(stderr, "Usage: %s elf-file\n", av[0]);
@@ -114,26 +141,27 @@
 		exit(1);
 	}
 
-	if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+	if (memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+		goto notelf;
+	e_class = buf[E_IDENT+EI_CLASS];
+	if (e_class != ELFCLASS32 && e_class != ELFCLASS64)
+		goto notelf;
+	e_data = buf[E_IDENT+EI_DATA];
+	if (e_data != ELFDATA2MSB && e_data != ELFDATA2LSB)
+		goto notelf;
+	if (n < E_HSIZE)
 		goto notelf;
 
-	if (buf[E_IDENT+EI_CLASS] != ELFCLASS32
-	    || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) {
-		fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n",
-			av[1]);
-		exit(1);
-	}
-
-	ph = GET_32BE(E_PHOFF);
-	ps = GET_16BE(E_PHENTSIZE);
-	np = GET_16BE(E_PHNUM);
+	ph = (e_class == ELFCLASS32 ? GET_32(E_PHOFF) : GET_64(E_PHOFF));
+	ps = GET_16(E_PHENTSIZE);
+	np = GET_16(E_PHNUM);
 	if (ph < E_HSIZE || ps < PH_HSIZE || np < 1)
 		goto notelf;
 	if (ph + (np + 2) * ps + nnote + nnote2 > n)
 		goto nospace;
 
 	for (i = 0; i < np; ++i) {
-		if (GET_32BE(ph + PH_TYPE) == PT_NOTE) {
+		if (GET_32(ph + PH_TYPE) == PT_NOTE) {
 			fprintf(stderr, "%s already has a note entry\n",
 				av[1]);
 			exit(0);
@@ -148,15 +176,22 @@
 
 	/* fill in the program header entry */
 	ns = ph + 2 * ps;
-	PUT_32BE(ph + PH_TYPE, PT_NOTE);
-	PUT_32BE(ph + PH_OFFSET, ns);
-	PUT_32BE(ph + PH_FILESZ, nnote);
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote);
+	else
+		PUT_64(ph + PH_FILESZ, nnote);
 
 	/* fill in the note area we point to */
 	/* XXX we should probably make this a proper section */
-	PUT_32BE(ns, strlen(arch) + 1);
-	PUT_32BE(ns + 4, N_DESCR * 4);
-	PUT_32BE(ns + 8, 0x1275);
+	PUT_32(ns, strlen(arch) + 1);
+	PUT_32(ns + 4, N_DESCR * 4);
+	PUT_32(ns + 8, 0x1275);
 	strcpy((char *) &buf[ns + 12], arch);
 	ns += 12 + strlen(arch) + 1;
 	for (i = 0; i < N_DESCR; ++i, ns += 4)
@@ -164,21 +199,28 @@
 
 	/* fill in the second program header entry and the RPA note area */
 	ph += ps;
-	PUT_32BE(ph + PH_TYPE, PT_NOTE);
-	PUT_32BE(ph + PH_OFFSET, ns);
-	PUT_32BE(ph + PH_FILESZ, nnote2);
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote2);
+	else
+		PUT_64(ph + PH_FILESZ, nnote2);
 
 	/* fill in the note area we point to */
-	PUT_32BE(ns, strlen(rpaname) + 1);
-	PUT_32BE(ns + 4, sizeof(rpanote));
-	PUT_32BE(ns + 8, 0x12759999);
+	PUT_32(ns, strlen(rpaname) + 1);
+	PUT_32(ns + 4, sizeof(rpanote));
+	PUT_32(ns + 8, 0x12759999);
 	strcpy((char *) &buf[ns + 12], rpaname);
 	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
 	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, rpanote[i]);
 
 	/* Update the number of program headers */
-	PUT_16BE(E_PHNUM, np + 2);
+	PUT_16(E_PHNUM, np + 2);
 
 	/* write back */
 	lseek(fd, (long) 0, SEEK_SET);

diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 0f7428a..14de4f8 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S

@@ -1,17 +1,20 @@
 /*
  * Copyright (C) Paul Mackerras 1997.
  *
+ * Adapted for 64 bit LE PowerPC by Andrew Tauferner
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * NOTE: this code runs in 32 bit mode, is position-independent,
- * and is packaged as ELF32.
  */
 
 #include "ppc_asm.h"
 
+RELA = 7
+RELACOUNT = 0x6ffffff9
+
 	.text
 	/* A procedure descriptor used when booting this as a COFF file.
 	 * When making COFF, this comes first in the link and we're
@@ -21,6 +24,20 @@
 _zimage_start_opd:
 	.long	0x500000, 0, 0, 0
 
+#ifdef __powerpc64__
+.balign 8
+p_start:	.llong	_start
+p_etext:	.llong	_etext
+p_bss_start:	.llong	__bss_start
+p_end:		.llong	_end
+
+p_toc:		.llong	__toc_start + 0x8000 - p_base
+p_dyn:		.llong	__dynamic_start - p_base
+p_rela:		.llong	__rela_dyn_start - p_base
+p_prom:		.llong	0
+	.weak	_platform_stack_top
+p_pstack:	.llong	_platform_stack_top
+#else
 p_start:	.long	_start
 p_etext:	.long	_etext
 p_bss_start:	.long	__bss_start
@@ -28,6 +45,7 @@
 
 	.weak	_platform_stack_top
 p_pstack:	.long	_platform_stack_top
+#endif
 
 	.weak	_zimage_start
 	.globl	_zimage_start
@@ -38,6 +56,7 @@
 	   and the address where we're running. */
 	bl	.+4
 p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+#ifndef __powerpc64__
 	/* grab the link address of the dynamic section in r11 */
 	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
 	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
@@ -51,8 +70,6 @@
 
 	/* The dynamic section contains a series of tagged entries.
 	 * We need the RELA and RELACOUNT entries. */
-RELA = 7
-RELACOUNT = 0x6ffffff9
 	li	r9,0
 	li	r0,0
 9:	lwz	r8,0(r12)	/* get tag */
@@ -120,9 +137,164 @@
 	li	r0,0
 	stwu	r0,-16(r1)	/* establish a stack frame */
 6:
+#else /* __powerpc64__ */
+	/* Save the prom pointer at p_prom. */
+	std	r5,(p_prom-p_base)(r10)
 
+	/* Set r2 to the TOC. */
+	ld	r2,(p_toc-p_base)(r10)
+	add	r2,r2,r10
+
+	/* Grab the link address of the dynamic section in r11. */
+	ld	r11,-32768(r2)
+	cmpwi	r11,0
+	beq	3f              /* if not linked -pie then no dynamic section */
+
+	ld	r11,(p_dyn-p_base)(r10)
+	add	r11,r11,r10
+	ld	r9,(p_rela-p_base)(r10)
+	add	r9,r9,r10
+
+	li	r7,0
+	li	r8,0
+9:	ld	r6,0(r11)       /* get tag */
+	cmpdi	r6,0
+	beq	12f              /* end of list */
+	cmpdi	r6,RELA
+	bne	10f
+	ld	r7,8(r11)       /* get RELA pointer in r7 */
+	b	11f
+10:	addis	r6,r6,(-RELACOUNT)@ha
+	cmpdi	r6,RELACOUNT@l
+	bne	11f
+	ld	r8,8(r11)       /* get RELACOUNT value in r8 */
+11:	addi	r11,r11,16
+	b	9b
+12:
+	cmpdi	r7,0            /* check we have both RELA and RELACOUNT */
+	cmpdi	cr1,r8,0
+	beq	3f
+	beq	cr1,3f
+
+	/* Calculate the runtime offset. */
+	subf	r7,r7,r9
+
+	/* Run through the list of relocations and process the
+	 * R_PPC64_RELATIVE ones. */
+	mtctr	r8
+13:	ld	r0,8(r9)        /* ELF64_R_TYPE(reloc->r_info) */
+	cmpdi	r0,22           /* R_PPC64_RELATIVE */
+	bne	3f
+	ld	r6,0(r9)        /* reloc->r_offset */
+	ld	r0,16(r9)       /* reloc->r_addend */
+	add	r0,r0,r7
+	stdx	r0,r7,r6
+	addi	r9,r9,24
+	bdnz	13b
+
+	/* Do a cache flush for our text, in case the loader didn't */
+3:	ld	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	ld	r8,p_etext-p_base(r10)
+4:	dcbf	r0,r9
+	icbi	r0,r9
+	addi	r9,r9,0x20
+	cmpld	cr0,r9,r8
+	blt	4b
+	sync
+	isync
+
+	/* Clear the BSS */
+	ld	r9,p_bss_start-p_base(r10)
+	ld	r8,p_end-p_base(r10)
+	li	r0,0
+5:	std	r0,0(r9)
+	addi	r9,r9,8
+	cmpld	cr0,r9,r8
+	blt	5b
+
+	/* Possibly set up a custom stack */
+	ld	r8,p_pstack-p_base(r10)
+	cmpdi	r8,0
+	beq	6f
+	ld	r1,0(r8)
+	li	r0,0
+	stdu	r0,-16(r1)	/* establish a stack frame */
+6:
+#endif  /* __powerpc64__ */
 	/* Call platform_init() */
 	bl	platform_init
 
 	/* Call start */
 	b	start
+
+#ifdef __powerpc64__
+
+#define PROM_FRAME_SIZE 512
+#define SAVE_GPR(n, base)       std     n,8*(n)(base)
+#define REST_GPR(n, base)       ld      n,8*(n)(base)
+#define SAVE_2GPRS(n, base)     SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+#define SAVE_4GPRS(n, base)     SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+#define SAVE_8GPRS(n, base)     SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+#define SAVE_10GPRS(n, base)    SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+#define REST_2GPRS(n, base)     REST_GPR(n, base); REST_GPR(n+1, base)
+#define REST_4GPRS(n, base)     REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+#define REST_8GPRS(n, base)     REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+#define REST_10GPRS(n, base)    REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+/* prom handles the jump into and return from firmware.  The prom args pointer
+   is loaded in r3. */
+.globl prom
+prom:
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+	SAVE_GPR(2, r1)
+	SAVE_GPR(13, r1)
+	SAVE_8GPRS(14, r1)
+	SAVE_10GPRS(22, r1)
+	mfcr    r10
+	std     r10,8*32(r1)
+	mfmsr   r10
+	std     r10,8*33(r1)
+
+	/* remove MSR_LE from msr but keep MSR_SF */
+	mfmsr	r10
+	rldicr	r10,r10,0,62
+	mtsrr1	r10
+
+	/* Load FW address, set LR to label 1, and jump to FW */
+	bl	0f
+0:	mflr	r10
+	addi	r11,r10,(1f-0b)
+	mtlr	r11
+
+	ld	r10,(p_prom-0b)(r10)
+	mtsrr0	r10
+
+	rfid
+
+1:	/* Return from OF */
+	FIXUP_ENDIAN
+
+	/* Restore registers and return. */
+	rldicl  r1,r1,0,32
+
+	/* Restore the MSR (back to 64 bits) */
+	ld      r10,8*(33)(r1)
+	mtmsr	r10
+	isync
+
+	/* Restore other registers */
+	REST_GPR(2, r1)
+	REST_GPR(13, r1)
+	REST_8GPRS(14, r1)
+	REST_10GPRS(22, r1)
+	ld      r10,8*32(r1)
+	mtcr	r10
+
+	addi    r1,r1,PROM_FRAME_SIZE
+	ld      r0,16(r1)
+	mtlr    r0
+	blr
+#endif

diff --git a/arch/powerpc/boot/dcr.h b/arch/powerpc/boot/dcr.h
index cc73f7a..bf8f4ed 100644
--- a/arch/powerpc/boot/dcr.h
+++ b/arch/powerpc/boot/dcr.h

@@ -15,6 +15,10 @@
 		asm volatile("mfdcrx %0,%1" : "=r"(rval) : "r"(rn)); \
 		rval; \
 	})
+#define mtdcrx(rn, val) \
+	({	\
+		asm volatile("mtdcrx %0,%1" : : "r"(rn), "r" (val)); \
+	})
 
 /* 440GP/440GX SDRAM controller DCRs */
 #define DCRN_SDRAM0_CFGADDR				0x010

diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
new file mode 100644
index 0000000..f92ecfe
--- /dev/null
+++ b/arch/powerpc/boot/dts/akebono.dts

@@ -0,0 +1,415 @@
+/*
+ * Device Tree Source for IBM Embedded PPC 476 Platform
+ *
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x01f00000 0x00100000;	// spin table
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	model = "ibm,akebono";
+	compatible = "ibm,akebono", "ibm,476gtr";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <0>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100Mhz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "ok";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <1>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100Mhz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x01f00000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>; // filled in by zImage
+	};
+
+	MPIC: interrupt-controller {
+		compatible = "chrp,open-pic";
+		interrupt-controller;
+		dcr-reg = <0xffc00000 0x00040000>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		single-cpu-affinity;
+	};
+
+	plb {
+		compatible = "ibm,plb6";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		clock-frequency = <200000000>; // 200Mhz
+
+		HSTA0: hsta@310000e0000 {
+			compatible = "ibm,476gtr-hsta-msi", "ibm,hsta-msi";
+			reg = <0x310 0x000e0000 0x0 0xf0>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <108 0
+				      109 0
+				      110 0
+				      111 0
+				      112 0
+				      113 0
+				      114 0
+				      115 0
+				      116 0
+				      117 0
+				      118 0
+				      119 0
+				      120 0
+				      121 0
+				      122 0
+				      123 0>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-476gtr", "ibm,mcmal2";
+			dcr-reg = <0xc0000000 0x062>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <	/*TXEOB*/ 77 0x4
+					/*RXEOB*/ 78 0x4
+					/*SERR*/  76 0x4
+					/*TXDE*/  79 0x4
+					/*RXDE*/  80 0x4>;
+		};
+
+		SATA0: sata@30000010000 {
+			compatible = "ibm,476gtr-ahci";
+			reg = <0x300 0x00010000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <93 2>;
+		};
+
+		EHCI0: ehci@30010000000 {
+			compatible = "ibm,476gtr-ehci", "generic-ehci";
+			reg = <0x300 0x10000000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <85 2>;
+		};
+
+		SD0: sd@30000000000 {
+			compatible = "ibm,476gtr-sdhci", "generic-sdhci";
+			reg = <0x300 0x00000000 0x0 0x10000>;
+			interrupts = <91 2>;
+			interrupt-parent = <&MPIC>;
+		};
+
+		OHCI0: ohci@30010010000 {
+			compatible = "ibm,476gtr-ohci", "generic-ohci";
+			reg = <0x300 0x10010000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <89 1>;
+			};
+
+		OHCI1: ohci@30010020000 {
+			compatible = "ibm,476gtr-ohci", "generic-ohci";
+			reg = <0x300 0x10020000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <88 1>;
+			};
+
+		POB0: opb {
+			compatible = "ibm,opb-4xx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full
+			 * 32-bit range
+			 */
+			ranges = <0x00000000 0x0000033f 0x00000000 0x80000000
+				  0x80000000 0x0000033f 0x80000000 0x80000000>;
+			clock-frequency = <100000000>;
+
+			RGMII0: emac-rgmii-wol@50004 {
+				compatible = "ibm,rgmii-wol-476gtr", "ibm,rgmii-wol";
+				reg = <0x50004 0x00000008>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@30000 {
+				device_type = "network";
+				compatible = "ibm,emac-476gtr", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &MPIC 81 0x4
+						 /*Wake*/   0x1 &MPIC 82 0x4>;
+				reg = <0x30000 0x78>;
+
+				/* local-mac-address will normally be added by
+				 * the wrapper. If your device doesn't support
+				 * passing data to the wrapper (in the form
+				 * local-mac-addr=<hwaddr>) then you will need
+				 * to set it manually here. */
+				//local-mac-address = [000000000000];
+
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-wol-device = <&RGMII0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			UART0: serial@10000 {
+				device_type = "serial";
+				compatible = "ns16750", "ns16550";
+				reg = <0x10000 0x00000008>;
+				virtual-reg = <0xe8010000>;
+				clock-frequency = <1851851>;
+				current-speed = <38400>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <39 2>;
+			};
+
+			IIC0: i2c@00000000 {
+				compatible = "ibm,iic-476gtr", "ibm,iic";
+				reg = <0x0 0x00000020>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <37 2>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				rtc@68 {
+					compatible = "stm,m41t80", "m41st85";
+					reg = <0x68>;
+				};
+			};
+
+			IIC1: i2c@00000100 {
+				compatible = "ibm,iic-476gtr", "ibm,iic";
+				reg = <0x100 0x00000020>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <38 2>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				avr@58 {
+					compatible = "ibm,akebono-avr";
+					reg = <0x58>;
+				};
+			};
+
+			FPGA0: fpga@ebc00000 {
+				compatible = "ibm,akebono-fpga";
+				reg = <0xebc00000 0x8>;
+			};
+		};
+
+		PCIE0: pciex@10100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x00000101 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000100 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0xc0 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000110 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000140 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 45 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 46 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 47 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>;
+		};
+
+		PCIE1: pciex@20100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x00000201 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000200 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x100 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000210 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000240 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 53 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 54 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 55 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>;
+		};
+
+		PCIE2: pciex@18100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x2>; /* port number */
+			reg = <0x00000181 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000180 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0xe0 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000190 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x000001c0 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 61 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 62 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 63 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>;
+		};
+
+		PCIE3: pciex@28100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x3>; /* port number */
+			reg = <0x00000281 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000280 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x120 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000290 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x000002c0 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 69 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 70 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 71 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 72 0x2 /* int D */>;
+		};
+	};
+
+	chosen {
+		linux,stdout-path = &UART0;
+	};
+};

diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts
index 7290021..85646b4 100644
--- a/arch/powerpc/boot/dts/b4860emu.dts
+++ b/arch/powerpc/boot/dts/b4860emu.dts

@@ -61,21 +61,25 @@
 			device_type = "cpu";
 			reg = <0 1>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu1: PowerPC,e6500@2 {
 			device_type = "cpu";
 			reg = <2 3>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu2: PowerPC,e6500@4 {
 			device_type = "cpu";
 			reg = <4 5>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu3: PowerPC,e6500@6 {
 			device_type = "cpu";
 			reg = <6 7>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 	};
 };
@@ -157,7 +161,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,b4-corenet-cf";
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 0>;
 		fsl,ccf-num-csdids = <32>;
@@ -167,6 +171,7 @@
 	iommu@20000 {
 		compatible = "fsl,pamu-v1.0", "fsl,pamu";
 		reg = <0x20000 0x4000>;
+		fsl,portid-mapping = <0x8000>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		interrupts = <

diff --git a/arch/powerpc/boot/dts/bsc9132qds.dts b/arch/powerpc/boot/dts/bsc9132qds.dts
new file mode 100644
index 0000000..6cab106
--- /dev/null
+++ b/arch/powerpc/boot/dts/bsc9132qds.dts

@@ -0,0 +1,35 @@
+/*
+ * BSC9132 QDS Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "fsl/bsc9132si-pre.dtsi"
+
+/ {
+	model = "fsl,bsc9132qds";
+	compatible = "fsl,bsc9132qds";
+
+	memory {
+		device_type = "memory";
+	};
+
+	ifc: ifc@ff71e000 {
+		/* NOR, NAND Flash on board */
+		ranges = <0x0 0x0 0x0 0x88000000 0x08000000
+			  0x1 0x0 0x0 0xff800000 0x00010000>;
+		reg = <0x0 0xff71e000 0x0 0x2000>;
+	};
+
+	soc: soc@ff700000 {
+		ranges = <0x0 0x0 0xff700000 0x100000>;
+	};
+};
+
+/include/ "bsc9132qds.dtsi"
+/include/ "fsl/bsc9132si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/bsc9132qds.dtsi b/arch/powerpc/boot/dts/bsc9132qds.dtsi
new file mode 100644
index 0000000..af8e888
--- /dev/null
+++ b/arch/powerpc/boot/dts/bsc9132qds.dtsi

@@ -0,0 +1,101 @@
+/*
+ * BSC9132 QDS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	nor@0,0 {	/* board NOR flash, reached through the first IFC range entry */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;	/* two address cells (0, 0) + one size cell: 128 MiB */
+		bank-width = <2>;
+		device-width = <1>;
+	};
+
+	nand@1,0 {	/* board NAND flash, reached through the second IFC range entry */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,ifc-nand";
+		reg = <0x1 0x0 0x4000>;	/* two address cells (1, 0) + one size cell: 16 KiB */
+	};
+};
+
+&soc {
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801";
+			reg = <0>;
+			spi-max-frequency = <30000000>;
+		};
+	};
+
+	i2c@3000 {
+		fpga: fpga@66 {
+			compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
+			reg = <0x66>;
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {	/* referenced by enet0's phy-handle */
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {	/* referenced by enet1's phy-handle */
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@1f {	/* unit address must equal reg (0x1f); shared tbi-handle of enet0/enet1 */
+			reg = <0x1f>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy1>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+};

diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
index 60566f99..d678944 100644
--- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi

@@ -76,10 +76,6 @@
 		compatible = "fsl,b4420-l3-cache-controller", "cache";
 	};
 
-	corenet-cf@18000 {
-		compatible = "fsl,b4420-corenet-cf";
-	};
-
 	guts: global-utilities@e0000 {
 		compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0";
 	};

diff --git a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
index 2419731..338af7e 100644
--- a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi

@@ -66,12 +66,14 @@
 			reg = <0 1>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu1: PowerPC,e6500@2 {
 			device_type = "cpu";
 			reg = <2 3>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 	};
 };

diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
index cbc354b..582381d 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi

@@ -120,10 +120,6 @@
 		compatible = "fsl,b4860-l3-cache-controller", "cache";
 	};
 
-	corenet-cf@18000 {
-		compatible = "fsl,b4860-corenet-cf";
-	};
-
 	guts: global-utilities@e0000 {
 		compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0";
 	};

diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
index 142ac86..1948f73 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi

@@ -66,24 +66,28 @@
 			reg = <0 1>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu1: PowerPC,e6500@2 {
 			device_type = "cpu";
 			reg = <2 3>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu2: PowerPC,e6500@4 {
 			device_type = "cpu";
 			reg = <4 5>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu3: PowerPC,e6500@6 {
 			device_type = "cpu";
 			reg = <6 7>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 	};
 };

diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 4f6e482..1a54ba7 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi

@@ -158,7 +158,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,b4-corenet-cf";
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 0>;
 		fsl,ccf-num-csdids = <32>;
@@ -168,6 +168,7 @@
 	iommu@20000 {
 		compatible =  "fsl,pamu-v1.0", "fsl,pamu";
 		reg = <0x20000 0x4000>;
+		fsl,portid-mapping = <0x8000>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		interrupts = <

diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
new file mode 100644
index 0000000..c723071
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi

@@ -0,0 +1,185 @@
+/*
+ * BSC9132 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	/* FIXME: Test whether interrupts are split */
+	interrupts = <16 2 0 0 20 2 0 0>;
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,bsc9132-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,bsc9132-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,bsc9132-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 1 8>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+	i2c@3000 {
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-i2c-1.dtsi"
+	i2c@3100 {
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-duart-0.dtsi"
+	serial0: serial@4500 {
+		interrupts = <18 2 0 0>;
+	};
+
+	serial1: serial@4600 {
+		interrupts = <18 2 0 0 >;
+	};
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <1>;
+		interrupts = <22 0x2 0 0>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+	gpio-controller@f000 {
+		interrupts = <19 2 0 0>;	/* overrides the node pulled in by pq3-gpio-0.dtsi */
+	};
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,bsc9132-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 1 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+
+	dma@21300 {
+		/* Per-channel interrupt specifiers for the node from pq3-dma-0.dtsi */
+		dma-channel@0 {
+			interrupts = <62 2 0 0>;
+		};
+
+		dma-channel@80 {
+			interrupts = <63 2 0 0>;
+		};
+
+		dma-channel@100 {
+			interrupts = <64 2 0 0>;
+		};
+
+		dma-channel@180 {
+			interrupts = <65 2 0 0>;
+		};
+	};
+
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v2.2", "fsl-usb2-dr";	/* most specific string first, generic fallback last */
+		interrupts = <40 0x2 0 0>;
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		sdhci,auto-cmd12;	/* NOTE(review): generic SDHCI spelling; was "fsl,sdhci-auto-cmd12" - confirm against the esdhc binding */
+		interrupts = <41 0x2 0 0>;
+	};
+
+/include/ "pq3-sec4.4-0.dtsi"
+	crypto@30000 {
+		interrupts = <57 2 0 0>;	/* controller-level interrupt; each jr@ child has its own */
+
+		sec_jr0: jr@1000 {
+			interrupts = <58 2 0 0>;
+		};
+
+		sec_jr1: jr@2000 {
+			interrupts = <59 2 0 0>;
+		};
+
+		sec_jr2: jr@3000 {
+			interrupts = <60 2 0 0>;
+		};
+
+		sec_jr3: jr@4000 {
+			interrupts = <61 2 0 0>;
+		};
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: ethernet@b0000 {
+		queue-group@b0000 {
+			fsl,rx-bit-map = <0xff>;
+			fsl,tx-bit-map = <0xff>;
+			interrupts = <26 2 0 0>, <27 2 0 0>, <28 2 0 0>;	/* three four-cell specifiers */
+		};
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: ethernet@b1000 {
+		queue-group@b1000 {
+			fsl,rx-bit-map = <0xff>;
+			fsl,tx-bit-map = <0xff>;
+			interrupts = <33 2 0 0>, <34 2 0 0>, <35 2 0 0>;	/* three four-cell specifiers */
+		};
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,bsc9132-guts";
+		reg = <0xe0000 0x1000>;	/* offset and size within the soc node's address space */
+		fsl,has-rstcr;
+	};
+};

diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
new file mode 100644
index 0000000..301a9db
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi

@@ -0,0 +1,66 @@
+/*
+ * BSC9132 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e500v2@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		cpu1: PowerPC,e500v2@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};

diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index e2987a3..5290df8 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi

@@ -246,7 +246,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -262,6 +262,7 @@
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;
+		fsl,portid-mapping = <0x0f000000>;
 
 		pamu0: pamu@0 {
 			reg = <0 0x1000>;

diff --git a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
index 22f3b14..b1ea147 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi

@@ -83,6 +83,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>;
 			L2_0: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -92,6 +93,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -101,6 +103,7 @@
 			reg = <2>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -110,6 +113,7 @@
 			reg = <3>;
 			clocks = <&mux3>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
 			L2_3: l2-cache {
 				next-level-cache = <&cpc>;
 			};

diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
index 7af6d45..cd63cb1 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi

@@ -273,7 +273,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -289,6 +289,7 @@
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;
+		fsl,portid-mapping = <0x0f000000>;
 
 		pamu0: pamu@0 {
 			reg = <0 0x1000>;

diff --git a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
index 468e8be..dc5f4b3 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi

@@ -84,6 +84,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>;
 			L2_0: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -93,6 +94,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -102,6 +104,7 @@
 			reg = <2>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -111,6 +114,7 @@
 			reg = <3>;
 			clocks = <&mux3>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
 			L2_3: l2-cache {
 				next-level-cache = <&cpc>;
 			};

diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
index 2415e1f..12947cc 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi

@@ -281,7 +281,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -297,6 +297,7 @@
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;
+		fsl,portid-mapping = <0x00f80000>;
 
 		pamu0: pamu@0 {
 			reg = <0 0x1000>;

diff --git a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
index 0040b5a..38bde09 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi

@@ -83,6 +83,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>;
 			L2_0: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -92,6 +93,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -101,6 +103,7 @@
 			reg = <2>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -110,6 +113,7 @@
 			reg = <3>;
 			clocks = <&mux3>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
 			L2_3: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -119,6 +123,7 @@
 			reg = <4>;
 			clocks = <&mux4>;
 			next-level-cache = <&L2_4>;
+			fsl,portid-mapping = <0x08000000>;
 			L2_4: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -128,6 +133,7 @@
 			reg = <5>;
 			clocks = <&mux5>;
 			next-level-cache = <&L2_5>;
+			fsl,portid-mapping = <0x04000000>;
 			L2_5: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -137,6 +143,7 @@
 			reg = <6>;
 			clocks = <&mux6>;
 			next-level-cache = <&L2_6>;
+			fsl,portid-mapping = <0x02000000>;
 			L2_6: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -146,6 +153,7 @@
 			reg = <7>;
 			clocks = <&mux7>;
 			next-level-cache = <&L2_7>;
+			fsl,portid-mapping = <0x01000000>;
 			L2_7: l2-cache {
 				next-level-cache = <&cpc>;
 			};

diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 2985de4..4c4a2b0 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi

@@ -278,7 +278,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -294,6 +294,7 @@
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;
+		fsl,portid-mapping = <0x3c000000>;
 
 		pamu0: pamu@0 {
 			reg = <0 0x1000>;

diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
index fe1a2e6..1cc61e1 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi

@@ -90,6 +90,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>;
 			L2_0: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -99,6 +100,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};

diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
index 546a899..67296fd 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi

@@ -233,7 +233,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -248,6 +248,7 @@
 		#size-cells = <1>;
 		interrupts = <24 2 0 0
 			      16 2 1 30>;
+		fsl,portid-mapping = <0x0f800000>;
 
 		pamu0: pamu@0 {
 			reg = <0 0x1000>;

diff --git a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
index 3674686..b048a2b 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi

@@ -83,6 +83,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>;
 			L2_0: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -92,6 +93,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -101,6 +103,7 @@
 			reg = <2>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -110,6 +113,7 @@
 			reg = <3>;
 			clocks = <&mux3>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
 			L2_3: l2-cache {
 				next-level-cache = <&cpc>;
 			};

diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
new file mode 100644
index 0000000..12e597e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi

@@ -0,0 +1,430 @@
+/*
+ * T1040 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+&pci0 {
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+&pci1 {
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+&pci2 {
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+&pci3 {
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <23 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+			>;
+	};
+};
+
+&dcsr {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t1040-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {
+		compatible = "fsl,t1040-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,t1040-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t1040-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t1040-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t1040-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,t1040-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t1040-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <16>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v5.0",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t1040-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 27>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x1000>;
+		ranges = <0 0x20000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		pamu0: pamu@0 {
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <128 1>;
+			fsl,secondary-cache-geometry = <16 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,t1040-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0";
+		ranges = <0x0 0xe1000 0x1000>;
+		reg = <0xe1000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		sysclk: sysclk {
+			#clock-cells = <0>;
+			compatible = "fsl,qoriq-sysclk-2.0";
+			clock-output-names = "sysclk", "fixed-clock";
+		};
+
+
+		pll0: pll0@800 {
+			#clock-cells = <1>;
+			reg = <0x800 4>;
+			compatible = "fsl,qoriq-core-pll-2.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll0", "pll0-div2", "pll0-div4";
+		};
+
+		pll1: pll1@820 {
+			#clock-cells = <1>;
+			reg = <0x820 4>;
+			compatible = "fsl,qoriq-core-pll-2.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll1", "pll1-div2", "pll1-div4";
+		};
+
+		mux0: mux0@0 {
+			#clock-cells = <0>;
+			reg = <0x0 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll0-div4",	/* same order as clocks; 3rd input is pll0, not pll1 */
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux0";
+		};
+
+		mux1: mux1@20 {
+			#clock-cells = <0>;
+			reg = <0x20 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll0-div4",	/* same order as clocks; 3rd input is pll0, not pll1 */
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux1";
+		};
+
+		mux2: mux2@40 {
+			#clock-cells = <0>;
+			reg = <0x40 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll0-div4",	/* same order as clocks; 3rd input is pll0, not pll1 */
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux2";
+		};
+
+		mux3: mux3@60 {
+			#clock-cells = <0>;
+			reg = <0x60 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll0-div4",	/* match the pll0/pll1 clock-output-names */
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux3";
+		};
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t1040-sfp";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t1040-serdes";
+		reg	   = <0xea000 0x4000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,t1040-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {	/* extends the node from qoriq-usb2-mph-0.dtsi */
+		compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x520>;	/* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {	/* extends the node from qoriq-usb2-dr-0.dtsi */
+		compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x524>;	/* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+
+	display@180000 {
+		compatible = "fsl,t1040-diu", "fsl,diu";
+		reg = <0x180000 0x1000>;	/* NOTE(review): size was decimal 1000, presumably a missing 0x - confirm */
+		interrupts = <74 2 0 0>;
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+/include/ "qoriq-sec5.0-0.dtsi"
+};

diff --git a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
new file mode 100644
index 0000000..319b74f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi

@@ -0,0 +1,37 @@
+/*
+ * T1042 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t1040si-post.dtsi"
+
+/* Placeholder for Ethernet-related device tree nodes */

diff --git a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
new file mode 100644
index 0000000..bbb7025
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi

@@ -0,0 +1,104 @@
+/*
+ * T1040/T1042 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ { /* root node common to the T1040 and T1042 SoC descriptions */
+	#address-cells = <2>; /* children of the root use two address cells */
+	#size-cells = <2>; /* ...and two size cells */
+	interrupt-parent = <&mpic>;
+
+	aliases { /* every label referenced here is defined outside this node */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		sdhc = &sdhc;
+
+		crypto = &crypto;
+	};
+
+	cpus { /* four e5500 cores; each has its own l2-cache node chained to &cpc */
+		#address-cells = <1>; /* a cpu "reg" is a single cell */
+		#size-cells = <0>; /* ...with no size component */
+
+		cpu0: PowerPC,e5500@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&mux0>; /* one clock mux per core: mux0..mux3 */
+			next-level-cache = <&L2_1>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>; /* the same &cpc is referenced by all four L2 nodes */
+			};
+		};
+		cpu1: PowerPC,e5500@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&mux1>;
+			next-level-cache = <&L2_2>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e5500@2 {
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&mux2>;
+			next-level-cache = <&L2_3>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e5500@3 {
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&mux3>;
+			next-level-cache = <&L2_4>;
+			L2_4: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};

diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index f99d74f..793669b 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi

@@ -343,7 +343,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -353,6 +353,7 @@
 	iommu@20000 {
 		compatible = "fsl,pamu-v1.0", "fsl,pamu";
 		reg = <0x20000 0x6000>;
+		fsl,portid-mapping = <0x8000>;
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;

diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
index 0b8ccc5..d2f157e 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi

@@ -69,72 +69,84 @@
 			reg = <0 1>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu1: PowerPC,e6500@2 {
 			device_type = "cpu";
 			reg = <2 3>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu2: PowerPC,e6500@4 {
 			device_type = "cpu";
 			reg = <4 5>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu3: PowerPC,e6500@6 {
 			device_type = "cpu";
 			reg = <6 7>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu4: PowerPC,e6500@8 {
 			device_type = "cpu";
 			reg = <8 9>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu5: PowerPC,e6500@10 {
 			device_type = "cpu";
 			reg = <10 11>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu6: PowerPC,e6500@12 {
 			device_type = "cpu";
 			reg = <12 13>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu7: PowerPC,e6500@14 {
 			device_type = "cpu";
 			reg = <14 15>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu8: PowerPC,e6500@16 {
 			device_type = "cpu";
 			reg = <16 17>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu9: PowerPC,e6500@18 {
 			device_type = "cpu";
 			reg = <18 19>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu10: PowerPC,e6500@20 {
 			device_type = "cpu";
 			reg = <20 21>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu11: PowerPC,e6500@22 {
 			device_type = "cpu";
 			reg = <22 23>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 	};
 };

diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts
new file mode 100644
index 0000000..89b4119
--- /dev/null
+++ b/arch/powerpc/boot/dts/kmcoge4.dts

@@ -0,0 +1,152 @@
+/*
+ * Keymile kmcoge4 Device Tree Source, based on the P2041RDB DTS
+ *
+ * (C) Copyright 2014
+ * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "fsl/p2041si-pre.dtsi"
+
+/ { /* board-level settings layered on top of fsl/p2041si-pre.dtsi */
+	model = "keymile,kmcoge4";
+	compatible = "keymile,kmcoge4", "keymile,kmp204x";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory { /* no reg given here; presumably filled in by the bootloader - TODO confirm */
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>; /* child 0x0 -> parent 0xf_00000000 */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>; /* child 0x0 -> parent 0xf_fe000000, 16 MiB */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 { /* three SPI devices, at reg 0, 1 and 2 */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25fl256s1";
+				reg = <0>;
+				spi-max-frequency = <20000000>; /* input clock */
+			};
+
+			network_clock@1 {
+				compatible = "zarlink,zl30343";
+				reg = <1>;
+				spi-max-frequency = <8000000>; /* 8 MHz */
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,m25p32";
+				reg = <2>;
+				spi-max-frequency = <15000000>; /* 15 MHz */
+			};
+		};
+
+		i2c@119000 { /* this and the following i2c, usb and sata nodes are switched off */
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+
+		sata@220000 {
+			status = "disabled";
+		};
+
+		sata@221000 {
+			status = "disabled";
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		status = "disabled";
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xffa00000 0x00040000		/* LB 0 */
+			  1 0 0xf 0xfb000000 0x00010000		/* LB 1 */
+			  2 0 0xf 0xd0000000 0x10000000		/* LB 2 */
+			  3 0 0xf 0xe0000000 0x10000000>;	/* LB 3 */
+
+		nand@0,0 { /* sits in the LB 0 window */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0 0 0x40000>; /* 256 KiB, the full LB 0 range */
+		};
+
+		board-control@1,0 { /* sits in the LB 1 window */
+			compatible = "keymile,qriox";
+			reg = <1 0 0x80>;
+		};
+
+		chassis-mgmt@3,0 { /* sits in the LB 3 window; no child is declared for LB 2 here */
+			compatible = "keymile,bfticu";
+			interrupt-controller; /* acts as an interrupt controller itself... */
+			#interrupt-cells = <2>;
+			reg = <3 0 0x100>;
+			interrupt-parent = <&mpic>; /* ...and raises its own interrupt on the mpic */
+			interrupts = <6 1 0 0>;
+		};
+	};
+
+	pci0: pcie@ffe200000 { /* memory window 512 MiB at 0xc_00000000, I/O window 64 KiB at 0xf_f8000000 */
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe202000 { /* same PCI-side addresses as pci0, different CPU-side windows */
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "fsl/p2041si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 651e4f5..57f86cd 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts

@@ -296,7 +296,7 @@
 		};
 
 		dma@2c000 {
-			compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+			compatible = "fsl,mpc8308-dma";
 			reg = <0x2c000 0x1800>;
 			interrupts = <3 0x8
 					94 0x8>;

diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts
index 9ce45f2..d0211f0 100644
--- a/arch/powerpc/boot/dts/mpc8308rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8308rdb.dts

@@ -265,7 +265,7 @@
 		};
 
 		dma@2c000 {
-			compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+			compatible = "fsl,mpc8308-dma";
 			reg = <0x2c000 0x1800>;
 			interrupts = <3 0x8
 					94 0x8>;

diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/oca4080.dts
new file mode 100644
index 0000000..3d4c751
--- /dev/null
+++ b/arch/powerpc/boot/dts/oca4080.dts

@@ -0,0 +1,118 @@
+/*
+ * OCA4080 Device Tree Source
+ *
+ * Copyright 2014 Prodrive Technologies B.V.
+ *
+ * Based on:
+ * P4080DS Device Tree Source
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p4080si-pre.dtsi"
+
+/ { /* board-level settings layered on top of fsl/p4080si-pre.dtsi */
+	model = "fsl,OCA4080";
+	compatible = "fsl,OCA4080";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory { /* no reg given here; presumably filled in by the bootloader - TODO confirm */
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>; /* child 0x0 -> parent 0xf_00000000 */
+	};
+
+	soc: soc@ffe000000 { /* every i2c and usb node listed below is switched off */
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>; /* child 0x0 -> parent 0xf_fe000000, 16 MiB */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		i2c@118000 {
+			status = "disabled";
+		};
+
+		i2c@118100 {
+			status = "disabled";
+		};
+
+		i2c@119000 {
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* 256 MiB window at 0xc_20000000 */
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xef800000 0x800000>; /* single 8 MiB window for child 0 */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x00800000>; /* 8 MiB, the full window above */
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	pci0: pcie@ffe200000 { /* all three PCIe controllers are switched off */
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe201000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe202000 {
+		status = "disabled";
+	};
+};
+
+/include/ "fsl/p4080si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/p1023rds.dts b/arch/powerpc/boot/dts/p1023rds.dts
deleted file mode 100644
index beb6cb1..0000000
--- a/arch/powerpc/boot/dts/p1023rds.dts
+++ /dev/null

@@ -1,219 +0,0 @@
-/*
- * P1023 RDS Device Tree Source
- *
- * Copyright 2010-2011 Freescale Semiconductor Inc.
- *
- * Author: Roy Zang <tie-fei.zang@freescale.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *     * Neither the name of Freescale Semiconductor nor the
- *       names of its contributors may be used to endorse or promote products
- *       derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/include/ "fsl/p1023si-pre.dtsi"
-
-/ {
-	model = "fsl,P1023";
-	compatible = "fsl,P1023RDS";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&mpic>;
-
-	memory {
-		device_type = "memory";
-	};
-
-	soc: soc@ff600000 {
-		ranges = <0x0 0x0 0xff600000 0x200000>;
-
-		i2c@3000 {
-			rtc@68 {
-				compatible = "dallas,ds1374";
-				reg = <0x68>;
-			};
-		};
-
-		spi@7000 {
-			fsl_dataflash@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				compatible = "atmel,at45db081d";
-				reg = <0>;
-				spi-max-frequency = <40000000>; /* input clock */
-				partition@u-boot {
-					/* 512KB for u-boot Bootloader Image */
-					label = "u-boot-spi";
-					reg = <0x00000000 0x00080000>;
-					read-only;
-				};
-				partition@dtb {
-					/* 512KB for DTB Image */
-					label = "dtb-spi";
-					reg = <0x00080000 0x00080000>;
-					read-only;
-				};
-			};
-		};
-
-		usb@22000 {
-			dr_mode = "host";
-			phy_type = "ulpi";
-		};
-	};
-
-	lbc: localbus@ff605000 {
-		reg = <0 0xff605000 0 0x1000>;
-
-		/* NOR Flash, BCSR */
-		ranges = <0x0 0x0 0x0 0xee000000 0x02000000
-			  0x1 0x0 0x0 0xe0000000 0x00008000>;
-
-		nor@0,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "cfi-flash";
-			reg = <0x0 0x0 0x02000000>;
-			bank-width = <2>;
-			device-width = <1>;
-			partition@0 {
-				label = "ramdisk";
-				reg = <0x00000000 0x01c00000>;
-			};
-			partition@1c00000 {
-				label = "kernel";
-				reg = <0x01c00000 0x002e0000>;
-			};
-			partiton@1ee0000 {
-				label = "dtb";
-				reg = <0x01ee0000 0x00020000>;
-			};
-			partition@1f00000 {
-				label = "firmware";
-				reg = <0x01f00000 0x00080000>;
-				read-only;
-			};
-			partition@1f80000 {
-				label = "u-boot";
-				reg = <0x01f80000 0x00080000>;
-				read-only;
-			};
-		};
-
-		fpga@1,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,p1023rds-fpga";
-			reg = <1 0 0x8000>;
-			ranges = <0 1 0 0x8000>;
-
-			bcsr@20 {
-				compatible = "fsl,p1023rds-bcsr";
-				reg = <0x20 0x20>;
-			};
-		};
-	};
-
-	pci0: pcie@ff60a000 {
-		reg = <0 0xff60a000 0 0x1000>;
-		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
-			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
-		pcie@0 {
-			/* IRQ[0:3] are pulled up on board, set to active-low */
-			interrupt-map-mask = <0xf800 0 0 7>;
-			interrupt-map = <
-				/* IDSEL 0x0 */
-				0000 0 0 1 &mpic 0 1 0 0
-				0000 0 0 2 &mpic 1 1 0 0
-				0000 0 0 3 &mpic 2 1 0 0
-				0000 0 0 4 &mpic 3 1 0 0
-				>;
-			ranges = <0x2000000 0x0 0xc0000000
-				  0x2000000 0x0 0xc0000000
-				  0x0 0x20000000
-
-				  0x1000000 0x0 0x0
-				  0x1000000 0x0 0x0
-				  0x0 0x100000>;
-		};
-	};
-
-	board_pci1: pci1: pcie@ff609000 {
-		reg = <0 0xff609000 0 0x1000>;
-		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
-			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		pcie@0 {
-			/*
-			 * IRQ[4:6] only for PCIe, set to active-high,
-			 * IRQ[7] is pulled up on board, set to active-low
-			 */
-			interrupt-map-mask = <0xf800 0 0 7>;
-			interrupt-map = <
-				/* IDSEL 0x0 */
-				0000 0 0 1 &mpic 4 2 0 0
-				0000 0 0 2 &mpic 5 2 0 0
-				0000 0 0 3 &mpic 6 2 0 0
-				0000 0 0 4 &mpic 7 1 0 0
-				>;
-			ranges = <0x2000000 0x0 0xa0000000
-				  0x2000000 0x0 0xa0000000
-				  0x0 0x20000000
-
-				  0x1000000 0x0 0x0
-				  0x1000000 0x0 0x0
-				  0x0 0x100000>;
-		};
-	};
-
-	pci2: pcie@ff60b000 {
-		reg = <0 0xff60b000 0 0x1000>;
-		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
-			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		pcie@0 {
-			/*
-			 * IRQ[8:10] are pulled up on board, set to active-low
-			 * IRQ[11] only for PCIe, set to active-high,
-			 */
-			interrupt-map-mask = <0xf800 0 0 7>;
-			interrupt-map = <
-				/* IDSEL 0x0 */
-				0000 0 0 1 &mpic 8 1 0 0
-				0000 0 0 2 &mpic 9 1 0 0
-				0000 0 0 3 &mpic 10 1 0 0
-				0000 0 0 4 &mpic 11 2 0 0
-				>;
-			ranges = <0x2000000 0x0 0x80000000
-				  0x2000000 0x0 0x80000000
-				  0x0 0x20000000
-
-				  0x1000000 0x0 0x0
-				  0x1000000 0x0 0x0
-				  0x0 0x100000>;
-		};
-	};
-};
-
-/include/ "fsl/p1023si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/t1040qds.dts b/arch/powerpc/boot/dts/t1040qds.dts
new file mode 100644
index 0000000..973c29c
--- /dev/null
+++ b/arch/powerpc/boot/dts/t1040qds.dts

@@ -0,0 +1,46 @@
+/*
+ * T1040QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xqds.dtsi"
+
+/ { /* board identity; SoC and board nodes come from the files included around this node */
+	model = "fsl,T1040QDS"; /* same value t104xqds.dtsi already sets */
+	compatible = "fsl,T1040QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+};
+
+/include/ "fsl/t1040si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/t1042qds.dts b/arch/powerpc/boot/dts/t1042qds.dts
new file mode 100644
index 0000000..45bd037
--- /dev/null
+++ b/arch/powerpc/boot/dts/t1042qds.dts

@@ -0,0 +1,46 @@
+/*
+ * T1042QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xqds.dtsi"
+
+/ { /* board identity; SoC and board nodes come from the files included around this node */
+	model = "fsl,T1042QDS"; /* replaces the "fsl,T1040QDS" set by t104xqds.dtsi, included earlier */
+	compatible = "fsl,T1042QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+};
+
+/include/ "fsl/t1042si-post.dtsi"

diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/t104xqds.dtsi
new file mode 100644
index 0000000..234f4b5
--- /dev/null
+++ b/arch/powerpc/boot/dts/t104xqds.dtsi

@@ -0,0 +1,166 @@
+/*
+ * T104xQDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ { /* board nodes shared by t1040qds.dts and t1042qds.dts */
+	model = "fsl,T1040QDS"; /* each including .dts sets model again after this include */
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>; /* windows for children 0 (nor), 2 (nand), 3 (board-control) */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>; /* 128 MiB, the full window 0 */
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>; /* 64 KiB, the full window 2 */
+		};
+
+		board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,fpga-qixis";
+			reg = <3 0 0x300>; /* 0x300 bytes inside the 0x8000-byte window 3 */
+		};
+	};
+
+	memory { /* no reg given here; presumably filled in by the bootloader - TODO confirm */
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>; /* child 0x0 -> parent 0xf_00000000 */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>; /* child 0x0 -> parent 0xf_fe000000, 16 MiB */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q128a11";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 { /* two devices on this bus: 0x77 and 0x68 */
+			pca9547@77 {
+				compatible = "philips,pca9547";
+				reg = <0x77>;
+			};
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 { /* memory window 256 MiB at 0xc_00000000, I/O window 64 KiB at 0xf_f8000000 */
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 { /* memory window 256 MiB at 0xc_10000000, I/O window 64 KiB at 0xf_f8010000 */
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 { /* memory window 256 MiB at 0xc_20000000, I/O window 64 KiB at 0xf_f8020000 */
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 { /* memory window 256 MiB at 0xc_30000000, I/O window 64 KiB at 0xf_f8030000 */
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 { /* identity mapping of the two windows above */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};

diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts
index ee24ab3..bc12127a 100644
--- a/arch/powerpc/boot/dts/t4240emu.dts
+++ b/arch/powerpc/boot/dts/t4240emu.dts

@@ -60,63 +60,75 @@
 			device_type = "cpu";
 			reg = <0 1>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu1: PowerPC,e6500@2 {
 			device_type = "cpu";
 			reg = <2 3>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu2: PowerPC,e6500@4 {
 			device_type = "cpu";
 			reg = <4 5>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 		cpu3: PowerPC,e6500@6 {
 			device_type = "cpu";
 			reg = <6 7>;
 			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
 		};
 
 		cpu4: PowerPC,e6500@8 {
 			device_type = "cpu";
 			reg = <8 9>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu5: PowerPC,e6500@10 {
 			device_type = "cpu";
 			reg = <10 11>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu6: PowerPC,e6500@12 {
 			device_type = "cpu";
 			reg = <12 13>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 		cpu7: PowerPC,e6500@14 {
 			device_type = "cpu";
 			reg = <14 15>;
 			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
 		};
 
 		cpu8: PowerPC,e6500@16 {
 			device_type = "cpu";
 			reg = <16 17>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu9: PowerPC,e6500@18 {
 			device_type = "cpu";
 			reg = <18 19>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu10: PowerPC,e6500@20 {
 			device_type = "cpu";
 			reg = <20 21>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 		cpu11: PowerPC,e6500@22 {
 			device_type = "cpu";
 			reg = <22 23>;
 			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
 		};
 	};
 };
@@ -213,7 +225,7 @@
 	};
 
 	corenet-cf@18000 {
-		compatible = "fsl,corenet-cf";
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
 		reg = <0x18000 0x1000>;
 		interrupts = <16 2 1 31>;
 		fsl,ccf-num-csdids = <32>;
@@ -223,6 +235,7 @@
 	iommu@20000 {
 		compatible = "fsl,pamu-v1.0", "fsl,pamu";
 		reg = <0x20000 0x6000>;
+		fsl,portid-mapping = <0x8000>;
 		interrupts = <
 			24 2 0 0
 			16 2 1 30>;

diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 1567a0c..316552d 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c

@@ -26,7 +26,11 @@
 	      elf64->e_ident[EI_MAG2]  == ELFMAG2	&&
 	      elf64->e_ident[EI_MAG3]  == ELFMAG3	&&
 	      elf64->e_ident[EI_CLASS] == ELFCLASS64	&&
+#ifdef __LITTLE_ENDIAN__
+	      elf64->e_ident[EI_DATA]  == ELFDATA2LSB	&&
+#else
 	      elf64->e_ident[EI_DATA]  == ELFDATA2MSB	&&
+#endif
 	      (elf64->e_type            == ET_EXEC ||
 	       elf64->e_type            == ET_DYN)	&&
 	      elf64->e_machine         == EM_PPC64))

diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 62e2f43..7ca910c 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c

@@ -40,8 +40,8 @@
 #ifdef DEBUG
 		printf("    trying: 0x%08lx\n\r", claim_base);
 #endif
-		addr = (unsigned long)of_claim(claim_base, size, 0);
-		if ((void *)addr != (void *)-1)
+		addr = (unsigned long) of_claim(claim_base, size, 0);
+		if (addr != PROM_ERROR)
 			break;
 	}
 	if (addr == 0)

diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index e4c68f7..c8c1750 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h

@@ -1,12 +1,15 @@
 #ifndef _PPC_BOOT_OF_H_
 #define _PPC_BOOT_OF_H_
 
+#include "swab.h"
+
 typedef void *phandle;
-typedef void *ihandle;
+typedef u32 ihandle;
 
 void of_init(void *promptr);
 int of_call_prom(const char *service, int nargs, int nret, ...);
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align);
+unsigned int of_claim(unsigned long virt, unsigned long size,
+	unsigned long align);
 void *of_vmlinux_alloc(unsigned long size);
 void of_exit(void);
 void *of_finddevice(const char *name);
@@ -18,4 +21,16 @@
 /* Console functions */
 void of_console_init(void);
 
+typedef u32			__be32;
+
+#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be32(x) swab32(x)
+#define be32_to_cpu(x) swab32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define PROM_ERROR (-1u)
+
 #endif /* _PPC_BOOT_OF_H_ */

diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c
index ce0e024..8b75470 100644
--- a/arch/powerpc/boot/ofconsole.c
+++ b/arch/powerpc/boot/ofconsole.c

@@ -18,7 +18,7 @@
 
 #include "of.h"
 
-static void *of_stdout_handle;
+static unsigned int of_stdout_handle;
 
 static int of_console_open(void)
 {
@@ -27,8 +27,10 @@
 	if (((devp = of_finddevice("/chosen")) != NULL)
 	    && (of_getprop(devp, "stdout", &of_stdout_handle,
 			   sizeof(of_stdout_handle))
-		== sizeof(of_stdout_handle)))
+		== sizeof(of_stdout_handle))) {
+		of_stdout_handle = be32_to_cpu(of_stdout_handle);
 		return 0;
+	}
 
 	return -1;
 }

diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index b0ec9cf..46c98a4 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c

@@ -16,74 +16,83 @@
 
 #include "of.h"
 
+typedef u32 prom_arg_t;
+
+/* The following structure is used to communicate with open firmware.
+ * All arguments in and out are in big endian format. */
+struct prom_args {
+	__be32 service;	/* Address of service name string. */
+	__be32 nargs;	/* Number of input arguments. */
+	__be32 nret;	/* Number of output arguments. */
+	__be32 args[10];	/* Input/output arguments. */
+};
+
+#ifdef __powerpc64__
+extern int prom(void *);
+#else
 static int (*prom) (void *);
+#endif
 
 void of_init(void *promptr)
 {
+#ifndef __powerpc64__
 	prom = (int (*)(void *))promptr;
+#endif
 }
 
+#define ADDR(x)		((u32)(unsigned long)(x)) /* pointer -> 32-bit PROM cell */
+
 int of_call_prom(const char *service, int nargs, int nret, ...)
 {
 	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
+	struct prom_args args;
 	va_list list;
 
-	args.service = service;
-	args.nargs = nargs;
-	args.nret = nret;
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
 
 	va_start(list, nret);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
 		args.args[nargs+i] = 0;
 
 	if (prom(&args) < 0)
-		return -1;
+		return PROM_ERROR;
 
-	return (nret > 0)? args.args[nargs]: 0;
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
 }
 
 static int of_call_prom_ret(const char *service, int nargs, int nret,
-			    unsigned int *rets, ...)
+			    prom_arg_t *rets, ...)
 {
 	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
+	struct prom_args args;
 	va_list list;
 
-	args.service = service;
-	args.nargs = nargs;
-	args.nret = nret;
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
 
 	va_start(list, rets);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
 		args.args[nargs+i] = 0;
 
 	if (prom(&args) < 0)
-		return -1;
+		return PROM_ERROR;
 
-	if (rets != (void *) 0)
+	if (rets != NULL)
 		for (i = 1; i < nret; ++i)
-			rets[i-1] = args.args[nargs+i];
+			rets[i-1] = be32_to_cpu(args.args[nargs+i]);
 
-	return (nret > 0)? args.args[nargs]: 0;
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
 }
 
 /* returns true if s2 is a prefix of s1 */
@@ -103,7 +112,7 @@
  */
 static int need_map = -1;
 static ihandle chosen_mmu;
-static phandle memory;
+static ihandle memory;
 
 static int check_of_version(void)
 {
@@ -132,10 +141,10 @@
 		printf("no mmu\n");
 		return 0;
 	}
-	memory = (ihandle) of_call_prom("open", 1, 1, "/memory");
-	if (memory == (ihandle) -1) {
-		memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0");
-		if (memory == (ihandle) -1) {
+	memory = of_call_prom("open", 1, 1, "/memory");
+	if (memory == PROM_ERROR) {
+		memory = of_call_prom("open", 1, 1, "/memory@0");
+		if (memory == PROM_ERROR) {
 			printf("no memory node\n");
 			return 0;
 		}
@@ -144,40 +153,41 @@
 	return 1;
 }
 
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
+unsigned int of_claim(unsigned long virt, unsigned long size,
+		      unsigned long align)
 {
 	int ret;
-	unsigned int result;
+	prom_arg_t result;
 
 	if (need_map < 0)
 		need_map = check_of_version();
 	if (align || !need_map)
-		return (void *) of_call_prom("claim", 3, 1, virt, size, align);
+		return of_call_prom("claim", 3, 1, virt, size, align);
 
 	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
 			       align, size, virt);
 	if (ret != 0 || result == -1)
-		return (void *) -1;
+		return PROM_ERROR;	/* sentinel callers test for, as of_call_prom() */
 	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
 			       align, size, virt);
 	/* 0x12 == coherent + read/write */
 	ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
 			   0x12, size, virt, virt);
-	return (void *) virt;
+	return virt;
 }
 
 void *of_vmlinux_alloc(unsigned long size)
 {
 	unsigned long start = (unsigned long)_start, end = (unsigned long)_end;
-	void *addr;
+	unsigned long addr;
 	void *p;
 
 	/* With some older POWER4 firmware we need to claim the area the kernel
 	 * will reside in.  Newer firmwares don't need this so we just ignore
 	 * the return value.
 	 */
-	addr = of_claim(start, end - start, 0);
-	printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %p\r\n",
+	addr = (unsigned long) of_claim(start, end - start, 0);
+	printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %lx\r\n",
 	       start, end, end - start, addr);
 
 	p = malloc(size);
@@ -197,7 +207,7 @@
  */
 void *of_finddevice(const char *name)
 {
-	return (phandle) of_call_prom("finddevice", 1, 1, name);
+	return (void *) (unsigned long) of_call_prom("finddevice", 1, 1, name);
 }
 
 int of_getprop(const void *phandle, const char *name, void *buf,

diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index eb0e98b..35ea60c 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h

@@ -62,4 +62,16 @@
 #define SPRN_TBRL	268
 #define SPRN_TBRU	269
 
+#define FIXUP_ENDIAN						   \
+	tdi   0, 0, 0x48; /* Reverse endian of b . + 8		*/ \
+	b     $+36;	  /* Skip trampoline if endian is good	*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x1c004a39; /* addi r10,r10,28			*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+
 #endif /* _PPC64_PPC_ASM_H */

diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
new file mode 100644
index 0000000..6ef6e02
--- /dev/null
+++ b/arch/powerpc/boot/pseries-head.S

@@ -0,0 +1,8 @@
+#include "ppc_asm.h"
+
+	.text
+
+	.globl _zimage_start
+_zimage_start:
+	FIXUP_ENDIAN
+	b _zimage_start_lib

diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index 5b57800..a701261 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c

@@ -21,6 +21,18 @@
 	return sc - s;
 }
 
+#ifdef __powerpc64__
+
+# define do_div(n, base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	__rem = ((unsigned long long)(n)) % __base;			\
+	(n) = ((unsigned long long)(n)) / __base;			\
+	__rem;								\
+})
+
+#else
+
 extern unsigned int __div64_32(unsigned long long *dividend,
 			       unsigned int divisor);
 
@@ -39,6 +51,8 @@
 	__rem;								\
  })
 
+#endif /* __powerpc64__ */
+
 static int skip_atoi(const char **s)
 {
 	int i, c;

diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
new file mode 100644
index 0000000..d0e1431
--- /dev/null
+++ b/arch/powerpc/boot/swab.h

@@ -0,0 +1,34 @@
+#ifndef _PPC_BOOT_SWAB_H_
+#define _PPC_BOOT_SWAB_H_
+
+#include "types.h"	/* NOTE(review): expected home of u16/u32/u64 - confirm */
+
+/* Return x with its two bytes exchanged. */
+static inline u16 swab16(u16 x)
+{
+	return  ((x & (u16)0x00ffU) << 8) |
+		((x & (u16)0xff00U) >> 8);
+}
+
+/* Return x with the order of its four bytes reversed. */
+static inline u32 swab32(u32 x)
+{
+	return  ((x & (u32)0x000000ffUL) << 24) |
+		((x & (u32)0x0000ff00UL) <<  8) |
+		((x & (u32)0x00ff0000UL) >>  8) |
+		((x & (u32)0xff000000UL) >> 24);
+}
+
+/* Return x with the order of its eight bytes reversed. */
+static inline u64 swab64(u64 x)
+{
+	return  (u64)((x & (u64)0x00000000000000ffULL) << 56) |
+		(u64)((x & (u64)0x000000000000ff00ULL) << 40) |
+		(u64)((x & (u64)0x0000000000ff0000ULL) << 24) |
+		(u64)((x & (u64)0x00000000ff000000ULL) <<  8) |
+		(u64)((x & (u64)0x000000ff00000000ULL) >>  8) |
+		(u64)((x & (u64)0x0000ff0000000000ULL) >> 24) |
+		(u64)((x & (u64)0x00ff000000000000ULL) >> 40) |
+		(u64)((x & (u64)0xff00000000000000ULL) >> 56);
+}
+#endif /* _PPC_BOOT_SWAB_H_ */

diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c
new file mode 100644
index 0000000..b73174c
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-akebono.c

@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ *    Copyright 2007 David Gibson, IBM Corporation.
+ *    Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
+ *    Copyright © 2011 David Kleikamp IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "reg.h"
+#include "io.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "libfdt.h"
+
+BSS_STACK(4096);
+
+#define SPRN_PIR	0x11E	/* Processor Identification Register */
+#define USERDATA_LEN	256	/* Length of userdata passed in by PIBS */
+#define MAX_RANKS	0x4
+#define DDR3_MR0CF	0x80010011U
+#define CCTL0_MCO2	0x8000080FU
+#define CCTL0_MCO3	0x80000810U
+#define CCTL0_MCO4	0x80000811U
+#define CCTL0_MCO5	0x80000812U
+#define CCTL0_MCO6	0x80000813U
+
+static unsigned long long ibm_akebono_memsize;
+static long long unsigned mac_addr;
+
+static unsigned long long ibm_akebono_detect_memsize(void)
+{
+	u32 reg;
+	unsigned i;
+	unsigned long long memsize = 0;
+
+	for (i = 0; i < MAX_RANKS; i++) {
+		reg = mfdcrx(DDR3_MR0CF + i);	/* register for rank i */
+
+		if (!(reg & 1))	/* bit 0 clear: this rank adds nothing */
+			continue;
+
+		reg &= 0x0000f000;	/* bits 15:12 are used as a shift count */
+		reg >>= 12;
+		memsize += (0x800000ULL << reg);	/* 8 MiB << shift count */
+	}
+
+	return memsize;
+}
+
+static void ibm_akebono_fixups(void)
+{
+	void *emac;
+	u32 reg;
+
+	dt_fixup_memory(0x0ULL, ibm_akebono_memsize);
+
+	/* Fixup the SD timeout frequency */
+	mtdcrx(CCTL0_MCO4, 0x1);
+
+	/* Disable SD high-speed mode (which seems to be broken) */
+	reg = mfdcrx(CCTL0_MCO2) & ~0x2;
+	mtdcrx(CCTL0_MCO2, reg);
+
+	/* Set the MAC address, if the node exists and an address was parsed */
+	emac = finddevice("/plb/opb/ethernet");
+	if (emac != NULL) {	/* NOTE(review): assumes NULL means "not found" */
+		if (mac_addr)	/* pass 6 bytes, skipping the first 2 in memory */
+			setprop(emac, "local-mac-address",
+				((u8 *) &mac_addr) + 2, 6);
+	}
+}
+
+void platform_init(char *userdata)
+{
+	unsigned long end_of_ram, avail_ram;
+	u32 pir_reg;
+	int node, size;
+	const u32 *timebase;
+	int len, i, userdata_len;
+	char *end;
+
+	userdata[USERDATA_LEN - 1] = '\0';
+	userdata_len = strlen(userdata);
+	for (i = 0; i < userdata_len - 15; i++) {
+		if (strncmp(&userdata[i], "local-mac-addr=", 15) == 0) {
+			if (i > 0 && userdata[i - 1] != ' ') {
+				/* We've only found a substring ending
+				 * with local-mac-addr so this isn't
+				 * our mac address. */
+				continue;
+			}
+
+			mac_addr = strtoull(&userdata[i + 15], &end, 16);
+
+			/* Remove the "local-mac-addr=<...>" from the kernel
+			 * command line, including the trailing space if
+			 * present. */
+			if (*end == ' ')
+				end++;
+
+			len = end - &userdata[i];	/* bytes being removed */
+			memmove(&userdata[i], end,
+				userdata_len - (len + i) + 1);
+			break;
+		}
+	}
+
+	loader_info.cmdline = userdata;
+	loader_info.cmdline_len = USERDATA_LEN;	/* same bound as terminated above */
+
+	ibm_akebono_memsize = ibm_akebono_detect_memsize();
+	if (ibm_akebono_memsize >> 32)
+		end_of_ram = ~0UL;
+	else
+		end_of_ram = ibm_akebono_memsize;
+	avail_ram = end_of_ram - (unsigned long)_end;
+
+	simple_alloc_init(_end, avail_ram, 128, 64);
+	platform_ops.fixups = ibm_akebono_fixups;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	pir_reg = mfspr(SPRN_PIR);
+
+	/* Make sure FDT blob is sane */
+	if (fdt_check_header(_dtb_start) != 0)
+		fatal("Invalid device tree blob\n");
+
+	node = fdt_node_offset_by_prop_value(_dtb_start, -1, "device_type",
+					     "cpu", sizeof("cpu"));
+	if (node < 0)	/* libfdt signals "not found" with a negative code */
+		fatal("Cannot find cpu node\n");
+	timebase = fdt_getprop(_dtb_start, node, "timebase-frequency", &size);
+	if (timebase && (size == 4) && *timebase)	/* never divide by 0 */
+		timebase_period_ns = 1000000000 / *timebase;
+
+	fdt_set_boot_cpuid_phys(_dtb_start, pir_reg);
+	fdt_init(_dtb_start);
+
+	serial_console_init();
+}

diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index 6636b1d..243b849 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S

@@ -45,7 +45,7 @@
 	mfspr	r4,SPRN_PVR
 	srwi	r4,r4,16
 	cmpwi	0,r4,1		/* 601 ? */
-	bne	.udelay_not_601
+	bne	.Ludelay_not_601
 00:	li	r0,86	/* Instructions / microsecond? */
 	mtctr	r0
 10:	addi	r0,r0,0 /* NOP */
@@ -54,7 +54,7 @@
 	bne	00b
 	blr
 
-.udelay_not_601:
+.Ludelay_not_601:
 	mulli	r4,r3,1000	/* nanoseconds */
 	/*  Change r4 to be the number of ticks using:
 	 *	(nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index d27a255..ae0f88e 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper

@@ -40,6 +40,7 @@
 binary=
 gzip=.gz
 pie=
+format=elf32ppc	# default ld emulation; refined from the kernel's ELF type below
 
 # cross-compilation prefix
 CROSS=
@@ -136,6 +137,14 @@
     kernel=vmlinux
 fi
 
+elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+case "$elfformat" in
+    elf64-powerpcle)	format=elf64lppc	;;
+    elf64-powerpc)	format=elf32ppc	;;
+    elf32-powerpc)	format=elf32ppc	;;
+esac
+
+
 platformo=$object/"$platform".o
 lds=$object/zImage.lds
 ext=strip
@@ -152,8 +161,12 @@
     make_space=n
     ;;
 pseries)
-    platformo="$object/of.o $object/epapr.o"
+    platformo="$object/pseries-head.o $object/of.o $object/epapr.o"
     link_address='0x4000000'
+    if [ "$format" != "elf32ppc" ]; then
+	link_address=
+	pie=-pie
+    fi
     make_space=n
     ;;
 maple)
@@ -257,6 +270,9 @@
 treeboot-currituck)
     link_address='0x1000000'
     ;;
+treeboot-akebono)
+    link_address='0x1000000'
+    ;;
 treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
@@ -379,7 +395,7 @@
     if [ -n "$link_address" ] ; then
         text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
+    ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi

diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 2bd8731..861e721 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S

@@ -1,4 +1,10 @@
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+OUTPUT_ARCH(powerpc:common64)
+#else
 OUTPUT_ARCH(powerpc:common)
+#endif
 ENTRY(_zimage_start)
 EXTERN(_zimage_start)
 SECTIONS
@@ -16,7 +22,9 @@
     *(.rodata*)
     *(.data*)
     *(.sdata*)
+#ifndef CONFIG_PPC64_BOOT_WRAPPER
     *(.got2)
+#endif
   }
   .dynsym : { *(.dynsym) }
   .dynstr : { *(.dynstr) }
@@ -27,7 +35,13 @@
   }
   .hash : { *(.hash) }
   .interp : { *(.interp) }
-  .rela.dyn : { *(.rela*) }
+  .rela.dyn :
+  {
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+    __rela_dyn_start = .;
+#endif
+    *(.rela*)
+  }
 
   . = ALIGN(8);
   .kernel:dtb :
@@ -53,6 +67,15 @@
     _initrd_end =  .;
   }
 
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+  .got :
+  {
+    __toc_start = .;
+    *(.got)
+    *(.toc)
+  }
+#endif
+
   . = ALIGN(4096);
   .bss       :
   {

diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
new file mode 100644
index 0000000..7e2530c
--- /dev/null
+++ b/arch/powerpc/configs/44x/akebono_defconfig

@@ -0,0 +1,148 @@
+CONFIG_44x=y
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_RD_XZ=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_SLUB_CPU_PARTIAL is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_POWERNV_MSI is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_AKEBONO=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_100=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_COMPACTION is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE=""
+# CONFIG_SUSPEND is not set
+CONFIG_PCI_MSI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SATA_PMP is not set
+# CONFIG_ATA_SFF is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+CONFIG_IBM_EMAC=y
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_DEFAULT_PERSIST is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_HCD_PCI is not set
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="n"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x00010000
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x33f
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1_PPC=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set

diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
new file mode 100644
index 0000000..e9a81e5
--- /dev/null
+++ b/arch/powerpc/configs/85xx/kmp204x_defconfig

@@ -0,0 +1,225 @@
+CONFIG_PPC_85xx=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+CONFIG_CORENET_GENERIC=y
+CONFIG_MPIC_MSGR=y
+CONFIG_HIGHMEM=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_KEXEC=y
+CONFIG_FORCE_MAX_ZONEORDER=13
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCI_MSI=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_LOWMEM_SIZE_BOOL=y
+CONFIG_LOWMEM_SIZE=0x20000000
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
+CONFIG_TIPC=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_HFSC=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_RED=y
+CONFIG_NET_SCH_SFQ=y
+CONFIG_NET_SCH_TEQL=y
+CONFIG_NET_SCH_TBF=y
+CONFIG_NET_SCH_GRED=y
+CONFIG_NET_CLS_BASIC=y
+CONFIG_NET_CLS_TCINDEX=y
+CONFIG_NET_CLS_U32=y
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_FLOW=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/mdev"
+CONFIG_DEVTMPFS=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_PHRAM=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_ECC_BCH=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=2048
+CONFIG_EEPROM_AT24=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+CONFIG_FSL_PQ_MDIO=y
+CONFIG_FSL_XGMAC_MDIO=y
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
+CONFIG_FIXED_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_SPI_FSL_ESPI=y
+CONFIG_SPI_SPIDEV=m
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
+CONFIG_EDAC_MPC85XX=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS3232=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_UIO=y
+CONFIG_STAGING=y
+# CONFIG_NET_VENDOR_SILICOM is not set
+CONFIG_CLK_PPC_CORENET=y
+CONFIG_EXT2_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_CRAMFS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=m
+CONFIG_CRC_ITU_T=m
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_RCU_TRACE=y
+CONFIG_UPROBE_EVENT=y
+CONFIG_CRYPTO_NULL=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y

diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
deleted file mode 100644
index 4f35fc4..0000000
--- a/arch/powerpc/configs/chroma_defconfig
+++ /dev/null

@@ -1,307 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
-CONFIG_SMP=y
-CONFIG_NR_CPUS=256
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=19
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
-CONFIG_NAMESPACES=y
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_INITRAMFS_COMPRESSION_GZIP=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_SCOM_DEBUGFS=y
-CONFIG_PPC_A2_DD2=y
-CONFIG_KVM_GUEST=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_100=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_MISC=y
-CONFIG_NUMA=y
-# CONFIG_MIGRATION is not set
-CONFIG_PPC_64K_PAGES=y
-CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
-# CONFIG_SECCOMP is not set
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_MSI=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_KEY=m
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_IPV6=y
-CONFIG_IPV6_PRIVACY=y
-CONFIG_IPV6_ROUTER_PREF=y
-CONFIG_IPV6_ROUTE_INFO=y
-CONFIG_IPV6_OPTIMISTIC_DAD=y
-CONFIG_INET6_AH=y
-CONFIG_INET6_ESP=y
-CONFIG_INET6_IPCOMP=y
-CONFIG_IPV6_MIP6=y
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y
-CONFIG_IPV6_TUNNEL=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_PIMSM_V2=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NET_TCPPROBE=y
-# CONFIG_WIRELESS is not set
-CONFIG_NET_9P=y
-CONFIG_NET_9P_DEBUG=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_LE_BYTE_SWAP=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_CDROM_PKTCDVD=y
-CONFIG_MISC_DEVICES=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SCSI_SAS_ATTRS=m
-CONFIG_SCSI_SRP_ATTRS=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_SIL24=y
-CONFIG_SATA_MV=y
-CONFIG_SATA_SIL=y
-CONFIG_PATA_CMD64X=y
-CONFIG_PATA_MARVELL=y
-CONFIG_PATA_SIL680=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_SNAPSHOT=y
-CONFIG_DM_MIRROR=y
-CONFIG_DM_ZERO=y
-CONFIG_DM_UEVENT=y
-CONFIG_NETDEVICES=y
-CONFIG_TUN=y
-CONFIG_E1000E=y
-CONFIG_TIGON3=y
-# CONFIG_WLAN is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-# CONFIG_HWMON is not set
-# CONFIG_VGA_ARB is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1511=y
-CONFIG_RTC_DRV_DS1553=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=y
-# CONFIG_DNOTIFY is not set
-CONFIG_FUSE_FS=y
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_CONFIGFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_V4_1=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=y
-CONFIG_CIFS_WEAK_PW_HASH=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
-CONFIG_PRINTK_TIME=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_STRIP_ASM_SYMS=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_PPC_EMULATED_STATS=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_VIRTUALIZATION=y

diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index bbd794d..c19ff05 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig

@@ -72,6 +72,7 @@
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_M25P80=y

diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index 19f0fbe..55765c8 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig

@@ -32,7 +32,6 @@
 CONFIG_P1022_DS=y
 CONFIG_P1022_RDK=y
 CONFIG_P1023_RDB=y
-CONFIG_P1023_RDS=y
 CONFIG_SOCRATES=y
 CONFIG_KSI8560=y
 CONFIG_XES_MPC85xx=y

diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 062312e..5c6ecdc 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig

@@ -35,7 +35,6 @@
 CONFIG_P1022_DS=y
 CONFIG_P1022_RDK=y
 CONFIG_P1023_RDB=y
-CONFIG_P1023_RDS=y
 CONFIG_SOCRATES=y
 CONFIG_KSI8560=y
 CONFIG_XES_MPC85xx=y

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 97e02f9..37991e1 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h

@@ -42,15 +42,47 @@
 } while (0)
 #endif
 
+#define OP_RT_RA_MASK	0xffff0000UL
+#define LIS_R2		0x3c020000UL
+#define ADDIS_R2_R12	0x3c4c0000UL
+#define ADDI_R2_R2	0x38420000UL
+
 static inline unsigned long ppc_function_entry(void *func)
 {
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	u32 *insn = func;
+
 	/*
-	 * On PPC64 the function pointer actually points to the function's
-	 * descriptor. The first entry in the descriptor is the address
-	 * of the function text.
+	 * A PPC64 ABIv2 function may have a local and a global entry
+	 * point. We need to use the local entry point when patching
+	 * functions, so identify and step over the global entry point
+	 * sequence.
+	 *
+	 * The global entry point sequence is always of the form:
+	 *
+	 * addis r2,r12,XXXX
+	 * addi  r2,r2,XXXX
+	 *
+	 * A linker optimisation may convert the addis to lis:
+	 *
+	 * lis   r2,XXXX
+	 * addi  r2,r2,XXXX
+	 */
+	if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+	     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+	    ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+		return (unsigned long)(insn + 2);
+	else
+		return (unsigned long)func;
+#else
+	/*
+	 * On PPC64 ABIv1 the function pointer actually points to the
+	 * function's descriptor. The first entry in the descriptor is the
+	 * address of the function text.
 	 */
 	return ((func_descr_t *)func)->entry;
+#endif
 #else
 	return (unsigned long)func;
 #endif

diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h
index b6f5a33..4001492 100644
--- a/arch/powerpc/include/asm/context_tracking.h
+++ b/arch/powerpc/include/asm/context_tracking.h

@@ -2,9 +2,9 @@
 #define _ASM_POWERPC_CONTEXT_TRACKING_H
 
 #ifdef CONFIG_CONTEXT_TRACKING
-#define SCHEDULE_USER bl	.schedule_user
+#define SCHEDULE_USER bl	schedule_user
 #else
-#define SCHEDULE_USER bl	.schedule
+#define SCHEDULE_USER bl	schedule
 #endif
 
 #endif

diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index f42e9ba..7c8608b 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h

@@ -489,7 +489,6 @@
 #define FCC_GFMR_TCI		((uint)0x20000000)
 #define FCC_GFMR_TRX		((uint)0x10000000)
 #define FCC_GFMR_TTX		((uint)0x08000000)
-#define FCC_GFMR_TTX		((uint)0x08000000)
 #define FCC_GFMR_CDP		((uint)0x04000000)
 #define FCC_GFMR_CTSP		((uint)0x02000000)
 #define FCC_GFMR_CDS		((uint)0x01000000)

diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index ac3eedb..2bf8e93 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h

@@ -18,10 +18,12 @@
 
 #ifdef CONFIG_SMP
 extern int threads_per_core;
+extern int threads_per_subcore;
 extern int threads_shift;
 extern cpumask_t threads_core_mask;
 #else
 #define threads_per_core	1
+#define threads_per_subcore	1
 #define threads_shift		0
 #define threads_core_mask	(CPU_MASK_CPU0)
 #endif
@@ -74,6 +76,11 @@
 	return cpu & (threads_per_core - 1);
 }
 
+static inline int cpu_thread_in_subcore(int cpu)
+{
+	return cpu & (threads_per_subcore - 1);
+}
+
 static inline int cpu_first_thread_sibling(int cpu)
 {
 	return cpu & ~(threads_per_core - 1);

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index d251630..a954e49 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h

@@ -46,7 +46,8 @@
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
-int set_breakpoint(struct arch_hw_breakpoint *brk);
+void set_breakpoint(struct arch_hw_breakpoint *brk);
+void __set_breakpoint(struct arch_hw_breakpoint *brk);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
 			 unsigned long error_code, int signal_code, int brkpt);

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d4dd41f..fab7743 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h

@@ -32,6 +32,22 @@
 
 #ifdef CONFIG_EEH
 
+/* EEH subsystem flags */
+#define EEH_ENABLED		0x1	/* EEH enabled		*/
+#define EEH_FORCE_DISABLED	0x2	/* EEH disabled		*/
+#define EEH_PROBE_MODE_DEV	0x4	/* From PCI device	*/
+#define EEH_PROBE_MODE_DEVTREE	0x8	/* From device tree	*/
+
+/*
+ * Delay for PE reset, all in ms
+ *
+ * PCI specification has reset hold time of 100 milliseconds.
+ * We have 250 milliseconds here. The PCI bus settlement time
+ * is specified as 1.5 seconds and we have 1.8 seconds.
+ */
+#define EEH_PE_RST_HOLD_TIME		250
+#define EEH_PE_RST_SETTLE_TIME		1800
+
 /*
  * The struct is used to trace PE related EEH functionality.
  * In theory, there will have one instance of the struct to
@@ -53,7 +69,7 @@
 
 #define EEH_PE_ISOLATED		(1 << 0)	/* Isolated PE		*/
 #define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE	*/
-#define EEH_PE_PHB_DEAD		(1 << 2)	/* Dead PHB		*/
+#define EEH_PE_RESET		(1 << 2)	/* PE reset in progress	*/
 
 #define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
 
@@ -92,6 +108,7 @@
 
 #define EEH_DEV_NO_HANDLER	(1 << 8)	/* No error handler	*/
 #define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
+#define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
@@ -99,7 +116,9 @@
 	int config_addr;		/* Config address		*/
 	int pe_config_addr;		/* PE config address		*/
 	u32 config_space[16];		/* Saved PCI config space	*/
-	u8 pcie_cap;			/* Saved PCIe capability	*/
+	int pcix_cap;			/* Saved PCIx capability	*/
+	int pcie_cap;			/* Saved PCIe capability	*/
+	int aer_cap;			/* Saved AER capability		*/
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
@@ -171,37 +190,40 @@
 	int (*restore_config)(struct device_node *dn);
 };
 
+extern int eeh_subsystem_flags;
 extern struct eeh_ops *eeh_ops;
-extern bool eeh_subsystem_enabled;
 extern raw_spinlock_t confirm_error_lock;
-extern int eeh_probe_mode;
 
 static inline bool eeh_enabled(void)
 {
-	return eeh_subsystem_enabled;
+	if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) ||
+	    !(eeh_subsystem_flags & EEH_ENABLED))
+		return false;
+
+	return true;
 }
 
 static inline void eeh_set_enable(bool mode)
 {
-	eeh_subsystem_enabled = mode;
+	if (mode)
+		eeh_subsystem_flags |= EEH_ENABLED;
+	else
+		eeh_subsystem_flags &= ~EEH_ENABLED;
 }
 
-#define EEH_PROBE_MODE_DEV	(1<<0)	/* From PCI device	*/
-#define EEH_PROBE_MODE_DEVTREE	(1<<1)	/* From device tree	*/
-
 static inline void eeh_probe_mode_set(int flag)
 {
-	eeh_probe_mode = flag;
+	eeh_subsystem_flags |= flag;
 }
 
 static inline int eeh_probe_mode_devtree(void)
 {
-	return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE);
+	return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE);
 }
 
 static inline int eeh_probe_mode_dev(void)
 {
-	return (eeh_probe_mode == EEH_PROBE_MODE_DEV);
+	return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV);
 }
 
 static inline void eeh_serialize_lock(unsigned long *flags)
@@ -232,6 +254,7 @@
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
+const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 void *eeh_dev_init(struct device_node *dn, void *data);

diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 89d5670..1e551a2 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h

@@ -33,7 +33,7 @@
 
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
-void eeh_remove_event(struct eeh_pe *pe);
+void eeh_remove_event(struct eeh_pe *pe, bool force);
 void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 935b5e7..888d8f3 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h

@@ -90,6 +90,8 @@
 do {								\
 	if (((ex).e_flags & 0x3) == 2)				\
 		set_thread_flag(TIF_ELF2ABI);			\
+	else							\
+		clear_thread_flag(TIF_ELF2ABI);			\
 	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)		\
 		set_thread_flag(TIF_32BIT);			\
 	else							\

diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a563d9af..a8b52b6 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h

@@ -174,10 +174,10 @@
 	mtlr	r16;
 #define TLB_MISS_STATS_D(name)						    \
 	addi	r9,r13,MMSTAT_DSTATS+name;				    \
-	bl	.tlb_stat_inc;
+	bl	tlb_stat_inc;
 #define TLB_MISS_STATS_I(name)						    \
 	addi	r9,r13,MMSTAT_ISTATS+name;				    \
-	bl	.tlb_stat_inc;
+	bl	tlb_stat_inc;
 #define TLB_MISS_STATS_X(name)						    \
 	ld	r8,PACA_EXTLB+EX_TLB_ESR(r13);				    \
 	cmpdi	cr2,r8,-1;						    \
@@ -185,7 +185,7 @@
 	addi	r9,r13,MMSTAT_DSTATS+name;				    \
 	b	62f;							    \
 61:	addi	r9,r13,MMSTAT_ISTATS+name;				    \
-62:	bl	.tlb_stat_inc;
+62:	bl	tlb_stat_inc;
 #define TLB_MISS_STATS_SAVE_INFO					    \
 	std	r14,EX_TLB_ESR(r12);	/* save ESR */
 #define TLB_MISS_STATS_SAVE_INFO_BOLTED					    \

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index aeaa56c..8f35cd7 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h

@@ -517,7 +517,7 @@
 #define DISABLE_INTS	RECONCILE_IRQ_STATE(r10,r11)
 
 #define ADD_NVGPRS				\
-	bl	.save_nvgprs
+	bl	save_nvgprs
 
 #define RUNLATCH_ON				\
 BEGIN_FTR_SECTION				\

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 169d039..e366187 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h

@@ -61,6 +61,7 @@
 #endif
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
 {
@@ -72,6 +73,7 @@
 	 */
 	return !strcmp(sym + 4, name + 3);
 }
+#endif
 #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_FTRACE */

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index eb0f4ac..ac6432d 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h

@@ -79,7 +79,7 @@
 	brk.address = 0;
 	brk.type = 0;
 	brk.len = 0;
-	set_breakpoint(&brk);
+	__set_breakpoint(&brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 

diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index f51a558..e20eb95 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h

@@ -20,9 +20,9 @@
  */
 #define TRACE_WITH_FRAME_BUFFER(func)		\
 	mflr	r0;				\
-	stdu	r1, -32(r1);			\
+	stdu	r1, -STACK_FRAME_OVERHEAD(r1);	\
 	std	r0, 16(r1);			\
-	stdu	r1, -32(r1);			\
+	stdu	r1, -STACK_FRAME_OVERHEAD(r1);	\
 	bl func;				\
 	ld	r1, 0(r1);			\
 	ld	r1, 0(r1);
@@ -36,8 +36,8 @@
  * have to call a C function so call a wrapper that saves all the
  * C-clobbered registers.
  */
-#define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)
-#define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)
+#define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on)
+#define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
 
 /*
  * This is used by assembly code to soft-disable interrupts first and

diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 7b6feab..af15d4d 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h

@@ -30,6 +30,7 @@
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <asm/probes.h>
+#include <asm/code-patching.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
 
@@ -56,9 +57,9 @@
 		if ((colon = strchr(name, ':')) != NULL) {		\
 			colon++;					\
 			if (*colon != '\0' && *colon != '.')		\
-				addr = *(kprobe_opcode_t **)addr;	\
+				addr = (kprobe_opcode_t *)ppc_function_entry(addr); \
 		} else if (name[0] != '.')				\
-			addr = *(kprobe_opcode_t **)addr;		\
+			addr = (kprobe_opcode_t *)ppc_function_entry(addr); \
 	} else {							\
 		char dot_name[KSYM_NAME_LEN];				\
 		dot_name[0] = '.';					\

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4a7cc45..9c89cdd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h

@@ -337,6 +337,10 @@
 	vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
 }
 
+extern void kvm_hv_vm_activated(void);
+extern void kvm_hv_vm_deactivated(void);
+extern bool kvm_hv_mode_active(void);
+
 #else
 static inline void __init kvm_cma_reserve(void)
 {}
@@ -356,6 +360,9 @@
 {
 	kvm_vcpu_kick(vcpu);
 }
+
+static inline bool kvm_hv_mode_active(void)		{ return false; }
+
 #endif
 
 #ifdef CONFIG_KVM_XICS

diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h
index b36f650..e3ad5c7 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h

@@ -2,6 +2,7 @@
 #define _ASM_POWERPC_LINKAGE_H
 
 #ifdef CONFIG_PPC64
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 #define cond_syscall(x) \
 	asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n"		\
 	     "\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n")
@@ -9,5 +10,6 @@
 	asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n"	\
 	     "\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
 #endif
+#endif
 
 #endif	/* _ASM_POWERPC_LINKAGE_H */

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 5b6c03f..f92b0b5 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h

@@ -98,6 +98,9 @@
 	void		(*iommu_save)(void);
 	void		(*iommu_restore)(void);
 #endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+	unsigned long	(*memory_block_size)(void);
+#endif
 #endif /* CONFIG_PPC64 */
 
 	void		(*pci_dma_dev_setup)(struct pci_dev *dev);
@@ -113,6 +116,8 @@
 	/* Optional, may be NULL. */
 	void		(*show_cpuinfo)(struct seq_file *m);
 	void		(*show_percpuinfo)(struct seq_file *m, int i);
+	/* Returns the current operating frequency of "cpu" in Hz */
+	unsigned long  	(*get_proc_freq)(unsigned int cpu);
 
 	void		(*init_IRQ)(void);
 
@@ -241,6 +246,9 @@
 	/* Called during PCI resource reassignment */
 	resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type);
 
+	/* Reset the secondary bus of bridge */
+	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
+
 	/* Called to shutdown machine specific hardware not already controlled
 	 * by other drivers.
 	 */

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 901dac6..d0918e0 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h

@@ -223,10 +223,6 @@
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
-#ifdef CONFIG_PPC_ICSWX
-	struct spinlock *cop_lockp;	/* guard cop related stuff */
-	unsigned long acop;		/* mask of enabled coprocessor types */
-#endif /* CONFIG_PPC_ICSWX */
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;   /* SLB page size encodings */
 	u64 high_slices_psize;  /* 4 bits per slice for now */

diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 49fa55b..dcfcad1 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h

@@ -35,6 +35,7 @@
 #ifdef __powerpc64__
 	unsigned int stubs_section;	/* Index of stubs section in module */
 	unsigned int toc_section;	/* What section is the TOC? */
+	bool toc_fixed;			/* Have we fixed up .TOC.? */
 #ifdef CONFIG_DYNAMIC_FTRACE
 	unsigned long toc;
 	unsigned long tramp;
@@ -77,6 +78,9 @@
 #    endif	/* MODULE */
 #endif
 
+bool is_module_trampoline(u32 *insns);
+int module_trampoline_target(struct module *mod, u32 *trampoline,
+			     unsigned long *target);
 
 struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 66ad7a7..4600188 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h

@@ -154,6 +154,7 @@
 #define OPAL_LPC_READ				67
 #define OPAL_LPC_WRITE				68
 #define OPAL_RETURN_CPU				69
+#define OPAL_REINIT_CPUS			70
 #define OPAL_ELOG_READ				71
 #define OPAL_ELOG_WRITE				72
 #define OPAL_ELOG_ACK				73
@@ -509,7 +510,7 @@
 struct OpalMemoryErrorData {
 	enum OpalMemErr_Version	version:8;	/* 0x00 */
 	enum OpalMemErrType	type:8;		/* 0x01 */
-	uint16_t		flags;		/* 0x02 */
+	__be16			flags;		/* 0x02 */
 	uint8_t			reserved_1[4];	/* 0x04 */
 
 	union {
@@ -517,15 +518,15 @@
 		struct {
 			enum OpalMemErr_ResilErrType resil_err_type:8;
 			uint8_t		reserved_1[7];
-			uint64_t	physical_address_start;
-			uint64_t	physical_address_end;
+			__be64		physical_address_start;
+			__be64		physical_address_end;
 		} resilience;
 		/* Dynamic memory deallocation error info */
 		struct {
 			enum OpalMemErr_DynErrType dyn_err_type:8;
 			uint8_t		reserved_1[7];
-			uint64_t	physical_address_start;
-			uint64_t	physical_address_end;
+			__be64		physical_address_start;
+			__be64		physical_address_end;
 		} dyn_dealloc;
 	} u;
 };
@@ -598,9 +599,9 @@
 };
 
 struct OpalIoPhbErrorCommon {
-	uint32_t version;
-	uint32_t ioType;
-	uint32_t len;
+	__be32 version;
+	__be32 ioType;
+	__be32 len;
 };
 
 struct OpalIoP7IOCPhbErrorData {
@@ -665,64 +666,69 @@
 struct OpalIoPhb3ErrorData {
 	struct OpalIoPhbErrorCommon common;
 
-	uint32_t brdgCtl;
+	__be32 brdgCtl;
 
 	/* PHB3 UTL regs */
-	uint32_t portStatusReg;
-	uint32_t rootCmplxStatus;
-	uint32_t busAgentStatus;
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
 
 	/* PHB3 cfg regs */
-	uint32_t deviceStatus;
-	uint32_t slotStatus;
-	uint32_t linkStatus;
-	uint32_t devCmdStatus;
-	uint32_t devSecStatus;
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
 
 	/* cfg AER regs */
-	uint32_t rootErrorStatus;
-	uint32_t uncorrErrorStatus;
-	uint32_t corrErrorStatus;
-	uint32_t tlpHdr1;
-	uint32_t tlpHdr2;
-	uint32_t tlpHdr3;
-	uint32_t tlpHdr4;
-	uint32_t sourceId;
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
 
-	uint32_t rsv3;
+	__be32 rsv3;
 
 	/* Record data about the call to allocate a buffer */
-	uint64_t errorClass;
-	uint64_t correlator;
+	__be64 errorClass;
+	__be64 correlator;
 
-	uint64_t nFir;			/* 000 */
-	uint64_t nFirMask;		/* 003 */
-	uint64_t nFirWOF;		/* 008 */
+	__be64 nFir;			/* 000 */
+	__be64 nFirMask;		/* 003 */
+	__be64 nFirWOF;		/* 008 */
 
 	/* PHB3 MMIO Error Regs */
-	uint64_t phbPlssr;		/* 120 */
-	uint64_t phbCsr;		/* 110 */
-	uint64_t lemFir;		/* C00 */
-	uint64_t lemErrorMask;		/* C18 */
-	uint64_t lemWOF;		/* C40 */
-	uint64_t phbErrorStatus;	/* C80 */
-	uint64_t phbFirstErrorStatus;	/* C88 */
-	uint64_t phbErrorLog0;		/* CC0 */
-	uint64_t phbErrorLog1;		/* CC8 */
-	uint64_t mmioErrorStatus;	/* D00 */
-	uint64_t mmioFirstErrorStatus;	/* D08 */
-	uint64_t mmioErrorLog0;		/* D40 */
-	uint64_t mmioErrorLog1;		/* D48 */
-	uint64_t dma0ErrorStatus;	/* D80 */
-	uint64_t dma0FirstErrorStatus;	/* D88 */
-	uint64_t dma0ErrorLog0;		/* DC0 */
-	uint64_t dma0ErrorLog1;		/* DC8 */
-	uint64_t dma1ErrorStatus;	/* E00 */
-	uint64_t dma1FirstErrorStatus;	/* E08 */
-	uint64_t dma1ErrorLog0;		/* E40 */
-	uint64_t dma1ErrorLog1;		/* E48 */
-	uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS];
-	uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 phbPlssr;		/* 120 */
+	__be64 phbCsr;		/* 110 */
+	__be64 lemFir;		/* C00 */
+	__be64 lemErrorMask;		/* C18 */
+	__be64 lemWOF;		/* C40 */
+	__be64 phbErrorStatus;	/* C80 */
+	__be64 phbFirstErrorStatus;	/* C88 */
+	__be64 phbErrorLog0;		/* CC0 */
+	__be64 phbErrorLog1;		/* CC8 */
+	__be64 mmioErrorStatus;	/* D00 */
+	__be64 mmioFirstErrorStatus;	/* D08 */
+	__be64 mmioErrorLog0;		/* D40 */
+	__be64 mmioErrorLog1;		/* D48 */
+	__be64 dma0ErrorStatus;	/* D80 */
+	__be64 dma0FirstErrorStatus;	/* D88 */
+	__be64 dma0ErrorLog0;		/* DC0 */
+	__be64 dma0ErrorLog1;		/* DC8 */
+	__be64 dma1ErrorStatus;	/* E00 */
+	__be64 dma1FirstErrorStatus;	/* E08 */
+	__be64 dma1ErrorLog0;		/* E40 */
+	__be64 dma1ErrorLog1;		/* E48 */
+	__be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
+enum {
+	OPAL_REINIT_CPUS_HILE_BE	= (1 << 0),
+	OPAL_REINIT_CPUS_HILE_LE	= (1 << 1),
 };
 
 typedef struct oppanel_line {
@@ -845,10 +851,11 @@
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
 int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
-int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
-			    uint16_t *pci_error_type, uint16_t *severity);
+int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
+			    __be16 *pci_error_type, __be16 *severity);
 int64_t opal_pci_poll(uint64_t phb_id);
 int64_t opal_return_cpu(void);
+int64_t opal_reinit_cpus(uint64_t flags);
 
 int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val);
 int64_t opal_xscom_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val);
@@ -916,6 +923,7 @@
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_init(void);
+extern void opal_flash_term_callback(void);
 extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
 extern void opal_sys_param_init(void);

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 8e956a0..bb0bd25 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h

@@ -92,7 +92,10 @@
 	struct slb_shadow *slb_shadow_ptr;
 	struct dtl_entry *dispatch_log;
 	struct dtl_entry *dispatch_log_end;
+#endif /* CONFIG_PPC_STD_MMU_64 */
+	u64 dscr_default;		/* per-CPU default DSCR */
 
+#ifdef CONFIG_PPC_STD_MMU_64
 	/*
 	 * Now, starting in cacheline 2, the exception save areas
 	 */

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index ed57fa7..db1e2b8 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h

@@ -58,6 +58,7 @@
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
 void eeh_pe_state_mark(struct eeh_pe *pe, int state);
 void eeh_pe_state_clear(struct eeh_pe *pe, int state);
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
 
 void eeh_sysfs_add_device(struct pci_dev *pdev);
 void eeh_sysfs_remove_device(struct pci_dev *pdev);

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cded7c1..9ea266e 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h

@@ -57,7 +57,7 @@
 	LDX_BE	r10,0,r10;		/* get log write index */	\
 	cmpd	cr1,r11,r10;						\
 	beq+	cr1,33f;						\
-	bl	.accumulate_stolen_time;				\
+	bl	accumulate_stolen_time;				\
 	ld	r12,_MSR(r1);						\
 	andi.	r10,r12,MSR_PR;		/* Restore cr0 (coming from user) */ \
 33:									\
@@ -189,9 +189,45 @@
 #define __STK_REG(i)   (112 + ((i)-14)*8)
 #define STK_REG(i)     __STK_REG(__REG_##i)
 
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STK_GOT		24
+#define __STK_PARAM(i)	(32 + ((i)-3)*8)
+#else
+#define STK_GOT		40
 #define __STK_PARAM(i)	(48 + ((i)-3)*8)
+#endif
 #define STK_PARAM(i)	__STK_PARAM(__REG_##i)
 
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define _GLOBAL(name) \
+	.section ".text"; \
+	.align 2 ; \
+	.type name,@function; \
+	.globl name; \
+name:
+
+#define _GLOBAL_TOC(name) \
+	.section ".text"; \
+	.align 2 ; \
+	.type name,@function; \
+	.globl name; \
+name: \
+0:	addis r2,r12,(.TOC.-0b)@ha; \
+	addi r2,r2,(.TOC.-0b)@l; \
+	.localentry name,.-name
+
+#define _KPROBE(name) \
+	.section ".kprobes.text","a"; \
+	.align 2 ; \
+	.type name,@function; \
+	.globl name; \
+name:
+
+#define DOTSYM(a)	a
+
+#else
+
 #define XGLUE(a,b) a##b
 #define GLUE(a,b) XGLUE(a,b)
 
@@ -209,19 +245,7 @@
 	.type GLUE(.,name),@function; \
 GLUE(.,name):
 
-#define _INIT_GLOBAL(name) \
-	__REF; \
-	.align 2 ; \
-	.globl name; \
-	.globl GLUE(.,name); \
-	.section ".opd","aw"; \
-name: \
-	.quad GLUE(.,name); \
-	.quad .TOC.@tocbase; \
-	.quad 0; \
-	.previous; \
-	.type GLUE(.,name),@function; \
-GLUE(.,name):
+#define _GLOBAL_TOC(name) _GLOBAL(name)
 
 #define _KPROBE(name) \
 	.section ".kprobes.text","a"; \
@@ -237,29 +261,9 @@
 	.type GLUE(.,name),@function; \
 GLUE(.,name):
 
-#define _STATIC(name) \
-	.section ".text"; \
-	.align 2 ; \
-	.section ".opd","aw"; \
-name: \
-	.quad GLUE(.,name); \
-	.quad .TOC.@tocbase; \
-	.quad 0; \
-	.previous; \
-	.type GLUE(.,name),@function; \
-GLUE(.,name):
+#define DOTSYM(a)	GLUE(.,a)
 
-#define _INIT_STATIC(name) \
-	__REF; \
-	.align 2 ; \
-	.section ".opd","aw"; \
-name: \
-	.quad GLUE(.,name); \
-	.quad .TOC.@tocbase; \
-	.quad 0; \
-	.previous; \
-	.type GLUE(.,name),@function; \
-GLUE(.,name):
+#endif
 
 #else /* 32-bit */
 

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index d660dc3..6d59072 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h

@@ -449,7 +449,7 @@
 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
-extern void power7_nap(void);
+extern void power7_nap(int check_irq);
 extern void power7_sleep(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4852bcf..bffd89d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h

@@ -215,6 +215,7 @@
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
 #define   TEXASR_FS	__MASK(63-36)	/* Transaction Failure Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
+#define   TEXASR_FS     __MASK(63-36) /* TEXASR Failure Summary */
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
@@ -224,6 +225,7 @@
 #define   CTRL_TE	0x00c00000	/* thread enable */
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DAWR	0xB4
+#define SPRN_RPR	0xBA	/* Relative Priority Register */
 #define SPRN_CIABR	0xBB
 #define   CIABR_PRIV		0x3
 #define   CIABR_PRIV_USER	1
@@ -272,8 +274,10 @@
 #define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
 #define SPRN_IC		0x350	/* Virtual Instruction Count */
 #define SPRN_VTB	0x351	/* Virtual Time Base */
+#define SPRN_LDBAR	0x352	/* LD Base Address Register */
 #define SPRN_PMICR	0x354   /* Power Management Idle Control Reg */
 #define SPRN_PMSR	0x355   /* Power Management Status Reg */
+#define SPRN_PMMAR	0x356	/* Power Management Memory Activity Register */
 #define SPRN_PMCR	0x374	/* Power Management Control Register */
 
 /* HFSCR and FSCR bit numbers are the same */
@@ -433,6 +437,12 @@
 #define HID0_BTCD	(1<<1)		/* Branch target cache disable */
 #define HID0_NOPDST	(1<<1)		/* No-op dst, dstt, etc. instr. */
 #define HID0_NOPTI	(1<<0)		/* No-op dcbt and dcbst instr. */
+/* POWER8 HID0 bits */
+#define HID0_POWER8_4LPARMODE	__MASK(61)
+#define HID0_POWER8_2LPARMODE	__MASK(57)
+#define HID0_POWER8_1TO2LPAR	__MASK(52)
+#define HID0_POWER8_1TO4LPAR	__MASK(51)
+#define HID0_POWER8_DYNLPARDIS	__MASK(48)
 
 #define SPRN_HID1	0x3F1		/* Hardware Implementation Register 1 */
 #ifdef CONFIG_6xx

diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
index 3d52a11..3ba9c6f 100644
--- a/arch/powerpc/include/asm/reg_a2.h
+++ b/arch/powerpc/include/asm/reg_a2.h

@@ -110,15 +110,6 @@
 #define TLB1_UR			ASM_CONST(0x0000000000000002)
 #define TLB1_SR			ASM_CONST(0x0000000000000001)
 
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-#define WSP_UART_PHYS	0xffc000c000
-/* This needs to be careful chosen to hit a !0 congruence class
- * in the TLB since we bolt it in way 3, which is already occupied
- * by our linear mapping primary bolted entry in CC 0.
- */
-#define WSP_UART_VIRT	0xf000000000001000
-#endif
-
 /* A2 erativax attributes definitions */
 #define ERATIVAX_RS_IS_ALL		0x000
 #define ERATIVAX_RS_IS_TID		0x040

diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 5217903..a5e930a 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h

@@ -50,6 +50,7 @@
 #endif
 }
 
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 #undef dereference_function_descriptor
 static inline void *dereference_function_descriptor(void *ptr)
 {
@@ -60,6 +61,7 @@
 		ptr = p;
 	return ptr;
 }
+#endif
 
 #endif
 

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ff51046..5a6614a 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h

@@ -68,14 +68,6 @@
 void generic_set_cpu_dead(unsigned int cpu);
 void generic_set_cpu_up(unsigned int cpu);
 int generic_check_cpu_restart(unsigned int cpu);
-
-extern void inhibit_secondary_onlining(void);
-extern void uninhibit_secondary_onlining(void);
-
-#else /* HOTPLUG_CPU */
-static inline void inhibit_secondary_onlining(void) {}
-static inline void uninhibit_secondary_onlining(void) {}
-
 #endif
 
 #ifdef CONFIG_PPC64

diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 0dffad6..e40010a 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h

@@ -10,9 +10,7 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
-#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
-#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
-#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
-#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 0e83e7d..58abeda 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h

@@ -16,13 +16,15 @@
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
 #ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_tar(struct thread_struct *prev)
+static inline void save_early_sprs(struct thread_struct *prev)
 {
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		prev->tar = mfspr(SPRN_TAR);
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		prev->dscr = mfspr(SPRN_DSCR);
 }
 #else
-static inline void save_tar(struct thread_struct *prev) {}
+static inline void save_early_sprs(struct thread_struct *prev) {}
 #endif
 
 extern void enable_kernel_fp(void);
@@ -84,6 +86,8 @@
 {
 #ifdef CONFIG_PPC_BOOK3S_64
     /* EBB perf events are not inherited, so clear all EBB state. */
+    t->thread.ebbrr = 0;
+    t->thread.ebbhr = 0;
     t->thread.bescr = 0;
     t->thread.mmcr2 = 0;
     t->thread.mmcr0 = 0;

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index ea4dc3a..babbeca 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h

@@ -62,7 +62,7 @@
 SYSCALL(ni_syscall)
 SYSCALL_SPU(setpgid)
 SYSCALL(ni_syscall)
-SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
+SYSX(sys_ni_syscall,sys_olduname,sys_olduname)
 SYSCALL_SPU(umask)
 SYSCALL_SPU(chroot)
 COMPAT_SYS(ustat)
@@ -190,7 +190,7 @@
 SYSCALL_SPU(capget)
 SYSCALL_SPU(capset)
 COMPAT_SYS(sigaltstack)
-COMPAT_SYS_SPU(sendfile)
+SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 PPC_SYS(vfork)
@@ -258,7 +258,7 @@
 COMPAT_SYS_SPU(utimes)
 COMPAT_SYS_SPU(statfs64)
 COMPAT_SYS_SPU(fstatfs64)
-SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
+SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64)
 PPC_SYS_SPU(rtas)
 OLDSYS(debug_setcontext)
 SYSCALL(ni_syscall)
@@ -295,7 +295,7 @@
 SYSCALL_SPU(mknodat)
 SYSCALL_SPU(fchownat)
 COMPAT_SYS_SPU(futimesat)
-SYSX_SPU(sys_newfstatat, sys_fstatat64, sys_fstatat64)
+SYSX_SPU(sys_newfstatat,sys_fstatat64,sys_fstatat64)
 SYSCALL_SPU(unlinkat)
 SYSCALL_SPU(renameat)
 SYSCALL_SPU(linkat)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 6c8a8c5..5f1048e 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h

@@ -16,19 +16,6 @@
 
 #include <asm/mmzone.h>
 
-static inline int cpu_to_node(int cpu)
-{
-	int nid;
-
-	nid = numa_cpu_lookup_table[cpu];
-
-	/*
-	 * During early boot, the numa-cpu lookup table might not have been
-	 * setup for all CPUs yet. In such cases, default to node 0.
-	 */
-	return (nid < 0) ? 0 : nid;
-}
-
 #define parent_node(node)	(node)
 
 #define cpumask_of_node(node) ((node) == -1 ?				\

diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
deleted file mode 100644
index c7dc830..0000000
--- a/arch/powerpc/include/asm/wsp.h
+++ /dev/null

@@ -1,14 +0,0 @@
-/*
- *  Copyright 2011 Michael Ellerman, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-#ifndef __ASM_POWERPC_WSP_H
-#define __ASM_POWERPC_WSP_H
-
-extern int wsp_get_chip_id(struct device_node *dn);
-
-#endif /* __ASM_POWERPC_WSP_H */

diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 48be855..7a3f795 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild

@@ -15,7 +15,6 @@
 header-y += ipcbuf.h
 header-y += kvm.h
 header-y += kvm_para.h
-header-y += linkage.h
 header-y += mman.h
 header-y += msgbuf.h
 header-y += nvram.h

diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 5b76579..de2c0e4 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h

@@ -41,5 +41,6 @@
 #define PPC_FEATURE2_EBB		0x10000000
 #define PPC_FEATURE2_ISEL		0x08000000
 #define PPC_FEATURE2_TAR		0x04000000
+#define PPC_FEATURE2_VEC_CRYPTO		0x02000000
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */

diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 7e39c91..59dad11 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h

@@ -291,9 +291,17 @@
 #define R_PPC64_DTPREL16_HIGHERA 104 /* half16	(sym+add)@dtprel@highera */
 #define R_PPC64_DTPREL16_HIGHEST 105 /* half16	(sym+add)@dtprel@highest */
 #define R_PPC64_DTPREL16_HIGHESTA 106 /* half16	(sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD		107
+#define R_PPC64_TLSLD		108
+#define R_PPC64_TOCSAVE		109
+
+#define R_PPC64_REL16		249
+#define R_PPC64_REL16_LO	250
+#define R_PPC64_REL16_HI	251
+#define R_PPC64_REL16_HA	252
 
 /* Keep this the last entry.  */
-#define R_PPC64_NUM		107
+#define R_PPC64_NUM		253
 
 /* There's actually a third entry here, but it's unused */
 struct ppc64_opd_entry

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fab19ec..670c312 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile

@@ -43,7 +43,6 @@
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
-obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 93e1465..f5995a9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c

@@ -248,6 +248,7 @@
 #endif
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
 	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+	DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
 	DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
 	DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));

diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
deleted file mode 100644
index 61f079e..0000000
--- a/arch/powerpc/kernel/cpu_setup_a2.S
+++ /dev/null

@@ -1,120 +0,0 @@
-/*
- *  A2 specific assembly support code
- *
- *  Copyright 2009 Ben Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/ppc_asm.h>
-#include <asm/ppc-opcode.h>
-#include <asm/processor.h>
-#include <asm/reg_a2.h>
-#include <asm/reg.h>
-#include <asm/thread_info.h>
-
-/*
- * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
- * This also prevents external LPID accesses but that isn't a problem when not a
- * guest. Under PV, this setting will be ignored and MMUCR will return the right
- * number of PID bits we can use.
- */
-#define MMUCR1_EXTEND_PID \
-	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
-	 MMUCR1_DTTID | MMUCR1_DCCD)
-
-/*
- * Use extended PIDs if enabled.
- * Don't clear the ERATs on context sync events and enable I & D LRU.
- * Enable ERAT back invalidate when tlbwe overwrites an entry.
- */
-#define INITIAL_MMUCR1 \
-	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
-	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
-
-_GLOBAL(__setup_cpu_a2)
-	/* Some of these are actually thread local and some are
-	 * core local but doing it always won't hurt
-	 */
-
-#ifdef CONFIG_PPC_ICSWX
-	/* Make sure ACOP starts out as zero */
-	li	r3,0
-	mtspr   SPRN_ACOP,r3
-
-	/* Skip the following if we are in Guest mode */
-	mfmsr	r3
-	andis.	r0,r3,MSR_GS@h
-	bne	_icswx_skip_guest
-
-	/* Enable icswx instruction */
-	mfspr   r3,SPRN_A2_CCR2
-	ori     r3,r3,A2_CCR2_ENABLE_ICSWX
-	mtspr   SPRN_A2_CCR2,r3
-
-	/* Unmask all CTs in HACOP */
-	li      r3,-1
-	mtspr   SPRN_HACOP,r3
-_icswx_skip_guest:
-#endif /* CONFIG_PPC_ICSWX */
-
-	/* Enable doorbell */
-	mfspr   r3,SPRN_A2_CCR2
-	oris     r3,r3,A2_CCR2_ENABLE_PC@h
-	mtspr   SPRN_A2_CCR2,r3
-	isync
-
-	/* Setup CCR0 to disable power saving for now as it's busted
-	 * in the current implementations. Setup CCR1 to wake on
-	 * interrupts normally (we write the default value but who
-	 * knows what FW may have clobbered...)
-	 */
-	li	r3,0
-	mtspr	SPRN_A2_CCR0, r3
-	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
-	mtspr	SPRN_A2_CCR1, r3
-
-	/* Initialise MMUCR1 */
-	lis	r3,INITIAL_MMUCR1@h
-	ori	r3,r3,INITIAL_MMUCR1@l
-	mtspr	SPRN_MMUCR1,r3
-
-	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
-	LOAD_REG_IMMEDIATE(r3, 0x000a7531)
-	mtspr	SPRN_MMUCR2,r3
-
-	/* Set MMUCR3 to write all thids bit to the TLB */
-	LOAD_REG_IMMEDIATE(r3, 0x0000000f)
-	mtspr	SPRN_MMUCR3,r3
-
-	/* Don't do ERAT stuff if running guest mode */
-	mfmsr	r3
-	andis.	r0,r3,MSR_GS@h
-	bne	1f
-
-	/* Now set the I-ERAT watermark to 15 */
-	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
-	mtspr	SPRN_MMUCR0, r4
-	li	r4,A2_IERAT_SIZE-1
-	PPC_ERATWE(R4,R4,3)
-
-	/* Now set the D-ERAT watermark to 31 */
-	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
-	mtspr	SPRN_MMUCR0, r4
-	li	r4,A2_DERAT_SIZE-1
-	PPC_ERATWE(R4,R4,3)
-
-	/* And invalidate the beast just in case. That won't get rid of
-	 * a bolted entry though it will be in LRU and so will go away eventually
-	 * but let's not bother for now
-	 */
-	PPC_ERATILX(0,0,R0)
-1:
-	blr
-
-_GLOBAL(__restore_cpu_a2)
-	b	__setup_cpu_a2

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index cc2d896..4f1393d 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S

@@ -94,12 +94,12 @@
 _GLOBAL(__setup_cpu_e6500)
 	mflr	r6
 #ifdef CONFIG_PPC64
-	bl	.setup_altivec_ivors
+	bl	setup_altivec_ivors
 	/* Touch IVOR42 only if the CPU supports E.HV category */
 	mfspr	r10,SPRN_MMUCFG
 	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
 	beq	1f
-	bl	.setup_lrat_ivor
+	bl	setup_lrat_ivor
 1:
 #endif
 	bl	setup_pw20_idle
@@ -164,15 +164,15 @@
 #ifdef CONFIG_PPC_BOOK3E_64
 _GLOBAL(__restore_cpu_e6500)
 	mflr	r5
-	bl	.setup_altivec_ivors
+	bl	setup_altivec_ivors
 	/* Touch IVOR42 only if the CPU supports E.HV category */
 	mfspr	r10,SPRN_MMUCFG
 	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
 	beq	1f
-	bl	.setup_lrat_ivor
+	bl	setup_lrat_ivor
 1:
-	bl	.setup_pw20_idle
-	bl	.setup_altivec_idle
+	bl	setup_pw20_idle
+	bl	setup_altivec_idle
 	bl	__restore_cpu_e5500
 	mtlr	r5
 	blr
@@ -181,9 +181,9 @@
 	mflr	r4
 	bl	__e500_icache_setup
 	bl	__e500_dcache_setup
-	bl	.__setup_base_ivors
-	bl	.setup_perfmon_ivor
-	bl	.setup_doorbell_ivors
+	bl	__setup_base_ivors
+	bl	setup_perfmon_ivor
+	bl	setup_doorbell_ivors
 	/*
 	 * We only want to touch IVOR38-41 if we're running on hardware
 	 * that supports category E.HV.  The architectural way to determine
@@ -192,7 +192,7 @@
 	mfspr	r10,SPRN_MMUCFG
 	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
 	beq	1f
-	bl	.setup_ehv_ivors
+	bl	setup_ehv_ivors
 1:
 	mtlr	r4
 	blr
@@ -201,9 +201,9 @@
 	mflr	r5
 	bl	__e500_icache_setup
 	bl	__e500_dcache_setup
-	bl	.__setup_base_ivors
-	bl	.setup_perfmon_ivor
-	bl	.setup_doorbell_ivors
+	bl	__setup_base_ivors
+	bl	setup_perfmon_ivor
+	bl	setup_doorbell_ivors
 	/*
 	 * We only want to touch IVOR38-41 if we're running on hardware
 	 * that supports category E.HV.  The architectural way to determine
@@ -212,7 +212,7 @@
 	mfspr	r10,SPRN_MMUCFG
 	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
 	beq	1f
-	bl	.setup_ehv_ivors
+	bl	setup_ehv_ivors
 	b	2f
 1:
 	ld	r10,CPU_SPEC_FEATURES(r4)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 1557e7c..4673353 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S

@@ -56,6 +56,7 @@
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power8
@@ -74,6 +75,7 @@
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr   r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power8

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c1faade..965291b 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c

@@ -109,7 +109,8 @@
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
 #define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
 				 PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
-				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)
 #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -2148,44 +2149,6 @@
 	}
 #endif /* CONFIG_PPC32 */
 #endif /* CONFIG_E500 */
-
-#ifdef CONFIG_PPC_A2
-	{	/* Standard A2 (>= DD2) + FPU core */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00480000,
-		.cpu_name		= "A2 (>= DD2)",
-		.cpu_features		= CPU_FTRS_A2,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTRS_A2,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 0,
-		.cpu_setup		= __setup_cpu_a2,
-		.cpu_restore		= __restore_cpu_a2,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppca2",
-	},
-	{	/* This is a default entry to get going, to be replaced by
-		 * a real one at some stage
-		 */
-#define CPU_FTRS_BASE_BOOK3E	(CPU_FTR_USE_TB | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
-	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "Book3E",
-		.cpu_features		= CPU_FTRS_BASE_BOOK3E,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
-					  MMU_FTR_USE_TLBIVAX_BCAST |
-					  MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 0,
-		.machine_check		= machine_check_generic,
-		.platform		= "power6",
-	},
-#endif /* CONFIG_PPC_A2 */
 };
 
 static struct cpu_spec the_cpu_spec;

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e7b76a6..86e2570 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c

@@ -22,6 +22,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/debugfs.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -35,6 +36,7 @@
 #include <linux/of.h>
 
 #include <linux/atomic.h>
+#include <asm/debug.h>
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
 #include <asm/io.h>
@@ -87,23 +89,22 @@
 /* Time to wait for a PCI slot to report status, in milliseconds */
 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
 
+/*
+ * EEH probe mode support, which is part of the flags,
+ * is to support multiple platforms for EEH. Some platforms
+ * like pSeries do PCI emulation based on device tree.
+ * However, other platforms like powernv probe PCI devices
+ * from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for
+ * particular OF node or PCI device so that the corresponding
+ * PE would be created there.
+ */
+int eeh_subsystem_flags;
+EXPORT_SYMBOL(eeh_subsystem_flags);
+
 /* Platform dependent EEH operations */
 struct eeh_ops *eeh_ops = NULL;
 
-bool eeh_subsystem_enabled = false;
-EXPORT_SYMBOL(eeh_subsystem_enabled);
-
-/*
- * EEH probe mode support. The intention is to support multiple
- * platforms for EEH. Some platforms like pSeries do PCI emunation
- * based on device tree. However, other platforms like powernv probe
- * PCI devices from hardware. The flag is used to distinguish that.
- * In addition, struct eeh_ops::probe would be invoked for particular
- * OF node or PCI device so that the corresponding PE would be created
- * there.
- */
-int eeh_probe_mode;
-
 /* Lock to avoid races due to multiple reports of an error */
 DEFINE_RAW_SPINLOCK(confirm_error_lock);
 
@@ -133,6 +134,15 @@
 
 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
+static int __init eeh_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+	return 1;
+}
+__setup("eeh=", eeh_setup);
+
 /**
  * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
  * @edev: device to report data for
@@ -145,73 +155,67 @@
 static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 {
 	struct device_node *dn = eeh_dev_to_of_node(edev);
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	u32 cfg;
 	int cap, i;
 	int n = 0;
 
 	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
-	printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
+	pr_warn("EEH: of node=%s\n", dn->full_name);
 
 	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
-	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
 
 	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
-	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
-
-	if (!dev) {
-		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
-		return n;
-	}
+	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
 
 	/* Gather bridge-specific registers */
-	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+	if (edev->mode & EEH_DEV_BRIDGE) {
 		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
-		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
 
 		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
-		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+		pr_warn("EEH: Bridge control: %04x\n", cfg);
 	}
 
 	/* Dump out the PCI-X command and status regs */
-	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	cap = edev->pcix_cap;
 	if (cap) {
 		eeh_ops->read_config(dn, cap, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
-		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
 
 		eeh_ops->read_config(dn, cap+4, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
-		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+		pr_warn("EEH: PCI-X status: %08x\n", cfg);
 	}
 
-	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
-	if (pci_is_pcie(dev)) {
+	/* If PCI-E capable, dump PCI-E cap 10 */
+	cap = edev->pcie_cap;
+	if (cap) {
 		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
-		printk(KERN_WARNING
-		       "EEH: PCI-E capabilities and status follow:\n");
+		pr_warn("EEH: PCI-E capabilities and status follow:\n");
 
 		for (i=0; i<=8; i++) {
-			eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg);
+			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+			pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);
 		}
+	}
 
-		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-		if (cap) {
-			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
-			printk(KERN_WARNING
-			       "EEH: PCI-E AER capability register set follows:\n");
+	/* If AER capable, dump it */
+	cap = edev->aer_cap;
+	if (cap) {
+		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+		pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
-			for (i=0; i<14; i++) {
-				eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
-				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
-			}
+		for (i=0; i<14; i++) {
+			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+			pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);
 		}
 	}
 
@@ -232,21 +236,19 @@
 {
 	size_t loglen = 0;
 	struct eeh_dev *edev, *tmp;
-	bool valid_cfg_log = true;
 
 	/*
 	 * When the PHB is fenced or dead, it's pointless to collect
 	 * the data from PCI config space because it should return
 	 * 0xFF's. For ER, we still retrieve the data from the PCI
 	 * config space.
+	 *
+	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
+	 * 0xFF's are always returned from PCI config space.
 	 */
-	if (eeh_probe_mode_dev() &&
-	    (pe->type & EEH_PE_PHB) &&
-	    (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
-		valid_cfg_log = false;
-
-	if (valid_cfg_log) {
-		eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+	if (!(pe->type & EEH_PE_PHB)) {
+		if (eeh_probe_mode_devtree())
+			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 		eeh_ops->configure_bridge(pe);
 		eeh_pe_restore_bars(pe);
 
@@ -309,7 +311,7 @@
 
 	/* If the PHB has been in problematic state */
 	eeh_serialize_lock(&flags);
-	if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+	if (phb_pe->state & EEH_PE_ISOLATED) {
 		ret = 0;
 		goto out;
 	}
@@ -328,8 +330,8 @@
 	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 	eeh_serialize_unlock(flags);
 
-	pr_err("EEH: PHB#%x failure detected\n",
-		phb_pe->phb->global_number);
+	pr_err("EEH: PHB#%x failure detected, location: %s\n",
+		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
 	dump_stack();
 	eeh_send_failure_event(phb_pe);
 
@@ -356,10 +358,11 @@
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
 	int ret;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	unsigned long flags;
 	struct device_node *dn;
 	struct pci_dev *dev;
-	struct eeh_pe *pe;
+	struct eeh_pe *pe, *parent_pe, *phb_pe;
 	int rc = 0;
 	const char *location;
 
@@ -437,14 +440,34 @@
 	 */
 	if ((ret < 0) ||
 	    (ret == EEH_STATE_NOT_SUPPORT) ||
-	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+	    ((ret & active_flags) == active_flags)) {
 		eeh_stats.false_positives++;
 		pe->false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
+	/*
+	 * It is a corner case, but the parent PE may have been
+	 * put into frozen state as well. We should take care
+	 * of that first.
+	 */
+	parent_pe = pe->parent;
+	while (parent_pe) {
+		/* Hit the ceiling ? */
+		if (parent_pe->type & EEH_PE_PHB)
+			break;
+
+		/* Frozen parent PE ? */
+		ret = eeh_ops->get_state(parent_pe, NULL);
+		if (ret > 0 &&
+		    (ret & active_flags) != active_flags)
+			pe = parent_pe;
+
+		/* Next parent level */
+		parent_pe = parent_pe->parent;
+	}
+
 	eeh_stats.slot_resets++;
 
 	/* Avoid repeated reports of this failure, including problems
@@ -458,8 +481,11 @@
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out.
 	 */
-	pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
-		pe->addr, pe->phb->global_number);
+	phb_pe = eeh_phb_pe_get(pe->phb);
+	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+	       pe->phb->global_number, pe->addr);
+	pr_err("EEH: PE location: %s, PHB location: %s\n",
+	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
 	dump_stack();
 
 	eeh_send_failure_event(pe);
@@ -515,16 +541,42 @@
  */
 int eeh_pci_enable(struct eeh_pe *pe, int function)
 {
-	int rc;
+	int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+
+	/*
+	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
+	 * Also, it's pointless to enable them on unfrozen PE. So
+	 * we have the check here.
+	 */
+	if (function == EEH_OPT_THAW_MMIO ||
+	    function == EEH_OPT_THAW_DMA) {
+		rc = eeh_ops->get_state(pe, NULL);
+		if (rc < 0)
+			return rc;
+
+		/* No need to enable, or already enabled */
+		if ((rc == EEH_STATE_NOT_SUPPORT) ||
+		    ((rc & flags) == flags))
+			return 0;
+	}
 
 	rc = eeh_ops->set_option(pe, function);
 	if (rc)
-		pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
-			__func__, function, pe->phb->global_number, pe->addr, rc);
+		pr_warn("%s: Unexpected state change %d on "
+			"PHB#%d-PE#%x, err=%d\n",
+			__func__, function, pe->phb->global_number,
+			pe->addr, rc);
 
 	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
-	   (function == EEH_OPT_THAW_MMIO))
+	if (rc <= 0)
+		return rc;
+
+	if ((function == EEH_OPT_THAW_MMIO) &&
+	    (rc & EEH_STATE_MMIO_ENABLED))
+		return 0;
+
+	if ((function == EEH_OPT_THAW_DMA) &&
+	    (rc & EEH_STATE_DMA_ENABLED))
 		return 0;
 
 	return rc;
@@ -612,26 +664,7 @@
 	else
 		eeh_ops->reset(pe, EEH_RESET_HOT);
 
-	/* The PCI bus requires that the reset be held high for at least
-	 * a 100 milliseconds. We wait a bit longer 'just in case'.
-	 */
-#define PCI_BUS_RST_HOLD_TIME_MSEC 250
-	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
-
-	/* We might get hit with another EEH freeze as soon as the
-	 * pci slot reset line is dropped. Make sure we don't miss
-	 * these, and clear the flag now.
-	 */
-	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
 	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-
-	/* After a PCI slot has been reset, the PCI Express spec requires
-	 * a 1.5 second idle time for the bus to stabilize, before starting
-	 * up traffic.
-	 */
-#define PCI_BUS_SETTLE_TIME_MSEC 1800
-	msleep(PCI_BUS_SETTLE_TIME_MSEC);
 }
 
 /**
@@ -651,6 +684,10 @@
 	for (i=0; i<3; i++) {
 		eeh_reset_pe_once(pe);
 
+		/*
+		 * EEH_PE_ISOLATED is expected to be removed after
+		 * BAR restore.
+		 */
 		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 		if ((rc & flags) == flags)
 			return 0;
@@ -826,8 +863,8 @@
 			&hose_list, list_node)
 			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
 	} else {
-		pr_warning("%s: Invalid probe mode %d\n",
-			   __func__, eeh_probe_mode);
+		pr_warn("%s: Invalid probe mode %x",
+			__func__, eeh_subsystem_flags);
 		return -EINVAL;
 	}
 
@@ -1102,10 +1139,45 @@
 	.release   = single_release,
 };
 
+#ifdef CONFIG_DEBUG_FS
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+	if (val)
+		eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+	else
+		eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+	/* Notify the backend */
+	if (eeh_ops->post_init)
+		eeh_ops->post_init();
+
+	return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+	if (eeh_enabled())
+		*val = 0x1ul;
+	else
+		*val = 0x0ul;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+			eeh_enable_dbgfs_set, "0x%llx\n");
+#endif
+
 static int __init eeh_init_proc(void)
 {
-	if (machine_is(pseries) || machine_is(powernv))
+	if (machine_is(pseries) || machine_is(powernv)) {
 		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+#ifdef CONFIG_DEBUG_FS
+		debugfs_create_file("eeh_enable", 0600,
+                                    powerpc_debugfs_root, NULL,
+                                    &eeh_enable_dbgfs_ops);
+#endif
+	}
+
 	return 0;
 }
 __initcall(eeh_init_proc);

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index bb61ca5..420da61 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c

@@ -171,6 +171,15 @@
 	}
 }
 
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	/* EEH device removed ? */
+	if (!edev || (edev->mode & EEH_DEV_REMOVED))
+		return true;
+
+	return false;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -187,10 +196,8 @@
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	/* We might not have the associated PCI device,
-	 * then we should continue for next one.
-	 */
-	if (!dev) return NULL;
+	if (!dev || eeh_dev_removed(edev))
+		return NULL;
 	dev->error_state = pci_channel_io_frozen;
 
 	driver = eeh_pcid_get(dev);
@@ -230,6 +237,9 @@
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
+	if (!dev || eeh_dev_removed(edev))
+		return NULL;
+
 	driver = eeh_pcid_get(dev);
 	if (!driver) return NULL;
 
@@ -267,7 +277,8 @@
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev) return NULL;
+	if (!dev || eeh_dev_removed(edev))
+		return NULL;
 	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
@@ -307,7 +318,8 @@
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
-	if (!dev) return NULL;
+	if (!dev || eeh_dev_removed(edev))
+		return NULL;
 	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
@@ -343,7 +355,8 @@
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
-	if (!dev) return NULL;
+	if (!dev || eeh_dev_removed(edev))
+		return NULL;
 	dev->error_state = pci_channel_io_perm_failure;
 
 	driver = eeh_pcid_get(dev);
@@ -380,6 +393,16 @@
 	if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
 		return NULL;
 
+	/*
+	 * We rely on count-based pcibios_release_device() to
+	 * detach permanently offlined PEs. Unfortunately, that's
+	 * not reliable enough. We might have the permanently
+	 * offlined PEs attached, but we needn't take care of
+	 * them and their child devices.
+	 */
+	if (eeh_dev_removed(edev))
+		return NULL;
+
 	driver = eeh_pcid_get(dev);
 	if (driver) {
 		eeh_pcid_put(dev);
@@ -417,6 +440,48 @@
 	return NULL;
 }
 
+/*
+ * Explicitly clear the PE's frozen state for PowerNV, where
+ * the PE stays frozen until BAR restore is completed. It's
+ * harmless to clear it for pSeries. To be consistent with
+ * PE reset (3 attempts), we try to clear the frozen state
+ * up to 3 times as well.
+ */
+static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	int i, rc;
+
+	for (i = 0; i < 3; i++) {
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+		if (rc)
+			continue;
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+		if (!rc)
+			break;
+	}
+
+	/* The PE has been isolated, clear it */
+	if (rc) {
+		pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
+			__func__, pe->phb->global_number, pe->addr, rc);
+		return (void *)pe;
+	}
+
+	return NULL;
+}
+
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+	void *rc;
+
+	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+	if (!rc)
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+	return rc ? -EIO : 0;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -451,19 +516,33 @@
 		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
 	}
 
-	/* Reset the pci controller. (Asserts RST#; resets config space).
+	/*
+	 * Reset the pci controller. (Asserts RST#; resets config space).
 	 * Reconfigure bridges and devices. Don't try to bring the system
 	 * up if the reset failed for some reason.
+	 *
+	 * During the reset, it's very dangerous to have uncontrolled PCI
+	 * config accesses. So we prefer to block them. However, controlled
+	 * PCI config accesses initiated from EEH itself are allowed.
 	 */
+	eeh_pe_state_mark(pe, EEH_PE_RESET);
 	rc = eeh_reset_pe(pe);
-	if (rc)
+	if (rc) {
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		return rc;
+	}
 
 	pci_lock_rescan_remove();
 
 	/* Restore PE */
 	eeh_ops->configure_bridge(pe);
 	eeh_pe_restore_bars(pe);
+	eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+	/* Clear frozen state */
+	rc = eeh_clear_pe_frozen_state(pe);
+	if (rc)
+		return rc;
 
 	/* Give the system 5 seconds to finish running the user-space
 	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
@@ -573,7 +652,6 @@
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			pr_info("EEH: Notify device drivers to resume I/O\n");
-			result = PCI_ERS_RESULT_NONE;
 			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
 		}
 	}
@@ -585,10 +663,17 @@
 
 		if (rc < 0)
 			goto hard_fail;
-		if (rc)
+		if (rc) {
 			result = PCI_ERS_RESULT_NEED_RESET;
-		else
+		} else {
+			/*
+			 * We didn't do PE reset for the case. The PE
+			 * is still in frozen state. Clear it before
+			 * resuming the PE.
+			 */
+			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 			result = PCI_ERS_RESULT_RECOVERED;
+		}
 	}
 
 	/* If any device has a hard failure, then shut off everything. */
@@ -650,8 +735,17 @@
 	/* Notify all devices that they're about to go down. */
 	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
-	/* Shut down the device drivers for good. */
+	/* Mark the PE to be removed permanently */
+	pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+
+	/*
+	 * Shut down the device drivers for good. We mark
+	 * all removed devices correctly to avoid accessing
+	 * their PCI config any more.
+	 */
 	if (frozen_bus) {
+		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
 		pci_lock_rescan_remove();
 		pcibios_remove_pci_devices(frozen_bus);
 		pci_unlock_rescan_remove();
@@ -676,14 +770,13 @@
 			eeh_serialize_lock(&flags);
 
 			/* Purge all events */
-			eeh_remove_event(NULL);
+			eeh_remove_event(NULL, true);
 
 			list_for_each_entry(hose, &hose_list, list_node) {
 				phb_pe = eeh_phb_pe_get(hose);
 				if (!phb_pe) continue;
 
-				eeh_pe_state_mark(phb_pe,
-					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+				eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 			}
 
 			eeh_serialize_unlock(flags);
@@ -696,11 +789,10 @@
 			eeh_serialize_lock(&flags);
 
 			/* Purge all events of the PHB */
-			eeh_remove_event(pe);
+			eeh_remove_event(pe, true);
 
 			if (rc == EEH_NEXT_ERR_DEAD_PHB)
-				eeh_pe_state_mark(pe,
-					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+				eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 			else
 				eeh_pe_state_mark(pe,
 					EEH_PE_ISOLATED | EEH_PE_RECOVERING);
@@ -724,12 +816,14 @@
 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
 			eeh_handle_normal_event(pe);
+			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 		} else {
 			pci_lock_rescan_remove();
 			list_for_each_entry(hose, &hose_list, list_node) {
 				phb_pe = eeh_phb_pe_get(hose);
 				if (!phb_pe ||
-				    !(phb_pe->state & EEH_PE_PHB_DEAD))
+				    !(phb_pe->state & EEH_PE_ISOLATED) ||
+				    (phb_pe->state & EEH_PE_RECOVERING))
 					continue;
 
 				/* Notify all devices to be down */

diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 72d748b..4eefb6e 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c

@@ -152,24 +152,33 @@
 /**
  * eeh_remove_event - Remove EEH event from the queue
  * @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
  *
  * On PowerNV platform, we might have subsequent coming events
  * is part of the former one. For that case, those subsequent
  * coming events are totally duplicated and unnecessary, thus
  * they should be removed.
  */
-void eeh_remove_event(struct eeh_pe *pe)
+void eeh_remove_event(struct eeh_pe *pe, bool force)
 {
 	unsigned long flags;
 	struct eeh_event *event, *tmp;
 
+	/*
+	 * If we have NULL PE passed in, we have dead IOC
+	 * or we're sure we can report all existing errors
+	 * by the caller.
+	 *
+	 * Without "force", an event whose associated PE has
+	 * been isolated won't be removed, to avoid losing
+	 * the event.
+	 */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
 	list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
-		/*
-		 * If we don't have valid PE passed in, that means
-		 * we already have event corresponding to dead IOC
-		 * and all events should be purged.
-		 */
+		if (!force && event->pe &&
+		    (event->pe->state & EEH_PE_ISOLATED))
+			continue;
+
 		if (!pe) {
 			list_del(&event->list);
 			kfree(event);

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f0c353f..fbd01eb 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c

@@ -503,13 +503,17 @@
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 
-	/*
-	 * Mark the PE with the indicated state. Also,
-	 * the associated PCI device will be put into
-	 * I/O frozen state to avoid I/O accesses from
-	 * the PCI device driver.
-	 */
+	/* Keep the state of permanently removed PE intact */
+	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+	    (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+		return NULL;
+
 	pe->state |= state;
+
+	/* Offline PCI devices if applicable */
+	if (state != EEH_PE_ISOLATED)
+		return NULL;
+
 	eeh_pe_for_each_dev(pe, edev, tmp) {
 		pdev = eeh_dev_to_pci_dev(edev);
 		if (pdev)
@@ -532,6 +536,27 @@
 	eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
 }
 
+static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+{
+	struct eeh_dev *edev = data;
+	int mode = *((int *)flag);
+
+	edev->mode |= mode;
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_dev_mode_mark - Mark mode for all devices under the PE
+ * @pe: EEH PE
+ *
+ * Mark the specified mode for all child devices of the PE.
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+	eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
 /**
  * __eeh_pe_state_clear - Clear state for the PE
  * @data: EEH PE
@@ -546,8 +571,16 @@
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 
+	/* Keep the state of permanently removed PE intact */
+	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+	    (state & EEH_PE_ISOLATED))
+		return NULL;
+
 	pe->state &= ~state;
-	pe->check_count = 0;
+
+	/* Clear check count since last isolation */
+	if (state & EEH_PE_ISOLATED)
+		pe->check_count = 0;
 
 	return NULL;
 }
@@ -759,6 +792,66 @@
 }
 
 /**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the location code of the given PE. If the primary PE bus
+ * is root bus, we will grab location code from PHB device tree node
+ * or root port. Otherwise, the upstream bridge's device tree node
+ * of the primary PE bus will be checked for the location code.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+	struct pci_controller *hose;
+	struct pci_bus *bus = eeh_pe_bus_get(pe);
+	struct pci_dev *pdev;
+	struct device_node *dn;
+	const char *loc;
+
+	if (!bus)
+		return "N/A";
+
+	/* PHB PE or root PE ? */
+	if (pci_is_root_bus(bus)) {
+		hose = pci_bus_to_host(bus);
+		loc = of_get_property(hose->dn,
+				"ibm,loc-code", NULL);
+		if (loc)
+			return loc;
+		loc = of_get_property(hose->dn,
+				"ibm,io-base-loc-code", NULL);
+		if (loc)
+			return loc;
+
+		pdev = pci_get_slot(bus, 0x0);
+	} else {
+		pdev = bus->self;
+	}
+
+	if (!pdev) {
+		loc = "N/A";
+		goto out;
+	}
+
+	dn = pci_device_to_OF_node(pdev);
+	if (!dn) {
+		loc = "N/A";
+		goto out;
+	}
+
+	loc = of_get_property(dn, "ibm,loc-code", NULL);
+	if (!loc)
+		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
+	if (!loc)
+		loc = "N/A";
+
+out:
+	if (pci_is_root_bus(bus) && pdev)
+		pci_dev_put(pdev);
+	return loc;
+}
+
+/**
  * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
  * @pe: EEH PE
  *

diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 5d753d4..e2595ba 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c

@@ -59,6 +59,9 @@
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 	int rc=0;
 
+	if (!eeh_enabled())
+		return;
+
 	if (edev && (edev->mode & EEH_DEV_SYSFS))
 		return;
 

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 662c6dd..6528c5e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S

@@ -39,8 +39,8 @@
  * System calls.
  */
 	.section	".toc","aw"
-.SYS_CALL_TABLE:
-	.tc .sys_call_table[TC],.sys_call_table
+SYS_CALL_TABLE:
+	.tc sys_call_table[TC],sys_call_table
 
 /* This value is used to mark exception frames on the stack. */
 exception_marker:
@@ -106,7 +106,7 @@
 	LDX_BE	r10,0,r10		/* get log write index */
 	cmpd	cr1,r11,r10
 	beq+	cr1,33f
-	bl	.accumulate_stolen_time
+	bl	accumulate_stolen_time
 	REST_GPR(0,r1)
 	REST_4GPRS(3,r1)
 	REST_2GPRS(7,r1)
@@ -143,7 +143,7 @@
 	std	r10,SOFTE(r1)
 
 #ifdef SHOW_SYSCALLS
-	bl	.do_show_syscall
+	bl	do_show_syscall
 	REST_GPR(0,r1)
 	REST_4GPRS(3,r1)
 	REST_2GPRS(7,r1)
@@ -162,7 +162,7 @@
  * Need to vector to 32 Bit or default sys_call_table here,
  * based on caller's run-mode / personality.
  */
-	ld	r11,.SYS_CALL_TABLE@toc(2)
+	ld	r11,SYS_CALL_TABLE@toc(2)
 	andi.	r10,r10,_TIF_32BIT
 	beq	15f
 	addi	r11,r11,8	/* use 32-bit syscall entries */
@@ -174,14 +174,14 @@
 	clrldi	r8,r8,32
 15:
 	slwi	r0,r0,4
-	ldx	r10,r11,r0	/* Fetch system call handler [ptr] */
-	mtctr   r10
+	ldx	r12,r11,r0	/* Fetch system call handler [ptr] */
+	mtctr   r12
 	bctrl			/* Call handler */
 
 syscall_exit:
 	std	r3,RESULT(r1)
 #ifdef SHOW_SYSCALLS
-	bl	.do_show_syscall_exit
+	bl	do_show_syscall_exit
 	ld	r3,RESULT(r1)
 #endif
 	CURRENT_THREAD_INFO(r12, r1)
@@ -248,9 +248,9 @@
 	
 /* Traced system call support */
 syscall_dotrace:
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_syscall_trace_enter
+	bl	do_syscall_trace_enter
 	/*
 	 * Restore argument registers possibly just changed.
 	 * We use the return value of do_syscall_trace_enter
@@ -308,7 +308,7 @@
 4:	/* Anything else left to do? */
 	SET_DEFAULT_THREAD_PPR(r3, r10)		/* Set thread.ppr = 3 */
 	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
-	beq	.ret_from_except_lite
+	beq	ret_from_except_lite
 
 	/* Re-enable interrupts */
 #ifdef CONFIG_PPC_BOOK3E
@@ -319,10 +319,10 @@
 	mtmsrd	r10,1
 #endif /* CONFIG_PPC_BOOK3E */
 
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_syscall_trace_leave
-	b	.ret_from_except
+	bl	do_syscall_trace_leave
+	b	ret_from_except
 
 /* Save non-volatile GPRs, if not already saved. */
 _GLOBAL(save_nvgprs)
@@ -345,52 +345,48 @@
  */
 
 _GLOBAL(ppc_fork)
-	bl	.save_nvgprs
-	bl	.sys_fork
+	bl	save_nvgprs
+	bl	sys_fork
 	b	syscall_exit
 
 _GLOBAL(ppc_vfork)
-	bl	.save_nvgprs
-	bl	.sys_vfork
+	bl	save_nvgprs
+	bl	sys_vfork
 	b	syscall_exit
 
 _GLOBAL(ppc_clone)
-	bl	.save_nvgprs
-	bl	.sys_clone
+	bl	save_nvgprs
+	bl	sys_clone
 	b	syscall_exit
 
 _GLOBAL(ppc32_swapcontext)
-	bl	.save_nvgprs
-	bl	.compat_sys_swapcontext
+	bl	save_nvgprs
+	bl	compat_sys_swapcontext
 	b	syscall_exit
 
 _GLOBAL(ppc64_swapcontext)
-	bl	.save_nvgprs
-	bl	.sys_swapcontext
+	bl	save_nvgprs
+	bl	sys_swapcontext
 	b	syscall_exit
 
 _GLOBAL(ret_from_fork)
-	bl	.schedule_tail
+	bl	schedule_tail
 	REST_NVGPRS(r1)
 	li	r3,0
 	b	syscall_exit
 
 _GLOBAL(ret_from_kernel_thread)
-	bl	.schedule_tail
+	bl	schedule_tail
 	REST_NVGPRS(r1)
-	ld	r14, 0(r14)
 	mtlr	r14
 	mr	r3,r15
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	mr	r12,r14
+#endif
 	blrl
 	li	r3,0
 	b	syscall_exit
 
-	.section	".toc","aw"
-DSCR_DEFAULT:
-	.tc dscr_default[TC],dscr_default
-
-	.section	".text"
-
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -432,12 +428,6 @@
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
-	mfspr	r25,SPRN_DSCR
-	std	r25,THREAD_DSCR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
@@ -575,11 +565,10 @@
 #ifdef CONFIG_PPC64
 BEGIN_FTR_SECTION
 	lwz	r6,THREAD_DSCR_INHERIT(r4)
-	ld	r7,DSCR_DEFAULT@toc(2)
 	ld	r0,THREAD_DSCR(r4)
 	cmpwi	r6,0
 	bne	1f
-	ld	r0,0(r7)
+	ld	r0,PACA_DSCR(r13)
 1:
 BEGIN_FTR_SECTION_NESTED(70)
 	mfspr	r8, SPRN_FSCR
@@ -611,7 +600,7 @@
 _GLOBAL(ret_from_except)
 	ld	r11,_TRAP(r1)
 	andi.	r0,r11,1
-	bne	.ret_from_except_lite
+	bne	ret_from_except_lite
 	REST_NVGPRS(r1)
 
 _GLOBAL(ret_from_except_lite)
@@ -661,23 +650,23 @@
 #endif
 1:	andi.	r0,r4,_TIF_NEED_RESCHED
 	beq	2f
-	bl	.restore_interrupts
+	bl	restore_interrupts
 	SCHEDULE_USER
-	b	.ret_from_except_lite
+	b	ret_from_except_lite
 2:
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	andi.	r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
 	bne	3f		/* only restore TM if nothing else to do */
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.restore_tm_state
+	bl	restore_tm_state
 	b	restore
 3:
 #endif
-	bl	.save_nvgprs
-	bl	.restore_interrupts
+	bl	save_nvgprs
+	bl	restore_interrupts
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_notify_resume
-	b	.ret_from_except
+	bl	do_notify_resume
+	b	ret_from_except
 
 resume_kernel:
 	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
@@ -730,7 +719,7 @@
 	 * sure we are soft-disabled first and reconcile irq state.
 	 */
 	RECONCILE_IRQ_STATE(r3,r4)
-1:	bl	.preempt_schedule_irq
+1:	bl	preempt_schedule_irq
 
 	/* Re-test flags and eventually loop */
 	CURRENT_THREAD_INFO(r9, r1)
@@ -792,7 +781,7 @@
 	 */
 do_restore:
 #ifdef CONFIG_PPC_BOOK3E
-	b	.exception_return_book3e
+	b	exception_return_book3e
 #else
 	/*
 	 * Clear the reservation. If we know the CPU tracks the address of
@@ -907,7 +896,7 @@
 	 *
 	 * Still, this might be useful for things like hash_page
 	 */
-	bl	.__check_irq_replay
+	bl	__check_irq_replay
 	cmpwi	cr0,r3,0
  	beq	restore_no_replay
  
@@ -928,13 +917,13 @@
 	cmpwi	cr0,r3,0x500
 	bne	1f
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
- 	bl	.do_IRQ
-	b	.ret_from_except
+ 	bl	do_IRQ
+	b	ret_from_except
 1:	cmpwi	cr0,r3,0x900
 	bne	1f
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	.timer_interrupt
-	b	.ret_from_except
+	bl	timer_interrupt
+	b	ret_from_except
 #ifdef CONFIG_PPC_DOORBELL
 1:
 #ifdef CONFIG_PPC_BOOK3E
@@ -948,14 +937,14 @@
 #endif /* CONFIG_PPC_BOOK3E */
 	bne	1f
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	.doorbell_exception
-	b	.ret_from_except
+	bl	doorbell_exception
+	b	ret_from_except
 #endif /* CONFIG_PPC_DOORBELL */
-1:	b	.ret_from_except /* What else to do here ? */
+1:	b	ret_from_except /* What else to do here ? */
  
 unrecov_restore:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
+	bl	unrecoverable_exception
 	b	unrecov_restore
 
 #ifdef CONFIG_PPC_RTAS
@@ -1021,7 +1010,7 @@
         std	r6,PACASAVEDMSR(r13)
 
 	/* Setup our real return addr */	
-	LOAD_REG_ADDR(r4,.rtas_return_loc)
+	LOAD_REG_ADDR(r4,rtas_return_loc)
 	clrldi	r4,r4,2			/* convert to realmode address */
        	mtlr	r4
 
@@ -1045,7 +1034,7 @@
 	rfid
 	b	.	/* prevent speculative execution */
 
-_STATIC(rtas_return_loc)
+rtas_return_loc:
 	FIXUP_ENDIAN
 
 	/* relocation is off at this point */
@@ -1054,7 +1043,7 @@
 
 	bcl	20,31,$+4
 0:	mflr	r3
-	ld	r3,(1f-0b)(r3)		/* get &.rtas_restore_regs */
+	ld	r3,(1f-0b)(r3)		/* get &rtas_restore_regs */
 
 	mfmsr   r6
 	li	r0,MSR_RI
@@ -1071,9 +1060,9 @@
 	b	.	/* prevent speculative execution */
 
 	.align	3
-1:	.llong	.rtas_restore_regs
+1:	.llong	rtas_restore_regs
 
-_STATIC(rtas_restore_regs)
+rtas_restore_regs:
 	/* relocation is on at this point */
 	REST_GPR(2, r1)			/* Restore the TOC */
 	REST_GPR(13, r1)		/* Restore paca */
@@ -1173,7 +1162,7 @@
 _GLOBAL(_mcount)
 	blr
 
-_GLOBAL(ftrace_caller)
+_GLOBAL_TOC(ftrace_caller)
 	/* Taken from output of objdump from lib64/glibc */
 	mflr	r3
 	ld	r11, 0(r1)
@@ -1197,10 +1186,7 @@
 _GLOBAL(ftrace_stub)
 	blr
 #else
-_GLOBAL(mcount)
-	blr
-
-_GLOBAL(_mcount)
+_GLOBAL_TOC(_mcount)
 	/* Taken from output of objdump from lib64/glibc */
 	mflr	r3
 	ld	r11, 0(r1)
@@ -1238,7 +1224,7 @@
 	ld	r11, 112(r1)
 	addi	r3, r11, 16
 
-	bl	.prepare_ftrace_return
+	bl	prepare_ftrace_return
 	nop
 
 	ld	r0, 128(r1)
@@ -1254,7 +1240,7 @@
 	mr	r31, r1
 	stdu	r1, -112(r1)
 
-	bl	.ftrace_return_to_handler
+	bl	ftrace_return_to_handler
 	nop
 
 	/* return value has real return address */
@@ -1284,7 +1270,7 @@
 	 */
 	ld	r2, PACATOC(r13)
 
-	bl	.ftrace_return_to_handler
+	bl	ftrace_return_to_handler
 	nop
 
 	/* return value has real return address */

diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 60d1a22..59e4ba7 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c

@@ -30,6 +30,7 @@
 #endif
 
 bool epapr_paravirt_enabled;
+static bool __maybe_unused epapr_has_idle;
 
 static int __init early_init_dt_scan_epapr(unsigned long node,
 					   const char *uname,
@@ -56,7 +57,7 @@
 
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
 	if (of_get_flat_dt_prop(node, "has-idle", NULL))
-		ppc_md.power_save = epapr_ev_idle;
+		epapr_has_idle = true;
 #endif
 
 	epapr_paravirt_enabled = true;
@@ -71,3 +72,14 @@
 	return 0;
 }
 
+static int __init epapr_idle_init(void)
+{
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+	if (epapr_has_idle)
+		ppc_md.power_save = epapr_ev_idle;
+#endif
+
+	return 0;
+}
+
+postcore_initcall(epapr_idle_init);

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index c1bee3c..bb9cac6 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S

@@ -499,7 +499,7 @@
 	CHECK_NAPPING();						\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
 	bl	hdlr;							\
-	b	.ret_from_except_lite;
+	b	ret_from_except_lite;
 
 /* This value is used to mark exception frames on the stack. */
 	.section	".toc","aw"
@@ -550,11 +550,11 @@
 	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x100)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unknown_exception
+	bl	unknown_exception
 	b	ret_from_crit_except
 
 /* Machine Check Interrupt */
@@ -562,11 +562,11 @@
 	MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
 			    PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_MC(0x000)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.machine_check_exception
+	bl	machine_check_exception
 	b	ret_from_mc_except
 
 /* Data Storage Interrupt */
@@ -591,7 +591,7 @@
 
 /* External Input Interrupt */
 	MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
-			   external_input, .do_IRQ, ACK_NONE)
+			   external_input, do_IRQ, ACK_NONE)
 
 /* Alignment */
 	START_EXCEPTION(alignment);
@@ -612,9 +612,9 @@
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
-	bl	.save_nvgprs
-	bl	.program_check_exception
-	b	.ret_from_except
+	bl	save_nvgprs
+	bl	program_check_exception
+	b	ret_from_except
 
 /* Floating Point Unavailable Interrupt */
 	START_EXCEPTION(fp_unavailable);
@@ -625,13 +625,13 @@
 	ld	r12,_MSR(r1)
 	andi.	r0,r12,MSR_PR;
 	beq-	1f
-	bl	.load_up_fpu
+	bl	load_up_fpu
 	b	fast_exception_return
 1:	INTS_DISABLE
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.kernel_fp_unavailable_exception
-	b	.ret_from_except
+	bl	kernel_fp_unavailable_exception
+	b	ret_from_except
 
 /* Altivec Unavailable Interrupt */
 	START_EXCEPTION(altivec_unavailable);
@@ -644,16 +644,16 @@
 	ld	r12,_MSR(r1)
 	andi.	r0,r12,MSR_PR;
 	beq-	1f
-	bl	.load_up_altivec
+	bl	load_up_altivec
 	b	fast_exception_return
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	INTS_DISABLE
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.altivec_unavailable_exception
-	b	.ret_from_except
+	bl	altivec_unavailable_exception
+	b	ret_from_except
 
 /* AltiVec Assist */
 	START_EXCEPTION(altivec_assist);
@@ -662,39 +662,39 @@
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x220)
 	INTS_DISABLE
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
-	bl	.altivec_assist_exception
+	bl	altivec_assist_exception
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #else
-	bl	.unknown_exception
+	bl	unknown_exception
 #endif
-	b	.ret_from_except
+	b	ret_from_except
 
 
 /* Decrementer Interrupt */
 	MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
-			   decrementer, .timer_interrupt, ACK_DEC)
+			   decrementer, timer_interrupt, ACK_DEC)
 
 /* Fixed Interval Timer Interrupt */
 	MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
-			   fixed_interval, .unknown_exception, ACK_FIT)
+			   fixed_interval, unknown_exception, ACK_FIT)
 
 /* Watchdog Timer Interrupt */
 	START_EXCEPTION(watchdog);
 	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x9f0)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_BOOKE_WDT
-	bl	.WatchdogException
+	bl	WatchdogException
 #else
-	bl	.unknown_exception
+	bl	unknown_exception
 #endif
 	b	ret_from_crit_except
 
@@ -712,10 +712,10 @@
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0xf20)
 	INTS_DISABLE
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unknown_exception
-	b	.ret_from_except
+	bl	unknown_exception
+	b	ret_from_except
 
 /* Debug exception as a critical interrupt*/
 	START_EXCEPTION(debug_crit);
@@ -774,9 +774,9 @@
 	mr	r4,r14
 	ld	r14,PACA_EXCRIT+EX_R14(r13)
 	ld	r15,PACA_EXCRIT+EX_R15(r13)
-	bl	.save_nvgprs
-	bl	.DebugException
-	b	.ret_from_except
+	bl	save_nvgprs
+	bl	DebugException
+	b	ret_from_except
 
 kernel_dbg_exc:
 	b	.	/* NYI */
@@ -839,9 +839,9 @@
 	mr	r4,r14
 	ld	r14,PACA_EXDBG+EX_R14(r13)
 	ld	r15,PACA_EXDBG+EX_R15(r13)
-	bl	.save_nvgprs
-	bl	.DebugException
-	b	.ret_from_except
+	bl	save_nvgprs
+	bl	DebugException
+	b	ret_from_except
 
 	START_EXCEPTION(perfmon);
 	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
@@ -850,23 +850,23 @@
 	INTS_DISABLE
 	CHECK_NAPPING()
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.performance_monitor_exception
-	b	.ret_from_except_lite
+	bl	performance_monitor_exception
+	b	ret_from_except_lite
 
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
-			   doorbell, .doorbell_exception, ACK_NONE)
+			   doorbell, doorbell_exception, ACK_NONE)
 
 /* Doorbell critical Interrupt */
 	START_EXCEPTION(doorbell_crit);
 	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x2a0)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unknown_exception
+	bl	unknown_exception
 	b	ret_from_crit_except
 
 /*
@@ -878,21 +878,21 @@
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x2c0)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	INTS_RESTORE_HARD
-	bl	.unknown_exception
-	b	.ret_from_except
+	bl	unknown_exception
+	b	ret_from_except
 
 /* Guest Doorbell critical Interrupt */
 	START_EXCEPTION(guest_doorbell_crit);
 	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x2e0)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unknown_exception
+	bl	unknown_exception
 	b	ret_from_crit_except
 
 /* Hypervisor call */
@@ -901,10 +901,10 @@
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x310)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	INTS_RESTORE_HARD
-	bl	.unknown_exception
-	b	.ret_from_except
+	bl	unknown_exception
+	b	ret_from_except
 
 /* Embedded Hypervisor priviledged  */
 	START_EXCEPTION(ehpriv);
@@ -912,10 +912,10 @@
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x320)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	INTS_RESTORE_HARD
-	bl	.unknown_exception
-	b	.ret_from_except
+	bl	unknown_exception
+	b	ret_from_except
 
 /* LRAT Error interrupt */
 	START_EXCEPTION(lrat_error);
@@ -1014,16 +1014,16 @@
 	mr	r5,r15
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
-	bl	.do_page_fault
+	bl	do_page_fault
 	cmpdi	r3,0
 	bne-	1f
-	b	.ret_from_except_lite
-1:	bl	.save_nvgprs
+	b	ret_from_except_lite
+1:	bl	save_nvgprs
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
-	bl	.bad_page_fault
-	b	.ret_from_except
+	bl	bad_page_fault
+	b	ret_from_except
 
 /*
  * Alignment exception doesn't fit entirely in the 0x100 bytes so it
@@ -1035,10 +1035,10 @@
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	INTS_RESTORE_HARD
-	bl	.alignment_exception
-	b	.ret_from_except
+	bl	alignment_exception
+	b	ret_from_except
 
 /*
  * We branch here from entry_64.S for the last stage of the exception
@@ -1172,7 +1172,7 @@
 	std	r12,0(r11)
 	ld	r2,PACATOC(r13)
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.kernel_bad_stack
+	bl	kernel_bad_stack
 	b	1b
 
 /*
@@ -1467,22 +1467,6 @@
 	.globl  a2_tlbinit_after_iprot_flush
 a2_tlbinit_after_iprot_flush:
 
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-	/* Now establish early debug mappings if applicable */
-	/* Restore the MAS0 we used for linear mapping load */
-	mtspr	SPRN_MAS0,r11
-
-	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
-	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
-	mtspr	SPRN_MAS1,r3
-	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
-	mtspr	SPRN_MAS2,r3
-	LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
-	mtspr	SPRN_MAS7_MAS3,r3
-	/* re-use the MAS8 value from the linear mapping */
-	tlbwe
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
-
 	PPC_TLBILX(0,0,R0)
 	sync
 	isync
@@ -1521,13 +1505,13 @@
 	 * and always use AS 0, so we just set it up to match our link
 	 * address and never use 0 based addresses.
 	 */
-	bl	.initial_tlb_book3e
+	bl	initial_tlb_book3e
 
 	/* Init global core bits */
-	bl	.init_core_book3e
+	bl	init_core_book3e
 
 	/* Init per-thread bits */
-	bl	.init_thread_book3e
+	bl	init_thread_book3e
 
 	/* Return to common init code */
 	tovirt(r28,r28)
@@ -1548,7 +1532,7 @@
  */
 _GLOBAL(book3e_secondary_core_init_tlb_set)
 	li	r4,1
-	b	.generic_secondary_smp_init
+	b	generic_secondary_smp_init
 
 _GLOBAL(book3e_secondary_core_init)
 	mflr	r28
@@ -1558,18 +1542,18 @@
 	bne	2f
 
 	/* Setup TLB for this core */
-	bl	.initial_tlb_book3e
+	bl	initial_tlb_book3e
 
 	/* We can return from the above running at a different
 	 * address, so recalculate r2 (TOC)
 	 */
-	bl	.relative_toc
+	bl	relative_toc
 
 	/* Init global core bits */
-2:	bl	.init_core_book3e
+2:	bl	init_core_book3e
 
 	/* Init per-thread bits */
-3:	bl	.init_thread_book3e
+3:	bl	init_thread_book3e
 
 	/* Return to common init code at proper virtual address.
 	 *
@@ -1596,14 +1580,14 @@
 	mflr	r28
 	b	3b
 
-_STATIC(init_core_book3e)
+init_core_book3e:
 	/* Establish the interrupt vector base */
 	LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
 	mtspr	SPRN_IVPR,r3
 	sync
 	blr
 
-_STATIC(init_thread_book3e)
+init_thread_book3e:
 	lis	r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
 	mtspr	SPRN_EPCR,r3
 

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3afd391..a7d36b1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S

@@ -132,12 +132,12 @@
 #endif
 
 	beq	cr1,2f
-	b	.power7_wakeup_noloss
-2:	b	.power7_wakeup_loss
+	b	power7_wakeup_noloss
+2:	b	power7_wakeup_loss
 
 	/* Fast Sleep wakeup on PowerNV */
 8:	GET_PACA(r13)
-	b 	.power7_wakeup_tb_loss
+	b 	power7_wakeup_tb_loss
 
 9:
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
@@ -211,16 +211,16 @@
 #endif /* __DISABLED__ */
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	/*
-	 * We can't just use a direct branch to .slb_miss_realmode
+	 * We can't just use a direct branch to slb_miss_realmode
 	 * because the distance from here to there depends on where
 	 * the kernel ends up being put.
 	 */
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -243,11 +243,11 @@
 #endif /* __DISABLED__ */
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -439,9 +439,9 @@
 	 * R9		= CR
 	 * Original R9 to R13 is saved on PACA_EXMC
 	 *
-	 * Switch to mc_emergency stack and handle re-entrancy (though we
-	 * currently don't test for overflow). Save MCE registers srr1,
-	 * srr0, dar and dsisr and then set ME=1
+	 * Switch to mc_emergency stack and handle re-entrancy (we limit
+	 * the nested MCE upto level 4 to avoid stack overflow).
+	 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
 	 *
 	 * We use paca->in_mce to check whether this is the first entry or
 	 * nested machine check. We increment paca->in_mce to track nested
@@ -464,6 +464,9 @@
 0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
 	addi	r10,r10,1		/* increment paca->in_mce */
 	sth	r10,PACA_IN_MCE(r13)
+	/* Limit nested MCE to level 4 to avoid stack overflow */
+	cmpwi	r10,4
+	bgt	2f			/* Check if we hit limit of 4 */
 	std	r11,GPR1(r1)		/* Save r1 on the stack. */
 	std	r11,0(r1)		/* make stack chain pointer */
 	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
@@ -482,10 +485,23 @@
 	ori	r11,r11,MSR_RI		/* turn on RI bit */
 	ld	r12,PACAKBASE(r13)	/* get high part of &label */
 	LOAD_HANDLER(r12, machine_check_handle_early)
-	mtspr	SPRN_SRR0,r12
+1:	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r11
 	rfid
 	b	.	/* prevent speculative execution */
+2:
+	/* Stack overflow. Stay on emergency stack and panic.
+	 * Keep the ME bit off while panic-ing, so that if we hit
+	 * another machine check we checkstop.
+	 */
+	addi	r1,r1,INT_FRAME_SIZE	/* go back to previous stack frame */
+	ld	r11,PACAKMSR(r13)
+	ld	r12,PACAKBASE(r13)
+	LOAD_HANDLER(r12, unrecover_mce)
+	li	r10,MSR_ME
+	andc	r11,r11,r10		/* Turn off MSR_ME */
+	b	1b
+	b	.	/* prevent speculative execution */
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
 machine_check_pSeries:
@@ -524,7 +540,7 @@
 	std	r12,PACA_EXSLB+EX_R12(r13)
 	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
+	EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD)
 
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
 	KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -769,38 +785,38 @@
 
 /*** Common interrupt handlers ***/
 
-	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
+	STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
 
 	STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
-	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
-	STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
+	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
+	STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
 #ifdef CONFIG_PPC_DOORBELL
-	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .doorbell_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
 #else
-	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .unknown_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
 #endif
-	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
-	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
-	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
-	STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt)
-	STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
+	STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
+	STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
+	STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
+	STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception)
 #ifdef CONFIG_PPC_DOORBELL
-	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
 #else
-	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .unknown_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
 #endif
-	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
-	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
-	STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
+	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
+	STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
 #ifdef CONFIG_ALTIVEC
-	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
+	STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
 #else
-	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
+	STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
 #endif
 #ifdef CONFIG_CBE_RAS
-	STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
-	STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
-	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
+	STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
+	STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
+	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
 #endif /* CONFIG_CBE_RAS */
 
 	/*
@@ -829,16 +845,16 @@
 	mfspr	r3,SPRN_DAR
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	/*
-	 * We can't just use a direct branch to .slb_miss_realmode
+	 * We can't just use a direct branch to slb_miss_realmode
 	 * because the distance from here to there depends on where
 	 * the kernel ends up being put.
 	 */
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -854,11 +870,11 @@
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -966,7 +982,7 @@
 	b	system_call_common
 
 ppc64_runlatch_on_trampoline:
-	b	.__ppc64_runlatch_on
+	b	__ppc64_runlatch_on
 
 /*
  * Here we have detected that the kernel stack pointer is bad.
@@ -1025,7 +1041,7 @@
 	std	r12,RESULT(r1)
 	std	r11,STACK_FRAME_OVERHEAD-16(r1)
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.kernel_bad_stack
+	bl	kernel_bad_stack
 	b	1b
 
 /*
@@ -1046,7 +1062,7 @@
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	li	r5,0x300
-	b	.do_hash_page		/* Try to handle as hpte fault */
+	b	do_hash_page		/* Try to handle as hpte fault */
 
 	.align  7
 	.globl  h_data_storage_common
@@ -1056,11 +1072,11 @@
 	mfspr   r10,SPRN_HDSISR
 	stw     r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
-	bl      .save_nvgprs
+	bl      save_nvgprs
 	DISABLE_INTS
 	addi    r3,r1,STACK_FRAME_OVERHEAD
-	bl      .unknown_exception
-	b       .ret_from_except
+	bl      unknown_exception
+	b       ret_from_except
 
 	.align	7
 	.globl instruction_access_common
@@ -1071,9 +1087,9 @@
 	ld	r3,_NIP(r1)
 	andis.	r4,r12,0x5820
 	li	r5,0x400
-	b	.do_hash_page		/* Try to handle as hpte fault */
+	b	do_hash_page		/* Try to handle as hpte fault */
 
-	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
 
 /*
  * Here is the common SLB miss user that is used when going to virtual
@@ -1088,7 +1104,7 @@
 	stw	r9,PACA_EXGEN+EX_CCR(r13)
 	std	r10,PACA_EXGEN+EX_LR(r13)
 	std	r11,PACA_EXGEN+EX_SRR0(r13)
-	bl	.slb_allocate_user
+	bl	slb_allocate_user
 
 	ld	r10,PACA_EXGEN+EX_LR(r13)
 	ld	r3,PACA_EXGEN+EX_R3(r13)
@@ -1131,9 +1147,9 @@
 unrecov_user_slb:
 	EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
 	DISABLE_INTS
-	bl	.save_nvgprs
+	bl	save_nvgprs
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
+	bl	unrecoverable_exception
 	b	1b
 
 #endif /* __DISABLED__ */
@@ -1158,10 +1174,10 @@
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.machine_check_exception
-	b	.ret_from_except
+	bl	machine_check_exception
+	b	ret_from_except
 
 	.align	7
 	.globl alignment_common
@@ -1175,31 +1191,31 @@
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.alignment_exception
-	b	.ret_from_except
+	bl	alignment_exception
+	b	ret_from_except
 
 	.align	7
 	.globl program_check_common
 program_check_common:
 	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.program_check_exception
-	b	.ret_from_except
+	bl	program_check_exception
+	b	ret_from_except
 
 	.align	7
 	.globl fp_unavailable_common
 fp_unavailable_common:
 	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
 	bne	1f			/* if from user, just load it up */
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.kernel_fp_unavailable_exception
+	bl	kernel_fp_unavailable_exception
 	BUG_OPCODE
 1:
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1211,15 +1227,15 @@
 	bne-	2f
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
-	bl	.load_up_fpu
+	bl	load_up_fpu
 	b	fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.fp_unavailable_tm
-	b	.ret_from_except
+	bl	fp_unavailable_tm
+	b	ret_from_except
 #endif
 	.align	7
 	.globl altivec_unavailable_common
@@ -1237,24 +1253,24 @@
 	bne-	2f
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
-	bl	.load_up_altivec
+	bl	load_up_altivec
 	b	fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.altivec_unavailable_tm
-	b	.ret_from_except
+	bl	altivec_unavailable_tm
+	b	ret_from_except
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.altivec_unavailable_exception
-	b	.ret_from_except
+	bl	altivec_unavailable_exception
+	b	ret_from_except
 
 	.align	7
 	.globl vsx_unavailable_common
@@ -1272,26 +1288,26 @@
 	bne-	2f
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
-	b	.load_up_vsx
+	b	load_up_vsx
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.vsx_unavailable_tm
-	b	.ret_from_except
+	bl	vsx_unavailable_tm
+	b	ret_from_except
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.vsx_unavailable_exception
-	b	.ret_from_except
+	bl	vsx_unavailable_exception
+	b	ret_from_except
 
-	STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
-	STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
+	STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
+	STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
 
 	.align	7
 	.globl	__end_handlers
@@ -1386,9 +1402,10 @@
 machine_check_handle_early:
 	std	r0,GPR0(r1)	/* Save r0 */
 	EXCEPTION_PROLOG_COMMON_3(0x200)
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.machine_check_early
+	bl	machine_check_early
+	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
@@ -1408,11 +1425,11 @@
 	/* Supervisor state loss */
 	li	r0,1
 	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	.machine_check_queue_event
+3:	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
-	b	.power7_enter_nap_mode
+	b	power7_enter_nap_mode
 4:
 #endif
 	/*
@@ -1443,16 +1460,38 @@
 	 */
 	andi.	r11,r12,MSR_RI
 	bne	2f
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
-	b	1b
+1:	mfspr	r11,SPRN_SRR0
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10,unrecover_mce)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
+	/*
+	 * We are going down. But there are chances that we might get hit by
+	 * another MCE during panic path and we may run into unstable state
+	 * with no way out. Hence, turn ME bit off while going down, so that
+	 * when another MCE is hit during panic path, system will checkstop
+	 * and hypervisor will get restarted cleanly by SP.
+	 */
+	li	r3,MSR_ME
+	andc	r10,r10,r3		/* Turn off MSR_ME */
+	mtspr	SPRN_SRR1,r10
+	rfid
+	b	.
 2:
 	/*
+	 * Check if we have successfully handled/recovered from error, if not
+	 * then stay on emergency stack and panic.
+	 */
+	ld	r3,RESULT(r1)	/* Load result */
+	cmpdi	r3,0		/* see if we handled MCE successfully */
+
+	beq	1b		/* if !handled then panic */
+	/*
 	 * Return from MC interrupt.
 	 * Queue up the MCE event so that we can log it later, while
 	 * returning from kernel or opal call.
 	 */
-	bl	.machine_check_queue_event
+	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
 	rfid
 9:
@@ -1460,6 +1499,17 @@
 	MACHINE_CHECK_HANDLER_WINDUP
 	b	machine_check_pSeries
 
+unrecover_mce:
+	/* Invoke machine_check_exception to print MCE event and panic. */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	machine_check_exception
+	/*
+	 * We will not reach here. Even if we did, there is no way out. Call
+	 * unrecoverable_exception and die.
+	 */
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	unrecoverable_exception
+	b	1b
 /*
  * r13 points to the PACA, r9 contains the saved CR,
  * r12 contain the saved SRR1, SRR0 is still ready for return
@@ -1468,7 +1518,7 @@
  * r3 is saved in paca->slb_r3
  * We assume we aren't going to take any exceptions during this procedure.
  */
-_GLOBAL(slb_miss_realmode)
+slb_miss_realmode:
 	mflr	r10
 #ifdef CONFIG_RELOCATABLE
 	mtctr	r11
@@ -1477,7 +1527,7 @@
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
 
-	bl	.slb_allocate_realmode
+	bl	slb_allocate_realmode
 
 	/* All done -- return from exception. */
 
@@ -1517,9 +1567,9 @@
 unrecov_slb:
 	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
 	DISABLE_INTS
-	bl	.save_nvgprs
+	bl	save_nvgprs
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
+	bl	unrecoverable_exception
 	b	1b
 
 
@@ -1536,7 +1586,7 @@
  * Hash table stuff
  */
 	.align	7
-_STATIC(do_hash_page)
+do_hash_page:
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
 
@@ -1573,7 +1623,7 @@
 	 *
 	 * at return r3 = 0 for success, 1 for page fault, negative for error
 	 */
-	bl	.hash_page		/* build HPTE if possible */
+	bl	hash_page		/* build HPTE if possible */
 	cmpdi	r3,0			/* see if hash_page succeeded */
 
 	/* Success */
@@ -1587,35 +1637,35 @@
 11:	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_page_fault
+	bl	do_page_fault
 	cmpdi	r3,0
 	beq+	12f
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	lwz	r4,_DAR(r1)
-	bl	.bad_page_fault
-	b	.ret_from_except
+	bl	bad_page_fault
+	b	ret_from_except
 
 /* We have a data breakpoint exception - handle it */
 handle_dabr_fault:
-	bl	.save_nvgprs
+	bl	save_nvgprs
 	ld      r4,_DAR(r1)
 	ld      r5,_DSISR(r1)
 	addi    r3,r1,STACK_FRAME_OVERHEAD
-	bl      .do_break
-12:	b       .ret_from_except_lite
+	bl      do_break
+12:	b       ret_from_except_lite
 
 
 /* We have a page fault that hash_page could handle but HV refused
  * the PTE insertion
  */
-13:	bl	.save_nvgprs
+13:	bl	save_nvgprs
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
-	bl	.low_hash_fault
-	b	.ret_from_except
+	bl	low_hash_fault
+	b	ret_from_except
 
 /*
  * We come here as a result of a DSI at a point where we don't want
@@ -1624,16 +1674,16 @@
  * were soft-disabled.  We want to invoke the exception handler for
  * the access, or panic if there isn't a handler.
  */
-77:	bl	.save_nvgprs
+77:	bl	save_nvgprs
 	mr	r4,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	li	r5,SIGSEGV
-	bl	.bad_page_fault
-	b	.ret_from_except
+	bl	bad_page_fault
+	b	ret_from_except
 
 	/* here we have a segment miss */
 do_ste_alloc:
-	bl	.ste_allocate		/* try to insert stab entry */
+	bl	ste_allocate		/* try to insert stab entry */
 	cmpdi	r3,0
 	bne-	handle_page_fault
 	b	fast_exception_return
@@ -1646,7 +1696,7 @@
  * We assume (DAR >> 60) == 0xc.
  */
 	.align	7
-_GLOBAL(do_stab_bolted)
+do_stab_bolted:
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */
 	mfspr	r11,SPRN_DAR			/* ea */

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 7213d93..742694c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c

@@ -69,7 +69,7 @@
 	 */
 	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
 	if (!token)
-		return 0;
+		return 1;
 
 	fw_dump.fadump_supported = 1;
 	fw_dump.ibm_configure_kernel_dump = *token;
@@ -92,7 +92,7 @@
 					&size);
 
 	if (!sections)
-		return 0;
+		return 1;
 
 	num_sections = size / (3 * sizeof(u32));
 
@@ -110,6 +110,7 @@
 			break;
 		}
 	}
+
 	return 1;
 }
 
@@ -645,7 +646,7 @@
 		}
 		/* Lower 4 bytes of reg_value contains logical cpu id */
 		cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
-		if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+		if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
 			SKIP_TO_NEXT_CPU(reg_entry);
 			continue;
 		}
@@ -662,9 +663,11 @@
 	}
 	fadump_final_note(note_buf);
 
-	pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+	if (fdh) {
+		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
 							fdh->elfcorehdr_addr);
-	fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+		fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+	}
 	return 0;
 
 error_out:

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 6a014c7..f202d07 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c

@@ -105,11 +105,9 @@
 		  struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int op;
-	unsigned int jmp[5];
 	unsigned long ptr;
 	unsigned long ip = rec->ip;
-	unsigned long tramp;
-	int offset;
+	void *tramp;
 
 	/* read where this goes */
 	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
@@ -122,96 +120,41 @@
 	}
 
 	/* lets find where the pointer goes */
-	tramp = find_bl_target(ip, op);
+	tramp = (void *)find_bl_target(ip, op);
 
-	/*
-	 * On PPC64 the trampoline looks like:
-	 * 0x3d, 0x82, 0x00, 0x00,    addis   r12,r2, <high>
-	 * 0x39, 0x8c, 0x00, 0x00,    addi    r12,r12, <low>
-	 *   Where the bytes 2,3,6 and 7 make up the 32bit offset
-	 *   to the TOC that holds the pointer.
-	 *   to jump to.
-	 * 0xf8, 0x41, 0x00, 0x28,    std     r2,40(r1)
-	 * 0xe9, 0x6c, 0x00, 0x20,    ld      r11,32(r12)
-	 *   The actually address is 32 bytes from the offset
-	 *   into the TOC.
-	 * 0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12)
-	 */
+	pr_devel("ip:%lx jumps to %p", ip, tramp);
 
-	pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
-
-	/* Find where the trampoline jumps to */
-	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
-		printk(KERN_ERR "Failed to read %lx\n", tramp);
-		return -EFAULT;
-	}
-
-	pr_devel(" %08x %08x", jmp[0], jmp[1]);
-
-	/* verify that this is what we expect it to be */
-	if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
-	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||
-	    (jmp[2] != 0xf8410028) ||
-	    (jmp[3] != 0xe96c0020) ||
-	    (jmp[4] != 0xe84c0028)) {
+	if (!is_module_trampoline(tramp)) {
 		printk(KERN_ERR "Not a trampoline\n");
 		return -EINVAL;
 	}
 
-	/* The bottom half is signed extended */
-	offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
-		(int)((short)jmp[1]);
-
-	pr_devel(" %x ", offset);
-
-	/* get the address this jumps too */
-	tramp = mod->arch.toc + offset + 32;
-	pr_devel("toc: %lx", tramp);
-
-	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
-		printk(KERN_ERR "Failed to read %lx\n", tramp);
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		printk(KERN_ERR "Failed to get trampoline target\n");
 		return -EFAULT;
 	}
 
-	pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
-
-#ifdef __LITTLE_ENDIAN__
-	ptr = ((unsigned long)jmp[1] << 32) + jmp[0];
-#else
-	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
-#endif
+	pr_devel("trampoline target %lx", ptr);
 
 	/* This should match what was called */
 	if (ptr != ppc_function_entry((void *)addr)) {
-		printk(KERN_ERR "addr does not match %lx\n", ptr);
+		printk(KERN_ERR "addr %lx does not match expected %lx\n",
+			ptr, ppc_function_entry((void *)addr));
 		return -EINVAL;
 	}
 
 	/*
-	 * We want to nop the line, but the next line is
-	 *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
-	 * This needs to be turned to a nop too.
-	 */
-	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
-		return -EFAULT;
-
-	if (op != 0xe8410028) {
-		printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
-		return -EINVAL;
-	}
-
-	/*
-	 * Milton Miller pointed out that we can not blindly do nops.
-	 * If a task was preempted when calling a trace function,
-	 * the nops will remove the way to restore the TOC in r2
-	 * and the r2 TOC will get corrupted.
-	 */
-
-	/*
-	 * Replace:
-	 *   bl <tramp>  <==== will be replaced with "b 1f"
-	 *   ld r2,40(r1)
-	 *  1:
+	 * Our original call site looks like:
+	 *
+	 * bl <tramp>
+	 * ld r2,XX(r1)
+	 *
+	 * Milton Miller pointed out that we can not simply nop the branch.
+	 * If a task was preempted when calling a trace function, the nops
+	 * will remove the way to restore the TOC in r2 and the r2 TOC will
+	 * get corrupted.
+	 *
+	 * Use a b +8 to jump over the load.
 	 */
 	op = 0x48000008;	/* b +8 */
 
@@ -349,19 +292,24 @@
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int op[2];
-	unsigned long ip = rec->ip;
+	void *ip = (void *)rec->ip;
 
 	/* read where this goes */
-	if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
+	if (probe_kernel_read(op, ip, sizeof(op)))
 		return -EFAULT;
 
 	/*
-	 * It should be pointing to two nops or
-	 *  b +8; ld r2,40(r1)
+	 * We expect to see:
+	 *
+	 * b +8
+	 * ld r2,XX(r1)
+	 *
+	 * The load offset (low 16 bits) depends on the ABI, so compare only
+	 * the upper 16 bits of the load instruction (0xe841....).
 	 */
-	if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
-	    ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) {
-		printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
+		printk(KERN_ERR "Unexpected call sequence: %x %x\n",
+			op[0], op[1]);
 		return -EINVAL;
 	}
 
@@ -371,23 +319,16 @@
 		return -EINVAL;
 	}
 
-	/* create the branch to the trampoline */
-	op[0] = create_branch((unsigned int *)ip,
-			      rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
-	if (!op[0]) {
-		printk(KERN_ERR "REL24 out of range!\n");
+	/* Ensure branch is within 24 bits: create_branch() returns 0 if not */
+	if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+		printk(KERN_ERR "Branch out of range\n");
+		return -EINVAL;
+	}
 
-	/* ld r2,40(r1) */
-	op[1] = 0xe8410028;
-
-	pr_devel("write to %lx\n", rec->ip);
-
-	if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
-		return -EPERM;
-
-	flush_icache_range(ip, ip + 8);
+	if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+		printk(KERN_ERR "REL24 out of range!\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 67ee0d6..7d7d863 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S

@@ -930,25 +930,6 @@
 	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */
 	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */
 
-#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
-
-	/* Load a TLB entry for the UART, so that ppc4xx_progress() can use
-	 * the UARTs nice and early.  We use a 4k real==virtual mapping. */
-
-	lis	r3,SERIAL_DEBUG_IO_BASE@h
-	ori	r3,r3,SERIAL_DEBUG_IO_BASE@l
-	mr	r4,r3
-	clrrwi	r4,r4,12
-	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
-
-	clrrwi	r3,r3,12
-	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
-
-	li	r0,0			/* TLB slot 0 */
-	tlbwe	r4,r0,TLB_DATA
-	tlbwe	r3,r0,TLB_TAG
-#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
-
 	isync
 
 	/* Establish the exception vector base

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index b7363bd..a95145d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S

@@ -70,16 +70,15 @@
 	/* NOP this out unconditionally */
 BEGIN_FTR_SECTION
 	FIXUP_ENDIAN
-	b	.__start_initialization_multiplatform
+	b	__start_initialization_multiplatform
 END_FTR_SECTION(0, 1)
 
 	/* Catch branch to 0 in real mode */
 	trap
 
-	/* Secondary processors spin on this value until it becomes nonzero.
-	 * When it does it contains the real address of the descriptor
-	 * of the function that the cpu should jump to to continue
-	 * initialization.
+	/* Secondary processors spin on this value until it becomes non-zero.
+	 * When non-zero, it contains the real address of the function the cpu
+	 * should jump to.
 	 */
 	.balign 8
 	.globl  __secondary_hold_spinloop
@@ -140,16 +139,15 @@
 	tovirt(r26,r26)
 #endif
 	/* All secondary cpus wait here until told to start. */
-100:	ld	r4,__secondary_hold_spinloop-_stext(r26)
-	cmpdi	0,r4,0
+100:	ld	r12,__secondary_hold_spinloop-_stext(r26)
+	cmpdi	0,r12,0
 	beq	100b
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
 #ifdef CONFIG_PPC_BOOK3E
-	tovirt(r4,r4)
+	tovirt(r12,r12)
 #endif
-	ld	r4,0(r4)		/* deref function descriptor */
-	mtctr	r4
+	mtctr	r12
 	mr	r3,r24
 	/*
 	 * it may be the case that other platforms have r4 right to
@@ -186,16 +184,16 @@
 	mr	r24,r3
 
 	/* turn on 64-bit mode */
-	bl	.enable_64b_mode
+	bl	enable_64b_mode
 
 	/* get a valid TOC pointer, wherever we're mapped at */
-	bl	.relative_toc
+	bl	relative_toc
 	tovirt(r2,r2)
 
 #ifdef CONFIG_PPC_BOOK3E
 	/* Book3E initialization */
 	mr	r3,r24
-	bl	.book3e_secondary_thread_init
+	bl	book3e_secondary_thread_init
 #endif
 	b	generic_secondary_common_init
 
@@ -214,17 +212,17 @@
 	mr	r25,r4
 
 	/* turn on 64-bit mode */
-	bl	.enable_64b_mode
+	bl	enable_64b_mode
 
 	/* get a valid TOC pointer, wherever we're mapped at */
-	bl	.relative_toc
+	bl	relative_toc
 	tovirt(r2,r2)
 
 #ifdef CONFIG_PPC_BOOK3E
 	/* Book3E initialization */
 	mr	r3,r24
 	mr	r4,r25
-	bl	.book3e_secondary_core_init
+	bl	book3e_secondary_core_init
 #endif
 
 generic_secondary_common_init:
@@ -236,7 +234,7 @@
 	ld	r13,0(r13)		/* Get base vaddr of paca array	 */
 #ifndef CONFIG_SMP
 	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */
-	b	.kexec_wait		/* wait for next kernel if !SMP	 */
+	b	kexec_wait		/* wait for next kernel if !SMP	 */
 #else
 	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
 	lwz	r7,0(r7)		/* also the max paca allocated 	 */
@@ -250,7 +248,7 @@
 	blt	1b
 
 	mr	r3,r24			/* not found, copy phys to r3	 */
-	b	.kexec_wait		/* next kernel might do better	 */
+	b	kexec_wait		/* next kernel might do better	 */
 
 2:	SET_PACA(r13)
 #ifdef CONFIG_PPC_BOOK3E
@@ -264,11 +262,13 @@
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
 	ld	r23,0(r23)
-	ld	r23,CPU_SPEC_RESTORE(r23)
-	cmpdi	0,r23,0
+	ld	r12,CPU_SPEC_RESTORE(r23)
+	cmpdi	0,r12,0
 	beq	3f
-	ld	r23,0(r23)
-	mtctr	r23
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+	ld	r12,0(r12)
+#endif
+	mtctr	r12
 	bctrl
 
 3:	LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
@@ -299,7 +299,7 @@
  * Assumes we're mapped EA == RA if the MMU is on.
  */
 #ifdef CONFIG_PPC_BOOK3S
-_STATIC(__mmu_off)
+__mmu_off:
 	mfmsr	r3
 	andi.	r0,r3,MSR_IR|MSR_DR
 	beqlr
@@ -324,12 +324,12 @@
  *                 DT block, r4 is a physical pointer to the kernel itself
  *
  */
-_GLOBAL(__start_initialization_multiplatform)
+__start_initialization_multiplatform:
 	/* Make sure we are running in 64 bits mode */
-	bl	.enable_64b_mode
+	bl	enable_64b_mode
 
 	/* Get TOC pointer (current runtime address) */
-	bl	.relative_toc
+	bl	relative_toc
 
 	/* find out where we are now */
 	bcl	20,31,$+4
@@ -342,7 +342,7 @@
 	 */
 	cmpldi	cr0,r5,0
 	beq	1f
-	b	.__boot_from_prom		/* yes -> prom */
+	b	__boot_from_prom		/* yes -> prom */
 1:
 	/* Save parameters */
 	mr	r31,r3
@@ -354,8 +354,8 @@
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E
-	bl	.start_initialization_book3e
-	b	.__after_prom_start
+	bl	start_initialization_book3e
+	b	__after_prom_start
 #else
 	/* Setup some critical 970 SPRs before switching MMU off */
 	mfspr	r0,SPRN_PVR
@@ -368,15 +368,15 @@
 	beq	1f
 	cmpwi	r0,0x45		/* 970GX */
 	bne	2f
-1:	bl	.__cpu_preinit_ppc970
+1:	bl	__cpu_preinit_ppc970
 2:
 
 	/* Switch off MMU if not already off */
-	bl	.__mmu_off
-	b	.__after_prom_start
+	bl	__mmu_off
+	b	__after_prom_start
 #endif /* CONFIG_PPC_BOOK3E */
 
-_INIT_STATIC(__boot_from_prom)
+__boot_from_prom:
 #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
 	/* Save parameters */
 	mr	r31,r3
@@ -395,7 +395,7 @@
 #ifdef CONFIG_RELOCATABLE
 	/* Relocate code for where we are now */
 	mr	r3,r26
-	bl	.relocate
+	bl	relocate
 #endif
 
 	/* Restore parameters */
@@ -407,14 +407,14 @@
 
 	/* Do all of the interaction with OF client interface */
 	mr	r8,r26
-	bl	.prom_init
+	bl	prom_init
 #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
 
 	/* We never return. We also hit that trap if trying to boot
 	 * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
 	trap
 
-_STATIC(__after_prom_start)
+__after_prom_start:
 #ifdef CONFIG_RELOCATABLE
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
@@ -424,7 +424,7 @@
 	bne	1f
 	add	r25,r25,r26
 1:	mr	r3,r25
-	bl	.relocate
+	bl	relocate
 #endif
 
 /*
@@ -464,12 +464,12 @@
 	lis	r5,(copy_to_here - _stext)@ha
 	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 
-	bl	.copy_and_flush		/* copy the first n bytes	 */
+	bl	copy_and_flush		/* copy the first n bytes	 */
 					/* this includes the code being	 */
 					/* executed here.		 */
 	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */
-	addi	r8,r8,(4f - _stext)@l	/* that we just made */
-	mtctr	r8
+	addi	r12,r8,(4f - _stext)@l	/* that we just made */
+	mtctr	r12
 	bctr
 
 .balign 8
@@ -478,9 +478,9 @@
 4:	/* Now copy the rest of the kernel up to _end */
 	addis	r5,r26,(p_end - _stext)@ha
 	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
-5:	bl	.copy_and_flush		/* copy the rest */
+5:	bl	copy_and_flush		/* copy the rest */
 
-9:	b	.start_here_multiplatform
+9:	b	start_here_multiplatform
 
 /*
  * Copy routine used to copy the kernel to start at physical address 0
@@ -544,7 +544,7 @@
 	
 _GLOBAL(pmac_secondary_start)
 	/* turn on 64-bit mode */
-	bl	.enable_64b_mode
+	bl	enable_64b_mode
 
 	li	r0,0
 	mfspr	r3,SPRN_HID4
@@ -556,11 +556,11 @@
 	slbia
 
 	/* get TOC pointer (real address) */
-	bl	.relative_toc
+	bl	relative_toc
 	tovirt(r2,r2)
 
 	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_ppc970
+	bl	__restore_cpu_ppc970
 
 	/* pSeries do that early though I don't think we really need it */
 	mfmsr	r3
@@ -619,7 +619,7 @@
 	std	r14,PACAKSAVE(r13)
 
 	/* Do early setup for that CPU (stab, slb, hash table pointer) */
-	bl	.early_setup_secondary
+	bl	early_setup_secondary
 
 	/*
 	 * setup the new stack pointer, but *don't* use this until
@@ -639,7 +639,7 @@
 	stb	r0,PACAIRQHAPPENED(r13)
 
 	/* enable MMU and jump to start_secondary */
-	LOAD_REG_ADDR(r3, .start_secondary_prolog)
+	LOAD_REG_ADDR(r3, start_secondary_prolog)
 	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
 
 	mtspr	SPRN_SRR0,r3
@@ -652,11 +652,11 @@
  * zero the stack back-chain pointer and get the TOC virtual address
  * before going into C code.
  */
-_GLOBAL(start_secondary_prolog)
+start_secondary_prolog:
 	ld	r2,PACATOC(r13)
 	li	r3,0
 	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	.start_secondary
+	bl	start_secondary
 	b	.
 /*
  * Reset stack pointer and call start_secondary
@@ -667,14 +667,14 @@
 	ld	r1,PACAKSAVE(r13)	/* Reload kernel stack pointer */
 	li	r3,0
 	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	.start_secondary
+	bl	start_secondary
 	b	.
 #endif
 
 /*
  * This subroutine clobbers r11 and r12
  */
-_GLOBAL(enable_64b_mode)
+enable_64b_mode:
 	mfmsr	r11			/* grab the current MSR */
 #ifdef CONFIG_PPC_BOOK3E
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
@@ -715,9 +715,9 @@
 /*
  * This is where the main kernel code starts.
  */
-_INIT_STATIC(start_here_multiplatform)
+start_here_multiplatform:
 	/* set up the TOC */
-	bl      .relative_toc
+	bl      relative_toc
 	tovirt(r2,r2)
 
 	/* Clear out the BSS. It may have been done in prom_init,
@@ -776,9 +776,9 @@
 
 	/* Restore parameters passed from prom_init/kexec */
 	mr	r3,r31
-	bl	.early_setup		/* also sets r13 and SPRG_PACA */
+	bl	early_setup		/* also sets r13 and SPRG_PACA */
 
-	LOAD_REG_ADDR(r3, .start_here_common)
+	LOAD_REG_ADDR(r3, start_here_common)
 	ld	r4,PACAKMSR(r13)
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
@@ -786,7 +786,8 @@
 	b	.	/* prevent speculative execution */
 	
 	/* This is where all platforms converge execution */
-_INIT_GLOBAL(start_here_common)
+
+start_here_common:
 	/* relocation is on at this point */
 	std	r1,PACAKSAVE(r13)
 
@@ -794,7 +795,7 @@
 	ld	r2,PACATOC(r13)
 
 	/* Do more system initializations in virtual mode */
-	bl	.setup_system
+	bl	setup_system
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
@@ -805,7 +806,7 @@
 	stb	r0,PACAIRQHAPPENED(r13)
 
 	/* Generic kernel entry */
-	bl	.start_kernel
+	bl	start_kernel
 
 	/* Not reached */
 	BUG_OPCODE

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index b0a1792..0bb5918 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c

@@ -72,7 +72,7 @@
 	 * If so, DABR will be populated in single_step_dabr_instruction().
 	 */
 	if (current->thread.last_hit_ubp != bp)
-		set_breakpoint(info);
+		__set_breakpoint(info);
 
 	return 0;
 }
@@ -198,7 +198,7 @@
 
 	info = counter_arch_bp(tsk->thread.last_hit_ubp);
 	regs->msr &= ~MSR_SE;
-	set_breakpoint(info);
+	__set_breakpoint(info);
 	tsk->thread.last_hit_ubp = NULL;
 }
 
@@ -284,7 +284,7 @@
 	if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 		perf_bp_event(bp, regs);
 
-	set_breakpoint(info);
+	__set_breakpoint(info);
 out:
 	rcu_read_unlock();
 	return rc;
@@ -316,7 +316,7 @@
 	if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 		perf_bp_event(bp, regs);
 
-	set_breakpoint(info);
+	__set_breakpoint(info);
 	current->thread.last_hit_ubp = NULL;
 
 	/*

diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index bfb73cc..48c21ac 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S

@@ -43,7 +43,7 @@
 	 */
 #ifdef CONFIG_TRACE_IRQFLAGS
 	stdu    r1,-128(r1)
-	bl	.trace_hardirqs_on
+	bl	trace_hardirqs_on
 	addi    r1,r1,128
 #endif
 	li	r0,1

diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index e3edaa1..f57a193 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S

@@ -46,7 +46,7 @@
 	mflr	r0
 	std	r0,16(r1)
 	stdu    r1,-128(r1)
-	bl	.trace_hardirqs_on
+	bl	trace_hardirqs_on
 	addi    r1,r1,128
 	ld	r0,16(r1)
 	mtlr	r0

diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index c3ab869..2480256 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S

@@ -39,6 +39,10 @@
  * Pass requested state in r3:
  * 	0 - nap
  * 	1 - sleep
+ *
+ * To check IRQ_HAPPENED in r4
+ * 	0 - don't check
+ * 	1 - check
  */
 _GLOBAL(power7_powersave_common)
 	/* Use r3 to pass state nap/sleep/winkle */
@@ -58,7 +62,7 @@
 	/* Make sure FPU, VSX etc... are flushed as we may lose
 	 * state when going to nap mode
 	 */
-	bl	.discard_lazy_cpu_state
+	bl	discard_lazy_cpu_state
 #endif /* CONFIG_SMP */
 
 	/* Hard disable interrupts */
@@ -71,6 +75,8 @@
 	lbz	r0,PACAIRQHAPPENED(r13)
 	cmpwi	cr0,r0,0
 	beq	1f
+	cmpwi	cr0,r4,0
+	beq	1f
 	addi	r1,r1,INT_FRAME_SIZE
 	ld	r0,16(r1)
 	mtlr	r0
@@ -114,15 +120,18 @@
 	lwz	r4,ADDROFF(powersave_nap)(r3)
 	cmpwi	0,r4,0
 	beqlr
+	li	r3, 1
 	/* fall through */
 
 _GLOBAL(power7_nap)
+	mr	r4,r3
 	li	r3,0
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
 	li	r3,1
+	li	r4,0
 	b	power7_powersave_common
 	/* No return */
 
@@ -168,7 +177,7 @@
 _GLOBAL(power7_wakeup_noloss)
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
-	bne	.power7_wakeup_loss
+	bne	power7_wakeup_loss
 	ld	r1,PACAR1(r13)
 	ld	r4,_MSR(r1)
 	ld	r5,_NIP(r1)

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 40bd7bd..9362588 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c

@@ -48,6 +48,9 @@
 static unsigned int legacy_serial_count;
 static int legacy_serial_console = -1;
 
+static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+	UPF_SHARE_IRQ | UPF_FIXED_PORT;
+
 static unsigned int tsi_serial_in(struct uart_port *p, int offset)
 {
 	unsigned int tmp;
@@ -71,8 +74,9 @@
 				  phys_addr_t taddr, unsigned long irq,
 				  upf_t flags, int irq_check_parent)
 {
-	const __be32 *clk, *spd;
+	const __be32 *clk, *spd, *rs;
 	u32 clock = BASE_BAUD * 16;
+	u32 shift = 0;
 	int index;
 
 	/* get clock freq. if present */
@@ -83,6 +87,11 @@
 	/* get default speed if present */
 	spd = of_get_property(np, "current-speed", NULL);
 
+	/* get register shift if present */
+	rs = of_get_property(np, "reg-shift", NULL);
+	if (rs && *rs)
+		shift = be32_to_cpup(rs);
+
 	/* If we have a location index, then try to use it */
 	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
 		index = want_index;
@@ -126,6 +135,7 @@
 	legacy_serial_ports[index].uartclk = clock;
 	legacy_serial_ports[index].irq = irq;
 	legacy_serial_ports[index].flags = flags;
+	legacy_serial_ports[index].regshift = shift;
 	legacy_serial_infos[index].taddr = taddr;
 	legacy_serial_infos[index].np = of_node_get(np);
 	legacy_serial_infos[index].clock = clock;
@@ -153,8 +163,6 @@
 {
 	u64 addr;
 	const __be32 *addrp;
-	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ
-		| UPF_FIXED_PORT;
 	struct device_node *tsi = of_get_parent(np);
 
 	/* We only support ports that have a clock frequency properly
@@ -163,9 +171,8 @@
 	if (of_get_property(np, "clock-frequency", NULL) == NULL)
 		return -1;
 
-	/* if reg-shift or offset, don't try to use it */
-	if ((of_get_property(np, "reg-shift", NULL) != NULL) ||
-		(of_get_property(np, "reg-offset", NULL) != NULL))
+	/* if reg-offset is present, don't try to use it */
+	if ((of_get_property(np, "reg-offset", NULL) != NULL))
 		return -1;
 
 	/* if rtas uses this device, don't try to use it as well */
@@ -185,9 +192,11 @@
 	 * IO port value. It will be fixed up later along with the irq
 	 */
 	if (tsi && !strcmp(tsi->type, "tsi-bridge"))
-		return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0);
+		return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
+				       NO_IRQ, legacy_port_flags, 0);
 	else
-		return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
+		return add_legacy_port(np, -1, UPIO_MEM, addr, addr,
+				       NO_IRQ, legacy_port_flags, 0);
 }
 
 static int __init add_legacy_isa_port(struct device_node *np,
@@ -233,7 +242,7 @@
 
 	/* Add port, irq will be dealt with later */
 	return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
-			       taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0);
+			       taddr, NO_IRQ, legacy_port_flags, 0);
 
 }
 
@@ -306,7 +315,7 @@
 	 * IO port value. It will be fixed up later along with the irq
 	 */
 	return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
-			       UPF_BOOT_AUTOCONF, np != pci_dev);
+			       legacy_port_flags, np != pci_dev);
 }
 #endif
 
@@ -315,17 +324,20 @@
 	struct legacy_serial_info *info = &legacy_serial_infos[console];
 	struct plat_serial8250_port *port = &legacy_serial_ports[console];
 	void __iomem *addr;
+	unsigned int stride;
+
+	stride = 1 << port->regshift;
 
 	/* Check if a translated MMIO address has been found */
 	if (info->taddr) {
 		addr = ioremap(info->taddr, 0x1000);
 		if (addr == NULL)
 			return;
-		udbg_uart_init_mmio(addr, 1);
+		udbg_uart_init_mmio(addr, stride);
 	} else {
 		/* Check if it's PIO and we support untranslated PIO */
 		if (port->iotype == UPIO_PORT && isa_io_special)
-			udbg_uart_init_pio(port->iobase, 1);
+			udbg_uart_init_pio(port->iobase, stride);
 		else
 			return;
 	}

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3d02495..4e314b9 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S

@@ -34,7 +34,7 @@
 	std	r0,16(r1)
 	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
 	mr	r1,r3
-	bl	.__do_softirq
+	bl	__do_softirq
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
@@ -45,7 +45,7 @@
 	std	r0,16(r1)
 	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
 	mr	r1,r4
-	bl	.__do_irq
+	bl	__do_irq
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
@@ -506,7 +506,7 @@
 	stb	r4,PACAKEXECSTATE(r13)
 	SYNC
 
-	b	.kexec_wait
+	b	kexec_wait
 
 /*
  * switch to real mode (turn mmu off)
@@ -576,7 +576,7 @@
 
 	/* copy dest pages, flush whole dest image */
 	mr	r3,r29
-	bl	.kexec_copy_flush	/* (image) */
+	bl	kexec_copy_flush	/* (image) */
 
 	/* turn off mmu */
 	bl	real_mode
@@ -586,7 +586,7 @@
 	mr	r4,r30		/* start, aka phys mem offset */
 	li	r5,0x100
 	li	r6,0
-	bl	.copy_and_flush	/* (dest, src, copy limit, start offset) */
+	bl	copy_and_flush	/* (dest, src, copy limit, start offset) */
 1:	/* assume normal blr return */
 
 	/* release other cpus to the new kernel secondary start at 0x60 */
@@ -595,8 +595,12 @@
 	stw	r6,kexec_flag-1b(5)
 
 	/* clear out hardware hash page table and tlb */
-	ld	r5,0(r27)		/* deref function descriptor */
-	mtctr	r5
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+	ld	r12,0(r27)		/* deref function descriptor */
+#else
+	mr	r12,r27
+#endif
+	mtctr	r12
 	bctrl				/* ppc_md.hpte_clear_all(void); */
 
 /*
@@ -630,3 +634,31 @@
 	li	r5,0
 	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
+
+#ifdef CONFIG_MODULES
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#ifdef CONFIG_MODVERSIONS
+.weak __crc_TOC.
+.section "___kcrctab+TOC.","a"
+.globl __kcrctab_TOC.
+__kcrctab_TOC.:
+	.llong	__crc_TOC.
+#endif
+
+/*
+ * Export a fake .TOC. since both modpost and depmod will complain otherwise.
+ * Both modpost and depmod strip the leading . so we do the same here.
+ */
+.section "__ksymtab_strings","a"
+__kstrtab_TOC.:
+	.asciz "TOC."
+
+.section "___ksymtab+TOC.","a"
+/* This symbol name is important: it's used by modpost to find exported syms */
+.globl __ksymtab_TOC.
+__ksymtab_TOC.:
+	.llong 0 /* .value */
+	.llong __kstrtab_TOC.
+#endif /* ELFv2 */
+#endif /* MODULES */

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 12664c1..077d2ce 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c

@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/bug.h>
+#include <linux/uaccess.h>
 #include <asm/module.h>
 #include <asm/firmware.h>
 #include <asm/code-patching.h>
@@ -41,46 +42,170 @@
 #define DEBUGP(fmt , ...)
 #endif
 
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+
+/* An address is simply the address of the function. */
+typedef unsigned long func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+	return addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+	return addr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+	return func;
+}
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field.  */
+#define STO_PPC64_LOCAL_BIT	5
+#define STO_PPC64_LOCAL_MASK	(7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other)					\
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	/* sym->st_other indicates offset to local entry point
+	 * (otherwise it will assume r12 is the address of the start
+	 * of function and try to derive r2 from it). */
+	return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#else
+#define R2_STACK_OFFSET 40
+
+/* An address is address of the OPD entry, which contains address of fn. */
+typedef struct ppc64_opd_entry func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+	return *(struct ppc64_opd_entry *)addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+	return func_desc(addr).funcaddr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+	return func.funcaddr;
+}
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	return 0;
+}
+#endif
+
 /* Like PPC32, we need little trampolines to do > 24-bit jumps (into
    the kernel itself).  But on PPC64, these need to be used for every
    jump, actually, to reset r2 (TOC+0x8000). */
 struct ppc64_stub_entry
 {
-	/* 28 byte jump instruction sequence (7 instructions) */
-	unsigned char jump[28];
-	unsigned char unused[4];
+	/* 28 byte jump instruction sequence (7 instructions). We only
+	 * need 6 instructions on ABIv2 but we always allocate 7 so
+	 * we don't have to modify the trampoline load instruction. */
+	u32 jump[7];
+	u32 unused;
 	/* Data for the above code */
-	struct ppc64_opd_entry opd;
+	func_desc_t funcdata;
 };
 
-/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
-   function which may be more than 24-bits away.  We could simply
-   patch the new r2 value and function pointer into the stub, but it's
-   significantly shorter to put these values at the end of the stub
-   code, and patch the stub address (32-bits relative to the TOC ptr,
-   r2) into the stub. */
-static struct ppc64_stub_entry ppc64_stub =
-{ .jump = {
-#ifdef __LITTLE_ENDIAN__
-	0x00, 0x00, 0x82, 0x3d, /* addis   r12,r2, <high> */
-	0x00, 0x00, 0x8c, 0x39, /* addi    r12,r12, <low> */
+/*
+ * PPC64 uses 24 bit jumps, but we need to jump into other modules or
+ * the kernel which may be further.  So we jump to a stub.
+ *
+ * For ELFv1 we need to use this to set up the new r2 value (aka TOC
+ * pointer).  For ELFv2 it's the callee's responsibility to set up the
+ * new r2, but for both we need to save the old r2.
+ *
+ * We could simply patch the new r2 value and function pointer into
+ * the stub, but it's significantly shorter to put these values at the
+ * end of the stub code, and patch the stub address (32-bits relative
+ * to the TOC ptr, r2) into the stub.
+ */
+
+static u32 ppc64_stub_insns[] = {
+	0x3d620000,			/* addis   r11,r2, <high> */
+	0x396b0000,			/* addi    r11,r11, <low> */
 	/* Save current r2 value in magic place on the stack. */
-	0x28, 0x00, 0x41, 0xf8, /* std     r2,40(r1) */
-	0x20, 0x00, 0x6c, 0xe9, /* ld      r11,32(r12) */
-	0x28, 0x00, 0x4c, 0xe8, /* ld      r2,40(r12) */
-	0xa6, 0x03, 0x69, 0x7d, /* mtctr   r11 */
-	0x20, 0x04, 0x80, 0x4e  /* bctr */
-#else
-	0x3d, 0x82, 0x00, 0x00, /* addis   r12,r2, <high> */
-	0x39, 0x8c, 0x00, 0x00, /* addi    r12,r12, <low> */
-	/* Save current r2 value in magic place on the stack. */
-	0xf8, 0x41, 0x00, 0x28, /* std     r2,40(r1) */
-	0xe9, 0x6c, 0x00, 0x20, /* ld      r11,32(r12) */
-	0xe8, 0x4c, 0x00, 0x28, /* ld      r2,40(r12) */
-	0x7d, 0x69, 0x03, 0xa6, /* mtctr   r11 */
-	0x4e, 0x80, 0x04, 0x20  /* bctr */
+	0xf8410000|R2_STACK_OFFSET,	/* std     r2,R2_STACK_OFFSET(r1) */
+	0xe98b0020,			/* ld      r12,32(r11) */
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+	/* Set up new r2 from function descriptor */
+	0xe84b0028,			/* ld      r2,40(r11) */
 #endif
-} };
+	0x7d8903a6,			/* mtctr   r12 */
+	0x4e800420			/* bctr */
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static u32 ppc64_stub_mask[] = {
+	0xffff0000,
+	0xffff0000,
+	0xffffffff,
+	0xffffffff,
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+	0xffffffff,
+#endif
+	0xffffffff,
+	0xffffffff
+};
+
+bool is_module_trampoline(u32 *p)
+{
+	unsigned int i;
+	u32 insns[ARRAY_SIZE(ppc64_stub_insns)];
+
+	BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask));
+
+	if (probe_kernel_read(insns, p, sizeof(insns)))
+		return false;	/* a bool: -EFAULT would read as true */
+
+	for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+		u32 insna = insns[i];
+		u32 insnb = ppc64_stub_insns[i];
+		u32 mask = ppc64_stub_mask[i];	/* ignores patched imm16 */
+
+		if ((insna & mask) != (insnb & mask))
+			return false;
+	}
+
+	return true;
+}
+
+int module_trampoline_target(struct module *mod, u32 *trampoline,
+			     unsigned long *target)
+{
+	u32 buf[2];
+	u16 upper, lower;
+	long offset;
+	void *toc_entry;
+
+	if (probe_kernel_read(buf, trampoline, sizeof(buf)))
+		return -EFAULT;
+
+	upper = buf[0] & 0xffff;
+	lower = buf[1] & 0xffff;
+
+	/* perform the addis/addi, both signed */
+	offset = ((short)upper << 16) + (short)lower;
+
+	/*
+	 * Now get the address this trampoline jumps to. This
+	 * is always 32 bytes into our trampoline stub.
+	 */
+	toc_entry = (void *)mod->arch.toc + offset + 32;
+
+	if (probe_kernel_read(target, toc_entry, sizeof(*target)))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
 
 /* Count how many different 24-bit relocations (different symbol,
    different addend) */
@@ -183,6 +308,7 @@
 	return relocs * sizeof(struct ppc64_stub_entry);
 }
 
+/* Still needed for ELFv2, for .TOC. */
 static void dedotify_versions(struct modversion_info *vers,
 			      unsigned long size)
 {
@@ -193,7 +319,7 @@
 			memmove(vers->name, vers->name+1, strlen(vers->name));
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname */
+/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
 	unsigned int i;
@@ -207,6 +333,24 @@
 	}
 }
 
+static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
+			       const char *strtab,
+			       unsigned int symindex)
+{
+	unsigned int i, numsyms;
+	Elf64_Sym *syms;
+
+	syms = (Elf64_Sym *)sechdrs[symindex].sh_addr;
+	numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
+
+	for (i = 1; i < numsyms; i++) {
+		if (syms[i].st_shndx == SHN_UNDEF
+		    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
+			return &syms[i];
+	}
+	return NULL;
+}
+
 int module_frob_arch_sections(Elf64_Ehdr *hdr,
 			      Elf64_Shdr *sechdrs,
 			      char *secstrings,
@@ -271,21 +415,12 @@
 /* Patch stub to reference function and correct r2 value. */
 static inline int create_stub(Elf64_Shdr *sechdrs,
 			      struct ppc64_stub_entry *entry,
-			      struct ppc64_opd_entry *opd,
+			      unsigned long addr,
 			      struct module *me)
 {
-	Elf64_Half *loc1, *loc2;
 	long reladdr;
 
-	*entry = ppc64_stub;
-
-#ifdef __LITTLE_ENDIAN__
-	loc1 = (Elf64_Half *)&entry->jump[0];
-	loc2 = (Elf64_Half *)&entry->jump[4];
-#else
-	loc1 = (Elf64_Half *)&entry->jump[2];
-	loc2 = (Elf64_Half *)&entry->jump[6];
-#endif
+	memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
 
 	/* Stub uses address relative to r2. */
 	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
@@ -296,35 +431,33 @@
 	}
 	DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
 
-	*loc1 = PPC_HA(reladdr);
-	*loc2 = PPC_LO(reladdr);
-	entry->opd.funcaddr = opd->funcaddr;
-	entry->opd.r2 = opd->r2;
+	entry->jump[0] |= PPC_HA(reladdr);
+	entry->jump[1] |= PPC_LO(reladdr);
+	entry->funcdata = func_desc(addr);
 	return 1;
 }
 
-/* Create stub to jump to function described in this OPD: we need the
+/* Create stub to jump to function described in this OPD/ptr: we need the
    stub to set up the TOC ptr (r2) for the function. */
 static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
-				   unsigned long opdaddr,
+				   unsigned long addr,
 				   struct module *me)
 {
 	struct ppc64_stub_entry *stubs;
-	struct ppc64_opd_entry *opd = (void *)opdaddr;
 	unsigned int i, num_stubs;
 
 	num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
 
 	/* Find this stub, or if that fails, the next avail. entry */
 	stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
-	for (i = 0; stubs[i].opd.funcaddr; i++) {
+	for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
 		BUG_ON(i >= num_stubs);
 
-		if (stubs[i].opd.funcaddr == opd->funcaddr)
+		if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
 			return (unsigned long)&stubs[i];
 	}
 
-	if (!create_stub(sechdrs, &stubs[i], opd, me))
+	if (!create_stub(sechdrs, &stubs[i], addr, me))
 		return 0;
 
 	return (unsigned long)&stubs[i];
@@ -339,7 +472,8 @@
 		       me->name, *instruction);
 		return 0;
 	}
-	*instruction = 0xe8410028;	/* ld r2,40(r1) */
+	/* ld r2,R2_STACK_OFFSET(r1) */
+	*instruction = 0xe8410000 | R2_STACK_OFFSET;
 	return 1;
 }
 
@@ -357,6 +491,17 @@
 
 	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
+
+	/* First time we're called, we can fix up .TOC. */
+	if (!me->arch.toc_fixed) {
+		sym = find_dot_toc(sechdrs, strtab, symindex);
+		/* It's theoretically possible that a module doesn't want a
+		 * .TOC. so don't fail it just for that. */
+		if (sym)
+			sym->st_value = my_r2(sechdrs, me);
+		me->arch.toc_fixed = true;
+	}
+
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
 		/* This is where to make the change */
 		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -453,7 +598,8 @@
 					return -ENOENT;
 				if (!restore_r2((u32 *)location + 1, me))
 					return -ENOEXEC;
-			}
+			} else
+				value += local_entry_offset(sym);
 
 			/* Convert value to relative */
 			value -= (unsigned long)location;
@@ -474,6 +620,31 @@
 			*location = value - (unsigned long)location;
 			break;
 
+		case R_PPC64_TOCSAVE:
+			/*
+			 * Marker reloc indicates we don't have to save r2.
+			 * That would only save us one instruction, so ignore
+			 * it.
+			 */
+			break;
+
+		case R_PPC64_REL16_HA:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			value = ((value + 0x8000) >> 16);
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_REL16_LO:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
 		default:
 			printk("%s: Unknown ADD relocation: %lu\n",
 			       me->name,

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 24d342e..b49c72f 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c

@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
@@ -120,6 +121,25 @@
 	return 1;
 }
 
+void pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	if (ppc_md.pcibios_reset_secondary_bus) {
+		ppc_md.pcibios_reset_secondary_bus(dev);
+		return;
+	}
+
+	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+	ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+	msleep(2);
+
+	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+	ssleep(1);
+}
+
 static resource_size_t pcibios_io_size(const struct pci_controller *hose)
 {
 #ifdef CONFIG_PPC64
@@ -646,60 +666,36 @@
 void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				  struct device_node *dev, int primary)
 {
-	const __be32 *ranges;
-	int rlen;
-	int pna = of_n_addr_cells(dev);
-	int np = pna + 5;
 	int memno = 0;
-	u32 pci_space;
-	unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
 	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
 
 	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
 	       dev->full_name, primary ? "(primary)" : "");
 
-	/* Get ranges property */
-	ranges = of_get_property(dev, "ranges", &rlen);
-	if (ranges == NULL)
+	/* Check for ranges property */
+	if (of_pci_range_parser_init(&parser, dev))
 		return;
 
 	/* Parse it */
-	while ((rlen -= np * 4) >= 0) {
-		/* Read next ranges element */
-		pci_space = of_read_number(ranges, 1);
-		pci_addr = of_read_number(ranges + 1, 2);
-		cpu_addr = of_translate_address(dev, ranges + 3);
-		size = of_read_number(ranges + pna + 3, 2);
-		ranges += np;
-
+	for_each_of_pci_range(&parser, &range) {
 		/* If we failed translation or got a zero-sized region
 		 * (some FW try to feed us with non sensical zero sized regions
 		 * such as power3 which look like some kind of attempt at exposing
 		 * the VGA memory hole)
 		 */
-		if (cpu_addr == OF_BAD_ADDR || size == 0)
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
 			continue;
 
-		/* Now consume following elements while they are contiguous */
-		for (; rlen >= np * sizeof(u32);
-		     ranges += np, rlen -= np * 4) {
-			if (of_read_number(ranges, 1) != pci_space)
-				break;
-			pci_next = of_read_number(ranges + 1, 2);
-			cpu_next = of_translate_address(dev, ranges + 3);
-			if (pci_next != pci_addr + size ||
-			    cpu_next != cpu_addr + size)
-				break;
-			size += of_read_number(ranges + pna + 3, 2);
-		}
-
 		/* Act based on address space type */
 		res = NULL;
-		switch ((pci_space >> 24) & 0x3) {
-		case 1:		/* PCI IO space */
+		switch (range.flags & IORESOURCE_TYPE_BITS) {
+		case IORESOURCE_IO:
 			printk(KERN_INFO
 			       "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
-			       cpu_addr, cpu_addr + size - 1, pci_addr);
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr);
 
 			/* We support only one IO range */
 			if (hose->pci_io_size) {
@@ -709,11 +705,12 @@
 			}
 #ifdef CONFIG_PPC32
 			/* On 32 bits, limit I/O space to 16MB */
-			if (size > 0x01000000)
-				size = 0x01000000;
+			if (range.size > 0x01000000)
+				range.size = 0x01000000;
 
 			/* 32 bits needs to map IOs here */
-			hose->io_base_virt = ioremap(cpu_addr, size);
+			hose->io_base_virt = ioremap(range.cpu_addr,
+						range.size);
 
 			/* Expect trouble if pci_addr is not 0 */
 			if (primary)
@@ -723,20 +720,20 @@
 			/* pci_io_size and io_base_phys always represent IO
 			 * space starting at 0 so we factor in pci_addr
 			 */
-			hose->pci_io_size = pci_addr + size;
-			hose->io_base_phys = cpu_addr - pci_addr;
+			hose->pci_io_size = range.pci_addr + range.size;
+			hose->io_base_phys = range.cpu_addr - range.pci_addr;
 
 			/* Build resource */
 			res = &hose->io_resource;
-			res->flags = IORESOURCE_IO;
-			res->start = pci_addr;
+			range.cpu_addr = range.pci_addr;
 			break;
-		case 2:		/* PCI Memory space */
-		case 3:		/* PCI 64 bits Memory space */
+		case IORESOURCE_MEM:
 			printk(KERN_INFO
 			       " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
-			       cpu_addr, cpu_addr + size - 1, pci_addr,
-			       (pci_space & 0x40000000) ? "Prefetch" : "");
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr,
+			       (range.pci_space & 0x40000000) ?
+			       "Prefetch" : "");
 
 			/* We support only 3 memory ranges */
 			if (memno >= 3) {
@@ -745,28 +742,21 @@
 				continue;
 			}
 			/* Handles ISA memory hole space here */
-			if (pci_addr == 0) {
+			if (range.pci_addr == 0) {
 				if (primary || isa_mem_base == 0)
-					isa_mem_base = cpu_addr;
-				hose->isa_mem_phys = cpu_addr;
-				hose->isa_mem_size = size;
+					isa_mem_base = range.cpu_addr;
+				hose->isa_mem_phys = range.cpu_addr;
+				hose->isa_mem_size = range.size;
 			}
 
 			/* Build resource */
-			hose->mem_offset[memno] = cpu_addr - pci_addr;
+			hose->mem_offset[memno] = range.cpu_addr -
+							range.pci_addr;
 			res = &hose->mem_resources[memno++];
-			res->flags = IORESOURCE_MEM;
-			if (pci_space & 0x40000000)
-				res->flags |= IORESOURCE_PREFETCH;
-			res->start = cpu_addr;
 			break;
 		}
 		if (res != NULL) {
-			res->name = dev->full_name;
-			res->end = res->start + size - 1;
-			res->parent = NULL;
-			res->sibling = NULL;
-			res->child = NULL;
+			of_pci_range_to_resource(&range, dev, res);
 		}
 	}
 }

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 059e244..44562aa 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c

@@ -304,6 +304,9 @@
 	struct pci_dev *dev = NULL;
 	const __be32 *reg;
 	int reglen, devfn;
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+#endif
 
 	pr_debug("  * %s\n", dn->full_name);
 	if (!of_device_is_available(dn))
@@ -321,6 +324,12 @@
 		return dev;
 	}
 
+	/* Device removed permanently ? */
+#ifdef CONFIG_EEH
+	if (edev && (edev->mode & EEH_DEV_REMOVED))
+		return NULL;
+#endif
+
 	/* create a new pci_dev for this device */
 	dev = of_create_pci_dev(dn, bus, devfn);
 	if (!dev)

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 450850a..48d17d6f 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c

@@ -155,9 +155,7 @@
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
-#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 31d0215..be99774 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c

@@ -54,6 +54,7 @@
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #endif
+#include <asm/code-patching.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 
@@ -495,14 +496,21 @@
 	return 0;
 }
 
-int set_breakpoint(struct arch_hw_breakpoint *brk)
+void __set_breakpoint(struct arch_hw_breakpoint *brk)
 {
 	__get_cpu_var(current_brk) = *brk;
 
 	if (cpu_has_feature(CPU_FTR_DAWR))
-		return set_dawr(brk);
+		set_dawr(brk);
+	else
+		set_dabr(brk);
+}
 
-	return set_dabr(brk);
+void set_breakpoint(struct arch_hw_breakpoint *brk)
+{
+	preempt_disable();
+	__set_breakpoint(brk);
+	preempt_enable();
 }
 
 #ifdef CONFIG_PPC64
@@ -747,15 +755,15 @@
 
 	WARN_ON(!irqs_disabled());
 
-	/* Back up the TAR across context switches.
+	/* Back up the TAR and DSCR across context switches.
 	 * Note that the TAR is not available for use in the kernel.  (To
 	 * provide this, the TAR should be backed up/restored on exception
 	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
 	 * pt_regs anyway (for debug).)
-	 * Save the TAR here before we do treclaim/trecheckpoint as these
-	 * will change the TAR.
+	 * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
+	 * these will change them.
 	 */
-	save_tar(&prev->thread);
+	save_early_sprs(&prev->thread);
 
 	__switch_to_tm(prev);
 
@@ -834,7 +842,7 @@
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 	if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
-		set_breakpoint(&new->thread.hw_brk);
+		__set_breakpoint(&new->thread.hw_brk);
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
@@ -1108,7 +1116,9 @@
 		struct thread_info *ti = (void *)task_stack_page(p);
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gpr[1] = sp + sizeof(struct pt_regs);
-		childregs->gpr[14] = usp;	/* function */
+		/* function */
+		if (usp)
+			childregs->gpr[14] = ppc_function_entry((void *)usp);
 #ifdef CONFIG_PPC64
 		clear_tsk_thread_flag(p, TIF_32BIT);
 		childregs->softe = 1;
@@ -1187,17 +1197,7 @@
 	if (cpu_has_feature(CPU_FTR_HAS_PPR))
 		p->thread.ppr = INIT_PPR;
 #endif
-	/*
-	 * The PPC64 ABI makes use of a TOC to contain function 
-	 * pointers.  The function (ret_from_except) is actually a pointer
-	 * to the TOC entry.  The first entry is a pointer to the actual
-	 * function.
-	 */
-#ifdef CONFIG_PPC64
-	kregs->nip = *((unsigned long *)f);
-#else
-	kregs->nip = (unsigned long)f;
-#endif
+	kregs->nip = ppc_function_entry(f);
 	return 0;
 }
 

diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index b0c263d..77aa1e9 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh

@@ -23,7 +23,7 @@
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry
 boot_command_line __prom_init_toc_start __prom_init_toc_end
-btext_setup_display"
+btext_setup_display TOC."
 
 NM="$1"
 OBJ="$2"

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 7d4c717..c168337 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c

@@ -80,10 +80,6 @@
 	if (ret)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	if (returnval == EEH_IO_ERROR_VALUE(size) &&
-	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -92,18 +88,39 @@
 				int where, int size, u32 *val)
 {
 	struct device_node *busdn, *dn;
-
-	busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+	bool found = false;
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev;
+#endif
+	int ret;
 
 	/* Search only direct children of the bus */
+	*val = 0xFFFFFFFF;
+	busdn = pci_bus_to_OF_node(bus);
 	for (dn = busdn->child; dn; dn = dn->sibling) {
-		struct pci_dn *pdn = PCI_DN(dn);
+		pdn = PCI_DN(dn);
 		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn))
-			return rtas_read_config(pdn, where, size, val);
+		    && of_device_is_available(dn)) {
+			found = true;
+			break;
+		}
 	}
 
-	return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!found)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+	edev = of_node_to_eeh_dev(dn);
+	if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+
+	ret = rtas_read_config(pdn, where, size, val);
+	if (*val == EEH_IO_ERROR_VALUE(size) &&
+	    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return ret;
 }
 
 int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
@@ -136,17 +153,34 @@
 				 int where, int size, u32 val)
 {
 	struct device_node *busdn, *dn;
-
-	busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+	bool found = false;
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev;
+#endif
+	int ret;
 
 	/* Search only direct children of the bus */
+	busdn = pci_bus_to_OF_node(bus);
 	for (dn = busdn->child; dn; dn = dn->sibling) {
-		struct pci_dn *pdn = PCI_DN(dn);
+		pdn = PCI_DN(dn);
 		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn))
-			return rtas_write_config(pdn, where, size, val);
+		    && of_device_is_available(dn)) {
+			found = true;
+			break;
+		}
 	}
-	return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!found)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+	edev = of_node_to_eeh_dev(dn);
+	if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+	ret = rtas_write_config(pdn, where, size, val);
+
+	return ret;
 }
 
 static struct pci_ops rtas_pci_ops = {

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 79b7612..e239df3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c

@@ -212,6 +212,7 @@
 {
 	unsigned long cpu_id = (unsigned long)v - 1;
 	unsigned int pvr;
+	unsigned long proc_freq;
 	unsigned short maj;
 	unsigned short min;
 
@@ -263,12 +264,19 @@
 #endif /* CONFIG_TAU */
 
 	/*
-	 * Assume here that all clock rates are the same in a
-	 * smp system.  -- Cort
+	 * Platforms that have variable clock rates should implement
+	 * the method ppc_md.get_proc_freq() that reports the clock
+	 * rate of a given cpu. The rest can use ppc_proc_freq to
+	 * report the clock rate that is the same across all cpus.
 	 */
-	if (ppc_proc_freq)
+	if (ppc_md.get_proc_freq)
+		proc_freq = ppc_md.get_proc_freq(cpu_id);
+	else
+		proc_freq = ppc_proc_freq;
+
+	if (proc_freq)
 		seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
-			   ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+			   proc_freq / 1000000, proc_freq % 1000000);
 
 	if (ppc_md.show_percpuinfo != NULL)
 		ppc_md.show_percpuinfo(m, cpu_id);
@@ -382,9 +390,10 @@
 
 #ifdef CONFIG_SMP
 
-int threads_per_core, threads_shift;
+int threads_per_core, threads_per_subcore, threads_shift;
 cpumask_t threads_core_mask;
 EXPORT_SYMBOL_GPL(threads_per_core);
+EXPORT_SYMBOL_GPL(threads_per_subcore);
 EXPORT_SYMBOL_GPL(threads_shift);
 EXPORT_SYMBOL_GPL(threads_core_mask);
 
@@ -393,6 +402,7 @@
 	int i;
 
 	threads_per_core = tpc;
+	threads_per_subcore = tpc;
 	cpumask_clear(&threads_core_mask);
 
 	/* This implementation only supports power of 2 number of threads
@@ -461,7 +471,7 @@
 		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, be32_to_cpu(intserv[j]));
-			set_cpu_present(cpu, true);
+			set_cpu_present(cpu, of_device_is_available(dn));
 			set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
 			set_cpu_possible(cpu, true);
 			cpu++;
@@ -718,33 +728,6 @@
 arch_initcall(powerpc_debugfs_init);
 #endif
 
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-
-/* Checks wdt=x and wdt_period=xx command-line option */
-notrace int __init early_parse_wdt(char *p)
-{
-	if (p && strncmp(p, "0", 1) != 0)
-		booke_wdt_enabled = 1;
-
-	return 0;
-}
-early_param("wdt", early_parse_wdt);
-
-int __init early_parse_wdt_period(char *p)
-{
-	unsigned long ret;
-	if (p) {
-		if (!kstrtol(p, 0, &ret))
-			booke_wdt_period = ret;
-	}
-
-	return 0;
-}
-early_param("wdt_period", early_parse_wdt_period);
-#endif	/* CONFIG_BOOKE_WDT */
-
 void ppc_printk_progress(char *s, unsigned short hex)
 {
 	pr_info("%s\n", s);

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fbe2437..ee082d7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c

@@ -36,6 +36,7 @@
 #include <linux/lockdep.h>
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
+#include <linux/memory.h>
 
 #include <asm/io.h>
 #include <asm/kdump.h>
@@ -341,7 +342,7 @@
 
 	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
 			- PHYSICAL_START);
-	*ptr = __pa(generic_secondary_smp_init);
+	*ptr = ppc_function_entry(generic_secondary_smp_init);
 
 	/* And wait a bit for them to catch up */
 	for (i = 0; i < 100000; i++) {
@@ -780,6 +781,15 @@
 }
 #endif
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+unsigned long memory_block_size_bytes(void)
+{
+	if (ppc_md.memory_block_size)
+		return ppc_md.memory_block_size();
+
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
 
 #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
 struct ppc_pci_io ppc_pci_io;

diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 8fc4177..1c794ce 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c

@@ -134,7 +134,7 @@
 	 */
 	if (current->thread.hw_brk.address &&
 		current->thread.hw_brk.type)
-		set_breakpoint(&current->thread.hw_brk);
+		__set_breakpoint(&current->thread.hw_brk);
 #endif
 	/* Re-enable the breakpoints for the signal stack */
 	thread_change_pc(current, regs);

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 10ffffe..51a3ff7 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c

@@ -36,6 +36,7 @@
 #include <linux/atomic.h>
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
+#include <asm/kvm_ppc.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/prom.h>
@@ -390,6 +391,7 @@
 #ifdef CONFIG_PPC64
 	paca[boot_cpuid].__current = current;
 #endif
+	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
 	current_set[boot_cpuid] = task_thread_info(current);
 }
 
@@ -457,38 +459,9 @@
 	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
 }
 
-static atomic_t secondary_inhibit_count;
-
-/*
- * Don't allow secondary CPU threads to come online
- */
-void inhibit_secondary_onlining(void)
+static bool secondaries_inhibited(void)
 {
-	/*
-	 * This makes secondary_inhibit_count stable during cpu
-	 * online/offline operations.
-	 */
-	get_online_cpus();
-
-	atomic_inc(&secondary_inhibit_count);
-	put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);
-
-/*
- * Allow secondary CPU threads to come online again
- */
-void uninhibit_secondary_onlining(void)
-{
-	get_online_cpus();
-	atomic_dec(&secondary_inhibit_count);
-	put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);
-
-static int secondaries_inhibited(void)
-{
-	return atomic_read(&secondary_inhibit_count);
+	return kvm_hv_mode_active();
 }
 
 #else /* HOTPLUG_CPU */
@@ -517,7 +490,7 @@
 	 * Don't allow secondary threads to come online if inhibited
 	 */
 	if (threads_per_core > 1 && secondaries_inhibited() &&
-	    cpu % threads_per_core != 0)
+	    cpu_thread_in_subcore(cpu))
 		return -EBUSY;
 
 	if (smp_ops == NULL ||
@@ -750,6 +723,12 @@
 	}
 	traverse_core_siblings(cpu, true);
 
+	/*
+	 * numa_node_id() works after this.
+	 */
+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
 	smp_wmb();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
@@ -770,7 +749,7 @@
 /* cpumask of CPUs with asymetric SMT dependancy */
 static const int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index d90d4b7..67fd2fd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c

@@ -404,7 +404,7 @@
 }
 EXPORT_SYMBOL(ppc_enable_pmcs);
 
-#define __SYSFS_SPRSETUP(NAME, ADDRESS, EXTRA) \
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
 static void read_##NAME(void *val) \
 { \
 	*(unsigned long *)val = mfspr(ADDRESS);	\
@@ -413,7 +413,9 @@
 { \
 	EXTRA; \
 	mtspr(ADDRESS, *(unsigned long *)val);	\
-} \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
 static ssize_t show_##NAME(struct device *dev, \
 			struct device_attribute *attr, \
 			char *buf) \
@@ -436,10 +438,15 @@
 	return count; \
 }
 
-#define SYSFS_PMCSETUP(NAME, ADDRESS)	\
-	__SYSFS_SPRSETUP(NAME, ADDRESS, ppc_enable_pmcs())
-#define SYSFS_SPRSETUP(NAME, ADDRESS)	\
-	__SYSFS_SPRSETUP(NAME, ADDRESS, )
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
 
 /* Let's define all possible registers, we'll only hook up the ones
  * that are implemented on the current processor
@@ -477,7 +484,6 @@
 SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
-SYSFS_SPRSETUP(dscr, SPRN_DSCR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
 
 /*
@@ -487,12 +493,27 @@
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 
-unsigned long dscr_default = 0;
-EXPORT_SYMBOL(dscr_default);
+static unsigned long dscr_default;
+
+static void read_dscr(void *val)
+{
+	*(unsigned long *)val = get_paca()->dscr_default;
+}
+
+static void write_dscr(void *val)
+{
+	get_paca()->dscr_default = *(unsigned long *)val;
+	if (!current->thread.dscr_inherit) {
+		current->thread.dscr = *(unsigned long *)val;
+		mtspr(SPRN_DSCR, *(unsigned long *)val);
+	}
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
 
 static void add_write_permission_dev_attr(struct device_attribute *attr)
 {
@@ -505,14 +526,6 @@
 	return sprintf(buf, "%lx\n", dscr_default);
 }
 
-static void update_dscr(void *dummy)
-{
-	if (!current->thread.dscr_inherit) {
-		current->thread.dscr = dscr_default;
-		mtspr(SPRN_DSCR, dscr_default);
-	}
-}
-
 static ssize_t __used store_dscr_default(struct device *dev,
 		struct device_attribute *attr, const char *buf,
 		size_t count)
@@ -525,7 +538,7 @@
 		return -EINVAL;
 	dscr_default = val;
 
-	on_each_cpu(update_dscr, NULL, 1);
+	on_each_cpu(write_dscr, &val, 1);
 
 	return count;
 }

diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 93219c3..895c50c 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S

@@ -17,12 +17,12 @@
 #include <asm/ppc_asm.h>
 
 #ifdef CONFIG_PPC64
-#define SYSCALL(func)		.llong	.sys_##func,.sys_##func
-#define COMPAT_SYS(func)	.llong	.sys_##func,.compat_sys_##func
-#define PPC_SYS(func)		.llong	.ppc_##func,.ppc_##func
-#define OLDSYS(func)		.llong	.sys_ni_syscall,.sys_ni_syscall
-#define SYS32ONLY(func)		.llong	.sys_ni_syscall,.compat_sys_##func
-#define SYSX(f, f3264, f32)	.llong	.f,.f3264
+#define SYSCALL(func)		.llong	DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func)	.llong	DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func)		.llong	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define SYSX(f, f3264, f32)	.llong	DOTSYM(f),DOTSYM(f3264)
 #else
 #define SYSCALL(func)		.long	sys_##func
 #define COMPAT_SYS(func)	.long	sys_##func
@@ -36,6 +36,8 @@
 #define PPC_SYS_SPU(func)	PPC_SYS(func)
 #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32)
 
+.section .rodata,"a"
+
 #ifdef CONFIG_PPC64
 #define sys_sigpending	sys_ni_syscall
 #define sys_old_getrlimit sys_ni_syscall
@@ -43,5 +45,7 @@
 	.p2align	3
 #endif
 
-_GLOBAL(sys_call_table)
+.globl sys_call_table
+sys_call_table:
+
 #include <asm/systbl.h>

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7e711bd..9fff9cd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c

@@ -551,7 +551,7 @@
 	may_hard_irq_enable();
 
 
-#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 #endif

diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 03567c0..2a324f4 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S

@@ -10,6 +10,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/ptrace.h>
 #include <asm/reg.h>
+#include <asm/bug.h>
 
 #ifdef CONFIG_VSX
 /* See fpu.S, this is borrowed from there */
@@ -41,7 +42,6 @@
 /* Stack frame offsets for local variables. */
 #define TM_FRAME_L0	TM_FRAME_SIZE-16
 #define TM_FRAME_L1	TM_FRAME_SIZE-8
-#define STACK_PARAM(x)	(48+((x)*8))
 
 
 /* In order to access the TM SPRs, TM must be enabled.  So, do so: */
@@ -78,12 +78,6 @@
 	TABORT(R3)
 	blr
 
-	.section	".toc","aw"
-DSCR_DEFAULT:
-	.tc dscr_default[TC],dscr_default
-
-	.section	".text"
-
 /* void tm_reclaim(struct thread_struct *thread,
  *                 unsigned long orig_msr,
  *		   uint8_t cause)
@@ -108,12 +102,12 @@
 	mflr	r0
 	stw	r6, 8(r1)
 	std	r0, 16(r1)
-	std	r2, 40(r1)
+	std	r2, STK_GOT(r1)
 	stdu	r1, -TM_FRAME_SIZE(r1)
 
 	/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
 
-	std	r3, STACK_PARAM(0)(r1)
+	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
 
 	/* We need to setup MSR for VSX register save instructions.  Here we
@@ -175,6 +169,13 @@
 	stfd    fr0,FPSTATE_FPSCR(r7)
 
 dont_backup_fp:
+	/* Do sanity check on MSR to make sure we are suspended */
+	li	r7, (MSR_TS_S)@higher
+	srdi	r6, r14, 32
+	and	r6, r6, r7
+1:	tdeqi   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
 	/* The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state.  (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
@@ -202,7 +203,7 @@
 	/* Now get some more GPRS free */
 	std	r7, GPR7(r1)			/* Temporary stash */
 	std	r12, GPR12(r1)			/* ''   ''    ''   */
-	ld	r12, STACK_PARAM(0)(r1)		/* Param 0, thread_struct * */
+	ld	r12, STK_PARAM(R3)(r1)		/* Param 0, thread_struct * */
 
 	std	r11, THREAD_TM_PPR(r12)		/* Store PPR and free r11 */
 
@@ -289,11 +290,10 @@
 	ld	r0, 16(r1)
 	mtcr	r4
 	mtlr	r0
-	ld	r2, 40(r1)
+	ld	r2, STK_GOT(r1)
 
-	/* Load system default DSCR */
-	ld	r4, DSCR_DEFAULT@toc(r2)
-	ld	r0, 0(r4)
+	/* Load CPU's default DSCR */
+	ld	r0, PACA_DSCR(r13)
 	mtspr	SPRN_DSCR, r0
 
 	blr
@@ -312,7 +312,7 @@
 	mflr	r0
 	stw	r5, 8(r1)
 	std	r0, 16(r1)
-	std	r2, 40(r1)
+	std	r2, STK_GOT(r1)
 	stdu	r1, -TM_FRAME_SIZE(r1)
 
 	/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
@@ -320,8 +320,6 @@
 	 */
 	SAVE_NVGPRS(r1)
 
-	std	r1, PACAR1(r13)
-
 	/* Load complete register state from ts_ckpt* registers */
 
 	addi	r7, r3, PT_CKPT_REGS		/* Thread's ckpt_regs */
@@ -385,12 +383,10 @@
 	/* ******************** CR,LR,CCR,MSR ********** */
 	ld	r4, _CTR(r7)
 	ld	r5, _LINK(r7)
-	ld	r6, _CCR(r7)
 	ld	r8, _XER(r7)
 
 	mtctr	r4
 	mtlr	r5
-	mtcr	r6
 	mtxer	r8
 
 	/* ******************** TAR ******************** */
@@ -406,7 +402,8 @@
 	li	r4, 0
 	mtmsrd	r4, 1
 
-	REST_4GPRS(0, r7)			/* GPR0-3 */
+	REST_GPR(0, r7)				/* GPR0 */
+	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
 	REST_4GPRS(8, r7)			/* GPR8-11 */
 	REST_2GPRS(12, r7)			/* GPR12-13 */
@@ -418,6 +415,31 @@
 	mtspr	SPRN_DSCR, r5
 	mtspr	SPRN_PPR, r6
 
+	/* Do final sanity check on TEXASR to make sure FS is set.  Do this
+	 * here before we load up the userspace r1 so any bugs we hit will get
+	 * a call chain */
+	mfspr	r5, SPRN_TEXASR
+	srdi	r5, r5, 16
+	li	r6, (TEXASR_FS)@h
+	and	r6, r6, r5
+1:	tdeqi	r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Do final sanity check on MSR to make sure we are not transactional
+	 * or suspended
+	 */
+	mfmsr   r6
+	li	r5, (MSR_TS_MASK)@higher
+	srdi	r6, r6, 32
+	and	r6, r6, r5
+1:	tdnei   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Restore CR */
+	ld	r6, _CCR(r7)
+	mtcr    r6
+
+	REST_GPR(1, r7)				/* GPR1 */
 	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
 	ld	r7, GPR7(r7)
@@ -448,11 +470,10 @@
 	ld	r0, 16(r1)
 	mtcr	r4
 	mtlr	r0
-	ld	r2, 40(r1)
+	ld	r2, STK_GOT(r1)
 
-	/* Load system default DSCR */
-	ld	r4, DSCR_DEFAULT@toc(r2)
-	ld	r0, 0(r4)
+	/* Load CPU's default DSCR */
+	ld	r0, PACA_DSCR(r13)
 	mtspr	SPRN_DSCR, r0
 
 	blr

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1bd7ca2..239f1cd 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c

@@ -295,6 +295,8 @@
 {
 	long handled = 0;
 
+	__get_cpu_var(irq_stat).mce_exceptions++;
+
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;

diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a158375..b7aa072 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c

@@ -62,8 +62,6 @@
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
-	udbg_init_wsp();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
 	/* In memory console */
 	udbg_init_memcons();

diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 75702e2..6e7c492 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c

@@ -296,14 +296,3 @@
 }
 
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
-
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-
-void __init udbg_init_wsp(void)
-{
-	udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1);
-	udbg_uart_setup(57600, 50000000);
-}
-
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index aba05bb..7a12edb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c

@@ -1236,7 +1236,7 @@
 	int core;
 	struct kvmppc_vcore *vcore;
 
-	core = id / threads_per_core;
+	core = id / threads_per_subcore;
 	if (core >= KVM_MAX_VCORES)
 		goto out;
 
@@ -1286,7 +1286,7 @@
 			init_waitqueue_head(&vcore->wq);
 			vcore->preempt_tb = TB_NIL;
 			vcore->lpcr = kvm->arch.lpcr;
-			vcore->first_vcpuid = core * threads_per_core;
+			vcore->first_vcpuid = core * threads_per_subcore;
 			vcore->kvm = kvm;
 		}
 		kvm->arch.vcores[core] = vcore;
@@ -1476,16 +1476,19 @@
 static int on_primary_thread(void)
 {
 	int cpu = smp_processor_id();
-	int thr = cpu_thread_in_core(cpu);
+	int thr;
 
-	if (thr)
+	/* Are we on a primary subcore? */
+	if (cpu_thread_in_subcore(cpu))
 		return 0;
-	while (++thr < threads_per_core)
+
+	thr = 0;
+	while (++thr < threads_per_subcore)
 		if (cpu_online(cpu + thr))
 			return 0;
 
 	/* Grab all hw threads so they can't go into the kernel */
-	for (thr = 1; thr < threads_per_core; ++thr) {
+	for (thr = 1; thr < threads_per_subcore; ++thr) {
 		if (kvmppc_grab_hwthread(cpu + thr)) {
 			/* Couldn't grab one; let the others go */
 			do {
@@ -1544,15 +1547,18 @@
 	}
 
 	/*
-	 * Make sure we are running on thread 0, and that
-	 * secondary threads are offline.
+	 * Make sure we are running on primary threads, and that secondary
+	 * threads are offline.  Also check if the number of threads in this
+	 * guest is greater than the current system threads per guest.
 	 */
-	if (threads_per_core > 1 && !on_primary_thread()) {
+	if ((threads_per_core > 1) &&
+	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
 		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 			vcpu->arch.ret = -EBUSY;
 		goto out;
 	}
 
+
 	vc->pcpu = smp_processor_id();
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
@@ -1580,7 +1586,7 @@
 	/* wait for secondary threads to finish writing their state to memory */
 	if (vc->nap_count < vc->n_woken)
 		kvmppc_wait_for_nap(vc);
-	for (i = 0; i < threads_per_core; ++i)
+	for (i = 0; i < threads_per_subcore; ++i)
 		kvmppc_release_hwthread(vc->pcpu + i);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
@@ -2305,10 +2311,10 @@
 	spin_lock_init(&kvm->arch.slot_phys_lock);
 
 	/*
-	 * Don't allow secondary CPU threads to come online
-	 * while any KVM VMs exist.
+	 * Track that we now have a HV mode VM active. This blocks secondary
+	 * CPU threads from coming online.
 	 */
-	inhibit_secondary_onlining();
+	kvm_hv_vm_activated();
 
 	return 0;
 }
@@ -2324,7 +2330,7 @@
 
 static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
-	uninhibit_secondary_onlining();
+	kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
 	if (kvm->arch.rma) {

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 8cd0dae..7cde8a6 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c

@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpu.h>
 #include <linux/kvm_host.h>
 #include <linux/preempt.h>
 #include <linux/export.h>
@@ -181,3 +182,33 @@
 		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
 }
+
+/*
+ * When running HV mode KVM we need to block certain operations while KVM VMs
+ * exist in the system. We use a counter of VMs to track this.
+ *
+ * One of the operations we need to block is onlining of secondaries, so we
+ * protect hv_vm_count with get/put_online_cpus().
+ */
+static atomic_t hv_vm_count;
+
+void kvm_hv_vm_activated(void)
+{
+	get_online_cpus();
+	atomic_inc(&hv_vm_count);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
+
+void kvm_hv_vm_deactivated(void)
+{
+	get_online_cpus();
+	atomic_dec(&hv_vm_count);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
+
+bool kvm_hv_mode_active(void)
+{
+	return atomic_read(&hv_vm_count) != 0;
+}

diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index e18e3cf..8c86422 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S

@@ -171,7 +171,7 @@
 #endif /* CONFIG_SMP */
 
 	/* Jump to partition switch code */
-	bl	.kvmppc_hv_entry_trampoline
+	bl	kvmppc_hv_entry_trampoline
 	nop
 
 /*

diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 768a9f9..3a5c568 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c

@@ -113,10 +113,8 @@
 	 * We assume that if the condition is recovered then linux host
 	 * will have generated an error log event that we will pick
 	 * up and log later.
-	 * Don't release mce event now. In case if condition is not
-	 * recovered we do guest exit and go back to linux host machine
-	 * check handler. Hence we need make sure that current mce event
-	 * is available for linux host to consume.
+	 * Don't release mce event now. We will queue up the event so that
+	 * we can log the MCE event info on host console.
 	 */
 	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
 		goto out;
@@ -128,11 +126,12 @@
 
 out:
 	/*
-	 * If we have handled the error, then release the mce event because
-	 * we will be delivering machine check to guest.
+	 * We are now going to enter guest either through machine check
+	 * interrupt (for unhandled errors) or will continue from
+	 * current HSRR0 (for handled errors) in guest. Hence
+	 * queue up the event so that we can log it from host console later.
 	 */
-	if (handled)
-		release_mce_event();
+	machine_check_queue_event();
 
 	return handled;
 }

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9747934..868347e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -292,8 +292,7 @@
 	beq	kvm_no_guest
 
 	/* Set HSTATE_DSCR(r13) to something sensible */
-	LOAD_REG_ADDR(r6, dscr_default)
-	ld	r6, 0(r6)
+	ld	r6, PACA_DSCR(r13)
 	std	r6, HSTATE_DSCR(r13)
 
 	bl	kvmppc_hv_entry
@@ -1799,7 +1798,7 @@
 	/* Search the hash table. */
 	mr	r3, r9			/* vcpu pointer */
 	li	r7, 1			/* data fault */
-	bl	.kvmppc_hpte_hv_fault
+	bl	kvmppc_hpte_hv_fault
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	ld	r10, VCPU_PC(r9)
 	ld	r11, VCPU_MSR(r9)
@@ -1873,7 +1872,7 @@
 	mr	r4, r10
 	mr	r6, r11
 	li	r7, 0			/* instruction fault */
-	bl	.kvmppc_hpte_hv_fault
+	bl	kvmppc_hpte_hv_fault
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	ld	r10, VCPU_PC(r9)
 	ld	r11, VCPU_MSR(r9)
@@ -1947,16 +1946,16 @@
 	.globl	hcall_real_table
 hcall_real_table:
 	.long	0		/* 0 - unused */
-	.long	.kvmppc_h_remove - hcall_real_table
-	.long	.kvmppc_h_enter - hcall_real_table
-	.long	.kvmppc_h_read - hcall_real_table
+	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
 	.long	0		/* 0x10 - H_CLEAR_MOD */
 	.long	0		/* 0x14 - H_CLEAR_REF */
-	.long	.kvmppc_h_protect - hcall_real_table
-	.long	.kvmppc_h_get_tce - hcall_real_table
-	.long	.kvmppc_h_put_tce - hcall_real_table
+	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table
 	.long	0		/* 0x24 - H_SET_SPRG0 */
-	.long	.kvmppc_h_set_dabr - hcall_real_table
+	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
 	.long	0		/* 0x2c */
 	.long	0		/* 0x30 */
 	.long	0		/* 0x34 */
@@ -1972,11 +1971,11 @@
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
 #ifdef CONFIG_KVM_XICS
-	.long	.kvmppc_rm_h_eoi - hcall_real_table
-	.long	.kvmppc_rm_h_cppr - hcall_real_table
-	.long	.kvmppc_rm_h_ipi - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
 	.long	0		/* 0x70 - H_IPOLL */
-	.long	.kvmppc_rm_h_xirr - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
 #else
 	.long	0		/* 0x64 - H_EOI */
 	.long	0		/* 0x68 - H_CPPR */
@@ -2010,7 +2009,7 @@
 	.long	0		/* 0xd4 */
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
-	.long	.kvmppc_h_cede - hcall_real_table
+	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
 	.long	0		/* 0xe4 */
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
@@ -2027,11 +2026,11 @@
 	.long	0		/* 0x118 */
 	.long	0		/* 0x11c */
 	.long	0		/* 0x120 */
-	.long	.kvmppc_h_bulk_remove - hcall_real_table
+	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
 	.long	0		/* 0x128 */
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
-	.long	.kvmppc_h_set_xdabr - hcall_real_table
+	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
 hcall_real_table_end:
 
 ignore_hdec:
@@ -2256,17 +2255,30 @@
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
-	bl	.kvmppc_realmode_machine_check
+	bl	kvmppc_realmode_machine_check
 	nop
-	cmpdi	r3, 0		/* continue exiting from guest? */
+	cmpdi	r3, 0		/* Did we handle MCE ? */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	beq	mc_cont
+	/*
+	 * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest through
+	 * a machine check interrupt (set HSRR0 to 0x200). For handled
+	 * (non-fatal) errors, just go back to guest execution with the
+	 * current HSRR0 instead of exiting the guest. With this approach a
+	 * fatal error is injected into the guest, which makes it crash.
+	 *
+	 * The old code returned to the host for unhandled errors, which
+	 * left the guest hung with soft lockups and made it difficult to
+	 * recover the guest instance.
+	 */
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	bne	2f	/* Continue guest execution. */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
 	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	ld	r11, VCPU_MSR(r9)
 	bl	kvmppc_msr_interrupt
-	b	fast_interrupt_c_return
+2:	b	fast_interrupt_c_return
 
 /*
  * Check the reason we woke from nap, and take appropriate action.

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bab20f4..61c738a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c

@@ -426,7 +426,7 @@
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
 		if (hv_enabled)
-			r = threads_per_core;
+			r = threads_per_subcore;
 		else
 			r = 0;
 		break;

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 95a20e1..59fa2de 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile

@@ -23,9 +23,7 @@
 obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
 endif
 
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
 obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o 
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 

diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 9f9434a..a3c4dc4 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S

@@ -16,11 +16,11 @@
         .tc             ppc64_caches[TC],ppc64_caches
         .section        ".text"
 
-_GLOBAL(copy_page)
+_GLOBAL_TOC(copy_page)
 BEGIN_FTR_SECTION
 	lis	r5,PAGE_SIZE@h
 FTR_SECTION_ELSE
-	b	.copypage_power7
+	b	copypage_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION

diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index 395c594..d7dafb3 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S

@@ -56,15 +56,15 @@
 
 #ifdef CONFIG_ALTIVEC
 	mflr	r0
-	std	r3,48(r1)
-	std	r4,56(r1)
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	.enter_vmx_copy
+	bl	enter_vmx_copy
 	cmpwi	r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
-	ld	r3,STACKFRAMESIZE+48(r1)
-	ld	r4,STACKFRAMESIZE+56(r1)
+	ld	r3,STK_REG(R31)(r1)
+	ld	r4,STK_REG(R30)(r1)
 	mtlr	r0
 
 	li	r0,(PAGE_SIZE/128)
@@ -103,7 +103,7 @@
 	addi	r3,r3,128
 	bdnz	1b
 
-	b	.exit_vmx_copy		/* tail call optimise */
+	b	exit_vmx_copy		/* tail call optimise */
 
 #else
 	li	r0,(PAGE_SIZE/128)

diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 596a285..0860ee4 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S

@@ -18,7 +18,7 @@
 #endif
 
 	.align	7
-_GLOBAL(__copy_tofrom_user)
+_GLOBAL_TOC(__copy_tofrom_user)
 BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE

diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index e8e9c36..c46c876 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S

@@ -66,7 +66,7 @@
 	ld	r15,STK_REG(R15)(r1)
 	ld	r14,STK_REG(R14)(r1)
 .Ldo_err3:
-	bl	.exit_vmx_usercopy
+	bl	exit_vmx_usercopy
 	ld	r0,STACKFRAMESIZE+16(r1)
 	mtlr	r0
 	b	.Lexit
@@ -85,9 +85,9 @@
 .Lexit:
 	addi	r1,r1,STACKFRAMESIZE
 .Ldo_err1:
-	ld	r3,48(r1)
-	ld	r4,56(r1)
-	ld	r5,64(r1)
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 	b	__copy_tofrom_user_base
 
 
@@ -96,18 +96,18 @@
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
 
-	std	r3,48(r1)
-	std	r4,56(r1)
-	std	r5,64(r1)
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 
 	blt	.Lshort_copy
 	bgt	cr1,.Lvmx_copy
 #else
 	cmpldi	r5,16
 
-	std	r3,48(r1)
-	std	r4,56(r1)
-	std	r5,64(r1)
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 
 	blt	.Lshort_copy
 #endif
@@ -295,12 +295,12 @@
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	.enter_vmx_usercopy
+	bl	enter_vmx_usercopy
 	cmpwi	cr1,r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
-	ld	r3,STACKFRAMESIZE+48(r1)
-	ld	r4,STACKFRAMESIZE+56(r1)
-	ld	r5,STACKFRAMESIZE+64(r1)
+	ld	r3,STK_REG(R31)(r1)
+	ld	r4,STK_REG(R30)(r1)
+	ld	r5,STK_REG(R29)(r1)
 	mtlr	r0
 
 	/*
@@ -514,7 +514,7 @@
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_usercopy	/* tail call optimise */
+	b	exit_vmx_usercopy	/* tail call optimise */
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -717,5 +717,5 @@
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_usercopy	/* tail call optimise */
+	b	exit_vmx_usercopy	/* tail call optimise */
 #endif /* CONFiG_ALTIVEC */

diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 9b96ff2..19e6600 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S

@@ -24,7 +24,7 @@
 
 _GLOBAL(__arch_hweight8)
 BEGIN_FTR_SECTION
-	b .__sw_hweight8
+	b __sw_hweight8
 	nop
 	nop
 FTR_SECTION_ELSE
@@ -35,7 +35,7 @@
 
 _GLOBAL(__arch_hweight16)
 BEGIN_FTR_SECTION
-	b .__sw_hweight16
+	b __sw_hweight16
 	nop
 	nop
 	nop
@@ -57,7 +57,7 @@
 
 _GLOBAL(__arch_hweight32)
 BEGIN_FTR_SECTION
-	b .__sw_hweight32
+	b __sw_hweight32
 	nop
 	nop
 	nop
@@ -82,7 +82,7 @@
 
 _GLOBAL(__arch_hweight64)
 BEGIN_FTR_SECTION
-	b .__sw_hweight64
+	b __sw_hweight64
 	nop
 	nop
 	nop

diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index f4fcb0b..0738f96 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S

@@ -79,8 +79,8 @@
 
 _GLOBAL(memmove)
 	cmplw	0,r3,r4
-	bgt	.backwards_memcpy
-	b	.memcpy
+	bgt	backwards_memcpy
+	b	memcpy
 
 _GLOBAL(backwards_memcpy)
 	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */

diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 72ad055..32a06ec 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S

@@ -10,14 +10,29 @@
 #include <asm/ppc_asm.h>
 
 	.align	7
-_GLOBAL(memcpy)
+_GLOBAL_TOC(memcpy)
 BEGIN_FTR_SECTION
-	std	r3,48(r1)	/* save destination pointer for return value */
+#ifdef __LITTLE_ENDIAN__
+	cmpdi	cr7,r5,0
+#else
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
 #ifndef SELFTEST
 	b	memcpy_power7
 #endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+	/* dumb little-endian memcpy that will get replaced at runtime */
+	addi r9,r3,-1
+	addi r4,r4,-1
+	beqlr cr7
+	mtctr r5
+1:	lbzu r10,1(r4)
+	stbu r10,1(r9)
+	bdnz 1b
+	blr
+#else
 	PPC_MTOCRF(0x01,r5)
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
@@ -73,7 +88,7 @@
 2:	bf	cr7*4+3,3f
 	lbz	r9,8(r4)
 	stb	r9,0(r3)
-3:	ld	r3,48(r1)	/* return dest pointer */
+3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
 	blr
 
 .Lsrc_unaligned:
@@ -156,7 +171,7 @@
 2:	bf	cr7*4+3,3f
 	rotldi	r9,r9,8
 	stb	r9,0(r3)
-3:	ld	r3,48(r1)	/* return dest pointer */
+3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
 	blr
 
 .Ldst_unaligned:
@@ -201,5 +216,6 @@
 3:	bf	cr7*4+3,4f
 	lbz	r0,0(r4)
 	stb	r0,0(r3)
-4:	ld	r3,48(r1)	/* return dest pointer */
+4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
 	blr
+#endif

diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index e4177db..2ff5c14 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S

@@ -33,14 +33,14 @@
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
 
-	std	r3,48(r1)
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 
 	blt	.Lshort_copy
 	bgt	cr1,.Lvmx_copy
 #else
 	cmpldi	r5,16
 
-	std	r3,48(r1)
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 
 	blt	.Lshort_copy
 #endif
@@ -216,7 +216,7 @@
 	lbz	r0,0(r4)
 	stb	r0,0(r3)
 
-15:	ld	r3,48(r1)
+15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 	blr
 
 .Lunwind_stack_nonvmx_copy:
@@ -226,16 +226,16 @@
 #ifdef CONFIG_ALTIVEC
 .Lvmx_copy:
 	mflr	r0
-	std	r4,56(r1)
-	std	r5,64(r1)
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	.enter_vmx_copy
+	bl	enter_vmx_copy
 	cmpwi	cr1,r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
-	ld	r3,STACKFRAMESIZE+48(r1)
-	ld	r4,STACKFRAMESIZE+56(r1)
-	ld	r5,STACKFRAMESIZE+64(r1)
+	ld	r3,STK_REG(R31)(r1)
+	ld	r4,STK_REG(R30)(r1)
+	ld	r5,STK_REG(R29)(r1)
 	mtlr	r0
 
 	/*
@@ -447,8 +447,8 @@
 	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	ld	r3,48(r1)
-	b	.exit_vmx_copy		/* tail call optimise */
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	b	exit_vmx_copy		/* tail call optimise */
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -651,6 +651,6 @@
 	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	ld	r3,48(r1)
-	b	.exit_vmx_copy		/* tail call optimise */
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	b	exit_vmx_copy		/* tail call optimise */
 #endif /* CONFiG_ALTIVEC */

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c0511c2..412dd46 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c

@@ -1470,7 +1470,7 @@
 				regs->gpr[rd] = byterev_4(val);
 			goto ldst_done;
 
-#ifdef CONFIG_PPC_CPU
+#ifdef CONFIG_PPC_FPU
 		case 535:	/* lfsx */
 		case 567:	/* lfsux */
 			if (!(regs->msr & MSR_FP))

diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 3b1e480..7bd9549 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S

@@ -77,7 +77,7 @@
 	mr	r3,r4
 	blr
 
-_GLOBAL(__clear_user)
+_GLOBAL_TOC(__clear_user)
 	cmpdi	r4,32
 	neg	r6,r3
 	li	r0,0

diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 1136d26..057cbbb 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S

@@ -159,7 +159,7 @@
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
-	bl	.hash_page_do_lazy_icache
+	bl	hash_page_do_lazy_icache
 END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 	/* At this point, r3 contains new PP bits, save them in
@@ -201,7 +201,8 @@
 	li	r8,MMU_PAGE_4K		/* page size */
 	li	r9,MMU_PAGE_4K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(htab_call_hpte_insert1)
+.globl htab_call_hpte_insert1
+htab_call_hpte_insert1:
 	bl	.			/* Patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge	htab_pte_insert_ok	/* Insertion successful */
@@ -225,7 +226,8 @@
 	li	r8,MMU_PAGE_4K		/* page size */
 	li	r9,MMU_PAGE_4K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(htab_call_hpte_insert2)
+.globl htab_call_hpte_insert2
+htab_call_hpte_insert2:
 	bl	.			/* Patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge+	htab_pte_insert_ok	/* Insertion successful */
@@ -242,7 +244,8 @@
 2:	and	r0,r5,r27
 	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */	
 	/* Call ppc_md.hpte_remove */
-_GLOBAL(htab_call_hpte_remove)
+.globl htab_call_hpte_remove
+htab_call_hpte_remove:
 	bl	.			/* Patched by htab_finish_init() */
 
 	/* Try all again */
@@ -296,7 +299,8 @@
 	li	r7,MMU_PAGE_4K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
-_GLOBAL(htab_call_hpte_updatepp)
+.globl htab_call_hpte_updatepp
+htab_call_hpte_updatepp:
 	bl	.			/* Patched by htab_finish_init() */
 
 	/* if we failed because typically the HPTE wasn't really here
@@ -471,7 +475,7 @@
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
-	bl	.hash_page_do_lazy_icache
+	bl	hash_page_do_lazy_icache
 END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 	/* At this point, r3 contains new PP bits, save them in
@@ -526,7 +530,8 @@
 	li	r8,MMU_PAGE_4K		/* page size */
 	li	r9,MMU_PAGE_4K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(htab_call_hpte_insert1)
+.globl htab_call_hpte_insert1
+htab_call_hpte_insert1:
 	bl	.			/* patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge	htab_pte_insert_ok	/* Insertion successful */
@@ -554,7 +559,8 @@
 	li	r8,MMU_PAGE_4K		/* page size */
 	li	r9,MMU_PAGE_4K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(htab_call_hpte_insert2)
+.globl htab_call_hpte_insert2
+htab_call_hpte_insert2:
 	bl	.			/* patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge+	htab_pte_insert_ok	/* Insertion successful */
@@ -571,7 +577,8 @@
 2:	and	r0,r5,r27
 	rldicr	r3,r0,3,63-3		/* r0 = (hash & mask) << 3 */
 	/* Call ppc_md.hpte_remove */
-_GLOBAL(htab_call_hpte_remove)
+.globl htab_call_hpte_remove
+htab_call_hpte_remove:
 	bl	.			/* patched by htab_finish_init() */
 
 	/* Try all again */
@@ -588,7 +595,7 @@
 	li	r6,MMU_PAGE_64K		/* psize */
 	ld	r7,STK_PARAM(R9)(r1)	/* ssize */
 	ld	r8,STK_PARAM(R8)(r1)	/* local */
-	bl	.flush_hash_page
+	bl	flush_hash_page
 	/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
 	lis	r0,_PAGE_HPTE_SUB@h
 	ori	r0,r0,_PAGE_HPTE_SUB@l
@@ -660,7 +667,8 @@
 	li	r7,MMU_PAGE_4K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
-_GLOBAL(htab_call_hpte_updatepp)
+.globl htab_call_hpte_updatepp
+htab_call_hpte_updatepp:
 	bl	.			/* patched by htab_finish_init() */
 
 	/* if we failed because typically the HPTE wasn't really here
@@ -812,7 +820,7 @@
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
-	bl	.hash_page_do_lazy_icache
+	bl	hash_page_do_lazy_icache
 END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 	/* At this point, r3 contains new PP bits, save them in
@@ -857,7 +865,8 @@
 	li	r8,MMU_PAGE_64K
 	li	r9,MMU_PAGE_64K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(ht64_call_hpte_insert1)
+.globl ht64_call_hpte_insert1
+ht64_call_hpte_insert1:
 	bl	.			/* patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge	ht64_pte_insert_ok	/* Insertion successful */
@@ -881,7 +890,8 @@
 	li	r8,MMU_PAGE_64K
 	li	r9,MMU_PAGE_64K		/* actual page size */
 	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-_GLOBAL(ht64_call_hpte_insert2)
+.globl ht64_call_hpte_insert2
+ht64_call_hpte_insert2:
 	bl	.			/* patched by htab_finish_init() */
 	cmpdi	0,r3,0
 	bge+	ht64_pte_insert_ok	/* Insertion successful */
@@ -898,7 +908,8 @@
 2:	and	r0,r5,r27
 	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
 	/* Call ppc_md.hpte_remove */
-_GLOBAL(ht64_call_hpte_remove)
+.globl ht64_call_hpte_remove
+ht64_call_hpte_remove:
 	bl	.			/* patched by htab_finish_init() */
 
 	/* Try all again */
@@ -952,7 +963,8 @@
 	li	r7,MMU_PAGE_64K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
-_GLOBAL(ht64_call_hpte_updatepp)
+.globl ht64_call_hpte_updatepp
+ht64_call_hpte_updatepp:
 	bl	.			/* patched by htab_finish_init() */
 
 	/* if we failed because typically the HPTE wasn't really here

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 350aa58..88fdd9d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c

@@ -449,6 +449,24 @@
 			mmu_psize_defs[bpsize].penc[apsize] = -1;
 }
 
+#ifdef CONFIG_PPC_64K_PAGES
+
+static bool might_have_hea(void)
+{
+	/*
+	 * The HEA ethernet adapter requires awareness of the GX
+	 * bus. Without that awareness (no CONFIG_IBMEBUS), or on a
+	 * CPU with CPU_FTR_ARCH_207S, assume we never see an HEA.
+	 */
+#ifdef CONFIG_IBMEBUS
+	return !cpu_has_feature(CPU_FTR_ARCH_207S);
+#else
+	return false;
+#endif
+}
+
+#endif /* #ifdef CONFIG_PPC_64K_PAGES */
+
 static void __init htab_init_page_sizes(void)
 {
 	int rc;
@@ -503,10 +521,11 @@
 			mmu_linear_psize = MMU_PAGE_64K;
 		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
 			/*
-			 * Don't use 64k pages for ioremap on pSeries, since
-			 * that would stop us accessing the HEA ethernet.
+			 * When running on pSeries using 64k pages for ioremap
+			 * would stop us accessing the HEA ethernet. So if we
+			 * have the chance of ever seeing one, stay at 4k.
 			 */
-			if (!machine_is(pseries))
+			if (!might_have_hea() || !machine_is(pseries))
 				mmu_io_psize = MMU_PAGE_64K;
 		} else
 			mmu_ci_restrictions = 1;
@@ -607,47 +626,43 @@
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-#define FUNCTION_TEXT(A)	((*(unsigned long *)(A)))
+extern u32 htab_call_hpte_insert1[];
+extern u32 htab_call_hpte_insert2[];
+extern u32 htab_call_hpte_remove[];
+extern u32 htab_call_hpte_updatepp[];
+extern u32 ht64_call_hpte_insert1[];
+extern u32 ht64_call_hpte_insert2[];
+extern u32 ht64_call_hpte_remove[];
+extern u32 ht64_call_hpte_updatepp[];
 
 static void __init htab_finish_init(void)
 {
-	extern unsigned int *htab_call_hpte_insert1;
-	extern unsigned int *htab_call_hpte_insert2;
-	extern unsigned int *htab_call_hpte_remove;
-	extern unsigned int *htab_call_hpte_updatepp;
-
 #ifdef CONFIG_PPC_HAS_HASH_64K
-	extern unsigned int *ht64_call_hpte_insert1;
-	extern unsigned int *ht64_call_hpte_insert2;
-	extern unsigned int *ht64_call_hpte_remove;
-	extern unsigned int *ht64_call_hpte_updatepp;
-
 	patch_branch(ht64_call_hpte_insert1,
-		FUNCTION_TEXT(ppc_md.hpte_insert),
+		ppc_function_entry(ppc_md.hpte_insert),
 		BRANCH_SET_LINK);
 	patch_branch(ht64_call_hpte_insert2,
-		FUNCTION_TEXT(ppc_md.hpte_insert),
+		ppc_function_entry(ppc_md.hpte_insert),
 		BRANCH_SET_LINK);
 	patch_branch(ht64_call_hpte_remove,
-		FUNCTION_TEXT(ppc_md.hpte_remove),
+		ppc_function_entry(ppc_md.hpte_remove),
 		BRANCH_SET_LINK);
 	patch_branch(ht64_call_hpte_updatepp,
-		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		ppc_function_entry(ppc_md.hpte_updatepp),
 		BRANCH_SET_LINK);
-
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 
 	patch_branch(htab_call_hpte_insert1,
-		FUNCTION_TEXT(ppc_md.hpte_insert),
+		ppc_function_entry(ppc_md.hpte_insert),
 		BRANCH_SET_LINK);
 	patch_branch(htab_call_hpte_insert2,
-		FUNCTION_TEXT(ppc_md.hpte_insert),
+		ppc_function_entry(ppc_md.hpte_insert),
 		BRANCH_SET_LINK);
 	patch_branch(htab_call_hpte_remove,
-		FUNCTION_TEXT(ppc_md.hpte_remove),
+		ppc_function_entry(ppc_md.hpte_remove),
 		BRANCH_SET_LINK);
 	patch_branch(htab_call_hpte_updatepp,
-		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		ppc_function_entry(ppc_md.hpte_updatepp),
 		BRANCH_SET_LINK);
 }
 
@@ -964,6 +979,22 @@
 		trap, vsid, ssize, psize, lpsize, pte);
 }
 
+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+			     int psize, bool user_region)
+{
+	if (user_region) {
+		if (psize != get_paca_psize(ea)) { /* paca psize differs */
+			get_paca()->context = mm->context; /* resync paca */
+			slb_flush_and_rebolt();
+		}
+	} else if (get_paca()->vmalloc_sllp !=
+		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+		get_paca()->vmalloc_sllp =
+			mmu_psize_defs[mmu_vmalloc_psize].sllp;
+		slb_vmalloc_update();
+	}
+}
+
 /* Result code is:
  *  0 - handled
  *  1 - normal page fault
@@ -1085,6 +1116,8 @@
 			WARN_ON(1);
 		}
 #endif
+		check_paca_psize(ea, mm, psize, user_region);
+
 		goto bail;
 	}
 
@@ -1125,17 +1158,8 @@
 #endif
 		}
 	}
-	if (user_region) {
-		if (psize != get_paca_psize(ea)) {
-			get_paca()->context = mm->context;
-			slb_flush_and_rebolt();
-		}
-	} else if (get_paca()->vmalloc_sllp !=
-		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
-		get_paca()->vmalloc_sllp =
-			mmu_psize_defs[mmu_vmalloc_psize].sllp;
-		slb_vmalloc_update();
-	}
+
+	check_paca_psize(ea, mm, psize, user_region);
 #endif /* CONFIG_PPC_64K_PAGES */
 
 #ifdef CONFIG_PPC_HAS_HASH_64K

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 964a5f6..0399a67 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c

@@ -256,10 +256,14 @@
 	patch_instruction(insn_addr, insn);
 }
 
+/* Code labels in slb_low.S marking instructions that are patched at boot. */
+extern u32 slb_compare_rr_to_size[];
+extern u32 slb_miss_kernel_load_linear[];
+extern u32 slb_miss_kernel_load_io[];
+extern u32 slb_miss_kernel_load_vmemmap[];
+
 void slb_set_size(u16 size)
 {
-	extern unsigned int *slb_compare_rr_to_size;
-
 	if (mmu_slb_size == size)
 		return;
 
@@ -272,11 +276,7 @@
 	unsigned long linear_llp, vmalloc_llp, io_llp;
 	unsigned long lflags, vflags;
 	static int slb_encoding_inited;
-	extern unsigned int *slb_miss_kernel_load_linear;
-	extern unsigned int *slb_miss_kernel_load_io;
-	extern unsigned int *slb_compare_rr_to_size;
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-	extern unsigned int *slb_miss_kernel_load_vmemmap;
 	unsigned long vmemmap_llp;
 #endif
 

diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 17aa6df..736d18b 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S

@@ -35,7 +35,7 @@
 	 * check for bad kernel/user address
 	 * (ea & ~REGION_MASK) >= PGTABLE_RANGE
 	 */
-	rldicr. r9,r3,4,(63 - 46 - 4)
+	rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
 	bne-	8f
 
 	srdi	r9,r3,60		/* get region */
@@ -59,7 +59,8 @@
 	/* Linear mapping encoding bits, the "li" instruction below will
 	 * be patched by the kernel at boot
 	 */
-_GLOBAL(slb_miss_kernel_load_linear)
+.globl slb_miss_kernel_load_linear
+slb_miss_kernel_load_linear:
 	li	r11,0
 	/*
 	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
@@ -79,7 +80,8 @@
 	/* Check virtual memmap region. To be patches at kernel boot */
 	cmpldi	cr0,r9,0xf
 	bne	1f
-_GLOBAL(slb_miss_kernel_load_vmemmap)
+.globl slb_miss_kernel_load_vmemmap
+slb_miss_kernel_load_vmemmap:
 	li	r11,0
 	b	6f
 1:
@@ -95,7 +97,8 @@
 	b	6f
 5:
 	/* IO mapping */
-	_GLOBAL(slb_miss_kernel_load_io)
+.globl slb_miss_kernel_load_io
+slb_miss_kernel_load_io:
 	li	r11,0
 6:
 	/*
@@ -250,7 +253,8 @@
 7:	ld	r10,PACASTABRR(r13)
 	addi	r10,r10,1
 	/* This gets soft patched on boot. */
-_GLOBAL(slb_compare_rr_to_size)
+.globl slb_compare_rr_to_size
+slb_compare_rr_to_size:
 	cmpldi	r10,0
 
 	blt+	4f

diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ae3d5b7..92cb18d 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c

@@ -596,8 +596,13 @@
 	/* XXX This should be decided at runtime based on supported
 	 * page sizes in the TLB, but for now let's assume 16M is
 	 * always there and a good fit (which it probably is)
+	 *
+	 * Freescale booke only supports 4K pages in TLB0, so use that.
 	 */
-	mmu_vmemmap_psize = MMU_PAGE_16M;
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		mmu_vmemmap_psize = MMU_PAGE_4K;
+	else
+		mmu_vmemmap_psize = MMU_PAGE_16M;
 
 	/* XXX This code only checks for TLB 0 capabilities and doesn't
 	 *     check what page size combos are supported by the HW. It

diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index e76eba7..8f87d92 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S

@@ -78,7 +78,7 @@
 	blr
 
 /*
- * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
+ * BPF_LDX | BPF_B | BPF_MSH: ldxb  4*([offset]&0xf)
  * r_addr is the offset value
  */
 	.globl sk_load_byte_msh

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 808ce1c..6dcdade 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c

@@ -79,19 +79,11 @@
 	}
 
 	switch (filter[0].code) {
-	case BPF_S_RET_K:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
+	case BPF_RET | BPF_K:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		/* first instruction sets A register (or is RET 'constant') */
 		break;
 	default:
@@ -144,6 +136,7 @@
 
 	for (i = 0; i < flen; i++) {
 		unsigned int K = filter[i].k;
+		u16 code = bpf_anc_helper(&filter[i]);
 
 		/*
 		 * addrs[] maps a BPF bytecode address into a real offset from
@@ -151,35 +144,35 @@
 		 */
 		addrs[i] = ctx->idx * 4;
 
-		switch (filter[i].code) {
+		switch (code) {
 			/*** ALU ops ***/
-		case BPF_S_ALU_ADD_X: /* A += X; */
+		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_ADD(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_ADD_K: /* A += K; */
+		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(K));
 			break;
-		case BPF_S_ALU_SUB_X: /* A -= X; */
+		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SUB(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_SUB_K: /* A -= K */
+		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(-K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
 			break;
-		case BPF_S_ALU_MUL_X: /* A *= X; */
+		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_MUL(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_MUL_K: /* A *= K */
+		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 			if (K < 32768)
 				PPC_MULI(r_A, r_A, K);
 			else {
@@ -187,7 +180,7 @@
 				PPC_MUL(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_MOD_X: /* A %= X; */
+		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -201,13 +194,13 @@
 			PPC_MUL(r_scratch1, r_X, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_MOD_K: /* A %= K; */
+		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
 			PPC_LI32(r_scratch2, K);
 			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
 			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_DIV_X: /* A /= X; */
+		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -223,17 +216,17 @@
 			}
 			PPC_DIVWU(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_DIV_K: /* A /= K */
+		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 			if (K == 1)
 				break;
 			PPC_LI32(r_scratch1, K);
 			PPC_DIVWU(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_AND(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			if (!IMM_H(K))
 				PPC_ANDI(r_A, r_A, K);
 			else {
@@ -241,51 +234,51 @@
 				PPC_AND(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_OR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			if (IMM_L(K))
 				PPC_ORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_ORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X: /* A ^= X */
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
 			ctx->seen |= SEEN_XREG;
 			PPC_XOR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_XOR_K: /* A ^= K */
+		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 			if (IMM_L(K))
 				PPC_XORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_XORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ALU_LSH_X: /* A <<= X; */
+		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SLW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (K == 0)
 				break;
 			else
 				PPC_SLWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_RSH_X: /* A >>= X; */
+		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SRW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_RSH_K: /* A >>= K; */
+		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 			if (K == 0)
 				break;
 			else
 				PPC_SRWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			PPC_NEG(r_A, r_A);
 			break;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			PPC_LI32(r_ret, K);
 			if (!K) {
 				if (ctx->pc_ret0 == -1)
@@ -312,7 +305,7 @@
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			PPC_MR(r_ret, r_A);
 			if (i != flen - 1) {
 				if (ctx->seen)
@@ -321,53 +314,53 @@
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_MISC_TAX: /* X = A */
+		case BPF_MISC | BPF_TAX: /* X = A */
 			PPC_MR(r_X, r_A);
 			break;
-		case BPF_S_MISC_TXA: /* A = X */
+		case BPF_MISC | BPF_TXA: /* A = X */
 			ctx->seen |= SEEN_XREG;
 			PPC_MR(r_A, r_X);
 			break;
 
 			/*** Constant loads/M[] access ***/
-		case BPF_S_LD_IMM: /* A = K */
+		case BPF_LD | BPF_IMM: /* A = K */
 			PPC_LI32(r_A, K);
 			break;
-		case BPF_S_LDX_IMM: /* X = K */
+		case BPF_LDX | BPF_IMM: /* X = K */
 			PPC_LI32(r_X, K);
 			break;
-		case BPF_S_LD_MEM: /* A = mem[K] */
+		case BPF_LD | BPF_MEM: /* A = mem[K] */
 			PPC_MR(r_A, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LDX_MEM: /* X = mem[K] */
+		case BPF_LDX | BPF_MEM: /* X = mem[K] */
 			PPC_MR(r_X, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_ST: /* mem[K] = A */
+		case BPF_ST: /* mem[K] = A */
 			PPC_MR(r_M + (K & 0xf), r_A);
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_STX: /* mem[K] = X */
+		case BPF_STX: /* mem[K] = X */
 			PPC_MR(r_M + (K & 0xf), r_X);
 			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LD_W_LEN: /*	A = skb->len; */
+		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
-		case BPF_S_LDX_W_LEN: /* X = skb->len; */
+		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
 			break;
 
 			/*** Ancillary info loads ***/
-		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  protocol) != 2);
 			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							    protocol));
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
 			PPC_CMPDI(r_scratch1, 0);
@@ -384,33 +377,33 @@
 			PPC_LWZ_OFFS(r_A, r_scratch1,
 				     offsetof(struct net_device, ifindex));
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  mark));
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  hash));
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  vlan_tci));
-			if (filter[i].code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
 			else
 				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  queue_mapping));
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 #ifdef CONFIG_SMP
 			/*
 			 * PACA ptr is r13:
@@ -426,13 +419,13 @@
 			break;
 
 			/*** Absolute loads from packet header/data ***/
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 			goto common_load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 			goto common_load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 		common_load:
 			/* Load from [K]. */
@@ -449,13 +442,13 @@
 			break;
 
 			/*** Indirect loads from packet header/data ***/
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			func = sk_load_word;
 			goto common_load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			func = sk_load_half;
 			goto common_load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			func = sk_load_byte;
 		common_load_ind:
 			/*
@@ -473,31 +466,31 @@
 			PPC_BCC(COND_LT, exit_addr);
 			break;
 
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 			goto common_load;
 			break;
 
 			/*** Jump and branches ***/
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			if (K != 0)
 				PPC_JMP(addrs[i + 1 + K]);
 			break;
 
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			true_cond = COND_GT;
 			goto cond_branch;
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			true_cond = COND_GE;
 			goto cond_branch;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			true_cond = COND_EQ;
 			goto cond_branch;
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			true_cond = COND_NE;
 			/* Fall through */
 		cond_branch:
@@ -508,20 +501,20 @@
 				break;
 			}
 
-			switch (filter[i].code) {
-			case BPF_S_JMP_JGT_X:
-			case BPF_S_JMP_JGE_X:
-			case BPF_S_JMP_JEQ_X:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_CMPLW(r_A, r_X);
 				break;
-			case BPF_S_JMP_JSET_X:
+			case BPF_JMP | BPF_JSET | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_AND_DOT(r_scratch1, r_A, r_X);
 				break;
-			case BPF_S_JMP_JEQ_K:
-			case BPF_S_JMP_JGT_K:
-			case BPF_S_JMP_JGE_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
 				if (K < 32768)
 					PPC_CMPLWI(r_A, K);
 				else {
@@ -529,7 +522,7 @@
 					PPC_CMPLW(r_A, r_scratch1);
 				}
 				break;
-			case BPF_S_JMP_JSET_K:
+			case BPF_JMP | BPF_JSET | BPF_K:
 				if (K < 32768)
 					/* PPC_ANDI is /only/ dot-form */
 					PPC_ANDI(r_scratch1, r_A, K);

diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index dc1a264..4d88f6a 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig

@@ -199,6 +199,34 @@
 	help
 	  This option enables support for the IBM Currituck (476fpe) evaluation board
 
+config AKEBONO
+	bool "IBM Akebono (476gtr) Support"
+	depends on PPC_47x
+	default n
+	select SWIOTLB
+	select 476FPE
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_HSTA_MSI
+	select I2C
+	select I2C_IBM_IIC
+	select NETDEVICES
+	select ETHERNET
+	select NET_VENDOR_IBM
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII_WOL
+	select USB
+	select USB_OHCI_HCD_PLATFORM
+	select USB_EHCI_HCD_PLATFORM
+	select MMC_SDHCI
+	select MMC_SDHCI_PLTFM
+	select MMC_SDHCI_OF_476GTR
+	select ATA
+	select SATA_AHCI_PLATFORM
+	help
+	  This option enables support for the IBM Akebono (476gtr) evaluation board
+
+
 config ICON
 	bool "Icon"
 	depends on 44x
@@ -323,6 +351,20 @@
 	select IBM_EMAC_EMAC4
 	select IBM_EMAC_TAH
 
+config 476FPE_ERR46
+	depends on 476FPE
+	bool "Enable linker work around for PPC476FPE errata #46"
+	help
+	  This option enables a workaround for an icache bug on 476
+	  that can cause execution of stale instructions when falling
+	  through pages (IBM erratum #46). It requires a recent version
+	  of binutils which supports the --ppc476-workaround option.
+
+	  The workaround enables the appropriate linker options and
+	  ensures that all module output sections are aligned to 4K
+	  page boundaries. The workaround is only required when
+	  building modules.
+
 # 44x errata/workaround config symbols, selected by the CPU models above
 config IBM440EP_ERR42
 	bool

diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index d03833a..26d35b5 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile

@@ -10,4 +10,5 @@
 obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o
 obj-$(CONFIG_ISS4xx)	+= iss4xx.o
 obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
-obj-$(CONFIG_CURRITUCK)	+= currituck.o
+obj-$(CONFIG_CURRITUCK)	+= ppc476.o
+obj-$(CONFIG_AKEBONO)	+= ppc476.o

diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/ppc476.c
similarity index 72%
rename from arch/powerpc/platforms/44x/currituck.c
rename to arch/powerpc/platforms/44x/ppc476.c
index 7f1b71a..33986c1 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/ppc476.c

@@ -1,7 +1,8 @@
 /*
- * Currituck board specific routines
+ * PowerPC 476FPE board specific routines
  *
- * Copyright © 2011 Tony Breeds IBM Corporation
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
  *
  * Based on earlier code:
  *    Matt Porter <mporter@kernel.crashing.org>
@@ -35,8 +36,9 @@
 #include <asm/mmu.h>
 
 #include <linux/pci.h>
+#include <linux/i2c.h>
 
-static __initdata struct of_device_id ppc47x_of_bus[] = {
+static struct of_device_id ppc47x_of_bus[] __initdata = {
 	{ .compatible = "ibm,plb4", },
 	{ .compatible = "ibm,plb6", },
 	{ .compatible = "ibm,opb", },
@@ -55,15 +57,69 @@
 }
 DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
 
+/* Akebono has an AVR microcontroller attached to the I2C bus
+ * which is used to power off/reset the system. */
+
+/* AVR I2C Commands */
+#define AVR_PWRCTL_CMD (0x26)
+
+/* Flags for the power control I2C commands */
+#define AVR_PWRCTL_PWROFF (0x01)
+#define AVR_PWRCTL_RESET (0x02)
+
+static struct i2c_client *avr_i2c_client;
+static void avr_halt_system(int pwrctl_flags)
+{
+	/* Ask the AVR to power off or reset the system, per pwrctl_flags */
+	i2c_smbus_write_byte_data(avr_i2c_client,
+				  AVR_PWRCTL_CMD, pwrctl_flags);
+
+	/* Spin until the AVR powers off or resets the system */
+	while (1)
+		;
+}
+
+static void avr_power_off_system(void)
+{
+	avr_halt_system(AVR_PWRCTL_PWROFF);
+}
+
+static void avr_reset_system(char *cmd)
+{
+	avr_halt_system(AVR_PWRCTL_RESET);
+}
+
+static int avr_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	avr_i2c_client = client;
+	ppc_md.restart = avr_reset_system;
+	ppc_md.power_off = avr_power_off_system;
+	return 0;
+}
+
+static const struct i2c_device_id avr_id[] = {
+	{ "akebono-avr", 0 },
+	{ }
+};
+
+static struct i2c_driver avr_driver = {
+	.driver = {
+		.name = "akebono-avr",
+	},
+	.probe = avr_probe,
+	.id_table = avr_id,
+};
+
 static int __init ppc47x_device_probe(void)
 {
+	i2c_add_driver(&avr_driver);
 	of_platform_bus_probe(NULL, ppc47x_of_bus, NULL);
 
 	return 0;
 }
 machine_device_initcall(ppc47x, ppc47x_device_probe);
 
-/* We can have either UICs or MPICs */
 static void __init ppc47x_init_irq(void)
 {
 	struct device_node *np;
@@ -157,43 +213,36 @@
 {
 
 	/* No need to check the DMA config as we /know/ our windows are all of
- 	 * RAM.  Lets hope that doesn't change */
+	 * RAM.  Lets hope that doesn't change */
 	swiotlb_detect_4g();
 
 	ppc47x_smp_init();
 }
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init ppc47x_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "ibm,currituck"))
-		return 0;
-
-	return 1;
-}
-
 static int board_rev = -1;
 static int __init ppc47x_get_board_rev(void)
 {
-	u8 fpga_reg0;
-	void *fpga;
-	struct device_node *np;
+	int reg;
+	u8 *fpga;
+	struct device_node *np = NULL;
 
-	np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+	if (of_machine_is_compatible("ibm,currituck")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+		reg = 0;
+	} else if (of_machine_is_compatible("ibm,akebono")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga");
+		reg = 2;
+	}
+
 	if (!np)
 		goto fail;
 
-	fpga = of_iomap(np, 0);
+	fpga = (u8 *) of_iomap(np, 0);
 	of_node_put(np);
 	if (!fpga)
 		goto fail;
 
-	fpga_reg0 = ioread8(fpga);
-	board_rev = fpga_reg0 & 0x03;
+	board_rev = ioread8(fpga + reg) & 0x03;
 	pr_info("%s: Found board revision %d\n", __func__, board_rev);
 	iounmap(fpga);
 	return 0;
@@ -208,7 +257,7 @@
 static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
 {
 	if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
-	                              dev->device == 0x00e0)) {
+				      dev->device == 0x00e0)) {
 		if (board_rev == 0) {
 			dev->irq = irq_create_mapping(NULL, 47);
 			pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
@@ -221,13 +270,30 @@
 	}
 }
 
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init ppc47x_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "ibm,akebono"))
+		return 1;
+
+	if (of_flat_dt_is_compatible(root, "ibm,currituck")) {
+		ppc_md.pci_irq_fixup = ppc47x_pci_irq_fixup;
+		return 1;
+	}
+
+	return 0;
+}
+
 define_machine(ppc47x) {
 	.name			= "PowerPC 47x",
 	.probe			= ppc47x_probe,
 	.progress		= udbg_progress,
 	.init_IRQ		= ppc47x_init_irq,
 	.setup_arch		= ppc47x_setup_arch,
-	.pci_irq_fixup		= ppc47x_pci_irq_fixup,
 	.restart		= ppc4xx_reset_system,
 	.calibrate_decr		= generic_calibrate_decr,
 };

diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 0000000..9fec5d3
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds

@@ -0,0 +1,15 @@
+SECTIONS
+{
+	.text : ALIGN(4096)
+	{
+		*(.text .text.* .fixup)
+	}
+	.init.text : ALIGN(4096)
+	{
+		*(.init.text .init.text.*)
+	}
+	.exit.text : ALIGN(4096)
+	{
+		*(.exit.text .exit.text.*)
+	}
+}

diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index c17aae8..f442120 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig

@@ -38,6 +38,15 @@
 	  help
 	  This option enables support for the C293PCIE board
 
+config BSC9132_QDS
+	bool "Freescale BSC9132QDS"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Freescale BSC9132 QDS board.
+	  BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores
+	  and dual StarCore SC3850 DSP cores.
+	  Manufacturer : Freescale Semiconductor, Inc
+
 config MPC8540_ADS
 	bool "Freescale MPC8540 ADS"
 	select DEFAULT_UIMAGE
@@ -117,11 +126,11 @@
 	  This option enables support for the Freescale / iVeia P1022RDK
 	  reference board.
 
-config P1023_RDS
-	bool "Freescale P1023 RDS/RDB"
+config P1023_RDB
+	bool "Freescale P1023 RDB"
 	select DEFAULT_UIMAGE
 	help
-	  This option enables support for the P1023 RDS and RDB boards
+	  This option enables support for the P1023 RDB board.
 
 config TWR_P102x
 	bool "Freescale TWR-P102x"
@@ -263,11 +272,11 @@
 	help
 	  This option enables support for the FSL CoreNet based boards.
 	  For 32bit kernel, the following boards are supported:
-	    P2041 RDB, P3041 DS and P4080 DS
+	    P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080
 	  For 64bit kernel, the following boards are supported:
 	    T4240 QDS and B4 QDS
 	  The following boards are supported for both 32bit and 64bit kernel:
-	    P5020 DS and P5040 DS
+	    P5020 DS, P5040 DS and T104xQDS
 
 endif # FSL_SOC_BOOKE
 

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 25cebe7..7303260 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile

@@ -6,6 +6,7 @@
 obj-y += common.o
 
 obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
+obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o
 obj-$(CONFIG_C293_PCIE)   += c293pcie.o
 obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o
@@ -17,7 +18,7 @@
 obj-$(CONFIG_P1010_RDB)   += p1010rdb.o
 obj-$(CONFIG_P1022_DS)    += p1022_ds.o
 obj-$(CONFIG_P1022_RDK)   += p1022_rdk.o
-obj-$(CONFIG_P1023_RDS)   += p1023_rds.o
+obj-$(CONFIG_P1023_RDB)   += p1023_rdb.o
 obj-$(CONFIG_TWR_P102x)   += twr_p102x.o
 obj-$(CONFIG_CORENET_GENERIC)   += corenet_generic.o
 obj-$(CONFIG_STX_GP3)	  += stx_gp3.o

diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
new file mode 100644
index 0000000..f0927e5
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c

@@ -0,0 +1,74 @@
+/*
+ * BSC913xQDS Board Setup
+ *
+ * Author:
+ *   Harninder Rai <harninder.rai@freescale.com>
+ *   Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+#include "smp.h"
+
+void __init bsc913x_qds_pic_init(void)
+{
+	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+	  MPIC_SINGLE_DEST_CPU,
+	  0, 256, " OpenPIC  ");
+
+	if (!mpic)
+		pr_err("bsc913x: Failed to allocate MPIC structure\n");
+	else
+		mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init bsc913x_qds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("bsc913x_qds_setup_arch()", 0);
+
+#if defined(CONFIG_SMP)
+	mpc85xx_smp_init();
+#endif
+
+	pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+
+static int __init bsc9132_qds_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	return of_flat_dt_is_compatible(root, "fsl,bsc9132qds");
+}
+
+define_machine(bsc9132_qds) {
+	.name			= "BSC9132 QDS",
+	.probe			= bsc9132_qds_probe,
+	.setup_arch		= bsc913x_qds_setup_arch,
+	.init_IRQ		= bsc913x_qds_pic_init,
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};

diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 8e4b1e1..5db1e11 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c

@@ -67,7 +67,7 @@
 
 	swiotlb_detect_4g();
 
-	pr_info("%s board from Freescale Semiconductor\n", ppc_md.name);
+	pr_info("%s board\n", ppc_md.name);
 
 	mpc85xx_qe_init();
 }
@@ -115,6 +115,7 @@
 static const char * const boards[] __initconst = {
 	"fsl,P2041RDB",
 	"fsl,P3041DS",
+	"fsl,OCA4080",
 	"fsl,P4080DS",
 	"fsl,P5020DS",
 	"fsl,P5040DS",
@@ -122,12 +123,16 @@
 	"fsl,B4860QDS",
 	"fsl,B4420QDS",
 	"fsl,B4220QDS",
+	"fsl,T1040QDS",
+	"fsl,T1042QDS",
+	"keymile,kmcoge4",
 	NULL
 };
 
 static const char * const hv_boards[] __initconst = {
 	"fsl,P2041RDB-hv",
 	"fsl,P3041DS-hv",
+	"fsl,OCA4080-hv",
 	"fsl,P4080DS-hv",
 	"fsl,P5020DS-hv",
 	"fsl,P5040DS-hv",
@@ -135,6 +140,8 @@
 	"fsl,B4860QDS-hv",
 	"fsl,B4420QDS-hv",
 	"fsl,B4220QDS-hv",
+	"fsl,T1040QDS-hv",
+	"fsl,T1042QDS-hv",
 	NULL
 };
 

diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
similarity index 75%
rename from arch/powerpc/platforms/85xx/p1023_rds.c
rename to arch/powerpc/platforms/85xx/p1023_rdb.c
index 0e61400..d5b7509 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c

@@ -4,7 +4,7 @@
  * Author: Roy Zang <tie-fei.zang@freescale.com>
  *
  * Description:
- * P1023 RDS Board Setup
+ * P1023 RDB Board Setup
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -41,12 +41,12 @@
  * Setup the architecture
  *
  */
-static void __init mpc85xx_rds_setup_arch(void)
+static void __init mpc85xx_rdb_setup_arch(void)
 {
 	struct device_node *np;
 
 	if (ppc_md.progress)
-		ppc_md.progress("p1023_rds_setup_arch()", 0);
+		ppc_md.progress("p1023_rdb_setup_arch()", 0);
 
 	/* Map BCSR area */
 	np = of_find_node_by_name(NULL, "bcsr");
@@ -85,10 +85,9 @@
 	fsl_pci_assign_primary();
 }
 
-machine_arch_initcall(p1023_rds, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices);
 
-static void __init mpc85xx_rds_pic_init(void)
+static void __init mpc85xx_rdb_pic_init(void)
 {
 	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
 		MPIC_SINGLE_DEST_CPU,
@@ -99,14 +98,6 @@
 	mpic_init(mpic);
 }
 
-static int __init p1023_rds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	return of_flat_dt_is_compatible(root, "fsl,P1023RDS");
-
-}
-
 static int __init p1023_rdb_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
@@ -115,26 +106,11 @@
 
 }
 
-define_machine(p1023_rds) {
-	.name			= "P1023 RDS",
-	.probe			= p1023_rds_probe,
-	.setup_arch		= mpc85xx_rds_setup_arch,
-	.init_IRQ		= mpc85xx_rds_pic_init,
-	.get_irq		= mpic_get_irq,
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
-#endif
-};
-
 define_machine(p1023_rdb) {
 	.name			= "P1023 RDB",
 	.probe			= p1023_rdb_probe,
-	.setup_arch		= mpc85xx_rds_setup_arch,
-	.init_IRQ		= mpc85xx_rds_pic_init,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
 	.calibrate_decr		= generic_calibrate_decr,

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6382098..ba093f5 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c

@@ -27,6 +27,7 @@
 #include <asm/cacheflush.h>
 #include <asm/dbell.h>
 #include <asm/fsl_guts.h>
+#include <asm/code-patching.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/mpic.h>
@@ -267,7 +268,7 @@
 	flush_spin_table(spin_table);
 	out_be32(&spin_table->pir, hw_cpu);
 	out_be64((u64 *)(&spin_table->addr_h),
-	  __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
+		__pa(ppc_function_entry(generic_secondary_smp_init)));
 	flush_spin_table(spin_table);
 #endif
 

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index bf9c6d4..391b3f6 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig

@@ -19,7 +19,6 @@
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
-source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index d9e2b19..a41bd02 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype

@@ -148,10 +148,6 @@
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
-config PPC_A2
-	bool
-	depends on PPC_BOOK3E_64
-
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -280,7 +276,7 @@
 
 config PPC_ICSWX
 	bool "Support for PowerPC icswx coprocessor instruction"
-	depends on POWER4 || PPC_A2
+	depends on POWER4
 	default n
 	---help---
 
@@ -422,6 +418,7 @@
 
 config CPU_LITTLE_ENDIAN
 	bool "Build little endian kernel"
+	select PPC64_BOOT_WRAPPER
 	help
 	  Build a little endian kernel.
 
@@ -430,3 +427,7 @@
 	  little endian powerpc.
 
 endchoice
+
+config PPC64_BOOT_WRAPPER
+	def_bool n
+	depends on CPU_LITTLE_ENDIAN

diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 879b4a4..469ef17 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile

@@ -22,4 +22,3 @@
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
-obj-$(CONFIG_PPC_WSP)		+= wsp/

diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 90745ea..c8017a7 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c

@@ -40,6 +40,7 @@
 #include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/cputhreads.h>
+#include <asm/code-patching.h>
 
 #include "interrupt.h"
 #include <asm/udbg.h>
@@ -70,8 +71,8 @@
 static inline int smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
-	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 

diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 0ba3c95..bcfd6f0 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h

@@ -35,7 +35,6 @@
 #define SPUFS_PS_MAP_SIZE	0x20000
 #define SPUFS_MFC_MAP_SIZE	0x1000
 #define SPUFS_CNTL_MAP_SIZE	0x1000
-#define SPUFS_CNTL_MAP_SIZE	0x1000
 #define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
 #define SPUFS_MSS_MAP_SIZE	0x1000
 

diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 2a7024d..a25f496 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig

@@ -65,6 +65,7 @@
 	select PPC_INDIRECT_PCI
 	select PPC_I8259
 	select PPC_NATIVE
+	select PPC_UDBG_16550
 	help
 	  This option enables support for the Motorola (now Emerson) MVME5100
 	  board.

diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
index 56f45ad..81ab555 100644
--- a/arch/powerpc/platforms/pasemi/powersave.S
+++ b/arch/powerpc/platforms/pasemi/powersave.S

@@ -66,7 +66,7 @@
 	std	r3, 48(r1)
 
 	/* Only do power savings when in astate 0 */
-	bl	.check_astate
+	bl	check_astate
 	cmpwi	r3,0
 	bne	1f
 

diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index c252ee9..45a8ed0 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig

@@ -17,6 +17,7 @@
 	select CPU_FREQ_GOV_USERSPACE
 	select CPU_FREQ_GOV_ONDEMAND
 	select CPU_FREQ_GOV_CONSERVATIVE
+	select PPC_DOORBELL
 	default y
 
 config PPC_POWERNV_RTAS

diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 63cebb9..d55891f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile

@@ -3,7 +3,7 @@
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o
 
-obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 5b51079..8ad0c5b 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c

@@ -42,11 +42,19 @@
 {
 	uint64_t changed_evts = (uint64_t)change;
 
-	/* We simply send special EEH event */
-	if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
-	    (events & OPAL_EVENT_PCI_ERROR) &&
-	    eeh_enabled())
+	/*
+	 * Simply send a special EEH event if EEH has been
+	 * enabled; otherwise clear the pending event in
+	 * case EEH gets enabled later.
+	 */
+	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
+	    !(events & OPAL_EVENT_PCI_ERROR))
+		return 0;
+
+	if (eeh_enabled())
 		eeh_send_failure_event(NULL);
+	else
+		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
 	return 0;
 }
@@ -141,7 +149,9 @@
 	}
 
 #ifdef CONFIG_DEBUG_FS
-	if (phb->dbgfs) {
+	if (!phb->has_dbgfs && phb->dbgfs) {
+		phb->has_dbgfs = 1;
+
 		debugfs_create_file("err_injct_outbound", 0600,
 				    phb->dbgfs, hose,
 				    &ioda_eeh_outb_dbgfs_ops);
@@ -154,7 +164,14 @@
 	}
 #endif
 
-	phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+	/* If EEH is enabled, we're going to rely on that.
+	 * Otherwise, we fall back to the conventional mechanism
+	 * of clearing the frozen PE during PCI config access.
+	 */
+	if (eeh_enabled())
+		phb->flags |= PNV_PHB_FLAG_EEH;
+	else
+		phb->flags &= ~PNV_PHB_FLAG_EEH;
 
 	return 0;
 }
@@ -250,7 +267,7 @@
 {
 	s64 ret = 0;
 	u8 fstate;
-	u16 pcierr;
+	__be16 pcierr;
 	u32 pe_no;
 	int result;
 	struct pci_controller *hose = pe->phb;
@@ -268,6 +285,21 @@
 		return EEH_STATE_NOT_SUPPORT;
 	}
 
+	/*
+	 * If we're in the middle of a PE reset, return the normal
+	 * state to keep the EEH core going. For PHB reset, we
+	 * still expect to have the fenced PHB cleared by the
+	 * PHB reset.
+	 */
+	if (!(pe->type & EEH_PE_PHB) &&
+	    (pe->state & EEH_PE_RESET)) {
+		result = (EEH_STATE_MMIO_ACTIVE |
+			  EEH_STATE_DMA_ACTIVE |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		return result;
+	}
+
 	/* Retrieve PE status through OPAL */
 	pe_no = pe->addr;
 	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
@@ -284,7 +316,7 @@
 		result = 0;
 		result &= ~EEH_STATE_RESET_ACTIVE;
 
-		if (pcierr != OPAL_EEH_PHB_ERROR) {
+		if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
 			result |= EEH_STATE_MMIO_ACTIVE;
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
@@ -347,52 +379,6 @@
 	return result;
 }
 
-static int ioda_eeh_pe_clear(struct eeh_pe *pe)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	u32 pe_no;
-	u8 fstate;
-	u16 pcierr;
-	s64 ret;
-
-	pe_no = pe->addr;
-	hose = pe->phb;
-	phb = pe->phb->private_data;
-
-	/* Clear the EEH error on the PE */
-	ret = opal_pci_eeh_freeze_clear(phb->opal_id,
-			pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-	if (ret) {
-		pr_err("%s: Failed to clear EEH error for "
-		       "PHB#%x-PE#%x, err=%lld\n",
-		       __func__, hose->global_number, pe_no, ret);
-		return -EIO;
-	}
-
-	/*
-	 * Read the PE state back and verify that the frozen
-	 * state has been removed.
-	 */
-	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
-			&fstate, &pcierr, NULL);
-	if (ret) {
-		pr_err("%s: Failed to get EEH status on "
-		       "PHB#%x-PE#%x\n, err=%lld\n",
-		       __func__, hose->global_number, pe_no, ret);
-		return -EIO;
-	}
-
-	if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
-		pr_err("%s: Frozen state not cleared on "
-		       "PHB#%x-PE#%x, sts=%x\n",
-		       __func__, hose->global_number, pe_no, fstate);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
 {
 	s64 rc = OPAL_HARDWARE;
@@ -402,13 +388,16 @@
 		if (rc <= 0)
 			break;
 
-		msleep(rc);
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * rc);
+		else
+			msleep(rc);
 	}
 
 	return rc;
 }
 
-static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
 {
 	struct pnv_phb *phb = hose->private_data;
 	s64 rc = OPAL_HARDWARE;
@@ -431,9 +420,17 @@
 
 	/*
 	 * Poll state of the PHB until the request is done
-	 * successfully.
+	 * successfully. The PHB reset is usually a complete PHB
+	 * reset followed by a hot reset on the root bus, so we
+	 * also need the PCI bus settle delay.
 	 */
 	rc = ioda_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE) {
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+		else
+			msleep(EEH_PE_RST_SETTLE_TIME);
+	}
 out:
 	if (rc != OPAL_SUCCESS)
 		return -EIO;
@@ -471,6 +468,8 @@
 
 	/* Poll state of the PHB until the request is done */
 	rc = ioda_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE)
+		msleep(EEH_PE_RST_SETTLE_TIME);
 out:
 	if (rc != OPAL_SUCCESS)
 		return -EIO;
@@ -478,32 +477,71 @@
 	return 0;
 }
 
-static int ioda_eeh_bridge_reset(struct pci_controller *hose,
-		struct pci_dev *dev, int option)
-{
-	u16 ctrl;
+static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
 
-	pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
-		 __func__, hose->global_number, dev->bus->number,
-		 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	int aer = edev ? edev->aer_cap : 0;
+	u32 ctrl;
+
+	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(dev->bus),
+		 dev->bus->number, option);
 
 	switch (option) {
 	case EEH_RESET_FUNDAMENTAL:
 	case EEH_RESET_HOT:
-		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+		/* Don't report linkDown event */
+		if (aer) {
+			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl |= PCI_ERR_UNC_SURPDN;
+                        eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
+                }
+
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
 		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
-		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		msleep(EEH_PE_RST_HOLD_TIME);
+
 		break;
 	case EEH_RESET_DEACTIVATE:
-		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
 		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
-		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		/* Continue reporting linkDown event */
+		if (aer) {
+			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl &= ~PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
+		}
+
 		break;
 	}
 
 	return 0;
 }
 
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+
+	if (pci_is_root_bus(dev->bus)) {
+		hose = pci_bus_to_host(dev->bus);
+		ioda_eeh_root_reset(hose, EEH_RESET_HOT);
+		ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+	} else {
+		ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
+		ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+	}
+}
+
 /**
  * ioda_eeh_reset - Reset the indicated PE
  * @pe: EEH PE
@@ -523,27 +561,18 @@
 	int ret;
 
 	/*
-	 * Anyway, we have to clear the problematic state for the
-	 * corresponding PE. However, we needn't do it if the PE
-	 * is PHB associated. That means the PHB is having fatal
-	 * errors and it needs reset. Further more, the AIB interface
-	 * isn't reliable any more.
-	 */
-	if (!(pe->type & EEH_PE_PHB) &&
-	    (option == EEH_RESET_HOT ||
-	    option == EEH_RESET_FUNDAMENTAL)) {
-		ret = ioda_eeh_pe_clear(pe);
-		if (ret)
-			return -EIO;
-	}
-
-	/*
-	 * The rules applied to reset, either fundamental or hot reset:
+	 * For PHB reset, we always do a complete reset. For those PEs whose
+	 * primary bus is derived from the root complex (root bus) or root port
+	 * (usually bus#1), we apply hot or fundamental reset on the root port.
+	 * For other PEs, we always do a hot reset on the PE primary bus.
 	 *
-	 * We always reset the direct upstream bridge of the PE. If the
-	 * direct upstream bridge isn't root bridge, we always take hot
-	 * reset no matter what option (fundamental or hot) is. Otherwise,
-	 * we should do the reset according to the required option.
+	 * Here, our design differs from pHyp, which always clears the
+	 * frozen state during PE reset. However, the good idea here from
+	 * benh is to keep the frozen state until the PE reset is completely
+	 * done (until BAR restore). With the frozen state, HW drops illegal IO
+	 * or MMIO accesses, which could otherwise cause a recursive frozen PE
+	 * during PE reset. The side effect is that the EEH core has to clear
+	 * the frozen state explicitly after BAR restore.
 	 */
 	if (pe->type & EEH_PE_PHB) {
 		ret = ioda_eeh_phb_reset(hose, option);
@@ -553,7 +582,7 @@
 		    pci_is_root_bus(bus->parent))
 			ret = ioda_eeh_root_reset(hose, option);
 		else
-			ret = ioda_eeh_bridge_reset(hose, bus->self, option);
+			ret = ioda_eeh_bridge_reset(bus->self, option);
 	}
 
 	return ret;
@@ -640,22 +669,6 @@
 	}
 }
 
-static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
-			       struct eeh_pe **pe)
-{
-	struct eeh_pe *phb_pe;
-
-	phb_pe = eeh_phb_pe_get(hose);
-	if (!phb_pe) {
-		pr_warning("%s Can't find PE for PHB#%d\n",
-			   __func__, hose->global_number);
-		return -EEXIST;
-	}
-
-	*pe = phb_pe;
-	return 0;
-}
-
 static int ioda_eeh_get_pe(struct pci_controller *hose,
 			   u16 pe_no, struct eeh_pe **pe)
 {
@@ -663,7 +676,8 @@
 	struct eeh_dev dev;
 
 	/* Find the PHB PE */
-	if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+	phb_pe = eeh_phb_pe_get(hose);
+	if (!phb_pe)
 		return -EEXIST;
 
 	/* Find the PE according to PE# */
@@ -691,26 +705,30 @@
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
-	u64 frozen_pe_no;
-	u16 err_type, severity;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	long rc;
-	int ret = EEH_NEXT_ERR_NONE;
+	int state, ret = EEH_NEXT_ERR_NONE;
 
 	/*
 	 * While running here, it's safe to purge the event queue.
 	 * And we should keep the cached OPAL notifier event sychronized
 	 * between the kernel and firmware.
 	 */
-	eeh_remove_event(NULL);
+	eeh_remove_event(NULL, false);
 	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
 	list_for_each_entry(hose, &hose_list, list_node) {
 		/*
 		 * If the subordinate PCI buses of the PHB has been
-		 * removed, we needn't take care of it any more.
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
 		 */
 		phb = hose->private_data;
-		if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
 			continue;
 
 		rc = opal_pci_next_error(phb->opal_id,
@@ -725,8 +743,8 @@
 		}
 
 		/* If the PHB doesn't have error, stop processing */
-		if (err_type == OPAL_EEH_NO_ERROR ||
-		    severity == OPAL_EEH_SEV_NO_ERROR) {
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
 			pr_devel("%s: No error found on PHB#%x\n",
 				 __func__, hose->global_number);
 			continue;
@@ -738,20 +756,14 @@
 		 * specific PHB.
 		 */
 		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
-			 __func__, err_type, severity,
-			 frozen_pe_no, hose->global_number);
-		switch (err_type) {
+			 __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
+			 be64_to_cpu(frozen_pe_no), hose->global_number);
+		switch (be16_to_cpu(err_type)) {
 		case OPAL_EEH_IOC_ERROR:
-			if (severity == OPAL_EEH_SEV_IOC_DEAD) {
-				list_for_each_entry(hose, &hose_list,
-						    list_node) {
-					phb = hose->private_data;
-					phb->eeh_state |= PNV_EEH_STATE_REMOVED;
-				}
-
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
 				pr_err("EEH: dead IOC detected\n");
 				ret = EEH_NEXT_ERR_DEAD_IOC;
-			} else if (severity == OPAL_EEH_SEV_INF) {
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: IOC informative error "
 					"detected\n");
 				ioda_eeh_hub_diag(hose);
@@ -760,25 +772,26 @@
 
 			break;
 		case OPAL_EEH_PHB_ERROR:
-			if (severity == OPAL_EEH_SEV_PHB_DEAD) {
-				if (ioda_eeh_get_phb_pe(hose, pe))
-					break;
-
-				pr_err("EEH: dead PHB#%x detected\n",
-					hose->global_number);
-				phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
+				*pe = phb_pe;
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_DEAD_PHB;
-			} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
-				if (ioda_eeh_get_phb_pe(hose, pe))
-					break;
-
-				pr_err("EEH: fenced PHB#%x detected\n",
-					hose->global_number);
+			} else if (be16_to_cpu(severity) ==
+						OPAL_EEH_SEV_PHB_FENCED) {
+				*pe = phb_pe;
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_FENCED_PHB;
-			} else if (severity == OPAL_EEH_SEV_INF) {
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: PHB#%x informative error "
-					"detected\n",
-					hose->global_number);
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
 				ioda_eeh_phb_diag(hose);
 				ret = EEH_NEXT_ERR_NONE;
 			}
@@ -786,30 +799,33 @@
 			break;
 		case OPAL_EEH_PE_ERROR:
 			/*
-			 * If we can't find the corresponding PE, the
-			 * PEEV / PEST would be messy. So we force an
-			 * fenced PHB so that it can be recovered.
+			 * If we can't find the corresponding PE, we
+			 * just try to unfreeze.
 			 */
-			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
-				if (!ioda_eeh_get_phb_pe(hose, pe)) {
-					pr_err("EEH: Escalated fenced PHB#%x "
-					       "detected for PE#%llx\n",
-						hose->global_number,
-						frozen_pe_no);
-					ret = EEH_NEXT_ERR_FENCED_PHB;
-				} else {
-					ret = EEH_NEXT_ERR_NONE;
-				}
+			if (ioda_eeh_get_pe(hose,
+					    be64_to_cpu(frozen_pe_no), pe)) {
+				/* Try best to clear it */
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number, frozen_pe_no);
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
+			} else if ((*pe)->state & EEH_PE_ISOLATED) {
+				ret = EEH_NEXT_ERR_NONE;
 			} else {
 				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
 					(*pe)->addr, (*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, PHB location: %s\n",
+					eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_FROZEN_PE;
 			}
 
 			break;
 		default:
 			pr_warn("%s: Unexpected error type %d\n",
-				__func__, err_type);
+				__func__, be16_to_cpu(err_type));
 		}
 
 		/*
@@ -827,6 +843,31 @@
 		}
 
 		/*
+		 * There may also be a frozen parent PE out there, and
+		 * if so we have to handle the frozen parent PE first.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling ? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE ? */
+				state = ioda_eeh_get_state(parent_pe);
+				if (state > 0 &&
+				    (state & active_flags) != active_flags)
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We possibly migrate to another PE */
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+		}
+
+		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
 		 * Otherwise, we need actions to be taken by upper

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a59788e..56a206f 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c

@@ -126,6 +126,7 @@
 	edev->mode	&= 0xFFFFFF00;
 	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
 		edev->mode |= EEH_DEV_BRIDGE;
+	edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
 	if (pci_is_pcie(dev)) {
 		edev->pcie_cap = pci_pcie_cap(dev);
 
@@ -133,6 +134,9 @@
 			edev->mode |= EEH_DEV_ROOT_PORT;
 		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
 			edev->mode |= EEH_DEV_DS_PORT;
+
+		edev->aer_cap = pci_find_ext_capability(dev,
+							PCI_EXT_CAP_ID_ERR);
 	}
 
 	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);

diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index dc487ff..5c21d9c 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c

@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
+#include <linux/delay.h>
 
 #include <asm/opal.h>
 
@@ -130,7 +131,8 @@
 {
 	long ret;
 	void *buf = validate_flash_data.buf;
-	__be32 size, result;
+	__be32 size = cpu_to_be32(validate_flash_data.buf_size);
+	__be32 result;
 
 	ret = opal_validate_flash(__pa(buf), &size, &result);
 
@@ -290,11 +292,6 @@
 	/* First entry address */
 	addr = __pa(list);
 
-	pr_alert("FLASH: Image is %u bytes\n", image_data.size);
-	pr_alert("FLASH: Image update requested\n");
-	pr_alert("FLASH: Image will be updated during system reboot\n");
-	pr_alert("FLASH: This will take several minutes. Do not power off!\n");
-
 flash:
 	rc = opal_update_flash(addr);
 
@@ -302,6 +299,47 @@
 	return rc;
 }
 
+/* Return the calling CPU to OPAL before a FW update; @info is unused */
+static void flash_return_cpu(void *info)
+{
+	int cpu = smp_processor_id();
+
+	if (!cpu_online(cpu))	/* nothing to do for a CPU that is not online */
+		return;
+
+	/* Hard-disable interrupts on this CPU before handing it over */
+	hard_irq_disable();
+
+	/* Return the CPU to OPAL */
+	opal_return_cpu();
+}
+
+/* Called just before reboot; does nothing unless a flash image is ready */
+void opal_flash_term_callback(void)
+{
+	struct cpumask mask;	/* online CPUs minus the current one */
+
+	if (update_flash_data.status != FLASH_IMG_READY)
+		return;
+
+	pr_alert("FLASH: Flashing new firmware\n");
+	pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+	pr_alert("FLASH: Performing flash and reboot/shutdown\n");
+	pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+	/* Short delay to help the messages above get out */
+	msleep(500);
+
+	/* Return every other online CPU to firmware via flash_return_cpu() */
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	if (!cpumask_empty(&mask))
+		smp_call_function_many(&mask,
+				       flash_return_cpu, NULL, false);
+	/* Finally hard-disable interrupts on the current CPU as well */
+	hard_irq_disable();
+}
+
 /*
  * Show candidate image status
  */

diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 79d83ca..f04b4d8 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c

@@ -12,12 +12,17 @@
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/slab.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/xics.h>
 #include <asm/opal.h>
 #include <asm/prom.h>
+#include <asm/uaccess.h>
+#include <asm/debug.h>
 
 static int opal_lpc_chip_id = -1;
 
@@ -176,6 +181,152 @@
 	.outsl	= opal_lpc_outsl,
 };
 
+#ifdef CONFIG_DEBUG_FS
+struct lpc_debugfs_entry {	/* per-file data read via filp->private_data */
+	enum OpalLPCAddressType lpc_type;	/* passed to opal_lpc_read/write */
+};
+
+static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
+			      size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(VERIFY_WRITE, ubuf, count))
+		return -EFAULT;
+
+	todo = count;	/* bytes left to copy; note size_t is narrowed to u32 */
+	while (todo) {
+		pos = *ppos;	/* file offset (narrowed to u32) is the LPC address */
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+		rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos,
+				   &data, len);
+		if (rc)
+			return -ENXIO;	/* OPAL error; earlier progress is not reported */
+		switch(len) {	/* store exactly len bytes to user space */
+		case 4:
+			rc = __put_user((u32)data, (u32 __user *)ubuf);
+			break;
+		case 2:
+			rc = __put_user((u16)data, (u16 __user *)ubuf);
+			break;
+		default:
+			rc = __put_user((u8)data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+		*ppos += len;	/* advance file offset, user pointer and remainder */
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;	/* only reached once every requested byte was copied */
+}
+
+static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(VERIFY_READ, ubuf, count))
+		return -EFAULT;
+
+	todo = count;	/* bytes left to write; note size_t is narrowed to u32 */
+	while (todo) {
+		pos = *ppos;	/* file offset (narrowed to u32) is the LPC address */
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+		switch(len) {	/* fetch exactly len bytes from user space */
+		case 4:
+			rc = __get_user(data, (u32 __user *)ubuf);
+			break;
+		case 2:
+			rc = __get_user(data, (u16 __user *)ubuf);
+			break;
+		default:
+			rc = __get_user(data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+
+		rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos,
+				    data, len);
+		if (rc)
+			return -ENXIO;	/* OPAL error; earlier progress is not reported */
+		*ppos += len;	/* advance file offset, user pointer and remainder */
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;	/* only reached once every requested byte was written */
+}
+
+static const struct file_operations lpc_fops = {
+	.read =		lpc_debug_read,		/* LPC -> user buffer */
+	.write =	lpc_debug_write,	/* user buffer -> LPC */
+	.open =		simple_open,		/* handlers read filp->private_data */
+	.llseek =	default_llseek,		/* handlers use *ppos as the address */
+};
+
+static int opal_lpc_debugfs_create_type(struct dentry *folder,
+					const char *fname,
+					enum OpalLPCAddressType type)
+{
+	struct lpc_debugfs_entry *entry;
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);	/* never freed here */
+	if (!entry)
+		return -ENOMEM;
+	entry->lpc_type = type;	/* read back by the lpc_fops handlers */
+	debugfs_create_file(fname, 0600, folder, entry, &lpc_fops);
+	return 0;	/* debugfs_create_file() result is not checked */
+}
+
+static int opal_lpc_init_debugfs(void)
+{
+	struct dentry *root;
+	int rc = 0;
+
+	if (opal_lpc_chip_id < 0)	/* no chip id recorded (it starts at -1) */
+		return -ENODEV;
+
+	root = debugfs_create_dir("lpc", powerpc_debugfs_root);	/* not checked */
+
+	rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
+	rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
+	rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
+	return rc;	/* 0, or the OR of the helpers' error codes */
+}
+device_initcall(opal_lpc_init_debugfs);
+#endif  /* CONFIG_DEBUG_FS */
+
 void opal_lpc_init(void)
 {
 	struct device_node *np;

diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index ec41322..b17a34b 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c

@@ -47,12 +47,12 @@
 		  __func__, merr_evt->type);
 	switch (merr_evt->type) {
 	case OPAL_MEM_ERR_TYPE_RESILIENCE:
-		paddr_start = merr_evt->u.resilience.physical_address_start;
-		paddr_end = merr_evt->u.resilience.physical_address_end;
+		paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
 		break;
 	case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
-		paddr_start = merr_evt->u.dyn_dealloc.physical_address_start;
-		paddr_end = merr_evt->u.dyn_dealloc.physical_address_end;
+		paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
 		break;
 	default:
 		return;

diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 1bb25b9..44ed78a 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c

@@ -37,7 +37,8 @@
 {
 	struct memcons *mc = bin_attr->private;
 	const char *conbuf;
-	size_t ret, first_read = 0;
+	ssize_t ret;
+	size_t first_read = 0;
 	uint32_t out_pos, avail;
 
 	if (!mc)
@@ -69,6 +70,9 @@
 		to += first_read;
 		count -= first_read;
 		pos -= avail;
+
+		if (count <= 0)
+			goto out;
 	}
 
 	/* Sanity check. The firmware should not do this to us. */

diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index d202f9b..9d1acf2 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c

@@ -260,10 +260,10 @@
 			attr[i].kobj_attr.attr.mode = S_IRUGO;
 			break;
 		case OPAL_SYSPARAM_WRITE:
-			attr[i].kobj_attr.attr.mode = S_IWUGO;
+			attr[i].kobj_attr.attr.mode = S_IWUSR;
 			break;
 		case OPAL_SYSPARAM_RW:
-			attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO;
+			attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
 			break;
 		default:
 			break;

diff --git a/arch/powerpc/platforms/powernv/opal-takeover.S b/arch/powerpc/platforms/powernv/opal-takeover.S
index 3cd2628..11a3169 100644
--- a/arch/powerpc/platforms/powernv/opal-takeover.S
+++ b/arch/powerpc/platforms/powernv/opal-takeover.S

@@ -21,11 +21,13 @@
 _GLOBAL(opal_query_takeover)
 	mfcr	r0
 	stw	r0,8(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
 	std	r3,STK_PARAM(R3)(r1)
 	std	r4,STK_PARAM(R4)(r1)
 	li	r3,H_HAL_TAKEOVER
 	li	r4,H_HAL_TAKEOVER_QUERY_MAGIC
 	HVSC
+	addi	r1,r1,STACKFRAMESIZE
 	ld	r10,STK_PARAM(R3)(r1)
 	std	r4,0(r10)
 	ld	r10,STK_PARAM(R4)(r1)

diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f531ffe..4abbff2 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S

@@ -32,7 +32,7 @@
 	std	r12,PACASAVEDMSR(r13);	\
 	andc	r12,r12,r0;		\
 	mtmsrd	r12,1;			\
-	LOAD_REG_ADDR(r0,.opal_return);	\
+	LOAD_REG_ADDR(r0,opal_return);	\
 	mtlr	r0;			\
 	li	r0,MSR_DR|MSR_IR|MSR_LE;\
 	andc	r12,r12,r0;		\
@@ -44,7 +44,7 @@
 	mtspr	SPRN_HSRR0,r12;		\
 	hrfid
 
-_STATIC(opal_return)
+opal_return:
 	/*
 	 * Fixup endian on OPAL return... we should be able to simplify
 	 * this by instead converting the below trampoline to a set of
@@ -124,6 +124,7 @@
 OPAL_CALL(opal_lpc_read,			OPAL_LPC_READ);
 OPAL_CALL(opal_lpc_write,			OPAL_LPC_WRITE);
 OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus,			OPAL_REINIT_CPUS);
 OPAL_CALL(opal_read_elog,			OPAL_ELOG_READ);
 OPAL_CALL(opal_send_ack_elog,			OPAL_ELOG_ACK);
 OPAL_CALL(opal_get_elog_size,			OPAL_ELOG_SIZE);

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f343183..1999756 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c

@@ -57,6 +57,21 @@
 static uint64_t last_notified_mask = 0x0ul;
 static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
 
+static void opal_reinit_cores(void)
+{
+	/* Do the actual re-init with the HILE flag matching this build's
+	 * endianness. This will clobber all FPRs, VRs, etc...
+	 * It will preserve non volatile GPRs and HSPRG0/1. It will
+	 * also restore HIDs and other SPRs to their original value
+	 * but it might clobber a bunch. The OPAL return code is ignored.
+	 */
+#ifdef __BIG_ENDIAN__
+	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+#else
+	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+#endif
+}
+
 int __init early_init_dt_scan_opal(unsigned long node,
 				   const char *uname, int depth, void *data)
 {
@@ -96,6 +111,13 @@
 		printk("OPAL V1 detected !\n");
 	}
 
+	/* Reinit all cores with the right endian */
+	opal_reinit_cores();
+
+	/* Restore some bits */
+	if (cur_cpu_spec->cpu_restore)
+		cur_cpu_spec->cpu_restore();
+
 	return 1;
 }
 

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 98824aa..de19ede 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c

@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/crash_dump.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/string.h>
@@ -663,15 +664,15 @@
 		 * errors, and on the first pass the data will be a relative
 		 * bus number, print that out instead.
 		 */
-		tbl->it_busno = 0;
 		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
 		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
 				8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
-			       TCE_PCI_SWINV_PAIR;
+		tbl->it_type |= (TCE_PCI_SWINV_CREATE |
+				 TCE_PCI_SWINV_FREE   |
+				 TCE_PCI_SWINV_PAIR);
 	}
 	iommu_init_table(tbl, phb->hose->node);
-	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
 	if (pe->pdev)
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
@@ -793,14 +794,13 @@
 		 * errors, and on the first pass the data will be a relative
 		 * bus number, print that out instead.
 		 */
-		tbl->it_busno = 0;
 		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
 		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
 				8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
 	}
 	iommu_init_table(tbl, phb->hose->node);
-	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
 	if (pe->pdev)
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
@@ -1386,12 +1386,24 @@
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
+	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */
 	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
 	if (rc)
 		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
+
+	/* If we're running in a kdump kernel, the previous kernel never
+	 * shut down the PCI devices correctly. We already got the IODA table
+	 * cleaned out. So we have to issue a PHB reset to stop all PCI
+	 * transactions from the previous kernel.
+	 */
+	if (is_kdump_kernel()) {
+		pr_info("  Issue PHB reset ...\n");
+		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+		ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
+	}
 }
 
 void __init pnv_pci_init_ioda2_phb(struct device_node *np)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 8518817..f91a4e5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c

@@ -131,65 +131,60 @@
 	int i;
 
 	data = (struct OpalIoP7IOCPhbErrorData *)common;
-	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
+	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
 		hose->global_number, common->version);
 
 	if (data->brdgCtl)
-		pr_info("  brdgCtl:     %08x\n",
+		pr_info("brdgCtl:     %08x\n",
 			data->brdgCtl);
 	if (data->portStatusReg || data->rootCmplxStatus ||
 	    data->busAgentStatus)
-		pr_info("  UtlSts:      %08x %08x %08x\n",
+		pr_info("UtlSts:      %08x %08x %08x\n",
 			data->portStatusReg, data->rootCmplxStatus,
 			data->busAgentStatus);
 	if (data->deviceStatus || data->slotStatus   ||
 	    data->linkStatus   || data->devCmdStatus ||
 	    data->devSecStatus)
-		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
 			data->deviceStatus, data->slotStatus,
 			data->linkStatus, data->devCmdStatus,
 			data->devSecStatus);
 	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
 	    data->corrErrorStatus)
-		pr_info("  RootErrSts:  %08x %08x %08x\n",
+		pr_info("RootErrSts:  %08x %08x %08x\n",
 			data->rootErrorStatus, data->uncorrErrorStatus,
 			data->corrErrorStatus);
 	if (data->tlpHdr1 || data->tlpHdr2 ||
 	    data->tlpHdr3 || data->tlpHdr4)
-		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
 			data->tlpHdr1, data->tlpHdr2,
 			data->tlpHdr3, data->tlpHdr4);
 	if (data->sourceId || data->errorClass ||
 	    data->correlator)
-		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
 			data->sourceId, data->errorClass,
 			data->correlator);
 	if (data->p7iocPlssr || data->p7iocCsr)
-		pr_info("  PhbSts:      %016llx %016llx\n",
+		pr_info("PhbSts:      %016llx %016llx\n",
 			data->p7iocPlssr, data->p7iocCsr);
-	if (data->lemFir || data->lemErrorMask ||
-	    data->lemWOF)
-		pr_info("  Lem:         %016llx %016llx %016llx\n",
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
 			data->lemFir, data->lemErrorMask,
 			data->lemWOF);
-	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
-	    data->phbErrorLog0   || data->phbErrorLog1)
-		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
 			data->phbErrorStatus, data->phbFirstErrorStatus,
 			data->phbErrorLog0, data->phbErrorLog1);
-	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
-	    data->mmioErrorLog0   || data->mmioErrorLog1)
-		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
 			data->mmioErrorStatus, data->mmioFirstErrorStatus,
 			data->mmioErrorLog0, data->mmioErrorLog1);
-	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
-	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
-		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
 			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
 			data->dma0ErrorLog0, data->dma0ErrorLog1);
-	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
-	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
-		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
 			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
 			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
@@ -198,7 +193,7 @@
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+		pr_info("PE[%3d] A/B: %016llx %016llx\n",
 			i, data->pestA[i], data->pestB[i]);
 	}
 }
@@ -210,79 +205,92 @@
 	int i;
 
 	data = (struct OpalIoPhb3ErrorData*)common;
-	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
-		hose->global_number, common->version);
+	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
 	if (data->brdgCtl)
-		pr_info("  brdgCtl:     %08x\n",
-			data->brdgCtl);
+		pr_info("brdgCtl:     %08x\n",
+			be32_to_cpu(data->brdgCtl));
 	if (data->portStatusReg || data->rootCmplxStatus ||
 	    data->busAgentStatus)
-		pr_info("  UtlSts:      %08x %08x %08x\n",
-			data->portStatusReg, data->rootCmplxStatus,
-			data->busAgentStatus);
+		pr_info("UtlSts:      %08x %08x %08x\n",
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
 	if (data->deviceStatus || data->slotStatus   ||
 	    data->linkStatus   || data->devCmdStatus ||
 	    data->devSecStatus)
-		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
-			data->deviceStatus, data->slotStatus,
-			data->linkStatus, data->devCmdStatus,
-			data->devSecStatus);
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
 	if (data->rootErrorStatus || data->uncorrErrorStatus ||
 	    data->corrErrorStatus)
-		pr_info("  RootErrSts:  %08x %08x %08x\n",
-			data->rootErrorStatus, data->uncorrErrorStatus,
-			data->corrErrorStatus);
+		pr_info("RootErrSts:  %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
 	if (data->tlpHdr1 || data->tlpHdr2 ||
 	    data->tlpHdr3 || data->tlpHdr4)
-		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
-			data->tlpHdr1, data->tlpHdr2,
-			data->tlpHdr3, data->tlpHdr4);
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
 	if (data->sourceId || data->errorClass ||
 	    data->correlator)
-		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
-			data->sourceId, data->errorClass,
-			data->correlator);
-	if (data->nFir || data->nFirMask ||
-	    data->nFirWOF)
-		pr_info("  nFir:        %016llx %016llx %016llx\n",
-			data->nFir, data->nFirMask,
-			data->nFirWOF);
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
+	if (data->nFir)
+		pr_info("nFir:        %016llx %016llx %016llx\n",
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
 	if (data->phbPlssr || data->phbCsr)
-		pr_info("  PhbSts:      %016llx %016llx\n",
-			data->phbPlssr, data->phbCsr);
-	if (data->lemFir || data->lemErrorMask ||
-	    data->lemWOF)
-		pr_info("  Lem:         %016llx %016llx %016llx\n",
-			data->lemFir, data->lemErrorMask,
-			data->lemWOF);
-	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
-	    data->phbErrorLog0   || data->phbErrorLog1)
-		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
-			data->phbErrorStatus, data->phbFirstErrorStatus,
-			data->phbErrorLog0, data->phbErrorLog1);
-	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
-	    data->mmioErrorLog0   || data->mmioErrorLog1)
-		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
-			data->mmioErrorStatus, data->mmioFirstErrorStatus,
-			data->mmioErrorLog0, data->mmioErrorLog1);
-	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
-	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
-		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
-			data->dma0ErrorLog0, data->dma0ErrorLog1);
-	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
-	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
-		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
-			data->dma1ErrorLog0, data->dma1ErrorLog1);
+		pr_info("PhbSts:      %016llx %016llx\n",
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
-		if ((data->pestA[i] >> 63) == 0 &&
-		    (data->pestB[i] >> 63) == 0)
+		if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
+		    (be64_to_cpu(data->pestB[i]) >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
-			i, data->pestA[i], data->pestB[i]);
+		pr_info("PE[%3d] A/B: %016llx %016llx\n",
+				i, be64_to_cpu(data->pestA[i]),
+				be64_to_cpu(data->pestB[i]));
 	}
 }
 
@@ -295,7 +303,7 @@
 		return;
 
 	common = (struct OpalIoPhbErrorCommon *)log_buff;
-	switch (common->ioType) {
+	switch (be32_to_cpu(common->ioType)) {
 	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
 		pnv_pci_dump_p7ioc_diag_data(hose, common);
 		break;
@@ -304,7 +312,7 @@
 		break;
 	default:
 		pr_warn("%s: Unrecognized ioType %d\n",
-			__func__, common->ioType);
+			__func__, be32_to_cpu(common->ioType));
 	}
 }
 
@@ -384,9 +392,6 @@
 	struct pci_dn *pdn = PCI_DN(dn);
 	struct pnv_phb *phb = pdn->phb->private_data;
 	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
-#ifdef CONFIG_EEH
-	struct eeh_pe *phb_pe = NULL;
-#endif
 	s64 rc;
 
 	switch (size) {
@@ -412,31 +417,9 @@
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
+
 	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
 		__func__, pdn->busno, pdn->devfn, where, size, *val);
-
-	/*
-	 * Check if the specified PE has been put into frozen
-	 * state. On the other hand, we needn't do that while
-	 * the PHB has been put into frozen state because of
-	 * PHB-fatal errors.
-	 */
-#ifdef CONFIG_EEH
-	phb_pe = eeh_phb_pe_get(pdn->phb);
-	if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
-		return PCIBIOS_SUCCESSFUL;
-
-	if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
-		if (*val == EEH_IO_ERROR_VALUE(size) &&
-		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
-			return PCIBIOS_DEVICE_NOT_FOUND;
-	} else {
-		pnv_pci_config_check_eeh(phb, dn);
-	}
-#else
-	pnv_pci_config_check_eeh(phb, dn);
-#endif
-
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -463,33 +446,74 @@
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
 
-	/* Check if the PHB got frozen due to an error (no response) */
-#ifdef CONFIG_EEH
-	if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
-		pnv_pci_config_check_eeh(phb, dn);
-#else
-	pnv_pci_config_check_eeh(phb, dn);
-#endif
-
 	return PCIBIOS_SUCCESSFUL;
 }
 
+#ifdef CONFIG_EEH
+/* Return false when config access must be refused (PE reset/device removed) */
+static bool pnv_pci_cfg_check(struct pci_controller *hose,
+			      struct device_node *dn)
+{
+	struct eeh_dev *edev = NULL;
+	struct pnv_phb *phb = hose->private_data;
+
+	/* EEH not enabled ? */
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		return true;
+
+	/* PE reset or device removed ? */
+	edev = of_node_to_eeh_dev(dn);
+	if (edev) {
+		if (edev->pe && (edev->pe->state & EEH_PE_RESET))
+			return false;
+
+		if (edev->mode & EEH_DEV_REMOVED)
+			return false;
+	}
+
+	return true;
+}
+#else
+static inline bool pnv_pci_cfg_check(struct pci_controller *hose,
+				     struct device_node *dn)
+{
+	return true;
+}
+#endif /* CONFIG_EEH */
+
 static int pnv_pci_read_config(struct pci_bus *bus,
 			       unsigned int devfn,
 			       int where, int size, u32 *val)
 {
 	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
 	struct pci_dn *pdn;
-
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn)
-			return pnv_pci_cfg_read(dn, where, size, val);
-	}
+	struct pnv_phb *phb;
+	bool found = false;
+	int ret;
 
 	*val = 0xFFFFFFFF;
-	return PCIBIOS_DEVICE_NOT_FOUND;
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn) {
+			phb = pdn->phb->private_data;
+			found = true;
+			break;
+		}
+	}
 
+	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_read(dn, where, size, val);
+	if (phb->flags & PNV_PHB_FLAG_EEH) {
+		if (*val == EEH_IO_ERROR_VALUE(size) &&
+		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+                        return PCIBIOS_DEVICE_NOT_FOUND;
+	} else {
+		pnv_pci_config_check_eeh(phb, dn);
+	}
+
+	return ret;
 }
 
 static int pnv_pci_write_config(struct pci_bus *bus,
@@ -498,14 +522,27 @@
 {
 	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
 	struct pci_dn *pdn;
+	struct pnv_phb *phb;
+	bool found = false;
+	int ret;
 
 	for (dn = busdn->child; dn; dn = dn->sibling) {
 		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn)
-			return pnv_pci_cfg_write(dn, where, size, val);
+		if (pdn && pdn->devfn == devfn) {
+			phb = pdn->phb->private_data;
+			found = true;
+			break;
+		}
 	}
 
-	return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_write(dn, where, size, val);
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		pnv_pci_config_check_eeh(phb, dn);
+
+	return ret;
 }
 
 struct pci_ops pnv_pci_ops = {

diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index cde1694..676232c 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h

@@ -81,28 +81,27 @@
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*next_error)(struct eeh_pe **pe);
 };
-
-#define PNV_EEH_STATE_ENABLED	(1 << 0)	/* EEH enabled	*/
-#define PNV_EEH_STATE_REMOVED	(1 << 1)	/* PHB removed	*/
-
 #endif /* CONFIG_EEH */
 
+#define PNV_PHB_FLAG_EEH	(1 << 0)
+
 struct pnv_phb {
 	struct pci_controller	*hose;
 	enum pnv_phb_type	type;
 	enum pnv_phb_model	model;
 	u64			hub_id;
 	u64			opal_id;
+	int			flags;
 	void __iomem		*regs;
 	int			initialized;
 	spinlock_t		lock;
 
 #ifdef CONFIG_EEH
 	struct pnv_eeh_ops	*eeh_ops;
-	int			eeh_state;
 #endif
 
 #ifdef CONFIG_DEBUG_FS
+	int			has_dbgfs;
 	struct dentry		*dbgfs;
 #endif
 
@@ -205,5 +204,7 @@
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 					__be64 *startp, __be64 *endp, bool rm);
+extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
+extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
 
 #endif /* __POWERNV_PCI_H */

diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 0051e10..75501bf 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h

@@ -25,4 +25,6 @@
 
 extern void pnv_lpc_init(void);
 
+bool cpu_core_split_required(void);
+
 #endif /* _POWERNV_H */

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 8723d32..d9b88fa 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c

@@ -27,6 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/bug.h>
 #include <linux/pci.h>
+#include <linux/cpufreq.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -34,11 +35,14 @@
 #include <asm/rtas.h>
 #include <asm/opal.h>
 #include <asm/kexec.h>
+#include <asm/smp.h>
 
 #include "powernv.h"
 
 static void __init pnv_setup_arch(void)
 {
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
 	/* Initialize SMP */
 	pnv_smp_init();
 
@@ -98,11 +102,32 @@
 	of_node_put(root);
 }
 
+static void pnv_prepare_going_down(void)
+{
+	/*
+	 * Disable all notifiers from OPAL, we can't
+	 * service interrupts anymore anyway
+	 */
+	opal_notifier_disable();
+
+	/* Soft disable interrupts */
+	local_irq_disable();
+
+	/*
+	 * Return secondary CPUs to firmware if a flash update
+	 * is pending otherwise we will get all sorts of error
+	 * messages about CPU being stuck etc.. This will also
+	 * have the side effect of hard disabling interrupts so
+	 * past this point, the kernel is effectively dead.
+	 */
+	opal_flash_term_callback();
+}
+
 static void  __noreturn pnv_restart(char *cmd)
 {
 	long rc = OPAL_BUSY;
 
-	opal_notifier_disable();
+	pnv_prepare_going_down();
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_reboot();
@@ -119,7 +144,7 @@
 {
 	long rc = OPAL_BUSY;
 
-	opal_notifier_disable();
+	pnv_prepare_going_down();
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_power_down(0);
@@ -222,6 +247,13 @@
 }
 #endif /* CONFIG_KEXEC */
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static unsigned long pnv_memory_block_size(void)
+{
+	return 256UL * 1024 * 1024;
+}
+#endif
+
 static void __init pnv_setup_machdep_opal(void)
 {
 	ppc_md.get_boot_time = opal_get_boot_time;
@@ -269,6 +301,25 @@
 	return 1;
 }
 
+/*
+ * Returns the cpu frequency for 'cpu' in Hz. This is used by
+ * /proc/cpuinfo
+ */
+unsigned long pnv_get_proc_freq(unsigned int cpu)
+{
+	unsigned long ret_freq;
+
+	ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+
+	/*
+	 * If the backend cpufreq driver does not exist,
+	 * then fall back to the old way of reporting the clockrate.
+	 */
+	if (!ret_freq)
+		ret_freq = ppc_proc_freq;
+	return ret_freq;
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -276,6 +327,7 @@
 	.setup_arch		= pnv_setup_arch,
 	.init_IRQ		= pnv_init_IRQ,
 	.show_cpuinfo		= pnv_show_cpuinfo,
+	.get_proc_freq          = pnv_get_proc_freq,
 	.progress		= pnv_progress,
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = power7_idle,
@@ -284,4 +336,7 @@
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+	.memory_block_size	= pnv_memory_block_size,
+#endif
 };

diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index bf5fcd4..5fcfcf4 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c

@@ -31,6 +31,8 @@
 #include <asm/xics.h>
 #include <asm/opal.h>
 #include <asm/runlatch.h>
+#include <asm/code-patching.h>
+#include <asm/dbell.h>
 
 #include "powernv.h"
 
@@ -45,13 +47,18 @@
 {
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
+
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		doorbell_setup_this_cpu();
+#endif
 }
 
 int pnv_smp_kick_cpu(int nr)
 {
 	unsigned int pcpu = get_hard_smp_processor_id(nr);
-	unsigned long start_here = __pa(*((unsigned long *)
-					  generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	long rc;
 
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
@@ -158,17 +165,19 @@
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
 		ppc64_runlatch_off();
-		power7_nap();
+		power7_nap(1);
 		ppc64_runlatch_on();
-		if (!generic_check_cpu_restart(cpu)) {
+
+		/* Reenable IRQs briefly to clear the IPI that woke us */
+		local_irq_enable();
+		local_irq_disable();
+		mb();
+
+		if (cpu_core_split_required())
+			continue;
+
+		if (!generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
-			/* We may be getting an IPI, so we re-enable
-			 * interrupts to process it, it will be ignored
-			 * since we aren't online (hopefully)
-			 */
-			local_irq_enable();
-			local_irq_disable();
-		}
 	}
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
 	DBG("CPU%d coming online...\n", cpu);

diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S
new file mode 100644
index 0000000..39bb24a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore-asm.S

@@ -0,0 +1,95 @@
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+
+#include "subcore.h"
+
+
+_GLOBAL(split_core_secondary_loop)
+	/*
+	 * r3 = u8 *state, used throughout the routine
+	 * r4 = temp
+	 * r5 = temp
+	 * ..
+	 * r12 = MSR
+	 */
+	mfmsr	r12
+
+	/* Disable interrupts so SRR0/1 don't get trashed */
+	li	r4,0
+	ori	r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+	andc	r4,r12,r4
+	sync
+	mtmsrd	r4
+
+	/* Switch to real mode and leave interrupts off */
+	li	r5, MSR_IR|MSR_DR
+	andc	r5, r4, r5
+
+	LOAD_REG_ADDR(r4, real_mode)
+
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+	rfid
+	b	.	/* prevent speculative execution */
+
+real_mode:
+	/* Grab values from unsplit SPRs */
+	mfspr	r6,  SPRN_LDBAR
+	mfspr	r7,  SPRN_PMMAR
+	mfspr	r8,  SPRN_PMCR
+	mfspr	r9,  SPRN_RPR
+	mfspr	r10, SPRN_SDR1
+
+	/* Order reading the SPRs vs telling the primary we are ready to split */
+	sync
+
+	/* Tell thread 0 we are in real mode */
+	li	r4, SYNC_STEP_REAL_MODE
+	stb	r4, 0(r3)
+
+	li	r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest
+	sldi	r5, r5, 48
+
+	/* Loop until we see the split happen in HID0 */
+1:	mfspr	r4, SPRN_HID0
+	and.	r4, r4, r5
+	beq	1b
+
+	/*
+	 * We only need to initialise the below regs once for each subcore,
+	 * but it's simpler and harmless to do it on each thread.
+	 */
+
+	/* Make sure various SPRS have sane values */
+	li	r4, 0
+	mtspr	SPRN_LPID, r4
+	mtspr	SPRN_PCR, r4
+	mtspr	SPRN_HDEC, r4
+
+	/* Restore SPR values now we are split */
+	mtspr	SPRN_LDBAR, r6
+	mtspr	SPRN_PMMAR, r7
+	mtspr	SPRN_PMCR, r8
+	mtspr	SPRN_RPR, r9
+	mtspr	SPRN_SDR1, r10
+
+	LOAD_REG_ADDR(r5, virtual_mode)
+
+	/* Get out of real mode */
+	mtspr	SPRN_SRR0,r5
+	mtspr	SPRN_SRR1,r12
+	rfid
+	b	.	/* prevent speculative execution */
+
+virtual_mode:
+	blr

diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
new file mode 100644
index 0000000..894ecb3
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.c

@@ -0,0 +1,392 @@
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)	"powernv: " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cputhreads.h>
+#include <asm/kvm_ppc.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#include "subcore.h"
+
+
+/*
+ * Split/unsplit procedure:
+ *
+ * A core can be in one of three states, unsplit, 2-way split, and 4-way split.
+ *
+ * The mapping to subcores_per_core is simple:
+ *
+ *  State       | subcores_per_core
+ *  ------------|------------------
+ *  Unsplit     |        1
+ *  2-way split |        2
+ *  4-way split |        4
+ *
+ * The core is split along thread boundaries, the mapping between subcores and
+ * threads is as follows:
+ *
+ *  Unsplit:
+ *          ----------------------------
+ *  Subcore |            0             |
+ *          ----------------------------
+ *  Thread  |  0  1  2  3  4  5  6  7  |
+ *          ----------------------------
+ *
+ *  2-way split:
+ *          -------------------------------------
+ *  Subcore |        0        |        1        |
+ *          -------------------------------------
+ *  Thread  |  0   1   2   3  |  4   5   6   7  |
+ *          -------------------------------------
+ *
+ *  4-way split:
+ *          -----------------------------------------
+ *  Subcore |    0    |    1    |    2    |    3    |
+ *          -----------------------------------------
+ *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
+ *          -----------------------------------------
+ *
+ *
+ * Transitions
+ * -----------
+ *
+ * It is not possible to transition between either of the split states, the
+ * core must first be unsplit. The legal transitions are:
+ *
+ *  -----------          ---------------
+ *  |         |  <---->  | 2-way split |
+ *  |         |          ---------------
+ *  | Unsplit |
+ *  |         |          ---------------
+ *  |         |  <---->  | 4-way split |
+ *  -----------          ---------------
+ *
+ * Unsplitting
+ * -----------
+ *
+ * Unsplitting is the simpler procedure. It requires thread 0 to request the
+ * unsplit while all other threads NAP.
+ *
+ * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
+ * the hardware that if all threads except 0 are napping, the hardware should
+ * unsplit the core.
+ *
+ * Non-zero threads are sent to a NAP loop, they don't exit the loop until they
+ * see the core unsplit.
+ *
+ * Thread 0 spins waiting for the hardware to see all the other threads napping
+ * and perform the unsplit.
+ *
+ * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
+ * out of NAP. They will then see the core unsplit and exit the NAP loop.
+ *
+ * Splitting
+ * ---------
+ *
+ * The basic splitting procedure is fairly straightforward. However it is
+ * complicated by the fact that after the split occurs, the newly created
+ * subcores are not in a fully initialised state.
+ *
+ * Most notably the subcores do not have the correct value for SDR1, which
+ * means they must not be running in virtual mode when the split occurs. The
+ * subcores have separate timebase SPRs but these are pre-synchronised by
+ * opal.
+ *
+ * To begin with secondary threads are sent to an assembly routine. There they
+ * switch to real mode, so they are immune to the uninitialised SDR1 value.
+ * Once in real mode they indicate that they are in real mode, and spin waiting
+ * to see the core split.
+ *
+ * Thread 0 waits to see that all secondaries are in real mode, and then begins
+ * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which
+ * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
+ * to request the split, and spins waiting to see that the split has happened.
+ *
+ * Concurrently the secondaries will notice the split. When they do they set up
+ * their SPRs, notably SDR1, and then they can return to virtual mode and exit
+ * the procedure.
+ */
+
+/* Initialised at boot by subcore_init() */
+static int subcores_per_core;
+
+/*
+ * Used to communicate to offline cpus that we want them to pop out of the
+ * offline loop and do a split or unsplit.
+ *
+ * 0 - no split happening
+ * 1 - unsplit in progress
+ * 2 - split to 2 in progress
+ * 4 - split to 4 in progress
+ */
+static int new_split_mode;
+
+static cpumask_var_t cpu_offline_mask;
+
+struct split_state {
+	u8 step;
+	u8 master;
+};
+
+static DEFINE_PER_CPU(struct split_state, split_state);
+
+static void wait_for_sync_step(int step)
+{
+	int i, cpu = smp_processor_id();
+
+	for (i = cpu + 1; i < cpu + threads_per_core; i++)
+		while(per_cpu(split_state, i).step < step)
+			barrier();
+
+	/* Order the wait loop vs any subsequent loads/stores. */
+	mb();
+}
+
+static void unsplit_core(void)
+{
+	u64 hid0, mask;
+	int i, cpu;
+
+	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+
+	cpu = smp_processor_id();
+	if (cpu_thread_in_core(cpu) != 0) {
+		while (mfspr(SPRN_HID0) & mask)
+			power7_nap(0);
+
+		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
+		return;
+	}
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 &= ~HID0_POWER8_DYNLPARDIS;
+	mtspr(SPRN_HID0, hid0);
+
+	while (mfspr(SPRN_HID0) & mask)
+		cpu_relax();
+
+	/* Wake secondaries out of NAP */
+	for (i = cpu + 1; i < cpu + threads_per_core; i++)
+		smp_send_reschedule(i);
+
+	wait_for_sync_step(SYNC_STEP_UNSPLIT);
+}
+
+static void split_core(int new_mode)
+{
+	struct {  u64 value; u64 mask; } split_parms[2] = {
+		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
+		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
+	};
+	int i, cpu;
+	u64 hid0;
+
+	/* Convert new_mode (2 or 4) into an index into our parms array */
+	i = (new_mode >> 1) - 1;
+	BUG_ON(i < 0 || i > 1);
+
+	cpu = smp_processor_id();
+	if (cpu_thread_in_core(cpu) != 0) {
+		split_core_secondary_loop(&per_cpu(split_state, cpu).step);
+		return;
+	}
+
+	wait_for_sync_step(SYNC_STEP_REAL_MODE);
+
+	/* Write new mode */
+	hid0  = mfspr(SPRN_HID0);
+	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
+	mtspr(SPRN_HID0, hid0);
+
+	/* Wait for it to happen */
+	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
+		cpu_relax();
+}
+
+static void cpu_do_split(int new_mode)
+{
+	/*
+	 * At boot subcores_per_core will be 0, so we will always unsplit at
+	 * boot. In the usual case where the core is already unsplit it's a
+	 * nop, and this just ensures the kernel's notion of the mode is
+	 * consistent with the hardware.
+	 */
+	if (subcores_per_core != 1)
+		unsplit_core();
+
+	if (new_mode != 1)
+		split_core(new_mode);
+
+	mb();
+	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
+}
+
+bool cpu_core_split_required(void)
+{
+	smp_rmb();
+
+	if (!new_split_mode)
+		return false;
+
+	cpu_do_split(new_split_mode);
+
+	return true;
+}
+
+static int cpu_update_split_mode(void *data)
+{
+	int cpu, new_mode = *(int *)data;
+
+	if (this_cpu_ptr(&split_state)->master) {
+		new_split_mode = new_mode;
+		smp_wmb();
+
+		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
+			       cpu_online_mask);
+
+		/* This should work even though the cpu is offline */
+		for_each_cpu(cpu, cpu_offline_mask)
+			smp_send_reschedule(cpu);
+	}
+
+	cpu_do_split(new_mode);
+
+	if (this_cpu_ptr(&split_state)->master) {
+		/* Wait for all cpus to finish before we touch subcores_per_core */
+		for_each_present_cpu(cpu) {
+			if (cpu >= setup_max_cpus)
+				break;
+
+			while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
+				barrier();
+		}
+
+		new_split_mode = 0;
+
+		/* Make the new mode public */
+		subcores_per_core = new_mode;
+		threads_per_subcore = threads_per_core / subcores_per_core;
+
+		/* Make sure the new mode is written before we exit */
+		mb();
+	}
+
+	return 0;
+}
+
+static int set_subcores_per_core(int new_mode)
+{
+	struct split_state *state;
+	int cpu;
+
+	if (kvm_hv_mode_active()) {
+		pr_err("Unable to change split core mode while KVM active.\n");
+		return -EBUSY;
+	}
+
+	/*
+	 * We are only called at boot, or from the sysfs write. If that ever
+	 * changes we'll need a lock here.
+	 */
+	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);
+
+	for_each_present_cpu(cpu) {
+		state = &per_cpu(split_state, cpu);
+		state->step = SYNC_STEP_INITIAL;
+		state->master = 0;
+	}
+
+	get_online_cpus();
+
+	/* This cpu will update the globals before exiting stop machine */
+	this_cpu_ptr(&split_state)->master = 1;
+
+	/* Ensure state is consistent before we call the other cpus */
+	mb();
+
+	stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+
+	put_online_cpus();
+
+	return 0;
+}
+
+static ssize_t __used store_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int rc;
+
+	/* We are serialised by the attribute lock */
+
+	rc = sscanf(buf, "%lx", &val);
+	if (rc != 1)
+		return -EINVAL;
+
+	switch (val) {
+	case 1:
+	case 2:
+	case 4:
+		if (subcores_per_core == val)
+			/* Nothing to do */
+			goto out;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = set_subcores_per_core(val);
+	if (rc)
+		return rc;
+
+out:
+	return count;
+}
+
+static ssize_t show_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%x\n", subcores_per_core);
+}
+
+static DEVICE_ATTR(subcores_per_core, 0644,
+		show_subcores_per_core, store_subcores_per_core);
+
+static int subcore_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 0;
+
+	/*
+	 * We need all threads in a core to be present to split/unsplit so
+	 * continue only if max_cpus is aligned to threads_per_core.
+	 */
+	if (setup_max_cpus % threads_per_core)
+		return 0;
+
+	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));
+
+	set_subcores_per_core(1);
+
+	return device_create_file(cpu_subsys.dev_root,
+				  &dev_attr_subcores_per_core);
+}
+machine_device_initcall(powernv, subcore_init);

diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
new file mode 100644
index 0000000..148abc9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.h

@@ -0,0 +1,18 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* These are ordered and compared with < */
+#define SYNC_STEP_INITIAL	0
+#define SYNC_STEP_UNSPLIT	1	/* Set by secondary when it sees unsplit */
+#define SYNC_STEP_REAL_MODE	2	/* Set by secondary when in real mode  */
+#define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
+
+#ifndef __ASSEMBLY__
+void split_core_secondary_loop(u8 *state);
+#endif

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2cb8b77..756b482 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig

@@ -21,6 +21,7 @@
 	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
 	select ARCH_RANDOM
+	select PPC_DOORBELL
 	default y
 
 config PPC_SPLPAR

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8a8f047..0bec0c0 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c

@@ -175,6 +175,36 @@
 	return 0;
 }
 
+static int pseries_eeh_find_ecap(struct device_node *dn, int cap)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	u32 header;
+	int pos = 256;
+	int ttl = (4096 - 256) / 8;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+	if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+		return 0;
+	else if (!header)
+		return 0;
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap && pos)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < 256)
+			break;
+
+		if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+
 /**
  * pseries_eeh_of_probe - EEH probe on the given device
  * @dn: OF node
@@ -220,7 +250,9 @@
 	 * or PCIe switch downstream port.
 	 */
 	edev->class_code = class_code;
+	edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX);
 	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
+	edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR);
 	edev->mode &= 0xFFFFFF00;
 	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
@@ -464,6 +496,7 @@
 			} else {
 				result = EEH_STATE_NOT_SUPPORT;
 			}
+			break;
 		default:
 			result = EEH_STATE_NOT_SUPPORT;
 		}
@@ -499,11 +532,19 @@
 	/* If fundamental-reset not supported, try hot-reset */
 	if (option == EEH_RESET_FUNDAMENTAL &&
 	    ret == -8) {
+		option = EEH_RESET_HOT;
 		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
 				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), EEH_RESET_HOT);
+				BUID_LO(pe->phb->buid), option);
 	}
 
+	/* We need reset hold or settlement delay */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		msleep(EEH_PE_RST_HOLD_TIME);
+	else
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
 	return ret;
 }
 

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 7f75c94..7995135 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c

@@ -21,7 +21,7 @@
 #include <asm/prom.h>
 #include <asm/sparsemem.h>
 
-static unsigned long get_memblock_size(void)
+unsigned long pseries_memory_block_size(void)
 {
 	struct device_node *np;
 	unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
@@ -64,17 +64,6 @@
 	return memblock_size;
 }
 
-/* WARNING: This is going to override the generic definition whenever
- * pseries is built-in regardless of what platform is active at boot
- * time. This is fine for now as this is the only "option" and it
- * should work everywhere. If not, we'll have to turn this into a
- * ppc_md. callback
- */
-unsigned long memory_block_size_bytes(void)
-{
-	return get_memblock_size();
-}
-
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memory(u64 start, u64 size)
 {
@@ -105,7 +94,7 @@
 	if (!pfn_valid(start_pfn))
 		goto out;
 
-	block_sz = memory_block_size_bytes();
+	block_sz = pseries_memory_block_size();
 	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
 	nid = memory_add_physaddr_to_nid(base);
 
@@ -201,7 +190,7 @@
 	u32 *p;
 	int i, rc = -EINVAL;
 
-	memblock_size = get_memblock_size();
+	memblock_size = pseries_memory_block_size();
 	if (!memblock_size)
 		return -EINVAL;
 

diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 444fe77..99ecf0a 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S

@@ -49,7 +49,7 @@
 	std	r0,16(r1);					\
 	addi	r4,r1,STK_PARAM(FIRST_REG);			\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
-	bl	.__trace_hcall_entry;				\
+	bl	__trace_hcall_entry;				\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
 	ld	r0,16(r1);					\
 	ld	r3,STK_PARAM(R3)(r1);				\
@@ -83,7 +83,7 @@
 	mr	r3,r6;						\
 	std	r0,16(r1);					\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
-	bl	.__trace_hcall_exit;				\
+	bl	__trace_hcall_exit;				\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
 	ld	r0,16(r1);					\
 	ld	r3,STK_PARAM(R3)(r1);				\
@@ -106,7 +106,7 @@
 
 	.text
 
-_GLOBAL(plpar_hcall_norets)
+_GLOBAL_TOC(plpar_hcall_norets)
 	HMT_MEDIUM
 
 	mfcr	r0
@@ -122,7 +122,7 @@
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
 
-_GLOBAL(plpar_hcall)
+_GLOBAL_TOC(plpar_hcall)
 	HMT_MEDIUM
 
 	mfcr	r0
@@ -188,7 +188,7 @@
 
 	blr				/* return r3 = status */
 
-_GLOBAL(plpar_hcall9)
+_GLOBAL_TOC(plpar_hcall9)
 	HMT_MEDIUM
 
 	mfcr	r0

diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 9921953..361add6 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h

@@ -64,4 +64,6 @@
 struct pci_host_bridge;
 int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
 
+unsigned long pseries_memory_block_size(void);
+
 #endif /* _PSERIES_PSERIES_H */

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 099d2df..f2f40e6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c

@@ -510,7 +510,11 @@
 static int __init pSeries_init_panel(void)
 {
 	/* Manually leave the kernel version on the panel. */
+#ifdef __BIG_ENDIAN__
 	ppc_md.progress("Linux ppc64\n", 0);
+#else
+	ppc_md.progress("Linux ppc64le\n", 0);
+#endif
 	ppc_md.progress(init_utsname()->version, 0);
 
 	return 0;
@@ -806,4 +810,7 @@
 #ifdef CONFIG_KEXEC
 	.machine_kexec          = pSeries_machine_kexec,
 #endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+	.memory_block_size	= pseries_memory_block_size,
+#endif
 };

diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 24f58cb..a3555b1 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c

@@ -44,6 +44,7 @@
 #include <asm/xics.h>
 #include <asm/dbell.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/code-patching.h>
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -96,8 +97,8 @@
 static inline int smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
-	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 

diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
deleted file mode 100644
index 422a175..0000000
--- a/arch/powerpc/platforms/wsp/Kconfig
+++ /dev/null

@@ -1,30 +0,0 @@
-config PPC_WSP
-	bool
-	select PPC_A2
-	select GENERIC_TBSYNC
-	select PPC_ICSWX
-	select PPC_SCOM
-	select PPC_XICS
-	select PPC_ICP_NATIVE
-	select PCI
-	select PPC_IO_WORKAROUNDS if PCI
-	select PPC_INDIRECT_PIO if PCI
-	default n
-
-menu "WSP platform selection"
-	depends on PPC_BOOK3E_64
-
-config PPC_PSR2
-	bool "PowerEN System Reference Platform 2"
-	select EPAPR_BOOT
-	select PPC_WSP
-	default y
-
-config PPC_CHROMA
-	bool "PowerEN PCIe Chroma Card"
-	select EPAPR_BOOT
-	select PPC_WSP
-	select OF_DYNAMIC
-	default y
-
-endmenu

diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
deleted file mode 100644
index 162fc60..0000000
--- a/arch/powerpc/platforms/wsp/Makefile
+++ /dev/null

@@ -1,10 +0,0 @@
-ccflags-y			+= $(NO_MINIMAL_TOC)
-
-obj-y				+= setup.o ics.o wsp.o
-obj-$(CONFIG_PPC_PSR2)		+= psr2.o
-obj-$(CONFIG_PPC_CHROMA)	+= chroma.o h8.o
-obj-$(CONFIG_PPC_WSP)		+= opb_pic.o
-obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
-obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
-obj-$(CONFIG_PCI)		+= wsp_pci.o
-obj-$(CONFIG_PCI_MSI)		+= msi.o

diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c
deleted file mode 100644
index aaa46b3..0000000
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ /dev/null

@@ -1,56 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-#include <linux/of_fdt.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-void __init chroma_setup_arch(void)
-{
-	wsp_setup_arch();
-	wsp_setup_h8();
-
-}
-
-static int __init chroma_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(chroma_md) {
-	.name			= "Chroma PCIe",
-	.probe			= chroma_probe,
-	.setup_arch		= chroma_setup_arch,
-	.restart		= wsp_h8_restart,
-	.power_off		= wsp_h8_power_off,
-	.halt			= wsp_halt,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(chroma_md, wsp_probe_devices);

diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c
deleted file mode 100644
index a3c87f3..0000000
--- a/arch/powerpc/platforms/wsp/h8.c
+++ /dev/null

@@ -1,135 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/io.h>
-#include <linux/of_address.h>
-
-#include "wsp.h"
-
-/*
- * The UART connection to the H8 is over ttyS1 which is just a 16550.
- * We assume that FW has it setup right and no one messes with it.
- */
-
-
-static u8 __iomem *h8;
-
-#define RBR 0		/* Receiver Buffer Register */
-#define THR 0		/* Transmitter Holding Register */
-#define LSR 5		/* Line Status Register */
-#define LSR_DR 0x01	/* LSR value for Data-Ready */
-#define LSR_THRE 0x20	/* LSR value for Transmitter-Holding-Register-Empty */
-static void wsp_h8_putc(int c)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_THRE) != LSR_THRE);
-	writeb(c, h8 + THR);
-}
-
-static int wsp_h8_getc(void)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_DR) != LSR_DR);
-
-	return readb(h8 + RBR);
-}
-
-static void wsp_h8_puts(const char *s, int sz)
-{
-	int i;
-
-	for (i = 0; i < sz; i++) {
-		wsp_h8_putc(s[i]);
-
-		/* no flow control so wait for echo */
-		wsp_h8_getc();
-	}
-	wsp_h8_putc('\r');
-	wsp_h8_putc('\n');
-}
-
-static void wsp_h8_terminal_cmd(const char *cmd, int sz)
-{
-	hard_irq_disable();
-	wsp_h8_puts(cmd, sz);
-	/* should never return, but just in case */
-	for (;;)
-		continue;
-}
-
-
-void wsp_h8_restart(char *cmd)
-{
-	static const char restart[] = "warm-reset";
-
-	(void)cmd;
-	wsp_h8_terminal_cmd(restart, sizeof(restart) - 1);
-}
-
-void wsp_h8_power_off(void)
-{
-	static const char off[] = "power-off";
-
-	wsp_h8_terminal_cmd(off, sizeof(off) - 1);
-}
-
-static void __iomem *wsp_h8_getaddr(void)
-{
-	struct device_node *aliases;
-	struct device_node *uart;
-	struct property *path;
-	void __iomem *va = NULL;
-
-	/*
-	 * there is nothing in the devtree to tell us which is mapped
-	 * to the H8, but se know it is the second serial port.
-	 */
-
-	aliases = of_find_node_by_path("/aliases");
-	if (aliases == NULL)
-		return NULL;
-
-	path = of_find_property(aliases, "serial1", NULL);
-	if (path == NULL)
-		goto out;
-
-	uart = of_find_node_by_path(path->value);
-	if (uart == NULL)
-		goto out;
-
-	va = of_iomap(uart, 0);
-
-	/* remove it so no one messes with it */
-	of_detach_node(uart);
-	of_node_put(uart);
-
-out:
-	of_node_put(aliases);
-
-	return va;
-}
-
-void __init wsp_setup_h8(void)
-{
-	h8 = wsp_h8_getaddr();
-
-	/* Devtree change? lets hard map it anyway */
-	if (h8 == NULL) {
-		pr_warn("UART to H8 could not be found");
-		h8 = ioremap(0xffc0008000ULL, 0x100);
-	}
-}

diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
deleted file mode 100644
index 9cd92e6..0000000
--- a/arch/powerpc/platforms/wsp/ics.c
+++ /dev/null

@@ -1,762 +0,0 @@
-/*
- * Copyright 2008-2011 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/xics.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-
-/* WSP ICS */
-
-struct wsp_ics {
-	struct ics ics;
-	struct device_node *dn;
-	void __iomem *regs;
-	spinlock_t lock;
-	unsigned long *bitmap;
-	u32 chip_id;
-	u32 lsi_base;
-	u32 lsi_count;
-	u64 hwirq_start;
-	u64 count;
-#ifdef CONFIG_SMP
-	int *hwirq_cpu_map;
-#endif
-};
-
-#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
-
-#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
-#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
-#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
-#define XIVE_UPDATE_REG(base)		((base) + 0x28)
-#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
-
-#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
-#define TBL_SELECT_XIST		(1UL << 48)
-#define TBL_SELECT_XIVT		(1UL << 49)
-
-#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
-
-#define XIST_REQUIRED		0x8
-#define XIST_REJECTED		0x4
-#define XIST_PRESENTED		0x2
-#define XIST_PENDING		0x1
-
-#define XIVE_SERVER_SHIFT	42
-#define XIVE_SERVER_MASK	0xFFFFULL
-#define XIVE_PRIORITY_MASK	0xFFULL
-#define XIVE_PRIORITY_SHIFT	32
-#define XIVE_WRITE_ENABLE	(1ULL << 63)
-
-/*
- * The docs refer to a 6 bit field called ChipID, which consists of a
- * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
- * so we ignore it, and every where we use "chip id" in this code we
- * mean the NodeID.
- */
-#define WSP_ICS_CHIP_SHIFT		17
-
-
-static struct wsp_ics *ics_list;
-static int num_ics;
-
-/* ICS Source controller accessors */
-
-static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
-{
-	unsigned long flags;
-	u64 xive;
-
-	spin_lock_irqsave(&ics->lock, flags);
-	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
-	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
-	spin_unlock_irqrestore(&ics->lock, flags);
-
-	return xive;
-}
-
-static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
-{
-	xive &= ~XIVE_ADDR_MASK;
-	xive |= (irq & XIVE_ADDR_MASK);
-	xive |= XIVE_WRITE_ENABLE;
-
-	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
-}
-
-static u64 xive_set_server(u64 xive, unsigned int server)
-{
-	u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
-
-	xive &= mask;
-	xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
-
-	return xive;
-}
-
-static u64 xive_set_priority(u64 xive, unsigned int priority)
-{
-	u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
-
-	xive &= mask;
-	xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
-
-	return xive;
-}
-
-
-#ifdef CONFIG_SMP
-/* Find logical CPUs within mask on a given chip and store result in ret */
-void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
-{
-	int cpu, chip;
-	struct device_node *cpu_dn, *dn;
-	const u32 *prop;
-
-	cpumask_clear(ret);
-	for_each_cpu(cpu, mask) {
-		cpu_dn = of_get_cpu_node(cpu, NULL);
-		if (!cpu_dn)
-			continue;
-
-		prop = of_get_property(cpu_dn, "at-node", NULL);
-		if (!prop) {
-			of_node_put(cpu_dn);
-			continue;
-		}
-
-		dn = of_find_node_by_phandle(*prop);
-		of_node_put(cpu_dn);
-
-		chip = wsp_get_chip_id(dn);
-		if (chip == chip_id)
-			cpumask_set_cpu(cpu, ret);
-
-		of_node_put(dn);
-	}
-}
-
-/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	cpumask_var_t avail, newmask;
-	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
-	int index = hwirq - ics->hwirq_start;
-	unsigned int nodeid;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return -ENOMEM;
-
-	if (!distribute_irqs) {
-		ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
-		return 0;
-	}
-
-	/* Allocate needed CPU masks */
-	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
-		goto ret;
-	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
-		goto freeavail;
-
-	/* Find PBus attached to the source of this IRQ */
-	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
-
-	/* Find CPUs that could handle this IRQ */
-	if (affinity)
-		cpumask_and(avail, cpu_online_mask, affinity);
-	else
-		cpumask_copy(avail, cpu_online_mask);
-
-	/* Narrow selection down to logical CPUs on the same chip */
-	cpus_on_chip(nodeid, avail, newmask);
-
-	/* Ensure we haven't narrowed it down to 0 */
-	if (unlikely(cpumask_empty(newmask))) {
-		if (unlikely(cpumask_empty(avail))) {
-			ret = -1;
-			goto out;
-		}
-		cpumask_copy(newmask, avail);
-	}
-
-	/* Choose a CPU out of those we narrowed it down to in round robin */
-	target = hwirq % cpumask_weight(newmask);
-	for_each_cpu(cpu, newmask) {
-		if (cpu_rover++ >= target) {
-			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
-			ret = 0;
-			goto out;
-		}
-	}
-
-	/* Shouldn't happen */
-	WARN_ON(1);
-
-out:
-	free_cpumask_var(newmask);
-freeavail:
-	free_cpumask_var(avail);
-ret:
-	if (ret < 0) {
-		ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
-		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
-			   hwirq, ics->hwirq_cpu_map[index]);
-	}
-	return ret;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics)
-{
-	int i;
-
-	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
-	if (!ics->hwirq_cpu_map) {
-		pr_warning("Allocate hwirq_cpu_map failed, "
-			   "IRQ balancing disabled\n");
-		return;
-	}
-
-	for (i=0; i < ics->count; i++)
-		ics->hwirq_cpu_map[i] = xics_default_server;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	int index = hwirq - ics->hwirq_start;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return xics_default_server;
-
-	return ics->hwirq_cpu_map[index];
-}
-#else /* !CONFIG_SMP */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	return 0;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	return xics_default_server;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics) { }
-#endif
-
-static void wsp_chip_unmask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int server;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return;
-
-	server = get_irq_server(ics, hw_irq);
-
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, server);
-	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static unsigned int wsp_chip_startup(struct irq_data *d)
-{
-	/* unmask it */
-	wsp_chip_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
-{
-	u64 xive;
-
-	if (hw_irq == XICS_IPI)
-		return;
-
-	if (WARN_ON(!ics))
-		return;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, xics_default_server);
-	xive = xive_set_priority(xive, LOWEST_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static void wsp_chip_mask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics = d->chip_data;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	wsp_mask_real_irq(hw_irq, ics);
-}
-
-static int wsp_chip_set_affinity(struct irq_data *d,
-				 const struct cpumask *cpumask, bool force)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int ret;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return -1;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-
-	/*
-	 * For the moment only implement delivery to all cpus or one cpu.
-	 * Get current irq_server for the given irq
-	 */
-	ret = cache_hwirq_map(ics, hw_irq, cpumask);
-	if (ret == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
-			   __func__, cpulist, d->irq);
-		return -1;
-	} else if (ret == -ENOMEM) {
-		pr_warning("%s: Out of memory\n", __func__);
-		return -1;
-	}
-
-	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
-	wsp_ics_set_xive(ics, hw_irq, xive);
-
-	return IRQ_SET_MASK_OK;
-}
-
-static struct irq_chip wsp_irq_chip = {
-	.name = "WSP ICS",
-	.irq_startup		= wsp_chip_startup,
-	.irq_mask		= wsp_chip_mask_irq,
-	.irq_unmask		= wsp_chip_unmask_irq,
-	.irq_set_affinity	= wsp_chip_set_affinity
-};
-
-static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
-{
-	/* All ICSs in the system implement a global irq number space,
-	 * so match against them all. */
-	return of_device_is_compatible(dn, "ibm,ppc-xics");
-}
-
-static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
-{
-	if (hwirq >= wsp_ics->hwirq_start &&
-	    hwirq <  wsp_ics->hwirq_start + wsp_ics->count)
-		return 1;
-
-	return 0;
-}
-
-static int wsp_ics_map(struct ics *ics, unsigned int virq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-	unsigned int hw_irq = virq_to_hw(virq);
-	unsigned long flags;
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
-
-	irq_set_chip_data(virq, wsp_ics);
-
-	spin_lock_irqsave(&wsp_ics->lock, flags);
-	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
-	spin_unlock_irqrestore(&wsp_ics->lock, flags);
-
-	return 0;
-}
-
-static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return;
-
-	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
-	wsp_mask_real_irq(hw_irq, wsp_ics);
-}
-
-static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	return get_irq_server(wsp_ics, hw_irq);
-}
-
-/* HW Number allocation API */
-
-static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
-{
-	struct device_node *iparent;
-	int i;
-
-	iparent = of_irq_find_parent(dn);
-	if (!iparent) {
-		pr_err("wsp_ics: Failed to find interrupt parent!\n");
-		return NULL;
-	}
-
-	for(i = 0; i < num_ics; i++) {
-		if(ics_list[i].dn == iparent)
-			break;
-	}
-
-	if (i >= num_ics) {
-		pr_err("wsp_ics: Unable to find parent bitmap!\n");
-		return NULL;
-	}
-
-	return &ics_list[i];
-}
-
-int wsp_ics_alloc_irq(struct device_node *dn, int num)
-{
-	struct wsp_ics *ics;
-	int order, offset;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (!ics)
-		return -ENODEV;
-
-	/* Fast, but overly strict if num isn't a power of two */
-	order = get_count_order(num);
-
-	spin_lock_irq(&ics->lock);
-	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
-	spin_unlock_irq(&ics->lock);
-
-	if (offset < 0)
-		return offset;
-
-	return offset + ics->hwirq_start;
-}
-
-void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
-{
-	struct wsp_ics *ics;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (WARN_ON(!ics))
-		return;
-
-	spin_lock_irq(&ics->lock);
-	bitmap_release_region(ics->bitmap, irq, 0);
-	spin_unlock_irq(&ics->lock);
-}
-
-/* Initialisation */
-
-static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
-				      struct device_node *dn)
-{
-	int len, i, j, size;
-	u32 start, count;
-	const u32 *p;
-
-	size = BITS_TO_LONGS(ics->count) * sizeof(long);
-	ics->bitmap = kzalloc(size, GFP_KERNEL);
-	if (!ics->bitmap) {
-		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&ics->lock);
-
-	p = of_get_property(dn, "available-ranges", &len);
-	if (!p || !len) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: No available-ranges defined for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	if (len % (2 * sizeof(u32)) != 0) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: Invalid available-ranges for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	bitmap_fill(ics->bitmap, ics->count);
-
-	for (i = 0; i < len / sizeof(u32); i += 2) {
-		start = of_read_number(p + i, 1);
-		count = of_read_number(p + i + 1, 1);
-
-		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
-
-		if ((start + count) > (ics->hwirq_start + ics->count) ||
-		     start < ics->hwirq_start) {
-			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
-					start, start + count);
-			break;
-		}
-
-		for (j = 0; j < count; j++)
-			bitmap_release_region(ics->bitmap,
-				(start + j) - ics->hwirq_start, 0);
-	}
-
-	/* Ensure LSIs are not available for allocation */
-	bitmap_allocate_region(ics->bitmap, ics->lsi_base,
-			       get_count_order(ics->lsi_count));
-
-	return 0;
-}
-
-static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
-{
-	u32 lsi_buid, msi_buid, msi_base, msi_count;
-	void __iomem *regs;
-	const u32 *p;
-	int rc, len, i;
-	u64 caps, buid;
-
-	p = of_get_property(dn, "interrupt-ranges", &len);
-	if (!p || len < (2 * sizeof(u32))) {
-		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
-			dn->full_name);
-		return -ENOENT;
-	}
-
-	if (len > (2 * sizeof(u32))) {
-		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
-		return -EINVAL;
-	}
-
-	regs = of_iomap(dn, 0);
-	if (!regs) {
-		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
-		return -ENXIO;
-	}
-
-	ics->hwirq_start = of_read_number(p, 1);
-	ics->count = of_read_number(p + 1, 1);
-	ics->regs = regs;
-
-	ics->chip_id = wsp_get_chip_id(dn);
-	if (WARN_ON(ics->chip_id < 0))
-		ics->chip_id = 0;
-
-	/* Get some informations about the critter */
-	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
-	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
-	ics->lsi_count = caps >> 56;
-	msi_count = (caps >> 44) & 0x7ff;
-
-	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
-	 * rest is mixed in the interrupt number. We store the whole
-	 * thing though
-	 */
-	lsi_buid = (buid >> 48) & 0x1ff;
-	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
-	msi_buid = (buid >> 37) & 0x7;
-	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
-
-	pr_info("wsp_ics: Found %s\n", dn->full_name);
-	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
-		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
-	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
-		ics->lsi_count, ics->lsi_base,
-		ics->lsi_base + ics->lsi_count - 1);
-	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
-		msi_count, msi_base,
-		msi_base + msi_count - 1);
-
-	/* Let's check the HW config is sane */
-	if (ics->lsi_base < ics->hwirq_start ||
-	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
-	if (msi_base < ics->hwirq_start ||
-	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
-
-	/* We don't check for overlap between LSI and MSI, which will happen
-	 * if we use the same BUID, I'm not sure yet how legit that is.
-	 */
-
-	rc = wsp_ics_bitmap_setup(ics, dn);
-	if (rc) {
-		iounmap(regs);
-		return rc;
-	}
-
-	ics->dn = of_node_get(dn);
-	alloc_irq_map(ics);
-
-	for(i = 0; i < ics->count; i++)
-		wsp_mask_real_irq(ics->hwirq_start + i, ics);
-
-	ics->ics.map = wsp_ics_map;
-	ics->ics.mask_unknown = wsp_ics_mask_unknown;
-	ics->ics.get_server = wsp_ics_get_server;
-	ics->ics.host_match = wsp_ics_host_match;
-
-	xics_register_ics(&ics->ics);
-
-	return 0;
-}
-
-static void __init wsp_ics_set_default_server(void)
-{
-	struct device_node *np;
-	u32 hwid;
-
-	/* Find the server number for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	hwid = get_hard_smp_processor_id(boot_cpuid);
-
-	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
-	xics_default_server = hwid;
-
-	of_node_put(np);
-}
-
-static int __init wsp_ics_init(void)
-{
-	struct device_node *dn;
-	struct wsp_ics *ics;
-	int rc, found;
-
-	wsp_ics_set_default_server();
-
-	found = 0;
-	for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
-		found++;
-
-	if (found == 0) {
-		pr_err("wsp_ics: No ICS's found!\n");
-		return -ENODEV;
-	}
-
-	ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
-	if (!ics_list) {
-		pr_err("wsp_ics: No memory for structs.\n");
-		return -ENOMEM;
-	}
-
-	num_ics = 0;
-	ics = ics_list;
-	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
-		rc = wsp_ics_setup(ics, dn);
-		if (rc == 0) {
-			ics++;
-			num_ics++;
-		}
-	}
-
-	if (found != num_ics) {
-		pr_err("wsp_ics: Failed setting up %d ICS's\n",
-			found - num_ics);
-		return -1;
-	}
-
-	return 0;
-}
-
-void __init wsp_init_irq(void)
-{
-	wsp_ics_init();
-	xics_init();
-
-	/* We need to patch our irq chip's EOI to point to the right ICP */
-	wsp_irq_chip.irq_eoi = icp_ops->eoi;
-}
-
-#ifdef CONFIG_PCI_MSI
-static void wsp_ics_msi_unmask_irq(struct irq_data *d)
-{
-	wsp_chip_unmask_irq(d);
-	unmask_msi_irq(d);
-}
-
-static unsigned int wsp_ics_msi_startup(struct irq_data *d)
-{
-	wsp_ics_msi_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_ics_msi_mask_irq(struct irq_data *d)
-{
-	mask_msi_irq(d);
-	wsp_chip_mask_irq(d);
-}
-
-/*
- * we do it this way because we reassinge default EOI handling in
- * irq_init() above
- */
-static void wsp_ics_eoi(struct irq_data *data)
-{
-	wsp_irq_chip.irq_eoi(data);
-}
-
-static struct irq_chip wsp_ics_msi = {
-	.name = "WSP ICS MSI",
-	.irq_startup = wsp_ics_msi_startup,
-	.irq_mask = wsp_ics_msi_mask_irq,
-	.irq_unmask = wsp_ics_msi_unmask_irq,
-	.irq_eoi = wsp_ics_eoi,
-	.irq_set_affinity = wsp_chip_set_affinity
-};
-
-void wsp_ics_set_msi_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_ics_msi);
-}
-
-void wsp_ics_set_std_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_irq_chip);
-}
-#endif /* CONFIG_PCI_MSI */

diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
deleted file mode 100644
index 07b644e..0000000
--- a/arch/powerpc/platforms/wsp/ics.h
+++ /dev/null

@@ -1,25 +0,0 @@
-/*
- * Copyright 2009 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __ICS_H
-#define __ICS_H
-
-#define XIVE_ADDR_MASK		0x7FFULL
-
-extern void wsp_init_irq(void);
-
-extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
-extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_ics_set_msi_chip(unsigned int irq);
-extern void wsp_ics_set_std_chip(unsigned int irq);
-#endif /* CONFIG_PCI_MSI */
-
-#endif /* __ICS_H */

diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c
deleted file mode 100644
index 380882f..0000000
--- a/arch/powerpc/platforms/wsp/msi.c
+++ /dev/null

@@ -1,102 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-
-#include "msi.h"
-#include "ics.h"
-#include "wsp_pci.h"
-
-/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */
-#define MSI_ADDR_32		0xFFFF0000ul
-#define MSI_ADDR_64		0x1000000000000000ul
-
-int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	struct msi_msg msg;
-	unsigned int virq;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-	if (!phb)
-		return -ENOENT;
-
-	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
-	if (entry->msi_attrib.is_64) {
-		msg.address_lo = 0;
-		msg.address_hi = MSI_ADDR_64 >> 32;
-	} else {
-		msg.address_lo = MSI_ADDR_32;
-		msg.address_hi = 0;
-	}
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		hwirq = wsp_ics_alloc_irq(phb->dn, 1);
-		if (hwirq < 0) {
-			dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n");
-			return hwirq;
-		}
-
-		virq = irq_create_mapping(NULL, hwirq);
-		if (virq == NO_IRQ) {
-			dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n");
-			return -1;
-		}
-
-		dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n",
-			hwirq, virq);
-
-		wsp_ics_set_msi_chip(virq);
-		irq_set_msi_desc(virq, entry);
-		msg.data = hwirq & XIVE_ADDR_MASK;
-		write_msi_msg(virq, &msg);
-	}
-
-	return 0;
-}
-
-void wsp_teardown_msi_irqs(struct pci_dev *dev)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-
-	dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n");
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		if (entry->irq == NO_IRQ)
-			continue;
-
-		irq_set_msi_desc(entry->irq, NULL);
-		wsp_ics_set_std_chip(entry->irq);
-
-		hwirq = virq_to_hw(entry->irq);
-		/* In this order to avoid racing with irq_create_mapping() */
-		irq_dispose_mapping(entry->irq);
-		wsp_ics_free_irq(phb->dn, hwirq);
-	}
-}
-
-void wsp_setup_phb_msi(struct pci_controller *phb)
-{
-	/* Create a single MVE at offset 0 that matches everything */
-	out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT);
-	out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63);
-
-	ppc_md.setup_msi_irqs = wsp_setup_msi_irqs;
-	ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs;
-}

diff --git a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h
deleted file mode 100644
index 0ab27b7..0000000
--- a/arch/powerpc/platforms/wsp/msi.h
+++ /dev/null

@@ -1,19 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_MSI_H
-#define __WSP_MSI_H
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_setup_phb_msi(struct pci_controller *phb);
-#else
-static inline void wsp_setup_phb_msi(struct pci_controller *phb) { }
-#endif
-
-#endif /* __WSP_MSI_H */

diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
deleted file mode 100644
index 3f67298..0000000
--- a/arch/powerpc/platforms/wsp/opb_pic.c
+++ /dev/null

@@ -1,321 +0,0 @@
-/*
- * IBM Onboard Peripheral Bus Interrupt Controller
- *
- * Copyright 2010 Jack Miller, IBM Corporation.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <asm/reg_a2.h>
-#include <asm/irq.h>
-
-#define OPB_NR_IRQS 32
-
-#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
-#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
-#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
-#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
-#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
-
-static int opb_index = 0;
-
-struct opb_pic {
-	struct irq_domain *host;
-	void *regs;
-	int index;
-	spinlock_t lock;
-};
-
-static u32 opb_in(struct opb_pic *opb, int offset)
-{
-	return in_be32(opb->regs + offset);
-}
-
-static void opb_out(struct opb_pic *opb, int offset, u32 val)
-{
-	out_be32(opb->regs + offset, val);
-}
-
-static void opb_unmask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier | bitset);
-	ier = opb_in(opb, OPB_MLSIER);
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, mask;
-
-	opb = d->chip_data;
-	mask = ~(1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & mask);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-	u32 ier, ir;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & ~bitset);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	int invert, ipr, mask, bit;
-
-	opb = d->chip_data;
-
-	/* The only information we're interested in in the type is whether it's
-	 * a high or low trigger. For high triggered interrupts, the polarity
-	 * set for it in the MLS Interrupt Polarity Register is 0, for low
-	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
-	 * Register is interrupted as asserting the interrupt. */
-
-	switch (flow) {
-		case IRQ_TYPE_NONE:
-			opb_mask_irq(d);
-			return 0;
-
-		case IRQ_TYPE_LEVEL_HIGH:
-			invert = 0;
-			break;
-
-		case IRQ_TYPE_LEVEL_LOW:
-			invert = 1;
-			break;
-
-		default:
-			return -EINVAL;
-	}
-
-	bit = (1 << (31 - irqd_to_hwirq(d)));
-	mask = ~bit;
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ipr = opb_in(opb, OPB_MLSIPR);
-	ipr = (ipr & mask) | (invert ? bit : 0);
-	opb_out(opb, OPB_MLSIPR, ipr);
-	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-
-	/* Record the type in the interrupt descriptor */
-	irqd_set_trigger_type(d, flow);
-
-	return 0;
-}
-
-static struct irq_chip opb_irq_chip = {
-	.name		= "OPB",
-	.irq_mask	= opb_mask_irq,
-	.irq_unmask	= opb_unmask_irq,
-	.irq_mask_ack	= opb_mask_ack_irq,
-	.irq_ack	= opb_ack_irq,
-	.irq_set_type	= opb_set_irq_type
-};
-
-static int opb_host_map(struct irq_domain *host, unsigned int virq,
-		irq_hw_number_t hwirq)
-{
-	struct opb_pic *opb;
-
-	opb = host->host_data;
-
-	/* Most of the important stuff is handled by the generic host code, like
-	 * the lookup, so just attach some info to the virtual irq */
-
-	irq_set_chip_data(virq, opb);
-	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
-	irq_set_irq_type(virq, IRQ_TYPE_NONE);
-
-	return 0;
-}
-
-static const struct irq_domain_ops opb_host_ops = {
-	.map = opb_host_map,
-	.xlate = irq_domain_xlate_twocell,
-};
-
-irqreturn_t opb_irq_handler(int irq, void *private)
-{
-	struct opb_pic *opb;
-	u32 ir, src, subvirq;
-
-	opb = (struct opb_pic *) private;
-
-	/* Read the OPB MLS Interrupt Register for
-	 * asserted interrupts */
-	ir = opb_in(opb, OPB_MLSIR);
-	if (!ir)
-		return IRQ_NONE;
-
-	do {
-		/* Get 1 - 32 source, *NOT* bit */
-		src = 32 - ffs(ir);
-
-		/* Translate from the OPB's conception of interrupt number to
-		 * Linux's virtual IRQ */
-
-		subvirq = irq_linear_revmap(opb->host, src);
-
-		generic_handle_irq(subvirq);
-	} while ((ir = opb_in(opb, OPB_MLSIR)));
-
-	return IRQ_HANDLED;
-}
-
-struct opb_pic *opb_pic_init_one(struct device_node *dn)
-{
-	struct opb_pic *opb;
-	struct resource res;
-
-	if (of_address_to_resource(dn, 0, &res)) {
-		printk(KERN_ERR "opb: Couldn't translate resource\n");
-		return  NULL;
-	}
-
-	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
-	if (!opb) {
-		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
-		return NULL;
-	}
-
-	/* Get access to the OPB MMIO registers */
-	opb->regs = ioremap(res.start + 0x10000, 0x1000);
-	if (!opb->regs) {
-		printk(KERN_ERR "opb: Failed to allocate register space!\n");
-		goto free_opb;
-	}
-
-	/* Allocate an irq domain so that Linux knows that despite only
-	 * having one interrupt to issue, we're the controller for multiple
-	 * hardware IRQs, so later we can lookup their virtual IRQs. */
-
-	opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb);
-	if (!opb->host) {
-		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
-		goto free_regs;
-	}
-
-	opb->index = opb_index++;
-	spin_lock_init(&opb->lock);
-
-	/* Disable all interrupts by default */
-	opb_out(opb, OPB_MLSASIER, 0);
-	opb_out(opb, OPB_MLSIER, 0);
-
-	/* ACK any interrupts left by FW */
-	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
-
-	return opb;
-
-free_regs:
-	iounmap(opb->regs);
-free_opb:
-	kfree(opb);
-	return NULL;
-}
-
-void __init opb_pic_init(void)
-{
-	struct device_node *dn;
-	struct opb_pic *opb;
-	int virq;
-	int rc;
-
-	/* Call init_one for each OPB device */
-	for_each_compatible_node(dn, NULL, "ibm,opb") {
-
-		/* Fill in an OPB struct */
-		opb = opb_pic_init_one(dn);
-		if (!opb) {
-			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
-			continue;
-		}
-
-		/* Map / get opb's hardware virtual irq */
-		virq = irq_of_parse_and_map(dn, 0);
-		if (virq <= 0) {
-			printk("opb: irq_op_parse_and_map failed!\n");
-			continue;
-		}
-
-		/* Attach opb interrupt handler to new virtual IRQ */
-		rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD,
-				 "OPB LS Cascade", opb);
-		if (rc) {
-			printk("opb: request_irq failed: %d\n", rc);
-			continue;
-		}
-
-		printk("OPB%d init with %d IRQs at %p\n", opb->index,
-				OPB_NR_IRQS, opb->regs);
-	}
-}

diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
deleted file mode 100644
index a87b414..0000000
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ /dev/null

@@ -1,67 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-#include <linux/of_fdt.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-
-static void psr2_spin(void)
-{
-	hard_irq_disable();
-	for (;;)
-		continue;
-}
-
-static void psr2_restart(char *cmd)
-{
-	psr2_spin();
-}
-
-static int __init psr2_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) {
-		/* chroma systems also claim they are psr2s */
-		return 0;
-	}
-
-	if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(psr2_md) {
-	.name			= "PSR2 A2",
-	.probe			= psr2_probe,
-	.setup_arch		= wsp_setup_arch,
-	.restart		= psr2_restart,
-	.power_off		= psr2_spin,
-	.halt			= psr2_spin,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(psr2_md, wsp_probe_devices);

diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
deleted file mode 100644
index 268bc89..0000000
--- a/arch/powerpc/platforms/wsp/scom_smp.c
+++ /dev/null

@@ -1,434 +0,0 @@
-/*
- * SCOM support for A2 platforms
- *
- * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
- *		       Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-
-#include "wsp.h"
-
-#define SCOM_RAMC		0x2a		/* Ram Command */
-#define SCOM_RAMC_TGT1_EXT	0x80000000
-#define SCOM_RAMC_SRC1_EXT	0x40000000
-#define SCOM_RAMC_SRC2_EXT	0x20000000
-#define SCOM_RAMC_SRC3_EXT	0x10000000
-#define SCOM_RAMC_ENABLE	0x00080000
-#define SCOM_RAMC_THREADSEL	0x00060000
-#define SCOM_RAMC_EXECUTE	0x00010000
-#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
-#define SCOM_RAMC_MSR_PR	0x00004000
-#define SCOM_RAMC_MSR_GS	0x00002000
-#define SCOM_RAMC_FORCE		0x00001000
-#define SCOM_RAMC_FLUSH		0x00000800
-#define SCOM_RAMC_INTERRUPT	0x00000004
-#define SCOM_RAMC_ERROR		0x00000002
-#define SCOM_RAMC_DONE		0x00000001
-#define SCOM_RAMI		0x29		/* Ram Instruction */
-#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
-#define SCOM_RAMIC_INSN		0xffffffff00000000
-#define SCOM_RAMD		0x2d		/* Ram Data */
-#define SCOM_RAMDH		0x2e		/* Ram Data High */
-#define SCOM_RAMDL		0x2f		/* Ram Data Low */
-#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
-#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
-#define SCOM_PCCR0_ENABLE_RAM	0x40000000
-#define SCOM_THRCTL		0x30		/* Thread Control and Status */
-#define SCOM_THRCTL_T0_STOP	0x80000000
-#define SCOM_THRCTL_T1_STOP	0x40000000
-#define SCOM_THRCTL_T2_STOP	0x20000000
-#define SCOM_THRCTL_T3_STOP	0x10000000
-#define SCOM_THRCTL_T0_STEP	0x08000000
-#define SCOM_THRCTL_T1_STEP	0x04000000
-#define SCOM_THRCTL_T2_STEP	0x02000000
-#define SCOM_THRCTL_T3_STEP	0x01000000
-#define SCOM_THRCTL_T0_RUN	0x00800000
-#define SCOM_THRCTL_T1_RUN	0x00400000
-#define SCOM_THRCTL_T2_RUN	0x00200000
-#define SCOM_THRCTL_T3_RUN	0x00100000
-#define SCOM_THRCTL_T0_PM	0x00080000
-#define SCOM_THRCTL_T1_PM	0x00040000
-#define SCOM_THRCTL_T2_PM	0x00020000
-#define SCOM_THRCTL_T3_PM	0x00010000
-#define SCOM_THRCTL_T0_UDE	0x00008000
-#define SCOM_THRCTL_T1_UDE	0x00004000
-#define SCOM_THRCTL_T2_UDE	0x00002000
-#define SCOM_THRCTL_T3_UDE	0x00001000
-#define SCOM_THRCTL_ASYNC_DIS	0x00000800
-#define SCOM_THRCTL_TB_DIS	0x00000400
-#define SCOM_THRCTL_DEC_DIS	0x00000200
-#define SCOM_THRCTL_AND		0x31		/* Thread Control and Status */
-#define SCOM_THRCTL_OR		0x32		/* Thread Control and Status */
-
-
-static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
-
-static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
-{
-	scom_map_t scom = per_cpu(scom_ptrs, cpu);
-	int tcpu;
-
-	if (scom_map_ok(scom)) {
-		*first_thread = 0;
-		return scom;
-	}
-
-	*first_thread = 1;
-
-	scom = scom_map_device(np, 0);
-
-	for (tcpu = cpu_first_thread_sibling(cpu);
-	     tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
-		per_cpu(scom_ptrs, tcpu) = scom;
-
-	/* Hack: for the boot core, this will actually get called on
-	 * the second thread up, not the first so our test above will
-	 * set first_thread incorrectly. */
-	if (cpu_first_thread_sibling(cpu) == 0)
-		*first_thread = 0;
-
-	return scom;
-}
-
-static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
-{
-	u64 cmd, mask, val;
-	int n = 0;
-
-	cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
-		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
-	mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
-
-	scom_write(scom, SCOM_RAMIC, cmd);
-
-	for (;;) {
-		if (scom_read(scom, SCOM_RAMC, &val) != 0) {
-			pr_err("SCOM error on instruction 0x%08x, thread %d\n",
-			       insn, thread);
-			return -1;
-		}
-		if (val & mask)
-			break;
-		pr_devel("Waiting on RAMC = 0x%llx\n", val);
-		if (++n == 3) {
-			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
-			       insn, thread);
-			return -1;
-		}
-	}
-
-	if (val & SCOM_RAMC_INTERRUPT) {
-		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_INTERRUPT;
-	}
-
-	if (val & SCOM_RAMC_ERROR) {
-		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_ERROR;
-	}
-
-	return 0;
-}
-
-static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
-			  u64 *out_gpr)
-{
-	int rc;
-
-	/* or rN, rN, rN */
-	u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
-	rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
-	if (rc)
-		return rc;
-
-	return scom_read(scom, SCOM_RAMD, out_gpr);
-}
-
-static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
-{
-	int rc, sprhi, sprlo;
-	u32 insn;
-
-	sprhi = spr >> 5;
-	sprlo = spr & 0x1f;
-	insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
-
-	if (spr == 0x0ff0)
-		insn = 0x7c2000a6; /* mfmsr r1 */
-
-	rc = a2_scom_ram(scom, thread, insn, 0xf);
-	if (rc)
-		return rc;
-	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
-}
-
-static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
-			  int alt, u64 val)
-{
-	u32 lis = 0x3c000000 | (gpr << 21);
-	u32 li = 0x38000000 | (gpr << 21);
-	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
-	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
-	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
-	u32 highest = val >> 48;
-	u32 higher = (val >> 32) & 0xffff;
-	u32 high = (val >> 16) & 0xffff;
-	u32 low = val & 0xffff;
-	int lext = alt ? 0x8 : 0x0;
-	int oext = alt ? 0xf : 0x0;
-	int rc = 0;
-
-	if (highest)
-		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
-
-	if (higher) {
-		if (highest)
-			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, li | higher, lext);
-	}
-
-	if (highest || higher)
-		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
-
-	if (high) {
-		if (highest || higher)
-			rc |= a2_scom_ram(scom, thread, oris | high, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, lis | high, lext);
-	}
-
-	if (highest || higher || high)
-		rc |= a2_scom_ram(scom, thread, ori | low, oext);
-	else
-		rc |= a2_scom_ram(scom, thread, li | low, lext);
-
-	return rc;
-}
-
-static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
-{
-	int sprhi = spr >> 5;
-	int sprlo = spr & 0x1f;
-	/* mtspr spr, r1 */
-	u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
-
-	if (spr == 0x0ff0)
-		insn = 0x7c200124; /* mtmsr r1 */
-
-	if (a2_scom_setgpr(scom, thread, 1, 1, val))
-		return -1;
-
-	return a2_scom_ram(scom, thread, insn, 0xf);
-}
-
-static int a2_scom_initial_tlb(scom_map_t scom, int thread)
-{
-	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
-	extern u32 a2_tlbinit_after_iprot_flush[];
-	extern u32 a2_tlbinit_after_linear_map[];
-	u32 assoc, entries, i;
-	u64 epn, tlbcfg;
-	u32 *p;
-	int rc;
-
-	/* Invalidate all entries (including iprot) */
-
-	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
-	if (rc)
-		goto scom_fail;
-	entries = tlbcfg & TLBnCFG_N_ENTRY;
-	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
-	epn = 0;
-
-	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
-	/* Set MMUCR3 to write all thids bit to the TLB */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
-
-	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
-	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
-	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-	for (i = 0; i < entries; i++) {
-
-		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
-
-		/* tlbwe */
-		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
-		if (rc)
-			goto scom_fail;
-
-		/* Next entry is new address? */
-		if((i + 1) % assoc == 0) {
-			epn += (1 << 30);
-			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-		}
-	}
-
-	/* Setup args for linear mapping */
-	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
-	if (rc)
-		goto scom_fail;
-
-	/* Linear mapping */
-	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-	/*
-	 * For the boot thread, between the linear mapping and the debug
-	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
-	 * branches, but the secondary threads don't need to be nearly as smart
-	 * (i.e. we don't need to worry about invalidating the mapping we're
-	 * standing on).
-	 */
-
-	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
-	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-scom_fail:
-	if (rc)
-		pr_err("Setting up initial TLB failed, err %d\n", rc);
-
-	if (rc == -SCOM_RAMC_INTERRUPT) {
-		/* Interrupt, dump some status */
-		int rc[10];
-		u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
-		rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
-		rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
-		rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
-		rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
-		rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
-		rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
-		rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
-		rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
-		rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
-		rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
-		pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]);
-		pr_err("    retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]);
-		pr_err("    retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]);
-		pr_err("    retreived ESR =0x%llx (err %d)\n", esr, rc[3]);
-		pr_err("    retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]);
-		pr_err("    retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]);
-		pr_err("    retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]);
-		pr_err("    retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
-		pr_err("    retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]);
-		pr_err("    retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
-	}
-
-	return rc;
-}
-
-int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np)
-{
-	u64 init_iar, init_msr, init_ccr2;
-	unsigned long start_here;
-	int rc, core_setup;
-	scom_map_t scom;
-	u64 pccr0;
-
-	scom = get_scom(lcpu, np, &core_setup);
-	if (!scom) {
-		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
-		return -1;
-	}
-
-	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
-
-	if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) {
-		printk(KERN_ERR "XSCOM failure readng PCCR0 on CPU%d\n", lcpu);
-		return -1;
-	}
-	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
-				     SCOM_PCCR0_ENABLE_RAM);
-
-	/* Stop the thead with THRCTL. If we are setting up the TLB we stop all
-	 * threads. We also disable asynchronous interrupts while RAMing.
-	 */
-	if (core_setup)
-		scom_write(scom, SCOM_THRCTL_OR,
-			      SCOM_THRCTL_T0_STOP |
-			      SCOM_THRCTL_T1_STOP |
-			      SCOM_THRCTL_T2_STOP |
-			      SCOM_THRCTL_T3_STOP |
-			      SCOM_THRCTL_ASYNC_DIS);
-	else
-		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
-
-	/* Flush its pipeline just in case */
-	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
-		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
-
-	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
-	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
-	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
-
-	/* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
-	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
-	if (rc) {
-		pr_err("Failed to set MSR ! err %d\n", rc);
-		return rc;
-	}
-
-	/* RAM in an sync/isync for the sake of it */
-	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
-	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
-
-	if (core_setup) {
-		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
-			 lcpu);
-		rc = a2_scom_initial_tlb(scom, thr_idx);
-		if (rc)
-			goto fail;
-	}
-
-	start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
-					: generic_secondary_thread_init);
-	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
-	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
-			     get_hard_smp_processor_id(lcpu));
-	/*
-	 * Tell book3e_secondary_core_init not to set up the TLB, we've
-	 * already done that.
-	 */
-	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
-
-	scom_write(scom, SCOM_RAMC, 0);
-	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
-	scom_write(scom, SCOM_PCCR0, pccr0);
-fail:
-	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
-	if (rc) {
-		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
-		       init_iar, init_msr, init_ccr2);
-	}
-
-	return rc;
-}

diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
deleted file mode 100644
index 6538b4d..0000000
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ /dev/null

@@ -1,82 +0,0 @@
-/*
- *  SCOM backend for WSP
- *
- *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/of_address.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-
-#include "wsp.h"
-
-
-static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
-{
-	struct resource r;
-	u64 xscom_addr;
-
-	if (!of_get_property(dev, "scom-controller", NULL)) {
-		pr_err("%s: device %s is not a SCOM controller\n",
-			__func__, dev->full_name);
-		return SCOM_MAP_INVALID;
-	}
-
-	if (of_address_to_resource(dev, 0, &r)) {
-		pr_debug("Failed to find SCOM controller address\n");
-		return 0;
-	}
-
-	/* Transform the SCOM address into an XSCOM offset */
-	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
-
-	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
-}
-
-static void wsp_scom_unmap(scom_map_t map)
-{
-	iounmap((void *)map);
-}
-
-static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	*value = in_be64(addr + reg);
-
-	return 0;
-}
-
-static int wsp_scom_write(scom_map_t map, u64 reg, u64 value)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	out_be64(addr + reg, value);
-
-	return 0;
-}
-
-static const struct scom_controller wsp_scom_controller = {
-	.map	= wsp_scom_map,
-	.unmap	= wsp_scom_unmap,
-	.read	= wsp_scom_read,
-	.write	= wsp_scom_write
-};
-
-void scom_init_wsp(void)
-{
-	scom_init(&wsp_scom_controller);
-}

diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
deleted file mode 100644
index 11ac2f0..0000000
--- a/arch/powerpc/platforms/wsp/setup.c
+++ /dev/null

@@ -1,36 +0,0 @@
-/*
- * Copyright 2010 Michael Ellerman, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of_platform.h>
-
-#include "wsp.h"
-
-/*
- * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
- * Won't work for nodes that are not a descendant of a wsp node.
- */
-int wsp_get_chip_id(struct device_node *dn)
-{
-	const u32 *p;
-	int rc;
-
-	/* Start looking at the specified node, not its parent */
-	dn = of_node_get(dn);
-	while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
-		dn = of_get_next_parent(dn);
-
-	if (!dn)
-		return -1;
-
-	rc = *p;
-	of_node_put(dn);
-
-	return rc;
-}

diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
deleted file mode 100644
index 332a18b..0000000
--- a/arch/powerpc/platforms/wsp/smp.c
+++ /dev/null

@@ -1,88 +0,0 @@
-/*
- *  SMP Support for A2 platforms
- *
- *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/dbell.h>
-#include <asm/machdep.h>
-#include <asm/xics.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-static void smp_a2_setup_cpu(int cpu)
-{
-	doorbell_setup_this_cpu();
-
-	if (cpu != boot_cpuid)
-		xics_setup_cpu();
-}
-
-int smp_a2_kick_cpu(int nr)
-{
-	const char *enable_method;
-	struct device_node *np;
-	int thr_idx;
-
-	if (nr < 0 || nr >= NR_CPUS)
-		return -ENOENT;
-
-	np = of_get_cpu_node(nr, &thr_idx);
-	if (!np)
-		return -ENODEV;
-
-	enable_method = of_get_property(np, "enable-method", NULL);
-	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
-
-	if (!enable_method) {
-                printk(KERN_ERR "CPU%d has no enable-method\n", nr);
-		return -ENOENT;
-	} else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
-		if (a2_scom_startup_cpu(nr, thr_idx, np))
-			return -1;
-	} else {
-		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
-                       nr, enable_method);
-		return -EINVAL;
-	}
-
-	/*
-	 * The processor is currently spinning, waiting for the
-	 * cpu_start field to become non-zero After we set cpu_start,
-	 * the processor will continue on to secondary_start
-	 */
-	paca[nr].cpu_start = 1;
-
-	return 0;
-}
-
-static int __init smp_a2_probe(void)
-{
-	return num_possible_cpus();
-}
-
-static struct smp_ops_t a2_smp_ops = {
-	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
-	.cause_ipi	= doorbell_cause_ipi,
-	.probe		= smp_a2_probe,
-	.kick_cpu	= smp_a2_kick_cpu,
-	.setup_cpu	= smp_a2_setup_cpu,
-};
-
-void __init a2_setup_smp(void)
-{
-	smp_ops = &a2_smp_ops;
-}

diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c
deleted file mode 100644
index 58cd1f0..0000000
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ /dev/null

@@ -1,117 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/time.h>
-#include <linux/of_address.h>
-
-#include <asm/scom.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-#define WSP_SOC_COMPATIBLE	"ibm,wsp-soc"
-#define PBIC_COMPATIBLE		"ibm,wsp-pbic"
-#define COPRO_COMPATIBLE	"ibm,wsp-coprocessor"
-
-static int __init wsp_probe_buses(void)
-{
-	static __initdata struct of_device_id bus_ids[] = {
-		/*
-		 * every node in between needs to be here or you won't
-		 * find it
-		 */
-		{ .compatible = WSP_SOC_COMPATIBLE, },
-		{ .compatible = PBIC_COMPATIBLE, },
-		{ .compatible = COPRO_COMPATIBLE, },
-		{},
-	};
-	of_platform_bus_probe(NULL, bus_ids, NULL);
-
-	return 0;
-}
-
-void __init wsp_setup_arch(void)
-{
-	/* init to some ~sane value until calibrate_delay() runs */
-	loops_per_jiffy = 50000000;
-
-	scom_init_wsp();
-
-	/* Setup SMP callback */
-#ifdef CONFIG_SMP
-	a2_setup_smp();
-#endif
-#ifdef CONFIG_PCI
-	wsp_setup_pci();
-#endif
-}
-
-void __init wsp_setup_irq(void)
-{
-	wsp_init_irq();
-	opb_pic_init();
-}
-
-
-int __init wsp_probe_devices(void)
-{
-	struct device_node *np;
-
-	/* Our RTC is a ds1500. It seems to be programatically compatible
-	 * with the ds1511 for which we have a driver so let's use that
-	 */
-	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
-	if (np != NULL) {
-		struct resource res;
-		if (of_address_to_resource(np, 0, &res) == 0)
-			platform_device_register_simple("ds1511", 0, &res, 1);
-	}
-
-	wsp_probe_buses();
-
-	return 0;
-}
-
-void wsp_halt(void)
-{
-	u64 val;
-	scom_map_t m;
-	struct device_node *dn;
-	struct device_node *mine;
-	struct device_node *me;
-	int rc;
-
-	me = of_get_cpu_node(smp_processor_id(), NULL);
-	mine = scom_find_parent(me);
-
-	/* This will halt all the A2s but not power off the chip */
-	for_each_node_with_property(dn, "scom-controller") {
-		if (dn == mine)
-			continue;
-		m = scom_map(dn, 0, 1);
-
-		/* read-modify-write it so the HW probe does not get
-		 * confused */
-		rc = scom_read(m, 0, &val);
-		if (rc == 0)
-			scom_write(m, 0, val | 1);
-		scom_unmap(m);
-	}
-	m = scom_map(mine, 0, 1);
-	rc = scom_read(m, 0, &val);
-	if (rc == 0)
-		scom_write(m, 0, val | 1);
-	/* should never return */
-	scom_unmap(m);
-}

diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
deleted file mode 100644
index a563a8a..0000000
--- a/arch/powerpc/platforms/wsp/wsp.h
+++ /dev/null

@@ -1,29 +0,0 @@
-#ifndef __WSP_H
-#define __WSP_H
-
-#include <asm/wsp.h>
-
-/* Devtree compatible strings for major devices */
-#define PCIE_COMPATIBLE     "ibm,wsp-pciex"
-
-extern void wsp_setup_arch(void);
-extern void wsp_setup_irq(void);
-extern int wsp_probe_devices(void);
-extern void wsp_halt(void);
-
-extern void wsp_setup_pci(void);
-extern void scom_init_wsp(void);
-
-extern void a2_setup_smp(void);
-extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
-			       struct device_node *np);
-extern int smp_a2_kick_cpu(int nr);
-
-extern void opb_pic_init(void);
-
-/* chroma specific managment */
-extern void wsp_h8_restart(char *cmd);
-extern void wsp_h8_power_off(void);
-extern void __init wsp_setup_h8(void);
-
-#endif /*  __WSP_H */

diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
deleted file mode 100644
index 9a15e5b..0000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ /dev/null

@@ -1,1134 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define DEBUG
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/debugfs.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-#include <asm/iommu.h>
-#include <asm/io-workarounds.h>
-#include <asm/debug.h>
-
-#include "wsp.h"
-#include "wsp_pci.h"
-#include "msi.h"
-
-
-/* Max number of TVTs for one table. Only 32-bit tables can use
- * multiple TVTs and so the max currently supported is thus 8
- * since only 2G of DMA space is supported
- */
-#define MAX_TABLE_TVT_COUNT		8
-
-struct wsp_dma_table {
-	struct list_head	link;
-	struct iommu_table	table;
-	struct wsp_phb	*phb;
-	struct page		*tces[MAX_TABLE_TVT_COUNT];
-};
-
-/* We support DMA regions from 0...2G in 32bit space (no support for
- * 64-bit DMA just yet). Each device gets a separate TCE table (TVT
- * entry) with validation enabled (though not supported by SimiCS
- * just yet).
- *
- * To simplify things, we divide this 2G space into N regions based
- * on the constant below which could be turned into a tunable eventually
- *
- * We then assign dynamically those regions to devices as they show up.
- *
- * We use a bitmap as an allocator for these.
- *
- * Tables are allocated/created dynamically as devices are discovered,
- * multiple TVT entries are used if needed
- *
- * When 64-bit DMA support is added we should simply use a separate set
- * of larger regions (the HW supports 64 TVT entries). We can
- * additionally create a bypass region in 64-bit space for performances
- * though that would have a cost in term of security.
- *
- * If you set NUM_DMA32_REGIONS to 1, then a single table is shared
- * for all devices and bus/dev/fn validation is disabled
- *
- * Note that a DMA32 region cannot be smaller than 256M so the max
- * supported here for now is 8. We don't yet support sharing regions
- * between multiple devices so the max number of devices supported
- * is MAX_TABLE_TVT_COUNT.
- */
-#define NUM_DMA32_REGIONS	1
-
-struct wsp_phb {
-	struct pci_controller	*hose;
-
-	/* Lock controlling access to the list of dma tables.
-	 * It does -not- protect against dma_* operations on
-	 * those tables, those should be stopped before an entry
-	 * is removed from the list.
-	 *
-	 * The lock is also used for error handling operations
-	 */
-	spinlock_t		lock;
-	struct list_head	dma_tables;
-	unsigned long		dma32_map;
-	unsigned long		dma32_base;
-	unsigned int		dma32_num_regions;
-	unsigned long		dma32_region_size;
-
-	/* Debugfs stuff */
-	struct dentry		*ddir;
-
-	struct list_head	all;
-};
-static LIST_HEAD(wsp_phbs);
-
-//#define cfg_debug(fmt...)	pr_debug(fmt)
-#define cfg_debug(fmt...)
-
-
-static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
-				  int offset, int len, u32 *val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return  PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xff;
-		cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xffff;
-		cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA);
-		cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
-				   int offset, int len, u32 val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return  PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops wsp_pcie_pci_ops =
-{
-	.read = wsp_pcie_read_config,
-	.write = wsp_pcie_write_config,
-};
-
-#define TCE_SHIFT		12
-#define TCE_PAGE_SIZE		(1 << TCE_SHIFT)
-#define TCE_PCI_WRITE		0x2		 /* write from PCI allowed */
-#define TCE_PCI_READ		0x1	 	 /* read from PCI allowed */
-#define TCE_RPN_MASK		0x3fffffffffful  /* 42-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT		12
-
-//#define dma_debug(fmt...)	pr_debug(fmt)
-#define dma_debug(fmt...)
-
-static int tce_build_wsp(struct iommu_table *tbl, long index, long npages,
-			   unsigned long uaddr, enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-	u64 proto_tce;
-	u64 *tcep;
-	u64 rpn;
-
-	proto_tce = TCE_PCI_READ;
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	proto_tce |= TCE_PCI_WRITE;
-#else
-	if (direction != DMA_TO_DEVICE)
-		proto_tce |= TCE_PCI_WRITE;
-#endif
-
-	/* XXX Make this faster by factoring out the page address for
-	 * within a TCE table
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
-
-		/* can't move this out since we might cross LMB boundary */
-		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-
-		dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n",
-			  tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K);
-
-		uaddr += TCE_PAGE_SIZE;
-		index++;
-	}
-	return 0;
-}
-
-static void tce_free_wsp(struct iommu_table *tbl, long index, long npages)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	struct pci_controller *hose = ptbl->phb->hose;
-#endif
-	u64 *tcep;
-
-	/* XXX Make this faster by factoring out the page address for
-	 * within a TCE table. Also use line-kill option to kill multiple
-	 * TCEs at once
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
-		dma_debug("[DMA] TCE %p cleared\n", tcep);
-		*tcep = 0;
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-		/* Don't write there since it would pollute other MMIO accesses */
-		out_be64(hose->cfg_data + PCIE_REG_TCE_KILL,
-			 PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K |
-			 (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK));
-#endif
-		index++;
-	}
-}
-
-static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb,
-							    unsigned int region,
-							    struct pci_dev *validate)
-{
-	struct pci_controller *hose = phb->hose;
-	unsigned long size = phb->dma32_region_size;
-	unsigned long addr = phb->dma32_region_size * region + phb->dma32_base;
-	struct wsp_dma_table *tbl;
-	int tvts_per_table, i, tvt, nid;
-	unsigned long flags;
-
-	nid = of_node_to_nid(phb->hose->dn);
-
-	/* Calculate how many TVTs are needed */
-	tvts_per_table = size / 0x10000000;
-	if (tvts_per_table == 0)
-		tvts_per_table = 1;
-
-	/* Calculate the base TVT index. We know all tables have the same
-	 * size so we just do a simple multiply here
-	 */
-	tvt = region * tvts_per_table;
-
-	pr_debug("         Region : %d\n", region);
-	pr_debug("      DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1);
-	pr_debug(" Number of TVTs : %d\n", tvts_per_table);
-	pr_debug("       Base TVT : %d\n", tvt);
-	pr_debug("         Node   : %d\n", nid);
-
-	tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid);
-	if (!tbl)
-		return ERR_PTR(-ENOMEM);
-	tbl->phb = phb;
-
-	/* Create as many TVTs as needed, each represents 256M at most */
-	for (i = 0; i < tvts_per_table; i++) {
-		u64 tvt_data1, tvt_data0;
-
-		/* Allocate table. We use a 4K TCE size for now always so
-		 * one table is always 8 * (258M / 4K) == 512K
-		 */
-		tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000));
-		if (tbl->tces[i] == NULL)
-			goto fail;
-		memset(page_address(tbl->tces[i]), 0, 0x80000);
-
-		pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i]));
-
-		/* Table size. We currently set it to be the whole 256M region */
-		tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT;
-		/* IO page size set to 4K */
-		tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT;
-		/* Shift in the address */
-		tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT;
-
-		/* Validation stuff. We only validate fully bus/dev/fn for now
-		 * one day maybe we can group devices but that isn't the case
-		 * at the moment
-		 */
-		if (validate) {
-			tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK;
-			tvt_data0 |= validate->bus->number;
-			tvt_data1 |= IODA_TVT1_DEVNUM_VALID;
-			tvt_data1 |= ((u64)PCI_SLOT(validate->devfn))
-				<< IODA_TVT1_DEVNUM_VALUE_SHIFT;
-			tvt_data1 |= IODA_TVT1_FUNCNUM_VALID;
-			tvt_data1 |= ((u64)PCI_FUNC(validate->devfn))
-				<< IODA_TVT1_FUNCNUM_VALUE_SHIFT;
-		}
-
-		/* XX PE number is always 0 for now */
-
-		/* Program the values using the PHB lock */
-		spin_lock_irqsave(&phb->lock, flags);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0);
-		spin_unlock_irqrestore(&phb->lock, flags);
-	}
-
-	/* Init bits and pieces */
-	tbl->table.it_blocksize = 16;
-	tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
-	tbl->table.it_offset = addr >> tbl->table.it_page_shift;
-	tbl->table.it_size = size >> tbl->table.it_page_shift;
-
-	/*
-	 * It's already blank but we clear it anyway.
-	 * Consider an aditiona interface that makes cleaing optional
-	 */
-	iommu_init_table(&tbl->table, nid);
-
-	list_add(&tbl->link, &phb->dma_tables);
-	return tbl;
-
- fail:
-	pr_debug("  Failed to allocate a 256M TCE table !\n");
-	for (i = 0; i < tvts_per_table; i++)
-		if (tbl->tces[i])
-			__free_pages(tbl->tces[i], get_order(0x80000));
-	kfree(tbl);
-	return ERR_PTR(-ENOMEM);
-}
-
-static void wsp_pci_dma_dev_setup(struct pci_dev *pdev)
-{
-	struct dev_archdata *archdata = &pdev->dev.archdata;
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct wsp_phb *phb = hose->private_data;
-	struct wsp_dma_table *table = NULL;
-	unsigned long flags;
-	int i;
-
-	/* Don't assign an iommu table to a bridge */
-	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		return;
-
-	pr_debug("%s: Setting up DMA...\n", pci_name(pdev));
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	/* If only one region, check if it already exist */
-	if (phb->dma32_num_regions == 1) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		if (list_empty(&phb->dma_tables))
-			table = wsp_pci_create_dma32_table(phb, 0, NULL);
-		else
-			table = list_first_entry(&phb->dma_tables,
-						 struct wsp_dma_table,
-						 link);
-	} else {
-		/* else find a free region */
-		for (i = 0; i < phb->dma32_num_regions && !table; i++) {
-			if (__test_and_set_bit(i, &phb->dma32_map))
-				continue;
-			spin_unlock_irqrestore(&phb->lock, flags);
-			table = wsp_pci_create_dma32_table(phb, i, pdev);
-		}
-	}
-
-	/* Check if we got an error */
-	if (IS_ERR(table)) {
-		pr_err("%s: Failed to create DMA table, err %ld !\n",
-		       pci_name(pdev), PTR_ERR(table));
-		return;
-	}
-
-	/* Or a valid table */
-	if (table) {
-		pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n",
-			pci_name(pdev),
-			table->table.it_offset << IOMMU_PAGE_SHIFT_4K,
-			(table->table.it_offset << IOMMU_PAGE_SHIFT_4K)
-			+ phb->dma32_region_size - 1);
-		archdata->dma_data.iommu_table_base = &table->table;
-		return;
-	}
-
-	/* Or no room */
-	spin_unlock_irqrestore(&phb->lock, flags);
-	pr_err("%s: Out of DMA space !\n", pci_name(pdev));
-}
-
-static void __init wsp_pcie_configure_hw(struct pci_controller *hose)
-{
-	u64 val;
-	int i;
-
-#define DUMP_REG(x) \
-	pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x))
-
-	/*
-	 * Some WSP variants  has a bogus class code by default in the PCI-E
-	 * root complex's built-in P2P bridge
-	 */
-	val = in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1);
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1,
-		 (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8));
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1));
-
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	/* XXX Disable TCE caching, it doesn't work on DD1 */
-	out_be64(hose->cfg_data + 0xe50,
-		 in_be64(hose->cfg_data + 0xe50) | (3ull << 62));
-	printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50));
-#endif
-
-	/* Configure M32A and IO. IO is hard wired to be 1M for now */
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys);
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK,
-		 (~(hose->io_resource.end - hose->io_resource.start)) &
-		 0x3fffffff000ul);
-	out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1);
-
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR,
-		 hose->mem_resources[0].start);
-	printk("Want to write to M32A_BASE_MASK : 0x%llx\n",
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK,
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR,
-		 (hose->mem_resources[0].start - hose->mem_offset[0]) | 1);
-
-	/* Clear all TVT entries
-	 *
-	 * XX Might get TVT count from device-tree
-	 */
-	for (i = 0; i < IODA_TVT_COUNT; i++) {
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 PCIE_REG_IODA_AD_TBL_TVT | i);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0);
-	}
-
-	/* Kill the TCE cache */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG,
-		 in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) |
-		 PCIE_REG_PHBC_64B_TCE_EN);
-
-	/* Enable 32 & 64-bit MSIs, IO space and M32A */
-	val = PCIE_REG_PHBC_32BIT_MSI_EN |
-	      PCIE_REG_PHBC_IO_EN |
-	      PCIE_REG_PHBC_64BIT_MSI_EN |
-	      PCIE_REG_PHBC_M32A_EN;
-	if (iommu_is_off)
-		val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS;
-	pr_debug("Will write config: 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val);
-
-	/* Enable error reporting */
-	out_be64(hose->cfg_data + 0xe00,
-		 in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull);
-
-	/* Mask an error that's generated when doing config space probe
-	 *
-	 * XXX Maybe we should only mask it around config space cycles... that or
-	 * ignore it when we know we had a config space cycle recently ?
-	 */
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull);
-
-	/* Enable UTL errors, for now, all of them got to UTL irq 1
-	 *
-	 * We similarily mask one UTL error caused apparently during normal
-	 * probing. We also mask the link up error
-	 */
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull);
-
-	DUMP_REG(PCIE_REG_IO_BASE_ADDR);
-	DUMP_REG(PCIE_REG_IO_BASE_MASK);
-	DUMP_REG(PCIE_REG_IO_START_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32A_START_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32B_START_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_MASK);
-	DUMP_REG(PCIE_REG_M64_START_ADDR);
-	DUMP_REG(PCIE_REG_PHB_CONFIG);
-}
-
-static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port)
-{
-	u64 val;
-	int i;
-
-	for (i = 0; i < 10000; i++) {
-		val = in_be64(phb->hose->cfg_data + 0xe08);
-		if ((val & 0x1900000000000000ull) == 0x0100000000000000ull)
-			return;
-		udelay(1);
-	}
-	pr_warning("PCI IO timeout on domain %d port 0x%lx\n",
-		   phb->hose->global_number, port);
-}
-
-#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa)		\
-static ret wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	ret rval;						\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	rval = __do_##name al;					\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-	return rval;						\
-}
-
-#define DEF_PCI_AC_NORET_pio(name, at, al, aa)			\
-static void wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	__do_##name al;						\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-}
-
-#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa)
-#define DEF_PCI_AC_NORET_mem(name, at, al, aa)
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
-	DEF_PCI_AC_RET_##space(name, ret, at, al, aa)
-
-#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
-	DEF_PCI_AC_NORET_##space(name, at, al, aa)		\
-
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-static struct ppc_pci_io wsp_pci_iops = {
-	.inb = wsp_pci_inb,
-	.inw = wsp_pci_inw,
-	.inl = wsp_pci_inl,
-	.outb = wsp_pci_outb,
-	.outw = wsp_pci_outw,
-	.outl = wsp_pci_outl,
-	.insb = wsp_pci_insb,
-	.insw = wsp_pci_insw,
-	.insl = wsp_pci_insl,
-	.outsb = wsp_pci_outsb,
-	.outsw = wsp_pci_outsw,
-	.outsl = wsp_pci_outsl,
-};
-
-static int __init wsp_setup_one_phb(struct device_node *np)
-{
-	struct pci_controller *hose;
-	struct wsp_phb *phb;
-
-	pr_info("PCI: Setting up PCIe host bridge 0x%s\n", np->full_name);
-
-	phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL);
-	if (!phb)
-		return -ENOMEM;
-	hose = pcibios_alloc_controller(np);
-	if (!hose) {
-		/* Can't really free the phb */
-		return -ENOMEM;
-	}
-	hose->private_data = phb;
-	phb->hose = hose;
-
-	INIT_LIST_HEAD(&phb->dma_tables);
-	spin_lock_init(&phb->lock);
-
-	/* XXX Use bus-range property ? */
-	hose->first_busno = 0;
-	hose->last_busno = 0xff;
-
-	/* We use cfg_data as the address for the whole bridge MMIO space
-	 */
-	hose->cfg_data = of_iomap(hose->dn, 0);
-
-	pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data);
-
-	/* Get the ranges of the device-tree */
-	pci_process_bridge_OF_ranges(hose, np, 0);
-
-	/* XXX Force re-assigning of everything for now */
-	pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC |
-		      PCI_ENABLE_PROC_DOMAINS);
-
-	/* Calculate how the TCE space is divided */
-	phb->dma32_base		= 0;
-	phb->dma32_num_regions	= NUM_DMA32_REGIONS;
-	if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) {
-		pr_warning("IOMMU: Clamped to %d DMA32 regions\n",
-			   MAX_TABLE_TVT_COUNT);
-		phb->dma32_num_regions = MAX_TABLE_TVT_COUNT;
-	}
-	phb->dma32_region_size	= 0x80000000 / phb->dma32_num_regions;
-
-	BUG_ON(!is_power_of_2(phb->dma32_region_size));
-
-	/* Setup config ops */
-	hose->ops = &wsp_pcie_pci_ops;
-
-	/* Configure the HW */
-	wsp_pcie_configure_hw(hose);
-
-	/* Instanciate IO workarounds */
-	iowa_register_bus(hose, &wsp_pci_iops, NULL, phb);
-#ifdef CONFIG_PCI_MSI
-	wsp_setup_phb_msi(hose);
-#endif
-
-	/* Add to global list */
-	list_add(&phb->all, &wsp_phbs);
-
-	return 0;
-}
-
-void __init wsp_setup_pci(void)
-{
-	struct device_node *np;
-	int rc;
-
-	/* Find host bridges */
-	for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) {
-		rc = wsp_setup_one_phb(np);
-		if (rc)
-			pr_err("Failed to setup PCIe bridge %s, rc=%d\n",
-			       np->full_name, rc);
-	}
-
-	/* Establish device-tree linkage */
-	pci_devs_phb_init();
-
-	/* Set DMA ops to use TCEs */
-	if (iommu_is_off) {
-		pr_info("PCI-E: Disabled TCEs, using direct DMA\n");
-		set_pci_dma_ops(&dma_direct_ops);
-	} else {
-		ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup;
-		ppc_md.tce_build = tce_build_wsp;
-		ppc_md.tce_free = tce_free_wsp;
-		set_pci_dma_ops(&dma_iommu_ops);
-	}
-}
-
-#define err_debug(fmt...)	pr_debug(fmt)
-//#define err_debug(fmt...)
-
-static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np)
-{
-	const u32 *prop;
-	int hw_irq;
-
-	/* Ok, no interrupts property, let's try to find our child P2P */
-	np = of_get_next_child(np, NULL);
-	if (np == NULL)
-		return 0;
-
-	/* Grab it's interrupt map */
-	prop = of_get_property(np, "interrupt-map", NULL);
-	if (prop == NULL)
-		return 0;
-
-	/* Grab one of the interrupts in there, keep the low 4 bits */
-	hw_irq = prop[5] & 0xf;
-
-	/* 0..4 for PHB 0 and 5..9 for PHB 1 */
-	if (hw_irq < 5)
-		hw_irq = 4;
-	else
-		hw_irq = 9;
-	hw_irq |= prop[5] & ~0xf;
-
-	err_debug("PCI: Using 0x%x as error IRQ for %s\n",
-		  hw_irq, np->parent->full_name);
-	return irq_create_mapping(NULL, hw_irq);
-}
-
-static const struct {
-	u32 offset;
-	const char *name;
-} wsp_pci_regs[] = {
-#define DREG(x) { PCIE_REG_##x, #x }
-#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x }
-	/* Architected registers except CONFIG_ and IODA
-         * to avoid side effects
-	 */
-	DREG(DMA_CHAN_STATUS),
-	DREG(CPU_LOADSTORE_STATUS),
-	DREG(LOCK0),
-	DREG(LOCK1),
-	DREG(PHB_CONFIG),
-	DREG(IO_BASE_ADDR),
-	DREG(IO_BASE_MASK),
-	DREG(IO_START_ADDR),
-	DREG(M32A_BASE_ADDR),
-	DREG(M32A_BASE_MASK),
-	DREG(M32A_START_ADDR),
-	DREG(M32B_BASE_ADDR),
-	DREG(M32B_BASE_MASK),
-	DREG(M32B_START_ADDR),
-	DREG(M64_BASE_ADDR),
-	DREG(M64_BASE_MASK),
-	DREG(M64_START_ADDR),
-	DREG(TCE_KILL),
-	DREG(LOCK2),
-	DREG(PHB_GEN_CAP),
-	DREG(PHB_TCE_CAP),
-	DREG(PHB_IRQ_CAP),
-	DREG(PHB_EEH_CAP),
-	DREG(PAPR_ERR_INJ_CONTROL),
-	DREG(PAPR_ERR_INJ_ADDR),
-	DREG(PAPR_ERR_INJ_MASK),
-
-	/* UTL core regs */
-	DUTL(SYS_BUS_CONTROL),
-	DUTL(STATUS),
-	DUTL(SYS_BUS_AGENT_STATUS),
-	DUTL(SYS_BUS_AGENT_ERR_SEV),
-	DUTL(SYS_BUS_AGENT_IRQ_EN),
-	DUTL(SYS_BUS_BURST_SZ_CONF),
-	DUTL(REVISION_ID),
-	DUTL(OUT_POST_HDR_BUF_ALLOC),
-	DUTL(OUT_POST_DAT_BUF_ALLOC),
-	DUTL(IN_POST_HDR_BUF_ALLOC),
-	DUTL(IN_POST_DAT_BUF_ALLOC),
-	DUTL(OUT_NP_BUF_ALLOC),
-	DUTL(IN_NP_BUF_ALLOC),
-	DUTL(PCIE_TAGS_ALLOC),
-	DUTL(GBIF_READ_TAGS_ALLOC),
-
-	DUTL(PCIE_PORT_CONTROL),
-	DUTL(PCIE_PORT_STATUS),
-	DUTL(PCIE_PORT_ERROR_SEV),
-	DUTL(PCIE_PORT_IRQ_EN),
-	DUTL(RC_STATUS),
-	DUTL(RC_ERR_SEVERITY),
-	DUTL(RC_IRQ_EN),
-	DUTL(EP_STATUS),
-	DUTL(EP_ERR_SEVERITY),
-	DUTL(EP_ERR_IRQ_EN),
-	DUTL(PCI_PM_CTRL1),
-	DUTL(PCI_PM_CTRL2),
-
-	/* PCIe stack regs */
-	DREG(SYSTEM_CONFIG1),
-	DREG(SYSTEM_CONFIG2),
-	DREG(EP_SYSTEM_CONFIG),
-	DREG(EP_FLR),
-	DREG(EP_BAR_CONFIG),
-	DREG(LINK_CONFIG),
-	DREG(PM_CONFIG),
-	DREG(DLP_CONTROL),
-	DREG(DLP_STATUS),
-	DREG(ERR_REPORT_CONTROL),
-	DREG(SLOT_CONTROL1),
-	DREG(SLOT_CONTROL2),
-	DREG(UTL_CONFIG),
-	DREG(BUFFERS_CONFIG),
-	DREG(ERROR_INJECT),
-	DREG(SRIOV_CONFIG),
-	DREG(PF0_SRIOV_STATUS),
-	DREG(PF1_SRIOV_STATUS),
-	DREG(PORT_NUMBER),
-	DREG(POR_SYSTEM_CONFIG),
-
-	/* Internal logic regs */
-	DREG(PHB_VERSION),
-	DREG(RESET),
-	DREG(PHB_CONTROL),
-	DREG(PHB_TIMEOUT_CONTROL1),
-	DREG(PHB_QUIESCE_DMA),
-	DREG(PHB_DMA_READ_TAG_ACTV),
-	DREG(PHB_TCE_READ_TAG_ACTV),
-
-	/* FIR registers */
-	DREG(LEM_FIR_ACCUM),
-	DREG(LEM_FIR_AND_MASK),
-	DREG(LEM_FIR_OR_MASK),
-	DREG(LEM_ACTION0),
-	DREG(LEM_ACTION1),
-	DREG(LEM_ERROR_MASK),
-	DREG(LEM_ERROR_AND_MASK),
-	DREG(LEM_ERROR_OR_MASK),
-
-	/* Error traps registers */
-	DREG(PHB_ERR_STATUS),
-	DREG(PHB_ERR_STATUS),
-	DREG(PHB_ERR1_STATUS),
-	DREG(PHB_ERR_INJECT),
-	DREG(PHB_ERR_LEM_ENABLE),
-	DREG(PHB_ERR_IRQ_ENABLE),
-	DREG(PHB_ERR_FREEZE_ENABLE),
-	DREG(PHB_ERR_SIDE_ENABLE),
-	DREG(PHB_ERR_LOG_0),
-	DREG(PHB_ERR_LOG_1),
-	DREG(PHB_ERR_STATUS_MASK),
-	DREG(PHB_ERR1_STATUS_MASK),
-	DREG(MMIO_ERR_STATUS),
-	DREG(MMIO_ERR1_STATUS),
-	DREG(MMIO_ERR_INJECT),
-	DREG(MMIO_ERR_LEM_ENABLE),
-	DREG(MMIO_ERR_IRQ_ENABLE),
-	DREG(MMIO_ERR_FREEZE_ENABLE),
-	DREG(MMIO_ERR_SIDE_ENABLE),
-	DREG(MMIO_ERR_LOG_0),
-	DREG(MMIO_ERR_LOG_1),
-	DREG(MMIO_ERR_STATUS_MASK),
-	DREG(MMIO_ERR1_STATUS_MASK),
-	DREG(DMA_ERR_STATUS),
-	DREG(DMA_ERR1_STATUS),
-	DREG(DMA_ERR_INJECT),
-	DREG(DMA_ERR_LEM_ENABLE),
-	DREG(DMA_ERR_IRQ_ENABLE),
-	DREG(DMA_ERR_FREEZE_ENABLE),
-	DREG(DMA_ERR_SIDE_ENABLE),
-	DREG(DMA_ERR_LOG_0),
-	DREG(DMA_ERR_LOG_1),
-	DREG(DMA_ERR_STATUS_MASK),
-	DREG(DMA_ERR1_STATUS_MASK),
-
-	/* Debug and Trace registers */
-	DREG(PHB_DEBUG_CONTROL0),
-	DREG(PHB_DEBUG_STATUS0),
-	DREG(PHB_DEBUG_CONTROL1),
-	DREG(PHB_DEBUG_STATUS1),
-	DREG(PHB_DEBUG_CONTROL2),
-	DREG(PHB_DEBUG_STATUS2),
-	DREG(PHB_DEBUG_CONTROL3),
-	DREG(PHB_DEBUG_STATUS3),
-	DREG(PHB_DEBUG_CONTROL4),
-	DREG(PHB_DEBUG_STATUS4),
-	DREG(PHB_DEBUG_CONTROL5),
-	DREG(PHB_DEBUG_STATUS5),
-
-	/* Don't seem to exist ...
-	DREG(PHB_DEBUG_CONTROL6),
-	DREG(PHB_DEBUG_STATUS6),
-	*/
-};
-
-static int wsp_pci_regs_show(struct seq_file *m, void *private)
-{
-	struct wsp_phb *phb = m->private;
-	struct pci_controller *hose = phb->hose;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-		/* Skip write-only regs */
-		if (wsp_pci_regs[i].offset == 0xc08 ||
-		    wsp_pci_regs[i].offset == 0xc10 ||
-		    wsp_pci_regs[i].offset == 0xc38 ||
-		    wsp_pci_regs[i].offset == 0xc40)
-			continue;
-		seq_printf(m, "0x%03x: 0x%016llx %s\n",
-			   wsp_pci_regs[i].offset,
-			   in_be64(hose->cfg_data + wsp_pci_regs[i].offset),
-			   wsp_pci_regs[i].name);
-	}
-	return 0;
-}
-
-static int wsp_pci_regs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wsp_pci_regs_show, inode->i_private);
-}
-
-static const struct file_operations wsp_pci_regs_fops = {
-	.open = wsp_pci_regs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int wsp_pci_reg_set(void *data, u64 val)
-{
-	out_be64((void __iomem *)data, val);
-	return 0;
-}
-
-static int wsp_pci_reg_get(void *data, u64 *val)
-{
-	*val = in_be64((void __iomem *)data);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n");
-
-static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id)
-{
-	struct wsp_phb *phb = dev_id;
-	struct pci_controller *hose = phb->hose;
-	irqreturn_t handled = IRQ_NONE;
-	struct wsp_pcie_err_log_data ed;
-
-	pr_err("PCI: Error interrupt on %s (PHB %d)\n",
-	       hose->dn->full_name, hose->global_number);
- again:
-	memset(&ed, 0, sizeof(ed));
-
-	/* Read and clear UTL errors */
-	ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS);
-	if (ed.utl_sys_err)
-		out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err);
-	ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS);
-	if (ed.utl_port_err)
-		out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err);
-	ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS);
-	if (ed.utl_rc_err)
-		out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err);
-
-	/* Read and clear main trap errors */
-	ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS);
-	if (ed.phb_err) {
-		ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS);
-		ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0);
-		ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0);
-	}
-	ed.mmio_err = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS);
-	if (ed.mmio_err) {
-		ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS);
-		ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0);
-		ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0);
-	}
-	ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS);
-	if (ed.dma_err) {
-		ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS);
-		ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0);
-		ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0);
-	}
-
-	/* Now print things out */
-	if (ed.phb_err) {
-		pr_err("   PHB Error Status      : 0x%016llx\n", ed.phb_err);
-		pr_err("   PHB First Error Status: 0x%016llx\n", ed.phb_err1);
-		pr_err("   PHB Error Log 0       : 0x%016llx\n", ed.phb_log0);
-		pr_err("   PHB Error Log 1       : 0x%016llx\n", ed.phb_log1);
-	}
-	if (ed.mmio_err) {
-		pr_err("  MMIO Error Status      : 0x%016llx\n", ed.mmio_err);
-		pr_err("  MMIO First Error Status: 0x%016llx\n", ed.mmio_err1);
-		pr_err("  MMIO Error Log 0       : 0x%016llx\n", ed.mmio_log0);
-		pr_err("  MMIO Error Log 1       : 0x%016llx\n", ed.mmio_log1);
-	}
-	if (ed.dma_err) {
-		pr_err("   DMA Error Status      : 0x%016llx\n", ed.dma_err);
-		pr_err("   DMA First Error Status: 0x%016llx\n", ed.dma_err1);
-		pr_err("   DMA Error Log 0       : 0x%016llx\n", ed.dma_log0);
-		pr_err("   DMA Error Log 1       : 0x%016llx\n", ed.dma_log1);
-	}
-	if (ed.utl_sys_err)
-		pr_err("   UTL Sys Error Status  : 0x%016llx\n", ed.utl_sys_err);
-	if (ed.utl_port_err)
-		pr_err("   UTL Port Error Status : 0x%016llx\n", ed.utl_port_err);
-	if (ed.utl_rc_err)
-		pr_err("   UTL RC Error Status   : 0x%016llx\n", ed.utl_rc_err);
-
-	/* Interrupts are caused by the error traps. If we had any error there
-	 * we loop again in case the UTL buffered some new stuff between
-	 * going there and going to the traps
-	 */
-	if (ed.dma_err || ed.mmio_err || ed.phb_err) {
-		handled = IRQ_HANDLED;
-		goto again;
-	}
-	return handled;
-}
-
-static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb)
-{
-	struct pci_controller *hose = phb->hose;
-	int err_irq, i, rc;
-	char fname[16];
-
-	/* Create a debugfs file for that PHB */
-	sprintf(fname, "phb%d", phb->hose->global_number);
-	phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root);
-
-	/* Some useful debug output */
-	if (phb->ddir) {
-		struct dentry *d = debugfs_create_dir("regs", phb->ddir);
-		char tmp[64];
-
-		for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-			sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset,
-				wsp_pci_regs[i].name);
-			debugfs_create_file(tmp, 0600, d,
-					    hose->cfg_data + wsp_pci_regs[i].offset,
-					    &wsp_pci_reg_fops);
-		}
-		debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops);
-	}
-
-	/* Find the IRQ number for that PHB */
-	err_irq = irq_of_parse_and_map(hose->dn, 0);
-	if (err_irq == 0)
-		/* XXX Error IRQ lacking from device-tree */
-		err_irq = wsp_pci_get_err_irq_no_dt(hose->dn);
-	if (err_irq == 0) {
-		pr_err("PCI: Failed to fetch error interrupt for %s\n",
-		       hose->dn->full_name);
-		return;
-	}
-	/* Request it */
-	rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb);
-	if (rc) {
-		pr_err("PCI: Failed to request interrupt for %s\n",
-		       hose->dn->full_name);
-	}
-	/* Enable interrupts for all errors for now */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-}
-
-/*
- * This is called later to hookup with the error interrupt
- */
-static int __init wsp_setup_pci_late(void)
-{
-	struct wsp_phb *phb;
-
-	list_for_each_entry(phb, &wsp_phbs, all)
-		wsp_setup_pci_err_reporting(phb);
-
-	return 0;
-}
-arch_initcall(wsp_setup_pci_late);

diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h
deleted file mode 100644
index 52e9bd9..0000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.h
+++ /dev/null

@@ -1,268 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_PCI_H
-#define __WSP_PCI_H
-
-/* Architected registers */
-#define PCIE_REG_DMA_CHAN_STATUS	0x110
-#define PCIE_REG_CPU_LOADSTORE_STATUS	0x120
-
-#define PCIE_REG_CONFIG_DATA		0x130
-#define PCIE_REG_LOCK0			0x138
-#define PCIE_REG_CONFIG_ADDRESS		0x140
-#define   PCIE_REG_CA_ENABLE			0x8000000000000000ull
-#define	  PCIE_REG_CA_BUS_MASK			0x0ff0000000000000ull
-#define   PCIE_REG_CA_BUS_SHIFT			(20+32)
-#define   PCIE_REG_CA_DEV_MASK			0x000f800000000000ull
-#define   PCIE_REG_CA_DEV_SHIFT			(15+32)
-#define   PCIE_REG_CA_FUNC_MASK			0x0000700000000000ull
-#define   PCIE_REG_CA_FUNC_SHIFT		(12+32)
-#define   PCIE_REG_CA_REG_MASK			0x00000fff00000000ull
-#define   PCIE_REG_CA_REG_SHIFT			( 0+32)
-#define   PCIE_REG_CA_BE_MASK			0x00000000f0000000ull
-#define   PCIE_REG_CA_BE_SHIFT			(   28)
-#define PCIE_REG_LOCK1			0x148
-
-#define PCIE_REG_PHB_CONFIG		0x160
-#define   PCIE_REG_PHBC_64B_TCE_EN		0x2000000000000000ull
-#define   PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN	0x1000000000000000ull
-#define   PCIE_REG_PHBC_32BIT_MSI_EN		0x0080000000000000ull
-#define   PCIE_REG_PHBC_M64_EN			0x0040000000000000ull
-#define   PCIE_REG_PHBC_IO_EN			0x0008000000000000ull
-#define   PCIE_REG_PHBC_64BIT_MSI_EN		0x0002000000000000ull
-#define   PCIE_REG_PHBC_M32A_EN			0x0000800000000000ull
-#define   PCIE_REG_PHBC_M32B_EN			0x0000400000000000ull
-#define   PCIE_REG_PHBC_MSI_PE_VALIDATE		0x0000200000000000ull
-#define   PCIE_REG_PHBC_DMA_XLATE_BYPASS	0x0000100000000000ull
-
-#define PCIE_REG_IO_BASE_ADDR		0x170
-#define PCIE_REG_IO_BASE_MASK		0x178
-#define PCIE_REG_IO_START_ADDR		0x180
-
-#define PCIE_REG_M32A_BASE_ADDR		0x190
-#define PCIE_REG_M32A_BASE_MASK		0x198
-#define PCIE_REG_M32A_START_ADDR	0x1a0
-
-#define PCIE_REG_M32B_BASE_ADDR		0x1b0
-#define PCIE_REG_M32B_BASE_MASK		0x1b8
-#define PCIE_REG_M32B_START_ADDR	0x1c0
-
-#define PCIE_REG_M64_BASE_ADDR		0x1e0
-#define PCIE_REG_M64_BASE_MASK		0x1e8
-#define PCIE_REG_M64_START_ADDR		0x1f0
-
-#define PCIE_REG_TCE_KILL		0x210
-#define   PCIE_REG_TCEKILL_SINGLE	0x8000000000000000ull
-#define   PCIE_REG_TCEKILL_ADDR_MASK	0x000003fffffffff8ull
-#define   PCIE_REG_TCEKILL_PS_4K	0
-#define   PCIE_REG_TCEKILL_PS_64K	1
-#define   PCIE_REG_TCEKILL_PS_16M	2
-#define   PCIE_REG_TCEKILL_PS_16G	3
-
-#define PCIE_REG_IODA_ADDR		0x220
-#define   PCIE_REG_IODA_AD_AUTOINC	0x8000000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_MVT	0x0005000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PELT	0x0006000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTA	0x0007000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTB	0x0008000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TVT	0x0009000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TCE	0x000a000000000000ull
-#define PCIE_REG_IODA_DATA0		0x228
-#define PCIE_REG_IODA_DATA1		0x230
-
-#define PCIE_REG_LOCK2			0x240
-
-#define PCIE_REG_PHB_GEN_CAP		0x250
-#define PCIE_REG_PHB_TCE_CAP		0x258
-#define PCIE_REG_PHB_IRQ_CAP		0x260
-#define PCIE_REG_PHB_EEH_CAP		0x268
-
-#define PCIE_REG_PAPR_ERR_INJ_CONTROL	0x2b0
-#define PCIE_REG_PAPR_ERR_INJ_ADDR	0x2b8
-#define PCIE_REG_PAPR_ERR_INJ_MASK	0x2c0
-
-
-#define PCIE_REG_SYS_CFG1		0x600
-#define   PCIE_REG_SYS_CFG1_CLASS_CODE	0x0000000000ffffffull
-
-#define IODA_TVT0_TTA_MASK		0x000fffffffff0000ull
-#define IODA_TVT0_TTA_SHIFT		4
-#define IODA_TVT0_BUSNUM_VALID_MASK	0x000000000000e000ull
-#define IODA_TVT0_TCE_TABLE_SIZE_MASK	0x0000000000001f00ull
-#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT	8
-#define IODA_TVT0_BUSNUM_VALUE_MASK	0x00000000000000ffull
-#define IODA_TVT0_BUSNUM_VALID_SHIFT	0
-#define IODA_TVT1_DEVNUM_VALID		0x2000000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_MASK	0x1f00000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_SHIFT	56
-#define IODA_TVT1_FUNCNUM_VALID		0x0008000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_MASK	0x0007000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_SHIFT	48
-#define IODA_TVT1_IO_PAGE_SIZE_MASK	0x00001f0000000000ull
-#define IODA_TVT1_IO_PAGE_SIZE_SHIFT	40
-#define IODA_TVT1_PE_NUMBER_MASK	0x000000000000003full
-#define IODA_TVT1_PE_NUMBER_SHIFT	0
-
-#define IODA_TVT_COUNT			64
-
-/* UTL Core registers */
-#define PCIE_UTL_SYS_BUS_CONTROL	0x400
-#define PCIE_UTL_STATUS			0x408
-#define PCIE_UTL_SYS_BUS_AGENT_STATUS	0x410
-#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV	0x418
-#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN	0x420
-#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF	0x440
-#define PCIE_UTL_REVISION_ID		0x448
-
-#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC	0x4c0
-#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC	0x4d0
-#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC	0x4e0
-#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC	0x4f0
-#define PCIE_UTL_OUT_NP_BUF_ALLOC	0x500
-#define PCIE_UTL_IN_NP_BUF_ALLOC	0x510
-#define PCIE_UTL_PCIE_TAGS_ALLOC	0x520
-#define PCIE_UTL_GBIF_READ_TAGS_ALLOC	0x530
-
-#define PCIE_UTL_PCIE_PORT_CONTROL	0x540
-#define PCIE_UTL_PCIE_PORT_STATUS	0x548
-#define PCIE_UTL_PCIE_PORT_ERROR_SEV	0x550
-#define PCIE_UTL_PCIE_PORT_IRQ_EN	0x558
-#define PCIE_UTL_RC_STATUS		0x560
-#define PCIE_UTL_RC_ERR_SEVERITY	0x568
-#define PCIE_UTL_RC_IRQ_EN		0x570
-#define PCIE_UTL_EP_STATUS		0x578
-#define PCIE_UTL_EP_ERR_SEVERITY	0x580
-#define PCIE_UTL_EP_ERR_IRQ_EN		0x588
-
-#define PCIE_UTL_PCI_PM_CTRL1		0x590
-#define PCIE_UTL_PCI_PM_CTRL2		0x598
-
-/* PCIe stack registers */
-#define PCIE_REG_SYSTEM_CONFIG1		0x600
-#define PCIE_REG_SYSTEM_CONFIG2		0x608
-#define PCIE_REG_EP_SYSTEM_CONFIG	0x618
-#define PCIE_REG_EP_FLR			0x620
-#define PCIE_REG_EP_BAR_CONFIG		0x628
-#define PCIE_REG_LINK_CONFIG		0x630
-#define PCIE_REG_PM_CONFIG		0x640
-#define PCIE_REG_DLP_CONTROL		0x650
-#define PCIE_REG_DLP_STATUS		0x658
-#define PCIE_REG_ERR_REPORT_CONTROL	0x660
-#define PCIE_REG_SLOT_CONTROL1		0x670
-#define PCIE_REG_SLOT_CONTROL2		0x678
-#define PCIE_REG_UTL_CONFIG		0x680
-#define PCIE_REG_BUFFERS_CONFIG		0x690
-#define PCIE_REG_ERROR_INJECT		0x698
-#define PCIE_REG_SRIOV_CONFIG		0x6a0
-#define PCIE_REG_PF0_SRIOV_STATUS	0x6a8
-#define PCIE_REG_PF1_SRIOV_STATUS	0x6b0
-#define PCIE_REG_PORT_NUMBER		0x700
-#define PCIE_REG_POR_SYSTEM_CONFIG	0x708
-
-/* PHB internal logic registers */
-#define PCIE_REG_PHB_VERSION		0x800
-#define PCIE_REG_RESET			0x808
-#define PCIE_REG_PHB_CONTROL		0x810
-#define PCIE_REG_PHB_TIMEOUT_CONTROL1	0x878
-#define PCIE_REG_PHB_QUIESCE_DMA	0x888
-#define PCIE_REG_PHB_DMA_READ_TAG_ACTV	0x900
-#define PCIE_REG_PHB_TCE_READ_TAG_ACTV	0x908
-
-/* FIR registers */
-#define PCIE_REG_LEM_FIR_ACCUM		0xc00
-#define PCIE_REG_LEM_FIR_AND_MASK	0xc08
-#define PCIE_REG_LEM_FIR_OR_MASK	0xc10
-#define PCIE_REG_LEM_ACTION0		0xc18
-#define PCIE_REG_LEM_ACTION1		0xc20
-#define PCIE_REG_LEM_ERROR_MASK		0xc30
-#define PCIE_REG_LEM_ERROR_AND_MASK	0xc38
-#define PCIE_REG_LEM_ERROR_OR_MASK	0xc40
-
-/* PHB Error registers */
-#define PCIE_REG_PHB_ERR_STATUS		0xc80
-#define PCIE_REG_PHB_ERR1_STATUS	0xc88
-#define PCIE_REG_PHB_ERR_INJECT		0xc90
-#define PCIE_REG_PHB_ERR_LEM_ENABLE	0xc98
-#define PCIE_REG_PHB_ERR_IRQ_ENABLE	0xca0
-#define PCIE_REG_PHB_ERR_FREEZE_ENABLE	0xca8
-#define PCIE_REG_PHB_ERR_SIDE_ENABLE	0xcb8
-#define PCIE_REG_PHB_ERR_LOG_0		0xcc0
-#define PCIE_REG_PHB_ERR_LOG_1		0xcc8
-#define PCIE_REG_PHB_ERR_STATUS_MASK	0xcd0
-#define PCIE_REG_PHB_ERR1_STATUS_MASK	0xcd8
-
-#define PCIE_REG_MMIO_ERR_STATUS	0xd00
-#define PCIE_REG_MMIO_ERR1_STATUS	0xd08
-#define PCIE_REG_MMIO_ERR_INJECT	0xd10
-#define PCIE_REG_MMIO_ERR_LEM_ENABLE	0xd18
-#define PCIE_REG_MMIO_ERR_IRQ_ENABLE	0xd20
-#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE	0xd28
-#define PCIE_REG_MMIO_ERR_SIDE_ENABLE	0xd38
-#define PCIE_REG_MMIO_ERR_LOG_0		0xd40
-#define PCIE_REG_MMIO_ERR_LOG_1		0xd48
-#define PCIE_REG_MMIO_ERR_STATUS_MASK	0xd50
-#define PCIE_REG_MMIO_ERR1_STATUS_MASK	0xd58
-
-#define PCIE_REG_DMA_ERR_STATUS		0xd80
-#define PCIE_REG_DMA_ERR1_STATUS	0xd88
-#define PCIE_REG_DMA_ERR_INJECT		0xd90
-#define PCIE_REG_DMA_ERR_LEM_ENABLE	0xd98
-#define PCIE_REG_DMA_ERR_IRQ_ENABLE	0xda0
-#define PCIE_REG_DMA_ERR_FREEZE_ENABLE	0xda8
-#define PCIE_REG_DMA_ERR_SIDE_ENABLE	0xdb8
-#define PCIE_REG_DMA_ERR_LOG_0		0xdc0
-#define PCIE_REG_DMA_ERR_LOG_1		0xdc8
-#define PCIE_REG_DMA_ERR_STATUS_MASK	0xdd0
-#define PCIE_REG_DMA_ERR1_STATUS_MASK	0xdd8
-
-/* Shortcuts for access to the above using the PHB definitions
- * with an offset
- */
-#define PCIE_REG_ERR_PHB_OFFSET		0x0
-#define PCIE_REG_ERR_MMIO_OFFSET	0x80
-#define PCIE_REG_ERR_DMA_OFFSET		0x100
-
-/* Debug and Trace registers */
-#define PCIE_REG_PHB_DEBUG_CONTROL0	0xe00
-#define PCIE_REG_PHB_DEBUG_STATUS0	0xe08
-#define PCIE_REG_PHB_DEBUG_CONTROL1	0xe10
-#define PCIE_REG_PHB_DEBUG_STATUS1	0xe18
-#define PCIE_REG_PHB_DEBUG_CONTROL2	0xe20
-#define PCIE_REG_PHB_DEBUG_STATUS2	0xe28
-#define PCIE_REG_PHB_DEBUG_CONTROL3	0xe30
-#define PCIE_REG_PHB_DEBUG_STATUS3	0xe38
-#define PCIE_REG_PHB_DEBUG_CONTROL4	0xe40
-#define PCIE_REG_PHB_DEBUG_STATUS4	0xe48
-#define PCIE_REG_PHB_DEBUG_CONTROL5	0xe50
-#define PCIE_REG_PHB_DEBUG_STATUS5	0xe58
-#define PCIE_REG_PHB_DEBUG_CONTROL6	0xe60
-#define PCIE_REG_PHB_DEBUG_STATUS6	0xe68
-
-/* Definition for PCIe errors */
-struct wsp_pcie_err_log_data {
-	__u64	phb_err;
-	__u64	phb_err1;
-	__u64	phb_log0;
-	__u64	phb_log1;
-	__u64	mmio_err;
-	__u64	mmio_err1;
-	__u64	mmio_log0;
-	__u64	mmio_log1;
-	__u64	dma_err;
-	__u64	dma_err1;
-	__u64	dma_log0;
-	__u64	dma_log1;
-	__u64	utl_sys_err;
-	__u64	utl_port_err;
-	__u64	utl_rc_err;
-	__u64	unused;
-};
-
-#endif /* __WSP_PCI_H */

diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 7baa70d..a19332a 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig

@@ -7,6 +7,12 @@
 	depends on PCI && 4xx
 	default n
 
+config PPC4xx_HSTA_MSI
+	bool
+	depends on PCI_MSI
+	depends on PCI && 4xx
+	default n
+
 config PPC4xx_MSI
 	bool
 	depends on PCI_MSI

diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index afbcc37..f7cb2a1 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile

@@ -45,6 +45,7 @@
 ifeq ($(CONFIG_PCI),y)
 obj-$(CONFIG_4xx)		+= ppc4xx_pci.o
 endif
+obj-$(CONFIG_PPC4xx_HSTA_MSI)	+= ppc4xx_hsta_msi.o
 obj-$(CONFIG_PPC4xx_MSI)	+= ppc4xx_msi.o
 obj-$(CONFIG_PPC4xx_CPM)	+= ppc4xx_cpm.o
 obj-$(CONFIG_PPC4xx_GPIO)	+= ppc4xx_gpio.o

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 3f415e2..4bd091a 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c

@@ -1150,8 +1150,7 @@
 	pci = hose->private_data;
 
 	/* Enable PTOD, ENL23D & EXL23D */
-	out_be32(&pci->pex_pme_mes_disr, 0);
-	setbits32(&pci->pex_pme_mes_disr,
+	clrbits32(&pci->pex_pme_mes_disr,
 		  PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
 
 	out_be32(&pci->pex_pme_mes_ier, 0);

diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index cf2b084..c04b718 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c

@@ -391,8 +391,10 @@
 	ops->get_inb_message = fsl_get_inb_message;
 
 	rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0);
-	if (!rmu_node)
+	if (!rmu_node) {
+		dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n");
 		goto err_rmu;
+	}
 	rc = of_address_to_resource(rmu_node, 0, &rmu_regs);
 	if (rc) {
 		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
@@ -413,6 +415,7 @@
 	/*set up doobell node*/
 	np = of_find_compatible_node(NULL, NULL, "fsl,srio-dbell-unit");
 	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-dbell-unit node\n");
 		rc = -ENODEV;
 		goto err_dbell;
 	}
@@ -441,6 +444,7 @@
 	/*set up port write node*/
 	np = of_find_compatible_node(NULL, NULL, "fsl,srio-port-write-unit");
 	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-port-write-unit node\n");
 		rc = -ENODEV;
 		goto err_pw;
 	}
@@ -633,14 +637,18 @@
 	return 0;
 err:
 	kfree(pw);
+	pw = NULL;
 err_pw:
 	kfree(dbell);
+	dbell = NULL;
 err_dbell:
 	iounmap(rmu_regs_win);
+	rmu_regs_win = NULL;
 err_rmu:
 	kfree(ops);
 err_ops:
 	iounmap(rio_regs_win);
+	rio_regs_win = NULL;
 err_rio_regs:
 	return rc;
 }

diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index 00e224a..b48197a 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c

@@ -881,9 +881,9 @@
 	rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0,
 			 "msg_rx", (void *)mport);
 	if (rc < 0) {
-		dma_free_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
-			rmu->msg_tx_ring.virt_buffer[i],
-			rmu->msg_tx_ring.phys_buffer[i]);
+		dma_free_coherent(priv->dev,
+			rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+			rmu->msg_rx_ring.virt, rmu->msg_rx_ring.phys);
 		goto out;
 	}
 

diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 228cf91..ffd1169 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c

@@ -25,7 +25,6 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/phy.h>
-#include <linux/phy_fixed.h>
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
 #include <linux/fs_enet_pd.h>
@@ -178,37 +177,6 @@
 EXPORT_SYMBOL(get_baudrate);
 #endif /* CONFIG_CPM2 */
 
-#ifdef CONFIG_FIXED_PHY
-static int __init of_add_fixed_phys(void)
-{
-	int ret;
-	struct device_node *np;
-	u32 *fixed_link;
-	struct fixed_phy_status status = {};
-
-	for_each_node_by_name(np, "ethernet") {
-		fixed_link  = (u32 *)of_get_property(np, "fixed-link", NULL);
-		if (!fixed_link)
-			continue;
-
-		status.link = 1;
-		status.duplex = fixed_link[1];
-		status.speed = fixed_link[2];
-		status.pause = fixed_link[3];
-		status.asym_pause = fixed_link[4];
-
-		ret = fixed_phy_add(PHY_POLL, fixed_link[0], &status);
-		if (ret) {
-			of_node_put(np);
-			return ret;
-		}
-	}
-
-	return 0;
-}
-arch_initcall(of_add_fixed_phys);
-#endif /* CONFIG_FIXED_PHY */
-
 #if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
 static __be32 __iomem *rstcr;
 

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 8209744..be33c97 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c

@@ -1588,10 +1588,6 @@
 			num_timers = 8;
 	}
 
-	/* FSL mpic error interrupt intialization */
-	if (mpic->flags & MPIC_FSL_HAS_EIMR)
-		mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
-
 	/* Initialize timers to our reserved vectors and mask them for now */
 	for (i = 0; i < num_timers; i++) {
 		unsigned int offset = mpic_tm_offset(mpic, i);
@@ -1675,6 +1671,10 @@
 			irq_set_chained_handler(virq, &mpic_cascade);
 		}
 	}
+
+	/* FSL mpic error interrupt initialization */
+	if (mpic->flags & MPIC_FSL_HAS_EIMR)
+		mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
 }
 
 void __init mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio)

diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
new file mode 100644
index 0000000..11c8884
--- /dev/null
+++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c

@@ -0,0 +1,258 @@
+/*
+ * MSI support for PPC4xx SoCs using High Speed Transfer Assist (HSTA) for
+ * generation of the interrupt.
+ *
+ * Copyright © 2013 Alistair Popple <alistair@popple.id.au> IBM Corporation
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <asm/msi_bitmap.h>
+
+/* Driver state for the HSTA MSI block; a single static instance is used. */
+struct ppc4xx_hsta_msi {
+	struct device *dev;
+
+	/* The ioremapped HSTA MSI IO space */
+	u32 __iomem *data;
+
+	/* Physical address of HSTA MSI IO space */
+	u64 address;
+	struct msi_bitmap bmp;
+
+	/* An array mapping offsets to hardware IRQs */
+	int *irq_map;
+
+	/* Number of hwirqs supported */
+	int irq_count;
+};
+static struct ppc4xx_hsta_msi ppc4xx_hsta_msi;
+
+/*
+ * Allocate and program one HSTA interrupt for every MSI descriptor on @dev.
+ *
+ * For each entry on dev->msi_list a free offset is taken from the bitmap,
+ * translated to an IRQ number through irq_map, and a message whose address
+ * is the HSTA base plus offset * 0x10 is handed to write_msi_msg().
+ *
+ * Returns 0 on success, the negative value from msi_bitmap_alloc_hwirqs()
+ * when no offset is free, or -EINVAL when the offset has no IRQ or
+ * irq_set_msi_desc() rejects it.  Entries set up before a failure are not
+ * undone here.
+ */
+static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_msg msg;
+	struct msi_desc *entry;
+	int irq, hwirq;
+	u64 addr;
+
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1);
+		if (irq < 0) {
+			pr_debug("%s: Failed to allocate msi interrupt\n",
+				 __func__);
+			return irq;
+		}
+
+		hwirq = ppc4xx_hsta_msi.irq_map[irq];
+		if (hwirq == NO_IRQ) {
+			pr_err("%s: Failed mapping irq %d\n", __func__, irq);
+			/* Give the offset back so it is not lost for good */
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+
+		/*
+		 * HSTA generates interrupts on writes to 128-bit aligned
+		 * addresses.
+		 */
+		addr = ppc4xx_hsta_msi.address + irq*0x10;
+		msg.address_hi = upper_32_bits(addr);
+		msg.address_lo = lower_32_bits(addr);
+
+		/* Data is not used by the HSTA. */
+		msg.data = 0;
+
+		pr_debug("%s: Setup irq %d (0x%0llx)\n", __func__, hwirq,
+			 (((u64) msg.address_hi) << 32) | msg.address_lo);
+
+		if (irq_set_msi_desc(hwirq, entry)) {
+			pr_err(
+			"%s: Invalid hwirq %d specified in device tree\n",
+			__func__, hwirq);
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+		write_msi_msg(hwirq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * Return the irq_map offset whose entry equals @hwirq, or -EINVAL if no
+ * entry matches.
+ */
+static int hsta_find_hwirq_offset(int hwirq)
+{
+	int irq;
+
+	/* Find the offset given the hwirq */
+	for (irq = 0; irq < ppc4xx_hsta_msi.irq_count; irq++)
+		if (ppc4xx_hsta_msi.irq_map[irq] == hwirq)
+			return irq;
+
+	return -EINVAL;
+}
+
+/*
+ * Release every MSI set up on @dev: detach the descriptor from its IRQ and
+ * return the matching offset to the bitmap.  Entries whose irq is NO_IRQ
+ * are skipped.
+ */
+static void hsta_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+	int irq;
+
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		if (entry->irq == NO_IRQ)
+			continue;
+
+		irq = hsta_find_hwirq_offset(entry->irq);
+
+		/* entry->irq should always be in irq_map */
+		BUG_ON(irq < 0);
+		irq_set_msi_desc(entry->irq, NULL);
+		msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+		pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__,
+			 entry->irq, irq);
+	}
+}
+
+/* Reject MSI-X requests with -EINVAL; everything else is accepted (0). */
+static int hsta_msi_check_device(struct pci_dev *pdev, int nvec, int type)
+{
+	/* We don't support MSI-X */
+	if (type == PCI_CAP_ID_MSIX) {
+		pr_debug("%s: MSI-X not supported.\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Probe the HSTA MSI platform device.
+ *
+ * Maps the first memory resource, allocates the offset bitmap and the
+ * offset-to-IRQ table (one entry per interrupt counted in the device node),
+ * and installs the ppc_md MSI hooks.
+ *
+ * Returns 0 on success; -EINVAL when the memory resource or the interrupts
+ * are missing or an interrupt cannot be mapped; -ENOMEM when ioremap() or
+ * the table allocation fails; otherwise the msi_bitmap_alloc() error.
+ */
+static int hsta_msi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	int irq, ret, irq_count;
+
+	/* platform_get_resource() reports failure with NULL, not ERR_PTR */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(dev, "Unable to get mmio space\n");
+		return -EINVAL;
+	}
+
+	irq_count = of_irq_count(dev->of_node);
+	if (!irq_count) {
+		dev_err(dev, "Unable to find IRQ range\n");
+		return -EINVAL;
+	}
+
+	ppc4xx_hsta_msi.dev = dev;
+	ppc4xx_hsta_msi.address = mem->start;
+	ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem));
+	ppc4xx_hsta_msi.irq_count = irq_count;
+	/* ioremap() returns NULL on failure */
+	if (!ppc4xx_hsta_msi.data) {
+		dev_err(dev, "Unable to map memory\n");
+		return -ENOMEM;
+	}
+
+	ret = msi_bitmap_alloc(&ppc4xx_hsta_msi.bmp, irq_count, dev->of_node);
+	if (ret)
+		goto out;
+
+	/* kmalloc_array() checks the size multiplication; NULL on failure */
+	ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int),
+						GFP_KERNEL);
+	if (!ppc4xx_hsta_msi.irq_map) {
+		ret = -ENOMEM;
+		goto out1;
+	}
+
+	/* Setup a mapping from irq offsets to hardware irq numbers */
+	for (irq = 0; irq < irq_count; irq++) {
+		ppc4xx_hsta_msi.irq_map[irq] =
+			irq_of_parse_and_map(dev->of_node, irq);
+		if (ppc4xx_hsta_msi.irq_map[irq] == NO_IRQ) {
+			dev_err(dev, "Unable to map IRQ\n");
+			ret = -EINVAL;
+			goto out2;
+		}
+	}
+
+	ppc_md.setup_msi_irqs = hsta_setup_msi_irqs;
+	ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs;
+	ppc_md.msi_check_device = hsta_msi_check_device;
+	return 0;
+
+out2:
+	kfree(ppc4xx_hsta_msi.irq_map);
+
+out1:
+	msi_bitmap_free(&ppc4xx_hsta_msi.bmp);
+
+out:
+	iounmap(ppc4xx_hsta_msi.data);
+	return ret;
+}
+
+/* Device tree match table */
+static const struct of_device_id hsta_msi_ids[] = {
+	{
+		.compatible = "ibm,hsta-msi",
+	},
+	{}
+};
+
+static struct platform_driver hsta_msi_driver = {
+	.probe = hsta_msi_probe,
+	.driver = {
+		.name = "hsta-msi",
+		.owner = THIS_MODULE,
+		.of_match_table = hsta_msi_ids,
+	},
+};
+
+/* Register the platform driver at subsys_initcall time. */
+static int hsta_msi_init(void)
+{
+	return platform_driver_register(&hsta_msi_driver);
+}
+subsys_initcall(hsta_msi_init);

diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c
index 4914fd3..df6e2fc 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/sysdev/ppc4xx_pci.c

@@ -176,8 +176,12 @@
 		return -ENXIO;
 	}
 
-	/* Check that we are fully contained within 32 bits space */
-	if (res->end > 0xffffffff) {
+	/* Check that we are fully contained within 32 bits space if we are not
+	 * running on a 460sx or 476fpe which have 64 bit bus addresses.
+	 */
+	if (res->end > 0xffffffff &&
+	    !(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
+	      || of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
 		printk(KERN_ERR "%s: dma-ranges outside of 32 bits space\n",
 		       hose->dn->full_name);
 		return -ENXIO;
@@ -1440,7 +1444,8 @@
 		ppc4xx_pciex_hwops = &ppc405ex_pcie_hwops;
 #endif
 #ifdef CONFIG_476FPE
-	if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe"))
+	if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe")
+		|| of_device_is_compatible(np, "ibm,plb-pciex-476gtr"))
 		ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
 #endif
 	if (ppc4xx_pciex_hwops == NULL) {
@@ -1751,7 +1756,10 @@
 			dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL,
 				sa | DCRO_PEGPL_460SX_OMR1MSKL_UOT
 					| DCRO_PEGPL_OMRxMSKL_VAL);
-		else if (of_device_is_compatible(port->node, "ibm,plb-pciex-476fpe"))
+		else if (of_device_is_compatible(
+				port->node, "ibm,plb-pciex-476fpe") ||
+			of_device_is_compatible(
+				port->node, "ibm,plb-pciex-476gtr"))
 			dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL,
 				sa | DCRO_PEGPL_476FPE_OMR1MSKL_UOT
 					| DCRO_PEGPL_OMRxMSKL_VAL);
@@ -1881,7 +1889,10 @@
 			sa |= PCI_BASE_ADDRESS_MEM_PREFETCH;
 
 		if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx") ||
-		    of_device_is_compatible(port->node, "ibm,plb-pciex-476fpe"))
+		    of_device_is_compatible(
+			    port->node, "ibm,plb-pciex-476fpe") ||
+		    of_device_is_compatible(
+			    port->node, "ibm,plb-pciex-476gtr"))
 			sa |= PCI_BASE_ADDRESS_MEM_TYPE_64;
 
 		out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(sa));

diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 9dee470..de8d948 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c

@@ -26,6 +26,7 @@
 #include <asm/errno.h>
 #include <asm/xics.h>
 #include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
 
 struct icp_ipl {
 	union {
@@ -145,7 +146,13 @@
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
 	kvmppc_set_host_ipi(cpu, 1);
-	icp_native_set_qirr(cpu, IPI_PRIORITY);
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL) &&
+	    (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id()))))
+		doorbell_cause_ipi(cpu, data);
+	else
+#endif
+		icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
 void xics_wake_cpu(int cpu)

diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
index bce3dcf..c987486 100644
--- a/arch/powerpc/xmon/nonstdio.c
+++ b/arch/powerpc/xmon/nonstdio.c

@@ -122,7 +122,7 @@
 
 	if (n && rc == 0) {
 		/* No udbg hooks, fallback to printk() - dangerous */
-		printk(xmon_outbuf);
+		printk("%s", xmon_outbuf);
 	}
 }
 

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 08504e7..d199bfa 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c

@@ -419,7 +419,7 @@
 		get_output_lock();
 		excprint(regs);
 		if (bp) {
-			printf("cpu 0x%x stopped at breakpoint 0x%x (",
+			printf("cpu 0x%x stopped at breakpoint 0x%lx (",
 			       cpu, BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
@@ -513,7 +513,7 @@
 		excprint(regs);
 		bp = at_breakpoint(regs->nip);
 		if (bp) {
-			printf("Stopped at breakpoint %x (", BP_NUM(bp));
+			printf("Stopped at breakpoint %lx (", BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
 		if (unrecoverable_excp(regs))
@@ -759,7 +759,7 @@
 		brk.address = dabr.address;
 		brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
 		brk.len = 8;
-		set_breakpoint(&brk);
+		__set_breakpoint(&brk);
 	}
 	if (iabr && cpu_has_feature(CPU_FTR_IABR))
 		mtspr(SPRN_IABR, iabr->address
@@ -997,14 +997,14 @@
 					last_cpu = cpu;
 				} else {
 					if (last_cpu != first_cpu)
-						printf("-%lx", last_cpu);
+						printf("-0x%lx", last_cpu);
 					last_cpu = first_cpu = cpu;
-					printf(" %lx", cpu);
+					printf(" 0x%lx", cpu);
 				}
 			}
 		}
 		if (last_cpu != first_cpu)
-			printf("-%lx", last_cpu);
+			printf("-0x%lx", last_cpu);
 		printf("\n");
 		return 0;
 	}
@@ -1024,7 +1024,7 @@
 			/* take control back */
 			mb();
 			xmon_owner = smp_processor_id();
-			printf("cpu %u didn't take control\n", cpu);
+			printf("cpu 0x%x didn't take control\n", cpu);
 			return 0;
 		}
 		barrier();
@@ -1086,7 +1086,7 @@
 	fcs = 0xffff;
 	for (i = 0; i < ncsum; ++i) {
 		if (mread(adrs+i, &v, 1) == 0) {
-			printf("csum stopped at %x\n", adrs+i);
+			printf("csum stopped at "REG"\n", adrs+i);
 			break;
 		}
 		fcs = FCS(fcs, v);
@@ -1202,12 +1202,12 @@
 			/* assume a breakpoint address */
 			bp = at_breakpoint(a);
 			if (bp == NULL) {
-				printf("No breakpoint at %x\n", a);
+				printf("No breakpoint at %lx\n", a);
 				break;
 			}
 		}
 
-		printf("Cleared breakpoint %x (", BP_NUM(bp));
+		printf("Cleared breakpoint %lx (", BP_NUM(bp));
 		xmon_print_symbol(bp->address, " ", ")\n");
 		bp->enabled = 0;
 		break;
@@ -1746,7 +1746,7 @@
 		__delay(200);
 		n = size;
 	} else {
-		printf("*** Error writing address %x\n", adrs + n);
+		printf("*** Error writing address "REG"\n", adrs + n);
 	}
 	catch_memory_errors = 0;
 	return n;
@@ -2435,7 +2435,7 @@
 		ret = func(args[0], args[1], args[2], args[3],
 			   args[4], args[5], args[6], args[7]);
 		sync();
-		printf("return value is %x\n", ret);
+		printf("return value is 0x%lx\n", ret);
 	} else {
 		printf("*** %x exception occurred\n", fault_except);
 	}
@@ -2700,7 +2700,7 @@
 	unsigned long esid,vsid,valid;
 	unsigned long llp;
 
-	printf("SLB contents of cpu %x\n", smp_processor_id());
+	printf("SLB contents of cpu 0x%x\n", smp_processor_id());
 
 	for (i = 0; i < mmu_slb_size; i++) {
 		asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
@@ -2732,7 +2732,7 @@
 	int i;
 	unsigned long *tmp = (unsigned long *)local_paca->stab_addr;
 
-	printf("Segment table contents of cpu %x\n", smp_processor_id());
+	printf("Segment table contents of cpu 0x%x\n", smp_processor_id());
 
 	for (i = 0; i < PAGE_SIZE/16; i++) {
 		unsigned long a, b;

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e9f8fa9..a2cbd87 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c

@@ -269,27 +269,17 @@
 		EMIT4(0xa7c80000);
 	/* Clear A if the first register does not set it. */
 	switch (filter[0].code) {
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_IND:
-	case BPF_S_LD_H_IND:
-	case BPF_S_LD_B_IND:
-	case BPF_S_LD_IMM:
-	case BPF_S_LD_MEM:
-	case BPF_S_MISC_TXA:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_PKTTYPE:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_ANC_HATYPE:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_RET_K:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_IND:
+	case BPF_LD | BPF_H | BPF_IND:
+	case BPF_LD | BPF_B | BPF_IND:
+	case BPF_LD | BPF_IMM:
+	case BPF_LD | BPF_MEM:
+	case BPF_MISC | BPF_TXA:
+	case BPF_RET | BPF_K:
 		/* first instruction sets A register */
 		break;
 	default: /* A = 0 */
@@ -304,15 +294,18 @@
 	unsigned int K;
 	int offset;
 	unsigned int mask;
+	u16 code;
 
 	K = filter->k;
-	switch (filter->code) {
-	case BPF_S_ALU_ADD_X: /* A += X */
+	code = bpf_anc_helper(filter);
+
+	switch (code) {
+	case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
 		jit->seen |= SEEN_XREG;
 		/* ar %r5,%r12 */
 		EMIT2(0x1a5c);
 		break;
-	case BPF_S_ALU_ADD_K: /* A += K */
+	case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
 		if (!K)
 			break;
 		if (K <= 16383)
@@ -325,12 +318,12 @@
 			/* a %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_SUB_X: /* A -= X */
+	case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
 		jit->seen |= SEEN_XREG;
 		/* sr %r5,%r12 */
 		EMIT2(0x1b5c);
 		break;
-	case BPF_S_ALU_SUB_K: /* A -= K */
+	case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 		if (!K)
 			break;
 		if (K <= 16384)
@@ -343,12 +336,12 @@
 			/* s %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MUL_X: /* A *= X */
+	case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
 		jit->seen |= SEEN_XREG;
 		/* msr %r5,%r12 */
 		EMIT4(0xb252005c);
 		break;
-	case BPF_S_ALU_MUL_K: /* A *= K */
+	case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 		if (K <= 16383)
 			/* mhi %r5,K */
 			EMIT4_IMM(0xa75c0000, K);
@@ -359,7 +352,7 @@
 			/* ms %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x7150d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_DIV_X: /* A /= X */
+	case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -370,7 +363,7 @@
 		/* dlr %r4,%r12 */
 		EMIT4(0xb997004c);
 		break;
-	case BPF_S_ALU_DIV_K: /* A /= K */
+	case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 		if (K == 1)
 			break;
 		/* lhi %r4,0 */
@@ -378,7 +371,7 @@
 		/* dl %r4,<d(K)>(%r13) */
 		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MOD_X: /* A %= X */
+	case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -391,7 +384,7 @@
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_MOD_K: /* A %= K */
+	case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
 		if (K == 1) {
 			/* lhi %r5,0 */
 			EMIT4(0xa7580000);
@@ -404,12 +397,12 @@
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_AND_X: /* A &= X */
+	case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
 		jit->seen |= SEEN_XREG;
 		/* nr %r5,%r12 */
 		EMIT2(0x145c);
 		break;
-	case BPF_S_ALU_AND_K: /* A &= K */
+	case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
 		if (test_facility(21))
 			/* nilf %r5,<K> */
 			EMIT6_IMM(0xc05b0000, K);
@@ -417,12 +410,12 @@
 			/* n %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5450d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_OR_X: /* A |= X */
+	case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
 		jit->seen |= SEEN_XREG;
 		/* or %r5,%r12 */
 		EMIT2(0x165c);
 		break;
-	case BPF_S_ALU_OR_K: /* A |= K */
+	case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
 		if (test_facility(21))
 			/* oilf %r5,<K> */
 			EMIT6_IMM(0xc05d0000, K);
@@ -430,55 +423,55 @@
 			/* o %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5650d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-	case BPF_S_ALU_XOR_X:
+	case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+	case BPF_ALU | BPF_XOR | BPF_X:
 		jit->seen |= SEEN_XREG;
 		/* xr %r5,%r12 */
 		EMIT2(0x175c);
 		break;
-	case BPF_S_ALU_XOR_K: /* A ^= K */
+	case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 		if (!K)
 			break;
 		/* x %r5,<d(K)>(%r13) */
 		EMIT4_DISP(0x5750d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_LSH_X: /* A <<= X; */
+	case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 		jit->seen |= SEEN_XREG;
 		/* sll %r5,0(%r12) */
 		EMIT4(0x8950c000);
 		break;
-	case BPF_S_ALU_LSH_K: /* A <<= K */
+	case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
 		if (K == 0)
 			break;
 		/* sll %r5,K */
 		EMIT4_DISP(0x89500000, K);
 		break;
-	case BPF_S_ALU_RSH_X: /* A >>= X; */
+	case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 		jit->seen |= SEEN_XREG;
 		/* srl %r5,0(%r12) */
 		EMIT4(0x8850c000);
 		break;
-	case BPF_S_ALU_RSH_K: /* A >>= K; */
+	case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 		if (K == 0)
 			break;
 		/* srl %r5,K */
 		EMIT4_DISP(0x88500000, K);
 		break;
-	case BPF_S_ALU_NEG: /* A = -A */
+	case BPF_ALU | BPF_NEG: /* A = -A */
 		/* lnr %r5,%r5 */
 		EMIT2(0x1155);
 		break;
-	case BPF_S_JMP_JA: /* ip += K */
+	case BPF_JMP | BPF_JA: /* ip += K */
 		offset = addrs[i + K] + jit->start - jit->prg;
 		EMIT4_PCREL(0xa7f40000, offset);
 		break;
-	case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto kbranch;
-	case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto kbranch;
-	case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
 		mask = 0x800000; /* je */
 kbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -511,7 +504,7 @@
 			EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
 		}
 		break;
-	case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -525,13 +518,13 @@
 				EMIT4_IMM(0xa7510000, K);
 		}
 		goto branch;
-	case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto xbranch;
-	case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto xbranch;
-	case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
 		mask = 0x800000; /* je */
 xbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -540,7 +533,7 @@
 			EMIT2(0x195c);
 		}
 		goto branch;
-	case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -551,15 +544,15 @@
 			EMIT2(0x144c);
 		}
 		goto branch;
-	case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */
+	case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
 		offset = jit->off_load_word;
 		goto load_abs;
-	case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */
+	case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
 		offset = jit->off_load_half;
 		goto load_abs;
-	case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */
+	case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
 		offset = jit->off_load_byte;
 load_abs:	if ((int) K < 0)
@@ -573,19 +566,19 @@
 		/* jnz <ret0> */
 		EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
 		break;
-	case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */
+	case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
 		offset = jit->off_load_iword;
 		goto call_fn;
-	case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */
+	case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
 		offset = jit->off_load_ihalf;
 		goto call_fn;
-	case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */
+	case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
 		offset = jit->off_load_ibyte;
 		goto call_fn;
-	case BPF_S_LDX_B_MSH:
+	case BPF_LDX | BPF_B | BPF_MSH:
 		/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
 		jit->seen |= SEEN_RET0;
 		if ((int) K < 0) {
@@ -596,17 +589,17 @@
 		jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
 		offset = jit->off_load_bmsh;
 		goto call_fn;
-	case BPF_S_LD_W_LEN: /*	A = skb->len; */
+	case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 		/* l %r5,<d(len)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LDX_W_LEN: /* X = skb->len; */
+	case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 		jit->seen |= SEEN_XREG;
 		/* l %r12,<d(len)>(%r2) */
 		EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LD_IMM: /* A = K */
+	case BPF_LD | BPF_IMM: /* A = K */
 		if (K <= 16383)
 			/* lhi %r5,K */
 			EMIT4_IMM(0xa7580000, K);
@@ -617,7 +610,7 @@
 			/* l %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5850d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LDX_IMM: /* X = K */
+	case BPF_LDX | BPF_IMM: /* X = K */
 		jit->seen |= SEEN_XREG;
 		if (K <= 16383)
 			/* lhi %r12,<K> */
@@ -629,29 +622,29 @@
 			/* l %r12,<d(K)>(%r13) */
 			EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LD_MEM: /* A = mem[K] */
+	case BPF_LD | BPF_MEM: /* A = mem[K] */
 		jit->seen |= SEEN_MEM;
 		/* l %r5,<K>(%r15) */
 		EMIT4_DISP(0x5850f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_LDX_MEM: /* X = mem[K] */
+	case BPF_LDX | BPF_MEM: /* X = mem[K] */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* l %r12,<K>(%r15) */
 		EMIT4_DISP(0x58c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_MISC_TAX: /* X = A */
+	case BPF_MISC | BPF_TAX: /* X = A */
 		jit->seen |= SEEN_XREG;
 		/* lr %r12,%r5 */
 		EMIT2(0x18c5);
 		break;
-	case BPF_S_MISC_TXA: /* A = X */
+	case BPF_MISC | BPF_TXA: /* A = X */
 		jit->seen |= SEEN_XREG;
 		/* lr %r5,%r12 */
 		EMIT2(0x185c);
 		break;
-	case BPF_S_RET_K:
+	case BPF_RET | BPF_K:
 		if (K == 0) {
 			jit->seen |= SEEN_RET0;
 			if (last)
@@ -671,33 +664,33 @@
 			EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		}
 		break;
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_A:
 		/* llgfr %r2,%r5 */
 		EMIT4(0xb9160025);
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		break;
-	case BPF_S_ST: /* mem[K] = A */
+	case BPF_ST: /* mem[K] = A */
 		jit->seen |= SEEN_MEM;
 		/* st %r5,<K>(%r15) */
 		EMIT4_DISP(0x5050f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+	case BPF_STX: /* mem[K] = X */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* st %r12,<K>(%r15) */
 		EMIT4_DISP(0x50c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+	case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(protocol)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
 		break;
-	case BPF_S_ANC_IFINDEX:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->ifindex */
+	case BPF_ANC | SKF_AD_IFINDEX:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->ifindex */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -709,20 +702,20 @@
 		/* l %r5,<d(ifindex)>(%r1) */
 		EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
 		break;
-	case BPF_S_ANC_MARK: /* A = skb->mark */
+	case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 		/* l %r5,<d(mark)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
 		break;
-	case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */
+	case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(queue_mapping)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
 		break;
-	case BPF_S_ANC_HATYPE:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->type */
+	case BPF_ANC | SKF_AD_HATYPE:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->type */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -736,20 +729,20 @@
 		/* icm	%r5,3,<d(type)>(%r1) */
 		EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
 		break;
-	case BPF_S_ANC_RXHASH: /* A = skb->hash */
+	case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 		/* l %r5,<d(hash)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
 		break;
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
+	case BPF_ANC | SKF_AD_VLAN_TAG:
+	case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(vlan_tci)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
-		if (filter->code == BPF_S_ANC_VLAN_TAG) {
+		if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 			/* nill %r5,0xefff */
 			EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
 		} else {
@@ -759,7 +752,7 @@
 			EMIT4_DISP(0x88500000, 12);
 		}
 		break;
-	case BPF_S_ANC_PKTTYPE:
+	case BPF_ANC | SKF_AD_PKTTYPE:
 		if (pkt_type_offset < 0)
 			goto out;
 		/* lhi %r5,0 */
@@ -769,7 +762,7 @@
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
-	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
+	case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
 #ifdef CONFIG_SMP
 		/* l %r5,<d(cpu_nr)> */
 		EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));

diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
index bdbda14..04471dc 100644
--- a/arch/sparc/include/asm/checksum_32.h
+++ b/arch/sparc/include/asm/checksum_32.h

@@ -238,4 +238,16 @@
 	return csum_fold(csum_partial(buff, len, 0));
 }
 
+#define HAVE_ARCH_CSUM_ADD	/* presumably tells generic code csum_add() exists here - confirm */
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{	/* Returns csum + addend with the carry-out added back in. */
+	__asm__ __volatile__(
+		"addcc   %0, %1, %0\n"	/* csum += addend, sets carry */
+		"addx    %0, %%g0, %0"	/* csum += carry */
+		: "=r" (csum)
+		: "r" (addend), "0" (csum) : "cc");	/* addcc modifies the condition codes */
+
+	return csum;
+}
+
 #endif /* !(__SPARC_CHECKSUM_H) */

diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
index 019b961..2ff81ae 100644
--- a/arch/sparc/include/asm/checksum_64.h
+++ b/arch/sparc/include/asm/checksum_64.h

@@ -164,4 +164,16 @@
 	return csum_fold(csum_partial(buff, len, 0));
 }
 
+#define HAVE_ARCH_CSUM_ADD	/* presumably tells generic code csum_add() exists here - confirm */
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{	/* Returns the 32-bit sum csum + addend with the carry-out added back in. */
+	__asm__ __volatile__(
+		"addcc   %0, %1, %0\n"	/* csum += addend, sets carry */
+		"addx    %0, %%g0, %0"	/* csum += 32-bit (icc) carry */
+		: "=r" (csum)
+		: "r" (addend), "0" (csum) : "cc");	/* addcc modifies the condition codes */
+
+	return csum;
+}
+
 #endif /* !(__SPARC64_CHECKSUM_H) */

diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index a82c6b2..892a102 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c

@@ -83,9 +83,9 @@
 #define BNE		(F2(0, 2) | CONDNE)
 
 #ifdef CONFIG_SPARC64
-#define BNE_PTR		(F2(0, 1) | CONDNE | (2 << 20))
+#define BE_PTR		(F2(0, 1) | CONDE | (2 << 20))
 #else
-#define BNE_PTR		BNE
+#define BE_PTR		BE
 #endif
 
 #define SETHI(K, REG)	\
@@ -415,20 +415,11 @@
 		emit_reg_move(O7, r_saved_O7);
 
 		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_RET | BPF_K:
+		case BPF_LD | BPF_W | BPF_LEN:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			/* The first instruction sets the A register (or is
 			 * a "RET 'constant'")
 			 */
@@ -445,59 +436,60 @@
 			unsigned int t_offset;
 			unsigned int f_offset;
 			u32 t_op, f_op;
+			u16 code = bpf_anc_helper(&filter[i]);
 			int ilen;
 
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X:	/* A += X; */
+			switch (code) {
+			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
 				emit_alu_X(ADD);
 				break;
-			case BPF_S_ALU_ADD_K:	/* A += K; */
+			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
 				emit_alu_K(ADD, K);
 				break;
-			case BPF_S_ALU_SUB_X:	/* A -= X; */
+			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
 				emit_alu_X(SUB);
 				break;
-			case BPF_S_ALU_SUB_K:	/* A -= K */
+			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
 				emit_alu_K(SUB, K);
 				break;
-			case BPF_S_ALU_AND_X:	/* A &= X */
+			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
 				emit_alu_X(AND);
 				break;
-			case BPF_S_ALU_AND_K:	/* A &= K */
+			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
 				emit_alu_K(AND, K);
 				break;
-			case BPF_S_ALU_OR_X:	/* A |= X */
+			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
 				emit_alu_X(OR);
 				break;
-			case BPF_S_ALU_OR_K:	/* A |= K */
+			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
 				emit_alu_K(OR, K);
 				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
+			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+			case BPF_ALU | BPF_XOR | BPF_X:
 				emit_alu_X(XOR);
 				break;
-			case BPF_S_ALU_XOR_K:	/* A ^= K */
+			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
 				emit_alu_K(XOR, K);
 				break;
-			case BPF_S_ALU_LSH_X:	/* A <<= X */
+			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
 				emit_alu_X(SLL);
 				break;
-			case BPF_S_ALU_LSH_K:	/* A <<= K */
+			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
 				emit_alu_K(SLL, K);
 				break;
-			case BPF_S_ALU_RSH_X:	/* A >>= X */
+			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
 				emit_alu_X(SRL);
 				break;
-			case BPF_S_ALU_RSH_K:	/* A >>= K */
+			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
 				emit_alu_K(SRL, K);
 				break;
-			case BPF_S_ALU_MUL_X:	/* A *= X; */
+			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
 				emit_alu_X(MUL);
 				break;
-			case BPF_S_ALU_MUL_K:	/* A *= K */
+			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
 				emit_alu_K(MUL, K);
 				break;
-			case BPF_S_ALU_DIV_K:	/* A /= K with K != 0*/
+			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
 				if (K == 1)
 					break;
 				emit_write_y(G0);
@@ -512,7 +504,7 @@
 #endif
 				emit_alu_K(DIV, K);
 				break;
-			case BPF_S_ALU_DIV_X:	/* A /= X; */
+			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
 				emit_cmpi(r_X, 0);
 				if (pc_ret0 > 0) {
 					t_offset = addrs[pc_ret0 - 1];
@@ -544,10 +536,10 @@
 #endif
 				emit_alu_X(DIV);
 				break;
-			case BPF_S_ALU_NEG:
+			case BPF_ALU | BPF_NEG:
 				emit_neg();
 				break;
-			case BPF_S_RET_K:
+			case BPF_RET | BPF_K:
 				if (!K) {
 					if (pc_ret0 == -1)
 						pc_ret0 = i;
@@ -556,7 +548,7 @@
 					emit_loadimm(K, r_A);
 				}
 				/* Fallthrough */
-			case BPF_S_RET_A:
+			case BPF_RET | BPF_A:
 				if (seen_or_pass0) {
 					if (i != flen - 1) {
 						emit_jump(cleanup_addr);
@@ -573,18 +565,18 @@
 				emit_jmpl(r_saved_O7, 8, G0);
 				emit_reg_move(r_A, O0); /* delay slot */
 				break;
-			case BPF_S_MISC_TAX:
+			case BPF_MISC | BPF_TAX:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_A, r_X);
 				break;
-			case BPF_S_MISC_TXA:
+			case BPF_MISC | BPF_TXA:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_X, r_A);
 				break;
-			case BPF_S_ANC_CPU:
+			case BPF_ANC | SKF_AD_CPU:
 				emit_load_cpu(r_A);
 				break;
-			case BPF_S_ANC_PROTOCOL:
+			case BPF_ANC | SKF_AD_PROTOCOL:
 				emit_skb_load16(protocol, r_A);
 				break;
 #if 0
@@ -592,38 +584,38 @@
 				 * a bit field even though we very much
 				 * know what we are doing here.
 				 */
-			case BPF_S_ANC_PKTTYPE:
+			case BPF_ANC | SKF_AD_PKTTYPE:
 				__emit_skb_load8(pkt_type, r_A);
 				emit_alu_K(SRL, 5);
 				break;
 #endif
-			case BPF_S_ANC_IFINDEX:
+			case BPF_ANC | SKF_AD_IFINDEX:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
-				emit_branch(BNE_PTR, cleanup_addr + 4);
+				emit_branch(BE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load32(r_A, struct net_device, ifindex, r_A);
 				break;
-			case BPF_S_ANC_MARK:
+			case BPF_ANC | SKF_AD_MARK:
 				emit_skb_load32(mark, r_A);
 				break;
-			case BPF_S_ANC_QUEUE:
+			case BPF_ANC | SKF_AD_QUEUE:
 				emit_skb_load16(queue_mapping, r_A);
 				break;
-			case BPF_S_ANC_HATYPE:
+			case BPF_ANC | SKF_AD_HATYPE:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
-				emit_branch(BNE_PTR, cleanup_addr + 4);
+				emit_branch(BE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load16(r_A, struct net_device, type, r_A);
 				break;
-			case BPF_S_ANC_RXHASH:
+			case BPF_ANC | SKF_AD_RXHASH:
 				emit_skb_load32(hash, r_A);
 				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
+			case BPF_ANC | SKF_AD_VLAN_TAG:
+			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 				emit_skb_load16(vlan_tci, r_A);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
+				if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 					emit_andi(r_A, VLAN_VID_MASK, r_A);
 				} else {
 					emit_loadimm(VLAN_TAG_PRESENT, r_TMP);
@@ -631,44 +623,44 @@
 				}
 				break;
 
-			case BPF_S_LD_IMM:
+			case BPF_LD | BPF_IMM:
 				emit_loadimm(K, r_A);
 				break;
-			case BPF_S_LDX_IMM:
+			case BPF_LDX | BPF_IMM:
 				emit_loadimm(K, r_X);
 				break;
-			case BPF_S_LD_MEM:
+			case BPF_LD | BPF_MEM:
 				emit_ldmem(K * 4, r_A);
 				break;
-			case BPF_S_LDX_MEM:
+			case BPF_LDX | BPF_MEM:
 				emit_ldmem(K * 4, r_X);
 				break;
-			case BPF_S_ST:
+			case BPF_ST:
 				emit_stmem(K * 4, r_A);
 				break;
-			case BPF_S_STX:
+			case BPF_STX:
 				emit_stmem(K * 4, r_X);
 				break;
 
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-			case BPF_S_LD_W_ABS:
+			case BPF_LD | BPF_W | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
 common_load:			seen |= SEEN_DATAREF;
 				emit_loadimm(K, r_OFF);
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_ABS:
+			case BPF_LD | BPF_H | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
 				goto common_load;
-			case BPF_S_LD_B_ABS:
+			case BPF_LD | BPF_B | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
 				goto common_load;
-			case BPF_S_LDX_B_MSH:
+			case BPF_LDX | BPF_B | BPF_MSH:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
 				goto common_load;
-			case BPF_S_LD_W_IND:
+			case BPF_LD | BPF_W | BPF_IND:
 				func = bpf_jit_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				if (K) {
@@ -683,13 +675,13 @@
 				}
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_IND:
+			case BPF_LD | BPF_H | BPF_IND:
 				func = bpf_jit_load_half;
 				goto common_load_ind;
-			case BPF_S_LD_B_IND:
+			case BPF_LD | BPF_B | BPF_IND:
 				func = bpf_jit_load_byte;
 				goto common_load_ind;
-			case BPF_S_JMP_JA:
+			case BPF_JMP | BPF_JA:
 				emit_jump(addrs[i + K]);
 				emit_nop();
 				break;
@@ -700,14 +692,14 @@
 		f_op = FOP;		\
 		goto cond_branch
 
-			COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_K, BNE, BE);
-			COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_X, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
 
 cond_branch:			f_offset = addrs[i + filter[i].jf];
 				t_offset = addrs[i + filter[i].jt];
@@ -719,20 +711,20 @@
 					break;
 				}
 
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
+				switch (code) {
+				case BPF_JMP | BPF_JGT | BPF_X:
+				case BPF_JMP | BPF_JGE | BPF_X:
+				case BPF_JMP | BPF_JEQ | BPF_X:
 					seen |= SEEN_XREG;
 					emit_cmp(r_A, r_X);
 					break;
-				case BPF_S_JMP_JSET_X:
+				case BPF_JMP | BPF_JSET | BPF_X:
 					seen |= SEEN_XREG;
 					emit_btst(r_A, r_X);
 					break;
-				case BPF_S_JMP_JEQ_K:
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
+				case BPF_JMP | BPF_JEQ | BPF_K:
+				case BPF_JMP | BPF_JGT | BPF_K:
+				case BPF_JMP | BPF_JGE | BPF_K:
 					if (is_simm13(K)) {
 						emit_cmpi(r_A, K);
 					} else {
@@ -740,7 +732,7 @@
 						emit_cmp(r_A, r_TMP);
 					}
 					break;
-				case BPF_S_JMP_JSET_K:
+				case BPF_JMP | BPF_JSET | BPF_K:
 					if (is_simm13(K)) {
 						emit_btsti(r_A, K);
 					} else {

diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index d767ff9..48e4fd0 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h

@@ -94,7 +94,7 @@
 /* Sit on a nap instruction until interrupted. */
 extern void smp_nap(void);
 
-/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
+/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). */
 extern void _cpu_idle(void);
 
 #else /* __ASSEMBLY__ */

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 74c9172..112abab 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c

@@ -228,13 +228,10 @@
 #if defined(CONFIG_PCI) && !defined(__tilegx__)
 static int __init setup_pci_reserve(char* str)
 {
-	unsigned long mb;
-
-	if (str == NULL || strict_strtoul(str, 0, &mb) != 0 ||
-	    mb > 3 * 1024)
+	if (str == NULL || kstrtouint(str, 0, &pci_reserve_mb) != 0 ||
+	    pci_reserve_mb > 3 * 1024)
 		return -EINVAL;
 
-	pci_reserve_mb = mb;
 	pr_info("Reserving %dMB for PCIE root complex mappings\n",
 		pci_reserve_mb);
 	return 0;
@@ -691,7 +688,7 @@
 	/* Reserve any memory excluded by "memmap" arguments. */
 	for (i = 0; i < memmap_nr; ++i) {
 		struct memmap_entry *m = &memmap_map[i];
-		reserve_bootmem(m->addr, m->size, 0);
+		reserve_bootmem(m->addr, m->size, BOOTMEM_DEFAULT);
 	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -715,7 +712,8 @@
 
 #ifdef CONFIG_KEXEC
 	if (crashk_res.start != crashk_res.end)
-		reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0);
+		reserve_bootmem(crashk_res.start, resource_size(&crashk_res),
+				BOOTMEM_DEFAULT);
 #endif
 }
 

diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 2d1dbf3..d1d026f 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c

@@ -321,14 +321,13 @@
 
 static int __init crashinfo(char *str)
 {
-	unsigned long val;
 	const char *word;
 
 	if (*str == '\0')
-		val = 2;
-	else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0)
+		show_unhandled_signals = 2;
+	else if (*str != '=' || kstrtoint(++str, 0, &show_unhandled_signals) != 0)
 		return 0;
-	show_unhandled_signals = val;
+
 	switch (show_unhandled_signals) {
 	case 0:
 		word = "No";

diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 6b603d5..f3ceb63 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c

@@ -42,10 +42,9 @@
 	 * will still parse the instruction, then fire a SIGBUS with
 	 * the correct address from inside the single_step code.
 	 */
-	long val;
-	if (strict_strtol(str, 0, &val) != 0)
+	if (kstrtoint(str, 0, &unaligned_fixup) != 0)
 		return 0;
-	unaligned_fixup = val;
+
 	pr_info("Fixups for unaligned data accesses are %s\n",
 	       unaligned_fixup >= 0 ?
 	       (unaligned_fixup ? "enabled" : "disabled") :

diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index b030b4e..c02ea2a 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c

@@ -182,18 +182,7 @@
 	int i;
 	uint64_t reg;
 	uint64_t reg_map = 0, alias_reg_map = 0, map;
-	bool alias;
-
-	*ra = -1;
-	*rb = -1;
-
-	if (rd)
-		*rd = -1;
-
-	*clob1 = -1;
-	*clob2 = -1;
-	*clob3 = -1;
-	alias = false;
+	bool alias = false;
 
 	/*
 	 * Parse fault bundle, find potential used registers and mark
@@ -569,7 +558,7 @@
 	tilegx_bundle_bits bundle_2 = 0;
 	/* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
 	bool     bundle_2_enable = true;
-	uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
+	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
 	/*
 	 * Indicate if the unalign access
 	 * instruction's registers hit with

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 0fa1acf..bfb3127 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c

@@ -273,9 +273,9 @@
 	/*
 	 * Otherwise we just hand out consecutive cpus.  To avoid
 	 * requiring this function to hold state, we just walk forward from
-	 * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
-	 * the requested address, while walking cpu home around kdata_mask.
-	 * This is typically no more than a dozen or so iterations.
+	 * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to
+	 * reach the requested address, while walking cpu home around
+	 * kdata_mask. This is typically no more than a dozen or so iterations.
 	 */
 	page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK;
 	BUG_ON(address < page || address >= (ulong)_end);
@@ -912,7 +912,7 @@
 static int __init set_initfree(char *str)
 {
 	long val;
-	if (strict_strtol(str, 0, &val) == 0) {
+	if (kstrtol(str, 0, &val) == 0) {
 		initfree = val;
 		pr_info("initfree: %s free init pages\n",
 			initfree ? "will" : "won't");

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 36e658a..e4b1a96 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile

@@ -111,8 +111,7 @@
 KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
 
 archheaders:
-	$(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \
-		ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders
+	$(Q)$(MAKE) KBUILD_SRC= ARCH=$(HEADER_ARCH) archheaders
 
 archprepare: include/generated/user_constants.h
 

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b660088..fcefdda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -121,6 +121,7 @@
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
 	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_QUEUE_RWLOCK
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 4582e8e..7730c1c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h

@@ -57,6 +57,12 @@
 	.long (from) - . ;					\
 	.long (to) - . + 0x7ffffff0 ;				\
 	.popsection
+
+# define _ASM_NOKPROBE(entry)					\
+	.pushsection "_kprobe_blacklist","aw" ;			\
+	_ASM_ALIGN ;						\
+	_ASM_PTR (entry);					\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)					\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
@@ -71,6 +77,7 @@
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - . + 0x7ffffff0\n"			\
 	" .popsection\n"
+/* For C files, the NOKPROBE_SYMBOL macro is already available */
 #endif
 
 #endif /* _ASM_X86_ASM_H */

diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index e6fd8a0..cd00e17 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h

@@ -184,8 +184,15 @@
 	asm("addl %2,%0\n\t"
 	    "adcl $0,%0"
 	    : "=r" (a)
-	    : "0" (a), "r" (b));
+	    : "0" (a), "rm" (b));
 	return a;
 }
 
+#define HAVE_ARCH_CSUM_ADD	/* presumably tells generic code csum_add() exists here - confirm */
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{	/* Returns add32_with_carry(csum, addend), cast back to __wsum. */
+	return (__force __wsum)add32_with_carry((__force unsigned)csum,
+						(__force unsigned)addend);
+}
+
 #endif /* _ASM_X86_CHECKSUM_64_H */

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 9454c16..53cdfb2 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h

@@ -116,4 +116,6 @@
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
+extern int kprobe_int3_handler(struct pt_regs *regs);
+extern int kprobe_debug_handler(struct pt_regs *regs);
 #endif /* _ASM_X86_KPROBES_H */

diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
new file mode 100644
index 0000000..70f46f0
--- /dev/null
+++ b/arch/x86/include/asm/qrwlock.h

@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_QRWLOCK_H
+#define _ASM_X86_QRWLOCK_H
+
+#include <asm-generic/qrwlock_types.h>
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+#define queue_write_unlock queue_write_unlock	/* presumably lets asm-generic/qrwlock.h detect this override - confirm */
+static inline void queue_write_unlock(struct qrwlock *lock)
+{	/* Unlock by clearing one byte of lock->cnts with a plain store. */
+        barrier();	/* compiler barrier ahead of the store below */
+        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;	/* zeroes only the lowest-addressed byte; NOTE(review): assumes the writer state lives there */
+}
+#endif
+
+#include <asm-generic/qrwlock.h>
+
+#endif /* _ASM_X86_QRWLOCK_H */

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 0f62f54..54f1c80 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h

@@ -187,6 +187,7 @@
 		cpu_relax();
 }
 
+#ifndef CONFIG_QUEUE_RWLOCK
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -269,6 +270,9 @@
 	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
 		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
+#else
+#include <asm/qrwlock.h>
+#endif /* CONFIG_QUEUE_RWLOCK */
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 4f1bea1..73c4c00 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h

@@ -34,6 +34,10 @@
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
+#ifdef CONFIG_QUEUE_RWLOCK
+#include <asm-generic/qrwlock_types.h>
+#else
 #include <asm/rwlock.h>
+#endif
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 8ba1884..bc8352e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h

@@ -68,7 +68,7 @@
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
+asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
@@ -103,7 +103,6 @@
 
 extern int panic_on_unrecovered_nmi;
 
-void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b..74f4c2f 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h

@@ -41,18 +41,18 @@
 		u8			ixol[MAX_UINSN_BYTES];
 	};
 
-	u16				fixups;
 	const struct uprobe_xol_ops	*ops;
 
 	union {
-#ifdef CONFIG_X86_64
-		unsigned long			rip_rela_target_address;
-#endif
 		struct {
 			s32	offs;
 			u8	ilen;
 			u8	opc1;
-		}				branch;
+		}			branch;
+		struct {
+			u8	fixups;
+			u8	ilen;
+		} 			defparam;
 	};
 };
 

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 491ef3e..047f9ff 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile

@@ -26,6 +26,7 @@
 obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
+obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df94598..703130f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c

@@ -5,7 +5,6 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
-#include <linux/kprobes.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
@@ -551,7 +550,7 @@
  *
  * Note: Must be called under text_mutex.
  */
-void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+void *text_poke(void *addr, const void *opcode, size_t len)
 {
 	unsigned long flags;
 	char *vaddr;

diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index eab6704..c3fcb5d 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c

@@ -60,7 +60,7 @@
 	smp_mb__after_atomic();
 }
 
-static int __kprobes
+static int
 arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	int cpu;
@@ -80,6 +80,7 @@
 
 	return NMI_DONE;
 }
+NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
 
 static int __init register_trigger_all_cpu_backtrace(void)
 {

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9d0a979..81e08ef 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c

@@ -2297,7 +2297,7 @@
 	int err;
 
 	if (!config_enabled(CONFIG_SMP))
-		return -1;
+		return -EPERM;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -EINVAL;
@@ -2328,7 +2328,7 @@
 	int ret;
 
 	if (!config_enabled(CONFIG_SMP))
-		return -1;
+		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	ret = __ioapic_set_affinity(data, mask, &dest);
@@ -3001,9 +3001,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	__get_cached_msi_msg(data->msi_desc, &msg);
 
@@ -3100,9 +3102,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest, irq = data->irq;
 	struct msi_msg msg;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	dmar_msi_read(irq, &msg);
 
@@ -3149,9 +3153,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	hpet_msi_read(data->handler_data, &msg);
 
@@ -3218,9 +3224,11 @@
 {
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	target_ht_irq(data->irq, dest, cfg->vector);
 	return IRQ_SET_MASK_OK_NOCOPY;

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2cbbf88..ef1b93f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c

@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/kprobes.h>
 #include <linux/kgdb.h>
 #include <linux/smp.h>
 #include <linux/io.h>
@@ -1193,6 +1194,7 @@
 		(addr <= __get_cpu_var(debug_stack_addr) &&
 		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
+NOKPROBE_SYMBOL(is_debug_stack);
 
 DEFINE_PER_CPU(u32, debug_idt_ctr);
 
@@ -1201,6 +1203,7 @@
 	this_cpu_inc(debug_idt_ctr);
 	load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_set_zero);
 
 void debug_stack_reset(void)
 {
@@ -1209,6 +1212,7 @@
 	if (this_cpu_dec_return(debug_idt_ctr) == 0)
 		load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_reset);
 
 #else	/* CONFIG_X86_64 */
 

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 76f98fe..a450373 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c

@@ -132,15 +132,6 @@
 		lapic_timer_frequency = hv_lapic_frequency;
 		printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
 				lapic_timer_frequency);
-
-		/*
-		 * On Hyper-V, when we are booting off an EFI firmware stack,
-		 * we do not have many legacy devices including PIC, PIT etc.
-		 */
-		if (efi_enabled(EFI_BOOT)) {
-			printk(KERN_INFO "HyperV: Using null_legacy_pic\n");
-			legacy_pic = &null_legacy_pic;
-		}
 	}
 #endif
 

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 89f3b7c..2bdfbff 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c

@@ -303,15 +303,6 @@
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
-	} else {
-		/*
-		 * If we have a PMU initialized but no APIC
-		 * interrupts, we cannot sample hardware
-		 * events (user-space has to fall back and
-		 * sample via a hrtimer based software event):
-		 */
-		if (!x86_pmu.apic)
-			return -EOPNOTSUPP;
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
@@ -1293,7 +1284,7 @@
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-static int __kprobes
+static int
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	u64 start_clock;
@@ -1311,6 +1302,7 @@
 
 	return ret;
 }
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
 
 struct event_constraint emptyconstraint;
 struct event_constraint unconstrained;
@@ -1366,6 +1358,15 @@
 	x86_pmu.apic = 0;
 	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
 	pr_info("no hardware sampling interrupt available.\n");
+
+	/*
+	 * If we have a PMU initialized but no APIC
+	 * interrupts, we cannot sample hardware
+	 * events (user-space has to fall back and
+	 * sample via a hrtimer based software event):
+	 */
+	pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
 }
 
 static struct attribute_group x86_pmu_format_group = {

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 4c36bbe..cbb1be3e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c

@@ -593,7 +593,7 @@
 	return 1;
 }
 
-static int __kprobes
+static int
 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	int handled = 0;
@@ -606,6 +606,7 @@
 
 	return handled;
 }
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
 
 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 {

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d82d155..9dd2459 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c

@@ -384,6 +384,9 @@
 	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
 		mask |= X86_BR_NO_TX;
 
+	if (br_type & PERF_SAMPLE_BRANCH_COND)
+		mask |= X86_BR_JCC;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -678,6 +681,7 @@
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
 	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -689,6 +693,7 @@
 	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
 					| LBR_FAR,
 	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
 };
 
 /* core */

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index d9c12d3..b74ebc7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c

@@ -200,7 +200,7 @@
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
-unsigned __kprobes long oops_begin(void)
+unsigned long oops_begin(void)
 {
 	int cpu;
 	unsigned long flags;
@@ -223,8 +223,9 @@
 	return flags;
 }
 EXPORT_SYMBOL_GPL(oops_begin);
+NOKPROBE_SYMBOL(oops_begin);
 
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
 	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
@@ -247,8 +248,9 @@
 		panic("Fatal exception");
 	do_exit(signr);
 }
+NOKPROBE_SYMBOL(oops_end);
 
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+int __die(const char *str, struct pt_regs *regs, long err)
 {
 #ifdef CONFIG_X86_32
 	unsigned short ss;
@@ -291,6 +293,7 @@
 #endif
 	return 0;
 }
+NOKPROBE_SYMBOL(__die);
 
 /*
  * This is gone through when something in the kernel has done something bad

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6cda0ba..2e1a685 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c

@@ -419,7 +419,7 @@
 	return gmch_ctrl << 25; /* 32 MB units */
 }
 
-static size_t gen8_stolen_size(int num, int slot, int func)
+static size_t __init gen8_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
 
@@ -429,48 +429,73 @@
 	return gmch_ctrl << 25; /* 32 MB units */
 }
 
+static size_t __init chv_stolen_size(int num, int slot, int func)
+{
+	u16 gmch_ctrl;
+
+	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);	/* 16-bit PCI config read for device num/slot/func */
+	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;	/* move the GMS field down to bit 0 ... */
+	gmch_ctrl &= SNB_GMCH_GMS_MASK;		/* ... and drop everything above it */
+
+	/*
+	 * Decode the GMS field extracted above into a stolen-memory size in
+	 * bytes.  A shift by 25 multiplies by 32MB; a shift by 22 multiplies
+	 * by 4MB.
+	 *
+	 * 0x0  to 0x10: 32MB increments starting at 0MB
+	 * 0x11 to 0x16: 4MB increments starting at 8MB  (0x11 -> 2 * 4MB)
+	 * 0x17 to 0x1d: 4MB increments starting at 36MB (0x17 -> 9 * 4MB)
+	 *
+	 * Values above 0x1d are not range-checked here; they fall into the
+	 * last branch and keep counting in 4MB steps.
+	 */
+	if (gmch_ctrl < 0x11)
+		return gmch_ctrl << 25;
+	else if (gmch_ctrl < 0x17)
+		return (gmch_ctrl - 0x11 + 2) << 22;
+	else
+		return (gmch_ctrl - 0x17 + 9) << 22;
+}
 
 struct intel_stolen_funcs {
 	size_t (*size)(int num, int slot, int func);
 	u32 (*base)(int num, int slot, int func, size_t size);
 };
 
-static const struct intel_stolen_funcs i830_stolen_funcs = {
+static const struct intel_stolen_funcs i830_stolen_funcs __initconst = {
 	.base = i830_stolen_base,
 	.size = i830_stolen_size,
 };
 
-static const struct intel_stolen_funcs i845_stolen_funcs = {
+static const struct intel_stolen_funcs i845_stolen_funcs __initconst = {
 	.base = i845_stolen_base,
 	.size = i830_stolen_size,
 };
 
-static const struct intel_stolen_funcs i85x_stolen_funcs = {
+static const struct intel_stolen_funcs i85x_stolen_funcs __initconst = {
 	.base = i85x_stolen_base,
 	.size = gen3_stolen_size,
 };
 
-static const struct intel_stolen_funcs i865_stolen_funcs = {
+static const struct intel_stolen_funcs i865_stolen_funcs __initconst = {
 	.base = i865_stolen_base,
 	.size = gen3_stolen_size,
 };
 
-static const struct intel_stolen_funcs gen3_stolen_funcs = {
+static const struct intel_stolen_funcs gen3_stolen_funcs __initconst = {
 	.base = intel_stolen_base,
 	.size = gen3_stolen_size,
 };
 
-static const struct intel_stolen_funcs gen6_stolen_funcs = {
+static const struct intel_stolen_funcs gen6_stolen_funcs __initconst = {
 	.base = intel_stolen_base,
 	.size = gen6_stolen_size,
 };
 
-static const struct intel_stolen_funcs gen8_stolen_funcs = {
+static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = {
 	.base = intel_stolen_base,
 	.size = gen8_stolen_size,
 };
 
-static struct pci_device_id intel_stolen_ids[] __initdata = {
+static const struct intel_stolen_funcs chv_stolen_funcs __initconst = {
+	.base = intel_stolen_base,
+	.size = chv_stolen_size,
+};
+
+static const struct pci_device_id intel_stolen_ids[] __initconst = {
 	INTEL_I830_IDS(&i830_stolen_funcs),
 	INTEL_I845G_IDS(&i845_stolen_funcs),
 	INTEL_I85X_IDS(&i85x_stolen_funcs),
@@ -496,7 +521,8 @@
 	INTEL_HSW_D_IDS(&gen6_stolen_funcs),
 	INTEL_HSW_M_IDS(&gen6_stolen_funcs),
 	INTEL_BDW_M_IDS(&gen8_stolen_funcs),
-	INTEL_BDW_D_IDS(&gen8_stolen_funcs)
+	INTEL_BDW_D_IDS(&gen8_stolen_funcs),
+	INTEL_CHV_IDS(&chv_stolen_funcs),
 };
 
 static void __init intel_graphics_stolen(int num, int slot, int func)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 98313ff..f0da82b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S

@@ -315,10 +315,6 @@
 ENDPROC(ret_from_kernel_thread)
 
 /*
- * Interrupt exit functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-/*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
  * go as quickly as possible which is why some of this is
@@ -372,10 +368,6 @@
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -495,10 +487,6 @@
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
-/*
- * syscall stub including irq exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -690,10 +678,6 @@
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -784,10 +768,6 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-/*
- *  Irq entries should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -964,10 +944,6 @@
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
-/*
- * End of kprobes section
- */
-	.popsection
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1242,11 +1218,6 @@
 	jmp *%ecx
 #endif
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 #ifdef CONFIG_TRACING
 ENTRY(trace_page_fault)
 	RING0_EC_FRAME
@@ -1460,7 +1431,3 @@
 END(async_page_fault)
 #endif
 
-/*
- * End of kprobes section
- */
-	.popsection

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9698798..b25ca96 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S

@@ -53,7 +53,6 @@
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
-#include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
 #include <asm/context_tracking.h>
@@ -70,209 +69,6 @@
 	.code64
 	.section .entry.text, "ax"
 
-#ifdef CONFIG_FUNCTION_TRACER
-
-#ifdef CC_USING_FENTRY
-# define function_hook	__fentry__
-#else
-# define function_hook	mcount
-#endif
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-ENTRY(function_hook)
-	retq
-END(function_hook)
-
-/* skip is set if stack has been adjusted */
-.macro ftrace_caller_setup skip=0
-	MCOUNT_SAVE_FRAME \skip
-
-	/* Load the ftrace_ops into the 3rd parameter */
-	movq function_trace_op(%rip), %rdx
-
-	/* Load ip into the first parameter */
-	movq RIP(%rsp), %rdi
-	subq $MCOUNT_INSN_SIZE, %rdi
-	/* Load the parent_ip into the second parameter */
-#ifdef CC_USING_FENTRY
-	movq SS+16(%rsp), %rsi
-#else
-	movq 8(%rbp), %rsi
-#endif
-.endm
-
-ENTRY(ftrace_caller)
-	/* Check if tracing was disabled (quick check) */
-	cmpl $0, function_trace_stop
-	jne  ftrace_stub
-
-	ftrace_caller_setup
-	/* regs go into 4th parameter (but make it NULL) */
-	movq $0, %rcx
-
-GLOBAL(ftrace_call)
-	call ftrace_stub
-
-	MCOUNT_RESTORE_FRAME
-ftrace_return:
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call)
-	jmp ftrace_stub
-#endif
-
-GLOBAL(ftrace_stub)
-	retq
-END(ftrace_caller)
-
-ENTRY(ftrace_regs_caller)
-	/* Save the current flags before compare (in SS location)*/
-	pushfq
-
-	/* Check if tracing was disabled (quick check) */
-	cmpl $0, function_trace_stop
-	jne  ftrace_restore_flags
-
-	/* skip=8 to skip flags saved in SS */
-	ftrace_caller_setup 8
-
-	/* Save the rest of pt_regs */
-	movq %r15, R15(%rsp)
-	movq %r14, R14(%rsp)
-	movq %r13, R13(%rsp)
-	movq %r12, R12(%rsp)
-	movq %r11, R11(%rsp)
-	movq %r10, R10(%rsp)
-	movq %rbp, RBP(%rsp)
-	movq %rbx, RBX(%rsp)
-	/* Copy saved flags */
-	movq SS(%rsp), %rcx
-	movq %rcx, EFLAGS(%rsp)
-	/* Kernel segments */
-	movq $__KERNEL_DS, %rcx
-	movq %rcx, SS(%rsp)
-	movq $__KERNEL_CS, %rcx
-	movq %rcx, CS(%rsp)
-	/* Stack - skipping return address */
-	leaq SS+16(%rsp), %rcx
-	movq %rcx, RSP(%rsp)
-
-	/* regs go into 4th parameter */
-	leaq (%rsp), %rcx
-
-GLOBAL(ftrace_regs_call)
-	call ftrace_stub
-
-	/* Copy flags back to SS, to restore them */
-	movq EFLAGS(%rsp), %rax
-	movq %rax, SS(%rsp)
-
-	/* Handlers can change the RIP */
-	movq RIP(%rsp), %rax
-	movq %rax, SS+8(%rsp)
-
-	/* restore the rest of pt_regs */
-	movq R15(%rsp), %r15
-	movq R14(%rsp), %r14
-	movq R13(%rsp), %r13
-	movq R12(%rsp), %r12
-	movq R10(%rsp), %r10
-	movq RBP(%rsp), %rbp
-	movq RBX(%rsp), %rbx
-
-	/* skip=8 to skip flags saved in SS */
-	MCOUNT_RESTORE_FRAME 8
-
-	/* Restore flags */
-	popfq
-
-	jmp ftrace_return
-ftrace_restore_flags:
-	popfq
-	jmp  ftrace_stub
-
-END(ftrace_regs_caller)
-
-
-#else /* ! CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(function_hook)
-	cmpl $0, function_trace_stop
-	jne  ftrace_stub
-
-	cmpq $ftrace_stub, ftrace_trace_function
-	jnz trace
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	cmpq $ftrace_stub, ftrace_graph_return
-	jnz ftrace_graph_caller
-
-	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
-	jnz ftrace_graph_caller
-#endif
-
-GLOBAL(ftrace_stub)
-	retq
-
-trace:
-	MCOUNT_SAVE_FRAME
-
-	movq RIP(%rsp), %rdi
-#ifdef CC_USING_FENTRY
-	movq SS+16(%rsp), %rsi
-#else
-	movq 8(%rbp), %rsi
-#endif
-	subq $MCOUNT_INSN_SIZE, %rdi
-
-	call   *ftrace_trace_function
-
-	MCOUNT_RESTORE_FRAME
-
-	jmp ftrace_stub
-END(function_hook)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
-	MCOUNT_SAVE_FRAME
-
-#ifdef CC_USING_FENTRY
-	leaq SS+16(%rsp), %rdi
-	movq $0, %rdx	/* No framepointers needed */
-#else
-	leaq 8(%rbp), %rdi
-	movq (%rbp), %rdx
-#endif
-	movq RIP(%rsp), %rsi
-	subq $MCOUNT_INSN_SIZE, %rsi
-
-	call	prepare_ftrace_return
-
-	MCOUNT_RESTORE_FRAME
-
-	retq
-END(ftrace_graph_caller)
-
-GLOBAL(return_to_handler)
-	subq  $24, %rsp
-
-	/* Save the return values */
-	movq %rax, (%rsp)
-	movq %rdx, 8(%rsp)
-	movq %rbp, %rdi
-
-	call ftrace_return_to_handler
-
-	movq %rax, %rdi
-	movq 8(%rsp), %rdx
-	movq (%rsp), %rax
-	addq $24, %rsp
-	jmp *%rdi
-#endif
-
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
@@ -488,8 +284,6 @@
 	TRACE_IRQS_OFF
 	.endm
 
-/* save complete stack frame */
-	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -518,7 +312,6 @@
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
-	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -976,10 +769,6 @@
 	call \func
 	.endm
 
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	/*
 	 * The interrupt stubs push (~vector+0x80) onto the stack and
 	 * then jump to common_interrupt.
@@ -1187,11 +976,6 @@
 #endif
 
 /*
- * End of kprobes section
- */
-       .popsection
-
-/*
  * APIC interrupts.
  */
 .macro apicinterrupt3 num sym do_sym
@@ -1525,11 +1309,6 @@
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
@@ -1946,7 +1725,3 @@
 	CFI_ENDPROC
 END(ignore_sysret)
 
-/*
- * End of kprobes section
- */
-	.popsection

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 52819e8..cbc4a91 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c

@@ -297,16 +297,7 @@
 
 static int ftrace_write(unsigned long ip, const char *val, int size)
 {
-	/*
-	 * On x86_64, kernel text mappings are mapped read-only with
-	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
-	 * of the kernel text mapping to modify the kernel text.
-	 *
-	 * For 32bit kernels, these mappings are same and we can use
-	 * kernel identity mapping to modify code.
-	 */
-	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa_symbol(ip));
+	ip = text_ip_addr(ip);
 
 	if (probe_kernel_write((void *)ip, val, size))
 		return -EPERM;
@@ -349,40 +340,14 @@
 	return add_break(rec->ip, old);
 }
 
-/*
- * If the record has the FTRACE_FL_REGS set, that means that it
- * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
- */
-static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
-/*
- * The FTRACE_FL_REGS_EN is set when the record already points to
- * a function that saves all the regs. Basically the '_EN' version
- * represents the current state of the function.
- */
-static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS_EN)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
 static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_addr;
 	int ret;
 
-	ret = ftrace_test_record(rec, enable);
+	ftrace_addr = ftrace_get_addr_curr(rec);
 
-	ftrace_addr = get_ftrace_addr(rec);
+	ret = ftrace_test_record(rec, enable);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -392,10 +357,7 @@
 		/* converting nop to call */
 		return add_brk_on_nop(rec);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
-		ftrace_addr = get_ftrace_old_addr(rec);
-		/* fall through */
 	case FTRACE_UPDATE_MAKE_NOP:
 		/* converting a call to a nop */
 		return add_brk_on_call(rec, ftrace_addr);
@@ -440,14 +402,14 @@
 		 * If not, don't touch the breakpoint, we make just create
 		 * a disaster.
 		 */
-		ftrace_addr = get_ftrace_addr(rec);
+		ftrace_addr = ftrace_get_addr_new(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
 			goto update;
 
 		/* Check both ftrace_addr and ftrace_old_addr */
-		ftrace_addr = get_ftrace_old_addr(rec);
+		ftrace_addr = ftrace_get_addr_curr(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
@@ -491,13 +453,12 @@
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr  = get_ftrace_addr(rec);
+	ftrace_addr  = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
@@ -538,13 +499,12 @@
 
 	ret = ftrace_update_record(rec, enable);
 
-	ftrace_addr = get_ftrace_addr(rec);
+	ftrace_addr = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
@@ -621,8 +581,8 @@
 	return;
 
  remove_breakpoints:
+	pr_warn("Failed on %s (%d):\n", report, count);
 	ftrace_bug(ret, rec ? rec->ip : 0);
-	printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
 	for_ftrace_rec_iter(iter) {
 		rec = ftrace_rec_iter_record(iter);
 		/*

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a67b47c..5f9cf20 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c

@@ -32,7 +32,6 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
-#include <linux/kprobes.h>
 #include <linux/percpu.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
@@ -424,7 +423,7 @@
  * NOTIFY_STOP returned for all other cases
  *
  */
-static int __kprobes hw_breakpoint_handler(struct die_args *args)
+static int hw_breakpoint_handler(struct die_args *args)
 {
 	int i, cpu, rc = NOTIFY_STOP;
 	struct perf_event *bp;
@@ -511,7 +510,7 @@
 /*
  * Handle debug exception notifications.
  */
-int __kprobes hw_breakpoint_exceptions_notify(
+int hw_breakpoint_exceptions_notify(
 		struct notifier_block *unused, unsigned long val, void *data)
 {
 	if (val != DIE_DEBUG)

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 2e977b5..8af8171 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c

@@ -299,13 +299,31 @@
 static void init_8259A(int auto_eoi)
 {
 	unsigned long flags;
+	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
+	unsigned char new_val;
 
 	i8259A_auto_eoi = auto_eoi;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	/*
+	 * Check to see if we have a PIC.
+	 * Mask all except the cascade and read
+	 * back the value we just wrote. If we don't
+	 * have a PIC, we will read 0xff as opposed to the
+	 * value we wrote.
+	 */
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+	outb(probe_val, PIC_MASTER_IMR);
+	new_val = inb(PIC_MASTER_IMR);
+	if (new_val != probe_val) {
+		printk(KERN_INFO "Using NULL legacy PIC\n");
+		legacy_pic = &null_legacy_pic;
+		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+		return;
+	}
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 
 	/*
 	 * outb_pic - this has to work on a wide range of PC hardware.

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 11ccfb0..922d285 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c

@@ -365,6 +365,7 @@
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
+	int ret;
 
 	for_each_irq_desc(irq, desc) {
 		int break_affinity = 0;
@@ -403,10 +404,14 @@
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 
-		if (chip->irq_set_affinity)
-			chip->irq_set_affinity(data, affinity, true);
-		else if (!(warned++))
-			set_affinity = 0;
+		if (chip->irq_set_affinity) {
+			ret = chip->irq_set_affinity(data, affinity, true);
+			if (ret == -ENOSPC)
+				pr_crit("IRQ %d set affinity failed because there are no available vectors.  The device assigned to this IRQ is unstable.\n", irq);
+		} else {
+			if (!(warned++))
+				set_affinity = 0;
+		}
 
 		/*
 		 * We unmask if the irq was not marked masked by the

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 61b17dc..7596df6 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c

@@ -112,7 +112,8 @@
 
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
-static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
+static nokprobe_inline void
+__synthesize_relative_insn(void *from, void *to, u8 op)
 {
 	struct __arch_relative_insn {
 		u8 op;
@@ -125,21 +126,23 @@
 }
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-void __kprobes synthesize_reljump(void *from, void *to)
+void synthesize_reljump(void *from, void *to)
 {
 	__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_reljump);
 
 /* Insert a call instruction at address 'from', which calls address 'to'.*/
-void __kprobes synthesize_relcall(void *from, void *to)
+void synthesize_relcall(void *from, void *to)
 {
 	__synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_relcall);
 
 /*
  * Skip the prefixes of the instruction.
  */
-static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
+static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
 {
 	insn_attr_t attr;
 
@@ -154,12 +157,13 @@
 #endif
 	return insn;
 }
+NOKPROBE_SYMBOL(skip_prefixes);
 
 /*
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
@@ -260,7 +264,7 @@
 }
 
 /* Check if paddr is at an instruction boundary */
-static int __kprobes can_probe(unsigned long paddr)
+static int can_probe(unsigned long paddr)
 {
 	unsigned long addr, __addr, offset = 0;
 	struct insn insn;
@@ -299,7 +303,7 @@
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
-static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
+static int is_IF_modifier(kprobe_opcode_t *insn)
 {
 	/* Skip prefixes */
 	insn = skip_prefixes(insn);
@@ -322,7 +326,7 @@
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-int __kprobes __copy_instruction(u8 *dest, u8 *src)
+int __copy_instruction(u8 *dest, u8 *src)
 {
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -365,7 +369,7 @@
 	return insn.length;
 }
 
-static int __kprobes arch_copy_kprobe(struct kprobe *p)
+static int arch_copy_kprobe(struct kprobe *p)
 {
 	int ret;
 
@@ -392,7 +396,7 @@
 	return 0;
 }
 
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
 {
 	if (alternatives_text_reserved(p->addr, p->addr))
 		return -EINVAL;
@@ -407,17 +411,17 @@
 	return arch_copy_kprobe(p);
 }
 
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 }
 
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, &p->opcode, 1);
 }
 
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
 		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
@@ -425,7 +429,8 @@
 	}
 }
 
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	kcb->prev_kprobe.kp = kprobe_running();
 	kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -433,7 +438,8 @@
 	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 }
 
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -441,8 +447,9 @@
 	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 }
 
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-				struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+		   struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, p);
 	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
@@ -451,7 +458,7 @@
 		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 }
 
-static void __kprobes clear_btf(void)
+static nokprobe_inline void clear_btf(void)
 {
 	if (test_thread_flag(TIF_BLOCKSTEP)) {
 		unsigned long debugctl = get_debugctlmsr();
@@ -461,7 +468,7 @@
 	}
 }
 
-static void __kprobes restore_btf(void)
+static nokprobe_inline void restore_btf(void)
 {
 	if (test_thread_flag(TIF_BLOCKSTEP)) {
 		unsigned long debugctl = get_debugctlmsr();
@@ -471,8 +478,7 @@
 	}
 }
 
-void __kprobes
-arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	unsigned long *sara = stack_addr(regs);
 
@@ -481,9 +487,10 @@
 	/* Replace the return addr with trampoline addr */
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
-static void __kprobes
-setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb, int reenter)
 {
 	if (setup_detour_execution(p, regs, reenter))
 		return;
@@ -519,22 +526,24 @@
 	else
 		regs->ip = (unsigned long)p->ainsn.insn;
 }
+NOKPROBE_SYMBOL(setup_singlestep);
 
 /*
  * We have reentered the kprobe_handler(), since another probe was hit while
  * within the handler. We save the original kprobes variables and just single
  * step on the instruction of the new probe without calling any user handlers.
  */
-static int __kprobes
-reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+			  struct kprobe_ctlblk *kcb)
 {
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
 	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SS:
 		kprobes_inc_nmissed_count(p);
 		setup_singlestep(p, regs, kcb, 1);
 		break;
-	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
 		/* A probe has been hit in the codepath leading up to, or just
 		 * after, single-stepping of a probed instruction. This entire
 		 * codepath should strictly reside in .kprobes.text section.
@@ -553,12 +562,13 @@
 
 	return 1;
 }
+NOKPROBE_SYMBOL(reenter_kprobe);
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int kprobe_int3_handler(struct pt_regs *regs)
 {
 	kprobe_opcode_t *addr;
 	struct kprobe *p;
@@ -621,12 +631,13 @@
 	preempt_enable_no_resched();
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_int3_handler);
 
 /*
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
-static void __used __kprobes kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
 	asm volatile (
 			".global kretprobe_trampoline\n"
@@ -657,11 +668,13 @@
 #endif
 			"	ret\n");
 }
+NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
+NOKPROBE_SYMBOL(kretprobe_trampoline);
 
 /*
  * Called from kretprobe_trampoline
  */
-__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
+__visible __used void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
@@ -747,6 +760,7 @@
 	}
 	return (void *)orig_ret_address;
 }
+NOKPROBE_SYMBOL(trampoline_handler);
 
 /*
  * Called after single-stepping.  p->addr is the address of the
@@ -775,8 +789,8 @@
  * jump instruction after the copied instruction, that jumps to the next
  * instruction after the probepoint.
  */
-static void __kprobes
-resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb)
 {
 	unsigned long *tos = stack_addr(regs);
 	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -851,12 +865,13 @@
 no_change:
 	restore_btf();
 }
+NOKPROBE_SYMBOL(resume_execution);
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int kprobe_debug_handler(struct pt_regs *regs)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -891,8 +906,9 @@
 
 	return 1;
 }
+NOKPROBE_SYMBOL(kprobe_debug_handler);
 
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -949,12 +965,13 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 /*
  * Wrapper routine for handling exceptions.
  */
-int __kprobes
-kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
 {
 	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
@@ -962,22 +979,7 @@
 	if (args->regs && user_mode_vm(args->regs))
 		return ret;
 
-	switch (val) {
-	case DIE_INT3:
-		if (kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs)) {
-			/*
-			 * Reset the BS bit in dr6 (pointed by args->err) to
-			 * denote completion of processing
-			 */
-			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
-			ret = NOTIFY_STOP;
-		}
-		break;
-	case DIE_GPF:
+	if (val == DIE_GPF) {
 		/*
 		 * To be potentially processing a kprobe fault and to
 		 * trust the result from kprobe_running(), we have
@@ -986,14 +988,12 @@
 		if (!preemptible() && kprobe_running() &&
 		    kprobe_fault_handler(args->regs, args->trapnr))
 			ret = NOTIFY_STOP;
-		break;
-	default:
-		break;
 	}
 	return ret;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr;
@@ -1017,8 +1017,9 @@
 	regs->ip = (unsigned long)(jp->entry);
 	return 1;
 }
+NOKPROBE_SYMBOL(setjmp_pre_handler);
 
-void __kprobes jprobe_return(void)
+void jprobe_return(void)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
@@ -1034,8 +1035,10 @@
 			"       nop			\n"::"b"
 			(kcb->jprobe_saved_sp):"memory");
 }
+NOKPROBE_SYMBOL(jprobe_return);
+NOKPROBE_SYMBOL(jprobe_return_end);
 
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->ip - 1);
@@ -1063,13 +1066,22 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(longjmp_break_handler);
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+	return  (addr >= (unsigned long)__kprobes_text_start &&
+		 addr < (unsigned long)__kprobes_text_end) ||
+		(addr >= (unsigned long)__entry_text_start &&
+		 addr < (unsigned long)__entry_text_end);
+}
 
 int __init arch_init_kprobes(void)
 {
 	return 0;
 }
 
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
 {
 	return 0;
 }

diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 23ef5c5..717b02a 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c

@@ -25,8 +25,9 @@
 
 #include "common.h"
 
-static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			     struct kprobe_ctlblk *kcb)
+static nokprobe_inline
+int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+		      struct kprobe_ctlblk *kcb)
 {
 	/*
 	 * Emulate singlestep (and also recover regs->ip)
@@ -41,18 +42,19 @@
 	return 1;
 }
 
-int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			      struct kprobe_ctlblk *kcb)
+int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+		    struct kprobe_ctlblk *kcb)
 {
 	if (kprobe_ftrace(p))
 		return __skip_singlestep(p, regs, kcb);
 	else
 		return 0;
 }
+NOKPROBE_SYMBOL(skip_singlestep);
 
 /* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				     struct ftrace_ops *ops, struct pt_regs *regs)
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct pt_regs *regs)
 {
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
@@ -84,8 +86,9 @@
 end:
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
 	p->ainsn.insn = NULL;
 	p->ainsn.boostable = -1;

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 898160b..f304773 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c

@@ -77,7 +77,7 @@
 }
 
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 {
 #ifdef CONFIG_X86_64
 	*addr++ = 0x48;
@@ -138,7 +138,8 @@
 #define INT3_SIZE sizeof(kprobe_opcode_t)
 
 /* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long flags;
@@ -168,8 +169,9 @@
 	}
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(optimized_callback);
 
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+static int copy_optimized_instructions(u8 *dest, u8 *src)
 {
 	int len = 0, ret;
 
@@ -189,7 +191,7 @@
 }
 
 /* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
 	return ((insn->opcode.bytes[0] == 0xff &&
 		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -224,7 +226,7 @@
 }
 
 /* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
+static int can_optimize(unsigned long paddr)
 {
 	unsigned long addr, size = 0, offset = 0;
 	struct insn insn;
@@ -275,7 +277,7 @@
 }
 
 /* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 {
 	int i;
 	struct kprobe *p;
@@ -290,15 +292,15 @@
 }
 
 /* Check the addr is within the optimized instructions. */
-int __kprobes
-arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+				 unsigned long addr)
 {
 	return ((unsigned long)op->kp.addr <= addr &&
 		(unsigned long)op->kp.addr + op->optinsn.size > addr);
 }
 
 /* Free optimized instruction slot */
-static __kprobes
+static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
 	if (op->optinsn.insn) {
@@ -308,7 +310,7 @@
 	}
 }
 
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 {
 	__arch_remove_optimized_kprobe(op, 1);
 }
@@ -318,7 +320,7 @@
  * Target instructions MUST be relocatable (checked inside)
  * This is called when new aggr(opt)probe is allocated or reused.
  */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 {
 	u8 *buf;
 	int ret;
@@ -372,7 +374,7 @@
  * Replace breakpoints (int3) with relative jumps.
  * Caller must call with locking kprobe_mutex and text_mutex.
  */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
 	u8 insn_buf[RELATIVEJUMP_SIZE];
@@ -398,7 +400,7 @@
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	u8 insn_buf[RELATIVEJUMP_SIZE];
 
@@ -424,8 +426,7 @@
 	}
 }
 
-int  __kprobes
-setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
 {
 	struct optimized_kprobe *op;
 
@@ -441,3 +442,4 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(setup_detour_execution);

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7e97371..3dd8e2c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c

@@ -251,8 +251,9 @@
 	return reason;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
+NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	enum ctx_state prev_state;
@@ -276,6 +277,7 @@
 		break;
 	}
 }
+NOKPROBE_SYMBOL(do_async_page_fault);
 
 static void __init paravirt_ops_setup(void)
 {

diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
new file mode 100644
index 0000000..c050a01
--- /dev/null
+++ b/arch/x86/kernel/mcount_64.S

@@ -0,0 +1,217 @@
+/*
+ *  linux/arch/x86_64/mcount_64.S
+ *
+ *  Copyright (C) 2014  Steven Rostedt, Red Hat Inc
+ */
+
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm/ftrace.h>
+
+
+	.code64
+	.section .entry.text, "ax"
+
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook	__fentry__
+#else
+# define function_hook	mcount
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(function_hook)
+	retq
+END(function_hook)
+
+/* skip is set if stack has been adjusted */
+.macro ftrace_caller_setup skip=0
+	MCOUNT_SAVE_FRAME \skip
+
+	/* Load the ftrace_ops into the 3rd parameter */
+	movq function_trace_op(%rip), %rdx
+
+	/* Load ip into the first parameter */
+	movq RIP(%rsp), %rdi
+	subq $MCOUNT_INSN_SIZE, %rdi
+	/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
+	movq 8(%rbp), %rsi
+#endif
+.endm
+
+ENTRY(ftrace_caller)
+	/* Check if tracing was disabled (quick check) */
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
+
+	ftrace_caller_setup
+	/* regs go into 4th parameter (but make it NULL) */
+	movq $0, %rcx
+
+GLOBAL(ftrace_call)
+	call ftrace_stub
+
+	MCOUNT_RESTORE_FRAME
+ftrace_return:
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call)
+	jmp ftrace_stub
+#endif
+
+GLOBAL(ftrace_stub)
+	retq
+END(ftrace_caller)
+
+ENTRY(ftrace_regs_caller)
+	/* Save the current flags before compare (in SS location)*/
+	pushfq
+
+	/* Check if tracing was disabled (quick check) */
+	cmpl $0, function_trace_stop
+	jne  ftrace_restore_flags
+
+	/* skip=8 to skip flags saved in SS */
+	ftrace_caller_setup 8
+
+	/* Save the rest of pt_regs */
+	movq %r15, R15(%rsp)
+	movq %r14, R14(%rsp)
+	movq %r13, R13(%rsp)
+	movq %r12, R12(%rsp)
+	movq %r11, R11(%rsp)
+	movq %r10, R10(%rsp)
+	movq %rbp, RBP(%rsp)
+	movq %rbx, RBX(%rsp)
+	/* Copy saved flags */
+	movq SS(%rsp), %rcx
+	movq %rcx, EFLAGS(%rsp)
+	/* Kernel segments */
+	movq $__KERNEL_DS, %rcx
+	movq %rcx, SS(%rsp)
+	movq $__KERNEL_CS, %rcx
+	movq %rcx, CS(%rsp)
+	/* Stack - skipping return address */
+	leaq SS+16(%rsp), %rcx
+	movq %rcx, RSP(%rsp)
+
+	/* regs go into 4th parameter */
+	leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+	call ftrace_stub
+
+	/* Copy flags back to SS, to restore them */
+	movq EFLAGS(%rsp), %rax
+	movq %rax, SS(%rsp)
+
+	/* Handlers can change the RIP */
+	movq RIP(%rsp), %rax
+	movq %rax, SS+8(%rsp)
+
+	/* restore the rest of pt_regs */
+	movq R15(%rsp), %r15
+	movq R14(%rsp), %r14
+	movq R13(%rsp), %r13
+	movq R12(%rsp), %r12
+	movq R10(%rsp), %r10
+	movq RBP(%rsp), %rbp
+	movq RBX(%rsp), %rbx
+
+	/* skip=8 to skip flags saved in SS */
+	MCOUNT_RESTORE_FRAME 8
+
+	/* Restore flags */
+	popfq
+
+	jmp ftrace_return
+ftrace_restore_flags:
+	popfq
+	jmp  ftrace_stub
+
+END(ftrace_regs_caller)
+
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(function_hook)
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
+
+	cmpq $ftrace_stub, ftrace_trace_function
+	jnz trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	cmpq $ftrace_stub, ftrace_graph_return
+	jnz ftrace_graph_caller
+
+	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
+	jnz ftrace_graph_caller
+#endif
+
+GLOBAL(ftrace_stub)
+	retq
+
+trace:
+	MCOUNT_SAVE_FRAME
+
+	movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
+	movq 8(%rbp), %rsi
+#endif
+	subq $MCOUNT_INSN_SIZE, %rdi
+
+	call   *ftrace_trace_function
+
+	MCOUNT_RESTORE_FRAME
+
+	jmp ftrace_stub
+END(function_hook)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	MCOUNT_SAVE_FRAME
+
+#ifdef CC_USING_FENTRY
+	leaq SS+16(%rsp), %rdi
+	movq $0, %rdx	/* No framepointers needed */
+#else
+	leaq 8(%rbp), %rdi
+	movq (%rbp), %rdx
+#endif
+	movq RIP(%rsp), %rsi
+	subq $MCOUNT_INSN_SIZE, %rsi
+
+	call	prepare_ftrace_return
+
+	MCOUNT_RESTORE_FRAME
+
+	retq
+END(ftrace_graph_caller)
+
+GLOBAL(return_to_handler)
+	subq  $24, %rsp
+
+	/* Save the return values */
+	movq %rax, (%rsp)
+	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
+
+	call ftrace_return_to_handler
+
+	movq %rax, %rdi
+	movq 8(%rsp), %rdx
+	movq (%rsp), %rax
+	addq $24, %rsp
+	jmp *%rdi
+#endif

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b4872b9..c3e985d 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c

@@ -110,7 +110,7 @@
 		a->handler, whole_msecs, decimal_msecs);
 }
 
-static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *a;
@@ -146,6 +146,7 @@
 	/* return total number of NMI events handled */
 	return handled;
 }
+NOKPROBE_SYMBOL(nmi_handle);
 
 int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 {
@@ -208,7 +209,7 @@
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static __kprobes void
+static void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
 	/* check to see if anyone registered against these types of errors */
@@ -238,8 +239,9 @@
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
 	outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(pci_serr_error);
 
-static __kprobes void
+static void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
@@ -269,8 +271,9 @@
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(io_check_error);
 
-static __kprobes void
+static void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
 	int handled;
@@ -298,11 +301,12 @@
 
 	pr_emerg("Dazed and confused, but trying to continue\n");
 }
+NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static __kprobes void default_do_nmi(struct pt_regs *regs)
+static void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
@@ -401,6 +405,7 @@
 	else
 		unknown_nmi_error(reason, regs);
 }
+NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
  * NMIs can hit breakpoints which will cause it to lose its
@@ -520,7 +525,7 @@
 }
 #endif
 
-dotraplinkage notrace __kprobes void
+dotraplinkage notrace void
 do_nmi(struct pt_regs *regs, long error_code)
 {
 	nmi_nesting_preprocess(regs);
@@ -537,6 +542,7 @@
 	/* On i386, may loop back to preprocess */
 	nmi_nesting_postprocess();
 }
+NOKPROBE_SYMBOL(do_nmi);
 
 void stop_nmi(void)
 {

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b10af8..548d25f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c

@@ -23,6 +23,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/highmem.h>
+#include <linux/kprobes.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -389,6 +390,11 @@
 	.end_context_switch = paravirt_nop,
 };
 
+/* At this point, native_get/set_debugreg has real function entries */
+NOKPROBE_SYMBOL(native_get_debugreg);
+NOKPROBE_SYMBOL(native_set_debugreg);
+NOKPROBE_SYMBOL(native_load_idt);
+
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 898d077..ca5b02d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c

@@ -413,12 +413,11 @@
 	set_thread_flag(TIF_ADDR32);
 
 	/* Mark the associated mm as containing 32-bit tasks. */
-	if (current->mm)
-		current->mm->context.ia32_compat = 1;
-
 	if (x32) {
 		clear_thread_flag(TIF_IA32);
 		set_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_X32;
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* is_compat_task() uses the presence of the x32
 		   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
 		current_thread_info()->status |= TS_COMPAT;

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f73b5d4..c6eb418 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c

@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -106,7 +107,7 @@
 	preempt_count_dec();
 }
 
-static int __kprobes
+static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
 {
@@ -136,7 +137,38 @@
 	return -1;
 }
 
-static void __kprobes
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+				siginfo_t *info)
+{
+	unsigned long siaddr;
+	int sicode;
+
+	switch (trapnr) {
+	default:
+		return SEND_SIG_PRIV;
+
+	case X86_TRAP_DE:
+		sicode = FPE_INTDIV;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_UD:
+		sicode = ILL_ILLOPN;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_AC:
+		sicode = BUS_ADRALN;
+		siaddr = 0;
+		break;
+	}
+
+	info->si_signo = signr;
+	info->si_errno = 0;
+	info->si_code = sicode;
+	info->si_addr = (void __user *)siaddr;
+	return info;
+}
+
+static void
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
 {
@@ -168,60 +200,43 @@
 	}
 #endif
 
-	if (info)
-		force_sig_info(signr, info, tsk);
-	else
-		force_sig(signr, tsk);
+	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+			  unsigned long trapnr, int signr)
+{
+	enum ctx_state prev_state = exception_enter();
+	siginfo_t info;
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		conditional_sti(regs);
+		do_trap(trapnr, signr, str, regs, error_code,
+			fill_trap_info(regs, signr, trapnr, &info));
+	}
+
+	exception_exit(prev_state);
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	enum ctx_state prev_state;					\
-									\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
-	exception_exit(prev_state);					\
+	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	siginfo_t info;							\
-	enum ctx_state prev_state;					\
-									\
-	info.si_signo = signr;						\
-	info.si_errno = 0;						\
-	info.si_code = sicode;						\
-	info.si_addr = (void __user *)siaddr;				\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, &info);		\
-	exception_exit(prev_state);					\
-}
-
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow					  )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds						  )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,		     ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun			  )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS					  )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present				  )
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment					  )
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check,	     BUS_ADRALN, 0	  )
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
@@ -263,7 +278,7 @@
 }
 #endif
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
@@ -305,13 +320,14 @@
 		pr_cont("\n");
 	}
 
-	force_sig(SIGSEGV, tsk);
+	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_general_protection);
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 	enum ctx_state prev_state;
 
@@ -327,13 +343,18 @@
 	if (poke_int3_handler(regs))
 		return;
 
-	prev_state = exception_enter();
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
+#ifdef CONFIG_KPROBES
+	if (kprobe_int3_handler(regs))
+		return;
+#endif
+	prev_state = exception_enter();
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
 		goto exit;
@@ -350,6 +371,7 @@
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
@@ -357,7 +379,7 @@
  * for scheduling or signal handling. The actual stack switch is done in
  * entry.S
  */
-asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
 	/* Did already sync */
@@ -376,6 +398,7 @@
 		*regs = *eregs;
 	return regs;
 }
+NOKPROBE_SYMBOL(sync_regs);
 #endif
 
 /*
@@ -402,7 +425,7 @@
  *
  * May run on IST stack.
  */
-dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
+dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
 	enum ctx_state prev_state;
@@ -410,8 +433,6 @@
 	unsigned long dr6;
 	int si_code;
 
-	prev_state = exception_enter();
-
 	get_debugreg(dr6, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
@@ -440,6 +461,12 @@
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
+#ifdef CONFIG_KPROBES
+	if (kprobe_debug_handler(regs))
+		goto exit;
+#endif
+	prev_state = exception_enter();
+
 	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
 							SIGTRAP) == NOTIFY_STOP)
 		goto exit;
@@ -482,13 +509,14 @@
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_debug);
 
 /*
  * Note that we play around with the 'TS' bit in an attempt to get
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
 	struct task_struct *task = current;
 	siginfo_t info;
@@ -518,7 +546,7 @@
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
@@ -645,7 +673,7 @@
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
 		drop_init_fpu(tsk);
-		force_sig(SIGSEGV, tsk);
+		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 		return;
 	}
 
@@ -653,7 +681,7 @@
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	enum ctx_state prev_state;
@@ -679,6 +707,7 @@
 #endif
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_device_not_available);
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace2291..5d1cbfe 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c

@@ -32,20 +32,20 @@
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE		0x0
-
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP		0x1
+#define UPROBE_FIX_IP		0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL	0x2
+#define UPROBE_FIX_CALL		0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF	0x4
+#define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x8000
-#define UPROBE_FIX_RIP_CX	0x4000
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define	UPROBE_TRAP_NR		UINT_MAX
 
@@ -67,6 +67,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc to think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
@@ -89,6 +90,37 @@
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_32	NULL
+#endif
+
+/* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
+static volatile u32 good_insns_64[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+	/*      ----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
+	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+	/*      ----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+};
+#else
+#define good_insns_64	NULL
+#endif
 
 /* Using this for both 64-bit and 32-bit apps */
 static volatile u32 good_2byte_insns[256 / 32] = {
@@ -113,32 +145,6 @@
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
-
-#ifdef CONFIG_X86_64
-/* Good-instruction tables for 64-bit apps */
-static volatile u32 good_insns_64[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-	/*      ----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
-	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
-	/*      ----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-};
-#endif
 #undef W
 
 /*
@@ -209,16 +215,25 @@
 	return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-	insn_init(insn, auprobe->insn, false);
+	u32 volatile *good_insns;
 
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
+	insn_init(insn, auprobe->insn, x86_64);
+	/* has the side-effect of processing the entire instruction */
+	insn_get_length(insn);
+	if (WARN_ON_ONCE(!insn_complete(insn)))
+		return -ENOEXEC;
+
 	if (is_prefix_bad(insn))
 		return -ENOTSUPP;
 
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+	if (x86_64)
+		good_insns = good_insns_64;
+	else
+		good_insns = good_insns_32;
+
+	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
 		return 0;
 
 	if (insn->opcode.nbytes == 2) {
@@ -230,14 +245,18 @@
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+	return	!config_enabled(CONFIG_IA32_EMULATION) ||
+		!(mm->context.ia32_compat == TIF_IA32);
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register.  Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly.  (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * defparam->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@
  *
  * Some useful facts about rip-relative instructions:
  *
- *  - There's always a modrm byte.
+ *  - There's always a modrm byte with bit layout "00 reg 101".
  *  - There's never a SIB byte.
  *  - The displacement is always 4 bytes.
+ *  - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *    has no effect on rip-relative mode. It doesn't make modrm byte
+ *    with r/m=101 refer to register 1101 = R13.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
+	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 *   (evex will need setting of both b and x since
+		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
 	}
 
 	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *   Encoding: 0f c7/1 modrm
+	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *   Example where none of bx,cx,dx can be used as scratch reg:
+	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 *   Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *   and that it can have only register operands, not mem
+	 *   (its modrm byte must have mode=11).
+	 *   If these restrictions are ever lifted,
+	 *   we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know of any insns with a modrm byte which
+	 * use the SI register implicitly. The DI register is used by
+	 * only one insn (maskmovq), and the BX register is likewise
+	 * used by only one (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
+
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EVEX vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
+	}
+	/*
 	 * Point cursor at the modrm byte.  The next 4 bytes are the
 	 * displacement.  Beyond the displacement, for some instructions,
 	 * is the immediate operand.
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
-	insn_get_length(insn);
-
 	/*
-	 * Convert from rip-relative addressing to indirect addressing
-	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
-	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8.  In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->fixups = UPROBE_FIX_RIP_CX;
-		/* Change modrm from 00 000 101 to 00 000 001. */
-		*cursor = 0x1;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->fixups = UPROBE_FIX_RIP_AX;
-		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
-		*cursor = (reg << 3);
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
+}
 
-	/* Target address = address of next instruction + (signed) offset */
-	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
-
-	/* Displacement field is gone; slide immediate field (if any) over. */
-	if (insn->immediate.nbytes) {
-		cursor++;
-		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-	}
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-		autask->saved_scratch_register = regs->ax;
-		regs->ax = current->utask->vaddr;
-		regs->ax += auprobe->rip_rela_target_address;
-	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-		autask->saved_scratch_register = regs->cx;
-		regs->cx = current->utask->vaddr;
-		regs->cx += auprobe->rip_rela_target_address;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
+
+		utask->autask.saved_scratch_register = *sr;
+		*sr = utask->vaddr + auprobe->defparam.ilen;
 	}
 }
 
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-		struct arch_uprobe_task *autask;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
-		autask = &current->utask->autask;
-		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-			regs->ax = autask->saved_scratch_register;
-		else
-			regs->cx = autask->saved_scratch_register;
-
-		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Caller may need to apply other fixups to handle stuff
-		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
-		 */
-		if (correction)
-			*correction += 4;
+		*sr = utask->autask.saved_scratch_register;
 	}
 }
-
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
-{
-	insn_init(insn, auprobe->insn, true);
-
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
-	if (is_prefix_bad(insn))
-		return -ENOTSUPP;
-
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-		return 0;
-
-	if (insn->opcode.nbytes == 2) {
-		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-			return 0;
-	}
-	return -ENOTSUPP;
-}
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
-{
-	if (mm->context.ia32_compat)
-		return validate_insn_32bits(auprobe, insn);
-	return validate_insn_64bits(auprobe, insn);
-}
 #else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+	return false;
+}
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-					long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
-{
-	return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
 	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
 	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
 	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
+	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
@@ -415,50 +462,67 @@
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+	riprel_pre_xol(auprobe, regs);
+	return 0;
+}
+
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+{
+	unsigned long new_sp = regs->sp - sizeof_long();
+
+	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+		return -EFAULT;
+
+	regs->sp = new_sp;
 	return 0;
 }
 
 /*
- * Adjust the return address pushed by a call insn executed out of line.
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction.  We need
+ * to make it relative to the original instruction (FIX_IP).  Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction.  We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
  */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	int rasize = sizeof_long();
-	long ra;
+	struct uprobe_task *utask = current->utask;
 
-	if (copy_from_user(&ra, (void __user *)sp, rasize))
-		return -EFAULT;
-
-	ra += correction;
-	if (copy_to_user((void __user *)sp, &ra, rasize))
-		return -EFAULT;
+	riprel_post_xol(auprobe, regs);
+	if (auprobe->defparam.fixups & UPROBE_FIX_IP) {
+		long correction = utask->vaddr - utask->xol_vaddr;
+		regs->ip += correction;
+	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
+		regs->sp += sizeof_long(); /* Pop incorrect return address */
+		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+			return -ERESTART;
+	}
+	/* popf; tell the caller to not touch TF */
+	if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
+		utask->autask.saved_tf = true;
 
 	return 0;
 }
 
-static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	struct uprobe_task *utask = current->utask;
-	long correction = (long)(utask->vaddr - utask->xol_vaddr);
-
-	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->fixups & UPROBE_FIX_IP)
-		regs->ip += correction;
-
-	if (auprobe->fixups & UPROBE_FIX_CALL) {
-		if (adjust_ret_addr(regs->sp, correction)) {
-			regs->sp += sizeof_long();
-			return -ERESTART;
-		}
-	}
-
-	return 0;
+	riprel_post_xol(auprobe, regs);
 }
 
 static struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
+	.abort	  = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +584,6 @@
 	unsigned long offs = (long)auprobe->branch.offs;
 
 	if (branch_is_call(auprobe)) {
-		unsigned long new_sp = regs->sp - sizeof_long();
 		/*
 		 * If it fails we execute this (mangled, see the comment in
 		 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
 		 */
-		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+		if (push_ret_address(regs, new_ip))
 			return false;
-		regs->sp = new_sp;
 	} else if (!check_jmp_cond(auprobe, regs)) {
 		offs = 0;
 	}
@@ -583,11 +645,7 @@
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
-
-	/* has the side-effect of processing the entire instruction */
-	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
-		return -ENOEXEC;
+	int i;
 
 	switch (opc1) {
 	case 0xeb:	/* jmp 8 */
@@ -612,6 +670,16 @@
 			return -ENOSYS;
 	}
 
+	/*
+	 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+	 * No one uses these insns, reject any branch insns with such prefix.
+	 */
+	for (i = 0; i < insn->prefixes.nbytes; i++) {
+		if (insn->prefixes.bytes[i] == 0x66)
+			return -ENOTSUPP;
+	}
+
 	auprobe->branch.opc1 = opc1;
 	auprobe->branch.ilen = insn->length;
 	auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
 	struct insn insn;
-	bool fix_ip = true, fix_call = false;
+	u8 fix_ip_or_call = UPROBE_FIX_IP;
 	int ret;
 
-	ret = validate_insn_bits(auprobe, mm, &insn);
+	ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
 	if (ret)
 		return ret;
 
@@ -642,44 +710,39 @@
 		return ret;
 
 	/*
-	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-	 * is either zero or it reflects rip-related fixups.
+	 * Figure out which fixups default_post_xol_op() will need to perform,
+	 * and annotate defparam->fixups accordingly.
 	 */
 	switch (OPCODE1(&insn)) {
 	case 0x9d:		/* popf */
-		auprobe->fixups |= UPROBE_FIX_SETF;
+		auprobe->defparam.fixups |= UPROBE_FIX_SETF;
 		break;
 	case 0xc3:		/* ret or lret -- ip is correct */
 	case 0xcb:
 	case 0xc2:
 	case 0xca:
-		fix_ip = false;
+	case 0xea:		/* jmp absolute -- ip is correct */
+		fix_ip_or_call = 0;
 		break;
 	case 0x9a:		/* call absolute - Fix return addr, not ip */
-		fix_call = true;
-		fix_ip = false;
-		break;
-	case 0xea:		/* jmp absolute -- ip is correct */
-		fix_ip = false;
+		fix_ip_or_call = UPROBE_FIX_CALL;
 		break;
 	case 0xff:
-		insn_get_modrm(&insn);
 		switch (MODRM_REG(&insn)) {
 		case 2: case 3:			/* call or lcall, indirect */
-			fix_call = true;
+			fix_ip_or_call = UPROBE_FIX_CALL;
+			break;
 		case 4: case 5:			/* jmp or ljmp, indirect */
-			fix_ip = false;
+			fix_ip_or_call = 0;
+			break;
 		}
 		/* fall through */
 	default:
-		handle_riprel_insn(auprobe, &insn);
+		riprel_analyze(auprobe, &insn);
 	}
 
-	if (fix_ip)
-		auprobe->fixups |= UPROBE_FIX_IP;
-	if (fix_call)
-		auprobe->fixups |= UPROBE_FIX_CALL;
+	auprobe->defparam.ilen = insn.length;
+	auprobe->defparam.fixups |= fix_ip_or_call;
 
 	auprobe->ops = &default_xol_ops;
 	return 0;
@@ -694,6 +757,12 @@
 {
 	struct uprobe_task *utask = current->utask;
 
+	if (auprobe->ops->pre_xol) {
+		int err = auprobe->ops->pre_xol(auprobe, regs);
+		if (err)
+			return err;
+	}
+
 	regs->ip = utask->xol_vaddr;
 	utask->autask.saved_trap_nr = current->thread.trap_nr;
 	current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +772,6 @@
 	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
 		set_task_blockstep(current, false);
 
-	if (auprobe->ops->pre_xol)
-		return auprobe->ops->pre_xol(auprobe, regs);
 	return 0;
 }
 
@@ -732,56 +799,42 @@
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction.  We need
- * to make it relative to the original instruction (FIX_IP).  Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction.  We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field).  (FIX_RIP_AX or FIX_RIP_CX)
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
+	bool send_sigtrap = utask->autask.saved_tf;
+	int err = 0;
 
 	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
 
 	if (auprobe->ops->post_xol) {
-		int err = auprobe->ops->post_xol(auprobe, regs);
+		err = auprobe->ops->post_xol(auprobe, regs);
 		if (err) {
-			arch_uprobe_abort_xol(auprobe, regs);
 			/*
-			 * Restart the probed insn. ->post_xol() must ensure
-			 * this is really possible if it returns -ERESTART.
+			 * Restore ->ip for restart or post mortem analysis.
+			 * ->post_xol() must not return -ERESTART unless this
+			 * is really possible.
 			 */
+			regs->ip = utask->vaddr;
 			if (err == -ERESTART)
-				return 0;
-			return err;
+				err = 0;
+			send_sigtrap = false;
 		}
 	}
-
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
 	/*
 	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
 	 * so we can get an extra SIGTRAP if we do not clear TF. We need
 	 * to examine the opcode to make it right.
 	 */
-	if (utask->autask.saved_tf)
+	if (send_sigtrap)
 		send_sig(SIGTRAP, current, 0);
-	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	return 0;
+	return err;
 }
 
 /* callback routine for handling exceptions. */
@@ -815,18 +868,18 @@
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
 
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
-	handle_riprel_post_xol(auprobe, regs, NULL);
-	instruction_pointer_set(regs, utask->vaddr);
+	if (auprobe->ops->abort)
+		auprobe->ops->abort(auprobe, regs);
 
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
+	regs->ip = utask->vaddr;
 	/* clear TF if it was set by us in arch_uprobe_pre_xol() */
 	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;

diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 2930ae0..28f85c91 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S

@@ -4,8 +4,8 @@
  *  (inspired by Andi Kleen's thunk_64.S)
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */
-
 	#include <linux/linkage.h>
+	#include <asm/asm.h>
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 	/* put return address in eax (arg1) */
@@ -22,6 +22,7 @@
 	popl %ecx
 	popl %eax
 	ret
+	_ASM_NOKPROBE(\name)
 	.endm
 
 	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index a63efd6..92d9fea 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S

@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
+#include <asm/asm.h>
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -25,6 +26,7 @@
 	call \func
 	jmp  restore
 	CFI_ENDPROC
+	_ASM_NOKPROBE(\name)
 	.endm
 
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -43,3 +45,4 @@
 	RESTORE_ARGS
 	ret
 	CFI_ENDPROC
+	_ASM_NOKPROBE(restore)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 858b47b..3664279 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c

@@ -8,7 +8,7 @@
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/module.h>		/* search_exception_table	*/
 #include <linux/bootmem.h>		/* max_low_pfn			*/
-#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
@@ -46,7 +46,7 @@
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int __kprobes
+static nokprobe_inline int
 kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 	if (unlikely(is_kmmio_active()))
@@ -55,7 +55,7 @@
 	return 0;
 }
 
-static inline int __kprobes kprobes_fault(struct pt_regs *regs)
+static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -262,7 +262,7 @@
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
@@ -292,6 +292,7 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 /*
  * Did it hit the DOS screen memory VA from vm86 mode?
@@ -359,7 +360,7 @@
  *
  * This assumes no large pages in there.
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
@@ -426,6 +427,7 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 #ifdef CONFIG_CPU_SUP_AMD
 static const char errata93_warning[] =
@@ -928,7 +930,7 @@
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline __kprobes int
+static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
@@ -976,6 +978,7 @@
 
 	return ret;
 }
+NOKPROBE_SYMBOL(spurious_fault);
 
 int show_unhandled_signals = 1;
 
@@ -1031,7 +1034,7 @@
  * {,trace_}do_page_fault() have notrace on. Having this an actual function
  * guarantees there's a function trace entry.
  */
-static void __kprobes noinline
+static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address)
 {
@@ -1254,8 +1257,9 @@
 
 	up_read(&mm->mmap_sem);
 }
+NOKPROBE_SYMBOL(__do_page_fault);
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	unsigned long address = read_cr2(); /* Get the faulting address */
@@ -1273,10 +1277,12 @@
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_page_fault);
 
 #ifdef CONFIG_TRACING
-static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-				     unsigned long error_code)
+static nokprobe_inline void
+trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+			 unsigned long error_code)
 {
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
@@ -1284,7 +1290,7 @@
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	/*
@@ -1301,4 +1307,5 @@
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(trace_do_page_fault);
 #endif /* CONFIG_TRACING */

diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 0149575..6440221 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S

@@ -12,13 +12,16 @@
 
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8  : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
 #define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
 
 sk_load_word:
 	.globl	sk_load_word
@@ -68,53 +71,31 @@
 	movzbl	(SKBDATA,%rsi),%eax
 	ret
 
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl	sk_load_byte_msh
-	test	%esi,%esi
-	js	bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl	sk_load_byte_msh_positive_offset
-	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle	bpf_slow_path_byte_msh
-	movzbl	(SKBDATA,%rsi),%ebx
-	and	$15,%bl
-	shl	$2,%bl
-	ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
-	push	%rdi;    /* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
 	push	%r9;				\
 	push	SKBDATA;			\
 /* rsi already has offset */			\
 	mov	$LEN,%ecx;	/* len */	\
-	lea	-12(%rbp),%rdx;			\
+	lea	- MAX_BPF_STACK + 32(%rbp),%rdx;			\
 	call	skb_copy_bits;			\
 	test    %eax,%eax;			\
 	pop	SKBDATA;			\
-	pop	%r9;				\
-	pop	%rdi
+	pop	%r9;
 
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
-	mov	-12(%rbp),%eax
+	mov	- MAX_BPF_STACK + 32(%rbp),%eax
 	bswap	%eax
 	ret
 
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js	bpf_error
-	mov	-12(%rbp),%ax
+	mov	- MAX_BPF_STACK + 32(%rbp),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
 	ret
@@ -122,21 +103,11 @@
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
+	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
 	ret
 
 #define sk_negative_common(SIZE)				\
-	push	%rdi;	/* save skb */				\
+	mov	%rbx, %rdi; /* arg1 == skb */			\
 	push	%r9;						\
 	push	SKBDATA;					\
 /* rsi already has offset */					\
@@ -145,10 +116,8 @@
 	test	%rax,%rax;					\
 	pop	SKBDATA;					\
 	pop	%r9;						\
-	pop	%rdi;						\
 	jz	bpf_error
 
-
 bpf_slow_path_word_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
 	jl	bpf_error	/* offset lower -> error  */
@@ -179,22 +148,12 @@
 	movzbl	(%rax), %eax
 	ret
 
-bpf_slow_path_byte_msh_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-sk_load_byte_msh_negative_offset:
-	.globl	sk_load_byte_msh_negative_offset
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl	(%rax),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
-	ret
-
 bpf_error:
 # force a return 0 from jit handler
-	xor		%eax,%eax
-	mov		-8(%rbp),%rbx
+	xor	%eax,%eax
+	mov	- MAX_BPF_STACK(%rbp),%rbx
+	mov	- MAX_BPF_STACK + 8(%rbp),%r13
+	mov	- MAX_BPF_STACK + 16(%rbp),%r14
+	mov	- MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
 	ret

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 6d5663a..99bef86 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c

@@ -1,6 +1,7 @@
 /* bpf_jit_comp.c : BPF JIT compiler
  *
  * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
+ * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -14,28 +15,16 @@
 #include <linux/if_vlan.h>
 #include <linux/random.h>
 
-/*
- * Conventions :
- *  EAX : BPF A accumulator
- *  EBX : BPF X accumulator
- *  RDI : pointer to skb   (first argument given to JIT function)
- *  RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
- *  ECX,EDX,ESI : scratch registers
- *  r9d : skb->len - skb->data_len (headlen)
- *  r8  : skb->data
- * -8(RBP) : saved RBX value
- * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
- */
 int bpf_jit_enable __read_mostly;
 
 /*
  * assembly code in arch/x86/net/bpf_jit.S
  */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_byte_positive_offset[];
 extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
+extern u8 sk_load_byte_negative_offset[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -56,30 +45,44 @@
 #define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
 #define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
 #define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
-#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4);} while (0)
-
-#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
-#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+/* EMITn_off32(): emit n opcode bytes, then 'off' through EMIT(off, 4) */
+#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
 
 static inline bool is_imm8(int value)
 {
 	return value <= 127 && value >= -128;
 }
 
-static inline bool is_near(int offset)
+static inline bool is_simm32(s64 value)
 {
-	return offset <= 127 && offset >= -128;
+	return value == (s64) (s32) value;
 }
 
-#define EMIT_JMP(offset)						\
-do {									\
-	if (offset) {							\
-		if (is_near(offset))					\
-			EMIT2(0xeb, offset); /* jmp .+off8 */		\
-		else							\
-			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
-	}								\
-} while (0)
+/* mov dst, src (64-bit); nothing is emitted when DST and SRC are equal */
+#define EMIT_mov(DST, SRC) \
+	do {if ((DST) != (SRC)) \
+		EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
+	} while (0)
+
+/* Number of immediate bytes emitted after a BPF_ST store of 'bpf_size'.
+ * BPF_DW still takes a 4-byte immediate; unknown sizes yield 0.
+ */
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+	switch (bpf_size) {
+	case BPF_B: return 1;
+	case BPF_H: return 2;
+	case BPF_W: return 4;
+	case BPF_DW: return 4; /* imm32 */
+	default: return 0;
+	}
+}
 
 /* list of x86 cond jumps opcodes (. + s8)
  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
@@ -90,27 +93,8 @@
 #define X86_JNE 0x75
 #define X86_JBE 0x76
 #define X86_JA  0x77
-
-#define EMIT_COND_JMP(op, offset)				\
-do {								\
-	if (is_near(offset))					\
-		EMIT2(op, offset); /* jxx .+off8 */		\
-	else {							\
-		EMIT2(0x0f, op + 0x10);				\
-		EMIT(offset, 4); /* jxx .+off32 */		\
-	}							\
-} while (0)
-
-#define COND_SEL(CODE, TOP, FOP)	\
-	case CODE:			\
-		t_op = TOP;		\
-		f_op = FOP;		\
-		goto cond_branch
-
-
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
+#define X86_JGE 0x7D
+#define X86_JG  0x7F
 
 static inline void bpf_flush_icache(void *start, void *end)
 {
@@ -125,26 +109,6 @@
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-/* Helper to find the offset of pkt_type in sk_buff
- * We want to make sure its still a 3bit field starting at a byte boundary.
- */
-#define PKT_TYPE_MAX 7
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 struct bpf_binary_header {
 	unsigned int	pages;
 	/* Note : for security reasons, bpf code will follow a randomly
@@ -178,583 +142,771 @@
 	return header;
 }
 
-void bpf_jit_compile(struct sk_filter *fp)
+/* pick a register outside of BPF range for JIT internal work */
+#define AUX_REG (MAX_BPF_REG + 1)
+
+/* reg2hex[] holds the low 3 bits of the x64 register each BPF register
+ * maps to (is_ereg() flags r8..r15). x64 r12 is unused: as a base address
+ * register in load/store instructions it always needs an extra byte
+ */
+static const int reg2hex[] = {
+	[BPF_REG_0] = 0,  /* rax */
+	[BPF_REG_1] = 7,  /* rdi */
+	[BPF_REG_2] = 6,  /* rsi */
+	[BPF_REG_3] = 2,  /* rdx */
+	[BPF_REG_4] = 1,  /* rcx */
+	[BPF_REG_5] = 0,  /* r8 */
+	[BPF_REG_6] = 3,  /* rbx callee saved */
+	[BPF_REG_7] = 5,  /* r13 callee saved */
+	[BPF_REG_8] = 6,  /* r14 callee saved */
+	[BPF_REG_9] = 7,  /* r15 callee saved */
+	[BPF_REG_FP] = 5, /* rbp readonly */
+	[AUX_REG] = 3,    /* r11 temp register */
+};
+
+/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+ * which need extra byte of encoding.
+ * rax,rcx,...,rbp have simpler encoding
+ */
+static inline bool is_ereg(u32 reg)
 {
-	u8 temp[64];
-	u8 *prog;
-	unsigned int proglen, oldproglen = 0;
-	int ilen, i;
-	int t_offset, f_offset;
-	u8 t_op, f_op, seen = 0, pass;
-	u8 *image = NULL;
-	struct bpf_binary_header *header = NULL;
-	u8 *func;
-	int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
+	if (reg == BPF_REG_5 || reg == AUX_REG ||
+	    (reg >= BPF_REG_7 && reg <= BPF_REG_9))
+		return true;
+	else
+		return false;
+}
+
+/* set bit 0 of prefix 'byte' when 'reg' maps to one of x64 r8..r15 */
+static inline u8 add_1mod(u8 byte, u32 reg)
+{
+	const u8 ext = is_ereg(reg) ? 1 : 0;
+
+	return byte | ext;
+}
+
+static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
+{
+	/* r1 in r8..r15 sets prefix bit 0, r2 in r8..r15 sets prefix bit 2 */
+	u8 ext = is_ereg(r1) ? 1 : 0;
+
+	ext |= is_ereg(r2) ? 4 : 0;
+	return byte | ext;
+}
+
+/* add the 3-bit x64 code of 'dst_reg' (see reg2hex[]) to opcode 'byte' */
+static inline u8 add_1reg(u8 byte, u32 dst_reg)
+{
+	return byte + reg2hex[dst_reg];
+}
+
+/* add 'dst_reg' code in bits 0..2 and 'src_reg' code in bits 3..5 to 'byte' */
+static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
+{
+	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
+}
+
+struct jit_context {
 	unsigned int cleanup_addr; /* epilogue code offset */
-	unsigned int *addrs;
-	const struct sock_filter *filter = fp->insns;
-	int flen = fp->len;
+	bool seen_ld_abs;
+};
+
+/* do_jit() - run one JIT pass over bpf_prog. addrs[i] receives the end offset
+ * of insn i; code is copied into 'image' only when it is non-NULL.
+ * Returns the generated length in bytes, or a negative errno on failure.
+ */
+static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
+		  int oldproglen, struct jit_context *ctx)
+{
+	struct sock_filter_int *insn = bpf_prog->insnsi;
+	int insn_cnt = bpf_prog->len;
+	u8 temp[128]; /* prologue (up to 65 bytes) + first insn share it */
+	int i, proglen = 0;
+	u8 *prog = temp;
+	int stacksize = MAX_BPF_STACK + 32 /* space for rbx, r13, r14, r15 */ +
+		8 /* space for skb_copy_bits() buffer */;
+
+	EMIT1(0x55); /* push rbp */
+	EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
+
+	/* sub rsp, stacksize */
+	EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
+
+	/* all classic BPF filters use R6 (rbx), so always save it */
+	/* mov qword ptr [rbp-X],rbx */
+	EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
+
+	/* sk_convert_filter() maps classic BPF register X to R7 and uses R8
+	 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
+	 * R8(r14). R9(r15) spill could be made conditional, but the single
+	 * 'bpf_error' return path in bpf_jit.S restores rbx, r13, r14 and r15
+	 * unconditionally. The overhead of the extra spill is negligible for
+	 * any filter other than synthetic ones; not worth the complexity.
+	 */
+	/* mov qword ptr [rbp-X],r13 */
+	EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
+	/* mov qword ptr [rbp-X],r14 */
+	EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
+	/* mov qword ptr [rbp-X],r15 */
+	EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
+
+	/* clear A and X registers */
+	EMIT2(0x31, 0xc0); /* xor eax, eax */
+	EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+
+	if (ctx->seen_ld_abs) {
+		/* r9d : skb->len - skb->data_len (headlen)
+		 * r10 : skb->data
+		 */
+		if (is_imm8(offsetof(struct sk_buff, len)))
+			/* mov %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x8b, 0x4f,
+			      offsetof(struct sk_buff, len));
+		else
+			/* mov %r9d, off32(%rdi) */
+			EMIT3_off32(0x44, 0x8b, 0x8f,
+				    offsetof(struct sk_buff, len));
+
+		if (is_imm8(offsetof(struct sk_buff, data_len)))
+			/* sub %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x2b, 0x4f,
+			      offsetof(struct sk_buff, data_len));
+		else
+			EMIT3_off32(0x44, 0x2b, 0x8f,
+				    offsetof(struct sk_buff, data_len));
+
+		if (is_imm8(offsetof(struct sk_buff, data)))
+			/* mov %r10, off8(%rdi) */
+			EMIT4(0x4c, 0x8b, 0x57,
+			      offsetof(struct sk_buff, data));
+		else
+			/* mov %r10, off32(%rdi) */
+			EMIT3_off32(0x4c, 0x8b, 0x97,
+				    offsetof(struct sk_buff, data));
+	}
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		const s32 imm32 = insn->imm;
+		u32 dst_reg = insn->dst_reg;
+		u32 src_reg = insn->src_reg;
+		u8 b1 = 0, b2 = 0, b3 = 0;
+		s64 jmp_offset;
+		u8 jmp_cond;
+		int ilen;
+		u8 *func;
+
+		switch (insn->code) {
+			/* ALU */
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU64 | BPF_ADD | BPF_X:
+		case BPF_ALU64 | BPF_SUB | BPF_X:
+		case BPF_ALU64 | BPF_AND | BPF_X:
+		case BPF_ALU64 | BPF_OR | BPF_X:
+		case BPF_ALU64 | BPF_XOR | BPF_X:
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b2 = 0x01; break;
+			case BPF_SUB: b2 = 0x29; break;
+			case BPF_AND: b2 = 0x21; break;
+			case BPF_OR: b2 = 0x09; break;
+			case BPF_XOR: b2 = 0x31; break;
+			}
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, dst_reg, src_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
+			break;
+
+			/* mov dst, src */
+		case BPF_ALU64 | BPF_MOV | BPF_X:
+			EMIT_mov(dst_reg, src_reg);
+			break;
+
+			/* mov32 dst, src */
+		case BPF_ALU | BPF_MOV | BPF_X:
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+			break;
+
+			/* neg dst */
+		case BPF_ALU | BPF_NEG:
+		case BPF_ALU64 | BPF_NEG:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
+			break;
+
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU64 | BPF_ADD | BPF_K:
+		case BPF_ALU64 | BPF_SUB | BPF_K:
+		case BPF_ALU64 | BPF_AND | BPF_K:
+		case BPF_ALU64 | BPF_OR | BPF_K:
+		case BPF_ALU64 | BPF_XOR | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b3 = 0xC0; break;
+			case BPF_SUB: b3 = 0xE8; break;
+			case BPF_AND: b3 = 0xE0; break;
+			case BPF_OR: b3 = 0xC8; break;
+			case BPF_XOR: b3 = 0xF0; break;
+			}
+
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
+			else
+				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
+			break;
+
+		case BPF_ALU64 | BPF_MOV | BPF_K:
+			/* optimization: if imm32 is positive,
+			 * use 'mov eax, imm32' (which zero-extends imm32)
+			 * to save 2 bytes
+			 */
+			if (imm32 < 0) {
+				/* 'mov rax, imm32' sign extends imm32 */
+				b1 = add_1mod(0x48, dst_reg);
+				b2 = 0xC7;
+				b3 = 0xC0;
+				EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+				break;
+			}
+			/* fall through: imm32 >= 0, mov32 zero-extends it */
+		case BPF_ALU | BPF_MOV | BPF_K:
+			/* mov %eax, imm32 */
+			if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+			break;
+
+			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
+		case BPF_ALU | BPF_MOD | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_MOD | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU64 | BPF_MOD | BPF_X:
+		case BPF_ALU64 | BPF_DIV | BPF_X:
+		case BPF_ALU64 | BPF_MOD | BPF_K:
+		case BPF_ALU64 | BPF_DIV | BPF_K:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov r11, src_reg */
+				EMIT_mov(AUX_REG, src_reg);
+			else
+				/* mov r11, imm32 */
+				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
+
+			/* mov rax, dst_reg */
+			EMIT_mov(BPF_REG_0, dst_reg);
+
+			/* xor edx, edx
+			 * equivalent to 'xor rdx, rdx', but one byte less
+			 */
+			EMIT2(0x31, 0xd2);
+
+			if (BPF_SRC(insn->code) == BPF_X) {
+				/* if (src_reg == 0) return 0 */
+
+				/* cmp r11, 0 */
+				EMIT4(0x49, 0x83, 0xFB, 0x00);
+
+				/* jne .+9 (skip over pop, pop, xor and jmp) */
+				EMIT2(X86_JNE, 1 + 1 + 2 + 5);
+				EMIT1(0x5A); /* pop rdx */
+				EMIT1(0x58); /* pop rax */
+				EMIT2(0x31, 0xc0); /* xor eax, eax */
+
+				/* jmp cleanup_addr
+				 * addrs[i] - 11, because there are 11 bytes
+				 * after this insn: div, mov, pop, pop, mov
+				 */
+				jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
+				EMIT1_off32(0xE9, jmp_offset);
+			}
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				/* div r11 */
+				EMIT3(0x49, 0xF7, 0xF3);
+			else
+				/* div r11d */
+				EMIT3(0x41, 0xF7, 0xF3);
+
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* mov r11, rdx */
+				EMIT3(0x49, 0x89, 0xD3);
+			else
+				/* mov r11, rax */
+				EMIT3(0x49, 0x89, 0xC3);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
+			break;
+
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU64 | BPF_MUL | BPF_K:
+		case BPF_ALU64 | BPF_MUL | BPF_X:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			/* mov r11, dst_reg */
+			EMIT_mov(AUX_REG, dst_reg);
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov rax, src_reg */
+				EMIT_mov(BPF_REG_0, src_reg);
+			else
+				/* mov rax, imm32 */
+				EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, AUX_REG));
+			else if (is_ereg(AUX_REG))
+				EMIT1(add_1mod(0x40, AUX_REG));
+			/* mul(q) r11 */
+			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
+
+			/* mov r11, rax */
+			EMIT_mov(AUX_REG, BPF_REG_0);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
+			break;
+
+			/* shifts */
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_ARSH | BPF_K:
+		case BPF_ALU64 | BPF_LSH | BPF_K:
+		case BPF_ALU64 | BPF_RSH | BPF_K:
+		case BPF_ALU64 | BPF_ARSH | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_LSH: b3 = 0xE0; break;
+			case BPF_RSH: b3 = 0xE8; break;
+			case BPF_ARSH: b3 = 0xF8; break;
+			}
+			EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (imm32) {
+			case 16:
+				/* emit 'ror %ax, 8' to swap lower 2 bytes */
+				EMIT1(0x66);
+				if (is_ereg(dst_reg))
+					EMIT1(0x41);
+				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
+				break;
+			case 32:
+				/* emit 'bswap eax' to swap lower 4 bytes */
+				if (is_ereg(dst_reg))
+					EMIT2(0x41, 0x0F);
+				else
+					EMIT1(0x0F);
+				EMIT1(add_1reg(0xC8, dst_reg));
+				break;
+			case 64:
+				/* emit 'bswap rax' to swap 8 bytes */
+				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
+				      add_1reg(0xC8, dst_reg));
+				break;
+			}
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			break;
+
+			/* ST: *(u8*)(dst_reg + off) = imm */
+		case BPF_ST | BPF_MEM | BPF_B:
+			if (is_ereg(dst_reg))
+				EMIT2(0x41, 0xC6);
+			else
+				EMIT1(0xC6);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_H:
+			if (is_ereg(dst_reg))
+				EMIT3(0x66, 0x41, 0xC7);
+			else
+				EMIT2(0x66, 0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_W:
+			if (is_ereg(dst_reg))
+				EMIT2(0x41, 0xC7);
+			else
+				EMIT1(0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_DW:
+			EMIT2(add_1mod(0x48, dst_reg), 0xC7);
+
+st:			if (is_imm8(insn->off))
+				EMIT2(add_1reg(0x40, dst_reg), insn->off);
+			else
+				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
+
+			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+			break;
+
+			/* STX: *(u8*)(dst_reg + off) = src_reg */
+		case BPF_STX | BPF_MEM | BPF_B:
+			/* emit 'mov byte ptr [rax + off], al' */
+			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+			    /* have to add extra byte for x86 SIL, DIL regs */
+			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+			else
+				EMIT1(0x88);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_H:
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+			else
+				EMIT2(0x66, 0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_W:
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+			else
+				EMIT1(0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_DW:
+			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+stx:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
+					    insn->off);
+			break;
+
+			/* LDX: dst_reg = *(u8*)(src_reg + off) */
+		case BPF_LDX | BPF_MEM | BPF_B:
+			/* emit 'movzx rax, byte ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_H:
+			/* emit 'movzx rax, word ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_W:
+			/* emit 'mov eax, dword ptr [rax + off]' */
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+			else
+				EMIT1(0x8B);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_DW:
+			/* emit 'mov rax, qword ptr [rax + off]' */
+			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ldx:			/* if insn->off == 0 we can save one extra byte, but
+			 * special case of x86 r13 which always needs an offset
+			 * is not worth the hassle
+			 */
+			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
+					    insn->off);
+			break;
+
+			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
+		case BPF_STX | BPF_XADD | BPF_W:
+			/* emit 'lock add dword ptr [rax + off], eax' */
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
+			else
+				EMIT2(0xF0, 0x01);
+			goto xadd;
+		case BPF_STX | BPF_XADD | BPF_DW:
+			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
+xadd:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
+					    insn->off);
+			break;
+
+			/* call */
+		case BPF_JMP | BPF_CALL:
+			func = (u8 *) __bpf_call_base + imm32;
+			jmp_offset = func - (image + addrs[i]);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x52); /* push %r10 */
+				EMIT2(0x41, 0x51); /* push %r9 */
+				/* need to adjust jmp offset, since
+				 * pop %r9, pop %r10 take 4 bytes after call insn
+				 */
+				jmp_offset += 4;
+			}
+			if (!imm32 || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       imm32, func, image);
+				return -EINVAL;
+			}
+			EMIT1_off32(0xE8, jmp_offset);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x59); /* pop %r9 */
+				EMIT2(0x41, 0x5A); /* pop %r10 */
+			}
+			break;
+
+			/* cond jump */
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+			/* cmp dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
+			      add_2reg(0xC0, dst_reg, src_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* test dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
+			      add_2reg(0xC0, dst_reg, src_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_K:
+			/* test dst_reg, imm32 */
+			EMIT1(add_1mod(0x48, dst_reg));
+			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+			/* cmp dst_reg, imm8/32 */
+			EMIT1(add_1mod(0x48, dst_reg));
+
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
+			else
+				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
+
+emit_cond_jmp:		/* convert BPF opcode to x86 */
+			switch (BPF_OP(insn->code)) {
+			case BPF_JEQ:
+				jmp_cond = X86_JE;
+				break;
+			case BPF_JSET:
+			case BPF_JNE:
+				jmp_cond = X86_JNE;
+				break;
+			case BPF_JGT:
+				/* GT is unsigned '>', JA in x86 */
+				jmp_cond = X86_JA;
+				break;
+			case BPF_JGE:
+				/* GE is unsigned '>=', JAE in x86 */
+				jmp_cond = X86_JAE;
+				break;
+			case BPF_JSGT:
+				/* signed '>', GT in x86 */
+				jmp_cond = X86_JG;
+				break;
+			case BPF_JSGE:
+				/* signed '>=', GE in x86 */
+				jmp_cond = X86_JGE;
+				break;
+			default: /* to silence gcc warning */
+				return -EFAULT;
+			}
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (is_imm8(jmp_offset)) {
+				EMIT2(jmp_cond, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+			} else {
+				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+
+			break;
+
+		case BPF_JMP | BPF_JA:
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (!jmp_offset)
+				/* optimize out nop jumps */
+				break;
+emit_jmp:
+			if (is_imm8(jmp_offset)) {
+				EMIT2(0xEB, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT1_off32(0xE9, jmp_offset);
+			} else {
+				pr_err("jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+			break;
+
+		case BPF_LD | BPF_IND | BPF_W:
+			func = sk_load_word;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_W:
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
+common_load:		ctx->seen_ld_abs = true;
+			jmp_offset = func - (image + addrs[i]);
+			if (!func || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       imm32, func, image);
+				return -EINVAL;
+			}
+			if (BPF_MODE(insn->code) == BPF_ABS) {
+				/* mov %esi, imm32 */
+				EMIT1_off32(0xBE, imm32);
+			} else {
+				/* mov %rsi, src_reg */
+				EMIT_mov(BPF_REG_2, src_reg);
+				if (imm32) {
+					if (is_imm8(imm32))
+						/* add %esi, imm8 */
+						EMIT3(0x83, 0xC6, imm32);
+					else
+						/* add %esi, imm32 */
+						EMIT2_off32(0x81, 0xC6, imm32);
+				}
+			}
+			/* skb pointer is in R6 (%rbx), it will be copied into
+			 * %rdi if skb_copy_bits() call is necessary.
+			 * sk_load_* helpers also use %r10 and %r9d.
+			 * See bpf_jit.S
+			 */
+			EMIT1_off32(0xE8, jmp_offset); /* call */
+			break;
+
+		case BPF_LD | BPF_IND | BPF_H:
+			func = sk_load_half;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_H:
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
+			goto common_load;
+		case BPF_LD | BPF_IND | BPF_B:
+			func = sk_load_byte;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_B:
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
+			goto common_load;
+
+		case BPF_JMP | BPF_EXIT:
+			if (i != insn_cnt - 1) {
+				jmp_offset = ctx->cleanup_addr - addrs[i];
+				goto emit_jmp;
+			}
+			/* update cleanup_addr */
+			ctx->cleanup_addr = proglen;
+			/* mov rbx, qword ptr [rbp-X] */
+			EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
+			/* mov r13, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
+			/* mov r14, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
+			/* mov r15, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
+
+			EMIT1(0xC9); /* leave */
+			EMIT1(0xC3); /* ret */
+			break;
+
+		default:
+			/* By design the x64 JIT should support all BPF
+			 * instructions. This error will be seen if a new
+			 * instruction was added to the interpreter but not to
+			 * the JIT, or if there is junk in sk_filter.
+			 */
+			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
+			return -EINVAL;
+		}
+
+		ilen = prog - temp;
+		if (image) {
+			if (unlikely(proglen + ilen > oldproglen)) {
+				pr_err("bpf_jit_compile fatal error\n");
+				return -EFAULT;
+			}
+			memcpy(image + proglen, temp, ilen);
+		}
+		proglen += ilen;
+		addrs[i] = proglen;
+		prog = temp;
+	}
+	return proglen;
+}
+
+void bpf_jit_compile(struct sk_filter *prog)
+{
+}
+
+void bpf_int_jit_compile(struct sk_filter *prog)
+{
+	struct bpf_binary_header *header = NULL;
+	int proglen, oldproglen = 0;
+	struct jit_context ctx = {};
+	u8 *image = NULL;
+	int *addrs;
+	int pass;
+	int i;
 
 	if (!bpf_jit_enable)
 		return;
 
-	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
-	if (addrs == NULL)
+	if (!prog || !prog->len)
+		return;
+
+	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+	if (!addrs)
 		return;
 
 	/* Before first pass, make a rough estimation of addrs[]
 	 * each bpf instruction is translated to less than 64 bytes
 	 */
-	for (proglen = 0, i = 0; i < flen; i++) {
+	for (proglen = 0, i = 0; i < prog->len; i++) {
 		proglen += 64;
 		addrs[i] = proglen;
 	}
-	cleanup_addr = proglen; /* epilogue address */
+	ctx.cleanup_addr = proglen;
 
 	for (pass = 0; pass < 10; pass++) {
-		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
-		/* no prologue/epilogue for trivial filters (RET something) */
-		proglen = 0;
-		prog = temp;
-
-		if (seen_or_pass0) {
-			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
-			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
-			/* note : must save %rbx in case bpf_error is hit */
-			if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
-				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
-			if (seen_or_pass0 & SEEN_XREG)
-				CLEAR_X(); /* make sure we dont leek kernel memory */
-
-			/*
-			 * If this filter needs to access skb data,
-			 * loads r9 and r8 with :
-			 *  r9 = skb->len - skb->data_len
-			 *  r8 = skb->data
-			 */
-			if (seen_or_pass0 & SEEN_DATAREF) {
-				if (offsetof(struct sk_buff, len) <= 127)
-					/* mov    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
-				else {
-					/* mov    off32(%rdi),%r9d */
-					EMIT3(0x44, 0x8b, 0x8f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				if (is_imm8(offsetof(struct sk_buff, data_len)))
-					/* sub    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
-				else {
-					EMIT3(0x44, 0x2b, 0x8f);
-					EMIT(offsetof(struct sk_buff, data_len), 4);
-				}
-
-				if (is_imm8(offsetof(struct sk_buff, data)))
-					/* mov off8(%rdi),%r8 */
-					EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
-				else {
-					/* mov off32(%rdi),%r8 */
-					EMIT3(0x4c, 0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, data), 4);
-				}
-			}
+		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+		if (proglen <= 0) {
+			image = NULL;
+			if (header)
+				module_free(NULL, header);
+			goto out;
 		}
-
-		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
-			/* first instruction sets A register (or is RET 'constant') */
-			break;
-		default:
-			/* make sure we dont leak kernel information to user */
-			CLEAR_A(); /* A = 0 */
-		}
-
-		for (i = 0; i < flen; i++) {
-			unsigned int K = filter[i].k;
-
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X: /* A += X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x01, 0xd8);		/* add %ebx,%eax */
-				break;
-			case BPF_S_ALU_ADD_K: /* A += K; */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc0, K);	/* add imm8,%eax */
-				else
-					EMIT1_off32(0x05, K);	/* add imm32,%eax */
-				break;
-			case BPF_S_ALU_SUB_X: /* A -= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x29, 0xd8);		/* sub    %ebx,%eax */
-				break;
-			case BPF_S_ALU_SUB_K: /* A -= K */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
-				else
-					EMIT1_off32(0x2d, K); /* sub imm32,%eax */
-				break;
-			case BPF_S_ALU_MUL_X: /* A *= X; */
-				seen |= SEEN_XREG;
-				EMIT3(0x0f, 0xaf, 0xc3);	/* imul %ebx,%eax */
-				break;
-			case BPF_S_ALU_MUL_K: /* A *= K */
-				if (is_imm8(K))
-					EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
-				else {
-					EMIT2(0x69, 0xc0);		/* imul imm32,%eax */
-					EMIT(K, 4);
-				}
-				break;
-			case BPF_S_ALU_DIV_X: /* A /= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 4) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
-								(addrs[i] - 4));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
-				}
-				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
-				break;
-			case BPF_S_ALU_MOD_X: /* A %= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 6) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
-								(addrs[i] - 6));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT2(0xf7, 0xf3);	/* div %ebx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_MOD_K: /* A %= K; */
-				if (K == 1) {
-					CLEAR_A();
-					break;
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_DIV_K: /* A /= K */
-				if (K == 1)
-					break;
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				break;
-			case BPF_S_ALU_AND_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x21, 0xd8);		/* and %ebx,%eax */
-				break;
-			case BPF_S_ALU_AND_K:
-				if (K >= 0xFFFFFF00) {
-					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
-				} else if (K >= 0xFFFF0000) {
-					EMIT2(0x66, 0x25);	/* and imm16,%ax */
-					EMIT(K, 2);
-				} else {
-					EMIT1_off32(0x25, K);	/* and imm32,%eax */
-				}
-				break;
-			case BPF_S_ALU_OR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x09, 0xd8);		/* or %ebx,%eax */
-				break;
-			case BPF_S_ALU_OR_K:
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
-				else
-					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
-				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x31, 0xd8);		/* xor %ebx,%eax */
-				break;
-			case BPF_S_ALU_XOR_K: /* A ^= K; */
-				if (K == 0)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xf0, K);	/* xor imm8,%eax */
-				else
-					EMIT1_off32(0x35, K);	/* xor imm32,%eax */
-				break;
-			case BPF_S_ALU_LSH_X: /* A <<= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
-				break;
-			case BPF_S_ALU_LSH_K:
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe0); /* shl %eax */
-				else
-					EMIT3(0xc1, 0xe0, K);
-				break;
-			case BPF_S_ALU_RSH_X: /* A >>= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe8);	/* mov %ebx,%ecx; shr %cl,%eax */
-				break;
-			case BPF_S_ALU_RSH_K: /* A >>= K; */
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe8); /* shr %eax */
-				else
-					EMIT3(0xc1, 0xe8, K);
-				break;
-			case BPF_S_ALU_NEG:
-				EMIT2(0xf7, 0xd8);		/* neg %eax */
-				break;
-			case BPF_S_RET_K:
-				if (!K) {
-					if (pc_ret0 == -1)
-						pc_ret0 = i;
-					CLEAR_A();
-				} else {
-					EMIT1_off32(0xb8, K);	/* mov $imm32,%eax */
-				}
-				/* fallinto */
-			case BPF_S_RET_A:
-				if (seen_or_pass0) {
-					if (i != flen - 1) {
-						EMIT_JMP(cleanup_addr - addrs[i]);
-						break;
-					}
-					if (seen_or_pass0 & SEEN_XREG)
-						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
-					EMIT1(0xc9);		/* leaveq */
-				}
-				EMIT1(0xc3);		/* ret */
-				break;
-			case BPF_S_MISC_TAX: /* X = A */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xc3);	/* mov    %eax,%ebx */
-				break;
-			case BPF_S_MISC_TXA: /* A = X */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xd8);	/* mov    %ebx,%eax */
-				break;
-			case BPF_S_LD_IMM: /* A = K */
-				if (!K)
-					CLEAR_A();
-				else
-					EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
-				break;
-			case BPF_S_LDX_IMM: /* X = K */
-				seen |= SEEN_XREG;
-				if (!K)
-					CLEAR_X();
-				else
-					EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
-				break;
-			case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
-				seen |= SEEN_MEM;
-				EMIT3(0x8b, 0x45, 0xf0 - K*4);
-				break;
-			case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x8b, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
-				seen |= SEEN_MEM;
-				EMIT3(0x89, 0x45, 0xf0 - K*4);
-				break;
-			case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x89, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_LD_W_LEN: /*	A = skb->len; */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov    off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_LDX_W_LEN: /* X = skb->len; */
-				seen |= SEEN_XREG;
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov off8(%rdi),%ebx */
-					EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x9f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-				if (is_imm8(offsetof(struct sk_buff, protocol))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, protocol), 4);
-				}
-				EMIT2(0x86, 0xc4); /* ntohs() : xchg   %al,%ah */
-				break;
-			case BPF_S_ANC_IFINDEX:
-				if (is_imm8(offsetof(struct sk_buff, dev))) {
-					/* movq off8(%rdi),%rax */
-					EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
-				} else {
-					EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
-					EMIT(offsetof(struct sk_buff, dev), 4);
-				}
-				EMIT3(0x48, 0x85, 0xc0);	/* test %rax,%rax */
-				EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
-				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-				EMIT2(0x8b, 0x80);	/* mov off32(%rax),%eax */
-				EMIT(offsetof(struct net_device, ifindex), 4);
-				break;
-			case BPF_S_ANC_MARK:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-				if (is_imm8(offsetof(struct sk_buff, mark))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, mark), 4);
-				}
-				break;
-			case BPF_S_ANC_RXHASH:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-				if (is_imm8(offsetof(struct sk_buff, hash))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, hash), 4);
-				}
-				break;
-			case BPF_S_ANC_QUEUE:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-				if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, queue_mapping), 4);
-				}
-				break;
-			case BPF_S_ANC_CPU:
-#ifdef CONFIG_SMP
-				EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
-				EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
-#else
-				CLEAR_A();
-#endif
-				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-				if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, vlan_tci), 4);
-				}
-				BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
-					EMIT3(0x80, 0xe4, 0xef); /* and    $0xef,%ah */
-				} else {
-					EMIT3(0xc1, 0xe8, 0x0c); /* shr    $0xc,%eax */
-					EMIT3(0x83, 0xe0, 0x01); /* and    $0x1,%eax */
-				}
-				break;
-			case BPF_S_ANC_PKTTYPE:
-			{
-				int off = pkt_type_offset();
-
-				if (off < 0)
-					goto out;
-				if (is_imm8(off)) {
-					/* movzbl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb6, 0x47, off);
-				} else {
-					/* movbl off32(%rdi),%eax */
-					EMIT3(0x0f, 0xb6, 0x87);
-					EMIT(off, 4);
-				}
-				EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and    $0x7,%eax */
-				break;
-			}
-			case BPF_S_LD_W_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-common_load:			seen |= SEEN_DATAREF;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call */
-				break;
-			case BPF_S_LD_H_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_half);
-				goto common_load;
-			case BPF_S_LD_B_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
-				goto common_load;
-			case BPF_S_LDX_B_MSH:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
-				seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
-				break;
-			case BPF_S_LD_W_IND:
-				func = sk_load_word;
-common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				if (K) {
-					if (is_imm8(K)) {
-						EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
-					} else {
-						EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
-						EMIT(K, 4);
-					}
-				} else {
-					EMIT2(0x89,0xde); /* mov %ebx,%esi */
-				}
-				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
-				break;
-			case BPF_S_LD_H_IND:
-				func = sk_load_half;
-				goto common_load_ind;
-			case BPF_S_LD_B_IND:
-				func = sk_load_byte;
-				goto common_load_ind;
-			case BPF_S_JMP_JA:
-				t_offset = addrs[i + K] - addrs[i];
-				EMIT_JMP(t_offset);
-				break;
-			COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
-			COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
-
-cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
-				t_offset = addrs[i + filter[i].jt] - addrs[i];
-
-				/* same targets, can avoid doing the test :) */
-				if (filter[i].jt == filter[i].jf) {
-					EMIT_JMP(t_offset);
-					break;
-				}
-
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
-					break;
-				case BPF_S_JMP_JSET_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x85, 0xd8); /* test %ebx,%eax */
-					break;
-				case BPF_S_JMP_JEQ_K:
-					if (K == 0) {
-						EMIT2(0x85, 0xc0); /* test   %eax,%eax */
-						break;
-					}
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
-					if (K <= 127)
-						EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
-					else
-						EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
-					break;
-				case BPF_S_JMP_JSET_K:
-					if (K <= 0xFF)
-						EMIT2(0xa8, K); /* test imm8,%al */
-					else if (!(K & 0xFFFF00FF))
-						EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
-					else if (K <= 0xFFFF) {
-						EMIT2(0x66, 0xa9); /* test imm16,%ax */
-						EMIT(K, 2);
-					} else {
-						EMIT1_off32(0xa9, K); /* test imm32,%eax */
-					}
-					break;
-				}
-				if (filter[i].jt != 0) {
-					if (filter[i].jf && f_offset)
-						t_offset += is_near(f_offset) ? 2 : 5;
-					EMIT_COND_JMP(t_op, t_offset);
-					if (filter[i].jf)
-						EMIT_JMP(f_offset);
-					break;
-				}
-				EMIT_COND_JMP(f_op, f_offset);
-				break;
-			default:
-				/* hmm, too complex filter, give up with jit compiler */
-				goto out;
-			}
-			ilen = prog - temp;
-			if (image) {
-				if (unlikely(proglen + ilen > oldproglen)) {
-					pr_err("bpb_jit_compile fatal error\n");
-					kfree(addrs);
-					module_free(NULL, header);
-					return;
-				}
-				memcpy(image + proglen, temp, ilen);
-			}
-			proglen += ilen;
-			addrs[i] = proglen;
-			prog = temp;
-		}
-		/* last bpf instruction is always a RET :
-		 * use it to give the cleanup instruction(s) addr
-		 */
-		cleanup_addr = proglen - 1; /* ret */
-		if (seen_or_pass0)
-			cleanup_addr -= 1; /* leaveq */
-		if (seen_or_pass0 & SEEN_XREG)
-			cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
-
 		if (image) {
 			if (proglen != oldproglen)
-				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
+				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
+				       proglen, oldproglen);
 			break;
 		}
 		if (proglen == oldproglen) {
@@ -766,17 +918,16 @@
 	}
 
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(flen, proglen, pass, image);
+		bpf_jit_dump(prog->len, proglen, 0, image);
 
 	if (image) {
 		bpf_flush_icache(header, image + proglen);
 		set_memory_ro((unsigned long)header, header->pages);
-		fp->bpf_func = (void *)image;
-		fp->jited = 1;
+		prog->bpf_func = (void *)image;
+		prog->jited = 1;
 	}
 out:
 	kfree(addrs);
-	return;
 }
 
 static void bpf_jit_free_deferred(struct work_struct *work)

diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 097e7a7..af9307f 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile

@@ -20,3 +20,4 @@
 obj-$(subst m,y,$(CONFIG_SERIAL_MRST_MAX3110)) += platform_max3111.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
new file mode 100644
index 0000000..973cf3b
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c

@@ -0,0 +1,72 @@
+/*
+ * platform_wdt.c: Watchdog platform library file
+ *
+ * (C) Copyright 2014 Intel Corporation
+ * Author: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+#include <asm/intel-mid.h>
+#include <asm/io_apic.h>
+
+#define TANGIER_EXT_TIMER0_MSI 15
+
+static struct platform_device wdt_dev = {
+	.name = "intel_mid_wdt",
+	.id = -1,
+};
+
+static int tangier_probe(struct platform_device *pdev)
+{
+	int ioapic;
+	int irq;
+	struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
+	struct io_apic_irq_attr irq_attr = { 0 };
+
+	if (!pdata)
+		return -EINVAL;
+
+	irq = pdata->irq;
+	ioapic = mp_find_ioapic(irq);
+	if (ioapic >= 0) {
+		int ret;
+		irq_attr.ioapic = ioapic;
+		irq_attr.ioapic_pin = irq;
+		irq_attr.trigger = 1;
+		/* irq_attr.polarity = 0; -> Active high */
+		ret = io_apic_set_pci_routing(NULL, irq, &irq_attr);
+		if (ret)
+			return ret;
+	} else {
+		dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
+			 irq);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct intel_mid_wdt_pdata tangier_pdata = {
+	.irq = TANGIER_EXT_TIMER0_MSI,
+	.probe = tangier_probe,
+};
+
+static int __init register_mid_wdt(void)
+{
+	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
+		wdt_dev.dev.platform_data = &tangier_pdata;
+		return platform_device_register(&wdt_dev);
+	}
+
+	return -ENODEV;
+}
+
+rootfs_initcall(register_mid_wdt);

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 9769df0..3c0809a 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile

@@ -9,18 +9,9 @@
 VDSO32-$(CONFIG_X86_32)		:= y
 VDSO32-$(CONFIG_COMPAT)		:= y
 
-vdso-install-$(VDSO64-y)	+= vdso.so
-vdso-install-$(VDSOX32-y)	+= vdsox32.so
-vdso-install-$(VDSO32-y)	+= $(vdso32-images)
-
-
 # files to link into the vdso
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
-
-vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
-
-# Filter out x32 objects.
-vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
+vobjs-nox32 := vdso-fakesections.o
 
 # files to link into kernel
 obj-y				+= vma.o
@@ -34,7 +25,7 @@
 
 obj-$(VDSO32-y)			+= vdso32-setup.o
 
-vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
 $(obj)/vdso.o: $(obj)/vdso.so
 
@@ -104,7 +95,13 @@
 			   -Wl,-z,max-page-size=4096 \
 			   -Wl,-z,common-page-size=4096
 
-vobjx32s-y := $(vobj64s:.o=-x32.o)
+# 64-bit objects to re-brand as x32
+vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
+
+# x32-rebranded versions
+vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+
+# same thing, but in the output directory
 vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
 
 # Convert 64bit object file to x32 for x32 vDSO.
@@ -176,15 +173,20 @@
 GCOV_PROFILE := n
 
 #
-# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+# Install the unstripped copies of vdso*.so.
 #
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+      cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%)
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
 	@mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 	$(call cmd,vdso_install)
 
-PHONY += vdso_install $(vdso-install-y)
-vdso_install: $(vdso-install-y)
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
 
 clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*

diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
new file mode 100644
index 0000000..cb8a8d7
--- /dev/null
+++ b/arch/x86/vdso/vdso-fakesections.c

@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014 Andy Lutomirski
+ * Subject to the GNU Public License, v.2
+ *
+ * Hack to keep broken Go programs working.
+ *
+ * The Go runtime had a couple of bugs: it would read the section table to try
+ * to figure out how many dynamic symbols there were (it shouldn't have looked
+ * at the section table at all) and, if there were no SHT_DYNSYM section table
+ * entry, it would use an uninitialized value for the number of symbols.  As a
+ * workaround, we supply a minimal section table.  vdso2c will adjust the
+ * in-memory image so that "vdso_fake_sections" becomes the section table.
+ *
+ * The bug was introduced by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31)
+ * and is being addressed in the Go runtime in this issue:
+ * https://code.google.com/p/go/issues/detail?id=8197
+ */
+
+#ifndef __x86_64__
+#error This hack is specific to the 64-bit vDSO
+#endif
+
+#include <linux/elf.h>
+
+extern const __visible struct elf64_shdr vdso_fake_sections[];
+const __visible struct elf64_shdr vdso_fake_sections[] = {
+	{
+		.sh_type = SHT_DYNSYM,
+		.sh_entsize = sizeof(Elf64_Sym),
+	}
+};

diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 450ac6e..7a6bf50 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c

@@ -54,7 +54,7 @@
 }
 
 /*
- * Evil macros to do a little-endian read.
+ * Evil macros for little-endian reads and writes
  */
 #define GLE(x, bits, ifnot)						\
 	__builtin_choose_expr(						\
@@ -62,11 +62,24 @@
 		(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
 
 extern void bad_get_le(void);
-#define LAST_LE(x)							\
+#define LAST_GLE(x)							\
 	__builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
 
 #define GET_LE(x)							\
-	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x))))
+	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))
+
+#define PLE(x, val, bits, ifnot)					\
+	__builtin_choose_expr(						\
+		(sizeof(*(x)) == bits/8),				\
+		put_unaligned_le##bits((val), (x)), ifnot)
+
+extern void bad_put_le(void);
+#define LAST_PLE(x, val)						\
+	__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())
+
+#define PUT_LE(x, val)					\
+	PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))
+
 
 #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
 

diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index 8a07463..c6eefaf 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h

@@ -18,6 +18,8 @@
 	const char *secstrings;
 	uint64_t syms[NSYMS] = {};
 
+	uint64_t fake_sections_value = 0, fake_sections_size = 0;
+
 	Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff));
 
 	/* Walk the segment table. */
@@ -84,6 +86,7 @@
 			GET_LE(&symtab_hdr->sh_entsize) * i;
 		const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
 			GET_LE(&sym->st_name);
+
 		for (k = 0; k < NSYMS; k++) {
 			if (!strcmp(name, required_syms[k])) {
 				if (syms[k]) {
@@ -93,6 +96,13 @@
 				syms[k] = GET_LE(&sym->st_value);
 			}
 		}
+
+		if (!strcmp(name, "vdso_fake_sections")) {
+			if (fake_sections_value)
+				fail("duplicate vdso_fake_sections\n");
+			fake_sections_value = GET_LE(&sym->st_value);
+			fake_sections_size = GET_LE(&sym->st_size);
+		}
 	}
 
 	/* Validate mapping addresses. */
@@ -112,11 +122,14 @@
 	if (syms[sym_end_mapping] % 4096)
 		fail("end_mapping must be a multiple of 4096\n");
 
-	/* Remove sections. */
-	hdr->e_shoff = 0;
-	hdr->e_shentsize = 0;
-	hdr->e_shnum = 0;
-	hdr->e_shstrndx = htole16(SHN_UNDEF);
+	/* Remove sections or use fakes */
+	if (fake_sections_size % sizeof(Elf_Shdr))
+		fail("vdso_fake_sections size is not a multiple of %ld\n",
+		     (long)sizeof(Elf_Shdr));
+	PUT_LE(&hdr->e_shoff, fake_sections_value);
+	PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0);
+	PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr));
+	PUT_LE(&hdr->e_shstrndx, SHN_UNDEF);
 
 	if (!name) {
 		fwrite(addr, load_size, 1, outfile);

diff --git a/block/bio.c b/block/bio.c
index 96d28ee..8c2e55e 100644
--- a/block/bio.c
+++ b/block/bio.c

@@ -849,7 +849,13 @@
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
+	unsigned int max_sectors;
+
+	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
+		max_sectors = len >> 9;
+
+	return __bio_add_page(q, bio, page, len, offset, max_sectors);
 }
 EXPORT_SYMBOL(bio_add_page);
 
@@ -1971,7 +1977,7 @@
 	/* associate blkcg if exists */
 	rcu_read_lock();
 	css = task_css(current, blkio_cgrp_id);
-	if (css && css_tryget(css))
+	if (css && css_tryget_online(css))
 		bio->bi_css = css;
 	rcu_read_unlock();
 

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1039fb9..069bc20 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c

@@ -185,7 +185,7 @@
 	lockdep_assert_held(q->queue_lock);
 
 	/* blkg holds a reference to blkcg */
-	if (!css_tryget(&blkcg->css)) {
+	if (!css_tryget_online(&blkcg->css)) {
 		ret = -EINVAL;
 		goto err_free_blkg;
 	}
@@ -1093,7 +1093,7 @@
  * Register @pol with blkcg core.  Might sleep and @pol may be modified on
  * successful registration.  Returns 0 on success and -errno on failure.
  */
-int blkcg_policy_register(struct blkcg_policy *pol)
+int __init blkcg_policy_register(struct blkcg_policy *pol)
 {
 	int i, ret;
 

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 371fe8e..cbb7f94 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h

@@ -145,7 +145,7 @@
 void blkcg_exit_queue(struct request_queue *q);
 
 /* Blkio controller policy registration */
-int blkcg_policy_register(struct blkcg_policy *pol);
+int __init blkcg_policy_register(struct blkcg_policy *pol);
 void blkcg_policy_unregister(struct blkcg_policy *pol);
 int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol);
@@ -204,7 +204,7 @@
  */
 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 {
-	return css_to_blkcg(css_parent(&blkcg->css));
+	return css_to_blkcg(blkcg->css.parent);
 }
 
 /**
@@ -580,7 +580,7 @@
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+static inline int __init blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
 static inline int blkcg_activate_policy(struct request_queue *q,
 					const struct blkcg_policy *pol) { return 0; }

diff --git a/block/blk-core.c b/block/blk-core.c
index 40d6548..f6f6b9a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -43,6 +43,7 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
@@ -1218,6 +1219,8 @@
 	if (unlikely(!rq))
 		return ERR_PTR(-ENOMEM);
 
+	blk_rq_set_block_pc(rq);
+
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
 		int ret;
@@ -1235,6 +1238,22 @@
 EXPORT_SYMBOL(blk_make_request);
 
 /**
+ * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
+ * @rq:		request to be initialized
+ *
+ */
+void blk_rq_set_block_pc(struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->__data_len = 0;
+	rq->__sector = (sector_t) -1;
+	rq->bio = rq->biotail = NULL;
+	memset(rq->__cmd, 0, sizeof(rq->__cmd));
+	rq->cmd = rq->__cmd;
+}
+EXPORT_SYMBOL(blk_rq_set_block_pc);
+
+/**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted

diff --git a/block/blk-exec.c b/block/blk-exec.c
index dbf4502..f4d27b1 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c

@@ -132,6 +132,11 @@
 	if (rq->errors)
 		err = -EIO;
 
+	if (rq->sense == sense)	{
+		rq->sense = NULL;
+		rq->sense_len = 0;
+	}
+
 	return err;
 }
 EXPORT_SYMBOL(blk_execute_rq);

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e4cd62..e11f5f8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c

@@ -82,8 +82,10 @@
 
 	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
 	smp_wmb();
-	/* we have problems to freeze the queue if it's initializing */
-	if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
+
+	/* we have problems freezing the queue if it's initializing */
+	if (!blk_queue_dying(q) &&
+	    (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
 		return 0;
 
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
@@ -183,6 +185,7 @@
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
+	rq->start_time = jiffies;
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
 	set_start_time_ns(rq);
@@ -202,6 +205,8 @@
 	rq->sense = NULL;
 
 	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->timeout = 0;
+
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
@@ -406,16 +411,7 @@
 	if (unlikely(blk_bidi_rq(rq)))
 		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
 
-	/*
-	 * Just mark start time and set the started bit. Due to memory
-	 * ordering, we know we'll see the correct deadline as long as
-	 * REQ_ATOMIC_STARTED is seen. Use the default queue timeout,
-	 * unless one has been set in the request.
-	 */
-	if (!rq->timeout)
-		rq->deadline = jiffies + q->rq_timeout;
-	else
-		rq->deadline = jiffies + rq->timeout;
+	blk_add_timer(rq);
 
 	/*
 	 * Mark us as started and clear complete. Complete might have been
@@ -967,11 +963,6 @@
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
 
 	blk_mq_hctx_mark_pending(hctx, ctx);
-
-	/*
-	 * We do this early, to ensure we are on the right CPU.
-	 */
-	blk_add_timer(rq);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
@@ -1100,10 +1091,8 @@
 {
 	init_request_from_bio(rq, bio);
 
-	if (blk_do_io_stat(rq)) {
-		rq->start_time = jiffies;
+	if (blk_do_io_stat(rq))
 		blk_account_io_start(rq, 1);
-	}
 }
 
 static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
@@ -1216,7 +1205,6 @@
 
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_start_request(rq, true);
-		blk_add_timer(rq);
 
 		/*
 		 * For OK queue, we are done. For error, kill it. Any other
@@ -1967,13 +1955,19 @@
 	return NOTIFY_OK;
 }
 
+/*
+ * Alloc a tag set to be associated with one or more request queues.
+ * May fail with EINVAL for various error conditions. May adjust the
+ * requested depth down, if it is too large. In that case, the set
+ * value will be stored in set->queue_depth.
+ */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
 	int i;
 
 	if (!set->nr_hw_queues)
 		return -EINVAL;
-	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+	if (!set->queue_depth)
 		return -EINVAL;
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
@@ -1981,6 +1975,11 @@
 	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
+	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+		pr_info("blk-mq: reduced tag depth to %u\n",
+			BLK_MQ_MAX_DEPTH);
+		set->queue_depth = BLK_MQ_MAX_DEPTH;
+	}
 
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5d21239..f1a1795 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -113,6 +113,7 @@
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -277,6 +278,26 @@
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
 /**
+ * blk_queue_chunk_sectors - set size of the chunk for this queue
+ * @q:  the request queue for the device
+ * @chunk_sectors:  chunk sectors in the usual 512b unit
+ *
+ * Description:
+ *    If a driver doesn't want IOs to cross a given chunk size, it can set
+ *    this limit and prevent merging across chunks. Note that the chunk size
+ *    must currently be a power-of-2 in sectors. Also note that the block
+ *    layer must accept a page worth of data at any offset. So if the
+ *    crossing of chunks is a hard limitation in the driver, it must still be
+ *    prepared to split single page bios.
+ **/
+void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
+{
+	BUG_ON(!is_power_of_2(chunk_sectors));
+	q->limits.chunk_sectors = chunk_sectors;
+}
+EXPORT_SYMBOL(blk_queue_chunk_sectors);
+
+/**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
  * @q:  the request queue for the device
  * @max_discard_sectors: maximum number of sectors to discard

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9353b46..3fdb21a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c

@@ -1346,10 +1346,10 @@
 	return 0;
 }
 
-static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
-		       const char *buf, bool is_u64)
+static ssize_t tg_set_conf(struct kernfs_open_file *of,
+			   char *buf, size_t nbytes, loff_t off, bool is_u64)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
@@ -1368,9 +1368,9 @@
 		ctx.v = -1;
 
 	if (is_u64)
-		*(u64 *)((void *)tg + cft->private) = ctx.v;
+		*(u64 *)((void *)tg + of_cft(of)->private) = ctx.v;
 	else
-		*(unsigned int *)((void *)tg + cft->private) = ctx.v;
+		*(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v;
 
 	throtl_log(&tg->service_queue,
 		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
@@ -1404,19 +1404,19 @@
 	}
 
 	blkg_conf_finish(&ctx);
-	return 0;
+	return nbytes;
 }
 
-static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
-			   char *buf)
+static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, true);
+	return tg_set_conf(of, buf, nbytes, off, true);
 }
 
-static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buf)
+static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, false);
+	return tg_set_conf(of, buf, nbytes, off, false);
 }
 
 static struct cftype throtl_files[] = {
@@ -1424,25 +1424,25 @@
 		.name = "throttle.read_bps_device",
 		.private = offsetof(struct throtl_grp, bps[READ]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.write_bps_device",
 		.private = offsetof(struct throtl_grp, bps[WRITE]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.read_iops_device",
 		.private = offsetof(struct throtl_grp, iops[READ]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.write_iops_device",
 		.private = offsetof(struct throtl_grp, iops[WRITE]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.io_service_bytes",

diff --git a/block/bsg.c b/block/bsg.c
index e5214c1..ff46add 100644
--- a/block/bsg.c
+++ b/block/bsg.c

@@ -196,7 +196,6 @@
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->request_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -273,6 +272,8 @@
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (!rq)
 		return ERR_PTR(-ENOMEM);
+	blk_rq_set_block_pc(rq);
+
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
 		goto out;

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 22dffeb..cadc378 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c

@@ -1670,11 +1670,11 @@
 	return 0;
 }
 
-static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				    struct cftype *cft, const char *buf,
-				    bool is_leaf_weight)
+static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
+					char *buf, size_t nbytes, loff_t off,
+					bool is_leaf_weight)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct cfq_group *cfqg;
 	int ret;
@@ -1697,19 +1697,19 @@
 	}
 
 	blkg_conf_finish(&ctx);
-	return ret;
+	return ret ?: nbytes;
 }
 
-static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buf)
+static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, false);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, false);
 }
 
-static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
-				       struct cftype *cft, char *buf)
+static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
+					   char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, true);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, true);
 }
 
 static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -1837,7 +1837,7 @@
 		.name = "weight_device",
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1851,7 +1851,7 @@
 		.name = "weight_device",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cfqg_print_weight_device,
-		.write_string = cfqg_set_weight_device,
+		.write = cfqg_set_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1863,7 +1863,7 @@
 	{
 		.name = "leaf_weight_device",
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "leaf_weight",

diff --git a/block/elevator.c b/block/elevator.c
index 1e01b66..f35eddd 100644
--- a/block/elevator.c
+++ b/block/elevator.c

@@ -845,7 +845,7 @@
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int elv_register(struct elevator_type *e)
+int __init elv_register(struct elevator_type *e)
 {
 	char *def = "";
 

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9c28a5b..14695c6 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c

@@ -229,7 +229,6 @@
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -311,6 +310,7 @@
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
 		return -ENOMEM;
+	blk_rq_set_block_pc(rq);
 
 	if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
 		blk_put_request(rq);
@@ -491,7 +491,7 @@
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -524,7 +524,7 @@
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 147bc1b..3f2bdc8 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c

@@ -1810,6 +1810,16 @@
 	acpi_os_map_generic_address(&acpi_gbl_FADT.xpm1b_event_block);
 	acpi_os_map_generic_address(&acpi_gbl_FADT.xgpe0_block);
 	acpi_os_map_generic_address(&acpi_gbl_FADT.xgpe1_block);
+	if (acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) {
+		/*
+		 * Use acpi_os_map_generic_address to pre-map the reset
+		 * register if it's in system memory.
+		 */
+		int rv;
+
+		rv = acpi_os_map_generic_address(&acpi_gbl_FADT.reset_register);
+		pr_debug(PREFIX "%s: map reset_reg status %d\n", __func__, rv);
+	}
 
 	return AE_OK;
 }
@@ -1838,6 +1848,8 @@
 	acpi_os_unmap_generic_address(&acpi_gbl_FADT.xgpe0_block);
 	acpi_os_unmap_generic_address(&acpi_gbl_FADT.xpm1b_event_block);
 	acpi_os_unmap_generic_address(&acpi_gbl_FADT.xpm1a_event_block);
+	if (acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER)
+		acpi_os_unmap_generic_address(&acpi_gbl_FADT.reset_register);
 
 	destroy_workqueue(kacpid_wq);
 	destroy_workqueue(kacpi_notify_wq);

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index c11e379..b3e3cc7 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c

@@ -19,6 +19,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <asm/io.h>
+#include <trace/events/power.h>
 
 #include "internal.h"
 #include "sleep.h"
@@ -501,6 +502,7 @@
 
 	ACPI_FLUSH_CPU_CACHE();
 
+	trace_suspend_resume(TPS("acpi_suspend"), acpi_state, true);
 	switch (acpi_state) {
 	case ACPI_STATE_S1:
 		barrier();
@@ -516,6 +518,7 @@
 		pr_info(PREFIX "Low-level resume complete\n");
 		break;
 	}
+	trace_suspend_resume(TPS("acpi_suspend"), acpi_state, false);
 
 	/* This violates the spec but is required for bug compatibility. */
 	acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1);

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 101fb09..fb9ffe9 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c

@@ -82,7 +82,7 @@
  * For Windows 8 systems: used to decide if video module
  * should skip registering backlight interface of its own.
  */
-static int use_native_backlight_param = -1;
+static int use_native_backlight_param = 1;
 module_param_named(use_native_backlight, use_native_backlight_param, int, 0444);
 static bool use_native_backlight_dmi = false;
 

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 0033faf..7671dba 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig

@@ -123,6 +123,15 @@
 
 	  If unsure, say N.
 
+config AHCI_MVEBU
+	tristate "Marvell EBU AHCI SATA support"
+	depends on ARCH_MVEBU
+	help
+	  This option enables support for the Marvell EBU SoC's
+	  onboard AHCI SATA.
+
+	  If unsure, say N.
+
 config AHCI_SUNXI
 	tristate "Allwinner sunxi AHCI SATA support"
 	depends on ARCH_SUNXI

diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 44c8016..5a02aee 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile

@@ -12,6 +12,7 @@
 obj-$(CONFIG_SATA_HIGHBANK)	+= sata_highbank.o libahci.o
 obj-$(CONFIG_AHCI_DA850)	+= ahci_da850.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_IMX)		+= ahci_imx.o libahci.o libahci_platform.o
+obj-$(CONFIG_AHCI_MVEBU)	+= ahci_mvebu.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_SUNXI)	+= ahci_sunxi.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_ST)		+= ahci_st.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_XGENE)	+= ahci_xgene.o libahci.o libahci_platform.o

diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index b51605a..0cd7c7a 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c

@@ -77,7 +77,7 @@
 static int acard_ahci_port_start(struct ata_port *ap);
 static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int acard_ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
 static int acard_ahci_pci_device_resume(struct pci_dev *pdev);
 #endif
@@ -118,13 +118,13 @@
 	.id_table		= acard_ahci_pci_tbl,
 	.probe			= acard_ahci_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= acard_ahci_pci_device_suspend,
 	.resume			= acard_ahci_pci_device_resume,
 #endif
 };
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int acard_ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 6070781..dae5607 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c

@@ -445,10 +445,14 @@
 	  .driver_data = board_ahci_yes_fbs },			/* 88se9172 */
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9192),
 	  .driver_data = board_ahci_yes_fbs },			/* 88se9172 on some Gigabyte */
+	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0),
+	  .driver_data = board_ahci_yes_fbs },
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x91a3),
 	  .driver_data = board_ahci_yes_fbs },
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230),
 	  .driver_data = board_ahci_yes_fbs },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642),
+	  .driver_data = board_ahci_yes_fbs },
 
 	/* Promise */
 	{ PCI_VDEVICE(PROMISE, 0x3f20), board_ahci },	/* PDC42819 */

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index af63c75..05882e4 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h

@@ -237,6 +237,7 @@
 						        error-handling stage) */
 	AHCI_HFLAG_MULTI_MSI		= (1 << 16), /* multiple PCI MSIs */
 	AHCI_HFLAG_NO_DEVSLP		= (1 << 17), /* no device sleep */
+	AHCI_HFLAG_NO_FBS		= (1 << 18), /* no FBS */
 
 	/* ap->flags bits */
 

diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c
index 2c83613..2b77d53 100644
--- a/drivers/ata/ahci_da850.c
+++ b/drivers/ata/ahci_da850.c

@@ -85,7 +85,8 @@
 
 	da850_sata_init(dev, pwrdn_reg, hpriv->mmio);
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info,
+				     0, 0, 0);
 	if (rc)
 		goto disable_resources;
 

diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 8befeb6..3a90152 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c

@@ -432,7 +432,8 @@
 	reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
 	writel(reg_val, hpriv->mmio + IMX_TIMER1MS);
 
-	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, 0, 0);
+	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info,
+				      0, 0, 0);
 	if (ret)
 		imx_sata_disable(hpriv);
 

diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
new file mode 100644
index 0000000..fd3dfd7
--- /dev/null
+++ b/drivers/ata/ahci_mvebu.c

@@ -0,0 +1,128 @@
+/*
+ * AHCI glue platform driver for Marvell EBU SOCs
+ *
+ * Copyright (C) 2014 Marvell
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ * Marcin Wojtas <mw@semihalf.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/ahci_platform.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include "ahci.h"
+
+#define AHCI_VENDOR_SPECIFIC_0_ADDR  0xa0
+#define AHCI_VENDOR_SPECIFIC_0_DATA  0xa4
+
+#define AHCI_WINDOW_CTRL(win)	(0x60 + ((win) << 4))
+#define AHCI_WINDOW_BASE(win)	(0x64 + ((win) << 4))
+#define AHCI_WINDOW_SIZE(win)	(0x68 + ((win) << 4))
+
+static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv,
+				   const struct mbus_dram_target_info *dram)
+{
+	const struct mbus_dram_window *cs;
+	int win;
+
+	/* Zero the control/base/size registers of all four windows first. */
+	for (win = 0; win < 4; win++) {
+		writel(0, hpriv->mmio + AHCI_WINDOW_CTRL(win));
+		writel(0, hpriv->mmio + AHCI_WINDOW_BASE(win));
+		writel(0, hpriv->mmio + AHCI_WINDOW_SIZE(win));
+	}
+	/* Then program one window per chip select listed in @dram. */
+	for (win = 0, cs = dram->cs; win < dram->num_cs; win++, cs++) {
+		writel((cs->mbus_attr << 8) |
+		       (dram->mbus_dram_target_id << 4) | 1,
+		       hpriv->mmio + AHCI_WINDOW_CTRL(win));
+		writel(cs->base, hpriv->mmio + AHCI_WINDOW_BASE(win));
+		writel((cs->size - 1) & 0xffff0000,
+		       hpriv->mmio + AHCI_WINDOW_SIZE(win));
+	}
+}
+
+static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
+{
+	/*
+	 * Enable the regret bit to allow the SATA unit to regret a
+	 * request that didn't receive an acknowledge and avoid a
+	 * deadlock
+	 */
+	writel(0x4, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_ADDR);
+	writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
+}
+
+static const struct ata_port_info ahci_mvebu_port_info = {
+	.flags	   = AHCI_FLAG_COMMON,
+	.pio_mask  = ATA_PIO4,
+	.udma_mask = ATA_UDMA6,
+	.port_ops  = &ahci_platform_ops,
+};
+
+/* Probe: set up MBus windows and the regret bit, then register the host. */
+static int ahci_mvebu_probe(struct platform_device *pdev)
+{
+	struct ahci_host_priv *hpriv;
+	const struct mbus_dram_target_info *dram;
+	int rc;
+
+	hpriv = ahci_platform_get_resources(pdev);
+	if (IS_ERR(hpriv))
+		return PTR_ERR(hpriv);
+
+	rc = ahci_platform_enable_resources(hpriv);
+	if (rc)
+		return rc;
+
+	dram = mv_mbus_dram_info();
+	if (!dram) {
+		/* Resources were enabled above: release them before bailing. */
+		ahci_platform_disable_resources(hpriv);
+		return -ENODEV;
+	}
+
+	ahci_mvebu_mbus_config(hpriv, dram);
+	ahci_mvebu_regret_option(hpriv);
+
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info,
+				     0, 0, 0);
+	if (rc)
+		ahci_platform_disable_resources(hpriv);
+
+	return rc;
+}
+
+static const struct of_device_id ahci_mvebu_of_match[] = {
+	{ .compatible = "marvell,armada-380-ahci", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match);
+
+/*
+ * We currently don't provide power management related operations,
+ * since there is no suspend/resume support at the platform level for
+ * Armada 38x for the moment.
+ */
+static struct platform_driver ahci_mvebu_driver = {
+	.probe = ahci_mvebu_probe,
+	.remove = ata_platform_remove_one,
+	.driver = {
+		.name = "ahci-mvebu",
+		.owner = THIS_MODULE,
+		.of_match_table = ahci_mvebu_of_match,
+	},
+};
+module_platform_driver(ahci_mvebu_driver);
+
+MODULE_DESCRIPTION("Marvell EBU AHCI SATA driver");
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>, Marcin Wojtas <mw@semihalf.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ahci_mvebu");

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index ef67e79..ebe505c 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c

@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/device.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/ahci_platform.h>
@@ -33,6 +34,7 @@
 	struct device *dev = &pdev->dev;
 	struct ahci_platform_data *pdata = dev_get_platdata(dev);
 	struct ahci_host_priv *hpriv;
+	unsigned long hflags = 0;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -55,7 +57,11 @@
 			goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0);
+	if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci"))
+		hflags |= AHCI_HFLAG_NO_FBS;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto pdata_exit;
 
@@ -76,6 +82,7 @@
 	{ .compatible = "snps,exynos5440-ahci", },
 	{ .compatible = "ibm,476gtr-ahci", },
 	{ .compatible = "snps,dwc-ahci", },
+	{ .compatible = "hisilicon,hisi-ahci", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, ahci_of_match);

diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index 6332222..2595598 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c

@@ -166,7 +166,7 @@
 	if (err)
 		return err;
 
-	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0);
+	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0, 0);
 	if (err) {
 		ahci_platform_disable_resources(hpriv);
 		return err;

diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index 42d3f64..02002f1 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c

@@ -157,8 +157,6 @@
 }
 
 static const struct ata_port_info ahci_sunxi_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
-			  AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask	= ATA_PIO4,
 	.udma_mask	= ATA_UDMA6,
@@ -169,6 +167,7 @@
 {
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -185,7 +184,11 @@
 	if (rc)
 		goto disable_resources;
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, 0, 0);
+	hflags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
+		 AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 

diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index 77c89bf..042a9bb 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c

@@ -303,7 +303,6 @@
 };
 
 static const struct ata_port_info xgene_ahci_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask = ATA_PIO4,
 	.udma_mask = ATA_UDMA6,
@@ -382,6 +381,7 @@
 	struct ahci_host_priv *hpriv;
 	struct xgene_ahci_context *ctx;
 	struct resource *res;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -450,7 +450,10 @@
 		goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, 0, 0);
+	hflags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 

diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 9498a7d..9ff545c 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c

@@ -241,7 +241,7 @@
 	.id_table	= ata_generic,
 	.probe 		= ata_generic_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 6334c8d..893e30e 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c

@@ -830,7 +830,7 @@
 	return ap->ops->bmdma_status(ap) & ATA_DMA_INTR;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int piix_broken_suspend(void)
 {
 	static const struct dmi_system_id sysids[] = {
@@ -1767,7 +1767,7 @@
 	.id_table		= piix_pci_tbl,
 	.probe			= piix_init_one,
 	.remove			= piix_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= piix_pci_device_suspend,
 	.resume			= piix_pci_device_resume,
 #endif

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index b986145..40ea583 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c

@@ -464,6 +464,11 @@
 		cap |= HOST_CAP_FBS;
 	}
 
+	if ((cap & HOST_CAP_FBS) && (hpriv->flags & AHCI_HFLAG_NO_FBS)) {
+		dev_info(dev, "controller can't do FBS, turning off CAP_FBS\n");
+		cap &= ~HOST_CAP_FBS;
+	}
+
 	if (force_port_map && port_map != force_port_map) {
 		dev_info(dev, "forcing port_map 0x%x -> 0x%x\n",
 			 port_map, force_port_map);

diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 7cb3a85..3a5b4ed 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c

@@ -283,6 +283,7 @@
  * @pdev: platform device pointer for the host
  * @hpriv: ahci-host private data for the host
  * @pi_template: template for the ata_port_info to use
+ * @host_flags: ahci host flags used in ahci_host_priv
  * @force_port_map: param passed to ahci_save_initial_config
  * @mask_port_map: param passed to ahci_save_initial_config
  *
@@ -296,6 +297,7 @@
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map)
 {
@@ -312,7 +314,8 @@
 	}
 
 	/* prepare host */
-	hpriv->flags |= (unsigned long)pi.private_data;
+	pi.private_data = (void *)host_flags;
+	hpriv->flags |= host_flags;
 
 	ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map);
 

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index ef8567d..72691fd 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c

@@ -1993,7 +1993,11 @@
 	memcpy(rbuf, hdr, sizeof(hdr));
 	memcpy(&rbuf[8], "ATA     ", 8);
 	ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
-	ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
+
+	/* From SAT, use last 2 words from fw rev unless they are spaces */
+	ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV + 2, 4);
+	if (strncmp(&rbuf[32], "    ", 4) == 0)
+		ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
 
 	if (rbuf[32] == 0 || rbuf[32] == ' ')
 		memcpy(&rbuf[32], "n/a ", 4);

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index b603720..1121153 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c

@@ -2433,15 +2433,6 @@
 		mask = (1 << 2) | (1 << 0);
 		if ((tmp8 & mask) != mask)
 			legacy_mode = 1;
-#if defined(CONFIG_NO_ATA_LEGACY)
-		/* Some platforms with PCI limits cannot address compat
-		   port space. In that case we punt if their firmware has
-		   left a device in compatibility mode */
-		if (legacy_mode) {
-			printk(KERN_ERR "ata: Compatibility mode ATA is not supported on this platform, skipping.\n");
-			return -EOPNOTSUPP;
-		}
-#endif
 	}
 
 	if (!devres_open_group(dev, NULL, GFP_KERNEL))

diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index 5108b87..b70fce2 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c

@@ -265,7 +265,7 @@
 	.id_table		= pacpi_pci_tbl,
 	.probe			= pacpi_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 1b7b2cc..d19cd88 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c

@@ -589,7 +589,7 @@
 		return ata_pci_bmdma_init_one(pdev, ppi, &ali_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int ali_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -616,7 +616,7 @@
 	.id_table	= ali,
 	.probe 		= ali_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ali_reinit_one,
 #endif

diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c
index 1206fa6..8d4d959 100644
--- a/drivers/ata/pata_amd.c
+++ b/drivers/ata/pata_amd.c

@@ -574,7 +574,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &amd_sht, hpriv, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int amd_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -625,7 +625,7 @@
 	.id_table	= amd,
 	.probe 		= amd_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= amd_reinit_one,
 #endif

diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c
index 3aa4e65..96c05c9 100644
--- a/drivers/ata/pata_artop.c
+++ b/drivers/ata/pata_artop.c

@@ -422,7 +422,7 @@
 	{ }	/* terminate list */
 };
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int atp8xx_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -444,7 +444,7 @@
 	.id_table		= artop_pci_tbl,
 	.probe			= artop_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= atp8xx_reinit_one,
 #endif

diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index 30fa4ca..970f776 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c

@@ -298,7 +298,7 @@
 	.id_table	= atiixp,
 	.probe 		= atiixp_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.resume		= ata_pci_device_resume,
 	.suspend	= ata_pci_device_suspend,
 #endif

diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c
index 7e73a0f..a705cfc 100644
--- a/drivers/ata/pata_atp867x.c
+++ b/drivers/ata/pata_atp867x.c

@@ -530,7 +530,7 @@
 	return rc;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int atp867x_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -558,7 +558,7 @@
 	.id_table 	= atp867x_pci_tbl,
 	.probe 		= atp867x_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= atp867x_reinit_one,
 #endif

diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index ba0d8a2..03f2f2b 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c

@@ -1619,7 +1619,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct ata_host *host = platform_get_drvdata(pdev);

diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index 57f1be6..c47caa8 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c

@@ -231,7 +231,7 @@
 	return ata_pci_sff_init_one(pdev, ppi, &cmd640_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int cmd640_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -256,7 +256,7 @@
 	.id_table	= cmd640,
 	.probe 		= cmd640_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= cmd640_reinit_one,
 #endif

diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c
index 6bca350..13ca588 100644
--- a/drivers/ata/pata_cmd64x.c
+++ b/drivers/ata/pata_cmd64x.c

@@ -487,7 +487,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &cmd64x_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int cmd64x_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -518,7 +518,7 @@
 	.id_table	= cmd64x,
 	.probe 		= cmd64x_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= cmd64x_reinit_one,
 #endif

diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index bcde4b7..d65cb9d 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c

@@ -229,7 +229,7 @@
 	return ata_host_register(host, &cs5520_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 /**
  *	cs5520_reinit_one	-	device resume
  *	@pdev: PCI device
@@ -278,7 +278,7 @@
 	pci_save_state(pdev);
 	return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 /* For now keep DMA off. We can set it for all but A rev CS5510 once the
    core ATA code can handle it */
@@ -295,7 +295,7 @@
 	.id_table	= pata_cs5520,
 	.probe 		= cs5520_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= cs5520_pci_device_suspend,
 	.resume		= cs5520_reinit_one,
 #endif

diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c
index 8afe854..48ae4b4 100644
--- a/drivers/ata/pata_cs5530.c
+++ b/drivers/ata/pata_cs5530.c

@@ -326,7 +326,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &cs5530_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int cs5530_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -343,7 +343,7 @@
 	ata_host_resume(host);
 	return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct pci_device_id cs5530[] = {
 	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE), },
@@ -356,7 +356,7 @@
 	.id_table	= cs5530,
 	.probe 		= cs5530_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= cs5530_reinit_one,
 #endif

diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c
index 2c0986f..97584e8 100644
--- a/drivers/ata/pata_cs5535.c
+++ b/drivers/ata/pata_cs5535.c

@@ -200,7 +200,7 @@
 	.id_table	= cs5535,
 	.probe 		= cs5535_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index 32ddcae..6c15a55 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c

@@ -297,7 +297,7 @@
 	.id_table	= cs5536,
 	.probe		= cs5536_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c
index 3435bd6..7930184 100644
--- a/drivers/ata/pata_cypress.c
+++ b/drivers/ata/pata_cypress.c

@@ -151,7 +151,7 @@
 	.id_table	= cy82c693,
 	.probe 		= cy82c693_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index f440892..4a57a6f 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c

@@ -288,7 +288,7 @@
 	.id_table		= efar_pci_tbl,
 	.probe			= efar_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c
index cad9d45..6ad5c07 100644
--- a/drivers/ata/pata_ep93xx.c
+++ b/drivers/ata/pata_ep93xx.c

@@ -708,8 +708,8 @@
 	struct dma_chan *channel = qc->dma_dir == DMA_TO_DEVICE
 		? drv_data->dma_tx_channel : drv_data->dma_rx_channel;
 
-	txd = channel->device->device_prep_slave_sg(channel, qc->sg,
-		 qc->n_elem, qc->dma_dir, DMA_CTRL_ACK, NULL);
+	txd = dmaengine_prep_slave_sg(channel, qc->sg, qc->n_elem, qc->dma_dir,
+		DMA_CTRL_ACK);
 	if (!txd) {
 		dev_err(qc->ap->dev, "failed to prepare slave for sg dma\n");
 		return;

diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 8e76f79..cbc3de7 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c

@@ -386,7 +386,7 @@
 	return ata_pci_bmdma_init_one(dev, ppi, &hpt36x_sht, hpriv, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int hpt36x_reinit_one(struct pci_dev *dev)
 {
 	struct ata_host *host = pci_get_drvdata(dev);
@@ -411,7 +411,7 @@
 	.id_table	= hpt36x,
 	.probe		= hpt36x_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= hpt36x_reinit_one,
 #endif

diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index 255c5aa..d019cdd 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c

@@ -249,7 +249,7 @@
 				 IRQF_SHARED, &hpt3x3_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int hpt3x3_reinit_one(struct pci_dev *dev)
 {
 	struct ata_host *host = pci_get_drvdata(dev);
@@ -277,7 +277,7 @@
 	.id_table	= hpt3x3,
 	.probe 		= hpt3x3_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= hpt3x3_reinit_one,
 #endif

diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index e0872db..af42457 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c

@@ -185,7 +185,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int pata_imx_suspend(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
@@ -244,7 +244,7 @@
 		.name		= DRV_NAME,
 		.of_match_table	= imx_pata_dt_ids,
 		.owner		= THIS_MODULE,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 		.pm		= &pata_imx_pm_ops,
 #endif
 	},

diff --git a/drivers/ata/pata_it8213.c b/drivers/ata/pata_it8213.c
index 81369d1..4f97d1e 100644
--- a/drivers/ata/pata_it8213.c
+++ b/drivers/ata/pata_it8213.c

@@ -283,7 +283,7 @@
 	.id_table		= it8213_pci_tbl,
 	.probe			= it8213_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index dc3d787..a5088ec 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c

@@ -935,7 +935,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &it821x_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int it821x_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -965,7 +965,7 @@
 	.id_table	= it821x,
 	.probe 		= it821x_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= it821x_reinit_one,
 #endif

diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c
index b1cfa02..4d1a5d2 100644
--- a/drivers/ata/pata_jmicron.c
+++ b/drivers/ata/pata_jmicron.c

@@ -157,7 +157,7 @@
 	.id_table		= jmicron_pci_tbl,
 	.probe			= jmicron_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index c28d064..a02f76f 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c

@@ -845,8 +845,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int pata_macio_do_suspend(struct pata_macio_priv *priv, pm_message_t mesg)
 {
 	int rc;
@@ -907,8 +906,7 @@
 
 	return 0;
 }
-
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static struct scsi_host_template pata_macio_sht = {
 	ATA_BASE_SHT(DRV_NAME),
@@ -1208,8 +1206,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int pata_macio_suspend(struct macio_dev *mdev, pm_message_t mesg)
 {
 	struct ata_host *host = macio_get_drvdata(mdev);
@@ -1223,8 +1220,7 @@
 
 	return pata_macio_do_resume(host->private_data);
 }
-
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PMAC_MEDIABAY
 static void pata_macio_mb_event(struct macio_dev* mdev, int mb_state)
@@ -1316,8 +1312,7 @@
 	ata_host_detach(host);
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int pata_macio_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -1331,8 +1326,7 @@
 
 	return pata_macio_do_resume(host->private_data);
 }
-
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static struct of_device_id pata_macio_match[] =
 {
@@ -1360,7 +1354,7 @@
 	},
 	.probe		= pata_macio_attach,
 	.remove		= pata_macio_detach,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= pata_macio_suspend,
 	.resume		= pata_macio_resume,
 #endif
@@ -1383,7 +1377,7 @@
 	.id_table	= pata_macio_pci_match,
 	.probe		= pata_macio_pci_attach,
 	.remove		= pata_macio_pci_detach,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= pata_macio_pci_suspend,
 	.resume		= pata_macio_pci_resume,
 #endif

diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index 6bad3df..ae9feb1 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c

@@ -171,7 +171,7 @@
 	.id_table		= marvell_pci_tbl,
 	.probe			= marvell_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c
index 0024ced..ccd1c83 100644
--- a/drivers/ata/pata_mpc52xx.c
+++ b/drivers/ata/pata_mpc52xx.c

@@ -819,9 +819,7 @@
 	return 0;
 }
 
-
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int
 mpc52xx_ata_suspend(struct platform_device *op, pm_message_t state)
 {
@@ -847,10 +845,8 @@
 
 	return 0;
 }
-
 #endif
 
-
 static struct of_device_id mpc52xx_ata_of_match[] = {
 	{ .compatible = "fsl,mpc5200-ata", },
 	{ .compatible = "mpc5200-ata", },
@@ -861,7 +857,7 @@
 static struct platform_driver mpc52xx_ata_of_platform_driver = {
 	.probe		= mpc52xx_ata_probe,
 	.remove		= mpc52xx_ata_remove,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= mpc52xx_ata_suspend,
 	.resume		= mpc52xx_ata_resume,
 #endif

diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c
index f39a537..202b4d6 100644
--- a/drivers/ata/pata_mpiix.c
+++ b/drivers/ata/pata_mpiix.c

@@ -223,7 +223,7 @@
 	.id_table	= mpiix,
 	.probe 		= mpiix_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index e3b9709..0ea1833 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c

@@ -92,7 +92,7 @@
 	.id_table		= netcell_pci_tbl,
 	.probe			= netcell_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index 56201a6..efb272d 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c

@@ -152,8 +152,7 @@
 				 IRQF_SHARED, &ninja32_sht);
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int ninja32_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -183,7 +182,7 @@
 	.id_table	= ninja32,
 	.probe 		= ninja32_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ninja32_reinit_one,
 #endif

diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c
index 6154c3e..200e1eb 100644
--- a/drivers/ata/pata_ns87410.c
+++ b/drivers/ata/pata_ns87410.c

@@ -161,7 +161,7 @@
 	.id_table	= ns87410,
 	.probe 		= ns87410_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index d44df7c..84c6b22 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c

@@ -385,7 +385,7 @@
 	{ }	/* terminate list */
 };
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int ns87415_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -407,7 +407,7 @@
 	.id_table		= ns87415_pci_tbl,
 	.probe			= ns87415_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ns87415_reinit_one,
 #endif

diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 83c4ddb..2a97d3a 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c

@@ -865,7 +865,7 @@
 	if (node == NULL)
 		return -EINVAL;
 
-	cf_port = kzalloc(sizeof(*cf_port), GFP_KERNEL);
+	cf_port = devm_kzalloc(&pdev->dev, sizeof(*cf_port), GFP_KERNEL);
 	if (!cf_port)
 		return -ENOMEM;
 
@@ -881,10 +881,9 @@
 	n_size = of_n_size_cells(node);
 
 	reg_prop = of_find_property(node, "reg", &reg_len);
-	if (!reg_prop || reg_len < sizeof(__be32)) {
-		rv = -EINVAL;
-		goto free_cf_port;
-	}
+	if (!reg_prop || reg_len < sizeof(__be32))
+		return -EINVAL;
+
 	cs_num = reg_prop->value;
 	cf_port->cs0 = be32_to_cpup(cs_num);
 
@@ -901,16 +900,13 @@
 				res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0);
 				if (!res_dma) {
 					of_node_put(dma_node);
-					rv = -EINVAL;
-					goto free_cf_port;
+					return -EINVAL;
 				}
 				cf_port->dma_base = (u64)devm_ioremap_nocache(&pdev->dev, res_dma->start,
 									 resource_size(res_dma));
-
 				if (!cf_port->dma_base) {
 					of_node_put(dma_node);
-					rv = -EINVAL;
-					goto free_cf_port;
+					return -EINVAL;
 				}
 
 				irq_handler = octeon_cf_interrupt;
@@ -921,41 +917,34 @@
 			of_node_put(dma_node);
 		}
 		res_cs1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		if (!res_cs1) {
-			rv = -EINVAL;
-			goto free_cf_port;
-		}
+		if (!res_cs1)
+			return -EINVAL;
+
 		cs1 = devm_ioremap_nocache(&pdev->dev, res_cs1->start,
 					   resource_size(res_cs1));
-
 		if (!cs1)
-			goto free_cf_port;
+			return rv;
 
-		if (reg_len < (n_addr + n_size + 1) * sizeof(__be32)) {
-			rv = -EINVAL;
-			goto free_cf_port;
-		}
+		if (reg_len < (n_addr + n_size + 1) * sizeof(__be32))
+			return -EINVAL;
+
 		cs_num += n_addr + n_size;
 		cf_port->cs1 = be32_to_cpup(cs_num);
 	}
 
 	res_cs0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	if (!res_cs0) {
-		rv = -EINVAL;
-		goto free_cf_port;
-	}
+	if (!res_cs0)
+		return -EINVAL;
 
 	cs0 = devm_ioremap_nocache(&pdev->dev, res_cs0->start,
 				   resource_size(res_cs0));
-
 	if (!cs0)
-		goto free_cf_port;
+		return rv;
 
 	/* allocate host */
 	host = ata_host_alloc(&pdev->dev, 1);
 	if (!host)
-		goto free_cf_port;
+		return rv;
 
 	ap = host->ports[0];
 	ap->private_data = cf_port;
@@ -1020,17 +1009,12 @@
 
 	ata_port_desc(ap, "cmd %p ctl %p", base, ap->ioaddr.ctl_addr);
 
-
 	dev_info(&pdev->dev, "version " DRV_VERSION" %d bit%s.\n",
 		 is_16bit ? 16 : 8,
 		 cf_port->is_true_ide ? ", True IDE" : "");
 
 	return ata_host_activate(host, irq, irq_handler,
 				 IRQF_SHARED, &octeon_cf_sht);
-
-free_cf_port:
-	kfree(cf_port);
-	return rv;
 }
 
 static void octeon_cf_shutdown(struct device *dev)

diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c
index 319b644..b9bf78b 100644
--- a/drivers/ata/pata_oldpiix.c
+++ b/drivers/ata/pata_oldpiix.c

@@ -258,7 +258,7 @@
 	.id_table		= oldpiix_pci_tbl,
 	.probe			= oldpiix_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_opti.c b/drivers/ata/pata_opti.c
index fb042e0..3a944a0 100644
--- a/drivers/ata/pata_opti.c
+++ b/drivers/ata/pata_opti.c

@@ -184,7 +184,7 @@
 	.id_table	= opti,
 	.probe 		= opti_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_optidma.c b/drivers/ata/pata_optidma.c
index bb71ea2..bdec7ef 100644
--- a/drivers/ata/pata_optidma.c
+++ b/drivers/ata/pata_optidma.c

@@ -440,7 +440,7 @@
 	.id_table	= optidma,
 	.probe 		= optidma_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index 1151f23..4d06a5c 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c

@@ -62,7 +62,7 @@
 };
 
 static int pdc2027x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int pdc2027x_reinit_one(struct pci_dev *pdev);
 #endif
 static int pdc2027x_prereset(struct ata_link *link, unsigned long deadline);
@@ -128,7 +128,7 @@
 	.id_table		= pdc2027x_pci_tbl,
 	.probe			= pdc2027x_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= pdc2027x_reinit_one,
 #endif
@@ -761,7 +761,7 @@
 				 IRQF_SHARED, &pdc2027x_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int pdc2027x_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);

diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c
index defa050..9001991 100644
--- a/drivers/ata/pata_pdc202xx_old.c
+++ b/drivers/ata/pata_pdc202xx_old.c

@@ -377,7 +377,7 @@
 	.id_table	= pdc202xx,
 	.probe 		= pdc202xx_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c
index 0b46be1..35cb0e2 100644
--- a/drivers/ata/pata_piccolo.c
+++ b/drivers/ata/pata_piccolo.c

@@ -110,7 +110,7 @@
 	.id_table	= ata_tosh,
 	.probe 		= ata_tosh_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c
index be3f102..a3f1123d 100644
--- a/drivers/ata/pata_radisys.c
+++ b/drivers/ata/pata_radisys.c

@@ -237,7 +237,7 @@
 	.id_table		= radisys_pci_tbl,
 	.probe			= radisys_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_rdc.c b/drivers/ata/pata_rdc.c
index 521b213..9ce5952 100644
--- a/drivers/ata/pata_rdc.c
+++ b/drivers/ata/pata_rdc.c

@@ -382,7 +382,7 @@
 	.id_table		= rdc_pci_tbl,
 	.probe			= rdc_init_one,
 	.remove			= rdc_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index caedc90..b3ec18c 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c

@@ -101,7 +101,7 @@
 	return -ENODEV;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int rz1000_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -133,7 +133,7 @@
 	.id_table	= pata_rz1000,
 	.probe 		= rz1000_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= rz1000_reinit_one,
 #endif

diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
index 0610e78..fb52883 100644
--- a/drivers/ata/pata_samsung_cf.c
+++ b/drivers/ata/pata_samsung_cf.c

@@ -619,7 +619,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int pata_s3c_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -670,7 +670,7 @@
 	.driver		= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 		.pm	= &pata_s3c_pm_ops,
 #endif
 	},

diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index 96a232f..c71de5d 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c

@@ -254,7 +254,7 @@
 	.id_table	= sc1200,
 	.probe 		= sc1200_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index f1f5b5a..4e006d7 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c

@@ -1096,7 +1096,7 @@
 	.id_table		= scc_pci_tbl,
 	.probe			= scc_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_sch.c b/drivers/ata/pata_sch.c
index 5a1cde0..b920c34 100644
--- a/drivers/ata/pata_sch.c
+++ b/drivers/ata/pata_sch.c

@@ -64,7 +64,7 @@
 	.id_table		= sch_pci_tbl,
 	.probe			= sch_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index e27f31f..fc5f31d 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c

@@ -436,7 +436,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &serverworks_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int serverworks_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -468,7 +468,7 @@
 	.id_table	= serverworks,
 	.probe 		= serverworks_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= serverworks_reinit_one,
 #endif

diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 73fe362..f597edc 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c

@@ -403,7 +403,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &sil680_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sil680_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -429,7 +429,7 @@
 	.id_table	= sil680,
 	.probe 		= sil680_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= sil680_reinit_one,
 #endif

diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index 78d913a..626f989 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c

@@ -869,7 +869,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &sis_sht, chipset, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sis_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -899,7 +899,7 @@
 	.id_table		= sis_pci_tbl,
 	.probe			= sis_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= sis_reinit_one,
 #endif

diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c
index 900f0e4..4935f61 100644
--- a/drivers/ata/pata_sl82c105.c
+++ b/drivers/ata/pata_sl82c105.c

@@ -337,7 +337,7 @@
 	return ata_pci_bmdma_init_one(dev, ppi, &sl82c105_sht, NULL, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sl82c105_reinit_one(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -365,7 +365,7 @@
 	.id_table	= sl82c105,
 	.probe 		= sl82c105_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= sl82c105_reinit_one,
 #endif

diff --git a/drivers/ata/pata_triflex.c b/drivers/ata/pata_triflex.c
index 7bc78e2..d9364af 100644
--- a/drivers/ata/pata_triflex.c
+++ b/drivers/ata/pata_triflex.c

@@ -207,7 +207,7 @@
 	{ },
 };
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int triflex_ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -233,7 +233,7 @@
 	.id_table	= triflex,
 	.probe 		= triflex_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= triflex_ata_pci_device_suspend,
 	.resume		= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index f6c9632..1ca6bca 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c

@@ -659,7 +659,7 @@
 	return ata_pci_bmdma_init_one(pdev, ppi, &via_sht, (void *)config, 0);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 /**
  *	via_reinit_one		-	reinit after resume
  *	@pdev; PCI device
@@ -704,7 +704,7 @@
 	.id_table	= via,
 	.probe 		= via_init_one,
 	.remove		= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= via_reinit_one,
 #endif

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index fb0b40a..616a6d2 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c

@@ -774,20 +774,6 @@
 	VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
 	VPRINTK("CHBA  = 0x%x\n", ioread32(hcr_base + CHBA));
 
-#ifdef CONFIG_MPC8315_DS
-	/*
-	 * Workaround for 8315DS board 3gbps link-up issue,
-	 * currently limit SATA port to GEN1 speed
-	 */
-	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
-	temp &= ~(0xF << 4);
-	temp |= (0x1 << 4);
-	sata_fsl_scr_write(&ap->link, SCR_CONTROL, temp);
-
-	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
-	dev_warn(dev, "scr_control, speed limited to %x\n", temp);
-#endif
-
 	return 0;
 }
 
@@ -1588,7 +1574,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sata_fsl_suspend(struct platform_device *op, pm_message_t state)
 {
 	struct ata_host *host = platform_get_drvdata(op);
@@ -1644,7 +1630,7 @@
 	},
 	.probe		= sata_fsl_probe,
 	.remove		= sata_fsl_remove,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= sata_fsl_suspend,
 	.resume		= sata_fsl_resume,
 #endif

diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 5c54d95..0698278 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c

@@ -785,7 +785,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int inic_pci_device_resume(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -898,7 +898,7 @@
 static struct pci_driver inic_pci_driver = {
 	.name 		= DRV_NAME,
 	.id_table	= inic_pci_tbl,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= ata_pci_device_suspend,
 	.resume		= inic_pci_device_resume,
 #endif

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 05c8a44..391cfda 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c

@@ -4222,7 +4222,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int mv_platform_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct ata_host *host = platform_get_drvdata(pdev);
@@ -4289,7 +4289,7 @@
 #ifdef CONFIG_PCI
 static int mv_pci_init_one(struct pci_dev *pdev,
 			   const struct pci_device_id *ent);
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int mv_pci_device_resume(struct pci_dev *pdev);
 #endif
 
@@ -4299,7 +4299,7 @@
 	.id_table		= mv_pci_tbl,
 	.probe			= mv_pci_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= mv_pci_device_resume,
 #endif
@@ -4457,7 +4457,7 @@
 				 IS_GEN_I(hpriv) ? &mv5_sht : &mv6_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int mv_pci_device_resume(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index ba5f271..cdf99fa 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c

@@ -295,7 +295,7 @@
 #define NV_ADMA_CHECK_INTR(GCTL, PORT) ((GCTL) & (1 << (19 + (12 * (PORT)))))
 
 static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int nv_pci_device_resume(struct pci_dev *pdev);
 #endif
 static void nv_ck804_host_stop(struct ata_host *host);
@@ -379,7 +379,7 @@
 	.name			= DRV_NAME,
 	.id_table		= nv_pci_tbl,
 	.probe			= nv_init_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= nv_pci_device_resume,
 #endif
@@ -2431,7 +2431,7 @@
 	return ata_pci_sff_activate_host(host, ipriv->irq_handler, ipriv->sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int nv_pci_device_resume(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);

diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 2b25bd8..61eb6d7 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c

@@ -937,7 +937,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sata_rcar_suspend(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
@@ -991,7 +991,7 @@
 		.name		= DRV_NAME,
 		.owner		= THIS_MODULE,
 		.of_match_table	= sata_rcar_match,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 		.pm		= &sata_rcar_pm_ops,
 #endif
 	},

diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 3062f86..40b76b2 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c

@@ -112,7 +112,7 @@
 };
 
 static int sil_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sil_pci_device_resume(struct pci_dev *pdev);
 #endif
 static void sil_dev_config(struct ata_device *dev);
@@ -166,7 +166,7 @@
 	.id_table		= sil_pci_tbl,
 	.probe			= sil_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= sil_pci_device_resume,
 #endif
@@ -802,7 +802,7 @@
 				 &sil_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sil_pci_device_resume(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);

diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index aa1051b..0534890 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c

@@ -353,8 +353,10 @@
 static void sil24_post_internal_cmd(struct ata_queued_cmd *qc);
 static int sil24_port_start(struct ata_port *ap);
 static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sil24_pci_device_resume(struct pci_dev *pdev);
+#endif
+#ifdef CONFIG_PM
 static int sil24_port_resume(struct ata_port *ap);
 #endif
 
@@ -375,7 +377,7 @@
 	.id_table		= sil24_pci_tbl,
 	.probe			= sil24_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= sil24_pci_device_resume,
 #endif
@@ -1350,7 +1352,7 @@
 				 &sil24_sht);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int sil24_pci_device_resume(struct pci_dev *pdev)
 {
 	struct ata_host *host = pci_get_drvdata(pdev);
@@ -1370,7 +1372,9 @@
 
 	return 0;
 }
+#endif
 
+#ifdef CONFIG_PM
 static int sil24_port_resume(struct ata_port *ap)
 {
 	sil24_config_pmp(ap, ap->nr_pmp_links);

diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index b513428..d1637ac 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c

@@ -82,7 +82,7 @@
 	.id_table		= sis_pci_tbl,
 	.probe			= sis_init_one,
 	.remove			= ata_pci_remove_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index f72e842..47bf894 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c

@@ -103,7 +103,7 @@
 	.name			= DRV_NAME,
 	.id_table		= svia_pci_tbl,
 	.probe			= svia_init_one,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.suspend		= ata_pci_device_suspend,
 	.resume			= ata_pci_device_resume,
 #endif

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 204814e..d4725fc 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c

@@ -2780,7 +2780,7 @@
 
 static int __init fore200e_module_init(void)
 {
-	int err;
+	int err = 0;
 
 	printk(FORE200E "FORE Systems 200E-series ATM driver - version " FORE200E_VERSION "\n");
 

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 1bdf104..b621f56 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c

@@ -2551,12 +2551,12 @@
 		timeout = 5 * 1000;
 		while (atomic_read(&vc->scq->used) > 0) {
 			timeout = msleep_interruptible(timeout);
-			if (!timeout)
+			if (!timeout) {
+				pr_warn("%s: SCQ drain timeout: %u used\n",
+					card->name, atomic_read(&vc->scq->used));
 				break;
+			}
 		}
-		if (!timeout)
-			printk("%s: SCQ drain timeout: %u used\n",
-			       card->name, atomic_read(&vc->scq->used));
 
 		writel(TCMDQ_HALT | vc->index, SAR_REG_TCMDQ);
 		clear_scd(card, vc->scq, vc->class);

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 343ffad..bf41296 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c

@@ -214,9 +214,6 @@
 		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
 			error, (unsigned long long)nsecs >> 10);
 	}
-
-	trace_device_pm_report_time(dev, info, nsecs, pm_verb(state.event),
-				    error);
 }
 
 /**
@@ -387,7 +384,9 @@
 	calltime = initcall_debug_start(dev);
 
 	pm_dev_dbg(dev, state, info);
+	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev);
+	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
 	initcall_debug_report(dev, calltime, error, state, info);
@@ -545,6 +544,7 @@
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 
@@ -587,6 +587,7 @@
 	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
 	cpuidle_resume();
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false);
 }
 
 /**
@@ -664,6 +665,7 @@
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 
@@ -703,6 +705,7 @@
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
 	dpm_show_time(starttime, state, "early");
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, false);
 }
 
 /**
@@ -834,6 +837,7 @@
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume"), state.event, true);
 	might_sleep();
 
 	mutex_lock(&dpm_list_mtx);
@@ -875,6 +879,7 @@
 	dpm_show_time(starttime, state, NULL);
 
 	cpufreq_resume();
+	trace_suspend_resume(TPS("dpm_resume"), state.event, false);
 }
 
 /**
@@ -913,7 +918,9 @@
 
 	if (callback) {
 		pm_dev_dbg(dev, state, info);
+		trace_device_pm_callback_start(dev, info, state.event);
 		callback(dev);
+		trace_device_pm_callback_end(dev, 0);
 	}
 
 	device_unlock(dev);
@@ -932,6 +939,7 @@
 {
 	struct list_head list;
 
+	trace_suspend_resume(TPS("dpm_complete"), state.event, true);
 	might_sleep();
 
 	INIT_LIST_HEAD(&list);
@@ -951,6 +959,7 @@
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_complete"), state.event, false);
 }
 
 /**
@@ -1086,6 +1095,7 @@
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true);
 	cpuidle_pause();
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
@@ -1126,6 +1136,7 @@
 	} else {
 		dpm_show_time(starttime, state, "noirq");
 	}
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false);
 	return error;
 }
 
@@ -1222,6 +1233,7 @@
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
@@ -1257,6 +1269,7 @@
 	} else {
 		dpm_show_time(starttime, state, "late");
 	}
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false);
 	return error;
 }
 
@@ -1295,7 +1308,9 @@
 
 	calltime = initcall_debug_start(dev);
 
+	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
+	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
 	initcall_debug_report(dev, calltime, error, state, info);
@@ -1461,6 +1476,7 @@
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, true);
 	might_sleep();
 
 	cpufreq_suspend();
@@ -1498,6 +1514,7 @@
 		dpm_save_failed_step(SUSPEND_SUSPEND);
 	} else
 		dpm_show_time(starttime, state, NULL);
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, false);
 	return error;
 }
 
@@ -1549,8 +1566,11 @@
 		callback = dev->driver->pm->prepare;
 	}
 
-	if (callback)
+	if (callback) {
+		trace_device_pm_callback_start(dev, info, state.event);
 		ret = callback(dev);
+		trace_device_pm_callback_end(dev, ret);
+	}
 
 	device_unlock(dev);
 
@@ -1582,6 +1602,7 @@
 {
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, true);
 	might_sleep();
 
 	mutex_lock(&dpm_list_mtx);
@@ -1612,6 +1633,7 @@
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
 	return error;
 }
 

diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index e8d11b6..dbb8350 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c

@@ -10,6 +10,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <trace/events/power.h>
 
 static LIST_HEAD(syscore_ops_list);
 static DEFINE_MUTEX(syscore_ops_lock);
@@ -49,6 +50,7 @@
 	struct syscore_ops *ops;
 	int ret = 0;
 
+	trace_suspend_resume(TPS("syscore_suspend"), 0, true);
 	pr_debug("Checking wakeup interrupts\n");
 
 	/* Return error code if there are any wakeup interrupts pending. */
@@ -70,6 +72,7 @@
 				"Interrupts enabled after %pF\n", ops->suspend);
 		}
 
+	trace_suspend_resume(TPS("syscore_suspend"), 0, false);
 	return 0;
 
  err_out:
@@ -92,6 +95,7 @@
 {
 	struct syscore_ops *ops;
 
+	trace_suspend_resume(TPS("syscore_resume"), 0, true);
 	WARN_ONCE(!irqs_disabled(),
 		"Interrupts enabled before system core resume.\n");
 
@@ -103,6 +107,7 @@
 			WARN_ONCE(!irqs_disabled(),
 				"Interrupts enabled after %pF\n", ops->resume);
 		}
+	trace_suspend_resume(TPS("syscore_resume"), 0, false);
 }
 EXPORT_SYMBOL_GPL(syscore_resume);
 #endif /* CONFIG_PM_SLEEP */

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 74abd49..295f3af 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c

@@ -39,6 +39,7 @@
 #include <../drivers/ata/ahci.h>
 #include <linux/export.h>
 #include <linux/debugfs.h>
+#include <linux/prefetch.h>
 #include "mtip32xx.h"
 
 #define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)
@@ -2380,6 +2381,8 @@
 	/* Map the scatter list for DMA access */
 	nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
 
+	prefetch(&port->flags);
+
 	command->scatter_ents = nents;
 
 	/*
@@ -2392,7 +2395,7 @@
 	fis = command->command;
 	fis->type        = 0x27;
 	fis->opts        = 1 << 7;
-	if (rq_data_dir(rq) == READ)
+	if (dma_dir == DMA_FROM_DEVICE)
 		fis->command = ATA_CMD_FPDMA_READ;
 	else
 		fis->command = ATA_CMD_FPDMA_WRITE;
@@ -2412,7 +2415,7 @@
 	fis->res3        = 0;
 	fill_command_sg(dd, command, nents);
 
-	if (command->unaligned)
+	if (unlikely(command->unaligned))
 		fis->device |= 1 << 7;
 
 	/* Populate the command header */
@@ -2433,7 +2436,7 @@
 	 * To prevent this command from being issued
 	 * if an internal command is in progress or error handling is active.
 	 */
-	if (port->flags & MTIP_PF_PAUSE_IO) {
+	if (unlikely(port->flags & MTIP_PF_PAUSE_IO)) {
 		set_bit(rq->tag, port->cmds_to_issue);
 		set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
 		return;
@@ -3754,7 +3757,7 @@
 	struct driver_data *dd = hctx->queue->queuedata;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	if (!dd->unal_qdepth || rq_data_dir(rq) == READ)
+	if (rq_data_dir(rq) == READ || !dd->unal_qdepth)
 		return false;
 
 	/*
@@ -3776,11 +3779,11 @@
 {
 	int ret;
 
-	if (mtip_check_unal_depth(hctx, rq))
+	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
 	ret = mtip_submit_request(hctx, rq);
-	if (!ret)
+	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
 
 	rq->errors = ret;

diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 4b9b554..ba1b31e 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h

@@ -493,19 +493,19 @@
 
 	struct workqueue_struct *isr_workq;
 
-	struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
-
 	atomic_t irq_workers_active;
 
+	struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
+
 	int isr_binding;
 
 	struct block_device *bdev;
 
-	int unal_qdepth; /* qdepth of unaligned IO queue */
-
 	struct list_head online_list; /* linkage for online list */
 
 	struct list_head remove_list; /* linkage for removing list */
+
+	int unal_qdepth; /* qdepth of unaligned IO queue */
 };
 
 #endif

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a842c71..02351e2 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c

@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/nvme.h>
@@ -46,16 +42,26 @@
 #include <scsi/sg.h>
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
 
-#define NVME_Q_DEPTH 1024
+#include <trace/events/block.h>
+
+#define NVME_Q_DEPTH		1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT	(60 * HZ)
-#define IOD_TIMEOUT	(4 * NVME_IO_TIMEOUT)
+#define ADMIN_TIMEOUT		(admin_timeout * HZ)
+#define IOD_TIMEOUT		(retry_time * HZ)
 
-unsigned char io_timeout = 30;
-module_param(io_timeout, byte, 0644);
+static unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 
+static unsigned char retry_time = 30;
+module_param(retry_time, byte, 0644);
+MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O");
+
 static int nvme_major;
 module_param(nvme_major, int, 0);
 
@@ -67,6 +73,7 @@
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
+static struct notifier_block nvme_nb;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
 
@@ -199,16 +206,13 @@
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
-#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
-#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
+#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
 		return;
-	if (ctx == CMD_CTX_FLUSH)
-		return;
 	if (ctx == CMD_CTX_ABORT) {
 		++nvmeq->dev->abort_limit;
 		return;
@@ -247,8 +251,9 @@
 	void *ctx;
 	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
 
-	if (cmdid >= nvmeq->q_depth) {
-		*fn = special_completion;
+	if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) {
+		if (fn)
+			*fn = special_completion;
 		return CMD_CTX_INVALID;
 	}
 	if (fn)
@@ -281,9 +286,17 @@
 
 static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
 {
+	struct nvme_queue *nvmeq;
 	unsigned queue_id = get_cpu_var(*dev->io_queue);
+
 	rcu_read_lock();
-	return rcu_dereference(dev->queues[queue_id]);
+	nvmeq = rcu_dereference(dev->queues[queue_id]);
+	if (nvmeq)
+		return nvmeq;
+
+	rcu_read_unlock();
+	put_cpu_var(*dev->io_queue);
+	return NULL;
 }
 
 static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -295,8 +308,15 @@
 static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
 							__acquires(RCU)
 {
+	struct nvme_queue *nvmeq;
+
 	rcu_read_lock();
-	return rcu_dereference(dev->queues[q_idx]);
+	nvmeq = rcu_dereference(dev->queues[q_idx]);
+	if (nvmeq)
+		return nvmeq;
+
+	rcu_read_unlock();
+	return NULL;
 }
 
 static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -387,25 +407,30 @@
 static void nvme_start_io_acct(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	const int rw = bio_data_dir(bio);
-	int cpu = part_stat_lock();
-	part_round_stats(cpu, &disk->part0);
-	part_stat_inc(cpu, &disk->part0, ios[rw]);
-	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
-	part_inc_in_flight(&disk->part0, rw);
-	part_stat_unlock();
+	if (blk_queue_io_stat(disk->queue)) {
+		const int rw = bio_data_dir(bio);
+		int cpu = part_stat_lock();
+		part_round_stats(cpu, &disk->part0);
+		part_stat_inc(cpu, &disk->part0, ios[rw]);
+		part_stat_add(cpu, &disk->part0, sectors[rw],
+							bio_sectors(bio));
+		part_inc_in_flight(&disk->part0, rw);
+		part_stat_unlock();
+	}
 }
 
 static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	const int rw = bio_data_dir(bio);
-	unsigned long duration = jiffies - start_time;
-	int cpu = part_stat_lock();
-	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
-	part_round_stats(cpu, &disk->part0);
-	part_dec_in_flight(&disk->part0, rw);
-	part_stat_unlock();
+	if (blk_queue_io_stat(disk->queue)) {
+		const int rw = bio_data_dir(bio);
+		unsigned long duration = jiffies - start_time;
+		int cpu = part_stat_lock();
+		part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+		part_round_stats(cpu, &disk->part0);
+		part_dec_in_flight(&disk->part0, rw);
+		part_stat_unlock();
+	}
 }
 
 static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -414,6 +439,7 @@
 	struct nvme_iod *iod = ctx;
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+	int error = 0;
 
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR ||
@@ -426,6 +452,7 @@
 			wake_up(&nvmeq->sq_full);
 			return;
 		}
+		error = -EIO;
 	}
 	if (iod->nents) {
 		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
@@ -433,10 +460,9 @@
 		nvme_end_io_acct(bio, iod->start_time);
 	}
 	nvme_free_iod(nvmeq->dev, iod);
-	if (status)
-		bio_endio(bio, -EIO);
-	else
-		bio_endio(bio, 0);
+
+	trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error);
+	bio_endio(bio, error);
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
@@ -525,6 +551,8 @@
 	if (!split)
 		return -ENOMEM;
 
+	trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+					split->bi_iter.bi_sector);
 	bio_chain(split, bio);
 
 	if (!waitqueue_active(&nvmeq->sq_full))
@@ -627,16 +655,6 @@
 	return 0;
 }
 
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
-{
-	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					special_completion, NVME_IO_TIMEOUT);
-	if (unlikely(cmdid < 0))
-		return cmdid;
-
-	return nvme_submit_flush(nvmeq, ns, cmdid);
-}
-
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
 	struct bio *bio = iod->private;
@@ -652,7 +670,7 @@
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+	if (bio->bi_rw & REQ_FLUSH)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -686,6 +704,26 @@
 	return 0;
 }
 
+static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+{
+	struct bio *split = bio_clone(bio, GFP_ATOMIC);
+	if (!split)
+		return -ENOMEM;
+
+	split->bi_iter.bi_size = 0;
+	split->bi_phys_segments = 0;
+	bio->bi_rw &= ~REQ_FLUSH;
+	bio_chain(split, bio);
+
+	if (!waitqueue_active(&nvmeq->sq_full))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held.  May not sleep.
  */
@@ -696,11 +734,8 @@
 	int psegs = bio_phys_segments(ns->queue, bio);
 	int result;
 
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
+	if ((bio->bi_rw & REQ_FLUSH) && psegs)
+		return nvme_split_flush_data(nvmeq, bio);
 
 	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
@@ -795,7 +830,6 @@
 	int result = -EBUSY;
 
 	if (!nvmeq) {
-		put_nvmeq(NULL);
 		bio_endio(bio, -EIO);
 		return;
 	}
@@ -870,10 +904,8 @@
 	struct nvme_queue *nvmeq;
 
 	nvmeq = lock_nvmeq(dev, q_idx);
-	if (!nvmeq) {
-		unlock_nvmeq(nvmeq);
+	if (!nvmeq)
 		return -ENODEV;
-	}
 
 	cmdinfo.task = current;
 	cmdinfo.status = -EINTR;
@@ -898,9 +930,10 @@
 
 	if (cmdinfo.status == -EINTR) {
 		nvmeq = lock_nvmeq(dev, q_idx);
-		if (nvmeq)
+		if (nvmeq) {
 			nvme_abort_command(nvmeq, cmdid);
-		unlock_nvmeq(nvmeq);
+			unlock_nvmeq(nvmeq);
+		}
 		return -EINTR;
 	}
 
@@ -1358,7 +1391,8 @@
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
 			dev_err(&dev->pci_dev->dev,
-				"Device not ready; aborting initialisation\n");
+				"Device not ready; aborting %s\n", enabled ?
+						"initialisation" : "reset");
 			return -ENODEV;
 		}
 	}
@@ -1481,7 +1515,11 @@
 		goto put_pages;
 	}
 
+	err = -ENOMEM;
 	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+	if (!iod)
+		goto put_pages;
+
 	sg = iod->sg;
 	sg_init_table(sg, count);
 	for (i = 0; i < count; i++) {
@@ -1494,7 +1532,6 @@
 	sg_mark_end(&sg[i - 1]);
 	iod->nents = count;
 
-	err = -ENOMEM;
 	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
 				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (!nents)
@@ -1894,6 +1931,8 @@
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -2062,8 +2101,13 @@
 
 	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
 								&result);
-	if (status)
-		return status < 0 ? -EIO : -EBUSY;
+	if (status < 0)
+		return status;
+	if (status > 0) {
+		dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
+									status);
+		return -EBUSY;
+	}
 	return min(result & 0xffff, result >> 16) + 1;
 }
 
@@ -2072,14 +2116,25 @@
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
 }
 
+static void nvme_cpu_workfn(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
+	if (dev->initialized)
+		nvme_assign_io_queues(dev);
+}
+
 static int nvme_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
-	struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
+	struct nvme_dev *dev;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-		nvme_assign_io_queues(dev);
+		spin_lock(&dev_list_lock);
+		list_for_each_entry(dev, &dev_list, node)
+			schedule_work(&dev->cpu_work);
+		spin_unlock(&dev_list_lock);
 		break;
 	}
 	return NOTIFY_OK;
@@ -2148,11 +2203,6 @@
 	nvme_free_queues(dev, nr_io_queues + 1);
 	nvme_assign_io_queues(dev);
 
-	dev->nb.notifier_call = &nvme_cpu_notify;
-	result = register_hotcpu_notifier(&dev->nb);
-	if (result)
-		goto free_queues;
-
 	return 0;
 
  free_queues:
@@ -2184,6 +2234,7 @@
 
 	res = nvme_identify(dev, 0, 1, dma_addr);
 	if (res) {
+		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
 		res = -EIO;
 		goto out;
 	}
@@ -2192,6 +2243,7 @@
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
+	dev->vwc = ctrl->vwc;
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2450,8 +2502,6 @@
 	int i;
 
 	dev->initialized = 0;
-	unregister_hotcpu_notifier(&dev->nb);
-
 	nvme_dev_list_remove(dev);
 
 	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@ -2722,6 +2772,7 @@
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+	INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
 	dev->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
@@ -2801,6 +2852,7 @@
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->cpu_work);
 	misc_deregister(&dev->miscdev);
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
@@ -2889,11 +2941,18 @@
 	else if (result > 0)
 		nvme_major = result;
 
-	result = pci_register_driver(&nvme_driver);
+	nvme_nb.notifier_call = &nvme_cpu_notify;
+	result = register_hotcpu_notifier(&nvme_nb);
 	if (result)
 		goto unregister_blkdev;
+
+	result = pci_register_driver(&nvme_driver);
+	if (result)
+		goto unregister_hotcpu;
 	return 0;
 
+ unregister_hotcpu:
+	unregister_hotcpu_notifier(&nvme_nb);
  unregister_blkdev:
 	unregister_blkdev(nvme_major, "nvme");
  kill_workq:
@@ -2904,9 +2963,11 @@
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
+	unregister_hotcpu_notifier(&nvme_nb);
 	unregister_blkdev(nvme_major, "nvme");
 	destroy_workqueue(nvme_workq);
 	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
+	_nvme_check_size();
 }
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 2c3f5be..a4cd6d6 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c

@@ -1,6 +1,6 @@
 /*
  * NVM Express device driver
- * Copyright (c) 2011, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 /*
@@ -243,8 +239,6 @@
 #define READ_CAP_16_RESP_SIZE				32
 
 /* NVMe Namespace and Command Defines */
-#define NVME_GET_SMART_LOG_PAGE				0x02
-#define NVME_GET_FEAT_TEMP_THRESH			0x04
 #define BYTES_TO_DWORDS					4
 #define NVME_MAX_FIRMWARE_SLOT				7
 
@@ -686,6 +680,7 @@
 	u8 resp_data_format = 0x02;
 	u8 protect;
 	u8 cmdque = 0x01 << 1;
+	u8 fw_offset = sizeof(dev->firmware_rev);
 
 	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
 				&dma_addr, GFP_KERNEL);
@@ -721,7 +716,11 @@
 	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
 	strncpy(&inq_response[8], "NVMe    ", 8);
 	strncpy(&inq_response[16], dev->model, 16);
-	strncpy(&inq_response[32], dev->firmware_rev, 4);
+
+	while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+		fw_offset--;
+	fw_offset -= 4;
+	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -1018,8 +1017,8 @@
 	c.common.opcode = nvme_admin_get_log_page;
 	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
 	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
+	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
+			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
 	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
@@ -1086,8 +1085,8 @@
 	c.common.opcode = nvme_admin_get_log_page;
 	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
 	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
+	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
+			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
 	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
@@ -1477,7 +1476,7 @@
 		goto out_dma;
 	}
 	id_ctrl = mem;
-	lowest_pow_st = id_ctrl->npss - 1;
+	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
 
 	switch (pc) {
 	case NVME_POWER_STATE_START_VALID:
@@ -1494,20 +1493,19 @@
 		break;
 	case NVME_POWER_STATE_IDLE:
 		/* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */
-		/* min of desired state and (lps-1) because lps is STOP */
 		if (pcmod == 0x0)
-			ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_1;
 		else if (pcmod == 0x1)
-			ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_2;
 		else if (pcmod == 0x2)
-			ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_3;
 		break;
 	case NVME_POWER_STATE_STANDBY:
 		/* Action unspecified if POWER CONDITION MODIFIER != [0,1] */
 		if (pcmod == 0x0)
-			ps_desired = max(0, (lowest_pow_st - 2));
+			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2));
 		else if (pcmod == 0x1)
-			ps_desired = max(0, (lowest_pow_st - 1));
+			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1));
 		break;
 	case NVME_POWER_STATE_LU_CONTROL:
 	default:

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ef166ad..758ac44 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c

@@ -704,6 +704,7 @@
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
 			     WRITE : READ, __GFP_WAIT);
+	blk_rq_set_block_pc(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,7 +717,6 @@
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	if (cgc->quiet)
 		rq->cmd_flags |= REQ_QUIET;
 

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4c95b50..bbeb404 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c

@@ -541,7 +541,6 @@
 		return -ENOENT;
 
 	(void) get_device(&rbd_dev->dev);
-	set_device_ro(bdev, rbd_dev->mapping.read_only);
 
 	return 0;
 }
@@ -559,10 +558,76 @@
 	put_device(&rbd_dev->dev);
 }
 
+static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
+{
+	int ret = 0;
+	int val;
+	bool ro;
+	bool ro_changed = false;
+
+	/* get_user() may sleep, so call it before taking rbd_dev->lock */
+	if (get_user(val, (int __user *)(arg)))
+		return -EFAULT;
+
+	ro = val ? true : false;
+	/* A snapshot is read-only; refuse a request to make it writable */
+	if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
+		return -EROFS;
+
+	spin_lock_irq(&rbd_dev->lock);
+	/* refuse the change while the device has more than one opener */
+	if (rbd_dev->open_count > 1) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (rbd_dev->mapping.read_only != ro) {
+		rbd_dev->mapping.read_only = ro;
+		ro_changed = true;
+	}
+
+out:
+	spin_unlock_irq(&rbd_dev->lock);
+	/* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */
+	if (ret == 0 && ro_changed)
+		set_disk_ro(rbd_dev->disk, ro ? 1 : 0);
+
+	return ret;
+}
+
+static int rbd_ioctl(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg)
+{
+	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+	int ret = 0;
+
+	switch (cmd) {
+	case BLKROSET:
+		ret = rbd_ioctl_set_ro(rbd_dev, arg);
+		break;
+	default:
+		ret = -ENOTTY;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode,
+				unsigned int cmd, unsigned long arg)
+{
+	return rbd_ioctl(bdev, mode, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
 static const struct block_device_operations rbd_bd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= rbd_open,
 	.release		= rbd_release,
+	.ioctl			= rbd_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl		= rbd_compat_ioctl,
+#endif
 };
 
 /*
@@ -1382,6 +1447,13 @@
 	kref_put(&obj_request->kref, rbd_obj_request_destroy);
 }
 
+static void rbd_img_request_get(struct rbd_img_request *img_request)
+{
+	dout("%s: img %p (was %d)\n", __func__, img_request,
+	     atomic_read(&img_request->kref.refcount));
+	kref_get(&img_request->kref);
+}
+
 static bool img_request_child_test(struct rbd_img_request *img_request);
 static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
@@ -2142,6 +2214,7 @@
 	img_request->next_completion = which;
 out:
 	spin_unlock_irq(&img_request->completion_lock);
+	rbd_img_request_put(img_request);
 
 	if (!more)
 		rbd_img_request_complete(img_request);
@@ -2242,6 +2315,7 @@
 			goto out_unwind;
 		obj_request->osd_req = osd_req;
 		obj_request->callback = rbd_img_obj_callback;
+		rbd_img_request_get(img_request);
 
 		if (write_request) {
 			osd_req_op_alloc_hint_init(osd_req, which,
@@ -2872,56 +2946,55 @@
 }
 
 /*
- * Request sync osd watch/unwatch.  The value of "start" determines
- * whether a watch request is being initiated or torn down.
+ * Initiate a watch request, synchronously.
  */
-static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	int ret;
 
-	rbd_assert(start ^ !!rbd_dev->watch_event);
-	rbd_assert(start ^ !!rbd_dev->watch_request);
+	rbd_assert(!rbd_dev->watch_event);
+	rbd_assert(!rbd_dev->watch_request);
 
-	if (start) {
-		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
-						&rbd_dev->watch_event);
-		if (ret < 0)
-			return ret;
-		rbd_assert(rbd_dev->watch_event != NULL);
-	}
+	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+				     &rbd_dev->watch_event);
+	if (ret < 0)
+		return ret;
 
-	ret = -ENOMEM;
+	rbd_assert(rbd_dev->watch_event);
+
 	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
-							OBJ_REQUEST_NODATA);
-	if (!obj_request)
+					     OBJ_REQUEST_NODATA);
+	if (!obj_request) {
+		ret = -ENOMEM;
 		goto out_cancel;
+	}
 
 	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
 						  obj_request);
-	if (!obj_request->osd_req)
-		goto out_cancel;
+	if (!obj_request->osd_req) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
 
-	if (start)
-		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
-	else
-		ceph_osdc_unregister_linger_request(osdc,
-					rbd_dev->watch_request->osd_req);
+	ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
 
 	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
-				rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
+			      rbd_dev->watch_event->cookie, 0, 1);
 	rbd_osd_req_format_write(obj_request);
 
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
+
 	ret = rbd_obj_request_wait(obj_request);
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
+
 	ret = obj_request->result;
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
 
 	/*
 	 * A watch request is set to linger, so the underlying osd
@@ -2931,36 +3004,84 @@
 	 * it.  We'll drop that reference (below) after we've
 	 * unregistered it.
 	 */
-	if (start) {
-		rbd_dev->watch_request = obj_request;
+	rbd_dev->watch_request = obj_request;
 
-		return 0;
-	}
+	return 0;
 
-	/* We have successfully torn down the watch request */
-
-	rbd_obj_request_put(rbd_dev->watch_request);
-	rbd_dev->watch_request = NULL;
+out_linger:
+	ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req);
+out_put:
+	rbd_obj_request_put(obj_request);
 out_cancel:
-	/* Cancel the event if we're tearing down, or on error */
 	ceph_osdc_cancel_event(rbd_dev->watch_event);
 	rbd_dev->watch_event = NULL;
-	if (obj_request)
-		rbd_obj_request_put(obj_request);
 
 	return ret;
 }
 
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+/*
+ * Tear down a watch request, synchronously.
+ */
+static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
 {
-	return __rbd_dev_header_watch_sync(rbd_dev, true);
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_obj_request *obj_request;
+	int ret;
+
+	rbd_assert(rbd_dev->watch_event);
+	rbd_assert(rbd_dev->watch_request);
+
+	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+					     OBJ_REQUEST_NODATA);
+	if (!obj_request) {
+		ret = -ENOMEM;
+		goto out_cancel;
+	}
+
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+						  obj_request);
+	if (!obj_request->osd_req) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+			      rbd_dev->watch_event->cookie, 0, 0);
+	rbd_osd_req_format_write(obj_request);
+
+	ret = rbd_obj_request_submit(osdc, obj_request);
+	if (ret)
+		goto out_put;
+
+	ret = rbd_obj_request_wait(obj_request);
+	if (ret)
+		goto out_put;
+
+	ret = obj_request->result;
+	if (ret)
+		goto out_put;
+
+	/* We have successfully torn down the watch request */
+
+	ceph_osdc_unregister_linger_request(osdc,
+					    rbd_dev->watch_request->osd_req);
+	rbd_obj_request_put(rbd_dev->watch_request);
+	rbd_dev->watch_request = NULL;
+
+out_put:
+	rbd_obj_request_put(obj_request);
+out_cancel:
+	ceph_osdc_cancel_event(rbd_dev->watch_event);
+	rbd_dev->watch_event = NULL;
+
+	return ret;
 }
 
 static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
 {
 	int ret;
 
-	ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+	ret = __rbd_dev_header_unwatch_sync(rbd_dev);
 	if (ret) {
 		rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
 			 ret);
@@ -3058,7 +3179,6 @@
 		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct rbd_device *rbd_dev = q->queuedata;
-	bool read_only = rbd_dev->mapping.read_only;
 	struct request *rq;
 	int result;
 
@@ -3094,7 +3214,7 @@
 
 		if (write_request) {
 			result = -EROFS;
-			if (read_only)
+			if (rbd_dev->mapping.read_only)
 				goto end_request;
 			rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
 		}
@@ -4683,6 +4803,38 @@
 }
 
 /*
+ * Return pool id (>= 0) or a negative error code.
+ */
+static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
+{
+	u64 newest_epoch;
+	unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
+	int tries = 0;
+	int ret;
+
+again:
+	ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
+	if (ret == -ENOENT && tries++ < 1) {
+		ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
+					       &newest_epoch);
+		if (ret < 0)
+			return ret;
+
+		if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
+			ceph_monc_request_next_osdmap(&rbdc->client->monc);
+			(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
+						     newest_epoch, timeout);
+			goto again;
+		} else {
+			/* the osdmap we have is new enough */
+			return -ENOENT;
+		}
+	}
+
+	return ret;
+}
+
+/*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user.  Internally, that identifier is
  * what's used to specify the names of objects related to the image.
@@ -4752,7 +4904,7 @@
 
 		image_id = ceph_extract_encoded_string(&p, p + ret,
 						NULL, GFP_NOIO);
-		ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+		ret = PTR_ERR_OR_ZERO(image_id);
 		if (!ret)
 			rbd_dev->image_format = 2;
 	} else {
@@ -4907,6 +5059,7 @@
 	if (ret)
 		goto err_out_disk;
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
 	ret = rbd_bus_add_dev(rbd_dev);
 	if (ret)
@@ -5053,7 +5206,6 @@
 	struct rbd_options *rbd_opts = NULL;
 	struct rbd_spec *spec = NULL;
 	struct rbd_client *rbdc;
-	struct ceph_osd_client *osdc;
 	bool read_only;
 	int rc = -ENOMEM;
 
@@ -5075,8 +5227,7 @@
 	}
 
 	/* pick the pool */
-	osdc = &rbdc->client->osdc;
-	rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+	rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
 	if (rc < 0)
 		goto err_out_client;
 	spec->pool_id = (u64)rc;
@@ -5387,6 +5538,7 @@
 
 static void __exit rbd_exit(void)
 {
+	ida_destroy(&rbd_dev_id_ida);
 	rbd_sysfs_cleanup();
 	if (single_major)
 		unregister_blkdev(rbd_major, RBD_DRV_NAME);

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index a83b57e..f983806 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c

@@ -193,9 +193,10 @@
 	sent += 20;
 	count -= 20;
 
+	pipe = usb_sndbulkpipe(udev, 0x02);
+
 	while (count) {
 		size = min_t(uint, count, BULK_SIZE);
-		pipe = usb_sndbulkpipe(udev, 0x02);
 		memcpy(send_buf, firmware->data + sent, size);
 
 		err = usb_bulk_msg(udev, pipe, send_buf, size,

diff --git a/drivers/bluetooth/btmrvl_drv.h b/drivers/bluetooth/btmrvl_drv.h
index 7399303..dc79f88 100644
--- a/drivers/bluetooth/btmrvl_drv.h
+++ b/drivers/bluetooth/btmrvl_drv.h

@@ -59,6 +59,8 @@
 };
 
 struct btmrvl_adapter {
+	void *hw_regs_buf;
+	u8 *hw_regs;
 	u32 int_count;
 	struct sk_buff_head tx_queue;
 	u8 psmode;
@@ -140,7 +142,7 @@
 bool btmrvl_check_evtpkt(struct btmrvl_private *priv, struct sk_buff *skb);
 int btmrvl_process_event(struct btmrvl_private *priv, struct sk_buff *skb);
 
-int btmrvl_send_module_cfg_cmd(struct btmrvl_private *priv, int subcmd);
+int btmrvl_send_module_cfg_cmd(struct btmrvl_private *priv, u8 subcmd);
 int btmrvl_send_hscfg_cmd(struct btmrvl_private *priv);
 int btmrvl_enable_ps(struct btmrvl_private *priv);
 int btmrvl_prepare_command(struct btmrvl_private *priv);

diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index 2c4997c..e9dbddb 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c

@@ -24,6 +24,7 @@
 #include <net/bluetooth/hci_core.h>
 
 #include "btmrvl_drv.h"
+#include "btmrvl_sdio.h"
 
 #define VERSION "1.0"
 
@@ -201,7 +202,7 @@
 	return 0;
 }
 
-int btmrvl_send_module_cfg_cmd(struct btmrvl_private *priv, int subcmd)
+int btmrvl_send_module_cfg_cmd(struct btmrvl_private *priv, u8 subcmd)
 {
 	int ret;
 
@@ -337,10 +338,25 @@
 
 static void btmrvl_init_adapter(struct btmrvl_private *priv)
 {
+	int buf_size;
+
 	skb_queue_head_init(&priv->adapter->tx_queue);
 
 	priv->adapter->ps_state = PS_AWAKE;
 
+	buf_size = ALIGN_SZ(SDIO_BLOCK_SIZE, BTSDIO_DMA_ALIGN);
+	priv->adapter->hw_regs_buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!priv->adapter->hw_regs_buf) {
+		priv->adapter->hw_regs = NULL;
+		BT_ERR("Unable to allocate buffer for hw_regs.");
+	} else {
+		priv->adapter->hw_regs =
+			(u8 *)ALIGN_ADDR(priv->adapter->hw_regs_buf,
+					 BTSDIO_DMA_ALIGN);
+		BT_DBG("hw_regs_buf=%p hw_regs=%p",
+		       priv->adapter->hw_regs_buf, priv->adapter->hw_regs);
+	}
+
 	init_waitqueue_head(&priv->adapter->cmd_wait_q);
 }
 
@@ -348,6 +364,7 @@
 {
 	skb_queue_purge(&priv->adapter->tx_queue);
 
+	kfree(priv->adapter->hw_regs_buf);
 	kfree(priv->adapter);
 
 	priv->adapter = NULL;

diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 1b52c9f..9dedca5 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c

@@ -64,6 +64,7 @@
 	.io_port_0 = 0x00,
 	.io_port_1 = 0x01,
 	.io_port_2 = 0x02,
+	.int_read_to_clear = false,
 };
 static const struct btmrvl_sdio_card_reg btmrvl_reg_87xx = {
 	.cfg = 0x00,
@@ -80,6 +81,7 @@
 	.io_port_0 = 0x78,
 	.io_port_1 = 0x79,
 	.io_port_2 = 0x7a,
+	.int_read_to_clear = false,
 };
 
 static const struct btmrvl_sdio_card_reg btmrvl_reg_88xx = {
@@ -97,6 +99,9 @@
 	.io_port_0 = 0xd8,
 	.io_port_1 = 0xd9,
 	.io_port_2 = 0xda,
+	.int_read_to_clear = true,
+	.host_int_rsr = 0x01,
+	.card_misc_cfg = 0xcc,
 };
 
 static const struct btmrvl_sdio_device btmrvl_sdio_sd8688 = {
@@ -667,6 +672,53 @@
 	return 0;
 }
 
+static int btmrvl_sdio_read_to_clear(struct btmrvl_sdio_card *card, u8 *ireg)
+{
+	struct btmrvl_adapter *adapter = card->priv->adapter;
+	int ret;
+
+	ret = sdio_readsb(card->func, adapter->hw_regs, 0, SDIO_BLOCK_SIZE);
+	if (ret) {
+		BT_ERR("sdio_readsb: read int hw_regs failed: %d", ret);
+		return ret;
+	}
+
+	*ireg = adapter->hw_regs[card->reg->host_intstatus];
+	BT_DBG("hw_regs[%#x]=%#x", card->reg->host_intstatus, *ireg);
+
+	return 0;
+}
+
+static int btmrvl_sdio_write_to_clear(struct btmrvl_sdio_card *card, u8 *ireg)
+{
+	int ret;
+
+	*ireg = sdio_readb(card->func, card->reg->host_intstatus, &ret);
+	if (ret) {
+		BT_ERR("sdio_readb: read int status failed: %d", ret);
+		return ret;
+	}
+
+	if (*ireg) {
+		/*
+		 * DN_LD_HOST_INT_STATUS and/or UP_LD_HOST_INT_STATUS
+		 * Clear the interrupt status register and re-enable the
+		 * interrupt.
+		 */
+		BT_DBG("int_status = 0x%x", *ireg);
+
+		sdio_writeb(card->func, ~(*ireg) & (DN_LD_HOST_INT_STATUS |
+						    UP_LD_HOST_INT_STATUS),
+			    card->reg->host_intstatus, &ret);
+		if (ret) {
+			BT_ERR("sdio_writeb: clear int status failed: %d", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static void btmrvl_sdio_interrupt(struct sdio_func *func)
 {
 	struct btmrvl_private *priv;
@@ -684,28 +736,13 @@
 
 	priv = card->priv;
 
-	ireg = sdio_readb(card->func, card->reg->host_intstatus, &ret);
-	if (ret) {
-		BT_ERR("sdio_readb: read int status register failed");
+	if (card->reg->int_read_to_clear)
+		ret = btmrvl_sdio_read_to_clear(card, &ireg);
+	else
+		ret = btmrvl_sdio_write_to_clear(card, &ireg);
+
+	if (ret)
 		return;
-	}
-
-	if (ireg != 0) {
-		/*
-		 * DN_LD_HOST_INT_STATUS and/or UP_LD_HOST_INT_STATUS
-		 * Clear the interrupt status register and re-enable the
-		 * interrupt.
-		 */
-		BT_DBG("ireg = 0x%x", ireg);
-
-		sdio_writeb(card->func, ~(ireg) & (DN_LD_HOST_INT_STATUS |
-					UP_LD_HOST_INT_STATUS),
-				card->reg->host_intstatus, &ret);
-		if (ret) {
-			BT_ERR("sdio_writeb: clear int status register failed");
-			return;
-		}
-	}
 
 	spin_lock_irqsave(&priv->driver_lock, flags);
 	sdio_ireg |= ireg;
@@ -777,6 +814,30 @@
 
 	BT_DBG("SDIO FUNC%d IO port: 0x%x", func->num, card->ioport);
 
+	if (card->reg->int_read_to_clear) {
+		reg = sdio_readb(func, card->reg->host_int_rsr, &ret);
+		if (ret < 0) {
+			ret = -EIO;
+			goto release_irq;
+		}
+		sdio_writeb(func, reg | 0x3f, card->reg->host_int_rsr, &ret);
+		if (ret < 0) {
+			ret = -EIO;
+			goto release_irq;
+		}
+
+		reg = sdio_readb(func, card->reg->card_misc_cfg, &ret);
+		if (ret < 0) {
+			ret = -EIO;
+			goto release_irq;
+		}
+		sdio_writeb(func, reg | 0x10, card->reg->card_misc_cfg, &ret);
+		if (ret < 0) {
+			ret = -EIO;
+			goto release_irq;
+		}
+	}
+
 	sdio_set_drvdata(func, card);
 
 	sdio_release_host(func);

diff --git a/drivers/bluetooth/btmrvl_sdio.h b/drivers/bluetooth/btmrvl_sdio.h
index 43d35a6..d4dd3b0 100644
--- a/drivers/bluetooth/btmrvl_sdio.h
+++ b/drivers/bluetooth/btmrvl_sdio.h

@@ -78,6 +78,9 @@
 	u8 io_port_0;
 	u8 io_port_1;
 	u8 io_port_2;
+	bool int_read_to_clear;
+	u8 host_int_rsr;
+	u8 card_misc_cfg;
 };
 
 struct btmrvl_sdio_card {

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a7dfbf9..a1c80b0 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c

@@ -49,6 +49,7 @@
 #define BTUSB_WRONG_SCO_MTU	0x40
 #define BTUSB_ATH3012		0x80
 #define BTUSB_INTEL		0x100
+#define BTUSB_BCM_PATCHRAM	0x200
 
 static const struct usb_device_id btusb_table[] = {
 	/* Generic Bluetooth USB device */
@@ -111,7 +112,8 @@
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0489, 0xff, 0x01, 0x01) },
 
 	/* Broadcom devices with vendor specific id */
-	{ USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) },
+	{ USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01),
+	  .driver_info = BTUSB_BCM_PATCHRAM },
 
 	/* Belkin F8065bf - Broadcom based */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x050d, 0xff, 0x01, 0x01) },
@@ -1381,6 +1383,154 @@
 	return 0;
 }
 
+static int btusb_setup_bcm_patchram(struct hci_dev *hdev)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+	struct usb_device *udev = data->udev;
+	char fw_name[64];
+	const struct firmware *fw;
+	const u8 *fw_ptr;
+	size_t fw_size;
+	const struct hci_command_hdr *cmd;
+	const u8 *cmd_param;
+	u16 opcode;
+	struct sk_buff *skb;
+	struct hci_rp_read_local_version *ver;
+	long ret;
+
+	snprintf(fw_name, sizeof(fw_name), "brcm/%s-%04x-%04x.hcd",
+		 udev->product ? udev->product : "BCM",
+		 le16_to_cpu(udev->descriptor.idVendor),
+		 le16_to_cpu(udev->descriptor.idProduct));
+
+	ret = request_firmware(&fw, fw_name, &hdev->dev);
+	if (ret < 0) {
+		BT_INFO("%s: BCM: patch %s not found", hdev->name,
+			fw_name);
+		return 0;
+	}
+
+	/* Reset */
+	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		BT_ERR("%s: HCI_OP_RESET failed (%ld)", hdev->name, ret);
+		goto done;
+	}
+	kfree_skb(skb);
+
+	/* Read Local Version Info */
+	skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL,
+			     HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		BT_ERR("%s: HCI_OP_READ_LOCAL_VERSION failed (%ld)",
+			hdev->name, ret);
+		goto done;
+	}
+
+	if (skb->len != sizeof(*ver)) {
+		BT_ERR("%s: HCI_OP_READ_LOCAL_VERSION event length mismatch",
+			hdev->name);
+		kfree_skb(skb);
+		ret = -EIO;
+		goto done;
+	}
+
+	ver = (struct hci_rp_read_local_version *) skb->data;
+	BT_INFO("%s: BCM: patching hci_ver=%02x hci_rev=%04x lmp_ver=%02x "
+		"lmp_subver=%04x", hdev->name, ver->hci_ver, ver->hci_rev,
+		ver->lmp_ver, ver->lmp_subver);
+	kfree_skb(skb);
+
+	/* Start Download */
+	skb = __hci_cmd_sync(hdev, 0xfc2e, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		BT_ERR("%s: BCM: Download Minidrv command failed (%ld)",
+			hdev->name, ret);
+		goto reset_fw;
+	}
+	kfree_skb(skb);
+
+	/* 50 msec delay after Download Minidrv completes */
+	msleep(50);
+
+	fw_ptr = fw->data;
+	fw_size = fw->size;
+
+	while (fw_size >= sizeof(*cmd)) {
+		cmd = (struct hci_command_hdr *) fw_ptr;
+		fw_ptr += sizeof(*cmd);
+		fw_size -= sizeof(*cmd);
+
+		if (fw_size < cmd->plen) {
+			BT_ERR("%s: BCM: patch %s is corrupted",
+				hdev->name, fw_name);
+			ret = -EINVAL;
+			goto reset_fw;
+		}
+
+		cmd_param = fw_ptr;
+		fw_ptr += cmd->plen;
+		fw_size -= cmd->plen;
+
+		opcode = le16_to_cpu(cmd->opcode);
+
+		skb = __hci_cmd_sync(hdev, opcode, cmd->plen, cmd_param,
+				     HCI_INIT_TIMEOUT);
+		if (IS_ERR(skb)) {
+			ret = PTR_ERR(skb);
+			BT_ERR("%s: BCM: patch command %04x failed (%ld)",
+				hdev->name, opcode, ret);
+			goto reset_fw;
+		}
+		kfree_skb(skb);
+	}
+
+	/* 250 msec delay after Launch Ram completes */
+	msleep(250);
+
+reset_fw:
+	/* Reset */
+	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		BT_ERR("%s: HCI_OP_RESET failed (%ld)", hdev->name, ret);
+		goto done;
+	}
+	kfree_skb(skb);
+
+	/* Read Local Version Info */
+	skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL,
+			     HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		BT_ERR("%s: HCI_OP_READ_LOCAL_VERSION failed (%ld)",
+			hdev->name, ret);
+		goto done;
+	}
+
+	if (skb->len != sizeof(*ver)) {
+		BT_ERR("%s: HCI_OP_READ_LOCAL_VERSION event length mismatch",
+			hdev->name);
+		kfree_skb(skb);
+		ret = -EIO;
+		goto done;
+	}
+
+	ver = (struct hci_rp_read_local_version *) skb->data;
+	BT_INFO("%s: BCM: firmware hci_ver=%02x hci_rev=%04x lmp_ver=%02x "
+		"lmp_subver=%04x", hdev->name, ver->hci_ver, ver->hci_rev,
+		ver->lmp_ver, ver->lmp_subver);
+	kfree_skb(skb);
+
+done:
+	release_firmware(fw);
+
+	return ret;
+}
+
 static int btusb_probe(struct usb_interface *intf,
 				const struct usb_device_id *id)
 {
@@ -1486,6 +1636,9 @@
 	if (id->driver_info & BTUSB_BCM92035)
 		hdev->setup = btusb_setup_bcm92035;
 
+	if (id->driver_info & BTUSB_BCM_PATCHRAM)
+		hdev->setup = btusb_setup_bcm_patchram;
+
 	if (id->driver_info & BTUSB_INTEL)
 		hdev->setup = btusb_setup_intel;
 

diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 7048a58..66db9a8 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c

@@ -55,13 +55,6 @@
 	struct sk_buff_head txq;
 };
 
-/* H4 receiver States */
-#define H4_W4_PACKET_TYPE	0
-#define H4_W4_EVENT_HDR		1
-#define H4_W4_ACL_HDR		2
-#define H4_W4_SCO_HDR		3
-#define H4_W4_DATA		4
-
 /* Initialize protocol */
 static int h4_open(struct hci_uart *hu)
 {

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 2a44767..898b84b 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c

@@ -2184,6 +2184,7 @@
 			ret = -ENOMEM;
 			break;
 		}
+		blk_rq_set_block_pc(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
@@ -2203,7 +2204,6 @@
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 2ce0e22..f3e7150 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c

@@ -25,88 +25,115 @@
 #include <linux/virtio_rng.h>
 #include <linux/module.h>
 
-static struct virtqueue *vq;
-static unsigned int data_avail;
-static DECLARE_COMPLETION(have_data);
-static bool busy;
+static DEFINE_IDA(rng_index_ida);
+
+struct virtrng_info {
+	struct virtio_device *vdev;
+	struct hwrng hwrng;
+	struct virtqueue *vq;
+	unsigned int data_avail;
+	struct completion have_data;
+	bool busy;
+	char name[25];
+	int index;
+};
 
 static void random_recv_done(struct virtqueue *vq)
 {
+	struct virtrng_info *vi = vq->vdev->priv;
+
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
-	if (!virtqueue_get_buf(vq, &data_avail))
+	if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
 		return;
 
-	complete(&have_data);
+	complete(&vi->have_data);
 }
 
 /* The host will fill any buffer we give it with sweet, sweet randomness. */
-static void register_buffer(u8 *buf, size_t size)
+static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
 {
 	struct scatterlist sg;
 
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
+	virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
 
-	virtqueue_kick(vq);
+	virtqueue_kick(vi->vq);
 }
 
 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 {
 	int ret;
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-	if (!busy) {
-		busy = true;
-		init_completion(&have_data);
-		register_buffer(buf, size);
+	if (!vi->busy) {
+		vi->busy = true;
+		init_completion(&vi->have_data);
+		register_buffer(vi, buf, size);
 	}
 
 	if (!wait)
 		return 0;
 
-	ret = wait_for_completion_killable(&have_data);
+	ret = wait_for_completion_killable(&vi->have_data);
 	if (ret < 0)
 		return ret;
 
-	busy = false;
+	vi->busy = false;
 
-	return data_avail;
+	return vi->data_avail;
 }
 
 static void virtio_cleanup(struct hwrng *rng)
 {
-	if (busy)
-		wait_for_completion(&have_data);
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
+
+	if (vi->busy)
+		wait_for_completion(&vi->have_data);
 }
 
-
-static struct hwrng virtio_hwrng = {
-	.name		= "virtio",
-	.cleanup	= virtio_cleanup,
-	.read		= virtio_read,
-};
-
 static int probe_common(struct virtio_device *vdev)
 {
-	int err;
+	int err, index;
+	struct virtrng_info *vi = NULL;
 
-	if (vq) {
-		/* We only support one device for now */
-		return -EBUSY;
+	vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL);
+	if (!vi)
+		return -ENOMEM;
+
+	vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL);
+	if (index < 0) {
+		kfree(vi);
+		return index;
 	}
+	sprintf(vi->name, "virtio_rng.%d", index);
+	init_completion(&vi->have_data);
+
+	vi->hwrng = (struct hwrng) {
+		.read = virtio_read,
+		.cleanup = virtio_cleanup,
+		.priv = (unsigned long)vi,
+		.name = vi->name,
+	};
+	vdev->priv = vi;
+
 	/* We expect a single virtqueue. */
-	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
-	if (IS_ERR(vq)) {
-		err = PTR_ERR(vq);
-		vq = NULL;
+	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
+	if (IS_ERR(vi->vq)) {
+		err = PTR_ERR(vi->vq);
+		vi->vq = NULL;
+		kfree(vi);
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
-	err = hwrng_register(&virtio_hwrng);
+	err = hwrng_register(&vi->hwrng);
 	if (err) {
 		vdev->config->del_vqs(vdev);
-		vq = NULL;
+		vi->vq = NULL;
+		kfree(vi);
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
@@ -115,11 +142,13 @@
 
 static void remove_common(struct virtio_device *vdev)
 {
+	struct virtrng_info *vi = vdev->priv;
 	vdev->config->reset(vdev);
-	busy = false;
-	hwrng_unregister(&virtio_hwrng);
+	vi->busy = false;
+	hwrng_unregister(&vi->hwrng);
 	vdev->config->del_vqs(vdev);
-	vq = NULL;
+	ida_simple_remove(&rng_index_ida, vi->index);
+	kfree(vi);
 }
 
 static int virtrng_probe(struct virtio_device *vdev)

diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 6e8d65e..0102dc7 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c

@@ -284,10 +284,10 @@
 #endif
 
 static const struct file_operations raw_fops = {
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= blkdev_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= blkdev_write_iter,
 	.fsync		= blkdev_fsync,
 	.open		= raw_open,
 	.release	= raw_release,

diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index 527a43d..3795fce 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c

@@ -116,9 +116,25 @@
 
 int __init am43xx_dt_clk_init(void)
 {
+	struct clk *clk1, *clk2;
+
 	ti_dt_clocks_register(am43xx_clks);
 
 	omap2_clk_disable_autoidle_all();
 
+	/*
+	 * cpsw_cpts_rft_clk has a choice of 3 clock sources:
+	 * dpll_core_m4_ck, dpll_core_m5_ck and dpll_disp_m2_ck.
+	 * By default dpll_core_m4_ck is selected; with this as the clock
+	 * source the CPTS does not work properly. It gives clockcheck errors
+	 * while running PTP:
+	 * clockcheck: clock jumped backward or running slower than expected!
+	 * Selecting dpll_core_m5_ck as the clock source fixes this issue.
+	 * In AM335x dpll_core_m5_ck is the default clock source.
+	 */
+	clk1 = clk_get_sys(NULL, "cpsw_cpts_rft_clk");
+	clk2 = clk_get_sys(NULL, "dpll_core_m5_ck");
+	clk_set_parent(clk1, clk2);
+
 	return 0;
 }

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 1fbe11f..e473d65 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig

@@ -185,7 +185,7 @@
 
 config GENERIC_CPUFREQ_CPU0
 	tristate "Generic CPU0 cpufreq driver"
-	depends on HAVE_CLK && REGULATOR && OF && THERMAL && CPU_THERMAL
+	depends on HAVE_CLK && OF
 	select PM_OPP
 	help
 	  This adds a generic cpufreq driver for CPU0 frequency management.

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 36d20d0..ebac671 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm

@@ -5,8 +5,7 @@
 # big LITTLE core layer and glue drivers
 config ARM_BIG_LITTLE_CPUFREQ
 	tristate "Generic ARM big LITTLE CPUfreq driver"
-	depends on (BIG_LITTLE && ARM_CPU_TOPOLOGY) || (ARM64 && SMP)
-	depends on HAVE_CLK
+	depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK
 	select PM_OPP
 	help
 	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.

diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index 09b9129..ee1ae30 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c

@@ -104,7 +104,7 @@
 }
 
 static struct cpufreq_driver cpu0_cpufreq_driver = {
-	.flags = CPUFREQ_STICKY,
+	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = cpu0_set_target,
 	.get = cpufreq_generic_get,

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ae11dd5..aed2b0c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c

@@ -1816,20 +1816,55 @@
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/* Must set freqs->new to intermediate frequency */
+static int __target_intermediate(struct cpufreq_policy *policy,
+				 struct cpufreq_freqs *freqs, int index)
+{
+	int ret;
+
+	freqs->new = cpufreq_driver->get_intermediate(policy, index);
+
+	/* We don't need to switch to intermediate freq */
+	if (!freqs->new)
+		return 0;
+
+	pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
+		 __func__, policy->cpu, freqs->old, freqs->new);
+
+	cpufreq_freq_transition_begin(policy, freqs);
+	ret = cpufreq_driver->target_intermediate(policy, index);
+	cpufreq_freq_transition_end(policy, freqs, ret);
+
+	if (ret)
+		pr_err("%s: Failed to change to intermediate frequency: %d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
 static int __target_index(struct cpufreq_policy *policy,
 			  struct cpufreq_frequency_table *freq_table, int index)
 {
-	struct cpufreq_freqs freqs;
+	struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
+	unsigned int intermediate_freq = 0;
 	int retval = -EINVAL;
 	bool notify;
 
 	notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
-
 	if (notify) {
-		freqs.old = policy->cur;
-		freqs.new = freq_table[index].frequency;
-		freqs.flags = 0;
+		/* Handle switching to intermediate frequency */
+		if (cpufreq_driver->get_intermediate) {
+			retval = __target_intermediate(policy, &freqs, index);
+			if (retval)
+				return retval;
 
+			intermediate_freq = freqs.new;
+			/* Set old freq to intermediate */
+			if (intermediate_freq)
+				freqs.old = freqs.new;
+		}
+
+		freqs.new = freq_table[index].frequency;
 		pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 			 __func__, policy->cpu, freqs.old, freqs.new);
 
@@ -1841,9 +1876,23 @@
 		pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
 		       retval);
 
-	if (notify)
+	if (notify) {
 		cpufreq_freq_transition_end(policy, &freqs, retval);
 
+		/*
+		 * Failed after setting to intermediate freq? Driver should have
+		 * reverted back to initial frequency and so should we. Check
+		 * here for intermediate_freq instead of get_intermediate, in
+		 * case we haven't switched to intermediate freq at all.
+		 */
+		if (unlikely(retval && intermediate_freq)) {
+			freqs.old = intermediate_freq;
+			freqs.new = policy->restore_freq;
+			cpufreq_freq_transition_begin(policy, &freqs);
+			cpufreq_freq_transition_end(policy, &freqs, 0);
+		}
+	}
+
 	return retval;
 }
 
@@ -1875,6 +1924,9 @@
 	if (target_freq == policy->cur)
 		return 0;
 
+	/* Save last value to restore later on errors */
+	policy->restore_freq = policy->cur;
+
 	if (cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
 	else if (cpufreq_driver->target_index) {
@@ -2361,7 +2413,8 @@
 	    !(driver_data->setpolicy || driver_data->target_index ||
 		    driver_data->target) ||
 	     (driver_data->setpolicy && (driver_data->target_index ||
-		    driver_data->target)))
+		    driver_data->target)) ||
+	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e1c6433..1b44496 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c

@@ -36,14 +36,29 @@
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	struct cpufreq_policy *policy;
+	unsigned int sampling_rate;
 	unsigned int max_load = 0;
 	unsigned int ignore_nice;
 	unsigned int j;
 
-	if (dbs_data->cdata->governor == GOV_ONDEMAND)
+	if (dbs_data->cdata->governor == GOV_ONDEMAND) {
+		struct od_cpu_dbs_info_s *od_dbs_info =
+				dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+
+		/*
+		 * Sometimes, the ondemand governor uses an additional
+		 * multiplier to give long delays. So apply this multiplier to
+		 * the 'sampling_rate', so as to keep the wake-up-from-idle
+		 * detection logic a bit conservative.
+		 */
+		sampling_rate = od_tuners->sampling_rate;
+		sampling_rate *= od_dbs_info->rate_mult;
+
 		ignore_nice = od_tuners->ignore_nice_load;
-	else
+	} else {
+		sampling_rate = cs_tuners->sampling_rate;
 		ignore_nice = cs_tuners->ignore_nice_load;
+	}
 
 	policy = cdbs->cur_policy;
 
@@ -96,7 +111,46 @@
 		if (unlikely(!wall_time || wall_time < idle_time))
 			continue;
 
-		load = 100 * (wall_time - idle_time) / wall_time;
+		/*
+		 * If the CPU had gone completely idle, and a task just woke up
+		 * on this CPU now, it would be unfair to calculate 'load' the
+		 * usual way for this elapsed time-window, because it will show
+		 * near-zero load, irrespective of how CPU intensive that task
+		 * actually is. This is undesirable for latency-sensitive bursty
+		 * workloads.
+		 *
+		 * To avoid this, we reuse the 'load' from the previous
+		 * time-window and give this task a chance to start with a
+		 * reasonably high CPU frequency. (However, we shouldn't over-do
+		 * this copy, lest we get stuck at a high load (high frequency)
+		 * for too long, even when the current system load has actually
+		 * dropped down. So we perform the copy only once, upon the
+		 * first wake-up from idle.)
+		 *
+		 * Detecting this situation is easy: the governor's deferrable
+		 * timer would not have fired during CPU-idle periods. Hence
+		 * an unusually large 'wall_time' (as compared to the sampling
+		 * rate) indicates this scenario.
+		 *
+		 * prev_load can be zero in two cases and we must recalculate it
+		 * for both cases:
+		 * - during long idle intervals
+		 * - explicitly set to zero
+		 */
+		if (unlikely(wall_time > (2 * sampling_rate) &&
+			     j_cdbs->prev_load)) {
+			load = j_cdbs->prev_load;
+
+			/*
+			 * Perform a destructive copy, to ensure that we copy
+			 * the previous load only once, upon the first wake-up
+			 * from idle.
+			 */
+			j_cdbs->prev_load = 0;
+		} else {
+			load = 100 * (wall_time - idle_time) / wall_time;
+			j_cdbs->prev_load = load;
+		}
 
 		if (load > max_load)
 			max_load = load;
@@ -318,11 +372,18 @@
 		for_each_cpu(j, policy->cpus) {
 			struct cpu_dbs_common_info *j_cdbs =
 				dbs_data->cdata->get_cpu_cdbs(j);
+			unsigned int prev_load;
 
 			j_cdbs->cpu = j;
 			j_cdbs->cur_policy = policy;
 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
 					       &j_cdbs->prev_cpu_wall, io_busy);
+
+			prev_load = (unsigned int)
+				(j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle);
+			j_cdbs->prev_load = 100 * prev_load /
+					(unsigned int) j_cdbs->prev_cpu_wall;
+
 			if (ignore_nice)
 				j_cdbs->prev_cpu_nice =
 					kcpustat_cpu(j).cpustat[CPUTIME_NICE];

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index bfb9ae1..cc401d1 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h

@@ -134,6 +134,13 @@
 	u64 prev_cpu_idle;
 	u64 prev_cpu_wall;
 	u64 prev_cpu_nice;
+	/*
+	 * Used to keep track of load in the previous interval. However, when
+	 * explicitly set to zero, it is used as a flag to ensure that we copy
+	 * the previous load to the current interval only once, upon the first
+	 * wake-up from idle.
+	 */
+	unsigned int prev_load;
 	struct cpufreq_policy *cur_policy;
 	struct delayed_work work;
 	/*

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index aebd457..4e7f492 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c

@@ -691,14 +691,8 @@
 
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
-
-	const struct x86_cpu_id *id;
 	struct cpudata *cpu;
 
-	id = x86_match_cpu(intel_pstate_cpu_ids);
-	if (!id)
-		return -ENODEV;
-
 	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
 	if (!all_cpu_data[cpunum])
 		return -ENOMEM;

diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c
index 0af618a..3607070 100644
--- a/drivers/cpufreq/ppc-corenet-cpufreq.c
+++ b/drivers/cpufreq/ppc-corenet-cpufreq.c

@@ -138,7 +138,7 @@
 	struct cpufreq_frequency_table *table;
 	struct cpu_data *data;
 	unsigned int cpu = policy->cpu;
-	u64 transition_latency_hz;
+	u64 u64temp;
 
 	np = of_get_cpu_node(cpu, NULL);
 	if (!np)
@@ -206,9 +206,10 @@
 	for_each_cpu(i, per_cpu(cpu_mask, cpu))
 		per_cpu(cpu_data, i) = data;
 
-	transition_latency_hz = 12ULL * NSEC_PER_SEC;
-	policy->cpuinfo.transition_latency =
-		do_div(transition_latency_hz, fsl_get_sys_freq());
+	/* Minimum transition latency is 12 platform clocks */
+	u64temp = 12ULL * NSEC_PER_SEC;
+	do_div(u64temp, fsl_get_sys_freq());
+	policy->cpuinfo.transition_latency = u64temp + 1;
 
 	of_node_put(np);
 

diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra-cpufreq.c
index 6e774c6..8084c7f 100644
--- a/drivers/cpufreq/tegra-cpufreq.c
+++ b/drivers/cpufreq/tegra-cpufreq.c

@@ -45,46 +45,54 @@
 static struct clk *pll_x_clk;
 static struct clk *pll_p_clk;
 static struct clk *emc_clk;
+static bool pll_x_prepared;
 
-static int tegra_cpu_clk_set_rate(unsigned long rate)
+/*
+ * Callback installed as tegra_cpufreq_driver.get_intermediate.
+ *
+ * The intermediate frequency is the pll_p clock rate divided by 1000.
+ * Returns that value, or 0 when no intermediate step is wanted for a
+ * transition to freq_table[index].
+ */
+static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy,
+					   unsigned int index)
+{
+	unsigned int pll_p_freq = clk_get_rate(pll_p_clk) / 1000;
+
+	/* The requested table entry is the intermediate frequency itself. */
+	if (freq_table[index].frequency == pll_p_freq)
+		return 0;
+
+	/* The policy is already running at the intermediate frequency. */
+	if (policy->cur == pll_p_freq)
+		return 0;
+
+	return pll_p_freq;
+}
+
+static int tegra_target_intermediate(struct cpufreq_policy *policy,
+				     unsigned int index)
 {
 	int ret;
 
 	/*
 	 * Take an extra reference to the main pll so it doesn't turn
-	 * off when we move the cpu off of it
+	 * off when we move the cpu off of it as enabling it again while we
+	 * switch to it from tegra_target() would take additional time.
+	 *
+	 * When target-freq is equal to intermediate freq we don't need to
+	 * switch to an intermediate freq and so this routine isn't called.
+	 * Also, we wouldn't be using pll_x anymore and must not take extra
+	 * reference to it, as it can be disabled now to save some power.
 	 */
 	clk_prepare_enable(pll_x_clk);
 
 	ret = clk_set_parent(cpu_clk, pll_p_clk);
-	if (ret) {
-		pr_err("Failed to switch cpu to clock pll_p\n");
-		goto out;
-	}
+	if (ret)
+		clk_disable_unprepare(pll_x_clk);	/* drop extra ref */
+	else
+		pll_x_prepared = true;	/* dropped in tegra_target() */
 
-	if (rate == clk_get_rate(pll_p_clk))
-		goto out;
-
-	ret = clk_set_rate(pll_x_clk, rate);
-	if (ret) {
-		pr_err("Failed to change pll_x to %lu\n", rate);
-		goto out;
-	}
-
-	ret = clk_set_parent(cpu_clk, pll_x_clk);
-	if (ret) {
-		pr_err("Failed to switch cpu to clock pll_x\n");
-		goto out;
-	}
-
-out:
-	clk_disable_unprepare(pll_x_clk);
 	return ret;
 }
 
 static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	unsigned long rate = freq_table[index].frequency;
+	unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
 	int ret = 0;
 
 	/*
@@ -98,10 +106,30 @@
 	else
 		clk_set_rate(emc_clk, 100000000);  /* emc 50Mhz */
 
-	ret = tegra_cpu_clk_set_rate(rate * 1000);
+	/*
+	 * target freq == pll_p, don't need to take extra reference to pll_x_clk
+	 * as it isn't used anymore.
+	 */
+	if (rate == ifreq)
+		return clk_set_parent(cpu_clk, pll_p_clk);
+
+	ret = clk_set_rate(pll_x_clk, rate * 1000);
+	/* Restore to earlier frequency on error, i.e. pll_x */
 	if (ret)
-		pr_err("cpu-tegra: Failed to set cpu frequency to %lu kHz\n",
-			rate);
+		pr_err("Failed to change pll_x to %lu\n", rate);
+
+	ret = clk_set_parent(cpu_clk, pll_x_clk);
+	/* This shouldn't fail while changing or restoring */
+	WARN_ON(ret);
+
+	/*
+	 * Drop count to pll_x clock only if we switched to intermediate freq
+	 * earlier while transitioning to a target frequency.
+	 */
+	if (pll_x_prepared) {
+		clk_disable_unprepare(pll_x_clk);
+		pll_x_prepared = false;
+	}
 
 	return ret;
 }
@@ -137,16 +165,18 @@
 }
 
 static struct cpufreq_driver tegra_cpufreq_driver = {
-	.flags		= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
-	.verify		= cpufreq_generic_frequency_table_verify,
-	.target_index	= tegra_target,
-	.get		= cpufreq_generic_get,
-	.init		= tegra_cpu_init,
-	.exit		= tegra_cpu_exit,
-	.name		= "tegra",
-	.attr		= cpufreq_generic_attr,
+	.flags			= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify			= cpufreq_generic_frequency_table_verify,
+	.get_intermediate	= tegra_get_intermediate,
+	.target_intermediate	= tegra_target_intermediate,
+	.target_index		= tegra_target,
+	.get			= cpufreq_generic_get,
+	.init			= tegra_cpu_init,
+	.exit			= tegra_cpu_exit,
+	.name			= "tegra",
+	.attr			= cpufreq_generic_attr,
 #ifdef CONFIG_PM
-	.suspend	= cpufreq_generic_suspend,
+	.suspend		= cpufreq_generic_suspend,
 #endif
 };
 

diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index f04e25f..1b96fb9 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig

@@ -35,6 +35,11 @@
 source "drivers/cpuidle/Kconfig.arm"
 endmenu
 
+menu "MIPS CPU Idle Drivers"
+depends on MIPS
+source "drivers/cpuidle/Kconfig.mips"
+endmenu
+
 menu "POWERPC CPU Idle Drivers"
 depends on PPC
 source "drivers/cpuidle/Kconfig.powerpc"

diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
new file mode 100644
index 0000000..0e70ee2
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.mips

@@ -0,0 +1,17 @@
+#
+# MIPS CPU Idle Drivers
+#
+config MIPS_CPS_CPUIDLE
+	bool "CPU Idle driver for MIPS CPS platforms"
+	depends on CPU_IDLE
+	depends on SYS_SUPPORTS_MIPS_CPS
+	select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select MIPS_CPS_PM
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle for systems built around the MIPS Coherent
+	  Processing System (CPS) architecture. In order to make use of
+	  the deepest idle states you will need to ensure that you are
+	  also using the CONFIG_MIPS_CPS SMP implementation.

diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 9b5b2b5..d8bb1ff 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile

@@ -18,6 +18,10 @@
 obj-$(CONFIG_ARM_EXYNOS_CPUIDLE)        += cpuidle-exynos.o
 
 ###############################################################################
+# MIPS drivers
+obj-$(CONFIG_MIPS_CPS_CPUIDLE)		+= cpuidle-cps.o
+
+###############################################################################
 # POWERPC drivers
 obj-$(CONFIG_PSERIES_CPUIDLE)		+= cpuidle-pseries.o
 obj-$(CONFIG_POWERNV_CPUIDLE)		+= cpuidle-powernv.o

diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c
new file mode 100644
index 0000000..fc7b627
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-cps.c

@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/cpuidle.h>
+#include <linux/init.h>
+
+#include <asm/idle.h>
+#include <asm/pm-cps.h>
+
+/*
+ * Enumeration of the various idle states this driver may enter. The values
+ * double as indices into cps_driver.states[].
+ */
+enum cps_idle_state {
+	STATE_WAIT = 0,		/* MIPS wait instruction, coherent */
+	STATE_NC_WAIT,		/* MIPS wait instruction, non-coherent */
+	STATE_CLOCK_GATED,	/* Core clock gated */
+	STATE_POWER_GATED,	/* Core power gated */
+	STATE_COUNT		/* Number of states above, not a state */
+};
+
+/*
+ * ->enter() handler shared by the non-coherent wait, clock gated and power
+ * gated entries of cps_driver.
+ *
+ * Requests for anything beyond non-coherent wait are downgraded to
+ * non-coherent wait on CPUs whose cpu_data[].core is 0. Power gating is
+ * bracketed by cpu_pm_enter()/cpu_pm_exit().
+ *
+ * Returns the index of the state that was entered, the non-zero result of
+ * cps_pm_enter_state() if it reported one, or -EINTR when cpu_pm_enter()
+ * returns non-zero.
+ */
+static int cps_nc_enter(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index)
+{
+	enum cps_pm_state target;
+	int power_gating;
+	int ret;
+
+	/*
+	 * At least one core must remain powered up & clocked in order for the
+	 * system to have any hope of functioning.
+	 *
+	 * TODO: don't treat core 0 specially, just prevent the final core
+	 * TODO: remap interrupt affinity temporarily
+	 */
+	if (!cpu_data[dev->cpu].core && (index > STATE_NC_WAIT))
+		index = STATE_NC_WAIT;
+
+	/* Translate the cpuidle state index into a cps_pm_state */
+	if (index == STATE_NC_WAIT) {
+		target = CPS_PM_NC_WAIT;
+	} else if (index == STATE_CLOCK_GATED) {
+		target = CPS_PM_CLOCK_GATED;
+	} else if (index == STATE_POWER_GATED) {
+		target = CPS_PM_POWER_GATED;
+	} else {
+		BUG();
+		return -EINVAL;
+	}
+
+	power_gating = (target == CPS_PM_POWER_GATED);
+
+	/* Notify listeners the CPU is about to power down */
+	if (power_gating && cpu_pm_enter())
+		return -EINTR;
+
+	ret = cps_pm_enter_state(target);
+
+	/* Notify listeners the CPU is back up */
+	if (power_gating)
+		cpu_pm_exit();
+
+	if (ret)
+		return ret;
+
+	return index;
+}
+
+static struct cpuidle_driver cps_driver = {
+	.name			= "cpc_cpuidle",
+	.owner			= THIS_MODULE,
+	.states = {		/* indexed by enum cps_idle_state */
+		[STATE_WAIT] = MIPS_CPUIDLE_WAIT_STATE,
+		[STATE_NC_WAIT] = {
+			.enter	= cps_nc_enter,
+			.exit_latency		= 200,
+			.target_residency	= 450,
+			.flags	= CPUIDLE_FLAG_TIME_VALID,
+			.name	= "nc-wait",
+			.desc	= "non-coherent MIPS wait",
+		},
+		[STATE_CLOCK_GATED] = {
+			.enter	= cps_nc_enter,
+			.exit_latency		= 300,
+			.target_residency	= 700,
+			.flags	= CPUIDLE_FLAG_TIME_VALID |
+				  CPUIDLE_FLAG_TIMER_STOP,
+			.name	= "clock-gated",
+			.desc	= "core clock gated",
+		},
+		[STATE_POWER_GATED] = {
+			.enter	= cps_nc_enter,
+			.exit_latency		= 600,
+			.target_residency	= 1000,
+			.flags	= CPUIDLE_FLAG_TIME_VALID |
+				  CPUIDLE_FLAG_TIMER_STOP,
+			.name	= "power-gated",
+			.desc	= "core power gated",
+		},
+	},
+	.state_count		= STATE_COUNT,	/* trimmed at init time */
+	.safe_state_index	= 0,	/* STATE_WAIT */
+};
+
+/*
+ * Unregister the cpuidle device of every possible CPU, then the CPS cpuidle
+ * driver itself. Called from the error path of cps_cpuidle_init().
+ */
+static void __init cps_cpuidle_unregister(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		cpuidle_unregister_device(&per_cpu(cpuidle_dev, cpu));
+
+	cpuidle_unregister_driver(&cps_driver);
+}
+
+/*
+ * Register the CPS cpuidle driver plus one cpuidle device per possible CPU.
+ *
+ * Before registering, cps_driver.state_count is cut down so that it stops
+ * short of the lowest-indexed state that cps_pm_support_state() rejects, and
+ * the states from STATE_NC_WAIT upwards are flagged as coupled when
+ * coupled_coherence is set.
+ *
+ * Returns 0 on success. If the driver fails to register its error code is
+ * returned; if a device fails to register, all devices and the driver are
+ * unregistered again and that error code is returned.
+ */
+static int __init cps_cpuidle_init(void)
+{
+	int err, cpu, i;
+	struct cpuidle_device *device;
+
+	/*
+	 * Detect supported states. The checks run from the highest state
+	 * index down so that a later failure overrides an earlier one.
+	 */
+	if (!cps_pm_support_state(CPS_PM_POWER_GATED))
+		cps_driver.state_count = STATE_CLOCK_GATED + 1;
+	if (!cps_pm_support_state(CPS_PM_CLOCK_GATED))
+		cps_driver.state_count = STATE_NC_WAIT + 1;
+	if (!cps_pm_support_state(CPS_PM_NC_WAIT))
+		cps_driver.state_count = STATE_WAIT + 1;
+
+	/* Inform the user if some states are unavailable */
+	if (cps_driver.state_count < STATE_COUNT) {
+		pr_info("cpuidle-cps: limited to ");
+		switch (cps_driver.state_count - 1) {
+		case STATE_WAIT:
+			pr_cont("coherent wait\n");
+			break;
+		case STATE_NC_WAIT:
+			pr_cont("non-coherent wait\n");
+			break;
+		case STATE_CLOCK_GATED:
+			pr_cont("clock gating\n");
+			break;
+		}
+	}
+
+	/*
+	 * Set the coupled flag on the appropriate states if this system
+	 * requires it.
+	 */
+	if (coupled_coherence)
+		for (i = STATE_NC_WAIT; i < cps_driver.state_count; i++)
+			cps_driver.states[i].flags |= CPUIDLE_FLAG_COUPLED;
+
+	err = cpuidle_register_driver(&cps_driver);
+	if (err) {
+		pr_err("Failed to register CPS cpuidle driver\n");
+		return err;
+	}
+
+	for_each_possible_cpu(cpu) {
+		device = &per_cpu(cpuidle_dev, cpu);
+		device->cpu = cpu;
+#ifdef CONFIG_MIPS_MT
+		cpumask_copy(&device->coupled_cpus, &cpu_sibling_map[cpu]);
+#endif
+
+		err = cpuidle_register_device(device);
+		if (err) {
+			pr_err("Failed to register CPU%d cpuidle device\n",
+			       cpu);
+			goto err_out;
+		}
+	}
+
+	return 0;
+err_out:
+	/* Tear down every device and the driver registered above */
+	cps_cpuidle_unregister();
+	return err;
+}
+device_initcall(cps_cpuidle_init);

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 719f6fb..74f5788 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c

@@ -73,12 +73,10 @@
 		return index;
 
 	new_lpcr = old_lpcr;
-	new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */
-
-	/* exit powersave upon external interrupt, but not decrementer
-	 * interrupt.
+	/* Do not exit powersave upon decrementer as we've setup the timer
+	 * offload.
 	 */
-	new_lpcr |= LPCR_PECE0;
+	new_lpcr &= ~LPCR_PECE1;
 
 	mtspr(SPRN_LPCR, new_lpcr);
 	power7_sleep();

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 136d6a2..9634f20 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c

@@ -187,8 +187,11 @@
 
 	t1 = ktime_get();
 	local_irq_enable();
-	while (!need_resched())
-		cpu_relax();
+	if (!current_set_polling_and_test()) {
+		while (!need_resched())
+			cpu_relax();
+	}
+	current_clr_polling();
 
 	t2 = ktime_get();
 	diff = ktime_to_us(ktime_sub(t2, t1));

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index f066fa2..02f177a 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig

@@ -313,7 +313,7 @@
 
 config CRYPTO_DEV_NX
 	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-	depends on PPC64 && IBMVIO
+	depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
 	default n
 	help
 	  Support for Power7+ in-Nest cryptographic acceleration.

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5c58638..1eca7b9 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig

@@ -234,7 +234,7 @@
 
 config PCH_DMA
 	tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
-	depends on PCI && X86
+	depends on PCI && (X86_32 || COMPILE_TEST)
 	select DMA_ENGINE
 	help
 	  Enable support for Intel EG20T PCH DMA engine.
@@ -269,7 +269,7 @@
 	select DMA_ENGINE
 	help
 	  Support the MXS DMA engine. This engine including APBH-DMA
-	  and APBX-DMA is integrated into Freescale i.MX23/28 chips.
+	  and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips.
 
 config EP93XX_DMA
 	bool "Cirrus Logic EP93xx DMA support"
@@ -361,6 +361,20 @@
 	  multiplexing capability for DMA request sources(slot).
 	  This module can be found on Freescale Vybrid and LS-1 SoCs.
 
+config XILINX_VDMA
+	tristate "Xilinx AXI VDMA Engine"
+	depends on (ARCH_ZYNQ || MICROBLAZE)
+	select DMA_ENGINE
+	help
+	  Enable support for Xilinx AXI VDMA Soft IP.
+
+	  This engine provides high-bandwidth direct memory access
+	  between memory and AXI4-Stream video type target
+	  peripherals including peripherals which support AXI4-
+	  Stream Video Protocol.  It has two stream interfaces/
+	  channels, Memory Mapped to Stream (MM2S) and Stream to
+	  Memory Mapped (S2MM) for the data transfers.
+
 config DMA_ENGINE
 	bool
 

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 5150c82..c779e1e 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile

@@ -46,3 +46,4 @@
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
+obj-y += xilinx/

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 7a74076..a27ded5 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c

@@ -1493,6 +1493,13 @@
 	dw->regs = chip->regs;
 	chip->dw = dw;
 
+	dw->clk = devm_clk_get(chip->dev, "hclk");
+	if (IS_ERR(dw->clk))
+		return PTR_ERR(dw->clk);
+	err = clk_prepare_enable(dw->clk);
+	if (err)
+		return err;
+
 	dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
 	autocfg = dw_params >> DW_PARAMS_EN & 0x1;
 
@@ -1500,15 +1507,19 @@
 
 	if (!pdata && autocfg) {
 		pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata)
-			return -ENOMEM;
+		if (!pdata) {
+			err = -ENOMEM;
+			goto err_pdata;
+		}
 
 		/* Fill platform data with the default values */
 		pdata->is_private = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
-	} else if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
-		return -EINVAL;
+	} else if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+		err = -EINVAL;
+		goto err_pdata;
+	}
 
 	if (autocfg)
 		nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 0x7) + 1;
@@ -1517,13 +1528,10 @@
 
 	dw->chan = devm_kcalloc(chip->dev, nr_channels, sizeof(*dw->chan),
 				GFP_KERNEL);
-	if (!dw->chan)
-		return -ENOMEM;
-
-	dw->clk = devm_clk_get(chip->dev, "hclk");
-	if (IS_ERR(dw->clk))
-		return PTR_ERR(dw->clk);
-	clk_prepare_enable(dw->clk);
+	if (!dw->chan) {
+		err = -ENOMEM;
+		goto err_pdata;
+	}
 
 	/* Get hardware configuration parameters */
 	if (autocfg) {
@@ -1553,7 +1561,8 @@
 					 sizeof(struct dw_desc), 4, 0);
 	if (!dw->desc_pool) {
 		dev_err(chip->dev, "No memory for descriptors dma pool\n");
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err_pdata;
 	}
 
 	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
@@ -1561,7 +1570,7 @@
 	err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED,
 			  "dw_dmac", dw);
 	if (err)
-		return err;
+		goto err_pdata;
 
 	INIT_LIST_HEAD(&dw->dma.channels);
 	for (i = 0; i < nr_channels; i++) {
@@ -1650,12 +1659,20 @@
 
 	dma_writel(dw, CFG, DW_CFG_DMA_EN);
 
+	err = dma_async_device_register(&dw->dma);
+	if (err)
+		goto err_dma_register;
+
 	dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n",
 		 nr_channels);
 
-	dma_async_device_register(&dw->dma);
-
 	return 0;
+
+err_dma_register:
+	free_irq(chip->irq, dw);
+err_pdata:
+	clk_disable_unprepare(dw->clk);
+	return err;
 }
 EXPORT_SYMBOL_GPL(dw_dma_probe);
 
@@ -1676,6 +1693,8 @@
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 	}
 
+	clk_disable_unprepare(dw->clk);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dw_dma_remove);

diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index fec59f1..39e30c3 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c

@@ -93,19 +93,13 @@
 	return dw_dma_resume(chip);
 };
 
-#else /* !CONFIG_PM_SLEEP */
-
-#define dw_pci_suspend_late	NULL
-#define dw_pci_resume_early	NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops dw_pci_dev_pm_ops = {
-	.suspend_late = dw_pci_suspend_late,
-	.resume_early = dw_pci_resume_early,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early)
 };
 
-static DEFINE_PCI_DEVICE_TABLE(dw_pci_id_table) = {
+static const struct pci_device_id dw_pci_id_table[] = {
 	/* Medfield */
 	{ PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_pdata },
 	{ PCI_VDEVICE(INTEL, 0x0830), (kernel_ulong_t)&dw_pci_pdata },

diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 453822c..c5b339a 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c

@@ -256,7 +256,7 @@
 
 #ifdef CONFIG_PM_SLEEP
 
-static int dw_suspend_noirq(struct device *dev)
+static int dw_suspend_late(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
@@ -264,7 +264,7 @@
 	return dw_dma_suspend(chip);
 }
 
-static int dw_resume_noirq(struct device *dev)
+static int dw_resume_early(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
@@ -272,20 +272,10 @@
 	return dw_dma_resume(chip);
 }
 
-#else /* !CONFIG_PM_SLEEP */
-
-#define dw_suspend_noirq	NULL
-#define dw_resume_noirq		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops dw_dev_pm_ops = {
-	.suspend_noirq = dw_suspend_noirq,
-	.resume_noirq = dw_resume_noirq,
-	.freeze_noirq = dw_suspend_noirq,
-	.thaw_noirq = dw_resume_noirq,
-	.restore_noirq = dw_resume_noirq,
-	.poweroff_noirq = dw_suspend_noirq,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early)
 };
 
 static struct platform_driver dw_driver = {

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index f157c6f..e0fec68 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c

@@ -61,6 +61,16 @@
 	return DMA_IN(chan, &chan->regs->sr, 32);
 }
 
+static void set_mr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->mr, val, 32);	/* mode register */
+}
+
+static u32 get_mr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->mr, 32);	/* mode register */
+}
+
 static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
 {
 	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
@@ -71,6 +81,11 @@
 	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
 }
 
+static void set_bcr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->bcr, val, 32); /* pairs with get_bcr() */
+}
+
 static u32 get_bcr(struct fsldma_chan *chan)
 {
 	return DMA_IN(chan, &chan->regs->bcr, 32);
@@ -135,7 +150,7 @@
 static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
-	DMA_OUT(chan, &chan->regs->mr, 0, 32);
+	set_mr(chan, 0);
 
 	switch (chan->feature & FSL_DMA_IP_MASK) {
 	case FSL_DMA_IP_85XX:
@@ -144,16 +159,15 @@
 		 * EOLNIE - End of links interrupt enable
 		 * BWC - Bandwidth sharing among channels
 		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC
-				| FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32);
+		set_mr(chan, FSL_DMA_MR_BWC | FSL_DMA_MR_EIE
+			| FSL_DMA_MR_EOLNIE);
 		break;
 	case FSL_DMA_IP_83XX:
 		/* Set the channel to below modes:
 		 * EOTIE - End-of-transfer interrupt enable
 		 * PRC_RM - PCI read multiple
 		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
-				| FSL_DMA_MR_PRC_RM, 32);
+		set_mr(chan, FSL_DMA_MR_EOTIE | FSL_DMA_MR_PRC_RM);
 		break;
 	}
 }
@@ -175,10 +189,10 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
-		DMA_OUT(chan, &chan->regs->bcr, 0, 32);
+		set_bcr(chan, 0);
 		mode |= FSL_DMA_MR_EMP_EN;
 	} else {
 		mode &= ~FSL_DMA_MR_EMP_EN;
@@ -191,7 +205,7 @@
 		mode |= FSL_DMA_MR_CS;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 static void dma_halt(struct fsldma_chan *chan)
@@ -200,7 +214,7 @@
 	int i;
 
 	/* read the mode register */
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	/*
 	 * The 85xx controller supports channel abort, which will stop
@@ -209,14 +223,14 @@
 	 */
 	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
 		mode |= FSL_DMA_MR_CA;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+		set_mr(chan, mode);
 
 		mode &= ~FSL_DMA_MR_CA;
 	}
 
 	/* stop the DMA controller */
 	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 
 	/* wait for the DMA controller to become idle */
 	for (i = 0; i < 100; i++) {
@@ -245,7 +259,7 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	switch (size) {
 	case 0:
@@ -259,7 +273,7 @@
 		break;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -277,7 +291,7 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	switch (size) {
 	case 0:
@@ -291,7 +305,7 @@
 		break;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -312,10 +326,10 @@
 
 	BUG_ON(size > 1024);
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 	mode |= (__ilog2(size) << 24) & 0x0f000000;
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -404,6 +418,19 @@
 }
 
 /**
+ * fsl_dma_free_descriptor - Free descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ * @desc: descriptor to be freed
+ *
+ * Unlinks @desc from the list its node is on and hands it back to
+ * chan->desc_pool. @desc must not be touched afterwards.
+ */
+static void fsl_dma_free_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	list_del(&desc->node);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
  * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
  * @chan : Freescale DMA channel
  *
@@ -426,14 +453,107 @@
 	desc->async_tx.tx_submit = fsl_dma_tx_submit;
 	desc->async_tx.phys = pdesc;
 
-#ifdef FSL_DMA_LD_DEBUG
 	chan_dbg(chan, "LD %p allocated\n", desc);
-#endif
 
 	return desc;
 }
 
 /**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ *
+ * Does nothing when ld_pending is empty or when chan->idle is false.
+ * Otherwise the whole pending list is moved to the tail of ld_running, the
+ * address of the first formerly-pending descriptor is programmed into the
+ * controller, the controller is started and chan->idle is cleared.
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = get_mr(chan);
+		mode &= ~FSL_DMA_MR_CS;
+		set_mr(chan, mode);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction. The value returned by get_cdar()
+	 * is discarded; presumably the read-back only serves to make sure
+	 * the write has reached the controller - TODO confirm.
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor. dma_descriptor_unmap() is called on the descriptor
+ * just before it is returned to chan->desc_pool.
+ *
+ * Unlike fsl_dma_free_descriptor(), it does not unlink @desc from the list
+ * its node is on, so that is presumably left to the caller - TODO confirm.
+ */
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
+				      struct fsl_desc_sw *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+	/* Run the link descriptor callback function */
+	if (txd->callback) {
+		chan_dbg(chan, "LD %p callback\n", desc);
+		txd->callback(txd->callback_param);
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	dma_descriptor_unmap(txd);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, txd->phys);
+}
+
+/**
  * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
  * @chan : Freescale DMA channel
  *
@@ -477,13 +597,8 @@
 {
 	struct fsl_desc_sw *desc, *_desc;
 
-	list_for_each_entry_safe(desc, _desc, list, node) {
-		list_del(&desc->node);
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p free\n", desc);
-#endif
-		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
-	}
+	list_for_each_entry_safe(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
 }
 
 static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
@@ -491,13 +606,8 @@
 {
 	struct fsl_desc_sw *desc, *_desc;
 
-	list_for_each_entry_safe_reverse(desc, _desc, list, node) {
-		list_del(&desc->node);
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p free\n", desc);
-#endif
-		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
-	}
+	list_for_each_entry_safe_reverse(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
 }
 
 /**
@@ -520,35 +630,6 @@
 }
 
 static struct dma_async_tx_descriptor *
-fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
-{
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *new;
-
-	if (!dchan)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-
-	new = fsl_dma_alloc_descriptor(chan);
-	if (!new) {
-		chan_err(chan, "%s\n", msg_ld_oom);
-		return NULL;
-	}
-
-	new->async_tx.cookie = -EBUSY;
-	new->async_tx.flags = flags;
-
-	/* Insert the link descriptor to the LD ring */
-	list_add_tail(&new->node, &new->tx_list);
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	return &new->async_tx;
-}
-
-static struct dma_async_tx_descriptor *
 fsl_dma_prep_memcpy(struct dma_chan *dchan,
 	dma_addr_t dma_dst, dma_addr_t dma_src,
 	size_t len, unsigned long flags)
@@ -817,105 +898,6 @@
 }
 
 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
- * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
- *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
- */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
-{
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
-
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p callback\n", desc);
-#endif
-		txd->callback(txd->callback_param);
-	}
-
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
-
-	dma_descriptor_unmap(txd);
-#ifdef FSL_DMA_LD_DEBUG
-	chan_dbg(chan, "LD %p free\n", desc);
-#endif
-	dma_pool_free(chan->desc_pool, desc, txd->phys);
-}
-
-/**
- * fsl_chan_xfer_ld_queue - transfer any pending transactions
- * @chan : Freescale DMA channel
- *
- * HARDWARE STATE: idle
- * LOCKING: must hold chan->desc_lock
- */
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-
-	/*
-	 * If the list of pending descriptors is empty, then we
-	 * don't need to do any work at all
-	 */
-	if (list_empty(&chan->ld_pending)) {
-		chan_dbg(chan, "no pending LDs\n");
-		return;
-	}
-
-	/*
-	 * The DMA controller is not idle, which means that the interrupt
-	 * handler will start any queued transactions when it runs after
-	 * this transaction finishes
-	 */
-	if (!chan->idle) {
-		chan_dbg(chan, "DMA controller still busy\n");
-		return;
-	}
-
-	/*
-	 * If there are some link descriptors which have not been
-	 * transferred, we need to start the controller
-	 */
-
-	/*
-	 * Move all elements from the queue of pending transactions
-	 * onto the list of running transactions
-	 */
-	chan_dbg(chan, "idle, starting controller\n");
-	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
-	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
-
-	/*
-	 * The 85xx DMA controller doesn't clear the channel start bit
-	 * automatically at the end of a transfer. Therefore we must clear
-	 * it in software before starting the transfer.
-	 */
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		u32 mode;
-
-		mode = DMA_IN(chan, &chan->regs->mr, 32);
-		mode &= ~FSL_DMA_MR_CS;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
-	}
-
-	/*
-	 * Program the descriptor's address into the DMA controller,
-	 * then start the DMA transaction
-	 */
-	set_cdar(chan, desc->async_tx.phys);
-	get_cdar(chan);
-
-	dma_start(chan);
-	chan->idle = false;
-}
-
-/**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
  * @chan : Freescale DMA channel
  */
@@ -1304,12 +1286,10 @@
 	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
-	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
 	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
-	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
 	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 19041ce..1287146 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c

@@ -607,8 +607,6 @@
 
 		if (bd->mode.status & BD_RROR)
 			sdmac->status = DMA_ERROR;
-		else
-			sdmac->status = DMA_IN_PROGRESS;
 
 		bd->mode.status |= BD_DONE;
 		sdmac->buf_tail++;

diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index bf02e7b..a7b186d 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c

@@ -29,8 +29,8 @@
 #define DALGN		0x00a0
 #define DINT		0x00f0
 #define DDADR		0x0200
-#define DSADR		0x0204
-#define DTADR		0x0208
+#define DSADR(n)	(0x0204 + ((n) << 4))
+#define DTADR(n)	(0x0208 + ((n) << 4))
 #define DCMD		0x020c
 
 #define DCSR_RUN	BIT(31)	/* Run Bit (read / write) */
@@ -277,7 +277,7 @@
 		return;
 
 	/* clear the channel mapping in DRCMR */
-	reg = DRCMR(pchan->phy->vchan->drcmr);
+	reg = DRCMR(pchan->drcmr);
 	writel(0, pchan->phy->base + reg);
 
 	spin_lock_irqsave(&pdev->phy_lock, flags);
@@ -748,11 +748,92 @@
 	return 0;
 }
 
+static unsigned int mmp_pdma_residue(struct mmp_pdma_chan *chan,
+				     dma_cookie_t cookie)
+{
+	struct mmp_pdma_desc_sw *sw;
+	u32 curr, residue = 0;
+	bool passed = false;
+	bool cyclic = chan->cyclic_first != NULL;
+
+	/*
+	 * If the channel does not have a phy pointer anymore, it has already
+	 * been completed. Therefore, its residue is 0.
+	 */
+	if (!chan->phy)
+		return 0;
+
+	if (chan->dir == DMA_DEV_TO_MEM)
+		curr = readl(chan->phy->base + DTADR(chan->phy->idx));
+	else
+		curr = readl(chan->phy->base + DSADR(chan->phy->idx));
+
+	list_for_each_entry(sw, &chan->chain_running, node) {
+		u32 start, end, len;
+
+		if (chan->dir == DMA_DEV_TO_MEM)
+			start = sw->desc.dtadr;
+		else
+			start = sw->desc.dsadr;
+
+		len = sw->desc.dcmd & DCMD_LENGTH;
+		end = start + len;
+
+		/*
+		 * 'passed' is latched once we have found the descriptor whose
+		 * [start, end] range contains the curr pointer. All
+		 * descriptors that occur in the list _after_ that partially
+		 * handled descriptor are still to be processed, so their full
+		 * length is added to the residual bytes counter.
+		 */
+
+		if (passed) {
+			residue += len;
+		} else if (curr >= start && curr <= end) {
+			residue += end - curr;
+			passed = true;
+		}
+
+		/*
+		 * Descriptors that have the ENDIRQEN bit set mark the end of a
+		 * transaction chain, and the cookie assigned with it has been
+		 * returned previously from mmp_pdma_tx_submit().
+		 *
+		 * In case we have multiple transactions in the running chain,
+		 * and the cookie does not match the one the user asked us
+		 * about, reset the state variables and start over.
+		 *
+		 * This logic does not apply to cyclic transactions, where all
+		 * descriptors have the ENDIRQEN bit set, and for which we
+		 * can't have multiple transactions on one channel anyway.
+		 */
+		if (cyclic || !(sw->desc.dcmd & DCMD_ENDIRQEN))
+			continue;
+
+		if (sw->async_tx.cookie == cookie) {
+			return residue;
+		} else {
+			residue = 0;
+			passed = false;
+		}
+	}
+
+	/* We should only get here in case of cyclic transactions */
+	return residue;
+}
+
 static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
 					  dma_cookie_t cookie,
 					  struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dchan, cookie, txstate);
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (likely(ret != DMA_ERROR))
+		dma_set_residue(txstate, mmp_pdma_residue(chan, cookie));
+
+	return ret;
 }
 
 /**
@@ -858,8 +939,7 @@
 	struct mmp_pdma_chan *chan;
 	int ret;
 
-	chan = devm_kzalloc(pdev->dev, sizeof(struct mmp_pdma_chan),
-			    GFP_KERNEL);
+	chan = devm_kzalloc(pdev->dev, sizeof(*chan), GFP_KERNEL);
 	if (chan == NULL)
 		return -ENOMEM;
 
@@ -946,8 +1026,7 @@
 			irq_num++;
 	}
 
-	pdev->phy = devm_kcalloc(pdev->dev,
-				 dma_channels, sizeof(struct mmp_pdma_chan),
+	pdev->phy = devm_kcalloc(pdev->dev, dma_channels, sizeof(*pdev->phy),
 				 GFP_KERNEL);
 	if (pdev->phy == NULL)
 		return -ENOMEM;

diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 448750d..2ad4373 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c

@@ -2,6 +2,7 @@
  * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
  * Copyright (C) Semihalf 2009
  * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
  *
  * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
  * (defines, structures and comments) was taken from MPC5121 DMA driver
@@ -29,8 +30,18 @@
  */
 
 /*
- * This is initial version of MPC5121 DMA driver. Only memory to memory
- * transfers are supported (tested using dmatest module).
+ * MPC512x and MPC8308 DMA driver. It supports
+ * memory to memory data transfers (tested using dmatest module) and
+ * data transfers between memory and peripheral I/O memory
+ * by means of slave scatter/gather with these limitations:
+ *  - chunked transfers (described by s/g lists with more than one item)
+ *     are refused as long as proper support for scatter/gather is missing;
+ *  - transfers on MPC8308 always start from software as this SoC appears
+ *     not to have external request lines for peripheral flow control;
+ *  - only peripheral devices with 4-byte FIFO access register are supported;
+ *  - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently
+ *     source and destination addresses must be 4-byte aligned
+ *     and transfer size must be aligned on (4 * maxburst) boundary;
  */
 
 #include <linux/module.h>
@@ -52,9 +63,17 @@
 #define MPC_DMA_DESCRIPTORS	64
 
 /* Macro definitions */
-#define MPC_DMA_CHANNELS	64
 #define MPC_DMA_TCD_OFFSET	0x1000
 
+/*
+ * Maximum channel counts for individual hardware variants
+ * and the maximum channel count over all supported controllers,
+ * used for data structure size
+ */
+#define MPC8308_DMACHAN_MAX	16
+#define MPC512x_DMACHAN_MAX	64
+#define MPC_DMA_CHANNELS	64
+
 /* Arbitration mode of group and channel */
 #define MPC_DMA_DMACR_EDCG	(1 << 31)
 #define MPC_DMA_DMACR_ERGA	(1 << 3)
@@ -181,6 +200,7 @@
 	dma_addr_t			tcd_paddr;
 	int				error;
 	struct list_head		node;
+	int				will_access_peripheral;
 };
 
 struct mpc_dma_chan {
@@ -193,6 +213,12 @@
 	struct mpc_dma_tcd		*tcd;
 	dma_addr_t			tcd_paddr;
 
+	/* Settings for access to peripheral FIFO */
+	dma_addr_t			src_per_paddr;
+	u32				src_tcd_nunits;
+	dma_addr_t			dst_per_paddr;
+	u32				dst_tcd_nunits;
+
 	/* Lock for this structure */
 	spinlock_t			lock;
 };
@@ -243,8 +269,23 @@
 	struct mpc_dma_desc *mdesc;
 	int cid = mchan->chan.chan_id;
 
-	/* Move all queued descriptors to active list */
-	list_splice_tail_init(&mchan->queued, &mchan->active);
+	while (!list_empty(&mchan->queued)) {
+		mdesc = list_first_entry(&mchan->queued,
+						struct mpc_dma_desc, node);
+		/*
+		 * Grab either several mem-to-mem transfer descriptors
+		 * or one peripheral transfer descriptor,
+		 * don't mix mem-to-mem and peripheral transfer descriptors
+		 * within the same 'active' list.
+		 */
+		if (mdesc->will_access_peripheral) {
+			if (list_empty(&mchan->active))
+				list_move_tail(&mdesc->node, &mchan->active);
+			break;
+		} else {
+			list_move_tail(&mdesc->node, &mchan->active);
+		}
+	}
 
 	/* Chain descriptors into one transaction */
 	list_for_each_entry(mdesc, &mchan->active, node) {
@@ -270,7 +311,17 @@
 
 	if (first != prev)
 		mdma->tcd[cid].e_sg = 1;
-	out_8(&mdma->regs->dmassrt, cid);
+
+	if (mdma->is_mpc8308) {
+		/* MPC8308, no request lines, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	} else if (first->will_access_peripheral) {
+		/* Peripherals involved, start by external request signal */
+		out_8(&mdma->regs->dmaserq, cid);
+	} else {
+		/* Memory to memory transfer, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	}
 }
 
 /* Handle interrupt on one half of DMA controller (32 channels) */
@@ -588,6 +639,7 @@
 	}
 
 	mdesc->error = 0;
+	mdesc->will_access_peripheral = 0;
 	tcd = mdesc->tcd;
 
 	/* Prepare Transfer Control Descriptor for this transaction */
@@ -635,6 +687,193 @@
 	return &mdesc->desc;
 }
 
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	dma_addr_t per_paddr;
+	u32 tcd_nunits;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+	struct scatterlist *sg;
+	size_t len;
+	int iter, i;
+
+	/* Currently there is no proper support for scatter/gather */
+	if (sg_len != 1)
+		return NULL;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		spin_lock_irqsave(&mchan->lock, iflags);
+		/* NULL (not a bogus entry) when the free list is empty */
+		mdesc = list_first_entry_or_null(&mchan->free,
+						struct mpc_dma_desc, node);
+		if (!mdesc) {
+			spin_unlock_irqrestore(&mchan->lock, iflags);
+			/* Try to free completed descriptors */
+			mpc_dma_process_completed(mdma);
+			return NULL;
+		}
+
+		list_del(&mdesc->node);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			per_paddr = mchan->src_per_paddr;
+			tcd_nunits = mchan->src_tcd_nunits;
+		} else {
+			per_paddr = mchan->dst_per_paddr;
+			tcd_nunits = mchan->dst_tcd_nunits;
+		}
+
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+
+		if (per_paddr == 0 || tcd_nunits == 0)
+			goto err_prep;
+
+		mdesc->error = 0;
+		mdesc->will_access_peripheral = 1;
+
+		/* Prepare Transfer Control Descriptor for this transaction */
+		tcd = mdesc->tcd;
+
+		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+		if (!IS_ALIGNED(sg_dma_address(sg), 4))
+			goto err_prep;
+
+		if (direction == DMA_DEV_TO_MEM) {
+			tcd->saddr = per_paddr;
+			tcd->daddr = sg_dma_address(sg);
+			tcd->soff = 0;
+			tcd->doff = 4;
+		} else {
+			tcd->saddr = sg_dma_address(sg);
+			tcd->daddr = per_paddr;
+			tcd->soff = 4;
+			tcd->doff = 0;
+		}
+
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+
+		len = sg_dma_len(sg);
+		tcd->nbytes = tcd_nunits * 4;
+		if (!IS_ALIGNED(len, tcd->nbytes))
+			goto err_prep;
+
+		iter = len / tcd->nbytes;
+		if (iter >= 1 << 15) {
+			/* len is too big */
+			goto err_prep;
+		}
+		/* citer_linkch contains the high bits of iter */
+		tcd->biter = iter & 0x1ff;
+		tcd->biter_linkch = iter >> 9;
+		tcd->citer = tcd->biter;
+		tcd->citer_linkch = tcd->biter_linkch;
+
+		tcd->e_sg = 0;
+		tcd->d_req = 1;
+
+		/* Place descriptor in prepared list */
+		spin_lock_irqsave(&mchan->lock, iflags);
+		list_add_tail(&mdesc->node, &mchan->prepared);
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+	}
+
+	return &mdesc->desc;
+
+err_prep:
+	/* Put the descriptor back */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return NULL;
+}
+
+static int mpc_dma_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+							unsigned long arg)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma *mdma;
+	struct dma_slave_config *cfg;
+	unsigned long flags;
+
+	mchan = dma_chan_to_mpc_dma_chan(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Disable channel requests, recycle all descriptors */
+		mdma = dma_chan_to_mpc_dma(chan);
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		out_8(&mdma->regs->dmacerq, chan->chan_id);
+		list_splice_tail_init(&mchan->prepared, &mchan->free);
+		list_splice_tail_init(&mchan->queued, &mchan->free);
+		list_splice_tail_init(&mchan->active, &mchan->free);
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		/*
+		 * Software constraints:
+		 *  - only transfers between a peripheral device and
+		 *     memory are supported;
+		 *  - only peripheral devices with 4-byte FIFO access register
+		 *     are supported;
+		 *  - minimal transfer chunk is 4 bytes and consequently
+		 *     source and destination addresses must be 4-byte aligned
+		 *     and transfer size must be aligned on (4 * maxburst)
+		 *     boundary;
+		 *  - during the transfer RAM address is being incremented by
+		 *     the size of minimal transfer chunk;
+		 *  - peripheral port's address is constant during the transfer.
+		 */
+
+		cfg = (void *)arg;
+
+		if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
+		    cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
+		    !IS_ALIGNED(cfg->src_addr, 4) ||
+		    !IS_ALIGNED(cfg->dst_addr, 4)) {
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		mchan->src_per_paddr = cfg->src_addr;
+		mchan->src_tcd_nunits = cfg->src_maxburst;
+		mchan->dst_per_paddr = cfg->dst_addr;
+		mchan->dst_tcd_nunits = cfg->dst_maxburst;
+
+		/* Apply defaults: a zero maxburst becomes 1 */
+		if (mchan->src_tcd_nunits == 0)
+			mchan->src_tcd_nunits = 1;
+		if (mchan->dst_tcd_nunits == 0)
+			mchan->dst_tcd_nunits = 1;
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+
+	default:
+		/* Unknown command, rejected with -ENXIO below */
+		break;
+	}
+
+	return -ENXIO;
+}
+
 static int mpc_dma_probe(struct platform_device *op)
 {
 	struct device_node *dn = op->dev.of_node;
@@ -649,13 +888,15 @@
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
 		dev_err(dev, "Memory exhausted!\n");
-		return -ENOMEM;
+		retval = -ENOMEM;
+		goto err;
 	}
 
 	mdma->irq = irq_of_parse_and_map(dn, 0);
 	if (mdma->irq == NO_IRQ) {
 		dev_err(dev, "Error mapping IRQ!\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto err;
 	}
 
 	if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
@@ -663,14 +904,15 @@
 		mdma->irq2 = irq_of_parse_and_map(dn, 1);
 		if (mdma->irq2 == NO_IRQ) {
 			dev_err(dev, "Error mapping IRQ!\n");
-			return -EINVAL;
+			retval = -EINVAL;
+			goto err_dispose1;
 		}
 	}
 
 	retval = of_address_to_resource(dn, 0, &res);
 	if (retval) {
 		dev_err(dev, "Error parsing memory region!\n");
-		return retval;
+		goto err_dispose2;
 	}
 
 	regs_start = res.start;
@@ -678,31 +920,34 @@
 
 	if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
 		dev_err(dev, "Error requesting memory region!\n");
-		return -EBUSY;
+		retval = -EBUSY;
+		goto err_dispose2;
 	}
 
 	mdma->regs = devm_ioremap(dev, regs_start, regs_size);
 	if (!mdma->regs) {
 		dev_err(dev, "Error mapping memory region!\n");
-		return -ENOMEM;
+		retval = -ENOMEM;
+		goto err_dispose2;
 	}
 
 	mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
 							+ MPC_DMA_TCD_OFFSET);
 
-	retval = devm_request_irq(dev, mdma->irq, &mpc_dma_irq, 0, DRV_NAME,
-									mdma);
+	retval = request_irq(mdma->irq, &mpc_dma_irq, 0, DRV_NAME, mdma);
 	if (retval) {
 		dev_err(dev, "Error requesting IRQ!\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto err_dispose2;
 	}
 
 	if (mdma->is_mpc8308) {
-		retval = devm_request_irq(dev, mdma->irq2, &mpc_dma_irq, 0,
-				DRV_NAME, mdma);
+		retval = request_irq(mdma->irq2, &mpc_dma_irq, 0,
+							DRV_NAME, mdma);
 		if (retval) {
 			dev_err(dev, "Error requesting IRQ2!\n");
-			return -EINVAL;
+			retval = -EINVAL;
+			goto err_free1;
 		}
 	}
 
@@ -710,18 +955,21 @@
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (!mdma->is_mpc8308)
-		dma->chancnt = MPC_DMA_CHANNELS;
+	if (mdma->is_mpc8308)
+		dma->chancnt = MPC8308_DMACHAN_MAX;
 	else
-		dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */
+		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
 	dma->device_tx_status = mpc_dma_tx_status;
 	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+	dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
+	dma->device_control = mpc_dma_device_control;
 
 	INIT_LIST_HEAD(&dma->channels);
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
 	for (i = 0; i < dma->chancnt; i++) {
 		mchan = &mdma->channels[i];
@@ -747,7 +995,19 @@
 	 * - Round-robin group arbitration,
 	 * - Round-robin channel arbitration.
 	 */
-	if (!mdma->is_mpc8308) {
+	if (mdma->is_mpc8308) {
+		/* MPC8308 has 16 channels and lacks some registers */
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+		/* enable snooping */
+		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmaintl, 0xFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+	} else {
 		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
 					MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
 
@@ -768,29 +1028,28 @@
 		/* Route interrupts to IPIC */
 		out_be32(&mdma->regs->dmaihsa, 0);
 		out_be32(&mdma->regs->dmailsa, 0);
-	} else {
-		/* MPC8308 has 16 channels and lacks some registers */
-		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
-
-		/* enable snooping */
-		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
-		/* Disable error interrupts */
-		out_be32(&mdma->regs->dmaeeil, 0);
-
-		/* Clear interrupts status */
-		out_be32(&mdma->regs->dmaintl, 0xFFFF);
-		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
 	}
 
 	/* Register DMA engine */
 	dev_set_drvdata(dev, mdma);
 	retval = dma_async_device_register(dma);
-	if (retval) {
-		devm_free_irq(dev, mdma->irq, mdma);
-		irq_dispose_mapping(mdma->irq);
-	}
+	if (retval)
+		goto err_free2;
 
 	return retval;
+
+err_free2:
+	if (mdma->is_mpc8308)
+		free_irq(mdma->irq2, mdma);
+err_free1:
+	free_irq(mdma->irq, mdma);
+err_dispose2:
+	if (mdma->is_mpc8308)
+		irq_dispose_mapping(mdma->irq2);
+err_dispose1:
+	irq_dispose_mapping(mdma->irq);
+err:
+	return retval;
 }
 
 static int mpc_dma_remove(struct platform_device *op)
@@ -799,7 +1058,11 @@
 	struct mpc_dma *mdma = dev_get_drvdata(dev);
 
 	dma_async_device_unregister(&mdma->dma);
-	devm_free_irq(dev, mdma->irq, mdma);
+	if (mdma->is_mpc8308) {
+		free_irq(mdma->irq2, mdma);
+		irq_dispose_mapping(mdma->irq2);
+	}
+	free_irq(mdma->irq, mdma);
 	irq_dispose_mapping(mdma->irq);
 
 	return 0;
@@ -807,6 +1070,7 @@
 
 static struct of_device_id mpc_dma_match[] = {
 	{ .compatible = "fsl,mpc5121-dma", },
+	{ .compatible = "fsl,mpc8308-dma", },
 	{},
 };
 

diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 05fa548..9f9ca9f 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c

@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/pch_dma.h>
@@ -996,7 +997,7 @@
 #define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
 #define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
 
-DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
+const struct pci_device_id pch_dma_id_table[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */

diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index b209a0f..012520c 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c

@@ -164,6 +164,7 @@
  * @disrcc: value for source control register
  * @didstc: value for destination control register
  * @dcon: base value for dcon register
+ * @cyclic: indicate cyclic transfer
  */
 struct s3c24xx_txd {
 	struct virt_dma_desc vd;
@@ -173,6 +174,7 @@
 	u32 disrcc;
 	u32 didstc;
 	u32 dcon;
+	bool cyclic;
 };
 
 struct s3c24xx_dma_chan;
@@ -669,8 +671,10 @@
 		/* when more sg's are in this txd, start the next one */
 		if (!list_is_last(txd->at, &txd->dsg_list)) {
 			txd->at = txd->at->next;
+			if (txd->cyclic)
+				vchan_cyclic_callback(&txd->vd);
 			s3c24xx_dma_start_next_sg(s3cchan, txd);
-		} else {
+		} else if (!txd->cyclic) {
 			s3cchan->at = NULL;
 			vchan_cookie_complete(&txd->vd);
 
@@ -682,6 +686,12 @@
 				s3c24xx_dma_start_next_txd(s3cchan);
 			else
 				s3c24xx_dma_phy_free(s3cchan);
+		} else {
+			vchan_cyclic_callback(&txd->vd);
+
+			/* Cyclic: reset at beginning */
+			txd->at = txd->dsg_list.next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
 		}
 	}
 	spin_unlock(&s3cchan->vc.lock);
@@ -877,6 +887,104 @@
 	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
 }
 
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	unsigned sg_len;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int i;
+
+	dev_dbg(&s3cdma->pdev->dev,
+		"prepare cyclic transaction of %zu bytes with period %zu from %s\n",
+		size, period, s3cchan->name);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = 1;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral destination is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	}
+
+	sg_len = size / period;
+
+	for (i = 0; i < sg_len; i++) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = period;
+		/* The last period takes whatever remains of the buffer */
+		if (i == sg_len - 1)
+			dsg->len = size - period * i;
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = addr + period * i;
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = addr + period * i;
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
 static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -961,7 +1069,6 @@
 			dsg->src_addr = slave_addr;
 			dsg->dst_addr = sg_dma_address(sg);
 		}
-		break;
 	}
 
 	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
@@ -1198,6 +1305,7 @@
 
 	/* Initialize slave engine for SoC internal dedicated peripherals */
 	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
 	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
 	s3cdma->slave.dev = &pdev->dev;
 	s3cdma->slave.device_alloc_chan_resources =
@@ -1207,6 +1315,7 @@
 	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
 	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
 	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
 	s3cdma->slave.device_control = s3c24xx_dma_control;
 
 	/* Register as many memcpy channels as there are physical channels */

diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index b4c8138..0f71981 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig

@@ -4,7 +4,7 @@
 
 config SH_DMAE_BASE
 	bool "Renesas SuperH DMA Engine support"
-	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
+	depends on (SUPERH && SH_DMA) || ARCH_SHMOBILE || COMPILE_TEST
 	depends on !SH_DMA_API
 	default y
 	select DMA_ENGINE

diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
index 3083d90..b212d94 100644
--- a/drivers/dma/sh/rcar-hpbdma.c
+++ b/drivers/dma/sh/rcar-hpbdma.c

@@ -18,6 +18,7 @@
 
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>

diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 5239677..b35007e 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c

@@ -73,8 +73,7 @@
 static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct shdma_desc *chunk, *c, *desc =
-		container_of(tx, struct shdma_desc, async_tx),
-		*last = desc;
+		container_of(tx, struct shdma_desc, async_tx);
 	struct shdma_chan *schan = to_shdma_chan(tx->chan);
 	dma_async_tx_callback callback = tx->callback;
 	dma_cookie_t cookie;
@@ -98,19 +97,20 @@
 				      &chunk->node == &schan->ld_free))
 			break;
 		chunk->mark = DESC_SUBMITTED;
-		/* Callback goes to the last chunk */
-		chunk->async_tx.callback = NULL;
+		if (chunk->chunks == 1) {
+			chunk->async_tx.callback = callback;
+			chunk->async_tx.callback_param = tx->callback_param;
+		} else {
+			/* Callback goes to the last chunk */
+			chunk->async_tx.callback = NULL;
+		}
 		chunk->cookie = cookie;
 		list_move_tail(&chunk->node, &schan->ld_queue);
-		last = chunk;
 
 		dev_dbg(schan->dev, "submit #%d@%p on %d\n",
-			tx->cookie, &last->async_tx, schan->id);
+			tx->cookie, &chunk->async_tx, schan->id);
 	}
 
-	last->async_tx.callback = callback;
-	last->async_tx.callback_param = tx->callback_param;
-
 	if (power_up) {
 		int ret;
 		schan->pm_state = SHDMA_PM_BUSY;
@@ -304,6 +304,7 @@
 	dma_async_tx_callback callback = NULL;
 	void *param = NULL;
 	unsigned long flags;
+	LIST_HEAD(cyclic_list);
 
 	spin_lock_irqsave(&schan->chan_lock, flags);
 	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
@@ -369,10 +370,16 @@
 		if (((desc->mark == DESC_COMPLETED ||
 		      desc->mark == DESC_WAITING) &&
 		     async_tx_test_ack(&desc->async_tx)) || all) {
-			/* Remove from ld_queue list */
-			desc->mark = DESC_IDLE;
 
-			list_move(&desc->node, &schan->ld_free);
+			if (all || !desc->cyclic) {
+				/* Remove from ld_queue list */
+				desc->mark = DESC_IDLE;
+				list_move(&desc->node, &schan->ld_free);
+			} else {
+				/* reuse as cyclic */
+				desc->mark = DESC_SUBMITTED;
+				list_move_tail(&desc->node, &cyclic_list);
+			}
 
 			if (list_empty(&schan->ld_queue)) {
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
@@ -389,6 +396,8 @@
 		 */
 		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
 
+	list_splice_tail(&cyclic_list, &schan->ld_queue);
+
 	spin_unlock_irqrestore(&schan->chan_lock, flags);
 
 	if (callback)
@@ -521,7 +530,7 @@
  */
 static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
 {
 	struct scatterlist *sg;
 	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
@@ -569,7 +578,11 @@
 			if (!new)
 				goto err_get_desc;
 
-			new->chunks = chunks--;
+			new->cyclic = cyclic;
+			if (cyclic)
+				new->chunks = 1;
+			else
+				new->chunks = chunks--;
 			list_add_tail(&new->node, &tx_list);
 		} while (len);
 	}
@@ -612,7 +625,8 @@
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags);
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			     flags, false);
 }
 
 static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
@@ -640,7 +654,58 @@
 	slave_addr = ops->slave_addr(schan);
 
 	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
-			      direction, flags);
+			     direction, flags, false);
+}
+
+#define SHDMA_MAX_SG_LEN 32
+
+static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct scatterlist sgl[SHDMA_MAX_SG_LEN];
+	const struct shdma_ops *ops;
+	dma_addr_t slave_addr;
+	unsigned int sg_len, i;
+
+	/* Reject before dereferencing chan or dividing by period_len */
+	if (!chan || !period_len)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	/* One scatterlist entry per period, built on the stack below */
+	sg_len = buf_len / period_len;
+	if (sg_len > SHDMA_MAX_SG_LEN) {
+		dev_err(schan->dev, "sg length %u exceds limit %d\n",
+				sg_len, SHDMA_MAX_SG_LEN);
+		return NULL;
+	}
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (schan->slave_id < 0 || (buf_len < period_len)) {
+		dev_warn(schan->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, schan->slave_id);
+		return NULL;
+	}
+
+	ops = to_shdma_dev(schan->dma_chan.device)->ops;
+	slave_addr = ops->slave_addr(schan);
+
+	sg_init_table(sgl, sg_len);
+	for (i = 0; i < sg_len; i++) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, true);
+}
 
 static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -915,6 +980,7 @@
 
 	/* Compulsory for DMA_SLAVE fields */
 	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
 	dma_dev->device_control = shdma_control;
 
 	dma_dev->dev = dev;

diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index dda7e75..146d5df 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c

@@ -18,21 +18,22 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/sh_dma.h>
-#include <linux/notifier.h>
-#include <linux/kdebug.h>
-#include <linux/spinlock.h>
 #include <linux/rculist.h>
+#include <linux/sh_dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #include "../dmaengine.h"
 #include "shdma.h"

diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index 4e7df43..3ce1039 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c

@@ -14,12 +14,13 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
 #include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/sudmac.h>
 
 struct sudmac_chan {

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index bf18c78..c798445 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c

@@ -556,7 +556,6 @@
  * later
  * @reg_val_backup_chan: Backup data for standard channel parameter registers.
  * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
- * @initialized: true if the dma has been initialized
  * @gen_dmac: the struct for generic registers values to represent u8500/8540
  * DMA controller
  */
@@ -594,7 +593,6 @@
 	u32				  reg_val_backup_v4[BACKUP_REGS_SZ_MAX];
 	u32				 *reg_val_backup_chan;
 	u16				  gcc_pwr_off_mask;
-	bool				  initialized;
 	struct d40_gen_dmac		  gen_dmac;
 };
 
@@ -1056,62 +1054,6 @@
 	return len;
 }
 
-
-#ifdef CONFIG_PM
-static void dma40_backup(void __iomem *baseaddr, u32 *backup,
-			 u32 *regaddr, int num, bool save)
-{
-	int i;
-
-	for (i = 0; i < num; i++) {
-		void __iomem *addr = baseaddr + regaddr[i];
-
-		if (save)
-			backup[i] = readl_relaxed(addr);
-		else
-			writel_relaxed(backup[i], addr);
-	}
-}
-
-static void d40_save_restore_registers(struct d40_base *base, bool save)
-{
-	int i;
-
-	/* Save/Restore channel specific registers */
-	for (i = 0; i < base->num_phy_chans; i++) {
-		void __iomem *addr;
-		int idx;
-
-		if (base->phy_res[i].reserved)
-			continue;
-
-		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
-		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
-
-		dma40_backup(addr, &base->reg_val_backup_chan[idx],
-			     d40_backup_regs_chan,
-			     ARRAY_SIZE(d40_backup_regs_chan),
-			     save);
-	}
-
-	/* Save/Restore global registers */
-	dma40_backup(base->virtbase, base->reg_val_backup,
-		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
-		     save);
-
-	/* Save/Restore registers only existing on dma40 v3 and later */
-	if (base->gen_dmac.backup)
-		dma40_backup(base->virtbase, base->reg_val_backup_v4,
-			     base->gen_dmac.backup,
-			base->gen_dmac.backup_size,
-			save);
-}
-#else
-static void d40_save_restore_registers(struct d40_base *base, bool save)
-{
-}
-#endif
-
 static int __d40_execute_command_phy(struct d40_chan *d40c,
 				     enum d40_command command)
 {
@@ -1495,8 +1437,8 @@
 	if (!d40c->busy)
 		return 0;
 
-	pm_runtime_get_sync(d40c->base->dev);
 	spin_lock_irqsave(&d40c->lock, flags);
+	pm_runtime_get_sync(d40c->base->dev);
 
 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
 
@@ -2998,16 +2940,86 @@
 }
 
 /* Suspend resume functionality */
-#ifdef CONFIG_PM
-static int dma40_pm_suspend(struct device *dev)
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System-sleep suspend callback: force the device into its runtime
+ * suspended state first, then switch off the LCPA regulator if the
+ * driver holds one.  Returns 0 or the first error encountered.
+ */
+static int dma40_suspend(struct device *dev)
+{
+	struct d40_base *base = platform_get_drvdata(to_platform_device(dev));
+	int err = pm_runtime_force_suspend(dev);
+
+	if (err)
+		return err;
+
+	if (!base->lcpa_regulator)
+		return 0;
+
+	return regulator_disable(base->lcpa_regulator);
+}
+
+static int dma40_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct d40_base *base = platform_get_drvdata(pdev);
 	int ret = 0;
 
-	if (base->lcpa_regulator)
-		ret = regulator_disable(base->lcpa_regulator);
-	return ret;
+	if (base->lcpa_regulator) {
+		ret = regulator_enable(base->lcpa_regulator);
+		if (ret)
+			return ret;
+	}
+
+	return pm_runtime_force_resume(dev);
+}
+#endif
+
+#ifdef CONFIG_PM
+/*
+ * Walk @num register offsets from @regaddr, relative to @baseaddr, and either
+ * copy each register into @backup (@save true) or write the saved value
+ * back to the register (@save false).
+ */
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+			 u32 *regaddr, int num, bool save)
+{
+	int reg;
+
+	for (reg = 0; reg < num; reg++) {
+		void __iomem *where = baseaddr + regaddr[reg];
+
+		if (!save) {
+			writel_relaxed(backup[reg], where);
+			continue;
+		}
+
+		backup[reg] = readl_relaxed(where);
+	}
+}
+
+/*
+ * Save (@save true) or restore (@save false) the controller register state:
+ * the per-physical-channel registers of every non-reserved channel, the
+ * global registers, and - when the generic DMAC description provides a
+ * list - the extra registers kept in reg_val_backup_v4.
+ */
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+	int i;
+
+	/* Save/Restore channel specific registers */
+	for (i = 0; i < base->num_phy_chans; i++) {
+		void __iomem *addr;
+		int idx;
+
+		/* Reserved physical channels are left untouched */
+		if (base->phy_res[i].reserved)
+			continue;
+
+		/*
+		 * Each physical channel has its own register window and its
+		 * own slice of the backup array.
+		 */
+		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+		dma40_backup(addr, &base->reg_val_backup_chan[idx],
+			     d40_backup_regs_chan,
+			     ARRAY_SIZE(d40_backup_regs_chan),
+			     save);
+	}
+
+	/* Save/Restore global registers */
+	dma40_backup(base->virtbase, base->reg_val_backup,
+		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+		     save);
+
+	/* Save/Restore registers only existing on dma40 v3 and later */
+	if (base->gen_dmac.backup)
+		dma40_backup(base->virtbase, base->reg_val_backup_v4,
+			     base->gen_dmac.backup,
+			base->gen_dmac.backup_size,
+			save);
 }
 
 static int dma40_runtime_suspend(struct device *dev)
@@ -3030,36 +3042,20 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct d40_base *base = platform_get_drvdata(pdev);
 
-	if (base->initialized)
-		d40_save_restore_registers(base, false);
+	d40_save_restore_registers(base, false);
 
 	writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
 		       base->virtbase + D40_DREG_GCC);
 	return 0;
 }
-
-static int dma40_resume(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct d40_base *base = platform_get_drvdata(pdev);
-	int ret = 0;
-
-	if (base->lcpa_regulator)
-		ret = regulator_enable(base->lcpa_regulator);
-
-	return ret;
-}
+#endif
 
 static const struct dev_pm_ops dma40_pm_ops = {
-	.suspend		= dma40_pm_suspend,
-	.runtime_suspend	= dma40_runtime_suspend,
-	.runtime_resume		= dma40_runtime_resume,
-	.resume			= dma40_resume,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dma40_suspend, dma40_resume)
+	SET_PM_RUNTIME_PM_OPS(dma40_runtime_suspend,
+				dma40_runtime_resume,
+				NULL)
 };
-#define DMA40_PM_OPS	(&dma40_pm_ops)
-#else
-#define DMA40_PM_OPS	NULL
-#endif
 
 /* Initialization functions. */
 
@@ -3645,12 +3641,6 @@
 		goto failure;
 	}
 
-	pm_runtime_irq_safe(base->dev);
-	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
-	pm_runtime_use_autosuspend(base->dev);
-	pm_runtime_enable(base->dev);
-	pm_runtime_resume(base->dev);
-
 	if (base->plat_data->use_esram_lcla) {
 
 		base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
@@ -3671,7 +3661,15 @@
 		}
 	}
 
-	base->initialized = true;
+	writel_relaxed(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+
+	pm_runtime_irq_safe(base->dev);
+	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(base->dev);
+	pm_runtime_mark_last_busy(base->dev);
+	pm_runtime_set_active(base->dev);
+	pm_runtime_enable(base->dev);
+
 	ret = d40_dmaengine_init(base, num_reserved_chans);
 	if (ret)
 		goto failure;
@@ -3754,7 +3752,7 @@
 	.driver = {
 		.owner = THIS_MODULE,
 		.name  = D40_NAME,
-		.pm = DMA40_PM_OPS,
+		.pm = &dma40_pm_ops,
 		.of_match_table = d40_match,
 	},
 };

diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
new file mode 100644
index 0000000..3c4e9f2
--- /dev/null
+++ b/drivers/dma/xilinx/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_XILINX_VDMA) += xilinx_vdma.o

diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
new file mode 100644
index 0000000..42a13e8
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_vdma.c

@@ -0,0 +1,1379 @@
+/*
+ * DMA driver for Xilinx Video DMA Engine
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * Based on the Freescale DMA driver.
+ *
+ * Description:
+ * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP
+ * core that provides high-bandwidth direct memory access between memory
+ * and AXI4-Stream type video target peripherals. The core provides efficient
+ * two dimensional DMA operations with independent asynchronous read (MM2S)
+ * and write (S2MM) channel operation. It can be configured to have either
+ * one channel or two channels. If configured as two channels, one is to
+ * transmit to the video device (MM2S) and another is to receive from the
+ * video device (S2MM). Initialization, status, interrupt and management
+ * registers are accessed through an AXI4-Lite slave interface.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/amba/xilinx_dma.h>
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include "../dmaengine.h"
+
+/* Register/Descriptor Offsets */
+#define XILINX_VDMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_VDMA_S2MM_CTRL_OFFSET		0x0030
+#define XILINX_VDMA_MM2S_DESC_OFFSET		0x0050
+#define XILINX_VDMA_S2MM_DESC_OFFSET		0x00a0
+
+/* Control Registers */
+#define XILINX_VDMA_REG_DMACR			0x0000
+#define XILINX_VDMA_DMACR_DELAY_MAX		0xff
+#define XILINX_VDMA_DMACR_DELAY_SHIFT		24
+#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX	0xff
+#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT	16
+#define XILINX_VDMA_DMACR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMACR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMACR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMACR_MASTER_SHIFT		8
+#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT	5
+#define XILINX_VDMA_DMACR_FRAMECNT_EN		BIT(4)
+#define XILINX_VDMA_DMACR_GENLOCK_EN		BIT(3)
+#define XILINX_VDMA_DMACR_RESET			BIT(2)
+#define XILINX_VDMA_DMACR_CIRC_EN		BIT(1)
+#define XILINX_VDMA_DMACR_RUNSTOP		BIT(0)
+#define XILINX_VDMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
+
+#define XILINX_VDMA_REG_DMASR			0x0004
+#define XILINX_VDMA_DMASR_EOL_LATE_ERR		BIT(15)
+#define XILINX_VDMA_DMASR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMASR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMASR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMASR_SOF_LATE_ERR		BIT(11)
+#define XILINX_VDMA_DMASR_SG_DEC_ERR		BIT(10)
+#define XILINX_VDMA_DMASR_SG_SLV_ERR		BIT(9)
+#define XILINX_VDMA_DMASR_EOF_EARLY_ERR		BIT(8)
+#define XILINX_VDMA_DMASR_SOF_EARLY_ERR		BIT(7)
+#define XILINX_VDMA_DMASR_DMA_DEC_ERR		BIT(6)
+#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR		BIT(5)
+#define XILINX_VDMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_VDMA_DMASR_IDLE			BIT(1)
+#define XILINX_VDMA_DMASR_HALTED		BIT(0)
+#define XILINX_VDMA_DMASR_DELAY_MASK		GENMASK(31, 24)
+#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
+
+#define XILINX_VDMA_REG_CURDESC			0x0008
+#define XILINX_VDMA_REG_TAILDESC		0x0010
+#define XILINX_VDMA_REG_REG_INDEX		0x0014
+#define XILINX_VDMA_REG_FRMSTORE		0x0018
+#define XILINX_VDMA_REG_THRESHOLD		0x001c
+#define XILINX_VDMA_REG_FRMPTR_STS		0x0024
+#define XILINX_VDMA_REG_PARK_PTR		0x0028
+#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT	8
+#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT	0
+#define XILINX_VDMA_REG_VDMA_VERSION		0x002c
+
+/* Register Direct Mode Registers */
+#define XILINX_VDMA_REG_VSIZE			0x0000
+#define XILINX_VDMA_REG_HSIZE			0x0004
+
+#define XILINX_VDMA_REG_FRMDLY_STRIDE		0x0008
+#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
+#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
+
+#define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
+
+/* HW specific definitions */
+#define XILINX_VDMA_MAX_CHANS_PER_DEVICE	0x2
+
+#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK	\
+		(XILINX_VDMA_DMASR_FRM_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_DLY_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_ERR_IRQ)
+
+#define XILINX_VDMA_DMASR_ALL_ERR_MASK	\
+		(XILINX_VDMA_DMASR_EOL_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SG_DEC_ERR | \
+		 XILINX_VDMA_DMASR_SG_SLV_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_DEC_ERR | \
+		 XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/*
+ * Recoverable errors are DMA Internal error, SOF Early, EOF Early
+ * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
+ * is enabled in the h/w system.
+ */
+#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK	\
+		(XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/* Axi VDMA Flush on Fsync bits */
+#define XILINX_VDMA_FLUSH_S2MM		3
+#define XILINX_VDMA_FLUSH_MM2S		2
+#define XILINX_VDMA_FLUSH_BOTH		1
+
+/* Delay loop counter to prevent hardware failure */
+#define XILINX_VDMA_LOOP_COUNT		1000000
+
+/**
+ * struct xilinx_vdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @pad2: Reserved @0x0C
+ * @vsize: Vertical Size @0x10
+ * @hsize: Horizontal Size @0x14
+ * @stride: Number of bytes between the first
+ *	    pixels of each horizontal line @0x18
+ *
+ * The field order and widths mirror the byte offsets listed above, so the
+ * members must not be reordered. The structure is aligned to 64 bytes.
+ */
+struct xilinx_vdma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 buf_addr;
+	u32 pad2;
+	u32 vsize;
+	u32 hsize;
+	u32 stride;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor (kept first in the structure)
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment, as returned by the DMA pool the
+ *	  segment was allocated from
+ */
+struct xilinx_vdma_tx_segment {
+	struct xilinx_vdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_descriptor - Per Transaction structure
+ * @async_tx: Async transaction descriptor
+ * @segments: TX segments list (of struct xilinx_vdma_tx_segment)
+ * @node: Node in the channel descriptors list (pending or done list)
+ */
+struct xilinx_vdma_tx_descriptor {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head segments;
+	struct list_head node;
+};
+
+/**
+ * struct xilinx_vdma_chan - Driver specific VDMA channel structure
+ * @xdev: Driver specific device structure
+ * @ctrl_offset: Control registers offset
+ * @desc_offset: TX descriptor registers offset
+ * @lock: Descriptor operation lock; taken around accesses to the
+ *	  descriptor lists, @active_desc and @allocated_desc
+ * @pending_list: Descriptors waiting (submitted, not yet started)
+ * @active_desc: Active descriptor (taken off @pending_list when a
+ *		 transfer is started)
+ * @allocated_desc: Allocated descriptor (prepared but not yet submitted;
+ *		    reset to NULL on submit)
+ * @done_list: Complete descriptors awaiting callback and release
+ * @common: DMA common channel
+ * @desc_pool: Descriptors pool (segments are allocated from it)
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @id: Channel ID
+ * @direction: Transfer direction
+ * @num_frms: Number of frames
+ * @has_sg: Support scatter transfers
+ * @genlock: Support genlock mode
+ * @err: Channel has errors (set on start/halt timeout or on an
+ *	 unrecoverable interrupt error, cleared by a successful reset)
+ * @tasklet: Cleanup work after irq
+ * @config: Device configuration info
+ * @flush_on_fsync: Flush on Frame sync
+ */
+struct xilinx_vdma_chan {
+	struct xilinx_vdma_device *xdev;
+	u32 ctrl_offset;
+	u32 desc_offset;
+	spinlock_t lock;
+	struct list_head pending_list;
+	struct xilinx_vdma_tx_descriptor *active_desc;
+	struct xilinx_vdma_tx_descriptor *allocated_desc;
+	struct list_head done_list;
+	struct dma_chan common;
+	struct dma_pool *desc_pool;
+	struct device *dev;
+	int irq;
+	int id;
+	enum dma_transfer_direction direction;
+	int num_frms;
+	bool has_sg;
+	bool genlock;
+	bool err;
+	struct tasklet_struct tasklet;
+	struct xilinx_vdma_config config;
+	bool flush_on_fsync;
+};
+
+/**
+ * struct xilinx_vdma_device - VDMA device structure
+ * @regs: I/O mapped base address; all channel register accesses are
+ *	  offsets from this pointer
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific VDMA channel (at most
+ *	  XILINX_VDMA_MAX_CHANS_PER_DEVICE entries)
+ * @has_sg: Specifies whether Scatter-Gather is present or not
+ * @flush_on_fsync: Flush on frame sync (presumably one of the
+ *		    XILINX_VDMA_FLUSH_* values - confirm against probe)
+ */
+struct xilinx_vdma_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE];
+	bool has_sg;
+	u32 flush_on_fsync;
+};
+
+/* Macros */
+/* Get the driver channel that embeds the given struct dma_chan. */
+#define to_xilinx_chan(chan) \
+	container_of(chan, struct xilinx_vdma_chan, common)
+/* Get the driver descriptor that embeds the given async_tx descriptor. */
+#define to_vdma_tx_descriptor(tx) \
+	container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx)
+
+/* IO accessors */
+/* Read the 32-bit register at byte offset @reg from the device base. */
+static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	void __iomem *addr = chan->xdev->regs + reg;
+
+	return ioread32(addr);
+}
+
+/* Write @value to the 32-bit register at byte offset @reg from the base. */
+static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value)
+{
+	void __iomem *addr = chan->xdev->regs + reg;
+
+	iowrite32(value, addr);
+}
+
+/* Write @value to register @reg inside the channel's descriptor window. */
+static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	u32 offset = chan->desc_offset + reg;
+
+	vdma_write(chan, offset, value);
+}
+
+/* Read register @reg inside the channel's control register window. */
+static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	u32 offset = chan->ctrl_offset + reg;
+
+	return vdma_read(chan, offset);
+}
+
+/* Write @value to register @reg inside the channel's control window. */
+static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	u32 offset = chan->ctrl_offset + reg;
+
+	vdma_write(chan, offset, value);
+}
+
+/* Read-modify-write: clear the bits in @clr in control register @reg. */
+static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 clr)
+{
+	u32 val = vdma_ctrl_read(chan, reg);
+
+	val &= ~clr;
+	vdma_ctrl_write(chan, reg, val);
+}
+
+/* Read-modify-write: set the bits in @set in control register @reg. */
+static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 set)
+{
+	u32 val = vdma_ctrl_read(chan, reg);
+
+	val |= set;
+	vdma_ctrl_write(chan, reg, val);
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors and segments alloc and free
+ */
+
+/**
+ * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific VDMA channel
+ *
+ * The segment comes from the channel descriptor pool (atomic allocation),
+ * is zeroed, and records its own DMA address in @phys.
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_segment *
+xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan)
+{
+	dma_addr_t dma_handle;
+	struct xilinx_vdma_tx_segment *seg =
+		dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &dma_handle);
+
+	if (seg) {
+		memset(seg, 0, sizeof(*seg));
+		seg->phys = dma_handle;
+	}
+
+	return seg;
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific VDMA channel
+ * @segment: VDMA transaction segment
+ *
+ * Hands the segment back to the channel descriptor pool using the DMA
+ * address stored in the segment itself.
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan,
+					struct xilinx_vdma_tx_segment *segment)
+{
+	struct dma_pool *pool = chan->desc_pool;
+
+	dma_pool_free(pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_alloc_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific VDMA channel
+ *
+ * If the channel already holds a prepared-but-unsubmitted descriptor it is
+ * returned as is; otherwise a new zeroed descriptor is allocated and
+ * remembered in @chan->allocated_desc.
+ *
+ * Return: The allocated descriptor on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_descriptor *
+xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+
+	if (chan->allocated_desc)
+		return chan->allocated_desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	/*
+	 * Fully initialise the descriptor before it becomes reachable
+	 * through the channel, so nobody can observe a zeroed list head.
+	 */
+	INIT_LIST_HEAD(&desc->segments);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->allocated_desc = desc;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return desc;
+}
+
+/**
+ * xilinx_vdma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific VDMA channel
+ * @desc: VDMA transaction descriptor, may be NULL
+ *
+ * Releases every segment attached to the descriptor, then the descriptor
+ * itself. A NULL @desc is silently ignored.
+ */
+static void
+xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan,
+			       struct xilinx_vdma_tx_descriptor *desc)
+{
+	struct xilinx_vdma_tx_segment *seg;
+
+	if (!desc)
+		return;
+
+	/* Drain the segment list from the front until nothing is left. */
+	while (!list_empty(&desc->segments)) {
+		seg = list_first_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		list_del(&seg->node);
+		xilinx_vdma_free_tx_segment(chan, seg);
+	}
+
+	kfree(desc);
+}
+
+/* Required functions */
+
+/**
+ * xilinx_vdma_free_desc_list - Free descriptors list
+ * @chan: Driver specific VDMA channel
+ * @list: List to parse and delete the descriptor
+ *
+ * Every descriptor is unlinked from @list and released together with
+ * its segments; @list is empty on return.
+ */
+static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan,
+					struct list_head *list)
+{
+	struct xilinx_vdma_tx_descriptor *victim;
+
+	while (!list_empty(list)) {
+		victim = list_first_entry(list,
+					  struct xilinx_vdma_tx_descriptor,
+					  node);
+		list_del(&victim->node);
+		xilinx_vdma_free_tx_descriptor(chan, victim);
+	}
+}
+
+/**
+ * xilinx_vdma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific VDMA channel
+ *
+ * Under the channel lock, releases the pending list, the done list and
+ * the active descriptor (if any).
+ */
+static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+
+	/* Queued but never started, then completed but never reaped. */
+	xilinx_vdma_free_desc_list(chan, &chan->pending_list);
+	xilinx_vdma_free_desc_list(chan, &chan->done_list);
+
+	/* The free helper copes with a NULL active descriptor. */
+	xilinx_vdma_free_tx_descriptor(chan, chan->active_desc);
+	chan->active_desc = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/**
+ * xilinx_vdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel
+ *
+ * Drops all descriptors of the channel and destroys its descriptor pool.
+ */
+static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *vchan = to_xilinx_chan(dchan);
+
+	dev_dbg(vchan->dev, "Free all channel resources.\n");
+
+	/* Descriptors must go first: their segments live in the pool. */
+	xilinx_vdma_free_descriptors(vchan);
+
+	dma_pool_destroy(vchan->desc_pool);
+	vchan->desc_pool = NULL;
+}
+
+/**
+ * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific VDMA channel
+ *
+ * Walks the done list: each descriptor is unlinked, its completion
+ * callback (if any) is invoked with the channel lock released, its
+ * dependencies are run, and it is freed.
+ */
+static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		/* Remove from the list of running transactions */
+		list_del(&desc->node);
+
+		/* Run the link descriptor callback function */
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			/*
+			 * The lock is dropped around the client callback.
+			 * NOTE(review): the iterator's cached 'next' entry is
+			 * not protected while the lock is released - confirm
+			 * nothing else can remove it from done_list meanwhile.
+			 */
+			spin_unlock_irqrestore(&chan->lock, flags);
+			callback(callback_param);
+			spin_lock_irqsave(&chan->lock, flags);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		dma_run_dependencies(&desc->async_tx);
+		xilinx_vdma_free_tx_descriptor(chan, desc);
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_do_tasklet - Completion tasklet body
+ * @data: Pointer to the Xilinx VDMA channel structure
+ *
+ * Runs the done-list cleanup for the channel passed in @data.
+ */
+static void xilinx_vdma_do_tasklet(unsigned long data)
+{
+	xilinx_vdma_chan_desc_cleanup((struct xilinx_vdma_chan *)data);
+}
+
+/**
+ * xilinx_vdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Creates the per-channel descriptor pool (unless it already exists) and
+ * initialises the channel cookie.
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *vchan = to_xilinx_chan(dchan);
+	struct dma_pool *pool;
+
+	/* Nothing to do when the pool is already in place. */
+	if (vchan->desc_pool)
+		return 0;
+
+	/*
+	 * Pool entries take the segment structure's own alignment, which
+	 * satisfies the 64 byte descriptor alignment demanded by the
+	 * Xilinx VDMA specification.
+	 */
+	pool = dma_pool_create("xilinx_vdma_desc_pool", vchan->dev,
+			       sizeof(struct xilinx_vdma_tx_segment),
+			       __alignof__(struct xilinx_vdma_tx_segment), 0);
+	vchan->desc_pool = pool;
+	if (!pool) {
+		dev_err(vchan->dev,
+			"unable to allocate channel %d descriptor pool\n",
+			vchan->id);
+		return -ENOMEM;
+	}
+
+	dma_cookie_init(dchan);
+	return 0;
+}
+
+/**
+ * xilinx_vdma_tx_status - Get VDMA transaction status
+ * @dchan: DMA channel
+ * @cookie: Transaction identifier
+ * @txstate: Transaction state
+ *
+ * The status is derived purely from the generic cookie bookkeeping.
+ *
+ * Return: DMA transaction status
+ */
+static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	enum dma_status status = dma_cookie_status(dchan, cookie, txstate);
+
+	return status;
+}
+
+/**
+ * xilinx_vdma_is_running - Check if VDMA channel is running
+ * @chan: Driver specific VDMA channel
+ *
+ * The channel counts as running when the status register does not report
+ * HALTED and the control register has RUNSTOP set. The control register
+ * is only read when the channel is not halted.
+ *
+ * Return: '1' if running, '0' if not.
+ */
+static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan)
+{
+	u32 sr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
+
+	if (sr & XILINX_VDMA_DMASR_HALTED)
+		return false;
+
+	return (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+		XILINX_VDMA_DMACR_RUNSTOP) != 0;
+}
+
+/**
+ * xilinx_vdma_is_idle - Check if VDMA channel is idle
+ * @chan: Driver specific VDMA channel
+ *
+ * Reflects the IDLE bit of the channel status register.
+ *
+ * Return: '1' if idle, '0' if not.
+ */
+static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan)
+{
+	u32 sr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
+
+	return (sr & XILINX_VDMA_DMASR_IDLE) != 0;
+}
+
+/**
+ * xilinx_vdma_halt - Halt VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Clears RUNSTOP and polls the status register, at most
+ * XILINX_VDMA_LOOP_COUNT times, for the HALTED bit. On timeout an error
+ * is logged and the channel is flagged as being in error.
+ */
+static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/*
+	 * Wait for the hardware to halt. Pre-decrement so that the counter
+	 * is exactly zero if, and only if, every poll attempt was used up.
+	 */
+	do {
+		if (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		    XILINX_VDMA_DMASR_HALTED)
+			break;
+	} while (--loop);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		chan->err = true;
+	}
+}
+
+/**
+ * xilinx_vdma_start - Start VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Sets RUNSTOP and polls the status register, at most
+ * XILINX_VDMA_LOOP_COUNT times, until HALTED is deasserted. On timeout
+ * an error is logged and the channel is flagged as being in error.
+ */
+static void xilinx_vdma_start(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/*
+	 * Wait for the hardware to start. Pre-decrement so that the counter
+	 * is exactly zero if, and only if, every poll attempt was used up.
+	 */
+	do {
+		if (!(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		      XILINX_VDMA_DMASR_HALTED))
+			break;
+	} while (--loop);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot start channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+
+		chan->err = true;
+	}
+}
+
+/**
+ * xilinx_vdma_start_transfer - Starts VDMA transfer
+ * @chan: Driver specific channel struct pointer
+ *
+ * Takes the first descriptor of the pending list, programs the control
+ * register from the channel configuration, starts the hardware and then
+ * kicks the transfer (tail descriptor in SG mode, start addresses and
+ * sizes in register direct mode). On success the descriptor becomes the
+ * channel's active descriptor. Does nothing if the channel is in error,
+ * a descriptor is already active, or nothing is pending.
+ */
+static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_config *config = &chan->config;
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+	u32 reg;
+	struct xilinx_vdma_tx_segment *head, *tail = NULL;
+
+	/* Checked without the lock; a channel in error must be reset first */
+	if (chan->err)
+		return;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* There's already an active descriptor, bail out. */
+	if (chan->active_desc)
+		goto out_unlock;
+
+	if (list_empty(&chan->pending_list))
+		goto out_unlock;
+
+	desc = list_first_entry(&chan->pending_list,
+				struct xilinx_vdma_tx_descriptor, node);
+
+	/* If it is SG mode and hardware is busy, cannot submit */
+	if (chan->has_sg && xilinx_vdma_is_running(chan) &&
+	    !xilinx_vdma_is_idle(chan)) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		goto out_unlock;
+	}
+
+	/*
+	 * If hardware is idle, then all descriptors on the running lists are
+	 * done, start new transfers
+	 */
+	if (chan->has_sg) {
+		head = list_first_entry(&desc->segments,
+					struct xilinx_vdma_tx_segment, node);
+		tail = list_entry(desc->segments.prev,
+				  struct xilinx_vdma_tx_segment, node);
+
+		/* Point the hardware at the first segment of the chain */
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, head->phys);
+	}
+
+	/* Configure the hardware using info in the config structure */
+	reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	if (config->frm_cnt_en)
+		reg |= XILINX_VDMA_DMACR_FRAMECNT_EN;
+	else
+		reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN;
+
+	/*
+	 * With SG, start with circular mode, so that BDs can be fetched.
+	 * In direct register mode, if not parking, enable circular mode
+	 */
+	if (chan->has_sg || !config->park)
+		reg |= XILINX_VDMA_DMACR_CIRC_EN;
+
+	/* Parking always wins: circular mode is switched off again */
+	if (config->park)
+		reg &= ~XILINX_VDMA_DMACR_CIRC_EN;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg);
+
+	/*
+	 * Program the park frame only when it is within the channel's frame
+	 * count. The park pointer register is addressed from the device
+	 * base (not the channel control window); the direction selects the
+	 * read or write reference field.
+	 */
+	if (config->park && (config->park_frm >= 0) &&
+			(config->park_frm < chan->num_frms)) {
+		if (chan->direction == DMA_MEM_TO_DEV)
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_RD_REF_SHIFT);
+		else
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_WR_REF_SHIFT);
+	}
+
+	/* Start the hardware */
+	xilinx_vdma_start(chan);
+
+	/* xilinx_vdma_start() flags the channel on timeout */
+	if (chan->err)
+		goto out_unlock;
+
+	/* Start the transfer */
+	if (chan->has_sg) {
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, tail->phys);
+	} else {
+		struct xilinx_vdma_tx_segment *segment, *last = NULL;
+		int i = 0;
+
+		/* One start address register per segment, in list order */
+		list_for_each_entry(segment, &desc->segments, node) {
+			vdma_desc_write(chan,
+					XILINX_VDMA_REG_START_ADDRESS(i++),
+					segment->hw.buf_addr);
+			last = segment;
+		}
+
+		/* Descriptor without segments: nothing to transfer */
+		if (!last)
+			goto out_unlock;
+
+		/* HW expects these parameters to be same for one transaction */
+		vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize);
+		vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE,
+				last->hw.stride);
+		/* VSIZE is written last (presumably the trigger - confirm) */
+		vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize);
+	}
+
+	/* The descriptor is now owned by the hardware */
+	list_del(&desc->node);
+	chan->active_desc = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel
+ *
+ * Simply attempts to start the next pending transfer on the channel.
+ */
+static void xilinx_vdma_issue_pending(struct dma_chan *dchan)
+{
+	xilinx_vdma_start_transfer(to_xilinx_chan(dchan));
+}
+
+/**
+ * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete
+ * @chan : xilinx DMA channel
+ *
+ * Completes the cookie of the active descriptor and moves the descriptor
+ * to the tail of the done list. Only logs a debug message when there is
+ * no active descriptor.
+ *
+ * CONTEXT: hardirq
+ */
+static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *finished;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+
+	finished = chan->active_desc;
+	if (finished) {
+		dma_cookie_complete(&finished->async_tx);
+		list_add_tail(&finished->node, &chan->done_list);
+
+		chan->active_desc = NULL;
+	} else {
+		dev_dbg(chan->dev, "no running descriptors\n");
+	}
+
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/**
+ * xilinx_vdma_reset - Reset VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Sets the RESET bit in the control register and polls, at most
+ * XILINX_VDMA_LOOP_COUNT times, for the hardware to clear it again.
+ * A successful reset also clears the channel error flag.
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+	u32 tmp;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET);
+
+	/* Wait for the hardware to finish reset */
+	do {
+		tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+			XILINX_VDMA_DMACR_RESET;
+	} while (tmp && --loop);
+
+	/*
+	 * Decide on the last value actually read rather than on the loop
+	 * counter: the reset bit still being set is what defines a timeout.
+	 */
+	if (tmp) {
+		dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR),
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		return -ETIMEDOUT;
+	}
+
+	chan->err = false;
+
+	return 0;
+}
+
+/**
+ * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts
+ * @chan: Driver specific VDMA channel
+ *
+ * Interrupts are only (re-)enabled when the reset itself succeeded.
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan)
+{
+	int ret = xilinx_vdma_reset(chan);
+
+	/* Frame count, delay count and error interrupts */
+	if (!ret)
+		vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR,
+			      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	return ret;
+}
+
+/**
+ * xilinx_vdma_irq_handler - VDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx VDMA channel structure
+ *
+ * Acknowledges the pending interrupt bits, flags the channel on errors
+ * that are not ignorable, completes the active descriptor and starts the
+ * next one on a frame count interrupt, and schedules the cleanup tasklet.
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_vdma_chan *chan = data;
+	u32 status;
+
+	/* Read the status and ack the interrupts. */
+	status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
+	/* None of our interrupt bits set: not raised by this channel */
+	if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK))
+		return IRQ_NONE;
+
+	/* Write back only the interrupt bits that were seen */
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+			status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	if (status & XILINX_VDMA_DMASR_ERR_IRQ) {
+		/*
+		 * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
+		 * error is recoverable, ignore it. Otherwise flag the error.
+		 *
+		 * Only recoverable errors can be cleared in the DMASR register,
+		 * make sure not to write to other error bits to 1.
+		 */
+		u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK;
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+				errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK);
+
+		/*
+		 * Flag the channel when flush-on-fsync is off or when any
+		 * error outside the recoverable set is present.
+		 */
+		if (!chan->flush_on_fsync ||
+		    (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) {
+			dev_err(chan->dev,
+				"Channel %p has errors %x, cdr %x tdr %x\n",
+				chan, errors,
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC),
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC));
+			chan->err = true;
+		}
+	}
+
+	if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) {
+		/*
+		 * Device takes too long to do the transfer when user requires
+		 * responsiveness.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) {
+		/* Retire the active descriptor, then try the next pending */
+		xilinx_vdma_complete_descriptor(chan);
+		xilinx_vdma_start_transfer(chan);
+	}
+
+	/* Callbacks and descriptor release happen in the tasklet */
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * xilinx_vdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor
+ *
+ * A channel flagged as being in error is reset first. The descriptor is
+ * then given a cookie and queued at the tail of the pending list; it is
+ * no longer tracked as the channel's allocated descriptor.
+ *
+ * Return: cookie value on success and failure value on error
+ */
+static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct xilinx_vdma_chan *vchan = to_xilinx_chan(tx->chan);
+	struct xilinx_vdma_tx_descriptor *vdesc = to_vdma_tx_descriptor(tx);
+	unsigned long irqflags;
+	dma_cookie_t id;
+
+	if (vchan->err) {
+		/*
+		 * A failed reset leaves the channel unusable; only a hard
+		 * reset of the system can recover it.
+		 */
+		int rc = xilinx_vdma_chan_reset(vchan);
+
+		if (rc < 0)
+			return rc;
+	}
+
+	spin_lock_irqsave(&vchan->lock, irqflags);
+
+	id = dma_cookie_assign(tx);
+
+	/* Queue behind everything already waiting on the channel. */
+	list_add_tail(&vdesc->node, &vchan->pending_list);
+
+	/* Ownership moved to the pending list; forget the prepared one. */
+	vchan->allocated_desc = NULL;
+
+	spin_unlock_irqrestore(&vchan->lock, irqflags);
+
+	return id;
+}
+
+/**
+ * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for a
+ *	DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Builds a transaction with one hardware segment taken from the first chunk
+ * of @xt (sgl[0]): numf becomes the vertical size, sgl[0].size the
+ * horizontal size and sgl[0].icg the stride. The last segment's next pointer
+ * is linked back to the first one so the hardware chain is circular.
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_vdma_tx_descriptor *desc;
+	struct xilinx_vdma_tx_segment *segment, *prev = NULL;
+	struct xilinx_vdma_desc_hw *hw;
+
+	if (!is_slave_direction(xt->dir))
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_vdma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_vdma_tx_submit;
+	async_tx_ack(&desc->async_tx);
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_vdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	/* Fill in the hardware descriptor */
+	hw = &segment->hw;
+	hw->vsize = xt->numf;
+	hw->hsize = xt->sgl[0].size;
+	hw->stride = xt->sgl[0].icg <<
+			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+	hw->stride |= chan->config.frm_dly <<
+			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+
+	if (xt->dir != DMA_MEM_TO_DEV)
+		hw->buf_addr = xt->dst_start;
+	else
+		hw->buf_addr = xt->src_start;
+
+	/*
+	 * Link the previous segment, if any, to the current one.
+	 * list_last_entry() must not be used on an empty list: it would
+	 * return a pointer computed from the list head itself and the
+	 * store below would corrupt memory around the descriptor.
+	 */
+	if (!list_empty(&desc->segments)) {
+		prev = list_last_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		prev->hw.next_desc = segment->phys;
+	}
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
+	prev = segment;
+
+	/* Link the last hardware descriptor with the first. */
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_vdma_tx_segment, node);
+	prev->hw.next_desc = segment->phys;
+
+	return &desc->async_tx;
+
+error:
+	xilinx_vdma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_vdma_terminate_all - Halt the channel and free descriptors
+ * @chan: Driver specific VDMA Channel pointer
+ *
+ * The engine is halted first; only then are the channel's descriptor lists
+ * released.
+ */
+static void xilinx_vdma_terminate_all(struct xilinx_vdma_chan *chan)
+{
+	/* Halt the DMA engine */
+	xilinx_vdma_halt(chan);
+
+	/* Remove and free all of the descriptors in the lists */
+	xilinx_vdma_free_descriptors(chan);
+}
+
+/**
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * Run-time configuration for Axi VDMA, supports:
+ * . halt the channel
+ * . configure interrupt coalescing and inter-packet delay threshold
+ * . start/stop parking
+ * . enable genlock
+ *
+ * @dchan: DMA channel
+ * @cfg: VDMA device configuration pointer
+ *
+ * When @cfg->reset is set the channel is reset and nothing else is applied.
+ * Otherwise the requested settings are cached in the channel's config and
+ * the DMACR register is read, modified and written back. Coalesce and delay
+ * values above their respective maximum are cached but not programmed.
+ *
+ * Return: '0' on success and failure value on error
+ */
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	u32 dmacr;
+
+	if (cfg->reset)
+		return xilinx_vdma_chan_reset(chan);
+
+	dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	chan->config.frm_dly = cfg->frm_dly;
+	chan->config.park = cfg->park;
+
+	/* genlock settings */
+	chan->config.gen_lock = cfg->gen_lock;
+	chan->config.master = cfg->master;
+
+	if (cfg->gen_lock && chan->genlock) {
+		dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN;
+		dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT;
+	}
+
+	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	if (cfg->park)
+		chan->config.park_frm = cfg->park_frm;
+	else
+		chan->config.park_frm = -1;
+
+	chan->config.coalesc = cfg->coalesc;
+	chan->config.delay = cfg->delay;
+
+	/*
+	 * dmacr holds the value currently in the register, so each field
+	 * must be cleared before the new value is OR-ed in; otherwise bits
+	 * left over from a previous configuration would be kept.
+	 *
+	 * NOTE(review): the field masks are derived from the *_MAX values,
+	 * which assumes each *_MAX is the all-ones value of its field --
+	 * confirm against the register definitions.
+	 */
+	if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) {
+		dmacr &= ~((u32)XILINX_VDMA_DMACR_FRAME_COUNT_MAX <<
+				XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT);
+		dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT;
+	}
+
+	if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) {
+		dmacr &= ~((u32)XILINX_VDMA_DMACR_DELAY_MAX <<
+				XILINX_VDMA_DMACR_DELAY_SHIFT);
+		dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT;
+	}
+
+	/* FSync Source selection */
+	dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK;
+	dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr);
+
+	return 0;
+}
+EXPORT_SYMBOL(xilinx_vdma_channel_set_config);
+
+/**
+ * xilinx_vdma_device_control - Handle a dmaengine control command
+ * @dchan: DMA Channel pointer
+ * @cmd: DMA control command
+ * @arg: Channel configuration (not used by this implementation)
+ *
+ * DMA_TERMINATE_ALL is the only command handled here; every other command
+ * is rejected with -ENXIO.
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_device_control(struct dma_chan *dchan,
+				      enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	if (cmd == DMA_TERMINATE_ALL) {
+		xilinx_vdma_terminate_all(to_xilinx_chan(dchan));
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+/**
+ * xilinx_vdma_chan_remove - Per Channel remove function
+ * @chan: Driver specific VDMA channel
+ *
+ * Masks the channel interrupts in DMACR, releases the IRQ when one was
+ * mapped, kills the channel tasklet and unlinks the channel from the DMA
+ * device's channel list.
+ */
+static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan)
+{
+	/* Disable all interrupts */
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR,
+		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	/* Only release the line if a valid IRQ number was stored at probe */
+	if (chan->irq > 0)
+		free_irq(chan->irq, chan);
+
+	/* Done after free_irq(), the caller of tasklet_schedule() here */
+	tasklet_kill(&chan->tasklet);
+
+	list_del(&chan->common.device_node);
+}
+
+/**
+ * xilinx_vdma_chan_probe - Per Channel Probing
+ * It get channel features from the device tree entry and
+ * initialize special channel handling routines
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * The channel structure is device-managed. Once the channel has been stored
+ * in @xdev->chan[], a later failure (channel reset) is cleaned up by the
+ * caller through xilinx_vdma_chan_remove().
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev,
+				  struct device_node *node)
+{
+	struct xilinx_vdma_chan *chan;
+	bool has_dre = false;
+	u32 value, width;
+	int err;
+
+	/* Allocate and initialize the channel structure */
+	chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	chan->dev = xdev->dev;
+	chan->xdev = xdev;
+	chan->has_sg = xdev->has_sg;
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->pending_list);
+	INIT_LIST_HEAD(&chan->done_list);
+
+	/* Retrieve the channel properties from the device tree */
+	has_dre = of_property_read_bool(node, "xlnx,include-dre");
+
+	chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
+
+	err = of_property_read_u32(node, "xlnx,datawidth", &value);
+	if (err) {
+		dev_err(xdev->dev, "missing xlnx,datawidth property\n");
+		return err;
+	}
+	width = value >> 3; /* Convert bits to bytes */
+
+	/* If data width is greater than 8 bytes, DRE is not in hw */
+	if (width > 8)
+		has_dre = false;
+
+	if (!has_dre)
+		xdev->common.copy_align = fls(width - 1);
+
+	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel")) {
+		chan->direction = DMA_MEM_TO_DEV;
+		chan->id = 0;
+
+		chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S)
+			chan->flush_on_fsync = true;
+	} else if (of_device_is_compatible(node,
+					    "xlnx,axi-vdma-s2mm-channel")) {
+		chan->direction = DMA_DEV_TO_MEM;
+		chan->id = 1;
+
+		chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM)
+			chan->flush_on_fsync = true;
+	} else {
+		dev_err(xdev->dev, "Invalid channel compatible node\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Initialize the tasklet before the interrupt is requested: the
+	 * handler schedules it, and with IRQF_SHARED the handler may be
+	 * invoked as soon as request_irq() returns.
+	 */
+	tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet,
+			(unsigned long)chan);
+
+	/* Request the interrupt */
+	chan->irq = irq_of_parse_and_map(node, 0);
+	err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED,
+			  "xilinx-vdma-controller", chan);
+	if (err) {
+		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
+		return err;
+	}
+
+	/*
+	 * Initialize the DMA channel and add it to the DMA engine channels
+	 * list.
+	 */
+	chan->common.device = &xdev->common;
+
+	list_add_tail(&chan->common.device_node, &xdev->common.channels);
+	xdev->chan[chan->id] = chan;
+
+	/* Reset the channel */
+	err = xilinx_vdma_chan_reset(chan);
+	if (err < 0) {
+		dev_err(xdev->dev, "Reset channel failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * of_dma_xilinx_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * The first specifier cell selects the channel index. An index that is out
+ * of range, or that names a channel slot which was never populated, yields
+ * NULL.
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct xilinx_vdma_device *xdev = ofdma->of_dma_data;
+	/* Kept unsigned so a large cell value cannot turn into a negative index */
+	u32 chan_id = dma_spec->args[0];
+
+	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE)
+		return NULL;
+
+	/* The slot stays NULL when no such channel node was probed */
+	if (!xdev->chan[chan_id])
+		return NULL;
+
+	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
+}
+
+/**
+ * xilinx_vdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Maps the register space, reads the engine properties from the device
+ * tree, probes one channel per child node, then registers the engine with
+ * the dmaengine core and with the OF DMA helpers. On failure every channel
+ * probed so far is removed again.
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct xilinx_vdma_device *xdev;
+	struct device_node *child;
+	struct resource *io;
+	u32 num_frames;
+	int i, err;
+
+	/* Allocate and initialize the DMA engine structure */
+	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+	if (!xdev)
+		return -ENOMEM;
+
+	xdev->dev = &pdev->dev;
+
+	/* Request and map I/O memory */
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	if (IS_ERR(xdev->regs))
+		return PTR_ERR(xdev->regs);
+
+	/* Retrieve the DMA engine properties from the device tree */
+	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
+
+	err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames);
+	if (err < 0) {
+		dev_err(xdev->dev, "missing xlnx,num-fstores property\n");
+		return err;
+	}
+
+	/* A missing flush-fsync property is only warned about, not fatal */
+	err = of_property_read_u32(node, "xlnx,flush-fsync",
+					&xdev->flush_on_fsync);
+	if (err < 0)
+		dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n");
+
+	/* Initialize the DMA engine */
+	xdev->common.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&xdev->common.channels);
+	dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+	dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+
+	xdev->common.device_alloc_chan_resources =
+				xilinx_vdma_alloc_chan_resources;
+	xdev->common.device_free_chan_resources =
+				xilinx_vdma_free_chan_resources;
+	xdev->common.device_prep_interleaved_dma =
+				xilinx_vdma_dma_prep_interleaved;
+	xdev->common.device_control = xilinx_vdma_device_control;
+	xdev->common.device_tx_status = xilinx_vdma_tx_status;
+	xdev->common.device_issue_pending = xilinx_vdma_issue_pending;
+
+	platform_set_drvdata(pdev, xdev);
+
+	/* Initialize the channels */
+	for_each_child_of_node(node, child) {
+		err = xilinx_vdma_chan_probe(xdev, child);
+		if (err < 0) {
+			/*
+			 * The iterator holds a reference on the current
+			 * child; drop it when leaving the loop early.
+			 */
+			of_node_put(child);
+			goto error;
+		}
+	}
+
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xdev->chan[i]->num_frms = num_frames;
+
+	/* Register the DMA engine with the core */
+	err = dma_async_device_register(&xdev->common);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Unable to register DMA engine\n");
+		goto error;
+	}
+
+	err = of_dma_controller_register(node, of_dma_xilinx_xlate,
+					 xdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+		dma_async_device_unregister(&xdev->common);
+		goto error;
+	}
+
+	dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
+
+	return 0;
+
+error:
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xilinx_vdma_chan_remove(xdev->chan[i]);
+
+	return err;
+}
+
+/**
+ * xilinx_vdma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Undoes probe in reverse order: the OF DMA controller registration is
+ * dropped, the engine is unregistered from the dmaengine core, and each
+ * populated channel is removed.
+ *
+ * Return: Always '0'
+ */
+static int xilinx_vdma_remove(struct platform_device *pdev)
+{
+	struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev);
+	int idx;
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&xdev->common);
+
+	for (idx = 0; idx < XILINX_VDMA_MAX_CHANS_PER_DEVICE; idx++) {
+		struct xilinx_vdma_chan *chan = xdev->chan[idx];
+
+		if (!chan)
+			continue;
+
+		xilinx_vdma_chan_remove(chan);
+	}
+
+	return 0;
+}
+
+/*
+ * Device tree compatible strings handled by this driver. The table is
+ * exported with MODULE_DEVICE_TABLE() so that a modular build can be
+ * autoloaded when a matching node is present.
+ */
+static const struct of_device_id xilinx_vdma_of_ids[] = {
+	{ .compatible = "xlnx,axi-vdma-1.00.a",},
+	{}
+};
+MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids);
+
+/* Platform driver glue: binds probe/remove to the OF match table above. */
+static struct platform_driver xilinx_vdma_driver = {
+	.driver = {
+		.name = "xilinx-vdma",
+		.owner = THIS_MODULE,
+		.of_match_table = xilinx_vdma_of_ids,
+	},
+	.probe = xilinx_vdma_probe,
+	.remove = xilinx_vdma_remove,
+};
+
+module_platform_driver(xilinx_vdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx VDMA driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d9c9cb4..2ebc907 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c

@@ -2614,7 +2614,7 @@
 
 		desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
 						&of_flags);
-		if (!IS_ERR(desc))
+		if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER))
 			break;
 	}
 

diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index d8a22c2..70da9eb 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile

@@ -1,2 +1,3 @@
 obj-y			+= drm/ vga/
 obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
+obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index d1cc2f6..f512004 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig

@@ -83,6 +83,8 @@
 
 source "drivers/gpu/drm/i2c/Kconfig"
 
+source "drivers/gpu/drm/bridge/Kconfig"
+
 config DRM_TDFX
 	tristate "3dfx Banshee/Voodoo3+"
 	depends on DRM && PCI
@@ -199,5 +201,3 @@
 source "drivers/gpu/drm/tegra/Kconfig"
 
 source "drivers/gpu/drm/panel/Kconfig"
-
-source "drivers/gpu/drm/bridge/Kconfig"

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 48e38ba..dd2ba42 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile

@@ -14,7 +14,7 @@
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
 		drm_trace_points.o drm_global.o drm_prime.o \
 		drm_rect.o drm_vma_manager.o drm_flip_work.o \
-		drm_plane_helper.o
+		drm_modeset_lock.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
@@ -23,7 +23,8 @@
 
 drm-usb-y   := drm_usb.o
 
-drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o
+drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
+		drm_plane_helper.o
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_KMS_FB_HELPER) += drm_fb_helper.o
 drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o

diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 32982da..8ab3cd1 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c

@@ -173,7 +173,7 @@
 	if (ret)
 		goto err_kms;
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	if (ret)
 		goto err_kms;
 
@@ -402,7 +402,7 @@
 
 static int __init armada_drm_init(void)
 {
-	armada_drm_driver.num_ioctls = DRM_ARRAY_SIZE(armada_ioctls);
+	armada_drm_driver.num_ioctls = ARRAY_SIZE(armada_ioctls);
 	return platform_driver_register(&armada_drm_platform_driver);
 }
 module_init(armada_drm_init);

diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index 948cb14..fd166f5 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c

@@ -181,10 +181,8 @@
 {
 	struct armada_private *priv = dev->dev_private;
 
-	drm_modeset_lock_all(dev);
 	if (priv->fbdev)
-		drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-	drm_modeset_unlock_all(dev);
+		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 }
 
 void armada_fbdev_fini(struct drm_device *dev)

diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 887816f..bb9b642 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c

@@ -433,7 +433,6 @@
 
 	if (dobj->obj.filp) {
 		struct address_space *mapping;
-		gfp_t gfp;
 		int count;
 
 		count = dobj->obj.size / PAGE_SIZE;
@@ -441,12 +440,11 @@
 			goto free_sgt;
 
 		mapping = file_inode(dobj->obj.filp)->i_mapping;
-		gfp = mapping_gfp_mask(mapping);
 
 		for_each_sg(sgt->sgl, sg, count, i) {
 			struct page *page;
 
-			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			page = shmem_read_mapping_page(mapping, i);
 			if (IS_ERR(page)) {
 				num = i;
 				goto release;

diff --git a/drivers/gpu/drm/ast/Makefile b/drivers/gpu/drm/ast/Makefile
index 8df4f28..171aa06 100644
--- a/drivers/gpu/drm/ast/Makefile
+++ b/drivers/gpu/drm/ast/Makefile

@@ -4,6 +4,6 @@
 
 ccflags-y := -Iinclude/drm
 
-ast-y := ast_drv.o ast_main.o ast_mode.o ast_fb.o ast_ttm.o ast_post.o
+ast-y := ast_drv.o ast_main.o ast_mode.o ast_fb.o ast_ttm.o ast_post.o ast_dp501.o
 
-obj-$(CONFIG_DRM_AST) := ast.o
\ No newline at end of file
+obj-$(CONFIG_DRM_AST) := ast.o

diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c
new file mode 100644
index 0000000..5da4b62
--- /dev/null
+++ b/drivers/gpu/drm/ast/ast_dp501.c

@@ -0,0 +1,410 @@
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "ast_drv.h"
+MODULE_FIRMWARE("ast_dp501_fw.bin");
+
+/*
+ * Request the "ast_dp501_fw.bin" firmware image and keep the handle in
+ * ast->dp501_fw.
+ *
+ * Returns 0 on success, otherwise the error from request_firmware().
+ */
+int ast_load_dp501_microcode(struct drm_device *dev)
+{
+	static char *fw_name = "ast_dp501_fw.bin";
+	struct ast_private *ast = dev->dev_private;
+
+	return request_firmware(&ast->dp501_fw, fw_name, dev->dev);
+}
+
+/* Read CRTC index register 0x9b and write it back with bit 7 set. */
+static void send_ack(struct ast_private *ast)
+{
+	u8 val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, 0xff);
+
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, 0x00, val | 0x80);
+}
+
+/* Read CRTC index register 0x9b and write it back with bit 7 cleared. */
+static void send_nack(struct ast_private *ast)
+{
+	u8 val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, 0xff);
+
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, 0x00, val & 0x7f);
+}
+
+/*
+ * Poll bit 7 of CRTC index register 0xd2 until it is set, waiting 100us
+ * after each read and giving up after roughly 1000 polls.
+ *
+ * Returns true if the bit was seen set, false on timeout. The result is
+ * taken from the last value read rather than from the retry counter, so an
+ * ack that arrives on the final poll is not misreported as a timeout.
+ */
+static bool wait_ack(struct ast_private *ast)
+{
+	u8 waitack;
+	u32 retry = 0;
+
+	do {
+		waitack = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd2, 0xff);
+		waitack &= 0x80;
+		udelay(100);
+	} while ((!waitack) && (retry++ < 1000));
+
+	return waitack != 0;
+}
+
+/*
+ * Poll bit 7 of CRTC index register 0xd2 until it is clear, waiting 100us
+ * after each read and giving up after roughly 1000 polls.
+ *
+ * Returns true if the bit was seen clear, false on timeout. The result is
+ * taken from the last value read rather than from the retry counter, so a
+ * nack that arrives on the final poll is not misreported as a timeout.
+ */
+static bool wait_nack(struct ast_private *ast)
+{
+	u8 waitack;
+	u32 retry = 0;
+
+	do {
+		waitack = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd2, 0xff);
+		waitack &= 0x80;
+		udelay(100);
+	} while ((waitack) && (retry++ < 1000));
+
+	return waitack == 0;
+}
+
+/*
+ * Set bit 6 (0x40) in CRTC index register 0x9b. The ~0x40 mask presumably
+ * preserves the register's other bits -- see ast_set_index_reg_mask().
+ */
+static void set_cmd_trigger(struct ast_private *ast)
+{
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, ~0x40, 0x40);
+}
+
+/*
+ * Clear bit 6 (0x40) in CRTC index register 0x9b. The ~0x40 mask presumably
+ * preserves the register's other bits -- see ast_set_index_reg_mask().
+ */
+static void clear_cmd_trigger(struct ast_private *ast)
+{
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9b, ~0x40, 0x00);
+}
+
+/*
+ * Compiled out. Same polling scheme as wait_ack(), but watching bit 6
+ * (0x40) of CRTC index register 0xd2.
+ */
+#if 0
+static bool wait_fw_ready(struct ast_private *ast)
+{
+	u8 waitready;
+	u32 retry = 0;
+	do {
+		waitready = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd2, 0xff);
+		waitready &= 0x40;
+		udelay(100);
+	} while ((!waitready) && (retry++ < 1000));
+
+	if (retry < 1000)
+		return true;
+	else
+		return false;
+}
+#endif
+
+/*
+ * Send one command byte through CRTC index register 0x9a.
+ *
+ * Once the nack state is observed, the byte is written, ack and the command
+ * trigger are raised, and wait_ack() is tried up to 101 times. The trigger
+ * is cleared and nack is sent again on every exit path.
+ *
+ * Returns true if the byte was acknowledged, false otherwise.
+ */
+static bool ast_write_cmd(struct drm_device *dev, u8 data)
+{
+	struct ast_private *ast = dev->dev_private;
+	bool acked = false;
+	int attempt;
+
+	if (wait_nack(ast)) {
+		send_nack(ast);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9a, 0x00, data);
+		send_ack(ast);
+		set_cmd_trigger(ast);
+
+		for (attempt = 0; attempt <= 100 && !acked; attempt++)
+			acked = wait_ack(ast);
+	}
+
+	clear_cmd_trigger(ast);
+	send_nack(ast);
+	return acked;
+}
+
+/*
+ * Send one data byte through CRTC index register 0x9a.
+ *
+ * Once the nack state is observed, the byte is written, ack is raised and
+ * a single wait_ack() decides the outcome. Nack is sent again on every exit
+ * path.
+ *
+ * Returns true if the byte was acknowledged, false otherwise.
+ */
+static bool ast_write_data(struct drm_device *dev, u8 data)
+{
+	struct ast_private *ast = dev->dev_private;
+	bool acked = false;
+
+	if (wait_nack(ast)) {
+		send_nack(ast);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9a, 0x00, data);
+		send_ack(ast);
+		acked = wait_ack(ast);
+	}
+
+	send_nack(ast);
+	return acked;
+}
+
+/*
+ * Compiled out: read-side counterpart of ast_write_data() and a helper that
+ * zeroes the command register.
+ */
+#if 0
+/* Wait for ack, fetch one byte from CRTC index 0xd3, then wait for nack. */
+static bool ast_read_data(struct drm_device *dev, u8 *data)
+{
+	struct ast_private *ast = dev->dev_private;
+	u8 tmp;
+
+	*data = 0;
+
+	if (wait_ack(ast) == false)
+		return false;
+	tmp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd3, 0xff);
+	*data = tmp;
+	if (wait_nack(ast) == false) {
+		send_nack(ast);
+		return false;
+	}
+	send_nack(ast);
+	return true;
+}
+
+/* Send nack and write 0x00 to CRTC index register 0x9a. */
+static void clear_cmd(struct ast_private *ast)
+{
+	send_nack(ast);
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x9a, 0x00, 0x00);
+}
+#endif
+
+/*
+ * Send command 0x40 followed by @mode as its data byte, then sleep 10ms.
+ *
+ * NOTE(review): the results of ast_write_cmd()/ast_write_data() are ignored,
+ * so the data byte is sent even if the command was not acknowledged --
+ * confirm this is intended.
+ */
+void ast_set_dp501_video_output(struct drm_device *dev, u8 mode)
+{
+	ast_write_cmd(dev, 0x40);
+	ast_write_data(dev, mode);
+
+	msleep(10);
+}
+
+/*
+ * Return the value at 0x1e6e2104 with bit 31 masked off. ast_launch_m68k()
+ * writes 0x80000000 + boot_address to that same location.
+ */
+static u32 get_fw_base(struct ast_private *ast)
+{
+	return ast_mindwm(ast, 0x1e6e2104) & 0x7fffffff;
+}
+
+/*
+ * Copy @size bytes, one 32-bit word at a time, from the address returned by
+ * get_fw_base() into @addr.
+ *
+ * Nothing is copied unless bit 0 of 0x1e6e2100 is set (the bit that
+ * ast_launch_m68k() writes when it launches the firmware).
+ *
+ * Returns true if the copy was performed, false otherwise.
+ */
+bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size)
+{
+	struct ast_private *ast = dev->dev_private;
+	u32 base, off;
+
+	if (!(ast_mindwm(ast, 0x1e6e2100) & 0x01))
+		return false;
+
+	base = get_fw_base(ast);
+	for (off = 0; off < size; off += 4)
+		*(u32 *)(addr + off) = ast_mindwm(ast, base + off);
+
+	return true;
+}
+
+/*
+ * Copy the DP501 firmware image to its boot address and launch it, unless
+ * bit 0 of 0x1e6e2100 says it has already been launched.
+ *
+ * The image comes from the backup kept in ast->dp501_fw_addr (32KB) when
+ * there is one, otherwise from the firmware file, which is requested here
+ * if it has not been loaded yet. Previously a missing image meant nothing
+ * was copied but the boot address was still programmed and launched.
+ *
+ * Returns true if the firmware is running or was launched, false if no
+ * firmware image could be obtained.
+ */
+bool ast_launch_m68k(struct drm_device *dev)
+{
+	struct ast_private *ast = dev->dev_private;
+	u32 i, data, len = 0;
+	u32 boot_address;
+	u8 *fw_addr = NULL;
+	u8 jreg;
+
+	data = ast_mindwm(ast, 0x1e6e2100) & 0x01;
+	if (!data) {
+
+		if (ast->dp501_fw_addr) {
+			fw_addr = ast->dp501_fw_addr;
+			len = 32*1024;
+		} else {
+			/* No backup image: fall back to the firmware file */
+			if (!ast->dp501_fw &&
+			    ast_load_dp501_microcode(dev) < 0)
+				return false;
+
+			fw_addr = (u8 *)ast->dp501_fw->data;
+			len = ast->dp501_fw->size;
+		}
+		/* Get BootAddress */
+		ast_moutdwm(ast, 0x1e6e2000, 0x1688a8a8);
+		data = ast_mindwm(ast, 0x1e6e0004);
+		switch (data & 0x03) {
+		case 0:
+			boot_address = 0x44000000;
+			break;
+		default:
+		case 1:
+			boot_address = 0x48000000;
+			break;
+		case 2:
+			boot_address = 0x50000000;
+			break;
+		case 3:
+			boot_address = 0x60000000;
+			break;
+		}
+		boot_address -= 0x200000; /* -2MB */
+
+		/*
+		 * copy image to buffer
+		 * NOTE(review): words are read 4 bytes at a time, so this
+		 * assumes len is a multiple of 4 -- confirm for the
+		 * firmware file.
+		 */
+		for (i = 0; i < len; i += 4) {
+			data = *(u32 *)(fw_addr + i);
+			ast_moutdwm(ast, boot_address + i, data);
+		}
+
+		/* Init SCU */
+		ast_moutdwm(ast, 0x1e6e2000, 0x1688a8a8);
+
+		/* Launch FW */
+		ast_moutdwm(ast, 0x1e6e2104, 0x80000000 + boot_address);
+		ast_moutdwm(ast, 0x1e6e2100, 1);
+
+		/* Update Scratch */
+		data = ast_mindwm(ast, 0x1e6e2040) & 0xfffff1ff;		/* D[11:9] = 100b: UEFI handling */
+		data |= 0x800;
+		ast_moutdwm(ast, 0x1e6e2040, data);
+
+		jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x99, 0xfc); /* D[1:0]: Reserved Video Buffer */
+		jreg |= 0x02;
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x99, jreg);
+	}
+	return true;
+}
+
+/*
+ * Derive the maximum clock from the link capability word the firmware
+ * exposes at offset 0xf014 of its base address.
+ *
+ * Returns 0xff when the firmware version byte at offset 0xf000 is not 1x or
+ * when the third capability byte is non-zero. Otherwise the result is the
+ * lane count times 90 (link rate 0x0a) or 54 (any other rate), capped at
+ * 0xff.
+ */
+u8 ast_get_dp501_max_clk(struct drm_device *dev)
+{
+	struct ast_private *ast = dev->dev_private;
+	u32 boot_address = get_fw_base(ast);
+	u8 linkcap[4];
+	u32 clk;
+
+	/* validate FW version: 1x */
+	if ((ast_mindwm(ast, boot_address + 0xf000) & 0xf0) != 0x10)
+		return 0xff;
+
+	/* Read Link Capability */
+	*(u32 *)linkcap = ast_mindwm(ast, boot_address + 0xf014);
+	if (linkcap[2] != 0)
+		return 0xff;
+
+	/* linkcap[0] is the link rate, linkcap[1] the lane count */
+	clk = ((linkcap[0] == 0x0a) ? 90 : 54) * linkcap[1];
+	if (clk > 0xff)
+		clk = 0xff;
+
+	return (u8)clk;
+}
+
+/*
+ * Copy the 128-byte EDID block the firmware exposes at offset 0xf020 of its
+ * base address into @ediddata, one 32-bit word at a time.
+ *
+ * Returns false, without touching @ediddata, when the firmware version byte
+ * at offset 0xf000 is not 1x or when bit 0 of the word at offset 0xf010 is
+ * clear; returns true after the copy otherwise.
+ */
+bool ast_dp501_read_edid(struct drm_device *dev, u8 *ediddata)
+{
+	struct ast_private *ast = dev->dev_private;
+	u32 base = get_fw_base(ast);
+	u32 off;
+
+	/* validate FW version */
+	if ((ast_mindwm(ast, base + 0xf000) & 0xf0) != 0x10)
+		return false;
+
+	/* validate PnP Monitor */
+	if (!(ast_mindwm(ast, base + 0xf010) & 0x01))
+		return false;
+
+	/* Read EDID */
+	for (off = 0; off < 128; off += 4)
+		*(u32 *)(ediddata + off) = ast_mindwm(ast, base + 0xf020 + off);
+
+	return true;
+}
+
+/*
+ * Program the registers for DVO output.
+ *
+ * When bit 7 of CRTC index register 0xd0 is clear, the SCU DVO delay phase
+ * and the chip-specific multi-function pin registers are set up first (one
+ * register set for AST2300, another for every other chip, which the callers
+ * restrict to AST2400). In all cases bit 18 of 0x1202c is then cleared and
+ * CRTC index register 0xa3 is updated.
+ *
+ * Always returns true.
+ */
+static bool ast_init_dvo(struct drm_device *dev)
+{
+	struct ast_private *ast = dev->dev_private;
+	u8 jreg;
+	u32 data;
+	/*
+	 * NOTE(review): presumably points the 0x1xxxx window at the
+	 * 0x1e6e0000 register block and unlocks it with the key -- confirm
+	 * against the chip documentation.
+	 */
+	ast_write32(ast, 0xf004, 0x1e6e0000);
+	ast_write32(ast, 0xf000, 0x1);
+	ast_write32(ast, 0x12000, 0x1688a8a8);
+
+	jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff);
+	if (!(jreg & 0x80)) {
+		/* Init SCU DVO Settings */
+		data = ast_read32(ast, 0x12008);
+		/* delay phase */
+		data &= 0xfffff8ff;
+		data |= 0x00000500;
+		ast_write32(ast, 0x12008, data);
+
+		if (ast->chip == AST2300) {
+			data = ast_read32(ast, 0x12084);
+			/* multi-pins for DVO single-edge */
+			data |= 0xfffe0000;
+			ast_write32(ast, 0x12084, data);
+
+			data = ast_read32(ast, 0x12088);
+			/* multi-pins for DVO single-edge */
+			data |= 0x000fffff;
+			ast_write32(ast, 0x12088, data);
+
+			data = ast_read32(ast, 0x12090);
+			/* multi-pins for DVO single-edge */
+			data &= 0xffffffcf;
+			data |= 0x00000020;
+			ast_write32(ast, 0x12090, data);
+		} else { /* AST2400 */
+			data = ast_read32(ast, 0x12088);
+			/* multi-pins for DVO single-edge */
+			data |= 0x30000000;
+			ast_write32(ast, 0x12088, data);
+
+			data = ast_read32(ast, 0x1208c);
+			/* multi-pins for DVO single-edge */
+			data |= 0x000000cf;
+			ast_write32(ast, 0x1208c, data);
+
+			data = ast_read32(ast, 0x120a4);
+			/* multi-pins for DVO single-edge */
+			data |= 0xffff0000;
+			ast_write32(ast, 0x120a4, data);
+
+			data = ast_read32(ast, 0x120a8);
+			/* multi-pins for DVO single-edge */
+			data |= 0x0000000f;
+			ast_write32(ast, 0x120a8, data);
+
+			data = ast_read32(ast, 0x12094);
+			/* multi-pins for DVO single-edge */
+			data |= 0x00000002;
+			ast_write32(ast, 0x12094, data);
+		}
+	}
+
+	/* Force to DVO */
+	data = ast_read32(ast, 0x1202c);
+	data &= 0xfffbffff;
+	ast_write32(ast, 0x1202c, data);
+
+	/* Init VGA DVO Settings */
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80);
+	return true;
+}
+
+/*
+ * Initialise the external transmitter path on AST2300/AST2400; other chips
+ * are left untouched.
+ *
+ * Bits 3:1 of CRTC index register 0xd1 pick the action: 0x04 and 0x0c set
+ * up DVO, 0x08 launches the DP501 firmware. For any other value DVO is set
+ * up when a SIL164 transmitter was detected; otherwise bits 17:16 of the
+ * value read from 0x1202c are cleared and the result is written out.
+ */
+void ast_init_3rdtx(struct drm_device *dev)
+{
+	struct ast_private *ast = dev->dev_private;
+	u8 jreg;
+	u32 data;
+
+	if (ast->chip != AST2300 && ast->chip != AST2400)
+		return;
+
+	jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff);
+	switch (jreg & 0x0e) {
+	case 0x04:
+	case 0x0c:
+		ast_init_dvo(dev);
+		break;
+	case 0x08:
+		ast_launch_m68k(dev);
+		break;
+	default:
+		if (ast->tx_chip_type == AST_TX_SIL164) {
+			ast_init_dvo(dev);
+			break;
+		}
+
+		ast_write32(ast, 0x12000, 0x1688a8a8);
+		data = ast_read32(ast, 0x1202c);
+		data &= 0xfffcffff;
+		/*
+		 * NOTE(review): the value is read from 0x1202c but written
+		 * to offset 0 -- looks like it may have been meant for
+		 * 0x1202c; confirm against the hardware documentation.
+		 */
+		ast_write32(ast, 0, data);
+		break;
+	}
+}

diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 5137f15..44074fb 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c

@@ -94,9 +94,7 @@
 	ast_post_gpu(dev);
 
 	drm_mode_config_reset(dev);
-	drm_modeset_lock_all(dev);
 	drm_helper_resume_force_mode(dev);
-	drm_modeset_unlock_all(dev);
 
 	console_lock();
 	ast_fbdev_set_suspend(dev, 0);
@@ -198,7 +196,6 @@
 
 static struct drm_driver driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM,
-	.dev_priv_size = 0,
 
 	.load = ast_driver_load,
 	.unload = ast_driver_unload,

diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 9833a1b..5d6a875 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h

@@ -61,9 +61,17 @@
 	AST2200,
 	AST2150,
 	AST2300,
+	AST2400,
 	AST1180,
 };
 
+enum ast_tx_chip {
+	AST_TX_NONE,
+	AST_TX_SIL164,
+	AST_TX_ITE66121,
+	AST_TX_DP501,
+};
+
 #define AST_DRAM_512Mx16 0
 #define AST_DRAM_1Gx16   1
 #define AST_DRAM_512Mx32 2
@@ -102,6 +110,12 @@
 	 * we have. */
 	struct ttm_bo_kmap_obj cache_kmap;
 	int next_cursor;
+	bool support_wide_screen;
+
+	enum ast_tx_chip tx_chip_type;
+	u8 dp501_maxclk;
+	u8 *dp501_fw_addr;
+	const struct firmware *dp501_fw;	/* dp501 fw */
 };
 
 int ast_driver_load(struct drm_device *dev, unsigned long flags);
@@ -368,4 +382,14 @@
 
 /* ast post */
 void ast_post_gpu(struct drm_device *dev);
+u32 ast_mindwm(struct ast_private *ast, u32 r);
+void ast_moutdwm(struct ast_private *ast, u32 r, u32 v);
+/* ast dp501 */
+int ast_load_dp501_microcode(struct drm_device *dev);
+void ast_set_dp501_video_output(struct drm_device *dev, u8 mode);
+bool ast_launch_m68k(struct drm_device *dev);
+bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size);
+bool ast_dp501_read_edid(struct drm_device *dev, u8 *ediddata);
+u8 ast_get_dp501_max_clk(struct drm_device *dev);
+void ast_init_3rdtx(struct drm_device *dev);
 #endif

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 50535fd..a2cc6be 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c

@@ -66,12 +66,16 @@
 static int ast_detect_chip(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
+	uint32_t data, jreg;
 
 	if (dev->pdev->device == PCI_CHIP_AST1180) {
 		ast->chip = AST1100;
 		DRM_INFO("AST 1180 detected\n");
 	} else {
-		if (dev->pdev->revision >= 0x20) {
+		if (dev->pdev->revision >= 0x30) {
+			ast->chip = AST2400;
+			DRM_INFO("AST 2400 detected\n");
+		} else if (dev->pdev->revision >= 0x20) {
 			ast->chip = AST2300;
 			DRM_INFO("AST 2300 detected\n");
 		} else if (dev->pdev->revision >= 0x10) {
@@ -104,6 +108,59 @@
 			DRM_INFO("AST 2000 detected\n");
 		}
 	}
+
+	switch (ast->chip) {
+	case AST1180:
+		ast->support_wide_screen = true;
+		break;
+	case AST2000:
+		ast->support_wide_screen = false;
+		break;
+	default:
+		jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff);
+		if (!(jreg & 0x80))
+			ast->support_wide_screen = true;
+		else if (jreg & 0x01)
+			ast->support_wide_screen = true;
+		else {
+			ast->support_wide_screen = false;
+			ast_write32(ast, 0xf004, 0x1e6e0000);
+			ast_write32(ast, 0xf000, 0x1);
+			data = ast_read32(ast, 0x1207c);
+			data &= 0x300;
+			if (ast->chip == AST2300 && data == 0x0) /* ast1300 */
+				ast->support_wide_screen = true;
+			if (ast->chip == AST2400 && data == 0x100) /* ast1400 */
+				ast->support_wide_screen = true;
+		}
+		break;
+	}
+
+	ast->tx_chip_type = AST_TX_NONE;
+	jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xff);
+	if (jreg & 0x80)
+		ast->tx_chip_type = AST_TX_SIL164;
+	if ((ast->chip == AST2300) || (ast->chip == AST2400)) {
+		jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff);
+		switch (jreg) {
+		case 0x04:
+			ast->tx_chip_type = AST_TX_SIL164;
+			break;
+		case 0x08:
+			ast->dp501_fw_addr = kzalloc(32*1024, GFP_KERNEL);
+			if (ast->dp501_fw_addr) {
+				/* backup firmware */
+				if (ast_backup_fw(dev, ast->dp501_fw_addr, 32*1024)) {
+					kfree(ast->dp501_fw_addr);
+					ast->dp501_fw_addr = NULL;
+				}
+			}
+			/* fallthrough */
+		case 0x0c:
+			ast->tx_chip_type = AST_TX_DP501;
+		}
+	}
+
 	return 0;
 }
 
@@ -129,7 +186,7 @@
 	else
 		ast->dram_bus_width = 32;
 
-	if (ast->chip == AST2300) {
+	if (ast->chip == AST2300 || ast->chip == AST2400) {
 		switch (data & 0x03) {
 		case 0:
 			ast->dram_type = AST_DRAM_512Mx16;
@@ -257,17 +314,32 @@
 {
 	struct ast_private *ast = dev->dev_private;
 	u8 jreg;
-
+	u32 vram_size;
 	ast_open_key(ast);
 
+	vram_size = AST_VIDMEM_DEFAULT_SIZE;
 	jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xaa, 0xff);
 	switch (jreg & 3) {
-	case 0: return AST_VIDMEM_SIZE_8M;
-	case 1: return AST_VIDMEM_SIZE_16M;
-	case 2: return AST_VIDMEM_SIZE_32M;
-	case 3: return AST_VIDMEM_SIZE_64M;
+	case 0: vram_size = AST_VIDMEM_SIZE_8M; break;
+	case 1: vram_size = AST_VIDMEM_SIZE_16M; break;
+	case 2: vram_size = AST_VIDMEM_SIZE_32M; break;
+	case 3: vram_size = AST_VIDMEM_SIZE_64M; break;
 	}
-	return AST_VIDMEM_DEFAULT_SIZE;
+
+	jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x99, 0xff);
+	switch (jreg & 0x03) {
+	case 1:
+		vram_size -= 0x100000;
+		break;
+	case 2:
+		vram_size -= 0x200000;
+		break;
+	case 3:
+		vram_size -= 0x400000;
+		break;
+	}
+
+	return vram_size;
 }
 
 int ast_driver_load(struct drm_device *dev, unsigned long flags)
@@ -316,6 +388,7 @@
 	if (ast->chip == AST2100 ||
 	    ast->chip == AST2200 ||
 	    ast->chip == AST2300 ||
+	    ast->chip == AST2400 ||
 	    ast->chip == AST1180) {
 		dev->mode_config.max_width = 1920;
 		dev->mode_config.max_height = 2048;
@@ -343,6 +416,7 @@
 {
 	struct ast_private *ast = dev->dev_private;
 
+	kfree(ast->dp501_fw_addr);
 	ast_mode_fini(dev);
 	ast_fbdev_fini(dev);
 	drm_mode_config_cleanup(dev);
@@ -411,16 +485,13 @@
 
 	tbo = &((*bo)->bo);
 	ttm_bo_unref(&tbo);
-	if (tbo == NULL)
-		*bo = NULL;
-
+	*bo = NULL;
 }
+
 void ast_gem_free_object(struct drm_gem_object *obj)
 {
 	struct ast_bo *ast_bo = gem_to_ast_bo(obj);
 
-	if (!ast_bo)
-		return;
 	ast_bo_unref(&ast_bo);
 }
 

diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index a4afdc8..114aee9 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c

@@ -115,11 +115,17 @@
 		else
 			vbios_mode->enh_table = &res_1280x1024[refresh_rate_index];
 		break;
+	case 1360:
+		vbios_mode->enh_table = &res_1360x768[refresh_rate_index];
+		break;
 	case 1440:
 		vbios_mode->enh_table = &res_1440x900[refresh_rate_index];
 		break;
 	case 1600:
-		vbios_mode->enh_table = &res_1600x1200[refresh_rate_index];
+		if (crtc->mode.crtc_vdisplay == 900)
+			vbios_mode->enh_table = &res_1600x900[refresh_rate_index];
+		else
+			vbios_mode->enh_table = &res_1600x1200[refresh_rate_index];
 		break;
 	case 1680:
 		vbios_mode->enh_table = &res_1680x1050[refresh_rate_index];
@@ -175,14 +181,17 @@
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8d, refresh_rate_index & 0xff);
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8e, mode_id & 0xff);
 
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0xa8);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x92, crtc->primary->fb->bits_per_pixel);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x93, adjusted_mode->clock / 1000);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x94, adjusted_mode->crtc_hdisplay);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x95, adjusted_mode->crtc_hdisplay >> 8);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0x00);
+		if (vbios_mode->enh_table->flags & NewModeInfo) {
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0xa8);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x92, crtc->primary->fb->bits_per_pixel);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x93, adjusted_mode->clock / 1000);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x94, adjusted_mode->crtc_hdisplay);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x95, adjusted_mode->crtc_hdisplay >> 8);
 
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x96, adjusted_mode->crtc_vdisplay);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x97, adjusted_mode->crtc_vdisplay >> 8);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x96, adjusted_mode->crtc_vdisplay);
+			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x97, adjusted_mode->crtc_vdisplay >> 8);
+		}
 	}
 
 	return true;
@@ -389,7 +398,7 @@
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa8, 0xfd, jregA8);
 
 	/* Set Threshold */
-	if (ast->chip == AST2300) {
+	if (ast->chip == AST2300 || ast->chip == AST2400) {
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa7, 0x78);
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa6, 0x60);
 	} else if (ast->chip == AST2100 ||
@@ -451,9 +460,13 @@
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 		ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0);
+		if (ast->tx_chip_type == AST_TX_DP501)
+			ast_set_dp501_video_output(crtc->dev, 1);
 		ast_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_OFF:
+		if (ast->tx_chip_type == AST_TX_DP501)
+			ast_set_dp501_video_output(crtc->dev, 0);
 		ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0x20);
 		break;
 	}
@@ -729,10 +742,24 @@
 static int ast_get_modes(struct drm_connector *connector)
 {
 	struct ast_connector *ast_connector = to_ast_connector(connector);
+	struct ast_private *ast = connector->dev->dev_private;
 	struct edid *edid;
 	int ret;
+	bool flags = false;
+	if (ast->tx_chip_type == AST_TX_DP501) {
+		ast->dp501_maxclk = 0xff;
+		edid = kmalloc(128, GFP_KERNEL);
+		if (!edid)
+			return -ENOMEM;
 
-	edid = drm_get_edid(connector, &ast_connector->i2c->adapter);
+		flags = ast_dp501_read_edid(connector->dev, (u8 *)edid);
+		if (flags)
+			ast->dp501_maxclk = ast_get_dp501_max_clk(connector->dev);
+		else
+			kfree(edid);
+	}
+	if (!flags)
+		edid = drm_get_edid(connector, &ast_connector->i2c->adapter);
 	if (edid) {
 		drm_mode_connector_update_edid_property(&ast_connector->base, edid);
 		ret = drm_add_edid_modes(connector, edid);
@@ -746,7 +773,56 @@
 static int ast_mode_valid(struct drm_connector *connector,
 			  struct drm_display_mode *mode)
 {
-	return MODE_OK;
+	struct ast_private *ast = connector->dev->dev_private;
+	int flags = MODE_NOMODE;
+	uint32_t jtemp;
+
+	if (ast->support_wide_screen) {
+		if ((mode->hdisplay == 1680) && (mode->vdisplay == 1050))
+			return MODE_OK;
+		if ((mode->hdisplay == 1280) && (mode->vdisplay == 800))
+			return MODE_OK;
+		if ((mode->hdisplay == 1440) && (mode->vdisplay == 900))
+			return MODE_OK;
+		if ((mode->hdisplay == 1360) && (mode->vdisplay == 768))
+			return MODE_OK;
+		if ((mode->hdisplay == 1600) && (mode->vdisplay == 900))
+			return MODE_OK;
+
+		if ((ast->chip == AST2100) || (ast->chip == AST2200) || (ast->chip == AST2300) || (ast->chip == AST2400) || (ast->chip == AST1180)) {
+			if ((mode->hdisplay == 1920) && (mode->vdisplay == 1080))
+				return MODE_OK;
+
+			if ((mode->hdisplay == 1920) && (mode->vdisplay == 1200)) {
+				jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff);
+				if (jtemp & 0x01)
+					return MODE_NOMODE;
+				else
+					return MODE_OK;
+			}
+		}
+	}
+	switch (mode->hdisplay) {
+	case 640:
+		if (mode->vdisplay == 480) flags = MODE_OK;
+		break;
+	case 800:
+		if (mode->vdisplay == 600) flags = MODE_OK;
+		break;
+	case 1024:
+		if (mode->vdisplay == 768) flags = MODE_OK;
+		break;
+	case 1280:
+		if (mode->vdisplay == 1024) flags = MODE_OK;
+		break;
+	case 1600:
+		if (mode->vdisplay == 1200) flags = MODE_OK;
+		break;
+	default:
+		return flags;
+	}
+
+	return flags;
 }
 
 static void ast_connector_destroy(struct drm_connector *connector)

diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index 635f6ff..38d437f 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c

@@ -78,7 +78,7 @@
 	for (i = 0x81; i <= 0x8f; i++)
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, 0x00);
 
-	if (ast->chip == AST2300) {
+	if (ast->chip == AST2300 || ast->chip == AST2400) {
 		if (dev->pdev->revision >= 0x20)
 			ext_reg_info = extreginfo_ast2300;
 		else
@@ -102,23 +102,32 @@
 
 	/* Enable RAMDAC for A1 */
 	reg = 0x04;
-	if (ast->chip == AST2300)
+	if (ast->chip == AST2300 || ast->chip == AST2400)
 		reg |= 0x20;
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff, reg);
 }
 
-static inline u32 mindwm(struct ast_private *ast, u32 r)
+u32 ast_mindwm(struct ast_private *ast, u32 r)
 {
+	uint32_t data;
+
 	ast_write32(ast, 0xf004, r & 0xffff0000);
 	ast_write32(ast, 0xf000, 0x1);
 
+	do {
+		data = ast_read32(ast, 0xf004) & 0xffff0000;
+	} while (data != (r & 0xffff0000));
 	return ast_read32(ast, 0x10000 + (r & 0x0000ffff));
 }
 
-static inline void moutdwm(struct ast_private *ast, u32 r, u32 v)
+void ast_moutdwm(struct ast_private *ast, u32 r, u32 v)
 {
+	uint32_t data;
 	ast_write32(ast, 0xf004, r & 0xffff0000);
 	ast_write32(ast, 0xf000, 0x1);
+	do {
+		data = ast_read32(ast, 0xf004) & 0xffff0000;
+	} while (data != (r & 0xffff0000));
 	ast_write32(ast, 0x10000 + (r & 0x0000ffff), v);
 }
 
@@ -154,28 +163,28 @@
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x00000001 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000001 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x40;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x40;
 		if (++timeout > TIMEOUT_AST2150) {
-			moutdwm(ast, 0x1e6e0070, 0x00000000);
+			ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 			return 0xffffffff;
 		}
 	} while (!data);
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x00000003 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000003 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x40;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x40;
 		if (++timeout > TIMEOUT_AST2150) {
-			moutdwm(ast, 0x1e6e0070, 0x00000000);
+			ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 			return 0xffffffff;
 		}
 	} while (!data);
-	data = (mindwm(ast, 0x1e6e0070) & 0x80) >> 7;
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
+	data = (ast_mindwm(ast, 0x1e6e0070) & 0x80) >> 7;
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 	return data;
 }
 
@@ -184,18 +193,18 @@
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x40;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x40;
 		if (++timeout > TIMEOUT_AST2150) {
-			moutdwm(ast, 0x1e6e0070, 0x00000000);
+			ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 			return 0xffffffff;
 		}
 	} while (!data);
-	data = (mindwm(ast, 0x1e6e0070) & 0x80) >> 7;
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
+	data = (ast_mindwm(ast, 0x1e6e0070) & 0x80) >> 7;
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 	return data;
 }
 #endif
@@ -215,7 +224,7 @@
 	u32 patcnt, loop;
 
 	for (patcnt = 0; patcnt < CBR_PATNUM_AST2150; patcnt++) {
-		moutdwm(ast, 0x1e6e007c, pattern_AST2150[patcnt]);
+		ast_moutdwm(ast, 0x1e6e007c, pattern_AST2150[patcnt]);
 		for (loop = 0; loop < CBR_PASSNUM_AST2150; loop++) {
 			if (cbrtest_ast2150(ast))
 				break;
@@ -237,7 +246,7 @@
 	passcnt = 0;
 
 	for (dlli = 0; dlli < 100; dlli++) {
-		moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24));
+		ast_moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24));
 		data = cbrscan_ast2150(ast, busw);
 		if (data != 0) {
 			if (data & 0x1) {
@@ -254,7 +263,7 @@
 		goto cbr_start;
 
 	dlli = dll_min[0] + (((dll_max[0] - dll_min[0]) * 7) >> 4);
-	moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24));
+	ast_moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24));
 }
 
 
@@ -365,10 +374,12 @@
 	ast_open_key(ast);
 	ast_set_def_ext_reg(dev);
 
-	if (ast->chip == AST2300)
+	if (ast->chip == AST2300 || ast->chip == AST2400)
 		ast_init_dram_2300(dev);
 	else
 		ast_init_dram_reg(dev);
+
+	ast_init_3rdtx(dev);
 }
 
 /* AST 2300 DRAM settings */
@@ -403,6 +414,7 @@
 /*
  * DQSI DLL CBR Setting
  */
+#define CBR_SIZE0            ((1  << 10) - 1)
 #define CBR_SIZE1            ((4  << 10) - 1)
 #define CBR_SIZE2            ((64 << 10) - 1)
 #define CBR_PASSNUM          5
@@ -423,88 +435,84 @@
 	0x7C61D253
 };
 
-#if 0 /* unused in DDX, included for completeness */
 static int mmc_test_burst(struct ast_private *ast, u32 datagen)
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x000000c1 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x000000c1 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x3000;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x3000;
 		if (data & 0x2000) {
 			return 0;
 		}
 		if (++timeout > TIMEOUT) {
-			moutdwm(ast, 0x1e6e0070, 0x00000000);
+			ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 			return 0;
 		}
 	} while (!data);
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 	return 1;
 }
-#endif
 
 static int mmc_test_burst2(struct ast_private *ast, u32 datagen)
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x00000041 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000041 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x1000;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x1000;
 		if (++timeout > TIMEOUT) {
-			moutdwm(ast, 0x1e6e0070, 0x0);
+			ast_moutdwm(ast, 0x1e6e0070, 0x0);
 			return -1;
 		}
 	} while (!data);
-	data = mindwm(ast, 0x1e6e0078);
+	data = ast_mindwm(ast, 0x1e6e0078);
 	data = (data | (data >> 16)) & 0xffff;
-	moutdwm(ast, 0x1e6e0070, 0x0);
+	ast_moutdwm(ast, 0x1e6e0070, 0x0);
 	return data;
 }
 
-#if 0 /* Unused in DDX here for completeness */
 static int mmc_test_single(struct ast_private *ast, u32 datagen)
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x000000c5 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x000000c5 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x3000;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x3000;
 		if (data & 0x2000)
 			return 0;
 		if (++timeout > TIMEOUT) {
-			moutdwm(ast, 0x1e6e0070, 0x0);
+			ast_moutdwm(ast, 0x1e6e0070, 0x0);
 			return 0;
 		}
 	} while (!data);
-	moutdwm(ast, 0x1e6e0070, 0x0);
+	ast_moutdwm(ast, 0x1e6e0070, 0x0);
 	return 1;
 }
-#endif
 
 static int mmc_test_single2(struct ast_private *ast, u32 datagen)
 {
 	u32 data, timeout;
 
-	moutdwm(ast, 0x1e6e0070, 0x00000000);
-	moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3));
 	timeout = 0;
 	do {
-		data = mindwm(ast, 0x1e6e0070) & 0x1000;
+		data = ast_mindwm(ast, 0x1e6e0070) & 0x1000;
 		if (++timeout > TIMEOUT) {
-			moutdwm(ast, 0x1e6e0070, 0x0);
+			ast_moutdwm(ast, 0x1e6e0070, 0x0);
 			return -1;
 		}
 	} while (!data);
-	data = mindwm(ast, 0x1e6e0078);
+	data = ast_mindwm(ast, 0x1e6e0078);
 	data = (data | (data >> 16)) & 0xffff;
-	moutdwm(ast, 0x1e6e0070, 0x0);
+	ast_moutdwm(ast, 0x1e6e0070, 0x0);
 	return data;
 }
 
@@ -533,7 +541,7 @@
 
 	data2 = 3;
 	for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) {
-		moutdwm(ast, 0x1e6e007c, pattern[patcnt]);
+		ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]);
 		for (loop = 0; loop < CBR_PASSNUM2; loop++) {
 			if ((data = cbr_test(ast)) != 0) {
 				data2 &= data;
@@ -568,7 +576,7 @@
 
 	data2 = 0xffff;
 	for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) {
-		moutdwm(ast, 0x1e6e007c, pattern[patcnt]);
+		ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]);
 		for (loop = 0; loop < CBR_PASSNUM2; loop++) {
 			if ((data = cbr_test2(ast)) != 0) {
 				data2 &= data;
@@ -583,106 +591,35 @@
 	return data2;
 }
 
-#if 0 /* unused in DDX - added for completeness */
-static void finetuneDQI(struct ast_private *ast, struct ast2300_dram_param *param)
+static u32 cbr_test3(struct ast_private *ast)
 {
-	u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt;
+	if (!mmc_test_burst(ast, 0))
+		return 0;
+	if (!mmc_test_single(ast, 0))
+		return 0;
+	return 1;
+}
 
-	gold_sadj[0] = (mindwm(ast, 0x1E6E0024) >> 16) & 0xffff;
-	gold_sadj[1] = gold_sadj[0] >> 8;
-	gold_sadj[0] = gold_sadj[0] & 0xff;
-	gold_sadj[0] = (gold_sadj[0] + gold_sadj[1]) >> 1;
-	gold_sadj[1] = gold_sadj[0];
-
-	for (cnt = 0; cnt < 16; cnt++) {
-		dllmin[cnt] = 0xff;
-		dllmax[cnt] = 0x0;
-	}
-	passcnt = 0;
-	for (dlli = 0; dlli < 76; dlli++) {
-		moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24));
-		/* Wait DQSI latch phase calibration */
-		moutdwm(ast, 0x1E6E0074, 0x00000010);
-		moutdwm(ast, 0x1E6E0070, 0x00000003);
-		do {
-			data = mindwm(ast, 0x1E6E0070);
-		} while (!(data & 0x00001000));
-		moutdwm(ast, 0x1E6E0070, 0x00000000);
-
-		moutdwm(ast, 0x1E6E0074, CBR_SIZE1);
-		data = cbr_scan2(ast);
-		if (data != 0) {
-			mask = 0x00010001;
-			for (cnt = 0; cnt < 16; cnt++) {
-				if (data & mask) {
-					if (dllmin[cnt] > dlli) {
-						dllmin[cnt] = dlli;
-					}
-					if (dllmax[cnt] < dlli) {
-						dllmax[cnt] = dlli;
-					}
-				}
-				mask <<= 1;
-			}
-			passcnt++;
-		} else if (passcnt >= CBR_THRESHOLD) {
-			break;
-		}
-	}
-	data = 0;
-	for (cnt = 0; cnt < 8; cnt++) {
-		data >>= 3;
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD)) {
-			dlli = (dllmin[cnt] + dllmax[cnt]) >> 1;
-			if (gold_sadj[0] >= dlli) {
-				dlli = (gold_sadj[0] - dlli) >> 1;
-				if (dlli > 3) {
-					dlli = 3;
-				}
-			} else {
-				dlli = (dlli - gold_sadj[0]) >> 1;
-				if (dlli > 4) {
-					dlli = 4;
-				}
-				dlli = (8 - dlli) & 0x7;
-			}
-			data |= dlli << 21;
-		}
-	}
-	moutdwm(ast, 0x1E6E0080, data);
-
-	data = 0;
-	for (cnt = 8; cnt < 16; cnt++) {
-		data >>= 3;
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD)) {
-			dlli = (dllmin[cnt] + dllmax[cnt]) >> 1;
-			if (gold_sadj[1] >= dlli) {
-				dlli = (gold_sadj[1] - dlli) >> 1;
-				if (dlli > 3) {
-					dlli = 3;
-				} else {
-					dlli = (dlli - 1) & 0x7;
-				}
-			} else {
-				dlli = (dlli - gold_sadj[1]) >> 1;
-				dlli += 1;
-				if (dlli > 4) {
-					dlli = 4;
-				}
-				dlli = (8 - dlli) & 0x7;
-			}
-			data |= dlli << 21;
-		}
-	}
-	moutdwm(ast, 0x1E6E0084, data);
-
-} /* finetuneDQI */
-#endif
-
-static void finetuneDQI_L(struct ast_private *ast, struct ast2300_dram_param *param)
+static u32 cbr_scan3(struct ast_private *ast)
 {
-	u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt;
+	u32 patcnt, loop;
 
+	for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) {
+		ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]);
+		for (loop = 0; loop < 2; loop++) {
+			if (cbr_test3(ast))
+				break;
+		}
+		if (loop == 2)
+			return 0;
+	}
+	return 1;
+}
+
+static bool finetuneDQI_L(struct ast_private *ast, struct ast2300_dram_param *param)
+{
+	u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt, retry = 0;
+	bool status = false;
 FINETUNE_START:
 	for (cnt = 0; cnt < 16; cnt++) {
 		dllmin[cnt] = 0xff;
@@ -690,16 +627,8 @@
 	}
 	passcnt = 0;
 	for (dlli = 0; dlli < 76; dlli++) {
-		moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24));
-		/* Wait DQSI latch phase calibration */
-		moutdwm(ast, 0x1E6E0074, 0x00000010);
-		moutdwm(ast, 0x1E6E0070, 0x00000003);
-		do {
-			data = mindwm(ast, 0x1E6E0070);
-		} while (!(data & 0x00001000));
-		moutdwm(ast, 0x1E6E0070, 0x00000000);
-
-		moutdwm(ast, 0x1E6E0074, CBR_SIZE1);
+		ast_moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24));
+		ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE1);
 		data = cbr_scan2(ast);
 		if (data != 0) {
 			mask = 0x00010001;
@@ -727,9 +656,13 @@
 			passcnt++;
 		}
 	}
+	if (retry++ > 10)
+		goto FINETUNE_DONE;
 	if (passcnt != 16) {
 		goto FINETUNE_START;
 	}
+	status = true;
+FINETUNE_DONE:
 	gold_sadj[0] = gold_sadj[0] >> 4;
 	gold_sadj[1] = gold_sadj[0];
 
@@ -753,7 +686,7 @@
 			data |= dlli << 21;
 		}
 	}
-	moutdwm(ast, 0x1E6E0080, data);
+	ast_moutdwm(ast, 0x1E6E0080, data);
 
 	data = 0;
 	for (cnt = 8; cnt < 16; cnt++) {
@@ -778,162 +711,116 @@
 			data |= dlli << 21;
 		}
 	}
-	moutdwm(ast, 0x1E6E0084, data);
-
+	ast_moutdwm(ast, 0x1E6E0084, data);
+	return status;
 } /* finetuneDQI_L */
 
-static void finetuneDQI_L2(struct ast_private *ast, struct ast2300_dram_param *param)
+static void finetuneDQSI(struct ast_private *ast)
 {
-	u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt, data2;
+	u32 dlli, dqsip, dqidly;
+	u32 reg_mcr18, reg_mcr0c, passcnt[2], diff;
+	u32 g_dqidly, g_dqsip, g_margin, g_side;
+	u16 pass[32][2][2];
+	char tag[2][76];
 
-	for (cnt = 0; cnt < 16; cnt++) {
-		dllmin[cnt] = 0xff;
-		dllmax[cnt] = 0x0;
-	}
-	passcnt = 0;
+	/* Disable DQI CBR */
+	reg_mcr0c  = ast_mindwm(ast, 0x1E6E000C);
+	reg_mcr18  = ast_mindwm(ast, 0x1E6E0018);
+	reg_mcr18 &= 0x0000ffff;
+	ast_moutdwm(ast, 0x1E6E0018, reg_mcr18);
+
 	for (dlli = 0; dlli < 76; dlli++) {
-		moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24));
-		/* Wait DQSI latch phase calibration */
-		moutdwm(ast, 0x1E6E0074, 0x00000010);
-		moutdwm(ast, 0x1E6E0070, 0x00000003);
-		do {
-			data = mindwm(ast, 0x1E6E0070);
-		} while (!(data & 0x00001000));
-		moutdwm(ast, 0x1E6E0070, 0x00000000);
-
-		moutdwm(ast, 0x1E6E0074, CBR_SIZE2);
-		data = cbr_scan2(ast);
-		if (data != 0) {
-			mask = 0x00010001;
-			for (cnt = 0; cnt < 16; cnt++) {
-				if (data & mask) {
-					if (dllmin[cnt] > dlli) {
-						dllmin[cnt] = dlli;
-					}
-					if (dllmax[cnt] < dlli) {
-						dllmax[cnt] = dlli;
-					}
-				}
-				mask <<= 1;
-			}
-			passcnt++;
-		} else if (passcnt >= CBR_THRESHOLD2) {
-			break;
-		}
+		tag[0][dlli] = 0x0;
+		tag[1][dlli] = 0x0;
 	}
-	gold_sadj[0] = 0x0;
-	gold_sadj[1] = 0xFF;
-	for (cnt = 0; cnt < 8; cnt++) {
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) {
-			if (gold_sadj[0] < dllmin[cnt]) {
-				gold_sadj[0] = dllmin[cnt];
-			}
-			if (gold_sadj[1] > dllmax[cnt]) {
-				gold_sadj[1] = dllmax[cnt];
-			}
-		}
+	for (dqidly = 0; dqidly < 32; dqidly++) {
+		pass[dqidly][0][0] = 0xff;
+		pass[dqidly][0][1] = 0x0;
+		pass[dqidly][1][0] = 0xff;
+		pass[dqidly][1][1] = 0x0;
 	}
-	gold_sadj[0] = (gold_sadj[1] + gold_sadj[0]) >> 1;
-	gold_sadj[1] = mindwm(ast, 0x1E6E0080);
-
-	data = 0;
-	for (cnt = 0; cnt < 8; cnt++) {
-		data >>= 3;
-		data2 = gold_sadj[1] & 0x7;
-		gold_sadj[1] >>= 3;
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) {
-			dlli = (dllmin[cnt] + dllmax[cnt]) >> 1;
-			if (gold_sadj[0] >= dlli) {
-				dlli = (gold_sadj[0] - dlli) >> 1;
-				if (dlli > 0) {
-					dlli = 1;
-				}
-				if (data2 != 3) {
-					data2 = (data2 + dlli) & 0x7;
-				}
-			} else {
-				dlli = (dlli - gold_sadj[0]) >> 1;
-				if (dlli > 0) {
-					dlli = 1;
-				}
-				if (data2 != 4) {
-					data2 = (data2 - dlli) & 0x7;
+	for (dqidly = 0; dqidly < 32; dqidly++) {
+		passcnt[0] = passcnt[1] = 0;
+		for (dqsip = 0; dqsip < 2; dqsip++) {
+			ast_moutdwm(ast, 0x1E6E000C, 0);
+			ast_moutdwm(ast, 0x1E6E0018, reg_mcr18 | (dqidly << 16) | (dqsip << 23));
+			ast_moutdwm(ast, 0x1E6E000C, reg_mcr0c);
+			for (dlli = 0; dlli < 76; dlli++) {
+				ast_moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24));
+				ast_moutdwm(ast, 0x1E6E0070, 0);
+				ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE0);
+				if (cbr_scan3(ast)) {
+					if (dlli == 0)
+						break;
+					passcnt[dqsip]++;
+					tag[dqsip][dlli] = 'P';
+					if (dlli < pass[dqidly][dqsip][0])
+						pass[dqidly][dqsip][0] = (u16) dlli;
+					if (dlli > pass[dqidly][dqsip][1])
+						pass[dqidly][dqsip][1] = (u16) dlli;
+				} else if (passcnt[dqsip] >= 5)
+					break;
+				else {
+					pass[dqidly][dqsip][0] = 0xff;
+					pass[dqidly][dqsip][1] = 0x0;
 				}
 			}
 		}
-		data |= data2 << 21;
+		if (passcnt[0] == 0 && passcnt[1] == 0)
+			dqidly++;
 	}
-	moutdwm(ast, 0x1E6E0080, data);
+	/* Search margin */
+	g_dqidly = g_dqsip = g_margin = g_side = 0;
 
-	gold_sadj[0] = 0x0;
-	gold_sadj[1] = 0xFF;
-	for (cnt = 8; cnt < 16; cnt++) {
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) {
-			if (gold_sadj[0] < dllmin[cnt]) {
-				gold_sadj[0] = dllmin[cnt];
-			}
-			if (gold_sadj[1] > dllmax[cnt]) {
-				gold_sadj[1] = dllmax[cnt];
+	for (dqidly = 0; dqidly < 32; dqidly++) {
+		for (dqsip = 0; dqsip < 2; dqsip++) {
+			if (pass[dqidly][dqsip][0] > pass[dqidly][dqsip][1])
+				continue;
+			diff = pass[dqidly][dqsip][1] - pass[dqidly][dqsip][0];
+			if ((diff+2) < g_margin)
+				continue;
+			passcnt[0] = passcnt[1] = 0;
+			for (dlli = pass[dqidly][dqsip][0]; dlli > 0  && tag[dqsip][dlli] != 0; dlli--, passcnt[0]++);
+			for (dlli = pass[dqidly][dqsip][1]; dlli < 76 && tag[dqsip][dlli] != 0; dlli++, passcnt[1]++);
+			if (passcnt[0] > passcnt[1])
+				passcnt[0] = passcnt[1];
+			passcnt[1] = 0;
+			if (passcnt[0] > g_side)
+				passcnt[1] = passcnt[0] - g_side;
+			if (diff > (g_margin+1) && (passcnt[1] > 0 || passcnt[0] > 8)) {
+				g_margin = diff;
+				g_dqidly = dqidly;
+				g_dqsip  = dqsip;
+				g_side   = passcnt[0];
+			} else if (passcnt[1] > 1 && g_side < 8) {
+				if (diff > g_margin)
+					g_margin = diff;
+				g_dqidly = dqidly;
+				g_dqsip  = dqsip;
+				g_side   = passcnt[0];
 			}
 		}
 	}
-	gold_sadj[0] = (gold_sadj[1] + gold_sadj[0]) >> 1;
-	gold_sadj[1] = mindwm(ast, 0x1E6E0084);
+	reg_mcr18 = reg_mcr18 | (g_dqidly << 16) | (g_dqsip << 23);
+	ast_moutdwm(ast, 0x1E6E0018, reg_mcr18);
 
-	data = 0;
-	for (cnt = 8; cnt < 16; cnt++) {
-		data >>= 3;
-		data2 = gold_sadj[1] & 0x7;
-		gold_sadj[1] >>= 3;
-		if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) {
-			dlli = (dllmin[cnt] + dllmax[cnt]) >> 1;
-			if (gold_sadj[0] >= dlli) {
-				dlli = (gold_sadj[0] - dlli) >> 1;
-				if (dlli > 0) {
-					dlli = 1;
-				}
-				if (data2 != 3) {
-					data2 = (data2 + dlli) & 0x7;
-				}
-			} else {
-				dlli = (dlli - gold_sadj[0]) >> 1;
-				if (dlli > 0) {
-					dlli = 1;
-				}
-				if (data2 != 4) {
-					data2 = (data2 - dlli) & 0x7;
-				}
-			}
-		}
-		data |= data2 << 21;
-	}
-	moutdwm(ast, 0x1E6E0084, data);
-
-} /* finetuneDQI_L2 */
-
-static void cbr_dll2(struct ast_private *ast, struct ast2300_dram_param *param)
+}
+static bool cbr_dll2(struct ast_private *ast, struct ast2300_dram_param *param)
 {
-	u32 dllmin[2], dllmax[2], dlli, data, data2, passcnt;
+	u32 dllmin[2], dllmax[2], dlli, data, passcnt, retry = 0;
+	bool status = false;
 
-
-	finetuneDQI_L(ast, param);
-	finetuneDQI_L2(ast, param);
+	finetuneDQSI(ast);
+	if (finetuneDQI_L(ast, param) == false)
+		return status;
 
 CBR_START2:
 	dllmin[0] = dllmin[1] = 0xff;
 	dllmax[0] = dllmax[1] = 0x0;
 	passcnt = 0;
 	for (dlli = 0; dlli < 76; dlli++) {
-		moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24));
-		/* Wait DQSI latch phase calibration */
-		moutdwm(ast, 0x1E6E0074, 0x00000010);
-		moutdwm(ast, 0x1E6E0070, 0x00000003);
-		do {
-			data = mindwm(ast, 0x1E6E0070);
-		} while (!(data & 0x00001000));
-		moutdwm(ast, 0x1E6E0070, 0x00000000);
-
-		moutdwm(ast, 0x1E6E0074, CBR_SIZE2);
+		ast_moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24));
+		ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE2);
 		data = cbr_scan(ast);
 		if (data != 0) {
 			if (data & 0x1) {
@@ -957,44 +844,31 @@
 			break;
 		}
 	}
+	if (retry++ > 10)
+		goto CBR_DONE2;
 	if (dllmax[0] == 0 || (dllmax[0]-dllmin[0]) < CBR_THRESHOLD) {
 		goto CBR_START2;
 	}
 	if (dllmax[1] == 0 || (dllmax[1]-dllmin[1]) < CBR_THRESHOLD) {
 		goto CBR_START2;
 	}
+	status = true;
+CBR_DONE2:
 	dlli  = (dllmin[1] + dllmax[1]) >> 1;
 	dlli <<= 8;
 	dlli += (dllmin[0] + dllmax[0]) >> 1;
-	moutdwm(ast, 0x1E6E0068, (mindwm(ast, 0x1E6E0068) & 0xFFFF) | (dlli << 16));
-
-	data  = (mindwm(ast, 0x1E6E0080) >> 24) & 0x1F;
-	data2 = (mindwm(ast, 0x1E6E0018) & 0xff80ffff) | (data << 16);
-	moutdwm(ast, 0x1E6E0018, data2);
-	moutdwm(ast, 0x1E6E0024, 0x8001 | (data << 1) | (param->dll2_finetune_step << 8));
-
-	/* Wait DQSI latch phase calibration */
-	moutdwm(ast, 0x1E6E0074, 0x00000010);
-	moutdwm(ast, 0x1E6E0070, 0x00000003);
-	do {
-		data = mindwm(ast, 0x1E6E0070);
-	} while (!(data & 0x00001000));
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
-	moutdwm(ast, 0x1E6E0070, 0x00000003);
-	do {
-		data = mindwm(ast, 0x1E6E0070);
-	} while (!(data & 0x00001000));
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0068, ast_mindwm(ast, 0x1E720058) | (dlli << 16));
+	return status;
 } /* CBRDLL2 */
 
 static void get_ddr3_info(struct ast_private *ast, struct ast2300_dram_param *param)
 {
 	u32 trap, trap_AC2, trap_MRS;
 
-	moutdwm(ast, 0x1E6E2000, 0x1688A8A8);
+	ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8);
 
 	/* Ger trap info */
-	trap = (mindwm(ast, 0x1E6E2070) >> 25) & 0x3;
+	trap = (ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3;
 	trap_AC2  = 0x00020000 + (trap << 16);
 	trap_AC2 |= 0x00300000 + ((trap & 0x2) << 19);
 	trap_MRS  = 0x00000010 + (trap << 4);
@@ -1008,22 +882,35 @@
 
 	switch (param->dram_freq) {
 	case 336:
-		moutdwm(ast, 0x1E6E2020, 0x0190);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0190);
 		param->wodt          = 0;
 		param->reg_AC1       = 0x22202725;
 		param->reg_AC2       = 0xAA007613 | trap_AC2;
 		param->reg_DQSIC     = 0x000000BA;
 		param->reg_MRS       = 0x04001400 | trap_MRS;
 		param->reg_EMRS      = 0x00000000;
-		param->reg_IOZ       = 0x00000034;
+		param->reg_IOZ       = 0x00000023;
 		param->reg_DQIDLY    = 0x00000074;
 		param->reg_FREQ      = 0x00004DC0;
 		param->madj_max      = 96;
 		param->dll2_finetune_step = 3;
+		switch (param->dram_chipid) {
+		default:
+		case AST_DRAM_512Mx16:
+		case AST_DRAM_1Gx16:
+			param->reg_AC2   = 0xAA007613 | trap_AC2;
+			break;
+		case AST_DRAM_2Gx16:
+			param->reg_AC2   = 0xAA00761C | trap_AC2;
+			break;
+		case AST_DRAM_4Gx16:
+			param->reg_AC2   = 0xAA007636 | trap_AC2;
+			break;
+		}
 		break;
 	default:
 	case 396:
-		moutdwm(ast, 0x1E6E2020, 0x03F1);
+		ast_moutdwm(ast, 0x1E6E2020, 0x03F1);
 		param->wodt          = 1;
 		param->reg_AC1       = 0x33302825;
 		param->reg_AC2       = 0xCC009617 | trap_AC2;
@@ -1033,7 +920,7 @@
 		param->reg_IOZ       = 0x00000034;
 		param->reg_DRV       = 0x000000FA;
 		param->reg_DQIDLY    = 0x00000089;
-		param->reg_FREQ      = 0x000050C0;
+		param->reg_FREQ      = 0x00005040;
 		param->madj_max      = 96;
 		param->dll2_finetune_step = 4;
 
@@ -1053,14 +940,14 @@
 		break;
 
 	case 408:
-		moutdwm(ast, 0x1E6E2020, 0x01F0);
+		ast_moutdwm(ast, 0x1E6E2020, 0x01F0);
 		param->wodt          = 1;
 		param->reg_AC1       = 0x33302825;
 		param->reg_AC2       = 0xCC009617 | trap_AC2;
 		param->reg_DQSIC     = 0x000000E2;
 		param->reg_MRS       = 0x04001600 | trap_MRS;
 		param->reg_EMRS      = 0x00000000;
-		param->reg_IOZ       = 0x00000034;
+		param->reg_IOZ       = 0x00000023;
 		param->reg_DRV       = 0x000000FA;
 		param->reg_DQIDLY    = 0x00000089;
 		param->reg_FREQ      = 0x000050C0;
@@ -1083,7 +970,7 @@
 
 		break;
 	case 456:
-		moutdwm(ast, 0x1E6E2020, 0x0230);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0230);
 		param->wodt          = 0;
 		param->reg_AC1       = 0x33302926;
 		param->reg_AC2       = 0xCD44961A;
@@ -1097,7 +984,7 @@
 		param->dll2_finetune_step = 4;
 		break;
 	case 504:
-		moutdwm(ast, 0x1E6E2020, 0x0270);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0270);
 		param->wodt          = 1;
 		param->reg_AC1       = 0x33302926;
 		param->reg_AC2       = 0xDE44A61D;
@@ -1111,7 +998,7 @@
 		param->dll2_finetune_step = 4;
 		break;
 	case 528:
-		moutdwm(ast, 0x1E6E2020, 0x0290);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0290);
 		param->wodt          = 1;
 		param->rodt          = 1;
 		param->reg_AC1       = 0x33302926;
@@ -1127,7 +1014,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 576:
-		moutdwm(ast, 0x1E6E2020, 0x0140);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0140);
 		param->reg_MADJ      = 0x00136868;
 		param->reg_SADJ      = 0x00004534;
 		param->wodt          = 1;
@@ -1145,7 +1032,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 600:
-		moutdwm(ast, 0x1E6E2020, 0x02E1);
+		ast_moutdwm(ast, 0x1E6E2020, 0x02E1);
 		param->reg_MADJ      = 0x00136868;
 		param->reg_SADJ      = 0x00004534;
 		param->wodt          = 1;
@@ -1163,7 +1050,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 624:
-		moutdwm(ast, 0x1E6E2020, 0x0160);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0160);
 		param->reg_MADJ      = 0x00136868;
 		param->reg_SADJ      = 0x00004534;
 		param->wodt          = 1;
@@ -1196,7 +1083,7 @@
 	case AST_DRAM_4Gx16:
 		param->dram_config = 0x133;
 		break;
-	}; /* switch size */
+	} /* switch size */
 
 	switch (param->vram_size) {
 	default:
@@ -1218,106 +1105,98 @@
 
 static void ddr3_init(struct ast_private *ast, struct ast2300_dram_param *param)
 {
-	u32 data, data2;
+	u32 data, data2, retry = 0;
 
-	moutdwm(ast, 0x1E6E0000, 0xFC600309);
-	moutdwm(ast, 0x1E6E0018, 0x00000100);
-	moutdwm(ast, 0x1E6E0024, 0x00000000);
-	moutdwm(ast, 0x1E6E0034, 0x00000000);
+ddr3_init_start:
+	ast_moutdwm(ast, 0x1E6E0000, 0xFC600309);
+	ast_moutdwm(ast, 0x1E6E0018, 0x00000100);
+	ast_moutdwm(ast, 0x1E6E0024, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0034, 0x00000000);
 	udelay(10);
-	moutdwm(ast, 0x1E6E0064, param->reg_MADJ);
-	moutdwm(ast, 0x1E6E0068, param->reg_SADJ);
+	ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ);
+	ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ);
 	udelay(10);
-	moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000);
+	ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000);
 	udelay(10);
 
-	moutdwm(ast, 0x1E6E0004, param->dram_config);
-	moutdwm(ast, 0x1E6E0008, 0x90040f);
-	moutdwm(ast, 0x1E6E0010, param->reg_AC1);
-	moutdwm(ast, 0x1E6E0014, param->reg_AC2);
-	moutdwm(ast, 0x1E6E0020, param->reg_DQSIC);
-	moutdwm(ast, 0x1E6E0080, 0x00000000);
-	moutdwm(ast, 0x1E6E0084, 0x00000000);
-	moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY);
-	moutdwm(ast, 0x1E6E0018, 0x4040A170);
-	moutdwm(ast, 0x1E6E0018, 0x20402370);
-	moutdwm(ast, 0x1E6E0038, 0x00000000);
-	moutdwm(ast, 0x1E6E0040, 0xFF444444);
-	moutdwm(ast, 0x1E6E0044, 0x22222222);
-	moutdwm(ast, 0x1E6E0048, 0x22222222);
-	moutdwm(ast, 0x1E6E004C, 0x00000002);
-	moutdwm(ast, 0x1E6E0050, 0x80000000);
-	moutdwm(ast, 0x1E6E0050, 0x00000000);
-	moutdwm(ast, 0x1E6E0054, 0);
-	moutdwm(ast, 0x1E6E0060, param->reg_DRV);
-	moutdwm(ast, 0x1E6E006C, param->reg_IOZ);
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
-	moutdwm(ast, 0x1E6E0074, 0x00000000);
-	moutdwm(ast, 0x1E6E0078, 0x00000000);
-	moutdwm(ast, 0x1E6E007C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0004, param->dram_config);
+	ast_moutdwm(ast, 0x1E6E0008, 0x90040f);
+	ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1);
+	ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2);
+	ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC);
+	ast_moutdwm(ast, 0x1E6E0080, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0084, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY);
+	ast_moutdwm(ast, 0x1E6E0018, 0x4000A170);
+	ast_moutdwm(ast, 0x1E6E0018, 0x00002370);
+	ast_moutdwm(ast, 0x1E6E0038, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0040, 0xFF444444);
+	ast_moutdwm(ast, 0x1E6E0044, 0x22222222);
+	ast_moutdwm(ast, 0x1E6E0048, 0x22222222);
+	ast_moutdwm(ast, 0x1E6E004C, 0x00000002);
+	ast_moutdwm(ast, 0x1E6E0050, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0054, 0);
+	ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV);
+	ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ);
+	ast_moutdwm(ast, 0x1E6E0070, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0074, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0078, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E007C, 0x00000000);
 	/* Wait MCLK2X lock to MCLK */
 	do {
-		data = mindwm(ast, 0x1E6E001C);
+		data = ast_mindwm(ast, 0x1E6E001C);
 	} while (!(data & 0x08000000));
-	moutdwm(ast, 0x1E6E0034, 0x00000001);
-	moutdwm(ast, 0x1E6E000C, 0x00005C04);
-	udelay(10);
-	moutdwm(ast, 0x1E6E000C, 0x00000000);
-	moutdwm(ast, 0x1E6E0034, 0x00000000);
-	data = mindwm(ast, 0x1E6E001C);
+	data = ast_mindwm(ast, 0x1E6E001C);
 	data = (data >> 8) & 0xff;
 	while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) {
-		data2 = (mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4;
+		data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4;
 		if ((data2 & 0xff) > param->madj_max) {
 			break;
 		}
-		moutdwm(ast, 0x1E6E0064, data2);
+		ast_moutdwm(ast, 0x1E6E0064, data2);
 		if (data2 & 0x00100000) {
 			data2 = ((data2 & 0xff) >> 3) + 3;
 		} else {
 			data2 = ((data2 & 0xff) >> 2) + 5;
 		}
-		data = mindwm(ast, 0x1E6E0068) & 0xffff00ff;
+		data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff;
 		data2 += data & 0xff;
 		data = data | (data2 << 8);
-		moutdwm(ast, 0x1E6E0068, data);
+		ast_moutdwm(ast, 0x1E6E0068, data);
 		udelay(10);
-		moutdwm(ast, 0x1E6E0064, mindwm(ast, 0x1E6E0064) | 0xC0000);
+		ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000);
 		udelay(10);
-		data = mindwm(ast, 0x1E6E0018) & 0xfffff1ff;
-		moutdwm(ast, 0x1E6E0018, data);
+		data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff;
+		ast_moutdwm(ast, 0x1E6E0018, data);
 		data = data | 0x200;
-		moutdwm(ast, 0x1E6E0018, data);
+		ast_moutdwm(ast, 0x1E6E0018, data);
 		do {
-			data = mindwm(ast, 0x1E6E001C);
+			data = ast_mindwm(ast, 0x1E6E001C);
 		} while (!(data & 0x08000000));
 
-		moutdwm(ast, 0x1E6E0034, 0x00000001);
-		moutdwm(ast, 0x1E6E000C, 0x00005C04);
-		udelay(10);
-		moutdwm(ast, 0x1E6E000C, 0x00000000);
-		moutdwm(ast, 0x1E6E0034, 0x00000000);
-		data = mindwm(ast, 0x1E6E001C);
+		data = ast_mindwm(ast, 0x1E6E001C);
 		data = (data >> 8) & 0xff;
 	}
-	data = mindwm(ast, 0x1E6E0018) | 0xC00;
-	moutdwm(ast, 0x1E6E0018, data);
+	ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0068) & 0xffff);
+	data = ast_mindwm(ast, 0x1E6E0018) | 0xC00;
+	ast_moutdwm(ast, 0x1E6E0018, data);
 
-	moutdwm(ast, 0x1E6E0034, 0x00000001);
-	moutdwm(ast, 0x1E6E000C, 0x00000040);
+	ast_moutdwm(ast, 0x1E6E0034, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E000C, 0x00000040);
 	udelay(50);
 	/* Mode Register Setting */
-	moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100);
-	moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
-	moutdwm(ast, 0x1E6E0028, 0x00000005);
-	moutdwm(ast, 0x1E6E0028, 0x00000007);
-	moutdwm(ast, 0x1E6E0028, 0x00000003);
-	moutdwm(ast, 0x1E6E0028, 0x00000001);
-	moutdwm(ast, 0x1E6E002C, param->reg_MRS);
-	moutdwm(ast, 0x1E6E000C, 0x00005C08);
-	moutdwm(ast, 0x1E6E0028, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100);
+	ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000005);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000007);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000003);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS);
+	ast_moutdwm(ast, 0x1E6E000C, 0x00005C08);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000001);
 
-	moutdwm(ast, 0x1E6E000C, 0x7FFF5C01);
+	ast_moutdwm(ast, 0x1E6E000C, 0x00005C01);
 	data = 0;
 	if (param->wodt) {
 		data = 0x300;
@@ -1325,30 +1204,23 @@
 	if (param->rodt) {
 		data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3);
 	}
-	moutdwm(ast, 0x1E6E0034, data | 0x3);
+	ast_moutdwm(ast, 0x1E6E0034, data | 0x3);
 
-	/* Wait DQI delay lock */
-	do {
-		data = mindwm(ast, 0x1E6E0080);
-	} while (!(data & 0x40000000));
-	/* Wait DQSI delay lock */
-	do {
-		data = mindwm(ast, 0x1E6E0020);
-	} while (!(data & 0x00000800));
 	/* Calibrate the DQSI delay */
-	cbr_dll2(ast, param);
+	if ((cbr_dll2(ast, param) == false) && (retry++ < 10))
+		goto ddr3_init_start;
 
-	moutdwm(ast, 0x1E6E0120, param->reg_FREQ);
+	ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ);
 	/* ECC Memory Initialization */
 #ifdef ECC
-	moutdwm(ast, 0x1E6E007C, 0x00000000);
-	moutdwm(ast, 0x1E6E0070, 0x221);
+	ast_moutdwm(ast, 0x1E6E007C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0070, 0x221);
 	do {
-		data = mindwm(ast, 0x1E6E0070);
+		data = ast_mindwm(ast, 0x1E6E0070);
 	} while (!(data & 0x00001000));
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
-	moutdwm(ast, 0x1E6E0050, 0x80000000);
-	moutdwm(ast, 0x1E6E0050, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0070, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x00000000);
 #endif
 
 
@@ -1358,10 +1230,10 @@
 {
 	u32 trap, trap_AC2, trap_MRS;
 
-	moutdwm(ast, 0x1E6E2000, 0x1688A8A8);
+	ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8);
 
 	/* Ger trap info */
-	trap = (mindwm(ast, 0x1E6E2070) >> 25) & 0x3;
+	trap = (ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3;
 	trap_AC2  = (trap << 20) | (trap << 16);
 	trap_AC2 += 0x00110000;
 	trap_MRS  = 0x00000040 | (trap << 4);
@@ -1375,7 +1247,7 @@
 
 	switch (param->dram_freq) {
 	case 264:
-		moutdwm(ast, 0x1E6E2020, 0x0130);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0130);
 		param->wodt          = 0;
 		param->reg_AC1       = 0x11101513;
 		param->reg_AC2       = 0x78117011;
@@ -1390,7 +1262,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 336:
-		moutdwm(ast, 0x1E6E2020, 0x0190);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0190);
 		param->wodt          = 1;
 		param->reg_AC1       = 0x22202613;
 		param->reg_AC2       = 0xAA009016 | trap_AC2;
@@ -1403,10 +1275,25 @@
 		param->reg_FREQ      = 0x00004DC0;
 		param->madj_max      = 96;
 		param->dll2_finetune_step = 3;
+		switch (param->dram_chipid) {
+		default:
+		case AST_DRAM_512Mx16:
+			param->reg_AC2   = 0xAA009012 | trap_AC2;
+			break;
+		case AST_DRAM_1Gx16:
+			param->reg_AC2   = 0xAA009016 | trap_AC2;
+			break;
+		case AST_DRAM_2Gx16:
+			param->reg_AC2   = 0xAA009023 | trap_AC2;
+			break;
+		case AST_DRAM_4Gx16:
+			param->reg_AC2   = 0xAA00903B | trap_AC2;
+			break;
+		}
 		break;
 	default:
 	case 396:
-		moutdwm(ast, 0x1E6E2020, 0x03F1);
+		ast_moutdwm(ast, 0x1E6E2020, 0x03F1);
 		param->wodt          = 1;
 		param->rodt          = 0;
 		param->reg_AC1       = 0x33302714;
@@ -1417,7 +1304,7 @@
 		param->reg_DRV       = 0x000000FA;
 		param->reg_IOZ       = 0x00000034;
 		param->reg_DQIDLY    = 0x00000089;
-		param->reg_FREQ      = 0x000050C0;
+		param->reg_FREQ      = 0x00005040;
 		param->madj_max      = 96;
 		param->dll2_finetune_step = 4;
 
@@ -1440,7 +1327,7 @@
 		break;
 
 	case 408:
-		moutdwm(ast, 0x1E6E2020, 0x01F0);
+		ast_moutdwm(ast, 0x1E6E2020, 0x01F0);
 		param->wodt          = 1;
 		param->rodt          = 0;
 		param->reg_AC1       = 0x33302714;
@@ -1473,7 +1360,7 @@
 
 		break;
 	case 456:
-		moutdwm(ast, 0x1E6E2020, 0x0230);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0230);
 		param->wodt          = 0;
 		param->reg_AC1       = 0x33302815;
 		param->reg_AC2       = 0xCD44B01E;
@@ -1488,7 +1375,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 504:
-		moutdwm(ast, 0x1E6E2020, 0x0261);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0261);
 		param->wodt          = 1;
 		param->rodt          = 1;
 		param->reg_AC1       = 0x33302815;
@@ -1504,7 +1391,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 528:
-		moutdwm(ast, 0x1E6E2020, 0x0120);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0120);
 		param->wodt          = 1;
 		param->rodt          = 1;
 		param->reg_AC1       = 0x33302815;
@@ -1520,7 +1407,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 552:
-		moutdwm(ast, 0x1E6E2020, 0x02A1);
+		ast_moutdwm(ast, 0x1E6E2020, 0x02A1);
 		param->wodt          = 1;
 		param->rodt          = 1;
 		param->reg_AC1       = 0x43402915;
@@ -1536,7 +1423,7 @@
 		param->dll2_finetune_step = 3;
 		break;
 	case 576:
-		moutdwm(ast, 0x1E6E2020, 0x0140);
+		ast_moutdwm(ast, 0x1E6E2020, 0x0140);
 		param->wodt          = 1;
 		param->rodt          = 1;
 		param->reg_AC1       = 0x43402915;
@@ -1567,7 +1454,7 @@
 	case AST_DRAM_4Gx16:
 		param->dram_config = 0x123;
 		break;
-	}; /* switch size */
+	} /* switch size */
 
 	switch (param->vram_size) {
 	default:
@@ -1588,110 +1475,102 @@
 
 static void ddr2_init(struct ast_private *ast, struct ast2300_dram_param *param)
 {
-	u32 data, data2;
+	u32 data, data2, retry = 0;
 
-	moutdwm(ast, 0x1E6E0000, 0xFC600309);
-	moutdwm(ast, 0x1E6E0018, 0x00000100);
-	moutdwm(ast, 0x1E6E0024, 0x00000000);
-	moutdwm(ast, 0x1E6E0064, param->reg_MADJ);
-	moutdwm(ast, 0x1E6E0068, param->reg_SADJ);
+ddr2_init_start:
+	ast_moutdwm(ast, 0x1E6E0000, 0xFC600309);
+	ast_moutdwm(ast, 0x1E6E0018, 0x00000100);
+	ast_moutdwm(ast, 0x1E6E0024, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ);
+	ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ);
 	udelay(10);
-	moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000);
+	ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000);
 	udelay(10);
 
-	moutdwm(ast, 0x1E6E0004, param->dram_config);
-	moutdwm(ast, 0x1E6E0008, 0x90040f);
-	moutdwm(ast, 0x1E6E0010, param->reg_AC1);
-	moutdwm(ast, 0x1E6E0014, param->reg_AC2);
-	moutdwm(ast, 0x1E6E0020, param->reg_DQSIC);
-	moutdwm(ast, 0x1E6E0080, 0x00000000);
-	moutdwm(ast, 0x1E6E0084, 0x00000000);
-	moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY);
-	moutdwm(ast, 0x1E6E0018, 0x4040A130);
-	moutdwm(ast, 0x1E6E0018, 0x20402330);
-	moutdwm(ast, 0x1E6E0038, 0x00000000);
-	moutdwm(ast, 0x1E6E0040, 0xFF808000);
-	moutdwm(ast, 0x1E6E0044, 0x88848466);
-	moutdwm(ast, 0x1E6E0048, 0x44440008);
-	moutdwm(ast, 0x1E6E004C, 0x00000000);
-	moutdwm(ast, 0x1E6E0050, 0x80000000);
-	moutdwm(ast, 0x1E6E0050, 0x00000000);
-	moutdwm(ast, 0x1E6E0054, 0);
-	moutdwm(ast, 0x1E6E0060, param->reg_DRV);
-	moutdwm(ast, 0x1E6E006C, param->reg_IOZ);
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
-	moutdwm(ast, 0x1E6E0074, 0x00000000);
-	moutdwm(ast, 0x1E6E0078, 0x00000000);
-	moutdwm(ast, 0x1E6E007C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0004, param->dram_config);
+	ast_moutdwm(ast, 0x1E6E0008, 0x90040f);
+	ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1);
+	ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2);
+	ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC);
+	ast_moutdwm(ast, 0x1E6E0080, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0084, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY);
+	ast_moutdwm(ast, 0x1E6E0018, 0x4000A130);
+	ast_moutdwm(ast, 0x1E6E0018, 0x00002330);
+	ast_moutdwm(ast, 0x1E6E0038, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0040, 0xFF808000);
+	ast_moutdwm(ast, 0x1E6E0044, 0x88848466);
+	ast_moutdwm(ast, 0x1E6E0048, 0x44440008);
+	ast_moutdwm(ast, 0x1E6E004C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0054, 0);
+	ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV);
+	ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ);
+	ast_moutdwm(ast, 0x1E6E0070, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0074, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0078, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E007C, 0x00000000);
 
 	/* Wait MCLK2X lock to MCLK */
 	do {
-		data = mindwm(ast, 0x1E6E001C);
+		data = ast_mindwm(ast, 0x1E6E001C);
 	} while (!(data & 0x08000000));
-	moutdwm(ast, 0x1E6E0034, 0x00000001);
-	moutdwm(ast, 0x1E6E000C, 0x00005C04);
-	udelay(10);
-	moutdwm(ast, 0x1E6E000C, 0x00000000);
-	moutdwm(ast, 0x1E6E0034, 0x00000000);
-	data = mindwm(ast, 0x1E6E001C);
+	data = ast_mindwm(ast, 0x1E6E001C);
 	data = (data >> 8) & 0xff;
 	while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) {
-		data2 = (mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4;
+		data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4;
 		if ((data2 & 0xff) > param->madj_max) {
 			break;
 		}
-		moutdwm(ast, 0x1E6E0064, data2);
+		ast_moutdwm(ast, 0x1E6E0064, data2);
 		if (data2 & 0x00100000) {
 			data2 = ((data2 & 0xff) >> 3) + 3;
 		} else {
 			data2 = ((data2 & 0xff) >> 2) + 5;
 		}
-		data = mindwm(ast, 0x1E6E0068) & 0xffff00ff;
+		data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff;
 		data2 += data & 0xff;
 		data = data | (data2 << 8);
-		moutdwm(ast, 0x1E6E0068, data);
+		ast_moutdwm(ast, 0x1E6E0068, data);
 		udelay(10);
-		moutdwm(ast, 0x1E6E0064, mindwm(ast, 0x1E6E0064) | 0xC0000);
+		ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000);
 		udelay(10);
-		data = mindwm(ast, 0x1E6E0018) & 0xfffff1ff;
-		moutdwm(ast, 0x1E6E0018, data);
+		data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff;
+		ast_moutdwm(ast, 0x1E6E0018, data);
 		data = data | 0x200;
-		moutdwm(ast, 0x1E6E0018, data);
+		ast_moutdwm(ast, 0x1E6E0018, data);
 		do {
-			data = mindwm(ast, 0x1E6E001C);
+			data = ast_mindwm(ast, 0x1E6E001C);
 		} while (!(data & 0x08000000));
 
-		moutdwm(ast, 0x1E6E0034, 0x00000001);
-		moutdwm(ast, 0x1E6E000C, 0x00005C04);
-		udelay(10);
-		moutdwm(ast, 0x1E6E000C, 0x00000000);
-		moutdwm(ast, 0x1E6E0034, 0x00000000);
-		data = mindwm(ast, 0x1E6E001C);
+		data = ast_mindwm(ast, 0x1E6E001C);
 		data = (data >> 8) & 0xff;
 	}
-	data = mindwm(ast, 0x1E6E0018) | 0xC00;
-	moutdwm(ast, 0x1E6E0018, data);
+	ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0008) & 0xffff);
+	data = ast_mindwm(ast, 0x1E6E0018) | 0xC00;
+	ast_moutdwm(ast, 0x1E6E0018, data);
 
-	moutdwm(ast, 0x1E6E0034, 0x00000001);
-	moutdwm(ast, 0x1E6E000C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0034, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E000C, 0x00000000);
 	udelay(50);
 	/* Mode Register Setting */
-	moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100);
-	moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
-	moutdwm(ast, 0x1E6E0028, 0x00000005);
-	moutdwm(ast, 0x1E6E0028, 0x00000007);
-	moutdwm(ast, 0x1E6E0028, 0x00000003);
-	moutdwm(ast, 0x1E6E0028, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100);
+	ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000005);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000007);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000003);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000001);
 
-	moutdwm(ast, 0x1E6E000C, 0x00005C08);
-	moutdwm(ast, 0x1E6E002C, param->reg_MRS);
-	moutdwm(ast, 0x1E6E0028, 0x00000001);
-	moutdwm(ast, 0x1E6E0030, param->reg_EMRS | 0x380);
-	moutdwm(ast, 0x1E6E0028, 0x00000003);
-	moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
-	moutdwm(ast, 0x1E6E0028, 0x00000003);
+	ast_moutdwm(ast, 0x1E6E000C, 0x00005C08);
+	ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000001);
+	ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS | 0x380);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000003);
+	ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS);
+	ast_moutdwm(ast, 0x1E6E0028, 0x00000003);
 
-	moutdwm(ast, 0x1E6E000C, 0x7FFF5C01);
+	ast_moutdwm(ast, 0x1E6E000C, 0x7FFF5C01);
 	data = 0;
 	if (param->wodt) {
 		data = 0x500;
@@ -1699,30 +1578,23 @@
 	if (param->rodt) {
 		data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3);
 	}
-	moutdwm(ast, 0x1E6E0034, data | 0x3);
-	moutdwm(ast, 0x1E6E0120, param->reg_FREQ);
+	ast_moutdwm(ast, 0x1E6E0034, data | 0x3);
+	ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ);
 
-	/* Wait DQI delay lock */
-	do {
-		data = mindwm(ast, 0x1E6E0080);
-	} while (!(data & 0x40000000));
-	/* Wait DQSI delay lock */
-	do {
-		data = mindwm(ast, 0x1E6E0020);
-	} while (!(data & 0x00000800));
 	/* Calibrate the DQSI delay */
-	cbr_dll2(ast, param);
+	if ((cbr_dll2(ast, param) == false) && (retry++ < 10))
+		goto ddr2_init_start;
 
 	/* ECC Memory Initialization */
 #ifdef ECC
-	moutdwm(ast, 0x1E6E007C, 0x00000000);
-	moutdwm(ast, 0x1E6E0070, 0x221);
+	ast_moutdwm(ast, 0x1E6E007C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0070, 0x221);
 	do {
-		data = mindwm(ast, 0x1E6E0070);
+		data = ast_mindwm(ast, 0x1E6E0070);
 	} while (!(data & 0x00001000));
-	moutdwm(ast, 0x1E6E0070, 0x00000000);
-	moutdwm(ast, 0x1E6E0050, 0x80000000);
-	moutdwm(ast, 0x1E6E0050, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0070, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E0050, 0x00000000);
 #endif
 
 }
@@ -1768,8 +1640,8 @@
 			ddr2_init(ast, &param);
 		}
 
-		temp = mindwm(ast, 0x1e6e2040);
-		moutdwm(ast, 0x1e6e2040, temp | 0x40);
+		temp = ast_mindwm(ast, 0x1e6e2040);
+		ast_moutdwm(ast, 0x1e6e2040, temp | 0x40);
 	}
 
 	/* wait ready */

diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index 95fa6ab..4c761dc 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h

@@ -42,7 +42,7 @@
 #define HBorder                 0x00000020
 #define VBorder                 0x00000010
 #define WideScreenMode		0x00000100
-
+#define NewModeInfo		0x00000200
 
 /* DCLK Index */
 #define VCLK25_175     		0x00
@@ -67,6 +67,11 @@
 #define VCLK106_5   		0x12
 #define VCLK146_25  		0x13
 #define VCLK148_5   		0x14
+#define VCLK71      		0x15
+#define VCLK88_75   		0x16
+#define VCLK119     		0x17
+#define VCLK85_5     		0x18
+#define VCLK97_75     		0x19
 
 static struct ast_vbios_dclk_info dclk_table[] = {
 	{0x2C, 0xE7, 0x03},					/* 00: VCLK25_175	*/
@@ -90,6 +95,10 @@
 	{0x28, 0x49, 0x80},					/* 12: VCLK106.5        */
 	{0x37, 0x49, 0x80},					/* 13: VCLK146.25       */
 	{0x1f, 0x45, 0x80},					/* 14: VCLK148.5        */
+	{0x47, 0x6c, 0x80},					/* 15: VCLK71       */
+	{0x25, 0x65, 0x80},					/* 16: VCLK88.75    */
+	{0x77, 0x58, 0x80},					/* 17: VCLK119      */
+	{0x32, 0x67, 0x80},				    /* 18: VCLK85_5     */
 };
 
 static struct ast_vbios_stdtable vbios_stdtable[] = {
@@ -225,41 +234,63 @@
 	 (SyncPP | Charx8Dot), 0xFF, 1, 0x33 },
 };
 
-static struct ast_vbios_enhtable res_1920x1200[] = {
-	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz */
-	 (SyncNP | Charx8Dot), 60, 1, 0x34 },
-	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz */
-	 (SyncNP | Charx8Dot), 0xFF, 1, 0x34 },
+/* 16:9 */
+static struct ast_vbios_enhtable res_1360x768[] = {
+	{1792, 1360, 64,112, 795,  768, 3, 6, VCLK85_5,	         /* 60Hz */
+	 (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x39 },
+	{1792, 1360, 64,112, 795,  768, 3, 6, VCLK85_5,	         /* end */
+	 (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x39 },
 };
 
+static struct ast_vbios_enhtable res_1600x900[] = {
+	{1760, 1600, 48, 32, 926,  900, 3, 5, VCLK97_75,	/* 60Hz CVT RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x3A },
+	{1760, 1600, 48, 32, 926,  900, 3, 5, VCLK97_75,	/* end */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x3A }
+};
+
+static struct ast_vbios_enhtable res_1920x1080[] = {
+	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x38 },
+	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x38 },
+};
+
+
 /* 16:10 */
 static struct ast_vbios_enhtable res_1280x800[] = {
+	{1440, 1280, 48, 32,  823,  800, 3, 6, VCLK71,	/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 35 },
 	{1680, 1280, 72,128,  831,  800, 3, 6, VCLK83_5,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 60, 1, 0x35 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x35 },
 	{1680, 1280, 72,128,  831,  800, 3, 6, VCLK83_5,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 0xFF, 1, 0x35 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x35 },
 
 };
 
 static struct ast_vbios_enhtable res_1440x900[] = {
+	{1600, 1440, 48, 32,  926,  900, 3, 6, VCLK88_75,	/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x36 },
 	{1904, 1440, 80,152,  934,  900, 3, 6, VCLK106_5,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 60, 1, 0x36 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x36 },
 	{1904, 1440, 80,152,  934,  900, 3, 6, VCLK106_5,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 0xFF, 1, 0x36 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x36 },
 };
 
 static struct ast_vbios_enhtable res_1680x1050[] = {
+	{1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119,	/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x37 },
 	{2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 60, 1, 0x37 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x37 },
 	{2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25,	/* 60Hz */
-	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode), 0xFF, 1, 0x37 },
+	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x37 },
 };
 
-/* HDTV */
-static struct ast_vbios_enhtable res_1920x1080[] = {
-	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode), 60, 1, 0x38 },
-	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode), 0xFF, 1, 0x38 },
+static struct ast_vbios_enhtable res_1920x1200[] = {
+	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x34 },
+	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x34 },
 };
+
 #endif

diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index f488be5..b9a695d 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c

@@ -434,17 +434,13 @@
 
 	tbo = &((*bo)->bo);
 	ttm_bo_unref(&tbo);
-	if (tbo == NULL)
-		*bo = NULL;
-
+	*bo = NULL;
 }
 
 void bochs_gem_free_object(struct drm_gem_object *obj)
 {
 	struct bochs_bo *bochs_bo = gem_to_bochs_bo(obj);
 
-	if (!bochs_bo)
-		return;
 	bochs_bo_unref(&bochs_bo);
 }
 

diff --git a/drivers/gpu/drm/bridge/ptn3460.c b/drivers/gpu/drm/bridge/ptn3460.c
index b171901..98fd17a 100644
--- a/drivers/gpu/drm/bridge/ptn3460.c
+++ b/drivers/gpu/drm/bridge/ptn3460.c

@@ -225,12 +225,6 @@
 	return num_modes;
 }
 
-static int ptn3460_mode_valid(struct drm_connector *connector,
-		struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector)
 {
 	struct ptn3460_bridge *ptn_bridge;
@@ -242,7 +236,6 @@
 
 struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = {
 	.get_modes = ptn3460_get_modes,
-	.mode_valid = ptn3460_mode_valid,
 	.best_encoder = ptn3460_best_encoder,
 };
 

diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 4b0170c..99c1983 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c

@@ -264,17 +264,13 @@
 
 	tbo = &((*bo)->bo);
 	ttm_bo_unref(&tbo);
-	if (tbo == NULL)
-		*bo = NULL;
-
+	*bo = NULL;
 }
 
 void cirrus_gem_free_object(struct drm_gem_object *obj)
 {
 	struct cirrus_bo *cirrus_bo = gem_to_cirrus_bo(obj);
 
-	if (!cirrus_bo)
-		return;
 	cirrus_bo_unref(&cirrus_bo);
 }
 

diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index f59433b..49332c5 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c

@@ -505,13 +505,6 @@
 	return count;
 }
 
-static int cirrus_vga_mode_valid(struct drm_connector *connector,
-				 struct drm_display_mode *mode)
-{
-	/* Any mode we've added is valid */
-	return MODE_OK;
-}
-
 static struct drm_encoder *cirrus_connector_best_encoder(struct drm_connector
 						  *connector)
 {
@@ -546,7 +539,6 @@
 
 struct drm_connector_helper_funcs cirrus_vga_connector_helper_funcs = {
 	.get_modes = cirrus_vga_get_modes,
-	.mode_valid = cirrus_vga_mode_valid,
 	.best_encoder = cirrus_connector_best_encoder,
 };
 

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index edec31f..68175b5 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c

@@ -363,7 +363,7 @@
 		list->master = dev->primary->master;
 	*maplist = list;
 	return 0;
-	}
+}
 
 int drm_addmap(struct drm_device * dev, resource_size_t offset,
 	       unsigned int size, enum drm_map_type type,
@@ -656,13 +656,13 @@
 		DRM_DEBUG("zone invalid\n");
 		return -EINVAL;
 	}
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -805,13 +805,13 @@
 	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
 	total = PAGE_SIZE << page_order;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -1015,13 +1015,13 @@
 	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -1175,7 +1175,7 @@
  * \param arg pointer to a drm_buf_info structure.
  * \return zero on success or a negative number on failure.
  *
- * Increments drm_device::buf_use while holding the drm_device::count_lock
+ * Increments drm_device::buf_use while holding the drm_device::buf_lock
  * lock, preventing of allocating more buffers after this call. Information
  * about each requested buffer is then copied into user space.
  */
@@ -1196,13 +1196,13 @@
 	if (!dma)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	++dev->buf_use;		/* Can't allocate more after this call */
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) {
 		if (dma->bufs[i].buf_count)
@@ -1381,13 +1381,13 @@
 	if (!dma)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	dev->buf_use++;		/* Can't allocate more after this call */
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	if (request->count >= dma->buf_count) {
 		if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP))

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 534cb89..a6b6906 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c

@@ -131,14 +131,14 @@
 EXPORT_SYMBOL(drm_clflush_sg);
 
 void
-drm_clflush_virt_range(char *addr, unsigned long length)
+drm_clflush_virt_range(void *addr, unsigned long length)
 {
 #if defined(CONFIG_X86)
 	if (cpu_has_clflush) {
-		char *end = addr + length;
+		void *end = addr + length;
 		mb();
 		for (; addr < end; addr += boot_cpu_data.x86_clflush_size)
-			clflush(addr);
+			clflushopt(addr);
 		clflushopt(end - 1);
 		mb();
 		return;

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index d8b7099..fe94cc1 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c

@@ -37,6 +37,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_lock.h>
 
 #include "drm_crtc_internal.h"
 
@@ -50,12 +51,42 @@
  */
 void drm_modeset_lock_all(struct drm_device *dev)
 {
-	struct drm_crtc *crtc;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx;
+	int ret;
 
-	mutex_lock(&dev->mode_config.mutex);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (WARN_ON(!ctx))
+		return;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex);
+	mutex_lock(&config->mutex);
+
+	drm_modeset_acquire_init(ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&config->connection_mutex, ctx);
+	if (ret)
+		goto fail;
+	ret = drm_modeset_lock_all_crtcs(dev, ctx);
+	if (ret)
+		goto fail;
+
+	WARN_ON(config->acquire_ctx);
+
+	/* all the locks are held now, so it is safe to stash the
+	 * ctx for drm_modeset_unlock_all():
+	 */
+	config->acquire_ctx = ctx;
+
+	drm_warn_on_modeset_not_all_locked(dev);
+
+	return;
+
+fail:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
 }
 EXPORT_SYMBOL(drm_modeset_lock_all);
 
@@ -67,10 +98,17 @@
  */
 void drm_modeset_unlock_all(struct drm_device *dev)
 {
-	struct drm_crtc *crtc;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		mutex_unlock(&crtc->mutex);
+	if (WARN_ON(!ctx))
+		return;
+
+	config->acquire_ctx = NULL;
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+
+	kfree(ctx);
 
 	mutex_unlock(&dev->mode_config.mutex);
 }
@@ -91,8 +129,9 @@
 		return;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		WARN_ON(!mutex_is_locked(&crtc->mutex));
+		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 }
 EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
@@ -227,6 +266,7 @@
 	{ DRM_MODE_ENCODER_TVDAC, "TV" },
 	{ DRM_MODE_ENCODER_VIRTUAL, "Virtual" },
 	{ DRM_MODE_ENCODER_DSI, "DSI" },
+	{ DRM_MODE_ENCODER_DPMST, "DP MST" },
 };
 
 static const struct drm_prop_enum_list drm_subpixel_enum_list[] =
@@ -256,46 +296,6 @@
 }
 
 /**
- * drm_get_encoder_name - return a string for encoder
- * @encoder: encoder to compute name of
- *
- * Note that the buffer used by this function is globally shared and owned by
- * the function itself.
- *
- * FIXME: This isn't really multithreading safe.
- */
-const char *drm_get_encoder_name(const struct drm_encoder *encoder)
-{
-	static char buf[32];
-
-	snprintf(buf, 32, "%s-%d",
-		 drm_encoder_enum_list[encoder->encoder_type].name,
-		 encoder->base.id);
-	return buf;
-}
-EXPORT_SYMBOL(drm_get_encoder_name);
-
-/**
- * drm_get_connector_name - return a string for connector
- * @connector: connector to compute name of
- *
- * Note that the buffer used by this function is globally shared and owned by
- * the function itself.
- *
- * FIXME: This isn't really multithreading safe.
- */
-const char *drm_get_connector_name(const struct drm_connector *connector)
-{
-	static char buf[32];
-
-	snprintf(buf, 32, "%s-%d",
-		 drm_connector_enum_list[connector->connector_type].name,
-		 connector->connector_type_id);
-	return buf;
-}
-EXPORT_SYMBOL(drm_get_connector_name);
-
-/**
  * drm_get_connector_status_name - return a string for connector status
  * @status: connector status to compute name of
  *
@@ -409,6 +409,21 @@
 	mutex_unlock(&dev->mode_config.idr_mutex);
 }
 
+static struct drm_mode_object *_object_find(struct drm_device *dev,
+		uint32_t id, uint32_t type)
+{
+	struct drm_mode_object *obj = NULL;
+
+	mutex_lock(&dev->mode_config.idr_mutex);
+	obj = idr_find(&dev->mode_config.crtc_idr, id);
+	if (!obj || (type != DRM_MODE_OBJECT_ANY && obj->type != type) ||
+	    (obj->id != id))
+		obj = NULL;
+	mutex_unlock(&dev->mode_config.idr_mutex);
+
+	return obj;
+}
+
 /**
  * drm_mode_object_find - look up a drm object with static lifetime
  * @dev: drm device
@@ -416,7 +431,9 @@
  * @type: type of the mode object
  *
  * Note that framebuffers cannot be looked up with this functions - since those
- * are reference counted, they need special treatment.
+ * are reference counted, they need special treatment.  This holds even with
+ * DRM_MODE_OBJECT_ANY, although in that case the lookup simply returns NULL
+ * rather than triggering the WARN_ON().
  */
 struct drm_mode_object *drm_mode_object_find(struct drm_device *dev,
 		uint32_t id, uint32_t type)
@@ -426,13 +443,10 @@
 	/* Framebuffers are reference counted and need their own lookup
 	 * function.*/
 	WARN_ON(type == DRM_MODE_OBJECT_FB);
-
-	mutex_lock(&dev->mode_config.idr_mutex);
-	obj = idr_find(&dev->mode_config.crtc_idr, id);
-	if (!obj || (obj->type != type) || (obj->id != id))
+	obj = _object_find(dev, id, type);
+	/* don't leak out unref'd fb's */
+	if (obj && (obj->type == DRM_MODE_OBJECT_FB))
 		obj = NULL;
-	mutex_unlock(&dev->mode_config.idr_mutex);
-
 	return obj;
 }
 EXPORT_SYMBOL(drm_mode_object_find);
@@ -538,7 +552,7 @@
  */
 void drm_framebuffer_unreference(struct drm_framebuffer *fb)
 {
-	DRM_DEBUG("FB ID: %d\n", fb->base.id);
+	DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
 	kref_put(&fb->refcount, drm_framebuffer_free);
 }
 EXPORT_SYMBOL(drm_framebuffer_unreference);
@@ -551,7 +565,7 @@
  */
 void drm_framebuffer_reference(struct drm_framebuffer *fb)
 {
-	DRM_DEBUG("FB ID: %d\n", fb->base.id);
+	DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
 	kref_get(&fb->refcount);
 }
 EXPORT_SYMBOL(drm_framebuffer_reference);
@@ -563,7 +577,7 @@
 
 static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)
 {
-	DRM_DEBUG("FB ID: %d\n", fb->base.id);
+	DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
 	kref_put(&fb->refcount, drm_framebuffer_free_bug);
 }
 
@@ -691,6 +705,8 @@
 }
 EXPORT_SYMBOL(drm_framebuffer_remove);
 
+DEFINE_WW_CLASS(crtc_ww_class);
+
 /**
  * drm_crtc_init_with_planes - Initialise a new CRTC object with
  *    specified primary and cursor planes.
@@ -710,6 +726,7 @@
 			      void *cursor,
 			      const struct drm_crtc_funcs *funcs)
 {
+	struct drm_mode_config *config = &dev->mode_config;
 	int ret;
 
 	crtc->dev = dev;
@@ -717,8 +734,9 @@
 	crtc->invert_dimensions = false;
 
 	drm_modeset_lock_all(dev);
-	mutex_init(&crtc->mutex);
-	mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex);
+	drm_modeset_lock_init(&crtc->mutex);
+	/* dropped by _unlock_all(): */
+	drm_modeset_lock(&crtc->mutex, config->acquire_ctx);
 
 	ret = drm_mode_object_get(dev, &crtc->base, DRM_MODE_OBJECT_CRTC);
 	if (ret)
@@ -726,8 +744,8 @@
 
 	crtc->base.properties = &crtc->properties;
 
-	list_add_tail(&crtc->head, &dev->mode_config.crtc_list);
-	dev->mode_config.num_crtc++;
+	list_add_tail(&crtc->head, &config->crtc_list);
+	config->num_crtc++;
 
 	crtc->primary = primary;
 	if (primary)
@@ -755,6 +773,8 @@
 	kfree(crtc->gamma_store);
 	crtc->gamma_store = NULL;
 
+	drm_modeset_lock_fini(&crtc->mutex);
+
 	drm_mode_object_put(dev, &crtc->base);
 	list_del(&crtc->head);
 	dev->mode_config.num_crtc--;
@@ -824,7 +844,7 @@
 
 	ret = drm_mode_object_get(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	connector->base.properties = &connector->properties;
 	connector->dev = dev;
@@ -834,9 +854,17 @@
 		ida_simple_get(connector_ida, 1, 0, GFP_KERNEL);
 	if (connector->connector_type_id < 0) {
 		ret = connector->connector_type_id;
-		drm_mode_object_put(dev, &connector->base);
-		goto out;
+		goto out_put;
 	}
+	connector->name =
+		kasprintf(GFP_KERNEL, "%s-%d",
+			  drm_connector_enum_list[connector_type].name,
+			  connector->connector_type_id);
+	if (!connector->name) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
 	INIT_LIST_HEAD(&connector->probed_modes);
 	INIT_LIST_HEAD(&connector->modes);
 	connector->edid_blob_ptr = NULL;
@@ -853,7 +881,11 @@
 	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dpms_property, 0);
 
- out:
+out_put:
+	if (ret)
+		drm_mode_object_put(dev, &connector->base);
+
+out_unlock:
 	drm_modeset_unlock_all(dev);
 
 	return ret;
@@ -881,6 +913,8 @@
 		   connector->connector_type_id);
 
 	drm_mode_object_put(dev, &connector->base);
+	kfree(connector->name);
+	connector->name = NULL;
 	list_del(&connector->head);
 	dev->mode_config.num_connector--;
 }
@@ -982,16 +1016,27 @@
 
 	ret = drm_mode_object_get(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	encoder->dev = dev;
 	encoder->encoder_type = encoder_type;
 	encoder->funcs = funcs;
+	encoder->name = kasprintf(GFP_KERNEL, "%s-%d",
+				  drm_encoder_enum_list[encoder_type].name,
+				  encoder->base.id);
+	if (!encoder->name) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
 
 	list_add_tail(&encoder->head, &dev->mode_config.encoder_list);
 	dev->mode_config.num_encoder++;
 
- out:
+out_put:
+	if (ret)
+		drm_mode_object_put(dev, &encoder->base);
+
+out_unlock:
 	drm_modeset_unlock_all(dev);
 
 	return ret;
@@ -1009,6 +1054,8 @@
 	struct drm_device *dev = encoder->dev;
 	drm_modeset_lock_all(dev);
 	drm_mode_object_put(dev, &encoder->base);
+	kfree(encoder->name);
+	encoder->name = NULL;
 	list_del(&encoder->head);
 	dev->mode_config.num_encoder--;
 	drm_modeset_unlock_all(dev);
@@ -1145,16 +1192,19 @@
  */
 void drm_plane_force_disable(struct drm_plane *plane)
 {
+	struct drm_framebuffer *old_fb = plane->fb;
 	int ret;
 
-	if (!plane->fb)
+	if (!old_fb)
 		return;
 
 	ret = plane->funcs->disable_plane(plane);
-	if (ret)
+	if (ret) {
 		DRM_ERROR("failed to disable plane with busy fb\n");
+		return;
+	}
 	/* disconnect the plane from the fb and crtc: */
-	__drm_framebuffer_unreference(plane->fb);
+	__drm_framebuffer_unreference(old_fb);
 	plane->fb = NULL;
 	plane->crtc = NULL;
 }
@@ -1378,6 +1428,12 @@
 	return 0;
 }
 
+void drm_mode_group_destroy(struct drm_mode_group *group)
+{
+	kfree(group->id_list);
+	group->id_list = NULL;
+}
+
 /*
  * NOTE: Driver's shouldn't ever call drm_mode_group_init_legacy_group - it is
  * the drm core's responsibility to set up mode control groups.
@@ -1614,7 +1670,7 @@
 					    &dev->mode_config.encoder_list,
 					    head) {
 				DRM_DEBUG_KMS("[ENCODER:%d:%s]\n", encoder->base.id,
-						drm_get_encoder_name(encoder));
+						encoder->name);
 				if (put_user(encoder->base.id, encoder_id +
 					     copied)) {
 					ret = -EFAULT;
@@ -1646,7 +1702,7 @@
 					    head) {
 				DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 					connector->base.id,
-					drm_get_connector_name(connector));
+					connector->name);
 				if (put_user(connector->base.id,
 					     connector_id + copied)) {
 					ret = -EFAULT;
@@ -1695,7 +1751,6 @@
 {
 	struct drm_mode_crtc *crtc_resp = data;
 	struct drm_crtc *crtc;
-	struct drm_mode_object *obj;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
@@ -1703,13 +1758,11 @@
 
 	drm_modeset_lock_all(dev);
 
-	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_resp->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	crtc_resp->x = crtc->x;
 	crtc_resp->y = crtc->y;
@@ -1763,7 +1816,6 @@
 			  struct drm_file *file_priv)
 {
 	struct drm_mode_get_connector *out_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_connector *connector;
 	struct drm_display_mode *mode;
 	int mode_count = 0;
@@ -1787,13 +1839,11 @@
 
 	mutex_lock(&dev->mode_config.mutex);
 
-	obj = drm_mode_object_find(dev, out_resp->connector_id,
-				   DRM_MODE_OBJECT_CONNECTOR);
-	if (!obj) {
+	connector = drm_connector_find(dev, out_resp->connector_id);
+	if (!connector) {
 		ret = -ENOENT;
 		goto out;
 	}
-	connector = obj_to_connector(obj);
 
 	props_count = connector->properties.count;
 
@@ -1821,10 +1871,12 @@
 	out_resp->mm_height = connector->display_info.height_mm;
 	out_resp->subpixel = connector->display_info.subpixel_order;
 	out_resp->connection = connector->status;
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	if (connector->encoder)
 		out_resp->encoder_id = connector->encoder->base.id;
 	else
 		out_resp->encoder_id = 0;
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 
 	/*
 	 * This ioctl is called twice, once to determine how much space is
@@ -1908,7 +1960,6 @@
 			struct drm_file *file_priv)
 {
 	struct drm_mode_get_encoder *enc_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_encoder *encoder;
 	int ret = 0;
 
@@ -1916,13 +1967,11 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
-				   DRM_MODE_OBJECT_ENCODER);
-	if (!obj) {
+	encoder = drm_encoder_find(dev, enc_resp->encoder_id);
+	if (!encoder) {
 		ret = -ENOENT;
 		goto out;
 	}
-	encoder = obj_to_encoder(obj);
 
 	if (encoder->crtc)
 		enc_resp->crtc_id = encoder->crtc->base.id;
@@ -2020,7 +2069,6 @@
 		      struct drm_file *file_priv)
 {
 	struct drm_mode_get_plane *plane_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_plane *plane;
 	uint32_t __user *format_ptr;
 	int ret = 0;
@@ -2029,13 +2077,11 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, plane_resp->plane_id,
-				   DRM_MODE_OBJECT_PLANE);
-	if (!obj) {
+	plane = drm_plane_find(dev, plane_resp->plane_id);
+	if (!plane) {
 		ret = -ENOENT;
 		goto out;
 	}
-	plane = obj_to_plane(obj);
 
 	if (plane->crtc)
 		plane_resp->crtc_id = plane->crtc->base.id;
@@ -2088,7 +2134,6 @@
 		      struct drm_file *file_priv)
 {
 	struct drm_mode_set_plane *plane_req = data;
-	struct drm_mode_object *obj;
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL, *old_fb = NULL;
@@ -2103,35 +2148,42 @@
 	 * First, find the plane, crtc, and fb objects.  If not available,
 	 * we don't bother to call the driver.
 	 */
-	obj = drm_mode_object_find(dev, plane_req->plane_id,
-				   DRM_MODE_OBJECT_PLANE);
-	if (!obj) {
+	plane = drm_plane_find(dev, plane_req->plane_id);
+	if (!plane) {
 		DRM_DEBUG_KMS("Unknown plane ID %d\n",
 			      plane_req->plane_id);
 		return -ENOENT;
 	}
-	plane = obj_to_plane(obj);
 
 	/* No fb means shut it down */
 	if (!plane_req->fb_id) {
 		drm_modeset_lock_all(dev);
 		old_fb = plane->fb;
-		plane->funcs->disable_plane(plane);
-		plane->crtc = NULL;
-		plane->fb = NULL;
+		ret = plane->funcs->disable_plane(plane);
+		if (!ret) {
+			plane->crtc = NULL;
+			plane->fb = NULL;
+		} else {
+			old_fb = NULL;
+		}
 		drm_modeset_unlock_all(dev);
 		goto out;
 	}
 
-	obj = drm_mode_object_find(dev, plane_req->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, plane_req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown crtc ID %d\n",
 			      plane_req->crtc_id);
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
+
+	/* Check whether this plane is usable on this CRTC */
+	if (!(plane->possible_crtcs & drm_crtc_mask(crtc))) {
+		DRM_DEBUG_KMS("Invalid crtc for plane\n");
+		ret = -EINVAL;
+		goto out;
+	}
 
 	fb = drm_framebuffer_lookup(dev, plane_req->fb_id);
 	if (!fb) {
@@ -2187,16 +2239,18 @@
 	}
 
 	drm_modeset_lock_all(dev);
+	old_fb = plane->fb;
 	ret = plane->funcs->update_plane(plane, crtc, fb,
 					 plane_req->crtc_x, plane_req->crtc_y,
 					 plane_req->crtc_w, plane_req->crtc_h,
 					 plane_req->src_x, plane_req->src_y,
 					 plane_req->src_w, plane_req->src_h);
 	if (!ret) {
-		old_fb = plane->fb;
 		plane->crtc = crtc;
 		plane->fb = fb;
 		fb = NULL;
+	} else {
+		old_fb = NULL;
 	}
 	drm_modeset_unlock_all(dev);
 
@@ -2239,9 +2293,7 @@
 	ret = crtc->funcs->set_config(set);
 	if (ret == 0) {
 		crtc->primary->crtc = crtc;
-
-		/* crtc->fb must be updated by ->set_config, enforces this. */
-		WARN_ON(fb != crtc->primary->fb);
+		crtc->primary->fb = fb;
 	}
 
 	list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) {
@@ -2318,7 +2370,6 @@
 {
 	struct drm_mode_config *config = &dev->mode_config;
 	struct drm_mode_crtc *crtc_req = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	struct drm_connector **connector_set = NULL, *connector;
 	struct drm_framebuffer *fb = NULL;
@@ -2336,14 +2387,12 @@
 		return -ERANGE;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown CRTC ID %d\n", crtc_req->crtc_id);
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
 
 	if (crtc_req->mode_valid) {
@@ -2426,18 +2475,16 @@
 				goto out;
 			}
 
-			obj = drm_mode_object_find(dev, out_id,
-						   DRM_MODE_OBJECT_CONNECTOR);
-			if (!obj) {
+			connector = drm_connector_find(dev, out_id);
+			if (!connector) {
 				DRM_DEBUG_KMS("Connector id %d unknown\n",
 						out_id);
 				ret = -ENOENT;
 				goto out;
 			}
-			connector = obj_to_connector(obj);
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 					connector->base.id,
-					drm_get_connector_name(connector));
+					connector->name);
 
 			connector_set[i] = connector;
 		}
@@ -2466,7 +2513,6 @@
 				  struct drm_mode_cursor2 *req,
 				  struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	int ret = 0;
 
@@ -2476,14 +2522,13 @@
 	if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags))
 		return -EINVAL;
 
-	obj = drm_mode_object_find(dev, req->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown CRTC ID %d\n", req->crtc_id);
 		return -ENOENT;
 	}
-	crtc = obj_to_crtc(obj);
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	if (req->flags & DRM_MODE_CURSOR_BO) {
 		if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) {
 			ret = -ENXIO;
@@ -2507,7 +2552,7 @@
 		}
 	}
 out:
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	return ret;
 
@@ -3097,6 +3142,8 @@
 	if (!property)
 		return NULL;
 
+	property->dev = dev;
+
 	if (num_values) {
 		property->values = kzalloc(sizeof(uint64_t)*num_values, GFP_KERNEL);
 		if (!property->values)
@@ -3117,6 +3164,9 @@
 	}
 
 	list_add_tail(&property->head, &dev->mode_config.property_list);
+
+	WARN_ON(!drm_property_type_valid(property));
+
 	return property;
 fail:
 	kfree(property->values);
@@ -3217,6 +3267,22 @@
 }
 EXPORT_SYMBOL(drm_property_create_bitmask);
 
+static struct drm_property *property_create_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 uint64_t min, uint64_t max)
+{
+	struct drm_property *property;
+
+	property = drm_property_create(dev, flags, name, 2);
+	if (!property)
+		return NULL;
+
+	property->values[0] = min;
+	property->values[1] = max;
+
+	return property;
+}
+
 /**
  * drm_property_create - create a new ranged property type
  * @dev: drm device
@@ -3239,20 +3305,36 @@
 					 const char *name,
 					 uint64_t min, uint64_t max)
 {
+	return property_create_range(dev, DRM_MODE_PROP_RANGE | flags,
+			name, min, max);
+}
+EXPORT_SYMBOL(drm_property_create_range);
+
+struct drm_property *drm_property_create_signed_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 int64_t min, int64_t max)
+{
+	return property_create_range(dev, DRM_MODE_PROP_SIGNED_RANGE | flags,
+			name, I642U64(min), I642U64(max));
+}
+EXPORT_SYMBOL(drm_property_create_signed_range);
+
+struct drm_property *drm_property_create_object(struct drm_device *dev,
+					 int flags, const char *name, uint32_t type)
+{
 	struct drm_property *property;
 
-	flags |= DRM_MODE_PROP_RANGE;
+	flags |= DRM_MODE_PROP_OBJECT;
 
-	property = drm_property_create(dev, flags, name, 2);
+	property = drm_property_create(dev, flags, name, 1);
 	if (!property)
 		return NULL;
 
-	property->values[0] = min;
-	property->values[1] = max;
+	property->values[0] = type;
 
 	return property;
 }
-EXPORT_SYMBOL(drm_property_create_range);
+EXPORT_SYMBOL(drm_property_create_object);
 
 /**
  * drm_property_add_enum - add a possible value to an enumeration property
@@ -3274,14 +3356,16 @@
 {
 	struct drm_property_enum *prop_enum;
 
-	if (!(property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)))
+	if (!(drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)))
 		return -EINVAL;
 
 	/*
 	 * Bitmask enum properties have the additional constraint of values
 	 * from 0 to 63
 	 */
-	if ((property->flags & DRM_MODE_PROP_BITMASK) && (value > 63))
+	if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK) &&
+			(value > 63))
 		return -EINVAL;
 
 	if (!list_empty(&property->enum_blob_list)) {
@@ -3438,7 +3522,6 @@
 int drm_mode_getproperty_ioctl(struct drm_device *dev,
 			       void *data, struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_mode_get_property *out_resp = data;
 	struct drm_property *property;
 	int enum_count = 0;
@@ -3457,17 +3540,17 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
-	if (!obj) {
+	property = drm_property_find(dev, out_resp->prop_id);
+	if (!property) {
 		ret = -ENOENT;
 		goto done;
 	}
-	property = obj_to_property(obj);
 
-	if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		list_for_each_entry(prop_enum, &property->enum_blob_list, head)
 			enum_count++;
-	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		list_for_each_entry(prop_blob, &property->enum_blob_list, head)
 			blob_count++;
 	}
@@ -3489,7 +3572,8 @@
 	}
 	out_resp->count_values = value_count;
 
-	if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		if ((out_resp->count_enum_blobs >= enum_count) && enum_count) {
 			copied = 0;
 			enum_ptr = (struct drm_mode_property_enum __user *)(unsigned long)out_resp->enum_blob_ptr;
@@ -3511,7 +3595,7 @@
 		out_resp->count_enum_blobs = enum_count;
 	}
 
-	if (property->flags & DRM_MODE_PROP_BLOB) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		if ((out_resp->count_enum_blobs >= blob_count) && blob_count) {
 			copied = 0;
 			blob_id_ptr = (uint32_t __user *)(unsigned long)out_resp->enum_blob_ptr;
@@ -3590,7 +3674,6 @@
 int drm_mode_getblob_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_mode_get_blob *out_resp = data;
 	struct drm_property_blob *blob;
 	int ret = 0;
@@ -3600,12 +3683,11 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
-	if (!obj) {
+	blob = drm_property_blob_find(dev, out_resp->blob_id);
+	if (!blob) {
 		ret = -ENOENT;
 		goto done;
 	}
-	blob = obj_to_blob(obj);
 
 	if (out_resp->length == blob->length) {
 		blob_ptr = (void __user *)(unsigned long)out_resp->data;
@@ -3667,19 +3749,40 @@
 {
 	if (property->flags & DRM_MODE_PROP_IMMUTABLE)
 		return false;
-	if (property->flags & DRM_MODE_PROP_RANGE) {
+
+	if (drm_property_type_is(property, DRM_MODE_PROP_RANGE)) {
 		if (value < property->values[0] || value > property->values[1])
 			return false;
 		return true;
-	} else if (property->flags & DRM_MODE_PROP_BITMASK) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_SIGNED_RANGE)) {
+		int64_t svalue = U642I64(value);
+		if (svalue < U642I64(property->values[0]) ||
+				svalue > U642I64(property->values[1]))
+			return false;
+		return true;
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		int i;
 		uint64_t valid_mask = 0;
 		for (i = 0; i < property->num_values; i++)
 			valid_mask |= (1ULL << property->values[i]);
 		return !(value & ~valid_mask);
-	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		/* Only the driver knows */
 		return true;
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_OBJECT)) {
+		struct drm_mode_object *obj;
+		/* a zero value for an object property translates to null: */
+		if (value == 0)
+			return true;
+		/*
+		 * NOTE: use _object_find() directly to bypass restriction on
+		 * looking up refcnt'd objects (ie. fb's).  For a refcnt'd
+		 * object this could race against object finalization, so it
+		 * simply tells us that the object *was* valid.  Which is good
+		 * enough.
+		 */
+		obj = _object_find(property->dev, value, property->values[0]);
+		return obj != NULL;
 	} else {
 		int i;
 		for (i = 0; i < property->num_values; i++)
@@ -3987,7 +4090,6 @@
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_lut *crtc_lut = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	void *r_base, *g_base, *b_base;
 	int size;
@@ -3997,12 +4099,11 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	if (crtc->funcs->gamma_set == NULL) {
 		ret = -ENOSYS;
@@ -4061,7 +4162,6 @@
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_lut *crtc_lut = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	void *r_base, *g_base, *b_base;
 	int size;
@@ -4071,12 +4171,11 @@
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	/* memcpy into gamma store */
 	if (crtc_lut->gamma_size != crtc->gamma_size) {
@@ -4129,7 +4228,6 @@
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_page_flip *page_flip = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL, *old_fb = NULL;
 	struct drm_pending_vblank_event *e = NULL;
@@ -4143,12 +4241,11 @@
 	if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip)
 		return -EINVAL;
 
-	obj = drm_mode_object_find(dev, page_flip->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj)
+	crtc = drm_crtc_find(dev, page_flip->crtc_id);
+	if (!crtc)
 		return -ENOENT;
-	crtc = obj_to_crtc(obj);
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	if (crtc->primary->fb == NULL) {
 		/* The framebuffer is currently unbound, presumably
 		 * due to a hotplug event, that userspace has not
@@ -4232,7 +4329,7 @@
 		drm_framebuffer_unreference(fb);
 	if (old_fb)
 		drm_framebuffer_unreference(old_fb);
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	return ret;
 }
@@ -4597,6 +4694,7 @@
 void drm_mode_config_init(struct drm_device *dev)
 {
 	mutex_init(&dev->mode_config.mutex);
+	drm_modeset_lock_init(&dev->mode_config.connection_mutex);
 	mutex_init(&dev->mode_config.idr_mutex);
 	mutex_init(&dev->mode_config.fb_lock);
 	INIT_LIST_HEAD(&dev->mode_config.fb_list);
@@ -4696,5 +4794,6 @@
 	}
 
 	idr_destroy(&dev->mode_config.crtc_idr);
+	drm_modeset_lock_fini(&dev->mode_config.connection_mutex);
 }
 EXPORT_SYMBOL(drm_mode_config_cleanup);

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 872ba11..78b37f3 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c

@@ -93,8 +93,10 @@
 	 * We can expect this mutex to be locked if we are not panicking.
 	 * Locking is currently fubar in the panic handler.
 	 */
-	if (!oops_in_progress)
+	if (!oops_in_progress) {
 		WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+		WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+	}
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder)
@@ -153,20 +155,14 @@
 static void __drm_helper_disable_unused_functions(struct drm_device *dev)
 {
 	struct drm_encoder *encoder;
-	struct drm_connector *connector;
 	struct drm_crtc *crtc;
 
 	drm_warn_on_modeset_not_all_locked(dev);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (!connector->encoder)
-			continue;
-	}
-
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (!drm_helper_encoder_in_use(encoder)) {
 			drm_encoder_disable(encoder);
-			/* disconnector encoder from any connector */
+			/* disconnect encoder from any connector */
 			encoder->crtc = NULL;
 		}
 	}
@@ -349,7 +345,7 @@
 			continue;
 
 		DRM_DEBUG_KMS("[ENCODER:%d:%s] set [MODE:%d:%s]\n",
-			encoder->base.id, drm_get_encoder_name(encoder),
+			encoder->base.id, encoder->name,
 			mode->base.id, mode->name);
 		encoder_funcs = encoder->helper_private;
 		encoder_funcs->mode_set(encoder, mode, adjusted_mode);
@@ -400,8 +396,7 @@
 }
 EXPORT_SYMBOL(drm_crtc_helper_set_mode);
 
-
-static int
+static void
 drm_crtc_helper_disable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -430,7 +425,6 @@
 	}
 
 	__drm_helper_disable_unused_functions(dev);
-	return 0;
 }
 
 /**
@@ -481,7 +475,8 @@
 				(int)set->num_connectors, set->x, set->y);
 	} else {
 		DRM_DEBUG_KMS("[CRTC:%d] [NOFB]\n", set->crtc->base.id);
-		return drm_crtc_helper_disable(set->crtc);
+		drm_crtc_helper_disable(set->crtc);
+		return 0;
 	}
 
 	dev = set->crtc->dev;
@@ -620,11 +615,11 @@
 		}
 		if (new_crtc) {
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d]\n",
-				connector->base.id, drm_get_connector_name(connector),
+				connector->base.id, connector->name,
 				new_crtc->base.id);
 		} else {
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [NOCRTC]\n",
-				connector->base.id, drm_get_connector_name(connector));
+				connector->base.id, connector->name);
 		}
 	}
 
@@ -650,7 +645,7 @@
 			DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
 			for (i = 0; i < set->num_connectors; i++) {
 				DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-					      drm_get_connector_name(set->connectors[i]));
+					      set->connectors[i]->name);
 				set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
 			}
 		}

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 4b6e6f3..08e33b8 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c

@@ -206,13 +206,17 @@
  * i2c_dp_aux_add_bus() - register an i2c adapter using the aux ch helper
  * @adapter: i2c adapter to register
  *
- * This registers an i2c adapater that uses dp aux channel as it's underlaying
+ * This registers an i2c adapter that uses dp aux channel as its underlying
  * transport. The driver needs to fill out the &i2c_algo_dp_aux_data structure
  * and store it in the algo_data member of the @adapter argument. This will be
  * used by the i2c over dp aux algorithm to drive the hardware.
  *
  * RETURNS:
  * 0 on success, -ERRNO on failure.
+ *
+ * IMPORTANT:
+ * This interface is deprecated, please switch to the new dp aux helpers and
+ * drm_dp_aux_register().
  */
 int
 i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
@@ -378,7 +382,10 @@
 	 * transactions.
 	 */
 	for (retry = 0; retry < 7; retry++) {
+
+		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
+		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
@@ -592,7 +599,9 @@
 	 * before giving up the AUX transaction.
 	 */
 	for (retry = 0; retry < 7; retry++) {
+		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, msg);
+		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
@@ -725,13 +734,15 @@
 };
 
 /**
- * drm_dp_aux_register_i2c_bus() - register an I2C adapter for I2C-over-AUX
+ * drm_dp_aux_register() - initialise and register aux channel
  * @aux: DisplayPort AUX channel
  *
  * Returns 0 on success or a negative error code on failure.
  */
-int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux)
+int drm_dp_aux_register(struct drm_dp_aux *aux)
 {
+	mutex_init(&aux->hw_mutex);
+
 	aux->ddc.algo = &drm_dp_i2c_algo;
 	aux->ddc.algo_data = aux;
 	aux->ddc.retries = 3;
@@ -746,14 +757,14 @@
 
 	return i2c_add_adapter(&aux->ddc);
 }
-EXPORT_SYMBOL(drm_dp_aux_register_i2c_bus);
+EXPORT_SYMBOL(drm_dp_aux_register);
 
 /**
- * drm_dp_aux_unregister_i2c_bus() - unregister an I2C-over-AUX adapter
+ * drm_dp_aux_unregister() - unregister an AUX adapter
  * @aux: DisplayPort AUX channel
  */
-void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux)
+void drm_dp_aux_unregister(struct drm_dp_aux *aux)
 {
 	i2c_del_adapter(&aux->ddc);
 }
-EXPORT_SYMBOL(drm_dp_aux_unregister_i2c_bus);
+EXPORT_SYMBOL(drm_dp_aux_unregister);

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index d4e3f9d..dfa9769 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c

@@ -70,6 +70,8 @@
 #define EDID_QUIRK_FORCE_REDUCED_BLANKING	(1 << 7)
 /* Force 8bpc */
 #define EDID_QUIRK_FORCE_8BPC			(1 << 8)
+/* Force 12bpc */
+#define EDID_QUIRK_FORCE_12BPC			(1 << 9)
 
 struct detailed_mode_closure {
 	struct drm_connector *connector;
@@ -125,6 +127,9 @@
 	{ "SAM", 596, EDID_QUIRK_PREFER_LARGE_60 },
 	{ "SAM", 638, EDID_QUIRK_PREFER_LARGE_60 },
 
+	/* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */
+	{ "SNY", 0x2541, EDID_QUIRK_FORCE_12BPC },
+
 	/* ViewSonic VA2026w */
 	{ "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING },
 
@@ -984,9 +989,13 @@
 	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00
 };
 
- /*
- * Sanity check the header of the base EDID block.  Return 8 if the header
- * is perfect, down to 0 if it's totally wrong.
+/**
+ * drm_edid_header_is_valid - sanity check the header of the base EDID block
+ * @raw_edid: pointer to raw base EDID block
+ *
+ * Sanity check the header of the base EDID block.
+ *
+ * Return: 8 if the header is perfect, down to 0 if it's totally wrong.
  */
 int drm_edid_header_is_valid(const u8 *raw_edid)
 {
@@ -1005,9 +1014,16 @@
 MODULE_PARM_DESC(edid_fixup,
 		 "Minimum number of valid EDID header bytes (0-8, default 6)");
 
-/*
- * Sanity check the EDID block (base or extension).  Return 0 if the block
- * doesn't check out, or 1 if it's valid.
+/**
+ * drm_edid_block_valid - Sanity check the EDID block (base or extension)
+ * @raw_edid: pointer to raw EDID block
+ * @block: type of block to validate (0 for base, extension otherwise)
+ * @print_bad_edid: if true, dump bad EDID blocks to the console
+ *
+ * Validate a base or extension EDID block and optionally dump bad blocks to
+ * the console.
+ *
+ * Return: True if the block is valid, false otherwise.
  */
 bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid)
 {
@@ -1077,6 +1093,8 @@
  * @edid: EDID data
  *
  * Sanity-check an entire EDID record (including extensions)
+ *
+ * Return: True if the EDID data is valid, false otherwise.
  */
 bool drm_edid_is_valid(struct edid *edid)
 {
@@ -1096,18 +1114,15 @@
 
 #define DDC_SEGMENT_ADDR 0x30
 /**
- * Get EDID information via I2C.
- *
- * @adapter : i2c device adaptor
+ * drm_do_probe_ddc_edid() - get EDID information via I2C
+ * @adapter: I2C device adaptor
  * @buf: EDID data buffer to be filled
  * @block: 128 byte EDID block to start fetching from
  * @len: EDID data buffer length to fetch
  *
- * Returns:
+ * Try to fetch EDID information by calling I2C driver functions.
  *
- * 0 on success or -1 on failure.
- *
- * Try to fetch EDID information by calling i2c driver function.
+ * Return: 0 on success or -1 on failure.
  */
 static int
 drm_do_probe_ddc_edid(struct i2c_adapter *adapter, unsigned char *buf,
@@ -1118,7 +1133,8 @@
 	unsigned char xfers = segment ? 3 : 2;
 	int ret, retries = 5;
 
-	/* The core i2c driver will automatically retry the transfer if the
+	/*
+	 * The core I2C driver will automatically retry the transfer if the
 	 * adapter reports EAGAIN. However, we find that bit-banging transfers
 	 * are susceptible to errors under a heavily loaded machine and
 	 * generate spurious NAKs and timeouts. Retrying the transfer
@@ -1144,10 +1160,10 @@
 			}
 		};
 
-	/*
-	 * Avoid sending the segment addr to not upset non-compliant ddc
-	 * monitors.
-	 */
+		/*
+		 * Avoid sending the segment addr to not upset non-compliant
+		 * DDC monitors.
+		 */
 		ret = i2c_transfer(adapter, &msgs[3 - xfers], xfers);
 
 		if (ret == -ENXIO) {
@@ -1216,7 +1232,7 @@
 		if (i == 4 && print_bad_edid) {
 			dev_warn(connector->dev->dev,
 			 "%s: Ignoring invalid EDID block %d.\n",
-			 drm_get_connector_name(connector), j);
+			 connector->name, j);
 
 			connector->bad_edid_counter++;
 		}
@@ -1236,7 +1252,7 @@
 carp:
 	if (print_bad_edid) {
 		dev_warn(connector->dev->dev, "%s: EDID block %d invalid.\n",
-			 drm_get_connector_name(connector), j);
+			 connector->name, j);
 	}
 	connector->bad_edid_counter++;
 
@@ -1246,12 +1262,10 @@
 }
 
 /**
- * Probe DDC presence.
- * @adapter: i2c adapter to probe
+ * drm_probe_ddc() - probe DDC presence
+ * @adapter: I2C adapter to probe
  *
- * Returns:
- *
- * 1 on success
+ * Return: True on success, false on failure.
  */
 bool
 drm_probe_ddc(struct i2c_adapter *adapter)
@@ -1265,12 +1279,12 @@
 /**
  * drm_get_edid - get EDID data, if available
  * @connector: connector we're probing
- * @adapter: i2c adapter to use for DDC
+ * @adapter: I2C adapter to use for DDC
  *
- * Poke the given i2c channel to grab EDID data if possible.  If found,
+ * Poke the given I2C channel to grab EDID data if possible.  If found,
  * attach it to the connector.
  *
- * Return edid data or NULL if we couldn't find any.
+ * Return: Pointer to valid EDID or NULL if we couldn't find any.
  */
 struct edid *drm_get_edid(struct drm_connector *connector,
 			  struct i2c_adapter *adapter)
@@ -1288,7 +1302,7 @@
  * drm_edid_duplicate - duplicate an EDID and the extensions
  * @edid: EDID to duplicate
  *
- * Return duplicate edid or NULL on allocation failure.
+ * Return: Pointer to duplicated EDID or NULL on allocation failure.
  */
 struct edid *drm_edid_duplicate(const struct edid *edid)
 {
@@ -1411,7 +1425,8 @@
  * @rb: Mode reduced-blanking-ness
  *
  * Walk the DMT mode list looking for a match for the given parameters.
- * Return a newly allocated copy of the mode, or NULL if not found.
+ *
+ * Return: A newly allocated copy of the mode, or NULL if not found.
  */
 struct drm_display_mode *drm_mode_find_dmt(struct drm_device *dev,
 					   int hsize, int vsize, int fresh,
@@ -1595,14 +1610,13 @@
  * @connector: connector of for the EDID block
  * @edid: EDID block to scan
  * @t: standard timing params
- * @revision: standard timing level
  *
  * Take the standard timing params (in this case width, aspect, and refresh)
  * and convert them into a real mode using CVT/GTF/DMT.
  */
 static struct drm_display_mode *
 drm_mode_std(struct drm_connector *connector, struct edid *edid,
-	     struct std_timing *t, int revision)
+	     struct std_timing *t)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_display_mode *m, *mode = NULL;
@@ -1623,7 +1637,7 @@
 	vrefresh_rate = vfreq + 60;
 	/* the vdisplay is calculated based on the aspect ratio */
 	if (aspect_ratio == 0) {
-		if (revision < 3)
+		if (edid->revision < 3)
 			vsize = hsize;
 		else
 			vsize = (hsize * 10) / 16;
@@ -2140,7 +2154,7 @@
 
 /**
  * add_established_modes - get est. modes from EDID and add them
- * @connector: connector of for the EDID block
+ * @connector: connector to add mode(s) to
  * @edid: EDID block to scan
  *
  * Each EDID block contains a bitmap of the supported "established modes" list
@@ -2191,8 +2205,7 @@
 			struct drm_display_mode *newmode;
 
 			std = &data->data.timings[i];
-			newmode = drm_mode_std(connector, edid, std,
-					       edid->revision);
+			newmode = drm_mode_std(connector, edid, std);
 			if (newmode) {
 				drm_mode_probed_add(connector, newmode);
 				closure->modes++;
@@ -2203,7 +2216,7 @@
 
 /**
  * add_standard_modes - get std. modes from EDID and add them
- * @connector: connector of for the EDID block
+ * @connector: connector to add mode(s) to
  * @edid: EDID block to scan
  *
  * Standard modes can be calculated using the appropriate standard (DMT,
@@ -2221,8 +2234,7 @@
 		struct drm_display_mode *newmode;
 
 		newmode = drm_mode_std(connector, edid,
-				       &edid->standard_timings[i],
-				       edid->revision);
+				       &edid->standard_timings[i]);
 		if (newmode) {
 			drm_mode_probed_add(connector, newmode);
 			modes++;
@@ -2425,7 +2437,7 @@
  * drm_match_cea_mode - look for a CEA mode matching given mode
  * @to_match: display mode
  *
- * Returns the CEA Video ID (VIC) of the mode or 0 if it isn't a CEA-861
+ * Return: The CEA Video ID (VIC) of the mode or 0 if it isn't a CEA-861
  * mode.
  */
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
@@ -2452,6 +2464,22 @@
 }
 EXPORT_SYMBOL(drm_match_cea_mode);
 
+/**
+ * drm_get_cea_aspect_ratio - get the picture aspect ratio for a CEA VIC
+ * @video_code: ID given to each of the CEA modes
+ *
+ * Return: The picture aspect ratio of the CEA mode, or
+ * HDMI_PICTURE_ASPECT_NONE if @video_code is outside the CEA mode list.
+ */
+enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code)
+{
+	/* VICs are 1-based; reject 0 and anything past the end of the list. */
+	if (video_code == 0 || video_code > ARRAY_SIZE(edid_cea_modes))
+		return HDMI_PICTURE_ASPECT_NONE;
+	return edid_cea_modes[video_code - 1].picture_aspect_ratio;
+}
+EXPORT_SYMBOL(drm_get_cea_aspect_ratio);
+
 /*
  * Calculate the alternate clock for HDMI modes (those from the HDMI vendor
  * specific block).
@@ -3023,11 +3051,9 @@
  * @connector: connector corresponding to the HDMI/DP sink
  * @edid: EDID to parse
  *
- * Fill the ELD (EDID-Like Data) buffer for passing to the audio driver.
- * Some ELD fields are left to the graphics driver caller:
- * - Conn_Type
- * - HDCP
- * - Port_ID
+ * Fill the ELD (EDID-Like Data) buffer for passing to the audio driver. The
+ * Conn_Type, HDCP and Port_ID ELD fields are left for the graphics driver to
+ * fill in.
  */
 void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
 {
@@ -3111,9 +3137,10 @@
  * @sads: pointer that will be set to the extracted SADs
  *
  * Looks for CEA EDID block and extracts SADs (Short Audio Descriptors) from it.
- * Note: returned pointer needs to be kfreed
  *
- * Return number of found SADs or negative number on error.
+ * Note: The returned pointer needs to be freed using kfree().
+ *
+ * Return: The number of found SADs or negative number on error.
  */
 int drm_edid_to_sad(struct edid *edid, struct cea_sad **sads)
 {
@@ -3170,9 +3197,11 @@
  * @sadb: pointer to the speaker block
  *
  * Looks for CEA EDID block and extracts the Speaker Allocation Data Block from it.
- * Note: returned pointer needs to be kfreed
  *
- * Return number of found Speaker Allocation Blocks or negative number on error.
+ * Note: The returned pointer needs to be freed using kfree().
+ *
+ * Return: The number of found Speaker Allocation Blocks or negative number on
+ * error.
  */
 int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb)
 {
@@ -3204,10 +3233,9 @@
 
 			/* Speaker Allocation Data Block */
 			if (dbl == 3) {
-				*sadb = kmalloc(dbl, GFP_KERNEL);
+				*sadb = kmemdup(&db[1], dbl, GFP_KERNEL);
 				if (!*sadb)
 					return -ENOMEM;
-				memcpy(*sadb, &db[1], dbl);
 				count = dbl;
 				break;
 			}
@@ -3219,9 +3247,12 @@
 EXPORT_SYMBOL(drm_edid_to_speaker_allocation);
 
 /**
- * drm_av_sync_delay - HDMI/DP sink audio-video sync delay in millisecond
+ * drm_av_sync_delay - compute the HDMI/DP sink audio-video sync delay
  * @connector: connector associated with the HDMI/DP sink
  * @mode: the display mode
+ *
+ * Return: The HDMI/DP sink's audio-video sync delay in milliseconds or 0 if
+ * the sink doesn't support audio or video.
  */
 int drm_av_sync_delay(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
@@ -3263,6 +3294,9 @@
  *
  * It's possible for one encoder to be associated with multiple HDMI/DP sinks.
  * The policy is now hard coded to simply use the first HDMI/DP sink's ELD.
+ *
+ * Return: The connector associated with the first HDMI/DP sink that has ELD
+ * attached to it.
  */
 struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
 				     struct drm_display_mode *mode)
@@ -3270,6 +3304,8 @@
 	struct drm_connector *connector;
 	struct drm_device *dev = encoder->dev;
 
+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder && connector->eld[0])
 			return connector;
@@ -3279,11 +3315,12 @@
 EXPORT_SYMBOL(drm_select_eld);
 
 /**
- * drm_detect_hdmi_monitor - detect whether monitor is hdmi.
+ * drm_detect_hdmi_monitor - detect whether monitor is HDMI
  * @edid: monitor EDID information
  *
  * Parse the CEA extension according to CEA-861-B.
- * Return true if HDMI, false if not or unknown.
+ *
+ * Return: True if the monitor is HDMI, false if not or unknown.
  */
 bool drm_detect_hdmi_monitor(struct edid *edid)
 {
@@ -3321,6 +3358,7 @@
  * audio format, assume at least 'basic audio' support, even if 'basic
  * audio' is not defined in EDID.
  *
+ * Return: True if the monitor supports audio, false otherwise.
  */
 bool drm_detect_monitor_audio(struct edid *edid)
 {
@@ -3364,6 +3402,8 @@
  * Check whether the monitor reports the RGB quantization range selection
  * as supported. The AVI infoframe can then be used to inform the monitor
  * which quantization range (full or limited) is used.
+ *
+ * Return: True if the RGB quantization range is selectable, false otherwise.
  */
 bool drm_rgb_quant_range_selectable(struct edid *edid)
 {
@@ -3390,16 +3430,119 @@
 EXPORT_SYMBOL(drm_rgb_quant_range_selectable);
 
 /**
+ * drm_assign_hdmi_deep_color_info - detect whether monitor supports
+ * hdmi deep color modes and update drm_display_info if so.
+ * @edid: monitor EDID information
+ * @info: Updated with maximum supported deep color bpc and color format
+ *        if deep color supported.
+ * @connector: connector whose name prefixes the debug messages
+ *
+ * Parse the CEA extension according to CEA-861-B.
+ * Return: True if HDMI deep color supported, false if not or unknown.
+ */
+static bool drm_assign_hdmi_deep_color_info(struct edid *edid,
+                                            struct drm_display_info *info,
+                                            struct drm_connector *connector)
+{
+	u8 *edid_ext, *hdmi;
+	int i;
+	int start_offset, end_offset;
+	unsigned int dc_bpc = 0;
+
+	edid_ext = drm_find_cea_extension(edid);
+	if (!edid_ext)
+		return false;
+
+	if (cea_db_offsets(edid_ext, &start_offset, &end_offset))
+		return false;
+
+	/*
+	 * Because HDMI identifier is in Vendor Specific Block,
+	 * search it from all data blocks of CEA extension.
+	 */
+	for_each_cea_db(edid_ext, i, start_offset, end_offset) {
+		if (cea_db_is_hdmi_vsdb(&edid_ext[i])) {
+			/* HDMI supports at least 8 bpc */
+			info->bpc = 8;
+
+			hdmi = &edid_ext[i];
+			if (cea_db_payload_len(hdmi) < 6)
+				return false;
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_30) {
+				dc_bpc = 10;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30;
+				DRM_DEBUG("%s: HDMI sink does deep color 30.\n",
+						  connector->name);
+			}
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_36) {
+				dc_bpc = 12;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36;
+				DRM_DEBUG("%s: HDMI sink does deep color 36.\n",
+						  connector->name);
+			}
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_48) {
+				dc_bpc = 16;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48;
+				DRM_DEBUG("%s: HDMI sink does deep color 48.\n",
+						  connector->name);
+			}
+
+			if (dc_bpc > 0) {
+				DRM_DEBUG("%s: Assigning HDMI sink color depth as %d bpc.\n",
+						  connector->name, dc_bpc);
+				info->bpc = dc_bpc;
+
+				/*
+				 * Deep color support mandates RGB444 support for all video
+				 * modes and forbids YCRCB422 support for all video modes per
+				 * HDMI 1.3 spec.
+				 */
+				info->color_formats = DRM_COLOR_FORMAT_RGB444;
+
+				/* YCRCB444 is optional according to spec. */
+				if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) {
+					info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
+					DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n",
+							  connector->name);
+				}
+
+				/*
+				 * Spec says that if any deep color mode is supported at all,
+				 * then deep color 36 bit must be supported.
+				 */
+				if (!(hdmi[6] & DRM_EDID_HDMI_DC_36)) {
+					DRM_DEBUG("%s: HDMI sink should do DC_36, but does not!\n",
+							  connector->name);
+				}
+
+				return true;
+			}
+			else {
+				DRM_DEBUG("%s: No deep color support on this HDMI sink.\n",
+						  connector->name);
+			}
+		}
+	}
+
+	return false;
+}
+
+/**
  * drm_add_display_info - pull display info out if present
  * @edid: EDID data
  * @info: display info (attached to connector)
+ * @connector: connector whose edid is used to build display info
  *
  * Grab any available display info and stuff it into the drm_display_info
  * structure that's part of the connector.  Useful for tracking bpp and
  * color spaces.
  */
 static void drm_add_display_info(struct edid *edid,
-				 struct drm_display_info *info)
+                                 struct drm_display_info *info,
+                                 struct drm_connector *connector)
 {
 	u8 *edid_ext;
 
@@ -3429,6 +3572,9 @@
 			info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
 	}
 
+	/* HDMI deep color modes supported? Assign to info, if so */
+	drm_assign_hdmi_deep_color_info(edid, info, connector);
+
 	/* Only defined for 1.4 with digital displays */
 	if (edid->revision < 4)
 		return;
@@ -3458,6 +3604,9 @@
 		break;
 	}
 
+	DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n",
+			  connector->name, info->bpc);
+
 	info->color_formats |= DRM_COLOR_FORMAT_RGB444;
 	if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444)
 		info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
@@ -3468,11 +3617,11 @@
 /**
  * drm_add_edid_modes - add modes from EDID data, if available
  * @connector: connector we're probing
- * @edid: edid data
+ * @edid: EDID data
  *
  * Add the specified modes to the connector's mode list.
  *
- * Return number of modes added or 0 if we couldn't find any.
+ * Return: The number of modes added or 0 if we couldn't find any.
  */
 int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 {
@@ -3484,7 +3633,7 @@
 	}
 	if (!drm_edid_is_valid(edid)) {
 		dev_warn(connector->dev->dev, "%s: EDID invalid.\n",
-			 drm_get_connector_name(connector));
+			 connector->name);
 		return 0;
 	}
 
@@ -3516,11 +3665,14 @@
 	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
 		edid_fixup_preferred(connector, quirks);
 
-	drm_add_display_info(edid, &connector->display_info);
+	drm_add_display_info(edid, &connector->display_info, connector);
 
 	if (quirks & EDID_QUIRK_FORCE_8BPC)
 		connector->display_info.bpc = 8;
 
+	if (quirks & EDID_QUIRK_FORCE_12BPC)
+		connector->display_info.bpc = 12;
+
 	return num_modes;
 }
 EXPORT_SYMBOL(drm_add_edid_modes);
@@ -3534,7 +3686,7 @@
  * Add the specified modes to the connector's mode list. Only when the
  * hdisplay/vdisplay is not beyond the given limit, it will be added.
  *
- * Return number of modes added or 0 if we couldn't find any.
+ * Return: The number of modes added or 0 if we couldn't find any.
  */
 int drm_add_modes_noedid(struct drm_connector *connector,
 			int hdisplay, int vdisplay)
@@ -3573,13 +3725,22 @@
 }
 EXPORT_SYMBOL(drm_add_modes_noedid);
 
+/**
+ * drm_set_preferred_mode - Sets the preferred mode of a connector
+ * @connector: connector whose mode list should be processed
+ * @hpref: horizontal resolution of preferred mode
+ * @vpref: vertical resolution of preferred mode
+ *
+ * Marks a mode as preferred if it matches the resolution specified by @hpref
+ * and @vpref.
+ */
 void drm_set_preferred_mode(struct drm_connector *connector,
 			   int hpref, int vpref)
 {
 	struct drm_display_mode *mode;
 
 	list_for_each_entry(mode, &connector->probed_modes, head) {
-		if (mode->hdisplay  == hpref &&
+		if (mode->hdisplay == hpref &&
 		    mode->vdisplay == vpref)
 			mode->type |= DRM_MODE_TYPE_PREFERRED;
 	}
@@ -3592,7 +3753,7 @@
  * @frame: HDMI AVI infoframe
  * @mode: DRM display mode
  *
- * Returns 0 on success or a negative error code on failure.
+ * Return: 0 on success or a negative error code on failure.
  */
 int
 drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
@@ -3613,6 +3774,12 @@
 	frame->video_code = drm_match_cea_mode(mode);
 
 	frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
+
+	/* Populate picture aspect ratio from CEA mode list */
+	if (frame->video_code > 0)
+		frame->picture_aspect = drm_get_cea_aspect_ratio(
+						frame->video_code);
+
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
 	frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
 
@@ -3657,7 +3824,7 @@
  * 4k or stereoscopic 3D mode. So when giving any other mode as input this
  * function will return -EINVAL, error that can be safely ignored.
  *
- * Returns 0 on success or a negative error code on failure.
+ * Return: 0 on success or a negative error code on failure.
  */
 int
 drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,

diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index 1b4c7a5..0a235fe 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c

@@ -31,8 +31,9 @@
 MODULE_PARM_DESC(edid_firmware, "Do not probe monitor, use specified EDID blob "
 	"from built-in data or /lib/firmware instead. ");
 
-#define GENERIC_EDIDS 5
+#define GENERIC_EDIDS 6
 static const char *generic_edid_name[GENERIC_EDIDS] = {
+	"edid/800x600.bin",
 	"edid/1024x768.bin",
 	"edid/1280x1024.bin",
 	"edid/1600x1200.bin",
@@ -44,6 +45,24 @@
 	{
 	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
 	0x31, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x05, 0x16, 0x01, 0x03, 0x6d, 0x1b, 0x14, 0x78,
+	0xea, 0x5e, 0xc0, 0xa4, 0x59, 0x4a, 0x98, 0x25,
+	0x20, 0x50, 0x54, 0x01, 0x00, 0x00, 0x45, 0x40,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xa0, 0x0f,
+	0x20, 0x00, 0x31, 0x58, 0x1c, 0x20, 0x28, 0x80,
+	0x14, 0x00, 0x15, 0xd0, 0x10, 0x00, 0x00, 0x1e,
+	0x00, 0x00, 0x00, 0xff, 0x00, 0x4c, 0x69, 0x6e,
+	0x75, 0x78, 0x20, 0x23, 0x30, 0x0a, 0x20, 0x20,
+	0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x3b,
+	0x3d, 0x24, 0x26, 0x05, 0x00, 0x0a, 0x20, 0x20,
+	0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfc,
+	0x00, 0x4c, 0x69, 0x6e, 0x75, 0x78, 0x20, 0x53,
+	0x56, 0x47, 0x41, 0x0a, 0x20, 0x20, 0x00, 0xc2,
+	},
+	{
+	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+	0x31, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x05, 0x16, 0x01, 0x03, 0x6d, 0x23, 0x1a, 0x78,
 	0xea, 0x5e, 0xc0, 0xa4, 0x59, 0x4a, 0x98, 0x25,
 	0x20, 0x50, 0x54, 0x00, 0x08, 0x00, 0x61, 0x40,
@@ -242,7 +261,7 @@
 
 int drm_load_edid_firmware(struct drm_connector *connector)
 {
-	const char *connector_name = drm_get_connector_name(connector);
+	const char *connector_name = connector->name;
 	char *edidname = edid_firmware, *last, *colon;
 	int ret;
 	struct edid *edid;

diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 61b5a47..f27c883 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c

@@ -429,13 +429,8 @@
  */
 void drm_fbdev_cma_restore_mode(struct drm_fbdev_cma *fbdev_cma)
 {
-	if (fbdev_cma) {
-		struct drm_device *dev = fbdev_cma->fb_helper.dev;
-
-		drm_modeset_lock_all(dev);
-		drm_fb_helper_restore_fbdev_mode(&fbdev_cma->fb_helper);
-		drm_modeset_unlock_all(dev);
-	}
+	if (fbdev_cma)
+		drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev_cma->fb_helper);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_restore_mode);
 

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 04d3fd3..d5d8cea 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c

@@ -45,13 +45,13 @@
  * DOC: fbdev helpers
  *
  * The fb helper functions are useful to provide an fbdev on top of a drm kernel
- * mode setting driver. They can be used mostly independantely from the crtc
+ * mode setting driver. They can be used mostly independently from the crtc
  * helper functions used by many drivers to implement the kernel mode setting
  * interfaces.
  *
  * Initialization is done as a three-step process with drm_fb_helper_init(),
  * drm_fb_helper_single_add_all_connectors() and drm_fb_helper_initial_config().
- * Drivers with fancier requirements than the default beheviour can override the
+ * Drivers with fancier requirements than the default behaviour can override the
  * second step with their own code.  Teardown is done with drm_fb_helper_fini().
  *
  * At runtime drivers should restore the fbdev console by calling
@@ -59,7 +59,7 @@
  * should also notify the fb helper code from updates to the output
  * configuration by calling drm_fb_helper_hotplug_event(). For easier
  * integration with the output polling code in drm_crtc_helper.c the modeset
- * code proves a ->output_poll_changed callback.
+ * code provides a ->output_poll_changed callback.
  *
  * All other functions exported by the fb helper library can be used to
  * implement the fbdev driver interface by the driver.
@@ -120,7 +120,7 @@
 		mode = &fb_helper_conn->cmdline_mode;
 
 		/* do something on return - turn off connector maybe */
-		if (fb_get_options(drm_get_connector_name(connector), &option))
+		if (fb_get_options(connector->name, &option))
 			continue;
 
 		if (drm_mode_parse_command_line_for_connector(option,
@@ -142,12 +142,12 @@
 				}
 
 				DRM_INFO("forcing %s connector %s\n",
-					 drm_get_connector_name(connector), s);
+					 connector->name, s);
 				connector->force = mode->force;
 			}
 
 			DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
-				      drm_get_connector_name(connector),
+				      connector->name,
 				      mode->xres, mode->yres,
 				      mode->refresh_specified ? mode->refresh : 60,
 				      mode->rb ? " reduced blanking" : "",
@@ -273,15 +273,7 @@
 }
 EXPORT_SYMBOL(drm_fb_helper_debug_leave);
 
-/**
- * drm_fb_helper_restore_fbdev_mode - restore fbdev configuration
- * @fb_helper: fbcon to restore
- *
- * This should be called from driver's drm ->lastclose callback
- * when implementing an fbcon on top of kms using this helper. This ensures that
- * the user isn't greeted with a black screen when e.g. X dies.
- */
-bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 {
 	struct drm_device *dev = fb_helper->dev;
 	struct drm_plane *plane;
@@ -311,7 +303,40 @@
 	}
 	return error;
 }
-EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode);
+/**
+ * drm_fb_helper_restore_fbdev_mode - restore fbdev configuration
+ * @fb_helper: fbcon to restore
+ *
+ * File-local variant that takes no locks itself and simply forwards to
+ * restore_fbdev_mode(), returning that function's result.
+ * Drivers should call drm_fb_helper_restore_fbdev_mode_unlocked() instead.
+ *
+ * Use this variant if you need to bypass locking (panic), or already
+ * hold all modeset locks.  Otherwise use drm_fb_helper_restore_fbdev_mode_unlocked()
+ */
+static bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+{
+	return restore_fbdev_mode(fb_helper);
+}
+
+/**
+ * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration
+ * @fb_helper: fbcon to restore
+ *
+ * Takes all modeset locks around the restore. Meant for a driver's drm
+ * ->lastclose callback so the user isn't greeted with a black screen.
+ */
+bool drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper)
+{
+	struct drm_device *drm = fb_helper->dev;
+	bool failed;
+
+	drm_modeset_lock_all(drm);
+	failed = restore_fbdev_mode(fb_helper);
+	drm_modeset_unlock_all(drm);
+	return failed;
+}
+EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked);
 
 /*
  * restore fbcon display for all kms driver's using this helper, used for sysrq
@@ -326,12 +351,25 @@
 		return false;
 
 	list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) {
-		if (helper->dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		struct drm_device *dev = helper->dev;
+
+		if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 			continue;
 
+		/* NOTE: we use lockless flag below to avoid grabbing other
+		 * modeset locks.  So just trylock the underlying mutex
+		 * directly:
+		 */
+		if (!mutex_trylock(&dev->mode_config.mutex)) {
+			error = true;
+			continue;
+		}
+
 		ret = drm_fb_helper_restore_fbdev_mode(helper);
 		if (ret)
 			error = true;
+
+		mutex_unlock(&dev->mode_config.mutex);
 	}
 	return error;
 }
@@ -811,7 +849,6 @@
 int drm_fb_helper_set_par(struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
-	struct drm_device *dev = fb_helper->dev;
 	struct fb_var_screeninfo *var = &info->var;
 
 	if (var->pixclock != 0) {
@@ -819,9 +856,7 @@
 		return -EINVAL;
 	}
 
-	drm_modeset_lock_all(dev);
-	drm_fb_helper_restore_fbdev_mode(fb_helper);
-	drm_modeset_unlock_all(dev);
+	drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
 
 	if (fb_helper->delayed_hotplug) {
 		fb_helper->delayed_hotplug = false;

diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index e1eba0b..021fe5d 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c

@@ -43,8 +43,7 @@
 DEFINE_MUTEX(drm_global_mutex);
 EXPORT_SYMBOL(drm_global_mutex);
 
-static int drm_open_helper(struct inode *inode, struct file *filp,
-			   struct drm_minor *minor);
+static int drm_open_helper(struct file *filp, struct drm_minor *minor);
 
 static int drm_setup(struct drm_device * dev)
 {
@@ -95,7 +94,7 @@
 	/* share address_space across all char-devs of a single device */
 	filp->f_mapping = dev->anon_inode->i_mapping;
 
-	retcode = drm_open_helper(inode, filp, minor);
+	retcode = drm_open_helper(filp, minor);
 	if (retcode)
 		goto err_undo;
 	if (need_setup) {
@@ -171,7 +170,6 @@
 /**
  * Called whenever a process opens /dev/drm.
  *
- * \param inode device inode.
  * \param filp file pointer.
  * \param minor acquired minor-object.
  * \return zero on success or a negative number on failure.
@@ -179,8 +177,7 @@
  * Creates and initializes a drm_file structure for the file private data in \p
  * filp and add it into the double linked list in \p dev.
  */
-static int drm_open_helper(struct inode *inode, struct file *filp,
-			   struct drm_minor *minor)
+static int drm_open_helper(struct file *filp, struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
 	struct drm_file *priv;

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 9909bef..f7d7119 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c

@@ -474,21 +474,10 @@
 			goto fail;
 		pages[i] = p;
 
-		/* There is a hypothetical issue w/ drivers that require
-		 * buffer memory in the low 4GB.. if the pages are un-
-		 * pinned, and swapped out, they can end up swapped back
-		 * in above 4GB.  If pages are already in memory, then
-		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
-		 * even if the already in-memory page disobeys the mask.
-		 *
-		 * It is only a theoretical issue today, because none of
-		 * the devices with this limitation can be populated with
-		 * enough memory to trigger the issue.  But this BUG_ON()
-		 * is here as a reminder in case the problem with
-		 * shmem_read_mapping_page_gfp() isn't solved by the time
-		 * it does become a real issue.
-		 *
-		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		/* Make sure shmem keeps __GFP_DMA32 allocated pages in the
+		 * correct region during swapin. Note that this requires
+		 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping)
+		 * so shmem can relocate pages during swapin if required.
 		 */
 		BUG_ON((gfpmask & __GFP_DMA32) &&
 				(page_to_pfn(p) >= 0x00100000UL));

diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index 7473035..86feedd 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c

@@ -47,18 +47,16 @@
 	struct drm_minor *minor = node->minor;
 	struct drm_device *dev = minor->dev;
 	struct drm_master *master = minor->master;
-	const char *bus_name;
 	if (!master)
 		return 0;
 
-	bus_name = dev->driver->bus->get_name(dev);
 	if (master->unique) {
 		seq_printf(m, "%s %s %s\n",
-			   bus_name,
+			   dev->driver->name,
 			   dev_name(dev->dev), master->unique);
 	} else {
 		seq_printf(m, "%s %s\n",
-			   bus_name, dev_name(dev->dev));
+			   dev->driver->name, dev_name(dev->dev));
 	}
 	return 0;
 }

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 93a4204..69c61f3 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c

@@ -72,9 +72,6 @@
 drm_unset_busid(struct drm_device *dev,
 		struct drm_master *master)
 {
-	kfree(dev->devname);
-	dev->devname = NULL;
-
 	kfree(master->unique);
 	master->unique = NULL;
 	master->unique_len = 0;
@@ -93,7 +90,8 @@
  * Copies the bus id from userspace into drm_device::unique, and verifies that
  * it matches the device this DRM is attached to (EINVAL otherwise).  Deprecated
  * in interface version 1.1 and will return EBUSY when setversion has requested
- * version 1.1 or greater.
+ * version 1.1 or greater. Also note that KMS is all version 1.1 and later and
+ * UMS was only ever supported on pci devices.
  */
 int drm_setunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
@@ -108,10 +106,13 @@
 	if (!u->unique_len || u->unique_len > 1024)
 		return -EINVAL;
 
-	if (!dev->driver->bus->set_unique)
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+
+	if (WARN_ON(!dev->pdev))
 		return -EINVAL;
 
-	ret = dev->driver->bus->set_unique(dev, master, u);
+	ret = drm_pci_set_unique(dev, master, u);
 	if (ret)
 		goto err;
 
@@ -130,13 +131,25 @@
 	if (master->unique != NULL)
 		drm_unset_busid(dev, master);
 
-	ret = dev->driver->bus->set_busid(dev, master);
-	if (ret)
-		goto err;
+	if (dev->driver->bus && dev->driver->bus->set_busid) {
+		ret = dev->driver->bus->set_busid(dev, master);
+		if (ret) {
+			drm_unset_busid(dev, master);
+			return ret;
+		}
+	} else {
+		if (WARN(dev->unique == NULL,
+			 "No drm_bus.set_busid() implementation provided by "
+			 "%ps. Use drm_dev_set_unique() to set the unique "
+			 "name explicitly.", dev->driver))
+			return -EINVAL;
+
+		master->unique = kstrdup(dev->unique, GFP_KERNEL);
+		if (master->unique)
+			master->unique_len = strlen(dev->unique);
+	}
+
 	return 0;
-err:
-	drm_unset_busid(dev, master);
-	return ret;
 }
 
 /**

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index ec5c3f4..0de123a 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c

@@ -1,6 +1,5 @@
-/**
- * \file drm_irq.c
- * IRQ support
+/*
+ * drm_irq.c IRQ and vblank support
  *
  * \author Rickard E. (Rik) Faith <faith@valinux.com>
  * \author Gareth Hughes <gareth@valinux.com>
@@ -56,33 +55,6 @@
  */
 #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000
 
-/**
- * Get interrupt from bus id.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument, pointing to a drm_irq_busid structure.
- * \return zero on success or a negative number on failure.
- *
- * Finds the PCI device with the specified bus id and gets its IRQ number.
- * This IOCTL is deprecated, and will now return EINVAL for any busid not equal
- * to that of the device that this DRM instance attached to.
- */
-int drm_irq_by_busid(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	struct drm_irq_busid *p = data;
-
-	if (!dev->driver->bus->irq_by_busid)
-		return -EINVAL;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-		return -EINVAL;
-
-	return dev->driver->bus->irq_by_busid(dev, p);
-}
-
 /*
  * Clear vblank timestamp buffer for a crtc.
  */
@@ -167,33 +139,40 @@
 
 static void vblank_disable_fn(unsigned long arg)
 {
-	struct drm_device *dev = (struct drm_device *)arg;
+	struct drm_vblank_crtc *vblank = (void *)arg;
+	struct drm_device *dev = vblank->dev;
 	unsigned long irqflags;
-	int i;
+	int crtc = vblank->crtc;
 
 	if (!dev->vblank_disable_allowed)
 		return;
 
-	for (i = 0; i < dev->num_crtcs; i++) {
-		spin_lock_irqsave(&dev->vbl_lock, irqflags);
-		if (atomic_read(&dev->vblank[i].refcount) == 0 &&
-		    dev->vblank[i].enabled) {
-			DRM_DEBUG("disabling vblank on crtc %d\n", i);
-			vblank_disable_and_save(dev, i);
-		}
-		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	if (atomic_read(&vblank->refcount) == 0 && vblank->enabled) {
+		DRM_DEBUG("disabling vblank on crtc %d\n", crtc);
+		vblank_disable_and_save(dev, crtc);
 	}
+	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
 
+/**
+ * drm_vblank_cleanup - cleanup vblank support
+ * @dev: DRM device
+ *
+ * This function cleans up any resources allocated in drm_vblank_init.
+ */
 void drm_vblank_cleanup(struct drm_device *dev)
 {
+	int crtc;
+
 	/* Bail if the driver didn't call drm_vblank_init() */
 	if (dev->num_crtcs == 0)
 		return;
 
-	del_timer_sync(&dev->vblank_disable_timer);
-
-	vblank_disable_fn((unsigned long)dev);
+	for (crtc = 0; crtc < dev->num_crtcs; crtc++) {
+		del_timer_sync(&dev->vblank[crtc].disable_timer);
+		vblank_disable_fn((unsigned long)&dev->vblank[crtc]);
+	}
 
 	kfree(dev->vblank);
 
@@ -201,12 +180,20 @@
 }
 EXPORT_SYMBOL(drm_vblank_cleanup);
 
+/**
+ * drm_vblank_init - initialize vblank support
+ * @dev: drm_device
+ * @num_crtcs: number of crtcs supported by @dev
+ *
+ * This function initializes vblank support for @num_crtcs display pipelines.
+ *
+ * Returns:
+ * Zero on success or a negative error code on failure.
+ */
 int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 {
 	int i, ret = -ENOMEM;
 
-	setup_timer(&dev->vblank_disable_timer, vblank_disable_fn,
-		    (unsigned long)dev);
 	spin_lock_init(&dev->vbl_lock);
 	spin_lock_init(&dev->vblank_time_lock);
 
@@ -216,8 +203,13 @@
 	if (!dev->vblank)
 		goto err;
 
-	for (i = 0; i < num_crtcs; i++)
+	for (i = 0; i < num_crtcs; i++) {
+		dev->vblank[i].dev = dev;
+		dev->vblank[i].crtc = i;
 		init_waitqueue_head(&dev->vblank[i].queue);
+		setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn,
+			    (unsigned long)&dev->vblank[i]);
+	}
 
 	DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n");
 
@@ -261,42 +253,42 @@
 }
 
 /**
- * Install IRQ handler.
- *
- * \param dev DRM device.
+ * drm_irq_install - install IRQ handler
+ * @dev: DRM device
+ * @irq: IRQ number to install the handler for
  *
  * Initializes the IRQ related data. Installs the handler, calling the driver
- * \c irq_preinstall() and \c irq_postinstall() functions
- * before and after the installation.
+ * irq_preinstall() and irq_postinstall() functions before and after the
+ * installation.
+ *
+ * This is the simplified helper interface provided for drivers with no special
+ * needs. Drivers which need to install interrupt handlers for multiple
+ * interrupts must instead set drm_device->irq_enabled to signal the DRM core
+ * that vblank interrupts are available.
+ *
+ * Returns:
+ * Zero on success or a negative error code on failure.
  */
-int drm_irq_install(struct drm_device *dev)
+int drm_irq_install(struct drm_device *dev, int irq)
 {
 	int ret;
 	unsigned long sh_flags = 0;
-	char *irqname;
 
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
-	if (drm_dev_to_irq(dev) == 0)
+	if (irq == 0)
 		return -EINVAL;
 
-	mutex_lock(&dev->struct_mutex);
-
 	/* Driver must have been initialized */
-	if (!dev->dev_private) {
-		mutex_unlock(&dev->struct_mutex);
+	if (!dev->dev_private)
 		return -EINVAL;
-	}
 
-	if (dev->irq_enabled) {
-		mutex_unlock(&dev->struct_mutex);
+	if (dev->irq_enabled)
 		return -EBUSY;
-	}
 	dev->irq_enabled = true;
-	mutex_unlock(&dev->struct_mutex);
 
-	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
+	DRM_DEBUG("irq=%d\n", irq);
 
 	/* Before installing handler */
 	if (dev->driver->irq_preinstall)
@@ -306,18 +298,11 @@
 	if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
 		sh_flags = IRQF_SHARED;
 
-	if (dev->devname)
-		irqname = dev->devname;
-	else
-		irqname = dev->driver->name;
-
-	ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
-			  sh_flags, irqname, dev);
+	ret = request_irq(irq, dev->driver->irq_handler,
+			  sh_flags, dev->driver->name, dev);
 
 	if (ret < 0) {
-		mutex_lock(&dev->struct_mutex);
 		dev->irq_enabled = false;
-		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
@@ -329,12 +314,12 @@
 		ret = dev->driver->irq_postinstall(dev);
 
 	if (ret < 0) {
-		mutex_lock(&dev->struct_mutex);
 		dev->irq_enabled = false;
-		mutex_unlock(&dev->struct_mutex);
 		if (!drm_core_check_feature(dev, DRIVER_MODESET))
 			vga_client_register(dev->pdev, NULL, NULL, NULL);
-		free_irq(drm_dev_to_irq(dev), dev);
+		free_irq(irq, dev);
+	} else {
+		dev->irq = irq;
 	}
 
 	return ret;
@@ -342,11 +327,20 @@
 EXPORT_SYMBOL(drm_irq_install);
 
 /**
- * Uninstall the IRQ handler.
+ * drm_irq_uninstall - uninstall the IRQ handler
+ * @dev: DRM device
  *
- * \param dev DRM device.
+ * Calls the driver's irq_uninstall() function and unregisters the IRQ handler.
+ * This should only be called by drivers which used drm_irq_install() to set up
+ * their interrupt handler. Other drivers must only reset
+ * drm_device->irq_enabled to false.
  *
- * Calls the driver's \c irq_uninstall() function, and stops the irq.
+ * Note that for kernel modesetting drivers it is a bug if this function fails.
+ * The sanity checks are only to catch buggy user modesetting drivers which call
+ * the same function through an ioctl.
+ *
+ * Returns:
+ * Zero on success or a negative error code on failure.
  */
 int drm_irq_uninstall(struct drm_device *dev)
 {
@@ -357,10 +351,8 @@
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
-	mutex_lock(&dev->struct_mutex);
 	irq_enabled = dev->irq_enabled;
 	dev->irq_enabled = false;
-	mutex_unlock(&dev->struct_mutex);
 
 	/*
 	 * Wake up any waiters so they don't hang.
@@ -379,7 +371,7 @@
 	if (!irq_enabled)
 		return -EINVAL;
 
-	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
+	DRM_DEBUG("irq=%d\n", dev->irq);
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		vga_client_register(dev->pdev, NULL, NULL, NULL);
@@ -387,13 +379,13 @@
 	if (dev->driver->irq_uninstall)
 		dev->driver->irq_uninstall(dev);
 
-	free_irq(drm_dev_to_irq(dev), dev);
+	free_irq(dev->irq, dev);
 
 	return 0;
 }
 EXPORT_SYMBOL(drm_irq_uninstall);
 
-/**
+/*
  * IRQ control ioctl.
  *
  * \param inode device inode.
@@ -408,43 +400,52 @@
 		struct drm_file *file_priv)
 {
 	struct drm_control *ctl = data;
+	int ret = 0, irq;
 
 	/* if we haven't irq we fallback for compatibility reasons -
 	 * this used to be a separate function in drm_dma.h
 	 */
 
+	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
+		return 0;
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+	/* UMS was only ever supported on pci devices. */
+	if (WARN_ON(!dev->pdev))
+		return -EINVAL;
 
 	switch (ctl->func) {
 	case DRM_INST_HANDLER:
-		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-			return 0;
-		if (drm_core_check_feature(dev, DRIVER_MODESET))
-			return 0;
+		irq = dev->pdev->irq;
+
 		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
-		    ctl->irq != drm_dev_to_irq(dev))
+		    ctl->irq != irq)
 			return -EINVAL;
-		return drm_irq_install(dev);
+		mutex_lock(&dev->struct_mutex);
+		ret = drm_irq_install(dev, irq);
+		mutex_unlock(&dev->struct_mutex);
+
+		return ret;
 	case DRM_UNINST_HANDLER:
-		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-			return 0;
-		if (drm_core_check_feature(dev, DRIVER_MODESET))
-			return 0;
-		return drm_irq_uninstall(dev);
+		mutex_lock(&dev->struct_mutex);
+		ret = drm_irq_uninstall(dev);
+		mutex_unlock(&dev->struct_mutex);
+
+		return ret;
 	default:
 		return -EINVAL;
 	}
 }
 
 /**
- * drm_calc_timestamping_constants - Calculate vblank timestamp constants
- *
- * @crtc drm_crtc whose timestamp constants should be updated.
- * @mode display mode containing the scanout timings
+ * drm_calc_timestamping_constants - calculate vblank timestamp constants
+ * @crtc: drm_crtc whose timestamp constants should be updated.
+ * @mode: display mode containing the scanout timings
  *
  * Calculate and store various constants which are later
  * needed by vblank and swap-completion timestamping, e.g,
  * by drm_calc_vbltimestamp_from_scanoutpos(). They are
- * derived from crtc's true scanout timing, so they take
+ * derived from CRTC's true scanout timing, so they take
  * things like panel scaling or other adjustments into account.
  */
 void drm_calc_timestamping_constants(struct drm_crtc *crtc,
@@ -489,11 +490,22 @@
 EXPORT_SYMBOL(drm_calc_timestamping_constants);
 
 /**
- * drm_calc_vbltimestamp_from_scanoutpos - helper routine for kms
- * drivers. Implements calculation of exact vblank timestamps from
- * given drm_display_mode timings and current video scanout position
- * of a crtc. This can be called from within get_vblank_timestamp()
- * implementation of a kms driver to implement the actual timestamping.
+ * drm_calc_vbltimestamp_from_scanoutpos - precise vblank timestamp helper
+ * @dev: DRM device
+ * @crtc: Which CRTC's vblank timestamp to retrieve
+ * @max_error: Desired maximum allowable error in timestamps (nanosecs)
+ *             On return contains true maximum error of timestamp
+ * @vblank_time: Pointer to struct timeval which should receive the timestamp
+ * @flags: Flags to pass to driver:
+ *         0 = Default,
+ *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl IRQ handler
+ * @refcrtc: CRTC which defines scanout timing
+ * @mode: mode which defines the scanout timings
+ *
+ * Implements calculation of exact vblank timestamps from given drm_display_mode
+ * timings and current video scanout position of a CRTC. This can be called from
+ * within get_vblank_timestamp() implementation of a kms driver to implement the
+ * actual timestamping.
  *
  * Should return timestamps conforming to the OML_sync_control OpenML
  * extension specification. The timestamp corresponds to the end of
@@ -508,21 +520,11 @@
  * returns as no operation if a doublescan or interlaced video mode is
  * active. Higher level code is expected to handle this.
  *
- * @dev: DRM device.
- * @crtc: Which crtc's vblank timestamp to retrieve.
- * @max_error: Desired maximum allowable error in timestamps (nanosecs).
- *             On return contains true maximum error of timestamp.
- * @vblank_time: Pointer to struct timeval which should receive the timestamp.
- * @flags: Flags to pass to driver:
- *         0 = Default.
- *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl irq handler.
- * @refcrtc: drm_crtc* of crtc which defines scanout timing.
- * @mode: mode which defines the scanout timings
- *
- * Returns negative value on error, failure or if not supported in current
+ * Returns:
+ * Negative value on error, failure or if not supported in current
  * video mode:
  *
- * -EINVAL   - Invalid crtc.
+ * -EINVAL   - Invalid CRTC.
  * -EAGAIN   - Temporary unavailable, e.g., called before initial modeset.
  * -ENOTSUPP - Function not supported in current display mode.
  * -EIO      - Failed, e.g., due to failed scanout position query.
@@ -671,23 +673,23 @@
 
 /**
  * drm_get_last_vbltimestamp - retrieve raw timestamp for the most recent
- * vblank interval.
- *
+ * 			       vblank interval
  * @dev: DRM device
- * @crtc: which crtc's vblank timestamp to retrieve
+ * @crtc: which CRTC's vblank timestamp to retrieve
  * @tvblank: Pointer to target struct timeval which should receive the timestamp
  * @flags: Flags to pass to driver:
- *         0 = Default.
- *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl irq handler.
+ *         0 = Default,
+ *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl IRQ handler
  *
  * Fetches the system timestamp corresponding to the time of the most recent
- * vblank interval on specified crtc. May call into kms-driver to
+ * vblank interval on specified CRTC. May call into kms-driver to
  * compute the timestamp with a high-precision GPU specific method.
  *
  * Returns zero if timestamp originates from uncorrected do_gettimeofday()
  * call, i.e., it isn't very precisely locked to the true vblank.
  *
- * Returns non-zero if timestamp is considered to be very precise.
+ * Returns:
+ * Non-zero if timestamp is considered to be very precise, zero otherwise.
  */
 u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 			      struct timeval *tvblank, unsigned flags)
@@ -722,6 +724,9 @@
  * Fetches the "cooked" vblank count value that represents the number of
  * vblank events since the system was booted, including lost events due to
  * modesetting activity.
+ *
+ * Returns:
+ * The software vblank counter.
  */
 u32 drm_vblank_count(struct drm_device *dev, int crtc)
 {
@@ -740,8 +745,7 @@
  * Fetches the "cooked" vblank count value that represents the number of
  * vblank events since the system was booted, including lost events due to
  * modesetting activity. Returns corresponding system timestamp of the time
- * of the vblank interval that corresponds to the current value vblank counter
- * value.
+ * of the vblank interval that corresponds to the current vblank counter value.
  */
 u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 			      struct timeval *vblanktime)
@@ -870,6 +874,42 @@
 }
 
 /**
+ * drm_vblank_enable - enable the vblank interrupt on a CRTC
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ */
+static int drm_vblank_enable(struct drm_device *dev, int crtc)
+{
+	int ret = 0;
+
+	assert_spin_locked(&dev->vbl_lock);
+
+	spin_lock(&dev->vblank_time_lock);
+
+	if (!dev->vblank[crtc].enabled) {
+		/*
+		 * Enable vblank irqs under vblank_time_lock protection.
+		 * All vblank count & timestamp updates are held off
+		 * until we are done reinitializing master counter and
+		 * timestamps. Filtercode in drm_handle_vblank() will
+		 * prevent double-accounting of same vblank interval.
+		 */
+		ret = dev->driver->enable_vblank(dev, crtc);
+		DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret);
+		if (ret)
+			atomic_dec(&dev->vblank[crtc].refcount);
+		else {
+			dev->vblank[crtc].enabled = true;
+			drm_update_vblank_count(dev, crtc);
+		}
+	}
+
+	spin_unlock(&dev->vblank_time_lock);
+
+	return ret;
+}
+
+/**
  * drm_vblank_get - get a reference count on vblank events
  * @dev: DRM device
  * @crtc: which CRTC to own
@@ -877,36 +917,20 @@
  * Acquire a reference count on vblank events to avoid having them disabled
  * while in use.
  *
- * RETURNS
+ * This is the legacy version of drm_crtc_vblank_get().
+ *
+ * Returns:
  * Zero on success, nonzero on failure.
  */
 int drm_vblank_get(struct drm_device *dev, int crtc)
 {
-	unsigned long irqflags, irqflags2;
+	unsigned long irqflags;
 	int ret = 0;
 
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
 	/* Going from 0->1 means we have to enable interrupts again */
 	if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) {
-		spin_lock_irqsave(&dev->vblank_time_lock, irqflags2);
-		if (!dev->vblank[crtc].enabled) {
-			/* Enable vblank irqs under vblank_time_lock protection.
-			 * All vblank count & timestamp updates are held off
-			 * until we are done reinitializing master counter and
-			 * timestamps. Filtercode in drm_handle_vblank() will
-			 * prevent double-accounting of same vblank interval.
-			 */
-			ret = dev->driver->enable_vblank(dev, crtc);
-			DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n",
-				  crtc, ret);
-			if (ret)
-				atomic_dec(&dev->vblank[crtc].refcount);
-			else {
-				dev->vblank[crtc].enabled = true;
-				drm_update_vblank_count(dev, crtc);
-			}
-		}
-		spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags2);
+		ret = drm_vblank_enable(dev, crtc);
 	} else {
 		if (!dev->vblank[crtc].enabled) {
 			atomic_dec(&dev->vblank[crtc].refcount);
@@ -920,12 +944,32 @@
 EXPORT_SYMBOL(drm_vblank_get);
 
 /**
+ * drm_crtc_vblank_get - get a reference count on vblank events
+ * @crtc: which CRTC to own
+ *
+ * Acquire a reference count on vblank events to avoid having them disabled
+ * while in use.
+ *
+ * This is the native kms version of drm_vblank_get().
+ *
+ * Returns:
+ * Zero on success, nonzero on failure.
+ */
+int drm_crtc_vblank_get(struct drm_crtc *crtc)
+{
+	return drm_vblank_get(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_get);
+
+/**
  * drm_vblank_put - give up ownership of vblank events
  * @dev: DRM device
  * @crtc: which counter to give up
  *
  * Release ownership of a given vblank counter, turning off interrupts
  * if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
+ *
+ * This is the legacy version of drm_crtc_vblank_put().
  */
 void drm_vblank_put(struct drm_device *dev, int crtc)
 {
@@ -934,17 +978,39 @@
 	/* Last user schedules interrupt disable */
 	if (atomic_dec_and_test(&dev->vblank[crtc].refcount) &&
 	    (drm_vblank_offdelay > 0))
-		mod_timer(&dev->vblank_disable_timer,
+		mod_timer(&dev->vblank[crtc].disable_timer,
 			  jiffies + ((drm_vblank_offdelay * HZ)/1000));
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
 /**
+ * drm_crtc_vblank_put - give up ownership of vblank events
+ * @crtc: which counter to give up
+ *
+ * Release ownership of a given vblank counter, turning off interrupts
+ * if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
+ *
+ * This is the native kms version of drm_vblank_put().
+ */
+void drm_crtc_vblank_put(struct drm_crtc *crtc)
+{
+	drm_vblank_put(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_put);
+
+/**
  * drm_vblank_off - disable vblank events on a CRTC
  * @dev: DRM device
  * @crtc: CRTC in question
  *
- * Caller must hold event lock.
+ * Drivers can use this function to shut down the vblank interrupt handling when
+ * disabling a crtc. This function ensures that the latest vblank frame count is
+ * stored so that drm_vblank_on() can restore it again.
+ *
+ * Drivers must use this function when the hardware vblank counter can get
+ * reset, e.g. when suspending.
+ *
+ * This is the legacy version of drm_crtc_vblank_off().
  */
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
@@ -978,12 +1044,87 @@
 EXPORT_SYMBOL(drm_vblank_off);
 
 /**
+ * drm_crtc_vblank_off - disable vblank events on a CRTC
+ * @crtc: CRTC in question
+ *
+ * Drivers can use this function to shut down the vblank interrupt handling when
+ * disabling a crtc. This function ensures that the latest vblank frame count is
+ * stored so that drm_vblank_on can restore it again.
+ *
+ * Drivers must use this function when the hardware vblank counter can get
+ * reset, e.g. when suspending.
+ *
+ * This is the native kms version of drm_vblank_off().
+ */
+void drm_crtc_vblank_off(struct drm_crtc *crtc)
+{
+	drm_vblank_off(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_off);
+
+/**
+ * drm_vblank_on - enable vblank events on a CRTC
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ *
+ * This function restores the vblank interrupt state captured with
+ * drm_vblank_off() again. Note that calls to drm_vblank_on() and
+ * drm_vblank_off() can be unbalanced and so can also be unconditionally called
+ * in driver load code to reflect the current hardware state of the crtc.
+ *
+ * This is the legacy version of drm_crtc_vblank_on().
+ */
+void drm_vblank_on(struct drm_device *dev, int crtc)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	/* re-enable interrupts if there are users left */
+	if (atomic_read(&dev->vblank[crtc].refcount) != 0)
+		WARN_ON(drm_vblank_enable(dev, crtc));
+	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+}
+EXPORT_SYMBOL(drm_vblank_on);
+
+/**
+ * drm_crtc_vblank_on - enable vblank events on a CRTC
+ * @crtc: CRTC in question
+ *
+ * This function restores the vblank interrupt state captured with
+ * drm_vblank_off() again. Note that calls to drm_vblank_on() and
+ * drm_vblank_off() can be unbalanced and so can also be unconditionally called
+ * in driver load code to reflect the current hardware state of the crtc.
+ *
+ * This is the native kms version of drm_vblank_on().
+ */
+void drm_crtc_vblank_on(struct drm_crtc *crtc)
+{
+	drm_vblank_on(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_on);
+
+/**
  * drm_vblank_pre_modeset - account for vblanks across mode sets
  * @dev: DRM device
  * @crtc: CRTC in question
  *
  * Account for vblank events across mode setting events, which will likely
  * reset the hardware frame counter.
+ *
+ * This is done by grabbing a temporary vblank reference to ensure that the
+ * vblank interrupt keeps running across the modeset sequence. With this the
+ * software-side vblank frame counting will ensure that there are no jumps or
+ * discontinuities.
+ *
+ * Unfortunately this approach is racy and also doesn't work when the vblank
+ * interrupt stops running, e.g. across system suspend resume. It is therefore
+ * highly recommended that drivers use the newer drm_vblank_off() and
+ * drm_vblank_on() instead. drm_vblank_pre_modeset() only works correctly when
+ * using "cooked" software vblank frame counters and not relying on any hardware
+ * counters.
+ *
+ * Drivers must call drm_vblank_post_modeset() when re-enabling the same crtc
+ * again.
  */
 void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
 {
@@ -1005,6 +1146,14 @@
 }
 EXPORT_SYMBOL(drm_vblank_pre_modeset);
 
+/**
+ * drm_vblank_post_modeset - undo drm_vblank_pre_modeset changes
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ *
+ * This function again drops the temporary vblank reference acquired in
+ * drm_vblank_pre_modeset.
+ */
 void drm_vblank_post_modeset(struct drm_device *dev, int crtc)
 {
 	unsigned long irqflags;
@@ -1026,7 +1175,7 @@
 }
 EXPORT_SYMBOL(drm_vblank_post_modeset);
 
-/**
+/*
  * drm_modeset_ctl - handle vblank event counter changes across mode switch
  * @DRM_IOCTL_ARGS: standard ioctl arguments
  *
@@ -1139,7 +1288,7 @@
 	return ret;
 }
 
-/**
+/*
  * Wait for VBLANK.
  *
  * \param inode device inode.
@@ -1150,7 +1299,7 @@
  *
  * This function enables the vblank interrupt on the pipe requested, then
  * sleeps waiting for the requested sequence number to occur, and drops
- * the vblank interrupt refcount afterwards. (vblank irq disable follows that
+ * the vblank interrupt refcount afterwards. (vblank IRQ disable follows that
  * after a timeout with no further vblank waits scheduled).
  */
 int drm_wait_vblank(struct drm_device *dev, void *data,
@@ -1160,9 +1309,8 @@
 	int ret;
 	unsigned int flags, seq, crtc, high_crtc;
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-		if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled))
-			return -EINVAL;
+	if (!dev->irq_enabled)
+		return -EINVAL;
 
 	if (vblwait->request.type & _DRM_VBLANK_SIGNAL)
 		return -EINVAL;
@@ -1222,6 +1370,7 @@
 	DRM_WAIT_ON(ret, dev->vblank[crtc].queue, 3 * HZ,
 		    (((drm_vblank_count(dev, crtc) -
 		       vblwait->request.sequence) <= (1 << 23)) ||
+		     !dev->vblank[crtc].enabled ||
 		     !dev->irq_enabled));
 
 	if (ret != -EINTR) {

diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 09821f4..e633df2 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c

@@ -282,6 +282,14 @@
 	return drv->remove(dsi);
 }
 
+static void mipi_dsi_drv_shutdown(struct device *dev)
+{
+	struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+
+	drv->shutdown(dsi);
+}
+
 /**
  * mipi_dsi_driver_register - register a driver for DSI devices
  * @drv: DSI driver structure
@@ -293,6 +301,8 @@
 		drv->driver.probe = mipi_dsi_drv_probe;
 	if (drv->remove)
 		drv->driver.remove = mipi_dsi_drv_remove;
+	if (drv->shutdown)
+		drv->driver.shutdown = mipi_dsi_drv_shutdown;
 
 	return driver_register(&drv->driver);
 }

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 8b41057..bedf189 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c

@@ -1013,6 +1013,7 @@
 /**
  * drm_mode_connector_list_update - update the mode list for the connector
  * @connector: the connector to update
+ * @merge_type_bits: whether to merge or overwrite type bits.
  *
  * This moves the modes from the @connector probed_modes list
  * to the actual mode list. It compares the probed mode against the current
@@ -1021,7 +1022,8 @@
  * This is just a helper functions doesn't validate any modes itself and also
  * doesn't prune any invalid modes. Callers need to do that themselves.
  */
-void drm_mode_connector_list_update(struct drm_connector *connector)
+void drm_mode_connector_list_update(struct drm_connector *connector,
+				    bool merge_type_bits)
 {
 	struct drm_display_mode *mode;
 	struct drm_display_mode *pmode, *pt;
@@ -1039,7 +1041,10 @@
 				/* if equal delete the probed mode */
 				mode->status = pmode->status;
 				/* Merge type bits together */
-				mode->type |= pmode->type;
+				if (merge_type_bits)
+					mode->type |= pmode->type;
+				else
+					mode->type = pmode->type;
 				list_del(&pmode->head);
 				drm_mode_destroy(connector->dev, pmode);
 				break;

diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
new file mode 100644
index 0000000..7c2497d
--- /dev/null
+++ b/drivers/gpu/drm/drm_modeset_lock.c

@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_modeset_lock.h>
+
+/**
+ * DOC: kms locking
+ *
+ * As KMS moves toward more fine grained locking, and atomic ioctl where
+ * userspace can indirectly control locking order, it becomes necessary
+ * to use ww_mutex and acquire-contexts to avoid deadlocks.  But because
+ * the locking is more distributed around the driver code, we want a bit
+ * of extra utility/tracking out of our acquire-ctx.  This is provided
+ * by drm_modeset_lock / drm_modeset_acquire_ctx.
+ *
+ * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt
+ *
+ * The basic usage pattern is to:
+ *
+ *     drm_modeset_acquire_init(&ctx)
+ *   retry:
+ *     foreach (lock in random_ordered_set_of_locks) {
+ *       ret = drm_modeset_lock(lock, &ctx)
+ *       if (ret == -EDEADLK) {
+ *          drm_modeset_backoff(&ctx);
+ *          goto retry;
+ *       }
+ *     }
+ *
+ *     ... do stuff ...
+ *
+ *     drm_modeset_drop_locks(&ctx);
+ *     drm_modeset_acquire_fini(&ctx);
+ */
+
+/**
+ * drm_modeset_acquire_init - initialize acquire context
+ * @ctx: the acquire context
+ * @flags: reserved for future use; currently ignored
+ */
+void drm_modeset_acquire_init(struct drm_modeset_acquire_ctx *ctx,
+		uint32_t flags)
+{
+	ctx->contended = NULL;	/* WARN_ON()s below must not see stale data */
+	ww_acquire_init(&ctx->ww_ctx, &crtc_ww_class);
+	INIT_LIST_HEAD(&ctx->locked);
+}
+EXPORT_SYMBOL(drm_modeset_acquire_init);
+
+/**
+ * drm_modeset_acquire_fini - cleanup acquire context
+ * @ctx: the acquire context
+ */
+void drm_modeset_acquire_fini(struct drm_modeset_acquire_ctx *ctx)
+{
+	ww_acquire_fini(&ctx->ww_ctx);
+}
+EXPORT_SYMBOL(drm_modeset_acquire_fini);
+
+/**
+ * drm_modeset_drop_locks - drop all locks
+ * @ctx: the acquire context
+ *
+ * Drop all locks currently held against this acquire context.
+ */
+void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx)
+{
+	WARN_ON(ctx->contended);
+	while (!list_empty(&ctx->locked)) {
+		struct drm_modeset_lock *lock;
+
+		lock = list_first_entry(&ctx->locked,
+				struct drm_modeset_lock, head);
+
+		drm_modeset_unlock(lock);
+	}
+}
+EXPORT_SYMBOL(drm_modeset_drop_locks);
+
+static inline int modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx,
+		bool interruptible, bool slow)
+{
+	int ret;
+
+	WARN_ON(ctx->contended);
+
+	if (interruptible && slow) {
+		ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx);
+	} else if (interruptible) {
+		ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx);
+	} else if (slow) {
+		ww_mutex_lock_slow(&lock->mutex, &ctx->ww_ctx);
+		ret = 0;
+	} else {
+		ret = ww_mutex_lock(&lock->mutex, &ctx->ww_ctx);
+	}
+	if (!ret) {
+		WARN_ON(!list_empty(&lock->head));
+		list_add(&lock->head, &ctx->locked);
+	} else if (ret == -EALREADY) {
+		/* we already hold the lock.. this is fine.  For atomic
+		 * we will need to be able to drm_modeset_lock() things
+		 * without having to keep track of what is already locked
+		 * or not.
+		 */
+		ret = 0;
+	} else if (ret == -EDEADLK) {
+		ctx->contended = lock;
+	}
+
+	return ret;
+}
+
+static int modeset_backoff(struct drm_modeset_acquire_ctx *ctx,
+		bool interruptible)
+{
+	struct drm_modeset_lock *contended = ctx->contended;
+
+	ctx->contended = NULL;
+
+	if (WARN_ON(!contended))
+		return 0;
+
+	drm_modeset_drop_locks(ctx);
+
+	return modeset_lock(contended, ctx, interruptible, true);
+}
+
+/**
+ * drm_modeset_backoff - deadlock avoidance backoff
+ * @ctx: the acquire context
+ *
+ * If deadlock is detected (ie. drm_modeset_lock() returns -EDEADLK),
+ * you must call this function to drop all currently held locks and
+ * block until the contended lock becomes available.
+ */
+void drm_modeset_backoff(struct drm_modeset_acquire_ctx *ctx)
+{
+	modeset_backoff(ctx, false);
+}
+EXPORT_SYMBOL(drm_modeset_backoff);
+
+/**
+ * drm_modeset_backoff_interruptible - deadlock avoidance backoff
+ * @ctx: the acquire context
+ *
+ * Interruptible version of drm_modeset_backoff()
+ */
+int drm_modeset_backoff_interruptible(struct drm_modeset_acquire_ctx *ctx)
+{
+	return modeset_backoff(ctx, true);
+}
+EXPORT_SYMBOL(drm_modeset_backoff_interruptible);
+
+/**
+ * drm_modeset_lock - take modeset lock
+ * @lock: lock to take
+ * @ctx: acquire ctx
+ *
+ * If ctx is not NULL, then its ww acquire context is used and the
+ * lock will be tracked by the context and can be released by calling
+ * drm_modeset_drop_locks().  If -EDEADLK is returned, this means a
+ * deadlock scenario has been detected and it is an error to attempt
+ * to take any more locks without first calling drm_modeset_backoff().
+ */
+int drm_modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	if (ctx)
+		return modeset_lock(lock, ctx, false, false);
+
+	ww_mutex_lock(&lock->mutex, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(drm_modeset_lock);
+
+/**
+ * drm_modeset_lock_interruptible - take modeset lock
+ * @lock: lock to take
+ * @ctx: acquire ctx
+ *
+ * Interruptible version of drm_modeset_lock()
+ */
+int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	if (ctx)
+		return modeset_lock(lock, ctx, true, false);
+
+	return ww_mutex_lock_interruptible(&lock->mutex, NULL);
+}
+EXPORT_SYMBOL(drm_modeset_lock_interruptible);
+
+/**
+ * drm_modeset_unlock - drop modeset lock
+ * @lock: lock to release
+ */
+void drm_modeset_unlock(struct drm_modeset_lock *lock)
+{
+	list_del_init(&lock->head);
+	ww_mutex_unlock(&lock->mutex);
+}
+EXPORT_SYMBOL(drm_modeset_unlock);
+
+/* Temporary.. until we have sufficiently fine grained locking, there
+ * are a couple scenarios where it is convenient to grab all crtc locks.
+ * It is planned to remove this:
+ */
+int drm_modeset_lock_all_crtcs(struct drm_device *dev,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	list_for_each_entry(crtc, &config->crtc_list, head) {
+		ret = drm_modeset_lock(&crtc->mutex, ctx);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_modeset_lock_all_crtcs);

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 9c696a5..020cfd9 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c

@@ -1,17 +1,3 @@
-/* drm_pci.h -- PCI DMA memory management wrappers for DRM -*- linux-c -*- */
-/**
- * \file drm_pci.c
- * \brief Functions and ioctls to manage PCI memory
- *
- * \warning These interfaces aren't stable yet.
- *
- * \todo Implement the remaining ioctl's for the PCI pools.
- * \todo The wrappers here are so thin that they would be better off inlined..
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- * \author Leif Delgass <ldelgass@retinalburn.net>
- */
-
 /*
  * Copyright 2003 José Fonseca.
  * Copyright 2003 Leif Delgass.
@@ -42,12 +28,14 @@
 #include <linux/export.h>
 #include <drm/drmP.h>
 
-/**********************************************************************/
-/** \name PCI memory */
-/*@{*/
-
 /**
- * \brief Allocate a PCI consistent memory block, for DMA.
+ * drm_pci_alloc - Allocate a PCI consistent memory block, for DMA.
+ * @dev: DRM device
+ * @size: size of block to allocate
+ * @align: alignment of block
+ *
+ * Return: A handle to the allocated memory block on success or NULL on
+ * failure.
  */
 drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align)
 {
@@ -88,8 +76,8 @@
 
 EXPORT_SYMBOL(drm_pci_alloc);
 
-/**
- * \brief Free a PCI consistent memory block without freeing its descriptor.
+/*
+ * Free a PCI consistent memory block without freeing its descriptor.
  *
  * This function is for internal use in the Linux-specific DRM core code.
  */
@@ -111,7 +99,9 @@
 }
 
 /**
- * \brief Free a PCI consistent memory block
+ * drm_pci_free - Free a PCI consistent memory block
+ * @dev: DRM device
+ * @dmah: handle to memory block
  */
 void drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 {
@@ -137,21 +127,9 @@
 	return pci_domain_nr(dev->pdev->bus);
 }
 
-static int drm_pci_get_irq(struct drm_device *dev)
-{
-	return dev->pdev->irq;
-}
-
-static const char *drm_pci_get_name(struct drm_device *dev)
-{
-	struct pci_driver *pdriver = dev->driver->kdriver.pci;
-	return pdriver->name;
-}
-
 static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 {
 	int len, ret;
-	struct pci_driver *pdriver = dev->driver->kdriver.pci;
 	master->unique_len = 40;
 	master->unique_size = master->unique_len;
 	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
@@ -173,29 +151,16 @@
 	} else
 		master->unique_len = len;
 
-	dev->devname =
-		kmalloc(strlen(pdriver->name) +
-			master->unique_len + 2, GFP_KERNEL);
-
-	if (dev->devname == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", pdriver->name,
-		master->unique);
-
 	return 0;
 err:
 	return ret;
 }
 
-static int drm_pci_set_unique(struct drm_device *dev,
-			      struct drm_master *master,
-			      struct drm_unique *u)
+int drm_pci_set_unique(struct drm_device *dev,
+		       struct drm_master *master,
+		       struct drm_unique *u)
 {
 	int domain, bus, slot, func, ret;
-	const char *bus_name;
 
 	master->unique_len = u->unique_len;
 	master->unique_size = u->unique_len + 1;
@@ -212,17 +177,6 @@
 
 	master->unique[master->unique_len] = '\0';
 
-	bus_name = dev->driver->bus->get_name(dev);
-	dev->devname = kmalloc(strlen(bus_name) +
-			       strlen(master->unique) + 2, GFP_KERNEL);
-	if (!dev->devname) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", bus_name,
-		master->unique);
-
 	/* Return error if the busid submitted doesn't match the device's actual
 	 * busid.
 	 */
@@ -247,7 +201,6 @@
 	return ret;
 }
 
-
 static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p)
 {
 	if ((p->busnum >> 8) != drm_get_pci_domain(dev) ||
@@ -262,6 +215,36 @@
 	return 0;
 }
 
+/**
+ * drm_irq_by_busid - Get interrupt from bus ID
+ * @dev: DRM device
+ * @data: IOCTL parameter pointing to a drm_irq_busid structure
+ * @file_priv: DRM file private.
+ *
+ * Finds the PCI device with the specified bus id and gets its IRQ number.
+ * This IOCTL is deprecated, and will now return EINVAL for any busid not equal
+ * to that of the device that this DRM instance is attached to.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int drm_irq_by_busid(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_irq_busid *p = data;
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
+	/* UMS was only ever supported on PCI devices. */
+	if (WARN_ON(!dev->pdev))
+		return -EINVAL;
+
+	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
+		return -EINVAL;
+
+	return drm_pci_irq_by_busid(dev, p);
+}
+
 static void drm_pci_agp_init(struct drm_device *dev)
 {
 	if (drm_core_check_feature(dev, DRIVER_USE_AGP)) {
@@ -287,24 +270,20 @@
 }
 
 static struct drm_bus drm_pci_bus = {
-	.bus_type = DRIVER_BUS_PCI,
-	.get_irq = drm_pci_get_irq,
-	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
-	.set_unique = drm_pci_set_unique,
-	.irq_by_busid = drm_pci_irq_by_busid,
 };
 
 /**
- * Register.
- *
- * \param pdev - PCI device structure
- * \param ent entry from the PCI ID table with device type flags
- * \return zero on success or a negative number on failure.
+ * drm_get_pci_dev - Register a PCI device with the DRM subsystem
+ * @pdev: PCI device
+ * @ent: entry from the PCI ID table that matches @pdev
+ * @driver: DRM device driver
  *
  * Attempt to gets inter module "drm" information. If we are first
  * then register the character device and inter module information.
  * Try and register, if we fail to register, backout previous work.
+ *
+ * Return: 0 on success or a negative error code on failure.
  */
 int drm_get_pci_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 		    struct drm_driver *driver)
@@ -357,15 +336,14 @@
 EXPORT_SYMBOL(drm_get_pci_dev);
 
 /**
- * PCI device initialization. Called direct from modules at load time.
+ * drm_pci_init - Register matching PCI devices with the DRM subsystem
+ * @driver: DRM device driver
+ * @pdriver: PCI device driver
  *
- * \return zero on success or a negative number on failure.
+ * Initializes drm_device structures, registering the stubs and initializing
+ * the AGP device.
  *
- * Initializes a drm_device structures,registering the
- * stubs and initializing the AGP device.
- *
- * Expands the \c DRIVER_PREINIT and \c DRIVER_POST_INIT macros before and
- * after the initialization for driver customization.
+ * Return: 0 on success or a negative error code on failure.
  */
 int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 {
@@ -375,7 +353,6 @@
 
 	DRM_DEBUG("\n");
 
-	driver->kdriver.pci = pdriver;
 	driver->bus = &drm_pci_bus;
 
 	if (driver->driver_features & DRIVER_MODESET)
@@ -453,11 +430,31 @@
 }
 
 void drm_pci_agp_destroy(struct drm_device *dev) {}
+
+int drm_irq_by_busid(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	return -EINVAL;
+}
+
+int drm_pci_set_unique(struct drm_device *dev,
+		       struct drm_master *master,
+		       struct drm_unique *u)
+{
+	return -EINVAL;
+}
 #endif
 
 EXPORT_SYMBOL(drm_pci_init);
 
-/*@}*/
+/**
+ * drm_pci_exit - Unregister matching PCI devices from the DRM subsystem
+ * @driver: DRM device driver
+ * @pdriver: PCI device driver
+ *
+ * Unregisters one or more devices matched by a PCI driver from the DRM
+ * subsystem.
+ */
 void drm_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver)
 {
 	struct drm_device *dev, *tmp;

diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index d2b1c03..6d13314 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c

@@ -25,7 +25,9 @@
 
 #include <linux/list.h>
 #include <drm/drmP.h>
+#include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
+#include <drm/drm_plane_helper.h>
 
 #define SUBPIXEL_MASK 0xffff
 
@@ -36,9 +38,9 @@
  * creating the primary plane.  However drivers that still call
  * drm_plane_init() will use this minimal format list as the default.
  */
-const static uint32_t safe_modeset_formats[] = {
-       DRM_FORMAT_XRGB8888,
-       DRM_FORMAT_ARGB8888,
+static const uint32_t safe_modeset_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
 };
 
 /*
@@ -54,6 +56,13 @@
 	struct drm_connector *connector;
 	int count = 0;
 
+	/*
+	 * Note: Once we change the plane hooks to more fine-grained locking we
+	 * need to grab the connection_mutex here to be able to make these
+	 * checks.
+	 */
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder && connector->encoder->crtc == crtc) {
 			if (connector_list != NULL && count < num_connectors)
@@ -66,6 +75,79 @@
 }
 
 /**
+ * drm_plane_helper_check_update() - Check plane update for validity
+ * @plane: plane object to update
+ * @crtc: owning CRTC of owning plane
+ * @fb: framebuffer to flip onto plane
+ * @src: source coordinates in 16.16 fixed point
+ * @dest: integer destination coordinates
+ * @clip: integer clipping coordinates
+ * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point
+ * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point
+ * @can_position: is it legal to position the plane such that it
+ *                doesn't cover the entire crtc?  This will generally
+ *                only be false for primary planes.
+ * @can_update_disabled: can the plane be updated while the crtc
+ *                       is disabled?
+ * @visible: output parameter indicating whether plane is still visible after
+ *           clipping
+ *
+ * Checks that a desired plane update is valid.  Drivers that provide
+ * their own plane handling rather than helper-provided implementations may
+ * still wish to call this function to avoid duplication of error checking
+ * code.
+ *
+ * RETURNS:
+ * Zero if update appears valid, error code on failure
+ */
+int drm_plane_helper_check_update(struct drm_plane *plane,
+				    struct drm_crtc *crtc,
+				    struct drm_framebuffer *fb,
+				    struct drm_rect *src,
+				    struct drm_rect *dest,
+				    const struct drm_rect *clip,
+				    int min_scale,
+				    int max_scale,
+				    bool can_position,
+				    bool can_update_disabled,
+				    bool *visible)
+{
+	int hscale, vscale;
+
+	if (!crtc->enabled && !can_update_disabled) {
+		DRM_DEBUG_KMS("Cannot update plane of a disabled CRTC.\n");
+		return -EINVAL;
+	}
+
+	/* Check scaling */
+	hscale = drm_rect_calc_hscale(src, dest, min_scale, max_scale);
+	vscale = drm_rect_calc_vscale(src, dest, min_scale, max_scale);
+	if (hscale < 0 || vscale < 0) {
+		DRM_DEBUG_KMS("Invalid scaling of plane\n");
+		return -ERANGE;
+	}
+
+	*visible = drm_rect_clip_scaled(src, dest, clip, hscale, vscale);
+	if (!*visible)
+		/*
+		 * Plane isn't visible; some drivers can handle this
+		 * so we just return success here.  Drivers that can't
+		 * (including those that use the primary plane helper's
+		 * update function) will return an error from their
+		 * update_plane handler.
+		 */
+		return 0;
+
+	if (!can_position && !drm_rect_equals(dest, clip)) {
+		DRM_DEBUG_KMS("Plane must cover entire CRTC\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_plane_helper_check_update);
+
+/**
  * drm_primary_helper_update() - Helper for primary plane update
  * @plane: plane object to update
  * @crtc: owning CRTC of owning plane
@@ -113,56 +195,42 @@
 		.x = src_x >> 16,
 		.y = src_y >> 16,
 	};
+	struct drm_rect src = {
+		.x1 = src_x,
+		.y1 = src_y,
+		.x2 = src_x + src_w,
+		.y2 = src_y + src_h,
+	};
 	struct drm_rect dest = {
 		.x1 = crtc_x,
 		.y1 = crtc_y,
 		.x2 = crtc_x + crtc_w,
 		.y2 = crtc_y + crtc_h,
 	};
-	struct drm_rect clip = {
+	const struct drm_rect clip = {
 		.x2 = crtc->mode.hdisplay,
 		.y2 = crtc->mode.vdisplay,
 	};
 	struct drm_connector **connector_list;
-	struct drm_framebuffer *tmpfb;
 	int num_connectors, ret;
+	bool visible;
 
-	if (!crtc->enabled) {
-		DRM_DEBUG_KMS("Cannot update primary plane of a disabled CRTC.\n");
-		return -EINVAL;
-	}
-
-	/* Disallow subpixel positioning */
-	if ((src_x | src_y | src_w | src_h) & SUBPIXEL_MASK) {
-		DRM_DEBUG_KMS("Primary plane does not support subpixel positioning\n");
-		return -EINVAL;
-	}
-
-	/* Primary planes are locked to their owning CRTC */
-	if (plane->possible_crtcs != drm_crtc_mask(crtc)) {
-		DRM_DEBUG_KMS("Cannot change primary plane CRTC\n");
-		return -EINVAL;
-	}
-
-	/* Disallow scaling */
-	if (crtc_w != src_w || crtc_h != src_h) {
-		DRM_DEBUG_KMS("Can't scale primary plane\n");
-		return -EINVAL;
-	}
-
-	/* Make sure primary plane covers entire CRTC */
-	drm_rect_intersect(&dest, &clip);
-	if (dest.x1 != 0 || dest.y1 != 0 ||
-	    dest.x2 != crtc->mode.hdisplay || dest.y2 != crtc->mode.vdisplay) {
-		DRM_DEBUG_KMS("Primary plane must cover entire CRTC\n");
-		return -EINVAL;
-	}
-
-	/* Framebuffer must be big enough to cover entire plane */
-	ret = drm_crtc_check_viewport(crtc, crtc_x, crtc_y, &crtc->mode, fb);
+	ret = drm_plane_helper_check_update(plane, crtc, fb,
+					    &src, &dest, &clip,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    false, false, &visible);
 	if (ret)
 		return ret;
 
+	if (!visible)
+		/*
+		 * Primary plane isn't visible.  Note that unless a driver
+		 * provides their own disable function, this will just
+		 * wind up returning -EINVAL to userspace.
+		 */
+		return plane->funcs->disable_plane(plane);
+
 	/* Find current connectors for CRTC */
 	num_connectors = get_connectors_for_crtc(crtc, NULL, 0);
 	BUG_ON(num_connectors == 0);
@@ -176,21 +244,14 @@
 	set.num_connectors = num_connectors;
 
 	/*
-	 * set_config() adjusts crtc->primary->fb; however the DRM setplane
-	 * code that called us expects to handle the framebuffer update and
-	 * reference counting; save and restore the current fb before
-	 * calling it.
-	 *
-	 * N.B., we call set_config() directly here rather than using
+	 * We call set_config() directly here rather than using
 	 * drm_mode_set_config_internal.  We're reprogramming the same
 	 * connectors that were already in use, so we shouldn't need the extra
 	 * cross-CRTC fb refcounting to accomodate stealing connectors.
 	 * drm_mode_setplane() already handles the basic refcounting for the
 	 * framebuffers involved in this operation.
 	 */
-	tmpfb = plane->fb;
 	ret = crtc->funcs->set_config(&set);
-	plane->fb = tmpfb;
 
 	kfree(connector_list);
 	return ret;
@@ -232,7 +293,6 @@
  */
 void drm_primary_helper_destroy(struct drm_plane *plane)
 {
-	plane->funcs->disable_plane(plane);
 	drm_plane_cleanup(plane);
 	kfree(plane);
 }

diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index 319ff53..d5b76f1 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c

@@ -68,16 +68,6 @@
 	return ret;
 }
 
-static int drm_platform_get_irq(struct drm_device *dev)
-{
-	return platform_get_irq(dev->platformdev, 0);
-}
-
-static const char *drm_platform_get_name(struct drm_device *dev)
-{
-	return dev->platformdev->name;
-}
-
 static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *master)
 {
 	int len, ret, id;
@@ -106,46 +96,30 @@
 		goto err;
 	}
 
-	dev->devname =
-		kmalloc(strlen(dev->platformdev->name) +
-			master->unique_len + 2, GFP_KERNEL);
-
-	if (dev->devname == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", dev->platformdev->name,
-		master->unique);
 	return 0;
 err:
 	return ret;
 }
 
 static struct drm_bus drm_platform_bus = {
-	.bus_type = DRIVER_BUS_PLATFORM,
-	.get_irq = drm_platform_get_irq,
-	.get_name = drm_platform_get_name,
 	.set_busid = drm_platform_set_busid,
 };
 
 /**
- * Platform device initialization. Called direct from modules.
+ * drm_platform_init - Register a platform device with the DRM subsystem
+ * @driver: DRM device driver
+ * @platform_device: platform device to register
  *
- * \return zero on success or a negative number on failure.
+ * Registers the specified DRM device driver and platform device with the DRM
+ * subsystem, initializing a drm_device structure and calling the driver's
+ * .load() function.
  *
- * Initializes a drm_device structures,registering the
- * stubs
- *
- * Expands the \c DRIVER_PREINIT and \c DRIVER_POST_INIT macros before and
- * after the initialization for driver customization.
+ * Return: 0 on success or a negative error code on failure.
  */
-
 int drm_platform_init(struct drm_driver *driver, struct platform_device *platform_device)
 {
 	DRM_DEBUG("\n");
 
-	driver->kdriver.platform_device = platform_device;
 	driver->bus = &drm_platform_bus;
 	return drm_get_platform_dev(platform_device, driver);
 }

diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index e70f54d..d22676b 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c

@@ -82,26 +82,8 @@
 	return;
 }
 
-/**
- * drm_helper_probe_single_connector_modes - get complete set of display modes
- * @connector: connector to probe
- * @maxX: max width for modes
- * @maxY: max height for modes
- *
- * Based on the helper callbacks implemented by @connector try to detect all
- * valid modes.  Modes will first be added to the connector's probed_modes list,
- * then culled (based on validity and the @maxX, @maxY parameters) and put into
- * the normal modes list.
- *
- * Intended to be use as a generic implementation of the ->fill_modes()
- * @connector vfunc for drivers that use the crtc helpers for output mode
- * filtering and detection.
- *
- * Returns:
- * The number of modes found on @connector.
- */
-int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
-					    uint32_t maxX, uint32_t maxY)
+static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector,
+							      uint32_t maxX, uint32_t maxY, bool merge_type_bits)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_display_mode *mode;
@@ -114,7 +96,7 @@
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
-			drm_get_connector_name(connector));
+			connector->name);
 	/* set all modes to the unverified state */
 	list_for_each_entry(mode, &connector->modes, head)
 		mode->status = MODE_UNVERIFIED;
@@ -138,7 +120,7 @@
 
 	if (connector->status == connector_status_disconnected) {
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n",
-			connector->base.id, drm_get_connector_name(connector));
+			connector->base.id, connector->name);
 		drm_mode_connector_update_edid_property(connector, NULL);
 		verbose_prune = false;
 		goto prune;
@@ -155,7 +137,7 @@
 	if (count == 0)
 		goto prune;
 
-	drm_mode_connector_list_update(connector);
+	drm_mode_connector_list_update(connector, merge_type_bits);
 
 	if (maxX && maxY)
 		drm_mode_validate_size(dev, &connector->modes, maxX, maxY);
@@ -169,7 +151,7 @@
 	drm_mode_validate_flag(connector, mode_flags);
 
 	list_for_each_entry(mode, &connector->modes, head) {
-		if (mode->status == MODE_OK)
+		if (mode->status == MODE_OK && connector_funcs->mode_valid)
 			mode->status = connector_funcs->mode_valid(connector,
 								   mode);
 	}
@@ -186,7 +168,7 @@
 	drm_mode_sort(&connector->modes);
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed modes :\n", connector->base.id,
-			drm_get_connector_name(connector));
+			connector->name);
 	list_for_each_entry(mode, &connector->modes, head) {
 		drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
 		drm_mode_debug_printmodeline(mode);
@@ -194,9 +176,49 @@
 
 	return count;
 }
+
+/**
+ * drm_helper_probe_single_connector_modes - get complete set of display modes
+ * @connector: connector to probe
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * Based on the helper callbacks implemented by @connector try to detect all
+ * valid modes.  Modes will first be added to the connector's probed_modes list,
+ * then culled (based on validity and the @maxX, @maxY parameters) and put into
+ * the normal modes list.
+ *
+ * Intended to be used as a generic implementation of the ->fill_modes()
+ * @connector vfunc for drivers that use the crtc helpers for output mode
+ * filtering and detection.
+ *
+ * Returns:
+ * The number of modes found on @connector.
+ */
+int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+					    uint32_t maxX, uint32_t maxY)
+{
+	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, true);
+}
 EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
 
 /**
+ * drm_helper_probe_single_connector_modes_nomerge - get complete set of display modes
+ * @connector: connector to probe
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * This operates like drm_helper_probe_single_connector_modes() except it
+ * replaces the mode type bits instead of merging them for preferred modes.
+ */
+int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector *connector,
+					    uint32_t maxX, uint32_t maxY)
+{
+	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, false);
+}
+EXPORT_SYMBOL(drm_helper_probe_single_connector_modes_nomerge);
+
+/**
  * drm_kms_helper_hotplug_event - fire off KMS hotplug events
  * @dev: drm_device whose connector state changed
  *
@@ -264,7 +286,7 @@
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] "
 				      "status updated from %s to %s\n",
 				      connector->base.id,
-				      drm_get_connector_name(connector),
+				      connector->name,
 				      old, new);
 
 			changed = true;
@@ -409,7 +431,7 @@
 		connector->status = connector->funcs->detect(connector, false);
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
 			      connector->base.id,
-			      drm_get_connector_name(connector),
+			      connector->name,
 			      drm_get_connector_status_name(old_status),
 			      drm_get_connector_status_name(connector->status));
 		if (old_status != connector->status)

diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 4c24c3a..14d1646 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c

@@ -1,16 +1,11 @@
-/**
- * \file drm_stub.h
- * Stub support
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- */
-
 /*
  * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org
  *
  * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California.
  * All Rights Reserved.
  *
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
@@ -128,7 +123,10 @@
 	kref_init(&master->refcount);
 	spin_lock_init(&master->lock.spinlock);
 	init_waitqueue_head(&master->lock.lock_queue);
-	drm_ht_create(&master->magiclist, DRM_MAGIC_HASH_ORDER);
+	if (drm_ht_create(&master->magiclist, DRM_MAGIC_HASH_ORDER)) {
+		kfree(master);
+		return NULL;
+	}
 	INIT_LIST_HEAD(&master->magicfree);
 	master->minor = minor;
 
@@ -166,9 +164,6 @@
 		master->unique_len = 0;
 	}
 
-	kfree(dev->devname);
-	dev->devname = NULL;
-
 	list_for_each_entry_safe(pt, next, &master->magicfree, head) {
 		list_del(&pt->head);
 		drm_ht_remove_item(&master->magiclist, &pt->hash_item);
@@ -294,6 +289,7 @@
 
 	slot = drm_minor_get_slot(dev, type);
 	if (*slot) {
+		drm_mode_group_destroy(&(*slot)->mode_group);
 		kfree(*slot);
 		*slot = NULL;
 	}
@@ -424,11 +420,15 @@
 }
 
 /**
- * Called via drm_exit() at module unload time or when pci device is
- * unplugged.
+ * drm_put_dev - Unregister and release a DRM device
+ * @dev: DRM device
+ *
+ * Called at module unload time or when a PCI device is unplugged.
+ *
+ * Use of this function is discouraged. It will eventually go away completely.
+ * Please use drm_dev_unregister() and drm_dev_unref() explicitly instead.
  *
  * Cleans up all DRM device, calling drm_lastclose().
- *
  */
 void drm_put_dev(struct drm_device *dev)
 {
@@ -535,7 +535,7 @@
 }
 
 /**
- * drm_dev_alloc - Allocate new drm device
+ * drm_dev_alloc - Allocate new DRM device
  * @driver: DRM driver to allocate device for
  * @parent: Parent device object
  *
@@ -569,7 +569,7 @@
 	INIT_LIST_HEAD(&dev->maplist);
 	INIT_LIST_HEAD(&dev->vblank_event_list);
 
-	spin_lock_init(&dev->count_lock);
+	spin_lock_init(&dev->buf_lock);
 	spin_lock_init(&dev->event_lock);
 	mutex_init(&dev->struct_mutex);
 	mutex_init(&dev->ctxlist_mutex);
@@ -648,9 +648,8 @@
 	drm_minor_free(dev, DRM_MINOR_RENDER);
 	drm_minor_free(dev, DRM_MINOR_CONTROL);
 
-	kfree(dev->devname);
-
 	mutex_destroy(&dev->master_mutex);
+	kfree(dev->unique);
 	kfree(dev);
 }
 
@@ -690,6 +689,7 @@
 /**
  * drm_dev_register - Register DRM device
  * @dev: Device to register
+ * @flags: Flags passed to the driver's .load() function
  *
  * Register the DRM device @dev with the system, advertise device to user-space
  * and start normal device operation. @dev must be allocated via drm_dev_alloc()
@@ -778,3 +778,28 @@
 	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 }
 EXPORT_SYMBOL(drm_dev_unregister);
+
+/**
+ * drm_dev_set_unique - Set the unique name of a DRM device
+ * @dev: device of which to set the unique name
+ * @fmt: printf-style format string for the unique name
+ *
+ * Frees any unique name previously stored in @dev->unique, then builds a new
+ * one from @fmt and the variable arguments that follow it. Drivers can use
+ * this at driver probe time if the unique name of their devices is static.
+ *
+ * Return: 0 on success, or -ENOMEM if the new name could not be allocated.
+ */
+int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...)
+{
+	va_list ap;
+
+	kfree(dev->unique);
+
+	va_start(ap, fmt);
+	dev->unique = kvasprintf(GFP_KERNEL, fmt, ap);
+	va_end(ap);
+
+	return dev->unique ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(drm_dev_set_unique);

diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index c22c309..369b262 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c

@@ -380,9 +380,9 @@
 
 	connector->kdev = device_create(drm_class, dev->primary->kdev,
 					0, connector, "card%d-%s",
-					dev->primary->index, drm_get_connector_name(connector));
+					dev->primary->index, connector->name);
 	DRM_DEBUG("adding \"%s\" to sysfs\n",
-		  drm_get_connector_name(connector));
+		  connector->name);
 
 	if (IS_ERR(connector->kdev)) {
 		DRM_ERROR("failed to register connector device: %ld\n", PTR_ERR(connector->kdev));
@@ -460,7 +460,7 @@
 	if (!connector->kdev)
 		return;
 	DRM_DEBUG("removing \"%s\" from sysfs\n",
-		  drm_get_connector_name(connector));
+		  connector->name);
 
 	for (i = 0; i < ARRAY_SIZE(connector_attrs); i++)
 		device_remove_file(connector->kdev, &connector_attrs[i]);

diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index c3406aa..f2fe94a 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c

@@ -36,16 +36,6 @@
 }
 EXPORT_SYMBOL(drm_get_usb_dev);
 
-static int drm_usb_get_irq(struct drm_device *dev)
-{
-	return 0;
-}
-
-static const char *drm_usb_get_name(struct drm_device *dev)
-{
-	return "USB";
-}
-
 static int drm_usb_set_busid(struct drm_device *dev,
 			       struct drm_master *master)
 {
@@ -53,18 +43,24 @@
 }
 
 static struct drm_bus drm_usb_bus = {
-	.bus_type = DRIVER_BUS_USB,
-	.get_irq = drm_usb_get_irq,
-	.get_name = drm_usb_get_name,
 	.set_busid = drm_usb_set_busid,
 };
-    
+
+/**
+ * drm_usb_init - Register matching USB devices with the DRM subsystem
+ * @driver: DRM device driver
+ * @udriver: USB device driver
+ *
+ * Registers one or more devices matched by a USB driver with the DRM
+ * subsystem.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int drm_usb_init(struct drm_driver *driver, struct usb_driver *udriver)
 {
 	int res;
 	DRM_DEBUG("\n");
 
-	driver->kdriver.usb = udriver;
 	driver->bus = &drm_usb_bus;
 
 	res = usb_register(udriver);
@@ -72,6 +68,14 @@
 }
 EXPORT_SYMBOL(drm_usb_init);
 
+/**
+ * drm_usb_exit - Unregister matching USB devices from the DRM subsystem
+ * @driver: DRM device driver
+ * @udriver: USB device driver
+ *
+ * Unregisters one or more devices matched by a USB driver from the DRM
+ * subsystem.
+ */
 void drm_usb_exit(struct drm_driver *driver,
 		  struct usb_driver *udriver)
 {

diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 5bf5bca..178d2a9 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig

@@ -26,14 +26,14 @@
 
 config DRM_EXYNOS_FIMD
 	bool "Exynos DRM FIMD"
-	depends on DRM_EXYNOS && !FB_S3C && !ARCH_MULTIPLATFORM
+	depends on DRM_EXYNOS && !FB_S3C
 	select FB_MODE_HELPERS
 	help
 	  Choose this option if you want to use Exynos FIMD for DRM.
 
 config DRM_EXYNOS_DPI
 	bool "EXYNOS DRM parallel output support"
-	depends on DRM_EXYNOS
+	depends on DRM_EXYNOS_FIMD
 	select DRM_PANEL
 	default n
 	help
@@ -41,7 +41,7 @@
 
 config DRM_EXYNOS_DSI
 	bool "EXYNOS DRM MIPI-DSI driver support"
-	depends on DRM_EXYNOS
+	depends on DRM_EXYNOS_FIMD
 	select DRM_MIPI_DSI
 	select DRM_PANEL
 	default n
@@ -50,7 +50,7 @@
 
 config DRM_EXYNOS_DP
 	bool "EXYNOS DRM DP driver support"
-	depends on DRM_EXYNOS && ARCH_EXYNOS
+	depends on DRM_EXYNOS_FIMD && ARCH_EXYNOS && (DRM_PTN3460=n || DRM_PTN3460=y || DRM_PTN3460=DRM_EXYNOS)
 	default DRM_EXYNOS
 	help
 	  This enables support for DP device.

diff --git a/drivers/gpu/drm/exynos/exynos_ddc.c b/drivers/gpu/drm/exynos/exynos_ddc.c
deleted file mode 100644
index 6a8c84e..0000000
--- a/drivers/gpu/drm/exynos/exynos_ddc.c
+++ /dev/null

@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2011 Samsung Electronics Co.Ltd
- * Authors:
- *	Seung-Woo Kim <sw0312.kim@samsung.com>
- *	Inki Dae <inki.dae@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-
-#include <drm/drmP.h>
-
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/of.h>
-
-#include "exynos_drm_drv.h"
-#include "exynos_hdmi.h"
-
-static int s5p_ddc_probe(struct i2c_client *client,
-			const struct i2c_device_id *dev_id)
-{
-	hdmi_attach_ddc_client(client);
-
-	dev_info(&client->adapter->dev,
-		"attached %s into i2c adapter successfully\n",
-		client->name);
-
-	return 0;
-}
-
-static int s5p_ddc_remove(struct i2c_client *client)
-{
-	dev_info(&client->adapter->dev,
-		"detached %s from i2c adapter successfully\n",
-		client->name);
-
-	return 0;
-}
-
-static struct of_device_id hdmiddc_match_types[] = {
-	{
-		.compatible = "samsung,exynos5-hdmiddc",
-	}, {
-		.compatible = "samsung,exynos4210-hdmiddc",
-	}, {
-		/* end node */
-	}
-};
-
-struct i2c_driver ddc_driver = {
-	.driver = {
-		.name = "exynos-hdmiddc",
-		.owner = THIS_MODULE,
-		.of_match_table = hdmiddc_match_types,
-	},
-	.probe		= s5p_ddc_probe,
-	.remove		= s5p_ddc_remove,
-	.command		= NULL,
-};

diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index aed533b..a8ffc8c 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c

@@ -18,6 +18,9 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
+#include <linux/component.h>
 #include <linux/phy/phy.h>
 #include <video/of_display_timing.h>
 #include <video/of_videomode.h>
@@ -141,15 +144,15 @@
 			return -EIO;
 		}
 
-		exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_TEST_REQUEST,
+		exynos_dp_read_byte_from_dpcd(dp, DP_TEST_REQUEST,
 					&test_vector);
-		if (test_vector & DPCD_TEST_EDID_READ) {
+		if (test_vector & DP_TEST_LINK_EDID_READ) {
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_EDID_CHECKSUM,
+				DP_TEST_EDID_CHECKSUM,
 				edid[EDID_BLOCK_LENGTH + EDID_CHECKSUM]);
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_RESPONSE,
-				DPCD_TEST_EDID_CHECKSUM_WRITE);
+				DP_TEST_RESPONSE,
+				DP_TEST_EDID_CHECKSUM_WRITE);
 		}
 	} else {
 		dev_info(dp->dev, "EDID data does not include any extensions.\n");
@@ -171,15 +174,15 @@
 		}
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TEST_REQUEST,
+			DP_TEST_REQUEST,
 			&test_vector);
-		if (test_vector & DPCD_TEST_EDID_READ) {
+		if (test_vector & DP_TEST_LINK_EDID_READ) {
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_EDID_CHECKSUM,
+				DP_TEST_EDID_CHECKSUM,
 				edid[EDID_CHECKSUM]);
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_RESPONSE,
-				DPCD_TEST_EDID_CHECKSUM_WRITE);
+				DP_TEST_RESPONSE,
+				DP_TEST_EDID_CHECKSUM_WRITE);
 		}
 	}
 
@@ -193,8 +196,8 @@
 	int i;
 	int retval;
 
-	/* Read DPCD DPCD_ADDR_DPCD_REV~RECEIVE_PORT1_CAP_1 */
-	retval = exynos_dp_read_bytes_from_dpcd(dp, DPCD_ADDR_DPCD_REV,
+	/* Read DPCD DP_DPCD_REV~RECEIVE_PORT1_CAP_1 */
+	retval = exynos_dp_read_bytes_from_dpcd(dp, DP_DPCD_REV,
 				12, buf);
 	if (retval)
 		return retval;
@@ -214,14 +217,14 @@
 {
 	u8 data;
 
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_LANE_COUNT_SET, &data);
 
 	if (enable)
-		exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET,
-			DPCD_ENHANCED_FRAME_EN |
+		exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET,
+			DP_LANE_COUNT_ENHANCED_FRAME_EN |
 			DPCD_LANE_COUNT_SET(data));
 	else
-		exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET,
+		exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET,
 			DPCD_LANE_COUNT_SET(data));
 }
 
@@ -230,7 +233,7 @@
 	u8 data;
 	int retval;
 
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data);
 	retval = DPCD_ENHANCED_FRAME_CAP(data);
 
 	return retval;
@@ -250,8 +253,8 @@
 	exynos_dp_set_training_pattern(dp, DP_NONE);
 
 	exynos_dp_write_byte_to_dpcd(dp,
-		DPCD_ADDR_TRAINING_PATTERN_SET,
-		DPCD_TRAINING_PATTERN_DISABLED);
+		DP_TRAINING_PATTERN_SET,
+		DP_TRAINING_PATTERN_DISABLE);
 }
 
 static void exynos_dp_set_lane_lane_pre_emphasis(struct exynos_dp_device *dp,
@@ -295,7 +298,7 @@
 	/* Setup RX configuration */
 	buf[0] = dp->link_train.link_rate;
 	buf[1] = dp->link_train.lane_count;
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_LINK_BW_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_LINK_BW_SET,
 				2, buf);
 	if (retval)
 		return retval;
@@ -322,16 +325,16 @@
 
 	/* Set RX training pattern */
 	retval = exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			DPCD_SCRAMBLING_DISABLED | DPCD_TRAINING_PATTERN_1);
+			DP_TRAINING_PATTERN_SET,
+			DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_1);
 	if (retval)
 		return retval;
 
 	for (lane = 0; lane < lane_count; lane++)
-		buf[lane] = DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0 |
-			    DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0;
+		buf[lane] = DP_TRAIN_PRE_EMPHASIS_0 |
+			    DP_TRAIN_VOLTAGE_SWING_400;
 
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET,
 			lane_count, buf);
 
 	return retval;
@@ -352,7 +355,7 @@
 
 	for (lane = 0; lane < lane_count; lane++) {
 		lane_status = exynos_dp_get_lane_status(link_status, lane);
-		if ((lane_status & DPCD_LANE_CR_DONE) == 0)
+		if ((lane_status & DP_LANE_CR_DONE) == 0)
 			return -EINVAL;
 	}
 	return 0;
@@ -364,13 +367,13 @@
 	int lane;
 	u8 lane_status;
 
-	if ((link_align & DPCD_INTERLANE_ALIGN_DONE) == 0)
+	if ((link_align & DP_INTERLANE_ALIGN_DONE) == 0)
 		return -EINVAL;
 
 	for (lane = 0; lane < lane_count; lane++) {
 		lane_status = exynos_dp_get_lane_status(link_status, lane);
-		lane_status &= DPCD_CHANNEL_EQ_BITS;
-		if (lane_status != DPCD_CHANNEL_EQ_BITS)
+		lane_status &= DP_CHANNEL_EQ_BITS;
+		if (lane_status != DP_CHANNEL_EQ_BITS)
 			return -EINVAL;
 	}
 
@@ -468,9 +471,9 @@
 				DPCD_PRE_EMPHASIS_SET(pre_emphasis);
 
 		if (voltage_swing == VOLTAGE_LEVEL_3)
-			training_lane |= DPCD_MAX_SWING_REACHED;
+			training_lane |= DP_TRAIN_MAX_SWING_REACHED;
 		if (pre_emphasis == PRE_EMPHASIS_LEVEL_3)
-			training_lane |= DPCD_MAX_PRE_EMPHASIS_REACHED;
+			training_lane |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
 
 		dp->link_train.training_lane[lane] = training_lane;
 	}
@@ -487,12 +490,12 @@
 	lane_count = dp->link_train.lane_count;
 
 	retval =  exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_LANE0_1_STATUS, 2, link_status);
+			DP_LANE0_1_STATUS, 2, link_status);
 	if (retval)
 		return retval;
 
 	retval =  exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
+			DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
 	if (retval)
 		return retval;
 
@@ -501,9 +504,9 @@
 		exynos_dp_set_training_pattern(dp, TRAINING_PTN2);
 
 		retval = exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TRAINING_PATTERN_SET,
-				DPCD_SCRAMBLING_DISABLED |
-				DPCD_TRAINING_PATTERN_2);
+				DP_TRAINING_PATTERN_SET,
+				DP_LINK_SCRAMBLING_DISABLE |
+				DP_TRAINING_PATTERN_2);
 		if (retval)
 			return retval;
 
@@ -543,7 +546,7 @@
 			dp->link_train.training_lane[lane], lane);
 
 	retval = exynos_dp_write_bytes_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_LANE0_SET, lane_count,
+			DP_TRAINING_LANE0_SET, lane_count,
 			dp->link_train.training_lane);
 	if (retval)
 		return retval;
@@ -562,7 +565,7 @@
 	lane_count = dp->link_train.lane_count;
 
 	retval = exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_LANE0_1_STATUS, 2, link_status);
+			DP_LANE0_1_STATUS, 2, link_status);
 	if (retval)
 		return retval;
 
@@ -572,12 +575,12 @@
 	}
 
 	retval = exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
+			DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
 	if (retval)
 		return retval;
 
 	retval = exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED, &link_align);
+			DP_LANE_ALIGN_STATUS_UPDATED, &link_align);
 	if (retval)
 		return retval;
 
@@ -619,7 +622,7 @@
 		exynos_dp_set_lane_link_training(dp,
 			dp->link_train.training_lane[lane], lane);
 
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET,
 			lane_count, dp->link_train.training_lane);
 
 	return retval;
@@ -634,7 +637,7 @@
 	 * For DP rev.1.1, Maximum link rate of Main Link lanes
 	 * 0x06 = 1.62 Gbps, 0x0a = 2.7 Gbps
 	 */
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LINK_RATE, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LINK_RATE, &data);
 	*bandwidth = data;
 }
 
@@ -647,7 +650,7 @@
 	 * For DP rev.1.1, Maximum number of Main Link lanes
 	 * 0x01 = 1 lane, 0x02 = 2 lanes, 0x04 = 4 lanes
 	 */
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data);
 	*lane_count = DPCD_MAX_LANE_COUNT(data);
 }
 
@@ -819,20 +822,20 @@
 		exynos_dp_enable_scrambling(dp);
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
+			DP_TRAINING_PATTERN_SET,
 			&data);
 		exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			(u8)(data & ~DPCD_SCRAMBLING_DISABLED));
+			DP_TRAINING_PATTERN_SET,
+			(u8)(data & ~DP_LINK_SCRAMBLING_DISABLE));
 	} else {
 		exynos_dp_disable_scrambling(dp);
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
+			DP_TRAINING_PATTERN_SET,
 			&data);
 		exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			(u8)(data | DPCD_SCRAMBLING_DISABLED));
+			DP_TRAINING_PATTERN_SET,
+			(u8)(data | DP_LINK_SCRAMBLING_DISABLE));
 	}
 }
 
@@ -949,12 +952,6 @@
 	return 1;
 }
 
-static int exynos_dp_mode_valid(struct drm_connector *connector,
-			struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *exynos_dp_best_encoder(
 			struct drm_connector *connector)
 {
@@ -965,20 +962,9 @@
 
 static struct drm_connector_helper_funcs exynos_dp_connector_helper_funcs = {
 	.get_modes = exynos_dp_get_modes,
-	.mode_valid = exynos_dp_mode_valid,
 	.best_encoder = exynos_dp_best_encoder,
 };
 
-static int exynos_dp_initialize(struct exynos_drm_display *display,
-				struct drm_device *drm_dev)
-{
-	struct exynos_dp_device *dp = display->ctx;
-
-	dp->drm_dev = drm_dev;
-
-	return 0;
-}
-
 static bool find_bridge(const char *compat, struct bridge_init *bridge)
 {
 	bridge->client = NULL;
@@ -1101,12 +1087,11 @@
 		break;
 	default:
 		break;
-	};
+	}
 	dp->dpms_mode = mode;
 }
 
 static struct exynos_drm_display_ops exynos_dp_display_ops = {
-	.initialize = exynos_dp_initialize,
 	.create_connector = exynos_dp_create_connector,
 	.dpms = exynos_dp_dpms,
 };
@@ -1123,10 +1108,8 @@
 
 	dp_video_config = devm_kzalloc(dev,
 				sizeof(*dp_video_config), GFP_KERNEL);
-	if (!dp_video_config) {
-		dev_err(dev, "memory allocation for video config failed\n");
+	if (!dp_video_config)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	dp_video_config->h_sync_polarity =
 		of_property_read_bool(dp_node, "hsync-active-high");
@@ -1185,10 +1168,7 @@
 	dp_phy_node = of_find_node_by_name(dp_phy_node, "dptx-phy");
 	if (!dp_phy_node) {
 		dp->phy = devm_phy_get(dp->dev, "dp");
-		if (IS_ERR(dp->phy))
-			return PTR_ERR(dp->phy);
-		else
-			return 0;
+		return PTR_ERR_OR_ZERO(dp->phy);
 	}
 
 	if (of_property_read_u32(dp_phy_node, "reg", &phy_base)) {
@@ -1230,19 +1210,20 @@
 	return 0;
 }
 
-static int exynos_dp_probe(struct platform_device *pdev)
+static int exynos_dp_bind(struct device *dev, struct device *master, void *data)
 {
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm_dev = data;
 	struct resource *res;
 	struct exynos_dp_device *dp;
+	unsigned int irq_flags;
 
 	int ret = 0;
 
 	dp = devm_kzalloc(&pdev->dev, sizeof(struct exynos_dp_device),
 				GFP_KERNEL);
-	if (!dp) {
-		dev_err(&pdev->dev, "no memory for device data\n");
+	if (!dp)
 		return -ENOMEM;
-	}
 
 	dp->dev = &pdev->dev;
 	dp->dpms_mode = DRM_MODE_DPMS_OFF;
@@ -1273,7 +1254,30 @@
 	if (IS_ERR(dp->reg_base))
 		return PTR_ERR(dp->reg_base);
 
-	dp->irq = platform_get_irq(pdev, 0);
+	dp->hpd_gpio = of_get_named_gpio(dev->of_node, "samsung,hpd-gpio", 0);
+
+	if (gpio_is_valid(dp->hpd_gpio)) {
+		/*
+		 * Set up the hotplug GPIO from the device tree as an interrupt.
+		 * Simply specifying a different interrupt in the device tree
+		 * doesn't work since we handle hotplug rather differently when
+		 * using a GPIO.  We also need the actual GPIO specifier so
+		 * that we can get the current state of the GPIO.
+		 */
+		ret = devm_gpio_request_one(&pdev->dev, dp->hpd_gpio, GPIOF_IN,
+					    "hpd_gpio");
+		if (ret) {
+			dev_err(&pdev->dev, "failed to get hpd gpio\n");
+			return ret;
+		}
+		dp->irq = gpio_to_irq(dp->hpd_gpio);
+		irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
+	} else {
+		dp->hpd_gpio = -ENODEV;
+		dp->irq = platform_get_irq(pdev, 0);
+		irq_flags = 0;
+	}
+
 	if (dp->irq == -ENXIO) {
 		dev_err(&pdev->dev, "failed to get irq\n");
 		return -ENODEV;
@@ -1285,28 +1289,61 @@
 
 	exynos_dp_init_dp(dp);
 
-	ret = devm_request_irq(&pdev->dev, dp->irq, exynos_dp_irq_handler, 0,
-				"exynos-dp", dp);
+	ret = devm_request_irq(&pdev->dev, dp->irq, exynos_dp_irq_handler,
+			irq_flags, "exynos-dp", dp);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to request irq\n");
 		return ret;
 	}
 	disable_irq(dp->irq);
 
+	dp->drm_dev = drm_dev;
 	exynos_dp_display.ctx = dp;
 
 	platform_set_drvdata(pdev, &exynos_dp_display);
-	exynos_drm_display_register(&exynos_dp_display);
 
-	return 0;
+	return exynos_drm_create_enc_conn(drm_dev, &exynos_dp_display);
+}
+
+static void exynos_dp_unbind(struct device *dev, struct device *master,
+				void *data)
+{	/* Component .unbind callback: power down DP, drop encoder/connector. */
+	struct exynos_drm_display *display = dev_get_drvdata(dev);
+	struct exynos_dp_device *dp = display->ctx;
+	struct drm_encoder *encoder = dp->encoder;
+
+	exynos_dp_dpms(display, DRM_MODE_DPMS_OFF);
+
+	encoder->funcs->destroy(encoder);	/* encoder's own destroy hook */
+	drm_connector_cleanup(&dp->connector);
+}
+
+static const struct component_ops exynos_dp_ops = {
+	.bind	= exynos_dp_bind,
+	.unbind	= exynos_dp_unbind,
+};	/* handed to component_add()/component_del() in probe/remove */
+
+static int exynos_dp_probe(struct platform_device *pdev)
+{	/* Platform probe: registers the device as a DRM component only. */
+	int ret;
+
+	ret = exynos_drm_component_add(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR,
+					exynos_dp_display.type);
+	if (ret)
+		return ret;
+
+	ret = component_add(&pdev->dev, &exynos_dp_ops);
+	if (ret)	/* undo the exynos_drm_component_add() above */
+		exynos_drm_component_del(&pdev->dev,
+						EXYNOS_DEVICE_TYPE_CONNECTOR);
+
+	return ret;
+}
 
 static int exynos_dp_remove(struct platform_device *pdev)
 {
-	struct exynos_drm_display *display = platform_get_drvdata(pdev);
-
-	exynos_dp_dpms(display, DRM_MODE_DPMS_OFF);
-	exynos_drm_display_unregister(&exynos_dp_display);
+	component_del(&pdev->dev, &exynos_dp_ops);
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
 
 	return 0;
 }

diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h
index d6a900d..02cc4f9 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.h
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.h

@@ -14,6 +14,7 @@
 #define _EXYNOS_DP_CORE_H
 
 #include <drm/drm_crtc.h>
+#include <drm/drm_dp_helper.h>
 #include <drm/exynos_drm.h>
 
 #define DP_TIMEOUT_LOOP_COUNT 100
@@ -159,6 +160,7 @@
 	struct work_struct	hotplug_work;
 	struct phy		*phy;
 	int			dpms_mode;
+	int			hpd_gpio;
 
 	struct exynos_drm_panel_info panel;
 };
@@ -261,69 +263,17 @@
 #define EDID_EXTENSION_FLAG			0x7e
 #define EDID_CHECKSUM				0x7f
 
-/* Definition for DPCD Register */
-#define DPCD_ADDR_DPCD_REV			0x0000
-#define DPCD_ADDR_MAX_LINK_RATE			0x0001
-#define DPCD_ADDR_MAX_LANE_COUNT		0x0002
-#define DPCD_ADDR_LINK_BW_SET			0x0100
-#define DPCD_ADDR_LANE_COUNT_SET		0x0101
-#define DPCD_ADDR_TRAINING_PATTERN_SET		0x0102
-#define DPCD_ADDR_TRAINING_LANE0_SET		0x0103
-#define DPCD_ADDR_LANE0_1_STATUS		0x0202
-#define DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED	0x0204
-#define DPCD_ADDR_ADJUST_REQUEST_LANE0_1	0x0206
-#define DPCD_ADDR_ADJUST_REQUEST_LANE2_3	0x0207
-#define DPCD_ADDR_TEST_REQUEST			0x0218
-#define DPCD_ADDR_TEST_RESPONSE			0x0260
-#define DPCD_ADDR_TEST_EDID_CHECKSUM		0x0261
-#define DPCD_ADDR_SINK_POWER_STATE		0x0600
-
-/* DPCD_ADDR_MAX_LANE_COUNT */
+/* DP_MAX_LANE_COUNT */
 #define DPCD_ENHANCED_FRAME_CAP(x)		(((x) >> 7) & 0x1)
 #define DPCD_MAX_LANE_COUNT(x)			((x) & 0x1f)
 
-/* DPCD_ADDR_LANE_COUNT_SET */
-#define DPCD_ENHANCED_FRAME_EN			(0x1 << 7)
+/* DP_LANE_COUNT_SET */
 #define DPCD_LANE_COUNT_SET(x)			((x) & 0x1f)
 
-/* DPCD_ADDR_TRAINING_PATTERN_SET */
-#define DPCD_SCRAMBLING_DISABLED		(0x1 << 5)
-#define DPCD_SCRAMBLING_ENABLED			(0x0 << 5)
-#define DPCD_TRAINING_PATTERN_2			(0x2 << 0)
-#define DPCD_TRAINING_PATTERN_1			(0x1 << 0)
-#define DPCD_TRAINING_PATTERN_DISABLED		(0x0 << 0)
-
-/* DPCD_ADDR_TRAINING_LANE0_SET */
-#define DPCD_MAX_PRE_EMPHASIS_REACHED		(0x1 << 5)
+/* DP_TRAINING_LANE0_SET */
 #define DPCD_PRE_EMPHASIS_SET(x)		(((x) & 0x3) << 3)
 #define DPCD_PRE_EMPHASIS_GET(x)		(((x) >> 3) & 0x3)
-#define DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0	(0x0 << 3)
-#define DPCD_MAX_SWING_REACHED			(0x1 << 2)
 #define DPCD_VOLTAGE_SWING_SET(x)		(((x) & 0x3) << 0)
 #define DPCD_VOLTAGE_SWING_GET(x)		(((x) >> 0) & 0x3)
-#define DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0	(0x0 << 0)
-
-/* DPCD_ADDR_LANE0_1_STATUS */
-#define DPCD_LANE_SYMBOL_LOCKED			(0x1 << 2)
-#define DPCD_LANE_CHANNEL_EQ_DONE		(0x1 << 1)
-#define DPCD_LANE_CR_DONE			(0x1 << 0)
-#define DPCD_CHANNEL_EQ_BITS			(DPCD_LANE_CR_DONE|	\
-						 DPCD_LANE_CHANNEL_EQ_DONE|\
-						 DPCD_LANE_SYMBOL_LOCKED)
-
-/* DPCD_ADDR_LANE_ALIGN__STATUS_UPDATED */
-#define DPCD_LINK_STATUS_UPDATED		(0x1 << 7)
-#define DPCD_DOWNSTREAM_PORT_STATUS_CHANGED	(0x1 << 6)
-#define DPCD_INTERLANE_ALIGN_DONE		(0x1 << 0)
-
-/* DPCD_ADDR_TEST_REQUEST */
-#define DPCD_TEST_EDID_READ			(0x1 << 2)
-
-/* DPCD_ADDR_TEST_RESPONSE */
-#define DPCD_TEST_EDID_CHECKSUM_WRITE		(0x1 << 2)
-
-/* DPCD_ADDR_SINK_POWER_STATE */
-#define DPCD_SET_POWER_STATE_D0			(0x1 << 0)
-#define DPCD_SET_POWER_STATE_D4			(0x2 << 0)
 
 #endif /* _EXYNOS_DP_CORE_H */

diff --git a/drivers/gpu/drm/exynos/exynos_dp_reg.c b/drivers/gpu/drm/exynos/exynos_dp_reg.c
index b70da50..c1f87a2 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_reg.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_reg.c

@@ -13,6 +13,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
 
 #include "exynos_dp_core.h"
 #include "exynos_dp_reg.h"
@@ -326,6 +327,9 @@
 {
 	u32 reg;
 
+	if (gpio_is_valid(dp->hpd_gpio))
+		return;
+
 	reg = HOTPLUG_CHG | HPD_LOST | PLUG;
 	writel(reg, dp->reg_base + EXYNOS_DP_COMMON_INT_STA_4);
 
@@ -337,6 +341,9 @@
 {
 	u32 reg;
 
+	if (gpio_is_valid(dp->hpd_gpio))
+		return;
+
 	exynos_dp_clear_hotplug_interrupts(dp);
 
 	reg = readl(dp->reg_base + EXYNOS_DP_SYS_CTL_3);
@@ -348,19 +355,27 @@
 {
 	u32 reg;
 
-	/* Parse hotplug interrupt status register */
-	reg = readl(dp->reg_base + EXYNOS_DP_COMMON_INT_STA_4);
+	if (gpio_is_valid(dp->hpd_gpio)) {
+		reg = gpio_get_value(dp->hpd_gpio);
+		if (reg)
+			return DP_IRQ_TYPE_HP_CABLE_IN;
+		else
+			return DP_IRQ_TYPE_HP_CABLE_OUT;
+	} else {
+		/* Parse hotplug interrupt status register */
+		reg = readl(dp->reg_base + EXYNOS_DP_COMMON_INT_STA_4);
 
-	if (reg & PLUG)
-		return DP_IRQ_TYPE_HP_CABLE_IN;
+		if (reg & PLUG)
+			return DP_IRQ_TYPE_HP_CABLE_IN;
 
-	if (reg & HPD_LOST)
-		return DP_IRQ_TYPE_HP_CABLE_OUT;
+		if (reg & HPD_LOST)
+			return DP_IRQ_TYPE_HP_CABLE_OUT;
 
-	if (reg & HOTPLUG_CHG)
-		return DP_IRQ_TYPE_HP_CHANGE;
+		if (reg & HOTPLUG_CHG)
+			return DP_IRQ_TYPE_HP_CHANGE;
 
-	return DP_IRQ_TYPE_UNKNOWN;
+		return DP_IRQ_TYPE_UNKNOWN;
+	}
 }
 
 void exynos_dp_reset_aux(struct exynos_dp_device *dp)
@@ -386,7 +401,7 @@
 	/* Disable AUX transaction H/W retry */
 	reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3) | AUX_HW_RETRY_COUNT_SEL(0)|
 		AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
-	writel(reg, dp->reg_base + EXYNOS_DP_AUX_HW_RETRY_CTL) ;
+	writel(reg, dp->reg_base + EXYNOS_DP_AUX_HW_RETRY_CTL);
 
 	/* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */
 	reg = DEFER_CTRL_EN | DEFER_COUNT(1);
@@ -402,9 +417,14 @@
 {
 	u32 reg;
 
-	reg = readl(dp->reg_base + EXYNOS_DP_SYS_CTL_3);
-	if (reg & HPD_STATUS)
-		return 0;
+	if (gpio_is_valid(dp->hpd_gpio)) {
+		if (gpio_get_value(dp->hpd_gpio))
+			return 0;
+	} else {
+		reg = readl(dp->reg_base + EXYNOS_DP_SYS_CTL_3);
+		if (reg & HPD_STATUS)
+			return 0;
+	}
 
 	return -EINVAL;
 }

diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 0e9e06c..4c9f972 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c

@@ -19,21 +19,19 @@
 #include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
-static LIST_HEAD(exynos_drm_manager_list);
-static LIST_HEAD(exynos_drm_display_list);
 
-static int exynos_drm_create_enc_conn(struct drm_device *dev,
+int exynos_drm_create_enc_conn(struct drm_device *dev,
 					struct exynos_drm_display *display)
 {
 	struct drm_encoder *encoder;
-	struct exynos_drm_manager *manager;
 	int ret;
 	unsigned long possible_crtcs = 0;
 
-	/* Find possible crtcs for this display */
-	list_for_each_entry(manager, &exynos_drm_manager_list, list)
-		if (manager->type == display->type)
-			possible_crtcs |= 1 << manager->pipe;
+	ret = exynos_drm_crtc_get_pipe_from_type(dev, display->type);
+	if (ret < 0)
+		return ret;
+
+	possible_crtcs |= 1 << ret;
 
 	/* create and initialize a encoder for this sub driver. */
 	encoder = exynos_drm_encoder_create(dev, display, possible_crtcs);
@@ -57,196 +55,6 @@
 	return ret;
 }
 
-static int exynos_drm_subdrv_probe(struct drm_device *dev,
-					struct exynos_drm_subdrv *subdrv)
-{
-	if (subdrv->probe) {
-		int ret;
-
-		subdrv->drm_dev = dev;
-
-		/*
-		 * this probe callback would be called by sub driver
-		 * after setting of all resources to this sub driver,
-		 * such as clock, irq and register map are done or by load()
-		 * of exynos drm driver.
-		 *
-		 * P.S. note that this driver is considered for modularization.
-		 */
-		ret = subdrv->probe(dev, subdrv->dev);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static void exynos_drm_subdrv_remove(struct drm_device *dev,
-				      struct exynos_drm_subdrv *subdrv)
-{
-	if (subdrv->remove)
-		subdrv->remove(dev, subdrv->dev);
-}
-
-int exynos_drm_initialize_managers(struct drm_device *dev)
-{
-	struct exynos_drm_manager *manager, *n;
-	int ret, pipe = 0;
-
-	list_for_each_entry(manager, &exynos_drm_manager_list, list) {
-		if (manager->ops->initialize) {
-			ret = manager->ops->initialize(manager, dev, pipe);
-			if (ret) {
-				DRM_ERROR("Mgr init [%d] failed with %d\n",
-						manager->type, ret);
-				goto err;
-			}
-		}
-
-		manager->drm_dev = dev;
-		manager->pipe = pipe++;
-
-		ret = exynos_drm_crtc_create(manager);
-		if (ret) {
-			DRM_ERROR("CRTC create [%d] failed with %d\n",
-					manager->type, ret);
-			goto err;
-		}
-	}
-	return 0;
-
-err:
-	list_for_each_entry_safe(manager, n, &exynos_drm_manager_list, list) {
-		if (pipe-- > 0)
-			exynos_drm_manager_unregister(manager);
-		else
-			list_del(&manager->list);
-	}
-	return ret;
-}
-
-void exynos_drm_remove_managers(struct drm_device *dev)
-{
-	struct exynos_drm_manager *manager, *n;
-
-	list_for_each_entry_safe(manager, n, &exynos_drm_manager_list, list)
-		exynos_drm_manager_unregister(manager);
-}
-
-int exynos_drm_initialize_displays(struct drm_device *dev)
-{
-	struct exynos_drm_display *display, *n;
-	int ret, initialized = 0;
-
-	list_for_each_entry(display, &exynos_drm_display_list, list) {
-		if (display->ops->initialize) {
-			ret = display->ops->initialize(display, dev);
-			if (ret) {
-				DRM_ERROR("Display init [%d] failed with %d\n",
-						display->type, ret);
-				goto err;
-			}
-		}
-
-		initialized++;
-
-		ret = exynos_drm_create_enc_conn(dev, display);
-		if (ret) {
-			DRM_ERROR("Encoder create [%d] failed with %d\n",
-					display->type, ret);
-			goto err;
-		}
-	}
-	return 0;
-
-err:
-	list_for_each_entry_safe(display, n, &exynos_drm_display_list, list) {
-		if (initialized-- > 0)
-			exynos_drm_display_unregister(display);
-		else
-			list_del(&display->list);
-	}
-	return ret;
-}
-
-void exynos_drm_remove_displays(struct drm_device *dev)
-{
-	struct exynos_drm_display *display, *n;
-
-	list_for_each_entry_safe(display, n, &exynos_drm_display_list, list)
-		exynos_drm_display_unregister(display);
-}
-
-int exynos_drm_device_register(struct drm_device *dev)
-{
-	struct exynos_drm_subdrv *subdrv, *n;
-	int err;
-
-	if (!dev)
-		return -EINVAL;
-
-	list_for_each_entry_safe(subdrv, n, &exynos_drm_subdrv_list, list) {
-		err = exynos_drm_subdrv_probe(dev, subdrv);
-		if (err) {
-			DRM_DEBUG("exynos drm subdrv probe failed.\n");
-			list_del(&subdrv->list);
-			continue;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(exynos_drm_device_register);
-
-int exynos_drm_device_unregister(struct drm_device *dev)
-{
-	struct exynos_drm_subdrv *subdrv;
-
-	if (!dev) {
-		WARN(1, "Unexpected drm device unregister!\n");
-		return -EINVAL;
-	}
-
-	list_for_each_entry(subdrv, &exynos_drm_subdrv_list, list) {
-		exynos_drm_subdrv_remove(dev, subdrv);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(exynos_drm_device_unregister);
-
-int exynos_drm_manager_register(struct exynos_drm_manager *manager)
-{
-	BUG_ON(!manager->ops);
-	list_add_tail(&manager->list, &exynos_drm_manager_list);
-	return 0;
-}
-
-int exynos_drm_manager_unregister(struct exynos_drm_manager *manager)
-{
-	if (manager->ops->remove)
-		manager->ops->remove(manager);
-
-	list_del(&manager->list);
-	return 0;
-}
-
-int exynos_drm_display_register(struct exynos_drm_display *display)
-{
-	BUG_ON(!display->ops);
-	list_add_tail(&display->list, &exynos_drm_display_list);
-	return 0;
-}
-
-int exynos_drm_display_unregister(struct exynos_drm_display *display)
-{
-	if (display->ops->remove)
-		display->ops->remove(display);
-
-	list_del(&display->list);
-	return 0;
-}
-
 int exynos_drm_subdrv_register(struct exynos_drm_subdrv *subdrv)
 {
 	if (!subdrv)
@@ -269,6 +77,54 @@
 }
 EXPORT_SYMBOL_GPL(exynos_drm_subdrv_unregister);
 
+int exynos_drm_device_subdrv_probe(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv, *n;
+	int err;
+
+	if (!dev)
+		return -EINVAL;
+
+	list_for_each_entry_safe(subdrv, n, &exynos_drm_subdrv_list, list) {
+		if (subdrv->probe) {
+			subdrv->drm_dev = dev;
+
+			/*
+			 * this probe callback would be called by sub driver
+			 * after setting of all resources to this sub driver,
+			 * such as clock, irq and register map are done.
+			 */
+			err = subdrv->probe(dev, subdrv->dev);
+			if (err) {
+				DRM_DEBUG("exynos drm subdrv probe failed.\n");
+				list_del(&subdrv->list);
+				continue;
+			}
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_subdrv_probe);
+
+int exynos_drm_device_subdrv_remove(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv;
+
+	if (!dev) {
+		WARN(1, "Unexpected drm device unregister!\n");
+		return -EINVAL;
+	}
+
+	list_for_each_entry(subdrv, &exynos_drm_subdrv_list, list) {
+		if (subdrv->remove)
+			subdrv->remove(dev, subdrv->dev);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_subdrv_remove);
+
 int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct exynos_drm_subdrv *subdrv;

diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 1ef5ab9..95c9435 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c

@@ -368,6 +368,7 @@
 		return -ENOMEM;
 	}
 
+	manager->crtc = &exynos_crtc->drm_crtc;
 	crtc = &exynos_crtc->drm_crtc;
 
 	private->crtc[manager->pipe] = crtc;
@@ -491,3 +492,19 @@
 			manager->ops->wait_for_vblank(manager);
 	}
 }
+
+int exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev,
+					unsigned int out_type)
+{
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &drm_dev->mode_config.crtc_list, head) {
+		struct exynos_drm_crtc *exynos_crtc;
+
+		exynos_crtc = to_exynos_crtc(crtc);
+		if (exynos_crtc->manager->type == out_type)
+			return exynos_crtc->manager->pipe;
+	}
+
+	return -EPERM;
+}

diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
index c27b66c..9f74b10 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h

@@ -32,4 +32,8 @@
 void exynos_drm_crtc_plane_enable(struct drm_crtc *crtc, int zpos);
 void exynos_drm_crtc_plane_disable(struct drm_crtc *crtc, int zpos);
 
+/* This function gets the pipe value of the crtc device matching out_type. */
+int exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev,
+					unsigned int out_type);
+
 #endif

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 2b09c7c..482127f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c

@@ -40,20 +40,10 @@
 {
 	struct exynos_dpi *ctx = connector_to_dpi(connector);
 
-	/* panels supported only by boot-loader are always connected */
-	if (!ctx->panel_node)
-		return connector_status_connected;
+	if (!ctx->panel->connector)
+		drm_panel_attach(ctx->panel, &ctx->connector);
 
-	if (!ctx->panel) {
-		ctx->panel = of_drm_find_panel(ctx->panel_node);
-		if (ctx->panel)
-			drm_panel_attach(ctx->panel, &ctx->connector);
-	}
-
-	if (ctx->panel)
-		return connector_status_connected;
-
-	return connector_status_disconnected;
+	return connector_status_connected;
 }
 
 static void exynos_dpi_connector_destroy(struct drm_connector *connector)
@@ -94,12 +84,6 @@
 	return 0;
 }
 
-static int exynos_dpi_mode_valid(struct drm_connector *connector,
-				 struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *
 exynos_dpi_best_encoder(struct drm_connector *connector)
 {
@@ -110,7 +94,6 @@
 
 static struct drm_connector_helper_funcs exynos_dpi_connector_helper_funcs = {
 	.get_modes = exynos_dpi_get_modes,
-	.mode_valid = exynos_dpi_mode_valid,
 	.best_encoder = exynos_dpi_best_encoder,
 };
 
@@ -123,10 +106,7 @@
 
 	ctx->encoder = encoder;
 
-	if (ctx->panel_node)
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-	else
-		connector->polled = DRM_CONNECTOR_POLL_HPD;
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
 
 	ret = drm_connector_init(encoder->dev, connector,
 				 &exynos_dpi_connector_funcs,
@@ -172,7 +152,7 @@
 		break;
 	default:
 		break;
-	};
+	}
 	ctx->dpms_mode = mode;
 }
 
@@ -294,8 +274,10 @@
 			return -ENOMEM;
 
 		ret = of_get_videomode(dn, vm, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			devm_kfree(dev, vm);
 			return ret;
+		}
 
 		ctx->vm = vm;
 
@@ -308,32 +290,58 @@
 	return 0;
 }
 
-int exynos_dpi_probe(struct device *dev)
+struct exynos_drm_display *exynos_dpi_probe(struct device *dev)
 {
 	struct exynos_dpi *ctx;
 	int ret;
 
+	ret = exynos_drm_component_add(dev,
+					EXYNOS_DEVICE_TYPE_CONNECTOR,
+					exynos_dpi_display.type);
+	if (ret)
+		return ERR_PTR(ret);
+
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
-		return -ENOMEM;
+		goto err_del_component;
 
 	ctx->dev = dev;
 	exynos_dpi_display.ctx = ctx;
 	ctx->dpms_mode = DRM_MODE_DPMS_OFF;
 
 	ret = exynos_dpi_parse_dt(ctx);
-	if (ret < 0)
-		return ret;
+	if (ret < 0) {
+		devm_kfree(dev, ctx);
+		goto err_del_component;
+	}
 
-	exynos_drm_display_register(&exynos_dpi_display);
+	if (ctx->panel_node) {
+		ctx->panel = of_drm_find_panel(ctx->panel_node);
+		if (!ctx->panel) {
+			exynos_drm_component_del(dev,
+						EXYNOS_DEVICE_TYPE_CONNECTOR);
+			return ERR_PTR(-EPROBE_DEFER);
+		}
+	}
 
-	return 0;
+	return &exynos_dpi_display;
+
+err_del_component:
+	exynos_drm_component_del(dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
+
+	return NULL;
 }
 
 int exynos_dpi_remove(struct device *dev)
 {
+	struct drm_encoder *encoder = exynos_dpi_display.encoder;
+	struct exynos_dpi *ctx = exynos_dpi_display.ctx;
+
 	exynos_dpi_dpms(&exynos_dpi_display, DRM_MODE_DPMS_OFF);
-	exynos_drm_display_unregister(&exynos_dpi_display);
+	encoder->funcs->destroy(encoder);
+	drm_connector_cleanup(&ctx->connector);
+
+	exynos_drm_component_del(dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
 
 	return 0;
 }

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 2d27ba2..d91f277 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c

@@ -16,6 +16,7 @@
 #include <drm/drm_crtc_helper.h>
 
 #include <linux/anon_inodes.h>
+#include <linux/component.h>
 
 #include <drm/exynos_drm.h>
 
@@ -40,9 +41,19 @@
 
 #define VBLANK_OFF_DELAY	50000
 
-/* platform device pointer for eynos drm device. */
 static struct platform_device *exynos_drm_pdev;
 
+static DEFINE_MUTEX(drm_component_lock);
+static LIST_HEAD(drm_component_list);
+
+struct component_dev {
+	struct list_head list;
+	struct device *crtc_dev;
+	struct device *conn_dev;
+	enum exynos_drm_output_type out_type;
+	unsigned int dev_type_flag;
+};
+
 static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 {
 	struct exynos_drm_private *private;
@@ -73,38 +84,21 @@
 
 	exynos_drm_mode_config_init(dev);
 
-	ret = exynos_drm_initialize_managers(dev);
-	if (ret)
-		goto err_mode_config_cleanup;
-
 	for (nr = 0; nr < MAX_PLANE; nr++) {
 		struct drm_plane *plane;
 		unsigned long possible_crtcs = (1 << MAX_CRTC) - 1;
 
 		plane = exynos_plane_init(dev, possible_crtcs, false);
 		if (!plane)
-			goto err_manager_cleanup;
+			goto err_mode_config_cleanup;
 	}
 
-	ret = exynos_drm_initialize_displays(dev);
-	if (ret)
-		goto err_manager_cleanup;
-
 	/* init kms poll for handling hpd */
 	drm_kms_helper_poll_init(dev);
 
 	ret = drm_vblank_init(dev, MAX_CRTC);
 	if (ret)
-		goto err_display_cleanup;
-
-	/*
-	 * probe sub drivers such as display controller and hdmi driver,
-	 * that were registered at probe() of platform driver
-	 * to the sub driver and create encoder and connector for them.
-	 */
-	ret = exynos_drm_device_register(dev);
-	if (ret)
-		goto err_vblank;
+		goto err_mode_config_cleanup;
 
 	/* setup possible_clones. */
 	exynos_drm_encoder_setup(dev);
@@ -113,17 +107,25 @@
 
 	platform_set_drvdata(dev->platformdev, dev);
 
+	/* Try to bind all sub drivers. */
+	ret = component_bind_all(dev->dev, dev);
+	if (ret)
+		goto err_cleanup_vblank;
+
+	/* Probe non kms sub drivers and virtual display driver. */
+	ret = exynos_drm_device_subdrv_probe(dev);
+	if (ret)
+		goto err_unbind_all;
+
 	/* force connectors detection */
 	drm_helper_hpd_irq_event(dev);
 
 	return 0;
 
-err_vblank:
+err_unbind_all:
+	component_unbind_all(dev->dev, dev);
+err_cleanup_vblank:
 	drm_vblank_cleanup(dev);
-err_display_cleanup:
-	exynos_drm_remove_displays(dev);
-err_manager_cleanup:
-	exynos_drm_remove_managers(dev);
 err_mode_config_cleanup:
 	drm_mode_config_cleanup(dev);
 	drm_release_iommu_mapping(dev);
@@ -135,17 +137,17 @@
 
 static int exynos_drm_unload(struct drm_device *dev)
 {
+	exynos_drm_device_subdrv_remove(dev);
+
 	exynos_drm_fbdev_fini(dev);
-	exynos_drm_device_unregister(dev);
 	drm_vblank_cleanup(dev);
 	drm_kms_helper_poll_fini(dev);
-	exynos_drm_remove_displays(dev);
-	exynos_drm_remove_managers(dev);
 	drm_mode_config_cleanup(dev);
 
 	drm_release_iommu_mapping(dev);
 	kfree(dev->dev_private);
 
+	component_unbind_all(dev->dev, dev);
 	dev->dev_private = NULL;
 
 	return 0;
@@ -183,9 +185,9 @@
 		if (connector->funcs->dpms)
 			connector->funcs->dpms(connector, connector->dpms);
 	}
+	drm_modeset_unlock_all(dev);
 
 	drm_helper_resume_force_mode(dev);
-	drm_modeset_unlock_all(dev);
 
 	return 0;
 }
@@ -323,8 +325,7 @@
 };
 
 static struct drm_driver exynos_drm_driver = {
-	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_MODESET |
-					DRIVER_GEM | DRIVER_PRIME,
+	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
 	.load			= exynos_drm_load,
 	.unload			= exynos_drm_unload,
 	.suspend		= exynos_drm_suspend,
@@ -355,27 +356,6 @@
 	.minor	= DRIVER_MINOR,
 };
 
-static int exynos_drm_platform_probe(struct platform_device *pdev)
-{
-	int ret;
-
-	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
-	if (ret)
-		return ret;
-
-	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(&pdev->dev);
-
-	return drm_platform_init(&exynos_drm_driver, pdev);
-}
-
-static int exynos_drm_platform_remove(struct platform_device *pdev)
-{
-	drm_put_dev(platform_get_drvdata(pdev));
-
-	return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int exynos_drm_sys_suspend(struct device *dev)
 {
@@ -400,237 +380,409 @@
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
-static int exynos_drm_runtime_suspend(struct device *dev)
-{
-	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	pm_message_t message;
-
-	if (pm_runtime_suspended(dev))
-		return 0;
-
-	message.event = PM_EVENT_SUSPEND;
-	return exynos_drm_suspend(drm_dev, message);
-}
-
-static int exynos_drm_runtime_resume(struct device *dev)
-{
-	struct drm_device *drm_dev = dev_get_drvdata(dev);
-
-	if (!pm_runtime_suspended(dev))
-		return 0;
-
-	return exynos_drm_resume(drm_dev);
-}
-#endif
-
 static const struct dev_pm_ops exynos_drm_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(exynos_drm_sys_suspend, exynos_drm_sys_resume)
-	SET_RUNTIME_PM_OPS(exynos_drm_runtime_suspend,
-			exynos_drm_runtime_resume, NULL)
 };
 
+int exynos_drm_component_add(struct device *dev,
+				enum exynos_drm_device_type dev_type,
+				enum exynos_drm_output_type out_type)
+{
+	struct component_dev *cdev;
+
+	if (dev_type != EXYNOS_DEVICE_TYPE_CRTC &&
+			dev_type != EXYNOS_DEVICE_TYPE_CONNECTOR) {
+		DRM_ERROR("invalid device type.\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&drm_component_lock);
+
+	/*
+	 * Make sure to check if there is a component which has two device
+	 * objects, for crtc and for encoder/connector.
+	 * It should make sure that crtc and encoder/connector drivers are
+	 * ready before exynos drm core binds them.
+	 */
+	list_for_each_entry(cdev, &drm_component_list, list) {
+		if (cdev->out_type == out_type) {
+			/*
+			 * If crtc and encoder/connector device objects are
+			 * added already just return.
+			 */
+			if (cdev->dev_type_flag == (EXYNOS_DEVICE_TYPE_CRTC |
+						EXYNOS_DEVICE_TYPE_CONNECTOR)) {
+				mutex_unlock(&drm_component_lock);
+				return 0;
+			}
+
+			if (dev_type == EXYNOS_DEVICE_TYPE_CRTC) {
+				cdev->crtc_dev = dev;
+				cdev->dev_type_flag |= dev_type;
+			}
+
+			if (dev_type == EXYNOS_DEVICE_TYPE_CONNECTOR) {
+				cdev->conn_dev = dev;
+				cdev->dev_type_flag |= dev_type;
+			}
+
+			mutex_unlock(&drm_component_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&drm_component_lock);
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return -ENOMEM;
+
+	if (dev_type == EXYNOS_DEVICE_TYPE_CRTC)
+		cdev->crtc_dev = dev;
+	if (dev_type == EXYNOS_DEVICE_TYPE_CONNECTOR)
+		cdev->conn_dev = dev;
+
+	cdev->out_type = out_type;
+	cdev->dev_type_flag = dev_type;
+
+	mutex_lock(&drm_component_lock);
+	list_add_tail(&cdev->list, &drm_component_list);
+	mutex_unlock(&drm_component_lock);
+
+	return 0;
+}
+
+void exynos_drm_component_del(struct device *dev,
+				enum exynos_drm_device_type dev_type)
+{
+	struct component_dev *cdev, *next;
+
+	mutex_lock(&drm_component_lock);
+
+	list_for_each_entry_safe(cdev, next, &drm_component_list, list) {
+		if (dev_type == EXYNOS_DEVICE_TYPE_CRTC) {
+			if (cdev->crtc_dev == dev) {
+				cdev->crtc_dev = NULL;
+				cdev->dev_type_flag &= ~dev_type;
+			}
+		}
+
+		if (dev_type == EXYNOS_DEVICE_TYPE_CONNECTOR) {
+			if (cdev->conn_dev == dev) {
+				cdev->conn_dev = NULL;
+				cdev->dev_type_flag &= ~dev_type;
+			}
+		}
+
+		/*
+		 * Release cdev object only in case that both of crtc and
+		 * encoder/connector device objects are NULL.
+		 */
+		if (!cdev->crtc_dev && !cdev->conn_dev) {
+			list_del(&cdev->list);
+			kfree(cdev);
+		}
+
+		break;
+	}
+
+	mutex_unlock(&drm_component_lock);
+}
+
+static int compare_of(struct device *dev, void *data)
+{
+	return dev == (struct device *)data;
+}
+
+static int exynos_drm_add_components(struct device *dev, struct master *m)
+{
+	struct component_dev *cdev;
+	unsigned int attach_cnt = 0;
+
+	mutex_lock(&drm_component_lock);
+
+	list_for_each_entry(cdev, &drm_component_list, list) {
+		int ret;
+
+		/*
+		 * Add components to master only in case that crtc and
+		 * encoder/connector device objects exist.
+		 */
+		if (!cdev->crtc_dev || !cdev->conn_dev)
+			continue;
+
+		attach_cnt++;
+
+		mutex_unlock(&drm_component_lock);
+
+		/*
+		 * fimd and dpi modules have same device object so add
+		 * only crtc device object in this case.
+		 *
+		 * TODO: if the dpi module becomes a driver-model driver, the
+		 * code below can be removed.
+		 */
+		if (cdev->crtc_dev == cdev->conn_dev) {
+			ret = component_master_add_child(m, compare_of,
+					cdev->crtc_dev);
+			if (ret < 0)
+				return ret;
+
+			goto out_lock;
+		}
+
+		/*
+		 * Do not change the call order below.
+		 * crtc device first should be added to master because
+		 * connector/encoder need pipe number of crtc when they
+		 * are created.
+		 */
+		ret = component_master_add_child(m, compare_of, cdev->crtc_dev);
+		ret |= component_master_add_child(m, compare_of,
+							cdev->conn_dev);
+		if (ret < 0)
+			return ret;
+
+out_lock:
+		mutex_lock(&drm_component_lock);
+	}
+
+	mutex_unlock(&drm_component_lock);
+
+	return attach_cnt ? 0 : -ENODEV;
+}
+
+static int exynos_drm_bind(struct device *dev)
+{
+	return drm_platform_init(&exynos_drm_driver, to_platform_device(dev));
+}
+
+static void exynos_drm_unbind(struct device *dev)
+{
+	drm_put_dev(dev_get_drvdata(dev));
+}
+
+static const struct component_master_ops exynos_drm_ops = {
+	.add_components = exynos_drm_add_components,
+	.bind		= exynos_drm_bind,
+	.unbind		= exynos_drm_unbind,
+};
+
+static int exynos_drm_platform_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	exynos_drm_driver.num_ioctls = ARRAY_SIZE(exynos_ioctls);
+
+#ifdef CONFIG_DRM_EXYNOS_FIMD
+	ret = platform_driver_register(&fimd_driver);
+	if (ret < 0)
+		return ret;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DP
+	ret = platform_driver_register(&dp_driver);
+	if (ret < 0)
+		goto err_unregister_fimd_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DSI
+	ret = platform_driver_register(&dsi_driver);
+	if (ret < 0)
+		goto err_unregister_dp_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_HDMI
+	ret = platform_driver_register(&mixer_driver);
+	if (ret < 0)
+		goto err_unregister_dsi_drv;
+	ret = platform_driver_register(&hdmi_driver);
+	if (ret < 0)
+		goto err_unregister_mixer_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_G2D
+	ret = platform_driver_register(&g2d_driver);
+	if (ret < 0)
+		goto err_unregister_hdmi_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	ret = platform_driver_register(&fimc_driver);
+	if (ret < 0)
+		goto err_unregister_g2d_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	ret = platform_driver_register(&rotator_driver);
+	if (ret < 0)
+		goto err_unregister_fimc_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	ret = platform_driver_register(&gsc_driver);
+	if (ret < 0)
+		goto err_unregister_rotator_drv;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+	ret = platform_driver_register(&ipp_driver);
+	if (ret < 0)
+		goto err_unregister_gsc_drv;
+
+	ret = exynos_platform_device_ipp_register();
+	if (ret < 0)
+		goto err_unregister_ipp_drv;
+#endif
+
+	ret = component_master_add(&pdev->dev, &exynos_drm_ops);
+	if (ret < 0)
+		DRM_DEBUG_KMS("re-tried by last sub driver probed later.\n");
+
+	return 0;
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+err_unregister_ipp_drv:
+	platform_driver_unregister(&ipp_driver);
+err_unregister_gsc_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	platform_driver_unregister(&gsc_driver);
+err_unregister_rotator_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	platform_driver_unregister(&rotator_driver);
+err_unregister_fimc_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	platform_driver_unregister(&fimc_driver);
+err_unregister_g2d_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_G2D
+	platform_driver_unregister(&g2d_driver);
+err_unregister_hdmi_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_HDMI
+	platform_driver_unregister(&hdmi_driver);
+err_unregister_mixer_drv:
+	platform_driver_unregister(&mixer_driver);
+err_unregister_dsi_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DSI
+	platform_driver_unregister(&dsi_driver);
+err_unregister_dp_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DP
+	platform_driver_unregister(&dp_driver);
+err_unregister_fimd_drv:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMD
+	platform_driver_unregister(&fimd_driver);
+#endif
+	return ret;
+}
+
+static int exynos_drm_platform_remove(struct platform_device *pdev)
+{
+#ifdef CONFIG_DRM_EXYNOS_IPP
+	exynos_platform_device_ipp_unregister();
+	platform_driver_unregister(&ipp_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	platform_driver_unregister(&gsc_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	platform_driver_unregister(&rotator_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	platform_driver_unregister(&fimc_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_G2D
+	platform_driver_unregister(&g2d_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_HDMI
+	platform_driver_unregister(&mixer_driver);
+	platform_driver_unregister(&hdmi_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMD
+	platform_driver_unregister(&fimd_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DSI
+	platform_driver_unregister(&dsi_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_DP
+	platform_driver_unregister(&dp_driver);
+#endif
+	component_master_del(&pdev->dev, &exynos_drm_ops);
+	return 0;
+}
+
 static struct platform_driver exynos_drm_platform_driver = {
-	.probe		= exynos_drm_platform_probe,
-	.remove		= exynos_drm_platform_remove,
-	.driver		= {
+	.probe	= exynos_drm_platform_probe,
+	.remove	= exynos_drm_platform_remove,
+	.driver	= {
 		.owner	= THIS_MODULE,
 		.name	= "exynos-drm",
 		.pm	= &exynos_drm_pm_ops,
 	},
 };
 
-static int __init exynos_drm_init(void)
+static int exynos_drm_init(void)
 {
 	int ret;
 
-#ifdef CONFIG_DRM_EXYNOS_DP
-	ret = platform_driver_register(&dp_driver);
-	if (ret < 0)
-		goto out_dp;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_DSI
-	ret = platform_driver_register(&dsi_driver);
-	if (ret < 0)
-		goto out_dsi;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMD
-	ret = platform_driver_register(&fimd_driver);
-	if (ret < 0)
-		goto out_fimd;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_HDMI
-	ret = platform_driver_register(&hdmi_driver);
-	if (ret < 0)
-		goto out_hdmi;
-	ret = platform_driver_register(&mixer_driver);
-	if (ret < 0)
-		goto out_mixer;
-#endif
+	exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1,
+								NULL, 0);
+	if (IS_ERR(exynos_drm_pdev))
+		return PTR_ERR(exynos_drm_pdev);
 
 #ifdef CONFIG_DRM_EXYNOS_VIDI
-	ret = platform_driver_register(&vidi_driver);
+	ret = exynos_drm_probe_vidi();
 	if (ret < 0)
-		goto out_vidi;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_G2D
-	ret = platform_driver_register(&g2d_driver);
-	if (ret < 0)
-		goto out_g2d;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMC
-	ret = platform_driver_register(&fimc_driver);
-	if (ret < 0)
-		goto out_fimc;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_ROTATOR
-	ret = platform_driver_register(&rotator_driver);
-	if (ret < 0)
-		goto out_rotator;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_GSC
-	ret = platform_driver_register(&gsc_driver);
-	if (ret < 0)
-		goto out_gsc;
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_IPP
-	ret = platform_driver_register(&ipp_driver);
-	if (ret < 0)
-		goto out_ipp;
-
-	ret = exynos_platform_device_ipp_register();
-	if (ret < 0)
-		goto out_ipp_dev;
+		goto err_unregister_pd;
 #endif
 
 	ret = platform_driver_register(&exynos_drm_platform_driver);
-	if (ret < 0)
-		goto out_drm;
-
-	exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1,
-				NULL, 0);
-	if (IS_ERR(exynos_drm_pdev)) {
-		ret = PTR_ERR(exynos_drm_pdev);
-		goto out;
-	}
+	if (ret)
+		goto err_remove_vidi;
 
 	return 0;
 
-out:
-	platform_driver_unregister(&exynos_drm_platform_driver);
+err_unregister_pd:
+	platform_device_unregister(exynos_drm_pdev);
 
-out_drm:
-#ifdef CONFIG_DRM_EXYNOS_IPP
-	exynos_platform_device_ipp_unregister();
-out_ipp_dev:
-	platform_driver_unregister(&ipp_driver);
-out_ipp:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_GSC
-	platform_driver_unregister(&gsc_driver);
-out_gsc:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_ROTATOR
-	platform_driver_unregister(&rotator_driver);
-out_rotator:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMC
-	platform_driver_unregister(&fimc_driver);
-out_fimc:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_G2D
-	platform_driver_unregister(&g2d_driver);
-out_g2d:
-#endif
-
+err_remove_vidi:
 #ifdef CONFIG_DRM_EXYNOS_VIDI
-	platform_driver_unregister(&vidi_driver);
-out_vidi:
+	exynos_drm_remove_vidi();
 #endif
 
-#ifdef CONFIG_DRM_EXYNOS_HDMI
-	platform_driver_unregister(&mixer_driver);
-out_mixer:
-	platform_driver_unregister(&hdmi_driver);
-out_hdmi:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMD
-	platform_driver_unregister(&fimd_driver);
-out_fimd:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_DSI
-	platform_driver_unregister(&dsi_driver);
-out_dsi:
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_DP
-	platform_driver_unregister(&dp_driver);
-out_dp:
-#endif
 	return ret;
 }
 
-static void __exit exynos_drm_exit(void)
+static void exynos_drm_exit(void)
 {
-	platform_device_unregister(exynos_drm_pdev);
-
-	platform_driver_unregister(&exynos_drm_platform_driver);
-
-#ifdef CONFIG_DRM_EXYNOS_IPP
-	exynos_platform_device_ipp_unregister();
-	platform_driver_unregister(&ipp_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_GSC
-	platform_driver_unregister(&gsc_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_ROTATOR
-	platform_driver_unregister(&rotator_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMC
-	platform_driver_unregister(&fimc_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_G2D
-	platform_driver_unregister(&g2d_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_HDMI
-	platform_driver_unregister(&mixer_driver);
-	platform_driver_unregister(&hdmi_driver);
-#endif
-
 #ifdef CONFIG_DRM_EXYNOS_VIDI
-	platform_driver_unregister(&vidi_driver);
+	exynos_drm_remove_vidi();
 #endif
-
-#ifdef CONFIG_DRM_EXYNOS_FIMD
-	platform_driver_unregister(&fimd_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_DSI
-	platform_driver_unregister(&dsi_driver);
-#endif
-
-#ifdef CONFIG_DRM_EXYNOS_DP
-	platform_driver_unregister(&dp_driver);
-#endif
+	platform_device_unregister(exynos_drm_pdev);
+	platform_driver_unregister(&exynos_drm_platform_driver);
 }
 
 module_init(exynos_drm_init);

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index ce3e6a3..36535f3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h

@@ -42,6 +42,13 @@
 
 extern unsigned int drm_vblank_offdelay;
 
+/* This enumerates device type. */
+enum exynos_drm_device_type {
+	EXYNOS_DEVICE_TYPE_NONE,
+	EXYNOS_DEVICE_TYPE_CRTC,
+	EXYNOS_DEVICE_TYPE_CONNECTOR,
+};
+
 /* this enumerates display type. */
 enum exynos_drm_output_type {
 	EXYNOS_DISPLAY_TYPE_NONE,
@@ -122,7 +129,6 @@
  * Exynos DRM Display Structure.
  *	- this structure is common to analog tv, digital tv and lcd panel.
  *
- * @initialize: initializes the display with drm_dev
  * @remove: cleans up the display for removal
  * @mode_fixup: fix mode data comparing to hw specific display mode.
  * @mode_set: convert drm_display_mode to hw specific display mode and
@@ -133,8 +139,6 @@
  */
 struct exynos_drm_display;
 struct exynos_drm_display_ops {
-	int (*initialize)(struct exynos_drm_display *display,
-				struct drm_device *drm_dev);
 	int (*create_connector)(struct exynos_drm_display *display,
 				struct drm_encoder *encoder);
 	void (*remove)(struct exynos_drm_display *display);
@@ -172,8 +176,6 @@
 /*
  * Exynos drm manager ops
  *
- * @initialize: initializes the manager with drm_dev
- * @remove: cleans up the manager for removal
  * @dpms: control device power.
  * @mode_fixup: fix mode data before applying it
  * @mode_set: set the given mode to the manager
@@ -189,9 +191,6 @@
  */
 struct exynos_drm_manager;
 struct exynos_drm_manager_ops {
-	int (*initialize)(struct exynos_drm_manager *mgr,
-				struct drm_device *drm_dev, int pipe);
-	void (*remove)(struct exynos_drm_manager *mgr);
 	void (*dpms)(struct exynos_drm_manager *mgr, int mode);
 	bool (*mode_fixup)(struct exynos_drm_manager *mgr,
 				const struct drm_display_mode *mode,
@@ -215,6 +214,7 @@
  * @list: the list entry for this manager
  * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI.
  * @drm_dev: pointer to the drm device
+ * @crtc: crtc object.
  * @pipe: the pipe number for this crtc/manager
  * @ops: pointer to callbacks for exynos drm specific functionality
  * @ctx: A pointer to the manager's implementation specific context
@@ -223,6 +223,7 @@
 	struct list_head list;
 	enum exynos_drm_output_type type;
 	struct drm_device *drm_dev;
+	struct drm_crtc *crtc;
 	int pipe;
 	struct exynos_drm_manager_ops *ops;
 	void *ctx;
@@ -254,6 +255,7 @@
  *	otherwise default one.
  * @da_space_size: size of device address space.
  *	if 0 then default value is used for it.
+ * @pipe: the pipe number for this crtc/manager.
  */
 struct exynos_drm_private {
 	struct drm_fb_helper *fb_helper;
@@ -271,6 +273,8 @@
 
 	unsigned long da_start;
 	unsigned long da_space_size;
+
+	unsigned int pipe;
 };
 
 /*
@@ -281,11 +285,11 @@
  * @drm_dev: pointer to drm_device and this pointer would be set
  *	when sub driver calls exynos_drm_subdrv_register().
  * @manager: subdrv has its own manager to control a hardware appropriately
- *	and we can access a hardware drawing on this manager.
+ *     and we can access a hardware drawing on this manager.
  * @probe: this callback would be called by exynos drm driver after
- *	subdrv is registered to it.
+ *     subdrv is registered to it.
  * @remove: this callback is used to release resources created
- *	by probe callback.
+ *     by probe callback.
  * @open: this would be called with drm device file open.
  * @close: this would be called with drm device file close.
  */
@@ -302,39 +306,14 @@
 			struct drm_file *file);
 };
 
-/*
- * this function calls a probe callback registered to sub driver list and
- * create its own encoder and connector and then set drm_device object
- * to global one.
- */
-int exynos_drm_device_register(struct drm_device *dev);
-/*
- * this function calls a remove callback registered to sub driver list and
- * destroy its own encoder and connetor.
- */
-int exynos_drm_device_unregister(struct drm_device *dev);
-
-int exynos_drm_initialize_managers(struct drm_device *dev);
-void exynos_drm_remove_managers(struct drm_device *dev);
-int exynos_drm_initialize_displays(struct drm_device *dev);
-void exynos_drm_remove_displays(struct drm_device *dev);
-
-int exynos_drm_manager_register(struct exynos_drm_manager *manager);
-int exynos_drm_manager_unregister(struct exynos_drm_manager *manager);
-int exynos_drm_display_register(struct exynos_drm_display *display);
-int exynos_drm_display_unregister(struct exynos_drm_display *display);
-
-/*
- * this function would be called by sub drivers such as display controller
- * or hdmi driver to register this sub driver object to exynos drm driver
- * and when a sub driver is registered to exynos drm driver a probe callback
- * of the sub driver is called and creates its own encoder and connector.
- */
+/* This function is called by non-KMS drivers such as g2d and ipp. */
 int exynos_drm_subdrv_register(struct exynos_drm_subdrv *drm_subdrv);
 
 /* this function removes subdrv list from exynos drm driver */
 int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *drm_subdrv);
 
+int exynos_drm_device_subdrv_probe(struct drm_device *dev);
+int exynos_drm_device_subdrv_remove(struct drm_device *dev);
 int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file);
 void exynos_drm_subdrv_close(struct drm_device *dev, struct drm_file *file);
 
@@ -360,18 +339,40 @@
 void exynos_platform_device_ipp_unregister(void);
 
 #ifdef CONFIG_DRM_EXYNOS_DPI
-int exynos_dpi_probe(struct device *dev);
+struct exynos_drm_display * exynos_dpi_probe(struct device *dev);
 int exynos_dpi_remove(struct device *dev);
 #else
-static inline int exynos_dpi_probe(struct device *dev) { return 0; }
+static inline struct exynos_drm_display *
+exynos_dpi_probe(struct device *dev) { return 0; }
 static inline int exynos_dpi_remove(struct device *dev) { return 0; }
 #endif
 
+/*
+ * this function registers exynos drm vidi platform device/driver.
+ */
+int exynos_drm_probe_vidi(void);
+
+/*
+ * this function unregisters exynos drm vidi platform device/driver.
+ */
+void exynos_drm_remove_vidi(void);
+
+/* This function creates an encoder and a connector, and initializes them. */
+int exynos_drm_create_enc_conn(struct drm_device *dev,
+				struct exynos_drm_display *display);
+
+int exynos_drm_component_add(struct device *dev,
+				enum exynos_drm_device_type dev_type,
+				enum exynos_drm_output_type out_type);
+
+void exynos_drm_component_del(struct device *dev,
+				enum exynos_drm_device_type dev_type);
+
+extern struct platform_driver fimd_driver;
 extern struct platform_driver dp_driver;
 extern struct platform_driver dsi_driver;
-extern struct platform_driver fimd_driver;
-extern struct platform_driver hdmi_driver;
 extern struct platform_driver mixer_driver;
+extern struct platform_driver hdmi_driver;
 extern struct platform_driver exynos_drm_common_hdmi_driver;
 extern struct platform_driver vidi_driver;
 extern struct platform_driver g2d_driver;

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 4ac4381..6302aa6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c

@@ -19,6 +19,7 @@
 #include <linux/irq.h>
 #include <linux/phy/phy.h>
 #include <linux/regulator/consumer.h>
+#include <linux/component.h>
 
 #include <video/mipi_display.h>
 #include <video/videomode.h>
@@ -1378,16 +1379,60 @@
 	return ret;
 }
 
+static int exynos_dsi_bind(struct device *dev, struct device *master,
+				void *data)
+{
+	struct drm_device *drm_dev = data;
+	struct exynos_dsi *dsi;
+	int ret;
+
+	ret = exynos_drm_create_enc_conn(drm_dev, &exynos_dsi_display);
+	if (ret) {
+		DRM_ERROR("Encoder create [%d] failed with %d\n",
+				exynos_dsi_display.type, ret);
+		return ret;
+	}
+
+	dsi = exynos_dsi_display.ctx;
+
+	return mipi_dsi_host_register(&dsi->dsi_host);
+}
+
+static void exynos_dsi_unbind(struct device *dev, struct device *master,
+				void *data)
+{
+	struct exynos_dsi *dsi = exynos_dsi_display.ctx;
+	struct drm_encoder *encoder = dsi->encoder;
+
+	exynos_dsi_dpms(&exynos_dsi_display, DRM_MODE_DPMS_OFF);
+
+	mipi_dsi_host_unregister(&dsi->dsi_host);
+
+	encoder->funcs->destroy(encoder);
+	drm_connector_cleanup(&dsi->connector);
+}
+
+static const struct component_ops exynos_dsi_component_ops = {
+	.bind	= exynos_dsi_bind,
+	.unbind	= exynos_dsi_unbind,
+};
+
 static int exynos_dsi_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct exynos_dsi *dsi;
 	int ret;
 
+	ret = exynos_drm_component_add(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR,
+					exynos_dsi_display.type);
+	if (ret)
+		return ret;
+
 	dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL);
 	if (!dsi) {
 		dev_err(&pdev->dev, "failed to allocate dsi object.\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_del_component;
 	}
 
 	init_completion(&dsi->completed);
@@ -1401,7 +1446,7 @@
 
 	ret = exynos_dsi_parse_dt(dsi);
 	if (ret)
-		return ret;
+		goto err_del_component;
 
 	dsi->supplies[0].supply = "vddcore";
 	dsi->supplies[1].supply = "vddio";
@@ -1415,32 +1460,37 @@
 	dsi->pll_clk = devm_clk_get(&pdev->dev, "pll_clk");
 	if (IS_ERR(dsi->pll_clk)) {
 		dev_info(&pdev->dev, "failed to get dsi pll input clock\n");
-		return -EPROBE_DEFER;
+		ret = PTR_ERR(dsi->pll_clk);
+		goto err_del_component;
 	}
 
 	dsi->bus_clk = devm_clk_get(&pdev->dev, "bus_clk");
 	if (IS_ERR(dsi->bus_clk)) {
 		dev_info(&pdev->dev, "failed to get dsi bus clock\n");
-		return -EPROBE_DEFER;
+		ret = PTR_ERR(dsi->bus_clk);
+		goto err_del_component;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->reg_base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(dsi->reg_base)) {
 		dev_err(&pdev->dev, "failed to remap io region\n");
-		return PTR_ERR(dsi->reg_base);
+		ret = PTR_ERR(dsi->reg_base);
+		goto err_del_component;
 	}
 
 	dsi->phy = devm_phy_get(&pdev->dev, "dsim");
 	if (IS_ERR(dsi->phy)) {
 		dev_info(&pdev->dev, "failed to get dsim phy\n");
-		return -EPROBE_DEFER;
+		ret = PTR_ERR(dsi->phy);
+		goto err_del_component;
 	}
 
 	dsi->irq = platform_get_irq(pdev, 0);
 	if (dsi->irq < 0) {
 		dev_err(&pdev->dev, "failed to request dsi irq resource\n");
-		return dsi->irq;
+		ret = dsi->irq;
+		goto err_del_component;
 	}
 
 	irq_set_status_flags(dsi->irq, IRQ_NOAUTOEN);
@@ -1449,59 +1499,32 @@
 					dev_name(&pdev->dev), dsi);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to request dsi irq\n");
-		return ret;
+		goto err_del_component;
 	}
 
 	exynos_dsi_display.ctx = dsi;
 
 	platform_set_drvdata(pdev, &exynos_dsi_display);
-	exynos_drm_display_register(&exynos_dsi_display);
 
-	return mipi_dsi_host_register(&dsi->dsi_host);
+	ret = component_add(&pdev->dev, &exynos_dsi_component_ops);
+	if (ret)
+		goto err_del_component;
+
+	return ret;
+
+err_del_component:
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
+	return ret;
 }
 
 static int exynos_dsi_remove(struct platform_device *pdev)
 {
-	struct exynos_dsi *dsi = exynos_dsi_display.ctx;
-
-	exynos_dsi_dpms(&exynos_dsi_display, DRM_MODE_DPMS_OFF);
-
-	exynos_drm_display_unregister(&exynos_dsi_display);
-	mipi_dsi_host_unregister(&dsi->dsi_host);
+	component_del(&pdev->dev, &exynos_dsi_component_ops);
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
 
 	return 0;
 }
 
-#if CONFIG_PM_SLEEP
-static int exynos_dsi_resume(struct device *dev)
-{
-	struct exynos_dsi *dsi = exynos_dsi_display.ctx;
-
-	if (dsi->state & DSIM_STATE_ENABLED) {
-		dsi->state &= ~DSIM_STATE_ENABLED;
-		exynos_dsi_enable(dsi);
-	}
-
-	return 0;
-}
-
-static int exynos_dsi_suspend(struct device *dev)
-{
-	struct exynos_dsi *dsi = exynos_dsi_display.ctx;
-
-	if (dsi->state & DSIM_STATE_ENABLED) {
-		exynos_dsi_disable(dsi);
-		dsi->state |= DSIM_STATE_ENABLED;
-	}
-
-	return 0;
-}
-#endif
-
-static const struct dev_pm_ops exynos_dsi_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume)
-};
-
 static struct of_device_id exynos_dsi_of_match[] = {
 	{ .compatible = "samsung,exynos4210-mipi-dsi" },
 	{ }
@@ -1513,7 +1536,6 @@
 	.driver = {
 		   .name = "exynos-dsi",
 		   .owner = THIS_MODULE,
-		   .pm = &exynos_dsi_pm_ops,
 		   .of_match_table = exynos_dsi_of_match,
 	},
 };

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index addbf75..d771b46 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c

@@ -121,16 +121,8 @@
 	offset = fbi->var.xoffset * (fb->bits_per_pixel >> 3);
 	offset += fbi->var.yoffset * fb->pitches[0];
 
-	dev->mode_config.fb_base = (resource_size_t)buffer->dma_addr;
 	fbi->screen_base = buffer->kvaddr + offset;
-	if (is_drm_iommu_supported(dev))
-		fbi->fix.smem_start = (unsigned long)
-			(page_to_phys(sg_page(buffer->sgt->sgl)) + offset);
-	else
-		fbi->fix.smem_start = (unsigned long)buffer->dma_addr;
-
 	fbi->screen_size = size;
-	fbi->fix.smem_len = size;
 
 	return 0;
 }
@@ -237,7 +229,7 @@
 	.fb_probe =	exynos_drm_fbdev_create,
 };
 
-bool exynos_drm_fbdev_is_anything_connected(struct drm_device *dev)
+static bool exynos_drm_fbdev_is_anything_connected(struct drm_device *dev)
 {
 	struct drm_connector *connector;
 	bool ret = false;
@@ -375,7 +367,5 @@
 	if (!private || !private->fb_helper)
 		return;
 
-	drm_modeset_lock_all(dev);
-	drm_fb_helper_restore_fbdev_mode(private->fb_helper);
-	drm_modeset_unlock_all(dev);
+	drm_fb_helper_restore_fbdev_mode_unlocked(private->fb_helper);
 }

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 30d76b2..831dde9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c

@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
+#include <linux/spinlock.h>
 
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
@@ -57,7 +58,6 @@
 #define FIMC_SHFACTOR	10
 #define FIMC_BUF_STOP	1
 #define FIMC_BUF_START	2
-#define FIMC_REG_SZ		32
 #define FIMC_WIDTH_ITU_709	1280
 #define FIMC_REFRESH_MAX	60
 #define FIMC_REFRESH_MIN	12
@@ -69,9 +69,6 @@
 #define get_fimc_context(dev)	platform_get_drvdata(to_platform_device(dev))
 #define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
 					struct fimc_context, ippdrv);
-#define fimc_read(offset)		readl(ctx->regs + (offset))
-#define fimc_write(cfg, offset)	writel(cfg, ctx->regs + (offset))
-
 enum fimc_wb {
 	FIMC_WB_NONE,
 	FIMC_WB_A,
@@ -161,7 +158,7 @@
 	struct exynos_drm_ippdrv	ippdrv;
 	struct resource	*regs_res;
 	void __iomem	*regs;
-	struct mutex	lock;
+	spinlock_t	lock;
 	struct clk	*clocks[FIMC_CLKS_MAX];
 	u32		clk_frequency;
 	struct regmap	*sysreg;
@@ -172,39 +169,53 @@
 	bool	suspended;
 };
 
+static u32 fimc_read(struct fimc_context *ctx, u32 reg)
+{
+	return readl(ctx->regs + reg);
+}
+
+static void fimc_write(struct fimc_context *ctx, u32 val, u32 reg)
+{
+	writel(val, ctx->regs + reg);
+}
+
+static void fimc_set_bits(struct fimc_context *ctx, u32 reg, u32 bits)
+{
+	void __iomem *r = ctx->regs + reg;
+
+	writel(readl(r) | bits, r);
+}
+
+static void fimc_clear_bits(struct fimc_context *ctx, u32 reg, u32 bits)
+{
+	void __iomem *r = ctx->regs + reg;
+
+	writel(readl(r) & ~bits, r);
+}
+
 static void fimc_sw_reset(struct fimc_context *ctx)
 {
 	u32 cfg;
 
 	/* stop dma operation */
-	cfg = fimc_read(EXYNOS_CISTATUS);
-	if (EXYNOS_CISTATUS_GET_ENVID_STATUS(cfg)) {
-		cfg = fimc_read(EXYNOS_MSCTRL);
-		cfg &= ~EXYNOS_MSCTRL_ENVID;
-		fimc_write(cfg, EXYNOS_MSCTRL);
-	}
+	cfg = fimc_read(ctx, EXYNOS_CISTATUS);
+	if (EXYNOS_CISTATUS_GET_ENVID_STATUS(cfg))
+		fimc_clear_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
 
-	cfg = fimc_read(EXYNOS_CISRCFMT);
-	cfg |= EXYNOS_CISRCFMT_ITU601_8BIT;
-	fimc_write(cfg, EXYNOS_CISRCFMT);
+	fimc_set_bits(ctx, EXYNOS_CISRCFMT, EXYNOS_CISRCFMT_ITU601_8BIT);
 
 	/* disable image capture */
-	cfg = fimc_read(EXYNOS_CIIMGCPT);
-	cfg &= ~(EXYNOS_CIIMGCPT_IMGCPTEN_SC | EXYNOS_CIIMGCPT_IMGCPTEN);
-	fimc_write(cfg, EXYNOS_CIIMGCPT);
+	fimc_clear_bits(ctx, EXYNOS_CIIMGCPT,
+		EXYNOS_CIIMGCPT_IMGCPTEN_SC | EXYNOS_CIIMGCPT_IMGCPTEN);
 
 	/* s/w reset */
-	cfg = fimc_read(EXYNOS_CIGCTRL);
-	cfg |= (EXYNOS_CIGCTRL_SWRST);
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_SWRST);
 
 	/* s/w reset complete */
-	cfg = fimc_read(EXYNOS_CIGCTRL);
-	cfg &= ~EXYNOS_CIGCTRL_SWRST;
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_clear_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_SWRST);
 
 	/* reset sequence */
-	fimc_write(0x0, EXYNOS_CIFCNTSEQ);
+	fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ);
 }
 
 static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx)
@@ -220,7 +231,7 @@
 
 	DRM_DEBUG_KMS("wb[%d]\n", wb);
 
-	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK |
 		EXYNOS_CIGCTRL_SELCAM_ITU_MASK |
 		EXYNOS_CIGCTRL_SELCAM_MIPI_MASK |
@@ -246,7 +257,7 @@
 		break;
 	}
 
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
 
 static void fimc_set_polarity(struct fimc_context *ctx,
@@ -259,7 +270,7 @@
 	DRM_DEBUG_KMS("inv_href[%d]inv_hsync[%d]\n",
 		pol->inv_href, pol->inv_hsync);
 
-	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	cfg &= ~(EXYNOS_CIGCTRL_INVPOLPCLK | EXYNOS_CIGCTRL_INVPOLVSYNC |
 		 EXYNOS_CIGCTRL_INVPOLHREF | EXYNOS_CIGCTRL_INVPOLHSYNC);
 
@@ -272,7 +283,7 @@
 	if (pol->inv_hsync)
 		cfg |= EXYNOS_CIGCTRL_INVPOLHSYNC;
 
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
 
 static void fimc_handle_jpeg(struct fimc_context *ctx, bool enable)
@@ -281,70 +292,54 @@
 
 	DRM_DEBUG_KMS("enable[%d]\n", enable);
 
-	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	if (enable)
 		cfg |= EXYNOS_CIGCTRL_CAM_JPEG;
 	else
 		cfg &= ~EXYNOS_CIGCTRL_CAM_JPEG;
 
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
 
-static void fimc_handle_irq(struct fimc_context *ctx, bool enable,
-		bool overflow, bool level)
+static void fimc_mask_irq(struct fimc_context *ctx, bool enable)
 {
 	u32 cfg;
 
-	DRM_DEBUG_KMS("enable[%d]overflow[%d]level[%d]\n",
-			enable, overflow, level);
+	DRM_DEBUG_KMS("enable[%d]\n", enable);
 
-	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	if (enable) {
-		cfg &= ~(EXYNOS_CIGCTRL_IRQ_OVFEN | EXYNOS_CIGCTRL_IRQ_LEVEL);
-		cfg |= EXYNOS_CIGCTRL_IRQ_ENABLE;
-		if (overflow)
-			cfg |= EXYNOS_CIGCTRL_IRQ_OVFEN;
-		if (level)
-			cfg |= EXYNOS_CIGCTRL_IRQ_LEVEL;
+		cfg &= ~EXYNOS_CIGCTRL_IRQ_OVFEN;
+		cfg |= EXYNOS_CIGCTRL_IRQ_ENABLE | EXYNOS_CIGCTRL_IRQ_LEVEL;
 	} else
-		cfg &= ~(EXYNOS_CIGCTRL_IRQ_OVFEN | EXYNOS_CIGCTRL_IRQ_ENABLE);
-
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+		cfg &= ~EXYNOS_CIGCTRL_IRQ_ENABLE;
+	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
 
 static void fimc_clear_irq(struct fimc_context *ctx)
 {
-	u32 cfg;
-
-	cfg = fimc_read(EXYNOS_CIGCTRL);
-	cfg |= EXYNOS_CIGCTRL_IRQ_CLR;
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_CLR);
 }
 
 static bool fimc_check_ovf(struct fimc_context *ctx)
 {
 	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	u32 cfg, status, flag;
+	u32 status, flag;
 
-	status = fimc_read(EXYNOS_CISTATUS);
+	status = fimc_read(ctx, EXYNOS_CISTATUS);
 	flag = EXYNOS_CISTATUS_OVFIY | EXYNOS_CISTATUS_OVFICB |
 		EXYNOS_CISTATUS_OVFICR;
 
 	DRM_DEBUG_KMS("flag[0x%x]\n", flag);
 
 	if (status & flag) {
-		cfg = fimc_read(EXYNOS_CIWDOFST);
-		cfg |= (EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
+		fimc_set_bits(ctx, EXYNOS_CIWDOFST,
+			EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
 			EXYNOS_CIWDOFST_CLROVFICR);
-
-		fimc_write(cfg, EXYNOS_CIWDOFST);
-
-		cfg = fimc_read(EXYNOS_CIWDOFST);
-		cfg &= ~(EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
+		fimc_clear_bits(ctx, EXYNOS_CIWDOFST,
+			EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
 			EXYNOS_CIWDOFST_CLROVFICR);
 
-		fimc_write(cfg, EXYNOS_CIWDOFST);
-
 		dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
 			ctx->id, status);
 		return true;
@@ -357,7 +352,7 @@
 {
 	u32 cfg;
 
-	cfg = fimc_read(EXYNOS_CISTATUS);
+	cfg = fimc_read(ctx, EXYNOS_CISTATUS);
 
 	DRM_DEBUG_KMS("cfg[0x%x]\n", cfg);
 
@@ -365,7 +360,7 @@
 		return false;
 
 	cfg &= ~(EXYNOS_CISTATUS_FRAMEEND);
-	fimc_write(cfg, EXYNOS_CISTATUS);
+	fimc_write(ctx, cfg, EXYNOS_CISTATUS);
 
 	return true;
 }
@@ -375,7 +370,7 @@
 	u32 cfg;
 	int frame_cnt, buf_id;
 
-	cfg = fimc_read(EXYNOS_CISTATUS2);
+	cfg = fimc_read(ctx, EXYNOS_CISTATUS2);
 	frame_cnt = EXYNOS_CISTATUS2_GET_FRAMECOUNT_BEFORE(cfg);
 
 	if (frame_cnt == 0)
@@ -402,13 +397,13 @@
 
 	DRM_DEBUG_KMS("enable[%d]\n", enable);
 
-	cfg = fimc_read(EXYNOS_CIOCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIOCTRL);
 	if (enable)
 		cfg |= EXYNOS_CIOCTRL_LASTENDEN;
 	else
 		cfg &= ~EXYNOS_CIOCTRL_LASTENDEN;
 
-	fimc_write(cfg, EXYNOS_CIOCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
 }
 
 
@@ -420,18 +415,18 @@
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
 
 	/* RGB */
-	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CISCCTRL);
 	cfg &= ~EXYNOS_CISCCTRL_INRGB_FMT_RGB_MASK;
 
 	switch (fmt) {
 	case DRM_FORMAT_RGB565:
 		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565;
-		fimc_write(cfg, EXYNOS_CISCCTRL);
+		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 		return 0;
 	case DRM_FORMAT_RGB888:
 	case DRM_FORMAT_XRGB8888:
 		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888;
-		fimc_write(cfg, EXYNOS_CISCCTRL);
+		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 		return 0;
 	default:
 		/* bypass */
@@ -439,7 +434,7 @@
 	}
 
 	/* YUV */
-	cfg = fimc_read(EXYNOS_MSCTRL);
+	cfg = fimc_read(ctx, EXYNOS_MSCTRL);
 	cfg &= ~(EXYNOS_MSCTRL_ORDER2P_SHIFT_MASK |
 		EXYNOS_MSCTRL_C_INT_IN_2PLANE |
 		EXYNOS_MSCTRL_ORDER422_YCBYCR);
@@ -479,7 +474,7 @@
 		return -EINVAL;
 	}
 
-	fimc_write(cfg, EXYNOS_MSCTRL);
+	fimc_write(ctx, cfg, EXYNOS_MSCTRL);
 
 	return 0;
 }
@@ -492,7 +487,7 @@
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
 
-	cfg = fimc_read(EXYNOS_MSCTRL);
+	cfg = fimc_read(ctx, EXYNOS_MSCTRL);
 	cfg &= ~EXYNOS_MSCTRL_INFORMAT_RGB;
 
 	switch (fmt) {
@@ -527,9 +522,9 @@
 		return -EINVAL;
 	}
 
-	fimc_write(cfg, EXYNOS_MSCTRL);
+	fimc_write(ctx, cfg, EXYNOS_MSCTRL);
 
-	cfg = fimc_read(EXYNOS_CIDMAPARAM);
+	cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
 	cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK;
 
 	if (fmt == DRM_FORMAT_NV12MT)
@@ -537,7 +532,7 @@
 	else
 		cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
 
-	fimc_write(cfg, EXYNOS_CIDMAPARAM);
+	fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
 
 	return fimc_src_set_fmt_order(ctx, fmt);
 }
@@ -552,11 +547,11 @@
 
 	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
 
-	cfg1 = fimc_read(EXYNOS_MSCTRL);
+	cfg1 = fimc_read(ctx, EXYNOS_MSCTRL);
 	cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR |
 		EXYNOS_MSCTRL_FLIP_Y_MIRROR);
 
-	cfg2 = fimc_read(EXYNOS_CITRGFMT);
+	cfg2 = fimc_read(ctx, EXYNOS_CITRGFMT);
 	cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
 
 	switch (degree) {
@@ -595,8 +590,8 @@
 		return -EINVAL;
 	}
 
-	fimc_write(cfg1, EXYNOS_MSCTRL);
-	fimc_write(cfg2, EXYNOS_CITRGFMT);
+	fimc_write(ctx, cfg1, EXYNOS_MSCTRL);
+	fimc_write(ctx, cfg2, EXYNOS_CITRGFMT);
 	*swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0;
 
 	return 0;
@@ -621,17 +616,17 @@
 	 * set window offset 1, 2 size
 	 * check figure 43-21 in user manual
 	 */
-	cfg = fimc_read(EXYNOS_CIWDOFST);
+	cfg = fimc_read(ctx, EXYNOS_CIWDOFST);
 	cfg &= ~(EXYNOS_CIWDOFST_WINHOROFST_MASK |
 		EXYNOS_CIWDOFST_WINVEROFST_MASK);
 	cfg |= (EXYNOS_CIWDOFST_WINHOROFST(h1) |
 		EXYNOS_CIWDOFST_WINVEROFST(v1));
 	cfg |= EXYNOS_CIWDOFST_WINOFSEN;
-	fimc_write(cfg, EXYNOS_CIWDOFST);
+	fimc_write(ctx, cfg, EXYNOS_CIWDOFST);
 
 	cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) |
 		EXYNOS_CIWDOFST2_WINVEROFST2(v2));
-	fimc_write(cfg, EXYNOS_CIWDOFST2);
+	fimc_write(ctx, cfg, EXYNOS_CIWDOFST2);
 
 	return 0;
 }
@@ -651,7 +646,7 @@
 	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) |
 		EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize));
 
-	fimc_write(cfg, EXYNOS_ORGISIZE);
+	fimc_write(ctx, cfg, EXYNOS_ORGISIZE);
 
 	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
 
@@ -663,12 +658,12 @@
 	}
 
 	/* set input DMA image size */
-	cfg = fimc_read(EXYNOS_CIREAL_ISIZE);
+	cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE);
 	cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK |
 		EXYNOS_CIREAL_ISIZE_WIDTH_MASK);
 	cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) |
 		EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h));
-	fimc_write(cfg, EXYNOS_CIREAL_ISIZE);
+	fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE);
 
 	/*
 	 * set input FIFO image size
@@ -677,18 +672,18 @@
 	cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
 		EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) |
 		EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize));
-	fimc_write(cfg, EXYNOS_CISRCFMT);
+	fimc_write(ctx, cfg, EXYNOS_CISRCFMT);
 
 	/* offset Y(RGB), Cb, Cr */
 	cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIIYOFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIIYOFF);
+	fimc_write(ctx, cfg, EXYNOS_CIIYOFF);
 	cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIICBOFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIICBOFF);
+	fimc_write(ctx, cfg, EXYNOS_CIICBOFF);
 	cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIICROFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIICROFF);
+	fimc_write(ctx, cfg, EXYNOS_CIICROFF);
 
 	return fimc_set_window(ctx, &img_pos, &img_sz);
 }
@@ -722,25 +717,25 @@
 	switch (buf_type) {
 	case IPP_BUF_ENQUEUE:
 		config = &property->config[EXYNOS_DRM_OPS_SRC];
-		fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+		fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
 			EXYNOS_CIIYSA(buf_id));
 
 		if (config->fmt == DRM_FORMAT_YVU420) {
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
 				EXYNOS_CIICBSA(buf_id));
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
 				EXYNOS_CIICRSA(buf_id));
 		} else {
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
 				EXYNOS_CIICBSA(buf_id));
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
 				EXYNOS_CIICRSA(buf_id));
 		}
 		break;
 	case IPP_BUF_DEQUEUE:
-		fimc_write(0x0, EXYNOS_CIIYSA(buf_id));
-		fimc_write(0x0, EXYNOS_CIICBSA(buf_id));
-		fimc_write(0x0, EXYNOS_CIICRSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIIYSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIICBSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIICRSA(buf_id));
 		break;
 	default:
 		/* bypass */
@@ -765,22 +760,22 @@
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
 
 	/* RGB */
-	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CISCCTRL);
 	cfg &= ~EXYNOS_CISCCTRL_OUTRGB_FMT_RGB_MASK;
 
 	switch (fmt) {
 	case DRM_FORMAT_RGB565:
 		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565;
-		fimc_write(cfg, EXYNOS_CISCCTRL);
+		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 		return 0;
 	case DRM_FORMAT_RGB888:
 		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888;
-		fimc_write(cfg, EXYNOS_CISCCTRL);
+		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 		return 0;
 	case DRM_FORMAT_XRGB8888:
 		cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 |
 			EXYNOS_CISCCTRL_EXTRGB_EXTENSION);
-		fimc_write(cfg, EXYNOS_CISCCTRL);
+		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 		break;
 	default:
 		/* bypass */
@@ -788,7 +783,7 @@
 	}
 
 	/* YUV */
-	cfg = fimc_read(EXYNOS_CIOCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIOCTRL);
 	cfg &= ~(EXYNOS_CIOCTRL_ORDER2P_MASK |
 		EXYNOS_CIOCTRL_ORDER422_MASK |
 		EXYNOS_CIOCTRL_YCBCR_PLANE_MASK);
@@ -830,7 +825,7 @@
 		return -EINVAL;
 	}
 
-	fimc_write(cfg, EXYNOS_CIOCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
 
 	return 0;
 }
@@ -843,16 +838,16 @@
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
 
-	cfg = fimc_read(EXYNOS_CIEXTEN);
+	cfg = fimc_read(ctx, EXYNOS_CIEXTEN);
 
 	if (fmt == DRM_FORMAT_AYUV) {
 		cfg |= EXYNOS_CIEXTEN_YUV444_OUT;
-		fimc_write(cfg, EXYNOS_CIEXTEN);
+		fimc_write(ctx, cfg, EXYNOS_CIEXTEN);
 	} else {
 		cfg &= ~EXYNOS_CIEXTEN_YUV444_OUT;
-		fimc_write(cfg, EXYNOS_CIEXTEN);
+		fimc_write(ctx, cfg, EXYNOS_CIEXTEN);
 
-		cfg = fimc_read(EXYNOS_CITRGFMT);
+		cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
 		cfg &= ~EXYNOS_CITRGFMT_OUTFORMAT_MASK;
 
 		switch (fmt) {
@@ -885,10 +880,10 @@
 			return -EINVAL;
 		}
 
-		fimc_write(cfg, EXYNOS_CITRGFMT);
+		fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
 	}
 
-	cfg = fimc_read(EXYNOS_CIDMAPARAM);
+	cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
 	cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK;
 
 	if (fmt == DRM_FORMAT_NV12MT)
@@ -896,7 +891,7 @@
 	else
 		cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
 
-	fimc_write(cfg, EXYNOS_CIDMAPARAM);
+	fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
 
 	return fimc_dst_set_fmt_order(ctx, fmt);
 }
@@ -911,7 +906,7 @@
 
 	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
 
-	cfg = fimc_read(EXYNOS_CITRGFMT);
+	cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
 	cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK;
 	cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
 
@@ -951,53 +946,23 @@
 		return -EINVAL;
 	}
 
-	fimc_write(cfg, EXYNOS_CITRGFMT);
+	fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
 	*swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0;
 
 	return 0;
 }
 
-static int fimc_get_ratio_shift(u32 src, u32 dst, u32 *ratio, u32 *shift)
-{
-	DRM_DEBUG_KMS("src[%d]dst[%d]\n", src, dst);
-
-	if (src >= dst * 64) {
-		DRM_ERROR("failed to make ratio and shift.\n");
-		return -EINVAL;
-	} else if (src >= dst * 32) {
-		*ratio = 32;
-		*shift = 5;
-	} else if (src >= dst * 16) {
-		*ratio = 16;
-		*shift = 4;
-	} else if (src >= dst * 8) {
-		*ratio = 8;
-		*shift = 3;
-	} else if (src >= dst * 4) {
-		*ratio = 4;
-		*shift = 2;
-	} else if (src >= dst * 2) {
-		*ratio = 2;
-		*shift = 1;
-	} else {
-		*ratio = 1;
-		*shift = 0;
-	}
-
-	return 0;
-}
-
 static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
 		struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
 {
 	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg, cfg_ext, shfactor;
 	u32 pre_dst_width, pre_dst_height;
-	u32 pre_hratio, hfactor, pre_vratio, vfactor;
+	u32 hfactor, vfactor;
 	int ret = 0;
 	u32 src_w, src_h, dst_w, dst_h;
 
-	cfg_ext = fimc_read(EXYNOS_CITRGFMT);
+	cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT);
 	if (cfg_ext & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) {
 		src_w = src->h;
 		src_h = src->w;
@@ -1014,24 +979,24 @@
 		dst_h = dst->h;
 	}
 
-	ret = fimc_get_ratio_shift(src_w, dst_w, &pre_hratio, &hfactor);
-	if (ret) {
+	/* fimc_ippdrv_check_property ensures that the divisors are non-zero */
+	hfactor = fls(src_w / dst_w / 2);
+	if (hfactor > FIMC_SHFACTOR / 2) {
 		dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
-		return ret;
+		return -EINVAL;
 	}
 
-	ret = fimc_get_ratio_shift(src_h, dst_h, &pre_vratio, &vfactor);
-	if (ret) {
+	vfactor = fls(src_h / dst_h / 2);
+	if (vfactor > FIMC_SHFACTOR / 2) {
 		dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
-		return ret;
+		return -EINVAL;
 	}
 
-	pre_dst_width = src_w / pre_hratio;
-	pre_dst_height = src_h / pre_vratio;
+	pre_dst_width = src_w >> hfactor;
+	pre_dst_height = src_h >> vfactor;
 	DRM_DEBUG_KMS("pre_dst_width[%d]pre_dst_height[%d]\n",
 		pre_dst_width, pre_dst_height);
-	DRM_DEBUG_KMS("pre_hratio[%d]hfactor[%d]pre_vratio[%d]vfactor[%d]\n",
-		pre_hratio, hfactor, pre_vratio, vfactor);
+	DRM_DEBUG_KMS("hfactor[%d]vfactor[%d]\n", hfactor, vfactor);
 
 	sc->hratio = (src_w << 14) / (dst_w << hfactor);
 	sc->vratio = (src_h << 14) / (dst_h << vfactor);
@@ -1044,13 +1009,13 @@
 	DRM_DEBUG_KMS("shfactor[%d]\n", shfactor);
 
 	cfg = (EXYNOS_CISCPRERATIO_SHFACTOR(shfactor) |
-		EXYNOS_CISCPRERATIO_PREHORRATIO(pre_hratio) |
-		EXYNOS_CISCPRERATIO_PREVERRATIO(pre_vratio));
-	fimc_write(cfg, EXYNOS_CISCPRERATIO);
+		EXYNOS_CISCPRERATIO_PREHORRATIO(1 << hfactor) |
+		EXYNOS_CISCPRERATIO_PREVERRATIO(1 << vfactor));
+	fimc_write(ctx, cfg, EXYNOS_CISCPRERATIO);
 
 	cfg = (EXYNOS_CISCPREDST_PREDSTWIDTH(pre_dst_width) |
 		EXYNOS_CISCPREDST_PREDSTHEIGHT(pre_dst_height));
-	fimc_write(cfg, EXYNOS_CISCPREDST);
+	fimc_write(ctx, cfg, EXYNOS_CISCPREDST);
 
 	return ret;
 }
@@ -1064,7 +1029,7 @@
 	DRM_DEBUG_KMS("hratio[%d]vratio[%d]\n",
 		sc->hratio, sc->vratio);
 
-	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CISCCTRL);
 	cfg &= ~(EXYNOS_CISCCTRL_SCALERBYPASS |
 		EXYNOS_CISCCTRL_SCALEUP_H | EXYNOS_CISCCTRL_SCALEUP_V |
 		EXYNOS_CISCCTRL_MAIN_V_RATIO_MASK |
@@ -1084,14 +1049,14 @@
 
 	cfg |= (EXYNOS_CISCCTRL_MAINHORRATIO((sc->hratio >> 6)) |
 		EXYNOS_CISCCTRL_MAINVERRATIO((sc->vratio >> 6)));
-	fimc_write(cfg, EXYNOS_CISCCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
 
-	cfg_ext = fimc_read(EXYNOS_CIEXTEN);
+	cfg_ext = fimc_read(ctx, EXYNOS_CIEXTEN);
 	cfg_ext &= ~EXYNOS_CIEXTEN_MAINHORRATIO_EXT_MASK;
 	cfg_ext &= ~EXYNOS_CIEXTEN_MAINVERRATIO_EXT_MASK;
 	cfg_ext |= (EXYNOS_CIEXTEN_MAINHORRATIO_EXT(sc->hratio) |
 		EXYNOS_CIEXTEN_MAINVERRATIO_EXT(sc->vratio));
-	fimc_write(cfg_ext, EXYNOS_CIEXTEN);
+	fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN);
 }
 
 static int fimc_dst_set_size(struct device *dev, int swap,
@@ -1109,12 +1074,12 @@
 	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) |
 		EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize));
 
-	fimc_write(cfg, EXYNOS_ORGOSIZE);
+	fimc_write(ctx, cfg, EXYNOS_ORGOSIZE);
 
 	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
 
 	/* CSC ITU */
-	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	cfg &= ~EXYNOS_CIGCTRL_CSC_MASK;
 
 	if (sz->hsize >= FIMC_WIDTH_ITU_709)
@@ -1122,7 +1087,7 @@
 	else
 		cfg |= EXYNOS_CIGCTRL_CSC_ITU601;
 
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 
 	if (swap) {
 		img_pos.w = pos->h;
@@ -1132,41 +1097,38 @@
 	}
 
 	/* target image size */
-	cfg = fimc_read(EXYNOS_CITRGFMT);
+	cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
 	cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK |
 		EXYNOS_CITRGFMT_TARGETV_MASK);
 	cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) |
 		EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h));
-	fimc_write(cfg, EXYNOS_CITRGFMT);
+	fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
 
 	/* target area */
 	cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h);
-	fimc_write(cfg, EXYNOS_CITAREA);
+	fimc_write(ctx, cfg, EXYNOS_CITAREA);
 
 	/* offset Y(RGB), Cb, Cr */
 	cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIOYOFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIOYOFF);
+	fimc_write(ctx, cfg, EXYNOS_CIOYOFF);
 	cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIOCBOFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIOCBOFF);
+	fimc_write(ctx, cfg, EXYNOS_CIOCBOFF);
 	cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) |
 		EXYNOS_CIOCROFF_VERTICAL(img_pos.y));
-	fimc_write(cfg, EXYNOS_CIOCROFF);
+	fimc_write(ctx, cfg, EXYNOS_CIOCROFF);
 
 	return 0;
 }
 
-static int fimc_dst_get_buf_seq(struct fimc_context *ctx)
+static int fimc_dst_get_buf_count(struct fimc_context *ctx)
 {
-	u32 cfg, i, buf_num = 0;
-	u32 mask = 0x00000001;
+	u32 cfg, buf_num;
 
-	cfg = fimc_read(EXYNOS_CIFCNTSEQ);
+	cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ);
 
-	for (i = 0; i < FIMC_REG_SZ; i++)
-		if (cfg & (mask << i))
-			buf_num++;
+	buf_num = hweight32(cfg);
 
 	DRM_DEBUG_KMS("buf_num[%d]\n", buf_num);
 
@@ -1181,13 +1143,14 @@
 	u32 cfg;
 	u32 mask = 0x00000001 << buf_id;
 	int ret = 0;
+	unsigned long flags;
 
 	DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
 
-	mutex_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/* mask register set */
-	cfg = fimc_read(EXYNOS_CIFCNTSEQ);
+	cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ);
 
 	switch (buf_type) {
 	case IPP_BUF_ENQUEUE:
@@ -1205,20 +1168,20 @@
 	/* sequence id */
 	cfg &= ~mask;
 	cfg |= (enable << buf_id);
-	fimc_write(cfg, EXYNOS_CIFCNTSEQ);
+	fimc_write(ctx, cfg, EXYNOS_CIFCNTSEQ);
 
 	/* interrupt enable */
 	if (buf_type == IPP_BUF_ENQUEUE &&
-	    fimc_dst_get_buf_seq(ctx) >= FIMC_BUF_START)
-		fimc_handle_irq(ctx, true, false, true);
+	    fimc_dst_get_buf_count(ctx) >= FIMC_BUF_START)
+		fimc_mask_irq(ctx, true);
 
 	/* interrupt disable */
 	if (buf_type == IPP_BUF_DEQUEUE &&
-	    fimc_dst_get_buf_seq(ctx) <= FIMC_BUF_STOP)
-		fimc_handle_irq(ctx, false, false, true);
+	    fimc_dst_get_buf_count(ctx) <= FIMC_BUF_STOP)
+		fimc_mask_irq(ctx, false);
 
 err_unlock:
-	mutex_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 	return ret;
 }
 
@@ -1252,25 +1215,25 @@
 	case IPP_BUF_ENQUEUE:
 		config = &property->config[EXYNOS_DRM_OPS_DST];
 
-		fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+		fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
 			EXYNOS_CIOYSA(buf_id));
 
 		if (config->fmt == DRM_FORMAT_YVU420) {
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
 				EXYNOS_CIOCBSA(buf_id));
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
 				EXYNOS_CIOCRSA(buf_id));
 		} else {
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
 				EXYNOS_CIOCBSA(buf_id));
-			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
 				EXYNOS_CIOCRSA(buf_id));
 		}
 		break;
 	case IPP_BUF_DEQUEUE:
-		fimc_write(0x0, EXYNOS_CIOYSA(buf_id));
-		fimc_write(0x0, EXYNOS_CIOCBSA(buf_id));
-		fimc_write(0x0, EXYNOS_CIOCRSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id));
+		fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id));
 		break;
 	default:
 		/* bypass */
@@ -1342,11 +1305,7 @@
 
 static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
 {
-	struct drm_exynos_ipp_prop_list *prop_list;
-
-	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
-	if (!prop_list)
-		return -ENOMEM;
+	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
 
 	prop_list->version = 1;
 	prop_list->writeback = 1;
@@ -1371,8 +1330,6 @@
 	prop_list->scale_min.hsize = FIMC_SCALE_MIN;
 	prop_list->scale_min.vsize = FIMC_SCALE_MIN;
 
-	ippdrv->prop_list = prop_list;
-
 	return 0;
 }
 
@@ -1395,7 +1352,7 @@
 {
 	struct fimc_context *ctx = get_fimc_context(dev);
 	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_prop_list *pp = ippdrv->prop_list;
+	struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
 	struct drm_exynos_ipp_config *config;
 	struct drm_exynos_pos *pos;
 	struct drm_exynos_sz *sz;
@@ -1508,15 +1465,15 @@
 	int i;
 
 	for (i = 0; i < FIMC_MAX_SRC; i++) {
-		fimc_write(0, EXYNOS_CIIYSA(i));
-		fimc_write(0, EXYNOS_CIICBSA(i));
-		fimc_write(0, EXYNOS_CIICRSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIIYSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIICBSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIICRSA(i));
 	}
 
 	for (i = 0; i < FIMC_MAX_DST; i++) {
-		fimc_write(0, EXYNOS_CIOYSA(i));
-		fimc_write(0, EXYNOS_CIOCBSA(i));
-		fimc_write(0, EXYNOS_CIOCRSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIOYSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIOCBSA(i));
+		fimc_write(ctx, 0, EXYNOS_CIOCRSA(i));
 	}
 }
 
@@ -1556,7 +1513,7 @@
 
 	property = &c_node->property;
 
-	fimc_handle_irq(ctx, true, false, true);
+	fimc_mask_irq(ctx, true);
 
 	for_each_ipp_ops(i) {
 		config = &property->config[i];
@@ -1582,10 +1539,10 @@
 		fimc_handle_lastend(ctx, false);
 
 		/* setup dma */
-		cfg0 = fimc_read(EXYNOS_MSCTRL);
+		cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
 		cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
 		cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
-		fimc_write(cfg0, EXYNOS_MSCTRL);
+		fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
 		break;
 	case IPP_CMD_WB:
 		fimc_set_type_ctrl(ctx, FIMC_WB_A);
@@ -1610,41 +1567,33 @@
 	}
 
 	/* Reset status */
-	fimc_write(0x0, EXYNOS_CISTATUS);
+	fimc_write(ctx, 0x0, EXYNOS_CISTATUS);
 
-	cfg0 = fimc_read(EXYNOS_CIIMGCPT);
+	cfg0 = fimc_read(ctx, EXYNOS_CIIMGCPT);
 	cfg0 &= ~EXYNOS_CIIMGCPT_IMGCPTEN_SC;
 	cfg0 |= EXYNOS_CIIMGCPT_IMGCPTEN_SC;
 
 	/* Scaler */
-	cfg1 = fimc_read(EXYNOS_CISCCTRL);
+	cfg1 = fimc_read(ctx, EXYNOS_CISCCTRL);
 	cfg1 &= ~EXYNOS_CISCCTRL_SCAN_MASK;
 	cfg1 |= (EXYNOS_CISCCTRL_PROGRESSIVE |
 		EXYNOS_CISCCTRL_SCALERSTART);
 
-	fimc_write(cfg1, EXYNOS_CISCCTRL);
+	fimc_write(ctx, cfg1, EXYNOS_CISCCTRL);
 
 	/* Enable image capture*/
 	cfg0 |= EXYNOS_CIIMGCPT_IMGCPTEN;
-	fimc_write(cfg0, EXYNOS_CIIMGCPT);
+	fimc_write(ctx, cfg0, EXYNOS_CIIMGCPT);
 
 	/* Disable frame end irq */
-	cfg0 = fimc_read(EXYNOS_CIGCTRL);
-	cfg0 &= ~EXYNOS_CIGCTRL_IRQ_END_DISABLE;
-	fimc_write(cfg0, EXYNOS_CIGCTRL);
+	fimc_clear_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE);
 
-	cfg0 = fimc_read(EXYNOS_CIOCTRL);
-	cfg0 &= ~EXYNOS_CIOCTRL_WEAVE_MASK;
-	fimc_write(cfg0, EXYNOS_CIOCTRL);
+	fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK);
 
 	if (cmd == IPP_CMD_M2M) {
-		cfg0 = fimc_read(EXYNOS_MSCTRL);
-		cfg0 |= EXYNOS_MSCTRL_ENVID;
-		fimc_write(cfg0, EXYNOS_MSCTRL);
+		fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
 
-		cfg0 = fimc_read(EXYNOS_MSCTRL);
-		cfg0 |= EXYNOS_MSCTRL_ENVID;
-		fimc_write(cfg0, EXYNOS_MSCTRL);
+		fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
 	}
 
 	return 0;
@@ -1661,10 +1610,10 @@
 	switch (cmd) {
 	case IPP_CMD_M2M:
 		/* Source clear */
-		cfg = fimc_read(EXYNOS_MSCTRL);
+		cfg = fimc_read(ctx, EXYNOS_MSCTRL);
 		cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
 		cfg &= ~EXYNOS_MSCTRL_ENVID;
-		fimc_write(cfg, EXYNOS_MSCTRL);
+		fimc_write(ctx, cfg, EXYNOS_MSCTRL);
 		break;
 	case IPP_CMD_WB:
 		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
@@ -1675,25 +1624,20 @@
 		break;
 	}
 
-	fimc_handle_irq(ctx, false, false, true);
+	fimc_mask_irq(ctx, false);
 
 	/* reset sequence */
-	fimc_write(0x0, EXYNOS_CIFCNTSEQ);
+	fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ);
 
 	/* Scaler disable */
-	cfg = fimc_read(EXYNOS_CISCCTRL);
-	cfg &= ~EXYNOS_CISCCTRL_SCALERSTART;
-	fimc_write(cfg, EXYNOS_CISCCTRL);
+	fimc_clear_bits(ctx, EXYNOS_CISCCTRL, EXYNOS_CISCCTRL_SCALERSTART);
 
 	/* Disable image capture */
-	cfg = fimc_read(EXYNOS_CIIMGCPT);
-	cfg &= ~(EXYNOS_CIIMGCPT_IMGCPTEN_SC | EXYNOS_CIIMGCPT_IMGCPTEN);
-	fimc_write(cfg, EXYNOS_CIIMGCPT);
+	fimc_clear_bits(ctx, EXYNOS_CIIMGCPT,
+		EXYNOS_CIIMGCPT_IMGCPTEN_SC | EXYNOS_CIIMGCPT_IMGCPTEN);
 
 	/* Enable frame end irq */
-	cfg = fimc_read(EXYNOS_CIGCTRL);
-	cfg |= EXYNOS_CIGCTRL_IRQ_END_DISABLE;
-	fimc_write(cfg, EXYNOS_CIGCTRL);
+	fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE);
 }
 
 static void fimc_put_clocks(struct fimc_context *ctx)
@@ -1848,7 +1792,7 @@
 
 	DRM_DEBUG_KMS("id[%d]ippdrv[0x%x]\n", ctx->id, (int)ippdrv);
 
-	mutex_init(&ctx->lock);
+	spin_lock_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
 
 	pm_runtime_set_active(dev);
@@ -1879,7 +1823,6 @@
 	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 
 	exynos_drm_ippdrv_unregister(ippdrv);
-	mutex_destroy(&ctx->lock);
 
 	fimc_put_clocks(ctx);
 	pm_runtime_set_suspended(dev);

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 40fd6cc..bb45ab2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c

@@ -19,6 +19,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/component.h>
 
 #include <video/of_display_timing.h>
 #include <video/of_videomode.h>
@@ -38,6 +39,7 @@
  */
 
 #define FIMD_DEFAULT_FRAMERATE 60
+#define MIN_FB_WIDTH_FOR_16WORD_BURST 128
 
 /* position control register for hardware window 0, 2 ~ 4.*/
 #define VIDOSD_A(win)		(VIDOSD_BASE + 0x00 + (win) * 16)
@@ -122,6 +124,7 @@
 
 	struct exynos_drm_panel_info panel;
 	struct fimd_driver_data *driver_data;
+	struct exynos_drm_display *display;
 };
 
 static const struct of_device_id fimd_driver_dt_match[] = {
@@ -143,13 +146,57 @@
 	return (struct fimd_driver_data *)of_id->data;
 }
 
-static int fimd_mgr_initialize(struct exynos_drm_manager *mgr,
-			struct drm_device *drm_dev, int pipe)
+static void fimd_wait_for_vblank(struct exynos_drm_manager *mgr)
 {
 	struct fimd_context *ctx = mgr->ctx;
 
-	ctx->drm_dev = drm_dev;
-	ctx->pipe = pipe;
+	if (ctx->suspended)
+		return;
+
+	atomic_set(&ctx->wait_vsync_event, 1);
+
+	/*
+	 * wait for FIMD to signal VSYNC interrupt or return after
+	 * timeout which is set to 50ms (refresh rate of 20).
+	 */
+	if (!wait_event_timeout(ctx->wait_vsync_queue,
+				!atomic_read(&ctx->wait_vsync_event),
+				HZ/20))
+		DRM_DEBUG_KMS("vblank wait timed out.\n");
+}
+
+
+static void fimd_clear_channel(struct exynos_drm_manager *mgr)
+{
+	struct fimd_context *ctx = mgr->ctx;
+	int win, ch_enabled = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* Check if any channel is enabled. */
+	for (win = 0; win < WINDOWS_NR; win++) {
+		u32 val = readl(ctx->regs + SHADOWCON);
+		if (val & SHADOWCON_CHx_ENABLE(win)) {
+			val &= ~SHADOWCON_CHx_ENABLE(win);
+			writel(val, ctx->regs + SHADOWCON);
+			ch_enabled = 1;
+		}
+	}
+
+	/* Wait for vsync, as disable channel takes effect at next vsync */
+	if (ch_enabled)
+		fimd_wait_for_vblank(mgr);
+}
+
+static int fimd_mgr_initialize(struct exynos_drm_manager *mgr,
+			struct drm_device *drm_dev)
+{
+	struct fimd_context *ctx = mgr->ctx;
+	struct exynos_drm_private *priv;
+	priv = drm_dev->dev_private;
+
+	mgr->drm_dev = ctx->drm_dev = drm_dev;
+	mgr->pipe = ctx->pipe = priv->pipe++;
 
 	/*
 	 * enable drm irq mode.
@@ -169,8 +216,14 @@
 	drm_dev->vblank_disable_allowed = true;
 
 	/* attach this sub driver to iommu mapping if supported. */
-	if (is_drm_iommu_supported(ctx->drm_dev))
+	if (is_drm_iommu_supported(ctx->drm_dev)) {
+		/*
+		 * If any channel is already active, iommu will throw
+		 * a PAGE FAULT when enabled. So clear any channel if enabled.
+		 */
+		fimd_clear_channel(mgr);
 		drm_iommu_attach_device(ctx->drm_dev, ctx->dev);
+	}
 
 	return 0;
 }
@@ -324,25 +377,6 @@
 	}
 }
 
-static void fimd_wait_for_vblank(struct exynos_drm_manager *mgr)
-{
-	struct fimd_context *ctx = mgr->ctx;
-
-	if (ctx->suspended)
-		return;
-
-	atomic_set(&ctx->wait_vsync_event, 1);
-
-	/*
-	 * wait for FIMD to signal VSYNC interrupt or return after
-	 * timeout which is set to 50ms (refresh rate of 20).
-	 */
-	if (!wait_event_timeout(ctx->wait_vsync_queue,
-				!atomic_read(&ctx->wait_vsync_event),
-				HZ/20))
-		DRM_DEBUG_KMS("vblank wait timed out.\n");
-}
-
 static void fimd_win_mode_set(struct exynos_drm_manager *mgr,
 			struct exynos_drm_overlay *overlay)
 {
@@ -446,6 +480,19 @@
 
 	DRM_DEBUG_KMS("bpp = %d\n", win_data->bpp);
 
+	/*
+	 * In case of exynos, setting dma-burst to 16Word causes permanent
+	 * tearing for very small buffers, e.g. cursor buffer. Burst Mode
+	 * switching which is based on overlay size is not recommended as
+	 * overlay size varies a lot towards the end of the screen and rapid
+	 * movement causes unstable DMA which results in iommu crash/tear.
+	 */
+
+	if (win_data->fb_width < MIN_FB_WIDTH_FOR_16WORD_BURST) {
+		val &= ~WINCONx_BURSTLEN_MASK;
+		val |= WINCONx_BURSTLEN_4WORD;
+	}
+
 	writel(val, ctx->regs + WINCON(win));
 }
 
@@ -656,19 +703,6 @@
 	win_data->enabled = false;
 }
 
-static void fimd_clear_win(struct fimd_context *ctx, int win)
-{
-	writel(0, ctx->regs + WINCON(win));
-	writel(0, ctx->regs + VIDOSD_A(win));
-	writel(0, ctx->regs + VIDOSD_B(win));
-	writel(0, ctx->regs + VIDOSD_C(win));
-
-	if (win == 1 || win == 2)
-		writel(0, ctx->regs + VIDOSD_D(win));
-
-	fimd_shadow_protect_win(ctx, win, false);
-}
-
 static void fimd_window_suspend(struct exynos_drm_manager *mgr)
 {
 	struct fimd_context *ctx = mgr->ctx;
@@ -803,8 +837,6 @@
 }
 
 static struct exynos_drm_manager_ops fimd_manager_ops = {
-	.initialize = fimd_mgr_initialize,
-	.remove = fimd_mgr_remove,
 	.dpms = fimd_dpms,
 	.mode_fixup = fimd_mode_fixup,
 	.mode_set = fimd_mode_set,
@@ -849,20 +881,64 @@
 	return IRQ_HANDLED;
 }
 
+static int fimd_bind(struct device *dev, struct device *master, void *data)
+{
+	struct fimd_context *ctx = fimd_manager.ctx;
+	struct drm_device *drm_dev = data;
+
+	fimd_mgr_initialize(&fimd_manager, drm_dev);
+	exynos_drm_crtc_create(&fimd_manager);
+	if (ctx->display)
+		exynos_drm_create_enc_conn(drm_dev, ctx->display);
+
+	return 0;
+
+}
+
+static void fimd_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct exynos_drm_manager *mgr = dev_get_drvdata(dev);
+	struct fimd_context *ctx = fimd_manager.ctx;
+	struct drm_crtc *crtc = mgr->crtc;
+
+	fimd_dpms(mgr, DRM_MODE_DPMS_OFF);
+
+	if (ctx->display)
+		exynos_dpi_remove(dev);
+
+	fimd_mgr_remove(mgr);
+
+	crtc->funcs->destroy(crtc);
+}
+
+static const struct component_ops fimd_component_ops = {
+	.bind	= fimd_bind,
+	.unbind = fimd_unbind,
+};
+
 static int fimd_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct fimd_context *ctx;
 	struct resource *res;
-	int win;
 	int ret = -EINVAL;
 
-	if (!dev->of_node)
-		return -ENODEV;
+	ret = exynos_drm_component_add(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC,
+					fimd_manager.type);
+	if (ret)
+		return ret;
+
+	if (!dev->of_node) {
+		ret = -ENODEV;
+		goto err_del_component;
+	}
 
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
+	if (!ctx) {
+		ret = -ENOMEM;
+		goto err_del_component;
+	}
 
 	ctx->dev = dev;
 	ctx->suspended = true;
@@ -875,32 +951,37 @@
 	ctx->bus_clk = devm_clk_get(dev, "fimd");
 	if (IS_ERR(ctx->bus_clk)) {
 		dev_err(dev, "failed to get bus clock\n");
-		return PTR_ERR(ctx->bus_clk);
+		ret = PTR_ERR(ctx->bus_clk);
+		goto err_del_component;
 	}
 
 	ctx->lcd_clk = devm_clk_get(dev, "sclk_fimd");
 	if (IS_ERR(ctx->lcd_clk)) {
 		dev_err(dev, "failed to get lcd clock\n");
-		return PTR_ERR(ctx->lcd_clk);
+		ret = PTR_ERR(ctx->lcd_clk);
+		goto err_del_component;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
 	ctx->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(ctx->regs))
-		return PTR_ERR(ctx->regs);
+	if (IS_ERR(ctx->regs)) {
+		ret = PTR_ERR(ctx->regs);
+		goto err_del_component;
+	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "vsync");
 	if (!res) {
 		dev_err(dev, "irq request failed.\n");
-		return -ENXIO;
+		ret = -ENXIO;
+		goto err_del_component;
 	}
 
 	ret = devm_request_irq(dev, res->start, fimd_irq_handler,
 							0, "drm_fimd", ctx);
 	if (ret) {
 		dev_err(dev, "irq request failed.\n");
-		return ret;
+		goto err_del_component;
 	}
 
 	ctx->driver_data = drm_fimd_get_driver_data(pdev);
@@ -910,30 +991,34 @@
 	platform_set_drvdata(pdev, &fimd_manager);
 
 	fimd_manager.ctx = ctx;
-	exynos_drm_manager_register(&fimd_manager);
 
-	exynos_dpi_probe(ctx->dev);
+	ctx->display = exynos_dpi_probe(dev);
+	if (IS_ERR(ctx->display))
+		return PTR_ERR(ctx->display);
 
-	pm_runtime_enable(dev);
+	pm_runtime_enable(&pdev->dev);
 
-	for (win = 0; win < WINDOWS_NR; win++)
-		fimd_clear_win(ctx, win);
+	ret = component_add(&pdev->dev, &fimd_component_ops);
+	if (ret)
+		goto err_disable_pm_runtime;
 
-	return 0;
+	return ret;
+
+err_disable_pm_runtime:
+	pm_runtime_disable(&pdev->dev);
+
+err_del_component:
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC);
+	return ret;
 }
 
 static int fimd_remove(struct platform_device *pdev)
 {
-	struct exynos_drm_manager *mgr = platform_get_drvdata(pdev);
-
-	exynos_dpi_remove(&pdev->dev);
-
-	exynos_drm_manager_unregister(&fimd_manager);
-
-	fimd_dpms(mgr, DRM_MODE_DPMS_OFF);
-
 	pm_runtime_disable(&pdev->dev);
 
+	component_del(&pdev->dev, &fimd_component_ops);
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC);
+
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 42d2904..163a054 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c

@@ -612,22 +612,20 @@
 	args->pitch = args->width * ((args->bpp + 7) / 8);
 	args->size = args->pitch * args->height;
 
-	exynos_gem_obj = exynos_drm_gem_create(dev, EXYNOS_BO_CONTIG |
-						EXYNOS_BO_WC, args->size);
-	/*
-	 * If physically contiguous memory allocation fails and if IOMMU is
-	 * supported then try to get buffer from non physically contiguous
-	 * memory area.
-	 */
-	if (IS_ERR(exynos_gem_obj) && is_drm_iommu_supported(dev)) {
-		dev_warn(dev->dev, "contiguous FB allocation failed, falling back to non-contiguous\n");
+	if (is_drm_iommu_supported(dev)) {
 		exynos_gem_obj = exynos_drm_gem_create(dev,
-					EXYNOS_BO_NONCONTIG | EXYNOS_BO_WC,
-					args->size);
+			EXYNOS_BO_NONCONTIG | EXYNOS_BO_WC,
+			args->size);
+	} else {
+		exynos_gem_obj = exynos_drm_gem_create(dev,
+			EXYNOS_BO_CONTIG | EXYNOS_BO_WC,
+			args->size);
 	}
 
-	if (IS_ERR(exynos_gem_obj))
+	if (IS_ERR(exynos_gem_obj)) {
+		dev_warn(dev->dev, "FB allocation failed.\n");
 		return PTR_ERR(exynos_gem_obj);
+	}
 
 	ret = exynos_drm_gem_handle_create(&exynos_gem_obj->base, file_priv,
 			&args->handle);

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index fa75059..9e3ff16 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c

@@ -1335,11 +1335,7 @@
 
 static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
 {
-	struct drm_exynos_ipp_prop_list *prop_list;
-
-	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
-	if (!prop_list)
-		return -ENOMEM;
+	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
 
 	prop_list->version = 1;
 	prop_list->writeback = 1;
@@ -1363,8 +1359,6 @@
 	prop_list->scale_min.hsize = GSC_SCALE_MIN;
 	prop_list->scale_min.vsize = GSC_SCALE_MIN;
 
-	ippdrv->prop_list = prop_list;
-
 	return 0;
 }
 
@@ -1387,7 +1381,7 @@
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
 	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_prop_list *pp = ippdrv->prop_list;
+	struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
 	struct drm_exynos_ipp_config *config;
 	struct drm_exynos_pos *pos;
 	struct drm_exynos_sz *sz;

diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 3d78144..a1888e1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c

@@ -167,6 +167,13 @@
 	return 0;
 }
 
+static void ipp_remove_id(struct idr *id_idr, struct mutex *lock, u32 id)
+{
+	mutex_lock(lock);
+	idr_remove(id_idr, id);
+	mutex_unlock(lock);
+}
+
 static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id)
 {
 	void *obj;
@@ -276,11 +283,6 @@
 
 	DRM_DEBUG_KMS("prop_id[%d]\n", prop_id);
 
-	if (list_empty(&exynos_drm_ippdrv_list)) {
-		DRM_DEBUG_KMS("ippdrv_list is empty.\n");
-		return ERR_PTR(-ENODEV);
-	}
-
 	/*
 	 * This case is search ipp driver by prop_id handle.
 	 * sometimes, ipp subsystem find driver by prop_id.
@@ -289,11 +291,14 @@
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
 		DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n", count++, (int)ippdrv);
 
-		if (!list_empty(&ippdrv->cmd_list)) {
-			list_for_each_entry(c_node, &ippdrv->cmd_list, list)
-				if (c_node->property.prop_id == prop_id)
-					return ippdrv;
+		mutex_lock(&ippdrv->cmd_lock);
+		list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
+			if (c_node->property.prop_id == prop_id) {
+				mutex_unlock(&ippdrv->cmd_lock);
+				return ippdrv;
+			}
 		}
+		mutex_unlock(&ippdrv->cmd_lock);
 	}
 
 	return ERR_PTR(-ENODEV);
@@ -325,6 +330,7 @@
 	if (!prop_list->ipp_id) {
 		list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list)
 			count++;
+
 		/*
 		 * Supports ippdrv list count for user application.
 		 * First step user application getting ippdrv count.
@@ -346,7 +352,7 @@
 			return PTR_ERR(ippdrv);
 		}
 
-		prop_list = ippdrv->prop_list;
+		*prop_list = ippdrv->prop_list;
 	}
 
 	return 0;
@@ -386,9 +392,11 @@
 	 * when we find this command no using prop_id.
 	 * return property information set in this command node.
 	 */
+	mutex_lock(&ippdrv->cmd_lock);
 	list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
 		if ((c_node->property.prop_id == prop_id) &&
 		    (c_node->state == IPP_STATE_STOP)) {
+			mutex_unlock(&ippdrv->cmd_lock);
 			DRM_DEBUG_KMS("found cmd[%d]ippdrv[0x%x]\n",
 				property->cmd, (int)ippdrv);
 
@@ -396,6 +404,7 @@
 			return 0;
 		}
 	}
+	mutex_unlock(&ippdrv->cmd_lock);
 
 	DRM_ERROR("failed to search property.\n");
 
@@ -499,7 +508,7 @@
 	c_node->start_work = ipp_create_cmd_work();
 	if (IS_ERR(c_node->start_work)) {
 		DRM_ERROR("failed to create start work.\n");
-		goto err_clear;
+		goto err_remove_id;
 	}
 
 	c_node->stop_work = ipp_create_cmd_work();
@@ -514,7 +523,7 @@
 		goto err_free_stop;
 	}
 
-	mutex_init(&c_node->cmd_lock);
+	mutex_init(&c_node->lock);
 	mutex_init(&c_node->mem_lock);
 	mutex_init(&c_node->event_lock);
 
@@ -526,7 +535,9 @@
 
 	INIT_LIST_HEAD(&c_node->event_list);
 	list_splice_init(&priv->event_list, &c_node->event_list);
+	mutex_lock(&ippdrv->cmd_lock);
 	list_add_tail(&c_node->list, &ippdrv->cmd_list);
+	mutex_unlock(&ippdrv->cmd_lock);
 
 	/* make dedicated state without m2m */
 	if (!ipp_is_m2m_cmd(property->cmd))
@@ -538,18 +549,24 @@
 	kfree(c_node->stop_work);
 err_free_start:
 	kfree(c_node->start_work);
+err_remove_id:
+	ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, property->prop_id);
 err_clear:
 	kfree(c_node);
 	return ret;
 }
 
-static void ipp_clean_cmd_node(struct drm_exynos_ipp_cmd_node *c_node)
+static void ipp_clean_cmd_node(struct ipp_context *ctx,
+				struct drm_exynos_ipp_cmd_node *c_node)
 {
 	/* delete list */
 	list_del(&c_node->list);
 
+	ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock,
+			c_node->property.prop_id);
+
 	/* destroy mutex */
-	mutex_destroy(&c_node->cmd_lock);
+	mutex_destroy(&c_node->lock);
 	mutex_destroy(&c_node->mem_lock);
 	mutex_destroy(&c_node->event_lock);
 
@@ -567,17 +584,10 @@
 	struct list_head *head;
 	int ret, i, count[EXYNOS_DRM_OPS_MAX] = { 0, };
 
-	mutex_lock(&c_node->mem_lock);
-
 	for_each_ipp_ops(i) {
 		/* source/destination memory list */
 		head = &c_node->mem_list[i];
 
-		if (list_empty(head)) {
-			DRM_DEBUG_KMS("%s memory empty.\n", i ? "dst" : "src");
-			continue;
-		}
-
 		/* find memory node entry */
 		list_for_each_entry(m_node, head, list) {
 			DRM_DEBUG_KMS("%s,count[%d]m_node[0x%x]\n",
@@ -602,8 +612,6 @@
 		ret = max(count[EXYNOS_DRM_OPS_SRC],
 			count[EXYNOS_DRM_OPS_DST]);
 
-	mutex_unlock(&c_node->mem_lock);
-
 	return ret;
 }
 
@@ -646,16 +654,13 @@
 		return -EFAULT;
 	}
 
-	mutex_lock(&c_node->mem_lock);
-
 	DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
 
 	/* get operations callback */
 	ops = ippdrv->ops[m_node->ops_id];
 	if (!ops) {
 		DRM_ERROR("not support ops.\n");
-		ret = -EFAULT;
-		goto err_unlock;
+		return -EFAULT;
 	}
 
 	/* set address and enable irq */
@@ -664,12 +669,10 @@
 			m_node->buf_id, IPP_BUF_ENQUEUE);
 		if (ret) {
 			DRM_ERROR("failed to set addr.\n");
-			goto err_unlock;
+			return ret;
 		}
 	}
 
-err_unlock:
-	mutex_unlock(&c_node->mem_lock);
 	return ret;
 }
 
@@ -684,11 +687,9 @@
 	void *addr;
 	int i;
 
-	mutex_lock(&c_node->mem_lock);
-
 	m_node = kzalloc(sizeof(*m_node), GFP_KERNEL);
 	if (!m_node)
-		goto err_unlock;
+		return ERR_PTR(-ENOMEM);
 
 	/* clear base address for error handling */
 	memset(&buf_info, 0x0, sizeof(buf_info));
@@ -722,15 +723,14 @@
 
 	m_node->filp = file;
 	m_node->buf_info = buf_info;
+	mutex_lock(&c_node->mem_lock);
 	list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]);
-
 	mutex_unlock(&c_node->mem_lock);
+
 	return m_node;
 
 err_clear:
 	kfree(m_node);
-err_unlock:
-	mutex_unlock(&c_node->mem_lock);
 	return ERR_PTR(-EFAULT);
 }
 
@@ -747,13 +747,6 @@
 		return -EFAULT;
 	}
 
-	if (list_empty(&m_node->list)) {
-		DRM_ERROR("empty memory node.\n");
-		return -ENOMEM;
-	}
-
-	mutex_lock(&c_node->mem_lock);
-
 	DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
 
 	/* put gem buffer */
@@ -768,8 +761,6 @@
 	list_del(&m_node->list);
 	kfree(m_node);
 
-	mutex_unlock(&c_node->mem_lock);
-
 	return 0;
 }
 
@@ -805,7 +796,9 @@
 	e->base.event = &e->event.base;
 	e->base.file_priv = file;
 	e->base.destroy = ipp_free_event;
+	mutex_lock(&c_node->event_lock);
 	list_add_tail(&e->base.link, &c_node->event_list);
+	mutex_unlock(&c_node->event_lock);
 
 	return 0;
 }
@@ -816,11 +809,7 @@
 	struct drm_exynos_ipp_send_event *e, *te;
 	int count = 0;
 
-	if (list_empty(&c_node->event_list)) {
-		DRM_DEBUG_KMS("event_list is empty.\n");
-		return;
-	}
-
+	mutex_lock(&c_node->event_lock);
 	list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
 		DRM_DEBUG_KMS("count[%d]e[0x%x]\n", count++, (int)e);
 
@@ -841,9 +830,13 @@
 			/* delete list */
 			list_del(&e->base.link);
 			kfree(e);
-			return;
+			goto out_unlock;
 		}
 	}
+
+out_unlock:
+	mutex_unlock(&c_node->event_lock);
+	return;
 }
 
 static void ipp_handle_cmd_work(struct device *dev,
@@ -887,7 +880,9 @@
 		return 0;
 	}
 
+	mutex_lock(&c_node->mem_lock);
 	if (!ipp_check_mem_list(c_node)) {
+		mutex_unlock(&c_node->mem_lock);
 		DRM_DEBUG_KMS("empty memory.\n");
 		return 0;
 	}
@@ -904,10 +899,12 @@
 	} else {
 		ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 		if (ret) {
+			mutex_unlock(&c_node->mem_lock);
 			DRM_ERROR("failed to set m node.\n");
 			return ret;
 		}
 	}
+	mutex_unlock(&c_node->mem_lock);
 
 	return 0;
 }
@@ -918,15 +915,15 @@
 {
 	struct drm_exynos_ipp_mem_node *m_node, *tm_node;
 
-	if (!list_empty(&c_node->mem_list[qbuf->ops_id])) {
-		/* delete list */
-		list_for_each_entry_safe(m_node, tm_node,
-			&c_node->mem_list[qbuf->ops_id], list) {
-			if (m_node->buf_id == qbuf->buf_id &&
-			    m_node->ops_id == qbuf->ops_id)
-				ipp_put_mem_node(drm_dev, c_node, m_node);
-		}
+	/* delete list */
+	mutex_lock(&c_node->mem_lock);
+	list_for_each_entry_safe(m_node, tm_node,
+		&c_node->mem_list[qbuf->ops_id], list) {
+		if (m_node->buf_id == qbuf->buf_id &&
+		    m_node->ops_id == qbuf->ops_id)
+			ipp_put_mem_node(drm_dev, c_node, m_node);
 	}
+	mutex_unlock(&c_node->mem_lock);
 }
 
 int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
@@ -998,7 +995,7 @@
 		}
 		break;
 	case IPP_BUF_DEQUEUE:
-		mutex_lock(&c_node->cmd_lock);
+		mutex_lock(&c_node->lock);
 
 		/* put event for destination buffer */
 		if (qbuf->ops_id == EXYNOS_DRM_OPS_DST)
@@ -1006,7 +1003,7 @@
 
 		ipp_clean_queue_buf(drm_dev, c_node, qbuf);
 
-		mutex_unlock(&c_node->cmd_lock);
+		mutex_unlock(&c_node->lock);
 		break;
 	default:
 		DRM_ERROR("invalid buffer control.\n");
@@ -1109,12 +1106,12 @@
 	case IPP_CTRL_PLAY:
 		if (pm_runtime_suspended(ippdrv->dev))
 			pm_runtime_get_sync(ippdrv->dev);
+
 		c_node->state = IPP_STATE_START;
 
 		cmd_work = c_node->start_work;
 		cmd_work->ctrl = cmd_ctrl->ctrl;
 		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-		c_node->state = IPP_STATE_START;
 		break;
 	case IPP_CTRL_STOP:
 		cmd_work = c_node->stop_work;
@@ -1129,10 +1126,12 @@
 
 		c_node->state = IPP_STATE_STOP;
 		ippdrv->dedicated = false;
-		ipp_clean_cmd_node(c_node);
+		mutex_lock(&ippdrv->cmd_lock);
+		ipp_clean_cmd_node(ctx, c_node);
 
 		if (list_empty(&ippdrv->cmd_list))
 			pm_runtime_put_sync(ippdrv->dev);
+		mutex_unlock(&ippdrv->cmd_lock);
 		break;
 	case IPP_CTRL_PAUSE:
 		cmd_work = c_node->stop_work;
@@ -1260,9 +1259,11 @@
 	/* store command info in ippdrv */
 	ippdrv->c_node = c_node;
 
+	mutex_lock(&c_node->mem_lock);
 	if (!ipp_check_mem_list(c_node)) {
 		DRM_DEBUG_KMS("empty memory.\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_unlock;
 	}
 
 	/* set current property in ippdrv */
@@ -1270,7 +1271,7 @@
 	if (ret) {
 		DRM_ERROR("failed to set property.\n");
 		ippdrv->c_node = NULL;
-		return ret;
+		goto err_unlock;
 	}
 
 	/* check command */
@@ -1285,7 +1286,7 @@
 			if (!m_node) {
 				DRM_ERROR("failed to get node.\n");
 				ret = -EFAULT;
-				return ret;
+				goto err_unlock;
 			}
 
 			DRM_DEBUG_KMS("m_node[0x%x]\n", (int)m_node);
@@ -1293,7 +1294,7 @@
 			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 			if (ret) {
 				DRM_ERROR("failed to set m node.\n");
-				return ret;
+				goto err_unlock;
 			}
 		}
 		break;
@@ -1305,7 +1306,7 @@
 			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 			if (ret) {
 				DRM_ERROR("failed to set m node.\n");
-				return ret;
+				goto err_unlock;
 			}
 		}
 		break;
@@ -1317,14 +1318,16 @@
 			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 			if (ret) {
 				DRM_ERROR("failed to set m node.\n");
-				return ret;
+				goto err_unlock;
 			}
 		}
 		break;
 	default:
 		DRM_ERROR("invalid operations.\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_unlock;
 	}
+	mutex_unlock(&c_node->mem_lock);
 
 	DRM_DEBUG_KMS("cmd[%d]\n", property->cmd);
 
@@ -1333,11 +1336,17 @@
 		ret = ippdrv->start(ippdrv->dev, property->cmd);
 		if (ret) {
 			DRM_ERROR("failed to start ops.\n");
+			ippdrv->c_node = NULL;
 			return ret;
 		}
 	}
 
 	return 0;
+
+err_unlock:
+	mutex_unlock(&c_node->mem_lock);
+	ippdrv->c_node = NULL;
+	return ret;
 }
 
 static int ipp_stop_property(struct drm_device *drm_dev,
@@ -1354,6 +1363,8 @@
 	/* put event */
 	ipp_put_event(c_node, NULL);
 
+	mutex_lock(&c_node->mem_lock);
+
 	/* check command */
 	switch (property->cmd) {
 	case IPP_CMD_M2M:
@@ -1361,11 +1372,6 @@
 			/* source/destination memory list */
 			head = &c_node->mem_list[i];
 
-			if (list_empty(head)) {
-				DRM_DEBUG_KMS("mem_list is empty.\n");
-				break;
-			}
-
 			list_for_each_entry_safe(m_node, tm_node,
 				head, list) {
 				ret = ipp_put_mem_node(drm_dev, c_node,
@@ -1381,11 +1387,6 @@
 		/* destination memory list */
 		head = &c_node->mem_list[EXYNOS_DRM_OPS_DST];
 
-		if (list_empty(head)) {
-			DRM_DEBUG_KMS("mem_list is empty.\n");
-			break;
-		}
-
 		list_for_each_entry_safe(m_node, tm_node, head, list) {
 			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
 			if (ret) {
@@ -1398,11 +1399,6 @@
 		/* source memory list */
 		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
 
-		if (list_empty(head)) {
-			DRM_DEBUG_KMS("mem_list is empty.\n");
-			break;
-		}
-
 		list_for_each_entry_safe(m_node, tm_node, head, list) {
 			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
 			if (ret) {
@@ -1418,6 +1414,8 @@
 	}
 
 err_clear:
+	mutex_unlock(&c_node->mem_lock);
+
 	/* stop operations */
 	if (ippdrv->stop)
 		ippdrv->stop(ippdrv->dev, property->cmd);
@@ -1446,7 +1444,7 @@
 		return;
 	}
 
-	mutex_lock(&c_node->cmd_lock);
+	mutex_lock(&c_node->lock);
 
 	property = &c_node->property;
 
@@ -1494,7 +1492,7 @@
 	DRM_DEBUG_KMS("ctrl[%d] done.\n", cmd_work->ctrl);
 
 err_unlock:
-	mutex_unlock(&c_node->cmd_lock);
+	mutex_unlock(&c_node->lock);
 }
 
 static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
@@ -1524,14 +1522,18 @@
 		return -EINVAL;
 	}
 
+	mutex_lock(&c_node->event_lock);
 	if (list_empty(&c_node->event_list)) {
 		DRM_DEBUG_KMS("event list is empty.\n");
-		return 0;
+		ret = 0;
+		goto err_event_unlock;
 	}
 
+	mutex_lock(&c_node->mem_lock);
 	if (!ipp_check_mem_list(c_node)) {
 		DRM_DEBUG_KMS("empty memory.\n");
-		return 0;
+		ret = 0;
+		goto err_mem_unlock;
 	}
 
 	/* check command */
@@ -1545,7 +1547,8 @@
 				struct drm_exynos_ipp_mem_node, list);
 			if (!m_node) {
 				DRM_ERROR("empty memory node.\n");
-				return -ENOMEM;
+				ret = -ENOMEM;
+				goto err_mem_unlock;
 			}
 
 			tbuf_id[i] = m_node->buf_id;
@@ -1567,7 +1570,8 @@
 		m_node = ipp_find_mem_node(c_node, &qbuf);
 		if (!m_node) {
 			DRM_ERROR("empty memory node.\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto err_mem_unlock;
 		}
 
 		tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id;
@@ -1584,7 +1588,8 @@
 			struct drm_exynos_ipp_mem_node, list);
 		if (!m_node) {
 			DRM_ERROR("empty memory node.\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto err_mem_unlock;
 		}
 
 		tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id;
@@ -1595,8 +1600,10 @@
 		break;
 	default:
 		DRM_ERROR("invalid operations.\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_mem_unlock;
 	}
+	mutex_unlock(&c_node->mem_lock);
 
 	if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST])
 		DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n",
@@ -1611,11 +1618,6 @@
 	e = list_first_entry(&c_node->event_list,
 		struct drm_exynos_ipp_send_event, base.link);
 
-	if (!e) {
-		DRM_ERROR("empty event.\n");
-		return -EINVAL;
-	}
-
 	do_gettimeofday(&now);
 	DRM_DEBUG_KMS("tv_sec[%ld]tv_usec[%ld]\n", now.tv_sec, now.tv_usec);
 	e->event.tv_sec = now.tv_sec;
@@ -1630,11 +1632,18 @@
 	list_move_tail(&e->base.link, &e->base.file_priv->event_list);
 	wake_up_interruptible(&e->base.file_priv->event_wait);
 	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+	mutex_unlock(&c_node->event_lock);
 
 	DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n",
 		property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]);
 
 	return 0;
+
+err_mem_unlock:
+	mutex_unlock(&c_node->mem_lock);
+err_event_unlock:
+	mutex_unlock(&c_node->event_lock);
+	return ret;
 }
 
 void ipp_sched_event(struct work_struct *work)
@@ -1676,8 +1685,6 @@
 		goto err_completion;
 	}
 
-	mutex_lock(&c_node->event_lock);
-
 	ret = ipp_send_event(ippdrv, c_node, event_work->buf_id);
 	if (ret) {
 		DRM_ERROR("failed to send event.\n");
@@ -1687,8 +1694,6 @@
 err_completion:
 	if (ipp_is_m2m_cmd(c_node->property.cmd))
 		complete(&c_node->start_complete);
-
-	mutex_unlock(&c_node->event_lock);
 }
 
 static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
@@ -1699,23 +1704,21 @@
 
 	/* get ipp driver entry */
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+		u32 ipp_id;
+
 		ippdrv->drm_dev = drm_dev;
 
 		ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv,
-			&ippdrv->ipp_id);
-		if (ret) {
+				    &ipp_id);
+		if (ret || ipp_id == 0) {
 			DRM_ERROR("failed to create id.\n");
-			goto err_idr;
+			goto err;
 		}
 
 		DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]ipp_id[%d]\n",
-			count++, (int)ippdrv, ippdrv->ipp_id);
+			count++, (int)ippdrv, ipp_id);
 
-		if (ippdrv->ipp_id == 0) {
-			DRM_ERROR("failed to get ipp_id[%d]\n",
-				ippdrv->ipp_id);
-			goto err_idr;
-		}
+		ippdrv->prop_list.ipp_id = ipp_id;
 
 		/* store parent device for node */
 		ippdrv->parent_dev = dev;
@@ -1724,39 +1727,46 @@
 		ippdrv->event_workq = ctx->event_workq;
 		ippdrv->sched_event = ipp_sched_event;
 		INIT_LIST_HEAD(&ippdrv->cmd_list);
+		mutex_init(&ippdrv->cmd_lock);
 
 		if (is_drm_iommu_supported(drm_dev)) {
 			ret = drm_iommu_attach_device(drm_dev, ippdrv->dev);
 			if (ret) {
 				DRM_ERROR("failed to activate iommu\n");
-				goto err_iommu;
+				goto err;
 			}
 		}
 	}
 
 	return 0;
 
-err_iommu:
+err:
 	/* get ipp driver entry */
-	list_for_each_entry_reverse(ippdrv, &exynos_drm_ippdrv_list, drv_list)
+	list_for_each_entry_continue_reverse(ippdrv, &exynos_drm_ippdrv_list,
+						drv_list) {
 		if (is_drm_iommu_supported(drm_dev))
 			drm_iommu_detach_device(drm_dev, ippdrv->dev);
 
-err_idr:
-	idr_destroy(&ctx->ipp_idr);
-	idr_destroy(&ctx->prop_idr);
+		ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
+				ippdrv->prop_list.ipp_id);
+	}
+
 	return ret;
 }
 
 static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
 {
 	struct exynos_drm_ippdrv *ippdrv;
+	struct ipp_context *ctx = get_ipp_context(dev);
 
 	/* get ipp driver entry */
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
 		if (is_drm_iommu_supported(drm_dev))
 			drm_iommu_detach_device(drm_dev, ippdrv->dev);
 
+		ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
+				ippdrv->prop_list.ipp_id);
+
 		ippdrv->drm_dev = NULL;
 		exynos_drm_ippdrv_unregister(ippdrv);
 	}
@@ -1787,20 +1797,14 @@
 	struct drm_exynos_file_private *file_priv = file->driver_priv;
 	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
 	struct exynos_drm_ippdrv *ippdrv = NULL;
+	struct ipp_context *ctx = get_ipp_context(dev);
 	struct drm_exynos_ipp_cmd_node *c_node, *tc_node;
 	int count = 0;
 
 	DRM_DEBUG_KMS("for priv[0x%x]\n", (int)priv);
 
-	if (list_empty(&exynos_drm_ippdrv_list)) {
-		DRM_DEBUG_KMS("ippdrv_list is empty.\n");
-		goto err_clear;
-	}
-
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		if (list_empty(&ippdrv->cmd_list))
-			continue;
-
+		mutex_lock(&ippdrv->cmd_lock);
 		list_for_each_entry_safe(c_node, tc_node,
 			&ippdrv->cmd_list, list) {
 			DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n",
@@ -1820,14 +1824,14 @@
 				}
 
 				ippdrv->dedicated = false;
-				ipp_clean_cmd_node(c_node);
+				ipp_clean_cmd_node(ctx, c_node);
 				if (list_empty(&ippdrv->cmd_list))
 					pm_runtime_put_sync(ippdrv->dev);
 			}
 		}
+		mutex_unlock(&ippdrv->cmd_lock);
 	}
 
-err_clear:
 	kfree(priv);
 	return;
 }

diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
index ab1634b..7aaeaae 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h

@@ -52,7 +52,7 @@
  * @list: list head to command queue information.
  * @event_list: list head of event.
  * @mem_list: list head to source,destination memory queue information.
- * @cmd_lock: lock for synchronization of access to ioctl.
+ * @lock: lock for synchronization of access to ioctl.
  * @mem_lock: lock for synchronization of access to memory nodes.
  * @event_lock: lock for synchronization of access to scheduled event.
  * @start_complete: completion of start of command.
@@ -68,7 +68,7 @@
 	struct list_head	list;
 	struct list_head	event_list;
 	struct list_head	mem_list[EXYNOS_DRM_OPS_MAX];
-	struct mutex	cmd_lock;
+	struct mutex	lock;
 	struct mutex	mem_lock;
 	struct mutex	event_lock;
 	struct completion	start_complete;
@@ -83,7 +83,7 @@
 /*
  * A structure of buffer information.
  *
- * @gem_objs: Y, Cb, Cr each gem object.
+ * @handles: Y, Cb, Cr each gem object handle.
  * @base: Y, Cb, Cr each planar address.
  */
 struct drm_exynos_ipp_buf_info {
@@ -142,12 +142,12 @@
  * @parent_dev: parent device information.
  * @dev: platform device.
  * @drm_dev: drm device.
- * @ipp_id: id of ipp driver.
  * @dedicated: dedicated ipp device.
  * @ops: source, destination operations.
  * @event_workq: event work queue.
  * @c_node: current command information.
  * @cmd_list: list head for command information.
+ * @cmd_lock: lock for synchronization of access to cmd_list.
  * @prop_list: property informations of current ipp driver.
  * @check_property: check property about format, size, buffer.
  * @reset: reset ipp block.
@@ -160,13 +160,13 @@
 	struct device	*parent_dev;
 	struct device	*dev;
 	struct drm_device	*drm_dev;
-	u32	ipp_id;
 	bool	dedicated;
 	struct exynos_drm_ipp_ops	*ops[EXYNOS_DRM_OPS_MAX];
 	struct workqueue_struct	*event_workq;
 	struct drm_exynos_ipp_cmd_node *c_node;
 	struct list_head	cmd_list;
-	struct drm_exynos_ipp_prop_list *prop_list;
+	struct mutex	cmd_lock;
+	struct drm_exynos_ipp_prop_list prop_list;
 
 	int (*check_property)(struct device *dev,
 		struct drm_exynos_ipp_property *property);

diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 7b90168..f01fbb6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c

@@ -158,8 +158,9 @@
 			rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
 		queue_work(ippdrv->event_workq,
 			(struct work_struct *)event_work);
-	} else
+	} else {
 		DRM_ERROR("the SFR is set illegally\n");
+	}
 
 	return IRQ_HANDLED;
 }
@@ -469,11 +470,7 @@
 
 static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
 {
-	struct drm_exynos_ipp_prop_list *prop_list;
-
-	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
-	if (!prop_list)
-		return -ENOMEM;
+	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
 
 	prop_list->version = 1;
 	prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
@@ -486,8 +483,6 @@
 	prop_list->crop = 0;
 	prop_list->scale = 0;
 
-	ippdrv->prop_list = prop_list;
-
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 852f2da..2fb8705 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c

@@ -51,6 +51,7 @@
 	struct drm_crtc			*crtc;
 	struct drm_encoder		*encoder;
 	struct drm_connector		connector;
+	struct exynos_drm_subdrv	subdrv;
 	struct vidi_win_data		win_data[WINDOWS_NR];
 	struct edid			*raw_edid;
 	unsigned int			clkdiv;
@@ -294,14 +295,13 @@
 }
 
 static int vidi_mgr_initialize(struct exynos_drm_manager *mgr,
-			struct drm_device *drm_dev, int pipe)
+			struct drm_device *drm_dev)
 {
 	struct vidi_context *ctx = mgr->ctx;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	DRM_ERROR("vidi initialize ct=%p dev=%p pipe=%d\n", ctx, drm_dev, pipe);
-
-	ctx->drm_dev = drm_dev;
-	ctx->pipe = pipe;
+	mgr->drm_dev = ctx->drm_dev = drm_dev;
+	mgr->pipe = ctx->pipe = priv->pipe++;
 
 	/*
 	 * enable drm irq mode.
@@ -324,7 +324,6 @@
 }
 
 static struct exynos_drm_manager_ops vidi_manager_ops = {
-	.initialize = vidi_mgr_initialize,
 	.dpms = vidi_dpms,
 	.commit = vidi_commit,
 	.enable_vblank = vidi_enable_vblank,
@@ -533,12 +532,6 @@
 	return drm_add_edid_modes(connector, edid);
 }
 
-static int vidi_mode_valid(struct drm_connector *connector,
-			struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector)
 {
 	struct vidi_context *ctx = ctx_from_connector(connector);
@@ -548,7 +541,6 @@
 
 static struct drm_connector_helper_funcs vidi_connector_helper_funcs = {
 	.get_modes = vidi_get_modes,
-	.mode_valid = vidi_mode_valid,
 	.best_encoder = vidi_best_encoder,
 };
 
@@ -586,13 +578,38 @@
 	.ops = &vidi_display_ops,
 };
 
+static int vidi_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	struct exynos_drm_manager *mgr = get_vidi_mgr(dev);
+	struct vidi_context *ctx = mgr->ctx;
+	struct drm_crtc *crtc = ctx->crtc;
+	int ret;
+
+	vidi_mgr_initialize(mgr, drm_dev);
+
+	ret = exynos_drm_crtc_create(&vidi_manager);
+	if (ret) {
+		DRM_ERROR("failed to create crtc.\n");
+		return ret;
+	}
+
+	ret = exynos_drm_create_enc_conn(drm_dev, &vidi_display);
+	if (ret) {
+		crtc->funcs->destroy(crtc);
+		DRM_ERROR("failed to create encoder and connector.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static int vidi_probe(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
+	struct exynos_drm_subdrv *subdrv;
 	struct vidi_context *ctx;
 	int ret;
 
-	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
@@ -607,28 +624,43 @@
 
 	platform_set_drvdata(pdev, &vidi_manager);
 
-	ret = device_create_file(dev, &dev_attr_connection);
-	if (ret < 0)
-		DRM_INFO("failed to create connection sysfs.\n");
+	subdrv = &ctx->subdrv;
+	subdrv->dev = &pdev->dev;
+	subdrv->probe = vidi_subdrv_probe;
 
-	exynos_drm_manager_register(&vidi_manager);
-	exynos_drm_display_register(&vidi_display);
+	ret = exynos_drm_subdrv_register(subdrv);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to register drm vidi device\n");
+		return ret;
+	}
+
+	ret = device_create_file(&pdev->dev, &dev_attr_connection);
+	if (ret < 0) {
+		exynos_drm_subdrv_unregister(subdrv);
+		DRM_INFO("failed to create connection sysfs.\n");
+	}
 
 	return 0;
 }
 
 static int vidi_remove(struct platform_device *pdev)
 {
-	struct vidi_context *ctx = platform_get_drvdata(pdev);
-
-	exynos_drm_display_unregister(&vidi_display);
-	exynos_drm_manager_unregister(&vidi_manager);
+	struct exynos_drm_manager *mgr = platform_get_drvdata(pdev);
+	struct vidi_context *ctx = mgr->ctx;
+	struct drm_encoder *encoder = ctx->encoder;
+	struct drm_crtc *crtc = mgr->crtc;
 
 	if (ctx->raw_edid != (struct edid *)fake_edid_info) {
 		kfree(ctx->raw_edid);
 		ctx->raw_edid = NULL;
+
+		return -EINVAL;
 	}
 
+	crtc->funcs->destroy(crtc);
+	encoder->funcs->destroy(encoder);
+	drm_connector_cleanup(&ctx->connector);
+
 	return 0;
 }
 
@@ -640,3 +672,31 @@
 		.owner	= THIS_MODULE,
 	},
 };
+
+int exynos_drm_probe_vidi(void)
+{
+	struct platform_device *pdev;
+	int ret;
+
+	pdev = platform_device_register_simple("exynos-drm-vidi", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	ret = platform_driver_register(&vidi_driver);
+	if (ret) {
+		platform_device_unregister(pdev);
+		return ret;
+	}
+
+	return ret;
+}
+
+void exynos_drm_remove_vidi(void)
+{
+	struct vidi_context *ctx = vidi_manager.ctx;
+	struct exynos_drm_subdrv *subdrv = &ctx->subdrv;
+	struct platform_device *pdev = to_platform_device(subdrv->dev);
+
+	platform_driver_unregister(&vidi_driver);
+	platform_device_unregister(pdev);
+}

diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 9a6d652..c104d0c 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c

@@ -33,13 +33,17 @@
 #include <linux/regulator/consumer.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/i2c.h>
+#include <linux/of_address.h>
 #include <linux/of_gpio.h>
 #include <linux/hdmi.h>
+#include <linux/component.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include <drm/exynos_drm.h>
 
 #include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
 #include "exynos_mixer.h"
 
 #include <linux/gpio.h>
@@ -48,6 +52,8 @@
 #define get_hdmi_display(dev)	platform_get_drvdata(to_platform_device(dev))
 #define ctx_from_connector(c)	container_of(c, struct hdmi_context, connector)
 
+#define HOTPLUG_DEBOUNCE_MS		1100
+
 /* AVI header and aspect ratio */
 #define HDMI_AVI_VERSION		0x02
 #define HDMI_AVI_LENGTH		0x0D
@@ -66,6 +72,8 @@
 
 struct hdmi_driver_data {
 	unsigned int type;
+	const struct hdmiphy_config *phy_confs;
+	unsigned int phy_conf_count;
 	unsigned int is_apb_phy:1;
 };
 
@@ -74,7 +82,6 @@
 	struct clk			*sclk_hdmi;
 	struct clk			*sclk_pixel;
 	struct clk			*sclk_hdmiphy;
-	struct clk			*hdmiphy;
 	struct clk			*mout_hdmi;
 	struct regulator_bulk_data	*regul_bulk;
 	int				regul_count;
@@ -185,17 +192,23 @@
 
 	void __iomem			*regs;
 	int				irq;
+	struct delayed_work		hotplug_work;
 
 	struct i2c_adapter		*ddc_adpt;
 	struct i2c_client		*hdmiphy_port;
 
 	/* current hdmiphy conf regs */
+	struct drm_display_mode		current_mode;
 	struct hdmi_conf_regs		mode_conf;
 
 	struct hdmi_resources		res;
 
 	int				hpd_gpio;
+	void __iomem			*regs_hdmiphy;
+	const struct hdmiphy_config		*phy_confs;
+	unsigned int			phy_conf_count;
 
+	struct regmap			*pmureg;
 	enum hdmi_type			type;
 };
 
@@ -204,14 +217,6 @@
 	u8 conf[32];
 };
 
-struct hdmi_driver_data exynos4212_hdmi_driver_data = {
-	.type	= HDMI_TYPE14,
-};
-
-struct hdmi_driver_data exynos5_hdmi_driver_data = {
-	.type	= HDMI_TYPE14,
-};
-
 /* list of phy config settings */
 static const struct hdmiphy_config hdmiphy_v13_configs[] = {
 	{
@@ -319,18 +324,18 @@
 	{
 		.pixel_clock = 71000000,
 		.conf = {
-			0x01, 0x91, 0x1e, 0x15, 0x40, 0x3c, 0xce, 0x08,
-			0x04, 0x20, 0xb2, 0xd8, 0x45, 0xa0, 0xac, 0x80,
-			0x06, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
+			0x01, 0xd1, 0x3b, 0x35, 0x40, 0x0c, 0x04, 0x08,
+			0x85, 0xa0, 0x63, 0xd9, 0x45, 0xa0, 0xac, 0x80,
+			0x08, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
 			0x54, 0xad, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
 		},
 	},
 	{
 		.pixel_clock = 73250000,
 		.conf = {
-			0x01, 0xd1, 0x1f, 0x15, 0x40, 0x18, 0xe9, 0x08,
-			0x02, 0xa0, 0xb7, 0xd8, 0x45, 0xa0, 0xac, 0x80,
-			0x06, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
+			0x01, 0xd1, 0x3d, 0x35, 0x40, 0x18, 0x02, 0x08,
+			0x83, 0xa0, 0x6e, 0xd9, 0x45, 0xa0, 0xac, 0x80,
+			0x08, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
 			0x54, 0xa8, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
 		},
 	},
@@ -362,15 +367,6 @@
 		},
 	},
 	{
-		.pixel_clock = 88750000,
-		.conf = {
-			0x01, 0x91, 0x25, 0x17, 0x40, 0x30, 0xfe, 0x08,
-			0x06, 0x20, 0xde, 0xd8, 0x45, 0xa0, 0xac, 0x80,
-			0x06, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
-			0x54, 0x8a, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
-		},
-	},
-	{
 		.pixel_clock = 106500000,
 		.conf = {
 			0x01, 0xd1, 0x2c, 0x12, 0x40, 0x0c, 0x09, 0x08,
@@ -391,18 +387,18 @@
 	{
 		.pixel_clock = 115500000,
 		.conf = {
-			0x01, 0xd1, 0x30, 0x1a, 0x40, 0x40, 0x10, 0x04,
-			0x04, 0xa0, 0x21, 0xd9, 0x45, 0xa0, 0xac, 0x80,
-			0x06, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
+			0x01, 0xd1, 0x30, 0x12, 0x40, 0x40, 0x10, 0x08,
+			0x80, 0x80, 0x21, 0xd9, 0x45, 0xa0, 0xac, 0x80,
+			0x08, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
 			0x54, 0xaa, 0x25, 0x03, 0x00, 0x00, 0x01, 0x80,
 		},
 	},
 	{
 		.pixel_clock = 119000000,
 		.conf = {
-			0x01, 0x91, 0x32, 0x14, 0x40, 0x60, 0xd8, 0x08,
-			0x06, 0x20, 0x2a, 0xd9, 0x45, 0xa0, 0xac, 0x80,
-			0x06, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
+			0x01, 0xd1, 0x32, 0x1a, 0x40, 0x30, 0xd8, 0x08,
+			0x04, 0xa0, 0x2a, 0xd9, 0x45, 0xa0, 0xac, 0x80,
+			0x08, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86,
 			0x54, 0x9d, 0x25, 0x03, 0x00, 0x00, 0x01, 0x80,
 		},
 	},
@@ -426,6 +422,183 @@
 	},
 };
 
+static const struct hdmiphy_config hdmiphy_5420_configs[] = {
+	{
+		.pixel_clock = 25200000,
+		.conf = {
+			0x01, 0x52, 0x3F, 0x55, 0x40, 0x01, 0x00, 0xC8,
+			0x82, 0xC8, 0xBD, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x06, 0x80, 0x01, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xF4, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 27000000,
+		.conf = {
+			0x01, 0xD1, 0x22, 0x51, 0x40, 0x08, 0xFC, 0xE0,
+			0x98, 0xE8, 0xCB, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x06, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xE4, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 27027000,
+		.conf = {
+			0x01, 0xD1, 0x2D, 0x72, 0x40, 0x64, 0x12, 0xC8,
+			0x43, 0xE8, 0x0E, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x26, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xE3, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 36000000,
+		.conf = {
+			0x01, 0x51, 0x2D, 0x55, 0x40, 0x40, 0x00, 0xC8,
+			0x02, 0xC8, 0x0E, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xAB, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 40000000,
+		.conf = {
+			0x01, 0xD1, 0x21, 0x31, 0x40, 0x3C, 0x28, 0xC8,
+			0x87, 0xE8, 0xC8, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x9A, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 65000000,
+		.conf = {
+			0x01, 0xD1, 0x36, 0x34, 0x40, 0x0C, 0x04, 0xC8,
+			0x82, 0xE8, 0x45, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xBD, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 71000000,
+		.conf = {
+			0x01, 0xD1, 0x3B, 0x35, 0x40, 0x0C, 0x04, 0xC8,
+			0x85, 0xE8, 0x63, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x57, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 73250000,
+		.conf = {
+			0x01, 0xD1, 0x1F, 0x10, 0x40, 0x78, 0x8D, 0xC8,
+			0x81, 0xE8, 0xB7, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x56, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xA8, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 74176000,
+		.conf = {
+			0x01, 0xD1, 0x1F, 0x10, 0x40, 0x5B, 0xEF, 0xC8,
+			0x81, 0xE8, 0xB9, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x56, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xA6, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 74250000,
+		.conf = {
+			0x01, 0xD1, 0x1F, 0x10, 0x40, 0x40, 0xF8, 0x08,
+			0x81, 0xE8, 0xBA, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x26, 0x80, 0x09, 0x84, 0x05, 0x22, 0x24, 0x66,
+			0x54, 0xA5, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 83500000,
+		.conf = {
+			0x01, 0xD1, 0x23, 0x11, 0x40, 0x0C, 0xFB, 0xC8,
+			0x85, 0xE8, 0xD1, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x4A, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 88750000,
+		.conf = {
+			0x01, 0xD1, 0x25, 0x11, 0x40, 0x18, 0xFF, 0xC8,
+			0x83, 0xE8, 0xDE, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x45, 0x24, 0x00, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 106500000,
+		.conf = {
+			0x01, 0xD1, 0x2C, 0x12, 0x40, 0x0C, 0x09, 0xC8,
+			0x84, 0xE8, 0x0A, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x73, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 108000000,
+		.conf = {
+			0x01, 0x51, 0x2D, 0x15, 0x40, 0x01, 0x00, 0xC8,
+			0x82, 0xC8, 0x0E, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0xC7, 0x25, 0x03, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 115500000,
+		.conf = {
+			0x01, 0xD1, 0x30, 0x14, 0x40, 0x0C, 0x03, 0xC8,
+			0x88, 0xE8, 0x21, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x6A, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 146250000,
+		.conf = {
+			0x01, 0xD1, 0x3D, 0x15, 0x40, 0x18, 0xFD, 0xC8,
+			0x83, 0xE8, 0x6E, 0xD9, 0x45, 0xA0, 0xAC, 0x80,
+			0x08, 0x80, 0x09, 0x84, 0x05, 0x02, 0x24, 0x66,
+			0x54, 0x54, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80,
+		},
+	},
+	{
+		.pixel_clock = 148500000,
+		.conf = {
+			0x01, 0xD1, 0x1F, 0x00, 0x40, 0x40, 0xF8, 0x08,
+			0x81, 0xE8, 0xBA, 0xD8, 0x45, 0xA0, 0xAC, 0x80,
+			0x26, 0x80, 0x09, 0x84, 0x05, 0x22, 0x24, 0x66,
+			0x54, 0x4B, 0x25, 0x03, 0x00, 0x80, 0x01, 0x80,
+		},
+	},
+};
+
+static struct hdmi_driver_data exynos5420_hdmi_driver_data = {
+	.type		= HDMI_TYPE14,
+	.phy_confs	= hdmiphy_5420_configs,
+	.phy_conf_count	= ARRAY_SIZE(hdmiphy_5420_configs),
+	.is_apb_phy	= 1,
+};
+
+static struct hdmi_driver_data exynos4212_hdmi_driver_data = {
+	.type		= HDMI_TYPE14,
+	.phy_confs	= hdmiphy_v14_configs,
+	.phy_conf_count	= ARRAY_SIZE(hdmiphy_v14_configs),
+	.is_apb_phy	= 0,
+};
+
+static struct hdmi_driver_data exynos5_hdmi_driver_data = {
+	.type		= HDMI_TYPE14,
+	.phy_confs	= hdmiphy_v13_configs,
+	.phy_conf_count	= ARRAY_SIZE(hdmiphy_v13_configs),
+	.is_apb_phy	= 0,
+};
+
 static inline u32 hdmi_reg_read(struct hdmi_context *hdata, u32 reg_id)
 {
 	return readl(hdata->regs + reg_id);
@@ -445,6 +618,48 @@
 	writel(value, hdata->regs + reg_id);
 }
 
+static int hdmiphy_reg_writeb(struct hdmi_context *hdata,
+			u32 reg_offset, u8 value)
+{
+	if (hdata->hdmiphy_port) {
+		u8 buffer[2];
+		int ret;
+
+		buffer[0] = reg_offset;
+		buffer[1] = value;
+
+		ret = i2c_master_send(hdata->hdmiphy_port, buffer, 2);
+		if (ret == 2)
+			return 0;
+		return ret;
+	} else {
+		writeb(value, hdata->regs_hdmiphy + (reg_offset<<2));
+		return 0;
+	}
+}
+
+static int hdmiphy_reg_write_buf(struct hdmi_context *hdata,
+			u32 reg_offset, const u8 *buf, u32 len)
+{
+	if ((reg_offset + len) > 32)
+		return -EINVAL;
+
+	if (hdata->hdmiphy_port) {
+		int ret;
+
+		ret = i2c_master_send(hdata->hdmiphy_port, buf, len);
+		if (ret == len)
+			return 0;
+		return ret;
+	} else {
+		int i;
+		for (i = 0; i < len; i++)
+			writeb(buf[i], hdata->regs_hdmiphy +
+				((reg_offset + i)<<2));
+		return 0;
+	}
+}
+
 static void hdmi_v13_regs_dump(struct hdmi_context *hdata, char *prefix)
 {
 #define DUMPREG(reg_id) \
@@ -809,6 +1024,8 @@
 {
 	struct hdmi_context *hdata = ctx_from_connector(connector);
 
+	hdata->hpd = gpio_get_value(hdata->hpd_gpio);
+
 	return hdata->hpd ? connector_status_connected :
 			connector_status_disconnected;
 }
@@ -848,20 +1065,10 @@
 
 static int hdmi_find_phy_conf(struct hdmi_context *hdata, u32 pixel_clock)
 {
-	const struct hdmiphy_config *confs;
-	int count, i;
+	int i;
 
-	if (hdata->type == HDMI_TYPE13) {
-		confs = hdmiphy_v13_configs;
-		count = ARRAY_SIZE(hdmiphy_v13_configs);
-	} else if (hdata->type == HDMI_TYPE14) {
-		confs = hdmiphy_v14_configs;
-		count = ARRAY_SIZE(hdmiphy_v14_configs);
-	} else
-		return -EINVAL;
-
-	for (i = 0; i < count; i++)
-		if (confs[i].pixel_clock == pixel_clock)
+	for (i = 0; i < hdata->phy_conf_count; i++)
+		if (hdata->phy_confs[i].pixel_clock == pixel_clock)
 			return i;
 
 	DRM_DEBUG_KMS("Could not find phy config for %d\n", pixel_clock);
@@ -928,16 +1135,6 @@
 	return 0;
 }
 
-static int hdmi_initialize(struct exynos_drm_display *display,
-			struct drm_device *drm_dev)
-{
-	struct hdmi_context *hdata = display->ctx;
-
-	hdata->drm_dev = drm_dev;
-
-	return 0;
-}
-
 static void hdmi_mode_fixup(struct exynos_drm_display *display,
 				struct drm_connector *connector,
 				const struct drm_display_mode *mode,
@@ -1136,20 +1333,15 @@
 			HDMI_ASP_EN : HDMI_ASP_DIS, HDMI_ASP_MASK);
 }
 
-static void hdmi_conf_reset(struct hdmi_context *hdata)
+static void hdmi_start(struct hdmi_context *hdata, bool start)
 {
-	u32 reg;
+	u32 val = start ? HDMI_TG_EN : 0;
 
-	if (hdata->type == HDMI_TYPE13)
-		reg = HDMI_V13_CORE_RSTOUT;
-	else
-		reg = HDMI_CORE_RSTOUT;
+	if (hdata->current_mode.flags & DRM_MODE_FLAG_INTERLACE)
+		val |= HDMI_FIELD_EN;
 
-	/* resetting HDMI core */
-	hdmi_reg_writemask(hdata, reg,  0, HDMI_CORE_SW_RSTOUT);
-	usleep_range(10000, 12000);
-	hdmi_reg_writemask(hdata, reg, ~0, HDMI_CORE_SW_RSTOUT);
-	usleep_range(10000, 12000);
+	hdmi_reg_writemask(hdata, HDMI_CON_0, val, HDMI_EN);
+	hdmi_reg_writemask(hdata, HDMI_TG_CMD, val, HDMI_TG_EN | HDMI_FIELD_EN);
 }
 
 static void hdmi_conf_init(struct hdmi_context *hdata)
@@ -1163,6 +1355,8 @@
 	/* choose HDMI mode */
 	hdmi_reg_writemask(hdata, HDMI_MODE_SEL,
 		HDMI_MODE_HDMI_EN, HDMI_MODE_MASK);
+	/* Apply Video preable and Guard band in HDMI mode only */
+	hdmi_reg_writeb(hdata, HDMI_CON_2, 0);
 	/* disable bluescreen */
 	hdmi_reg_writemask(hdata, HDMI_CON_0, 0, HDMI_BLUE_SCR_EN);
 
@@ -1286,12 +1480,7 @@
 	clk_prepare_enable(hdata->res.sclk_hdmi);
 
 	/* enable HDMI and timing generator */
-	hdmi_reg_writemask(hdata, HDMI_CON_0, ~0, HDMI_EN);
-	if (core->int_pro_mode[0])
-		hdmi_reg_writemask(hdata, HDMI_TG_CMD, ~0, HDMI_TG_EN |
-				HDMI_FIELD_EN);
-	else
-		hdmi_reg_writemask(hdata, HDMI_TG_CMD, ~0, HDMI_TG_EN);
+	hdmi_start(hdata, true);
 }
 
 static void hdmi_v14_mode_apply(struct hdmi_context *hdata)
@@ -1453,12 +1642,7 @@
 	clk_prepare_enable(hdata->res.sclk_hdmi);
 
 	/* enable HDMI and timing generator */
-	hdmi_reg_writemask(hdata, HDMI_CON_0, ~0, HDMI_EN);
-	if (core->int_pro_mode[0])
-		hdmi_reg_writemask(hdata, HDMI_TG_CMD, ~0, HDMI_TG_EN |
-				HDMI_FIELD_EN);
-	else
-		hdmi_reg_writemask(hdata, HDMI_TG_CMD, ~0, HDMI_TG_EN);
+	hdmi_start(hdata, true);
 }
 
 static void hdmi_mode_apply(struct hdmi_context *hdata)
@@ -1499,32 +1683,51 @@
 
 static void hdmiphy_poweron(struct hdmi_context *hdata)
 {
-	if (hdata->type == HDMI_TYPE14)
-		hdmi_reg_writemask(hdata, HDMI_PHY_CON_0, 0,
-			HDMI_PHY_POWER_OFF_EN);
+	if (hdata->type != HDMI_TYPE14)
+		return;
+
+	DRM_DEBUG_KMS("\n");
+
+	/* For PHY Mode Setting */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_ENABLE_MODE_SET);
+	/* Phy Power On */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_POWER,
+				HDMI_PHY_POWER_ON);
+	/* For PHY Mode Setting */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_DISABLE_MODE_SET);
+	/* PHY SW Reset */
+	hdmiphy_conf_reset(hdata);
 }
 
 static void hdmiphy_poweroff(struct hdmi_context *hdata)
 {
-	if (hdata->type == HDMI_TYPE14)
-		hdmi_reg_writemask(hdata, HDMI_PHY_CON_0, ~0,
-			HDMI_PHY_POWER_OFF_EN);
+	if (hdata->type != HDMI_TYPE14)
+		return;
+
+	DRM_DEBUG_KMS("\n");
+
+	/* PHY SW Reset */
+	hdmiphy_conf_reset(hdata);
+	/* For PHY Mode Setting */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_ENABLE_MODE_SET);
+
+	/* PHY Power Off */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_POWER,
+				HDMI_PHY_POWER_OFF);
+
+	/* For PHY Mode Setting */
+	hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_DISABLE_MODE_SET);
 }
 
 static void hdmiphy_conf_apply(struct hdmi_context *hdata)
 {
-	const u8 *hdmiphy_data;
-	u8 buffer[32];
-	u8 operation[2];
-	u8 read_buffer[32] = {0, };
 	int ret;
 	int i;
 
-	if (!hdata->hdmiphy_port) {
-		DRM_ERROR("hdmiphy is not attached\n");
-		return;
-	}
-
 	/* pixel clock */
 	i = hdmi_find_phy_conf(hdata, hdata->mode_conf.pixel_clock);
 	if (i < 0) {
@@ -1532,39 +1735,21 @@
 		return;
 	}
 
-	if (hdata->type == HDMI_TYPE13)
-		hdmiphy_data = hdmiphy_v13_configs[i].conf;
-	else
-		hdmiphy_data = hdmiphy_v14_configs[i].conf;
-
-	memcpy(buffer, hdmiphy_data, 32);
-	ret = i2c_master_send(hdata->hdmiphy_port, buffer, 32);
-	if (ret != 32) {
-		DRM_ERROR("failed to configure HDMIPHY via I2C\n");
+	ret = hdmiphy_reg_write_buf(hdata, 0, hdata->phy_confs[i].conf, 32);
+	if (ret) {
+		DRM_ERROR("failed to configure hdmiphy\n");
 		return;
 	}
 
 	usleep_range(10000, 12000);
 
-	/* operation mode */
-	operation[0] = 0x1f;
-	operation[1] = 0x80;
-
-	ret = i2c_master_send(hdata->hdmiphy_port, operation, 2);
-	if (ret != 2) {
+	ret = hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_DISABLE_MODE_SET);
+	if (ret) {
 		DRM_ERROR("failed to enable hdmiphy\n");
 		return;
 	}
 
-	ret = i2c_master_recv(hdata->hdmiphy_port, read_buffer, 32);
-	if (ret < 0) {
-		DRM_ERROR("failed to read hdmiphy config\n");
-		return;
-	}
-
-	for (i = 0; i < ret; i++)
-		DRM_DEBUG_KMS("hdmiphy[0x%02x] write[0x%02x] - "
-			"recv [0x%02x]\n", i, buffer[i], read_buffer[i]);
 }
 
 static void hdmi_conf_apply(struct hdmi_context *hdata)
@@ -1573,7 +1758,7 @@
 	hdmiphy_conf_apply(hdata);
 
 	mutex_lock(&hdata->hdmi_mutex);
-	hdmi_conf_reset(hdata);
+	hdmi_start(hdata, false);
 	hdmi_conf_init(hdata);
 	mutex_unlock(&hdata->hdmi_mutex);
 
@@ -1814,6 +1999,9 @@
 		m->vrefresh, (m->flags & DRM_MODE_FLAG_INTERLACE) ?
 		"INTERLACED" : "PROGERESSIVE");
 
+	/* preserve mode information for later use. */
+	drm_mode_copy(&hdata->current_mode, mode);
+
 	if (hdata->type == HDMI_TYPE13)
 		hdmi_v13_mode_set(hdata, mode);
 	else
@@ -1854,7 +2042,10 @@
 	if (regulator_bulk_enable(res->regul_count, res->regul_bulk))
 		DRM_DEBUG_KMS("failed to enable regulator bulk\n");
 
-	clk_prepare_enable(res->hdmiphy);
+	/* set pmu hdmiphy control bit to enable hdmiphy */
+	regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL,
+			PMU_HDMI_PHY_ENABLE_BIT, 1);
+
 	clk_prepare_enable(res->hdmi);
 	clk_prepare_enable(res->sclk_hdmi);
 
@@ -1872,16 +2063,20 @@
 		goto out;
 	mutex_unlock(&hdata->hdmi_mutex);
 
-	/*
-	 * The TV power domain needs any condition of hdmiphy to turn off and
-	 * its reset state seems to meet the condition.
-	 */
-	hdmiphy_conf_reset(hdata);
+	/* HDMI System Disable */
+	hdmi_reg_writemask(hdata, HDMI_CON_0, 0, HDMI_EN);
+
 	hdmiphy_poweroff(hdata);
 
+	cancel_delayed_work(&hdata->hotplug_work);
+
 	clk_disable_unprepare(res->sclk_hdmi);
 	clk_disable_unprepare(res->hdmi);
-	clk_disable_unprepare(res->hdmiphy);
+
+	/* reset pmu hdmiphy control bit to disable hdmiphy */
+	regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL,
+			PMU_HDMI_PHY_ENABLE_BIT, 0);
+
 	regulator_bulk_disable(res->regul_count, res->regul_bulk);
 
 	pm_runtime_put_sync(hdata->dev);
@@ -1913,7 +2108,6 @@
 }
 
 static struct exynos_drm_display_ops hdmi_display_ops = {
-	.initialize	= hdmi_initialize,
 	.create_connector = hdmi_create_connector,
 	.mode_fixup	= hdmi_mode_fixup,
 	.mode_set	= hdmi_mode_set,
@@ -1926,9 +2120,11 @@
 	.ops = &hdmi_display_ops,
 };
 
-static irqreturn_t hdmi_irq_thread(int irq, void *arg)
+static void hdmi_hotplug_work_func(struct work_struct *work)
 {
-	struct hdmi_context *hdata = arg;
+	struct hdmi_context *hdata;
+
+	hdata = container_of(work, struct hdmi_context, hotplug_work.work);
 
 	mutex_lock(&hdata->hdmi_mutex);
 	hdata->hpd = gpio_get_value(hdata->hpd_gpio);
@@ -1936,6 +2132,14 @@
 
 	if (hdata->drm_dev)
 		drm_helper_hpd_irq_event(hdata->drm_dev);
+}
+
+static irqreturn_t hdmi_irq_thread(int irq, void *arg)
+{
+	struct hdmi_context *hdata = arg;
+
+	mod_delayed_work(system_wq, &hdata->hotplug_work,
+			msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS));
 
 	return IRQ_HANDLED;
 }
@@ -1954,37 +2158,35 @@
 
 	DRM_DEBUG_KMS("HDMI resource init\n");
 
-	memset(res, 0, sizeof(*res));
-
 	/* get clocks, power */
 	res->hdmi = devm_clk_get(dev, "hdmi");
 	if (IS_ERR(res->hdmi)) {
 		DRM_ERROR("failed to get clock 'hdmi'\n");
+		ret = PTR_ERR(res->hdmi);
 		goto fail;
 	}
 	res->sclk_hdmi = devm_clk_get(dev, "sclk_hdmi");
 	if (IS_ERR(res->sclk_hdmi)) {
 		DRM_ERROR("failed to get clock 'sclk_hdmi'\n");
+		ret = PTR_ERR(res->sclk_hdmi);
 		goto fail;
 	}
 	res->sclk_pixel = devm_clk_get(dev, "sclk_pixel");
 	if (IS_ERR(res->sclk_pixel)) {
 		DRM_ERROR("failed to get clock 'sclk_pixel'\n");
+		ret = PTR_ERR(res->sclk_pixel);
 		goto fail;
 	}
 	res->sclk_hdmiphy = devm_clk_get(dev, "sclk_hdmiphy");
 	if (IS_ERR(res->sclk_hdmiphy)) {
 		DRM_ERROR("failed to get clock 'sclk_hdmiphy'\n");
-		goto fail;
-	}
-	res->hdmiphy = devm_clk_get(dev, "hdmiphy");
-	if (IS_ERR(res->hdmiphy)) {
-		DRM_ERROR("failed to get clock 'hdmiphy'\n");
+		ret = PTR_ERR(res->sclk_hdmiphy);
 		goto fail;
 	}
 	res->mout_hdmi = devm_clk_get(dev, "mout_hdmi");
 	if (IS_ERR(res->mout_hdmi)) {
 		DRM_ERROR("failed to get clock 'mout_hdmi'\n");
+		ret = PTR_ERR(res->mout_hdmi);
 		goto fail;
 	}
 
@@ -1992,8 +2194,10 @@
 
 	res->regul_bulk = devm_kzalloc(dev, ARRAY_SIZE(supply) *
 		sizeof(res->regul_bulk[0]), GFP_KERNEL);
-	if (!res->regul_bulk)
+	if (!res->regul_bulk) {
+		ret = -ENOMEM;
 		goto fail;
+	}
 	for (i = 0; i < ARRAY_SIZE(supply); ++i) {
 		res->regul_bulk[i].supply = supply[i];
 		res->regul_bulk[i].consumer = NULL;
@@ -2001,14 +2205,14 @@
 	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(supply), res->regul_bulk);
 	if (ret) {
 		DRM_ERROR("failed to get regulators\n");
-		goto fail;
+		return ret;
 	}
 	res->regul_count = ARRAY_SIZE(supply);
 
-	return 0;
+	return ret;
 fail:
 	DRM_ERROR("HDMI resource init - failed\n");
-	return -ENODEV;
+	return ret;
 }
 
 static struct s5p_hdmi_platform_data *drm_hdmi_dt_parse_pdata
@@ -2043,42 +2247,105 @@
 		.compatible = "samsung,exynos4212-hdmi",
 		.data = &exynos4212_hdmi_driver_data,
 	}, {
+		.compatible = "samsung,exynos5420-hdmi",
+		.data = &exynos5420_hdmi_driver_data,
+	}, {
 		/* end node */
 	}
 };
 
+static int hdmi_bind(struct device *dev, struct device *master, void *data)
+{
+	struct drm_device *drm_dev = data;
+	struct hdmi_context *hdata;
+
+	hdata = hdmi_display.ctx;
+	hdata->drm_dev = drm_dev;
+
+	return exynos_drm_create_enc_conn(drm_dev, &hdmi_display);
+}
+
+static void hdmi_unbind(struct device *dev, struct device *master, void *data)
+{
+	struct exynos_drm_display *display = get_hdmi_display(dev);
+	struct drm_encoder *encoder = display->encoder;
+	struct hdmi_context *hdata = display->ctx;
+
+	encoder->funcs->destroy(encoder);
+	drm_connector_cleanup(&hdata->connector);
+}
+
+static const struct component_ops hdmi_component_ops = {
+	.bind	= hdmi_bind,
+	.unbind = hdmi_unbind,
+};
+
+static struct device_node *hdmi_legacy_ddc_dt_binding(struct device *dev)
+{
+	const char *compatible_str = "samsung,exynos4210-hdmiddc";
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, compatible_str);
+	if (np)
+		return of_get_next_parent(np);
+
+	return NULL;
+}
+
+static struct device_node *hdmi_legacy_phy_dt_binding(struct device *dev)
+{
+	const char *compatible_str = "samsung,exynos4212-hdmiphy";
+
+	return of_find_compatible_node(NULL, NULL, compatible_str);
+}
+
 static int hdmi_probe(struct platform_device *pdev)
 {
+	struct device_node *ddc_node, *phy_node;
+	struct s5p_hdmi_platform_data *pdata;
+	struct hdmi_driver_data *drv_data;
+	const struct of_device_id *match;
 	struct device *dev = &pdev->dev;
 	struct hdmi_context *hdata;
-	struct s5p_hdmi_platform_data *pdata;
 	struct resource *res;
-	const struct of_device_id *match;
-	struct device_node *ddc_node, *phy_node;
-	struct hdmi_driver_data *drv_data;
 	int ret;
 
-	 if (!dev->of_node)
-		return -ENODEV;
+	ret = exynos_drm_component_add(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR,
+					hdmi_display.type);
+	if (ret)
+		return ret;
+
+	if (!dev->of_node) {
+		ret = -ENODEV;
+		goto err_del_component;
+	}
 
 	pdata = drm_hdmi_dt_parse_pdata(dev);
-	if (!pdata)
-		return -EINVAL;
+	if (!pdata) {
+		ret = -EINVAL;
+		goto err_del_component;
+	}
 
 	hdata = devm_kzalloc(dev, sizeof(struct hdmi_context), GFP_KERNEL);
-	if (!hdata)
-		return -ENOMEM;
+	if (!hdata) {
+		ret = -ENOMEM;
+		goto err_del_component;
+	}
 
 	mutex_init(&hdata->hdmi_mutex);
 
 	platform_set_drvdata(pdev, &hdmi_display);
 
 	match = of_match_node(hdmi_match_types, dev->of_node);
-	if (!match)
-		return -ENODEV;
+	if (!match) {
+		ret = -ENODEV;
+		goto err_del_component;
+	}
 
 	drv_data = (struct hdmi_driver_data *)match->data;
 	hdata->type = drv_data->type;
+	hdata->phy_confs = drv_data->phy_confs;
+	hdata->phy_conf_count = drv_data->phy_conf_count;
 
 	hdata->hpd_gpio = pdata->hpd_gpio;
 	hdata->dev = dev;
@@ -2086,35 +2353,44 @@
 	ret = hdmi_resources_init(hdata);
 	if (ret) {
 		DRM_ERROR("hdmi_resources_init failed\n");
-		return -EINVAL;
+		goto err_del_component;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	hdata->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(hdata->regs))
-		return PTR_ERR(hdata->regs);
+	if (IS_ERR(hdata->regs)) {
+		ret = PTR_ERR(hdata->regs);
+		goto err_del_component;
+	}
 
 	ret = devm_gpio_request(dev, hdata->hpd_gpio, "HPD");
 	if (ret) {
 		DRM_ERROR("failed to request HPD gpio\n");
-		return ret;
+		goto err_del_component;
 	}
 
+	ddc_node = hdmi_legacy_ddc_dt_binding(dev);
+	if (ddc_node)
+		goto out_get_ddc_adpt;
+
 	/* DDC i2c driver */
 	ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
 	if (!ddc_node) {
 		DRM_ERROR("Failed to find ddc node in device tree\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_del_component;
 	}
+
+out_get_ddc_adpt:
 	hdata->ddc_adpt = of_find_i2c_adapter_by_node(ddc_node);
 	if (!hdata->ddc_adpt) {
 		DRM_ERROR("Failed to get ddc i2c adapter by node\n");
-		return -ENODEV;
+		return -EPROBE_DEFER;
 	}
 
-	/* Not support APB PHY yet. */
-	if (drv_data->is_apb_phy)
-		return -EPERM;
+	phy_node = hdmi_legacy_phy_dt_binding(dev);
+	if (phy_node)
+		goto out_get_phy_port;
 
 	/* hdmiphy i2c driver */
 	phy_node = of_parse_phandle(dev->of_node, "phy", 0);
@@ -2123,11 +2399,22 @@
 		ret = -ENODEV;
 		goto err_ddc;
 	}
-	hdata->hdmiphy_port = of_find_i2c_device_by_node(phy_node);
-	if (!hdata->hdmiphy_port) {
-		DRM_ERROR("Failed to get hdmi phy i2c client from node\n");
-		ret = -ENODEV;
-		goto err_ddc;
+
+out_get_phy_port:
+	if (drv_data->is_apb_phy) {
+		hdata->regs_hdmiphy = of_iomap(phy_node, 0);
+		if (!hdata->regs_hdmiphy) {
+			DRM_ERROR("failed to ioremap hdmi phy\n");
+			ret = -ENOMEM;
+			goto err_ddc;
+		}
+	} else {
+		hdata->hdmiphy_port = of_find_i2c_device_by_node(phy_node);
+		if (!hdata->hdmiphy_port) {
+			DRM_ERROR("Failed to get hdmi phy i2c client\n");
+			ret = -EPROBE_DEFER;
+			goto err_ddc;
+		}
 	}
 
 	hdata->irq = gpio_to_irq(hdata->hpd_gpio);
@@ -2139,6 +2426,8 @@
 
 	hdata->hpd = gpio_get_value(hdata->hpd_gpio);
 
+	INIT_DELAYED_WORK(&hdata->hotplug_work, hdmi_hotplug_work_func);
+
 	ret = devm_request_threaded_irq(dev, hdata->irq, NULL,
 			hdmi_irq_thread, IRQF_TRIGGER_RISING |
 			IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
@@ -2148,30 +2437,57 @@
 		goto err_hdmiphy;
 	}
 
+	hdata->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node,
+			"samsung,syscon-phandle");
+	if (IS_ERR(hdata->pmureg)) {
+		DRM_ERROR("syscon regmap lookup failed.\n");
+		ret = -EPROBE_DEFER;
+		goto err_hdmiphy;
+	}
+
 	pm_runtime_enable(dev);
-
 	hdmi_display.ctx = hdata;
-	exynos_drm_display_register(&hdmi_display);
 
-	return 0;
+	ret = component_add(&pdev->dev, &hdmi_component_ops);
+	if (ret)
+		goto err_disable_pm_runtime;
+
+	return ret;
+
+err_disable_pm_runtime:
+	pm_runtime_disable(dev);
 
 err_hdmiphy:
-	put_device(&hdata->hdmiphy_port->dev);
+	if (hdata->hdmiphy_port)
+		put_device(&hdata->hdmiphy_port->dev);
 err_ddc:
 	put_device(&hdata->ddc_adpt->dev);
+
+err_del_component:
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
+
 	return ret;
 }
 
 static int hdmi_remove(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
-	struct exynos_drm_display *display = get_hdmi_display(dev);
-	struct hdmi_context *hdata = display->ctx;
+	struct hdmi_context *hdata = hdmi_display.ctx;
+
+	cancel_delayed_work_sync(&hdata->hotplug_work);
 
-	put_device(&hdata->hdmiphy_port->dev);
+	/*
+	 * hdmiphy_port is only set when the PHY is an i2c client; probe
+	 * leaves it NULL for a memory-mapped (APB) PHY, so guard the put
+	 * exactly as the probe error path does.
+	 */
+	if (hdata->hdmiphy_port)
+		put_device(&hdata->hdmiphy_port->dev);
 	put_device(&hdata->ddc_adpt->dev);
-	pm_runtime_disable(&pdev->dev);
 
+	pm_runtime_disable(&pdev->dev);
+	component_del(&pdev->dev, &hdmi_component_ops);
+
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CONNECTOR);
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.h b/drivers/gpu/drm/exynos/exynos_hdmi.h
deleted file mode 100644
index 0ddf395..0000000
--- a/drivers/gpu/drm/exynos/exynos_hdmi.h
+++ /dev/null

@@ -1,23 +0,0 @@
-/*
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- * Authors:
- *	Inki Dae <inki.dae@samsung.com>
- *	Seung-Woo Kim <sw0312.kim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_HDMI_H_
-#define _EXYNOS_HDMI_H_
-
-void hdmi_attach_ddc_client(struct i2c_client *ddc);
-void hdmi_attach_hdmiphy_client(struct i2c_client *hdmiphy);
-
-extern struct i2c_driver hdmiphy_driver;
-extern struct i2c_driver ddc_driver;
-
-#endif

diff --git a/drivers/gpu/drm/exynos/exynos_hdmiphy.c b/drivers/gpu/drm/exynos/exynos_hdmiphy.c
deleted file mode 100644
index 59abb14..0000000
--- a/drivers/gpu/drm/exynos/exynos_hdmiphy.c
+++ /dev/null

@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2011 Samsung Electronics Co.Ltd
- * Authors:
- *	Seung-Woo Kim <sw0312.kim@samsung.com>
- *	Inki Dae <inki.dae@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-
-#include <drm/drmP.h>
-
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/of.h>
-
-#include "exynos_drm_drv.h"
-#include "exynos_hdmi.h"
-
-
-static int hdmiphy_probe(struct i2c_client *client,
-	const struct i2c_device_id *id)
-{
-	hdmi_attach_hdmiphy_client(client);
-
-	dev_info(&client->adapter->dev, "attached s5p_hdmiphy "
-		"into i2c adapter successfully\n");
-
-	return 0;
-}
-
-static int hdmiphy_remove(struct i2c_client *client)
-{
-	dev_info(&client->adapter->dev, "detached s5p_hdmiphy "
-		"from i2c adapter successfully\n");
-
-	return 0;
-}
-
-static struct of_device_id hdmiphy_match_types[] = {
-	{
-		.compatible = "samsung,exynos5-hdmiphy",
-	}, {
-		.compatible = "samsung,exynos4210-hdmiphy",
-	}, {
-		.compatible = "samsung,exynos4212-hdmiphy",
-	}, {
-		/* end node */
-	}
-};
-
-struct i2c_driver hdmiphy_driver = {
-	.driver = {
-		.name	= "exynos-hdmiphy",
-		.owner	= THIS_MODULE,
-		.of_match_table = hdmiphy_match_types,
-	},
-	.probe		= hdmiphy_probe,
-	.remove		= hdmiphy_remove,
-	.command		= NULL,
-};
-EXPORT_SYMBOL(hdmiphy_driver);

diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index ce28881..4c5aed7 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c

@@ -31,6 +31,7 @@
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
 #include <linux/of.h>
+#include <linux/component.h>
 
 #include <drm/exynos_drm.h>
 
@@ -830,13 +831,15 @@
 }
 
 static int mixer_initialize(struct exynos_drm_manager *mgr,
-			struct drm_device *drm_dev, int pipe)
+			struct drm_device *drm_dev)
 {
 	int ret;
 	struct mixer_context *mixer_ctx = mgr->ctx;
+	struct exynos_drm_private *priv;
+	priv = drm_dev->dev_private;
 
-	mixer_ctx->drm_dev = drm_dev;
-	mixer_ctx->pipe = pipe;
+	mgr->drm_dev = mixer_ctx->drm_dev = drm_dev;
+	mgr->pipe = mixer_ctx->pipe = priv->pipe++;
 
 	/* acquire resources: regs, irqs, clocks */
 	ret = mixer_resources_init(mixer_ctx);
@@ -1142,8 +1145,6 @@
 }
 
 static struct exynos_drm_manager_ops mixer_manager_ops = {
-	.initialize		= mixer_initialize,
-	.remove			= mixer_mgr_remove,
 	.dpms			= mixer_dpms,
 	.enable_vblank		= mixer_enable_vblank,
 	.disable_vblank		= mixer_disable_vblank,
@@ -1200,11 +1201,13 @@
 	}
 };
 
-static int mixer_probe(struct platform_device *pdev)
+static int mixer_bind(struct device *dev, struct device *manager, void *data)
 {
-	struct device *dev = &pdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm_dev = data;
 	struct mixer_context *ctx;
 	struct mixer_drv_data *drv;
+	int ret;
 
 	dev_info(dev, "probe start\n");
 
@@ -1233,19 +1236,61 @@
 	atomic_set(&ctx->wait_vsync_event, 0);
 
 	mixer_manager.ctx = ctx;
+	ret = mixer_initialize(&mixer_manager, drm_dev);
+	if (ret)
+		return ret;
+
 	platform_set_drvdata(pdev, &mixer_manager);
-	exynos_drm_manager_register(&mixer_manager);
+	ret = exynos_drm_crtc_create(&mixer_manager);
+	if (ret) {
+		mixer_mgr_remove(&mixer_manager);
+		return ret;
+	}
 
 	pm_runtime_enable(dev);
 
 	return 0;
 }
 
+static void mixer_unbind(struct device *dev, struct device *master, void *data)
+{
+	struct exynos_drm_manager *mgr = dev_get_drvdata(dev);
+	struct drm_crtc *crtc = mgr->crtc;
+
+	dev_info(dev, "remove successful\n");
+
+	mixer_mgr_remove(mgr);
+
+	pm_runtime_disable(dev);
+
+	crtc->funcs->destroy(crtc);
+}
+
+static const struct component_ops mixer_component_ops = {
+	.bind	= mixer_bind,
+	.unbind	= mixer_unbind,
+};
+
+static int mixer_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = exynos_drm_component_add(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC,
+					mixer_manager.type);
+	if (ret)
+		return ret;
+
+	ret = component_add(&pdev->dev, &mixer_component_ops);
+	if (ret)
+		exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC);
+
+	return ret;
+}
+
 static int mixer_remove(struct platform_device *pdev)
 {
-	dev_info(&pdev->dev, "remove successful\n");
-
-	pm_runtime_disable(&pdev->dev);
+	component_del(&pdev->dev, &mixer_component_ops);
+	exynos_drm_component_del(&pdev->dev, EXYNOS_DEVICE_TYPE_CRTC);
 
 	return 0;
 }

diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h
index ef1b3eb..3f35ac6 100644
--- a/drivers/gpu/drm/exynos/regs-hdmi.h
+++ b/drivers/gpu/drm/exynos/regs-hdmi.h

@@ -578,4 +578,20 @@
 #define HDMI_TG_VACT_ST4_H		HDMI_TG_BASE(0x0074)
 #define HDMI_TG_3D			HDMI_TG_BASE(0x00F0)
 
+/* HDMI PHY Register Offsets */
+#define HDMIPHY_POWER		(0x74 >> 2)
+#define HDMIPHY_MODE_SET_DONE		(0x7c >> 2)
+
+/* HDMI PHY Power Values */
+#define HDMI_PHY_POWER_ON              0x80
+#define HDMI_PHY_POWER_OFF             0xff
+
+/* HDMI PHY Mode Set Values */
+#define HDMI_PHY_DISABLE_MODE_SET	0x80
+#define HDMI_PHY_ENABLE_MODE_SET	0x00
+
+/* PMU Registers for PHY */
+#define PMU_HDMI_PHY_CONTROL		0x700
+#define PMU_HDMI_PHY_ENABLE_BIT		BIT(0)
+
 #endif /* SAMSUNG_REGS_HDMI_H */

diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
index 489ffd2..87885d8 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c

@@ -148,7 +148,7 @@
 		break;
 	case BIT(14):
 		/*wait for all fifo empty*/
-		/*wait_for_all_fifos_empty(sender)*/;
+		/*wait_for_all_fifos_empty(sender)*/
 		break;
 	case BIT(15):
 		dev_dbg(sender->dev->dev, "No Action required\n");

diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index b686e56..6e8fe9e 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c

@@ -112,11 +112,9 @@
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct psb_fbdev *fbdev = dev_priv->fbdev;
 
-	drm_modeset_lock_all(dev);
-	ret = drm_fb_helper_restore_fbdev_mode(&fbdev->psb_fb_helper);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->psb_fb_helper);
 	if (ret)
 		DRM_DEBUG("failed to restore crtc mode\n");
-	drm_modeset_unlock_all(dev);
 
 	return;
 }
@@ -354,7 +352,7 @@
 	PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R);
 	spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags);
 
-	drm_irq_install(dev);
+	drm_irq_install(dev, dev->pdev->irq);
 
 	dev->vblank_disable_allowed = true;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
@@ -479,7 +477,7 @@
 	.lastclose = psb_driver_lastclose,
 	.preclose = psb_driver_preclose,
 
-	.num_ioctls = DRM_ARRAY_SIZE(psb_ioctls),
+	.num_ioctls = ARRAY_SIZE(psb_ioctls),
 	.device_is_agp = psb_driver_device_is_agp,
 	.irq_preinstall = psb_irq_preinstall,
 	.irq_postinstall = psb_irq_postinstall,

diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 48af5ca..240c331 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c

@@ -568,11 +568,11 @@
 
 static uint8_t tda998x_cksum(uint8_t *buf, size_t bytes)
 {
-	uint8_t sum = 0;
+	int sum = 0;
 
 	while (bytes--)
-		sum += *buf++;
-	return (255 - sum) + 1;
+		sum -= *buf++;
+	return sum;
 }
 
 #define HB(x) (x)

diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index aeace37..e88bac1 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c

@@ -1251,7 +1251,7 @@
 	DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
 };
 
-int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls);
+int i810_max_ioctl = ARRAY_SIZE(i810_ioctls);
 
 /**
  * Determine if the device really is AGP or not.

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index bea2d67..437e182 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig

@@ -5,6 +5,7 @@
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
@@ -71,7 +72,7 @@
 
 config DRM_I915_UMS
 	bool "Enable userspace modesetting on Intel hardware (DEPRECATED)"
-	depends on DRM_I915
+	depends on DRM_I915 && BROKEN
 	default n
 	help
 	  Choose this option if you still need userspace modesetting.

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b1445b7..cad1683 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile

@@ -18,6 +18,7 @@
 # GEM code
 i915-y += i915_cmd_parser.o \
 	  i915_gem_context.o \
+	  i915_gem_render_state.o \
 	  i915_gem_debug.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
@@ -26,12 +27,18 @@
 	  i915_gem.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_irq.o \
 	  i915_trace_points.o \
 	  intel_ringbuffer.o \
 	  intel_uncore.o
 
+# autogenerated null render state
+i915-y += intel_renderstate_gen6.o \
+	  intel_renderstate_gen7.o \
+	  intel_renderstate_gen8.o
+
 # modesetting core code
 i915-y += intel_bios.o \
 	  intel_display.o \
@@ -55,6 +62,7 @@
 	  intel_dsi_cmd.o \
 	  intel_dsi.o \
 	  intel_dsi_pll.o \
+	  intel_dsi_panel_vbt.o \
 	  intel_dvo.o \
 	  intel_hdmi.o \
 	  intel_i2c.o \

diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c
index a0f5bdd..80449f4 100644
--- a/drivers/gpu/drm/i915/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c

@@ -160,7 +160,7 @@
 	if (i2c_transfer(adapter, msgs, 2) == 2) {
 		*ch = in_buf[0];
 		return true;
-	};
+	}
 
 	if (!ch7xxx->quiet) {
 		DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n",

diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
index 0f1865d..0f2587f 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/dvo_ivch.c

@@ -195,7 +195,7 @@
 	if (i2c_transfer(adapter, msgs, 3) == 3) {
 		*data = (in_buf[1] << 8) | in_buf[0];
 		return true;
-	};
+	}
 
 	if (!priv->quiet) {
 		DRM_DEBUG_KMS("Unable to read register 0x%02x from "

diff --git a/drivers/gpu/drm/i915/dvo_ns2501.c b/drivers/gpu/drm/i915/dvo_ns2501.c
index 8155ded..74f2af7 100644
--- a/drivers/gpu/drm/i915/dvo_ns2501.c
+++ b/drivers/gpu/drm/i915/dvo_ns2501.c

@@ -121,7 +121,7 @@
 	if (i2c_transfer(adapter, msgs, 2) == 2) {
 		*ch = in_buf[0];
 		return true;
-	};
+	}
 
 	if (!ns->quiet) {
 		DRM_DEBUG_KMS
@@ -233,9 +233,8 @@
 					      struct drm_display_mode *mode)
 {
 	DRM_DEBUG_KMS
-	    ("%s: is mode valid (hdisplay=%d,htotal=%d,vdisplay=%d,vtotal=%d)\n",
-	     __FUNCTION__, mode->hdisplay, mode->htotal, mode->vdisplay,
-	     mode->vtotal);
+	    ("is mode valid (hdisplay=%d,htotal=%d,vdisplay=%d,vtotal=%d)\n",
+	     mode->hdisplay, mode->htotal, mode->vdisplay, mode->vtotal);
 
 	/*
 	 * Currently, these are all the modes I have data from.
@@ -261,9 +260,8 @@
 	struct ns2501_priv *ns = (struct ns2501_priv *)(dvo->dev_priv);
 
 	DRM_DEBUG_KMS
-	    ("%s: set mode (hdisplay=%d,htotal=%d,vdisplay=%d,vtotal=%d).\n",
-	     __FUNCTION__, mode->hdisplay, mode->htotal, mode->vdisplay,
-	     mode->vtotal);
+	    ("set mode (hdisplay=%d,htotal=%d,vdisplay=%d,vtotal=%d).\n",
+	     mode->hdisplay, mode->htotal, mode->vdisplay, mode->vtotal);
 
 	/*
 	 * Where do I find the native resolution for which scaling is not required???
@@ -277,8 +275,7 @@
 		if (mode->hdisplay == 800 && mode->vdisplay == 600) {
 			/* mode 277 */
 			ns->reg_8_shadow &= ~NS2501_8_BPAS;
-			DRM_DEBUG_KMS("%s: switching to 800x600\n",
-				      __FUNCTION__);
+			DRM_DEBUG_KMS("switching to 800x600\n");
 
 			/*
 			 * No, I do not know where this data comes from.
@@ -341,8 +338,7 @@
 
 		} else if (mode->hdisplay == 640 && mode->vdisplay == 480) {
 			/* mode 274 */
-			DRM_DEBUG_KMS("%s: switching to 640x480\n",
-				      __FUNCTION__);
+			DRM_DEBUG_KMS("switching to 640x480\n");
 			/*
 			 * No, I do not know where this data comes from.
 			 * It is just what the video bios left in the DVO, so
@@ -406,8 +402,7 @@
 
 		} else if (mode->hdisplay == 1024 && mode->vdisplay == 768) {
 			/* mode 280 */
-			DRM_DEBUG_KMS("%s: switching to 1024x768\n",
-				      __FUNCTION__);
+			DRM_DEBUG_KMS("switching to 1024x768\n");
 			/*
 			 * This might or might not work, actually. I'm silently
 			 * assuming here that the native panel resolution is
@@ -458,8 +453,7 @@
 	struct ns2501_priv *ns = (struct ns2501_priv *)(dvo->dev_priv);
 	unsigned char ch;
 
-	DRM_DEBUG_KMS("%s: Trying set the dpms of the DVO to %i\n",
-		      __FUNCTION__, enable);
+	DRM_DEBUG_KMS("Trying set the dpms of the DVO to %i\n", enable);
 
 	ch = ns->reg_8_shadow;
 

diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c
index 7b3e9e9..fa01149 100644
--- a/drivers/gpu/drm/i915/dvo_sil164.c
+++ b/drivers/gpu/drm/i915/dvo_sil164.c

@@ -93,7 +93,7 @@
 	if (i2c_transfer(adapter, msgs, 2) == 2) {
 		*ch = in_buf[0];
 		return true;
-	};
+	}
 
 	if (!sil->quiet) {
 		DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n",

diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c
index 12ea4b1..7853719 100644
--- a/drivers/gpu/drm/i915/dvo_tfp410.c
+++ b/drivers/gpu/drm/i915/dvo_tfp410.c

@@ -118,7 +118,7 @@
 	if (i2c_transfer(adapter, msgs, 2) == 2) {
 		*ch = in_buf[0];
 		return true;
-	};
+	}
 
 	if (!tfp->quiet) {
 		DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n",

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 4cf6d02..9d79543 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c

@@ -28,7 +28,7 @@
 #include "i915_drv.h"
 
 /**
- * DOC: i915 batch buffer command parser
+ * DOC: batch buffer command parser
  *
  * Motivation:
  * Certain OpenGL features (e.g. transform feedback, performance monitoring)
@@ -86,6 +86,367 @@
  * general bitmasking mechanism.
  */
 
+#define STD_MI_OPCODE_MASK  0xFF800000
+#define STD_3D_OPCODE_MASK  0xFFFF0000
+#define STD_2D_OPCODE_MASK  0xFFC00000
+#define STD_MFX_OPCODE_MASK 0xFFFF0000
+
+#define CMD(op, opm, f, lm, fl, ...)				\
+	{							\
+		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
+		.cmd = { (op), (opm) }, 			\
+		.length = { (lm) },				\
+		__VA_ARGS__					\
+	}
+
+/* Convenience macros to compress the tables */
+#define SMI STD_MI_OPCODE_MASK
+#define S3D STD_3D_OPCODE_MASK
+#define S2D STD_2D_OPCODE_MASK
+#define SMFX STD_MFX_OPCODE_MASK
+#define F true
+#define S CMD_DESC_SKIP
+#define R CMD_DESC_REJECT
+#define W CMD_DESC_REGISTER
+#define B CMD_DESC_BITMASK
+#define M CMD_DESC_MASTER
+
+/*            Command                          Mask   Fixed Len   Action
+	      ---------------------------------------------------------- */
+static const struct drm_i915_cmd_descriptor common_cmds[] = {
+	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
+	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
+	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
+	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
+	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
+	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
+	CMD(  MI_SEMAPHORE_MBOX,                SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_STORE_DWORD_INDEX,             SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+	CMD(  MI_STORE_REGISTER_MEM(1),         SMI,   !F,  0xFF,   W | B,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC },
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_LOAD_REGISTER_MEM,             SMI,   !F,  0xFF,   W | B,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC },
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
+};
+
+static const struct drm_i915_cmd_descriptor render_cmds[] = {
+	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
+	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
+	CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
+	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
+	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_CLFLUSH,                       SMI,   !F,  0x3FF,  B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_REPORT_PERF_COUNT,             SMI,   !F,  0x3F,   B,
+	      .bits = {{
+			.offset = 1,
+			.mask = MI_REPORT_PERF_COUNT_GGTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  GFX_OP_3DSTATE_VF_STATISTICS,     S3D,    F,  1,      S  ),
+	CMD(  PIPELINE_SELECT,                  S3D,    F,  1,      S  ),
+	CMD(  MEDIA_VFE_STATE,			S3D,   !F,  0xFFFF, B,
+	      .bits = {{
+			.offset = 2,
+			.mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
+			.expected = 0,
+	      }},						       ),
+	CMD(  GPGPU_OBJECT,                     S3D,   !F,  0xFF,   S  ),
+	CMD(  GPGPU_WALKER,                     S3D,   !F,  0xFF,   S  ),
+	CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,      S3D,   !F,  0x1FF,  S  ),
+	CMD(  GFX_OP_PIPE_CONTROL(5),           S3D,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 1,
+			.mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
+			.expected = 0,
+	      },
+	      {
+			.offset = 1,
+		        .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+				 PIPE_CONTROL_STORE_DATA_INDEX),
+			.expected = 0,
+			.condition_offset = 1,
+			.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK,
+	      }},						       ),
+};
+
+static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
+	CMD(  MI_SET_PREDICATE,                 SMI,    F,  1,      S  ),
+	CMD(  MI_RS_CONTROL,                    SMI,    F,  1,      S  ),
+	CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
+	CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
+	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
+	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_RS_STORE_DATA_IMM,             SMI,   !F,  0xFF,   S  ),
+	CMD(  MI_LOAD_URB_MEM,                  SMI,   !F,  0xFF,   S  ),
+	CMD(  MI_STORE_URB_MEM,                 SMI,   !F,  0xFF,   S  ),
+	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
+	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),
+
+	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS,  S3D,   !F,  0x1FF,  S  ),
+	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS,  S3D,   !F,  0x1FF,  S  ),
+	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS,  S3D,   !F,  0x1FF,  S  ),
+	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS,  S3D,   !F,  0x1FF,  S  ),
+	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
+};
+
+static const struct drm_i915_cmd_descriptor video_cmds[] = {
+	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
+	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_FLUSH_DW_NOTIFY,
+			.expected = 0,
+	      },
+	      {
+			.offset = 1,
+			.mask = MI_FLUSH_DW_USE_GTT,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      },
+	      {
+			.offset = 0,
+			.mask = MI_FLUSH_DW_STORE_INDEX,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      }},						       ),
+	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	/*
+	 * MFX_WAIT doesn't fit the way we handle length for most commands.
+	 * It has a length field but it uses a non-standard length bias.
+	 * It is always 1 dword though, so just treat it as fixed length.
+	 */
+	CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
+};
+
+static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
+	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_FLUSH_DW_NOTIFY,
+			.expected = 0,
+	      },
+	      {
+			.offset = 1,
+			.mask = MI_FLUSH_DW_USE_GTT,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      },
+	      {
+			.offset = 0,
+			.mask = MI_FLUSH_DW_STORE_INDEX,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      }},						       ),
+	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+};
+
+static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
+	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_GLOBAL_GTT,
+			.expected = 0,
+	      }},						       ),
+	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
+	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
+	      .bits = {{
+			.offset = 0,
+			.mask = MI_FLUSH_DW_NOTIFY,
+			.expected = 0,
+	      },
+	      {
+			.offset = 1,
+			.mask = MI_FLUSH_DW_USE_GTT,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      },
+	      {
+			.offset = 0,
+			.mask = MI_FLUSH_DW_STORE_INDEX,
+			.expected = 0,
+			.condition_offset = 0,
+			.condition_mask = MI_FLUSH_DW_OP_MASK,
+	      }},						       ),
+	CMD(  COLOR_BLT,                        S2D,   !F,  0x3F,   S  ),
+	CMD(  SRC_COPY_BLT,                     S2D,   !F,  0x3F,   S  ),
+};
+
+static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
+	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
+};
+
+#undef CMD
+#undef SMI
+#undef S3D
+#undef S2D
+#undef SMFX
+#undef F
+#undef S
+#undef R
+#undef W
+#undef B
+#undef M
+
+static const struct drm_i915_cmd_table gen7_render_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ render_cmds, ARRAY_SIZE(render_cmds) },
+};
+
+static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ render_cmds, ARRAY_SIZE(render_cmds) },
+	{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
+};
+
+static const struct drm_i915_cmd_table gen7_video_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ video_cmds, ARRAY_SIZE(video_cmds) },
+};
+
+static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+};
+
+static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
+};
+
+static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
+	{ common_cmds, ARRAY_SIZE(common_cmds) },
+	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
+	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
+};
+
+/*
+ * Register whitelists, sorted by increasing register offset.
+ *
+ * Some registers that userspace accesses are 64 bits. The register
+ * access commands only allow 32-bit accesses. Hence, we have to include
+ * entries for both halves of the 64-bit registers.
+ */
+
+/* Convenience macro for adding 64-bit registers */
+#define REG64(addr) (addr), (addr + sizeof(u32))
+
+static const u32 gen7_render_regs[] = {
+	REG64(HS_INVOCATION_COUNT),
+	REG64(DS_INVOCATION_COUNT),
+	REG64(IA_VERTICES_COUNT),
+	REG64(IA_PRIMITIVES_COUNT),
+	REG64(VS_INVOCATION_COUNT),
+	REG64(GS_INVOCATION_COUNT),
+	REG64(GS_PRIMITIVES_COUNT),
+	REG64(CL_INVOCATION_COUNT),
+	REG64(CL_PRIMITIVES_COUNT),
+	REG64(PS_INVOCATION_COUNT),
+	REG64(PS_DEPTH_COUNT),
+	OACONTROL, /* Only allowed for LRI and SRM. See below. */
+	GEN7_3DPRIM_END_OFFSET,
+	GEN7_3DPRIM_START_VERTEX,
+	GEN7_3DPRIM_VERTEX_COUNT,
+	GEN7_3DPRIM_INSTANCE_COUNT,
+	GEN7_3DPRIM_START_INSTANCE,
+	GEN7_3DPRIM_BASE_VERTEX,
+	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
+	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
+	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
+	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
+	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
+	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
+	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
+	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
+	GEN7_SO_WRITE_OFFSET(0),
+	GEN7_SO_WRITE_OFFSET(1),
+	GEN7_SO_WRITE_OFFSET(2),
+	GEN7_SO_WRITE_OFFSET(3),
+};
+
+static const u32 gen7_blt_regs[] = {
+	BCS_SWCTRL,
+};
+
+static const u32 ivb_master_regs[] = {
+	FORCEWAKE_MT,
+	DERRMR,
+	GEN7_PIPE_DE_LOAD_SL(PIPE_A),
+	GEN7_PIPE_DE_LOAD_SL(PIPE_B),
+	GEN7_PIPE_DE_LOAD_SL(PIPE_C),
+};
+
+static const u32 hsw_master_regs[] = {
+	FORCEWAKE_MT,
+	DERRMR,
+};
+
+#undef REG64
+
 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
 {
 	u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
@@ -137,15 +498,18 @@
 	return 0;
 }
 
-static void validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_engine_cs *ring,
+				 const struct drm_i915_cmd_table *cmd_tables,
+				 int cmd_table_count)
 {
 	int i;
+	bool ret = true;
 
-	if (!ring->cmd_tables || ring->cmd_table_count == 0)
-		return;
+	if (!cmd_tables || cmd_table_count == 0)
+		return true;
 
-	for (i = 0; i < ring->cmd_table_count; i++) {
-		const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
+	for (i = 0; i < cmd_table_count; i++) {
+		const struct drm_i915_cmd_table *table = &cmd_tables[i];
 		u32 previous = 0;
 		int j;
 
@@ -154,35 +518,107 @@
 				&table->table[i];
 			u32 curr = desc->cmd.value & desc->cmd.mask;
 
-			if (curr < previous)
+			if (curr < previous) {
 				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
 					  ring->id, i, j, curr, previous);
+				ret = false;
+			}
 
 			previous = curr;
 		}
 	}
+
+	return ret;
 }
 
-static void check_sorted(int ring_id, const u32 *reg_table, int reg_count)
+static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
 {
 	int i;
 	u32 previous = 0;
+	bool ret = true;
 
 	for (i = 0; i < reg_count; i++) {
 		u32 curr = reg_table[i];
 
-		if (curr < previous)
+		if (curr < previous) {
 			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
 				  ring_id, i, curr, previous);
+			ret = false;
+		}
 
 		previous = curr;
 	}
+
+	return ret;
 }
 
-static void validate_regs_sorted(struct intel_ring_buffer *ring)
+static bool validate_regs_sorted(struct intel_engine_cs *ring)
 {
-	check_sorted(ring->id, ring->reg_table, ring->reg_count);
-	check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count);
+	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
+		check_sorted(ring->id, ring->master_reg_table,
+			     ring->master_reg_count);
+}
+
+struct cmd_node {
+	const struct drm_i915_cmd_descriptor *desc;
+	struct hlist_node node;
+};
+
+/*
+ * Different command ranges have different numbers of bits for the opcode. For
+ * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The
+ * problem is that, for example, MI commands use bits 22:16 for other fields
+ * such as GGTT vs PPGTT bits. If we include those bits in the mask then when
+ * we mask a command from a batch it could hash to the wrong bucket due to
+ * non-opcode bits being set. But if we don't include those bits, some 3D
+ * commands may hash to the same bucket due to not including opcode bits that
+ * make the command unique. For now, we will risk hashing to the same bucket.
+ *
+ * If we attempt to generate a perfect hash, we should be able to look at bits
+ * 31:29 of a command from a batch buffer and use the full mask for that
+ * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
+ */
+#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+
+static int init_hash_table(struct intel_engine_cs *ring,
+			   const struct drm_i915_cmd_table *cmd_tables,
+			   int cmd_table_count)
+{
+	int i, j;
+
+	hash_init(ring->cmd_hash);
+
+	for (i = 0; i < cmd_table_count; i++) {
+		const struct drm_i915_cmd_table *table = &cmd_tables[i];
+
+		for (j = 0; j < table->count; j++) {
+			const struct drm_i915_cmd_descriptor *desc =
+				&table->table[j];
+			struct cmd_node *desc_node =
+				kmalloc(sizeof(*desc_node), GFP_KERNEL);
+
+			if (!desc_node)
+				return -ENOMEM;
+
+			desc_node->desc = desc;
+			hash_add(ring->cmd_hash, &desc_node->node,
+				 desc->cmd.value & CMD_HASH_MASK);
+		}
+	}
+
+	return 0;
+}
+
+static void fini_hash_table(struct intel_engine_cs *ring)
+{
+	struct hlist_node *tmp;
+	struct cmd_node *desc_node;
+	int i;
+
+	hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) {
+		hash_del(&desc_node->node);
+		kfree(desc_node);
+	}
 }
 
 /**
@@ -190,25 +626,74 @@
  * @ring: the ringbuffer to initialize
  *
  * Optionally initializes fields related to batch buffer command parsing in the
- * struct intel_ring_buffer based on whether the platform requires software
+ * struct intel_engine_cs based on whether the platform requires software
  * command parsing.
+ *
+ * Return: non-zero if initialization fails
  */
-void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
+int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 {
+	const struct drm_i915_cmd_table *cmd_tables;
+	int cmd_table_count;
+	int ret;
+
 	if (!IS_GEN7(ring->dev))
-		return;
+		return 0;
 
 	switch (ring->id) {
 	case RCS:
+		if (IS_HASWELL(ring->dev)) {
+			cmd_tables = hsw_render_ring_cmds;
+			cmd_table_count =
+				ARRAY_SIZE(hsw_render_ring_cmds);
+		} else {
+			cmd_tables = gen7_render_cmds;
+			cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+		}
+
+		ring->reg_table = gen7_render_regs;
+		ring->reg_count = ARRAY_SIZE(gen7_render_regs);
+
+		if (IS_HASWELL(ring->dev)) {
+			ring->master_reg_table = hsw_master_regs;
+			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
+		} else {
+			ring->master_reg_table = ivb_master_regs;
+			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
+		}
+
 		ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
 		break;
 	case VCS:
+		cmd_tables = gen7_video_cmds;
+		cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
 		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
 	case BCS:
+		if (IS_HASWELL(ring->dev)) {
+			cmd_tables = hsw_blt_ring_cmds;
+			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+		} else {
+			cmd_tables = gen7_blt_cmds;
+			cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+		}
+
+		ring->reg_table = gen7_blt_regs;
+		ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
+
+		if (IS_HASWELL(ring->dev)) {
+			ring->master_reg_table = hsw_master_regs;
+			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
+		} else {
+			ring->master_reg_table = ivb_master_regs;
+			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
+		}
+
 		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
 		break;
 	case VECS:
+		cmd_tables = hsw_vebox_cmds;
+		cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
 		/* VECS can use the same length_mask function as VCS */
 		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
@@ -218,18 +703,45 @@
 		BUG();
 	}
 
-	validate_cmds_sorted(ring);
-	validate_regs_sorted(ring);
+	BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count));
+	BUG_ON(!validate_regs_sorted(ring));
+
+	ret = init_hash_table(ring, cmd_tables, cmd_table_count);
+	if (ret) {
+		DRM_ERROR("CMD: cmd_parser_init failed!\n");
+		fini_hash_table(ring);
+		return ret;
+	}
+
+	ring->needs_cmd_parser = true;
+
+	return 0;
+}
+
+/**
+ * i915_cmd_parser_fini_ring() - clean up cmd parser related fields
+ * @ring: the ringbuffer to clean up
+ *
+ * Releases any resources related to command parsing that may have been
+ * initialized for the specified ring.
+ */
+void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
+{
+	if (!ring->needs_cmd_parser)
+		return;
+
+	fini_hash_table(ring);
 }
 
 static const struct drm_i915_cmd_descriptor*
-find_cmd_in_table(const struct drm_i915_cmd_table *table,
+find_cmd_in_table(struct intel_engine_cs *ring,
 		  u32 cmd_header)
 {
-	int i;
+	struct cmd_node *desc_node;
 
-	for (i = 0; i < table->count; i++) {
-		const struct drm_i915_cmd_descriptor *desc = &table->table[i];
+	hash_for_each_possible(ring->cmd_hash, desc_node, node,
+			       cmd_header & CMD_HASH_MASK) {
+		const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
 		u32 masked_cmd = desc->cmd.mask & cmd_header;
 		u32 masked_value = desc->cmd.value & desc->cmd.mask;
 
@@ -249,20 +761,16 @@
  * ring's default length encoding and returns default_desc.
  */
 static const struct drm_i915_cmd_descriptor*
-find_cmd(struct intel_ring_buffer *ring,
+find_cmd(struct intel_engine_cs *ring,
 	 u32 cmd_header,
 	 struct drm_i915_cmd_descriptor *default_desc)
 {
+	const struct drm_i915_cmd_descriptor *desc;
 	u32 mask;
-	int i;
 
-	for (i = 0; i < ring->cmd_table_count; i++) {
-		const struct drm_i915_cmd_descriptor *desc;
-
-		desc = find_cmd_in_table(&ring->cmd_tables[i], cmd_header);
-		if (desc)
-			return desc;
-	}
+	desc = find_cmd_in_table(ring, cmd_header);
+	if (desc)
+		return desc;
 
 	mask = ring->get_cmd_length_mask(cmd_header);
 	if (!mask)
@@ -329,15 +837,112 @@
  *
  * Return: true if the ring requires software command parsing
  */
-bool i915_needs_cmd_parser(struct intel_ring_buffer *ring)
+bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
 {
-	/* No command tables indicates a platform without parsing */
-	if (!ring->cmd_tables)
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	if (!ring->needs_cmd_parser)
+		return false;
+
+	/*
+	 * XXX: VLV is Gen7 and therefore has cmd_tables, but has PPGTT
+	 * disabled. That will cause all of the parser's PPGTT checks to
+	 * fail. For now, disable parsing when PPGTT is off.
+	 */
+	if (!dev_priv->mm.aliasing_ppgtt)
 		return false;
 
 	return (i915.enable_cmd_parser == 1);
 }
 
+static bool check_cmd(const struct intel_engine_cs *ring,
+		      const struct drm_i915_cmd_descriptor *desc,
+		      const u32 *cmd,
+		      const bool is_master,
+		      bool *oacontrol_set)
+{
+	if (desc->flags & CMD_DESC_REJECT) {
+		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
+		return false;
+	}
+
+	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
+		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
+				 *cmd);
+		return false;
+	}
+
+	if (desc->flags & CMD_DESC_REGISTER) {
+		u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask;
+
+		/*
+		 * OACONTROL requires some special handling for writes. We
+		 * want to make sure that any batch which enables OA also
+		 * disables it before the end of the batch. The goal is to
+		 * prevent one process from snooping on the perf data from
+		 * another process. To do that, we need to check the value
+		 * that will be written to the register. Hence, limit
+		 * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands.
+		 */
+		if (reg_addr == OACONTROL) {
+			if (desc->cmd.value == MI_LOAD_REGISTER_MEM)
+				return false;
+
+			if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1))
+				*oacontrol_set = (cmd[2] != 0);
+		}
+
+		if (!valid_reg(ring->reg_table,
+			       ring->reg_count, reg_addr)) {
+			if (!is_master ||
+			    !valid_reg(ring->master_reg_table,
+				       ring->master_reg_count,
+				       reg_addr)) {
+				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
+						 reg_addr,
+						 *cmd,
+						 ring->id);
+				return false;
+			}
+		}
+	}
+
+	if (desc->flags & CMD_DESC_BITMASK) {
+		int i;
+
+		for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) {
+			u32 dword;
+
+			if (desc->bits[i].mask == 0)
+				break;
+
+			if (desc->bits[i].condition_mask != 0) {
+				u32 offset =
+					desc->bits[i].condition_offset;
+				u32 condition = cmd[offset] &
+					desc->bits[i].condition_mask;
+
+				if (condition == 0)
+					continue;
+			}
+
+			dword = cmd[desc->bits[i].offset] &
+				desc->bits[i].mask;
+
+			if (dword != desc->bits[i].expected) {
+				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
+						 *cmd,
+						 desc->bits[i].mask,
+						 desc->bits[i].expected,
+						 dword, ring->id);
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 #define LENGTH_BIAS 2
 
 /**
@@ -352,7 +957,7 @@
  *
  * Return: non-zero if the parser finds violations or otherwise fails
  */
-int i915_parse_cmds(struct intel_ring_buffer *ring,
+int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    u32 batch_start_offset,
 		    bool is_master)
@@ -361,6 +966,7 @@
 	u32 *cmd, *batch_base, *batch_end;
 	struct drm_i915_cmd_descriptor default_desc = { 0 };
 	int needs_clflush = 0;
+	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
 	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
 	if (ret) {
@@ -402,76 +1008,27 @@
 			length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
 
 		if ((batch_end - cmd) < length) {
-			DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%d batchlen=%td\n",
+			DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
 					 *cmd,
 					 length,
-					 (unsigned long)(batch_end - cmd));
+					 batch_end - cmd);
 			ret = -EINVAL;
 			break;
 		}
 
-		if (desc->flags & CMD_DESC_REJECT) {
-			DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
+		if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) {
 			ret = -EINVAL;
 			break;
 		}
 
-		if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
-			DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
-					 *cmd);
-			ret = -EINVAL;
-			break;
-		}
-
-		if (desc->flags & CMD_DESC_REGISTER) {
-			u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask;
-
-			if (!valid_reg(ring->reg_table,
-				       ring->reg_count, reg_addr)) {
-				if (!is_master ||
-				    !valid_reg(ring->master_reg_table,
-					       ring->master_reg_count,
-					       reg_addr)) {
-					DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
-							 reg_addr,
-							 *cmd,
-							 ring->id);
-					ret = -EINVAL;
-					break;
-				}
-			}
-		}
-
-		if (desc->flags & CMD_DESC_BITMASK) {
-			int i;
-
-			for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) {
-				u32 dword;
-
-				if (desc->bits[i].mask == 0)
-					break;
-
-				dword = cmd[desc->bits[i].offset] &
-					desc->bits[i].mask;
-
-				if (dword != desc->bits[i].expected) {
-					DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
-							 *cmd,
-							 desc->bits[i].mask,
-							 desc->bits[i].expected,
-							 dword, ring->id);
-					ret = -EINVAL;
-					break;
-				}
-			}
-
-			if (ret)
-				break;
-		}
-
 		cmd += length;
 	}
 
+	if (oacontrol_set) {
+		DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
+		ret = -EINVAL;
+	}
+
 	if (cmd >= batch_end) {
 		DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
 		ret = -EINVAL;
@@ -483,3 +1040,22 @@
 
 	return ret;
 }
+
+/**
+ * i915_cmd_parser_get_version() - get the cmd parser version number
+ *
+ * The cmd parser maintains a simple increasing integer version number suitable
+ * for passing to userspace clients to determine what operations are permitted.
+ *
+ * Return: the current version number of the cmd parser
+ */
+int i915_cmd_parser_get_version(void)
+{
+	/*
+	 * Command parser version history
+	 *
+	 * 1. Initial version. Checks batches and reports violations, but leaves
+	 *    hardware parsing enabled (so does not allow new use cases).
+	 */
+	return 1;
+}

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 195fe5b..601caa8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c

@@ -79,7 +79,7 @@
 
 static int i915_capabilities(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	const struct intel_device_info *info = INTEL_INFO(dev);
 
@@ -172,7 +172,7 @@
 		seq_printf(m, " (%s)", obj->ring->name);
 }
 
-static void describe_ctx(struct seq_file *m, struct i915_hw_context *ctx)
+static void describe_ctx(struct seq_file *m, struct intel_context *ctx)
 {
 	seq_putc(m, ctx->is_initialized ? 'I' : 'i');
 	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
@@ -181,7 +181,7 @@
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct list_head *head;
 	struct drm_device *dev = node->minor->dev;
@@ -239,7 +239,7 @@
 
 static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
@@ -371,7 +371,7 @@
 
 static int i915_gem_object_info(struct seq_file *m, void* data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 count, mappable_count, purgeable_count;
@@ -474,7 +474,7 @@
 
 static int i915_gem_gtt_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -509,12 +509,12 @@
 
 static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	unsigned long flags;
 	struct intel_crtc *crtc;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		const char pipe = pipe_name(crtc->pipe);
 		const char plane = plane_name(crtc->plane);
 		struct intel_unpin_work *work;
@@ -559,10 +559,10 @@
 
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	struct drm_i915_gem_request *gem_request;
 	int ret, count, i;
 
@@ -594,7 +594,7 @@
 }
 
 static void i915_ring_seqno_info(struct seq_file *m,
-				 struct intel_ring_buffer *ring)
+				 struct intel_engine_cs *ring)
 {
 	if (ring->get_seqno) {
 		seq_printf(m, "Current sequence (%s): %u\n",
@@ -604,10 +604,10 @@
 
 static int i915_gem_seqno_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int ret, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -627,10 +627,10 @@
 
 static int i915_interrupt_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int ret, i, pipe;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -638,7 +638,47 @@
 		return ret;
 	intel_runtime_pm_get(dev_priv);
 
-	if (INTEL_INFO(dev)->gen >= 8) {
+	if (IS_CHERRYVIEW(dev)) {
+		int i;
+		seq_printf(m, "Master Interrupt Control:\t%08x\n",
+			   I915_READ(GEN8_MASTER_IRQ));
+
+		seq_printf(m, "Display IER:\t%08x\n",
+			   I915_READ(VLV_IER));
+		seq_printf(m, "Display IIR:\t%08x\n",
+			   I915_READ(VLV_IIR));
+		seq_printf(m, "Display IIR_RW:\t%08x\n",
+			   I915_READ(VLV_IIR_RW));
+		seq_printf(m, "Display IMR:\t%08x\n",
+			   I915_READ(VLV_IMR));
+		for_each_pipe(pipe)
+			seq_printf(m, "Pipe %c stat:\t%08x\n",
+				   pipe_name(pipe),
+				   I915_READ(PIPESTAT(pipe)));
+
+		seq_printf(m, "Port hotplug:\t%08x\n",
+			   I915_READ(PORT_HOTPLUG_EN));
+		seq_printf(m, "DPFLIPSTAT:\t%08x\n",
+			   I915_READ(VLV_DPFLIPSTAT));
+		seq_printf(m, "DPINVGTT:\t%08x\n",
+			   I915_READ(DPINVGTT));
+
+		for (i = 0; i < 4; i++) {
+			seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
+				   i, I915_READ(GEN8_GT_IMR(i)));
+			seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
+				   i, I915_READ(GEN8_GT_IIR(i)));
+			seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
+				   i, I915_READ(GEN8_GT_IER(i)));
+		}
+
+		seq_printf(m, "PCU interrupt mask:\t%08x\n",
+			   I915_READ(GEN8_PCU_IMR));
+		seq_printf(m, "PCU interrupt identity:\t%08x\n",
+			   I915_READ(GEN8_PCU_IIR));
+		seq_printf(m, "PCU interrupt enable:\t%08x\n",
+			   I915_READ(GEN8_PCU_IER));
+	} else if (INTEL_INFO(dev)->gen >= 8) {
 		seq_printf(m, "Master Interrupt Control:\t%08x\n",
 			   I915_READ(GEN8_MASTER_IRQ));
 
@@ -768,7 +808,7 @@
 
 static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i, ret;
@@ -797,10 +837,10 @@
 
 static int i915_hws_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	const u32 *hws;
 	int i;
 
@@ -945,7 +985,7 @@
 
 static int i915_rstdby_delays(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u16 crstanddelay;
@@ -966,9 +1006,9 @@
 	return 0;
 }
 
-static int i915_cur_delayinfo(struct seq_file *m, void *unused)
+static int i915_frequency_info(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = 0;
@@ -991,6 +1031,7 @@
 		u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 		u32 rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
 		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+		u32 rpmodectl, rpinclimit, rpdeclimit;
 		u32 rpstat, cagf, reqf;
 		u32 rpupei, rpcurup, rpprevup;
 		u32 rpdownei, rpcurdown, rpprevdown;
@@ -1011,6 +1052,10 @@
 			reqf >>= 25;
 		reqf *= GT_FREQUENCY_MULTIPLIER;
 
+		rpmodectl = I915_READ(GEN6_RP_CONTROL);
+		rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
+		rpdeclimit = I915_READ(GEN6_RP_DOWN_THRESHOLD);
+
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
 		rpcurup = I915_READ(GEN6_RP_CUR_UP);
@@ -1027,14 +1072,23 @@
 		gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 		mutex_unlock(&dev->struct_mutex);
 
+		seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
+			   I915_READ(GEN6_PMIER),
+			   I915_READ(GEN6_PMIMR),
+			   I915_READ(GEN6_PMISR),
+			   I915_READ(GEN6_PMIIR),
+			   I915_READ(GEN6_PMINTRMSK));
 		seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
-		seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
 		seq_printf(m, "Render p-state ratio: %d\n",
 			   (gt_perf_status & 0xff00) >> 8);
 		seq_printf(m, "Render p-state VID: %d\n",
 			   gt_perf_status & 0xff);
 		seq_printf(m, "Render p-state limit: %d\n",
 			   rp_state_limits & 0xff);
+		seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
+		seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
+		seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+		seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
 		seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
 		seq_printf(m, "CAGF: %dMHz\n", cagf);
 		seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
@@ -1094,7 +1148,7 @@
 
 static int i915_delayfreq_table(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 delayfreq;
@@ -1125,7 +1179,7 @@
 
 static int i915_inttoext_table(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 inttoext;
@@ -1149,7 +1203,7 @@
 
 static int ironlake_drpc_info(struct seq_file *m)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 rgvmodectl, rstdbyctl;
@@ -1219,15 +1273,19 @@
 static int vlv_drpc_info(struct seq_file *m)
 {
 
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 rpmodectl1, rcctl1;
 	unsigned fw_rendercount = 0, fw_mediacount = 0;
 
+	intel_runtime_pm_get(dev_priv);
+
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
 
+	intel_runtime_pm_put(dev_priv);
+
 	seq_printf(m, "Video Turbo Mode: %s\n",
 		   yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
 	seq_printf(m, "Turbo enabled: %s\n",
@@ -1247,6 +1305,11 @@
 			(I915_READ(VLV_GTLC_PW_STATUS) &
 				VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
+	seq_printf(m, "Render RC6 residency since boot: %u\n",
+		   I915_READ(VLV_GT_RENDER_RC6));
+	seq_printf(m, "Media RC6 residency since boot: %u\n",
+		   I915_READ(VLV_GT_MEDIA_RC6));
+
 	spin_lock_irq(&dev_priv->uncore.lock);
 	fw_rendercount = dev_priv->uncore.fw_rendercount;
 	fw_mediacount = dev_priv->uncore.fw_mediacount;
@@ -1263,7 +1326,7 @@
 static int gen6_drpc_info(struct seq_file *m)
 {
 
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
@@ -1362,7 +1425,7 @@
 
 static int i915_drpc_info(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 
 	if (IS_VALLEYVIEW(dev))
@@ -1375,7 +1438,7 @@
 
 static int i915_fbc_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -1437,7 +1500,7 @@
 
 static int i915_ips_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -1460,7 +1523,7 @@
 
 static int i915_sr_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	bool sr_enabled = false;
@@ -1486,7 +1549,7 @@
 
 static int i915_emon_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long temp, chipset, gfx;
@@ -1514,7 +1577,7 @@
 
 static int i915_ring_freq_table(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = 0;
@@ -1557,7 +1620,7 @@
 
 static int i915_gfxec(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
@@ -1577,7 +1640,7 @@
 
 static int i915_opregion(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_opregion *opregion = &dev_priv->opregion;
@@ -1605,7 +1668,7 @@
 
 static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct intel_fbdev *ifbdev = NULL;
 	struct intel_framebuffer *fb;
@@ -1651,11 +1714,11 @@
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_context *ctx;
+	struct intel_engine_cs *ring;
+	struct intel_context *ctx;
 	int ret, i;
 
 	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
@@ -1675,6 +1738,9 @@
 	}
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link) {
+		if (ctx->obj == NULL)
+			continue;
+
 		seq_puts(m, "HW context ");
 		describe_ctx(m, ctx);
 		for_each_ring(ring, dev_priv, i)
@@ -1692,7 +1758,7 @@
 
 static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned forcewake_count = 0, fw_rendercount = 0, fw_mediacount = 0;
@@ -1740,7 +1806,7 @@
 
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
@@ -1788,10 +1854,14 @@
 
 static int per_file_ctx(int id, void *ptr, void *data)
 {
-	struct i915_hw_context *ctx = ptr;
+	struct intel_context *ctx = ptr;
 	struct seq_file *m = data;
 	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
 
+	if (i915_gem_context_is_default(ctx))
+		seq_puts(m, "  default context:\n");
+	else
+		seq_printf(m, "  context %d:\n", ctx->id);
 	ppgtt->debug_dump(ppgtt, m);
 
 	return 0;
@@ -1800,7 +1870,7 @@
 static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 	int unused, i;
 
@@ -1816,8 +1886,7 @@
 			u64 pdp = I915_READ(ring->mmio_base + offset + 4);
 			pdp <<= 32;
 			pdp |= I915_READ(ring->mmio_base + offset);
-			for (i = 0; i < 4; i++)
-				seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
+			seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
 		}
 	}
 }
@@ -1825,7 +1894,7 @@
 static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	struct drm_file *file;
 	int i;
 
@@ -1852,12 +1921,9 @@
 
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct i915_hw_ppgtt *pvt_ppgtt;
 
-		pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx);
 		seq_printf(m, "proc: %s\n",
 			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
-		seq_puts(m, "  default context:\n");
 		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
 	}
 	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
@@ -1865,7 +1931,7 @@
 
 static int i915_ppgtt_info(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -1885,56 +1951,9 @@
 	return 0;
 }
 
-static int i915_dpio_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-
-	if (!IS_VALLEYVIEW(dev)) {
-		seq_puts(m, "unsupported\n");
-		return 0;
-	}
-
-	ret = mutex_lock_interruptible(&dev_priv->dpio_lock);
-	if (ret)
-		return ret;
-
-	seq_printf(m, "DPIO_CTL: 0x%08x\n", I915_READ(DPIO_CTL));
-
-	seq_printf(m, "DPIO PLL DW3 CH0 : 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW3(0)));
-	seq_printf(m, "DPIO PLL DW3 CH1: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW3(1)));
-
-	seq_printf(m, "DPIO PLL DW5 CH0: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW5(0)));
-	seq_printf(m, "DPIO PLL DW5 CH1: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW5(1)));
-
-	seq_printf(m, "DPIO PLL DW7 CH0: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW7(0)));
-	seq_printf(m, "DPIO PLL DW7 CH1: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW7(1)));
-
-	seq_printf(m, "DPIO PLL DW10 CH0: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW10(0)));
-	seq_printf(m, "DPIO PLL DW10 CH1: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_PLL_DW10(1)));
-
-	seq_printf(m, "DPIO_FASTCLK_DISABLE: 0x%08x\n",
-		   vlv_dpio_read(dev_priv, PIPE_A, VLV_CMN_DW0));
-
-	mutex_unlock(&dev_priv->dpio_lock);
-
-	return 0;
-}
-
 static int i915_llc(struct seq_file *m, void *data)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -2040,11 +2059,11 @@
 
 static int i915_pc8_status(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (!IS_HASWELL(dev)) {
+	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
 		seq_puts(m, "not supported\n");
 		return 0;
 	}
@@ -2115,7 +2134,7 @@
 
 static int i915_power_domain_info(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
@@ -2170,7 +2189,7 @@
 			       struct intel_crtc *intel_crtc,
 			       struct intel_encoder *intel_encoder)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_crtc *crtc = &intel_crtc->base;
 	struct intel_connector *intel_connector;
@@ -2178,12 +2197,12 @@
 
 	encoder = &intel_encoder->base;
 	seq_printf(m, "\tencoder %d: type: %s, connectors:\n",
-		   encoder->base.id, drm_get_encoder_name(encoder));
+		   encoder->base.id, encoder->name);
 	for_each_connector_on_encoder(dev, encoder, intel_connector) {
 		struct drm_connector *connector = &intel_connector->base;
 		seq_printf(m, "\t\tconnector %d: type: %s, status: %s",
 			   connector->base.id,
-			   drm_get_connector_name(connector),
+			   connector->name,
 			   drm_get_connector_status_name(connector->status));
 		if (connector->status == connector_status_connected) {
 			struct drm_display_mode *mode = &crtc->mode;
@@ -2197,7 +2216,7 @@
 
 static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_crtc *crtc = &intel_crtc->base;
 	struct intel_encoder *intel_encoder;
@@ -2254,7 +2273,7 @@
 	struct drm_display_mode *mode;
 
 	seq_printf(m, "connector %d: type %s, status: %s\n",
-		   connector->base.id, drm_get_connector_name(connector),
+		   connector->base.id, connector->name,
 		   drm_get_connector_status_name(connector->status));
 	if (connector->status == connector_status_connected) {
 		seq_printf(m, "\tname: %s\n", connector->display_info.name);
@@ -2286,10 +2305,8 @@
 
 	if (IS_845G(dev) || IS_I865G(dev))
 		state = I915_READ(_CURACNTR) & CURSOR_ENABLE;
-	else if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev))
-		state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
 	else
-		state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE;
+		state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
 
 	return state;
 }
@@ -2299,10 +2316,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pos;
 
-	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev))
-		pos = I915_READ(CURPOS_IVB(pipe));
-	else
-		pos = I915_READ(CURPOS(pipe));
+	pos = I915_READ(CURPOS(pipe));
 
 	*x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
 	if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT))
@@ -2317,7 +2331,7 @@
 
 static int i915_display_info(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *crtc;
@@ -2327,7 +2341,7 @@
 	drm_modeset_lock_all(dev);
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		bool active;
 		int x, y;
 
@@ -2339,10 +2353,14 @@
 
 			active = cursor_position(dev, crtc->pipe, &x, &y);
 			seq_printf(m, "\tcursor visible? %s, position (%d, %d), addr 0x%08x, active? %s\n",
-				   yesno(crtc->cursor_visible),
+				   yesno(crtc->cursor_base),
 				   x, y, crtc->cursor_addr,
 				   yesno(active));
 		}
+
+		seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
+			   yesno(!crtc->cpu_fifo_underrun_disabled),
+			   yesno(!crtc->pch_fifo_underrun_disabled));
 	}
 
 	seq_printf(m, "\n");
@@ -2595,7 +2613,7 @@
 
 	*source = INTEL_PIPE_CRC_SOURCE_PIPE;
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock_all(dev);
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
 			    base.head) {
 		if (!encoder->base.crtc)
@@ -2631,7 +2649,7 @@
 			break;
 		}
 	}
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock_all(dev);
 
 	return ret;
 }
@@ -3106,7 +3124,7 @@
 static void wm_latency_show(struct seq_file *m, const uint16_t wm[5])
 {
 	struct drm_device *dev = m->private;
-	int num_levels = IS_HASWELL(dev) || IS_BROADWELL(dev) ? 5 : 4;
+	int num_levels = ilk_wm_max_level(dev) + 1;
 	int level;
 
 	drm_modeset_lock_all(dev);
@@ -3189,7 +3207,7 @@
 	struct seq_file *m = file->private_data;
 	struct drm_device *dev = m->private;
 	uint16_t new[5] = { 0 };
-	int num_levels = IS_HASWELL(dev) || IS_BROADWELL(dev) ? 5 : 4;
+	int num_levels = ilk_wm_max_level(dev) + 1;
 	int level;
 	int ret;
 	char tmp[32];
@@ -3286,9 +3304,15 @@
 i915_wedged_set(void *data, u64 val)
 {
 	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	intel_runtime_pm_get(dev_priv);
 
 	i915_handle_error(dev, val,
 			  "Manually setting wedged to %llu", val);
+
+	intel_runtime_pm_put(dev_priv);
+
 	return 0;
 }
 
@@ -3774,7 +3798,7 @@
 	{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
 	{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
 	{"i915_rstdby_delays", i915_rstdby_delays, 0},
-	{"i915_cur_delayinfo", i915_cur_delayinfo, 0},
+	{"i915_frequency_info", i915_frequency_info, 0},
 	{"i915_delayfreq_table", i915_delayfreq_table, 0},
 	{"i915_inttoext_table", i915_inttoext_table, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
@@ -3790,7 +3814,6 @@
 	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
-	{"i915_dpio", i915_dpio_info, 0},
 	{"i915_llc", i915_llc, 0},
 	{"i915_edp_psr_status", i915_edp_psr_status, 0},
 	{"i915_sink_crc_eDP1", i915_sink_crc, 0},

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index eedb023..4c22a5b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c

@@ -44,6 +44,7 @@
 #include <acpi/video.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
+#include <linux/oom.h>
 
 #define LP_RING(d) (&((struct drm_i915_private *)(d))->ring[RCS])
 
@@ -63,7 +64,7 @@
  * has access to the ring.
  */
 #define RING_LOCK_TEST_WITH_RETURN(dev, file) do {			\
-	if (LP_RING(dev->dev_private)->obj == NULL)			\
+	if (LP_RING(dev->dev_private)->buffer->obj == NULL)			\
 		LOCK_TEST_WITH_RETURN(dev, file);			\
 } while (0)
 
@@ -119,7 +120,7 @@
 static void i915_free_hws(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = LP_RING(dev_priv);
+	struct intel_engine_cs *ring = LP_RING(dev_priv);
 
 	if (dev_priv->status_page_dmah) {
 		drm_pci_free(dev, dev_priv->status_page_dmah);
@@ -139,7 +140,8 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_master_private *master_priv;
-	struct intel_ring_buffer *ring = LP_RING(dev_priv);
+	struct intel_engine_cs *ring = LP_RING(dev_priv);
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 
 	/*
 	 * We should never lose context on the ring with modesetting
@@ -148,17 +150,17 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return;
 
-	ring->head = I915_READ_HEAD(ring) & HEAD_ADDR;
-	ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-	ring->space = ring->head - (ring->tail + I915_RING_FREE_SPACE);
-	if (ring->space < 0)
-		ring->space += ring->size;
+	ringbuf->head = I915_READ_HEAD(ring) & HEAD_ADDR;
+	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
+	ringbuf->space = ringbuf->head - (ringbuf->tail + I915_RING_FREE_SPACE);
+	if (ringbuf->space < 0)
+		ringbuf->space += ringbuf->size;
 
 	if (!dev->primary->master)
 		return;
 
 	master_priv = dev->primary->master->driver_priv;
-	if (ring->head == ring->tail && master_priv->sarea_priv)
+	if (ringbuf->head == ringbuf->tail && master_priv->sarea_priv)
 		master_priv->sarea_priv->perf_boxes |= I915_BOX_RING_EMPTY;
 }
 
@@ -201,7 +203,7 @@
 	}
 
 	if (init->ring_size != 0) {
-		if (LP_RING(dev_priv)->obj != NULL) {
+		if (LP_RING(dev_priv)->buffer->obj != NULL) {
 			i915_dma_cleanup(dev);
 			DRM_ERROR("Client tried to initialize ringbuffer in "
 				  "GEM mode\n");
@@ -234,11 +236,11 @@
 static int i915_dma_resume(struct drm_device * dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = LP_RING(dev_priv);
+	struct intel_engine_cs *ring = LP_RING(dev_priv);
 
 	DRM_DEBUG_DRIVER("%s\n", __func__);
 
-	if (ring->virtual_start == NULL) {
+	if (ring->buffer->virtual_start == NULL) {
 		DRM_ERROR("can not ioremap virtual address for"
 			  " ring buffer\n");
 		return -ENOMEM;
@@ -360,7 +362,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i, ret;
 
-	if ((dwords+1) * sizeof(int) >= LP_RING(dev_priv)->size - 8)
+	if ((dwords+1) * sizeof(int) >= LP_RING(dev_priv)->buffer->size - 8)
 		return -EINVAL;
 
 	for (i = 0; i < dwords;) {
@@ -782,7 +784,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	int ret = 0;
-	struct intel_ring_buffer *ring = LP_RING(dev_priv);
+	struct intel_engine_cs *ring = LP_RING(dev_priv);
 
 	DRM_DEBUG_DRIVER("irq_nr=%d breadcrumb=%d\n", irq_nr,
 		  READ_BREADCRUMB(dev_priv));
@@ -823,7 +825,7 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return -ENODEV;
 
-	if (!dev_priv || !LP_RING(dev_priv)->virtual_start) {
+	if (!dev_priv || !LP_RING(dev_priv)->buffer->virtual_start) {
 		DRM_ERROR("called with no initialization\n");
 		return -EINVAL;
 	}
@@ -1017,6 +1019,9 @@
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 		value = 1;
 		break;
+	case I915_PARAM_CMD_PARSER_VERSION:
+		value = i915_cmd_parser_get_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
@@ -1070,7 +1075,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	drm_i915_hws_addr_t *hws = data;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return -ENODEV;
@@ -1277,12 +1282,13 @@
 static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
@@ -1326,7 +1332,7 @@
 
 	intel_power_domains_init_hw(dev_priv);
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, dev->pdev->irq);
 	if (ret)
 		goto cleanup_gem_stolen;
 
@@ -1336,7 +1342,7 @@
 
 	ret = i915_gem_init(dev);
 	if (ret)
-		goto cleanup_power;
+		goto cleanup_irq;
 
 	INIT_WORK(&dev_priv->console_resume_work, intel_console_resume);
 
@@ -1345,10 +1351,8 @@
 	/* Always safe in the mode setting case. */
 	/* FIXME: do pre/post-mode set stuff in core KMS code */
 	dev->vblank_disable_allowed = true;
-	if (INTEL_INFO(dev)->num_pipes == 0) {
-		intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
+	if (INTEL_INFO(dev)->num_pipes == 0)
 		return 0;
-	}
 
 	ret = intel_fbdev_init(dev);
 	if (ret)
@@ -1383,8 +1387,7 @@
 	mutex_unlock(&dev->struct_mutex);
 	WARN_ON(dev_priv->mm.aliasing_ppgtt);
 	drm_mm_takedown(&dev_priv->gtt.base.mm);
-cleanup_power:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
+cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
 	i915_gem_cleanup_stolen(dev);
@@ -1739,8 +1742,8 @@
 	intel_power_domains_remove(dev_priv);
 	drm_vblank_cleanup(dev);
 out_gem_unload:
-	if (dev_priv->mm.inactive_shrinker.scan_objects)
-		unregister_shrinker(&dev_priv->mm.inactive_shrinker);
+	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
+	unregister_shrinker(&dev_priv->mm.shrinker);
 
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
@@ -1791,8 +1794,8 @@
 
 	i915_teardown_sysfs(dev);
 
-	if (dev_priv->mm.inactive_shrinker.scan_objects)
-		unregister_shrinker(&dev_priv->mm.inactive_shrinker);
+	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
+	unregister_shrinker(&dev_priv->mm.shrinker);
 
 	io_mapping_free(dev_priv->gtt.mappable);
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
@@ -1864,7 +1867,7 @@
 		kmem_cache_destroy(dev_priv->slab);
 
 	pci_dev_put(dev_priv->bridge_dev);
-	kfree(dev->dev_private);
+	kfree(dev_priv);
 
 	return 0;
 }
@@ -1925,6 +1928,8 @@
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
+	if (file_priv && file_priv->bsd_ring)
+		file_priv->bsd_ring = NULL;
 	kfree(file_priv);
 }
 
@@ -1978,9 +1983,10 @@
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
-int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
+int i915_max_ioctl = ARRAY_SIZE(i915_ioctls);
 
 /*
  * This is really ugly: Because old userspace abused the linux agp interface to

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 82f4d1f..651e65e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c

@@ -36,6 +36,7 @@
 
 #include <linux/console.h>
 #include <linux/module.h>
+#include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
 
 static struct drm_driver driver;
@@ -49,12 +50,30 @@
 	.dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET }, \
 	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET }
 
+#define GEN_CHV_PIPEOFFSETS \
+	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
+			  CHV_PIPE_C_OFFSET }, \
+	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
+			   CHV_TRANSCODER_C_OFFSET, }, \
+	.dpll_offsets = { DPLL_A_OFFSET, DPLL_B_OFFSET, \
+			  CHV_DPLL_C_OFFSET }, \
+	.dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET, \
+			     CHV_DPLL_C_MD_OFFSET }, \
+	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \
+			     CHV_PALETTE_C_OFFSET }
+
+#define CURSOR_OFFSETS \
+	.cursor_offsets = { CURSOR_A_OFFSET, CURSOR_B_OFFSET, CHV_CURSOR_C_OFFSET }
+
+#define IVB_CURSOR_OFFSETS \
+	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
 
 static const struct intel_device_info intel_i830_info = {
 	.gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_845g_info = {
@@ -62,6 +81,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_i85x_info = {
@@ -71,6 +91,7 @@
 	.has_fbc = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_i865g_info = {
@@ -78,6 +99,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_i915g_info = {
@@ -85,6 +107,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 static const struct intel_device_info intel_i915gm_info = {
 	.gen = 3, .is_mobile = 1, .num_pipes = 2,
@@ -94,12 +117,14 @@
 	.has_fbc = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 static const struct intel_device_info intel_i945g_info = {
 	.gen = 3, .has_hotplug = 1, .cursor_needs_physical = 1, .num_pipes = 2,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 static const struct intel_device_info intel_i945gm_info = {
 	.gen = 3, .is_i945gm = 1, .is_mobile = 1, .num_pipes = 2,
@@ -109,6 +134,7 @@
 	.has_fbc = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_i965g_info = {
@@ -117,6 +143,7 @@
 	.has_overlay = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_i965gm_info = {
@@ -126,6 +153,7 @@
 	.supports_tv = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_g33_info = {
@@ -134,6 +162,7 @@
 	.has_overlay = 1,
 	.ring_mask = RENDER_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_g45_info = {
@@ -141,6 +170,7 @@
 	.has_pipe_cxsr = 1, .has_hotplug = 1,
 	.ring_mask = RENDER_RING | BSD_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_gm45_info = {
@@ -150,6 +180,7 @@
 	.supports_tv = 1,
 	.ring_mask = RENDER_RING | BSD_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_pineview_info = {
@@ -157,6 +188,7 @@
 	.need_gfx_hws = 1, .has_hotplug = 1,
 	.has_overlay = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ironlake_d_info = {
@@ -164,6 +196,7 @@
 	.need_gfx_hws = 1, .has_hotplug = 1,
 	.ring_mask = RENDER_RING | BSD_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ironlake_m_info = {
@@ -172,6 +205,7 @@
 	.has_fbc = 1,
 	.ring_mask = RENDER_RING | BSD_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_sandybridge_d_info = {
@@ -181,6 +215,7 @@
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
 	.has_llc = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_sandybridge_m_info = {
@@ -190,6 +225,7 @@
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
 	.has_llc = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 #define GEN7_FEATURES  \
@@ -203,6 +239,7 @@
 	GEN7_FEATURES,
 	.is_ivybridge = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ivybridge_m_info = {
@@ -210,6 +247,7 @@
 	.is_ivybridge = 1,
 	.is_mobile = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ivybridge_q_info = {
@@ -217,6 +255,7 @@
 	.is_ivybridge = 1,
 	.num_pipes = 0, /* legal, last one wins */
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_valleyview_m_info = {
@@ -228,6 +267,7 @@
 	.has_fbc = 0, /* legal, last one wins */
 	.has_llc = 0, /* legal, last one wins */
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_valleyview_d_info = {
@@ -238,6 +278,7 @@
 	.has_fbc = 0, /* legal, last one wins */
 	.has_llc = 0, /* legal, last one wins */
 	GEN_DEFAULT_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_haswell_d_info = {
@@ -247,6 +288,7 @@
 	.has_fpga_dbg = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_haswell_m_info = {
@@ -257,6 +299,7 @@
 	.has_fpga_dbg = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_broadwell_d_info = {
@@ -267,6 +310,7 @@
 	.has_ddi = 1,
 	.has_fbc = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_broadwell_m_info = {
@@ -277,6 +321,40 @@
 	.has_ddi = 1,
 	.has_fbc = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+	.gen = 8, .num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+	.gen = 8, .is_mobile = 1, .num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
+static const struct intel_device_info intel_cherryview_info = {
+	.is_preliminary = 1,
+	.gen = 8, .num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.is_valleyview = 1,
+	.display_mmio_offset = VLV_DISPLAY_BASE,
+	GEN_CHV_PIPEOFFSETS,
+	CURSOR_OFFSETS,
 };
 
 /*
@@ -311,8 +389,11 @@
 	INTEL_HSW_M_IDS(&intel_haswell_m_info), \
 	INTEL_VLV_M_IDS(&intel_valleyview_m_info),	\
 	INTEL_VLV_D_IDS(&intel_valleyview_d_info),	\
-	INTEL_BDW_M_IDS(&intel_broadwell_m_info),	\
-	INTEL_BDW_D_IDS(&intel_broadwell_d_info)
+	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),	\
+	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),	\
+	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),	\
+	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), \
+	INTEL_CHV_IDS(&intel_cherryview_info)
 
 static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_PCI_IDS,
@@ -445,18 +526,20 @@
 			return error;
 		}
 
-		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
-
 		drm_irq_uninstall(dev);
 		dev_priv->enable_hotplug_processing = false;
+
+		intel_disable_gt_powersave(dev);
+
 		/*
 		 * Disable CRTCs directly since we want to preserve sw state
 		 * for _thaw.
 		 */
-		mutex_lock(&dev->mode_config.mutex);
-		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		drm_modeset_lock_all(dev);
+		for_each_crtc(dev, crtc) {
 			dev_priv->display.crtc_disable(crtc);
-		mutex_unlock(&dev->mode_config.mutex);
+		}
+		drm_modeset_unlock_all(dev);
 
 		intel_modeset_suspend_hw(dev);
 	}
@@ -519,24 +602,6 @@
 	console_unlock();
 }
 
-static void intel_resume_hotplug(struct drm_device *dev)
-{
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
-
-	mutex_lock(&mode_config->mutex);
-	DRM_DEBUG_KMS("running encoder hotplug functions\n");
-
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
-		if (encoder->hot_plug)
-			encoder->hot_plug(encoder);
-
-	mutex_unlock(&mode_config->mutex);
-
-	/* Just fire off a uevent and let userspace tell us what to do */
-	drm_helper_hpd_irq_event(dev);
-}
-
 static int i915_drm_thaw_early(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -551,7 +616,6 @@
 static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int error = 0;
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET) &&
 	    restore_gtt_mappings) {
@@ -569,12 +633,14 @@
 		drm_mode_config_reset(dev);
 
 		mutex_lock(&dev->struct_mutex);
-
-		error = i915_gem_init_hw(dev);
+		if (i915_gem_init_hw(dev)) {
+			DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
+			atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		}
 		mutex_unlock(&dev->struct_mutex);
 
 		/* We need working interrupts for modeset enabling ... */
-		drm_irq_install(dev);
+		drm_irq_install(dev, dev->pdev->irq);
 
 		intel_modeset_init_hw(dev);
 
@@ -591,7 +657,7 @@
 		intel_hpd_init(dev);
 		dev_priv->enable_hotplug_processing = true;
 		/* Config may have changed between suspend and resume */
-		intel_resume_hotplug(dev);
+		drm_helper_hpd_irq_event(dev);
 	}
 
 	intel_opregion_init(dev);
@@ -613,7 +679,7 @@
 	mutex_unlock(&dev_priv->modeset_restore_lock);
 
 	intel_runtime_pm_put(dev_priv);
-	return error;
+	return 0;
 }
 
 static int i915_drm_thaw(struct drm_device *dev)
@@ -746,18 +812,20 @@
 			return ret;
 		}
 
-		drm_irq_uninstall(dev);
-		drm_irq_install(dev);
+		/*
+		 * FIXME: This races pretty badly against concurrent holders of
+		 * ring interrupts. This is possible since we've started to drop
+		 * dev->struct_mutex in select places when waiting for the gpu.
+		 */
 
-		/* rps/rc6 re-init is necessary to restore state lost after the
-		 * reset and the re-install of drm irq. Skip for ironlake per
+		/*
+		 * rps/rc6 re-init is necessary to restore state lost after the
+		 * reset and the re-install of gt irqs. Skip for ironlake per
 		 * previous concerns that it doesn't respond well to some forms
-		 * of re-init after reset. */
-		if (INTEL_INFO(dev)->gen > 5) {
-			mutex_lock(&dev->struct_mutex);
-			intel_enable_gt_powersave(dev);
-			mutex_unlock(&dev->struct_mutex);
-		}
+		 * of re-init after reset.
+		 */
+		if (INTEL_INFO(dev)->gen > 5)
+			intel_reset_gt_powersave(dev);
 
 		intel_hpd_init(dev);
 	} else {
@@ -891,21 +959,453 @@
 	return i915_drm_freeze(drm_dev);
 }
 
-static int i915_runtime_suspend(struct device *device)
+static int hsw_runtime_suspend(struct drm_i915_private *dev_priv)
+{
+	hsw_enable_pc8(dev_priv);	/* undone by hsw_disable_pc8() in hsw_runtime_resume() */
+
+	return 0;	/* this hook never reports a failure */
+}
+
+static int snb_runtime_resume(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	intel_init_pch_refclk(dev);	/* the only step performed on this path */
+
+	return 0;	/* this hook never reports a failure */
+}
+
+static int hsw_runtime_resume(struct drm_i915_private *dev_priv)
+{
+	hsw_disable_pc8(dev_priv);	/* reverses hsw_enable_pc8() from hsw_runtime_suspend() */
+
+	return 0;	/* this hook never reports a failure */
+}
+
+/*
+ * Save all Gunit registers that may be lost after a D3 and a subsequent
+ * S0i[R123] transition. The list of registers needing a save/restore is
+ * defined in the VLV2_S0IXRegs document. This document marks all Gunit
+ * registers in the following way:
+ * - Driver: saved/restored by the driver
+ * - Punit : saved/restored by the Punit firmware
+ * - No, w/o marking: no need to save/restore, since the register is R/O or
+ *                    used internally by the HW in a way that doesn't depend
+ *                    on keeping the content across a suspend/resume.
+ * - Debug : used for debugging
+ *
+ * We save/restore all registers marked with 'Driver', with the following
+ * exceptions:
+ * - Registers out of use, including also registers marked with 'Debug'.
+ *   These have no effect on the driver's operation, so we don't save/restore
+ *   them to reduce the overhead.
+ * - Registers that are fully setup by an initialization function called from
+ *   the resume path. For example many clock gating and RPS/RC6 registers.
+ * - Registers that provide the right functionality with their reset defaults.
+ *
+ * TODO: Except for registers that based on the above 3 criteria can be safely
+ * ignored, we save/restore all others, practically treating the HW context as
+ * a black-box for the driver. Further investigation is needed to reduce the
+ * saved/restored registers even further, by following the same 3 criteria.
+ */
+static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
+{
+	struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state;
+	int i;
+
+	/* GAM 0x4000-0x4770 */
+	s->wr_watermark		= I915_READ(GEN7_WR_WATERMARK);
+	s->gfx_prio_ctrl	= I915_READ(GEN7_GFX_PRIO_CTRL);
+	s->arb_mode		= I915_READ(ARB_MODE);
+	s->gfx_pend_tlb0	= I915_READ(GEN7_GFX_PEND_TLB0);
+	s->gfx_pend_tlb1	= I915_READ(GEN7_GFX_PEND_TLB1);
+
+	for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++)
+		s->lra_limits[i] = I915_READ(GEN7_LRA_LIMITS_BASE + i * 4);
+
+	s->media_max_req_count	= I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
+	s->gfx_max_req_count	= I915_READ(GEN7_GFX_MAX_REQ_COUNT);
+
+	s->render_hwsp		= I915_READ(RENDER_HWS_PGA_GEN7);
+	s->ecochk		= I915_READ(GAM_ECOCHK);
+	s->bsd_hwsp		= I915_READ(BSD_HWS_PGA_GEN7);
+	s->blt_hwsp		= I915_READ(BLT_HWS_PGA_GEN7);
+
+	s->tlb_rd_addr		= I915_READ(GEN7_TLB_RD_ADDR);
+
+	/* MBC 0x9024-0x91D0, 0x8500 */
+	s->g3dctl		= I915_READ(VLV_G3DCTL);
+	s->gsckgctl		= I915_READ(VLV_GSCKGCTL);
+	s->mbctl		= I915_READ(GEN6_MBCTL);
+
+	/* GCP 0x9400-0x9424, 0x8100-0x810C */
+	s->ucgctl1		= I915_READ(GEN6_UCGCTL1);
+	s->ucgctl3		= I915_READ(GEN6_UCGCTL3);
+	s->rcgctl1		= I915_READ(GEN6_RCGCTL1);
+	s->rcgctl2		= I915_READ(GEN6_RCGCTL2);
+	s->rstctl		= I915_READ(GEN6_RSTCTL);
+	s->misccpctl		= I915_READ(GEN7_MISCCPCTL);
+
+	/* GPM 0xA000-0xAA84, 0x8000-0x80FC */
+	s->gfxpause		= I915_READ(GEN6_GFXPAUSE);
+	s->rpdeuhwtc		= I915_READ(GEN6_RPDEUHWTC);
+	s->rpdeuc		= I915_READ(GEN6_RPDEUC);
+	s->ecobus		= I915_READ(ECOBUS);
+	s->pwrdwnupctl		= I915_READ(VLV_PWRDWNUPCTL);
+	s->rp_down_timeout	= I915_READ(GEN6_RP_DOWN_TIMEOUT);
+	s->rp_deucsw		= I915_READ(GEN6_RPDEUCSW);
+	s->rcubmabdtmr		= I915_READ(GEN6_RCUBMABDTMR);
+	s->rcedata		= I915_READ(VLV_RCEDATA);
+	s->spare2gh		= I915_READ(VLV_SPAREG2H);
+
+	/* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */
+	s->gt_imr		= I915_READ(GTIMR);
+	s->gt_ier		= I915_READ(GTIER);
+	s->pm_imr		= I915_READ(GEN6_PMIMR);
+	s->pm_ier		= I915_READ(GEN6_PMIER);
+
+	for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++)
+		s->gt_scratch[i] = I915_READ(GEN7_GT_SCRATCH_BASE + i * 4);
+
+	/* GT SA CZ domain, 0x100000-0x138124 */
+	s->tilectl		= I915_READ(TILECTL);
+	s->gt_fifoctl		= I915_READ(GTFIFOCTL);
+	s->gtlc_wake_ctrl	= I915_READ(VLV_GTLC_WAKE_CTRL);
+	s->gtlc_survive		= I915_READ(VLV_GTLC_SURVIVABILITY_REG);
+	s->pmwgicz		= I915_READ(VLV_PMWGICZ);
+
+	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
+	s->gu_ctl0		= I915_READ(VLV_GU_CTL0);
+	s->gu_ctl1		= I915_READ(VLV_GU_CTL1);
+	s->clock_gate_dis2	= I915_READ(VLV_GUNIT_CLOCK_GATE2);
+
+	/*
+	 * Not saving any of:
+	 * DFT,		0x9800-0x9EC0
+	 * SARB,	0xB000-0xB1FC
+	 * GAC,		0x5208-0x524C, 0x14000-0x14C000
+	 * PCI CFG
+	 */
+}
+
+static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
+{
+	struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state;
+	u32 val;
+	int i;
+
+	/* GAM 0x4000-0x4770 */
+	I915_WRITE(GEN7_WR_WATERMARK,	s->wr_watermark);
+	I915_WRITE(GEN7_GFX_PRIO_CTRL,	s->gfx_prio_ctrl);
+	I915_WRITE(ARB_MODE,		s->arb_mode | (0xffffU << 16));
+	I915_WRITE(GEN7_GFX_PEND_TLB0,	s->gfx_pend_tlb0);
+	I915_WRITE(GEN7_GFX_PEND_TLB1,	s->gfx_pend_tlb1);
+
+	for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++)
+		I915_WRITE(GEN7_LRA_LIMITS_BASE + i * 4, s->lra_limits[i]);
+
+	I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->media_max_req_count);
+	I915_WRITE(GEN7_GFX_MAX_REQ_COUNT, s->gfx_max_req_count);
+
+	I915_WRITE(RENDER_HWS_PGA_GEN7,	s->render_hwsp);
+	I915_WRITE(GAM_ECOCHK,		s->ecochk);
+	I915_WRITE(BSD_HWS_PGA_GEN7,	s->bsd_hwsp);
+	I915_WRITE(BLT_HWS_PGA_GEN7,	s->blt_hwsp);
+
+	I915_WRITE(GEN7_TLB_RD_ADDR,	s->tlb_rd_addr);
+
+	/* MBC 0x9024-0x91D0, 0x8500 */
+	I915_WRITE(VLV_G3DCTL,		s->g3dctl);
+	I915_WRITE(VLV_GSCKGCTL,	s->gsckgctl);
+	I915_WRITE(GEN6_MBCTL,		s->mbctl);
+
+	/* GCP 0x9400-0x9424, 0x8100-0x810C */
+	I915_WRITE(GEN6_UCGCTL1,	s->ucgctl1);
+	I915_WRITE(GEN6_UCGCTL3,	s->ucgctl3);
+	I915_WRITE(GEN6_RCGCTL1,	s->rcgctl1);
+	I915_WRITE(GEN6_RCGCTL2,	s->rcgctl2);
+	I915_WRITE(GEN6_RSTCTL,		s->rstctl);
+	I915_WRITE(GEN7_MISCCPCTL,	s->misccpctl);
+
+	/* GPM 0xA000-0xAA84, 0x8000-0x80FC */
+	I915_WRITE(GEN6_GFXPAUSE,	s->gfxpause);
+	I915_WRITE(GEN6_RPDEUHWTC,	s->rpdeuhwtc);
+	I915_WRITE(GEN6_RPDEUC,		s->rpdeuc);
+	I915_WRITE(ECOBUS,		s->ecobus);
+	I915_WRITE(VLV_PWRDWNUPCTL,	s->pwrdwnupctl);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,s->rp_down_timeout);
+	I915_WRITE(GEN6_RPDEUCSW,	s->rp_deucsw);
+	I915_WRITE(GEN6_RCUBMABDTMR,	s->rcubmabdtmr);
+	I915_WRITE(VLV_RCEDATA,		s->rcedata);
+	I915_WRITE(VLV_SPAREG2H,	s->spare2gh);
+
+	/* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */
+	I915_WRITE(GTIMR,		s->gt_imr);
+	I915_WRITE(GTIER,		s->gt_ier);
+	I915_WRITE(GEN6_PMIMR,		s->pm_imr);
+	I915_WRITE(GEN6_PMIER,		s->pm_ier);
+
+	for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++)
+		I915_WRITE(GEN7_GT_SCRATCH_BASE + i * 4, s->gt_scratch[i]);
+
+	/* GT SA CZ domain, 0x100000-0x138124 */
+	I915_WRITE(TILECTL,			s->tilectl);
+	I915_WRITE(GTFIFOCTL,			s->gt_fifoctl);
+	/*
+	 * Preserve the GT allow wake and GFX force clock bits, they are not
+	 * to be restored, as they are used to control the s0ix suspend/resume
+	 * sequence by the caller.
+	 */
+	val = I915_READ(VLV_GTLC_WAKE_CTRL);
+	val &= VLV_GTLC_ALLOWWAKEREQ;
+	val |= s->gtlc_wake_ctrl & ~VLV_GTLC_ALLOWWAKEREQ;
+	I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
+
+	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
+	val &= VLV_GFX_CLK_FORCE_ON_BIT;
+	val |= s->gtlc_survive & ~VLV_GFX_CLK_FORCE_ON_BIT;
+	I915_WRITE(VLV_GTLC_SURVIVABILITY_REG, val);
+
+	I915_WRITE(VLV_PMWGICZ,			s->pmwgicz);
+
+	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
+	I915_WRITE(VLV_GU_CTL0,			s->gu_ctl0);
+	I915_WRITE(VLV_GU_CTL1,			s->gu_ctl1);
+	I915_WRITE(VLV_GUNIT_CLOCK_GATE2,	s->clock_gate_dis2);
+}
+
+int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
+{
+	u32 val;
+	int err;
+
+	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
+	WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on); /* already in the requested state */
+
+#define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
+	/* Wait for a previous force-off to settle, i.e. the status bit to clear */
+	if (force_on) {
+		err = wait_for(!COND, 20);
+		if (err) {
+			DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
+				  I915_READ(VLV_GTLC_SURVIVABILITY_REG));
+			return err;
+		}
+	}
+
+	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
+	val &= ~VLV_GFX_CLK_FORCE_ON_BIT;
+	if (force_on)
+		val |= VLV_GFX_CLK_FORCE_ON_BIT;
+	I915_WRITE(VLV_GTLC_SURVIVABILITY_REG, val);
+
+	if (!force_on)
+		return 0;	/* force-off is not waited for here, only before the next force-on */
+
+	err = wait_for(COND, 20);
+	if (err)
+		DRM_ERROR("timeout waiting for GFX clock force-on (%08x)\n",
+			  I915_READ(VLV_GTLC_SURVIVABILITY_REG));
+
+	return err;
+#undef COND
+}
+
+static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
+{
+	u32 val;
+	int err;
+
+	val = I915_READ(VLV_GTLC_WAKE_CTRL) & ~VLV_GTLC_ALLOWWAKEREQ;
+	if (allow)
+		val |= VLV_GTLC_ALLOWWAKEREQ;
+	I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
+	POSTING_READ(VLV_GTLC_WAKE_CTRL);
+
+#define COND (!!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEACK) == \
+	      allow)
+	err = wait_for(COND, 1);	/* wait for the ack bit to match the request */
+	if (err)
+		DRM_ERROR("timeout %s GT waking\n",
+			  allow ? "enabling" : "disabling");
+	return err;
+#undef COND
+}
+
+static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
+				 bool wait_for_on)
+{
+	u32 mask;
+	u32 val;
+	int err;
+
+	mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK;
+	val = wait_for_on ? mask : 0;	/* expected status: all masked bits set, or all clear */
+#define COND ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val)
+	if (COND)
+		return 0;	/* already in the requested state, nothing to wait for */
+
+	DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n",
+			wait_for_on ? "on" : "off",
+			I915_READ(VLV_GTLC_PW_STATUS));
+
+	/*
+	 * RC6 transitioning can be delayed up to 2 msec (see
+	 * valleyview_enable_rps), use 3 msec for safety.
+	 */
+	err = wait_for(COND, 3);
+	if (err)
+		DRM_ERROR("timeout waiting for GT wells to go %s\n",
+			  wait_for_on ? "on" : "off");
+
+	return err;
+#undef COND
+}
+
+static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
+{
+	if (!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEERR))
+		return;	/* error flag not set: nothing to report */
+
+	DRM_ERROR("GT register access while GT waking disabled\n");
+	I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR); /* presumably write-1-to-clear - TODO confirm */
+}
+
+static int vlv_runtime_suspend(struct drm_i915_private *dev_priv)
+{
+	u32 mask;
+	int err;	/* first failing step's error, returned via err1/err2 */
+
+	/*
+	 * Bspec defines the GT well on flags polled here as debug only, so
+	 * a timeout is deliberately ignored instead of failing the suspend.
+	 */
+	(void)vlv_wait_for_gt_wells(dev_priv, false);
+
+	mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS;
+	WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask);
+
+	vlv_check_no_gt_access(dev_priv);
+
+	err = vlv_force_gfx_clock(dev_priv, true);
+	if (err)
+		goto err1;
+
+	err = vlv_allow_gt_wake(dev_priv, false);
+	if (err)
+		goto err2;
+	vlv_save_gunit_s0ix_state(dev_priv);	/* clock forced on, waking disallowed */
+
+	err = vlv_force_gfx_clock(dev_priv, false);
+	if (err)
+		goto err2;
+
+	return 0;
+
+err2:
+	/* For safety always re-enable waking and disable gfx clock forcing */
+	vlv_allow_gt_wake(dev_priv, true);
+err1:	/* also reached by falling through from err2 */
+	vlv_force_gfx_clock(dev_priv, false);
+
+	return err;
+}
+
+static int vlv_runtime_resume(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int err;	/* result of the current step */
+	int ret;	/* first error seen, 0 if none */
+
+	/*
+	 * If any of the steps fail just try to continue, that's the best we
+	 * can do at this point. Return the first error code (which will also
+	 * leave RPM permanently disabled).
+	 */
+	ret = vlv_force_gfx_clock(dev_priv, true);
+
+	vlv_restore_gunit_s0ix_state(dev_priv);	/* runs even if the force-on failed */
+
+	err = vlv_allow_gt_wake(dev_priv, true);
+	if (!ret)
+		ret = err;	/* keep only the first failure */
+
+	err = vlv_force_gfx_clock(dev_priv, false);
+	if (!ret)
+		ret = err;	/* keep only the first failure */
+
+	vlv_check_no_gt_access(dev_priv);
+
+	intel_init_clock_gating(dev);
+	i915_gem_restore_fences(dev);
+
+	return ret;
+}
+
+static int intel_runtime_suspend(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev))))
+		return -ENODEV;
 
 	WARN_ON(!HAS_RUNTIME_PM(dev));
 	assert_force_wake_inactive(dev_priv);
 
 	DRM_DEBUG_KMS("Suspending device\n");
 
-	if (HAS_PC8(dev))
-		hsw_enable_pc8(dev_priv);
+	/*
+	 * We could deadlock here in case another thread holding struct_mutex
+	 * calls RPM resume concurrently, since the RPM resume will wait
+	 * first for this RPM suspend to finish. In this case the concurrent
+	 * RPM resume will be followed by its RPM suspend counterpart. Still
+	 * for consistency return -EAGAIN, which will reschedule this suspend.
+	 */
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
+		/*
+		 * Bump the expiration timestamp, otherwise the suspend won't
+		 * be rescheduled.
+		 */
+		pm_runtime_mark_last_busy(device);
 
+		return -EAGAIN;
+	}
+	/*
+	 * We are safe here against re-faults, since the fault handler takes
+	 * an RPM reference.
+	 */
 	i915_gem_release_all_mmaps(dev_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+	/*
+	 * rps.work can't be rearmed here, since we get here only after making
+	 * sure the GPU is idle and the RPS freq is set to the minimum. See
+	 * intel_mark_idle().
+	 */
+	cancel_work_sync(&dev_priv->rps.work);
+	intel_runtime_pm_disable_interrupts(dev);
+
+	if (IS_GEN6(dev)) {
+		ret = 0;
+	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+		ret = hsw_runtime_suspend(dev_priv);
+	} else if (IS_VALLEYVIEW(dev)) {
+		ret = vlv_runtime_suspend(dev_priv);
+	} else {
+		ret = -ENODEV;
+		WARN_ON(1);
+	}
+
+	if (ret) {
+		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
+		intel_runtime_pm_restore_interrupts(dev);
+
+		return ret;
+	}
 
 	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
 	dev_priv->pm.suspended = true;
@@ -923,11 +1423,12 @@
 	return 0;
 }
 
-static int i915_runtime_resume(struct device *device)
+static int intel_runtime_resume(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
 
 	WARN_ON(!HAS_RUNTIME_PM(dev));
 
@@ -936,11 +1437,33 @@
 	intel_opregion_notify_adapter(dev, PCI_D0);
 	dev_priv->pm.suspended = false;
 
-	if (HAS_PC8(dev))
-		hsw_disable_pc8(dev_priv);
+	if (IS_GEN6(dev)) {
+		ret = snb_runtime_resume(dev_priv);
+	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+		ret = hsw_runtime_resume(dev_priv);
+	} else if (IS_VALLEYVIEW(dev)) {
+		ret = vlv_runtime_resume(dev_priv);
+	} else {
+		WARN_ON(1);
+		ret = -ENODEV;
+	}
 
-	DRM_DEBUG_KMS("Device resumed\n");
-	return 0;
+	/*
+	 * No point of rolling back things in case of an error, as the best
+	 * we can do is to hope that things will still work (and disable RPM).
+	 */
+	i915_gem_init_swizzling(dev);
+	gen6_update_ring_freq(dev);
+
+	intel_runtime_pm_restore_interrupts(dev);
+	intel_reset_gt_powersave(dev);
+
+	if (ret)
+		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
+	else
+		DRM_DEBUG_KMS("Device resumed\n");
+
+	return ret;
 }
 
 static const struct dev_pm_ops i915_pm_ops = {
@@ -954,8 +1477,8 @@
 	.poweroff = i915_pm_poweroff,
 	.restore_early = i915_pm_resume_early,
 	.restore = i915_pm_resume,
-	.runtime_suspend = i915_runtime_suspend,
-	.runtime_resume = i915_runtime_resume,
+	.runtime_suspend = intel_runtime_suspend,
+	.runtime_resume = intel_runtime_resume,
 };
 
 static const struct vm_operations_struct i915_gem_vm_ops = {
@@ -1062,6 +1585,7 @@
 		driver.get_vblank_timestamp = NULL;
 #ifndef CONFIG_DRM_I915_UMS
 		/* Silently fail loading to not upset userspace. */
+		DRM_DEBUG_DRIVER("KMS and UMS disabled.\n");
 		return 0;
 #endif
 	}

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 388c028..49414d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h

@@ -35,11 +35,13 @@
 #include "i915_reg.h"
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
+#include "i915_gem_gtt.h"
 #include <linux/io-mapping.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -91,7 +93,7 @@
 };
 #define port_name(p) ((p) + 'A')
 
-#define I915_NUM_PHYS_VLV 1
+#define I915_NUM_PHYS_VLV 2
 
 enum dpio_channel {
 	DPIO_CH0,
@@ -162,6 +164,12 @@
 #define for_each_pipe(p) for ((p) = 0; (p) < INTEL_INFO(dev)->num_pipes; (p)++)
 #define for_each_sprite(p, s) for ((s) = 0; (s) < INTEL_INFO(dev)->num_sprites[(p)]; (s)++)
 
+#define for_each_crtc(dev, crtc) \
+	list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head)
+
+#define for_each_intel_crtc(dev, intel_crtc) \
+	list_for_each_entry(intel_crtc, &(dev)->mode_config.crtc_list, base.head)
+
 #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
 	list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
 		if ((intel_encoder)->base.crtc == (__crtc))
@@ -171,6 +179,7 @@
 		if ((intel_connector)->base.encoder == (__encoder))
 
 struct drm_i915_private;
+struct i915_mmu_object;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -312,7 +321,6 @@
 	u32 gab_ctl;
 	u32 gfx_mode;
 	u32 extra_instdone[I915_NUM_INSTDONE_REG];
-	u32 pipestat[I915_MAX_PIPES];
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
@@ -346,7 +354,7 @@
 		u64 bbaddr;
 		u64 acthd;
 		u32 fault_reg;
-		u32 faddr;
+		u64 faddr;
 		u32 rc_psmi; /* sleep state */
 		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
 
@@ -385,6 +393,7 @@
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -449,10 +458,11 @@
 	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
 			  struct drm_framebuffer *fb,
 			  struct drm_i915_gem_object *obj,
+			  struct intel_engine_cs *ring,
 			  uint32_t flags);
-	int (*update_primary_plane)(struct drm_crtc *crtc,
-				    struct drm_framebuffer *fb,
-				    int x, int y);
+	void (*update_primary_plane)(struct drm_crtc *crtc,
+				     struct drm_framebuffer *fb,
+				     int x, int y);
 	void (*hpd_irq_setup)(struct drm_device *dev);
 	/* clock updates for mode set */
 	/* cursor updates */
@@ -545,6 +555,7 @@
 	int dpll_offsets[I915_MAX_PIPES];
 	int dpll_md_offsets[I915_MAX_PIPES];
 	int palette_offsets[I915_MAX_PIPES];
+	int cursor_offsets[I915_MAX_PIPES];
 };
 
 #undef DEFINE_FLAG
@@ -560,168 +571,6 @@
 	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
 };
 
-typedef uint32_t gen6_gtt_pte_t;
-
-/**
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
-	struct drm_mm_node node;
-	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm;
-
-	/** This object's place on the active/inactive lists */
-	struct list_head mm_list;
-
-	struct list_head vma_link; /* Link in the object's VMA list */
-
-	/** This vma's place in the batchbuffer or on the eviction list */
-	struct list_head exec_list;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	struct hlist_node exec_node;
-	unsigned long exec_handle;
-	struct drm_i915_gem_exec_object2 *exec_entry;
-
-	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
-	 * (via user_pin_count), execbuffer (objects are not allowed multiple
-	 * times for the same batchbuffer), and the framebuffer code. When
-	 * switching/pageflipping, the framebuffer code has at most two buffers
-	 * pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
-
-	/** Unmap an object from an address space. This usually consists of
-	 * setting the valid PTE entries to a reserved scratch page. */
-	void (*unbind_vma)(struct i915_vma *vma);
-	/* Map an object into an address space with the given cache flags. */
-#define GLOBAL_BIND (1<<0)
-	void (*bind_vma)(struct i915_vma *vma,
-			 enum i915_cache_level cache_level,
-			 u32 flags);
-};
-
-struct i915_address_space {
-	struct drm_mm mm;
-	struct drm_device *dev;
-	struct list_head global_link;
-	unsigned long start;		/* Start offset always 0 for dri2 */
-	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
-
-	struct {
-		dma_addr_t addr;
-		struct page *page;
-	} scratch;
-
-	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives.  last_rendering_seqno
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
-	 */
-	struct list_head active_list;
-
-	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_rendering_seqno is 0 while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
-	/* FIXME: Need a more generic return type */
-	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-				     enum i915_cache_level level,
-				     bool valid); /* Create a valid PTE */
-	void (*clear_range)(struct i915_address_space *vm,
-			    uint64_t start,
-			    uint64_t length,
-			    bool use_scratch);
-	void (*insert_entries)(struct i915_address_space *vm,
-			       struct sg_table *st,
-			       uint64_t start,
-			       enum i915_cache_level cache_level);
-	void (*cleanup)(struct i915_address_space *vm);
-};
-
-/* The Graphics Translation Table is the way in which GEN hardware translates a
- * Graphics Virtual Address into a Physical Address. In addition to the normal
- * collateral associated with any va->pa translations GEN hardware also has a
- * portion of the GTT which can be mapped by the CPU and remain both coherent
- * and correct (in cases like swizzling). That region is referred to as GMADR in
- * the spec.
- */
-struct i915_gtt {
-	struct i915_address_space base;
-	size_t stolen_size;		/* Total size of stolen memory */
-
-	unsigned long mappable_end;	/* End offset that we can CPU map */
-	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
-	phys_addr_t mappable_base;	/* PA of our GMADR */
-
-	/** "Graphics Stolen Memory" holds the global PTEs */
-	void __iomem *gsm;
-
-	bool do_idle_maps;
-
-	int mtrr;
-
-	/* global gtt ops */
-	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
-			  size_t *stolen, phys_addr_t *mappable_base,
-			  unsigned long *mappable_end);
-};
-#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
-
-#define GEN8_LEGACY_PDPS 4
-struct i915_hw_ppgtt {
-	struct i915_address_space base;
-	struct kref ref;
-	struct drm_mm_node node;
-	unsigned num_pd_entries;
-	unsigned num_pd_pages; /* gen8+ */
-	union {
-		struct page **pt_pages;
-		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
-	};
-	struct page *pd_pages;
-	union {
-		uint32_t pd_offset;
-		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
-	};
-	union {
-		dma_addr_t *pt_dma_addr;
-		dma_addr_t *gen8_pt_dma_addr[4];
-	};
-
-	struct i915_hw_context *ctx;
-
-	int (*enable)(struct i915_hw_ppgtt *ppgtt);
-	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_ring_buffer *ring,
-			 bool synchronous);
-	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
-};
-
 struct i915_ctx_hang_stats {
 	/* This context had batch pending when hang was declared */
 	unsigned batch_pending;
@@ -738,13 +587,13 @@
 
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_ID 0
-struct i915_hw_context {
+struct intel_context {
 	struct kref ref;
 	int id;
 	bool is_initialized;
 	uint8_t remap_slice;
 	struct drm_i915_file_private *file_priv;
-	struct intel_ring_buffer *last_ring;
+	struct intel_engine_cs *last_ring;
 	struct drm_i915_gem_object *obj;
 	struct i915_ctx_hang_stats hang_stats;
 	struct i915_address_space *vm;
@@ -782,6 +631,10 @@
 	} no_fbc_reason;
 };
 
+struct i915_drrs {
+	struct intel_connector *connector;
+};
+
 struct i915_psr {
 	bool sink_support;
 	bool source_ok;
@@ -965,6 +818,67 @@
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct vlv_s0ix_state {	/* filled by vlv_save_gunit_s0ix_state() */
+	/* GAM 0x4000-0x4770 */
+	u32 wr_watermark;
+	u32 gfx_prio_ctrl;
+	u32 arb_mode;
+	u32 gfx_pend_tlb0;
+	u32 gfx_pend_tlb1;
+	u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM];
+	u32 media_max_req_count;
+	u32 gfx_max_req_count;
+	u32 render_hwsp;
+	u32 ecochk;
+	u32 bsd_hwsp;
+	u32 blt_hwsp;
+	u32 tlb_rd_addr;
+
+	/* MBC 0x9024-0x91D0, 0x8500 */
+	u32 g3dctl;
+	u32 gsckgctl;
+	u32 mbctl;
+
+	/* GCP 0x9400-0x9424, 0x8100-0x810C */
+	u32 ucgctl1;
+	u32 ucgctl3;
+	u32 rcgctl1;
+	u32 rcgctl2;
+	u32 rstctl;
+	u32 misccpctl;
+
+	/* GPM 0xA000-0xAA84, 0x8000-0x80FC */
+	u32 gfxpause;
+	u32 rpdeuhwtc;
+	u32 rpdeuc;
+	u32 ecobus;
+	u32 pwrdwnupctl;
+	u32 rp_down_timeout;
+	u32 rp_deucsw;
+	u32 rcubmabdtmr;
+	u32 rcedata;
+	u32 spare2gh;	/* holds VLV_SPAREG2H */
+
+	/* Display 1 CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */
+	u32 gt_imr;
+	u32 gt_ier;
+	u32 pm_imr;
+	u32 pm_ier;
+	u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM];
+
+	/* GT SA CZ domain, 0x100000-0x138124 */
+	u32 tilectl;
+	u32 gt_fifoctl;
+	u32 gtlc_wake_ctrl;
+	u32 gtlc_survive;
+	u32 pmwgicz;
+
+	/* Display 2 CZ domain (Gunit-Display), 0x182028-0x1821CF */
+	u32 gu_ctl0;
+	u32 gu_ctl1;
+	u32 clock_gate_dis2;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -1074,6 +988,7 @@
 	 * time are on. They are kept on until after the first modeset.
 	 */
 	bool init_power_on;
+	bool initializing;
 	int power_well_count;
 
 	struct mutex lock;
@@ -1132,7 +1047,8 @@
 	/** PPGTT used for aliasing the PPGTT with the GTT */
 	struct i915_hw_ppgtt *aliasing_ppgtt;
 
-	struct shrinker inactive_shrinker;
+	struct notifier_block oom_notifier;
+	struct shrinker shrinker;
 	bool shrinker_no_lock_stealing;
 
 	/** LRU list of objects with fence regs on them. */
@@ -1170,6 +1086,9 @@
 	 */
 	bool busy;
 
+	/* the indicator for dispatch video commands on two BSD rings */
+	int bsd_ring_dispatch_index;
+
 	/** Bit 6 swizzling required for X tiling */
 	uint32_t bit_6_swizzle_x;
 	/** Bit 6 swizzling required for Y tiling */
@@ -1245,8 +1164,12 @@
 	 */
 	wait_queue_head_t reset_queue;
 
-	/* For gpu hang simulation. */
-	unsigned int stop_rings;
+	/* Userspace knobs for gpu hang simulation;
+	 * combines both a ring mask, and extra flags
+	 */
+	u32 stop_rings;
+#define I915_STOP_RING_ALLOW_BAN       (1 << 31)
+#define I915_STOP_RING_ALLOW_WARN      (1 << 30)
 
 	/* For missed irq/seqno simulation. */
 	unsigned int test_irq_rings;
@@ -1266,6 +1189,12 @@
 	uint8_t supports_dp:1;
 };
 
+enum drrs_support_type {
+	DRRS_NOT_SUPPORTED = 0,
+	STATIC_DRRS_SUPPORT = 1,
+	SEAMLESS_DRRS_SUPPORT = 2
+};
+
 struct intel_vbt_data {
 	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
 	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
@@ -1278,9 +1207,12 @@
 	unsigned int lvds_use_ssc:1;
 	unsigned int display_clock_mode:1;
 	unsigned int fdi_rx_polarity_inverted:1;
+	unsigned int has_mipi:1;
 	int lvds_ssc_freq;
 	unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
 
+	enum drrs_support_type drrs_type;
+
 	/* eDP */
 	int edp_rate;
 	int edp_lanes;
@@ -1299,7 +1231,14 @@
 
 	/* MIPI DSI */
 	struct {
+		u16 port;
 		u16 panel_id;
+		struct mipi_config *config;
+		struct mipi_pps_data *pps;
+		u8 seq_version;
+		u32 size;
+		u8 *data;
+		u8 *sequence[MIPI_SEQ_MAX];
 	} dsi;
 
 	int crt_ddc_pin;
@@ -1351,23 +1290,13 @@
  * goes back to false exactly before we reenable the IRQs. We use this variable
  * to check if someone is trying to enable/disable IRQs while they're supposed
  * to be disabled. This shouldn't happen and we'll print some error messages in
- * case it happens, but if it actually happens we'll also update the variables
- * inside struct regsave so when we restore the IRQs they will contain the
- * latest expected values.
+ * case it happens.
  *
  * For more, read the Documentation/power/runtime_pm.txt.
  */
 struct i915_runtime_pm {
 	bool suspended;
 	bool irqs_disabled;
-
-	struct {
-		uint32_t deimr;
-		uint32_t sdeimr;
-		uint32_t gtimr;
-		uint32_t gtier;
-		uint32_t gen6_pmimr;
-	} regsave;
 };
 
 enum intel_pipe_crc_source {
@@ -1400,7 +1329,7 @@
 	wait_queue_head_t wq;
 };
 
-typedef struct drm_i915_private {
+struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
 
@@ -1424,10 +1353,13 @@
 	 */
 	uint32_t gpio_mmio_base;
 
+	/* MMIO base address for MIPI regs */
+	uint32_t mipi_mmio_base;
+
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
-	struct intel_ring_buffer ring[I915_NUM_RINGS];
+	struct intel_engine_cs ring[I915_NUM_RINGS];
 	uint32_t last_seqno, next_seqno;
 
 	drm_dma_handle_t *status_page_dmah;
@@ -1469,6 +1401,7 @@
 	struct timer_list hotplug_reenable_timer;
 
 	struct i915_fbc fbc;
+	struct i915_drrs drrs;
 	struct intel_opregion opregion;
 	struct intel_vbt_data vbt;
 
@@ -1486,6 +1419,7 @@
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
+	unsigned int vlv_cdclk_freq;
 
 	/**
 	 * wq - Driver workqueue for GEM.
@@ -1509,9 +1443,12 @@
 	struct mutex modeset_restore_lock;
 
 	struct list_head vm_list; /* Global list of all address spaces */
-	struct i915_gtt gtt; /* VMA representing the global address space */
+	struct i915_gtt gtt; /* VM representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1580,6 +1517,7 @@
 
 	u32 suspend_count;
 	struct i915_suspend_saved_registers regfile;
+	struct vlv_s0ix_state vlv_s0ix_state;
 
 	struct {
 		/*
@@ -1605,7 +1543,12 @@
 	struct i915_dri1_state dri1;
 	/* Old ums support infrastructure, same warning applies. */
 	struct i915_ums_state ums;
-} drm_i915_private_t;
+
+	/*
+	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
+	 * will be rejected. Instead look for a better place.
+	 */
+};
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
 {
@@ -1642,6 +1585,8 @@
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	int (*dmabuf_export)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1732,7 +1677,7 @@
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 
 	/** Breadcrumb of last rendering to the buffer. */
 	uint32_t last_read_seqno;
@@ -1755,8 +1700,20 @@
 
 	/** for phy allocated objects */
 	drm_dma_handle_t *phys_handle;
-};
 
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned workers :4;
+#define I915_GEM_USERPTR_MAX_WORKERS 15
+
+			struct mm_struct *mm;
+			struct i915_mmu_object *mn;
+			struct work_struct *work;
+		} userptr;
+	};
+};
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1771,7 +1728,7 @@
  */
 struct drm_i915_gem_request {
 	/** On Which ring this request was generated */
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 
 	/** GEM sequence number associated with this request. */
 	uint32_t seqno;
@@ -1783,7 +1740,7 @@
 	u32 tail;
 
 	/** Context related to this request */
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 
 	/** Batch buffer related to this request if any */
 	struct drm_i915_gem_object *batch_obj;
@@ -1810,8 +1767,8 @@
 	} mm;
 	struct idr context_idr;
 
-	struct i915_hw_context *private_default_ctx;
 	atomic_t rps_wait_boost;
+	struct  intel_engine_cs *bsd_ring;
 };
 
 /*
@@ -1879,11 +1836,17 @@
 	 * the expected value, the parser rejects it. Only valid if flags has
 	 * the CMD_DESC_BITMASK bit set. Only entries where mask is non-zero
 	 * are valid.
+	 *
+	 * If the check specifies a non-zero condition_mask then the parser
+	 * only performs the check when the bits specified by condition_mask
+	 * are non-zero.
 	 */
 	struct {
 		u32 offset;
 		u32 mask;
 		u32 expected;
+		u32 condition_offset;
+		u32 condition_mask;
 	} bits[MAX_CMD_DESC_BITMASKS];
 };
 
@@ -1925,8 +1888,9 @@
 				 (dev)->pdev->device == 0x0106 || \
 				 (dev)->pdev->device == 0x010A)
 #define IS_VALLEYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview)
+#define IS_CHERRYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
-#define IS_BROADWELL(dev)	(INTEL_INFO(dev)->gen == 8)
+#define IS_BROADWELL(dev)	(!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
 				 ((dev)->pdev->device & 0xFF00) == 0x0C00)
@@ -1962,17 +1926,21 @@
 #define BSD_RING		(1<<VCS)
 #define BLT_RING		(1<<BCS)
 #define VEBOX_RING		(1<<VECS)
-#define HAS_BSD(dev)            (INTEL_INFO(dev)->ring_mask & BSD_RING)
-#define HAS_BLT(dev)            (INTEL_INFO(dev)->ring_mask & BLT_RING)
-#define HAS_VEBOX(dev)            (INTEL_INFO(dev)->ring_mask & VEBOX_RING)
-#define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
-#define HAS_WT(dev)            (IS_HASWELL(dev) && to_i915(dev)->ellc_size)
+#define BSD2_RING		(1<<VCS2)
+#define HAS_BSD(dev)		(INTEL_INFO(dev)->ring_mask & BSD_RING)
+#define HAS_BSD2(dev)		(INTEL_INFO(dev)->ring_mask & BSD2_RING)
+#define HAS_BLT(dev)		(INTEL_INFO(dev)->ring_mask & BLT_RING)
+#define HAS_VEBOX(dev)		(INTEL_INFO(dev)->ring_mask & VEBOX_RING)
+#define HAS_LLC(dev)		(INTEL_INFO(dev)->has_llc)
+#define HAS_WT(dev)		((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \
+				 to_i915(dev)->ellc_size)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
-#define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
-#define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) \
-				 && !IS_BROADWELL(dev))
+#define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6 && \
+				 (!IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)))
+#define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 \
+				 && !IS_GEN8(dev))
 #define USES_PPGTT(dev)		intel_enable_ppgtt(dev, false)
 #define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
 
@@ -2010,8 +1978,8 @@
 #define HAS_DDI(dev)		(INTEL_INFO(dev)->has_ddi)
 #define HAS_FPGA_DBG_UNCLAIMED(dev)	(INTEL_INFO(dev)->has_fpga_dbg)
 #define HAS_PSR(dev)		(IS_HASWELL(dev) || IS_BROADWELL(dev))
-#define HAS_PC8(dev)		(IS_HASWELL(dev)) /* XXX HSW:ULX */
-#define HAS_RUNTIME_PM(dev)	(IS_HASWELL(dev))
+#define HAS_RUNTIME_PM(dev)	(IS_GEN6(dev) || IS_HASWELL(dev) || \
+				 IS_BROADWELL(dev) || IS_VALLEYVIEW(dev))
 
 #define INTEL_PCH_DEVICE_ID_MASK		0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE		0x3b00
@@ -2068,6 +2036,7 @@
 	bool prefault_disable;
 	bool reset;
 	bool disable_display;
+	bool disable_vtd_wa;
 };
 extern struct i915_params i915 __read_mostly;
 
@@ -2096,6 +2065,7 @@
 extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
+int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 extern void intel_console_resume(struct work_struct *work);
 
@@ -2170,6 +2140,9 @@
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
@@ -2227,9 +2200,9 @@
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_ring_buffer *to);
+			 struct intel_engine_cs *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_ring_buffer *ring);
+			     struct intel_engine_cs *ring);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
@@ -2249,31 +2222,14 @@
 int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
-static inline bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		dev_priv->fence_regs[obj->fence_reg].pin_count++;
-		return true;
-	} else
-		return false;
-}
-
-static inline void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-		dev_priv->fence_regs[obj->fence_reg].pin_count--;
-	}
-}
+bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_request *
-i915_gem_find_active_request(struct intel_ring_buffer *ring);
+i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
+void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
@@ -2292,23 +2248,35 @@
 	return ((atomic_read(&error->reset_counter) & ~I915_WEDGED) + 1) / 2;
 }
 
+static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
+{
+	return dev_priv->gpu_error.stop_rings == 0 ||
+		dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN;
+}
+
+static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
+{
+	return dev_priv->gpu_error.stop_rings == 0 ||
+		dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN;
+}
+
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice);
+int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
-int __i915_add_request(struct intel_ring_buffer *ring,
+int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *batch_obj,
 		       u32 *seqno);
 #define i915_add_request(ring, seqno) \
 	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
+int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
@@ -2319,7 +2287,7 @@
 int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_ring_buffer *pipelined);
+				     struct intel_engine_cs *pipelined);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 				int align);
@@ -2416,22 +2384,22 @@
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 int i915_gem_context_enable(struct drm_i915_private *dev_priv);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
-int i915_switch_context(struct intel_ring_buffer *ring,
-			struct i915_hw_context *to);
-struct i915_hw_context *
+int i915_switch_context(struct intel_engine_cs *ring,
+			struct intel_context *to);
+struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
 void i915_gem_context_free(struct kref *ctx_ref);
-static inline void i915_gem_context_reference(struct i915_hw_context *ctx)
+static inline void i915_gem_context_reference(struct intel_context *ctx)
 {
 	kref_get(&ctx->ref);
 }
 
-static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
+static inline void i915_gem_context_unreference(struct intel_context *ctx)
 {
 	kref_put(&ctx->ref, i915_gem_context_free);
 }
 
-static inline bool i915_gem_context_is_default(const struct i915_hw_context *c)
+static inline bool i915_gem_context_is_default(const struct intel_context *c)
 {
 	return c->id == DEFAULT_CONTEXT_ID;
 }
@@ -2441,6 +2409,8 @@
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
+/* i915_gem_render_state.c */
+int i915_gem_render_state_init(struct intel_engine_cs *ring);
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  struct i915_address_space *vm,
@@ -2453,23 +2423,12 @@
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
-/* i915_gem_gtt.c */
-void i915_check_and_clear_faults(struct drm_device *dev);
-void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
-void i915_gem_restore_gtt_mappings(struct drm_device *dev);
-int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
-void i915_gem_init_global_gtt(struct drm_device *dev);
-void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
-			       unsigned long mappable_end, unsigned long end);
-int i915_gem_gtt_init(struct drm_device *dev);
+/* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
 {
 	if (INTEL_INFO(dev)->gen < 6)
 		intel_gtt_chipset_flush();
 }
-int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
-bool intel_enable_ppgtt(struct drm_device *dev, bool full);
 
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
@@ -2537,9 +2496,11 @@
 const char *i915_cache_level_str(int type);
 
 /* i915_cmd_parser.c */
-void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring);
-bool i915_needs_cmd_parser(struct intel_ring_buffer *ring);
-int i915_parse_cmds(struct intel_ring_buffer *ring,
+int i915_cmd_parser_get_version(void);
+int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
+void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring);
+bool i915_needs_cmd_parser(struct intel_engine_cs *ring);
+int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    u32 batch_start_offset,
 		    bool is_master);
@@ -2688,20 +2649,6 @@
 int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val);
 int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
 
-void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine);
-void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
-
-#define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \
-	(((reg) >= 0x2000 && (reg) < 0x4000) ||\
-	((reg) >= 0x5000 && (reg) < 0x8000) ||\
-	((reg) >= 0xB000 && (reg) < 0x12000) ||\
-	((reg) >= 0x2E000 && (reg) < 0x30000))
-
-#define FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)\
-	(((reg) >= 0x12000 && (reg) < 0x14000) ||\
-	((reg) >= 0x22000 && (reg) < 0x24000) ||\
-	((reg) >= 0x30000 && (reg) < 0x40000))
-
 #define FORCEWAKE_RENDER	(1 << 0)
 #define FORCEWAKE_MEDIA		(1 << 1)
 #define FORCEWAKE_ALL		(FORCEWAKE_RENDER | FORCEWAKE_MEDIA)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3326770..f361263 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -31,6 +31,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include <linux/oom.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@ -43,6 +44,8 @@
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
 
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
@@ -50,14 +53,15 @@
 					 struct drm_i915_fence_reg *fence,
 					 bool enable);
 
-static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
+static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker,
 					     struct shrink_control *sc);
-static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
+static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker,
 					    struct shrink_control *sc);
+static int i915_gem_shrinker_oom(struct notifier_block *nb,
+				 unsigned long event,
+				 void *ptr);
 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
-static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
-static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
 				  enum i915_cache_level level)
@@ -470,6 +474,8 @@
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -885,6 +891,8 @@
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1088,7 +1096,7 @@
  * equal.
  */
 static int
-i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
+i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
 {
 	int ret;
 
@@ -1107,7 +1115,7 @@
 }
 
 static bool missed_irq(struct drm_i915_private *dev_priv,
-		       struct intel_ring_buffer *ring)
+		       struct intel_engine_cs *ring)
 {
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
@@ -1138,7 +1146,7 @@
  * Returns 0 if the seqno was found within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 			unsigned reset_counter,
 			bool interruptible,
 			struct timespec *timeout,
@@ -1245,7 +1253,7 @@
  * request and object lists appropriately for that event.
  */
 int
-i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
+i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1270,9 +1278,10 @@
 
 static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_ring_buffer *ring)
+				     struct intel_engine_cs *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1282,7 +1291,6 @@
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1295,7 +1303,7 @@
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_ring_buffer *ring = obj->ring;
+	struct intel_engine_cs *ring = obj->ring;
 	u32 seqno;
 	int ret;
 
@@ -1320,7 +1328,7 @@
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = obj->ring;
+	struct intel_engine_cs *ring = obj->ring;
 	unsigned reset_counter;
 	u32 seqno;
 	int ret;
@@ -1536,7 +1544,7 @@
 
 	/* Access to snoopable pages through the GTT is incoherent. */
 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
-		ret = -EINVAL;
+		ret = -EFAULT;
 		goto unlock;
 	}
 
@@ -1803,12 +1811,16 @@
 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
 }
 
+static inline int
+i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
+{
+	return obj->madv == I915_MADV_DONTNEED;
+}
+
 /* Immediately discard the backing storage */
 static void
 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
-	struct inode *inode;
-
 	i915_gem_object_free_mmap_offset(obj);
 
 	if (obj->base.filp == NULL)
@@ -1819,16 +1831,28 @@
 	 * To do this we must instruct the shmfs to drop all of its
 	 * backing pages, *now*.
 	 */
-	inode = file_inode(obj->base.filp);
-	shmem_truncate_range(inode, 0, (loff_t)-1);
-
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
 	obj->madv = __I915_MADV_PURGED;
 }
 
-static inline int
-i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
+/* Try to discard unwanted pages */
+static void
+i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
-	return obj->madv == I915_MADV_DONTNEED;
+	struct address_space *mapping;
+
+	switch (obj->madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (obj->base.filp == NULL)
+		return;
+
+	mapping = file_inode(obj->base.filp)->i_mapping,
+	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
 }
 
 static void
@@ -1893,8 +1917,7 @@
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
-	if (i915_gem_object_is_purgeable(obj))
-		i915_gem_object_truncate(obj);
+	i915_gem_object_invalidate(obj);
 
 	return 0;
 }
@@ -1903,58 +1926,58 @@
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equal to 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -1966,7 +1989,7 @@
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -1980,17 +2003,8 @@
 static unsigned long
 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_object *obj, *next;
-	long freed = 0;
-
 	i915_gem_evict_everything(dev_priv->dev);
-
-	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
-				 global_list) {
-		if (i915_gem_object_put_pages(obj) == 0)
-			freed += obj->base.size >> PAGE_SHIFT;
-	}
-	return freed;
+	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
 }
 
 static int
@@ -2094,7 +2108,19 @@
 		page_cache_release(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
-	return PTR_ERR(page);
+
+	/* shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (PTR_ERR(page) == -ENOSPC)
+		return -ENOMEM;
+	else
+		return PTR_ERR(page);
 }
 
 /* Ensure that the associated pages are gathered from the backing storage
@@ -2131,7 +2157,7 @@
 
 static void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_ring_buffer *ring)
+			       struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2169,7 +2195,7 @@
 }
 
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_ring_buffer *ring)
+			     struct intel_engine_cs *ring)
 {
 	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 	return i915_gem_object_move_to_active(vma->obj, ring);
@@ -2207,11 +2233,24 @@
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_engine_cs *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int ret, i, j;
 
 	/* Carefully retire all requests without writing to the rings */
@@ -2226,8 +2265,8 @@
 	for_each_ring(ring, dev_priv, i) {
 		intel_ring_init_seqno(ring, seqno);
 
-		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
-			ring->sync_seqno[j] = 0;
+		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
+			ring->semaphore.sync_seqno[j] = 0;
 	}
 
 	return 0;
@@ -2277,7 +2316,7 @@
 	return 0;
 }
 
-int __i915_add_request(struct intel_ring_buffer *ring,
+int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *obj,
 		       u32 *out_seqno)
@@ -2382,7 +2421,7 @@
 }
 
 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
-				   const struct i915_hw_context *ctx)
+				   const struct intel_context *ctx)
 {
 	unsigned long elapsed;
 
@@ -2395,8 +2434,9 @@
 		if (!i915_gem_context_is_default(ctx)) {
 			DRM_DEBUG("context hanging too fast, banning!\n");
 			return true;
-		} else if (dev_priv->gpu_error.stop_rings == 0) {
-			DRM_ERROR("gpu hanging too fast, banning!\n");
+		} else if (i915_stop_ring_allow_ban(dev_priv)) {
+			if (i915_stop_ring_allow_warn(dev_priv))
+				DRM_ERROR("gpu hanging too fast, banning!\n");
 			return true;
 		}
 	}
@@ -2405,7 +2445,7 @@
 }
 
 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
-				  struct i915_hw_context *ctx,
+				  struct intel_context *ctx,
 				  const bool guilty)
 {
 	struct i915_ctx_hang_stats *hs;
@@ -2436,7 +2476,7 @@
 }
 
 struct drm_i915_gem_request *
-i915_gem_find_active_request(struct intel_ring_buffer *ring)
+i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *request;
 	u32 completed_seqno;
@@ -2454,7 +2494,7 @@
 }
 
 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
-				       struct intel_ring_buffer *ring)
+				       struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *request;
 	bool ring_hung;
@@ -2473,7 +2513,7 @@
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
-					struct intel_ring_buffer *ring)
+					struct intel_engine_cs *ring)
 {
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
@@ -2501,6 +2541,11 @@
 
 		i915_gem_free_request(request);
 	}
+
+	/* These may not have been flushed before the reset, do so now */
+	kfree(ring->preallocated_lazy_request);
+	ring->preallocated_lazy_request = NULL;
+	ring->outstanding_lazy_seqno = 0;
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2527,7 +2572,7 @@
 void i915_gem_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 
 	/*
@@ -2541,8 +2586,6 @@
 	for_each_ring(ring, dev_priv, i)
 		i915_gem_reset_ring_cleanup(dev_priv, ring);
 
-	i915_gem_cleanup_ringbuffer(dev);
-
 	i915_gem_context_reset(dev);
 
 	i915_gem_restore_fences(dev);
@@ -2551,8 +2594,8 @@
 /**
  * This function clears the request list as sequence numbers are passed.
  */
-static void
-i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
+void
+i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
 	uint32_t seqno;
 
@@ -2597,7 +2640,7 @@
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = request->tail;
 
 		i915_gem_free_request(request);
 	}
@@ -2615,7 +2658,7 @@
 i915_gem_retire_requests(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	bool idle = true;
 	int i;
 
@@ -2709,7 +2752,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_ring_buffer *ring = NULL;
+	struct intel_engine_cs *ring = NULL;
 	struct timespec timeout_stack, *timeout = NULL;
 	unsigned reset_counter;
 	u32 seqno = 0;
@@ -2780,9 +2823,9 @@
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_ring_buffer *to)
+		     struct intel_engine_cs *to)
 {
-	struct intel_ring_buffer *from = obj->ring;
+	struct intel_engine_cs *from = obj->ring;
 	u32 seqno;
 	int ret, idx;
 
@@ -2795,7 +2838,7 @@
 	idx = intel_ring_sync_index(from, to);
 
 	seqno = obj->last_read_seqno;
-	if (seqno <= from->sync_seqno[idx])
+	if (seqno <= from->semaphore.sync_seqno[idx])
 		return 0;
 
 	ret = i915_gem_check_olr(obj->ring, seqno);
@@ -2803,13 +2846,13 @@
 		return ret;
 
 	trace_i915_gem_ring_sync_to(from, to, seqno);
-	ret = to->sync_to(to, from, seqno);
+	ret = to->semaphore.sync_to(to, from, seqno);
 	if (!ret)
 		/* We use last_read_seqno because sync_to()
 		 * might have just caused seqno wrap under
 		 * the radar.
 		 */
-		from->sync_seqno[idx] = obj->last_read_seqno;
+		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
 }
@@ -2865,12 +2908,14 @@
 	 * cause memory corruption through use-after-free.
 	 */
 
-	i915_gem_object_finish_gtt(obj);
+	if (i915_is_ggtt(vma->vm)) {
+		i915_gem_object_finish_gtt(obj);
 
-	/* release the fence reg _after_ flushing */
-	ret = i915_gem_object_put_fence(obj);
-	if (ret)
-		return ret;
+		/* release the fence reg _after_ flushing */
+		ret = i915_gem_object_put_fence(obj);
+		if (ret)
+			return ret;
+	}
 
 	trace_i915_vma_unbind(vma);
 
@@ -2903,7 +2948,7 @@
 int i915_gpu_idle(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int ret, i;
 
 	/* Flush everything onto the inactive list. */
@@ -3144,6 +3189,9 @@
 
 	fence = &dev_priv->fence_regs[obj->fence_reg];
 
+	if (WARN_ON(fence->pin_count))
+		return -EBUSY;
+
 	i915_gem_object_fence_lost(obj);
 	i915_gem_object_update_fence(obj, fence, false);
 
@@ -3548,6 +3596,7 @@
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3646,6 +3695,7 @@
 		 * in obj->write_domain and have been skipping the clflushes.
 		 * Just set it to the CPU cache for now.
 		 */
+		i915_gem_object_retire(obj);
 		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 
 		old_read_domains = obj->base.read_domains;
@@ -3743,6 +3793,15 @@
 
 static bool is_pin_display(struct drm_i915_gem_object *obj)
 {
+	struct i915_vma *vma;
+
+	if (list_empty(&obj->vma_list))
+		return false;
+
+	vma = i915_gem_obj_to_ggtt(obj);
+	if (!vma)
+		return false;
+
 	/* There are 3 sources that pin objects:
 	 *   1. The display engine (scanouts, sprites, cursors);
 	 *   2. Reservations for execbuffer;
@@ -3754,7 +3813,7 @@
 	 * subtracting the potential reference by the user, any pin_count
 	 * remains, it must be due to another use by the display engine.
 	 */
-	return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
+	return vma->pin_count - !!obj->user_pin_count;
 }
 
 /*
@@ -3765,9 +3824,10 @@
 int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_ring_buffer *pipelined)
+				     struct intel_engine_cs *pipelined)
 {
 	u32 old_read_domains, old_write_domain;
+	bool was_pin_display;
 	int ret;
 
 	if (pipelined != obj->ring) {
@@ -3779,6 +3839,7 @@
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
+	was_pin_display = obj->pin_display;
 	obj->pin_display = true;
 
 	/* The display engine is not coherent with the LLC cache on gen6.  As
@@ -3821,7 +3882,8 @@
 	return 0;
 
 err_unpin_display:
-	obj->pin_display = is_pin_display(obj);
+	WARN_ON(was_pin_display != is_pin_display(obj));
+	obj->pin_display = was_pin_display;
 	return ret;
 }
 
@@ -3868,6 +3930,7 @@
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
@@ -3917,7 +3980,7 @@
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
 	struct drm_i915_gem_request *request;
-	struct intel_ring_buffer *ring = NULL;
+	struct intel_engine_cs *ring = NULL;
 	unsigned reset_counter;
 	u32 seqno = 0;
 	int ret;
@@ -3976,9 +4039,13 @@
 		    uint32_t alignment,
 		    uint64_t flags)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct i915_vma *vma;
 	int ret;
 
+	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
+		return -ENODEV;
+
 	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
 		return -EINVAL;
 
@@ -4032,6 +4099,32 @@
 		obj->pin_mappable = false;
 }
 
+/* Take a pin on obj's fence register; returns false if obj has no fence. */
+bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = obj->base.dev->dev_private;
+	struct i915_vma *ggtt;
+
+	if (obj->fence_reg == I915_FENCE_REG_NONE)
+		return false;
+	/* A fence should never hold more pins than the object's GGTT vma. */
+	ggtt = i915_gem_obj_to_ggtt(obj);
+	WARN_ON(!ggtt ||
+		i915->fence_regs[obj->fence_reg].pin_count > ggtt->pin_count);
+	i915->fence_regs[obj->fence_reg].pin_count++;
+	return true;
+}
+
+/* Drop one pin on obj's fence register; a no-op when obj holds no fence. */
+void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = obj->base.dev->dev_private;
+	if (obj->fence_reg == I915_FENCE_REG_NONE)
+		return;
+	WARN_ON(i915->fence_regs[obj->fence_reg].pin_count <= 0);
+	i915->fence_regs[obj->fence_reg].pin_count--;
+}
+
 int
 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 		   struct drm_file *file)
@@ -4292,6 +4385,30 @@
 	return obj;
 }
 
+/* Report whether the caller may discard obj's backing storage contents. */
+static bool discard_backing_storage(struct drm_i915_gem_object *obj)
+{
+	struct file *backing = obj->base.filp;
+
+	/* Anything not marked WILLNEED is left exactly as it is. */
+	if (obj->madv != I915_MADV_WILLNEED)
+		return false;
+
+	/* No filp at all: treated as being the only user of the storage. */
+	if (!backing)
+		return true;
+
+	/* If we are the last user of the backing storage, the pages are
+	 * going to be released immediately, so copying the contents back
+	 * from the GPU can be skipped. Reading f_count looks racy, but no
+	 * more so than userspace racing mmap against close: the first
+	 * external reference to the filp can only come from
+	 * i915_gem_mmap_ioctl(), which guards against the user acquiring
+	 * one while the object is in the middle of being freed.
+	 */
+	return atomic_long_read(&backing->f_count) == 1;
+}
+
 void i915_gem_free_object(struct drm_gem_object *gem_obj)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
@@ -4329,6 +4446,8 @@
 
 	if (WARN_ON(obj->pages_pin_count))
 		obj->pages_pin_count = 0;
+	if (discard_backing_storage(obj))
+		obj->madv = I915_MADV_DONTNEED;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 	i915_gem_object_release_stolen(obj);
@@ -4338,6 +4457,9 @@
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4371,6 +4493,17 @@
 	kfree(vma);
 }
 
+/* Call intel_stop_ring_buffer() on every ring visited by for_each_ring(). */
+static void i915_gem_stop_ringbuffers(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = dev->dev_private;
+	struct intel_engine_cs *engine;
+	int id;
+
+	for_each_ring(engine, i915, id)
+		intel_stop_ring_buffer(engine);
+}
+
 int
 i915_gem_suspend(struct drm_device *dev)
 {
@@ -4392,7 +4525,7 @@
 		i915_gem_evict_everything(dev);
 
 	i915_kernel_lost_context(dev);
-	i915_gem_cleanup_ringbuffer(dev);
+	i915_gem_stop_ringbuffers(dev);
 
 	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
 	 * We need to replace this with a semaphore, or something.
@@ -4413,7 +4546,7 @@
 	return ret;
 }
 
-int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
+int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4512,13 +4645,20 @@
 			goto cleanup_blt_ring;
 	}
 
+	if (HAS_BSD2(dev)) {
+		ret = intel_init_bsd2_ring_buffer(dev);
+		if (ret)
+			goto cleanup_vebox_ring;
+	}
 
 	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
 	if (ret)
-		goto cleanup_vebox_ring;
+		goto cleanup_bsd2_ring;
 
 	return 0;
 
+cleanup_bsd2_ring:
+	intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
 cleanup_vebox_ring:
 	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
@@ -4576,15 +4716,11 @@
 	 * the do_switch), but before enabling PPGTT. So don't move this.
 	 */
 	ret = i915_gem_context_enable(dev_priv);
-	if (ret) {
+	if (ret && ret != -EIO) {
 		DRM_ERROR("Context enable failed %d\n", ret);
-		goto err_out;
+		i915_gem_cleanup_ringbuffer(dev);
 	}
 
-	return 0;
-
-err_out:
-	i915_gem_cleanup_ringbuffer(dev);
 	return ret;
 }
 
@@ -4597,11 +4733,13 @@
 
 	if (IS_VALLEYVIEW(dev)) {
 		/* VLVA0 (potential hack), BIOS isn't actually waking us */
-		I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
-		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
+		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
+		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
+			      VLV_GTLC_ALLOWWAKEACK), 10))
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
@@ -4611,25 +4749,28 @@
 	}
 
 	ret = i915_gem_init_hw(dev);
-	mutex_unlock(&dev->struct_mutex);
-	if (ret) {
-		WARN_ON(dev_priv->mm.aliasing_ppgtt);
-		i915_gem_context_fini(dev);
-		drm_mm_takedown(&dev_priv->gtt.base.mm);
-		return ret;
+	if (ret == -EIO) {
+		/* Allow ring initialisation to fail by marking the GPU as
+		 * wedged. But we only want to do this where the GPU is angry,
+		 * for all other failure, such as an allocation failure, bail.
+		 */
+		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
+		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		ret = 0;
 	}
+	mutex_unlock(&dev->struct_mutex);
 
 	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		dev_priv->dri1.allow_batchbuffer = 1;
-	return 0;
+	return ret;
 }
 
 void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
@@ -4661,16 +4802,15 @@
 	}
 
 	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
-	mutex_unlock(&dev->struct_mutex);
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, dev->pdev->irq);
 	if (ret)
 		goto cleanup_ringbuffer;
+	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 
 cleanup_ringbuffer:
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
 	dev_priv->ums.mm_suspended = 1;
 	mutex_unlock(&dev->struct_mutex);
@@ -4685,7 +4825,9 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return 0;
 
+	mutex_lock(&dev->struct_mutex);
 	drm_irq_uninstall(dev);
+	mutex_unlock(&dev->struct_mutex);
 
 	return i915_gem_suspend(dev);
 }
@@ -4704,7 +4846,7 @@
 }
 
 static void
-init_ring_lists(struct intel_ring_buffer *ring)
+init_ring_lists(struct intel_engine_cs *ring)
 {
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
@@ -4752,7 +4894,7 @@
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
-	if (IS_GEN3(dev)) {
+	if (!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) {
 		I915_WRITE(MI_ARB_STATE,
 			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
 	}
@@ -4779,10 +4921,13 @@
 
 	dev_priv->mm.interruptible = true;
 
-	dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
-	dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
-	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&dev_priv->mm.inactive_shrinker);
+	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
+	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
+	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&dev_priv->mm.shrinker);
+
+	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
+	register_oom_notifier(&dev_priv->mm.oom_notifier);
 }
 
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
@@ -4857,27 +5002,46 @@
 #endif
 }
 
-static unsigned long
-i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
+static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(shrinker,
-			     struct drm_i915_private,
-			     mm.inactive_shrinker);
-	struct drm_device *dev = dev_priv->dev;
-	struct drm_i915_gem_object *obj;
-	bool unlock = true;
-	unsigned long count;
-
 	if (!mutex_trylock(&dev->struct_mutex)) {
 		if (!mutex_is_locked_by(&dev->struct_mutex, current))
-			return 0;
+			return false;
 
-		if (dev_priv->mm.shrinker_no_lock_stealing)
-			return 0;
+		if (to_i915(dev)->mm.shrinker_no_lock_stealing)
+			return false;
 
-		unlock = false;
-	}
+		*unlock = false;
+	} else
+		*unlock = true;
+
+	return true;
+}
+
+/* Count the VMAs on obj->vma_list whose drm_mm node is allocated. */
+static int num_vma_bound(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *pos;
+	int bound = 0;
+
+	list_for_each_entry(pos, &obj->vma_list, vma_link)
+		bound += drm_mm_node_allocated(&pos->node) ? 1 : 0;
+
+	return bound;
+}
+
+static unsigned long
+i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_i915_gem_object *obj;
+	unsigned long count;
+	bool unlock;
+
+	if (!i915_gem_shrinker_lock(dev, &unlock))
+		return 0;
 
 	count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
@@ -4885,10 +5049,8 @@
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (obj->active)
-			continue;
-
-		if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
+		if (!i915_gem_obj_is_pinned(obj) &&
+		    obj->pages_pin_count == num_vma_bound(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
@@ -4961,44 +5123,99 @@
 }
 
 static unsigned long
-i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
+i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
-		container_of(shrinker,
-			     struct drm_i915_private,
-			     mm.inactive_shrinker);
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	struct drm_device *dev = dev_priv->dev;
 	unsigned long freed;
-	bool unlock = true;
+	bool unlock;
 
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		if (!mutex_is_locked_by(&dev->struct_mutex, current))
-			return SHRINK_STOP;
-
-		if (dev_priv->mm.shrinker_no_lock_stealing)
-			return SHRINK_STOP;
-
-		unlock = false;
-	}
+	if (!i915_gem_shrinker_lock(dev, &unlock))
+		return SHRINK_STOP;
 
 	freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
 	if (freed < sc->nr_to_scan)
 		freed += __i915_gem_shrink(dev_priv,
 					   sc->nr_to_scan - freed,
 					   false);
-	if (freed < sc->nr_to_scan)
-		freed += i915_gem_shrink_all(dev_priv);
-
 	if (unlock)
 		mutex_unlock(&dev->struct_mutex);
 
 	return freed;
 }
 
+/* OOM notifier: take struct_mutex (bounded retries), call
+ * i915_gem_shrink_all() with mm.interruptible cleared, then log leftovers.
+ * NOTE(review): freed is logged as bytes - confirm shrink_all()'s unit.
+ */
+static int
+i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, struct drm_i915_private, mm.oom_notifier);
+	unsigned long pinned = 0, bound = 0, unbound = 0, freed;
+	unsigned long retries = msecs_to_jiffies(5000) + 1;
+	struct drm_device *drm = i915->dev;
+	struct drm_i915_gem_object *obj;
+	bool saved_interruptible, unlock;
+
+	while (!i915_gem_shrinker_lock(drm, &unlock)) {
+		if (--retries == 0) {
+			pr_err("Unable to purge GPU memory due lock contention.\n");
+			return NOTIFY_DONE;
+		}
+		schedule_timeout_killable(1);
+	}
+
+	saved_interruptible = i915->mm.interruptible;
+	i915->mm.interruptible = false;
+	freed = i915_gem_shrink_all(i915);
+	i915->mm.interruptible = saved_interruptible;
+
+	/* Because we may be allocating inside our own driver, we cannot
+	 * assert that there are no objects with pinned pages that are not
+	 * being pointed to by hardware; tally what is left instead, skipping
+	 * objects without a filp (not backed by a freeable object).
+	 */
+	list_for_each_entry(obj, &i915->mm.unbound_list, global_list) {
+		if (!obj->base.filp)
+			continue;
+		if (obj->pages_pin_count)
+			pinned += obj->base.size;
+		else
+			unbound += obj->base.size;
+	}
+	list_for_each_entry(obj, &i915->mm.bound_list, global_list) {
+		if (!obj->base.filp)
+			continue;
+		if (obj->pages_pin_count)
+			pinned += obj->base.size;
+		else
+			bound += obj->base.size;
+	}
+
+	if (unlock)
+		mutex_unlock(&drm->struct_mutex);
+
+	pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n",
+		freed, pinned);
+	if (unbound || bound)
+		pr_err("%lu and %lu bytes still available in the "
+		       "bound and unbound GPU page lists.\n",
+		       bound, unbound);
+
+	*(unsigned long *)ptr += freed;
+	return NOTIFY_DONE;
+}
+
 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
+	/* This WARN has probably outlived its usefulness (callers already
+	 * WARN if they don't find the GGTT vma they expect). When removing,
+	 * remember to remove the pre-check in is_pin_display() as well */
 	if (WARN_ON(list_empty(&obj->vma_list)))
 		return NULL;
 

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d72db15..3ffe308 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c

@@ -178,7 +178,7 @@
 
 void i915_gem_context_free(struct kref *ctx_ref)
 {
-	struct i915_hw_context *ctx = container_of(ctx_ref,
+	struct intel_context *ctx = container_of(ctx_ref,
 						   typeof(*ctx), ref);
 	struct i915_hw_ppgtt *ppgtt = NULL;
 
@@ -199,7 +199,7 @@
 }
 
 static struct i915_hw_ppgtt *
-create_vm_for_ctx(struct drm_device *dev, struct i915_hw_context *ctx)
+create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx)
 {
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
@@ -218,12 +218,12 @@
 	return ppgtt;
 }
 
-static struct i915_hw_context *
+static struct intel_context *
 __create_hw_context(struct drm_device *dev,
 		  struct drm_i915_file_private *file_priv)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -240,7 +240,15 @@
 			goto err_out;
 		}
 
-		if (INTEL_INFO(dev)->gen >= 7) {
+		/*
+		 * Try to make the context utilize L3 as well as LLC.
+		 *
+		 * On VLV we don't have L3 controls in the PTEs so we
+		 * shouldn't touch the cache level, especially as that
+		 * would make the object snooped which might have a
+		 * negative performance impact.
+		 */
+		if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) {
 			ret = i915_gem_object_set_cache_level(ctx->obj,
 							      I915_CACHE_L3_LLC);
 			/* Failure shouldn't ever happen this early */
@@ -277,14 +285,14 @@
  * context state of the GPU for applications that don't utilize HW contexts, as
  * well as an idle case.
  */
-static struct i915_hw_context *
+static struct intel_context *
 i915_gem_create_context(struct drm_device *dev,
 			struct drm_i915_file_private *file_priv,
 			bool create_vm)
 {
 	const bool is_global_default_ctx = file_priv == NULL;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int ret = 0;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -356,8 +364,8 @@
 	/* Prevent the hardware from restoring the last context (which hung) on
 	 * the next switch */
 	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct intel_ring_buffer *ring = &dev_priv->ring[i];
-		struct i915_hw_context *dctx = ring->default_context;
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
+		struct intel_context *dctx = ring->default_context;
 
 		/* Do a fake switch to the default context */
 		if (ring->last_context == dctx)
@@ -383,7 +391,7 @@
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int i;
 
 	/* Init should only be called once per module load. Eventually the
@@ -418,7 +426,7 @@
 void i915_gem_context_fini(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_context *dctx = dev_priv->ring[RCS].default_context;
+	struct intel_context *dctx = dev_priv->ring[RCS].default_context;
 	int i;
 
 	if (dctx->obj) {
@@ -441,10 +449,12 @@
 			i915_gem_context_unreference(dctx);
 			dev_priv->ring[RCS].last_context = NULL;
 		}
+
+		i915_gem_object_ggtt_unpin(dctx->obj);
 	}
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
 
 		if (ring->last_context)
 			i915_gem_context_unreference(ring->last_context);
@@ -453,13 +463,12 @@
 		ring->last_context = NULL;
 	}
 
-	i915_gem_object_ggtt_unpin(dctx->obj);
 	i915_gem_context_unreference(dctx);
 }
 
 int i915_gem_context_enable(struct drm_i915_private *dev_priv)
 {
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int ret, i;
 
 	/* This is the only place the aliasing PPGTT gets enabled, which means
@@ -486,11 +495,7 @@
 
 static int context_idr_cleanup(int id, void *p, void *data)
 {
-	struct i915_hw_context *ctx = p;
-
-	/* Ignore the default context because close will handle it */
-	if (i915_gem_context_is_default(ctx))
-		return 0;
+	struct intel_context *ctx = p;
 
 	i915_gem_context_unreference(ctx);
 	return 0;
@@ -499,17 +504,17 @@
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct intel_context *ctx;
 
 	idr_init(&file_priv->context_idr);
 
 	mutex_lock(&dev->struct_mutex);
-	file_priv->private_default_ctx =
-		i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
+	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 
-	if (IS_ERR(file_priv->private_default_ctx)) {
+	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
-		return PTR_ERR(file_priv->private_default_ctx);
+		return PTR_ERR(ctx);
 	}
 
 	return 0;
@@ -521,16 +526,14 @@
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
-
-	i915_gem_context_unreference(file_priv->private_default_ctx);
 }
 
-struct i915_hw_context *
+struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 {
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 
-	ctx = (struct i915_hw_context *)idr_find(&file_priv->context_idr, id);
+	ctx = (struct intel_context *)idr_find(&file_priv->context_idr, id);
 	if (!ctx)
 		return ERR_PTR(-ENOENT);
 
@@ -538,8 +541,8 @@
 }
 
 static inline int
-mi_set_context(struct intel_ring_buffer *ring,
-	       struct i915_hw_context *new_context,
+mi_set_context(struct intel_engine_cs *ring,
+	       struct intel_context *new_context,
 	       u32 hw_flags)
 {
 	int ret;
@@ -549,7 +552,7 @@
 	 * explicitly, so we rely on the value at ring init, stored in
 	 * itlb_before_ctx_switch.
 	 */
-	if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) {
+	if (IS_GEN6(ring->dev)) {
 		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
 		if (ret)
 			return ret;
@@ -559,8 +562,8 @@
 	if (ret)
 		return ret;
 
-	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw */
-	if (IS_GEN7(ring->dev))
+	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+	if (INTEL_INFO(ring->dev)->gen >= 7)
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 	else
 		intel_ring_emit(ring, MI_NOOP);
@@ -578,7 +581,7 @@
 	 */
 	intel_ring_emit(ring, MI_NOOP);
 
-	if (IS_GEN7(ring->dev))
+	if (INTEL_INFO(ring->dev)->gen >= 7)
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
 	else
 		intel_ring_emit(ring, MI_NOOP);
@@ -588,11 +591,11 @@
 	return ret;
 }
 
-static int do_switch(struct intel_ring_buffer *ring,
-		     struct i915_hw_context *to)
+static int do_switch(struct intel_engine_cs *ring,
+		     struct intel_context *to)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct i915_hw_context *from = ring->last_context;
+	struct intel_context *from = ring->last_context;
 	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	int ret, i;
@@ -693,13 +696,19 @@
 		i915_gem_context_unreference(from);
 	}
 
-	to->is_initialized = true;
-
 done:
 	i915_gem_context_reference(to);
 	ring->last_context = to;
 	to->last_ring = ring;
 
+	if (ring->id == RCS && !to->is_initialized && from == NULL) {
+		ret = i915_gem_render_state_init(ring);
+		if (ret)
+			DRM_ERROR("init render state: %d\n", ret);
+	}
+
+	to->is_initialized = true;
+
 	return 0;
 
 unpin_out:
@@ -718,8 +727,8 @@
  * it will have a refoucnt > 1. This allows us to destroy the context abstract
  * object while letting the normal object tracking destroy the backing BO.
  */
-int i915_switch_context(struct intel_ring_buffer *ring,
-			struct i915_hw_context *to)
+int i915_switch_context(struct intel_engine_cs *ring,
+			struct intel_context *to)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
@@ -748,7 +757,7 @@
 {
 	struct drm_i915_gem_context_create *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int ret;
 
 	if (!hw_context_enabled(dev))
@@ -774,7 +783,7 @@
 {
 	struct drm_i915_gem_context_destroy *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int ret;
 
 	if (args->ctx_id == DEFAULT_CONTEXT_ID)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 9bb533e..580aa42 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c

@@ -161,12 +161,8 @@
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
-	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return;
-
+	mutex_lock(&dev->struct_mutex);
 	if (--obj->vmapping_count == 0) {
 		vunmap(obj->dma_buf_vmapping);
 		obj->dma_buf_vmapping = NULL;
@@ -233,6 +229,14 @@
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->ops->dmabuf_export) {
+		int ret = obj->ops->dmabuf_export(obj);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
 	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
 }
 

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 20fef6c..3a30133 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -265,10 +265,12 @@
 
 static int
 relocate_entry_cpu(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc)
+		   struct drm_i915_gem_relocation_entry *reloc,
+		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
 	uint32_t page_offset = offset_in_page(reloc->offset);
+	uint64_t delta = reloc->delta + target_offset;
 	char *vaddr;
 	int ret;
 
@@ -278,7 +280,7 @@
 
 	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 				reloc->offset >> PAGE_SHIFT));
-	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
@@ -289,7 +291,7 @@
 			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
 		}
 
-		*(uint32_t *)(vaddr + page_offset) = 0;
+		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
 	}
 
 	kunmap_atomic(vaddr);
@@ -299,10 +301,12 @@
 
 static int
 relocate_entry_gtt(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc)
+		   struct drm_i915_gem_relocation_entry *reloc,
+		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint64_t delta = reloc->delta + target_offset;
 	uint32_t __iomem *reloc_entry;
 	void __iomem *reloc_page;
 	int ret;
@@ -321,7 +325,7 @@
 			reloc->offset & PAGE_MASK);
 	reloc_entry = (uint32_t __iomem *)
 		(reloc_page + offset_in_page(reloc->offset));
-	iowrite32(reloc->delta, reloc_entry);
+	iowrite32(lower_32_bits(delta), reloc_entry);
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		reloc_entry += 1;
@@ -334,7 +338,7 @@
 			reloc_entry = reloc_page;
 		}
 
-		iowrite32(0, reloc_entry);
+		iowrite32(upper_32_bits(delta), reloc_entry);
 	}
 
 	io_mapping_unmap_atomic(reloc_page);
@@ -351,7 +355,7 @@
 	struct drm_gem_object *target_obj;
 	struct drm_i915_gem_object *target_i915_obj;
 	struct i915_vma *target_vma;
-	uint32_t target_offset;
+	uint64_t target_offset;
 	int ret;
 
 	/* we've already hold a reference to all valid objects */
@@ -429,11 +433,10 @@
 	if (obj->active && in_atomic())
 		return -EFAULT;
 
-	reloc->delta += target_offset;
 	if (use_cpu_reloc(obj))
-		ret = relocate_entry_cpu(obj, reloc);
+		ret = relocate_entry_cpu(obj, reloc, target_offset);
 	else
-		ret = relocate_entry_gtt(obj, reloc);
+		ret = relocate_entry_gtt(obj, reloc, target_offset);
 
 	if (ret)
 		return ret;
@@ -541,7 +544,7 @@
 
 static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
-				struct intel_ring_buffer *ring,
+				struct intel_engine_cs *ring,
 				bool *need_reloc)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
@@ -628,7 +631,7 @@
 }
 
 static int
-i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
+i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			    struct list_head *vmas,
 			    bool *need_relocs)
 {
@@ -642,6 +645,8 @@
 	if (list_empty(vmas))
 		return 0;
 
+	i915_gem_retire_requests_ring(ring);
+
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
@@ -727,7 +732,7 @@
 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 				  struct drm_i915_gem_execbuffer2 *args,
 				  struct drm_file *file,
-				  struct intel_ring_buffer *ring,
+				  struct intel_engine_cs *ring,
 				  struct eb_vmas *eb,
 				  struct drm_i915_gem_exec_object2 *exec)
 {
@@ -843,7 +848,7 @@
 }
 
 static int
-i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
+i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 				struct list_head *vmas)
 {
 	struct i915_vma *vma;
@@ -926,11 +931,11 @@
 	return 0;
 }
 
-static struct i915_hw_context *
+static struct intel_context *
 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
-			  struct intel_ring_buffer *ring, const u32 ctx_id)
+			  struct intel_engine_cs *ring, const u32 ctx_id)
 {
-	struct i915_hw_context *ctx = NULL;
+	struct intel_context *ctx = NULL;
 	struct i915_ctx_hang_stats *hs;
 
 	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID)
@@ -951,7 +956,7 @@
 
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-				   struct intel_ring_buffer *ring)
+				   struct intel_engine_cs *ring)
 {
 	struct i915_vma *vma;
 
@@ -974,6 +979,9 @@
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
@@ -983,7 +991,7 @@
 static void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
-				    struct intel_ring_buffer *ring,
+				    struct intel_engine_cs *ring,
 				    struct drm_i915_gem_object *obj)
 {
 	/* Unconditionally force add_request to emit a full flush. */
@@ -995,13 +1003,15 @@
 
 static int
 i915_reset_gen7_sol_offsets(struct drm_device *dev,
-			    struct intel_ring_buffer *ring)
+			    struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret, i;
 
-	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
-		return 0;
+	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
+		DRM_DEBUG("sol reset is gen7/rcs only\n");
+		return -EINVAL;
+	}
 
 	ret = intel_ring_begin(ring, 4 * 3);
 	if (ret)
@@ -1018,6 +1028,37 @@
 	return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * A file keeps the ring it was first given; the choice is made (and
+ * re-checked) under struct_mutex so concurrent callers agree on it.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+				  struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	int ring_id;
+
+	/* Fast path: this file has already been assigned a ring. */
+	if (file_priv->bsd_ring)
+		return file_priv->bsd_ring->id;
+
+	mutex_lock(&dev->struct_mutex);
+	if (file_priv->bsd_ring) {
+		/* Another caller on this file won the race; use its choice. */
+		ring_id = file_priv->bsd_ring->id;
+	} else {
+		/* Ping-pong between the two BSD rings for each new file. */
+		ring_id = dev_priv->mm.bsd_ring_dispatch_index ? VCS2 : VCS;
+		dev_priv->mm.bsd_ring_dispatch_index = (ring_id == VCS);
+		file_priv->bsd_ring = &dev_priv->ring[ring_id];
+	}
+	mutex_unlock(&dev->struct_mutex);
+	return ring_id;
+}
+
 static struct drm_i915_gem_object *
 eb_get_batch(struct eb_vmas *eb)
 {
@@ -1047,11 +1088,11 @@
 	struct eb_vmas *eb;
 	struct drm_i915_gem_object *batch_obj;
 	struct drm_clip_rect *cliprects = NULL;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_context *ctx;
+	struct intel_engine_cs *ring;
+	struct intel_context *ctx;
 	struct i915_address_space *vm;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start = args->batch_start_offset, exec_len;
+	u64 exec_start = args->batch_start_offset, exec_len;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1073,7 +1114,7 @@
 	if (args->flags & I915_EXEC_IS_PINNED)
 		flags |= I915_DISPATCH_PINNED;
 
-	if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) {
+	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
 		DRM_DEBUG("execbuf with unknown ring: %d\n",
 			  (int)(args->flags & I915_EXEC_RING_MASK));
 		return -EINVAL;
@@ -1081,7 +1122,14 @@
 
 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
 		ring = &dev_priv->ring[RCS];
-	else
+	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+		if (HAS_BSD2(dev)) {
+			int ring_id;
+			ring_id = gen8_dispatch_bsd_ring(dev, file);
+			ring = &dev_priv->ring[ring_id];
+		} else
+			ring = &dev_priv->ring[VCS];
+	} else
 		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
 
 	if (!intel_ring_initialized(ring)) {
@@ -1096,14 +1144,22 @@
 	case I915_EXEC_CONSTANTS_REL_GENERAL:
 	case I915_EXEC_CONSTANTS_ABSOLUTE:
 	case I915_EXEC_CONSTANTS_REL_SURFACE:
-		if (ring == &dev_priv->ring[RCS] &&
-		    mode != dev_priv->relative_constants_mode) {
-			if (INTEL_INFO(dev)->gen < 4)
+		if (mode != 0 && ring != &dev_priv->ring[RCS]) {
+			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
+			return -EINVAL;
+		}
+
+		if (mode != dev_priv->relative_constants_mode) {
+			if (INTEL_INFO(dev)->gen < 4) {
+				DRM_DEBUG("no rel constants on pre-gen4\n");
 				return -EINVAL;
+			}
 
 			if (INTEL_INFO(dev)->gen > 5 &&
-			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
+			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
 				return -EINVAL;
+			}
 
 			/* The HW changed the meaning on this bit on gen6 */
 			if (INTEL_INFO(dev)->gen >= 6)
@@ -1151,6 +1207,16 @@
 			ret = -EFAULT;
 			goto pre_mutex_err;
 		}
+	} else {
+		if (args->DR4 == 0xffffffff) {
+			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+			args->DR4 = 0;
+		}
+
+		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
+			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
+			return -EINVAL;
+		}
 	}
 
 	intel_runtime_pm_get(dev_priv);
@@ -1170,7 +1236,7 @@
 		mutex_unlock(&dev->struct_mutex);
 		ret = PTR_ERR(ctx);
 		goto pre_mutex_err;
-	} 
+	}
 
 	i915_gem_context_reference(ctx);
 
@@ -1180,6 +1246,7 @@
 
 	eb = eb_create(args);
 	if (eb == NULL) {
+		i915_gem_context_unreference(ctx);
 		mutex_unlock(&dev->struct_mutex);
 		ret = -ENOMEM;
 		goto pre_mutex_err;
@@ -1430,6 +1497,11 @@
 		return -EINVAL;
 	}
 
+	if (args->rsvd2 != 0) {
+		DRM_DEBUG("dirty rvsd2 field\n");
+		return -EINVAL;
+	}
+
 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	if (exec2_list == NULL)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5deb228..eec820a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -30,7 +30,8 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
+static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
+static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
 
 bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 {
@@ -65,59 +66,6 @@
 	return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
 }
 
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
-typedef uint64_t gen8_gtt_pte_t;
-typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
-
-/* PPGTT stuff */
-#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
-#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
-
-#define GEN6_PDE_VALID			(1 << 0)
-/* gen6+ has bit 11-4 for physical addr bit 39-32 */
-#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-
-#define GEN6_PTE_VALID			(1 << 0)
-#define GEN6_PTE_UNCACHED		(1 << 1)
-#define HSW_PTE_UNCACHED		(0)
-#define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
-#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
-
-/* Cacheability Control is a 4-bit value. The low three bits are stored in *
- * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
- */
-#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
-					 (((bits) & 0x8) << (11 - 3)))
-#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
-#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
-#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
-#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
-#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
-#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
-
-#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
-#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
-
-/* GEN8 legacy style addressis defined as a 3 level page table:
- * 31:30 | 29:21 | 20:12 |  11:0
- * PDPE  |  PDE  |  PTE  | offset
- * The difference as compared to normal x86 3 level page table is the PDPEs are
- * programmed via register.
- */
-#define GEN8_PDPE_SHIFT			30
-#define GEN8_PDPE_MASK			0x3
-#define GEN8_PDE_SHIFT			21
-#define GEN8_PDE_MASK			0x1ff
-#define GEN8_PTE_SHIFT			12
-#define GEN8_PTE_MASK			0x1ff
-
-#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
-#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
 
 static void ppgtt_bind_vma(struct i915_vma *vma,
 			   enum i915_cache_level cache_level,
@@ -131,10 +79,19 @@
 {
 	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 	pte |= addr;
-	if (level != I915_CACHE_NONE)
-		pte |= PPAT_CACHED_INDEX;
-	else
+
+	switch (level) {
+	case I915_CACHE_NONE:
 		pte |= PPAT_UNCACHED_INDEX;
+		break;
+	case I915_CACHE_WT:
+		pte |= PPAT_DISPLAY_ELLC_INDEX;
+		break;
+	default:
+		pte |= PPAT_CACHED_INDEX;
+		break;
+	}
+
 	return pte;
 }
 
@@ -197,9 +154,6 @@
 	return pte;
 }
 
-#define BYT_PTE_WRITEABLE		(1 << 1)
-#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
-
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
 				     bool valid)
@@ -253,7 +207,7 @@
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
+static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 			   uint64_t val, bool synchronous)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -283,7 +237,7 @@
 }
 
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_ring_buffer *ring,
+			  struct intel_engine_cs *ring,
 			  bool synchronous)
 {
 	int i, ret;
@@ -332,6 +286,8 @@
 			num_entries--;
 		}
 
+		if (!HAS_LLC(ppgtt->base.dev))
+			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 		kunmap_atomic(pt_vaddr);
 
 		pte = 0;
@@ -368,6 +324,8 @@
 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
 					cache_level, true);
 		if (++pte == GEN8_PTES_PER_PAGE) {
+			if (!HAS_LLC(ppgtt->base.dev))
+				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
 			if (++pde == GEN8_PDES_PER_PAGE) {
@@ -377,8 +335,11 @@
 			pte = 0;
 		}
 	}
-	if (pt_vaddr)
+	if (pt_vaddr) {
+		if (!HAS_LLC(ppgtt->base.dev))
+			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 		kunmap_atomic(pt_vaddr);
+	}
 }
 
 static void gen8_free_page_tables(struct page **pt_pages)
@@ -641,6 +602,8 @@
 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 						      I915_CACHE_LLC);
 		}
+		if (!HAS_LLC(ppgtt->base.dev))
+			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
 		kunmap_atomic(pd_vaddr);
 	}
 
@@ -753,7 +716,7 @@
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_ring_buffer *ring,
+			 struct intel_engine_cs *ring,
 			 bool synchronous)
 {
 	struct drm_device *dev = ppgtt->base.dev;
@@ -797,7 +760,7 @@
 }
 
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_ring_buffer *ring,
+			  struct intel_engine_cs *ring,
 			  bool synchronous)
 {
 	struct drm_device *dev = ppgtt->base.dev;
@@ -848,7 +811,7 @@
 }
 
 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_ring_buffer *ring,
+			  struct intel_engine_cs *ring,
 			  bool synchronous)
 {
 	struct drm_device *dev = ppgtt->base.dev;
@@ -869,7 +832,7 @@
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int j, ret;
 
 	for_each_ring(ring, dev_priv, j) {
@@ -899,7 +862,7 @@
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	uint32_t ecochk, ecobits;
 	int i;
 
@@ -938,7 +901,7 @@
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	uint32_t ecochk, gab_ctl, ecobits;
 	int i;
 
@@ -1067,8 +1030,6 @@
 
 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 {
-#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
-#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	bool retried = false;
@@ -1084,8 +1045,7 @@
 						  &ppgtt->node, GEN6_PD_SIZE,
 						  GEN6_PD_ALIGN, 0,
 						  0, dev_priv->gtt.base.total,
-						  DRM_MM_SEARCH_DEFAULT,
-						  DRM_MM_CREATE_DEFAULT);
+						  DRM_MM_TOPDOWN);
 	if (ret == -ENOSPC && !retried) {
 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
@@ -1311,7 +1271,7 @@
 void i915_check_and_clear_faults(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 
 	if (INTEL_INFO(dev)->gen < 6)
@@ -1386,7 +1346,11 @@
 
 
 	if (INTEL_INFO(dev)->gen >= 8) {
-		gen8_setup_private_ppat(dev_priv);
+		if (IS_CHERRYVIEW(dev))
+			chv_setup_private_ppat(dev_priv);
+		else
+			bdw_setup_private_ppat(dev_priv);
+
 		return;
 	}
 
@@ -1438,7 +1402,7 @@
 		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
 	struct sg_page_iter sg_iter;
-	dma_addr_t addr;
+	dma_addr_t addr = 0;
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_dma_address(sg_iter.sg) +
@@ -1811,9 +1775,27 @@
 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
 	if (bdw_gmch_ctl)
 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
+	if (bdw_gmch_ctl > 4)
+		bdw_gmch_ctl = 4;
+#endif
+
 	return bdw_gmch_ctl << 20;
 }
 
+static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+	if (gmch_ctrl)
+		return 1 << (20 + gmch_ctrl);
+
+	return 0;
+}
+
 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
@@ -1828,6 +1810,24 @@
 	return bdw_gmch_ctl << 25; /* 32 MB units */
 }
 
+static size_t chv_get_stolen_size(u16 gmch_ctrl)
+{
+	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
+	gmch_ctrl &= SNB_GMCH_GMS_MASK;
+
+	/*
+	 * 0x0  to 0x10: 32MB increments starting at 0MB
+	 * 0x11 to 0x16: 4MB increments starting at 8MB
+	 * 0x17 to 0x1d: 4MB increments starting at 36MB
+	 */
+	if (gmch_ctrl < 0x11)
+		return gmch_ctrl << 25;
+	else if (gmch_ctrl < 0x17)
+		return (gmch_ctrl - 0x11 + 2) << 22;
+	else
+		return (gmch_ctrl - 0x17 + 9) << 22;
+}
+
 static int ggtt_probe_common(struct drm_device *dev,
 			     size_t gtt_size)
 {
@@ -1858,19 +1858,8 @@
 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  * bits. When using advanced contexts each context stores its own PAT, but
  * writing this data shouldn't be harmful even in those cases. */
-static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
+static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
 {
-#define GEN8_PPAT_UC		(0<<0)
-#define GEN8_PPAT_WC		(1<<0)
-#define GEN8_PPAT_WT		(2<<0)
-#define GEN8_PPAT_WB		(3<<0)
-#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
-/* FIXME(BDW): Bspec is completely confused about cache control bits. */
-#define GEN8_PPAT_LLC		(1<<2)
-#define GEN8_PPAT_LLCELLC	(2<<2)
-#define GEN8_PPAT_LLCeLLC	(3<<2)
-#define GEN8_PPAT_AGE(x)	(x<<4)
-#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
 	uint64_t pat;
 
 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
@@ -1888,6 +1877,33 @@
 	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
 }
 
+static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
+{
+	uint64_t pat;
+
+	/*
+	 * Map WB on BDW to snooped on CHV.
+	 *
+	 * Only the snoop bit has meaning for CHV, the rest is
+	 * ignored.
+	 *
+	 * Note that the hardware enforces snooping for all page
+	 * table accesses. The snoop bit is actually ignored for
+	 * PDEs.
+	 */
+	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(1, 0) |
+	      GEN8_PPAT(2, 0) |
+	      GEN8_PPAT(3, 0) |
+	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	I915_WRITE(GEN8_PRIVATE_PAT, pat);
+	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+}
+
 static int gen8_gmch_probe(struct drm_device *dev,
 			   size_t *gtt_total,
 			   size_t *stolen,
@@ -1908,12 +1924,20 @@
 
 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
 
-	*stolen = gen8_get_stolen_size(snb_gmch_ctl);
+	if (IS_CHERRYVIEW(dev)) {
+		*stolen = chv_get_stolen_size(snb_gmch_ctl);
+		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
+	} else {
+		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
+		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
+	}
 
-	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
 	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
 
-	gen8_setup_private_ppat(dev_priv);
+	if (IS_CHERRYVIEW(dev))
+		chv_setup_private_ppat(dev_priv);
+	else
+		bdw_setup_private_ppat(dev_priv);
 
 	ret = ggtt_probe_common(dev, gtt_size);
 
@@ -2043,6 +2067,10 @@
 		 gtt->base.total >> 20);
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
+#ifdef CONFIG_INTEL_IOMMU
+	if (intel_iommu_gfx_mapped)
+		DRM_INFO("VT-d active for gfx access\n");
+#endif
 	/*
 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
 	 * user's requested state against the hardware/driver capabilities.  We

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
new file mode 100644
index 0000000..1b96a06
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h

@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Please try to maintain the following order within this file unless it makes
+ * sense to do otherwise. From top to bottom:
+ * 1. typedefs
+ * 2. #defines, and macros
+ * 3. structure definitions
+ * 4. function prototypes
+ *
+ * Within each section, please try to order by generation in ascending order,
+ * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ */
+
+#ifndef __I915_GEM_GTT_H__
+#define __I915_GEM_GTT_H__
+
+typedef uint32_t gen6_gtt_pte_t;
+typedef uint64_t gen8_gtt_pte_t;
+typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
+
+#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
+
+#define I915_PPGTT_PT_ENTRIES		(PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PTE_CACHE_LLC		(2 << 1)
+#define GEN6_PTE_UNCACHED		(1 << 1)
+#define GEN6_PTE_VALID			(1 << 0)
+
+#define GEN6_PPGTT_PD_ENTRIES		512
+#define GEN6_PD_SIZE			(GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
+#define GEN6_PD_ALIGN			(PAGE_SIZE * 16)
+#define GEN6_PDE_VALID			(1 << 0)
+
+#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
+
+#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
+#define BYT_PTE_WRITEABLE		(1 << 1)
+
+/* Cacheability Control is a 4-bit value. The low three bits are stored in bits
+ * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
+#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_PTE_UNCACHED		(0)
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
+
+/* GEN8 legacy style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 |  11:0
+ * PDPE  |  PDE  |  PTE  | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ */
+#define GEN8_PDPE_SHIFT			30
+#define GEN8_PDPE_MASK			0x3
+#define GEN8_PDE_SHIFT			21
+#define GEN8_PDE_MASK			0x1ff
+#define GEN8_PTE_SHIFT			12
+#define GEN8_PTE_MASK			0x1ff
+#define GEN8_LEGACY_PDPS		4
+#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
+#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+
+#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
+#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+
+#define CHV_PPAT_SNOOP			(1<<6)
+#define GEN8_PPAT_AGE(x)		(x<<4)
+#define GEN8_PPAT_LLCeLLC		(3<<2)
+#define GEN8_PPAT_LLCELLC		(2<<2)
+#define GEN8_PPAT_LLC			(1<<2)
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
+#define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
+
+enum i915_cache_level;
+/**
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * will always be <= an object's lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head mm_list;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
+
+	/** This vma's place in the batchbuffer or on the eviction list */
+	struct list_head exec_list;
+
+	/**
+	 * Used for performing relocations during execbuffer insertion.
+	 */
+	struct hlist_node exec_node;
+	unsigned long exec_handle;
+	struct drm_i915_gem_exec_object2 *exec_entry;
+
+	/**
+	 * How many users have pinned this object in GTT space. The following
+	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
+	 * (via user_pin_count), execbuffer (objects are not allowed multiple
+	 * times for the same batchbuffer), and the framebuffer code. When
+	 * switching/pageflipping, the framebuffer code has at most two buffers
+	 * pinned per crtc.
+	 *
+	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+	 * bits with absolutely no headroom. So use 4 bits. */
+	unsigned int pin_count:4;
+#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
+
+	/** Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page. */
+	void (*unbind_vma)(struct i915_vma *vma);
+	/* Map an object into an address space with the given cache flags. */
+#define GLOBAL_BIND (1<<0)
+	void (*bind_vma)(struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags);
+};
+
+struct i915_address_space {
+	struct drm_mm mm;
+	struct drm_device *dev;
+	struct list_head global_link;
+	unsigned long start;		/* Start offset always 0 for dri2 */
+	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
+
+	struct {
+		dma_addr_t addr;
+		struct page *page;
+	} scratch;
+
+	/**
+	 * List of objects currently involved in rendering.
+	 *
+	 * Includes buffers having the contents of their GPU caches
+	 * flushed, not necessarily primitives.  last_rendering_seqno
+	 * represents when the rendering involved will be completed.
+	 *
+	 * A reference is held on the buffer while on this list.
+	 */
+	struct list_head active_list;
+
+	/**
+	 * LRU list of objects which are not in the ringbuffer and
+	 * are ready to unbind, but are still in the GTT.
+	 *
+	 * last_rendering_seqno is 0 while an object is in this list.
+	 *
+	 * A reference is not held on the buffer while on this list,
+	 * as merely being GTT-bound shouldn't prevent its being
+	 * freed, and we'll pull it off the list in the free path.
+	 */
+	struct list_head inactive_list;
+
+	/* FIXME: Need a more generic return type */
+	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
+				     enum i915_cache_level level,
+				     bool valid); /* Create a valid PTE */
+	void (*clear_range)(struct i915_address_space *vm,
+			    uint64_t start,
+			    uint64_t length,
+			    bool use_scratch);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct sg_table *st,
+			       uint64_t start,
+			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_address_space *vm);
+};
+
+/* The Graphics Translation Table is the way in which GEN hardware translates a
+ * Graphics Virtual Address into a Physical Address. In addition to the normal
+ * collateral associated with any va->pa translations GEN hardware also has a
+ * portion of the GTT which can be mapped by the CPU and remain both coherent
+ * and correct (in cases like swizzling). That region is referred to as GMADR in
+ * the spec.
+ */
+struct i915_gtt {
+	struct i915_address_space base;
+	size_t stolen_size;		/* Total size of stolen memory */
+
+	unsigned long mappable_end;	/* End offset that we can CPU map */
+	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
+	phys_addr_t mappable_base;	/* PA of our GMADR */
+
+	/** "Graphics Stolen Memory" holds the global PTEs */
+	void __iomem *gsm;
+
+	bool do_idle_maps;
+
+	int mtrr;
+
+	/* global gtt ops */
+	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
+			  size_t *stolen, phys_addr_t *mappable_base,
+			  unsigned long *mappable_end);
+};
+
+struct i915_hw_ppgtt {
+	struct i915_address_space base;
+	struct kref ref;
+	struct drm_mm_node node;
+	unsigned num_pd_entries;
+	unsigned num_pd_pages; /* gen8+ */
+	union {
+		struct page **pt_pages;
+		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
+	};
+	struct page *pd_pages;
+	union {
+		uint32_t pd_offset;
+		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
+	};
+	union {
+		dma_addr_t *pt_dma_addr;
+		dma_addr_t *gen8_pt_dma_addr[4];
+	};
+
+	struct intel_context *ctx;
+
+	int (*enable)(struct i915_hw_ppgtt *ppgtt);
+	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
+			 struct intel_engine_cs *ring,
+			 bool synchronous);
+	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
+};
+
+int i915_gem_gtt_init(struct drm_device *dev);
+void i915_gem_init_global_gtt(struct drm_device *dev);
+void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
+			       unsigned long mappable_end, unsigned long end);
+
+bool intel_enable_ppgtt(struct drm_device *dev, bool full);
+int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
+
+void i915_check_and_clear_faults(struct drm_device *dev);
+void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
+void i915_gem_restore_gtt_mappings(struct drm_device *dev);
+
+int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
+void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
+
+#endif

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
new file mode 100644
index 0000000..3521f99
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c

@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Mika Kuoppala <mika.kuoppala@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_renderstate.h"
+
+struct i915_render_state {
+	struct drm_i915_gem_object *obj;
+	unsigned long ggtt_offset;
+	void *batch;
+	u32 size;
+	u32 len;
+};
+
+static struct i915_render_state *render_state_alloc(struct drm_device *dev)
+{
+	struct i915_render_state *so;
+	struct page *page;
+	int ret;
+
+	so = kzalloc(sizeof(*so), GFP_KERNEL);
+	if (!so)
+		return ERR_PTR(-ENOMEM);
+
+	so->obj = i915_gem_alloc_object(dev, 4096);
+	if (so->obj == NULL) {
+		ret = -ENOMEM;
+		goto free;
+	}
+	so->size = 4096;
+
+	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
+	if (ret)
+		goto free_gem;
+
+	BUG_ON(so->obj->pages->nents != 1);
+	page = sg_page(so->obj->pages->sgl);
+
+	so->batch = kmap(page);
+	if (!so->batch) {
+		ret = -ENOMEM;
+		goto unpin;
+	}
+
+	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
+
+	return so;
+unpin:
+	i915_gem_object_ggtt_unpin(so->obj);
+free_gem:
+	drm_gem_object_unreference(&so->obj->base);
+free:
+	kfree(so);
+	return ERR_PTR(ret);
+}
+
+static void render_state_free(struct i915_render_state *so)
+{
+	kunmap(so->batch);
+	i915_gem_object_ggtt_unpin(so->obj);
+	drm_gem_object_unreference(&so->obj->base);
+	kfree(so);
+}
+
+static const struct intel_renderstate_rodata *
+render_state_get_rodata(struct drm_device *dev, const int gen)
+{
+	switch (gen) {
+	case 6:
+		return &gen6_null_state;
+	case 7:
+		return &gen7_null_state;
+	case 8:
+		return &gen8_null_state;
+	}
+
+	return NULL;
+}
+
+static int render_state_setup(const int gen,
+			      const struct intel_renderstate_rodata *rodata,
+			      struct i915_render_state *so)
+{
+	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
+	u32 reloc_index = 0;
+	u32 * const d = so->batch;
+	unsigned int i = 0;
+	int ret;
+
+	if (!rodata || rodata->batch_items * 4 > so->size)
+		return -EINVAL;
+
+	ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
+	if (ret)
+		return ret;
+
+	while (i < rodata->batch_items) {
+		u32 s = rodata->batch[i];
+
+		if (reloc_index < rodata->reloc_items &&
+		    i * 4  == rodata->reloc[reloc_index]) {
+
+			s += goffset & 0xffffffff;
+
+			/* We keep batch offsets max 32bit */
+			if (gen >= 8) {
+				if (i + 1 >= rodata->batch_items ||
+				    rodata->batch[i + 1] != 0)
+					return -EINVAL;
+
+				d[i] = s;
+				i++;
+				s = (goffset & 0xffffffff00000000ull) >> 32;
+			}
+
+			reloc_index++;
+		}
+
+		d[i] = s;
+		i++;
+	}
+
+	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
+	if (ret)
+		return ret;
+
+	if (rodata->reloc_items != reloc_index) {
+		DRM_ERROR("not all relocs resolved, %d out of %d\n",
+			  reloc_index, rodata->reloc_items);
+		return -EINVAL;
+	}
+
+	so->len = rodata->batch_items * 4;
+
+	return 0;
+}
+
+int i915_gem_render_state_init(struct intel_engine_cs *ring)
+{
+	const int gen = INTEL_INFO(ring->dev)->gen;
+	struct i915_render_state *so;
+	const struct intel_renderstate_rodata *rodata;
+	int ret;
+
+	if (WARN_ON(ring->id != RCS))
+		return -ENOENT;
+
+	rodata = render_state_get_rodata(ring->dev, gen);
+	if (rodata == NULL)
+		return 0;
+
+	so = render_state_alloc(ring->dev);
+	if (IS_ERR(so))
+		return PTR_ERR(so);
+
+	ret = render_state_setup(gen, rodata, so);
+	if (ret)
+		goto out;
+
+	ret = ring->dispatch_execbuffer(ring,
+					i915_gem_obj_ggtt_offset(so->obj),
+					so->len,
+					I915_DISPATCH_SECURE);
+	if (ret)
+		goto out;
+
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so->obj), ring);
+
+	ret = __i915_add_request(ring, NULL, so->obj, NULL);
+	/* __i915_add_request moves object to inactive if it fails */
+out:
+	render_state_free(so);
+	return ret;
+}

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..21ea928
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c

@@ -0,0 +1,711 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+/*
+ * Per-mm notifier state. Instances are found through the
+ * dev_priv->mmu_notifiers hash, keyed by the mm pointer, so userptr
+ * objects created against the same mm share one notifier.
+ */
+struct i915_mmu_notifier {
+	spinlock_t lock;		/* taken around 'objects' and 'serial' updates */
+	struct hlist_node node;		/* entry in dev_priv->mmu_notifiers */
+	struct mmu_notifier mn;		/* registered against 'mm' */
+	struct rb_root objects;		/* interval tree of i915_mmu_object.it */
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;	/* deferred unregister + kfree */
+	unsigned long count;		/* users; modified under dev->struct_mutex */
+	unsigned long serial;		/* bumped on every tree insert/remove */
+};
+
+/* Links one userptr GEM object into its notifier's interval tree. */
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;	/* notifier whose tree holds 'it' */
+	struct interval_tree_node it;	/* [userptr.ptr, userptr.ptr + size - 1] */
+	struct drm_i915_gem_object *obj;
+};
+
+/*
+ * mmu_notifier callback: for every tracked userptr object overlapping
+ * [start, end), cancel its pending get-pages worker and, if the object
+ * currently has pages, unbind all of its VMAs and drop the pages.
+ *
+ * The spinlock is dropped between objects so that struct_mutex can be
+ * taken. mn->serial is used to detect that the tree changed meanwhile,
+ * in which case the lookup restarts from the advanced 'start' instead of
+ * continuing from the stale iterator.
+ */
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		/*
+		 * Continue from the previous node only if the tree is unchanged
+		 * since we last looked.
+		 * NOTE(review): on the first pass 'it' is NULL and 'serial' is 0,
+		 * so this relies on mn->serial being non-zero - confirm.
+		 */
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			/* Hold the object across the unlocked section below. */
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			/* Run the unbind non-interruptibly; restored afterwards. */
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		/* Resume the search just past the object we handled. */
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+/* Only range-start invalidation is hooked; no other callback is set. */
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+/*
+ * Find the notifier already hashed for @mm on @dev, or NULL if there is
+ * none. The hash is keyed by the mm pointer value and is protected by
+ * dev->struct_mutex.
+ */
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	const unsigned long key = (unsigned long)mm;
+	struct i915_mmu_notifier *found = NULL;
+	struct i915_mmu_notifier *pos;
+
+	hash_for_each_possible(dev_priv->mmu_notifiers, pos, node, key) {
+		if (pos->mm != mm)
+			continue;
+
+		found = pos;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Return the notifier for (@dev, @mm), creating and registering a new one
+ * if none is hashed yet. A freshly created notifier starts with
+ * count == 0; the caller accounts for its own use.
+ *
+ * Returns ERR_PTR(-ENOMEM) if allocation fails, or ERR_PTR() of the
+ * __mmu_notifier_register() error.
+ *
+ * Caller must hold dev->struct_mutex (asserted) and, per the comment
+ * below, mm->mmap_sem for writing.
+ */
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	/*
+	 * Start at 1, not 0: the invalidate callback initialises its local
+	 * serial to 0 with a NULL iterator and only calls
+	 * interval_tree_iter_first() when the two serials differ. With a
+	 * zero serial here, an invalidation arriving before the first
+	 * insert would call interval_tree_iter_next() on a NULL node.
+	 * __i915_mmu_notifier_update_serial() already skips 0 on wrap.
+	 */
+	mmu->serial = 1;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+/* Work item: unregister the notifier from its mm, then free it. */
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *dying;
+
+	dying = container_of(work, struct i915_mmu_notifier, work);
+
+	mmu_notifier_unregister(&dying->mn, dying->mm);
+	kfree(dying);
+}
+
+/*
+ * Remove @mmu from the device hash and queue a work item that will
+ * unregister and free it. Caller holds dev->struct_mutex (asserted).
+ */
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct-mutex. Hence we defer the
+	 * unregistration to a workqueue where we hold no locks.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+/* Advance the tree-modification serial, skipping 0 when it wraps. */
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	mmu->serial++;
+	if (mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+/*
+ * Remove @mn from @mmu's interval tree and drop one count on @mmu,
+ * destroying the notifier when the count reaches zero. Caller holds
+ * dev->struct_mutex (asserted).
+ */
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	/* Tell a concurrent invalidate walk that its iterator is stale. */
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+/*
+ * Insert @mn into @mmu's interval tree, refusing overlapping ranges.
+ *
+ * Takes and releases dev->struct_mutex itself. Returns 0 on success,
+ * -EAGAIN if the range overlaps an object that still has get-pages
+ * workers outstanding, -EINVAL for any other overlap, or the error from
+ * i915_mutex_lock_interruptible().
+ */
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object in the range as it
+		 * either has cancelled gup work queued and we need to
+		 * return back to the user to give time for the gup-workers
+		 * to flush their object references upon which the object will
+		 * be removed from the interval-tree, or the range is
+		 * still in use by another client and the overlap is invalid.
+		 */
+
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+/*
+ * Detach @obj from its mmu notifier, if it has one, and clear the
+ * object's link to it. The tracking structure itself is not freed here.
+ */
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *tracker = obj->userptr.mn;
+
+	if (tracker != NULL) {
+		i915_mmu_notifier_del(tracker->mmu, tracker);
+		obj->userptr.mn = NULL;
+	}
+}
+
+/*
+ * Attach @obj to the mmu notifier of its mm so that its pages are
+ * released when the range is invalidated.
+ *
+ * With I915_USERPTR_UNSYNCHRONIZED no notifier is set up at all; that
+ * mode is only permitted with CAP_SYS_ADMIN (-EPERM otherwise).
+ *
+ * Returns 0 on success, or a negative errno from locking, notifier
+ * lookup/creation, allocation (-ENOMEM) or i915_mmu_notifier_add().
+ */
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	/* mmap_sem is taken before struct_mutex, matching the lock order. */
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	/* Interval tree bounds are inclusive, hence the -1. */
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	/* Undo the count taken above; the last user tears the notifier down. */
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+/* Built without CONFIG_MMU_NOTIFIER: nothing was registered, nothing to undo. */
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+/*
+ * Built without CONFIG_MMU_NOTIFIER: synchronised userptr objects cannot
+ * be supported (-ENODEV). Unsynchronised ones are allowed only with
+ * CAP_SYS_ADMIN (-EPERM otherwise).
+ */
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if (!(flags & I915_USERPTR_UNSYNCHRONIZED))
+		return -ENODEV;
+
+	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+}
+#endif
+
+/* State handed to the get-pages worker; allocated by the submitter, freed by the worker. */
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;	/* referenced for the worker's lifetime */
+	struct task_struct *task;		/* submitting task, passed to get_user_pages() */
+};
+
+
+/*
+ * swiotlb_active(): the value of swiotlb_nr_tbl() when CONFIG_SWIOTLB is
+ * enabled, constant 0 otherwise. st_set_pages() uses it to pick between
+ * one sg entry per page and sg_alloc_table_from_pages().
+ */
+#if IS_ENABLED(CONFIG_SWIOTLB)
+#define swiotlb_active() swiotlb_nr_tbl()
+#else
+#define swiotlb_active() 0
+#endif
+
+/*
+ * Allocate an sg_table describing @num_pages pages from @pvec and store
+ * it in *@st.
+ *
+ * When swiotlb is active every page gets its own PAGE_SIZE entry;
+ * otherwise the table is built by sg_alloc_table_from_pages().
+ *
+ * Returns 0 on success. On failure returns a negative errno and leaves
+ * *@st set to NULL.
+ */
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct sg_table *table;
+	int err;
+
+	table = kmalloc(sizeof(*table), GFP_KERNEL);
+	*st = table;
+	if (table == NULL)
+		return -ENOMEM;
+
+	if (!swiotlb_active()) {
+		err = sg_alloc_table_from_pages(table, pvec, num_pages,
+						0, num_pages << PAGE_SHIFT,
+						GFP_KERNEL);
+	} else {
+		err = sg_alloc_table(table, num_pages, GFP_KERNEL);
+		if (err == 0) {
+			struct scatterlist *entry;
+			int idx;
+
+			for_each_sg(table->sgl, entry, num_pages, idx)
+				sg_set_page(entry, pvec[idx], PAGE_SIZE, 0);
+		}
+	}
+
+	if (err) {
+		kfree(table);
+		*st = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * Worker that pins the user pages of a userptr object with
+ * get_user_pages(), outside of struct_mutex.
+ *
+ * The outcome is published through obj->userptr.work: ERR_PTR(0), i.e.
+ * NULL, on success with obj->pages populated, or ERR_PTR(-errno) on
+ * failure. The result is only published if this worker is still the one
+ * recorded in obj->userptr.work. A worker that was cancelled or
+ * superseded must not overwrite the pointer, because it may by then
+ * refer to a newer in-flight worker whose result would be discarded.
+ *
+ * The worker always drops its worker count, its object reference, its
+ * task reference, and any pages it pinned but did not hand over.
+ */
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	/* Try a cheap kmalloc first, then fall back to drm_malloc_ab(). */
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work == &work->work) {
+		if (pinned == num_pages) {
+			ret = st_set_pages(&obj->pages, pvec, num_pages);
+			if (ret == 0) {
+				list_add_tail(&obj->global_list,
+					      &to_i915(dev)->mm.unbound_list);
+				/* Pages now belong to obj->pages. */
+				pinned = 0;
+			}
+		}
+
+		obj->userptr.work = ERR_PTR(ret);
+	}
+
+	obj->userptr.workers--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	/* Drop whatever we pinned but did not hand over. */
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+/*
+ * get_pages hook for userptr objects.
+ *
+ * If called from the mm that owns the memory, first try to pin all pages
+ * with __get_user_pages_fast(). If that pins everything, build
+ * obj->pages and return the st_set_pages() result. Otherwise queue a
+ * worker to do the full get_user_pages() and return -EAGAIN, or consume
+ * the error a previous worker left in obj->userptr.work. Returns -ENOMEM
+ * if the page vector or the work item cannot be allocated.
+ *
+ * Pages pinned here but not handed over to obj->pages are released
+ * before returning.
+ */
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	/* The fast path is only attempted from the owning mm. */
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.workers++;
+
+					/* Both references are dropped by the worker. */
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				/* Consume a worker's error; an in-flight worker keeps -EAGAIN. */
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			/* Pages now belong to obj->pages; release none below. */
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+/*
+ * put_pages hook: drop the page references held through obj->pages and
+ * free the sg_table. Pages are marked dirty first if the object is dirty
+ * and still wanted (madv == I915_MADV_WILLNEED). No worker result may be
+ * pending when this is called.
+ */
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *table;
+	struct scatterlist *entry;
+	int n;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	/* Contents of an object that is no longer wanted need not be kept. */
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	table = obj->pages;
+	for_each_sg(table->sgl, entry, table->nents, n) {
+		struct page *pg = sg_page(entry);
+
+		if (obj->dirty)
+			set_page_dirty(pg);
+
+		mark_page_accessed(pg);
+		page_cache_release(pg);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(table);
+	kfree(table);
+}
+
+/*
+ * release hook: detach from the mmu notifier, then drop the mm reference
+ * stored in obj->userptr.mm, if any.
+ */
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct mm_struct *mm;
+
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	mm = obj->userptr.mm;
+	if (mm == NULL)
+		return;
+
+	mmput(mm);
+	obj->userptr.mm = NULL;
+}
+
+/*
+ * dmabuf_export hook: an object that has no mmu notifier yet gets one
+ * set up (with flags == 0) before it may be exported.
+ */
+static int
+i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mn == NULL)
+		return i915_gem_userptr_init__mmu_notifier(obj, 0);
+
+	return 0;
+}
+
+/* Object operations installed on every userptr object by the ioctl below. */
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.dmabuf_export = i915_gem_userptr_dmabuf_export,
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size)
+ *    and the size must be non-zero.
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ *
+ * Returns 0 and fills args->handle on success. Errors: -EINVAL for unknown
+ * flags, a zero size or a misaligned ptr/size; -E2BIG if the size exceeds
+ * the GTT; -EFAULT if the range fails access_ok(); -ENODEV for
+ * I915_USERPTR_READ_ONLY; -ENOMEM on allocation failure or if the task has
+ * no mm; otherwise the error from notifier setup or handle creation.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	/* A zero-length object would be tracked as the inverted interval
+	 * [ptr, ptr - 1] and has no pages to pin; reject it up front.
+	 */
+	if (!args->user_size)
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	if (args->flags & I915_USERPTR_READ_ONLY) {
+		/* On almost all of the current hw, we cannot tell the GPU that a
+		 * page is readonly, so this is just a placeholder in the uAPI.
+		 */
+		return -ENODEV;
+	}
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+/*
+ * One-time userptr setup for @dev: initialises the mmu_notifiers hash
+ * when CONFIG_MMU_NOTIFIER is enabled, and does nothing otherwise.
+ * Always returns 0.
+ */
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 12f1d43..87ec60e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c

@@ -42,6 +42,7 @@
 	case VCS: return "bsd";
 	case BCS: return "blt";
 	case VECS: return "vebox";
+	case VCS2: return "bsd2";
 	default: return "";
 	}
 }
@@ -204,6 +205,7 @@
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -257,7 +259,8 @@
 		err_printf(m, "  INSTPS: 0x%08x\n", ring->instps);
 	}
 	err_printf(m, "  INSTPM: 0x%08x\n", ring->instpm);
-	err_printf(m, "  FADDR: 0x%08x\n", ring->faddr);
+	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ring->faddr),
+		   lower_32_bits(ring->faddr));
 	if (INTEL_INFO(dev)->gen >= 6) {
 		err_printf(m, "  RC PSMI: 0x%08x\n", ring->rc_psmi);
 		err_printf(m, "  FAULT_REG: 0x%08x\n", ring->fault_reg);
@@ -452,16 +455,7 @@
 			err_printf(m, "%s --- HW Context = 0x%08x\n",
 				   dev_priv->ring[i].name,
 				   obj->gtt_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   obj->pages[0][elt],
-					   obj->pages[0][elt+1],
-					   obj->pages[0][elt+2],
-					   obj->pages[0][elt+3]);
-					offset += 16;
-			}
+			print_error_obj(m, obj);
 		}
 	}
 
@@ -648,6 +642,7 @@
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != NULL;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
@@ -752,7 +747,7 @@
 }
 
 static void i915_record_ring_state(struct drm_device *dev,
-				   struct intel_ring_buffer *ring,
+				   struct intel_engine_cs *ring,
 				   struct drm_i915_error_ring *ering)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -764,14 +759,14 @@
 			= I915_READ(RING_SYNC_0(ring->mmio_base));
 		ering->semaphore_mboxes[1]
 			= I915_READ(RING_SYNC_1(ring->mmio_base));
-		ering->semaphore_seqno[0] = ring->sync_seqno[0];
-		ering->semaphore_seqno[1] = ring->sync_seqno[1];
+		ering->semaphore_seqno[0] = ring->semaphore.sync_seqno[0];
+		ering->semaphore_seqno[1] = ring->semaphore.sync_seqno[1];
 	}
 
 	if (HAS_VEBOX(dev)) {
 		ering->semaphore_mboxes[2] =
 			I915_READ(RING_SYNC_2(ring->mmio_base));
-		ering->semaphore_seqno[2] = ring->sync_seqno[2];
+		ering->semaphore_seqno[2] = ring->semaphore.sync_seqno[2];
 	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
@@ -781,8 +776,10 @@
 		ering->instdone = I915_READ(RING_INSTDONE(ring->mmio_base));
 		ering->instps = I915_READ(RING_INSTPS(ring->mmio_base));
 		ering->bbaddr = I915_READ(RING_BBADDR(ring->mmio_base));
-		if (INTEL_INFO(dev)->gen >= 8)
+		if (INTEL_INFO(dev)->gen >= 8) {
+			ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(ring->mmio_base)) << 32;
 			ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(ring->mmio_base)) << 32;
+		}
 		ering->bbstate = I915_READ(RING_BBSTATE(ring->mmio_base));
 	} else {
 		ering->faddr = I915_READ(DMA_FADD_I8XX);
@@ -828,8 +825,8 @@
 		ering->hws = I915_READ(mmio);
 	}
 
-	ering->cpu_ring_head = ring->head;
-	ering->cpu_ring_tail = ring->tail;
+	ering->cpu_ring_head = ring->buffer->head;
+	ering->cpu_ring_tail = ring->buffer->tail;
 
 	ering->hangcheck_score = ring->hangcheck.score;
 	ering->hangcheck_action = ring->hangcheck.action;
@@ -862,7 +859,7 @@
 }
 
 
-static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
+static void i915_gem_record_active_context(struct intel_engine_cs *ring,
 					   struct drm_i915_error_state *error,
 					   struct drm_i915_error_ring *ering)
 {
@@ -875,10 +872,7 @@
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
-			ering->ctx = i915_error_object_create_sized(dev_priv,
-								    obj,
-								    &dev_priv->gtt.base,
-								    1);
+			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
 			break;
 		}
 	}
@@ -892,7 +886,7 @@
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
 
 		if (ring->dev == NULL)
 			continue;
@@ -936,7 +930,7 @@
 		}
 
 		error->ring[i].ringbuffer =
-			i915_error_ggtt_object_create(dev_priv, ring->obj);
+			i915_error_ggtt_object_create(dev_priv, ring->buffer->obj);
 
 		if (ring->status_page.obj)
 			error->ring[i].hws_page =
@@ -1037,7 +1031,6 @@
 				   struct drm_i915_error_state *error)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int pipe;
 
 	/* General organization
 	 * 1. Registers specific to a single generation
@@ -1062,9 +1055,6 @@
 		error->gfx_mode = I915_READ(GFX_MODE);
 	}
 
-	if (IS_GEN2(dev))
-		error->ier = I915_READ16(IER);
-
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_INFO(dev)->gen >= 7)
 		error->forcewake = I915_READ(FORCEWAKE_MT);
@@ -1088,9 +1078,10 @@
 	if (HAS_PCH_SPLIT(dev))
 		error->ier = I915_READ(DEIER) | I915_READ(GTIER);
 	else {
-		error->ier = I915_READ(IER);
-		for_each_pipe(pipe)
-			error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
+		if (IS_GEN2(dev))
+			error->ier = I915_READ16(IER);
+		else
+			error->ier = I915_READ(IER);
 	}
 
 	/* 4: Everything else */

diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c
index 3c59584..2e0613e 100644
--- a/drivers/gpu/drm/i915/i915_ioc32.c
+++ b/drivers/gpu/drm/i915/i915_ioc32.c

@@ -208,7 +208,7 @@
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(i915_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(i915_compat_ioctls))
 		fn = i915_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0b99de9..6f8017a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c

@@ -80,17 +80,64 @@
 	[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
 };
 
+/*
+ * IIR can theoretically queue up two events. Be paranoid.
+ *
+ * Reset one indexed GEN8 interrupt bank (GEN8_<type>_{IMR,IER,IIR}(which)):
+ * write all-ones to IMR, zero to IER, then all-ones to IIR twice, with
+ * posting reads in between. Expects 'dev_priv' to be in scope for
+ * I915_WRITE/POSTING_READ - TODO confirm against their definitions.
+ */
+#define GEN8_IRQ_RESET_NDX(type, which) do { \
+	I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \
+	POSTING_READ(GEN8_##type##_IMR(which)); \
+	I915_WRITE(GEN8_##type##_IER(which), 0); \
+	I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
+	POSTING_READ(GEN8_##type##_IIR(which)); \
+	I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
+	POSTING_READ(GEN8_##type##_IIR(which)); \
+} while (0)
+
+/*
+ * Same sequence as GEN8_IRQ_RESET_NDX for a non-indexed register triple
+ * named <type>IMR / <type>IER / <type>IIR (the prefix is token-pasted).
+ */
+#define GEN5_IRQ_RESET(type) do { \
+	I915_WRITE(type##IMR, 0xffffffff); \
+	POSTING_READ(type##IMR); \
+	I915_WRITE(type##IER, 0); \
+	I915_WRITE(type##IIR, 0xffffffff); \
+	POSTING_READ(type##IIR); \
+	I915_WRITE(type##IIR, 0xffffffff); \
+	POSTING_READ(type##IIR); \
+} while (0)
+
+/*
+ * We should clear IMR at preinstall/uninstall, and just check at postinstall.
+ *
+ * Read @reg; if it is non-zero, WARN with the register and value, then
+ * write all-ones to it twice (with posting reads) to clear it. Note that
+ * @reg is expanded several times, so it must be free of side effects.
+ */
+#define GEN5_ASSERT_IIR_IS_ZERO(reg) do { \
+	u32 val = I915_READ(reg); \
+	if (val) { \
+		WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n", \
+		     (reg), val); \
+		I915_WRITE((reg), 0xffffffff); \
+		POSTING_READ(reg); \
+		I915_WRITE((reg), 0xffffffff); \
+		POSTING_READ(reg); \
+	} \
+} while (0)
+
+/*
+ * Program one indexed GEN8 interrupt bank: check that its IIR is clear
+ * (warning and clearing it if not), then write @imr_val to IMR and
+ * @ier_val to IER, followed by a posting read of IER.
+ */
+#define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \
+	GEN5_ASSERT_IIR_IS_ZERO(GEN8_##type##_IIR(which)); \
+	I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
+	I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
+	POSTING_READ(GEN8_##type##_IER(which)); \
+} while (0)
+
+/*
+ * Non-indexed counterpart of GEN8_IRQ_INIT_NDX, operating on the
+ * <type>IIR / <type>IMR / <type>IER registers.
+ */
+#define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
+	GEN5_ASSERT_IIR_IS_ZERO(type##IIR); \
+	I915_WRITE(type##IMR, (imr_val)); \
+	I915_WRITE(type##IER, (ier_val)); \
+	POSTING_READ(type##IER); \
+} while (0)
+
 /* For display hotplug interrupt */
 static void
 ironlake_enable_display_irq(struct drm_i915_private *dev_priv, u32 mask)
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (dev_priv->pm.irqs_disabled) {
-		WARN(1, "IRQs disabled\n");
-		dev_priv->pm.regsave.deimr &= ~mask;
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
 		return;
-	}
 
 	if ((dev_priv->irq_mask & mask) != 0) {
 		dev_priv->irq_mask &= ~mask;
@@ -104,11 +151,8 @@
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (dev_priv->pm.irqs_disabled) {
-		WARN(1, "IRQs disabled\n");
-		dev_priv->pm.regsave.deimr |= mask;
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
 		return;
-	}
 
 	if ((dev_priv->irq_mask & mask) != mask) {
 		dev_priv->irq_mask |= mask;
@@ -129,13 +173,8 @@
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (dev_priv->pm.irqs_disabled) {
-		WARN(1, "IRQs disabled\n");
-		dev_priv->pm.regsave.gtimr &= ~interrupt_mask;
-		dev_priv->pm.regsave.gtimr |= (~enabled_irq_mask &
-						interrupt_mask);
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
 		return;
-	}
 
 	dev_priv->gt_irq_mask &= ~interrupt_mask;
 	dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask);
@@ -167,13 +206,8 @@
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (dev_priv->pm.irqs_disabled) {
-		WARN(1, "IRQs disabled\n");
-		dev_priv->pm.regsave.gen6_pmimr &= ~interrupt_mask;
-		dev_priv->pm.regsave.gen6_pmimr |= (~enabled_irq_mask &
-						     interrupt_mask);
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
 		return;
-	}
 
 	new_val = dev_priv->pm_irq_mask;
 	new_val &= ~interrupt_mask;
@@ -214,6 +248,46 @@
 	return true;
 }
 
+/**
+  * bdw_update_pm_irq - update GT interrupt 2
+  * @dev_priv: driver private
+  * @interrupt_mask: mask of interrupt bits to update
+  * @enabled_irq_mask: mask of interrupt bits to enable
+  *
+  * Copied from the snb function, updated with relevant register offsets
+  *
+  * Must be called with dev_priv->irq_lock held. Does nothing (beyond a
+  * WARN) while dev_priv->pm.irqs_disabled is set.
+  */
+static void bdw_update_pm_irq(struct drm_i915_private *dev_priv,
+			      uint32_t interrupt_mask,
+			      uint32_t enabled_irq_mask)
+{
+	uint32_t new_val;
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
+		return;
+
+	/*
+	 * Within @interrupt_mask, clear the bits that are to be enabled and
+	 * set the rest; bits outside @interrupt_mask are left untouched.
+	 */
+	new_val = dev_priv->pm_irq_mask;
+	new_val &= ~interrupt_mask;
+	new_val |= (~enabled_irq_mask & interrupt_mask);
+
+	/* Only touch the hardware when the cached mask actually changes. */
+	if (new_val != dev_priv->pm_irq_mask) {
+		dev_priv->pm_irq_mask = new_val;
+		I915_WRITE(GEN8_GT_IMR(2), dev_priv->pm_irq_mask);
+		POSTING_READ(GEN8_GT_IMR(2));
+	}
+}
+
+/* Enable the PM interrupt bits in @mask; see bdw_update_pm_irq() for locking. */
+void bdw_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	bdw_update_pm_irq(dev_priv, mask, mask);
+}
+
+/* Disable the PM interrupt bits in @mask; see bdw_update_pm_irq() for locking. */
+void bdw_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	bdw_update_pm_irq(dev_priv, mask, 0);
+}
+
 static bool cpt_can_enable_serr_int(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -232,16 +306,51 @@
 	return true;
 }
 
-static void i9xx_clear_fifo_underrun(struct drm_device *dev, enum pipe pipe)
+/*
+ * Check every CRTC's PIPESTAT register for a set FIFO underrun status
+ * bit; where found, write the bit back and log an error for that pipe.
+ * CRTCs with cpu_fifo_underrun_disabled set are skipped. Runs under
+ * dev_priv->irq_lock with interrupts saved/restored.
+ */
+void i9xx_check_fifo_underruns(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+
+	for_each_intel_crtc(dev, crtc) {
+		u32 reg = PIPESTAT(crtc->pipe);
+		u32 pipestat;
+
+		if (crtc->cpu_fifo_underrun_disabled)
+			continue;
+
+		/* Keep only the upper 16 bits of the register value. */
+		pipestat = I915_READ(reg) & 0xffff0000;
+		if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0)
+			continue;
+
+		/* presumably write-one-to-clear for the status bit - TODO confirm */
+		I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
+		POSTING_READ(reg);
+
+		DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe));
+	}
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+}
+
+static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev,
+					     enum pipe pipe,
+					     bool enable, bool old)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg = PIPESTAT(pipe);
-	u32 pipestat = I915_READ(reg) & 0x7fff0000;
+	u32 pipestat = I915_READ(reg) & 0xffff0000;
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
-	POSTING_READ(reg);
+	if (enable) {
+		I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
+		POSTING_READ(reg);
+	} else {
+		if (old && pipestat & PIPE_FIFO_UNDERRUN_STATUS)
+			DRM_ERROR("pipe %c underrun\n", pipe_name(pipe));
+	}
 }
 
 static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -258,7 +367,8 @@
 }
 
 static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
-						  enum pipe pipe, bool enable)
+						  enum pipe pipe,
+						  bool enable, bool old)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	if (enable) {
@@ -269,15 +379,12 @@
 
 		ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
 	} else {
-		bool was_enabled = !(I915_READ(DEIMR) & DE_ERR_INT_IVB);
-
-		/* Change the state _after_ we've read out the current one. */
 		ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
 
-		if (!was_enabled &&
-		    (I915_READ(GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe))) {
-			DRM_DEBUG_KMS("uncleared fifo underrun on pipe %c\n",
-				      pipe_name(pipe));
+		if (old &&
+		    I915_READ(GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe)) {
+			DRM_ERROR("uncleared fifo underrun on pipe %c\n",
+				  pipe_name(pipe));
 		}
 	}
 }
@@ -313,14 +420,8 @@
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (dev_priv->pm.irqs_disabled &&
-	    (interrupt_mask & SDE_HOTPLUG_MASK_CPT)) {
-		WARN(1, "IRQs disabled\n");
-		dev_priv->pm.regsave.sdeimr &= ~interrupt_mask;
-		dev_priv->pm.regsave.sdeimr |= (~enabled_irq_mask &
-						 interrupt_mask);
+	if (WARN_ON(dev_priv->pm.irqs_disabled))
 		return;
-	}
 
 	I915_WRITE(SDEIMR, sdeimr);
 	POSTING_READ(SDEIMR);
@@ -346,7 +447,7 @@
 
 static void cpt_set_fifo_underrun_reporting(struct drm_device *dev,
 					    enum transcoder pch_transcoder,
-					    bool enable)
+					    bool enable, bool old)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -359,16 +460,12 @@
 
 		ibx_enable_display_interrupt(dev_priv, SDE_ERROR_CPT);
 	} else {
-		uint32_t tmp = I915_READ(SERR_INT);
-		bool was_enabled = !(I915_READ(SDEIMR) & SDE_ERROR_CPT);
-
-		/* Change the state _after_ we've read out the current one. */
 		ibx_disable_display_interrupt(dev_priv, SDE_ERROR_CPT);
 
-		if (!was_enabled &&
-		    (tmp & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder))) {
-			DRM_DEBUG_KMS("uncleared pch fifo underrun on pch transcoder %c\n",
-				      transcoder_name(pch_transcoder));
+		if (old && I915_READ(SERR_INT) &
+		    SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) {
+			DRM_ERROR("uncleared pch fifo underrun on pch transcoder %c\n",
+				  transcoder_name(pch_transcoder));
 		}
 	}
 }
@@ -387,34 +484,29 @@
  *
  * Returns the previous state of underrun reporting.
  */
-bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
-					     enum pipe pipe, bool enable)
+static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
+						    enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	bool ret;
+	bool old;
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	ret = !intel_crtc->cpu_fifo_underrun_disabled;
-
-	if (enable == ret)
-		goto done;
-
+	old = !intel_crtc->cpu_fifo_underrun_disabled;
 	intel_crtc->cpu_fifo_underrun_disabled = !enable;
 
-	if (enable && (INTEL_INFO(dev)->gen < 5 || IS_VALLEYVIEW(dev)))
-		i9xx_clear_fifo_underrun(dev, pipe);
+	if (INTEL_INFO(dev)->gen < 5 || IS_VALLEYVIEW(dev))
+		i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (IS_GEN5(dev) || IS_GEN6(dev))
 		ironlake_set_fifo_underrun_reporting(dev, pipe, enable);
 	else if (IS_GEN7(dev))
-		ivybridge_set_fifo_underrun_reporting(dev, pipe, enable);
+		ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (IS_GEN8(dev))
 		broadwell_set_fifo_underrun_reporting(dev, pipe, enable);
 
-done:
-	return ret;
+	return old;
 }
 
 bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
@@ -463,7 +555,7 @@
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	unsigned long flags;
-	bool ret;
+	bool old;
 
 	/*
 	 * NOTE: Pre-LPT has a fixed cpu pipe -> pch transcoder mapping, but LPT
@@ -476,21 +568,16 @@
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 
-	ret = !intel_crtc->pch_fifo_underrun_disabled;
-
-	if (enable == ret)
-		goto done;
-
+	old = !intel_crtc->pch_fifo_underrun_disabled;
 	intel_crtc->pch_fifo_underrun_disabled = !enable;
 
 	if (HAS_PCH_IBX(dev))
 		ibx_set_fifo_underrun_reporting(dev, pch_transcoder, enable);
 	else
-		cpt_set_fifo_underrun_reporting(dev, pch_transcoder, enable);
+		cpt_set_fifo_underrun_reporting(dev, pch_transcoder, enable, old);
 
-done:
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
-	return ret;
+	return old;
 }
 
 
@@ -503,8 +590,10 @@
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (WARN_ON_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK ||
-	                 status_mask & ~PIPESTAT_INT_STATUS_MASK))
+	if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK ||
+		      status_mask & ~PIPESTAT_INT_STATUS_MASK,
+		      "pipe %c: enable_mask=0x%x, status_mask=0x%x\n",
+		      pipe_name(pipe), enable_mask, status_mask))
 		return;
 
 	if ((pipestat & enable_mask) == enable_mask)
@@ -527,8 +616,10 @@
 
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (WARN_ON_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK ||
-	                 status_mask & ~PIPESTAT_INT_STATUS_MASK))
+	if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK ||
+		      status_mask & ~PIPESTAT_INT_STATUS_MASK,
+		      "pipe %c: enable_mask=0x%x, status_mask=0x%x\n",
+		      pipe_name(pipe), enable_mask, status_mask))
 		return;
 
 	if ((pipestat & enable_mask) == 0)
@@ -546,11 +637,17 @@
 	u32 enable_mask = status_mask << 16;
 
 	/*
-	 * On pipe A we don't support the PSR interrupt yet, on pipe B the
-	 * same bit MBZ.
+	 * On pipe A we don't support the PSR interrupt yet,
+	 * on pipe B and C the same bit is MBZ (must be zero).
 	 */
 	if (WARN_ON_ONCE(status_mask & PIPE_A_PSR_STATUS_VLV))
 		return 0;
+	/*
+	 * On pipe B and C we don't support the PSR interrupt yet, on pipe
+	 * A the same bit is for perf counters which we don't use either.
+	 */
+	if (WARN_ON_ONCE(status_mask & PIPE_B_PSR_STATUS_VLV))
+		return 0;
 
 	enable_mask &= ~(PIPE_FIFO_UNDERRUN_STATUS |
 			 SPRITE0_FLIP_DONE_INT_EN_VLV |
@@ -637,6 +734,56 @@
 	}
 }
 
+/*
+ * This timing diagram depicts the video signal in and
+ * around the vertical blanking period.
+ *
+ * Assumptions about the fictitious mode used in this example:
+ *  vblank_start >= 3
+ *  vsync_start = vblank_start + 1
+ *  vsync_end = vblank_start + 2
+ *  vtotal = vblank_start + 3
+ *
+ *           start of vblank:
+ *           latch double buffered registers
+ *           increment frame counter (ctg+)
+ *           generate start of vblank interrupt (gen4+)
+ *           |
+ *           |          frame start:
+ *           |          generate frame start interrupt (aka. vblank interrupt) (gmch)
+ *           |          may be shifted forward 1-3 extra lines via PIPECONF
+ *           |          |
+ *           |          |  start of vsync:
+ *           |          |  generate vsync interrupt
+ *           |          |  |
+ * ___xxxx___    ___xxxx___    ___xxxx___    ___xxxx___    ___xxxx___    ___xxxx
+ *       .   \hs/   .      \hs/          \hs/          \hs/   .      \hs/
+ * ----va---> <-----------------vb--------------------> <--------va-------------
+ *       |          |       <----vs----->                     |
+ * -vbs-----> <---vbs+1---> <---vbs+2---> <-----0-----> <-----1-----> <-----2--- (scanline counter gen2)
+ * -vbs-2---> <---vbs-1---> <---vbs-----> <---vbs+1---> <---vbs+2---> <-----0--- (scanline counter gen3+)
+ * -vbs-2---> <---vbs-2---> <---vbs-1---> <---vbs-----> <---vbs+1---> <---vbs+2- (scanline counter hsw+ hdmi)
+ *       |          |                                         |
+ *       last visible pixel                                   first visible pixel
+ *                  |                                         increment frame counter (gen3/4)
+ *                  pixel counter = vblank_start * htotal     pixel counter = 0 (gen3/4)
+ *
+ * x  = horizontal active
+ * _  = horizontal blanking
+ * hs = horizontal sync
+ * va = vertical active
+ * vb = vertical blanking
+ * vs = vertical sync
+ * vbs = vblank_start (number)
+ *
+ * Summary:
+ * - most events happen at the start of horizontal sync
+ * - frame start happens at the start of horizontal blank, 1-4 lines
+ *   (depending on PIPECONF settings) after the start of vblank
+ * - gen3/4 pixel and frame counter are synchronized with the start
+ *   of horizontal active on the first line of vertical active
+ */
+
 static u32 i8xx_get_vblank_counter(struct drm_device *dev, int pipe)
 {
 	/* Gen2 doesn't have a hardware frame counter */
@@ -651,7 +798,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long high_frame;
 	unsigned long low_frame;
-	u32 high1, high2, low, pixel, vbl_start;
+	u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal;
 
 	if (!i915_pipe_enabled(dev, pipe)) {
 		DRM_DEBUG_DRIVER("trying to get vblank count for disabled "
@@ -665,17 +812,28 @@
 		const struct drm_display_mode *mode =
 			&intel_crtc->config.adjusted_mode;
 
-		vbl_start = mode->crtc_vblank_start * mode->crtc_htotal;
+		htotal = mode->crtc_htotal;
+		hsync_start = mode->crtc_hsync_start;
+		vbl_start = mode->crtc_vblank_start;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			vbl_start = DIV_ROUND_UP(vbl_start, 2);
 	} else {
 		enum transcoder cpu_transcoder = (enum transcoder) pipe;
-		u32 htotal;
 
 		htotal = ((I915_READ(HTOTAL(cpu_transcoder)) >> 16) & 0x1fff) + 1;
+		hsync_start = (I915_READ(HSYNC(cpu_transcoder))  & 0x1fff) + 1;
 		vbl_start = (I915_READ(VBLANK(cpu_transcoder)) & 0x1fff) + 1;
-
-		vbl_start *= htotal;
+		if ((I915_READ(PIPECONF(cpu_transcoder)) &
+		     PIPECONF_INTERLACE_MASK) != PIPECONF_PROGRESSIVE)
+			vbl_start = DIV_ROUND_UP(vbl_start, 2);
 	}
 
+	/* Convert to pixel count */
+	vbl_start *= htotal;
+
+	/* Start of vblank event occurs at start of hsync */
+	vbl_start -= htotal - hsync_start;
+
 	high_frame = PIPEFRAME(pipe);
 	low_frame = PIPEFRAMEPIXEL(pipe);
 
@@ -719,24 +877,28 @@
 /* raw reads, only for fast reads of display block, no need for forcewake etc. */
 #define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
 
-static bool ilk_pipe_in_vblank_locked(struct drm_device *dev, enum pipe pipe)
+static int __intel_get_crtc_scanline(struct intel_crtc *crtc)
 {
+	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t status;
-	int reg;
+	const struct drm_display_mode *mode = &crtc->config.adjusted_mode;
+	enum pipe pipe = crtc->pipe;
+	int position, vtotal;
 
-	if (INTEL_INFO(dev)->gen >= 8) {
-		status = GEN8_PIPE_VBLANK;
-		reg = GEN8_DE_PIPE_ISR(pipe);
-	} else if (INTEL_INFO(dev)->gen >= 7) {
-		status = DE_PIPE_VBLANK_IVB(pipe);
-		reg = DEISR;
-	} else {
-		status = DE_PIPE_VBLANK(pipe);
-		reg = DEISR;
-	}
+	vtotal = mode->crtc_vtotal;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		vtotal /= 2;
 
-	return __raw_i915_read32(dev_priv, reg) & status;
+	if (IS_GEN2(dev))
+		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
+	else
+		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
+
+	/*
+	 * See update_scanline_offset() for the details on the
+	 * scanline_offset adjustment.
+	 */
+	return (position + crtc->scanline_offset) % vtotal;
 }
 
 static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
@@ -748,7 +910,7 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	const struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
 	int position;
-	int vbl_start, vbl_end, htotal, vtotal;
+	int vbl_start, vbl_end, hsync_start, htotal, vtotal;
 	bool in_vbl = true;
 	int ret = 0;
 	unsigned long irqflags;
@@ -760,6 +922,7 @@
 	}
 
 	htotal = mode->crtc_htotal;
+	hsync_start = mode->crtc_hsync_start;
 	vtotal = mode->crtc_vtotal;
 	vbl_start = mode->crtc_vblank_start;
 	vbl_end = mode->crtc_vblank_end;
@@ -778,7 +941,7 @@
 	 * following code must not block on uncore.lock.
 	 */
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-	
+
 	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
 
 	/* Get optional system timestamp before query. */
@@ -789,68 +952,7 @@
 		/* No obvious pixelcount register. Only query vertical
 		 * scanout position from Display scan line register.
 		 */
-		if (IS_GEN2(dev))
-			position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
-		else
-			position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
-
-		if (HAS_DDI(dev)) {
-			/*
-			 * On HSW HDMI outputs there seems to be a 2 line
-			 * difference, whereas eDP has the normal 1 line
-			 * difference that earlier platforms have. External
-			 * DP is unknown. For now just check for the 2 line
-			 * difference case on all output types on HSW+.
-			 *
-			 * This might misinterpret the scanline counter being
-			 * one line too far along on eDP, but that's less
-			 * dangerous than the alternative since that would lead
-			 * the vblank timestamp code astray when it sees a
-			 * scanline count before vblank_start during a vblank
-			 * interrupt.
-			 */
-			in_vbl = ilk_pipe_in_vblank_locked(dev, pipe);
-			if ((in_vbl && (position == vbl_start - 2 ||
-					position == vbl_start - 1)) ||
-			    (!in_vbl && (position == vbl_end - 2 ||
-					 position == vbl_end - 1)))
-				position = (position + 2) % vtotal;
-		} else if (HAS_PCH_SPLIT(dev)) {
-			/*
-			 * The scanline counter increments at the leading edge
-			 * of hsync, ie. it completely misses the active portion
-			 * of the line. Fix up the counter at both edges of vblank
-			 * to get a more accurate picture whether we're in vblank
-			 * or not.
-			 */
-			in_vbl = ilk_pipe_in_vblank_locked(dev, pipe);
-			if ((in_vbl && position == vbl_start - 1) ||
-			    (!in_vbl && position == vbl_end - 1))
-				position = (position + 1) % vtotal;
-		} else {
-			/*
-			 * ISR vblank status bits don't work the way we'd want
-			 * them to work on non-PCH platforms (for
-			 * ilk_pipe_in_vblank_locked()), and there doesn't
-			 * appear any other way to determine if we're currently
-			 * in vblank.
-			 *
-			 * Instead let's assume that we're already in vblank if
-			 * we got called from the vblank interrupt and the
-			 * scanline counter value indicates that we're on the
-			 * line just prior to vblank start. This should result
-			 * in the correct answer, unless the vblank interrupt
-			 * delivery really got delayed for almost exactly one
-			 * full frame/field.
-			 */
-			if (flags & DRM_CALLED_FROM_VBLIRQ &&
-			    position == vbl_start - 1) {
-				position = (position + 1) % vtotal;
-
-				/* Signal this correction as "applied". */
-				ret |= 0x8;
-			}
-		}
+		position = __intel_get_crtc_scanline(intel_crtc);
 	} else {
 		/* Have access to pixelcount since start of frame.
 		 * We can split this into vertical and horizontal
@@ -862,6 +964,29 @@
 		vbl_start *= htotal;
 		vbl_end *= htotal;
 		vtotal *= htotal;
+
+		/*
+		 * In interlaced modes, the pixel counter counts all pixels,
+		 * so one field will have htotal more pixels. In order to avoid
+		 * the reported position from jumping backwards when the pixel
+		 * counter is beyond the length of the shorter field, just
+		 * clamp the position to the length of the shorter field. This
+		 * matches how the scanline counter based position works since
+		 * the scanline counter doesn't count the two half lines.
+		 */
+		if (position >= vtotal)
+			position = vtotal - 1;
+
+		/*
+		 * Start of vblank interrupt is triggered at start of hsync,
+		 * just prior to the first active line of vblank. However we
+		 * consider lines to start at the leading edge of horizontal
+		 * active. So, should we get here before we've crossed into
+		 * the horizontal active of the first line in vblank, we would
+		 * not set the DRM_SCANOUTPOS_INVBL flag. In order to fix that,
+		 * always add htotal-hsync_start to the current pixel position.
+		 */
+		position = (position + htotal - hsync_start) % vtotal;
 	}
 
 	/* Get optional system timestamp after query. */
@@ -900,6 +1025,19 @@
 	return ret;
 }
 
+int intel_get_crtc_scanline(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	unsigned long irqflags;
+	int position;
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	position = __intel_get_crtc_scanline(crtc);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+	return position;
+}
+
 static int i915_get_vblank_timestamp(struct drm_device *dev, int pipe,
 			      int *max_error,
 			      struct timeval *vblank_time,
@@ -945,7 +1083,7 @@
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
 		      connector->base.id,
-		      drm_get_connector_name(connector),
+		      connector->name,
 		      drm_get_connector_status_name(old_status),
 		      drm_get_connector_status_name(connector->status));
 
@@ -990,7 +1128,7 @@
 		    connector->polled == DRM_CONNECTOR_POLL_HPD) {
 			DRM_INFO("HPD interrupt storm detected on connector %s: "
 				 "switching from hotplug detection to polling\n",
-				drm_get_connector_name(connector));
+				connector->name);
 			dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark = HPD_DISABLED;
 			connector->polled = DRM_CONNECTOR_POLL_CONNECT
 				| DRM_CONNECTOR_POLL_DISCONNECT;
@@ -998,7 +1136,7 @@
 		}
 		if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) {
 			DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n",
-				      drm_get_connector_name(connector), intel_encoder->hpd_pin);
+				      connector->name, intel_encoder->hpd_pin);
 		}
 	}
 	 /* if there were no outputs to poll, poll was disabled,
@@ -1073,9 +1211,9 @@
 }
 
 static void notify_ring(struct drm_device *dev,
-			struct intel_ring_buffer *ring)
+			struct intel_engine_cs *ring)
 {
-	if (ring->obj == NULL)
+	if (!intel_ring_initialized(ring))
 		return;
 
 	trace_i915_gem_request_complete(ring);
@@ -1094,8 +1232,12 @@
 	spin_lock_irq(&dev_priv->irq_lock);
 	pm_iir = dev_priv->rps.pm_iir;
 	dev_priv->rps.pm_iir = 0;
-	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
-	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	if (IS_BROADWELL(dev_priv->dev))
+		bdw_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	else {
+		/* Make sure not to corrupt PMIMR state used by ringbuffer */
+		snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	}
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* Make sure we didn't queue anything we're not going to process. */
@@ -1292,6 +1434,19 @@
 		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
+static void gen8_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	if ((pm_iir & dev_priv->pm_rps_events) == 0)
+		return;
+
+	spin_lock(&dev_priv->irq_lock);
+	dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
+	bdw_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
+	spin_unlock(&dev_priv->irq_lock);
+
+	queue_work(dev_priv->wq, &dev_priv->rps.work);
+}
+
 static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 				       struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
@@ -1315,18 +1470,32 @@
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
 
-	if (master_ctl & GEN8_GT_VCS1_IRQ) {
+	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
 		tmp = I915_READ(GEN8_GT_IIR(1));
 		if (tmp) {
 			ret = IRQ_HANDLED;
 			vcs = tmp >> GEN8_VCS1_IRQ_SHIFT;
 			if (vcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, &dev_priv->ring[VCS]);
+			vcs = tmp >> GEN8_VCS2_IRQ_SHIFT;
+			if (vcs & GT_RENDER_USER_INTERRUPT)
+				notify_ring(dev, &dev_priv->ring[VCS2]);
 			I915_WRITE(GEN8_GT_IIR(1), tmp);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
 
+	if (master_ctl & GEN8_GT_PM_IRQ) {
+		tmp = I915_READ(GEN8_GT_IIR(2));
+		if (tmp & dev_priv->pm_rps_events) {
+			ret = IRQ_HANDLED;
+			gen8_rps_irq_handler(dev_priv, tmp);
+			I915_WRITE(GEN8_GT_IIR(2),
+				   tmp & dev_priv->pm_rps_events);
+		} else
+			DRM_ERROR("The master control interrupt lied (PM)!\n");
+	}
+
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
 		tmp = I915_READ(GEN8_GT_IIR(3));
 		if (tmp) {
@@ -1549,6 +1718,19 @@
 	}
 }
 
+static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe)
+{
+	struct intel_crtc *crtc;
+
+	if (!drm_handle_vblank(dev, pipe))
+		return false;
+
+	crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
+	wake_up(&crtc->vbl_wait);
+
+	return true;
+}
+
 static void valleyview_pipestat_irq_handler(struct drm_device *dev, u32 iir)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1578,6 +1760,9 @@
 		case PIPE_B:
 			iir_bit = I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
 			break;
+		case PIPE_C:
+			iir_bit = I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+			break;
 		}
 		if (iir & iir_bit)
 			mask |= dev_priv->pipestat_irq_mask[pipe];
@@ -1600,7 +1785,7 @@
 
 	for_each_pipe(pipe) {
 		if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS)
-			drm_handle_vblank(dev, pipe);
+			intel_pipe_handle_vblank(dev, pipe);
 
 		if (pipe_stats[pipe] & PLANE_FLIP_DONE_INT_STATUS_VLV) {
 			intel_prepare_page_flip(dev, pipe);
@@ -1619,9 +1804,36 @@
 		gmbus_irq_handler(dev);
 }
 
+static void i9xx_hpd_irq_handler(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
+
+	if (IS_G4X(dev)) {
+		u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
+
+		intel_hpd_irq_handler(dev, hotplug_trigger, hpd_status_g4x);
+	} else {
+		u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
+
+		intel_hpd_irq_handler(dev, hotplug_trigger, hpd_status_i915);
+	}
+
+	if ((IS_G4X(dev) || IS_VALLEYVIEW(dev)) &&
+	    hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
+		dp_aux_irq_handler(dev);
+
+	I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
+	/*
+	 * Make sure hotplug status is cleared before we clear IIR, or else we
+	 * may miss hotplug events.
+	 */
+	POSTING_READ(PORT_HOTPLUG_STAT);
+}
+
 static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
+	struct drm_device *dev = arg;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 iir, gt_iir, pm_iir;
 	irqreturn_t ret = IRQ_NONE;
@@ -1641,19 +1853,8 @@
 		valleyview_pipestat_irq_handler(dev, iir);
 
 		/* Consume port.  Then clear IIR or we'll miss events */
-		if (iir & I915_DISPLAY_PORT_INTERRUPT) {
-			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
-			u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
-
-			intel_hpd_irq_handler(dev, hotplug_trigger, hpd_status_i915);
-
-			if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
-				dp_aux_irq_handler(dev);
-
-			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
-			I915_READ(PORT_HOTPLUG_STAT);
-		}
-
+		if (iir & I915_DISPLAY_PORT_INTERRUPT)
+			i9xx_hpd_irq_handler(dev);
 
 		if (pm_iir)
 			gen6_rps_irq_handler(dev_priv, pm_iir);
@@ -1667,6 +1868,40 @@
 	return ret;
 }
 
+static irqreturn_t cherryview_irq_handler(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 master_ctl, iir;
+	irqreturn_t ret = IRQ_NONE;
+
+	for (;;) {
+		master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
+		iir = I915_READ(VLV_IIR);
+
+		if (master_ctl == 0 && iir == 0)
+			break;
+
+		I915_WRITE(GEN8_MASTER_IRQ, 0);
+
+		gen8_gt_irq_handler(dev, dev_priv, master_ctl);
+
+		valleyview_pipestat_irq_handler(dev, iir);
+
+		/* Consume port.  Then clear IIR or we'll miss events */
+		i9xx_hpd_irq_handler(dev);
+
+		I915_WRITE(VLV_IIR, iir);
+
+		I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL);
+		POSTING_READ(GEN8_MASTER_IRQ);
+
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
 static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1827,7 +2062,7 @@
 
 	for_each_pipe(pipe) {
 		if (de_iir & DE_PIPE_VBLANK(pipe))
-			drm_handle_vblank(dev, pipe);
+			intel_pipe_handle_vblank(dev, pipe);
 
 		if (de_iir & DE_PIPE_FIFO_UNDERRUN(pipe))
 			if (intel_set_cpu_fifo_underrun_reporting(dev, pipe, false))
@@ -1877,7 +2112,7 @@
 
 	for_each_pipe(pipe) {
 		if (de_iir & (DE_PIPE_VBLANK_IVB(pipe)))
-			drm_handle_vblank(dev, pipe);
+			intel_pipe_handle_vblank(dev, pipe);
 
 		/* plane/pipes map 1:1 on ilk+ */
 		if (de_iir & DE_PLANE_FLIP_DONE_IVB(pipe)) {
@@ -1899,7 +2134,7 @@
 
 static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
+	struct drm_device *dev = arg;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
 	irqreturn_t ret = IRQ_NONE;
@@ -2020,9 +2255,9 @@
 
 		pipe_iir = I915_READ(GEN8_DE_PIPE_IIR(pipe));
 		if (pipe_iir & GEN8_PIPE_VBLANK)
-			drm_handle_vblank(dev, pipe);
+			intel_pipe_handle_vblank(dev, pipe);
 
-		if (pipe_iir & GEN8_PIPE_FLIP_DONE) {
+		if (pipe_iir & GEN8_PIPE_PRIMARY_FLIP_DONE) {
 			intel_prepare_page_flip(dev, pipe);
 			intel_finish_page_flip_plane(dev, pipe);
 		}
@@ -2075,7 +2310,7 @@
 static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 			       bool reset_completed)
 {
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 
 	/*
@@ -2137,6 +2372,14 @@
 				   reset_event);
 
 		/*
+		 * In most cases it's guaranteed that we get here with an RPM
+		 * reference held, for example because there is a pending GPU
+		 * request that won't finish until the reset is done. This
+		 * isn't the case at least when we get here by doing a
+		 * simulated reset via debugfs, so get an RPM reference.
+		 */
+		intel_runtime_pm_get(dev_priv);
+		/*
 		 * All state reset _must_ be completed before we update the
 		 * reset counter, for otherwise waiters might miss the reset
 		 * pending state and not properly drop locks, resulting in
@@ -2146,6 +2389,8 @@
 
 		intel_display_handle_reset(dev);
 
+		intel_runtime_pm_put(dev_priv);
+
 		if (ret == 0) {
 			/*
 			 * After all the gem state is reset, increment the reset
@@ -2383,10 +2628,6 @@
 	else
 		i915_enable_pipestat(dev_priv, pipe,
 				     PIPE_VBLANK_INTERRUPT_STATUS);
-
-	/* maintain vblank delivery even in deep C-states */
-	if (INTEL_INFO(dev)->gen == 3)
-		I915_WRITE(INSTPM, _MASKED_BIT_DISABLE(INSTPM_AGPBUSY_DIS));
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -2450,9 +2691,6 @@
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	if (INTEL_INFO(dev)->gen == 3)
-		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_DIS));
-
 	i915_disable_pipestat(dev_priv, pipe,
 			      PIPE_VBLANK_INTERRUPT_STATUS |
 			      PIPE_START_VBLANK_INTERRUPT_STATUS);
@@ -2498,29 +2736,77 @@
 }
 
 static u32
-ring_last_seqno(struct intel_ring_buffer *ring)
+ring_last_seqno(struct intel_engine_cs *ring)
 {
 	return list_entry(ring->request_list.prev,
 			  struct drm_i915_gem_request, list)->seqno;
 }
 
 static bool
-ring_idle(struct intel_ring_buffer *ring, u32 seqno)
+ring_idle(struct intel_engine_cs *ring, u32 seqno)
 {
 	return (list_empty(&ring->request_list) ||
 		i915_seqno_passed(seqno, ring_last_seqno(ring)));
 }
 
-static struct intel_ring_buffer *
-semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
+static bool
+ipehr_is_semaphore_wait(struct drm_device *dev, u32 ipehr)
+{
+	if (INTEL_INFO(dev)->gen >= 8) {
+		/*
+		 * FIXME: gen8 semaphore support - currently we don't emit
+		 * semaphores on bdw anyway, but this needs to be addressed when
+		 * we merge that code.
+		 */
+		return false;
+	} else {
+		ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
+		return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
+				 MI_SEMAPHORE_REGISTER);
+	}
+}
+
+static struct intel_engine_cs *
+semaphore_wait_to_signaller_ring(struct intel_engine_cs *ring, u32 ipehr)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_engine_cs *signaller;
+	int i;
+
+	if (INTEL_INFO(dev_priv->dev)->gen >= 8) {
+		/*
+		 * FIXME: gen8 semaphore support - currently we don't emit
+		 * semaphores on bdw anyway, but this needs to be addressed when
+		 * we merge that code.
+		 */
+		return NULL;
+	} else {
+		u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
+
+		for_each_ring(signaller, dev_priv, i) {
+			if(ring == signaller)
+				continue;
+
+			if (sync_bits == signaller->semaphore.mbox.wait[ring->id])
+				return signaller;
+		}
+	}
+
+	DRM_ERROR("No signaller ring found for ring %i, ipehr 0x%08x\n",
+		  ring->id, ipehr);
+
+	return NULL;
+}
+
+static struct intel_engine_cs *
+semaphore_waits_for(struct intel_engine_cs *ring, u32 *seqno)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	u32 cmd, ipehr, head;
 	int i;
 
 	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
-	if ((ipehr & ~(0x3 << 16)) !=
-	    (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER))
+	if (!ipehr_is_semaphore_wait(ring->dev, ipehr))
 		return NULL;
 
 	/*
@@ -2538,10 +2824,10 @@
 		 * our ring is smaller than what the hardware (and hence
 		 * HEAD_ADDR) allows. Also handles wrap-around.
 		 */
-		head &= ring->size - 1;
+		head &= ring->buffer->size - 1;
 
 		/* This here seems to blow up */
-		cmd = ioread32(ring->virtual_start + head);
+		cmd = ioread32(ring->buffer->virtual_start + head);
 		if (cmd == ipehr)
 			break;
 
@@ -2551,14 +2837,14 @@
 	if (!i)
 		return NULL;
 
-	*seqno = ioread32(ring->virtual_start + head + 4) + 1;
-	return &dev_priv->ring[(ring->id + (((ipehr >> 17) & 1) + 1)) % 3];
+	*seqno = ioread32(ring->buffer->virtual_start + head + 4) + 1;
+	return semaphore_wait_to_signaller_ring(ring, ipehr);
 }
 
-static int semaphore_passed(struct intel_ring_buffer *ring)
+static int semaphore_passed(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct intel_ring_buffer *signaller;
+	struct intel_engine_cs *signaller;
 	u32 seqno, ctl;
 
 	ring->hangcheck.deadlock = true;
@@ -2577,7 +2863,7 @@
 
 static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 {
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
@@ -2585,7 +2871,7 @@
 }
 
 static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
+ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2641,7 +2927,7 @@
 {
 	struct drm_device *dev = (struct drm_device *)data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	int i;
 	int busy_count = 0, rings_hung = 0;
 	bool stuck[I915_NUM_RINGS] = { 0 };
@@ -2759,57 +3045,63 @@
 		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 }
 
-static void ibx_irq_preinstall(struct drm_device *dev)
+static void ibx_irq_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (HAS_PCH_NOP(dev))
 		return;
 
-	/* south display irq */
-	I915_WRITE(SDEIMR, 0xffffffff);
-	/*
-	 * SDEIER is also touched by the interrupt handler to work around missed
-	 * PCH interrupts. Hence we can't update it after the interrupt handler
-	 * is enabled - instead we unconditionally enable all PCH interrupt
-	 * sources here, but then only unmask them as needed with SDEIMR.
-	 */
+	GEN5_IRQ_RESET(SDE);
+
+	if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
+		I915_WRITE(SERR_INT, 0xffffffff);
+}
+
+/*
+ * SDEIER is also touched by the interrupt handler to work around missed PCH
+ * interrupts. Hence we can't update it after the interrupt handler is enabled -
+ * instead we unconditionally enable all PCH interrupt sources here, but then
+ * only unmask them as needed with SDEIMR.
+ *
+ * This function needs to be called before interrupts are enabled.
+ */
+static void ibx_irq_pre_postinstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (HAS_PCH_NOP(dev))
+		return;
+
+	WARN_ON(I915_READ(SDEIER) != 0);
 	I915_WRITE(SDEIER, 0xffffffff);
 	POSTING_READ(SDEIER);
 }
 
-static void gen5_gt_irq_preinstall(struct drm_device *dev)
+static void gen5_gt_irq_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	/* and GT */
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
-
-	if (INTEL_INFO(dev)->gen >= 6) {
-		/* and PM */
-		I915_WRITE(GEN6_PMIMR, 0xffffffff);
-		I915_WRITE(GEN6_PMIER, 0x0);
-		POSTING_READ(GEN6_PMIER);
-	}
+	GEN5_IRQ_RESET(GT);
+	if (INTEL_INFO(dev)->gen >= 6)
+		GEN5_IRQ_RESET(GEN6_PM);
 }
 
 /* drm_dma.h hooks
 */
-static void ironlake_irq_preinstall(struct drm_device *dev)
+static void ironlake_irq_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	I915_WRITE(HWSTAM, 0xeffe);
+	I915_WRITE(HWSTAM, 0xffffffff);
 
-	I915_WRITE(DEIMR, 0xffffffff);
-	I915_WRITE(DEIER, 0x0);
-	POSTING_READ(DEIER);
+	GEN5_IRQ_RESET(DE);
+	if (IS_GEN7(dev))
+		I915_WRITE(GEN7_ERR_INT, 0xffffffff);
 
-	gen5_gt_irq_preinstall(dev);
+	gen5_gt_irq_reset(dev);
 
-	ibx_irq_preinstall(dev);
+	ibx_irq_reset(dev);
 }
 
 static void valleyview_irq_preinstall(struct drm_device *dev)
@@ -2827,7 +3119,7 @@
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 
-	gen5_gt_irq_preinstall(dev);
+	gen5_gt_irq_reset(dev);
 
 	I915_WRITE(DPINVGTT, 0xff);
 
@@ -2841,7 +3133,15 @@
 	POSTING_READ(VLV_IER);
 }
 
-static void gen8_irq_preinstall(struct drm_device *dev)
+static void gen8_gt_irq_reset(struct drm_i915_private *dev_priv)
+{
+	GEN8_IRQ_RESET_NDX(GT, 0);
+	GEN8_IRQ_RESET_NDX(GT, 1);
+	GEN8_IRQ_RESET_NDX(GT, 2);
+	GEN8_IRQ_RESET_NDX(GT, 3);
+}
+
+static void gen8_irq_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
@@ -2849,43 +3149,44 @@
 	I915_WRITE(GEN8_MASTER_IRQ, 0);
 	POSTING_READ(GEN8_MASTER_IRQ);
 
-	/* IIR can theoretically queue up two events. Be paranoid */
-#define GEN8_IRQ_INIT_NDX(type, which) do { \
-		I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \
-		POSTING_READ(GEN8_##type##_IMR(which)); \
-		I915_WRITE(GEN8_##type##_IER(which), 0); \
-		I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
-		POSTING_READ(GEN8_##type##_IIR(which)); \
-		I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
-	} while (0)
+	gen8_gt_irq_reset(dev_priv);
 
-#define GEN8_IRQ_INIT(type) do { \
-		I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \
-		POSTING_READ(GEN8_##type##_IMR); \
-		I915_WRITE(GEN8_##type##_IER, 0); \
-		I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \
-		POSTING_READ(GEN8_##type##_IIR); \
-		I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \
-	} while (0)
+	for_each_pipe(pipe)
+		GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
 
-	GEN8_IRQ_INIT_NDX(GT, 0);
-	GEN8_IRQ_INIT_NDX(GT, 1);
-	GEN8_IRQ_INIT_NDX(GT, 2);
-	GEN8_IRQ_INIT_NDX(GT, 3);
+	GEN5_IRQ_RESET(GEN8_DE_PORT_);
+	GEN5_IRQ_RESET(GEN8_DE_MISC_);
+	GEN5_IRQ_RESET(GEN8_PCU_);
 
-	for_each_pipe(pipe) {
-		GEN8_IRQ_INIT_NDX(DE_PIPE, pipe);
-	}
+	ibx_irq_reset(dev);
+}
 
-	GEN8_IRQ_INIT(DE_PORT);
-	GEN8_IRQ_INIT(DE_MISC);
-	GEN8_IRQ_INIT(PCU);
-#undef GEN8_IRQ_INIT
-#undef GEN8_IRQ_INIT_NDX
+static void cherryview_irq_preinstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe;
+
+	I915_WRITE(GEN8_MASTER_IRQ, 0);
+	POSTING_READ(GEN8_MASTER_IRQ);
+
+	gen8_gt_irq_reset(dev_priv);
+
+	GEN5_IRQ_RESET(GEN8_PCU_);
 
 	POSTING_READ(GEN8_PCU_IIR);
 
-	ibx_irq_preinstall(dev);
+	I915_WRITE(DPINVGTT, DPINVGTT_STATUS_MASK_CHV);
+
+	I915_WRITE(PORT_HOTPLUG_EN, 0);
+	I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
+
+	for_each_pipe(pipe)
+		I915_WRITE(PIPESTAT(pipe), 0xffff);
+
+	I915_WRITE(VLV_IMR, 0xffffffff);
+	I915_WRITE(VLV_IER, 0x0);
+	I915_WRITE(VLV_IIR, 0xffffffff);
+	POSTING_READ(VLV_IIR);
 }
 
 static void ibx_hpd_irq_setup(struct drm_device *dev)
@@ -2931,15 +3232,12 @@
 	if (HAS_PCH_NOP(dev))
 		return;
 
-	if (HAS_PCH_IBX(dev)) {
+	if (HAS_PCH_IBX(dev))
 		mask = SDE_GMBUS | SDE_AUX_MASK | SDE_POISON;
-	} else {
+	else
 		mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT;
 
-		I915_WRITE(SERR_INT, I915_READ(SERR_INT));
-	}
-
-	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
+	GEN5_ASSERT_IIR_IS_ZERO(SDEIIR);
 	I915_WRITE(SDEIMR, ~mask);
 }
 
@@ -2965,10 +3263,7 @@
 		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
 	}
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
+	GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		pm_irqs |= dev_priv->pm_rps_events;
@@ -2977,10 +3272,7 @@
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
-		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
-		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
-		I915_WRITE(GEN6_PMIER, pm_irqs);
-		POSTING_READ(GEN6_PMIER);
+		GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_irq_mask, pm_irqs);
 	}
 }
 
@@ -2997,8 +3289,6 @@
 				DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB);
 		extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB |
 			      DE_PIPEA_VBLANK_IVB | DE_ERR_INT_IVB);
-
-		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
 	} else {
 		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
 				DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
@@ -3011,11 +3301,11 @@
 
 	dev_priv->irq_mask = ~display_mask;
 
-	/* should always can generate irq */
-	I915_WRITE(DEIIR, I915_READ(DEIIR));
-	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER, display_mask | extra_mask);
-	POSTING_READ(DEIER);
+	I915_WRITE(HWSTAM, 0xeffe);
+
+	ibx_irq_pre_postinstall(dev);
+
+	GEN5_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask);
 
 	gen5_gt_irq_postinstall(dev);
 
@@ -3175,21 +3465,16 @@
 		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT
 		};
 
-	for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) {
-		u32 tmp = I915_READ(GEN8_GT_IIR(i));
-		if (tmp)
-			DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n",
-				  i, tmp);
-		I915_WRITE(GEN8_GT_IMR(i), ~gt_interrupts[i]);
-		I915_WRITE(GEN8_GT_IER(i), gt_interrupts[i]);
-	}
-	POSTING_READ(GEN8_GT_IER(0));
+	for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++)
+		GEN8_IRQ_INIT_NDX(GT, i, ~gt_interrupts[i], gt_interrupts[i]);
+
+	dev_priv->pm_irq_mask = 0xffffffff;
 }
 
 static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
-	uint32_t de_pipe_masked = GEN8_PIPE_FLIP_DONE |
+	uint32_t de_pipe_masked = GEN8_PIPE_PRIMARY_FLIP_DONE |
 		GEN8_PIPE_CDCLK_CRC_DONE |
 		GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
 	uint32_t de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
@@ -3199,25 +3484,19 @@
 	dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked;
 	dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked;
 
-	for_each_pipe(pipe) {
-		u32 tmp = I915_READ(GEN8_DE_PIPE_IIR(pipe));
-		if (tmp)
-			DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n",
-				  pipe, tmp);
-		I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
-		I915_WRITE(GEN8_DE_PIPE_IER(pipe), de_pipe_enables);
-	}
-	POSTING_READ(GEN8_DE_PIPE_ISR(0));
+	for_each_pipe(pipe)
+		GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe],
+				  de_pipe_enables);
 
-	I915_WRITE(GEN8_DE_PORT_IMR, ~GEN8_AUX_CHANNEL_A);
-	I915_WRITE(GEN8_DE_PORT_IER, GEN8_AUX_CHANNEL_A);
-	POSTING_READ(GEN8_DE_PORT_IER);
+	GEN5_IRQ_INIT(GEN8_DE_PORT_, ~GEN8_AUX_CHANNEL_A, GEN8_AUX_CHANNEL_A);
 }
 
 static int gen8_irq_postinstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	ibx_irq_pre_postinstall(dev);
+
 	gen8_gt_irq_postinstall(dev_priv);
 	gen8_de_irq_postinstall(dev_priv);
 
@@ -3229,44 +3508,55 @@
 	return 0;
 }
 
+static int cherryview_irq_postinstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 enable_mask = I915_DISPLAY_PORT_INTERRUPT |
+		I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
+		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+		I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+	u32 pipestat_enable = PLANE_FLIP_DONE_INT_STATUS_VLV |
+		PIPE_CRC_DONE_INTERRUPT_STATUS;
+	unsigned long irqflags;
+	int pipe;
+
+	/*
+	 * Leave vblank interrupts masked initially.  enable/disable will
+	 * toggle them based on usage.
+	 */
+	dev_priv->irq_mask = ~enable_mask;
+
+	for_each_pipe(pipe)
+		I915_WRITE(PIPESTAT(pipe), 0xffff);
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS);
+	for_each_pipe(pipe)
+		i915_enable_pipestat(dev_priv, pipe, pipestat_enable);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	I915_WRITE(VLV_IIR, 0xffffffff);
+	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
+	I915_WRITE(VLV_IER, enable_mask);
+
+	gen8_gt_irq_postinstall(dev_priv);
+
+	I915_WRITE(GEN8_MASTER_IRQ, MASTER_INTERRUPT_ENABLE);
+	POSTING_READ(GEN8_MASTER_IRQ);
+
+	return 0;
+}
+
 static void gen8_irq_uninstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int pipe;
 
 	if (!dev_priv)
 		return;
 
-	I915_WRITE(GEN8_MASTER_IRQ, 0);
+	intel_hpd_irq_uninstall(dev_priv);
 
-#define GEN8_IRQ_FINI_NDX(type, which) do { \
-		I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \
-		I915_WRITE(GEN8_##type##_IER(which), 0); \
-		I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
-	} while (0)
-
-#define GEN8_IRQ_FINI(type) do { \
-		I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \
-		I915_WRITE(GEN8_##type##_IER, 0); \
-		I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \
-	} while (0)
-
-	GEN8_IRQ_FINI_NDX(GT, 0);
-	GEN8_IRQ_FINI_NDX(GT, 1);
-	GEN8_IRQ_FINI_NDX(GT, 2);
-	GEN8_IRQ_FINI_NDX(GT, 3);
-
-	for_each_pipe(pipe) {
-		GEN8_IRQ_FINI_NDX(DE_PIPE, pipe);
-	}
-
-	GEN8_IRQ_FINI(DE_PORT);
-	GEN8_IRQ_FINI(DE_MISC);
-	GEN8_IRQ_FINI(PCU);
-#undef GEN8_IRQ_FINI
-#undef GEN8_IRQ_FINI_NDX
-
-	POSTING_READ(GEN8_PCU_IIR);
+	gen8_irq_reset(dev);
 }
 
 static void valleyview_irq_uninstall(struct drm_device *dev)
@@ -3278,6 +3568,8 @@
 	if (!dev_priv)
 		return;
 
+	I915_WRITE(VLV_MASTER_IER, 0);
+
 	intel_hpd_irq_uninstall(dev_priv);
 
 	for_each_pipe(pipe)
@@ -3300,6 +3592,57 @@
 	POSTING_READ(VLV_IER);
 }
 
+static void cherryview_irq_uninstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe;
+
+	if (!dev_priv)
+		return;
+
+	I915_WRITE(GEN8_MASTER_IRQ, 0);
+	POSTING_READ(GEN8_MASTER_IRQ);
+
+#define GEN8_IRQ_FINI_NDX(type, which)				\
+do {								\
+	I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff);	\
+	I915_WRITE(GEN8_##type##_IER(which), 0);		\
+	I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff);	\
+	POSTING_READ(GEN8_##type##_IIR(which));			\
+	I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff);	\
+} while (0)
+
+#define GEN8_IRQ_FINI(type)				\
+do {							\
+	I915_WRITE(GEN8_##type##_IMR, 0xffffffff);	\
+	I915_WRITE(GEN8_##type##_IER, 0);		\
+	I915_WRITE(GEN8_##type##_IIR, 0xffffffff);	\
+	POSTING_READ(GEN8_##type##_IIR);		\
+	I915_WRITE(GEN8_##type##_IIR, 0xffffffff);	\
+} while (0)
+
+	GEN8_IRQ_FINI_NDX(GT, 0);
+	GEN8_IRQ_FINI_NDX(GT, 1);
+	GEN8_IRQ_FINI_NDX(GT, 2);
+	GEN8_IRQ_FINI_NDX(GT, 3);
+
+	GEN8_IRQ_FINI(PCU);
+
+#undef GEN8_IRQ_FINI
+#undef GEN8_IRQ_FINI_NDX
+
+	I915_WRITE(PORT_HOTPLUG_EN, 0);
+	I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
+
+	for_each_pipe(pipe)
+		I915_WRITE(PIPESTAT(pipe), 0xffff);
+
+	I915_WRITE(VLV_IMR, 0xffffffff);
+	I915_WRITE(VLV_IER, 0x0);
+	I915_WRITE(VLV_IIR, 0xffffffff);
+	POSTING_READ(VLV_IIR);
+}
+
 static void ironlake_irq_uninstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3309,26 +3652,7 @@
 
 	intel_hpd_irq_uninstall(dev_priv);
 
-	I915_WRITE(HWSTAM, 0xffffffff);
-
-	I915_WRITE(DEIMR, 0xffffffff);
-	I915_WRITE(DEIER, 0x0);
-	I915_WRITE(DEIIR, I915_READ(DEIIR));
-	if (IS_GEN7(dev))
-		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
-
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-
-	if (HAS_PCH_NOP(dev))
-		return;
-
-	I915_WRITE(SDEIMR, 0xffffffff);
-	I915_WRITE(SDEIER, 0x0);
-	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
-	if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
-		I915_WRITE(SERR_INT, I915_READ(SERR_INT));
+	ironlake_irq_reset(dev);
 }
 
 static void i8xx_irq_preinstall(struct drm_device * dev)
@@ -3386,7 +3710,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u16 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane);
 
-	if (!drm_handle_vblank(dev, pipe))
+	if (!intel_pipe_handle_vblank(dev, pipe))
 		return false;
 
 	if ((iir & flip_pending) == 0)
@@ -3410,7 +3734,7 @@
 
 static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
+	struct drm_device *dev = arg;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u16 iir, new_iir;
 	u32 pipe_stats[2];
@@ -3571,7 +3895,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane);
 
-	if (!drm_handle_vblank(dev, pipe))
+	if (!intel_pipe_handle_vblank(dev, pipe))
 		return false;
 
 	if ((iir & flip_pending) == 0)
@@ -3595,7 +3919,7 @@
 
 static irqreturn_t i915_irq_handler(int irq, void *arg)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
+	struct drm_device *dev = arg;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 iir, new_iir, pipe_stats[I915_MAX_PIPES];
 	unsigned long irqflags;
@@ -3636,16 +3960,9 @@
 			break;
 
 		/* Consume port.  Then clear IIR or we'll miss events */
-		if ((I915_HAS_HOTPLUG(dev)) &&
-		    (iir & I915_DISPLAY_PORT_INTERRUPT)) {
-			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
-			u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
-
-			intel_hpd_irq_handler(dev, hotplug_trigger, hpd_status_i915);
-
-			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
-			POSTING_READ(PORT_HOTPLUG_STAT);
-		}
+		if (I915_HAS_HOTPLUG(dev) &&
+		    iir & I915_DISPLAY_PORT_INTERRUPT)
+			i9xx_hpd_irq_handler(dev);
 
 		I915_WRITE(IIR, iir & ~flip_mask);
 		new_iir = I915_READ(IIR); /* Flush posted writes */
@@ -3832,7 +4149,7 @@
 
 static irqreturn_t i965_irq_handler(int irq, void *arg)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
+	struct drm_device *dev = arg;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 iir, new_iir;
 	u32 pipe_stats[I915_MAX_PIPES];
@@ -3879,22 +4196,8 @@
 		ret = IRQ_HANDLED;
 
 		/* Consume port.  Then clear IIR or we'll miss events */
-		if (iir & I915_DISPLAY_PORT_INTERRUPT) {
-			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
-			u32 hotplug_trigger = hotplug_status & (IS_G4X(dev) ?
-								  HOTPLUG_INT_STATUS_G4X :
-								  HOTPLUG_INT_STATUS_I915);
-
-			intel_hpd_irq_handler(dev, hotplug_trigger,
-					      IS_G4X(dev) ? hpd_status_g4x : hpd_status_i915);
-
-			if (IS_G4X(dev) &&
-			    (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X))
-				dp_aux_irq_handler(dev);
-
-			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
-			I915_READ(PORT_HOTPLUG_STAT);
-		}
+		if (iir & I915_DISPLAY_PORT_INTERRUPT)
+			i9xx_hpd_irq_handler(dev);
 
 		I915_WRITE(IIR, iir & ~flip_mask);
 		new_iir = I915_READ(IIR); /* Flush posted writes */
@@ -3997,7 +4300,7 @@
 			if (intel_connector->encoder->hpd_pin == i) {
 				if (connector->polled != intel_connector->polled)
 					DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n",
-							 drm_get_connector_name(connector));
+							 connector->name);
 				connector->polled = intel_connector->polled;
 				if (!connector->polled)
 					connector->polled = DRM_CONNECTOR_POLL_HPD;
@@ -4045,7 +4348,15 @@
 		dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
 	}
 
-	if (IS_VALLEYVIEW(dev)) {
+	if (IS_CHERRYVIEW(dev)) {
+		dev->driver->irq_handler = cherryview_irq_handler;
+		dev->driver->irq_preinstall = cherryview_irq_preinstall;
+		dev->driver->irq_postinstall = cherryview_irq_postinstall;
+		dev->driver->irq_uninstall = cherryview_irq_uninstall;
+		dev->driver->enable_vblank = valleyview_enable_vblank;
+		dev->driver->disable_vblank = valleyview_disable_vblank;
+		dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
+	} else if (IS_VALLEYVIEW(dev)) {
 		dev->driver->irq_handler = valleyview_irq_handler;
 		dev->driver->irq_preinstall = valleyview_irq_preinstall;
 		dev->driver->irq_postinstall = valleyview_irq_postinstall;
@@ -4055,7 +4366,7 @@
 		dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
 	} else if (IS_GEN8(dev)) {
 		dev->driver->irq_handler = gen8_irq_handler;
-		dev->driver->irq_preinstall = gen8_irq_preinstall;
+		dev->driver->irq_preinstall = gen8_irq_reset;
 		dev->driver->irq_postinstall = gen8_irq_postinstall;
 		dev->driver->irq_uninstall = gen8_irq_uninstall;
 		dev->driver->enable_vblank = gen8_enable_vblank;
@@ -4063,7 +4374,7 @@
 		dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev->driver->irq_handler = ironlake_irq_handler;
-		dev->driver->irq_preinstall = ironlake_irq_preinstall;
+		dev->driver->irq_preinstall = ironlake_irq_reset;
 		dev->driver->irq_postinstall = ironlake_irq_postinstall;
 		dev->driver->irq_uninstall = ironlake_irq_uninstall;
 		dev->driver->enable_vblank = ironlake_enable_vblank;
@@ -4121,57 +4432,20 @@
 }
 
 /* Disable interrupts so we can allow runtime PM. */
-void hsw_runtime_pm_disable_interrupts(struct drm_device *dev)
+void intel_runtime_pm_disable_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long irqflags;
 
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->pm.regsave.deimr = I915_READ(DEIMR);
-	dev_priv->pm.regsave.sdeimr = I915_READ(SDEIMR);
-	dev_priv->pm.regsave.gtimr = I915_READ(GTIMR);
-	dev_priv->pm.regsave.gtier = I915_READ(GTIER);
-	dev_priv->pm.regsave.gen6_pmimr = I915_READ(GEN6_PMIMR);
-
-	ironlake_disable_display_irq(dev_priv, 0xffffffff);
-	ibx_disable_display_interrupt(dev_priv, 0xffffffff);
-	ilk_disable_gt_irq(dev_priv, 0xffffffff);
-	snb_disable_pm_irq(dev_priv, 0xffffffff);
-
+	dev->driver->irq_uninstall(dev);
 	dev_priv->pm.irqs_disabled = true;
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 /* Restore interrupts so we can recover from runtime PM. */
-void hsw_runtime_pm_restore_interrupts(struct drm_device *dev)
+void intel_runtime_pm_restore_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long irqflags;
-	uint32_t val;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	val = I915_READ(DEIMR);
-	WARN(val != 0xffffffff, "DEIMR is 0x%08x\n", val);
-
-	val = I915_READ(SDEIMR);
-	WARN(val != 0xffffffff, "SDEIMR is 0x%08x\n", val);
-
-	val = I915_READ(GTIMR);
-	WARN(val != 0xffffffff, "GTIMR is 0x%08x\n", val);
-
-	val = I915_READ(GEN6_PMIMR);
-	WARN(val != 0xffffffff, "GEN6_PMIMR is 0x%08x\n", val);
 
 	dev_priv->pm.irqs_disabled = false;
-
-	ironlake_enable_display_irq(dev_priv, ~dev_priv->pm.regsave.deimr);
-	ibx_enable_display_interrupt(dev_priv, ~dev_priv->pm.regsave.sdeimr);
-	ilk_enable_gt_irq(dev_priv, ~dev_priv->pm.regsave.gtimr);
-	snb_enable_pm_irq(dev_priv, ~dev_priv->pm.regsave.gen6_pmimr);
-	I915_WRITE(GTIER, dev_priv->pm.regsave.gtier);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	dev->driver->irq_preinstall(dev);
+	dev->driver->irq_postinstall(dev);
 }

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index d1d7980..d05a2af 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c

@@ -46,7 +46,8 @@
 	.reset = true,
 	.invert_brightness = 0,
 	.disable_display = 0,
-	.enable_cmd_parser = 0,
+	.enable_cmd_parser = 1,
+	.disable_vtd_wa = 0,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -149,6 +150,9 @@
 module_param_named(disable_display, i915.disable_display, bool, 0600);
 MODULE_PARM_DESC(disable_display, "Disable display (default: false)");
 
+module_param_named(disable_vtd_wa, i915.disable_vtd_wa, bool, 0600);
+MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)");
+
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
-		 "Enable command parsing (1=enabled, 0=disabled [default])");
+		 "Enable command parsing (1=enabled [default], 0=disabled)");

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c77af69..e691b30 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h

@@ -29,6 +29,8 @@
 #define _TRANSCODER(tran, a, b) ((a) + (tran)*((b)-(a)))
 
 #define _PORT(port, a, b) ((a) + (port)*((b)-(a)))
+#define _PIPE3(pipe, a, b, c) (pipe < 2 ? _PIPE(pipe, a, b) : c)
+#define _PORT3(port, a, b, c) (port < 2 ? _PORT(port, a, b) : c)
 
 #define _MASKED_BIT_ENABLE(a) (((a) << 16) | (a))
 #define _MASKED_BIT_DISABLE(a) ((a) << 16)
@@ -77,13 +79,19 @@
 
 /* Graphics reset regs */
 #define I965_GDRST 0xc0 /* PCI config register */
-#define ILK_GDSR 0x2ca4 /* MCHBAR offset */
 #define  GRDOM_FULL	(0<<2)
 #define  GRDOM_RENDER	(1<<2)
 #define  GRDOM_MEDIA	(3<<2)
 #define  GRDOM_MASK	(3<<2)
 #define  GRDOM_RESET_ENABLE (1<<0)
 
+#define ILK_GDSR 0x2ca4 /* MCHBAR offset */
+#define  ILK_GRDOM_FULL		(0<<1)
+#define  ILK_GRDOM_RENDER	(1<<1)
+#define  ILK_GRDOM_MEDIA	(3<<1)
+#define  ILK_GRDOM_MASK		(3<<1)
+#define  ILK_GRDOM_RESET_ENABLE (1<<0)
+
 #define GEN6_MBCUNIT_SNPCR	0x900c /* for LLC config */
 #define   GEN6_MBC_SNPCR_SHIFT	21
 #define   GEN6_MBC_SNPCR_MASK	(3<<21)
@@ -92,6 +100,9 @@
 #define   GEN6_MBC_SNPCR_LOW	(2<<21)
 #define   GEN6_MBC_SNPCR_MIN	(3<<21) /* only 1/16th of the cache is shared */
 
+#define VLV_G3DCTL		0x9024
+#define VLV_GSCKGCTL		0x9028
+
 #define GEN6_MBCTL		0x0907c
 #define   GEN6_MBCTL_ENABLE_BOOT_FETCH	(1 << 4)
 #define   GEN6_MBCTL_CTX_FETCH_NEEDED	(1 << 3)
@@ -190,6 +201,8 @@
  * Memory interface instructions used by the kernel
  */
 #define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
+#define  MI_GLOBAL_GTT    (1<<22)
 
 #define MI_NOOP			MI_INSTR(0, 0)
 #define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
@@ -244,7 +257,8 @@
 #define   MI_SEMAPHORE_SYNC_BVE	    (0<<16) /* VECS wait for BCS  (VEBSYNC) */
 #define   MI_SEMAPHORE_SYNC_VVE	    (1<<16) /* VECS wait for VCS  (VEVSYNC) */
 #define   MI_SEMAPHORE_SYNC_RVE	    (2<<16) /* VECS wait for RCS  (VERSYNC) */
-#define   MI_SEMAPHORE_SYNC_INVALID  (3<<16)
+#define   MI_SEMAPHORE_SYNC_INVALID (3<<16)
+#define   MI_SEMAPHORE_SYNC_MASK    (3<<16)
 #define MI_SET_CONTEXT		MI_INSTR(0x18, 0)
 #define   MI_MM_SPACE_GTT		(1<<8)
 #define   MI_MM_SPACE_PHYSICAL		(0<<8)
@@ -262,13 +276,16 @@
  * - One can actually load arbitrary many arbitrary registers: Simply issue x
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
  */
-#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
-#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
+#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)
+#define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_FLUSH_DW_OP_MASK		(3<<14)
+#define   MI_FLUSH_DW_NOTIFY		(1<<8)
 #define   MI_INVALIDATE_BSD		(1<<7)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
@@ -330,9 +347,12 @@
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
+#define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
+#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
 #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
@@ -347,6 +367,94 @@
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
+/*
+ * Commands used only by the command parser
+ */
+#define MI_SET_PREDICATE        MI_INSTR(0x01, 0)
+#define MI_ARB_CHECK            MI_INSTR(0x05, 0)
+#define MI_RS_CONTROL           MI_INSTR(0x06, 0)
+#define MI_URB_ATOMIC_ALLOC     MI_INSTR(0x09, 0)
+#define MI_PREDICATE            MI_INSTR(0x0C, 0)
+#define MI_RS_CONTEXT           MI_INSTR(0x0F, 0)
+#define MI_TOPOLOGY_FILTER      MI_INSTR(0x0D, 0)
+#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
+#define MI_URB_CLEAR            MI_INSTR(0x19, 0)
+#define MI_UPDATE_GTT           MI_INSTR(0x23, 0)
+#define MI_CLFLUSH              MI_INSTR(0x27, 0)
+#define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
+#define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
+#define MI_LOAD_REGISTER_MEM    MI_INSTR(0x29, 0)
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
+#define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
+#define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
+#define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
+#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
+
+#define PIPELINE_SELECT                ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
+#define GFX_OP_3DSTATE_VF_STATISTICS   ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
+#define MEDIA_VFE_STATE                ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define GPGPU_OBJECT                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
+#define GPGPU_WALKER                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
+#define GFX_OP_3DSTATE_SO_DECL_LIST \
+	((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
+
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
+
+#define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
+
+#define COLOR_BLT     ((0x2<<29)|(0x40<<22))
+#define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
+
+/*
+ * Registers used only by the command parser
+ */
+#define BCS_SWCTRL 0x22200
+
+#define HS_INVOCATION_COUNT 0x2300
+#define DS_INVOCATION_COUNT 0x2308
+#define IA_VERTICES_COUNT   0x2310
+#define IA_PRIMITIVES_COUNT 0x2318
+#define VS_INVOCATION_COUNT 0x2320
+#define GS_INVOCATION_COUNT 0x2328
+#define GS_PRIMITIVES_COUNT 0x2330
+#define CL_INVOCATION_COUNT 0x2338
+#define CL_PRIMITIVES_COUNT 0x2340
+#define PS_INVOCATION_COUNT 0x2348
+#define PS_DEPTH_COUNT      0x2350
+
+/* These are the four 64-bit counter registers, one for each stream output */
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
+
+#define GEN7_3DPRIM_END_OFFSET          0x2420
+#define GEN7_3DPRIM_START_VERTEX        0x2430
+#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
+#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
+#define GEN7_3DPRIM_START_INSTANCE      0x243C
+#define GEN7_3DPRIM_BASE_VERTEX         0x2440
+
+#define OACONTROL 0x2360
+
+#define _GEN7_PIPEA_DE_LOAD_SL	0x70068
+#define _GEN7_PIPEB_DE_LOAD_SL	0x71068
+#define GEN7_PIPE_DE_LOAD_SL(pipe) _PIPE(pipe, \
+					 _GEN7_PIPEA_DE_LOAD_SL, \
+					 _GEN7_PIPEB_DE_LOAD_SL)
 
 /*
  * Reset registers
@@ -370,6 +478,7 @@
 #define   IOSF_PORT_PUNIT			0x4
 #define   IOSF_PORT_NC				0x11
 #define   IOSF_PORT_DPIO			0x12
+#define   IOSF_PORT_DPIO_2			0x1a
 #define   IOSF_PORT_GPIO_NC			0x13
 #define   IOSF_PORT_CCK				0x14
 #define   IOSF_PORT_CCU				0xA9
@@ -381,9 +490,6 @@
 /* See configdb bunit SB addr map */
 #define BUNIT_REG_BISOC				0x11
 
-#define PUNIT_OPCODE_REG_READ			6
-#define PUNIT_OPCODE_REG_WRITE			7
-
 #define PUNIT_REG_DSPFREQ			0x36
 #define   DSPFREQSTAT_SHIFT			30
 #define   DSPFREQSTAT_MASK			(0x3 << DSPFREQSTAT_SHIFT)
@@ -469,16 +575,91 @@
 #define  DSI_PLL_M1_DIV_MASK			(0x1ff << 0)
 #define CCK_DISPLAY_CLOCK_CONTROL		0x6b
 
+/**
+ * DOC: DPIO
+ *
+ * VLV and CHV have slightly peculiar display PHYs for driving DP/HDMI
+ * ports. DPIO is the name given to such a display PHY. These PHYs
+ * don't follow the standard programming model using direct MMIO
+ * registers, and instead their registers must be accessed through IOSF
+ * sideband. VLV has one such PHY for driving ports B and C, and CHV
+ * adds another PHY for driving port D. Each PHY responds to specific
+ * IOSF-SB port.
+ *
+ * Each display PHY is made up of one or two channels. Each channel
+ * houses a common lane part which contains the PLL and other common
+ * logic. CH0 common lane also contains the IOSF-SB logic for the
+ * Common Register Interface (CRI) ie. the DPIO registers. CRI clock
+ * must be running when any DPIO registers are accessed.
+ *
+ * In addition to having their own registers, the PHYs are also
+ * controlled through some dedicated signals from the display
+ * controller. These include PLL reference clock enable, PLL enable,
+ * and CRI clock selection, for example.
+ *
+ * Each channel also has two splines (also called data lanes), and
+ * each spline is made up of one Physical Access Coding Sub-Layer
+ * (PCS) block and two TX lanes. So each channel has two PCS blocks
+ * and four TX lanes. The TX lanes are used as DP lanes or TMDS
+ * data/clock pairs depending on the output type.
+ *
+ * Additionally the PHY also contains an AUX lane with AUX blocks
+ * for each channel. This is used for DP AUX communication, but
+ * this fact isn't really relevant for the driver since AUX is
+ * controlled from the display controller side. No DPIO registers
+ * need to be accessed during AUX communication.
+ *
+ * Generally the common lane corresponds to the pipe and
+ * the spline (PCS/TX) corresponds to the port.
+ *
+ * For dual channel PHY (VLV/CHV):
+ *
+ *  pipe A == CMN/PLL/REF CH0
+ *
+ *  pipe B == CMN/PLL/REF CH1
+ *
+ *  port B == PCS/TX CH0
+ *
+ *  port C == PCS/TX CH1
+ *
+ * This is especially important when we cross the streams
+ * ie. drive port B with pipe B, or port C with pipe A.
+ *
+ * For single channel PHY (CHV):
+ *
+ *  pipe C == CMN/PLL/REF CH0
+ *
+ *  port D == PCS/TX CH0
+ *
+ * Note: digital port B is DDI0, digital port C is DDI1,
+ * digital port D is DDI2
+ */
 /*
- * DPIO - a special bus for various display related registers to hide behind
+ * Dual channel PHY (VLV/CHV)
+ * ---------------------------------
+ * |      CH0      |      CH1      |
+ * |  CMN/PLL/REF  |  CMN/PLL/REF  |
+ * |---------------|---------------| Display PHY
+ * | PCS01 | PCS23 | PCS01 | PCS23 |
+ * |-------|-------|-------|-------|
+ * |TX0|TX1|TX2|TX3|TX0|TX1|TX2|TX3|
+ * ---------------------------------
+ * |     DDI0      |     DDI1      | DP/HDMI ports
+ * ---------------------------------
  *
- * DPIO is VLV only.
- *
- * Note: digital port B is DDI0, digital pot C is DDI1
+ * Single channel PHY (CHV)
+ * -----------------
+ * |      CH0      |
+ * |  CMN/PLL/REF  |
+ * |---------------| Display PHY
+ * | PCS01 | PCS23 |
+ * |-------|-------|
+ * |TX0|TX1|TX2|TX3|
+ * -----------------
+ * |     DDI2      | DP/HDMI port
+ * -----------------
  */
 #define DPIO_DEVFN			0
-#define DPIO_OPCODE_REG_WRITE		1
-#define DPIO_OPCODE_REG_READ		0
 
 #define DPIO_CTL			(VLV_DISPLAY_BASE + 0x2110)
 #define  DPIO_MODSEL1			(1<<3) /* if ref clk b == 27 */
@@ -555,14 +736,29 @@
 #define   DPIO_PCS_TX_LANE1_RESET	(1<<7)
 #define VLV_PCS_DW0(ch) _PORT(ch, _VLV_PCS_DW0_CH0, _VLV_PCS_DW0_CH1)
 
+#define _VLV_PCS01_DW0_CH0		0x200
+#define _VLV_PCS23_DW0_CH0		0x400
+#define _VLV_PCS01_DW0_CH1		0x2600
+#define _VLV_PCS23_DW0_CH1		0x2800
+#define VLV_PCS01_DW0(ch) _PORT(ch, _VLV_PCS01_DW0_CH0, _VLV_PCS01_DW0_CH1)
+#define VLV_PCS23_DW0(ch) _PORT(ch, _VLV_PCS23_DW0_CH0, _VLV_PCS23_DW0_CH1)
+
 #define _VLV_PCS_DW1_CH0		0x8204
 #define _VLV_PCS_DW1_CH1		0x8404
+#define   CHV_PCS_REQ_SOFTRESET_EN	(1<<23)
 #define   DPIO_PCS_CLK_CRI_RXEB_EIOS_EN	(1<<22)
 #define   DPIO_PCS_CLK_CRI_RXDIGFILTSG_EN (1<<21)
 #define   DPIO_PCS_CLK_DATAWIDTH_SHIFT	(6)
 #define   DPIO_PCS_CLK_SOFT_RESET	(1<<5)
 #define VLV_PCS_DW1(ch) _PORT(ch, _VLV_PCS_DW1_CH0, _VLV_PCS_DW1_CH1)
 
+#define _VLV_PCS01_DW1_CH0		0x204
+#define _VLV_PCS23_DW1_CH0		0x404
+#define _VLV_PCS01_DW1_CH1		0x2604
+#define _VLV_PCS23_DW1_CH1		0x2804
+#define VLV_PCS01_DW1(ch) _PORT(ch, _VLV_PCS01_DW1_CH0, _VLV_PCS01_DW1_CH1)
+#define VLV_PCS23_DW1(ch) _PORT(ch, _VLV_PCS23_DW1_CH0, _VLV_PCS23_DW1_CH1)
+
 #define _VLV_PCS_DW8_CH0		0x8220
 #define _VLV_PCS_DW8_CH1		0x8420
 #define VLV_PCS_DW8(ch) _PORT(ch, _VLV_PCS_DW8_CH0, _VLV_PCS_DW8_CH1)
@@ -578,6 +774,19 @@
 #define _VLV_PCS_DW9_CH1		0x8424
 #define	VLV_PCS_DW9(ch) _PORT(ch, _VLV_PCS_DW9_CH0, _VLV_PCS_DW9_CH1)
 
+#define _CHV_PCS_DW10_CH0		0x8228
+#define _CHV_PCS_DW10_CH1		0x8428
+#define   DPIO_PCS_SWING_CALC_TX0_TX2	(1<<30)
+#define   DPIO_PCS_SWING_CALC_TX1_TX3	(1<<31)
+#define CHV_PCS_DW10(ch) _PORT(ch, _CHV_PCS_DW10_CH0, _CHV_PCS_DW10_CH1)
+
+#define _VLV_PCS01_DW10_CH0		0x0228
+#define _VLV_PCS23_DW10_CH0		0x0428
+#define _VLV_PCS01_DW10_CH1		0x2628
+#define _VLV_PCS23_DW10_CH1		0x2828
+#define VLV_PCS01_DW10(port) _PORT(port, _VLV_PCS01_DW10_CH0, _VLV_PCS01_DW10_CH1)
+#define VLV_PCS23_DW10(port) _PORT(port, _VLV_PCS23_DW10_CH0, _VLV_PCS23_DW10_CH1)
+
 #define _VLV_PCS_DW11_CH0		0x822c
 #define _VLV_PCS_DW11_CH1		0x842c
 #define VLV_PCS_DW11(ch) _PORT(ch, _VLV_PCS_DW11_CH0, _VLV_PCS_DW11_CH1)
@@ -596,14 +805,21 @@
 
 #define _VLV_TX_DW2_CH0			0x8288
 #define _VLV_TX_DW2_CH1			0x8488
+#define   DPIO_SWING_MARGIN_SHIFT	16
+#define   DPIO_SWING_MARGIN_MASK	(0xff << DPIO_SWING_MARGIN_SHIFT)
+#define   DPIO_UNIQ_TRANS_SCALE_SHIFT	8
 #define VLV_TX_DW2(ch) _PORT(ch, _VLV_TX_DW2_CH0, _VLV_TX_DW2_CH1)
 
 #define _VLV_TX_DW3_CH0			0x828c
 #define _VLV_TX_DW3_CH1			0x848c
+/* The following bit is for the CHV PHY */
+#define   DPIO_TX_UNIQ_TRANS_SCALE_EN	(1<<27)
 #define VLV_TX_DW3(ch) _PORT(ch, _VLV_TX_DW3_CH0, _VLV_TX_DW3_CH1)
 
 #define _VLV_TX_DW4_CH0			0x8290
 #define _VLV_TX_DW4_CH1			0x8490
+#define   DPIO_SWING_DEEMPH9P5_SHIFT	24
+#define   DPIO_SWING_DEEMPH9P5_MASK	(0xff << DPIO_SWING_DEEMPH9P5_SHIFT)
 #define VLV_TX_DW4(ch) _PORT(ch, _VLV_TX_DW4_CH0, _VLV_TX_DW4_CH1)
 
 #define _VLV_TX3_DW4_CH0		0x690
@@ -623,6 +839,73 @@
 #define _VLV_TX_DW14_CH1		0x84b8
 #define VLV_TX_DW14(ch) _PORT(ch, _VLV_TX_DW14_CH0, _VLV_TX_DW14_CH1)
 
+/* CHV dpPhy registers */
+#define _CHV_PLL_DW0_CH0		0x8000
+#define _CHV_PLL_DW0_CH1		0x8180
+#define CHV_PLL_DW0(ch) _PIPE(ch, _CHV_PLL_DW0_CH0, _CHV_PLL_DW0_CH1)
+
+#define _CHV_PLL_DW1_CH0		0x8004
+#define _CHV_PLL_DW1_CH1		0x8184
+#define   DPIO_CHV_N_DIV_SHIFT		8
+#define   DPIO_CHV_M1_DIV_BY_2		(0 << 0)
+#define CHV_PLL_DW1(ch) _PIPE(ch, _CHV_PLL_DW1_CH0, _CHV_PLL_DW1_CH1)
+
+#define _CHV_PLL_DW2_CH0		0x8008
+#define _CHV_PLL_DW2_CH1		0x8188
+#define CHV_PLL_DW2(ch) _PIPE(ch, _CHV_PLL_DW2_CH0, _CHV_PLL_DW2_CH1)
+
+#define _CHV_PLL_DW3_CH0		0x800c
+#define _CHV_PLL_DW3_CH1		0x818c
+#define  DPIO_CHV_FRAC_DIV_EN		(1 << 16)
+#define  DPIO_CHV_FIRST_MOD		(0 << 8)
+#define  DPIO_CHV_SECOND_MOD		(1 << 8)
+#define  DPIO_CHV_FEEDFWD_GAIN_SHIFT	0
+#define CHV_PLL_DW3(ch) _PIPE(ch, _CHV_PLL_DW3_CH0, _CHV_PLL_DW3_CH1)
+
+#define _CHV_PLL_DW6_CH0		0x8018
+#define _CHV_PLL_DW6_CH1		0x8198
+#define   DPIO_CHV_GAIN_CTRL_SHIFT	16
+#define	  DPIO_CHV_INT_COEFF_SHIFT	8
+#define   DPIO_CHV_PROP_COEFF_SHIFT	0
+#define CHV_PLL_DW6(ch) _PIPE(ch, _CHV_PLL_DW6_CH0, _CHV_PLL_DW6_CH1)
+
+#define _CHV_CMN_DW13_CH0		0x8134
+#define _CHV_CMN_DW0_CH1		0x8080
+#define   DPIO_CHV_S1_DIV_SHIFT		21
+#define   DPIO_CHV_P1_DIV_SHIFT		13 /* 3 bits */
+#define   DPIO_CHV_P2_DIV_SHIFT		8  /* 5 bits */
+#define   DPIO_CHV_K_DIV_SHIFT		4
+#define   DPIO_PLL_FREQLOCK		(1 << 1)
+#define   DPIO_PLL_LOCK			(1 << 0)
+#define CHV_CMN_DW13(ch) _PIPE(ch, _CHV_CMN_DW13_CH0, _CHV_CMN_DW0_CH1)
+
+#define _CHV_CMN_DW14_CH0		0x8138
+#define _CHV_CMN_DW1_CH1		0x8084
+#define   DPIO_AFC_RECAL		(1 << 14)
+#define   DPIO_DCLKP_EN			(1 << 13)
+#define CHV_CMN_DW14(ch) _PIPE(ch, _CHV_CMN_DW14_CH0, _CHV_CMN_DW1_CH1)
+
+#define CHV_CMN_DW30			0x8178
+#define   DPIO_LRC_BYPASS		(1 << 3)
+
+#define _TXLANE(ch, lane, offset) ((ch ? 0x2400 : 0) + \
+					(lane) * 0x200 + (offset))
+
+#define CHV_TX_DW0(ch, lane) _TXLANE(ch, lane, 0x80)
+#define CHV_TX_DW1(ch, lane) _TXLANE(ch, lane, 0x84)
+#define CHV_TX_DW2(ch, lane) _TXLANE(ch, lane, 0x88)
+#define CHV_TX_DW3(ch, lane) _TXLANE(ch, lane, 0x8c)
+#define CHV_TX_DW4(ch, lane) _TXLANE(ch, lane, 0x90)
+#define CHV_TX_DW5(ch, lane) _TXLANE(ch, lane, 0x94)
+#define CHV_TX_DW6(ch, lane) _TXLANE(ch, lane, 0x98)
+#define CHV_TX_DW7(ch, lane) _TXLANE(ch, lane, 0x9c)
+#define CHV_TX_DW8(ch, lane) _TXLANE(ch, lane, 0xa0)
+#define CHV_TX_DW9(ch, lane) _TXLANE(ch, lane, 0xa4)
+#define CHV_TX_DW10(ch, lane) _TXLANE(ch, lane, 0xa8)
+#define CHV_TX_DW11(ch, lane) _TXLANE(ch, lane, 0xac)
+#define   DPIO_FRC_LATENCY_SHFIT	8
+#define CHV_TX_DW14(ch, lane) _TXLANE(ch, lane, 0xb8)
+#define   DPIO_UPAR_SHIFT		30
 /*
  * Fence registers
  */
@@ -663,6 +946,7 @@
 #define RENDER_RING_BASE	0x02000
 #define BSD_RING_BASE		0x04000
 #define GEN6_BSD_RING_BASE	0x12000
+#define GEN8_BSD2_RING_BASE	0x1c000
 #define VEBOX_RING_BASE		0x1a000
 #define BLT_RING_BASE		0x22000
 #define RING_TAIL(base)		((base)+0x30)
@@ -688,9 +972,20 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
-#define ARB_MODE		0x04030
+
+#define GEN7_WR_WATERMARK	0x4028
+#define GEN7_GFX_PRIO_CTRL	0x402C
+#define ARB_MODE		0x4030
 #define   ARB_MODE_SWIZZLE_SNB	(1<<4)
 #define   ARB_MODE_SWIZZLE_IVB	(1<<5)
+#define GEN7_GFX_PEND_TLB0	0x4034
+#define GEN7_GFX_PEND_TLB1	0x4038
+/* L3, CVS, ZTLB, RCC, CASC LRA min, max values */
+#define GEN7_LRA_LIMITS_BASE	0x403C
+#define GEN7_LRA_LIMITS_REG_NUM	13
+#define GEN7_MEDIA_MAX_REQ_COUNT	0x4070
+#define GEN7_GFX_MAX_REQ_COUNT		0x4074
+
 #define GAMTARBMODE		0x04a08
 #define   ARB_MODE_BWGTLB_DISABLE (1<<9)
 #define   ARB_MODE_SWIZZLE_BDW	(1<<1)
@@ -725,6 +1020,9 @@
 #define   RING_WAIT_I8XX	(1<<0) /* gen2, PRBx_HEAD */
 #define   RING_WAIT		(1<<11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE	(1<<10) /* gen6+ */
+
+#define GEN7_TLB_RD_ADDR	0x4700
+
 #if 0
 #define PRB0_TAIL	0x02030
 #define PRB0_HEAD	0x02034
@@ -748,6 +1046,7 @@
 #define RING_INSTDONE(base)	((base)+0x6c)
 #define RING_INSTPS(base)	((base)+0x70)
 #define RING_DMA_FADD(base)	((base)+0x78)
+#define RING_DMA_FADD_UDW(base)	((base)+0x60) /* gen8+ */
 #define RING_INSTPM(base)	((base)+0xc0)
 #define RING_MI_MODE(base)	((base)+0x9c)
 #define INSTPS		0x02070 /* 965+ only */
@@ -842,21 +1141,26 @@
 #define GFX_MODE_GEN7	0x0229c
 #define RING_MODE_GEN7(ring)	((ring)->mmio_base+0x29c)
 #define   GFX_RUN_LIST_ENABLE		(1<<15)
-#define   GFX_TLB_INVALIDATE_ALWAYS	(1<<13)
+#define   GFX_TLB_INVALIDATE_EXPLICIT	(1<<13)
 #define   GFX_SURFACE_FAULT_ENABLE	(1<<12)
 #define   GFX_REPLAY_MODE		(1<<11)
 #define   GFX_PSMI_GRANULARITY		(1<<10)
 #define   GFX_PPGTT_ENABLE		(1<<9)
 
 #define VLV_DISPLAY_BASE 0x180000
+#define VLV_MIPI_BASE VLV_DISPLAY_BASE
 
+#define VLV_GU_CTL0	(VLV_DISPLAY_BASE + 0x2030)
+#define VLV_GU_CTL1	(VLV_DISPLAY_BASE + 0x2034)
 #define SCPD0		0x0209c /* 915+ only */
 #define IER		0x020a0
 #define IIR		0x020a4
 #define IMR		0x020a8
 #define ISR		0x020ac
 #define VLV_GUNIT_CLOCK_GATE	(VLV_DISPLAY_BASE + 0x2060)
+#define   GINT_DIS		(1<<22)
 #define   GCFG_DIS		(1<<8)
+#define VLV_GUNIT_CLOCK_GATE2	(VLV_DISPLAY_BASE + 0x2064)
 #define VLV_IIR_RW	(VLV_DISPLAY_BASE + 0x2084)
 #define VLV_IER		(VLV_DISPLAY_BASE + 0x20a0)
 #define VLV_IIR		(VLV_DISPLAY_BASE + 0x20a4)
@@ -875,7 +1179,7 @@
 #define   I915_ERROR_INSTRUCTION			(1<<0)
 #define INSTPM	        0x020c0
 #define   INSTPM_SELF_EN (1<<12) /* 915GM only */
-#define   INSTPM_AGPBUSY_DIS (1<<11) /* gen3: when disabled, pending interrupts
+#define   INSTPM_AGPBUSY_INT_EN (1<<11) /* gen3: when disabled, pending interrupts
 					will not assert AGPBUSY# and will only
 					be delivered when out of C3. */
 #define   INSTPM_FORCE_ORDERING				(1<<7) /* GEN6+ */
@@ -956,6 +1260,10 @@
 #define   MI_ARB_DISPLAY_PRIORITY_A_B		(0 << 0)	/* display A > display B */
 #define   MI_ARB_DISPLAY_PRIORITY_B_A		(1 << 0)	/* display B > display A */
 
+#define MI_STATE	0x020e4 /* gen2 only */
+#define   MI_AGPBUSY_INT_EN			(1 << 1) /* 85x only */
+#define   MI_AGPBUSY_830_MODE			(1 << 0) /* 85x only */
+
 #define CACHE_MODE_0	0x02120 /* 915+ only */
 #define   CM0_PIPELINED_RENDER_FLUSH_DISABLE (1<<8)
 #define   CM0_IZ_OPT_DISABLE      (1<<6)
@@ -973,6 +1281,7 @@
 #define   ECO_FLIP_DONE		(1<<0)
 
 #define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
+#define RC_OP_FLUSH_ENABLE (1<<0)
 #define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
 #define CACHE_MODE_1		0x7004 /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1<<6)
@@ -984,6 +1293,7 @@
 
 #define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
+#define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1<<10)
 
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	0x12050
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
@@ -1024,24 +1334,43 @@
 
 /* These are all the "old" interrupts */
 #define ILK_BSD_USER_INTERRUPT				(1<<5)
+
+#define I915_PM_INTERRUPT				(1<<31)
+#define I915_ISP_INTERRUPT				(1<<22)
+#define I915_LPE_PIPE_B_INTERRUPT			(1<<21)
+#define I915_LPE_PIPE_A_INTERRUPT			(1<<20)
+#define I915_MIPIB_INTERRUPT				(1<<19)
+#define I915_MIPIA_INTERRUPT				(1<<18)
 #define I915_PIPE_CONTROL_NOTIFY_INTERRUPT		(1<<18)
 #define I915_DISPLAY_PORT_INTERRUPT			(1<<17)
+#define I915_DISPLAY_PIPE_C_HBLANK_INTERRUPT		(1<<16)
+#define I915_MASTER_ERROR_INTERRUPT			(1<<15)
 #define I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT	(1<<15)
+#define I915_DISPLAY_PIPE_B_HBLANK_INTERRUPT		(1<<14)
 #define I915_GMCH_THERMAL_SENSOR_EVENT_INTERRUPT	(1<<14) /* p-state */
+#define I915_DISPLAY_PIPE_A_HBLANK_INTERRUPT		(1<<13)
 #define I915_HWB_OOM_INTERRUPT				(1<<13)
+#define I915_LPE_PIPE_C_INTERRUPT			(1<<12)
 #define I915_SYNC_STATUS_INTERRUPT			(1<<12)
+#define I915_MISC_INTERRUPT				(1<<11)
 #define I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT	(1<<11)
+#define I915_DISPLAY_PIPE_C_VBLANK_INTERRUPT		(1<<10)
 #define I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT	(1<<10)
+#define I915_DISPLAY_PIPE_C_EVENT_INTERRUPT		(1<<9)
 #define I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT	(1<<9)
+#define I915_DISPLAY_PIPE_C_DPBM_INTERRUPT		(1<<8)
 #define I915_DISPLAY_PLANE_C_FLIP_PENDING_INTERRUPT	(1<<8)
 #define I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT		(1<<7)
 #define I915_DISPLAY_PIPE_A_EVENT_INTERRUPT		(1<<6)
 #define I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT		(1<<5)
 #define I915_DISPLAY_PIPE_B_EVENT_INTERRUPT		(1<<4)
+#define I915_DISPLAY_PIPE_A_DPBM_INTERRUPT		(1<<3)
+#define I915_DISPLAY_PIPE_B_DPBM_INTERRUPT		(1<<2)
 #define I915_DEBUG_INTERRUPT				(1<<2)
+#define I915_WINVALID_INTERRUPT				(1<<1)
 #define I915_USER_INTERRUPT				(1<<1)
 #define I915_ASLE_INTERRUPT				(1<<0)
-#define I915_BSD_USER_INTERRUPT				(1 << 25)
+#define I915_BSD_USER_INTERRUPT				(1<<25)
 
 #define GEN6_BSD_RNCID			0x12198
 
@@ -1198,6 +1527,7 @@
 #define   GMBUS_PORT_SSC	1
 #define   GMBUS_PORT_VGADDC	2
 #define   GMBUS_PORT_PANEL	3
+#define   GMBUS_PORT_DPD_CHV	3 /* HDMID_CHV */
 #define   GMBUS_PORT_DPC	4 /* HDMIC */
 #define   GMBUS_PORT_DPB	5 /* SDVO, HDMIB */
 #define   GMBUS_PORT_DPD	6 /* HDMID */
@@ -1239,6 +1569,7 @@
  */
 #define DPLL_A_OFFSET 0x6014
 #define DPLL_B_OFFSET 0x6018
+#define CHV_DPLL_C_OFFSET 0x6030
 #define DPLL(pipe) (dev_priv->info.dpll_offsets[pipe] + \
 		    dev_priv->info.display_mmio_offset)
 
@@ -1273,10 +1604,23 @@
 #define   DPLL_LOCK_VLV			(1<<15)
 #define   DPLL_INTEGRATED_CRI_CLK_VLV	(1<<14)
 #define   DPLL_INTEGRATED_CLOCK_VLV	(1<<13)
+#define   DPLL_SSC_REF_CLOCK_CHV	(1<<13)
 #define   DPLL_PORTC_READY_MASK		(0xf << 4)
 #define   DPLL_PORTB_READY_MASK		(0xf)
 
 #define   DPLL_FPA01_P1_POST_DIV_MASK_I830	0x001f0000
+
+/* Additional CHV pll/phy registers */
+#define DPIO_PHY_STATUS			(VLV_DISPLAY_BASE + 0x6240)
+#define   DPLL_PORTD_READY_MASK		(0xf)
+#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
+#define   PHY_COM_LANE_RESET_DEASSERT(phy, val) \
+				((phy == DPIO_PHY0) ? (val | 1) : (val | 2))
+#define   PHY_COM_LANE_RESET_ASSERT(phy, val) \
+				((phy == DPIO_PHY0) ? (val & ~1) : (val & ~2))
+#define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104)
+#define   PHY_POWERGOOD(phy)	((phy == DPIO_PHY0) ? (1<<31) : (1<<30))
+
 /*
  * The i830 generation, in LVDS mode, defines P1 as the bit number set within
  * this field (only one bit may be set).
@@ -1317,6 +1661,7 @@
 
 #define DPLL_A_MD_OFFSET 0x601c /* 965+ only */
 #define DPLL_B_MD_OFFSET 0x6020 /* 965+ only */
+#define CHV_DPLL_C_MD_OFFSET 0x603c
 #define DPLL_MD(pipe) (dev_priv->info.dpll_md_offsets[pipe] + \
 		       dev_priv->info.display_mmio_offset)
 
@@ -1416,7 +1761,7 @@
 # define DPIOUNIT_CLOCK_GATE_DISABLE		(1 << 6) /* 915-945 */
 # define OVFUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define OVBUNIT_CLOCK_GATE_DISABLE		(1 << 4)
-/**
+/*
  * This bit must be set on the 830 to prevent hangs when turning off the
  * overlay scaler.
  */
@@ -1436,12 +1781,12 @@
 # define COLOR_CALCULATOR_CLOCK_GATE_DISABLE	(1 << 7)
 # define MOTION_COMP_CLOCK_GATE_DISABLE		(1 << 6)
 # define MAG_CLOCK_GATE_DISABLE			(1 << 5)
-/** This bit must be unset on 855,865 */
+/* This bit must be unset on 855,865 */
 # define MECI_CLOCK_GATE_DISABLE		(1 << 4)
 # define DCMP_CLOCK_GATE_DISABLE		(1 << 3)
 # define MEC_CLOCK_GATE_DISABLE			(1 << 2)
 # define MECO_CLOCK_GATE_DISABLE		(1 << 1)
-/** This bit must be set on 855,865. */
+/* This bit must be set on 855,865. */
 # define SV_CLOCK_GATE_DISABLE			(1 << 0)
 # define I915_MPEG_CLOCK_GATE_DISABLE		(1 << 16)
 # define I915_VLD_IP_PR_CLOCK_GATE_DISABLE	(1 << 15)
@@ -1462,14 +1807,14 @@
 # define I915_BY_CLOCK_GATE_DISABLE		(1 << 0)
 
 # define I965_RCZ_CLOCK_GATE_DISABLE		(1 << 30)
-/** This bit must always be set on 965G/965GM */
+/* This bit must always be set on 965G/965GM */
 # define I965_RCC_CLOCK_GATE_DISABLE		(1 << 29)
 # define I965_RCPB_CLOCK_GATE_DISABLE		(1 << 28)
 # define I965_DAP_CLOCK_GATE_DISABLE		(1 << 27)
 # define I965_ROC_CLOCK_GATE_DISABLE		(1 << 26)
 # define I965_GW_CLOCK_GATE_DISABLE		(1 << 25)
 # define I965_TD_CLOCK_GATE_DISABLE		(1 << 24)
-/** This bit must always be set on 965G */
+/* This bit must always be set on 965G */
 # define I965_ISC_CLOCK_GATE_DISABLE		(1 << 23)
 # define I965_IC_CLOCK_GATE_DISABLE		(1 << 22)
 # define I965_EU_CLOCK_GATE_DISABLE		(1 << 21)
@@ -1494,6 +1839,10 @@
 #define VF_UNIT_CLOCK_GATE_DISABLE		(1 << 9)
 #define GS_UNIT_CLOCK_GATE_DISABLE		(1 << 7)
 #define CL_UNIT_CLOCK_GATE_DISABLE		(1 << 6)
+
+#define VDECCLK_GATE_D		0x620C		/* g4x only */
+#define  VCP_UNIT_CLOCK_GATE_DISABLE		(1 << 4)
+
 #define RAMCLK_GATE_D		0x6210		/* CRL only */
 #define DEUC			0x6214          /* CRL only */
 
@@ -1513,6 +1862,7 @@
  */
 #define PALETTE_A_OFFSET 0xa000
 #define PALETTE_B_OFFSET 0xa800
+#define CHV_PALETTE_C_OFFSET 0xc000
 #define PALETTE(pipe) (dev_priv->info.palette_offsets[pipe] + \
 		       dev_priv->info.display_mmio_offset)
 
@@ -1535,7 +1885,7 @@
 /* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */
 #define DCLK (MCHBAR_MIRROR_BASE_SNB + 0x5e04)
 
-/** 915-945 and GM965 MCH register controlling DRAM channel access */
+/* 915-945 and GM965 MCH register controlling DRAM channel access */
 #define DCC			0x10200
 #define DCC_ADDRESSING_MODE_SINGLE_CHANNEL		(0 << 0)
 #define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC	(1 << 0)
@@ -1544,15 +1894,15 @@
 #define DCC_CHANNEL_XOR_DISABLE				(1 << 10)
 #define DCC_CHANNEL_XOR_BIT_17				(1 << 9)
 
-/** Pineview MCH register contains DDR3 setting */
+/* Pineview MCH register contains DDR3 setting */
 #define CSHRDDR3CTL            0x101a8
 #define CSHRDDR3CTL_DDR3       (1 << 2)
 
-/** 965 MCH register controlling DRAM channel configuration */
+/* 965 MCH register controlling DRAM channel configuration */
 #define C0DRB3			0x10206
 #define C1DRB3			0x10606
 
-/** snb MCH registers for reading the DRAM channel configuration */
+/* snb MCH registers for reading the DRAM channel configuration */
 #define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
 #define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
 #define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
@@ -1574,7 +1924,7 @@
 #define   MAD_DIMM_A_SIZE_SHIFT		0
 #define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
 
-/** snb MCH registers for priority tuning */
+/* snb MCH registers for priority tuning */
 #define MCH_SSKPD			(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
 #define   MCH_SSKPD_WM0_MASK		0x3f
 #define   MCH_SSKPD_WM0_VAL		0xc
@@ -2002,6 +2352,7 @@
 #define TRANSCODER_A_OFFSET 0x60000
 #define TRANSCODER_B_OFFSET 0x61000
 #define TRANSCODER_C_OFFSET 0x62000
+#define CHV_TRANSCODER_C_OFFSET 0x63000
 #define TRANSCODER_EDP_OFFSET 0x6f000
 
 #define _TRANSCODER2(pipe, reg) (dev_priv->info.trans_offsets[(pipe)] - \
@@ -2226,6 +2577,7 @@
 #define GEN3_SDVOC	0x61160
 #define GEN4_HDMIB	GEN3_SDVOB
 #define GEN4_HDMIC	GEN3_SDVOC
+#define CHV_HDMID	0x6116C
 #define PCH_SDVOB	0xe1140
 #define PCH_HDMIB	PCH_SDVOB
 #define PCH_HDMIC	0xe1150
@@ -2246,7 +2598,7 @@
 #define   SDVO_PIPE_B_SELECT			(1 << 30)
 #define   SDVO_STALL_SELECT			(1 << 29)
 #define   SDVO_INTERRUPT_ENABLE			(1 << 26)
-/**
+/*
  * 915G/GM SDVO pixel multiplier.
  * Programmed value is multiplier - 1, up to 5x.
  * \sa DPLL_MD_UDI_MULTIPLIER_MASK
@@ -2286,6 +2638,10 @@
 #define   SDVO_PIPE_SEL_CPT(pipe)		((pipe) << 29)
 #define   SDVO_PIPE_SEL_MASK_CPT		(3 << 29)
 
+/* CHV SDVO/HDMI bits: */
+#define   SDVO_PIPE_SEL_CHV(pipe)		((pipe) << 24)
+#define   SDVO_PIPE_SEL_MASK_CHV		(3 << 24)
+
 
 /* DVO port control */
 #define DVOA			0x61120
@@ -2556,65 +2912,65 @@
 
 /* TV port control */
 #define TV_CTL			0x68000
-/** Enables the TV encoder */
+/* Enables the TV encoder */
 # define TV_ENC_ENABLE			(1 << 31)
-/** Sources the TV encoder input from pipe B instead of A. */
+/* Sources the TV encoder input from pipe B instead of A. */
 # define TV_ENC_PIPEB_SELECT		(1 << 30)
-/** Outputs composite video (DAC A only) */
+/* Outputs composite video (DAC A only) */
 # define TV_ENC_OUTPUT_COMPOSITE	(0 << 28)
-/** Outputs SVideo video (DAC B/C) */
+/* Outputs SVideo video (DAC B/C) */
 # define TV_ENC_OUTPUT_SVIDEO		(1 << 28)
-/** Outputs Component video (DAC A/B/C) */
+/* Outputs Component video (DAC A/B/C) */
 # define TV_ENC_OUTPUT_COMPONENT	(2 << 28)
-/** Outputs Composite and SVideo (DAC A/B/C) */
+/* Outputs Composite and SVideo (DAC A/B/C) */
 # define TV_ENC_OUTPUT_SVIDEO_COMPOSITE	(3 << 28)
 # define TV_TRILEVEL_SYNC		(1 << 21)
-/** Enables slow sync generation (945GM only) */
+/* Enables slow sync generation (945GM only) */
 # define TV_SLOW_SYNC			(1 << 20)
-/** Selects 4x oversampling for 480i and 576p */
+/* Selects 4x oversampling for 480i and 576p */
 # define TV_OVERSAMPLE_4X		(0 << 18)
-/** Selects 2x oversampling for 720p and 1080i */
+/* Selects 2x oversampling for 720p and 1080i */
 # define TV_OVERSAMPLE_2X		(1 << 18)
-/** Selects no oversampling for 1080p */
+/* Selects no oversampling for 1080p */
 # define TV_OVERSAMPLE_NONE		(2 << 18)
-/** Selects 8x oversampling */
+/* Selects 8x oversampling */
 # define TV_OVERSAMPLE_8X		(3 << 18)
-/** Selects progressive mode rather than interlaced */
+/* Selects progressive mode rather than interlaced */
 # define TV_PROGRESSIVE			(1 << 17)
-/** Sets the colorburst to PAL mode.  Required for non-M PAL modes. */
+/* Sets the colorburst to PAL mode.  Required for non-M PAL modes. */
 # define TV_PAL_BURST			(1 << 16)
-/** Field for setting delay of Y compared to C */
+/* Field for setting delay of Y compared to C */
 # define TV_YC_SKEW_MASK		(7 << 12)
-/** Enables a fix for 480p/576p standard definition modes on the 915GM only */
+/* Enables a fix for 480p/576p standard definition modes on the 915GM only */
 # define TV_ENC_SDP_FIX			(1 << 11)
-/**
+/*
  * Enables a fix for the 915GM only.
  *
  * Not sure what it does.
  */
 # define TV_ENC_C0_FIX			(1 << 10)
-/** Bits that must be preserved by software */
+/* Bits that must be preserved by software */
 # define TV_CTL_SAVE			((1 << 11) | (3 << 9) | (7 << 6) | 0xf)
 # define TV_FUSE_STATE_MASK		(3 << 4)
-/** Read-only state that reports all features enabled */
+/* Read-only state that reports all features enabled */
 # define TV_FUSE_STATE_ENABLED		(0 << 4)
-/** Read-only state that reports that Macrovision is disabled in hardware*/
+/* Read-only state that reports that Macrovision is disabled in hardware*/
 # define TV_FUSE_STATE_NO_MACROVISION	(1 << 4)
-/** Read-only state that reports that TV-out is disabled in hardware. */
+/* Read-only state that reports that TV-out is disabled in hardware. */
 # define TV_FUSE_STATE_DISABLED		(2 << 4)
-/** Normal operation */
+/* Normal operation */
 # define TV_TEST_MODE_NORMAL		(0 << 0)
-/** Encoder test pattern 1 - combo pattern */
+/* Encoder test pattern 1 - combo pattern */
 # define TV_TEST_MODE_PATTERN_1		(1 << 0)
-/** Encoder test pattern 2 - full screen vertical 75% color bars */
+/* Encoder test pattern 2 - full screen vertical 75% color bars */
 # define TV_TEST_MODE_PATTERN_2		(2 << 0)
-/** Encoder test pattern 3 - full screen horizontal 75% color bars */
+/* Encoder test pattern 3 - full screen horizontal 75% color bars */
 # define TV_TEST_MODE_PATTERN_3		(3 << 0)
-/** Encoder test pattern 4 - random noise */
+/* Encoder test pattern 4 - random noise */
 # define TV_TEST_MODE_PATTERN_4		(4 << 0)
-/** Encoder test pattern 5 - linear color ramps */
+/* Encoder test pattern 5 - linear color ramps */
 # define TV_TEST_MODE_PATTERN_5		(5 << 0)
-/**
+/*
  * This test mode forces the DACs to 50% of full output.
  *
  * This is used for load detection in combination with TVDAC_SENSE_MASK
@@ -2624,35 +2980,35 @@
 
 #define TV_DAC			0x68004
 # define TV_DAC_SAVE		0x00ffff00
-/**
+/*
  * Reports that DAC state change logic has reported change (RO).
  *
  * This gets cleared when TV_DAC_STATE_EN is cleared
 */
 # define TVDAC_STATE_CHG		(1 << 31)
 # define TVDAC_SENSE_MASK		(7 << 28)
-/** Reports that DAC A voltage is above the detect threshold */
+/* Reports that DAC A voltage is above the detect threshold */
 # define TVDAC_A_SENSE			(1 << 30)
-/** Reports that DAC B voltage is above the detect threshold */
+/* Reports that DAC B voltage is above the detect threshold */
 # define TVDAC_B_SENSE			(1 << 29)
-/** Reports that DAC C voltage is above the detect threshold */
+/* Reports that DAC C voltage is above the detect threshold */
 # define TVDAC_C_SENSE			(1 << 28)
-/**
+/*
  * Enables DAC state detection logic, for load-based TV detection.
  *
  * The PLL of the chosen pipe (in TV_CTL) must be running, and the encoder set
  * to off, for load detection to work.
  */
 # define TVDAC_STATE_CHG_EN		(1 << 27)
-/** Sets the DAC A sense value to high */
+/* Sets the DAC A sense value to high */
 # define TVDAC_A_SENSE_CTL		(1 << 26)
-/** Sets the DAC B sense value to high */
+/* Sets the DAC B sense value to high */
 # define TVDAC_B_SENSE_CTL		(1 << 25)
-/** Sets the DAC C sense value to high */
+/* Sets the DAC C sense value to high */
 # define TVDAC_C_SENSE_CTL		(1 << 24)
-/** Overrides the ENC_ENABLE and DAC voltage levels */
+/* Overrides the ENC_ENABLE and DAC voltage levels */
 # define DAC_CTL_OVERRIDE		(1 << 7)
-/** Sets the slew rate.  Must be preserved in software */
+/* Sets the slew rate.  Must be preserved in software */
 # define ENC_TVDAC_SLEW_FAST		(1 << 6)
 # define DAC_A_1_3_V			(0 << 4)
 # define DAC_A_1_1_V			(1 << 4)
@@ -2667,7 +3023,7 @@
 # define DAC_C_0_7_V			(2 << 0)
 # define DAC_C_MASK			(3 << 0)
 
-/**
+/*
  * CSC coefficients are stored in a floating point format with 9 bits of
  * mantissa and 2 or 3 bits of exponent.  The exponent is represented as 2**-n,
  * where 2-bit exponents are unsigned n, and 3-bit exponents are signed n with
@@ -2682,7 +3038,7 @@
 #define TV_CSC_Y2		0x68014
 # define TV_BY_MASK			0x07ff0000
 # define TV_BY_SHIFT			16
-/**
+/*
  * Y attenuation for component video.
  *
  * Stored in 1.9 fixed point.
@@ -2699,7 +3055,7 @@
 #define TV_CSC_U2		0x6801c
 # define TV_BU_MASK			0x07ff0000
 # define TV_BU_SHIFT			16
-/**
+/*
  * U attenuation for component video.
  *
  * Stored in 1.9 fixed point.
@@ -2716,7 +3072,7 @@
 #define TV_CSC_V2		0x68024
 # define TV_BV_MASK			0x07ff0000
 # define TV_BV_SHIFT			16
-/**
+/*
  * V attenuation for component video.
  *
  * Stored in 1.9 fixed point.
@@ -2725,74 +3081,74 @@
 # define TV_AV_SHIFT			0
 
 #define TV_CLR_KNOBS		0x68028
-/** 2s-complement brightness adjustment */
+/* 2s-complement brightness adjustment */
 # define TV_BRIGHTNESS_MASK		0xff000000
 # define TV_BRIGHTNESS_SHIFT		24
-/** Contrast adjustment, as a 2.6 unsigned floating point number */
+/* Contrast adjustment, as a 2.6 unsigned floating point number */
 # define TV_CONTRAST_MASK		0x00ff0000
 # define TV_CONTRAST_SHIFT		16
-/** Saturation adjustment, as a 2.6 unsigned floating point number */
+/* Saturation adjustment, as a 2.6 unsigned floating point number */
 # define TV_SATURATION_MASK		0x0000ff00
 # define TV_SATURATION_SHIFT		8
-/** Hue adjustment, as an integer phase angle in degrees */
+/* Hue adjustment, as an integer phase angle in degrees */
 # define TV_HUE_MASK			0x000000ff
 # define TV_HUE_SHIFT			0
 
 #define TV_CLR_LEVEL		0x6802c
-/** Controls the DAC level for black */
+/* Controls the DAC level for black */
 # define TV_BLACK_LEVEL_MASK		0x01ff0000
 # define TV_BLACK_LEVEL_SHIFT		16
-/** Controls the DAC level for blanking */
+/* Controls the DAC level for blanking */
 # define TV_BLANK_LEVEL_MASK		0x000001ff
 # define TV_BLANK_LEVEL_SHIFT		0
 
 #define TV_H_CTL_1		0x68030
-/** Number of pixels in the hsync. */
+/* Number of pixels in the hsync. */
 # define TV_HSYNC_END_MASK		0x1fff0000
 # define TV_HSYNC_END_SHIFT		16
-/** Total number of pixels minus one in the line (display and blanking). */
+/* Total number of pixels minus one in the line (display and blanking). */
 # define TV_HTOTAL_MASK			0x00001fff
 # define TV_HTOTAL_SHIFT		0
 
 #define TV_H_CTL_2		0x68034
-/** Enables the colorburst (needed for non-component color) */
+/* Enables the colorburst (needed for non-component color) */
 # define TV_BURST_ENA			(1 << 31)
-/** Offset of the colorburst from the start of hsync, in pixels minus one. */
+/* Offset of the colorburst from the start of hsync, in pixels minus one. */
 # define TV_HBURST_START_SHIFT		16
 # define TV_HBURST_START_MASK		0x1fff0000
-/** Length of the colorburst */
+/* Length of the colorburst */
 # define TV_HBURST_LEN_SHIFT		0
 # define TV_HBURST_LEN_MASK		0x0001fff
 
 #define TV_H_CTL_3		0x68038
-/** End of hblank, measured in pixels minus one from start of hsync */
+/* End of hblank, measured in pixels minus one from start of hsync */
 # define TV_HBLANK_END_SHIFT		16
 # define TV_HBLANK_END_MASK		0x1fff0000
-/** Start of hblank, measured in pixels minus one from start of hsync */
+/* Start of hblank, measured in pixels minus one from start of hsync */
 # define TV_HBLANK_START_SHIFT		0
 # define TV_HBLANK_START_MASK		0x0001fff
 
 #define TV_V_CTL_1		0x6803c
-/** XXX */
+/* XXX */
 # define TV_NBR_END_SHIFT		16
 # define TV_NBR_END_MASK		0x07ff0000
-/** XXX */
+/* XXX */
 # define TV_VI_END_F1_SHIFT		8
 # define TV_VI_END_F1_MASK		0x00003f00
-/** XXX */
+/* XXX */
 # define TV_VI_END_F2_SHIFT		0
 # define TV_VI_END_F2_MASK		0x0000003f
 
 #define TV_V_CTL_2		0x68040
-/** Length of vsync, in half lines */
+/* Length of vsync, in half lines */
 # define TV_VSYNC_LEN_MASK		0x07ff0000
 # define TV_VSYNC_LEN_SHIFT		16
-/** Offset of the start of vsync in field 1, measured in one less than the
+/* Offset of the start of vsync in field 1, measured in one less than the
  * number of half lines.
  */
 # define TV_VSYNC_START_F1_MASK		0x00007f00
 # define TV_VSYNC_START_F1_SHIFT	8
-/**
+/*
  * Offset of the start of vsync in field 2, measured in one less than the
  * number of half lines.
  */
@@ -2800,17 +3156,17 @@
 # define TV_VSYNC_START_F2_SHIFT	0
 
 #define TV_V_CTL_3		0x68044
-/** Enables generation of the equalization signal */
+/* Enables generation of the equalization signal */
 # define TV_EQUAL_ENA			(1 << 31)
-/** Length of vsync, in half lines */
+/* Length of vsync, in half lines */
 # define TV_VEQ_LEN_MASK		0x007f0000
 # define TV_VEQ_LEN_SHIFT		16
-/** Offset of the start of equalization in field 1, measured in one less than
+/* Offset of the start of equalization in field 1, measured in one less than
  * the number of half lines.
  */
 # define TV_VEQ_START_F1_MASK		0x0007f00
 # define TV_VEQ_START_F1_SHIFT		8
-/**
+/*
  * Offset of the start of equalization in field 2, measured in one less than
  * the number of half lines.
  */
@@ -2818,13 +3174,13 @@
 # define TV_VEQ_START_F2_SHIFT		0
 
 #define TV_V_CTL_4		0x68048
-/**
+/*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
  */
 # define TV_VBURST_START_F1_MASK	0x003f0000
 # define TV_VBURST_START_F1_SHIFT	16
-/**
+/*
  * Offset to the end of vertical colorburst, measured in one less than the
  * number of lines from the start of NBR.
  */
@@ -2832,13 +3188,13 @@
 # define TV_VBURST_END_F1_SHIFT		0
 
 #define TV_V_CTL_5		0x6804c
-/**
+/*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
  */
 # define TV_VBURST_START_F2_MASK	0x003f0000
 # define TV_VBURST_START_F2_SHIFT	16
-/**
+/*
  * Offset to the end of vertical colorburst, measured in one less than the
  * number of lines from the start of NBR.
  */
@@ -2846,13 +3202,13 @@
 # define TV_VBURST_END_F2_SHIFT		0
 
 #define TV_V_CTL_6		0x68050
-/**
+/*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
  */
 # define TV_VBURST_START_F3_MASK	0x003f0000
 # define TV_VBURST_START_F3_SHIFT	16
-/**
+/*
  * Offset to the end of vertical colorburst, measured in one less than the
  * number of lines from the start of NBR.
  */
@@ -2860,13 +3216,13 @@
 # define TV_VBURST_END_F3_SHIFT		0
 
 #define TV_V_CTL_7		0x68054
-/**
+/*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
  */
 # define TV_VBURST_START_F4_MASK	0x003f0000
 # define TV_VBURST_START_F4_SHIFT	16
-/**
+/*
  * Offset to the end of vertical colorburst, measured in one less than the
  * number of lines from the start of NBR.
  */
@@ -2874,56 +3230,56 @@
 # define TV_VBURST_END_F4_SHIFT		0
 
 #define TV_SC_CTL_1		0x68060
-/** Turns on the first subcarrier phase generation DDA */
+/* Turns on the first subcarrier phase generation DDA */
 # define TV_SC_DDA1_EN			(1 << 31)
-/** Turns on the first subcarrier phase generation DDA */
+/* Turns on the second subcarrier phase generation DDA */
 # define TV_SC_DDA2_EN			(1 << 30)
-/** Turns on the first subcarrier phase generation DDA */
+/* Turns on the third subcarrier phase generation DDA */
 # define TV_SC_DDA3_EN			(1 << 29)
-/** Sets the subcarrier DDA to reset frequency every other field */
+/* Sets the subcarrier DDA to reset frequency every other field */
 # define TV_SC_RESET_EVERY_2		(0 << 24)
-/** Sets the subcarrier DDA to reset frequency every fourth field */
+/* Sets the subcarrier DDA to reset frequency every fourth field */
 # define TV_SC_RESET_EVERY_4		(1 << 24)
-/** Sets the subcarrier DDA to reset frequency every eighth field */
+/* Sets the subcarrier DDA to reset frequency every eighth field */
 # define TV_SC_RESET_EVERY_8		(2 << 24)
-/** Sets the subcarrier DDA to never reset the frequency */
+/* Sets the subcarrier DDA to never reset the frequency */
 # define TV_SC_RESET_NEVER		(3 << 24)
-/** Sets the peak amplitude of the colorburst.*/
+/* Sets the peak amplitude of the colorburst.*/
 # define TV_BURST_LEVEL_MASK		0x00ff0000
 # define TV_BURST_LEVEL_SHIFT		16
-/** Sets the increment of the first subcarrier phase generation DDA */
+/* Sets the increment of the first subcarrier phase generation DDA */
 # define TV_SCDDA1_INC_MASK		0x00000fff
 # define TV_SCDDA1_INC_SHIFT		0
 
 #define TV_SC_CTL_2		0x68064
-/** Sets the rollover for the second subcarrier phase generation DDA */
+/* Sets the rollover for the second subcarrier phase generation DDA */
 # define TV_SCDDA2_SIZE_MASK		0x7fff0000
 # define TV_SCDDA2_SIZE_SHIFT		16
-/** Sets the increent of the second subcarrier phase generation DDA */
+/* Sets the increment of the second subcarrier phase generation DDA */
 # define TV_SCDDA2_INC_MASK		0x00007fff
 # define TV_SCDDA2_INC_SHIFT		0
 
 #define TV_SC_CTL_3		0x68068
-/** Sets the rollover for the third subcarrier phase generation DDA */
+/* Sets the rollover for the third subcarrier phase generation DDA */
 # define TV_SCDDA3_SIZE_MASK		0x7fff0000
 # define TV_SCDDA3_SIZE_SHIFT		16
-/** Sets the increent of the third subcarrier phase generation DDA */
+/* Sets the increment of the third subcarrier phase generation DDA */
 # define TV_SCDDA3_INC_MASK		0x00007fff
 # define TV_SCDDA3_INC_SHIFT		0
 
 #define TV_WIN_POS		0x68070
-/** X coordinate of the display from the start of horizontal active */
+/* X coordinate of the display from the start of horizontal active */
 # define TV_XPOS_MASK			0x1fff0000
 # define TV_XPOS_SHIFT			16
-/** Y coordinate of the display from the start of vertical active (NBR) */
+/* Y coordinate of the display from the start of vertical active (NBR) */
 # define TV_YPOS_MASK			0x00000fff
 # define TV_YPOS_SHIFT			0
 
 #define TV_WIN_SIZE		0x68074
-/** Horizontal size of the display window, measured in pixels*/
+/* Horizontal size of the display window, measured in pixels*/
 # define TV_XSIZE_MASK			0x1fff0000
 # define TV_XSIZE_SHIFT			16
-/**
+/*
  * Vertical size of the display window, measured in pixels.
  *
  * Must be even for interlaced modes.
@@ -2932,28 +3288,28 @@
 # define TV_YSIZE_SHIFT			0
 
 #define TV_FILTER_CTL_1		0x68080
-/**
+/*
  * Enables automatic scaling calculation.
  *
  * If set, the rest of the registers are ignored, and the calculated values can
  * be read back from the register.
  */
 # define TV_AUTO_SCALE			(1 << 31)
-/**
+/*
  * Disables the vertical filter.
  *
  * This is required on modes more than 1024 pixels wide */
 # define TV_V_FILTER_BYPASS		(1 << 29)
-/** Enables adaptive vertical filtering */
+/* Enables adaptive vertical filtering */
 # define TV_VADAPT			(1 << 28)
 # define TV_VADAPT_MODE_MASK		(3 << 26)
-/** Selects the least adaptive vertical filtering mode */
+/* Selects the least adaptive vertical filtering mode */
 # define TV_VADAPT_MODE_LEAST		(0 << 26)
-/** Selects the moderately adaptive vertical filtering mode */
+/* Selects the moderately adaptive vertical filtering mode */
 # define TV_VADAPT_MODE_MODERATE	(1 << 26)
-/** Selects the most adaptive vertical filtering mode */
+/* Selects the most adaptive vertical filtering mode */
 # define TV_VADAPT_MODE_MOST		(3 << 26)
-/**
+/*
  * Sets the horizontal scaling factor.
  *
  * This should be the fractional part of the horizontal scaling factor divided
@@ -2965,14 +3321,14 @@
 # define TV_HSCALE_FRAC_SHIFT		0
 
 #define TV_FILTER_CTL_2		0x68084
-/**
+/*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
  * TV_VSCALE should be (src height - 1) / ((interlace * dest height) - 1)
  */
 # define TV_VSCALE_INT_MASK		0x00038000
 # define TV_VSCALE_INT_SHIFT		15
-/**
+/*
  * Sets the fractional part of the 3.15 fixed-point vertical scaling factor.
  *
  * \sa TV_VSCALE_INT_MASK
@@ -2981,7 +3337,7 @@
 # define TV_VSCALE_FRAC_SHIFT		0
 
 #define TV_FILTER_CTL_3		0x68088
-/**
+/*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
  * TV_VSCALE should be (src height - 1) / (1/4 * (dest height - 1))
@@ -2990,7 +3346,7 @@
  */
 # define TV_VSCALE_IP_INT_MASK		0x00038000
 # define TV_VSCALE_IP_INT_SHIFT		15
-/**
+/*
  * Sets the fractional part of the 3.15 fixed-point vertical scaling factor.
  *
  * For progressive modes, TV_VSCALE_IP_INT should be set to zeroes.
@@ -3002,26 +3358,26 @@
 
 #define TV_CC_CONTROL		0x68090
 # define TV_CC_ENABLE			(1 << 31)
-/**
+/*
  * Specifies which field to send the CC data in.
  *
  * CC data is usually sent in field 0.
  */
 # define TV_CC_FID_MASK			(1 << 27)
 # define TV_CC_FID_SHIFT		27
-/** Sets the horizontal position of the CC data.  Usually 135. */
+/* Sets the horizontal position of the CC data.  Usually 135. */
 # define TV_CC_HOFF_MASK		0x03ff0000
 # define TV_CC_HOFF_SHIFT		16
-/** Sets the vertical position of the CC data.  Usually 21 */
+/* Sets the vertical position of the CC data.  Usually 21 */
 # define TV_CC_LINE_MASK		0x0000003f
 # define TV_CC_LINE_SHIFT		0
 
 #define TV_CC_DATA		0x68094
 # define TV_CC_RDY			(1 << 31)
-/** Second word of CC data to be transmitted. */
+/* Second word of CC data to be transmitted. */
 # define TV_CC_DATA_2_MASK		0x007f0000
 # define TV_CC_DATA_2_SHIFT		16
-/** First word of CC data to be transmitted. */
+/* First word of CC data to be transmitted. */
 # define TV_CC_DATA_1_MASK		0x0000007f
 # define TV_CC_DATA_1_SHIFT		0
 
@@ -3043,6 +3399,8 @@
 #define   DP_PORT_EN			(1 << 31)
 #define   DP_PIPEB_SELECT		(1 << 30)
 #define   DP_PIPE_MASK			(1 << 30)
+#define   DP_PIPE_SELECT_CHV(pipe)	((pipe) << 16)
+#define   DP_PIPE_MASK_CHV		(3 << 16)
 
 /* Link training mode - select a suitable mode for each stage */
 #define   DP_LINK_TRAIN_PAT_1		(0 << 28)
@@ -3090,32 +3448,32 @@
 #define   DP_PLL_FREQ_160MHZ		(1 << 16)
 #define   DP_PLL_FREQ_MASK		(3 << 16)
 
-/** locked once port is enabled */
+/* locked once port is enabled */
 #define   DP_PORT_REVERSAL		(1 << 15)
 
 /* eDP */
 #define   DP_PLL_ENABLE			(1 << 14)
 
-/** sends the clock on lane 15 of the PEG for debug */
+/* sends the clock on lane 15 of the PEG for debug */
 #define   DP_CLOCK_OUTPUT_ENABLE	(1 << 13)
 
 #define   DP_SCRAMBLING_DISABLE		(1 << 12)
 #define   DP_SCRAMBLING_DISABLE_IRONLAKE	(1 << 7)
 
-/** limit RGB values to avoid confusing TVs */
+/* limit RGB values to avoid confusing TVs */
 #define   DP_COLOR_RANGE_16_235		(1 << 8)
 
-/** Turn on the audio link */
+/* Turn on the audio link */
 #define   DP_AUDIO_OUTPUT_ENABLE	(1 << 6)
 
-/** vs and hs sync polarity */
+/* vs and hs sync polarity */
 #define   DP_SYNC_VS_HIGH		(1 << 4)
 #define   DP_SYNC_HS_HIGH		(1 << 3)
 
-/** A fantasy */
+/* A fantasy */
 #define   DP_DETECTED			(1 << 2)
 
-/** The aux channel provides a way to talk to the
+/* The aux channel provides a way to talk to the
  * signal sink for DDC etc. Max packet size supported
  * is 20 bytes in each direction, hence the 5 fixed
  * data registers
@@ -3258,6 +3616,7 @@
 #define   PIPECONF_INTERLACED_DBL_ILK		(4 << 21) /* ilk/snb only */
 #define   PIPECONF_PFIT_PF_INTERLACED_DBL_ILK	(5 << 21) /* ilk/snb only */
 #define   PIPECONF_INTERLACE_MODE_MASK		(7 << 21)
+#define   PIPECONF_EDP_RR_MODE_SWITCH		(1 << 20)
 #define   PIPECONF_CXSR_DOWNCLOCK	(1<<16)
 #define   PIPECONF_COLOR_RANGE_SELECT	(1 << 13)
 #define   PIPECONF_BPC_MASK	(0x7 << 5)
@@ -3276,6 +3635,7 @@
 #define   SPRITE1_FLIP_DONE_INT_EN_VLV		(1UL<<30)
 #define   PIPE_CRC_ERROR_ENABLE			(1UL<<29)
 #define   PIPE_CRC_DONE_ENABLE			(1UL<<28)
+#define   PERF_COUNTER2_INTERRUPT_EN		(1UL<<27)
 #define   PIPE_GMBUS_EVENT_ENABLE		(1UL<<27)
 #define   PLANE_FLIP_DONE_INT_EN_VLV		(1UL<<26)
 #define   PIPE_HOTPLUG_INTERRUPT_ENABLE		(1UL<<26)
@@ -3287,8 +3647,10 @@
 #define   PIPE_ODD_FIELD_INTERRUPT_ENABLE	(1UL<<21)
 #define   PIPE_EVEN_FIELD_INTERRUPT_ENABLE	(1UL<<20)
 #define   PIPE_B_PSR_INTERRUPT_ENABLE_VLV	(1UL<<19)
+#define   PERF_COUNTER_INTERRUPT_EN		(1UL<<19)
 #define   PIPE_HOTPLUG_TV_INTERRUPT_ENABLE	(1UL<<18) /* pre-965 */
 #define   PIPE_START_VBLANK_INTERRUPT_ENABLE	(1UL<<18) /* 965 or later */
+#define   PIPE_FRAMESTART_INTERRUPT_ENABLE	(1UL<<17)
 #define   PIPE_VBLANK_INTERRUPT_ENABLE		(1UL<<17)
 #define   PIPEA_HBLANK_INT_EN_VLV		(1UL<<16)
 #define   PIPE_OVERLAY_UPDATED_ENABLE		(1UL<<16)
@@ -3296,6 +3658,7 @@
 #define   SPRITE0_FLIP_DONE_INT_STATUS_VLV	(1UL<<14)
 #define   PIPE_CRC_ERROR_INTERRUPT_STATUS	(1UL<<13)
 #define   PIPE_CRC_DONE_INTERRUPT_STATUS	(1UL<<12)
+#define   PERF_COUNTER2_INTERRUPT_STATUS	(1UL<<11)
 #define   PIPE_GMBUS_INTERRUPT_STATUS		(1UL<<11)
 #define   PLANE_FLIP_DONE_INT_STATUS_VLV	(1UL<<10)
 #define   PIPE_HOTPLUG_INTERRUPT_STATUS		(1UL<<10)
@@ -3304,20 +3667,25 @@
 #define   PIPE_DPST_EVENT_STATUS		(1UL<<7)
 #define   PIPE_LEGACY_BLC_EVENT_STATUS		(1UL<<6)
 #define   PIPE_A_PSR_STATUS_VLV			(1UL<<6)
+#define   PIPE_LEGACY_BLC_EVENT_STATUS		(1UL<<6)
 #define   PIPE_ODD_FIELD_INTERRUPT_STATUS	(1UL<<5)
 #define   PIPE_EVEN_FIELD_INTERRUPT_STATUS	(1UL<<4)
 #define   PIPE_B_PSR_STATUS_VLV			(1UL<<3)
+#define   PERF_COUNTER_INTERRUPT_STATUS		(1UL<<3)
 #define   PIPE_HOTPLUG_TV_INTERRUPT_STATUS	(1UL<<2) /* pre-965 */
 #define   PIPE_START_VBLANK_INTERRUPT_STATUS	(1UL<<2) /* 965 or later */
+#define   PIPE_FRAMESTART_INTERRUPT_STATUS	(1UL<<1)
 #define   PIPE_VBLANK_INTERRUPT_STATUS		(1UL<<1)
+#define   PIPE_HBLANK_INT_STATUS		(1UL<<0)
 #define   PIPE_OVERLAY_UPDATED_STATUS		(1UL<<0)
 
 #define PIPESTAT_INT_ENABLE_MASK		0x7fff0000
 #define PIPESTAT_INT_STATUS_MASK		0x0000ffff
 
-#define PIPE_A_OFFSET	0x70000
-#define PIPE_B_OFFSET	0x71000
-#define PIPE_C_OFFSET	0x72000
+#define PIPE_A_OFFSET		0x70000
+#define PIPE_B_OFFSET		0x71000
+#define PIPE_C_OFFSET		0x72000
+#define CHV_PIPE_C_OFFSET	0x74000
 /*
  * There's actually no pipe EDP. Some pipe registers have
  * simply shifted from the pipe to the transcoder, while
@@ -3355,14 +3723,25 @@
 #define   SPRITED_FLIP_DONE_INT_EN		(1<<26)
 #define   SPRITEC_FLIP_DONE_INT_EN		(1<<25)
 #define   PLANEB_FLIP_DONE_INT_EN		(1<<24)
+#define   PIPE_PSR_INT_EN			(1<<22)
 #define   PIPEA_LINE_COMPARE_INT_EN		(1<<21)
 #define   PIPEA_HLINE_INT_EN			(1<<20)
 #define   PIPEA_VBLANK_INT_EN			(1<<19)
 #define   SPRITEB_FLIP_DONE_INT_EN		(1<<18)
 #define   SPRITEA_FLIP_DONE_INT_EN		(1<<17)
 #define   PLANEA_FLIPDONE_INT_EN		(1<<16)
+#define   PIPEC_LINE_COMPARE_INT_EN		(1<<13)
+#define   PIPEC_HLINE_INT_EN			(1<<12)
+#define   PIPEC_VBLANK_INT_EN			(1<<11)
+#define   SPRITEF_FLIPDONE_INT_EN		(1<<10)
+#define   SPRITEE_FLIPDONE_INT_EN		(1<<9)
+#define   PLANEC_FLIPDONE_INT_EN		(1<<8)
 
-#define DPINVGTT				(VLV_DISPLAY_BASE + 0x7002c) /* VLV only */
+#define DPINVGTT				(VLV_DISPLAY_BASE + 0x7002c) /* VLV/CHV only */
+#define   SPRITEF_INVALID_GTT_INT_EN		(1<<27)
+#define   SPRITEE_INVALID_GTT_INT_EN		(1<<26)
+#define   PLANEC_INVALID_GTT_INT_EN		(1<<25)
+#define   CURSORC_INVALID_GTT_INT_EN		(1<<24)
 #define   CURSORB_INVALID_GTT_INT_EN		(1<<23)
 #define   CURSORA_INVALID_GTT_INT_EN		(1<<22)
 #define   SPRITED_INVALID_GTT_INT_EN		(1<<21)
@@ -3372,6 +3751,11 @@
 #define   SPRITEA_INVALID_GTT_INT_EN		(1<<17)
 #define   PLANEA_INVALID_GTT_INT_EN		(1<<16)
 #define   DPINVGTT_EN_MASK			0xff0000
+#define   DPINVGTT_EN_MASK_CHV			0xfff0000
+#define   SPRITEF_INVALID_GTT_STATUS		(1<<11)
+#define   SPRITEE_INVALID_GTT_STATUS		(1<<10)
+#define   PLANEC_INVALID_GTT_STATUS		(1<<9)
+#define   CURSORC_INVALID_GTT_STATUS		(1<<8)
 #define   CURSORB_INVALID_GTT_STATUS		(1<<7)
 #define   CURSORA_INVALID_GTT_STATUS		(1<<6)
 #define   SPRITED_INVALID_GTT_STATUS		(1<<5)
@@ -3381,6 +3765,7 @@
 #define   SPRITEA_INVALID_GTT_STATUS		(1<<1)
 #define   PLANEA_INVALID_GTT_STATUS		(1<<0)
 #define   DPINVGTT_STATUS_MASK			0xff
+#define   DPINVGTT_STATUS_MASK_CHV		0xfff
 
 #define DSPARB			0x70030
 #define   DSPARB_CSTART_MASK	(0x7f << 7)
@@ -3420,14 +3805,43 @@
 #define DDL_CURSORA_PRECISION_32	(1<<31)
 #define DDL_CURSORA_PRECISION_16	(0<<31)
 #define DDL_CURSORA_SHIFT		24
+#define DDL_SPRITEB_PRECISION_32	(1<<23)
+#define DDL_SPRITEB_PRECISION_16	(0<<23)
+#define DDL_SPRITEB_SHIFT		16
+#define DDL_SPRITEA_PRECISION_32	(1<<15)
+#define DDL_SPRITEA_PRECISION_16	(0<<15)
+#define DDL_SPRITEA_SHIFT		8
 #define DDL_PLANEA_PRECISION_32		(1<<7)
 #define DDL_PLANEA_PRECISION_16		(0<<7)
+#define DDL_PLANEA_SHIFT		0
+
 #define VLV_DDL2			(VLV_DISPLAY_BASE + 0x70054)
 #define DDL_CURSORB_PRECISION_32	(1<<31)
 #define DDL_CURSORB_PRECISION_16	(0<<31)
 #define DDL_CURSORB_SHIFT		24
+#define DDL_SPRITED_PRECISION_32	(1<<23)
+#define DDL_SPRITED_PRECISION_16	(0<<23)
+#define DDL_SPRITED_SHIFT		16
+#define DDL_SPRITEC_PRECISION_32	(1<<15)
+#define DDL_SPRITEC_PRECISION_16	(0<<15)
+#define DDL_SPRITEC_SHIFT		8
 #define DDL_PLANEB_PRECISION_32		(1<<7)
 #define DDL_PLANEB_PRECISION_16		(0<<7)
+#define DDL_PLANEB_SHIFT		0
+
+#define VLV_DDL3			(VLV_DISPLAY_BASE + 0x70058)
+#define DDL_CURSORC_PRECISION_32	(1<<31)
+#define DDL_CURSORC_PRECISION_16	(0<<31)
+#define DDL_CURSORC_SHIFT		24
+#define DDL_SPRITEF_PRECISION_32	(1<<23)
+#define DDL_SPRITEF_PRECISION_16	(0<<23)
+#define DDL_SPRITEF_SHIFT		16
+#define DDL_SPRITEE_PRECISION_32	(1<<15)
+#define DDL_SPRITEE_PRECISION_16	(0<<15)
+#define DDL_SPRITEE_SHIFT		8
+#define DDL_PLANEC_PRECISION_32		(1<<7)
+#define DDL_PLANEC_PRECISION_16		(0<<7)
+#define DDL_PLANEC_SHIFT		0
 
 /* FIFO watermark sizes etc */
 #define G4X_FIFO_LINE_SIZE	64
@@ -3535,12 +3949,13 @@
 #define   PIPE_PIXEL_MASK         0x00ffffff
 #define   PIPE_PIXEL_SHIFT        0
 /* GM45+ just has to be different */
-#define _PIPEA_FRMCOUNT_GM45	(dev_priv->info.display_mmio_offset + 0x70040)
-#define _PIPEA_FLIPCOUNT_GM45	(dev_priv->info.display_mmio_offset + 0x70044)
-#define PIPE_FRMCOUNT_GM45(pipe) _PIPE(pipe, _PIPEA_FRMCOUNT_GM45, _PIPEB_FRMCOUNT_GM45)
+#define _PIPEA_FRMCOUNT_GM45	0x70040
+#define _PIPEA_FLIPCOUNT_GM45	0x70044
+#define PIPE_FRMCOUNT_GM45(pipe) _PIPE2(pipe, _PIPEA_FRMCOUNT_GM45)
+#define PIPE_FLIPCOUNT_GM45(pipe) _PIPE2(pipe, _PIPEA_FLIPCOUNT_GM45)
 
 /* Cursor A & B regs */
-#define _CURACNTR		(dev_priv->info.display_mmio_offset + 0x70080)
+#define _CURACNTR		0x70080
 /* Old style CUR*CNTR flags (desktop 8xx) */
 #define   CURSOR_ENABLE		0x80000000
 #define   CURSOR_GAMMA_ENABLE	0x40000000
@@ -3567,28 +3982,34 @@
 #define   MCURSOR_PIPE_B	(1 << 28)
 #define   MCURSOR_GAMMA_ENABLE  (1 << 26)
 #define   CURSOR_TRICKLE_FEED_DISABLE	(1 << 14)
-#define _CURABASE		(dev_priv->info.display_mmio_offset + 0x70084)
-#define _CURAPOS		(dev_priv->info.display_mmio_offset + 0x70088)
+#define _CURABASE		0x70084
+#define _CURAPOS		0x70088
 #define   CURSOR_POS_MASK       0x007FF
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
 #define CURSIZE			0x700a0
-#define _CURBCNTR		(dev_priv->info.display_mmio_offset + 0x700c0)
-#define _CURBBASE		(dev_priv->info.display_mmio_offset + 0x700c4)
-#define _CURBPOS		(dev_priv->info.display_mmio_offset + 0x700c8)
+#define _CURBCNTR		0x700c0
+#define _CURBBASE		0x700c4
+#define _CURBPOS		0x700c8
 
 #define _CURBCNTR_IVB		0x71080
 #define _CURBBASE_IVB		0x71084
 #define _CURBPOS_IVB		0x71088
 
-#define CURCNTR(pipe) _PIPE(pipe, _CURACNTR, _CURBCNTR)
-#define CURBASE(pipe) _PIPE(pipe, _CURABASE, _CURBBASE)
-#define CURPOS(pipe) _PIPE(pipe, _CURAPOS, _CURBPOS)
+#define _CURSOR2(pipe, reg) (dev_priv->info.cursor_offsets[(pipe)] - \
+	dev_priv->info.cursor_offsets[PIPE_A] + (reg) + \
+	dev_priv->info.display_mmio_offset)
 
-#define CURCNTR_IVB(pipe) _PIPE(pipe, _CURACNTR, _CURBCNTR_IVB)
-#define CURBASE_IVB(pipe) _PIPE(pipe, _CURABASE, _CURBBASE_IVB)
-#define CURPOS_IVB(pipe) _PIPE(pipe, _CURAPOS, _CURBPOS_IVB)
+#define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR)
+#define CURBASE(pipe) _CURSOR2(pipe, _CURABASE)
+#define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS)
+
+#define CURSOR_A_OFFSET 0x70080
+#define CURSOR_B_OFFSET 0x700c0
+#define CHV_CURSOR_C_OFFSET 0x700e0
+#define IVB_CURSOR_B_OFFSET 0x71080
+#define IVB_CURSOR_C_OFFSET 0x72080
 
 /* Display A control */
 #define _DSPACNTR				0x70180
@@ -4093,6 +4514,7 @@
 #define  GEN8_DE_PIPE_A_IRQ		(1<<16)
 #define  GEN8_DE_PIPE_IRQ(pipe)		(1<<(16+pipe))
 #define  GEN8_GT_VECS_IRQ		(1<<6)
+#define  GEN8_GT_PM_IRQ			(1<<4)
 #define  GEN8_GT_VCS2_IRQ		(1<<3)
 #define  GEN8_GT_VCS1_IRQ		(1<<2)
 #define  GEN8_GT_BCS_IRQ		(1<<1)
@@ -4120,7 +4542,7 @@
 #define  GEN8_PIPE_SPRITE_FAULT		(1 << 9)
 #define  GEN8_PIPE_PRIMARY_FAULT	(1 << 8)
 #define  GEN8_PIPE_SPRITE_FLIP_DONE	(1 << 5)
-#define  GEN8_PIPE_FLIP_DONE		(1 << 4)
+#define  GEN8_PIPE_PRIMARY_FLIP_DONE	(1 << 4)
 #define  GEN8_PIPE_SCAN_LINE_EVENT	(1 << 2)
 #define  GEN8_PIPE_VSYNC		(1 << 1)
 #define  GEN8_PIPE_VBLANK		(1 << 0)
@@ -4832,6 +5254,8 @@
 #define  PORT_TRANS_SEL_CPT(pipe)	((pipe) << 29)
 #define  PORT_TO_PIPE(val)	(((val) & (1<<30)) >> 30)
 #define  PORT_TO_PIPE_CPT(val)	(((val) & PORT_TRANS_SEL_MASK) >> 29)
+#define  SDVO_PORT_TO_PIPE_CHV(val)	(((val) & (3<<24)) >> 24)
+#define  DP_PORT_TO_PIPE_CHV(val)	(((val) & (3<<16)) >> 16)
 
 #define TRANS_DP_CTL_A		0xe0300
 #define TRANS_DP_CTL_B		0xe1300
@@ -4888,6 +5312,8 @@
 
 #define  EDP_LINK_TRAIN_VOL_EMP_MASK_IVB	(0x3f<<22)
 
+#define  VLV_PMWGICZ				0x1300a4
+
 #define  FORCEWAKE				0xA18C
 #define  FORCEWAKE_VLV				0x1300b0
 #define  FORCEWAKE_ACK_VLV			0x1300b4
@@ -4896,15 +5322,22 @@
 #define  FORCEWAKE_ACK_HSW			0x130044
 #define  FORCEWAKE_ACK				0x130090
 #define  VLV_GTLC_WAKE_CTRL			0x130090
+#define   VLV_GTLC_RENDER_CTX_EXISTS		(1 << 25)
+#define   VLV_GTLC_MEDIA_CTX_EXISTS		(1 << 24)
+#define   VLV_GTLC_ALLOWWAKEREQ			(1 << 0)
+
 #define  VLV_GTLC_PW_STATUS			0x130094
-#define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
-#define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define   VLV_GTLC_ALLOWWAKEACK			(1 << 0)
+#define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
+#define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
+#define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
 #define  FORCEWAKE_MT_ACK			0x130040
 #define  ECOBUS					0xa180
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
+#define  VLV_SPAREG2H				0xA194
 
 #define  GTFIFODBG				0x120000
 #define    GT_FIFO_SBDROPERR			(1<<6)
@@ -4924,6 +5357,7 @@
 #define  HSW_EDRAM_PRESENT			0x120010
 
 #define GEN6_UCGCTL1				0x9400
+# define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
 
@@ -4934,12 +5368,19 @@
 # define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE		(1 << 12)
 # define GEN6_RCCUNIT_CLOCK_GATE_DISABLE		(1 << 11)
 
+#define GEN6_UCGCTL3				0x9408
+
 #define GEN7_UCGCTL4				0x940c
 #define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1<<25)
 
+#define GEN6_RCGCTL1				0x9410
+#define GEN6_RCGCTL2				0x9414
+#define GEN6_RSTCTL				0x9420
+
 #define GEN8_UCGCTL6				0x9430
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1<<14)
 
+#define GEN6_GFXPAUSE				0xA000
 #define GEN6_RPNSWREQ				0xA008
 #define   GEN6_TURBO_DISABLE			(1<<31)
 #define   GEN6_FREQUENCY(x)			((x)<<25)
@@ -4992,6 +5433,9 @@
 #define GEN6_RP_UP_EI				0xA068
 #define GEN6_RP_DOWN_EI				0xA06C
 #define GEN6_RP_IDLE_HYSTERSIS			0xA070
+#define GEN6_RPDEUHWTC				0xA080
+#define GEN6_RPDEUC				0xA084
+#define GEN6_RPDEUCSW				0xA088
 #define GEN6_RC_STATE				0xA094
 #define GEN6_RC1_WAKE_RATE_LIMIT		0xA098
 #define GEN6_RC6_WAKE_RATE_LIMIT		0xA09C
@@ -4999,11 +5443,15 @@
 #define GEN6_RC_EVALUATION_INTERVAL		0xA0A8
 #define GEN6_RC_IDLE_HYSTERSIS			0xA0AC
 #define GEN6_RC_SLEEP				0xA0B0
+#define GEN6_RCUBMABDTMR			0xA0B0
 #define GEN6_RC1e_THRESHOLD			0xA0B4
 #define GEN6_RC6_THRESHOLD			0xA0B8
 #define GEN6_RC6p_THRESHOLD			0xA0BC
+#define VLV_RCEDATA				0xA0BC
 #define GEN6_RC6pp_THRESHOLD			0xA0C0
 #define GEN6_PMINTRMSK				0xA168
+#define GEN8_PMINTR_REDIRECT_TO_NON_DISP	(1<<31)
+#define VLV_PWRDWNUPCTL				0xA294
 
 #define GEN6_PMISR				0x44020
 #define GEN6_PMIMR				0x44024 /* rps_lock */
@@ -5020,6 +5468,9 @@
 						 GEN6_PM_RP_DOWN_THRESHOLD | \
 						 GEN6_PM_RP_DOWN_TIMEOUT)
 
+#define GEN7_GT_SCRATCH_BASE			0x4F100
+#define GEN7_GT_SCRATCH_REG_NUM			8
+
 #define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
 #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
@@ -5030,6 +5481,9 @@
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
+#define VLV_GT_RENDER_RC6			0x138108
+#define VLV_GT_MEDIA_RC6			0x13810C
+
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
 

diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 56785e8..043123c 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c

@@ -328,8 +328,6 @@
 		}
 	}
 
-	intel_disable_gt_powersave(dev);
-
 	/* Cache mode state */
 	if (INTEL_INFO(dev)->gen < 7)
 		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 9c57029..86ce39a 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c

@@ -186,7 +186,7 @@
 	struct drm_minor *dminor = dev_to_drm_minor(dev);
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	u32 *temp = NULL; /* Just here to make handling failures easy */
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
@@ -263,6 +263,8 @@
 
 	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
 
+	intel_runtime_pm_get(dev_priv);
+
 	mutex_lock(&dev_priv->rps.hw_lock);
 	if (IS_VALLEYVIEW(dev_priv->dev)) {
 		u32 freq;
@@ -273,6 +275,8 @@
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 
+	intel_runtime_pm_put(dev_priv);
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
 }
 

diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 23c26f1..f5aa006 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h

@@ -7,6 +7,7 @@
 
 #include <drm/drmP.h>
 #include "i915_drv.h"
+#include "intel_drv.h"
 #include "intel_ringbuffer.h"
 
 #undef TRACE_SYSTEM
@@ -14,6 +15,80 @@
 #define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE i915_trace
 
+/* pipe updates */
+
+TRACE_EVENT(i915_pipe_update_start,
+	    TP_PROTO(struct intel_crtc *crtc, u32 min, u32 max),
+	    TP_ARGS(crtc, min, max),
+	    /* frame and scanline are both sampled when the event fires */
+	    TP_STRUCT__entry(
+			     __field(enum pipe, pipe)
+			     __field(u32, frame)
+			     __field(u32, scanline)
+			     __field(u32, min)
+			     __field(u32, max)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->pipe = crtc->pipe;
+			   __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev,
+										       crtc->pipe);
+			   __entry->scanline = intel_get_crtc_scanline(crtc);
+			   __entry->min = min;
+			   __entry->max = max;
+			   ),
+
+	    TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
+		      pipe_name(__entry->pipe), __entry->frame,
+		       __entry->scanline, __entry->min, __entry->max)
+);
+
+TRACE_EVENT(i915_pipe_update_vblank_evaded,
+	    TP_PROTO(struct intel_crtc *crtc, u32 min, u32 max, u32 frame),
+	    TP_ARGS(crtc, min, max, frame),
+	    /* frame comes from the caller; scanline is sampled here */
+	    TP_STRUCT__entry(
+			     __field(enum pipe, pipe)
+			     __field(u32, frame)
+			     __field(u32, scanline)
+			     __field(u32, min)
+			     __field(u32, max)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->pipe = crtc->pipe;
+			   __entry->frame = frame;
+			   __entry->scanline = intel_get_crtc_scanline(crtc);
+			   __entry->min = min;
+			   __entry->max = max;
+			   ),
+
+	    TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
+		      pipe_name(__entry->pipe), __entry->frame,
+		       __entry->scanline, __entry->min, __entry->max)
+);
+
+TRACE_EVENT(i915_pipe_update_end,
+	    TP_PROTO(struct intel_crtc *crtc, u32 frame),
+	    TP_ARGS(crtc, frame),
+	    /* frame comes from the caller; scanline is sampled here */
+	    TP_STRUCT__entry(
+			     __field(enum pipe, pipe)
+			     __field(u32, frame)
+			     __field(u32, scanline)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->pipe = crtc->pipe;
+			   __entry->frame = frame;
+			   __entry->scanline = intel_get_crtc_scanline(crtc);
+			   ),
+
+	    TP_printk("pipe %c, frame=%u, scanline=%u",
+		      pipe_name(__entry->pipe), __entry->frame,
+		      __entry->scanline)
+);
+
 /* object tracking */
 
 TRACE_EVENT(i915_gem_object_create,
@@ -251,8 +326,8 @@
 );
 
 TRACE_EVENT(i915_gem_ring_sync_to,
-	    TP_PROTO(struct intel_ring_buffer *from,
-		     struct intel_ring_buffer *to,
+	    TP_PROTO(struct intel_engine_cs *from,
+		     struct intel_engine_cs *to,
 		     u32 seqno),
 	    TP_ARGS(from, to, seqno),
 
@@ -277,7 +352,7 @@
 );
 
 TRACE_EVENT(i915_gem_ring_dispatch,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno, u32 flags),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno, u32 flags),
 	    TP_ARGS(ring, seqno, flags),
 
 	    TP_STRUCT__entry(
@@ -300,7 +375,7 @@
 );
 
 TRACE_EVENT(i915_gem_ring_flush,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 invalidate, u32 flush),
 	    TP_ARGS(ring, invalidate, flush),
 
 	    TP_STRUCT__entry(
@@ -323,7 +398,7 @@
 );
 
 DECLARE_EVENT_CLASS(i915_gem_request,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
 	    TP_ARGS(ring, seqno),
 
 	    TP_STRUCT__entry(
@@ -343,12 +418,12 @@
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
 	    TP_ARGS(ring, seqno)
 );
 
 TRACE_EVENT(i915_gem_request_complete,
-	    TP_PROTO(struct intel_ring_buffer *ring),
+	    TP_PROTO(struct intel_engine_cs *ring),
 	    TP_ARGS(ring),
 
 	    TP_STRUCT__entry(
@@ -368,12 +443,12 @@
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_retire,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
 	    TP_ARGS(ring, seqno)
 );
 
 TRACE_EVENT(i915_gem_request_wait_begin,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
 	    TP_ARGS(ring, seqno),
 
 	    TP_STRUCT__entry(
@@ -402,12 +477,12 @@
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
+	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
 	    TP_ARGS(ring, seqno)
 );
 
 DECLARE_EVENT_CLASS(i915_ring,
-	    TP_PROTO(struct intel_ring_buffer *ring),
+	    TP_PROTO(struct intel_engine_cs *ring),
 	    TP_ARGS(ring),
 
 	    TP_STRUCT__entry(
@@ -424,12 +499,12 @@
 );
 
 DEFINE_EVENT(i915_ring, i915_ring_wait_begin,
-	    TP_PROTO(struct intel_ring_buffer *ring),
+	    TP_PROTO(struct intel_engine_cs *ring),
 	    TP_ARGS(ring)
 );
 
 DEFINE_EVENT(i915_ring, i915_ring_wait_end,
-	    TP_PROTO(struct intel_ring_buffer *ring),
+	    TP_PROTO(struct intel_engine_cs *ring),
 	    TP_ARGS(ring)
 );
 

diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index aff4a11..1ee98f1 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c

@@ -49,13 +49,19 @@
 	total = bdb->bdb_size;
 
 	/* walk the sections looking for section_id */
-	while (index < total) {
+	while (index + 3 < total) {
 		current_id = *(base + index);
 		index++;
+
 		current_size = *((u16 *)(base + index));
 		index += 2;
+
+		if (index + current_size > total)
+			return NULL;
+
 		if (current_id == section_id)
 			return base + index;
+
 		index += current_size;
 	}
 
@@ -206,7 +212,7 @@
 	const struct lvds_dvo_timing *panel_dvo_timing;
 	const struct lvds_fp_timing *fp_timing;
 	struct drm_display_mode *panel_fixed_mode;
-	int i, downclock;
+	int i, downclock, drrs_mode;
 
 	lvds_options = find_section(bdb, BDB_LVDS_OPTIONS);
 	if (!lvds_options)
@@ -218,6 +224,28 @@
 
 	panel_type = lvds_options->panel_type;
 
+	drrs_mode = (lvds_options->dps_panel_type_bits
+				>> (panel_type * 2)) & MODE_MASK;
+	/*
+	 * VBT has static DRRS = 0 and seamless DRRS = 2.
+	 * The below piece of code is required to adjust vbt.drrs_type
+	 * to match the enum drrs_support_type.
+	 */
+	switch (drrs_mode) {
+	case 0:
+		dev_priv->vbt.drrs_type = STATIC_DRRS_SUPPORT;
+		DRM_DEBUG_KMS("DRRS supported mode is static\n");
+		break;
+	case 2:
+		dev_priv->vbt.drrs_type = SEAMLESS_DRRS_SUPPORT;
+		DRM_DEBUG_KMS("DRRS supported mode is seamless\n");
+		break;
+	default:
+		dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED;
+		DRM_DEBUG_KMS("DRRS not supported (VBT input)\n");
+		break;
+	}
+
 	lvds_lfp_data = find_section(bdb, BDB_LVDS_LFP_DATA);
 	if (!lvds_lfp_data)
 		return;
@@ -526,6 +554,16 @@
 
 	if (driver->dual_frequency)
 		dev_priv->render_reclock_avail = true;
+
+	DRM_DEBUG_KMS("DRRS State Enabled:%d\n", driver->drrs_enabled);
+	/*
+	 * If DRRS is not supported, drrs_type has to be set to 0.
+	 * This is because, VBT is configured in such a way that
+	 * static DRRS is 0 and DRRS not supported is represented by
+	 * driver->drrs_enabled=false
+	 */
+	if (!driver->drrs_enabled)
+		dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED;
 }
 
 static void
@@ -628,19 +666,221 @@
 	}
 }
 
+/*
+ * Skip one MIPI sequence: the sequence id byte, its elements and the 0x00
+ * terminator. *size is the number of bytes left at data; on success it is
+ * reduced by what was consumed and the byte after the terminator is returned.
+ * Returns NULL, leaving *size untouched, on overrun or an unknown element.
+ */
+static u8 *goto_next_sequence(u8 *data, int *size)
+{
+	u16 len;
+	int tmp = *size;
+
+	if (--tmp < 0)
+		return NULL;
+
+	/* goto first element */
+	data++;
+	while (1) {
+		/* never read an element id that lies past the data we own */
+		if (tmp < 1)
+			return NULL;
+
+		switch (*data) {
+		case MIPI_SEQ_ELEM_SEND_PKT:
+			/* elem id, command flag, data type + 2 byte length */
+			tmp -= 5;
+			if (tmp < 0)
+				return NULL;
+
+			data += 3;
+			len = *((u16 *)data);
+			tmp -= len;
+			if (tmp < 0)
+				return NULL;
+
+			/* skip the length field and the payload */
+			data = data + 2 + len;
+			break;
+		case MIPI_SEQ_ELEM_DELAY:
+			/* skip by elem id, and delay is 4 bytes */
+			tmp -= 5;
+			if (tmp < 0)
+				return NULL;
+
+			data += 5;
+			break;
+		case MIPI_SEQ_ELEM_GPIO:
+			tmp -= 3;
+			if (tmp < 0)
+				return NULL;
+
+			data += 3;
+			break;
+		default:
+			DRM_ERROR("Unknown element\n");
+			return NULL;
+		}
+
+		/* end of sequence? only look while there is data left */
+		if (tmp > 0 && *data == 0)
+			break;
+	}
+
+	/* step over the terminator; tmp >= 1 here so this cannot underflow */
+	*size = tmp - 1;
+	return data + 1;
+}
+
 static void
 parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 {
-	struct bdb_mipi *mipi;
+	struct bdb_mipi_config *start;
+	struct bdb_mipi_sequence *sequence;
+	struct mipi_config *config;
+	struct mipi_pps_data *pps;
+	u8 *data, *seq_data;
+	int i, panel_id, seq_size;
+	u16 block_size;
 
-	mipi = find_section(bdb, BDB_MIPI_CONFIG);
-	if (!mipi) {
-		DRM_DEBUG_KMS("No MIPI BDB found");
+	/* parse MIPI blocks only if LFP type is MIPI */
+	if (!dev_priv->vbt.has_mipi)
+		return;
+
+	/* Initialize this to undefined indicating no generic MIPI support */
+	dev_priv->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID;
+
+	/* Block #40 is already parsed and panel_fixed_mode is
+	 * stored in dev_priv->lfp_lvds_vbt_mode
+	 * reuse this when needed
+	 */
+
+	/* Parse #52 for panel index used from panel_type already
+	 * parsed
+	 */
+	start = find_section(bdb, BDB_MIPI_CONFIG);
+	if (!start) {
+		DRM_DEBUG_KMS("No MIPI config BDB found\n");
 		return;
 	}
 
-	/* XXX: add more info */
+	DRM_DEBUG_DRIVER("Found MIPI Config block, panel index = %d\n",
+								panel_type);
+
+	/*
+	 * get hold of the correct configuration block and pps data as per
+	 * the panel_type as index
+	 */
+	config = &start->config[panel_type];
+	pps = &start->pps[panel_type];
+
+	/* store as of now full data. Trim when we realise all is not needed */
+	dev_priv->vbt.dsi.config = kmemdup(config, sizeof(struct mipi_config), GFP_KERNEL);
+	if (!dev_priv->vbt.dsi.config)
+		return;
+
+	dev_priv->vbt.dsi.pps = kmemdup(pps, sizeof(struct mipi_pps_data), GFP_KERNEL);
+	if (!dev_priv->vbt.dsi.pps) {
+		kfree(dev_priv->vbt.dsi.config);
+		return;
+	}
+
+	/* We have mandatory mipi config blocks. Initialize as generic panel */
 	dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID;
+
+	/* Check if we have sequence block as well */
+	sequence = find_section(bdb, BDB_MIPI_SEQUENCE);
+	if (!sequence) {
+		DRM_DEBUG_KMS("No MIPI Sequence found, parsing complete\n");
+		return;
+	}
+
+	DRM_DEBUG_DRIVER("Found MIPI sequence block\n");
+
+	block_size = get_blocksize(sequence);
+
+	/*
+	 * parse the sequence block for individual sequences
+	 */
+	dev_priv->vbt.dsi.seq_version = sequence->version;
+
+	seq_data = &sequence->data[0];
+
+	/*
+	 * sequence block is variable length and hence we need to parse and
+	 * get the sequence data for specific panel id
+	 */
+	for (i = 0; i < MAX_MIPI_CONFIGURATIONS; i++) {
+		panel_id = *seq_data;
+		seq_size = *((u16 *) (seq_data + 1));
+		if (panel_id == panel_type)
+			break;
+
+		/* skip the sequence including seq header of 3 bytes */
+		seq_data = seq_data + 3 + seq_size;
+		if ((seq_data - &sequence->data[0]) > block_size) {
+			DRM_ERROR("Sequence start is beyond sequence block size, corrupted sequence block\n");
+			return;
+		}
+	}
+
+	if (i == MAX_MIPI_CONFIGURATIONS) {
+		DRM_ERROR("Sequence block detected but no valid configuration\n");
+		return;
+	}
+
+	/* check if found sequence is completely within the sequence block
+	 * just being paranoid */
+	if (seq_size > block_size) {
+		DRM_ERROR("Corrupted sequence/size, bailing out\n");
+		return;
+	}
+
+	/* skip the panel id(1 byte) and seq size(2 bytes) */
+	dev_priv->vbt.dsi.data = kmemdup(seq_data + 3, seq_size, GFP_KERNEL);
+	if (!dev_priv->vbt.dsi.data)
+		return;
+
+	/*
+	 * loop into the sequence data and split into multiple sequences
+	 * There are only 5 types of sequences as of now
+	 */
+	data = dev_priv->vbt.dsi.data;
+	dev_priv->vbt.dsi.size = seq_size;
+
+	/* two consecutive 0x00 indicate end of all sequences */
+	while (1) {
+		int seq_id = *data;
+		if (MIPI_SEQ_MAX > seq_id && seq_id > MIPI_SEQ_UNDEFINED) {
+			dev_priv->vbt.dsi.sequence[seq_id] = data;
+			DRM_DEBUG_DRIVER("Found mipi sequence - %d\n", seq_id);
+		} else {
+			DRM_ERROR("undefined sequence\n");
+			goto err;
+		}
+
+		/* partial parsing to skip elements */
+		data = goto_next_sequence(data, &seq_size);
+
+		if (data == NULL) {
+			DRM_ERROR("Sequence elements going beyond block itself. Sequence block parsing failed\n");
+			goto err;
+		}
+
+		if (*data == 0)
+			break; /* end of sequence reached */
+	}
+
+	DRM_DEBUG_DRIVER("MIPI related vbt parsing complete\n");
+	return;
+err:
+	kfree(dev_priv->vbt.dsi.data);
+	dev_priv->vbt.dsi.data = NULL;
+
+	/* parsing stopped half way: NULL all pointers (size is in bytes) */
+	memset(dev_priv->vbt.dsi.sequence, 0,
+	       MIPI_SEQ_MAX * sizeof(dev_priv->vbt.dsi.sequence[0]));
 }
 
 static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
@@ -823,6 +1063,15 @@
 			/* skip the device block if device type is invalid */
 			continue;
 		}
+
+		if (p_child->common.dvo_port >= DVO_PORT_MIPIA
+		    && p_child->common.dvo_port <= DVO_PORT_MIPID
+		    &&p_child->common.device_type & DEVICE_TYPE_MIPI_OUTPUT) {
+			DRM_DEBUG_KMS("Found MIPI as LFP\n");
+			dev_priv->vbt.has_mipi = 1;
+			dev_priv->vbt.dsi.port = p_child->common.dvo_port;
+		}
+
 		child_dev_ptr = dev_priv->vbt.child_dev + count;
 		count++;
 		memcpy((void *)child_dev_ptr, (void *)p_child,
@@ -893,6 +1142,46 @@
 	{ }
 };
 
+/* Check that vbt and its BDB fit in base[0..size); NULL if they do not */
+static struct bdb_header *validate_vbt(char *base, size_t size,
+				       struct vbt_header *vbt,
+				       const char *source)
+{
+	size_t offset;
+	struct bdb_header *bdb;
+
+	if (vbt == NULL) {
+		DRM_DEBUG_DRIVER("VBT signature missing\n");
+		return NULL;
+	}
+
+	offset = (char *)vbt - base;
+	if (offset + sizeof(struct vbt_header) > size) {
+		DRM_DEBUG_DRIVER("VBT header incomplete\n");
+		return NULL;
+	}
+
+	if (memcmp(vbt->signature, "$VBT", 4)) {
+		DRM_DEBUG_DRIVER("VBT invalid signature\n");
+		return NULL;
+	}
+
+	offset += vbt->bdb_offset;
+	if (offset + sizeof(struct bdb_header) > size) {
+		DRM_DEBUG_DRIVER("BDB header incomplete\n");
+		return NULL;
+	}
+
+	bdb = (struct bdb_header *)(base + offset);
+	if (offset + bdb->bdb_size > size) {
+		DRM_DEBUG_DRIVER("BDB incomplete\n");
+		return NULL;
+	}
+	/* cap the print at 20 bytes: signature may lack a NUL terminator */
+	DRM_DEBUG_KMS("Using VBT from %s: %.20s\n", source, vbt->signature);
+	return bdb;
+}
+
 /**
  * intel_parse_bios - find VBT and initialize settings from the BIOS
  * @dev: DRM device
@@ -916,20 +1205,13 @@
 	init_vbt_defaults(dev_priv);
 
 	/* XXX Should this validation be moved to intel_opregion.c? */
-	if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt) {
-		struct vbt_header *vbt = dev_priv->opregion.vbt;
-		if (memcmp(vbt->signature, "$VBT", 4) == 0) {
-			DRM_DEBUG_KMS("Using VBT from OpRegion: %20s\n",
-					 vbt->signature);
-			bdb = (struct bdb_header *)((char *)vbt + vbt->bdb_offset);
-		} else
-			dev_priv->opregion.vbt = NULL;
-	}
+	if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt)
+		bdb = validate_vbt((char *)dev_priv->opregion.header, OPREGION_SIZE,
+				   (struct vbt_header *)dev_priv->opregion.vbt,
+				   "OpRegion");
 
 	if (bdb == NULL) {
-		struct vbt_header *vbt = NULL;
-		size_t size;
-		int i;
+		size_t i, size;
 
 		bios = pci_map_rom(pdev, &size);
 		if (!bios)
@@ -937,19 +1219,18 @@
 
 		/* Scour memory looking for the VBT signature */
 		for (i = 0; i + 4 < size; i++) {
-			if (!memcmp(bios + i, "$VBT", 4)) {
-				vbt = (struct vbt_header *)(bios + i);
+			if (memcmp(bios + i, "$VBT", 4) == 0) {
+				bdb = validate_vbt(bios, size,
+						   (struct vbt_header *)(bios + i),
+						   "PCI ROM");
 				break;
 			}
 		}
 
-		if (!vbt) {
-			DRM_DEBUG_DRIVER("VBT signature missing\n");
+		if (!bdb) {
 			pci_unmap_rom(pdev, bios);
 			return -1;
 		}
-
-		bdb = (struct bdb_header *)(bios + i + vbt->bdb_offset);
 	}
 
 	/* Grab useful general definitions */

diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index f27f7b2..b986677 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h

@@ -282,6 +282,9 @@
 	union child_device_config devices[0];
 } __packed;
 
+/* Mask for DRRS / Panel Channel / SSC / BLT control bits extraction */
+#define MODE_MASK		0x3
+
 struct bdb_lvds_options {
 	u8 panel_type;
 	u8 rsvd1;
@@ -294,6 +297,18 @@
 	u8 lvds_edid:1;
 	u8 rsvd2:1;
 	u8 rsvd4;
+	/* LVDS Panel channel bits stored here */
+	u32 lvds_panel_channel_bits;
+	/* LVDS SSC (Spread Spectrum Clock) bits stored here. */
+	u16 ssc_bits;
+	u16 ssc_freq;
+	u16 ssc_ddt;
+	/* Panel color depth defined here */
+	u16 panel_color_depth;
+	/* LVDS panel type bits stored here */
+	u32 dps_panel_type_bits;
+	/* LVDS backlight control type bits stored here */
+	u32 blt_control_type_bits;
 } __packed;
 
 /* LFP pointer table contains entries to the struct below */
@@ -482,6 +497,20 @@
 
 	u8 hdmi_termination;
 	u8 custom_vbt_version;
+	/* Driver features data block */
+	u16 rmpm_enabled:1;
+	u16 s2ddt_enabled:1;
+	u16 dpst_enabled:1;
+	u16 bltclt_enabled:1;
+	u16 adb_enabled:1;
+	u16 drrs_enabled:1;
+	u16 grs_enabled:1;
+	u16 gpmt_enabled:1;
+	u16 tbt_enabled:1;
+	u16 psr_enabled:1;
+	u16 ips_enabled:1;
+	u16 reserved3:4;
+	u16 pc_feature_valid:1;
 } __packed;
 
 #define EDP_18BPP	0
@@ -714,6 +743,10 @@
 #define DVO_PORT_DPC	8
 #define DVO_PORT_DPD	9
 #define DVO_PORT_DPA	10
+#define DVO_PORT_MIPIA	21
+#define DVO_PORT_MIPIB	22
+#define DVO_PORT_MIPIC	23
+#define DVO_PORT_MIPID	24
 
 /* Block 52 contains MIPI Panel info
  * 6 such enteries will there. Index into correct
@@ -870,4 +903,35 @@
 	u8 data[0];
 };
 
+/* MIPI Sequence Block definitions */
+enum mipi_seq {
+	MIPI_SEQ_UNDEFINED = 0,	/* not a valid id; parse_mipi() rejects it */
+	MIPI_SEQ_ASSERT_RESET,
+	MIPI_SEQ_INIT_OTP,
+	MIPI_SEQ_DISPLAY_ON,
+	MIPI_SEQ_DISPLAY_OFF,
+	MIPI_SEQ_DEASSERT_RESET,
+	MIPI_SEQ_MAX		/* parse_mipi() rejects ids >= this */
+};
+
+enum mipi_seq_element {
+	MIPI_SEQ_ELEM_UNDEFINED = 0,	/* a 0 byte also ends a sequence */
+	MIPI_SEQ_ELEM_SEND_PKT,		/* 3 header bytes, u16 len, payload */
+	MIPI_SEQ_ELEM_DELAY,		/* 5 bytes in all (id + 4 byte delay) */
+	MIPI_SEQ_ELEM_GPIO,		/* 3 bytes in all */
+	MIPI_SEQ_ELEM_STATUS,		/* no case in goto_next_sequence() */
+	MIPI_SEQ_ELEM_MAX
+};
+
+enum mipi_gpio_pin_index {
+	MIPI_GPIO_UNDEFINED = 0,
+	MIPI_GPIO_PANEL_ENABLE,
+	MIPI_GPIO_BL_ENABLE,
+	MIPI_GPIO_PWM_ENABLE,
+	MIPI_GPIO_RESET_N,
+	MIPI_GPIO_PWR_DOWN_R,
+	MIPI_GPIO_STDBY_RST_N,
+	MIPI_GPIO_MAX			/* number of entries above */
+};
+
 #endif /* _I830_BIOS_H_ */

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index aa5a3dc..5a045d3 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c

@@ -144,28 +144,49 @@
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
-	u32 temp;
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
+	u32 adpa;
 
-	temp = I915_READ(crt->adpa_reg);
-	temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
-	temp &= ~ADPA_DAC_ENABLE;
+	if (INTEL_INFO(dev)->gen >= 5)
+		adpa = ADPA_HOTPLUG_BITS;
+	else
+		adpa = 0;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
+		adpa |= ADPA_HSYNC_ACTIVE_HIGH;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
+		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
+
+	/* For CPT allow 3 pipe config, for others just use A or B */
+	if (HAS_PCH_LPT(dev))
+		; /* Those bits don't exist here */
+	else if (HAS_PCH_CPT(dev))
+		adpa |= PORT_TRANS_SEL_CPT(crtc->pipe);
+	else if (crtc->pipe == 0)
+		adpa |= ADPA_PIPE_A_SELECT;
+	else
+		adpa |= ADPA_PIPE_B_SELECT;
+
+	if (!HAS_PCH_SPLIT(dev))
+		I915_WRITE(BCLRPAT(crtc->pipe), 0);
 
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
-		temp |= ADPA_DAC_ENABLE;
+		adpa |= ADPA_DAC_ENABLE;
 		break;
 	case DRM_MODE_DPMS_STANDBY:
-		temp |= ADPA_DAC_ENABLE | ADPA_HSYNC_CNTL_DISABLE;
+		adpa |= ADPA_DAC_ENABLE | ADPA_HSYNC_CNTL_DISABLE;
 		break;
 	case DRM_MODE_DPMS_SUSPEND:
-		temp |= ADPA_DAC_ENABLE | ADPA_VSYNC_CNTL_DISABLE;
+		adpa |= ADPA_DAC_ENABLE | ADPA_VSYNC_CNTL_DISABLE;
 		break;
 	case DRM_MODE_DPMS_OFF:
-		temp |= ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE;
+		adpa |= ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE;
 		break;
 	}
 
-	I915_WRITE(crt->adpa_reg, temp);
+	I915_WRITE(crt->adpa_reg, adpa);
 }
 
 static void intel_disable_crt(struct intel_encoder *encoder)
@@ -274,42 +295,6 @@
 	return true;
 }
 
-static void intel_crt_mode_set(struct intel_encoder *encoder)
-{
-
-	struct drm_device *dev = encoder->base.dev;
-	struct intel_crt *crt = intel_encoder_to_crt(encoder);
-	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
-	u32 adpa;
-
-	if (INTEL_INFO(dev)->gen >= 5)
-		adpa = ADPA_HOTPLUG_BITS;
-	else
-		adpa = 0;
-
-	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
-		adpa |= ADPA_HSYNC_ACTIVE_HIGH;
-	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
-		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
-
-	/* For CPT allow 3 pipe config, for others just use A or B */
-	if (HAS_PCH_LPT(dev))
-		; /* Those bits don't exist here */
-	else if (HAS_PCH_CPT(dev))
-		adpa |= PORT_TRANS_SEL_CPT(crtc->pipe);
-	else if (crtc->pipe == 0)
-		adpa |= ADPA_PIPE_A_SELECT;
-	else
-		adpa |= ADPA_PIPE_B_SELECT;
-
-	if (!HAS_PCH_SPLIT(dev))
-		I915_WRITE(BCLRPAT(crtc->pipe), 0);
-
-	I915_WRITE(crt->adpa_reg, adpa);
-}
-
 static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -645,11 +630,12 @@
 	enum intel_display_power_domain power_domain;
 	enum drm_connector_status status;
 	struct intel_load_detect_pipe tmp;
+	struct drm_modeset_acquire_ctx ctx;
 
 	intel_runtime_pm_get(dev_priv);
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
-		      connector->base.id, drm_get_connector_name(connector),
+		      connector->base.id, connector->name,
 		      force);
 
 	power_domain = intel_display_port_power_domain(intel_encoder);
@@ -688,12 +674,12 @@
 	}
 
 	/* for pre-945g platforms use load detect */
-	if (intel_get_load_detect_pipe(connector, NULL, &tmp)) {
+	if (intel_get_load_detect_pipe(connector, NULL, &tmp, &ctx)) {
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
 		else
 			status = intel_crt_load_detect(crt);
-		intel_release_load_detect_pipe(connector, &tmp);
+		intel_release_load_detect_pipe(connector, &tmp, &ctx);
 	} else
 		status = connector_status_unknown;
 
@@ -867,7 +853,6 @@
 		crt->adpa_reg = ADPA;
 
 	crt->base.compute_config = intel_crt_compute_config;
-	crt->base.mode_set = intel_crt_mode_set;
 	crt->base.disable = intel_disable_crt;
 	crt->base.enable = intel_enable_crt;
 	if (I915_HAS_HOTPLUG(dev))

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 0ad4e96..b17b9c7 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c

@@ -364,55 +364,6 @@
 	DRM_ERROR("FDI link training failed!\n");
 }
 
-static void intel_ddi_mode_set(struct intel_encoder *encoder)
-{
-	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
-	int port = intel_ddi_get_encoder_port(encoder);
-	int pipe = crtc->pipe;
-	int type = encoder->type;
-	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
-
-	DRM_DEBUG_KMS("Preparing DDI mode on port %c, pipe %c\n",
-		      port_name(port), pipe_name(pipe));
-
-	crtc->eld_vld = false;
-	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-		struct intel_digital_port *intel_dig_port =
-			enc_to_dig_port(&encoder->base);
-
-		intel_dp->DP = intel_dig_port->saved_port_bits |
-			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
-		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
-
-		if (intel_dp->has_audio) {
-			DRM_DEBUG_DRIVER("DP audio on pipe %c on DDI\n",
-					 pipe_name(crtc->pipe));
-
-			/* write eld */
-			DRM_DEBUG_DRIVER("DP audio: write eld information\n");
-			intel_write_eld(&encoder->base, adjusted_mode);
-		}
-	} else if (type == INTEL_OUTPUT_HDMI) {
-		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-
-		if (intel_hdmi->has_audio) {
-			/* Proper support for digital audio needs a new logic
-			 * and a new set of registers, so we leave it for future
-			 * patch bombing.
-			 */
-			DRM_DEBUG_DRIVER("HDMI audio on pipe %c on DDI\n",
-					 pipe_name(crtc->pipe));
-
-			/* write eld */
-			DRM_DEBUG_DRIVER("HDMI audio: write eld information\n");
-			intel_write_eld(&encoder->base, adjusted_mode);
-		}
-
-		intel_hdmi->set_infoframes(&encoder->base, adjusted_mode);
-	}
-}
-
 static struct intel_encoder *
 intel_ddi_get_crtc_encoder(struct drm_crtc *crtc)
 {
@@ -1062,9 +1013,7 @@
 	}
 
 	if (type == INTEL_OUTPUT_HDMI) {
-		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-
-		if (intel_hdmi->has_hdmi_sink)
+		if (intel_crtc->config.has_hdmi_sink)
 			temp |= TRANS_DDI_MODE_SELECT_HDMI;
 		else
 			temp |= TRANS_DDI_MODE_SELECT_DVI;
@@ -1293,28 +1242,48 @@
 static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
-	struct drm_crtc *crtc = encoder->crtc;
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
 	enum port port = intel_ddi_get_encoder_port(intel_encoder);
 	int type = intel_encoder->type;
 
+	if (crtc->config.has_audio) {
+		DRM_DEBUG_DRIVER("Audio on pipe %c on DDI\n",
+				 pipe_name(crtc->pipe));
+
+		/* write eld */
+		DRM_DEBUG_DRIVER("DDI audio: write eld information\n");
+		intel_write_eld(encoder, &crtc->config.adjusted_mode);
+	}
+
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 		intel_edp_panel_on(intel_dp);
 	}
 
-	WARN_ON(intel_crtc->ddi_pll_sel == PORT_CLK_SEL_NONE);
-	I915_WRITE(PORT_CLK_SEL(port), intel_crtc->ddi_pll_sel);
+	WARN_ON(crtc->ddi_pll_sel == PORT_CLK_SEL_NONE);
+	I915_WRITE(PORT_CLK_SEL(port), crtc->ddi_pll_sel);
 
 	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+		struct intel_digital_port *intel_dig_port =
+			enc_to_dig_port(encoder);
+
+		intel_dp->DP = intel_dig_port->saved_port_bits |
+			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
+		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_complete_link_train(intel_dp);
 		if (port != PORT_A)
 			intel_dp_stop_link_train(intel_dp);
+	} else if (type == INTEL_OUTPUT_HDMI) {
+		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+
+		intel_hdmi->set_infoframes(encoder,
+					   crtc->config.has_hdmi_sink,
+					   &crtc->config.adjusted_mode);
 	}
 }
 
@@ -1385,7 +1354,8 @@
 		intel_edp_psr_enable(intel_dp);
 	}
 
-	if (intel_crtc->eld_vld && type != INTEL_OUTPUT_EDP) {
+	if (intel_crtc->config.has_audio) {
+		intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
 		tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
 		tmp |= ((AUDIO_OUTPUT_ENABLE_A | AUDIO_ELD_VALID_A) << (pipe * 4));
 		I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp);
@@ -1403,11 +1373,14 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
-	if (intel_crtc->eld_vld && type != INTEL_OUTPUT_EDP) {
+	/* We can't touch HSW_AUD_PIN_ELD_CP_VLD unconditionally because this
+	 * register is part of the power well on Haswell. */
+	if (intel_crtc->config.has_audio) {
 		tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
 		tmp &= ~((AUDIO_OUTPUT_ENABLE_A | AUDIO_ELD_VALID_A) <<
 			 (pipe * 4));
 		I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
 	}
 
 	if (type == INTEL_OUTPUT_EDP) {
@@ -1580,6 +1553,7 @@
 
 	switch (temp & TRANS_DDI_MODE_SELECT_MASK) {
 	case TRANS_DDI_MODE_SELECT_HDMI:
+		pipe_config->has_hdmi_sink = true;
 	case TRANS_DDI_MODE_SELECT_DVI:
 	case TRANS_DDI_MODE_SELECT_FDI:
 		break;
@@ -1592,6 +1566,12 @@
 		break;
 	}
 
+	if (intel_display_power_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+		if (temp & (AUDIO_OUTPUT_ENABLE_A << (intel_crtc->pipe * 4)))
+			pipe_config->has_audio = true;
+	}
+
 	if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp_bpp &&
 	    pipe_config->pipe_bpp > dev_priv->vbt.edp_bpp) {
 		/*
@@ -1708,7 +1688,6 @@
 			 DRM_MODE_ENCODER_TMDS);
 
 	intel_encoder->compute_config = intel_ddi_compute_config;
-	intel_encoder->mode_set = intel_ddi_mode_set;
 	intel_encoder->enable = intel_enable_ddi;
 	intel_encoder->pre_enable = intel_ddi_pre_enable;
 	intel_encoder->disable = intel_disable_ddi;

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5b60e25..efd3cf5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c

@@ -41,6 +41,9 @@
 #include <drm/drm_crtc_helper.h>
 #include <linux/dma_remapping.h>
 
+/* 64-bit divide rounded to nearest, built on do_div(); expands d twice */
+#define DIV_ROUND_CLOSEST_ULL(ll, d)	\
+	({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
 static void intel_increase_pllclock(struct drm_crtc *crtc);
 static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
 
@@ -55,6 +58,15 @@
 				  struct intel_framebuffer *ifb,
 				  struct drm_mode_fb_cmd2 *mode_cmd,
 				  struct drm_i915_gem_object *obj);
+static void intel_dp_set_m_n(struct intel_crtc *crtc);
+static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc);
+static void intel_set_pipe_timings(struct intel_crtc *intel_crtc);
+static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc,
+					 struct intel_link_m_n *m_n);
+static void ironlake_set_pipeconf(struct drm_crtc *crtc);
+static void haswell_set_pipeconf(struct drm_crtc *crtc);
+static void intel_set_pipe_csc(struct drm_crtc *crtc);
+static void vlv_prepare_pll(struct intel_crtc *crtc);
 
 typedef struct {
 	int	min, max;
@@ -328,6 +340,22 @@
 	.p2 = { .p2_slow = 2, .p2_fast = 20 }, /* slow=min, fast=max */
 };
 
+static const intel_limit_t intel_limits_chv = {
+	/*
+	 * These are the data rate limits (measured in fast clocks)
+	 * since those are the strictest limits we have.  The fast
+	 * clock and actual rate limits are more relaxed, so checking
+	 * them would make no difference.
+	 */
+	.dot = { .min = 25000 * 5, .max = 540000 * 5},	/* 5x: fast clock */
+	.vco = { .min = 4860000, .max = 6700000 },
+	.n = { .min = 1, .max = 1 },
+	.m1 = { .min = 2, .max = 2 },
+	.m2 = { .min = 24 << 22, .max = 175 << 22 },	/* scaled by 1 << 22 */
+	.p1 = { .min = 2, .max = 4 },
+	.p2 = {	.p2_slow = 1, .p2_fast = 14 },
+};
+
 static void vlv_clock(int refclk, intel_clock_t *clock)
 {
 	clock->m = clock->m1 * clock->m2;
@@ -412,6 +440,8 @@
 			limit = &intel_limits_pineview_lvds;
 		else
 			limit = &intel_limits_pineview_sdvo;
+	} else if (IS_CHERRYVIEW(dev)) {
+		limit = &intel_limits_chv;
 	} else if (IS_VALLEYVIEW(dev)) {
 		limit = &intel_limits_vlv;
 	} else if (!IS_GEN2(dev)) {
@@ -456,6 +486,17 @@
 	clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p);
 }
 
+static void chv_clock(int refclk, intel_clock_t *clock)
+{
+	clock->m = clock->m1 * clock->m2;
+	clock->p = clock->p1 * clock->p2;
+	if (WARN_ON(clock->n == 0 || clock->p == 0))
+		return;	/* both are divisors below; vco/dot are left as they were */
+	clock->vco = DIV_ROUND_CLOSEST_ULL((uint64_t)refclk * clock->m,
+			clock->n << 22);	/* m2 is scaled by 1 << 22 */
+	clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p);
+}
+
 #define INTELPllInvalid(s)   do { /* DRM_DEBUG(s); */ return false; } while (0)
 /**
  * Returns whether the given set of divisors are valid for a given refclk with
@@ -731,6 +772,58 @@
 	return found;
 }
 
+/*
+ * Try every p1/p2 pair within @limit (largest first), derive the matching
+ * m2 and keep the valid candidate with the biggest p. True if one was found.
+ */
+static bool
+chv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
+		   int target, int refclk, intel_clock_t *match_clock,
+		   intel_clock_t *best_clock)
+{
+	struct drm_device *dev = crtc->dev;
+	intel_clock_t clock;
+	uint64_t m2;
+	bool found = false;
+
+	memset(best_clock, 0, sizeof(*best_clock));
+
+	/* Per the hardware doc n is always 1 and m1 always 2. Revisit this
+	 * if a 200MHz refclk must be supported: n may not be 1 then. */
+	clock.n = 1;
+	clock.m1 = 2;
+	target *= 5;	/* fast clock */
+
+	for (clock.p1 = limit->p1.max; clock.p1 >= limit->p1.min; clock.p1--) {
+		for (clock.p2 = limit->p2.p2_fast;
+				clock.p2 >= limit->p2.p2_slow;
+				clock.p2 -= clock.p2 > 10 ? 2 : 1) {
+			clock.p = clock.p1 * clock.p2;
+
+			m2 = DIV_ROUND_CLOSEST_ULL(((uint64_t)target * clock.p *
+					clock.n) << 22, refclk * clock.m1);
+
+			if (m2 > INT_MAX/clock.m1)
+				continue;
+
+			clock.m2 = m2;
+
+			chv_clock(refclk, &clock);
+
+			if (!intel_PLL_is_valid(dev, limit, &clock))
+				continue;
+
+			/* based on hardware requirement, prefer bigger p */
+			if (clock.p > best_clock->p) {
+				*best_clock = clock;
+				found = true;
+			}
+		}
+	}
+
+	return found;
+}
+
 bool intel_crtc_active(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -765,7 +858,7 @@
 	frame = I915_READ(frame_reg);
 
 	if (wait_for(I915_READ_NOTRACE(frame_reg) != frame, 50))
-		DRM_DEBUG_KMS("vblank wait timed out\n");
+		WARN(1, "vblank wait timed out\n");
 }
 
 /**
@@ -878,7 +971,7 @@
 	u32 bit;
 
 	if (HAS_PCH_IBX(dev_priv->dev)) {
-		switch(port->port) {
+		switch (port->port) {
 		case PORT_B:
 			bit = SDE_PORTB_HOTPLUG;
 			break;
@@ -892,7 +985,7 @@
 			return true;
 		}
 	} else {
-		switch(port->port) {
+		switch (port->port) {
 		case PORT_B:
 			bit = SDE_PORTB_HOTPLUG_CPT;
 			break;
@@ -1097,10 +1190,8 @@
 
 	if (IS_845G(dev) || IS_I865G(dev))
 		cur_state = I915_READ(_CURACNTR) & CURSOR_ENABLE;
-	else if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev))
-		cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
 	else
-		cur_state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE;
+		cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
 
 	WARN(cur_state != state,
 	     "cursor on pipe %c assertion failure (expected %s, current %s)\n",
@@ -1253,6 +1344,9 @@
 		u32	trans_dp_ctl = I915_READ(trans_dp_ctl_reg);
 		if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel)
 			return false;
+	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
+		if ((val & DP_PIPE_MASK_CHV) != DP_PIPE_SELECT_CHV(pipe))
+			return false;
 	} else {
 		if ((val & DP_PIPE_MASK) != (pipe << 30))
 			return false;
@@ -1269,6 +1363,9 @@
 	if (HAS_PCH_CPT(dev_priv->dev)) {
 		if ((val & SDVO_PIPE_SEL_MASK_CPT) != SDVO_PIPE_SEL_CPT(pipe))
 			return false;
+	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
+		if ((val & SDVO_PIPE_SEL_MASK_CHV) != SDVO_PIPE_SEL_CHV(pipe))
+			return false;
 	} else {
 		if ((val & SDVO_PIPE_SEL_MASK) != SDVO_PIPE_SEL(pipe))
 			return false;
@@ -1367,7 +1464,17 @@
 	if (!IS_VALLEYVIEW(dev))
 		return;
 
-	DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO;
+	/*
+	 * IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C),
+	 * CHV x1 PHY (DP/HDMI D)
+	 * IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C)
+	 */
+	if (IS_CHERRYVIEW(dev)) {
+		DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2;
+		DPIO_PHY_IOSF_PORT(DPIO_PHY1) = IOSF_PORT_DPIO;
+	} else {
+		DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO;
+	}
 }
 
 static void intel_reset_dpio(struct drm_device *dev)
@@ -1377,25 +1484,48 @@
 	if (!IS_VALLEYVIEW(dev))
 		return;
 
-	/*
-	 * Enable the CRI clock source so we can get at the display and the
-	 * reference clock for VGA hotplug / manual detection.
-	 */
-	I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
-		   DPLL_REFA_CLK_ENABLE_VLV |
-		   DPLL_INTEGRATED_CRI_CLK_VLV);
+	if (IS_CHERRYVIEW(dev)) {
+		enum dpio_phy phy;
+		u32 val;
 
-	/*
-	 * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
-	 *  6.	De-assert cmn_reset/side_reset. Same as VLV X0.
-	 *   a.	GUnit 0x2110 bit[0] set to 1 (def 0)
-	 *   b.	The other bits such as sfr settings / modesel may all be set
-	 *      to 0.
-	 *
-	 * This should only be done on init and resume from S3 with both
-	 * PLLs disabled, or we risk losing DPIO and PLL synchronization.
-	 */
-	I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
+		for (phy = DPIO_PHY0; phy < I915_NUM_PHYS_VLV; phy++) {
+			/* Poll for phypwrgood signal */
+			if (wait_for(I915_READ(DISPLAY_PHY_STATUS) &
+						PHY_POWERGOOD(phy), 1))
+				DRM_ERROR("Display PHY %d is not power up\n", phy);
+
+			/*
+			 * Deassert common lane reset for PHY.
+			 *
+			 * This should only be done on init and resume from S3
+			 * with both PLLs disabled, or we risk losing DPIO and
+			 * PLL synchronization.
+			 */
+			val = I915_READ(DISPLAY_PHY_CONTROL);
+			I915_WRITE(DISPLAY_PHY_CONTROL,
+				PHY_COM_LANE_RESET_DEASSERT(phy, val));
+		}
+
+	} else {
+		/*
+		 * If DPIO has already been reset, e.g. by BIOS, just skip all
+		 * this.
+		 */
+		if (I915_READ(DPIO_CTL) & DPIO_CMNRST)
+			return;
+
+		/*
+		 * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx:
+		 * Need to assert and de-assert PHY SB reset by gating the
+		 * common lane power, then un-gating it.
+		 * Simply ungating isn't enough to reset the PHY enough to get
+		 * ports and lanes running.
+		 */
+		__vlv_set_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC,
+				     false);
+		__vlv_set_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC,
+				     true);
+	}
 }
 
 static void vlv_enable_pll(struct intel_crtc *crtc)
@@ -1436,6 +1566,44 @@
 	udelay(150); /* wait for warmup */
 }
 
+/* Un-gate the 10bit display clock through DPIO, then write the DPLL value
+ * from crtc->config and wait for lock. Expects a disabled pipe, CHV only. */
+static void chv_enable_pll(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = crtc->pipe;
+	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	u32 tmp;
+
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+
+	BUG_ON(!IS_CHERRYVIEW(dev_priv->dev));
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Enable back the 10bit clock to display controller */
+	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
+	tmp |= DPIO_DCLKP_EN;
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
+
+	/* Need > 100ns between dclkp clock enable bit and PLL enable. */
+	udelay(1);
+
+	/* Enable PLL */
+	I915_WRITE(DPLL(pipe), crtc->config.dpll_hw_state.dpll);
+
+	/* Check PLL is locked */
+	if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
+		DRM_ERROR("PLL %d failed to lock\n", pipe);
+
+	/* not sure when this should be written */
+	I915_WRITE(DPLL_MD(pipe), crtc->config.dpll_hw_state.dpll_md);
+	POSTING_READ(DPLL_MD(pipe));
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
 static void i9xx_enable_pll(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -1519,45 +1687,92 @@
 		val = DPLL_INTEGRATED_CRI_CLK_VLV | DPLL_REFA_CLK_ENABLE_VLV;
 	I915_WRITE(DPLL(pipe), val);
 	POSTING_READ(DPLL(pipe));
+
+}
+
+static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
+{
+	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	u32 val;
+
+	/* Make sure the pipe isn't still relying on us */
+	assert_pipe_disabled(dev_priv, pipe);
+
+	/* Set PLL en = 0 */
+	val = DPLL_SSC_REF_CLOCK_CHV;
+	if (pipe != PIPE_A)
+		val |= DPLL_INTEGRATED_CRI_CLK_VLV;
+	I915_WRITE(DPLL(pipe), val);
+	POSTING_READ(DPLL(pipe));
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Disable 10bit clock to display controller */
+	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
+	val &= ~DPIO_DCLKP_EN;
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val);
+
+	mutex_unlock(&dev_priv->dpio_lock);
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 		struct intel_digital_port *dport)
 {
 	u32 port_mask;
+	int dpll_reg;
 
 	switch (dport->port) {
 	case PORT_B:
 		port_mask = DPLL_PORTB_READY_MASK;
+		dpll_reg = DPLL(0);
 		break;
 	case PORT_C:
 		port_mask = DPLL_PORTC_READY_MASK;
+		dpll_reg = DPLL(0);
+		break;
+	case PORT_D:
+		port_mask = DPLL_PORTD_READY_MASK;
+		dpll_reg = DPIO_PHY_STATUS;
 		break;
 	default:
 		BUG();
 	}
 
-	if (wait_for((I915_READ(DPLL(0)) & port_mask) == 0, 1000))
+	if (wait_for((I915_READ(dpll_reg) & port_mask) == 0, 1000))
 		WARN(1, "timed out waiting for port %c ready: 0x%08x\n",
-		     port_name(dport->port), I915_READ(DPLL(0)));
+		     port_name(dport->port), I915_READ(dpll_reg));
+}
+
+static void intel_prepare_shared_dpll(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
+
+	WARN_ON(!pll->refcount);
+	if (pll->active == 0) {
+		DRM_DEBUG_DRIVER("setting up %s\n", pll->name);
+		WARN_ON(pll->on);
+		assert_shared_dpll_disabled(dev_priv, pll);
+
+		pll->mode_set(dev_priv, pll);
+	}
 }
 
 /**
- * ironlake_enable_shared_dpll - enable PCH PLL
+ * intel_enable_shared_dpll - enable PCH PLL
  * @dev_priv: i915 private structure
  * @pipe: pipe PLL to enable
  *
  * The PCH PLL needs to be enabled before the PCH transcoder, since it
  * drives the transcoder clock.
  */
-static void ironlake_enable_shared_dpll(struct intel_crtc *crtc)
+static void intel_enable_shared_dpll(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
 
-	/* PCH PLLs only available on ILK, SNB and IVB */
-	BUG_ON(INTEL_INFO(dev)->gen < 5);
 	if (WARN_ON(pll == NULL))
 		return;
 
@@ -1804,16 +2019,6 @@
 
 	I915_WRITE(reg, val | PIPECONF_ENABLE);
 	POSTING_READ(reg);
-
-	/*
-	 * There's no guarantee the pipe will really start running now. It
-	 * depends on the Gen, the output type and the relative order between
-	 * pipe and plane enabling. Avoid waiting on HSW+ since it's not
-	 * necessary.
-	 * TODO: audit the previous gens.
-	 */
-	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
-		intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
 /**
@@ -1890,18 +2095,17 @@
 	/* If the pipe isn't enabled, we can't pump pixels and may hang */
 	assert_pipe_enabled(dev_priv, pipe);
 
-	WARN(intel_crtc->primary_enabled, "Primary plane already enabled\n");
+	if (intel_crtc->primary_enabled)
+		return;
 
 	intel_crtc->primary_enabled = true;
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	if (val & DISPLAY_PLANE_ENABLE)
-		return;
+	WARN_ON(val & DISPLAY_PLANE_ENABLE);
 
 	I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
 	intel_flush_primary_plane(dev_priv, plane);
-	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
 /**
@@ -1920,18 +2124,17 @@
 	int reg;
 	u32 val;
 
-	WARN(!intel_crtc->primary_enabled, "Primary plane already disabled\n");
+	if (!intel_crtc->primary_enabled)
+		return;
 
 	intel_crtc->primary_enabled = false;
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	if ((val & DISPLAY_PLANE_ENABLE) == 0)
-		return;
+	WARN_ON((val & DISPLAY_PLANE_ENABLE) == 0);
 
 	I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE);
 	intel_flush_primary_plane(dev_priv, plane);
-	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
 static bool need_vtd_wa(struct drm_device *dev)
@@ -1954,7 +2157,7 @@
 int
 intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			   struct drm_i915_gem_object *obj,
-			   struct intel_ring_buffer *pipelined)
+			   struct intel_engine_cs *pipelined)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 alignment;
@@ -2134,7 +2337,7 @@
 	 * Failed to alloc the obj, check to see if we should share
 	 * an fb with another CRTC instead
 	 */
-	list_for_each_entry(c, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, c) {
 		i = to_intel_crtc(c);
 
 		if (c == &intel_crtc->base)
@@ -2152,9 +2355,9 @@
 	}
 }
 
-static int i9xx_update_primary_plane(struct drm_crtc *crtc,
-				     struct drm_framebuffer *fb,
-				     int x, int y)
+static void i9xx_update_primary_plane(struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb,
+				      int x, int y)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2166,15 +2369,6 @@
 	u32 dspcntr;
 	u32 reg;
 
-	switch (plane) {
-	case 0:
-	case 1:
-		break;
-	default:
-		DRM_ERROR("Can't update plane %c in SAREA\n", plane_name(plane));
-		return -EINVAL;
-	}
-
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
@@ -2249,13 +2443,11 @@
 	} else
 		I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
 	POSTING_READ(reg);
-
-	return 0;
 }
 
-static int ironlake_update_primary_plane(struct drm_crtc *crtc,
-					 struct drm_framebuffer *fb,
-					 int x, int y)
+static void ironlake_update_primary_plane(struct drm_crtc *crtc,
+					  struct drm_framebuffer *fb,
+					  int x, int y)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2267,16 +2459,6 @@
 	u32 dspcntr;
 	u32 reg;
 
-	switch (plane) {
-	case 0:
-	case 1:
-	case 2:
-		break;
-	default:
-		DRM_ERROR("Can't update plane %c in SAREA\n", plane_name(plane));
-		return -EINVAL;
-	}
-
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
@@ -2343,8 +2525,6 @@
 		I915_WRITE(DSPLINOFF(plane), linear_offset);
 	}
 	POSTING_READ(reg);
-
-	return 0;
 }
 
 /* Assume fb object is pinned & idle & fenced and just update base pointers */
@@ -2359,7 +2539,9 @@
 		dev_priv->display.disable_fbc(dev);
 	intel_increase_pllclock(crtc);
 
-	return dev_priv->display.update_primary_plane(crtc, fb, x, y);
+	dev_priv->display.update_primary_plane(crtc, fb, x, y);
+
+	return 0;
 }
 
 void intel_display_handle_reset(struct drm_device *dev)
@@ -2381,7 +2563,7 @@
 	 * pending_flip_queue really got woken up.
 	 */
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 		enum plane plane = intel_crtc->plane;
 
@@ -2389,10 +2571,10 @@
 		intel_finish_page_flip_plane(dev, plane);
 	}
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-		mutex_lock(&crtc->mutex);
+		drm_modeset_lock(&crtc->mutex, NULL);
 		/*
 		 * FIXME: Once we have proper support for primary planes (and
 		 * disabling them without disabling the entire crtc) allow again
@@ -2403,7 +2585,7 @@
 							       crtc->primary->fb,
 							       crtc->x,
 							       crtc->y);
-		mutex_unlock(&crtc->mutex);
+		drm_modeset_unlock(&crtc->mutex);
 	}
 }
 
@@ -2518,14 +2700,7 @@
 		intel_crtc->config.pipe_src_h = adjusted_mode->crtc_vdisplay;
 	}
 
-	ret = dev_priv->display.update_primary_plane(crtc, fb, x, y);
-	if (ret) {
-		mutex_lock(&dev->struct_mutex);
-		intel_unpin_fb_obj(to_intel_framebuffer(fb)->obj);
-		mutex_unlock(&dev->struct_mutex);
-		DRM_ERROR("failed to update base address\n");
-		return ret;
-	}
+	dev_priv->display.update_primary_plane(crtc, fb, x, y);
 
 	old_fb = crtc->primary->fb;
 	crtc->primary->fb = fb;
@@ -2628,12 +2803,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 	u32 reg, temp, tries;
 
-	/* FDI needs bits from pipe & plane first */
+	/* FDI needs bits from pipe first */
 	assert_pipe_enabled(dev_priv, pipe);
-	assert_plane_enabled(dev_priv, plane);
 
 	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -3064,9 +3237,8 @@
 	udelay(100);
 
 	/* Ironlake workaround, disable clock pointer after downing FDI */
-	if (HAS_PCH_IBX(dev)) {
+	if (HAS_PCH_IBX(dev))
 		I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR);
-	}
 
 	/* still set train pattern 1 */
 	reg = FDI_TX_CTL(pipe);
@@ -3104,7 +3276,7 @@
 	 * cannot claim and pin a new fb without at least acquring the
 	 * struct_mutex and so serialising with us.
 	 */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		if (atomic_read(&crtc->unpin_work_count) == 0)
 			continue;
 
@@ -3117,7 +3289,7 @@
 	return false;
 }
 
-static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
+void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3127,8 +3299,9 @@
 
 	WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue));
 
-	wait_event(dev_priv->pending_flip_queue,
-		   !intel_crtc_has_pending_flip(crtc));
+	WARN_ON(wait_event_timeout(dev_priv->pending_flip_queue,
+				   !intel_crtc_has_pending_flip(crtc),
+				   60*HZ) == 0);
 
 	mutex_lock(&dev->struct_mutex);
 	intel_finish_fb(crtc->primary->fb);
@@ -3341,7 +3514,7 @@
 	 * Note that enable_shared_dpll tries to do the right thing, but
 	 * get_shared_dpll unconditionally resets the pll - we need that to have
 	 * the right LVDS enable sequence. */
-	ironlake_enable_shared_dpll(intel_crtc);
+	intel_enable_shared_dpll(intel_crtc);
 
 	/* set transcoder timing, panel must allow it */
 	assert_panel_unlocked(dev_priv, pipe);
@@ -3445,6 +3618,8 @@
 		DRM_DEBUG_KMS("CRTC:%d using pre-allocated %s\n",
 			      crtc->base.base.id, pll->name);
 
+		WARN_ON(pll->refcount);
+
 		goto found;
 	}
 
@@ -3478,20 +3653,13 @@
 	return NULL;
 
 found:
+	if (pll->refcount == 0)
+		pll->hw_state = crtc->config.dpll_hw_state;
+
 	crtc->config.shared_dpll = i;
 	DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->name,
 			 pipe_name(crtc->pipe));
 
-	if (pll->active == 0) {
-		memcpy(&pll->hw_state, &crtc->config.dpll_hw_state,
-		       sizeof(pll->hw_state));
-
-		DRM_DEBUG_DRIVER("setting up %s\n", pll->name);
-		WARN_ON(pll->on);
-		assert_shared_dpll_disabled(dev_priv, pll);
-
-		pll->mode_set(dev_priv, pll);
-	}
 	pll->refcount++;
 
 	return pll;
@@ -3562,17 +3730,17 @@
 
 void hsw_enable_ips(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (!crtc->config.ips_enabled)
 		return;
 
-	/* We can only enable IPS after we enable a plane and wait for a vblank.
-	 * We guarantee that the plane is enabled by calling intel_enable_ips
-	 * only after intel_enable_plane. And intel_enable_plane already waits
-	 * for a vblank, so all we need to do here is to enable the IPS bit. */
+	/* We can only enable IPS after we enable a plane and wait for a vblank */
+	intel_wait_for_vblank(dev, crtc->pipe);
+
 	assert_plane_enabled(dev_priv, crtc->plane);
-	if (IS_BROADWELL(crtc->base.dev)) {
+	if (IS_BROADWELL(dev)) {
 		mutex_lock(&dev_priv->rps.hw_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0xc0000000));
 		mutex_unlock(&dev_priv->rps.hw_lock);
@@ -3602,10 +3770,13 @@
 		return;
 
 	assert_plane_enabled(dev_priv, crtc->plane);
-	if (IS_BROADWELL(crtc->base.dev)) {
+	if (IS_BROADWELL(dev)) {
 		mutex_lock(&dev_priv->rps.hw_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
 		mutex_unlock(&dev_priv->rps.hw_lock);
+		/* wait for pcode to finish disabling IPS, which may take up to 42ms */
+		if (wait_for((I915_READ(IPS_CTL) & IPS_ENABLE) == 0, 42))
+			DRM_ERROR("Timed out waiting for IPS disable\n");
 	} else {
 		I915_WRITE(IPS_CTL, 0);
 		POSTING_READ(IPS_CTL);
@@ -3662,6 +3833,94 @@
 		hsw_enable_ips(intel_crtc);
 }
 
+static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable)
+{
+	if (!enable && intel_crtc->overlay) {
+		struct drm_device *dev = intel_crtc->base.dev;
+		struct drm_i915_private *dev_priv = dev->dev_private;
+
+		mutex_lock(&dev->struct_mutex);
+		dev_priv->mm.interruptible = false;
+		(void) intel_overlay_switch_off(intel_crtc->overlay);
+		dev_priv->mm.interruptible = true;
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	/* Let userspace switch the overlay on again. In most cases userspace
+	 * has to recompute where to put it anyway.
+	 */
+}
+
+/**
+ * g4x_fixup_plane - ugly workaround for G45 to fire up the hardware
+ * cursor plane briefly if not already running after enabling the display
+ * plane.
+ * This workaround avoids occasional blank screens when self refresh is
+ * enabled.
+ */
+static void
+g4x_fixup_plane(struct drm_i915_private *dev_priv, enum pipe pipe)
+{
+	u32 cntl = I915_READ(CURCNTR(pipe));
+
+	if ((cntl & CURSOR_MODE) == 0) {
+		u32 fw_bcl_self = I915_READ(FW_BLC_SELF);
+
+		I915_WRITE(FW_BLC_SELF, fw_bcl_self & ~FW_BLC_SELF_EN);
+		I915_WRITE(CURCNTR(pipe), CURSOR_MODE_64_ARGB_AX);
+		intel_wait_for_vblank(dev_priv->dev, pipe);
+		I915_WRITE(CURCNTR(pipe), cntl);
+		I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
+		I915_WRITE(FW_BLC_SELF, fw_bcl_self);
+	}
+}
+
+static void intel_crtc_enable_planes(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+
+	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
+	intel_enable_planes(crtc);
+	/* The fixup needs to happen before cursor is enabled */
+	if (IS_G4X(dev))
+		g4x_fixup_plane(dev_priv, pipe);
+	intel_crtc_update_cursor(crtc, true);
+	intel_crtc_dpms_overlay(intel_crtc, true);
+
+	hsw_enable_ips(intel_crtc);
+
+	mutex_lock(&dev->struct_mutex);
+	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void intel_crtc_disable_planes(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+
+	intel_crtc_wait_for_pending_flips(crtc);
+	drm_crtc_vblank_off(crtc);
+
+	if (dev_priv->fbc.plane == plane)
+		intel_disable_fbc(dev);
+
+	hsw_disable_ips(intel_crtc);
+
+	intel_crtc_dpms_overlay(intel_crtc, false);
+	intel_crtc_update_cursor(crtc, false);
+	intel_disable_planes(crtc);
+	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
+}
+
 static void ironlake_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3669,13 +3928,35 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
+	enum plane plane = intel_crtc->plane;
 
 	WARN_ON(!crtc->enabled);
 
 	if (intel_crtc->active)
 		return;
 
+	if (intel_crtc->config.has_pch_encoder)
+		intel_prepare_shared_dpll(intel_crtc);
+
+	if (intel_crtc->config.has_dp_encoder)
+		intel_dp_set_m_n(intel_crtc);
+
+	intel_set_pipe_timings(intel_crtc);
+
+	if (intel_crtc->config.has_pch_encoder) {
+		intel_cpu_transcoder_set_m_n(intel_crtc,
+					     &intel_crtc->config.fdi_m_n);
+	}
+
+	ironlake_set_pipeconf(crtc);
+
+	/* Set up the display plane register */
+	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE);
+	POSTING_READ(DSPCNTR(plane));
+
+	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
+					       crtc->x, crtc->y);
+
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
@@ -3705,32 +3986,19 @@
 
 	intel_update_watermarks(crtc);
 	intel_enable_pipe(intel_crtc);
-	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
-	intel_enable_planes(crtc);
-	intel_crtc_update_cursor(crtc, true);
 
 	if (intel_crtc->config.has_pch_encoder)
 		ironlake_pch_enable(crtc);
 
-	mutex_lock(&dev->struct_mutex);
-	intel_update_fbc(dev);
-	mutex_unlock(&dev->struct_mutex);
-
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
 
 	if (HAS_PCH_CPT(dev))
 		cpt_verify_modeset(dev, intel_crtc->pipe);
 
-	/*
-	 * There seems to be a race in PCH platform hw (at least on some
-	 * outputs) where an enabled pipe still completes any pageflip right
-	 * away (as if the pipe is off) instead of waiting for vblank. As soon
-	 * as the first vblank happend, everything works as expected. Hence just
-	 * wait for one vblank before returning to avoid strange things
-	 * happening.
-	 */
-	intel_wait_for_vblank(dev, intel_crtc->pipe);
+	intel_crtc_enable_planes(crtc);
+
+	drm_crtc_vblank_on(crtc);
 }
 
 /* IPS only exists on ULT machines and is tied to pipe A. */
@@ -3739,47 +4007,6 @@
 	return HAS_IPS(crtc->base.dev) && crtc->pipe == PIPE_A;
 }
 
-static void haswell_crtc_enable_planes(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
-
-	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
-	intel_enable_planes(crtc);
-	intel_crtc_update_cursor(crtc, true);
-
-	hsw_enable_ips(intel_crtc);
-
-	mutex_lock(&dev->struct_mutex);
-	intel_update_fbc(dev);
-	mutex_unlock(&dev->struct_mutex);
-}
-
-static void haswell_crtc_disable_planes(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
-
-	intel_crtc_wait_for_pending_flips(crtc);
-	drm_vblank_off(dev, pipe);
-
-	/* FBC must be disabled before disabling the plane on HSW. */
-	if (dev_priv->fbc.plane == plane)
-		intel_disable_fbc(dev);
-
-	hsw_disable_ips(intel_crtc);
-
-	intel_crtc_update_cursor(crtc, false);
-	intel_disable_planes(crtc);
-	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
-}
-
 /*
  * This implements the workaround described in the "notes" section of the mode
  * set sequence documentation. When going from no pipes or single pipe to
@@ -3793,7 +4020,7 @@
 
 	/* We want to get the other_active_crtc only if there's only 1 other
 	 * active crtc. */
-	list_for_each_entry(crtc_it, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc_it) {
 		if (!crtc_it->active || crtc_it == crtc)
 			continue;
 
@@ -3816,12 +4043,34 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
+	enum plane plane = intel_crtc->plane;
 
 	WARN_ON(!crtc->enabled);
 
 	if (intel_crtc->active)
 		return;
 
+	if (intel_crtc->config.has_dp_encoder)
+		intel_dp_set_m_n(intel_crtc);
+
+	intel_set_pipe_timings(intel_crtc);
+
+	if (intel_crtc->config.has_pch_encoder) {
+		intel_cpu_transcoder_set_m_n(intel_crtc,
+					     &intel_crtc->config.fdi_m_n);
+	}
+
+	haswell_set_pipeconf(crtc);
+
+	intel_set_pipe_csc(crtc);
+
+	/* Set up the display plane register */
+	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE);
+	POSTING_READ(DSPCNTR(plane));
+
+	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
+					       crtc->x, crtc->y);
+
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
@@ -3862,7 +4111,9 @@
 	/* If we change the relative order between pipe/planes enabling, we need
 	 * to change the workaround. */
 	haswell_mode_set_planes_workaround(intel_crtc);
-	haswell_crtc_enable_planes(crtc);
+	intel_crtc_enable_planes(crtc);
+
+	drm_crtc_vblank_on(crtc);
 }
 
 static void ironlake_pfit_disable(struct intel_crtc *crtc)
@@ -3887,26 +4138,16 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 	u32 reg, temp;
 
-
 	if (!intel_crtc->active)
 		return;
 
+	intel_crtc_disable_planes(crtc);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->disable(encoder);
 
-	intel_crtc_wait_for_pending_flips(crtc);
-	drm_vblank_off(dev, pipe);
-
-	if (dev_priv->fbc.plane == plane)
-		intel_disable_fbc(dev);
-
-	intel_crtc_update_cursor(crtc, false);
-	intel_disable_planes(crtc);
-	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
-
 	if (intel_crtc->config.has_pch_encoder)
 		intel_set_pch_fifo_underrun_reporting(dev, pipe, false);
 
@@ -3950,6 +4191,7 @@
 
 	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
 	mutex_unlock(&dev->struct_mutex);
 }
 
@@ -3965,7 +4207,7 @@
 	if (!intel_crtc->active)
 		return;
 
-	haswell_crtc_disable_planes(crtc);
+	intel_crtc_disable_planes(crtc);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
 		intel_opregion_notify_encoder(encoder, false);
@@ -3997,6 +4239,7 @@
 
 	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
 	mutex_unlock(&dev->struct_mutex);
 }
 
@@ -4011,48 +4254,6 @@
 	intel_ddi_put_crtc_pll(crtc);
 }
 
-static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable)
-{
-	if (!enable && intel_crtc->overlay) {
-		struct drm_device *dev = intel_crtc->base.dev;
-		struct drm_i915_private *dev_priv = dev->dev_private;
-
-		mutex_lock(&dev->struct_mutex);
-		dev_priv->mm.interruptible = false;
-		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		dev_priv->mm.interruptible = true;
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	/* Let userspace switch the overlay on again. In most cases userspace
-	 * has to recompute where to put it anyway.
-	 */
-}
-
-/**
- * i9xx_fixup_plane - ugly workaround for G45 to fire up the hardware
- * cursor plane briefly if not already running after enabling the display
- * plane.
- * This workaround avoids occasional blank screens when self refresh is
- * enabled.
- */
-static void
-g4x_fixup_plane(struct drm_i915_private *dev_priv, enum pipe pipe)
-{
-	u32 cntl = I915_READ(CURCNTR(pipe));
-
-	if ((cntl & CURSOR_MODE) == 0) {
-		u32 fw_bcl_self = I915_READ(FW_BLC_SELF);
-
-		I915_WRITE(FW_BLC_SELF, fw_bcl_self & ~FW_BLC_SELF_EN);
-		I915_WRITE(CURCNTR(pipe), CURSOR_MODE_64_ARGB_AX);
-		intel_wait_for_vblank(dev_priv->dev, pipe);
-		I915_WRITE(CURCNTR(pipe), cntl);
-		I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
-		I915_WRITE(FW_BLC_SELF, fw_bcl_self);
-	}
-}
-
 static void i9xx_pfit_enable(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -4164,7 +4365,7 @@
 	 * First get all needed power domains, then put all unneeded, to avoid
 	 * any unnecessary toggling of the power wells.
 	 */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		enum intel_display_power_domain domain;
 
 		if (!crtc->base.enabled)
@@ -4176,7 +4377,7 @@
 			intel_display_power_get(dev_priv, domain);
 	}
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		enum intel_display_power_domain domain;
 
 		for_each_power_domain(domain, crtc->enabled_power_domains)
@@ -4207,6 +4408,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val, cmd;
 
+	WARN_ON(valleyview_cur_cdclk(dev_priv) != dev_priv->vlv_cdclk_freq);
+	dev_priv->vlv_cdclk_freq = cdclk;
+
 	if (cdclk >= 320) /* jump to highest voltage for 400MHz too */
 		cmd = 2;
 	else if (cdclk == 266)
@@ -4261,7 +4465,7 @@
 	intel_i2c_reset(dev);
 }
 
-static int valleyview_cur_cdclk(struct drm_i915_private *dev_priv)
+int valleyview_cur_cdclk(struct drm_i915_private *dev_priv)
 {
 	int cur_cdclk, vco;
 	int divider;
@@ -4282,10 +4486,6 @@
 static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv,
 				 int max_pixclk)
 {
-	int cur_cdclk;
-
-	cur_cdclk = valleyview_cur_cdclk(dev_priv);
-
 	/*
 	 * Really only a few cases to deal with, as only 4 CDclks are supported:
 	 *   200MHz
@@ -4311,8 +4511,7 @@
 	struct intel_crtc *intel_crtc;
 	int max_pixclk = 0;
 
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, intel_crtc) {
 		if (intel_crtc->new_enabled)
 			max_pixclk = max(max_pixclk,
 					 intel_crtc->new_config->adjusted_mode.crtc_clock);
@@ -4327,14 +4526,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc;
 	int max_pixclk = intel_mode_max_pixclk(dev_priv);
-	int cur_cdclk = valleyview_cur_cdclk(dev_priv);
 
-	if (valleyview_calc_cdclk(dev_priv, max_pixclk) == cur_cdclk)
+	if (valleyview_calc_cdclk(dev_priv, max_pixclk) ==
+	    dev_priv->vlv_cdclk_freq)
 		return;
 
 	/* disable/enable all currently active pipes while we change cdclk */
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
-			    base.head)
+	for_each_intel_crtc(dev, intel_crtc)
 		if (intel_crtc->base.enabled)
 			*prepare_pipes |= (1 << intel_crtc->pipe);
 }
@@ -4343,10 +4541,9 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int max_pixclk = intel_mode_max_pixclk(dev_priv);
-	int cur_cdclk = valleyview_cur_cdclk(dev_priv);
 	int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
 
-	if (req_cdclk != cur_cdclk)
+	if (req_cdclk != dev_priv->vlv_cdclk_freq)
 		valleyview_set_cdclk(dev, req_cdclk);
 	modeset_update_crtc_power_domains(dev);
 }
@@ -4360,22 +4557,55 @@
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
 	bool is_dsi;
+	u32 dspcntr;
 
 	WARN_ON(!crtc->enabled);
 
 	if (intel_crtc->active)
 		return;
 
+	vlv_prepare_pll(intel_crtc);
+
+	/* Set up the display plane register */
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	if (intel_crtc->config.has_dp_encoder)
+		intel_dp_set_m_n(intel_crtc);
+
+	intel_set_pipe_timings(intel_crtc);
+
+	/* pipesrc and dspsize control the size that is scaled from,
+	 * which should always be the user's requested size.
+	 */
+	I915_WRITE(DSPSIZE(plane),
+		   ((intel_crtc->config.pipe_src_h - 1) << 16) |
+		   (intel_crtc->config.pipe_src_w - 1));
+	I915_WRITE(DSPPOS(plane), 0);
+
+	i9xx_set_pipeconf(intel_crtc);
+
+	I915_WRITE(DSPCNTR(plane), dspcntr);
+	POSTING_READ(DSPCNTR(plane));
+
+	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
+					       crtc->x, crtc->y);
+
 	intel_crtc->active = true;
 
+	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_pll_enable)
 			encoder->pre_pll_enable(encoder);
 
 	is_dsi = intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI);
 
-	if (!is_dsi)
-		vlv_enable_pll(intel_crtc);
+	if (!is_dsi) {
+		if (IS_CHERRYVIEW(dev))
+			chv_enable_pll(intel_crtc);
+		else
+			vlv_enable_pll(intel_crtc);
+	}
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
@@ -4387,15 +4617,25 @@
 
 	intel_update_watermarks(crtc);
 	intel_enable_pipe(intel_crtc);
-	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
-	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
-	intel_enable_planes(crtc);
-	intel_crtc_update_cursor(crtc, true);
-
-	intel_update_fbc(dev);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
+
+	intel_crtc_enable_planes(crtc);
+
+	drm_crtc_vblank_on(crtc);
+
+	/* Underruns don't raise interrupts, so check manually. */
+	i9xx_check_fifo_underruns(dev);
+}
+
+static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(FP0(crtc->pipe), crtc->config.dpll_hw_state.fp0);
+	I915_WRITE(FP1(crtc->pipe), crtc->config.dpll_hw_state.fp1);
 }
 
 static void i9xx_crtc_enable(struct drm_crtc *crtc)
@@ -4406,14 +4646,49 @@
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
+	u32 dspcntr;
 
 	WARN_ON(!crtc->enabled);
 
 	if (intel_crtc->active)
 		return;
 
+	i9xx_set_pll_dividers(intel_crtc);
+
+	/* Set up the display plane register */
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	if (pipe == 0)
+		dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
+	else
+		dspcntr |= DISPPLANE_SEL_PIPE_B;
+
+	if (intel_crtc->config.has_dp_encoder)
+		intel_dp_set_m_n(intel_crtc);
+
+	intel_set_pipe_timings(intel_crtc);
+
+	/* pipesrc and dspsize control the size that is scaled from,
+	 * which should always be the user's requested size.
+	 */
+	I915_WRITE(DSPSIZE(plane),
+		   ((intel_crtc->config.pipe_src_h - 1) << 16) |
+		   (intel_crtc->config.pipe_src_w - 1));
+	I915_WRITE(DSPPOS(plane), 0);
+
+	i9xx_set_pipeconf(intel_crtc);
+
+	I915_WRITE(DSPCNTR(plane), dspcntr);
+	POSTING_READ(DSPCNTR(plane));
+
+	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
+					       crtc->x, crtc->y);
+
 	intel_crtc->active = true;
 
+	if (!IS_GEN2(dev))
+		intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
 			encoder->pre_enable(encoder);
@@ -4426,21 +4701,26 @@
 
 	intel_update_watermarks(crtc);
 	intel_enable_pipe(intel_crtc);
-	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
-	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
-	intel_enable_planes(crtc);
-	/* The fixup needs to happen before cursor is enabled */
-	if (IS_G4X(dev))
-		g4x_fixup_plane(dev_priv, pipe);
-	intel_crtc_update_cursor(crtc, true);
-
-	/* Give the overlay scaler a chance to enable if it's on this pipe */
-	intel_crtc_dpms_overlay(intel_crtc, true);
-
-	intel_update_fbc(dev);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
+
+	intel_crtc_enable_planes(crtc);
+
+	/*
+	 * Gen2 reports pipe underruns whenever all planes are disabled.
+	 * So don't enable underrun reporting before at least some planes
+	 * are enabled.
+	 * FIXME: Need to fix the logic to work when we turn off all planes
+	 * but leave the pipe running.
+	 */
+	if (IS_GEN2(dev))
+		intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
+
+	drm_crtc_vblank_on(crtc);
+
+	/* Underruns don't raise interrupts, so check manually. */
+	i9xx_check_fifo_underruns(dev);
 }
 
 static void i9xx_pfit_disable(struct intel_crtc *crtc)
@@ -4465,27 +4745,31 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 
 	if (!intel_crtc->active)
 		return;
 
+	/*
+	 * Gen2 reports pipe underruns whenever all planes are disabled.
+	 * So disable underrun reporting before all the planes get disabled.
+	 * FIXME: Need to fix the logic to work when we turn off all planes
+	 * but leave the pipe running.
+	 */
+	if (IS_GEN2(dev))
+		intel_set_cpu_fifo_underrun_reporting(dev, pipe, false);
+
+	intel_crtc_disable_planes(crtc);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->disable(encoder);
 
-	/* Give the overlay scaler a chance to disable if it's on this pipe */
-	intel_crtc_wait_for_pending_flips(crtc);
-	drm_vblank_off(dev, pipe);
+	/*
+	 * On gen2 planes are double buffered but the pipe isn't, so we must
+	 * wait for planes to fully turn off before disabling the pipe.
+	 */
+	if (IS_GEN2(dev))
+		intel_wait_for_vblank(dev, pipe);
 
-	if (dev_priv->fbc.plane == plane)
-		intel_disable_fbc(dev);
-
-	intel_crtc_dpms_overlay(intel_crtc, false);
-	intel_crtc_update_cursor(crtc, false);
-	intel_disable_planes(crtc);
-	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
-
-	intel_set_cpu_fifo_underrun_reporting(dev, pipe, false);
 	intel_disable_pipe(dev_priv, pipe);
 
 	i9xx_pfit_disable(intel_crtc);
@@ -4494,15 +4778,25 @@
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
 
-	if (IS_VALLEYVIEW(dev) && !intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI))
-		vlv_disable_pll(dev_priv, pipe);
-	else if (!IS_VALLEYVIEW(dev))
-		i9xx_disable_pll(dev_priv, pipe);
+	if (!intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI)) {
+		if (IS_CHERRYVIEW(dev))
+			chv_disable_pll(dev_priv, pipe);
+		else if (IS_VALLEYVIEW(dev))
+			vlv_disable_pll(dev_priv, pipe);
+		else
+			i9xx_disable_pll(dev_priv, pipe);
+	}
+
+	if (!IS_GEN2(dev))
+		intel_set_cpu_fifo_underrun_reporting(dev, pipe, false);
 
 	intel_crtc->active = false;
 	intel_update_watermarks(crtc);
 
+	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
+	mutex_unlock(&dev->struct_mutex);
 }
 
 static void i9xx_crtc_off(struct drm_crtc *crtc)
@@ -4565,13 +4859,11 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_connector *connector;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 	/* crtc should still be enabled when we disable it. */
 	WARN_ON(!crtc->enabled);
 
 	dev_priv->display.crtc_disable(crtc);
-	intel_crtc->eld_vld = false;
 	intel_crtc_update_sarea(crtc, false);
 	dev_priv->display.off(crtc);
 
@@ -4635,7 +4927,7 @@
 
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 			      connector->base.base.id,
-			      drm_get_connector_name(&connector->base));
+			      connector->base.name);
 
 		WARN(connector->base.dpms == DRM_MODE_DPMS_OFF,
 		     "wrong connector dpms state\n");
@@ -5039,8 +5331,6 @@
 				     intel_clock_t *reduced_clock)
 {
 	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int pipe = crtc->pipe;
 	u32 fp, fp2 = 0;
 
 	if (IS_PINEVIEW(dev)) {
@@ -5053,17 +5343,14 @@
 			fp2 = i9xx_dpll_compute_fp(reduced_clock);
 	}
 
-	I915_WRITE(FP0(pipe), fp);
 	crtc->config.dpll_hw_state.fp0 = fp;
 
 	crtc->lowfreq_avail = false;
 	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_LVDS) &&
 	    reduced_clock && i915.powersave) {
-		I915_WRITE(FP1(pipe), fp2);
 		crtc->config.dpll_hw_state.fp1 = fp2;
 		crtc->lowfreq_avail = true;
 	} else {
-		I915_WRITE(FP1(pipe), fp);
 		crtc->config.dpll_hw_state.fp1 = fp;
 	}
 }
@@ -5141,12 +5428,34 @@
 
 static void vlv_update_pll(struct intel_crtc *crtc)
 {
+	u32 dpll, dpll_md;
+
+	/*
+	 * Enable DPIO clock input. We should never disable the reference
+	 * clock for pipe B, since VGA hotplug / manual detection depends
+	 * on it.
+	 */
+	dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV |
+		DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV;
+	/* We should never disable this, set it here for state tracking */
+	if (crtc->pipe == PIPE_B)
+		dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
+	dpll |= DPLL_VCO_ENABLE;
+	crtc->config.dpll_hw_state.dpll = dpll;
+
+	dpll_md = (crtc->config.pixel_multiplier - 1)
+		<< DPLL_MD_UDI_MULTIPLIER_SHIFT;
+	crtc->config.dpll_hw_state.dpll_md = dpll_md;
+}
+
+static void vlv_prepare_pll(struct intel_crtc *crtc)
+{
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
-	u32 dpll, mdiv;
+	u32 mdiv;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2;
-	u32 coreclk, reg_val, dpll_md;
+	u32 coreclk, reg_val;
 
 	mutex_lock(&dev_priv->dpio_lock);
 
@@ -5159,7 +5468,7 @@
 	/* See eDP HDMI DPIO driver vbios notes doc */
 
 	/* PLL B needs special handling */
-	if (pipe)
+	if (pipe == PIPE_B)
 		vlv_pllb_recal_opamp(dev_priv, pipe);
 
 	/* Set up Tx target for periodic Rcomp update */
@@ -5203,7 +5512,7 @@
 	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_EDP) ||
 	    intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_DISPLAYPORT)) {
 		/* Use SSC source */
-		if (!pipe)
+		if (pipe == PIPE_A)
 			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
 					 0x0df40000);
 		else
@@ -5211,7 +5520,7 @@
 					 0x0df70000);
 	} else { /* HDMI or VGA */
 		/* Use bend source */
-		if (!pipe)
+		if (pipe == PIPE_A)
 			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
 					 0x0df70000);
 		else
@@ -5227,26 +5536,84 @@
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
 
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
+static void chv_update_pll(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = crtc->pipe;
+	int dpll_reg = DPLL(crtc->pipe);
+	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	u32 loopfilter, intcoeff;
+	u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac;
+	int refclk;
+
+	crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV |
+		DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS |
+		DPLL_VCO_ENABLE;
+	if (pipe != PIPE_A)
+		crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
+
+	crtc->config.dpll_hw_state.dpll_md =
+		(crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
+
+	bestn = crtc->config.dpll.n;
+	bestm2_frac = crtc->config.dpll.m2 & 0x3fffff;
+	bestm1 = crtc->config.dpll.m1;
+	bestm2 = crtc->config.dpll.m2 >> 22;
+	bestp1 = crtc->config.dpll.p1;
+	bestp2 = crtc->config.dpll.p2;
 
 	/*
-	 * Enable DPIO clock input. We should never disable the reference
-	 * clock for pipe B, since VGA hotplug / manual detection depends
-	 * on it.
+	 * Enable Refclk and SSC
 	 */
-	dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV |
-		DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV;
-	/* We should never disable this, set it here for state tracking */
-	if (pipe == PIPE_B)
-		dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
-	dpll |= DPLL_VCO_ENABLE;
-	crtc->config.dpll_hw_state.dpll = dpll;
+	I915_WRITE(dpll_reg,
+		   crtc->config.dpll_hw_state.dpll & ~DPLL_VCO_ENABLE);
 
-	dpll_md = (crtc->config.pixel_multiplier - 1)
-		<< DPLL_MD_UDI_MULTIPLIER_SHIFT;
-	crtc->config.dpll_hw_state.dpll_md = dpll_md;
+	mutex_lock(&dev_priv->dpio_lock);
 
-	if (crtc->config.has_dp_encoder)
-		intel_dp_set_m_n(crtc);
+	/* p1 and p2 divider */
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
+			5 << DPIO_CHV_S1_DIV_SHIFT |
+			bestp1 << DPIO_CHV_P1_DIV_SHIFT |
+			bestp2 << DPIO_CHV_P2_DIV_SHIFT |
+			1 << DPIO_CHV_K_DIV_SHIFT);
+
+	/* Feedback post-divider - m2 */
+	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW0(port), bestm2);
+
+	/* Feedback refclk divider - n and m1 */
+	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW1(port),
+			DPIO_CHV_M1_DIV_BY_2 |
+			1 << DPIO_CHV_N_DIV_SHIFT);
+
+	/* M2 fraction division */
+	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW2(port), bestm2_frac);
+
+	/* M2 fraction division enable */
+	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW3(port),
+		       DPIO_CHV_FRAC_DIV_EN |
+		       (2 << DPIO_CHV_FEEDFWD_GAIN_SHIFT));
+
+	/* Loop filter */
+	refclk = i9xx_get_refclk(&crtc->base, 0);
+	loopfilter = 5 << DPIO_CHV_PROP_COEFF_SHIFT |
+		2 << DPIO_CHV_GAIN_CTRL_SHIFT;
+	if (refclk == 100000)
+		intcoeff = 11;
+	else if (refclk == 38400)
+		intcoeff = 10;
+	else
+		intcoeff = 9;
+	loopfilter |= intcoeff << DPIO_CHV_INT_COEFF_SHIFT;
+	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW6(port), loopfilter);
+
+	/* AFC Recal */
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port),
+			vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
+			DPIO_AFC_RECAL);
 
 	mutex_unlock(&dev_priv->dpio_lock);
 }
@@ -5325,9 +5692,6 @@
 			<< DPLL_MD_UDI_MULTIPLIER_SHIFT;
 		crtc->config.dpll_hw_state.dpll_md = dpll_md;
 	}
-
-	if (crtc->config.has_dp_encoder)
-		intel_dp_set_m_n(crtc);
 }
 
 static void i8xx_update_pll(struct intel_crtc *crtc,
@@ -5567,16 +5931,12 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
-	u32 dspcntr;
 	bool ok, has_reduced_clock = false;
 	bool is_lvds = false, is_dsi = false;
 	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
-	int ret;
 
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
 		switch (encoder->type) {
@@ -5592,7 +5952,7 @@
 	}
 
 	if (is_dsi)
-		goto skip_dpll;
+		return 0;
 
 	if (!intel_crtc->config.clock_set) {
 		refclk = i9xx_get_refclk(crtc, num_connectors);
@@ -5637,43 +5997,17 @@
 		i8xx_update_pll(intel_crtc,
 				has_reduced_clock ? &reduced_clock : NULL,
 				num_connectors);
+	} else if (IS_CHERRYVIEW(dev)) {
+		chv_update_pll(intel_crtc);
 	} else if (IS_VALLEYVIEW(dev)) {
 		vlv_update_pll(intel_crtc);
 	} else {
 		i9xx_update_pll(intel_crtc,
 				has_reduced_clock ? &reduced_clock : NULL,
-                                num_connectors);
+				num_connectors);
 	}
 
-skip_dpll:
-	/* Set up the display plane register */
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
-
-	if (!IS_VALLEYVIEW(dev)) {
-		if (pipe == 0)
-			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
-		else
-			dspcntr |= DISPPLANE_SEL_PIPE_B;
-	}
-
-	intel_set_pipe_timings(intel_crtc);
-
-	/* pipesrc and dspsize control the size that is scaled from,
-	 * which should always be the user's requested size.
-	 */
-	I915_WRITE(DSPSIZE(plane),
-		   ((intel_crtc->config.pipe_src_h - 1) << 16) |
-		   (intel_crtc->config.pipe_src_w - 1));
-	I915_WRITE(DSPPOS(plane), 0);
-
-	i9xx_set_pipeconf(intel_crtc);
-
-	I915_WRITE(DSPCNTR(plane), dspcntr);
-	POSTING_READ(DSPCNTR(plane));
-
-	ret = intel_pipe_set_base(crtc, x, y, fb);
-
-	return ret;
+	return 0;
 }
 
 static void i9xx_get_pfit_config(struct intel_crtc *crtc,
@@ -5793,6 +6127,36 @@
 
 }
 
+static void chv_crtc_clock_get(struct intel_crtc *crtc,
+			       struct intel_crtc_config *pipe_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = pipe_config->cpu_transcoder;
+	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	intel_clock_t clock;
+	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2;
+	int refclk = 100000;
+
+	mutex_lock(&dev_priv->dpio_lock);
+	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
+	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
+	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
+	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
+	clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff);
+	clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf;
+	clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7;
+	clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f;
+
+	chv_clock(refclk, &clock);
+
+	/* clock.dot is the fast clock */
+	pipe_config->port_clock = clock.dot / 5;
+}
+
 static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 				 struct intel_crtc_config *pipe_config)
 {
@@ -5827,6 +6191,9 @@
 		}
 	}
 
+	if (IS_VALLEYVIEW(dev) && (tmp & PIPECONF_COLOR_RANGE_SELECT))
+		pipe_config->limited_color_range = true;
+
 	if (INTEL_INFO(dev)->gen < 4)
 		pipe_config->double_wide = tmp & PIPECONF_DOUBLE_WIDE;
 
@@ -5862,7 +6229,9 @@
 						     DPLL_PORTB_READY_MASK);
 	}
 
-	if (IS_VALLEYVIEW(dev))
+	if (IS_CHERRYVIEW(dev))
+		chv_crtc_clock_get(crtc, pipe_config);
+	else if (IS_VALLEYVIEW(dev))
 		vlv_crtc_clock_get(crtc, pipe_config);
 	else
 		i9xx_crtc_clock_get(crtc, pipe_config);
@@ -5983,8 +6352,7 @@
 			if (intel_panel_use_ssc(dev_priv) && can_ssc) {
 				DRM_DEBUG_KMS("Using SSC on eDP\n");
 				val |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
-			}
-			else
+			} else
 				val |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
 		} else
 			val |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
@@ -6563,10 +6931,7 @@
 				  struct drm_framebuffer *fb)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 	int num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
 	u32 dpll = 0, fp = 0, fp2 = 0;
@@ -6574,7 +6939,6 @@
 	bool is_lvds = false;
 	struct intel_encoder *encoder;
 	struct intel_shared_dpll *pll;
-	int ret;
 
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
 		switch (encoder->type) {
@@ -6624,36 +6988,18 @@
 		pll = intel_get_shared_dpll(intel_crtc);
 		if (pll == NULL) {
 			DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
-					 pipe_name(pipe));
+					 pipe_name(intel_crtc->pipe));
 			return -EINVAL;
 		}
 	} else
 		intel_put_shared_dpll(intel_crtc);
 
-	if (intel_crtc->config.has_dp_encoder)
-		intel_dp_set_m_n(intel_crtc);
-
 	if (is_lvds && has_reduced_clock && i915.powersave)
 		intel_crtc->lowfreq_avail = true;
 	else
 		intel_crtc->lowfreq_avail = false;
 
-	intel_set_pipe_timings(intel_crtc);
-
-	if (intel_crtc->config.has_pch_encoder) {
-		intel_cpu_transcoder_set_m_n(intel_crtc,
-					     &intel_crtc->config.fdi_m_n);
-	}
-
-	ironlake_set_pipeconf(crtc);
-
-	/* Set up the display plane register */
-	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE);
-	POSTING_READ(DSPCNTR(plane));
-
-	ret = intel_pipe_set_base(crtc, x, y, fb);
-
-	return ret;
+	return 0;
 }
 
 static void intel_pch_transcoder_get_m_n(struct intel_crtc *crtc,
@@ -6831,6 +7177,9 @@
 		break;
 	}
 
+	if (tmp & PIPECONF_COLOR_RANGE_SELECT)
+		pipe_config->limited_color_range = true;
+
 	if (I915_READ(PCH_TRANSCONF(crtc->pipe)) & TRANS_ENABLE) {
 		struct intel_shared_dpll *pll;
 
@@ -6880,10 +7229,8 @@
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
 	struct intel_crtc *crtc;
-	unsigned long irqflags;
-	uint32_t val;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+	for_each_intel_crtc(dev, crtc)
 		WARN(crtc->active, "CRTC for pipe %c enabled\n",
 		     pipe_name(crtc->pipe));
 
@@ -6902,14 +7249,29 @@
 	     "Utility pin enabled\n");
 	WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
 
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	val = I915_READ(DEIMR);
-	WARN((val | DE_PCH_EVENT_IVB) != 0xffffffff,
-	     "Unexpected DEIMR bits enabled: 0x%x\n", val);
-	val = I915_READ(SDEIMR);
-	WARN((val | SDE_HOTPLUG_MASK_CPT) != 0xffffffff,
-	     "Unexpected SDEIMR bits enabled: 0x%x\n", val);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	/*
+	 * In theory we can still leave IRQs enabled, as long as only the HPD
+	 * interrupts remain enabled. We used to check for that, but since it's
+	 * gen-specific and since we only disable LCPLL after we fully disable
+	 * the interrupts, the check below should be enough.
+	 */
+	WARN(!dev_priv->pm.irqs_disabled, "IRQs enabled\n");
+}
+
+/* Write D_COMP: pcode write under rps.hw_lock on Haswell, else MMIO. */
+static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val)
+{
+	if (IS_HASWELL(dev_priv->dev)) {
+		mutex_lock(&dev_priv->rps.hw_lock);
+		/* Both the enable and disable paths land here. */
+		if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
+					    val))
+			DRM_ERROR("Failed to write to D_COMP\n");
+		mutex_unlock(&dev_priv->rps.hw_lock);
+	} else {
+		I915_WRITE(D_COMP, val);
+	}
+	POSTING_READ(D_COMP);
+}
 
 /*
@@ -6949,11 +7311,7 @@
 
 	val = I915_READ(D_COMP);
 	val |= D_COMP_COMP_DISABLE;
-	mutex_lock(&dev_priv->rps.hw_lock);
-	if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val))
-		DRM_ERROR("Failed to disable D_COMP\n");
-	mutex_unlock(&dev_priv->rps.hw_lock);
-	POSTING_READ(D_COMP);
+	hsw_write_dcomp(dev_priv, val);
 	ndelay(100);
 
 	if (wait_for((I915_READ(D_COMP) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
@@ -7008,11 +7366,7 @@
 	val = I915_READ(D_COMP);
 	val |= D_COMP_COMP_FORCE;
 	val &= ~D_COMP_COMP_DISABLE;
-	mutex_lock(&dev_priv->rps.hw_lock);
-	if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val))
-		DRM_ERROR("Failed to enable D_COMP\n");
-	mutex_unlock(&dev_priv->rps.hw_lock);
-	POSTING_READ(D_COMP);
+	hsw_write_dcomp(dev_priv, val);
 
 	val = I915_READ(LCPLL_CTL);
 	val &= ~LCPLL_PLL_DISABLE;
@@ -7066,8 +7420,6 @@
 	struct drm_device *dev = dev_priv->dev;
 	uint32_t val;
 
-	WARN_ON(!HAS_PC8(dev));
-
 	DRM_DEBUG_KMS("Enabling package C8+\n");
 
 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
@@ -7077,7 +7429,6 @@
 	}
 
 	lpt_disable_clkout_dp(dev);
-	hsw_runtime_pm_disable_interrupts(dev);
 	hsw_disable_lcpll(dev_priv, true, true);
 }
 
@@ -7086,12 +7437,9 @@
 	struct drm_device *dev = dev_priv->dev;
 	uint32_t val;
 
-	WARN_ON(!HAS_PC8(dev));
-
 	DRM_DEBUG_KMS("Disabling package C8+\n");
 
 	hsw_restore_lcpll(dev_priv);
-	hsw_runtime_pm_restore_interrupts(dev);
 	lpt_init_pch_refclk(dev);
 
 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
@@ -7101,10 +7449,11 @@
 	}
 
 	intel_prepare_ddi(dev);
-	i915_gem_init_swizzling(dev);
-	mutex_lock(&dev_priv->rps.hw_lock);
-	gen6_update_ring_freq(dev);
-	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void snb_modeset_global_resources(struct drm_device *dev)
+{
+	modeset_update_crtc_power_domains(dev);
 }
 
 static void haswell_modeset_global_resources(struct drm_device *dev)
@@ -7116,39 +7465,15 @@
 				 int x, int y,
 				 struct drm_framebuffer *fb)
 {
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int plane = intel_crtc->plane;
-	int ret;
 
 	if (!intel_ddi_pll_select(intel_crtc))
 		return -EINVAL;
 	intel_ddi_pll_enable(intel_crtc);
 
-	if (intel_crtc->config.has_dp_encoder)
-		intel_dp_set_m_n(intel_crtc);
-
 	intel_crtc->lowfreq_avail = false;
 
-	intel_set_pipe_timings(intel_crtc);
-
-	if (intel_crtc->config.has_pch_encoder) {
-		intel_cpu_transcoder_set_m_n(intel_crtc,
-					     &intel_crtc->config.fdi_m_n);
-	}
-
-	haswell_set_pipeconf(crtc);
-
-	intel_set_pipe_csc(crtc);
-
-	/* Set up the display plane register */
-	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE);
-	POSTING_READ(DSPCNTR(plane));
-
-	ret = intel_pipe_set_base(crtc, x, y, fb);
-
-	return ret;
+	return 0;
 }
 
 static bool haswell_get_pipe_config(struct intel_crtc *crtc,
@@ -7228,38 +7553,6 @@
 	return true;
 }
 
-static int intel_crtc_mode_set(struct drm_crtc *crtc,
-			       int x, int y,
-			       struct drm_framebuffer *fb)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_encoder *encoder;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_display_mode *mode = &intel_crtc->config.requested_mode;
-	int pipe = intel_crtc->pipe;
-	int ret;
-
-	drm_vblank_pre_modeset(dev, pipe);
-
-	ret = dev_priv->display.crtc_mode_set(crtc, x, y, fb);
-
-	drm_vblank_post_modeset(dev, pipe);
-
-	if (ret != 0)
-		return ret;
-
-	for_each_encoder_on_crtc(dev, crtc, encoder) {
-		DRM_DEBUG_KMS("[ENCODER:%d:%s] set [MODE:%d:%s]\n",
-			encoder->base.base.id,
-			drm_get_encoder_name(&encoder->base),
-			mode->base.id, mode->name);
-		encoder->mode_set(encoder);
-	}
-
-	return 0;
-}
-
 static struct {
 	int clock;
 	u32 config;
@@ -7374,8 +7667,6 @@
 {
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	uint8_t *eld = connector->eld;
-	struct drm_device *dev = crtc->dev;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t eldv;
 	uint32_t i;
 	int len;
@@ -7387,17 +7678,14 @@
 	int aud_config = HSW_AUD_CFG(pipe);
 	int aud_cntrl_st2 = HSW_AUD_PIN_ELD_CP_VLD;
 
-
-	DRM_DEBUG_DRIVER("HDMI: Haswell Audio initialize....\n");
-
 	/* Audio output enable */
 	DRM_DEBUG_DRIVER("HDMI audio: enable codec\n");
 	tmp = I915_READ(aud_cntrl_st2);
 	tmp |= (AUDIO_OUTPUT_ENABLE_A << (pipe * 4));
 	I915_WRITE(aud_cntrl_st2, tmp);
+	POSTING_READ(aud_cntrl_st2);
 
-	/* Wait for 1 vertical blank */
-	intel_wait_for_vblank(dev, pipe);
+	assert_pipe_disabled(dev_priv, to_intel_crtc(crtc)->pipe);
 
 	/* Set ELD valid state */
 	tmp = I915_READ(aud_cntrl_st2);
@@ -7417,7 +7705,6 @@
 	DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(pipe));
 
 	eldv = AUDIO_ELD_VALID_A << (pipe * 4);
-	intel_crtc->eld_vld = true;
 
 	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
 		DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n");
@@ -7564,9 +7851,9 @@
 
 	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 			 connector->base.id,
-			 drm_get_connector_name(connector),
+			 connector->name,
 			 connector->encoder->base.id,
-			 drm_get_encoder_name(connector->encoder));
+			 connector->encoder->name);
 
 	connector->eld[6] = drm_av_sync_delay(connector, mode) / 2;
 
@@ -7579,29 +7866,33 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	bool visible = base != 0;
-	u32 cntl;
+	uint32_t cntl;
 
-	if (intel_crtc->cursor_visible == visible)
-		return;
-
-	cntl = I915_READ(_CURACNTR);
-	if (visible) {
+	if (base != intel_crtc->cursor_base) {
 		/* On these chipsets we can only modify the base whilst
 		 * the cursor is disabled.
 		 */
+		if (intel_crtc->cursor_cntl) {
+			I915_WRITE(_CURACNTR, 0);
+			POSTING_READ(_CURACNTR);
+			intel_crtc->cursor_cntl = 0;
+		}
+
 		I915_WRITE(_CURABASE, base);
+		POSTING_READ(_CURABASE);
+	}
 
-		cntl &= ~(CURSOR_FORMAT_MASK);
-		/* XXX width must be 64, stride 256 => 0x00 << 28 */
-		cntl |= CURSOR_ENABLE |
+	/* XXX width must be 64, stride 256 => 0x00 << 28 */
+	cntl = 0;
+	if (base)
+		cntl = (CURSOR_ENABLE |
 			CURSOR_GAMMA_ENABLE |
-			CURSOR_FORMAT_ARGB;
-	} else
-		cntl &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE);
-	I915_WRITE(_CURACNTR, cntl);
-
-	intel_crtc->cursor_visible = visible;
+			CURSOR_FORMAT_ARGB);
+	if (intel_crtc->cursor_cntl != cntl) {
+		I915_WRITE(_CURACNTR, cntl);
+		POSTING_READ(_CURACNTR);
+		intel_crtc->cursor_cntl = cntl;
+	}
 }
 
 static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
@@ -7610,16 +7901,12 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	bool visible = base != 0;
+	uint32_t cntl;
 
-	if (intel_crtc->cursor_visible != visible) {
-		int16_t width = intel_crtc->cursor_width;
-		uint32_t cntl = I915_READ(CURCNTR(pipe));
-		if (base) {
-			cntl &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT);
-			cntl |= MCURSOR_GAMMA_ENABLE;
-
-			switch (width) {
+	cntl = 0;
+	if (base) {
+		cntl = MCURSOR_GAMMA_ENABLE;
+		switch (intel_crtc->cursor_width) {
 			case 64:
 				cntl |= CURSOR_MODE_64_ARGB_AX;
 				break;
@@ -7632,18 +7919,16 @@
 			default:
 				WARN_ON(1);
 				return;
-			}
-			cntl |= pipe << 28; /* Connect to correct pipe */
-		} else {
-			cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
-			cntl |= CURSOR_MODE_DISABLE;
 		}
-		I915_WRITE(CURCNTR(pipe), cntl);
-
-		intel_crtc->cursor_visible = visible;
+		cntl |= pipe << 28; /* Connect to correct pipe */
 	}
+	if (intel_crtc->cursor_cntl != cntl) {
+		I915_WRITE(CURCNTR(pipe), cntl);
+		POSTING_READ(CURCNTR(pipe));
+		intel_crtc->cursor_cntl = cntl;
+	}
+
 	/* and commit changes on next vblank */
-	POSTING_READ(CURCNTR(pipe));
 	I915_WRITE(CURBASE(pipe), base);
 	POSTING_READ(CURBASE(pipe));
 }
@@ -7654,15 +7939,12 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	bool visible = base != 0;
+	uint32_t cntl;
 
-	if (intel_crtc->cursor_visible != visible) {
-		int16_t width = intel_crtc->cursor_width;
-		uint32_t cntl = I915_READ(CURCNTR_IVB(pipe));
-		if (base) {
-			cntl &= ~CURSOR_MODE;
-			cntl |= MCURSOR_GAMMA_ENABLE;
-			switch (width) {
+	cntl = 0;
+	if (base) {
+		cntl = MCURSOR_GAMMA_ENABLE;
+		switch (intel_crtc->cursor_width) {
 			case 64:
 				cntl |= CURSOR_MODE_64_ARGB_AX;
 				break;
@@ -7675,23 +7957,20 @@
 			default:
 				WARN_ON(1);
 				return;
-			}
-		} else {
-			cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
-			cntl |= CURSOR_MODE_DISABLE;
 		}
-		if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
-			cntl |= CURSOR_PIPE_CSC_ENABLE;
-			cntl &= ~CURSOR_TRICKLE_FEED_DISABLE;
-		}
-		I915_WRITE(CURCNTR_IVB(pipe), cntl);
-
-		intel_crtc->cursor_visible = visible;
 	}
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+		cntl |= CURSOR_PIPE_CSC_ENABLE;
+
+	if (intel_crtc->cursor_cntl != cntl) {
+		I915_WRITE(CURCNTR(pipe), cntl);
+		POSTING_READ(CURCNTR(pipe));
+		intel_crtc->cursor_cntl = cntl;
+	}
+
 	/* and commit changes on next vblank */
-	POSTING_READ(CURCNTR_IVB(pipe));
-	I915_WRITE(CURBASE_IVB(pipe), base);
-	POSTING_READ(CURBASE_IVB(pipe));
+	I915_WRITE(CURBASE(pipe), base);
+	POSTING_READ(CURBASE(pipe));
 }
 
 /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */
@@ -7705,7 +7984,6 @@
 	int x = intel_crtc->cursor_x;
 	int y = intel_crtc->cursor_y;
 	u32 base = 0, pos = 0;
-	bool visible;
 
 	if (on)
 		base = intel_crtc->cursor_addr;
@@ -7734,20 +8012,18 @@
 	}
 	pos |= y << CURSOR_Y_SHIFT;
 
-	visible = base != 0;
-	if (!visible && !intel_crtc->cursor_visible)
+	if (base == 0 && intel_crtc->cursor_base == 0)
 		return;
 
-	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev)) {
-		I915_WRITE(CURPOS_IVB(pipe), pos);
+	I915_WRITE(CURPOS(pipe), pos);
+
+	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev))
 		ivb_update_cursor(crtc, base);
-	} else {
-		I915_WRITE(CURPOS(pipe), pos);
-		if (IS_845G(dev) || IS_I865G(dev))
-			i845_update_cursor(crtc, base);
-		else
-			i9xx_update_cursor(crtc, base);
-	}
+	else if (IS_845G(dev) || IS_I865G(dev))
+		i845_update_cursor(crtc, base);
+	else
+		i9xx_update_cursor(crtc, base);
+	intel_crtc->cursor_base = base;
 }
 
 static int intel_crtc_cursor_set(struct drm_crtc *crtc,
@@ -8015,7 +8291,8 @@
 
 bool intel_get_load_detect_pipe(struct drm_connector *connector,
 				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old)
+				struct intel_load_detect_pipe *old,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_crtc *intel_crtc;
 	struct intel_encoder *intel_encoder =
@@ -8025,11 +8302,19 @@
 	struct drm_crtc *crtc = NULL;
 	struct drm_device *dev = encoder->dev;
 	struct drm_framebuffer *fb;
-	int i = -1;
+	struct drm_mode_config *config = &dev->mode_config;
+	int ret, i = -1;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector),
-		      encoder->base.id, drm_get_encoder_name(encoder));
+		      connector->base.id, connector->name,
+		      encoder->base.id, encoder->name);
+
+	drm_modeset_acquire_init(ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&config->connection_mutex, ctx);
+	if (ret)
+		goto fail_unlock;
 
 	/*
 	 * Algorithm gets a little messy:
@@ -8045,7 +8330,9 @@
 	if (encoder->crtc) {
 		crtc = encoder->crtc;
 
-		mutex_lock(&crtc->mutex);
+		ret = drm_modeset_lock(&crtc->mutex, ctx);
+		if (ret)
+			goto fail_unlock;
 
 		old->dpms_mode = connector->dpms;
 		old->load_detect_temp = false;
@@ -8058,7 +8345,7 @@
 	}
 
 	/* Find an unused one (if possible) */
-	list_for_each_entry(possible_crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, possible_crtc) {
 		i++;
 		if (!(encoder->possible_crtcs & (1 << i)))
 			continue;
@@ -8073,10 +8360,12 @@
 	 */
 	if (!crtc) {
 		DRM_DEBUG_KMS("no pipe available for load-detect\n");
-		return false;
+		goto fail_unlock;
 	}
 
-	mutex_lock(&crtc->mutex);
+	ret = drm_modeset_lock(&crtc->mutex, ctx);
+	if (ret)
+		goto fail_unlock;
 	intel_encoder->new_crtc = to_intel_crtc(crtc);
 	to_intel_connector(connector)->new_encoder = intel_encoder;
 
@@ -8126,12 +8415,21 @@
 		intel_crtc->new_config = &intel_crtc->config;
 	else
 		intel_crtc->new_config = NULL;
-	mutex_unlock(&crtc->mutex);
+fail_unlock:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
+
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+
 	return false;
 }
 
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old)
+				    struct intel_load_detect_pipe *old,
+				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *intel_encoder =
 		intel_attached_encoder(connector);
@@ -8140,8 +8438,8 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector),
-		      encoder->base.id, drm_get_encoder_name(encoder));
+		      connector->base.id, connector->name,
+		      encoder->base.id, encoder->name);
 
 	if (old->load_detect_temp) {
 		to_intel_connector(connector)->new_encoder = NULL;
@@ -8155,7 +8453,7 @@
 			drm_framebuffer_unreference(old->release_fb);
 		}
 
-		mutex_unlock(&crtc->mutex);
+		goto unlock;
 		return;
 	}
 
@@ -8163,7 +8461,9 @@
 	if (old->dpms_mode != DRM_MODE_DPMS_ON)
 		connector->funcs->dpms(connector, old->dpms_mode);
 
-	mutex_unlock(&crtc->mutex);
+unlock:
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -8449,7 +8749,7 @@
 	if (!i915.powersave)
 		goto out;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		if (!crtc->primary->fb)
 			continue;
 
@@ -8464,7 +8764,7 @@
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_ring_buffer *ring)
+			struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_crtc *crtc;
@@ -8472,7 +8772,7 @@
 	if (!i915.powersave)
 		return;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		if (!crtc->primary->fb)
 			continue;
 
@@ -8560,7 +8860,7 @@
 	if (work->event)
 		drm_send_vblank_event(dev, intel_crtc->pipe, work->event);
 
-	drm_vblank_put(dev, intel_crtc->pipe);
+	drm_crtc_vblank_put(crtc);
 
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
@@ -8587,6 +8887,48 @@
 	do_intel_finish_page_flip(dev, crtc);
 }
 
+/* Is 'a' after or equal to 'b'? */
+static bool g4x_flip_count_after_eq(u32 a, u32 b)
+{
+	return !((a - b) & 0x80000000);
+}
+
+static bool page_flip_finished(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * The relevant registers don't exist on pre-ctg.
+	 * As the flip done interrupt doesn't trigger for mmio
+	 * flips on gmch platforms, a flip count check isn't
+	 * really needed there. But since ctg has the registers,
+	 * include it in the check anyway.
+	 */
+	if (INTEL_INFO(dev)->gen < 5 && !IS_G4X(dev))
+		return true;
+
+	/*
+	 * A DSPSURFLIVE check isn't enough in case the mmio and CS flips
+	 * used the same base address. In that case the mmio flip might
+	 * have completed, but the CS hasn't even executed the flip yet.
+	 *
+	 * A flip count check isn't enough as the CS might have updated
+	 * the base address just after start of vblank, but before we
+	 * managed to process the interrupt. This means we'd complete the
+	 * CS flip too soon.
+	 *
+	 * Combining both checks should get us a good enough result. It may
+	 * still happen that the CS flip has been executed, but has not
+	 * yet actually completed. But in case the base address is the same
+	 * anyway, we don't really care.
+	 */
+	return (I915_READ(DSPSURFLIVE(crtc->plane)) & ~0xfff) ==
+		crtc->unpin_work->gtt_offset &&
+		g4x_flip_count_after_eq(I915_READ(PIPE_FLIPCOUNT_GM45(crtc->pipe)),
+				    crtc->unpin_work->flip_count);
+}
+
 void intel_prepare_page_flip(struct drm_device *dev, int plane)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -8599,12 +8941,12 @@
 	 * is also accompanied by a spurious intel_prepare_page_flip().
 	 */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work)
+	if (intel_crtc->unpin_work && page_flip_finished(intel_crtc))
 		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
-inline static void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
+static inline void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
 {
 	/* Ensure that the work item is consistent when activating it ... */
 	smp_wmb();
@@ -8617,21 +8959,16 @@
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
+				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	u32 flip_mask;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto err;
-
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	/* Can't queue multiple flips, so wait for the previous
 	 * one to finish before executing the next.
@@ -8645,38 +8982,28 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
 
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
 	return 0;
-
-err_unpin:
-	intel_unpin_fb_obj(obj);
-err:
-	return ret;
 }
 
 static int intel_gen3_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
+				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	u32 flip_mask;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto err;
-
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	if (intel_crtc->plane)
 		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
@@ -8687,38 +9014,29 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
 	return 0;
-
-err_unpin:
-	intel_unpin_fb_obj(obj);
-err:
-	return ret;
 }
 
 static int intel_gen4_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
+				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t pf, pipesrc;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto err;
-
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	/* i965+ uses the linear or tiled offsets from the
 	 * Display Registers (which do not change across a page-flip)
@@ -8727,8 +9045,7 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring,
-			(i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset) |
+	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset |
 			obj->tiling_mode);
 
 	/* XXX Enabling the panel-fitter across page-flip is so far
@@ -8742,37 +9059,28 @@
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
 	return 0;
-
-err_unpin:
-	intel_unpin_fb_obj(obj);
-err:
-	return ret;
 }
 
 static int intel_gen6_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
+				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
 	uint32_t pf, pipesrc;
 	int ret;
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto err;
-
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode);
-	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
 
 	/* Contrary to the suggestions in the documentation,
 	 * "Enable Panel Fitter" does not seem to be required when page
@@ -8787,34 +9095,20 @@
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
 	return 0;
-
-err_unpin:
-	intel_unpin_fb_obj(obj);
-err:
-	return ret;
 }
 
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
+				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_ring_buffer *ring;
 	uint32_t plane_bit = 0;
 	int len, ret;
 
-	ring = obj->ring;
-	if (IS_VALLEYVIEW(dev) || ring == NULL || ring->id != RCS)
-		ring = &dev_priv->ring[BCS];
-
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto err;
-
-	switch(intel_crtc->plane) {
+	switch (intel_crtc->plane) {
 	case PLANE_A:
 		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
 		break;
@@ -8826,13 +9120,20 @@
 		break;
 	default:
 		WARN_ONCE(1, "unknown plane in flip command\n");
-		ret = -ENODEV;
-		goto err_unpin;
+		return -ENODEV;
 	}
 
 	len = 4;
-	if (ring->id == RCS)
+	if (ring->id == RCS) {
 		len += 6;
+		/*
+		 * On Gen 8, SRM takes an extra dword to accommodate
+		 * 48-bit addresses, and we need a NOOP for the batch size to
+		 * stay even.
+		 */
+		if (IS_GEN8(dev))
+			len += 2;
+	}
 
 	/*
 	 * BSpec MI_DISPLAY_FLIP for IVB:
@@ -8846,11 +9147,11 @@
 	 */
 	ret = intel_ring_cacheline_align(ring);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	ret = intel_ring_begin(ring, len);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	/* Unmask the flip-done completion message. Note that the bspec says that
 	 * we should do this for both the BCS and RCS, and that we must not unmask
@@ -8867,31 +9168,35 @@
 		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 					DERRMR_PIPEB_PRI_FLIP_DONE |
 					DERRMR_PIPEC_PRI_FLIP_DONE));
-		intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) |
-				MI_SRM_LRM_GLOBAL_GTT);
+		if (IS_GEN8(dev))
+			intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) |
+					      MI_SRM_LRM_GLOBAL_GTT);
+		else
+			intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) |
+					      MI_SRM_LRM_GLOBAL_GTT);
 		intel_ring_emit(ring, DERRMR);
 		intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
+		if (IS_GEN8(dev)) {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, MI_NOOP);
+		}
 	}
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
-	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
 	intel_ring_emit(ring, (MI_NOOP));
 
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
 	return 0;
-
-err_unpin:
-	intel_unpin_fb_obj(obj);
-err:
-	return ret;
 }
 
 static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
 				    struct drm_i915_gem_object *obj,
+				    struct intel_engine_cs *ring,
 				    uint32_t flags)
 {
 	return -ENODEV;
@@ -8908,6 +9213,7 @@
 	struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)->obj;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_unpin_work *work;
+	struct intel_engine_cs *ring;
 	unsigned long flags;
 	int ret;
 
@@ -8936,7 +9242,7 @@
 	work->old_fb_obj = to_intel_framebuffer(old_fb)->obj;
 	INIT_WORK(&work->work, intel_unpin_work_fn);
 
-	ret = drm_vblank_get(dev, intel_crtc->pipe);
+	ret = drm_crtc_vblank_get(crtc);
 	if (ret)
 		goto free_work;
 
@@ -8945,7 +9251,7 @@
 	if (intel_crtc->unpin_work) {
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		kfree(work);
-		drm_vblank_put(dev, intel_crtc->pipe);
+		drm_crtc_vblank_put(crtc);
 
 		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
 		return -EBUSY;
@@ -8973,10 +9279,30 @@
 	atomic_inc(&intel_crtc->unpin_work_count);
 	intel_crtc->reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 
-	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, page_flip_flags);
+	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
+		work->flip_count = I915_READ(PIPE_FLIPCOUNT_GM45(intel_crtc->pipe)) + 1;
+
+	if (IS_VALLEYVIEW(dev)) {
+		ring = &dev_priv->ring[BCS];
+	} else if (INTEL_INFO(dev)->gen >= 7) {
+		ring = obj->ring;
+		if (ring == NULL || ring->id != RCS)
+			ring = &dev_priv->ring[BCS];
+	} else {
+		ring = &dev_priv->ring[RCS];
+	}
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
 	if (ret)
 		goto cleanup_pending;
 
+	work->gtt_offset =
+		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+
+	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring, page_flip_flags);
+	if (ret)
+		goto cleanup_unpin;
+
 	intel_disable_fbc(dev);
 	intel_mark_fb_busy(obj, NULL);
 	mutex_unlock(&dev->struct_mutex);
@@ -8985,6 +9311,8 @@
 
 	return 0;
 
+cleanup_unpin:
+	intel_unpin_fb_obj(obj);
 cleanup_pending:
 	atomic_dec(&intel_crtc->unpin_work_count);
 	crtc->primary->fb = old_fb;
@@ -8997,7 +9325,7 @@
 	intel_crtc->unpin_work = NULL;
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
-	drm_vblank_put(dev, intel_crtc->pipe);
+	drm_crtc_vblank_put(crtc);
 free_work:
 	kfree(work);
 
@@ -9040,8 +9368,7 @@
 			to_intel_crtc(encoder->base.crtc);
 	}
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = crtc->base.enabled;
 
 		if (crtc->new_enabled)
@@ -9072,21 +9399,20 @@
 		encoder->base.crtc = &encoder->new_crtc->base;
 	}
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		crtc->base.enabled = crtc->new_enabled;
 	}
 }
 
 static void
-connected_sink_compute_bpp(struct intel_connector * connector,
+connected_sink_compute_bpp(struct intel_connector *connector,
 			   struct intel_crtc_config *pipe_config)
 {
 	int bpp = pipe_config->pipe_bpp;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] checking for sink bpp constrains\n",
 		connector->base.base.id,
-		drm_get_connector_name(&connector->base));
+		connector->base.name);
 
 	/* Don't use an invalid EDID bpc value */
 	if (connector->base.display_info.bpc &&
@@ -9427,8 +9753,7 @@
 	}
 
 	/* Check for pipes that will be enabled/disabled ... */
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, intel_crtc) {
 		if (intel_crtc->base.enabled == intel_crtc->new_enabled)
 			continue;
 
@@ -9501,8 +9826,7 @@
 	intel_modeset_commit_output_state(dev);
 
 	/* Double check state. */
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, intel_crtc) {
 		WARN_ON(intel_crtc->base.enabled != intel_crtc_in_use(&intel_crtc->base));
 		WARN_ON(intel_crtc->new_config &&
 			intel_crtc->new_config != &intel_crtc->config);
@@ -9631,6 +9955,12 @@
 	PIPE_CONF_CHECK_I(adjusted_mode.crtc_vsync_end);
 
 	PIPE_CONF_CHECK_I(pixel_multiplier);
+	PIPE_CONF_CHECK_I(has_hdmi_sink);
+	if ((INTEL_INFO(dev)->gen < 8 && !IS_HASWELL(dev)) ||
+	    IS_VALLEYVIEW(dev))
+		PIPE_CONF_CHECK_I(limited_color_range);
+
+	PIPE_CONF_CHECK_I(has_audio);
 
 	PIPE_CONF_CHECK_FLAGS(adjusted_mode.flags,
 			      DRM_MODE_FLAG_INTERLACE);
@@ -9728,7 +10058,7 @@
 
 		DRM_DEBUG_KMS("[ENCODER:%d:%s]\n",
 			      encoder->base.base.id,
-			      drm_get_encoder_name(&encoder->base));
+			      encoder->base.name);
 
 		WARN(&encoder->new_crtc->base != encoder->base.crtc,
 		     "encoder's stage crtc doesn't match current crtc\n");
@@ -9780,8 +10110,7 @@
 	struct intel_encoder *encoder;
 	struct intel_crtc_config pipe_config;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		bool enabled = false;
 		bool active = false;
 
@@ -9870,8 +10199,7 @@
 		     "pll on state mismatch (expected %i, found %i)\n",
 		     pll->on, active);
 
-		list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-				    base.head) {
+		for_each_intel_crtc(dev, crtc) {
 			if (crtc->base.enabled && intel_crtc_to_shared_dpll(crtc) == pll)
 				enabled_crtcs++;
 			if (crtc->active && intel_crtc_to_shared_dpll(crtc) == pll)
@@ -9911,6 +10239,44 @@
 	     pipe_config->adjusted_mode.crtc_clock, dotclock);
 }
 
+static void update_scanline_offset(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+
+	/*
+	 * Set crtc->scanline_offset, the value to add to the scanline counter.
+	 * The scanline counter increments at the leading edge of hsync.
+	 *
+	 * On most platforms it starts counting from vtotal-1 on the first
+	 * active line, so the counter always reads one less than we would
+	 * expect. I.e. just after start of vblank, which also occurs at start
+	 * of hsync (on the last active line), the counter reads vblank_start-1.
+	 *
+	 * On gen2 the scanline counter starts counting from 1 instead
+	 * of vtotal-1, so we have to subtract one (or rather add vtotal-1
+	 * to keep the value positive), instead of adding one.
+	 *
+	 * On HSW+ the behaviour of the scanline counter depends on the output
+	 * type. For DP ports it behaves like most other platforms, but on HDMI
+	 * there's an extra 1 line difference. So we need to add two instead of
+	 * one to the value.
+	 */
+	if (IS_GEN2(dev)) {
+		const struct drm_display_mode *mode = &crtc->config.adjusted_mode;
+		int vtotal;
+
+		vtotal = mode->crtc_vtotal;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			vtotal /= 2;
+
+		crtc->scanline_offset = vtotal - 1;
+	} else if (HAS_DDI(dev) &&
+		   intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI)) {
+		crtc->scanline_offset = 2;
+	} else
+		crtc->scanline_offset = 1;
+}
+
 static int __intel_set_mode(struct drm_crtc *crtc,
 			    struct drm_display_mode *mode,
 			    int x, int y, struct drm_framebuffer *fb)
@@ -10002,15 +10368,38 @@
 	 * on the DPLL.
 	 */
 	for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) {
-		ret = intel_crtc_mode_set(&intel_crtc->base,
-					  x, y, fb);
+		struct drm_framebuffer *old_fb;
+
+		mutex_lock(&dev->struct_mutex);
+		ret = intel_pin_and_fence_fb_obj(dev,
+						 to_intel_framebuffer(fb)->obj,
+						 NULL);
+		if (ret != 0) {
+			DRM_ERROR("pin & fence failed\n");
+			mutex_unlock(&dev->struct_mutex);
+			goto done;
+		}
+		old_fb = crtc->primary->fb;
+		if (old_fb)
+			intel_unpin_fb_obj(to_intel_framebuffer(old_fb)->obj);
+		mutex_unlock(&dev->struct_mutex);
+
+		crtc->primary->fb = fb;
+		crtc->x = x;
+		crtc->y = y;
+
+		ret = dev_priv->display.crtc_mode_set(&intel_crtc->base,
+						      x, y, fb);
 		if (ret)
 			goto done;
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
-	for_each_intel_crtc_masked(dev, prepare_pipes, intel_crtc)
+	for_each_intel_crtc_masked(dev, prepare_pipes, intel_crtc) {
+		update_scanline_offset(intel_crtc);
+
 		dev_priv->display.crtc_enable(&intel_crtc->base);
+	}
 
 	/* FIXME: add subpixel order */
 done:
@@ -10086,7 +10475,7 @@
 	 * restored, not the drivers personal bookkeeping.
 	 */
 	count = 0;
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		config->save_crtc_enabled[count++] = crtc->enabled;
 	}
 
@@ -10112,7 +10501,7 @@
 	int count;
 
 	count = 0;
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = config->save_crtc_enabled[count++];
 
 		if (crtc->new_enabled)
@@ -10236,7 +10625,7 @@
 
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [NOCRTC]\n",
 				connector->base.base.id,
-				drm_get_connector_name(&connector->base));
+				connector->base.name);
 		}
 
 
@@ -10271,7 +10660,7 @@
 
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d]\n",
 			connector->base.base.id,
-			drm_get_connector_name(&connector->base),
+			connector->base.name,
 			new_crtc->base.id);
 	}
 
@@ -10302,8 +10691,7 @@
 	}
 	/* Now we've also updated encoder->new_crtc for all encoders. */
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = false;
 
 		list_for_each_entry(encoder,
@@ -10516,7 +10904,7 @@
 	struct intel_crtc *crtc;
 
 	/* Make sure no transcoder isn't still depending on us. */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		if (intel_crtc_to_shared_dpll(crtc) == pll)
 			assert_pch_transcoder_disabled(dev_priv, crtc->pipe);
 	}
@@ -10573,16 +10961,6 @@
 
 	drm_crtc_init(dev, &intel_crtc->base, &intel_crtc_funcs);
 
-	if (IS_GEN2(dev)) {
-		intel_crtc->max_cursor_width = GEN2_CURSOR_WIDTH;
-		intel_crtc->max_cursor_height = GEN2_CURSOR_HEIGHT;
-	} else {
-		intel_crtc->max_cursor_width = CURSOR_WIDTH;
-		intel_crtc->max_cursor_height = CURSOR_HEIGHT;
-	}
-	dev->mode_config.cursor_width = intel_crtc->max_cursor_width;
-	dev->mode_config.cursor_height = intel_crtc->max_cursor_height;
-
 	drm_mode_crtc_set_gamma_size(&intel_crtc->base, 256);
 	for (i = 0; i < 256; i++) {
 		intel_crtc->lut_r[i] = i;
@@ -10601,19 +10979,27 @@
 		intel_crtc->plane = !pipe;
 	}
 
+	intel_crtc->cursor_base = ~0;
+	intel_crtc->cursor_cntl = ~0;
+
+	init_waitqueue_head(&intel_crtc->vbl_wait);
+
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
 	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
 	dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
 	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
 
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
+
+	WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
 }
 
 enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
 {
 	struct drm_encoder *encoder = connector->base.encoder;
+	struct drm_device *dev = connector->base.dev;
 
-	WARN_ON(!mutex_is_locked(&connector->base.dev->mode_config.mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!encoder)
 		return INVALID_PIPE;
@@ -10709,7 +11095,7 @@
 
 	intel_lvds_init(dev);
 
-	if (!IS_ULT(dev))
+	if (!IS_ULT(dev) && !IS_CHERRYVIEW(dev) && dev_priv->vbt.int_crt_support)
 		intel_crt_init(dev);
 
 	if (HAS_DDI(dev)) {
@@ -10773,6 +11159,15 @@
 				intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
 		}
 
+		if (IS_CHERRYVIEW(dev)) {
+			if (I915_READ(VLV_DISPLAY_BASE + CHV_HDMID) & SDVO_DETECTED) {
+				intel_hdmi_init(dev, VLV_DISPLAY_BASE + CHV_HDMID,
+						PORT_D);
+				if (I915_READ(VLV_DISPLAY_BASE + DP_D) & DP_DETECTED)
+					intel_dp_init(dev, VLV_DISPLAY_BASE + DP_D, PORT_D);
+			}
+		}
+
 		intel_dsi_init(dev);
 	} else if (SUPPORTS_DIGITAL_OUTPUTS(dev)) {
 		bool found = false;
@@ -11002,6 +11397,8 @@
 
 	if (HAS_PCH_SPLIT(dev) || IS_G4X(dev))
 		dev_priv->display.find_dpll = g4x_find_best_dpll;
+	else if (IS_CHERRYVIEW(dev))
+		dev_priv->display.find_dpll = chv_find_best_dpll;
 	else if (IS_VALLEYVIEW(dev))
 		dev_priv->display.find_dpll = vlv_find_best_dpll;
 	else if (IS_PINEVIEW(dev))
@@ -11083,6 +11480,8 @@
 		} else if (IS_GEN6(dev)) {
 			dev_priv->display.fdi_link_train = gen6_fdi_link_train;
 			dev_priv->display.write_eld = ironlake_write_eld;
+			dev_priv->display.modeset_global_resources =
+				snb_modeset_global_resources;
 		} else if (IS_IVYBRIDGE(dev)) {
 			/* FIXME: detect B0+ stepping and use auto training */
 			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
@@ -11211,9 +11610,6 @@
 	/* ThinkPad T60 needs pipe A force quirk (bug #16494) */
 	{ 0x2782, 0x17aa, 0x201a, quirk_pipea_force },
 
-	/* 830 needs to leave pipe A & dpll A up */
-	{ 0x3577, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
-
 	/* Lenovo U160 cannot use SSC on LVDS */
 	{ 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
 
@@ -11287,9 +11683,7 @@
 
 	intel_reset_dpio(dev);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_enable_gt_powersave(dev);
-	mutex_unlock(&dev->struct_mutex);
 }
 
 void intel_modeset_suspend_hw(struct drm_device *dev)
@@ -11333,6 +11727,15 @@
 		dev->mode_config.max_width = 8192;
 		dev->mode_config.max_height = 8192;
 	}
+
+	if (IS_GEN2(dev)) {
+		dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH;
+		dev->mode_config.cursor_height = GEN2_CURSOR_HEIGHT;
+	} else {
+		dev->mode_config.cursor_width = MAX_CURSOR_WIDTH;
+		dev->mode_config.cursor_height = MAX_CURSOR_HEIGHT;
+	}
+
 	dev->mode_config.fb_base = dev_priv->gtt.mappable_base;
 
 	DRM_DEBUG_KMS("%d display pipe%s available.\n",
@@ -11362,12 +11765,11 @@
 	/* Just in case the BIOS is doing something questionable. */
 	intel_disable_fbc(dev);
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock_all(dev);
 	intel_modeset_setup_hw_state(dev, false);
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock_all(dev);
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		if (!crtc->active)
 			continue;
 
@@ -11395,6 +11797,7 @@
 	struct intel_connector *connector;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
+	struct drm_modeset_acquire_ctx ctx;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
 	 * proper mode and output configuration. As a gross hack, enable pipe A
@@ -11411,8 +11814,8 @@
 	if (!crt)
 		return;
 
-	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp))
-		intel_release_load_detect_pipe(crt, &load_detect_temp);
+	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, &ctx))
+		intel_release_load_detect_pipe(crt, &load_detect_temp, &ctx);
 
 
 }
@@ -11447,6 +11850,12 @@
 	reg = PIPECONF(crtc->config.cpu_transcoder);
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
+	/* restore vblank interrupts to correct state */
+	if (crtc->active)
+		drm_vblank_on(dev, crtc->pipe);
+	else
+		drm_vblank_off(dev, crtc->pipe);
+
 	/* We need to sanitize the plane -> pipe mapping first because this will
 	 * disable the crtc (and hence change the state) if it is wrong. Note
 	 * that gen4+ has a fixed plane -> pipe mapping.  */
@@ -11525,16 +11934,25 @@
 			encoder->base.crtc = NULL;
 		}
 	}
-	if (crtc->active) {
+
+	if (crtc->active || IS_VALLEYVIEW(dev) || INTEL_INFO(dev)->gen < 5) {
 		/*
 		 * We start out with underrun reporting disabled to avoid races.
 		 * For correct bookkeeping mark this on active crtcs.
 		 *
+		 * Also on gmch platforms we don't have any hardware bits to
+		 * disable the underrun reporting. Which means we need to start
+		 * out with underrun reporting disabled also on inactive pipes,
+		 * since otherwise we'll complain about the garbage we read when
+		 * e.g. coming up after runtime pm.
+		 *
 		 * No protection against concurrent access is required - at
 		 * worst a fifo underrun happens which also sets this to false.
 		 */
 		crtc->cpu_fifo_underrun_disabled = true;
 		crtc->pch_fifo_underrun_disabled = true;
+
+		update_scanline_offset(crtc);
 	}
 }
 
@@ -11552,7 +11970,7 @@
 	if (encoder->connectors_active && !has_active_crtc) {
 		DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n",
 			      encoder->base.base.id,
-			      drm_get_encoder_name(&encoder->base));
+			      encoder->base.name);
 
 		/* Connector is active, but has no active pipe. This is
 		 * fallout from our resume register restoring. Disable
@@ -11560,7 +11978,7 @@
 		if (encoder->base.crtc) {
 			DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
 				      encoder->base.base.id,
-				      drm_get_encoder_name(&encoder->base));
+				      encoder->base.name);
 			encoder->disable(encoder);
 		}
 		encoder->base.crtc = NULL;
@@ -11611,6 +12029,16 @@
 	i915_redisable_vga_power_on(dev);
 }
 
+static bool primary_get_hw_state(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	/* True iff crtc is active and DSPCNTR has DISPLAY_PLANE_ENABLE set. */
+	if (!crtc->active)
+		return false;
+
+	return I915_READ(DSPCNTR(crtc->plane)) & DISPLAY_PLANE_ENABLE;
+}
+
 static void intel_modeset_readout_hw_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -11620,8 +12048,7 @@
 	struct intel_connector *connector;
 	int i;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		memset(&crtc->config, 0, sizeof(crtc->config));
 
 		crtc->config.quirks |= PIPE_CONFIG_QUIRK_INHERITED_MODE;
@@ -11630,7 +12057,7 @@
 								 &crtc->config);
 
 		crtc->base.enabled = crtc->active;
-		crtc->primary_enabled = crtc->active;
+		crtc->primary_enabled = primary_get_hw_state(crtc);
 
 		DRM_DEBUG_KMS("[CRTC:%d] hw state readout: %s\n",
 			      crtc->base.base.id,
@@ -11646,8 +12073,7 @@
 
 		pll->on = pll->get_hw_state(dev_priv, pll, &pll->hw_state);
 		pll->active = 0;
-		list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-				    base.head) {
+		for_each_intel_crtc(dev, crtc) {
 			if (crtc->active && intel_crtc_to_shared_dpll(crtc) == pll)
 				pll->active++;
 		}
@@ -11672,7 +12098,7 @@
 		encoder->connectors_active = false;
 		DRM_DEBUG_KMS("[ENCODER:%d:%s] hw state readout: %s, pipe %c\n",
 			      encoder->base.base.id,
-			      drm_get_encoder_name(&encoder->base),
+			      encoder->base.name,
 			      encoder->base.crtc ? "enabled" : "disabled",
 			      pipe_name(pipe));
 	}
@@ -11689,7 +12115,7 @@
 		}
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] hw state readout: %s\n",
 			      connector->base.base.id,
-			      drm_get_connector_name(&connector->base),
+			      connector->base.name,
 			      connector->base.encoder ? "enabled" : "disabled");
 	}
 }
@@ -11712,8 +12138,7 @@
 	 * Note that this could go away if we move to using crtc_config
 	 * checking everywhere.
 	 */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
-			    base.head) {
+	for_each_intel_crtc(dev, crtc) {
 		if (crtc->active && i915.fastboot) {
 			intel_mode_from_pipe_config(&crtc->base.mode, &crtc->config);
 			DRM_DEBUG_KMS("[CRTC:%d] found active mode: ",
@@ -11789,7 +12214,7 @@
 	 * for this.
 	 */
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(c, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, c) {
 		if (!c->primary->fb)
 			continue;
 
@@ -11835,7 +12260,7 @@
 
 	intel_unregister_dsm_handler();
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		/* Skip inactive CRTCs */
 		if (!crtc->primary->fb)
 			continue;
@@ -11933,6 +12358,7 @@
 	struct intel_pipe_error_state {
 		bool power_domain_on;
 		u32 source;
+		u32 stat;
 	} pipe[I915_MAX_PIPES];
 
 	struct intel_plane_error_state {
@@ -11990,15 +12416,9 @@
 		if (!error->pipe[i].power_domain_on)
 			continue;
 
-		if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) {
-			error->cursor[i].control = I915_READ(CURCNTR(i));
-			error->cursor[i].position = I915_READ(CURPOS(i));
-			error->cursor[i].base = I915_READ(CURBASE(i));
-		} else {
-			error->cursor[i].control = I915_READ(CURCNTR_IVB(i));
-			error->cursor[i].position = I915_READ(CURPOS_IVB(i));
-			error->cursor[i].base = I915_READ(CURBASE_IVB(i));
-		}
+		error->cursor[i].control = I915_READ(CURCNTR(i));
+		error->cursor[i].position = I915_READ(CURPOS(i));
+		error->cursor[i].base = I915_READ(CURBASE(i));
 
 		error->plane[i].control = I915_READ(DSPCNTR(i));
 		error->plane[i].stride = I915_READ(DSPSTRIDE(i));
@@ -12014,6 +12434,9 @@
 		}
 
 		error->pipe[i].source = I915_READ(PIPESRC(i));
+
+		if (!HAS_PCH_SPLIT(dev))
+			error->pipe[i].stat = I915_READ(PIPESTAT(i));
 	}
 
 	error->num_transcoders = INTEL_INFO(dev)->num_pipes;
@@ -12064,6 +12487,7 @@
 		err_printf(m, "  Power: %s\n",
 			   error->pipe[i].power_domain_on ? "on" : "off");
 		err_printf(m, "  SRC: %08x\n", error->pipe[i].source);
+		err_printf(m, "  STAT: %08x\n", error->pipe[i].stat);
 
 		err_printf(m, "Plane [%d]:\n", i);
 		err_printf(m, "  CNTR: %08x\n", error->plane[i].control);

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2a00cb8..52fda95 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c

@@ -64,6 +64,24 @@
 		{ .p1 = 2, .p2 = 2, .n = 1, .m1 = 2, .m2 = 27 } }
 };
 
+/*
+ * CHV supports eDP 1.4, which has more link rates.
+ * The table below provides only the fixed rates and excludes variable rates.
+ */
+static const struct dp_link_dpll chv_dpll[] = {
+	/*
+	 * CHV requires programming a fractional division for m2.
+	 * m2 is stored in fixed-point format using the formula below:
+	 * (m2_int << 22) | m2_fraction
+	 */
+	{ DP_LINK_BW_1_62,	/* m2_int = 32, m2_fraction = 1677722 */
+		{ .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a } },
+	{ DP_LINK_BW_2_7,	/* m2_int = 27, m2_fraction = 0 */
+		{ .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } },
+	{ DP_LINK_BW_5_4,	/* m2_int = 27, m2_fraction = 0 */
+		{ .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } }
+};
+
 /**
  * is_edp - is the given port attached to an eDP panel (either CPU or PCH)
  * @intel_dp: DP struct
@@ -330,8 +348,12 @@
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
+	enum intel_display_power_domain power_domain;
 
-	return !dev_priv->pm.suspended &&
+	power_domain = intel_display_port_power_domain(intel_encoder);
+	return intel_display_power_enabled(dev_priv, power_domain) &&
 	       (I915_READ(_pp_ctrl_reg(intel_dp)) & EDP_FORCE_VDD) != 0;
 }
 
@@ -697,9 +719,9 @@
 	DRM_DEBUG_KMS("registering %s bus for %s\n", name,
 		      connector->base.kdev->kobj.name);
 
-	ret = drm_dp_aux_register_i2c_bus(&intel_dp->aux);
+	ret = drm_dp_aux_register(&intel_dp->aux);
 	if (ret < 0) {
-		DRM_ERROR("drm_dp_aux_register_i2c_bus() for %s failed (%d)\n",
+		DRM_ERROR("drm_dp_aux_register() for %s failed (%d)\n",
 			  name, ret);
 		return;
 	}
@@ -709,7 +731,7 @@
 				intel_dp->aux.ddc.dev.kobj.name);
 	if (ret < 0) {
 		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n", name, ret);
-		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+		drm_dp_aux_unregister(&intel_dp->aux);
 	}
 }
 
@@ -739,6 +761,9 @@
 	} else if (HAS_PCH_SPLIT(dev)) {
 		divisor = pch_dpll;
 		count = ARRAY_SIZE(pch_dpll);
+	} else if (IS_CHERRYVIEW(dev)) {
+		divisor = chv_dpll;
+		count = ARRAY_SIZE(chv_dpll);
 	} else if (IS_VALLEYVIEW(dev)) {
 		divisor = vlv_dpll;
 		count = ARRAY_SIZE(vlv_dpll);
@@ -755,6 +780,20 @@
 	}
 }
 
+static void
+intel_dp_set_m2_n2(struct intel_crtc *crtc, struct intel_link_m_n *m_n)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum transcoder transcoder = crtc->config.cpu_transcoder;
+	/* Write m_n into the M2/N2 data and link registers of this transcoder. */
+	I915_WRITE(PIPE_DATA_M2(transcoder),
+		TU_SIZE(m_n->tu) | m_n->gmch_m);
+	I915_WRITE(PIPE_DATA_N2(transcoder), m_n->gmch_n);
+	I915_WRITE(PIPE_LINK_M2(transcoder), m_n->link_m);
+	I915_WRITE(PIPE_LINK_N2(transcoder), m_n->link_n);
+}
+
 bool
 intel_dp_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_config *pipe_config)
@@ -780,6 +819,7 @@
 		pipe_config->has_pch_encoder = true;
 
 	pipe_config->has_dp_encoder = true;
+	pipe_config->has_audio = intel_dp->has_audio;
 
 	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
 		intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
@@ -880,6 +920,14 @@
 			       pipe_config->port_clock,
 			       &pipe_config->dp_m_n);
 
+	if (intel_connector->panel.downclock_mode != NULL &&
+		intel_dp->drrs_state.type == SEAMLESS_DRRS_SUPPORT) {
+			intel_link_compute_m_n(bpp, lane_count,
+				intel_connector->panel.downclock_mode->clock,
+				pipe_config->port_clock,
+				&pipe_config->dp_m2_n2);
+	}
+
 	intel_dp_set_clock(encoder, pipe_config, intel_dp->link_bw);
 
 	return true;
@@ -915,7 +963,7 @@
 	udelay(500);
 }
 
-static void intel_dp_mode_set(struct intel_encoder *encoder)
+static void intel_dp_prepare(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -950,7 +998,7 @@
 	intel_dp->DP |= DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0;
 	intel_dp->DP |= DP_PORT_WIDTH(intel_dp->lane_count);
 
-	if (intel_dp->has_audio) {
+	if (crtc->config.has_audio) {
 		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
 				 pipe_name(crtc->pipe));
 		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
@@ -983,14 +1031,15 @@
 		if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
 			intel_dp->DP |= DP_ENHANCED_FRAMING;
 
-		if (crtc->pipe == 1)
-			intel_dp->DP |= DP_PIPEB_SELECT;
+		if (!IS_CHERRYVIEW(dev)) {
+			if (crtc->pipe == 1)
+				intel_dp->DP |= DP_PIPEB_SELECT;
+		} else {
+			intel_dp->DP |= DP_PIPE_SELECT_CHV(crtc->pipe);
+		}
 	} else {
 		intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
 	}
-
-	if (port == PORT_A && !IS_VALLEYVIEW(dev))
-		ironlake_set_pll_cpu_edp(intel_dp);
 }
 
 #define IDLE_ON_MASK		(PP_ON | PP_SEQUENCE_MASK | 0                     | PP_SEQUENCE_STATE_MASK)
@@ -1082,7 +1131,10 @@
 static bool _edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	u32 pp;
 	u32 pp_stat_reg, pp_ctrl_reg;
 	bool need_to_disable = !intel_dp->want_panel_vdd;
@@ -1095,7 +1147,8 @@
 	if (edp_have_panel_vdd(intel_dp))
 		return need_to_disable;
 
-	intel_runtime_pm_get(dev_priv);
+	power_domain = intel_display_port_power_domain(intel_encoder);
+	intel_display_power_get(dev_priv, power_domain);
 
 	DRM_DEBUG_KMS("Turning eDP VDD on\n");
 
@@ -1139,9 +1192,14 @@
 	u32 pp;
 	u32 pp_stat_reg, pp_ctrl_reg;
 
-	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!intel_dp->want_panel_vdd && edp_have_panel_vdd(intel_dp)) {
+		struct intel_digital_port *intel_dig_port =
+						dp_to_dig_port(intel_dp);
+		struct intel_encoder *intel_encoder = &intel_dig_port->base;
+		enum intel_display_power_domain power_domain;
+
 		DRM_DEBUG_KMS("Turning eDP VDD off\n");
 
 		pp = ironlake_get_pp_control(intel_dp);
@@ -1160,7 +1218,8 @@
 		if ((pp & POWER_TARGET_ON) == 0)
 			intel_dp->last_power_cycle = jiffies;
 
-		intel_runtime_pm_put(dev_priv);
+		power_domain = intel_display_port_power_domain(intel_encoder);
+		intel_display_power_put(dev_priv, power_domain);
 	}
 }
 
@@ -1170,9 +1229,9 @@
 						 struct intel_dp, panel_vdd_work);
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	edp_panel_vdd_off_sync(intel_dp);
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 }
 
 static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
@@ -1244,8 +1303,11 @@
 
 void intel_edp_panel_off(struct intel_dp *intel_dp)
 {
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	u32 pp;
 	u32 pp_ctrl_reg;
 
@@ -1275,7 +1337,8 @@
 	wait_panel_off(intel_dp);
 
 	/* We got a reference when we enabled the VDD. */
-	intel_runtime_pm_put(dev_priv);
+	power_domain = intel_display_port_power_domain(intel_encoder);
+	intel_display_power_put(dev_priv, power_domain);
 }
 
 void intel_edp_backlight_on(struct intel_dp *intel_dp)
@@ -1432,6 +1495,8 @@
 
 	if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
 		*pipe = PORT_TO_PIPE_CPT(tmp);
+	} else if (IS_CHERRYVIEW(dev)) {
+		*pipe = DP_PORT_TO_PIPE_CHV(tmp);
 	} else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
 		*pipe = PORT_TO_PIPE(tmp);
 	} else {
@@ -1479,8 +1544,11 @@
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	int dotclock;
 
+	tmp = I915_READ(intel_dp->output_reg);
+	if (tmp & DP_AUDIO_OUTPUT_ENABLE)
+		pipe_config->has_audio = true;
+
 	if ((port == PORT_A) || !HAS_PCH_CPT(dev)) {
-		tmp = I915_READ(intel_dp->output_reg);
 		if (tmp & DP_SYNC_HS_HIGH)
 			flags |= DRM_MODE_FLAG_PHSYNC;
 		else
@@ -1816,17 +1884,59 @@
 		intel_dp_link_down(intel_dp);
 }
 
-static void intel_post_disable_dp(struct intel_encoder *encoder)
+static void g4x_post_disable_dp(struct intel_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	enum port port = dp_to_dig_port(intel_dp)->port;
-	struct drm_device *dev = encoder->base.dev;
 
-	if (port == PORT_A || IS_VALLEYVIEW(dev)) {
-		intel_dp_link_down(intel_dp);
-		if (!IS_VALLEYVIEW(dev))
-			ironlake_edp_pll_off(intel_dp);
-	}
+	if (port != PORT_A)
+		return;
+
+	intel_dp_link_down(intel_dp);
+	ironlake_edp_pll_off(intel_dp);
+}
+
+/* post_disable hook installed for Valleyview: take the DP link down. */
+static void vlv_post_disable_dp(struct intel_encoder *encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+
+	intel_dp_link_down(intel_dp);
+}
+
+/*
+ * post_disable hook installed for Cherryview: take the DP link down, then,
+ * under dpio_lock, set the soft reset request enable bit and clear the TX
+ * lane reset bits in both PCS register groups (PCS01 and PCS23) of the
+ * port's DPIO channel.
+ */
+static void chv_post_disable_dp(struct intel_encoder *encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc =
+		to_intel_crtc(encoder->base.crtc);
+	enum dpio_channel ch = vlv_dport_to_channel(dport);
+	enum pipe pipe = intel_crtc->pipe;
+	u32 val;
+
+	intel_dp_link_down(intel_dp);
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Propagate soft reset to data lane reset */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val);
+
+	/* Clear the lane reset bits; chv_pre_enable_dp() sets them again. */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch));
+	val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch));
+	val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
 
 static void intel_enable_dp(struct intel_encoder *encoder)
@@ -1868,8 +1978,13 @@
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 
-	if (dport->port == PORT_A)
+	intel_dp_prepare(encoder);
+
+	/* Only ilk+ has port A */
+	if (dport->port == PORT_A) {
+		ironlake_set_pll_cpu_edp(intel_dp);
 		ironlake_edp_pll_on(intel_dp);
+	}
 }
 
 static void vlv_pre_enable_dp(struct intel_encoder *encoder)
@@ -1921,6 +2036,8 @@
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	int pipe = intel_crtc->pipe;
 
+	intel_dp_prepare(encoder);
+
 	/* Program Tx lane resets to default */
 	mutex_lock(&dev_priv->dpio_lock);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
@@ -1939,6 +2056,69 @@
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
+/*
+ * pre_enable hook installed for Cherryview. Under dpio_lock it sets the
+ * soft reset request enable and TX lane reset bits in both PCS register
+ * groups and programs the per-lane latency and upar fields. After dropping
+ * the lock it initialises the panel power sequencer for eDP, enables the
+ * port and waits for it to become ready.
+ */
+static void chv_pre_enable_dp(struct intel_encoder *encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct edp_power_seq power_seq;
+	struct intel_crtc *intel_crtc =
+		to_intel_crtc(encoder->base.crtc);
+	enum dpio_channel ch = vlv_dport_to_channel(dport);
+	int pipe = intel_crtc->pipe;
+	int data, i;
+	u32 val;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Deassert soft data lane reset */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val);
+
+	/* Set the lane reset bits that chv_post_disable_dp() clears. */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch));
+	val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch));
+	val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
+
+	/* Program Tx lane latency optimal setting */
+	for (i = 0; i < 4; i++) {
+		/* Set the latency optimal bit */
+		/* Lane index 1 is written with 0, every other lane with 0x6. */
+		data = (i == 1) ? 0x0 : 0x6;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW11(ch, i),
+				data << DPIO_FRC_LATENCY_SHFIT);
+
+		/* Set the upar bit */
+		/* Likewise: 0 for lane index 1, 1 for the other lanes. */
+		data = (i == 1) ? 0x0 : 0x1;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW14(ch, i),
+				data << DPIO_UPAR_SHIFT);
+	}
+
+	/* Data lane stagger programming */
+	/* FIXME: Fix up value only after power analysis */
+
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	if (is_edp(intel_dp)) {
+		/* init power sequencer on this pipe and port */
+		intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq);
+		intel_dp_init_panel_power_sequencer_registers(dev, intel_dp,
+							      &power_seq);
+	}
+
+	intel_enable_dp(encoder);
+
+	vlv_wait_port_ready(dev_priv, dport);
+}
+
 /*
  * Native read with retry for link status and receiver capability reads for
  * cases where the sink may still be asleep.
@@ -2163,6 +2343,166 @@
 	return 0;
 }
 
+/*
+ * Program the CHV DPIO swing de-emphasis and swing margin values for the
+ * voltage swing / pre-emphasis combination requested in
+ * intel_dp->train_set[0].
+ *
+ * Combinations that have no entry in the table below return early without
+ * touching the hardware. The return value is always 0.
+ */
+static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	struct intel_crtc *intel_crtc = to_intel_crtc(dport->base.base.crtc);
+	u32 deemph_reg_value, margin_reg_value, val;
+	uint8_t train_set = intel_dp->train_set[0];
+	enum dpio_channel ch = vlv_dport_to_channel(dport);
+	enum pipe pipe = intel_crtc->pipe;
+	int i;
+
+	/* Pick the de-emphasis and margin values for the requested levels. */
+	switch (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) {
+	case DP_TRAIN_PRE_EMPHASIS_0:
+		switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) {
+		case DP_TRAIN_VOLTAGE_SWING_400:
+			deemph_reg_value = 128;
+			margin_reg_value = 52;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_600:
+			deemph_reg_value = 128;
+			margin_reg_value = 77;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_800:
+			deemph_reg_value = 128;
+			margin_reg_value = 102;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_1200:
+			deemph_reg_value = 128;
+			margin_reg_value = 154;
+			/* FIXME extra to set for 1200 */
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case DP_TRAIN_PRE_EMPHASIS_3_5:
+		switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) {
+		case DP_TRAIN_VOLTAGE_SWING_400:
+			deemph_reg_value = 85;
+			margin_reg_value = 78;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_600:
+			deemph_reg_value = 85;
+			margin_reg_value = 116;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_800:
+			deemph_reg_value = 85;
+			margin_reg_value = 154;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case DP_TRAIN_PRE_EMPHASIS_6:
+		switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) {
+		case DP_TRAIN_VOLTAGE_SWING_400:
+			deemph_reg_value = 64;
+			margin_reg_value = 104;
+			break;
+		case DP_TRAIN_VOLTAGE_SWING_600:
+			deemph_reg_value = 64;
+			margin_reg_value = 154;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case DP_TRAIN_PRE_EMPHASIS_9_5:
+		switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) {
+		case DP_TRAIN_VOLTAGE_SWING_400:
+			deemph_reg_value = 43;
+			margin_reg_value = 154;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Clear calc init */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+	val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+
+	/* Program swing deemph */
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW4(ch, i));
+		val &= ~DPIO_SWING_DEEMPH9P5_MASK;
+		val |= deemph_reg_value << DPIO_SWING_DEEMPH9P5_SHIFT;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW4(ch, i), val);
+	}
+
+	/* Program swing margin */
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
+		val &= ~DPIO_SWING_MARGIN_MASK;
+		val |= margin_reg_value << DPIO_SWING_MARGIN_SHIFT;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
+	}
+
+	/* Disable unique transition scale */
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i));
+		val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val);
+	}
+
+	/* Only 1200 swing with no pre-emphasis re-enables it, scale 0x9a. */
+	if (((train_set & DP_TRAIN_PRE_EMPHASIS_MASK)
+			== DP_TRAIN_PRE_EMPHASIS_0) &&
+		((train_set & DP_TRAIN_VOLTAGE_SWING_MASK)
+			== DP_TRAIN_VOLTAGE_SWING_1200)) {
+
+		/*
+		 * The document said it needs to set bit 27 for ch0 and bit 26
+		 * for ch1. Might be a typo in the doc.
+		 * For now, for this unique transition scale selection, set bit
+		 * 27 for ch0 and ch1.
+		 */
+		for (i = 0; i < 4; i++) {
+			val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i));
+			val |= DPIO_TX_UNIQ_TRANS_SCALE_EN;
+			vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val);
+		}
+
+		for (i = 0; i < 4; i++) {
+			val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
+			val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT);
+			val |= (0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT);
+			vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
+		}
+	}
+
+	/* Start swing calculation */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+	val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+
+	/* LRC Bypass */
+	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
+	val |= DPIO_LRC_BYPASS;
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	return 0;
+}
+
 static void
 intel_get_adjust_train(struct intel_dp *intel_dp,
 		       const uint8_t link_status[DP_LINK_STATUS_SIZE])
@@ -2377,6 +2717,9 @@
 	} else if (IS_HASWELL(dev)) {
 		signal_levels = intel_hsw_signal_levels(train_set);
 		mask = DDI_BUF_EMP_MASK;
+	} else if (IS_CHERRYVIEW(dev)) {
+		signal_levels = intel_chv_signal_levels(intel_dp);
+		mask = 0;
 	} else if (IS_VALLEYVIEW(dev)) {
 		signal_levels = intel_vlv_signal_levels(intel_dp);
 		mask = 0;
@@ -2743,22 +3086,7 @@
 		to_intel_crtc(intel_dig_port->base.base.crtc);
 	uint32_t DP = intel_dp->DP;
 
-	/*
-	 * DDI code has a strict mode set sequence and we should try to respect
-	 * it, otherwise we might hang the machine in many different ways. So we
-	 * really should be disabling the port only on a complete crtc_disable
-	 * sequence. This function is just called under two conditions on DDI
-	 * code:
-	 * - Link train failed while doing crtc_enable, and on this case we
-	 *   really should respect the mode set sequence and wait for a
-	 *   crtc_disable.
-	 * - Someone turned the monitor off and intel_dp_check_link_status
-	 *   called us. We don't need to disable the whole port on this case, so
-	 *   when someone turns the monitor on again,
-	 *   intel_ddi_prepare_link_retrain will take care of redoing the link
-	 *   train.
-	 */
-	if (HAS_DDI(dev))
+	if (WARN_ON(HAS_DDI(dev)))
 		return;
 
 	if (WARN_ON((I915_READ(intel_dp->output_reg) & DP_PORT_EN) == 0))
@@ -2775,9 +3103,6 @@
 	}
 	POSTING_READ(intel_dp->output_reg);
 
-	/* We don't really know why we're doing this */
-	intel_wait_for_vblank(dev, intel_crtc->pipe);
-
 	if (HAS_PCH_IBX(dev) &&
 	    I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
 		struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
@@ -2948,6 +3273,7 @@
 	u8 sink_irq_vector;
 	u8 link_status[DP_LINK_STATUS_SIZE];
 
+	/* FIXME: This access isn't protected by any locks. */
 	if (!intel_encoder->connectors_active)
 		return;
 
@@ -2980,7 +3306,7 @@
 
 	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
 		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
-			      drm_get_encoder_name(&intel_encoder->base));
+			      intel_encoder->base.name);
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_complete_link_train(intel_dp);
 		intel_dp_stop_link_train(intel_dp);
@@ -3166,7 +3492,7 @@
 	intel_display_power_get(dev_priv, power_domain);
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	intel_dp->has_audio = false;
 
@@ -3374,13 +3700,13 @@
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+	drm_dp_aux_unregister(&intel_dp->aux);
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-		mutex_lock(&dev->mode_config.mutex);
+		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 		edp_panel_vdd_off_sync(intel_dp);
-		mutex_unlock(&dev->mode_config.mutex);
+		drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	}
 	kfree(intel_dig_port);
 }
@@ -3651,6 +3977,130 @@
 		      I915_READ(pp_div_reg));
 }
 
+/*
+ * Switch the eDP panel recorded in dev_priv->drrs.connector between its
+ * high and low refresh rate.
+ *
+ * @refresh_rate selects the low rate when it equals the vrefresh of the
+ * panel's downclock mode; any other positive value selects the high rate.
+ * The request is dropped (with a debug message) when no DRRS connector is
+ * registered, when PSR is enabled on gen < 8, when the encoder has no crtc
+ * or the crtc is inactive, when seamless DRRS is not supported, or when
+ * the requested rate type is already the current one.
+ */
+void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_encoder *encoder;
+	struct intel_dp *intel_dp = NULL;
+	struct intel_crtc_config *config = NULL;
+	struct intel_crtc *intel_crtc = NULL;
+	struct intel_connector *intel_connector = dev_priv->drrs.connector;
+	u32 reg, val;
+	enum edp_drrs_refresh_rate_type index = DRRS_HIGH_RR;
+
+	if (refresh_rate <= 0) {
+		DRM_DEBUG_KMS("Refresh rate should be positive non-zero.\n");
+		return;
+	}
+
+	if (intel_connector == NULL) {
+		DRM_DEBUG_KMS("DRRS supported for eDP only.\n");
+		return;
+	}
+
+	if (INTEL_INFO(dev)->gen < 8 && intel_edp_is_psr_enabled(dev)) {
+		DRM_DEBUG_KMS("DRRS is disabled as PSR is enabled\n");
+		return;
+	}
+
+	encoder = intel_attached_encoder(&intel_connector->base);
+	intel_dp = enc_to_intel_dp(&encoder->base);
+	intel_crtc = encoder->new_crtc;
+
+	if (!intel_crtc) {
+		DRM_DEBUG_KMS("DRRS: intel_crtc not initialized\n");
+		return;
+	}
+
+	config = &intel_crtc->config;
+
+	if (intel_dp->drrs_state.type < SEAMLESS_DRRS_SUPPORT) {
+		DRM_DEBUG_KMS("Only Seamless DRRS supported.\n");
+		return;
+	}
+
+	/*
+	 * NOTE(review): downclock_mode is dereferenced without a NULL check;
+	 * presumably it is always set once drrs.connector is - confirm.
+	 */
+	if (intel_connector->panel.downclock_mode->vrefresh == refresh_rate)
+		index = DRRS_LOW_RR;
+
+	if (index == intel_dp->drrs_state.refresh_rate_type) {
+		DRM_DEBUG_KMS(
+			"DRRS requested for previously set RR...ignoring\n");
+		return;
+	}
+
+	if (!intel_crtc->active) {
+		DRM_DEBUG_KMS("eDP encoder disabled. CRTC not Active\n");
+		return;
+	}
+
+	/*
+	 * Only gen7 is programmed here: the low rate writes M2/N2 and sets
+	 * the PIPECONF switch bit, the high rate clears that bit. The software
+	 * state further down is updated on every generation.
+	 */
+	if (INTEL_INFO(dev)->gen > 6 && INTEL_INFO(dev)->gen < 8) {
+		reg = PIPECONF(intel_crtc->config.cpu_transcoder);
+		val = I915_READ(reg);
+		if (index > DRRS_HIGH_RR) {
+			val |= PIPECONF_EDP_RR_MODE_SWITCH;
+			intel_dp_set_m2_n2(intel_crtc, &config->dp_m2_n2);
+		} else {
+			val &= ~PIPECONF_EDP_RR_MODE_SWITCH;
+		}
+		I915_WRITE(reg, val);
+	}
+
+	/*
+	 * mutex taken to ensure that there is no race between different
+	 * drrs calls trying to update refresh rate. This scenario may occur
+	 * in future when idleness detection based DRRS in kernel and
+	 * possible calls from user space to set different RR are made.
+	 *
+	 * NOTE(review): only the store below is under the mutex; the
+	 * refresh_rate_type comparison and the register programming above
+	 * run outside it - confirm this is sufficient.
+	 */
+
+	mutex_lock(&intel_dp->drrs_state.mutex);
+
+	intel_dp->drrs_state.refresh_rate_type = index;
+
+	mutex_unlock(&intel_dp->drrs_state.mutex);
+
+	DRM_DEBUG_KMS("eDP Refresh Rate set to : %dHz\n", refresh_rate);
+}
+
+/*
+ * Set up DRRS state for an eDP connector and look up its downclock mode.
+ *
+ * Returns NULL when the platform is gen6 or older, when the VBT does not
+ * report seamless DRRS, or when no downclock mode is found for
+ * @fixed_mode. Otherwise records @intel_connector in
+ * dev_priv->drrs.connector, initialises intel_dp->drrs_state (starting at
+ * DRRS_HIGH_RR) and returns the downclock mode.
+ */
+static struct drm_display_mode *
+intel_dp_drrs_init(struct intel_digital_port *intel_dig_port,
+			struct intel_connector *intel_connector,
+			struct drm_display_mode *fixed_mode)
+{
+	struct drm_connector *connector = &intel_connector->base;
+	struct intel_dp *intel_dp = &intel_dig_port->dp;
+	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_display_mode *downclock_mode = NULL;
+
+	if (INTEL_INFO(dev)->gen <= 6) {
+		DRM_DEBUG_KMS("DRRS supported for Gen7 and above\n");
+		return NULL;
+	}
+
+	if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) {
+		DRM_INFO("VBT doesn't support DRRS\n");
+		return NULL;
+	}
+
+	downclock_mode = intel_find_panel_downclock
+					(dev, fixed_mode, connector);
+
+	if (!downclock_mode) {
+		DRM_INFO("DRRS not supported\n");
+		return NULL;
+	}
+
+	/* intel_dp_set_drrs_state() reads the connector from here. */
+	dev_priv->drrs.connector = intel_connector;
+
+	mutex_init(&intel_dp->drrs_state.mutex);
+
+	intel_dp->drrs_state.type = dev_priv->vbt.drrs_type;
+
+	intel_dp->drrs_state.refresh_rate_type = DRRS_HIGH_RR;
+	DRM_INFO("seamless DRRS supported for eDP panel.\n");
+	return downclock_mode;
+}
+
 static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 				     struct intel_connector *intel_connector,
 				     struct edp_power_seq *power_seq)
@@ -3661,10 +4111,13 @@
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_display_mode *fixed_mode = NULL;
+	struct drm_display_mode *downclock_mode = NULL;
 	bool has_dpcd;
 	struct drm_display_mode *scan;
 	struct edid *edid;
 
+	intel_dp->drrs_state.type = DRRS_NOT_SUPPORTED;
+
 	if (!is_edp(intel_dp))
 		return true;
 
@@ -3715,6 +4168,9 @@
 	list_for_each_entry(scan, &connector->probed_modes, head) {
 		if ((scan->type & DRM_MODE_TYPE_PREFERRED)) {
 			fixed_mode = drm_mode_duplicate(dev, scan);
+			downclock_mode = intel_dp_drrs_init(
+						intel_dig_port,
+						intel_connector, fixed_mode);
 			break;
 		}
 	}
@@ -3728,7 +4184,7 @@
 	}
 	mutex_unlock(&dev->mode_config.mutex);
 
-	intel_panel_init(&intel_connector->panel, fixed_mode, NULL);
+	intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode);
 	intel_panel_setup_backlight(connector);
 
 	return true;
@@ -3826,12 +4282,12 @@
 	intel_dp->psr_setup_done = false;
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector, &power_seq)) {
-		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+		drm_dp_aux_unregister(&intel_dp->aux);
 		if (is_edp(intel_dp)) {
 			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-			mutex_lock(&dev->mode_config.mutex);
+			drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 			edp_panel_vdd_off_sync(intel_dp);
-			mutex_unlock(&dev->mode_config.mutex);
+			drm_modeset_unlock(&dev->mode_config.connection_mutex);
 		}
 		drm_sysfs_connector_remove(connector);
 		drm_connector_cleanup(connector);
@@ -3877,25 +4333,36 @@
 			 DRM_MODE_ENCODER_TMDS);
 
 	intel_encoder->compute_config = intel_dp_compute_config;
-	intel_encoder->mode_set = intel_dp_mode_set;
 	intel_encoder->disable = intel_disable_dp;
-	intel_encoder->post_disable = intel_post_disable_dp;
 	intel_encoder->get_hw_state = intel_dp_get_hw_state;
 	intel_encoder->get_config = intel_dp_get_config;
-	if (IS_VALLEYVIEW(dev)) {
+	if (IS_CHERRYVIEW(dev)) {
+		intel_encoder->pre_enable = chv_pre_enable_dp;
+		intel_encoder->enable = vlv_enable_dp;
+		intel_encoder->post_disable = chv_post_disable_dp;
+	} else if (IS_VALLEYVIEW(dev)) {
 		intel_encoder->pre_pll_enable = vlv_dp_pre_pll_enable;
 		intel_encoder->pre_enable = vlv_pre_enable_dp;
 		intel_encoder->enable = vlv_enable_dp;
+		intel_encoder->post_disable = vlv_post_disable_dp;
 	} else {
 		intel_encoder->pre_enable = g4x_pre_enable_dp;
 		intel_encoder->enable = g4x_enable_dp;
+		intel_encoder->post_disable = g4x_post_disable_dp;
 	}
 
 	intel_dig_port->port = port;
 	intel_dig_port->dp.output_reg = output_reg;
 
 	intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	if (IS_CHERRYVIEW(dev)) {
+		if (port == PORT_D)
+			intel_encoder->crtc_mask = 1 << 2;
+		else
+			intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	} else {
+		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	}
 	intel_encoder->cloneable = 0;
 	intel_encoder->hot_plug = intel_dp_hot_plug;
 

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 328b1a7..bda0ae3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h

@@ -81,8 +81,8 @@
 /* Maximum cursor sizes */
 #define GEN2_CURSOR_WIDTH 64
 #define GEN2_CURSOR_HEIGHT 64
-#define CURSOR_WIDTH 256
-#define CURSOR_HEIGHT 256
+#define MAX_CURSOR_WIDTH 256
+#define MAX_CURSOR_HEIGHT 256
 
 #define INTEL_I2C_BUS_DVO 1
 #define INTEL_I2C_BUS_SDVO 2
@@ -106,8 +106,8 @@
 #define INTEL_DVO_CHIP_TMDS 2
 #define INTEL_DVO_CHIP_TVOUT 4
 
-#define INTEL_DSI_COMMAND_MODE	0
-#define INTEL_DSI_VIDEO_MODE	1
+#define INTEL_DSI_VIDEO_MODE	0
+#define INTEL_DSI_COMMAND_MODE	1
 
 struct intel_framebuffer {
 	struct drm_framebuffer base;
@@ -273,6 +273,13 @@
 	 * accordingly. */
 	bool has_dp_encoder;
 
+	/* Whether we should send NULL infoframes. Required for audio. */
+	bool has_hdmi_sink;
+
+	/* Audio enabled on this pipe. Only valid if either has_hdmi_sink or
+	 * has_dp_encoder is set. */
+	bool has_audio;
+
 	/*
 	 * Enable dithering, used when the selected pipe bpp doesn't match the
 	 * plane bpp.
@@ -306,6 +313,9 @@
 	int pipe_bpp;
 	struct intel_link_m_n dp_m_n;
 
+	/* m2_n2 for eDP downclock */
+	struct intel_link_m_n dp_m2_n2;
+
 	/*
 	 * Frequence the dpll for the port should run at. Differs from the
 	 * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also
@@ -343,6 +353,9 @@
 	struct intel_wm_level wm[5];
 	uint32_t linetime;
 	bool fbc_wm_enabled;
+	bool pipe_enabled;
+	bool sprites_enabled;
+	bool sprites_scaled;
 };
 
 struct intel_crtc {
@@ -357,7 +370,6 @@
 	 */
 	bool active;
 	unsigned long enabled_power_domains;
-	bool eld_vld;
 	bool primary_enabled; /* is the primary plane (partially) visible? */
 	bool lowfreq_avail;
 	struct intel_overlay *overlay;
@@ -374,8 +386,8 @@
 	uint32_t cursor_addr;
 	int16_t cursor_x, cursor_y;
 	int16_t cursor_width, cursor_height;
-	int16_t max_cursor_width, max_cursor_height;
-	bool cursor_visible;
+	uint32_t cursor_cntl;
+	uint32_t cursor_base;
 
 	struct intel_plane_config plane_config;
 	struct intel_crtc_config config;
@@ -396,6 +408,10 @@
 		/* watermarks currently being used  */
 		struct intel_pipe_wm active;
 	} wm;
+
+	wait_queue_head_t vbl_wait;
+
+	int scanline_offset;
 };
 
 struct intel_plane_wm_parameters {
@@ -479,11 +495,23 @@
 				enum hdmi_infoframe_type type,
 				const void *frame, ssize_t len);
 	void (*set_infoframes)(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode);
 };
 
 #define DP_MAX_DOWNSTREAM_PORTS		0x10
 
+/**
+ * DRRS_HIGH_RR is the highest eDP panel refresh rate read from EDID.
+ * DRRS_LOW_RR is the lowest eDP panel refresh rate found from EDID
+ * parsing for the same resolution.
+ */
+enum edp_drrs_refresh_rate_type {
+	DRRS_HIGH_RR,
+	DRRS_LOW_RR,
+	DRRS_MAX_RR, /* RR count */
+};
+
 struct intel_dp {
 	uint32_t output_reg;
 	uint32_t aux_ch_ctl_reg;
@@ -522,6 +550,12 @@
 				     bool has_aux_irq,
 				     int send_bytes,
 				     uint32_t aux_clock_divider);
+	struct {
+		enum drrs_support_type type;
+		enum edp_drrs_refresh_rate_type refresh_rate_type;
+		struct mutex mutex;
+	} drrs_state;
+
 };
 
 struct intel_digital_port {
@@ -537,6 +571,7 @@
 {
 	switch (dport->port) {
 	case PORT_B:
+	case PORT_D:
 		return DPIO_CH0;
 	case PORT_C:
 		return DPIO_CH1;
@@ -545,6 +580,20 @@
 	}
 }
 
+/*
+ * Map a pipe to its DPIO channel: PIPE_A and PIPE_C map to DPIO_CH0,
+ * PIPE_B maps to DPIO_CH1. Any other pipe value hits BUG().
+ */
+static inline int
+vlv_pipe_to_channel(enum pipe pipe)
+{
+	switch (pipe) {
+	case PIPE_A:
+	case PIPE_C:
+		return DPIO_CH0;
+	case PIPE_B:
+		return DPIO_CH1;
+	default:
+		BUG();
+	}
+}
+
 static inline struct drm_crtc *
 intel_get_crtc_for_pipe(struct drm_device *dev, int pipe)
 {
@@ -569,6 +618,8 @@
 #define INTEL_FLIP_INACTIVE	0
 #define INTEL_FLIP_PENDING	1
 #define INTEL_FLIP_COMPLETE	2
+	u32 flip_count;
+	u32 gtt_offset;
 	bool enable_stall_check;
 };
 
@@ -620,8 +671,6 @@
 /* i915_irq.c */
 bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
 					   enum pipe pipe, bool enable);
-bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
-					     enum pipe pipe, bool enable);
 bool intel_set_pch_fifo_underrun_reporting(struct drm_device *dev,
 					   enum transcoder pch_transcoder,
 					   bool enable);
@@ -629,8 +678,12 @@
 void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
-void hsw_runtime_pm_disable_interrupts(struct drm_device *dev);
-void hsw_runtime_pm_restore_interrupts(struct drm_device *dev);
+void bdw_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
+void bdw_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
+void intel_runtime_pm_disable_interrupts(struct drm_device *dev);
+void intel_runtime_pm_restore_interrupts(struct drm_device *dev);
+int intel_get_crtc_scanline(struct intel_crtc *crtc);
+void i9xx_check_fifo_underruns(struct drm_device *dev);
 
 
 /* intel_crt.c */
@@ -666,9 +719,10 @@
 const char *intel_output_name(int output);
 bool intel_has_pending_fb_unpin(struct drm_device *dev);
 int intel_pch_rawclk(struct drm_device *dev);
+int valleyview_cur_cdclk(struct drm_i915_private *dev_priv);
 void intel_mark_busy(struct drm_device *dev);
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_ring_buffer *ring);
+			struct intel_engine_cs *ring);
 void intel_mark_idle(struct drm_device *dev);
 void intel_crtc_restore_mode(struct drm_crtc *crtc);
 void intel_crtc_update_dpms(struct drm_crtc *crtc);
@@ -695,12 +749,14 @@
 			 struct intel_digital_port *dport);
 bool intel_get_load_detect_pipe(struct drm_connector *connector,
 				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old);
+				struct intel_load_detect_pipe *old,
+				struct drm_modeset_acquire_ctx *ctx);
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old);
+				    struct intel_load_detect_pipe *old,
+				    struct drm_modeset_acquire_ctx *ctx);
 int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			       struct drm_i915_gem_object *obj,
-			       struct intel_ring_buffer *pipelined);
+			       struct intel_engine_cs *pipelined);
 void intel_unpin_fb_obj(struct drm_i915_gem_object *obj);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
@@ -751,6 +807,8 @@
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_config *pipe_config);
 int intel_format_to_fourcc(int format);
+void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
+
 
 /* intel_dp.c */
 void intel_dp_init(struct drm_device *dev, int output_reg, enum port port);
@@ -774,7 +832,7 @@
 void intel_edp_psr_enable(struct intel_dp *intel_dp);
 void intel_edp_psr_disable(struct intel_dp *intel_dp);
 void intel_edp_psr_update(struct drm_device *dev);
-
+void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate);
 
 /* intel_dsi.c */
 bool intel_dsi_init(struct drm_device *dev);
@@ -876,6 +934,7 @@
 /* intel_pm.c */
 void intel_init_clock_gating(struct drm_device *dev);
 void intel_suspend_hw(struct drm_device *dev);
+int ilk_wm_max_level(const struct drm_device *dev);
 void intel_update_watermarks(struct drm_crtc *crtc);
 void intel_update_sprite_watermarks(struct drm_plane *plane,
 				    struct drm_crtc *crtc,
@@ -902,6 +961,7 @@
 void intel_cleanup_gt_powersave(struct drm_device *dev);
 void intel_enable_gt_powersave(struct drm_device *dev);
 void intel_disable_gt_powersave(struct drm_device *dev);
+void intel_reset_gt_powersave(struct drm_device *dev);
 void ironlake_teardown_rc6(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
@@ -909,11 +969,13 @@
 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
 void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
 void intel_init_runtime_pm(struct drm_i915_private *dev_priv);
 void intel_fini_runtime_pm(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
-
+void __vlv_set_power_well(struct drm_i915_private *dev_priv,
+			  enum punit_power_well power_well_id, bool enable);
 
 /* intel_sdvo.c */
 bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 3365664..02f99d7 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c

@@ -35,6 +35,11 @@
 
 /* the sub-encoders aka panel drivers */
 static const struct intel_dsi_device intel_dsi_devices[] = {
+	{
+		.panel_id = MIPI_DSI_GENERIC_PANEL_ID,
+		.name = "vbt-generic-dsi-vid-mode-display",
+		.dev_ops = &vbt_generic_dsi_display_ops,
+	},
 };
 
 static void band_gap_reset(struct drm_i915_private *dev_priv)
@@ -59,12 +64,12 @@
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
 {
-	return intel_dsi->dev.type == INTEL_DSI_VIDEO_MODE;
+	return intel_dsi->operation_mode == INTEL_DSI_VIDEO_MODE;
 }
 
 static inline bool is_cmd_mode(struct intel_dsi *intel_dsi)
 {
-	return intel_dsi->dev.type == INTEL_DSI_COMMAND_MODE;
+	return intel_dsi->operation_mode == INTEL_DSI_COMMAND_MODE;
 }
 
 static void intel_dsi_hot_plug(struct intel_encoder *encoder)
@@ -94,13 +99,6 @@
 	return true;
 }
 
-static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder)
-{
-	DRM_DEBUG_KMS("\n");
-
-	vlv_enable_dsi_pll(encoder);
-}
-
 static void intel_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
@@ -110,6 +108,15 @@
 
 	DRM_DEBUG_KMS("\n");
 
+	mutex_lock(&dev_priv->dpio_lock);
+	/* program rcomp for compliance, reduce from 50 ohms to 45 ohms
+	 * needed everytime after power gate */
+	vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	/* bandgap reset is needed after everytime we do power gate */
+	band_gap_reset(dev_priv);
+
 	val = I915_READ(MIPI_PORT_CTRL(pipe));
 	I915_WRITE(MIPI_PORT_CTRL(pipe), val | LP_OUTPUT_HOLD);
 	usleep_range(1000, 1500);
@@ -122,21 +129,6 @@
 	I915_WRITE(MIPI_DEVICE_READY(pipe), DEVICE_READY);
 	usleep_range(2000, 2500);
 }
-static void intel_dsi_pre_enable(struct intel_encoder *encoder)
-{
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
-
-	DRM_DEBUG_KMS("\n");
-
-	if (intel_dsi->dev.dev_ops->panel_reset)
-		intel_dsi->dev.dev_ops->panel_reset(&intel_dsi->dev);
-
-	/* put device in ready state */
-	intel_dsi_device_ready(encoder);
-
-	if (intel_dsi->dev.dev_ops->send_otp_cmds)
-		intel_dsi->dev.dev_ops->send_otp_cmds(&intel_dsi->dev);
-}
 
 static void intel_dsi_enable(struct intel_encoder *encoder)
 {
@@ -153,18 +145,78 @@
 		I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(pipe), 8 * 4);
 	else {
 		msleep(20); /* XXX */
-		dpi_send_cmd(intel_dsi, TURN_ON);
+		dpi_send_cmd(intel_dsi, TURN_ON, DPI_LP_MODE_EN);
 		msleep(100);
 
+		if (intel_dsi->dev.dev_ops->enable)
+			intel_dsi->dev.dev_ops->enable(&intel_dsi->dev);
+
 		/* assert ip_tg_enable signal */
 		temp = I915_READ(MIPI_PORT_CTRL(pipe)) & ~LANE_CONFIGURATION_MASK;
 		temp = temp | intel_dsi->port_bits;
 		I915_WRITE(MIPI_PORT_CTRL(pipe), temp | DPI_ENABLE);
 		POSTING_READ(MIPI_PORT_CTRL(pipe));
 	}
+}
 
-	if (intel_dsi->dev.dev_ops->enable)
-		intel_dsi->dev.dev_ops->enable(&intel_dsi->dev);
+static void intel_dsi_pre_enable(struct intel_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	enum pipe pipe = intel_crtc->pipe;
+	u32 tmp;
+
+	DRM_DEBUG_KMS("\n");
+
+	/* Disable DPOunit clock gating, can stall pipe
+	 * and we need DPLL REFA always enabled */
+	tmp = I915_READ(DPLL(pipe));
+	tmp |= DPLL_REFA_CLK_ENABLE_VLV;
+	I915_WRITE(DPLL(pipe), tmp);
+
+	tmp = I915_READ(DSPCLK_GATE_D);
+	tmp |= DPOUNIT_CLOCK_GATE_DISABLE;
+	I915_WRITE(DSPCLK_GATE_D, tmp);
+
+	/* put device in ready state */
+	intel_dsi_device_ready(encoder);
+
+	msleep(intel_dsi->panel_on_delay);
+
+	if (intel_dsi->dev.dev_ops->panel_reset)
+		intel_dsi->dev.dev_ops->panel_reset(&intel_dsi->dev);
+
+	if (intel_dsi->dev.dev_ops->send_otp_cmds)
+		intel_dsi->dev.dev_ops->send_otp_cmds(&intel_dsi->dev);
+
+	/* Enable port in pre-enable phase itself because as per hw team
+	 * recommendation, port should be enabled befor plane & pipe */
+	intel_dsi_enable(encoder);
+}
+
+static void intel_dsi_enable_nop(struct intel_encoder *encoder)
+{
+	DRM_DEBUG_KMS("\n");
+
+	/* for DSI port enable has to be done before pipe
+	 * and plane enable, so port enable is done in
+	 * pre_enable phase itself unlike other encoders
+	 */
+}
+
+static void intel_dsi_pre_disable(struct intel_encoder *encoder)
+{
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+
+	DRM_DEBUG_KMS("\n");
+
+	if (is_vid_mode(intel_dsi)) {
+		/* Send Shutdown command to the panel in LP mode */
+		dpi_send_cmd(intel_dsi, SHUTDOWN, DPI_LP_MODE_EN);
+		msleep(10);
+	}
 }
 
 static void intel_dsi_disable(struct intel_encoder *encoder)
@@ -179,9 +231,6 @@
 	DRM_DEBUG_KMS("\n");
 
 	if (is_vid_mode(intel_dsi)) {
-		dpi_send_cmd(intel_dsi, SHUTDOWN);
-		msleep(10);
-
 		/* de-assert ip_tg_enable signal */
 		temp = I915_READ(MIPI_PORT_CTRL(pipe));
 		I915_WRITE(MIPI_PORT_CTRL(pipe), temp & ~DPI_ENABLE);
@@ -190,6 +239,23 @@
 		msleep(2);
 	}
 
+	/* Panel commands can be sent when clock is in LP11 */
+	I915_WRITE(MIPI_DEVICE_READY(pipe), 0x0);
+
+	temp = I915_READ(MIPI_CTRL(pipe));
+	temp &= ~ESCAPE_CLOCK_DIVIDER_MASK;
+	I915_WRITE(MIPI_CTRL(pipe), temp |
+			intel_dsi->escape_clk_div <<
+			ESCAPE_CLOCK_DIVIDER_SHIFT);
+
+	I915_WRITE(MIPI_EOT_DISABLE(pipe), CLOCKSTOP);
+
+	temp = I915_READ(MIPI_DSI_FUNC_PRG(pipe));
+	temp &= ~VID_MODE_FORMAT_MASK;
+	I915_WRITE(MIPI_DSI_FUNC_PRG(pipe), temp);
+
+	I915_WRITE(MIPI_DEVICE_READY(pipe), 0x1);
+
 	/* if disable packets are sent before sending shutdown packet then in
 	 * some next enable sequence send turn on packet error is observed */
 	if (intel_dsi->dev.dev_ops->disable)
@@ -227,16 +293,28 @@
 
 	vlv_disable_dsi_pll(encoder);
 }
+
 static void intel_dsi_post_disable(struct intel_encoder *encoder)
 {
+	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	u32 val;
 
 	DRM_DEBUG_KMS("\n");
 
+	intel_dsi_disable(encoder);
+
 	intel_dsi_clear_device_ready(encoder);
 
+	val = I915_READ(DSPCLK_GATE_D);
+	val &= ~DPOUNIT_CLOCK_GATE_DISABLE;
+	I915_WRITE(DSPCLK_GATE_D, val);
+
 	if (intel_dsi->dev.dev_ops->disable_panel_power)
 		intel_dsi->dev.dev_ops->disable_panel_power(&intel_dsi->dev);
+
+	msleep(intel_dsi->panel_off_delay);
+	msleep(intel_dsi->panel_pwr_cycle_delay);
 }
 
 static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
@@ -364,7 +442,7 @@
 	I915_WRITE(MIPI_VBP_COUNT(pipe), vbp);
 }
 
-static void intel_dsi_mode_set(struct intel_encoder *intel_encoder)
+static void intel_dsi_prepare(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_device *dev = encoder->dev;
@@ -379,9 +457,6 @@
 
 	DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe));
 
-	/* XXX: Location of the call */
-	band_gap_reset(dev_priv);
-
 	/* escape clock divider, 20MHz, shared for A and C. device ready must be
 	 * off when doing this! txclkesc? */
 	tmp = I915_READ(MIPI_CTRL(0));
@@ -452,10 +527,20 @@
 	/* dphy stuff */
 
 	/* in terms of low power clock */
-	I915_WRITE(MIPI_INIT_COUNT(pipe), txclkesc(ESCAPE_CLOCK_DIVIDER_1, 100));
+	I915_WRITE(MIPI_INIT_COUNT(pipe), txclkesc(intel_dsi->escape_clk_div, 100));
+
+	val = 0;
+	if (intel_dsi->eotp_pkt == 0)
+		val |= EOT_DISABLE;
+
+	if (intel_dsi->clock_stop)
+		val |= CLOCKSTOP;
 
 	/* recovery disables */
-	I915_WRITE(MIPI_EOT_DISABLE(pipe), intel_dsi->eot_disable);
+	I915_WRITE(MIPI_EOT_DISABLE(pipe), val);
+
+	/* in terms of low power clock */
+	I915_WRITE(MIPI_INIT_COUNT(pipe), intel_dsi->init_count);
 
 	/* in terms of txbyteclkhs. actual high to low switch +
 	 * MIPI_STOP_STATE_STALL * MIPI_LP_BYTECLK.
@@ -484,9 +569,23 @@
 		   intel_dsi->clk_hs_to_lp_count << HS_LP_PWR_SW_CNT_SHIFT);
 
 	if (is_vid_mode(intel_dsi))
+		/* Some panels might have resolution which is not a multiple of
+		 * 64 like 1366 x 768. Enable RANDOM resolution support for such
+		 * panels by default */
 		I915_WRITE(MIPI_VIDEO_MODE_FORMAT(pipe),
 				intel_dsi->video_frmt_cfg_bits |
-				intel_dsi->video_mode_format);
+				intel_dsi->video_mode_format |
+				IP_TG_CONFIG |
+				RANDOM_DPI_DISPLAY_RESOLUTION);
+}
+
+static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder)
+{
+	DRM_DEBUG_KMS("\n");
+
+	intel_dsi_prepare(encoder);
+
+	vlv_enable_dsi_pll(encoder);
 }
 
 static enum drm_connector_status
@@ -566,11 +665,16 @@
 	struct intel_connector *intel_connector;
 	struct drm_connector *connector;
 	struct drm_display_mode *fixed_mode = NULL;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	const struct intel_dsi_device *dsi;
 	unsigned int i;
 
 	DRM_DEBUG_KMS("\n");
 
+	/* There is no detection method for MIPI so rely on VBT */
+	if (!dev_priv->vbt.has_mipi)
+		return false;
+
 	intel_dsi = kzalloc(sizeof(*intel_dsi), GFP_KERNEL);
 	if (!intel_dsi)
 		return false;
@@ -585,6 +689,13 @@
 	encoder = &intel_encoder->base;
 	intel_dsi->attached_connector = intel_connector;
 
+	if (IS_VALLEYVIEW(dev)) {
+		dev_priv->mipi_mmio_base = VLV_MIPI_BASE;
+	} else {
+		DRM_ERROR("Unsupported Mipi device to reg base");
+		return false;
+	}
+
 	connector = &intel_connector->base;
 
 	drm_encoder_init(dev, encoder, &intel_dsi_funcs, DRM_MODE_ENCODER_DSI);
@@ -594,9 +705,8 @@
 	intel_encoder->compute_config = intel_dsi_compute_config;
 	intel_encoder->pre_pll_enable = intel_dsi_pre_pll_enable;
 	intel_encoder->pre_enable = intel_dsi_pre_enable;
-	intel_encoder->enable = intel_dsi_enable;
-	intel_encoder->mode_set = intel_dsi_mode_set;
-	intel_encoder->disable = intel_dsi_disable;
+	intel_encoder->enable = intel_dsi_enable_nop;
+	intel_encoder->disable = intel_dsi_pre_disable;
 	intel_encoder->post_disable = intel_dsi_post_disable;
 	intel_encoder->get_hw_state = intel_dsi_get_hw_state;
 	intel_encoder->get_config = intel_dsi_get_config;

diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
index b4a27ce..31db33d 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/intel_dsi.h

@@ -31,7 +31,6 @@
 struct intel_dsi_device {
 	unsigned int panel_id;
 	const char *name;
-	int type;
 	const struct intel_dsi_dev_ops *dev_ops;
 	void *dev_priv;
 };
@@ -85,6 +84,9 @@
 	/* virtual channel */
 	int channel;
 
+	/* Video mode or command mode */
+	u16 operation_mode;
+
 	/* number of DSI lanes */
 	unsigned int lane_count;
 
@@ -95,8 +97,10 @@
 	u32 video_mode_format;
 
 	/* eot for MIPI_EOT_DISABLE register */
-	u32 eot_disable;
+	u8 eotp_pkt;
+	u8 clock_stop;
 
+	u8 escape_clk_div;
 	u32 port_bits;
 	u32 bw_timer;
 	u32 dphy_reg;
@@ -110,6 +114,15 @@
 	u16 hs_to_lp_count;
 	u16 clk_lp_to_hs_count;
 	u16 clk_hs_to_lp_count;
+
+	u16 init_count;
+
+	/* all delays in ms */
+	u16 backlight_off_delay;
+	u16 backlight_on_delay;
+	u16 panel_on_delay;
+	u16 panel_off_delay;
+	u16 panel_pwr_cycle_delay;
 };
 
 static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
@@ -120,4 +133,6 @@
 extern void vlv_enable_dsi_pll(struct intel_encoder *encoder);
 extern void vlv_disable_dsi_pll(struct intel_encoder *encoder);
 
+extern struct intel_dsi_dev_ops vbt_generic_dsi_display_ops;
+
 #endif /* _INTEL_DSI_H */

diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.c b/drivers/gpu/drm/i915/intel_dsi_cmd.c
index 7c40f98..3eeb21b 100644
--- a/drivers/gpu/drm/i915/intel_dsi_cmd.c
+++ b/drivers/gpu/drm/i915/intel_dsi_cmd.c

@@ -389,7 +389,7 @@
  *
  * XXX: commands with data in MIPI_DPI_DATA?
  */
-int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd)
+int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs)
 {
 	struct drm_encoder *encoder = &intel_dsi->base.base;
 	struct drm_device *dev = encoder->dev;
@@ -399,7 +399,7 @@
 	u32 mask;
 
 	/* XXX: pipe, hs */
-	if (intel_dsi->hs)
+	if (hs)
 		cmd &= ~DPI_LP_MODE;
 	else
 		cmd |= DPI_LP_MODE;

diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.h b/drivers/gpu/drm/i915/intel_dsi_cmd.h
index 54c8a23..9a18cbf 100644
--- a/drivers/gpu/drm/i915/intel_dsi_cmd.h
+++ b/drivers/gpu/drm/i915/intel_dsi_cmd.h

@@ -33,6 +33,9 @@
 #include "intel_drv.h"
 #include "intel_dsi.h"
 
+#define DPI_LP_MODE_EN	false
+#define DPI_HS_MODE_EN	true
+
 void dsi_hs_mode_enable(struct intel_dsi *intel_dsi, bool enable);
 
 int dsi_vc_dcs_write(struct intel_dsi *intel_dsi, int channel,
@@ -47,7 +50,7 @@
 int dsi_vc_generic_read(struct intel_dsi *intel_dsi, int channel,
 			u8 *reqdata, int reqlen, u8 *buf, int buflen);
 
-int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd);
+int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs);
 
 /* XXX: questionable write helpers */
 static inline int dsi_vc_dcs_write_0(struct intel_dsi *intel_dsi,

diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
new file mode 100644
index 0000000..21a0d34
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c

@@ -0,0 +1,589 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Shobhit Kumar <shobhit.kumar@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_edid.h>
+#include <drm/i915_drm.h>
+#include <linux/slab.h>
+#include <video/mipi_display.h>
+#include <asm/intel-mid.h>
+#include <video/mipi_display.h>
+#include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_dsi.h"
+#include "intel_dsi_cmd.h"
+
+#define MIPI_TRANSFER_MODE_SHIFT	0
+#define MIPI_VIRTUAL_CHANNEL_SHIFT	1
+#define MIPI_PORT_SHIFT			3
+
+#define PREPARE_CNT_MAX		0x3F
+#define EXIT_ZERO_CNT_MAX	0x3F
+#define CLK_ZERO_CNT_MAX	0xFF
+#define TRAIL_CNT_MAX		0x1F
+
+#define NS_KHZ_RATIO 1000000
+
+#define GPI0_NC_0_HV_DDI0_HPD           0x4130
+#define GPIO_NC_0_HV_DDI0_PAD           0x4138
+#define GPIO_NC_1_HV_DDI0_DDC_SDA       0x4120
+#define GPIO_NC_1_HV_DDI0_DDC_SDA_PAD   0x4128
+#define GPIO_NC_2_HV_DDI0_DDC_SCL       0x4110
+#define GPIO_NC_2_HV_DDI0_DDC_SCL_PAD   0x4118
+#define GPIO_NC_3_PANEL0_VDDEN          0x4140
+#define GPIO_NC_3_PANEL0_VDDEN_PAD      0x4148
+#define GPIO_NC_4_PANEL0_BLKEN          0x4150
+#define GPIO_NC_4_PANEL0_BLKEN_PAD      0x4158
+#define GPIO_NC_5_PANEL0_BLKCTL         0x4160
+#define GPIO_NC_5_PANEL0_BLKCTL_PAD     0x4168
+#define GPIO_NC_6_PCONF0                0x4180
+#define GPIO_NC_6_PAD                   0x4188
+#define GPIO_NC_7_PCONF0                0x4190
+#define GPIO_NC_7_PAD                   0x4198
+#define GPIO_NC_8_PCONF0                0x4170
+#define GPIO_NC_8_PAD                   0x4178
+#define GPIO_NC_9_PCONF0                0x4100
+#define GPIO_NC_9_PAD                   0x4108
+#define GPIO_NC_10_PCONF0               0x40E0
+#define GPIO_NC_10_PAD                  0x40E8
+#define GPIO_NC_11_PCONF0               0x40F0
+#define GPIO_NC_11_PAD                  0x40F8
+
+struct gpio_table {
+	u16 function_reg;
+	u16 pad_reg;
+	u8 init;
+};
+
+static struct gpio_table gtable[] = {
+	{ GPI0_NC_0_HV_DDI0_HPD, GPIO_NC_0_HV_DDI0_PAD, 0 },
+	{ GPIO_NC_1_HV_DDI0_DDC_SDA, GPIO_NC_1_HV_DDI0_DDC_SDA_PAD, 0 },
+	{ GPIO_NC_2_HV_DDI0_DDC_SCL, GPIO_NC_2_HV_DDI0_DDC_SCL_PAD, 0 },
+	{ GPIO_NC_3_PANEL0_VDDEN, GPIO_NC_3_PANEL0_VDDEN_PAD, 0 },
+	{ GPIO_NC_4_PANEL0_BLKEN, GPIO_NC_4_PANEL0_BLKEN_PAD, 0 },
+	{ GPIO_NC_5_PANEL0_BLKCTL, GPIO_NC_5_PANEL0_BLKCTL_PAD, 0 },
+	{ GPIO_NC_6_PCONF0, GPIO_NC_6_PAD, 0 },
+	{ GPIO_NC_7_PCONF0, GPIO_NC_7_PAD, 0 },
+	{ GPIO_NC_8_PCONF0, GPIO_NC_8_PAD, 0 },
+	{ GPIO_NC_9_PCONF0, GPIO_NC_9_PAD, 0 },
+	{ GPIO_NC_10_PCONF0, GPIO_NC_10_PAD, 0},
+	{ GPIO_NC_11_PCONF0, GPIO_NC_11_PAD, 0}
+};
+
+static u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, u8 *data)
+{
+	u8 type, byte, mode, vc, port;
+	u16 len;
+
+	byte = *data++;
+	mode = (byte >> MIPI_TRANSFER_MODE_SHIFT) & 0x1;
+	vc = (byte >> MIPI_VIRTUAL_CHANNEL_SHIFT) & 0x3;
+	port = (byte >> MIPI_PORT_SHIFT) & 0x3;
+
+	/* LP or HS mode */
+	intel_dsi->hs = mode;
+
+	/* get packet type and increment the pointer */
+	type = *data++;
+
+	len = *((u16 *) data);
+	data += 2;
+
+	switch (type) {
+	case MIPI_DSI_GENERIC_SHORT_WRITE_0_PARAM:
+		dsi_vc_generic_write_0(intel_dsi, vc);
+		break;
+	case MIPI_DSI_GENERIC_SHORT_WRITE_1_PARAM:
+		dsi_vc_generic_write_1(intel_dsi, vc, *data);
+		break;
+	case MIPI_DSI_GENERIC_SHORT_WRITE_2_PARAM:
+		dsi_vc_generic_write_2(intel_dsi, vc, *data, *(data + 1));
+		break;
+	case MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM:
+	case MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM:
+	case MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM:
+		DRM_DEBUG_DRIVER("Generic Read not yet implemented or used\n");
+		break;
+	case MIPI_DSI_GENERIC_LONG_WRITE:
+		dsi_vc_generic_write(intel_dsi, vc, data, len);
+		break;
+	case MIPI_DSI_DCS_SHORT_WRITE:
+		dsi_vc_dcs_write_0(intel_dsi, vc, *data);
+		break;
+	case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
+		dsi_vc_dcs_write_1(intel_dsi, vc, *data, *(data + 1));
+		break;
+	case MIPI_DSI_DCS_READ:
+		DRM_DEBUG_DRIVER("DCS Read not yet implemented or used\n");
+		break;
+	case MIPI_DSI_DCS_LONG_WRITE:
+		dsi_vc_dcs_write(intel_dsi, vc, data, len);
+		break;
+	};
+
+	data += len;
+
+	return data;
+}
+
+static u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, u8 *data)
+{
+	u32 delay = *((u32 *) data);
+
+	usleep_range(delay, delay + 10);
+	data += 4;
+
+	return data;
+}
+
+/*
+ * GPIO element: the payload is a gpio index byte followed by a pull up/down
+ * action byte. Returns the pointer just past the payload.
+ */
+static u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, u8 *data)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u8 gpio = *data++;
+	u8 action = *data++;	/* pull up/down */
+
+	/* the index is raw VBT data: do not read or write past gtable[] */
+	if (gpio >= ARRAY_SIZE(gtable)) {
+		DRM_DEBUG_KMS("unknown gpio %u\n", gpio);
+		return data;
+	}
+
+	mutex_lock(&dev_priv->dpio_lock);
+	if (!gtable[gpio].init) {
+		/* program the function */
+		/* FIXME: remove constant below */
+		vlv_gpio_nc_write(dev_priv, gtable[gpio].function_reg,
+				  0x2000CC00);
+		gtable[gpio].init = 1;
+	}
+
+	/* pull up/down */
+	vlv_gpio_nc_write(dev_priv, gtable[gpio].pad_reg, 0x4 | action);
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	return data;
+}
+
+typedef u8 * (*fn_mipi_elem_exec)(struct intel_dsi *intel_dsi, u8 *data);
+static const fn_mipi_elem_exec exec_elem[] = {
+	NULL, /* reserved */
+	mipi_exec_send_packet,
+	mipi_exec_delay,
+	mipi_exec_gpio,
+	NULL, /* status read; later */
+};
+
+/*
+ * MIPI sequence handling, from the VBT #53 parsing logic.
+ * Each sequence has already been separated out during BIOS parsing;
+ * what follows is the generic execution function for any sequence.
+ */
+
+static const char * const seq_name[] = {
+	"UNDEFINED",
+	"MIPI_SEQ_ASSERT_RESET",
+	"MIPI_SEQ_INIT_OTP",
+	"MIPI_SEQ_DISPLAY_ON",
+	"MIPI_SEQ_DISPLAY_OFF",
+	"MIPI_SEQ_DEASSERT_RESET"
+};
+
+/* Execute one VBT MIPI sequence: an id byte, then elements up to a 0x00 byte. */
+static void generic_exec_sequence(struct intel_dsi *intel_dsi, char *sequence)
+{
+	u8 *data = (u8 *)sequence;
+	fn_mipi_elem_exec mipi_elem_exec;
+	unsigned int index;
+
+	if (!sequence)
+		return;
+
+	/* the id byte is raw VBT data: never index seq_name[] past its end */
+	index = *data++;
+	DRM_DEBUG_DRIVER("Starting MIPI sequence - %s\n",
+			 index < ARRAY_SIZE(seq_name) ? seq_name[index] : "UNKNOWN");
+
+	/* parse each byte till we reach end of sequence byte - 0x00 */
+	while (1) {
+		index = *data;
+		/* element ids are VBT data too: out of range means unsupported */
+		mipi_elem_exec = index < ARRAY_SIZE(exec_elem) ?
+					exec_elem[index] : NULL;
+		if (!mipi_elem_exec) {
+			DRM_ERROR("Unsupported MIPI element, skipping sequence execution\n");
+			return;
+		}
+
+		/* skip the element id and run its handler on the payload */
+		data++;
+		data = mipi_elem_exec(intel_dsi, data);
+
+		/*
+		 * The handler returns a pointer to the next element, or to
+		 * the 0x00 byte that terminates the sequence.
+		 */
+		if (*data == 0x00)
+			break;
+	}
+}
+
+static bool generic_init(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	struct mipi_pps_data *pps = dev_priv->vbt.dsi.pps;
+	struct drm_display_mode *mode = dev_priv->vbt.lfp_lvds_vbt_mode;
+	u32 bits_per_pixel = 24;
+	u32 tlpx_ns, extra_byte_count, bitrate, tlpx_ui;
+	u32 ui_num, ui_den;
+	u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt;
+	u32 ths_prepare_ns, tclk_trail_ns;
+	u32 tclk_prepare_clkzero, ths_prepare_hszero;
+	u32 lp_to_hs_switch, hs_to_lp_switch;
+
+	DRM_DEBUG_KMS("\n");
+
+	intel_dsi->eotp_pkt = mipi_config->eot_pkt_disabled ? 0 : 1;
+	intel_dsi->clock_stop = mipi_config->enable_clk_stop ? 1 : 0;
+	intel_dsi->lane_count = mipi_config->lane_cnt + 1;
+	intel_dsi->pixel_format = mipi_config->videomode_color_format << 7;
+
+	if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB666)
+		bits_per_pixel = 18;
+	else if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB565)
+		bits_per_pixel = 16;
+
+	bitrate = (mode->clock * bits_per_pixel) / intel_dsi->lane_count;
+
+	intel_dsi->operation_mode = mipi_config->is_cmd_mode;
+	intel_dsi->video_mode_format = mipi_config->video_transfer_mode;
+	intel_dsi->escape_clk_div = mipi_config->byte_clk_sel;
+	intel_dsi->lp_rx_timeout = mipi_config->lp_rx_timeout;
+	intel_dsi->turn_arnd_val = mipi_config->turn_around_timeout;
+	intel_dsi->rst_timer_val = mipi_config->device_reset_timer;
+	intel_dsi->init_count = mipi_config->master_init_timer;
+	intel_dsi->bw_timer = mipi_config->dbi_bw_timer;
+	intel_dsi->video_frmt_cfg_bits = mipi_config->bta_enabled ? DISABLE_VIDEO_BTA : 0;
+
+	switch (intel_dsi->escape_clk_div) {
+	case 0:
+		tlpx_ns = 50;
+		break;
+	case 1:
+		tlpx_ns = 100;
+		break;
+
+	case 2:
+		tlpx_ns = 200;
+		break;
+	default:
+		tlpx_ns = 50;
+		break;
+	}
+
+	switch (intel_dsi->lane_count) {
+	case 1:
+	case 2:
+		extra_byte_count = 2;
+		break;
+	case 3:
+		extra_byte_count = 4;
+		break;
+	case 4:
+	default:
+		extra_byte_count = 3;
+		break;
+	}
+
+	/*
+	 * ui(s) = 1/f [f in hz]
+	 * ui(ns) = 10^9 / (f*10^6) [f in Mhz] -> 10^3/f(Mhz)
+	 */
+
+	/* in Kbps */
+	ui_num = NS_KHZ_RATIO;
+	ui_den = bitrate;
+
+	tclk_prepare_clkzero = mipi_config->tclk_prepare_clkzero;
+	ths_prepare_hszero = mipi_config->ths_prepare_hszero;
+
+	/*
+	 * B060
+	 * LP byte clock = TLPX/ (8UI)
+	 */
+	intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num);
+
+	/* count values in UI = (ns value) * (bitrate / (2 * 10^6))
+	 *
+	 * Since txddrclkhs_i is 2xUI, all the count values programmed in
+	 * DPHY param register are divided by 2
+	 *
+	 * prepare count
+	 */
+	ths_prepare_ns = max(mipi_config->ths_prepare, mipi_config->tclk_prepare);
+	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * 2);
+
+	/* exit zero count */
+	exit_zero_cnt = DIV_ROUND_UP(
+				(ths_prepare_hszero - ths_prepare_ns) * ui_den,
+				ui_num * 2
+				);
+
+	/*
+	 * Exit zero  is unified val ths_zero and ths_exit
+	 * minimum value for ths_exit = 110ns
+	 * min (exit_zero_cnt * 2) = 110/UI
+	 * exit_zero_cnt = 55/UI
+	 */
+	 if (exit_zero_cnt < (55 * ui_den / ui_num))
+		if ((55 * ui_den) % ui_num)
+			exit_zero_cnt += 1;
+
+	/* clk zero count */
+	clk_zero_cnt = DIV_ROUND_UP(
+			(tclk_prepare_clkzero -	ths_prepare_ns)
+			* ui_den, 2 * ui_num);
+
+	/* trail count */
+	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
+	trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, 2 * ui_num);
+
+	if (prepare_cnt > PREPARE_CNT_MAX ||
+		exit_zero_cnt > EXIT_ZERO_CNT_MAX ||
+		clk_zero_cnt > CLK_ZERO_CNT_MAX ||
+		trail_cnt > TRAIL_CNT_MAX)
+		DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n");
+
+	if (prepare_cnt > PREPARE_CNT_MAX)
+		prepare_cnt = PREPARE_CNT_MAX;
+
+	if (exit_zero_cnt > EXIT_ZERO_CNT_MAX)
+		exit_zero_cnt = EXIT_ZERO_CNT_MAX;
+
+	if (clk_zero_cnt > CLK_ZERO_CNT_MAX)
+		clk_zero_cnt = CLK_ZERO_CNT_MAX;
+
+	if (trail_cnt > TRAIL_CNT_MAX)
+		trail_cnt = TRAIL_CNT_MAX;
+
+	/* B080 */
+	intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 |
+						clk_zero_cnt << 8 | prepare_cnt;
+
+	/*
+	 * LP to HS switch count = 4TLPX + PREP_COUNT * 2 + EXIT_ZERO_COUNT * 2
+	 *					+ 10UI + Extra Byte Count
+	 *
+	 * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count
+	 * Extra Byte Count is calculated according to number of lanes.
+	 * High Low Switch Count is the Max of LP to HS and
+	 * HS to LP switch count
+	 *
+	 */
+	tlpx_ui = DIV_ROUND_UP(tlpx_ns * ui_den, ui_num);
+
+	/* B044 */
+	/* FIXME:
+	 * The comment above does not match with the code */
+	lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * 2 +
+						exit_zero_cnt * 2 + 10, 8);
+
+	hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8);
+
+	intel_dsi->hs_to_lp_count = max(lp_to_hs_switch, hs_to_lp_switch);
+	intel_dsi->hs_to_lp_count += extra_byte_count;
+
+	/* B088 */
+	/* LP -> HS for clock lanes
+	 * LP clk sync + LP11 + LP01 + tclk_prepare + tclk_zero +
+	 *						extra byte count
+	 * 2TPLX + 1TLPX + 1 TPLX(in ns) + prepare_cnt * 2 + clk_zero_cnt *
+	 *					2(in UI) + extra byte count
+	 * In byteclks = (4TLPX + prepare_cnt * 2 + clk_zero_cnt *2 (in UI)) /
+	 *					8 + extra byte count
+	 */
+	intel_dsi->clk_lp_to_hs_count =
+		DIV_ROUND_UP(
+			4 * tlpx_ui + prepare_cnt * 2 +
+			clk_zero_cnt * 2,
+			8);
+
+	intel_dsi->clk_lp_to_hs_count += extra_byte_count;
+
+	/* HS->LP for Clock Lanes
+	 * Low Power clock synchronisations + 1Tx byteclk + tclk_trail +
+	 *						Extra byte count
+	 * 2TLPX + 8UI + (trail_count*2)(in UI) + Extra byte count
+	 * In byteclks = (2*TLpx(in UI) + trail_count*2 +8)(in UI)/8 +
+	 *						Extra byte count
+	 */
+	intel_dsi->clk_hs_to_lp_count =
+		DIV_ROUND_UP(2 * tlpx_ui + trail_cnt * 2 + 8,
+			8);
+	intel_dsi->clk_hs_to_lp_count += extra_byte_count;
+
+	DRM_DEBUG_KMS("Eot %s\n", intel_dsi->eotp_pkt ? "enabled" : "disabled");
+	DRM_DEBUG_KMS("Clockstop %s\n", intel_dsi->clock_stop ?
+						"disabled" : "enabled");
+	DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video");
+	DRM_DEBUG_KMS("Pixel Format %d\n", intel_dsi->pixel_format);
+	DRM_DEBUG_KMS("TLPX %d\n", intel_dsi->escape_clk_div);
+	DRM_DEBUG_KMS("LP RX Timeout 0x%x\n", intel_dsi->lp_rx_timeout);
+	DRM_DEBUG_KMS("Turnaround Timeout 0x%x\n", intel_dsi->turn_arnd_val);
+	DRM_DEBUG_KMS("Init Count 0x%x\n", intel_dsi->init_count);
+	DRM_DEBUG_KMS("HS to LP Count 0x%x\n", intel_dsi->hs_to_lp_count);
+	DRM_DEBUG_KMS("LP Byte Clock %d\n", intel_dsi->lp_byte_clk);
+	DRM_DEBUG_KMS("DBI BW Timer 0x%x\n", intel_dsi->bw_timer);
+	DRM_DEBUG_KMS("LP to HS Clock Count 0x%x\n", intel_dsi->clk_lp_to_hs_count);
+	DRM_DEBUG_KMS("HS to LP Clock Count 0x%x\n", intel_dsi->clk_hs_to_lp_count);
+	DRM_DEBUG_KMS("BTA %s\n",
+			intel_dsi->video_frmt_cfg_bits & DISABLE_VIDEO_BTA ?
+			"disabled" : "enabled");
+
+	/* delays in VBT are in unit of 100us, so need to convert
+	 * here in ms
+	 * Delay (100us) * 100 /1000 = Delay / 10 (ms) */
+	intel_dsi->backlight_off_delay = pps->bl_disable_delay / 10;
+	intel_dsi->backlight_on_delay = pps->bl_enable_delay / 10;
+	intel_dsi->panel_on_delay = pps->panel_on_delay / 10;
+	intel_dsi->panel_off_delay = pps->panel_off_delay / 10;
+	intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10;
+
+	return true;
+}
+
+static int generic_mode_valid(struct intel_dsi_device *dsi,
+		   struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static bool generic_mode_fixup(struct intel_dsi_device *dsi,
+		    const struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode) {
+	return true;
+}
+
+static void generic_panel_reset(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	char *sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET];
+
+	generic_exec_sequence(intel_dsi, sequence);
+}
+
+static void generic_disable_panel_power(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	char *sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET];
+
+	generic_exec_sequence(intel_dsi, sequence);
+}
+
+static void generic_send_otp_cmds(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	char *sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+
+	generic_exec_sequence(intel_dsi, sequence);
+}
+
+static void generic_enable(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	char *sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON];
+
+	generic_exec_sequence(intel_dsi, sequence);
+}
+
+static void generic_disable(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	char *sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_OFF];
+
+	generic_exec_sequence(intel_dsi, sequence);
+}
+
+static enum drm_connector_status generic_detect(struct intel_dsi_device *dsi)
+{
+	return connector_status_connected;
+}
+
+static bool generic_get_hw_state(struct intel_dsi_device *dev)
+{
+	return true;
+}
+
+static struct drm_display_mode *generic_get_modes(struct intel_dsi_device *dsi)
+{
+	struct intel_dsi *intel_dsi = container_of(dsi, struct intel_dsi, dev);
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->vbt.lfp_lvds_vbt_mode->type |= DRM_MODE_TYPE_PREFERRED;
+	return dev_priv->vbt.lfp_lvds_vbt_mode;
+}
+
+static void generic_destroy(struct intel_dsi_device *dsi) { }
+
+/* Callbacks. We might not need them all. */
+struct intel_dsi_dev_ops vbt_generic_dsi_display_ops = {
+	.init = generic_init,
+	.mode_valid = generic_mode_valid,
+	.mode_fixup = generic_mode_fixup,
+	.panel_reset = generic_panel_reset,
+	.disable_panel_power = generic_disable_panel_power,
+	.send_otp_cmds = generic_send_otp_cmds,
+	.enable = generic_enable,
+	.disable = generic_disable,
+	.detect = generic_detect,
+	.get_hw_state = generic_get_hw_state,
+	.get_modes = generic_get_modes,
+	.destroy = generic_destroy,
+};

diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 7fe3fee..a3631c0 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c

@@ -285,7 +285,7 @@
 	return true;
 }
 
-static void intel_dvo_mode_set(struct intel_encoder *encoder)
+static void intel_dvo_pre_enable(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -343,7 +343,7 @@
 {
 	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 	return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev);
 }
 
@@ -475,7 +475,7 @@
 	intel_encoder->get_hw_state = intel_dvo_get_hw_state;
 	intel_encoder->get_config = intel_dvo_get_config;
 	intel_encoder->compute_config = intel_dvo_compute_config;
-	intel_encoder->mode_set = intel_dvo_mode_set;
+	intel_encoder->pre_enable = intel_dvo_pre_enable;
 	intel_connector->get_hw_state = intel_dvo_connector_get_hw_state;
 	intel_connector->unregister = intel_connector_unregister;
 

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index f73ba5e..088fe93 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c

@@ -343,15 +343,15 @@
 			num_connectors_detected++;
 
 		if (!enabled[i]) {
-			DRM_DEBUG_KMS("connector %d not enabled, skipping\n",
-				      connector->base.id);
+			DRM_DEBUG_KMS("connector %s not enabled, skipping\n",
+				      connector->name);
 			continue;
 		}
 
 		encoder = connector->encoder;
 		if (!encoder || WARN_ON(!encoder->crtc)) {
-			DRM_DEBUG_KMS("connector %d has no encoder or crtc, skipping\n",
-				      connector->base.id);
+			DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n",
+				      connector->name);
 			enabled[i] = false;
 			continue;
 		}
@@ -373,16 +373,16 @@
 			}
 		}
 
-		DRM_DEBUG_KMS("looking for cmdline mode on connector %d\n",
-			      fb_conn->connector->base.id);
+		DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n",
+			      connector->name);
 
 		/* go for command line mode first */
 		modes[i] = drm_pick_cmdline_mode(fb_conn, width, height);
 
 		/* try for preferred next */
 		if (!modes[i]) {
-			DRM_DEBUG_KMS("looking for preferred mode on connector %d\n",
-				      fb_conn->connector->base.id);
+			DRM_DEBUG_KMS("looking for preferred mode on connector %s\n",
+				      connector->name);
 			modes[i] = drm_has_preferred_mode(fb_conn, width,
 							  height);
 		}
@@ -390,7 +390,7 @@
 		/* No preferred mode marked by the EDID? Are there any modes? */
 		if (!modes[i] && !list_empty(&connector->modes)) {
 			DRM_DEBUG_KMS("using first mode listed on connector %s\n",
-				      drm_get_connector_name(connector));
+				      connector->name);
 			modes[i] = list_first_entry(&connector->modes,
 						    struct drm_display_mode,
 						    head);
@@ -409,16 +409,20 @@
 			 * since the fb helper layer wants a pointer to
 			 * something we own.
 			 */
+			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
+				      connector->name);
 			intel_mode_from_pipe_config(&encoder->crtc->hwmode,
 						    &to_intel_crtc(encoder->crtc)->config);
 			modes[i] = &encoder->crtc->hwmode;
 		}
 		crtcs[i] = new_crtc;
 
-		DRM_DEBUG_KMS("connector %s on crtc %d: %s\n",
-			      drm_get_connector_name(connector),
+		DRM_DEBUG_KMS("connector %s on pipe %c [CRTC:%d]: %dx%d%s\n",
+			      connector->name,
+			      pipe_name(to_intel_crtc(encoder->crtc)->pipe),
 			      encoder->crtc->base.id,
-			      modes[i]->name);
+			      modes[i]->hdisplay, modes[i]->vdisplay,
+			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" : "");
 
 		fallback = false;
 	}
@@ -497,7 +501,7 @@
 		return false;
 
 	/* Find the largest fb */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		intel_crtc = to_intel_crtc(crtc);
 
 		if (!intel_crtc->active || !crtc->primary->fb) {
@@ -521,7 +525,7 @@
 	}
 
 	/* Now make sure all the pipes will fit into it */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		unsigned int cur_size;
 
 		intel_crtc = to_intel_crtc(crtc);
@@ -586,7 +590,7 @@
 	drm_framebuffer_reference(&ifbdev->fb->base);
 
 	/* Final pass to check if any active pipes don't have fbs */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		intel_crtc = to_intel_crtc(crtc);
 
 		if (!intel_crtc->active)
@@ -692,11 +696,7 @@
 	if (!dev_priv->fbdev)
 		return;
 
-	drm_modeset_lock_all(dev);
-
-	ret = drm_fb_helper_restore_fbdev_mode(&dev_priv->fbdev->helper);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(&dev_priv->fbdev->helper);
 	if (ret)
 		DRM_DEBUG("failed to restore crtc mode\n");
-
-	drm_modeset_unlock_all(dev);
 }

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 157267a..eee2bbe 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c

@@ -418,6 +418,7 @@
 }
 
 static void g4x_set_infoframes(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
@@ -440,7 +441,7 @@
 	 * either. */
 	val |= VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC;
 
-	if (!intel_hdmi->has_hdmi_sink) {
+	if (!enable) {
 		if (!(val & VIDEO_DIP_ENABLE))
 			return;
 		val &= ~VIDEO_DIP_ENABLE;
@@ -471,6 +472,7 @@
 }
 
 static void ibx_set_infoframes(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
@@ -486,7 +488,7 @@
 	/* See the big comment in g4x_set_infoframes() */
 	val |= VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC;
 
-	if (!intel_hdmi->has_hdmi_sink) {
+	if (!enable) {
 		if (!(val & VIDEO_DIP_ENABLE))
 			return;
 		val &= ~VIDEO_DIP_ENABLE;
@@ -518,6 +520,7 @@
 }
 
 static void cpt_set_infoframes(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
@@ -531,7 +534,7 @@
 	/* See the big comment in g4x_set_infoframes() */
 	val |= VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC;
 
-	if (!intel_hdmi->has_hdmi_sink) {
+	if (!enable) {
 		if (!(val & VIDEO_DIP_ENABLE))
 			return;
 		val &= ~(VIDEO_DIP_ENABLE | VIDEO_DIP_ENABLE_AVI);
@@ -554,20 +557,23 @@
 }
 
 static void vlv_set_infoframes(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	u32 reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
 	assert_hdmi_port_disabled(intel_hdmi);
 
 	/* See the big comment in g4x_set_infoframes() */
 	val |= VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC;
 
-	if (!intel_hdmi->has_hdmi_sink) {
+	if (!enable) {
 		if (!(val & VIDEO_DIP_ENABLE))
 			return;
 		val &= ~VIDEO_DIP_ENABLE;
@@ -576,9 +582,19 @@
 		return;
 	}
 
+	if (port != (val & VIDEO_DIP_PORT_MASK)) {
+		if (val & VIDEO_DIP_ENABLE) {
+			val &= ~VIDEO_DIP_ENABLE;
+			I915_WRITE(reg, val);
+			POSTING_READ(reg);
+		}
+		val &= ~VIDEO_DIP_PORT_MASK;
+		val |= port;
+	}
+
 	val |= VIDEO_DIP_ENABLE;
-	val &= ~(VIDEO_DIP_ENABLE_VENDOR | VIDEO_DIP_ENABLE_GAMUT |
-		 VIDEO_DIP_ENABLE_GCP);
+	val &= ~(VIDEO_DIP_ENABLE_AVI | VIDEO_DIP_ENABLE_VENDOR |
+		 VIDEO_DIP_ENABLE_GAMUT | VIDEO_DIP_ENABLE_GCP);
 
 	I915_WRITE(reg, val);
 	POSTING_READ(reg);
@@ -589,6 +605,7 @@
 }
 
 static void hsw_set_infoframes(struct drm_encoder *encoder,
+			       bool enable,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
@@ -599,7 +616,7 @@
 
 	assert_hdmi_port_disabled(intel_hdmi);
 
-	if (!intel_hdmi->has_hdmi_sink) {
+	if (!enable) {
 		I915_WRITE(reg, 0);
 		POSTING_READ(reg);
 		return;
@@ -616,7 +633,7 @@
 	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
-static void intel_hdmi_mode_set(struct intel_encoder *encoder)
+static void intel_hdmi_prepare(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -638,27 +655,26 @@
 	else
 		hdmi_val |= SDVO_COLOR_FORMAT_8bpc;
 
-	/* Required on CPT */
-	if (intel_hdmi->has_hdmi_sink && HAS_PCH_CPT(dev))
+	if (crtc->config.has_hdmi_sink)
 		hdmi_val |= HDMI_MODE_SELECT_HDMI;
 
-	if (intel_hdmi->has_audio) {
+	if (crtc->config.has_audio) {
+		WARN_ON(!crtc->config.has_hdmi_sink);
 		DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
 				 pipe_name(crtc->pipe));
 		hdmi_val |= SDVO_AUDIO_ENABLE;
-		hdmi_val |= HDMI_MODE_SELECT_HDMI;
 		intel_write_eld(&encoder->base, adjusted_mode);
 	}
 
 	if (HAS_PCH_CPT(dev))
 		hdmi_val |= SDVO_PIPE_SEL_CPT(crtc->pipe);
+	else if (IS_CHERRYVIEW(dev))
+		hdmi_val |= SDVO_PIPE_SEL_CHV(crtc->pipe);
 	else
 		hdmi_val |= SDVO_PIPE_SEL(crtc->pipe);
 
 	I915_WRITE(intel_hdmi->hdmi_reg, hdmi_val);
 	POSTING_READ(intel_hdmi->hdmi_reg);
-
-	intel_hdmi->set_infoframes(&encoder->base, adjusted_mode);
 }
 
 static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
@@ -681,6 +697,8 @@
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
+	else if (IS_CHERRYVIEW(dev))
+		*pipe = SDVO_PORT_TO_PIPE_CHV(tmp);
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
@@ -707,6 +725,12 @@
 	else
 		flags |= DRM_MODE_FLAG_NVSYNC;
 
+	if (tmp & HDMI_MODE_SELECT_HDMI)
+		pipe_config->has_hdmi_sink = true;
+	/* Audio has its own enable bit; HDMI mode select does not imply it. */
+	if (tmp & SDVO_AUDIO_ENABLE)
+		pipe_config->has_audio = true;
+
 	pipe_config->adjusted_mode.flags |= flags;
 
 	if ((tmp & SDVO_COLOR_FORMAT_MASK) == HDMI_COLOR_FORMAT_12bpc)
@@ -729,7 +753,7 @@
 	u32 temp;
 	u32 enable_bits = SDVO_ENABLE;
 
-	if (intel_hdmi->has_audio)
+	if (intel_crtc->config.has_audio)
 		enable_bits |= SDVO_AUDIO_ENABLE;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -883,9 +907,11 @@
 	int portclock_limit = hdmi_portclock_limit(intel_hdmi, false);
 	int desired_bpp;
 
+	pipe_config->has_hdmi_sink = intel_hdmi->has_hdmi_sink;
+
 	if (intel_hdmi->color_range_auto) {
 		/* See CEA-861-E - 5.1 Default Encoding Parameters */
-		if (intel_hdmi->has_hdmi_sink &&
+		if (pipe_config->has_hdmi_sink &&
 		    drm_match_cea_mode(adjusted_mode) > 1)
 			intel_hdmi->color_range = HDMI_COLOR_RANGE_16_235;
 		else
@@ -898,13 +924,16 @@
 	if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev))
 		pipe_config->has_pch_encoder = true;
 
+	if (pipe_config->has_hdmi_sink && intel_hdmi->has_audio)
+		pipe_config->has_audio = true;
+
 	/*
 	 * HDMI is either 12 or 8, so if the display lets 10bpc sneak
 	 * through, clamp it down. Note that g4x/vlv don't support 12bpc hdmi
 	 * outputs. We also need to check that the higher clock still fits
 	 * within limits.
 	 */
-	if (pipe_config->pipe_bpp > 8*3 && intel_hdmi->has_hdmi_sink &&
+	if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink &&
 	    clock_12bpc <= portclock_limit &&
 	    hdmi_12bpc_possible(encoder->new_crtc)) {
 		DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
@@ -944,7 +973,7 @@
 	enum drm_connector_status status = connector_status_disconnected;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	power_domain = intel_display_port_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
@@ -1104,20 +1133,34 @@
 	return 0;
 }
 
+static void intel_hdmi_pre_enable(struct intel_encoder *encoder)
+{
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base);
+
+	/* Program the HDMI port register before touching the infoframes. */
+	intel_hdmi_prepare(encoder);
+
+	/* The enable flag follows the has_hdmi_sink bit of the pipe config. */
+	hdmi->set_infoframes(&encoder->base,
+			     crtc->config.has_hdmi_sink,
+			     &crtc->config.adjusted_mode);
+}
+
 static void vlv_hdmi_pre_enable(struct intel_encoder *encoder)
 {
 	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_hdmi *intel_hdmi = &dport->hdmi;
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc =
 		to_intel_crtc(encoder->base.crtc);
+	struct drm_display_mode *adjusted_mode =
+		&intel_crtc->config.adjusted_mode;
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	int pipe = intel_crtc->pipe;
 	u32 val;
 
-	if (!IS_VALLEYVIEW(dev))
-		return;
-
 	/* Enable clock channels for this port */
 	mutex_lock(&dev_priv->dpio_lock);
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
@@ -1144,6 +1187,10 @@
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
 	mutex_unlock(&dev_priv->dpio_lock);
 
+	intel_hdmi->set_infoframes(&encoder->base,
+				   intel_crtc->config.has_hdmi_sink,
+				   adjusted_mode);
+
 	intel_enable_hdmi(encoder);
 
 	vlv_wait_port_ready(dev_priv, dport);
@@ -1159,8 +1206,7 @@
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	int pipe = intel_crtc->pipe;
 
-	if (!IS_VALLEYVIEW(dev))
-		return;
+	intel_hdmi_prepare(encoder);
 
 	/* Program Tx lane resets to default */
 	mutex_lock(&dev_priv->dpio_lock);
@@ -1199,6 +1245,174 @@
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
+/*
+ * post_disable hook for CHV HDMI: with dpio_lock held, set the soft-reset
+ * request enable bit in PCS DW1 and clear the TX lane reset bits in PCS DW0,
+ * in both the PCS01 and PCS23 registers of the port's DPIO channel.
+ */
+static void chv_hdmi_post_disable(struct intel_encoder *encoder)
+{
+	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc =
+		to_intel_crtc(encoder->base.crtc);
+	enum dpio_channel ch = vlv_dport_to_channel(dport);
+	enum pipe pipe = intel_crtc->pipe;
+	u32 val;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Propagate soft reset to data lane reset */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch));
+	val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch));
+	val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
+/*
+ * pre_enable hook for CHV HDMI: program the DPIO PHY registers of the port's
+ * channel under dpio_lock, set up the infoframes, then call
+ * intel_enable_hdmi() and vlv_wait_port_ready().
+ *
+ * NOTE(review): this function does not call intel_hdmi_prepare(), which the
+ * generic pre_enable and the VLV pre_pll_enable hooks do; confirm where the
+ * port register is meant to be programmed on CHV.
+ */
+static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
+{
+	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc =
+		to_intel_crtc(encoder->base.crtc);
+	enum dpio_channel ch = vlv_dport_to_channel(dport);
+	int pipe = intel_crtc->pipe;
+	int data, i;
+	u32 val;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	/* Deassert soft data lane reset */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch));
+	val |= CHV_PCS_REQ_SOFTRESET_EN;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch));
+	val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch));
+	val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
+
+	/* Program Tx latency optimal setting */
+	for (i = 0; i < 4; i++) {
+		/* Set the latency optimal bit */
+		data = (i == 1) ? 0x0 : 0x6;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW11(ch, i),
+				data << DPIO_FRC_LATENCY_SHFIT);
+
+		/* Set the upar bit */
+		data = (i == 1) ? 0x0 : 0x1;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW14(ch, i),
+				data << DPIO_UPAR_SHIFT);
+	}
+
+	/* Data lane stagger programming */
+	/* FIXME: Fix up value only after power analysis */
+
+	/* Clear calc init */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+	val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+
+	/* FIXME: Program the support xxx V-dB */
+	/* Use 800mV-0dB */
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW4(ch, i));
+		val &= ~DPIO_SWING_DEEMPH9P5_MASK;
+		val |= 128 << DPIO_SWING_DEEMPH9P5_SHIFT;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW4(ch, i), val);
+	}
+
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
+		val &= ~DPIO_SWING_MARGIN_MASK;
+		val |= 102 << DPIO_SWING_MARGIN_SHIFT;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
+	}
+
+	/* Disable unique transition scale */
+	for (i = 0; i < 4; i++) {
+		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i));
+		val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN;
+		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val);
+	}
+
+	/* Additional steps for 1200mV-0dB */
+#if 0
+	val = vlv_dpio_read(dev_priv, pipe, VLV_TX_DW3(ch));
+	if (ch)
+		val |= DPIO_TX_UNIQ_TRANS_SCALE_CH1;
+	else
+		val |= DPIO_TX_UNIQ_TRANS_SCALE_CH0;
+	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW3(ch), val);
+
+	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(ch),
+			vlv_dpio_read(dev_priv, pipe, VLV_TX_DW2(ch)) |
+				(0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT));
+#endif
+	/* Start swing calculation */
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+
+	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+	val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
+	vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+
+	/* LRC Bypass */
+	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
+	val |= DPIO_LRC_BYPASS;
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	/*
+	 * Infoframe setup is no longer done from a mode_set hook, so do it
+	 * here, before the port is enabled, as vlv_hdmi_pre_enable() does.
+	 */
+	dport->hdmi.set_infoframes(&encoder->base,
+				   intel_crtc->config.has_hdmi_sink,
+				   &intel_crtc->config.adjusted_mode);
+
+	intel_enable_hdmi(encoder);
+
+	vlv_wait_port_ready(dev_priv, dport);
+}
+
 static void intel_hdmi_destroy(struct drm_connector *connector)
 {
 	drm_connector_cleanup(connector);
@@ -1259,7 +1451,10 @@
 		intel_encoder->hpd_pin = HPD_PORT_C;
 		break;
 	case PORT_D:
-		intel_hdmi->ddc_bus = GMBUS_PORT_DPD;
+		if (IS_CHERRYVIEW(dev))
+			intel_hdmi->ddc_bus = GMBUS_PORT_DPD_CHV;
+		else
+			intel_hdmi->ddc_bus = GMBUS_PORT_DPD;
 		intel_encoder->hpd_pin = HPD_PORT_D;
 		break;
 	case PORT_A:
@@ -1329,21 +1524,32 @@
 			 DRM_MODE_ENCODER_TMDS);
 
 	intel_encoder->compute_config = intel_hdmi_compute_config;
-	intel_encoder->mode_set = intel_hdmi_mode_set;
 	intel_encoder->disable = intel_disable_hdmi;
 	intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
 	intel_encoder->get_config = intel_hdmi_get_config;
-	if (IS_VALLEYVIEW(dev)) {
+	if (IS_CHERRYVIEW(dev)) {
+		intel_encoder->pre_enable = chv_hdmi_pre_enable;
+		intel_encoder->enable = vlv_enable_hdmi;
+		intel_encoder->post_disable = chv_hdmi_post_disable;
+	} else if (IS_VALLEYVIEW(dev)) {
 		intel_encoder->pre_pll_enable = vlv_hdmi_pre_pll_enable;
 		intel_encoder->pre_enable = vlv_hdmi_pre_enable;
 		intel_encoder->enable = vlv_enable_hdmi;
 		intel_encoder->post_disable = vlv_hdmi_post_disable;
 	} else {
+		intel_encoder->pre_enable = intel_hdmi_pre_enable;
 		intel_encoder->enable = intel_enable_hdmi;
 	}
 
 	intel_encoder->type = INTEL_OUTPUT_HDMI;
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	if (IS_CHERRYVIEW(dev)) {
+		if (port == PORT_D)
+			intel_encoder->crtc_mask = 1 << 2;
+		else
+			intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	} else {
+		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	}
 	intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG;
 	/*
 	 * BSpec is unclear about HDMI+HDMI cloning on g4x, but it seems

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index f1ecf91..2312602 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c

@@ -111,13 +111,6 @@
 
 	pipe_config->adjusted_mode.flags |= flags;
 
-	/* gen2/3 store dither state in pfit control, needs to match */
-	if (INTEL_INFO(dev)->gen < 4) {
-		tmp = I915_READ(PFIT_CONTROL);
-
-		pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE;
-	}
-
 	dotclock = pipe_config->port_clock;
 
 	if (HAS_PCH_SPLIT(dev_priv->dev))
@@ -126,10 +119,6 @@
 	pipe_config->adjusted_mode.crtc_clock = dotclock;
 }
 
-/* The LVDS pin pair needs to be on before the DPLLs are enabled.
- * This is an exception to the general rule that mode_set doesn't turn
- * things on.
- */
 static void intel_pre_enable_lvds(struct intel_encoder *encoder)
 {
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
@@ -331,15 +320,6 @@
 	return true;
 }
 
-static void intel_lvds_mode_set(struct intel_encoder *encoder)
-{
-	/*
-	 * We don't do anything here, the LVDS port is fully set up in the pre
-	 * enable hook - the ordering constraints for enabling the lvds port vs.
-	 * enabling the display pll are too strict.
-	 */
-}
-
 /**
  * Detect the LVDS connection.
  *
@@ -354,7 +334,7 @@
 	enum drm_connector_status status;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	status = intel_panel_detect(dev);
 	if (status != connector_status_unknown)
@@ -953,7 +933,6 @@
 	intel_encoder->enable = intel_enable_lvds;
 	intel_encoder->pre_enable = intel_pre_enable_lvds;
 	intel_encoder->compute_config = intel_lvds_compute_config;
-	intel_encoder->mode_set = intel_lvds_mode_set;
 	intel_encoder->disable = intel_disable_lvds;
 	intel_encoder->get_hw_state = intel_lvds_get_hw_state;
 	intel_encoder->get_config = intel_lvds_get_config;

diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index acde294..2e2c71f 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c

@@ -410,7 +410,7 @@
 	if (bclp > 255)
 		return ASLC_BACKLIGHT_FAILED;
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
 	/*
 	 * Update backlight on all connectors that support backlight (usually
@@ -421,7 +421,7 @@
 		intel_panel_set_backlight(intel_connector, bclp, 255);
 	iowrite32(DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID, &asle->cblv);
 
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 
 
 	return 0;

diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 129db0c..daa1189 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c

@@ -213,7 +213,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
 	BUG_ON(overlay->last_flip_req);
@@ -236,7 +236,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
 	BUG_ON(overlay->active);
@@ -263,7 +263,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	u32 flip_addr = overlay->flip_addr;
 	u32 tmp;
 	int ret;
@@ -320,7 +320,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	u32 flip_addr = overlay->flip_addr;
 	int ret;
 
@@ -363,7 +363,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
 	if (overlay->last_flip_req == 0)
@@ -389,7 +389,7 @@
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
 	/* Only wait if there is actually an old frame to release to
@@ -688,7 +688,7 @@
 	u32 swidth, swidthsw, sheight, ostride;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	BUG_ON(!overlay);
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -793,7 +793,7 @@
 	int ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
 	if (ret != 0)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index cb8cfb7..5e6c888 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c

@@ -42,6 +42,59 @@
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 }
 
+/**
+ * intel_find_panel_downclock - find a lower-clock variant of the panel mode
+ * @dev: drm device, passed on to drm_mode_duplicate()
+ * @fixed_mode: panel native mode
+ * @connector: LVDS/eDP connector whose probed_modes list is searched
+ *
+ * Returns a duplicate of the lowest-clock probed mode whose timings equal
+ * those of @fixed_mode and whose clock is below it, or NULL if there is none.
+ */
+struct drm_display_mode *
+intel_find_panel_downclock(struct drm_device *dev,
+			struct drm_display_mode *fixed_mode,
+			struct drm_connector *connector)
+{
+	struct drm_display_mode *scan, *tmp_mode;
+	int temp_downclock;
+
+	temp_downclock = fixed_mode->clock;
+	tmp_mode = NULL;
+
+	list_for_each_entry(scan, &connector->probed_modes, head) {
+		/*
+		 * If a mode has the same horizontal and vertical timings as
+		 * the fixed panel mode but a lower clock (hence a lower
+		 * refresh rate), the reduced downclock is found. In such
+		 * case we can set the different FPx0/1 to dynamically select
+		 * between low and high frequency.
+		 */
+		if (scan->hdisplay == fixed_mode->hdisplay &&
+		    scan->hsync_start == fixed_mode->hsync_start &&
+		    scan->hsync_end == fixed_mode->hsync_end &&
+		    scan->htotal == fixed_mode->htotal &&
+		    scan->vdisplay == fixed_mode->vdisplay &&
+		    scan->vsync_start == fixed_mode->vsync_start &&
+		    scan->vsync_end == fixed_mode->vsync_end &&
+		    scan->vtotal == fixed_mode->vtotal) {
+			if (scan->clock < temp_downclock) {
+				/*
+				 * A candidate may already have been found;
+				 * keep whichever has the lowest clock.
+				 */
+				temp_downclock = scan->clock;
+				tmp_mode = scan;
+			}
+		}
+	}
+
+	if (temp_downclock < fixed_mode->clock)
+		return drm_mode_duplicate(dev, tmp_mode);
+	else
+		return NULL;
+}
+
 /* adjusted_mode has been preset to be the panel's fixed mode */
 void
 intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
@@ -308,21 +361,43 @@
 		pfit_control |= ((intel_crtc->pipe << PFIT_PIPE_SHIFT) |
 				 PFIT_FILTER_FUZZY);
 
+	/* Make sure pre-965 set dither correctly for 18bpp panels. */
+	if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18)
+		pfit_control |= PANEL_8TO6_DITHER_ENABLE;
+
 out:
 	if ((pfit_control & PFIT_ENABLE) == 0) {
 		pfit_control = 0;
 		pfit_pgm_ratios = 0;
 	}
 
-	/* Make sure pre-965 set dither correctly for 18bpp panels. */
-	if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18)
-		pfit_control |= PANEL_8TO6_DITHER_ENABLE;
-
 	pipe_config->gmch_pfit.control = pfit_control;
 	pipe_config->gmch_pfit.pgm_ratios = pfit_pgm_ratios;
 	pipe_config->gmch_pfit.lvds_border_bits = border;
 }
 
+enum drm_connector_status
+intel_panel_detect(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Lid not ignored: assume the OpRegion lid state (bit 0) does not lie. */
+	if (!i915.panel_ignore_lid && dev_priv->opregion.lid_state) {
+		return ioread32(dev_priv->opregion.lid_state) & 0x1 ?
+			connector_status_connected :
+			connector_status_disconnected;
+	}
+
+	switch (i915.panel_ignore_lid) {	/* lid state not consulted */
+	case -2:	/* always report connected */
+		return connector_status_connected;
+	case -1:	/* always report disconnected */
+		return connector_status_disconnected;
+	default:	/* any other value: no opinion */
+		return connector_status_unknown;
+	}
+}
+
 static u32 intel_panel_compute_brightness(struct intel_connector *connector,
 					  u32 val)
 {
@@ -795,40 +870,18 @@
 	spin_unlock_irqrestore(&dev_priv->backlight_lock, flags);
 }
 
-enum drm_connector_status
-intel_panel_detect(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/* Assume that the BIOS does not lie through the OpRegion... */
-	if (!i915.panel_ignore_lid && dev_priv->opregion.lid_state) {
-		return ioread32(dev_priv->opregion.lid_state) & 0x1 ?
-			connector_status_connected :
-			connector_status_disconnected;
-	}
-
-	switch (i915.panel_ignore_lid) {
-	case -2:
-		return connector_status_connected;
-	case -1:
-		return connector_status_disconnected;
-	default:
-		return connector_status_unknown;
-	}
-}
-
 #if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
 static int intel_backlight_device_update_status(struct backlight_device *bd)
 {
 	struct intel_connector *connector = bl_get_data(bd);
 	struct drm_device *dev = connector->base.dev;
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n",
 		      bd->props.brightness, bd->props.max_brightness);
 	intel_panel_set_backlight(connector, bd->props.brightness,
 				  bd->props.max_brightness);
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	return 0;
 }
 
@@ -840,9 +893,9 @@
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	ret = intel_panel_get_backlight(connector);
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret;
@@ -1077,7 +1130,7 @@
 
 	if (ret) {
 		DRM_DEBUG_KMS("failed to setup backlight for connector %s\n",
-			      drm_get_connector_name(connector));
+			      connector->name);
 		return ret;
 	}
 
@@ -1103,59 +1156,6 @@
 	intel_backlight_device_unregister(intel_connector);
 }
 
-/**
- * intel_find_panel_downclock - find the reduced downclock for LVDS in EDID
- * @dev: drm device
- * @fixed_mode : panel native mode
- * @connector: LVDS/eDP connector
- *
- * Return downclock_avail
- * Find the reduced downclock for LVDS/eDP in EDID.
- */
-struct drm_display_mode *
-intel_find_panel_downclock(struct drm_device *dev,
-			struct drm_display_mode *fixed_mode,
-			struct drm_connector *connector)
-{
-	struct drm_display_mode *scan, *tmp_mode;
-	int temp_downclock;
-
-	temp_downclock = fixed_mode->clock;
-	tmp_mode = NULL;
-
-	list_for_each_entry(scan, &connector->probed_modes, head) {
-		/*
-		 * If one mode has the same resolution with the fixed_panel
-		 * mode while they have the different refresh rate, it means
-		 * that the reduced downclock is found. In such
-		 * case we can set the different FPx0/1 to dynamically select
-		 * between low and high frequency.
-		 */
-		if (scan->hdisplay == fixed_mode->hdisplay &&
-		    scan->hsync_start == fixed_mode->hsync_start &&
-		    scan->hsync_end == fixed_mode->hsync_end &&
-		    scan->htotal == fixed_mode->htotal &&
-		    scan->vdisplay == fixed_mode->vdisplay &&
-		    scan->vsync_start == fixed_mode->vsync_start &&
-		    scan->vsync_end == fixed_mode->vsync_end &&
-		    scan->vtotal == fixed_mode->vtotal) {
-			if (scan->clock < temp_downclock) {
-				/*
-				 * The downclock is already found. But we
-				 * expect to find the lower downclock.
-				 */
-				temp_downclock = scan->clock;
-				tmp_mode = scan;
-			}
-		}
-	}
-
-	if (temp_downclock < fixed_mode->clock)
-		return drm_mode_duplicate(dev, tmp_mode);
-	else
-		return NULL;
-}
-
 /* Set up chip specific backlight functions */
 void intel_panel_init_backlight_funcs(struct drm_device *dev)
 {

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d93dcf6..d1e53ab 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c

@@ -487,7 +487,7 @@
 	 *   - new fb is too large to fit in compressed buffer
 	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
 	 */
-	list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, tmp_crtc) {
 		if (intel_crtc_active(tmp_crtc) &&
 		    to_intel_crtc(tmp_crtc)->primary_enabled) {
 			if (crtc) {
@@ -1010,7 +1010,7 @@
 {
 	struct drm_crtc *crtc, *enabled = NULL;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	for_each_crtc(dev, crtc) {
 		if (intel_crtc_active(crtc)) {
 			if (enabled)
 				return NULL;
@@ -1831,6 +1831,40 @@
 		return 512;
 }
 
+static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
+					 int level, bool is_sprite)
+{
+	if (INTEL_INFO(dev)->gen >= 8)
+		/* BDW primary/sprite plane watermarks */
+		return level == 0 ? 255 : 2047;
+	else if (INTEL_INFO(dev)->gen >= 7)
+		/* IVB/HSW primary/sprite plane watermarks */
+		return level == 0 ? 127 : 1023;
+	else if (!is_sprite)
+		/* ILK/SNB primary plane watermarks */
+		return level == 0 ? 127 : 511;
+	else
+		/* ILK/SNB sprite plane watermarks */
+		return level == 0 ? 63 : 255;
+}
+
+static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
+					  int level)
+{
+	if (INTEL_INFO(dev)->gen >= 7)
+		return level == 0 ? 63 : 255;
+	else
+		return level == 0 ? 31 : 63;
+}
+
+static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
+{
+	if (INTEL_INFO(dev)->gen >= 8)
+		return 31;
+	else
+		return 15;
+}
+
 /* Calculate the maximum primary/sprite plane watermark */
 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
 				     int level,
@@ -1839,7 +1873,6 @@
 				     bool is_sprite)
 {
 	unsigned int fifo_size = ilk_display_fifo_size(dev);
-	unsigned int max;
 
 	/* if sprites aren't enabled, sprites get nothing */
 	if (is_sprite && !config->sprites_enabled)
@@ -1870,19 +1903,7 @@
 	}
 
 	/* clamp to max that the registers can hold */
-	if (INTEL_INFO(dev)->gen >= 8)
-		max = level == 0 ? 255 : 2047;
-	else if (INTEL_INFO(dev)->gen >= 7)
-		/* IVB/HSW primary/sprite plane watermarks */
-		max = level == 0 ? 127 : 1023;
-	else if (!is_sprite)
-		/* ILK/SNB primary plane watermarks */
-		max = level == 0 ? 127 : 511;
-	else
-		/* ILK/SNB sprite plane watermarks */
-		max = level == 0 ? 63 : 255;
-
-	return min(fifo_size, max);
+	return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
 }
 
 /* Calculate the maximum cursor plane watermark */
@@ -1895,20 +1916,7 @@
 		return 64;
 
 	/* otherwise just report max that registers can hold */
-	if (INTEL_INFO(dev)->gen >= 7)
-		return level == 0 ? 63 : 255;
-	else
-		return level == 0 ? 31 : 63;
-}
-
-/* Calculate the maximum FBC watermark */
-static unsigned int ilk_fbc_wm_max(const struct drm_device *dev)
-{
-	/* max that registers can hold */
-	if (INTEL_INFO(dev)->gen >= 8)
-		return 31;
-	else
-		return 15;
+	return ilk_cursor_wm_reg_max(dev, level);
 }
 
 static void ilk_compute_wm_maximums(const struct drm_device *dev,
@@ -1920,7 +1928,17 @@
 	max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
 	max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
 	max->cur = ilk_cursor_wm_max(dev, level, config);
-	max->fbc = ilk_fbc_wm_max(dev);
+	max->fbc = ilk_fbc_wm_reg_max(dev);
+}
+
+static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
+					int level,
+					struct ilk_wm_maximums *max)
+{
+	max->pri = ilk_plane_wm_reg_max(dev, level, false);
+	max->spr = ilk_plane_wm_reg_max(dev, level, true);
+	max->cur = ilk_cursor_wm_reg_max(dev, level);
+	max->fbc = ilk_fbc_wm_reg_max(dev);
 }
 
 static bool ilk_validate_wm_level(int level,
@@ -2059,7 +2077,7 @@
 		wm[3] *= 2;
 }
 
-static int ilk_wm_max_level(const struct drm_device *dev)
+int ilk_wm_max_level(const struct drm_device *dev)
 {
 	/* how many WM levels are we expecting */
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@@ -2155,38 +2173,52 @@
 }
 
 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
-				      struct ilk_pipe_wm_parameters *p,
-				      struct intel_wm_config *config)
+				      struct ilk_pipe_wm_parameters *p)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	struct drm_plane *plane;
 
-	p->active = intel_crtc_active(crtc);
-	if (p->active) {
-		p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
-		p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
-		p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8;
-		p->cur.bytes_per_pixel = 4;
-		p->pri.horiz_pixels = intel_crtc->config.pipe_src_w;
-		p->cur.horiz_pixels = intel_crtc->cursor_width;
-		/* TODO: for now, assume primary and cursor planes are always enabled. */
-		p->pri.enabled = true;
-		p->cur.enabled = true;
-	}
+	if (!intel_crtc_active(crtc))
+		return;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		config->num_pipes_active += intel_crtc_active(crtc);
+	p->active = true;
+	p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
+	p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
+	p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8;
+	p->cur.bytes_per_pixel = 4;
+	p->pri.horiz_pixels = intel_crtc->config.pipe_src_w;
+	p->cur.horiz_pixels = intel_crtc->cursor_width;
+	/* TODO: for now, assume primary and cursor planes are always enabled. */
+	p->pri.enabled = true;
+	p->cur.enabled = true;
 
 	drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) {
 		struct intel_plane *intel_plane = to_intel_plane(plane);
 
-		if (intel_plane->pipe == pipe)
+		if (intel_plane->pipe == pipe) {
 			p->spr = intel_plane->wm;
+			break;
+		}
+	}
+}
 
-		config->sprites_enabled |= intel_plane->wm.enabled;
-		config->sprites_scaled |= intel_plane->wm.scaled;
+static void ilk_compute_wm_config(struct drm_device *dev,
+				  struct intel_wm_config *config)
+{
+	struct intel_crtc *intel_crtc;
+
+	/* Compute the currently _active_ config */
+	for_each_intel_crtc(dev, intel_crtc) {
+		const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
+
+		if (!wm->pipe_enabled)
+			continue;
+
+		config->sprites_enabled |= wm->sprites_enabled;
+		config->sprites_scaled |= wm->sprites_scaled;
+		config->num_pipes_active++;
 	}
 }
 
@@ -2206,8 +2238,9 @@
 	};
 	struct ilk_wm_maximums max;
 
-	/* LP0 watermarks always use 1/2 DDB partitioning */
-	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
+	pipe_wm->pipe_enabled = params->active;
+	pipe_wm->sprites_enabled = params->spr.enabled;
+	pipe_wm->sprites_scaled = params->spr.scaled;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
 	if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
@@ -2217,15 +2250,37 @@
 	if (params->spr.scaled)
 		max_level = 0;
 
-	for (level = 0; level <= max_level; level++)
-		ilk_compute_wm_level(dev_priv, level, params,
-				     &pipe_wm->wm[level]);
+	ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]);
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
 
+	/* LP0 watermarks always use 1/2 DDB partitioning */
+	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
+
 	/* At least LP0 must be valid */
-	return ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]);
+	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
+		return false;
+
+	ilk_compute_wm_reg_maximums(dev, 1, &max);
+
+	for (level = 1; level <= max_level; level++) {
+		struct intel_wm_level wm = {};
+
+		ilk_compute_wm_level(dev_priv, level, params, &wm);
+
+		/*
+		 * Disable any watermark level that exceeds the
+		 * register maximums since such watermarks are
+		 * always invalid.
+		 */
+		if (!ilk_validate_wm_level(level, &max, &wm))
+			break;
+
+		pipe_wm->wm[level] = wm;
+	}
+
+	return true;
 }
 
 /*
@@ -2237,20 +2292,28 @@
 {
 	const struct intel_crtc *intel_crtc;
 
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
-		const struct intel_wm_level *wm =
-			&intel_crtc->wm.active.wm[level];
+	ret_wm->enable = true;
 
+	for_each_intel_crtc(dev, intel_crtc) {
+		const struct intel_pipe_wm *active = &intel_crtc->wm.active;
+		const struct intel_wm_level *wm = &active->wm[level];
+
+		if (!active->pipe_enabled)
+			continue;
+
+		/*
+		 * The watermark values may have been used in the past,
+		 * so we must maintain them in the registers for some
+		 * time even if the level is now disabled.
+		 */
 		if (!wm->enable)
-			return;
+			ret_wm->enable = false;
 
 		ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
 		ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
 		ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
 		ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
 	}
-
-	ret_wm->enable = true;
 }
 
 /*
@@ -2262,6 +2325,7 @@
 			 struct intel_pipe_wm *merged)
 {
 	int level, max_level = ilk_wm_max_level(dev);
+	int last_enabled_level = max_level;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
 	if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
@@ -2277,15 +2341,19 @@
 
 		ilk_merge_wm_level(dev, level, wm);
 
-		if (!ilk_validate_wm_level(level, max, wm))
-			break;
+		if (level > last_enabled_level)
+			wm->enable = false;
+		else if (!ilk_validate_wm_level(level, max, wm))
+			/* make sure all following levels get disabled */
+			last_enabled_level = level - 1;
 
 		/*
 		 * The spec says it is preferred to disable
 		 * FBC WMs instead of disabling a WM level.
 		 */
 		if (wm->fbc_val > max->fbc) {
-			merged->fbc_wm_enabled = false;
+			if (wm->enable)
+				merged->fbc_wm_enabled = false;
 			wm->fbc_val = 0;
 		}
 	}
@@ -2340,14 +2408,19 @@
 		level = ilk_wm_lp_to_level(wm_lp, merged);
 
 		r = &merged->wm[level];
-		if (!r->enable)
-			break;
 
-		results->wm_lp[wm_lp - 1] = WM3_LP_EN |
+		/*
+		 * Maintain the watermark values even if the level is
+		 * disabled. Doing otherwise could cause underruns.
+		 */
+		results->wm_lp[wm_lp - 1] =
 			(ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
 			(r->pri_val << WM1_LP_SR_SHIFT) |
 			r->cur_val;
 
+		if (r->enable)
+			results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
+
 		if (INTEL_INFO(dev)->gen >= 8)
 			results->wm_lp[wm_lp - 1] |=
 				r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
@@ -2355,6 +2428,10 @@
 			results->wm_lp[wm_lp - 1] |=
 				r->fbc_val << WM1_LP_FBC_SHIFT;
 
+		/*
+		 * Always set WM1S_LP_EN when spr_val != 0, even if the
+		 * level is disabled. Doing otherwise could cause underruns.
+		 */
 		if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
 			WARN_ON(wm_lp != 1);
 			results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
@@ -2363,7 +2440,7 @@
 	}
 
 	/* LP0 register values */
-	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
+	for_each_intel_crtc(dev, intel_crtc) {
 		enum pipe pipe = intel_crtc->pipe;
 		const struct intel_wm_level *r =
 			&intel_crtc->wm.active.wm[0];
@@ -2598,7 +2675,7 @@
 	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
 	struct intel_wm_config config = {};
 
-	ilk_compute_wm_parameters(crtc, &params, &config);
+	ilk_compute_wm_parameters(crtc, &params);
 
 	intel_compute_pipe_wm(crtc, &params, &pipe_wm);
 
@@ -2607,6 +2684,8 @@
 
 	intel_crtc->wm.active = pipe_wm;
 
+	ilk_compute_wm_config(dev, &config);
+
 	ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
 	ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
 
@@ -2673,7 +2752,9 @@
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
 
-	if (intel_crtc_active(crtc)) {
+	active->pipe_enabled = intel_crtc_active(crtc);
+
+	if (active->pipe_enabled) {
 		u32 tmp = hw->wm_pipe[pipe];
 
 		/*
@@ -2706,7 +2787,7 @@
 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
 	struct drm_crtc *crtc;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+	for_each_crtc(dev, crtc)
 		ilk_pipe_wm_get_hw_state(crtc);
 
 	hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
@@ -2714,8 +2795,10 @@
 	hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
 
 	hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
-	hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
-	hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
+	if (INTEL_INFO(dev)->gen >= 7) {
+		hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
+		hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
+	}
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
@@ -3071,6 +3154,9 @@
 	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
 		mask |= GEN6_PM_RP_UP_EI_EXPIRED;
 
+	if (IS_GEN8(dev_priv->dev))
+		mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
+
 	return ~mask;
 }
 
@@ -3091,7 +3177,7 @@
 	if (val != dev_priv->rps.cur_freq) {
 		gen6_set_rps_thresholds(dev_priv, val);
 
-		if (IS_HASWELL(dev))
+		if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 			I915_WRITE(GEN6_RPNSWREQ,
 				   HSW_FREQUENCY(val));
 		else
@@ -3134,16 +3220,7 @@
 	/* Mask turbo interrupt so that they will not come in between */
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 
-	/* Bring up the Gfx clock */
-	I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
-		I915_READ(VLV_GTLC_SURVIVABILITY_REG) |
-				VLV_GFX_CLK_FORCE_ON_BIT);
-
-	if (wait_for(((VLV_GFX_CLK_STATUS_BIT &
-		I915_READ(VLV_GTLC_SURVIVABILITY_REG)) != 0), 5)) {
-			DRM_ERROR("GFX_CLK_ON request timed out\n");
-		return;
-	}
+	vlv_force_gfx_clock(dev_priv, true);
 
 	dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
 
@@ -3154,10 +3231,7 @@
 				& GENFREQSTATUS) == 0, 5))
 		DRM_ERROR("timed out waiting for Punit\n");
 
-	/* Release the Gfx clock */
-	I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
-		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
-				~VLV_GFX_CLK_FORCE_ON_BIT);
+	vlv_force_gfx_clock(dev_priv, false);
 
 	I915_WRITE(GEN6_PMINTRMSK,
 		   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
@@ -3215,6 +3289,26 @@
 	trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val));
 }
 
+static void gen8_disable_rps_interrupts(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
+	I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
+				   ~dev_priv->pm_rps_events);
+	/* Complete PM interrupt masking here doesn't race with the rps work
+	 * item again unmasking PM interrupts because that is using a different
+	 * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
+	 * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
+	 * gen8_enable_rps will clean up. */
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	dev_priv->rps.pm_iir = 0;
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+}
+
 static void gen6_disable_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3241,7 +3335,10 @@
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
 
-	gen6_disable_rps_interrupts(dev);
+	if (IS_BROADWELL(dev))
+		gen8_disable_rps_interrupts(dev);
+	else
+		gen6_disable_rps_interrupts(dev);
 }
 
 static void valleyview_disable_rps(struct drm_device *dev)
@@ -3255,21 +3352,44 @@
 
 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
 {
+	if (IS_VALLEYVIEW(dev)) {
+		if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
+			mode = GEN6_RC_CTL_RC6_ENABLE;
+		else
+			mode = 0;
+	}
 	DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
 		 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
 		 (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
 		 (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
 }
 
-int intel_enable_rc6(const struct drm_device *dev)
+static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
 {
 	/* No RC6 before Ironlake */
 	if (INTEL_INFO(dev)->gen < 5)
 		return 0;
 
+	/* RC6 is only on Ironlake mobile not on desktop */
+	if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
+		return 0;
+
 	/* Respect the kernel parameter if it is set */
-	if (i915.enable_rc6 >= 0)
-		return i915.enable_rc6;
+	if (enable_rc6 >= 0) {
+		int mask;
+
+		if (INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev))
+			mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
+			       INTEL_RC6pp_ENABLE;
+		else
+			mask = INTEL_RC6_ENABLE;
+
+		if ((enable_rc6 & mask) != enable_rc6)
+			DRM_INFO("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
+				 enable_rc6 & mask, enable_rc6, mask);
+
+		return enable_rc6 & mask;
+	}
 
 	/* Disable RC6 on Ironlake */
 	if (INTEL_INFO(dev)->gen == 5)
@@ -3281,6 +3401,22 @@
 	return INTEL_RC6_ENABLE;
 }
 
+int intel_enable_rc6(const struct drm_device *dev)
+{
+	return i915.enable_rc6;
+}
+
+static void gen8_enable_rps_interrupts(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	WARN_ON(dev_priv->rps.pm_iir);
+	bdw_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
 static void gen6_enable_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3292,10 +3428,31 @@
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
+static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_cap)
+{
+	/* All of these values are in units of 50MHz */
+	dev_priv->rps.cur_freq		= 0;
+	/* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
+	dev_priv->rps.rp1_freq		= (rp_state_cap >>  8) & 0xff;
+	dev_priv->rps.rp0_freq		= (rp_state_cap >>  0) & 0xff;
+	dev_priv->rps.min_freq		= (rp_state_cap >> 16) & 0xff;
+	/* XXX: only BYT has a special efficient freq */
+	dev_priv->rps.efficient_freq	= dev_priv->rps.rp1_freq;
+	/* hw_max = RP0 until we check for overclocking */
+	dev_priv->rps.max_freq		= dev_priv->rps.rp0_freq;
+
+	/* Preserve min/max settings in case of re-init */
+	if (dev_priv->rps.max_freq_softlimit == 0)
+		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+
+	if (dev_priv->rps.min_freq_softlimit == 0)
+		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+}
+
 static void gen8_enable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	uint32_t rc6_mask = 0, rp_state_cap;
 	int unused;
 
@@ -3310,6 +3467,7 @@
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+	parse_rp_state_cap(dev_priv, rp_state_cap);
 
 	/* 2b: Program RC6 thresholds.*/
 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
@@ -3329,8 +3487,10 @@
 				    rc6_mask);
 
 	/* 4 Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
-	I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(12)); /* Request 600 MHz */
+	I915_WRITE(GEN6_RPNSWREQ,
+		   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+	I915_WRITE(GEN6_RC_VIDEO_FREQ,
+		   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
 	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
 
@@ -3346,11 +3506,15 @@
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
+	/* WaDisablePwrmtrEvent:chv (pre-production hw) */
+	I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff);
+	I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00);
+
 	/* 5: Enable RPS */
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
 		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */
 		   GEN6_RP_ENABLE |
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
@@ -3359,7 +3523,7 @@
 
 	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
 
-	gen6_enable_rps_interrupts(dev);
+	gen8_enable_rps_interrupts(dev);
 
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -3367,7 +3531,7 @@
 static void gen6_enable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	u32 rp_state_cap;
 	u32 gt_perf_status;
 	u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
@@ -3396,23 +3560,7 @@
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
-	/* All of these values are in units of 50MHz */
-	dev_priv->rps.cur_freq		= 0;
-	/* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
-	dev_priv->rps.rp1_freq		= (rp_state_cap >>  8) & 0xff;
-	dev_priv->rps.rp0_freq		= (rp_state_cap >>  0) & 0xff;
-	dev_priv->rps.min_freq		= (rp_state_cap >> 16) & 0xff;
-	/* XXX: only BYT has a special efficient freq */
-	dev_priv->rps.efficient_freq	= dev_priv->rps.rp1_freq;
-	/* hw_max = RP0 until we check for overclocking */
-	dev_priv->rps.max_freq		= dev_priv->rps.rp0_freq;
-
-	/* Preserve min/max settings in case of re-init */
-	if (dev_priv->rps.max_freq_softlimit == 0)
-		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
-
-	if (dev_priv->rps.min_freq_softlimit == 0)
-		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+	parse_rp_state_cap(dev_priv, rp_state_cap);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -3494,7 +3642,7 @@
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-void gen6_update_ring_freq(struct drm_device *dev)
+static void __gen6_update_ring_freq(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int min_freq = 15;
@@ -3564,6 +3712,18 @@
 	}
 }
 
+void gen6_update_ring_freq(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
+		return;
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	__gen6_update_ring_freq(dev);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
 {
 	u32 val, rp0;
@@ -3658,10 +3818,49 @@
 	dev_priv->vlv_pctx = NULL;
 }
 
+static void valleyview_init_gt_powersave(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	valleyview_setup_pctx(dev);
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+	dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
+	dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
+			 dev_priv->rps.max_freq);
+
+	dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
+			 dev_priv->rps.efficient_freq);
+
+	dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+			 dev_priv->rps.min_freq);
+
+	/* Preserve min/max settings in case of re-init */
+	if (dev_priv->rps.max_freq_softlimit == 0)
+		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+
+	if (dev_priv->rps.min_freq_softlimit == 0)
+		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
+{
+	valleyview_cleanup_pctx(dev);
+}
+
 static void valleyview_enable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	u32 gtfifodbg, val, rc6_mode = 0;
 	int i;
 
@@ -3724,29 +3923,6 @@
 			 vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
 			 dev_priv->rps.cur_freq);
 
-	dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
-	dev_priv->rps.rp0_freq  = dev_priv->rps.max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
-			 dev_priv->rps.max_freq);
-
-	dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
-			 dev_priv->rps.efficient_freq);
-
-	dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
-			 dev_priv->rps.min_freq);
-
-	/* Preserve min/max settings in case of re-init */
-	if (dev_priv->rps.max_freq_softlimit == 0)
-		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
-
-	if (dev_priv->rps.min_freq_softlimit == 0)
-		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
-
 	DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
 			 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
 			 dev_priv->rps.efficient_freq);
@@ -3815,7 +3991,7 @@
 static void ironlake_enable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	bool was_interruptible;
 	int ret;
 
@@ -3873,7 +4049,7 @@
 	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 
-	intel_print_rc6_info(dev, INTEL_RC6_ENABLE);
+	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
 }
 
 static unsigned long intel_pxfreq(u32 vidfreq)
@@ -4327,7 +4503,7 @@
 bool i915_gpu_busy(void)
 {
 	struct drm_i915_private *dev_priv;
-	struct intel_ring_buffer *ring;
+	struct intel_engine_cs *ring;
 	bool ret = false;
 	int i;
 
@@ -4487,14 +4663,16 @@
 
 void intel_init_gt_powersave(struct drm_device *dev)
 {
+	i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
+
 	if (IS_VALLEYVIEW(dev))
-		valleyview_setup_pctx(dev);
+		valleyview_init_gt_powersave(dev);
 }
 
 void intel_cleanup_gt_powersave(struct drm_device *dev)
 {
 	if (IS_VALLEYVIEW(dev))
-		valleyview_cleanup_pctx(dev);
+		valleyview_cleanup_gt_powersave(dev);
 }
 
 void intel_disable_gt_powersave(struct drm_device *dev)
@@ -4507,8 +4685,10 @@
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_disable_drps(dev);
 		ironlake_disable_rc6(dev);
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
+	} else if (IS_GEN6(dev) || IS_GEN7(dev) || IS_BROADWELL(dev)) {
+		if (cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work))
+			intel_runtime_pm_put(dev_priv);
+
 		cancel_work_sync(&dev_priv->rps.work);
 		mutex_lock(&dev_priv->rps.hw_lock);
 		if (IS_VALLEYVIEW(dev))
@@ -4533,13 +4713,15 @@
 		valleyview_enable_rps(dev);
 	} else if (IS_BROADWELL(dev)) {
 		gen8_enable_rps(dev);
-		gen6_update_ring_freq(dev);
+		__gen6_update_ring_freq(dev);
 	} else {
 		gen6_enable_rps(dev);
-		gen6_update_ring_freq(dev);
+		__gen6_update_ring_freq(dev);
 	}
 	dev_priv->rps.enabled = true;
 	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	intel_runtime_pm_put(dev_priv);
 }
 
 void intel_enable_gt_powersave(struct drm_device *dev)
@@ -4547,20 +4729,38 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (IS_IRONLAKE_M(dev)) {
+		mutex_lock(&dev->struct_mutex);
 		ironlake_enable_drps(dev);
 		ironlake_enable_rc6(dev);
 		intel_init_emon(dev);
-	} else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+		mutex_unlock(&dev->struct_mutex);
+	} else if (IS_GEN6(dev) || IS_GEN7(dev) || IS_BROADWELL(dev)) {
 		/*
 		 * PCU communication is slow and this doesn't need to be
 		 * done at any specific time, so do this out of our fast path
 		 * to make resume and init faster.
+		 *
+		 * We depend on the HW RC6 power context save/restore
+		 * mechanism when entering D3 through runtime PM suspend. So
+		 * disable RPM until RPS/RC6 is properly setup. We can only
+		 * get here via the driver load/system resume/runtime resume
+		 * paths, so the _noresume version is enough (and in case of
+		 * runtime resume it's necessary).
 		 */
-		schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
-				      round_jiffies_up_relative(HZ));
+		if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
+					   round_jiffies_up_relative(HZ)))
+			intel_runtime_pm_get_noresume(dev_priv);
 	}
 }
 
+void intel_reset_gt_powersave(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->rps.enabled = false;
+	intel_enable_gt_powersave(dev);
+}
+
 static void ibx_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4666,6 +4866,9 @@
 	I915_WRITE(CACHE_MODE_0,
 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:ilk */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	g4x_disable_trickle_feed(dev);
 
 	ibx_init_clock_gating(dev);
@@ -4741,6 +4944,9 @@
 		I915_WRITE(GEN6_GT_MODE,
 			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:snb */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/*
 	 * BSpec recoomends 8x4 when MSAA is used,
 	 * however in practice 16x4 seems fastest.
@@ -4909,6 +5115,10 @@
 	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
 		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
 
+	/* WaDisableDopClockGating:bdw May not be needed for production */
+	I915_WRITE(GEN7_ROW_CHICKEN2,
+		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
 	/* WaSwitchSolVfFArbitrationPriority:bdw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -4980,6 +5190,9 @@
 	I915_WRITE(GEN7_FF_THREAD_MODE,
 		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
 
+	/* WaDisable_RenderCache_OperationalFlush:hsw */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* enable HiZ Raw Stall Optimization */
 	I915_WRITE(CACHE_MODE_0_GEN7,
 		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
@@ -5032,6 +5245,9 @@
 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:ivb */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -5126,6 +5342,10 @@
 	}
 	DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
 
+	dev_priv->vlv_cdclk_freq = valleyview_cur_cdclk(dev_priv);
+	DRM_DEBUG_DRIVER("Current CD clock rate: %d MHz",
+			 dev_priv->vlv_cdclk_freq);
+
 	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaDisableEarlyCull:vlv */
@@ -5143,6 +5363,9 @@
 		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
 				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:vlv */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* WaForceL3Serialization:vlv */
 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
@@ -5165,8 +5388,11 @@
 	I915_WRITE(GEN6_UCGCTL2,
 		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
-	/* WaDisableL3Bank2xClockGate:vlv */
-	I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
+	/* WaDisableL3Bank2xClockGate:vlv
+	 * Disabling L3 clock gating- MMIO 940c[25] = 1
+	 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
 	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
@@ -5191,6 +5417,59 @@
 	I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
+static void cherryview_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private; /* NOTE(review): not named again below; presumably consumed by the I915_READ/I915_WRITE macros */
+
+	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); /* plain write: the old register value is not read first */
+
+	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); /* plain write as well */
+
+	/* WaDisablePartialInstShootdown:chv */
+	I915_WRITE(GEN8_ROW_CHICKEN,
+		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+
+	/* WaDisableThreadStallDopClockGating:chv */
+	I915_WRITE(GEN8_ROW_CHICKEN,
+		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+
+	/* WaVSRefCountFullforceMissDisable:chv */
+	/* WaDSRefCountFullforceMissDisable:chv */
+	I915_WRITE(GEN7_FF_THREAD_MODE,
+		   I915_READ(GEN7_FF_THREAD_MODE) &
+		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); /* read-modify-write: clears only these two bits */
+
+	/* WaDisableSemaphoreAndSyncFlipWait:chv */
+	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
+		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+
+	/* WaDisableCSUnitClockGating:chv (read-modify-write: other bits are kept) */
+	I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSDEUnitClockGating:chv (read-modify-write: other bits are kept) */
+	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
+	I915_WRITE(HALF_SLICE_CHICKEN3,
+		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+	/* WaDisableGunitClockGating:chv (pre-production hw) */
+	I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) |
+		   GINT_DIS);
+
+	/* WaDisableFfDopClockGating:chv (pre-production hw) */
+	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
+		   _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE));
+
+	/* WaDisableDopClockGating:chv (pre-production hw) */
+	I915_WRITE(GEN7_ROW_CHICKEN2,
+		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+		   GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); /* NOTE(review): no Wa tag of its own; presumably part of WaDisableDopClockGating -- confirm */
+}
+
 static void g4x_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5212,6 +5491,9 @@
 	I915_WRITE(CACHE_MODE_0,
 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:g4x */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	g4x_disable_trickle_feed(dev);
 }
 
@@ -5226,6 +5508,9 @@
 	I915_WRITE16(DEUC, 0);
 	I915_WRITE(MI_ARB_STATE,
 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+
+	/* WaDisable_RenderCache_OperationalFlush:gen4 */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
 static void broadwater_init_clock_gating(struct drm_device *dev)
@@ -5240,6 +5525,9 @@
 	I915_WRITE(RENCLK_GATE_D2, 0);
 	I915_WRITE(MI_ARB_STATE,
 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+
+	/* WaDisable_RenderCache_OperationalFlush:gen4 */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
 static void gen3_init_clock_gating(struct drm_device *dev)
@@ -5256,6 +5544,12 @@
 
 	/* IIR "flip pending" means done if this bit is set */
 	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
+
+	/* interrupts should cause a wake up from C3 */
+	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
+
+	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
+	I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
 }
 
 static void i85x_init_clock_gating(struct drm_device *dev)
@@ -5263,6 +5557,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
+
+	/* interrupts should cause a wake up from C3 */
+	I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
+		   _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
 }
 
 static void i830_init_clock_gating(struct drm_device *dev)
@@ -5314,10 +5612,25 @@
 				    enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains;
+	struct i915_power_well *power_well;
+	bool is_enabled;
+	int i;
+
+	if (dev_priv->pm.suspended)
+		return false;
 
 	power_domains = &dev_priv->power_domains;
+	is_enabled = true;
+	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
+		if (power_well->always_on)
+			continue;
 
-	return power_domains->domain_use_count[domain];
+		if (!power_well->count) {
+			is_enabled = false;
+			break;
+		}
+	}
+	return is_enabled;
 }
 
 bool intel_display_power_enabled(struct drm_i915_private *dev_priv,
@@ -5392,33 +5705,6 @@
 	}
 }
 
-static void reset_vblank_counter(struct drm_device *dev, enum pipe pipe)
-{
-	assert_spin_locked(&dev->vbl_lock);
-
-	dev->vblank[pipe].last = 0;
-}
-
-static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	enum pipe pipe;
-	unsigned long irqflags;
-
-	/*
-	 * After this, the registers on the pipes that are part of the power
-	 * well will become zero, so we have to adjust our counters according to
-	 * that.
-	 *
-	 * FIXME: Should we do this in general in drm_vblank_post_modeset?
-	 */
-	spin_lock_irqsave(&dev->vbl_lock, irqflags);
-	for_each_pipe(pipe)
-		if (pipe != PIPE_A)
-			reset_vblank_counter(dev, pipe);
-	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
-}
-
 static void hsw_set_power_well(struct drm_i915_private *dev_priv,
 			       struct i915_power_well *power_well, bool enable)
 {
@@ -5447,8 +5733,6 @@
 			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
 			POSTING_READ(HSW_PWR_WELL_DRIVER);
 			DRM_DEBUG_KMS("Requesting to disable the power well\n");
-
-			hsw_power_well_post_disable(dev_priv);
 		}
 	}
 }
@@ -5489,13 +5773,34 @@
 	return true;
 }
 
-static void vlv_set_power_well(struct drm_i915_private *dev_priv,
-			       struct i915_power_well *power_well, bool enable)
+void __vlv_set_power_well(struct drm_i915_private *dev_priv,
+			  enum punit_power_well power_well_id, bool enable)
 {
-	enum punit_power_well power_well_id = power_well->data;
+	struct drm_device *dev = dev_priv->dev;
 	u32 mask;
 	u32 state;
 	u32 ctrl;
+	enum pipe pipe;
+
+	if (power_well_id == PUNIT_POWER_WELL_DPIO_CMN_BC) {
+		if (enable) {
+			/*
+			 * Enable the CRI clock source so we can get at the
+			 * display and the reference clock for VGA
+			 * hotplug / manual detection.
+			 */
+			I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
+				   DPLL_REFA_CLK_ENABLE_VLV |
+				   DPLL_INTEGRATED_CRI_CLK_VLV);
+			udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
+		} else {
+			for_each_pipe(pipe)
+				assert_pll_disabled(dev_priv, pipe);
+			/* Assert common reset */
+			I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) &
+				   ~DPIO_CMNRST);
+		}
+	}
 
 	mask = PUNIT_PWRGT_MASK(power_well_id);
 	state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
@@ -5523,6 +5828,28 @@
 
 out:
 	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	/*
+	 * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
+	 *  6.	De-assert cmn_reset/side_reset. Same as VLV X0.
+	 *   a.	GUnit 0x2110 bit[0] set to 1 (def 0)
+	 *   b.	The other bits such as sfr settings / modesel may all
+	 *	be set to 0.
+	 *
+	 * This should only be done on init and resume from S3 with
+	 * both PLLs disabled, or we risk losing DPIO and PLL
+	 * synchronization.
+	 */
+	if (power_well_id == PUNIT_POWER_WELL_DPIO_CMN_BC && enable)
+		I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
+}
+
+static void vlv_set_power_well(struct drm_i915_private *dev_priv,
+			       struct i915_power_well *power_well, bool enable)
+{
+	enum punit_power_well power_well_id = power_well->data; /* ->data carries the punit power well id */
+
+	__vlv_set_power_well(dev_priv, power_well_id, enable); /* all the work happens in the id-based helper */
 }
 
 static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv,
@@ -5591,11 +5918,13 @@
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/*
-	 * During driver initialization we need to defer enabling hotplug
-	 * processing until fbdev is set up.
+	 * During driver initialization/resume we can avoid restoring the
+	 * part of the HW/SW state that will be inited anyway explicitly.
 	 */
-	if (dev_priv->enable_hotplug_processing)
-		intel_hpd_init(dev_priv->dev);
+	if (dev_priv->power_domains.initializing)
+		return;
+
+	intel_hpd_init(dev_priv->dev);
 
 	i915_redisable_vga_power_on(dev_priv->dev);
 }
@@ -5603,23 +5932,12 @@
 static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
-	struct drm_device *dev = dev_priv->dev;
-	enum pipe pipe;
-
 	WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D);
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	for_each_pipe(pipe)
-		__intel_set_cpu_fifo_underrun_reporting(dev, pipe, false);
-
 	valleyview_disable_display_irqs(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	spin_lock_irq(&dev->vbl_lock);
-	for_each_pipe(pipe)
-		reset_vblank_counter(dev, pipe);
-	spin_unlock_irq(&dev->vbl_lock);
-
 	vlv_set_power_well(dev_priv, power_well, false);
 }
 
@@ -5867,12 +6185,6 @@
 		.ops = &vlv_display_power_well_ops,
 	},
 	{
-		.name = "dpio-common",
-		.domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
-		.data = PUNIT_POWER_WELL_DPIO_CMN_BC,
-		.ops = &vlv_dpio_power_well_ops,
-	},
-	{
 		.name = "dpio-tx-b-01",
 		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
 			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
@@ -5908,6 +6220,12 @@
 		.ops = &vlv_dpio_power_well_ops,
 		.data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
 	},
+	{
+		.name = "dpio-common",
+		.domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
+		.data = PUNIT_POWER_WELL_DPIO_CMN_BC,
+		.ops = &vlv_dpio_power_well_ops,
+	},
 };
 
 #define set_power_wells(power_domains, __power_wells) ({		\
@@ -5959,9 +6277,13 @@
 
 void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
 {
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+	power_domains->initializing = true; /* set only across the two calls below; code that tests it returns before intel_hpd_init() */
 	/* For now, we need the power well to be always enabled. */
 	intel_display_set_init_power(dev_priv, true);
 	intel_power_domains_resume(dev_priv);
+	power_domains->initializing = false; /* cleared again once init power and resume have run */
 }
 
 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
@@ -5986,6 +6308,18 @@
 	WARN(dev_priv->pm.suspended, "Device still suspended.\n");
 }
 
+void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct device *device = &dev->pdev->dev; /* struct device embedded in dev->pdev */
+
+	if (!HAS_RUNTIME_PM(dev))
+		return; /* no-op when HAS_RUNTIME_PM() is false */
+
+	WARN(dev_priv->pm.suspended, "Getting nosync-ref while suspended.\n");
+	pm_runtime_get_noresume(device); /* called unconditionally, i.e. even when the WARN above fired */
+}
+
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -6008,6 +6342,15 @@
 
 	pm_runtime_set_active(device);
 
+	/*
+	 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
+	 * requirement.
+	 */
+	if (!intel_enable_rc6(dev)) {
+		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
+		return;
+	}
+
 	pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */
 	pm_runtime_mark_last_busy(device);
 	pm_runtime_use_autosuspend(device);
@@ -6023,6 +6366,9 @@
 	if (!HAS_RUNTIME_PM(dev))
 		return;
 
+	if (!intel_enable_rc6(dev))
+		return;
+
 	/* Make sure we're not suspended first. */
 	pm_runtime_get_sync(device);
 	pm_runtime_disable(device);
@@ -6087,6 +6433,10 @@
 			dev_priv->display.init_clock_gating = haswell_init_clock_gating;
 		else if (INTEL_INFO(dev)->gen == 8)
 			dev_priv->display.init_clock_gating = gen8_init_clock_gating;
+	} else if (IS_CHERRYVIEW(dev)) {
+		dev_priv->display.update_wm = valleyview_update_wm;
+		dev_priv->display.init_clock_gating =
+			cherryview_init_clock_gating;
 	} else if (IS_VALLEYVIEW(dev)) {
 		dev_priv->display.update_wm = valleyview_update_wm;
 		dev_priv->display.init_clock_gating =

diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/intel_renderstate.h
new file mode 100644
index 0000000..a5e783a
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate.h

@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_RENDERSTATE_H
+#define _INTEL_RENDERSTATE_H
+
+#include <linux/types.h>
+
+struct intel_renderstate_rodata {
+	const u32 *reloc;	/* relocation table; RO_RENDERSTATE points it at genN_null_state_relocs */
+	const u32 reloc_items;	/* number of u32 entries in reloc (sizeof/4 in RO_RENDERSTATE) */
+	const u32 *batch;	/* batch dwords; RO_RENDERSTATE points it at genN_null_state_batch */
+	const u32 batch_items;	/* number of u32 entries in batch (sizeof/4 in RO_RENDERSTATE) */
+};
+
+extern const struct intel_renderstate_rodata gen6_null_state;
+extern const struct intel_renderstate_rodata gen7_null_state;
+extern const struct intel_renderstate_rodata gen8_null_state;
+
+#define RO_RENDERSTATE(_g)						\
+	const struct intel_renderstate_rodata gen ## _g ## _null_state = { \
+		.reloc = gen ## _g ## _null_state_relocs,		\
+		.reloc_items = sizeof(gen ## _g ## _null_state_relocs)/4, \
+		.batch = gen ## _g ## _null_state_batch,		\
+		.batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \
+	} /* "/4" turns byte sizes into u32 counts; gen<_g>_null_state_relocs and gen<_g>_null_state_batch must be arrays (not pointers) visible at the point of use */
+
+#endif /* _INTEL_RENDERSTATE_H */

diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.c b/drivers/gpu/drm/i915/intel_renderstate_gen6.c
new file mode 100644
index 0000000..740538a
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.c

@@ -0,0 +1,289 @@
+#include "intel_renderstate.h"
+
+static const u32 gen6_null_state_relocs[] = { /* byte offsets into gen6_null_state_batch; each lands on a dword tagged "reloc" there */
+	0x00000020,	/* batch dword 8 */
+	0x00000024,	/* batch dword 9 */
+	0x0000002c,	/* batch dword 11 */
+	0x000001e0,	/* batch dword 120 */
+	0x000001e4,	/* batch dword 121 */
+};
+
+static const u32 gen6_null_state_batch[] = {
+	0x69040000,
+	0x790d0001,
+	0x00000000,
+	0x00000000,
+	0x78180000,
+	0x00000001,
+	0x61010008,
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000001,
+	0x00000000,
+	0x00000001,
+	0x61020000,
+	0x00000000,
+	0x78050001,
+	0x00000018,
+	0x00000000,
+	0x780d1002,
+	0x00000000,
+	0x00000000,
+	0x00000420,
+	0x78150003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78100004,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78160003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78110005,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78120002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78170003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x79050005,
+	0xe0040000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x79100000,
+	0x00000000,
+	0x79000002,
+	0xffffffff,
+	0x00000000,
+	0x00000000,
+	0x780e0002,
+	0x00000441,
+	0x00000401,
+	0x00000401,
+	0x78021002,
+	0x00000000,
+	0x00000000,
+	0x00000400,
+	0x78130012,
+	0x00400810,
+	0x00000000,
+	0x20000000,
+	0x04000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78140007,
+	0x00000280,
+	0x08080000,
+	0x00000000,
+	0x00060000,
+	0x4e080002,
+	0x00100400,
+	0x00000000,
+	0x00000000,
+	0x78090005,
+	0x02000000,
+	0x22220000,
+	0x02f60000,
+	0x11330000,
+	0x02850004,
+	0x11220000,
+	0x78011002,
+	0x00000000,
+	0x00000000,
+	0x00000200,
+	0x78080003,
+	0x00002000,
+	0x00000448,	 /* reloc */
+	0x00000448,	 /* reloc */
+	0x00000000,
+	0x05000000,	 /* cmds end */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000220,	 /* state start */
+	0x00000240,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0060005a,
+	0x204077be,
+	0x000000c0,
+	0x008d0040,
+	0x0060005a,
+	0x206077be,
+	0x000000c0,
+	0x008d0080,
+	0x0060005a,
+	0x208077be,
+	0x000000d0,
+	0x008d0040,
+	0x0060005a,
+	0x20a077be,
+	0x000000d0,
+	0x008d0080,
+	0x00000201,
+	0x20080061,
+	0x00000000,
+	0x00000000,
+	0x00600001,
+	0x20200022,
+	0x008d0000,
+	0x00000000,
+	0x02800031,
+	0x21c01cc9,
+	0x00000020,
+	0x0a8a0001,
+	0x00600001,
+	0x204003be,
+	0x008d01c0,
+	0x00000000,
+	0x00600001,
+	0x206003be,
+	0x008d01e0,
+	0x00000000,
+	0x00600001,
+	0x208003be,
+	0x008d0200,
+	0x00000000,
+	0x00600001,
+	0x20a003be,
+	0x008d0220,
+	0x00000000,
+	0x00600001,
+	0x20c003be,
+	0x008d0240,
+	0x00000000,
+	0x00600001,
+	0x20e003be,
+	0x008d0260,
+	0x00000000,
+	0x00600001,
+	0x210003be,
+	0x008d0280,
+	0x00000000,
+	0x00600001,
+	0x212003be,
+	0x008d02a0,
+	0x00000000,
+	0x05800031,
+	0x24001cc8,
+	0x00000040,
+	0x90019000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000007e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x30000000,
+	0x00000124,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xf99a130c,
+	0x799a130c,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000031,
+	0x00000003,
+	0x00000000,	 /* state end */
+};
+
+RO_RENDERSTATE(6);

diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.c b/drivers/gpu/drm/i915/intel_renderstate_gen7.c
new file mode 100644
index 0000000..6fa7ff2
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.c

@@ -0,0 +1,253 @@
+#include "intel_renderstate.h"
+
+static const u32 gen7_null_state_relocs[] = { /* byte offsets into gen7_null_state_batch; each lands on a dword tagged "reloc" there */
+	0x0000000c,	/* batch dword 3 */
+	0x00000010,	/* batch dword 4 */
+	0x00000018,	/* batch dword 6 */
+	0x000001ec,	/* batch dword 123 */
+};
+
+static const u32 gen7_null_state_batch[] = {
+	0x69040000,
+	0x61010008,
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000001,
+	0x00000000,
+	0x00000001,
+	0x790d0002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78180000,
+	0x00000001,
+	0x79160000,
+	0x00000008,
+	0x78300000,
+	0x02010040,
+	0x78310000,
+	0x04000000,
+	0x78320000,
+	0x04000000,
+	0x78330000,
+	0x02000000,
+	0x78100004,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781b0005,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781c0002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781d0004,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78110005,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78120002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78210000,
+	0x00000000,
+	0x78130005,
+	0x00000000,
+	0x20000000,
+	0x04000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78140001,
+	0x20000800,
+	0x00000000,
+	0x781e0001,
+	0x00000000,
+	0x00000000,
+	0x78050005,
+	0xe0040000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78040001,
+	0x00000000,
+	0x00000000,
+	0x78240000,
+	0x00000240,
+	0x78230000,
+	0x00000260,
+	0x782f0000,
+	0x00000280,
+	0x781f000c,
+	0x00400810,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78200006,
+	0x000002c0,
+	0x08080000,
+	0x00000000,
+	0x28000402,
+	0x00060000,
+	0x00000000,
+	0x00000000,
+	0x78090005,
+	0x02000000,
+	0x22220000,
+	0x02f60000,
+	0x11230000,
+	0x02f60004,
+	0x11230000,
+	0x78080003,
+	0x00006008,
+	0x00000340,	 /* reloc */
+	0xffffffff,
+	0x00000000,
+	0x782a0000,
+	0x00000360,
+	0x79000002,
+	0xffffffff,
+	0x00000000,
+	0x00000000,
+	0x7b000005,
+	0x0000000f,
+	0x00000003,
+	0x00000000,
+	0x00000001,
+	0x00000000,
+	0x00000000,
+	0x05000000,	 /* cmds end */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000031,	 /* state start */
+	0x00000003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xf99a130c,
+	0x799a130c,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000492,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0080005a,
+	0x2e2077bd,
+	0x000000c0,
+	0x008d0040,
+	0x0080005a,
+	0x2e6077bd,
+	0x000000d0,
+	0x008d0040,
+	0x02800031,
+	0x21801fa9,
+	0x008d0e20,
+	0x08840001,
+	0x00800001,
+	0x2e2003bd,
+	0x008d0180,
+	0x00000000,
+	0x00800001,
+	0x2e6003bd,
+	0x008d01c0,
+	0x00000000,
+	0x00800001,
+	0x2ea003bd,
+	0x008d0200,
+	0x00000000,
+	0x00800001,
+	0x2ee003bd,
+	0x008d0240,
+	0x00000000,
+	0x05800031,
+	0x20001fa8,
+	0x008d0e20,
+	0x90031000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000380,
+	0x000003a0,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,	 /* state end */
+};
+
+RO_RENDERSTATE(7);

diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
new file mode 100644
index 0000000..5c87561
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c

@@ -0,0 +1,479 @@
+#include "intel_renderstate.h"
+
+static const u32 gen8_null_state_relocs[] = { /* byte offsets into gen8_null_state_batch; each lands on a dword tagged "reloc" there */
+	0x00000048,	/* batch dword 18 */
+	0x00000050,	/* batch dword 20 */
+	0x00000060,	/* batch dword 24 */
+	0x000003ec,	/* batch dword 251 */
+};
+
+static const u32 gen8_null_state_batch[] = {
+	0x69040000,
+	0x61020001,
+	0x00000000,
+	0x00000000,
+	0x79120000,
+	0x00000000,
+	0x79130000,
+	0x00000000,
+	0x79140000,
+	0x00000000,
+	0x79150000,
+	0x00000000,
+	0x79160000,
+	0x00000000,
+	0x6101000e,
+	0x00000001,
+	0x00000000,
+	0x00000001,
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000001,	 /* reloc */
+	0x00000000,
+	0xfffff001,
+	0x00001001,
+	0xfffff001,
+	0x00001001,
+	0x78230000,
+	0x000006e0,
+	0x78210000,
+	0x00000700,
+	0x78300000,
+	0x08010040,
+	0x78330000,
+	0x08000000,
+	0x78310000,
+	0x08000000,
+	0x78320000,
+	0x08000000,
+	0x78240000,
+	0x00000641,
+	0x780e0000,
+	0x00000601,
+	0x780d0000,
+	0x00000000,
+	0x78180000,
+	0x00000001,
+	0x78520003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78190009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781b0007,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78270000,
+	0x00000000,
+	0x782c0000,
+	0x00000000,
+	0x781c0002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78160009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78110008,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78290000,
+	0x00000000,
+	0x782e0000,
+	0x00000000,
+	0x781a0009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781d0007,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78280000,
+	0x00000000,
+	0x782d0000,
+	0x00000000,
+	0x78260000,
+	0x00000000,
+	0x782b0000,
+	0x00000000,
+	0x78150009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78100007,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781e0003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78120002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x781f0002,
+	0x30400820,
+	0x00000000,
+	0x00000000,
+	0x78510009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78500003,
+	0x00210000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78130002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x782a0000,
+	0x00000480,
+	0x782f0000,
+	0x00000540,
+	0x78140000,
+	0x00000800,
+	0x78170009,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x7820000a,
+	0x00000580,
+	0x00000000,
+	0x08080000,
+	0x00000000,
+	0x00000000,
+	0x1f000002,
+	0x00060000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x784d0000,
+	0x40000000,
+	0x784f0000,
+	0x80000100,
+	0x780f0000,
+	0x00000740,
+	0x78050006,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78070003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78060003,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x78040001,
+	0x00000000,
+	0x00000001,
+	0x79000002,
+	0xffffffff,
+	0x00000000,
+	0x00000000,
+	0x78080003,
+	0x00006000,
+	0x000005e0,	 /* reloc */
+	0x00000000,
+	0x00000000,
+	0x78090005,
+	0x02000000,
+	0x22220000,
+	0x02f60000,
+	0x11230000,
+	0x02850004,
+	0x11230000,
+	0x784b0000,
+	0x0000000f,
+	0x78490001,
+	0x00000000,
+	0x00000000,
+	0x7b000005,
+	0x00000000,
+	0x00000003,
+	0x00000000,
+	0x00000001,
+	0x00000000,
+	0x00000000,
+	0x05000000,	 /* cmds end */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x000004c0,	 /* state start */
+	0x00000500,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000092,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0060005a,
+	0x21403ae8,
+	0x3a0000c0,
+	0x008d0040,
+	0x0060005a,
+	0x21603ae8,
+	0x3a0000c0,
+	0x008d0080,
+	0x0060005a,
+	0x21803ae8,
+	0x3a0000d0,
+	0x008d0040,
+	0x0060005a,
+	0x21a03ae8,
+	0x3a0000d0,
+	0x008d0080,
+	0x02800031,
+	0x2e0022e8,
+	0x0e000140,
+	0x08840001,
+	0x05800031,
+	0x200022e0,
+	0x0e000e00,
+	0x90031000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x06200000,
+	0x00000002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xf99a130c,
+	0x799a130c,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x3f800000,
+	0x00000000,
+	0x3f800000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,	 /* state end */
+};
+
+RO_RENDERSTATE(8);

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 79fb4cc..279488a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -33,26 +33,44 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-static inline int ring_space(struct intel_ring_buffer *ring)
+/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some indication as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+
+static inline int __ring_space(int head, int tail, int size)
 {
-	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
+	int space = head - (tail + I915_RING_FREE_SPACE); /* I915_RING_FREE_SPACE is never counted as available */
 	if (space < 0)
-		space += ring->size;
+		space += size; /* negative result: add the ring size to wrap around */
 	return space;
 }
 
-void __intel_ring_advance(struct intel_ring_buffer *ring)
+static inline int ring_space(struct intel_engine_cs *ring)
+{
+	struct intel_ringbuffer *ringbuf = ring->buffer; /* head/tail/size are read from ring->buffer; head is masked with HEAD_ADDR first */
+	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
+}
+
+static bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); /* true when this ring's flag is set in gpu_error.stop_rings */
+}
 
-	ring->tail &= ring->size - 1;
-	if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
+void __intel_ring_advance(struct intel_engine_cs *ring)
+{
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	ringbuf->tail &= ringbuf->size - 1; /* wrap tail; NOTE(review): the mask form assumes size is a power of two -- confirm */
+	if (intel_ring_stopped(ring)) /* stopped ring: tail stays wrapped but write_tail() is skipped */
 		return;
-	ring->write_tail(ring, ring->tail);
+	ring->write_tail(ring, ringbuf->tail);
 }
 
 static int
-gen2_render_ring_flush(struct intel_ring_buffer *ring,
+gen2_render_ring_flush(struct intel_engine_cs *ring,
 		       u32	invalidate_domains,
 		       u32	flush_domains)
 {
@@ -78,7 +96,7 @@
 }
 
 static int
-gen4_render_ring_flush(struct intel_ring_buffer *ring,
+gen4_render_ring_flush(struct intel_engine_cs *ring,
 		       u32	invalidate_domains,
 		       u32	flush_domains)
 {
@@ -173,9 +191,9 @@
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
 {
-	u32 scratch_addr = ring->scratch.gtt_offset + 128;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 
@@ -208,11 +226,11 @@
 }
 
 static int
-gen6_render_ring_flush(struct intel_ring_buffer *ring,
+gen6_render_ring_flush(struct intel_engine_cs *ring,
                          u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 128;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
@@ -260,7 +278,7 @@
 }
 
 static int
-gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -278,7 +296,7 @@
 	return 0;
 }
 
-static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
+static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
 {
 	int ret;
 
@@ -302,11 +320,11 @@
 }
 
 static int
-gen7_render_ring_flush(struct intel_ring_buffer *ring,
+gen7_render_ring_flush(struct intel_engine_cs *ring,
 		       u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 128;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	/*
@@ -363,11 +381,11 @@
 }
 
 static int
-gen8_render_ring_flush(struct intel_ring_buffer *ring,
+gen8_render_ring_flush(struct intel_engine_cs *ring,
 		       u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 128;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	flags |= PIPE_CONTROL_CS_STALL;
@@ -403,14 +421,14 @@
 
 }
 
-static void ring_write_tail(struct intel_ring_buffer *ring,
+static void ring_write_tail(struct intel_engine_cs *ring,
 			    u32 value)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	I915_WRITE_TAIL(ring, value);
 }
 
-u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
+u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	u64 acthd;
@@ -426,7 +444,7 @@
 	return acthd;
 }
 
-static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
+static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	u32 addr;
@@ -437,7 +455,7 @@
 	I915_WRITE(HWS_PGA, addr);
 }
 
-static bool stop_ring(struct intel_ring_buffer *ring)
+static bool stop_ring(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
 
@@ -461,11 +479,12 @@
 	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
 }
 
-static int init_ring_common(struct intel_ring_buffer *ring)
+static int init_ring_common(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj = ring->obj;
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	struct drm_i915_gem_object *obj = ringbuf->obj;
 	int ret = 0;
 
 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
@@ -504,7 +523,7 @@
 	 * register values. */
 	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
 	I915_WRITE_CTL(ring,
-			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
+			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
@@ -512,12 +531,11 @@
 		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
 		DRM_ERROR("%s initialization failed "
-				"ctl %08x head %08x tail %08x start %08x\n",
-				ring->name,
-				I915_READ_CTL(ring),
-				I915_READ_HEAD(ring),
-				I915_READ_TAIL(ring),
-				I915_READ_START(ring));
+			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
+			  ring->name,
+			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
+			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
+			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
 		ret = -EIO;
 		goto out;
 	}
@@ -525,10 +543,10 @@
 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
 		i915_kernel_lost_context(ring->dev);
 	else {
-		ring->head = I915_READ_HEAD(ring);
-		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-		ring->space = ring_space(ring);
-		ring->last_retired_head = -1;
+		ringbuf->head = I915_READ_HEAD(ring);
+		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
+		ringbuf->space = ring_space(ring);
+		ringbuf->last_retired_head = -1;
 	}
 
 	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
@@ -540,7 +558,7 @@
 }
 
 static int
-init_pipe_control(struct intel_ring_buffer *ring)
+init_pipe_control(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -581,7 +599,7 @@
 	return ret;
 }
 
-static int init_render_ring(struct intel_ring_buffer *ring)
+static int init_render_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -595,19 +613,21 @@
 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
 	 * programmed to '1' on all products.
 	 *
-	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw
+	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
 	 */
 	if (INTEL_INFO(dev)->gen >= 6)
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
 
 	/* Required for the hardware to program scanline values for waiting */
+	/* WaEnableFlushTlbInvalidationMode:snb */
 	if (INTEL_INFO(dev)->gen == 6)
 		I915_WRITE(GFX_MODE,
-			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
 
+	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
 	if (IS_GEN7(dev))
 		I915_WRITE(GFX_MODE_GEN7,
-			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
 	if (INTEL_INFO(dev)->gen >= 5) {
@@ -624,13 +644,6 @@
 		 */
 		I915_WRITE(CACHE_MODE_0,
 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
-		/* This is not explicitly set for GEN6, so read the register.
-		 * see intel_ring_mi_set_context() for why we care.
-		 * TODO: consider explicitly setting the bit for GEN5
-		 */
-		ring->itlb_before_ctx_switch =
-			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
 	}
 
 	if (INTEL_INFO(dev)->gen >= 6)
@@ -642,7 +655,7 @@
 	return ret;
 }
 
-static void render_ring_cleanup(struct intel_ring_buffer *ring)
+static void render_ring_cleanup(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 
@@ -658,20 +671,46 @@
 	ring->scratch.obj = NULL;
 }
 
-static void
-update_mboxes(struct intel_ring_buffer *ring,
-	      u32 mmio_offset)
+static int gen6_signal(struct intel_engine_cs *signaller,
+		       unsigned int num_dwords)
 {
-/* NB: In order to be able to do semaphore MBOX updates for varying number
- * of rings, it's easiest if we round up each individual update to a
- * multiple of 2 (since ring updates must always be a multiple of 2)
- * even though the actual update only requires 3 dwords.
- */
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *useless;
+	int i, ret;
+
+	/* NB: In order to be able to do semaphore MBOX updates for varying
+	 * number of rings, it's easiest if we round up each individual update
+	 * to a multiple of 2 (since ring updates must always be a multiple of
+	 * 2) even though the actual update only requires 3 dwords.
+	 */
 #define MBOX_UPDATE_DWORDS 4
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, mmio_offset);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
-	intel_ring_emit(ring, MI_NOOP);
+	if (i915_semaphore_is_enabled(dev))
+		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
+	else
+		return intel_ring_begin(signaller, num_dwords);
+
+	ret = intel_ring_begin(signaller, num_dwords);
+	if (ret)
+		return ret;
+#undef MBOX_UPDATE_DWORDS
+
+	for_each_ring(useless, dev_priv, i) {
+		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
+		if (mbox_reg != GEN6_NOSYNC) {
+			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(signaller, mbox_reg);
+			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, MI_NOOP);
+		} else {
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+		}
+	}
+
+	return 0;
 }
 
 /**
@@ -684,29 +723,14 @@
  * This acts like a signal in the canonical semaphore.
  */
 static int
-gen6_add_request(struct intel_ring_buffer *ring)
+gen6_add_request(struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *useless;
-	int i, ret, num_dwords = 4;
+	int ret;
 
-	if (i915_semaphore_is_enabled(dev))
-		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
-#undef MBOX_UPDATE_DWORDS
-
-	ret = intel_ring_begin(ring, num_dwords);
+	ret = ring->semaphore.signal(ring, 4);
 	if (ret)
 		return ret;
 
-	if (i915_semaphore_is_enabled(dev)) {
-		for_each_ring(useless, dev_priv, i) {
-			u32 mbox_reg = ring->signal_mbox[i];
-			if (mbox_reg != GEN6_NOSYNC)
-				update_mboxes(ring, mbox_reg);
-		}
-	}
-
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
@@ -731,14 +755,15 @@
  * @seqno - seqno which the waiter will block on
  */
 static int
-gen6_ring_sync(struct intel_ring_buffer *waiter,
-	       struct intel_ring_buffer *signaller,
+gen6_ring_sync(struct intel_engine_cs *waiter,
+	       struct intel_engine_cs *signaller,
 	       u32 seqno)
 {
-	int ret;
 	u32 dw1 = MI_SEMAPHORE_MBOX |
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
+	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
+	int ret;
 
 	/* Throughout all of the GEM code, seqno passed implies our current
 	 * seqno is >= the last seqno executed. However for hardware the
@@ -746,8 +771,7 @@
 	 */
 	seqno -= 1;
 
-	WARN_ON(signaller->semaphore_register[waiter->id] ==
-		MI_SEMAPHORE_SYNC_INVALID);
+	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
 	ret = intel_ring_begin(waiter, 4);
 	if (ret)
@@ -755,9 +779,7 @@
 
 	/* If seqno wrap happened, omit the wait with no-ops */
 	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
-		intel_ring_emit(waiter,
-				dw1 |
-				signaller->semaphore_register[waiter->id]);
+		intel_ring_emit(waiter, dw1 | wait_mbox);
 		intel_ring_emit(waiter, seqno);
 		intel_ring_emit(waiter, 0);
 		intel_ring_emit(waiter, MI_NOOP);
@@ -782,9 +804,9 @@
 } while (0)
 
 static int
-pc_render_add_request(struct intel_ring_buffer *ring)
+pc_render_add_request(struct intel_engine_cs *ring)
 {
-	u32 scratch_addr = ring->scratch.gtt_offset + 128;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
@@ -806,15 +828,15 @@
 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	scratch_addr += 128; /* write to separate cachelines */
+	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	scratch_addr += 128;
+	scratch_addr += 2 * CACHELINE_BYTES;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	scratch_addr += 128;
+	scratch_addr += 2 * CACHELINE_BYTES;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	scratch_addr += 128;
+	scratch_addr += 2 * CACHELINE_BYTES;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	scratch_addr += 128;
+	scratch_addr += 2 * CACHELINE_BYTES;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
@@ -830,7 +852,7 @@
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
+gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
@@ -844,31 +866,31 @@
 }
 
 static u32
-ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static void
-ring_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
+ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
 static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
+pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
 	return ring->scratch.cpu_page[0];
 }
 
 static void
-pc_render_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
+pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	ring->scratch.cpu_page[0] = seqno;
 }
 
 static bool
-gen5_ring_get_irq(struct intel_ring_buffer *ring)
+gen5_ring_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -886,7 +908,7 @@
 }
 
 static void
-gen5_ring_put_irq(struct intel_ring_buffer *ring)
+gen5_ring_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -899,7 +921,7 @@
 }
 
 static bool
-i9xx_ring_get_irq(struct intel_ring_buffer *ring)
+i9xx_ring_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -920,7 +942,7 @@
 }
 
 static void
-i9xx_ring_put_irq(struct intel_ring_buffer *ring)
+i9xx_ring_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -936,7 +958,7 @@
 }
 
 static bool
-i8xx_ring_get_irq(struct intel_ring_buffer *ring)
+i8xx_ring_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -957,7 +979,7 @@
 }
 
 static void
-i8xx_ring_put_irq(struct intel_ring_buffer *ring)
+i8xx_ring_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -972,7 +994,7 @@
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
-void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
+void intel_ring_setup_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -989,6 +1011,11 @@
 		case BCS:
 			mmio = BLT_HWS_PGA_GEN7;
 			break;
+		/*
+		 * VCS2 doesn't actually exist on Gen7; this case only
+		 * silences gcc's switch check warning.
+		 */
+		case VCS2:
 		case VCS:
 			mmio = BSD_HWS_PGA_GEN7;
 			break;
@@ -1030,7 +1057,7 @@
 }
 
 static int
-bsd_ring_flush(struct intel_ring_buffer *ring,
+bsd_ring_flush(struct intel_engine_cs *ring,
 	       u32     invalidate_domains,
 	       u32     flush_domains)
 {
@@ -1047,7 +1074,7 @@
 }
 
 static int
-i9xx_add_request(struct intel_ring_buffer *ring)
+i9xx_add_request(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -1065,7 +1092,7 @@
 }
 
 static bool
-gen6_ring_get_irq(struct intel_ring_buffer *ring)
+gen6_ring_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1090,7 +1117,7 @@
 }
 
 static void
-gen6_ring_put_irq(struct intel_ring_buffer *ring)
+gen6_ring_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1108,7 +1135,7 @@
 }
 
 static bool
-hsw_vebox_get_irq(struct intel_ring_buffer *ring)
+hsw_vebox_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1128,7 +1155,7 @@
 }
 
 static void
-hsw_vebox_put_irq(struct intel_ring_buffer *ring)
+hsw_vebox_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1146,7 +1173,7 @@
 }
 
 static bool
-gen8_ring_get_irq(struct intel_ring_buffer *ring)
+gen8_ring_get_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1172,7 +1199,7 @@
 }
 
 static void
-gen8_ring_put_irq(struct intel_ring_buffer *ring)
+gen8_ring_put_irq(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1192,8 +1219,8 @@
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			 u32 offset, u32 length,
+i965_dispatch_execbuffer(struct intel_engine_cs *ring,
+			 u64 offset, u32 length,
 			 unsigned flags)
 {
 	int ret;
@@ -1215,8 +1242,8 @@
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
 #define I830_BATCH_LIMIT (256*1024)
 static int
-i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
-				u32 offset, u32 len,
+i830_dispatch_execbuffer(struct intel_engine_cs *ring,
+				u64 offset, u32 len,
 				unsigned flags)
 {
 	int ret;
@@ -1266,8 +1293,8 @@
 }
 
 static int
-i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			 u32 offset, u32 len,
+i915_dispatch_execbuffer(struct intel_engine_cs *ring,
+			 u64 offset, u32 len,
 			 unsigned flags)
 {
 	int ret;
@@ -1283,7 +1310,7 @@
 	return 0;
 }
 
-static void cleanup_status_page(struct intel_ring_buffer *ring)
+static void cleanup_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_object *obj;
 
@@ -1297,50 +1324,44 @@
 	ring->status_page.obj = NULL;
 }
 
-static int init_status_page(struct intel_ring_buffer *ring)
+static int init_status_page(struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *obj;
-	int ret;
 
-	obj = i915_gem_alloc_object(dev, 4096);
-	if (obj == NULL) {
-		DRM_ERROR("Failed to allocate status page\n");
-		ret = -ENOMEM;
-		goto err;
+	if ((obj = ring->status_page.obj) == NULL) {
+		int ret;
+
+		obj = i915_gem_alloc_object(ring->dev, 4096);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate status page\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+		if (ret)
+			goto err_unref;
+
+		ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
+		if (ret) {
+err_unref:
+			drm_gem_object_unreference(&obj->base);
+			return ret;
+		}
+
+		ring->status_page.obj = obj;
 	}
 
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-	if (ret)
-		goto err_unref;
-
-	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
-	if (ret)
-		goto err_unref;
-
 	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
 	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
-	if (ring->status_page.page_addr == NULL) {
-		ret = -ENOMEM;
-		goto err_unpin;
-	}
-	ring->status_page.obj = obj;
 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
 
 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
 			ring->name, ring->status_page.gfx_addr);
 
 	return 0;
-
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-err:
-	return ret;
 }
 
-static int init_phys_status_page(struct intel_ring_buffer *ring)
+static int init_phys_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
@@ -1357,44 +1378,24 @@
 	return 0;
 }
 
-static int intel_init_ring_buffer(struct drm_device *dev,
-				  struct intel_ring_buffer *ring)
+static int allocate_ring_buffer(struct intel_engine_cs *ring)
 {
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 	struct drm_i915_gem_object *obj;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
-	INIT_LIST_HEAD(&ring->request_list);
-	ring->size = 32 * PAGE_SIZE;
-	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
-
-	init_waitqueue_head(&ring->irq_queue);
-
-	if (I915_NEED_GFX_HWS(dev)) {
-		ret = init_status_page(ring);
-		if (ret)
-			return ret;
-	} else {
-		BUG_ON(ring->id != RCS);
-		ret = init_phys_status_page(ring);
-		if (ret)
-			return ret;
-	}
+	if (intel_ring_initialized(ring))
+		return 0;
 
 	obj = NULL;
 	if (!HAS_LLC(dev))
-		obj = i915_gem_object_create_stolen(dev, ring->size);
+		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
 	if (obj == NULL)
-		obj = i915_gem_alloc_object(dev, ring->size);
-	if (obj == NULL) {
-		DRM_ERROR("Failed to allocate ringbuffer\n");
-		ret = -ENOMEM;
-		goto err_hws;
-	}
-
-	ring->obj = obj;
+		obj = i915_gem_alloc_object(dev, ringbuf->size);
+	if (obj == NULL)
+		return -ENOMEM;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
 	if (ret)
@@ -1404,65 +1405,102 @@
 	if (ret)
 		goto err_unpin;
 
-	ring->virtual_start =
+	ringbuf->virtual_start =
 		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
-			   ring->size);
-	if (ring->virtual_start == NULL) {
-		DRM_ERROR("Failed to map ringbuffer.\n");
+				ringbuf->size);
+	if (ringbuf->virtual_start == NULL) {
 		ret = -EINVAL;
 		goto err_unpin;
 	}
 
-	ret = ring->init(ring);
-	if (ret)
-		goto err_unmap;
+	ringbuf->obj = obj;
+	return 0;
+
+err_unpin:
+	i915_gem_object_ggtt_unpin(obj);
+err_unref:
+	drm_gem_object_unreference(&obj->base);
+	return ret;
+}
+
+static int intel_init_ring_buffer(struct drm_device *dev,
+				  struct intel_engine_cs *ring)
+{
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	int ret;
+
+	if (ringbuf == NULL) {
+		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
+		if (!ringbuf)
+			return -ENOMEM;
+		ring->buffer = ringbuf;
+	}
+
+	ring->dev = dev;
+	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->request_list);
+	ringbuf->size = 32 * PAGE_SIZE;
+	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
+
+	init_waitqueue_head(&ring->irq_queue);
+
+	if (I915_NEED_GFX_HWS(dev)) {
+		ret = init_status_page(ring);
+		if (ret)
+			goto error;
+	} else {
+		BUG_ON(ring->id != RCS);
+		ret = init_phys_status_page(ring);
+		if (ret)
+			goto error;
+	}
+
+	ret = allocate_ring_buffer(ring);
+	if (ret) {
+		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
+		goto error;
+	}
 
 	/* Workaround an erratum on the i830 which causes a hang if
 	 * the TAIL pointer points to within the last 2 cachelines
 	 * of the buffer.
 	 */
-	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev) || IS_845G(ring->dev))
-		ring->effective_size -= 128;
+	ringbuf->effective_size = ringbuf->size;
+	if (IS_I830(dev) || IS_845G(dev))
+		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
 
-	i915_cmd_parser_init_ring(ring);
+	ret = i915_cmd_parser_init_ring(ring);
+	if (ret)
+		goto error;
+
+	ret = ring->init(ring);
+	if (ret)
+		goto error;
 
 	return 0;
 
-err_unmap:
-	iounmap(ring->virtual_start);
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-	ring->obj = NULL;
-err_hws:
-	cleanup_status_page(ring);
+error:
+	kfree(ringbuf);
+	ring->buffer = NULL;
 	return ret;
 }
 
-void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
+void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 {
-	struct drm_i915_private *dev_priv;
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 
-	if (ring->obj == NULL)
+	if (!intel_ring_initialized(ring))
 		return;
 
-	/* Disable the ring buffer. The ring must be idle at this point */
-	dev_priv = ring->dev->dev_private;
-	ret = intel_ring_idle(ring);
-	if (ret && !i915_reset_in_progress(&dev_priv->gpu_error))
-		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
-			  ring->name, ret);
+	intel_stop_ring_buffer(ring);
+	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
-	I915_WRITE_CTL(ring, 0);
+	iounmap(ringbuf->virtual_start);
 
-	iounmap(ring->virtual_start);
-
-	i915_gem_object_ggtt_unpin(ring->obj);
-	drm_gem_object_unreference(&ring->obj->base);
-	ring->obj = NULL;
+	i915_gem_object_ggtt_unpin(ringbuf->obj);
+	drm_gem_object_unreference(&ringbuf->obj->base);
+	ringbuf->obj = NULL;
 	ring->preallocated_lazy_request = NULL;
 	ring->outstanding_lazy_seqno = 0;
 
@@ -1470,44 +1508,34 @@
 		ring->cleanup(ring);
 
 	cleanup_status_page(ring);
+
+	i915_cmd_parser_fini_ring(ring);
+
+	kfree(ringbuf);
+	ring->buffer = NULL;
 }
 
-static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
+static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 {
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 	struct drm_i915_gem_request *request;
-	u32 seqno = 0, tail;
+	u32 seqno = 0;
 	int ret;
 
-	if (ring->last_retired_head != -1) {
-		ring->head = ring->last_retired_head;
-		ring->last_retired_head = -1;
+	if (ringbuf->last_retired_head != -1) {
+		ringbuf->head = ringbuf->last_retired_head;
+		ringbuf->last_retired_head = -1;
 
-		ring->space = ring_space(ring);
-		if (ring->space >= n)
+		ringbuf->space = ring_space(ring);
+		if (ringbuf->space >= n)
 			return 0;
 	}
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		int space;
-
-		if (request->tail == -1)
-			continue;
-
-		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
-		if (space < 0)
-			space += ring->size;
-		if (space >= n) {
+		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
 			seqno = request->seqno;
-			tail = request->tail;
 			break;
 		}
-
-		/* Consume this request in case we need more space than
-		 * is available and so need to prevent a race between
-		 * updating last_retired_head and direct reads of
-		 * I915_RING_HEAD. It also provides a nice sanity check.
-		 */
-		request->tail = -1;
 	}
 
 	if (seqno == 0)
@@ -1517,18 +1545,19 @@
 	if (ret)
 		return ret;
 
-	ring->head = tail;
-	ring->space = ring_space(ring);
-	if (WARN_ON(ring->space < n))
-		return -ENOSPC;
+	i915_gem_retire_requests_ring(ring);
+	ringbuf->head = ringbuf->last_retired_head;
+	ringbuf->last_retired_head = -1;
 
+	ringbuf->space = ring_space(ring);
 	return 0;
 }
 
-static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
+static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 	unsigned long end;
 	int ret;
 
@@ -1539,7 +1568,6 @@
 	/* force the tail write in case we have been skipping them */
 	__intel_ring_advance(ring);
 
-	trace_i915_ring_wait_begin(ring);
 	/* With GEM the hangcheck timer should kick us out of the loop,
 	 * leaving it early runs the risk of corrupting GEM state (due
 	 * to running on almost untested codepaths). But on resume
@@ -1547,12 +1575,13 @@
 	 * case by choosing an insanely large timeout. */
 	end = jiffies + 60 * HZ;
 
+	trace_i915_ring_wait_begin(ring);
 	do {
-		ring->head = I915_READ_HEAD(ring);
-		ring->space = ring_space(ring);
-		if (ring->space >= n) {
-			trace_i915_ring_wait_end(ring);
-			return 0;
+		ringbuf->head = I915_READ_HEAD(ring);
+		ringbuf->space = ring_space(ring);
+		if (ringbuf->space >= n) {
+			ret = 0;
+			break;
 		}
 
 		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
@@ -1564,38 +1593,49 @@
 
 		msleep(1);
 
+		if (dev_priv->mm.interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
 		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
 					   dev_priv->mm.interruptible);
 		if (ret)
-			return ret;
-	} while (!time_after(jiffies, end));
+			break;
+
+		if (time_after(jiffies, end)) {
+			ret = -EBUSY;
+			break;
+		}
+	} while (1);
 	trace_i915_ring_wait_end(ring);
-	return -EBUSY;
+	return ret;
 }
 
-static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
+static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 {
 	uint32_t __iomem *virt;
-	int rem = ring->size - ring->tail;
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	int rem = ringbuf->size - ringbuf->tail;
 
-	if (ring->space < rem) {
+	if (ringbuf->space < rem) {
 		int ret = ring_wait_for_space(ring, rem);
 		if (ret)
 			return ret;
 	}
 
-	virt = ring->virtual_start + ring->tail;
+	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
 		iowrite32(MI_NOOP, virt++);
 
-	ring->tail = 0;
-	ring->space = ring_space(ring);
+	ringbuf->tail = 0;
+	ringbuf->space = ring_space(ring);
 
 	return 0;
 }
 
-int intel_ring_idle(struct intel_ring_buffer *ring)
+int intel_ring_idle(struct intel_engine_cs *ring)
 {
 	u32 seqno;
 	int ret;
@@ -1619,7 +1659,7 @@
 }
 
 static int
-intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
+intel_ring_alloc_seqno(struct intel_engine_cs *ring)
 {
 	if (ring->outstanding_lazy_seqno)
 		return 0;
@@ -1637,18 +1677,19 @@
 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
 }
 
-static int __intel_ring_prepare(struct intel_ring_buffer *ring,
+static int __intel_ring_prepare(struct intel_engine_cs *ring,
 				int bytes)
 {
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int ret;
 
-	if (unlikely(ring->tail + bytes > ring->effective_size)) {
+	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
 		ret = intel_wrap_ring_buffer(ring);
 		if (unlikely(ret))
 			return ret;
 	}
 
-	if (unlikely(ring->space < bytes)) {
+	if (unlikely(ringbuf->space < bytes)) {
 		ret = ring_wait_for_space(ring, bytes);
 		if (unlikely(ret))
 			return ret;
@@ -1657,7 +1698,7 @@
 	return 0;
 }
 
-int intel_ring_begin(struct intel_ring_buffer *ring,
+int intel_ring_begin(struct intel_engine_cs *ring,
 		     int num_dwords)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1677,19 +1718,20 @@
 	if (ret)
 		return ret;
 
-	ring->space -= num_dwords * sizeof(uint32_t);
+	ring->buffer->space -= num_dwords * sizeof(uint32_t);
 	return 0;
 }
 
 /* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct intel_ring_buffer *ring)
+int intel_ring_cacheline_align(struct intel_engine_cs *ring)
 {
-	int num_dwords = (64 - (ring->tail & 63)) / sizeof(uint32_t);
+	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	int ret;
 
 	if (num_dwords == 0)
 		return 0;
 
+	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
 	ret = intel_ring_begin(ring, num_dwords);
 	if (ret)
 		return ret;
@@ -1702,7 +1744,7 @@
 	return 0;
 }
 
-void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno)
+void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
@@ -1719,7 +1761,7 @@
 	ring->hangcheck.seqno = seqno;
 }
 
-static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
+static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
 				     u32 value)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1752,7 +1794,7 @@
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }
 
-static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
+static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 			       u32 invalidate, u32 flush)
 {
 	uint32_t cmd;
@@ -1788,8 +1830,8 @@
 }
 
 static int
-gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len,
+gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+			      u64 offset, u32 len,
 			      unsigned flags)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1803,8 +1845,8 @@
 
 	/* FIXME(BDW): Address space and security selectors. */
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
-	intel_ring_emit(ring, offset);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, lower_32_bits(offset));
+	intel_ring_emit(ring, upper_32_bits(offset));
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
@@ -1812,8 +1854,8 @@
 }
 
 static int
-hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len,
+hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+			      u64 offset, u32 len,
 			      unsigned flags)
 {
 	int ret;
@@ -1833,8 +1875,8 @@
 }
 
 static int
-gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len,
+gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+			      u64 offset, u32 len,
 			      unsigned flags)
 {
 	int ret;
@@ -1855,7 +1897,7 @@
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct intel_ring_buffer *ring,
+static int gen6_ring_flush(struct intel_engine_cs *ring,
 			   u32 invalidate, u32 flush)
 {
 	struct drm_device *dev = ring->dev;
@@ -1898,7 +1940,7 @@
 int intel_init_render_ring_buffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 
 	ring->name = "render ring";
 	ring->id = RCS;
@@ -1920,15 +1962,24 @@
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
-		ring->sync_to = gen6_ring_sync;
-		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
-		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
-		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
-		ring->signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->signal_mbox[VCS] = GEN6_VRSYNC;
-		ring->signal_mbox[BCS] = GEN6_BRSYNC;
-		ring->signal_mbox[VECS] = GEN6_VERSYNC;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.signal = gen6_signal;
+		/*
+		 * Semaphores are currently only used on pre-gen8 platforms,
+		 * and those platforms have no VCS2 ring. So the
+		 * semaphore between RCS and VCS2 is initialized as INVALID.
+		 * Gen8 will initialize the sema between VCS2 and RCS later.
+		 */
+		ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
+		ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
+		ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
+		ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
+		ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
+		ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
+		ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
+		ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 	} else if (IS_GEN5(dev)) {
 		ring->add_request = pc_render_add_request;
 		ring->flush = gen4_render_ring_flush;
@@ -1999,16 +2050,25 @@
 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int ret;
 
+	if (ringbuf == NULL) {
+		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
+		if (!ringbuf)
+			return -ENOMEM;
+		ring->buffer = ringbuf;
+	}
+
 	ring->name = "render ring";
 	ring->id = RCS;
 	ring->mmio_base = RENDER_RING_BASE;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		/* non-kms not supported on gen6+ */
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_ringbuf;
 	}
 
 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
@@ -2043,31 +2103,39 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
-	ring->size = size;
-	ring->effective_size = ring->size;
+	ringbuf->size = size;
+	ringbuf->effective_size = ringbuf->size;
 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
-		ring->effective_size -= 128;
+		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
 
-	ring->virtual_start = ioremap_wc(start, size);
-	if (ring->virtual_start == NULL) {
+	ringbuf->virtual_start = ioremap_wc(start, size);
+	if (ringbuf->virtual_start == NULL) {
 		DRM_ERROR("can not ioremap virtual address for"
 			  " ring buffer\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_ringbuf;
 	}
 
 	if (!I915_NEED_GFX_HWS(dev)) {
 		ret = init_phys_status_page(ring);
 		if (ret)
-			return ret;
+			goto err_vstart;
 	}
 
 	return 0;
+
+err_vstart:
+	iounmap(ringbuf->virtual_start);
+err_ringbuf:
+	kfree(ringbuf);
+	ring->buffer = NULL;
+	return ret;
 }
 
 int intel_init_bsd_ring_buffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
 
 	ring->name = "bsd ring";
 	ring->id = VCS;
@@ -2096,15 +2164,24 @@
 			ring->dispatch_execbuffer =
 				gen6_ring_dispatch_execbuffer;
 		}
-		ring->sync_to = gen6_ring_sync;
-		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR;
-		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VB;
-		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_VVE;
-		ring->signal_mbox[RCS] = GEN6_RVSYNC;
-		ring->signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->signal_mbox[BCS] = GEN6_BVSYNC;
-		ring->signal_mbox[VECS] = GEN6_VEVSYNC;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.signal = gen6_signal;
+		/*
+		 * Semaphores are currently only used on pre-gen8 platforms,
+		 * and those platforms have no VCS2 ring. So the
+		 * semaphore between VCS and VCS2 is initialized as INVALID.
+		 * Gen8 will initialize the sema between VCS2 and VCS later.
+		 */
+		ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
+		ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
+		ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
+		ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
+		ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
+		ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
+		ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
+		ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 	} else {
 		ring->mmio_base = BSD_RING_BASE;
 		ring->flush = bsd_ring_flush;
@@ -2127,10 +2204,63 @@
 	return intel_init_ring_buffer(dev, ring);
 }
 
+/**
+ * Initialize the second BSD (VCS2) ring, which only exists on Broadwell GT3.
+ * Returns -EINVAL unless gen == 8, else intel_init_ring_buffer()'s result.
+ */
+int intel_init_bsd2_ring_buffer(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
+
+	if ((INTEL_INFO(dev)->gen != 8)) {
+		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
+		return -EINVAL;
+	}
+
+	ring->name = "bds2_ring";
+	ring->id = VCS2;
+
+	ring->write_tail = ring_write_tail;
+	ring->mmio_base = GEN8_BSD2_RING_BASE;
+	ring->flush = gen6_bsd_ring_flush;
+	ring->add_request = gen6_add_request;
+	ring->get_seqno = gen6_ring_get_seqno;
+	ring->set_seqno = ring_set_seqno;
+	ring->irq_enable_mask =
+			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
+	ring->irq_get = gen8_ring_get_irq;
+	ring->irq_put = gen8_ring_put_irq;
+	ring->dispatch_execbuffer =
+			gen8_ring_dispatch_execbuffer;
+	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.signal = gen6_signal;
+	/*
+	 * Semaphores are currently only used on pre-gen8 hardware, which
+	 * has no bsd2 ring. So every mbox.wait/mbox.signal entry between
+	 * VCS2 and the other rings is initialized as invalid/no-sync here.
+	 * Gen8 will initialize the sema between VCS2 and other rings later.
+	 */
+	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
+
+	ring->init = init_ring_common;
+
+	return intel_init_ring_buffer(dev, ring);
+}
+
 int intel_init_blt_ring_buffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
 
 	ring->name = "blitter ring";
 	ring->id = BCS;
@@ -2153,15 +2283,24 @@
 		ring->irq_put = gen6_ring_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->sync_to = gen6_ring_sync;
-	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR;
-	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV;
-	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_BVE;
-	ring->signal_mbox[RCS] = GEN6_RBSYNC;
-	ring->signal_mbox[VCS] = GEN6_VBSYNC;
-	ring->signal_mbox[BCS] = GEN6_NOSYNC;
-	ring->signal_mbox[VECS] = GEN6_VEBSYNC;
+	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.signal = gen6_signal;
+	/*
+	 * Semaphores are currently only used on pre-gen8 platforms, and
+	 * those platforms have no VCS2 ring. So the semaphore
+	 * between BCS and VCS2 is initialized as INVALID.
+	 * Gen8 will initialize the sema between BCS and VCS2 later.
+	 */
+	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
+	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
+	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
+	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
+	ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
+	ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
+	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
@@ -2170,7 +2309,7 @@
 int intel_init_vebox_ring_buffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring = &dev_priv->ring[VECS];
+	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
 
 	ring->name = "video enhancement ring";
 	ring->id = VECS;
@@ -2194,22 +2333,25 @@
 		ring->irq_put = hsw_vebox_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->sync_to = gen6_ring_sync;
-	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER;
-	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV;
-	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VEB;
-	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->signal_mbox[RCS] = GEN6_RVESYNC;
-	ring->signal_mbox[VCS] = GEN6_VVESYNC;
-	ring->signal_mbox[BCS] = GEN6_BVESYNC;
-	ring->signal_mbox[VECS] = GEN6_NOSYNC;
+	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.signal = gen6_signal;
+	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
+	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
+	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
+	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
+	ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
+	ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
+	ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
+	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
 }
 
 int
-intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+intel_ring_flush_all_caches(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -2227,7 +2369,7 @@
 }
 
 int
-intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
 {
 	uint32_t flush_domains;
 	int ret;
@@ -2245,3 +2387,19 @@
 	ring->gpu_caches_dirty = false;
 	return 0;
 }
+
+void
+intel_stop_ring_buffer(struct intel_engine_cs *ring)
+{
+	int ret;
+
+	if (!intel_ring_initialized(ring))
+		return; /* no ring buffer object: nothing to stop */
+
+	ret = intel_ring_idle(ring); /* a failure is logged, not returned */
+	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
+		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
+			  ring->name, ret);
+
+	stop_ring(ring); /* called even when idling failed */
+}

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2b91c4b..910c83c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h

@@ -1,6 +1,10 @@
 #ifndef _INTEL_RINGBUFFER_H_
 #define _INTEL_RINGBUFFER_H_
 
+#include <linux/hashtable.h>
+
+#define I915_CMD_HASH_ORDER 9
+
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
  * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -54,26 +58,15 @@
 	bool deadlock;
 };
 
-struct  intel_ring_buffer {
-	const char	*name;
-	enum intel_ring_id {
-		RCS = 0x0,
-		VCS,
-		BCS,
-		VECS,
-	} id;
-#define I915_NUM_RINGS 4
-	u32		mmio_base;
-	void		__iomem *virtual_start;
-	struct		drm_device *dev;
-	struct		drm_i915_gem_object *obj;
+struct intel_ringbuffer {
+	struct drm_i915_gem_object *obj;
+	void __iomem *virtual_start;
 
-	u32		head;
-	u32		tail;
-	int		space;
-	int		size;
-	int		effective_size;
-	struct intel_hw_status_page status_page;
+	u32 head;
+	u32 tail;
+	int space;
+	int size;
+	int effective_size;
 
 	/** We track the position of the requests in the ring buffer, and
 	 * when each is retired we increment last_retired_head as the GPU
@@ -83,47 +76,75 @@
 	 * last_retired_head is set to -1 after the value is consumed so
 	 * we can detect new retirements.
 	 */
-	u32		last_retired_head;
+	u32 last_retired_head;
+};
+
+struct  intel_engine_cs {
+	const char	*name;
+	enum intel_ring_id {
+		RCS = 0x0,
+		VCS,
+		BCS,
+		VECS,
+		VCS2
+	} id;
+#define I915_NUM_RINGS 5
+#define LAST_USER_RING (VECS + 1)
+	u32		mmio_base;
+	struct		drm_device *dev;
+	struct intel_ringbuffer *buffer;
+
+	struct intel_hw_status_page status_page;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
 	u32		trace_irq_seqno;
-	u32		sync_seqno[I915_NUM_RINGS-1];
-	bool __must_check (*irq_get)(struct intel_ring_buffer *ring);
-	void		(*irq_put)(struct intel_ring_buffer *ring);
+	bool __must_check (*irq_get)(struct intel_engine_cs *ring);
+	void		(*irq_put)(struct intel_engine_cs *ring);
 
-	int		(*init)(struct intel_ring_buffer *ring);
+	int		(*init)(struct intel_engine_cs *ring);
 
-	void		(*write_tail)(struct intel_ring_buffer *ring,
+	void		(*write_tail)(struct intel_engine_cs *ring,
 				      u32 value);
-	int __must_check (*flush)(struct intel_ring_buffer *ring,
+	int __must_check (*flush)(struct intel_engine_cs *ring,
 				  u32	invalidate_domains,
 				  u32	flush_domains);
-	int		(*add_request)(struct intel_ring_buffer *ring);
+	int		(*add_request)(struct intel_engine_cs *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
 	 * seen value is good enough. Note that the seqno will always be
 	 * monotonic, even if not coherent.
 	 */
-	u32		(*get_seqno)(struct intel_ring_buffer *ring,
+	u32		(*get_seqno)(struct intel_engine_cs *ring,
 				     bool lazy_coherency);
-	void		(*set_seqno)(struct intel_ring_buffer *ring,
+	void		(*set_seqno)(struct intel_engine_cs *ring,
 				     u32 seqno);
-	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
-					       u32 offset, u32 length,
+	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
+					       u64 offset, u32 length,
 					       unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
-	void		(*cleanup)(struct intel_ring_buffer *ring);
-	int		(*sync_to)(struct intel_ring_buffer *ring,
-				   struct intel_ring_buffer *to,
-				   u32 seqno);
+	void		(*cleanup)(struct intel_engine_cs *ring);
 
-	/* our mbox written by others */
-	u32		semaphore_register[I915_NUM_RINGS];
-	/* mboxes this ring signals to */
-	u32		signal_mbox[I915_NUM_RINGS];
+	struct {
+		u32	sync_seqno[I915_NUM_RINGS-1];
+
+		struct {
+			/* our mbox written by others */
+			u32		wait[I915_NUM_RINGS];
+			/* mboxes this ring signals to */
+			u32		signal[I915_NUM_RINGS];
+		} mbox;
+
+		/* AKA wait() */
+		int	(*sync_to)(struct intel_engine_cs *ring,
+				   struct intel_engine_cs *to,
+				   u32 seqno);
+		int	(*signal)(struct intel_engine_cs *signaller,
+				  /* num_dwords needed by caller */
+				  unsigned int num_dwords);
+	} semaphore;
 
 	/**
 	 * List of objects currently involved in rendering from the
@@ -153,12 +174,8 @@
 
 	wait_queue_head_t irq_queue;
 
-	/**
-	 * Do an explicit TLB flush before MI_SET_CONTEXT
-	 */
-	bool itlb_before_ctx_switch;
-	struct i915_hw_context *default_context;
-	struct i915_hw_context *last_context;
+	struct intel_context *default_context;
+	struct intel_context *last_context;
 
 	struct intel_ring_hangcheck hangcheck;
 
@@ -168,12 +185,13 @@
 		volatile u32 *cpu_page;
 	} scratch;
 
+	bool needs_cmd_parser;
+
 	/*
-	 * Tables of commands the command parser needs to know about
+	 * Table of commands the command parser needs to know about
 	 * for this ring.
 	 */
-	const struct drm_i915_cmd_table *cmd_tables;
-	int cmd_table_count;
+	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
 
 	/*
 	 * Table of registers allowed in commands that read/write registers.
@@ -202,20 +220,20 @@
 };
 
 static inline bool
-intel_ring_initialized(struct intel_ring_buffer *ring)
+intel_ring_initialized(struct intel_engine_cs *ring)
 {
-	return ring->obj != NULL;
+	return ring->buffer && ring->buffer->obj;
 }
 
 static inline unsigned
-intel_ring_flag(struct intel_ring_buffer *ring)
+intel_ring_flag(struct intel_engine_cs *ring)
 {
 	return 1 << ring->id;
 }
 
 static inline u32
-intel_ring_sync_index(struct intel_ring_buffer *ring,
-		      struct intel_ring_buffer *other)
+intel_ring_sync_index(struct intel_engine_cs *ring,
+		      struct intel_engine_cs *other)
 {
 	int idx;
 
@@ -233,7 +251,7 @@
 }
 
 static inline u32
-intel_read_status_page(struct intel_ring_buffer *ring,
+intel_read_status_page(struct intel_engine_cs *ring,
 		       int reg)
 {
 	/* Ensure that the compiler doesn't optimize away the load. */
@@ -242,7 +260,7 @@
 }
 
 static inline void
-intel_write_status_page(struct intel_ring_buffer *ring,
+intel_write_status_page(struct intel_engine_cs *ring,
 			int reg, u32 value)
 {
 	ring->status_page.page_addr[reg] = value;
@@ -267,47 +285,51 @@
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
-void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
+void intel_stop_ring_buffer(struct intel_engine_cs *ring);
+void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
-int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
-int __must_check intel_ring_cacheline_align(struct intel_ring_buffer *ring);
-static inline void intel_ring_emit(struct intel_ring_buffer *ring,
+int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
+int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
+static inline void intel_ring_emit(struct intel_engine_cs *ring,
 				   u32 data)
 {
-	iowrite32(data, ring->virtual_start + ring->tail);
-	ring->tail += 4;
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+	ringbuf->tail += 4;
 }
-static inline void intel_ring_advance(struct intel_ring_buffer *ring)
+static inline void intel_ring_advance(struct intel_engine_cs *ring)
 {
-	ring->tail &= ring->size - 1;
+	struct intel_ringbuffer *ringbuf = ring->buffer;
+	ringbuf->tail &= ringbuf->size - 1;
 }
-void __intel_ring_advance(struct intel_ring_buffer *ring);
+void __intel_ring_advance(struct intel_engine_cs *ring);
 
-int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
-void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno);
-int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
-int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
+int __must_check intel_ring_idle(struct intel_engine_cs *ring);
+void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
+int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
+int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
+int intel_init_bsd2_ring_buffer(struct drm_device *dev);
 int intel_init_blt_ring_buffer(struct drm_device *dev);
 int intel_init_vebox_ring_buffer(struct drm_device *dev);
 
-u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
-void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
+u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
+void intel_ring_setup_status_page(struct intel_engine_cs *ring);
 
-static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
+static inline u32 intel_ring_get_tail(struct intel_engine_cs *ring)
 {
-	return ring->tail;
+	return ring->buffer->tail;
 }
 
-static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring)
+static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
 {
 	BUG_ON(ring->outstanding_lazy_seqno == 0);
 	return ring->outstanding_lazy_seqno;
 }
 
-static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno)
+static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
 {
 	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
 		ring->trace_irq_seqno = seqno;

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 46be00d..6a4d5bc 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c

@@ -1153,20 +1153,21 @@
 	pipe_config->pixel_multiplier =
 		intel_sdvo_get_pixel_multiplier(adjusted_mode);
 
+	pipe_config->has_hdmi_sink = intel_sdvo->has_hdmi_monitor;
+
 	if (intel_sdvo->color_range_auto) {
 		/* See CEA-861-E - 5.1 Default Encoding Parameters */
 		/* FIXME: This bit is only valid when using TMDS encoding and 8
 		 * bit per color mode. */
-		if (intel_sdvo->has_hdmi_monitor &&
+		if (pipe_config->has_hdmi_sink &&
 		    drm_match_cea_mode(adjusted_mode) > 1)
-			intel_sdvo->color_range = HDMI_COLOR_RANGE_16_235;
-		else
-			intel_sdvo->color_range = 0;
+			pipe_config->limited_color_range = true;
+	} else {
+		if (pipe_config->has_hdmi_sink &&
+		    intel_sdvo->color_range == HDMI_COLOR_RANGE_16_235)
+			pipe_config->limited_color_range = true;
 	}
 
-	if (intel_sdvo->color_range)
-		pipe_config->limited_color_range = true;
-
 	/* Clock computation needs to happen after pixel multiplier. */
 	if (intel_sdvo->is_tv)
 		i9xx_adjust_sdvo_tv_clock(pipe_config);
@@ -1174,7 +1175,7 @@
 	return true;
 }
 
-static void intel_sdvo_mode_set(struct intel_encoder *intel_encoder)
+static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1223,7 +1224,7 @@
 	if (!intel_sdvo_set_target_input(intel_sdvo))
 		return;
 
-	if (intel_sdvo->has_hdmi_monitor) {
+	if (crtc->config.has_hdmi_sink) {
 		intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_HDMI);
 		intel_sdvo_set_colorimetry(intel_sdvo,
 					   SDVO_COLORIMETRY_RGB256);
@@ -1258,8 +1259,8 @@
 		/* The real mode polarity is set by the SDVO commands, using
 		 * struct intel_sdvo_dtd. */
 		sdvox = SDVO_VSYNC_ACTIVE_HIGH | SDVO_HSYNC_ACTIVE_HIGH;
-		if (!HAS_PCH_SPLIT(dev) && intel_sdvo->is_hdmi)
-			sdvox |= intel_sdvo->color_range;
+		if (!HAS_PCH_SPLIT(dev) && crtc->config.limited_color_range)
+			sdvox |= HDMI_COLOR_RANGE_16_235;
 		if (INTEL_INFO(dev)->gen < 5)
 			sdvox |= SDVO_BORDER_ENABLE;
 	} else {
@@ -1349,6 +1350,8 @@
 	u8 val;
 	bool ret;
 
+	sdvox = I915_READ(intel_sdvo->sdvo_reg);
+
 	ret = intel_sdvo_get_input_timing(intel_sdvo, &dtd);
 	if (!ret) {
 		/* Some sdvo encoders are not spec compliant and don't
@@ -1377,7 +1380,6 @@
 	 * other platfroms.
 	 */
 	if (IS_I915G(dev) || IS_I915GM(dev)) {
-		sdvox = I915_READ(intel_sdvo->sdvo_reg);
 		pipe_config->pixel_multiplier =
 			((sdvox & SDVO_PORT_MULTIPLY_MASK)
 			 >> SDVO_PORT_MULTIPLY_SHIFT) + 1;
@@ -1406,6 +1408,15 @@
 		}
 	}
 
+	if (sdvox & HDMI_COLOR_RANGE_16_235)
+		pipe_config->limited_color_range = true;
+
+	if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ENCODE,
+				 &val, 1)) {
+		if (val == SDVO_ENCODE_HDMI)
+			pipe_config->has_hdmi_sink = true;
+	}
+
 	WARN(encoder_pixel_multiplier != pipe_config->pixel_multiplier,
 	     "SDVO pixel multiplier mismatch, port: %i, encoder: %i\n",
 	     pipe_config->pixel_multiplier, encoder_pixel_multiplier);
@@ -1732,7 +1743,7 @@
 	enum drm_connector_status ret;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	if (!intel_sdvo_get_value(intel_sdvo,
 				  SDVO_CMD_GET_ATTACHED_DISPLAYS,
@@ -1794,7 +1805,7 @@
 	struct edid *edid;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	/* set the bus switch and get the modes */
 	edid = intel_sdvo_get_edid(connector);
@@ -1892,7 +1903,7 @@
 	int i;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	/* Read the list of supported input resolutions for the selected TV
 	 * format.
@@ -1929,7 +1940,7 @@
 	struct drm_display_mode *newmode;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
-		      connector->base.id, drm_get_connector_name(connector));
+		      connector->base.id, connector->name);
 
 	/*
 	 * Fetch modes from VBT. For SDVO prefer the VBT mode since some
@@ -2999,7 +3010,7 @@
 
 	intel_encoder->compute_config = intel_sdvo_compute_config;
 	intel_encoder->disable = intel_disable_sdvo;
-	intel_encoder->mode_set = intel_sdvo_mode_set;
+	intel_encoder->pre_enable = intel_sdvo_pre_enable;
 	intel_encoder->enable = intel_enable_sdvo;
 	intel_encoder->get_hw_state = intel_sdvo_get_hw_state;
 	intel_encoder->get_config = intel_sdvo_get_config;

diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 0954f13..01d841e 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c

@@ -29,12 +29,21 @@
  * IOSF sideband, see VLV2_SidebandMsg_HAS.docx and
  * VLV_VLV2_PUNIT_HAS_0.8.docx
  */
+
+/* Standard MMIO read, non-posted */
+#define SB_MRD_NP	0x00
+/* Standard MMIO write, non-posted */
+#define SB_MWR_NP	0x01
+/* Private register read, double-word addressing, non-posted */
+#define SB_CRRDDA_NP	0x06
+/* Private register write, double-word addressing, non-posted */
+#define SB_CRWRDA_NP	0x07
+
 static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
 			   u32 port, u32 opcode, u32 addr, u32 *val)
 {
 	u32 cmd, be = 0xf, bar = 0;
-	bool is_read = (opcode == PUNIT_OPCODE_REG_READ ||
-			opcode == DPIO_OPCODE_REG_READ);
+	bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
 
 	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
 		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
@@ -74,7 +83,7 @@
 
 	mutex_lock(&dev_priv->dpio_lock);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_PUNIT,
-			PUNIT_OPCODE_REG_READ, addr, &val);
+			SB_CRRDDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 
 	return val;
@@ -86,7 +95,7 @@
 
 	mutex_lock(&dev_priv->dpio_lock);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_PUNIT,
-			PUNIT_OPCODE_REG_WRITE, addr, &val);
+			SB_CRWRDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
@@ -95,7 +104,7 @@
 	u32 val = 0;
 
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_BUNIT,
-			PUNIT_OPCODE_REG_READ, reg, &val);
+			SB_CRRDDA_NP, reg, &val);
 
 	return val;
 }
@@ -103,7 +112,7 @@
 void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_BUNIT,
-			PUNIT_OPCODE_REG_WRITE, reg, &val);
+			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
@@ -114,7 +123,7 @@
 
 	mutex_lock(&dev_priv->dpio_lock);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_NC,
-			PUNIT_OPCODE_REG_READ, addr, &val);
+			SB_CRRDDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 
 	return val;
@@ -124,56 +133,56 @@
 {
 	u32 val = 0;
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC,
-			PUNIT_OPCODE_REG_READ, reg, &val);
+			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC,
-			PUNIT_OPCODE_REG_WRITE, reg, &val);
+			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK,
-			PUNIT_OPCODE_REG_READ, reg, &val);
+			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK,
-			PUNIT_OPCODE_REG_WRITE, reg, &val);
+			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU,
-			PUNIT_OPCODE_REG_READ, reg, &val);
+			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU,
-			PUNIT_OPCODE_REG_WRITE, reg, &val);
+			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE,
-			PUNIT_OPCODE_REG_READ, reg, &val);
+			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE,
-			PUNIT_OPCODE_REG_WRITE, reg, &val);
+			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
@@ -181,14 +190,22 @@
 	u32 val = 0;
 
 	vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)),
-			DPIO_OPCODE_REG_READ, reg, &val);
+			SB_MRD_NP, reg, &val);
+
+	/*
+	 * FIXME: There might be some registers where all 1's is a valid value,
+	 * so ideally we should check the register offset instead...
+	 */
+	WARN(val == 0xffffffff, "DPIO read pipe %c reg 0x%x == 0x%x\n",
+	     pipe_name(pipe), reg, val);
+
 	return val;
 }
 
 void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val)
 {
 	vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)),
-			DPIO_OPCODE_REG_WRITE, reg, &val);
+			SB_MWR_NP, reg, &val);
 }
 
 /* SBI access */
@@ -253,13 +270,13 @@
 u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI,
-					DPIO_OPCODE_REG_READ, reg, &val);
+	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
+			reg, &val);
 	return val;
 }
 
 void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI,
-					DPIO_OPCODE_REG_WRITE, reg, &val);
+	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP,
+			reg, &val);
 }

diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 336ae6c..1b66ddc 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c

@@ -37,6 +37,106 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
+static int usecs_to_scanlines(const struct drm_display_mode *mode, int usecs)
+{
+	/* paranoia */
+	if (!mode->crtc_htotal)
+		return 1;
+
+	return DIV_ROUND_UP(usecs * mode->crtc_clock, 1000 * mode->crtc_htotal);
+}
+
+static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count)
+{
+	struct drm_device *dev = crtc->base.dev;
+	const struct drm_display_mode *mode = &crtc->config.adjusted_mode;
+	enum pipe pipe = crtc->pipe;
+	long timeout = msecs_to_jiffies_timeout(1);
+	int scanline, min, max, vblank_start;
+	DEFINE_WAIT(wait);
+
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
+
+	vblank_start = mode->crtc_vblank_start;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		vblank_start = DIV_ROUND_UP(vblank_start, 2);
+
+	/* FIXME needs to be calibrated sensibly */
+	min = vblank_start - usecs_to_scanlines(mode, 100);
+	max = vblank_start - 1;
+
+	if (min <= 0 || max <= 0)
+		return false;
+
+	if (WARN_ON(drm_vblank_get(dev, pipe)))
+		return false;
+
+	local_irq_disable();
+
+	trace_i915_pipe_update_start(crtc, min, max);
+
+	for (;;) {
+		/*
+		 * prepare_to_wait() has a memory barrier, which guarantees
+		 * other CPUs can see the task state update by the time we
+		 * read the scanline.
+		 */
+		prepare_to_wait(&crtc->vbl_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+		scanline = intel_get_crtc_scanline(crtc);
+		if (scanline < min || scanline > max)
+			break;
+
+		if (timeout <= 0) {
+			DRM_ERROR("Potential atomic update failure on pipe %c\n",
+				  pipe_name(crtc->pipe));
+			break;
+		}
+
+		local_irq_enable();
+
+		timeout = schedule_timeout(timeout);
+
+		local_irq_disable();
+	}
+
+	finish_wait(&crtc->vbl_wait, &wait);
+
+	drm_vblank_put(dev, pipe);
+
+	*start_vbl_count = dev->driver->get_vblank_counter(dev, pipe);
+
+	trace_i915_pipe_update_vblank_evaded(crtc, min, max, *start_vbl_count);
+
+	return true;
+}
+
+static void intel_pipe_update_end(struct intel_crtc *crtc, u32 start_vbl_count)
+{
+	struct drm_device *dev = crtc->base.dev;
+	enum pipe pipe = crtc->pipe;
+	u32 end_vbl_count = dev->driver->get_vblank_counter(dev, pipe);
+
+	trace_i915_pipe_update_end(crtc, end_vbl_count);
+
+	local_irq_enable();
+
+	if (start_vbl_count != end_vbl_count)
+		DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u)\n",
+			  pipe_name(pipe), start_vbl_count, end_vbl_count);
+}
+
+static void intel_update_primary_plane(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	int reg = DSPCNTR(crtc->plane);
+
+	if (crtc->primary_enabled)
+		I915_WRITE(reg, I915_READ(reg) | DISPLAY_PLANE_ENABLE);
+	else
+		I915_WRITE(reg, I915_READ(reg) & ~DISPLAY_PLANE_ENABLE);
+}
+
 static void
 vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
 		 struct drm_framebuffer *fb,
@@ -48,11 +148,14 @@
 	struct drm_device *dev = dplane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(dplane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
 	int plane = intel_plane->plane;
 	u32 sprctl;
 	unsigned long sprsurf_offset, linear_offset;
 	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
+	u32 start_vbl_count;
+	bool atomic_update;
 
 	sprctl = I915_READ(SPCNTR(pipe, plane));
 
@@ -131,6 +234,10 @@
 							fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
+
 	I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
 	I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
 
@@ -143,7 +250,11 @@
 	I915_WRITE(SPCNTR(pipe, plane), sprctl);
 	I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
 		   sprsurf_offset);
-	POSTING_READ(SPSURF(pipe, plane));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 }
 
 static void
@@ -152,14 +263,25 @@
 	struct drm_device *dev = dplane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(dplane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
 	int plane = intel_plane->plane;
+	u32 start_vbl_count;
+	bool atomic_update;
+
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
 
 	I915_WRITE(SPCNTR(pipe, plane), I915_READ(SPCNTR(pipe, plane)) &
 		   ~SP_ENABLE);
 	/* Activate double buffered register update */
 	I915_WRITE(SPSURF(pipe, plane), 0);
-	POSTING_READ(SPSURF(pipe, plane));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 
 	intel_update_sprite_watermarks(dplane, crtc, 0, 0, false, false);
 }
@@ -226,10 +348,13 @@
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
 	u32 sprctl, sprscale = 0;
 	unsigned long sprsurf_offset, linear_offset;
 	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
+	u32 start_vbl_count;
+	bool atomic_update;
 
 	sprctl = I915_READ(SPRCTL(pipe));
 
@@ -299,6 +424,10 @@
 					       pixel_size, fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
+
 	I915_WRITE(SPRSTRIDE(pipe), fb->pitches[0]);
 	I915_WRITE(SPRPOS(pipe), (crtc_y << 16) | crtc_x);
 
@@ -317,7 +446,11 @@
 	I915_WRITE(SPRCTL(pipe), sprctl);
 	I915_WRITE(SPRSURF(pipe),
 		   i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
-	POSTING_READ(SPRSURF(pipe));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 }
 
 static void
@@ -326,7 +459,14 @@
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
+	u32 start_vbl_count;
+	bool atomic_update;
+
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
 
 	I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE);
 	/* Can't leave the scaler enabled... */
@@ -334,7 +474,11 @@
 		I915_WRITE(SPRSCALE(pipe), 0);
 	/* Activate double buffered register update */
 	I915_WRITE(SPRSURF(pipe), 0);
-	POSTING_READ(SPRSURF(pipe));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 
 	/*
 	 * Avoid underruns when disabling the sprite.
@@ -410,10 +554,13 @@
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
 	unsigned long dvssurf_offset, linear_offset;
 	u32 dvscntr, dvsscale;
 	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
+	u32 start_vbl_count;
+	bool atomic_update;
 
 	dvscntr = I915_READ(DVSCNTR(pipe));
 
@@ -478,6 +625,10 @@
 					       pixel_size, fb->pitches[0]);
 	linear_offset -= dvssurf_offset;
 
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
+
 	I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
 	I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
 
@@ -491,7 +642,11 @@
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
 	I915_WRITE(DVSSURF(pipe),
 		   i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
-	POSTING_READ(DVSSURF(pipe));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 }
 
 static void
@@ -500,14 +655,25 @@
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_plane->pipe;
+	u32 start_vbl_count;
+	bool atomic_update;
+
+	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
+
+	intel_update_primary_plane(intel_crtc);
 
 	I915_WRITE(DVSCNTR(pipe), I915_READ(DVSCNTR(pipe)) & ~DVS_ENABLE);
 	/* Disable the scaler */
 	I915_WRITE(DVSSCALE(pipe), 0);
 	/* Flush double buffered register updates */
 	I915_WRITE(DVSSURF(pipe), 0);
-	POSTING_READ(DVSSURF(pipe));
+
+	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+
+	if (atomic_update)
+		intel_pipe_update_end(intel_crtc, start_vbl_count);
 
 	/*
 	 * Avoid underruns when disabling the sprite.
@@ -519,20 +685,10 @@
 }
 
 static void
-intel_enable_primary(struct drm_crtc *crtc)
+intel_post_enable_primary(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int reg = DSPCNTR(intel_crtc->plane);
-
-	if (intel_crtc->primary_enabled)
-		return;
-
-	intel_crtc->primary_enabled = true;
-
-	I915_WRITE(reg, I915_READ(reg) | DISPLAY_PLANE_ENABLE);
-	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
 
 	/*
 	 * FIXME IPS should be fine as long as one plane is
@@ -540,10 +696,7 @@
 	 * when going from primary only to sprite only and vice
 	 * versa.
 	 */
-	if (intel_crtc->config.ips_enabled) {
-		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		hsw_enable_ips(intel_crtc);
-	}
+	hsw_enable_ips(intel_crtc);
 
 	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
@@ -551,17 +704,11 @@
 }
 
 static void
-intel_disable_primary(struct drm_crtc *crtc)
+intel_pre_disable_primary(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int reg = DSPCNTR(intel_crtc->plane);
-
-	if (!intel_crtc->primary_enabled)
-		return;
-
-	intel_crtc->primary_enabled = false;
 
 	mutex_lock(&dev->struct_mutex);
 	if (dev_priv->fbc.plane == intel_crtc->plane)
@@ -575,9 +722,6 @@
 	 * versa.
 	 */
 	hsw_disable_ips(intel_crtc);
-
-	I915_WRITE(reg, I915_READ(reg) & ~DISPLAY_PLANE_ENABLE);
-	intel_flush_primary_plane(dev_priv, intel_crtc->plane);
 }
 
 static int
@@ -671,7 +815,7 @@
 	struct drm_i915_gem_object *obj = intel_fb->obj;
 	struct drm_i915_gem_object *old_obj = intel_plane->obj;
 	int ret;
-	bool disable_primary = false;
+	bool primary_enabled;
 	bool visible;
 	int hscale, vscale;
 	int max_scale, min_scale;
@@ -842,8 +986,8 @@
 	 * If the sprite is completely covering the primary plane,
 	 * we can disable the primary and save power.
 	 */
-	disable_primary = drm_rect_equals(&dst, &clip) && !colorkey_enabled(intel_plane);
-	WARN_ON(disable_primary && !visible && intel_crtc->active);
+	primary_enabled = !drm_rect_equals(&dst, &clip) || colorkey_enabled(intel_plane);
+	WARN_ON(!primary_enabled && !visible && intel_crtc->active);
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -870,12 +1014,15 @@
 	intel_plane->obj = obj;
 
 	if (intel_crtc->active) {
-		/*
-		 * Be sure to re-enable the primary before the sprite is no longer
-		 * covering it fully.
-		 */
-		if (!disable_primary)
-			intel_enable_primary(crtc);
+		bool primary_was_enabled = intel_crtc->primary_enabled;
+
+		intel_crtc->primary_enabled = primary_enabled;
+
+		if (primary_was_enabled != primary_enabled)
+			intel_crtc_wait_for_pending_flips(crtc);
+
+		if (primary_was_enabled && !primary_enabled)
+			intel_pre_disable_primary(crtc);
 
 		if (visible)
 			intel_plane->update_plane(plane, crtc, fb, obj,
@@ -884,8 +1031,8 @@
 		else
 			intel_plane->disable_plane(plane, crtc);
 
-		if (disable_primary)
-			intel_disable_primary(crtc);
+		if (!primary_was_enabled && primary_enabled)
+			intel_post_enable_primary(crtc);
 	}
 
 	/* Unpin old obj after new one is active to avoid ugliness */
@@ -923,8 +1070,14 @@
 	intel_crtc = to_intel_crtc(plane->crtc);
 
 	if (intel_crtc->active) {
-		intel_enable_primary(plane->crtc);
+		bool primary_was_enabled = intel_crtc->primary_enabled;
+
+		intel_crtc->primary_enabled = true;
+
 		intel_plane->disable_plane(plane, plane->crtc);
+
+		if (!primary_was_enabled && intel_crtc->primary_enabled)
+			intel_post_enable_primary(plane->crtc);
 	}
 
 	if (intel_plane->obj) {

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index bafe92e..67c6c9a 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c

@@ -934,54 +934,14 @@
 	return true;
 }
 
-static void intel_tv_mode_set(struct intel_encoder *encoder)
+static void
+set_tv_mode_timings(struct drm_i915_private *dev_priv,
+		    const struct tv_mode *tv_mode,
+		    bool burst_ena)
 {
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
-	struct intel_tv *intel_tv = enc_to_tv(encoder);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
-	u32 tv_ctl;
 	u32 hctl1, hctl2, hctl3;
 	u32 vctl1, vctl2, vctl3, vctl4, vctl5, vctl6, vctl7;
-	u32 scctl1, scctl2, scctl3;
-	int i, j;
-	const struct video_levels *video_levels;
-	const struct color_conversion *color_conversion;
-	bool burst_ena;
-	int pipe = intel_crtc->pipe;
 
-	if (!tv_mode)
-		return;	/* can't happen (mode_prepare prevents this) */
-
-	tv_ctl = I915_READ(TV_CTL);
-	tv_ctl &= TV_CTL_SAVE;
-
-	switch (intel_tv->type) {
-	default:
-	case DRM_MODE_CONNECTOR_Unknown:
-	case DRM_MODE_CONNECTOR_Composite:
-		tv_ctl |= TV_ENC_OUTPUT_COMPOSITE;
-		video_levels = tv_mode->composite_levels;
-		color_conversion = tv_mode->composite_color;
-		burst_ena = tv_mode->burst_ena;
-		break;
-	case DRM_MODE_CONNECTOR_Component:
-		tv_ctl |= TV_ENC_OUTPUT_COMPONENT;
-		video_levels = &component_levels;
-		if (tv_mode->burst_ena)
-			color_conversion = &sdtv_csc_yprpb;
-		else
-			color_conversion = &hdtv_csc_yprpb;
-		burst_ena = false;
-		break;
-	case DRM_MODE_CONNECTOR_SVIDEO:
-		tv_ctl |= TV_ENC_OUTPUT_SVIDEO;
-		video_levels = tv_mode->svideo_levels;
-		color_conversion = tv_mode->svideo_color;
-		burst_ena = tv_mode->burst_ena;
-		break;
-	}
 	hctl1 = (tv_mode->hsync_end << TV_HSYNC_END_SHIFT) |
 		(tv_mode->htotal << TV_HTOTAL_SHIFT);
 
@@ -1021,6 +981,86 @@
 	vctl7 = (tv_mode->vburst_start_f4 << TV_VBURST_START_F4_SHIFT) |
 		(tv_mode->vburst_end_f4 << TV_VBURST_END_F4_SHIFT);
 
+	I915_WRITE(TV_H_CTL_1, hctl1);
+	I915_WRITE(TV_H_CTL_2, hctl2);
+	I915_WRITE(TV_H_CTL_3, hctl3);
+	I915_WRITE(TV_V_CTL_1, vctl1);
+	I915_WRITE(TV_V_CTL_2, vctl2);
+	I915_WRITE(TV_V_CTL_3, vctl3);
+	I915_WRITE(TV_V_CTL_4, vctl4);
+	I915_WRITE(TV_V_CTL_5, vctl5);
+	I915_WRITE(TV_V_CTL_6, vctl6);
+	I915_WRITE(TV_V_CTL_7, vctl7);
+}
+
+static void set_color_conversion(struct drm_i915_private *dev_priv,
+				 const struct color_conversion *color_conversion)
+{
+	if (!color_conversion)
+		return;
+
+	I915_WRITE(TV_CSC_Y, (color_conversion->ry << 16) |
+		   color_conversion->gy);
+	I915_WRITE(TV_CSC_Y2, (color_conversion->by << 16) |
+		   color_conversion->ay);
+	I915_WRITE(TV_CSC_U, (color_conversion->ru << 16) |
+		   color_conversion->gu);
+	I915_WRITE(TV_CSC_U2, (color_conversion->bu << 16) |
+		   color_conversion->au);
+	I915_WRITE(TV_CSC_V, (color_conversion->rv << 16) |
+		   color_conversion->gv);
+	I915_WRITE(TV_CSC_V2, (color_conversion->bv << 16) |
+		   color_conversion->av);
+}
+
+static void intel_tv_pre_enable(struct intel_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_tv *intel_tv = enc_to_tv(encoder);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	u32 tv_ctl;
+	u32 scctl1, scctl2, scctl3;
+	int i, j;
+	const struct video_levels *video_levels;
+	const struct color_conversion *color_conversion;
+	bool burst_ena;
+	int xpos = 0x0, ypos = 0x0;
+	unsigned int xsize, ysize;
+
+	if (!tv_mode)
+		return;	/* can't happen (mode_prepare prevents this) */
+
+	tv_ctl = I915_READ(TV_CTL);
+	tv_ctl &= TV_CTL_SAVE;
+
+	switch (intel_tv->type) {
+	default:
+	case DRM_MODE_CONNECTOR_Unknown:
+	case DRM_MODE_CONNECTOR_Composite:
+		tv_ctl |= TV_ENC_OUTPUT_COMPOSITE;
+		video_levels = tv_mode->composite_levels;
+		color_conversion = tv_mode->composite_color;
+		burst_ena = tv_mode->burst_ena;
+		break;
+	case DRM_MODE_CONNECTOR_Component:
+		tv_ctl |= TV_ENC_OUTPUT_COMPONENT;
+		video_levels = &component_levels;
+		if (tv_mode->burst_ena)
+			color_conversion = &sdtv_csc_yprpb;
+		else
+			color_conversion = &hdtv_csc_yprpb;
+		burst_ena = false;
+		break;
+	case DRM_MODE_CONNECTOR_SVIDEO:
+		tv_ctl |= TV_ENC_OUTPUT_SVIDEO;
+		video_levels = tv_mode->svideo_levels;
+		color_conversion = tv_mode->svideo_color;
+		burst_ena = tv_mode->burst_ena;
+		break;
+	}
+
 	if (intel_crtc->pipe == 1)
 		tv_ctl |= TV_ENC_PIPEB_SELECT;
 	tv_ctl |= tv_mode->oversample;
@@ -1051,37 +1091,16 @@
 		tv_mode->dda3_inc << TV_SCDDA3_INC_SHIFT;
 
 	/* Enable two fixes for the chips that need them. */
-	if (dev->pdev->device < 0x2772)
+	if (IS_I915GM(dev))
 		tv_ctl |= TV_ENC_C0_FIX | TV_ENC_SDP_FIX;
 
-	I915_WRITE(TV_H_CTL_1, hctl1);
-	I915_WRITE(TV_H_CTL_2, hctl2);
-	I915_WRITE(TV_H_CTL_3, hctl3);
-	I915_WRITE(TV_V_CTL_1, vctl1);
-	I915_WRITE(TV_V_CTL_2, vctl2);
-	I915_WRITE(TV_V_CTL_3, vctl3);
-	I915_WRITE(TV_V_CTL_4, vctl4);
-	I915_WRITE(TV_V_CTL_5, vctl5);
-	I915_WRITE(TV_V_CTL_6, vctl6);
-	I915_WRITE(TV_V_CTL_7, vctl7);
+	set_tv_mode_timings(dev_priv, tv_mode, burst_ena);
+
 	I915_WRITE(TV_SC_CTL_1, scctl1);
 	I915_WRITE(TV_SC_CTL_2, scctl2);
 	I915_WRITE(TV_SC_CTL_3, scctl3);
 
-	if (color_conversion) {
-		I915_WRITE(TV_CSC_Y, (color_conversion->ry << 16) |
-			   color_conversion->gy);
-		I915_WRITE(TV_CSC_Y2, (color_conversion->by << 16) |
-			   color_conversion->ay);
-		I915_WRITE(TV_CSC_U, (color_conversion->ru << 16) |
-			   color_conversion->gu);
-		I915_WRITE(TV_CSC_U2, (color_conversion->bu << 16) |
-			   color_conversion->au);
-		I915_WRITE(TV_CSC_V, (color_conversion->rv << 16) |
-			   color_conversion->gv);
-		I915_WRITE(TV_CSC_V2, (color_conversion->bv << 16) |
-			   color_conversion->av);
-	}
+	set_color_conversion(dev_priv, color_conversion);
 
 	if (INTEL_INFO(dev)->gen >= 4)
 		I915_WRITE(TV_CLR_KNOBS, 0x00404000);
@@ -1092,46 +1111,25 @@
 		I915_WRITE(TV_CLR_LEVEL,
 			   ((video_levels->black << TV_BLACK_LEVEL_SHIFT) |
 			    (video_levels->blank << TV_BLANK_LEVEL_SHIFT)));
-	{
-		int pipeconf_reg = PIPECONF(pipe);
-		int dspcntr_reg = DSPCNTR(intel_crtc->plane);
-		int pipeconf = I915_READ(pipeconf_reg);
-		int dspcntr = I915_READ(dspcntr_reg);
-		int xpos = 0x0, ypos = 0x0;
-		unsigned int xsize, ysize;
-		/* Pipe must be off here */
-		I915_WRITE(dspcntr_reg, dspcntr & ~DISPLAY_PLANE_ENABLE);
-		intel_flush_primary_plane(dev_priv, intel_crtc->plane);
 
-		/* Wait for vblank for the disable to take effect */
-		if (IS_GEN2(dev))
-			intel_wait_for_vblank(dev, intel_crtc->pipe);
+	assert_pipe_disabled(dev_priv, intel_crtc->pipe);
 
-		I915_WRITE(pipeconf_reg, pipeconf & ~PIPECONF_ENABLE);
-		/* Wait for vblank for the disable to take effect. */
-		intel_wait_for_pipe_off(dev, intel_crtc->pipe);
+	/* Filter ctl must be set before TV_WIN_SIZE */
+	I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE);
+	xsize = tv_mode->hblank_start - tv_mode->hblank_end;
+	if (tv_mode->progressive)
+		ysize = tv_mode->nbr_end + 1;
+	else
+		ysize = 2*tv_mode->nbr_end + 1;
 
-		/* Filter ctl must be set before TV_WIN_SIZE */
-		I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE);
-		xsize = tv_mode->hblank_start - tv_mode->hblank_end;
-		if (tv_mode->progressive)
-			ysize = tv_mode->nbr_end + 1;
-		else
-			ysize = 2*tv_mode->nbr_end + 1;
-
-		xpos += intel_tv->margin[TV_MARGIN_LEFT];
-		ypos += intel_tv->margin[TV_MARGIN_TOP];
-		xsize -= (intel_tv->margin[TV_MARGIN_LEFT] +
-			  intel_tv->margin[TV_MARGIN_RIGHT]);
-		ysize -= (intel_tv->margin[TV_MARGIN_TOP] +
-			  intel_tv->margin[TV_MARGIN_BOTTOM]);
-		I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
-		I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
-
-		I915_WRITE(pipeconf_reg, pipeconf);
-		I915_WRITE(dspcntr_reg, dspcntr);
-		intel_flush_primary_plane(dev_priv, intel_crtc->plane);
-	}
+	xpos += intel_tv->margin[TV_MARGIN_LEFT];
+	ypos += intel_tv->margin[TV_MARGIN_TOP];
+	xsize -= (intel_tv->margin[TV_MARGIN_LEFT] +
+		  intel_tv->margin[TV_MARGIN_RIGHT]);
+	ysize -= (intel_tv->margin[TV_MARGIN_TOP] +
+		  intel_tv->margin[TV_MARGIN_BOTTOM]);
+	I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
+	I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
 
 	j = 0;
 	for (i = 0; i < 60; i++)
@@ -1316,17 +1314,18 @@
 	int type;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
-		      connector->base.id, drm_get_connector_name(connector),
+		      connector->base.id, connector->name,
 		      force);
 
 	mode = reported_modes[0];
 
 	if (force) {
 		struct intel_load_detect_pipe tmp;
+		struct drm_modeset_acquire_ctx ctx;
 
-		if (intel_get_load_detect_pipe(connector, &mode, &tmp)) {
+		if (intel_get_load_detect_pipe(connector, &mode, &tmp, &ctx)) {
 			type = intel_tv_detect_type(intel_tv, connector);
-			intel_release_load_detect_pipe(connector, &tmp);
+			intel_release_load_detect_pipe(connector, &tmp, &ctx);
 		} else
 			return connector_status_unknown;
 	} else
@@ -1634,7 +1633,7 @@
 
 	intel_encoder->compute_config = intel_tv_compute_config;
 	intel_encoder->get_config = intel_tv_get_config;
-	intel_encoder->mode_set = intel_tv_mode_set;
+	intel_encoder->pre_enable = intel_tv_pre_enable;
 	intel_encoder->enable = intel_enable_tv;
 	intel_encoder->disable = intel_disable_tv;
 	intel_encoder->get_hw_state = intel_tv_get_hw_state;

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d0c7577..79cba59 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c

@@ -255,8 +255,7 @@
 
 }
 
-void vlv_force_wake_get(struct drm_i915_private *dev_priv,
-						int fw_engine)
+static void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine)
 {
 	unsigned long irqflags;
 
@@ -275,8 +274,7 @@
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-void vlv_force_wake_put(struct drm_i915_private *dev_priv,
-						int fw_engine)
+static void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine)
 {
 	unsigned long irqflags;
 
@@ -374,7 +372,7 @@
 	if (HAS_FPGA_DBG_UNCLAIMED(dev))
 		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
 
-	if (IS_HASWELL(dev) &&
+	if ((IS_HASWELL(dev) || IS_BROADWELL(dev)) &&
 	    (__raw_i915_read32(dev_priv, HSW_EDRAM_PRESENT) == 1)) {
 		/* The docs do not explain exactly how the calculation can be
 		 * made. It is somewhat guessable, but for now, it's always
@@ -395,26 +393,8 @@
 
 void intel_uncore_sanitize(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg_val;
-
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
 	intel_disable_gt_powersave(dev);
-
-	/* Turn off power gate, require especially for the BIOS less system */
-	if (IS_VALLEYVIEW(dev)) {
-
-		mutex_lock(&dev_priv->rps.hw_lock);
-		reg_val = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS);
-
-		if (reg_val & (PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_RENDER) |
-			       PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_MEDIA) |
-			       PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_DISP2D)))
-			vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, 0x0);
-
-		mutex_unlock(&dev_priv->rps.hw_lock);
-
-	}
 }
 
 /*
@@ -488,6 +468,17 @@
 #define NEEDS_FORCE_WAKE(dev_priv, reg) \
 	 ((reg) < 0x40000 && (reg) != FORCEWAKE)
 
+#define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \
+	(((reg) >= 0x2000 && (reg) < 0x4000) ||\
+	((reg) >= 0x5000 && (reg) < 0x8000) ||\
+	((reg) >= 0xB000 && (reg) < 0x12000) ||\
+	((reg) >= 0x2E000 && (reg) < 0x30000))
+
+#define FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)\
+	(((reg) >= 0x12000 && (reg) < 0x14000) ||\
+	((reg) >= 0x22000 && (reg) < 0x24000) ||\
+	((reg) >= 0x30000 && (reg) < 0x40000))
+
 static void
 ilk_dummy_write(struct drm_i915_private *dev_priv)
 {
@@ -854,12 +845,15 @@
 	intel_uncore_forcewake_reset(dev, false);
 }
 
+#define GEN_RANGE(l, h) GENMASK(h, l)
+
 static const struct register_whitelist {
 	uint64_t offset;
 	uint32_t size;
-	uint32_t gen_bitmask; /* support gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+	/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+	uint32_t gen_bitmask;
 } whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, 0x1F0 },
+	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, GEN_RANGE(4, 8) },
 };
 
 int i915_reg_read_ioctl(struct drm_device *dev,
@@ -911,7 +905,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_reset_stats *args = data;
 	struct i915_ctx_hang_stats *hs;
-	struct i915_hw_context *ctx;
+	struct intel_context *ctx;
 	int ret;
 
 	if (args->flags || args->pad)
@@ -955,6 +949,9 @@
 {
 	int ret;
 
+	/* FIXME: i965g/gm need a display save/restore for gpu reset. */
+	return -ENODEV;
+
 	/*
 	 * Set the domains we want to reset (GRDOM/bits 2 and 3) as
 	 * well as the reset bit (GR/bit 0).  Setting the GR bit
@@ -966,7 +963,6 @@
 	if (ret)
 		return ret;
 
-	/* We can't reset render&media without also resetting display ... */
 	pci_write_config_byte(dev->pdev, I965_GDRST,
 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
 
@@ -979,26 +975,58 @@
 	return 0;
 }
 
-static int ironlake_do_reset(struct drm_device *dev)
+static int g4x_do_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 gdrst;
 	int ret;
 
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret = wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
+	pci_write_config_byte(dev->pdev, I965_GDRST,
+			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret =  wait_for(i965_reset_complete(dev), 500);
 	if (ret)
 		return ret;
 
-	/* We can't reset render&media without also resetting display ... */
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
+	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
+	I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	pci_write_config_byte(dev->pdev, I965_GDRST,
+			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+	ret =  wait_for(i965_reset_complete(dev), 500);
+	if (ret)
+		return ret;
+
+	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
+	I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	pci_write_config_byte(dev->pdev, I965_GDRST, 0);
+
+	return 0;
+}
+
+static int ironlake_do_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
 	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	return wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
+		   ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = wait_for((I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) &
+			ILK_GRDOM_RESET_ENABLE) == 0, 500);
+	if (ret)
+		return ret;
+
+	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
+		   ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = wait_for((I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) &
+			ILK_GRDOM_RESET_ENABLE) == 0, 500);
+	if (ret)
+		return ret;
+
+	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR, 0);
+
+	return 0;
 }
 
 static int gen6_do_reset(struct drm_device *dev)
@@ -1029,7 +1057,11 @@
 	case 7:
 	case 6: return gen6_do_reset(dev);
 	case 5: return ironlake_do_reset(dev);
-	case 4: return i965_do_reset(dev);
+	case 4:
+		if (IS_G4X(dev))
+			return g4x_do_reset(dev);
+		else
+			return i965_do_reset(dev);
 	default: return -ENODEV;
 	}
 }

diff --git a/drivers/gpu/drm/mga/mga_ioc32.c b/drivers/gpu/drm/mga/mga_ioc32.c
index 86b4bb8..729bfd5 100644
--- a/drivers/gpu/drm/mga/mga_ioc32.c
+++ b/drivers/gpu/drm/mga/mga_ioc32.c

@@ -214,7 +214,7 @@
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(mga_compat_ioctls))
 		fn = mga_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)

diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index 314685b..792f924 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c

@@ -1020,7 +1020,7 @@
 
 	switch (param->param) {
 	case MGA_PARAM_IRQ_NR:
-		value = drm_dev_to_irq(dev);
+		value = dev->pdev->irq;
 		break;
 	case MGA_PARAM_CARD_TYPE:
 		value = dev_priv->chipset;
@@ -1099,4 +1099,4 @@
 	DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 };
 
-int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls);
+int mga_max_ioctl = ARRAY_SIZE(mga_ioctls);

diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 26868e5..f6b283b 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c

@@ -322,17 +322,13 @@
 
 	tbo = &((*bo)->bo);
 	ttm_bo_unref(&tbo);
-	if (tbo == NULL)
-		*bo = NULL;
-
+	*bo = NULL;
 }
 
 void mgag200_gem_free_object(struct drm_gem_object *obj)
 {
 	struct mgag200_bo *mgag200_bo = gem_to_mga_bo(obj);
 
-	if (!mgag200_bo)
-		return;
 	mgag200_bo_unref(&mgag200_bo);
 }
 

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index b698497..f123889 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig

@@ -3,7 +3,7 @@
 	tristate "MSM DRM"
 	depends on DRM
 	depends on MSM_IOMMU
-	depends on ARCH_MSM8960 || (ARM && COMPILE_TEST)
+	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
 	select DRM_KMS_HELPER
 	select SHMEM
 	select TMPFS

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 5e1e6b0..93ca49c 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile

@@ -34,6 +34,8 @@
 	msm_gem_submit.o \
 	msm_gpu.o \
 	msm_iommu.o \
+	msm_perf.o \
+	msm_rd.o \
 	msm_ringbuffer.o
 
 msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index f20fbde..942e09d 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c

@@ -207,11 +207,11 @@
 	/* Turn on performance counters: */
 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
 
-	/* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS
-	 * we will use this to augment our hang detection:
-	 */
-	gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT,
-			SP_FS_FULL_ALU_INSTRUCTIONS);
+	/* Enable the perfcntrs that we use.. */
+	for (i = 0; i < gpu->num_perfcntrs; i++) {
+		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
+		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
+	}
 
 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
 
@@ -465,6 +465,13 @@
 	},
 };
 
+/* Perf counters used on this gpu.  Each entry gives the select
+ * register/value that a3xx hw init programs, the register the count is
+ * sampled from, and the name the counter is reported under.
+ */
+static const struct msm_gpu_perfcntr perfcntrs[] = {
+	{
+		.select_reg = REG_A3XX_SP_PERFCOUNTER6_SELECT,
+		.sample_reg = REG_A3XX_RBBM_PERFCTR_SP_6_LO,
+		.select_val = SP_ALU_ACTIVE_CYCLES,
+		.name       = "ALUACTIVE",
+	},
+	{
+		.select_reg = REG_A3XX_SP_PERFCOUNTER7_SELECT,
+		.sample_reg = REG_A3XX_RBBM_PERFCTR_SP_7_LO,
+		.select_val = SP_FS_FULL_ALU_INSTRUCTIONS,
+		.name       = "ALUFULL",
+	},
+};
+
 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 {
 	struct a3xx_gpu *a3xx_gpu = NULL;
@@ -504,6 +511,9 @@
 	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
 			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
 
+	gpu->perfcntrs = perfcntrs;
+	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
+
 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
 	if (ret)
 		goto fail;

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index 7dedfdd..e56a619 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c

@@ -247,36 +247,49 @@
 	}
 }
 
+/* Connection status according to the CABLE_DETECTED bit of the HDMI
+ * block's REG_HDMI_HPD_INT_STATUS register.
+ */
+static enum drm_connector_status detect_reg(struct hdmi *hdmi)
+{
+	uint32_t status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+
+	if (status & HDMI_HPD_INT_STATUS_CABLE_DETECTED)
+		return connector_status_connected;
+
+	return connector_status_disconnected;
+}
+
+/* Connection status according to the raw level of the hpd gpio named in
+ * the platform config; a non-zero level is reported as connected.
+ */
+static enum drm_connector_status detect_gpio(struct hdmi *hdmi)
+{
+	int level = gpio_get_value(hdmi->config->hpd_gpio);
+
+	if (level)
+		return connector_status_connected;
+
+	return connector_status_disconnected;
+}
+
 static enum drm_connector_status hdmi_connector_detect(
 		struct drm_connector *connector, bool force)
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
-	const struct hdmi_platform_config *config = hdmi->config;
-	uint32_t hpd_int_status;
+	enum drm_connector_status stat_gpio, stat_reg;
 	int retry = 20;
 
-	hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+	do {
+		stat_gpio = detect_gpio(hdmi);
+		stat_reg  = detect_reg(hdmi);
 
-	/* sense seems to in some cases be momentarily de-asserted, don't
-	 * let that trick us into thinking the monitor is gone:
-	 */
-	while (retry-- && !(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED)) {
-		/* hdmi debounce logic seems to get stuck sometimes,
-		 * read directly the gpio to get a second opinion:
-		 */
-		if (gpio_get_value(config->hpd_gpio)) {
-			DBG("gpio tells us we are connected!");
-			hpd_int_status |= HDMI_HPD_INT_STATUS_CABLE_DETECTED;
+		if (stat_gpio == stat_reg)
 			break;
-		}
+
 		mdelay(10);
-		hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
-		DBG("status=%08x", hpd_int_status);
+	} while (--retry);
+
+	/* the status we get from reading gpio seems to be more reliable,
+	 * so trust that one the most if we didn't manage to get hdmi and
+	 * gpio status to agree:
+	 */
+	if (stat_gpio != stat_reg) {
+		DBG("HDMI_HPD_INT_STATUS tells us: %d", stat_reg);
+		DBG("hpd gpio tells us: %d", stat_gpio);
 	}
 
-	return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ?
-			connector_status_connected : connector_status_disconnected;
+	return stat_gpio;
 }
 
 static void hdmi_connector_destroy(struct drm_connector *connector)
@@ -389,7 +402,8 @@
 			DRM_MODE_CONNECTOR_HDMIA);
 	drm_connector_helper_add(connector, &hdmi_connector_helper_funcs);
 
-	connector->polled = DRM_CONNECTOR_POLL_HPD;
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			DRM_CONNECTOR_POLL_DISCONNECT;
 
 	connector->interlace_allowed = 1;
 	connector->doublescan_allowed = 0;

diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index ef9957d..74cebb5 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c

@@ -217,8 +217,6 @@
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 
-	mdp4_crtc->plane->funcs->destroy(mdp4_crtc->plane);
-
 	drm_crtc_cleanup(crtc);
 	drm_flip_work_cleanup(&mdp4_crtc->unref_fb_work);
 	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 6ea10bd..ebe2e60 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c

@@ -195,8 +195,6 @@
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
 
-	mdp5_crtc->plane->funcs->destroy(mdp5_crtc->plane);
-
 	drm_crtc_cleanup(crtc);
 	drm_flip_work_cleanup(&mdp5_crtc->unref_fb_work);
 

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index ee8446c..42caf7f 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c

@@ -280,12 +280,22 @@
 		goto fail;
 	}
 
-	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk") ||
-			get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk") ||
-			get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src") ||
-			get_clk(pdev, &mdp5_kms->core_clk, "core_clk") ||
-			get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk") ||
-			get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk");
+	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk");
+	if (ret)
+		goto fail;
+	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk");
+	if (ret)
+		goto fail;
+	ret = get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src");
+	if (ret)
+		goto fail;
+	ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk");
+	if (ret)
+		goto fail;
+	ret = get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk");
+	if (ret)
+		goto fail;
+	ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk");
 	if (ret)
 		goto fail;
 

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index 47f7bbb..f3daec4 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c

@@ -85,8 +85,11 @@
 static void mdp5_plane_destroy(struct drm_plane *plane)
 {
 	struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane);
+	struct msm_drm_private *priv = plane->dev->dev_private;
 
-	mdp5_plane_disable(plane);
+	if (priv->kms)
+		mdp5_plane_disable(plane);
+
 	drm_plane_cleanup(plane);
 
 	kfree(mdp5_plane);

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f9de156..0d2562f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c

@@ -220,7 +220,7 @@
 		 * is bogus, but non-null if allocation succeeded:
 		 */
 		p = dma_alloc_attrs(dev->dev, size,
-				&priv->vram.paddr, 0, &attrs);
+				&priv->vram.paddr, GFP_KERNEL, &attrs);
 		if (!p) {
 			dev_err(dev->dev, "failed to allocate VRAM\n");
 			priv->vram.paddr = 0;
@@ -288,7 +288,7 @@
 	}
 
 	pm_runtime_get_sync(dev->dev);
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	pm_runtime_put_sync(dev->dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to install IRQ handler\n");
@@ -299,6 +299,10 @@
 	priv->fbdev = msm_fbdev_init(dev);
 #endif
 
+	ret = msm_debugfs_late_init(dev);
+	if (ret)
+		goto fail;
+
 	drm_kms_helper_poll_init(dev);
 
 	return 0;
@@ -382,11 +386,8 @@
 static void msm_lastclose(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	if (priv->fbdev) {
-		drm_modeset_lock_all(dev);
-		drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-		drm_modeset_unlock_all(dev);
-	}
+	if (priv->fbdev)
+		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 }
 
 static irqreturn_t msm_irq(int irq, void *arg)
@@ -531,6 +532,41 @@
 		{ "fb", show_locked, 0, msm_fb_show },
 };
 
+/* Install the rd and perf debugfs entries on one minor.  A NULL minor is
+ * skipped and counts as success; otherwise the first failing init's
+ * return value is passed back after logging which one failed.
+ */
+static int late_init_minor(struct drm_minor *minor)
+{
+	int err;
+
+	if (!minor)
+		return 0;
+
+	err = msm_rd_debugfs_init(minor);
+	if (err) {
+		dev_err(minor->dev->dev, "could not install rd debugfs\n");
+		return err;
+	}
+
+	err = msm_perf_debugfs_init(minor);
+	if (err)
+		dev_err(minor->dev->dev, "could not install perf debugfs\n");
+
+	return err;
+}
+
+/* Run late_init_minor() on the primary, render and control minors, in
+ * that order, stopping at the first error.  Returns 0 or that error.
+ */
+int msm_debugfs_late_init(struct drm_device *dev)
+{
+	struct drm_minor *minors[] = {
+		dev->primary, dev->render, dev->control,
+	};
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(minors) && !ret; i++)
+		ret = late_init_minor(minors[i]);
+
+	return ret;
+}
+
 static int msm_debugfs_init(struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
@@ -545,13 +581,17 @@
 		return ret;
 	}
 
-	return ret;
+	return 0;
 }
 
 static void msm_debugfs_cleanup(struct drm_minor *minor)
 {
 	drm_debugfs_remove_files(msm_debugfs_list,
 			ARRAY_SIZE(msm_debugfs_list), minor);
+	if (!minor->dev->dev_private)
+		return;
+	msm_rd_debugfs_cleanup(minor);
+	msm_perf_debugfs_cleanup(minor);
 }
 #endif
 

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 9d10ee0..8a2c5fd 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h

@@ -33,7 +33,7 @@
 #include <asm/sizes.h>
 
 
-#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_ARCH_MSM)
+#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_ARCH_QCOM)
 /* stubs we need for compile-test: */
 static inline struct device *msm_iommu_get_ctx(const char *ctx_name)
 {
@@ -55,6 +55,9 @@
 struct msm_kms;
 struct msm_gpu;
 struct msm_mmu;
+struct msm_rd_state;
+struct msm_perf_state;
+struct msm_gem_submit;
 
 #define NUM_DOMAINS 2    /* one for KMS, then one per gpu core (?) */
 
@@ -82,6 +85,9 @@
 	uint32_t next_fence, completed_fence;
 	wait_queue_head_t fence_event;
 
+	struct msm_rd_state *rd;
+	struct msm_perf_state *perf;
+
 	/* list of GEM objects: */
 	struct list_head inactive_list;
 
@@ -204,6 +210,15 @@
 void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
 void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
+int msm_debugfs_late_init(struct drm_device *dev);
+int msm_rd_debugfs_init(struct drm_minor *minor);
+void msm_rd_debugfs_cleanup(struct drm_minor *minor);
+void msm_rd_dump_submit(struct msm_gem_submit *submit);
+int msm_perf_debugfs_init(struct drm_minor *minor);
+void msm_perf_debugfs_cleanup(struct drm_minor *minor);
+#else
+static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; }
+static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {}
 #endif
 
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 3246bb4..bfb0526 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h

@@ -90,6 +90,7 @@
 		uint32_t type;
 		uint32_t size;  /* in dwords */
 		uint32_t iova;
+		uint32_t idx;   /* cmdstream buffer idx in bos[] */
 	} cmd[MAX_CMDS];
 	struct {
 		uint32_t flags;

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 1f1f4cf..cd0554f 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c

@@ -402,6 +402,7 @@
 		submit->cmd[i].type = submit_cmd.type;
 		submit->cmd[i].size = submit_cmd.size / 4;
 		submit->cmd[i].iova = iova + submit_cmd.submit_offset;
+		submit->cmd[i].idx  = submit_cmd.submit_idx;
 
 		if (submit->valid)
 			continue;

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 3e667ca..c632219 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c

@@ -320,6 +320,101 @@
 }
 
 /*
+ * Performance Counters:
+ */
+
+/* Read every hw counter, store the delta since the previous read for the
+ * first min(ncntrs, num_perfcntrs) of them into cntrs[], and remember the
+ * raw values for next time.  Returns the number of deltas stored.
+ * Called under perf_lock.
+ */
+static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
+{
+	int nreported = min(ncntrs, gpu->num_perfcntrs);
+	int idx;
+
+	for (idx = 0; idx < gpu->num_perfcntrs; idx++) {
+		uint32_t now = gpu_read(gpu, gpu->perfcntrs[idx].sample_reg);
+
+		/* cntrs may be NULL when nothing is requested (ncntrs == 0) */
+		if (idx < nreported)
+			cntrs[idx] = now - gpu->last_cntrs[idx];
+
+		gpu->last_cntrs[idx] = now;
+	}
+
+	return nreported;
+}
+
+/* Add the time elapsed since the last sample to totaltime, and also to
+ * activetime if the gpu was active at that last sample; then record a new
+ * sample.  Does nothing unless perf counting is active.
+ */
+static void update_sw_cntrs(struct msm_gpu *gpu)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&gpu->perf_lock, irqflags);
+
+	if (gpu->perfcntr_active) {
+		ktime_t now = ktime_get();
+		uint32_t delta_us =
+			ktime_to_us(ktime_sub(now, gpu->last_sample.time));
+
+		gpu->totaltime += delta_us;
+		if (gpu->last_sample.active)
+			gpu->activetime += delta_us;
+
+		gpu->last_sample.time = now;
+		gpu->last_sample.active = msm_gpu_active(gpu);
+	}
+
+	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);
+}
+
+/* Begin a profiling session: zero the sw counters, take an initial
+ * active/time sample, mark counting active and re-baseline the hw
+ * counters.
+ */
+void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&gpu->perf_lock, irqflags);
+
+	/* we could dynamically enable/disable perfcntr registers too.. */
+	gpu->totaltime = 0;
+	gpu->activetime = 0;
+	gpu->last_sample.time = ktime_get();
+	gpu->last_sample.active = msm_gpu_active(gpu);
+	gpu->perfcntr_active = true;
+
+	/* no output requested: this only refreshes gpu->last_cntrs[] */
+	update_hw_cntrs(gpu, 0, NULL);
+
+	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);
+}
+
+/* End a profiling session.  The flag is cleared under perf_lock, like
+ * every other access to it, so a concurrent sample or sw-counter update
+ * either finishes first or sees the session as stopped.
+ */
+void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpu->perf_lock, flags);
+	gpu->perfcntr_active = false;
+	spin_unlock_irqrestore(&gpu->perf_lock, flags);
+}
+
+/* Hand the accumulated sw counters and the hw counter deltas to the
+ * caller and restart accumulation.  Returns the number of hw counters
+ * written to cntrs[], or -EINVAL if perf counting is not active.
+ */
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
+		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
+{
+	unsigned long irqflags;
+	int nsampled = -EINVAL;
+
+	spin_lock_irqsave(&gpu->perf_lock, irqflags);
+
+	if (gpu->perfcntr_active) {
+		*activetime = gpu->activetime;
+		*totaltime = gpu->totaltime;
+
+		gpu->activetime = 0;
+		gpu->totaltime = 0;
+
+		nsampled = update_hw_cntrs(gpu, ncntrs, cntrs);
+	}
+
+	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);
+
+	return nsampled;
+}
+
+/*
  * Cmdstream submission/retirement:
  */
 
@@ -361,6 +456,7 @@
 {
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	queue_work(priv->wq, &gpu->retire_work);
+	update_sw_cntrs(gpu);
 }
 
 /* add bo's to gpu's ring, and kick gpu: */
@@ -377,6 +473,12 @@
 
 	inactive_cancel(gpu);
 
+	msm_rd_dump_submit(submit);
+
+	gpu->submitted_fence = submit->fence;
+
+	update_sw_cntrs(gpu);
+
 	ret = gpu->funcs->submit(gpu, submit, ctx);
 	priv->lastctx = ctx;
 
@@ -429,6 +531,9 @@
 	struct iommu_domain *iommu;
 	int i, ret;
 
+	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
+		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
+
 	gpu->dev = drm;
 	gpu->funcs = funcs;
 	gpu->name = name;
@@ -444,6 +549,8 @@
 	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
 			(unsigned long)gpu);
 
+	spin_lock_init(&gpu->perf_lock);
+
 	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
 
 	/* Map registers: */

diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index fad2700..9b579b7 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h

@@ -25,6 +25,7 @@
 #include "msm_ringbuffer.h"
 
 struct msm_gem_submit;
+struct msm_gpu_perfcntr;
 
 /* So far, with hardware that I've seen to date, we can have:
  *  + zero, one, or two z180 2d cores
@@ -64,6 +65,18 @@
 	struct drm_device *dev;
 	const struct msm_gpu_funcs *funcs;
 
+	/* performance counters (hw & sw): */
+	spinlock_t perf_lock;
+	bool perfcntr_active;
+	struct {
+		bool active;
+		ktime_t time;
+	} last_sample;
+	uint32_t totaltime, activetime;    /* sw counters */
+	uint32_t last_cntrs[5];            /* hw counters */
+	const struct msm_gpu_perfcntr *perfcntrs;
+	uint32_t num_perfcntrs;
+
 	struct msm_ringbuffer *rb;
 	uint32_t rb_iova;
 
@@ -113,6 +126,19 @@
 	return gpu->submitted_fence > gpu->funcs->last_fence(gpu);
 }
 
+/* Perf-Counters:
+ * The select_reg and select_val are just there for the benefit of the child
+ * class that actually enables the perf counter..  but msm_gpu base class
+ * will handle sampling/displaying the counters.
+ */
+
+struct msm_gpu_perfcntr {
+	uint32_t select_reg;    /* register written to choose what is counted */
+	uint32_t sample_reg;    /* register the counter value is read from */
+	uint32_t select_val;    /* value written to select_reg */
+	const char *name;       /* column heading in the perf debugfs output */
+};
+
 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
 {
 	msm_writel(data, gpu->mmio + (reg << 2));
@@ -126,6 +152,11 @@
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
 int msm_gpu_pm_resume(struct msm_gpu *gpu);
 
+void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
+void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
+		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
+
 void msm_gpu_retire(struct msm_gpu *gpu);
 int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 		struct msm_file_private *ctx);

diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
new file mode 100644
index 0000000..830857c
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_perf.c

@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* For profiling, userspace can:
+ *
+ *   tail -f /sys/kernel/debug/dri/<minor>/perf
+ *
+ * This will enable performance counters/profiling to track the busy time
+ * and any gpu specific performance counters that are supported.
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/math64.h>
+
+#include "msm_drv.h"
+#include "msm_gpu.h"
+
+struct msm_perf_state {
+	struct drm_device *dev;
+
+	bool open;              /* set while the debugfs file is held open */
+	int cnt;                /* lines produced so far; every 32nd is a header */
+	struct mutex read_lock; /* serializes perf_read() callers */
+
+	char buf[256];          /* the current formatted output line */
+	int buftot, bufpos;     /* bytes valid in buf / bytes already read out */
+
+	unsigned long next_jiffies; /* when the next sample is due */
+
+	struct dentry *ent;         /* the "perf" debugfs file */
+	struct drm_info_node *node; /* our entry on minor->debugfs_list */
+};
+
+#define SAMPLE_TIME (HZ/4)
+
+/* Sleep until the next sample is due, then advance the deadline by one
+ * sample period.  Returns -ERESTARTSYS if the sleep ended early.
+ */
+static int wait_sample(struct msm_perf_state *perf)
+{
+	unsigned long now = jiffies;
+
+	if (time_after(perf->next_jiffies, now)) {
+		int left = schedule_timeout_interruptible(
+				perf->next_jiffies - now);
+
+		/* time still left on the clock: we were interrupted */
+		if (left > 0)
+			return -ERESTARTSYS;
+	}
+
+	perf->next_jiffies += SAMPLE_TIME;
+	return 0;
+}
+
+/* Format the next line of output into perf->buf: a header naming the
+ * columns on every 32nd call, otherwise a sample line (which first sleeps
+ * until the next sample time).  On success resets bufpos/buftot and
+ * returns 0; otherwise returns the negative errno from the wait or from
+ * msm_gpu_perfcntr_sample().
+ */
+static int refill_buf(struct msm_perf_state *perf)
+{
+	struct msm_drm_private *priv = perf->dev->dev_private;
+	struct msm_gpu *gpu = priv->gpu;
+	char *ptr = perf->buf;
+	int rem = sizeof(perf->buf);
+	int i, n;
+
+	/* scnprintf() returns what was actually stored, so ptr/rem stay
+	 * inside buf even if a line gets truncated; snprintf() returns the
+	 * untruncated length, which could push ptr past the end of buf and
+	 * drive rem negative.
+	 */
+	if ((perf->cnt++ % 32) == 0) {
+		/* Header line: */
+		n = scnprintf(ptr, rem, "%%BUSY");
+		ptr += n;
+		rem -= n;
+
+		for (i = 0; i < gpu->num_perfcntrs; i++) {
+			const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
+			n = scnprintf(ptr, rem, "\t%s", perfcntr->name);
+			ptr += n;
+			rem -= n;
+		}
+	} else {
+		/* Sample line: */
+		uint32_t activetime = 0, totaltime = 0;
+		uint32_t cntrs[5];
+		uint32_t val;
+		int ret;
+
+		/* sleep until next sample time: */
+		ret = wait_sample(perf);
+		if (ret)
+			return ret;
+
+		ret = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime,
+				ARRAY_SIZE(cntrs), cntrs);
+		if (ret < 0)
+			return ret;
+
+		/* busy time in tenths of a percent.  The times are in us, so
+		 * 1000 * activetime would overflow 32 bits once more than
+		 * ~4.3s of active time has accumulated between two reads;
+		 * do the multiply in 64 bits.
+		 */
+		val = totaltime ?
+			div_u64((u64)activetime * 1000, totaltime) : 0;
+		n = scnprintf(ptr, rem, "%3u.%u%%", val / 10, val % 10);
+		ptr += n;
+		rem -= n;
+
+		for (i = 0; i < ret; i++) {
+			/* cycle counters (I think).. convert to MHz.. */
+			val = cntrs[i] / 10000;
+			n = scnprintf(ptr, rem, "\t%5u.%02u",
+					val / 100, val % 100);
+			ptr += n;
+			rem -= n;
+		}
+	}
+
+	n = scnprintf(ptr, rem, "\n");
+	ptr += n;
+	rem -= n;
+
+	perf->bufpos = 0;
+	perf->buftot = ptr - perf->buf;
+
+	return 0;
+}
+
+/* read() handler: hand out the current line from perf->buf, generating a
+ * new one (which may sleep until the next sample time) once the previous
+ * line has been consumed.  Returns the number of bytes copied, -EFAULT if
+ * the user buffer could not be written, or the error from refill_buf().
+ */
+static ssize_t perf_read(struct file *file, char __user *buf,
+		size_t sz, loff_t *ppos)
+{
+	struct msm_perf_state *perf = file->private_data;
+	int n = 0, ret = 0;
+
+	mutex_lock(&perf->read_lock);
+
+	if (perf->bufpos >= perf->buftot) {
+		ret = refill_buf(perf);
+		if (ret)
+			goto out;
+	}
+
+	/* compare as size_t so a huge request cannot turn negative */
+	n = min_t(size_t, sz, perf->buftot - perf->bufpos);
+
+	/* copy_to_user() returns the number of bytes NOT copied; that
+	 * must not be handed back to userspace as a byte count
+	 */
+	if (copy_to_user(buf, &perf->buf[perf->bufpos], n)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	perf->bufpos += n;
+	*ppos += n;
+
+out:
+	mutex_unlock(&perf->read_lock);
+	if (ret)
+		return ret;
+	return n;
+}
+
+/* open() handler: accepts a single opener at a time, and only when a gpu
+ * is present (-EBUSY otherwise).  Resets the reader state, starts the gpu
+ * perf counters and schedules the first sample.
+ */
+static int perf_open(struct inode *inode, struct file *file)
+{
+	struct msm_perf_state *perf = inode->i_private;
+	struct drm_device *dev = perf->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gpu *gpu = priv->gpu;
+	int err = -EBUSY;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (gpu && !perf->open) {
+		file->private_data = perf;
+		perf->open = true;
+		perf->cnt = 0;
+		perf->bufpos = 0;
+		perf->buftot = 0;
+		msm_gpu_perfcntr_start(gpu);
+		perf->next_jiffies = jiffies + SAMPLE_TIME;
+		err = 0;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return err;
+}
+
+/* release() handler: stop the gpu perf counters and allow a new opener. */
+static int perf_release(struct inode *inode, struct file *file)
+{
+	struct msm_perf_state *state = inode->i_private;
+	struct drm_device *dev = state->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+
+	msm_gpu_perfcntr_stop(priv->gpu);
+	state->open = false;
+
+	return 0;
+}
+
+
+static const struct file_operations perf_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = perf_open,       /* single opener; starts the perf counters */
+	.read = perf_read,       /* one header or sample line at a time */
+	.llseek = no_llseek,
+	.release = perf_release, /* stops the perf counters */
+};
+
+/* Create the "perf" debugfs file under the given minor and hook it into
+ * that minor's debugfs list.  The state hangs off the device's private
+ * data, so only the first call creates anything; later calls return 0.
+ * Returns 0 on success or -ENOMEM on failure.
+ */
+int msm_perf_debugfs_init(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_perf_state *perf;
+
+	/* only create on first minor: */
+	if (priv->perf)
+		return 0;
+
+	perf = kzalloc(sizeof(*perf), GFP_KERNEL);
+	if (!perf)
+		return -ENOMEM;
+
+	perf->dev = minor->dev;
+
+	mutex_init(&perf->read_lock);
+	priv->perf = perf;
+
+	perf->node = kzalloc(sizeof(*perf->node), GFP_KERNEL);
+	if (!perf->node)
+		goto fail;
+
+	/* the fail path below ends up in list_del() on this node; make
+	 * that safe while the node is not yet on any list (a zeroed
+	 * list_head would be a NULL dereference)
+	 */
+	INIT_LIST_HEAD(&perf->node->list);
+
+	perf->ent = debugfs_create_file("perf", S_IFREG | S_IRUGO,
+			minor->debugfs_root, perf, &perf_debugfs_fops);
+	if (!perf->ent) {
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/perf\n",
+				minor->debugfs_root->d_name.name);
+		goto fail;
+	}
+
+	perf->node->minor = minor;
+	perf->node->dent  = perf->ent;
+	perf->node->info_ent = NULL;
+
+	mutex_lock(&minor->debugfs_lock);
+	list_add(&perf->node->list, &minor->debugfs_list);
+	mutex_unlock(&minor->debugfs_lock);
+
+	return 0;
+
+fail:
+	msm_perf_debugfs_cleanup(minor);
+	/* a real errno rather than a bare -1 (which reads as -EPERM) */
+	return -ENOMEM;
+}
+
+/* Tear down the perf debugfs state, if there is any: remove the file,
+ * unhook and free the list node, then free the state itself.
+ */
+void msm_perf_debugfs_cleanup(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_perf_state *state = priv->perf;
+	struct drm_info_node *node;
+
+	if (!state)
+		return;
+
+	/* detach first, so a later call finds nothing left to do */
+	priv->perf = NULL;
+	node = state->node;
+
+	debugfs_remove(state->ent);
+
+	if (node) {
+		mutex_lock(&minor->debugfs_lock);
+		list_del(&node->list);
+		mutex_unlock(&minor->debugfs_lock);
+		kfree(node);
+	}
+
+	mutex_destroy(&state->read_lock);
+
+	kfree(state);
+}
+
+#endif

diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
new file mode 100644
index 0000000..9a78c48
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_rd.c

@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* For debugging crashes, userspace can:
+ *
+ *   tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd
+ *
+ * To log the cmdstream in a format that is understood by freedreno/cffdump
+ * utility.  By comparing the last successfully completed fence #, to the
+ * cmdstream for the next fence, you can narrow down which process and submit
+ * caused the gpu crash/lockup.
+ *
+ * This bypasses drm_debugfs_create_files() mainly because we need to use
+ * our own fops for a bit more control.  In particular, we don't want to
+ * do anything if userspace doesn't have the debugfs file open.
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/kfifo.h>
+#include <linux/debugfs.h>
+#include <linux/circ_buf.h>
+#include <linux/wait.h>
+
+#include "msm_drv.h"
+#include "msm_gpu.h"
+#include "msm_gem.h"
+
+enum rd_sect_type {
+	RD_NONE,
+	RD_TEST,       /* ascii text */
+	RD_CMD,        /* ascii text */
+	RD_GPUADDR,    /* u32 gpuaddr, u32 size in bytes */
+	RD_CONTEXT,    /* raw dump */
+	RD_CMDSTREAM,  /* raw dump */
+	RD_CMDSTREAM_ADDR, /* u32 gpu addr of cmdstream, u32 size in dwords */
+	RD_PARAM,      /* u32 param_type, u32 param_val, u32 bitlen */
+	RD_FLUSH,      /* empty, clear previous params */
+	RD_PROGRAM,    /* shader program, raw dump */
+	RD_VERT_SHADER,
+	RD_FRAG_SHADER,
+	RD_BUFFER_CONTENTS, /* raw dump of the buffer named by the preceding RD_GPUADDR */
+	RD_GPU_ID,     /* u32 gpu id, written when the file is opened */
+};
+
+#define BUF_SZ 512  /* must be a power of 2: indices wrap via & (BUF_SZ - 1) */
+
+/* space used: */
+#define circ_count(circ) \
+	(CIRC_CNT((circ)->head, (circ)->tail, BUF_SZ))
+#define circ_count_to_end(circ) \
+	(CIRC_CNT_TO_END((circ)->head, (circ)->tail, BUF_SZ))
+/* space available: */
+#define circ_space(circ) \
+	(CIRC_SPACE((circ)->head, (circ)->tail, BUF_SZ))
+#define circ_space_to_end(circ) \
+	(CIRC_SPACE_TO_END((circ)->head, (circ)->tail, BUF_SZ))
+
+struct msm_rd_state {
+	struct drm_device *dev;
+
+	bool open;                  /* true while the debugfs file is open */
+
+	struct dentry *ent;         /* the "rd" debugfs file */
+	struct drm_info_node *node; /* our entry on minor->debugfs_list */
+
+	/* current submit to read out: */
+	struct msm_gem_submit *submit;
+
+	/* fifo access is synchronized on the producer side by
+	 * struct_mutex held by submit code (otherwise we could
+	 * end up w/ cmds logged in different order than they
+	 * were executed).  And read_lock synchronizes the reads
+	 */
+	struct mutex read_lock;
+
+	wait_queue_head_t fifo_event; /* woken after every fifo read and write */
+	struct circ_buf fifo;         /* head/tail indices into buf[] */
+
+	char buf[BUF_SZ];             /* backing storage for fifo */
+};
+
+/* Copy sz bytes into the fifo, sleeping (uninterruptibly) whenever it is
+ * full and waking any waiter after each chunk.
+ * NOTE(review): nothing visible here ends the wait if the reader goes
+ * away while the fifo is full - confirm callers cannot get stuck.
+ */
+static void rd_write(struct msm_rd_state *rd, const void *buf, int sz)
+{
+	struct circ_buf *fifo = &rd->fifo;
+	const char *src = buf;
+	int chunk;
+
+	for (; sz > 0; sz -= chunk, src += chunk) {
+		/* head is only advanced in this function, so it cannot
+		 * move while we sleep below
+		 */
+		char *dst = &fifo->buf[fifo->head];
+
+		wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0);
+
+		/* copy at most up to the wrap point; loop for the rest */
+		chunk = min(sz, circ_space_to_end(&rd->fifo));
+		memcpy(dst, src, chunk);
+
+		fifo->head = (fifo->head + chunk) & (BUF_SZ - 1);
+
+		wake_up_all(&rd->fifo_event);
+	}
+}
+
+/* Emit one section into the fifo: a 4-byte type, a 4-byte payload
+ * length, then sz bytes of payload.
+ */
+static void rd_write_section(struct msm_rd_state *rd,
+		enum rd_sect_type type, const void *buf, int sz)
+{
+	const void *header[] = { &type, &sz };
+	int i;
+
+	for (i = 0; i < 2; i++)
+		rd_write(rd, header[i], 4);
+
+	rd_write(rd, buf, sz);
+}
+
+/* read() handler: wait (interruptibly) for data in the fifo and copy out
+ * as much as is available up to the wrap point.  Returns the number of
+ * bytes copied, -EFAULT if the user buffer could not be written, or the
+ * error from the interrupted wait.
+ */
+static ssize_t rd_read(struct file *file, char __user *buf,
+		size_t sz, loff_t *ppos)
+{
+	struct msm_rd_state *rd = file->private_data;
+	struct circ_buf *fifo = &rd->fifo;
+	const char *fptr;
+	int n = 0, ret = 0;
+
+	mutex_lock(&rd->read_lock);
+
+	ret = wait_event_interruptible(rd->fifo_event,
+			circ_count(&rd->fifo) > 0);
+	if (ret)
+		goto out;
+
+	/* sample tail only while holding read_lock: another reader may
+	 * have advanced it while we were waiting for the lock
+	 */
+	fptr = &fifo->buf[fifo->tail];
+
+	/* compare as size_t so a huge request cannot turn negative */
+	n = min_t(size_t, sz, circ_count_to_end(&rd->fifo));
+
+	/* copy_to_user() returns the number of bytes NOT copied; that
+	 * must not be handed back to userspace as a byte count
+	 */
+	if (copy_to_user(buf, fptr, n)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	fifo->tail = (fifo->tail + n) & (BUF_SZ - 1);
+	*ppos += n;
+
+	wake_up_all(&rd->fifo_event);
+
+out:
+	mutex_unlock(&rd->read_lock);
+	if (ret)
+		return ret;
+	return n;
+}
+
+/* open() handler: accepts a single opener at a time, and only when a gpu
+ * is present (-EBUSY otherwise).  Logs an RD_GPU_ID section as the first
+ * thing written for the new opener.
+ */
+static int rd_open(struct inode *inode, struct file *file)
+{
+	struct msm_rd_state *rd = inode->i_private;
+	struct drm_device *dev = rd->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gpu *gpu = priv->gpu;
+	int err = -EBUSY;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (gpu && !rd->open) {
+		uint64_t param;
+		uint32_t gpu_id;
+
+		file->private_data = rd;
+		rd->open = true;
+
+		/* the parsing tools need to know gpu-id to know which
+		 * register database to load.
+		 */
+		gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &param);
+		gpu_id = param;
+
+		rd_write_section(rd, RD_GPU_ID, &gpu_id, sizeof(gpu_id));
+		err = 0;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return err;
+}
+
+/* release() handler: mark the file closed, which lets a new opener in
+ * and makes msm_rd_dump_submit() return without logging.
+ */
+static int rd_release(struct inode *inode, struct file *file)
+{
+	struct msm_rd_state *state = inode->i_private;
+
+	state->open = false;
+
+	return 0;
+}
+
+
+static const struct file_operations rd_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = rd_open,       /* single opener; logs the gpu id */
+	.read = rd_read,       /* blocks until the fifo has data */
+	.llseek = no_llseek,
+	.release = rd_release,
+};
+
+/* Create the "rd" debugfs file under the given minor and hook it into
+ * that minor's debugfs list.  The state hangs off the device's private
+ * data, so only the first call creates anything; later calls return 0.
+ * Returns 0 on success or -ENOMEM on failure.
+ */
+int msm_rd_debugfs_init(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_rd_state *rd;
+
+	/* only create on first minor: */
+	if (priv->rd)
+		return 0;
+
+	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd->dev = minor->dev;
+	rd->fifo.buf = rd->buf;
+
+	mutex_init(&rd->read_lock);
+	priv->rd = rd;
+
+	init_waitqueue_head(&rd->fifo_event);
+
+	rd->node = kzalloc(sizeof(*rd->node), GFP_KERNEL);
+	if (!rd->node)
+		goto fail;
+
+	/* the fail path below ends up in list_del() on this node; make
+	 * that safe while the node is not yet on any list (a zeroed
+	 * list_head would be a NULL dereference)
+	 */
+	INIT_LIST_HEAD(&rd->node->list);
+
+	rd->ent = debugfs_create_file("rd", S_IFREG | S_IRUGO,
+			minor->debugfs_root, rd, &rd_debugfs_fops);
+	if (!rd->ent) {
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/rd\n",
+				minor->debugfs_root->d_name.name);
+		goto fail;
+	}
+
+	rd->node->minor = minor;
+	rd->node->dent  = rd->ent;
+	rd->node->info_ent = NULL;
+
+	mutex_lock(&minor->debugfs_lock);
+	list_add(&rd->node->list, &minor->debugfs_list);
+	mutex_unlock(&minor->debugfs_lock);
+
+	return 0;
+
+fail:
+	msm_rd_debugfs_cleanup(minor);
+	/* a real errno rather than a bare -1 (which reads as -EPERM) */
+	return -ENOMEM;
+}
+
+/* Tear down the rd debugfs state, if there is any: remove the file,
+ * unhook and free the list node, then free the state itself.
+ */
+void msm_rd_debugfs_cleanup(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_rd_state *state = priv->rd;
+	struct drm_info_node *node;
+
+	if (!state)
+		return;
+
+	/* detach first, so a later call finds nothing left to do */
+	priv->rd = NULL;
+	node = state->node;
+
+	debugfs_remove(state->ent);
+
+	if (node) {
+		mutex_lock(&minor->debugfs_lock);
+		list_del(&node->list);
+		mutex_unlock(&minor->debugfs_lock);
+		kfree(node);
+	}
+
+	mutex_destroy(&state->read_lock);
+
+	kfree(state);
+}
+
+/* Log one submit into the rd fifo: an RD_CMD text line identifying the
+ * submitting task and fence, then for every cmd buffer its gpu address
+ * and contents, plus an RD_CMDSTREAM_ADDR marker for buffers that are
+ * executed directly.  Does nothing unless the rd file is open.
+ * Called under struct_mutex.
+ */
+void msm_rd_dump_submit(struct msm_gem_submit *submit)
+{
+	struct drm_device *dev = submit->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_rd_state *rd = priv->rd;
+	/* zero-filled: the section length below is rounded up to a
+	 * multiple of 4, so bytes past the string's terminator are
+	 * written to the log too and must not be stale stack contents
+	 */
+	char msg[128] = "";
+	int i, n;
+
+	/* priv->rd is only set by msm_rd_debugfs_init() and is cleared
+	 * again by msm_rd_debugfs_cleanup(), so it can be NULL here
+	 */
+	if (!rd || !rd->open)
+		return;
+
+	/* writing into fifo is serialized by caller, and
+	 * rd->read_lock is used to serialize the reads
+	 */
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	/* scnprintf() returns the stored length, so ALIGN(n, 4) can
+	 * never exceed sizeof(msg)
+	 */
+	n = scnprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
+			TASK_COMM_LEN, current->comm, task_pid_nr(current),
+			submit->fence);
+
+	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
+
+	/* could be nice to have an option (module-param?) to snapshot
+	 * all the bo's associated with the submit.  Handy to see vtx
+	 * buffers, etc.  For now just the cmdstream bo's is enough.
+	 */
+
+	for (i = 0; i < submit->nr_cmds; i++) {
+		uint32_t idx  = submit->cmd[i].idx;
+		uint32_t iova = submit->cmd[i].iova;
+		uint32_t szd  = submit->cmd[i].size; /* in dwords */
+		struct msm_gem_object *obj = submit->bos[idx].obj;
+		const char *buf = msm_gem_vaddr_locked(&obj->base);
+
+		/* cmd iova is bo iova + submit_offset; step to that offset */
+		buf += iova - submit->bos[idx].iova;
+
+		rd_write_section(rd, RD_GPUADDR,
+				(uint32_t[2]){ iova, szd * 4 }, 8);
+		rd_write_section(rd, RD_BUFFER_CONTENTS,
+				buf, szd * 4);
+
+		switch (submit->cmd[i].type) {
+		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+			/* ignore IB-targets, we've logged the buffer, the
+			 * parser tool will follow the IB based on the logged
+			 * buffer/gpuaddr, so nothing more to do.
+			 */
+			break;
+		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+		case MSM_SUBMIT_CMD_BUF:
+			rd_write_section(rd, RD_CMDSTREAM_ADDR,
+					(uint32_t[2]){ iova, szd }, 8);
+			break;
+		}
+	}
+}
+#endif

diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index b7d2162..2b6156d 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile

@@ -102,6 +102,7 @@
 nouveau-y += core/subdev/fb/nvaf.o
 nouveau-y += core/subdev/fb/nvc0.o
 nouveau-y += core/subdev/fb/nve0.o
+nouveau-y += core/subdev/fb/gk20a.o
 nouveau-y += core/subdev/fb/gm107.o
 nouveau-y += core/subdev/fb/ramnv04.o
 nouveau-y += core/subdev/fb/ramnv10.o
@@ -117,25 +118,32 @@
 nouveau-y += core/subdev/fb/ramnvaa.o
 nouveau-y += core/subdev/fb/ramnvc0.o
 nouveau-y += core/subdev/fb/ramnve0.o
+nouveau-y += core/subdev/fb/ramgk20a.o
 nouveau-y += core/subdev/fb/ramgm107.o
 nouveau-y += core/subdev/fb/sddr3.o
 nouveau-y += core/subdev/fb/gddr5.o
 nouveau-y += core/subdev/gpio/base.o
 nouveau-y += core/subdev/gpio/nv10.o
 nouveau-y += core/subdev/gpio/nv50.o
+nouveau-y += core/subdev/gpio/nv92.o
 nouveau-y += core/subdev/gpio/nvd0.o
 nouveau-y += core/subdev/gpio/nve0.o
 nouveau-y += core/subdev/i2c/base.o
 nouveau-y += core/subdev/i2c/anx9805.o
 nouveau-y += core/subdev/i2c/aux.o
 nouveau-y += core/subdev/i2c/bit.o
+nouveau-y += core/subdev/i2c/pad.o
+nouveau-y += core/subdev/i2c/padnv04.o
+nouveau-y += core/subdev/i2c/padnv94.o
 nouveau-y += core/subdev/i2c/nv04.o
 nouveau-y += core/subdev/i2c/nv4e.o
 nouveau-y += core/subdev/i2c/nv50.o
 nouveau-y += core/subdev/i2c/nv94.o
 nouveau-y += core/subdev/i2c/nvd0.o
+nouveau-y += core/subdev/i2c/nve0.o
 nouveau-y += core/subdev/ibus/nvc0.o
 nouveau-y += core/subdev/ibus/nve0.o
+nouveau-y += core/subdev/ibus/gk20a.o
 nouveau-y += core/subdev/instmem/base.o
 nouveau-y += core/subdev/instmem/nv04.o
 nouveau-y += core/subdev/instmem/nv40.o
@@ -214,6 +222,9 @@
 nouveau-y += core/engine/device/nve0.o
 nouveau-y += core/engine/device/gm100.o
 nouveau-y += core/engine/disp/base.o
+nouveau-y += core/engine/disp/conn.o
+nouveau-y += core/engine/disp/outp.o
+nouveau-y += core/engine/disp/outpdp.o
 nouveau-y += core/engine/disp/nv04.o
 nouveau-y += core/engine/disp/nv50.o
 nouveau-y += core/engine/disp/nv84.o
@@ -245,6 +256,7 @@
 nouveau-y += core/engine/fifo/nv84.o
 nouveau-y += core/engine/fifo/nvc0.o
 nouveau-y += core/engine/fifo/nve0.o
+nouveau-y += core/engine/fifo/gk20a.o
 nouveau-y += core/engine/fifo/nv108.o
 nouveau-y += core/engine/graph/ctxnv40.o
 nouveau-y += core/engine/graph/ctxnv50.o
@@ -255,6 +267,7 @@
 nouveau-y += core/engine/graph/ctxnvd7.o
 nouveau-y += core/engine/graph/ctxnvd9.o
 nouveau-y += core/engine/graph/ctxnve4.o
+nouveau-y += core/engine/graph/ctxgk20a.o
 nouveau-y += core/engine/graph/ctxnvf0.o
 nouveau-y += core/engine/graph/ctxnv108.o
 nouveau-y += core/engine/graph/ctxgm107.o
@@ -275,6 +288,7 @@
 nouveau-y += core/engine/graph/nvd7.o
 nouveau-y += core/engine/graph/nvd9.o
 nouveau-y += core/engine/graph/nve4.o
+nouveau-y += core/engine/graph/gk20a.o
 nouveau-y += core/engine/graph/nvf0.o
 nouveau-y += core/engine/graph/nv108.o
 nouveau-y += core/engine/graph/gm107.o

diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c
index 3f3c765..ae81d3b 100644
--- a/drivers/gpu/drm/nouveau/core/core/event.c
+++ b/drivers/gpu/drm/nouveau/core/core/event.c

@@ -28,14 +28,20 @@
 {
 	struct nouveau_event *event = handler->event;
 	unsigned long flags;
-	if (__test_and_clear_bit(NVKM_EVENT_ENABLE, &handler->flags)) {
-		spin_lock_irqsave(&event->refs_lock, flags);
-		if (!--event->index[handler->index].refs) {
+	u32 m, t;
+
+	if (!__test_and_clear_bit(NVKM_EVENT_ENABLE, &handler->flags))
+		return;
+
+	spin_lock_irqsave(&event->refs_lock, flags);
+	for (m = handler->types; t = __ffs(m), m; m &= ~(1 << t)) {
+		if (!--event->refs[handler->index * event->types_nr + t]) {
 			if (event->disable)
-				event->disable(event, handler->index);
+				event->disable(event, 1 << t, handler->index);
 		}
-		spin_unlock_irqrestore(&event->refs_lock, flags);
+
 	}
+	spin_unlock_irqrestore(&event->refs_lock, flags);
 }
 
 void
@@ -43,14 +49,20 @@
 {
 	struct nouveau_event *event = handler->event;
 	unsigned long flags;
-	if (!__test_and_set_bit(NVKM_EVENT_ENABLE, &handler->flags)) {
-		spin_lock_irqsave(&event->refs_lock, flags);
-		if (!event->index[handler->index].refs++) {
+	u32 m, t;
+
+	if (__test_and_set_bit(NVKM_EVENT_ENABLE, &handler->flags))
+		return;
+
+	spin_lock_irqsave(&event->refs_lock, flags);
+	for (m = handler->types; t = __ffs(m), m; m &= ~(1 << t)) {
+		if (!event->refs[handler->index * event->types_nr + t]++) {
 			if (event->enable)
-				event->enable(event, handler->index);
+				event->enable(event, 1 << t, handler->index);
 		}
-		spin_unlock_irqrestore(&event->refs_lock, flags);
+
 	}
+	spin_unlock_irqrestore(&event->refs_lock, flags);
 }
 
 static void
@@ -65,38 +77,57 @@
 }
 
+/*
+ * Fill in a caller-provided handler and append it to the handler list
+ * for @index.
+ *
+ * Returns -EINVAL when @types has a bit set at or above event->types_nr,
+ * or when @index is not below event->index_nr; otherwise returns 0.
+ *
+ * NOTE(review): @index is a signed int and only its upper bound is
+ * checked; confirm callers never pass a negative value.
+ */
 static int
-nouveau_event_init(struct nouveau_event *event, int index,
-		   int (*func)(void *, int), void *priv,
+nouveau_event_init(struct nouveau_event *event, u32 types, int index,
+		   int (*func)(void *, u32, int), void *priv,
 		   struct nouveau_eventh *handler)
 {
 	unsigned long flags;
 
+	if (types & ~((1 << event->types_nr) - 1))
+		return -EINVAL;
 	if (index >= event->index_nr)
 		return -EINVAL;
 
 	handler->event = event;
 	handler->flags = 0;
+	handler->types = types;
 	handler->index = index;
 	handler->func = func;
 	handler->priv = priv;
 
 	spin_lock_irqsave(&event->list_lock, flags);
-	list_add_tail(&handler->head, &event->index[index].list);
+	list_add_tail(&handler->head, &event->list[index]);
 	spin_unlock_irqrestore(&event->list_lock, flags);
 	return 0;
 }
 
 int
-nouveau_event_new(struct nouveau_event *event, int index,
-		  int (*func)(void *, int), void *priv,
+nouveau_event_new(struct nouveau_event *event, u32 types, int index,
+		  int (*func)(void *, u32, int), void *priv,
 		  struct nouveau_eventh **phandler)
 {
 	struct nouveau_eventh *handler;
 	int ret = -ENOMEM;
 
+	if (event->check) {
+		ret = event->check(event, types, index);
+		if (ret)
+			return ret;
+	}
+
 	handler = *phandler = kmalloc(sizeof(*handler), GFP_KERNEL);
 	if (handler) {
-		ret = nouveau_event_init(event, index, func, priv, handler);
+		ret = nouveau_event_init(event, types, index, func, priv, handler);
 		if (ret)
 			kfree(handler);
 	}
@@ -116,7 +137,7 @@
 }
 
 void
-nouveau_event_trigger(struct nouveau_event *event, int index)
+nouveau_event_trigger(struct nouveau_event *event, u32 types, int index)
 {
 	struct nouveau_eventh *handler;
 	unsigned long flags;
@@ -125,10 +146,15 @@
 		return;
 
 	spin_lock_irqsave(&event->list_lock, flags);
-	list_for_each_entry(handler, &event->index[index].list, head) {
-		if (test_bit(NVKM_EVENT_ENABLE, &handler->flags) &&
-		    handler->func(handler->priv, index) == NVKM_EVENT_DROP)
-			nouveau_event_put(handler);
+	list_for_each_entry(handler, &event->list[index], head) {
+		if (!test_bit(NVKM_EVENT_ENABLE, &handler->flags))
+			continue;
+		if (!(handler->types & types))
+			continue;
+		if (handler->func(handler->priv, handler->types & types, index)
+				!= NVKM_EVENT_DROP)
+			continue;
+		nouveau_event_put(handler);
 	}
 	spin_unlock_irqrestore(&event->list_lock, flags);
 }
@@ -144,20 +170,39 @@
 }
 
+/*
+ * Allocate an event object with @index_nr handler lists and one reference
+ * counter per (index, type) pair.
+ *
+ * On success *pevent points at the new object and 0 is returned.  On
+ * allocation failure -ENOMEM is returned and *pevent is NULL, so the
+ * caller is never left holding a pointer to freed memory.
+ */
 int
-nouveau_event_create(int index_nr, struct nouveau_event **pevent)
+nouveau_event_create(int types_nr, int index_nr, struct nouveau_event **pevent)
 {
 	struct nouveau_event *event;
 	int i;
 
-	event = *pevent = kzalloc(sizeof(*event) + index_nr *
-				  sizeof(event->index[0]), GFP_KERNEL);
+	event = *pevent = kzalloc(sizeof(*event) + (index_nr * types_nr) *
+				  sizeof(event->refs[0]), GFP_KERNEL);
 	if (!event)
 		return -ENOMEM;
 
+	/* kmalloc_array() returns NULL if index_nr * sizeof(*list) overflows */
+	event->list = kmalloc_array(index_nr, sizeof(*event->list),
+				    GFP_KERNEL);
+	if (!event->list) {
+		kfree(event);
+		/* don't hand the freed object back through *pevent */
+		*pevent = NULL;
+		return -ENOMEM;
+	}
+
 	spin_lock_init(&event->list_lock);
 	spin_lock_init(&event->refs_lock);
 	for (i = 0; i < index_nr; i++)
-		INIT_LIST_HEAD(&event->index[i].list);
+		INIT_LIST_HEAD(&event->list[i]);
+	event->types_nr = types_nr;
 	event->index_nr = index_nr;
 	return 0;
 }

diff --git a/drivers/gpu/drm/nouveau/core/core/object.c b/drivers/gpu/drm/nouveau/core/core/object.c
index 7f48e28..1245385 100644
--- a/drivers/gpu/drm/nouveau/core/core/object.c
+++ b/drivers/gpu/drm/nouveau/core/core/object.c

@@ -156,7 +156,7 @@
 	}
 
 	if (ret == 0) {
-		nv_debug(object, "created\n");
+		nv_trace(object, "created\n");
 		atomic_set(&object->refcount, 1);
 	}
 
@@ -166,7 +166,8 @@
+/* Log "destroying" via nv_trace(), then call the dtor from nv_ofuncs(object). */
 static void
 nouveau_object_dtor(struct nouveau_object *object)
 {
-	nv_debug(object, "destroying\n");
+	nv_trace(object, "destroying\n");
 	nv_ofuncs(object)->dtor(object);
 }
 
@@ -337,7 +337,7 @@
 		goto fail_self;
 	}
 
-	nv_debug(object, "initialised\n");
+	nv_trace(object, "initialised\n");
 	return 0;
 
 fail_self:
@@ -375,7 +375,7 @@
 	if (object->parent)
 		nouveau_object_dec(object->parent, false);
 
-	nv_debug(object, "stopped\n");
+	nv_trace(object, "stopped\n");
 	return 0;
 }
 
@@ -411,7 +411,7 @@
 		}
 	}
 
-	nv_debug(object, "suspended\n");
+	nv_trace(object, "suspended\n");
 	return 0;
 
 fail_parent:

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/gm100.c b/drivers/gpu/drm/nouveau/core/engine/device/gm100.c
index d258c21..a520029 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/gm100.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/gm100.c

@@ -60,8 +60,8 @@
 	case 0x117:
 		device->cname = "GM107";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 #if 0
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv04.c b/drivers/gpu/drm/nouveau/core/engine/device/nv04.c
index 0a51ff4..40b29d0 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv04.c

@@ -47,7 +47,7 @@
 	case 0x04:
 		device->cname = "NV04";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv04_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -65,7 +65,7 @@
 	case 0x05:
 		device->cname = "NV05";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv05_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv10.c b/drivers/gpu/drm/nouveau/core/engine/device/nv10.c
index e008de8..5f7c25f 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv10.c

@@ -48,8 +48,8 @@
 	case 0x10:
 		device->cname = "NV10";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -65,8 +65,8 @@
 	case 0x15:
 		device->cname = "NV15";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -84,8 +84,8 @@
 	case 0x16:
 		device->cname = "NV16";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -103,8 +103,8 @@
 	case 0x1a:
 		device->cname = "nForce";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -122,8 +122,8 @@
 	case 0x11:
 		device->cname = "NV11";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -141,8 +141,8 @@
 	case 0x17:
 		device->cname = "NV17";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -160,8 +160,8 @@
 	case 0x1f:
 		device->cname = "nForce2";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -179,8 +179,8 @@
 	case 0x18:
 		device->cname = "NV18";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv20.c b/drivers/gpu/drm/nouveau/core/engine/device/nv20.c
index 7b629a3..75fed11 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv20.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv20.c

@@ -49,8 +49,8 @@
 	case 0x20:
 		device->cname = "NV20";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -68,8 +68,8 @@
 	case 0x25:
 		device->cname = "NV25";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -87,8 +87,8 @@
 	case 0x28:
 		device->cname = "NV28";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -106,8 +106,8 @@
 	case 0x2a:
 		device->cname = "NV2A";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv30.c b/drivers/gpu/drm/nouveau/core/engine/device/nv30.c
index 7dfddd5..36919d7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv30.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv30.c

@@ -49,8 +49,8 @@
 	case 0x30:
 		device->cname = "NV30";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -68,8 +68,8 @@
 	case 0x35:
 		device->cname = "NV35";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -87,8 +87,8 @@
 	case 0x31:
 		device->cname = "NV31";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -107,8 +107,8 @@
 	case 0x36:
 		device->cname = "NV36";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;
@@ -127,8 +127,8 @@
 	case 0x34:
 		device->cname = "NV34";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv04_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  nv04_mc_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv40.c b/drivers/gpu/drm/nouveau/core/engine/device/nv40.c
index 7c1ce6c..1130a62 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv40.c

@@ -53,8 +53,8 @@
 	case 0x40:
 		device->cname = "NV40";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -76,8 +76,8 @@
 	case 0x41:
 		device->cname = "NV41";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -99,8 +99,8 @@
 	case 0x42:
 		device->cname = "NV42";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -122,8 +122,8 @@
 	case 0x43:
 		device->cname = "NV43";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -145,8 +145,8 @@
 	case 0x45:
 		device->cname = "NV45";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -168,8 +168,8 @@
 	case 0x47:
 		device->cname = "G70";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -191,8 +191,8 @@
 	case 0x49:
 		device->cname = "G71";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -214,8 +214,8 @@
 	case 0x4b:
 		device->cname = "G73";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -237,8 +237,8 @@
 	case 0x44:
 		device->cname = "NV44";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -260,8 +260,8 @@
 	case 0x46:
 		device->cname = "G72";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -283,8 +283,8 @@
 	case 0x4a:
 		device->cname = "NV44A";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -306,8 +306,8 @@
 	case 0x4c:
 		device->cname = "C61";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -329,8 +329,8 @@
 	case 0x4e:
 		device->cname = "C51";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv4e_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv4e_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -352,8 +352,8 @@
 	case 0x63:
 		device->cname = "C73";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -375,8 +375,8 @@
 	case 0x67:
 		device->cname = "C67";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;
@@ -398,8 +398,8 @@
 	case 0x68:
 		device->cname = "C68";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv10_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv04_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv10_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv04_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nv40_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv40_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nv1a_devinit_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv50.c b/drivers/gpu/drm/nouveau/core/engine/device/nv50.c
index 66499fa..ef0b0bd 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv50.c

@@ -60,8 +60,8 @@
 	case 0x50:
 		device->cname = "G80";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv50_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv50_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -85,8 +85,8 @@
 	case 0x84:
 		device->cname = "G84";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -113,8 +113,8 @@
 	case 0x86:
 		device->cname = "G86";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -141,8 +141,8 @@
 	case 0x92:
 		device->cname = "G92";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -169,8 +169,8 @@
 	case 0x94:
 		device->cname = "G94";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -197,8 +197,8 @@
 	case 0x96:
 		device->cname = "G96";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -225,8 +225,8 @@
 	case 0x98:
 		device->cname = "G98";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -253,8 +253,8 @@
 	case 0xa0:
 		device->cname = "G200";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -281,8 +281,8 @@
 	case 0xaa:
 		device->cname = "MCP77/MCP78";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nvaa_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -309,8 +309,8 @@
 	case 0xac:
 		device->cname = "MCP79/MCP7A";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nvaa_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -337,8 +337,8 @@
 	case 0xa3:
 		device->cname = "GT215";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -367,8 +367,8 @@
 	case 0xa5:
 		device->cname = "GT216";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -396,8 +396,8 @@
 	case 0xa8:
 		device->cname = "GT218";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -425,8 +425,8 @@
 	case 0xaf:
 		device->cname = "MCP89";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
index 2075b30..f199957 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c

@@ -60,8 +60,8 @@
 	case 0xc0:
 		device->cname = "GF100";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -92,8 +92,8 @@
 	case 0xc4:
 		device->cname = "GF104";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -124,8 +124,8 @@
 	case 0xc3:
 		device->cname = "GF106";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -155,8 +155,8 @@
 	case 0xce:
 		device->cname = "GF114";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -187,8 +187,8 @@
 	case 0xcf:
 		device->cname = "GF116";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -219,8 +219,8 @@
 	case 0xc1:
 		device->cname = "GF108";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -250,8 +250,8 @@
 	case 0xc8:
 		device->cname = "GF110";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nv50_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -282,8 +282,8 @@
 	case 0xd9:
 		device->cname = "GF119";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nvd0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nvd0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -313,8 +313,8 @@
 	case 0xd7:
 		device->cname = "GF117";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nvd0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nvd0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
index 9784cbf..2d1e97d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c

@@ -60,8 +60,8 @@
 	case 0xe4:
 		device->cname = "GK104";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -93,8 +93,8 @@
 	case 0xe7:
 		device->cname = "GK107";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -126,8 +126,8 @@
 	case 0xe6:
 		device->cname = "GK106";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -156,11 +156,28 @@
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_PERFMON] = &nve0_perfmon_oclass;
 		break;
+	case 0xea:
+		device->cname = "GK20A";
+		device->oclass[NVDEV_SUBDEV_MC     ] =  nvc3_mc_oclass;
+		device->oclass[NVDEV_SUBDEV_BUS    ] =  nvc0_bus_oclass;
+		device->oclass[NVDEV_SUBDEV_TIMER  ] = &gk20a_timer_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] =  gk20a_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_IBUS   ] = &gk20a_ibus_oclass;
+		device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass;
+		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
+		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_FIFO   ] =  gk20a_fifo_oclass;
+		device->oclass[NVDEV_ENGINE_SW     ] =  nvc0_software_oclass;
+		device->oclass[NVDEV_ENGINE_GR     ] =  gk20a_graph_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
+		device->oclass[NVDEV_ENGINE_PERFMON] = &nve0_perfmon_oclass;
+		break;
 	case 0xf0:
 		device->cname = "GK110";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -184,18 +201,49 @@
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
 		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
-#if 0
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
-#endif
+		device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass;
+		break;
+	case 0xf1:
+		device->cname = "GK110B";
+		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
+		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
+		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] =  nvc0_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_MC     ] =  nvc3_mc_oclass;
+		device->oclass[NVDEV_SUBDEV_BUS    ] =  nvc0_bus_oclass;
+		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] =  nve0_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_LTCG   ] =  gf100_ltcg_oclass;
+		device->oclass[NVDEV_SUBDEV_IBUS   ] = &nve0_ibus_oclass;
+		device->oclass[NVDEV_SUBDEV_INSTMEM] =  nv50_instmem_oclass;
+		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
+		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
+		device->oclass[NVDEV_SUBDEV_PWR    ] = &nvd0_pwr_oclass;
+		device->oclass[NVDEV_SUBDEV_VOLT   ] = &nv40_volt_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_FIFO   ] =  nve0_fifo_oclass;
+		device->oclass[NVDEV_ENGINE_SW     ] =  nvc0_software_oclass;
+		device->oclass[NVDEV_ENGINE_GR     ] =  nvf0_graph_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] =  nvf0_disp_oclass;
+		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
+		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass;
 		break;
 	case 0x108:
 		device->cname = "GK208";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
-		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -219,11 +267,9 @@
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
 		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
-#if 0
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
-#endif
 		break;
 	default:
 		nv_fatal(device, "unknown Kepler chipset\n");

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/base.c b/drivers/gpu/drm/nouveau/core/engine/disp/base.c
index 7a5cae4..c41f656 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/base.c

@@ -22,13 +22,87 @@
  * Authors: Ben Skeggs
  */
 
-#include <engine/disp.h>
+#include "priv.h"
+#include "outp.h"
+#include "conn.h"
+
+/*
+ * Check hook for the display's hotplug event object (installed as
+ * disp->hpd->check in nouveau_disp_create_()).
+ *
+ * Scans disp->outp for the first output whose connector index equals
+ * 'index'.  Returns 0 if that connector has an HPD event handler attached,
+ * and -ENOSYS when it has none or when no output matches.  'types' is not
+ * looked at.
+ *
+ * NOTE(review): outp->conn is dereferenced unconditionally here, whereas
+ * nvkm_connector_create_() tests outp->conn for NULL before using it --
+ * confirm every listed output has a connector when this can run.
+ */
+static int
+nouveau_disp_hpd_check(struct nouveau_event *event, u32 types, int index)
+{
+	struct nouveau_disp *disp = event->priv;
+	struct nvkm_output *outp;
+	list_for_each_entry(outp, &disp->outp, head) {
+		if (outp->conn->index == index) {
+			if (outp->conn->hpd.event)
+				return 0;
+			break;
+		}
+	}
+	return -ENOSYS;
+}
+
+/*
+ * Shut down a display object: call the fini method of every output on
+ * disp->outp, then nouveau_engine_fini() on the engine base.
+ *
+ * A failing output only aborts the sequence when 'suspend' is true; the
+ * outputs handled before it are then re-initialised in reverse order and the
+ * error is returned.  When 'suspend' is false, output errors are ignored and
+ * the result of nouveau_engine_fini() is returned.
+ */
+int
+_nouveau_disp_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_disp *disp = (void *)object;
+	struct nvkm_output *outp;
+	int ret;
+
+	list_for_each_entry(outp, &disp->outp, head) {
+		ret = nv_ofuncs(outp)->fini(nv_object(outp), suspend);
+		if (ret && suspend)
+			goto fail_outp;
+	}
+
+	return nouveau_engine_fini(&disp->base, suspend);
+
+fail_outp:
+	/* outp is the output that failed; the walk resumes just before it */
+	list_for_each_entry_continue_reverse(outp, &disp->outp, head) {
+		nv_ofuncs(outp)->init(nv_object(outp));
+	}
+
+	return ret;
+}
+
+/*
+ * Bring up a display object: nouveau_engine_init() on the engine base first,
+ * then the init method of every output on disp->outp.
+ *
+ * Returns the engine error if the base fails to initialise.  If an output
+ * fails, the outputs initialised before it are shut down again in reverse
+ * order (fini with suspend == false) and that output's error is returned.
+ * Returns 0 when everything succeeded.
+ */
+int
+_nouveau_disp_init(struct nouveau_object *object)
+{
+	struct nouveau_disp *disp = (void *)object;
+	struct nvkm_output *outp;
+	int ret;
+
+	ret = nouveau_engine_init(&disp->base);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(outp, &disp->outp, head) {
+		ret = nv_ofuncs(outp)->init(nv_object(outp));
+		if (ret)
+			goto fail_outp;
+	}
+
+	return ret;
+
+fail_outp:
+	/* outp is the output that failed; the walk resumes just before it */
+	list_for_each_entry_continue_reverse(outp, &disp->outp, head) {
+		nv_ofuncs(outp)->fini(nv_object(outp), false);
+	}
+
+	return ret;
+}
 
+/*
+ * Destructor for a display object: destroys the vblank event object, hands
+ * every output on disp->outp to nouveau_object_ref(NULL, ...) and finally
+ * destroys the engine base.
+ *
+ * NOTE(review): nouveau_disp_create_() also creates disp->hpd, but nothing
+ * in this function releases it -- confirm it is freed elsewhere or add the
+ * matching nouveau_event_destroy() call.
+ */
 void
 _nouveau_disp_dtor(struct nouveau_object *object)
 {
 	struct nouveau_disp *disp = (void *)object;
+	struct nvkm_output *outp, *outt;
+
 	nouveau_event_destroy(&disp->vblank);
+
+	list_for_each_entry_safe(outp, outt, &disp->outp, head) {
+		nouveau_object_ref(NULL, (struct nouveau_object **)&outp);
+	}
+
 	nouveau_engine_destroy(&disp->base);
 }
 
@@ -39,8 +113,15 @@
 		     const char *intname, const char *extname,
 		     int length, void **pobject)
 {
+	struct nouveau_disp_impl *impl = (void *)oclass;
+	struct nouveau_bios *bios = nouveau_bios(parent);
 	struct nouveau_disp *disp;
-	int ret;
+	struct nouveau_oclass **sclass;
+	struct nouveau_object *object;
+	struct dcb_output dcbE;
+	u8  hpd = 0, ver, hdr;
+	u32 data;
+	int ret, i;
 
 	ret = nouveau_engine_create_(parent, engine, oclass, true,
 				     intname, extname, length, pobject);
@@ -48,5 +129,42 @@
 	if (ret)
 		return ret;
 
-	return nouveau_event_create(heads, &disp->vblank);
+	INIT_LIST_HEAD(&disp->outp);
+
+	/* create output objects for each display path in the vbios */
+	i = -1;
+	while ((data = dcb_outp_parse(bios, ++i, &ver, &hdr, &dcbE))) {
+		if (dcbE.type == DCB_OUTPUT_UNUSED)
+			continue;
+		if (dcbE.type == DCB_OUTPUT_EOL)
+			break;
+		data = dcbE.location << 4 | dcbE.type;
+
+		oclass = nvkm_output_oclass;
+		sclass = impl->outp;
+		while (sclass && sclass[0]) {
+			if (sclass[0]->handle == data) {
+				oclass = sclass[0];
+				break;
+			}
+			sclass++;
+		}
+
+		nouveau_object_ctor(*pobject, *pobject, oclass,
+				    &dcbE, i, &object);
+		hpd = max(hpd, (u8)(dcbE.connector + 1));
+	}
+
+	ret = nouveau_event_create(3, hpd, &disp->hpd);
+	if (ret)
+		return ret;
+
+	disp->hpd->priv = disp;
+	disp->hpd->check = nouveau_disp_hpd_check;
+
+	ret = nouveau_event_create(1, heads, &disp->vblank);
+	if (ret)
+		return ret;
+
+	return 0;
 }

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/conn.c b/drivers/gpu/drm/nouveau/core/engine/disp/conn.c
new file mode 100644
index 0000000..4ffbc70
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/conn.c

@@ -0,0 +1,172 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/gpio.h>
+
+#include "conn.h"
+#include "outp.h"
+
+/*
+ * Work item queued by nvkm_connector_hpd().
+ *
+ * Reads the GPIO line given by conn->hpd.event->index through gpio->get()
+ * and triggers disp->hpd for this connector's index with NVKM_HPD_PLUG when
+ * the read is non-zero, NVKM_HPD_UNPLUG otherwise.
+ *
+ * The closing nouveau_event_get() presumably re-arms the GPIO event that the
+ * handler dropped by returning NVKM_EVENT_DROP -- TODO confirm against the
+ * event core.
+ */
+static void
+nvkm_connector_hpd_work(struct work_struct *w)
+{
+	struct nvkm_connector *conn = container_of(w, typeof(*conn), hpd.work);
+	struct nouveau_disp *disp = nouveau_disp(conn);
+	struct nouveau_gpio *gpio = nouveau_gpio(conn);
+	u32 send = NVKM_HPD_UNPLUG;
+	if (gpio->get(gpio, 0, DCB_GPIO_UNUSED, conn->hpd.event->index))
+		send = NVKM_HPD_PLUG;
+	nouveau_event_trigger(disp->hpd, send, conn->index);
+	nouveau_event_get(conn->hpd.event);
+}
+
+/*
+ * Event handler registered on the GPIO event in nvkm_connector_create_();
+ * 'data' is the connector.  Logs the event type, queues conn->hpd.work and
+ * returns NVKM_EVENT_DROP.  'index' is not used.
+ */
+static int
+nvkm_connector_hpd(void *data, u32 type, int index)
+{
+	struct nvkm_connector *conn = data;
+	DBG("HPD: %d\n", type);
+	schedule_work(&conn->hpd.work);
+	return NVKM_EVENT_DROP;
+}
+
+/*
+ * Fini method for connector objects: puts the HPD event handler, if the
+ * connector has one, then returns the result of nouveau_object_fini().
+ *
+ * NOTE(review): nothing in this file cancels or flushes conn->hpd.work, so a
+ * work item queued just before fini may still run afterwards -- confirm
+ * whether a cancel_work_sync()/flush_work() is needed.
+ */
+int
+_nvkm_connector_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nvkm_connector *conn = (void *)object;
+	if (conn->hpd.event)
+		nouveau_event_put(conn->hpd.event);
+	return nouveau_object_fini(&conn->base, suspend);
+}
+
+/*
+ * Init method for connector objects: runs nouveau_object_init() and, only if
+ * that succeeded, calls nouveau_event_get() on the HPD event handler when
+ * the connector has one.  Returns the nouveau_object_init() result.
+ */
+int
+_nvkm_connector_init(struct nouveau_object *object)
+{
+	struct nvkm_connector *conn = (void *)object;
+	int ret = nouveau_object_init(&conn->base);
+	if (ret == 0) {
+		if (conn->hpd.event)
+			nouveau_event_get(conn->hpd.event);
+	}
+	return ret;
+}
+
+/*
+ * Destructor for connector objects: clears conn->hpd.event through
+ * nouveau_event_ref(NULL, ...) and then destroys the base object.
+ */
+void
+_nvkm_connector_dtor(struct nouveau_object *object)
+{
+	struct nvkm_connector *conn = (void *)object;
+	nouveau_event_ref(NULL, &conn->hpd.event);
+	nouveau_object_destroy(&conn->base);
+}
+
+/*
+ * Create the connector object for connector 'index', or reuse the one that
+ * an output of this display already holds.
+ *
+ * parent, engine, oclass: forwarded to nouveau_object_create_(); 'engine' is
+ *          also used as the struct nouveau_disp whose output list is
+ *          searched.
+ * info:    connector data, copied into conn->info.  Its hpd field is
+ *          overwritten in place while the HPD GPIO function is resolved.
+ * index:   connector index, stored in conn->index.
+ * length:  object size handed to nouveau_object_create_().
+ * pobject: receives the connector.
+ *
+ * Returns 1 (after incrementing the refcount) when an existing connector
+ * with the same index is reused, 0 when a new one was created, or the error
+ * from nouveau_object_create_().  Failures while setting up hotplug
+ * detection are logged but still yield 0.
+ */
+int
+nvkm_connector_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass,
+		       struct nvbios_connE *info, int index,
+		       int length, void **pobject)
+{
+	static const u8 hpd[] = { 0x07, 0x08, 0x51, 0x52, 0x5e, 0x5f, 0x60 };
+	struct nouveau_gpio *gpio = nouveau_gpio(parent);
+	struct nouveau_disp *disp = (void *)engine;
+	struct nvkm_connector *conn;
+	struct nvkm_output *outp;
+	struct dcb_gpio_func func;
+	int ret;
+
+	/* share the connector if an earlier output already refers to it */
+	list_for_each_entry(outp, &disp->outp, head) {
+		if (outp->conn && outp->conn->index == index) {
+			atomic_inc(&nv_object(outp->conn)->refcount);
+			*pobject = outp->conn;
+			return 1;
+		}
+	}
+
+	ret = nouveau_object_create_(parent, engine, oclass, 0, length, pobject);
+	conn = *pobject;
+	if (ret)
+		return ret;
+
+	/*
+	 * NOTE(review): the copy is taken before info->hpd is rewritten below,
+	 * so conn->info.hpd keeps the value passed in while the caller's
+	 * structure ends up with the translated one -- confirm this is
+	 * intended.
+	 */
+	conn->info = *info;
+	conn->index = index;
+
+	DBG("type %02x loc %d hpd %02x dp %x di %x sr %x lcdid %x\n",
+	    info->type, info->location, info->hpd, info->dp,
+	    info->di, info->sr, info->lcdid);
+
+	/*
+	 * The lowest set bit of info->hpd selects an entry of hpd[]; a value
+	 * of zero skips HPD setup altogether.  The selected entry is the value
+	 * handed to gpio->find().
+	 */
+	if ((info->hpd = ffs(info->hpd))) {
+		if (--info->hpd >= ARRAY_SIZE(hpd)) {
+			ERR("hpd %02x unknown\n", info->hpd);
+			goto done;
+		}
+		info->hpd = hpd[info->hpd];
+
+		ret = gpio->find(gpio, 0, info->hpd, DCB_GPIO_UNUSED, &func);
+		if (ret) {
+			ERR("func %02x lookup failed, %d\n", info->hpd, ret);
+			goto done;
+		}
+
+		ret = nouveau_event_new(gpio->events, NVKM_GPIO_TOGGLED,
+					func.line, nvkm_connector_hpd,
+					conn, &conn->hpd.event);
+		if (ret) {
+			ERR("func %02x failed, %d\n", info->hpd, ret);
+		} else {
+			DBG("func %02x (HPD)\n", info->hpd);
+		}
+	}
+
+done:
+	/* the work item is initialised even when no HPD event was set up */
+	INIT_WORK(&conn->hpd.work, nvkm_connector_hpd_work);
+	return 0;
+}
+
+/*
+ * Constructor method for the generic connector class: forwards to
+ * nvkm_connector_create() and stores the resulting object in *pobject.
+ *
+ * Any non-zero result is returned unchanged, which includes the value 1 that
+ * nvkm_connector_create_() uses when it hands back an existing connector.
+ */
+int
+_nvkm_connector_ctor(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass, void *info, u32 index,
+		     struct nouveau_object **pobject)
+{
+	struct nvkm_connector *conn;
+	int ret;
+
+	ret = nvkm_connector_create(parent, engine, oclass, info, index, &conn);
+	*pobject = nv_object(conn);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Generic connector class: handle 0, with the _nvkm_connector_* functions
+ * above as its ctor/dtor/init/fini methods.  The pointer refers to the
+ * 'base' member of an anonymous struct nvkm_connector_impl.
+ */
+struct nouveau_oclass *
+nvkm_connector_oclass = &(struct nvkm_connector_impl) {
+	.base = {
+		.handle = 0,
+		.ofuncs = &(struct nouveau_ofuncs) {
+			.ctor = _nvkm_connector_ctor,
+			.dtor = _nvkm_connector_dtor,
+			.init = _nvkm_connector_init,
+			.fini = _nvkm_connector_fini,
+		},
+	},
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/conn.h b/drivers/gpu/drm/nouveau/core/engine/disp/conn.h
new file mode 100644
index 0000000..035ebea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/conn.h

@@ -0,0 +1,59 @@
+#ifndef __NVKM_DISP_CONN_H__
+#define __NVKM_DISP_CONN_H__
+
+#include "priv.h"
+
+/* State kept for one display connector object. */
+struct nvkm_connector {
+	struct nouveau_object base;
+	/* NOTE(review): list linkage; its users are not visible here */
+	struct list_head head;
+
+	/* copy of the connector data passed to nvkm_connector_create_() */
+	struct nvbios_connE info;
+	/* connector index passed to nvkm_connector_create_() */
+	int index;
+
+	struct {
+		/* GPIO event handler for hotplug; NULL if none was set up */
+		struct nouveau_eventh *event;
+		/* runs nvkm_connector_hpd_work() */
+		struct work_struct work;
+	} hpd;
+};
+
+/*
+ * Convenience wrappers around the connector functions declared below.
+ *
+ * nvkm_connector_create() supplies the object size from the type that 'd'
+ * points to (sizeof(**d)) and casts 'd' to void **.  The destroy/init/fini
+ * wrappers convert a struct nvkm_connector pointer with nv_object() before
+ * calling the matching _nvkm_connector_* function; note that their local
+ * temporary is named 'disp' although it holds a connector.
+ */
+#define nvkm_connector_create(p,e,c,b,i,d)                                     \
+	nvkm_connector_create_((p), (e), (c), (b), (i), sizeof(**d), (void **)d)
+#define nvkm_connector_destroy(d) ({                                           \
+	struct nvkm_connector *disp = (d);                                     \
+	_nvkm_connector_dtor(nv_object(disp));                                 \
+})
+#define nvkm_connector_init(d) ({                                              \
+	struct nvkm_connector *disp = (d);                                     \
+	_nvkm_connector_init(nv_object(disp));                                 \
+})
+#define nvkm_connector_fini(d,s) ({                                            \
+	struct nvkm_connector *disp = (d);                                     \
+	_nvkm_connector_fini(nv_object(disp), (s));                            \
+})
+
+int nvkm_connector_create_(struct nouveau_object *, struct nouveau_object *,
+			   struct nouveau_oclass *, struct nvbios_connE *,
+			   int, int, void **);
+
+int  _nvkm_connector_ctor(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, void *, u32,
+			  struct nouveau_object **);
+void _nvkm_connector_dtor(struct nouveau_object *);
+int  _nvkm_connector_init(struct nouveau_object *);
+int  _nvkm_connector_fini(struct nouveau_object *, bool);
+
+/* Class description for connectors; currently only wraps the base oclass. */
+struct nvkm_connector_impl {
+	struct nouveau_oclass base;
+};
+
+/*
+ * Logging helpers for connector code.  The expansion refers to a variable
+ * literally named 'conn' at the call site, treats it as a struct
+ * nvkm_connector and prefixes each message with the connector index followed
+ * by its location and type, all in hex.
+ *
+ * The whole group, DBG and ERR included, is only defined when MSG has not
+ * been defined before this point.
+ */
+#ifndef MSG
+#define MSG(l,f,a...) do {                                                     \
+	struct nvkm_connector *_conn = (void *)conn;                           \
+	nv_##l(nv_object(conn)->engine, "%02x:%02x%02x: "f, _conn->index,      \
+	       _conn->info.location, _conn->info.type, ##a);                   \
+} while(0)
+#define DBG(f,a...) MSG(debug, f, ##a)
+#define ERR(f,a...) MSG(error, f, ##a)
+#endif
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/dport.c b/drivers/gpu/drm/nouveau/core/engine/disp/dport.c
index 3ca2d25..39562d4 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/dport.c

@@ -30,42 +30,38 @@
 
 #include <engine/disp.h>
 
-#include "dport.h"
+#include <core/class.h>
 
-#define DBG(fmt, args...) nv_debug(dp->disp, "DP:%04x:%04x: " fmt,             \
-				   dp->outp->hasht, dp->outp->hashm, ##args)
-#define ERR(fmt, args...) nv_error(dp->disp, "DP:%04x:%04x: " fmt,             \
-				   dp->outp->hasht, dp->outp->hashm, ##args)
+#include "dport.h"
+#include "outpdp.h"
 
 /******************************************************************************
  * link training
  *****************************************************************************/
+/*
+ * Working state for one link-training attempt.
+ *
+ * outp:    the DP output being trained.
+ * link_nr: lane count; link_bw: link rate, logged as KB/s and divided by
+ *          27000 before being passed to lnk_ctl and written to the sink.
+ * stat:    dp_link_train_commit() reads one nibble per lane starting at
+ *          stat[4].
+ * pc2stat: dp_link_train_commit() reads two bits per lane from it.
+ * NOTE(review): the exact meaning of conf, pc2 and pc2conf is not visible in
+ * this part of the file -- confirm against the code that fills them.
+ */
 struct dp_state {
-	const struct nouveau_dp_func *func;
-	struct nouveau_disp *disp;
-	struct dcb_output *outp;
-	struct nvbios_dpout info;
-	u8 version;
-	struct nouveau_i2c_port *aux;
-	int head;
-	u8  dpcd[4];
+	struct nvkm_output_dp *outp;
 	int link_nr;
 	u32 link_bw;
 	u8  stat[6];
 	u8  conf[4];
+	bool pc2;
+	u8  pc2stat;
+	u8  pc2conf[2];
 };
 
 static int
 dp_set_link_config(struct dp_state *dp)
 {
-	struct nouveau_disp *disp = dp->disp;
+	struct nvkm_output_dp_impl *impl = (void *)nv_oclass(dp->outp);
+	struct nvkm_output_dp *outp = dp->outp;
+	struct nouveau_disp *disp = nouveau_disp(outp);
 	struct nouveau_bios *bios = nouveau_bios(disp);
 	struct nvbios_init init = {
-		.subdev = nv_subdev(dp->disp),
+		.subdev = nv_subdev(disp),
 		.bios = bios,
 		.offset = 0x0000,
-		.outp = dp->outp,
-		.crtc = dp->head,
+		.outp = &outp->base.info,
+		.crtc = -1,
 		.execute = 1,
 	};
 	u32 lnkcmp;
@@ -75,8 +71,8 @@
 	DBG("%d lanes at %d KB/s\n", dp->link_nr, dp->link_bw);
 
 	/* set desired link configuration on the source */
-	if ((lnkcmp = dp->info.lnkcmp)) {
-		if (dp->version < 0x30) {
+	if ((lnkcmp = dp->outp->info.lnkcmp)) {
+		if (outp->version < 0x30) {
 			while ((dp->link_bw / 10) < nv_ro16(bios, lnkcmp))
 				lnkcmp += 4;
 			init.offset = nv_ro16(bios, lnkcmp + 2);
@@ -89,73 +85,112 @@
 		nvbios_exec(&init);
 	}
 
-	ret = dp->func->lnk_ctl(dp->disp, dp->outp, dp->head,
-				dp->link_nr, dp->link_bw / 27000,
-				dp->dpcd[DPCD_RC02] &
-					 DPCD_RC02_ENHANCED_FRAME_CAP);
+	ret = impl->lnk_ctl(outp, dp->link_nr, dp->link_bw / 27000,
+			    outp->dpcd[DPCD_RC02] &
+				       DPCD_RC02_ENHANCED_FRAME_CAP);
 	if (ret) {
-		ERR("lnk_ctl failed with %d\n", ret);
+		if (ret < 0)
+			ERR("lnk_ctl failed with %d\n", ret);
 		return ret;
 	}
 
+	impl->lnk_pwr(outp, dp->link_nr);
+
 	/* set desired link configuration on the sink */
 	sink[0] = dp->link_bw / 27000;
 	sink[1] = dp->link_nr;
-	if (dp->dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP)
+	if (outp->dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP)
 		sink[1] |= DPCD_LC01_ENHANCED_FRAME_EN;
 
-	return nv_wraux(dp->aux, DPCD_LC00, sink, 2);
+	return nv_wraux(outp->base.edid, DPCD_LC00_LINK_BW_SET, sink, 2);
 }
 
 static void
 dp_set_training_pattern(struct dp_state *dp, u8 pattern)
 {
+	struct nvkm_output_dp_impl *impl = (void *)nv_oclass(dp->outp);
+	struct nvkm_output_dp *outp = dp->outp;
 	u8 sink_tp;
 
 	DBG("training pattern %d\n", pattern);
-	dp->func->pattern(dp->disp, dp->outp, dp->head, pattern);
+	impl->pattern(outp, pattern);
 
-	nv_rdaux(dp->aux, DPCD_LC02, &sink_tp, 1);
+	nv_rdaux(outp->base.edid, DPCD_LC02, &sink_tp, 1);
 	sink_tp &= ~DPCD_LC02_TRAINING_PATTERN_SET;
 	sink_tp |= pattern;
-	nv_wraux(dp->aux, DPCD_LC02, &sink_tp, 1);
+	nv_wraux(outp->base.edid, DPCD_LC02, &sink_tp, 1);
 }
 
 static int
-dp_link_train_commit(struct dp_state *dp)
+dp_link_train_commit(struct dp_state *dp, bool pc)
 {
-	int i;
+	struct nvkm_output_dp_impl *impl = (void *)nv_oclass(dp->outp);
+	struct nvkm_output_dp *outp = dp->outp;
+	int ret, i;
 
 	for (i = 0; i < dp->link_nr; i++) {
 		u8 lane = (dp->stat[4 + (i >> 1)] >> ((i & 1) * 4)) & 0xf;
+		u8 lpc2 = (dp->pc2stat >> (i * 2)) & 0x3;
 		u8 lpre = (lane & 0x0c) >> 2;
 		u8 lvsw = (lane & 0x03) >> 0;
+		u8 hivs = 3 - lpre;
+		u8 hipe = 3;
+		u8 hipc = 3;
+
+		if (lpc2 >= hipc)
+			lpc2 = hipc | DPCD_LC0F_LANE0_MAX_POST_CURSOR2_REACHED;
+		if (lpre >= hipe) {
+			lpre = hipe | DPCD_LC03_MAX_SWING_REACHED; /* yes. */
+			lvsw = hivs = 3 - (lpre & 3);
+		} else
+		if (lvsw >= hivs) {
+			lvsw = hivs | DPCD_LC03_MAX_SWING_REACHED;
+		}
 
 		dp->conf[i] = (lpre << 3) | lvsw;
-		if (lvsw == 3)
-			dp->conf[i] |= DPCD_LC03_MAX_SWING_REACHED;
-		if (lpre == 3)
-			dp->conf[i] |= DPCD_LC03_MAX_PRE_EMPHASIS_REACHED;
+		dp->pc2conf[i >> 1] |= lpc2 << ((i & 1) * 4);
 
-		DBG("config lane %d %02x\n", i, dp->conf[i]);
-		dp->func->drv_ctl(dp->disp, dp->outp, dp->head, i, lvsw, lpre);
+		DBG("config lane %d %02x %02x\n", i, dp->conf[i], lpc2);
+		impl->drv_ctl(outp, i, lvsw & 3, lpre & 3, lpc2 & 3);
 	}
 
-	return nv_wraux(dp->aux, DPCD_LC03(0), dp->conf, 4);
-}
-
-static int
-dp_link_train_update(struct dp_state *dp, u32 delay)
-{
-	int ret;
-
-	udelay(delay);
-
-	ret = nv_rdaux(dp->aux, DPCD_LS02, dp->stat, 6);
+	ret = nv_wraux(outp->base.edid, DPCD_LC03(0), dp->conf, 4);
 	if (ret)
 		return ret;
 
-	DBG("status %6ph\n", dp->stat);
+	if (pc) {
+		ret = nv_wraux(outp->base.edid, DPCD_LC0F, dp->pc2conf, 2);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+dp_link_train_update(struct dp_state *dp, bool pc, u32 delay)
+{
+	struct nvkm_output_dp *outp = dp->outp;
+	int ret;
+
+	if (outp->dpcd[DPCD_RC0E_AUX_RD_INTERVAL])
+		mdelay(outp->dpcd[DPCD_RC0E_AUX_RD_INTERVAL] * 4);
+	else
+		udelay(delay);
+
+	ret = nv_rdaux(outp->base.edid, DPCD_LS02, dp->stat, 6);
+	if (ret)
+		return ret;
+
+	if (pc) {
+		ret = nv_rdaux(outp->base.edid, DPCD_LS0C, &dp->pc2stat, 1);
+		if (ret)
+			dp->pc2stat = 0x00;
+		DBG("status %6ph pc2 %02x\n", dp->stat, dp->pc2stat);
+	} else {
+		DBG("status %6ph\n", dp->stat);
+	}
+
 	return 0;
 }
 
@@ -169,8 +204,8 @@
 	dp_set_training_pattern(dp, 1);
 
 	do {
-		if (dp_link_train_commit(dp) ||
-		    dp_link_train_update(dp, 100))
+		if (dp_link_train_commit(dp, false) ||
+		    dp_link_train_update(dp, false, 100))
 			break;
 
 		cr_done = true;
@@ -196,13 +231,17 @@
 static int
 dp_link_train_eq(struct dp_state *dp)
 {
+	struct nvkm_output_dp *outp = dp->outp;
 	bool eq_done = false, cr_done = true;
 	int tries = 0, i;
 
-	dp_set_training_pattern(dp, 2);
+	if (outp->dpcd[2] & DPCD_RC02_TPS3_SUPPORTED)
+		dp_set_training_pattern(dp, 3);
+	else
+		dp_set_training_pattern(dp, 2);
 
 	do {
-		if (dp_link_train_update(dp, 400))
+		if (dp_link_train_update(dp, dp->pc2, 400))
 			break;
 
 		eq_done = !!(dp->stat[2] & DPCD_LS04_INTERLANE_ALIGN_DONE);
@@ -215,7 +254,7 @@
 				eq_done = false;
 		}
 
-		if (dp_link_train_commit(dp))
+		if (dp_link_train_commit(dp, dp->pc2))
 			break;
 	} while (!eq_done && cr_done && ++tries <= 5);
 
@@ -225,121 +264,109 @@
 static void
 dp_link_train_init(struct dp_state *dp, bool spread)
 {
+	struct nvkm_output_dp *outp = dp->outp;
+	struct nouveau_disp *disp = nouveau_disp(outp);
+	struct nouveau_bios *bios = nouveau_bios(disp);
 	struct nvbios_init init = {
-		.subdev = nv_subdev(dp->disp),
-		.bios = nouveau_bios(dp->disp),
-		.outp = dp->outp,
-		.crtc = dp->head,
+		.subdev = nv_subdev(disp),
+		.bios = bios,
+		.outp = &outp->base.info,
+		.crtc = -1,
 		.execute = 1,
 	};
 
 	/* set desired spread */
 	if (spread)
-		init.offset = dp->info.script[2];
+		init.offset = outp->info.script[2];
 	else
-		init.offset = dp->info.script[3];
+		init.offset = outp->info.script[3];
 	nvbios_exec(&init);
 
 	/* pre-train script */
-	init.offset = dp->info.script[0];
+	init.offset = outp->info.script[0];
 	nvbios_exec(&init);
 }
 
 static void
 dp_link_train_fini(struct dp_state *dp)
 {
+	struct nvkm_output_dp *outp = dp->outp;
+	struct nouveau_disp *disp = nouveau_disp(outp);
+	struct nouveau_bios *bios = nouveau_bios(disp);
 	struct nvbios_init init = {
-		.subdev = nv_subdev(dp->disp),
-		.bios = nouveau_bios(dp->disp),
-		.outp = dp->outp,
-		.crtc = dp->head,
+		.subdev = nv_subdev(disp),
+		.bios = bios,
+		.outp = &outp->base.info,
+		.crtc = -1,
 		.execute = 1,
 	};
 
 	/* post-train script */
-	init.offset = dp->info.script[1],
+	init.offset = outp->info.script[1],
 	nvbios_exec(&init);
 }
 
-int
-nouveau_dp_train(struct nouveau_disp *disp, const struct nouveau_dp_func *func,
-		 struct dcb_output *outp, int head, u32 datarate)
+static const struct dp_rates {
+	u32 rate;
+	u8  bw;
+	u8  nr;
+} nouveau_dp_rates[] = {
+	{ 2160000, 0x14, 4 },
+	{ 1080000, 0x0a, 4 },
+	{ 1080000, 0x14, 2 },
+	{  648000, 0x06, 4 },
+	{  540000, 0x0a, 2 },
+	{  540000, 0x14, 1 },
+	{  324000, 0x06, 2 },
+	{  270000, 0x0a, 1 },
+	{  162000, 0x06, 1 },
+	{}
+};
+
+void
+nouveau_dp_train(struct work_struct *w)
 {
-	struct nouveau_bios *bios = nouveau_bios(disp);
-	struct nouveau_i2c *i2c = nouveau_i2c(disp);
+	struct nvkm_output_dp *outp = container_of(w, typeof(*outp), lt.work);
+	struct nouveau_disp *disp = nouveau_disp(outp);
+	const struct dp_rates *cfg = nouveau_dp_rates;
 	struct dp_state _dp = {
-		.disp = disp,
-		.func = func,
 		.outp = outp,
-		.head = head,
 	}, *dp = &_dp;
-	const u32 bw_list[] = { 540000, 270000, 162000, 0 };
-	const u32 *link_bw = bw_list;
-	u8  hdr, cnt, len;
-	u32 data;
+	u32 datarate = 0;
 	int ret;
 
-	/* find the bios displayport data relevant to this output */
-	data = nvbios_dpout_match(bios, outp->hasht, outp->hashm, &dp->version,
-				 &hdr, &cnt, &len, &dp->info);
-	if (!data) {
-		ERR("bios data not found\n");
-		return -EINVAL;
-	}
-
-	/* acquire the aux channel and fetch some info about the display */
-	if (outp->location)
-		dp->aux = i2c->find_type(i2c, NV_I2C_TYPE_EXTAUX(outp->extdev));
-	else
-		dp->aux = i2c->find(i2c, NV_I2C_TYPE_DCBI2C(outp->i2c_index));
-	if (!dp->aux) {
-		ERR("no aux channel?!\n");
-		return -ENODEV;
-	}
-
-	ret = nv_rdaux(dp->aux, 0x00000, dp->dpcd, sizeof(dp->dpcd));
-	if (ret) {
-		/* it's possible the display has been unplugged before we
-		 * get here.  we still need to execute the full set of
-		 * vbios scripts, and program the OR at a high enough
-		 * frequency to satisfy the target mode.  failure to do
-		 * so results at best in an UPDATE hanging, and at worst
-		 * with PDISP running away to join the circus.
-		 */
-		dp->dpcd[1] = link_bw[0] / 27000;
-		dp->dpcd[2] = 4;
-		dp->dpcd[3] = 0x00;
-		ERR("failed to read DPCD\n");
-	}
-
 	/* bring capabilities within encoder limits */
-	if ((dp->dpcd[2] & 0x1f) > dp->outp->dpconf.link_nr) {
-		dp->dpcd[2] &= ~0x1f;
-		dp->dpcd[2] |= dp->outp->dpconf.link_nr;
+	if (nv_mclass(disp) < NVD0_DISP_CLASS)
+		outp->dpcd[2] &= ~DPCD_RC02_TPS3_SUPPORTED;
+	if ((outp->dpcd[2] & 0x1f) > outp->base.info.dpconf.link_nr) {
+		outp->dpcd[2] &= ~DPCD_RC02_MAX_LANE_COUNT;
+		outp->dpcd[2] |= outp->base.info.dpconf.link_nr;
 	}
-	if (dp->dpcd[1] > dp->outp->dpconf.link_bw)
-		dp->dpcd[1] = dp->outp->dpconf.link_bw;
+	if (outp->dpcd[1] > outp->base.info.dpconf.link_bw)
+		outp->dpcd[1] = outp->base.info.dpconf.link_bw;
+	dp->pc2 = outp->dpcd[2] & DPCD_RC02_TPS3_SUPPORTED;
 
-	/* adjust required bandwidth for 8B/10B coding overhead */
-	datarate = (datarate / 8) * 10;
+	/* restrict link config to the lowest required rate, if requested */
+	if (datarate) {
+		datarate = (datarate / 8) * 10; /* 8B/10B coding overhead */
+		while (cfg[1].rate >= datarate)
+			cfg++;
+	}
+	cfg--;
+
+	/* disable link interrupt handling during link training */
+	nouveau_event_put(outp->irq);
 
 	/* enable down-spreading and execute pre-train script from vbios */
-	dp_link_train_init(dp, dp->dpcd[3] & 0x01);
+	dp_link_train_init(dp, outp->dpcd[3] & 0x01);
 
-	/* start off at highest link rate supported by encoder and display */
-	while (*link_bw > (dp->dpcd[1] * 27000))
-		link_bw++;
-
-	while ((ret = -EIO) && link_bw[0]) {
-		/* find minimum required lane count at this link rate */
-		dp->link_nr = dp->dpcd[2] & DPCD_RC02_MAX_LANE_COUNT;
-		while ((dp->link_nr >> 1) * link_bw[0] > datarate)
-			dp->link_nr >>= 1;
-
-		/* drop link rate to minimum with this lane count */
-		while ((link_bw[1] * dp->link_nr) > datarate)
-			link_bw++;
-		dp->link_bw = link_bw[0];
+	while (ret = -EIO, (++cfg)->rate) {
+		/* select next configuration supported by encoder and sink */
+		while (cfg->nr > (outp->dpcd[2] & DPCD_RC02_MAX_LANE_COUNT) ||
+		       cfg->bw > (outp->dpcd[DPCD_RC01_MAX_LINK_RATE]))
+			cfg++;
+		dp->link_bw = cfg->bw * 27000;
+		dp->link_nr = cfg->nr;
 
 		/* program selected link configuration */
 		ret = dp_set_link_config(dp);
@@ -356,17 +383,18 @@
 			 */
 			break;
 		}
-
-		/* retry at lower rate */
-		link_bw++;
 	}
 
-	/* finish link training */
+	/* finish link training and execute post-train script from vbios */
 	dp_set_training_pattern(dp, 0);
 	if (ret < 0)
 		ERR("link training failed\n");
 
-	/* execute post-train script from vbios */
 	dp_link_train_fini(dp);
-	return (ret < 0) ? false : true;
+
+	/* signal completion and enable link interrupt handling */
+	DBG("training complete\n");
+	atomic_set(&outp->lt.done, 1);
+	wake_up(&outp->lt.wait);
+	nouveau_event_get(outp->irq);
 }

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/dport.h b/drivers/gpu/drm/nouveau/core/engine/disp/dport.h
index 0e1bbd1..5628d2d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/dport.h
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/dport.h

@@ -2,19 +2,18 @@
 #define __NVKM_DISP_DPORT_H__
 
 /* DPCD Receiver Capabilities */
-#define DPCD_RC00                                                       0x00000
-#define DPCD_RC00_DPCD_REV                                                 0xff
-#define DPCD_RC01                                                       0x00001
-#define DPCD_RC01_MAX_LINK_RATE                                            0xff
+#define DPCD_RC00_DPCD_REV                                              0x00000
+#define DPCD_RC01_MAX_LINK_RATE                                         0x00001
 #define DPCD_RC02                                                       0x00002
 #define DPCD_RC02_ENHANCED_FRAME_CAP                                       0x80
+#define DPCD_RC02_TPS3_SUPPORTED                                           0x40
 #define DPCD_RC02_MAX_LANE_COUNT                                           0x1f
 #define DPCD_RC03                                                       0x00003
 #define DPCD_RC03_MAX_DOWNSPREAD                                           0x01
+#define DPCD_RC0E_AUX_RD_INTERVAL                                       0x0000e
 
 /* DPCD Link Configuration */
-#define DPCD_LC00                                                       0x00100
-#define DPCD_LC00_LINK_BW_SET                                              0xff
+#define DPCD_LC00_LINK_BW_SET                                           0x00100
 #define DPCD_LC01                                                       0x00101
 #define DPCD_LC01_ENHANCED_FRAME_EN                                        0x80
 #define DPCD_LC01_LANE_COUNT_SET                                           0x1f
@@ -25,6 +24,16 @@
 #define DPCD_LC03_PRE_EMPHASIS_SET                                         0x18
 #define DPCD_LC03_MAX_SWING_REACHED                                        0x04
 #define DPCD_LC03_VOLTAGE_SWING_SET                                        0x03
+#define DPCD_LC0F                                                       0x0010f
+#define DPCD_LC0F_LANE1_MAX_POST_CURSOR2_REACHED                           0x40
+#define DPCD_LC0F_LANE1_POST_CURSOR2_SET                                   0x30
+#define DPCD_LC0F_LANE0_MAX_POST_CURSOR2_REACHED                           0x04
+#define DPCD_LC0F_LANE0_POST_CURSOR2_SET                                   0x03
+#define DPCD_LC10                                                       0x00110
+#define DPCD_LC10_LANE3_MAX_POST_CURSOR2_REACHED                           0x40
+#define DPCD_LC10_LANE3_POST_CURSOR2_SET                                   0x30
+#define DPCD_LC10_LANE2_MAX_POST_CURSOR2_REACHED                           0x04
+#define DPCD_LC10_LANE2_POST_CURSOR2_SET                                   0x03
 
 /* DPCD Link/Sink Status */
 #define DPCD_LS02                                                       0x00202
@@ -55,24 +64,12 @@
 #define DPCD_LS07_LANE3_VOLTAGE_SWING                                      0x30
 #define DPCD_LS07_LANE2_PRE_EMPHASIS                                       0x0c
 #define DPCD_LS07_LANE2_VOLTAGE_SWING                                      0x03
+#define DPCD_LS0C                                                       0x0020c
+#define DPCD_LS0C_LANE3_POST_CURSOR2                                       0xc0
+#define DPCD_LS0C_LANE2_POST_CURSOR2                                       0x30
+#define DPCD_LS0C_LANE1_POST_CURSOR2                                       0x0c
+#define DPCD_LS0C_LANE0_POST_CURSOR2                                       0x03
 
-struct nouveau_disp;
-struct dcb_output;
-
-struct nouveau_dp_func {
-	int (*pattern)(struct nouveau_disp *, struct dcb_output *,
-		       int head, int pattern);
-	int (*lnk_ctl)(struct nouveau_disp *, struct dcb_output *, int head,
-		       int link_nr, int link_bw, bool enh_frame);
-	int (*drv_ctl)(struct nouveau_disp *, struct dcb_output *, int head,
-		       int lane, int swing, int preem);
-};
-
-extern const struct nouveau_dp_func nv94_sor_dp_func;
-extern const struct nouveau_dp_func nvd0_sor_dp_func;
-extern const struct nouveau_dp_func nv50_pior_dp_func;
-
-int nouveau_dp_train(struct nouveau_disp *, const struct nouveau_dp_func *,
-		     struct dcb_output *, int, u32);
+void nouveau_dp_train(struct work_struct *);
 
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c
index cf6f596..9fc7447 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c

@@ -81,7 +81,6 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
 	priv->sor.hdmi = nvd0_hdmi_ctrl;
-	priv->sor.dp = &nvd0_sor_dp_func;
 	return 0;
 }
 
@@ -94,6 +93,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nvd0_disp_outp_sclass,
 	.mthd.core = &nve0_disp_mast_mthd_chan,
 	.mthd.base = &nvd0_disp_sync_mthd_chan,
 	.mthd.ovly = &nve0_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv04.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv04.c
index 6c89af7..a32666e 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv04.c

@@ -51,6 +51,14 @@
 	args->htotal  = nv_rd32(priv, 0x680824 + (head * 0x2000)) & 0xffff;
 	args->hblanke = args->htotal - 1;
 
+	/*
+	 * If output is vga instead of digital then vtotal/htotal is invalid
+	 * so we have to give up and trigger the timestamping fallback in the
+	 * drm core.
+	 */
+	if (!args->vtotal || !args->htotal)
+		return -ENOTSUPP;
+
 	args->time[0] = ktime_to_ns(ktime_get());
 	line = nv_rd32(priv, 0x600868 + (head * 0x2000));
 	args->time[1] = ktime_to_ns(ktime_get());
@@ -78,13 +86,13 @@
  ******************************************************************************/
 
 static void
-nv04_disp_vblank_enable(struct nouveau_event *event, int head)
+nv04_disp_vblank_enable(struct nouveau_event *event, int type, int head)
 {
 	nv_wr32(event->priv, 0x600140 + (head * 0x2000) , 0x00000001);
 }
 
 static void
-nv04_disp_vblank_disable(struct nouveau_event *event, int head)
+nv04_disp_vblank_disable(struct nouveau_event *event, int type, int head)
 {
 	nv_wr32(event->priv, 0x600140 + (head * 0x2000) , 0x00000000);
 }
@@ -98,12 +106,12 @@
 	u32 pvideo;
 
 	if (crtc0 & 0x00000001) {
-		nouveau_event_trigger(priv->base.vblank, 0);
+		nouveau_event_trigger(priv->base.vblank, 1, 0);
 		nv_wr32(priv, 0x600100, 0x00000001);
 	}
 
 	if (crtc1 & 0x00000001) {
-		nouveau_event_trigger(priv->base.vblank, 1);
+		nouveau_event_trigger(priv->base.vblank, 1, 1);
 		nv_wr32(priv, 0x602100, 0x00000001);
 	}
 

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
index 9a0cab9..1e85f36 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c

@@ -829,13 +829,13 @@
 }
 
 static void
-nv50_disp_base_vblank_enable(struct nouveau_event *event, int head)
+nv50_disp_base_vblank_enable(struct nouveau_event *event, int type, int head)
 {
 	nv_mask(event->priv, 0x61002c, (4 << head), (4 << head));
 }
 
 static void
-nv50_disp_base_vblank_disable(struct nouveau_event *event, int head)
+nv50_disp_base_vblank_disable(struct nouveau_event *event, int type, int head)
 {
 	nv_mask(event->priv, 0x61002c, (4 << head), 0);
 }
@@ -1114,19 +1114,20 @@
 	nv_wr32(priv, 0x610080 + (chid * 0x08), 0x90000000);
 }
 
-static u16
-exec_lookup(struct nv50_disp_priv *priv, int head, int outp, u32 ctrl,
-	    struct dcb_output *dcb, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+static struct nvkm_output *
+exec_lookup(struct nv50_disp_priv *priv, int head, int or, u32 ctrl,
+	    u32 *data, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 	    struct nvbios_outp *info)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
-	u16 mask, type, data;
+	struct nvkm_output *outp;
+	u16 mask, type;
 
-	if (outp < 4) {
+	if (or < 4) {
 		type = DCB_OUTPUT_ANALOG;
 		mask = 0;
 	} else
-	if (outp < 8) {
+	if (or < 8) {
 		switch (ctrl & 0x00000f00) {
 		case 0x00000000: type = DCB_OUTPUT_LVDS; mask = 1; break;
 		case 0x00000100: type = DCB_OUTPUT_TMDS; mask = 1; break;
@@ -1136,45 +1137,48 @@
 		case 0x00000900: type = DCB_OUTPUT_DP; mask = 2; break;
 		default:
 			nv_error(priv, "unknown SOR mc 0x%08x\n", ctrl);
-			return 0x0000;
+			return NULL;
 		}
-		outp -= 4;
+		or  -= 4;
 	} else {
-		outp = outp - 8;
+		or   = or - 8;
 		type = 0x0010;
 		mask = 0;
 		switch (ctrl & 0x00000f00) {
-		case 0x00000000: type |= priv->pior.type[outp]; break;
+		case 0x00000000: type |= priv->pior.type[or]; break;
 		default:
 			nv_error(priv, "unknown PIOR mc 0x%08x\n", ctrl);
-			return 0x0000;
+			return NULL;
 		}
 	}
 
 	mask  = 0x00c0 & (mask << 6);
-	mask |= 0x0001 << outp;
+	mask |= 0x0001 << or;
 	mask |= 0x0100 << head;
 
-	data = dcb_outp_match(bios, type, mask, ver, hdr, dcb);
-	if (!data)
-		return 0x0000;
+	list_for_each_entry(outp, &priv->base.outp, head) {
+		if ((outp->info.hasht & 0xff) == type &&
+		    (outp->info.hashm & mask) == mask) {
+			*data = nvbios_outp_match(bios, outp->info.hasht,
+							outp->info.hashm,
+						  ver, hdr, cnt, len, info);
+			if (!*data)
+				return NULL;
+			return outp;
+		}
+	}
 
-	/* off-chip encoders require matching the exact encoder type */
-	if (dcb->location != 0)
-		type |= dcb->extdev << 8;
-
-	return nvbios_outp_match(bios, type, mask, ver, hdr, cnt, len, info);
+	return NULL;
 }
 
-static bool
+static struct nvkm_output *
 exec_script(struct nv50_disp_priv *priv, int head, int id)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvkm_output *outp;
 	struct nvbios_outp info;
-	struct dcb_output dcb;
 	u8  ver, hdr, cnt, len;
-	u16 data;
-	u32 ctrl = 0x00000000;
+	u32 data, ctrl = 0;
 	u32 reg;
 	int i;
 
@@ -1204,36 +1208,35 @@
 	}
 
 	if (!(ctrl & (1 << head)))
-		return false;
+		return NULL;
 	i--;
 
-	data = exec_lookup(priv, head, i, ctrl, &dcb, &ver, &hdr, &cnt, &len, &info);
-	if (data) {
+	outp = exec_lookup(priv, head, i, ctrl, &data, &ver, &hdr, &cnt, &len, &info);
+	if (outp) {
 		struct nvbios_init init = {
 			.subdev = nv_subdev(priv),
 			.bios = bios,
 			.offset = info.script[id],
-			.outp = &dcb,
+			.outp = &outp->info,
 			.crtc = head,
 			.execute = 1,
 		};
 
-		return nvbios_exec(&init) == 0;
+		nvbios_exec(&init);
 	}
 
-	return false;
+	return outp;
 }
 
-static u32
-exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, u32 pclk,
-	    struct dcb_output *outp)
+static struct nvkm_output *
+exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, u32 pclk, u32 *conf)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvkm_output *outp;
 	struct nvbios_outp info1;
 	struct nvbios_ocfg info2;
 	u8  ver, hdr, cnt, len;
-	u32 ctrl = 0x00000000;
-	u32 data, conf = ~0;
+	u32 data, ctrl = 0;
 	u32 reg;
 	int i;
 
@@ -1263,37 +1266,37 @@
 	}
 
 	if (!(ctrl & (1 << head)))
-		return conf;
+		return NULL;
 	i--;
 
-	data = exec_lookup(priv, head, i, ctrl, outp, &ver, &hdr, &cnt, &len, &info1);
+	outp = exec_lookup(priv, head, i, ctrl, &data, &ver, &hdr, &cnt, &len, &info1);
 	if (!data)
-		return conf;
+		return NULL;
 
-	if (outp->location == 0) {
-		switch (outp->type) {
+	if (outp->info.location == 0) {
+		switch (outp->info.type) {
 		case DCB_OUTPUT_TMDS:
-			conf = (ctrl & 0x00000f00) >> 8;
+			*conf = (ctrl & 0x00000f00) >> 8;
 			if (pclk >= 165000)
-				conf |= 0x0100;
+				*conf |= 0x0100;
 			break;
 		case DCB_OUTPUT_LVDS:
-			conf = priv->sor.lvdsconf;
+			*conf = priv->sor.lvdsconf;
 			break;
 		case DCB_OUTPUT_DP:
-			conf = (ctrl & 0x00000f00) >> 8;
+			*conf = (ctrl & 0x00000f00) >> 8;
 			break;
 		case DCB_OUTPUT_ANALOG:
 		default:
-			conf = 0x00ff;
+			*conf = 0x00ff;
 			break;
 		}
 	} else {
-		conf = (ctrl & 0x00000f00) >> 8;
+		*conf = (ctrl & 0x00000f00) >> 8;
 		pclk = pclk / 2;
 	}
 
-	data = nvbios_ocfg_match(bios, data, conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -1301,7 +1304,7 @@
 				.subdev = nv_subdev(priv),
 				.bios = bios,
 				.offset = data,
-				.outp = outp,
+				.outp = &outp->info,
 				.crtc = head,
 				.execute = 1,
 			};
@@ -1310,7 +1313,7 @@
 		}
 	}
 
-	return conf;
+	return outp;
 }
 
 static void
@@ -1322,7 +1325,35 @@
 static void
 nv50_disp_intr_unk20_0(struct nv50_disp_priv *priv, int head)
 {
-	exec_script(priv, head, 2);
+	struct nvkm_output *outp = exec_script(priv, head, 2);
+
+	/* the binary driver does this outside of the supervisor handling
+	 * (after the third supervisor from a detach).  we (currently?)
+	 * allow both detach/attach to happen in the same set of
+	 * supervisor interrupts, so it would make sense to execute this
+	 * (full power down?) script after all the detach phases of the
+	 * supervisor handling.  like with training if needed from the
+	 * second supervisor, nvidia doesn't do this, so who knows if it's
+	 * entirely safe, but it does appear to work..
+	 *
+	 * without this script being run, on some configurations i've
+	 * seen, switching from DP to TMDS on a DP connector may result
+	 * in a blank screen (SOR_PWR off/on can restore it)
+	 */
+	if (outp && outp->info.type == DCB_OUTPUT_DP) {
+		struct nvkm_output_dp *outpdp = (void *)outp;
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = nouveau_bios(priv),
+			.outp = &outp->info,
+			.crtc = head,
+			.offset = outpdp->info.script[4],
+			.execute = 1,
+		};
+
+		nvbios_exec(&init);
+		atomic_set(&outpdp->lt.done, 0);
+	}
 }
 
 static void
@@ -1444,56 +1475,83 @@
 static void
 nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head)
 {
-	struct dcb_output outp;
+	struct nvkm_output *outp;
 	u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
 	u32 hval, hreg = 0x614200 + (head * 0x800);
 	u32 oval, oreg;
-	u32 mask;
-	u32 conf = exec_clkcmp(priv, head, 0xff, pclk, &outp);
-	if (conf != ~0) {
-		if (outp.location == 0 && outp.type == DCB_OUTPUT_DP) {
-			u32 soff = (ffs(outp.or) - 1) * 0x08;
-			u32 ctrl = nv_rd32(priv, 0x610794 + soff);
-			u32 datarate;
+	u32 mask, conf;
 
-			switch ((ctrl & 0x000f0000) >> 16) {
-			case 6: datarate = pclk * 30 / 8; break;
-			case 5: datarate = pclk * 24 / 8; break;
-			case 2:
-			default:
-				datarate = pclk * 18 / 8;
-				break;
-			}
+	outp = exec_clkcmp(priv, head, 0xff, pclk, &conf);
+	if (!outp)
+		return;
 
-			nouveau_dp_train(&priv->base, priv->sor.dp,
-					 &outp, head, datarate);
-		}
+	/* we allow both encoder attach and detach operations to occur
+	 * within a single supervisor (ie. modeset) sequence.  the
+	 * encoder detach scripts quite often switch off power to the
+	 * lanes, which requires the link to be re-trained.
+	 *
+	 * this is not generally an issue as the sink "must" (heh)
+	 * signal an irq when it's lost sync so the driver can
+	 * re-train.
+	 *
+	 * however, on some boards, if one does not configure at least
+	 * the gpu side of the link *before* attaching, then various
+	 * things can go horribly wrong (PDISP disappearing from mmio,
+	 * third supervisor never happens, etc).
+	 *
+	 * the solution is simply to retrain here, if necessary.  last
+	 * i checked, the binary driver userspace does not appear to
+	 * trigger this situation (it forces an UPDATE between steps).
+	 */
+	if (outp->info.type == DCB_OUTPUT_DP) {
+		u32 soff = (ffs(outp->info.or) - 1) * 0x08;
+		u32 ctrl, datarate;
 
-		exec_clkcmp(priv, head, 0, pclk, &outp);
-
-		if (!outp.location && outp.type == DCB_OUTPUT_ANALOG) {
-			oreg = 0x614280 + (ffs(outp.or) - 1) * 0x800;
-			oval = 0x00000000;
-			hval = 0x00000000;
-			mask = 0xffffffff;
-		} else
-		if (!outp.location) {
-			if (outp.type == DCB_OUTPUT_DP)
-				nv50_disp_intr_unk20_2_dp(priv, &outp, pclk);
-			oreg = 0x614300 + (ffs(outp.or) - 1) * 0x800;
-			oval = (conf & 0x0100) ? 0x00000101 : 0x00000000;
-			hval = 0x00000000;
-			mask = 0x00000707;
+		if (outp->info.location == 0) {
+			ctrl = nv_rd32(priv, 0x610794 + soff);
+			soff = 1;
 		} else {
-			oreg = 0x614380 + (ffs(outp.or) - 1) * 0x800;
-			oval = 0x00000001;
-			hval = 0x00000001;
-			mask = 0x00000707;
+			ctrl = nv_rd32(priv, 0x610b80 + soff);
+			soff = 2;
 		}
 
-		nv_mask(priv, hreg, 0x0000000f, hval);
-		nv_mask(priv, oreg, mask, oval);
+		switch ((ctrl & 0x000f0000) >> 16) {
+		case 6: datarate = pclk * 30 / 8; break;
+		case 5: datarate = pclk * 24 / 8; break;
+		case 2:
+		default:
+			datarate = pclk * 18 / 8;
+			break;
+		}
+
+		if (nvkm_output_dp_train(outp, datarate / soff, true))
+			ERR("link not trained before attach\n");
 	}
+
+	exec_clkcmp(priv, head, 0, pclk, &conf);
+
+	if (!outp->info.location && outp->info.type == DCB_OUTPUT_ANALOG) {
+		oreg = 0x614280 + (ffs(outp->info.or) - 1) * 0x800;
+		oval = 0x00000000;
+		hval = 0x00000000;
+		mask = 0xffffffff;
+	} else
+	if (!outp->info.location) {
+		if (outp->info.type == DCB_OUTPUT_DP)
+			nv50_disp_intr_unk20_2_dp(priv, &outp->info, pclk);
+		oreg = 0x614300 + (ffs(outp->info.or) - 1) * 0x800;
+		oval = (conf & 0x0100) ? 0x00000101 : 0x00000000;
+		hval = 0x00000000;
+		mask = 0x00000707;
+	} else {
+		oreg = 0x614380 + (ffs(outp->info.or) - 1) * 0x800;
+		oval = 0x00000001;
+		hval = 0x00000001;
+		mask = 0x00000707;
+	}
+
+	nv_mask(priv, hreg, 0x0000000f, hval);
+	nv_mask(priv, oreg, mask, oval);
 }
 
 /* If programming a TMDS output on a SOR that can also be configured for
@@ -1521,30 +1579,16 @@
 static void
 nv50_disp_intr_unk40_0(struct nv50_disp_priv *priv, int head)
 {
-	struct dcb_output outp;
+	struct nvkm_output *outp;
 	u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
-	if (exec_clkcmp(priv, head, 1, pclk, &outp) != ~0) {
-		if (outp.location == 0 && outp.type == DCB_OUTPUT_TMDS)
-			nv50_disp_intr_unk40_0_tmds(priv, &outp);
-		else
-		if (outp.location == 1 && outp.type == DCB_OUTPUT_DP) {
-			u32 soff = (ffs(outp.or) - 1) * 0x08;
-			u32 ctrl = nv_rd32(priv, 0x610b84 + soff);
-			u32 datarate;
+	u32 conf;
 
-			switch ((ctrl & 0x000f0000) >> 16) {
-			case 6: datarate = pclk * 30 / 8; break;
-			case 5: datarate = pclk * 24 / 8; break;
-			case 2:
-			default:
-				datarate = pclk * 18 / 8;
-				break;
-			}
+	outp = exec_clkcmp(priv, head, 1, pclk, &conf);
+	if (!outp)
+		return;
 
-			nouveau_dp_train(&priv->base, priv->pior.dp,
-					 &outp, head, datarate);
-		}
-	}
+	if (outp->info.location == 0 && outp->info.type == DCB_OUTPUT_TMDS)
+		nv50_disp_intr_unk40_0_tmds(priv, &outp->info);
 }
 
 void
@@ -1610,13 +1654,13 @@
 	}
 
 	if (intr1 & 0x00000004) {
-		nouveau_event_trigger(priv->base.vblank, 0);
+		nouveau_event_trigger(priv->base.vblank, 1, 0);
 		nv_wr32(priv, 0x610024, 0x00000004);
 		intr1 &= ~0x00000004;
 	}
 
 	if (intr1 & 0x00000008) {
-		nouveau_event_trigger(priv->base.vblank, 1);
+		nouveau_event_trigger(priv->base.vblank, 1, 1);
 		nv_wr32(priv, 0x610024, 0x00000008);
 		intr1 &= ~0x00000008;
 	}
@@ -1656,11 +1700,16 @@
 	priv->dac.sense = nv50_dac_sense;
 	priv->sor.power = nv50_sor_power;
 	priv->pior.power = nv50_pior_power;
-	priv->pior.dp = &nv50_pior_dp_func;
 	return 0;
 }
 
 struct nouveau_oclass *
+nv50_disp_outp_sclass[] = {
+	&nv50_pior_dp_impl.base.base,
+	NULL
+};
+
+struct nouveau_oclass *
 nv50_disp_oclass = &(struct nv50_disp_impl) {
 	.base.base.handle = NV_ENGINE(DISP, 0x50),
 	.base.base.ofuncs = &(struct nouveau_ofuncs) {
@@ -1669,6 +1718,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nv50_disp_outp_sclass,
 	.mthd.core = &nv50_disp_mast_mthd_chan,
 	.mthd.base = &nv50_disp_sync_mthd_chan,
 	.mthd.ovly = &nv50_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
index 48d59db..1a88647 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h

@@ -11,6 +11,8 @@
 
 #include "dport.h"
 #include "priv.h"
+#include "outp.h"
+#include "outpdp.h"
 
 struct nv50_disp_impl {
 	struct nouveau_disp_impl base;
@@ -43,13 +45,11 @@
 		int (*hda_eld)(struct nv50_disp_priv *, int sor, u8 *, u32);
 		int (*hdmi)(struct nv50_disp_priv *, int head, int sor, u32);
 		u32 lvdsconf;
-		const struct nouveau_dp_func *dp;
 	} sor;
 	struct {
 		int nr;
 		int (*power)(struct nv50_disp_priv *, int ext, u32 data);
 		u8 type[3];
-		const struct nouveau_dp_func *dp;
 	} pior;
 };
 
@@ -199,4 +199,14 @@
 extern const struct nv50_disp_mthd_chan nve0_disp_mast_mthd_chan;
 extern const struct nv50_disp_mthd_chan nve0_disp_ovly_mthd_chan;
 
+extern struct nvkm_output_dp_impl nv50_pior_dp_impl;
+extern struct nouveau_oclass *nv50_disp_outp_sclass[];
+
+extern struct nvkm_output_dp_impl nv94_sor_dp_impl;
+int nv94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int);
+extern struct nouveau_oclass *nv94_disp_outp_sclass[];
+
+extern struct nvkm_output_dp_impl nvd0_sor_dp_impl;
+extern struct nouveau_oclass *nvd0_disp_outp_sclass[];
+
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
index 98c5b19..1cc62e4 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c

@@ -264,7 +264,6 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hdmi = nv84_hdmi_ctrl;
 	priv->pior.power = nv50_pior_power;
-	priv->pior.dp = &nv50_pior_dp_func;
 	return 0;
 }
 
@@ -277,6 +276,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nv50_disp_outp_sclass,
 	.mthd.core = &nv84_disp_mast_mthd_chan,
 	.mthd.base = &nv84_disp_sync_mthd_chan,
 	.mthd.ovly = &nv84_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
index 6844061..4f718a9 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c

@@ -77,6 +77,7 @@
 	{ SOR_MTHD(NV50_DISP_SOR_PWR)         , nv50_sor_mthd },
 	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
 	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_PWR)      , nv50_sor_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
 	{ PIOR_MTHD(NV50_DISP_PIOR_PWR)       , nv50_pior_mthd },
@@ -122,13 +123,18 @@
 	priv->dac.sense = nv50_dac_sense;
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hdmi = nv84_hdmi_ctrl;
-	priv->sor.dp = &nv94_sor_dp_func;
 	priv->pior.power = nv50_pior_power;
-	priv->pior.dp = &nv50_pior_dp_func;
 	return 0;
 }
 
 struct nouveau_oclass *
+nv94_disp_outp_sclass[] = {
+	&nv50_pior_dp_impl.base.base,
+	&nv94_sor_dp_impl.base.base,
+	NULL
+};
+
+struct nouveau_oclass *
 nv94_disp_oclass = &(struct nv50_disp_impl) {
 	.base.base.handle = NV_ENGINE(DISP, 0x88),
 	.base.base.ofuncs = &(struct nouveau_ofuncs) {
@@ -137,6 +143,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nv94_disp_outp_sclass,
 	.mthd.core = &nv94_disp_mast_mthd_chan,
 	.mthd.base = &nv84_disp_sync_mthd_chan,
 	.mthd.ovly = &nv84_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
index 88c9624..6237a9a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c

@@ -126,7 +126,6 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hdmi = nv84_hdmi_ctrl;
 	priv->pior.power = nv50_pior_power;
-	priv->pior.dp = &nv50_pior_dp_func;
 	return 0;
 }
 
@@ -139,6 +138,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nv50_disp_outp_sclass,
 	.mthd.core = &nv84_disp_mast_mthd_chan,
 	.mthd.base = &nv84_disp_sync_mthd_chan,
 	.mthd.ovly = &nva0_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
index 46cb2ce..019124d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c

@@ -50,6 +50,7 @@
 	{ SOR_MTHD(NVA3_DISP_SOR_HDA_ELD)     , nv50_sor_mthd },
 	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
 	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_PWR)      , nv50_sor_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
 	{ PIOR_MTHD(NV50_DISP_PIOR_PWR)       , nv50_pior_mthd },
@@ -96,9 +97,7 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nva3_hda_eld;
 	priv->sor.hdmi = nva3_hdmi_ctrl;
-	priv->sor.dp = &nv94_sor_dp_func;
 	priv->pior.power = nv50_pior_power;
-	priv->pior.dp = &nv50_pior_dp_func;
 	return 0;
 }
 
@@ -111,6 +110,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nv94_disp_outp_sclass,
 	.mthd.core = &nv94_disp_mast_mthd_chan,
 	.mthd.base = &nv84_disp_sync_mthd_chan,
 	.mthd.ovly = &nv84_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
index 876de9a..48aa38a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c

@@ -748,13 +748,13 @@
 }
 
 static void
-nvd0_disp_base_vblank_enable(struct nouveau_event *event, int head)
+nvd0_disp_base_vblank_enable(struct nouveau_event *event, int type, int head)
 {
 	nv_mask(event->priv, 0x6100c0 + (head * 0x800), 0x00000001, 0x00000001);
 }
 
 static void
-nvd0_disp_base_vblank_disable(struct nouveau_event *event, int head)
+nvd0_disp_base_vblank_disable(struct nouveau_event *event, int type, int head)
 {
 	nv_mask(event->priv, 0x6100c0 + (head * 0x800), 0x00000001, 0x00000000);
 }
@@ -887,6 +887,7 @@
 	{ SOR_MTHD(NVA3_DISP_SOR_HDA_ELD)     , nv50_sor_mthd },
 	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
 	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_PWR)      , nv50_sor_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
 	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
 	{ PIOR_MTHD(NV50_DISP_PIOR_PWR)       , nv50_pior_mthd },
@@ -915,19 +916,20 @@
  * Display engine implementation
  ******************************************************************************/
 
-static u16
-exec_lookup(struct nv50_disp_priv *priv, int head, int outp, u32 ctrl,
-	    struct dcb_output *dcb, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+static struct nvkm_output *
+exec_lookup(struct nv50_disp_priv *priv, int head, int or, u32 ctrl,
+	    u32 *data, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 	    struct nvbios_outp *info)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
-	u16 mask, type, data;
+	struct nvkm_output *outp;
+	u16 mask, type;
 
-	if (outp < 4) {
+	if (or < 4) {
 		type = DCB_OUTPUT_ANALOG;
 		mask = 0;
 	} else {
-		outp -= 4;
+		or -= 4;
 		switch (ctrl & 0x00000f00) {
 		case 0x00000000: type = DCB_OUTPUT_LVDS; mask = 1; break;
 		case 0x00000100: type = DCB_OUTPUT_TMDS; mask = 1; break;
@@ -939,101 +941,106 @@
 			nv_error(priv, "unknown SOR mc 0x%08x\n", ctrl);
 			return 0x0000;
 		}
-		dcb->sorconf.link = mask;
 	}
 
 	mask  = 0x00c0 & (mask << 6);
-	mask |= 0x0001 << outp;
+	mask |= 0x0001 << or;
 	mask |= 0x0100 << head;
 
-	data = dcb_outp_match(bios, type, mask, ver, hdr, dcb);
-	if (!data)
-		return 0x0000;
+	list_for_each_entry(outp, &priv->base.outp, head) {
+		if ((outp->info.hasht & 0xff) == type &&
+		    (outp->info.hashm & mask) == mask) {
+			*data = nvbios_outp_match(bios, outp->info.hasht,
+							outp->info.hashm,
+						  ver, hdr, cnt, len, info);
+			if (!*data)
+				return NULL;
+			return outp;
+		}
+	}
 
-	return nvbios_outp_match(bios, type, mask, ver, hdr, cnt, len, info);
+	return NULL;
 }
 
-static bool
+static struct nvkm_output *
 exec_script(struct nv50_disp_priv *priv, int head, int id)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvkm_output *outp;
 	struct nvbios_outp info;
-	struct dcb_output dcb;
 	u8  ver, hdr, cnt, len;
-	u32 ctrl = 0x00000000;
-	u16 data;
-	int outp;
+	u32 data, ctrl = 0;
+	int or;
 
-	for (outp = 0; !(ctrl & (1 << head)) && outp < 8; outp++) {
-		ctrl = nv_rd32(priv, 0x640180 + (outp * 0x20));
+	for (or = 0; !(ctrl & (1 << head)) && or < 8; or++) {
+		ctrl = nv_rd32(priv, 0x640180 + (or * 0x20));
 		if (ctrl & (1 << head))
 			break;
 	}
 
-	if (outp == 8)
-		return false;
+	if (or == 8)
+		return NULL;
 
-	data = exec_lookup(priv, head, outp, ctrl, &dcb, &ver, &hdr, &cnt, &len, &info);
-	if (data) {
+	outp = exec_lookup(priv, head, or, ctrl, &data, &ver, &hdr, &cnt, &len, &info);
+	if (outp) {
 		struct nvbios_init init = {
 			.subdev = nv_subdev(priv),
 			.bios = bios,
 			.offset = info.script[id],
-			.outp = &dcb,
+			.outp = &outp->info,
 			.crtc = head,
 			.execute = 1,
 		};
 
-		return nvbios_exec(&init) == 0;
+		nvbios_exec(&init);
 	}
 
-	return false;
+	return outp;
 }
 
-static u32
-exec_clkcmp(struct nv50_disp_priv *priv, int head, int id,
-	    u32 pclk, struct dcb_output *dcb)
+static struct nvkm_output *
+exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, u32 pclk, u32 *conf)
 {
 	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvkm_output *outp;
 	struct nvbios_outp info1;
 	struct nvbios_ocfg info2;
 	u8  ver, hdr, cnt, len;
-	u32 ctrl = 0x00000000;
-	u32 data, conf = ~0;
-	int outp;
+	u32 data, ctrl = 0;
+	int or;
 
-	for (outp = 0; !(ctrl & (1 << head)) && outp < 8; outp++) {
-		ctrl = nv_rd32(priv, 0x660180 + (outp * 0x20));
+	for (or = 0; !(ctrl & (1 << head)) && or < 8; or++) {
+		ctrl = nv_rd32(priv, 0x660180 + (or * 0x20));
 		if (ctrl & (1 << head))
 			break;
 	}
 
-	if (outp == 8)
-		return conf;
+	if (or == 8)
+		return NULL;
 
-	data = exec_lookup(priv, head, outp, ctrl, dcb, &ver, &hdr, &cnt, &len, &info1);
-	if (data == 0x0000)
-		return conf;
+	outp = exec_lookup(priv, head, or, ctrl, &data, &ver, &hdr, &cnt, &len, &info1);
+	if (!outp)
+		return NULL;
 
-	switch (dcb->type) {
+	switch (outp->info.type) {
 	case DCB_OUTPUT_TMDS:
-		conf = (ctrl & 0x00000f00) >> 8;
+		*conf = (ctrl & 0x00000f00) >> 8;
 		if (pclk >= 165000)
-			conf |= 0x0100;
+			*conf |= 0x0100;
 		break;
 	case DCB_OUTPUT_LVDS:
-		conf = priv->sor.lvdsconf;
+		*conf = priv->sor.lvdsconf;
 		break;
 	case DCB_OUTPUT_DP:
-		conf = (ctrl & 0x00000f00) >> 8;
+		*conf = (ctrl & 0x00000f00) >> 8;
 		break;
 	case DCB_OUTPUT_ANALOG:
 	default:
-		conf = 0x00ff;
+		*conf = 0x00ff;
 		break;
 	}
 
-	data = nvbios_ocfg_match(bios, data, conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -1041,7 +1048,7 @@
 				.subdev = nv_subdev(priv),
 				.bios = bios,
 				.offset = data,
-				.outp = dcb,
+				.outp = &outp->info,
 				.crtc = head,
 				.execute = 1,
 			};
@@ -1050,7 +1057,7 @@
 		}
 	}
 
-	return conf;
+	return outp;
 }
 
 static void
@@ -1062,7 +1069,23 @@
 static void
 nvd0_disp_intr_unk2_0(struct nv50_disp_priv *priv, int head)
 {
-	exec_script(priv, head, 2);
+	struct nvkm_output *outp = exec_script(priv, head, 2);
+
+	/* see note in nv50_disp_intr_unk20_0() */
+	if (outp && outp->info.type == DCB_OUTPUT_DP) {
+		struct nvkm_output_dp *outpdp = (void *)outp;
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = nouveau_bios(priv),
+			.outp = &outp->info,
+			.crtc = head,
+			.offset = outpdp->info.script[4],
+			.execute = 1,
+		};
+
+		nvbios_exec(&init);
+		atomic_set(&outpdp->lt.done, 0);
+	}
 }
 
 static void
@@ -1124,49 +1147,52 @@
 static void
 nvd0_disp_intr_unk2_2(struct nv50_disp_priv *priv, int head)
 {
-	struct dcb_output outp;
+	struct nvkm_output *outp;
 	u32 pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
-	u32 conf = exec_clkcmp(priv, head, 0xff, pclk, &outp);
-	if (conf != ~0) {
-		u32 addr, data;
+	u32 conf, addr, data;
 
-		if (outp.type == DCB_OUTPUT_DP) {
-			u32 sync = nv_rd32(priv, 0x660404 + (head * 0x300));
-			switch ((sync & 0x000003c0) >> 6) {
-			case 6: pclk = pclk * 30 / 8; break;
-			case 5: pclk = pclk * 24 / 8; break;
-			case 2:
-			default:
-				pclk = pclk * 18 / 8;
-				break;
-			}
+	outp = exec_clkcmp(priv, head, 0xff, pclk, &conf);
+	if (!outp)
+		return;
 
-			nouveau_dp_train(&priv->base, priv->sor.dp,
-					 &outp, head, pclk);
+	/* see note in nv50_disp_intr_unk20_2() */
+	if (outp->info.type == DCB_OUTPUT_DP) {
+		u32 sync = nv_rd32(priv, 0x660404 + (head * 0x300));
+		switch ((sync & 0x000003c0) >> 6) {
+		case 6: pclk = pclk * 30 / 8; break;
+		case 5: pclk = pclk * 24 / 8; break;
+		case 2:
+		default:
+			pclk = pclk * 18 / 8;
+			break;
 		}
 
-		exec_clkcmp(priv, head, 0, pclk, &outp);
-
-		if (outp.type == DCB_OUTPUT_ANALOG) {
-			addr = 0x612280 + (ffs(outp.or) - 1) * 0x800;
-			data = 0x00000000;
-		} else {
-			if (outp.type == DCB_OUTPUT_DP)
-				nvd0_disp_intr_unk2_2_tu(priv, head, &outp);
-			addr = 0x612300 + (ffs(outp.or) - 1) * 0x800;
-			data = (conf & 0x0100) ? 0x00000101 : 0x00000000;
-		}
-
-		nv_mask(priv, addr, 0x00000707, data);
+		if (nvkm_output_dp_train(outp, pclk, true))
+			ERR("link not trained before attach\n");
 	}
+
+	exec_clkcmp(priv, head, 0, pclk, &conf);
+
+	if (outp->info.type == DCB_OUTPUT_ANALOG) {
+		addr = 0x612280 + (ffs(outp->info.or) - 1) * 0x800;
+		data = 0x00000000;
+	} else {
+		if (outp->info.type == DCB_OUTPUT_DP)
+			nvd0_disp_intr_unk2_2_tu(priv, head, &outp->info);
+		addr = 0x612300 + (ffs(outp->info.or) - 1) * 0x800;
+		data = (conf & 0x0100) ? 0x00000101 : 0x00000000;
+	}
+
+	nv_mask(priv, addr, 0x00000707, data);
 }
 
 static void
 nvd0_disp_intr_unk4_0(struct nv50_disp_priv *priv, int head)
 {
-	struct dcb_output outp;
 	u32 pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
-	exec_clkcmp(priv, head, 1, pclk, &outp);
+	u32 conf;
+
+	exec_clkcmp(priv, head, 1, pclk, &conf);
 }
 
 void
@@ -1240,7 +1266,7 @@
 		 chid, (mthd & 0x0000ffc), data, mthd, unkn);
 
 	if (chid == 0) {
-		switch (mthd) {
+		switch (mthd & 0xffc) {
 		case 0x0080:
 			nv50_disp_mthd_chan(priv, NV_DBG_ERROR, chid - 0,
 					    impl->mthd.core);
@@ -1250,7 +1276,7 @@
 		}
 	} else
 	if (chid <= 4) {
-		switch (mthd) {
+		switch (mthd & 0xffc) {
 		case 0x0080:
 			nv50_disp_mthd_chan(priv, NV_DBG_ERROR, chid - 1,
 					    impl->mthd.base);
@@ -1260,7 +1286,7 @@
 		}
 	} else
 	if (chid <= 8) {
-		switch (mthd) {
+		switch (mthd & 0xffc) {
 		case 0x0080:
 			nv50_disp_mthd_chan(priv, NV_DBG_ERROR, chid - 5,
 					    impl->mthd.ovly);
@@ -1317,7 +1343,7 @@
 		if (mask & intr) {
 			u32 stat = nv_rd32(priv, 0x6100bc + (i * 0x800));
 			if (stat & 0x00000001)
-				nouveau_event_trigger(priv->base.vblank, i);
+				nouveau_event_trigger(priv->base.vblank, 1, i);
 			nv_mask(priv, 0x6100bc + (i * 0x800), 0, 0);
 			nv_rd32(priv, 0x6100c0 + (i * 0x800));
 		}
@@ -1352,11 +1378,16 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
 	priv->sor.hdmi = nvd0_hdmi_ctrl;
-	priv->sor.dp = &nvd0_sor_dp_func;
 	return 0;
 }
 
 struct nouveau_oclass *
+nvd0_disp_outp_sclass[] = {
+	&nvd0_sor_dp_impl.base.base,
+	NULL
+};
+
+struct nouveau_oclass *
 nvd0_disp_oclass = &(struct nv50_disp_impl) {
 	.base.base.handle = NV_ENGINE(DISP, 0x90),
 	.base.base.ofuncs = &(struct nouveau_ofuncs) {
@@ -1365,6 +1396,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nvd0_disp_outp_sclass,
 	.mthd.core = &nvd0_disp_mast_mthd_chan,
 	.mthd.base = &nvd0_disp_sync_mthd_chan,
 	.mthd.ovly = &nvd0_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
index 44e0b8f..11328e3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c

@@ -246,7 +246,6 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
 	priv->sor.hdmi = nvd0_hdmi_ctrl;
-	priv->sor.dp = &nvd0_sor_dp_func;
 	return 0;
 }
 
@@ -259,6 +258,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nvd0_disp_outp_sclass,
 	.mthd.core = &nve0_disp_mast_mthd_chan,
 	.mthd.base = &nvd0_disp_sync_mthd_chan,
 	.mthd.ovly = &nve0_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
index 482585d..1043880 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c

@@ -81,7 +81,6 @@
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
 	priv->sor.hdmi = nvd0_hdmi_ctrl;
-	priv->sor.dp = &nvd0_sor_dp_func;
 	return 0;
 }
 
@@ -94,6 +93,7 @@
 		.init = _nouveau_disp_init,
 		.fini = _nouveau_disp_fini,
 	},
+	.base.outp =  nvd0_disp_outp_sclass,
 	.mthd.core = &nve0_disp_mast_mthd_chan,
 	.mthd.base = &nvd0_disp_sync_mthd_chan,
 	.mthd.ovly = &nve0_disp_ovly_mthd_chan,

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/outp.c b/drivers/gpu/drm/nouveau/core/engine/disp/outp.c
new file mode 100644
index 0000000..ad9ba7c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/outp.c

@@ -0,0 +1,137 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/i2c.h>
+#include <subdev/bios.h>
+#include <subdev/bios/conn.h>
+
+#include "outp.h"
+/* Stop an output: run the connector's fini hook, then fini the base object. */
+int
+_nvkm_output_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nvkm_output *outp = (void *)object;
+	nv_ofuncs(outp->conn)->fini(nv_object(outp->conn), suspend); /* result unused */
+	return nouveau_object_fini(&outp->base, suspend);
+}
+/* Init the base object, then the connector; returns the base init status. */
+int
+_nvkm_output_init(struct nouveau_object *object)
+{
+	struct nvkm_output *outp = (void *)object;
+	int ret = nouveau_object_init(&outp->base);
+	if (ret == 0) /* only touch the connector once the base object is up */
+		nv_ofuncs(outp->conn)->init(nv_object(outp->conn));
+	return ret; /* propagate a failed base-object init to the caller */
+}
+/* Tear down an output: unlink it, release the connector, destroy the base. */
+void
+_nvkm_output_dtor(struct nouveau_object *object)
+{
+	struct nvkm_output *outp = (void *)object;
+	list_del(&outp->head); /* head is linked at the end of nvkm_output_create_() */
+	nouveau_object_ref(NULL, (void *)&outp->conn); /* presumably drops the ref */
+	nouveau_object_destroy(&outp->base);
+}
+/* Allocate an output object of @length bytes and attach a connector to it. */
+int
+nvkm_output_create_(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass,
+		    struct dcb_output *dcbE, int index,
+		    int length, void **pobject)
+{
+	struct nouveau_bios *bios = nouveau_bios(engine);
+	struct nouveau_i2c *i2c = nouveau_i2c(parent);
+	struct nouveau_disp *disp = (void *)engine;
+	struct nvbios_connE connE;
+	struct nvkm_output *outp;
+	u8  ver, hdr;
+	u32 data;
+	int ret;
+
+	ret = nouveau_object_create_(parent, engine, oclass, 0, length, pobject);
+	outp = *pobject; /* read before the check; only dereferenced if ret == 0 */
+	if (ret)
+		return ret;
+	/* keep a private copy of the DCB entry and remember its index */
+	outp->info = *dcbE;
+	outp->index = index;
+
+	DBG("type %02x loc %d or %d link %d con %x edid %x bus %d head %x\n",
+	    dcbE->type, dcbE->location, dcbE->or, dcbE->type >= 2 ?
+	    dcbE->sorconf.link : 0, dcbE->connector, dcbE->i2c_index,
+	    dcbE->bus, dcbE->heads);
+	/* the same i2c port is recorded as both ->port and ->edid */
+	outp->port = i2c->find(i2c, outp->info.i2c_index);
+	outp->edid = outp->port;
+	/* use an empty DCB_CONNECTOR_NONE entry if the lookup returns nothing */
+	data = nvbios_connEp(bios, outp->info.connector, &ver, &hdr, &connE);
+	if (!data) {
+		DBG("vbios connector data not found\n");
+		memset(&connE, 0x00, sizeof(connE));
+		connE.type = DCB_CONNECTOR_NONE;
+	}
+	/* construct the connector object; the result is stored in outp->conn */
+	ret = nouveau_object_ctor(parent, engine, nvkm_connector_oclass,
+				 &connE, outp->info.connector,
+				 (struct nouveau_object **)&outp->conn);
+	if (ret < 0) {
+		ERR("error %d creating connector, disabling\n", ret);
+		return ret; /* NOTE(review): head not linked yet; dtor list_del()s it */
+	}
+	/* success: publish the output on the display engine's output list */
+	list_add_tail(&outp->head, &disp->outp);
+	return 0;
+}
+/* Generic ctor hook: create a plain nvkm_output from the DCB entry @dcbE. */
+int
+_nvkm_output_ctor(struct nouveau_object *parent,
+		  struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *dcbE, u32 index,
+		  struct nouveau_object **pobject)
+{
+	struct nvkm_output *outp;
+	int ret;
+
+	ret = nvkm_output_create(parent, engine, oclass, dcbE, index, &outp);
+	*pobject = nv_object(outp); /* handed back to the caller even on error */
+	if (ret)
+		return ret;
+
+	return 0;
+}
+/* Base output class: wires the generic _nvkm_output_*() hooks into ofuncs. */
+struct nouveau_oclass *
+nvkm_output_oclass = &(struct nvkm_output_impl) {
+	.base = {
+		.handle = 0,
+		.ofuncs = &(struct nouveau_ofuncs) {
+			.ctor = _nvkm_output_ctor,
+			.dtor = _nvkm_output_dtor,
+			.init = _nvkm_output_init,
+			.fini = _nvkm_output_fini,
+		},
+	},
+}.base; /* exported as a pointer to the embedded nouveau_oclass */

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/outp.h b/drivers/gpu/drm/nouveau/core/engine/disp/outp.h
new file mode 100644
index 0000000..bc76fbf
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/outp.h

@@ -0,0 +1,59 @@
+#ifndef __NVKM_DISP_OUTP_H__
+#define __NVKM_DISP_OUTP_H__
+
+#include "priv.h"
+/* Common per-output state; struct nvkm_output_dp embeds it as first member. */
+struct nvkm_output {
+	struct nouveau_object base;
+	struct list_head head; /* link in the display engine's ->outp list */
+
+	struct dcb_output info; /* copy of the DCB entry passed at creation */
+	int index; /* index passed at creation; printed by MSG() */
+
+	struct nouveau_i2c_port *port; /* i2c->find(info.i2c_index) result */
+	struct nouveau_i2c_port *edid; /* set equal to ->port at creation */
+
+	struct nvkm_connector *conn; /* built in nvkm_output_create_() */
+};
+/* Typed wrappers: create sizes the object from **d; the rest call the hooks. */
+#define nvkm_output_create(p,e,c,b,i,d)                                        \
+	nvkm_output_create_((p), (e), (c), (b), (i), sizeof(**d), (void **)d)
+#define nvkm_output_destroy(d) ({                                              \
+	struct nvkm_output *_outp = (d);                                       \
+	_nvkm_output_dtor(nv_object(_outp));                                   \
+})
+#define nvkm_output_init(d) ({                                                 \
+	struct nvkm_output *_outp = (d);                                       \
+	_nvkm_output_init(nv_object(_outp));                                   \
+})
+#define nvkm_output_fini(d,s) ({                                               \
+	struct nvkm_output *_outp = (d);                                       \
+	_nvkm_output_fini(nv_object(_outp), (s));                              \
+})
+
+int nvkm_output_create_(struct nouveau_object *, struct nouveau_object *,
+			struct nouveau_oclass *, struct dcb_output *,
+			int, int, void **);
+
+int  _nvkm_output_ctor(struct nouveau_object *, struct nouveau_object *,
+		       struct nouveau_oclass *, void *, u32,
+		       struct nouveau_object **);
+void _nvkm_output_dtor(struct nouveau_object *);
+int  _nvkm_output_init(struct nouveau_object *);
+int  _nvkm_output_fini(struct nouveau_object *, bool);
+/* Output class descriptor; currently just wraps the generic nouveau_oclass. */
+struct nvkm_output_impl {
+	struct nouveau_oclass base;
+};
+/* Log helpers; each expands to use a variable named "outp" from the caller. */
+#ifndef MSG
+#define MSG(l,f,a...) do {                                                     \
+	struct nvkm_output *_outp = (void *)outp;                              \
+	nv_##l(nv_object(outp)->engine, "%02x:%04x:%04x: "f, _outp->index,     \
+	       _outp->info.hasht, _outp->info.hashm, ##a);                     \
+} while(0)
+#define DBG(f,a...) MSG(debug, f, ##a)
+#define ERR(f,a...) MSG(error, f, ##a)
+#endif
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c
new file mode 100644
index 0000000..52c299c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c

@@ -0,0 +1,276 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/i2c.h>
+
+#include "outpdp.h"
+#include "conn.h"
+#include "dport.h"
+/* Ensure DP link is trained for @datarate; returns aux status or -ETIMEDOUT. */
+int
+nvkm_output_dp_train(struct nvkm_output *base, u32 datarate, bool wait)
+{
+	struct nvkm_output_dp *outp = (void *)base;
+	bool retrain = true; /* cleared only when every check below passes */
+	u8 link[2], stat[3];
+	u32 rate;
+	int ret, i;
+
+	/* check that the link is trained at a high enough rate */
+	ret = nv_rdaux(outp->base.edid, DPCD_LC00_LINK_BW_SET, link, 2);
+	if (ret) {
+		DBG("failed to read link config, assuming no sink\n");
+		goto done;
+	}
+	/* link[0] is the bandwidth code, link[1] carries the lane count */
+	rate = link[0] * 27000 * (link[1] & DPCD_LC01_LANE_COUNT_SET);
+	if (rate < ((datarate / 8) * 10)) {
+		DBG("link not trained at sufficient rate\n");
+		goto done;
+	}
+
+	/* check that link is still trained */
+	ret = nv_rdaux(outp->base.edid, DPCD_LS02, stat, 3);
+	if (ret) {
+		DBG("failed to read link status, assuming no sink\n");
+		goto done;
+	}
+	/* stat[0..1] pack two lanes per byte, one status nibble per lane */
+	if (stat[2] & DPCD_LS04_INTERLANE_ALIGN_DONE) {
+		for (i = 0; i < (link[1] & DPCD_LC01_LANE_COUNT_SET); i++) {
+			u8 lane = (stat[i >> 1] >> ((i & 1) * 4)) & 0x0f;
+			if (!(lane & DPCD_LS02_LANE0_CR_DONE) ||
+			    !(lane & DPCD_LS02_LANE0_CHANNEL_EQ_DONE) ||
+			    !(lane & DPCD_LS02_LANE0_SYMBOL_LOCKED)) {
+				DBG("lane %d not equalised\n", i); /* index, not status */
+				goto done;
+			}
+		}
+		retrain = false;
+	} else {
+		DBG("no inter-lane alignment\n");
+	}
+	/* train if a check failed, or if lt.done is still zero */
+done:
+	if (retrain || !atomic_read(&outp->lt.done)) {
+		/* no sink, but still need to configure source */
+		if (outp->dpcd[DPCD_RC00_DPCD_REV] == 0x00) {
+			outp->dpcd[DPCD_RC01_MAX_LINK_RATE] =
+				outp->base.info.dpconf.link_bw;
+			outp->dpcd[DPCD_RC02] =
+				outp->base.info.dpconf.link_nr;
+		}
+		atomic_set(&outp->lt.done, 0);
+		schedule_work(&outp->lt.work); /* lt.work runs nouveau_dp_train() */
+	} else {
+		nouveau_event_get(outp->irq); /* presumably (re)arms the aux irq event */
+	}
+	/* optionally block for up to 2000ms until lt.done becomes non-zero */
+	if (wait) {
+		if (!wait_event_timeout(outp->lt.wait,
+					atomic_read(&outp->lt.done),
+					msecs_to_jiffies(2000)))
+			ret = -ETIMEDOUT;
+	}
+
+	return ret;
+}
+/* Track sink presence: hold or release the aux pad and reset training state. */
+static void
+nvkm_output_dp_enable(struct nvkm_output_dp *outp, bool present)
+{
+	struct nouveau_i2c_port *port = outp->base.edid;
+	if (present) {
+		if (!outp->present) { /* first time the sink is seen: keep the pad */
+			nouveau_i2c(port)->acquire_pad(port, 0);
+			DBG("aux power -> always\n");
+			outp->present = true;
+		}
+		nvkm_output_dp_train(&outp->base, 0, true); /* datarate 0; result unused */
+	} else {
+		if (outp->present) { /* sink went away: give the pad back */
+			nouveau_i2c(port)->release_pad(port);
+			DBG("aux power -> demand\n");
+			outp->present = false;
+		}
+		atomic_set(&outp->lt.done, 0); /* any previous training is now stale */
+	}
+}
+/* Probe for a sink by reading DPCD into outp->dpcd, then update presence. */
+static void
+nvkm_output_dp_detect(struct nvkm_output_dp *outp)
+{
+	struct nouveau_i2c_port *port = outp->base.edid;
+	int ret = nouveau_i2c(port)->acquire_pad(port, 0);
+	if (ret == 0) { /* nothing is done if the pad cannot be acquired */
+		ret = nv_rdaux(outp->base.edid, DPCD_RC00_DPCD_REV,
+			       outp->dpcd, sizeof(outp->dpcd));
+		nvkm_output_dp_enable(outp, ret == 0); /* read ok => sink present */
+		nouveau_i2c(port)->release_pad(port);
+	}
+}
+/* Worker: handle pending aux plug/unplug/irq bits and forward them as HPD. */
+static void
+nvkm_output_dp_service_work(struct work_struct *work)
+{
+	struct nvkm_output_dp *outp = container_of(work, typeof(*outp), work);
+	struct nouveau_disp *disp = nouveau_disp(outp);
+	int type = atomic_xchg(&outp->pending, 0); /* claim and clear all bits */
+	u32 send = 0;
+	/* plug/unplug: re-probe the sink and translate to NVKM_HPD_* bits */
+	if (type & (NVKM_I2C_PLUG | NVKM_I2C_UNPLUG)) {
+		nvkm_output_dp_detect(outp);
+		if (type & NVKM_I2C_UNPLUG)
+			send |= NVKM_HPD_UNPLUG;
+		if (type & NVKM_I2C_PLUG)
+			send |= NVKM_HPD_PLUG;
+		nouveau_event_get(outp->base.conn->hpd.event); /* presumably re-arms */
+	}
+	/* aux irq: re-check the link (datarate 0) and report it as HPD irq */
+	if (type & NVKM_I2C_IRQ) {
+		nvkm_output_dp_train(&outp->base, 0, true);
+		send |= NVKM_HPD_IRQ;
+	}
+	/* forward the collected bits on the display's hpd event, per connector */
+	nouveau_event_trigger(disp->hpd, send, outp->base.info.connector);
+}
+/* Aux event handler: record @type in ->pending and defer to the worker. */
+static int
+nvkm_output_dp_service(void *data, u32 type, int index)
+{
+	struct nvkm_output_dp *outp = data;
+	DBG("HPD: %d\n", type);
+	atomic_or(type, &outp->pending); /* consumed by nvkm_output_dp_service_work() */
+	schedule_work(&outp->work);
+	return NVKM_EVENT_DROP;
+}
+/* Stop a DP output: put the aux irq event, mark sink absent, fini the base. */
+int
+_nvkm_output_dp_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nvkm_output_dp *outp = (void *)object;
+	nouveau_event_put(outp->irq);
+	nvkm_output_dp_enable(outp, false); /* releases the pad, clears lt.done */
+	return nvkm_output_fini(&outp->base, suspend);
+}
+/* Start a DP output: probe for a sink first, then init the base output. */
+int
+_nvkm_output_dp_init(struct nouveau_object *object)
+{
+	struct nvkm_output_dp *outp = (void *)object;
+	nvkm_output_dp_detect(outp); /* returns nothing; probe failure is not fatal */
+	return nvkm_output_init(&outp->base);
+}
+/* Destroy a DP output: clear the aux irq event, then destroy the base output. */
+void
+_nvkm_output_dp_dtor(struct nouveau_object *object)
+{
+	struct nvkm_output_dp *outp = (void *)object;
+	nouveau_event_ref(NULL, &outp->irq); /* presumably drops the handler ref */
+	nvkm_output_destroy(&outp->base);
+}
+/* Create a DP output: base output, bios DP data, training and aux events. */
+int
+nvkm_output_dp_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass,
+		       struct dcb_output *info, int index,
+		       int length, void **pobject)
+{
+	struct nouveau_bios *bios = nouveau_bios(parent);
+	struct nouveau_i2c *i2c = nouveau_i2c(parent);
+	struct nvkm_output_dp *outp;
+	u8  hdr, cnt, len;
+	u32 data;
+	int ret;
+
+	ret = nvkm_output_create_(parent, engine, oclass, info, index,
+				  length, pobject);
+	outp = *pobject; /* read before the check; only dereferenced if ret == 0 */
+	if (ret)
+		return ret;
+	/* clear the connector's hpd event; an aux-based one is installed below */
+	nouveau_event_ref(NULL, &outp->base.conn->hpd.event);
+
+	/* access to the aux channel is not optional... */
+	if (!outp->base.edid) {
+		ERR("aux channel not found\n");
+		return -ENODEV;
+	}
+
+	/* nor is the bios data for this output... */
+	data = nvbios_dpout_match(bios, outp->base.info.hasht,
+				  outp->base.info.hashm, &outp->version,
+				  &hdr, &cnt, &len, &outp->info);
+	if (!data) {
+		ERR("no bios dp data\n");
+		return -ENODEV;
+	}
+
+	DBG("bios dp %02x %02x %02x %02x\n", outp->version, hdr, cnt, len);
+
+	/* link training: lt.work runs nouveau_dp_train(), lt.done starts at 0 */
+	INIT_WORK(&outp->lt.work, nouveau_dp_train);
+	init_waitqueue_head(&outp->lt.wait);
+	atomic_set(&outp->lt.done, 0);
+
+	/* link maintenance: aux irq events are routed to nvkm_output_dp_service() */
+	ret = nouveau_event_new(i2c->ntfy, NVKM_I2C_IRQ, outp->base.edid->index,
+				nvkm_output_dp_service, outp, &outp->irq);
+	if (ret) {
+		ERR("error monitoring aux irq event: %d\n", ret);
+		return ret;
+	}
+	/* worker scheduled by nvkm_output_dp_service() */
+	INIT_WORK(&outp->work, nvkm_output_dp_service_work);
+
+	/* hotplug detect, replaces gpio-based mechanism with aux events */
+	ret = nouveau_event_new(i2c->ntfy, NVKM_I2C_PLUG | NVKM_I2C_UNPLUG,
+				outp->base.edid->index,
+				nvkm_output_dp_service, outp,
+			       &outp->base.conn->hpd.event);
+	if (ret) {
+		ERR("error monitoring aux hpd events: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+/* Generic DP ctor hook: create an nvkm_output_dp from the DCB entry @info. */
+int
+_nvkm_output_dp_ctor(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass, void *info, u32 index,
+		     struct nouveau_object **pobject)
+{
+	struct nvkm_output_dp *outp;
+	int ret;
+
+	ret = nvkm_output_dp_create(parent, engine, oclass, info, index, &outp);
+	*pobject = nv_object(outp); /* handed back to the caller even on error */
+	if (ret)
+		return ret;
+
+	return 0;
+}

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.h
new file mode 100644
index 0000000..ff33ba1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.h

@@ -0,0 +1,65 @@
+#ifndef __NVKM_DISP_OUTP_DP_H__
+#define __NVKM_DISP_OUTP_DP_H__
+
+#include <subdev/bios.h>
+#include <subdev/bios/dp.h>
+
+#include "outp.h"
+
+struct nvkm_output_dp {
+	struct nvkm_output base;
+
+	struct nvbios_dpout info;	/* bios DP data, nvbios_dpout_match() */
+	u8 version;			/* version reported by that match */
+
+	struct nouveau_eventh *irq;	/* NVKM_I2C_IRQ handler (link maint.) */
+	struct nouveau_eventh *hpd;	/* NOTE(review): create keeps the hpd handler in base.conn->hpd.event; confirm use */
+	struct work_struct work;	/* runs nvkm_output_dp_service_work() */
+	atomic_t pending;
+	bool present;
+	u8 dpcd[16];
+
+	struct {			/* link training state */
+		struct work_struct work;	/* runs nouveau_dp_train() */
+		wait_queue_head_t wait;
+		atomic_t done;		/* 0 at create and after DP power-off */
+	} lt;
+};
+
+#define nvkm_output_dp_create(p,e,c,b,i,d)                                     \
+	nvkm_output_dp_create_((p), (e), (c), (b), (i), sizeof(**d), (void **)d)
+#define nvkm_output_dp_destroy(d) ({                                           \
+	struct nvkm_output_dp *_outp = (d);                                    \
+	_nvkm_output_dp_dtor(nv_object(_outp));                                \
+})
+#define nvkm_output_dp_init(d) ({                                              \
+	struct nvkm_output_dp *_outp = (d);                                    \
+	_nvkm_output_dp_init(nv_object(_outp));                                \
+})
+#define nvkm_output_dp_fini(d,s) ({                                            \
+	struct nvkm_output_dp *_outp = (d);                                    \
+	_nvkm_output_dp_fini(nv_object(_outp), (s));                           \
+})
+
+int nvkm_output_dp_create_(struct nouveau_object *, struct nouveau_object *,
+			   struct nouveau_oclass *, struct dcb_output *,
+			   int, int, void **);
+
+int  _nvkm_output_dp_ctor(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, void *, u32,
+			  struct nouveau_object **);
+void _nvkm_output_dp_dtor(struct nouveau_object *);
+int  _nvkm_output_dp_init(struct nouveau_object *);
+int  _nvkm_output_dp_fini(struct nouveau_object *, bool);
+
+struct nvkm_output_dp_impl {
+	struct nvkm_output_impl base;
+	int (*pattern)(struct nvkm_output_dp *, int);
+	int (*lnk_pwr)(struct nvkm_output_dp *, int nr);
+	int (*lnk_ctl)(struct nvkm_output_dp *, int nr, int bw, bool ef);
+	int (*drv_ctl)(struct nvkm_output_dp *, int ln, int vs, int pe, int pc);
+};
+
+int nvkm_output_dp_train(struct nvkm_output *, u32 rate, bool wait);
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/piornv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/piornv50.c
index 2c8ce35..fe0f256 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/piornv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/piornv50.c

@@ -33,68 +33,107 @@
 #include "nv50.h"
 
 /******************************************************************************
+ * TMDS
+ *****************************************************************************/
+
+/* PIOR TMDS ctor: base output plus the EXTDDC i2c port for its extdev. */
+static int
+nv50_pior_tmds_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *info, u32 index,
+		    struct nouveau_object **pobject)
+{
+	struct nouveau_i2c *bus = nouveau_i2c(parent);
+	struct nvkm_output *tmds;
+	int err;
+
+	err = nvkm_output_create(parent, engine, oclass, info, index, &tmds);
+	*pobject = nv_object(tmds);
+	if (!err)
+		tmds->edid = bus->find_type(bus,
+				NV_I2C_TYPE_EXTDDC(tmds->info.extdev));
+	return err;
+}
+
+struct nvkm_output_impl
+nv50_pior_tmds_impl = {
+	.base.handle = DCB_OUTPUT_TMDS | 0x0100,
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv50_pior_tmds_ctor,
+		.dtor = _nvkm_output_dtor,
+		.init = _nvkm_output_init,
+		.fini = _nvkm_output_fini,
+	},
+};
+
+/******************************************************************************
  * DisplayPort
  *****************************************************************************/
-static struct nouveau_i2c_port *
-nv50_pior_dp_find(struct nouveau_disp *disp, struct dcb_output *outp)
+
+static int
+nv50_pior_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
-	struct nouveau_i2c *i2c = nouveau_i2c(disp);
-	return i2c->find_type(i2c, NV_I2C_TYPE_EXTAUX(outp->extdev));
+	struct nouveau_i2c_port *port = outp->base.edid;
+	if (port && port->func->pattern)
+		return port->func->pattern(port, pattern);
+	return port ? 0 : -ENODEV;
 }
 
 static int
-nv50_pior_dp_pattern(struct nouveau_disp *disp, struct dcb_output *outp,
-		     int head, int pattern)
+nv50_pior_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
-	struct nouveau_i2c_port *port;
-	int ret = -EINVAL;
-
-	port = nv50_pior_dp_find(disp, outp);
-	if (port) {
-		if (port->func->pattern)
-			ret = port->func->pattern(port, pattern);
-		else
-			ret = 0;
-	}
-
-	return ret;
+	return 0;
 }
 
 static int
-nv50_pior_dp_lnk_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		     int head, int lane_nr, int link_bw, bool enh)
+nv50_pior_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 {
-	struct nouveau_i2c_port *port;
-	int ret = -EINVAL;
-
-	port = nv50_pior_dp_find(disp, outp);
+	struct nouveau_i2c_port *port = outp->base.edid;
 	if (port && port->func->lnk_ctl)
-		ret = port->func->lnk_ctl(port, lane_nr, link_bw, enh);
-
-	return ret;
+		return port->func->lnk_ctl(port, nr, bw, ef);
+	return port ? 0 : -ENODEV;
 }
 
 static int
-nv50_pior_dp_drv_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		     int head, int lane, int vsw, int pre)
+nv50_pior_dp_drv_ctl(struct nvkm_output_dp *outp, int ln, int vs, int pe, int pc)
 {
-	struct nouveau_i2c_port *port;
-	int ret = -EINVAL;
-
-	port = nv50_pior_dp_find(disp, outp);
-	if (port) {
-		if (port->func->drv_ctl)
-			ret = port->func->drv_ctl(port, lane, vsw, pre);
-		else
-			ret = 0;
-	}
-
-	return ret;
+	struct nouveau_i2c_port *port = outp->base.edid;
+	if (port && port->func->drv_ctl)
+		return port->func->drv_ctl(port, ln, vs, pe);
+	return port ? 0 : -ENODEV;
 }
 
-const struct nouveau_dp_func
-nv50_pior_dp_func = {
+/* PIOR DP ctor: after the common DP setup, point base.edid at the EXTAUX
+ * i2c port looked up for the DCB entry's extdev. */
+static int
+nv50_pior_dp_ctor(struct nouveau_object *parent,
+		  struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *info, u32 index,
+		  struct nouveau_object **pobject)
+{
+	struct nouveau_i2c *bus = nouveau_i2c(parent);
+	struct nvkm_output_dp *dp;
+	int err;
+
+	err = nvkm_output_dp_create(parent, engine, oclass, info, index, &dp);
+	*pobject = nv_object(dp);
+	if (!err)
+		dp->base.edid = bus->find_type(bus,
+				NV_I2C_TYPE_EXTAUX(dp->base.info.extdev));
+	return err;
+}
+
+struct nvkm_output_dp_impl
+nv50_pior_dp_impl = {
+	.base.base.handle = DCB_OUTPUT_DP | 0x0010,
+	.base.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv50_pior_dp_ctor,
+		.dtor = _nvkm_output_dp_dtor,
+		.init = _nvkm_output_dp_init,
+		.fini = _nvkm_output_dp_fini,
+	},
 	.pattern = nv50_pior_dp_pattern,
+	.lnk_pwr = nv50_pior_dp_lnk_pwr,
 	.lnk_ctl = nv50_pior_dp_lnk_ctl,
 	.drv_ctl = nv50_pior_dp_drv_ctl,
 };
@@ -102,6 +141,7 @@
 /******************************************************************************
  * General PIOR handling
  *****************************************************************************/
+
 int
 nv50_pior_power(struct nv50_disp_priv *priv, int or, u32 data)
 {

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/priv.h b/drivers/gpu/drm/nouveau/core/engine/disp/priv.h
index cc3c7a4..26e9a42 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/priv.h
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/priv.h

@@ -1,10 +1,42 @@
 #ifndef __NVKM_DISP_PRIV_H__
 #define __NVKM_DISP_PRIV_H__
 
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/conn.h>
+
 #include <engine/disp.h>
 
 struct nouveau_disp_impl {
 	struct nouveau_oclass base;
+	struct nouveau_oclass **outp;
+	struct nouveau_oclass **conn;
 };
 
+#define nouveau_disp_create(p,e,c,h,i,x,d)                                     \
+	nouveau_disp_create_((p), (e), (c), (h), (i), (x),                     \
+			     sizeof(**d), (void **)d)
+#define nouveau_disp_destroy(d) ({                                             \
+	struct nouveau_disp *disp = (d);                                       \
+	_nouveau_disp_dtor(nv_object(disp));                                   \
+})
+#define nouveau_disp_init(d) ({                                                \
+	struct nouveau_disp *disp = (d);                                       \
+	_nouveau_disp_init(nv_object(disp));                                   \
+})
+#define nouveau_disp_fini(d,s) ({                                              \
+	struct nouveau_disp *disp = (d);                                       \
+	_nouveau_disp_fini(nv_object(disp), (s));                              \
+})
+
+int  nouveau_disp_create_(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, int heads,
+			  const char *, const char *, int, void **);
+void _nouveau_disp_dtor(struct nouveau_object *);
+int  _nouveau_disp_init(struct nouveau_object *);
+int  _nouveau_disp_fini(struct nouveau_object *, bool);
+
+extern struct nouveau_oclass *nvkm_output_oclass;
+extern struct nouveau_oclass *nvkm_connector_oclass;
+
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
index 526b752..e183277 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c

@@ -47,8 +47,12 @@
 nv50_sor_mthd(struct nouveau_object *object, u32 mthd, void *args, u32 size)
 {
 	struct nv50_disp_priv *priv = (void *)object->engine;
+	const u8  type = (mthd & NV50_DISP_SOR_MTHD_TYPE) >> 12;
 	const u8  head = (mthd & NV50_DISP_SOR_MTHD_HEAD) >> 3;
+	const u8  link = (mthd & NV50_DISP_SOR_MTHD_LINK) >> 2;
 	const u8    or = (mthd & NV50_DISP_SOR_MTHD_OR);
+	const u16 mask = (0x0100 << head) | (0x0040 << link) | (0x0001 << or);
+	struct nvkm_output *outp = NULL, *temp;
 	u32 data;
 	int ret = -EINVAL;
 
@@ -56,6 +60,13 @@
 		return -EINVAL;
 	data = *(u32 *)args;
 
+	list_for_each_entry(temp, &priv->base.outp, head) {
+		if ((temp->info.hasht & 0xff) == type &&
+		    (temp->info.hashm & mask) == mask) {
+			outp = temp;
+			break;
+		}
+	}
 
 	switch (mthd & ~0x3f) {
 	case NV50_DISP_SOR_PWR:
@@ -71,6 +82,23 @@
 		priv->sor.lvdsconf = data & NV50_DISP_SOR_LVDS_SCRIPT_ID;
 		ret = 0;
 		break;
+	case NV94_DISP_SOR_DP_PWR:
+		if (outp) {
+			struct nvkm_output_dp *outpdp = (void *)outp;
+			switch (data) {
+			case NV94_DISP_SOR_DP_PWR_STATE_OFF:
+				((struct nvkm_output_dp_impl *)nv_oclass(outp))
+					->lnk_pwr(outpdp, 0);
+				atomic_set(&outpdp->lt.done, 0);
+				break;
+			case NV94_DISP_SOR_DP_PWR_STATE_ON:
+				nvkm_output_dp_train(&outpdp->base, 0, true);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+		break;
 	default:
 		BUG_ON(1);
 	}

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c
index eea3ef5..05487cd 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c

@@ -29,19 +29,21 @@
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/dp.h>
 #include <subdev/bios/init.h>
+#include <subdev/timer.h>
 
 #include "nv50.h"
+#include "outpdp.h"
 
 static inline u32
-nv94_sor_soff(struct dcb_output *outp)
+nv94_sor_soff(struct nvkm_output_dp *outp)
 {
-	return (ffs(outp->or) - 1) * 0x800;
+	return (ffs(outp->base.info.or) - 1) * 0x800;
 }
 
 static inline u32
-nv94_sor_loff(struct dcb_output *outp)
+nv94_sor_loff(struct nvkm_output_dp *outp)
 {
-	return nv94_sor_soff(outp) + !(outp->sorconf.link & 1) * 0x80;
+	return nv94_sor_soff(outp) + !(outp->base.info.sorconf.link & 1) * 0x80;
 }
 
 static inline u32
@@ -55,77 +57,96 @@
 }
 
 static int
-nv94_sor_dp_pattern(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int pattern)
+nv94_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
-	struct nv50_disp_priv *priv = (void *)disp;
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
 	const u32 loff = nv94_sor_loff(outp);
 	nv_mask(priv, 0x61c10c + loff, 0x0f000000, pattern << 24);
 	return 0;
 }
 
-static int
-nv94_sor_dp_lnk_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int link_nr, int link_bw, bool enh_frame)
+int
+nv94_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
-	struct nv50_disp_priv *priv = (void *)disp;
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
 	const u32 soff = nv94_sor_soff(outp);
 	const u32 loff = nv94_sor_loff(outp);
-	u32 dpctrl = 0x00000000;
-	u32 clksor = 0x00000000;
-	u32 lane = 0;
-	int i;
+	u32 mask = 0, i;
 
-	dpctrl |= ((1 << link_nr) - 1) << 16;
-	if (enh_frame)
-		dpctrl |= 0x00004000;
-	if (link_bw > 0x06)
-		clksor |= 0x00040000;
+	for (i = 0; i < nr; i++)
+		mask |= 1 << (nv94_sor_dp_lane_map(priv, i) >> 3);
 
-	for (i = 0; i < link_nr; i++)
-		lane |= 1 << (nv94_sor_dp_lane_map(priv, i) >> 3);
-
-	nv_mask(priv, 0x614300 + soff, 0x000c0000, clksor);
-	nv_mask(priv, 0x61c10c + loff, 0x001f4000, dpctrl);
-	nv_mask(priv, 0x61c130 + loff, 0x0000000f, lane);
+	nv_mask(priv, 0x61c130 + loff, 0x0000000f, mask);
+	nv_mask(priv, 0x61c034 + soff, 0x80000000, 0x80000000);
+	nv_wait(priv, 0x61c034 + soff, 0x80000000, 0x00000000);
 	return 0;
 }
 
 static int
-nv94_sor_dp_drv_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int lane, int swing, int preem)
+nv94_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 {
-	struct nouveau_bios *bios = nouveau_bios(disp);
-	struct nv50_disp_priv *priv = (void *)disp;
-	const u32 shift = nv94_sor_dp_lane_map(priv, lane);
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
+	const u32 soff = nv94_sor_soff(outp);
+	const u32 loff = nv94_sor_loff(outp);
+	u32 dpctrl = 0x00000000;
+	u32 clksor = 0x00000000;
+
+	dpctrl |= ((1 << nr) - 1) << 16;
+	if (ef)
+		dpctrl |= 0x00004000;
+	if (bw > 0x06)
+		clksor |= 0x00040000;
+
+	nv_mask(priv, 0x614300 + soff, 0x000c0000, clksor);
+	nv_mask(priv, 0x61c10c + loff, 0x001f4000, dpctrl);
+	return 0;
+}
+
+static int
+nv94_sor_dp_drv_ctl(struct nvkm_output_dp *outp, int ln, int vs, int pe, int pc)
+{
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 shift = nv94_sor_dp_lane_map(priv, ln);
 	const u32 loff = nv94_sor_loff(outp);
 	u32 addr, data[3];
 	u8  ver, hdr, cnt, len;
 	struct nvbios_dpout info;
 	struct nvbios_dpcfg ocfg;
 
-	addr = nvbios_dpout_match(bios, outp->hasht, outp->hashm,
+	addr = nvbios_dpout_match(bios, outp->base.info.hasht,
+					outp->base.info.hashm,
 				 &ver, &hdr, &cnt, &len, &info);
 	if (!addr)
 		return -ENODEV;
 
-	addr = nvbios_dpcfg_match(bios, addr, 0, swing, preem,
+	addr = nvbios_dpcfg_match(bios, addr, 0, vs, pe, /* pc not used here */
 				 &ver, &hdr, &cnt, &len, &ocfg);
 	if (!addr)
 		return -EINVAL;
 
 	data[0] = nv_rd32(priv, 0x61c118 + loff) & ~(0x000000ff << shift);
 	data[1] = nv_rd32(priv, 0x61c120 + loff) & ~(0x000000ff << shift);
-	data[2] = nv_rd32(priv, 0x61c130 + loff) & ~(0x0000ff00);
-	nv_wr32(priv, 0x61c118 + loff, data[0] | (ocfg.drv << shift));
-	nv_wr32(priv, 0x61c120 + loff, data[1] | (ocfg.pre << shift));
-	nv_wr32(priv, 0x61c130 + loff, data[2] | (ocfg.unk << 8));
+	data[2] = nv_rd32(priv, 0x61c130 + loff); /* tx_pu is bits 15:8 */
+	if ((data[2] & 0x0000ff00) < (ocfg.tx_pu << 8) || ln == 0)
+		data[2] = (data[2] & ~0x0000ff00) | (ocfg.tx_pu << 8);
+	nv_wr32(priv, 0x61c118 + loff, data[0] | (ocfg.dc << shift));
+	nv_wr32(priv, 0x61c120 + loff, data[1] | (ocfg.pe << shift));
+	nv_wr32(priv, 0x61c130 + loff, data[2]); /* tx_pu merged above */
 	return 0;
 }
 
-const struct nouveau_dp_func
-nv94_sor_dp_func = {
+struct nvkm_output_dp_impl
+nv94_sor_dp_impl = {
+	.base.base.handle = DCB_OUTPUT_DP,
+	.base.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nvkm_output_dp_ctor,
+		.dtor = _nvkm_output_dp_dtor,
+		.init = _nvkm_output_dp_init,
+		.fini = _nvkm_output_dp_fini,
+	},
 	.pattern = nv94_sor_dp_pattern,
+	.lnk_pwr = nv94_sor_dp_lnk_pwr,
 	.lnk_ctl = nv94_sor_dp_lnk_ctl,
 	.drv_ctl = nv94_sor_dp_drv_ctl,
 };

diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c
index d2df572..97f0e9c 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c

@@ -29,19 +29,20 @@
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/dp.h>
 #include <subdev/bios/init.h>
+#include <subdev/timer.h>
 
 #include "nv50.h"
 
 static inline u32
-nvd0_sor_soff(struct dcb_output *outp)
+nvd0_sor_soff(struct nvkm_output_dp *outp)
 {
-	return (ffs(outp->or) - 1) * 0x800;
+	return (ffs(outp->base.info.or) - 1) * 0x800;
 }
 
 static inline u32
-nvd0_sor_loff(struct dcb_output *outp)
+nvd0_sor_loff(struct nvkm_output_dp *outp)
 {
-	return nvd0_sor_soff(outp) + !(outp->sorconf.link & 1) * 0x80;
+	return nvd0_sor_soff(outp) + !(outp->base.info.sorconf.link & 1) * 0x80;
 }
 
 static inline u32
@@ -52,77 +53,80 @@
 }
 
 static int
-nvd0_sor_dp_pattern(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int pattern)
+nvd0_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
-	struct nv50_disp_priv *priv = (void *)disp;
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
 	const u32 loff = nvd0_sor_loff(outp);
 	nv_mask(priv, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
 	return 0;
 }
 
 static int
-nvd0_sor_dp_lnk_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int link_nr, int link_bw, bool enh_frame)
+nvd0_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 {
-	struct nv50_disp_priv *priv = (void *)disp;
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
 	const u32 soff = nvd0_sor_soff(outp);
 	const u32 loff = nvd0_sor_loff(outp);
 	u32 dpctrl = 0x00000000;
 	u32 clksor = 0x00000000;
-	u32 lane = 0;
-	int i;
 
-	clksor |= link_bw << 18;
-	dpctrl |= ((1 << link_nr) - 1) << 16;
-	if (enh_frame)
+	clksor |= bw << 18;
+	dpctrl |= ((1 << nr) - 1) << 16;
+	if (ef)
 		dpctrl |= 0x00004000;
 
-	for (i = 0; i < link_nr; i++)
-		lane |= 1 << (nvd0_sor_dp_lane_map(priv, i) >> 3);
-
 	nv_mask(priv, 0x612300 + soff, 0x007c0000, clksor);
 	nv_mask(priv, 0x61c10c + loff, 0x001f4000, dpctrl);
-	nv_mask(priv, 0x61c130 + loff, 0x0000000f, lane);
 	return 0;
 }
 
 static int
-nvd0_sor_dp_drv_ctl(struct nouveau_disp *disp, struct dcb_output *outp,
-		    int head, int lane, int swing, int preem)
+nvd0_sor_dp_drv_ctl(struct nvkm_output_dp *outp, int ln, int vs, int pe, int pc)
 {
-	struct nouveau_bios *bios = nouveau_bios(disp);
-	struct nv50_disp_priv *priv = (void *)disp;
-	const u32 shift = nvd0_sor_dp_lane_map(priv, lane);
+	struct nv50_disp_priv *priv = (void *)nouveau_disp(outp);
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 shift = nvd0_sor_dp_lane_map(priv, ln);
 	const u32 loff = nvd0_sor_loff(outp);
-	u32 addr, data[3];
+	u32 addr, data[4];
 	u8  ver, hdr, cnt, len;
 	struct nvbios_dpout info;
 	struct nvbios_dpcfg ocfg;
 
-	addr = nvbios_dpout_match(bios, outp->hasht, outp->hashm,
+	addr = nvbios_dpout_match(bios, outp->base.info.hasht,
+					outp->base.info.hashm,
 				 &ver, &hdr, &cnt, &len, &info);
 	if (!addr)
 		return -ENODEV;
 
-	addr = nvbios_dpcfg_match(bios, addr, 0, swing, preem,
+	addr = nvbios_dpcfg_match(bios, addr, pc, vs, pe,
 				 &ver, &hdr, &cnt, &len, &ocfg);
 	if (!addr)
 		return -EINVAL;
 
 	data[0] = nv_rd32(priv, 0x61c118 + loff) & ~(0x000000ff << shift);
 	data[1] = nv_rd32(priv, 0x61c120 + loff) & ~(0x000000ff << shift);
-	data[2] = nv_rd32(priv, 0x61c130 + loff) & ~(0x0000ff00);
-	nv_wr32(priv, 0x61c118 + loff, data[0] | (ocfg.drv << shift));
-	nv_wr32(priv, 0x61c120 + loff, data[1] | (ocfg.pre << shift));
-	nv_wr32(priv, 0x61c130 + loff, data[2] | (ocfg.unk << 8));
-	nv_mask(priv, 0x61c13c + loff, 0x00000000, 0x00000000);
+	data[2] = nv_rd32(priv, 0x61c130 + loff); /* tx_pu is bits 15:8 */
+	if ((data[2] & 0x0000ff00) < (ocfg.tx_pu << 8) || ln == 0)
+		data[2] = (data[2] & ~0x0000ff00) | (ocfg.tx_pu << 8);
+	nv_wr32(priv, 0x61c118 + loff, data[0] | (ocfg.dc << shift));
+	nv_wr32(priv, 0x61c120 + loff, data[1] | (ocfg.pe << shift));
+	nv_wr32(priv, 0x61c130 + loff, data[2]); /* tx_pu merged above */
+	data[3] = nv_rd32(priv, 0x61c13c + loff) & ~(0x000000ff << shift);
+	nv_wr32(priv, 0x61c13c + loff, data[3] | (ocfg.pc << shift));
 	return 0;
 }
 
-const struct nouveau_dp_func
-nvd0_sor_dp_func = {
+struct nvkm_output_dp_impl
+nvd0_sor_dp_impl = {
+	.base.base.handle = DCB_OUTPUT_DP,
+	.base.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nvkm_output_dp_ctor,
+		.dtor = _nvkm_output_dp_dtor,
+		.init = _nvkm_output_dp_init,
+		.fini = _nvkm_output_dp_fini,
+	},
 	.pattern = nvd0_sor_dp_pattern,
+	.lnk_pwr = nv94_sor_dp_lnk_pwr,
 	.lnk_ctl = nvd0_sor_dp_lnk_ctl,
 	.drv_ctl = nvd0_sor_dp_drv_ctl,
 };

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
index 6f9041c..56ed3d7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c

@@ -91,7 +91,7 @@
 	if (!chan->user)
 		return -EFAULT;
 
-	nouveau_event_trigger(priv->cevent, 0);
+	nouveau_event_trigger(priv->cevent, 1, 0);
 
 	chan->size = size;
 	return 0;
@@ -194,11 +194,11 @@
 	if (!priv->channel)
 		return -ENOMEM;
 
-	ret = nouveau_event_create(1, &priv->cevent);
+	ret = nouveau_event_create(1, 1, &priv->cevent);
 	if (ret)
 		return ret;
 
-	ret = nouveau_event_create(1, &priv->uevent);
+	ret = nouveau_event_create(1, 1, &priv->uevent);
 	if (ret)
 		return ret;
 

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c
new file mode 100644
index 0000000..327456e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c

@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nve0.h"
+
+struct nouveau_oclass *
+gk20a_fifo_oclass = &(struct nve0_fifo_impl) {	/* FIFO class, handle 0xea */
+	.base.handle = NV_ENGINE(FIFO, 0xea),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nve0_fifo_ctor,	/* all four hooks are the nve0 ones */
+		.dtor = nve0_fifo_dtor,
+		.init = nve0_fifo_init,
+		.fini = nve0_fifo_fini,
+	},
+	.channels = 128,
+}.base;	/* exported through the embedded base oclass */

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c
index 54f26cc..c61b16a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c

@@ -539,7 +539,7 @@
 			}
 
 			if (status & 0x40000000) {
-				nouveau_event_trigger(priv->base.uevent, 0);
+				nouveau_event_trigger(priv->base.uevent, 1, 0);
 				nv_wr32(priv, 0x002100, 0x40000000);
 				status &= ~0x40000000;
 			}

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
index fe0f41e..6e5ac16 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c

@@ -389,14 +389,14 @@
  ******************************************************************************/
 
 static void
-nv84_fifo_uevent_enable(struct nouveau_event *event, int index)
+nv84_fifo_uevent_enable(struct nouveau_event *event, int type, int index)
 {
 	struct nv84_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x40000000, 0x40000000);
 }
 
 static void
-nv84_fifo_uevent_disable(struct nouveau_event *event, int index)
+nv84_fifo_uevent_disable(struct nouveau_event *event, int type, int index)
 {
 	struct nv84_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x40000000, 0x00000000);

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c
index fa1e719..ae4a4dc 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c

@@ -730,7 +730,7 @@
 	for (unkn = 0; unkn < 8; unkn++) {
 		u32 ints = (intr >> (unkn * 0x04)) & inte;
 		if (ints & 0x1) {
-			nouveau_event_trigger(priv->base.uevent, 0);
+			nouveau_event_trigger(priv->base.uevent, 1, 0);
 			ints &= ~1;
 		}
 		if (ints) {
@@ -827,14 +827,14 @@
 }
 
 static void
-nvc0_fifo_uevent_enable(struct nouveau_event *event, int index)
+nvc0_fifo_uevent_enable(struct nouveau_event *event, int type, int index)
 {
 	struct nvc0_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x80000000, 0x80000000);
 }
 
 static void
-nvc0_fifo_uevent_disable(struct nouveau_event *event, int index)
+nvc0_fifo_uevent_disable(struct nouveau_event *event, int type, int index)
 {
 	struct nvc0_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x80000000, 0x00000000);

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
index a9a1a9c..298063e 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c

@@ -859,7 +859,7 @@
 static void
 nve0_fifo_intr_engine(struct nve0_fifo_priv *priv)
 {
-	nouveau_event_trigger(priv->base.uevent, 0);
+	nouveau_event_trigger(priv->base.uevent, 1, 0);
 }
 
 static void
@@ -952,14 +952,14 @@
 }
 
 static void
-nve0_fifo_uevent_enable(struct nouveau_event *event, int index)
+nve0_fifo_uevent_enable(struct nouveau_event *event, int type, int index)
 {
 	struct nve0_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x80000000, 0x80000000);
 }
 
 static void
-nve0_fifo_uevent_disable(struct nouveau_event *event, int index)
+nve0_fifo_uevent_disable(struct nouveau_event *event, int type, int index)
 {
 	struct nve0_fifo_priv *priv = event->priv;
 	nv_mask(priv, 0x002140, 0x80000000, 0x00000000);

diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h
index 014344e..e96b32b 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h

@@ -8,6 +8,7 @@
 		    struct nouveau_object **);
 void nve0_fifo_dtor(struct nouveau_object *);
 int  nve0_fifo_init(struct nouveau_object *);
+int  nve0_fifo_fini(struct nouveau_object *, bool);
 
 struct nve0_fifo_impl {
 	struct nouveau_oclass base;

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c
new file mode 100644
index 0000000..224ee02
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c

@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ctxnvc0.h"
+
+static const struct nvc0_graph_pack
+gk20a_grctx_pack_mthd[] = {	/* used as .mthd of gk20a_grctx_oclass */
+	{ nve4_grctx_init_a097_0, 0xa297 },	/* nve4 a097 list, tag 0xa297 */
+	{ nvc0_grctx_init_902d_0, 0x902d },
+	{}	/* empty entry, presumably the list terminator */
+};
+
+struct nouveau_oclass *
+gk20a_grctx_oclass = &(struct nvc0_grctx_oclass) {	/* GR ctx, handle 0xea */
+	.base.handle = NV_ENGCTX(GR, 0xea),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_graph_context_ctor,	/* nvc0 ctor/dtor */
+		.dtor = nvc0_graph_context_dtor,
+		.init = _nouveau_graph_context_init,	/* generic hooks */
+		.fini = _nouveau_graph_context_fini,
+		.rd32 = _nouveau_graph_context_rd32,
+		.wr32 = _nouveau_graph_context_wr32,
+	},
+	.main  = nve4_grctx_generate_main,	/* generators come from nve4 */
+	.mods  = nve4_grctx_generate_mods,
+	.unkn  = nve4_grctx_generate_unkn,
+	.hub   = nve4_grctx_pack_hub,	/* packs are nve4's unless noted */
+	.gpc   = nve4_grctx_pack_gpc,
+	.zcull = nvc0_grctx_pack_zcull,	/* nvc0 pack */
+	.tpc   = nve4_grctx_pack_tpc,
+	.ppc   = nve4_grctx_pack_ppc,
+	.icmd  = nve4_grctx_pack_icmd,
+	.mthd  = gk20a_grctx_pack_mthd,	/* the only gk20a-specific pack */
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv108.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv108.c
index 48351b4..8de4a42 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv108.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv108.c

@@ -545,10 +545,12 @@
 	mmio_list(0x408010, 0x80000000,  0, 0);
 	mmio_list(0x419004, 0x00000000,  8, 1);
 	mmio_list(0x419008, 0x00000000,  0, 0);
+	mmio_list(0x4064cc, 0x80000000,  0, 0);
 	mmio_list(0x408004, 0x00000000,  8, 0);
 	mmio_list(0x408008, 0x80000030,  0, 0);
 	mmio_list(0x418808, 0x00000000,  8, 0);
 	mmio_list(0x41880c, 0x80000030,  0, 0);
+	mmio_list(0x4064c8, 0x00c20200,  0, 0);
 	mmio_list(0x418810, 0x80000000, 12, 2);
 	mmio_list(0x419848, 0x10000000, 12, 2);
 

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h
index 9c815d1..8da8b62 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h

@@ -69,7 +69,9 @@
 extern struct nouveau_oclass *nvd9_grctx_oclass;
 
 extern struct nouveau_oclass *nve4_grctx_oclass;
+extern struct nouveau_oclass *gk20a_grctx_oclass;
 void nve4_grctx_generate_main(struct nvc0_graph_priv *, struct nvc0_grctx *);
+void nve4_grctx_generate_mods(struct nvc0_graph_priv *, struct nvc0_grctx *);
 void nve4_grctx_generate_unkn(struct nvc0_graph_priv *);
 void nve4_grctx_generate_r418bb8(struct nvc0_graph_priv *);
 
@@ -151,6 +153,13 @@
 
 extern const struct nvc0_graph_init nve4_grctx_init_pes_0[];
 
+extern const struct nvc0_graph_pack nve4_grctx_pack_hub[];
+extern const struct nvc0_graph_pack nve4_grctx_pack_gpc[];
+extern const struct nvc0_graph_pack nve4_grctx_pack_tpc[];
+extern const struct nvc0_graph_pack nve4_grctx_pack_ppc[];
+extern const struct nvc0_graph_pack nve4_grctx_pack_icmd[];
+extern const struct nvc0_graph_init nve4_grctx_init_a097_0[];
+
 extern const struct nvc0_graph_pack nvf0_grctx_pack_mthd[];
 
 extern const struct nvc0_graph_init nvf0_grctx_init_pri_0[];

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c
index 49a14b1..c5b2492 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c

@@ -272,13 +272,13 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_grctx_pack_icmd[] = {
 	{ nve4_grctx_init_icmd_0 },
 	{}
 };
 
-static const struct nvc0_graph_init
+const struct nvc0_graph_init
 nve4_grctx_init_a097_0[] = {
 	{ 0x000800,   8, 0x40, 0x00000000 },
 	{ 0x000804,   8, 0x40, 0x00000000 },
@@ -697,7 +697,7 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_grctx_pack_hub[] = {
 	{ nvc0_grctx_init_main_0 },
 	{ nve4_grctx_init_fe_0 },
@@ -737,7 +737,7 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_grctx_pack_gpc[] = {
 	{ nvc0_grctx_init_gpc_unk_0 },
 	{ nvd9_grctx_init_prop_0 },
@@ -802,7 +802,7 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_grctx_pack_tpc[] = {
 	{ nvd7_grctx_init_pe_0 },
 	{ nve4_grctx_init_tex_0 },
@@ -826,7 +826,7 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_grctx_pack_ppc[] = {
 	{ nve4_grctx_init_pes_0 },
 	{ nve4_grctx_init_cbm_0 },
@@ -838,7 +838,7 @@
  * PGRAPH context implementation
  ******************************************************************************/
 
-static void
+void
 nve4_grctx_generate_mods(struct nvc0_graph_priv *priv, struct nvc0_grctx *info)
 {
 	u32 magic[GPC_MAX][2];

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvf0.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvf0.c
index 0fab95e..dec03f0 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvf0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvf0.c

@@ -842,7 +842,7 @@
 		u16 magic3 = 0x0648;
 		magic[gpc][0]  = 0x10000000 | (magic0 << 16) | offset;
 		magic[gpc][1]  = 0x00000000 | (magic1 << 16);
-		offset += 0x0324 * (priv->tpc_nr[gpc] - 1);;
+		offset += 0x0324 * (priv->tpc_nr[gpc] - 1);
 		magic[gpc][2]  = 0x10000000 | (magic2 << 16) | offset;
 		magic[gpc][3]  = 0x00000000 | (magic3 << 16);
 		offset += 0x0324;

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c
new file mode 100644
index 0000000..83048a5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0.h"
+#include "ctxnvc0.h"
+
+static struct nouveau_oclass
+gk20a_graph_sclass[] = {
+	{ 0x902d, &nouveau_object_ofuncs },
+	{ 0xa040, &nouveau_object_ofuncs },
+	{ 0xa297, &nouveau_object_ofuncs },
+	{ 0xa0c0, &nouveau_object_ofuncs },
+	{}
+};
+
+struct nouveau_oclass *
+gk20a_graph_oclass = &(struct nvc0_graph_oclass) {
+	.base.handle = NV_ENGINE(GR, 0xea),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_graph_ctor,
+		.dtor = nvc0_graph_dtor,
+		.init = nve4_graph_init,
+		.fini = nve4_graph_fini,
+	},
+	.cclass = &gk20a_grctx_oclass,
+	.sclass = gk20a_graph_sclass,
+	.mmio = nve4_graph_pack_mmio,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
index 2c7809e..1a2d564 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c

@@ -901,7 +901,7 @@
 		nv_engine(priv)->sclass = nvaf_graph_sclass;
 		break;
 
-	};
+	}
 
 	/* unfortunate hw bug workaround... */
 	if (nv_device(priv)->chipset != 0x50 &&

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
index f3c7329..bf7bdb1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c

@@ -894,6 +894,10 @@
 			nv_wr32(priv, fuc_base + 0x0188, i >> 6);
 		nv_wr32(priv, fuc_base + 0x0184, code->data[i]);
 	}
+
+	/* code must be padded to 0x40 words */
+	for (; i & 0x3f; i++)
+		nv_wr32(priv, fuc_base + 0x0184, 0);
 }
 
 static void
@@ -1259,10 +1263,14 @@
 	struct nvc0_graph_oclass *oclass = (void *)bclass;
 	struct nouveau_device *device = nv_device(parent);
 	struct nvc0_graph_priv *priv;
+	bool use_ext_fw, enable;
 	int ret, i;
 
-	ret = nouveau_graph_create(parent, engine, bclass,
-				   (oclass->fecs.ucode != NULL), &priv);
+	use_ext_fw = nouveau_boolopt(device->cfgopt, "NvGrUseFW",
+				     oclass->fecs.ucode == NULL);
+	enable = use_ext_fw || oclass->fecs.ucode != NULL;
+
+	ret = nouveau_graph_create(parent, engine, bclass, enable, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -1272,7 +1280,7 @@
 
 	priv->base.units = nvc0_graph_units;
 
-	if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) {
+	if (use_ext_fw) {
 		nv_info(priv, "using external firmware\n");
 		if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) ||
 		    nvc0_graph_ctor_fw(priv, "fuc409d", &priv->fuc409d) ||

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
index 90d4461..75203a9 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h

@@ -116,6 +116,7 @@
 		     struct nouveau_object **);
 void nvc0_graph_dtor(struct nouveau_object *);
 int  nvc0_graph_init(struct nouveau_object *);
+int  nve4_graph_fini(struct nouveau_object *, bool);
 int  nve4_graph_init(struct nouveau_object *);
 
 extern struct nouveau_oclass nvc0_graph_sclass[];
@@ -217,6 +218,7 @@
 extern const struct nvc0_graph_init nve4_graph_init_tpccs_0[];
 extern const struct nvc0_graph_init nve4_graph_init_pe_0[];
 extern const struct nvc0_graph_init nve4_graph_init_be_0[];
+extern const struct nvc0_graph_pack nve4_graph_pack_mmio[];
 
 extern const struct nvc0_graph_init nvf0_graph_init_fe_0[];
 extern const struct nvc0_graph_init nvf0_graph_init_sked_0[];

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c
index f7c0112..51e0c07 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c

@@ -151,7 +151,7 @@
 	{}
 };
 
-static const struct nvc0_graph_pack
+const struct nvc0_graph_pack
 nve4_graph_pack_mmio[] = {
 	{ nve4_graph_init_main_0 },
 	{ nvc0_graph_init_fe_0 },
@@ -189,7 +189,7 @@
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
-static int
+int
 nve4_graph_fini(struct nouveau_object *object, bool suspend)
 {
 	struct nvc0_graph_priv *priv = (void *)object;

diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
index 5ce686e..f3b4d9d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c

@@ -124,7 +124,7 @@
  ******************************************************************************/
 
 static int
-nv50_software_vblsem_release(void *data, int head)
+nv50_software_vblsem_release(void *data, u32 type, int head)
 {
 	struct nv50_software_chan *chan = data;
 	struct nv50_software_priv *priv = (void *)nv_object(chan)->engine;
@@ -183,7 +183,7 @@
 		return -ENOMEM;
 
 	for (i = 0; i < chan->vblank.nr_event; i++) {
-		ret = nouveau_event_new(pdisp->vblank, i, pclass->vblank,
+		ret = nouveau_event_new(pdisp->vblank, 1, i, pclass->vblank,
 					chan, &chan->vblank.event[i]);
 		if (ret)
 			return ret;

diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv50.h b/drivers/gpu/drm/nouveau/core/engine/software/nv50.h
index 2de370c..bb49a7a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv50.h
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv50.h

@@ -19,7 +19,7 @@
 
 struct nv50_software_cclass {
 	struct nouveau_oclass base;
-	int (*vblank)(void *, int);
+	int (*vblank)(void *, u32, int);
 };
 
 struct nv50_software_chan {

diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
index f9430c1..135c20f 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c

@@ -104,7 +104,7 @@
  ******************************************************************************/
 
 static int
-nvc0_software_vblsem_release(void *data, int head)
+nvc0_software_vblsem_release(void *data, u32 type, int head)
 {
 	struct nv50_software_chan *chan = data;
 	struct nv50_software_priv *priv = (void *)nv_object(chan)->engine;

diff --git a/drivers/gpu/drm/nouveau/core/include/core/class.h b/drivers/gpu/drm/nouveau/core/include/core/class.h
index 9c0cd73..e0c812b 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/class.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/class.h

@@ -295,6 +295,10 @@
 #define NV84_DISP_SOR_HDMI_PWR_REKEY                                 0x0000007f
 #define NV50_DISP_SOR_LVDS_SCRIPT                                    0x00013000
 #define NV50_DISP_SOR_LVDS_SCRIPT_ID                                 0x0000ffff
+#define NV94_DISP_SOR_DP_PWR                                         0x00016000
+#define NV94_DISP_SOR_DP_PWR_STATE                                   0x00000001
+#define NV94_DISP_SOR_DP_PWR_STATE_OFF                               0x00000000
+#define NV94_DISP_SOR_DP_PWR_STATE_ON                                0x00000001
 
 #define NV50_DISP_DAC_MTHD                                           0x00020000
 #define NV50_DISP_DAC_MTHD_TYPE                                      0x0000f000

diff --git a/drivers/gpu/drm/nouveau/core/include/core/event.h b/drivers/gpu/drm/nouveau/core/include/core/event.h
index 5d539eb..ba3f1a7 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/event.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/event.h

@@ -12,32 +12,33 @@
 	struct nouveau_event *event;
 	struct list_head head;
 	unsigned long flags;
+	u32 types;
 	int index;
-	int (*func)(void *, int);
+	int (*func)(void *, u32, int);
 	void *priv;
 };
 
 struct nouveau_event {
-	spinlock_t list_lock;
-	spinlock_t refs_lock;
-
 	void *priv;
-	void (*enable)(struct nouveau_event *, int index);
-	void (*disable)(struct nouveau_event *, int index);
+	int (*check)(struct nouveau_event *, u32 type, int index);
+	void (*enable)(struct nouveau_event *, int type, int index);
+	void (*disable)(struct nouveau_event *, int type, int index);
 
+	int types_nr;
 	int index_nr;
-	struct {
-		struct list_head list;
-		int refs;
-	} index[];
+
+	spinlock_t list_lock;
+	struct list_head *list;
+	spinlock_t refs_lock;
+	int refs[];
 };
 
-int  nouveau_event_create(int index_nr, struct nouveau_event **);
+int  nouveau_event_create(int types_nr, int index_nr, struct nouveau_event **);
 void nouveau_event_destroy(struct nouveau_event **);
-void nouveau_event_trigger(struct nouveau_event *, int index);
+void nouveau_event_trigger(struct nouveau_event *, u32 types, int index);
 
-int  nouveau_event_new(struct nouveau_event *, int index,
-		       int (*func)(void *, int), void *,
+int  nouveau_event_new(struct nouveau_event *, u32 types, int index,
+		       int (*func)(void *, u32, int), void *,
 		       struct nouveau_eventh **);
 void nouveau_event_ref(struct nouveau_eventh *, struct nouveau_eventh **);
 void nouveau_event_get(struct nouveau_eventh *);

diff --git a/drivers/gpu/drm/nouveau/core/include/engine/disp.h b/drivers/gpu/drm/nouveau/core/include/engine/disp.h
index fd0c688..fde8428 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/disp.h

@@ -6,8 +6,19 @@
 #include <core/device.h>
 #include <core/event.h>
 
+enum nvkm_hpd_event {
+	NVKM_HPD_PLUG = 1,
+	NVKM_HPD_UNPLUG = 2,
+	NVKM_HPD_IRQ = 4,
+	NVKM_HPD = (NVKM_HPD_PLUG | NVKM_HPD_UNPLUG | NVKM_HPD_IRQ)
+};
+
 struct nouveau_disp {
 	struct nouveau_engine base;
+
+	struct list_head outp;
+	struct nouveau_event *hpd;
+
 	struct nouveau_event *vblank;
 };
 
@@ -17,25 +28,6 @@
 	return (void *)nv_device(obj)->subdev[NVDEV_ENGINE_DISP];
 }
 
-#define nouveau_disp_create(p,e,c,h,i,x,d)                                     \
-	nouveau_disp_create_((p), (e), (c), (h), (i), (x),                     \
-			     sizeof(**d), (void **)d)
-#define nouveau_disp_destroy(d) ({                                             \
-	struct nouveau_disp *disp = (d);                                       \
-	_nouveau_disp_dtor(nv_object(disp));                                   \
-})
-#define nouveau_disp_init(d)                                                   \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_disp_fini(d,s)                                                 \
-	nouveau_engine_fini(&(d)->base, (s))
-
-int  nouveau_disp_create_(struct nouveau_object *, struct nouveau_object *,
-			  struct nouveau_oclass *, int heads,
-			  const char *, const char *, int, void **);
-void _nouveau_disp_dtor(struct nouveau_object *);
-#define _nouveau_disp_init _nouveau_engine_init
-#define _nouveau_disp_fini _nouveau_engine_fini
-
 extern struct nouveau_oclass *nv04_disp_oclass;
 extern struct nouveau_oclass *nv50_disp_oclass;
 extern struct nouveau_oclass *nv84_disp_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
index 26b6b2b..b639eb2 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h

@@ -109,6 +109,7 @@
 extern struct nouveau_oclass *nv84_fifo_oclass;
 extern struct nouveau_oclass *nvc0_fifo_oclass;
 extern struct nouveau_oclass *nve0_fifo_oclass;
+extern struct nouveau_oclass *gk20a_fifo_oclass;
 extern struct nouveau_oclass *nv108_fifo_oclass;
 
 void nv04_fifo_intr(struct nouveau_subdev *);

diff --git a/drivers/gpu/drm/nouveau/core/include/engine/graph.h b/drivers/gpu/drm/nouveau/core/include/engine/graph.h
index 871edfd..8c1d477 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/graph.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/graph.h

@@ -68,6 +68,7 @@
 extern struct nouveau_oclass *nvd7_graph_oclass;
 extern struct nouveau_oclass *nvd9_graph_oclass;
 extern struct nouveau_oclass *nve4_graph_oclass;
+extern struct nouveau_oclass *gk20a_graph_oclass;
 extern struct nouveau_oclass *nvf0_graph_oclass;
 extern struct nouveau_oclass *nv108_graph_oclass;
 extern struct nouveau_oclass *gm107_graph_oclass;

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/conn.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/conn.h
index a32feb3..f3930c2 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/conn.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/conn.h

@@ -22,7 +22,25 @@
 	DCB_CONNECTOR_NONE = 0xff
 };
 
-u16 dcb_conntab(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 dcb_conn(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len);
+struct nvbios_connT {
+};
+
+u32 nvbios_connTe(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_connTp(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		  struct nvbios_connT *info);
+
+struct nvbios_connE {
+	u8 type;
+	u8 location;
+	u8 hpd;
+	u8 dp;
+	u8 di;
+	u8 sr;
+	u8 lcdid;
+};
+
+u32 nvbios_connEe(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *hdr);
+u32 nvbios_connEp(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *hdr,
+		  struct nvbios_connE *info);
 
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h
index 6e54218..728206e 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h

@@ -17,9 +17,10 @@
 		       struct nvbios_dpout *);
 
 struct nvbios_dpcfg {
-	u8 drv;
-	u8 pre;
-	u8 unk;
+	u8 pc;
+	u8 dc;
+	u8 pe;
+	u8 tx_pu;
 };
 
 u16
@@ -27,7 +28,7 @@
 		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		   struct nvbios_dpcfg *);
 u16
-nvbios_dpcfg_match(struct nouveau_bios *, u16 outp, u8 un, u8 vs, u8 pe,
+nvbios_dpcfg_match(struct nouveau_bios *, u16 outp, u8 pc, u8 vs, u8 pe,
 		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		   struct nvbios_dpcfg *);
 

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h
index 8f4ced7..c01e29c 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h

@@ -77,6 +77,8 @@
 	int tstate; /* thermal adjustment (max-) */
 	int dstate; /* display adjustment (min+) */
 
+	bool allow_reclock;
+
 	int  (*read)(struct nouveau_clock *, enum nv_clk_src);
 	int  (*calc)(struct nouveau_clock *, struct nouveau_cstate *);
 	int  (*prog)(struct nouveau_clock *);
@@ -106,8 +108,8 @@
 	int mdiv;
 };
 
-#define nouveau_clock_create(p,e,o,i,d)                                        \
-	nouveau_clock_create_((p), (e), (o), (i), sizeof(**d), (void **)d)
+#define nouveau_clock_create(p,e,o,i,r,d)                                      \
+	nouveau_clock_create_((p), (e), (o), (i), (r), sizeof(**d), (void **)d)
 #define nouveau_clock_destroy(p) ({                                            \
 	struct nouveau_clock *clk = (p);                                       \
 	_nouveau_clock_dtor(nv_object(clk));                                   \
@@ -121,7 +123,7 @@
 
 int  nouveau_clock_create_(struct nouveau_object *, struct nouveau_object *,
 			   struct nouveau_oclass *,
-			   struct nouveau_clocks *, int, void **);
+			   struct nouveau_clocks *, bool, int, void **);
 void _nouveau_clock_dtor(struct nouveau_object *);
 int _nouveau_clock_init(struct nouveau_object *);
 #define _nouveau_clock_fini _nouveau_subdev_fini

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
index 58c7ccd..871e739 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h

@@ -105,6 +105,7 @@
 extern struct nouveau_oclass *nvaf_fb_oclass;
 extern struct nouveau_oclass *nvc0_fb_oclass;
 extern struct nouveau_oclass *nve0_fb_oclass;
+extern struct nouveau_oclass *gk20a_fb_oclass;
 extern struct nouveau_oclass *gm107_fb_oclass;
 
 #include <subdev/bios/ramcfg.h>

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
index c85b9f1..612d82a 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h

@@ -8,17 +8,18 @@
 #include <subdev/bios.h>
 #include <subdev/bios/gpio.h>
 
+enum nvkm_gpio_event {
+	NVKM_GPIO_HI = 1,
+	NVKM_GPIO_LO = 2,
+	NVKM_GPIO_TOGGLED = (NVKM_GPIO_HI | NVKM_GPIO_LO),
+};
+
 struct nouveau_gpio {
 	struct nouveau_subdev base;
 
 	struct nouveau_event *events;
 
-	/* hardware interfaces */
 	void (*reset)(struct nouveau_gpio *, u8 func);
-	int  (*drive)(struct nouveau_gpio *, int line, int dir, int out);
-	int  (*sense)(struct nouveau_gpio *, int line);
-
-	/* software interfaces */
 	int  (*find)(struct nouveau_gpio *, int idx, u8 tag, u8 line,
 		     struct dcb_gpio_func *);
 	int  (*set)(struct nouveau_gpio *, int idx, u8 tag, u8 line, int state);
@@ -31,23 +32,10 @@
 	return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_GPIO];
 }
 
-#define nouveau_gpio_create(p,e,o,l,d)                                         \
-	nouveau_gpio_create_((p), (e), (o), (l), sizeof(**d), (void **)d)
-#define nouveau_gpio_destroy(p) ({                                             \
-	struct nouveau_gpio *gpio = (p);                                       \
-	_nouveau_gpio_dtor(nv_object(gpio));                                   \
-})
-#define nouveau_gpio_fini(p,s)                                                 \
-	nouveau_subdev_fini(&(p)->base, (s))
-
-int  nouveau_gpio_create_(struct nouveau_object *, struct nouveau_object *,
-			  struct nouveau_oclass *, int, int, void **);
-void _nouveau_gpio_dtor(struct nouveau_object *);
-int  nouveau_gpio_init(struct nouveau_gpio *);
-
-extern struct nouveau_oclass nv10_gpio_oclass;
-extern struct nouveau_oclass nv50_gpio_oclass;
-extern struct nouveau_oclass nvd0_gpio_oclass;
-extern struct nouveau_oclass nve0_gpio_oclass;
+extern struct nouveau_oclass *nv10_gpio_oclass;
+extern struct nouveau_oclass *nv50_gpio_oclass;
+extern struct nouveau_oclass *nv92_gpio_oclass;
+extern struct nouveau_oclass *nvd0_gpio_oclass;
+extern struct nouveau_oclass *nve0_gpio_oclass;
 
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/i2c.h b/drivers/gpu/drm/nouveau/core/include/subdev/i2c.h
index 7f50a85..db1b39d0 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/i2c.h

@@ -14,52 +14,41 @@
 #define NV_I2C_TYPE_EXTDDC(e) (0x0005 | (e) << 8)
 #define NV_I2C_TYPE_EXTAUX(e) (0x0006 | (e) << 8)
 
+enum nvkm_i2c_event {
+	NVKM_I2C_PLUG = 1,
+	NVKM_I2C_UNPLUG = 2,
+	NVKM_I2C_IRQ = 4,
+	NVKM_I2C_DONE = 8,
+	NVKM_I2C_ANY = (NVKM_I2C_PLUG |
+			NVKM_I2C_UNPLUG |
+			NVKM_I2C_IRQ |
+			NVKM_I2C_DONE),
+};
+
 struct nouveau_i2c_port {
 	struct nouveau_object base;
 	struct i2c_adapter adapter;
+	struct mutex mutex;
 
 	struct list_head head;
 	u8  index;
+	int aux;
 
 	const struct nouveau_i2c_func *func;
 };
 
 struct nouveau_i2c_func {
-	void (*acquire)(struct nouveau_i2c_port *);
-	void (*release)(struct nouveau_i2c_port *);
-
 	void (*drive_scl)(struct nouveau_i2c_port *, int);
 	void (*drive_sda)(struct nouveau_i2c_port *, int);
 	int  (*sense_scl)(struct nouveau_i2c_port *);
 	int  (*sense_sda)(struct nouveau_i2c_port *);
 
-	int  (*aux)(struct nouveau_i2c_port *, u8, u32, u8 *, u8);
+	int  (*aux)(struct nouveau_i2c_port *, bool, u8, u32, u8 *, u8);
 	int  (*pattern)(struct nouveau_i2c_port *, int pattern);
 	int  (*lnk_ctl)(struct nouveau_i2c_port *, int nr, int bw, bool enh);
 	int  (*drv_ctl)(struct nouveau_i2c_port *, int lane, int sw, int pe);
 };
 
-#define nouveau_i2c_port_create(p,e,o,i,a,f,d)                                 \
-	nouveau_i2c_port_create_((p), (e), (o), (i), (a), (f),                 \
-				 sizeof(**d), (void **)d)
-#define nouveau_i2c_port_destroy(p) ({                                         \
-	struct nouveau_i2c_port *port = (p);                                   \
-	_nouveau_i2c_port_dtor(nv_object(i2c));                                \
-})
-#define nouveau_i2c_port_init(p)                                               \
-	nouveau_object_init(&(p)->base)
-#define nouveau_i2c_port_fini(p,s)                                             \
-	nouveau_object_fini(&(p)->base, (s))
-
-int nouveau_i2c_port_create_(struct nouveau_object *, struct nouveau_object *,
-			     struct nouveau_oclass *, u8,
-			     const struct i2c_algorithm *,
-			     const struct nouveau_i2c_func *,
-			     int, void **);
-void _nouveau_i2c_port_dtor(struct nouveau_object *);
-#define _nouveau_i2c_port_init nouveau_object_init
-#define _nouveau_i2c_port_fini nouveau_object_fini
-
 struct nouveau_i2c_board_info {
 	struct i2c_board_info dev;
 	u8 udelay; /* set to 0 to use the standard delay */
@@ -67,13 +56,20 @@
 
 struct nouveau_i2c {
 	struct nouveau_subdev base;
+	struct nouveau_event *ntfy;
 
 	struct nouveau_i2c_port *(*find)(struct nouveau_i2c *, u8 index);
 	struct nouveau_i2c_port *(*find_type)(struct nouveau_i2c *, u16 type);
+	int  (*acquire_pad)(struct nouveau_i2c_port *, unsigned long timeout);
+	void (*release_pad)(struct nouveau_i2c_port *);
+	int  (*acquire)(struct nouveau_i2c_port *, unsigned long timeout);
+	void (*release)(struct nouveau_i2c_port *);
 	int (*identify)(struct nouveau_i2c *, int index,
 			const char *what, struct nouveau_i2c_board_info *,
 			bool (*match)(struct nouveau_i2c_port *,
 				      struct i2c_board_info *, void *), void *);
+
+	wait_queue_head_t wait;
 	struct list_head ports;
 };
 
@@ -83,37 +79,12 @@
 	return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_I2C];
 }
 
-#define nouveau_i2c_create(p,e,o,s,d)                                          \
-	nouveau_i2c_create_((p), (e), (o), (s), sizeof(**d), (void **)d)
-#define nouveau_i2c_destroy(p) ({                                              \
-	struct nouveau_i2c *i2c = (p);                                         \
-	_nouveau_i2c_dtor(nv_object(i2c));                                     \
-})
-#define nouveau_i2c_init(p) ({                                                 \
-	struct nouveau_i2c *i2c = (p);                                         \
-	_nouveau_i2c_init(nv_object(i2c));                                     \
-})
-#define nouveau_i2c_fini(p,s) ({                                               \
-	struct nouveau_i2c *i2c = (p);                                         \
-	_nouveau_i2c_fini(nv_object(i2c), (s));                                \
-})
-
-int nouveau_i2c_create_(struct nouveau_object *, struct nouveau_object *,
-			struct nouveau_oclass *, struct nouveau_oclass *,
-			int, void **);
-void _nouveau_i2c_dtor(struct nouveau_object *);
-int  _nouveau_i2c_init(struct nouveau_object *);
-int  _nouveau_i2c_fini(struct nouveau_object *, bool);
-
-extern struct nouveau_oclass nv04_i2c_oclass;
-extern struct nouveau_oclass nv4e_i2c_oclass;
-extern struct nouveau_oclass nv50_i2c_oclass;
-extern struct nouveau_oclass nv94_i2c_oclass;
-extern struct nouveau_oclass nvd0_i2c_oclass;
-extern struct nouveau_oclass nouveau_anx9805_sclass[];
-
-extern const struct i2c_algorithm nouveau_i2c_bit_algo;
-extern const struct i2c_algorithm nouveau_i2c_aux_algo;
+extern struct nouveau_oclass *nv04_i2c_oclass;
+extern struct nouveau_oclass *nv4e_i2c_oclass;
+extern struct nouveau_oclass *nv50_i2c_oclass;
+extern struct nouveau_oclass *nv94_i2c_oclass;
+extern struct nouveau_oclass *nvd0_i2c_oclass;
+extern struct nouveau_oclass *nve0_i2c_oclass;
 
 static inline int
 nv_rdi2cr(struct nouveau_i2c_port *port, u8 addr, u8 reg)

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h b/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h
index 88814f1..31df634 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h

@@ -30,5 +30,6 @@
 
 extern struct nouveau_oclass nvc0_ibus_oclass;
 extern struct nouveau_oclass nve0_ibus_oclass;
+extern struct nouveau_oclass gk20a_ibus_oclass;
 
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
index bdf5941..73b1ed2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c

@@ -118,8 +118,10 @@
 	if (ret)
 		return ret;
 
-	bar->iomem = ioremap(nv_device_resource_start(device, 3),
-			     nv_device_resource_len(device, 3));
+	if (nv_device_resource_len(device, 3) != 0)
+		bar->iomem = ioremap(nv_device_resource_start(device, 3),
+				     nv_device_resource_len(device, 3));
+
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
index 3f30db6..ca8139b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c

@@ -30,14 +30,16 @@
 
 #include "priv.h"
 
+struct nvc0_bar_priv_vm {
+	struct nouveau_gpuobj *mem;
+	struct nouveau_gpuobj *pgd;
+	struct nouveau_vm *vm;
+};
+
 struct nvc0_bar_priv {
 	struct nouveau_bar base;
 	spinlock_t lock;
-	struct {
-		struct nouveau_gpuobj *mem;
-		struct nouveau_gpuobj *pgd;
-		struct nouveau_vm *vm;
-	} bar[2];
+	struct nvc0_bar_priv_vm bar[2];
 };
 
 static int
@@ -79,14 +81,66 @@
 }
 
 static int
+nvc0_bar_init_vm(struct nvc0_bar_priv *priv, struct nvc0_bar_priv_vm *bar_vm,
+		 int bar_nr)
+{
+	struct nouveau_device *device = nv_device(&priv->base);
+	struct nouveau_vm *vm;
+	resource_size_t bar_len;
+	int ret;
+
+	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0,
+				&bar_vm->mem);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0,
+				&bar_vm->pgd);
+	if (ret)
+		return ret;
+
+	bar_len = nv_device_resource_len(device, bar_nr);
+
+	ret = nouveau_vm_new(device, 0, bar_len, 0, &vm);
+	if (ret)
+		return ret;
+
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
+
+	/*
+	 * Bootstrap page table lookup.
+	 */
+	if (bar_nr == 3) {
+		ret = nouveau_gpuobj_new(nv_object(priv), NULL,
+					 (bar_len >> 12) * 8, 0x1000,
+					 NVOBJ_FLAG_ZERO_ALLOC,
+					&vm->pgt[0].obj[0]);
+		vm->pgt[0].refcount[0] = 1;
+		if (ret)
+			return ret;
+	}
+
+	ret = nouveau_vm_ref(vm, &bar_vm->vm, bar_vm->pgd);
+	nouveau_vm_ref(NULL, &vm, NULL);
+	if (ret)
+		return ret;
+
+	nv_wo32(bar_vm->mem, 0x0200, lower_32_bits(bar_vm->pgd->addr));
+	nv_wo32(bar_vm->mem, 0x0204, upper_32_bits(bar_vm->pgd->addr));
+	nv_wo32(bar_vm->mem, 0x0208, lower_32_bits(bar_len - 1));
+	nv_wo32(bar_vm->mem, 0x020c, upper_32_bits(bar_len - 1));
+
+	return 0;
+}
+
+static int
 nvc0_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 size,
 	      struct nouveau_object **pobject)
 {
 	struct nouveau_device *device = nv_device(parent);
 	struct nvc0_bar_priv *priv;
-	struct nouveau_gpuobj *mem;
-	struct nouveau_vm *vm;
+	bool has_bar3 = nv_device_resource_len(device, 3) != 0;
 	int ret;
 
 	ret = nouveau_bar_create(parent, engine, oclass, &priv);
@@ -95,71 +149,19 @@
 		return ret;
 
 	/* BAR3 */
-	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0,
-				&priv->bar[0].mem);
-	mem = priv->bar[0].mem;
-	if (ret)
-		return ret;
-
-	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0,
-				&priv->bar[0].pgd);
-	if (ret)
-		return ret;
-
-	ret = nouveau_vm_new(device, 0, nv_device_resource_len(device, 3), 0, &vm);
-	if (ret)
-		return ret;
-
-	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
-
-	ret = nouveau_gpuobj_new(nv_object(priv), NULL,
-				 (nv_device_resource_len(device, 3) >> 12) * 8,
-				 0x1000, NVOBJ_FLAG_ZERO_ALLOC,
-				 &vm->pgt[0].obj[0]);
-	vm->pgt[0].refcount[0] = 1;
-	if (ret)
-		return ret;
-
-	ret = nouveau_vm_ref(vm, &priv->bar[0].vm, priv->bar[0].pgd);
-	nouveau_vm_ref(NULL, &vm, NULL);
-	if (ret)
-		return ret;
-
-	nv_wo32(mem, 0x0200, lower_32_bits(priv->bar[0].pgd->addr));
-	nv_wo32(mem, 0x0204, upper_32_bits(priv->bar[0].pgd->addr));
-	nv_wo32(mem, 0x0208, lower_32_bits(nv_device_resource_len(device, 3) - 1));
-	nv_wo32(mem, 0x020c, upper_32_bits(nv_device_resource_len(device, 3) - 1));
+	if (has_bar3) {
+		ret = nvc0_bar_init_vm(priv, &priv->bar[0], 3);
+		if (ret)
+			return ret;
+		priv->base.alloc = nouveau_bar_alloc;
+		priv->base.kmap = nvc0_bar_kmap;
+	}
 
 	/* BAR1 */
-	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0,
-				&priv->bar[1].mem);
-	mem = priv->bar[1].mem;
+	ret = nvc0_bar_init_vm(priv, &priv->bar[1], 1);
 	if (ret)
 		return ret;
 
-	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0,
-				&priv->bar[1].pgd);
-	if (ret)
-		return ret;
-
-	ret = nouveau_vm_new(device, 0, nv_device_resource_len(device, 1), 0, &vm);
-	if (ret)
-		return ret;
-
-	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
-
-	ret = nouveau_vm_ref(vm, &priv->bar[1].vm, priv->bar[1].pgd);
-	nouveau_vm_ref(NULL, &vm, NULL);
-	if (ret)
-		return ret;
-
-	nv_wo32(mem, 0x0200, lower_32_bits(priv->bar[1].pgd->addr));
-	nv_wo32(mem, 0x0204, upper_32_bits(priv->bar[1].pgd->addr));
-	nv_wo32(mem, 0x0208, lower_32_bits(nv_device_resource_len(device, 1) - 1));
-	nv_wo32(mem, 0x020c, upper_32_bits(nv_device_resource_len(device, 1) - 1));
-
-	priv->base.alloc = nouveau_bar_alloc;
-	priv->base.kmap = nvc0_bar_kmap;
 	priv->base.umap = nvc0_bar_umap;
 	priv->base.unmap = nvc0_bar_unmap;
 	priv->base.flush = nv84_bar_flush;
@@ -201,7 +203,9 @@
 	nv_mask(priv, 0x100c80, 0x00000001, 0x00000000);
 
 	nv_wr32(priv, 0x001704, 0x80000000 | priv->bar[1].mem->addr >> 12);
-	nv_wr32(priv, 0x001714, 0xc0000000 | priv->bar[0].mem->addr >> 12);
+	if (priv->bar[0].mem)
+		nv_wr32(priv, 0x001714,
+			0xc0000000 | priv->bar[0].mem->addr >> 12);
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
index 222e8eb..d45704a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c

@@ -183,10 +183,11 @@
 		goto out;
 
 	bios->data = kmalloc(bios->size, GFP_KERNEL);
-	if (bios->data) {
-		for (i = 0; i < bios->size; i += 4)
-			((u32 *)bios->data)[i/4] = nv_rd32(bios, 0x300000 + i);
-	}
+	if (!bios->data)
+		goto out;
+
+	for (i = 0; i < bios->size; i += 4)
+		((u32 *)bios->data)[i/4] = nv_rd32(bios, 0x300000 + i);
 
 	/* check the PCI record header */
 	pcir = nv_ro16(bios, 0x0018);

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/conn.c b/drivers/gpu/drm/nouveau/core/subdev/bios/conn.c
index 5ac010e..2ede3bc 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/conn.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/conn.c

@@ -28,12 +28,12 @@
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/conn.h>
 
-u16
-dcb_conntab(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+u32
+nvbios_connTe(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
-	u16 dcb = dcb_table(bios, ver, hdr, cnt, len);
+	u32 dcb = dcb_table(bios, ver, hdr, cnt, len);
 	if (dcb && *ver >= 0x30 && *hdr >= 0x16) {
-		u16 data = nv_ro16(bios, dcb + 0x14);
+		u32 data = nv_ro16(bios, dcb + 0x14);
 		if (data) {
 			*ver = nv_ro08(bios, data + 0);
 			*hdr = nv_ro08(bios, data + 1);
@@ -42,15 +42,59 @@
 			return data;
 		}
 	}
-	return 0x0000;
+	return 0x00000000;
 }
 
-u16
-dcb_conn(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len)
+u32
+nvbios_connTp(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+	      struct nvbios_connT *info)
+{
+	u32 data = nvbios_connTe(bios, ver, hdr, cnt, len);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x30:
+	case 0x40:
+		return data;
+	default:
+		break;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_connEe(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len)
 {
 	u8  hdr, cnt;
-	u16 data = dcb_conntab(bios, ver, &hdr, &cnt, len);
+	u32 data = nvbios_connTe(bios, ver, &hdr, &cnt, len);
 	if (data && idx < cnt)
 		return data + hdr + (idx * *len);
-	return 0x0000;
+	return 0x00000000;
+}
+
+u32
+nvbios_connEp(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len,
+	      struct nvbios_connE *info)
+{
+	u32 data = nvbios_connEe(bios, idx, ver, len);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x30:
+	case 0x40:
+		info->type     =  nv_ro08(bios, data + 0x00);
+		info->location =  nv_ro08(bios, data + 0x01) & 0x0f;
+		info->hpd      = (nv_ro08(bios, data + 0x01) & 0x30) >> 4;
+		info->dp       = (nv_ro08(bios, data + 0x01) & 0xc0) >> 6;
+		if (*len < 4)
+			return data;
+		info->hpd     |= (nv_ro08(bios, data + 0x02) & 0x03) << 2;
+		info->dp      |=  nv_ro08(bios, data + 0x02) & 0x0c;
+		info->di       = (nv_ro08(bios, data + 0x02) & 0xf0) >> 4;
+		info->hpd     |= (nv_ro08(bios, data + 0x03) & 0x07) << 4;
+		info->sr       = (nv_ro08(bios, data + 0x03) & 0x08) >> 3;
+		info->lcdid    = (nv_ro08(bios, data + 0x03) & 0x70) >> 4;
+		return data;
+	default:
+		break;
+	}
+	return 0x00000000;
 }

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c
index 7628fe7..f309dd6 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c

@@ -162,18 +162,20 @@
 		   struct nvbios_dpcfg *info)
 {
 	u16 data = nvbios_dpcfg_entry(bios, outp, idx, ver, hdr, cnt, len);
+	memset(info, 0x00, sizeof(*info));
 	if (data) {
 		switch (*ver) {
 		case 0x21:
-			info->drv = nv_ro08(bios, data + 0x02);
-			info->pre = nv_ro08(bios, data + 0x03);
-			info->unk = nv_ro08(bios, data + 0x04);
+			info->dc    = nv_ro08(bios, data + 0x02);
+			info->pe    = nv_ro08(bios, data + 0x03);
+			info->tx_pu = nv_ro08(bios, data + 0x04);
 			break;
 		case 0x30:
 		case 0x40:
-			info->drv = nv_ro08(bios, data + 0x01);
-			info->pre = nv_ro08(bios, data + 0x02);
-			info->unk = nv_ro08(bios, data + 0x03);
+			info->pc    = nv_ro08(bios, data + 0x00);
+			info->dc    = nv_ro08(bios, data + 0x01);
+			info->pe    = nv_ro08(bios, data + 0x02);
+			info->tx_pu = nv_ro08(bios, data + 0x03);
 			break;
 		default:
 			data = 0x0000;
@@ -184,7 +186,7 @@
 }
 
 u16
-nvbios_dpcfg_match(struct nouveau_bios *bios, u16 outp, u8 un, u8 vs, u8 pe,
+nvbios_dpcfg_match(struct nouveau_bios *bios, u16 outp, u8 pc, u8 vs, u8 pe,
 		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		   struct nvbios_dpcfg *info)
 {
@@ -193,16 +195,15 @@
 
 	if (*ver >= 0x30) {
 		const u8 vsoff[] = { 0, 4, 7, 9 };
-		idx = (un * 10) + vsoff[vs] + pe;
+		idx = (pc * 10) + vsoff[vs] + pe;
 	} else {
-		while ((data = nvbios_dpcfg_entry(bios, outp, idx,
+		while ((data = nvbios_dpcfg_entry(bios, outp, ++idx,
 						  ver, hdr, cnt, len))) {
 			if (nv_ro08(bios, data + 0x00) == vs &&
 			    nv_ro08(bios, data + 0x01) == pe)
 				break;
-			idx++;
 		}
 	}
 
-	return nvbios_dpcfg_parse(bios, outp, pe, ver, hdr, cnt, len, info);
+	return nvbios_dpcfg_parse(bios, outp, idx, ver, hdr, cnt, len, info);
 }

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
index acaeaf7..626380f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c

@@ -98,15 +98,16 @@
 init_conn(struct nvbios_init *init)
 {
 	struct nouveau_bios *bios = init->bios;
-	u8  ver, len;
-	u16 conn;
+	struct nvbios_connE connE;
+	u8  ver, hdr;
+	u32 conn;
 
 	if (init_exec(init)) {
 		if (init->outp) {
 			conn = init->outp->connector;
-			conn = dcb_conn(bios, conn, &ver, &len);
+			conn = nvbios_connEp(bios, conn, &ver, &hdr, &connE);
 			if (conn)
-				return nv_ro08(bios, conn);
+				return connE.type;
 		}
 
 		error("script needs connector type\n");

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/base.c b/drivers/gpu/drm/nouveau/core/subdev/clock/base.c
index dd62bae..22351f5 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/base.c

@@ -346,8 +346,8 @@
 	struct nouveau_pstate *pstate;
 	int i = 0;
 
-	/* YKW repellant */
-	return -ENOSYS;
+	if (!clk->allow_reclock)
+		return -ENOSYS;
 
 	if (req != -1 && req != -2) {
 		list_for_each_entry(pstate, &clk->states, head) {
@@ -456,6 +456,7 @@
 		      struct nouveau_object *engine,
 		      struct nouveau_oclass *oclass,
 		      struct nouveau_clocks *clocks,
+		      bool allow_reclock,
 		      int length, void **object)
 {
 	struct nouveau_device *device = nv_device(parent);
@@ -478,6 +479,8 @@
 		ret = nouveau_pstate_new(clk, idx++);
 	} while (ret == 0);
 
+	clk->allow_reclock = allow_reclock;
+
 	mode = nouveau_stropt(device->cfgopt, "NvClkMode", &arglen);
 	if (mode) {
 		if (!strncasecmpz(mode, "disabled", arglen)) {

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c
index b74db6c..eb2d442 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c

@@ -82,7 +82,8 @@
 	struct nv04_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nv04_domain, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nv04_domain, false,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c
index db7346f..8a9e168 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c

@@ -213,7 +213,8 @@
 	struct nv40_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nv40_domain, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nv40_domain, true,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c
index 250a6d9..8c13277 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c

@@ -507,7 +507,7 @@
 	int ret;
 
 	ret = nouveau_clock_create(parent, engine, oclass, pclass->domains,
-				  &priv);
+				   false, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
index 4f5a137..9fb5835 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c

@@ -302,7 +302,8 @@
 	struct nva3_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nva3_domain, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nva3_domain, false,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c
index 7a723b4..6a65fc9 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c

@@ -421,7 +421,8 @@
 	struct nvaa_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nvaa_domains, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nvaa_domains, true,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c
index c310572..dbf8517 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c

@@ -437,7 +437,8 @@
 	struct nvc0_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nvc0_domain, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nvc0_domain, false,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nve0.c
index d3c37c9..4ac1aa3 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nve0.c

@@ -473,7 +473,8 @@
 	struct nve0_clock_priv *priv;
 	int ret;
 
-	ret = nouveau_clock_create(parent, engine, oclass, nve0_domain, &priv);
+	ret = nouveau_clock_create(parent, engine, oclass, nve0_domain, true,
+				   &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c
new file mode 100644
index 0000000..a16024a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c

@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0.h"
+
+struct gk20a_fb_priv {
+	struct nouveau_fb base;
+};
+
+static int
+gk20a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct gk20a_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_oclass *
+gk20a_fb_oclass = &(struct nouveau_fb_impl) {
+	.base.handle = NV_SUBDEV(FB, 0xea),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gk20a_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = _nouveau_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+	.memtype = nvc0_fb_memtype_valid,
+	.ram = &gk20a_ram_oclass,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
index da74c88..82273f8 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h

@@ -32,6 +32,7 @@
 extern struct nouveau_oclass nvaa_ram_oclass;
 extern struct nouveau_oclass nvc0_ram_oclass;
 extern struct nouveau_oclass nve0_ram_oclass;
+extern struct nouveau_oclass gk20a_ram_oclass;
 extern struct nouveau_oclass gm107_ram_oclass;
 
 int nouveau_sddr3_calc(struct nouveau_ram *ram);

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
new file mode 100644
index 0000000..4d77d75
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c

@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+#include <subdev/fb.h>
+
+struct gk20a_mem {
+	struct nouveau_mem base;
+	void *cpuaddr;
+	dma_addr_t handle;
+};
+#define to_gk20a_mem(m) container_of(m, struct gk20a_mem, base)
+
+static void
+gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
+{
+	struct device *dev = nv_device_base(nv_device(pfb));
+	struct gk20a_mem *mem = to_gk20a_mem(*pmem);
+
+	*pmem = NULL;
+	if (unlikely(mem == NULL))
+		return;
+
+	if (likely(mem->cpuaddr))
+		dma_free_coherent(dev, mem->base.size << PAGE_SHIFT,
+				  mem->cpuaddr, mem->handle);
+
+	kfree(mem->base.pages);
+	kfree(mem);
+}
+
+static int
+gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
+	     u32 memtype, struct nouveau_mem **pmem)
+{
+	struct device *dev = nv_device_base(nv_device(pfb));
+	struct gk20a_mem *mem;
+	u32 type = memtype & 0xff;
+	u32 npages, order;
+	int i;
+
+	nv_debug(pfb, "%s: size: %llx align: %x, ncmin: %x\n", __func__, size,
+		 align, ncmin);
+
+	npages = size >> PAGE_SHIFT;
+	if (npages == 0)
+		npages = 1;
+
+	if (align == 0)
+		align = PAGE_SIZE;
+	align >>= PAGE_SHIFT;
+
+	/* round alignment to the next power of 2, if needed */
+	order = fls(align);
+	if ((align & (align - 1)) == 0)
+		order--;
+	align = BIT(order);
+
+	/* ensure returned address is correctly aligned */
+	npages = max(align, npages);
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	mem->base.size = npages;
+	mem->base.memtype = type;
+
+	mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL);
+	if (!mem->base.pages) {
+		kfree(mem);
+		return -ENOMEM;
+	}
+
+	*pmem = &mem->base;
+
+	mem->cpuaddr = dma_alloc_coherent(dev, npages << PAGE_SHIFT,
+					  &mem->handle, GFP_KERNEL);
+	if (!mem->cpuaddr) {
+		nv_error(pfb, "%s: cannot allocate memory!\n", __func__);
+		gk20a_ram_put(pfb, pmem);
+		return -ENOMEM;
+	}
+
+	align <<= PAGE_SHIFT;
+
+	/* alignment check */
+	if (unlikely(mem->handle & (align - 1)))
+		nv_warn(pfb, "memory not aligned as requested: %pad (0x%x)\n",
+			&mem->handle, align);
+
+	nv_debug(pfb, "alloc size: 0x%x, align: 0x%x, paddr: %pad, vaddr: %p\n",
+		 npages << PAGE_SHIFT, align, &mem->handle, mem->cpuaddr);
+
+	for (i = 0; i < npages; i++)
+		mem->base.pages[i] = mem->handle + (PAGE_SIZE * i);
+
+	mem->base.offset = (u64)mem->base.pages[0];
+
+	return 0;
+}
+
+static int
+gk20a_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 datasize,
+	      struct nouveau_object **pobject)
+{
+	struct nouveau_ram *ram;
+	int ret;
+
+	ret = nouveau_ram_create(parent, engine, oclass, &ram);
+	*pobject = nv_object(ram);
+	if (ret)
+		return ret;
+	ram->type = NV_MEM_TYPE_STOLEN;
+	ram->size = get_num_physpages() << PAGE_SHIFT;
+
+	ram->get = gk20a_ram_get;
+	ram->put = gk20a_ram_put;
+
+	return 0;
+}
+
+struct nouveau_oclass
+gk20a_ram_oclass = {
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gk20a_ram_ctor,
+		.dtor = _nouveau_ram_dtor,
+		.init = _nouveau_ram_init,
+		.fini = _nouveau_ram_fini,
+	},
+};

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
index ef91b6e893af..e5d12c2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c

@@ -211,7 +211,7 @@
 	struct nv50_ram *ram = (void *)pfb->ram;
 	struct nv50_ramseq *hwsq = &ram->hwsq;
 
-	ram_exec(hwsq, nouveau_boolopt(device->cfgopt, "NvMemExec", false));
+	ram_exec(hwsq, nouveau_boolopt(device->cfgopt, "NvMemExec", true));
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c
index 6eb97f1..8076fb1 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c

@@ -309,7 +309,7 @@
 	struct nouveau_device *device = nv_device(pfb);
 	struct nva3_ram *ram = (void *)pfb->ram;
 	struct nva3_ramfuc *fuc = &ram->fuc;
-	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", false));
+	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", true));
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
index 8edc922..5a6a502 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c

@@ -408,7 +408,7 @@
 	struct nouveau_device *device = nv_device(pfb);
 	struct nvc0_ram *ram = (void *)pfb->ram;
 	struct nvc0_ramfuc *fuc = &ram->fuc;
-	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", false));
+	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", true));
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c
index 1675219..84c7efb 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c

@@ -1111,7 +1111,7 @@
 	struct nouveau_device *device = nv_device(pfb);
 	struct nve0_ram *ram = (void *)pfb->ram;
 	struct nve0_ramfuc *fuc = &ram->fuc;
-	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", false));
+	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", true));
 	return (ram->base.next == &ram->base.xition);
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
index f572c28..45e0202 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c

@@ -22,21 +22,24 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/gpio.h>
 #include <subdev/bios.h>
 #include <subdev/bios/gpio.h>
 
+#include "priv.h"
+
 static int
 nouveau_gpio_drive(struct nouveau_gpio *gpio,
 		   int idx, int line, int dir, int out)
 {
-	return gpio->drive ? gpio->drive(gpio, line, dir, out) : -ENODEV;
+	const struct nouveau_gpio_impl *impl = (void *)nv_object(gpio)->oclass;
+	return impl->drive ? impl->drive(gpio, line, dir, out) : -ENODEV;
 }
 
 static int
 nouveau_gpio_sense(struct nouveau_gpio *gpio, int idx, int line)
 {
-	return gpio->sense ? gpio->sense(gpio, line) : -ENODEV;
+	const struct nouveau_gpio_impl *impl = (void *)nv_object(gpio)->oclass;
+	return impl->sense ? impl->sense(gpio, line) : -ENODEV;
 }
 
 static int
@@ -102,37 +105,51 @@
 	return ret;
 }
 
-void
-_nouveau_gpio_dtor(struct nouveau_object *object)
+static void
+nouveau_gpio_intr_disable(struct nouveau_event *event, int type, int index)
 {
-	struct nouveau_gpio *gpio = (void *)object;
-	nouveau_event_destroy(&gpio->events);
-	nouveau_subdev_destroy(&gpio->base);
+	struct nouveau_gpio *gpio = nouveau_gpio(event->priv);
+	const struct nouveau_gpio_impl *impl = (void *)nv_object(gpio)->oclass;
+	impl->intr_mask(gpio, type, 1 << index, 0);
+}
+
+static void
+nouveau_gpio_intr_enable(struct nouveau_event *event, int type, int index)
+{
+	struct nouveau_gpio *gpio = nouveau_gpio(event->priv);
+	const struct nouveau_gpio_impl *impl = (void *)nv_object(gpio)->oclass;
+	impl->intr_mask(gpio, type, 1 << index, 1 << index);
+}
+
+static void
+nouveau_gpio_intr(struct nouveau_subdev *subdev)
+{
+	struct nouveau_gpio *gpio = nouveau_gpio(subdev);
+	const struct nouveau_gpio_impl *impl = (void *)nv_object(gpio)->oclass;
+	u32 hi, lo, e, i;
+
+	impl->intr_stat(gpio, &hi, &lo);
+
+	for (i = 0; e = 0, (hi | lo) && i < impl->lines; i++) {
+		if (hi & (1 << i))
+			e |= NVKM_GPIO_HI;
+		if (lo & (1 << i))
+			e |= NVKM_GPIO_LO;
+		nouveau_event_trigger(gpio->events, e, i);
+	}
 }
 
 int
-nouveau_gpio_create_(struct nouveau_object *parent,
-		     struct nouveau_object *engine,
-		     struct nouveau_oclass *oclass, int lines,
-		     int length, void **pobject)
+_nouveau_gpio_fini(struct nouveau_object *object, bool suspend)
 {
-	struct nouveau_gpio *gpio;
-	int ret;
+	const struct nouveau_gpio_impl *impl = (void *)object->oclass;
+	struct nouveau_gpio *gpio = nouveau_gpio(object);
+	u32 mask = (1 << impl->lines) - 1;
 
-	ret = nouveau_subdev_create_(parent, engine, oclass, 0, "GPIO", "gpio",
-				     length, pobject);
-	gpio = *pobject;
-	if (ret)
-		return ret;
+	impl->intr_mask(gpio, NVKM_GPIO_TOGGLED, mask, 0);
+	impl->intr_stat(gpio, &mask, &mask);
 
-	ret = nouveau_event_create(lines, &gpio->events);
-	if (ret)
-		return ret;
-
-	gpio->find = nouveau_gpio_find;
-	gpio->set  = nouveau_gpio_set;
-	gpio->get  = nouveau_gpio_get;
-	return 0;
+	return nouveau_subdev_fini(&gpio->base, suspend);
 }
 
 static struct dmi_system_id gpio_reset_ids[] = {
@@ -147,12 +164,73 @@
 };
 
 int
-nouveau_gpio_init(struct nouveau_gpio *gpio)
+_nouveau_gpio_init(struct nouveau_object *object)
 {
-	int ret = nouveau_subdev_init(&gpio->base);
-	if (ret == 0 && gpio->reset) {
-		if (dmi_check_system(gpio_reset_ids))
-			gpio->reset(gpio, DCB_GPIO_UNUSED);
-	}
+	struct nouveau_gpio *gpio = nouveau_gpio(object);
+	int ret;
+
+	ret = nouveau_subdev_init(&gpio->base);
+	if (ret)
+		return ret;
+
+	if (gpio->reset && dmi_check_system(gpio_reset_ids))
+		gpio->reset(gpio, DCB_GPIO_UNUSED);
+
 	return ret;
 }
+
+void
+_nouveau_gpio_dtor(struct nouveau_object *object)
+{
+	struct nouveau_gpio *gpio = (void *)object;
+	nouveau_event_destroy(&gpio->events);
+	nouveau_subdev_destroy(&gpio->base);
+}
+
+int
+nouveau_gpio_create_(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass,
+		     int length, void **pobject)
+{
+	const struct nouveau_gpio_impl *impl = (void *)oclass;
+	struct nouveau_gpio *gpio;
+	int ret;
+
+	ret = nouveau_subdev_create_(parent, engine, oclass, 0, "GPIO", "gpio",
+				     length, pobject);
+	gpio = *pobject;
+	if (ret)
+		return ret;
+
+	gpio->find = nouveau_gpio_find;
+	gpio->set  = nouveau_gpio_set;
+	gpio->get  = nouveau_gpio_get;
+	gpio->reset = impl->reset;
+
+	ret = nouveau_event_create(2, impl->lines, &gpio->events);
+	if (ret)
+		return ret;
+
+	gpio->events->priv = gpio;
+	gpio->events->enable = nouveau_gpio_intr_enable;
+	gpio->events->disable = nouveau_gpio_intr_disable;
+	nv_subdev(gpio)->intr = nouveau_gpio_intr;
+	return 0;
+}
+
+int
+_nouveau_gpio_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		   struct nouveau_oclass *oclass, void *data, u32 size,
+		   struct nouveau_object **pobject)
+{
+	struct nouveau_gpio *gpio;
+	int ret;
+
+	ret = nouveau_gpio_create(parent, engine, oclass, &gpio);
+	*pobject = nv_object(gpio);
+	if (ret)
+		return ret;
+
+	return 0;
+}

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv10.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv10.c
index 76d5d54..27ad23e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv10.c

@@ -26,10 +26,6 @@
 
 #include "priv.h"
 
-struct nv10_gpio_priv {
-	struct nouveau_gpio base;
-};
-
 static int
 nv10_gpio_sense(struct nouveau_gpio *gpio, int line)
 {
@@ -83,95 +79,38 @@
 }
 
 static void
-nv10_gpio_intr(struct nouveau_subdev *subdev)
+nv10_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
 {
-	struct nv10_gpio_priv *priv = (void *)subdev;
-	u32 intr = nv_rd32(priv, 0x001104);
-	u32 hi = (intr & 0x0000ffff) >> 0;
-	u32 lo = (intr & 0xffff0000) >> 16;
-	int i;
-
-	for (i = 0; (hi | lo) && i < 32; i++) {
-		if ((hi | lo) & (1 << i))
-			nouveau_event_trigger(priv->base.events, i);
-	}
-
-	nv_wr32(priv, 0x001104, intr);
+	u32 intr = nv_rd32(gpio, 0x001104);
+	u32 stat = nv_rd32(gpio, 0x001144) & intr;
+	*lo = (stat & 0xffff0000) >> 16;
+	*hi = (stat & 0x0000ffff);
+	nv_wr32(gpio, 0x001104, intr);
 }
 
 static void
-nv10_gpio_intr_enable(struct nouveau_event *event, int line)
+nv10_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
 {
-	nv_wr32(event->priv, 0x001104, 0x00010001 << line);
-	nv_mask(event->priv, 0x001144, 0x00010001 << line, 0x00010001 << line);
+	u32 inte = nv_rd32(gpio, 0x001144);
+	if (type & NVKM_GPIO_LO)
+		inte = (inte & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte = (inte & ~mask) | data;
+	nv_wr32(gpio, 0x001144, inte);
 }
 
-static void
-nv10_gpio_intr_disable(struct nouveau_event *event, int line)
-{
-	nv_wr32(event->priv, 0x001104, 0x00010001 << line);
-	nv_mask(event->priv, 0x001144, 0x00010001 << line, 0x00000000);
-}
-
-static int
-nv10_gpio_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	       struct nouveau_oclass *oclass, void *data, u32 size,
-	       struct nouveau_object **pobject)
-{
-	struct nv10_gpio_priv *priv;
-	int ret;
-
-	ret = nouveau_gpio_create(parent, engine, oclass, 16, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	priv->base.drive = nv10_gpio_drive;
-	priv->base.sense = nv10_gpio_sense;
-	priv->base.events->priv = priv;
-	priv->base.events->enable = nv10_gpio_intr_enable;
-	priv->base.events->disable = nv10_gpio_intr_disable;
-	nv_subdev(priv)->intr = nv10_gpio_intr;
-	return 0;
-}
-
-static void
-nv10_gpio_dtor(struct nouveau_object *object)
-{
-	struct nv10_gpio_priv *priv = (void *)object;
-	nouveau_gpio_destroy(&priv->base);
-}
-
-static int
-nv10_gpio_init(struct nouveau_object *object)
-{
-	struct nv10_gpio_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_gpio_init(&priv->base);
-	if (ret)
-		return ret;
-
-	nv_wr32(priv, 0x001144, 0x00000000);
-	nv_wr32(priv, 0x001104, 0xffffffff);
-	return 0;
-}
-
-static int
-nv10_gpio_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv10_gpio_priv *priv = (void *)object;
-	nv_wr32(priv, 0x001144, 0x00000000);
-	return nouveau_gpio_fini(&priv->base, suspend);
-}
-
-struct nouveau_oclass
-nv10_gpio_oclass = {
-	.handle = NV_SUBDEV(GPIO, 0x10),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv10_gpio_ctor,
-		.dtor = nv10_gpio_dtor,
-		.init = nv10_gpio_init,
-		.fini = nv10_gpio_fini,
+struct nouveau_oclass *
+nv10_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0x10),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_gpio_ctor,
+		.dtor = _nouveau_gpio_dtor,
+		.init = _nouveau_gpio_init,
+		.fini = _nouveau_gpio_fini,
 	},
-};
+	.lines = 16,
+	.intr_stat = nv10_gpio_intr_stat,
+	.intr_mask = nv10_gpio_intr_mask,
+	.drive = nv10_gpio_drive,
+	.sense = nv10_gpio_sense,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
index 2ef7747..1864fa9 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c

@@ -24,15 +24,10 @@
 
 #include "priv.h"
 
-struct nv50_gpio_priv {
-	struct nouveau_gpio base;
-};
-
-static void
+void
 nv50_gpio_reset(struct nouveau_gpio *gpio, u8 match)
 {
 	struct nouveau_bios *bios = nouveau_bios(gpio);
-	struct nv50_gpio_priv *priv = (void *)gpio;
 	u8 ver, len;
 	u16 entry;
 	int ent = -1;
@@ -55,7 +50,7 @@
 
 		gpio->set(gpio, 0, func, line, defs);
 
-		nv_mask(priv, reg, 0x00010001 << lsh, val << lsh);
+		nv_mask(gpio, reg, 0x00010001 << lsh, val << lsh);
 	}
 }
 
@@ -72,7 +67,7 @@
 	return 0;
 }
 
-static int
+int
 nv50_gpio_drive(struct nouveau_gpio *gpio, int line, int dir, int out)
 {
 	u32 reg, shift;
@@ -84,7 +79,7 @@
 	return 0;
 }
 
-static int
+int
 nv50_gpio_sense(struct nouveau_gpio *gpio, int line)
 {
 	u32 reg, shift;
@@ -95,119 +90,40 @@
 	return !!(nv_rd32(gpio, reg) & (4 << shift));
 }
 
-void
-nv50_gpio_intr(struct nouveau_subdev *subdev)
+static void
+nv50_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
 {
-	struct nv50_gpio_priv *priv = (void *)subdev;
-	u32 intr0, intr1 = 0;
-	u32 hi, lo;
-	int i;
-
-	intr0 = nv_rd32(priv, 0xe054) & nv_rd32(priv, 0xe050);
-	if (nv_device(priv)->chipset > 0x92)
-		intr1 = nv_rd32(priv, 0xe074) & nv_rd32(priv, 0xe070);
-
-	hi = (intr0 & 0x0000ffff) | (intr1 << 16);
-	lo = (intr0 >> 16) | (intr1 & 0xffff0000);
-
-	for (i = 0; (hi | lo) && i < 32; i++) {
-		if ((hi | lo) & (1 << i))
-			nouveau_event_trigger(priv->base.events, i);
-	}
-
-	nv_wr32(priv, 0xe054, intr0);
-	if (nv_device(priv)->chipset > 0x92)
-		nv_wr32(priv, 0xe074, intr1);
+	u32 intr = nv_rd32(gpio, 0x00e054);
+	u32 stat = nv_rd32(gpio, 0x00e050) & intr;
+	*lo = (stat & 0xffff0000) >> 16;
+	*hi = (stat & 0x0000ffff);
+	nv_wr32(gpio, 0x00e054, intr);
 }
 
-void
-nv50_gpio_intr_enable(struct nouveau_event *event, int line)
+static void
+nv50_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
 {
-	const u32 addr = line < 16 ? 0xe050 : 0xe070;
-	const u32 mask = 0x00010001 << (line & 0xf);
-	nv_wr32(event->priv, addr + 0x04, mask);
-	nv_mask(event->priv, addr + 0x00, mask, mask);
+	u32 inte = nv_rd32(gpio, 0x00e050);
+	if (type & NVKM_GPIO_LO)
+		inte = (inte & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte = (inte & ~mask) | data;
+	nv_wr32(gpio, 0x00e050, inte);
 }
 
-void
-nv50_gpio_intr_disable(struct nouveau_event *event, int line)
-{
-	const u32 addr = line < 16 ? 0xe050 : 0xe070;
-	const u32 mask = 0x00010001 << (line & 0xf);
-	nv_wr32(event->priv, addr + 0x04, mask);
-	nv_mask(event->priv, addr + 0x00, mask, 0x00000000);
-}
-
-static int
-nv50_gpio_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	       struct nouveau_oclass *oclass, void *data, u32 size,
-	       struct nouveau_object **pobject)
-{
-	struct nv50_gpio_priv *priv;
-	int ret;
-
-	ret = nouveau_gpio_create(parent, engine, oclass,
-				  nv_device(parent)->chipset > 0x92 ? 32 : 16,
-				  &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	priv->base.reset = nv50_gpio_reset;
-	priv->base.drive = nv50_gpio_drive;
-	priv->base.sense = nv50_gpio_sense;
-	priv->base.events->priv = priv;
-	priv->base.events->enable = nv50_gpio_intr_enable;
-	priv->base.events->disable = nv50_gpio_intr_disable;
-	nv_subdev(priv)->intr = nv50_gpio_intr;
-	return 0;
-}
-
-void
-nv50_gpio_dtor(struct nouveau_object *object)
-{
-	struct nv50_gpio_priv *priv = (void *)object;
-	nouveau_gpio_destroy(&priv->base);
-}
-
-int
-nv50_gpio_init(struct nouveau_object *object)
-{
-	struct nv50_gpio_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_gpio_init(&priv->base);
-	if (ret)
-		return ret;
-
-	/* disable, and ack any pending gpio interrupts */
-	nv_wr32(priv, 0xe050, 0x00000000);
-	nv_wr32(priv, 0xe054, 0xffffffff);
-	if (nv_device(priv)->chipset > 0x92) {
-		nv_wr32(priv, 0xe070, 0x00000000);
-		nv_wr32(priv, 0xe074, 0xffffffff);
-	}
-
-	return 0;
-}
-
-int
-nv50_gpio_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv50_gpio_priv *priv = (void *)object;
-	nv_wr32(priv, 0xe050, 0x00000000);
-	if (nv_device(priv)->chipset > 0x92)
-		nv_wr32(priv, 0xe070, 0x00000000);
-	return nouveau_gpio_fini(&priv->base, suspend);
-}
-
-struct nouveau_oclass
-nv50_gpio_oclass = {
-	.handle = NV_SUBDEV(GPIO, 0x50),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv50_gpio_ctor,
-		.dtor = nv50_gpio_dtor,
-		.init = nv50_gpio_init,
-		.fini = nv50_gpio_fini,
+struct nouveau_oclass *
+nv50_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0x50),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_gpio_ctor,
+		.dtor = _nouveau_gpio_dtor,
+		.init = _nouveau_gpio_init,
+		.fini = _nouveau_gpio_fini,
 	},
-};
+	.lines = 16,
+	.intr_stat = nv50_gpio_intr_stat,
+	.intr_mask = nv50_gpio_intr_mask,
+	.drive = nv50_gpio_drive,
+	.sense = nv50_gpio_sense,
+	.reset = nv50_gpio_reset,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv92.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv92.c
new file mode 100644
index 0000000..252083d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv92.c

@@ -0,0 +1,74 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "priv.h"
+
+void
+nv92_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
+{
+	u32 intr0 = nv_rd32(gpio, 0x00e054);
+	u32 intr1 = nv_rd32(gpio, 0x00e074);
+	u32 stat0 = nv_rd32(gpio, 0x00e050) & intr0;
+	u32 stat1 = nv_rd32(gpio, 0x00e070) & intr1;
+	*lo = (stat1 & 0xffff0000) | (stat0 >> 16);
+	*hi = (stat1 << 16) | (stat0 & 0x0000ffff);
+	nv_wr32(gpio, 0x00e054, intr0);
+	nv_wr32(gpio, 0x00e074, intr1);
+}
+
+void
+nv92_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
+{
+	u32 inte0 = nv_rd32(gpio, 0x00e050);
+	u32 inte1 = nv_rd32(gpio, 0x00e070);
+	if (type & NVKM_GPIO_LO)
+		inte0 = (inte0 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff);
+	mask >>= 16;
+	data >>= 16;
+	if (type & NVKM_GPIO_LO)
+		inte1 = (inte1 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte1 = (inte1 & ~mask) | data;
+	nv_wr32(gpio, 0x00e050, inte0);
+	nv_wr32(gpio, 0x00e070, inte1);
+}
+
+struct nouveau_oclass *
+nv92_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0x92),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_gpio_ctor,
+		.dtor = _nouveau_gpio_dtor,
+		.init = _nouveau_gpio_init,
+		.fini = _nouveau_gpio_fini,
+	},
+	.lines = 32,
+	.intr_stat = nv92_gpio_intr_stat,
+	.intr_mask = nv92_gpio_intr_mask,
+	.drive = nv50_gpio_drive,
+	.sense = nv50_gpio_sense,
+	.reset = nv50_gpio_reset,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
index 010431e..a4682b0 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c

@@ -24,15 +24,10 @@
 
 #include "priv.h"
 
-struct nvd0_gpio_priv {
-	struct nouveau_gpio base;
-};
-
 void
 nvd0_gpio_reset(struct nouveau_gpio *gpio, u8 match)
 {
 	struct nouveau_bios *bios = nouveau_bios(gpio);
-	struct nvd0_gpio_priv *priv = (void *)gpio;
 	u8 ver, len;
 	u16 entry;
 	int ent = -1;
@@ -51,9 +46,9 @@
 
 		gpio->set(gpio, 0, func, line, defs);
 
-		nv_mask(priv, 0x00d610 + (line * 4), 0xff, unk0);
+		nv_mask(gpio, 0x00d610 + (line * 4), 0xff, unk0);
 		if (unk1--)
-			nv_mask(priv, 0x00d740 + (unk1 * 4), 0xff, line);
+			nv_mask(gpio, 0x00d740 + (unk1 * 4), 0xff, line);
 	}
 }
 
@@ -72,36 +67,19 @@
 	return !!(nv_rd32(gpio, 0x00d610 + (line * 4)) & 0x00004000);
 }
 
-static int
-nvd0_gpio_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	       struct nouveau_oclass *oclass, void *data, u32 size,
-	       struct nouveau_object **pobject)
-{
-	struct nvd0_gpio_priv *priv;
-	int ret;
-
-	ret = nouveau_gpio_create(parent, engine, oclass, 32, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	priv->base.reset = nvd0_gpio_reset;
-	priv->base.drive = nvd0_gpio_drive;
-	priv->base.sense = nvd0_gpio_sense;
-	priv->base.events->priv = priv;
-	priv->base.events->enable = nv50_gpio_intr_enable;
-	priv->base.events->disable = nv50_gpio_intr_disable;
-	nv_subdev(priv)->intr = nv50_gpio_intr;
-	return 0;
-}
-
-struct nouveau_oclass
-nvd0_gpio_oclass = {
-	.handle = NV_SUBDEV(GPIO, 0xd0),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nvd0_gpio_ctor,
-		.dtor = nv50_gpio_dtor,
-		.init = nv50_gpio_init,
-		.fini = nv50_gpio_fini,
+struct nouveau_oclass *
+nvd0_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0xd0),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_gpio_ctor,
+		.dtor = _nouveau_gpio_dtor,
+		.init = _nouveau_gpio_init,
+		.fini = _nouveau_gpio_fini,
 	},
-};
+	.lines = 32,
+	.intr_stat = nv92_gpio_intr_stat,
+	.intr_mask = nv92_gpio_intr_mask,
+	.drive = nvd0_gpio_drive,
+	.sense = nvd0_gpio_sense,
+	.reset = nvd0_gpio_reset,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nve0.c
index 16b8c5b..e1145b4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nve0.c

@@ -24,108 +24,51 @@
 
 #include "priv.h"
 
-struct nve0_gpio_priv {
-	struct nouveau_gpio base;
-};
-
-void
-nve0_gpio_intr(struct nouveau_subdev *subdev)
+static void
+nve0_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
 {
-	struct nve0_gpio_priv *priv = (void *)subdev;
-	u32 intr0 = nv_rd32(priv, 0xdc00) & nv_rd32(priv, 0xdc08);
-	u32 intr1 = nv_rd32(priv, 0xdc80) & nv_rd32(priv, 0xdc88);
-	u32 hi = (intr0 & 0x0000ffff) | (intr1 << 16);
-	u32 lo = (intr0 >> 16) | (intr1 & 0xffff0000);
-	int i;
-
-	for (i = 0; (hi | lo) && i < 32; i++) {
-		if ((hi | lo) & (1 << i))
-			nouveau_event_trigger(priv->base.events, i);
-	}
-
-	nv_wr32(priv, 0xdc00, intr0);
-	nv_wr32(priv, 0xdc88, intr1);
+	u32 intr0 = nv_rd32(gpio, 0x00dc00);
+	u32 intr1 = nv_rd32(gpio, 0x00dc80);
+	u32 stat0 = nv_rd32(gpio, 0x00dc08) & intr0;
+	u32 stat1 = nv_rd32(gpio, 0x00dc88) & intr1;
+	*lo = (stat1 & 0xffff0000) | (stat0 >> 16);
+	*hi = (stat1 << 16) | (stat0 & 0x0000ffff);
+	nv_wr32(gpio, 0x00dc00, intr0);
+	nv_wr32(gpio, 0x00dc80, intr1);
 }
 
 void
-nve0_gpio_intr_enable(struct nouveau_event *event, int line)
+nve0_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
 {
-	const u32 addr = line < 16 ? 0xdc00 : 0xdc80;
-	const u32 mask = 0x00010001 << (line & 0xf);
-	nv_wr32(event->priv, addr + 0x08, mask);
-	nv_mask(event->priv, addr + 0x00, mask, mask);
+	u32 inte0 = nv_rd32(gpio, 0x00dc08);
+	u32 inte1 = nv_rd32(gpio, 0x00dc88);
+	if (type & NVKM_GPIO_LO)
+		inte0 = (inte0 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff);
+	mask >>= 16;
+	data >>= 16;
+	if (type & NVKM_GPIO_LO)
+		inte1 = (inte1 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte1 = (inte1 & ~mask) | data;
+	nv_wr32(gpio, 0x00dc08, inte0);
+	nv_wr32(gpio, 0x00dc88, inte1);
 }
 
-void
-nve0_gpio_intr_disable(struct nouveau_event *event, int line)
-{
-	const u32 addr = line < 16 ? 0xdc00 : 0xdc80;
-	const u32 mask = 0x00010001 << (line & 0xf);
-	nv_wr32(event->priv, addr + 0x08, mask);
-	nv_mask(event->priv, addr + 0x00, mask, 0x00000000);
-}
-
-int
-nve0_gpio_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nve0_gpio_priv *priv = (void *)object;
-	nv_wr32(priv, 0xdc08, 0x00000000);
-	nv_wr32(priv, 0xdc88, 0x00000000);
-	return nouveau_gpio_fini(&priv->base, suspend);
-}
-
-int
-nve0_gpio_init(struct nouveau_object *object)
-{
-	struct nve0_gpio_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_gpio_init(&priv->base);
-	if (ret)
-		return ret;
-
-	nv_wr32(priv, 0xdc00, 0xffffffff);
-	nv_wr32(priv, 0xdc80, 0xffffffff);
-	return 0;
-}
-
-void
-nve0_gpio_dtor(struct nouveau_object *object)
-{
-	struct nve0_gpio_priv *priv = (void *)object;
-	nouveau_gpio_destroy(&priv->base);
-}
-
-static int
-nve0_gpio_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	       struct nouveau_oclass *oclass, void *data, u32 size,
-	       struct nouveau_object **pobject)
-{
-	struct nve0_gpio_priv *priv;
-	int ret;
-
-	ret = nouveau_gpio_create(parent, engine, oclass, 32, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	priv->base.reset = nvd0_gpio_reset;
-	priv->base.drive = nvd0_gpio_drive;
-	priv->base.sense = nvd0_gpio_sense;
-	priv->base.events->priv = priv;
-	priv->base.events->enable = nve0_gpio_intr_enable;
-	priv->base.events->disable = nve0_gpio_intr_disable;
-	nv_subdev(priv)->intr = nve0_gpio_intr;
-	return 0;
-}
-
-struct nouveau_oclass
-nve0_gpio_oclass = {
-	.handle = NV_SUBDEV(GPIO, 0xe0),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nve0_gpio_ctor,
-		.dtor = nv50_gpio_dtor,
-		.init = nve0_gpio_init,
-		.fini = nve0_gpio_fini,
+struct nouveau_oclass *
+nve0_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0xe0),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_gpio_ctor,
+		.dtor = _nouveau_gpio_dtor,
+		.init = _nouveau_gpio_init,
+		.fini = _nouveau_gpio_fini,
 	},
-};
+	.lines = 32,
+	.intr_stat = nve0_gpio_intr_stat,
+	.intr_mask = nve0_gpio_intr_mask,
+	.drive = nvd0_gpio_drive,
+	.sense = nvd0_gpio_sense,
+	.reset = nvd0_gpio_reset,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h b/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h
index 2ee1c89..e1724df 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h

@@ -3,15 +3,65 @@
 
 #include <subdev/gpio.h>
 
-void nv50_gpio_dtor(struct nouveau_object *);
-int  nv50_gpio_init(struct nouveau_object *);
-int  nv50_gpio_fini(struct nouveau_object *, bool);
-void nv50_gpio_intr(struct nouveau_subdev *);
-void nv50_gpio_intr_enable(struct nouveau_event *, int line);
-void nv50_gpio_intr_disable(struct nouveau_event *, int line);
+#define nouveau_gpio_create(p,e,o,d)                                           \
+	nouveau_gpio_create_((p), (e), (o), sizeof(**d), (void **)d)
+#define nouveau_gpio_destroy(p) ({                                             \
+	struct nouveau_gpio *gpio = (p);                                       \
+	_nouveau_gpio_dtor(nv_object(gpio));                                   \
+})
+#define nouveau_gpio_init(p) ({                                                \
+	struct nouveau_gpio *gpio = (p);                                       \
+	_nouveau_gpio_init(nv_object(gpio));                                   \
+})
+#define nouveau_gpio_fini(p,s) ({                                              \
+	struct nouveau_gpio *gpio = (p);                                       \
+	_nouveau_gpio_fini(nv_object(gpio), (s));                              \
+})
+
+int  nouveau_gpio_create_(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, int, void **);
+int  _nouveau_gpio_ctor(struct nouveau_object *, struct nouveau_object *,
+			struct nouveau_oclass *, void *, u32,
+			struct nouveau_object **);
+void _nouveau_gpio_dtor(struct nouveau_object *);
+int  _nouveau_gpio_init(struct nouveau_object *);
+int  _nouveau_gpio_fini(struct nouveau_object *, bool);
+
+struct nouveau_gpio_impl {
+	struct nouveau_oclass base;
+	int lines;
+
+	/* read and ack pending interrupts, returning only data
+	 * for lines that have not been masked off, while still
+	 * performing the ack for anything that was pending.
+	 */
+	void (*intr_stat)(struct nouveau_gpio *, u32 *, u32 *);
+
+	/* mask on/off interrupts for hi/lo transitions on a
+	 * given set of gpio lines
+	 */
+	void (*intr_mask)(struct nouveau_gpio *, u32, u32, u32);
+
+	/* configure gpio direction and output value */
+	int  (*drive)(struct nouveau_gpio *, int line, int dir, int out);
+
+	/* sense current state of given gpio line */
+	int  (*sense)(struct nouveau_gpio *, int line);
+
+	/*XXX*/
+	void (*reset)(struct nouveau_gpio *, u8);
+};
+
+void nv50_gpio_reset(struct nouveau_gpio *, u8);
+int  nv50_gpio_drive(struct nouveau_gpio *, int, int, int);
+int  nv50_gpio_sense(struct nouveau_gpio *, int);
+
+void nv92_gpio_intr_stat(struct nouveau_gpio *, u32 *, u32 *);
+void nv92_gpio_intr_mask(struct nouveau_gpio *, u32, u32, u32);
 
 void nvd0_gpio_reset(struct nouveau_gpio *, u8);
 int  nvd0_gpio_drive(struct nouveau_gpio *, int, int, int);
 int  nvd0_gpio_sense(struct nouveau_gpio *, int);
 
+
 #endif

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/anx9805.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/anx9805.c
index 4b195ac..2c2731a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/anx9805.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/anx9805.c

@@ -22,7 +22,7 @@
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 
-#include <subdev/i2c.h>
+#include "port.h"
 
 struct anx9805_i2c_port {
 	struct nouveau_i2c_port base;
@@ -37,6 +37,8 @@
 	struct nouveau_i2c_port *mast = (void *)nv_object(chan)->parent;
 	u8 tmp, i;
 
+	DBG("ANX9805 train %d 0x%02x %d\n", link_nr, link_bw, enh);
+
 	nv_wri2cr(mast, chan->addr, 0xa0, link_bw);
 	nv_wri2cr(mast, chan->addr, 0xa1, link_nr | (enh ? 0x80 : 0x00));
 	nv_wri2cr(mast, chan->addr, 0xa2, 0x01);
@@ -60,21 +62,29 @@
 }
 
 static int
-anx9805_aux(struct nouveau_i2c_port *port, u8 type, u32 addr, u8 *data, u8 size)
+anx9805_aux(struct nouveau_i2c_port *port, bool retry,
+	    u8 type, u32 addr, u8 *data, u8 size)
 {
 	struct anx9805_i2c_port *chan = (void *)port;
 	struct nouveau_i2c_port *mast = (void *)nv_object(chan)->parent;
 	int i, ret = -ETIMEDOUT;
+	u8 buf[16] = {};
 	u8 tmp;
 
+	DBG("%02x %05x %d\n", type, addr, size);
+
 	tmp = nv_rdi2cr(mast, chan->ctrl, 0x07) & ~0x04;
 	nv_wri2cr(mast, chan->ctrl, 0x07, tmp | 0x04);
 	nv_wri2cr(mast, chan->ctrl, 0x07, tmp);
 	nv_wri2cr(mast, chan->ctrl, 0xf7, 0x01);
 
 	nv_wri2cr(mast, chan->addr, 0xe4, 0x80);
-	for (i = 0; !(type & 1) && i < size; i++)
-		nv_wri2cr(mast, chan->addr, 0xf0 + i, data[i]);
+	if (!(type & 1)) {
+		memcpy(buf, data, size);
+		DBG("%16ph", buf);
+		for (i = 0; i < size; i++)
+			nv_wri2cr(mast, chan->addr, 0xf0 + i, buf[i]);
+	}
 	nv_wri2cr(mast, chan->addr, 0xe5, ((size - 1) << 4) | type);
 	nv_wri2cr(mast, chan->addr, 0xe6, (addr & 0x000ff) >>  0);
 	nv_wri2cr(mast, chan->addr, 0xe7, (addr & 0x0ff00) >>  8);
@@ -93,8 +103,13 @@
 		goto done;
 	}
 
-	for (i = 0; (type & 1) && i < size; i++)
-		data[i] = nv_rdi2cr(mast, chan->addr, 0xf0 + i);
+	if (type & 1) {
+		for (i = 0; i < size; i++)
+			buf[i] = nv_rdi2cr(mast, chan->addr, 0xf0 + i);
+		DBG("%16ph", buf);
+		memcpy(data, buf, size);
+	}
+
 	ret = 0;
 done:
 	nv_wri2cr(mast, chan->ctrl, 0xf7, 0x01);

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
index 5de074a..02eb42b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c

@@ -22,15 +22,19 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/i2c.h>
+#include "priv.h"
 
 int
 nv_rdaux(struct nouveau_i2c_port *port, u32 addr, u8 *data, u8 size)
 {
+	struct nouveau_i2c *i2c = nouveau_i2c(port);
 	if (port->func->aux) {
-		if (port->func->acquire)
-			port->func->acquire(port);
-		return port->func->aux(port, 9, addr, data, size);
+		int ret = i2c->acquire(port, 0);
+		if (ret == 0) {
+			ret = port->func->aux(port, true, 9, addr, data, size);
+			i2c->release(port);
+		}
+		return ret;
 	}
 	return -ENODEV;
 }
@@ -38,10 +42,14 @@
 int
 nv_wraux(struct nouveau_i2c_port *port, u32 addr, u8 *data, u8 size)
 {
+	struct nouveau_i2c *i2c = nouveau_i2c(port);
 	if (port->func->aux) {
-		if (port->func->acquire)
-			port->func->acquire(port);
-		return port->func->aux(port, 8, addr, data, size);
+		int ret = i2c->acquire(port, 0);
+		if (ret == 0) {
+			ret = port->func->aux(port, true, 8, addr, data, size);
+			i2c->release(port);
+		}
+		return ret;
 	}
 	return -ENODEV;
 }
@@ -50,13 +58,16 @@
 aux_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 {
 	struct nouveau_i2c_port *port = adap->algo_data;
+	struct nouveau_i2c *i2c = nouveau_i2c(port);
 	struct i2c_msg *msg = msgs;
 	int ret, mcnt = num;
 
 	if (!port->func->aux)
 		return -ENODEV;
-	if ( port->func->acquire)
-		port->func->acquire(port);
+
+	ret = i2c->acquire(port, 0);
+	if (ret)
+		return ret;
 
 	while (mcnt--) {
 		u8 remaining = msg->len;
@@ -74,9 +85,11 @@
 			if (mcnt || remaining > 16)
 				cmd |= 4; /* MOT */
 
-			ret = port->func->aux(port, cmd, msg->addr, ptr, cnt);
-			if (ret < 0)
+			ret = port->func->aux(port, true, cmd, msg->addr, ptr, cnt);
+			if (ret < 0) {
+				i2c->release(port);
 				return ret;
+			}
 
 			ptr += cnt;
 			remaining -= cnt;
@@ -85,6 +98,7 @@
 		msg++;
 	}
 
+	i2c->release(port);
 	return num;
 }
 

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
index 378e05b..09ba2cc 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c

@@ -23,13 +23,16 @@
  */
 
 #include <core/option.h>
+#include <core/event.h>
 
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/i2c.h>
-#include <subdev/i2c.h>
 #include <subdev/vga.h>
 
+#include "priv.h"
+#include "pad.h"
+
 /******************************************************************************
  * interface to linux i2c bit-banging algorithm
  *****************************************************************************/
@@ -45,9 +48,15 @@
 {
 	struct i2c_algo_bit_data *bit = adap->algo_data;
 	struct nouveau_i2c_port *port = bit->data;
-	if (port->func->acquire)
-		port->func->acquire(port);
-	return 0;
+	return nouveau_i2c(port)->acquire(port, bit->timeout);
+}
+
+static void
+nouveau_i2c_post_xfer(struct i2c_adapter *adap) /* i2c-algo-bit post_xfer hook */
+{
+	struct i2c_algo_bit_data *bit = adap->algo_data;
+	struct nouveau_i2c_port *port = bit->data;
+	nouveau_i2c(port)->release(port); /* void hook: no 'return' of a void expression */
 }
 
 static void
@@ -82,6 +91,15 @@
  * base i2c "port" class implementation
  *****************************************************************************/
 
+int
+_nouveau_i2c_port_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_i2c_port *port = (void *)object;
+	struct nvkm_i2c_pad *pad = nvkm_i2c_pad(port);
+	nv_ofuncs(pad)->fini(nv_object(pad), suspend);
+	return nouveau_object_fini(&port->base, suspend);
+}
+
 void
 _nouveau_i2c_port_dtor(struct nouveau_object *object)
 {
@@ -98,7 +116,7 @@
 			 const struct nouveau_i2c_func *func,
 			 int size, void **pobject)
 {
-	struct nouveau_device *device = nv_device(parent);
+	struct nouveau_device *device = nv_device(engine);
 	struct nouveau_i2c *i2c = (void *)engine;
 	struct nouveau_i2c_port *port;
 	int ret;
@@ -113,8 +131,9 @@
 	port->adapter.owner = THIS_MODULE;
 	port->adapter.dev.parent = nv_device_base(device);
 	port->index = index;
+	port->aux = -1;
 	port->func = func;
-	i2c_set_adapdata(&port->adapter, i2c);
+	mutex_init(&port->mutex);
 
 	if ( algo == &nouveau_i2c_bit_algo &&
 	    !nouveau_boolopt(device->cfgopt, "NvI2C", CSTMSEL)) {
@@ -128,6 +147,7 @@
 		bit->timeout = usecs_to_jiffies(2200);
 		bit->data = port;
 		bit->pre_xfer = nouveau_i2c_pre_xfer;
+		bit->post_xfer = nouveau_i2c_post_xfer;
 		bit->setsda = nouveau_i2c_setsda;
 		bit->setscl = nouveau_i2c_setscl;
 		bit->getsda = nouveau_i2c_getsda;
@@ -141,7 +161,6 @@
 		ret = i2c_add_adapter(&port->adapter);
 	}
 
-	/* drop port's i2c subdev refcount, i2c handles this itself */
 	if (ret == 0)
 		list_add_tail(&port->head, &i2c->ports);
 	return ret;
@@ -193,6 +212,75 @@
 	return NULL;
 }
 
+static void
+nouveau_i2c_release_pad(struct nouveau_i2c_port *port)
+{
+	struct nvkm_i2c_pad *pad = nvkm_i2c_pad(port);
+	struct nouveau_i2c *i2c = nouveau_i2c(port);
+
+	if (atomic_dec_and_test(&nv_object(pad)->usecount)) {
+		nv_ofuncs(pad)->fini(nv_object(pad), false);
+		wake_up_all(&i2c->wait);
+	}
+}
+
+static int
+nouveau_i2c_try_acquire_pad(struct nouveau_i2c_port *port)
+{
+	struct nvkm_i2c_pad *pad = nvkm_i2c_pad(port);
+
+	if (atomic_add_return(1, &nv_object(pad)->usecount) != 1) {
+		struct nouveau_object *owner = (void *)pad->port;
+		do {
+			if (owner == (void *)port)
+				return 0;
+			owner = owner->parent;
+		} while(owner);
+		nouveau_i2c_release_pad(port);
+		return -EBUSY;
+	}
+
+	pad->next = port;
+	nv_ofuncs(pad)->init(nv_object(pad));
+	return 0;
+}
+
+static int
+nouveau_i2c_acquire_pad(struct nouveau_i2c_port *port, unsigned long timeout)
+{
+	struct nouveau_i2c *i2c = nouveau_i2c(port);
+
+	if (timeout) {
+		if (wait_event_timeout(i2c->wait,
+				       nouveau_i2c_try_acquire_pad(port) == 0,
+				       timeout) == 0)
+			return -EBUSY;
+	} else {
+		wait_event(i2c->wait, nouveau_i2c_try_acquire_pad(port) == 0);
+	}
+
+	return 0;
+}
+
+static void
+nouveau_i2c_release(struct nouveau_i2c_port *port)
+__releases(port->mutex) /* annotation names the lock actually dropped below */
+{
+	nouveau_i2c(port)->release_pad(port); /* give up the pad before dropping the port lock */
+	mutex_unlock(&port->mutex);
+}
+
+static int
+nouveau_i2c_acquire(struct nouveau_i2c_port *port, unsigned long timeout)
+__acquires(port->mutex) /* annotation names the lock actually taken below */
+{
+	int ret; /* 0 on success (port->mutex held), else acquire_pad()'s error */
+	mutex_lock(&port->mutex);
+	if ((ret = nouveau_i2c(port)->acquire_pad(port, timeout)))
+		mutex_unlock(&port->mutex); /* pad not acquired: do not leave the port locked */
+	return ret;
+}
+
 static int
 nouveau_i2c_identify(struct nouveau_i2c *i2c, int index, const char *what,
 		     struct nouveau_i2c_board_info *info,
@@ -237,11 +325,59 @@
 	return -ENODEV;
 }
 
+static void
+nouveau_i2c_intr_disable(struct nouveau_event *event, int type, int index)
+{
+	struct nouveau_i2c *i2c = nouveau_i2c(event->priv);
+	struct nouveau_i2c_port *port = i2c->find(i2c, index);
+	const struct nouveau_i2c_impl *impl = (void *)nv_object(i2c)->oclass;
+	if (port && port->aux >= 0)
+		impl->aux_mask(i2c, type, 1 << port->aux, 0);
+}
+
+static void
+nouveau_i2c_intr_enable(struct nouveau_event *event, int type, int index)
+{
+	struct nouveau_i2c *i2c = nouveau_i2c(event->priv);
+	struct nouveau_i2c_port *port = i2c->find(i2c, index);
+	const struct nouveau_i2c_impl *impl = (void *)nv_object(i2c)->oclass;
+	if (port && port->aux >= 0)
+		impl->aux_mask(i2c, type, 1 << port->aux, 1 << port->aux);
+}
+
+static void
+nouveau_i2c_intr(struct nouveau_subdev *subdev)
+{
+	struct nouveau_i2c_impl *impl = (void *)nv_oclass(subdev);
+	struct nouveau_i2c *i2c = nouveau_i2c(subdev);
+	struct nouveau_i2c_port *port;
+	u32 hi, lo, rq, tx, e;
+
+	if (impl->aux_stat) {
+		impl->aux_stat(i2c, &hi, &lo, &rq, &tx);
+		if (hi || lo || rq || tx) {
+			list_for_each_entry(port, &i2c->ports, head) {
+				if (e = 0, port->aux < 0)
+					continue;
+
+				if (hi & (1 << port->aux)) e |= NVKM_I2C_PLUG;
+				if (lo & (1 << port->aux)) e |= NVKM_I2C_UNPLUG;
+				if (rq & (1 << port->aux)) e |= NVKM_I2C_IRQ;
+				if (tx & (1 << port->aux)) e |= NVKM_I2C_DONE;
+
+				nouveau_event_trigger(i2c->ntfy, e, port->index);
+			}
+		}
+	}
+}
+
 int
 _nouveau_i2c_fini(struct nouveau_object *object, bool suspend)
 {
+	struct nouveau_i2c_impl *impl = (void *)nv_oclass(object);
 	struct nouveau_i2c *i2c = (void *)object;
 	struct nouveau_i2c_port *port;
+	u32 mask;
 	int ret;
 
 	list_for_each_entry(port, &i2c->ports, head) {
@@ -250,6 +386,11 @@
 			goto fail;
 	}
 
+	if ((mask = (1 << impl->aux) - 1), impl->aux_stat) {
+		impl->aux_mask(i2c, NVKM_I2C_ANY, mask, 0);
+		impl->aux_stat(i2c, &mask, &mask, &mask, &mask);
+	}
+
 	return nouveau_subdev_fini(&i2c->base, suspend);
 fail:
 	list_for_each_entry_continue_reverse(port, &i2c->ports, head) {
@@ -290,6 +431,8 @@
 	struct nouveau_i2c *i2c = (void *)object;
 	struct nouveau_i2c_port *port, *temp;
 
+	nouveau_event_destroy(&i2c->ntfy);
+
 	list_for_each_entry_safe(port, temp, &i2c->ports, head) {
 		nouveau_object_ref(NULL, (struct nouveau_object **)&port);
 	}
@@ -306,14 +449,14 @@
 nouveau_i2c_create_(struct nouveau_object *parent,
 		    struct nouveau_object *engine,
 		    struct nouveau_oclass *oclass,
-		    struct nouveau_oclass *sclass,
 		    int length, void **pobject)
 {
+	const struct nouveau_i2c_impl *impl = (void *)oclass;
 	struct nouveau_bios *bios = nouveau_bios(parent);
 	struct nouveau_i2c *i2c;
 	struct nouveau_object *object;
 	struct dcb_i2c_entry info;
-	int ret, i, j, index = -1;
+	int ret, i, j, index = -1, pad;
 	struct dcb_output outp;
 	u8  ver, hdr;
 	u32 data;
@@ -324,24 +467,48 @@
 	if (ret)
 		return ret;
 
+	nv_subdev(i2c)->intr = nouveau_i2c_intr;
 	i2c->find = nouveau_i2c_find;
 	i2c->find_type = nouveau_i2c_find_type;
+	i2c->acquire_pad = nouveau_i2c_acquire_pad;
+	i2c->release_pad = nouveau_i2c_release_pad;
+	i2c->acquire = nouveau_i2c_acquire;
+	i2c->release = nouveau_i2c_release;
 	i2c->identify = nouveau_i2c_identify;
+	init_waitqueue_head(&i2c->wait);
 	INIT_LIST_HEAD(&i2c->ports);
 
 	while (!dcb_i2c_parse(bios, ++index, &info)) {
 		if (info.type == DCB_I2C_UNUSED)
 			continue;
 
-		oclass = sclass;
+		if (info.share != DCB_I2C_UNUSED) {
+			if (info.type == DCB_I2C_NVIO_AUX)
+				pad = info.drive;
+			else
+				pad = info.share;
+			oclass = impl->pad_s;
+		} else {
+			pad = 0x100 + info.drive;
+			oclass = impl->pad_x;
+		}
+
+		ret = nouveau_object_ctor(NULL, *pobject, oclass,
+					  NULL, pad, &parent);
+		if (ret < 0)
+			continue;
+
+		oclass = impl->sclass;
 		do {
 			ret = -EINVAL;
 			if (oclass->handle == info.type) {
-				ret = nouveau_object_ctor(*pobject, *pobject,
+				ret = nouveau_object_ctor(parent, *pobject,
 							  oclass, &info,
 							  index, &object);
 			}
 		} while (ret && (++oclass)->handle);
+
+		nouveau_object_ref(NULL, &parent);
 	}
 
 	/* in addition to the busses specified in the i2c table, there
@@ -380,5 +547,28 @@
 		}
 	}
 
+	ret = nouveau_event_create(4, index, &i2c->ntfy);
+	if (ret)
+		return ret;
+
+	i2c->ntfy->priv = i2c;
+	i2c->ntfy->enable = nouveau_i2c_intr_enable;
+	i2c->ntfy->disable = nouveau_i2c_intr_disable;
+	return 0;
+}
+
+int
+_nouveau_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 size,
+		  struct nouveau_object **pobject)
+{
+	struct nouveau_i2c *i2c;
+	int ret;
+
+	ret = nouveau_i2c_create(parent, engine, oclass, &i2c);
+	*pobject = nv_object(i2c);
+	if (ret)
+		return ret;
+
 	return 0;
 }

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/bit.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/bit.c
index a6e72d3..813ffc9 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/bit.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/bit.c

@@ -22,7 +22,7 @@
  * Authors: Ben Skeggs
  */
 
-#include "subdev/i2c.h"
+#include "priv.h"
 
 #ifdef CONFIG_NOUVEAU_I2C_INTERNAL
 #define T_TIMEOUT  2200000
@@ -187,8 +187,9 @@
 	struct i2c_msg *msg = msgs;
 	int ret = 0, mcnt = num;
 
-	if (port->func->acquire)
-		port->func->acquire(port);
+	ret = nouveau_i2c(port)->acquire(port, nsecs_to_jiffies(T_TIMEOUT));
+	if (ret)
+		return ret;
 
 	while (!ret && mcnt--) {
 		u8 remaining = msg->len;
@@ -210,6 +211,7 @@
 	}
 
 	i2c_stop(port);
+	nouveau_i2c(port)->release(port);
 	return (ret < 0) ? ret : num;
 }
 #else

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv04.c
index 860d5d2..b1725bd 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv04.c

@@ -22,9 +22,10 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/i2c.h>
 #include <subdev/vga.h>
 
+#include "priv.h"
+
 struct nv04_i2c_priv {
 	struct nouveau_i2c base;
 };
@@ -115,29 +116,15 @@
 	{}
 };
 
-static int
-nv04_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	      struct nouveau_oclass *oclass, void *data, u32 size,
-	      struct nouveau_object **pobject)
-{
-	struct nv04_i2c_priv *priv;
-	int ret;
-
-	ret = nouveau_i2c_create(parent, engine, oclass, nv04_i2c_sclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-struct nouveau_oclass
-nv04_i2c_oclass = {
-	.handle = NV_SUBDEV(I2C, 0x04),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv04_i2c_ctor,
+struct nouveau_oclass *
+nv04_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0x04),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
 		.dtor = _nouveau_i2c_dtor,
 		.init = _nouveau_i2c_init,
 		.fini = _nouveau_i2c_fini,
 	},
-};
+	.sclass = nv04_i2c_sclass,
+	.pad_x = &nv04_i2c_pad_oclass,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv4e.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv4e.c
index 0c2655a..f16c87c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv4e.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv4e.c

@@ -22,9 +22,10 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/i2c.h>
 #include <subdev/vga.h>
 
+#include "priv.h"
+
 struct nv4e_i2c_priv {
 	struct nouveau_i2c base;
 };
@@ -107,29 +108,15 @@
 	{}
 };
 
-static int
-nv4e_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	      struct nouveau_oclass *oclass, void *data, u32 size,
-	      struct nouveau_object **pobject)
-{
-	struct nv4e_i2c_priv *priv;
-	int ret;
-
-	ret = nouveau_i2c_create(parent, engine, oclass, nv4e_i2c_sclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-struct nouveau_oclass
-nv4e_i2c_oclass = {
-	.handle = NV_SUBDEV(I2C, 0x4e),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv4e_i2c_ctor,
+struct nouveau_oclass *
+nv4e_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0x4e),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
 		.dtor = _nouveau_i2c_dtor,
 		.init = _nouveau_i2c_init,
 		.fini = _nouveau_i2c_fini,
 	},
-};
+	.sclass = nv4e_i2c_sclass,
+	.pad_x = &nv04_i2c_pad_oclass,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.c
index a8d67a2..7b8756d 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.c

@@ -121,29 +121,15 @@
 	{}
 };
 
-static int
-nv50_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	      struct nouveau_oclass *oclass, void *data, u32 size,
-	      struct nouveau_object **pobject)
-{
-	struct nv50_i2c_priv *priv;
-	int ret;
-
-	ret = nouveau_i2c_create(parent, engine, oclass, nv50_i2c_sclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-struct nouveau_oclass
-nv50_i2c_oclass = {
-	.handle = NV_SUBDEV(I2C, 0x50),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv50_i2c_ctor,
+struct nouveau_oclass *
+nv50_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0x50),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
 		.dtor = _nouveau_i2c_dtor,
 		.init = _nouveau_i2c_init,
 		.fini = _nouveau_i2c_fini,
 	},
-};
+	.sclass = nv50_i2c_sclass,
+	.pad_x = &nv04_i2c_pad_oclass,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.h b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.h
index 4e5ba48..5d2a774 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv50.h

@@ -1,7 +1,7 @@
 #ifndef __NV50_I2C_H__
 #define __NV50_I2C_H__
 
-#include <subdev/i2c.h>
+#include "priv.h"
 
 struct nv50_i2c_priv {
 	struct nouveau_i2c base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv94.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv94.c
index df6d3e4..f59c3a2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nv94.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nv94.c

@@ -24,6 +24,36 @@
 
 #include "nv50.h"
 
+void
+nv94_aux_stat(struct nouveau_i2c *i2c, u32 *hi, u32 *lo, u32 *rq, u32 *tx)
+{
+	u32 intr = nv_rd32(i2c, 0x00e06c);	/* pending bits; written back below as the ack */
+	u32 stat = nv_rd32(i2c, 0x00e068) & intr, i;	/* drop bits not set in 0x00e068 */
+	for (i = 0, *hi = *lo = *rq = *tx = 0; i < 8; i++) {	/* one nibble per channel */
+		if ((stat & (1u << (i * 4)))) *hi |= 1 << i;
+		if ((stat & (2u << (i * 4)))) *lo |= 1 << i;
+		if ((stat & (4u << (i * 4)))) *rq |= 1 << i;
+		if ((stat & (8u << (i * 4)))) *tx |= 1 << i;	/* unsigned: 8 << 28 overflows int */
+	}
+	nv_wr32(i2c, 0x00e06c, intr);	/* ack everything read, even bits dropped from stat */
+}
+
+void
+nv94_aux_mask(struct nouveau_i2c *i2c, u32 type, u32 mask, u32 data)
+{
+	u32 temp = nv_rd32(i2c, 0x00e068), i;	/* read-modify-write of 0x00e068 */
+	for (i = 0; i < 8; i++) {
+		if (mask & (1 << i)) {	/* channels not selected in mask are left untouched */
+			if (!(data & (1 << i))) {
+				temp &= ~(type << (i * 4));	/* data bit clear: remove type bits from nibble i */
+				continue;
+			}
+			temp |= type << (i * 4);	/* data bit set: add type bits to nibble i */
+		}
+	}
+	nv_wr32(i2c, 0x00e068, temp);
+}
+
 #define AUX_DBG(fmt, args...) nv_debug(aux, "AUXCH(%d): " fmt, ch, ##args)
 #define AUX_ERR(fmt, args...) nv_error(aux, "AUXCH(%d): " fmt, ch, ##args)
 
@@ -69,7 +99,8 @@
 }
 
 int
-nv94_aux(struct nouveau_i2c_port *base, u8 type, u32 addr, u8 *data, u8 size)
+nv94_aux(struct nouveau_i2c_port *base, bool retry,
+	 u8 type, u32 addr, u8 *data, u8 size)
 {
 	struct nouveau_i2c *aux = nouveau_i2c(base);
 	struct nv50_i2c_port *port = (void *)base;
@@ -105,9 +136,8 @@
 	ctrl |= size - 1;
 	nv_wr32(aux, 0x00e4e0 + (ch * 0x50), addr);
 
-	/* retry transaction a number of times on failure... */
-	ret = -EREMOTEIO;
-	for (retries = 0; retries < 32; retries++) {
+	/* (maybe) retry transaction a number of times on failure... */
+	for (retries = 0; !ret && retries < 32; retries++) {
 		/* reset, and delay a while if this is a retry */
 		nv_wr32(aux, 0x00e4e4 + (ch * 0x50), 0x80000000 | ctrl);
 		nv_wr32(aux, 0x00e4e4 + (ch * 0x50), 0x00000000 | ctrl);
@@ -123,16 +153,21 @@
 			udelay(1);
 			if (!timeout--) {
 				AUX_ERR("tx req timeout 0x%08x\n", ctrl);
+				ret = -EIO;
 				goto out;
 			}
 		} while (ctrl & 0x00010000);
+		ret = 1;
 
 		/* read status, and check if transaction completed ok */
 		stat = nv_mask(aux, 0x00e4e8 + (ch * 0x50), 0, 0);
-		if (!(stat & 0x000f0f00)) {
-			ret = 0;
-			break;
-		}
+		if ((stat & 0x000f0000) == 0x00080000 ||
+		    (stat & 0x000f0000) == 0x00020000)
+			ret = retry ? 0 : 1;
+		if ((stat & 0x00000100))
+			ret = -ETIMEDOUT;
+		if ((stat & 0x00000e00))
+			ret = -EIO;
 
 		AUX_DBG("%02d 0x%08x 0x%08x\n", retries, ctrl, stat);
 	}
@@ -147,29 +182,11 @@
 
 out:
 	auxch_fini(aux, ch);
-	return ret;
-}
-
-void
-nv94_i2c_acquire(struct nouveau_i2c_port *base)
-{
-	struct nv50_i2c_priv *priv = (void *)nv_object(base)->engine;
-	struct nv50_i2c_port *port = (void *)base;
-	if (port->ctrl) {
-		nv_mask(priv, port->ctrl + 0x0c, 0x00000001, 0x00000000);
-		nv_mask(priv, port->ctrl + 0x00, 0x0000f003, port->data);
-	}
-}
-
-void
-nv94_i2c_release(struct nouveau_i2c_port *base)
-{
+	return ret < 0 ? ret : (stat & 0x000f0000) >> 16;
 }
 
 static const struct nouveau_i2c_func
 nv94_i2c_func = {
-	.acquire   = nv94_i2c_acquire,
-	.release   = nv94_i2c_release,
 	.drive_scl = nv50_i2c_drive_scl,
 	.drive_sda = nv50_i2c_drive_sda,
 	.sense_scl = nv50_i2c_sense_scl,
@@ -206,8 +223,6 @@
 
 static const struct nouveau_i2c_func
 nv94_aux_func = {
-	.acquire   = nv94_i2c_acquire,
-	.release   = nv94_i2c_release,
 	.aux       = nv94_aux,
 };
 
@@ -227,6 +242,7 @@
 	if (ret)
 		return ret;
 
+	port->base.aux = info->drive;
 	port->addr = info->drive;
 	if (info->share != DCB_I2C_UNUSED) {
 		port->ctrl = 0x00e500 + (info->drive * 0x50);
@@ -257,29 +273,19 @@
 	{}
 };
 
-static int
-nv94_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	      struct nouveau_oclass *oclass, void *data, u32 size,
-	      struct nouveau_object **pobject)
-{
-	struct nv50_i2c_priv *priv;
-	int ret;
-
-	ret = nouveau_i2c_create(parent, engine, oclass, nv94_i2c_sclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-struct nouveau_oclass
-nv94_i2c_oclass = {
-	.handle = NV_SUBDEV(I2C, 0x94),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv94_i2c_ctor,
+struct nouveau_oclass *
+nv94_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0x94),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
 		.dtor = _nouveau_i2c_dtor,
 		.init = _nouveau_i2c_init,
 		.fini = _nouveau_i2c_fini,
 	},
-};
+	.sclass = nv94_i2c_sclass,
+	.pad_x = &nv04_i2c_pad_oclass,
+	.pad_s = &nv94_i2c_pad_oclass,
+	.aux = 4,
+	.aux_stat = nv94_aux_stat,
+	.aux_mask = nv94_aux_mask,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nvd0.c
index 29967d3..364ddb1 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nvd0.c

@@ -42,8 +42,6 @@
 
 static const struct nouveau_i2c_func
 nvd0_i2c_func = {
-	.acquire   = nv94_i2c_acquire,
-	.release   = nv94_i2c_release,
 	.drive_scl = nv50_i2c_drive_scl,
 	.drive_sda = nv50_i2c_drive_sda,
 	.sense_scl = nvd0_i2c_sense_scl,
@@ -75,7 +73,7 @@
 	return 0;
 }
 
-static struct nouveau_oclass
+struct nouveau_oclass
 nvd0_i2c_sclass[] = {
 	{ .handle = NV_I2C_TYPE_DCBI2C(DCB_I2C_NVIO_BIT),
 	  .ofuncs = &(struct nouveau_ofuncs) {
@@ -96,29 +94,19 @@
 	{}
 };
 
-static int
-nvd0_i2c_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	      struct nouveau_oclass *oclass, void *data, u32 size,
-	      struct nouveau_object **pobject)
-{
-	struct nv50_i2c_priv *priv;
-	int ret;
-
-	ret = nouveau_i2c_create(parent, engine, oclass, nvd0_i2c_sclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-struct nouveau_oclass
-nvd0_i2c_oclass = {
-	.handle = NV_SUBDEV(I2C, 0xd0),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nvd0_i2c_ctor,
+struct nouveau_oclass *
+nvd0_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0xd0),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
 		.dtor = _nouveau_i2c_dtor,
 		.init = _nouveau_i2c_init,
 		.fini = _nouveau_i2c_fini,
 	},
-};
+	.sclass = nvd0_i2c_sclass,
+	.pad_x = &nv04_i2c_pad_oclass,
+	.pad_s = &nv94_i2c_pad_oclass,
+	.aux = 4,
+	.aux_stat = nv94_aux_stat,
+	.aux_mask = nv94_aux_mask,
+}.base;

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/nve0.c
new file mode 100644
index 0000000..cae77e1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/nve0.c

@@ -0,0 +1,72 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "nv50.h"
+
+static void
+nve0_aux_stat(struct nouveau_i2c *i2c, u32 *hi, u32 *lo, u32 *rq, u32 *tx)
+{
+	u32 intr = nv_rd32(i2c, 0x00dc60);	/* pending bits; written back below as the ack */
+	u32 stat = nv_rd32(i2c, 0x00dc68) & intr, i;	/* drop bits not set in 0x00dc68 */
+	for (i = 0, *hi = *lo = *rq = *tx = 0; i < 8; i++) {	/* one nibble per channel */
+		if ((stat & (1u << (i * 4)))) *hi |= 1 << i;
+		if ((stat & (2u << (i * 4)))) *lo |= 1 << i;
+		if ((stat & (4u << (i * 4)))) *rq |= 1 << i;
+		if ((stat & (8u << (i * 4)))) *tx |= 1 << i;	/* unsigned: 8 << 28 overflows int */
+	}
+	nv_wr32(i2c, 0x00dc60, intr);	/* ack everything read, even bits dropped from stat */
+}
+
+static void
+nve0_aux_mask(struct nouveau_i2c *i2c, u32 type, u32 mask, u32 data)
+{
+	u32 temp = nv_rd32(i2c, 0x00dc68), i;	/* read-modify-write of 0x00dc68 */
+	for (i = 0; i < 8; i++) {
+		if (mask & (1 << i)) {	/* channels not selected in mask are left untouched */
+			if (!(data & (1 << i))) {
+				temp &= ~(type << (i * 4));	/* data bit clear: remove type bits from nibble i */
+				continue;
+			}
+			temp |= type << (i * 4);	/* data bit set: add type bits to nibble i */
+		}
+	}
+	nv_wr32(i2c, 0x00dc68, temp);
+}
+
+struct nouveau_oclass *
+nve0_i2c_oclass = &(struct nouveau_i2c_impl) {
+	.base.handle = NV_SUBDEV(I2C, 0xe0),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_i2c_ctor,
+		.dtor = _nouveau_i2c_dtor,
+		.init = _nouveau_i2c_init,
+		.fini = _nouveau_i2c_fini,
+	},
+	.sclass = nvd0_i2c_sclass,	/* port classes come from the nvd0 table */
+	.pad_x = &nv04_i2c_pad_oclass,
+	.pad_s = &nv94_i2c_pad_oclass,
+	.aux = 4,	/* number of native aux channels */
+	.aux_stat = nve0_aux_stat,	/* file-local variants using the 0x00dc60/0x00dc68 registers */
+	.aux_mask = nve0_aux_mask,
+}.base;	/* the exported pointer refers to the embedded base oclass */

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.c
new file mode 100644
index 0000000..e9e4124
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.c

@@ -0,0 +1,84 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "pad.h"
+
+int
+_nvkm_i2c_pad_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nvkm_i2c_pad *pad = (void *)object;
+	DBG("-> NULL\n");
+	pad->port = NULL;	/* forget the current port before finishing the base object */
+	return nouveau_object_fini(&pad->base, suspend);
+}
+
+int
+_nvkm_i2c_pad_init(struct nouveau_object *object)
+{
+	struct nvkm_i2c_pad *pad = (void *)object;
+	DBG("-> PORT:%02x\n", pad->next->index);	/* dereferences next: it must be set before init */
+	pad->port = pad->next;	/* 'next' becomes the pad's current port */
+	return nouveau_object_init(&pad->base);
+}
+
+int
+nvkm_i2c_pad_create_(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass, int index,
+		     int size, void **pobject)
+{
+	struct nouveau_i2c *i2c = (void *)engine;	/* engine is taken to be the i2c subdev */
+	struct nouveau_i2c_port *port;
+	struct nvkm_i2c_pad *pad;
+	int ret;
+
+	list_for_each_entry(port, &i2c->ports, head) {	/* look for an existing pad with this index */
+		pad = nvkm_i2c_pad(port);
+		if (pad->index == index) {
+			atomic_inc(&nv_object(pad)->refcount);	/* extra reference for the caller */
+			*pobject = pad;
+			return 1;	/* 1: an existing pad was returned, nothing was created */
+		}
+	}
+
+	ret = nouveau_object_create_(parent, engine, oclass, 0, size, pobject);	/* no match: make a new one */
+	pad = *pobject;
+	if (ret)
+		return ret;
+
+	pad->index = index;
+	return 0;	/* 0: a new pad was created */
+}
+
+int
+_nvkm_i2c_pad_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		   struct nouveau_oclass *oclass, void *data, u32 index,
+		   struct nouveau_object **pobject)
+{
+	struct nvkm_i2c_pad *pad;
+	int ret;
+	ret = nvkm_i2c_pad_create(parent, engine, oclass, index, &pad);	/* 5th ctor argument is used as the pad index */
+	*pobject = nv_object(pad);
+	return ret;	/* passed through unchanged, including 1 for a reused pad */
+}

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.h
new file mode 100644
index 0000000..452ac10
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/pad.h

@@ -0,0 +1,58 @@
+#ifndef __NVKM_I2C_PAD_H__
+#define __NVKM_I2C_PAD_H__
+
+#include "priv.h"
+
+struct nvkm_i2c_pad {
+	struct nouveau_object base;
+	int index;	/* lookup key in nvkm_i2c_pad_create_(); MSG() prints values >= 0x100 as 'X' */
+	struct nouveau_i2c_port *port;	/* copied from 'next' by pad init, NULL after pad fini */
+	struct nouveau_i2c_port *next;	/* read by pad init; not assigned anywhere in pad.c */
+};
+
+static inline struct nvkm_i2c_pad *
+nvkm_i2c_pad(struct nouveau_i2c_port *port)
+{
+	struct nouveau_object *pad = nv_object(port);
+	while (pad->parent)	/* climb the parent chain until an object has no parent */
+		pad = pad->parent;
+	return (void *)pad;	/* that topmost object is returned as the pad */
+}
+
+#define nvkm_i2c_pad_create(p,e,o,i,d)                                         \
+	nvkm_i2c_pad_create_((p), (e), (o), (i), sizeof(**d), (void **)d)
+#define nvkm_i2c_pad_destroy(p) ({                                             \
+	struct nvkm_i2c_pad *_p = (p);                                         \
+	_nvkm_i2c_pad_dtor(nv_object(_p));                                     \
+})
+#define nvkm_i2c_pad_init(p) ({                                                \
+	struct nvkm_i2c_pad *_p = (p);                                         \
+	_nvkm_i2c_pad_init(nv_object(_p));                                     \
+})
+#define nvkm_i2c_pad_fini(p,s) ({                                              \
+	struct nvkm_i2c_pad *_p = (p);                                         \
+	_nvkm_i2c_pad_fini(nv_object(_p), (s));                                \
+})
+
+int nvkm_i2c_pad_create_(struct nouveau_object *, struct nouveau_object *,
+			 struct nouveau_oclass *, int index, int, void **);
+
+int _nvkm_i2c_pad_ctor(struct nouveau_object *, struct nouveau_object *,
+		       struct nouveau_oclass *, void *, u32,
+		       struct nouveau_object **);
+#define _nvkm_i2c_pad_dtor nouveau_object_destroy
+int _nvkm_i2c_pad_init(struct nouveau_object *);
+int _nvkm_i2c_pad_fini(struct nouveau_object *, bool);
+
+#ifndef MSG
+#define MSG(l,f,a...) do {                                                     \
+	struct nvkm_i2c_pad *_pad = (void *)pad;                               \
+	nv_##l(nv_object(_pad)->engine, "PAD:%c:%02x: "f,                      \
+	       _pad->index >= 0x100 ? 'X' : 'S',                               \
+	       _pad->index >= 0x100 ? _pad->index - 0x100 : _pad->index, ##a); \
+} while(0)
+#define DBG(f,a...) MSG(debug, f, ##a)
+#define ERR(f,a...) MSG(error, f, ##a)
+#endif
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv04.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv04.c
new file mode 100644
index 0000000..2c4b612
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv04.c

@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "pad.h"
+
+struct nouveau_oclass
+nv04_i2c_pad_oclass = {
+	.ofuncs = &(struct nouveau_ofuncs) {	/* generic pad callbacks only, no register access */
+		.ctor = _nvkm_i2c_pad_ctor,
+		.dtor = _nvkm_i2c_pad_dtor,
+		.init = _nvkm_i2c_pad_init,
+		.fini = _nvkm_i2c_pad_fini,
+	},
+};

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv94.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv94.c
new file mode 100644
index 0000000..0dc6753
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/padnv94.c

@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "pad.h"
+
+struct nv94_i2c_pad {
+	struct nvkm_i2c_pad base;
+	int addr;	/* index * 0x50 (set in ctor); added to 0x00e500 / 0x00e50c */
+};
+
+static int
+nv94_i2c_pad_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_i2c *i2c = (void *)object->engine;
+	struct nv94_i2c_pad *pad = (void *)object;
+	nv_mask(i2c, 0x00e50c + pad->addr, 0x00000001, 0x00000001);	/* init writes 0 under this same mask */
+	return nvkm_i2c_pad_fini(&pad->base, suspend);	/* then the generic pad fini */
+}
+
+static int
+nv94_i2c_pad_init(struct nouveau_object *object)
+{
+	struct nouveau_i2c *i2c = (void *)object->engine;
+	struct nv94_i2c_pad *pad = (void *)object;
+
+	switch (nv_oclass(pad->base.next)->handle) {	/* value for 0x00e500 depends on the next port's class */
+	case NV_I2C_TYPE_DCBI2C(DCB_I2C_NVIO_AUX):
+		nv_mask(i2c, 0x00e500 + pad->addr, 0x0000c003, 0x00000002);
+		break;
+	case NV_I2C_TYPE_DCBI2C(DCB_I2C_NVIO_BIT):
+	default:	/* every non-AUX class is handled like NVIO_BIT */
+		nv_mask(i2c, 0x00e500 + pad->addr, 0x0000c003, 0x0000c001);
+		break;
+	}
+
+	nv_mask(i2c, 0x00e50c + pad->addr, 0x00000001, 0x00000000);	/* fini writes 1 under this same mask */
+	return nvkm_i2c_pad_init(&pad->base);	/* then the generic pad init */
+}
+
+static int
+nv94_i2c_pad_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 index,
+		  struct nouveau_object **pobject)
+{
+	struct nv94_i2c_pad *pad;
+	int ret;
+
+	ret = nvkm_i2c_pad_create(parent, engine, oclass, index, &pad);
+	*pobject = nv_object(pad);
+	if (ret)	/* failure, or 1 when an existing pad was reused: addr is not touched */
+		return ret;
+
+	pad->addr = index * 0x50;	/* offset added to 0x00e500 / 0x00e50c by init and fini */
+	return 0;
+}
+
+struct nouveau_oclass
+nv94_i2c_pad_oclass = {
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv94_i2c_pad_ctor,	/* records the per-pad register offset */
+		.dtor = _nvkm_i2c_pad_dtor,	/* generic: nothing nv94-specific to release */
+		.init = nv94_i2c_pad_init,
+		.fini = nv94_i2c_pad_fini,
+	},
+};

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/port.h b/drivers/gpu/drm/nouveau/core/subdev/i2c/port.h
new file mode 100644
index 0000000..a8ff6e0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/port.h

@@ -0,0 +1,15 @@
+#ifndef __NVKM_I2C_PORT_H__
+#define __NVKM_I2C_PORT_H__
+
+#include "priv.h"
+
+#ifndef MSG
+#define MSG(l,f,a...) do {                                                     \
+	struct nouveau_i2c_port *_port = (void *)port;                         \
+	nv_##l(nv_object(_port)->engine, "PORT:%02x: "f, _port->index, ##a);   \
+} while(0)
+#define DBG(f,a...) MSG(debug, f, ##a)
+#define ERR(f,a...) MSG(error, f, ##a)
+#endif
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/core/subdev/i2c/priv.h
new file mode 100644
index 0000000..780090b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/priv.h

@@ -0,0 +1,85 @@
+#ifndef __NVKM_I2C_H__
+#define __NVKM_I2C_H__
+
+#include <subdev/i2c.h>
+
+extern struct nouveau_oclass nv04_i2c_pad_oclass;
+extern struct nouveau_oclass nv94_i2c_pad_oclass;
+
+#define nouveau_i2c_port_create(p,e,o,i,a,f,d)                                 \
+	nouveau_i2c_port_create_((p), (e), (o), (i), (a), (f),                 \
+				 sizeof(**d), (void **)d)
+#define nouveau_i2c_port_destroy(p) ({ /* run the port dtor on (p) */          \
+	struct nouveau_i2c_port *port = (p);                                   \
+	_nouveau_i2c_port_dtor(nv_object(port));                               \
+})
+#define nouveau_i2c_port_init(p)                                               \
+	nouveau_object_init(&(p)->base)
+#define nouveau_i2c_port_fini(p,s)                                             \
+	nouveau_object_fini(&(p)->base, (s))
+
+int nouveau_i2c_port_create_(struct nouveau_object *, struct nouveau_object *,
+			     struct nouveau_oclass *, u8,
+			     const struct i2c_algorithm *,
+			     const struct nouveau_i2c_func *,
+			     int, void **);
+void _nouveau_i2c_port_dtor(struct nouveau_object *);
+#define _nouveau_i2c_port_init nouveau_object_init
+int  _nouveau_i2c_port_fini(struct nouveau_object *, bool);
+
+#define nouveau_i2c_create(p,e,o,d)                                            \
+	nouveau_i2c_create_((p), (e), (o), sizeof(**d), (void **)d)
+#define nouveau_i2c_destroy(p) ({                                              \
+	struct nouveau_i2c *i2c = (p);                                         \
+	_nouveau_i2c_dtor(nv_object(i2c));                                     \
+})
+#define nouveau_i2c_init(p) ({                                                 \
+	struct nouveau_i2c *i2c = (p);                                         \
+	_nouveau_i2c_init(nv_object(i2c));                                     \
+})
+#define nouveau_i2c_fini(p,s) ({                                               \
+	struct nouveau_i2c *i2c = (p);                                         \
+	_nouveau_i2c_fini(nv_object(i2c), (s));                                \
+})
+
+int nouveau_i2c_create_(struct nouveau_object *, struct nouveau_object *,
+			struct nouveau_oclass *, int, void **);
+int  _nouveau_i2c_ctor(struct nouveau_object *, struct nouveau_object *,
+		       struct nouveau_oclass *, void *, u32,
+		       struct nouveau_object **);
+void _nouveau_i2c_dtor(struct nouveau_object *);
+int  _nouveau_i2c_init(struct nouveau_object *);
+int  _nouveau_i2c_fini(struct nouveau_object *, bool);
+
+extern struct nouveau_oclass nouveau_anx9805_sclass[];
+extern struct nouveau_oclass nvd0_i2c_sclass[];
+
+extern const struct i2c_algorithm nouveau_i2c_bit_algo;
+extern const struct i2c_algorithm nouveau_i2c_aux_algo;
+
+struct nouveau_i2c_impl {
+	struct nouveau_oclass base;	/* the per-chipset *_i2c_oclass pointers refer to this member */
+
+	/* supported i2c port classes */
+	struct nouveau_oclass *sclass;
+	struct nouveau_oclass *pad_x;	/* pad class; chipset tables use nv04_i2c_pad_oclass here */
+	struct nouveau_oclass *pad_s;	/* pad class; nv94/nvd0/nve0 tables use nv94_i2c_pad_oclass */
+
+	/* number of native dp aux channels present */
+	int aux;
+
+	/* read and ack pending interrupts, returning only data
+	 * for ports that have not been masked off, while still
+	 * performing the ack for anything that was pending.
+	 */
+	void (*aux_stat)(struct nouveau_i2c *, u32 *, u32 *, u32 *, u32 *);	/* out: hi, lo, rq, tx */
+
+	/* mask on/off interrupt types for a given set of auxch
+	 */
+	void (*aux_mask)(struct nouveau_i2c *, u32, u32, u32);	/* in: type, mask, data */
+};
+
+void nv94_aux_stat(struct nouveau_i2c *, u32 *, u32 *, u32 *, u32 *);
+void nv94_aux_mask(struct nouveau_i2c *, u32, u32, u32);
+
+#endif

diff --git a/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c
new file mode 100644
index 0000000..245f0eb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c

@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <subdev/ibus.h>
+#include <subdev/timer.h>
+
+struct gk20a_ibus_priv {
+	struct nouveau_ibus base;	/* only member: no gk20a-specific state is kept */
+};
+
+static void
+gk20a_ibus_init_priv_ring(struct gk20a_ibus_priv *priv)
+{
+	nv_mask(priv, 0x137250, 0x3f, 0);
+	/* also called from gk20a_ibus_intr(), so the delay below must not sleep */
+	nv_mask(priv, 0x000200, 0x20, 0);
+	udelay(20);
+	nv_mask(priv, 0x000200, 0x20, 0x20);
+
+	nv_wr32(priv, 0x12004c, 0x4);
+	nv_wr32(priv, 0x122204, 0x2);
+	nv_rd32(priv, 0x122204);	/* value discarded; presumably a posting read - confirm */
+}
+
+static void
+gk20a_ibus_intr(struct nouveau_subdev *subdev)
+{
+	struct gk20a_ibus_priv *priv = (void *)subdev;
+	u32 status0 = nv_rd32(priv, 0x120058);
+
+	if (status0 & 0x7) {	/* any of the low three status bits forces a ring re-init */
+		nv_debug(priv, "resetting priv ring\n");
+		gk20a_ibus_init_priv_ring(priv);
+	}
+
+	/* Acknowledge interrupt */
+	nv_mask(priv, 0x12004c, 0x2, 0x2);
+
+	if (!nv_wait(subdev, 0x12004c, 0x3f, 0x00))	/* a false result is only warned about */
+		nv_warn(priv, "timeout waiting for ringmaster ack\n");
+}
+
+static int
+gk20a_ibus_init(struct nouveau_object *object)
+{
+	struct gk20a_ibus_priv *priv = (void *)object;
+	int ret;
+
+	ret = _nouveau_ibus_init(object);	/* generic ibus init first; stop on failure */
+	if (ret)
+		return ret;
+
+	gk20a_ibus_init_priv_ring(priv);	/* same ring setup the interrupt handler re-runs */
+
+	return 0;
+}
+
+static int
+gk20a_ibus_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct gk20a_ibus_priv *priv;
+	int ret;
+
+	ret = nouveau_ibus_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);	/* handed back before ret is checked */
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->intr = gk20a_ibus_intr;	/* register the interrupt handler on the subdev */
+	return 0;
+}
+
+struct nouveau_oclass
+gk20a_ibus_oclass = {
+	.handle = NV_SUBDEV(IBUS, 0xea),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gk20a_ibus_ctor,	/* installs gk20a_ibus_intr */
+		.dtor = _nouveau_ibus_dtor,	/* generic ibus dtor */
+		.init = gk20a_ibus_init,	/* generic init plus priv ring setup */
+		.fini = _nouveau_ibus_fini,	/* generic ibus fini */
+	},
+};

diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
index e8822a9..9ca93e2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c

@@ -26,6 +26,7 @@
 
 const struct nouveau_mc_intr
 nv50_mc_intr[] = {
+	{ 0x04000000, NVDEV_ENGINE_DISP },  /* DISP before FIFO, so pageflip-timestamping works! */
 	{ 0x00000001, NVDEV_ENGINE_MPEG },
 	{ 0x00000100, NVDEV_ENGINE_FIFO },
 	{ 0x00001000, NVDEV_ENGINE_GR },
@@ -33,8 +34,8 @@
 	{ 0x00008000, NVDEV_ENGINE_BSP },	/* NV84- */
 	{ 0x00020000, NVDEV_ENGINE_VP },	/* NV84- */
 	{ 0x00100000, NVDEV_SUBDEV_TIMER },
-	{ 0x00200000, NVDEV_SUBDEV_GPIO },
-	{ 0x04000000, NVDEV_ENGINE_DISP },
+	{ 0x00200000, NVDEV_SUBDEV_GPIO },	/* PMGR->GPIO */
+	{ 0x00200000, NVDEV_SUBDEV_I2C }, 	/* PMGR->I2C/AUX */
 	{ 0x10000000, NVDEV_SUBDEV_BUS },
 	{ 0x80000000, NVDEV_ENGINE_SW },
 	{ 0x0002d101, NVDEV_SUBDEV_FB },

diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
index f8a6f18..3c76d90 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c

@@ -26,6 +26,7 @@
 
 static const struct nouveau_mc_intr
 nv98_mc_intr[] = {
+	{ 0x04000000, NVDEV_ENGINE_DISP },  /* DISP first, so pageflip timestamps work */
 	{ 0x00000001, NVDEV_ENGINE_PPP },
 	{ 0x00000100, NVDEV_ENGINE_FIFO },
 	{ 0x00001000, NVDEV_ENGINE_GR },
@@ -35,9 +36,9 @@
 	{ 0x00040000, NVDEV_SUBDEV_PWR },	/* NVA3:NVC0 */
 	{ 0x00080000, NVDEV_SUBDEV_THERM },	/* NVA3:NVC0 */
 	{ 0x00100000, NVDEV_SUBDEV_TIMER },
-	{ 0x00200000, NVDEV_SUBDEV_GPIO },
+	{ 0x00200000, NVDEV_SUBDEV_GPIO },	/* PMGR->GPIO */
+	{ 0x00200000, NVDEV_SUBDEV_I2C }, 	/* PMGR->I2C/AUX */
 	{ 0x00400000, NVDEV_ENGINE_COPY0 },	/* NVA3-     */
-	{ 0x04000000, NVDEV_ENGINE_DISP },
 	{ 0x10000000, NVDEV_SUBDEV_BUS },
 	{ 0x80000000, NVDEV_ENGINE_SW },
 	{ 0x0042d101, NVDEV_SUBDEV_FB },

diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
index 34472d31..f9c6a67 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c

@@ -26,6 +26,7 @@
 
 const struct nouveau_mc_intr
 nvc0_mc_intr[] = {
+	{ 0x04000000, NVDEV_ENGINE_DISP },  /* DISP first, so pageflip timestamps work. */
 	{ 0x00000001, NVDEV_ENGINE_PPP },
 	{ 0x00000020, NVDEV_ENGINE_COPY0 },
 	{ 0x00000040, NVDEV_ENGINE_COPY1 },
@@ -37,10 +38,10 @@
 	{ 0x00040000, NVDEV_SUBDEV_THERM },
 	{ 0x00020000, NVDEV_ENGINE_VP },
 	{ 0x00100000, NVDEV_SUBDEV_TIMER },
-	{ 0x00200000, NVDEV_SUBDEV_GPIO },
+	{ 0x00200000, NVDEV_SUBDEV_GPIO },	/* PMGR->GPIO */
+	{ 0x00200000, NVDEV_SUBDEV_I2C },	/* PMGR->I2C/AUX */
 	{ 0x01000000, NVDEV_SUBDEV_PWR },
 	{ 0x02000000, NVDEV_SUBDEV_LTCG },
-	{ 0x04000000, NVDEV_ENGINE_DISP },
 	{ 0x08000000, NVDEV_SUBDEV_FB },
 	{ 0x10000000, NVDEV_SUBDEV_BUS },
 	{ 0x40000000, NVDEV_SUBDEV_IBUS },

diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/nv50.c
index 64f8b47..fcaabe8 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/nv50.c

@@ -150,7 +150,7 @@
 	 * common example is DP->eDP.
 	 */
 	conn  = bios->data;
-	conn += dcb_conn(bios, (ctx.outp[0] & 0x0000f000) >> 12, &ver, &len);
+	conn += nvbios_connEe(bios, (ctx.outp[0] & 0x0000f000) >> 12, &ver, &len);
 	type  = conn[0];
 	switch (ctx.desc.conn_type) {
 	case 0x01: /* LVDS */

diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c
index 7610fc5..ca9ad9fd4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c

@@ -60,9 +60,9 @@
 nv_board_infos[] = {
 	{ { I2C_BOARD_INFO("w83l785ts", 0x2d) }, 0 },
 	{ { I2C_BOARD_INFO("w83781d", 0x2d) }, 0  },
-	{ { I2C_BOARD_INFO("adt7473", 0x2e) }, 20  },
-	{ { I2C_BOARD_INFO("adt7473", 0x2d) }, 20  },
-	{ { I2C_BOARD_INFO("adt7473", 0x2c) }, 20  },
+	{ { I2C_BOARD_INFO("adt7473", 0x2e) }, 40  },
+	{ { I2C_BOARD_INFO("adt7473", 0x2d) }, 40  },
+	{ { I2C_BOARD_INFO("adt7473", 0x2c) }, 40  },
 	{ { I2C_BOARD_INFO("f75375", 0x2e) }, 0  },
 	{ { I2C_BOARD_INFO("lm99", 0x4c) }, 0  },
 	{ { I2C_BOARD_INFO("lm90", 0x4c) }, 0  },

diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c
index 3b2c458..0478b2e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c

@@ -36,7 +36,7 @@
 	u32 tach = nv_rd32(therm, 0x00e728) & 0x0000ffff;
 	u32 ctrl = nv_rd32(therm, 0x00e720);
 	if (ctrl & 0x00000001)
-		return tach * 60;
+		return tach * 60 / 2;
 	return -ENODEV;
 }
 

diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c
index 434b920..a96dda4 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dac.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c

@@ -414,7 +414,7 @@
 	helper->dpms(encoder, DRM_MODE_DPMS_ON);
 
 	NV_DEBUG(drm, "Output %s is running on CRTC %d using output %c\n",
-		 drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base),
+		 nouveau_encoder_connector_get(nv_encoder)->base.name,
 		 nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
 }
 

diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
index a2d669b..e57babb 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c

@@ -477,7 +477,7 @@
 	helper->dpms(encoder, DRM_MODE_DPMS_ON);
 
 	NV_DEBUG(drm, "Output %s is running on CRTC %d using output %c\n",
-		 drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base),
+		 nouveau_encoder_connector_get(nv_encoder)->base.name,
 		 nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
 }
 

diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 2f1ed61..4342fda 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c

@@ -115,7 +115,7 @@
 				 &dev->mode_config.connector_list, head) {
 		if (!connector->encoder_ids[0]) {
 			NV_WARN(drm, "%s has no encoders, removing\n",
-				drm_get_connector_name(connector));
+				connector->name);
 			connector->funcs->destroy(connector);
 		}
 	}

diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
index 244822d..8667620 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c

@@ -171,7 +171,8 @@
 	helper->dpms(encoder, DRM_MODE_DPMS_ON);
 
 	NV_DEBUG(drm, "Output %s is running on CRTC %d using output %c\n",
-		 drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base), nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
+		 nouveau_encoder_connector_get(nv_encoder)->base.name,
+		 nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
 }
 
 static void nv04_tv_destroy(struct drm_encoder *encoder)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index acef48f..195bd8e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c

@@ -612,8 +612,7 @@
 	helper->dpms(encoder, DRM_MODE_DPMS_ON);
 
 	NV_INFO(drm, "Output %s is running on CRTC %d using output %c\n",
-		drm_get_connector_name(
-			&nouveau_encoder_connector_get(nv_encoder)->base),
+		nouveau_encoder_connector_get(nv_encoder)->base.name,
 		nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
 }
 

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index d07ce02..1fa222e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c

@@ -44,6 +44,7 @@
 
 #include <subdev/i2c.h>
 #include <subdev/gpio.h>
+#include <engine/disp.h>
 
 MODULE_PARM_DESC(tv_disable, "Disable TV-out detection");
 static int nouveau_tv_disable = 0;
@@ -75,7 +76,8 @@
 			continue;
 		nv_encoder = nouveau_encoder(obj_to_encoder(obj));
 
-		if (type == DCB_OUTPUT_ANY || nv_encoder->dcb->type == type)
+		if (type == DCB_OUTPUT_ANY ||
+		    (nv_encoder->dcb && nv_encoder->dcb->type == type))
 			return nv_encoder;
 	}
 
@@ -100,22 +102,24 @@
 nouveau_connector_destroy(struct drm_connector *connector)
 {
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
-	nouveau_event_ref(NULL, &nv_connector->hpd_func);
+	nouveau_event_ref(NULL, &nv_connector->hpd);
 	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
+	if (nv_connector->aux.transfer)
+		drm_dp_aux_unregister(&nv_connector->aux);
 	kfree(connector);
 }
 
-static struct nouveau_i2c_port *
-nouveau_connector_ddc_detect(struct drm_connector *connector,
-			     struct nouveau_encoder **pnv_encoder)
+static struct nouveau_encoder *
+nouveau_connector_ddc_detect(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_gpio *gpio = nouveau_gpio(drm->device);
-	struct nouveau_i2c_port *port = NULL;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_mode_object *obj;
 	int i, panel = -ENODEV;
 
 	/* eDP panels need powering on by us (if the VBIOS doesn't default it
@@ -130,13 +134,9 @@
 		}
 	}
 
-	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
-		struct nouveau_encoder *nv_encoder;
-		struct drm_mode_object *obj;
-		int id;
-
-		id = connector->encoder_ids[i];
-		if (!id)
+	for (i = 0; nv_encoder = NULL, i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		int id = connector->encoder_ids[i];
+		if (id == 0)
 			break;
 
 		obj = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_ENCODER);
@@ -144,22 +144,24 @@
 			continue;
 		nv_encoder = nouveau_encoder(obj_to_encoder(obj));
 
-		port = nv_encoder->i2c;
-		if (port && nv_probe_i2c(port, 0x50)) {
-			*pnv_encoder = nv_encoder;
-			break;
+		if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
+			int ret = nouveau_dp_detect(nv_encoder);
+			if (ret == 0)
+				break;
+		} else
+		if (nv_encoder->i2c) {
+			if (nv_probe_i2c(nv_encoder->i2c, 0x50))
+				break;
 		}
-
-		port = NULL;
 	}
 
 	/* eDP panel not detected, restore panel power GPIO to previous
 	 * state to avoid confusing the SOR for other output types.
 	 */
-	if (!port && panel == 0)
+	if (!nv_encoder && panel == 0)
 		gpio->set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, panel);
 
-	return port;
+	return nv_encoder;
 }
 
 static struct nouveau_encoder *
@@ -258,25 +260,17 @@
 	if (ret < 0 && ret != -EACCES)
 		return conn_status;
 
-	i2c = nouveau_connector_ddc_detect(connector, &nv_encoder);
-	if (i2c) {
+	nv_encoder = nouveau_connector_ddc_detect(connector);
+	if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
 		nv_connector->edid = drm_get_edid(connector, &i2c->adapter);
 		drm_mode_connector_update_edid_property(connector,
 							nv_connector->edid);
 		if (!nv_connector->edid) {
 			NV_ERROR(drm, "DDC responded, but no EDID for %s\n",
-				 drm_get_connector_name(connector));
+				 connector->name);
 			goto detect_analog;
 		}
 
-		if (nv_encoder->dcb->type == DCB_OUTPUT_DP &&
-		    !nouveau_dp_detect(to_drm_encoder(nv_encoder))) {
-			NV_ERROR(drm, "Detected %s, but failed init\n",
-				 drm_get_connector_name(connector));
-			conn_status = connector_status_disconnected;
-			goto out;
-		}
-
 		/* Override encoder type for DVI-I based on whether EDID
 		 * says the display is digital or analog, both use the
 		 * same i2c channel so the value returned from ddc_detect
@@ -437,7 +431,7 @@
 	nv_encoder = find_encoder(connector, type);
 	if (!nv_encoder) {
 		NV_ERROR(drm, "can't find encoder to force %s on!\n",
-			 drm_get_connector_name(connector));
+			 connector->name);
 		connector->status = connector_status_disconnected;
 		return;
 	}
@@ -912,33 +906,103 @@
 };
 
 static void
+nouveau_connector_dp_dpms(struct drm_connector *connector, int mode)
+{
+	struct nouveau_encoder *nv_encoder = NULL;
+
+	if (connector->encoder)
+		nv_encoder = nouveau_encoder(connector->encoder);
+	if (nv_encoder && nv_encoder->dcb &&
+	    nv_encoder->dcb->type == DCB_OUTPUT_DP) {
+		if (mode == DRM_MODE_DPMS_ON) {
+			u8 data = DP_SET_POWER_D0;
+			nv_wraux(nv_encoder->i2c, DP_SET_POWER, &data, 1);
+			usleep_range(1000, 2000);
+		} else {
+			u8 data = DP_SET_POWER_D3;
+			nv_wraux(nv_encoder->i2c, DP_SET_POWER, &data, 1);
+		}
+	}
+
+	drm_helper_connector_dpms(connector, mode);
+}
+
+static const struct drm_connector_funcs
+nouveau_connector_funcs_dp = {
+	.dpms = nouveau_connector_dp_dpms,
+	.save = NULL,
+	.restore = NULL,
+	.detect = nouveau_connector_detect,
+	.destroy = nouveau_connector_destroy,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.set_property = nouveau_connector_set_property,
+	.force = nouveau_connector_force
+};
+
+static void
 nouveau_connector_hotplug_work(struct work_struct *work)
 {
 	struct nouveau_connector *nv_connector =
-		container_of(work, struct nouveau_connector, hpd_work);
+		container_of(work, typeof(*nv_connector), work);
 	struct drm_connector *connector = &nv_connector->base;
-	struct drm_device *dev = connector->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_gpio *gpio = nouveau_gpio(drm->device);
-	bool plugged = gpio->get(gpio, 0, nv_connector->hpd.func, 0xff);
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	const char *name = connector->name;
 
-	NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un",
-		 drm_get_connector_name(connector));
+	if (nv_connector->status & NVKM_HPD_IRQ) {
+	} else {
+		bool plugged = (nv_connector->status != NVKM_HPD_UNPLUG);
 
-	if (plugged)
-		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-	else
-		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+		NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name);
 
-	drm_helper_hpd_irq_event(dev);
+		if (plugged)
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+		else
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+		drm_helper_hpd_irq_event(connector->dev);
+	}
+
+	nouveau_event_get(nv_connector->hpd);
 }
 
 static int
-nouveau_connector_hotplug(void *data, int index)
+nouveau_connector_hotplug(void *data, u32 type, int index)
 {
 	struct nouveau_connector *nv_connector = data;
-	schedule_work(&nv_connector->hpd_work);
-	return NVKM_EVENT_KEEP;
+	nv_connector->status = type;
+	schedule_work(&nv_connector->work);
+	return NVKM_EVENT_DROP;
+}
+
+static ssize_t
+nouveau_connector_aux_xfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
+{
+	struct nouveau_connector *nv_connector =
+		container_of(aux, typeof(*nv_connector), aux);
+	struct nouveau_encoder *nv_encoder;
+	struct nouveau_i2c_port *port;
+	int ret;
+
+	nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP);
+	if (!nv_encoder || !(port = nv_encoder->i2c))
+		return -ENODEV;
+	if (WARN_ON(msg->size > 16))
+		return -E2BIG;
+	if (msg->size == 0)
+		return msg->size;
+
+	ret = nouveau_i2c(port)->acquire(port, 0);
+	if (ret)
+		return ret;
+
+	ret = port->func->aux(port, false, msg->request, msg->address,
+			      msg->buffer, msg->size);
+	nouveau_i2c(port)->release(port);
+	if (ret >= 0) {
+		msg->reply = ret;
+		return msg->size;
+	}
+
+	return ret;
 }
 
 static int
@@ -974,9 +1038,9 @@
 {
 	const struct drm_connector_funcs *funcs = &nouveau_connector_funcs;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_gpio *gpio = nouveau_gpio(drm->device);
 	struct nouveau_display *disp = nouveau_display(dev);
 	struct nouveau_connector *nv_connector = NULL;
+	struct nouveau_disp *pdisp = nouveau_disp(drm->device);
 	struct drm_connector *connector;
 	int type, ret = 0;
 	bool dummy;
@@ -992,33 +1056,15 @@
 		return ERR_PTR(-ENOMEM);
 
 	connector = &nv_connector->base;
-	INIT_WORK(&nv_connector->hpd_work, nouveau_connector_hotplug_work);
 	nv_connector->index = index;
 
 	/* attempt to parse vbios connector type and hotplug gpio */
 	nv_connector->dcb = olddcb_conn(dev, index);
 	if (nv_connector->dcb) {
-		static const u8 hpd[16] = {
-			0xff, 0x07, 0x08, 0xff, 0xff, 0x51, 0x52, 0xff,
-			0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0x5f, 0x60,
-		};
-
 		u32 entry = ROM16(nv_connector->dcb[0]);
 		if (olddcb_conntab(dev)[3] >= 4)
 			entry |= (u32)ROM16(nv_connector->dcb[2]) << 16;
 
-		ret = gpio->find(gpio, 0, hpd[ffs((entry & 0x07033000) >> 12)],
-				 DCB_GPIO_UNUSED, &nv_connector->hpd);
-		if (ret)
-			nv_connector->hpd.func = DCB_GPIO_UNUSED;
-
-		if (nv_connector->hpd.func != DCB_GPIO_UNUSED) {
-			nouveau_event_new(gpio->events, nv_connector->hpd.line,
-					  nouveau_connector_hotplug,
-					  nv_connector,
-					 &nv_connector->hpd_func);
-		}
-
 		nv_connector->type = nv_connector->dcb[0];
 		if (drm_conntype_from_dcb(nv_connector->type) ==
 					  DRM_MODE_CONNECTOR_Unknown) {
@@ -1040,7 +1086,6 @@
 		}
 	} else {
 		nv_connector->type = DCB_CONNECTOR_NONE;
-		nv_connector->hpd.func = DCB_GPIO_UNUSED;
 	}
 
 	/* no vbios data, or an unknown dcb connector type - attempt to
@@ -1080,8 +1125,8 @@
 		}
 	}
 
-	type = drm_conntype_from_dcb(nv_connector->type);
-	if (type == DRM_MODE_CONNECTOR_LVDS) {
+	switch ((type = drm_conntype_from_dcb(nv_connector->type))) {
+	case DRM_MODE_CONNECTOR_LVDS:
 		ret = nouveau_bios_parse_lvds_table(dev, 0, &dummy, &dummy);
 		if (ret) {
 			NV_ERROR(drm, "Error parsing LVDS table, disabling\n");
@@ -1090,8 +1135,23 @@
 		}
 
 		funcs = &nouveau_connector_funcs_lvds;
-	} else {
+		break;
+	case DRM_MODE_CONNECTOR_DisplayPort:
+	case DRM_MODE_CONNECTOR_eDP:
+		nv_connector->aux.dev = dev->dev;
+		nv_connector->aux.transfer = nouveau_connector_aux_xfer;
+		ret = drm_dp_aux_register(&nv_connector->aux);
+		if (ret) {
+			NV_ERROR(drm, "failed to register aux channel\n");
+			kfree(nv_connector);
+			return ERR_PTR(ret);
+		}
+
+		funcs = &nouveau_connector_funcs_dp;
+		break;
+	default:
 		funcs = &nouveau_connector_funcs;
+		break;
 	}
 
 	/* defaults, will get overridden in detect() */
@@ -1166,10 +1226,16 @@
 		break;
 	}
 
-	connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-	if (nv_connector->hpd.func != DCB_GPIO_UNUSED)
+	ret = nouveau_event_new(pdisp->hpd, NVKM_HPD, index,
+				nouveau_connector_hotplug,
+				nv_connector, &nv_connector->hpd);
+	if (ret)
+		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+	else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
+	INIT_WORK(&nv_connector->work, nouveau_connector_hotplug_work);
+
 	drm_sysfs_connector_add(connector);
 	return connector;
 }

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index 264a778..8861b6c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h

@@ -28,12 +28,12 @@
 #define __NOUVEAU_CONNECTOR_H__
 
 #include <drm/drm_edid.h>
+#include <drm/drm_dp_helper.h>
 #include "nouveau_crtc.h"
 
 #include <core/event.h>
 
 #include <subdev/bios.h>
-#include <subdev/bios/gpio.h>
 
 struct nouveau_i2c_port;
 
@@ -67,9 +67,11 @@
 	u8 index;
 	u8 *dcb;
 
-	struct dcb_gpio_func hpd;
-	struct work_struct hpd_work;
-	struct nouveau_eventh *hpd_func;
+	struct nouveau_eventh *hpd;
+	u32 status;
+	struct work_struct work;
+
+	struct drm_dp_aux aux;
 
 	int dithering_mode;
 	int dithering_depth;

diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index d1e5890..a053448 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h

@@ -74,7 +74,7 @@
 
 static inline struct nouveau_crtc *nouveau_crtc(struct drm_crtc *crtc)
 {
-	return container_of(crtc, struct nouveau_crtc, base);
+	return crtc ? container_of(crtc, struct nouveau_crtc, base) : NULL;
 }
 
 static inline struct drm_crtc *to_drm_crtc(struct nouveau_crtc *crtc)

diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index da764a4..26b5647 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c

@@ -42,7 +42,7 @@
 #include <core/class.h>
 
 static int
-nouveau_display_vblank_handler(void *data, int head)
+nouveau_display_vblank_handler(void *data, u32 type, int head)
 {
 	struct nouveau_drm *drm = data;
 	drm_handle_vblank(drm->dev, head);
@@ -178,7 +178,7 @@
 		return -ENOMEM;
 
 	for (i = 0; i < dev->mode_config.num_crtc; i++) {
-		ret = nouveau_event_new(pdisp->vblank, i,
+		ret = nouveau_event_new(pdisp->vblank, 1, i,
 					nouveau_display_vblank_handler,
 					drm, &disp->vblank[i]);
 		if (ret) {
@@ -393,7 +393,7 @@
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
-		if (conn->hpd_func) nouveau_event_get(conn->hpd_func);
+		if (conn->hpd) nouveau_event_get(conn->hpd);
 	}
 
 	return ret;
@@ -408,7 +408,7 @@
 	/* disable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
-		if (conn->hpd_func) nouveau_event_put(conn->hpd_func);
+		if (conn->hpd) nouveau_event_put(conn->hpd);
 	}
 
 	drm_kms_helper_poll_disable(dev);
@@ -798,6 +798,7 @@
 	struct drm_device *dev = drm->dev;
 	struct nouveau_page_flip_state *s;
 	unsigned long flags;
+	int crtcid = -1;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
@@ -808,8 +809,13 @@
 	}
 
 	s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head);
-	if (s->event)
-		drm_send_vblank_event(dev, s->crtc, s->event);
+	if (s->event) {
+		/* Vblank timestamps/counts are only correct on >= NV-50 */
+		if (nv_device(drm->device)->card_type >= NV_50)
+			crtcid = s->crtc;
+
+		drm_send_vblank_event(dev, crtcid, s->event);
+	}
 
 	list_del(&s->head);
 	if (ps)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 36fd225..5675ffc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c

@@ -55,11 +55,10 @@
 
 }
 
-bool
-nouveau_dp_detect(struct drm_encoder *encoder)
+int
+nouveau_dp_detect(struct nouveau_encoder *nv_encoder)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = nv_encoder->base.base.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_i2c_port *auxch;
 	u8 *dpcd = nv_encoder->dp.dpcd;
@@ -67,11 +66,11 @@
 
 	auxch = nv_encoder->i2c;
 	if (!auxch)
-		return false;
+		return -ENODEV;
 
 	ret = nv_rdaux(auxch, DP_DPCD_REV, dpcd, 8);
 	if (ret)
-		return false;
+		return ret;
 
 	nv_encoder->dp.link_bw = 27000 * dpcd[1];
 	nv_encoder->dp.link_nr = dpcd[2] & DP_MAX_LANE_COUNT_MASK;
@@ -91,6 +90,5 @@
 		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw);
 
 	nouveau_dp_probe_oui(dev, auxch, dpcd);
-
-	return true;
+	return 0;
 }

diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index 24660c0..5f0e37f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h

@@ -46,6 +46,7 @@
 	/* different to drm_encoder.crtc, this reflects what's
 	 * actually programmed on the hw, not the proposed crtc */
 	struct drm_crtc *crtc;
+	u32 ctrl;
 
 	struct drm_display_mode mode;
 	int last_dpms;
@@ -84,9 +85,7 @@
 }
 
 /* nouveau_dp.c */
-bool nouveau_dp_detect(struct drm_encoder *);
-void nouveau_dp_dpms(struct drm_encoder *, int mode, u32 datarate,
-		     struct nouveau_object *);
+int nouveau_dp_detect(struct nouveau_encoder *);
 
 struct nouveau_connector *
 nouveau_encoder_connector_get(struct nouveau_encoder *encoder);

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 90074d6..ab5ea3b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c

@@ -166,7 +166,7 @@
 }
 
 static int
-nouveau_fence_wait_uevent_handler(void *data, int index)
+nouveau_fence_wait_uevent_handler(void *data, u32 type, int index)
 {
 	struct nouveau_fence_priv *priv = data;
 	wake_up_all(&priv->waiting);
@@ -183,7 +183,7 @@
 	struct nouveau_eventh *handler;
 	int ret = 0;
 
-	ret = nouveau_event_new(pfifo->uevent, 0,
+	ret = nouveau_event_new(pfifo->uevent, 1, 0,
 				nouveau_fence_wait_uevent_handler,
 				priv, &handler);
 	if (ret)

diff --git a/drivers/gpu/drm/nouveau/nouveau_ioc32.c b/drivers/gpu/drm/nouveau/nouveau_ioc32.c
index c1a7e5a..462679a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ioc32.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ioc32.c

@@ -57,7 +57,7 @@
 		return drm_compat_ioctl(filp, cmd, arg);
 
 #if 0
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(mga_compat_ioctls))
 		fn = nouveau_compat_ioctls[nr - DRM_COMMAND_BASE];
 #endif
 	if (fn != NULL)

diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index fb84da3..4f4c3fe 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c

@@ -64,12 +64,13 @@
 nouveau_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 58af547..afdf607 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c

@@ -1,4 +1,4 @@
-	/*
+/*
  * Copyright 2011 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,6 +26,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
 
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
@@ -957,7 +958,7 @@
 
 	nv50_display_flip_stop(crtc);
 
-	push = evo_wait(mast, 2);
+	push = evo_wait(mast, 6);
 	if (push) {
 		if (nv50_vers(mast) < NV84_DISP_MAST_CLASS) {
 			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
@@ -1207,6 +1208,7 @@
 nv50_crtc_disable(struct drm_crtc *crtc)
 {
 	struct nv50_head *head = nv50_head(crtc);
+	evo_sync(crtc->dev);
 	if (head->image)
 		nouveau_bo_unpin(head->image);
 	nouveau_bo_ref(NULL, &head->image);
@@ -1700,10 +1702,9 @@
 }
 
 static void
-nv50_hdmi_disconnect(struct drm_encoder *encoder)
+nv50_hdmi_disconnect(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
 	const u32 moff = (nv_crtc->index << 3) | nv_encoder->or;
 
@@ -1722,7 +1723,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct nv50_disp *disp = nv50_disp(dev);
 	struct drm_encoder *partner;
-	int or = nv_encoder->or;
+	u32 mthd;
 
 	nv_encoder->last_dpms = mode;
 
@@ -1740,7 +1741,17 @@
 		}
 	}
 
-	nv_call(disp->core, NV50_DISP_SOR_PWR + or, (mode == DRM_MODE_DPMS_ON));
+	mthd  = (ffs(nv_encoder->dcb->sorconf.link) - 1) << 2;
+	mthd |= nv_encoder->or;
+
+	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
+		nv_call(disp->core, NV50_DISP_SOR_PWR | mthd, 1);
+		mthd |= NV94_DISP_SOR_DP_PWR;
+	} else {
+		mthd |= NV50_DISP_SOR_PWR;
+	}
+
+	nv_call(disp->core, mthd, (mode == DRM_MODE_DPMS_ON));
 }
 
 static bool
@@ -1764,33 +1775,36 @@
 }
 
 static void
+nv50_sor_ctrl(struct nouveau_encoder *nv_encoder, u32 mask, u32 data)
+{
+	struct nv50_mast *mast = nv50_mast(nv_encoder->base.base.dev);
+	u32 temp = (nv_encoder->ctrl & ~mask) | (data & mask), *push;
+	if (temp != nv_encoder->ctrl && (push = evo_wait(mast, 2))) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0600 + (nv_encoder->or * 0x40), 1);
+			evo_data(push, (nv_encoder->ctrl = temp));
+		} else {
+			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
+			evo_data(push, (nv_encoder->ctrl = temp));
+		}
+		evo_kick(push, mast);
+	}
+}
+
+static void
 nv50_sor_disconnect(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	const int or = nv_encoder->or;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		nv50_crtc_prepare(nv_encoder->crtc);
-
-		push = evo_wait(mast, 4);
-		if (push) {
-			if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
-				evo_mthd(push, 0x0600 + (or * 0x40), 1);
-				evo_data(push, 0x00000000);
-			} else {
-				evo_mthd(push, 0x0200 + (or * 0x20), 1);
-				evo_data(push, 0x00000000);
-			}
-			evo_kick(push, mast);
-		}
-
-		nv50_hdmi_disconnect(encoder);
-	}
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
 
 	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
 	nv_encoder->crtc = NULL;
+
+	if (nv_crtc) {
+		nv50_crtc_prepare(&nv_crtc->base);
+		nv50_sor_ctrl(nv_encoder, 1 << nv_crtc->index, 0);
+		nv50_hdmi_disconnect(&nv_encoder->base.base, nv_crtc);
+	}
 }
 
 static void
@@ -1810,12 +1824,14 @@
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nouveau_connector *nv_connector;
 	struct nvbios *bios = &drm->vbios;
-	u32 *push, lvds = 0;
+	u32 lvds = 0, mask, ctrl;
 	u8 owner = 1 << nv_crtc->index;
 	u8 proto = 0xf;
 	u8 depth = 0x0;
 
 	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	nv_encoder->crtc = encoder->crtc;
+
 	switch (nv_encoder->dcb->type) {
 	case DCB_OUTPUT_TMDS:
 		if (nv_encoder->dcb->sorconf.link & 1) {
@@ -1827,7 +1843,7 @@
 			proto = 0x2;
 		}
 
-		nv50_hdmi_mode_set(encoder, mode);
+		nv50_hdmi_mode_set(&nv_encoder->base.base, mode);
 		break;
 	case DCB_OUTPUT_LVDS:
 		proto = 0x0;
@@ -1883,19 +1899,11 @@
 		break;
 	}
 
-	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+	nv50_sor_dpms(&nv_encoder->base.base, DRM_MODE_DPMS_ON);
 
-	push = evo_wait(nv50_mast(dev), 8);
-	if (push) {
-		if (nv50_vers(mast) < NVD0_DISP_CLASS) {
-			u32 ctrl = (depth << 16) | (proto << 8) | owner;
-			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-				ctrl |= 0x00001000;
-			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-				ctrl |= 0x00002000;
-			evo_mthd(push, 0x0600 + (nv_encoder->or * 0x040), 1);
-			evo_data(push, ctrl);
-		} else {
+	if (nv50_vers(mast) >= NVD0_DISP_CLASS) {
+		u32 *push = evo_wait(mast, 3);
+		if (push) {
 			u32 magic = 0x31ec6000 | (nv_crtc->index << 25);
 			u32 syncs = 0x00000001;
 
@@ -1910,14 +1918,21 @@
 			evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
 			evo_data(push, syncs | (depth << 6));
 			evo_data(push, magic);
-			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x020), 1);
-			evo_data(push, owner | (proto << 8));
+			evo_kick(push, mast);
 		}
 
-		evo_kick(push, mast);
+		ctrl = proto << 8;
+		mask = 0x00000f00;
+	} else {
+		ctrl = (depth << 16) | (proto << 8);
+		if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+			ctrl |= 0x00001000;
+		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+			ctrl |= 0x00002000;
+		mask = 0x000f3f00;
 	}
 
-	nv_encoder->crtc = encoder->crtc;
+	nv50_sor_ctrl(nv_encoder, mask | owner, ctrl | owner);
 }
 
 static void
@@ -2295,7 +2310,7 @@
 			continue;
 
 		NV_WARN(drm, "%s has no encoders, removing\n",
-			drm_get_connector_name(connector));
+			connector->name);
 		connector->funcs->destroy(connector);
 	}
 

diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index e3c47a8..2d28dc3 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c

@@ -319,13 +319,13 @@
 	struct drm_display_mode *mode = &crtc->mode;
 	struct drm_gem_object *bo;
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	omap_plane_mode_set(omap_crtc->plane, crtc, crtc->primary->fb,
 			0, 0, mode->hdisplay, mode->vdisplay,
 			crtc->x << 16, crtc->y << 16,
 			mode->hdisplay << 16, mode->vdisplay << 16,
 			vblank_cb, crtc);
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	bo = omap_framebuffer_bo(crtc->primary->fb, 0);
 	drm_gem_object_unreference_unlocked(bo);
@@ -465,7 +465,7 @@
 	 * the callbacks and list modification all serialized
 	 * with respect to modesetting ioctls from userspace.
 	 */
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	dispc_runtime_get();
 
 	/*
@@ -510,7 +510,7 @@
 
 out:
 	dispc_runtime_put();
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 }
 
 int omap_crtc_apply(struct drm_crtc *crtc,
@@ -518,7 +518,7 @@
 {
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
 
-	WARN_ON(!mutex_is_locked(&crtc->mutex));
+	WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
 	/* no need to queue it again if it is already queued: */
 	if (apply->queued)

diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index c8270e4..002b972 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c

@@ -588,9 +588,7 @@
 		}
 	}
 
-	drm_modeset_lock_all(dev);
-	ret = drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-	drm_modeset_unlock_all(dev);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 	if (ret)
 		DBG("failed to restore crtc mode");
 }

diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 8b01960..2a5cacd 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c

@@ -346,6 +346,7 @@
 
 	VERB("flush: %d,%d %dx%d, fb=%p", x, y, w, h, fb);
 
+	/* FIXME: This is racy - no protection against modeset config changes. */
 	while ((connector = omap_framebuffer_get_next_connector(fb, connector))) {
 		/* only consider connectors that are part of a chain */
 		if (connector->encoder && connector->encoder->crtc) {

diff --git a/drivers/gpu/drm/panel/panel-ld9040.c b/drivers/gpu/drm/panel/panel-ld9040.c
index 1f1f837..db1601f 100644
--- a/drivers/gpu/drm/panel/panel-ld9040.c
+++ b/drivers/gpu/drm/panel/panel-ld9040.c

@@ -27,6 +27,7 @@
 #define MCS_ELVSS_ON		0xb1
 #define MCS_USER_SETTING	0xf0
 #define MCS_DISPCTL		0xf2
+#define MCS_POWER_CTRL		0xf4
 #define MCS_GTCON		0xf7
 #define MCS_PANEL_CONDITION	0xf8
 #define MCS_GAMMA_SET1		0xf9
@@ -182,6 +183,8 @@
 	ld9040_dcs_write_seq_static(ctx, MCS_DISPCTL,
 		0x02, 0x08, 0x08, 0x10, 0x10);
 	ld9040_dcs_write_seq_static(ctx, MCS_MANPWR, 0x04);
+	ld9040_dcs_write_seq_static(ctx, MCS_POWER_CTRL,
+		0x0a, 0x87, 0x25, 0x6a, 0x44, 0x02, 0x88);
 	ld9040_dcs_write_seq_static(ctx, MCS_ELVSS_ON, 0x0d, 0x00, 0x16);
 	ld9040_dcs_write_seq_static(ctx, MCS_GTCON, 0x09, 0x00, 0x00);
 	ld9040_brightness_set(ctx);

diff --git a/drivers/gpu/drm/panel/panel-s6e8aa0.c b/drivers/gpu/drm/panel/panel-s6e8aa0.c
index 35941d2..06e57a2 100644
--- a/drivers/gpu/drm/panel/panel-s6e8aa0.c
+++ b/drivers/gpu/drm/panel/panel-s6e8aa0.c

@@ -847,6 +847,7 @@
 	if (i >= ARRAY_SIZE(s6e8aa0_variants)) {
 		dev_err(ctx->dev, "unsupported display version %d\n", id[1]);
 		ctx->error = -EINVAL;
+		return;
 	}
 
 	ctx->variant = &s6e8aa0_variants[i];

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 309f29e..a251361 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c

@@ -262,6 +262,13 @@
 	return 0;
 }
 
+static void panel_simple_shutdown(struct device *dev)
+{
+	struct panel_simple *panel = dev_get_drvdata(dev);
+
+	panel_simple_disable(&panel->base);
+}
+
 static const struct drm_display_mode auo_b101aw03_mode = {
 	.clock = 51450,
 	.hdisplay = 1024,
@@ -284,6 +291,28 @@
 	},
 };
 
+static const struct drm_display_mode auo_b133xtn01_mode = {
+	.clock = 69500,
+	.hdisplay = 1366,
+	.hsync_start = 1366 + 48,
+	.hsync_end = 1366 + 48 + 32,
+	.htotal = 1366 + 48 + 32 + 20,
+	.vdisplay = 768,
+	.vsync_start = 768 + 3,
+	.vsync_end = 768 + 3 + 6,
+	.vtotal = 768 + 3 + 6 + 13,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc auo_b133xtn01 = {
+	.modes = &auo_b133xtn01_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 293,
+		.height = 165,
+	},
+};
+
 static const struct drm_display_mode chunghwa_claa101wa01a_mode = {
 	.clock = 72070,
 	.hdisplay = 1366,
@@ -328,6 +357,52 @@
 	},
 };
 
+static const struct drm_display_mode edt_et057090dhu_mode = {
+	.clock = 25175,
+	.hdisplay = 640,
+	.hsync_start = 640 + 16,
+	.hsync_end = 640 + 16 + 30,
+	.htotal = 640 + 16 + 30 + 114,
+	.vdisplay = 480,
+	.vsync_start = 480 + 10,
+	.vsync_end = 480 + 10 + 3,
+	.vtotal = 480 + 10 + 3 + 32,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc edt_et057090dhu = {
+	.modes = &edt_et057090dhu_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 115,
+		.height = 86,
+	},
+};
+
+static const struct drm_display_mode edt_etm0700g0dh6_mode = {
+	.clock = 33260,
+	.hdisplay = 800,
+	.hsync_start = 800 + 40,
+	.hsync_end = 800 + 40 + 128,
+	.htotal = 800 + 40 + 128 + 88,
+	.vdisplay = 480,
+	.vsync_start = 480 + 10,
+	.vsync_end = 480 + 10 + 2,
+	.vtotal = 480 + 10 + 2 + 33,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc edt_etm0700g0dh6 = {
+	.modes = &edt_etm0700g0dh6_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 152,
+		.height = 91,
+	},
+};
+
 static const struct drm_display_mode lg_lp129qe_mode = {
 	.clock = 285250,
 	.hdisplay = 2560,
@@ -377,12 +452,24 @@
 		.compatible = "auo,b101aw03",
 		.data = &auo_b101aw03,
 	}, {
+		.compatible = "auo,b133xtn01",
+		.data = &auo_b133xtn01,
+	}, {
 		.compatible = "chunghwa,claa101wa01a",
 		.data = &chunghwa_claa101wa01a
 	}, {
 		.compatible = "chunghwa,claa101wb01",
 		.data = &chunghwa_claa101wb01
 	}, {
+		.compatible = "edt,et057090dhu",
+		.data = &edt_et057090dhu,
+	}, {
+		.compatible = "edt,et070080dh6",
+		.data = &edt_etm0700g0dh6,
+	}, {
+		.compatible = "edt,etm0700g0dh6",
+		.data = &edt_etm0700g0dh6,
+	}, {
 		.compatible = "lg,lp129qe",
 		.data = &lg_lp129qe,
 	}, {
@@ -412,6 +499,11 @@
 	return panel_simple_remove(&pdev->dev);
 }
 
+static void panel_simple_platform_shutdown(struct platform_device *pdev)
+{
+	panel_simple_shutdown(&pdev->dev);
+}
+
 static struct platform_driver panel_simple_platform_driver = {
 	.driver = {
 		.name = "panel-simple",
@@ -420,6 +512,7 @@
 	},
 	.probe = panel_simple_platform_probe,
 	.remove = panel_simple_platform_remove,
+	.shutdown = panel_simple_platform_shutdown,
 };
 
 struct panel_desc_dsi {
@@ -561,6 +654,11 @@
 	return panel_simple_remove(&dsi->dev);
 }
 
+static void panel_simple_dsi_shutdown(struct mipi_dsi_device *dsi)
+{
+	panel_simple_shutdown(&dsi->dev);
+}
+
 static struct mipi_dsi_driver panel_simple_dsi_driver = {
 	.driver = {
 		.name = "panel-simple-dsi",
@@ -569,6 +667,7 @@
 	},
 	.probe = panel_simple_dsi_probe,
 	.remove = panel_simple_dsi_remove,
+	.shutdown = panel_simple_dsi_shutdown,
 };
 
 static int __init panel_simple_init(void)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 41bdd17..5d7ea24 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c

@@ -574,6 +574,10 @@
 			   bo->surf.height, bo->surf.stride, bo->surf.format);
 		qxl_io_create_primary(qdev, base_offset, bo);
 		bo->is_primary = true;
+	}
+
+	if (bo->is_primary) {
+		DRM_DEBUG_KMS("setting surface_id to 0 for primary surface %d on crtc %d\n", bo->surface_id, qcrtc->index);
 		surf_id = 0;
 	} else {
 		surf_id = bo->surface_id;
@@ -841,7 +845,7 @@
 	.save = qxl_conn_save,
 	.restore = qxl_conn_restore,
 	.detect = qxl_conn_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
+	.fill_modes = drm_helper_probe_single_connector_modes_nomerge,
 	.set_property = qxl_conn_set_property,
 	.destroy = qxl_conn_destroy,
 };

diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index fee8748..6e93663 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c

@@ -214,7 +214,6 @@
 static struct drm_driver qxl_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET |
 			   DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
-	.dev_priv_size = 0,
 	.load = qxl_driver_load,
 	.unload = qxl_driver_unload,
 

diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 0bb86e6..b110883 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c

@@ -451,4 +451,4 @@
 			  DRM_AUTH|DRM_UNLOCKED),
 };
 
-int qxl_max_ioctls = DRM_ARRAY_SIZE(qxl_ioctls);
+int qxl_max_ioctls = ARRAY_SIZE(qxl_ioctls);

diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 28f84b4..34d6a85 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c

@@ -87,7 +87,7 @@
 	atomic_set(&qdev->irq_received_cursor, 0);
 	atomic_set(&qdev->irq_received_io_cmd, 0);
 	qdev->irq_received_error = 0;
-	ret = drm_irq_install(qdev->ddev);
+	ret = drm_irq_install(qdev->ddev, qdev->ddev->pdev->irq);
 	qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed installing irq: %d\n", ret);

diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index d52c275..71a1bae 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c

@@ -109,13 +109,11 @@
 static int qxl_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo;
-	struct qxl_device *qdev;
 	int r;
 
 	bo = (struct ttm_buffer_object *)vma->vm_private_data;
 	if (bo == NULL)
 		return VM_FAULT_NOPAGE;
-	qdev = qxl_get_qdev(bo->bdev);
 	r = ttm_vm_ops->fault(vma, vmf);
 	return r;
 }
@@ -162,10 +160,6 @@
 static int qxl_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 			     struct ttm_mem_type_manager *man)
 {
-	struct qxl_device *qdev;
-
-	qdev = qxl_get_qdev(bdev);
-
 	switch (type) {
 	case TTM_PL_SYSTEM:
 		/* System memory */

diff --git a/drivers/gpu/drm/r128/r128_ioc32.c b/drivers/gpu/drm/r128/r128_ioc32.c
index b0d0fd3..663f38c 100644
--- a/drivers/gpu/drm/r128/r128_ioc32.c
+++ b/drivers/gpu/drm/r128/r128_ioc32.c

@@ -203,7 +203,7 @@
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(r128_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(r128_compat_ioctls))
 		fn = r128_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)

diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index e806dac..575e986 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c

@@ -1594,7 +1594,7 @@
 
 	switch (param->param) {
 	case R128_PARAM_IRQ_NR:
-		value = drm_dev_to_irq(dev);
+		value = dev->pdev->irq;
 		break;
 	default:
 		return -EINVAL;
@@ -1641,4 +1641,4 @@
 	DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH),
 };
 
-int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls);
+int r128_max_ioctl = ARRAY_SIZE(r128_ioctls);

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 0943353..dbcbfe8 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile

@@ -72,7 +72,7 @@
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
 	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit_shaders.o \
-	radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
+	radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o dce3_1_afmt.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o \
 	evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
 	atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e911898..26c12a3 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c

@@ -557,6 +557,7 @@
 	u32 adjusted_clock = mode->clock;
 	int encoder_mode = atombios_get_encoder_mode(encoder);
 	u32 dp_clock = mode->clock;
+	u32 clock = mode->clock;
 	int bpc = radeon_crtc->bpc;
 	bool is_duallink = radeon_dig_monitor_is_duallink(encoder, mode->clock);
 
@@ -632,6 +633,24 @@
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 	}
 
+	/* adjust pll for deep color modes */
+	if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+		switch (bpc) {
+		case 8:
+		default:
+			break;
+		case 10:
+			clock = (clock * 5) / 4;
+			break;
+		case 12:
+			clock = (clock * 3) / 2;
+			break;
+		case 16:
+			clock = clock * 2;
+			break;
+		}
+	}
+
 	/* DCE3+ has an AdjustDisplayPll that will adjust the pixel clock
 	 * accordingly based on the encoder/transmitter to work around
 	 * special hw requirements.
@@ -653,7 +672,7 @@
 			switch (crev) {
 			case 1:
 			case 2:
-				args.v1.usPixelClock = cpu_to_le16(mode->clock / 10);
+				args.v1.usPixelClock = cpu_to_le16(clock / 10);
 				args.v1.ucTransmitterID = radeon_encoder->encoder_id;
 				args.v1.ucEncodeMode = encoder_mode;
 				if (radeon_crtc->ss_enabled && radeon_crtc->ss.percentage)
@@ -665,7 +684,7 @@
 				adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
 				break;
 			case 3:
-				args.v3.sInput.usPixelClock = cpu_to_le16(mode->clock / 10);
+				args.v3.sInput.usPixelClock = cpu_to_le16(clock / 10);
 				args.v3.sInput.ucTransmitterID = radeon_encoder->encoder_id;
 				args.v3.sInput.ucEncodeMode = encoder_mode;
 				args.v3.sInput.ucDispPllConfig = 0;
@@ -679,10 +698,6 @@
 					args.v3.sInput.usPixelClock = cpu_to_le16(dp_clock / 10);
 				} else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
 					struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
-					if (encoder_mode == ATOM_ENCODER_MODE_HDMI)
-						/* deep color support */
-						args.v3.sInput.usPixelClock =
-							cpu_to_le16((mode->clock * bpc / 8) / 10);
 					if (dig->coherent_mode)
 						args.v3.sInput.ucDispPllConfig |=
 							DISPPLL_CONFIG_COHERENT_MODE;
@@ -862,14 +877,21 @@
 			args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */
 			if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
 				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_REF_DIV_SRC;
-			switch (bpc) {
-			case 8:
-			default:
-				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP;
-				break;
-			case 10:
-				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP;
-				break;
+			if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+				switch (bpc) {
+				case 8:
+				default:
+					args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP;
+					break;
+				case 10:
+					/* yes this is correct, the atom define is wrong */
+					args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_32BPP;
+					break;
+				case 12:
+					/* yes this is correct, the atom define is wrong */
+					args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP;
+					break;
+				}
 			}
 			args.v5.ucTransmitterID = encoder_id;
 			args.v5.ucEncoderMode = encoder_mode;
@@ -884,20 +906,22 @@
 			args.v6.ucMiscInfo = 0; /* HDMI depth, etc. */
 			if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
 				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_REF_DIV_SRC;
-			switch (bpc) {
-			case 8:
-			default:
-				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP;
-				break;
-			case 10:
-				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP;
-				break;
-			case 12:
-				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP;
-				break;
-			case 16:
-				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP;
-				break;
+			if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+				switch (bpc) {
+				case 8:
+				default:
+					args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP;
+					break;
+				case 10:
+					args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP_V6;
+					break;
+				case 12:
+					args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP_V6;
+					break;
+				case 16:
+					args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP;
+					break;
+				}
 			}
 			args.v6.ucTransmitterID = encoder_id;
 			args.v6.ucEncoderMode = encoder_mode;
@@ -938,6 +962,9 @@
 		struct radeon_connector_atom_dig *dig_connector =
 			radeon_connector->con_priv;
 		int dp_clock;
+
+		/* Assign mode clock for hdmi deep color max clock limit check */
+		radeon_connector->pixelclock_for_modeset = mode->clock;
 		radeon_crtc->bpc = radeon_get_monitor_bpc(connector);
 
 		switch (encoder_mode) {
@@ -1019,10 +1046,17 @@
 	struct radeon_encoder *radeon_encoder =
 		to_radeon_encoder(radeon_crtc->encoder);
 	u32 pll_clock = mode->clock;
+	u32 clock = mode->clock;
 	u32 ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0;
 	struct radeon_pll *pll;
 	int encoder_mode = atombios_get_encoder_mode(radeon_crtc->encoder);
 
+	/* pass the actual clock to atombios_crtc_program_pll for DCE5,6 for HDMI */
+	if (ASIC_IS_DCE5(rdev) && !ASIC_IS_DCE8(rdev) &&
+	    (encoder_mode == ATOM_ENCODER_MODE_HDMI) &&
+	    (radeon_crtc->bpc > 8))
+		clock = radeon_crtc->adjusted_clock;
+
 	switch (radeon_crtc->pll_id) {
 	case ATOM_PPLL1:
 		pll = &rdev->clock.p1pll;
@@ -1057,7 +1091,7 @@
 				 radeon_crtc->crtc_id, &radeon_crtc->ss);
 
 	atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
-				  encoder_mode, radeon_encoder->encoder_id, mode->clock,
+				  encoder_mode, radeon_encoder->encoder_id, clock,
 				  ref_div, fb_div, frac_fb_div, post_div,
 				  radeon_crtc->bpc, radeon_crtc->ss_enabled, &radeon_crtc->ss);
 

diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 54e4f52..c5b1f2d 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c

@@ -95,9 +95,12 @@
 	int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
 	unsigned char *base;
 	int recv_bytes;
+	int r = 0;
 
 	memset(&args, 0, sizeof(args));
 
+	mutex_lock(&chan->mutex);
+
 	base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
 
 	radeon_atom_copy_swap(base, send, send_bytes, true);
@@ -117,19 +120,22 @@
 	/* timeout */
 	if (args.v1.ucReplyStatus == 1) {
 		DRM_DEBUG_KMS("dp_aux_ch timeout\n");
-		return -ETIMEDOUT;
+		r = -ETIMEDOUT;
+		goto done;
 	}
 
 	/* flags not zero */
 	if (args.v1.ucReplyStatus == 2) {
 		DRM_DEBUG_KMS("dp_aux_ch flags not zero\n");
-		return -EBUSY;
+		r = -EBUSY;
+		goto done;
 	}
 
 	/* error */
 	if (args.v1.ucReplyStatus == 3) {
 		DRM_DEBUG_KMS("dp_aux_ch error\n");
-		return -EIO;
+		r = -EIO;
+		goto done;
 	}
 
 	recv_bytes = args.v1.ucDataOutLen;
@@ -139,7 +145,11 @@
 	if (recv && recv_size)
 		radeon_atom_copy_swap(recv, base + 16, recv_bytes, false);
 
-	return recv_bytes;
+	r = recv_bytes;
+done:
+	mutex_unlock(&chan->mutex);
+
+	return r;
 }
 
 #define BARE_ADDRESS_SIZE 3
@@ -212,11 +222,12 @@
 	radeon_connector->ddc_bus->rec.hpd = radeon_connector->hpd.hpd;
 	radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev;
 	radeon_connector->ddc_bus->aux.transfer = radeon_dp_aux_transfer;
-	ret = drm_dp_aux_register_i2c_bus(&radeon_connector->ddc_bus->aux);
+
+	ret = drm_dp_aux_register(&radeon_connector->ddc_bus->aux);
 	if (!ret)
 		radeon_connector->ddc_bus->has_aux = true;
 
-	WARN(ret, "drm_dp_aux_register_i2c_bus() failed with error %d\n", ret);
+	WARN(ret, "drm_dp_aux_register() failed with error %d\n", ret);
 }
 
 /***** general DP utility functions *****/
@@ -281,6 +292,19 @@
 
 /***** radeon specific DP functions *****/
 
+/*
+ * Cap the DPCD max link rate: 540000 if DP 1.2 capable, else 270000.
+ */
+static int radeon_dp_get_max_link_rate(struct drm_connector *connector,
+				       u8 dpcd[DP_DPCD_SIZE])
+{
+	int cap = 270000;
+
+	if (radeon_connector_is_dp12_capable(connector))
+		cap = 540000;
+	return min(drm_dp_max_link_rate(dpcd), cap);
+}
+
 /* First get the min lane# when low rate is used according to pixel clock
  * (prefer low rate), second check max lane# supported by DP panel,
  * if the max lane# < low rate lane# then use max lane# instead.
@@ -290,7 +314,7 @@
 					int pix_clock)
 {
 	int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
-	int max_link_rate = drm_dp_max_link_rate(dpcd);
+	int max_link_rate = radeon_dp_get_max_link_rate(connector, dpcd);
 	int max_lane_num = drm_dp_max_lane_count(dpcd);
 	int lane_num;
 	int max_dp_pix_clock;
@@ -328,7 +352,7 @@
 			return 540000;
 	}
 
-	return drm_dp_max_link_rate(dpcd);
+	return radeon_dp_get_max_link_rate(connector, dpcd);
 }
 
 static u8 radeon_dp_encoder_service(struct radeon_device *rdev,

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index e6eb509..2b29084 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c

@@ -1884,8 +1884,11 @@
 					args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT;
 				else
 					args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder);
-			} else
+			} else if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+				args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+			} else {
 				args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder);
+			}
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:

diff --git a/drivers/gpu/drm/radeon/atombios_i2c.c b/drivers/gpu/drm/radeon/atombios_i2c.c
index b5162c3..9c570fb 100644
--- a/drivers/gpu/drm/radeon/atombios_i2c.c
+++ b/drivers/gpu/drm/radeon/atombios_i2c.c

@@ -43,15 +43,19 @@
 	int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
 	unsigned char *base;
 	u16 out = cpu_to_le16(0);
+	int r = 0;
 
 	memset(&args, 0, sizeof(args));
 
+	mutex_lock(&chan->mutex);
+
 	base = (unsigned char *)rdev->mode_info.atom_context->scratch;
 
 	if (flags & HW_I2C_WRITE) {
 		if (num > ATOM_MAX_HW_I2C_WRITE) {
 			DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 3)\n", num);
-			return -EINVAL;
+			r = -EINVAL;
+			goto done;
 		}
 		if (buf == NULL)
 			args.ucRegIndex = 0;
@@ -65,7 +69,8 @@
 	} else {
 		if (num > ATOM_MAX_HW_I2C_READ) {
 			DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num);
-			return -EINVAL;
+			r = -EINVAL;
+			goto done;
 		}
 		args.ucRegIndex = 0;
 		args.lpI2CDataOut = 0;
@@ -82,13 +87,17 @@
 	/* error */
 	if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
 		DRM_DEBUG_KMS("hw_i2c error\n");
-		return -EIO;
+		r = -EIO;
+		goto done;
 	}
 
 	if (!(flags & HW_I2C_WRITE))
 		radeon_atom_copy_swap(buf, base, num, false);
 
-	return 0;
+done:
+	mutex_unlock(&chan->mutex);
+
+	return r;
 }
 
 int radeon_atom_hw_i2c_xfer(struct i2c_adapter *i2c_adap,

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index d2fd989..dcd4518 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c

@@ -80,6 +80,7 @@
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3257,7 +3258,7 @@
 	u32 mc_shared_chmap, mc_arb_ramcfg;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
@@ -3446,6 +3447,15 @@
 		     rdev->config.cik.max_sh_per_se,
 		     rdev->config.cik.max_backends_per_se);
 
+	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
+				rdev->config.cik.active_cus +=
+					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+			}
+		}
+	}
+
 	/* set HW defaults for 3D engine */
 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
@@ -3698,7 +3708,7 @@
 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
 
 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
-	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
 
 	return true;
@@ -3818,7 +3828,7 @@
 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
 			radeon_ring_write(ring, next_rptr);
 		}
 
@@ -5396,6 +5406,7 @@
 	WREG32(MC_VM_MX_L1_TLB_CNTL,
 	       (0xA << 7) |
 	       ENABLE_L1_TLB |
+	       ENABLE_L1_FRAGMENT_PROCESSING |
 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
 	       ENABLE_ADVANCED_DRIVER_MODEL |
 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
@@ -5408,7 +5419,8 @@
 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+	       BANK_SELECT(4) |
+	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
 	/* setup context0 */
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
@@ -5444,6 +5456,7 @@
 	       (u32)(rdev->dummy_page.addr >> 12));
 	WREG32(VM_CONTEXT1_CNTL2, 4);
 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
@@ -7450,7 +7463,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_flip(rdev, 0);
+						radeon_crtc_handle_vblank(rdev, 0);
 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -7476,7 +7489,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_flip(rdev, 1);
+						radeon_crtc_handle_vblank(rdev, 1);
 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}
@@ -7502,7 +7515,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_flip(rdev, 2);
+						radeon_crtc_handle_vblank(rdev, 2);
 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D3 vblank\n");
 				}
@@ -7528,7 +7541,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_flip(rdev, 3);
+						radeon_crtc_handle_vblank(rdev, 3);
 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D4 vblank\n");
 				}
@@ -7554,7 +7567,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_flip(rdev, 4);
+						radeon_crtc_handle_vblank(rdev, 4);
 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D5 vblank\n");
 				}
@@ -7580,7 +7593,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_flip(rdev, 5);
+						radeon_crtc_handle_vblank(rdev, 5);
 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D6 vblank\n");
 				}

diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 72e464c..8e9d0f1 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c

@@ -141,7 +141,7 @@
 		next_rptr += 4;
 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
 		radeon_ring_write(ring, 1); /* number of DWs to follow */
 		radeon_ring_write(ring, next_rptr);
 	}
@@ -151,7 +151,7 @@
 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
-	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	radeon_ring_write(ring, ib->length_dw);
 
 }
@@ -203,8 +203,8 @@
 
 	/* write the fence */
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
-	radeon_ring_write(ring, addr & 0xffffffff);
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
+	radeon_ring_write(ring, upper_32_bits(addr));
 	radeon_ring_write(ring, fence->seq);
 	/* generate an interrupt */
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
@@ -233,7 +233,7 @@
 
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
 	radeon_ring_write(ring, addr & 0xfffffff8);
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	radeon_ring_write(ring, upper_32_bits(addr));
 
 	return true;
 }
@@ -551,10 +551,10 @@
 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
 		radeon_ring_write(ring, cur_size_in_bytes);
 		radeon_ring_write(ring, 0); /* src/dst endian swap */
-		radeon_ring_write(ring, src_offset & 0xffffffff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
-		radeon_ring_write(ring, dst_offset & 0xffffffff);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
+		radeon_ring_write(ring, lower_32_bits(src_offset));
+		radeon_ring_write(ring, upper_32_bits(src_offset));
+		radeon_ring_write(ring, lower_32_bits(dst_offset));
+		radeon_ring_write(ring, upper_32_bits(dst_offset));
 		src_offset += cur_size_in_bytes;
 		dst_offset += cur_size_in_bytes;
 	}
@@ -605,7 +605,7 @@
 	}
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
 	radeon_ring_write(ring, 1); /* number of DWs to follow */
 	radeon_ring_write(ring, 0xDEADBEEF);
 	radeon_ring_unlock_commit(rdev, ring);
@@ -660,7 +660,7 @@
 
 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr);
 	ib.ptr[3] = 1;
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
@@ -742,7 +742,26 @@
 
 	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
 
-	if (flags & R600_PTE_SYSTEM) {
+	if (flags == R600_PTE_GART) {
+		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
+		while (count) {
+			unsigned bytes = count * 8;
+			if (bytes > 0x1FFFF8)
+				bytes = 0x1FFFF8;
+			/* linear copy of 'bytes' bytes from the GART table (src) to pe */
+			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+			ib->ptr[ib->length_dw++] = bytes;
+			ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+			ib->ptr[ib->length_dw++] = lower_32_bits(src);
+			ib->ptr[ib->length_dw++] = upper_32_bits(src);
+			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+			pe += bytes;
+			src += bytes;
+			count -= bytes / 8;
+		}
+	} else if (flags & R600_PTE_SYSTEM) {
 		while (count) {
 			ndw = count * 2;
 			if (ndw > 0xFFFFE)

diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index dd79263..ae88660 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h

@@ -482,6 +482,7 @@
 #define		READ_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 16)
 #define		WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 18)
 #define		WRITE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 19)
+#define		PAGE_TABLE_BLOCK_SIZE(x)			(((x) & 0xF) << 24)
 #define VM_CONTEXT1_CNTL				0x1414
 #define VM_CONTEXT0_CNTL2				0x1430
 #define VM_CONTEXT1_CNTL2				0x1434

diff --git a/drivers/gpu/drm/radeon/clearstate_cayman.h b/drivers/gpu/drm/radeon/clearstate_cayman.h
index aa908c5..e48a140 100644
--- a/drivers/gpu/drm/radeon/clearstate_cayman.h
+++ b/drivers/gpu/drm/radeon/clearstate_cayman.h

@@ -1050,7 +1050,7 @@
     {SECT_CONTEXT_def_5, 0x0000a29e, 5 },
     {SECT_CONTEXT_def_6, 0x0000a2a5, 56 },
     {SECT_CONTEXT_def_7, 0x0000a2de, 290 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CLEAR_def_1[] =
 {
@@ -1061,7 +1061,7 @@
 static const struct cs_extent_def SECT_CLEAR_defs[] =
 {
     {SECT_CLEAR_def_1, 0x0000ffc0, 3 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CTRLCONST_def_1[] =
 {
@@ -1071,11 +1071,11 @@
 static const struct cs_extent_def SECT_CTRLCONST_defs[] =
 {
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def cayman_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };

diff --git a/drivers/gpu/drm/radeon/clearstate_ci.h b/drivers/gpu/drm/radeon/clearstate_ci.h
index c3982f9..f55d066 100644
--- a/drivers/gpu/drm/radeon/clearstate_ci.h
+++ b/drivers/gpu/drm/radeon/clearstate_ci.h

@@ -936,9 +936,9 @@
     {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
     {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
     {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def ci_cs_data[] = {
     { ci_SECT_CONTEXT_defs, SECT_CONTEXT },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };

diff --git a/drivers/gpu/drm/radeon/clearstate_si.h b/drivers/gpu/drm/radeon/clearstate_si.h
index b994cb2..66e39cd 100644
--- a/drivers/gpu/drm/radeon/clearstate_si.h
+++ b/drivers/gpu/drm/radeon/clearstate_si.h

@@ -933,9 +933,9 @@
     {si_SECT_CONTEXT_def_5, 0x0000a2a1, 1 },
     {si_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
     {si_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def si_cs_data[] = {
     { si_SECT_CONTEXT_defs, SECT_CONTEXT },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };

diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
new file mode 100644
index 0000000..51800e3
--- /dev/null
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c

@@ -0,0 +1,244 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * Copyright 2014 Rafał Miłecki
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/hdmi.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "r600d.h"
+
+static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	u32 tmp;
+	u8 *sadb;
+	int sad_count;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			radeon_connector = to_radeon_connector(connector);
+			break;
+		}
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		return;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER);
+	tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK);
+	/* set HDMI mode */
+	tmp |= HDMI_CONNECTION;
+	if (sad_count)
+		tmp |= SPEAKER_ALLOCATION(sadb[0]);
+	else
+		tmp |= SPEAKER_ALLOCATION(5); /* stereo */
+	WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
+}
+
+static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	struct cea_sad *sads;
+	int i, sad_count;
+
+	static const u16 eld_reg_to_type[][2] = {
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+	};
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			radeon_connector = to_radeon_connector(connector);
+			break;
+		}
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+		return;
+	}
+	BUG_ON(!sads);
+
+	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+		u32 value = 0;
+		u8 stereo_freqs = 0;
+		int max_channels = -1;
+		int j;
+
+		for (j = 0; j < sad_count; j++) {
+			struct cea_sad *sad = &sads[j];
+
+			if (sad->format == eld_reg_to_type[i][1]) {
+				if (sad->channels > max_channels) {
+					value = MAX_CHANNELS(sad->channels) |
+						DESCRIPTOR_BYTE_2(sad->byte2) |
+						SUPPORTED_FREQUENCIES(sad->freq);
+					max_channels = sad->channels;
+				}
+
+				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+					stereo_freqs |= sad->freq;
+				else
+					break;
+			}
+		}
+
+		value |= SUPPORTED_FREQUENCIES_STEREO(stereo_freqs);
+
+		WREG32(eld_reg_to_type[i][0], value);
+	}
+
+	kfree(sads);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+void dce3_1_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+	struct hdmi_avi_infoframe frame;
+	uint32_t offset;
+	ssize_t err;
+
+	if (!dig || !dig->afmt)
+		return;
+
+	/* Silent, r600_hdmi_enable will raise WARN for us */
+	if (!dig->afmt->enabled)
+		return;
+	offset = dig->afmt->offset;
+
+	/* disable audio prior to setting up hw */
+	dig->afmt->pin = r600_audio_get_pin(rdev);
+	r600_audio_enable(rdev, dig->afmt->pin, false);
+
+	r600_audio_set_dto(encoder, mode->clock);
+
+	WREG32(HDMI0_VBI_PACKET_CONTROL + offset,
+	       HDMI0_NULL_SEND); /* send null packets when required */
+
+	WREG32(HDMI0_AUDIO_CRC_CONTROL + offset, 0x1000);
+
+	if (ASIC_IS_DCE32(rdev)) {
+		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
+		       HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
+		       HDMI0_AUDIO_PACKETS_PER_LINE(3)); /* should be sufficient for all audio modes and small enough for all hblanks */
+		WREG32(AFMT_AUDIO_PACKET_CONTROL + offset,
+		       AFMT_AUDIO_SAMPLE_SEND | /* send audio packets */
+		       AFMT_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
+	} else {
+		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
+		       HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
+		       HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
+		       HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be sufficient for all audio modes and small enough for all hblanks */
+		       HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
+	}
+
+	if (ASIC_IS_DCE32(rdev)) {
+		dce3_2_afmt_write_speaker_allocation(encoder);
+		dce3_2_afmt_write_sad_regs(encoder);
+	}
+
+	WREG32(HDMI0_ACR_PACKET_CONTROL + offset,
+	       HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */
+	       HDMI0_ACR_AUTO_SEND); /* allow hw to send ACR packets when required */
+
+	WREG32(HDMI0_VBI_PACKET_CONTROL + offset,
+	       HDMI0_NULL_SEND | /* send null packets when required */
+	       HDMI0_GC_SEND | /* send general control packets */
+	       HDMI0_GC_CONT); /* send general control packets every frame */
+
+	/* TODO: HDMI0_AUDIO_INFO_UPDATE */
+	WREG32(HDMI0_INFOFRAME_CONTROL0 + offset,
+	       HDMI0_AVI_INFO_SEND | /* enable AVI info frames */
+	       HDMI0_AVI_INFO_CONT | /* send AVI info frames every frame/field */
+	       HDMI0_AUDIO_INFO_SEND | /* enable audio info frames (frames won't be set until audio is enabled) */
+	       HDMI0_AUDIO_INFO_CONT); /* send audio info frames every frame/field */
+
+	WREG32(HDMI0_INFOFRAME_CONTROL1 + offset,
+	       HDMI0_AVI_INFO_LINE(2) | /* anything other than 0 */
+	       HDMI0_AUDIO_INFO_LINE(2)); /* anything other than 0 */
+
+	WREG32(HDMI0_GC + offset, 0); /* unset HDMI0_GC_AVMUTE */
+
+	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
+	if (err < 0) {
+		DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	r600_hdmi_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+	r600_hdmi_update_ACR(encoder, mode->clock);
+
+	/* it's unknown what these bits do exactly, but it's indeed quite useful for debugging */
+	WREG32(HDMI0_RAMP_CONTROL0 + offset, 0x00FFFFFF);
+	WREG32(HDMI0_RAMP_CONTROL1 + offset, 0x007FFFFF);
+	WREG32(HDMI0_RAMP_CONTROL2 + offset, 0x00000001);
+	WREG32(HDMI0_RAMP_CONTROL3 + offset, 0x00000001);
+
+	r600_hdmi_audio_workaround(encoder);
+
+	/* enable audio after setting up hw */
+	r600_audio_enable(rdev, dig->afmt->pin, true);
+}

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 0f7a51a..e2f6052 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c

@@ -1301,36 +1301,6 @@
 }
 
 /**
- * radeon_irq_kms_pflip_irq_get - pre-pageflip callback.
- *
- * @rdev: radeon_device pointer
- * @crtc: crtc to prepare for pageflip on
- *
- * Pre-pageflip callback (evergreen+).
- * Enables the pageflip irq (vblank irq).
- */
-void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* enable the pflip int */
-	radeon_irq_kms_pflip_irq_get(rdev, crtc);
-}
-
-/**
- * evergreen_post_page_flip - pos-pageflip callback.
- *
- * @rdev: radeon_device pointer
- * @crtc: crtc to cleanup pageflip on
- *
- * Post-pageflip callback (evergreen+).
- * Disables the pageflip irq (vblank irq).
- */
-void evergreen_post_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* disable the pflip int */
-	radeon_irq_kms_pflip_irq_put(rdev, crtc);
-}
-
-/**
  * evergreen_page_flip - pageflip callback.
  *
  * @rdev: radeon_device pointer
@@ -1343,7 +1313,7 @@
  * double buffered update to take place.
  * Returns the current update pending status.
  */
-u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
+void evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 	u32 tmp = RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset);
@@ -1375,9 +1345,23 @@
 	/* Unlock the lock, so double-buffering can take place inside vblank */
 	tmp &= ~EVERGREEN_GRPH_UPDATE_LOCK;
 	WREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
+}
+
+/**
+ * evergreen_page_flip_pending - check if page flip is still pending
+ *
+ * @rdev: radeon_device pointer
+ * @crtc_id: crtc to check
+ *
+ * Returns the current update pending status.
+ */
+bool evergreen_page_flip_pending(struct radeon_device *rdev, int crtc_id)
+{
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 
 	/* Return current update_pending status: */
-	return RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset) & EVERGREEN_GRPH_SURFACE_UPDATE_PENDING;
+	return !!(RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset) &
+		EVERGREEN_GRPH_SURFACE_UPDATE_PENDING);
 }
 
 /* get temperature in millidegrees */
@@ -3353,6 +3337,18 @@
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.evergreen.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
@@ -4810,7 +4806,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_flip(rdev, 0);
+						radeon_crtc_handle_vblank(rdev, 0);
 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -4836,7 +4832,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_flip(rdev, 1);
+						radeon_crtc_handle_vblank(rdev, 1);
 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}
@@ -4862,7 +4858,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_flip(rdev, 2);
+						radeon_crtc_handle_vblank(rdev, 2);
 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D3 vblank\n");
 				}
@@ -4888,7 +4884,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_flip(rdev, 3);
+						radeon_crtc_handle_vblank(rdev, 3);
 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D4 vblank\n");
 				}
@@ -4914,7 +4910,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_flip(rdev, 4);
+						radeon_crtc_handle_vblank(rdev, 4);
 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D5 vblank\n");
 				}
@@ -4940,7 +4936,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_flip(rdev, 5);
+						radeon_crtc_handle_vblank(rdev, 5);
 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D6 vblank\n");
 				}

diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 05b0c95..1ec0e6e 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c

@@ -293,10 +293,13 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
 	struct hdmi_avi_infoframe frame;
 	uint32_t offset;
 	ssize_t err;
+	uint32_t val;
+	int bpc = 8;
 
 	if (!dig || !dig->afmt)
 		return;
@@ -306,6 +309,12 @@
 		return;
 	offset = dig->afmt->offset;
 
+	/* hdmi deep color mode general control packets setup, if bpc > 8 */
+	if (encoder->crtc) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+		bpc = radeon_crtc->bpc;
+	}
+
 	/* disable audio prior to setting up hw */
 	if (ASIC_IS_DCE6(rdev)) {
 		dig->afmt->pin = dce6_audio_get_pin(rdev);
@@ -322,6 +331,35 @@
 
 	WREG32(AFMT_AUDIO_CRC_CONTROL + offset, 0x1000);
 
+	val = RREG32(HDMI_CONTROL + offset);
+	val &= ~HDMI_DEEP_COLOR_ENABLE;
+	val &= ~HDMI_DEEP_COLOR_DEPTH_MASK;
+
+	switch (bpc) {
+		case 0:
+		case 6:
+		case 8:
+		case 16:
+		default:
+			DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+					 connector->name, bpc);
+			break;
+		case 10:
+			val |= HDMI_DEEP_COLOR_ENABLE;
+			val |= HDMI_DEEP_COLOR_DEPTH(HDMI_30BIT_DEEP_COLOR);
+			DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+					 connector->name);
+			break;
+		case 12:
+			val |= HDMI_DEEP_COLOR_ENABLE;
+			val |= HDMI_DEEP_COLOR_DEPTH(HDMI_36BIT_DEEP_COLOR);
+			DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+					 connector->name);
+			break;
+	}
+
+	WREG32(HDMI_CONTROL + offset, val);
+
 	WREG32(HDMI_VBI_PACKET_CONTROL + offset,
 	       HDMI_NULL_SEND | /* send null packets when required */
 	       HDMI_GC_SEND | /* send general control packets */
@@ -348,9 +386,13 @@
 
 	/* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */
 
-	WREG32(HDMI_ACR_PACKET_CONTROL + offset,
-	       HDMI_ACR_SOURCE | /* select SW CTS value */
-	       HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */
+	if (bpc > 8)
+		WREG32(HDMI_ACR_PACKET_CONTROL + offset,
+		       HDMI_ACR_AUTO_SEND); /* allow hw to send ACR packets when required */
+	else
+		WREG32(HDMI_ACR_PACKET_CONTROL + offset,
+		       HDMI_ACR_SOURCE | /* select SW CTS value */
+		       HDMI_ACR_AUTO_SEND); /* allow hw to send ACR packets when required */
 
 	evergreen_hdmi_update_ACR(encoder, mode->clock);
 

diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index f9c7963..b066d67 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h

@@ -517,10 +517,11 @@
 #       define HDMI_ERROR_ACK                (1 << 8)
 #       define HDMI_ERROR_MASK               (1 << 9)
 #       define HDMI_DEEP_COLOR_ENABLE        (1 << 24)
-#       define HDMI_DEEP_COLOR_DEPTH         (((x) & 3) << 28)
+#       define HDMI_DEEP_COLOR_DEPTH(x)      (((x) & 3) << 28)
 #       define HDMI_24BIT_DEEP_COLOR         0
 #       define HDMI_30BIT_DEEP_COLOR         1
 #       define HDMI_36BIT_DEEP_COLOR         2
+#       define HDMI_DEEP_COLOR_DEPTH_MASK    (3 << 28)
 #define HDMI_STATUS                          0x7034
 #       define HDMI_ACTIVE_AVMUTE            (1 << 0)
 #       define HDMI_AUDIO_PACKET_ERROR       (1 << 16)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index d246e04..5a33ca6 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c

@@ -1057,6 +1057,18 @@
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.cayman.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
@@ -1228,12 +1240,14 @@
 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
 	/* Setup L2 cache */
 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+	       ENABLE_L2_FRAGMENT_PROCESSING |
 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+	       BANK_SELECT(6) |
 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
 	/* setup context0 */
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
@@ -1266,6 +1280,7 @@
 	       (u32)(rdev->dummy_page.addr >> 12));
 	WREG32(VM_CONTEXT1_CNTL2, 4);
 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
@@ -1343,7 +1358,7 @@
 	/* EVENT_WRITE_EOP - flush caches, send int */
 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
-	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);

diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index d996033..2e12e4d 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h

@@ -128,6 +128,7 @@
 #define		READ_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 16)
 #define		WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 18)
 #define		WRITE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 19)
+#define		PAGE_TABLE_BLOCK_SIZE(x)			(((x) & 0xF) << 24)
 #define VM_CONTEXT1_CNTL				0x1414
 #define VM_CONTEXT0_CNTL2				0x1430
 #define VM_CONTEXT1_CNTL2				0x1434

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index b6c3264..1544efc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c

@@ -142,36 +142,6 @@
 }
 
 /**
- * r100_pre_page_flip - pre-pageflip callback.
- *
- * @rdev: radeon_device pointer
- * @crtc: crtc to prepare for pageflip on
- *
- * Pre-pageflip callback (r1xx-r4xx).
- * Enables the pageflip irq (vblank irq).
- */
-void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* enable the pflip int */
-	radeon_irq_kms_pflip_irq_get(rdev, crtc);
-}
-
-/**
- * r100_post_page_flip - pos-pageflip callback.
- *
- * @rdev: radeon_device pointer
- * @crtc: crtc to cleanup pageflip on
- *
- * Post-pageflip callback (r1xx-r4xx).
- * Disables the pageflip irq (vblank irq).
- */
-void r100_post_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* disable the pflip int */
-	radeon_irq_kms_pflip_irq_put(rdev, crtc);
-}
-
-/**
  * r100_page_flip - pageflip callback.
  *
  * @rdev: radeon_device pointer
@@ -182,9 +152,8 @@
  * During vblank we take the crtc lock and wait for the update_pending
  * bit to go high, when it does, we release the lock, and allow the
  * double buffered update to take place.
- * Returns the current update pending status.
  */
-u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
+void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
@@ -206,8 +175,24 @@
 	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 
+}
+
+/**
+ * r100_page_flip_pending - check if page flip is still pending
+ *
+ * @rdev: radeon_device pointer
+ * @crtc_id: crtc to check
+ *
+ * Check if the last pageflip is still pending (r1xx-r4xx).
+ * Returns the current update pending status.
+ */
+bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
+{
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
+
 	/* Return current update_pending status: */
-	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
+	return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
+		RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 }
 
 /**
@@ -697,15 +682,11 @@
 	WREG32(RADEON_AIC_HI_ADDR, 0);
 }
 
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+			    uint64_t addr)
 {
 	u32 *gtt = rdev->gart.ptr;
-
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	gtt[i] = cpu_to_le32(lower_32_bits(addr));
-	return 0;
 }
 
 void r100_pci_gart_fini(struct radeon_device *rdev)
@@ -794,7 +775,7 @@
 				wake_up(&rdev->irq.vblank_queue);
 			}
 			if (atomic_read(&rdev->irq.pflip[0]))
-				radeon_crtc_handle_flip(rdev, 0);
+				radeon_crtc_handle_vblank(rdev, 0);
 		}
 		if (status & RADEON_CRTC2_VBLANK_STAT) {
 			if (rdev->irq.crtc_vblank_int[1]) {
@@ -803,7 +784,7 @@
 				wake_up(&rdev->irq.vblank_queue);
 			}
 			if (atomic_read(&rdev->irq.pflip[1]))
-				radeon_crtc_handle_flip(rdev, 1);
+				radeon_crtc_handle_vblank(rdev, 1);
 		}
 		if (status & RADEON_FP_DETECT_STAT) {
 			queue_hotplug = true;

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 206caf9..3c21d77 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c

@@ -72,13 +72,11 @@
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
-int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+			      uint64_t addr)
 {
 	void __iomem *ptr = rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	addr = (lower_32_bits(addr) >> 8) |
 	       ((upper_32_bits(addr) & 0xff) << 24) |
 	       R300_PTE_WRITEABLE | R300_PTE_READABLE;
@@ -86,7 +84,6 @@
 	 * on powerpc without HW swappers, it'll get swapped on way
 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
 	writel(addr, ((void __iomem *)ptr) + (i * 4));
-	return 0;
 }
 
 int rv370_pcie_gart_init(struct radeon_device *rdev)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index bbc189f..c66952d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c

@@ -1958,6 +1958,9 @@
 	if (tmp < rdev->config.r600.max_simds) {
 		rdev->config.r600.max_simds = tmp;
 	}
+	tmp = rdev->config.r600.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
+	rdev->config.r600.active_simds = tmp;
 
 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
 	tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
@@ -2724,7 +2727,7 @@
 		/* EVENT_WRITE_EOP - flush caches, send int */
 		radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 		radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
-		radeon_ring_write(ring, addr & 0xffffffff);
+		radeon_ring_write(ring, lower_32_bits(addr));
 		radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
 		radeon_ring_write(ring, fence->seq);
 		radeon_ring_write(ring, 0);
@@ -2763,7 +2766,7 @@
 		sel |= PACKET3_SEM_WAIT_ON_SIGNAL;
 
 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
-	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 
 	return true;
@@ -2824,9 +2827,9 @@
 		if (size_in_bytes == 0)
 			tmp |= PACKET3_CP_DMA_CP_SYNC;
 		radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
-		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, lower_32_bits(src_offset));
 		radeon_ring_write(ring, tmp);
-		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, lower_32_bits(dst_offset));
 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
 		radeon_ring_write(ring, cur_size_in_bytes);
 		src_offset += cur_size_in_bytes;
@@ -3876,7 +3879,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_flip(rdev, 0);
+						radeon_crtc_handle_vblank(rdev, 0);
 					rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -3902,7 +3905,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_flip(rdev, 1);
+						radeon_crtc_handle_vblank(rdev, 1);
 					rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}

diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index 85a2bb2..26ef8ce 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c

@@ -133,7 +133,7 @@
 /*
  * update the N and CTS parameters for a given pixel clock rate
  */
-static void r600_hdmi_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+void r600_hdmi_update_ACR(struct drm_encoder *encoder, uint32_t clock)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -142,21 +142,33 @@
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	uint32_t offset = dig->afmt->offset;
 
-	WREG32(HDMI0_ACR_32_0 + offset, HDMI0_ACR_CTS_32(acr.cts_32khz));
-	WREG32(HDMI0_ACR_32_1 + offset, acr.n_32khz);
+	WREG32_P(HDMI0_ACR_32_0 + offset,
+		 HDMI0_ACR_CTS_32(acr.cts_32khz),
+		 ~HDMI0_ACR_CTS_32_MASK);
+	WREG32_P(HDMI0_ACR_32_1 + offset,
+		 HDMI0_ACR_N_32(acr.n_32khz),
+		 ~HDMI0_ACR_N_32_MASK);
 
-	WREG32(HDMI0_ACR_44_0 + offset, HDMI0_ACR_CTS_44(acr.cts_44_1khz));
-	WREG32(HDMI0_ACR_44_1 + offset, acr.n_44_1khz);
+	WREG32_P(HDMI0_ACR_44_0 + offset,
+		 HDMI0_ACR_CTS_44(acr.cts_44_1khz),
+		 ~HDMI0_ACR_CTS_44_MASK);
+	WREG32_P(HDMI0_ACR_44_1 + offset,
+		 HDMI0_ACR_N_44(acr.n_44_1khz),
+		 ~HDMI0_ACR_N_44_MASK);
 
-	WREG32(HDMI0_ACR_48_0 + offset, HDMI0_ACR_CTS_48(acr.cts_48khz));
-	WREG32(HDMI0_ACR_48_1 + offset, acr.n_48khz);
+	WREG32_P(HDMI0_ACR_48_0 + offset,
+		 HDMI0_ACR_CTS_48(acr.cts_48khz),
+		 ~HDMI0_ACR_CTS_48_MASK);
+	WREG32_P(HDMI0_ACR_48_1 + offset,
+		 HDMI0_ACR_N_48(acr.n_48khz),
+		 ~HDMI0_ACR_N_48_MASK);
 }
 
 /*
  * build a HDMI Video Info Frame
  */
-static void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder,
-					   void *buffer, size_t size)
+void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, void *buffer,
+				    size_t size)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -231,7 +243,7 @@
 /*
  * write the audio workaround status to the hardware
  */
-static void r600_hdmi_audio_workaround(struct drm_encoder *encoder)
+void r600_hdmi_audio_workaround(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -250,7 +262,7 @@
 		 value, ~HDMI0_AUDIO_TEST_EN);
 }
 
-static void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -320,121 +332,6 @@
 	}
 }
 
-static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
-{
-	struct radeon_device *rdev = encoder->dev->dev_private;
-	struct drm_connector *connector;
-	struct radeon_connector *radeon_connector = NULL;
-	u32 tmp;
-	u8 *sadb;
-	int sad_count;
-
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
-		if (connector->encoder == encoder) {
-			radeon_connector = to_radeon_connector(connector);
-			break;
-		}
-	}
-
-	if (!radeon_connector) {
-		DRM_ERROR("Couldn't find encoder's connector\n");
-		return;
-	}
-
-	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
-	if (sad_count < 0) {
-		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
-		return;
-	}
-
-	/* program the speaker allocation */
-	tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER);
-	tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK);
-	/* set HDMI mode */
-	tmp |= HDMI_CONNECTION;
-	if (sad_count)
-		tmp |= SPEAKER_ALLOCATION(sadb[0]);
-	else
-		tmp |= SPEAKER_ALLOCATION(5); /* stereo */
-	WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp);
-
-	kfree(sadb);
-}
-
-static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder)
-{
-	struct radeon_device *rdev = encoder->dev->dev_private;
-	struct drm_connector *connector;
-	struct radeon_connector *radeon_connector = NULL;
-	struct cea_sad *sads;
-	int i, sad_count;
-
-	static const u16 eld_reg_to_type[][2] = {
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
-		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
-	};
-
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
-		if (connector->encoder == encoder) {
-			radeon_connector = to_radeon_connector(connector);
-			break;
-		}
-	}
-
-	if (!radeon_connector) {
-		DRM_ERROR("Couldn't find encoder's connector\n");
-		return;
-	}
-
-	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
-	if (sad_count < 0) {
-		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
-		return;
-	}
-	BUG_ON(!sads);
-
-	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
-		u32 value = 0;
-		u8 stereo_freqs = 0;
-		int max_channels = -1;
-		int j;
-
-		for (j = 0; j < sad_count; j++) {
-			struct cea_sad *sad = &sads[j];
-
-			if (sad->format == eld_reg_to_type[i][1]) {
-				if (sad->channels > max_channels) {
-					value = MAX_CHANNELS(sad->channels) |
-						DESCRIPTOR_BYTE_2(sad->byte2) |
-						SUPPORTED_FREQUENCIES(sad->freq);
-					max_channels = sad->channels;
-				}
-
-				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
-					stereo_freqs |= sad->freq;
-				else
-					break;
-			}
-		}
-
-		value |= SUPPORTED_FREQUENCIES_STEREO(stereo_freqs);
-
-		WREG32(eld_reg_to_type[i][0], value);
-	}
-
-	kfree(sads);
-}
-
 /*
  * update the info frames with the data from the current display mode
  */
@@ -447,6 +344,7 @@
 	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
 	struct hdmi_avi_infoframe frame;
 	uint32_t offset;
+	uint32_t acr_ctl;
 	ssize_t err;
 
 	if (!dig || !dig->afmt)
@@ -463,52 +361,44 @@
 
 	r600_audio_set_dto(encoder, mode->clock);
 
-	WREG32(HDMI0_VBI_PACKET_CONTROL + offset,
-	       HDMI0_NULL_SEND); /* send null packets when required */
+	WREG32_P(HDMI0_AUDIO_PACKET_CONTROL + offset,
+		 HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
+		 HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
+		 HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be sufficient for all audio modes and small enough for all hblanks */
+		 HDMI0_60958_CS_UPDATE, /* allow 60958 channel status fields to be updated */
+		 ~(HDMI0_AUDIO_SAMPLE_SEND |
+		   HDMI0_AUDIO_DELAY_EN_MASK |
+		   HDMI0_AUDIO_PACKETS_PER_LINE_MASK |
+		   HDMI0_60958_CS_UPDATE));
 
-	WREG32(HDMI0_AUDIO_CRC_CONTROL + offset, 0x1000);
+	/* DCE 3.0 uses register that's normally for CRC_CONTROL */
+	acr_ctl = ASIC_IS_DCE3(rdev) ? DCE3_HDMI0_ACR_PACKET_CONTROL :
+				       HDMI0_ACR_PACKET_CONTROL;
+	WREG32_P(acr_ctl + offset,
+		 HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */
+		 HDMI0_ACR_AUTO_SEND, /* allow hw to send ACR packets when required */
+		 ~(HDMI0_ACR_SOURCE |
+		   HDMI0_ACR_AUTO_SEND));
 
-	if (ASIC_IS_DCE32(rdev)) {
-		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
-		       HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
-		       HDMI0_AUDIO_PACKETS_PER_LINE(3)); /* should be suffient for all audio modes and small enough for all hblanks */
-		WREG32(AFMT_AUDIO_PACKET_CONTROL + offset,
-		       AFMT_AUDIO_SAMPLE_SEND | /* send audio packets */
-		       AFMT_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
-	} else {
-		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
-		       HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
-		       HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
-		       HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */
-		       HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
-	}
+	WREG32_OR(HDMI0_VBI_PACKET_CONTROL + offset,
+		  HDMI0_NULL_SEND | /* send null packets when required */
+		  HDMI0_GC_SEND | /* send general control packets */
+		  HDMI0_GC_CONT); /* send general control packets every frame */
 
-	if (ASIC_IS_DCE32(rdev)) {
-		dce3_2_afmt_write_speaker_allocation(encoder);
-		dce3_2_afmt_write_sad_regs(encoder);
-	}
+	WREG32_OR(HDMI0_INFOFRAME_CONTROL0 + offset,
+		  HDMI0_AVI_INFO_SEND | /* enable AVI info frames */
+		  HDMI0_AVI_INFO_CONT | /* send AVI info frames every frame/field */
+		  HDMI0_AUDIO_INFO_SEND | /* enable audio info frames (frames won't be set until audio is enabled) */
+		  HDMI0_AUDIO_INFO_UPDATE); /* required for audio info values to be updated */
 
-	WREG32(HDMI0_ACR_PACKET_CONTROL + offset,
-	       HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */
-	       HDMI0_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */
+	WREG32_P(HDMI0_INFOFRAME_CONTROL1 + offset,
+		 HDMI0_AVI_INFO_LINE(2) | /* anything other than 0 */
+		 HDMI0_AUDIO_INFO_LINE(2), /* anything other than 0 */
+		 ~(HDMI0_AVI_INFO_LINE_MASK |
+		   HDMI0_AUDIO_INFO_LINE_MASK));
 
-	WREG32(HDMI0_VBI_PACKET_CONTROL + offset,
-	       HDMI0_NULL_SEND | /* send null packets when required */
-	       HDMI0_GC_SEND | /* send general control packets */
-	       HDMI0_GC_CONT); /* send general control packets every frame */
-
-	/* TODO: HDMI0_AUDIO_INFO_UPDATE */
-	WREG32(HDMI0_INFOFRAME_CONTROL0 + offset,
-	       HDMI0_AVI_INFO_SEND | /* enable AVI info frames */
-	       HDMI0_AVI_INFO_CONT | /* send AVI info frames every frame/field */
-	       HDMI0_AUDIO_INFO_SEND | /* enable audio info frames (frames won't be set until audio is enabled) */
-	       HDMI0_AUDIO_INFO_CONT); /* send audio info frames every frame/field */
-
-	WREG32(HDMI0_INFOFRAME_CONTROL1 + offset,
-	       HDMI0_AVI_INFO_LINE(2) | /* anything other than 0 */
-	       HDMI0_AUDIO_INFO_LINE(2)); /* anything other than 0 */
-
-	WREG32(HDMI0_GC + offset, 0); /* unset HDMI0_GC_AVMUTE */
+	WREG32_AND(HDMI0_GC + offset,
+		   ~HDMI0_GC_AVMUTE); /* unset HDMI0_GC_AVMUTE */
 
 	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
 	if (err < 0) {
@@ -523,22 +413,45 @@
 	}
 
 	r600_hdmi_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+	/* fglrx duplicates INFOFRAME_CONTROL0 & INFOFRAME_CONTROL1 ops here */
+
+	WREG32_AND(HDMI0_GENERIC_PACKET_CONTROL + offset,
+		   ~(HDMI0_GENERIC0_SEND |
+		     HDMI0_GENERIC0_CONT |
+		     HDMI0_GENERIC0_UPDATE |
+		     HDMI0_GENERIC1_SEND |
+		     HDMI0_GENERIC1_CONT |
+		     HDMI0_GENERIC0_LINE_MASK |
+		     HDMI0_GENERIC1_LINE_MASK));
+
 	r600_hdmi_update_ACR(encoder, mode->clock);
 
+	WREG32_P(HDMI0_60958_0 + offset,
+		 HDMI0_60958_CS_CHANNEL_NUMBER_L(1),
+		 ~(HDMI0_60958_CS_CHANNEL_NUMBER_L_MASK |
+		   HDMI0_60958_CS_CLOCK_ACCURACY_MASK));
+
+	WREG32_P(HDMI0_60958_1 + offset,
+		 HDMI0_60958_CS_CHANNEL_NUMBER_R(2),
+		 ~HDMI0_60958_CS_CHANNEL_NUMBER_R_MASK);
+
 	/* it's unknown what these bits do excatly, but it's indeed quite useful for debugging */
 	WREG32(HDMI0_RAMP_CONTROL0 + offset, 0x00FFFFFF);
 	WREG32(HDMI0_RAMP_CONTROL1 + offset, 0x007FFFFF);
 	WREG32(HDMI0_RAMP_CONTROL2 + offset, 0x00000001);
 	WREG32(HDMI0_RAMP_CONTROL3 + offset, 0x00000001);
 
-	r600_hdmi_audio_workaround(encoder);
-
 	/* enable audio after to setting up hw */
 	r600_audio_enable(rdev, dig->afmt->pin, true);
 }
 
-/*
- * update settings with current parameters from audio engine
+/**
+ * r600_hdmi_update_audio_settings - Update audio infoframe
+ *
+ * @encoder: drm encoder
+ *
+ * Gets info about current audio stream and updates audio infoframe.
  */
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder)
 {
@@ -550,7 +463,7 @@
 	uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE];
 	struct hdmi_audio_infoframe frame;
 	uint32_t offset;
-	uint32_t iec;
+	uint32_t value;
 	ssize_t err;
 
 	if (!dig->afmt || !dig->afmt->enabled)
@@ -563,60 +476,6 @@
 	DRM_DEBUG("0x%02X IEC60958 status bits and 0x%02X category code\n",
 		  (int)audio.status_bits, (int)audio.category_code);
 
-	iec = 0;
-	if (audio.status_bits & AUDIO_STATUS_PROFESSIONAL)
-		iec |= 1 << 0;
-	if (audio.status_bits & AUDIO_STATUS_NONAUDIO)
-		iec |= 1 << 1;
-	if (audio.status_bits & AUDIO_STATUS_COPYRIGHT)
-		iec |= 1 << 2;
-	if (audio.status_bits & AUDIO_STATUS_EMPHASIS)
-		iec |= 1 << 3;
-
-	iec |= HDMI0_60958_CS_CATEGORY_CODE(audio.category_code);
-
-	switch (audio.rate) {
-	case 32000:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0x3);
-		break;
-	case 44100:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0x0);
-		break;
-	case 48000:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0x2);
-		break;
-	case 88200:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0x8);
-		break;
-	case 96000:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0xa);
-		break;
-	case 176400:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0xc);
-		break;
-	case 192000:
-		iec |= HDMI0_60958_CS_SAMPLING_FREQUENCY(0xe);
-		break;
-	}
-
-	WREG32(HDMI0_60958_0 + offset, iec);
-
-	iec = 0;
-	switch (audio.bits_per_sample) {
-	case 16:
-		iec |= HDMI0_60958_CS_WORD_LENGTH(0x2);
-		break;
-	case 20:
-		iec |= HDMI0_60958_CS_WORD_LENGTH(0x3);
-		break;
-	case 24:
-		iec |= HDMI0_60958_CS_WORD_LENGTH(0xb);
-		break;
-	}
-	if (audio.status_bits & AUDIO_STATUS_V)
-		iec |= 0x5 << 16;
-	WREG32_P(HDMI0_60958_1 + offset, iec, ~0x5000f);
-
 	err = hdmi_audio_infoframe_init(&frame);
 	if (err < 0) {
 		DRM_ERROR("failed to setup audio infoframe\n");
@@ -631,8 +490,22 @@
 		return;
 	}
 
+	value = RREG32(HDMI0_AUDIO_PACKET_CONTROL + offset);
+	if (value & HDMI0_AUDIO_TEST_EN)
+		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
+		       value & ~HDMI0_AUDIO_TEST_EN);
+
+	WREG32_OR(HDMI0_CONTROL + offset,
+		  HDMI0_ERROR_ACK);
+
+	WREG32_AND(HDMI0_INFOFRAME_CONTROL0 + offset,
+		   ~HDMI0_AUDIO_INFO_SOURCE);
+
 	r600_hdmi_update_audio_infoframe(encoder, buffer, sizeof(buffer));
-	r600_hdmi_audio_workaround(encoder);
+
+	WREG32_OR(HDMI0_INFOFRAME_CONTROL0 + offset,
+		  HDMI0_AUDIO_INFO_CONT |
+		  HDMI0_AUDIO_INFO_UPDATE);
 }
 
 /*

diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 37455f6..f94e7a9 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h

@@ -1029,15 +1029,18 @@
 #define HDMI0_AUDIO_PACKET_CONTROL   0x7408
 #       define HDMI0_AUDIO_SAMPLE_SEND  (1 << 0)
 #       define HDMI0_AUDIO_DELAY_EN(x)  (((x) & 3) << 4)
+#       define HDMI0_AUDIO_DELAY_EN_MASK	(3 << 4)
 #       define HDMI0_AUDIO_SEND_MAX_PACKETS  (1 << 8)
 #       define HDMI0_AUDIO_TEST_EN         (1 << 12)
 #       define HDMI0_AUDIO_PACKETS_PER_LINE(x)  (((x) & 0x1f) << 16)
+#       define HDMI0_AUDIO_PACKETS_PER_LINE_MASK	(0x1f << 16)
 #       define HDMI0_AUDIO_CHANNEL_SWAP    (1 << 24)
 #       define HDMI0_60958_CS_UPDATE       (1 << 26)
 #       define HDMI0_AZ_FORMAT_WTRIG_MASK  (1 << 28)
 #       define HDMI0_AZ_FORMAT_WTRIG_ACK   (1 << 29)
 #define HDMI0_AUDIO_CRC_CONTROL      0x740c
 #       define HDMI0_AUDIO_CRC_EN    (1 << 0)
+#define DCE3_HDMI0_ACR_PACKET_CONTROL	0x740c
 #define HDMI0_VBI_PACKET_CONTROL     0x7410
 #       define HDMI0_NULL_SEND       (1 << 0)
 #       define HDMI0_GC_SEND         (1 << 4)
@@ -1054,7 +1057,9 @@
 #       define HDMI0_MPEG_INFO_UPDATE  (1 << 10)
 #define HDMI0_INFOFRAME_CONTROL1     0x7418
 #       define HDMI0_AVI_INFO_LINE(x)  (((x) & 0x3f) << 0)
+#       define HDMI0_AVI_INFO_LINE_MASK		(0x3f << 0)
 #       define HDMI0_AUDIO_INFO_LINE(x)  (((x) & 0x3f) << 8)
+#       define HDMI0_AUDIO_INFO_LINE_MASK	(0x3f << 8)
 #       define HDMI0_MPEG_INFO_LINE(x)  (((x) & 0x3f) << 16)
 #define HDMI0_GENERIC_PACKET_CONTROL 0x741c
 #       define HDMI0_GENERIC0_SEND   (1 << 0)
@@ -1063,7 +1068,9 @@
 #       define HDMI0_GENERIC1_SEND   (1 << 4)
 #       define HDMI0_GENERIC1_CONT   (1 << 5)
 #       define HDMI0_GENERIC0_LINE(x)  (((x) & 0x3f) << 16)
+#       define HDMI0_GENERIC0_LINE_MASK		(0x3f << 16)
 #       define HDMI0_GENERIC1_LINE(x)  (((x) & 0x3f) << 24)
+#       define HDMI0_GENERIC1_LINE_MASK		(0x3f << 24)
 #define HDMI0_GC                     0x7428
 #       define HDMI0_GC_AVMUTE       (1 << 0)
 #define HDMI0_AVI_INFO0              0x7454
@@ -1119,16 +1126,22 @@
 #define HDMI0_GENERIC1_6             0x74a8
 #define HDMI0_ACR_32_0               0x74ac
 #       define HDMI0_ACR_CTS_32(x)   (((x) & 0xfffff) << 12)
+#       define HDMI0_ACR_CTS_32_MASK		(0xfffff << 12)
 #define HDMI0_ACR_32_1               0x74b0
 #       define HDMI0_ACR_N_32(x)   (((x) & 0xfffff) << 0)
+#       define HDMI0_ACR_N_32_MASK		(0xfffff << 0)
 #define HDMI0_ACR_44_0               0x74b4
 #       define HDMI0_ACR_CTS_44(x)   (((x) & 0xfffff) << 12)
+#       define HDMI0_ACR_CTS_44_MASK		(0xfffff << 12)
 #define HDMI0_ACR_44_1               0x74b8
 #       define HDMI0_ACR_N_44(x)   (((x) & 0xfffff) << 0)
+#       define HDMI0_ACR_N_44_MASK		(0xfffff << 0)
 #define HDMI0_ACR_48_0               0x74bc
 #       define HDMI0_ACR_CTS_48(x)   (((x) & 0xfffff) << 12)
+#       define HDMI0_ACR_CTS_48_MASK		(0xfffff << 12)
 #define HDMI0_ACR_48_1               0x74c0
 #       define HDMI0_ACR_N_48(x)   (((x) & 0xfffff) << 0)
+#       define HDMI0_ACR_N_48_MASK		(0xfffff << 0)
 #define HDMI0_ACR_STATUS_0           0x74c4
 #define HDMI0_ACR_STATUS_1           0x74c8
 #define HDMI0_AUDIO_INFO0            0x74cc
@@ -1148,14 +1161,17 @@
 #       define HDMI0_60958_CS_CATEGORY_CODE(x)      (((x) & 0xff) << 8)
 #       define HDMI0_60958_CS_SOURCE_NUMBER(x)      (((x) & 0xf) << 16)
 #       define HDMI0_60958_CS_CHANNEL_NUMBER_L(x)   (((x) & 0xf) << 20)
+#       define HDMI0_60958_CS_CHANNEL_NUMBER_L_MASK	(0xf << 20)
 #       define HDMI0_60958_CS_SAMPLING_FREQUENCY(x) (((x) & 0xf) << 24)
 #       define HDMI0_60958_CS_CLOCK_ACCURACY(x)     (((x) & 3) << 28)
+#       define HDMI0_60958_CS_CLOCK_ACCURACY_MASK	(3 << 28)
 #define HDMI0_60958_1                0x74d8
 #       define HDMI0_60958_CS_WORD_LENGTH(x)        (((x) & 0xf) << 0)
 #       define HDMI0_60958_CS_ORIGINAL_SAMPLING_FREQUENCY(x)   (((x) & 0xf) << 4)
 #       define HDMI0_60958_CS_VALID_L(x)   (((x) & 1) << 16)
 #       define HDMI0_60958_CS_VALID_R(x)   (((x) & 1) << 18)
 #       define HDMI0_60958_CS_CHANNEL_NUMBER_R(x)   (((x) & 0xf) << 20)
+#       define HDMI0_60958_CS_CHANNEL_NUMBER_R_MASK	(0xf << 20)
 #define HDMI0_ACR_PACKET_CONTROL     0x74dc
 #       define HDMI0_ACR_SEND        (1 << 0)
 #       define HDMI0_ACR_CONT        (1 << 1)
@@ -1166,6 +1182,7 @@
 #       define HDMI0_ACR_48          3
 #       define HDMI0_ACR_SOURCE      (1 << 8) /* 0 - hw; 1 - cts value */
 #       define HDMI0_ACR_AUTO_SEND   (1 << 12)
+#define DCE3_HDMI0_AUDIO_CRC_CONTROL	0x74dc
 #define HDMI0_RAMP_CONTROL0          0x74e0
 #       define HDMI0_RAMP_MAX_COUNT(x)   (((x) & 0xffffff) << 0)
 #define HDMI0_RAMP_CONTROL1          0x74e4

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8149e7c..4b0bbf8 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h

@@ -100,6 +100,8 @@
 extern int radeon_aspm;
 extern int radeon_runtime_pm;
 extern int radeon_hard_reset;
+extern int radeon_vm_size;
+extern int radeon_vm_block_size;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -676,14 +678,16 @@
  * IRQS.
  */
 
-struct radeon_unpin_work {
-	struct work_struct work;
-	struct radeon_device *rdev;
-	int crtc_id;
-	struct radeon_fence *fence;
+struct radeon_flip_work {
+	struct work_struct		flip_work;
+	struct work_struct		unpin_work;
+	struct radeon_device		*rdev;
+	int				crtc_id;
+	struct drm_framebuffer		*fb;
 	struct drm_pending_vblank_event *event;
-	struct radeon_bo *old_rbo;
-	u64 new_crtc_base;
+	struct radeon_bo		*old_rbo;
+	struct radeon_bo		*new_rbo;
+	struct radeon_fence		*fence;
 };
 
 struct r500_irq_stat_regs {
@@ -835,13 +839,8 @@
 /* maximum number of VMIDs */
 #define RADEON_NUM_VM	16
 
-/* defines number of bits in page table versus page directory,
- * a page is 4KB so we have 12 bits offset, 9 bits in the page
- * table and the remaining 19 bits are in the page directory */
-#define RADEON_VM_BLOCK_SIZE   9
-
 /* number of entries in page table */
-#define RADEON_VM_PTE_COUNT (1 << RADEON_VM_BLOCK_SIZE)
+#define RADEON_VM_PTE_COUNT (1 << radeon_vm_block_size)
 
 /* PTBs (Page Table Blocks) need to be aligned to 32K */
 #define RADEON_VM_PTB_ALIGN_SIZE   32768
@@ -854,6 +853,15 @@
 #define R600_PTE_READABLE	(1 << 5)
 #define R600_PTE_WRITEABLE	(1 << 6)
 
+/* PTE (Page Table Entry) fragment field for different page sizes */
+#define R600_PTE_FRAG_4KB	(0 << 7)
+#define R600_PTE_FRAG_64KB	(4 << 7)
+#define R600_PTE_FRAG_256KB	(6 << 7)
+
+/* flags used for GART page table entries on R600+ */
+#define R600_PTE_GART	( R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED \
+			| R600_PTE_READABLE | R600_PTE_WRITEABLE)
+
 struct radeon_vm_pt {
 	struct radeon_bo		*bo;
 	uint64_t			addr;
@@ -986,8 +994,8 @@
 	struct radeon_bo		*robj;
 	struct ttm_validate_buffer	tv;
 	uint64_t			gpu_offset;
-	unsigned			domain;
-	unsigned			alt_domain;
+	unsigned			prefered_domains;
+	unsigned			allowed_domains;
 	uint32_t			tiling_flags;
 	uint32_t			handle;
 };
@@ -1771,7 +1779,8 @@
 	/* gart */
 	struct {
 		void (*tlb_flush)(struct radeon_device *rdev);
-		int (*set_page)(struct radeon_device *rdev, int i, uint64_t addr);
+		void (*set_page)(struct radeon_device *rdev, unsigned i,
+				 uint64_t addr);
 	} gart;
 	struct {
 		int (*init)(struct radeon_device *rdev);
@@ -1883,9 +1892,8 @@
 	} dpm;
 	/* pageflipping */
 	struct {
-		void (*pre_page_flip)(struct radeon_device *rdev, int crtc);
-		u32 (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base);
-		void (*post_page_flip)(struct radeon_device *rdev, int crtc);
+		void (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base);
+		bool (*page_flip_pending)(struct radeon_device *rdev, int crtc);
 	} pflip;
 };
 
@@ -1924,6 +1932,7 @@
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct rv770_asic {
@@ -1949,6 +1958,7 @@
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct evergreen_asic {
@@ -1975,6 +1985,7 @@
 	unsigned tiling_group_size;
 	unsigned tile_config;
 	unsigned backend_map;
+	unsigned active_simds;
 };
 
 struct cayman_asic {
@@ -2013,6 +2024,7 @@
 	unsigned multi_gpu_tile_size;
 
 	unsigned tile_config;
+	unsigned active_simds;
 };
 
 struct si_asic {
@@ -2043,6 +2055,7 @@
 
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
+	uint32_t active_cus;
 };
 
 struct cik_asic {
@@ -2074,6 +2087,7 @@
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
+	uint32_t active_cus;
 };
 
 union radeon_asic_config {
@@ -2745,9 +2759,8 @@
 #define radeon_pm_finish(rdev) (rdev)->asic->pm.finish((rdev))
 #define radeon_pm_init_profile(rdev) (rdev)->asic->pm.init_profile((rdev))
 #define radeon_pm_get_dynpm_state(rdev) (rdev)->asic->pm.get_dynpm_state((rdev))
-#define radeon_pre_page_flip(rdev, crtc) (rdev)->asic->pflip.pre_page_flip((rdev), (crtc))
 #define radeon_page_flip(rdev, crtc, base) (rdev)->asic->pflip.page_flip((rdev), (crtc), (base))
-#define radeon_post_page_flip(rdev, crtc) (rdev)->asic->pflip.post_page_flip((rdev), (crtc))
+#define radeon_page_flip_pending(rdev, crtc) (rdev)->asic->pflip.page_flip_pending((rdev), (crtc))
 #define radeon_wait_for_vblank(rdev, crtc) (rdev)->asic->display.wait_for_vblank((rdev), (crtc))
 #define radeon_mc_wait_for_idle(rdev) (rdev)->asic->mc_wait_for_idle((rdev))
 #define radeon_get_xclk(rdev) (rdev)->asic->get_xclk((rdev))

diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index 4243334..a9297b2 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c

@@ -117,9 +117,6 @@
 	/* ATI Host Bridge / RV280 [M9+] Needs AGPMode 1 (phoronix forum) */
 	{ PCI_VENDOR_ID_ATI, 0xcbb2, PCI_VENDOR_ID_ATI, 0x5c61,
 		PCI_VENDOR_ID_SONY, 0x8175, 1},
-	/* HP Host Bridge / R300 [FireGL X1] Needs AGPMode 2 (fdo #7770) */
-	{ PCI_VENDOR_ID_HP, 0x122e, PCI_VENDOR_ID_ATI, 0x4e47,
-		PCI_VENDOR_ID_ATI, 0x0152, 2},
 	{ 0, 0, 0, 0, 0, 0, 0 },
 };
 #endif

diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e5f0177..34b9aa9 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c

@@ -248,9 +248,8 @@
 		.set_clock_gating = &radeon_legacy_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -315,9 +314,8 @@
 		.set_clock_gating = &radeon_legacy_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -396,9 +394,8 @@
 		.set_clock_gating = &radeon_legacy_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -463,9 +460,8 @@
 		.set_clock_gating = &radeon_legacy_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -530,9 +526,8 @@
 		.set_clock_gating = &radeon_atom_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -597,9 +592,8 @@
 		.set_clock_gating = &radeon_legacy_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &r100_pre_page_flip,
 		.page_flip = &r100_page_flip,
-		.post_page_flip = &r100_post_page_flip,
+		.page_flip_pending = &r100_page_flip_pending,
 	},
 };
 
@@ -666,9 +660,8 @@
 		.set_clock_gating = &radeon_atom_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -735,9 +728,8 @@
 		.set_clock_gating = &radeon_atom_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -802,9 +794,8 @@
 		.set_clock_gating = &radeon_atom_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -869,9 +860,8 @@
 		.set_clock_gating = &radeon_atom_set_clock_gating,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -968,9 +958,8 @@
 		.get_temperature = &rv6xx_get_temp,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -1059,9 +1048,8 @@
 		.force_performance_level = &rv6xx_dpm_force_performance_level,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -1150,9 +1138,8 @@
 		.force_performance_level = &rs780_dpm_force_performance_level,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rs600_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rs600_page_flip_pending,
 	},
 };
 
@@ -1201,7 +1188,7 @@
 		.set_backlight_level = &atombios_set_backlight_level,
 		.get_backlight_level = &atombios_get_backlight_level,
 		.hdmi_enable = &r600_hdmi_enable,
-		.hdmi_setmode = &r600_hdmi_setmode,
+		.hdmi_setmode = &dce3_1_hdmi_setmode,
 	},
 	.copy = {
 		.blit = &r600_copy_cpdma,
@@ -1256,9 +1243,8 @@
 		.vblank_too_short = &rv770_dpm_vblank_too_short,
 	},
 	.pflip = {
-		.pre_page_flip = &rs600_pre_page_flip,
 		.page_flip = &rv770_page_flip,
-		.post_page_flip = &rs600_post_page_flip,
+		.page_flip_pending = &rv770_page_flip_pending,
 	},
 };
 
@@ -1375,9 +1361,8 @@
 		.vblank_too_short = &cypress_dpm_vblank_too_short,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -1467,9 +1452,8 @@
 		.force_performance_level = &sumo_dpm_force_performance_level,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -1560,9 +1544,8 @@
 		.vblank_too_short = &btc_dpm_vblank_too_short,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -1704,9 +1687,8 @@
 		.vblank_too_short = &ni_dpm_vblank_too_short,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -1805,9 +1787,8 @@
 		.enable_bapm = &trinity_dpm_enable_bapm,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -1936,9 +1917,8 @@
 		.vblank_too_short = &ni_dpm_vblank_too_short,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -2049,8 +2029,8 @@
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &cik_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
-		.copy = &cik_copy_cpdma,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.copy = &cik_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -2099,9 +2079,8 @@
 		.powergate_uvd = &ci_dpm_powergate_uvd,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 
@@ -2204,9 +2183,8 @@
 		.enable_bapm = &kv_dpm_enable_bapm,
 	},
 	.pflip = {
-		.pre_page_flip = &evergreen_pre_page_flip,
 		.page_flip = &evergreen_page_flip,
-		.post_page_flip = &evergreen_post_page_flip,
+		.page_flip_pending = &evergreen_page_flip_pending,
 	},
 };
 

diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3d55a3a..01e7c0a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h

@@ -67,7 +67,8 @@
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+			    uint64_t addr);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -135,9 +136,9 @@
 extern void r100_pm_finish(struct radeon_device *rdev);
 extern void r100_pm_init_profile(struct radeon_device *rdev);
 extern void r100_pm_get_dynpm_state(struct radeon_device *rdev);
-extern void r100_pre_page_flip(struct radeon_device *rdev, int crtc);
-extern u32 r100_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
-extern void r100_post_page_flip(struct radeon_device *rdev, int crtc);
+extern void r100_page_flip(struct radeon_device *rdev, int crtc,
+			   u64 crtc_base);
+extern bool r100_page_flip_pending(struct radeon_device *rdev, int crtc);
 extern void r100_wait_for_vblank(struct radeon_device *rdev, int crtc);
 extern int r100_mc_wait_for_idle(struct radeon_device *rdev);
 
@@ -171,7 +172,8 @@
 				struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
-extern int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+				     uint64_t addr);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -206,7 +208,8 @@
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -229,7 +232,8 @@
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
@@ -241,9 +245,9 @@
 extern void rs600_pm_misc(struct radeon_device *rdev);
 extern void rs600_pm_prepare(struct radeon_device *rdev);
 extern void rs600_pm_finish(struct radeon_device *rdev);
-extern void rs600_pre_page_flip(struct radeon_device *rdev, int crtc);
-extern u32 rs600_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
-extern void rs600_post_page_flip(struct radeon_device *rdev, int crtc);
+extern void rs600_page_flip(struct radeon_device *rdev, int crtc,
+			    u64 crtc_base);
+extern bool rs600_page_flip_pending(struct radeon_device *rdev, int crtc);
 void rs600_set_safe_registers(struct radeon_device *rdev);
 extern void avivo_wait_for_vblank(struct radeon_device *rdev, int crtc);
 extern int rs600_mc_wait_for_idle(struct radeon_device *rdev);
@@ -387,6 +391,11 @@
 int r600_audio_init(struct radeon_device *rdev);
 struct r600_audio_pin r600_audio_status(struct radeon_device *rdev);
 void r600_audio_fini(struct radeon_device *rdev);
+void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock);
+void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, void *buffer,
+				    size_t size);
+void r600_hdmi_update_ACR(struct drm_encoder *encoder, uint32_t clock);
+void r600_hdmi_audio_workaround(struct drm_encoder *encoder);
 int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
 void r600_hdmi_enable(struct drm_encoder *encoder, bool enable);
@@ -447,7 +456,8 @@
 int rv770_suspend(struct radeon_device *rdev);
 int rv770_resume(struct radeon_device *rdev);
 void rv770_pm_misc(struct radeon_device *rdev);
-u32 rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
+void rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
+bool rv770_page_flip_pending(struct radeon_device *rdev, int crtc);
 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 void r700_cp_stop(struct radeon_device *rdev);
 void r700_cp_fini(struct radeon_device *rdev);
@@ -458,6 +468,8 @@
 u32 rv770_get_xclk(struct radeon_device *rdev);
 int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 int rv770_get_temp(struct radeon_device *rdev);
+/* hdmi */
+void dce3_1_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
 /* rv7xx pm */
 int rv770_dpm_init(struct radeon_device *rdev);
 int rv770_dpm_enable(struct radeon_device *rdev);
@@ -513,9 +525,9 @@
 extern void btc_pm_init_profile(struct radeon_device *rdev);
 int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
-extern void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc);
-extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
-extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
+extern void evergreen_page_flip(struct radeon_device *rdev, int crtc,
+				u64 crtc_base);
+extern bool evergreen_page_flip_pending(struct radeon_device *rdev, int crtc);
 extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_mc_wait_for_idle(struct radeon_device *rdev);

diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 9ab3097..6a03624 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c

@@ -626,7 +626,7 @@
 	    vhdr->DeviceID != rdev->pdev->device) {
 		DRM_INFO("ACPI VFCT table is not for this card\n");
 		goto out_unmap;
-	};
+	}
 
 	if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) + vhdr->ImageLength > tbl_size) {
 		DRM_ERROR("ACPI VFCT image truncated\n");

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index ea50e0a..933c5c3 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c

@@ -48,6 +48,7 @@
 	radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 
 	/* if the connector is already off, don't turn it back on */
+	/* FIXME: This access isn't protected by any locks. */
 	if (connector->dpms != DRM_MODE_DPMS_ON)
 		return;
 
@@ -100,6 +101,7 @@
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct radeon_connector_atom_dig *dig_connector;
 	int bpc = 8;
+	int mode_clock, max_tmds_clock;
 
 	switch (connector->connector_type) {
 	case DRM_MODE_CONNECTOR_DVII:
@@ -145,6 +147,61 @@
 		}
 		break;
 	}
+
+	if (drm_detect_hdmi_monitor(radeon_connector->edid)) {
+		/* hdmi deep color only implemented on DCE4+ */
+		if ((bpc > 8) && !ASIC_IS_DCE4(rdev)) {
+			DRM_DEBUG("%s: HDMI deep color %d bpc unsupported. Using 8 bpc.\n",
+					  connector->name, bpc);
+			bpc = 8;
+		}
+
+		/*
+		 * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make
+		 * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at
+		 * 12 bpc is always supported on hdmi deep color sinks, as this is
+		 * required by the HDMI-1.3 spec. Clamp to a safe 12 bpc maximum.
+		 */
+		if (bpc > 12) {
+			DRM_DEBUG("%s: HDMI deep color %d bpc unsupported. Using 12 bpc.\n",
+					  connector->name, bpc);
+			bpc = 12;
+		}
+
+		/* Any defined maximum tmds clock limit we must not exceed? */
+		if (connector->max_tmds_clock > 0) {
+			/* mode_clock is clock in kHz for mode to be modeset on this connector */
+			mode_clock = radeon_connector->pixelclock_for_modeset;
+
+			/* Maximum allowable input clock in kHz */
+			max_tmds_clock = connector->max_tmds_clock * 1000;
+
+			DRM_DEBUG("%s: hdmi mode dotclock %d kHz, max tmds input clock %d kHz.\n",
+					  connector->name, mode_clock, max_tmds_clock);
+
+			/* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+			if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+				if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
+					(mode_clock * 5/4 <= max_tmds_clock))
+					bpc = 10;
+				else
+					bpc = 8;
+
+				DRM_DEBUG("%s: HDMI deep color 12 bpc exceeds max tmds clock. Using %d bpc.\n",
+						  connector->name, bpc);
+			}
+
+			if ((bpc == 10) && (mode_clock * 5/4 > max_tmds_clock)) {
+				bpc = 8;
+				DRM_DEBUG("%s: HDMI deep color 10 bpc exceeds max tmds clock. Using %d bpc.\n",
+						  connector->name, bpc);
+			}
+		}
+	}
+
+	DRM_DEBUG("%s: Display bpc=%d, returned bpc=%d\n",
+			  connector->name, connector->display_info.bpc, bpc);
+
 	return bpc;
 }
 
@@ -260,13 +317,17 @@
 					continue;
 
 				if (priority == true) {
-					DRM_DEBUG_KMS("1: conflicting encoders switching off %s\n", drm_get_connector_name(conflict));
-					DRM_DEBUG_KMS("in favor of %s\n", drm_get_connector_name(connector));
+					DRM_DEBUG_KMS("1: conflicting encoders switching off %s\n",
+						      conflict->name);
+					DRM_DEBUG_KMS("in favor of %s\n",
+						      connector->name);
 					conflict->status = connector_status_disconnected;
 					radeon_connector_update_scratch_regs(conflict, connector_status_disconnected);
 				} else {
-					DRM_DEBUG_KMS("2: conflicting encoders switching off %s\n", drm_get_connector_name(connector));
-					DRM_DEBUG_KMS("in favor of %s\n", drm_get_connector_name(conflict));
+					DRM_DEBUG_KMS("2: conflicting encoders switching off %s\n",
+						      connector->name);
+					DRM_DEBUG_KMS("in favor of %s\n",
+						      conflict->name);
 					current_status = connector_status_disconnected;
 				}
 				break;
@@ -787,7 +848,7 @@
 
 		if (!radeon_connector->edid) {
 			DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
-					drm_get_connector_name(connector));
+					connector->name);
 			ret = connector_status_connected;
 		} else {
 			radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
@@ -1010,12 +1071,13 @@
 
 		if (!radeon_connector->edid) {
 			DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
-					drm_get_connector_name(connector));
+					connector->name);
 			/* rs690 seems to have a problem with connectors not existing and always
 			 * return a block of 0's. If we see this just stop polling on this output */
 			if ((rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) && radeon_connector->base.null_edid_counter) {
 				ret = connector_status_disconnected;
-				DRM_ERROR("%s: detected RS690 floating bus bug, stopping ddc detect\n", drm_get_connector_name(connector));
+				DRM_ERROR("%s: detected RS690 floating bus bug, stopping ddc detect\n",
+					  connector->name);
 				radeon_connector->ddc_bus = NULL;
 			} else {
 				ret = connector_status_connected;
@@ -1387,7 +1449,7 @@
 	struct radeon_device *rdev = dev->dev_private;
 
 	if (ASIC_IS_DCE5(rdev) &&
-	    (rdev->clock.dp_extclk >= 53900) &&
+	    (rdev->clock.default_dispclk >= 53900) &&
 	    radeon_connector_encoder_is_hbr2(connector)) {
 		return true;
 	}

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41ecf8a..71a1434 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c

@@ -140,10 +140,10 @@
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
 			/* TODO: is this still needed for NI+ ? */
-			p->relocs[i].domain =
+			p->relocs[i].prefered_domains =
 				RADEON_GEM_DOMAIN_VRAM;
 
-			p->relocs[i].alt_domain =
+			p->relocs[i].allowed_domains =
 				RADEON_GEM_DOMAIN_VRAM;
 
 			/* prioritize this over any other relocation */
@@ -158,10 +158,10 @@
 				return -EINVAL;
 			}
 
-			p->relocs[i].domain = domain;
+			p->relocs[i].prefered_domains = domain;
 			if (domain == RADEON_GEM_DOMAIN_VRAM)
 				domain |= RADEON_GEM_DOMAIN_GTT;
-			p->relocs[i].alt_domain = domain;
+			p->relocs[i].allowed_domains = domain;
 		}
 
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2cd144c..03686fa 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c

@@ -1052,6 +1052,43 @@
 		radeon_agpmode = 0;
 		break;
 	}
+
+	if (!radeon_check_pot_argument(radeon_vm_size)) {
+		dev_warn(rdev->dev, "VM size (%d) must be a power of 2\n",
+			 radeon_vm_size);
+		radeon_vm_size = 4096;
+	}
+
+	if (radeon_vm_size < 4) {
+		dev_warn(rdev->dev, "VM size (%d) to small, min is 4MB\n",
+			 radeon_vm_size);
+		radeon_vm_size = 4096;
+	}
+
+	/*
+	 * Max GPUVM size for Cayman, SI and CI is 40 bits.
+	 */
+	if (radeon_vm_size > 1024*1024) {
+		dev_warn(rdev->dev, "VM size (%d) to large, max is 1TB\n",
+			 radeon_vm_size);
+		radeon_vm_size = 4096;
+	}
+
+	/* defines number of bits in page table versus page directory,
+	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+	 * page table and the remaining bits are in the page directory */
+	if (radeon_vm_block_size < 9) {
+		dev_warn(rdev->dev, "VM page table size (%d) to small\n",
+			 radeon_vm_block_size);
+		radeon_vm_block_size = 9;
+	}
+
+	if (radeon_vm_block_size > 24 ||
+	    radeon_vm_size < (1ull << radeon_vm_block_size)) {
+		dev_warn(rdev->dev, "VM page table size (%d) to large\n",
+			 radeon_vm_block_size);
+		radeon_vm_block_size = 9;
+	}
 }
 
 /**
@@ -1126,12 +1163,13 @@
 static bool radeon_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = {
@@ -1196,17 +1234,16 @@
 	if (r)
 		return r;
 
+	radeon_check_arguments(rdev);
 	/* Adjust VM size here.
-	 * Currently set to 4GB ((1 << 20) 4k pages).
-	 * Max GPUVM size for cayman and SI is 40 bits.
+	 * Max GPUVM size for cayman+ is 40 bits.
 	 */
-	rdev->vm_manager.max_pfn = 1 << 20;
+	rdev->vm_manager.max_pfn = radeon_vm_size << 8;
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
 	if (r)
 		return r;
-	radeon_check_arguments(rdev);
 
 	/* all of the newer IGP chips have an internal gart
 	 * However some rs4xx report as AGP, so remove that here.

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 356b733..5ed6170 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c

@@ -249,16 +249,21 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
+	destroy_workqueue(radeon_crtc->flip_queue);
 	kfree(radeon_crtc);
 }
 
-/*
- * Handle unpin events outside the interrupt handler proper.
+/**
+ * radeon_unpin_work_func - unpin old buffer object
+ *
+ * @__work - kernel work item
+ *
+ * Unpin the old frame buffer object outside of the interrupt handler
  */
 static void radeon_unpin_work_func(struct work_struct *__work)
 {
-	struct radeon_unpin_work *work =
-		container_of(__work, struct radeon_unpin_work, work);
+	struct radeon_flip_work *work =
+		container_of(__work, struct radeon_flip_work, unpin_work);
 	int r;
 
 	/* unpin of the old buffer */
@@ -276,10 +281,10 @@
 	kfree(work);
 }
 
-void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id)
+void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
-	struct radeon_unpin_work *work;
+	struct radeon_flip_work *work;
 	unsigned long flags;
 	u32 update_pending;
 	int vpos, hpos;
@@ -289,24 +294,13 @@
 		return;
 
 	spin_lock_irqsave(&rdev->ddev->event_lock, flags);
-	work = radeon_crtc->unpin_work;
-	if (work == NULL ||
-	    (work->fence && !radeon_fence_signaled(work->fence))) {
+	work = radeon_crtc->flip_work;
+	if (work == NULL) {
 		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
 		return;
 	}
-	/* New pageflip, or just completion of a previous one? */
-	if (!radeon_crtc->deferred_flip_completion) {
-		/* do the flip (mmio) */
-		update_pending = radeon_page_flip(rdev, crtc_id, work->new_crtc_base);
-	} else {
-		/* This is just a completion of a flip queued in crtc
-		 * at last invocation. Make sure we go directly to
-		 * completion routine.
-		 */
-		update_pending = 0;
-		radeon_crtc->deferred_flip_completion = 0;
-	}
+
+	update_pending = radeon_page_flip_pending(rdev, crtc_id);
 
 	/* Has the pageflip already completed in crtc, or is it certain
 	 * to complete in this vblank?
@@ -324,19 +318,38 @@
 		 */
 		update_pending = 0;
 	}
-	if (update_pending) {
-		/* crtc didn't flip in this target vblank interval,
-		 * but flip is pending in crtc. It will complete it
-		 * in next vblank interval, so complete the flip at
-		 * next vblank irq.
-		 */
-		radeon_crtc->deferred_flip_completion = 1;
+	spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
+	if (!update_pending)
+		radeon_crtc_handle_flip(rdev, crtc_id);
+}
+
+/**
+ * radeon_crtc_handle_flip - page flip completed
+ *
+ * @rdev: radeon device pointer
+ * @crtc_id: crtc number this event is for
+ *
+ * Called when we are sure that a page flip for this crtc is completed.
+ */
+void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id)
+{
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
+	struct radeon_flip_work *work;
+	unsigned long flags;
+
+	/* this can happen at init */
+	if (radeon_crtc == NULL)
+		return;
+
+	spin_lock_irqsave(&rdev->ddev->event_lock, flags);
+	work = radeon_crtc->flip_work;
+	if (work == NULL) {
 		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
 		return;
 	}
 
-	/* Pageflip (will be) certainly completed in this vblank. Clean up. */
-	radeon_crtc->unpin_work = NULL;
+	/* Pageflip completed. Clean up. */
+	radeon_crtc->flip_work = NULL;
 
 	/* wakeup userspace */
 	if (work->event)
@@ -344,86 +357,71 @@
 
 	spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
 
-	drm_vblank_put(rdev->ddev, radeon_crtc->crtc_id);
 	radeon_fence_unref(&work->fence);
-	radeon_post_page_flip(work->rdev, work->crtc_id);
-	schedule_work(&work->work);
+	radeon_irq_kms_pflip_irq_get(rdev, work->crtc_id);
+	queue_work(radeon_crtc->flip_queue, &work->unpin_work);
 }
 
-static int radeon_crtc_page_flip(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags)
+/**
+ * radeon_flip_work_func - page flip framebuffer
+ *
+ * @__work: kernel work item
+ *
+ * Wait for the buffer object to become idle and do the actual page flip
+ */
+static void radeon_flip_work_func(struct work_struct *__work)
 {
-	struct drm_device *dev = crtc->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_framebuffer *old_radeon_fb;
-	struct radeon_framebuffer *new_radeon_fb;
-	struct drm_gem_object *obj;
-	struct radeon_bo *rbo;
-	struct radeon_unpin_work *work;
+	struct radeon_flip_work *work =
+		container_of(__work, struct radeon_flip_work, flip_work);
+	struct radeon_device *rdev = work->rdev;
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id];
+
+	struct drm_crtc *crtc = &radeon_crtc->base;
+	struct drm_framebuffer *fb = work->fb;
+
+	uint32_t tiling_flags, pitch_pixels;
+	uint64_t base;
+
 	unsigned long flags;
-	u32 tiling_flags, pitch_pixels;
-	u64 base;
 	int r;
 
-	work = kzalloc(sizeof *work, GFP_KERNEL);
-	if (work == NULL)
-		return -ENOMEM;
+        down_read(&rdev->exclusive_lock);
+	while (work->fence) {
+		r = radeon_fence_wait(work->fence, false);
+		if (r == -EDEADLK) {
+			up_read(&rdev->exclusive_lock);
+			r = radeon_gpu_reset(rdev);
+			down_read(&rdev->exclusive_lock);
+		}
 
-	work->event = event;
-	work->rdev = rdev;
-	work->crtc_id = radeon_crtc->crtc_id;
-	old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
-	new_radeon_fb = to_radeon_framebuffer(fb);
-	/* schedule unpin of the old buffer */
-	obj = old_radeon_fb->obj;
-	/* take a reference to the old object */
-	drm_gem_object_reference(obj);
-	rbo = gem_to_radeon_bo(obj);
-	work->old_rbo = rbo;
-	obj = new_radeon_fb->obj;
-	rbo = gem_to_radeon_bo(obj);
-
-	spin_lock(&rbo->tbo.bdev->fence_lock);
-	if (rbo->tbo.sync_obj)
-		work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
-	spin_unlock(&rbo->tbo.bdev->fence_lock);
-
-	INIT_WORK(&work->work, radeon_unpin_work_func);
-
-	/* We borrow the event spin lock for protecting unpin_work */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	if (radeon_crtc->unpin_work) {
-		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
-		r = -EBUSY;
-		goto unlock_free;
+		if (r) {
+			DRM_ERROR("failed to wait on page flip fence (%d)!\n",
+				  r);
+			goto cleanup;
+		} else
+			radeon_fence_unref(&work->fence);
 	}
-	radeon_crtc->unpin_work = work;
-	radeon_crtc->deferred_flip_completion = 0;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	/* pin the new buffer */
 	DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = %p\n",
-			 work->old_rbo, rbo);
+			 work->old_rbo, work->new_rbo);
 
-	r = radeon_bo_reserve(rbo, false);
+	r = radeon_bo_reserve(work->new_rbo, false);
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to reserve new rbo buffer before flip\n");
-		goto pflip_cleanup;
+		goto cleanup;
 	}
 	/* Only 27 bit offset for legacy CRTC */
-	r = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM,
+	r = radeon_bo_pin_restricted(work->new_rbo, RADEON_GEM_DOMAIN_VRAM,
 				     ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, &base);
 	if (unlikely(r != 0)) {
-		radeon_bo_unreserve(rbo);
+		radeon_bo_unreserve(work->new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto pflip_cleanup;
+		goto cleanup;
 	}
-	radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL);
-	radeon_bo_unreserve(rbo);
+	radeon_bo_get_tiling_flags(work->new_rbo, &tiling_flags, NULL);
+	radeon_bo_unreserve(work->new_rbo);
 
 	if (!ASIC_IS_AVIVO(rdev)) {
 		/* crtc offset is from display base addr not FB location */
@@ -461,44 +459,91 @@
 		base &= ~7;
 	}
 
-	spin_lock_irqsave(&dev->event_lock, flags);
-	work->new_crtc_base = base;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
+	/* We borrow the event spin lock for protecting flip_work */
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+	/* set the proper interrupt */
+	radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
+
+	/* do the flip (mmio) */
+	radeon_page_flip(rdev, radeon_crtc->crtc_id, base);
+
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+	up_read(&rdev->exclusive_lock);
+
+	return;
+
+cleanup:
+	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+	radeon_fence_unref(&work->fence);
+	kfree(work);
+	up_read(&rdev->exclusive_lock);
+}
+
+static int radeon_crtc_page_flip(struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_pending_vblank_event *event,
+				 uint32_t page_flip_flags)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_framebuffer *old_radeon_fb;
+	struct radeon_framebuffer *new_radeon_fb;
+	struct drm_gem_object *obj;
+	struct radeon_flip_work *work;
+	unsigned long flags;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (work == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&work->flip_work, radeon_flip_work_func);
+	INIT_WORK(&work->unpin_work, radeon_unpin_work_func);
+
+	work->rdev = rdev;
+	work->crtc_id = radeon_crtc->crtc_id;
+	work->fb = fb;
+	work->event = event;
+
+	/* schedule unpin of the old buffer */
+	old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
+	obj = old_radeon_fb->obj;
+
+	/* take a reference to the old object */
+	drm_gem_object_reference(obj);
+	work->old_rbo = gem_to_radeon_bo(obj);
+
+	new_radeon_fb = to_radeon_framebuffer(fb);
+	obj = new_radeon_fb->obj;
+	work->new_rbo = gem_to_radeon_bo(obj);
+
+	spin_lock(&work->new_rbo->tbo.bdev->fence_lock);
+	if (work->new_rbo->tbo.sync_obj)
+		work->fence = radeon_fence_ref(work->new_rbo->tbo.sync_obj);
+	spin_unlock(&work->new_rbo->tbo.bdev->fence_lock);
+
+	/* We borrow the event spin lock for protecting flip_work */
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+	if (radeon_crtc->flip_work) {
+		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+		drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+		radeon_fence_unref(&work->fence);
+		kfree(work);
+		return -EBUSY;
+	}
+	radeon_crtc->flip_work = work;
 
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 
-	r = drm_vblank_get(dev, radeon_crtc->crtc_id);
-	if (r) {
-		DRM_ERROR("failed to get vblank before flip\n");
-		goto pflip_cleanup1;
-	}
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 
-	/* set the proper interrupt */
-	radeon_pre_page_flip(rdev, radeon_crtc->crtc_id);
+	queue_work(radeon_crtc->flip_queue, &work->flip_work);
 
 	return 0;
-
-pflip_cleanup1:
-	if (unlikely(radeon_bo_reserve(rbo, false) != 0)) {
-		DRM_ERROR("failed to reserve new rbo in error path\n");
-		goto pflip_cleanup;
-	}
-	if (unlikely(radeon_bo_unpin(rbo) != 0)) {
-		DRM_ERROR("failed to unpin new rbo in error path\n");
-	}
-	radeon_bo_unreserve(rbo);
-
-pflip_cleanup:
-	spin_lock_irqsave(&dev->event_lock, flags);
-	radeon_crtc->unpin_work = NULL;
-unlock_free:
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
-	radeon_fence_unref(&work->fence);
-	kfree(work);
-
-	return r;
 }
 
 static int
@@ -568,6 +613,7 @@
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
+	radeon_crtc->flip_queue = create_singlethread_workqueue("radeon-crtc");
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
@@ -661,7 +707,7 @@
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		radeon_connector = to_radeon_connector(connector);
 		DRM_INFO("Connector %d:\n", i);
-		DRM_INFO("  %s\n", drm_get_connector_name(connector));
+		DRM_INFO("  %s\n", connector->name);
 		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 			DRM_INFO("  %s\n", hpd_names[radeon_connector->hpd.hpd]);
 		if (radeon_connector->ddc_bus) {

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index c00a2f5..6e30174 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c

@@ -81,9 +81,10 @@
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	38
+#define KMS_DRIVER_MINOR	39
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -172,6 +173,8 @@
 int radeon_aspm = -1;
 int radeon_runtime_pm = -1;
 int radeon_hard_reset = 0;
+int radeon_vm_size = 4096;
+int radeon_vm_block_size = 9;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -239,6 +242,12 @@
 MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
 module_param_named(hard_reset, radeon_hard_reset, int, 0444);
 
+MODULE_PARM_DESC(vm_size, "VM address space size in megabytes (default 4GB)");
+module_param_named(vm_size, radeon_vm_size, int, 0444);
+
+MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default 9)");
+module_param_named(vm_block_size, radeon_vm_block_size, int, 0444);
+
 static struct pci_device_id pciidlist[] = {
 	radeon_PCI_IDS
 };
@@ -519,7 +528,6 @@
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
 	    DRIVER_PRIME | DRIVER_RENDER,
-	.dev_priv_size = 0,
 	.load = radeon_driver_load_kms,
 	.open = radeon_driver_open_kms,
 	.preclose = radeon_driver_preclose_kms,

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index a77b1c1..9137870 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c

@@ -819,15 +819,35 @@
 	return 0;
 }
 
+/**
+ * radeon_debugfs_gpu_reset - manually trigger a gpu reset
+ *
+ * Print needs_reset to @m, then set it: gpu reset at the next fence wait.
+ */
+static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	down_read(&rdev->exclusive_lock);
+	seq_printf(m, "%d\n", rdev->needs_reset);
+	rdev->needs_reset = true;
+	up_read(&rdev->exclusive_lock);
+
+	return 0;
+}
+
 static struct drm_info_list radeon_debugfs_fence_list[] = {
 	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
+	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
 };
 #endif
 
 int radeon_debugfs_fence_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
-	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
 #else
 	return 0;
 #endif

diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 7b94414..add6220 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c

@@ -94,6 +94,8 @@
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
 	uint32_t temp;
 
+	mutex_lock(&i2c->mutex);
+
 	/* RV410 appears to have a bug where the hw i2c in reset
 	 * holds the i2c port in a bad state - switch hw i2c away before
 	 * doing DDC - do this for all r200s/r300s/r400s for safety sake
@@ -170,6 +172,8 @@
 	temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
 	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
+
+	mutex_unlock(&i2c->mutex);
 }
 
 static int get_clock(void *i2c_priv)
@@ -813,6 +817,8 @@
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
 	int ret = 0;
 
+	mutex_lock(&i2c->mutex);
+
 	switch (rdev->family) {
 	case CHIP_R100:
 	case CHIP_RV100:
@@ -879,6 +885,8 @@
 		break;
 	}
 
+	mutex_unlock(&i2c->mutex);
+
 	return ret;
 }
 
@@ -919,6 +927,7 @@
 	i2c->adapter.dev.parent = &dev->pdev->dev;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
+	mutex_init(&i2c->mutex);
 	if (rec->mm_i2c ||
 	    (rec->hw_capable &&
 	     radeon_hw_i2c &&
@@ -979,7 +988,7 @@
 		return;
 	i2c_del_adapter(&i2c->adapter);
 	if (i2c->has_aux)
-		drm_dp_aux_unregister_i2c_bus(&i2c->aux);
+		drm_dp_aux_unregister(&i2c->aux);
 	kfree(i2c);
 }
 

diff --git a/drivers/gpu/drm/radeon/radeon_ioc32.c b/drivers/gpu/drm/radeon/radeon_ioc32.c
index bdb0f93..0b98ea1 100644
--- a/drivers/gpu/drm/radeon/radeon_ioc32.c
+++ b/drivers/gpu/drm/radeon/radeon_ioc32.c

@@ -399,7 +399,7 @@
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(radeon_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(radeon_compat_ioctls))
 		fn = radeon_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)

diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 089c9ff..16807af 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c

@@ -287,7 +287,7 @@
 	INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);
 
 	rdev->irq.installed = true;
-	r = drm_irq_install(rdev->ddev);
+	r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
 	if (r) {
 		rdev->irq.installed = false;
 		flush_work(&rdev->hotplug_work);

diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba..35d9318 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c

@@ -513,6 +513,22 @@
 		value_size = sizeof(uint64_t);
 		value64 = atomic64_read(&rdev->gtt_usage);
 		break;
+	case RADEON_INFO_ACTIVE_CU_COUNT:
+		if (rdev->family >= CHIP_BONAIRE)
+			*value = rdev->config.cik.active_cus;
+		else if (rdev->family >= CHIP_TAHITI)
+			*value = rdev->config.si.active_cus;
+		else if (rdev->family >= CHIP_CAYMAN)
+			*value = rdev->config.cayman.active_simds;
+		else if (rdev->family >= CHIP_CEDAR)
+			*value = rdev->config.evergreen.active_simds;
+		else if (rdev->family >= CHIP_RV770)
+			*value = rdev->config.rv770.active_simds;
+		else if (rdev->family >= CHIP_R600)
+			*value = rdev->config.r600.active_simds;
+		else
+			*value = 1;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
@@ -859,4 +875,4 @@
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
-int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
+int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);

diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 6ddf31a..ad0e4b8 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h

@@ -191,6 +191,7 @@
 	struct radeon_i2c_bus_rec rec;
 	struct drm_dp_aux aux;
 	bool has_aux;
+	struct mutex mutex;
 };
 
 /* mostly for macs, but really any system without connector tables */
@@ -324,8 +325,8 @@
 	struct drm_display_mode native_mode;
 	int pll_id;
 	/* page flipping */
-	struct radeon_unpin_work *unpin_work;
-	int deferred_flip_completion;
+	struct workqueue_struct *flip_queue;
+	struct radeon_flip_work *flip_work;
 	/* pll sharing */
 	struct radeon_atom_ss ss;
 	bool ss_enabled;
@@ -505,6 +506,7 @@
 	struct radeon_i2c_chan *router_bus;
 	enum radeon_connector_audio audio;
 	enum radeon_connector_dither dither;
+	int pixelclock_for_modeset;
 };
 
 struct radeon_framebuffer {
@@ -906,6 +908,7 @@
 
 void radeon_fb_output_poll_changed(struct radeon_device *rdev);
 
+void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id);
 void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id);
 
 int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled);

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 4faa4d6..6c717b2 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c

@@ -446,7 +446,7 @@
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->robj;
 		if (!bo->pin_count) {
-			u32 domain = lobj->domain;
+			u32 domain = lobj->prefered_domains;
 			u32 current_domain =
 				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
 
@@ -458,7 +458,7 @@
 			 * into account. We don't want to disallow buffer moves
 			 * completely.
 			 */
-			if ((lobj->alt_domain & current_domain) != 0 &&
+			if ((lobj->allowed_domains & current_domain) != 0 &&
 			    (domain & current_domain) == 0 && /* will be moved */
 			    bytes_moved > bytes_moved_threshold) {
 				/* don't move it */
@@ -476,8 +476,9 @@
 				       initial_bytes_moved;
 
 			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
-					domain = lobj->alt_domain;
+				if (r != -ERESTARTSYS &&
+				    domain != lobj->allowed_domains) {
+					domain = lobj->allowed_domains;
 					goto retry;
 				}
 				ttm_eu_backoff_reservation(ticket, head);
@@ -730,7 +731,7 @@
 {
 	int r;
 
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
 	if (unlikely(r != 0))
 		return r;
 	spin_lock(&bo->tbo.bdev->fence_lock);

diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 9e7b25a..5a873f3 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h

@@ -65,7 +65,7 @@
 {
 	int r;
 
-	r = ttm_bo_reserve(&bo->tbo, !no_intr, false, false, 0);
+	r = ttm_bo_reserve(&bo->tbo, !no_intr, false, false, NULL);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);

diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 2bdae61..12c663e 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c

@@ -984,6 +984,8 @@
 		if (enable) {
 			mutex_lock(&rdev->pm.mutex);
 			rdev->pm.dpm.uvd_active = true;
+			/* disable this for now */
+#if 0
 			if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
 			else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
@@ -993,6 +995,7 @@
 			else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
 			else
+#endif
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
 			rdev->pm.dpm.state = dpm_state;
 			mutex_unlock(&rdev->pm.mutex);

diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 956ab7f..23bb64f 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c

@@ -3054,7 +3054,7 @@
 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
 			value = 0;
 		else
-			value = drm_dev_to_irq(dev);
+			value = dev->pdev->irq;
 		break;
 	case RADEON_PARAM_GART_BASE:
 		value = dev_priv->gart_vm_start;
@@ -3258,4 +3258,4 @@
 	DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
 };
 
-int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
+int radeon_max_ioctl = ARRAY_SIZE(radeon_ioctls);

diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 1b65ae2..a4ad270 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c

@@ -812,7 +812,8 @@
 		    (rdev->pm.dpm.hd != hd)) {
 			rdev->pm.dpm.sd = sd;
 			rdev->pm.dpm.hd = hd;
-			streams_changed = true;
+			/* disable this for now */
+			/*streams_changed = true;*/
 		}
 	}
 

diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 3971d96..aa21c31 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c

@@ -66,6 +66,7 @@
 	case CHIP_BONAIRE:
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
+	case CHIP_HAWAII:
 	case CHIP_MULLINS:
 		fw_name = FIRMWARE_BONAIRE;
 		break;

diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index c11b71d..899d912 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c

@@ -59,7 +59,7 @@
  */
 static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
 {
-	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
 }
 
 /**
@@ -140,8 +140,8 @@
 	/* add the vm page table to the list */
 	list[0].gobj = NULL;
 	list[0].robj = vm->page_directory;
-	list[0].domain = RADEON_GEM_DOMAIN_VRAM;
-	list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
+	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].tv.bo = &vm->page_directory->tbo;
 	list[0].tiling_flags = 0;
 	list[0].handle = 0;
@@ -153,8 +153,8 @@
 
 		list[idx].gobj = NULL;
 		list[idx].robj = vm->page_tables[i].bo;
-		list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
-		list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].tv.bo = &list[idx].robj->tbo;
 		list[idx].tiling_flags = 0;
 		list[idx].handle = 0;
@@ -474,8 +474,10 @@
 	bo_va->valid = false;
 	list_move(&bo_va->vm_list, head);
 
-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+
+	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 
 	if (eoffset > vm->max_pde_used)
 		vm->max_pde_used = eoffset;
@@ -583,10 +585,9 @@
 int radeon_vm_update_page_directory(struct radeon_device *rdev,
 				    struct radeon_vm *vm)
 {
-	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
-
 	struct radeon_bo *pd = vm->page_directory;
 	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
+	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
 	struct radeon_ib ib;
@@ -660,6 +661,84 @@
 }
 
 /**
+ * radeon_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB for the update
+ * @pe_start: address of the first PTE to handle
+ * @pe_end: address just past the last PTE to handle (exclusive)
+ * @addr: addr those PTEs should point to
+ * @flags: hw mapping flags
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_frag_ptes(struct radeon_device *rdev,
+				struct radeon_ib *ib,
+				uint64_t pe_start, uint64_t pe_end,
+				uint64_t addr, uint32_t flags)
+{
+	/*
+	 * The MC L1 TLB supports variable sized pages, based on a fragment
+	 * field in the PTE. When this field is set to a non-zero value, page
+	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+	 * flags are considered valid for all PTEs within the fragment range
+	 * and corresponding mappings are assumed to be physically contiguous.
+	 *
+	 * The L1 TLB can store a single PTE for the whole fragment,
+	 * significantly increasing the space available for translation
+	 * caching. This leads to large improvements in throughput when the
+	 * TLB is under pressure.
+	 *
+	 * The L2 TLB distributes small and large fragments into two
+	 * asymmetric partitions. The large fragment cache is significantly
+	 * larger. Thus, we try to use large fragments wherever possible.
+	 * Userspace can support this by aligning virtual base address and
+	 * allocation size to the fragment size.
+	 */
+
+	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
+	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
+			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
+	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;
+
+	uint64_t frag_start = ALIGN(pe_start, frag_align);
+	uint64_t frag_end = pe_end & ~(frag_align - 1);
+
+	unsigned count;
+
+	/* system pages are not contiguous; write plain 4K PTEs only */
+	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
+	    (frag_start >= frag_end)) {
+
+		count = (pe_end - pe_start) / 8;
+		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
+					RADEON_GPU_PAGE_SIZE, flags);
+		return;
+	}
+
+	/* handle the unaligned 4K area at the beginning */
+	if (pe_start != frag_start) {
+		count = (frag_start - pe_start) / 8;
+		radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
+					RADEON_GPU_PAGE_SIZE, flags);
+		addr += RADEON_GPU_PAGE_SIZE * count;
+	}
+
+	/* handle the fragment-aligned area in the middle */
+	count = (frag_end - frag_start) / 8;
+	radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
+				RADEON_GPU_PAGE_SIZE, flags | frag_flags);
+
+	/* handle the unaligned 4K area at the end */
+	if (frag_end != pe_end) {
+		addr += RADEON_GPU_PAGE_SIZE * count;
+		count = (pe_end - frag_end) / 8;
+		radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
+					RADEON_GPU_PAGE_SIZE, flags);
+	}
+}
+
+/**
  * radeon_vm_update_ptes - make sure that page tables are valid
  *
  * @rdev: radeon_device pointer
@@ -679,8 +758,7 @@
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint32_t flags)
 {
-	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
-
+	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
 	unsigned count = 0;
 	uint64_t addr;
@@ -690,7 +768,7 @@
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
+		uint64_t pt_idx = addr >> radeon_vm_block_size;
 		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
 		unsigned nptes;
 		uint64_t pte;
@@ -708,10 +786,9 @@
 		if ((last_pte + 8 * count) != pte) {
 
 			if (count) {
-				radeon_asic_vm_set_page(rdev, ib, last_pte,
-							last_dst, count,
-							RADEON_GPU_PAGE_SIZE,
-							flags);
+				radeon_vm_frag_ptes(rdev, ib, last_pte,
+						    last_pte + 8 * count,
+						    last_dst, flags);
 			}
 
 			count = nptes;
@@ -726,9 +803,9 @@
 	}
 
 	if (count) {
-		radeon_asic_vm_set_page(rdev, ib, last_pte,
-					last_dst, count,
-					RADEON_GPU_PAGE_SIZE, flags);
+		radeon_vm_frag_ptes(rdev, ib, last_pte,
+				    last_pte + 8 * count,
+				    last_dst, flags);
 	}
 }
 
@@ -796,13 +873,13 @@
 	/* padding, etc. */
 	ndw = 64;
 
-	if (RADEON_VM_BLOCK_SIZE > 11)
+	if (radeon_vm_block_size > 11)
 		/* reserve space for one header for every 2k dwords */
 		ndw += (nptes >> 11) * 4;
 	else
 		/* reserve space for one header for
 		    every (1 << BLOCK_SIZE) entries */
-		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
+		ndw += (nptes >> radeon_vm_block_size) * 4;
 
 	/* reserve space for pte addresses */
 	ndw += nptes * 2;
@@ -892,6 +969,8 @@
  */
 int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
+	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
+		RADEON_VM_PTE_COUNT * 8);
 	unsigned pd_size, pd_entries, pts_size;
 	int r;
 
@@ -913,7 +992,7 @@
 		return -ENOMEM;
 	}
 
-	r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false,
+	r = radeon_bo_create(rdev, pd_size, align, false,
 			     RADEON_GEM_DOMAIN_VRAM, NULL,
 			     &vm->page_directory);
 	if (r)

diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 130d5cc..a0f96de 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c

@@ -212,21 +212,16 @@
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
 	uint32_t entry;
 	u32 *gtt = rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
-
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
 		((upper_32_bits(addr) & 0xff) << 4) |
 		RS400_PTE_WRITEABLE | RS400_PTE_READABLE;
 	entry = cpu_to_le32(entry);
 	gtt[i] = entry;
-	return 0;
 }
 
 int rs400_mc_wait_for_idle(struct radeon_device *rdev)

diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 72d3616..d1a35cb 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c

@@ -109,19 +109,7 @@
 	}
 }
 
-void rs600_pre_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* enable the pflip int */
-	radeon_irq_kms_pflip_irq_get(rdev, crtc);
-}
-
-void rs600_post_page_flip(struct radeon_device *rdev, int crtc)
-{
-	/* disable the pflip int */
-	radeon_irq_kms_pflip_irq_put(rdev, crtc);
-}
-
-u32 rs600_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
+void rs600_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 	u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
@@ -148,9 +136,15 @@
 	/* Unlock the lock, so double-buffering can take place inside vblank */
 	tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
 	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
+}
+
+bool rs600_page_flip_pending(struct radeon_device *rdev, int crtc_id)
+{
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 
 	/* Return current update_pending status: */
-	return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
+	return !!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) &
+		AVIVO_D1GRPH_SURFACE_UPDATE_PENDING);
 }
 
 void avivo_program_fmt(struct drm_encoder *encoder)
@@ -632,24 +626,16 @@
 	radeon_gart_table_vram_free(rdev);
 }
 
-#define R600_PTE_VALID     (1 << 0)
-#define R600_PTE_SYSTEM    (1 << 1)
-#define R600_PTE_SNOOPED   (1 << 2)
-#define R600_PTE_READABLE  (1 << 5)
-#define R600_PTE_WRITEABLE (1 << 6)
-
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
 	void __iomem *ptr = (void *)rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
-	addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
-	addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE;
+	if (addr == rdev->dummy_page.addr)
+		addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
+	else
+		addr |= R600_PTE_GART;
 	writeq(addr, ptr + (i * 8));
-	return 0;
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
@@ -787,7 +773,7 @@
 				wake_up(&rdev->irq.vblank_queue);
 			}
 			if (atomic_read(&rdev->irq.pflip[0]))
-				radeon_crtc_handle_flip(rdev, 0);
+				radeon_crtc_handle_vblank(rdev, 0);
 		}
 		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
 			if (rdev->irq.crtc_vblank_int[1]) {
@@ -796,7 +782,7 @@
 				wake_up(&rdev->irq.vblank_queue);
 			}
 			if (atomic_read(&rdev->irq.pflip[1]))
-				radeon_crtc_handle_flip(rdev, 1);
+				radeon_crtc_handle_vblank(rdev, 1);
 		}
 		if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
 			queue_hotplug = true;

diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index fef3107..da8703d 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c

@@ -801,7 +801,7 @@
 	return reference_clock;
 }
 
-u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
+void rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 	u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
@@ -835,9 +835,15 @@
 	/* Unlock the lock, so double-buffering can take place inside vblank */
 	tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
 	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
+}
+
+bool rv770_page_flip_pending(struct radeon_device *rdev, int crtc_id)
+{
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 
 	/* Return current update_pending status: */
-	return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
+	return !!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) &
+		AVIVO_D1GRPH_SURFACE_UPDATE_PENDING);
 }
 
 /* get temperature in millidegrees */
@@ -1321,6 +1327,9 @@
 	if (tmp < rdev->config.rv770.max_simds) {
 		rdev->config.rv770.max_simds = tmp;
 	}
+	tmp = rdev->config.rv770.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
+	rdev->config.rv770.active_simds = tmp;
 
 	switch (rdev->config.rv770.max_tile_pipes) {
 	case 1:

diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 22a63c9..730cee2 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c

@@ -71,6 +71,7 @@
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -2900,7 +2901,7 @@
 	u32 sx_debug_1;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
@@ -3098,6 +3099,14 @@
 		     rdev->config.si.max_sh_per_se,
 		     rdev->config.si.max_cu_per_sh);
 
+	/* the bitmap depends only on (se, sh): add its popcount once per SH */
+	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+			/* k = number of CU bits set for shader array (i, j) */
+			k = hweight32(si_get_cu_active_bitmap(rdev, i, j));
+			rdev->config.si.active_cus += k;
+		}
+	}
 
 	/* set HW defaults for 3D engine */
 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
@@ -3186,7 +3195,7 @@
 	/* EVENT_WRITE_EOP - flush caches, send int */
 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
-	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);
@@ -3219,7 +3228,7 @@
 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 			radeon_ring_write(ring, (1 << 8));
 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
 			radeon_ring_write(ring, next_rptr);
 		}
 
@@ -4044,18 +4053,21 @@
 	WREG32(MC_VM_MX_L1_TLB_CNTL,
 	       (0xA << 7) |
 	       ENABLE_L1_TLB |
+	       ENABLE_L1_FRAGMENT_PROCESSING |
 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
 	       ENABLE_ADVANCED_DRIVER_MODEL |
 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
 	/* Setup L2 cache */
 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+	       ENABLE_L2_FRAGMENT_PROCESSING |
 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
+	       BANK_SELECT(4) |
+	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
 	/* setup context0 */
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
@@ -4092,6 +4104,7 @@
 	       (u32)(rdev->dummy_page.addr >> 12));
 	WREG32(VM_CONTEXT1_CNTL2, 4);
 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
@@ -6151,7 +6164,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_flip(rdev, 0);
+						radeon_crtc_handle_vblank(rdev, 0);
 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -6177,7 +6190,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_flip(rdev, 1);
+						radeon_crtc_handle_vblank(rdev, 1);
 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}
@@ -6203,7 +6216,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_flip(rdev, 2);
+						radeon_crtc_handle_vblank(rdev, 2);
 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D3 vblank\n");
 				}
@@ -6229,7 +6242,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_flip(rdev, 3);
+						radeon_crtc_handle_vblank(rdev, 3);
 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D4 vblank\n");
 				}
@@ -6255,7 +6268,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_flip(rdev, 4);
+						radeon_crtc_handle_vblank(rdev, 4);
 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D5 vblank\n");
 				}
@@ -6281,7 +6294,7 @@
 						wake_up(&rdev->irq.vblank_queue);
 					}
 					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_flip(rdev, 5);
+						radeon_crtc_handle_vblank(rdev, 5);
 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D6 vblank\n");
 				}

diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index de0ca07..e24c94b 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c

@@ -79,7 +79,25 @@
 
 	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
 
-	if (flags & R600_PTE_SYSTEM) {
+	if (flags == R600_PTE_GART) {
+		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
+		while (count) {
+			unsigned bytes = count * 8;
+			if (bytes > 0xFFFF8)
+				bytes = 0xFFFF8;
+
+			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
+							      1, 0, 0, bytes);
+			ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+			ib->ptr[ib->length_dw++] = lower_32_bits(src);
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+			ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
+
+			pe += bytes;
+			src += bytes;
+			count -= bytes / 8;
+		}
+	} else if (flags & R600_PTE_SYSTEM) {
 		while (count) {
 			ndw = count * 2;
 			if (ndw > 0xFFFFE)
@@ -202,8 +220,8 @@
 			cur_size_in_bytes = 0xFFFFF;
 		size_in_bytes -= cur_size_in_bytes;
 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
-		radeon_ring_write(ring, dst_offset & 0xffffffff);
-		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, lower_32_bits(dst_offset));
+		radeon_ring_write(ring, lower_32_bits(src_offset));
 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
 		src_offset += cur_size_in_bytes;

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 9a3567b..5891886 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c

@@ -1948,6 +1948,10 @@
 			si_pi->cac_weights = cac_weights_cape_verde_pro;
 			si_pi->dte_data = dte_data_cape_verde;
 			break;
+		case 0x682C:
+			si_pi->cac_weights = cac_weights_cape_verde_pro;
+			si_pi->dte_data = dte_data_sun_xt;
+			break;
 		case 0x6825:
 		case 0x6827:
 			si_pi->cac_weights = cac_weights_heathrow;
@@ -1971,10 +1975,9 @@
 			si_pi->dte_data = dte_data_venus_xt;
 			break;
 		case 0x6823:
-			si_pi->cac_weights = cac_weights_chelsea_pro;
-			si_pi->dte_data = dte_data_venus_pro;
-			break;
 		case 0x682B:
+		case 0x6822:
+		case 0x682A:
 			si_pi->cac_weights = cac_weights_chelsea_pro;
 			si_pi->dte_data = dte_data_venus_pro;
 			break;
@@ -1988,6 +1991,7 @@
 		case 0x6601:
 		case 0x6621:
 		case 0x6603:
+		case 0x6605:
 			si_pi->cac_weights = cac_weights_mars_pro;
 			si_pi->lcac_config = lcac_mars_pro;
 			si_pi->cac_override = cac_override_oland;
@@ -1998,6 +2002,7 @@
 		case 0x6600:
 		case 0x6606:
 		case 0x6620:
+		case 0x6604:
 			si_pi->cac_weights = cac_weights_mars_xt;
 			si_pi->lcac_config = lcac_mars_pro;
 			si_pi->cac_override = cac_override_oland;
@@ -2006,6 +2011,8 @@
 			update_dte_from_pl2 = true;
 			break;
 		case 0x6611:
+		case 0x6613:
+		case 0x6608:
 			si_pi->cac_weights = cac_weights_oland_pro;
 			si_pi->lcac_config = lcac_mars_pro;
 			si_pi->cac_override = cac_override_oland;

diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 7321283..fd414d3 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h

@@ -362,6 +362,7 @@
 #define		READ_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 16)
 #define		WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 18)
 #define		WRITE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 19)
+#define		PAGE_TABLE_BLOCK_SIZE(x)			(((x) & 0xF) << 24)
 #define VM_CONTEXT1_CNTL				0x1414
 #define VM_CONTEXT0_CNTL2				0x1430
 #define VM_CONTEXT1_CNTL2				0x1434

diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
index d177100..8bfdadd 100644
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c

@@ -45,7 +45,7 @@
 	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
 	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));

diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index d8e835a..2e3d7b5 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig

@@ -1,6 +1,7 @@
 config DRM_RCAR_DU
 	tristate "DRM Support for R-Car Display Unit"
 	depends on DRM && ARM
+	depends on ARCH_SHMOBILE || COMPILE_TEST
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
@@ -12,6 +13,7 @@
 config DRM_RCAR_LVDS
 	bool "R-Car DU LVDS Encoder Support"
 	depends on DRM_RCAR_DU
+	depends on ARCH_R8A7790 || ARCH_R8A7791 || COMPILE_TEST
 	help
 	  Enable support the R-Car Display Unit embedded LVDS encoders
 	  (currently only on R8A7790).

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
index 4f3ba93..289048d 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c

@@ -57,15 +57,8 @@
 	return 1;
 }
 
-static int rcar_du_lvds_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = rcar_du_lvds_connector_get_modes,
-	.mode_valid = rcar_du_lvds_connector_mode_valid,
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
index 41d563a..ccfe64c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c

@@ -25,15 +25,8 @@
 	return 0;
 }
 
-static int rcar_du_vga_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = rcar_du_vga_connector_get_modes,
-	.mode_valid = rcar_du_vga_connector_mode_valid,
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 

diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index d2b2df9..c97cdc9 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c

@@ -1079,4 +1079,4 @@
 	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH),
 };
 
-int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls);
+int savage_max_ioctl = ARRAY_SIZE(savage_ioctls);

diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig
index 2ee44ca..a50fe0e 100644
--- a/drivers/gpu/drm/shmobile/Kconfig
+++ b/drivers/gpu/drm/shmobile/Kconfig

@@ -1,6 +1,7 @@
 config DRM_SHMOBILE
 	tristate "DRM Support for SH Mobile"
-	depends on DRM && (ARM || SUPERH)
+	depends on DRM && ARM
+	depends on ARCH_SHMOBILE || COMPILE_TEST
 	select BACKLIGHT_CLASS_DEVICE
 	select DRM_KMS_HELPER
 	select DRM_KMS_FB_HELPER

diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index e9e5e6d..faf176b 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c

@@ -674,12 +674,6 @@
 	return 1;
 }
 
-static int shmob_drm_connector_mode_valid(struct drm_connector *connector,
-					  struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *
 shmob_drm_connector_best_encoder(struct drm_connector *connector)
 {
@@ -690,7 +684,6 @@
 
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = shmob_drm_connector_get_modes,
-	.mode_valid = shmob_drm_connector_mode_valid,
 	.best_encoder = shmob_drm_connector_best_encoder,
 };
 

diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index c839c9c..82c84c7 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c

@@ -185,7 +185,7 @@
 		goto done;
 	}
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to install IRQ handler\n");
 		goto done;

diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 0573be0..77f288e 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c

@@ -359,4 +359,4 @@
 	DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
 };
 
-int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls);
+int sis_max_ioctl = ARRAY_SIZE(sis_ioctls);

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index d43f21b..2c66a8d 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile

@@ -1,7 +1,6 @@
 ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 
 tegra-drm-y := \
-	bus.o \
 	drm.o \
 	gem.o \
 	fb.o \

diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c
deleted file mode 100644
index 71cef5c..0000000
--- a/drivers/gpu/drm/tegra/bus.c
+++ /dev/null

@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "drm.h"
-
-static int drm_host1x_set_busid(struct drm_device *dev,
-				struct drm_master *master)
-{
-	const char *device = dev_name(dev->dev);
-	const char *driver = dev->driver->name;
-	const char *bus = dev->dev->bus->name;
-	int length;
-
-	master->unique_len = strlen(bus) + 1 + strlen(device);
-	master->unique_size = master->unique_len;
-
-	master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL);
-	if (!master->unique)
-		return -ENOMEM;
-
-	snprintf(master->unique, master->unique_len + 1, "%s:%s", bus, device);
-
-	length = strlen(driver) + 1 + master->unique_len;
-
-	dev->devname = kmalloc(length + 1, GFP_KERNEL);
-	if (!dev->devname)
-		return -ENOMEM;
-
-	snprintf(dev->devname, length + 1, "%s@%s", driver, master->unique);
-
-	return 0;
-}
-
-static struct drm_bus drm_host1x_bus = {
-	.bus_type = DRIVER_BUS_HOST1X,
-	.set_busid = drm_host1x_set_busid,
-};
-
-int drm_host1x_init(struct drm_driver *driver, struct host1x_device *device)
-{
-	struct drm_device *drm;
-	int ret;
-
-	driver->bus = &drm_host1x_bus;
-
-	drm = drm_dev_alloc(driver, &device->dev);
-	if (!drm)
-		return -ENOMEM;
-
-	ret = drm_dev_register(drm, 0);
-	if (ret)
-		goto err_free;
-
-	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name,
-		 driver->major, driver->minor, driver->patchlevel,
-		 driver->date, drm->primary->index);
-
-	return 0;
-
-err_free:
-	drm_dev_unref(drm);
-	return ret;
-}
-
-void drm_host1x_exit(struct drm_driver *driver, struct host1x_device *device)
-{
-	struct tegra_drm *tegra = dev_get_drvdata(&device->dev);
-
-	drm_put_dev(tegra->drm);
-}

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index edb871d..ef40381 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c

@@ -17,6 +17,7 @@
 
 struct tegra_dc_soc_info {
 	bool supports_interlacing;
+	bool supports_cursor;
 };
 
 struct tegra_plane {
@@ -29,309 +30,70 @@
 	return container_of(plane, struct tegra_plane, base);
 }
 
-static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
-			      struct drm_framebuffer *fb, int crtc_x,
-			      int crtc_y, unsigned int crtc_w,
-			      unsigned int crtc_h, uint32_t src_x,
-			      uint32_t src_y, uint32_t src_w, uint32_t src_h)
+static unsigned int tegra_dc_format(uint32_t format, uint32_t *swap)
 {
-	struct tegra_plane *p = to_tegra_plane(plane);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct tegra_dc_window window;
-	unsigned int i;
+	/* assume no swapping of fetched data */
+	if (swap)
+		*swap = BYTE_SWAP_NOSWAP;
 
-	memset(&window, 0, sizeof(window));
-	window.src.x = src_x >> 16;
-	window.src.y = src_y >> 16;
-	window.src.w = src_w >> 16;
-	window.src.h = src_h >> 16;
-	window.dst.x = crtc_x;
-	window.dst.y = crtc_y;
-	window.dst.w = crtc_w;
-	window.dst.h = crtc_h;
-	window.format = tegra_dc_format(fb->pixel_format);
-	window.bits_per_pixel = fb->bits_per_pixel;
-	window.bottom_up = tegra_fb_is_bottom_up(fb);
-	window.tiled = tegra_fb_is_tiled(fb);
+	switch (format) {
+	case DRM_FORMAT_XBGR8888:
+		return WIN_COLOR_DEPTH_R8G8B8A8;
 
-	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
+	case DRM_FORMAT_XRGB8888:
+		return WIN_COLOR_DEPTH_B8G8R8A8;
 
-		window.base[i] = bo->paddr + fb->offsets[i];
+	case DRM_FORMAT_RGB565:
+		return WIN_COLOR_DEPTH_B5G6R5;
 
-		/*
-		 * Tegra doesn't support different strides for U and V planes
-		 * so we display a warning if the user tries to display a
-		 * framebuffer with such a configuration.
-		 */
-		if (i >= 2) {
-			if (fb->pitches[i] != window.stride[1])
-				DRM_ERROR("unsupported UV-plane configuration\n");
-		} else {
-			window.stride[i] = fb->pitches[i];
-		}
+	case DRM_FORMAT_UYVY:
+		return WIN_COLOR_DEPTH_YCbCr422;
+
+	case DRM_FORMAT_YUYV:
+		if (swap)
+			*swap = BYTE_SWAP_SWAP2;
+
+		return WIN_COLOR_DEPTH_YCbCr422;
+
+	case DRM_FORMAT_YUV420:
+		return WIN_COLOR_DEPTH_YCbCr420P;
+
+	case DRM_FORMAT_YUV422:
+		return WIN_COLOR_DEPTH_YCbCr422P;
+
+	default:
+		break;
 	}
 
-	return tegra_dc_setup_window(dc, p->index, &window);
+	WARN(1, "unsupported pixel format %u, using default\n", format);
+	return WIN_COLOR_DEPTH_B8G8R8A8;
 }
 
-static int tegra_plane_disable(struct drm_plane *plane)
+static bool tegra_dc_format_is_yuv(unsigned int format, bool *planar)
 {
-	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
-	struct tegra_plane *p = to_tegra_plane(plane);
-	unsigned long value;
+	switch (format) {
+	case WIN_COLOR_DEPTH_YCbCr422:
+	case WIN_COLOR_DEPTH_YUV422:
+		if (planar)
+			*planar = false;
 
-	if (!plane->crtc)
-		return 0;
+		return true;
 
-	value = WINDOW_A_SELECT << p->index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+	case WIN_COLOR_DEPTH_YCbCr420P:
+	case WIN_COLOR_DEPTH_YUV420P:
+	case WIN_COLOR_DEPTH_YCbCr422P:
+	case WIN_COLOR_DEPTH_YUV422P:
+	case WIN_COLOR_DEPTH_YCbCr422R:
+	case WIN_COLOR_DEPTH_YUV422R:
+	case WIN_COLOR_DEPTH_YCbCr422RA:
+	case WIN_COLOR_DEPTH_YUV422RA:
+		if (planar)
+			*planar = true;
 
-	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
-	value &= ~WIN_ENABLE;
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	tegra_dc_writel(dc, WIN_A_UPDATE << p->index, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, WIN_A_ACT_REQ << p->index, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-static void tegra_plane_destroy(struct drm_plane *plane)
-{
-	struct tegra_plane *p = to_tegra_plane(plane);
-
-	tegra_plane_disable(plane);
-	drm_plane_cleanup(plane);
-	kfree(p);
-}
-
-static const struct drm_plane_funcs tegra_plane_funcs = {
-	.update_plane = tegra_plane_update,
-	.disable_plane = tegra_plane_disable,
-	.destroy = tegra_plane_destroy,
-};
-
-static const uint32_t plane_formats[] = {
-	DRM_FORMAT_XBGR8888,
-	DRM_FORMAT_XRGB8888,
-	DRM_FORMAT_RGB565,
-	DRM_FORMAT_UYVY,
-	DRM_FORMAT_YUV420,
-	DRM_FORMAT_YUV422,
-};
-
-static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < 2; i++) {
-		struct tegra_plane *plane;
-
-		plane = kzalloc(sizeof(*plane), GFP_KERNEL);
-		if (!plane)
-			return -ENOMEM;
-
-		plane->index = 1 + i;
-
-		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
-				     &tegra_plane_funcs, plane_formats,
-				     ARRAY_SIZE(plane_formats), false);
-		if (err < 0) {
-			kfree(plane);
-			return err;
-		}
+		return true;
 	}
 
-	return 0;
-}
-
-static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y,
-			     struct drm_framebuffer *fb)
-{
-	unsigned int format = tegra_dc_format(fb->pixel_format);
-	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
-	unsigned int h_offset = 0, v_offset = 0;
-	unsigned long value;
-
-	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	value = fb->offsets[0] + y * fb->pitches[0] +
-		x * fb->bits_per_pixel / 8;
-
-	tegra_dc_writel(dc, bo->paddr + value, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, fb->pitches[0], DC_WIN_LINE_STRIDE);
-	tegra_dc_writel(dc, format, DC_WIN_COLOR_DEPTH);
-
-	if (tegra_fb_is_tiled(fb)) {
-		value = DC_WIN_BUFFER_ADDR_MODE_TILE_UV |
-			DC_WIN_BUFFER_ADDR_MODE_TILE;
-	} else {
-		value = DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV |
-			DC_WIN_BUFFER_ADDR_MODE_LINEAR;
-	}
-
-	tegra_dc_writel(dc, value, DC_WIN_BUFFER_ADDR_MODE);
-
-	/* make sure bottom-up buffers are properly displayed */
-	if (tegra_fb_is_bottom_up(fb)) {
-		value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
-		value |= INVERT_V;
-		tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-		v_offset += fb->height - 1;
-	} else {
-		value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
-		value &= ~INVERT_V;
-		tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-	}
-
-	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
-	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
-
-	value = GENERAL_UPDATE | WIN_A_UPDATE;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	value = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-void tegra_dc_enable_vblank(struct tegra_dc *dc)
-{
-	unsigned long value, flags;
-
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value |= VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	spin_unlock_irqrestore(&dc->lock, flags);
-}
-
-void tegra_dc_disable_vblank(struct tegra_dc *dc)
-{
-	unsigned long value, flags;
-
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value &= ~VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	spin_unlock_irqrestore(&dc->lock, flags);
-}
-
-static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
-{
-	struct drm_device *drm = dc->base.dev;
-	struct drm_crtc *crtc = &dc->base;
-	unsigned long flags, base;
-	struct tegra_bo *bo;
-
-	if (!dc->event)
-		return;
-
-	bo = tegra_fb_get_plane(crtc->primary->fb, 0);
-
-	/* check if new start address has been latched */
-	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
-	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
-
-	if (base == bo->paddr + crtc->primary->fb->offsets[0]) {
-		spin_lock_irqsave(&drm->event_lock, flags);
-		drm_send_vblank_event(drm, dc->pipe, dc->event);
-		drm_vblank_put(drm, dc->pipe);
-		dc->event = NULL;
-		spin_unlock_irqrestore(&drm->event_lock, flags);
-	}
-}
-
-void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&drm->event_lock, flags);
-
-	if (dc->event && dc->event->base.file_priv == file) {
-		dc->event->base.destroy(&dc->event->base);
-		drm_vblank_put(drm, dc->pipe);
-		dc->event = NULL;
-	}
-
-	spin_unlock_irqrestore(&drm->event_lock, flags);
-}
-
-static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			      struct drm_pending_vblank_event *event, uint32_t page_flip_flags)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-
-	if (dc->event)
-		return -EBUSY;
-
-	if (event) {
-		event->pipe = dc->pipe;
-		dc->event = event;
-		drm_vblank_get(drm, dc->pipe);
-	}
-
-	tegra_dc_set_base(dc, 0, 0, fb);
-	crtc->primary->fb = fb;
-
-	return 0;
-}
-
-static void drm_crtc_clear(struct drm_crtc *crtc)
-{
-	memset(crtc, 0, sizeof(*crtc));
-}
-
-static void tegra_dc_destroy(struct drm_crtc *crtc)
-{
-	drm_crtc_cleanup(crtc);
-	drm_crtc_clear(crtc);
-}
-
-static const struct drm_crtc_funcs tegra_crtc_funcs = {
-	.page_flip = tegra_dc_page_flip,
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = tegra_dc_destroy,
-};
-
-static void tegra_crtc_disable(struct drm_crtc *crtc)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-	struct drm_plane *plane;
-
-	drm_for_each_legacy_plane(plane, &drm->mode_config.plane_list) {
-		if (plane->crtc == crtc) {
-			tegra_plane_disable(plane);
-			plane->crtc = NULL;
-
-			if (plane->fb) {
-				drm_framebuffer_unreference(plane->fb);
-				plane->fb = NULL;
-			}
-		}
-	}
-
-	drm_vblank_off(drm, dc->pipe);
-}
-
-static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted)
-{
-	return true;
+	return false;
 }
 
 static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v,
@@ -374,103 +136,8 @@
 	return dfixed_frac(inf);
 }
 
-static int tegra_dc_set_timings(struct tegra_dc *dc,
-				struct drm_display_mode *mode)
-{
-	/* TODO: For HDMI compliance, h & v ref_to_sync should be set to 1 */
-	unsigned int h_ref_to_sync = 0;
-	unsigned int v_ref_to_sync = 0;
-	unsigned long value;
-
-	tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
-
-	value = (v_ref_to_sync << 16) | h_ref_to_sync;
-	tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC);
-
-	value = ((mode->vsync_end - mode->vsync_start) << 16) |
-		((mode->hsync_end - mode->hsync_start) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH);
-
-	value = ((mode->vtotal - mode->vsync_end) << 16) |
-		((mode->htotal - mode->hsync_end) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH);
-
-	value = ((mode->vsync_start - mode->vdisplay) << 16) |
-		((mode->hsync_start - mode->hdisplay) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH);
-
-	value = (mode->vdisplay << 16) | mode->hdisplay;
-	tegra_dc_writel(dc, value, DC_DISP_ACTIVE);
-
-	return 0;
-}
-
-static int tegra_crtc_setup_clk(struct drm_crtc *crtc,
-				struct drm_display_mode *mode,
-				unsigned long *div)
-{
-	unsigned long pclk = mode->clock * 1000, rate;
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct tegra_output *output = NULL;
-	struct drm_encoder *encoder;
-	long err;
-
-	list_for_each_entry(encoder, &crtc->dev->mode_config.encoder_list, head)
-		if (encoder->crtc == crtc) {
-			output = encoder_to_output(encoder);
-			break;
-		}
-
-	if (!output)
-		return -ENODEV;
-
-	/*
-	 * This assumes that the display controller will divide its parent
-	 * clock by 2 to generate the pixel clock.
-	 */
-	err = tegra_output_setup_clock(output, dc->clk, pclk * 2);
-	if (err < 0) {
-		dev_err(dc->dev, "failed to setup clock: %ld\n", err);
-		return err;
-	}
-
-	rate = clk_get_rate(dc->clk);
-	*div = (rate * 2 / pclk) - 2;
-
-	DRM_DEBUG_KMS("rate: %lu, div: %lu\n", rate, *div);
-
-	return 0;
-}
-
-static bool tegra_dc_format_is_yuv(unsigned int format, bool *planar)
-{
-	switch (format) {
-	case WIN_COLOR_DEPTH_YCbCr422:
-	case WIN_COLOR_DEPTH_YUV422:
-		if (planar)
-			*planar = false;
-
-		return true;
-
-	case WIN_COLOR_DEPTH_YCbCr420P:
-	case WIN_COLOR_DEPTH_YUV420P:
-	case WIN_COLOR_DEPTH_YCbCr422P:
-	case WIN_COLOR_DEPTH_YUV422P:
-	case WIN_COLOR_DEPTH_YCbCr422R:
-	case WIN_COLOR_DEPTH_YUV422R:
-	case WIN_COLOR_DEPTH_YCbCr422RA:
-	case WIN_COLOR_DEPTH_YUV422RA:
-		if (planar)
-			*planar = true;
-
-		return true;
-	}
-
-	return false;
-}
-
-int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
-			  const struct tegra_dc_window *window)
+static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+				 const struct tegra_dc_window *window)
 {
 	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
 	unsigned long value;
@@ -490,7 +157,7 @@
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
 	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
-	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
+	tegra_dc_writel(dc, window->swap, DC_WIN_BYTE_SWAP);
 
 	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
 	tegra_dc_writel(dc, value, DC_WIN_POSITION);
@@ -574,7 +241,7 @@
 	}
 
 	if (window->bottom_up)
-		value |= INVERT_V;
+		value |= V_DIRECTION;
 
 	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
 
@@ -611,33 +278,486 @@
 	return 0;
 }
 
-unsigned int tegra_dc_format(uint32_t format)
+static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
+			      struct drm_framebuffer *fb, int crtc_x,
+			      int crtc_y, unsigned int crtc_w,
+			      unsigned int crtc_h, uint32_t src_x,
+			      uint32_t src_y, uint32_t src_w, uint32_t src_h)
 {
-	switch (format) {
-	case DRM_FORMAT_XBGR8888:
-		return WIN_COLOR_DEPTH_R8G8B8A8;
+	struct tegra_plane *p = to_tegra_plane(plane);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_dc_window window;
+	unsigned int i;
 
-	case DRM_FORMAT_XRGB8888:
-		return WIN_COLOR_DEPTH_B8G8R8A8;
+	memset(&window, 0, sizeof(window));
+	window.src.x = src_x >> 16;
+	window.src.y = src_y >> 16;
+	window.src.w = src_w >> 16;
+	window.src.h = src_h >> 16;
+	window.dst.x = crtc_x;
+	window.dst.y = crtc_y;
+	window.dst.w = crtc_w;
+	window.dst.h = crtc_h;
+	window.format = tegra_dc_format(fb->pixel_format, &window.swap);
+	window.bits_per_pixel = fb->bits_per_pixel;
+	window.bottom_up = tegra_fb_is_bottom_up(fb);
+	window.tiled = tegra_fb_is_tiled(fb);
 
-	case DRM_FORMAT_RGB565:
-		return WIN_COLOR_DEPTH_B5G6R5;
+	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
+		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
 
-	case DRM_FORMAT_UYVY:
-		return WIN_COLOR_DEPTH_YCbCr422;
+		window.base[i] = bo->paddr + fb->offsets[i];
 
-	case DRM_FORMAT_YUV420:
-		return WIN_COLOR_DEPTH_YCbCr420P;
-
-	case DRM_FORMAT_YUV422:
-		return WIN_COLOR_DEPTH_YCbCr422P;
-
-	default:
-		break;
+		/*
+		 * Tegra doesn't support different strides for U and V planes
+		 * so we display a warning if the user tries to display a
+		 * framebuffer with such a configuration.
+		 */
+		if (i >= 2) {
+			if (fb->pitches[i] != window.stride[1])
+				DRM_ERROR("unsupported UV-plane configuration\n");
+		} else {
+			window.stride[i] = fb->pitches[i];
+		}
 	}
 
-	WARN(1, "unsupported pixel format %u, using default\n", format);
-	return WIN_COLOR_DEPTH_B8G8R8A8;
+	return tegra_dc_setup_window(dc, p->index, &window);
+}
+
+static int tegra_plane_disable(struct drm_plane *plane)
+{
+	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	unsigned long value;
+
+	if (!plane->crtc)
+		return 0;
+
+	value = WINDOW_A_SELECT << p->index;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
+	value &= ~WIN_ENABLE;
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	tegra_dc_writel(dc, WIN_A_UPDATE << p->index, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, WIN_A_ACT_REQ << p->index, DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+static void tegra_plane_destroy(struct drm_plane *plane)
+{
+	struct tegra_plane *p = to_tegra_plane(plane);
+
+	tegra_plane_disable(plane);
+	drm_plane_cleanup(plane);
+	kfree(p);
+}
+
+static const struct drm_plane_funcs tegra_plane_funcs = {
+	.update_plane = tegra_plane_update,
+	.disable_plane = tegra_plane_disable,
+	.destroy = tegra_plane_destroy,
+};
+
+static const uint32_t plane_formats[] = {
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
+};
+
+static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < 2; i++) {
+		struct tegra_plane *plane;
+
+		plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+		if (!plane)
+			return -ENOMEM;
+
+		plane->index = 1 + i;
+
+		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
+				     &tegra_plane_funcs, plane_formats,
+				     ARRAY_SIZE(plane_formats), false);
+		if (err < 0) {
+			kfree(plane);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Point window A of @dc at framebuffer @fb, starting at pixel (@x, @y).
+ *
+ * Programs start address, line stride, colour depth, byte swap, addressing
+ * mode (tiled or linear) and vertical scan direction, then requests a
+ * general + window A update and activation. Always returns 0.
+ */
+static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y,
+			     struct drm_framebuffer *fb)
+{
+	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
+	unsigned int depth, byte_swap;
+	unsigned int v_offset = 0;
+	unsigned long value;
+	bool bottom_up;
+
+	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	/* byte offset of pixel (x, y) within the first plane */
+	value = fb->offsets[0] + y * fb->pitches[0] +
+		x * fb->bits_per_pixel / 8;
+
+	tegra_dc_writel(dc, bo->paddr + value, DC_WINBUF_START_ADDR);
+	tegra_dc_writel(dc, fb->pitches[0], DC_WIN_LINE_STRIDE);
+
+	depth = tegra_dc_format(fb->pixel_format, &byte_swap);
+	tegra_dc_writel(dc, depth, DC_WIN_COLOR_DEPTH);
+	tegra_dc_writel(dc, byte_swap, DC_WIN_BYTE_SWAP);
+
+	if (tegra_fb_is_tiled(fb))
+		value = DC_WIN_BUFFER_ADDR_MODE_TILE_UV |
+			DC_WIN_BUFFER_ADDR_MODE_TILE;
+	else
+		value = DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV |
+			DC_WIN_BUFFER_ADDR_MODE_LINEAR;
+
+	tegra_dc_writel(dc, value, DC_WIN_BUFFER_ADDR_MODE);
+
+	/*
+	 * Bottom-up buffers are scanned in the opposite vertical direction,
+	 * beginning at the last line of the framebuffer.
+	 */
+	bottom_up = tegra_fb_is_bottom_up(fb);
+
+	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
+
+	if (bottom_up) {
+		value |= V_DIRECTION;
+		v_offset += fb->height - 1;
+	} else {
+		value &= ~V_DIRECTION;
+	}
+
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	/* the horizontal offset is never used, only the vertical one */
+	tegra_dc_writel(dc, 0, DC_WINBUF_ADDR_H_OFFSET);
+	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
+
+	tegra_dc_writel(dc, GENERAL_UPDATE | WIN_A_UPDATE,
+			DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ | WIN_A_ACT_REQ,
+			DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+void tegra_dc_enable_vblank(struct tegra_dc *dc)
+{
+	unsigned long value, flags;
+
+	spin_lock_irqsave(&dc->lock, flags);
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+	value |= VBLANK_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	spin_unlock_irqrestore(&dc->lock, flags);
+}
+
+void tegra_dc_disable_vblank(struct tegra_dc *dc)
+{
+	unsigned long value, flags;
+
+	spin_lock_irqsave(&dc->lock, flags);
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+	value &= ~VBLANK_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	spin_unlock_irqrestore(&dc->lock, flags);
+}
+
+/*
+ * Legacy cursor hook: show the image in the GEM object named by @handle as
+ * the hardware cursor of @crtc, or hide the cursor when @handle is 0.
+ *
+ * Only square cursors of 32, 64, 128 or 256 pixels are accepted; the size is
+ * validated even when the cursor is being hidden. @hot_x and @hot_y are not
+ * used.
+ *
+ * Returns 0 on success, -ENXIO if the SoC has no cursor support, -EINVAL for
+ * an unsupported size, or -ENOENT if @handle does not name a GEM object.
+ */
+static int tegra_dc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file,
+				uint32_t handle, uint32_t width,
+				uint32_t height, int32_t hot_x, int32_t hot_y)
+{
+	unsigned long value = CURSOR_CLIP_DISPLAY;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo = NULL;
+
+	if (!dc->soc->supports_cursor)
+		return -ENXIO;
+
+	/* the size field only encodes square cursors */
+	if (width != height)
+		return -EINVAL;
+
+	switch (width) {
+	case 32:
+		value |= CURSOR_SIZE_32x32;
+		break;
+
+	case 64:
+		value |= CURSOR_SIZE_64x64;
+		break;
+
+	case 128:
+		value |= CURSOR_SIZE_128x128;
+		break;
+
+	case 256:
+		value |= CURSOR_SIZE_256x256;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (handle) {
+		/*
+		 * NOTE(review): drm_gem_object_lookup() hands back a
+		 * referenced object and nothing in this function drops it.
+		 * Presumably the reference has to be held while the cursor
+		 * is displayed and released once it is replaced or hidden;
+		 * that needs per-CRTC tracking outside this function.
+		 */
+		gem = drm_gem_object_lookup(crtc->dev, file, handle);
+		if (!gem)
+			return -ENOENT;
+
+		bo = to_tegra_bo(gem);
+	}
+
+	if (bo) {
+		/* address bits 31:10 go into the low bits of the register */
+		unsigned long addr = (bo->paddr & 0xfffffc00) >> 10;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		/*
+		 * Shift before masking, otherwise the bits above 31 are
+		 * discarded and the result is always zero.
+		 * NOTE(review): the register is assumed to hold address
+		 * bits 33:32 - confirm the field width against the TRM.
+		 */
+		unsigned long high = (bo->paddr >> 32) & 0x3;
+#endif
+
+		tegra_dc_writel(dc, value | addr, DC_DISP_CURSOR_START_ADDR);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		tegra_dc_writel(dc, high, DC_DISP_CURSOR_START_ADDR_HI);
+#endif
+
+		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+		value |= CURSOR_ENABLE;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+
+		/* replace both blend factors and select the normal mode */
+		value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL);
+		value &= ~CURSOR_DST_BLEND_MASK;
+		value &= ~CURSOR_SRC_BLEND_MASK;
+		value |= CURSOR_MODE_NORMAL;
+		value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC;
+		value |= CURSOR_SRC_BLEND_K1_TIMES_SRC;
+		value |= CURSOR_ALPHA;
+		tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL);
+	} else {
+		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+		value &= ~CURSOR_ENABLE;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+	}
+
+	/* request update, then activation, of the cursor and general state */
+	tegra_dc_writel(dc, CURSOR_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, CURSOR_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+/*
+ * Legacy cursor hook: place the hardware cursor of @crtc at (@x, @y).
+ *
+ * Each coordinate is truncated to its low 14 bits before being written.
+ * Returns 0 on success or -ENXIO if the SoC has no cursor support.
+ */
+static int tegra_dc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned long position;
+
+	if (!dc->soc->supports_cursor)
+		return -ENXIO;
+
+	/* x occupies bits 13:0, y is placed 16 bits above it */
+	position = x & 0x3fff;
+	position |= (y & 0x3fff) << 16;
+	tegra_dc_writel(dc, position, DC_DISP_CURSOR_POSITION);
+
+	/* request update, then activation, of the cursor state */
+	tegra_dc_writel(dc, CURSOR_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, CURSOR_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	/* XXX: possibly only needed on generations before Tegra124? */
+	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
+{
+	struct drm_device *drm = dc->base.dev;
+	struct drm_crtc *crtc = &dc->base;
+	unsigned long flags, base;
+	struct tegra_bo *bo;
+
+	if (!dc->event)
+		return;
+
+	bo = tegra_fb_get_plane(crtc->primary->fb, 0);
+
+	/* check if new start address has been latched */
+	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
+	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
+	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
+
+	if (base == bo->paddr + crtc->primary->fb->offsets[0]) {
+		spin_lock_irqsave(&drm->event_lock, flags);
+		drm_send_vblank_event(drm, dc->pipe, dc->event);
+		drm_vblank_put(drm, dc->pipe);
+		dc->event = NULL;
+		spin_unlock_irqrestore(&drm->event_lock, flags);
+	}
+}
+
+void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_device *drm = crtc->dev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&drm->event_lock, flags);
+
+	if (dc->event && dc->event->base.file_priv == file) {
+		dc->event->base.destroy(&dc->event->base);
+		drm_vblank_put(drm, dc->pipe);
+		dc->event = NULL;
+	}
+
+	spin_unlock_irqrestore(&drm->event_lock, flags);
+}
+
+static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+			      struct drm_pending_vblank_event *event, uint32_t page_flip_flags)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_device *drm = crtc->dev;
+
+	if (dc->event)
+		return -EBUSY;
+
+	if (event) {
+		event->pipe = dc->pipe;
+		dc->event = event;
+		drm_vblank_get(drm, dc->pipe);
+	}
+
+	tegra_dc_set_base(dc, 0, 0, fb);
+	crtc->primary->fb = fb;
+
+	return 0;
+}
+
+static void drm_crtc_clear(struct drm_crtc *crtc)
+{
+	memset(crtc, 0, sizeof(*crtc));
+}
+
+static void tegra_dc_destroy(struct drm_crtc *crtc)
+{
+	drm_crtc_cleanup(crtc);
+	drm_crtc_clear(crtc);
+}
+
+static const struct drm_crtc_funcs tegra_crtc_funcs = {
+	.cursor_set2 = tegra_dc_cursor_set2,
+	.cursor_move = tegra_dc_cursor_move,
+	.page_flip = tegra_dc_page_flip,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = tegra_dc_destroy,
+};
+
+static void tegra_crtc_disable(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_device *drm = crtc->dev;
+	struct drm_plane *plane;
+
+	drm_for_each_legacy_plane(plane, &drm->mode_config.plane_list) {
+		if (plane->crtc == crtc) {
+			tegra_plane_disable(plane);
+			plane->crtc = NULL;
+
+			if (plane->fb) {
+				drm_framebuffer_unreference(plane->fb);
+				plane->fb = NULL;
+			}
+		}
+	}
+
+	drm_vblank_off(drm, dc->pipe);
+}
+
+static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
+				  const struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted)
+{
+	return true;
+}
+
+/*
+ * Program the display timing registers of @dc from @mode.
+ *
+ * Each register value carries the vertical quantity shifted left by 16 and
+ * the horizontal quantity in the low bits. Always returns 0.
+ */
+static int tegra_dc_set_timings(struct tegra_dc *dc,
+				struct drm_display_mode *mode)
+{
+	const unsigned int h_ref_to_sync = 1, v_ref_to_sync = 1;
+	unsigned long sync_width, back_porch, front_porch, active;
+
+	/* sync pulse: from sync start to sync end */
+	sync_width = ((mode->vsync_end - mode->vsync_start) << 16) |
+		     (mode->hsync_end - mode->hsync_start);
+
+	/* back porch: from sync end to the end of the line/frame */
+	back_porch = ((mode->vtotal - mode->vsync_end) << 16) |
+		     (mode->htotal - mode->hsync_end);
+
+	/* front porch: from the end of the active area to sync start */
+	front_porch = ((mode->vsync_start - mode->vdisplay) << 16) |
+		      (mode->hsync_start - mode->hdisplay);
+
+	active = (mode->vdisplay << 16) | mode->hdisplay;
+
+	tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
+	tegra_dc_writel(dc, (v_ref_to_sync << 16) | h_ref_to_sync,
+			DC_DISP_REF_TO_SYNC);
+	tegra_dc_writel(dc, sync_width, DC_DISP_SYNC_WIDTH);
+	tegra_dc_writel(dc, back_porch, DC_DISP_BACK_PORCH);
+	tegra_dc_writel(dc, front_porch, DC_DISP_FRONT_PORCH);
+	tegra_dc_writel(dc, active, DC_DISP_ACTIVE);
+
+	return 0;
+}
+
+/*
+ * Set up the pixel clock of @crtc for @mode and program the resulting
+ * divider into the display controller.
+ *
+ * Returns 0 on success, -ENODEV if no encoder is attached to @crtc, or the
+ * negative error code reported by tegra_output_setup_clock().
+ */
+static int tegra_crtc_setup_clk(struct drm_crtc *crtc,
+				struct drm_display_mode *mode)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned long pclk = mode->clock * 1000;
+	struct tegra_output *output = NULL;
+	struct drm_encoder *enc;
+	unsigned int div;
+	u32 ctrl;
+	long err;
+
+	/* use the output of the first encoder bound to this CRTC */
+	list_for_each_entry(enc, &crtc->dev->mode_config.encoder_list, head) {
+		if (enc->crtc != crtc)
+			continue;
+
+		output = encoder_to_output(enc);
+		break;
+	}
+
+	if (!output)
+		return -ENODEV;
+
+	/*
+	 * The parent clock is assumed to be pll_d_out0 or pll_d2_out, both
+	 * of which run at half the rate of the base pll_d.
+	 */
+	err = tegra_output_setup_clock(output, dc->clk, pclk, &div);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to setup clock: %ld\n", err);
+		return err;
+	}
+
+	DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk), div);
+
+	ctrl = SHIFT_CLK_DIVIDER(div) | PIXEL_CLK_DIVIDER_PCD1;
+	tegra_dc_writel(dc, ctrl, DC_DISP_DISP_CLOCK_CONTROL);
+
+	return 0;
 }
 
 static int tegra_crtc_mode_set(struct drm_crtc *crtc,
@@ -648,12 +768,12 @@
 	struct tegra_bo *bo = tegra_fb_get_plane(crtc->primary->fb, 0);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	struct tegra_dc_window window;
-	unsigned long div, value;
+	u32 value;
 	int err;
 
 	drm_vblank_pre_modeset(crtc->dev, dc->pipe);
 
-	err = tegra_crtc_setup_clk(crtc, mode, &div);
+	err = tegra_crtc_setup_clk(crtc, mode);
 	if (err) {
 		dev_err(dc->dev, "failed to setup clock for CRTC: %d\n", err);
 		return err;
@@ -669,9 +789,6 @@
 		tegra_dc_writel(dc, value, DC_DISP_INTERLACE_CONTROL);
 	}
 
-	value = SHIFT_CLK_DIVIDER(div) | PIXEL_CLK_DIVIDER_PCD1;
-	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
-
 	/* setup window parameters */
 	memset(&window, 0, sizeof(window));
 	window.src.x = 0;
@@ -682,7 +799,8 @@
 	window.dst.y = 0;
 	window.dst.w = mode->hdisplay;
 	window.dst.h = mode->vdisplay;
-	window.format = tegra_dc_format(crtc->primary->fb->pixel_format);
+	window.format = tegra_dc_format(crtc->primary->fb->pixel_format,
+					&window.swap);
 	window.bits_per_pixel = crtc->primary->fb->bits_per_pixel;
 	window.stride[0] = crtc->primary->fb->pitches[0];
 	window.base[0] = bo->paddr;
@@ -728,10 +846,6 @@
 		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
 	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
 
-	value = PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
-		PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
-
 	/* initialize timer */
 	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
 		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
@@ -991,6 +1105,8 @@
 	DUMP_REG(DC_DISP_SD_BL_CONTROL);
 	DUMP_REG(DC_DISP_SD_HW_K_VALUES);
 	DUMP_REG(DC_DISP_SD_MAN_K_VALUES);
+	DUMP_REG(DC_DISP_CURSOR_START_ADDR_HI);
+	DUMP_REG(DC_DISP_BLEND_CURSOR_CONTROL);
 	DUMP_REG(DC_WIN_WIN_OPTIONS);
 	DUMP_REG(DC_WIN_BYTE_SWAP);
 	DUMP_REG(DC_WIN_BUFFER_CONTROL);
@@ -1096,26 +1212,26 @@
 
 static int tegra_dc_init(struct host1x_client *client)
 {
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	int err;
 
-	drm_crtc_init(tegra->drm, &dc->base, &tegra_crtc_funcs);
+	drm_crtc_init(drm, &dc->base, &tegra_crtc_funcs);
 	drm_mode_crtc_set_gamma_size(&dc->base, 256);
 	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
 
-	err = tegra_dc_rgb_init(tegra->drm, dc);
+	err = tegra_dc_rgb_init(drm, dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
 		return err;
 	}
 
-	err = tegra_dc_add_planes(tegra->drm, dc);
+	err = tegra_dc_add_planes(drm, dc);
 	if (err < 0)
 		return err;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_init(dc, tegra->drm->primary);
+		err = tegra_dc_debugfs_init(dc, drm->primary);
 		if (err < 0)
 			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
 	}
@@ -1160,14 +1276,17 @@
 
 static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.supports_interlacing = false,
+	.supports_cursor = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.supports_interlacing = false,
+	.supports_cursor = false,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.supports_interlacing = true,
+	.supports_cursor = true,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {

diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index c941014..78c5fef 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h

@@ -67,10 +67,12 @@
 #define WIN_A_ACT_REQ   (1 <<  1)
 #define WIN_B_ACT_REQ   (1 <<  2)
 #define WIN_C_ACT_REQ   (1 <<  3)
+#define CURSOR_ACT_REQ  (1 <<  7)
 #define GENERAL_UPDATE  (1 <<  8)
 #define WIN_A_UPDATE    (1 <<  9)
 #define WIN_B_UPDATE    (1 << 10)
 #define WIN_C_UPDATE    (1 << 11)
+#define CURSOR_UPDATE   (1 << 15)
 #define NC_HOST_TRIG    (1 << 24)
 
 #define DC_CMD_DISPLAY_WINDOW_HEADER		0x042
@@ -116,9 +118,10 @@
 #define DC_DISP_DISP_SIGNAL_OPTIONS1		0x401
 
 #define DC_DISP_DISP_WIN_OPTIONS		0x402
-#define HDMI_ENABLE (1 << 30)
-#define DSI_ENABLE  (1 << 29)
-#define SOR_ENABLE  (1 << 25)
+#define HDMI_ENABLE	(1 << 30)
+#define DSI_ENABLE	(1 << 29)
+#define SOR_ENABLE	(1 << 25)
+#define CURSOR_ENABLE	(1 << 16)
 
 #define DC_DISP_DISP_MEM_HIGH_PRIORITY		0x403
 #define CURSOR_THRESHOLD(x)   (((x) & 0x03) << 24)
@@ -266,6 +269,14 @@
 #define DC_DISP_CURSOR_BACKGROUND		0x43d
 
 #define DC_DISP_CURSOR_START_ADDR		0x43e
+#define CURSOR_CLIP_DISPLAY	(0 << 28)
+#define CURSOR_CLIP_WIN_A	(1 << 28)
+#define CURSOR_CLIP_WIN_B	(2 << 28)
+#define CURSOR_CLIP_WIN_C	(3 << 28)
+#define CURSOR_SIZE_32x32	(0 << 24)
+#define CURSOR_SIZE_64x64	(1 << 24)
+#define CURSOR_SIZE_128x128	(2 << 24)
+#define CURSOR_SIZE_256x256	(3 << 24)
 #define DC_DISP_CURSOR_START_ADDR_NS		0x43f
 
 #define DC_DISP_CURSOR_POSITION			0x440
@@ -302,6 +313,19 @@
 #define  INTERLACE_START  (1 << 1)
 #define  INTERLACE_ENABLE (1 << 0)
 
+#define DC_DISP_CURSOR_START_ADDR_HI		0x4ec
+#define DC_DISP_BLEND_CURSOR_CONTROL		0x4f1
+#define CURSOR_MODE_LEGACY			(0 << 24)
+#define CURSOR_MODE_NORMAL			(1 << 24)
+#define CURSOR_DST_BLEND_ZERO			(0 << 16)
+#define CURSOR_DST_BLEND_K1			(1 << 16)
+#define CURSOR_DST_BLEND_NEG_K1_TIMES_SRC	(2 << 16)
+#define CURSOR_DST_BLEND_MASK			(3 << 16)
+#define CURSOR_SRC_BLEND_K1			(0 << 8)
+#define CURSOR_SRC_BLEND_K1_TIMES_SRC		(1 << 8)
+#define CURSOR_SRC_BLEND_MASK			(3 << 8)
+#define CURSOR_ALPHA				0xff
+
 #define DC_WIN_CSC_YOF				0x611
 #define DC_WIN_CSC_KYRGB			0x612
 #define DC_WIN_CSC_KUR				0x613
@@ -312,7 +336,8 @@
 #define DC_WIN_CSC_KVB				0x618
 
 #define DC_WIN_WIN_OPTIONS			0x700
-#define INVERT_V     (1 <<  2)
+#define H_DIRECTION  (1 <<  0)
+#define V_DIRECTION  (1 <<  2)
 #define COLOR_EXPAND (1 <<  6)
 #define CSC_ENABLE   (1 << 18)
 #define WIN_ENABLE   (1 << 30)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 005c19b..3f132e3 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c

@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/regulator/consumer.h>
+#include <linux/workqueue.h>
 
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_panel.h>
@@ -41,6 +42,7 @@
 	struct regulator *vdd;
 
 	struct completion complete;
+	struct work_struct work;
 	struct list_head list;
 };
 
@@ -49,6 +51,11 @@
 	return container_of(aux, struct tegra_dpaux, aux);
 }
 
+static inline struct tegra_dpaux *work_to_dpaux(struct work_struct *work)
+{
+	return container_of(work, struct tegra_dpaux, work);
+}
+
 static inline unsigned long tegra_dpaux_readl(struct tegra_dpaux *dpaux,
 					      unsigned long offset)
 {
@@ -231,6 +238,14 @@
 	return ret;
 }
 
+static void tegra_dpaux_hotplug(struct work_struct *work)
+{
+	struct tegra_dpaux *dpaux = work_to_dpaux(work);
+
+	if (dpaux->output)
+		drm_helper_hpd_irq_event(dpaux->output->connector.dev);
+}
+
 static irqreturn_t tegra_dpaux_irq(int irq, void *data)
 {
 	struct tegra_dpaux *dpaux = data;
@@ -241,16 +256,8 @@
 	value = tegra_dpaux_readl(dpaux, DPAUX_INTR_AUX);
 	tegra_dpaux_writel(dpaux, value, DPAUX_INTR_AUX);
 
-	if (value & DPAUX_INTR_PLUG_EVENT) {
-		if (dpaux->output) {
-			drm_helper_hpd_irq_event(dpaux->output->connector.dev);
-		}
-	}
-
-	if (value & DPAUX_INTR_UNPLUG_EVENT) {
-		if (dpaux->output)
-			drm_helper_hpd_irq_event(dpaux->output->connector.dev);
-	}
+	if (value & (DPAUX_INTR_PLUG_EVENT | DPAUX_INTR_UNPLUG_EVENT))
+		schedule_work(&dpaux->work);
 
 	if (value & DPAUX_INTR_IRQ_EVENT) {
 		/* TODO: handle this */
@@ -273,6 +280,7 @@
 	if (!dpaux)
 		return -ENOMEM;
 
+	INIT_WORK(&dpaux->work, tegra_dpaux_hotplug);
 	init_completion(&dpaux->complete);
 	INIT_LIST_HEAD(&dpaux->list);
 	dpaux->dev = &pdev->dev;
@@ -332,7 +340,7 @@
 	dpaux->aux.transfer = tegra_dpaux_transfer;
 	dpaux->aux.dev = &pdev->dev;
 
-	err = drm_dp_aux_register_i2c_bus(&dpaux->aux);
+	err = drm_dp_aux_register(&dpaux->aux);
 	if (err < 0)
 		return err;
 
@@ -355,12 +363,14 @@
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
 
-	drm_dp_aux_unregister_i2c_bus(&dpaux->aux);
+	drm_dp_aux_unregister(&dpaux->aux);
 
 	mutex_lock(&dpaux_lock);
 	list_del(&dpaux->list);
 	mutex_unlock(&dpaux_lock);
 
+	cancel_work_sync(&dpaux->work);
+
 	clk_disable_unprepare(dpaux->clk_parent);
 	reset_control_assert(dpaux->rst);
 	clk_disable_unprepare(dpaux->clk);
@@ -404,6 +414,7 @@
 	unsigned long timeout;
 	int err;
 
+	output->connector.polled = DRM_CONNECTOR_POLL_HPD;
 	dpaux->output = output;
 
 	err = regulator_enable(dpaux->vdd);

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6f5b6e2..3396f9f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c

@@ -33,7 +33,6 @@
 	if (!tegra)
 		return -ENOMEM;
 
-	dev_set_drvdata(drm->dev, tegra);
 	mutex_init(&tegra->clients_lock);
 	INIT_LIST_HEAD(&tegra->clients);
 	drm->dev_private = tegra;
@@ -640,14 +639,40 @@
 	return 0;
 }
 
-static int host1x_drm_probe(struct host1x_device *device)
+static int host1x_drm_probe(struct host1x_device *dev)
 {
-	return drm_host1x_init(&tegra_drm_driver, device);
+	struct drm_driver *driver = &tegra_drm_driver;
+	struct drm_device *drm;
+	int err;
+
+	drm = drm_dev_alloc(driver, &dev->dev);
+	if (!drm)
+		return -ENOMEM;
+
+	drm_dev_set_unique(drm, dev_name(&dev->dev));
+	dev_set_drvdata(&dev->dev, drm);
+
+	err = drm_dev_register(drm, 0);
+	if (err < 0)
+		goto unref;
+
+	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name,
+		 driver->major, driver->minor, driver->patchlevel,
+		 driver->date, drm->primary->index);
+
+	return 0;
+
+unref:
+	drm_dev_unref(drm);
+	return err;
 }
 
-static int host1x_drm_remove(struct host1x_device *device)
+static int host1x_drm_remove(struct host1x_device *dev)
 {
-	drm_host1x_exit(&tegra_drm_driver, device);
+	struct drm_device *drm = dev_get_drvdata(&dev->dev);
+
+	drm_dev_unregister(drm);
+	drm_dev_unref(drm);
 
 	return 0;
 }
@@ -666,6 +691,7 @@
 	{ .compatible = "nvidia,tegra114-gr3d", },
 	{ .compatible = "nvidia,tegra124-dc", },
 	{ .compatible = "nvidia,tegra124-sor", },
+	{ .compatible = "nvidia,tegra124-hdmi", },
 	{ /* sentinel */ }
 };
 

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 126332c..6b8fe9d 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h

@@ -80,13 +80,13 @@
 	return container_of(client, struct tegra_drm_client, base);
 }
 
-extern int tegra_drm_register_client(struct tegra_drm *tegra,
-				     struct tegra_drm_client *client);
-extern int tegra_drm_unregister_client(struct tegra_drm *tegra,
-				       struct tegra_drm_client *client);
+int tegra_drm_register_client(struct tegra_drm *tegra,
+			      struct tegra_drm_client *client);
+int tegra_drm_unregister_client(struct tegra_drm *tegra,
+				struct tegra_drm_client *client);
 
-extern int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
-extern int tegra_drm_exit(struct tegra_drm *tegra);
+int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
+int tegra_drm_exit(struct tegra_drm *tegra);
 
 struct tegra_dc_soc_info;
 struct tegra_output;
@@ -156,6 +156,7 @@
 	} dst;
 	unsigned int bits_per_pixel;
 	unsigned int format;
+	unsigned int swap;
 	unsigned int stride[2];
 	unsigned long base[3];
 	bool bottom_up;
@@ -163,19 +164,15 @@
 };
 
 /* from dc.c */
-extern unsigned int tegra_dc_format(uint32_t format);
-extern int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
-				 const struct tegra_dc_window *window);
-extern void tegra_dc_enable_vblank(struct tegra_dc *dc);
-extern void tegra_dc_disable_vblank(struct tegra_dc *dc);
-extern void tegra_dc_cancel_page_flip(struct drm_crtc *crtc,
-				      struct drm_file *file);
+void tegra_dc_enable_vblank(struct tegra_dc *dc);
+void tegra_dc_disable_vblank(struct tegra_dc *dc);
+void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
 
 struct tegra_output_ops {
 	int (*enable)(struct tegra_output *output);
 	int (*disable)(struct tegra_output *output);
 	int (*setup_clock)(struct tegra_output *output, struct clk *clk,
-			   unsigned long pclk);
+			   unsigned long pclk, unsigned int *div);
 	int (*check_mode)(struct tegra_output *output,
 			  struct drm_display_mode *mode,
 			  enum drm_mode_status *status);
@@ -233,10 +230,11 @@
 }
 
 static inline int tegra_output_setup_clock(struct tegra_output *output,
-					   struct clk *clk, unsigned long pclk)
+					   struct clk *clk, unsigned long pclk,
+					   unsigned int *div)
 {
 	if (output && output->ops && output->ops->setup_clock)
-		return output->ops->setup_clock(output, clk, pclk);
+		return output->ops->setup_clock(output, clk, pclk, div);
 
 	return output ? -ENOSYS : -EINVAL;
 }
@@ -251,27 +249,21 @@
 	return output ? -ENOSYS : -EINVAL;
 }
 
-/* from bus.c */
-int drm_host1x_init(struct drm_driver *driver, struct host1x_device *device);
-void drm_host1x_exit(struct drm_driver *driver, struct host1x_device *device);
-
 /* from rgb.c */
-extern int tegra_dc_rgb_probe(struct tegra_dc *dc);
-extern int tegra_dc_rgb_remove(struct tegra_dc *dc);
-extern int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
-extern int tegra_dc_rgb_exit(struct tegra_dc *dc);
+int tegra_dc_rgb_probe(struct tegra_dc *dc);
+int tegra_dc_rgb_remove(struct tegra_dc *dc);
+int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
+int tegra_dc_rgb_exit(struct tegra_dc *dc);
 
 /* from output.c */
-extern int tegra_output_probe(struct tegra_output *output);
-extern int tegra_output_remove(struct tegra_output *output);
-extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
-extern int tegra_output_exit(struct tegra_output *output);
+int tegra_output_probe(struct tegra_output *output);
+int tegra_output_remove(struct tegra_output *output);
+int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
+int tegra_output_exit(struct tegra_output *output);
 
 /* from dpaux.c */
-
 struct tegra_dpaux;
 struct drm_dp_link;
-struct drm_dp_aux;
 
 struct tegra_dpaux *tegra_dpaux_find_by_of_node(struct device_node *np);
 enum drm_connector_status tegra_dpaux_detect(struct tegra_dpaux *dpaux);
@@ -288,10 +280,10 @@
 				    unsigned int index);
 bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer);
 bool tegra_fb_is_tiled(struct drm_framebuffer *framebuffer);
-extern int tegra_drm_fb_init(struct drm_device *drm);
-extern void tegra_drm_fb_exit(struct drm_device *drm);
+int tegra_drm_fb_init(struct drm_device *drm);
+void tegra_drm_fb_exit(struct drm_device *drm);
 #ifdef CONFIG_DRM_TEGRA_FBDEV
-extern void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
+void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
 #endif
 
 extern struct platform_driver tegra_dc_driver;

diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 0e599f0..bd56f2a 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c

@@ -14,6 +14,8 @@
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
+#include <linux/regulator/consumer.h>
+
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
@@ -43,11 +45,15 @@
 	struct drm_minor *minor;
 	struct dentry *debugfs;
 
+	unsigned long flags;
 	enum mipi_dsi_pixel_format format;
 	unsigned int lanes;
 
 	struct tegra_mipi_device *mipi;
 	struct mipi_dsi_host host;
+
+	struct regulator *vdd;
+	bool enabled;
 };
 
 static inline struct tegra_dsi *
@@ -244,8 +250,10 @@
 #define PKT_LP		(1 << 30)
 #define NUM_PKT_SEQ	12
 
-/* non-burst mode with sync-end */
-static const u32 pkt_seq_vnb_syne[NUM_PKT_SEQ] = {
+/*
+ * non-burst mode with sync pulses
+ */
+static const u32 pkt_seq_video_non_burst_sync_pulses[NUM_PKT_SEQ] = {
 	[ 0] = PKT_ID0(MIPI_DSI_V_SYNC_START) | PKT_LEN0(0) |
 	       PKT_ID1(MIPI_DSI_BLANKING_PACKET) | PKT_LEN1(1) |
 	       PKT_ID2(MIPI_DSI_H_SYNC_END) | PKT_LEN2(0) |
@@ -280,6 +288,36 @@
 	       PKT_ID2(MIPI_DSI_BLANKING_PACKET) | PKT_LEN2(4),
 };
 
+/*
+ * non-burst mode with sync events
+ */
+static const u32 pkt_seq_video_non_burst_sync_events[NUM_PKT_SEQ] = {
+	[ 0] = PKT_ID0(MIPI_DSI_V_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_END_OF_TRANSMISSION) | PKT_LEN1(7) |
+	       PKT_LP,
+	[ 1] = 0,
+	[ 2] = PKT_ID0(MIPI_DSI_H_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_END_OF_TRANSMISSION) | PKT_LEN1(7) |
+	       PKT_LP,
+	[ 3] = 0,
+	[ 4] = PKT_ID0(MIPI_DSI_H_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_END_OF_TRANSMISSION) | PKT_LEN1(7) |
+	       PKT_LP,
+	[ 5] = 0,
+	[ 6] = PKT_ID0(MIPI_DSI_H_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_BLANKING_PACKET) | PKT_LEN1(2) |
+	       PKT_ID2(MIPI_DSI_PACKED_PIXEL_STREAM_24) | PKT_LEN2(3),
+	[ 7] = PKT_ID0(MIPI_DSI_BLANKING_PACKET) | PKT_LEN0(4),
+	[ 8] = PKT_ID0(MIPI_DSI_H_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_END_OF_TRANSMISSION) | PKT_LEN1(7) |
+	       PKT_LP,
+	[ 9] = 0,
+	[10] = PKT_ID0(MIPI_DSI_H_SYNC_START) | PKT_LEN0(0) |
+	       PKT_ID1(MIPI_DSI_BLANKING_PACKET) | PKT_LEN1(2) |
+	       PKT_ID2(MIPI_DSI_PACKED_PIXEL_STREAM_24) | PKT_LEN2(3),
+	[11] = PKT_ID0(MIPI_DSI_BLANKING_PACKET) | PKT_LEN0(4),
+};
+
 static int tegra_dsi_set_phy_timing(struct tegra_dsi *dsi)
 {
 	struct mipi_dphy_timing timing;
@@ -361,28 +399,70 @@
 	return 0;
 }
 
+static int tegra_dsi_get_format(enum mipi_dsi_pixel_format format,
+				enum tegra_dsi_format *fmt)
+{
+	switch (format) {
+	case MIPI_DSI_FMT_RGB888:
+		*fmt = TEGRA_DSI_FORMAT_24P;
+		break;
+
+	case MIPI_DSI_FMT_RGB666:
+		*fmt = TEGRA_DSI_FORMAT_18NP;
+		break;
+
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		*fmt = TEGRA_DSI_FORMAT_18P;
+		break;
+
+	case MIPI_DSI_FMT_RGB565:
+		*fmt = TEGRA_DSI_FORMAT_16P;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int tegra_output_dsi_enable(struct tegra_output *output)
 {
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct drm_display_mode *mode = &dc->base.mode;
 	unsigned int hact, hsw, hbp, hfp, i, mul, div;
 	struct tegra_dsi *dsi = to_dsi(output);
-	/* FIXME: don't hardcode this */
-	const u32 *pkt_seq = pkt_seq_vnb_syne;
+	enum tegra_dsi_format format;
 	unsigned long value;
+	const u32 *pkt_seq;
 	int err;
 
+	if (dsi->enabled)
+		return 0;
+
+	if (dsi->flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
+		DRM_DEBUG_KMS("Non-burst video mode with sync pulses\n");
+		pkt_seq = pkt_seq_video_non_burst_sync_pulses;
+	} else {
+		DRM_DEBUG_KMS("Non-burst video mode with sync events\n");
+		pkt_seq = pkt_seq_video_non_burst_sync_events;
+	}
+
 	err = tegra_dsi_get_muldiv(dsi->format, &mul, &div);
 	if (err < 0)
 		return err;
 
+	err = tegra_dsi_get_format(dsi->format, &format);
+	if (err < 0)
+		return err;
+
 	err = clk_enable(dsi->clk);
 	if (err < 0)
 		return err;
 
 	reset_control_deassert(dsi->rst);
 
-	value = DSI_CONTROL_CHANNEL(0) | DSI_CONTROL_FORMAT(dsi->format) |
+	value = DSI_CONTROL_CHANNEL(0) | DSI_CONTROL_FORMAT(format) |
 		DSI_CONTROL_LANES(dsi->lanes - 1) |
 		DSI_CONTROL_SOURCE(dc->pipe);
 	tegra_dsi_writel(dsi, value, DSI_CONTROL);
@@ -454,6 +534,8 @@
 	value |= DSI_POWER_CONTROL_ENABLE;
 	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
 
+	dsi->enabled = true;
+
 	return 0;
 }
 
@@ -463,9 +545,12 @@
 	struct tegra_dsi *dsi = to_dsi(output);
 	unsigned long value;
 
+	if (!dsi->enabled)
+		return 0;
+
 	/* disable DSI controller */
 	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
-	value &= DSI_POWER_CONTROL_ENABLE;
+	value &= ~DSI_POWER_CONTROL_ENABLE;
 	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
 
 	/*
@@ -492,30 +577,44 @@
 
 	clk_disable(dsi->clk);
 
+	dsi->enabled = false;
+
 	return 0;
 }
 
 static int tegra_output_dsi_setup_clock(struct tegra_output *output,
-					struct clk *clk, unsigned long pclk)
+					struct clk *clk, unsigned long pclk,
+					unsigned int *divp)
 {
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct drm_display_mode *mode = &dc->base.mode;
 	unsigned int timeout, mul, div, vrefresh;
 	struct tegra_dsi *dsi = to_dsi(output);
 	unsigned long bclk, plld, value;
-	struct clk *base;
 	int err;
 
 	err = tegra_dsi_get_muldiv(dsi->format, &mul, &div);
 	if (err < 0)
 		return err;
 
+	DRM_DEBUG_KMS("mul: %u, div: %u, lanes: %u\n", mul, div, dsi->lanes);
 	vrefresh = drm_mode_vrefresh(mode);
+	DRM_DEBUG_KMS("vrefresh: %u\n", vrefresh);
 
-	pclk = mode->htotal * mode->vtotal * vrefresh;
+	/* compute byte clock */
 	bclk = (pclk * mul) / (div * dsi->lanes);
-	plld = DIV_ROUND_UP(bclk * 8, 1000000);
-	pclk = (plld * 1000000) / 2;
+
+	/*
+	 * Compute bit clock and round up to the next MHz.
+	 */
+	plld = DIV_ROUND_UP(bclk * 8, 1000000) * 1000000;
+
+	/*
+	 * We divide the frequency by two here, but we make up for that by
+	 * setting the shift clock divider (further below) to half of the
+	 * correct value.
+	 */
+	plld /= 2;
 
 	err = clk_set_parent(clk, dsi->clk_parent);
 	if (err < 0) {
@@ -523,20 +622,26 @@
 		return err;
 	}
 
-	base = clk_get_parent(dsi->clk_parent);
-
-	/*
-	 * This assumes that the parent clock is pll_d_out0 or pll_d2_out
-	 * respectively, each of which divides the base pll_d by 2.
-	 */
-	err = clk_set_rate(base, pclk * 2);
+	err = clk_set_rate(dsi->clk_parent, plld);
 	if (err < 0) {
 		dev_err(dsi->dev, "failed to set base clock rate to %lu Hz\n",
-			pclk * 2);
+			plld);
 		return err;
 	}
 
 	/*
+	 * Derive pixel clock from bit clock using the shift clock divider.
+	 * Note that this is only half of what we would expect, but we need
+	 * that to make up for the fact that we divided the bit clock by a
+	 * factor of two above.
+	 *
+	 * It's not clear exactly why this is necessary, but the display is
+	 * not working properly otherwise. Perhaps the PLLs cannot generate
+	 * frequencies sufficiently high.
+	 */
+	*divp = ((8 * mul) / (div * dsi->lanes)) - 2;
+
+	/*
 	 * XXX: Move the below somewhere else so that we don't need to have
 	 * access to the vrefresh in this function?
 	 */
@@ -610,61 +715,32 @@
 
 static int tegra_dsi_init(struct host1x_client *client)
 {
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
-	unsigned long value, i;
 	int err;
 
 	dsi->output.type = TEGRA_OUTPUT_DSI;
 	dsi->output.dev = client->dev;
 	dsi->output.ops = &dsi_ops;
 
-	err = tegra_output_init(tegra->drm, &dsi->output);
+	err = tegra_output_init(drm, &dsi->output);
 	if (err < 0) {
 		dev_err(client->dev, "output setup failed: %d\n", err);
 		return err;
 	}
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dsi_debugfs_init(dsi, tegra->drm->primary);
+		err = tegra_dsi_debugfs_init(dsi, drm->primary);
 		if (err < 0)
 			dev_err(dsi->dev, "debugfs setup failed: %d\n", err);
 	}
 
-	/*
-	 * enable high-speed mode, checksum generation, ECC generation and
-	 * disable raw mode
-	 */
-	value = tegra_dsi_readl(dsi, DSI_HOST_CONTROL);
-	value |= DSI_HOST_CONTROL_ECC | DSI_HOST_CONTROL_CS |
-		 DSI_HOST_CONTROL_HS;
-	value &= ~DSI_HOST_CONTROL_RAW;
-	tegra_dsi_writel(dsi, value, DSI_HOST_CONTROL);
-
-	tegra_dsi_writel(dsi, 0, DSI_SOL_DELAY);
-	tegra_dsi_writel(dsi, 0, DSI_MAX_THRESHOLD);
-
-	tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_CONTROL);
-
-	for (i = 0; i < 8; i++) {
-		tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_0 + i);
-		tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_8 + i);
-	}
-
-	for (i = 0; i < 12; i++)
-		tegra_dsi_writel(dsi, 0, DSI_PKT_SEQ_0_LO + i);
-
-	tegra_dsi_writel(dsi, 0, DSI_DCS_CMDS);
-
 	err = tegra_dsi_pad_calibrate(dsi);
 	if (err < 0) {
 		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
 		return err;
 	}
 
-	tegra_dsi_writel(dsi, DSI_POWER_CONTROL_ENABLE, DSI_POWER_CONTROL);
-	usleep_range(300, 1000);
-
 	return 0;
 }
 
@@ -715,66 +791,13 @@
 	return 0;
 }
 
-static void tegra_dsi_initialize(struct tegra_dsi *dsi)
-{
-	unsigned int i;
-
-	tegra_dsi_writel(dsi, 0, DSI_POWER_CONTROL);
-
-	tegra_dsi_writel(dsi, 0, DSI_INT_ENABLE);
-	tegra_dsi_writel(dsi, 0, DSI_INT_STATUS);
-	tegra_dsi_writel(dsi, 0, DSI_INT_MASK);
-
-	tegra_dsi_writel(dsi, 0, DSI_HOST_CONTROL);
-	tegra_dsi_writel(dsi, 0, DSI_CONTROL);
-
-	tegra_dsi_writel(dsi, 0, DSI_SOL_DELAY);
-	tegra_dsi_writel(dsi, 0, DSI_MAX_THRESHOLD);
-
-	tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_CONTROL);
-
-	for (i = 0; i < 8; i++) {
-		tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_0 + i);
-		tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_8 + i);
-	}
-
-	for (i = 0; i < 12; i++)
-		tegra_dsi_writel(dsi, 0, DSI_PKT_SEQ_0_LO + i);
-
-	tegra_dsi_writel(dsi, 0, DSI_DCS_CMDS);
-
-	for (i = 0; i < 4; i++)
-		tegra_dsi_writel(dsi, 0, DSI_PKT_LEN_0_1 + i);
-
-	tegra_dsi_writel(dsi, 0x00000000, DSI_PHY_TIMING_0);
-	tegra_dsi_writel(dsi, 0x00000000, DSI_PHY_TIMING_1);
-	tegra_dsi_writel(dsi, 0x000000ff, DSI_PHY_TIMING_2);
-	tegra_dsi_writel(dsi, 0x00000000, DSI_BTA_TIMING);
-
-	tegra_dsi_writel(dsi, 0, DSI_TIMEOUT_0);
-	tegra_dsi_writel(dsi, 0, DSI_TIMEOUT_1);
-	tegra_dsi_writel(dsi, 0, DSI_TO_TALLY);
-
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_CD);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CD_STATUS);
-	tegra_dsi_writel(dsi, 0, DSI_VIDEO_MODE_CONTROL);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_2);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_3);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_4);
-
-	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_CONTROL);
-	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_START);
-	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_SIZE);
-}
-
 static int tegra_dsi_host_attach(struct mipi_dsi_host *host,
 				 struct mipi_dsi_device *device)
 {
 	struct tegra_dsi *dsi = host_to_tegra(host);
 	struct tegra_output *output = &dsi->output;
 
+	dsi->flags = device->mode_flags;
 	dsi->format = device->format;
 	dsi->lanes = device->lanes;
 
@@ -829,6 +852,7 @@
 	 * attaches to the DSI host, the parameters will be taken from
 	 * the attached device.
 	 */
+	dsi->flags = MIPI_DSI_MODE_VIDEO;
 	dsi->format = MIPI_DSI_FMT_RGB888;
 	dsi->lanes = 4;
 
@@ -872,6 +896,18 @@
 		return err;
 	}
 
+	dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi");
+	if (IS_ERR(dsi->vdd)) {
+		dev_err(&pdev->dev, "cannot get VDD supply\n");
+		return PTR_ERR(dsi->vdd);
+	}
+
+	err = regulator_enable(dsi->vdd);
+	if (err < 0) {
+		dev_err(&pdev->dev, "cannot enable VDD supply\n");
+		return err;
+	}
+
 	err = tegra_dsi_setup_clocks(dsi);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot setup clocks\n");
@@ -883,8 +919,6 @@
 	if (IS_ERR(dsi->regs))
 		return PTR_ERR(dsi->regs);
 
-	tegra_dsi_initialize(dsi);
-
 	dsi->mipi = tegra_mipi_request(&pdev->dev);
 	if (IS_ERR(dsi->mipi))
 		return PTR_ERR(dsi->mipi);
@@ -929,9 +963,11 @@
 	mipi_dsi_host_unregister(&dsi->host);
 	tegra_mipi_free(dsi->mipi);
 
+	regulator_disable(dsi->vdd);
 	clk_disable_unprepare(dsi->clk_parent);
 	clk_disable_unprepare(dsi->clk_lp);
 	clk_disable_unprepare(dsi->clk);
+	reset_control_assert(dsi->rst);
 
 	err = tegra_output_remove(&dsi->output);
 	if (err < 0) {

diff --git a/drivers/gpu/drm/tegra/dsi.h b/drivers/gpu/drm/tegra/dsi.h
index 1db5cc2..5ce610d 100644
--- a/drivers/gpu/drm/tegra/dsi.h
+++ b/drivers/gpu/drm/tegra/dsi.h

@@ -117,4 +117,14 @@
 #define DSI_INIT_SEQ_DATA_14		0x5e
 #define DSI_INIT_SEQ_DATA_15		0x5f
 
+/*
+ * pixel format as used in the DSI_CONTROL_FORMAT field
+ */
+enum tegra_dsi_format {
+	TEGRA_DSI_FORMAT_16P,
+	TEGRA_DSI_FORMAT_18NP,
+	TEGRA_DSI_FORMAT_18P,
+	TEGRA_DSI_FORMAT_24P,
+};
+
 #endif

diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index f7fca09..9798a70 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c

@@ -346,11 +346,8 @@
 
 void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
 {
-	if (fbdev) {
-		drm_modeset_lock_all(fbdev->base.dev);
-		drm_fb_helper_restore_fbdev_mode(&fbdev->base);
-		drm_modeset_unlock_all(fbdev->base.dev);
-	}
+	if (fbdev)
+		drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->base);
 }
 
 static void tegra_fb_output_poll_changed(struct drm_device *drm)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index bcf9895..aa85b7b 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c

@@ -169,7 +169,8 @@
 	return ERR_PTR(ret);
 }
 
-struct tegra_bo *tegra_bo_import(struct drm_device *drm, struct dma_buf *buf)
+static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
+					struct dma_buf *buf)
 {
 	struct dma_buf_attachment *attach;
 	struct tegra_bo *bo;

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 2c7ca74..7c53941 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c

@@ -28,7 +28,7 @@
 static int gr2d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr2d *gr2d = to_gr2d(drm);
 
@@ -42,17 +42,17 @@
 		return -ENOMEM;
 	}
 
-	return tegra_drm_register_client(tegra, drm);
+	return tegra_drm_register_client(dev->dev_private, drm);
 }
 
 static int gr2d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
-	err = tegra_drm_unregister_client(tegra, drm);
+	err = tegra_drm_unregister_client(dev->dev_private, drm);
 	if (err < 0)
 		return err;
 

diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 0cbb24b..30f5ba9 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c

@@ -37,7 +37,7 @@
 static int gr3d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr3d *gr3d = to_gr3d(drm);
 
@@ -51,17 +51,17 @@
 		return -ENOMEM;
 	}
 
-	return tegra_drm_register_client(tegra, drm);
+	return tegra_drm_register_client(dev->dev_private, drm);
 }
 
 static int gr3d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
-	err = tegra_drm_unregister_client(tegra, drm);
+	err = tegra_drm_unregister_client(dev->dev_private, drm);
 	if (err < 0)
 		return err;
 

diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 6928015..ba067bb 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c

@@ -42,8 +42,9 @@
 	struct device *dev;
 	bool enabled;
 
-	struct regulator *vdd;
+	struct regulator *hdmi;
 	struct regulator *pll;
+	struct regulator *vdd;
 
 	void __iomem *regs;
 	unsigned int irq;
@@ -317,6 +318,85 @@
 	},
 };
 
+static const struct tmds_config tegra124_tmds_config[] = {
+	{ /* 480p/576p / 25.2MHz/27MHz modes */
+		.pclk = 27000000,
+		.pll0 = SOR_PLL_ICHPMP(1) | SOR_PLL_BG_V17_S(3) |
+			SOR_PLL_VCOCAP(0) | SOR_PLL_RESISTORSEL,
+		.pll1 = SOR_PLL_LOADADJ(3) | SOR_PLL_TMDS_TERMADJ(0),
+		.pe_current = PE_CURRENT0(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT1(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT2(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT3(PE_CURRENT_0_mA_T114),
+		.drive_current =
+			DRIVE_CURRENT_LANE0_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE1_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE2_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE3_T114(DRIVE_CURRENT_10_400_mA_T114),
+		.peak_current = PEAK_CURRENT_LANE0(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE1(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE2(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE3(PEAK_CURRENT_0_000_mA),
+	}, { /* 720p / 74.25MHz modes */
+		.pclk = 74250000,
+		.pll0 = SOR_PLL_ICHPMP(1) | SOR_PLL_BG_V17_S(3) |
+			SOR_PLL_VCOCAP(1) | SOR_PLL_RESISTORSEL,
+		.pll1 = SOR_PLL_PE_EN | SOR_PLL_LOADADJ(3) |
+			SOR_PLL_TMDS_TERMADJ(0),
+		.pe_current = PE_CURRENT0(PE_CURRENT_15_mA_T114) |
+			PE_CURRENT1(PE_CURRENT_15_mA_T114) |
+			PE_CURRENT2(PE_CURRENT_15_mA_T114) |
+			PE_CURRENT3(PE_CURRENT_15_mA_T114),
+		.drive_current =
+			DRIVE_CURRENT_LANE0_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE1_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE2_T114(DRIVE_CURRENT_10_400_mA_T114) |
+			DRIVE_CURRENT_LANE3_T114(DRIVE_CURRENT_10_400_mA_T114),
+		.peak_current = PEAK_CURRENT_LANE0(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE1(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE2(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE3(PEAK_CURRENT_0_000_mA),
+	}, { /* 1080p / 148.5MHz modes */
+		.pclk = 148500000,
+		.pll0 = SOR_PLL_ICHPMP(1) | SOR_PLL_BG_V17_S(3) |
+			SOR_PLL_VCOCAP(3) | SOR_PLL_RESISTORSEL,
+		.pll1 = SOR_PLL_PE_EN | SOR_PLL_LOADADJ(3) |
+			SOR_PLL_TMDS_TERMADJ(0),
+		.pe_current = PE_CURRENT0(PE_CURRENT_10_mA_T114) |
+			PE_CURRENT1(PE_CURRENT_10_mA_T114) |
+			PE_CURRENT2(PE_CURRENT_10_mA_T114) |
+			PE_CURRENT3(PE_CURRENT_10_mA_T114),
+		.drive_current =
+			DRIVE_CURRENT_LANE0_T114(DRIVE_CURRENT_12_400_mA_T114) |
+			DRIVE_CURRENT_LANE1_T114(DRIVE_CURRENT_12_400_mA_T114) |
+			DRIVE_CURRENT_LANE2_T114(DRIVE_CURRENT_12_400_mA_T114) |
+			DRIVE_CURRENT_LANE3_T114(DRIVE_CURRENT_12_400_mA_T114),
+		.peak_current = PEAK_CURRENT_LANE0(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE1(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE2(PEAK_CURRENT_0_000_mA) |
+			PEAK_CURRENT_LANE3(PEAK_CURRENT_0_000_mA),
+	}, { /* 225/297MHz modes */
+		.pclk = UINT_MAX,
+		.pll0 = SOR_PLL_ICHPMP(1) | SOR_PLL_BG_V17_S(3) |
+			SOR_PLL_VCOCAP(0xf) | SOR_PLL_RESISTORSEL,
+		.pll1 = SOR_PLL_LOADADJ(3) | SOR_PLL_TMDS_TERMADJ(7)
+			| SOR_PLL_TMDS_TERM_ENABLE,
+		.pe_current = PE_CURRENT0(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT1(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT2(PE_CURRENT_0_mA_T114) |
+			PE_CURRENT3(PE_CURRENT_0_mA_T114),
+		.drive_current =
+			DRIVE_CURRENT_LANE0_T114(DRIVE_CURRENT_25_200_mA_T114) |
+			DRIVE_CURRENT_LANE1_T114(DRIVE_CURRENT_25_200_mA_T114) |
+			DRIVE_CURRENT_LANE2_T114(DRIVE_CURRENT_25_200_mA_T114) |
+			DRIVE_CURRENT_LANE3_T114(DRIVE_CURRENT_19_200_mA_T114),
+		.peak_current = PEAK_CURRENT_LANE0(PEAK_CURRENT_3_000_mA) |
+			PEAK_CURRENT_LANE1(PEAK_CURRENT_3_000_mA) |
+			PEAK_CURRENT_LANE2(PEAK_CURRENT_3_000_mA) |
+			PEAK_CURRENT_LANE3(PEAK_CURRENT_0_800_mA),
+	},
+};
+
 static const struct tegra_hdmi_audio_config *
 tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pclk)
 {
@@ -716,13 +796,9 @@
 		return err;
 	}
 
-	/*
-	 * This assumes that the display controller will divide its parent
-	 * clock by 2 to generate the pixel clock.
-	 */
-	err = tegra_output_setup_clock(output, hdmi->clk, pclk * 2);
+	err = regulator_enable(hdmi->vdd);
 	if (err < 0) {
-		dev_err(hdmi->dev, "failed to setup clock: %d\n", err);
+		dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err);
 		return err;
 	}
 
@@ -730,7 +806,7 @@
 	if (err < 0)
 		return err;
 
-	err = clk_enable(hdmi->clk);
+	err = clk_prepare_enable(hdmi->clk);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to enable clock: %d\n", err);
 		return err;
@@ -740,6 +816,17 @@
 	usleep_range(1000, 2000);
 	reset_control_deassert(hdmi->rst);
 
+	/* power up sequence */
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_PLL0);
+	value &= ~SOR_PLL_PDBG;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_PLL0);
+
+	usleep_range(10, 20);
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_PLL0);
+	value &= ~SOR_PLL_PWR;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_PLL0);
+
 	tegra_dc_writel(dc, VSYNC_H_POSITION(1),
 			DC_DISP_DISP_TIMING_OPTIONS);
 	tegra_dc_writel(dc, DITHER_CONTROL_DISABLE | BASE_COLOR_SIZE888,
@@ -838,9 +925,13 @@
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(0));
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(8));
 
-	value = 0x1c800;
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_CSTM);
 	value &= ~SOR_CSTM_ROTCLK(~0);
 	value |= SOR_CSTM_ROTCLK(2);
+	value |= SOR_CSTM_PLLDIV;
+	value &= ~SOR_CSTM_LVDS_ENABLE;
+	value &= ~SOR_CSTM_MODE_MASK;
+	value |= SOR_CSTM_MODE_TMDS;
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_CSTM);
 
 	/* start SOR */
@@ -930,10 +1021,18 @@
 	 * sure it's only executed when the output is attached to one.
 	 */
 	if (dc) {
+		/*
+		 * XXX: We can't do this here because it causes HDMI to go
+		 * into an erroneous state with the result that HDMI won't
+		 * properly work once disabled. See also a similar symptom
+		 * for the SOR output.
+		 */
+		/*
 		value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL);
 		value &= ~(PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
 			   PW4_ENABLE | PM0_ENABLE | PM1_ENABLE);
 		tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+		*/
 
 		value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND);
 		value &= ~DISP_CTRL_MODE_MASK;
@@ -947,8 +1046,9 @@
 		tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
 	}
 
+	clk_disable_unprepare(hdmi->clk);
 	reset_control_assert(hdmi->rst);
-	clk_disable(hdmi->clk);
+	regulator_disable(hdmi->vdd);
 	regulator_disable(hdmi->pll);
 
 	hdmi->enabled = false;
@@ -957,10 +1057,10 @@
 }
 
 static int tegra_output_hdmi_setup_clock(struct tegra_output *output,
-					 struct clk *clk, unsigned long pclk)
+					 struct clk *clk, unsigned long pclk,
+					 unsigned int *div)
 {
 	struct tegra_hdmi *hdmi = to_hdmi(output);
-	struct clk *base;
 	int err;
 
 	err = clk_set_parent(clk, hdmi->clk_parent);
@@ -969,17 +1069,12 @@
 		return err;
 	}
 
-	base = clk_get_parent(hdmi->clk_parent);
-
-	/*
-	 * This assumes that the parent clock is pll_d_out0 or pll_d2_out
-	 * respectively, each of which divides the base pll_d by 2.
-	 */
-	err = clk_set_rate(base, pclk * 2);
+	err = clk_set_rate(hdmi->clk_parent, pclk);
 	if (err < 0)
-		dev_err(output->dev,
-			"failed to set base clock rate to %lu Hz\n",
-			pclk * 2);
+		dev_err(output->dev, "failed to set clock rate to %lu Hz\n",
+			pclk);
+
+	*div = 0;
 
 	return 0;
 }
@@ -1017,7 +1112,7 @@
 	struct tegra_hdmi *hdmi = node->info_ent->data;
 	int err;
 
-	err = clk_enable(hdmi->clk);
+	err = clk_prepare_enable(hdmi->clk);
 	if (err)
 		return err;
 
@@ -1186,7 +1281,7 @@
 
 #undef DUMP_REG
 
-	clk_disable(hdmi->clk);
+	clk_disable_unprepare(hdmi->clk);
 
 	return 0;
 }
@@ -1252,33 +1347,33 @@
 
+/*
+ * host1x client init callback for the HDMI output: fills in the output
+ * description, registers it with the DRM device stored as the parent's
+ * driver data, optionally sets up debugfs and finally enables the "hdmi"
+ * regulator.
+ *
+ * Returns 0 on success or a negative error code. A debugfs setup failure
+ * is only logged and does not fail the init.
+ */
 static int tegra_hdmi_init(struct host1x_client *client)
 {
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
 	int err;
 
-	err = regulator_enable(hdmi->vdd);
-	if (err < 0) {
-		dev_err(client->dev, "failed to enable VDD regulator: %d\n",
-			err);
-		return err;
-	}
-
 	hdmi->output.type = TEGRA_OUTPUT_HDMI;
 	hdmi->output.dev = client->dev;
 	hdmi->output.ops = &hdmi_ops;
 
-	err = tegra_output_init(tegra->drm, &hdmi->output);
+	err = tegra_output_init(drm, &hdmi->output);
 	if (err < 0) {
 		dev_err(client->dev, "output setup failed: %d\n", err);
 		return err;
 	}
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_init(hdmi, tegra->drm->primary);
+		err = tegra_hdmi_debugfs_init(hdmi, drm->primary);
 		if (err < 0)
 			dev_err(client->dev, "debugfs setup failed: %d\n", err);
 	}
 
+	/*
+	 * NOTE(review): on failure this returns without undoing the output
+	 * and debugfs setup performed above - confirm whether the caller
+	 * cleans up or whether teardown is needed here.
+	 */
+	err = regulator_enable(hdmi->hdmi);
+	if (err < 0) {
+		dev_err(client->dev, "failed to enable HDMI regulator: %d\n",
+			err);
+		return err;
+	}
+
 	return 0;
 }
 
@@ -1287,6 +1382,8 @@
 	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
 	int err;
 
+	regulator_disable(hdmi->hdmi);
+
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
 		err = tegra_hdmi_debugfs_exit(hdmi);
 		if (err < 0)
@@ -1306,8 +1403,6 @@
 		return err;
 	}
 
-	regulator_disable(hdmi->vdd);
-
 	return 0;
 }
 
@@ -1340,7 +1435,16 @@
 	.has_sor_io_peak_current = true,
 };
 
+/*
+ * HDMI configuration selected for "nvidia,tegra124-hdmi": uses the
+ * Tegra124 TMDS table and overrides the fuse settings through bit 31 of
+ * HDMI_NV_PDISP_SOR_PAD_CTLS0.
+ */
+static const struct tegra_hdmi_config tegra124_hdmi_config = {
+	.tmds = tegra124_tmds_config,
+	.num_tmds = ARRAY_SIZE(tegra124_tmds_config),
+	.fuse_override_offset = HDMI_NV_PDISP_SOR_PAD_CTLS0,
+	/* unsigned constant: shifting 1 into the sign bit of an int is UB */
+	.fuse_override_value = 1U << 31,
+	.has_sor_io_peak_current = true,
+};
+
 static const struct of_device_id tegra_hdmi_of_match[] = {
+	{ .compatible = "nvidia,tegra124-hdmi", .data = &tegra124_hdmi_config },
 	{ .compatible = "nvidia,tegra114-hdmi", .data = &tegra114_hdmi_config },
 	{ .compatible = "nvidia,tegra30-hdmi", .data = &tegra30_hdmi_config },
 	{ .compatible = "nvidia,tegra20-hdmi", .data = &tegra20_hdmi_config },
@@ -1381,28 +1485,20 @@
 		return PTR_ERR(hdmi->rst);
 	}
 
-	err = clk_prepare(hdmi->clk);
-	if (err < 0)
-		return err;
-
 	hdmi->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(hdmi->clk_parent))
 		return PTR_ERR(hdmi->clk_parent);
 
-	err = clk_prepare(hdmi->clk_parent);
-	if (err < 0)
-		return err;
-
 	err = clk_set_parent(hdmi->clk, hdmi->clk_parent);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to setup clocks: %d\n", err);
 		return err;
 	}
 
-	hdmi->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(hdmi->vdd)) {
-		dev_err(&pdev->dev, "failed to get VDD regulator\n");
-		return PTR_ERR(hdmi->vdd);
+	hdmi->hdmi = devm_regulator_get(&pdev->dev, "hdmi");
+	if (IS_ERR(hdmi->hdmi)) {
+		dev_err(&pdev->dev, "failed to get HDMI regulator\n");
+		return PTR_ERR(hdmi->hdmi);
 	}
 
 	hdmi->pll = devm_regulator_get(&pdev->dev, "pll");
@@ -1411,6 +1507,12 @@
 		return PTR_ERR(hdmi->pll);
 	}
 
+	hdmi->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	if (IS_ERR(hdmi->vdd)) {
+		dev_err(&pdev->dev, "failed to get VDD regulator\n");
+		return PTR_ERR(hdmi->vdd);
+	}
+
 	hdmi->output.dev = &pdev->dev;
 
 	err = tegra_output_probe(&hdmi->output);
@@ -1462,8 +1564,8 @@
 		return err;
 	}
 
-	clk_unprepare(hdmi->clk_parent);
-	clk_unprepare(hdmi->clk);
+	clk_disable_unprepare(hdmi->clk_parent);
+	clk_disable_unprepare(hdmi->clk);
 
 	return 0;
 }

diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/drm/tegra/hdmi.h
index 0aebc48..919a19d 100644
--- a/drivers/gpu/drm/tegra/hdmi.h
+++ b/drivers/gpu/drm/tegra/hdmi.h

@@ -190,6 +190,11 @@
 
 #define HDMI_NV_PDISP_SOR_CSTM					0x5a
 #define SOR_CSTM_ROTCLK(x) (((x) & 0xf) << 24)
+#define SOR_CSTM_PLLDIV (1 << 21)
+#define SOR_CSTM_LVDS_ENABLE (1 << 16)
+#define SOR_CSTM_MODE_LVDS (0 << 12)
+#define SOR_CSTM_MODE_TMDS (1 << 12)
+#define SOR_CSTM_MODE_MASK (3 << 12)
 
 #define HDMI_NV_PDISP_SOR_LVDS					0x5b
 #define HDMI_NV_PDISP_SOR_CRCA					0x5c

diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
index 0266fb4..d6af9be 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c

@@ -159,11 +159,38 @@
 }
 
 static int tegra_output_rgb_setup_clock(struct tegra_output *output,
-					struct clk *clk, unsigned long pclk)
+					struct clk *clk, unsigned long pclk,
+					unsigned int *div)
 {
 	struct tegra_rgb *rgb = to_rgb(output);
+	int err;
 
-	return clk_set_parent(clk, rgb->clk_parent);
+	err = clk_set_parent(clk, rgb->clk_parent);
+	if (err < 0) {
+		dev_err(output->dev, "failed to set parent: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * We may not want to change the frequency of the parent clock, since
+	 * it may be a parent for other peripherals. This is due to the fact
+	 * that on Tegra20 there's only a single clock dedicated to display
+	 * (pll_d_out0), whereas later generations have a second one that can
+	 * be used to independently drive a second output (pll_d2_out0).
+	 *
+	 * As a way to support multiple outputs on Tegra20 as well, pll_p is
+	 * typically used as the parent clock for the display controllers.
+	 * But this comes at a cost: pll_p is the parent of several other
+	 * peripherals, so its frequency shouldn't change out of the blue.
+	 *
+	 * The best we can do at this point is to use the shift clock divider
+	 * and hope that the desired frequency can be matched (or at least
+	 * matched sufficiently close that the panel will still work).
+	 */
+
+	*div = ((clk_get_rate(clk) * 2) / pclk) - 2;
+
+	return 0;
 }
 
 static int tegra_output_rgb_check_mode(struct tegra_output *output,

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 49ef572..27c979b 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c

@@ -7,6 +7,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
@@ -33,7 +34,23 @@
 
 	struct tegra_dpaux *dpaux;
 
+	struct mutex lock;
 	bool enabled;
+
+	struct dentry *debugfs;
+};
+
+/*
+ * Link configuration filled in by tegra_sor_calc_config() and programmed
+ * into the SOR registers by tegra_output_sor_enable().
+ */
+struct tegra_sor_config {
+	/* input: bits per pixel, must be non-zero before the calculation */
+	u32 bits_per_pixel;
+
+	/* active symbol settings written to SOR_DP_CONFIG_0 */
+	u32 active_polarity;
+	u32 active_count;
+	/* transfer unit size written to SOR_DP_LINKCTL_0 */
+	u32 tu_size;
+	u32 active_frac;
+	/* watermark written to SOR_DP_CONFIG_0 */
+	u32 watermark;
+
+	/* symbols per horizontal/vertical blanking interval */
+	u32 hblank_symbols;
+	u32 vblank_symbols;
 };
 
 static inline struct tegra_sor *
@@ -289,34 +306,232 @@
 	return -ETIMEDOUT;
 }
 
+/*
+ * Working state for the transfer unit search: inputs set up by
+ * tegra_sor_calc_config() plus the best candidate recorded so far by
+ * tegra_sor_compute_params().
+ */
+struct tegra_sor_params {
+	/* number of link clocks per line */
+	unsigned int num_clocks;
+	/* ratio between input and output */
+	u64 ratio;
+	/* precision factor */
+	u64 precision;
+
+	/* best candidate found so far */
+	unsigned int active_polarity;
+	unsigned int active_count;
+	unsigned int active_frac;
+	unsigned int tu_size;
+	/* absolute error of the best candidate; new ones must be below it */
+	unsigned int error;
+};
+
+/*
+ * Evaluate one candidate transfer unit size.
+ *
+ * Derives the active symbol count, fraction and polarity for @tu_size from
+ * params->ratio (fixed point, scaled by params->precision) and estimates
+ * the resulting error. The candidate is stored in @params only if its
+ * error is not positive and its magnitude is below the best error recorded
+ * so far.
+ *
+ * Returns non-zero if the candidate was stored with an error of exactly
+ * zero (the search can stop), zero otherwise.
+ */
+static int tegra_sor_compute_params(struct tegra_sor *sor,
+				    struct tegra_sor_params *params,
+				    unsigned int tu_size)
+{
+	const u64 f = params->precision;
+	u64 symbols, whole, rem, approx;
+	u32 polarity, sym_frac = 0;
+	s64 error;
+
+	/* split the scaled symbol count into whole part and remainder */
+	symbols = params->ratio * tu_size;
+	whole = div_u64(symbols, f) * f;
+	rem = symbols - whole;
+
+	/* keep the remainder below one half; polarity records the flip */
+	polarity = rem >= (f / 2);
+	if (polarity)
+		rem = f - rem;
+
+	if (rem != 0) {
+		rem = div_u64(f * f,  rem); /* 1/fraction */
+
+		if (rem > (15 * f)) {
+			sym_frac = polarity ? 1 : 15;
+		} else {
+			sym_frac = div_u64(rem, f);
+
+			/* round up */
+			if (polarity)
+				sym_frac++;
+		}
+	}
+
+	if (sym_frac == 1)
+		polarity = 0;
+
+	/* symbol count these settings would actually produce */
+	if (!sym_frac) {
+		approx = polarity ? whole + f : whole;
+	} else if (polarity) {
+		approx = whole + (sym_frac * (f - 1)) * f;
+		approx = div_u64(approx, sym_frac * f);
+	} else {
+		approx = whole + div_u64(f, sym_frac);
+	}
+
+	error = div_s64(symbols - approx, tu_size);
+	error *= params->num_clocks;
+
+	/* reject positive errors and anything not better than the best */
+	if (error > 0 || abs64(error) >= params->error)
+		return false;
+
+	params->active_count = div_u64(whole, f);
+	params->active_polarity = polarity;
+	params->active_frac = sym_frac;
+	params->error = abs64(error);
+	params->tu_size = tu_size;
+
+	return error == 0;
+}
+
+/*
+ * Compute the link configuration for @mode on @link.
+ *
+ * config->bits_per_pixel must be set by the caller; all other fields of
+ * @config are filled in here. Transfer unit sizes from 64 down to 32 are
+ * tried and the best candidate found by tegra_sor_compute_params() is
+ * used.
+ *
+ * Returns 0 on success, -EINVAL if the link rate, lane count, pixel clock
+ * or bits per pixel is zero, or -ERANGE if the stream needs at least as
+ * much bandwidth as the link provides.
+ */
+static int tegra_sor_calc_config(struct tegra_sor *sor,
+				 struct drm_display_mode *mode,
+				 struct tegra_sor_config *config,
+				 struct drm_dp_link *link)
+{
+	/* f is the fixed-point scale used for ratio and error below */
+	const u64 f = 100000, link_rate = link->rate * 1000;
+	const u64 pclk = mode->clock * 1000;
+	u64 input, output, watermark, num;
+	struct tegra_sor_params params;
+	u32 num_syms_per_line;
+	unsigned int i;
+
+	if (!link_rate || !link->num_lanes || !pclk || !config->bits_per_pixel)
+		return -EINVAL;
+
+	output = link_rate * 8 * link->num_lanes;
+	input = pclk * config->bits_per_pixel;
+
+	if (input >= output)
+		return -ERANGE;
+
+	memset(&params, 0, sizeof(params));
+	params.ratio = div64_u64(input * f, output);
+	params.num_clocks = div_u64(link_rate * mode->hdisplay, pclk);
+	params.precision = f;
+	params.error = 64 * f;
+	params.tu_size = 64;
+
+	/* stop early as soon as a candidate with zero error is found */
+	for (i = params.tu_size; i >= 32; i--)
+		if (tegra_sor_compute_params(sor, &params, i))
+			break;
+
+	if (params.active_frac == 0) {
+		config->active_polarity = 0;
+		config->active_count = params.active_count;
+
+		/* NOTE(review): wraps if active_count is 0 - confirm range */
+		if (!params.active_polarity)
+			config->active_count--;
+
+		config->tu_size = params.tu_size;
+		config->active_frac = 1;
+	} else {
+		config->active_polarity = params.active_polarity;
+		config->active_count = params.active_count;
+		config->active_frac = params.active_frac;
+		config->tu_size = params.tu_size;
+	}
+
+	dev_dbg(sor->dev,
+		"polarity: %d active count: %d tu size: %d active frac: %d\n",
+		config->active_polarity, config->active_count,
+		config->tu_size, config->active_frac);
+
+	watermark = params.ratio * config->tu_size * (f - params.ratio);
+	watermark = div_u64(watermark, f);
+
+	watermark = div_u64(watermark + params.error, f);
+	config->watermark = watermark + (config->bits_per_pixel / 8) + 2;
+	num_syms_per_line = (mode->hdisplay * config->bits_per_pixel) *
+			    (link->num_lanes * 8);
+
+	/* cap the watermark at 30 and at the symbols-per-line value */
+	if (config->watermark > 30) {
+		config->watermark = 30;
+		dev_err(sor->dev,
+			"unable to compute TU size, forcing watermark to %u\n",
+			config->watermark);
+	} else if (config->watermark > num_syms_per_line) {
+		config->watermark = num_syms_per_line;
+		dev_err(sor->dev, "watermark too high, forcing to %u\n",
+			config->watermark);
+	}
+
+	/* compute the number of symbols per horizontal blanking interval */
+	num = ((mode->htotal - mode->hdisplay) - 7) * link_rate;
+	config->hblank_symbols = div_u64(num, pclk);
+
+	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+		config->hblank_symbols -= 3;
+
+	config->hblank_symbols -= 12 / link->num_lanes;
+
+	/* compute the number of symbols per vertical blanking interval */
+	num = (mode->hdisplay - 25) * link_rate;
+	config->vblank_symbols = div_u64(num, pclk);
+	config->vblank_symbols -= 36 / link->num_lanes + 4;
+
+	dev_dbg(sor->dev, "blank symbols: H:%u V:%u\n", config->hblank_symbols,
+		config->vblank_symbols);
+
+	return 0;
+}
+
 static int tegra_output_sor_enable(struct tegra_output *output)
 {
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct drm_display_mode *mode = &dc->base.mode;
 	unsigned int vbe, vse, hbe, hse, vbs, hbs, i;
 	struct tegra_sor *sor = to_sor(output);
+	struct tegra_sor_config config;
+	struct drm_dp_link link;
+	struct drm_dp_aux *aux;
 	unsigned long value;
-	int err;
+	int err = 0;
+
+	mutex_lock(&sor->lock);
 
 	if (sor->enabled)
-		return 0;
+		goto unlock;
 
 	err = clk_prepare_enable(sor->clk);
 	if (err < 0)
-		return err;
+		goto unlock;
 
 	reset_control_deassert(sor->rst);
 
+	/* FIXME: properly convert to struct drm_dp_aux */
+	aux = (struct drm_dp_aux *)sor->dpaux;
+
 	if (sor->dpaux) {
 		err = tegra_dpaux_enable(sor->dpaux);
 		if (err < 0)
 			dev_err(sor->dev, "failed to enable DP: %d\n", err);
+
+		err = drm_dp_link_probe(aux, &link);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to probe eDP link: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	err = clk_set_parent(sor->clk, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
+	memset(&config, 0, sizeof(config));
+	config.bits_per_pixel = 24; /* XXX: don't hardcode? */
+
+	err = tegra_sor_calc_config(sor, mode, &config, &link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to compute link configuration: %d\n",
+			err);
+
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
 	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
@@ -385,7 +600,7 @@
 	err = tegra_io_rail_power_on(TEGRA_IO_RAIL_LVDS);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to power on I/O rail: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	usleep_range(5, 100);
@@ -419,15 +634,29 @@
 	if (err < 0)
 		dev_err(sor->dev, "failed to set DP parent clock: %d\n", err);
 
-	/* power dplanes (XXX parameterize based on link?) */
+	/* power DP lanes */
 	value = tegra_sor_readl(sor, SOR_DP_PADCTL_0);
-	value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
-		 SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2;
+
+	if (link.num_lanes <= 2)
+		value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2);
+	else
+		value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2;
+
+	if (link.num_lanes <= 1)
+		value &= ~SOR_DP_PADCTL_PD_TXD_1;
+	else
+		value |= SOR_DP_PADCTL_PD_TXD_1;
+
+	if (link.num_lanes == 0)
+		value &= ~SOR_DP_PADCTL_PD_TXD_0;
+	else
+		value |= SOR_DP_PADCTL_PD_TXD_0;
+
 	tegra_sor_writel(sor, value, SOR_DP_PADCTL_0);
 
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL_0);
 	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(4);
+	value |= SOR_DP_LINKCTL_LANE_COUNT(link.num_lanes);
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL_0);
 
 	/* start lane sequencer */
@@ -443,10 +672,10 @@
 		usleep_range(250, 1000);
 	}
 
-	/* set link bandwidth (2.7 GHz, XXX: parameterize based on link?) */
+	/* set link bandwidth */
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G2_70;
+	value |= drm_dp_link_rate_to_bw_code(link.rate) << 2;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
 	/* set linkctl */
@@ -454,7 +683,7 @@
 	value |= SOR_DP_LINKCTL_ENABLE;
 
 	value &= ~SOR_DP_LINKCTL_TU_SIZE_MASK;
-	value |= SOR_DP_LINKCTL_TU_SIZE(59); /* XXX: don't hardcode? */
+	value |= SOR_DP_LINKCTL_TU_SIZE(config.tu_size);
 
 	value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL_0);
@@ -470,28 +699,31 @@
 
 	value = tegra_sor_readl(sor, SOR_DP_CONFIG_0);
 	value &= ~SOR_DP_CONFIG_WATERMARK_MASK;
-	value |= SOR_DP_CONFIG_WATERMARK(14); /* XXX: don't hardcode? */
+	value |= SOR_DP_CONFIG_WATERMARK(config.watermark);
 
 	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_COUNT_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(47); /* XXX: don't hardcode? */
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(config.active_count);
 
 	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_FRAC_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(9); /* XXX: don't hardcode? */
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(config.active_frac);
 
-	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY; /* XXX: don't hardcode? */
+	if (config.active_polarity)
+		value |= SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
+	else
+		value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
 
 	value |= SOR_DP_CONFIG_ACTIVE_SYM_ENABLE;
-	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE; /* XXX: don't hardcode? */
+	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE;
 	tegra_sor_writel(sor, value, SOR_DP_CONFIG_0);
 
 	value = tegra_sor_readl(sor, SOR_DP_AUDIO_HBLANK_SYMBOLS);
 	value &= ~SOR_DP_AUDIO_HBLANK_SYMBOLS_MASK;
-	value |= 137; /* XXX: don't hardcode? */
+	value |= config.hblank_symbols & 0xffff;
 	tegra_sor_writel(sor, value, SOR_DP_AUDIO_HBLANK_SYMBOLS);
 
 	value = tegra_sor_readl(sor, SOR_DP_AUDIO_VBLANK_SYMBOLS);
 	value &= ~SOR_DP_AUDIO_VBLANK_SYMBOLS_MASK;
-	value |= 2368; /* XXX: don't hardcode? */
+	value |= config.vblank_symbols & 0xffff;
 	tegra_sor_writel(sor, value, SOR_DP_AUDIO_VBLANK_SYMBOLS);
 
 	/* enable pad calibration logic */
@@ -500,30 +732,27 @@
 	tegra_sor_writel(sor, value, SOR_DP_PADCTL_0);
 
 	if (sor->dpaux) {
-		/* FIXME: properly convert to struct drm_dp_aux */
-		struct drm_dp_aux *aux = (struct drm_dp_aux *)sor->dpaux;
-		struct drm_dp_link link;
 		u8 rate, lanes;
 
 		err = drm_dp_link_probe(aux, &link);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to probe eDP link: %d\n",
 				err);
-			return err;
+			goto unlock;
 		}
 
 		err = drm_dp_link_power_up(aux, &link);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to power up eDP link: %d\n",
 				err);
-			return err;
+			goto unlock;
 		}
 
 		err = drm_dp_link_configure(aux, &link);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to configure eDP link: %d\n",
 				err);
-			return err;
+			goto unlock;
 		}
 
 		rate = drm_dp_link_rate_to_bw_code(link.rate);
@@ -558,7 +787,7 @@
 		if (err < 0) {
 			dev_err(sor->dev, "DP fast link training failed: %d\n",
 				err);
-			return err;
+			goto unlock;
 		}
 
 		dev_dbg(sor->dev, "fast link training succeeded\n");
@@ -567,7 +796,7 @@
 	err = tegra_sor_power_up(sor, 250);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	/* start display controller in continuous mode */
@@ -586,12 +815,26 @@
 	 * configure panel (24bpp, vsync-, hsync-, DP-A protocol, complete
 	 * raster, associate with display controller)
 	 */
-	value = SOR_STATE_ASY_PIXELDEPTH_BPP_24_444 |
-		SOR_STATE_ASY_VSYNCPOL |
+	value = SOR_STATE_ASY_VSYNCPOL |
 		SOR_STATE_ASY_HSYNCPOL |
 		SOR_STATE_ASY_PROTOCOL_DP_A |
 		SOR_STATE_ASY_CRC_MODE_COMPLETE |
 		SOR_STATE_ASY_OWNER(dc->pipe + 1);
+
+	switch (config.bits_per_pixel) {
+	case 24:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
+		break;
+
+	case 18:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+
 	tegra_sor_writel(sor, value, SOR_STATE_1);
 
 	/*
@@ -620,11 +863,8 @@
 	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
 	tegra_sor_writel(sor, value, SOR_HEAD_STATE_4(0));
 
-	/* XXX interlaced mode */
-	tegra_sor_writel(sor, 0x00000001, SOR_HEAD_STATE_5(0));
-
 	/* CSTM (LVDS, link A/B, upper) */
-	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_B | SOR_CSTM_LINK_ACT_B |
+	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
 		SOR_CSTM_UPPER;
 	tegra_sor_writel(sor, value, SOR_CSTM);
 
@@ -632,7 +872,7 @@
 	err = tegra_sor_setup_pwm(sor, 250);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to setup PWM: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
@@ -644,18 +884,20 @@
 	err = tegra_sor_attach(sor);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	err = tegra_sor_wakeup(sor);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable DC: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	sor->enabled = true;
 
-	return 0;
+unlock:
+	mutex_unlock(&sor->lock);
+	return err;
 }
 
 static int tegra_sor_detach(struct tegra_sor *sor)
@@ -740,7 +982,7 @@
 	tegra_sor_writel(sor, value, SOR_DP_PADCTL_0);
 
 	/* stop lane sequencer */
-	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN |
+	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
 		SOR_LANE_SEQ_CTL_POWER_STATE_DOWN;
 	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
 
@@ -783,15 +1025,17 @@
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct tegra_sor *sor = to_sor(output);
 	unsigned long value;
-	int err;
+	int err = 0;
+
+	mutex_lock(&sor->lock);
 
 	if (!sor->enabled)
-		return 0;
+		goto unlock;
 
 	err = tegra_sor_detach(sor);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	tegra_sor_writel(sor, 0, SOR_STATE_1);
@@ -832,21 +1076,21 @@
 	err = tegra_sor_power_down(sor);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	if (sor->dpaux) {
 		err = tegra_dpaux_disable(sor->dpaux);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to disable DP: %d\n", err);
-			return err;
+			goto unlock;
 		}
 	}
 
 	err = tegra_io_rail_power_off(TEGRA_IO_RAIL_LVDS);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to power off I/O rail: %d\n", err);
-		return err;
+		goto unlock;
 	}
 
 	reset_control_assert(sor->rst);
@@ -854,18 +1098,18 @@
 
 	sor->enabled = false;
 
-	return 0;
+unlock:
+	mutex_unlock(&sor->lock);
+	return err;
 }
 
 static int tegra_output_sor_setup_clock(struct tegra_output *output,
-					struct clk *clk, unsigned long pclk)
+					struct clk *clk, unsigned long pclk,
+					unsigned int *div)
 {
 	struct tegra_sor *sor = to_sor(output);
 	int err;
 
-	/* round to next MHz */
-	pclk = DIV_ROUND_UP(pclk / 2, 1000000) * 1000000;
-
 	err = clk_set_parent(clk, sor->clk_parent);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
@@ -874,11 +1118,12 @@
 
 	err = clk_set_rate(sor->clk_parent, pclk);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to set base clock rate to %lu Hz\n",
-			pclk * 2);
+		dev_err(sor->dev, "failed to set clock rate to %lu Hz\n", pclk);
 		return err;
 	}
 
+	*div = 0;
+
 	return 0;
 }
 
@@ -914,9 +1159,124 @@
 	.detect = tegra_output_sor_detect,
 };
 
+/*
+ * Open handler for the "crc" debugfs file: forwards the inode's private
+ * data (the SOR passed to debugfs_create_file()) to the file so that the
+ * read handler can find it. Always returns 0.
+ */
+static int tegra_sor_crc_open(struct inode *inode, struct file *file)
+{
+	struct tegra_sor *sor = inode->i_private;
+
+	file->private_data = sor;
+
+	return 0;
+}
+
+/* Release handler for the "crc" debugfs file: nothing to undo. */
+static int tegra_sor_crc_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/*
+ * Poll SOR_CRC_A until the VALID bit is set.
+ *
+ * @timeout is given in milliseconds. Returns 0 once the bit is seen, or
+ * -ETIMEDOUT if the deadline passes first.
+ */
+static int tegra_sor_crc_wait(struct tegra_sor *sor, unsigned long timeout)
+{
+	unsigned long deadline = jiffies + msecs_to_jiffies(timeout);
+
+	for (;;) {
+		if (!time_before(jiffies, deadline))
+			return -ETIMEDOUT;
+
+		if (tegra_sor_readl(sor, SOR_CRC_A) & SOR_CRC_A_VALID)
+			return 0;
+
+		usleep_range(100, 200);
+	}
+}
+
+/*
+ * Read handler for the "crc" debugfs file.
+ *
+ * With the SOR lock held: clears the CRC mode field in SOR_STATE_1,
+ * enables CRC generation, clears the post-serialize test bit, waits up to
+ * 100 ms for a valid CRC, then resets the valid flag and returns the value
+ * of SOR_CRC_B formatted as eight hex digits and a newline.
+ *
+ * Returns the number of bytes copied to user space, -EAGAIN if the SOR is
+ * not enabled, or the error from the wait or the copy.
+ */
+static ssize_t tegra_sor_crc_read(struct file *file, char __user *buffer,
+				  size_t size, loff_t *ppos)
+{
+	struct tegra_sor *sor = file->private_data;
+	ssize_t ret = -EAGAIN;
+	char text[10]; /* 8 hex digits + newline + NUL */
+	ssize_t len;
+	u32 reg;
+
+	mutex_lock(&sor->lock);
+
+	if (!sor->enabled)
+		goto out;
+
+	reg = tegra_sor_readl(sor, SOR_STATE_1);
+	reg &= ~SOR_STATE_ASY_CRC_MODE_MASK;
+	tegra_sor_writel(sor, reg, SOR_STATE_1);
+
+	reg = tegra_sor_readl(sor, SOR_CRC_CNTRL);
+	reg |= SOR_CRC_CNTRL_ENABLE;
+	tegra_sor_writel(sor, reg, SOR_CRC_CNTRL);
+
+	reg = tegra_sor_readl(sor, SOR_TEST);
+	reg &= ~SOR_TEST_CRC_POST_SERIALIZE;
+	tegra_sor_writel(sor, reg, SOR_TEST);
+
+	ret = tegra_sor_crc_wait(sor, 100);
+	if (ret < 0)
+		goto out;
+
+	/* acknowledge the valid flag before fetching the CRC value */
+	tegra_sor_writel(sor, SOR_CRC_A_RESET, SOR_CRC_A);
+	reg = tegra_sor_readl(sor, SOR_CRC_B);
+
+	len = scnprintf(text, sizeof(text), "%08x\n", reg);
+	ret = simple_read_from_buffer(buffer, size, ppos, text, len);
+
+out:
+	mutex_unlock(&sor->lock);
+	return ret;
+}
+
+/* File operations backing the "crc" debugfs file. */
+static const struct file_operations tegra_sor_crc_fops = {
+	.owner = THIS_MODULE,
+	.open = tegra_sor_crc_open,
+	.release = tegra_sor_crc_release,
+	.read = tegra_sor_crc_read,
+};
+
+/*
+ * Create the "sor" debugfs directory below the DRM minor's debugfs root
+ * and the "crc" file inside it.
+ *
+ * Returns 0 on success or -ENOMEM if either entry cannot be created; in
+ * the latter case the directory is removed again and sor->debugfs is
+ * reset to NULL.
+ */
+static int tegra_sor_debugfs_init(struct tegra_sor *sor,
+				  struct drm_minor *minor)
+{
+	struct dentry *crc;
+
+	sor->debugfs = debugfs_create_dir("sor", minor->debugfs_root);
+	if (!sor->debugfs)
+		return -ENOMEM;
+
+	crc = debugfs_create_file("crc", 0644, sor->debugfs, sor,
+				  &tegra_sor_crc_fops);
+	if (crc)
+		return 0;
+
+	dev_err(sor->dev,
+		"cannot create /sys/kernel/debug/dri/%s/sor/crc\n",
+		minor->debugfs_root->d_name.name);
+
+	/* undo the directory creation */
+	debugfs_remove(sor->debugfs);
+	sor->debugfs = NULL;
+
+	return -ENOMEM;
+}
+
+/*
+ * Remove the "sor" debugfs directory together with its contents and
+ * forget the dentry. Always returns 0.
+ */
+static int tegra_sor_debugfs_exit(struct tegra_sor *sor)
+{
+	struct dentry *root = sor->debugfs;
+
+	debugfs_remove_recursive(root);
+	sor->debugfs = NULL;
+
+	return 0;
+}
+
 static int tegra_sor_init(struct host1x_client *client)
 {
-	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int err;
 
@@ -928,12 +1288,18 @@
 	sor->output.dev = sor->dev;
 	sor->output.ops = &sor_ops;
 
-	err = tegra_output_init(tegra->drm, &sor->output);
+	err = tegra_output_init(drm, &sor->output);
 	if (err < 0) {
 		dev_err(sor->dev, "output setup failed: %d\n", err);
 		return err;
 	}
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_sor_debugfs_init(sor, drm->primary);
+		if (err < 0)
+			dev_err(sor->dev, "debugfs setup failed: %d\n", err);
+	}
+
 	if (sor->dpaux) {
 		err = tegra_dpaux_attach(sor->dpaux, &sor->output);
 		if (err < 0) {
@@ -964,6 +1330,12 @@
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_sor_debugfs_exit(sor);
+		if (err < 0)
+			dev_err(sor->dev, "debugfs cleanup failed: %d\n", err);
+	}
+
 	err = tegra_output_exit(&sor->output);
 	if (err < 0) {
 		dev_err(sor->dev, "output cleanup failed: %d\n", err);
@@ -1045,6 +1417,8 @@
 	sor->client.ops = &sor_client_ops;
 	sor->client.dev = &pdev->dev;
 
+	mutex_init(&sor->lock);
+
 	err = host1x_client_register(&sor->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",

diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index f4156d5..a5f8853 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h

@@ -47,6 +47,7 @@
 #define SOR_HEAD_STATE_4(x) (0x0d + (x))
 #define SOR_HEAD_STATE_5(x) (0x0f + (x))
 #define SOR_CRC_CNTRL 0x11
+#define  SOR_CRC_CNTRL_ENABLE			(1 << 0)
 #define SOR_DP_DEBUG_MVID 0x12
 
 #define SOR_CLK_CNTRL 0x13
@@ -69,6 +70,7 @@
 #define  SOR_PWR_NORMAL_STATE_PU		(1 << 0)
 
 #define SOR_TEST 0x16
+#define  SOR_TEST_CRC_POST_SERIALIZE		(1 << 23)
 #define  SOR_TEST_ATTACHED			(1 << 10)
 #define  SOR_TEST_HEAD_MODE_MASK		(3 << 8)
 #define  SOR_TEST_HEAD_MODE_AWAKE		(2 << 8)
@@ -115,6 +117,8 @@
 
 #define SOR_LVDS 0x1c
 #define SOR_CRC_A 0x1d
+#define  SOR_CRC_A_VALID			(1 << 0)
+#define  SOR_CRC_A_RESET			(1 << 0)
 #define SOR_CRC_B 0x1e
 #define SOR_BLANK 0x1f
 #define SOR_SEQ_CTL 0x20

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 171a820..b20b694 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c

@@ -268,7 +268,7 @@
 	}
 
 	pm_runtime_get_sync(dev->dev);
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	pm_runtime_put_sync(dev->dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to install IRQ handler\n");

diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index afdf383..7094b92 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c

@@ -294,6 +294,7 @@
 	dev->dev_private = udl;
 
 	if (!udl_parse_vendor_descriptor(dev, dev->usbdev)) {
+		ret = -ENODEV;
 		DRM_ERROR("firmware not recognized. Assume incompatible device\n");
 		goto err;
 	}

diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c
index a18479c..6fc0648 100644
--- a/drivers/gpu/drm/via/via_dma.c
+++ b/drivers/gpu/drm/via/via_dma.c

@@ -737,4 +737,4 @@
 	DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
 };
 
-int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls);
+int via_max_ioctl = ARRAY_SIZE(via_ioctls);

diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index 9278891..d70b1e1 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c

@@ -79,7 +79,7 @@
 
 	/* Linux specific until context tracking code gets ported to BSD */
 	/* Last context, perform cleanup */
-	if (list_is_singular(&dev->ctxlist) && dev->dev_private) {
+	if (list_is_singular(&dev->ctxlist)) {
 		DRM_DEBUG("Last Context\n");
 		drm_irq_uninstall(dev);
 		via_cleanup_futex(dev_priv);

diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index b71bcd0..67720f7 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig

@@ -1,11 +1,14 @@
 config DRM_VMWGFX
 	tristate "DRM driver for VMware Virtual GPU"
-	depends on DRM && PCI && FB
+	depends on DRM && PCI
 	select FB_DEFERRED_IO
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
 	select DRM_TTM
+	# Only needed for the transitional use of drm_crtc_init - can be removed
+	# again once vmwgfx sets up the primary plane itself.
+	select DRM_KMS_HELPER
 	help
 	  Choose this option if you would like to run 3D acceleration
 	  in a VMware virtual machine.
@@ -14,7 +17,7 @@
 	  The compiled module will be called "vmwgfx.ko".
 
 config DRM_VMWGFX_FBCON
-	depends on DRM_VMWGFX
+	depends on DRM_VMWGFX && FB
 	bool "Enable framebuffer console under vmwgfx by default"
 	help
 	   Choose this option if you are shipping a new vmwgfx

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4a223bb..246a62b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c

@@ -806,7 +806,7 @@
 	}
 
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
-		ret = drm_irq_install(dev);
+		ret = drm_irq_install(dev, dev->pdev->irq);
 		if (ret != 0) {
 			DRM_ERROR("Failed installing irq: %d\n", ret);
 			goto out_no_irq;
@@ -1417,7 +1417,7 @@
 	.enable_vblank = vmw_enable_vblank,
 	.disable_vblank = vmw_disable_vblank,
 	.ioctls = vmw_ioctls,
-	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
+	.num_ioctls = ARRAY_SIZE(vmw_ioctls),
 	.master_create = vmw_master_create,
 	.master_destroy = vmw_master_destroy,
 	.master_set = vmw_master_set,

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index a2dde5a..8f3edc4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c

@@ -187,7 +187,7 @@
 	 * can do this since the caller in the drm core doesn't check anything
 	 * which is protected by any looks.
 	 */
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 	drm_modeset_lock_all(dev_priv->dev);
 
 	/* A lot of the code assumes this */
@@ -252,7 +252,7 @@
 	ret = 0;
 out:
 	drm_modeset_unlock_all(dev_priv->dev);
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 
 	return ret;
 }
@@ -273,7 +273,7 @@
 	 * can do this since the caller in the drm core doesn't check anything
 	 * which is protected by any looks.
 	 */
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 	drm_modeset_lock_all(dev_priv->dev);
 
 	vmw_cursor_update_position(dev_priv, shown,
@@ -281,7 +281,7 @@
 				   du->cursor_y + du->hotspot_y);
 
 	drm_modeset_unlock_all(dev_priv->dev);
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 
 	return 0;
 }
@@ -2001,7 +2001,7 @@
 	if (du->pref_mode)
 		list_move(&du->pref_mode->head, &connector->probed_modes);
 
-	drm_mode_connector_list_update(connector);
+	drm_mode_connector_list_update(connector, true);
 
 	return 1;
 }

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index ccdd2e6..aaf5485 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c

@@ -216,8 +216,8 @@
 }
 EXPORT_SYMBOL(host1x_device_exit);
 
-static int host1x_register_client(struct host1x *host1x,
-				  struct host1x_client *client)
+static int host1x_add_client(struct host1x *host1x,
+			     struct host1x_client *client)
 {
 	struct host1x_device *device;
 	struct host1x_subdev *subdev;
@@ -238,8 +238,8 @@
 	return -ENODEV;
 }
 
-static int host1x_unregister_client(struct host1x *host1x,
-				    struct host1x_client *client)
+static int host1x_del_client(struct host1x *host1x,
+			     struct host1x_client *client)
 {
 	struct host1x_device *device, *dt;
 	struct host1x_subdev *subdev;
@@ -503,7 +503,7 @@
 	mutex_lock(&devices_lock);
 
 	list_for_each_entry(host1x, &devices, list) {
-		err = host1x_register_client(host1x, client);
+		err = host1x_add_client(host1x, client);
 		if (!err) {
 			mutex_unlock(&devices_lock);
 			return 0;
@@ -529,7 +529,7 @@
 	mutex_lock(&devices_lock);
 
 	list_for_each_entry(host1x, &devices, list) {
-		err = host1x_unregister_client(host1x, client);
+		err = host1x_del_client(host1x, client);
 		if (!err) {
 			mutex_unlock(&devices_lock);
 			return 0;

diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig
new file mode 100644
index 0000000..2f228a2
--- /dev/null
+++ b/drivers/gpu/ipu-v3/Kconfig

@@ -0,0 +1,7 @@
+config IMX_IPUV3_CORE
+	tristate "IPUv3 core support"
+	depends on SOC_IMX5 || SOC_IMX6Q || SOC_IMX6SL || ARCH_MULTIPLATFORM
+	depends on RESET_CONTROLLER
+	help
+	  Choose this if you have a i.MX5/6 system and want to use the Image
+	  Processing Unit. This option only enables IPU base support.

diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile
new file mode 100644
index 0000000..1887972b
--- /dev/null
+++ b/drivers/gpu/ipu-v3/Makefile

@@ -0,0 +1,3 @@
+obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o
+
+imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o ipu-smfc.o

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
similarity index 93%
rename from drivers/staging/imx-drm/ipu-v3/ipu-common.c
rename to drivers/gpu/ipu-v3/ipu-common.c
index a1f7b20..04e7b2e 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c

@@ -31,7 +31,7 @@
 
 #include <drm/drm_fourcc.h>
 
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "ipu-prv.h"
 
 static inline u32 ipu_cm_read(struct ipu_soc *ipu, unsigned offset)
@@ -661,6 +661,39 @@
 }
 EXPORT_SYMBOL_GPL(ipu_module_disable);
 
+int ipu_csi_enable(struct ipu_soc *ipu, int csi)
+{
+	return ipu_module_enable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_csi_enable);
+
+int ipu_csi_disable(struct ipu_soc *ipu, int csi)
+{
+	return ipu_module_disable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_csi_disable);
+
+int ipu_smfc_enable(struct ipu_soc *ipu)
+{
+	return ipu_module_enable(ipu, IPU_CONF_SMFC_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_enable);
+
+int ipu_smfc_disable(struct ipu_soc *ipu)
+{
+	return ipu_module_disable(ipu, IPU_CONF_SMFC_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_disable);
+
+int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	unsigned int chno = channel->num;
+
+	return (ipu_cm_read(ipu, IPU_CHA_CUR_BUF(chno)) & idma_mask(chno)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_get_current_buffer);
+
 void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num)
 {
 	struct ipu_soc *ipu = channel->ipu;
@@ -896,8 +929,17 @@
 		goto err_dp;
 	}
 
+	ret = ipu_smfc_init(ipu, dev, ipu_base +
+			devtype->cm_ofs + IPU_CM_SMFC_REG_OFS);
+	if (ret) {
+		unit = "smfc";
+		goto err_smfc;
+	}
+
 	return 0;
 
+err_smfc:
+	ipu_dp_exit(ipu);
 err_dp:
 	ipu_dmfc_exit(ipu);
 err_dmfc:
@@ -977,6 +1019,7 @@
 
 static void ipu_submodules_exit(struct ipu_soc *ipu)
 {
+	ipu_smfc_exit(ipu);
 	ipu_dp_exit(ipu);
 	ipu_dmfc_exit(ipu);
 	ipu_dc_exit(ipu);
@@ -1001,6 +1044,7 @@
 struct ipu_platform_reg {
 	struct ipu_client_platformdata pdata;
 	const char *name;
+	int reg_offset;
 };
 
 static const struct ipu_platform_reg client_reg[] = {
@@ -1022,13 +1066,29 @@
 			.dma[1] = -EINVAL,
 		},
 		.name = "imx-ipuv3-crtc",
+	}, {
+		.pdata = {
+			.csi = 0,
+			.dma[0] = IPUV3_CHANNEL_CSI0,
+			.dma[1] = -EINVAL,
+		},
+		.reg_offset = IPU_CM_CSI0_REG_OFS,
+		.name = "imx-ipuv3-camera",
+	}, {
+		.pdata = {
+			.csi = 1,
+			.dma[0] = IPUV3_CHANNEL_CSI1,
+			.dma[1] = -EINVAL,
+		},
+		.reg_offset = IPU_CM_CSI1_REG_OFS,
+		.name = "imx-ipuv3-camera",
 	},
 };
 
 static DEFINE_MUTEX(ipu_client_id_mutex);
 static int ipu_client_id;
 
-static int ipu_add_client_devices(struct ipu_soc *ipu)
+static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 {
 	struct device *dev = ipu->dev;
 	unsigned i;
@@ -1042,9 +1102,19 @@
 	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
 		const struct ipu_platform_reg *reg = &client_reg[i];
 		struct platform_device *pdev;
+		struct resource res;
 
-		pdev = platform_device_register_data(dev, reg->name,
-			id++, &reg->pdata, sizeof(reg->pdata));
+		if (reg->reg_offset) {
+			memset(&res, 0, sizeof(res));
+			res.flags = IORESOURCE_MEM;
+			res.start = ipu_base + ipu->devtype->cm_ofs + reg->reg_offset;
+			res.end = res.start + PAGE_SIZE - 1;
+			pdev = platform_device_register_resndata(dev, reg->name,
+				id++, &res, 1, &reg->pdata, sizeof(reg->pdata));
+		} else {
+			pdev = platform_device_register_data(dev, reg->name,
+				id++, &reg->pdata, sizeof(reg->pdata));
+		}
 
 		if (IS_ERR(pdev))
 			goto err_register;
@@ -1241,7 +1311,7 @@
 	if (ret)
 		goto failed_submodules_init;
 
-	ret = ipu_add_client_devices(ipu);
+	ret = ipu_add_client_devices(ipu, ipu_base);
 	if (ret) {
 		dev_err(&pdev->dev, "adding client devices failed with %d\n",
 				ret);

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c
similarity index 99%
rename from drivers/staging/imx-drm/ipu-v3/ipu-dc.c
rename to drivers/gpu/ipu-v3/ipu-dc.c
index 784a4a1..2326c75 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dc.c
+++ b/drivers/gpu/ipu-v3/ipu-dc.c

@@ -21,8 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 
-#include "../imx-drm.h"
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "ipu-prv.h"
 
 #define DC_MAP_CONF_PTR(n)	(0x108 + ((n) & ~0x1) * 2)

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
similarity index 99%
rename from drivers/staging/imx-drm/ipu-v3/ipu-di.c
rename to drivers/gpu/ipu-v3/ipu-di.c
index 849b3e1..c490ba4 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-di.c
+++ b/drivers/gpu/ipu-v3/ipu-di.c

@@ -20,7 +20,7 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "ipu-prv.h"
 
 struct ipu_di {

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c b/drivers/gpu/ipu-v3/ipu-dmfc.c
similarity index 99%
rename from drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
rename to drivers/gpu/ipu-v3/ipu-dmfc.c
index 59f182b..042c395 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
+++ b/drivers/gpu/ipu-v3/ipu-dmfc.c

@@ -17,7 +17,7 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "ipu-prv.h"
 
 #define DMFC_RD_CHAN		0x0000

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c
similarity index 99%
rename from drivers/staging/imx-drm/ipu-v3/ipu-dp.c
rename to drivers/gpu/ipu-v3/ipu-dp.c
index d90f82a..98686ed 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dp.c
+++ b/drivers/gpu/ipu-v3/ipu-dp.c

@@ -19,7 +19,7 @@
 #include <linux/io.h>
 #include <linux/err.h>
 
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "ipu-prv.h"
 
 #define DP_SYNC 0

diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h
similarity index 96%
rename from drivers/staging/imx-drm/ipu-v3/ipu-prv.h
rename to drivers/gpu/ipu-v3/ipu-prv.h
index bfc1b33..c93f50e 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
+++ b/drivers/gpu/ipu-v3/ipu-prv.h

@@ -22,7 +22,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 
-#include "imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 
 #define IPUV3_CHANNEL_CSI0			 0
 #define IPUV3_CHANNEL_CSI1			 1
@@ -151,6 +151,8 @@
 struct ipu_dc_priv;
 struct ipu_dmfc_priv;
 struct ipu_di;
+struct ipu_smfc_priv;
+
 struct ipu_devtype;
 
 struct ipu_soc {
@@ -178,6 +180,7 @@
 	struct ipu_dp_priv	*dp_priv;
 	struct ipu_dmfc_priv	*dmfc_priv;
 	struct ipu_di		*di_priv[2];
+	struct ipu_smfc_priv	*smfc_priv;
 };
 
 void ipu_srm_dp_sync_update(struct ipu_soc *ipu);
@@ -206,4 +209,7 @@
 int ipu_cpmem_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
 void ipu_cpmem_exit(struct ipu_soc *ipu);
 
+int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
+void ipu_smfc_exit(struct ipu_soc *ipu);
+
 #endif				/* __IPU_PRV_H__ */

diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c
new file mode 100644
index 0000000..e4f85ad
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-smfc.c

@@ -0,0 +1,96 @@
+/*
+ * Copyright 2008-2010 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <video/imx-ipu-v3.h>
+
+#include "ipu-prv.h"
+
+struct ipu_smfc_priv {
+	void __iomem *base;
+	spinlock_t lock;
+};
+
+/*SMFC Registers */
+#define SMFC_MAP	0x0000
+#define SMFC_WMC	0x0004
+#define SMFC_BS		0x0008
+
+int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize)
+{
+	struct ipu_smfc_priv *smfc = ipu->smfc_priv;
+	unsigned long flags;
+	u32 val, shift;
+
+	spin_lock_irqsave(&smfc->lock, flags);
+
+	shift = channel * 4;
+	val = readl(smfc->base + SMFC_BS);
+	val &= ~(0xf << shift);
+	val |= burstsize << shift;
+	writel(val, smfc->base + SMFC_BS);
+
+	spin_unlock_irqrestore(&smfc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_set_burstsize);
+
+int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id)
+{
+	struct ipu_smfc_priv *smfc = ipu->smfc_priv;
+	unsigned long flags;
+	u32 val, shift;
+
+	spin_lock_irqsave(&smfc->lock, flags);
+
+	shift = channel * 3;
+	val = readl(smfc->base + SMFC_MAP);
+	val &= ~(0x7 << shift);
+	val |= ((csi_id << 2) | mipi_id) << shift;
+	writel(val, smfc->base + SMFC_MAP);
+
+	spin_unlock_irqrestore(&smfc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_map_channel);
+
+int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev,
+		  unsigned long base)
+{
+	struct ipu_smfc_priv *smfc;
+
+	smfc = devm_kzalloc(dev, sizeof(*smfc), GFP_KERNEL);
+	if (!smfc)
+		return -ENOMEM;
+
+	ipu->smfc_priv = smfc;
+	spin_lock_init(&smfc->lock);
+
+	smfc->base = devm_ioremap(dev, base, PAGE_SIZE);
+	if (!smfc->base)
+		return -ENOMEM;
+
+	pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, smfc->base);
+
+	return 0;
+}
+
+void ipu_smfc_exit(struct ipu_soc *ipu)
+{
+}

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index ec0ae2d..6866448 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c

@@ -623,7 +623,8 @@
 	ret = dev->bus->pm->runtime_suspend(dev);
 	if (ret)
 		return ret;
-
+	if (vgasr_priv.handler->switchto)
+		vgasr_priv.handler->switchto(VGA_SWITCHEROO_IGD);
 	vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_OFF);
 	return 0;
 }

diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 24883b4..cc2bd20 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c

@@ -52,7 +52,7 @@
 static ssize_t lg4ff_range_show(struct device *dev, struct device_attribute *attr, char *buf);
 static ssize_t lg4ff_range_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count);
 
-static DEVICE_ATTR(range, S_IRWXU | S_IRWXG | S_IRWXO, lg4ff_range_show, lg4ff_range_store);
+static DEVICE_ATTR(range, S_IRWXU | S_IRWXG | S_IROTH, lg4ff_range_show, lg4ff_range_store);
 
 struct lg4ff_device_entry {
 	__u32 product_id;

diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
index c930ab8..7f965e2 100644
--- a/drivers/hid/hid-picolcd_fb.c
+++ b/drivers/hid/hid-picolcd_fb.c

@@ -501,7 +501,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(fb_update_rate, 0666, picolcd_fb_update_rate_show,
+static DEVICE_ATTR(fb_update_rate, 0664, picolcd_fb_update_rate_show,
 		picolcd_fb_update_rate_store);
 
 /* initialize Framebuffer device */

diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig
index 71b9f9a..bc60dec 100644
--- a/drivers/hsi/clients/Kconfig
+++ b/drivers/hsi/clients/Kconfig

@@ -15,7 +15,7 @@
 
 config SSI_PROTOCOL
 	tristate "SSI protocol"
-	depends on HSI && PHONET && (OMAP_SSI=y || OMAP_SSI=m)
+	depends on HSI && PHONET && OMAP_SSI
 	help
 	If you say Y here, you will enable the SSI protocol aka McSAAB.
 

diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c
index b8693f0..29aea0b 100644
--- a/drivers/hsi/controllers/omap_ssi_port.c
+++ b/drivers/hsi/controllers/omap_ssi_port.c

@@ -1116,8 +1116,7 @@
 
 	dev_dbg(&pd->dev, "init ssi port...\n");
 
-	err = ref_module(THIS_MODULE, ssi->owner);
-	if (err) {
-		dev_err(&pd->dev, "could not increment parent module refcount (err=%d)\n",
-			err);
+	if (!try_module_get(ssi->owner)) {
+		dev_err(&pd->dev,
+			"could not increment parent module refcount\n");
 		return -ENODEV;
@@ -1254,6 +1253,7 @@
 
 	omap_ssi->port[omap_port->port_id] = NULL;
 	platform_set_drvdata(pd, NULL);
+	module_put(ssi->owner);
 	pm_runtime_disable(&pd->dev);
 
 	return 0;

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6c8b032c..ed9350d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c

@@ -404,7 +404,7 @@
  * performance critical channels (IDE, SCSI and Network) will be uniformly
  * distributed across all available CPUs.
  */
-static void init_vp_index(struct vmbus_channel *channel, uuid_le *type_guid)
+static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
 {
 	u32 cur_cpu;
 	int i;

diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 18d1a84..22b7507 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h

@@ -649,9 +649,9 @@
 
 /* General vmbus interface */
 
-struct hv_device *vmbus_device_create(uuid_le *type,
-					 uuid_le *instance,
-					 struct vmbus_channel *channel);
+struct hv_device *vmbus_device_create(const uuid_le *type,
+				      const uuid_le *instance,
+				      struct vmbus_channel *channel);
 
 int vmbus_device_register(struct hv_device *child_device_obj);
 void vmbus_device_unregister(struct hv_device *device_obj);

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8e53a3c..4d6b269 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c

@@ -435,7 +435,7 @@
 	return ret;
 }
 
-static uuid_le null_guid;
+static const uuid_le null_guid;
 
 static inline bool is_null_guid(const __u8 *guid)
 {
@@ -450,7 +450,7 @@
  */
 static const struct hv_vmbus_device_id *hv_vmbus_get_id(
 					const struct hv_vmbus_device_id *id,
-					__u8 *guid)
+					const __u8 *guid)
 {
 	for (; !is_null_guid(id->guid); id++)
 		if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
@@ -779,9 +779,9 @@
  * vmbus_device_create - Creates and registers a new child device
  * on the vmbus.
  */
-struct hv_device *vmbus_device_create(uuid_le *type,
-					    uuid_le *instance,
-					    struct vmbus_channel *channel)
+struct hv_device *vmbus_device_create(const uuid_le *type,
+				      const uuid_le *instance,
+				      struct vmbus_channel *channel)
 {
 	struct hv_device *child_device_obj;
 

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0034316..08531a1 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig

@@ -1124,6 +1124,16 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called sht21.
 
+config SENSORS_SHTC1
+	tristate "Sensirion humidity and temperature sensors. SHTC1 and compat."
+	depends on I2C
+	help
+	  If you say yes here you get support for the Sensirion SHTC1 and SHTW1
+	  humidity and temperature sensors.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called shtc1.
+
 config SENSORS_S3C
 	tristate "Samsung built-in ADC"
 	depends on S3C_ADC

diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 11798ad..3dc0f02 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile

@@ -126,6 +126,7 @@
 obj-$(CONFIG_SENSORS_SCH5636)	+= sch5636.o
 obj-$(CONFIG_SENSORS_SHT15)	+= sht15.o
 obj-$(CONFIG_SENSORS_SHT21)	+= sht21.o
+obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
 obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
 obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o

diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index 6edce42..2ae8a30 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c

@@ -45,30 +45,6 @@
 
 static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END };
 
-static int atxp1_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id);
-static int atxp1_remove(struct i2c_client *client);
-static struct atxp1_data *atxp1_update_device(struct device *dev);
-static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info);
-
-static const struct i2c_device_id atxp1_id[] = {
-	{ "atxp1", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, atxp1_id);
-
-static struct i2c_driver atxp1_driver = {
-	.class		= I2C_CLASS_HWMON,
-	.driver = {
-		.name	= "atxp1",
-	},
-	.probe		= atxp1_probe,
-	.remove		= atxp1_remove,
-	.id_table	= atxp1_id,
-	.detect		= atxp1_detect,
-	.address_list	= normal_i2c,
-};
-
 struct atxp1_data {
 	struct device *hwmon_dev;
 	struct mutex update_lock;
@@ -386,4 +362,22 @@
 	return 0;
 };
 
+static const struct i2c_device_id atxp1_id[] = {
+	{ "atxp1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, atxp1_id);
+
+static struct i2c_driver atxp1_driver = {
+	.class		= I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "atxp1",
+	},
+	.probe		= atxp1_probe,
+	.remove		= atxp1_remove,
+	.id_table	= atxp1_id,
+	.detect		= atxp1_detect,
+	.address_list	= normal_i2c,
+};
+
 module_i2c_driver(atxp1_driver);

diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index 93d26e8..bfd3f3e 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c

@@ -148,7 +148,8 @@
 
 	switch (reg) {
 	case INA2XX_SHUNT_VOLTAGE:
-		val = DIV_ROUND_CLOSEST(data->regs[reg],
+		/* signed register */
+		val = DIV_ROUND_CLOSEST((s16)data->regs[reg],
 					data->config->shunt_div);
 		break;
 	case INA2XX_BUS_VOLTAGE:
@@ -160,8 +161,8 @@
 		val = data->regs[reg] * data->config->power_lsb;
 		break;
 	case INA2XX_CURRENT:
-		/* LSB=1mA (selected). Is in mA */
-		val = data->regs[reg];
+		/* signed register, LSB=1mA (selected), in mA */
+		val = (s16)data->regs[reg];
 		break;
 	default:
 		/* programmer goofed */

diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index bed4af35..b0129a5 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c

@@ -5,7 +5,7 @@
  * Copyright (c) 2002, 2003  Philip Pokorny <ppokorny@penguincomputing.com>
  * Copyright (c) 2003        Margit Schubert-While <margitsw@t-online.de>
  * Copyright (c) 2004        Justin Thiessen <jthiessen@penguincomputing.com>
- * Copyright (C) 2007--2009  Jean Delvare <jdelvare@suse.de>
+ * Copyright (C) 2007--2014  Jean Delvare <jdelvare@suse.de>
  *
  * Chip details at	      <http://www.national.com/ds/LM/LM85.pdf>
  *
@@ -39,7 +39,7 @@
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 enum chips {
-	any_chip, lm85b, lm85c,
+	lm85,
 	adm1027, adt7463, adt7468,
 	emc6d100, emc6d102, emc6d103, emc6d103s
 };
@@ -75,9 +75,6 @@
 #define LM85_COMPANY_NATIONAL		0x01
 #define LM85_COMPANY_ANALOG_DEV		0x41
 #define LM85_COMPANY_SMSC		0x5c
-#define LM85_VERSTEP_VMASK              0xf0
-#define LM85_VERSTEP_GENERIC		0x60
-#define LM85_VERSTEP_GENERIC2		0x70
 #define LM85_VERSTEP_LM85C		0x60
 #define LM85_VERSTEP_LM85B		0x62
 #define LM85_VERSTEP_LM96000_1		0x68
@@ -351,9 +348,9 @@
 	{ "adm1027", adm1027 },
 	{ "adt7463", adt7463 },
 	{ "adt7468", adt7468 },
-	{ "lm85", any_chip },
-	{ "lm85b", lm85b },
-	{ "lm85c", lm85c },
+	{ "lm85", lm85 },
+	{ "lm85b", lm85 },
+	{ "lm85c", lm85 },
 	{ "emc6d100", emc6d100 },
 	{ "emc6d101", emc6d100 },
 	{ "emc6d102", emc6d102 },
@@ -1281,7 +1278,7 @@
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int address = client->addr;
-	const char *type_name;
+	const char *type_name = NULL;
 	int company, verstep;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
@@ -1297,16 +1294,6 @@
 		"Detecting device at 0x%02x with COMPANY: 0x%02x and VERSTEP: 0x%02x\n",
 		address, company, verstep);
 
-	/* All supported chips have the version in common */
-	if ((verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC &&
-	    (verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC2) {
-		dev_dbg(&adapter->dev,
-			"Autodetection failed: unsupported version\n");
-		return -ENODEV;
-	}
-	type_name = "lm85";
-
-	/* Now, refine the detection */
 	if (company == LM85_COMPANY_NATIONAL) {
 		switch (verstep) {
 		case LM85_VERSTEP_LM85C:
@@ -1323,6 +1310,7 @@
 					"Found Winbond WPCD377I, ignoring\n");
 				return -ENODEV;
 			}
+			type_name = "lm85";
 			break;
 		}
 	} else if (company == LM85_COMPANY_ANALOG_DEV) {
@@ -1357,12 +1345,11 @@
 			type_name = "emc6d103s";
 			break;
 		}
-	} else {
-		dev_dbg(&adapter->dev,
-			"Autodetection failed: unknown vendor\n");
-		return -ENODEV;
 	}
 
+	if (!type_name)
+		return -ENODEV;
+
 	strlcpy(info->type, type_name, I2C_NAME_SIZE);
 
 	return 0;

diff --git a/drivers/hwmon/ltc4151.c b/drivers/hwmon/ltc4151.c
index af81be1..c86a184 100644
--- a/drivers/hwmon/ltc4151.c
+++ b/drivers/hwmon/ltc4151.c

@@ -47,7 +47,7 @@
 #define LTC4151_ADIN_L	0x05
 
 struct ltc4151_data {
-	struct device *hwmon_dev;
+	struct i2c_client *client;
 
 	struct mutex update_lock;
 	bool valid;
@@ -59,8 +59,8 @@
 
 static struct ltc4151_data *ltc4151_update_device(struct device *dev)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ltc4151_data *data = i2c_get_clientdata(client);
+	struct ltc4151_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
 	struct ltc4151_data *ret = data;
 
 	mutex_lock(&data->update_lock);
@@ -159,7 +159,7 @@
  * Finally, construct an array of pointers to members of the above objects,
  * as required for sysfs_create_group()
  */
-static struct attribute *ltc4151_attributes[] = {
+static struct attribute *ltc4151_attrs[] = {
 	&sensor_dev_attr_in1_input.dev_attr.attr,
 	&sensor_dev_attr_in2_input.dev_attr.attr,
 
@@ -167,54 +167,30 @@
 
 	NULL,
 };
-
-static const struct attribute_group ltc4151_group = {
-	.attrs = ltc4151_attributes,
-};
+ATTRIBUTE_GROUPS(ltc4151);
 
 static int ltc4151_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = client->adapter;
+	struct device *dev = &client->dev;
 	struct ltc4151_data *data;
-	int ret;
+	struct device *hwmon_dev;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -ENODEV;
 
-	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	i2c_set_clientdata(client, data);
+	data->client = client;
 	mutex_init(&data->update_lock);
 
-	/* Register sysfs hooks */
-	ret = sysfs_create_group(&client->dev.kobj, &ltc4151_group);
-	if (ret)
-		return ret;
-
-	data->hwmon_dev = hwmon_device_register(&client->dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		ret = PTR_ERR(data->hwmon_dev);
-		goto out_hwmon_device_register;
-	}
-
-	return 0;
-
-out_hwmon_device_register:
-	sysfs_remove_group(&client->dev.kobj, &ltc4151_group);
-	return ret;
-}
-
-static int ltc4151_remove(struct i2c_client *client)
-{
-	struct ltc4151_data *data = i2c_get_clientdata(client);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	sysfs_remove_group(&client->dev.kobj, &ltc4151_group);
-
-	return 0;
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
+							   data,
+							   ltc4151_groups);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
 static const struct i2c_device_id ltc4151_id[] = {
@@ -229,7 +205,6 @@
 		.name	= "ltc4151",
 	},
 	.probe		= ltc4151_probe,
-	.remove		= ltc4151_remove,
 	.id_table	= ltc4151_id,
 };
 

diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
new file mode 100644
index 0000000..decd7df
--- /dev/null
+++ b/drivers/hwmon/shtc1.c

@@ -0,0 +1,251 @@
+/* Sensirion SHTC1 humidity and temperature sensor driver
+ *
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/platform_data/shtc1.h>
+
+/* commands (high precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_hpm[]    = { 0x7C, 0xA2 };
+static const unsigned char shtc1_cmd_measure_nonblocking_hpm[] = { 0x78, 0x66 };
+
+/* commands (low precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_lpm[]    = { 0x64, 0x58 };
+static const unsigned char shtc1_cmd_measure_nonblocking_lpm[] = { 0x60, 0x9c };
+
+/* command for reading the ID register */
+static const unsigned char shtc1_cmd_read_id_reg[]	       = { 0xef, 0xc8 };
+
+/* constants for reading the ID register */
+#define SHTC1_ID	  0x07
+#define SHTC1_ID_REG_MASK 0x1f
+
+/* delays for non-blocking i2c commands, both in us */
+#define SHTC1_NONBLOCKING_WAIT_TIME_HPM  14400
+#define SHTC1_NONBLOCKING_WAIT_TIME_LPM   1000
+
+#define SHTC1_CMD_LENGTH      2
+#define SHTC1_RESPONSE_LENGTH 6
+
+struct shtc1_data {
+	struct i2c_client *client;
+	struct mutex update_lock;
+	bool valid;
+	unsigned long last_updated; /* in jiffies */
+
+	const unsigned char *command;
+	unsigned int nonblocking_wait_time; /* in us */
+
+	struct shtc1_platform_data setup;
+
+	int temperature; /* 1000 * temperature in dgr C */
+	int humidity; /* 1000 * relative humidity in %RH */
+};
+
+static int shtc1_update_values(struct i2c_client *client,
+			       struct shtc1_data *data,
+			       char *buf, int bufsize)
+{
+	int ret = i2c_master_send(client, data->command, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(&client->dev, "failed to send command: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	/*
+	 * In blocking mode (clock stretching mode) the I2C bus
+	 * is blocked for other traffic, thus the call to i2c_master_recv()
+	 * will wait until the data is ready. For non blocking mode, we
+	 * have to wait ourselves.
+	 */
+	if (!data->setup.blocking_io)
+		usleep_range(data->nonblocking_wait_time,
+			     data->nonblocking_wait_time + 1000);
+
+	ret = i2c_master_recv(client, buf, bufsize);
+	if (ret != bufsize) {
+		dev_err(&client->dev, "failed to read values: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return 0;
+}
+
+/* sysfs attributes */
+static struct shtc1_data *shtc1_update_client(struct device *dev)
+{
+	struct shtc1_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	unsigned char buf[SHTC1_RESPONSE_LENGTH];
+	int val;
+	int ret = 0;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) {
+		ret = shtc1_update_values(client, data, buf, sizeof(buf));
+		if (ret)
+			goto out;
+
+		/*
+		 * From datasheet:
+		 * T = -45 + 175 * ST / 2^16
+		 * RH = 100 * SRH / 2^16
+		 * Adapted for integer fixed point (3 digit) arithmetic.
+		 * Read the big-endian words bytewise; buf + 3 is unaligned.
+		 */
+		val = (buf[0] << 8) | buf[1];
+		data->temperature = ((21875 * val) >> 13) - 45000;
+		val = (buf[3] << 8) | buf[4];
+		data->humidity = ((12500 * val) >> 13);
+
+		data->last_updated = jiffies;
+		data->valid = true;
+	}
+
+out:
+	mutex_unlock(&data->update_lock);
+
+	return ret == 0 ? data : ERR_PTR(ret);
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->temperature);
+}
+
+static ssize_t humidity1_input_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->humidity);
+}
+
+static DEVICE_ATTR_RO(temp1_input);
+static DEVICE_ATTR_RO(humidity1_input);
+
+static struct attribute *shtc1_attrs[] = {
+	&dev_attr_temp1_input.attr,
+	&dev_attr_humidity1_input.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(shtc1);
+
+static void shtc1_select_command(struct shtc1_data *data)
+{
+	if (data->setup.high_precision) {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_hpm :
+				shtc1_cmd_measure_nonblocking_hpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_HPM;
+
+	} else {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_lpm :
+				shtc1_cmd_measure_nonblocking_lpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_LPM;
+	}
+}
+
+static int shtc1_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	int ret;
+	char id_reg[2];
+	struct shtc1_data *data;
+	struct device *hwmon_dev;
+	struct i2c_adapter *adap = client->adapter;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) {
+		dev_err(dev, "plain i2c transactions not supported\n");
+		return -ENODEV;
+	}
+
+	ret = i2c_master_send(client, shtc1_cmd_read_id_reg, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(dev, "could not send read_id_reg command: %d\n", ret);
+		return ret < 0 ? ret : -ENODEV;
+	}
+	ret = i2c_master_recv(client, id_reg, sizeof(id_reg));
+	if (ret != sizeof(id_reg)) {
+		dev_err(dev, "could not read ID register: %d\n", ret);
+		return -ENODEV;
+	}
+	if ((id_reg[1] & SHTC1_ID_REG_MASK) != SHTC1_ID) {
+		dev_err(dev, "ID register doesn't match\n");
+		return -ENODEV;
+	}
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->setup.blocking_io = false;
+	data->setup.high_precision = true;
+	data->client = client;
+
+	if (client->dev.platform_data)
+		data->setup = *(struct shtc1_platform_data *)dev->platform_data;
+	shtc1_select_command(data);
+	mutex_init(&data->update_lock);
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+							   client->name,
+							   data,
+							   shtc1_groups);
+	if (IS_ERR(hwmon_dev))
+		dev_dbg(dev, "unable to register hwmon device\n");
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+/* device ID table */
+static const struct i2c_device_id shtc1_id[] = {
+	{ "shtc1", 0 },
+	{ "shtw1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, shtc1_id);
+
+static struct i2c_driver shtc1_i2c_driver = {
+	.driver.name  = "shtc1",
+	.probe        = shtc1_probe,
+	.id_table     = shtc1_id,
+};
+
+module_i2c_driver(shtc1_i2c_driver);
+
+MODULE_AUTHOR("Johannes Winkelmann <johannes.winkelmann@sensirion.com>");
+MODULE_DESCRIPTION("Sensirion SHTC1 humidity and temperature sensor driver");
+MODULE_LICENSE("GPL");

diff --git a/drivers/hwmon/vexpress.c b/drivers/hwmon/vexpress.c
index 611f34c..c536190 100644
--- a/drivers/hwmon/vexpress.c
+++ b/drivers/hwmon/vexpress.c

@@ -27,17 +27,8 @@
 struct vexpress_hwmon_data {
 	struct device *hwmon_dev;
 	struct regmap *reg;
-	const char *name;
 };
 
-static ssize_t vexpress_hwmon_name_show(struct device *dev,
-		struct device_attribute *dev_attr, char *buffer)
-{
-	struct vexpress_hwmon_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buffer, "%s\n", data->name);
-}
-
 static ssize_t vexpress_hwmon_label_show(struct device *dev,
 		struct device_attribute *dev_attr, char *buffer)
 {
@@ -95,16 +86,6 @@
 	return attr->mode;
 }
 
-static DEVICE_ATTR(name, S_IRUGO, vexpress_hwmon_name_show, NULL);
-
-#define VEXPRESS_HWMON_ATTRS(_name, _label_attr, _input_attr)	\
-struct attribute *vexpress_hwmon_attrs_##_name[] = {		\
-	&dev_attr_name.attr,					\
-	&dev_attr_##_label_attr.attr,				\
-	&sensor_dev_attr_##_input_attr.dev_attr.attr,		\
-	NULL							\
-}
-
 struct vexpress_hwmon_type {
 	const char *name;
 	const struct attribute_group **attr_groups;
@@ -114,7 +95,11 @@
 static DEVICE_ATTR(in1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(volt, in1_label, in1_input);
+static struct attribute *vexpress_hwmon_attrs_volt[] = {
+	&dev_attr_in1_label.attr,
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_volt = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_volt,
@@ -131,7 +116,11 @@
 static DEVICE_ATTR(curr1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(amp, curr1_label, curr1_input);
+static struct attribute *vexpress_hwmon_attrs_amp[] = {
+	&dev_attr_curr1_label.attr,
+	&sensor_dev_attr_curr1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_amp = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_amp,
@@ -147,7 +136,11 @@
 static DEVICE_ATTR(temp1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(temp, temp1_label, temp1_input);
+static struct attribute *vexpress_hwmon_attrs_temp[] = {
+	&dev_attr_temp1_label.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_temp = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_temp,
@@ -163,7 +156,11 @@
 static DEVICE_ATTR(power1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1);
-static VEXPRESS_HWMON_ATTRS(power, power1_label, power1_input);
+static struct attribute *vexpress_hwmon_attrs_power[] = {
+	&dev_attr_power1_label.attr,
+	&sensor_dev_attr_power1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_power = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_power,
@@ -179,7 +176,11 @@
 static DEVICE_ATTR(energy1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(energy1_input, S_IRUGO, vexpress_hwmon_u64_show,
 		NULL, 1);
-static VEXPRESS_HWMON_ATTRS(energy, energy1_label, energy1_input);
+static struct attribute *vexpress_hwmon_attrs_energy[] = {
+	&dev_attr_energy1_label.attr,
+	&sensor_dev_attr_energy1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_energy = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_energy,
@@ -218,7 +219,6 @@
 
 static int vexpress_hwmon_probe(struct platform_device *pdev)
 {
-	int err;
 	const struct of_device_id *match;
 	struct vexpress_hwmon_data *data;
 	const struct vexpress_hwmon_type *type;
@@ -232,45 +232,19 @@
 	if (!match)
 		return -ENODEV;
 	type = match->data;
-	data->name = type->name;
 
 	data->reg = devm_regmap_init_vexpress_config(&pdev->dev);
 	if (IS_ERR(data->reg))
 		return PTR_ERR(data->reg);
 
-	err = sysfs_create_groups(&pdev->dev.kobj, type->attr_groups);
-	if (err)
-		goto error;
+	data->hwmon_dev = devm_hwmon_device_register_with_groups(&pdev->dev,
+			type->name, data, type->attr_groups);
 
-	data->hwmon_dev = hwmon_device_register(&pdev->dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		err = PTR_ERR(data->hwmon_dev);
-		goto error;
-	}
-
-	return 0;
-
-error:
-	sysfs_remove_group(&pdev->dev.kobj, match->data);
-	return err;
-}
-
-static int vexpress_hwmon_remove(struct platform_device *pdev)
-{
-	struct vexpress_hwmon_data *data = platform_get_drvdata(pdev);
-	const struct of_device_id *match;
-
-	hwmon_device_unregister(data->hwmon_dev);
-
-	match = of_match_device(vexpress_hwmon_of_match, &pdev->dev);
-	sysfs_remove_group(&pdev->dev.kobj, match->data);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(data->hwmon_dev);
 }
 
 static struct platform_driver vexpress_hwmon_driver = {
 	.probe = vexpress_hwmon_probe,
-	.remove = vexpress_hwmon_remove,
 	.driver	= {
 		.name = DRVNAME,
 		.owner = THIS_MODULE,

diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index bf508b5..dc21836 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile

@@ -1,18 +1,3 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
-obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
-obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
-obj-$(CONFIG_INFINIBAND_QIB)		+= hw/qib/
-obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
-obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
-obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
-obj-$(CONFIG_INFINIBAND_CXGB4)		+= hw/cxgb4/
-obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
-obj-$(CONFIG_MLX5_INFINIBAND)		+= hw/mlx5/
-obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
-obj-$(CONFIG_INFINIBAND_OCRDMA)		+= hw/ocrdma/
-obj-$(CONFIG_INFINIBAND_USNIC)		+= hw/usnic/
-obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
-obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
-obj-$(CONFIG_INFINIBAND_SRPT)		+= ulp/srpt/
-obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
-obj-$(CONFIG_INFINIBAND_ISERT)		+= ulp/isert/
+obj-$(CONFIG_INFINIBAND)		+= hw/
+obj-$(CONFIG_INFINIBAND)		+= ulp/

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 3ab3865..ffd0af6 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile

@@ -18,7 +18,7 @@
 
 ib_cm-y :=			cm.o
 
-iw_cm-y :=			iwcm.o
+iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=			cma.o
 

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 42c3058..d570030 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c

@@ -3607,7 +3607,8 @@
 
 			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
 						sizeof *id_stats, RDMA_NL_RDMA_CM,
-						RDMA_NL_RDMA_CM_ID_STATS);
+						RDMA_NL_RDMA_CM_ID_STATS,
+						NLM_F_MULTI);
 			if (!id_stats)
 				goto out;
 

diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
new file mode 100644
index 0000000..b85ddbc
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_msg.c

@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+/*
+ * The name array is sized to IWPM_ULIBNAME_SIZE because iwpm_register_pid()
+ * hands it to ibnl_put_attr() with a length of IWPM_ULIBNAME_SIZE; an array
+ * sized only by its initializer would be shorter than the length passed.
+ * The bytes after the string are zero-initialized.
+ */
+static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
+static int iwpm_ulib_version = 3;
+static int iwpm_user_pid = IWPM_PID_UNDEFINED;
+static atomic_t echo_nlmsg_seq;
+
+/* iwpm_valid_pid - report (1/0) whether the stored port mapper pid is positive */
+int iwpm_valid_pid(void)
+{
+	if (iwpm_user_pid <= 0)
+		return 0;
+	return 1;
+}
+EXPORT_SYMBOL(iwpm_valid_pid);
+
+/*
+ * iwpm_register_pid - Send a netlink query to user space
+ *                     for the iwarp port mapper pid
+ * @pm_msg: device and interface names placed in the request; also stored
+ *          as the request buffer that iwpm_register_pid_cb() compares the
+ *          response against
+ * @nl_client: port mapper client id
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_REG_PID_SEQ]
+ *	[IWPM_NLA_REG_IF_NAME]
+ *	[IWPM_NLA_REG_IBDEV_NAME]
+ *	[IWPM_NLA_REG_ULIB_NAME]
+ *
+ * Returns 0 right away if the client is already registered, a non-zero
+ * error code if the request could not be built or sent, otherwise the
+ * result of iwpm_wait_complete_req().
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto pid_query_error;
+	}
+	if (iwpm_registered_client(nl_client))
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto pid_query_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto pid_query_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+
+	/* fill in the pid request message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
+				pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
+				pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
+				(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
+	if (ret)
+		goto pid_query_error;
+
+	/*
+	 * Attach the request buffer before the message goes out:
+	 * iwpm_register_pid_cb() dereferences req_buffer as soon as a
+	 * response is matched to this request, so it must not be set
+	 * only after the send.
+	 */
+	nlmsg_request->req_buffer = pm_msg;
+
+	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
+		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
+
+	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		/* send failed: flag the client registered, pid unavailable */
+		iwpm_set_registered(nl_client, 1);
+		iwpm_user_pid = IWPM_PID_UNAVAILABLE;
+		err_str = "Unable to send a nlmsg";
+		goto pid_query_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+pid_query_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_register_pid);
+
+/*
+ * iwpm_add_mapping - Send a netlink add mapping message
+ *                    to the port mapper
+ * @pm_msg: mapping data; loc_addr is sent to the port mapper and the
+ *          structure is stored as the request buffer read back by
+ *          iwpm_add_mapping_cb()
+ * @nl_client: port mapper client id
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *	[IWPM_NLA_MANAGE_ADDR]
+ *
+ * Returns 0 without sending anything when no valid port mapper pid is
+ * known, a non-zero error code if the message could not be built or sent,
+ * otherwise the result of iwpm_wait_complete_req().
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto add_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto add_mapping_error;
+	}
+	/* no port mapper pid known: nothing to send, not reported as an error */
+	if (!iwpm_valid_pid())
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto add_mapping_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto add_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+	/* fill in the add mapping message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_MANAGE_MAPPING_SEQ);
+	if (ret)
+		goto add_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
+	if (ret)
+		goto add_mapping_error;
+	/* set before sending so the response callback finds the buffer */
+	nlmsg_request->req_buffer = pm_msg;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		/* the stored pid is dropped after a failed send */
+		iwpm_user_pid = IWPM_PID_UNDEFINED;
+		err_str = "Unable to send a nlmsg";
+		goto add_mapping_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+add_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_add_mapping);
+
+/*
+ * iwpm_add_and_query_mapping - Send a netlink add and query
+ *                              mapping message to the port mapper
+ * @pm_msg: mapping data; loc_addr and rem_addr are sent to the port mapper
+ *          and the structure is stored as the request buffer read back by
+ *          iwpm_add_and_query_mapping_cb()
+ * @nl_client: port mapper client id
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_QUERY_MAPPING_SEQ]
+ *	[IWPM_NLA_QUERY_LOCAL_ADDR]
+ *	[IWPM_NLA_QUERY_REMOTE_ADDR]
+ *
+ * Returns 0 without sending anything when no valid port mapper pid is
+ * known, a non-zero error code if the message could not be built or sent,
+ * otherwise the result of iwpm_wait_complete_req().
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto query_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto query_mapping_error;
+	}
+	/* no port mapper pid known: nothing to send, not reported as an error */
+	if (!iwpm_valid_pid())
+		return 0;
+	/* from here until the first ibnl_put_attr() a failure means -ENOMEM */
+	ret = -ENOMEM;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto query_mapping_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq,
+				nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto query_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+
+	/* fill in the query message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_QUERY_MAPPING_SEQ);
+	if (ret)
+		goto query_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
+	if (ret)
+		goto query_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
+	if (ret)
+		goto query_mapping_error;
+	/* set before sending so the response callback finds the buffer */
+	nlmsg_request->req_buffer = pm_msg;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		err_str = "Unable to send a nlmsg";
+		goto query_mapping_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+query_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping);
+
+/*
+ * iwpm_remove_mapping - Send a netlink remove mapping message
+ *                       to the port mapper
+ * @local_addr: local address of the mapping, sent as IWPM_NLA_MANAGE_ADDR
+ * @nl_client: port mapper client id
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *	[IWPM_NLA_MANAGE_ADDR]
+ *
+ * No request is tracked and no response is awaited here: the function
+ * returns 0 once ibnl_unicast() has accepted the message (or when no valid
+ * port mapper pid is known), and a non-zero error code otherwise.
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto remove_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto remove_mapping_error;
+	}
+	/* no port mapper pid known: nothing to send, not reported as an error */
+	if (!iwpm_valid_pid())
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to create a nlmsg";
+		goto remove_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_MANAGE_MAPPING_SEQ);
+	if (ret)
+		goto remove_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				local_addr, IWPM_NLA_MANAGE_ADDR);
+	if (ret)
+		goto remove_mapping_error;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		/* the stored pid is dropped after a failed send */
+		iwpm_user_pid = IWPM_PID_UNDEFINED;
+		err_str = "Unable to send a nlmsg";
+		goto remove_mapping_error;
+	}
+	iwpm_print_sockaddr(local_addr,
+			"remove_mapping: Local sockaddr:");
+	return 0;
+remove_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb_any(skb);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapping);
+
+/*
+ * netlink attribute policy for the received response to register pid request;
+ * handed to iwpm_parse_nlmsg() by iwpm_register_pid_cb(). The two name
+ * strings are limited to their buffer size minus one.
+ */
+static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
+	[IWPM_NLA_RREG_PID_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_RREG_IBDEV_NAME]  = { .type = NLA_STRING,
+					.len = IWPM_DEVNAME_SIZE - 1 },
+	[IWPM_NLA_RREG_ULIB_NAME]   = { .type = NLA_STRING,
+					.len = IWPM_ULIBNAME_SIZE - 1 },
+	[IWPM_NLA_RREG_ULIB_VER]    = { .type = NLA_U16 },
+	[IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_register_pid_cb - Process a port mapper response to
+ *                        iwpm_register_pid()
+ * @skb: not referenced in this function
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Returns -EINVAL if the message cannot be parsed or no pending request
+ * matches its sequence attribute, 0 otherwise. A device name, library name
+ * or version mismatch is reported through the request's err_code
+ * (IWPM_USER_LIB_INFO_ERR) rather than through the return value.
+ */
+int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
+	struct iwpm_dev_data *pm_msg;
+	char *dev_name, *iwpm_name;
+	u32 msg_seq;
+	u8 nl_client;
+	u16 iwpm_version;
+	const char *msg_type = "Register Pid response";
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
+				resp_reg_policy, nltb, msg_type))
+		return -EINVAL;
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+		return -EINVAL;
+	}
+	/* req_buffer is the iwpm_dev_data stored by iwpm_register_pid() */
+	pm_msg = nlmsg_request->req_buffer;
+	nl_client = nlmsg_request->nl_client;
+	/*
+	 * NOTE(review): dev_name and iwpm_name point into the attribute
+	 * payload and are used as C strings below - confirm that NUL
+	 * termination is guaranteed for NLA_STRING attributes.
+	 */
+	dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
+	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
+	iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
+
+	/* check device name, ulib name and version */
+	if (strcmp(pm_msg->dev_name, dev_name) ||
+			strcmp(iwpm_ulib_name, iwpm_name) ||
+			iwpm_version != iwpm_ulib_version) {
+
+		pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+				__func__, dev_name, iwpm_name, iwpm_version);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto register_pid_response_exit;
+	}
+	/* remember the sender's pid and its sequence number for later messages */
+	iwpm_user_pid = cb->nlh->nlmsg_pid;
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+			__func__, iwpm_user_pid);
+	if (iwpm_valid_client(nl_client))
+		iwpm_set_registered(nl_client, 1);
+register_pid_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found nlmsg_request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	/*
+	 * NOTE(review): the request is still dereferenced after this
+	 * kref_put(); presumably the waiter holds its own reference -
+	 * confirm against iwpm_find_nlmsg_request()/iwpm_wait_complete_req().
+	 */
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_register_pid_cb);
+
+/*
+ * netlink attribute policy for the received response to add mapping request;
+ * handed to iwpm_parse_nlmsg() by iwpm_add_mapping_cb(). Address attributes
+ * carry a length of sizeof(struct sockaddr_storage).
+ */
+static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
+	[IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RMANAGE_MAPPING_ERR]	  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_mapping_cb - Process a port mapper response to
+ *                       iwpm_add_mapping()
+ * @skb: not referenced in this function
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * On success the mapped local address from the response is copied into
+ * the mapped_loc_addr field of the request buffer.
+ *
+ * Returns -EINVAL if the message cannot be parsed or no pending request
+ * matches its sequence attribute, 0 otherwise. An address or address
+ * family mismatch is reported through the request's err_code
+ * (IWPM_USER_LIB_INFO_ERR) rather than through the return value.
+ */
+int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_sa_data *pm_msg;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
+	struct sockaddr_storage *local_sockaddr;
+	struct sockaddr_storage *mapped_sockaddr;
+	const char *msg_type;
+	u32 msg_seq;
+
+	msg_type = "Add Mapping response";
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
+				resp_add_policy, nltb, msg_type))
+		return -EINVAL;
+
+	/* recorded before the request lookup, i.e. even if no request matches */
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+		return -EINVAL;
+	}
+	/* req_buffer is the iwpm_sa_data stored by iwpm_add_mapping() */
+	pm_msg = nlmsg_request->req_buffer;
+	local_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+	mapped_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+
+	/* the echoed local address must be the one that was requested */
+	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto add_mapping_response_exit;
+	}
+	if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
+		pr_info("%s: Sockaddr family doesn't match the requested one\n",
+				__func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto add_mapping_response_exit;
+	}
+	memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
+			sizeof(*mapped_sockaddr));
+	iwpm_print_sockaddr(&pm_msg->loc_addr,
+			"add_mapping: Local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+			"add_mapping: Mapped local sockaddr:");
+
+add_mapping_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	/*
+	 * NOTE(review): the request is still dereferenced after this
+	 * kref_put(); presumably the waiter holds its own reference - confirm.
+	 */
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_add_mapping_cb);
+
+/*
+ * netlink attribute policy for the response to add and query mapping request;
+ * handed to iwpm_parse_nlmsg() by iwpm_add_and_query_mapping_cb(). Address
+ * attributes carry a length of sizeof(struct sockaddr_storage).
+ */
+static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
+	[IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
+	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_and_query_mapping_cb - Process a port mapper response to
+ *                                 iwpm_add_and_query_mapping()
+ * @skb: not referenced in this function
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * On success the mapped local and mapped remote addresses from the
+ * response are copied into the request buffer.
+ *
+ * Returns -EINVAL if the message cannot be parsed or no pending request
+ * matches its sequence attribute, 0 otherwise. A reject from the port
+ * mapper or an address/family mismatch is reported through the request's
+ * err_code rather than through the return value.
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	struct iwpm_sa_data *pm_msg;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+	struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+	struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+	const char *msg_type;
+	u32 msg_seq;
+	u16 err_code;
+
+	msg_type = "Query Mapping response";
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+				resp_query_policy, nltb, msg_type))
+		return -EINVAL;
+	/* recorded before the request lookup, i.e. even if no request matches */
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+			return -EINVAL;
+	}
+	/* req_buffer is the iwpm_sa_data stored by iwpm_add_and_query_mapping() */
+	pm_msg = nlmsg_request->req_buffer;
+	local_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+	remote_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+	mapped_loc_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+	mapped_rem_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+	err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
+	/*
+	 * A reject is recorded in err_code but there is no early exit here:
+	 * the address checks and copies below still run.
+	 */
+	if (err_code == IWPM_REMOTE_QUERY_REJECT) {
+		pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
+			__func__, cb->nlh->nlmsg_pid, msg_seq);
+		nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
+	}
+	/* both echoed addresses must be the ones that were requested */
+	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
+		iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
+		pr_info("%s: Incorrect local sockaddr\n", __func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto query_mapping_response_exit;
+	}
+	if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+		mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+		pr_info("%s: Sockaddr family doesn't match the requested one\n",
+				__func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto query_mapping_response_exit;
+	}
+	memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
+			sizeof(*mapped_loc_sockaddr));
+	memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
+			sizeof(*mapped_rem_sockaddr));
+
+	iwpm_print_sockaddr(&pm_msg->loc_addr,
+			"query_mapping: Local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+			"query_mapping: Mapped local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->rem_addr,
+			"query_mapping: Remote sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
+			"query_mapping: Mapped remote sockaddr:");
+query_mapping_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	/*
+	 * NOTE(review): the request is still dereferenced after this
+	 * kref_put(); presumably the waiter holds its own reference - confirm.
+	 */
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
+
+/*
+ * netlink attribute policy for the received request for mapping info;
+ * handed to iwpm_parse_nlmsg() by iwpm_mapping_info_cb()
+ */
+static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
+	[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
+					.len = IWPM_ULIBNAME_SIZE - 1 },
+	[IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+ * @skb: not referenced in this function
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Returns -EINVAL if the message cannot be parsed, the library name or
+ * version does not match, or the client derived from the message type is
+ * invalid. Otherwise the client's registered flag is set to 0 and the
+ * function returns 0 when iwpm_mapinfo_available() reports nothing to
+ * send, or the result of iwpm_send_mapinfo().
+ */
+int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX];
+	const char *msg_type = "Mapping Info response";
+	int iwpm_pid;
+	u8 nl_client;
+	char *iwpm_name;
+	u16 iwpm_version;
+	int ret = -EINVAL;
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
+				resp_mapinfo_policy, nltb, msg_type)) {
+		pr_info("%s: Unable to parse nlmsg\n", __func__);
+		return ret;
+	}
+	/*
+	 * NOTE(review): iwpm_name points into the attribute payload and is
+	 * used as a C string below - confirm NUL termination is guaranteed.
+	 */
+	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
+	iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
+	if (strcmp(iwpm_ulib_name, iwpm_name) ||
+			iwpm_version != iwpm_ulib_version) {
+		pr_info("%s: Invalid port mapper name = %s version = %d\n",
+				__func__, iwpm_name, iwpm_version);
+		return ret;
+	}
+	/* the client id is taken from the message type of the request */
+	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	if (!iwpm_valid_client(nl_client)) {
+		pr_info("%s: Invalid port mapper client = %d\n",
+				__func__, nl_client);
+		return ret;
+	}
+	iwpm_set_registered(nl_client, 0);
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	if (!iwpm_mapinfo_available())
+		return 0;
+	/* mapping info goes back to the pid that sent this request */
+	iwpm_pid = cb->nlh->nlmsg_pid;
+	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+		 __func__, iwpm_pid);
+	ret = iwpm_send_mapinfo(nl_client, iwpm_pid);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_mapping_info_cb);
+
+/*
+ * netlink attribute policy for the received mapping info ack;
+ * handed to iwpm_parse_nlmsg() by iwpm_ack_mapping_info_cb()
+ */
+static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
+	[IWPM_NLA_MAPINFO_SEQ]    =   { .type = NLA_U32 },
+	[IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
+	[IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 }
+};
+
+/*
+ * iwpm_ack_mapping_info_cb - Handle the port mapper's acknowledgement
+ *                            of the mapping info records provided to it
+ *
+ * Returns -EINVAL when the message cannot be parsed, 0 otherwise.
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const char *msg_type = "Mapping Info Ack";
+	struct nlattr *attrs[IWPM_NLA_MAPINFO_NUM_MAX];
+	u32 nr_sent;
+	u32 nr_acked;
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
+				ack_mapinfo_policy, attrs, msg_type))
+		return -EINVAL;
+
+	nr_sent = nla_get_u32(attrs[IWPM_NLA_MAPINFO_SEND_NUM]);
+	nr_acked = nla_get_u32(attrs[IWPM_NLA_MAPINFO_ACK_NUM]);
+
+	/* a count mismatch is only logged; the ack is still accepted */
+	if (nr_acked != nr_sent)
+		pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
+			__func__, nr_sent, nr_acked);
+
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
+
+/*
+ * netlink attribute policy for the received port mapper error message;
+ * handed to iwpm_parse_nlmsg() by iwpm_mapping_error_cb()
+ */
+static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
+	[IWPM_NLA_ERR_SEQ]        = { .type = NLA_U32 },
+	[IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 },
+};
+
+/*
+ * iwpm_mapping_error_cb - Process a port mapper error message
+ * @skb: not referenced in this function
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * If a pending request matches the sequence attribute, the received error
+ * code is stored in it and its waiter is woken up.
+ *
+ * Returns -EINVAL if the message cannot be parsed, 0 otherwise (including
+ * when no matching request exists).
+ */
+int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	struct nlattr *nltb[IWPM_NLA_ERR_MAX];
+	u32 msg_seq;
+	u16 err_code;
+	const char *msg_type = "Mapping Error Msg";
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
+				map_error_policy, nltb, msg_type))
+		return -EINVAL;
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
+	err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
+	/* nl_client is only used for this log line */
+	pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
+				__func__, msg_seq, err_code, nl_client);
+	/* look for nlmsg_request */
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		/* not all errors have associated requests */
+		pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
+		return 0;
+	}
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	nlmsg_request->err_code = err_code;
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	/*
+	 * NOTE(review): the request is still dereferenced after this
+	 * kref_put(); presumably the waiter holds its own reference - confirm.
+	 */
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_mapping_error_cb);

diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
new file mode 100644
index 0000000..69e9f84
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.c

@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+#define IWPM_HASH_BUCKET_SIZE	512
+#define IWPM_HASH_BUCKET_MASK	(IWPM_HASH_BUCKET_SIZE - 1)
+
+static LIST_HEAD(iwpm_nlmsg_req_list);
+static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
+
+static struct hlist_head *iwpm_hash_bucket;
+static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
+
+static DEFINE_MUTEX(iwpm_admin_lock);
+static struct iwpm_admin_data iwpm_admin;
+
+/*
+ * iwpm_init - Mark a netlink client as valid for the port mapper
+ * @nl_client: The index of the netlink client
+ *
+ * The first caller (admin refcount of zero) also allocates the mapinfo
+ * hash table.  Returns -EINVAL if the client is already valid, -ENOMEM
+ * if the hash table cannot be allocated, otherwise 0.
+ */
+int iwpm_init(u8 nl_client)
+{
+	int status = 0;
+
+	if (iwpm_valid_client(nl_client))
+		return -EINVAL;
+
+	mutex_lock(&iwpm_admin_lock);
+	if (!atomic_read(&iwpm_admin.refcount)) {
+		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+					sizeof(struct hlist_head), GFP_KERNEL);
+		if (!iwpm_hash_bucket)
+			status = -ENOMEM;
+	}
+	/* count this user only when the table is in place */
+	if (!status)
+		atomic_inc(&iwpm_admin.refcount);
+	mutex_unlock(&iwpm_admin_lock);
+
+	if (status) {
+		pr_err("%s Unable to create mapinfo hash table\n", __func__);
+		return status;
+	}
+	iwpm_set_valid(nl_client, 1);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_init);
+
+static void free_hash_bucket(void);
+
+/*
+ * iwpm_exit - Mark a netlink client as no longer valid
+ * @nl_client: The index of the netlink client
+ *
+ * The last user (admin refcount dropping to zero) also destroys the
+ * mapinfo hash table.  Returns -EINVAL if the client is not valid or
+ * the refcount is already zero, otherwise 0.
+ */
+int iwpm_exit(u8 nl_client)
+{
+	int status = 0;
+
+	if (!iwpm_valid_client(nl_client))
+		return -EINVAL;
+
+	mutex_lock(&iwpm_admin_lock);
+	if (!atomic_read(&iwpm_admin.refcount)) {
+		status = -EINVAL;
+	} else if (atomic_dec_and_test(&iwpm_admin.refcount)) {
+		free_hash_bucket();
+		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+	}
+	mutex_unlock(&iwpm_admin_lock);
+
+	if (status) {
+		pr_err("%s Incorrect usage - negative refcount\n", __func__);
+		return status;
+	}
+	iwpm_set_valid(nl_client, 0);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_exit);
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+					       struct sockaddr_storage *);
+
+/*
+ * iwpm_create_mapinfo - Store a local/mapped address pair in the hash table
+ * @local_sockaddr: Local address of the mapping
+ * @mapped_sockaddr: Mapped local address of the mapping
+ * @nl_client: The index of the netlink client owning the mapping
+ *
+ * Both addresses are copied into a newly allocated record.
+ *
+ * Returns 0 if the record was inserted, -ENOMEM if it cannot be allocated,
+ * or -EINVAL if the client is not valid, the hash table does not exist or
+ * the address family is not supported.
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_sockaddr,
+			u8 nl_client)
+{
+	struct hlist_head *hash_bucket_head;
+	struct iwpm_mapping_info *map_info;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client))
+		return ret;
+	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
+	if (!map_info) {
+		pr_err("%s: Unable to allocate a mapping info\n", __func__);
+		return -ENOMEM;
+	}
+	memcpy(&map_info->local_sockaddr, local_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	map_info->nl_client = nl_client;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		hash_bucket_head = get_hash_bucket_head(
+					&map_info->local_sockaddr,
+					&map_info->mapped_sockaddr);
+		/* NULL is returned for an unsupported address family */
+		if (hash_bucket_head) {
+			hlist_add_head(&map_info->hlist_node,
+				       hash_bucket_head);
+			ret = 0;
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	/* a record that was not inserted is owned by nobody else */
+	if (ret)
+		kfree(map_info);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_create_mapinfo);
+
+/*
+ * iwpm_remove_mapinfo - Remove a mapping record from the hash table
+ * @local_sockaddr: Local address of the mapping (used to pick the bucket)
+ * @mapped_local_addr: Mapped local address identifying the record
+ *
+ * The first record in the bucket whose mapped address equals
+ * @mapped_local_addr is unlinked and freed.
+ *
+ * Returns 0 if a record was removed, otherwise -EINVAL (no hash table,
+ * unsupported address family or no matching record).
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_local_addr)
+{
+	struct hlist_node *tmp_hlist_node;
+	struct hlist_head *hash_bucket_head;
+	struct iwpm_mapping_info *map_info = NULL;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (!iwpm_hash_bucket)
+		goto remove_mapinfo_exit;
+
+	hash_bucket_head = get_hash_bucket_head(local_sockaddr,
+						mapped_local_addr);
+	/* NULL is returned for an unsupported address family */
+	if (!hash_bucket_head)
+		goto remove_mapinfo_exit;
+
+	hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+				hash_bucket_head, hlist_node) {
+
+		if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr,
+					mapped_local_addr)) {
+
+			hlist_del_init(&map_info->hlist_node);
+			kfree(map_info);
+			ret = 0;
+			break;
+		}
+	}
+remove_mapinfo_exit:
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapinfo);
+
+/*
+ * Free every mapping record and then the hash table itself, all under
+ * iwpm_mapinfo_lock.  The table pointer is reset to NULL.
+ */
+static void free_hash_bucket(void)
+{
+	struct iwpm_mapping_info *entry;
+	struct hlist_node *next;
+	unsigned long irq_flags;
+	int bucket;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+	/* release the records bucket by bucket */
+	for (bucket = 0; bucket < IWPM_HASH_BUCKET_SIZE; bucket++) {
+		hlist_for_each_entry_safe(entry, next,
+				&iwpm_hash_bucket[bucket], hlist_node) {
+			hlist_del_init(&entry->hlist_node);
+			kfree(entry);
+		}
+	}
+	/* then the bucket array */
+	kfree(iwpm_hash_bucket);
+	iwpm_hash_bucket = NULL;
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+}
+
+/*
+ * Allocate a netlink message request, initialise it and add it to the
+ * list of in-process requests.  The request starts with two references
+ * (kref_init() plus kref_get()).
+ *
+ * Returns the new request, or NULL if the allocation fails.
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+					u8 nl_client, gfp_t gfp)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	unsigned long flags;
+
+	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
+	if (!nlmsg_request) {
+		pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+		return NULL;
+	}
+	kref_init(&nlmsg_request->kref);
+	kref_get(&nlmsg_request->kref);
+	nlmsg_request->nlmsg_seq = nlmsg_seq;
+	nlmsg_request->nl_client = nl_client;
+	nlmsg_request->request_done = 0;
+	nlmsg_request->err_code = 0;
+	/*
+	 * Set up the wait queue here so that a wake_up() issued before
+	 * the requester starts waiting operates on a valid queue.
+	 */
+	init_waitqueue_head(&nlmsg_request->waitq);
+
+	/*
+	 * Publish the request only once it is fully initialised: as soon
+	 * as it is on the list, iwpm_find_nlmsg_request() may match its
+	 * sequence number and take a reference on it.
+	 */
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+	return nlmsg_request;
+}
+
+/*
+ * Release function handed to kref_put(): unlink the request from the
+ * in-process list and free it.
+ */
+void iwpm_free_nlmsg_request(struct kref *kref)
+{
+	struct iwpm_nlmsg_request *req =
+		container_of(kref, struct iwpm_nlmsg_request, kref);
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, irq_flags);
+	list_del_init(&req->inprocess_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, irq_flags);
+
+	if (req->request_done == 0)
+		pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
+			__func__, req->nlmsg_seq);
+	kfree(req);
+}
+
+/*
+ * Look up the in-process request with sequence number echo_seq.  A
+ * reference is taken on the request that is returned; NULL is returned
+ * when no request matches.
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
+{
+	struct iwpm_nlmsg_request *cur;
+	struct iwpm_nlmsg_request *match = NULL;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, irq_flags);
+	list_for_each_entry(cur, &iwpm_nlmsg_req_list, inprocess_list) {
+		if (cur->nlmsg_seq != echo_seq)
+			continue;
+		match = cur;
+		kref_get(&cur->kref);
+		break;
+	}
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, irq_flags);
+	return match;
+}
+
+/*
+ * Wait up to IWPM_NL_TIMEOUT for the request to be marked done, then
+ * drop one reference on it.  Returns the request's err_code when it
+ * completed in time, -EINVAL on timeout.
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
+{
+	int status;
+
+	init_waitqueue_head(&nlmsg_request->waitq);
+
+	status = wait_event_timeout(nlmsg_request->waitq,
+			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
+	if (status) {
+		/* completed before the timeout expired */
+		status = nlmsg_request->err_code;
+	} else {
+		status = -EINVAL;
+		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
+			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
+	}
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	return status;
+}
+
+/*
+ * Atomically increment the shared sequence counter in iwpm_admin and
+ * return the new value.
+ */
+int iwpm_get_nlmsg_seq(void)
+{
+	return atomic_inc_return(&iwpm_admin.nlmsg_seq);
+}
+
+/*
+ * Return the valid flag stored for nl_client, or 0 when the index is
+ * outside the client table.
+ */
+int iwpm_valid_client(u8 nl_client)
+{
+	return (nl_client < RDMA_NL_NUM_CLIENTS) ?
+		iwpm_admin.client_list[nl_client] : 0;
+}
+
+/*
+ * Store the valid flag for nl_client; an index outside the client
+ * table is silently ignored.
+ */
+void iwpm_set_valid(u8 nl_client, int valid)
+{
+	if (nl_client < RDMA_NL_NUM_CLIENTS)
+		iwpm_admin.client_list[nl_client] = valid;
+}
+
+/*
+ * Return the registered flag stored for nl_client.
+ *
+ * The index is not range-checked here: the caller must pass a valid
+ * client (presumably one accepted by iwpm_valid_client() - confirm
+ * against the callers).
+ */
+int iwpm_registered_client(u8 nl_client)
+{
+	return iwpm_admin.reg_list[nl_client];
+}
+
+/*
+ * Store the registered flag for nl_client.
+ *
+ * The index is not range-checked here: the caller must pass a valid
+ * client (presumably one accepted by iwpm_valid_client() - confirm
+ * against the callers).
+ */
+void iwpm_set_registered(u8 nl_client, int reg)
+{
+	iwpm_admin.reg_list[nl_client] = reg;
+}
+
+/*
+ * Compare the address and port of two IPv4 or IPv6 socket addresses.
+ * Returns 0 when family, address and port are all equal, otherwise 1.
+ * An unsupported family is logged and reported as a mismatch.
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+				struct sockaddr_storage *b_sockaddr)
+{
+	if (a_sockaddr->ss_family != b_sockaddr->ss_family)
+		return 1;
+
+	switch (a_sockaddr->ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *a4 = (struct sockaddr_in *)a_sockaddr;
+		struct sockaddr_in *b4 = (struct sockaddr_in *)b_sockaddr;
+
+		if (a4->sin_port != b4->sin_port)
+			return 1;
+		if (memcmp(&a4->sin_addr, &b4->sin_addr,
+			   sizeof(struct in_addr)))
+			return 1;
+		return 0;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a_sockaddr;
+		struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b_sockaddr;
+
+		if (a6->sin6_port != b6->sin6_port)
+			return 1;
+		if (memcmp(&a6->sin6_addr, &b6->sin6_addr,
+			   sizeof(struct in6_addr)))
+			return 1;
+		return 0;
+	}
+	default:
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+		return 1;
+	}
+}
+
+/*
+ * Allocate an skb of NLMSG_GOODSIZE and put a request header for
+ * opcode nl_op of client nl_client into it.  *nlh is set by
+ * ibnl_put_msg().  Returns the skb, or NULL if the allocation or the
+ * header insertion fails.
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+						int nl_client)
+{
+	struct sk_buff *skb = dev_alloc_skb(NLMSG_GOODSIZE);
+
+	if (!skb) {
+		pr_err("%s Unable to allocate skb\n", __func__);
+		return NULL;
+	}
+	if (ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, NLM_F_REQUEST))
+		return skb;
+
+	pr_warn("%s: Unable to put the nlmsg header\n", __func__);
+	dev_kfree_skb(skb);
+	return NULL;
+}
+
+/*
+ * Validate the received message against nlmsg_policy, parse its
+ * attributes into nltb and check that none of the expected attributes
+ * is missing.  The first failing step is logged together with msg_type
+ * and its error is returned; 0 is returned when all steps succeed.
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+				   const struct nla_policy *nlmsg_policy,
+				   struct nlattr *nltb[], const char *msg_type)
+{
+	const int hdrlen = 0;
+	const char *err_str = "Invalid attribute";
+	int ret;
+
+	ret = nlmsg_validate(cb->nlh, hdrlen, policy_max-1, nlmsg_policy);
+	if (!ret) {
+		err_str = "Unable to parse the nlmsg";
+		ret = nlmsg_parse(cb->nlh, hdrlen, nltb, policy_max-1,
+				  nlmsg_policy);
+	}
+	if (!ret) {
+		err_str = "Invalid NULL attribute";
+		ret = iwpm_validate_nlmsg_attr(nltb, policy_max);
+	}
+	if (!ret)
+		return 0;
+
+	pr_warn("%s: %s (msg type %s ret = %d)\n",
+			__func__, err_str, msg_type, ret);
+	return ret;
+}
+
+/*
+ * Emit a debug line with msg followed by the IPv4 or IPv6 address and
+ * the port (decimal and hex, host byte order).  Other address families
+ * print nothing.
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
+{
+	if (sockaddr->ss_family == AF_INET) {
+		struct sockaddr_in *v4 = (struct sockaddr_in *)sockaddr;
+
+		pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
+			msg, &v4->sin_addr,
+			ntohs(v4->sin_port),
+			ntohs(v4->sin_port));
+	} else if (sockaddr->ss_family == AF_INET6) {
+		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sockaddr;
+
+		pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
+			msg, &v6->sin6_addr,
+			ntohs(v6->sin6_port),
+			ntohs(v6->sin6_port));
+	}
+}
+
+/* Hash an IPv6 socket address: jhash of the address mixed with the port. */
+static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
+{
+	u32 addr_hash;
+
+	addr_hash = jhash(&ipv6_sockaddr->sin6_addr,
+			  sizeof(struct in6_addr), 0);
+	return jhash_2words(addr_hash,
+			    (__force u32) ipv6_sockaddr->sin6_port, 0);
+}
+
+/* Hash an IPv4 socket address: jhash of the address mixed with the port. */
+static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
+{
+	u32 addr_hash;
+
+	addr_hash = jhash(&ipv4_sockaddr->sin_addr,
+			  sizeof(struct in_addr), 0);
+	return jhash_2words(addr_hash,
+			    (__force u32) ipv4_sockaddr->sin_port, 0);
+}
+
+/*
+ * Pick the hash bucket for a local/mapped address pair.  The family of
+ * the local address selects the hash function used for both addresses.
+ * Returns NULL (after logging) for a family other than AF_INET/AF_INET6.
+ */
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
+					       *local_sockaddr,
+					       struct sockaddr_storage
+					       *mapped_sockaddr)
+{
+	u32 local_hash, mapped_hash, hash;
+
+	switch (local_sockaddr->ss_family) {
+	case AF_INET:
+		local_hash = iwpm_ipv4_jhash(
+				(struct sockaddr_in *) local_sockaddr);
+		mapped_hash = iwpm_ipv4_jhash(
+				(struct sockaddr_in *) mapped_sockaddr);
+		break;
+	case AF_INET6:
+		local_hash = iwpm_ipv6_jhash(
+				(struct sockaddr_in6 *) local_sockaddr);
+		mapped_hash = iwpm_ipv6_jhash(
+				(struct sockaddr_in6 *) mapped_sockaddr);
+		break;
+	default:
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+		return NULL;
+	}
+
+	/* equal hashes: the case when the port mapper isn't available */
+	hash = (local_hash == mapped_hash) ?
+		local_hash : jhash_2words(local_hash, mapped_hash, 0);
+
+	return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
+}
+
+/*
+ * Send a RDMA_NL_IWPM_MAPINFO_NUM message carrying mapping_num to the
+ * port mapper identified by iwpm_pid.
+ *
+ * Returns 0 if the message was sent; -EINVAL if the message cannot be
+ * created, otherwise the error returned by ibnl_put_attr() or
+ * ibnl_unicast().
+ */
+static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto mapinfo_num_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	/* the sequence attribute of this message is always 0 */
+	msg_seq = 0;
+	/* shared error text for both attribute insertions below */
+	err_str = "Unable to put attribute of mapinfo number nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
+	if (ret)
+		goto mapinfo_num_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+				&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
+	if (ret)
+		goto mapinfo_num_error;
+	ret = ibnl_unicast(skb, nlh, iwpm_pid);
+	if (ret) {
+		/*
+		 * Clear skb so that the error path below does not free it
+		 * (presumably the send path has already consumed it -
+		 * confirm against nlmsg_unicast()).
+		 */
+		skb = NULL;
+		err_str = "Unable to send a nlmsg";
+		goto mapinfo_num_error;
+	}
+	pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+	return 0;
+mapinfo_num_error:
+	pr_info("%s: %s\n", __func__, err_str);
+	if (skb)
+		dev_kfree_skb(skb);
+	return ret;
+}
+
+/*
+ * Terminate the multipart message held in skb with NLMSG_DONE and send
+ * it to the port mapper identified by iwpm_pid.
+ *
+ * The skb is always consumed: it is either handed to ibnl_unicast() or
+ * freed here when the NLMSG_DONE header cannot be added, so the caller
+ * must not use it afterwards.  A NULL skb is accepted and ignored.
+ *
+ * Returns 0 on success (or for a NULL skb), -ENOMEM if the header cannot
+ * be added, otherwise the error returned by ibnl_unicast().
+ */
+static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
+{
+	struct nlmsghdr *nlh = NULL;
+	int ret = 0;
+
+	if (!skb)
+		return ret;
+	if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+			   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+		pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+		/* callers drop their pointer, so free the skb here */
+		dev_kfree_skb(skb);
+		return -ENOMEM;
+	}
+	nlh->nlmsg_type = NLMSG_DONE;
+	ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+	if (ret)
+		pr_warn("%s Unable to send a nlmsg\n", __func__);
+	return ret;
+}
+
+/*
+ * iwpm_send_mapinfo - Send the mapping records of a client to the port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * Walks the mapinfo hash table and packs every record owned by @nl_client
+ * into netlink messages.  When an skb is nearly full it is terminated with
+ * NLMSG_DONE and sent, and a new skb is allocated; the number of skbs is
+ * bounded by IWPM_MAPINFO_SKB_COUNT.  Finally the number of packed records
+ * is reported with a separate message.
+ *
+ * Returns 0 on success, otherwise an error code.
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
+{
+	struct iwpm_mapping_info *map_info;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	int skb_num = 0, mapping_num = 0;
+	int i = 0, nlmsg_bytes = 0;
+	unsigned long flags;
+	const char *err_str = "";
+	/* stays 0 when no record in the table belongs to nl_client */
+	int ret = 0;
+
+	skb = dev_alloc_skb(NLMSG_GOODSIZE);
+	if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to allocate skb";
+		goto send_mapping_info_exit;
+	}
+	skb_num++;
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	/* without a hash table there are no records to pack */
+	if (!iwpm_hash_bucket)
+		goto send_mapping_info_unlock;
+	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
+				     hlist_node) {
+			if (map_info->nl_client != nl_client)
+				continue;
+			nlh = NULL;
+			if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+					RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+				ret = -ENOMEM;
+				err_str = "Unable to put the nlmsg header";
+				goto send_mapping_info_unlock;
+			}
+			err_str = "Unable to put attribute of the nlmsg";
+			ret = ibnl_put_attr(skb, nlh,
+					sizeof(struct sockaddr_storage),
+					&map_info->local_sockaddr,
+					IWPM_NLA_MAPINFO_LOCAL_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			ret = ibnl_put_attr(skb, nlh,
+					sizeof(struct sockaddr_storage),
+					&map_info->mapped_sockaddr,
+					IWPM_NLA_MAPINFO_MAPPED_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			iwpm_print_sockaddr(&map_info->local_sockaddr,
+				"send_mapping_info: Local sockaddr:");
+			iwpm_print_sockaddr(&map_info->mapped_sockaddr,
+				"send_mapping_info: Mapped local sockaddr:");
+			mapping_num++;
+			nlmsg_bytes += nlh->nlmsg_len;
+
+			/* check if all mappings can fit in one skb */
+			if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
+				/* and leave room for NLMSG_DONE */
+				nlmsg_bytes = 0;
+				skb_num++;
+				/* the lock is dropped while sending */
+				spin_unlock_irqrestore(&iwpm_mapinfo_lock,
+						       flags);
+				/* send the skb */
+				ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
+				skb = NULL;
+				if (ret) {
+					err_str = "Unable to send map info";
+					goto send_mapping_info_exit;
+				}
+				if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
+					ret = -ENOMEM;
+					err_str = "Insufficient skbs for map info";
+					goto send_mapping_info_exit;
+				}
+				skb = dev_alloc_skb(NLMSG_GOODSIZE);
+				if (!skb) {
+					ret = -ENOMEM;
+					err_str = "Unable to allocate skb";
+					goto send_mapping_info_exit;
+				}
+				spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+			}
+		}
+	}
+send_mapping_info_unlock:
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+send_mapping_info_exit:
+	if (ret) {
+		pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
+		if (skb)
+			dev_kfree_skb(skb);
+		return ret;
+	}
+	send_nlmsg_done(skb, nl_client, iwpm_pid);
+	return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
+}
+
+/*
+ * Report whether the mapinfo hash table holds at least one record.
+ * Returns 1 if a non-empty bucket exists, 0 otherwise (including when
+ * the table has not been allocated).
+ */
+int iwpm_mapinfo_available(void)
+{
+	unsigned long irq_flags;
+	int found = 0;
+	int idx;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+	/* stop at the first bucket that is not empty */
+	for (idx = 0; iwpm_hash_bucket && !found &&
+	     idx < IWPM_HASH_BUCKET_SIZE; idx++)
+		found = !hlist_empty(&iwpm_hash_bucket[idx]);
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+	return found;
+}

diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
new file mode 100644
index 0000000..9777c86
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.h

@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWPM_UTIL_H
+#define _IWPM_UTIL_H
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/jhash.h>
+#include <linux/kref.h>
+#include <net/netlink.h>
+#include <linux/errno.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+
+
+#define IWPM_NL_RETRANS		3
+#define IWPM_NL_TIMEOUT		(10*HZ)
+#define IWPM_MAPINFO_SKB_COUNT	20
+
+#define IWPM_PID_UNDEFINED     -1
+#define IWPM_PID_UNAVAILABLE   -2
+
+/* A netlink request sent to the port mapper that is awaiting completion */
+struct iwpm_nlmsg_request {
+	/* links the request into the list of in-process requests */
+	struct list_head    inprocess_list;
+	/* sequence number used to match a reply to this request */
+	__u32               nlmsg_seq;
+	/* NOTE(review): presumably caller-owned request data - confirm against users */
+	void                *req_buffer;
+	/* index of the netlink client that issued the request */
+	u8	            nl_client;
+	/* set to non-zero once the request has been answered */
+	u8                  request_done;
+	/* error code reported for the request, 0 if none */
+	u16                 err_code;
+	/* the requester waits here until request_done is set */
+	wait_queue_head_t   waitq;
+	/* reference count; the request is freed on the last put */
+	struct kref         kref;
+};
+
+/* One local/mapped address pair stored in the mapinfo hash table */
+struct iwpm_mapping_info {
+	/* links the record into its hash bucket */
+	struct hlist_node hlist_node;
+	/* local address of the mapping */
+	struct sockaddr_storage local_sockaddr;
+	/* mapped local address of the mapping */
+	struct sockaddr_storage mapped_sockaddr;
+	/* index of the netlink client that owns the mapping */
+	u8     nl_client;
+};
+
+/* Global bookkeeping shared by all port mapper clients */
+struct iwpm_admin_data {
+	/* number of clients that called iwpm_init() without iwpm_exit() */
+	atomic_t refcount;
+	/* source of netlink message sequence numbers */
+	atomic_t nlmsg_seq;
+	/* per-client valid flag, indexed by netlink client */
+	int      client_list[RDMA_NL_NUM_CLIENTS];
+	/* per-client registered flag, indexed by netlink client */
+	int      reg_list[RDMA_NL_NUM_CLIENTS];
+};
+
+/**
+ * iwpm_get_nlmsg_request - Allocate and initialize netlink message request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Indicates how the memory for the request should be allocated
+ *
+ * Returns the newly allocated netlink request object if successful,
+ * otherwise returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+						u8 nl_client, gfp_t gfp);
+
+/**
+ * iwpm_free_nlmsg_request - Deallocate netlink message request
+ * @kref: Holds reference of netlink message request
+ */
+void iwpm_free_nlmsg_request(struct kref *kref);
+
+/**
+ * iwpm_find_nlmsg_request - Find netlink message request in the request list
+ * @echo_seq: Sequence number of the netlink request to find
+ *
+ * Returns the found netlink message request,
+ * if not found, returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
+
+/**
+ * iwpm_wait_complete_req - Block while servicing the netlink request
+ * @nlmsg_request: Netlink message request to service
+ *
+ * Wakes up, after the request is completed or expired
+ * Returns 0 if the request is complete without error
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
+
+/**
+ * iwpm_get_nlmsg_seq - Get the sequence number for a netlink
+ *			message to send to the port mapper
+ *
+ * Returns the sequence number for the netlink message.
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ *                     a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * Returns 0 if the mapping info records were sent, otherwise an error code
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info record is available
+ *		            in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same ip/tcp address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+			struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Table holding the address of each parsed netlink attribute
+ * @nla_count: Size of the table; entries 1 to nla_count - 1 are checked
+ *
+ * Returns -EINVAL if any of the checked entries is NULL, otherwise 0
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+					   int nla_count)
+{
+	int idx = 1;
+
+	while (idx < nla_count) {
+		if (nltb[idx] == NULL)
+			return -EINVAL;
+		idx++;
+	}
+	return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allocated skb, or NULL if the skb cannot be allocated
+ * or the netlink message header cannot be put into it
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+					int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+				const struct nla_policy *nlmsg_policy,
+				struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif

diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index a1e9cba..23dd5a5 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c

@@ -103,13 +103,13 @@
 EXPORT_SYMBOL(ibnl_remove_client);
 
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-		   int len, int client, int op)
+		   int len, int client, int op, int flags)
 {
 	unsigned char *prev_tail;
 
 	prev_tail = skb_tail_pointer(skb);
 	*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
-			 len, NLM_F_MULTI);
+			 len, flags);
 	if (!*nlh)
 		goto out_nlmsg_trim;
 	(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
@@ -172,6 +172,20 @@
 	mutex_unlock(&ibnl_mutex);
 }
 
+/*
+ * Send skb to the netlink port id pid through the socket nls and return
+ * the result of nlmsg_unicast().  The nlh argument is not used by this
+ * function.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			__u32 pid)
+{
+	return nlmsg_unicast(nls, skb, pid);
+}
+EXPORT_SYMBOL(ibnl_unicast);
+
+/*
+ * Send skb to the multicast group through the socket nls, allocating
+ * with the given gfp flags, and return the result of nlmsg_multicast().
+ * The nlh argument is not used by this function.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			unsigned int group, gfp_t flags)
+{
+	return nlmsg_multicast(nls, skb, 0, group, flags);
+}
+EXPORT_SYMBOL(ibnl_multicast);
+
 int __init ibnl_init(void)
 {
 	struct netlink_kernel_cfg cfg = {

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f820958..233eaf5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c

@@ -618,7 +618,7 @@
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 {
-	bool preload = gfp_mask & __GFP_WAIT;
+	bool preload = !!(gfp_mask & __GFP_WAIT);
 	unsigned long flags;
 	int ret, id;
 

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7d3292c..cbd0383 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c

@@ -429,15 +429,19 @@
 	struct attribute *a;
 	int i;
 
-	for (i = 0; (a = p->gid_group.attrs[i]); ++i)
-		kfree(a);
+	if (p->gid_group.attrs) {
+		for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+			kfree(a);
 
-	kfree(p->gid_group.attrs);
+		kfree(p->gid_group.attrs);
+	}
 
-	for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
-		kfree(a);
+	if (p->pkey_group.attrs) {
+		for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+			kfree(a);
 
-	kfree(p->pkey_group.attrs);
+		kfree(p->pkey_group.attrs);
+	}
 
 	kfree(p);
 }
@@ -534,10 +538,12 @@
 	p->port_num   = port_num;
 
 	ret = kobject_init_and_add(&p->kobj, &port_type,
-				   kobject_get(device->ports_parent),
+				   device->ports_parent,
 				   "%d", port_num);
-	if (ret)
-		goto err_put;
+	if (ret) {
+		kfree(p);
+		return ret;
+	}
 
 	ret = sysfs_create_group(&p->kobj, &pma_group);
 	if (ret)
@@ -585,6 +591,7 @@
 		kfree(p->pkey_group.attrs[i]);
 
 	kfree(p->pkey_group.attrs);
+	p->pkey_group.attrs = NULL;
 
 err_remove_gid:
 	sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -594,13 +601,13 @@
 		kfree(p->gid_group.attrs[i]);
 
 	kfree(p->gid_group.attrs);
+	p->gid_group.attrs = NULL;
 
 err_remove_pma:
 	sysfs_remove_group(&p->kobj, &pma_group);
 
 err_put:
-	kobject_put(device->ports_parent);
-	kfree(p);
+	kobject_put(&p->kobj);
 	return ret;
 }
 
@@ -809,6 +816,22 @@
 	.attrs	= iw_proto_stats_attrs,
 };
 
+/*
+ * Tear down the sysfs state of every port on device->port_list: each
+ * port kobject is unlinked from the list, its pma, pkey and gid
+ * attribute groups are removed and its reference is dropped.  Finally
+ * the reference on the "ports" parent kobject is dropped.
+ */
+static void free_port_list_attributes(struct ib_device *device)
+{
+	struct kobject *p, *t;
+
+	/* _safe variant: entries are deleted while walking the list */
+	list_for_each_entry_safe(p, t, &device->port_list, entry) {
+		struct ib_port *port = container_of(p, struct ib_port, kobj);
+		list_del(&p->entry);
+		sysfs_remove_group(p, &pma_group);
+		sysfs_remove_group(p, &port->pkey_group);
+		sysfs_remove_group(p, &port->gid_group);
+		kobject_put(p);
+	}
+
+	kobject_put(device->ports_parent);
+}
+
 int ib_device_register_sysfs(struct ib_device *device,
 			     int (*port_callback)(struct ib_device *,
 						  u8, struct kobject *))
@@ -835,7 +858,7 @@
 	}
 
 	device->ports_parent = kobject_create_and_add("ports",
-					kobject_get(&class_dev->kobj));
+						      &class_dev->kobj);
 	if (!device->ports_parent) {
 		ret = -ENOMEM;
 		goto err_put;
@@ -862,21 +885,7 @@
 	return 0;
 
 err_put:
-	{
-		struct kobject *p, *t;
-		struct ib_port *port;
-
-		list_for_each_entry_safe(p, t, &device->port_list, entry) {
-			list_del(&p->entry);
-			port = container_of(p, struct ib_port, kobj);
-			sysfs_remove_group(p, &pma_group);
-			sysfs_remove_group(p, &port->pkey_group);
-			sysfs_remove_group(p, &port->gid_group);
-			kobject_put(p);
-		}
-	}
-
-	kobject_put(&class_dev->kobj);
+	free_port_list_attributes(device);
 
 err_unregister:
 	device_unregister(class_dev);
@@ -887,22 +896,18 @@
 
 void ib_device_unregister_sysfs(struct ib_device *device)
 {
-	struct kobject *p, *t;
-	struct ib_port *port;
-
 	/* Hold kobject until ib_dealloc_device() */
-	kobject_get(&device->dev.kobj);
+	struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
+	int i;
 
-	list_for_each_entry_safe(p, t, &device->port_list, entry) {
-		list_del(&p->entry);
-		port = container_of(p, struct ib_port, kobj);
-		sysfs_remove_group(p, &pma_group);
-		sysfs_remove_group(p, &port->pkey_group);
-		sysfs_remove_group(p, &port->gid_group);
-		kobject_put(p);
-	}
+	if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+		sysfs_remove_group(kobj_dev, &iw_stats_group);
 
-	kobject_put(device->ports_parent);
+	free_port_list_attributes(device);
+
+	for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+		device_remove_file(&device->dev, ib_class_attributes[i]);
+
 	device_unregister(&device->dev);
 }
 

diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f0d588f..1acb991 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c

@@ -98,7 +98,7 @@
 
 struct ib_umad_device {
 	int                  start_port, end_port;
-	struct kref          ref;
+	struct kobject       kobj;
 	struct ib_umad_port  port[0];
 };
 
@@ -134,14 +134,18 @@
 static void ib_umad_add_one(struct ib_device *device);
 static void ib_umad_remove_one(struct ib_device *device);
 
-static void ib_umad_release_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
 {
 	struct ib_umad_device *dev =
-		container_of(ref, struct ib_umad_device, ref);
+		container_of(kobj, struct ib_umad_device, kobj);
 
 	kfree(dev);
 }
 
+/* kobject type of struct ib_umad_device; its release frees the device */
+static struct kobj_type ib_umad_dev_ktype = {
+	.release = ib_umad_release_dev,
+};
+
 static int hdr_size(struct ib_umad_file *file)
 {
 	return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
@@ -780,27 +784,19 @@
 {
 	struct ib_umad_port *port;
 	struct ib_umad_file *file;
-	int ret;
+	int ret = -ENXIO;
 
 	port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
-	if (port)
-		kref_get(&port->umad_dev->ref);
-	else
-		return -ENXIO;
 
 	mutex_lock(&port->file_mutex);
 
-	if (!port->ib_dev) {
-		ret = -ENXIO;
+	if (!port->ib_dev)
 		goto out;
-	}
 
+	ret = -ENOMEM;
 	file = kzalloc(sizeof *file, GFP_KERNEL);
-	if (!file) {
-		kref_put(&port->umad_dev->ref, ib_umad_release_dev);
-		ret = -ENOMEM;
+	if (!file)
 		goto out;
-	}
 
 	mutex_init(&file->mutex);
 	spin_lock_init(&file->send_lock);
@@ -814,6 +810,13 @@
 	list_add_tail(&file->port_list, &port->file_list);
 
 	ret = nonseekable_open(inode, filp);
+	if (ret) {
+		list_del(&file->port_list);
+		kfree(file);
+		goto out;
+	}
+
+	kobject_get(&port->umad_dev->kobj);
 
 out:
 	mutex_unlock(&port->file_mutex);
@@ -852,7 +855,7 @@
 	mutex_unlock(&file->port->file_mutex);
 
 	kfree(file);
-	kref_put(&dev->ref, ib_umad_release_dev);
+	kobject_put(&dev->kobj);
 
 	return 0;
 }
@@ -880,10 +883,6 @@
 	int ret;
 
 	port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
-	if (port)
-		kref_get(&port->umad_dev->ref);
-	else
-		return -ENXIO;
 
 	if (filp->f_flags & O_NONBLOCK) {
 		if (down_trylock(&port->sm_sem)) {
@@ -898,17 +897,27 @@
 	}
 
 	ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
-	if (ret) {
-		up(&port->sm_sem);
-		goto fail;
-	}
+	if (ret)
+		goto err_up_sem;
 
 	filp->private_data = port;
 
-	return nonseekable_open(inode, filp);
+	ret = nonseekable_open(inode, filp);
+	if (ret)
+		goto err_clr_sm_cap;
+
+	kobject_get(&port->umad_dev->kobj);
+
+	return 0;
+
+err_clr_sm_cap:
+	swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+	ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+	up(&port->sm_sem);
 
 fail:
-	kref_put(&port->umad_dev->ref, ib_umad_release_dev);
 	return ret;
 }
 
@@ -927,7 +936,7 @@
 
 	up(&port->sm_sem);
 
-	kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+	kobject_put(&port->umad_dev->kobj);
 
 	return ret;
 }
@@ -995,6 +1004,7 @@
 }
 
 static int ib_umad_init_port(struct ib_device *device, int port_num,
+			     struct ib_umad_device *umad_dev,
 			     struct ib_umad_port *port)
 {
 	int devnum;
@@ -1027,6 +1037,7 @@
 
 	cdev_init(&port->cdev, &umad_fops);
 	port->cdev.owner = THIS_MODULE;
+	port->cdev.kobj.parent = &umad_dev->kobj;
 	kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
 	if (cdev_add(&port->cdev, base, 1))
 		goto err_cdev;
@@ -1045,6 +1056,7 @@
 	base += IB_UMAD_MAX_PORTS;
 	cdev_init(&port->sm_cdev, &umad_sm_fops);
 	port->sm_cdev.owner = THIS_MODULE;
+	port->sm_cdev.kobj.parent = &umad_dev->kobj;
 	kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
 	if (cdev_add(&port->sm_cdev, base, 1))
 		goto err_sm_cdev;
@@ -1138,7 +1150,7 @@
 	if (!umad_dev)
 		return;
 
-	kref_init(&umad_dev->ref);
+	kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
 
 	umad_dev->start_port = s;
 	umad_dev->end_port   = e;
@@ -1146,7 +1158,8 @@
 	for (i = s; i <= e; ++i) {
 		umad_dev->port[i - s].umad_dev = umad_dev;
 
-		if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+		if (ib_umad_init_port(device, i, umad_dev,
+				      &umad_dev->port[i - s]))
 			goto err;
 	}
 
@@ -1158,7 +1171,7 @@
 	while (--i >= s)
 		ib_umad_kill_port(&umad_dev->port[i - s]);
 
-	kref_put(&umad_dev->ref, ib_umad_release_dev);
+	kobject_put(&umad_dev->kobj);
 }
 
 static void ib_umad_remove_one(struct ib_device *device)
@@ -1172,7 +1185,7 @@
 	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
 		ib_umad_kill_port(&umad_dev->port[i]);
 
-	kref_put(&umad_dev->ref, ib_umad_release_dev);
+	kobject_put(&umad_dev->kobj);
 }
 
 static char *umad_devnode(struct device *dev, umode_t *mode)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 92525f8..c2b89cc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c

@@ -48,7 +48,7 @@
 
 #include "core_priv.h"
 
-int ib_rate_to_mult(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
 {
 	switch (rate) {
 	case IB_RATE_2_5_GBPS: return  1;
@@ -65,7 +65,7 @@
 }
 EXPORT_SYMBOL(ib_rate_to_mult);
 
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
 {
 	switch (mult) {
 	case 1:  return IB_RATE_2_5_GBPS;
@@ -82,7 +82,7 @@
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
 {
 	switch (rate) {
 	case IB_RATE_2_5_GBPS: return 2500;
@@ -107,7 +107,7 @@
 }
 EXPORT_SYMBOL(ib_rate_to_mbps);
 
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type)
 {
 	switch (node_type) {

diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 0000000..e900b03
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile

@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA)		+= mthca/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= ipath/
+obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
+obj-$(CONFIG_INFINIBAND_NES)		+= nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA)		+= ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC)		+= usnic/

diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index c3f5aca..de1c61b4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c

@@ -735,14 +735,12 @@
 			((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
 			V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
 			V_TPT_PAGE_SIZE(page_size));
-		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
-				    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+		tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
 		tpt.len = cpu_to_be32(len);
 		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
 		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
 		tpt.rsvd_bind_cnt_or_pstag = 0;
-		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
-				  cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+		tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
 	}
 	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
 				       stag_idx +

diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 095bb04..cb78b1e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c

@@ -418,6 +418,7 @@
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, abort_arp_failure);
 	req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+	memset(req, 0, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 1f863a9..5e153f6 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 
+#include <rdma/ib_addr.h>
+
 #include "iw_cxgb4.h"
 
 static char *states[] = {
@@ -232,12 +234,16 @@
 
 static void set_emss(struct c4iw_ep *ep, u16 opt)
 {
-	ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
+	ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+		   sizeof(struct iphdr) - sizeof(struct tcphdr);
 	ep->mss = ep->emss;
 	if (GET_TCPOPT_TSTAMP(opt))
 		ep->emss -= 12;
 	if (ep->emss < 128)
 		ep->emss = 128;
+	if (ep->emss & 7)
+		PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+		     GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
 	PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
 	     ep->mss, ep->emss);
 }
@@ -294,6 +300,12 @@
 		dst_release(ep->dst);
 		cxgb4_l2t_release(ep->l2t);
 	}
+	if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
+		print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
+		iwpm_remove_mapinfo(&ep->com.local_addr,
+				    &ep->com.mapped_local_addr);
+		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+	}
 	kfree(ep);
 }
 
@@ -341,10 +353,7 @@
 
 static struct net_device *get_real_dev(struct net_device *egress_dev)
 {
-	struct net_device *phys_dev = egress_dev;
-	if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
-		phys_dev = vlan_dev_real_dev(egress_dev);
-	return phys_dev;
+	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 }
 
 static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
@@ -468,7 +477,7 @@
 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
 	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
-	flowc->mnemval[6].val = cpu_to_be32(snd_win);
+	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
 	/* Pad WR to 16 byte boundary */
@@ -528,6 +537,49 @@
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
+/*
+ * c4iw_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void c4iw_form_pm_msg(struct c4iw_ep *ep,
+				struct iwpm_sa_data *pm_msg)
+{
+	memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
+		sizeof(ep->com.local_addr));
+	memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
+		sizeof(ep->com.remote_addr));
+}
+
+/*
+ * c4iw_form_reg_msg - Form a port mapper message with dev info
+ */
+static void c4iw_form_reg_msg(struct c4iw_dev *dev,
+				struct iwpm_dev_data *pm_msg)
+{
+	memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
+	memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
+				IWPM_IFNAME_SIZE);
+}
+
+static void c4iw_record_pm_msg(struct c4iw_ep *ep,
+			struct iwpm_sa_data *pm_msg)
+{
+	memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
+		sizeof(ep->com.mapped_local_addr));
+	memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
+		sizeof(ep->com.mapped_remote_addr));
+}
+
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+		     unsigned int *idx, int use_ts)
+{
+	unsigned short hdr_size = sizeof(struct iphdr) +
+				  sizeof(struct tcphdr) +
+				  (use_ts ? 12 : 0);
+	unsigned short data_size = mtu - hdr_size;
+
+	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
 	struct cpl_act_open_req *req;
@@ -546,10 +598,15 @@
 	int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
 				sizeof(struct cpl_act_open_req6) :
 				sizeof(struct cpl_t5_act_open_req6);
-	struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
-	struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
-	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
-	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+	struct sockaddr_in *la = (struct sockaddr_in *)
+				 &ep->com.mapped_local_addr;
+	struct sockaddr_in *ra = (struct sockaddr_in *)
+				 &ep->com.mapped_remote_addr;
+	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
+				   &ep->com.mapped_local_addr;
+	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
+				   &ep->com.mapped_remote_addr;
+	int win;
 
 	wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
 			roundup(sizev4, 16) :
@@ -565,8 +622,18 @@
 	}
 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 
-	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		 enable_tcp_timestamps);
 	wscale = compute_wscale(rcv_win);
+
+	/*
+	 * Specify the largest window that will fit in opt0. The
+	 * remainder will be specified in the rx_data_ack.
+	 */
+	win = ep->rcv_win >> 10;
+	if (win > RCV_BUFSIZ_MASK)
+		win = RCV_BUFSIZ_MASK;
+
 	opt0 = (nocong ? NO_CONG(1) : 0) |
 	       KEEP_ALIVE(1) |
 	       DELACK(1) |
@@ -577,7 +644,7 @@
 	       SMAC_SEL(ep->smac_idx) |
 	       DSCP(ep->tos) |
 	       ULP_MODE(ULP_MODE_TCPDDP) |
-	       RCV_BUFSIZ(rcv_win>>10);
+	       RCV_BUFSIZ(win);
 	opt2 = RX_CHANNEL(0) |
 	       CCTRL_ECN(enable_ecn) |
 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -633,6 +700,13 @@
 			req6->opt2 = cpu_to_be32(opt2);
 		}
 	} else {
+		u32 isn = (prandom_u32() & ~7UL) - 1;
+
+		opt2 |= T5_OPT_2_VALID;
+		opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+		if (peer2peer)
+			isn += 4;
+
 		if (ep->com.remote_addr.ss_family == AF_INET) {
 			t5_req = (struct cpl_t5_act_open_req *)
 				 skb_put(skb, wrlen);
@@ -649,6 +723,9 @@
 						     cxgb4_select_ntuple(
 					     ep->com.dev->rdev.lldi.ports[0],
 					     ep->l2t)));
+			t5_req->rsvd = cpu_to_be32(isn);
+			PDBG("%s snd_isn %u\n", __func__,
+			     be32_to_cpu(t5_req->rsvd));
 			t5_req->opt2 = cpu_to_be32(opt2);
 		} else {
 			t5_req6 = (struct cpl_t5_act_open_req6 *)
@@ -672,6 +749,9 @@
 							cxgb4_select_ntuple(
 						ep->com.dev->rdev.lldi.ports[0],
 						ep->l2t));
+			t5_req6->rsvd = cpu_to_be32(isn);
+			PDBG("%s snd_isn %u\n", __func__,
+			     be32_to_cpu(t5_req6->rsvd));
 			t5_req6->opt2 = cpu_to_be32(opt2);
 		}
 	}
@@ -1145,6 +1225,14 @@
 		return 0;
 	}
 
+	/*
+	 * If we couldn't specify the entire rcv window at connection setup
+	 * due to the limit in the number of bits in the RCV_BUFSIZ field,
+	 * then add the overage in to the credits returned.
+	 */
+	if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024)
+		credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024;
+
 	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
 	memset(req, 0, wrlen);
 	INIT_TP_WR(req, ep->hwtid);
@@ -1618,6 +1706,7 @@
 	unsigned int mtu_idx;
 	int wscale;
 	struct sockaddr_in *sin;
+	int win;
 
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 	req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
@@ -1627,10 +1716,10 @@
 	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
 				     ep->com.dev->rdev.lldi.ports[0],
 				     ep->l2t));
-	sin = (struct sockaddr_in *)&ep->com.local_addr;
+	sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
 	req->le.lport = sin->sin_port;
 	req->le.u.ipv4.lip = sin->sin_addr.s_addr;
-	sin = (struct sockaddr_in *)&ep->com.remote_addr;
+	sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
 	req->le.pport = sin->sin_port;
 	req->le.u.ipv4.pip = sin->sin_addr.s_addr;
 	req->tcb.t_state_to_astid =
@@ -1640,8 +1729,18 @@
 			htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
 	req->tcb.tx_max = (__force __be32) jiffies;
 	req->tcb.rcv_adv = htons(1);
-	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		 enable_tcp_timestamps);
 	wscale = compute_wscale(rcv_win);
+
+	/*
+	 * Specify the largest window that will fit in opt0. The
+	 * remainder will be specified in the rx_data_ack.
+	 */
+	win = ep->rcv_win >> 10;
+	if (win > RCV_BUFSIZ_MASK)
+		win = RCV_BUFSIZ_MASK;
+
 	req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
 		(nocong ? NO_CONG(1) : 0) |
 		KEEP_ALIVE(1) |
@@ -1653,7 +1752,7 @@
 		SMAC_SEL(ep->smac_idx) |
 		DSCP(ep->tos) |
 		ULP_MODE(ULP_MODE_TCPDDP) |
-		RCV_BUFSIZ(rcv_win >> 10));
+		RCV_BUFSIZ(win));
 	req->tcb.opt2 = (__force __be32) (PACE(1) |
 		TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
 		RX_CHANNEL(0) |
@@ -1690,6 +1789,13 @@
 	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
 }
 
+static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
+{
+	ep->snd_win = snd_win;
+	ep->rcv_win = rcv_win;
+	PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+}
+
 #define ACT_OPEN_RETRY_COUNT 2
 
 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
@@ -1738,6 +1844,7 @@
 		ep->ctrlq_idx = cxgb4_port_idx(pdev);
 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
 			cxgb4_port_idx(pdev) * step];
+		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
 		dev_put(pdev);
 	} else {
 		pdev = get_real_dev(n->dev);
@@ -1746,16 +1853,17 @@
 		if (!ep->l2t)
 			goto out;
 		ep->mtu = dst_mtu(dst);
-		ep->tx_chan = cxgb4_port_chan(n->dev);
-		ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+		ep->tx_chan = cxgb4_port_chan(pdev);
+		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
 		step = cdev->rdev.lldi.ntxq /
 			cdev->rdev.lldi.nchan;
-		ep->txq_idx = cxgb4_port_idx(n->dev) * step;
-		ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+		ep->txq_idx = cxgb4_port_idx(pdev) * step;
+		ep->ctrlq_idx = cxgb4_port_idx(pdev);
 		step = cdev->rdev.lldi.nrxq /
 			cdev->rdev.lldi.nchan;
 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
-			cxgb4_port_idx(n->dev) * step];
+			cxgb4_port_idx(pdev) * step];
+		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
 
 		if (clear_mpa_v1) {
 			ep->retry_with_mpa_v1 = 0;
@@ -1870,10 +1978,10 @@
 	struct sockaddr_in6 *ra6;
 
 	ep = lookup_atid(t, atid);
-	la = (struct sockaddr_in *)&ep->com.local_addr;
-	ra = (struct sockaddr_in *)&ep->com.remote_addr;
-	la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
-	ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+	la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+	ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+	la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+	ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
 
 	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
 	     status, status2errno(status));
@@ -1986,13 +2094,36 @@
 	u64 opt0;
 	u32 opt2;
 	int wscale;
+	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
+	int win;
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	BUG_ON(skb_cloned(skb));
-	skb_trim(skb, sizeof(*rpl));
+
 	skb_get(skb);
-	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+	rpl = cplhdr(skb);
+	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+		skb_trim(skb, roundup(sizeof(*rpl5), 16));
+		rpl5 = (void *)rpl;
+		INIT_TP_WR(rpl5, ep->hwtid);
+	} else {
+		skb_trim(skb, sizeof(*rpl));
+		INIT_TP_WR(rpl, ep->hwtid);
+	}
+	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+						    ep->hwtid));
+
+	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		 enable_tcp_timestamps && req->tcpopt.tstamp);
 	wscale = compute_wscale(rcv_win);
+
+	/*
+	 * Specify the largest window that will fit in opt0. The
+	 * remainder will be specified in the rx_data_ack.
+	 */
+	win = ep->rcv_win >> 10;
+	if (win > RCV_BUFSIZ_MASK)
+		win = RCV_BUFSIZ_MASK;
 	opt0 = (nocong ? NO_CONG(1) : 0) |
 	       KEEP_ALIVE(1) |
 	       DELACK(1) |
@@ -2003,7 +2134,7 @@
 	       SMAC_SEL(ep->smac_idx) |
 	       DSCP(ep->tos >> 2) |
 	       ULP_MODE(ULP_MODE_TCPDDP) |
-	       RCV_BUFSIZ(rcv_win>>10);
+	       RCV_BUFSIZ(win);
 	opt2 = RX_CHANNEL(0) |
 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
 
@@ -2023,14 +2154,18 @@
 			opt2 |= CCTRL_ECN(1);
 	}
 	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+		u32 isn = (prandom_u32() & ~7UL) - 1;
 		opt2 |= T5_OPT_2_VALID;
 		opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+		opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+		rpl5 = (void *)rpl;
+		memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+		if (peer2peer)
+			isn += 4;
+		rpl5->iss = cpu_to_be32(isn);
+		PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
 	}
 
-	rpl = cplhdr(skb);
-	INIT_TP_WR(rpl, ep->hwtid);
-	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
-				      ep->hwtid));
 	rpl->opt0 = cpu_to_be64(opt0);
 	rpl->opt2 = cpu_to_be32(opt2);
 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -2095,6 +2230,7 @@
 	int err;
 	u16 peer_mss = ntohs(req->tcpopt.mss);
 	int iptype;
+	unsigned short hdrs;
 
 	parent_ep = lookup_stid(t, stid);
 	if (!parent_ep) {
@@ -2152,8 +2288,10 @@
 		goto reject;
 	}
 
-	if (peer_mss && child_ep->mtu > (peer_mss + 40))
-		child_ep->mtu = peer_mss + 40;
+	hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+		child_ep->mtu = peer_mss + hdrs;
 
 	state_set(&child_ep->com, CONNECTING);
 	child_ep->com.dev = dev;
@@ -2730,13 +2868,15 @@
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_ep *ep;
 	int err = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
-	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
-				      &cm_id->remote_addr;
+	struct sockaddr_in *laddr;
+	struct sockaddr_in *raddr;
+	struct sockaddr_in6 *laddr6;
+	struct sockaddr_in6 *raddr6;
+	struct iwpm_dev_data pm_reg_msg;
+	struct iwpm_sa_data pm_msg;
 	__u8 *ra;
 	int iptype;
+	int iwpm_err = 0;
 
 	if ((conn_param->ord > c4iw_max_read_depth) ||
 	    (conn_param->ird > c4iw_max_read_depth)) {
@@ -2767,7 +2907,7 @@
 	if (!ep->com.qp) {
 		PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
 		err = -EINVAL;
-		goto fail2;
+		goto fail1;
 	}
 	ref_qp(ep);
 	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -2780,10 +2920,50 @@
 	if (ep->atid == -1) {
 		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
 		err = -ENOMEM;
-		goto fail2;
+		goto fail1;
 	}
 	insert_handle(dev, &dev->atid_idr, ep, ep->atid);
 
+	memcpy(&ep->com.local_addr, &cm_id->local_addr,
+	       sizeof(ep->com.local_addr));
+	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+	       sizeof(ep->com.remote_addr));
+
+	/* No port mapper available, go with the specified peer information */
+	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+	       sizeof(ep->com.mapped_local_addr));
+	memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
+	       sizeof(ep->com.mapped_remote_addr));
+
+	c4iw_form_reg_msg(dev, &pm_reg_msg);
+	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+	if (iwpm_err) {
+		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+			__func__, iwpm_err);
+	}
+	if (iwpm_valid_pid() && !iwpm_err) {
+		c4iw_form_pm_msg(ep, &pm_msg);
+		iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
+		if (iwpm_err)
+			PDBG("%s: Port Mapper query fail (err = %d).\n",
+				__func__, iwpm_err);
+		else
+			c4iw_record_pm_msg(ep, &pm_msg);
+	}
+	if (iwpm_create_mapinfo(&ep->com.local_addr,
+				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+		err = -ENOMEM;
+		goto fail1;
+	}
+	print_addr(&ep->com, __func__, "add_query/create_mapinfo");
+	set_bit(RELEASE_MAPINFO, &ep->com.flags);
+
+	laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+	raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+	laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+	raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+
 	if (cm_id->remote_addr.ss_family == AF_INET) {
 		iptype = 4;
 		ra = (__u8 *)&raddr->sin_addr;
@@ -2794,7 +2974,7 @@
 		if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
 			err = pick_local_ipaddrs(dev, cm_id);
 			if (err)
-				goto fail2;
+				goto fail1;
 		}
 
 		/* find a route */
@@ -2814,7 +2994,7 @@
 		if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
 			err = pick_local_ip6addrs(dev, cm_id);
 			if (err)
-				goto fail2;
+				goto fail1;
 		}
 
 		/* find a route */
@@ -2830,13 +3010,13 @@
 	if (!ep->dst) {
 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
 		err = -EHOSTUNREACH;
-		goto fail3;
+		goto fail2;
 	}
 
 	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
 	if (err) {
 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
-		goto fail4;
+		goto fail3;
 	}
 
 	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -2845,10 +3025,6 @@
 
 	state_set(&ep->com, CONNECTING);
 	ep->tos = 0;
-	memcpy(&ep->com.local_addr, &cm_id->local_addr,
-	       sizeof(ep->com.local_addr));
-	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
-	       sizeof(ep->com.remote_addr));
 
 	/* send connect request to rnic */
 	err = send_connect(ep);
@@ -2856,12 +3032,12 @@
 		goto out;
 
 	cxgb4_l2t_release(ep->l2t);
-fail4:
-	dst_release(ep->dst);
 fail3:
+	dst_release(ep->dst);
+fail2:
 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail2:
+fail1:
 	cm_id->rem_ref(cm_id);
 	c4iw_put_ep(&ep->com);
 out:
@@ -2871,7 +3047,8 @@
 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
 	int err;
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+				    &ep->com.mapped_local_addr;
 
 	c4iw_init_wr_wait(&ep->com.wr_wait);
 	err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
@@ -2892,7 +3069,8 @@
 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
 	int err;
-	struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+	struct sockaddr_in *sin = (struct sockaddr_in *)
+				  &ep->com.mapped_local_addr;
 
 	if (dev->rdev.lldi.enable_fw_ofld_conn) {
 		do {
@@ -2927,6 +3105,9 @@
 	int err = 0;
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_listen_ep *ep;
+	struct iwpm_dev_data pm_reg_msg;
+	struct iwpm_sa_data pm_msg;
+	int iwpm_err = 0;
 
 	might_sleep();
 
@@ -2961,6 +3142,37 @@
 		goto fail2;
 	}
 	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+
+	/* No port mapper available, go with the specified info */
+	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+	       sizeof(ep->com.mapped_local_addr));
+
+	c4iw_form_reg_msg(dev, &pm_reg_msg);
+	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+	if (iwpm_err) {
+		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+			__func__, iwpm_err);
+	}
+	if (iwpm_valid_pid() && !iwpm_err) {
+		memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
+				sizeof(ep->com.local_addr));
+		iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
+		if (iwpm_err)
+			PDBG("%s: Port Mapper query fail (err = %d).\n",
+				__func__, iwpm_err);
+		else
+			memcpy(&ep->com.mapped_local_addr,
+				&pm_msg.mapped_loc_addr,
+				sizeof(ep->com.mapped_local_addr));
+	}
+	if (iwpm_create_mapinfo(&ep->com.local_addr,
+				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+		err = -ENOMEM;
+		goto fail3;
+	}
+	print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+
+	set_bit(RELEASE_MAPINFO, &ep->com.flags);
 	state_set(&ep->com, LISTEN);
 	if (ep->com.local_addr.ss_family == AF_INET)
 		err = create_server4(dev, ep);
@@ -2970,6 +3182,8 @@
 		cm_id->provider_data = ep;
 		goto out;
 	}
+
+fail3:
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 fail2:

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index cfaa56a..c04292c 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c

@@ -134,7 +134,8 @@
 			V_FW_RI_RES_WR_IQANUS(0) |
 			V_FW_RI_RES_WR_IQANUD(1) |
 			F_FW_RI_RES_WR_IQANDST |
-			V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids));
+			V_FW_RI_RES_WR_IQANDSTINDEX(
+				rdev->lldi.ciq_ids[cq->vector]));
 	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
 			F_FW_RI_RES_WR_IQDROPRSS |
 			V_FW_RI_RES_WR_IQPCIECH(2) |
@@ -870,6 +871,9 @@
 
 	rhp = to_c4iw_dev(ibdev);
 
+	if (vector >= rhp->rdev.lldi.nciq)
+		return ERR_PTR(-EINVAL);
+
 	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
 	if (!chp)
 		return ERR_PTR(-ENOMEM);
@@ -915,6 +919,7 @@
 	}
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
+	chp->cq.vector = vector;
 
 	ret = create_cq(&rhp->rdev, &chp->cq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -940,7 +945,6 @@
 		if (!mm2)
 			goto err4;
 
-		memset(&uresp, 0, sizeof(uresp));
 		uresp.qid_mask = rhp->rdev.cqmask;
 		uresp.cqid = chp->cq.cqid;
 		uresp.size = chp->cq.size;
@@ -951,7 +955,8 @@
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
 		if (ret)
 			goto err5;
 

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index f4fa50a..dd93aad 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c

@@ -77,6 +77,16 @@
 	int pos;
 };
 
+/* registered cxgb4 netlink callbacks */
+static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
+	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static int count_idrs(int id, void *p, void *data)
 {
 	int *countp = data;
@@ -113,35 +123,49 @@
 				&qp->ep->com.local_addr;
 			struct sockaddr_in *rsin = (struct sockaddr_in *)
 				&qp->ep->com.remote_addr;
+			struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+				&qp->ep->com.mapped_local_addr;
+			struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+				&qp->ep->com.mapped_remote_addr;
 
 			cc = snprintf(qpd->buf + qpd->pos, space,
 				      "rc qp sq id %u rq id %u state %u "
 				      "onchip %u ep tid %u state %u "
-				      "%pI4:%u->%pI4:%u\n",
+				      "%pI4:%u/%u->%pI4:%u/%u\n",
 				      qp->wq.sq.qid, qp->wq.rq.qid,
 				      (int)qp->attr.state,
 				      qp->wq.sq.flags & T4_SQ_ONCHIP,
 				      qp->ep->hwtid, (int)qp->ep->com.state,
 				      &lsin->sin_addr, ntohs(lsin->sin_port),
-				      &rsin->sin_addr, ntohs(rsin->sin_port));
+				      ntohs(mapped_lsin->sin_port),
+				      &rsin->sin_addr, ntohs(rsin->sin_port),
+				      ntohs(mapped_rsin->sin_port));
 		} else {
 			struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 				&qp->ep->com.local_addr;
 			struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 				&qp->ep->com.remote_addr;
+			struct sockaddr_in6 *mapped_lsin6 =
+				(struct sockaddr_in6 *)
+				&qp->ep->com.mapped_local_addr;
+			struct sockaddr_in6 *mapped_rsin6 =
+				(struct sockaddr_in6 *)
+				&qp->ep->com.mapped_remote_addr;
 
 			cc = snprintf(qpd->buf + qpd->pos, space,
 				      "rc qp sq id %u rq id %u state %u "
 				      "onchip %u ep tid %u state %u "
-				      "%pI6:%u->%pI6:%u\n",
+				      "%pI6:%u/%u->%pI6:%u/%u\n",
 				      qp->wq.sq.qid, qp->wq.rq.qid,
 				      (int)qp->attr.state,
 				      qp->wq.sq.flags & T4_SQ_ONCHIP,
 				      qp->ep->hwtid, (int)qp->ep->com.state,
 				      &lsin6->sin6_addr,
 				      ntohs(lsin6->sin6_port),
+				      ntohs(mapped_lsin6->sin6_port),
 				      &rsin6->sin6_addr,
-				      ntohs(rsin6->sin6_port));
+				      ntohs(rsin6->sin6_port),
+				      ntohs(mapped_rsin6->sin6_port));
 		}
 	} else
 		cc = snprintf(qpd->buf + qpd->pos, space,
@@ -386,31 +410,43 @@
 			&ep->com.local_addr;
 		struct sockaddr_in *rsin = (struct sockaddr_in *)
 			&ep->com.remote_addr;
+		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+			&ep->com.mapped_local_addr;
+		struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+			&ep->com.mapped_remote_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
 			      "history 0x%lx hwtid %d atid %d "
-			      "%pI4:%d <-> %pI4:%d\n",
+			      "%pI4:%d/%d <-> %pI4:%d/%d\n",
 			      ep, ep->com.cm_id, ep->com.qp,
 			      (int)ep->com.state, ep->com.flags,
 			      ep->com.history, ep->hwtid, ep->atid,
 			      &lsin->sin_addr, ntohs(lsin->sin_port),
-			      &rsin->sin_addr, ntohs(rsin->sin_port));
+			      ntohs(mapped_lsin->sin_port),
+			      &rsin->sin_addr, ntohs(rsin->sin_port),
+			      ntohs(mapped_rsin->sin_port));
 	} else {
 		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 			&ep->com.local_addr;
 		struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 			&ep->com.remote_addr;
+		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_local_addr;
+		struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_remote_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
 			      "history 0x%lx hwtid %d atid %d "
-			      "%pI6:%d <-> %pI6:%d\n",
+			      "%pI6:%d/%d <-> %pI6:%d/%d\n",
 			      ep, ep->com.cm_id, ep->com.qp,
 			      (int)ep->com.state, ep->com.flags,
 			      ep->com.history, ep->hwtid, ep->atid,
 			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
-			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+			      ntohs(mapped_lsin6->sin6_port),
+			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+			      ntohs(mapped_rsin6->sin6_port));
 	}
 	if (cc < space)
 		epd->pos += cc;
@@ -431,23 +467,29 @@
 	if (ep->com.local_addr.ss_family == AF_INET) {
 		struct sockaddr_in *lsin = (struct sockaddr_in *)
 			&ep->com.local_addr;
+		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+			&ep->com.mapped_local_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
-			      "backlog %d %pI4:%d\n",
+			      "backlog %d %pI4:%d/%d\n",
 			      ep, ep->com.cm_id, (int)ep->com.state,
 			      ep->com.flags, ep->stid, ep->backlog,
-			      &lsin->sin_addr, ntohs(lsin->sin_port));
+			      &lsin->sin_addr, ntohs(lsin->sin_port),
+			      ntohs(mapped_lsin->sin_port));
 	} else {
 		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 			&ep->com.local_addr;
+		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_local_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
-			      "backlog %d %pI6:%d\n",
+			      "backlog %d %pI6:%d/%d\n",
 			      ep, ep->com.cm_id, (int)ep->com.state,
 			      ep->com.flags, ep->stid, ep->backlog,
-			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+			      ntohs(mapped_lsin6->sin6_port));
 	}
 	if (cc < space)
 		epd->pos += cc;
@@ -687,6 +729,7 @@
 	if (ctx->dev->rdev.oc_mw_kva)
 		iounmap(ctx->dev->rdev.oc_mw_kva);
 	ib_dealloc_device(&ctx->dev->ibdev);
+	iwpm_exit(RDMA_NL_C4IW);
 	ctx->dev = NULL;
 }
 
@@ -736,6 +779,7 @@
 			pci_resource_len(devp->rdev.lldi.pdev, 2));
 		if (!devp->rdev.bar2_kva) {
 			pr_err(MOD "Unable to ioremap BAR2\n");
+			ib_dealloc_device(&devp->ibdev);
 			return ERR_PTR(-EINVAL);
 		}
 	} else if (ocqp_supported(infop)) {
@@ -747,6 +791,7 @@
 			devp->rdev.lldi.vr->ocq.size);
 		if (!devp->rdev.oc_mw_kva) {
 			pr_err(MOD "Unable to ioremap onchip mem\n");
+			ib_dealloc_device(&devp->ibdev);
 			return ERR_PTR(-EINVAL);
 		}
 	}
@@ -780,6 +825,14 @@
 					c4iw_debugfs_root);
 		setup_debugfs(devp);
 	}
+
+	ret = iwpm_init(RDMA_NL_C4IW);
+	if (ret) {
+		pr_err("port mapper initialization failed with %d\n", ret);
+		ib_dealloc_device(&devp->ibdev);
+		return ERR_PTR(ret);
+	}
+
 	return devp;
 }
 
@@ -1274,6 +1327,11 @@
 		printk(KERN_WARNING MOD
 		       "could not create debugfs entry, continuing\n");
 
+	if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
+			    c4iw_nl_cb_table))
+		pr_err("%s[%u]: Failed to add netlink callback\n"
+		       , __func__, __LINE__);
+
 	cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
 
 	return 0;
@@ -1291,6 +1349,7 @@
 	}
 	mutex_unlock(&dev_mutex);
 	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
+	ibnl_remove_client(RDMA_NL_C4IW);
 	c4iw_cm_term();
 	debugfs_remove_recursive(c4iw_debugfs_root);
 }

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 7474b49..125bc5d 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h

@@ -52,6 +52,8 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
 
 #include "cxgb4.h"
 #include "cxgb4_uld.h"
@@ -728,6 +730,7 @@
 	CLOSE_SENT		= 3,
 	TIMEOUT                 = 4,
 	QP_REFERENCED           = 5,
+	RELEASE_MAPINFO		= 6,
 };
 
 enum c4iw_ep_history {
@@ -764,6 +767,8 @@
 	struct mutex mutex;
 	struct sockaddr_storage local_addr;
 	struct sockaddr_storage remote_addr;
+	struct sockaddr_storage mapped_local_addr;
+	struct sockaddr_storage mapped_remote_addr;
 	struct c4iw_wr_wait wr_wait;
 	unsigned long flags;
 	unsigned long history;
@@ -805,8 +810,49 @@
 	u8 retry_with_mpa_v1;
 	u8 tried_with_mpa_v1;
 	unsigned int retry_count;
+	int snd_win;
+	int rcv_win;
 };
 
+static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
+			      const char *msg)
+{	/* Debug helper: passes func, msg and epc's local/remote addresses to PDBG. */
+	/* SINA/SIN6A give the address pointer for %pI4/%pI6; SINP/SIN6P give the port after ntohs(). */
+#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
+#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
+#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
+#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
+
+	if (c4iw_debug) {	/* nothing is printed unless c4iw_debug is non-zero */
+		switch (epc->local_addr.ss_family) {	/* the local family picks the format used for both ends */
+		case AF_INET:	/* output: local addr:port/mapped-port <-> remote addr:port/mapped-port */
+			PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
+			     func, msg, SINA(&epc->local_addr),
+			     SINP(&epc->local_addr),
+			     SINP(&epc->mapped_local_addr),
+			     SINA(&epc->remote_addr),
+			     SINP(&epc->remote_addr),
+			     SINP(&epc->mapped_remote_addr));
+			break;
+		case AF_INET6:	/* same layout as the IPv4 case, using the sockaddr_in6 fields */
+			PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
+			     func, msg, SIN6A(&epc->local_addr),
+			     SIN6P(&epc->local_addr),
+			     SIN6P(&epc->mapped_local_addr),
+			     SIN6A(&epc->remote_addr),
+			     SIN6P(&epc->remote_addr),
+			     SIN6P(&epc->mapped_remote_addr));
+			break;
+		default:	/* any other address family: print nothing */
+			break;
+		}
+	}
+#undef SINA
+#undef SINP
+#undef SIN6A
+#undef SIN6P
+}
+
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
 {
 	return cm_id->provider_data;

diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index a94a3e1..b1d3053 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c

@@ -122,7 +122,7 @@
 	INIT_LIST_HEAD(&context->mmaps);
 	spin_lock_init(&context->mmap_lock);
 
-	if (udata->outlen < sizeof(uresp)) {
+	if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
 		if (!warned++)
 			pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
 		rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
@@ -140,7 +140,8 @@
 		context->key += PAGE_SIZE;
 		spin_unlock(&context->mmap_lock);
 
-		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
 		if (ret)
 			goto err_mm;
 
@@ -499,7 +500,7 @@
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
 	memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
 	dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
-	dev->ibdev.num_comp_vectors = 1;
+	dev->ibdev.num_comp_vectors =  dev->rdev.lldi.nciq;
 	dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
 	dev->ibdev.query_device = c4iw_query_device;
 	dev->ibdev.query_port = c4iw_query_port;

diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 2178f31..68b0a6b 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h

@@ -542,6 +542,7 @@
 	size_t memsize;
 	__be64 bits_type_ts;
 	u32 cqid;
+	int vector;
 	u16 size; /* including status page */
 	u16 cidx;
 	u16 sw_pidx;

diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 6121ca0..91289a0 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h

@@ -848,6 +848,7 @@
 #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
 #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
 
+#define CONG_CNTRL_VALID   (1 << 18)
 #define T5_OPT_2_VALID       (1 << 31)
 
 #endif /* _T4FW_RI_API_H_ */

diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index 11ccd27..cbd0ce1 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h

@@ -48,6 +48,7 @@
 	__u32 cqid;
 	__u32 size;
 	__u32 qid_mask;
+	__u32 reserved; /* explicit padding (optional for i386) */
 };
 
 
@@ -74,5 +75,6 @@
 struct c4iw_alloc_ucontext_resp {
 	__u64 status_page_key;
 	__u32 status_page_size;
+	__u32 reserved; /* explicit padding (optional for i386) */
 };
 #endif

diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index e2f9a51..45802e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c

@@ -346,6 +346,10 @@
 			ret = -EFAULT;
 			goto bail;
 		}
+		dp.len = odp.len;
+		dp.unit = odp.unit;
+		dp.data = odp.data;
+		dp.pbc_wd = 0;
 	} else {
 		ret = -EINVAL;
 		goto bail;

diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26dfbc8..01ba792 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c

@@ -70,7 +70,7 @@
 	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
 		int i;
 		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-			dd->ipath_lastcancel > jiffies) {
+			time_after(dd->ipath_lastcancel, jiffies)) {
 			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
 					  "SendbufErrs %lx %lx", sbuf[0],
 					  sbuf[1]);
@@ -755,7 +755,7 @@
 
 	/* likely due to cancel; so suppress message unless verbose */
 	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-		dd->ipath_lastcancel > jiffies) {
+		time_after(dd->ipath_lastcancel, jiffies)) {
 		/* armlaunch takes precedence; it often causes both. */
 		ipath_cdbg(VERBOSE,
 			"Suppressed %s error (%llx) after sendbuf cancel\n",

diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 98ac18e..17a5177 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c

@@ -247,7 +247,7 @@
 
 	/* ipath_sdma_abort() is done, waiting for interrupt */
 	if (status == IPATH_SDMA_ABORT_DISARMED) {
-		if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+		if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
 			goto resched_noprint;
 		/* give up, intr got lost somewhere */
 		ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -341,7 +341,7 @@
 	 * JAG - this is bad to just have default be a loop without
 	 * state change
 	 */
-	if (jiffies > dd->ipath_sdma_abort_jiffies) {
+	if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
 		ipath_dbg("looping with status 0x%08lx\n",
 			  dd->ipath_sdma_status);
 		dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;

diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 170dca6..2d8c339 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c

@@ -73,7 +73,7 @@
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
 	struct mlx4_dev *dev = ibdev->dev;
-	int is_mcast;
+	int is_mcast = 0;
 	struct in6_addr in6;
 	u16 vlan_tag;
 

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5f64081..1066eec 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c

@@ -102,7 +102,7 @@
 	int err;
 
 	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-			     PAGE_SIZE * 2, &buf->buf);
+			     PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
 
 	if (err)
 		goto out;
@@ -113,7 +113,7 @@
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 
@@ -209,7 +209,7 @@
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
 		if (err)
 			goto err_cq;
 

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index fd36ec6..287ad05 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c

@@ -478,10 +478,6 @@
 	if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (!dest_qpt)
 		tun_qp = &tun_ctx->qp[0];
 	else
@@ -667,6 +663,21 @@
 	}
 	/* Class-specific handling */
 	switch (mad->mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		/* 255 indicates the dom0 */
+		if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+			if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+				return -EPERM;
+			/* for a VF. drop unsolicited MADs */
+			if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+				mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+					     slave, mad->mad_hdr.mgmt_class,
+					     mad->mad_hdr.method);
+				return -EINVAL;
+			}
+		}
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
 					     (struct ib_sa_mad *) mad))
@@ -1165,10 +1176,6 @@
 	if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (dest_qpt == IB_QPT_SMI) {
 		src_qpnum = 0;
 		sqp = &sqp_ctx->qp[0];
@@ -1285,11 +1292,6 @@
 			     "belongs to another slave\n", wc->src_qp);
 		return;
 	}
-	if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
-		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
-			     "non-master trying to send QP0 packets\n", wc->src_qp);
-		return;
-	}
 
 	/* Map transaction ID */
 	ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1317,6 +1319,12 @@
 
 	/* Class-specific handling */
 	switch (tunnel->mad.mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		if (slave != mlx4_master_func_num(dev->dev) &&
+		    !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+			return;
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
 			      (struct ib_sa_mad *) &tunnel->mad))
@@ -1749,9 +1757,9 @@
 		return -EEXIST;
 
 	ctx->state = DEMUX_PV_STATE_STARTING;
-	/* have QP0 only on port owner, and only if link layer is IB */
-	if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
-	    rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+	/* have QP0 only if link layer is IB */
+	if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+	    IB_LINK_LAYER_INFINIBAND)
 		ctx->has_smi = 1;
 
 	if (ctx->has_smi) {

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 199c789..0f7027e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c

@@ -545,12 +545,11 @@
 	return 0;
 }
 
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
-			 u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+			    u32 cap_mask)
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
 	if (IS_ERR(mailbox))
@@ -564,8 +563,8 @@
 		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
 	}
 
-	err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
 	return err;
@@ -574,11 +573,20 @@
 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 			       struct ib_port_modify *props)
 {
+	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+	u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 	struct ib_port_attr attr;
 	u32 cap_mask;
 	int err;
 
-	mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
+	 * of whether port link layer is ETH or IB. For ETH ports, qkey
+	 * violations and port capabilities are not meaningful.
+	 */
+	if (is_eth)
+		return 0;
+
+	mutex_lock(&mdev->cap_mask_mutex);
 
 	err = mlx4_ib_query_port(ibdev, port, &attr);
 	if (err)
@@ -587,9 +595,9 @@
 	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
 		~props->clr_port_cap_mask;
 
-	err = mlx4_SET_PORT(to_mdev(ibdev), port,
-			    !!(mask & IB_PORT_RESET_QKEY_CNTR),
-			    cap_mask);
+	err = mlx4_ib_SET_PORT(mdev, port,
+			       !!(mask & IB_PORT_RESET_QKEY_CNTR),
+			       cap_mask);
 
 out:
 	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 66b0b7d..369da3c 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -156,6 +156,7 @@
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
 };

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index dc57482..6778045 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c

@@ -608,9 +608,20 @@
 	return !attr->srq;
 }
 
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{	/* Returns 1 when qpn equals a qp0_proxy[] entry whose qp0_qkey[] is non-zero, else 0. */
+	int i;	/* index into the caps arrays, bounded by caps.num_ports */
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i])
+			return !!dev->caps.qp0_qkey[i];	/* first match decides; !! normalises to 0/1 */
+	}
+	return 0;	/* qpn matched no qp0_proxy[] entry */
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+			    gfp_t gfp)
 {
 	int qpn;
 	int err;
@@ -625,10 +636,13 @@
 		     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
 			if (init_attr->qp_type == IB_QPT_GSI)
 				qp_type = MLX4_IB_QPT_PROXY_GSI;
-			else if (mlx4_is_master(dev->dev))
-				qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
-			else
-				qp_type = MLX4_IB_QPT_PROXY_SMI;
+			else {
+				if (mlx4_is_master(dev->dev) ||
+				    qp0_enabled_vf(dev->dev, sqpn))
+					qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+				else
+					qp_type = MLX4_IB_QPT_PROXY_SMI;
+			}
 		}
 		qpn = sqpn;
 		/* add extra sg entry for tunneling */
@@ -643,7 +657,9 @@
 			return -EINVAL;
 		if (tnl_init->proxy_qp_type == IB_QPT_GSI)
 			qp_type = MLX4_IB_QPT_TUN_GSI;
-		else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+		else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+			 mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+					     tnl_init->port))
 			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
 		else
 			qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -658,14 +674,14 @@
 		if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
 		    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
 				MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
-			sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+			sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
 			if (!sqp)
 				return -ENOMEM;
 			qp = &sqp->qp;
 			qp->pri.vid = 0xFFFF;
 			qp->alt.vid = 0xFFFF;
 		} else {
-			qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+			qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
 			if (!qp)
 				return -ENOMEM;
 			qp->pri.vid = 0xFFFF;
@@ -748,14 +764,14 @@
 			goto err;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
 			if (err)
 				goto err;
 
 			*qp->db.db = 0;
 		}
 
-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -765,13 +781,12 @@
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -801,7 +816,7 @@
 			goto err_proxy;
 	}
 
-	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
 	if (err)
 		goto err_qpn;
 
@@ -1040,7 +1055,10 @@
 	struct mlx4_ib_qp *qp = NULL;
 	int err;
 	u16 xrcdn = 0;
+	gfp_t gfp;
 
+	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+		GFP_NOIO : GFP_KERNEL;
 	/*
 	 * We only support LSO, vendor flag1, and multicast loopback blocking,
 	 * and only for kernel UD QPs.
@@ -1049,7 +1067,8 @@
 					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
 					MLX4_IB_SRIOV_TUNNEL_QP |
 					MLX4_IB_SRIOV_SQP |
-					MLX4_IB_QP_NETIF))
+					MLX4_IB_QP_NETIF |
+					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1059,7 +1078,7 @@
 
 	if (init_attr->create_flags &&
 	    (udata ||
-	     ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+	     ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
 	      init_attr->qp_type != IB_QPT_UD) ||
 	     ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
 	      init_attr->qp_type > IB_QPT_GSI)))
@@ -1079,7 +1098,7 @@
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_RAW_PACKET:
-		qp = kzalloc(sizeof *qp, GFP_KERNEL);
+		qp = kzalloc(sizeof *qp, gfp);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 		qp->pri.vid = 0xFFFF;
@@ -1088,7 +1107,7 @@
 	case IB_QPT_UD:
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-				       udata, 0, &qp);
+				       udata, 0, &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1106,7 +1125,7 @@
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
 				       get_sqp_num(to_mdev(pd->device), init_attr),
-				       &qp);
+				       &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1938,6 +1957,19 @@
 	return err;
 }
 
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{	/* Looks qpn up in caps.qp0_proxy[]/qp0_tunnel[]; on a match stores caps.qp0_qkey[i] in *qkey. */
+	int i;	/* index into the caps arrays, bounded by caps.num_ports */
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i] ||
+		    qpn == dev->caps.qp0_tunnel[i]) {
+			*qkey = dev->caps.qp0_qkey[i];
+			return 0;	/* found: *qkey has been written */
+		}
+	}
+	return -EINVAL;	/* no proxy or tunnel QPN matched; *qkey is left untouched */
+}
+
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 				  struct ib_send_wr *wr,
 				  void *wqe, unsigned *mlx_seg_len)
@@ -1995,8 +2027,13 @@
 			cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 
 	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-	if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
-		return -EINVAL;
+	if (mlx4_is_master(mdev->dev)) {
+		if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	} else {
+		if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	}
 	sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
 	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
 
@@ -2378,7 +2415,8 @@
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 				    struct mlx4_wqe_datagram_seg *dseg,
-				    struct ib_send_wr *wr, enum ib_qp_type qpt)
+				    struct ib_send_wr *wr,
+				    enum mlx4_ib_qp_type qpt)
 {
 	union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
 	struct mlx4_av sqp_av = {0};
@@ -2391,8 +2429,10 @@
 			cpu_to_be32(0xf0000000);
 
 	memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
-	/* This function used only for sending on QP1 proxies */
-	dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	if (qpt == MLX4_IB_QPT_PROXY_GSI)
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	else
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
 	/* Use QKEY from the QP context, which is set by master */
 	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
@@ -2687,11 +2727,6 @@
 			break;
 
 		case MLX4_IB_QPT_PROXY_SMI_OWNER:
-			if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
-				err = -ENOSYS;
-				*bad_wr = wr;
-				goto out;
-			}
 			err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
 			if (unlikely(err)) {
 				*bad_wr = wr;
@@ -2708,16 +2743,13 @@
 			size += seglen / 16;
 			break;
 		case MLX4_IB_QPT_PROXY_SMI:
-			/* don't allow QP0 sends on guests */
-			err = -ENOSYS;
-			*bad_wr = wr;
-			goto out;
 		case MLX4_IB_QPT_PROXY_GSI:
 			/* If we are tunneling special qps, this is a UD qp.
 			 * In this case we first add a UD segment targeting
 			 * the tunnel qp, and then add a header with address
 			 * information */
-			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+						qp->mlx4_ib_qp_type);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 			build_tunnel_header(wr, wqe, &seglen);

diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb0..62d9285 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c

@@ -134,13 +134,14 @@
 		if (err)
 			goto err_mtt;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+		err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
 		if (err)
 			goto err_srq;
 
 		*srq->db.db = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+				   GFP_KERNEL)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -165,7 +166,7 @@
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
 		if (err)
 			goto err_mtt;
 

diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 5a38e43..cb4c66e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c

@@ -389,8 +389,10 @@
 	struct mlx4_ib_dev    *dev;
 	struct attribute_group pkey_group;
 	struct attribute_group gid_group;
-	u8                     port_num;
+	struct device_attribute	enable_smi_admin;
+	struct device_attribute	smi_enabled;
 	int		       slave;
+	u8                     port_num;
 };
 
 
@@ -558,6 +560,101 @@
 	return NULL;
 }
 
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{	/* Show handler for "smi_enabled": writes "1\n" or "0\n" into buf, returns sprintf()'s length. */
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, smi_enabled);	/* attr is the smi_enabled member of its mlx4_port */
+	ssize_t len = 0;
+
+	if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))	/* asked for this entry's slave and port */
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{	/* Show handler for "enable_smi_admin": writes "1\n" or "0\n" into buf, returns sprintf()'s length. */
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);	/* attr is the enable_smi_admin member */
+	ssize_t len = 0;
+
+	if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))	/* any non-zero result prints as 1 */
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{	/* Store handler for "enable_smi_admin": accepts 0 or 1; returns count, or -EINVAL on any failure. */
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);	/* attr is the enable_smi_admin member */
+	int enable;
+
+	if (sscanf(buf, "%i", &enable) != 1 ||	/* %i also accepts 0x-prefixed and octal input */
+	    enable < 0 || enable > 1)
+		return -EINVAL;
+
+	if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))	/* its error value is not propagated */
+		return -EINVAL;
+	return count;	/* report the whole write as consumed */
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{	/* Creates the "smi_enabled" and "enable_smi_admin" files under p->kobj; returns 0 or the sysfs error. */
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+	int ret;
+
+	/* do not display entries if eth transport, or if master */
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+		return 0;	/* skipping is reported as success */
+
+	sysfs_attr_init(&p->smi_enabled.attr);
+	p->smi_enabled.show = sysfs_show_smi_enabled;
+	p->smi_enabled.store = NULL;	/* read-only: no store handler, mode 0444 below */
+	p->smi_enabled.attr.name = "smi_enabled";
+	p->smi_enabled.attr.mode = 0444;
+	ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+	if (ret) {
+		pr_err("failed to create smi_enabled\n");
+		return ret;
+	}
+
+	sysfs_attr_init(&p->enable_smi_admin.attr);
+	p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+	p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+	p->enable_smi_admin.attr.name = "enable_smi_admin";
+	p->enable_smi_admin.attr.mode = 0644;	/* writable, unlike smi_enabled */
+	ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+	if (ret) {
+		pr_err("failed to create enable_smi_admin\n");
+		sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);	/* undo the first file so failure leaves neither */
+		return ret;
+	}
+	return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{	/* Removes the "smi_enabled" and "enable_smi_admin" files from p->kobj. */
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))	/* same condition under which add_vf_smi_entries() creates nothing */
+		return;
+
+	sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+	sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
 static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 {
 	struct mlx4_port *p;
@@ -602,6 +699,10 @@
 	if (ret)
 		goto err_free_gid;
 
+	ret = add_vf_smi_entries(p);
+	if (ret)
+		goto err_free_gid;
+
 	list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
 	return 0;
 
@@ -669,6 +770,7 @@
 		mport = container_of(p, struct mlx4_port, kobj);
 		sysfs_remove_group(p, &mport->pkey_group);
 		sysfs_remove_group(p, &mport->gid_group);
+		remove_vf_smi_entries(mport);
 		kobject_put(p);
 	}
 	kobject_put(dev->dev_ports_parent[slave]);
@@ -713,6 +815,7 @@
 			port = container_of(p, struct mlx4_port, kobj);
 			sysfs_remove_group(p, &port->pkey_group);
 			sysfs_remove_group(p, &port->gid_group);
+			remove_vf_smi_entries(port);
 			kobject_put(p);
 			kobject_put(device->dev_ports_parent[slave]);
 		}

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 62bb6b4..8ae4f89 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c

@@ -32,6 +32,7 @@
 
 #include <linux/kref.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
 #include "user.h"
 
@@ -602,14 +603,24 @@
 			  int *cqe_size, int *index, int *inlen)
 {
 	struct mlx5_ib_create_cq ucmd;
+	size_t ucmdlen;
 	int page_shift;
 	int npages;
 	int ncont;
 	int err;
 
-	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+	ucmdlen =
+		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+		 sizeof(ucmd)) ? (sizeof(ucmd) -
+				  sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
 		return -EFAULT;
 
+	if (ucmdlen == sizeof(ucmd) &&
+	    ucmd.reserved != 0)
+		return -EINVAL;
+
 	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
 		return -EINVAL;
 

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5054158..f2ccf1a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -264,8 +264,6 @@
 	__be64			*pas;
 	dma_addr_t		dma;
 	int			npages;
-	struct completion	done;
-	enum ib_wc_status	status;
 	struct mlx5_ib_dev     *dev;
 	struct mlx5_create_mkey_mbox_out out;
 	struct mlx5_core_sig_ctx    *sig;
@@ -277,6 +275,17 @@
 	dma_addr_t			map;
 };
 
+struct mlx5_ib_umr_context {
+	enum ib_wc_status	status;	/* wc.status reported for the WR */
+	struct completion	done;	/* signalled by the UMR CQ handler */
+};
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+	init_completion(&context->done);
+	context->status = -1;	/* placeholder until the CQ handler sets it */
+}
+
 struct umr_common {
 	struct ib_pd	*pd;
 	struct ib_cq	*cq;

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 81392b2..afa873b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c

@@ -73,6 +73,8 @@
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u8 key;
 	unsigned long flags;
+	struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+	int err;
 
 	spin_lock_irqsave(&ent->lock, flags);
 	ent->pending--;
@@ -107,6 +109,13 @@
 	ent->cur++;
 	ent->size++;
 	spin_unlock_irqrestore(&ent->lock, flags);
+
+	write_lock_irqsave(&table->lock, flags);
+	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+				&mr->mmr);
+	if (err)
+		pr_err("Error inserting to mr tree. 0x%x\n", -err);
+	write_unlock_irqrestore(&table->lock, flags);
 }
 
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -699,7 +708,7 @@
 
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
 {
-	struct mlx5_ib_mr *mr;
+	struct mlx5_ib_umr_context *context;
 	struct ib_wc wc;
 	int err;
 
@@ -712,9 +721,9 @@
 		if (err == 0)
 			break;
 
-		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
-		mr->status = wc.status;
-		complete(&mr->done);
+		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+		context->status = wc.status;
+		complete(&context->done);
 	}
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 }
@@ -726,11 +735,12 @@
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct device *ddev = dev->ib_dev.dma_device;
 	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
 	struct ib_send_wr wr, *bad;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
 	int size = sizeof(u64) * npages;
-	int err;
+	int err = 0;
 	int i;
 
 	for (i = 0; i < 1; i++) {
@@ -751,7 +761,7 @@
 	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 	if (!mr->pas) {
 		err = -ENOMEM;
-		goto error;
+		goto free_mr;
 	}
 
 	mlx5_ib_populate_pas(dev, umem, page_shift,
@@ -760,44 +770,46 @@
 	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
 				 DMA_TO_DEVICE);
 	if (dma_mapping_error(ddev, mr->dma)) {
-		kfree(mr->pas);
 		err = -ENOMEM;
-		goto error;
+		goto free_pas;
 	}
 
 	memset(&wr, 0, sizeof(wr));
-	wr.wr_id = (u64)(unsigned long)mr;
+	wr.wr_id = (u64)(unsigned long)&umr_context;
 	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
 
-	/* We serialize polls so one process does not kidnap another's
-	 * completion. This is not a problem since wr is completed in
-	 * around 1 usec
-	 */
+	mlx5_ib_init_umr_context(&umr_context);
 	down(&umrc->sem);
-	init_completion(&mr->done);
 	err = ib_post_send(umrc->qp, &wr, &bad);
 	if (err) {
 		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
-		up(&umrc->sem);
-		goto error;
+		goto unmap_dma;
+	} else {
+		wait_for_completion(&umr_context.done);
+		if (umr_context.status != IB_WC_SUCCESS) {
+			mlx5_ib_warn(dev, "reg umr failed\n");
+			err = -EFAULT;
+		}
 	}
-	wait_for_completion(&mr->done);
-	up(&umrc->sem);
 
+	mr->mmr.iova = virt_addr;
+	mr->mmr.size = len;
+	mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
+	up(&umrc->sem);
 	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+
+free_pas:
 	kfree(mr->pas);
 
-	if (mr->status != IB_WC_SUCCESS) {
-		mlx5_ib_warn(dev, "reg umr failed\n");
-		err = -EFAULT;
-		goto error;
+free_mr:
+	if (err) {
+		free_cached_mr(dev, mr);
+		return ERR_PTR(err);
 	}
 
 	return mr;
-
-error:
-	free_cached_mr(dev, mr);
-	return ERR_PTR(err);
 }
 
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
@@ -926,24 +938,26 @@
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
 	struct ib_send_wr wr, *bad;
 	int err;
 
 	memset(&wr, 0, sizeof(wr));
-	wr.wr_id = (u64)(unsigned long)mr;
+	wr.wr_id = (u64)(unsigned long)&umr_context;
 	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
 
+	mlx5_ib_init_umr_context(&umr_context);
 	down(&umrc->sem);
-	init_completion(&mr->done);
 	err = ib_post_send(umrc->qp, &wr, &bad);
 	if (err) {
 		up(&umrc->sem);
 		mlx5_ib_dbg(dev, "err %d\n", err);
 		goto error;
+	} else {
+		wait_for_completion(&umr_context.done);
+		up(&umrc->sem);
 	}
-	wait_for_completion(&mr->done);
-	up(&umrc->sem);
-	if (mr->status != IB_WC_SUCCESS) {
+	if (umr_context.status != IB_WC_SUCCESS) {
 		mlx5_ib_warn(dev, "unreg umr failed\n");
 		err = -EFAULT;
 		goto error;

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index dc930ed..d13ddf1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c

@@ -574,6 +574,10 @@
 	uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
 	mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
 
+	qp->rq.offset = 0;
+	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
 	err = set_user_buf_size(dev, qp, &ucmd);
 	if (err)
 		goto err_uuar;
@@ -2078,6 +2082,7 @@
 	struct ib_sig_domain *wire = &sig_attrs->wire;
 	int ret, selector;
 
+	memset(bsf, 0, sizeof(*bsf));
 	switch (sig_attrs->mem.sig_type) {
 	case IB_SIG_TYPE_T10_DIF:
 		if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
@@ -2090,9 +2095,11 @@
 			/* Same block structure */
 			basic->bsf_size_sbs = 1 << 4;
 			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
-				basic->wire.copy_byte_mask = 0xff;
-			else
-				basic->wire.copy_byte_mask = 0x3f;
+				basic->wire.copy_byte_mask |= 0xc0;
+			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+				basic->wire.copy_byte_mask |= 0x30;
+			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+				basic->wire.copy_byte_mask |= 0x0f;
 		} else
 			basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
 
@@ -2131,9 +2138,13 @@
 	int ret;
 	int wqe_size;
 
-	if (!wr->wr.sig_handover.prot) {
+	if (!wr->wr.sig_handover.prot ||
+	    (data_key == wr->wr.sig_handover.prot->lkey &&
+	     data_va == wr->wr.sig_handover.prot->addr &&
+	     data_len == wr->wr.sig_handover.prot->length)) {
 		/**
 		 * Source domain doesn't contain signature information
+		 * or data and protection are interleaved in memory.
 		 * So need construct:
 		 *                  ------------------
 		 *                 |     data_klm     |
@@ -2187,23 +2198,13 @@
 		data_sentry->bcount = cpu_to_be16(block_size);
 		data_sentry->key = cpu_to_be32(data_key);
 		data_sentry->va = cpu_to_be64(data_va);
+		data_sentry->stride = cpu_to_be16(block_size);
+
 		prot_sentry->bcount = cpu_to_be16(prot_size);
 		prot_sentry->key = cpu_to_be32(prot_key);
+		prot_sentry->va = cpu_to_be64(prot_va);
+		prot_sentry->stride = cpu_to_be16(prot_size);
 
-		if (prot_key == data_key && prot_va == data_va) {
-			/**
-			 * The data and protection are interleaved
-			 * in a single memory region
-			 **/
-			prot_sentry->va = cpu_to_be64(data_va + block_size);
-			prot_sentry->stride = cpu_to_be16(block_size + prot_size);
-			data_sentry->stride = prot_sentry->stride;
-		} else {
-			/* The data and protection are two different buffers */
-			prot_sentry->va = cpu_to_be64(prot_va);
-			data_sentry->stride = cpu_to_be16(block_size);
-			prot_sentry->stride = cpu_to_be16(prot_size);
-		}
 		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
 				 sizeof(*prot_sentry), 64);
 	}
@@ -2275,7 +2276,10 @@
 
 	/* length of the protected region, data + protection */
 	region_len = wr->sg_list->length;
-	if (wr->wr.sig_handover.prot)
+	if (wr->wr.sig_handover.prot &&
+	    (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey  ||
+	     wr->wr.sig_handover.prot->addr != wr->sg_list->addr  ||
+	     wr->wr.sig_handover.prot->length != wr->sg_list->length))
 		region_len += wr->wr.sig_handover.prot->length;
 
 	/**

diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 210b3ea..384af6d 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c

@@ -35,6 +35,7 @@
 #include <linux/mlx5/srq.h>
 #include <linux/slab.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "mlx5_ib.h"
 #include "user.h"
@@ -78,16 +79,27 @@
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_create_srq ucmd;
+	size_t ucmdlen;
 	int err;
 	int npages;
 	int page_shift;
 	int ncont;
 	u32 offset;
 
-	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+	ucmdlen =
+		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+		 sizeof(ucmd)) ? (sizeof(ucmd) -
+				  sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
 		mlx5_ib_dbg(dev, "failed copy udata\n");
 		return -EFAULT;
 	}
+
+	if (ucmdlen == sizeof(ucmd) &&
+	    ucmd.reserved != 0)
+		return -EINVAL;
+
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
 	srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,

diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 0f4f8e4..d0ba264 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h

@@ -91,6 +91,7 @@
 	__u64	buf_addr;
 	__u64	db_addr;
 	__u32	cqe_size;
+	__u32	reserved; /* explicit padding (optional on i386) */
 };
 
 struct mlx5_ib_create_cq_resp {
@@ -109,6 +110,7 @@
 	__u64	buf_addr;
 	__u64	db_addr;
 	__u32	flags;
+	__u32	reserved; /* explicit padding (optional on i386) */
 };
 
 struct mlx5_ib_create_srq_resp {

diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 353c7b0..3b2a6dc 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c

@@ -68,7 +68,6 @@
 int max_mtu = 9000;
 int interrupt_mod_interval = 0;
 
-
 /* Interoperability */
 int mpa_version = 1;
 module_param(mpa_version, int, 0644);
@@ -112,6 +111,16 @@
 
 MODULE_DEVICE_TABLE(pci, nes_pci_table);
 
+/* nes netlink callbacks, indexed by port mapper op (RDMA_NL_IWPM_*) */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static int nes_net_event(struct notifier_block *, unsigned long, void *);
 static int nes_notifiers_registered;
@@ -672,6 +681,17 @@
 	}
 	nes_notifiers_registered++;
 
+	if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+		printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
+			__func__, __LINE__);
+
+	ret = iwpm_init(RDMA_NL_NES);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
+				pci_name(pcidev));
+		goto bail7;
+	}
+
 	INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
 
 	/* Initialize network devices */
@@ -710,6 +730,7 @@
 
 	nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
 			nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+	ibnl_remove_client(RDMA_NL_NES);
 
 	nes_notifiers_registered--;
 	if (nes_notifiers_registered == 0) {
@@ -773,6 +794,8 @@
 				nesdev->nesadapter->netdev_count--;
 			}
 		}
+	ibnl_remove_client(RDMA_NL_NES);
+	iwpm_exit(RDMA_NL_NES);
 
 	nes_notifiers_registered--;
 	if (nes_notifiers_registered == 0) {

diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 33cc589..bd9d132 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h

@@ -51,6 +51,8 @@
 #include <rdma/ib_pack.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
 
 #define NES_SEND_FIRST_WRITE
 
@@ -130,6 +132,7 @@
 #define NES_DBG_IW_TX       0x00040000
 #define NES_DBG_SHUTDOWN    0x00080000
 #define NES_DBG_PAU         0x00100000
+#define NES_DBG_NLMSG       0x00200000
 #define NES_DBG_RSVD1       0x10000000
 #define NES_DBG_RSVD2       0x20000000
 #define NES_DBG_RSVD3       0x40000000

diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index dfa9df4..6f09a72 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -59,6 +59,7 @@
 #include <net/route.h>
 #include <net/ip_fib.h>
 #include <net/tcp.h>
+#include <linux/fcntl.h>
 
 #include "nes.h"
 
@@ -166,7 +167,6 @@
 {
 	return rem_ref_cm_node(cm_node->cm_core, cm_node);
 }
-
 /**
  * create_event
  */
@@ -482,11 +482,11 @@
 	iph->ttl = 0x40;
 	iph->protocol = 0x06;   /* IPPROTO_TCP */
 
-	iph->saddr = htonl(cm_node->loc_addr);
-	iph->daddr = htonl(cm_node->rem_addr);
+	iph->saddr = htonl(cm_node->mapped_loc_addr);
+	iph->daddr = htonl(cm_node->mapped_rem_addr);
 
-	tcph->source = htons(cm_node->loc_port);
-	tcph->dest = htons(cm_node->rem_port);
+	tcph->source = htons(cm_node->mapped_loc_port);
+	tcph->dest = htons(cm_node->mapped_rem_port);
 	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
 
 	if (flags & SET_ACK) {
@@ -525,6 +525,100 @@
 	cm_packets_created++;
 }
 
+/* nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct;
+ * the storage is cleared first because callers pass uninitialized stack. */
+static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
+				struct sockaddr_storage *addr)
+{
+	struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
+	memset(addr, 0, sizeof(*addr));
+	nes_sockaddr->sin_family = AF_INET;
+	nes_sockaddr->sin_addr.s_addr = ip_addr;
+	nes_sockaddr->sin_port = port;
+}
+
+/*
+ * nes_create_mapinfo - Register the local/mapped address pair of cm_info
+ * through iwpm_create_mapinfo() for RDMA_NL_NES and return its result.
+ */
+static int nes_create_mapinfo(struct nes_cm_info *cm_info)
+{
+	struct sockaddr_storage local, mapped;
+
+	/* cm_info holds host-order values; the sockaddrs get network order */
+	nes_create_sockaddr(htonl(cm_info->loc_addr),
+			    htons(cm_info->loc_port), &local);
+	nes_create_sockaddr(htonl(cm_info->mapped_loc_addr),
+			    htons(cm_info->mapped_loc_port), &mapped);
+
+	return iwpm_create_mapinfo(&local, &mapped, RDMA_NL_NES);
+}
+
+/*
+ * nes_remove_mapinfo - Drop the mapinfo entry for a local/mapped address
+ * pair, then send a remove mapping op to the userspace port mapper.
+ * Arguments are in host byte order; returns iwpm_remove_mapping()'s result.
+ */
+static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
+			u32 mapped_loc_addr, u16 mapped_loc_port)
+{
+	struct sockaddr_storage local, mapped;
+	__be32 mapped_ip = htonl(mapped_loc_addr);
+	__be16 mapped_port = htons(mapped_loc_port);
+
+	nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local);
+	nes_create_sockaddr(mapped_ip, mapped_port, &mapped);
+
+	iwpm_remove_mapinfo(&local, &mapped);
+	return iwpm_remove_mapping(&local, RDMA_NL_NES);
+}
+
+/*
+ * nes_form_pm_msg - Store cm_info's local and remote endpoints in pm_msg
+ */
+static void nes_form_pm_msg(struct nes_cm_info *cm_info,
+				struct iwpm_sa_data *pm_msg)
+{
+	nes_create_sockaddr(htonl(cm_info->loc_addr),
+			    htons(cm_info->loc_port), &pm_msg->loc_addr);
+	nes_create_sockaddr(htonl(cm_info->rem_addr),
+			    htons(cm_info->rem_port), &pm_msg->rem_addr);
+}
+
+/*
+ * nes_form_reg_msg - Copy the netdev and ib device names into pm_msg
+ */
+static void nes_form_reg_msg(struct nes_vnic *nesvnic,
+			struct iwpm_dev_data *pm_msg)
+{
+	memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
+	memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name,
+	       IWPM_DEVNAME_SIZE);
+}
+
+/*
+ * nes_record_pm_msg - Copy the mapped addresses held in pm_msg into
+ * cm_info (host byte order); a side that is not AF_INET is left untouched.
+ */
+static void nes_record_pm_msg(struct nes_cm_info *cm_info,
+			struct iwpm_sa_data *pm_msg)
+{
+	struct sockaddr_in *loc, *rem;
+
+	loc = (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
+	rem = (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+
+	if (loc->sin_family == AF_INET) {
+		cm_info->mapped_loc_addr = ntohl(loc->sin_addr.s_addr);
+		cm_info->mapped_loc_port = ntohs(loc->sin_port);
+	}
+
+	if (rem->sin_family == AF_INET) {
+		cm_info->mapped_rem_addr = ntohl(rem->sin_addr.s_addr);
+		cm_info->mapped_rem_port = ntohs(rem->sin_port);
+	}
+}
+
 /**
  * print_core - dump a cm core
  */
@@ -1147,8 +1241,11 @@
 			  loc_addr, loc_port,
 			  cm_node->rem_addr, cm_node->rem_port,
 			  rem_addr, rem_port);
-		if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
-		    (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+		if ((cm_node->mapped_loc_addr == loc_addr) &&
+			(cm_node->mapped_loc_port == loc_port) &&
+			(cm_node->mapped_rem_addr == rem_addr) &&
+			(cm_node->mapped_rem_port == rem_port)) {
+
 			add_ref_cm_node(cm_node);
 			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 			return cm_node;
@@ -1165,18 +1262,28 @@
  * find_listener - find a cm node listening on this addr-port pair
  */
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
-					     nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+					nes_addr_t dst_addr, u16 dst_port,
+					enum nes_cm_listener_state listener_state, int local)
 {
 	unsigned long flags;
 	struct nes_cm_listener *listen_node;
+	nes_addr_t listen_addr;
+	u16 listen_port;
 
 	/* walk list and find cm_node associated with this session ID */
 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
 	list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+		if (local) {
+			listen_addr = listen_node->loc_addr;
+			listen_port = listen_node->loc_port;
+		} else {
+			listen_addr = listen_node->mapped_loc_addr;
+			listen_port = listen_node->mapped_loc_port;
+		}
 		/* compare node pair, return node handle if a match */
-		if (((listen_node->loc_addr == dst_addr) ||
-		     listen_node->loc_addr == 0x00000000) &&
-		    (listen_node->loc_port == dst_port) &&
+		if (((listen_addr == dst_addr) ||
+		     listen_addr == 0x00000000) &&
+		    (listen_port == dst_port) &&
 		    (listener_state & listen_node->listener_state)) {
 			atomic_inc(&listen_node->ref_count);
 			spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
@@ -1189,7 +1296,6 @@
 	return NULL;
 }
 
-
 /**
  * add_hte_node - add a cm node to the hash table
  */
@@ -1310,9 +1416,20 @@
 
 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
-		if (listener->nesvnic)
-			nes_manage_apbvt(listener->nesvnic, listener->loc_port,
-					 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+		if (listener->nesvnic) {
+			nes_manage_apbvt(listener->nesvnic,
+				listener->mapped_loc_port,
+				PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
+				NES_MANAGE_APBVT_DEL);
+
+			nes_remove_mapinfo(listener->loc_addr,
+					listener->loc_port,
+					listener->mapped_loc_addr,
+					listener->mapped_loc_port);
+			nes_debug(NES_DBG_NLMSG,
+					"Delete APBVT mapped_loc_port = %04X\n",
+					listener->mapped_loc_port);
+		}
 
 		nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
 
@@ -1454,6 +1571,11 @@
 	cm_node->loc_port = cm_info->loc_port;
 	cm_node->rem_port = cm_info->rem_port;
 
+	cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
+	cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
+	cm_node->mapped_loc_port = cm_info->mapped_loc_port;
+	cm_node->mapped_rem_port = cm_info->mapped_rem_port;
+
 	cm_node->mpa_frame_rev = mpa_version;
 	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
 	cm_node->mpav2_ird_ord = 0;
@@ -1500,8 +1622,10 @@
 	cm_node->loopbackpartner = NULL;
 
 	/* get the mac addr for the remote node */
-	oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
-	arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+	oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
+				NULL, NES_ARP_RESOLVE);
+	arpindex = nes_addr_resolve_neigh(nesvnic,
+				cm_node->mapped_rem_addr, oldarpindex);
 	if (arpindex < 0) {
 		kfree(cm_node);
 		return NULL;
@@ -1563,11 +1687,14 @@
 		mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
 	} else {
 		if (cm_node->apbvt_set && cm_node->nesvnic) {
-			nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
-					 PCI_FUNC(
-						 cm_node->nesvnic->nesdev->pcidev->devfn),
+			nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+					 PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
 					 NES_MANAGE_APBVT_DEL);
 		}
+		nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
+					cm_node->mapped_loc_port);
+		nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
+			cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
 	}
 
 	atomic_dec(&cm_core->node_cnt);
@@ -2235,17 +2362,21 @@
  * mini_cm_listen - create a listen node with params
  */
 static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
-					      struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+			struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 {
 	struct nes_cm_listener *listener;
+	struct iwpm_dev_data pm_reg_msg;
+	struct iwpm_sa_data pm_msg;
 	unsigned long flags;
+	int iwpm_err = 0;
 
 	nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
 		  cm_info->loc_addr, cm_info->loc_port);
 
 	/* cannot have multiple matching listeners */
-	listener = find_listener(cm_core, htonl(cm_info->loc_addr),
-				 htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+	listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
+				NES_CM_LISTENER_EITHER_STATE, 1);
+
 	if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
 		/* find automatically incs ref count ??? */
 		atomic_dec(&listener->ref_count);
@@ -2254,6 +2385,22 @@
 	}
 
 	if (!listener) {
+		nes_form_reg_msg(nesvnic, &pm_reg_msg);
+		iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+		if (iwpm_err) {
+			nes_debug(NES_DBG_NLMSG,
+			"Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+		}
+		if (iwpm_valid_pid() && !iwpm_err) {
+			nes_form_pm_msg(cm_info, &pm_msg);
+			iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
+			if (iwpm_err)
+				nes_debug(NES_DBG_NLMSG,
+				"Port Mapper query fail (err = %d).\n", iwpm_err);
+			else
+				nes_record_pm_msg(cm_info, &pm_msg);
+		}
+
 		/* create a CM listen node (1/2 node to compare incoming traffic to) */
 		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
 		if (!listener) {
@@ -2261,8 +2408,10 @@
 			return NULL;
 		}
 
-		listener->loc_addr = htonl(cm_info->loc_addr);
-		listener->loc_port = htons(cm_info->loc_port);
+		listener->loc_addr = cm_info->loc_addr;
+		listener->loc_port = cm_info->loc_port;
+		listener->mapped_loc_addr = cm_info->mapped_loc_addr;
+		listener->mapped_loc_port = cm_info->mapped_loc_port;
 		listener->reused_node = 0;
 
 		atomic_set(&listener->ref_count, 1);
@@ -2324,14 +2473,18 @@
 
 	if (cm_info->loc_addr == cm_info->rem_addr) {
 		loopbackremotelistener = find_listener(cm_core,
-						       ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
-						       NES_CM_LISTENER_ACTIVE_STATE);
+			cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
+			NES_CM_LISTENER_ACTIVE_STATE, 0);
 		if (loopbackremotelistener == NULL) {
 			create_event(cm_node, NES_CM_EVENT_ABORTED);
 		} else {
 			loopback_cm_info = *cm_info;
 			loopback_cm_info.loc_port = cm_info->rem_port;
 			loopback_cm_info.rem_port = cm_info->loc_port;
+			loopback_cm_info.mapped_loc_port =
+				cm_info->mapped_rem_port;
+			loopback_cm_info.mapped_rem_port =
+				cm_info->mapped_loc_port;
 			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
 			loopbackremotenode = make_cm_node(cm_core, nesvnic,
 							  &loopback_cm_info, loopbackremotelistener);
@@ -2560,6 +2713,12 @@
 	nfo.rem_addr = ntohl(iph->saddr);
 	nfo.rem_port = ntohs(tcph->source);
 
+	/* If port mapper is available these should be mapped address info */
+	nfo.mapped_loc_addr = ntohl(iph->daddr);
+	nfo.mapped_loc_port = ntohs(tcph->dest);
+	nfo.mapped_rem_addr = ntohl(iph->saddr);
+	nfo.mapped_rem_port = ntohs(tcph->source);
+
 	tmp_daddr = cpu_to_be32(iph->daddr);
 	tmp_saddr = cpu_to_be32(iph->saddr);
 
@@ -2568,8 +2727,8 @@
 
 	do {
 		cm_node = find_node(cm_core,
-				    nfo.rem_port, nfo.rem_addr,
-				    nfo.loc_port, nfo.loc_addr);
+				    nfo.mapped_rem_port, nfo.mapped_rem_addr,
+				    nfo.mapped_loc_port, nfo.mapped_loc_addr);
 
 		if (!cm_node) {
 			/* Only type of packet accepted are for */
@@ -2578,9 +2737,9 @@
 				skb_handled = 0;
 				break;
 			}
-			listener = find_listener(cm_core, nfo.loc_addr,
-						 nfo.loc_port,
-						 NES_CM_LISTENER_ACTIVE_STATE);
+			listener = find_listener(cm_core, nfo.mapped_loc_addr,
+					nfo.mapped_loc_port,
+					NES_CM_LISTENER_ACTIVE_STATE, 0);
 			if (!listener) {
 				nfo.cm_id = NULL;
 				nfo.conn_type = 0;
@@ -3184,10 +3343,12 @@
 
 	nes_cm_init_tsa_conn(nesqp, cm_node);
 
-	nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
-	nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+	nesqp->nesqp_context->tcpPorts[0] =
+				cpu_to_le16(cm_node->mapped_loc_port);
+	nesqp->nesqp_context->tcpPorts[1] =
+				cpu_to_le16(cm_node->mapped_rem_port);
 
-	nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
 
 	nesqp->nesqp_context->misc2 |= cpu_to_le32(
 		(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3211,9 +3372,9 @@
 	memset(&nes_quad, 0, sizeof(nes_quad));
 	nes_quad.DstIpAdrIndex =
 		cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-	nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
-	nes_quad.TcpPorts[0] = raddr->sin_port;
-	nes_quad.TcpPorts[1] = laddr->sin_port;
+	nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+	nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+	nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
 
 	/* Produce hash key */
 	crc_value = get_crc_value(&nes_quad);
@@ -3315,6 +3476,9 @@
 	int apbvt_set = 0;
 	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
 	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	struct iwpm_dev_data pm_reg_msg;
+	struct iwpm_sa_data pm_msg;
+	int iwpm_err = 0;
 
 	if (cm_id->remote_addr.ss_family != AF_INET)
 		return -ENOSYS;
@@ -3352,20 +3516,44 @@
 	nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
 		  conn_param->private_data_len);
 
+	/* set up the connection params for the node */
+	cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
+	cm_info.loc_port = ntohs(laddr->sin_port);
+	cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
+	cm_info.rem_port = ntohs(raddr->sin_port);
+	cm_info.cm_id = cm_id;
+	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+	/* No port mapper available, go with the specified peer information */
+	cm_info.mapped_loc_addr = cm_info.loc_addr;
+	cm_info.mapped_loc_port = cm_info.loc_port;
+	cm_info.mapped_rem_addr = cm_info.rem_addr;
+	cm_info.mapped_rem_port = cm_info.rem_port;
+
+	nes_form_reg_msg(nesvnic, &pm_reg_msg);
+	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+	if (iwpm_err) {
+		nes_debug(NES_DBG_NLMSG,
+			"Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+	}
+	if (iwpm_valid_pid() && !iwpm_err) {
+		nes_form_pm_msg(&cm_info, &pm_msg);
+		iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
+		if (iwpm_err)
+			nes_debug(NES_DBG_NLMSG,
+			"Port Mapper query fail (err = %d).\n", iwpm_err);
+		else
+			nes_record_pm_msg(&cm_info, &pm_msg);
+	}
+
 	if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
-		nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
-				 PCI_FUNC(nesdev->pcidev->devfn),
-				 NES_MANAGE_APBVT_ADD);
+		nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+			PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
 		apbvt_set = 1;
 	}
 
-	/* set up the connection params for the node */
-	cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
-	cm_info.loc_port = htons(laddr->sin_port);
-	cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
-	cm_info.rem_port = htons(raddr->sin_port);
-	cm_info.cm_id = cm_id;
-	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+	if (nes_create_mapinfo(&cm_info))
+		return -ENOMEM;
 
 	cm_id->add_ref(cm_id);
 
@@ -3375,10 +3563,14 @@
 					  &cm_info);
 	if (!cm_node) {
 		if (apbvt_set)
-			nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+			nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
 					 PCI_FUNC(nesdev->pcidev->devfn),
 					 NES_MANAGE_APBVT_DEL);
 
+		nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
+				cm_info.mapped_loc_port);
+		nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
+			cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
 		cm_id->rem_ref(cm_id);
 		return -ENOMEM;
 	}
@@ -3424,13 +3616,16 @@
 			nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
 
 	/* setup listen params in our api call struct */
-	cm_info.loc_addr = nesvnic->local_ipaddr;
-	cm_info.loc_port = laddr->sin_port;
+	cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
+	cm_info.loc_port = ntohs(laddr->sin_port);
 	cm_info.backlog = backlog;
 	cm_info.cm_id = cm_id;
 
 	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
 
+	/* No port mapper available, go with the specified info */
+	cm_info.mapped_loc_addr = cm_info.loc_addr;
+	cm_info.mapped_loc_port = cm_info.loc_port;
 
 	cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
 	if (!cm_node) {
@@ -3442,7 +3637,10 @@
 	cm_id->provider_data = cm_node;
 
 	if (!cm_node->reused_node) {
-		err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+		if (nes_create_mapinfo(&cm_info))
+			return -ENOMEM;
+
+		err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
 				       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
 				       NES_MANAGE_APBVT_ADD);
 		if (err) {
@@ -3567,9 +3765,11 @@
 	nes_cm_init_tsa_conn(nesqp, cm_node);
 
 	/* set the QP tsa context */
-	nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
-	nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
-	nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+	nesqp->nesqp_context->tcpPorts[0] =
+			cpu_to_le16(cm_node->mapped_loc_port);
+	nesqp->nesqp_context->tcpPorts[1] =
+			cpu_to_le16(cm_node->mapped_rem_port);
+	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
 
 	nesqp->nesqp_context->misc2 |= cpu_to_le32(
 			(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3599,9 +3799,9 @@
 
 	nes_quad.DstIpAdrIndex =
 		cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-	nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
-	nes_quad.TcpPorts[0] = raddr->sin_port;
-	nes_quad.TcpPorts[1] = laddr->sin_port;
+	nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+	nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+	nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
 
 	/* Produce hash key */
 	crc_value = get_crc_value(&nes_quad);
@@ -3629,7 +3829,7 @@
 	cm_event.ird = cm_node->ird_size;
 	cm_event.ord = cm_node->ord_size;
 
-	cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
+	cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 

diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 522c99c..f522cf6 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -293,8 +293,8 @@
 	struct list_head           list;
 	struct nes_cm_core         *cm_core;
 	u8                         loc_mac[ETH_ALEN];
-	nes_addr_t                 loc_addr;
-	u16                        loc_port;
+	nes_addr_t                 loc_addr, mapped_loc_addr;
+	u16                        loc_port, mapped_loc_port;
 	struct iw_cm_id            *cm_id;
 	enum nes_cm_conn_type      conn_type;
 	atomic_t                   ref_count;
@@ -308,7 +308,9 @@
 /* per connection node and node state information */
 struct nes_cm_node {
 	nes_addr_t                loc_addr, rem_addr;
+	nes_addr_t                mapped_loc_addr, mapped_rem_addr;
 	u16                       loc_port, rem_port;
+	u16                       mapped_loc_port, mapped_rem_port;
 
 	u8                        loc_mac[ETH_ALEN];
 	u8                        rem_mac[ETH_ALEN];
@@ -364,6 +366,10 @@
 	u16 rem_port;
 	nes_addr_t loc_addr;
 	nes_addr_t rem_addr;
+	u16 mapped_loc_port;
+	u16 mapped_rem_port;
+	nes_addr_t mapped_loc_addr;
+	nes_addr_t mapped_rem_addr;
 
 	enum nes_cm_conn_type  conn_type;
 	int backlog;

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 6c54106..41a9aec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c

@@ -510,16 +510,9 @@
 	return status;
 }
 
-static int ocrdma_debugfs_open(struct inode *inode, struct file *file)
-{
-	if (inode->i_private)
-		file->private_data = inode->i_private;
-	return 0;
-}
-
 static const struct file_operations ocrdma_dbg_ops = {
 	.owner = THIS_MODULE,
-	.open = ocrdma_debugfs_open,
+	.open = simple_open,
 	.read = ocrdma_dbgfs_ops_read,
 };
 

diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 5b7aeb2..8d3c78d 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c

@@ -1272,7 +1272,7 @@
  * Do all the generic driver unit- and chip-independent memory
  * allocation and initialization.
  */
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
 {
 	int ret;
 
@@ -1316,12 +1316,12 @@
 	return ret;
 }
 
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
 
 /*
  * Do the non-unit driver cleanup, memory free, etc. at unload.
  */
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
 {
 	int ret;
 
@@ -1346,7 +1346,7 @@
 	qib_dev_cleanup();
 }
 
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
 
 /* this can only be called after a successful initialization */
 static void cleanup_device_data(struct qib_devdata *dd)

diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index edad991..22c720e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c

@@ -1028,7 +1028,7 @@
 
 		event.event = IB_EVENT_PKEY_CHANGE;
 		event.device = &dd->verbs_dev.ibdev;
-		event.element.port_num = 1;
+		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
 	return 0;

diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 0cad0c4..7fcc150 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c

@@ -985,7 +985,8 @@
 	struct ib_qp *ret;
 
 	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
-	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+	    init_attr->create_flags) {
 		ret = ERR_PTR(-EINVAL);
 		goto bail;
 	}

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index d48d2c0..53bd6a2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c

@@ -466,6 +466,9 @@
 	ucontext = to_uucontext(pd->uobject->context);
 	us_ibdev = to_usdev(pd->device);
 
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+
 	err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
 	if (err) {
 		usnic_err("%s: cannot copy udata for create_qp\n",

diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
index d135ad9..3a4288e 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c

@@ -1,3 +1,21 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>

diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
new file mode 100644
index 0000000..f3c7dcf
--- /dev/null
+++ b/drivers/infiniband/ulp/Makefile

@@ -0,0 +1,5 @@
+obj-$(CONFIG_INFINIBAND_IPOIB)		+= ipoib/
+obj-$(CONFIG_INFINIBAND_SRP)		+= srp/
+obj-$(CONFIG_INFINIBAND_SRPT)		+= srpt/
+obj-$(CONFIG_INFINIBAND_ISER)		+= iser/
+obj-$(CONFIG_INFINIBAND_ISERT)		+= isert/

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1377f85..933efce 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c

@@ -1030,10 +1030,20 @@
 		.cap.max_send_sge	= 1,
 		.sq_sig_type		= IB_SIGNAL_ALL_WR,
 		.qp_type		= IB_QPT_RC,
-		.qp_context		= tx
+		.qp_context		= tx,
+		.create_flags		= IB_QP_CREATE_USE_GFP_NOIO
 	};
 
-	return ib_create_qp(priv->pd, &attr);
+	struct ib_qp *tx_qp;
+
+	tx_qp = ib_create_qp(priv->pd, &attr);
+	if (PTR_ERR(tx_qp) == -EINVAL) {
+		ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
+			   priv->ca->name);
+		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
+		tx_qp = ib_create_qp(priv->pd, &attr);
+	}
+	return tx_qp;
 }
 
 static int ipoib_cm_send_req(struct net_device *dev,
@@ -1104,12 +1114,14 @@
 	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 	int ret;
 
-	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+			       GFP_NOIO, PAGE_KERNEL);
 	if (!p->tx_ring) {
 		ipoib_warn(priv, "failed to allocate tx ring\n");
 		ret = -ENOMEM;
 		goto err_tx;
 	}
+	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
 
 	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
 	if (IS_ERR(p->qp)) {

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index c4b3940..078cadd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c

@@ -105,5 +105,5 @@
 
 void ipoib_set_ethtool_ops(struct net_device *dev)
 {
-	SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops);
+	dev->ethtool_ops = &ipoib_ethtool_ops;
 }

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 25f195e..eb79739 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c

@@ -99,6 +99,7 @@
 module_param_named(pi_guard, iser_pi_guard, int, 0644);
 MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
 
+static struct workqueue_struct *release_wq;
 struct iser_global ig;
 
 void
@@ -337,24 +338,6 @@
 	return cls_conn;
 }
 
-static void
-iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
-{
-	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iser_conn *ib_conn = conn->dd_data;
-
-	iscsi_conn_teardown(cls_conn);
-	/*
-	 * Userspace will normally call the stop callback and
-	 * already have freed the ib_conn, but if it goofed up then
-	 * we free it here.
-	 */
-	if (ib_conn) {
-		ib_conn->iscsi_conn = NULL;
-		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
-	}
-}
-
 static int
 iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
 		     struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
@@ -392,29 +375,39 @@
 	conn->dd_data = ib_conn;
 	ib_conn->iscsi_conn = conn;
 
-	iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
 	return 0;
 }
 
+static int
+iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *iscsi_conn;
+	struct iser_conn *ib_conn;
+
+	iscsi_conn = cls_conn->dd_data;
+	ib_conn = iscsi_conn->dd_data;
+	reinit_completion(&ib_conn->stop_completion);
+
+	return iscsi_conn_start(cls_conn);
+}
+
 static void
 iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iser_conn *ib_conn = conn->dd_data;
 
+	iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
+	iscsi_conn_stop(cls_conn, flag);
+
 	/*
 	 * Userspace may have goofed up and not bound the connection or
 	 * might have only partially setup the connection.
 	 */
 	if (ib_conn) {
-		iscsi_conn_stop(cls_conn, flag);
-		/*
-		 * There is no unbind event so the stop callback
-		 * must release the ref from the bind.
-		 */
-		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
+		conn->dd_data = NULL;
+		complete(&ib_conn->stop_completion);
 	}
-	conn->dd_data = NULL;
 }
 
 static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -515,28 +508,28 @@
 	case ISCSI_PARAM_HDRDGST_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("DataDigest wasn't negotiated to None");
+			iser_err("DataDigest wasn't negotiated to None\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("DataDigest wasn't negotiated to None");
+			iser_err("DataDigest wasn't negotiated to None\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_IFMARKER_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("IFMarker wasn't negotiated to No");
+			iser_err("IFMarker wasn't negotiated to No\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_OFMARKER_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("OFMarker wasn't negotiated to No");
+			iser_err("OFMarker wasn't negotiated to No\n");
 			return -EPROTO;
 		}
 		break;
@@ -652,19 +645,20 @@
 	struct iser_conn *ib_conn;
 
 	ib_conn = ep->dd_data;
-	if (ib_conn->iscsi_conn)
-		/*
-		 * Must suspend xmit path if the ep is bound to the
-		 * iscsi_conn, so we know we are not accessing the ib_conn
-		 * when we free it.
-		 *
-		 * This may not be bound if the ep poll failed.
-		 */
-		iscsi_suspend_tx(ib_conn->iscsi_conn);
-
-
-	iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
+	iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
 	iser_conn_terminate(ib_conn);
+
+	/*
+	 * If iser_conn and iscsi_conn are bound, we must wait for the
+	 * iscsi_conn_stop call and the ISER_CONN_DOWN state before freeing
+	 * the iser resources; otherwise it is safe to free them immediately.
+	 */
+	if (ib_conn->iscsi_conn) {
+		INIT_WORK(&ib_conn->release_work, iser_release_work);
+		queue_work(release_wq, &ib_conn->release_work);
+	} else {
+		iser_conn_release(ib_conn);
+	}
 }
 
 static umode_t iser_attr_is_visible(int param_type, int param)
@@ -748,13 +742,13 @@
 	/* connection management */
 	.create_conn            = iscsi_iser_conn_create,
 	.bind_conn              = iscsi_iser_conn_bind,
-	.destroy_conn           = iscsi_iser_conn_destroy,
+	.destroy_conn           = iscsi_conn_teardown,
 	.attr_is_visible	= iser_attr_is_visible,
 	.set_param              = iscsi_iser_set_param,
 	.get_conn_param		= iscsi_conn_get_param,
 	.get_ep_param		= iscsi_iser_get_ep_param,
 	.get_session_param	= iscsi_session_get_param,
-	.start_conn             = iscsi_conn_start,
+	.start_conn             = iscsi_iser_conn_start,
 	.stop_conn              = iscsi_iser_conn_stop,
 	/* iscsi host params */
 	.get_host_param		= iscsi_host_get_param,
@@ -801,6 +795,12 @@
 	mutex_init(&ig.connlist_mutex);
 	INIT_LIST_HEAD(&ig.connlist);
 
+	release_wq = alloc_workqueue("release workqueue", 0, 0);
+	if (!release_wq) {
+		iser_err("failed to allocate release workqueue\n");
+		return -ENOMEM;
+	}
+
 	iscsi_iser_scsi_transport = iscsi_register_transport(
 							&iscsi_iser_transport);
 	if (!iscsi_iser_scsi_transport) {
@@ -819,7 +819,24 @@
 
 static void __exit iser_exit(void)
 {
+	struct iser_conn *ib_conn, *n;
+	int connlist_empty;
+
 	iser_dbg("Removing iSER datamover...\n");
+	destroy_workqueue(release_wq);
+
+	mutex_lock(&ig.connlist_mutex);
+	connlist_empty = list_empty(&ig.connlist);
+	mutex_unlock(&ig.connlist_mutex);
+
+	if (!connlist_empty) {
+		iser_err("Error cleanup stage completed but we still have iser "
+			 "connections, destroying them anyway.\n");
+		list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
+			iser_conn_release(ib_conn);
+		}
+	}
+
 	iscsi_unregister_transport(&iscsi_iser_transport);
 	kmem_cache_destroy(ig.desc_cache);
 }

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 324129f..97cd385 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h

@@ -69,7 +69,7 @@
 
 #define DRV_NAME	"iser"
 #define PFX		DRV_NAME ": "
-#define DRV_VER		"1.3"
+#define DRV_VER		"1.4"
 
 #define iser_dbg(fmt, arg...)				\
 	do {						\
@@ -333,6 +333,8 @@
 	int                          post_recv_buf_count; /* posted rx count  */
 	atomic_t                     post_send_buf_count; /* posted tx count   */
 	char 			     name[ISER_OBJECT_NAME_SIZE];
+	struct work_struct	     release_work;
+	struct completion	     stop_completion;
 	struct list_head	     conn_list;       /* entry in ig conn list */
 
 	char  			     *login_buf;
@@ -417,12 +419,12 @@
 
 void iser_conn_init(struct iser_conn *ib_conn);
 
-void iser_conn_get(struct iser_conn *ib_conn);
-
-int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
+void iser_conn_release(struct iser_conn *ib_conn);
 
 void iser_conn_terminate(struct iser_conn *ib_conn);
 
+void iser_release_work(struct work_struct *work);
+
 void iser_rcv_completion(struct iser_rx_desc *desc,
 			 unsigned long    dto_xfer_len,
 			struct iser_conn *ib_conn);

diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2e2d903..8d44a40 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c

@@ -41,11 +41,11 @@
 #include "iscsi_iser.h"
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  iser_task->data[ISER_DIR_IN].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_IN].data_len, Protection size
+ *  is stored in task->prot[ISER_DIR_IN].data_len
  */
-static int iser_prepare_read_cmd(struct iscsi_task *task,
-				 unsigned int edtl)
+static int iser_prepare_read_cmd(struct iscsi_task *task)
 
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
@@ -73,14 +73,6 @@
 			return err;
 	}
 
-	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: "
-			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_IN].data_len, edtl,
-			 task->itt, iser_task->ib_conn);
-		return -EINVAL;
-	}
-
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
 	if (err) {
 		iser_err("Failed to set up Data-IN RDMA\n");
@@ -100,8 +92,9 @@
 }
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  task->data[ISER_DIR_OUT].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_OUT].data_len, Protection size
+ *  is stored at task->prot[ISER_DIR_OUT].data_len
  */
 static int
 iser_prepare_write_cmd(struct iscsi_task *task,
@@ -135,14 +128,6 @@
 			return err;
 	}
 
-	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: %d, "
-			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_OUT].data_len,
-			 edtl, task->itt, task->conn);
-		return -EINVAL;
-	}
-
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
 	if (err != 0) {
 		iser_err("Failed to register write cmd RDMA mem\n");
@@ -417,11 +402,12 @@
 	if (scsi_prot_sg_count(sc)) {
 		prot_buf->buf  = scsi_prot_sglist(sc);
 		prot_buf->size = scsi_prot_sg_count(sc);
-		prot_buf->data_len = sc->prot_sdb->length;
+		prot_buf->data_len = data_buf->data_len >>
+				     ilog2(sc->device->sector_size) * 8;
 	}
 
 	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
-		err = iser_prepare_read_cmd(task, edtl);
+		err = iser_prepare_read_cmd(task);
 		if (err)
 			goto send_command_error;
 	}

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 32849f2..ea01075 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c

@@ -581,14 +581,30 @@
 	return ret;
 }
 
+void iser_release_work(struct work_struct *work)
+{
+	struct iser_conn *ib_conn;
+
+	ib_conn = container_of(work, struct iser_conn, release_work);
+
+	/* wait for .conn_stop callback */
+	wait_for_completion(&ib_conn->stop_completion);
+
+	/* wait for the qp's post send and post receive buffers to empty */
+	wait_event_interruptible(ib_conn->wait,
+				 ib_conn->state == ISER_CONN_DOWN);
+
+	iser_conn_release(ib_conn);
+}
+
 /**
  * Frees all conn objects and deallocs conn descriptor
  */
-static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
+void iser_conn_release(struct iser_conn *ib_conn)
 {
 	struct iser_device  *device = ib_conn->device;
 
-	BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+	BUG_ON(ib_conn->state == ISER_CONN_UP);
 
 	mutex_lock(&ig.connlist_mutex);
 	list_del(&ib_conn->conn_list);
@@ -600,27 +616,13 @@
 	if (device != NULL)
 		iser_device_try_release(device);
 	/* if cma handler context, the caller actually destroy the id */
-	if (ib_conn->cma_id != NULL && can_destroy_id) {
+	if (ib_conn->cma_id != NULL) {
 		rdma_destroy_id(ib_conn->cma_id);
 		ib_conn->cma_id = NULL;
 	}
 	iscsi_destroy_endpoint(ib_conn->ep);
 }
 
-void iser_conn_get(struct iser_conn *ib_conn)
-{
-	atomic_inc(&ib_conn->refcount);
-}
-
-int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
-{
-	if (atomic_dec_and_test(&ib_conn->refcount)) {
-		iser_conn_release(ib_conn, can_destroy_id);
-		return 1;
-	}
-	return 0;
-}
-
 /**
  * triggers start of the disconnect procedures and wait for them to be done
  */
@@ -638,24 +640,19 @@
 	if (err)
 		iser_err("Failed to disconnect, conn: 0x%p err %d\n",
 			 ib_conn,err);
-
-	wait_event_interruptible(ib_conn->wait,
-				 ib_conn->state == ISER_CONN_DOWN);
-
-	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
 }
 
-static int iser_connect_error(struct rdma_cm_id *cma_id)
+static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
 	struct iser_conn *ib_conn;
+
 	ib_conn = (struct iser_conn *)cma_id->context;
 
 	ib_conn->state = ISER_CONN_DOWN;
 	wake_up_interruptible(&ib_conn->wait);
-	return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
 }
 
-static int iser_addr_handler(struct rdma_cm_id *cma_id)
+static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
 	struct iser_device *device;
 	struct iser_conn   *ib_conn;
@@ -664,7 +661,8 @@
 	device = iser_device_find_by_ib_device(cma_id);
 	if (!device) {
 		iser_err("device lookup/creation failed\n");
-		return iser_connect_error(cma_id);
+		iser_connect_error(cma_id);
+		return;
 	}
 
 	ib_conn = (struct iser_conn *)cma_id->context;
@@ -686,13 +684,12 @@
 	ret = rdma_resolve_route(cma_id, 1000);
 	if (ret) {
 		iser_err("resolve route failed: %d\n", ret);
-		return iser_connect_error(cma_id);
+		iser_connect_error(cma_id);
+		return;
 	}
-
-	return 0;
 }
 
-static int iser_route_handler(struct rdma_cm_id *cma_id)
+static void iser_route_handler(struct rdma_cm_id *cma_id)
 {
 	struct rdma_conn_param conn_param;
 	int    ret;
@@ -720,9 +717,9 @@
 		goto failure;
 	}
 
-	return 0;
+	return;
 failure:
-	return iser_connect_error(cma_id);
+	iser_connect_error(cma_id);
 }
 
 static void iser_connected_handler(struct rdma_cm_id *cma_id)
@@ -735,14 +732,13 @@
 	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
 	ib_conn = (struct iser_conn *)cma_id->context;
-	ib_conn->state = ISER_CONN_UP;
-	wake_up_interruptible(&ib_conn->wait);
+	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
+		wake_up_interruptible(&ib_conn->wait);
 }
 
-static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
+static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 {
 	struct iser_conn *ib_conn;
-	int ret;
 
 	ib_conn = (struct iser_conn *)cma_id->context;
 
@@ -762,24 +758,19 @@
 		ib_conn->state = ISER_CONN_DOWN;
 		wake_up_interruptible(&ib_conn->wait);
 	}
-
-	ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
-	return ret;
 }
 
 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-	int ret = 0;
-
 	iser_info("event %d status %d conn %p id %p\n",
 		  event->event, event->status, cma_id->context, cma_id);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		ret = iser_addr_handler(cma_id);
+		iser_addr_handler(cma_id);
 		break;
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
-		ret = iser_route_handler(cma_id);
+		iser_route_handler(cma_id);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		iser_connected_handler(cma_id);
@@ -789,18 +780,18 @@
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
 	case RDMA_CM_EVENT_REJECTED:
-		ret = iser_connect_error(cma_id);
+		iser_connect_error(cma_id);
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_ADDR_CHANGE:
-		ret = iser_disconnected_handler(cma_id);
+		iser_disconnected_handler(cma_id);
 		break;
 	default:
 		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
 		break;
 	}
-	return ret;
+	return 0;
 }
 
 void iser_conn_init(struct iser_conn *ib_conn)
@@ -809,7 +800,7 @@
 	init_waitqueue_head(&ib_conn->wait);
 	ib_conn->post_recv_buf_count = 0;
 	atomic_set(&ib_conn->post_send_buf_count, 0);
-	atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
+	init_completion(&ib_conn->stop_completion);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
 }
@@ -837,7 +828,6 @@
 
 	ib_conn->state = ISER_CONN_PENDING;
 
-	iser_conn_get(ib_conn); /* ref ib conn's cma id */
 	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
 					     (void *)ib_conn,
 					     RDMA_PS_TCP, IB_QPT_RC);
@@ -874,9 +864,8 @@
 	ib_conn->cma_id = NULL;
 addr_failure:
 	ib_conn->state = ISER_CONN_DOWN;
-	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
 connect_failure:
-	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
+	iser_conn_release(ib_conn);
 	return err;
 }
 

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b9d6474..d4c7928 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c

@@ -663,8 +663,9 @@
 
 	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
 	if (pi_support && !device->pi_capable) {
-		pr_err("Protection information requested but not supported\n");
-		ret = -EINVAL;
+		pr_err("Protection information requested but not supported, "
+		       "rejecting connect request\n");
+		ret = rdma_reject(cma_id, NULL, 0);
 		goto out_mr;
 	}
 
@@ -787,14 +788,12 @@
 		isert_put_conn(isert_conn);
 		return;
 	}
-	if (!isert_conn->logout_posted) {
-		pr_debug("Calling rdma_disconnect for !logout_posted from"
-			 " isert_disconnect_work\n");
+
+	if (isert_conn->disconnect) {
+		/* Send DREQ/DREP towards our initiator */
 		rdma_disconnect(isert_conn->conn_cm_id);
-		mutex_unlock(&isert_conn->conn_mutex);
-		iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
-		goto wake_up;
 	}
+
 	mutex_unlock(&isert_conn->conn_mutex);
 
 wake_up:
@@ -803,10 +802,11 @@
 }
 
 static void
-isert_disconnected_handler(struct rdma_cm_id *cma_id)
+isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
 {
 	struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context;
 
+	isert_conn->disconnect = disconnect;
 	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
 	schedule_work(&isert_conn->conn_logout_work);
 }
@@ -815,29 +815,28 @@
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	int ret = 0;
+	bool disconnect = false;
 
 	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
 		 event->event, event->status, cma_id->context, cma_id);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
-		pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n");
 		ret = isert_connect_request(cma_id, event);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
-		pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n");
 		isert_connected_handler(cma_id);
 		break;
-	case RDMA_CM_EVENT_DISCONNECTED:
-		pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n");
-		isert_disconnected_handler(cma_id);
-		break;
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
+	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
+	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
+		disconnect = true;
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
+		isert_disconnected_handler(cma_id, disconnect);
 		break;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	default:
-		pr_err("Unknown RDMA CMA event: %d\n", event->event);
+		pr_err("Unhandled RDMA CMA event: %d\n", event->event);
 		break;
 	}
 
@@ -1054,7 +1053,9 @@
 	}
 	if (!login->login_failed) {
 		if (login->login_complete) {
-			if (isert_conn->conn_device->use_fastreg) {
+			if (!conn->sess->sess_ops->SessionType &&
+			    isert_conn->conn_device->use_fastreg) {
+				/* Normal Session and fastreg is used */
 				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
 
 				ret = isert_conn_create_fastreg_pool(isert_conn,
@@ -1824,11 +1825,8 @@
 		break;
 	case ISTATE_SEND_LOGOUTRSP:
 		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
-		/*
-		 * Call atomic_dec(&isert_conn->post_send_buf_count)
-		 * from isert_wait_conn()
-		 */
-		isert_conn->logout_posted = true;
+
+		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_logout_post_handler(cmd, cmd->conn);
 		break;
 	case ISTATE_SEND_TEXTRSP:
@@ -2034,6 +2032,8 @@
 	isert_conn->state = ISER_CONN_DOWN;
 	mutex_unlock(&isert_conn->conn_mutex);
 
+	iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+
 	complete(&isert_conn->conn_wait_comp_err);
 }
 
@@ -2320,7 +2320,7 @@
 	int rc;
 
 	isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND);
 	if (rc < 0)
 		return rc;
 
@@ -3156,9 +3156,14 @@
 		return -ENODEV;
 
 	spin_lock_bh(&np->np_thread_lock);
-	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("ISCSI_NP_THREAD_RESET for isert_accept_np\n");
+		pr_debug("np_thread_state %d for isert_accept_np\n",
+			 np->np_thread_state);
+		/**
+		 * No point in stalling here when np_thread
+		 * is in state RESET/SHUTDOWN/EXIT - bail
+		 **/
 		return -ENODEV;
 	}
 	spin_unlock_bh(&np->np_thread_lock);
@@ -3208,15 +3213,9 @@
 	struct isert_conn *isert_conn = conn->context;
 
 	pr_debug("isert_wait_conn: Starting \n");
-	/*
-	 * Decrement post_send_buf_count for special case when called
-	 * from isert_do_control_comp() -> iscsit_logout_post_handler()
-	 */
-	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->logout_posted)
-		atomic_dec(&isert_conn->post_send_buf_count);
 
-	if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) {
+	mutex_lock(&isert_conn->conn_mutex);
+	if (isert_conn->conn_cm_id) {
 		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
 		rdma_disconnect(isert_conn->conn_cm_id);
 	}
@@ -3293,6 +3292,7 @@
 
 static void __exit isert_exit(void)
 {
+	flush_scheduled_work();
 	destroy_workqueue(isert_comp_wq);
 	destroy_workqueue(isert_rx_wq);
 	iscsit_unregister_transport(&iser_target_transport);

diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index da6612e..04f51f7 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h

@@ -116,7 +116,6 @@
 
 struct isert_conn {
 	enum iser_conn_state	state;
-	bool			logout_posted;
 	int			post_recv_buf_count;
 	atomic_t		post_send_buf_count;
 	u32			responder_resources;
@@ -151,6 +150,7 @@
 #define ISERT_COMP_BATCH_COUNT	8
 	int			conn_comp_batch;
 	struct llist_head	conn_comp_llist;
+	bool                    disconnect;
 };
 
 #define ISERT_MAX_CQ 64

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 66a908b..e3c2c5b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c

@@ -30,7 +30,7 @@
  * SOFTWARE.
  */
 
-#define pr_fmt(fmt) PFX fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -66,6 +66,8 @@
 static unsigned int cmd_sg_entries;
 static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
+static bool prefer_fr;
+static bool register_always;
 static int topspin_workarounds = 1;
 
 module_param(srp_sg_tablesize, uint, 0444);
@@ -87,6 +89,14 @@
 MODULE_PARM_DESC(topspin_workarounds,
 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
 
+module_param(prefer_fr, bool, 0444);
+MODULE_PARM_DESC(prefer_fr,
+"Whether to use fast registration if both FMR and fast registration are supported");
+
+module_param(register_always, bool, 0444);
+MODULE_PARM_DESC(register_always,
+		 "Use memory registration even for contiguous memory regions");
+
 static struct kernel_param_ops srp_tmo_ops;
 
 static int srp_reconnect_delay = 10;
@@ -288,28 +298,174 @@
 	return 0;
 }
 
+/**
+ * srp_alloc_fmr_pool() - create an FMR pool for an SRP target port
+ * @target: SRP target port the pool is created for.
+ *
+ * The pool is sized after the SCSI host's can_queue value and is created in
+ * the protection domain of the SRP device the target port belongs to.
+ *
+ * Return: the result of ib_create_fmr_pool(); callers check it with IS_ERR().
+ */
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+	struct ib_fmr_pool_param param = {
+		.pool_size	   = target->scsi_host->can_queue,
+		.cache		   = 1,
+		.max_pages_per_fmr = srp_dev->max_pages_per_mr,
+		.page_shift	   = ilog2(srp_dev->mr_page_size),
+		.access		   = (IB_ACCESS_LOCAL_WRITE |
+				      IB_ACCESS_REMOTE_WRITE |
+				      IB_ACCESS_REMOTE_READ),
+	};
+
+	/* Derived from the stored pool size, hence set after the initializer. */
+	param.dirty_watermark = param.pool_size / 4;
+
+	return ib_create_fmr_pool(srp_dev->pd, &param);
+}
+
+/**
+ * srp_destroy_fr_pool() - release a fast registration pool and all it owns
+ * @pool: Fast registration pool to tear down; a NULL pointer is ignored.
+ *
+ * For every descriptor in the pool the page list (if set) is freed and the
+ * memory region (if set) is deregistered, then the pool itself is freed.
+ */
+static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
+{
+	struct srp_fr_desc *desc;
+	int idx;
+
+	if (pool == NULL)
+		return;
+
+	for (idx = 0; idx < pool->size; idx++) {
+		desc = &pool->desc[idx];
+		if (desc->frpl != NULL)
+			ib_free_fast_reg_page_list(desc->frpl);
+		if (desc->mr != NULL)
+			ib_dereg_mr(desc->mr);
+	}
+
+	kfree(pool);
+}
+
+/**
+ * srp_create_fr_pool() - allocate and initialize a pool for fast registration
+ * @device:            IB device to allocate fast registration descriptors for.
+ * @pd:                Protection domain associated with the FR descriptors.
+ * @pool_size:         Number of descriptors to allocate; must be positive.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ *
+ * Every descriptor gets its own memory region and page list and is appended
+ * to the pool's free list. If any allocation fails, everything allocated so
+ * far is released again.
+ *
+ * Return: the new pool, or an ERR_PTR() value: -EINVAL for a non-positive
+ * @pool_size, -ENOMEM if the pool itself cannot be allocated, or the error
+ * reported by the failing MR / page list allocation.
+ */
+static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
+					      struct ib_pd *pd, int pool_size,
+					      int max_page_list_len)
+{
+	struct srp_fr_pool *pool;
+	int idx, err;
+
+	if (pool_size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	pool = kzalloc(sizeof(struct srp_fr_pool) +
+		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	pool->size = pool_size;
+	pool->max_page_list_len = max_page_list_len;
+	spin_lock_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->free_list);
+
+	for (idx = 0; idx < pool->size; idx++) {
+		struct srp_fr_desc *desc = &pool->desc[idx];
+		struct ib_fast_reg_page_list *page_list;
+		struct ib_mr *region;
+
+		/*
+		 * Only store pointers that passed the IS_ERR() check so that
+		 * srp_destroy_fr_pool() never sees an error pointer.
+		 */
+		region = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+		if (IS_ERR(region)) {
+			err = PTR_ERR(region);
+			goto free_pool;
+		}
+		desc->mr = region;
+
+		page_list = ib_alloc_fast_reg_page_list(device,
+							max_page_list_len);
+		if (IS_ERR(page_list)) {
+			err = PTR_ERR(page_list);
+			goto free_pool;
+		}
+		desc->frpl = page_list;
+
+		list_add_tail(&desc->entry, &pool->free_list);
+	}
+
+	return pool;
+
+free_pool:
+	srp_destroy_fr_pool(pool);
+	return ERR_PTR(err);
+}
+
+/**
+ * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
+ * @pool: Pool to obtain descriptor from.
+ *
+ * Return: the first descriptor of the free list, removed from that list, or
+ * NULL if the free list is empty. The free list is accessed under pool->lock.
+ */
+static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
+{
+	struct srp_fr_desc *desc;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	if (list_empty(&pool->free_list)) {
+		desc = NULL;
+	} else {
+		desc = list_first_entry(&pool->free_list, struct srp_fr_desc,
+					entry);
+		list_del(&desc->entry);
+	}
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	return desc;
+}
+
+/**
+ * srp_fr_pool_put() - put FR descriptors back in the free list
+ * @pool: Pool the descriptors were allocated from.
+ * @desc: Pointer to an array of fast registration descriptor pointers.
+ * @n:    Number of descriptors to put back.
+ *
+ * The descriptors are added at the head of the free list, one after the
+ * other, while pool->lock is held.
+ *
+ * Note: The caller must already have queued an invalidation request for
+ * desc->mr->rkey before calling this function.
+ */
+static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
+			    int n)
+{
+	unsigned long irq_flags;
+	int remaining;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	for (remaining = n; remaining > 0; remaining--, desc++)
+		list_add(&(*desc)->entry, &pool->free_list);
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+}
+
+/**
+ * srp_alloc_fr_pool() - create the fast registration pool of a target port
+ * @target: SRP target port the pool is created for.
+ *
+ * Return: the result of srp_create_fr_pool(), with one descriptor per SCSI
+ * host queue slot (can_queue) and the device's per-MR page limit as page
+ * list length.
+ */
+static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+{
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+	int nr_desc = target->scsi_host->can_queue;
+	int pages_per_mr = srp_dev->max_pages_per_mr;
+
+	return srp_create_fr_pool(srp_dev->dev, srp_dev->pd, nr_desc,
+				  pages_per_mr);
+}
+
 static int srp_create_target_ib(struct srp_target_port *target)
 {
+	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_qp_init_attr *init_attr;
 	struct ib_cq *recv_cq, *send_cq;
 	struct ib_qp *qp;
+	struct ib_fmr_pool *fmr_pool = NULL;
+	struct srp_fr_pool *fr_pool = NULL;
+	const int m = 1 + dev->use_fast_reg;
 	int ret;
 
 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
 	if (!init_attr)
 		return -ENOMEM;
 
-	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-			       srp_recv_completion, NULL, target,
+	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target,
 			       target->queue_size, target->comp_vector);
 	if (IS_ERR(recv_cq)) {
 		ret = PTR_ERR(recv_cq);
 		goto err;
 	}
 
-	send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-			       srp_send_completion, NULL, target,
-			       target->queue_size, target->comp_vector);
+	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
+			       m * target->queue_size, target->comp_vector);
 	if (IS_ERR(send_cq)) {
 		ret = PTR_ERR(send_cq);
 		goto err_recv_cq;
@@ -318,16 +474,16 @@
 	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
 
 	init_attr->event_handler       = srp_qp_event;
-	init_attr->cap.max_send_wr     = target->queue_size;
+	init_attr->cap.max_send_wr     = m * target->queue_size;
 	init_attr->cap.max_recv_wr     = target->queue_size;
 	init_attr->cap.max_recv_sge    = 1;
 	init_attr->cap.max_send_sge    = 1;
-	init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR;
+	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
 	init_attr->qp_type             = IB_QPT_RC;
 	init_attr->send_cq             = send_cq;
 	init_attr->recv_cq             = recv_cq;
 
-	qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+	qp = ib_create_qp(dev->pd, init_attr);
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
 		goto err_send_cq;
@@ -337,6 +493,30 @@
 	if (ret)
 		goto err_qp;
 
+	if (dev->use_fast_reg && dev->has_fr) {
+		fr_pool = srp_alloc_fr_pool(target);
+		if (IS_ERR(fr_pool)) {
+			ret = PTR_ERR(fr_pool);
+			shost_printk(KERN_WARNING, target->scsi_host, PFX
+				     "FR pool allocation failed (%d)\n", ret);
+			goto err_qp;
+		}
+		if (target->fr_pool)
+			srp_destroy_fr_pool(target->fr_pool);
+		target->fr_pool = fr_pool;
+	} else if (!dev->use_fast_reg && dev->has_fmr) {
+		fmr_pool = srp_alloc_fmr_pool(target);
+		if (IS_ERR(fmr_pool)) {
+			ret = PTR_ERR(fmr_pool);
+			shost_printk(KERN_WARNING, target->scsi_host, PFX
+				     "FMR pool allocation failed (%d)\n", ret);
+			goto err_qp;
+		}
+		if (target->fmr_pool)
+			ib_destroy_fmr_pool(target->fmr_pool);
+		target->fmr_pool = fmr_pool;
+	}
+
 	if (target->qp)
 		ib_destroy_qp(target->qp);
 	if (target->recv_cq)
@@ -371,8 +551,16 @@
  */
 static void srp_free_target_ib(struct srp_target_port *target)
 {
+	struct srp_device *dev = target->srp_host->srp_dev;
 	int i;
 
+	if (dev->use_fast_reg) {
+		if (target->fr_pool)
+			srp_destroy_fr_pool(target->fr_pool);
+	} else {
+		if (target->fmr_pool)
+			ib_destroy_fmr_pool(target->fmr_pool);
+	}
 	ib_destroy_qp(target->qp);
 	ib_destroy_cq(target->send_cq);
 	ib_destroy_cq(target->recv_cq);
@@ -577,7 +765,8 @@
 
 static void srp_free_req_data(struct srp_target_port *target)
 {
-	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = dev->dev;
 	struct srp_request *req;
 	int i;
 
@@ -586,7 +775,10 @@
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		req = &target->req_ring[i];
-		kfree(req->fmr_list);
+		if (dev->use_fast_reg)
+			kfree(req->fr_list);
+		else
+			kfree(req->fmr_list);
 		kfree(req->map_page);
 		if (req->indirect_dma_addr) {
 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
@@ -605,6 +797,7 @@
 	struct srp_device *srp_dev = target->srp_host->srp_dev;
 	struct ib_device *ibdev = srp_dev->dev;
 	struct srp_request *req;
+	void *mr_list;
 	dma_addr_t dma_addr;
 	int i, ret = -ENOMEM;
 
@@ -617,12 +810,20 @@
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		req = &target->req_ring[i];
-		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
-					GFP_KERNEL);
-		req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
-					GFP_KERNEL);
+		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+				  GFP_KERNEL);
+		if (!mr_list)
+			goto out;
+		if (srp_dev->use_fast_reg)
+			req->fr_list = mr_list;
+		else
+			req->fmr_list = mr_list;
+		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+					sizeof(void *), GFP_KERNEL);
+		if (!req->map_page)
+			goto out;
 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
-		if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+		if (!req->indirect_desc)
 			goto out;
 
 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
@@ -759,21 +960,56 @@
 	}
 }
 
+/**
+ * srp_inv_rkey() - post a local invalidate work request for an rkey
+ * @target: SRP target port whose QP the request is posted on.
+ * @rkey:   Key to invalidate.
+ *
+ * The work request is tagged with LOCAL_INV_WR_ID_MASK as its wr_id and
+ * carries no scatter/gather entries and no send flags.
+ *
+ * Return: the result of ib_post_send().
+ */
+static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+{
+	struct ib_send_wr inv_wr;
+	struct ib_send_wr *bad_wr;
+
+	/* Zeroing covers .next, .num_sge and .send_flags. */
+	memset(&inv_wr, 0, sizeof(inv_wr));
+	inv_wr.opcode = IB_WR_LOCAL_INV;
+	inv_wr.wr_id = LOCAL_INV_WR_ID_MASK;
+	inv_wr.ex.invalidate_rkey = rkey;
+
+	return ib_post_send(target->qp, &inv_wr, &bad_wr);
+}
+
 static void srp_unmap_data(struct scsi_cmnd *scmnd,
 			   struct srp_target_port *target,
 			   struct srp_request *req)
 {
-	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
-	struct ib_pool_fmr **pfmr;
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = dev->dev;
+	int i, res;
 
 	if (!scsi_sglist(scmnd) ||
 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
 		return;
 
-	pfmr = req->fmr_list;
-	while (req->nfmr--)
-		ib_fmr_pool_unmap(*pfmr++);
+	if (dev->use_fast_reg) {
+		struct srp_fr_desc **pfr;
+
+		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
+			res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+			if (res < 0) {
+				shost_printk(KERN_ERR, target->scsi_host, PFX
+				  "Queueing INV WR for rkey %#x failed (%d)\n",
+				  (*pfr)->mr->rkey, res);
+				queue_work(system_long_wq,
+					   &target->tl_err_work);
+			}
+		}
+		if (req->nmdesc)
+			srp_fr_pool_put(target->fr_pool, req->fr_list,
+					req->nmdesc);
+	} else {
+		struct ib_pool_fmr **pfmr;
+
+		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
+			ib_fmr_pool_unmap(*pfmr);
+	}
 
 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
 			scmnd->sc_data_direction);
@@ -813,6 +1049,10 @@
 
 /**
  * srp_free_req() - Unmap data and add request to the free request list.
+ * @target: SRP target port.
+ * @req:    Request to be freed.
+ * @scmnd:  SCSI command associated with @req.
+ * @req_lim_delta: Amount to be added to @target->req_lim.
  */
 static void srp_free_req(struct srp_target_port *target,
 			 struct srp_request *req, struct scsi_cmnd *scmnd,
@@ -882,21 +1122,19 @@
 	 * callbacks will have finished before a new QP is allocated.
 	 */
 	ret = srp_new_cm_id(target);
-	/*
-	 * Whether or not creating a new CM ID succeeded, create a new
-	 * QP. This guarantees that all completion callback function
-	 * invocations have finished before request resetting starts.
-	 */
-	if (ret == 0)
-		ret = srp_create_target_ib(target);
-	else
-		srp_create_target_ib(target);
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
 		srp_finish_req(target, req, NULL, DID_RESET << 16);
 	}
 
+	/*
+	 * Whether or not creating a new CM ID succeeded, create a new
+	 * QP. This guarantees that all callback functions for the old QP have
+	 * finished before any send requests are posted on the new QP.
+	 */
+	ret += srp_create_target_ib(target);
+
 	INIT_LIST_HEAD(&target->free_tx);
 	for (i = 0; i < target->queue_size; ++i)
 		list_add(&target->tx_ring[i]->list, &target->free_tx);
@@ -928,33 +1166,87 @@
 static int srp_map_finish_fmr(struct srp_map_state *state,
 			      struct srp_target_port *target)
 {
-	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_pool_fmr *fmr;
 	u64 io_addr = 0;
 
-	if (!state->npages)
-		return 0;
-
-	if (state->npages == 1) {
-		srp_map_desc(state, state->base_dma_addr, state->fmr_len,
-			     target->rkey);
-		state->npages = state->fmr_len = 0;
-		return 0;
-	}
-
-	fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+	fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
 				   state->npages, io_addr);
 	if (IS_ERR(fmr))
 		return PTR_ERR(fmr);
 
 	*state->next_fmr++ = fmr;
-	state->nfmr++;
+	state->nmdesc++;
 
-	srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
-	state->npages = state->fmr_len = 0;
+	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
+
 	return 0;
 }
 
+/**
+ * srp_map_finish_fr() - register the pages gathered in @state via FR
+ * @state:  Mapping state; supplies the page array, the page count, the base
+ *	    DMA address and the length of the region to register.
+ * @target: SRP target port whose FR pool and QP are used.
+ *
+ * Takes a descriptor from target->fr_pool, installs a new rkey in its MR,
+ * copies the gathered page addresses into the descriptor's page list and
+ * posts an IB_WR_FAST_REG_MR work request on target->qp.
+ *
+ * Return: -ENOMEM if the pool has no free descriptor, otherwise the result
+ * of ib_post_send().
+ */
+static int srp_map_finish_fr(struct srp_map_state *state,
+			     struct srp_target_port *target)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_send_wr *bad_wr;
+	struct ib_send_wr wr;
+	struct srp_fr_desc *desc;
+	u32 rkey;
+
+	desc = srp_fr_pool_get(target->fr_pool);
+	if (!desc)
+		return -ENOMEM;
+
+	/* Compute the next rkey with ib_inc_rkey() and install it in the MR. */
+	rkey = ib_inc_rkey(desc->mr->rkey);
+	ib_update_fast_reg_key(desc->mr, rkey);
+
+	memcpy(desc->frpl->page_list, state->pages,
+	       sizeof(state->pages[0]) * state->npages);
+
+	memset(&wr, 0, sizeof(wr));
+	wr.opcode = IB_WR_FAST_REG_MR;
+	wr.wr_id = FAST_REG_WR_ID_MASK;
+	wr.wr.fast_reg.iova_start = state->base_dma_addr;
+	wr.wr.fast_reg.page_list = desc->frpl;
+	wr.wr.fast_reg.page_list_len = state->npages;
+	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
+	wr.wr.fast_reg.length = state->dma_len;
+	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+				       IB_ACCESS_REMOTE_READ |
+				       IB_ACCESS_REMOTE_WRITE);
+	/*
+	 * NOTE(review): the WR key is taken from mr->lkey while srp_map_desc()
+	 * below is given mr->rkey -- presumably both hold the same value for
+	 * a fast registration MR; confirm.
+	 */
+	wr.wr.fast_reg.rkey = desc->mr->lkey;
+
+	/*
+	 * The descriptor is recorded and counted before the work request is
+	 * posted, so it is part of state->nmdesc even if ib_post_send() fails.
+	 */
+	*state->next_fr++ = desc;
+	state->nmdesc++;
+
+	srp_map_desc(state, state->base_dma_addr, state->dma_len,
+		     desc->mr->rkey);
+
+	return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
+/**
+ * srp_finish_mapping() - close out the mapping accumulated in @state
+ * @state:  Mapping state holding the pages gathered so far.
+ * @target: SRP target port the mapping belongs to.
+ *
+ * Does nothing if no pages have been gathered. A single page is described
+ * with target->rkey unless the register_always module parameter is set; in
+ * all other cases the pages are registered via FR or FMR, depending on the
+ * device's use_fast_reg setting. On success the page count and length in
+ * @state are reset.
+ *
+ * Return: 0 on success, otherwise the error of the FR / FMR helper, in which
+ * case @state is left untouched by this function.
+ */
+static int srp_finish_mapping(struct srp_map_state *state,
+			      struct srp_target_port *target)
+{
+	int err;
+
+	if (!state->npages)
+		return 0;
+
+	if (register_always || state->npages != 1) {
+		if (target->srp_host->srp_dev->use_fast_reg)
+			err = srp_map_finish_fr(state, target);
+		else
+			err = srp_map_finish_fmr(state, target);
+		if (err)
+			return err;
+	} else {
+		srp_map_desc(state, state->base_dma_addr, state->dma_len,
+			     target->rkey);
+	}
+
+	state->npages = 0;
+	state->dma_len = 0;
+
+	return 0;
+}
+
 static void srp_map_update_start(struct srp_map_state *state,
 				 struct scatterlist *sg, int sg_index,
 				 dma_addr_t dma_addr)
@@ -967,7 +1259,7 @@
 static int srp_map_sg_entry(struct srp_map_state *state,
 			    struct srp_target_port *target,
 			    struct scatterlist *sg, int sg_index,
-			    int use_fmr)
+			    bool use_mr)
 {
 	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_device *ibdev = dev->dev;
@@ -979,23 +1271,25 @@
 	if (!dma_len)
 		return 0;
 
-	if (use_fmr == SRP_MAP_NO_FMR) {
-		/* Once we're in direct map mode for a request, we don't
-		 * go back to FMR mode, so no need to update anything
+	if (!use_mr) {
+		/*
+		 * Once we're in direct map mode for a request, we don't
+		 * go back to FMR or FR mode, so no need to update anything
 		 * other than the descriptor.
 		 */
 		srp_map_desc(state, dma_addr, dma_len, target->rkey);
 		return 0;
 	}
 
-	/* If we start at an offset into the FMR page, don't merge into
-	 * the current FMR. Finish it out, and use the kernel's MR for this
-	 * sg entry. This is to avoid potential bugs on some SRP targets
-	 * that were never quite defined, but went away when the initiator
-	 * avoided using FMR on such page fragments.
+	/*
+	 * Since not all RDMA HW drivers support non-zero page offsets for
+	 * FMR, if we start at an offset into a page, don't merge into the
+	 * current FMR mapping. Finish it out, and use the kernel's MR for
+	 * this sg entry.
 	 */
-	if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
-		ret = srp_map_finish_fmr(state, target);
+	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
+	    dma_len > dev->mr_max_size) {
+		ret = srp_finish_mapping(state, target);
 		if (ret)
 			return ret;
 
@@ -1004,52 +1298,106 @@
 		return 0;
 	}
 
-	/* If this is the first sg to go into the FMR, save our position.
-	 * We need to know the first unmapped entry, its index, and the
-	 * first unmapped address within that entry to be able to restart
-	 * mapping after an error.
+	/*
+	 * If this is the first sg that will be mapped via FMR or via FR, save
+	 * our position. We need to know the first unmapped entry, its index,
+	 * and the first unmapped address within that entry to be able to
+	 * restart mapping after an error.
 	 */
 	if (!state->unmapped_sg)
 		srp_map_update_start(state, sg, sg_index, dma_addr);
 
 	while (dma_len) {
-		if (state->npages == SRP_FMR_SIZE) {
-			ret = srp_map_finish_fmr(state, target);
+		unsigned offset = dma_addr & ~dev->mr_page_mask;
+		if (state->npages == dev->max_pages_per_mr || offset != 0) {
+			ret = srp_finish_mapping(state, target);
 			if (ret)
 				return ret;
 
 			srp_map_update_start(state, sg, sg_index, dma_addr);
 		}
 
-		len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
 
 		if (!state->npages)
 			state->base_dma_addr = dma_addr;
-		state->pages[state->npages++] = dma_addr;
-		state->fmr_len += len;
+		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
+		state->dma_len += len;
 		dma_addr += len;
 		dma_len -= len;
 	}
 
-	/* If the last entry of the FMR wasn't a full page, then we need to
+	/*
+	 * If the last entry of the MR wasn't a full page, then we need to
 	 * close it out and start a new one -- we can only merge at page
 	 * boundries.
 	 */
 	ret = 0;
-	if (len != dev->fmr_page_size) {
-		ret = srp_map_finish_fmr(state, target);
+	if (len != dev->mr_page_size) {
+		ret = srp_finish_mapping(state, target);
 		if (!ret)
 			srp_map_update_start(state, NULL, 0, 0);
 	}
 	return ret;
 }
 
+/**
+ * srp_map_sg() - build the buffer descriptors for a DMA-mapped sg-list
+ * @state:  Mapping state; srp_map_data() zeroes it before calling.
+ * @target: SRP target port the request is sent to.
+ * @req:    Request that provides the descriptor, page and MR list buffers.
+ * @scat:   First entry of the sg-list.
+ * @count:  Number of sg-list entries to process.
+ *
+ * Memory registration (FR or FMR, per dev->use_fast_reg) is attempted only
+ * if the matching pool exists. Once a registration attempt fails, the rest
+ * of the sg-list is described with target->rkey without registration.
+ *
+ * Return: always 0; the number of registrations is stored in req->nmdesc.
+ */
+static int srp_map_sg(struct srp_map_state *state,
+		      struct srp_target_port *target, struct srp_request *req,
+		      struct scatterlist *scat, int count)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = dev->dev;
+	struct scatterlist *sg;
+	int i;
+	bool use_mr;
+
+	state->desc	= req->indirect_desc;
+	state->pages	= req->map_page;
+	/* fr_list / fmr_list and fr_pool / fmr_pool are union members. */
+	if (dev->use_fast_reg) {
+		state->next_fr = req->fr_list;
+		use_mr = !!target->fr_pool;
+	} else {
+		state->next_fmr = req->fmr_list;
+		use_mr = !!target->fmr_pool;
+	}
+
+	for_each_sg(scat, sg, count, i) {
+		if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+			/*
+			 * Memory registration failed, so backtrack to the
+			 * first unmapped entry and continue on without using
+			 * memory registration.
+			 */
+			dma_addr_t dma_addr;
+			unsigned int dma_len;
+
+			/*
+			 * Also entered from the goto after the loop, when
+			 * closing out the final mapping fails.
+			 */
+backtrack:
+			sg = state->unmapped_sg;
+			i = state->unmapped_index;
+
+			/*
+			 * Describe only the part of this entry that starts
+			 * at the first unmapped address.
+			 */
+			dma_addr = ib_sg_dma_address(ibdev, sg);
+			dma_len = ib_sg_dma_len(ibdev, sg);
+			dma_len -= (state->unmapped_addr - dma_addr);
+			dma_addr = state->unmapped_addr;
+			use_mr = false;
+			srp_map_desc(state, dma_addr, dma_len, target->rkey);
+		}
+	}
+
+	/* Close out whatever is still pending in the last mapping. */
+	if (use_mr && srp_finish_mapping(state, target))
+		goto backtrack;
+
+	req->nmdesc = state->nmdesc;
+
+	return 0;
+}
+
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 			struct srp_request *req)
 {
-	struct scatterlist *scat, *sg;
+	struct scatterlist *scat;
 	struct srp_cmd *cmd = req->cmd->buf;
-	int i, len, nents, count, use_fmr;
+	int len, nents, count;
 	struct srp_device *dev;
 	struct ib_device *ibdev;
 	struct srp_map_state state;
@@ -1081,7 +1429,7 @@
 	fmt = SRP_DATA_DESC_DIRECT;
 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
 
-	if (count == 1) {
+	if (count == 1 && !register_always) {
 		/*
 		 * The midlayer only generated a single gather/scatter
 		 * entry, or DMA mapping coalesced everything to a
@@ -1094,13 +1442,13 @@
 		buf->key = cpu_to_be32(target->rkey);
 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
 
-		req->nfmr = 0;
+		req->nmdesc = 0;
 		goto map_complete;
 	}
 
-	/* We have more than one scatter/gather entry, so build our indirect
-	 * descriptor table, trying to merge as many entries with FMR as we
-	 * can.
+	/*
+	 * We have more than one scatter/gather entry, so build our indirect
+	 * descriptor table, trying to merge as many entries as we can.
 	 */
 	indirect_hdr = (void *) cmd->add_data;
 
@@ -1108,35 +1456,7 @@
 				   target->indirect_size, DMA_TO_DEVICE);
 
 	memset(&state, 0, sizeof(state));
-	state.desc	= req->indirect_desc;
-	state.pages	= req->map_page;
-	state.next_fmr	= req->fmr_list;
-
-	use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
-
-	for_each_sg(scat, sg, count, i) {
-		if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
-			/* FMR mapping failed, so backtrack to the first
-			 * unmapped entry and continue on without using FMR.
-			 */
-			dma_addr_t dma_addr;
-			unsigned int dma_len;
-
-backtrack:
-			sg = state.unmapped_sg;
-			i = state.unmapped_index;
-
-			dma_addr = ib_sg_dma_address(ibdev, sg);
-			dma_len = ib_sg_dma_len(ibdev, sg);
-			dma_len -= (state.unmapped_addr - dma_addr);
-			dma_addr = state.unmapped_addr;
-			use_fmr = SRP_MAP_NO_FMR;
-			srp_map_desc(&state, dma_addr, dma_len, target->rkey);
-		}
-	}
-
-	if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
-		goto backtrack;
+	srp_map_sg(&state, target, req, scat, count);
 
 	/* We've mapped the request, now pull as much of the indirect
 	 * descriptor table as we can into the command buffer. If this
@@ -1144,9 +1464,9 @@
 	 * guaranteed to fit into the command, as the SCSI layer won't
 	 * give us more S/G entries than we allow.
 	 */
-	req->nfmr = state.nfmr;
 	if (state.ndesc == 1) {
-		/* FMR mapping was able to collapse this to one entry,
+		/*
+		 * Memory registration collapsed the sg-list into one entry,
 		 * so use a direct descriptor.
 		 */
 		struct srp_direct_buf *buf = (void *) cmd->add_data;
@@ -1455,6 +1775,7 @@
 
 /**
  * srp_tl_err_work() - handle a transport layer error
+ * @work: Work structure embedded in an SRP target port.
  *
  * Note: This function may get invoked before the rport has been created,
  * hence the target->rport test.
@@ -1468,14 +1789,24 @@
 		srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
-			      struct srp_target_port *target)
+static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
+			      bool send_err, struct srp_target_port *target)
 {
 	if (target->connected && !target->qp_in_error) {
-		shost_printk(KERN_ERR, target->scsi_host,
-			     PFX "failed %s status %d\n",
-			     send_err ? "send" : "receive",
-			     wc_status);
+		if (wr_id & LOCAL_INV_WR_ID_MASK) {
+			shost_printk(KERN_ERR, target->scsi_host, PFX
+				     "LOCAL_INV failed with status %d\n",
+				     wc_status);
+		} else if (wr_id & FAST_REG_WR_ID_MASK) {
+			shost_printk(KERN_ERR, target->scsi_host, PFX
+				     "FAST_REG_MR failed status %d\n",
+				     wc_status);
+		} else {
+			shost_printk(KERN_ERR, target->scsi_host,
+				     PFX "failed %s status %d for iu %p\n",
+				     send_err ? "send" : "receive",
+				     wc_status, (void *)(uintptr_t)wr_id);
+		}
 		queue_work(system_long_wq, &target->tl_err_work);
 	}
 	target->qp_in_error = true;
@@ -1491,7 +1822,7 @@
 		if (likely(wc.status == IB_WC_SUCCESS)) {
 			srp_handle_recv(target, &wc);
 		} else {
-			srp_handle_qp_err(wc.status, false, target);
+			srp_handle_qp_err(wc.wr_id, wc.status, false, target);
 		}
 	}
 }
@@ -1507,7 +1838,7 @@
 			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
 			list_add(&iu->list, &target->free_tx);
 		} else {
-			srp_handle_qp_err(wc.status, true, target);
+			srp_handle_qp_err(wc.wr_id, wc.status, true, target);
 		}
 	}
 }
@@ -1521,7 +1852,7 @@
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
-	int len, result;
+	int len, ret;
 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
 
 	/*
@@ -1533,12 +1864,9 @@
 	if (in_scsi_eh)
 		mutex_lock(&rport->mutex);
 
-	result = srp_chkready(target->rport);
-	if (unlikely(result)) {
-		scmnd->result = result;
-		scmnd->scsi_done(scmnd);
-		goto unlock_rport;
-	}
+	scmnd->result = srp_chkready(target->rport);
+	if (unlikely(scmnd->result))
+		goto err;
 
 	spin_lock_irqsave(&target->lock, flags);
 	iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1553,7 +1881,6 @@
 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
 				   DMA_TO_DEVICE);
 
-	scmnd->result        = 0;
 	scmnd->host_scribble = (void *) req;
 
 	cmd = iu->buf;
@@ -1570,7 +1897,15 @@
 	len = srp_map_data(scmnd, target, req);
 	if (len < 0) {
 		shost_printk(KERN_ERR, target->scsi_host,
-			     PFX "Failed to map data\n");
+			     PFX "Failed to map data (%d)\n", len);
+		/*
+		 * If we ran out of memory descriptors (-ENOMEM) because an
+		 * application is queuing many requests with more than
+		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
+		 * to reduce queue depth temporarily.
+		 */
+		scmnd->result = len == -ENOMEM ?
+			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
 		goto err_iu;
 	}
 
@@ -1582,11 +1917,13 @@
 		goto err_unmap;
 	}
 
+	ret = 0;
+
 unlock_rport:
 	if (in_scsi_eh)
 		mutex_unlock(&rport->mutex);
 
-	return 0;
+	return ret;
 
 err_unmap:
 	srp_unmap_data(scmnd, target, req);
@@ -1594,16 +1931,27 @@
 err_iu:
 	srp_put_tx_iu(target, iu, SRP_IU_CMD);
 
+	/*
+	 * Avoid that the loops that iterate over the request ring can
+	 * encounter a dangling SCSI command pointer.
+	 */
+	req->scmnd = NULL;
+
 	spin_lock_irqsave(&target->lock, flags);
 	list_add(&req->list, &target->free_reqs);
 
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
-	if (in_scsi_eh)
-		mutex_unlock(&rport->mutex);
+err:
+	if (scmnd->result) {
+		scmnd->scsi_done(scmnd);
+		ret = 0;
+	} else {
+		ret = SCSI_MLQUEUE_HOST_BUSY;
+	}
 
-	return SCSI_MLQUEUE_HOST_BUSY;
+	goto unlock_rport;
 }
 
 /*
@@ -2310,6 +2658,8 @@
 
 /**
  * srp_conn_unique() - check whether the connection to a target is unique
+ * @host:   SRP host.
+ * @target: SRP target port.
  */
 static bool srp_conn_unique(struct srp_host *host,
 			    struct srp_target_port *target)
@@ -2605,7 +2955,8 @@
 		container_of(dev, struct srp_host, dev);
 	struct Scsi_Host *target_host;
 	struct srp_target_port *target;
-	struct ib_device *ibdev = host->srp_dev->dev;
+	struct srp_device *srp_dev = host->srp_dev;
+	struct ib_device *ibdev = srp_dev->dev;
 	int ret;
 
 	target_host = scsi_host_alloc(&srp_template,
@@ -2650,9 +3001,9 @@
 		goto err;
 	}
 
-	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
-				target->cmd_sg_cnt < target->sg_tablesize) {
-		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
+	    target->cmd_sg_cnt < target->sg_tablesize) {
+		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
 		target->sg_tablesize = target->cmd_sg_cnt;
 	}
 
@@ -2790,9 +3141,9 @@
 {
 	struct srp_device *srp_dev;
 	struct ib_device_attr *dev_attr;
-	struct ib_fmr_pool_param fmr_param;
 	struct srp_host *host;
-	int max_pages_per_fmr, fmr_page_shift, s, e, p;
+	int mr_page_shift, s, e, p;
+	u64 max_pages_per_mr;
 
 	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
 	if (!dev_attr)
@@ -2807,15 +3158,39 @@
 	if (!srp_dev)
 		goto free_attr;
 
+	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+			    device->map_phys_fmr && device->unmap_fmr);
+	srp_dev->has_fr = (dev_attr->device_cap_flags &
+			   IB_DEVICE_MEM_MGT_EXTENSIONS);
+	if (!srp_dev->has_fmr && !srp_dev->has_fr)
+		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+	srp_dev->use_fast_reg = (srp_dev->has_fr &&
+				 (!srp_dev->has_fmr || prefer_fr));
+
 	/*
 	 * Use the smallest page size supported by the HCA, down to a
 	 * minimum of 4096 bytes. We're unlikely to build large sglists
 	 * out of smaller entries.
 	 */
-	fmr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
-	srp_dev->fmr_page_size	= 1 << fmr_page_shift;
-	srp_dev->fmr_page_mask	= ~((u64) srp_dev->fmr_page_size - 1);
-	srp_dev->fmr_max_size	= srp_dev->fmr_page_size * SRP_FMR_SIZE;
+	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
+	srp_dev->mr_page_size	= 1 << mr_page_shift;
+	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
+	max_pages_per_mr	= dev_attr->max_mr_size;
+	do_div(max_pages_per_mr, srp_dev->mr_page_size);
+	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
+					  max_pages_per_mr);
+	if (srp_dev->use_fast_reg) {
+		srp_dev->max_pages_per_mr =
+			min_t(u32, srp_dev->max_pages_per_mr,
+			      dev_attr->max_fast_reg_page_list_len);
+	}
+	srp_dev->mr_max_size	= srp_dev->mr_page_size *
+				   srp_dev->max_pages_per_mr;
+	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+		 device->name, mr_page_shift, dev_attr->max_mr_size,
+		 dev_attr->max_fast_reg_page_list_len,
+		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
 
@@ -2831,27 +3206,6 @@
 	if (IS_ERR(srp_dev->mr))
 		goto err_pd;
 
-	for (max_pages_per_fmr = SRP_FMR_SIZE;
-			max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
-			max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
-		memset(&fmr_param, 0, sizeof fmr_param);
-		fmr_param.pool_size	    = SRP_FMR_POOL_SIZE;
-		fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE;
-		fmr_param.cache		    = 1;
-		fmr_param.max_pages_per_fmr = max_pages_per_fmr;
-		fmr_param.page_shift	    = fmr_page_shift;
-		fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
-					       IB_ACCESS_REMOTE_WRITE |
-					       IB_ACCESS_REMOTE_READ);
-
-		srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
-		if (!IS_ERR(srp_dev->fmr_pool))
-			break;
-	}
-
-	if (IS_ERR(srp_dev->fmr_pool))
-		srp_dev->fmr_pool = NULL;
-
 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
@@ -2914,8 +3268,6 @@
 		kfree(host);
 	}
 
-	if (srp_dev->fmr_pool)
-		ib_destroy_fmr_pool(srp_dev->fmr_pool);
 	ib_dereg_mr(srp_dev->mr);
 	ib_dealloc_pd(srp_dev->pd);
 

diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index aad27b7..e46ecb1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h

@@ -66,13 +66,10 @@
 	SRP_TAG_NO_REQ		= ~0U,
 	SRP_TAG_TSK_MGMT	= 1U << 31,
 
-	SRP_FMR_SIZE		= 512,
-	SRP_FMR_MIN_SIZE	= 128,
-	SRP_FMR_POOL_SIZE	= 1024,
-	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4,
+	SRP_MAX_PAGES_PER_MR	= 512,
 
-	SRP_MAP_ALLOW_FMR	= 0,
-	SRP_MAP_NO_FMR		= 1,
+	LOCAL_INV_WR_ID_MASK	= 1,
+	FAST_REG_WR_ID_MASK	= 2,
 };
 
 enum srp_target_state {
@@ -86,15 +83,24 @@
 	SRP_IU_RSP,
 };
 
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ *   request.
+ */
 struct srp_device {
 	struct list_head	dev_list;
 	struct ib_device       *dev;
 	struct ib_pd	       *pd;
 	struct ib_mr	       *mr;
-	struct ib_fmr_pool     *fmr_pool;
-	u64			fmr_page_mask;
-	int			fmr_page_size;
-	int			fmr_max_size;
+	u64			mr_page_mask;
+	int			mr_page_size;
+	int			mr_max_size;
+	int			max_pages_per_mr;
+	bool			has_fmr;
+	bool			has_fr;
+	bool			use_fast_reg;
 };
 
 struct srp_host {
@@ -112,11 +118,14 @@
 	struct list_head	list;
 	struct scsi_cmnd       *scmnd;
 	struct srp_iu	       *cmd;
-	struct ib_pool_fmr    **fmr_list;
+	union {
+		struct ib_pool_fmr **fmr_list;
+		struct srp_fr_desc **fr_list;
+	};
 	u64		       *map_page;
 	struct srp_direct_buf  *indirect_desc;
 	dma_addr_t		indirect_dma_addr;
-	short			nfmr;
+	short			nmdesc;
 	short			index;
 };
 
@@ -131,6 +140,10 @@
 	struct ib_cq	       *send_cq ____cacheline_aligned_in_smp;
 	struct ib_cq	       *recv_cq;
 	struct ib_qp	       *qp;
+	union {
+		struct ib_fmr_pool     *fmr_pool;
+		struct srp_fr_pool     *fr_pool;
+	};
 	u32			lkey;
 	u32			rkey;
 	enum srp_target_state	state;
@@ -197,15 +210,66 @@
 	enum dma_data_direction	direction;
 };
 
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr:    Memory region.
+ * @frpl:  Fast registration page list.
+ */
+struct srp_fr_desc {
+	struct list_head		entry;
+	struct ib_mr			*mr;
+	struct ib_fast_reg_page_list	*frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size:      Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock:      Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc:      Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+	int			size;
+	int			max_page_list_len;
+	spinlock_t		lock;
+	struct list_head	free_list;
+	struct srp_fr_desc	desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc:	    Pointer to the element of the SRP buffer descriptor array
+ *		    that is being filled in.
+ * @pages:	    Array with DMA addresses of pages being considered for
+ *		    memory registration.
+ * @base_dma_addr:  DMA address of the first page that has not yet been mapped.
+ * @dma_len:	    Number of bytes that will be registered with the next
+ *		    FMR or FR memory registration call.
+ * @total_len:	    Total number of bytes in the sg-list being mapped.
+ * @npages:	    Number of page addresses in the pages[] array.
+ * @nmdesc:	    Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc:	    Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg:    First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr:  DMA address of the first element mapped via FMR or FR.
+ */
 struct srp_map_state {
-	struct ib_pool_fmr    **next_fmr;
+	union {
+		struct ib_pool_fmr **next_fmr;
+		struct srp_fr_desc **next_fr;
+	};
 	struct srp_direct_buf  *desc;
 	u64		       *pages;
 	dma_addr_t		base_dma_addr;
-	u32			fmr_len;
+	u32			dma_len;
 	u32			total_len;
 	unsigned int		npages;
-	unsigned int		nfmr;
+	unsigned int		nmdesc;
 	unsigned int		ndesc;
 	struct scatterlist     *unmapped_sg;
 	int			unmapped_index;

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index ce953d8..fd325ec 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c

@@ -629,12 +629,10 @@
 	return copy_to_user(p, str, len) ? -EFAULT : len;
 }
 
-#define OLD_KEY_MAX	0x1ff
 static int handle_eviocgbit(struct input_dev *dev,
 			    unsigned int type, unsigned int size,
 			    void __user *p, int compat_mode)
 {
-	static unsigned long keymax_warn_time;
 	unsigned long *bits;
 	int len;
 
@@ -652,24 +650,8 @@
 	default: return -EINVAL;
 	}
 
-	/*
-	 * Work around bugs in userspace programs that like to do
-	 * EVIOCGBIT(EV_KEY, KEY_MAX) and not realize that 'len'
-	 * should be in bytes, not in bits.
-	 */
-	if (type == EV_KEY && size == OLD_KEY_MAX) {
-		len = OLD_KEY_MAX;
-		if (printk_timed_ratelimit(&keymax_warn_time, 10 * 1000))
-			pr_warning("(EVIOCGBIT): Suspicious buffer size %u, "
-				   "limiting output to %zu bytes. See "
-				   "http://userweb.kernel.org/~dtor/eviocgbit-bug.html\n",
-				   OLD_KEY_MAX,
-				   BITS_TO_LONGS(OLD_KEY_MAX) * sizeof(long));
-	}
-
 	return bits_to_user(bits, len, size, p, compat_mode);
 }
-#undef OLD_KEY_MAX
 
 static int evdev_handle_get_keycode(struct input_dev *dev, void __user *p)
 {

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 7f161d9..3664f81 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c

@@ -147,6 +147,11 @@
 	.attrs = sysfs_attrs
 };
 
+static const struct attribute_group *input_polldev_attribute_groups[] = {
+	&input_polldev_attribute_group,
+	NULL
+};
+
 /**
  * input_allocate_polled_device - allocate memory for polled device
  *
@@ -171,6 +176,91 @@
 }
 EXPORT_SYMBOL(input_allocate_polled_device);
 
+struct input_polled_devres {
+	struct input_polled_dev *polldev;
+};
+
+static int devm_input_polldev_match(struct device *dev, void *res, void *data)
+{
+	struct input_polled_devres *devres = res;
+
+	return devres->polldev == data;
+}
+
+static void devm_input_polldev_release(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: dropping reference/freeing %s\n",
+		__func__, dev_name(&polldev->input->dev));
+
+	input_put_device(polldev->input);
+	kfree(polldev);
+}
+
+static void devm_input_polldev_unregister(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&polldev->input->dev));
+	input_unregister_device(polldev->input);
+
+	/*
+	 * Note that we are still holding extra reference to the input
+	 * device so it will stick around until devm_input_polldev_release()
+	 * is called.
+	 */
+}
+
+/**
+ * devm_input_allocate_polled_device - allocate managed polled device
+ * @dev: device owning the polled device being created
+ *
+ * Returns prepared &struct input_polled_dev or %NULL.
+ *
+ * Managed polled input devices do not need to be explicitly unregistered
+ * or freed as it will be done automatically when owner device unbinds
+ * from its driver (or binding fails). Once such managed polled device
+ * is allocated, it is ready to be set up and registered in the same
+ * fashion as regular polled input devices (using
+ * input_register_polled_device() function).
+ *
+ * If you want to manually unregister and free such managed polled devices,
+ * it can still be done by calling input_unregister_polled_device() and
+ * input_free_polled_device(), although it is rarely needed.
+ *
+ * NOTE: the owner device is set up as parent of input device and users
+ * should not override it.
+ */
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev)
+{
+	struct input_polled_dev *polldev;
+	struct input_polled_devres *devres;
+
+	devres = devres_alloc(devm_input_polldev_release, sizeof(*devres),
+			      GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	polldev = input_allocate_polled_device();
+	if (!polldev) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	polldev->input->dev.parent = dev;
+	polldev->devres_managed = true;
+
+	devres->polldev = polldev;
+	devres_add(dev, devres);
+
+	return polldev;
+}
+EXPORT_SYMBOL(devm_input_allocate_polled_device);
+
 /**
  * input_free_polled_device - free memory allocated for polled device
  * @dev: device to free
@@ -181,7 +271,12 @@
 void input_free_polled_device(struct input_polled_dev *dev)
 {
 	if (dev) {
-		input_free_device(dev->input);
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->input->dev.parent,
+						devm_input_polldev_release,
+						devm_input_polldev_match,
+						dev));
+		input_put_device(dev->input);
 		kfree(dev);
 	}
 }
@@ -199,26 +294,35 @@
  */
 int input_register_polled_device(struct input_polled_dev *dev)
 {
+	struct input_polled_devres *devres = NULL;
 	struct input_dev *input = dev->input;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_polldev_unregister,
+				      sizeof(*devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->polldev = dev;
+	}
+
 	input_set_drvdata(input, dev);
 	INIT_DELAYED_WORK(&dev->work, input_polled_device_work);
+
 	if (!dev->poll_interval)
 		dev->poll_interval = 500;
 	if (!dev->poll_interval_max)
 		dev->poll_interval_max = dev->poll_interval;
+
 	input->open = input_open_polled_device;
 	input->close = input_close_polled_device;
 
-	error = input_register_device(input);
-	if (error)
-		return error;
+	input->dev.groups = input_polldev_attribute_groups;
 
-	error = sysfs_create_group(&input->dev.kobj,
-				   &input_polldev_attribute_group);
+	error = input_register_device(input);
 	if (error) {
-		input_unregister_device(input);
+		devres_free(devres);
 		return error;
 	}
 
@@ -231,6 +335,12 @@
 	 */
 	input_get_device(input);
 
+	if (dev->devres_managed) {
+		dev_dbg(input->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&input->dev));
+		devres_add(input->dev.parent, devres);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(input_register_polled_device);
@@ -245,8 +355,11 @@
  */
 void input_unregister_polled_device(struct input_polled_dev *dev)
 {
-	sysfs_remove_group(&dev->input->dev.kobj,
-			   &input_polldev_attribute_group);
+	if (dev->devres_managed)
+		WARN_ON(devres_destroy(dev->input->dev.parent,
+					devm_input_polldev_unregister,
+					devm_input_polldev_match,
+					dev));
 
 	input_unregister_device(dev->input);
 }

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index ffc7ad3..f7e79b4 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig

@@ -524,6 +524,17 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called stowaway.
 
+config KEYBOARD_ST_KEYSCAN
+	tristate "STMicroelectronics keyscan support"
+	depends on ARCH_STI || COMPILE_TEST
+	select INPUT_MATRIXKMAP
+	help
+	  Say Y here if you want to use a keypad attached to the keyscan block
+	  on some STMicroelectronics SoC devices.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called st-keyscan.
+
 config KEYBOARD_SUNKBD
 	tristate "Sun Type 4 and Type 5 keyboard"
 	select SERIO
@@ -578,7 +589,7 @@
 
 config KEYBOARD_OMAP4
 	tristate "TI OMAP4+ keypad support"
-	depends on ARCH_OMAP2PLUS
+	depends on OF || ARCH_OMAP2PLUS
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use the OMAP4+ keypad.

diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 11cff7b..7504ae1 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile

@@ -51,6 +51,7 @@
 obj-$(CONFIG_KEYBOARD_SPEAR)		+= spear-keyboard.o
 obj-$(CONFIG_KEYBOARD_STMPE)		+= stmpe-keypad.o
 obj-$(CONFIG_KEYBOARD_STOWAWAY)		+= stowaway.o
+obj-$(CONFIG_KEYBOARD_ST_KEYSCAN)	+= st-keyscan.o
 obj-$(CONFIG_KEYBOARD_SUNKBD)		+= sunkbd.o
 obj-$(CONFIG_KEYBOARD_TC3589X)		+= tc3589x-keypad.o
 obj-$(CONFIG_KEYBOARD_TEGRA)		+= tegra-kbc.o

diff --git a/drivers/input/keyboard/adp5520-keys.c b/drivers/input/keyboard/adp5520-keys.c
index 4cc14c2..7f4a8b5 100644
--- a/drivers/input/keyboard/adp5520-keys.c
+++ b/drivers/input/keyboard/adp5520-keys.c

@@ -12,6 +12,7 @@
 #include <linux/input.h>
 #include <linux/mfd/adp5520.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 struct adp5520_keys {
 	struct input_dev *input;
@@ -81,7 +82,7 @@
 		return -EINVAL;
 	}
 
-	if (pdata == NULL) {
+	if (!pdata) {
 		dev_err(&pdev->dev, "missing platform data\n");
 		return -EINVAL;
 	}
@@ -89,17 +90,15 @@
 	if (!(pdata->rows_en_mask && pdata->cols_en_mask))
 		return -EINVAL;
 
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (dev == NULL) {
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
 		dev_err(&pdev->dev, "failed to alloc memory\n");
 		return -ENOMEM;
 	}
 
-	input = input_allocate_device();
-	if (!input) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	input = devm_input_allocate_device(&pdev->dev);
+	if (!input)
+		return -ENOMEM;
 
 	dev->master = pdev->dev.parent;
 	dev->input = input;
@@ -135,7 +134,7 @@
 	ret = input_register_device(input);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to register input device\n");
-		goto err;
+		return ret;
 	}
 
 	en_mask = pdata->rows_en_mask | pdata->cols_en_mask;
@@ -157,8 +156,7 @@
 
 	if (ret) {
 		dev_err(&pdev->dev, "failed to write\n");
-		ret = -EIO;
-		goto err1;
+		return -EIO;
 	}
 
 	dev->notifier.notifier_call = adp5520_keys_notifier;
@@ -166,19 +164,11 @@
 			ADP5520_KP_IEN | ADP5520_KR_IEN);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register notifier\n");
-		goto err1;
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, dev);
 	return 0;
-
-err1:
-	input_unregister_device(input);
-	input = NULL;
-err:
-	input_free_device(input);
-	kfree(dev);
-	return ret;
 }
 
 static int adp5520_keys_remove(struct platform_device *pdev)
@@ -188,8 +178,6 @@
 	adp5520_unregister_notifier(dev->master, &dev->notifier,
 				ADP5520_KP_IEN | ADP5520_KR_IEN);
 
-	input_unregister_device(dev->input);
-	kfree(dev);
 	return 0;
 }
 

diff --git a/drivers/input/keyboard/clps711x-keypad.c b/drivers/input/keyboard/clps711x-keypad.c
index 3955aec..552b65c 100644
--- a/drivers/input/keyboard/clps711x-keypad.c
+++ b/drivers/input/keyboard/clps711x-keypad.c

@@ -185,7 +185,7 @@
 	return 0;
 }
 
-static struct of_device_id clps711x_keypad_of_match[] = {
+static const struct of_device_id clps711x_keypad_of_match[] = {
 	{ .compatible = "cirrus,clps711x-keypad", },
 	{ }
 };

diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 2db1324..8c98e97 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c

@@ -424,6 +424,16 @@
 	return IRQ_HANDLED;
 }
 
+static void gpio_keys_quiesce_key(void *data)
+{
+	struct gpio_button_data *bdata = data;
+
+	if (bdata->timer_debounce)
+		del_timer_sync(&bdata->timer);
+
+	cancel_work_sync(&bdata->work);
+}
+
 static int gpio_keys_setup_key(struct platform_device *pdev,
 				struct input_dev *input,
 				struct gpio_button_data *bdata,
@@ -433,7 +443,8 @@
 	struct device *dev = &pdev->dev;
 	irq_handler_t isr;
 	unsigned long irqflags;
-	int irq, error;
+	int irq;
+	int error;
 
 	bdata->input = input;
 	bdata->button = button;
@@ -441,7 +452,8 @@
 
 	if (gpio_is_valid(button->gpio)) {
 
-		error = gpio_request_one(button->gpio, GPIOF_IN, desc);
+		error = devm_gpio_request_one(&pdev->dev, button->gpio,
+					      GPIOF_IN, desc);
 		if (error < 0) {
 			dev_err(dev, "Failed to request GPIO %d, error %d\n",
 				button->gpio, error);
@@ -463,7 +475,7 @@
 			dev_err(dev,
 				"Unable to get irq number for GPIO %d, error %d\n",
 				button->gpio, error);
-			goto fail;
+			return error;
 		}
 		bdata->irq = irq;
 
@@ -497,26 +509,33 @@
 	input_set_capability(input, button->type ?: EV_KEY, button->code);
 
 	/*
+	 * Install custom action to cancel debounce timer and
+	 * workqueue item.
+	 */
+	error = devm_add_action(&pdev->dev, gpio_keys_quiesce_key, bdata);
+	if (error) {
+		dev_err(&pdev->dev,
+			"failed to register quiesce action, error: %d\n",
+			error);
+		return error;
+	}
+
+	/*
 	 * If platform has specified that the button can be disabled,
 	 * we don't want it to share the interrupt line.
 	 */
 	if (!button->can_disable)
 		irqflags |= IRQF_SHARED;
 
-	error = request_any_context_irq(bdata->irq, isr, irqflags, desc, bdata);
+	error = devm_request_any_context_irq(&pdev->dev, bdata->irq,
+					     isr, irqflags, desc, bdata);
 	if (error < 0) {
 		dev_err(dev, "Unable to claim irq %d; error %d\n",
 			bdata->irq, error);
-		goto fail;
+		return error;
 	}
 
 	return 0;
-
-fail:
-	if (gpio_is_valid(button->gpio))
-		gpio_free(button->gpio);
-
-	return error;
 }
 
 static void gpio_keys_report_state(struct gpio_keys_drvdata *ddata)
@@ -578,23 +597,18 @@
 	int i;
 
 	node = dev->of_node;
-	if (!node) {
-		error = -ENODEV;
-		goto err_out;
-	}
+	if (!node)
+		return ERR_PTR(-ENODEV);
 
 	nbuttons = of_get_child_count(node);
-	if (nbuttons == 0) {
-		error = -ENODEV;
-		goto err_out;
-	}
+	if (nbuttons == 0)
+		return ERR_PTR(-ENODEV);
 
-	pdata = kzalloc(sizeof(*pdata) + nbuttons * (sizeof *button),
-			GFP_KERNEL);
-	if (!pdata) {
-		error = -ENOMEM;
-		goto err_out;
-	}
+	pdata = devm_kzalloc(dev,
+			     sizeof(*pdata) + nbuttons * sizeof(*button),
+			     GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
 
 	pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
 	pdata->nbuttons = nbuttons;
@@ -619,7 +633,7 @@
 				dev_err(dev,
 					"Failed to get gpio flags, error: %d\n",
 					error);
-			goto err_free_pdata;
+			return ERR_PTR(error);
 		}
 
 		button = &pdata->buttons[i++];
@@ -630,8 +644,7 @@
 		if (of_property_read_u32(pp, "linux,code", &button->code)) {
 			dev_err(dev, "Button without keycode: 0x%x\n",
 				button->gpio);
-			error = -EINVAL;
-			goto err_free_pdata;
+			return ERR_PTR(-EINVAL);
 		}
 
 		button->desc = of_get_property(pp, "label", NULL);
@@ -646,20 +659,13 @@
 			button->debounce_interval = 5;
 	}
 
-	if (pdata->nbuttons == 0) {
-		error = -EINVAL;
-		goto err_free_pdata;
-	}
+	if (pdata->nbuttons == 0)
+		return ERR_PTR(-EINVAL);
 
 	return pdata;
-
-err_free_pdata:
-	kfree(pdata);
-err_out:
-	return ERR_PTR(error);
 }
 
-static struct of_device_id gpio_keys_of_match[] = {
+static const struct of_device_id gpio_keys_of_match[] = {
 	{ .compatible = "gpio-keys", },
 	{ },
 };
@@ -675,22 +681,13 @@
 
 #endif
 
-static void gpio_remove_key(struct gpio_button_data *bdata)
-{
-	free_irq(bdata->irq, bdata);
-	if (bdata->timer_debounce)
-		del_timer_sync(&bdata->timer);
-	cancel_work_sync(&bdata->work);
-	if (gpio_is_valid(bdata->button->gpio))
-		gpio_free(bdata->button->gpio);
-}
-
 static int gpio_keys_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct gpio_keys_platform_data *pdata = dev_get_platdata(dev);
 	struct gpio_keys_drvdata *ddata;
 	struct input_dev *input;
+	size_t size;
 	int i, error;
 	int wakeup = 0;
 
@@ -700,14 +697,18 @@
 			return PTR_ERR(pdata);
 	}
 
-	ddata = kzalloc(sizeof(struct gpio_keys_drvdata) +
-			pdata->nbuttons * sizeof(struct gpio_button_data),
-			GFP_KERNEL);
-	input = input_allocate_device();
-	if (!ddata || !input) {
+	size = sizeof(struct gpio_keys_drvdata) +
+			pdata->nbuttons * sizeof(struct gpio_button_data);
+	ddata = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (!ddata) {
 		dev_err(dev, "failed to allocate state\n");
-		error = -ENOMEM;
-		goto fail1;
+		return -ENOMEM;
+	}
+
+	input = devm_input_allocate_device(dev);
+	if (!input) {
+		dev_err(dev, "failed to allocate input device\n");
+		return -ENOMEM;
 	}
 
 	ddata->pdata = pdata;
@@ -738,7 +739,7 @@
 
 		error = gpio_keys_setup_key(pdev, input, bdata, button);
 		if (error)
-			goto fail2;
+			return error;
 
 		if (button->wakeup)
 			wakeup = 1;
@@ -748,57 +749,31 @@
 	if (error) {
 		dev_err(dev, "Unable to export keys/switches, error: %d\n",
 			error);
-		goto fail2;
+		return error;
 	}
 
 	error = input_register_device(input);
 	if (error) {
 		dev_err(dev, "Unable to register input device, error: %d\n",
 			error);
-		goto fail3;
+		goto err_remove_group;
 	}
 
 	device_init_wakeup(&pdev->dev, wakeup);
 
 	return 0;
 
- fail3:
+err_remove_group:
 	sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group);
- fail2:
-	while (--i >= 0)
-		gpio_remove_key(&ddata->data[i]);
-
- fail1:
-	input_free_device(input);
-	kfree(ddata);
-	/* If we have no platform data, we allocated pdata dynamically. */
-	if (!dev_get_platdata(&pdev->dev))
-		kfree(pdata);
-
 	return error;
 }
 
 static int gpio_keys_remove(struct platform_device *pdev)
 {
-	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
-	struct input_dev *input = ddata->input;
-	int i;
-
 	sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group);
 
 	device_init_wakeup(&pdev->dev, 0);
 
-	for (i = 0; i < ddata->pdata->nbuttons; i++)
-		gpio_remove_key(&ddata->data[i]);
-
-	input_unregister_device(input);
-
-	/* If we have no platform data, we allocated pdata dynamically. */
-	if (!dev_get_platdata(&pdev->dev))
-		kfree(ddata->pdata);
-
-	kfree(ddata);
-
 	return 0;
 }
 

diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c
index e571e19..432d363 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c

@@ -120,12 +120,10 @@
 	if (nbuttons == 0)
 		return NULL;
 
-	pdata = kzalloc(sizeof(*pdata) + nbuttons * (sizeof *button),
-			GFP_KERNEL);
-	if (!pdata) {
-		error = -ENOMEM;
-		goto err_out;
-	}
+	pdata = devm_kzalloc(dev, sizeof(*pdata) + nbuttons * sizeof(*button),
+			     GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
 
 	pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
 	pdata->nbuttons = nbuttons;
@@ -151,7 +149,7 @@
 				dev_err(dev,
 					"Failed to get gpio flags, error: %d\n",
 					error);
-			goto err_free_pdata;
+			return ERR_PTR(error);
 		}
 
 		button = &pdata->buttons[i++];
@@ -162,8 +160,7 @@
 		if (of_property_read_u32(pp, "linux,code", &button->code)) {
 			dev_err(dev, "Button without keycode: 0x%x\n",
 				button->gpio);
-			error = -EINVAL;
-			goto err_free_pdata;
+			return ERR_PTR(-EINVAL);
 		}
 
 		button->desc = of_get_property(pp, "label", NULL);
@@ -178,20 +175,13 @@
 			button->debounce_interval = 5;
 	}
 
-	if (pdata->nbuttons == 0) {
-		error = -EINVAL;
-		goto err_free_pdata;
-	}
+	if (pdata->nbuttons == 0)
+		return ERR_PTR(-EINVAL);
 
 	return pdata;
-
-err_free_pdata:
-	kfree(pdata);
-err_out:
-	return ERR_PTR(error);
 }
 
-static struct of_device_id gpio_keys_polled_of_match[] = {
+static const struct of_device_id gpio_keys_polled_of_match[] = {
 	{ .compatible = "gpio-keys-polled", },
 	{ },
 };
@@ -213,6 +203,7 @@
 	struct gpio_keys_polled_dev *bdev;
 	struct input_polled_dev *poll_dev;
 	struct input_dev *input;
+	size_t size;
 	int error;
 	int i;
 
@@ -228,24 +219,21 @@
 
 	if (!pdata->poll_interval) {
 		dev_err(dev, "missing poll_interval value\n");
-		error = -EINVAL;
-		goto err_free_pdata;
+		return -EINVAL;
 	}
 
-	bdev = kzalloc(sizeof(struct gpio_keys_polled_dev) +
-		       pdata->nbuttons * sizeof(struct gpio_keys_button_data),
-		       GFP_KERNEL);
+	size = sizeof(struct gpio_keys_polled_dev) +
+			pdata->nbuttons * sizeof(struct gpio_keys_button_data);
+	bdev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
 	if (!bdev) {
 		dev_err(dev, "no memory for private data\n");
-		error = -ENOMEM;
-		goto err_free_pdata;
+		return -ENOMEM;
 	}
 
-	poll_dev = input_allocate_polled_device();
+	poll_dev = devm_input_allocate_polled_device(&pdev->dev);
 	if (!poll_dev) {
 		dev_err(dev, "no memory for polled device\n");
-		error = -ENOMEM;
-		goto err_free_bdev;
+		return -ENOMEM;
 	}
 
 	poll_dev->private = bdev;
@@ -258,7 +246,6 @@
 
 	input->name = pdev->name;
 	input->phys = DRV_NAME"/input0";
-	input->dev.parent = &pdev->dev;
 
 	input->id.bustype = BUS_HOST;
 	input->id.vendor = 0x0001;
@@ -277,16 +264,15 @@
 
 		if (button->wakeup) {
 			dev_err(dev, DRV_NAME " does not support wakeup\n");
-			error = -EINVAL;
-			goto err_free_gpio;
+			return -EINVAL;
 		}
 
-		error = gpio_request_one(gpio, GPIOF_IN,
-					 button->desc ?: DRV_NAME);
+		error = devm_gpio_request_one(&pdev->dev, gpio, GPIOF_IN,
+					      button->desc ? : DRV_NAME);
 		if (error) {
 			dev_err(dev, "unable to claim gpio %u, err=%d\n",
 				gpio, error);
-			goto err_free_gpio;
+			return error;
 		}
 
 		bdata->can_sleep = gpio_cansleep(gpio);
@@ -306,7 +292,7 @@
 	if (error) {
 		dev_err(dev, "unable to register polled device, err=%d\n",
 			error);
-		goto err_free_gpio;
+		return error;
 	}
 
 	/* report initial state of the buttons */
@@ -315,52 +301,10 @@
 					     &bdev->data[i]);
 
 	return 0;
-
-err_free_gpio:
-	while (--i >= 0)
-		gpio_free(pdata->buttons[i].gpio);
-
-	input_free_polled_device(poll_dev);
-
-err_free_bdev:
-	kfree(bdev);
-
-err_free_pdata:
-	/* If we have no platform_data, we allocated pdata dynamically.  */
-	if (!dev_get_platdata(&pdev->dev))
-		kfree(pdata);
-
-	return error;
-}
-
-static int gpio_keys_polled_remove(struct platform_device *pdev)
-{
-	struct gpio_keys_polled_dev *bdev = platform_get_drvdata(pdev);
-	const struct gpio_keys_platform_data *pdata = bdev->pdata;
-	int i;
-
-	input_unregister_polled_device(bdev->poll_dev);
-
-	for (i = 0; i < pdata->nbuttons; i++)
-		gpio_free(pdata->buttons[i].gpio);
-
-	input_free_polled_device(bdev->poll_dev);
-
-	/*
-	 * If we had no platform_data, we allocated pdata dynamically and
-	 * must free it here.
-	 */
-	if (!dev_get_platdata(&pdev->dev))
-		kfree(pdata);
-
-	kfree(bdev);
-
-	return 0;
 }
 
 static struct platform_driver gpio_keys_polled_driver = {
 	.probe	= gpio_keys_polled_probe,
-	.remove	= gpio_keys_polled_remove,
 	.driver	= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,

diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c
index 97ec335..8280cb1 100644
--- a/drivers/input/keyboard/imx_keypad.c
+++ b/drivers/input/keyboard/imx_keypad.c

@@ -415,7 +415,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id imx_keypad_of_match[] = {
+static const struct of_device_id imx_keypad_of_match[] = {
 	{ .compatible = "fsl,imx21-kpp", },
 	{ /* sentinel */ }
 };

diff --git a/drivers/input/keyboard/jornada680_kbd.c b/drivers/input/keyboard/jornada680_kbd.c
index 69b1f00..0ba4428 100644
--- a/drivers/input/keyboard/jornada680_kbd.c
+++ b/drivers/input/keyboard/jornada680_kbd.c

@@ -16,6 +16,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/device.h>
 #include <linux/input.h>
 #include <linux/input-polldev.h>
 #include <linux/interrupt.h>
@@ -185,14 +186,15 @@
 	struct input_dev *input_dev;
 	int i, error;
 
-	jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL);
+	jornadakbd = devm_kzalloc(&pdev->dev, sizeof(struct jornadakbd),
+				  GFP_KERNEL);
 	if (!jornadakbd)
 		return -ENOMEM;
 
-	poll_dev = input_allocate_polled_device();
+	poll_dev = devm_input_allocate_polled_device(&pdev->dev);
 	if (!poll_dev) {
-		error = -ENOMEM;
-		goto failed;
+		dev_err(&pdev->dev, "failed to allocate polled input device\n");
+		return -ENOMEM;
 	}
 
 	platform_set_drvdata(pdev, jornadakbd);
@@ -224,27 +226,10 @@
 	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
 
 	error = input_register_polled_device(jornadakbd->poll_dev);
-	if (error)
-		goto failed;
-
-	return 0;
-
- failed:
-	printk(KERN_ERR "Jornadakbd: failed to register driver, error: %d\n",
-		error);
-	input_free_polled_device(poll_dev);
-	kfree(jornadakbd);
-	return error;
-
-}
-
-static int jornada680kbd_remove(struct platform_device *pdev)
-{
-	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
-
-	input_unregister_polled_device(jornadakbd->poll_dev);
-	input_free_polled_device(jornadakbd->poll_dev);
-	kfree(jornadakbd);
+	if (error) {
+		dev_err(&pdev->dev, "failed to register polled input device\n");
+		return error;
+	}
 
 	return 0;
 }
@@ -255,7 +240,6 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe	= jornada680kbd_probe,
-	.remove	= jornada680kbd_remove,
 };
 module_platform_driver(jornada680kbd_driver);
 

diff --git a/drivers/input/keyboard/mcs_touchkey.c b/drivers/input/keyboard/mcs_touchkey.c
index 1da8e0b..375b05c 100644
--- a/drivers/input/keyboard/mcs_touchkey.c
+++ b/drivers/input/keyboard/mcs_touchkey.c

@@ -147,7 +147,7 @@
 	}
 	dev_info(&client->dev, "Firmware version: %d\n", fw_ver);
 
-	input_dev->name = "MELPAS MCS Touchkey";
+	input_dev->name = "MELFAS MCS Touchkey";
 	input_dev->id.bustype = BUS_I2C;
 	input_dev->dev.parent = &client->dev;
 	input_dev->evbit[0] = BIT_MASK(EV_KEY);

diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index 0400b3f..024b7bd 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c

@@ -28,11 +28,10 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 
-#include <linux/platform_data/omap4-keypad.h>
-
 /* OMAP4 registers */
 #define OMAP4_KBD_REVISION		0x00
 #define OMAP4_KBD_SYSCONFIG		0x10
@@ -218,7 +217,6 @@
 	pm_runtime_put_sync(input->dev.parent);
 }
 
-#ifdef CONFIG_OF
 static int omap4_keypad_parse_dt(struct device *dev,
 				 struct omap4_keypad *keypad_data)
 {
@@ -235,20 +233,9 @@
 
 	return 0;
 }
-#else
-static inline int omap4_keypad_parse_dt(struct device *dev,
-					struct omap4_keypad *keypad_data)
-{
-	return -ENOSYS;
-}
-#endif
 
 static int omap4_keypad_probe(struct platform_device *pdev)
 {
-	const struct omap4_keypad_platform_data *pdata =
-				dev_get_platdata(&pdev->dev);
-	const struct matrix_keymap_data *keymap_data =
-				pdata ? pdata->keymap_data : NULL;
 	struct omap4_keypad *keypad_data;
 	struct input_dev *input_dev;
 	struct resource *res;
@@ -277,14 +264,9 @@
 
 	keypad_data->irq = irq;
 
-	if (pdata) {
-		keypad_data->rows = pdata->rows;
-		keypad_data->cols = pdata->cols;
-	} else {
-		error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
-		if (error)
-			return error;
-	}
+	error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
+	if (error)
+		return error;
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
 	if (!res) {
@@ -363,7 +345,7 @@
 		goto err_free_input;
 	}
 
-	error = matrix_keypad_build_keymap(keymap_data, NULL,
+	error = matrix_keypad_build_keymap(NULL, NULL,
 					   keypad_data->rows, keypad_data->cols,
 					   keypad_data->keymap, input_dev);
 	if (error) {
@@ -434,13 +416,11 @@
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id omap_keypad_dt_match[] = {
 	{ .compatible = "ti,omap4-keypad" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_keypad_dt_match);
-#endif
 
 #ifdef CONFIG_PM_SLEEP
 static int omap4_keypad_suspend(struct device *dev)
@@ -482,7 +462,7 @@
 		.name	= "omap4-keypad",
 		.owner	= THIS_MODULE,
 		.pm	= &omap4_keypad_pm_ops,
-		.of_match_table = of_match_ptr(omap_keypad_dt_match),
+		.of_match_table = omap_keypad_dt_match,
 	},
 };
 module_platform_driver(omap4_keypad_driver);

diff --git a/drivers/input/keyboard/st-keyscan.c b/drivers/input/keyboard/st-keyscan.c
new file mode 100644
index 0000000..758b487
--- /dev/null
+++ b/drivers/input/keyboard/st-keyscan.c

@@ -0,0 +1,274 @@
+/*
+ * STMicroelectronics Key Scanning driver
+ *
+ * Copyright (c) 2014 STMicroelectronics Ltd.
+ * Author: Stuart Menefy <stuart.menefy@st.com>
+ *
+ * Based on sh_keysc.c, copyright 2008 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/input/matrix_keypad.h>
+
+#define ST_KEYSCAN_MAXKEYS 16
+
+#define KEYSCAN_CONFIG_OFF		0x0
+#define KEYSCAN_CONFIG_ENABLE		0x1
+#define KEYSCAN_DEBOUNCE_TIME_OFF	0x4
+#define KEYSCAN_MATRIX_STATE_OFF	0x8
+#define KEYSCAN_MATRIX_DIM_OFF		0xc
+#define KEYSCAN_MATRIX_DIM_X_SHIFT	0x0
+#define KEYSCAN_MATRIX_DIM_Y_SHIFT	0x2
+
+/*
+ * Per-device driver state for one keyscan controller instance.
+ */
+struct st_keyscan {
+	void __iomem *base;		/* mapped controller registers */
+	int irq;			/* interrupt number from platform_get_irq() */
+	struct clk *clk;		/* controller clock from devm_clk_get() */
+	struct input_dev *input_dev;	/* input device the keys are reported on */
+	unsigned long last_state;	/* matrix state read by the previous IRQ */
+	unsigned int n_rows;		/* matrix rows, parsed from the device tree */
+	unsigned int n_cols;		/* matrix columns, parsed from the device tree */
+	unsigned int debounce_us;	/* value of the "st,debounce-us" property */
+};
+
+/*
+ * Interrupt handler: read the low 16 bits of the matrix state register,
+ * report every key whose bit differs from the previously seen state, and
+ * remember the new state for the next interrupt.
+ */
+static irqreturn_t keyscan_isr(int irq, void *dev_id)
+{
+	struct st_keyscan *keypad = dev_id;
+	struct input_dev *input = keypad->input_dev;
+	unsigned short *keycodes = input->keycode;
+	unsigned long now, toggled;
+	int bit;
+
+	now = readl(keypad->base + KEYSCAN_MATRIX_STATE_OFF) & 0xffff;
+	toggled = now ^ keypad->last_state;
+	keypad->last_state = now;
+
+	/* A set bit in 'now' is reported as pressed, a clear one as released */
+	for_each_set_bit(bit, &toggled, BITS_PER_LONG)
+		input_report_key(input, keycodes[bit], now & BIT(bit));
+
+	input_sync(input);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bring the controller up: enable its clock, program the debounce time
+ * and the matrix dimensions, then set the enable bit.
+ *
+ * Returns 0 on success or the error reported by clk_enable().
+ *
+ * NOTE(review): the clock is enabled without a preceding clk_prepare()
+ * anywhere in this file - confirm the platform clock tolerates that.
+ */
+static int keyscan_start(struct st_keyscan *keypad)
+{
+	void __iomem *regs = keypad->base;
+	u32 debounce, dim;
+	int ret;
+
+	ret = clk_enable(keypad->clk);
+	if (ret)
+		return ret;
+
+	/* Microseconds times the clock rate divided by 1000000 */
+	debounce = keypad->debounce_us * (clk_get_rate(keypad->clk) / 1000000);
+	writel(debounce, regs + KEYSCAN_DEBOUNCE_TIME_OFF);
+
+	/* Each dimension field is written as the count minus one */
+	dim = (keypad->n_cols - 1) << KEYSCAN_MATRIX_DIM_X_SHIFT;
+	dim |= (keypad->n_rows - 1) << KEYSCAN_MATRIX_DIM_Y_SHIFT;
+	writel(dim, regs + KEYSCAN_MATRIX_DIM_OFF);
+
+	writel(KEYSCAN_CONFIG_ENABLE, regs + KEYSCAN_CONFIG_OFF);
+
+	return 0;
+}
+
+/*
+ * Shut the controller down: clear the config register (which drops the
+ * enable bit set by keyscan_start()) and then disable the clock. The
+ * register write comes first, while the clock is still enabled.
+ */
+static void keyscan_stop(struct st_keyscan *keypad)
+{
+	writel(0, keypad->base + KEYSCAN_CONFIG_OFF);
+
+	clk_disable(keypad->clk);
+}
+
+/*
+ * input_dev->open callback: start the controller whose state is stored as
+ * the input device's driver data. Returns the result of keyscan_start().
+ */
+static int keyscan_open(struct input_dev *dev)
+{
+	return keyscan_start(input_get_drvdata(dev));
+}
+
+/*
+ * input_dev->close callback: stop the controller whose state is stored as
+ * the input device's driver data.
+ */
+static void keyscan_close(struct input_dev *dev)
+{
+	keyscan_stop(input_get_drvdata(dev));
+}
+
+/*
+ * Read the matrix geometry and the debounce time from the device tree.
+ *
+ * The platform device is reached through keypad_data->input_dev, so that
+ * pointer (and its dev.parent) must already be set when this is called.
+ *
+ * Returns 0 on success or the error from matrix_keypad_parse_of_params().
+ */
+static int keypad_matrix_key_parse_dt(struct st_keyscan *keypad_data)
+{
+	struct device *dev = keypad_data->input_dev->dev.parent;
+	struct device_node *node = dev->of_node;
+	int ret;
+
+	ret = matrix_keypad_parse_of_params(dev, &keypad_data->n_rows,
+					    &keypad_data->n_cols);
+	if (ret) {
+		dev_err(dev, "failed to parse keypad params\n");
+		return ret;
+	}
+
+	/* The return value is not checked, which makes the property optional */
+	of_property_read_u32(node, "st,debounce-us",
+			     &keypad_data->debounce_us);
+
+	dev_dbg(dev, "n_rows=%d n_col=%d debounce=%d\n",
+		keypad_data->n_rows, keypad_data->n_cols,
+		keypad_data->debounce_us);
+
+	return 0;
+}
+
+/*
+ * Probe a device-tree described keyscan controller.
+ *
+ * Allocates the driver state and the input device (both device-managed),
+ * reads the matrix geometry from the device tree, builds the keymap, maps
+ * the registers, acquires the clock and the interrupt and finally
+ * registers the input device. The controller is left stopped here;
+ * keyscan_open() is the hook that starts it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int keyscan_probe(struct platform_device *pdev)
+{
+	struct st_keyscan *keypad_data;
+	struct input_dev *input_dev;
+	struct resource *res;
+	int error;
+
+	if (!pdev->dev.of_node) {
+		dev_err(&pdev->dev, "no DT data present\n");
+		return -EINVAL;
+	}
+
+	keypad_data = devm_kzalloc(&pdev->dev, sizeof(*keypad_data),
+				   GFP_KERNEL);
+	if (!keypad_data)
+		return -ENOMEM;
+
+	input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!input_dev) {
+		dev_err(&pdev->dev, "failed to allocate the input device\n");
+		return -ENOMEM;
+	}
+
+	input_dev->name = pdev->name;
+	input_dev->phys = "keyscan-keys/input0";
+	input_dev->dev.parent = &pdev->dev;
+	input_dev->open = keyscan_open;
+	input_dev->close = keyscan_close;
+
+	input_dev->id.bustype = BUS_HOST;
+
+	/*
+	 * keypad_matrix_key_parse_dt() reaches the platform device through
+	 * keypad_data->input_dev->dev.parent, so the link has to exist before
+	 * the call; otherwise it dereferences the NULL pointer left behind by
+	 * devm_kzalloc().
+	 */
+	keypad_data->input_dev = input_dev;
+
+	error = keypad_matrix_key_parse_dt(keypad_data);
+	if (error)
+		return error;
+
+	error = matrix_keypad_build_keymap(NULL, NULL,
+					   keypad_data->n_rows,
+					   keypad_data->n_cols,
+					   NULL, input_dev);
+	if (error) {
+		dev_err(&pdev->dev, "failed to build keymap\n");
+		return error;
+	}
+
+	input_set_drvdata(input_dev, keypad_data);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	keypad_data->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(keypad_data->base))
+		return PTR_ERR(keypad_data->base);
+
+	keypad_data->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(keypad_data->clk)) {
+		dev_err(&pdev->dev, "cannot get clock\n");
+		return PTR_ERR(keypad_data->clk);
+	}
+
+	/*
+	 * The clock is enabled only so that keyscan_stop() can clear the
+	 * config register; keyscan_stop() disables the clock again.
+	 */
+	error = clk_enable(keypad_data->clk);
+	if (error) {
+		dev_err(&pdev->dev, "failed to enable clock\n");
+		return error;
+	}
+
+	keyscan_stop(keypad_data);
+
+	keypad_data->irq = platform_get_irq(pdev, 0);
+	if (keypad_data->irq < 0) {
+		dev_err(&pdev->dev, "no IRQ specified\n");
+		return -EINVAL;
+	}
+
+	error = devm_request_irq(&pdev->dev, keypad_data->irq, keyscan_isr, 0,
+				 pdev->name, keypad_data);
+	if (error) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		return error;
+	}
+
+	error = input_register_device(input_dev);
+	if (error) {
+		dev_err(&pdev->dev, "failed to register input device\n");
+		return error;
+	}
+
+	platform_set_drvdata(pdev, keypad_data);
+
+	device_set_wakeup_capable(&pdev->dev, 1);
+
+	return 0;
+}
+
+/*
+ * The sleep callbacks are referenced only through SIMPLE_DEV_PM_OPS(),
+ * which drops them when CONFIG_PM_SLEEP is disabled; guard them so they
+ * do not trigger defined-but-not-used warnings in that configuration.
+ */
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System suspend: if the device may wake the system, arm its interrupt as
+ * a wakeup source and leave the controller running; otherwise stop the
+ * controller when the input device has users. Runs under the input
+ * device mutex. Always returns 0.
+ */
+static int keyscan_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct st_keyscan *keypad = platform_get_drvdata(pdev);
+	struct input_dev *input = keypad->input_dev;
+
+	mutex_lock(&input->mutex);
+
+	if (device_may_wakeup(dev))
+		enable_irq_wake(keypad->irq);
+	else if (input->users)
+		keyscan_stop(keypad);
+
+	mutex_unlock(&input->mutex);
+	return 0;
+}
+
+/*
+ * System resume: mirror of keyscan_suspend(). Disarms the wakeup interrupt
+ * if the device may wake the system, otherwise restarts the controller
+ * when the input device has users. Returns 0 or the error from
+ * keyscan_start().
+ */
+static int keyscan_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct st_keyscan *keypad = platform_get_drvdata(pdev);
+	struct input_dev *input = keypad->input_dev;
+	int retval = 0;
+
+	mutex_lock(&input->mutex);
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(keypad->irq);
+	else if (input->users)
+		retval = keyscan_start(keypad);
+
+	mutex_unlock(&input->mutex);
+	return retval;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(keyscan_dev_pm_ops, keyscan_suspend, keyscan_resume);
+
+/* Device-tree compatible strings this driver binds to. */
+static const struct of_device_id keyscan_of_match[] = {
+	{ .compatible = "st,sti-keyscan" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, keyscan_of_match);
+
+/*
+ * Platform driver glue. No .remove callback is provided; keyscan_probe()
+ * acquires its memory, input device, register mapping, clock and IRQ
+ * through devm_* helpers.
+ */
+static struct platform_driver keyscan_device_driver = {
+	.probe		= keyscan_probe,
+	.driver		= {
+		.name	= "st-keyscan",
+		.pm	= &keyscan_dev_pm_ops,
+		.of_match_table = of_match_ptr(keyscan_of_match),
+	}
+};
+
+module_platform_driver(keyscan_device_driver);
+
+MODULE_AUTHOR("Stuart Menefy <stuart.menefy@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics keyscan device driver");
+MODULE_LICENSE("GPL");

diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c
index 74494a3..ad7abae 100644
--- a/drivers/input/keyboard/tc3589x-keypad.c
+++ b/drivers/input/keyboard/tc3589x-keypad.c

@@ -296,6 +296,65 @@
 	tc3589x_keypad_disable(keypad);
 }
 
+#ifdef CONFIG_OF
+/*
+ * Build keypad platform data from the device-tree node of @dev.
+ *
+ * Reads "keypad,num-columns", "keypad,num-rows", "linux,no-autorepeat",
+ * "linux,wakeup" and "debounce-delay-ms", and requires "linux,keymap" to
+ * be present. The result is allocated with devm_kzalloc().
+ *
+ * Returns the platform data on success, or ERR_PTR(-ENODEV) when @dev has
+ * no DT node, ERR_PTR(-ENOMEM) on allocation failure, ERR_PTR(-EINVAL)
+ * for a missing or out-of-range matrix size and ERR_PTR(-ENOENT) when no
+ * keymap is given.
+ */
+static const struct tc3589x_keypad_platform_data *
+tc3589x_keypad_of_probe(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct tc3589x_keypad_platform_data *plat;
+	/*
+	 * The of_property_read_u32() results below are not checked, so start
+	 * from zero: a missing property must read as "not specified" rather
+	 * than as whatever happened to be on the stack.
+	 */
+	u32 cols = 0, rows = 0;
+	u32 debounce_ms = 0;
+	int proplen;
+
+	if (!np)
+		return ERR_PTR(-ENODEV);
+
+	plat = devm_kzalloc(dev, sizeof(*plat), GFP_KERNEL);
+	if (!plat)
+		return ERR_PTR(-ENOMEM);
+
+	of_property_read_u32(np, "keypad,num-columns", &cols);
+	of_property_read_u32(np, "keypad,num-rows", &rows);
+
+	/*
+	 * Validate the full 32-bit values before narrowing them, so that a
+	 * bogus size such as 257 is rejected instead of being truncated to 1.
+	 */
+	if (!rows || !cols ||
+	     rows > TC_KPD_ROWS || cols > TC_KPD_COLUMNS) {
+		dev_err(dev,
+			"keypad columns/rows not properly specified (%ux%u)\n",
+			cols, rows);
+		return ERR_PTR(-EINVAL);
+	}
+	plat->kcol = (u8) cols;
+	plat->krow = (u8) rows;
+
+	/* Only the presence of the keymap is checked here */
+	if (!of_get_property(np, "linux,keymap", &proplen)) {
+		dev_err(dev, "property linux,keymap not found\n");
+		return ERR_PTR(-ENOENT);
+	}
+
+	plat->no_autorepeat = of_property_read_bool(np, "linux,no-autorepeat");
+	plat->enable_wakeup = of_property_read_bool(np, "linux,wakeup");
+
+	/* The custom delay format is ms/16 */
+	of_property_read_u32(np, "debounce-delay-ms", &debounce_ms);
+	if (debounce_ms)
+		plat->debounce_period = debounce_ms * 16;
+	else
+		plat->debounce_period = TC_KPD_DEBOUNCE_PERIOD;
+
+	plat->settle_time = TC_KPD_SETTLE_TIME;
+	/* FIXME: should be property of the IRQ resource? */
+	plat->irqtype = IRQF_TRIGGER_FALLING;
+
+	return plat;
+}
+#else
+/* Without device-tree support there is nothing to parse. */
+static inline const struct tc3589x_keypad_platform_data *
+tc3589x_keypad_of_probe(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+
 static int tc3589x_keypad_probe(struct platform_device *pdev)
 {
 	struct tc3589x *tc3589x = dev_get_drvdata(pdev->dev.parent);
@@ -306,8 +365,11 @@
 
 	plat = tc3589x->pdata->keypad;
 	if (!plat) {
-		dev_err(&pdev->dev, "invalid keypad platform data\n");
-		return -EINVAL;
+		plat = tc3589x_keypad_of_probe(&pdev->dev);
+		if (IS_ERR(plat)) {
+			dev_err(&pdev->dev, "invalid keypad platform data\n");
+			return PTR_ERR(plat);
+		}
 	}
 
 	irq = platform_get_irq(pdev, 0);

diff --git a/drivers/input/misc/88pm860x_onkey.c b/drivers/input/misc/88pm860x_onkey.c
index abd8453..220ce0f 100644
--- a/drivers/input/misc/88pm860x_onkey.c
+++ b/drivers/input/misc/88pm860x_onkey.c

@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/88pm860x.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 #define PM8607_WAKEUP		0x0b
 
@@ -68,7 +69,8 @@
 		return -EINVAL;
 	}
 
-	info = kzalloc(sizeof(struct pm860x_onkey_info), GFP_KERNEL);
+	info = devm_kzalloc(&pdev->dev, sizeof(struct pm860x_onkey_info),
+			    GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 	info->chip = chip;
@@ -76,11 +78,10 @@
 	info->dev = &pdev->dev;
 	info->irq = irq;
 
-	info->idev = input_allocate_device();
+	info->idev = devm_input_allocate_device(&pdev->dev);
 	if (!info->idev) {
 		dev_err(chip->dev, "Failed to allocate input dev\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	info->idev->name = "88pm860x_on";
@@ -93,42 +94,22 @@
 	ret = input_register_device(info->idev);
 	if (ret) {
 		dev_err(chip->dev, "Can't register input device: %d\n", ret);
-		goto out_reg;
+		return ret;
 	}
 
-	ret = request_threaded_irq(info->irq, NULL, pm860x_onkey_handler,
-				   IRQF_ONESHOT, "onkey", info);
+	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+					pm860x_onkey_handler, IRQF_ONESHOT,
+					"onkey", info);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n",
 			info->irq, ret);
-		goto out_irq;
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, info);
 	device_init_wakeup(&pdev->dev, 1);
 
 	return 0;
-
-out_irq:
-	input_unregister_device(info->idev);
-	kfree(info);
-	return ret;
-
-out_reg:
-	input_free_device(info->idev);
-out:
-	kfree(info);
-	return ret;
-}
-
-static int pm860x_onkey_remove(struct platform_device *pdev)
-{
-	struct pm860x_onkey_info *info = platform_get_drvdata(pdev);
-
-	free_irq(info->irq, info);
-	input_unregister_device(info->idev);
-	kfree(info);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -161,7 +142,6 @@
 		.pm	= &pm860x_onkey_pm_ops,
 	},
 	.probe		= pm860x_onkey_probe,
-	.remove		= pm860x_onkey_remove,
 };
 module_platform_driver(pm860x_onkey_driver);
 

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 5928ea7..2ff4425 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig

@@ -224,7 +224,7 @@
 
 config INPUT_GPIO_BEEPER
 	tristate "Generic GPIO Beeper support"
-	depends on OF_GPIO
+	depends on GPIOLIB
 	help
 	  Say Y here if you have a beeper connected to a GPIO pin.
 

diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index f2fbdd8..95ef7dd 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c

@@ -7,6 +7,7 @@
  * AB8500 Power-On Key handler
  */
 
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -65,12 +66,14 @@
 		return irq_dbr;
 	}
 
-	ponkey = kzalloc(sizeof(struct ab8500_ponkey), GFP_KERNEL);
-	input = input_allocate_device();
-	if (!ponkey || !input) {
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	ponkey = devm_kzalloc(&pdev->dev, sizeof(struct ab8500_ponkey),
+			      GFP_KERNEL);
+	if (!ponkey)
+		return -ENOMEM;
+
+	input = devm_input_allocate_device(&pdev->dev);
+	if (!input)
+		return -ENOMEM;
 
 	ponkey->idev = input;
 	ponkey->ab8500 = ab8500;
@@ -82,52 +85,32 @@
 
 	input_set_capability(input, EV_KEY, KEY_POWER);
 
-	error = request_any_context_irq(ponkey->irq_dbf, ab8500_ponkey_handler,
-					0, "ab8500-ponkey-dbf", ponkey);
+	error = devm_request_any_context_irq(&pdev->dev, ponkey->irq_dbf,
+					     ab8500_ponkey_handler, 0,
+					     "ab8500-ponkey-dbf", ponkey);
 	if (error < 0) {
 		dev_err(ab8500->dev, "Failed to request dbf IRQ#%d: %d\n",
 			ponkey->irq_dbf, error);
-		goto err_free_mem;
+		return error;
 	}
 
-	error = request_any_context_irq(ponkey->irq_dbr, ab8500_ponkey_handler,
-					0, "ab8500-ponkey-dbr", ponkey);
+	error = devm_request_any_context_irq(&pdev->dev, ponkey->irq_dbr,
+					     ab8500_ponkey_handler, 0,
+					     "ab8500-ponkey-dbr", ponkey);
 	if (error < 0) {
 		dev_err(ab8500->dev, "Failed to request dbr IRQ#%d: %d\n",
 			ponkey->irq_dbr, error);
-		goto err_free_dbf_irq;
+		return error;
 	}
 
 	error = input_register_device(ponkey->idev);
 	if (error) {
 		dev_err(ab8500->dev, "Can't register input device: %d\n", error);
-		goto err_free_dbr_irq;
+		return error;
 	}
 
 	platform_set_drvdata(pdev, ponkey);
 	return 0;
-
-err_free_dbr_irq:
-	free_irq(ponkey->irq_dbr, ponkey);
-err_free_dbf_irq:
-	free_irq(ponkey->irq_dbf, ponkey);
-err_free_mem:
-	input_free_device(input);
-	kfree(ponkey);
-
-	return error;
-}
-
-static int ab8500_ponkey_remove(struct platform_device *pdev)
-{
-	struct ab8500_ponkey *ponkey = platform_get_drvdata(pdev);
-
-	free_irq(ponkey->irq_dbf, ponkey);
-	free_irq(ponkey->irq_dbr, ponkey);
-	input_unregister_device(ponkey->idev);
-	kfree(ponkey);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -144,7 +127,6 @@
 		.of_match_table = of_match_ptr(ab8500_ponkey_match),
 	},
 	.probe		= ab8500_ponkey_probe,
-	.remove		= ab8500_ponkey_remove,
 };
 module_platform_driver(ab8500_ponkey_driver);
 

diff --git a/drivers/input/misc/gpio-beeper.c b/drivers/input/misc/gpio-beeper.c
index b757435..8886af6 100644
--- a/drivers/input/misc/gpio-beeper.c
+++ b/drivers/input/misc/gpio-beeper.c

@@ -1,7 +1,7 @@
 /*
  * Generic GPIO beeper driver
  *
- * Copyright (C) 2013 Alexander Shiyan <shc_work@mail.ru>
+ * Copyright (C) 2013-2014 Alexander Shiyan <shc_work@mail.ru>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -11,7 +11,8 @@
 
 #include <linux/input.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/of.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
 
@@ -19,14 +20,13 @@
 
 struct gpio_beeper {
 	struct work_struct	work;
-	int			gpio;
-	bool			active_low;
+	struct gpio_desc	*desc;
 	bool			beeping;
 };
 
 static void gpio_beeper_toggle(struct gpio_beeper *beep, bool on)
 {
-	gpio_set_value_cansleep(beep->gpio, on ^ beep->active_low);
+	gpiod_set_value_cansleep(beep->desc, on);
 }
 
 static void gpio_beeper_work(struct work_struct *work)
@@ -65,18 +65,16 @@
 static int gpio_beeper_probe(struct platform_device *pdev)
 {
 	struct gpio_beeper *beep;
-	enum of_gpio_flags flags;
 	struct input_dev *input;
-	unsigned long gflags;
 	int err;
 
 	beep = devm_kzalloc(&pdev->dev, sizeof(*beep), GFP_KERNEL);
 	if (!beep)
 		return -ENOMEM;
 
-	beep->gpio = of_get_gpio_flags(pdev->dev.of_node, 0, &flags);
-	if (!gpio_is_valid(beep->gpio))
-		return beep->gpio;
+	beep->desc = devm_gpiod_get(&pdev->dev, NULL);
+	if (IS_ERR(beep->desc))
+		return PTR_ERR(beep->desc);
 
 	input = devm_input_allocate_device(&pdev->dev);
 	if (!input)
@@ -94,10 +92,7 @@
 
 	input_set_capability(input, EV_SND, SND_BELL);
 
-	beep->active_low = flags & OF_GPIO_ACTIVE_LOW;
-	gflags = beep->active_low ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
-
-	err = devm_gpio_request_one(&pdev->dev, beep->gpio, gflags, pdev->name);
+	err = gpiod_direction_output(beep->desc, 0);
 	if (err)
 		return err;
 
@@ -106,17 +101,19 @@
 	return input_register_device(input);
 }
 
-static struct of_device_id gpio_beeper_of_match[] = {
+#ifdef CONFIG_OF
+static const struct of_device_id gpio_beeper_of_match[] = {
 	{ .compatible = BEEPER_MODNAME, },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, gpio_beeper_of_match);
+#endif
 
 static struct platform_driver gpio_beeper_platform_driver = {
 	.driver	= {
 		.name		= BEEPER_MODNAME,
 		.owner		= THIS_MODULE,
-		.of_match_table	= gpio_beeper_of_match,
+		.of_match_table	= of_match_ptr(gpio_beeper_of_match),
 	},
 	.probe	= gpio_beeper_probe,
 };

diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index 5a73639..719410f 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c

@@ -1566,6 +1566,7 @@
 	if (!pcu->urb_ctrl_buf) {
 		dev_err(pcu->dev,
 			"Failed to allocate memory for read buffer\n");
+		error = -ENOMEM;
 		goto err_free_urb_out_buf;
 	}
 

diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c
index eef41cf..3809618 100644
--- a/drivers/input/misc/max8925_onkey.c
+++ b/drivers/input/misc/max8925_onkey.c

@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/max8925.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 #define SW_INPUT		(1 << 7)	/* 0/1 -- up/down */
 #define HARDRESET_EN		(1 << 7)
@@ -81,12 +82,14 @@
 		return -EINVAL;
 	}
 
-	info = kzalloc(sizeof(struct max8925_onkey_info), GFP_KERNEL);
-	input = input_allocate_device();
-	if (!info || !input) {
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	info = devm_kzalloc(&pdev->dev, sizeof(struct max8925_onkey_info),
+			    GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	input = devm_input_allocate_device(&pdev->dev);
+	if (!input)
+		return -ENOMEM;
 
 	info->idev = input;
 	info->i2c = chip->i2c;
@@ -100,55 +103,34 @@
 	input->dev.parent = &pdev->dev;
 	input_set_capability(input, EV_KEY, KEY_POWER);
 
-	error = request_threaded_irq(irq[0], NULL, max8925_onkey_handler,
-				     IRQF_ONESHOT, "onkey-down", info);
+	error = devm_request_threaded_irq(&pdev->dev, irq[0], NULL,
+					  max8925_onkey_handler, IRQF_ONESHOT,
+					  "onkey-down", info);
 	if (error < 0) {
 		dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n",
 			irq[0], error);
-		goto err_free_mem;
+		return error;
 	}
 
-	error = request_threaded_irq(irq[1], NULL, max8925_onkey_handler,
-				     IRQF_ONESHOT, "onkey-up", info);
+	error = devm_request_threaded_irq(&pdev->dev, irq[1], NULL,
+					  max8925_onkey_handler, IRQF_ONESHOT,
+					  "onkey-up", info);
 	if (error < 0) {
 		dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n",
 			irq[1], error);
-		goto err_free_irq0;
+		return error;
 	}
 
 	error = input_register_device(info->idev);
 	if (error) {
 		dev_err(chip->dev, "Can't register input device: %d\n", error);
-		goto err_free_irq1;
+		return error;
 	}
 
 	platform_set_drvdata(pdev, info);
 	device_init_wakeup(&pdev->dev, 1);
 
 	return 0;
-
-err_free_irq1:
-	free_irq(irq[1], info);
-err_free_irq0:
-	free_irq(irq[0], info);
-err_free_mem:
-	input_free_device(input);
-	kfree(info);
-
-	return error;
-}
-
-static int max8925_onkey_remove(struct platform_device *pdev)
-{
-	struct max8925_onkey_info *info = platform_get_drvdata(pdev);
-	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
-
-	free_irq(info->irq[0] + chip->irq_base, info);
-	free_irq(info->irq[1] + chip->irq_base, info);
-	input_unregister_device(info->idev);
-	kfree(info);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -190,7 +172,6 @@
 		.pm	= &max8925_onkey_pm_ops,
 	},
 	.probe		= max8925_onkey_probe,
-	.remove		= max8925_onkey_remove,
 };
 module_platform_driver(max8925_onkey_driver);
 

diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index 1fea548..a363ebb 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c

@@ -181,11 +181,21 @@
 	}
 
 	if (!chip->enabled) {
-		chip->enabled = true;
-		regulator_enable(chip->regulator);
+		error = regulator_enable(chip->regulator);
+		if (error) {
+			dev_err(chip->dev, "Failed to enable regulator\n");
+			goto out;
+		}
 		max8997_haptic_configure(chip);
-		if (chip->mode == MAX8997_EXTERNAL_MODE)
-			pwm_enable(chip->pwm);
+		if (chip->mode == MAX8997_EXTERNAL_MODE) {
+			error = pwm_enable(chip->pwm);
+			if (error) {
+				dev_err(chip->dev, "Failed to enable PWM\n");
+				regulator_disable(chip->regulator);
+				goto out;
+			}
+		}
+		chip->enabled = true;
 	}
 
 out:

diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c
index 1cb8fda..c91e3d3 100644
--- a/drivers/input/misc/pmic8xxx-pwrkey.c
+++ b/drivers/input/misc/pmic8xxx-pwrkey.c

@@ -92,15 +92,15 @@
 	bool pull_up;
 
 	if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay))
-		kpd_delay = 0;
+		kpd_delay = 15625;
 
-	pull_up = of_property_read_bool(pdev->dev.of_node, "pull-up");
-
-	if (kpd_delay > 62500) {
+	if (kpd_delay > 62500 || kpd_delay == 0) {
 		dev_err(&pdev->dev, "invalid power key trigger delay\n");
 		return -EINVAL;
 	}
 
+	pull_up = of_property_read_bool(pdev->dev.of_node, "pull-up");
+
 	regmap = dev_get_regmap(pdev->dev.parent, NULL);
 	if (!regmap) {
 		dev_err(&pdev->dev, "failed to locate regmap for the device\n");

diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c
index 99b9e42..93558a1c 100644
--- a/drivers/input/misc/rotary_encoder.c
+++ b/drivers/input/misc/rotary_encoder.c

@@ -143,7 +143,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id rotary_encoder_of_match[] = {
+static const struct of_device_id rotary_encoder_of_match[] = {
 	{ .compatible = "rotary-encoder", },
 	{ },
 };

diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index 20c80f5..5a6334b 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c

@@ -17,7 +17,6 @@
 #include <linux/acpi.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio_keys.h>
-#include <linux/input.h>
 #include <linux/platform_device.h>
 #include <linux/pnp.h>
 

diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index 77dc23b..6d26eec 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c

@@ -262,7 +262,7 @@
 	struct vibra_info *info;
 	int vddvibl_uV = 0;
 	int vddvibr_uV = 0;
-	int ret;
+	int error;
 
 	twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
 						 "vibra");
@@ -309,12 +309,12 @@
 
 	mutex_init(&info->mutex);
 
-	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
-					twl6040_vib_irq_handler, 0,
-					"twl6040_irq_vib", info);
-	if (ret) {
-		dev_err(info->dev, "VIB IRQ request failed: %d\n", ret);
-		return ret;
+	error = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+					  twl6040_vib_irq_handler, 0,
+					  "twl6040_irq_vib", info);
+	if (error) {
+		dev_err(info->dev, "VIB IRQ request failed: %d\n", error);
+		return error;
 	}
 
 	info->supplies[0].supply = "vddvibl";
@@ -323,40 +323,40 @@
 	 * When booted with Device tree the regulators are attached to the
 	 * parent device (twl6040 MFD core)
 	 */
-	ret = regulator_bulk_get(twl6040_core_dev, ARRAY_SIZE(info->supplies),
-				 info->supplies);
-	if (ret) {
-		dev_err(info->dev, "couldn't get regulators %d\n", ret);
-		return ret;
+	error = devm_regulator_bulk_get(twl6040_core_dev,
+					ARRAY_SIZE(info->supplies),
+					info->supplies);
+	if (error) {
+		dev_err(info->dev, "couldn't get regulators %d\n", error);
+		return error;
 	}
 
 	if (vddvibl_uV) {
-		ret = regulator_set_voltage(info->supplies[0].consumer,
-					    vddvibl_uV, vddvibl_uV);
-		if (ret) {
+		error = regulator_set_voltage(info->supplies[0].consumer,
+					      vddvibl_uV, vddvibl_uV);
+		if (error) {
 			dev_err(info->dev, "failed to set VDDVIBL volt %d\n",
-				ret);
-			goto err_regulator;
+				error);
+			return error;
 		}
 	}
 
 	if (vddvibr_uV) {
-		ret = regulator_set_voltage(info->supplies[1].consumer,
-					    vddvibr_uV, vddvibr_uV);
-		if (ret) {
+		error = regulator_set_voltage(info->supplies[1].consumer,
+					      vddvibr_uV, vddvibr_uV);
+		if (error) {
 			dev_err(info->dev, "failed to set VDDVIBR volt %d\n",
-				ret);
-			goto err_regulator;
+				error);
+			return error;
 		}
 	}
 
 	INIT_WORK(&info->play_work, vibra_play_work);
 
-	info->input_dev = input_allocate_device();
-	if (info->input_dev == NULL) {
+	info->input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!info->input_dev) {
 		dev_err(info->dev, "couldn't allocate input device\n");
-		ret = -ENOMEM;
-		goto err_regulator;
+		return -ENOMEM;
 	}
 
 	input_set_drvdata(info->input_dev, info);
@@ -367,44 +367,25 @@
 	info->input_dev->close = twl6040_vibra_close;
 	__set_bit(FF_RUMBLE, info->input_dev->ffbit);
 
-	ret = input_ff_create_memless(info->input_dev, NULL, vibra_play);
-	if (ret < 0) {
+	error = input_ff_create_memless(info->input_dev, NULL, vibra_play);
+	if (error) {
 		dev_err(info->dev, "couldn't register vibrator to FF\n");
-		goto err_ialloc;
+		return error;
 	}
 
-	ret = input_register_device(info->input_dev);
-	if (ret < 0) {
+	error = input_register_device(info->input_dev);
+	if (error) {
 		dev_err(info->dev, "couldn't register input device\n");
-		goto err_iff;
+		return error;
 	}
 
 	platform_set_drvdata(pdev, info);
 
 	return 0;
-
-err_iff:
-	input_ff_destroy(info->input_dev);
-err_ialloc:
-	input_free_device(info->input_dev);
-err_regulator:
-	regulator_bulk_free(ARRAY_SIZE(info->supplies), info->supplies);
-	return ret;
-}
-
-static int twl6040_vibra_remove(struct platform_device *pdev)
-{
-	struct vibra_info *info = platform_get_drvdata(pdev);
-
-	input_unregister_device(info->input_dev);
-	regulator_bulk_free(ARRAY_SIZE(info->supplies), info->supplies);
-
-	return 0;
 }
 
 static struct platform_driver twl6040_vibra_driver = {
 	.probe		= twl6040_vibra_probe,
-	.remove		= twl6040_vibra_remove,
 	.driver		= {
 		.name	= "twl6040-vibra",
 		.owner	= THIS_MODULE,

diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 6b8441f..366fc7a 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig

@@ -53,7 +53,7 @@
 	default y
 	depends on MOUSE_PS2
 	help
-	  Say Y here if you have a Logictech PS/2++ mouse connected to
+	  Say Y here if you have a Logitech PS/2++ mouse connected to
 	  your system.
 
 	  If unsure, say Y.

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index b96e978..ee2a04d 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c

@@ -473,8 +473,15 @@
 	input_report_key(dev, BTN_TOOL_FINGER, fingers == 1);
 	input_report_key(dev, BTN_TOOL_DOUBLETAP, fingers == 2);
 	input_report_key(dev, BTN_TOOL_TRIPLETAP, fingers == 3);
-	input_report_key(dev, BTN_LEFT, packet[0] & 0x01);
-	input_report_key(dev, BTN_RIGHT, packet[0] & 0x02);
+
+	/* For clickpads map both buttons to BTN_LEFT */
+	if (etd->fw_version & 0x001000) {
+		input_report_key(dev, BTN_LEFT, packet[0] & 0x03);
+	} else {
+		input_report_key(dev, BTN_LEFT, packet[0] & 0x01);
+		input_report_key(dev, BTN_RIGHT, packet[0] & 0x02);
+	}
+
 	input_report_abs(dev, ABS_PRESSURE, pres);
 	input_report_abs(dev, ABS_TOOL_WIDTH, width);
 
@@ -484,10 +491,17 @@
 static void elantech_input_sync_v4(struct psmouse *psmouse)
 {
 	struct input_dev *dev = psmouse->dev;
+	struct elantech_data *etd = psmouse->private;
 	unsigned char *packet = psmouse->packet;
 
-	input_report_key(dev, BTN_LEFT, packet[0] & 0x01);
-	input_report_key(dev, BTN_RIGHT, packet[0] & 0x02);
+	/* For clickpads map both buttons to BTN_LEFT */
+	if (etd->fw_version & 0x001000) {
+		input_report_key(dev, BTN_LEFT, packet[0] & 0x03);
+	} else {
+		input_report_key(dev, BTN_LEFT, packet[0] & 0x01);
+		input_report_key(dev, BTN_RIGHT, packet[0] & 0x02);
+	}
+
 	input_mt_report_pointer_emulation(dev, true);
 	input_sync(dev);
 }
@@ -835,7 +849,7 @@
 		if (etd->set_hw_resolution)
 			etd->reg_10 = 0x0b;
 		else
-			etd->reg_10 = 0x03;
+			etd->reg_10 = 0x01;
 
 		if (elantech_write_reg(psmouse, 0x10, etd->reg_10))
 			rc = -1;
@@ -1336,7 +1350,8 @@
 }
 
 /*
- * Some hw_version 3 models go into error state when we try to set bit 3 of r10
+ * Some hw_version 3 models go into error state when we try to set
+ * bit 3 and/or bit 1 of r10.
  */
 static const struct dmi_system_id no_hw_res_dmi_table[] = {
 #if defined(CONFIG_DMI) && defined(CONFIG_X86)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index c5ec703..ec772d9 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c

@@ -347,15 +347,6 @@
 	unsigned char resp[3];
 	int i;
 
-	for (i = 0; min_max_pnpid_table[i].pnp_ids; i++)
-		if (matches_pnp_id(psmouse, min_max_pnpid_table[i].pnp_ids)) {
-			priv->x_min = min_max_pnpid_table[i].x_min;
-			priv->x_max = min_max_pnpid_table[i].x_max;
-			priv->y_min = min_max_pnpid_table[i].y_min;
-			priv->y_max = min_max_pnpid_table[i].y_max;
-			return 0;
-		}
-
 	if (SYN_ID_MAJOR(priv->identity) < 4)
 		return 0;
 
@@ -366,6 +357,16 @@
 		}
 	}
 
+	for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) {
+		if (matches_pnp_id(psmouse, min_max_pnpid_table[i].pnp_ids)) {
+			priv->x_min = min_max_pnpid_table[i].x_min;
+			priv->x_max = min_max_pnpid_table[i].x_max;
+			priv->y_min = min_max_pnpid_table[i].y_min;
+			priv->y_max = min_max_pnpid_table[i].y_max;
+			return 0;
+		}
+	}
+
 	if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 &&
 	    SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) {
 		if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) {

diff --git a/drivers/input/serio/apbps2.c b/drivers/input/serio/apbps2.c
index 17e01a8..98be824 100644
--- a/drivers/input/serio/apbps2.c
+++ b/drivers/input/serio/apbps2.c

@@ -203,7 +203,7 @@
 	return 0;
 }
 
-static struct of_device_id apbps2_of_match[] = {
+static const struct of_device_id apbps2_of_match[] = {
 	{ .name = "GAISLER_APBPS2", },
 	{ .name = "01_060", },
 	{}

diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c
index 5d2fe7e..d906f3e 100644
--- a/drivers/input/serio/olpc_apsp.c
+++ b/drivers/input/serio/olpc_apsp.c

@@ -262,7 +262,7 @@
 	return 0;
 }
 
-static struct of_device_id olpc_apsp_dt_ids[] = {
+static const struct of_device_id olpc_apsp_dt_ids[] = {
 	{ .compatible = "olpc,ap-sp", },
 	{}
 };

diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 611fc39..2c613cd 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c

@@ -349,6 +349,7 @@
 						break;
 
 					case MTTPC:
+					case MTTPC_B:
 						features->pktlen = WACOM_PKGLEN_MTTPC;
 						break;
 
@@ -380,6 +381,16 @@
 						i += 12;
 						break;
 
+					case MTTPC_B:
+						features->x_max =
+							get_unaligned_le16(&report[i + 3]);
+						features->x_phy =
+							get_unaligned_le16(&report[i + 6]);
+						features->unit = report[i - 5];
+						features->unitExpo = report[i - 3];
+						i += 9;
+						break;
+
 					default:
 						features->x_max =
 							get_unaligned_le16(&report[i + 3]);
@@ -430,6 +441,14 @@
 						i += 12;
 						break;
 
+					case MTTPC_B:
+						features->y_max =
+							get_unaligned_le16(&report[i + 3]);
+						features->y_phy =
+							get_unaligned_le16(&report[i + 6]);
+						i += 9;
+						break;
+
 					default:
 						features->y_max =
 							features->x_max;

diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 4822c57..977d05c 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c

@@ -484,6 +484,8 @@
 		input_report_key(input, BTN_TOUCH, 0);
 		input_report_abs(input, ABS_PRESSURE, 0);
 		input_report_abs(input, ABS_DISTANCE, wacom->features.distance_max);
+		if (features->quirks & WACOM_QUIRK_MULTI_INPUT)
+			wacom->shared->stylus_in_proximity = true;
 	}
 
 	/* Exit report */
@@ -928,12 +930,12 @@
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 
 		if (touch) {
-			int t_x = le16_to_cpup((__le16 *)&data[offset + 2]);
-			int c_x = le16_to_cpup((__le16 *)&data[offset + 4]);
-			int t_y = le16_to_cpup((__le16 *)&data[offset + 6]);
-			int c_y = le16_to_cpup((__le16 *)&data[offset + 8]);
-			int w = le16_to_cpup((__le16 *)&data[offset + 10]);
-			int h = le16_to_cpup((__le16 *)&data[offset + 12]);
+			int t_x = get_unaligned_le16(&data[offset + 2]);
+			int c_x = get_unaligned_le16(&data[offset + 4]);
+			int t_y = get_unaligned_le16(&data[offset + 6]);
+			int c_y = get_unaligned_le16(&data[offset + 8]);
+			int w = get_unaligned_le16(&data[offset + 10]);
+			int h = get_unaligned_le16(&data[offset + 12]);
 
 			input_report_abs(input, ABS_MT_POSITION_X, t_x);
 			input_report_abs(input, ABS_MT_POSITION_Y, t_y);
@@ -962,7 +964,7 @@
 	int x_offset = 0;
 
 	/* MTTPC does not support Height and Width */
-	if (wacom->features.type == MTTPC)
+	if (wacom->features.type == MTTPC || wacom->features.type == MTTPC_B)
 		x_offset = -4;
 
 	/*
@@ -978,7 +980,7 @@
 	for (i = 0; i < contacts_to_send; i++) {
 		int offset = (WACOM_BYTES_PER_MT_PACKET + x_offset) * i + 3;
 		bool touch = data[offset] & 0x1;
-		int id = le16_to_cpup((__le16 *)&data[offset + 1]);
+		int id = get_unaligned_le16(&data[offset + 1]);
 		int slot = input_mt_get_slot_by_key(input, id);
 
 		if (slot < 0)
@@ -987,8 +989,8 @@
 		input_mt_slot(input, slot);
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 		if (touch) {
-			int x = le16_to_cpup((__le16 *)&data[offset + x_offset + 7]);
-			int y = le16_to_cpup((__le16 *)&data[offset + x_offset + 9]);
+			int x = get_unaligned_le16(&data[offset + x_offset + 7]);
+			int y = get_unaligned_le16(&data[offset + x_offset + 9]);
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
 		}
@@ -1047,6 +1049,10 @@
 			prox = data[0] & 0x01;
 			x = get_unaligned_le16(&data[1]);
 			y = get_unaligned_le16(&data[3]);
+		} else if (len == WACOM_PKGLEN_TPC1FG_B) {
+			prox = data[2] & 0x01;
+			x = get_unaligned_le16(&data[3]);
+			y = get_unaligned_le16(&data[5]);
 		} else {
 			prox = data[1] & 0x01;
 			x = le16_to_cpup((__le16 *)&data[2]);
@@ -1110,6 +1116,9 @@
 	case WACOM_PKGLEN_TPC2FG:
 		return wacom_tpc_mt_touch(wacom);
 
+	case WACOM_PKGLEN_PENABLED:
+		return wacom_tpc_pen(wacom);
+
 	default:
 		switch (data[0]) {
 		case WACOM_REPORT_TPC1FG:
@@ -1119,6 +1128,7 @@
 			return wacom_tpc_single_touch(wacom, len);
 
 		case WACOM_REPORT_TPCMT:
+		case WACOM_REPORT_TPCMT2:
 			return wacom_mt_touch(wacom);
 
 		case WACOM_REPORT_PENABLED:
@@ -1461,6 +1471,7 @@
 	case TABLETPC2FG:
 	case MTSCREEN:
 	case MTTPC:
+	case MTTPC_B:
 		sync = wacom_tpc_irq(wacom_wac, len);
 		break;
 
@@ -1565,10 +1576,10 @@
 	struct wacom_features *features = &wacom_wac->features;
 
 	if (features->device_type == BTN_TOOL_PEN) {
-		input_set_abs_params(input_dev, ABS_X, 0, features->x_max,
-				     features->x_fuzz, 0);
-		input_set_abs_params(input_dev, ABS_Y, 0, features->y_max,
-				     features->y_fuzz, 0);
+		input_set_abs_params(input_dev, ABS_X, features->x_min,
+				     features->x_max, features->x_fuzz, 0);
+		input_set_abs_params(input_dev, ABS_Y, features->y_min,
+				     features->y_max, features->y_fuzz, 0);
 		input_set_abs_params(input_dev, ABS_PRESSURE, 0,
 			features->pressure_max, features->pressure_fuzz, 0);
 
@@ -1802,6 +1813,7 @@
 
 	case MTSCREEN:
 	case MTTPC:
+	case MTTPC_B:
 	case TABLETPC2FG:
 		if (features->device_type == BTN_TOOL_FINGER) {
 			unsigned int flags = INPUT_MT_DIRECT;
@@ -2123,11 +2135,11 @@
 	  63, INTUOSPL, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
 	  .touch_max = 16 };
 static const struct wacom_features wacom_features_0xF4 =
-	{ "Wacom Cintiq 24HD",       WACOM_PKGLEN_INTUOS,   104480, 65600, 2047,
-	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+	{ "Wacom Cintiq 24HD",       WACOM_PKGLEN_INTUOS,   104280, 65400, 2047,
+	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
 static const struct wacom_features wacom_features_0xF8 =
-	{ "Wacom Cintiq 24HD touch", WACOM_PKGLEN_INTUOS,   104480, 65600, 2047, /* Pen */
-	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	{ "Wacom Cintiq 24HD touch", WACOM_PKGLEN_INTUOS,   104280, 65400, 2047, /* Pen */
+	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200,
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf6 };
 static const struct wacom_features wacom_features_0xF6 =
 	{ "Wacom Cintiq 24HD touch", .type = WACOM_24HDT, /* Touch */
@@ -2142,8 +2154,8 @@
 	{ "Wacom Cintiq 12WX",    WACOM_PKGLEN_INTUOS,    53020, 33440, 1023,
 	  63, WACOM_BEE, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
 static const struct wacom_features wacom_features_0x304 =
-	{ "Wacom Cintiq 13HD",    WACOM_PKGLEN_INTUOS,    59552, 33848, 1023,
-	  63, WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+	{ "Wacom Cintiq 13HD",    WACOM_PKGLEN_INTUOS,    59352, 33648, 1023,
+	  63, WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
 static const struct wacom_features wacom_features_0xC7 =
 	{ "Wacom DTU1931",        WACOM_PKGLEN_GRAPHIRE,  37832, 30305,  511,
 	  0, PL, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2157,24 +2169,24 @@
 	{ "Wacom DTU1031",        WACOM_PKGLEN_DTUS,      22096, 13960,  511,
 	  0, DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x57 =
-	{ "Wacom DTK2241",        WACOM_PKGLEN_INTUOS,    95840, 54260, 2047,
-	  63, DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES};
+	{ "Wacom DTK2241",        WACOM_PKGLEN_INTUOS,    95640, 54060, 2047,
+	  63, DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
 static const struct wacom_features wacom_features_0x59 = /* Pen */
-	{ "Wacom DTH2242",        WACOM_PKGLEN_INTUOS,    95840, 54260, 2047,
-	  63, DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	{ "Wacom DTH2242",        WACOM_PKGLEN_INTUOS,    95640, 54060, 2047,
+	  63, DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200,
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5D };
 static const struct wacom_features wacom_features_0x5D = /* Touch */
 	{ "Wacom DTH2242",       .type = WACOM_24HDT,
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x59, .touch_max = 10 };
 static const struct wacom_features wacom_features_0xCC =
-	{ "Wacom Cintiq 21UX2",   WACOM_PKGLEN_INTUOS,    87200, 65600, 2047,
-	  63, WACOM_21UX2, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+	{ "Wacom Cintiq 21UX2",   WACOM_PKGLEN_INTUOS,    87000, 65400, 2047,
+	  63, WACOM_21UX2, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
 static const struct wacom_features wacom_features_0xFA =
-	{ "Wacom Cintiq 22HD",    WACOM_PKGLEN_INTUOS,    95840, 54260, 2047,
-	  63, WACOM_22HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+	{ "Wacom Cintiq 22HD",    WACOM_PKGLEN_INTUOS,    95640, 54060, 2047,
+	  63, WACOM_22HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
 static const struct wacom_features wacom_features_0x5B =
-	{ "Wacom Cintiq 22HDT", WACOM_PKGLEN_INTUOS,      95840, 54260, 2047,
-	  63, WACOM_22HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	{ "Wacom Cintiq 22HDT", WACOM_PKGLEN_INTUOS,      95640, 54060, 2047,
+	  63, WACOM_22HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200,
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5e };
 static const struct wacom_features wacom_features_0x5E =
 	{ "Wacom Cintiq 22HDT", .type = WACOM_24HDT,
@@ -2233,9 +2245,21 @@
 static const struct wacom_features wacom_features_0x10F =
 	{ "Wacom ISDv4 10F",      WACOM_PKGLEN_MTTPC,     27760, 15694,  255,
 	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x116 =
+	{ "Wacom ISDv4 116",      WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
+	  0, TABLETPCE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x4001 =
 	{ "Wacom ISDv4 4001",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
 	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x4004 =
+	{ "Wacom ISDv4 4004",      WACOM_PKGLEN_MTTPC,     11060, 6220,  255,
+	  0, MTTPC_B, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x5000 =
+	{ "Wacom ISDv4 5000",      WACOM_PKGLEN_MTTPC,     27848, 15752,  1023,
+	  0, MTTPC_B, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x5002 =
+	{ "Wacom ISDv4 5002",      WACOM_PKGLEN_MTTPC,     29576, 16724,  1023,
+	  0, MTTPC_B, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x47 =
 	{ "Wacom Intuos2 6x8",    WACOM_PKGLEN_INTUOS,    20320, 16240, 1023,
 	  31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2316,8 +2340,8 @@
 	{ "ISD-V4",               WACOM_PKGLEN_GRAPHIRE,  12800,  8000,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x0307 =
-	{ "Wacom ISDv5 307", WACOM_PKGLEN_INTUOS,  59552,  33848, 2047,
-	  63, CINTIQ_HYBRID, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	{ "Wacom ISDv5 307", WACOM_PKGLEN_INTUOS,  59352,  33648, 2047,
+	  63, CINTIQ_HYBRID, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200,
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x309 };
 static const struct wacom_features wacom_features_0x0309 =
 	{ "Wacom ISDv5 309", .type = WACOM_24HDT, /* Touch */
@@ -2447,6 +2471,7 @@
 	{ USB_DEVICE_WACOM(0x10D) },
 	{ USB_DEVICE_WACOM(0x10E) },
 	{ USB_DEVICE_WACOM(0x10F) },
+	{ USB_DEVICE_WACOM(0x116) },
 	{ USB_DEVICE_WACOM(0x300) },
 	{ USB_DEVICE_WACOM(0x301) },
 	{ USB_DEVICE_DETAILED(0x302, USB_CLASS_HID, 0, 0) },
@@ -2457,6 +2482,9 @@
 	{ USB_DEVICE_DETAILED(0x315, USB_CLASS_HID, 0, 0) },
 	{ USB_DEVICE_DETAILED(0x317, USB_CLASS_HID, 0, 0) },
 	{ USB_DEVICE_WACOM(0x4001) },
+	{ USB_DEVICE_WACOM(0x4004) },
+	{ USB_DEVICE_WACOM(0x5000) },
+	{ USB_DEVICE_WACOM(0x5002) },
 	{ USB_DEVICE_WACOM(0x47) },
 	{ USB_DEVICE_WACOM(0xF4) },
 	{ USB_DEVICE_WACOM(0xF8) },

diff --git a/drivers/input/tablet/wacom_wac.h b/drivers/input/tablet/wacom_wac.h
index f69c0eb..b2c9a9c 100644
--- a/drivers/input/tablet/wacom_wac.h
+++ b/drivers/input/tablet/wacom_wac.h

@@ -22,6 +22,7 @@
 #define WACOM_PKGLEN_BBFUN	 9
 #define WACOM_PKGLEN_INTUOS	10
 #define WACOM_PKGLEN_TPC1FG	 5
+#define WACOM_PKGLEN_TPC1FG_B	10
 #define WACOM_PKGLEN_TPC2FG	14
 #define WACOM_PKGLEN_BBTOUCH	20
 #define WACOM_PKGLEN_BBTOUCH3	64
@@ -30,6 +31,7 @@
 #define WACOM_PKGLEN_MTOUCH	62
 #define WACOM_PKGLEN_MTTPC	40
 #define WACOM_PKGLEN_DTUS	68
+#define WACOM_PKGLEN_PENABLED	 8
 
 /* wacom data size per MT contact */
 #define WACOM_BYTES_PER_MT_PACKET	11
@@ -52,6 +54,7 @@
 #define WACOM_REPORT_TPC1FG		6
 #define WACOM_REPORT_TPC2FG		13
 #define WACOM_REPORT_TPCMT		13
+#define WACOM_REPORT_TPCMT2		3
 #define WACOM_REPORT_TPCHID		15
 #define WACOM_REPORT_TPCST		16
 #define WACOM_REPORT_DTUS		17
@@ -105,6 +108,7 @@
 	TABLETPC2FG,
 	MTSCREEN,
 	MTTPC,
+	MTTPC_B,
 	MAX_TYPE
 };
 
@@ -118,6 +122,8 @@
 	int type;
 	int x_resolution;
 	int y_resolution;
+	int x_min;
+	int y_min;
 	int device_type;
 	int x_phy;
 	int y_phy;

diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 544e20c..0d4a9fa 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c

@@ -16,6 +16,7 @@
 #include <linux/input.h>
 #include <linux/mfd/88pm860x.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 #define MEAS_LEN		(8)
 #define ACCURATE_BIT		(12)
@@ -234,16 +235,17 @@
 	if (ret)
 		return ret;
 
-	touch = kzalloc(sizeof(struct pm860x_touch), GFP_KERNEL);
-	if (touch == NULL)
+	touch = devm_kzalloc(&pdev->dev, sizeof(struct pm860x_touch),
+			     GFP_KERNEL);
+	if (!touch)
 		return -ENOMEM;
+
 	platform_set_drvdata(pdev, touch);
 
-	touch->idev = input_allocate_device();
-	if (touch->idev == NULL) {
+	touch->idev = devm_input_allocate_device(&pdev->dev);
+	if (!touch->idev) {
 		dev_err(&pdev->dev, "Failed to allocate input device!\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	touch->idev->name = "88pm860x-touch";
@@ -258,10 +260,11 @@
 	touch->res_x = res_x;
 	input_set_drvdata(touch->idev, touch);
 
-	ret = request_threaded_irq(touch->irq, NULL, pm860x_touch_handler,
-				   IRQF_ONESHOT, "touch", touch);
+	ret = devm_request_threaded_irq(&pdev->dev, touch->irq, NULL,
+					pm860x_touch_handler, IRQF_ONESHOT,
+					"touch", touch);
 	if (ret < 0)
-		goto out_irq;
+		return ret;
 
 	__set_bit(EV_ABS, touch->idev->evbit);
 	__set_bit(ABS_X, touch->idev->absbit);
@@ -279,28 +282,11 @@
 	ret = input_register_device(touch->idev);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to register touch!\n");
-		goto out_rg;
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, touch);
 	return 0;
-out_rg:
-	free_irq(touch->irq, touch);
-out_irq:
-	input_free_device(touch->idev);
-out:
-	kfree(touch);
-	return ret;
-}
-
-static int pm860x_touch_remove(struct platform_device *pdev)
-{
-	struct pm860x_touch *touch = platform_get_drvdata(pdev);
-
-	input_unregister_device(touch->idev);
-	free_irq(touch->irq, touch);
-	kfree(touch);
-	return 0;
 }
 
 static struct platform_driver pm860x_touch_driver = {
@@ -309,7 +295,6 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe	= pm860x_touch_probe,
-	.remove	= pm860x_touch_remove,
 };
 module_platform_driver(pm860x_touch_driver);
 

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index d4e5ab5..a23a94b 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig

@@ -11,6 +11,10 @@
 
 if INPUT_TOUCHSCREEN
 
+config OF_TOUCHSCREEN
+	def_tristate INPUT
+	depends on INPUT && OF
+
 config TOUCHSCREEN_88PM860X
 	tristate "Marvell 88PM860x touchscreen"
 	depends on MFD_88PM860X
@@ -89,6 +93,7 @@
 config TOUCHSCREEN_ATMEL_MXT
 	tristate "Atmel mXT I2C Touchscreen"
 	depends on I2C
+	select FW_LOADER
 	help
 	  Say Y here if you have Atmel mXT series I2C touchscreen,
 	  such as AT42QT602240/ATMXT224, connected to your system.
@@ -846,7 +851,7 @@
 
 config TOUCHSCREEN_W90X900
 	tristate "W90P910 touchscreen driver"
-	depends on HAVE_CLK
+	depends on ARCH_W90X900
 	help
 	  Say Y here if you have a W90P910 based touchscreen.
 
@@ -885,6 +890,17 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called stmpe-ts.
 
+config TOUCHSCREEN_SUN4I
+	tristate "Allwinner sun4i resistive touchscreen controller support"
+	depends on ARCH_SUNXI || COMPILE_TEST
+	depends on HWMON
+	help
+	  This selects support for the resistive touchscreen controller
+	  found on Allwinner sunxi SoCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called sun4i-ts.
+
 config TOUCHSCREEN_SUR40
 	tristate "Samsung SUR40 (Surface 2.0/PixelSense) touchscreen"
 	depends on USB

diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 03f12a1..126479d 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile

@@ -6,6 +6,7 @@
 
 wm97xx-ts-y := wm97xx-core.o
 
+obj-$(CONFIG_OF_TOUCHSCREEN)		+= of_touchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_88PM860X)	+= 88pm860x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_AD7877)	+= ad7877.o
 obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
@@ -53,6 +54,7 @@
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)	+= st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)		+= stmpe-ts.o
+obj-$(CONFIG_TOUCHSCREEN_SUN4I)		+= sun4i-ts.o
 obj-$(CONFIG_TOUCHSCREEN_SUR40)		+= sur40.o
 obj-$(CONFIG_TOUCHSCREEN_TI_AM335X_TSC)	+= ti_am335x_tsc.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o

diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index 6793c85..523865d 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c

@@ -210,11 +210,6 @@
 module_param(gpio3, bool, 0);
 MODULE_PARM_DESC(gpio3, "If gpio3 is set to 1 AUX3 acts as GPIO3");
 
-/*
- * ad7877_read/write are only used for initial setup and for sysfs controls.
- * The main traffic is done using spi_async() in the interrupt handler.
- */
-
 static int ad7877_read(struct spi_device *spi, u16 reg)
 {
 	struct ser_req *req;

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 7f8aa98..da201b8 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c

@@ -706,7 +706,7 @@
 		m = &ts->msg[msg_idx];
 		error = spi_sync(ts->spi, m);
 		if (error) {
-			dev_err(&ts->spi->dev, "spi_async --> %d\n", error);
+			dev_err(&ts->spi->dev, "spi_sync --> %d\n", error);
 			packet->tc.ignore = true;
 			return;
 		}

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index a704007..6e0b4a2 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c

@@ -2,6 +2,8 @@
  * Atmel maXTouch Touchscreen driver
  *
  * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Copyright (C) 2012 Google, Inc.
+ *
  * Author: Joonyoung Shim <jy0922.shim@samsung.com>
  *
  * This program is free software; you can redistribute  it and/or modify it
@@ -12,6 +14,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/init.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/firmware.h>
 #include <linux/i2c.h>
@@ -25,12 +29,6 @@
 #define MXT_VER_21		21
 #define MXT_VER_22		22
 
-/* Slave addresses */
-#define MXT_APP_LOW		0x4a
-#define MXT_APP_HIGH		0x4b
-#define MXT_BOOT_LOW		0x24
-#define MXT_BOOT_HIGH		0x25
-
 /* Firmware */
 #define MXT_FW_NAME		"maxtouch.fw"
 
@@ -83,6 +81,9 @@
 #define MXT_COMMAND_REPORTALL	3
 #define MXT_COMMAND_DIAGNOSTIC	5
 
+/* Define for T6 status byte */
+#define MXT_T6_STATUS_RESET	(1 << 7)
+
 /* MXT_GEN_POWER_T7 field */
 #define MXT_POWER_IDLEACQINT	0
 #define MXT_POWER_ACTVACQINT	1
@@ -99,33 +100,26 @@
 
 /* MXT_TOUCH_MULTI_T9 field */
 #define MXT_TOUCH_CTRL		0
-#define MXT_TOUCH_XORIGIN	1
-#define MXT_TOUCH_YORIGIN	2
-#define MXT_TOUCH_XSIZE		3
-#define MXT_TOUCH_YSIZE		4
-#define MXT_TOUCH_BLEN		6
-#define MXT_TOUCH_TCHTHR	7
-#define MXT_TOUCH_TCHDI		8
-#define MXT_TOUCH_ORIENT	9
-#define MXT_TOUCH_MOVHYSTI	11
-#define MXT_TOUCH_MOVHYSTN	12
-#define MXT_TOUCH_NUMTOUCH	14
-#define MXT_TOUCH_MRGHYST	15
-#define MXT_TOUCH_MRGTHR	16
-#define MXT_TOUCH_AMPHYST	17
-#define MXT_TOUCH_XRANGE_LSB	18
-#define MXT_TOUCH_XRANGE_MSB	19
-#define MXT_TOUCH_YRANGE_LSB	20
-#define MXT_TOUCH_YRANGE_MSB	21
-#define MXT_TOUCH_XLOCLIP	22
-#define MXT_TOUCH_XHICLIP	23
-#define MXT_TOUCH_YLOCLIP	24
-#define MXT_TOUCH_YHICLIP	25
-#define MXT_TOUCH_XEDGECTRL	26
-#define MXT_TOUCH_XEDGEDIST	27
-#define MXT_TOUCH_YEDGECTRL	28
-#define MXT_TOUCH_YEDGEDIST	29
-#define MXT_TOUCH_JUMPLIMIT	30
+#define MXT_T9_ORIENT		9
+#define MXT_T9_RANGE		18
+
+/* MXT_TOUCH_MULTI_T9 status */
+#define MXT_T9_UNGRIP		(1 << 0)
+#define MXT_T9_SUPPRESS		(1 << 1)
+#define MXT_T9_AMP		(1 << 2)
+#define MXT_T9_VECTOR		(1 << 3)
+#define MXT_T9_MOVE		(1 << 4)
+#define MXT_T9_RELEASE		(1 << 5)
+#define MXT_T9_PRESS		(1 << 6)
+#define MXT_T9_DETECT		(1 << 7)
+
+struct t9_range {
+	u16 x;
+	u16 y;
+} __packed;
+
+/* MXT_TOUCH_MULTI_T9 orient */
+#define MXT_T9_ORIENT_SWITCH	(1 << 0)
 
 /* MXT_PROCI_GRIPFACE_T20 field */
 #define MXT_GRIPFACE_CTRL	0
@@ -174,17 +168,16 @@
 
 /* Define for MXT_GEN_COMMAND_T6 */
 #define MXT_BOOT_VALUE		0xa5
+#define MXT_RESET_VALUE		0x01
 #define MXT_BACKUP_VALUE	0x55
+
+/* Delay times */
 #define MXT_BACKUP_TIME		50	/* msec */
 #define MXT_RESET_TIME		200	/* msec */
-
-#define MXT_FWRESET_TIME	175	/* msec */
-
-/* MXT_SPT_GPIOPWM_T19 field */
-#define MXT_GPIO0_MASK		0x04
-#define MXT_GPIO1_MASK		0x08
-#define MXT_GPIO2_MASK		0x10
-#define MXT_GPIO3_MASK		0x20
+#define MXT_RESET_TIMEOUT	3000	/* msec */
+#define MXT_CRC_TIMEOUT		1000	/* msec */
+#define MXT_FW_RESET_TIME	3000	/* msec */
+#define MXT_FW_CHG_TIMEOUT	300	/* msec */
 
 /* Command to unlock bootloader */
 #define MXT_UNLOCK_CMD_MSB	0xaa
@@ -198,21 +191,8 @@
 #define MXT_FRAME_CRC_PASS	0x04
 #define MXT_APP_CRC_FAIL	0x40	/* valid 7 8 bit only */
 #define MXT_BOOT_STATUS_MASK	0x3f
-
-/* Touch status */
-#define MXT_UNGRIP		(1 << 0)
-#define MXT_SUPPRESS		(1 << 1)
-#define MXT_AMP			(1 << 2)
-#define MXT_VECTOR		(1 << 3)
-#define MXT_MOVE		(1 << 4)
-#define MXT_RELEASE		(1 << 5)
-#define MXT_PRESS		(1 << 6)
-#define MXT_DETECT		(1 << 7)
-
-/* Touch orient bits */
-#define MXT_XY_SWITCH		(1 << 0)
-#define MXT_X_INVERT		(1 << 1)
-#define MXT_Y_INVERT		(1 << 2)
+#define MXT_BOOT_EXTENDED_ID	(1 << 5)
+#define MXT_BOOT_ID_MASK	0x1f
 
 /* Touchscreen absolute values */
 #define MXT_MAX_AREA		0xff
@@ -232,8 +212,8 @@
 struct mxt_object {
 	u8 type;
 	u16 start_address;
-	u8 size;		/* Size of each instance - 1 */
-	u8 instances;		/* Number of instances - 1 */
+	u8 size_minus_one;
+	u8 instances_minus_one;
 	u8 num_report_ids;
 } __packed;
 
@@ -250,19 +230,40 @@
 	const struct mxt_platform_data *pdata;
 	struct mxt_object *object_table;
 	struct mxt_info info;
-	bool is_tp;
-
 	unsigned int irq;
 	unsigned int max_x;
 	unsigned int max_y;
+	bool in_bootloader;
+	u32 config_crc;
+	u8 bootloader_addr;
 
 	/* Cached parameters from object table */
 	u8 T6_reportid;
+	u16 T6_address;
 	u8 T9_reportid_min;
 	u8 T9_reportid_max;
 	u8 T19_reportid;
+
+	/* for fw update in bootloader */
+	struct completion bl_completion;
+
+	/* for reset handling */
+	struct completion reset_completion;
+
+	/* for config update handling */
+	struct completion crc_completion;
 };
 
+static size_t mxt_obj_size(const struct mxt_object *obj)
+{
+	return obj->size_minus_one + 1;
+}
+
+static size_t mxt_obj_instances(const struct mxt_object *obj)
+{
+	return obj->instances_minus_one + 1;
+}
+
 static bool mxt_object_readable(unsigned int type)
 {
 	switch (type) {
@@ -334,60 +335,190 @@
 		message->reportid, 7, message->message);
 }
 
-static int mxt_check_bootloader(struct i2c_client *client,
-				     unsigned int state)
+static int mxt_wait_for_completion(struct mxt_data *data,
+				   struct completion *comp,
+				   unsigned int timeout_ms)
 {
+	struct device *dev = &data->client->dev;
+	unsigned long timeout = msecs_to_jiffies(timeout_ms);
+	long ret;
+
+	ret = wait_for_completion_interruptible_timeout(comp, timeout);
+	if (ret < 0) {
+		return ret;
+	} else if (ret == 0) {
+		dev_err(dev, "Wait for completion timed out.\n");
+		return -ETIMEDOUT;
+	}
+	return 0;
+}
+
+static int mxt_bootloader_read(struct mxt_data *data,
+			       u8 *val, unsigned int count)
+{
+	int ret;
+	struct i2c_msg msg;
+
+	msg.addr = data->bootloader_addr;
+	msg.flags = data->client->flags & I2C_M_TEN;
+	msg.flags |= I2C_M_RD;
+	msg.len = count;
+	msg.buf = val;
+
+	ret = i2c_transfer(data->client->adapter, &msg, 1);
+
+	if (ret == 1) {
+		ret = 0;
+	} else {
+		ret = ret < 0 ? ret : -EIO;
+		dev_err(&data->client->dev, "%s: i2c recv failed (%d)\n",
+			__func__, ret);
+	}
+
+	return ret;
+}
+
+static int mxt_bootloader_write(struct mxt_data *data,
+				const u8 * const val, unsigned int count)
+{
+	int ret;
+	struct i2c_msg msg;
+
+	msg.addr = data->bootloader_addr;
+	msg.flags = data->client->flags & I2C_M_TEN;
+	msg.len = count;
+	msg.buf = (u8 *)val;
+
+	ret = i2c_transfer(data->client->adapter, &msg, 1);
+	if (ret == 1) {
+		ret = 0;
+	} else {
+		ret = ret < 0 ? ret : -EIO;
+		dev_err(&data->client->dev, "%s: i2c send failed (%d)\n",
+			__func__, ret);
+	}
+
+	return ret;
+}
+
+static int mxt_lookup_bootloader_address(struct mxt_data *data)
+{
+	u8 appmode = data->client->addr;
+	u8 bootloader;
+
+	switch (appmode) {
+	case 0x4a:
+	case 0x4b:
+	case 0x4c:
+	case 0x4d:
+	case 0x5a:
+	case 0x5b:
+		bootloader = appmode - 0x26;
+		break;
+	default:
+		dev_err(&data->client->dev,
+			"Appmode i2c address 0x%02x not found\n",
+			appmode);
+		return -EINVAL;
+	}
+
+	data->bootloader_addr = bootloader;
+	return 0;
+}
+
+static u8 mxt_get_bootloader_version(struct mxt_data *data, u8 val)
+{
+	struct device *dev = &data->client->dev;
+	u8 buf[3];
+
+	if (val & MXT_BOOT_EXTENDED_ID) {
+		if (mxt_bootloader_read(data, &buf[0], 3) != 0) {
+			dev_err(dev, "%s: i2c failure\n", __func__);
+			return val;
+		}
+
+		dev_dbg(dev, "Bootloader ID:%d Version:%d\n", buf[1], buf[2]);
+
+		return buf[0];
+	} else {
+		dev_dbg(dev, "Bootloader ID:%d\n", val & MXT_BOOT_ID_MASK);
+
+		return val;
+	}
+}
+
+static int mxt_check_bootloader(struct mxt_data *data, unsigned int state)
+{
+	struct device *dev = &data->client->dev;
 	u8 val;
+	int ret;
 
 recheck:
-	if (i2c_master_recv(client, &val, 1) != 1) {
-		dev_err(&client->dev, "%s: i2c recv failed\n", __func__);
-		return -EIO;
+	if (state != MXT_WAITING_BOOTLOAD_CMD) {
+		/*
+		 * In application update mode, the interrupt
+		 * line signals state transitions. We must wait for the
+		 * CHG assertion before reading the status byte.
+		 * Once the status byte has been read, the line is deasserted.
+		 */
+		ret = mxt_wait_for_completion(data, &data->bl_completion,
+					      MXT_FW_CHG_TIMEOUT);
+		if (ret) {
+			/*
+			 * TODO: handle -ERESTARTSYS better by terminating
+			 * fw update process before returning to userspace
+			 * by writing length 0x000 to device (iff we are in
+			 * WAITING_FRAME_DATA state).
+			 */
+			dev_err(dev, "Update wait error %d\n", ret);
+			return ret;
+		}
 	}
 
+	ret = mxt_bootloader_read(data, &val, 1);
+	if (ret)
+		return ret;
+
+	if (state == MXT_WAITING_BOOTLOAD_CMD)
+		val = mxt_get_bootloader_version(data, val);
+
 	switch (state) {
 	case MXT_WAITING_BOOTLOAD_CMD:
 	case MXT_WAITING_FRAME_DATA:
 		val &= ~MXT_BOOT_STATUS_MASK;
 		break;
 	case MXT_FRAME_CRC_PASS:
-		if (val == MXT_FRAME_CRC_CHECK)
+		if (val == MXT_FRAME_CRC_CHECK) {
 			goto recheck;
+		} else if (val == MXT_FRAME_CRC_FAIL) {
+			dev_err(dev, "Bootloader CRC fail\n");
+			return -EINVAL;
+		}
 		break;
 	default:
 		return -EINVAL;
 	}
 
 	if (val != state) {
-		dev_err(&client->dev, "Unvalid bootloader mode state\n");
+		dev_err(dev, "Invalid bootloader state %02X != %02X\n",
+			val, state);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
-static int mxt_unlock_bootloader(struct i2c_client *client)
+static int mxt_unlock_bootloader(struct mxt_data *data)
 {
+	int ret;
 	u8 buf[2];
 
 	buf[0] = MXT_UNLOCK_CMD_LSB;
 	buf[1] = MXT_UNLOCK_CMD_MSB;
 
-	if (i2c_master_send(client, buf, 2) != 2) {
-		dev_err(&client->dev, "%s: i2c send failed\n", __func__);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int mxt_fw_write(struct i2c_client *client,
-			     const u8 *data, unsigned int frame_size)
-{
-	if (i2c_master_send(client, data, frame_size) != frame_size) {
-		dev_err(&client->dev, "%s: i2c send failed\n", __func__);
-		return -EIO;
-	}
+	ret = mxt_bootloader_write(data, buf, 2);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -427,11 +558,6 @@
 	return ret;
 }
 
-static int mxt_read_reg(struct i2c_client *client, u16 reg, u8 *val)
-{
-	return __mxt_read_reg(client, reg, 1, val);
-}
-
 static int __mxt_write_reg(struct i2c_client *client, u16 reg, u16 len,
 			   const void *val)
 {
@@ -479,7 +605,7 @@
 			return object;
 	}
 
-	dev_err(&data->client->dev, "Invalid object type\n");
+	dev_err(&data->client->dev, "Invalid object type T%u\n", type);
 	return NULL;
 }
 
@@ -505,7 +631,7 @@
 	u16 reg;
 
 	object = mxt_get_object(data, type);
-	if (!object || offset >= object->size + 1)
+	if (!object || offset >= mxt_obj_size(object))
 		return -EINVAL;
 
 	reg = object->start_address;
@@ -515,18 +641,25 @@
 static void mxt_input_button(struct mxt_data *data, struct mxt_message *message)
 {
 	struct input_dev *input = data->input_dev;
+	const struct mxt_platform_data *pdata = data->pdata;
 	bool button;
 	int i;
 
 	/* Active-low switch */
-	for (i = 0; i < MXT_NUM_GPIO; i++) {
-		if (data->pdata->key_map[i] == KEY_RESERVED)
+	for (i = 0; i < pdata->t19_num_keys; i++) {
+		if (pdata->t19_keymap[i] == KEY_RESERVED)
 			continue;
-		button = !(message->message[0] & MXT_GPIO0_MASK << i);
-		input_report_key(input, data->pdata->key_map[i], button);
+		button = !(message->message[0] & (1 << i));
+		input_report_key(input, pdata->t19_keymap[i], button);
 	}
 }
 
+static void mxt_input_sync(struct input_dev *input_dev)
+{
+	input_mt_report_pointer_emulation(input_dev, false);
+	input_sync(input_dev);
+}
+
 static void mxt_input_touchevent(struct mxt_data *data,
 				      struct mxt_message *message, int id)
 {
@@ -536,44 +669,60 @@
 	int x;
 	int y;
 	int area;
-	int pressure;
+	int amplitude;
 
 	x = (message->message[1] << 4) | ((message->message[3] >> 4) & 0xf);
 	y = (message->message[2] << 4) | ((message->message[3] & 0xf));
+
+	/* Handle 10/12 bit switching */
 	if (data->max_x < 1024)
-		x = x >> 2;
+		x >>= 2;
 	if (data->max_y < 1024)
-		y = y >> 2;
+		y >>= 2;
 
 	area = message->message[4];
-	pressure = message->message[5];
+	amplitude = message->message[5];
 
 	dev_dbg(dev,
 		"[%u] %c%c%c%c%c%c%c%c x: %5u y: %5u area: %3u amp: %3u\n",
 		id,
-		(status & MXT_DETECT) ? 'D' : '.',
-		(status & MXT_PRESS) ? 'P' : '.',
-		(status & MXT_RELEASE) ? 'R' : '.',
-		(status & MXT_MOVE) ? 'M' : '.',
-		(status & MXT_VECTOR) ? 'V' : '.',
-		(status & MXT_AMP) ? 'A' : '.',
-		(status & MXT_SUPPRESS) ? 'S' : '.',
-		(status & MXT_UNGRIP) ? 'U' : '.',
-		x, y, area, pressure);
+		(status & MXT_T9_DETECT) ? 'D' : '.',
+		(status & MXT_T9_PRESS) ? 'P' : '.',
+		(status & MXT_T9_RELEASE) ? 'R' : '.',
+		(status & MXT_T9_MOVE) ? 'M' : '.',
+		(status & MXT_T9_VECTOR) ? 'V' : '.',
+		(status & MXT_T9_AMP) ? 'A' : '.',
+		(status & MXT_T9_SUPPRESS) ? 'S' : '.',
+		(status & MXT_T9_UNGRIP) ? 'U' : '.',
+		x, y, area, amplitude);
 
 	input_mt_slot(input_dev, id);
-	input_mt_report_slot_state(input_dev, MT_TOOL_FINGER,
-				   status & MXT_DETECT);
 
-	if (status & MXT_DETECT) {
+	if (status & MXT_T9_DETECT) {
+		/*
+		 * Multiple bits may be set if the host is slow to read
+		 * the status messages, indicating all the events that
+		 * have happened.
+		 */
+		if (status & MXT_T9_RELEASE) {
+			input_mt_report_slot_state(input_dev,
+						   MT_TOOL_FINGER, 0);
+			mxt_input_sync(input_dev);
+		}
+
+		/* Touch active */
+		input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, 1);
 		input_report_abs(input_dev, ABS_MT_POSITION_X, x);
 		input_report_abs(input_dev, ABS_MT_POSITION_Y, y);
-		input_report_abs(input_dev, ABS_MT_PRESSURE, pressure);
+		input_report_abs(input_dev, ABS_MT_PRESSURE, amplitude);
 		input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, area);
+	} else {
+		/* Touch no longer active, close out slot */
+		input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, 0);
 	}
 }
 
-static unsigned mxt_extract_T6_csum(const u8 *csum)
+static u32 mxt_extract_T6_csum(const u8 *csum) /* 24-bit value: needs u32 */
 {
 	return csum[0] | (csum[1] << 8) | (csum[2] << 16);
 }
@@ -584,28 +733,37 @@
 	return (id >= data->T9_reportid_min && id <= data->T9_reportid_max);
 }
 
-static irqreturn_t mxt_interrupt(int irq, void *dev_id)
+static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data)
 {
-	struct mxt_data *data = dev_id;
 	struct mxt_message message;
 	const u8 *payload = &message.message[0];
 	struct device *dev = &data->client->dev;
 	u8 reportid;
 	bool update_input = false;
+	u32 crc;
 
 	do {
 		if (mxt_read_message(data, &message)) {
 			dev_err(dev, "Failed to read message\n");
-			goto end;
+			return IRQ_NONE;
 		}
 
 		reportid = message.reportid;
 
 		if (reportid == data->T6_reportid) {
 			u8 status = payload[0];
-			unsigned csum = mxt_extract_T6_csum(&payload[1]);
+
+			crc = mxt_extract_T6_csum(&payload[1]);
+			if (crc != data->config_crc) {
+				data->config_crc = crc;
+				complete(&data->crc_completion);
+			}
+
 			dev_dbg(dev, "Status: %02x Config Checksum: %06x\n",
-				status, csum);
+				status, data->config_crc);
+
+			if (status & MXT_T6_STATUS_RESET)
+				complete(&data->reset_completion);
 		} else if (mxt_is_T9_message(data, &message)) {
 			int id = reportid - data->T9_reportid_min;
 			mxt_input_touchevent(data, &message, id);
@@ -618,13 +776,94 @@
 		}
 	} while (reportid != 0xff);
 
-	if (update_input) {
-		input_mt_report_pointer_emulation(data->input_dev, false);
-		input_sync(data->input_dev);
+	if (update_input)
+		mxt_input_sync(data->input_dev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mxt_interrupt(int irq, void *dev_id)
+{
+	struct mxt_data *data = dev_id;
+
+	if (data->in_bootloader) {
+		/* bootloader state transition completion */
+		complete(&data->bl_completion);
+		return IRQ_HANDLED;
 	}
 
-end:
-	return IRQ_HANDLED;
+	return mxt_process_messages_until_invalid(data);
+}
+
+static int mxt_t6_command(struct mxt_data *data, u16 cmd_offset,
+			  u8 value, bool wait)
+{
+	u16 reg;
+	u8 command_register;
+	int timeout_counter = 0;
+	int ret;
+
+	reg = data->T6_address + cmd_offset;
+
+	ret = mxt_write_reg(data->client, reg, value);
+	if (ret)
+		return ret;
+
+	if (!wait)
+		return 0;
+
+	do {
+		msleep(20);
+		ret = __mxt_read_reg(data->client, reg, 1, &command_register);
+		if (ret)
+			return ret;
+	} while (command_register != 0 && timeout_counter++ <= 100);
+
+	if (timeout_counter > 100) {
+		dev_err(&data->client->dev, "Command failed!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int mxt_soft_reset(struct mxt_data *data)
+{
+	struct device *dev = &data->client->dev;
+	int ret = 0;
+
+	dev_info(dev, "Resetting chip\n");
+
+	reinit_completion(&data->reset_completion);
+
+	ret = mxt_t6_command(data, MXT_COMMAND_RESET, MXT_RESET_VALUE, false);
+	if (ret)
+		return ret;
+
+	ret = mxt_wait_for_completion(data, &data->reset_completion,
+				      MXT_RESET_TIMEOUT);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value)
+{
+	/*
+	 * On failure, CRC is set to 0 and config will always be
+	 * downloaded.
+	 */
+	data->config_crc = 0;
+	reinit_completion(&data->crc_completion);
+
+	mxt_t6_command(data, cmd, value, true);
+
+	/*
+	 * Wait for crc message. On failure, CRC is set to 0 and config will
+	 * always be downloaded.
+	 */
+	mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT);
 }
 
 static int mxt_check_reg_init(struct mxt_data *data)
@@ -641,13 +880,23 @@
 		return 0;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1);
+
+	if (data->config_crc == pdata->config_crc) {
+		dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc);
+		return 0;
+	}
+
+	dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n",
+		 data->config_crc, pdata->config_crc);
+
 	for (i = 0; i < data->info.object_num; i++) {
 		object = data->object_table + i;
 
 		if (!mxt_object_writable(object->type))
 			continue;
 
-		size = (object->size + 1) * (object->instances + 1);
+		size = mxt_obj_size(object) * mxt_obj_instances(object);
 		if (index + size > pdata->config_length) {
 			dev_err(dev, "Not enough config data!\n");
 			return -EINVAL;
@@ -660,6 +909,14 @@
 		index += size;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE);
+
+	ret = mxt_soft_reset(data);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "Config successfully updated\n");
+
 	return 0;
 }
 
@@ -685,54 +942,6 @@
 	return 0;
 }
 
-static void mxt_handle_pdata(struct mxt_data *data)
-{
-	const struct mxt_platform_data *pdata = data->pdata;
-	u8 voltage;
-
-	/* Set touchscreen lines */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
-			pdata->x_line);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
-			pdata->y_line);
-
-	/* Set touchscreen orient */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
-			pdata->orient);
-
-	/* Set touchscreen burst length */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_BLEN, pdata->blen);
-
-	/* Set touchscreen threshold */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_TCHTHR, pdata->threshold);
-
-	/* Set touchscreen resolution */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
-
-	/* Set touchscreen voltage */
-	if (pdata->voltage) {
-		if (pdata->voltage < MXT_VOLTAGE_DEFAULT) {
-			voltage = (MXT_VOLTAGE_DEFAULT - pdata->voltage) /
-				MXT_VOLTAGE_STEP;
-			voltage = 0xff - voltage + 1;
-		} else
-			voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
-				MXT_VOLTAGE_STEP;
-
-		mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
-				MXT_CTE_VOLTAGE, voltage);
-	}
-}
-
 static int mxt_get_info(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
@@ -772,7 +981,7 @@
 		if (object->num_report_ids) {
 			min_id = reportid;
 			reportid += object->num_report_ids *
-					(object->instances + 1);
+					mxt_obj_instances(object);
 			max_id = reportid - 1;
 		} else {
 			min_id = 0;
@@ -780,13 +989,15 @@
 		}
 
 		dev_dbg(&data->client->dev,
-			"Type %2d Start %3d Size %3d Instances %2d ReportIDs %3u : %3u\n",
-			object->type, object->start_address, object->size + 1,
-			object->instances + 1, min_id, max_id);
+			"T%u Start:%u Size:%zu Instances:%zu Report IDs:%u-%u\n",
+			object->type, object->start_address,
+			mxt_obj_size(object), mxt_obj_instances(object),
+			min_id, max_id);
 
 		switch (object->type) {
 		case MXT_GEN_COMMAND_T6:
 			data->T6_reportid = min_id;
+			data->T6_address = object->start_address;
 			break;
 		case MXT_TOUCH_MULTI_T9:
 			data->T9_reportid_min = min_id;
@@ -811,12 +1022,59 @@
 	data->T19_reportid = 0;
 }
 
+static int mxt_read_t9_resolution(struct mxt_data *data)
+{
+	struct i2c_client *client = data->client;
+	int error;
+	struct t9_range range;
+	unsigned char orient;
+	struct mxt_object *object;
+
+	object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
+	if (!object)
+		return -EINVAL;
+
+	error = __mxt_read_reg(client,
+			       object->start_address + MXT_T9_RANGE,
+			       sizeof(range), &range);
+	if (error)
+		return error;
+
+	le16_to_cpus(&range.x);
+	le16_to_cpus(&range.y);
+
+	error =  __mxt_read_reg(client,
+				object->start_address + MXT_T9_ORIENT,
+				1, &orient);
+	if (error)
+		return error;
+
+	/* Handle default values */
+	if (range.x == 0)
+		range.x = 1023;
+
+	if (range.y == 0)
+		range.y = 1023;
+
+	if (orient & MXT_T9_ORIENT_SWITCH) {
+		data->max_x = range.y;
+		data->max_y = range.x;
+	} else {
+		data->max_x = range.x;
+		data->max_y = range.y;
+	}
+
+	dev_dbg(&client->dev,
+		"Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+	return 0;
+}
+
 static int mxt_initialize(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
 	struct mxt_info *info = &data->info;
 	int error;
-	u8 val;
 
 	error = mxt_get_info(data);
 	if (error)
@@ -832,47 +1090,29 @@
 
 	/* Get object table information */
 	error = mxt_get_object_table(data);
-	if (error)
+	if (error) {
+		dev_err(&client->dev, "Error %d reading object table\n", error);
 		goto err_free_object_table;
+	}
 
 	/* Check register init values */
 	error = mxt_check_reg_init(data);
-	if (error)
+	if (error) {
+		dev_err(&client->dev, "Error %d initializing configuration\n",
+			error);
 		goto err_free_object_table;
+	}
 
-	mxt_handle_pdata(data);
-
-	/* Backup to memory */
-	mxt_write_object(data, MXT_GEN_COMMAND_T6,
-			MXT_COMMAND_BACKUPNV,
-			MXT_BACKUP_VALUE);
-	msleep(MXT_BACKUP_TIME);
-
-	/* Soft reset */
-	mxt_write_object(data, MXT_GEN_COMMAND_T6,
-			MXT_COMMAND_RESET, 1);
-	msleep(MXT_RESET_TIME);
-
-	/* Update matrix size at info struct */
-	error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val);
-	if (error)
+	error = mxt_read_t9_resolution(data);
+	if (error) {
+		dev_err(&client->dev, "Failed to initialize T9 resolution\n");
 		goto err_free_object_table;
-	info->matrix_xsize = val;
-
-	error = mxt_read_reg(client, MXT_MATRIX_Y_SIZE, &val);
-	if (error)
-		goto err_free_object_table;
-	info->matrix_ysize = val;
+	}
 
 	dev_info(&client->dev,
-			"Family ID: %u Variant ID: %u Major.Minor.Build: %u.%u.%02X\n",
-			info->family_id, info->variant_id, info->version >> 4,
-			info->version & 0xf, info->build);
-
-	dev_info(&client->dev,
-			"Matrix X Size: %u Matrix Y Size: %u Object Num: %u\n",
-			info->matrix_xsize, info->matrix_ysize,
-			info->object_num);
+		 "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
+		 info->family_id, info->variant_id, info->version >> 4,
+		 info->version & 0xf, info->build, info->object_num);
 
 	return 0;
 
@@ -881,20 +1121,6 @@
 	return error;
 }
 
-static void mxt_calc_resolution(struct mxt_data *data)
-{
-	unsigned int max_x = data->pdata->x_size - 1;
-	unsigned int max_y = data->pdata->y_size - 1;
-
-	if (data->pdata->orient & MXT_XY_SWITCH) {
-		data->max_x = max_y;
-		data->max_y = max_x;
-	} else {
-		data->max_x = max_x;
-		data->max_y = max_y;
-	}
-}
-
 /* Firmware Version is returned as Major.Minor.Build */
 static ssize_t mxt_fw_version_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
@@ -921,11 +1147,11 @@
 {
 	int i;
 
-	if (object->instances > 0)
+	if (mxt_obj_instances(object) > 1)
 		count += scnprintf(buf + count, PAGE_SIZE - count,
 				   "Instance %u\n", instance);
 
-	for (i = 0; i < object->size + 1; i++)
+	for (i = 0; i < mxt_obj_size(object); i++)
 		count += scnprintf(buf + count, PAGE_SIZE - count,
 				"\t[%2u]: %02x (%d)\n", i, val[i], val[i]);
 	count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
@@ -958,8 +1184,8 @@
 		count += scnprintf(buf + count, PAGE_SIZE - count,
 				"T%u:\n", object->type);
 
-		for (j = 0; j < object->instances + 1; j++) {
-			u16 size = object->size + 1;
+		for (j = 0; j < mxt_obj_instances(object); j++) {
+			u16 size = mxt_obj_size(object);
 			u16 addr = object->start_address + j * size;
 
 			error = __mxt_read_reg(data->client, addr, size, obuf);
@@ -975,13 +1201,38 @@
 	return error ?: count;
 }
 
+static int mxt_check_firmware_format(struct device *dev,
+				     const struct firmware *fw)
+{
+	unsigned int pos = 0;
+	char c;
+
+	while (pos < fw->size) {
+		c = *(fw->data + pos);
+
+		if (c < '0' || (c > '9' && c < 'A') || c > 'F')
+			return 0;
+
+		pos++;
+	}
+
+	/*
+	 * To convert file try:
+	 * xxd -r -p mXTXXX__APP_VX-X-XX.enc > maxtouch.fw
+	 */
+	dev_err(dev, "Aborting: firmware file must be in binary format\n");
+
+	return -EINVAL;
+}
+
 static int mxt_load_fw(struct device *dev, const char *fn)
 {
 	struct mxt_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
 	const struct firmware *fw = NULL;
 	unsigned int frame_size;
 	unsigned int pos = 0;
+	unsigned int retry = 0;
+	unsigned int frame = 0;
 	int ret;
 
 	ret = request_firmware(&fw, fn, dev);
@@ -990,59 +1241,91 @@
 		return ret;
 	}
 
+	/* Check for incorrect enc file */
+	ret = mxt_check_firmware_format(dev, fw);
+	if (ret)
+		goto release_firmware;
+
+	ret = mxt_lookup_bootloader_address(data);
+	if (ret)
+		goto release_firmware;
+
 	/* Change to the bootloader mode */
-	mxt_write_object(data, MXT_GEN_COMMAND_T6,
-			MXT_COMMAND_RESET, MXT_BOOT_VALUE);
+	data->in_bootloader = true;
+
+	ret = mxt_t6_command(data, MXT_COMMAND_RESET, MXT_BOOT_VALUE, false);
+	if (ret)
+		goto release_firmware;
+
 	msleep(MXT_RESET_TIME);
 
-	/* Change to slave address of bootloader */
-	if (client->addr == MXT_APP_LOW)
-		client->addr = MXT_BOOT_LOW;
-	else
-		client->addr = MXT_BOOT_HIGH;
+	reinit_completion(&data->bl_completion);
 
-	ret = mxt_check_bootloader(client, MXT_WAITING_BOOTLOAD_CMD);
+	ret = mxt_check_bootloader(data, MXT_WAITING_BOOTLOAD_CMD);
 	if (ret)
-		goto out;
+		goto disable_irq;
 
 	/* Unlock bootloader */
-	mxt_unlock_bootloader(client);
+	mxt_unlock_bootloader(data);
 
 	while (pos < fw->size) {
-		ret = mxt_check_bootloader(client,
-						MXT_WAITING_FRAME_DATA);
+		ret = mxt_check_bootloader(data, MXT_WAITING_FRAME_DATA);
 		if (ret)
-			goto out;
+			goto disable_irq;
 
 		frame_size = ((*(fw->data + pos) << 8) | *(fw->data + pos + 1));
 
-		/* We should add 2 at frame size as the the firmware data is not
-		 * included the CRC bytes.
-		 */
+		/* Take account of CRC bytes */
 		frame_size += 2;
 
 		/* Write one frame to device */
-		mxt_fw_write(client, fw->data + pos, frame_size);
-
-		ret = mxt_check_bootloader(client,
-						MXT_FRAME_CRC_PASS);
+		ret = mxt_bootloader_write(data, fw->data + pos, frame_size);
 		if (ret)
-			goto out;
+			goto disable_irq;
 
-		pos += frame_size;
+		ret = mxt_check_bootloader(data, MXT_FRAME_CRC_PASS);
+		if (ret) {
+			retry++;
 
-		dev_dbg(dev, "Updated %d bytes / %zd bytes\n", pos, fw->size);
+			/* Back off by 20ms per retry */
+			msleep(retry * 20);
+
+			if (retry > 20) {
+				dev_err(dev, "Retry count exceeded\n");
+				goto disable_irq;
+			}
+		} else {
+			retry = 0;
+			pos += frame_size;
+			frame++;
+		}
+
+		if (frame % 50 == 0)
+			dev_dbg(dev, "Sent %d frames, %d/%zd bytes\n",
+				frame, pos, fw->size);
 	}
 
-out:
+	/* Wait for flash. */
+	ret = mxt_wait_for_completion(data, &data->bl_completion,
+				      MXT_FW_RESET_TIME);
+	if (ret)
+		goto disable_irq;
+
+	dev_dbg(dev, "Sent %d frames, %d bytes\n", frame, pos);
+
+	/*
+	 * Wait for device to reset. Some bootloader versions do not assert
+	 * the CHG line after bootloading has finished, so ignore potential
+	 * errors.
+	 */
+	mxt_wait_for_completion(data, &data->bl_completion, MXT_FW_RESET_TIME);
+
+	data->in_bootloader = false;
+
+disable_irq:
+	disable_irq(data->irq);
+release_firmware:
 	release_firmware(fw);
-
-	/* Change to slave address of application */
-	if (client->addr == MXT_BOOT_LOW)
-		client->addr = MXT_APP_LOW;
-	else
-		client->addr = MXT_APP_HIGH;
-
 	return ret;
 }
 
@@ -1053,29 +1336,24 @@
 	struct mxt_data *data = dev_get_drvdata(dev);
 	int error;
 
-	disable_irq(data->irq);
-
 	error = mxt_load_fw(dev, MXT_FW_NAME);
 	if (error) {
 		dev_err(dev, "The firmware update failed(%d)\n", error);
 		count = error;
 	} else {
-		dev_dbg(dev, "The firmware update succeeded\n");
-
-		/* Wait for reset */
-		msleep(MXT_FWRESET_TIME);
+		dev_info(dev, "The firmware update succeeded\n");
 
 		mxt_free_object_table(data);
 
 		mxt_initialize(data);
+
+		enable_irq(data->irq);
+
+		error = mxt_make_highchg(data);
+		if (error)
+			return error;
 	}
 
-	enable_irq(data->irq);
-
-	error = mxt_make_highchg(data);
-	if (error)
-		return error;
-
 	return count;
 }
 
@@ -1134,6 +1412,8 @@
 	struct input_dev *input_dev;
 	int error;
 	unsigned int num_mt_slots;
+	unsigned int mt_flags = 0;
+	int i;
 
 	if (!pdata)
 		return -EINVAL;
@@ -1146,10 +1426,7 @@
 		goto err_free_mem;
 	}
 
-	data->is_tp = pdata && pdata->is_tp;
-
-	input_dev->name = (data->is_tp) ? "Atmel maXTouch Touchpad" :
-					  "Atmel maXTouch Touchscreen";
+	input_dev->name = "Atmel maXTouch Touchscreen";
 	snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0",
 		 client->adapter->nr, client->addr);
 
@@ -1165,7 +1442,9 @@
 	data->pdata = pdata;
 	data->irq = client->irq;
 
-	mxt_calc_resolution(data);
+	init_completion(&data->bl_completion);
+	init_completion(&data->reset_completion);
+	init_completion(&data->crc_completion);
 
 	error = mxt_initialize(data);
 	if (error)
@@ -1175,20 +1454,15 @@
 	__set_bit(EV_KEY, input_dev->evbit);
 	__set_bit(BTN_TOUCH, input_dev->keybit);
 
-	if (data->is_tp) {
-		int i;
-		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+	if (pdata->t19_num_keys) {
 		__set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit);
 
-		for (i = 0; i < MXT_NUM_GPIO; i++)
-			if (pdata->key_map[i] != KEY_RESERVED)
-				__set_bit(pdata->key_map[i], input_dev->keybit);
+		for (i = 0; i < pdata->t19_num_keys; i++)
+			if (pdata->t19_keymap[i] != KEY_RESERVED)
+				input_set_capability(input_dev, EV_KEY,
+						     pdata->t19_keymap[i]);
 
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-		__set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUADTAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit);
+		mt_flags |= INPUT_MT_POINTER;
 
 		input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM);
@@ -1196,6 +1470,8 @@
 				  MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_MT_POSITION_Y,
 				  MXT_PIXELS_PER_MM);
+
+		input_dev->name = "Atmel maXTouch Touchpad";
 	}
 
 	/* For single touch */
@@ -1208,7 +1484,7 @@
 
 	/* For multi touch */
 	num_mt_slots = data->T9_reportid_max - data->T9_reportid_min + 1;
-	error = input_mt_init_slots(input_dev, num_mt_slots, 0);
+	error = input_mt_init_slots(input_dev, num_mt_slots, mt_flags);
 	if (error)
 		goto err_free_object;
 	input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR,
@@ -1236,12 +1512,18 @@
 		goto err_free_irq;
 
 	error = input_register_device(input_dev);
-	if (error)
+	if (error) {
+		dev_err(&client->dev, "Error %d registering input device\n",
+			error);
 		goto err_free_irq;
+	}
 
 	error = sysfs_create_group(&client->dev.kobj, &mxt_attr_group);
-	if (error)
+	if (error) {
+		dev_err(&client->dev, "Failure %d creating sysfs group\n",
+			error);
 		goto err_unregister_device;
+	}
 
 	return 0;
 
@@ -1294,11 +1576,7 @@
 	struct mxt_data *data = i2c_get_clientdata(client);
 	struct input_dev *input_dev = data->input_dev;
 
-	/* Soft reset */
-	mxt_write_object(data, MXT_GEN_COMMAND_T6,
-			MXT_COMMAND_RESET, 1);
-
-	msleep(MXT_RESET_TIME);
+	mxt_soft_reset(data);
 
 	mutex_lock(&input_dev->mutex);
 

diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
index d3f9f6b..7f3c947 100644
--- a/drivers/input/touchscreen/auo-pixcir-ts.c
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c

@@ -679,7 +679,7 @@
 MODULE_DEVICE_TABLE(i2c, auo_pixcir_idtable);
 
 #ifdef CONFIG_OF
-static struct of_device_id auo_pixcir_ts_dt_idtable[] = {
+static const struct of_device_id auo_pixcir_ts_dt_idtable[] = {
 	{ .compatible = "auo,auo_pixcir_ts" },
 	{},
 };

diff --git a/drivers/input/touchscreen/da9034-ts.c b/drivers/input/touchscreen/da9034-ts.c
index 8ccf7bb..cf6f4b3 100644
--- a/drivers/input/touchscreen/da9034-ts.c
+++ b/drivers/input/touchscreen/da9034-ts.c

@@ -301,10 +301,11 @@
 	struct da9034_touch_pdata *pdata = dev_get_platdata(&pdev->dev);
 	struct da9034_touch *touch;
 	struct input_dev *input_dev;
-	int ret;
+	int error;
 
-	touch = kzalloc(sizeof(struct da9034_touch), GFP_KERNEL);
-	if (touch == NULL) {
+	touch = devm_kzalloc(&pdev->dev, sizeof(struct da9034_touch),
+			     GFP_KERNEL);
+	if (!touch) {
 		dev_err(&pdev->dev, "failed to allocate driver data\n");
 		return -ENOMEM;
 	}
@@ -315,18 +316,18 @@
 		touch->interval_ms	= pdata->interval_ms;
 		touch->x_inverted	= pdata->x_inverted;
 		touch->y_inverted	= pdata->y_inverted;
-	} else
+	} else {
 		/* fallback into default */
 		touch->interval_ms	= 10;
+	}
 
 	INIT_DELAYED_WORK(&touch->tsi_work, da9034_tsi_work);
 	touch->notifier.notifier_call = da9034_touch_notifier;
 
-	input_dev = input_allocate_device();
+	input_dev = devm_input_allocate_device(&pdev->dev);
 	if (!input_dev) {
 		dev_err(&pdev->dev, "failed to allocate input device\n");
-		ret = -ENOMEM;
-		goto err_free_touch;
+		return -ENOMEM;
 	}
 
 	input_dev->name		= pdev->name;
@@ -346,26 +347,9 @@
 	touch->input_dev = input_dev;
 	input_set_drvdata(input_dev, touch);
 
-	ret = input_register_device(input_dev);
-	if (ret)
-		goto err_free_input;
-
-	platform_set_drvdata(pdev, touch);
-	return 0;
-
-err_free_input:
-	input_free_device(input_dev);
-err_free_touch:
-	kfree(touch);
-	return ret;
-}
-
-static int da9034_touch_remove(struct platform_device *pdev)
-{
-	struct da9034_touch *touch = platform_get_drvdata(pdev);
-
-	input_unregister_device(touch->input_dev);
-	kfree(touch);
+	error = input_register_device(input_dev);
+	if (error)
+		return error;
 
 	return 0;
 }
@@ -376,7 +360,6 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= da9034_touch_probe,
-	.remove		= da9034_touch_remove,
 };
 module_platform_driver(da9034_touch_driver);
 

diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index f8815be..d4f3399 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c

@@ -271,7 +271,7 @@
 		wrbuf[0] = addr;
 		wrbuf[1] = value;
 
-		return edt_ft5x06_ts_readwrite(tsdata->client, 3,
+		return edt_ft5x06_ts_readwrite(tsdata->client, 2,
 					wrbuf, 0, NULL);
 
 	default:

diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c
index e6bcb13..c805784 100644
--- a/drivers/input/touchscreen/egalax_ts.c
+++ b/drivers/input/touchscreen/egalax_ts.c

@@ -262,7 +262,7 @@
 
 static SIMPLE_DEV_PM_OPS(egalax_ts_pm_ops, egalax_ts_suspend, egalax_ts_resume);
 
-static struct of_device_id egalax_ts_dt_ids[] = {
+static const struct of_device_id egalax_ts_dt_ids[] = {
 	{ .compatible = "eeti,egalax_ts" },
 	{ /* sentinel */ }
 };

diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c
index 4f6b156..c38ca4a 100644
--- a/drivers/input/touchscreen/intel-mid-touch.c
+++ b/drivers/input/touchscreen/intel-mid-touch.c

@@ -36,6 +36,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <asm/intel_scu_ipc.h>
+#include <linux/device.h>
 
 /* PMIC Interrupt registers */
 #define PMIC_REG_ID1		0x00 /* PMIC ID1 register */
@@ -580,12 +581,17 @@
 		return -EINVAL;
 	}
 
-	tsdev = kzalloc(sizeof(struct mrstouch_dev), GFP_KERNEL);
-	input = input_allocate_device();
-	if (!tsdev || !input) {
+	tsdev = devm_kzalloc(&pdev->dev, sizeof(struct mrstouch_dev),
+			     GFP_KERNEL);
+	if (!tsdev) {
 		dev_err(&pdev->dev, "unable to allocate memory\n");
-		err = -ENOMEM;
-		goto err_free_mem;
+		return -ENOMEM;
+	}
+
+	input = devm_input_allocate_device(&pdev->dev);
+	if (!input) {
+		dev_err(&pdev->dev, "unable to allocate input device\n");
+		return -ENOMEM;
 	}
 
 	tsdev->dev = &pdev->dev;
@@ -598,7 +604,7 @@
 	err = mrstouch_adc_init(tsdev);
 	if (err) {
 		dev_err(&pdev->dev, "ADC initialization failed\n");
-		goto err_free_mem;
+		return err;
 	}
 
 	input->name = "mrst_touchscreen";
@@ -618,38 +624,20 @@
 	input_set_abs_params(tsdev->input, ABS_PRESSURE,
 			     MRST_PRESSURE_MIN, MRST_PRESSURE_MAX, 0, 0);
 
-	err = request_threaded_irq(tsdev->irq, NULL, mrstouch_pendet_irq,
-				   IRQF_ONESHOT, "mrstouch", tsdev);
+	err = devm_request_threaded_irq(&pdev->dev, tsdev->irq, NULL,
+					mrstouch_pendet_irq, IRQF_ONESHOT,
+					"mrstouch", tsdev);
 	if (err) {
 		dev_err(tsdev->dev, "unable to allocate irq\n");
-		goto err_free_mem;
+		return err;
 	}
 
 	err = input_register_device(tsdev->input);
 	if (err) {
 		dev_err(tsdev->dev, "unable to register input device\n");
-		goto err_free_irq;
+		return err;
 	}
 
-	platform_set_drvdata(pdev, tsdev);
-	return 0;
-
-err_free_irq:
-	free_irq(tsdev->irq, tsdev);
-err_free_mem:
-	input_free_device(input);
-	kfree(tsdev);
-	return err;
-}
-
-static int mrstouch_remove(struct platform_device *pdev)
-{
-	struct mrstouch_dev *tsdev = platform_get_drvdata(pdev);
-
-	free_irq(tsdev->irq, tsdev);
-	input_unregister_device(tsdev->input);
-	kfree(tsdev);
-
 	return 0;
 }
 
@@ -659,7 +647,6 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= mrstouch_probe,
-	.remove		= mrstouch_remove,
 };
 module_platform_driver(mrstouch_driver);
 

diff --git a/drivers/input/touchscreen/lpc32xx_ts.c b/drivers/input/touchscreen/lpc32xx_ts.c
index 2058253..bb47d34 100644
--- a/drivers/input/touchscreen/lpc32xx_ts.c
+++ b/drivers/input/touchscreen/lpc32xx_ts.c

@@ -384,7 +384,7 @@
 #endif
 
 #ifdef CONFIG_OF
-static struct of_device_id lpc32xx_tsc_of_match[] = {
+static const struct of_device_id lpc32xx_tsc_of_match[] = {
 	{ .compatible = "nxp,lpc3220-tsc", },
 	{ },
 };

diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
index 647e36f..00510a9 100644
--- a/drivers/input/touchscreen/mcs5000_ts.c
+++ b/drivers/input/touchscreen/mcs5000_ts.c

@@ -161,10 +161,9 @@
 	return IRQ_HANDLED;
 }
 
-static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data)
+static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data,
+				 const struct mcs_platform_data *platform_data)
 {
-	const struct mcs_platform_data *platform_data =
-		data->platform_data;
 	struct i2c_client *client = data->client;
 
 	/* Touch reset & sleep mode */
@@ -187,28 +186,32 @@
 }
 
 static int mcs5000_ts_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+			    const struct i2c_device_id *id)
 {
+	const struct mcs_platform_data *pdata;
 	struct mcs5000_ts_data *data;
 	struct input_dev *input_dev;
-	int ret;
+	int error;
 
-	if (!dev_get_platdata(&client->dev))
+	pdata = dev_get_platdata(&client->dev);
+	if (!pdata)
 		return -EINVAL;
 
-	data = kzalloc(sizeof(struct mcs5000_ts_data), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!data || !input_dev) {
+	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+	if (!data) {
 		dev_err(&client->dev, "Failed to allocate memory\n");
-		ret = -ENOMEM;
-		goto err_free_mem;
+		return -ENOMEM;
 	}
 
 	data->client = client;
-	data->input_dev = input_dev;
-	data->platform_data = dev_get_platdata(&client->dev);
 
-	input_dev->name = "MELPAS MCS-5000 Touchscreen";
+	input_dev = devm_input_allocate_device(&client->dev);
+	if (!input_dev) {
+		dev_err(&client->dev, "Failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	input_dev->name = "MELFAS MCS-5000 Touchscreen";
 	input_dev->id.bustype = BUS_I2C;
 	input_dev->dev.parent = &client->dev;
 
@@ -219,44 +222,30 @@
 	input_set_abs_params(input_dev, ABS_Y, 0, MCS5000_MAX_YC, 0, 0);
 
 	input_set_drvdata(input_dev, data);
+	data->input_dev = input_dev;
 
-	if (data->platform_data->cfg_pin)
-		data->platform_data->cfg_pin();
+	if (pdata->cfg_pin)
+		pdata->cfg_pin();
 
-	ret = request_threaded_irq(client->irq, NULL, mcs5000_ts_interrupt,
-			IRQF_TRIGGER_LOW | IRQF_ONESHOT, "mcs5000_ts", data);
-
-	if (ret < 0) {
+	error = devm_request_threaded_irq(&client->dev, client->irq,
+					  NULL, mcs5000_ts_interrupt,
+					  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+					  "mcs5000_ts", data);
+	if (error) {
 		dev_err(&client->dev, "Failed to register interrupt\n");
-		goto err_free_mem;
+		return error;
 	}
 
-	ret = input_register_device(data->input_dev);
-	if (ret < 0)
-		goto err_free_irq;
+	error = input_register_device(data->input_dev);
+	if (error) {
+		dev_err(&client->dev, "Failed to register input device\n");
+		return error;
+	}
 
-	mcs5000_ts_phys_init(data);
+	mcs5000_ts_phys_init(data, pdata);
 	i2c_set_clientdata(client, data);
 
 	return 0;
-
-err_free_irq:
-	free_irq(client->irq, data);
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(data);
-	return ret;
-}
-
-static int mcs5000_ts_remove(struct i2c_client *client)
-{
-	struct mcs5000_ts_data *data = i2c_get_clientdata(client);
-
-	free_irq(client->irq, data);
-	input_unregister_device(data->input_dev);
-	kfree(data);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -274,14 +263,15 @@
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+	const struct mcs_platform_data *pdata = dev_get_platdata(dev);
 
-	mcs5000_ts_phys_init(data);
+	mcs5000_ts_phys_init(data, pdata);
 
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(mcs5000_ts_pm, mcs5000_ts_suspend, mcs5000_ts_resume);
-#endif
 
 static const struct i2c_device_id mcs5000_ts_id[] = {
 	{ "mcs5000_ts", 0 },
@@ -291,12 +281,9 @@
 
 static struct i2c_driver mcs5000_ts_driver = {
 	.probe		= mcs5000_ts_probe,
-	.remove		= mcs5000_ts_remove,
 	.driver = {
 		.name = "mcs5000_ts",
-#ifdef CONFIG_PM
 		.pm   = &mcs5000_ts_pm,
-#endif
 	},
 	.id_table	= mcs5000_ts_id,
 };

diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 8a598c0..372bbf7 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c

@@ -456,7 +456,7 @@
 	data->input_dev = input_dev;
 	data->pdata = pdata;
 
-	input_dev->name = "MELPAS MMS114 Touchscreen";
+	input_dev->name = "MELFAS MMS114 Touchscreen";
 	input_dev->id.bustype = BUS_I2C;
 	input_dev->dev.parent = &client->dev;
 	input_dev->open = mms114_input_open;
@@ -570,7 +570,7 @@
 MODULE_DEVICE_TABLE(i2c, mms114_id);
 
 #ifdef CONFIG_OF
-static struct of_device_id mms114_dt_match[] = {
+static const struct of_device_id mms114_dt_match[] = {
 	{ .compatible = "melfas,mms114" },
 	{ }
 };

diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c
new file mode 100644
index 0000000..f8f9b84
--- /dev/null
+++ b/drivers/input/touchscreen/of_touchscreen.c

@@ -0,0 +1,45 @@
+/*
+ *  Generic DT helper functions for touchscreen devices
+ *
+ *  Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/of.h>
+#include <linux/input.h>
+#include <linux/input/touchscreen.h>
+
+/**
+ * touchscreen_parse_of_params - parse common touchscreen DT properties
+ * @dev: device that should be parsed
+ *
+ * This function parses common DT properties for touchscreens and sets up the
+ * input device accordingly. The function keeps previously set up default
+ * values if no value is specified via DT.
+ */
+void touchscreen_parse_of_params(struct input_dev *dev)
+{
+	struct device_node *np = dev->dev.parent->of_node;
+	struct input_absinfo *absinfo;
+
+	input_alloc_absinfo(dev);
+	if (!dev->absinfo)
+		return;
+
+	absinfo = &dev->absinfo[ABS_X];
+	of_property_read_u32(np, "touchscreen-size-x", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-x", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_Y];
+	of_property_read_u32(np, "touchscreen-size-y", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-y", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_PRESSURE];
+	of_property_read_u32(np, "touchscreen-max-pressure", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-pressure", &absinfo->fuzz);
+}
+EXPORT_SYMBOL(touchscreen_parse_of_params);

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 02392d2..19c6c0f 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c

@@ -24,12 +24,13 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/input/pixcir_ts.h>
+#include <linux/gpio.h>
 
 struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
 	struct input_dev *input;
 	const struct pixcir_ts_platform_data *chip;
-	bool exiting;
+	bool running;
 };
 
 static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
@@ -87,11 +88,12 @@
 static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
+	const struct pixcir_ts_platform_data *pdata = tsdata->chip;
 
-	while (!tsdata->exiting) {
+	while (tsdata->running) {
 		pixcir_ts_poscheck(tsdata);
 
-		if (tsdata->chip->attb_read_val())
+		if (gpio_get_value(pdata->gpio_attb))
 			break;
 
 		msleep(20);
@@ -100,25 +102,221 @@
 	return IRQ_HANDLED;
 }
 
+/*
+ * Select the controller's power mode with a read-modify-write of the power
+ * mode register; the "allow idle" bit is always set as well.
+ *
+ * Returns 0 on success or the negative value of the failed i2c transfer.
+ */
+static int pixcir_set_power_mode(struct pixcir_i2c_ts_data *ts,
+				 enum pixcir_power_mode mode)
+{
+	struct i2c_client *client = ts->client;
+	int val, error;
+
+	val = i2c_smbus_read_byte_data(client, PIXCIR_REG_POWER_MODE);
+	if (val < 0) {
+		dev_err(&client->dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, val);
+		return val;
+	}
+
+	/* Replace the mode field and always allow AUTO_IDLE */
+	val = (val & ~PIXCIR_POWER_MODE_MASK) | mode | PIXCIR_POWER_ALLOW_IDLE;
+
+	error = i2c_smbus_write_byte_data(client, PIXCIR_REG_POWER_MODE, val);
+	if (error < 0) {
+		dev_err(&client->dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Set the interrupt mode for the device, i.e. the ATTB line behaviour, with
+ * a read-modify-write of the interrupt mode register.
+ *
+ * @mode     : value placed in the mode field of the register.
+ * @polarity : 1 for active high, 0 for active low.
+ *
+ * Returns 0 on success or the negative value of the failed i2c transfer.
+ */
+static int pixcir_set_int_mode(struct pixcir_i2c_ts_data *ts,
+			       enum pixcir_int_mode mode, bool polarity)
+{
+	struct i2c_client *client = ts->client;
+	int val, error;
+
+	val = i2c_smbus_read_byte_data(client, PIXCIR_REG_INT_MODE);
+	if (val < 0) {
+		dev_err(&client->dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, val);
+		return val;
+	}
+
+	/* Swap in the new mode first, then force the polarity bit */
+	val = (val & ~PIXCIR_INT_MODE_MASK) | mode;
+	val = polarity ? (val | PIXCIR_INT_POL_HIGH) :
+			 (val & ~PIXCIR_INT_POL_HIGH);
+
+	error = i2c_smbus_write_byte_data(client, PIXCIR_REG_INT_MODE, val);
+	if (error < 0) {
+		dev_err(&client->dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable/disable interrupt generation by setting or clearing the enable bit
+ * of the interrupt mode register; the other bits are written back unchanged.
+ *
+ * Returns 0 on success or the negative value of the failed i2c transfer.
+ */
+static int pixcir_int_enable(struct pixcir_i2c_ts_data *ts, bool enable)
+{
+	struct i2c_client *client = ts->client;
+	int val, error;
+
+	val = i2c_smbus_read_byte_data(client, PIXCIR_REG_INT_MODE);
+	if (val < 0) {
+		dev_err(&client->dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, val);
+		return val;
+	}
+
+	val = enable ? (val | PIXCIR_INT_ENABLE) : (val & ~PIXCIR_INT_ENABLE);
+
+	error = i2c_smbus_write_byte_data(client, PIXCIR_REG_INT_MODE, val);
+	if (error < 0) {
+		dev_err(&client->dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Start touch reporting: program the LEVEL_TOUCH, active low interrupt mode,
+ * mark the device as running and enable interrupt generation.
+ *
+ * Returns 0 on success or a negative error code. When enabling interrupt
+ * generation fails, ts->running is cleared again so that the flag does not
+ * claim a started device while an error is reported to the caller.
+ */
+static int pixcir_start(struct pixcir_i2c_ts_data *ts)
+{
+	struct device *dev = &ts->client->dev;
+	int error;
+
+	/* LEVEL_TOUCH interrupt with active low polarity */
+	error = pixcir_set_int_mode(ts, PIXCIR_INT_LEVEL_TOUCH, 0);
+	if (error) {
+		dev_err(dev, "Failed to set interrupt mode: %d\n", error);
+		return error;
+	}
+
+	ts->running = true;
+	mb();	/* Update status before IRQ can fire */
+
+	/* enable interrupt generation */
+	error = pixcir_int_enable(ts, true);
+	if (error) {
+		dev_err(dev, "Failed to enable interrupt generation: %d\n",
+			error);
+		/* Start failed: do not leave the ISR loop condition set */
+		ts->running = false;
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Stop touch reporting: disable interrupt generation, clear the running flag
+ * so that the ISR loop terminates, and wait for a running ISR to finish.
+ *
+ * Returns 0 on success, or the error from disabling interrupt generation (in
+ * which case the running flag is left untouched).
+ */
+static int pixcir_stop(struct pixcir_i2c_ts_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int ret = pixcir_int_enable(ts, false);	/* no new interrupts */
+
+	if (ret) {
+		dev_err(&client->dev,
+			"Failed to disable interrupt generation: %d\n",
+			ret);
+		return ret;
+	}
+
+	/* Exit ISR if running, no more report parsing */
+	ts->running = false;
+	mb();	/* update status before we synchronize irq */
+
+	/* Wait till running ISR is complete */
+	synchronize_irq(client->irq);
+
+	return 0;
+}
+
+/* input_dev open callback: start the controller stored as driver data */
+static int pixcir_input_open(struct input_dev *dev)
+{
+	return pixcir_start(input_get_drvdata(dev));
+}
+
+/*
+ * input_dev close callback: stop the controller stored as driver data.
+ * The result of pixcir_stop() is not propagated (close returns void).
+ */
+static void pixcir_input_close(struct input_dev *dev)
+{
+	pixcir_stop(input_get_drvdata(dev));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int pixcir_i2c_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+	struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client);
+	struct input_dev *input = ts->input;
+	int ret = 0;
 
-	if (device_may_wakeup(&client->dev))
+	mutex_lock(&input->mutex);
+
+	if (device_may_wakeup(&client->dev)) {
+		if (!input->users) {
+			ret = pixcir_start(ts);
+			if (ret) {
+				dev_err(dev, "Failed to start\n");
+				goto unlock;
+			}
+		}
+
 		enable_irq_wake(client->irq);
+	} else if (input->users) {
+		ret = pixcir_stop(ts);
+	}
 
-	return 0;
+unlock:
+	mutex_unlock(&input->mutex);
+
+	return ret;
 }
 
 static int pixcir_i2c_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+	struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client);
+	struct input_dev *input = ts->input;
+	int ret = 0;
 
-	if (device_may_wakeup(&client->dev))
+	mutex_lock(&input->mutex);
+
+	if (device_may_wakeup(&client->dev)) {
 		disable_irq_wake(client->irq);
 
-	return 0;
+		if (!input->users) {
+			ret = pixcir_stop(ts);
+			if (ret) {
+				dev_err(dev, "Failed to stop\n");
+				goto unlock;
+			}
+		}
+	} else if (input->users) {
+		ret = pixcir_start(ts);
+	}
+
+unlock:
+	mutex_unlock(&input->mutex);
+
+	return ret;
 }
 #endif
 
@@ -130,6 +328,7 @@
 {
 	const struct pixcir_ts_platform_data *pdata =
 			dev_get_platdata(&client->dev);
+	struct device *dev = &client->dev;
 	struct pixcir_i2c_ts_data *tsdata;
 	struct input_dev *input;
 	int error;
@@ -139,12 +338,19 @@
 		return -EINVAL;
 	}
 
-	tsdata = kzalloc(sizeof(*tsdata), GFP_KERNEL);
-	input = input_allocate_device();
-	if (!tsdata || !input) {
-		dev_err(&client->dev, "Failed to allocate driver data!\n");
-		error = -ENOMEM;
-		goto err_free_mem;
+	if (!gpio_is_valid(pdata->gpio_attb)) {
+		dev_err(dev, "Invalid gpio_attb in pdata\n");
+		return -EINVAL;
+	}
+
+	tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL);
+	if (!tsdata)
+		return -ENOMEM;
+
+	input = devm_input_allocate_device(dev);
+	if (!input) {
+		dev_err(dev, "Failed to allocate input device\n");
+		return -ENOMEM;
 	}
 
 	tsdata->client = client;
@@ -153,6 +359,8 @@
 
 	input->name = client->name;
 	input->id.bustype = BUS_I2C;
+	input->open = pixcir_input_open;
+	input->close = pixcir_input_close;
 	input->dev.parent = &client->dev;
 
 	__set_bit(EV_KEY, input->evbit);
@@ -165,44 +373,47 @@
 
 	input_set_drvdata(input, tsdata);
 
-	error = request_threaded_irq(client->irq, NULL, pixcir_ts_isr,
-				     IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				     client->name, tsdata);
+	error = devm_gpio_request_one(dev, pdata->gpio_attb,
+				      GPIOF_DIR_IN, "pixcir_i2c_attb");
 	if (error) {
-		dev_err(&client->dev, "Unable to request touchscreen IRQ.\n");
-		goto err_free_mem;
+		dev_err(dev, "Failed to request ATTB gpio\n");
+		return error;
 	}
 
+	error = devm_request_threaded_irq(dev, client->irq, NULL, pixcir_ts_isr,
+					  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					  client->name, tsdata);
+	if (error) {
+		dev_err(dev, "failed to request irq %d\n", client->irq);
+		return error;
+	}
+
+	/* Always be in IDLE mode to save power, device supports auto wake */
+	error = pixcir_set_power_mode(tsdata, PIXCIR_POWER_IDLE);
+	if (error) {
+		dev_err(dev, "Failed to set IDLE mode\n");
+		return error;
+	}
+
+	/* Stop device till opened */
+	error = pixcir_stop(tsdata);
+	if (error)
+		return error;
+
 	error = input_register_device(input);
 	if (error)
-		goto err_free_irq;
+		return error;
 
 	i2c_set_clientdata(client, tsdata);
 	device_init_wakeup(&client->dev, 1);
 
 	return 0;
-
-err_free_irq:
-	free_irq(client->irq, tsdata);
-err_free_mem:
-	input_free_device(input);
-	kfree(tsdata);
-	return error;
 }
 
 static int pixcir_i2c_ts_remove(struct i2c_client *client)
 {
-	struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client);
-
 	device_init_wakeup(&client->dev, 0);
 
-	tsdata->exiting = true;
-	mb();
-	free_irq(client->irq, tsdata);
-
-	input_unregister_device(tsdata->input);
-	kfree(tsdata);
-
 	return 0;
 }
 

diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
new file mode 100644
index 0000000..2ba8260
--- /dev/null
+++ b/drivers/input/touchscreen/sun4i-ts.c

@@ -0,0 +1,339 @@
+/*
+ * Allwinner sunxi resistive touchscreen controller driver
+ *
+ * Copyright (C) 2013 - 2014 Hans de Goede <hdegoede@redhat.com>
+ *
+ * The hwmon parts are based on work by Corentin LABBE which is:
+ * Copyright (C) 2013 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * The sun4i-ts controller is capable of detecting a second touch, but when a
+ * second touch is present then the accuracy becomes so bad the reported touch
+ * location is not usable.
+ *
+ * The original android driver contains some complicated heuristics using the
+ * approx. distance between the 2 touches to see if the user is making a pinch
+ * open / close movement, and then reports emulated multi-touch events around
+ * the last touch coordinate (as the dual-touch coordinates are worthless).
+ *
+ * These kinds of heuristics are just asking for trouble (and don't belong
+ * in the kernel). So this driver offers straightforward, reliable single
+ * touch functionality only.
+ */
+
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Register offsets, added to the mapped controller base (ts->base) */
+#define TP_CTRL0		0x00
+#define TP_CTRL1		0x04
+#define TP_CTRL2		0x08
+#define TP_CTRL3		0x0c
+#define TP_INT_FIFOC		0x10
+#define TP_INT_FIFOS		0x14
+#define TP_TPR			0x18
+#define TP_CDAT			0x1c
+#define TEMP_DATA		0x20
+#define TP_DATA			0x24
+
+/*
+ * The field helpers below cast their argument to u32 before shifting, so
+ * that values reaching bit 31 (e.g. TP_SENSITIVE_ADJUST(15)) are not built
+ * by left-shifting a signed int into its sign bit.
+ */
+
+/* TP_CTRL0 bits */
+#define ADC_FIRST_DLY(x)	((u32)(x) << 24) /* 8 bits */
+#define ADC_FIRST_DLY_MODE(x)	((u32)(x) << 23)
+#define ADC_CLK_SEL(x)		((u32)(x) << 22)
+#define ADC_CLK_DIV(x)		((u32)(x) << 20) /* 3 bits */
+#define FS_DIV(x)		((u32)(x) << 16) /* 4 bits */
+#define T_ACQ(x)		((u32)(x) << 0) /* 16 bits */
+
+/* TP_CTRL1 bits */
+#define STYLUS_UP_DEBOUN(x)	((u32)(x) << 12) /* 8 bits */
+#define STYLUS_UP_DEBOUN_EN(x)	((u32)(x) << 9)
+#define TOUCH_PAN_CALI_EN(x)	((u32)(x) << 6)
+#define TP_DUAL_EN(x)		((u32)(x) << 5)
+#define TP_MODE_EN(x)		((u32)(x) << 4)
+#define TP_ADC_SELECT(x)	((u32)(x) << 3)
+#define ADC_CHAN_SELECT(x)	((u32)(x) << 0)  /* 3 bits */
+
+/* TP_CTRL2 bits */
+#define TP_SENSITIVE_ADJUST(x)	((u32)(x) << 28) /* 4 bits */
+#define TP_MODE_SELECT(x)	((u32)(x) << 26) /* 2 bits */
+#define PRE_MEA_EN(x)		((u32)(x) << 24)
+#define PRE_MEA_THRE_CNT(x)	((u32)(x) << 0) /* 24 bits */
+
+/* TP_CTRL3 bits */
+#define FILTER_EN(x)		((u32)(x) << 2)
+#define FILTER_TYPE(x)		((u32)(x) << 0)  /* 2 bits */
+
+/* TP_INT_FIFOC irq and fifo mask / control bits */
+#define TEMP_IRQ_EN(x)		((u32)(x) << 18)
+#define OVERRUN_IRQ_EN(x)	((u32)(x) << 17)
+#define DATA_IRQ_EN(x)		((u32)(x) << 16)
+#define TP_DATA_XY_CHANGE(x)	((u32)(x) << 13)
+#define FIFO_TRIG(x)		((u32)(x) << 8)  /* 5 bits */
+#define DATA_DRQ_EN(x)		((u32)(x) << 7)
+#define FIFO_FLUSH(x)		((u32)(x) << 4)
+#define TP_UP_IRQ_EN(x)		((u32)(x) << 1)
+#define TP_DOWN_IRQ_EN(x)	((u32)(x) << 0)
+
+/* TP_INT_FIFOS irq and fifo status bits */
+#define TEMP_DATA_PENDING	BIT(18)
+#define FIFO_OVERRUN_PENDING	BIT(17)
+#define FIFO_DATA_PENDING	BIT(16)
+#define TP_IDLE_FLG		BIT(2)
+#define TP_UP_PENDING		BIT(1)
+#define TP_DOWN_PENDING		BIT(0)
+
+/* TP_TPR bits */
+#define TEMP_ENABLE(x)		((u32)(x) << 16)
+#define TEMP_PERIOD(x)		((u32)(x) << 0)  /* t = x * 256 * 16 / clkin */
+
+struct sun4i_ts_data {	/* per-device driver state, allocated in probe */
+	struct device *dev;		/* the platform device's struct device */
+	struct input_dev *input;	/* NULL unless DT has "allwinner,ts-attached" */
+	void __iomem *base;		/* mapped controller registers */
+	unsigned int irq;		/* value from platform_get_irq() */
+	bool ignore_fifo_data;		/* drop the next FIFO sample (1st after an up event) */
+	int temp_data;			/* last raw TEMP_DATA reading, -1 before the first one */
+};
+
+/*
+ * Turn the touch related bits of the interrupt status @reg_val into input
+ * events: a pending FIFO sample becomes an ABS_X/ABS_Y position plus
+ * BTN_TOUCH down, a pending "up" becomes BTN_TOUCH release.
+ */
+static void sun4i_ts_irq_handle_input(struct sun4i_ts_data *ts, u32 reg_val)
+{
+	struct input_dev *input = ts->input;
+
+	if (reg_val & FIFO_DATA_PENDING) {
+		/* Both coordinates are always read from TP_DATA, x first */
+		u32 pos_x = readl(ts->base + TP_DATA);
+		u32 pos_y = readl(ts->base + TP_DATA);
+
+		if (ts->ignore_fifo_data) {
+			/*
+			 * The 1st location reported after an up event is
+			 * unreliable: drop it and accept the following ones.
+			 */
+			ts->ignore_fifo_data = false;
+		} else {
+			input_report_abs(input, ABS_X, pos_x);
+			input_report_abs(input, ABS_Y, pos_y);
+			/*
+			 * The hardware has a separate down status bit, but
+			 * that gets set before we get the first location,
+			 * resulting in reporting a click on the old location.
+			 */
+			input_report_key(input, BTN_TOUCH, 1);
+			input_sync(input);
+		}
+	}
+
+	if (!(reg_val & TP_UP_PENDING))
+		return;
+
+	ts->ignore_fifo_data = true;
+	input_report_key(input, BTN_TOUCH, 0);
+	input_sync(input);
+}
+
+/*
+ * Interrupt handler: latch a pending temperature sample, hand the status to
+ * the touch code when an input device exists, then write the status value
+ * back to TP_INT_FIFOS (presumably acknowledging the handled bits - confirm
+ * against the datasheet). Always returns IRQ_HANDLED.
+ */
+static irqreturn_t sun4i_ts_irq(int irq, void *dev_id)
+{
+	struct sun4i_ts_data *ts = dev_id;
+	const u32 status = readl(ts->base + TP_INT_FIFOS);
+
+	if (status & TEMP_DATA_PENDING)
+		ts->temp_data = readl(ts->base + TEMP_DATA);
+
+	if (ts->input)
+		sun4i_ts_irq_handle_input(ts, status);
+
+	writel(status, ts->base + TP_INT_FIFOS);
+
+	return IRQ_HANDLED;
+}
+
+/* input_dev open callback: program TP_INT_FIFOC for touch use; returns 0 */
+static int sun4i_ts_open(struct input_dev *dev)
+{
+	struct sun4i_ts_data *ts = input_get_drvdata(dev);
+	/* Flush, set trig level to 1, enable temp, data and up irqs */
+	u32 fifoc = TEMP_IRQ_EN(1) | DATA_IRQ_EN(1) | FIFO_TRIG(1) |
+		    FIFO_FLUSH(1) | TP_UP_IRQ_EN(1);
+
+	writel(fifoc, ts->base + TP_INT_FIFOC);
+
+	return 0;
+}
+
+/* input_dev close callback: leave only the temperature irq enabled */
+static void sun4i_ts_close(struct input_dev *dev)
+{
+	struct sun4i_ts_data *ts = input_get_drvdata(dev);
+	void __iomem *fifoc = ts->base + TP_INT_FIFOC;
+
+	/* Deactivate all input IRQs */
+	writel(TEMP_IRQ_EN(1), fifoc);
+}
+
+/*
+ * sysfs show callback for temp1_input: prints (temp_data - 1447) * 100, or
+ * fails with -EAGAIN while no temperature sample has been stored yet.
+ */
+static ssize_t show_temp(struct device *dev, struct device_attribute *devattr,
+			 char *buf)
+{
+	struct sun4i_ts_data *ts = dev_get_drvdata(dev);
+
+	/* temp_data stays at -1 until the first irq delivers a sample */
+	if (ts->temp_data != -1)
+		return sprintf(buf, "%d\n", (ts->temp_data - 1447) * 100);
+
+	return -EAGAIN;
+}
+
+/* sysfs show callback for temp1_label: prints the fixed sensor label */
+static ssize_t show_temp_label(struct device *dev,
+			      struct device_attribute *devattr, char *buf)
+{
+	return sprintf(buf, "%s\n", "SoC temperature");
+}
+
+static DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL);	/* no store callback */
+static DEVICE_ATTR(temp1_label, S_IRUGO, show_temp_label, NULL);	/* no store callback */
+
+static struct attribute *sun4i_ts_attrs[] = {	/* attributes of the hwmon device */
+	&dev_attr_temp1_input.attr,	/* served by show_temp() */
+	&dev_attr_temp1_label.attr,	/* served by show_temp_label() */
+	NULL	/* terminator */
+};
+ATTRIBUTE_GROUPS(sun4i_ts);	/* sun4i_ts_groups is what probe passes to hwmon */
+
+/*
+ * Probe: allocate the driver state, set up an input device when the DT node
+ * has the "allwinner,ts-attached" property, map the registers, request the
+ * irq, program the controller, and register the hwmon device and (if set up)
+ * the input device.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int sun4i_ts_probe(struct platform_device *pdev)
+{
+	struct sun4i_ts_data *ts;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device *hwmon;
+	int error;
+	int irq;
+	bool ts_attached;
+
+	ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ts->dev = dev;
+	ts->ignore_fifo_data = true;
+	ts->temp_data = -1;
+
+	ts_attached = of_property_read_bool(np, "allwinner,ts-attached");
+	if (ts_attached) {
+		ts->input = devm_input_allocate_device(dev);
+		if (!ts->input)
+			return -ENOMEM;
+
+		ts->input->name = pdev->name;
+		ts->input->phys = "sun4i_ts/input0";
+		ts->input->open = sun4i_ts_open;
+		ts->input->close = sun4i_ts_close;
+		ts->input->id.bustype = BUS_HOST;
+		ts->input->id.vendor = 0x0001;
+		ts->input->id.product = 0x0001;
+		ts->input->id.version = 0x0100;
+		ts->input->evbit[0] =  BIT(EV_SYN) | BIT(EV_KEY) | BIT(EV_ABS);
+		__set_bit(BTN_TOUCH, ts->input->keybit);
+		input_set_abs_params(ts->input, ABS_X, 0, 4095, 0, 0);
+		input_set_abs_params(ts->input, ABS_Y, 0, 4095, 0, 0);
+		input_set_drvdata(ts->input, ts);
+	}
+
+	ts->base = devm_ioremap_resource(dev,
+			      platform_get_resource(pdev, IORESOURCE_MEM, 0));
+	if (IS_ERR(ts->base))
+		return PTR_ERR(ts->base);
+
+	/*
+	 * platform_get_irq() reports failure as a negative error code, which
+	 * must be caught before it is stored in the unsigned ts->irq.
+	 */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ts->irq = irq;
+	error = devm_request_irq(dev, ts->irq, sun4i_ts_irq, 0, "sun4i-ts", ts);
+	if (error)
+		return error;
+
+	/*
+	 * Select HOSC clk, clkin = clk / 6, adc samplefreq = clkin / 8192,
+	 * t_acq = clkin / (16 * 64)
+	 */
+	writel(ADC_CLK_SEL(0) | ADC_CLK_DIV(2) | FS_DIV(7) | T_ACQ(63),
+	       ts->base + TP_CTRL0);
+
+	/*
+	 * sensitive_adjust = 15 : max, which is not all that sensitive,
+	 * tp_mode = 0 : only x and y coordinates, as we don't use dual touch
+	 */
+	writel(TP_SENSITIVE_ADJUST(15) | TP_MODE_SELECT(0),
+	       ts->base + TP_CTRL2);
+
+	/* Enable median filter, type 1 : 5/3 */
+	writel(FILTER_EN(1) | FILTER_TYPE(1), ts->base + TP_CTRL3);
+
+	/* Enable temperature measurement, period 1953 (2 seconds) */
+	writel(TEMP_ENABLE(1) | TEMP_PERIOD(1953), ts->base + TP_TPR);
+
+	/*
+	 * Set stylus up debounce to approx. 10 ms, enable debounce, and
+	 * finally enable tp mode.
+	 */
+	writel(STYLUS_UP_DEBOUN(5) | STYLUS_UP_DEBOUN_EN(1) | TP_MODE_EN(1),
+	       ts->base + TP_CTRL1);
+
+	hwmon = devm_hwmon_device_register_with_groups(ts->dev, "sun4i_ts",
+						       ts, sun4i_ts_groups);
+	if (IS_ERR(hwmon))
+		return PTR_ERR(hwmon);
+
+	/* Only the temperature irq is enabled until the input device is opened */
+	writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC);
+
+	if (ts_attached) {
+		error = input_register_device(ts->input);
+		if (error) {
+			/* Undo the irq enable above before bailing out */
+			writel(0, ts->base + TP_INT_FIFOC);
+			return error;
+		}
+	}
+
+	platform_set_drvdata(pdev, ts);
+	return 0;
+}
+
+/*
+ * Remove: unregister the input device (if one was set up) and then mask
+ * every interrupt source in TP_INT_FIFOC. Always returns 0.
+ */
+static int sun4i_ts_remove(struct platform_device *pdev)
+{
+	struct sun4i_ts_data *ts = platform_get_drvdata(pdev);
+	struct input_dev *input = ts->input;
+
+	/* Explicit unregister to avoid open/close changing the imask later */
+	if (input)
+		input_unregister_device(input);
+
+	/* Deactivate all IRQs */
+	writel(0, ts->base + TP_INT_FIFOC);
+
+	return 0;
+}
+
+static const struct of_device_id sun4i_ts_of_match[] = {	/* DT compatibles handled here */
+	{ .compatible = "allwinner,sun4i-a10-ts", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sun4i_ts_of_match);
+
+static struct platform_driver sun4i_ts_driver = {	/* platform driver glue */
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "sun4i-ts",
+		.of_match_table = of_match_ptr(sun4i_ts_of_match),	/* DT matching */
+	},
+	.probe	= sun4i_ts_probe,
+	.remove	= sun4i_ts_remove,
+};
+
+module_platform_driver(sun4i_ts_driver);
+
+MODULE_DESCRIPTION("Allwinner sun4i resistive touchscreen controller driver");
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
+MODULE_LICENSE("GPL");

diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index 550adcb..52380b6 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c

@@ -25,11 +25,15 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/input.h>
+#include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/tsc2005.h>
+#include <linux/regulator/consumer.h>
 
 /*
  * The touchscreen interface operates as follows:
@@ -100,6 +104,11 @@
 					 TSC2005_CFR2_AVG_7)
 
 #define MAX_12BIT			0xfff
+#define TSC2005_DEF_X_FUZZ		4
+#define TSC2005_DEF_Y_FUZZ		8
+#define TSC2005_DEF_P_FUZZ		2
+#define TSC2005_DEF_RESISTOR		280
+
 #define TSC2005_SPI_MAX_SPEED_HZ	10000000
 #define TSC2005_PENUP_TIME_MS		40
 
@@ -143,6 +152,9 @@
 
 	bool			pen_down;
 
+	struct regulator	*vio;
+
+	int			reset_gpio;
 	void			(*set_reset)(bool enable);
 };
 
@@ -337,6 +349,14 @@
 	tsc2005_cmd(ts, TSC2005_CMD_STOP);
 }
 
+/*
+ * Set the reset line to @enable: through the reset GPIO when reset_gpio is
+ * not negative, otherwise through the set_reset callback if there is one.
+ */
+static void tsc2005_set_reset(struct tsc2005 *ts, bool enable)
+{
+	if (ts->reset_gpio < 0) {
+		if (ts->set_reset)
+			ts->set_reset(enable);
+		return;
+	}
+
+	gpio_set_value(ts->reset_gpio, enable);
+}
+
 /* must be called with ts->mutex held */
 static void __tsc2005_disable(struct tsc2005 *ts)
 {
@@ -355,7 +375,7 @@
 {
 	tsc2005_start_scan(ts);
 
-	if (ts->esd_timeout && ts->set_reset) {
+	if (ts->esd_timeout && (ts->set_reset || ts->reset_gpio)) {
 		ts->last_valid_interrupt = jiffies;
 		schedule_delayed_work(&ts->esd_work,
 				round_jiffies_relative(
@@ -414,9 +434,9 @@
 	}
 
 	/* hardware reset */
-	ts->set_reset(false);
+	tsc2005_set_reset(ts, false);
 	usleep_range(100, 500); /* only 10us required */
-	ts->set_reset(true);
+	tsc2005_set_reset(ts, true);
 
 	if (!success)
 		goto out;
@@ -459,7 +479,7 @@
 	umode_t mode = attr->mode;
 
 	if (attr == &dev_attr_selftest.attr) {
-		if (!ts->set_reset)
+		if (!ts->set_reset && !ts->reset_gpio)
 			mode = 0;
 	}
 
@@ -509,9 +529,9 @@
 
 	tsc2005_update_pen_state(ts, 0, 0, 0);
 
-	ts->set_reset(false);
+	tsc2005_set_reset(ts, false);
 	usleep_range(100, 500); /* only 10us required */
-	ts->set_reset(true);
+	tsc2005_set_reset(ts, true);
 
 	enable_irq(ts->spi->irq);
 	tsc2005_start_scan(ts);
@@ -572,29 +592,47 @@
 static int tsc2005_probe(struct spi_device *spi)
 {
 	const struct tsc2005_platform_data *pdata = dev_get_platdata(&spi->dev);
+	struct device_node *np = spi->dev.of_node;
+
 	struct tsc2005 *ts;
 	struct input_dev *input_dev;
-	unsigned int max_x, max_y, max_p;
-	unsigned int fudge_x, fudge_y, fudge_p;
+	unsigned int max_x = MAX_12BIT;
+	unsigned int max_y = MAX_12BIT;
+	unsigned int max_p = MAX_12BIT;
+	unsigned int fudge_x = TSC2005_DEF_X_FUZZ;
+	unsigned int fudge_y = TSC2005_DEF_Y_FUZZ;
+	unsigned int fudge_p = TSC2005_DEF_P_FUZZ;
+	unsigned int x_plate_ohm = TSC2005_DEF_RESISTOR;
+	unsigned int esd_timeout;
 	int error;
 
-	if (!pdata) {
-		dev_dbg(&spi->dev, "no platform data\n");
+	if (!np && !pdata) {
+		dev_err(&spi->dev, "no platform data\n");
 		return -ENODEV;
 	}
 
-	fudge_x	= pdata->ts_x_fudge	   ? : 4;
-	fudge_y	= pdata->ts_y_fudge	   ? : 8;
-	fudge_p	= pdata->ts_pressure_fudge ? : 2;
-	max_x	= pdata->ts_x_max	   ? : MAX_12BIT;
-	max_y	= pdata->ts_y_max	   ? : MAX_12BIT;
-	max_p	= pdata->ts_pressure_max   ? : MAX_12BIT;
-
 	if (spi->irq <= 0) {
-		dev_dbg(&spi->dev, "no irq\n");
+		dev_err(&spi->dev, "no irq\n");
 		return -ENODEV;
 	}
 
+	if (pdata) {
+		fudge_x	= pdata->ts_x_fudge;
+		fudge_y	= pdata->ts_y_fudge;
+		fudge_p	= pdata->ts_pressure_fudge;
+		max_x	= pdata->ts_x_max;
+		max_y	= pdata->ts_y_max;
+		max_p	= pdata->ts_pressure_max;
+		x_plate_ohm = pdata->ts_x_plate_ohm;
+		esd_timeout = pdata->esd_timeout_ms;
+	} else {
+		x_plate_ohm = TSC2005_DEF_RESISTOR;
+		of_property_read_u32(np, "ti,x-plate-ohms", &x_plate_ohm);
+		esd_timeout = 0;
+		of_property_read_u32(np, "ti,esd-recovery-timeout-ms",
+								&esd_timeout);
+	}
+
 	spi->mode = SPI_MODE_0;
 	spi->bits_per_word = 8;
 	if (!spi->max_speed_hz)
@@ -604,19 +642,48 @@
 	if (error)
 		return error;
 
-	ts = kzalloc(sizeof(*ts), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts || !input_dev) {
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	ts = devm_kzalloc(&spi->dev, sizeof(*ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	input_dev = devm_input_allocate_device(&spi->dev);
+	if (!input_dev)
+		return -ENOMEM;
 
 	ts->spi = spi;
 	ts->idev = input_dev;
 
-	ts->x_plate_ohm	= pdata->ts_x_plate_ohm	? : 280;
-	ts->esd_timeout	= pdata->esd_timeout_ms;
-	ts->set_reset	= pdata->set_reset;
+	ts->x_plate_ohm = x_plate_ohm;
+	ts->esd_timeout = esd_timeout;
+
+	if (np) {
+		ts->reset_gpio = of_get_named_gpio(np, "reset-gpios", 0);
+		if (ts->reset_gpio == -EPROBE_DEFER)
+			return ts->reset_gpio;
+		if (ts->reset_gpio < 0) {
+			dev_err(&spi->dev, "error acquiring reset gpio: %d\n",
+				ts->reset_gpio);
+			return ts->reset_gpio;
+		}
+
+		error = devm_gpio_request_one(&spi->dev, ts->reset_gpio, 0,
+					      "reset-gpios");
+		if (error) {
+			dev_err(&spi->dev, "error requesting reset gpio: %d\n",
+				error);
+			return error;
+		}
+
+		ts->vio = devm_regulator_get(&spi->dev, "vio");
+		if (IS_ERR(ts->vio)) {
+			error = PTR_ERR(ts->vio);
+			dev_err(&spi->dev, "vio regulator missing (%d)", error);
+			return error;
+		}
+	} else {
+		ts->reset_gpio = -1;
+		ts->set_reset = pdata->set_reset;
+	}
 
 	mutex_init(&ts->mutex);
 
@@ -641,6 +708,9 @@
 	input_set_abs_params(input_dev, ABS_Y, 0, max_y, fudge_y, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, 0, max_p, fudge_p, 0);
 
+	if (np)
+		touchscreen_parse_of_params(input_dev);
+
 	input_dev->open = tsc2005_open;
 	input_dev->close = tsc2005_close;
 
@@ -649,12 +719,20 @@
 	/* Ensure the touchscreen is off */
 	tsc2005_stop_scan(ts);
 
-	error = request_threaded_irq(spi->irq, NULL, tsc2005_irq_thread,
-				     IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-				     "tsc2005", ts);
+	error = devm_request_threaded_irq(&spi->dev, spi->irq, NULL,
+					  tsc2005_irq_thread,
+					  IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					  "tsc2005", ts);
 	if (error) {
 		dev_err(&spi->dev, "Failed to request irq, err: %d\n", error);
-		goto err_free_mem;
+		return error;
+	}
+
+	/* enable regulator for DT */
+	if (ts->vio) {
+		error = regulator_enable(ts->vio);
+		if (error)
+			return error;
 	}
 
 	spi_set_drvdata(spi, ts);
@@ -662,7 +740,7 @@
 	if (error) {
 		dev_err(&spi->dev,
 			"Failed to create sysfs attributes, err: %d\n", error);
-		goto err_clear_drvdata;
+		goto disable_regulator;
 	}
 
 	error = input_register_device(ts->idev);
@@ -677,11 +755,9 @@
 
 err_remove_sysfs:
 	sysfs_remove_group(&spi->dev.kobj, &tsc2005_attr_group);
-err_clear_drvdata:
-	free_irq(spi->irq, ts);
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(ts);
+disable_regulator:
+	if (ts->vio)
+		regulator_disable(ts->vio);
 	return error;
 }
 
@@ -689,11 +765,10 @@
 {
 	struct tsc2005 *ts = spi_get_drvdata(spi);
 
-	sysfs_remove_group(&ts->spi->dev.kobj, &tsc2005_attr_group);
+	sysfs_remove_group(&spi->dev.kobj, &tsc2005_attr_group);
 
-	free_irq(ts->spi->irq, ts);
-	input_unregister_device(ts->idev);
-	kfree(ts);
+	if (ts->vio)
+		regulator_disable(ts->vio);
 
 	return 0;
 }

diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c
index 01d30ce..feea85b 100644
--- a/drivers/input/touchscreen/zforce_ts.c
+++ b/drivers/input/touchscreen/zforce_ts.c

@@ -880,7 +880,7 @@
 MODULE_DEVICE_TABLE(i2c, zforce_idtable);
 
 #ifdef CONFIG_OF
-static struct of_device_id zforce_dt_idtable[] = {
+static const struct of_device_id zforce_dt_idtable[] = {
 	{ .compatible = "neonode,zforce" },
 	{},
 };

diff --git a/drivers/isdn/capi/Kconfig b/drivers/isdn/capi/Kconfig
index 9816c51..7641b30 100644
--- a/drivers/isdn/capi/Kconfig
+++ b/drivers/isdn/capi/Kconfig

@@ -1,11 +1,3 @@
-config ISDN_DRV_AVMB1_VERBOSE_REASON
-	bool "Verbose reason code reporting"
-	default y
-	help
-	  If you say Y here, the CAPI drivers will give verbose reasons for
-	  disconnecting. This will increase the size of the kernel by 7 KB. If
-	  unsure, say Y.
-
 config CAPI_TRACE
 	bool "CAPI trace support"
 	default y
@@ -17,7 +9,7 @@
 	  If unsure, say Y.
 
 config ISDN_CAPI_CAPI20
-	tristate "CAPI2.0 /dev/capi support"
+	tristate "CAPI2.0 /dev/capi20 support"
 	help
 	  This option will provide the CAPI 2.0 interface to userspace
 	  applications via /dev/capi20. Applications should use the
@@ -42,3 +34,11 @@
 	  the legacy isdn4linux link layer.  If you have a card which is
 	  supported by a CAPI driver, but still want to use old features like
 	  ippp interfaces or ttyI emulation, say Y/M here.
+
+config ISDN_CAPI_CAPIDRV_VERBOSE
+	bool "Verbose reason code reporting"
+	depends on ISDN_CAPI_CAPIDRV
+	help
+	  If you say Y here, the capidrv interface will give verbose reasons
+	  for disconnecting. This will increase the size of the kernel by 7 KB.
+	  If unsure, say N.

diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index ac6f72b..f9a87ed 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c

@@ -1271,7 +1271,7 @@
 		return -ENOMEM;
 	}
 	drv->driver_name = "capi_nc";
-	drv->name = "capi";
+	drv->name = "capi!";
 	drv->major = 0;
 	drv->minor_start = 0;
 	drv->type = TTY_DRIVER_TYPE_SERIAL;
@@ -1417,7 +1417,7 @@
 		return PTR_ERR(capi_class);
 	}
 
-	device_create(capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi");
+	device_create(capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20");
 
 	if (capinc_tty_init() < 0) {
 		device_destroy(capi_class, MKDEV(capi_major, 0));

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index cc9f192..fd6d28f 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c

@@ -763,6 +763,201 @@
 }
 
 /* ------------------------------------------------------------------- */
+static char *capi_info2str(u16 reason)
+{
+#ifndef CONFIG_ISDN_CAPI_CAPIDRV_VERBOSE
+	return "..";
+#else
+	switch (reason) {
+
+/*-- informative values (corresponding message was processed) -----*/
+	case 0x0001:
+		return "NCPI not supported by current protocol, NCPI ignored";
+	case 0x0002:
+		return "Flags not supported by current protocol, flags ignored";
+	case 0x0003:
+		return "Alert already sent by another application";
+
+/*-- error information concerning CAPI_REGISTER -----*/
+	case 0x1001:
+		return "Too many applications";
+	case 0x1002:
+		return "Logical block size too small, must be at least 128 Bytes";
+	case 0x1003:
+		return "Buffer exceeds 64 kByte";
+	case 0x1004:
+		return "Message buffer size too small, must be at least 1024 Bytes";
+	case 0x1005:
+		return "Max. number of logical connections not supported";
+	case 0x1006:
+		return "Reserved";
+	case 0x1007:
+		return "The message could not be accepted because of an internal busy condition";
+	case 0x1008:
+		return "OS resource error (no memory ?)";
+	case 0x1009:
+		return "CAPI not installed";
+	case 0x100A:
+		return "Controller does not support external equipment";
+	case 0x100B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning message exchange functions -----*/
+	case 0x1101:
+		return "Illegal application number";
+	case 0x1102:
+		return "Illegal command or subcommand or message length less than 12 bytes";
+	case 0x1103:
+		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
+	case 0x1104:
+		return "Queue is empty";
+	case 0x1105:
+		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
+	case 0x1106:
+		return "Unknown notification parameter";
+	case 0x1107:
+		return "The Message could not be accepted because of an internal busy condition";
+	case 0x1108:
+		return "OS Resource error (no memory ?)";
+	case 0x1109:
+		return "CAPI not installed";
+	case 0x110A:
+		return "Controller does not support external equipment";
+	case 0x110B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning resource / coding problems -----*/
+	case 0x2001:
+		return "Message not supported in current state";
+	case 0x2002:
+		return "Illegal Controller / PLCI / NCCI";
+	case 0x2003:
+		return "Out of PLCI";
+	case 0x2004:
+		return "Out of NCCI";
+	case 0x2005:
+		return "Out of LISTEN";
+	case 0x2006:
+		return "Out of FAX resources (protocol T.30)";
+	case 0x2007:
+		return "Illegal message parameter coding";
+
+/*-- error information concerning requested services  -----*/
+	case 0x3001:
+		return "B1 protocol not supported";
+	case 0x3002:
+		return "B2 protocol not supported";
+	case 0x3003:
+		return "B3 protocol not supported";
+	case 0x3004:
+		return "B1 protocol parameter not supported";
+	case 0x3005:
+		return "B2 protocol parameter not supported";
+	case 0x3006:
+		return "B3 protocol parameter not supported";
+	case 0x3007:
+		return "B protocol combination not supported";
+	case 0x3008:
+		return "NCPI not supported";
+	case 0x3009:
+		return "CIP Value unknown";
+	case 0x300A:
+		return "Flags not supported (reserved bits)";
+	case 0x300B:
+		return "Facility not supported";
+	case 0x300C:
+		return "Data length not supported by current protocol";
+	case 0x300D:
+		return "Reset procedure not supported by current protocol";
+
+/*-- informations about the clearing of a physical connection -----*/
+	case 0x3301:
+		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
+	case 0x3302:
+		return "Protocol error layer 2";
+	case 0x3303:
+		return "Protocol error layer 3";
+	case 0x3304:
+		return "Another application got that call";
+/*-- T.30 specific reasons -----*/
+	case 0x3311:
+		return "Connecting not successful (remote station is no FAX G3 machine)";
+	case 0x3312:
+		return "Connecting not successful (training error)";
+	case 0x3313:
+		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
+	case 0x3314:
+		return "Disconnected during transfer (remote abort)";
+	case 0x3315:
+		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
+	case 0x3316:
+		return "Disconnected during transfer (local tx data underrun)";
+	case 0x3317:
+		return "Disconnected during transfer (local rx data overflow)";
+	case 0x3318:
+		return "Disconnected during transfer (local abort)";
+	case 0x3319:
+		return "Illegal parameter coding (e.g. SFF coding error)";
+
+/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
+	case 0x3481: return "Unallocated (unassigned) number";
+	case 0x3482: return "No route to specified transit network";
+	case 0x3483: return "No route to destination";
+	case 0x3486: return "Channel unacceptable";
+	case 0x3487:
+		return "Call awarded and being delivered in an established channel";
+	case 0x3490: return "Normal call clearing";
+	case 0x3491: return "User busy";
+	case 0x3492: return "No user responding";
+	case 0x3493: return "No answer from user (user alerted)";
+	case 0x3495: return "Call rejected";
+	case 0x3496: return "Number changed";
+	case 0x349A: return "Non-selected user clearing";
+	case 0x349B: return "Destination out of order";
+	case 0x349C: return "Invalid number format";
+	case 0x349D: return "Facility rejected";
+	case 0x349E: return "Response to STATUS ENQUIRY";
+	case 0x349F: return "Normal, unspecified";
+	case 0x34A2: return "No circuit / channel available";
+	case 0x34A6: return "Network out of order";
+	case 0x34A9: return "Temporary failure";
+	case 0x34AA: return "Switching equipment congestion";
+	case 0x34AB: return "Access information discarded";
+	case 0x34AC: return "Requested circuit / channel not available";
+	case 0x34AF: return "Resources unavailable, unspecified";
+	case 0x34B1: return "Quality of service unavailable";
+	case 0x34B2: return "Requested facility not subscribed";
+	case 0x34B9: return "Bearer capability not authorized";
+	case 0x34BA: return "Bearer capability not presently available";
+	case 0x34BF: return "Service or option not available, unspecified";
+	case 0x34C1: return "Bearer capability not implemented";
+	case 0x34C2: return "Channel type not implemented";
+	case 0x34C5: return "Requested facility not implemented";
+	case 0x34C6: return "Only restricted digital information bearer capability is available";
+	case 0x34CF: return "Service or option not implemented, unspecified";
+	case 0x34D1: return "Invalid call reference value";
+	case 0x34D2: return "Identified channel does not exist";
+	case 0x34D3: return "A suspended call exists, but this call identity does not";
+	case 0x34D4: return "Call identity in use";
+	case 0x34D5: return "No call suspended";
+	case 0x34D6: return "Call having the requested call identity has been cleared";
+	case 0x34D8: return "Incompatible destination";
+	case 0x34DB: return "Invalid transit network selection";
+	case 0x34DF: return "Invalid message, unspecified";
+	case 0x34E0: return "Mandatory information element is missing";
+	case 0x34E1: return "Message type non-existent or not implemented";
+	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
+	case 0x34E3: return "Information element non-existent or not implemented";
+	case 0x34E4: return "Invalid information element contents";
+	case 0x34E5: return "Message not compatible with call state";
+	case 0x34E6: return "Recovery on timer expiry";
+	case 0x34EF: return "Protocol error, unspecified";
+	case 0x34FF: return "Interworking, unspecified";
+
+	default: return "No additional information";
+	}
+#endif
+}
 
 static void handle_controller(_cmsg *cmsg)
 {

diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index d26f170..6e797e5 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c

@@ -22,205 +22,6 @@
 
 /* from CAPI2.0 DDK AVM Berlin GmbH */
 
-#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON
-char *capi_info2str(u16 reason)
-{
-	return "..";
-}
-#else
-char *capi_info2str(u16 reason)
-{
-	switch (reason) {
-
-/*-- informative values (corresponding message was processed) -----*/
-	case 0x0001:
-		return "NCPI not supported by current protocol, NCPI ignored";
-	case 0x0002:
-		return "Flags not supported by current protocol, flags ignored";
-	case 0x0003:
-		return "Alert already sent by another application";
-
-/*-- error information concerning CAPI_REGISTER -----*/
-	case 0x1001:
-		return "Too many applications";
-	case 0x1002:
-		return "Logical block size too small, must be at least 128 Bytes";
-	case 0x1003:
-		return "Buffer exceeds 64 kByte";
-	case 0x1004:
-		return "Message buffer size too small, must be at least 1024 Bytes";
-	case 0x1005:
-		return "Max. number of logical connections not supported";
-	case 0x1006:
-		return "Reserved";
-	case 0x1007:
-		return "The message could not be accepted because of an internal busy condition";
-	case 0x1008:
-		return "OS resource error (no memory ?)";
-	case 0x1009:
-		return "CAPI not installed";
-	case 0x100A:
-		return "Controller does not support external equipment";
-	case 0x100B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning message exchange functions -----*/
-	case 0x1101:
-		return "Illegal application number";
-	case 0x1102:
-		return "Illegal command or subcommand or message length less than 12 bytes";
-	case 0x1103:
-		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
-	case 0x1104:
-		return "Queue is empty";
-	case 0x1105:
-		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
-	case 0x1106:
-		return "Unknown notification parameter";
-	case 0x1107:
-		return "The Message could not be accepted because of an internal busy condition";
-	case 0x1108:
-		return "OS Resource error (no memory ?)";
-	case 0x1109:
-		return "CAPI not installed";
-	case 0x110A:
-		return "Controller does not support external equipment";
-	case 0x110B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning resource / coding problems -----*/
-	case 0x2001:
-		return "Message not supported in current state";
-	case 0x2002:
-		return "Illegal Controller / PLCI / NCCI";
-	case 0x2003:
-		return "Out of PLCI";
-	case 0x2004:
-		return "Out of NCCI";
-	case 0x2005:
-		return "Out of LISTEN";
-	case 0x2006:
-		return "Out of FAX resources (protocol T.30)";
-	case 0x2007:
-		return "Illegal message parameter coding";
-
-/*-- error information concerning requested services  -----*/
-	case 0x3001:
-		return "B1 protocol not supported";
-	case 0x3002:
-		return "B2 protocol not supported";
-	case 0x3003:
-		return "B3 protocol not supported";
-	case 0x3004:
-		return "B1 protocol parameter not supported";
-	case 0x3005:
-		return "B2 protocol parameter not supported";
-	case 0x3006:
-		return "B3 protocol parameter not supported";
-	case 0x3007:
-		return "B protocol combination not supported";
-	case 0x3008:
-		return "NCPI not supported";
-	case 0x3009:
-		return "CIP Value unknown";
-	case 0x300A:
-		return "Flags not supported (reserved bits)";
-	case 0x300B:
-		return "Facility not supported";
-	case 0x300C:
-		return "Data length not supported by current protocol";
-	case 0x300D:
-		return "Reset procedure not supported by current protocol";
-
-/*-- informations about the clearing of a physical connection -----*/
-	case 0x3301:
-		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
-	case 0x3302:
-		return "Protocol error layer 2";
-	case 0x3303:
-		return "Protocol error layer 3";
-	case 0x3304:
-		return "Another application got that call";
-/*-- T.30 specific reasons -----*/
-	case 0x3311:
-		return "Connecting not successful (remote station is no FAX G3 machine)";
-	case 0x3312:
-		return "Connecting not successful (training error)";
-	case 0x3313:
-		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
-	case 0x3314:
-		return "Disconnected during transfer (remote abort)";
-	case 0x3315:
-		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
-	case 0x3316:
-		return "Disconnected during transfer (local tx data underrun)";
-	case 0x3317:
-		return "Disconnected during transfer (local rx data overflow)";
-	case 0x3318:
-		return "Disconnected during transfer (local abort)";
-	case 0x3319:
-		return "Illegal parameter coding (e.g. SFF coding error)";
-
-/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
-	case 0x3481: return "Unallocated (unassigned) number";
-	case 0x3482: return "No route to specified transit network";
-	case 0x3483: return "No route to destination";
-	case 0x3486: return "Channel unacceptable";
-	case 0x3487:
-		return "Call awarded and being delivered in an established channel";
-	case 0x3490: return "Normal call clearing";
-	case 0x3491: return "User busy";
-	case 0x3492: return "No user responding";
-	case 0x3493: return "No answer from user (user alerted)";
-	case 0x3495: return "Call rejected";
-	case 0x3496: return "Number changed";
-	case 0x349A: return "Non-selected user clearing";
-	case 0x349B: return "Destination out of order";
-	case 0x349C: return "Invalid number format";
-	case 0x349D: return "Facility rejected";
-	case 0x349E: return "Response to STATUS ENQUIRY";
-	case 0x349F: return "Normal, unspecified";
-	case 0x34A2: return "No circuit / channel available";
-	case 0x34A6: return "Network out of order";
-	case 0x34A9: return "Temporary failure";
-	case 0x34AA: return "Switching equipment congestion";
-	case 0x34AB: return "Access information discarded";
-	case 0x34AC: return "Requested circuit / channel not available";
-	case 0x34AF: return "Resources unavailable, unspecified";
-	case 0x34B1: return "Quality of service unavailable";
-	case 0x34B2: return "Requested facility not subscribed";
-	case 0x34B9: return "Bearer capability not authorized";
-	case 0x34BA: return "Bearer capability not presently available";
-	case 0x34BF: return "Service or option not available, unspecified";
-	case 0x34C1: return "Bearer capability not implemented";
-	case 0x34C2: return "Channel type not implemented";
-	case 0x34C5: return "Requested facility not implemented";
-	case 0x34C6: return "Only restricted digital information bearer capability is available";
-	case 0x34CF: return "Service or option not implemented, unspecified";
-	case 0x34D1: return "Invalid call reference value";
-	case 0x34D2: return "Identified channel does not exist";
-	case 0x34D3: return "A suspended call exists, but this call identity does not";
-	case 0x34D4: return "Call identity in use";
-	case 0x34D5: return "No call suspended";
-	case 0x34D6: return "Call having the requested call identity has been cleared";
-	case 0x34D8: return "Incompatible destination";
-	case 0x34DB: return "Invalid transit network selection";
-	case 0x34DF: return "Invalid message, unspecified";
-	case 0x34E0: return "Mandatory information element is missing";
-	case 0x34E1: return "Message type non-existent or not implemented";
-	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
-	case 0x34E3: return "Information element non-existent or not implemented";
-	case 0x34E4: return "Invalid information element contents";
-	case 0x34E5: return "Message not compatible with call state";
-	case 0x34E6: return "Recovery on timer expiry";
-	case 0x34EF: return "Protocol error, unspecified";
-	case 0x34FF: return "Interworking, unspecified";
-
-	default: return "No additional information";
-	}
-}
-#endif
-
 typedef struct {
 	int typ;
 	size_t off;
@@ -1073,4 +874,3 @@
 EXPORT_SYMBOL(capi_cmd2str);
 EXPORT_SYMBOL(capi_cmsg2str);
 EXPORT_SYMBOL(capi_message2str);
-EXPORT_SYMBOL(capi_info2str);

diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c
index 414dbf6..fc9f9d0 100644
--- a/drivers/isdn/hisax/hfc4s8s_l1.c
+++ b/drivers/isdn/hisax/hfc4s8s_l1.c

@@ -197,25 +197,6 @@
 
 
 
-/***************************/
-/* inline function defines */
-/***************************/
-#ifdef HISAX_HFC4S8S_PCIMEM	/* inline functions memory mapped */
-
-/* memory write and dummy IO read to avoid PCI byte merge problems */
-#define Write_hfc8(a, b, c) {(*((volatile u_char *)(a->membase + b)) = c); inb(a->iobase + 4);}
-/* memory write without dummy IO access for fifo data access */
-#define fWrite_hfc8(a, b, c) (*((volatile u_char *)(a->membase + b)) = c)
-#define Read_hfc8(a, b) (*((volatile u_char *)(a->membase + b)))
-#define Write_hfc16(a, b, c) (*((volatile unsigned short *)(a->membase + b)) = c)
-#define Read_hfc16(a, b) (*((volatile unsigned short *)(a->membase + b)))
-#define Write_hfc32(a, b, c) (*((volatile unsigned long *)(a->membase + b)) = c)
-#define Read_hfc32(a, b) (*((volatile unsigned long *)(a->membase + b)))
-#define wait_busy(a) {while ((Read_hfc8(a, R_STATUS) & M_BUSY));}
-#define PCI_ENA_MEMIO	0x03
-
-#else
-
 /* inline functions io mapped */
 static inline void
 SetRegAddr(hfc4s8s_hw *a, u_char b)
@@ -306,8 +287,6 @@
 
 #define PCI_ENA_REGIO	0x01
 
-#endif				/* HISAX_HFC4S8S_PCIMEM */
-
 /******************************************************/
 /* function to read critical counter registers that   */
 /* may be updated by the chip during read             */
@@ -724,26 +703,15 @@
 				return;
 			} else {
 				/* read errornous D frame */
-
-#ifndef HISAX_HFC4S8S_PCIMEM
 				SetRegAddr(l1p->hw, A_FIFO_DATA0);
-#endif
 
 				while (z1 >= 4) {
-#ifdef HISAX_HFC4S8S_PCIMEM
-					Read_hfc32(l1p->hw, A_FIFO_DATA0);
-#else
 					fRead_hfc32(l1p->hw);
-#endif
 					z1 -= 4;
 				}
 
 				while (z1--)
-#ifdef HISAX_HFC4S8S_PCIMEM
-					Read_hfc8(l1p->hw, A_FIFO_DATA0);
-#else
-				fRead_hfc8(l1p->hw);
-#endif
+					fRead_hfc8(l1p->hw);
 
 				Write_hfc8(l1p->hw, A_INC_RES_FIFO, 1);
 				wait_busy(l1p->hw);
@@ -753,27 +721,16 @@
 
 		cp = skb->data;
 
-#ifndef HISAX_HFC4S8S_PCIMEM
 		SetRegAddr(l1p->hw, A_FIFO_DATA0);
-#endif
 
 		while (z1 >= 4) {
-#ifdef HISAX_HFC4S8S_PCIMEM
-			*((unsigned long *) cp) =
-				Read_hfc32(l1p->hw, A_FIFO_DATA0);
-#else
 			*((unsigned long *) cp) = fRead_hfc32(l1p->hw);
-#endif
 			cp += 4;
 			z1 -= 4;
 		}
 
 		while (z1--)
-#ifdef HISAX_HFC4S8S_PCIMEM
-			*cp++ = Read_hfc8(l1p->hw, A_FIFO_DATA0);
-#else
-		*cp++ = fRead_hfc8(l1p->hw);
-#endif
+			*cp++ = fRead_hfc8(l1p->hw);
 
 		Write_hfc8(l1p->hw, A_INC_RES_FIFO, 1);	/* increment f counter */
 		wait_busy(l1p->hw);
@@ -859,28 +816,17 @@
 			wait_busy(l1->hw);
 			return;
 		}
-#ifndef HISAX_HFC4S8S_PCIMEM
 		SetRegAddr(l1->hw, A_FIFO_DATA0);
-#endif
 
 		while (z1 >= 4) {
-#ifdef HISAX_HFC4S8S_PCIMEM
-			*((unsigned long *) bch->rx_ptr) =
-				Read_hfc32(l1->hw, A_FIFO_DATA0);
-#else
 			*((unsigned long *) bch->rx_ptr) =
 				fRead_hfc32(l1->hw);
-#endif
 			bch->rx_ptr += 4;
 			z1 -= 4;
 		}
 
 		while (z1--)
-#ifdef HISAX_HFC4S8S_PCIMEM
-			*(bch->rx_ptr++) = Read_hfc8(l1->hw, A_FIFO_DATA0);
-#else
-		*(bch->rx_ptr++) = fRead_hfc8(l1->hw);
-#endif
+			*(bch->rx_ptr++) = fRead_hfc8(l1->hw);
 
 		if (hdlc_complete) {
 			/* increment f counter */
@@ -940,29 +886,17 @@
 	if ((skb = skb_dequeue(&l1p->d_tx_queue))) {
 		cp = skb->data;
 		cnt = skb->len;
-#ifndef HISAX_HFC4S8S_PCIMEM
 		SetRegAddr(l1p->hw, A_FIFO_DATA0);
-#endif
 
 		while (cnt >= 4) {
-#ifdef HISAX_HFC4S8S_PCIMEM
-			fWrite_hfc32(l1p->hw, A_FIFO_DATA0,
-				     *(unsigned long *) cp);
-#else
 			SetRegAddr(l1p->hw, A_FIFO_DATA0);
 			fWrite_hfc32(l1p->hw, *(unsigned long *) cp);
-#endif
 			cp += 4;
 			cnt -= 4;
 		}
 
-#ifdef HISAX_HFC4S8S_PCIMEM
-		while (cnt--)
-			fWrite_hfc8(l1p->hw, A_FIFO_DATA0, *cp++);
-#else
 		while (cnt--)
 			fWrite_hfc8(l1p->hw, *cp++);
-#endif
 
 		l1p->tx_cnt = skb->truesize;
 		Write_hfc8(l1p->hw, A_INC_RES_FIFO, 1);	/* increment f counter */
@@ -1037,26 +971,15 @@
 		cp = skb->data + bch->tx_cnt;
 		bch->tx_cnt += cnt;
 
-#ifndef HISAX_HFC4S8S_PCIMEM
 		SetRegAddr(l1->hw, A_FIFO_DATA0);
-#endif
 		while (cnt >= 4) {
-#ifdef HISAX_HFC4S8S_PCIMEM
-			fWrite_hfc32(l1->hw, A_FIFO_DATA0,
-				     *(unsigned long *) cp);
-#else
 			fWrite_hfc32(l1->hw, *(unsigned long *) cp);
-#endif
 			cp += 4;
 			cnt -= 4;
 		}
 
 		while (cnt--)
-#ifdef HISAX_HFC4S8S_PCIMEM
-			fWrite_hfc8(l1->hw, A_FIFO_DATA0, *cp++);
-#else
-		fWrite_hfc8(l1->hw, *cp++);
-#endif
+			fWrite_hfc8(l1->hw, *cp++);
 
 		if (bch->tx_cnt >= skb->len) {
 			if (bch->mode == L1_MODE_HDLC) {
@@ -1281,10 +1204,8 @@
 	if (!hw || !(hw->mr.r_irq_ctrl & M_GLOB_IRQ_EN))
 		return IRQ_NONE;
 
-#ifndef	HISAX_HFC4S8S_PCIMEM
 	/* read current selected regsister */
 	old_ioreg = GetRegAddr(hw);
-#endif
 
 	/* Layer 1 State change */
 	hw->mr.r_irq_statech |=
@@ -1292,9 +1213,7 @@
 	if (!
 	    (b = (Read_hfc8(hw, R_STATUS) & (M_MISC_IRQSTA | M_FR_IRQSTA)))
 	    && !hw->mr.r_irq_statech) {
-#ifndef	HISAX_HFC4S8S_PCIMEM
 		SetRegAddr(hw, old_ioreg);
-#endif
 		return IRQ_NONE;
 	}
 
@@ -1322,9 +1241,7 @@
 	/* queue the request to allow other cards to interrupt */
 	schedule_work(&hw->tqueue);
 
-#ifndef	HISAX_HFC4S8S_PCIMEM
 	SetRegAddr(hw, old_ioreg);
-#endif
 	return IRQ_HANDLED;
 }				/* hfc4s8s_interrupt */
 
@@ -1471,13 +1388,8 @@
 release_pci_ports(hfc4s8s_hw *hw)
 {
 	pci_write_config_word(hw->pdev, PCI_COMMAND, 0);
-#ifdef HISAX_HFC4S8S_PCIMEM
-	if (hw->membase)
-		iounmap((void *) hw->membase);
-#else
 	if (hw->iobase)
 		release_region(hw->iobase, 8);
-#endif
 }
 
 /*****************************************/
@@ -1486,11 +1398,7 @@
 static void
 enable_pci_ports(hfc4s8s_hw *hw)
 {
-#ifdef HISAX_HFC4S8S_PCIMEM
-	pci_write_config_word(hw->pdev, PCI_COMMAND, PCI_ENA_MEMIO);
-#else
 	pci_write_config_word(hw->pdev, PCI_COMMAND, PCI_ENA_REGIO);
-#endif
 }
 
 /*************************************/
@@ -1561,15 +1469,9 @@
 		       hw->irq);
 		goto out;
 	}
-#ifdef HISAX_HFC4S8S_PCIMEM
-	printk(KERN_INFO
-	       "HFC-4S/8S: found PCI card at membase 0x%p, irq %d\n",
-	       hw->hw_membase, hw->irq);
-#else
 	printk(KERN_INFO
 	       "HFC-4S/8S: found PCI card at iobase 0x%x, irq %d\n",
 	       hw->iobase, hw->irq);
-#endif
 
 	hfc_hardware_enable(hw, 1, 0);
 
@@ -1614,17 +1516,12 @@
 	hw->irq = pdev->irq;
 	hw->iobase = pci_resource_start(pdev, 0);
 
-#ifdef HISAX_HFC4S8S_PCIMEM
-	hw->hw_membase = (u_char *) pci_resource_start(pdev, 1);
-	hw->membase = ioremap((ulong) hw->hw_membase, 256);
-#else
 	if (!request_region(hw->iobase, 8, hw->card_name)) {
 		printk(KERN_INFO
 		       "HFC-4S/8S: failed to request address space at 0x%04x\n",
 		       hw->iobase);
 		goto out;
 	}
-#endif
 
 	pci_set_drvdata(pdev, hw);
 	err = setup_instance(hw);

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index a5da511..61ac632 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c

@@ -634,7 +634,7 @@
 #ifdef CONFIG_IPPP_FILTER
 	case PPPIOCSPASS:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 
@@ -653,7 +653,7 @@
 	}
 	case PPPIOCSACTIVE:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 

diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index 2c0d2c2..9f454d7 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c

@@ -287,11 +287,9 @@
 	p = frame;
 
 	/* restart timer */
-	if ((int)(hc->keep_tl.expires-jiffies) < 5 * HZ) {
-		del_timer(&hc->keep_tl);
-		hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ;
-		add_timer(&hc->keep_tl);
-	} else
+	if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ))
+		mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ);
+	else
 		hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ;
 
 	if (debug & DEBUG_L1OIP_MSG)
@@ -621,11 +619,9 @@
 		goto multiframe;
 
 	/* restart timer */
-	if ((int)(hc->timeout_tl.expires-jiffies) < 5 * HZ || !hc->timeout_on) {
+	if (time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || !hc->timeout_on) {
 		hc->timeout_on = 1;
-		del_timer(&hc->timeout_tl);
-		hc->timeout_tl.expires = jiffies + L1OIP_TIMEOUT * HZ;
-		add_timer(&hc->timeout_tl);
+		mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ);
 	} else /* only adjust timer */
 		hc->timeout_tl.expires = jiffies + L1OIP_TIMEOUT * HZ;
 

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 089841c..a1b044e 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig

@@ -300,16 +300,6 @@
 	  LED driver chip accessed via the I2C bus. Supported
 	  devices include PCA9633 and PCA9634
 
-config LEDS_PCA9685
-	tristate "LED support for PCA9685 I2C chip"
-	depends on LEDS_CLASS
-	depends on I2C
-	help
-	  This option enables support for LEDs connected to the PCA9685
-	  LED driver chip accessed via the I2C bus.
-	  The PCA9685 offers 12-bit PWM (4095 levels of brightness) on
-	  16 individual channels.
-
 config LEDS_WM831X_STATUS
 	tristate "LED support for status LEDs on WM831x PMICs"
 	depends on LEDS_CLASS

diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 8b4c956..79c5155 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile

@@ -36,7 +36,6 @@
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 obj-$(CONFIG_LEDS_PCA963X)		+= leds-pca963x.o
-obj-$(CONFIG_LEDS_PCA9685)		+= leds-pca9685.o
 obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_DA9052)		+= leds-da9052.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o

diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c
index e5c5738..c36acaf 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/leds/dell-led.c

@@ -15,12 +15,15 @@
 #include <linux/leds.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/dell-led.h>
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
 MODULE_LICENSE("GPL");
 
 #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396"
+#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492"
 MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 
 /* Error Result Codes: */
@@ -39,6 +42,149 @@
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
+/*
+ * Request/reply buffer exchanged with the DELL_APP_GUID WMI method.
+ *
+ * dell_wmi_perform_query() passes one of these to the firmware as the
+ * method input and copies the returned buffer back over it.  The fields
+ * add up to 128 bytes.
+ */
+struct app_wmi_args {
+	u16 class;
+	u16 selector;
+	u32 arg1;
+	u32 arg2;
+	u32 arg3;
+	u32 arg4;
+	/* res1 is the status word: a non-zero value is treated as failure */
+	u32 res1;
+	u32 res2;
+	u32 res3;
+	u32 res4;
+	/* filler that brings the structure up to 128 bytes */
+	char dummy[92];
+};
+
+#define GLOBAL_MIC_MUTE_ENABLE	0x364
+#define GLOBAL_MIC_MUTE_DISABLE	0x365
+
+/*
+ * One entry of the token table found in the Dell calling-interface DMI
+ * record.  find_micmute_tokens() matches tokenid against the
+ * GLOBAL_MIC_MUTE_* ids; dell_micmute_led_set() forwards location and
+ * value to the firmware as arg1 and arg2.
+ */
+struct dell_bios_data_token {
+	u16 tokenid;
+	u16 location;
+	u16 value;
+};
+
+/*
+ * Overlay for the DMI record of type 0xda, as walked by
+ * find_micmute_tokens().  Packed so the members sit directly after the
+ * DMI header without padding; damap[] is the token table, scanned until
+ * an entry with tokenid 0xffff is reached.
+ */
+struct __attribute__ ((__packed__)) dell_bios_calling_interface {
+	struct	dmi_header header;
+	u16	cmd_io_addr;
+	u8	cmd_io_code;
+	u32	supported_cmds;
+	struct	dell_bios_data_token damap[];
+};
+
+static struct dell_bios_data_token dell_mic_tokens[2];
+
+/*
+ * Run one request through the Dell application WMI method.
+ *
+ * @args is handed to the firmware as the method input and, when the call
+ * succeeds, overwritten with the buffer the firmware returns.
+ *
+ * Returns 0 on success, -EINVAL when the method cannot be evaluated or
+ * does not return a buffer large enough to hold a reply, or the non-zero
+ * res1 status reported by the firmware.
+ */
+static int dell_wmi_perform_query(struct app_wmi_args *args)
+{
+	struct app_wmi_args *bios_return;
+	union acpi_object *obj;
+	struct acpi_buffer input;
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	int rc = -EINVAL;
+
+	input.length = sizeof(*args);
+	input.pointer = args;
+
+	status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output);
+	if (!ACPI_SUCCESS(status))
+		goto err_out0;
+
+	obj = output.pointer;
+	if (!obj)
+		goto err_out0;
+
+	/* Never read past the end of a reply the firmware cut short. */
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*bios_return))
+		goto err_out1;
+
+	bios_return = (struct app_wmi_args *)obj->buffer.pointer;
+	rc = bios_return->res1;
+	if (rc)
+		goto err_out1;
+
+	memcpy(args, bios_return, sizeof(struct app_wmi_args));
+	rc = 0;
+
+ err_out1:
+	/* output used ACPI_ALLOCATE_BUFFER, so the object is ours to free */
+	kfree(obj);
+ err_out0:
+	return rc;
+}
+
+/*
+ * dmi_walk() callback: pick the microphone-mute tokens out of the Dell
+ * calling-interface record (DMI type 0xda).
+ *
+ * The GLOBAL_MIC_MUTE_DISABLE token is stored in dell_mic_tokens[0] and
+ * the GLOBAL_MIC_MUTE_ENABLE token in dell_mic_tokens[1], so the array
+ * can be indexed by the requested LED state.  @dummy is unused.
+ */
+static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy)
+{
+	struct dell_bios_calling_interface *calling_interface;
+	struct dell_bios_data_token *token;
+	int token_size = sizeof(struct dell_bios_data_token);
+	int max_tokens;
+	int i;
+
+	if (dm->type != 0xda || dm->length <= 17)
+		return;
+
+	calling_interface = container_of(dm,
+			struct dell_bios_calling_interface, header);
+
+	/*
+	 * The 0xffff terminator normally ends the walk, but never trust it
+	 * beyond what the record length says the table can hold.
+	 */
+	max_tokens = (dm->length -
+		      offsetof(struct dell_bios_calling_interface, damap)) /
+		     token_size;
+
+	for (i = 0; i < max_tokens; i++) {
+		token = &calling_interface->damap[i];
+		if (token->tokenid == 0xffff)
+			break;
+
+		if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE)
+			memcpy(&dell_mic_tokens[0], token, token_size);
+		else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE)
+			memcpy(&dell_mic_tokens[1], token, token_size);
+	}
+}
+
+/*
+ * Switch the microphone-mute LED through the Dell application WMI method.
+ *
+ * @state indexes dell_mic_tokens[]: 0 sends the GLOBAL_MIC_MUTE_DISABLE
+ * token, 1 the GLOBAL_MIC_MUTE_ENABLE token.
+ *
+ * Returns @state once the request has been issued, -ENODEV when the WMI
+ * interface or the needed token is not present, -EINVAL for any other
+ * @state.
+ */
+static int dell_micmute_led_set(int state)
+{
+	struct app_wmi_args args;
+	struct dell_bios_data_token *token;
+
+	if (!wmi_has_guid(DELL_APP_GUID))
+		return -ENODEV;
+
+	if (state == 0 || state == 1)
+		token = &dell_mic_tokens[state];
+	else
+		return -EINVAL;
+
+	/* A slot that is still zeroed means the DMI walk found no token. */
+	if (!token->tokenid)
+		return -ENODEV;
+
+	memset(&args, 0, sizeof(struct app_wmi_args));
+
+	args.class = 1;
+	args.arg1 = token->location;
+	args.arg2 = token->value;
+
+	/* NOTE(review): the query result is not propagated to the caller. */
+	dell_wmi_perform_query(&args);
+
+	return state;
+}
+
+/*
+ * Exported entry point for other modules to drive a Dell LED.
+ *
+ * Only DELL_LED_MICMUTE is handled; its result is whatever
+ * dell_micmute_led_set() returns.  Any other @whichled is logged and
+ * answered with 0.
+ */
+int dell_app_wmi_led_set(int whichled, int on)
+{
+	if (whichled == DELL_LED_MICMUTE)
+		return dell_micmute_led_set(on);
+
+	pr_warn("led type %x is not supported\n", whichled);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
+
+/*
+ * Clear the cached mic-mute tokens, then let find_micmute_tokens() refill
+ * them from the DMI table.  Always returns 0.
+ */
+static int __init dell_micmute_led_init(void)
+{
+	memset(dell_mic_tokens, 0, sizeof(dell_mic_tokens));
+	dmi_walk(find_micmute_tokens, NULL);
+
+	return 0;
+}
+
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -181,21 +327,32 @@
 {
 	int error = 0;
 
-	if (!wmi_has_guid(DELL_LED_BIOS_GUID))
+	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	error = led_off();
-	if (error != 0)
-		return -ENODEV;
+	if (wmi_has_guid(DELL_APP_GUID))
+		error = dell_micmute_led_init();
 
-	return led_classdev_register(NULL, &dell_led);
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error != 0)
+			return -ENODEV;
+
+		error = led_classdev_register(NULL, &dell_led);
+	}
+
+	return error;
 }
 
 static void __exit dell_led_exit(void)
 {
-	led_classdev_unregister(&dell_led);
+	int error = 0;
 
-	led_off();
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error == 0)
+			led_classdev_unregister(&dell_led);
+	}
 }
 
 module_init(dell_led_init);

diff --git a/drivers/leds/leds-88pm860x.c b/drivers/leds/leds-88pm860x.c
index d1e1bca..c2def55 100644
--- a/drivers/leds/leds-88pm860x.c
+++ b/drivers/leds/leds-88pm860x.c

@@ -130,10 +130,9 @@
 	struct device_node *nproot, *np;
 	int iset = 0;
 
-	nproot = of_node_get(pdev->dev.parent->of_node);
-	if (!nproot)
+	if (!pdev->dev.parent->of_node)
 		return -ENODEV;
-	nproot = of_find_node_by_name(nproot, "leds");
+	nproot = of_get_child_by_name(pdev->dev.parent->of_node, "leds");
 	if (!nproot) {
 		dev_err(&pdev->dev, "failed to find leds node\n");
 		return -ENODEV;

diff --git a/drivers/leds/leds-adp5520.c b/drivers/leds/leds-adp5520.c
index 86b5bdb..5036d7b 100644
--- a/drivers/leds/leds-adp5520.c
+++ b/drivers/leds/leds-adp5520.c

@@ -120,13 +120,10 @@
 
 	led = devm_kzalloc(&pdev->dev, sizeof(*led) * pdata->num_leds,
 				GFP_KERNEL);
-	if (led == NULL) {
-		dev_err(&pdev->dev, "failed to alloc memory\n");
+	if (!led)
 		return -ENOMEM;
-	}
 
 	ret = adp5520_led_prepare(pdev);
-
 	if (ret) {
 		dev_err(&pdev->dev, "failed to write\n");
 		return ret;

diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c
index fb5a347..6078c15 100644
--- a/drivers/leds/leds-bd2802.c
+++ b/drivers/leds/leds-bd2802.c

@@ -678,10 +678,8 @@
 	int ret, i;
 
 	led = devm_kzalloc(&client->dev, sizeof(struct bd2802_led), GFP_KERNEL);
-	if (!led) {
-		dev_err(&client->dev, "failed to allocate driver data\n");
+	if (!led)
 		return -ENOMEM;
-	}
 
 	led->client = client;
 	pdata = led->pdata = dev_get_platdata(&client->dev);

diff --git a/drivers/leds/leds-da903x.c b/drivers/leds/leds-da903x.c
index 35dffb1..54b8b52 100644
--- a/drivers/leds/leds-da903x.c
+++ b/drivers/leds/leds-da903x.c

@@ -108,10 +108,8 @@
 	}
 
 	led = devm_kzalloc(&pdev->dev, sizeof(struct da903x_led), GFP_KERNEL);
-	if (led == NULL) {
-		dev_err(&pdev->dev, "failed to alloc memory for LED%d\n", id);
+	if (!led)
 		return -ENOMEM;
-	}
 
 	led->cdev.name = pdata->name;
 	led->cdev.default_trigger = pdata->default_trigger;

diff --git a/drivers/leds/leds-da9052.c b/drivers/leds/leds-da9052.c
index 01486ad..e4da1f4 100644
--- a/drivers/leds/leds-da9052.c
+++ b/drivers/leds/leds-da9052.c

@@ -126,8 +126,7 @@
 	led = devm_kzalloc(&pdev->dev,
 			   sizeof(struct da9052_led) * pled->num_leds,
 			   GFP_KERNEL);
-	if (led == NULL) {
-		dev_err(&pdev->dev, "Failed to alloc memory\n");
+	if (!led) {
 		error = -ENOMEM;
 		goto err;
 	}

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index cb5ed82..9e1716f 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c

@@ -1,5 +1,5 @@
 /*
- * lp5523.c - LP5523 LED Driver
+ * lp5523.c - LP5523, LP55231 LED Driver
  *
  * Copyright (C) 2010 Nokia Corporation
  * Copyright (C) 2012 Texas Instruments
@@ -814,6 +814,7 @@
 #ifdef CONFIG_OF
 static const struct of_device_id of_lp5523_leds_match[] = {
 	{ .compatible = "national,lp5523", },
+	{ .compatible = "ti,lp55231", },
 	{},
 };
 

diff --git a/drivers/leds/leds-pca9685.c b/drivers/leds/leds-pca9685.c
deleted file mode 100644
index 6e1ef3a..0000000
--- a/drivers/leds/leds-pca9685.c
+++ /dev/null

@@ -1,213 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.c driver by
- * Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * Driver for the NXP PCA9685 12-Bit PWM LED driver chip.
- *
- */
-
-#include <linux/ctype.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/leds.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/workqueue.h>
-
-#include <linux/platform_data/leds-pca9685.h>
-
-/* Register Addresses */
-#define PCA9685_MODE1 0x00
-#define PCA9685_MODE2 0x01
-#define PCA9685_LED0_ON_L 0x06
-#define PCA9685_ALL_LED_ON_L 0xFA
-
-/* MODE1 Register */
-#define PCA9685_ALLCALL 0x00
-#define PCA9685_SLEEP   0x04
-#define PCA9685_AI      0x05
-
-/* MODE2 Register */
-#define PCA9685_INVRT   0x04
-#define PCA9685_OUTDRV  0x02
-
-static const struct i2c_device_id pca9685_id[] = {
-	{ "pca9685", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, pca9685_id);
-
-struct pca9685_led {
-	struct i2c_client *client;
-	struct work_struct work;
-	u16 brightness;
-	struct led_classdev led_cdev;
-	int led_num; /* 0-15 */
-	char name[32];
-};
-
-static void pca9685_write_msg(struct i2c_client *client, u8 *buf, u8 len)
-{
-	struct i2c_msg msg = {
-		.addr = client->addr,
-		.flags = 0x00,
-		.len = len,
-		.buf = buf
-	};
-	i2c_transfer(client->adapter, &msg, 1);
-}
-
-static void pca9685_all_off(struct i2c_client *client)
-{
-	u8 i2c_buffer[5] = {PCA9685_ALL_LED_ON_L, 0x00, 0x00, 0x00, 0x10};
-	pca9685_write_msg(client, i2c_buffer, 5);
-}
-
-static void pca9685_led_work(struct work_struct *work)
-{
-	struct pca9685_led *pca9685;
-	u8 i2c_buffer[5];
-
-	pca9685 = container_of(work, struct pca9685_led, work);
-	i2c_buffer[0] = PCA9685_LED0_ON_L + 4 * pca9685->led_num;
-	/*
-	 * 4095 is the maximum brightness, so we set the ON time to 0x1000
-	 * which disables the PWM generator for that LED
-	 */
-	if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+1)) = cpu_to_le16(0x1000);
-	else
-		*((__le16 *)(i2c_buffer+1)) = 0x0000;
-
-	if (pca9685->brightness == 0)
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(0x1000);
-	else if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+3)) = 0x0000;
-	else
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(pca9685->brightness);
-
-	pca9685_write_msg(pca9685->client, i2c_buffer, 5);
-}
-
-static void pca9685_led_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
-{
-	struct pca9685_led *pca9685;
-	pca9685 = container_of(led_cdev, struct pca9685_led, led_cdev);
-	pca9685->brightness = value;
-
-	schedule_work(&pca9685->work);
-}
-
-static int pca9685_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
-{
-	struct pca9685_led *pca9685;
-	struct pca9685_platform_data *pdata;
-	int err;
-	u8 i;
-
-	pdata = dev_get_platdata(&client->dev);
-	if (pdata) {
-		if (pdata->leds.num_leds < 1 || pdata->leds.num_leds > 15) {
-			dev_err(&client->dev, "board info must claim 1-16 LEDs");
-			return -EINVAL;
-		}
-	}
-
-	pca9685 = devm_kzalloc(&client->dev, 16 * sizeof(*pca9685), GFP_KERNEL);
-	if (!pca9685)
-		return -ENOMEM;
-
-	i2c_set_clientdata(client, pca9685);
-	pca9685_all_off(client);
-
-	for (i = 0; i < 16; i++) {
-		pca9685[i].client = client;
-		pca9685[i].led_num = i;
-		pca9685[i].name[0] = '\0';
-		if (pdata && i < pdata->leds.num_leds) {
-			if (pdata->leds.leds[i].name)
-				strncpy(pca9685[i].name,
-					pdata->leds.leds[i].name,
-					sizeof(pca9685[i].name)-1);
-			if (pdata->leds.leds[i].default_trigger)
-				pca9685[i].led_cdev.default_trigger =
-					pdata->leds.leds[i].default_trigger;
-		}
-		if (strlen(pca9685[i].name) == 0) {
-			/*
-			 * Write adapter and address to the name as well.
-			 * Otherwise multiple chips attached to one host would
-			 * not work.
-			 */
-			snprintf(pca9685[i].name, sizeof(pca9685[i].name),
-					"pca9685:%d:x%.2x:%d",
-					client->adapter->nr, client->addr, i);
-		}
-		pca9685[i].led_cdev.name = pca9685[i].name;
-		pca9685[i].led_cdev.max_brightness = 0xfff;
-		pca9685[i].led_cdev.brightness_set = pca9685_led_set;
-
-		INIT_WORK(&pca9685[i].work, pca9685_led_work);
-		err = led_classdev_register(&client->dev, &pca9685[i].led_cdev);
-		if (err < 0)
-			goto exit;
-	}
-
-	if (pdata)
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			pdata->outdrv << PCA9685_OUTDRV |
-			pdata->inverted << PCA9685_INVRT);
-	else
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			PCA9685_TOTEM_POLE << PCA9685_OUTDRV);
-	/* Enable Auto-Increment, enable oscillator, ALLCALL/SUBADDR disabled */
-	i2c_smbus_write_byte_data(client, PCA9685_MODE1, BIT(PCA9685_AI));
-
-	return 0;
-
-exit:
-	while (i--) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	return err;
-}
-
-static int pca9685_remove(struct i2c_client *client)
-{
-	struct pca9685_led *pca9685 = i2c_get_clientdata(client);
-	u8 i;
-
-	for (i = 0; i < 16; i++) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	pca9685_all_off(client);
-	return 0;
-}
-
-static struct i2c_driver pca9685_driver = {
-	.driver = {
-		.name = "leds-pca9685",
-		.owner = THIS_MODULE,
-	},
-	.probe = pca9685_probe,
-	.remove = pca9685_remove,
-	.id_table = pca9685_id,
-};
-
-module_i2c_driver(pca9685_driver);
-
-MODULE_AUTHOR("Maximilian Güntner <maximilian.guentner@gmail.com>");
-MODULE_DESCRIPTION("PCA9685 LED Driver");
-MODULE_LICENSE("GPL v2");

diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index 7d0aaed..d672bb4 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c

@@ -69,6 +69,10 @@
 
 	duty *= brightness;
 	do_div(duty, max);
+
+	if (led_dat->active_low)
+		duty = led_dat->period - duty;
+
 	led_dat->duty = duty;
 
 	if (led_dat->can_sleep)
@@ -92,54 +96,77 @@
 	}
 }
 
-static int led_pwm_create_of(struct platform_device *pdev,
-			     struct led_pwm_priv *priv)
+static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
+		       struct led_pwm *led, struct device_node *child)
 {
-	struct device_node *child;
+	struct led_pwm_data *led_data = &priv->leds[priv->num_leds];
 	int ret;
 
-	for_each_child_of_node(pdev->dev.of_node, child) {
-		struct led_pwm_data *led_dat = &priv->leds[priv->num_leds];
+	led_data->active_low = led->active_low;
+	led_data->cdev.name = led->name;
+	led_data->cdev.default_trigger = led->default_trigger;
+	led_data->cdev.brightness_set = led_pwm_set;
+	led_data->cdev.brightness = LED_OFF;
+	led_data->cdev.max_brightness = led->max_brightness;
+	led_data->cdev.flags = LED_CORE_SUSPENDRESUME;
 
-		led_dat->cdev.name = of_get_property(child, "label",
-						     NULL) ? : child->name;
-
-		led_dat->pwm = devm_of_pwm_get(&pdev->dev, child, NULL);
-		if (IS_ERR(led_dat->pwm)) {
-			dev_err(&pdev->dev, "unable to request PWM for %s\n",
-				led_dat->cdev.name);
-			ret = PTR_ERR(led_dat->pwm);
-			goto err;
-		}
-		/* Get the period from PWM core when n*/
-		led_dat->period = pwm_get_period(led_dat->pwm);
-
-		led_dat->cdev.default_trigger = of_get_property(child,
-						"linux,default-trigger", NULL);
-		of_property_read_u32(child, "max-brightness",
-				     &led_dat->cdev.max_brightness);
-
-		led_dat->cdev.brightness_set = led_pwm_set;
-		led_dat->cdev.brightness = LED_OFF;
-		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
-
-		led_dat->can_sleep = pwm_can_sleep(led_dat->pwm);
-		if (led_dat->can_sleep)
-			INIT_WORK(&led_dat->work, led_pwm_work);
-
-		ret = led_classdev_register(&pdev->dev, &led_dat->cdev);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "failed to register for %s\n",
-				led_dat->cdev.name);
-			of_node_put(child);
-			goto err;
-		}
-		priv->num_leds++;
+	if (child)
+		led_data->pwm = devm_of_pwm_get(dev, child, NULL);
+	else
+		led_data->pwm = devm_pwm_get(dev, led->name);
+	if (IS_ERR(led_data->pwm)) {
+		ret = PTR_ERR(led_data->pwm);
+		dev_err(dev, "unable to request PWM for %s: %d\n",
+			led->name, ret);
+		return ret;
 	}
 
-	return 0;
-err:
-	led_pwm_cleanup(priv);
+	if (child)
+		led_data->period = pwm_get_period(led_data->pwm);
+
+	led_data->can_sleep = pwm_can_sleep(led_data->pwm);
+	if (led_data->can_sleep)
+		INIT_WORK(&led_data->work, led_pwm_work);
+
+	led_data->period = pwm_get_period(led_data->pwm);
+	if (!led_data->period && (led->pwm_period_ns > 0))
+		led_data->period = led->pwm_period_ns;
+
+	ret = led_classdev_register(dev, &led_data->cdev);
+	if (ret == 0) {
+		priv->num_leds++;
+	} else {
+		dev_err(dev, "failed to register PWM led for %s: %d\n",
+			led->name, ret);
+	}
+
+	return ret;
+}
+
+static int led_pwm_create_of(struct device *dev, struct led_pwm_priv *priv)
+{
+	struct device_node *child;
+	struct led_pwm led;
+	int ret = 0;
+
+	memset(&led, 0, sizeof(led));
+
+	for_each_child_of_node(dev->of_node, child) {
+		led.name = of_get_property(child, "label", NULL) ? :
+			   child->name;
+
+		led.default_trigger = of_get_property(child,
+						"linux,default-trigger", NULL);
+		led.active_low = of_property_read_bool(child, "active-low");
+		of_property_read_u32(child, "max-brightness",
+				     &led.max_brightness);
+
+		ret = led_pwm_add(dev, priv, &led, child);
+		if (ret) {
+			of_node_put(child);
+			break;
+		}
+	}
 
 	return ret;
 }
@@ -166,51 +193,23 @@
 
 	if (pdata) {
 		for (i = 0; i < count; i++) {
-			struct led_pwm *cur_led = &pdata->leds[i];
-			struct led_pwm_data *led_dat = &priv->leds[i];
-
-			led_dat->pwm = devm_pwm_get(&pdev->dev, cur_led->name);
-			if (IS_ERR(led_dat->pwm)) {
-				ret = PTR_ERR(led_dat->pwm);
-				dev_err(&pdev->dev,
-					"unable to request PWM for %s\n",
-					cur_led->name);
-				goto err;
-			}
-
-			led_dat->cdev.name = cur_led->name;
-			led_dat->cdev.default_trigger = cur_led->default_trigger;
-			led_dat->active_low = cur_led->active_low;
-			led_dat->period = cur_led->pwm_period_ns;
-			led_dat->cdev.brightness_set = led_pwm_set;
-			led_dat->cdev.brightness = LED_OFF;
-			led_dat->cdev.max_brightness = cur_led->max_brightness;
-			led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
-
-			led_dat->can_sleep = pwm_can_sleep(led_dat->pwm);
-			if (led_dat->can_sleep)
-				INIT_WORK(&led_dat->work, led_pwm_work);
-
-			ret = led_classdev_register(&pdev->dev, &led_dat->cdev);
-			if (ret < 0)
-				goto err;
+			ret = led_pwm_add(&pdev->dev, priv, &pdata->leds[i],
+					  NULL);
+			if (ret)
+				break;
 		}
-		priv->num_leds = count;
 	} else {
-		ret = led_pwm_create_of(pdev, priv);
-		if (ret)
-			return ret;
+		ret = led_pwm_create_of(&pdev->dev, priv);
+	}
+
+	if (ret) {
+		led_pwm_cleanup(priv);
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, priv);
 
 	return 0;
-
-err:
-	priv->num_leds = i;
-	led_pwm_cleanup(priv);
-
-	return ret;
 }
 
 static int led_pwm_remove(struct platform_device *pdev)

diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c
index 28988b7..785eb53 100644
--- a/drivers/leds/leds-s3c24xx.c
+++ b/drivers/leds/leds-s3c24xx.c

@@ -76,10 +76,8 @@
 
 	led = devm_kzalloc(&dev->dev, sizeof(struct s3c24xx_gpio_led),
 			   GFP_KERNEL);
-	if (led == NULL) {
-		dev_err(&dev->dev, "No memory for device\n");
+	if (!led)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(dev, led);
 

diff --git a/drivers/leds/leds-sunfire.c b/drivers/leds/leds-sunfire.c
index 388632d..0b8cc4a 100644
--- a/drivers/leds/leds-sunfire.c
+++ b/drivers/leds/leds-sunfire.c

@@ -135,10 +135,8 @@
 	}
 
 	p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
-	if (!p) {
-		dev_err(&pdev->dev, "Could not allocate struct sunfire_drvdata\n");
+	if (!p)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < NUM_LEDS_PER_BOARD; i++) {
 		struct led_classdev *lp = &p->leds[i].led_cdev;

diff --git a/drivers/leds/trigger/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c
index 1c3ee9f..aec0f02 100644
--- a/drivers/leds/trigger/ledtrig-cpu.c
+++ b/drivers/leds/trigger/ledtrig-cpu.c

@@ -47,7 +47,7 @@
  */
 void ledtrig_cpu(enum cpu_led_event ledevt)
 {
-	struct led_trigger_cpu *trig = &__get_cpu_var(cpu_trig);
+	struct led_trigger_cpu *trig = this_cpu_ptr(&cpu_trig);
 
 	/* Locate the correct CPU LED */
 	switch (ledevt) {

diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index 7fe58b0..b350fb8 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c

@@ -555,8 +555,18 @@
 	pid_param.interval	= PM121_SYS_INTERVAL;
 	pid_param.history_len	= PM121_SYS_HISTORY_SIZE;
 	pid_param.itarget	= param->itarget;
-	pid_param.min		= control->ops->get_min(control);
-	pid_param.max		= control->ops->get_max(control);
+	if(control)
+	{
+		pid_param.min		= control->ops->get_min(control);
+		pid_param.max		= control->ops->get_max(control);
+	} else {
+		/*
+		 * This is probably not the right thing to do.
+		 * Perhaps goto fail if control == NULL above?
+		 */
+		pid_param.min		= 0;
+		pid_param.max		= 0;
+	}
 
 	wf_pid_init(&pm121_sys_state[loop_id]->pid, &pid_param);
 
@@ -571,7 +581,7 @@
 	   control the same control */
 	printk(KERN_WARNING "pm121: failed to set up %s loop "
 	       "setting \"%s\" to max speed.\n",
-	       loop_names[loop_id], control->name);
+	       loop_names[loop_id], control ? control->name : "uninitialized value");
 
 	if (control)
 		wf_control_set_max(control);

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a8e66a..67f8b31 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c

@@ -669,17 +669,13 @@
 /*
  * return a pointer to the page in the filemap that contains the given bit
  *
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap_storage *store,
 					    unsigned long chunk)
 {
 	if (file_page_index(store, chunk) >= store->file_pages)
 		return NULL;
-	return store->filemap[file_page_index(store, chunk)
-			      - file_page_index(store, 0)];
+	return store->filemap[file_page_index(store, chunk)];
 }
 
 static int bitmap_storage_alloc(struct bitmap_storage *store,

diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index 85f0b70..f752d12 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c

@@ -14,13 +14,17 @@
 
 /*----------------------------------------------------------------*/
 
-struct dm_bio_prison {
+struct bucket {
 	spinlock_t lock;
+	struct hlist_head cells;
+};
+
+struct dm_bio_prison {
 	mempool_t *cell_pool;
 
 	unsigned nr_buckets;
 	unsigned hash_mask;
-	struct hlist_head *cells;
+	struct bucket *buckets;
 };
 
 /*----------------------------------------------------------------*/
@@ -40,6 +44,12 @@
 
 static struct kmem_cache *_cell_cache;
 
+static void init_bucket(struct bucket *b)
+{
+	spin_lock_init(&b->lock);
+	INIT_HLIST_HEAD(&b->cells);
+}
+
 /*
  * @nr_cells should be the number of cells you want in use _concurrently_.
  * Don't confuse it with the number of distinct keys.
@@ -49,13 +59,12 @@
 	unsigned i;
 	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
 	size_t len = sizeof(struct dm_bio_prison) +
-		(sizeof(struct hlist_head) * nr_buckets);
+		(sizeof(struct bucket) * nr_buckets);
 	struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
 
 	if (!prison)
 		return NULL;
 
-	spin_lock_init(&prison->lock);
 	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
 	if (!prison->cell_pool) {
 		kfree(prison);
@@ -64,9 +73,9 @@
 
 	prison->nr_buckets = nr_buckets;
 	prison->hash_mask = nr_buckets - 1;
-	prison->cells = (struct hlist_head *) (prison + 1);
+	prison->buckets = (struct bucket *) (prison + 1);
 	for (i = 0; i < nr_buckets; i++)
-		INIT_HLIST_HEAD(prison->cells + i);
+		init_bucket(prison->buckets + i);
 
 	return prison;
 }
@@ -107,40 +116,44 @@
 		       (lhs->block == rhs->block);
 }
 
-static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
+static struct bucket *get_bucket(struct dm_bio_prison *prison,
+				 struct dm_cell_key *key)
+{
+	return prison->buckets + hash_key(prison, key);
+}
+
+static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
 						  struct dm_cell_key *key)
 {
 	struct dm_bio_prison_cell *cell;
 
-	hlist_for_each_entry(cell, bucket, list)
+	hlist_for_each_entry(cell, &b->cells, list)
 		if (keys_equal(&cell->key, key))
 			return cell;
 
 	return NULL;
 }
 
-static void __setup_new_cell(struct dm_bio_prison *prison,
+static void __setup_new_cell(struct bucket *b,
 			     struct dm_cell_key *key,
 			     struct bio *holder,
-			     uint32_t hash,
 			     struct dm_bio_prison_cell *cell)
 {
 	memcpy(&cell->key, key, sizeof(cell->key));
 	cell->holder = holder;
 	bio_list_init(&cell->bios);
-	hlist_add_head(&cell->list, prison->cells + hash);
+	hlist_add_head(&cell->list, &b->cells);
 }
 
-static int __bio_detain(struct dm_bio_prison *prison,
+static int __bio_detain(struct bucket *b,
 			struct dm_cell_key *key,
 			struct bio *inmate,
 			struct dm_bio_prison_cell *cell_prealloc,
 			struct dm_bio_prison_cell **cell_result)
 {
-	uint32_t hash = hash_key(prison, key);
 	struct dm_bio_prison_cell *cell;
 
-	cell = __search_bucket(prison->cells + hash, key);
+	cell = __search_bucket(b, key);
 	if (cell) {
 		if (inmate)
 			bio_list_add(&cell->bios, inmate);
@@ -148,7 +161,7 @@
 		return 1;
 	}
 
-	__setup_new_cell(prison, key, inmate, hash, cell_prealloc);
+	__setup_new_cell(b, key, inmate, cell_prealloc);
 	*cell_result = cell_prealloc;
 	return 0;
 }
@@ -161,10 +174,11 @@
 {
 	int r;
 	unsigned long flags;
+	struct bucket *b = get_bucket(prison, key);
 
-	spin_lock_irqsave(&prison->lock, flags);
-	r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
-	spin_unlock_irqrestore(&prison->lock, flags);
+	spin_lock_irqsave(&b->lock, flags);
+	r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
+	spin_unlock_irqrestore(&b->lock, flags);
 
 	return r;
 }
@@ -208,10 +222,11 @@
 		     struct bio_list *bios)
 {
 	unsigned long flags;
+	struct bucket *b = get_bucket(prison, &cell->key);
 
-	spin_lock_irqsave(&prison->lock, flags);
+	spin_lock_irqsave(&b->lock, flags);
 	__cell_release(cell, bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
+	spin_unlock_irqrestore(&b->lock, flags);
 }
 EXPORT_SYMBOL_GPL(dm_cell_release);
 
@@ -230,28 +245,25 @@
 			       struct bio_list *inmates)
 {
 	unsigned long flags;
+	struct bucket *b = get_bucket(prison, &cell->key);
 
-	spin_lock_irqsave(&prison->lock, flags);
+	spin_lock_irqsave(&b->lock, flags);
 	__cell_release_no_holder(cell, inmates);
-	spin_unlock_irqrestore(&prison->lock, flags);
+	spin_unlock_irqrestore(&b->lock, flags);
 }
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell)
+		   struct dm_bio_prison_cell *cell, int error)
 {
 	struct bio_list bios;
 	struct bio *bio;
-	unsigned long flags;
 
 	bio_list_init(&bios);
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, &bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
+	dm_cell_release(prison, cell, &bios);
 
 	while ((bio = bio_list_pop(&bios)))
-		bio_io_error(bio);
+		bio_endio(bio, error);
 }
 EXPORT_SYMBOL_GPL(dm_cell_error);
 

diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 3f83319..6805a14 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h

@@ -85,7 +85,7 @@
 			       struct dm_bio_prison_cell *cell,
 			       struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell);
+		   struct dm_bio_prison_cell *cell, int error);
 
 /*----------------------------------------------------------------*/
 

diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 414dad4..ad913cd 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c

@@ -1391,7 +1391,8 @@
 
 static void era_destroy(struct era *era)
 {
-	metadata_close(era->md);
+	if (era->md)
+		metadata_close(era->md);
 
 	if (era->wq)
 		destroy_workqueue(era->wq);

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ebfa411..3f6fd9d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c

@@ -1242,17 +1242,8 @@
 	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if (noretry_error(error)) {
-		if ((clone->cmd_flags & REQ_WRITE_SAME) &&
-		    !clone->q->limits.max_write_same_sectors) {
-			struct queue_limits *limits;
-
-			/* device doesn't really support WRITE SAME, disable it */
-			limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
-			limits->max_write_same_sectors = 0;
-		}
+	if (noretry_error(error))
 		return error;
-	}
 
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 8e0caed..5bd2290 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c

@@ -2141,6 +2141,11 @@
  * Origin: maps a linear range of a device, with hooks for snapshotting.
  */
 
+struct dm_origin {
+	struct dm_dev *dev;
+	unsigned split_boundary;
+};
+
 /*
  * Construct an origin mapping: <dev_path>
  * The context for an origin is merely a 'struct dm_dev *'
@@ -2149,41 +2154,65 @@
 static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	int r;
-	struct dm_dev *dev;
+	struct dm_origin *o;
 
 	if (argc != 1) {
 		ti->error = "origin: incorrect number of arguments";
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev);
-	if (r) {
-		ti->error = "Cannot get target device";
-		return r;
+	o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
+	if (!o) {
+		ti->error = "Cannot allocate private origin structure";
+		r = -ENOMEM;
+		goto bad_alloc;
 	}
 
-	ti->private = dev;
+	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
+	if (r) {
+		ti->error = "Cannot get target device";
+		goto bad_open;
+	}
+
+	ti->private = o;
 	ti->num_flush_bios = 1;
 
 	return 0;
+
+bad_open:
+	kfree(o);
+bad_alloc:
+	return r;
 }
 
 static void origin_dtr(struct dm_target *ti)
 {
-	struct dm_dev *dev = ti->private;
-	dm_put_device(ti, dev);
+	struct dm_origin *o = ti->private;
+	dm_put_device(ti, o->dev);
+	kfree(o);
 }
 
 static int origin_map(struct dm_target *ti, struct bio *bio)
 {
-	struct dm_dev *dev = ti->private;
-	bio->bi_bdev = dev->bdev;
+	struct dm_origin *o = ti->private;
+	unsigned available_sectors;
 
-	if (bio->bi_rw & REQ_FLUSH)
+	bio->bi_bdev = o->dev->bdev;
+
+	if (unlikely(bio->bi_rw & REQ_FLUSH))
 		return DM_MAPIO_REMAPPED;
 
+	if (bio_rw(bio) != WRITE)
+		return DM_MAPIO_REMAPPED;
+
+	available_sectors = o->split_boundary -
+		((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
+
+	if (bio_sectors(bio) > available_sectors)
+		dm_accept_partial_bio(bio, available_sectors);
+
 	/* Only tell snapshots if this is a write */
-	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
+	return do_origin(o->dev, bio);
 }
 
 /*
@@ -2192,15 +2221,15 @@
  */
 static void origin_resume(struct dm_target *ti)
 {
-	struct dm_dev *dev = ti->private;
+	struct dm_origin *o = ti->private;
 
-	ti->max_io_len = get_origin_minimum_chunksize(dev->bdev);
+	o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
 }
 
 static void origin_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
-	struct dm_dev *dev = ti->private;
+	struct dm_origin *o = ti->private;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -2208,7 +2237,7 @@
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%s", dev->name);
+		snprintf(result, maxlen, "%s", o->dev->name);
 		break;
 	}
 }
@@ -2216,13 +2245,13 @@
 static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 			struct bio_vec *biovec, int max_size)
 {
-	struct dm_dev *dev = ti->private;
-	struct request_queue *q = bdev_get_queue(dev->bdev);
+	struct dm_origin *o = ti->private;
+	struct request_queue *q = bdev_get_queue(o->dev->bdev);
 
 	if (!q->merge_bvec_fn)
 		return max_size;
 
-	bvm->bi_bdev = dev->bdev;
+	bvm->bi_bdev = o->dev->bdev;
 
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
@@ -2230,9 +2259,9 @@
 static int origin_iterate_devices(struct dm_target *ti,
 				  iterate_devices_callout_fn fn, void *data)
 {
-	struct dm_dev *dev = ti->private;
+	struct dm_origin *o = ti->private;
 
-	return fn(ti, dev, 0, ti->len, data);
+	return fn(ti, o->dev, 0, ti->len, data);
 }
 
 static struct target_type origin_target = {

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 50601ec..5f59f1e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c

@@ -465,8 +465,8 @@
 }
 EXPORT_SYMBOL(dm_get_device);
 
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data)
+static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t len, void *data)
 {
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
@@ -499,7 +499,6 @@
 					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 /*
  * Decrement a device's use count and remove it if necessary.

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 242ac2e..fc9c848 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c

@@ -310,13 +310,18 @@
 	wake_worker(pool);
 }
 
-static void cell_error(struct pool *pool,
-		       struct dm_bio_prison_cell *cell)
+static void cell_error_with_code(struct pool *pool,
+				 struct dm_bio_prison_cell *cell, int error_code)
 {
-	dm_cell_error(pool->prison, cell);
+	dm_cell_error(pool->prison, cell, error_code);
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
+static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+	cell_error_with_code(pool, cell, -EIO);
+}
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -1027,7 +1032,7 @@
 	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
-static bool should_error_unserviceable_bio(struct pool *pool)
+static int should_error_unserviceable_bio(struct pool *pool)
 {
 	enum pool_mode m = get_pool_mode(pool);
 
@@ -1035,25 +1040,27 @@
 	case PM_WRITE:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
-		return true;
+		return -EIO;
 
 	case PM_OUT_OF_DATA_SPACE:
-		return pool->pf.error_if_no_space;
+		return pool->pf.error_if_no_space ? -ENOSPC : 0;
 
 	case PM_READ_ONLY:
 	case PM_FAIL:
-		return true;
+		return -EIO;
 	default:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
-		return true;
+		return -EIO;
 	}
 }
 
 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
 {
-	if (should_error_unserviceable_bio(pool))
-		bio_io_error(bio);
+	int error = should_error_unserviceable_bio(pool);
+
+	if (error)
+		bio_endio(bio, error);
 	else
 		retry_on_resume(bio);
 }
@@ -1062,18 +1069,21 @@
 {
 	struct bio *bio;
 	struct bio_list bios;
+	int error;
 
-	if (should_error_unserviceable_bio(pool)) {
-		cell_error(pool, cell);
+	error = should_error_unserviceable_bio(pool);
+	if (error) {
+		cell_error_with_code(pool, cell, error);
 		return;
 	}
 
 	bio_list_init(&bios);
 	cell_release(pool, cell, &bios);
 
-	if (should_error_unserviceable_bio(pool))
+	error = should_error_unserviceable_bio(pool);
+	if (error)
 		while ((bio = bio_list_pop(&bios)))
-			bio_io_error(bio);
+			bio_endio(bio, error);
 	else
 		while ((bio = bio_list_pop(&bios)))
 			retry_on_resume(bio);
@@ -1610,47 +1620,63 @@
 
 /*----------------------------------------------------------------*/
 
-struct noflush_work {
+struct pool_work {
 	struct work_struct worker;
-	struct thin_c *tc;
-
-	atomic_t complete;
-	wait_queue_head_t wait;
+	struct completion complete;
 };
 
-static void complete_noflush_work(struct noflush_work *w)
+static struct pool_work *to_pool_work(struct work_struct *ws)
 {
-	atomic_set(&w->complete, 1);
-	wake_up(&w->wait);
+	return container_of(ws, struct pool_work, worker);
+}
+
+static void pool_work_complete(struct pool_work *pw)
+{
+	complete(&pw->complete);
+}
+
+static void pool_work_wait(struct pool_work *pw, struct pool *pool,
+			   void (*fn)(struct work_struct *))
+{
+	INIT_WORK_ONSTACK(&pw->worker, fn);
+	init_completion(&pw->complete);
+	queue_work(pool->wq, &pw->worker);
+	wait_for_completion(&pw->complete);
+}
+
+/*----------------------------------------------------------------*/
+
+struct noflush_work {
+	struct pool_work pw;
+	struct thin_c *tc;
+};
+
+static struct noflush_work *to_noflush(struct work_struct *ws)
+{
+	return container_of(to_pool_work(ws), struct noflush_work, pw);
 }
 
 static void do_noflush_start(struct work_struct *ws)
 {
-	struct noflush_work *w = container_of(ws, struct noflush_work, worker);
+	struct noflush_work *w = to_noflush(ws);
 	w->tc->requeue_mode = true;
 	requeue_io(w->tc);
-	complete_noflush_work(w);
+	pool_work_complete(&w->pw);
 }
 
 static void do_noflush_stop(struct work_struct *ws)
 {
-	struct noflush_work *w = container_of(ws, struct noflush_work, worker);
+	struct noflush_work *w = to_noflush(ws);
 	w->tc->requeue_mode = false;
-	complete_noflush_work(w);
+	pool_work_complete(&w->pw);
 }
 
 static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 {
 	struct noflush_work w;
 
-	INIT_WORK_ONSTACK(&w.worker, fn);
 	w.tc = tc;
-	atomic_set(&w.complete, 0);
-	init_waitqueue_head(&w.wait);
-
-	queue_work(tc->pool->wq, &w.worker);
-
-	wait_event(w.wait, atomic_read(&w.complete));
+	pool_work_wait(&w.pw, tc->pool, fn);
 }
 
 /*----------------------------------------------------------------*/
@@ -3068,7 +3094,8 @@
 	 */
 	if (pt->adjusted_pf.discard_passdown) {
 		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
-		limits->discard_granularity = data_limits->discard_granularity;
+		limits->discard_granularity = max(data_limits->discard_granularity,
+						  pool->sectors_per_block << SECTOR_SHIFT);
 	} else
 		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
 }

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index aa9e093..437d990 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -755,6 +755,14 @@
 	}
 }
 
+static void disable_write_same(struct mapped_device *md)
+{
+	struct queue_limits *limits = dm_get_queue_limits(md);
+
+	/* device doesn't really support WRITE SAME, disable it */
+	limits->max_write_same_sectors = 0;
+}
+
 static void clone_endio(struct bio *bio, int error)
 {
 	int r = 0;
@@ -783,6 +791,10 @@
 		}
 	}
 
+	if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
+		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
+		disable_write_same(md);
+
 	free_tio(md, tio);
 	dec_pending(io, error);
 }
@@ -977,6 +989,10 @@
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
+	if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
+		     !clone->q->limits.max_write_same_sectors))
+		disable_write_same(tio->md);
+
 	if (r <= 0)
 		/* The target wants to complete the I/O */
 		dm_end_request(clone, r);
@@ -1110,6 +1126,46 @@
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
+/*
+ * A target may call dm_accept_partial_bio only from its map routine.  It is
+ * allowed for all bio types except REQ_FLUSH (enforced with BUG_ON below).
+ *
+ * dm_accept_partial_bio tells dm that the target wants to process only
+ * n_sectors more sectors of the bio; the rest of the data should be sent in
+ * a later bio.
+ *
+ * A diagram that explains the arithmetic (all sizes in sectors):
+ * +--------------------+---------------+-------+
+ * |         1          |       2       |   3   |
+ * +--------------------+---------------+-------+
+ *
+ * <-------------- *tio->len_ptr --------------->
+ *                      <------- bi_size ------->
+ *                      <-- n_sectors -->
+ *
+ * Region 1 was already iterated over with bio_advance or a similar function.
+ *	(it may be empty if the target doesn't use bio_advance)
+ * Region 2 is the remaining bio size that the target wants to process.
+ *	(it may be empty if region 1 is non-empty, although there is no reason
+ *	 to make it empty)
+ * Region 3 (bi_size - n_sectors) is subtracted from *tio->len_ptr and cut off
+ * the bio; the target requires that it be sent in the next bio.
+ *
+ * If the target receives multiple copies of the bio (via num_*bios, etc),
+ * the sum of regions 1+2 must be the same for all copies of the bio.
+ */
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
+{
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+	BUG_ON(bio->bi_rw & REQ_FLUSH);
+	BUG_ON(bi_size > *tio->len_ptr);
+	BUG_ON(n_sectors > bi_size);
+	*tio->len_ptr -= bi_size - n_sectors;
+	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
 static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
@@ -1152,10 +1208,10 @@
 	struct bio *bio;
 	struct dm_io *io;
 	sector_t sector;
-	sector_t sector_count;
+	unsigned sector_count;
 };
 
-static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
+static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
 {
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_iter.bi_size = to_bytes(len);
@@ -1200,11 +1256,13 @@
 
 static void __clone_and_map_simple_bio(struct clone_info *ci,
 				       struct dm_target *ti,
-				       unsigned target_bio_nr, sector_t len)
+				       unsigned target_bio_nr, unsigned *len)
 {
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
+	tio->len_ptr = len;
+
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1212,13 +1270,13 @@
 	 */
 	 __bio_clone_fast(clone, ci->bio);
 	if (len)
-		bio_setup_sector(clone, ci->sector, len);
+		bio_setup_sector(clone, ci->sector, *len);
 
 	__map_bio(tio);
 }
 
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-				  unsigned num_bios, sector_t len)
+				  unsigned num_bios, unsigned *len)
 {
 	unsigned target_bio_nr;
 
@@ -1233,13 +1291,13 @@
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
+		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
 
 	return 0;
 }
 
 static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				     sector_t sector, unsigned len)
+				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
@@ -1254,7 +1312,8 @@
 
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, 0, target_bio_nr);
-		clone_bio(tio, bio, sector, len);
+		tio->len_ptr = len;
+		clone_bio(tio, bio, sector, *len);
 		__map_bio(tio);
 	}
 }
@@ -1283,7 +1342,7 @@
 				       is_split_required_fn is_split_required)
 {
 	struct dm_target *ti;
-	sector_t len;
+	unsigned len;
 	unsigned num_bios;
 
 	do {
@@ -1302,11 +1361,11 @@
 			return -EOPNOTSUPP;
 
 		if (is_split_required && !is_split_required(ti))
-			len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
+			len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
 		else
-			len = min(ci->sector_count, max_io_len(ci->sector, ti));
+			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
 
-		__send_duplicate_bios(ci, ti, num_bios, len);
+		__send_duplicate_bios(ci, ti, num_bios, &len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1345,7 +1404,7 @@
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, len);
+	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
 
 	ci->sector += len;
 	ci->sector_count -= len;
@@ -1439,7 +1498,6 @@
 	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-
 		max_size = 0;
 
 out:

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2382cfc..3484685 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c

@@ -3448,6 +3448,8 @@
 		mddev->level = LEVEL_NONE;
 		return rv;
 	}
+	if (mddev->ro)
+		return  -EROFS;
 
 	/* request to change the personality.  Need to ensure:
 	 *  - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_layout = n;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -3723,6 +3727,8 @@
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_chunk_sectors = n >> 9;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -6135,6 +6141,8 @@
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
+	if (mddev->ro)
+		return -EROFS;
 
 	rdev_for_each(rdev, mddev) {
 		sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@
 	/* change the number of raid disks */
 	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
+	if (mddev->ro)
+		return -EROFS;
 	if (raid_disks <= 0 ||
 	    (mddev->max_disks && raid_disks >= mddev->max_disks))
 		return -EINVAL;
@@ -8333,7 +8343,7 @@
 			if (a < s) {
 				/* we need to split this range */
 				if (bb->count >= MD_MAX_BADBLOCKS) {
-					rv = 0;
+					rv = -ENOSPC;
 					goto out;
 				}
 				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2afef4e..6234b2e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c

@@ -292,9 +292,12 @@
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
 		if (test_bit(STRIPE_DELAYED, &sh->state) &&
-		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			if (atomic_read(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 			   sh->bm_seq - conf->seq_write > 0)
 			list_add_tail(&sh->lru, &conf->bitmap_list);
 		else {
@@ -413,6 +416,11 @@
 	int hash;
 	bool wakeup;
 
+	/* Avoid release_list until the last reference.
+	 */
+	if (atomic_add_unless(&sh->count, -1, 1))
+		return;
+
 	if (unlikely(!conf->mddev->thread) ||
 		test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
 		goto slow_path;
@@ -479,6 +487,7 @@
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num ; i++) {
+		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
 		p = sh->dev[i].page;
 		if (!p)
 			continue;
@@ -499,6 +508,7 @@
 			return 1;
 		}
 		sh->dev[i].page = page;
+		sh->dev[i].orig_page = page;
 	}
 	return 0;
 }
@@ -855,6 +865,9 @@
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
 
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].vec.bv_page = sh->dev[i].page;
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@
 			else
 				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].rvec.bv_page = sh->dev[i].page;
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-	sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh)
 {
 	struct bio_vec bvl;
 	struct bvec_iter iter;
@@ -965,11 +982,16 @@
 		if (clen > 0) {
 			b_offset += bvl.bv_offset;
 			bio_page = bvl.bv_page;
-			if (frombio)
-				tx = async_memcpy(page, bio_page, page_offset,
+			if (frombio) {
+				if (sh->raid_conf->skip_copy &&
+				    b_offset == 0 && page_offset == 0 &&
+				    clen == STRIPE_SIZE)
+					*page = bio_page;
+				else
+					tx = async_memcpy(*page, bio_page, page_offset,
 						  b_offset, clen, &submit);
-			else
-				tx = async_memcpy(bio_page, page, b_offset,
+			} else
+				tx = async_memcpy(bio_page, *page, b_offset,
 						  page_offset, clen, &submit);
 		}
 		/* chain the operations */
@@ -1045,8 +1067,8 @@
 			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, dev->page,
-					dev->sector, tx);
+				tx = async_copy_data(0, rbi, &dev->page,
+					dev->sector, tx, sh);
 				rbi = r5_next_bio(rbi, dev->sector);
 			}
 		}
@@ -1384,6 +1406,7 @@
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
+			WARN_ON(dev->page != dev->orig_page);
 
 			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@
 					set_bit(R5_SyncIO, &dev->flags);
 				if (wbi->bi_rw & REQ_DISCARD)
 					set_bit(R5_Discard, &dev->flags);
-				else
-					tx = async_copy_data(1, wbi, dev->page,
-						dev->sector, tx);
+				else {
+					tx = async_copy_data(1, wbi, &dev->page,
+						dev->sector, tx, sh);
+					if (dev->page != dev->orig_page) {
+						set_bit(R5_SkipCopy, &dev->flags);
+						clear_bit(R5_UPTODATE, &dev->flags);
+						clear_bit(R5_OVERWRITE, &dev->flags);
+					}
+				}
 				wbi = r5_next_bio(wbi, dev->sector);
 			}
 		}
@@ -1426,7 +1455,7 @@
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->written || i == pd_idx || i == qd_idx) {
-			if (!discard)
+			if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
 				set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@
 		osh = get_free_stripe(conf, hash);
 		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
-		for(i=0; i<conf->pool_size; i++)
+		for(i=0; i<conf->pool_size; i++) {
 			nsh->dev[i].page = osh->dev[i].page;
+			nsh->dev[i].orig_page = osh->dev[i].page;
+		}
 		for( ; i<newsize; i++)
 			nsh->dev[i].page = NULL;
 		nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
 				nsh->dev[i].page = p;
+				nsh->dev[i].orig_page = p;
 				if (!p)
 					err = -ENOMEM;
 			}
@@ -2133,24 +2165,20 @@
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-	
+
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
 	struct r5dev *dev = &sh->dev[i];
 
 	bio_init(&dev->req);
 	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_vcnt++;
-	dev->req.bi_max_vecs++;
+	dev->req.bi_max_vecs = 1;
 	dev->req.bi_private = sh;
-	dev->vec.bv_page = dev->page;
 
 	bio_init(&dev->rreq);
 	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_vcnt++;
-	dev->rreq.bi_max_vecs++;
+	dev->rreq.bi_max_vecs = 1;
 	dev->rreq.bi_private = sh;
-	dev->rvec.bv_page = dev->page;
 
 	dev->flags = 0;
 	dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
+		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].page = sh->dev[i].orig_page;
+		}
+
 		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@
 	     (s->failed >= 1 && fdev[0]->toread) ||
 	     (s->failed >= 2 && fdev[1]->toread) ||
 	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
 	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+	     (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+	      s->to_write < sh->raid_conf->raid_disks - 2 &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
 		 */
@@ -2991,12 +3027,17 @@
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
 			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags) ||
+			     test_bit(R5_SkipCopy, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
 				if (test_and_clear_bit(R5_Discard, &dev->flags))
 					clear_bit(R5_UPTODATE, &dev->flags);
+				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+					dev->page = dev->orig_page;
+				}
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@
 						0);
 			} else if (test_bit(R5_Discard, &dev->flags))
 				discard_pending = 1;
+			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+			WARN_ON(dev->page != dev->orig_page);
 		}
 	if (!discard_pending &&
 	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
 		    test_bit(R5_Wantcompute, &dev->flags))) {
-			if (test_bit(R5_Insync, &dev->flags)) rcw++;
+			if (test_bit(R5_Insync, &dev->flags))
+				rcw++;
 			else
 				rcw += 2*disks;
 		}
@@ -3107,10 +3151,10 @@
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			    test_bit(R5_Wantcompute, &dev->flags)) &&
 			    test_bit(R5_Insync, &dev->flags)) {
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-					pr_debug("Read_old block "
-						 "%d for r-m-w\n", i);
+				if (test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
+					pr_debug("Read_old block %d for r-m-w\n",
+						 i);
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
@@ -3133,10 +3177,9 @@
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			      test_bit(R5_Wantcompute, &dev->flags))) {
 				rcw++;
-				if (!test_bit(R5_Insync, &dev->flags))
-					continue; /* it's a failed drive */
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+				if (test_bit(R5_Insync, &dev->flags) &&
+				    test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
 					pr_debug("Read_old block "
 						"%d for Reconstruct\n", i);
 					set_bit(R5_LOCKED, &dev->flags);
@@ -5031,8 +5074,8 @@
 	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
 	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
+	set_bit(STRIPE_HANDLE, &sh->state);
 
-	handle_stripe(sh);
 	release_stripe(sh);
 
 	return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1, 0);
+		sh = get_active_stripe(conf, sector, 0, 1, 1);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
@@ -5355,6 +5398,50 @@
 					raid5_store_preread_threshold);
 
 static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+
+	if (!conf)	/* no r5conf attached: emit an empty attribute */
+		return 0;
+	return sprintf(page, "%d\n", conf->skip_copy);
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long val;
+
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+	if (kstrtoul(page, 10, &val))
+		return -EINVAL;
+
+	val = !!val;	/* any non-zero input means "enabled" */
+	if (val == conf->skip_copy)
+		return len;	/* nothing to change */
+	mddev_suspend(mddev);
+	conf->skip_copy = val;
+	if (val)
+		mddev->queue->backing_dev_info.capabilities |=
+						BDI_CAP_STABLE_WRITES;
+	else
+		mddev->queue->backing_dev_info.capabilities &=
+						~BDI_CAP_STABLE_WRITES;
+	mddev_resume(mddev);
+	return len;
+}
+
+/* "skip_copy" sysfs entry: readable by everyone, writable by the owner. */
+static struct md_sysfs_entry raid5_skip_copy =
+	__ATTR(skip_copy, S_IRUGO | S_IWUSR,
+	       raid5_show_skip_copy, raid5_store_skip_copy);
+
+
+static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
 	struct r5conf *conf = mddev->private;
@@ -5439,6 +5526,7 @@
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
 	&raid5_group_thread_cnt.attr,
+	&raid5_skip_copy.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {

diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 01ad8ae..bc72cd4 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h

@@ -232,7 +232,7 @@
 		 */
 		struct bio	req, rreq;
 		struct bio_vec	vec, rvec;
-		struct page	*page;
+		struct page	*page, *orig_page;
 		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;			/* sector of this page */
 		unsigned long	flags;
@@ -299,6 +299,7 @@
 			 * data in, and now is a good time to write it out.
 			 */
 	R5_Discard,	/* Discard the stripe */
+	R5_SkipCopy,	/* Don't copy data from bio to stripe cache */
 };
 
 /*
@@ -436,6 +437,7 @@
 	atomic_t		pending_full_writes; /* full write backlog */
 	int			bypass_count; /* bypassed prereads */
 	int			bypass_threshold; /* preread nice */
+	int			skip_copy; /* Don't copy data from bio to stripe cache */
 	struct list_head	*last_hold; /* detect hold_list promotions */
 
 	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 20f1655..8108c69 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig

@@ -93,7 +93,9 @@
 
 config VIDEO_OMAP3
 	tristate "OMAP 3 Camera support"
-	depends on OMAP_IOVMM && VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3
+	depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3
+	select ARM_DMA_USE_IOMMU
+	select OMAP_IOMMU
 	---help---
 	  Driver for an OMAP 3 camera controller.
 

diff --git a/drivers/media/platform/omap3isp/Makefile b/drivers/media/platform/omap3isp/Makefile
index e8847e7..254975a 100644
--- a/drivers/media/platform/omap3isp/Makefile
+++ b/drivers/media/platform/omap3isp/Makefile

@@ -3,7 +3,7 @@
 ccflags-$(CONFIG_VIDEO_OMAP3_DEBUG) += -DDEBUG
 
 omap3-isp-objs += \
-	isp.o ispqueue.o ispvideo.o \
+	isp.o ispvideo.o \
 	ispcsiphy.o ispccp2.o ispcsi2.o \
 	ispccdc.o isppreview.o ispresizer.o \
 	ispstat.o isph3a_aewb.o isph3a_af.o isphist.o

diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 06a0df4..2c7aa67 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c

@@ -69,6 +69,8 @@
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 
+#include <asm/dma-iommu.h>
+
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
 
@@ -1397,14 +1399,14 @@
 	if (isp_pipeline_is_last(me)) {
 		struct isp_video *video = pipe->output;
 		unsigned long flags;
-		spin_lock_irqsave(&video->queue->irqlock, flags);
+		spin_lock_irqsave(&video->irqlock, flags);
 		if (video->dmaqueue_flags & ISP_VIDEO_DMAQUEUE_UNDERRUN) {
-			spin_unlock_irqrestore(&video->queue->irqlock, flags);
+			spin_unlock_irqrestore(&video->irqlock, flags);
 			atomic_set(stopping, 0);
 			smp_mb();
 			return 0;
 		}
-		spin_unlock_irqrestore(&video->queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		if (!wait_event_timeout(*wait, !atomic_read(stopping),
 					msecs_to_jiffies(1000))) {
 			atomic_set(stopping, 0);
@@ -1625,7 +1627,7 @@
  * Decrement the reference count on the ISP. If the last reference is released,
  * power-down all submodules, disable clocks and free temporary buffers.
  */
-void omap3isp_put(struct isp_device *isp)
+static void __omap3isp_put(struct isp_device *isp, bool save_ctx)
 {
 	if (isp == NULL)
 		return;
@@ -1634,7 +1636,7 @@
 	BUG_ON(isp->ref_count == 0);
 	if (--isp->ref_count == 0) {
 		isp_disable_interrupts(isp);
-		if (isp->domain) {
+		if (save_ctx) {
 			isp_save_ctx(isp);
 			isp->has_context = 1;
 		}
@@ -1648,6 +1650,11 @@
 	mutex_unlock(&isp->isp_mutex);
 }
 
+void omap3isp_put(struct isp_device *isp)
+{
+	__omap3isp_put(isp, true);	/* true: save context on last put */
+}
+
 /* --------------------------------------------------------------------------
  * Platform device driver
  */
@@ -2120,6 +2127,61 @@
 	return ret;
 }
 
+static void isp_detach_iommu(struct isp_device *isp)
+{
+	arm_iommu_release_mapping(isp->mapping);	/* drop ARM mapping */
+	isp->mapping = NULL;
+	iommu_group_remove_device(isp->dev);	/* leave the IOMMU group */
+}
+
+/* Put the ISP in a new IOMMU group and attach an ARM VA mapping to it. */
+static int isp_attach_iommu(struct isp_device *isp)
+{
+	struct dma_iommu_mapping *mapping;
+	struct iommu_group *group;
+	int ret;
+
+	/* Create a device group and add the device to it. */
+	group = iommu_group_alloc();
+	if (IS_ERR(group)) {
+		dev_err(isp->dev, "failed to allocate IOMMU group\n");
+		return PTR_ERR(group);
+	}
+
+	ret = iommu_group_add_device(group, isp->dev);
+	iommu_group_put(group);
+	if (ret < 0) {
+		dev_err(isp->dev, "failed to add device to IOMMU group\n");
+		return ret;
+	}
+
+	/*
+	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
+	 * VAs. This will allocate a corresponding IOMMU domain.
+	 */
+	mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G);
+	if (IS_ERR(mapping)) {
+		dev_err(isp->dev, "failed to create ARM IOMMU mapping\n");
+		ret = PTR_ERR(mapping);
+		goto error;
+	}
+
+	isp->mapping = mapping;	/* lets the error path release it */
+
+	/* Attach the ARM VA mapping to the device. */
+	ret = arm_iommu_attach_device(isp->dev, mapping);
+	if (ret < 0) {
+		dev_err(isp->dev, "failed to attach device to VA mapping\n");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	isp_detach_iommu(isp);
+	return ret;
+}
+
 /*
  * isp_remove - Remove ISP platform device
  * @pdev: Pointer to ISP platform device
@@ -2135,10 +2197,8 @@
 	isp_xclk_cleanup(isp);
 
 	__omap3isp_get(isp, false);
-	iommu_detach_device(isp->domain, &pdev->dev);
-	iommu_domain_free(isp->domain);
-	isp->domain = NULL;
-	omap3isp_put(isp);
+	isp_detach_iommu(isp);
+	__omap3isp_put(isp, false);
 
 	return 0;
 }
@@ -2265,39 +2325,32 @@
 		}
 	}
 
-	isp->domain = iommu_domain_alloc(pdev->dev.bus);
-	if (!isp->domain) {
-		dev_err(isp->dev, "can't alloc iommu domain\n");
-		ret = -ENOMEM;
+	/* IOMMU */
+	ret = isp_attach_iommu(isp);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to attach to IOMMU\n");
 		goto error_isp;
 	}
 
-	ret = iommu_attach_device(isp->domain, &pdev->dev);
-	if (ret) {
-		dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret);
-		ret = -EPROBE_DEFER;
-		goto free_domain;
-	}
-
 	/* Interrupt */
 	isp->irq_num = platform_get_irq(pdev, 0);
 	if (isp->irq_num <= 0) {
 		dev_err(isp->dev, "No IRQ resource\n");
 		ret = -ENODEV;
-		goto detach_dev;
+		goto error_iommu;
 	}
 
 	if (devm_request_irq(isp->dev, isp->irq_num, isp_isr, IRQF_SHARED,
 			     "OMAP3 ISP", isp)) {
 		dev_err(isp->dev, "Unable to request IRQ\n");
 		ret = -EINVAL;
-		goto detach_dev;
+		goto error_iommu;
 	}
 
 	/* Entities */
 	ret = isp_initialize_modules(isp);
 	if (ret < 0)
-		goto detach_dev;
+		goto error_iommu;
 
 	ret = isp_register_entities(isp);
 	if (ret < 0)
@@ -2310,14 +2363,11 @@
 
 error_modules:
 	isp_cleanup_modules(isp);
-detach_dev:
-	iommu_detach_device(isp->domain, &pdev->dev);
-free_domain:
-	iommu_domain_free(isp->domain);
-	isp->domain = NULL;
+error_iommu:
+	isp_detach_iommu(isp);
 error_isp:
 	isp_xclk_cleanup(isp);
-	omap3isp_put(isp);
+	__omap3isp_put(isp, false);
 error:
 	mutex_destroy(&isp->isp_mutex);
 

diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h
index 6d5e697..2c314ee 100644
--- a/drivers/media/platform/omap3isp/isp.h
+++ b/drivers/media/platform/omap3isp/isp.h

@@ -45,8 +45,6 @@
 #include "ispcsi2.h"
 #include "ispccp2.h"
 
-#define IOMMU_FLAG (IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8)
-
 #define ISP_TOK_TERM		0xFFFFFFFF	/*
 						 * terminating token for ISP
 						 * modules reg list
@@ -152,6 +150,7 @@
  *             regions.
  * @mmio_base_phys: Array with physical L4 bus addresses for ISP register
  *                  regions.
+ * @mapping: IOMMU mapping
  * @stat_lock: Spinlock for handling statistics
  * @isp_mutex: Mutex for serializing requests to ISP.
  * @stop_failure: Indicates that an entity failed to stop.
@@ -171,7 +170,6 @@
  * @isp_res: Pointer to current settings for ISP Resizer.
  * @isp_prev: Pointer to current settings for ISP Preview.
  * @isp_ccdc: Pointer to current settings for ISP CCDC.
- * @iommu: Pointer to requested IOMMU instance for ISP.
  * @platform_cb: ISP driver callback function pointers for platform code
  *
  * This structure is used to store the OMAP ISP Information.
@@ -189,6 +187,8 @@
 	void __iomem *mmio_base[OMAP3_ISP_IOMEM_LAST];
 	unsigned long mmio_base_phys[OMAP3_ISP_IOMEM_LAST];
 
+	struct dma_iommu_mapping *mapping;
+
 	/* ISP Obj */
 	spinlock_t stat_lock;	/* common lock for statistic drivers */
 	struct mutex isp_mutex;	/* For handling ref_count field */
@@ -219,8 +219,6 @@
 
 	unsigned int sbl_resources;
 	unsigned int subclk_resources;
-
-	struct iommu_domain *domain;
 };
 
 #define v4l2_dev_to_isp_device(dev) \

diff --git a/drivers/media/platform/omap3isp/ispccdc.c b/drivers/media/platform/omap3isp/ispccdc.c
index 4d920c8..9f727d2 100644
--- a/drivers/media/platform/omap3isp/ispccdc.c
+++ b/drivers/media/platform/omap3isp/ispccdc.c

@@ -30,7 +30,6 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
-#include <linux/omap-iommu.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <media/v4l2-event.h>
@@ -206,7 +205,8 @@
  * ccdc_lsc_program_table - Program Lens Shading Compensation table address.
  * @ccdc: Pointer to ISP CCDC device.
  */
-static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc, u32 addr)
+static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc,
+				   dma_addr_t addr)
 {
 	isp_reg_writel(to_isp_device(ccdc), addr,
 		       OMAP3_ISP_IOMEM_CCDC, ISPCCDC_LSC_TABLE_BASE);
@@ -333,7 +333,7 @@
 		return -EBUSY;
 
 	ccdc_lsc_setup_regs(ccdc, &req->config);
-	ccdc_lsc_program_table(ccdc, req->table);
+	ccdc_lsc_program_table(ccdc, req->table.dma);
 	return 0;
 }
 
@@ -368,11 +368,12 @@
 	if (req == NULL)
 		return;
 
-	if (req->iovm)
-		dma_unmap_sg(isp->dev, req->iovm->sgt->sgl,
-			     req->iovm->sgt->nents, DMA_TO_DEVICE);
-	if (req->table)
-		omap_iommu_vfree(isp->domain, isp->dev, req->table);
+	if (req->table.addr) {
+		sg_free_table(&req->table.sgt);
+		dma_free_coherent(isp->dev, req->config.size, req->table.addr,
+				  req->table.dma);
+	}
+
 	kfree(req);
 }
 
@@ -416,7 +417,6 @@
 	struct isp_device *isp = to_isp_device(ccdc);
 	struct ispccdc_lsc_config_req *req;
 	unsigned long flags;
-	void *table;
 	u16 update;
 	int ret;
 
@@ -444,38 +444,31 @@
 
 		req->enable = 1;
 
-		req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
-					req->config.size, IOMMU_FLAG);
-		if (IS_ERR_VALUE(req->table)) {
-			req->table = 0;
+		req->table.addr = dma_alloc_coherent(isp->dev, req->config.size,
+						     &req->table.dma,
+						     GFP_KERNEL);
+		if (req->table.addr == NULL) {
 			ret = -ENOMEM;
 			goto done;
 		}
 
-		req->iovm = omap_find_iovm_area(isp->dev, req->table);
-		if (req->iovm == NULL) {
-			ret = -ENOMEM;
+		ret = dma_get_sgtable(isp->dev, &req->table.sgt,
+				      req->table.addr, req->table.dma,
+				      req->config.size);
+		if (ret < 0)
 			goto done;
-		}
 
-		if (!dma_map_sg(isp->dev, req->iovm->sgt->sgl,
-				req->iovm->sgt->nents, DMA_TO_DEVICE)) {
-			ret = -ENOMEM;
-			req->iovm = NULL;
-			goto done;
-		}
+		dma_sync_sg_for_cpu(isp->dev, req->table.sgt.sgl,
+				    req->table.sgt.nents, DMA_TO_DEVICE);
 
-		dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl,
-				    req->iovm->sgt->nents, DMA_TO_DEVICE);
-
-		table = omap_da_to_va(isp->dev, req->table);
-		if (copy_from_user(table, config->lsc, req->config.size)) {
+		if (copy_from_user(req->table.addr, config->lsc,
+				   req->config.size)) {
 			ret = -EFAULT;
 			goto done;
 		}
 
-		dma_sync_sg_for_device(isp->dev, req->iovm->sgt->sgl,
-				       req->iovm->sgt->nents, DMA_TO_DEVICE);
+		dma_sync_sg_for_device(isp->dev, req->table.sgt.sgl,
+				       req->table.sgt.nents, DMA_TO_DEVICE);
 	}
 
 	spin_lock_irqsave(&ccdc->lsc.req_lock, flags);
@@ -584,7 +577,7 @@
 	if (!ccdc->fpc_en)
 		return;
 
-	isp_reg_writel(isp, ccdc->fpc.fpcaddr, OMAP3_ISP_IOMEM_CCDC,
+	isp_reg_writel(isp, ccdc->fpc.dma, OMAP3_ISP_IOMEM_CCDC,
 		       ISPCCDC_FPC_ADDR);
 	/* The FPNUM field must be set before enabling FPC. */
 	isp_reg_writel(isp, (ccdc->fpc.fpnum << ISPCCDC_FPC_FPNUM_SHIFT),
@@ -724,8 +717,9 @@
 	ccdc->shadow_update = 0;
 
 	if (OMAP3ISP_CCDC_FPC & ccdc_struct->update) {
-		u32 table_old = 0;
-		u32 table_new;
+		struct omap3isp_ccdc_fpc fpc;
+		struct ispccdc_fpc fpc_old = { .addr = NULL, };
+		struct ispccdc_fpc fpc_new;
 		u32 size;
 
 		if (ccdc->state != ISP_PIPELINE_STREAM_STOPPED)
@@ -734,35 +728,39 @@
 		ccdc->fpc_en = !!(OMAP3ISP_CCDC_FPC & ccdc_struct->flag);
 
 		if (ccdc->fpc_en) {
-			if (copy_from_user(&ccdc->fpc, ccdc_struct->fpc,
-					   sizeof(ccdc->fpc)))
+			if (copy_from_user(&fpc, ccdc_struct->fpc, sizeof(fpc)))
 				return -EFAULT;
 
+			size = fpc.fpnum * 4;
+
 			/*
-			 * table_new must be 64-bytes aligned, but it's
-			 * already done by omap_iommu_vmalloc().
+			 * The table address must be 64-bytes aligned, which is
+			 * guaranteed by dma_alloc_coherent().
 			 */
-			size = ccdc->fpc.fpnum * 4;
-			table_new = omap_iommu_vmalloc(isp->domain, isp->dev,
-							0, size, IOMMU_FLAG);
-			if (IS_ERR_VALUE(table_new))
+			fpc_new.fpnum = fpc.fpnum;
+			fpc_new.addr = dma_alloc_coherent(isp->dev, size,
+							  &fpc_new.dma,
+							  GFP_KERNEL);
+			if (fpc_new.addr == NULL)
 				return -ENOMEM;
 
-			if (copy_from_user(omap_da_to_va(isp->dev, table_new),
-					   (__force void __user *)
-					   ccdc->fpc.fpcaddr, size)) {
-				omap_iommu_vfree(isp->domain, isp->dev,
-								table_new);
+			if (copy_from_user(fpc_new.addr,
+					   (__force void __user *)fpc.fpcaddr,
+					   size)) {
+				dma_free_coherent(isp->dev, size, fpc_new.addr,
+						  fpc_new.dma);
 				return -EFAULT;
 			}
 
-			table_old = ccdc->fpc.fpcaddr;
-			ccdc->fpc.fpcaddr = table_new;
+			fpc_old = ccdc->fpc;
+			ccdc->fpc = fpc_new;
 		}
 
 		ccdc_configure_fpc(ccdc);
-		if (table_old != 0)
-			omap_iommu_vfree(isp->domain, isp->dev, table_old);
+
+		if (fpc_old.addr != NULL)
+			dma_free_coherent(isp->dev, fpc_old.fpnum * 4,
+					  fpc_old.addr, fpc_old.dma);
 	}
 
 	return ccdc_lsc_config(ccdc, ccdc_struct);
@@ -1523,7 +1521,7 @@
 
 	buffer = omap3isp_video_buffer_next(&ccdc->video_out);
 	if (buffer != NULL) {
-		ccdc_set_outaddr(ccdc, buffer->isp_addr);
+		ccdc_set_outaddr(ccdc, buffer->dma);
 		restart = 1;
 	}
 
@@ -1662,7 +1660,7 @@
 	if (!(ccdc->output & CCDC_OUTPUT_MEMORY))
 		return -ENODEV;
 
-	ccdc_set_outaddr(ccdc, buffer->isp_addr);
+	ccdc_set_outaddr(ccdc, buffer->dma);
 
 	/* We now have a buffer queued on the output, restart the pipeline
 	 * on the next CCDC interrupt if running in continuous mode (or when
@@ -2580,8 +2578,9 @@
 	cancel_work_sync(&ccdc->lsc.table_work);
 	ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue);
 
-	if (ccdc->fpc.fpcaddr != 0)
-		omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr);
+	if (ccdc->fpc.addr != NULL)
+		dma_free_coherent(isp->dev, ccdc->fpc.fpnum * 4, ccdc->fpc.addr,
+				  ccdc->fpc.dma);
 
 	mutex_destroy(&ccdc->ioctl_lock);
 }

diff --git a/drivers/media/platform/omap3isp/ispccdc.h b/drivers/media/platform/omap3isp/ispccdc.h
index 9d24e41..f650616 100644
--- a/drivers/media/platform/omap3isp/ispccdc.h
+++ b/drivers/media/platform/omap3isp/ispccdc.h

@@ -46,6 +46,12 @@
 
 #define	OMAP3ISP_CCDC_NEVENTS	16
 
+struct ispccdc_fpc {
+	void *addr;
+	dma_addr_t dma;
+	unsigned int fpnum;
+};
+
 enum ispccdc_lsc_state {
 	LSC_STATE_STOPPED = 0,
 	LSC_STATE_STOPPING = 1,
@@ -57,8 +63,12 @@
 	struct list_head list;
 	struct omap3isp_ccdc_lsc_config config;
 	unsigned char enable;
-	u32 table;
-	struct iovm_struct *iovm;
+
+	struct {
+		void *addr;
+		dma_addr_t dma;
+		struct sg_table sgt;
+	} table;
 };
 
 /*
@@ -136,7 +146,7 @@
 		     fpc_en:1;
 	struct omap3isp_ccdc_blcomp blcomp;
 	struct omap3isp_ccdc_bclamp clamp;
-	struct omap3isp_ccdc_fpc fpc;
+	struct ispccdc_fpc fpc;
 	struct ispccdc_lsc lsc;
 	unsigned int update;
 	unsigned int shadow_update;

diff --git a/drivers/media/platform/omap3isp/ispccp2.c b/drivers/media/platform/omap3isp/ispccp2.c
index b30b67d..f3801db 100644
--- a/drivers/media/platform/omap3isp/ispccp2.c
+++ b/drivers/media/platform/omap3isp/ispccp2.c

@@ -549,7 +549,7 @@
 
 	buffer = omap3isp_video_buffer_next(&ccp2->video_in);
 	if (buffer != NULL)
-		ccp2_set_inaddr(ccp2, buffer->isp_addr);
+		ccp2_set_inaddr(ccp2, buffer->dma);
 
 	pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 
@@ -940,7 +940,7 @@
 {
 	struct isp_ccp2_device *ccp2 = &video->isp->isp_ccp2;
 
-	ccp2_set_inaddr(ccp2, buffer->isp_addr);
+	ccp2_set_inaddr(ccp2, buffer->dma);
 	return 0;
 }
 

diff --git a/drivers/media/platform/omap3isp/ispcsi2.c b/drivers/media/platform/omap3isp/ispcsi2.c
index 6205608..5a2e47e 100644
--- a/drivers/media/platform/omap3isp/ispcsi2.c
+++ b/drivers/media/platform/omap3isp/ispcsi2.c

@@ -695,7 +695,7 @@
 	if (buffer == NULL)
 		return;
 
-	csi2_set_outaddr(csi2, buffer->isp_addr);
+	csi2_set_outaddr(csi2, buffer->dma);
 	csi2_ctx_enable(isp, csi2, 0, 1);
 }
 
@@ -812,7 +812,7 @@
 	struct isp_device *isp = video->isp;
 	struct isp_csi2_device *csi2 = &isp->isp_csi2a;
 
-	csi2_set_outaddr(csi2, buffer->isp_addr);
+	csi2_set_outaddr(csi2, buffer->dma);
 
 	/*
 	 * If streaming was enabled before there was a buffer queued

diff --git a/drivers/media/platform/omap3isp/isph3a_aewb.c b/drivers/media/platform/omap3isp/isph3a_aewb.c
index 75fd82b..d6811ce 100644
--- a/drivers/media/platform/omap3isp/isph3a_aewb.c
+++ b/drivers/media/platform/omap3isp/isph3a_aewb.c

@@ -47,7 +47,7 @@
 	if (aewb->state == ISPSTAT_DISABLED)
 		return;
 
-	isp_reg_writel(aewb->isp, aewb->active_buf->iommu_addr,
+	isp_reg_writel(aewb->isp, aewb->active_buf->dma_addr,
 		       OMAP3_ISP_IOMEM_H3A, ISPH3A_AEWBUFST);
 
 	if (!aewb->update)

diff --git a/drivers/media/platform/omap3isp/isph3a_af.c b/drivers/media/platform/omap3isp/isph3a_af.c
index a0bf5af..6fc960c 100644
--- a/drivers/media/platform/omap3isp/isph3a_af.c
+++ b/drivers/media/platform/omap3isp/isph3a_af.c

@@ -51,7 +51,7 @@
 	if (af->state == ISPSTAT_DISABLED)
 		return;
 
-	isp_reg_writel(af->isp, af->active_buf->iommu_addr, OMAP3_ISP_IOMEM_H3A,
+	isp_reg_writel(af->isp, af->active_buf->dma_addr, OMAP3_ISP_IOMEM_H3A,
 		       ISPH3A_AFBUFST);
 
 	if (!af->update)

diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c
index 395b2b0..720809b 100644
--- a/drivers/media/platform/omap3isp/isppreview.c
+++ b/drivers/media/platform/omap3isp/isppreview.c

@@ -1499,14 +1499,14 @@
 	if (prev->input == PREVIEW_INPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&prev->video_in);
 		if (buffer != NULL)
-			preview_set_inaddr(prev, buffer->isp_addr);
+			preview_set_inaddr(prev, buffer->dma);
 		pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 	}
 
 	if (prev->output & PREVIEW_OUTPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&prev->video_out);
 		if (buffer != NULL) {
-			preview_set_outaddr(prev, buffer->isp_addr);
+			preview_set_outaddr(prev, buffer->dma);
 			restart = 1;
 		}
 		pipe->state |= ISP_PIPELINE_IDLE_OUTPUT;
@@ -1577,10 +1577,10 @@
 	struct isp_prev_device *prev = &video->isp->isp_prev;
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-		preview_set_inaddr(prev, buffer->isp_addr);
+		preview_set_inaddr(prev, buffer->dma);
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		preview_set_outaddr(prev, buffer->isp_addr);
+		preview_set_outaddr(prev, buffer->dma);
 
 	return 0;
 }

diff --git a/drivers/media/platform/omap3isp/ispqueue.c b/drivers/media/platform/omap3isp/ispqueue.c
deleted file mode 100644
index a5e6585..0000000
--- a/drivers/media/platform/omap3isp/ispqueue.c
+++ /dev/null

@@ -1,1161 +0,0 @@
-/*
- * ispqueue.c
- *
- * TI OMAP3 ISP - Video buffers queue handling
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *	     Sakari Ailus <sakari.ailus@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#include <asm/cacheflush.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/poll.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ispqueue.h"
-
-/* -----------------------------------------------------------------------------
- * Video buffers management
- */
-
-/*
- * isp_video_buffer_cache_sync - Keep the buffers coherent between CPU and ISP
- *
- * The typical operation required here is Cache Invalidation across
- * the (user space) buffer address range. And this _must_ be done
- * at QBUF stage (and *only* at QBUF).
- *
- * We try to use optimal cache invalidation function:
- * - dmac_map_area:
- *    - used when the number of pages are _low_.
- *    - it becomes quite slow as the number of pages increase.
- *       - for 648x492 viewfinder (150 pages) it takes 1.3 ms.
- *       - for 5 Mpix buffer (2491 pages) it takes between 25-50 ms.
- *
- * - flush_cache_all:
- *    - used when the number of pages are _high_.
- *    - time taken in the range of 500-900 us.
- *    - has a higher penalty but, as whole dcache + icache is invalidated
- */
-/*
- * FIXME: dmac_inv_range crashes randomly on the user space buffer
- *        address. Fall back to flush_cache_all for now.
- */
-#define ISP_CACHE_FLUSH_PAGES_MAX       0
-
-static void isp_video_buffer_cache_sync(struct isp_video_buffer *buf)
-{
-	if (buf->skip_cache)
-		return;
-
-	if (buf->vbuf.m.userptr == 0 || buf->npages == 0 ||
-	    buf->npages > ISP_CACHE_FLUSH_PAGES_MAX)
-		flush_cache_all();
-	else {
-		dmac_map_area((void *)buf->vbuf.m.userptr, buf->vbuf.length,
-			      DMA_FROM_DEVICE);
-		outer_inv_range(buf->vbuf.m.userptr,
-				buf->vbuf.m.userptr + buf->vbuf.length);
-	}
-}
-
-/*
- * isp_video_buffer_lock_vma - Prevent VMAs from being unmapped
- *
- * Lock the VMAs underlying the given buffer into memory. This avoids the
- * userspace buffer mapping from being swapped out, making VIPT cache handling
- * easier.
- *
- * Note that the pages will not be freed as the buffers have been locked to
- * memory using by a call to get_user_pages(), but the userspace mapping could
- * still disappear if the VMAs are not locked. This is caused by the memory
- * management code trying to be as lock-less as possible, which results in the
- * userspace mapping manager not finding out that the pages are locked under
- * some conditions.
- */
-static int isp_video_buffer_lock_vma(struct isp_video_buffer *buf, int lock)
-{
-	struct vm_area_struct *vma;
-	unsigned long start;
-	unsigned long end;
-	int ret = 0;
-
-	if (buf->vbuf.memory == V4L2_MEMORY_MMAP)
-		return 0;
-
-	/* We can be called from workqueue context if the current task dies to
-	 * unlock the VMAs. In that case there's no current memory management
-	 * context so unlocking can't be performed, but the VMAs have been or
-	 * are getting destroyed anyway so it doesn't really matter.
-	 */
-	if (!current || !current->mm)
-		return lock ? -EINVAL : 0;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	down_write(&current->mm->mmap_sem);
-	spin_lock(&current->mm->page_table_lock);
-
-	do {
-		vma = find_vma(current->mm, start);
-		if (vma == NULL) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		if (lock)
-			vma->vm_flags |= VM_LOCKED;
-		else
-			vma->vm_flags &= ~VM_LOCKED;
-
-		start = vma->vm_end + 1;
-	} while (vma->vm_end < end);
-
-	if (lock)
-		buf->vm_flags |= VM_LOCKED;
-	else
-		buf->vm_flags &= ~VM_LOCKED;
-
-out:
-	spin_unlock(&current->mm->page_table_lock);
-	up_write(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_sglist_kernel - Build a scatter list for a vmalloc'ed buffer
- *
- * Iterate over the vmalloc'ed area and create a scatter list entry for every
- * page.
- */
-static int isp_video_buffer_sglist_kernel(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int npages;
-	unsigned int i;
-	void *addr;
-
-	addr = buf->vaddr;
-	npages = PAGE_ALIGN(buf->vbuf.length) >> PAGE_SHIFT;
-
-	sglist = vmalloc(npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, npages);
-
-	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
-		struct page *page = vmalloc_to_page(addr);
-
-		if (page == NULL || PageHighMem(page)) {
-			vfree(sglist);
-			return -EINVAL;
-		}
-
-		sg_set_page(&sglist[i], page, PAGE_SIZE, 0);
-	}
-
-	buf->sglen = npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_sglist_user - Build a scatter list for a userspace buffer
- *
- * Walk the buffer pages list and create a 1:1 mapping to a scatter list.
- */
-static int isp_video_buffer_sglist_user(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int offset = buf->offset;
-	unsigned int i;
-
-	sglist = vmalloc(buf->npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, buf->npages);
-
-	for (i = 0; i < buf->npages; ++i) {
-		if (PageHighMem(buf->pages[i])) {
-			vfree(sglist);
-			return -EINVAL;
-		}
-
-		sg_set_page(&sglist[i], buf->pages[i], PAGE_SIZE - offset,
-			    offset);
-		offset = 0;
-	}
-
-	buf->sglen = buf->npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_sglist_pfnmap - Build a scatter list for a VM_PFNMAP buffer
- *
- * Create a scatter list of physically contiguous pages starting at the buffer
- * memory physical address.
- */
-static int isp_video_buffer_sglist_pfnmap(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int offset = buf->offset;
-	unsigned long pfn = buf->paddr >> PAGE_SHIFT;
-	unsigned int i;
-
-	sglist = vmalloc(buf->npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, buf->npages);
-
-	for (i = 0; i < buf->npages; ++i, ++pfn) {
-		sg_set_page(&sglist[i], pfn_to_page(pfn), PAGE_SIZE - offset,
-			    offset);
-		/* PFNMAP buffers will not get DMA-mapped, set the DMA address
-		 * manually.
-		 */
-		sg_dma_address(&sglist[i]) = (pfn << PAGE_SHIFT) + offset;
-		offset = 0;
-	}
-
-	buf->sglen = buf->npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_cleanup - Release pages for a userspace VMA.
- *
- * Release pages locked by a call isp_video_buffer_prepare_user and free the
- * pages table.
- */
-static void isp_video_buffer_cleanup(struct isp_video_buffer *buf)
-{
-	enum dma_data_direction direction;
-	unsigned int i;
-
-	if (buf->queue->ops->buffer_cleanup)
-		buf->queue->ops->buffer_cleanup(buf);
-
-	if (!(buf->vm_flags & VM_PFNMAP)) {
-		direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE
-			  ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		dma_unmap_sg(buf->queue->dev, buf->sglist, buf->sglen,
-			     direction);
-	}
-
-	vfree(buf->sglist);
-	buf->sglist = NULL;
-	buf->sglen = 0;
-
-	if (buf->pages != NULL) {
-		isp_video_buffer_lock_vma(buf, 0);
-
-		for (i = 0; i < buf->npages; ++i)
-			page_cache_release(buf->pages[i]);
-
-		vfree(buf->pages);
-		buf->pages = NULL;
-	}
-
-	buf->npages = 0;
-	buf->skip_cache = false;
-}
-
-/*
- * isp_video_buffer_prepare_user - Pin userspace VMA pages to memory.
- *
- * This function creates a list of pages for a userspace VMA. The number of
- * pages is first computed based on the buffer size, and pages are then
- * retrieved by a call to get_user_pages.
- *
- * Pages are pinned to memory by get_user_pages, making them available for DMA
- * transfers. However, due to memory management optimization, it seems the
- * get_user_pages doesn't guarantee that the pinned pages will not be written
- * to swap and removed from the userspace mapping(s). When this happens, a page
- * fault can be generated when accessing those unmapped pages.
- *
- * If the fault is triggered by a page table walk caused by VIPT cache
- * management operations, the page fault handler might oops if the MM semaphore
- * is held, as it can't handle kernel page faults in that case. To fix that, a
- * fixup entry needs to be added to the cache management code, or the userspace
- * VMA must be locked to avoid removing pages from the userspace mapping in the
- * first place.
- *
- * If the number of pages retrieved is smaller than the number required by the
- * buffer size, the function returns -EFAULT.
- */
-static int isp_video_buffer_prepare_user(struct isp_video_buffer *buf)
-{
-	unsigned long data;
-	unsigned int first;
-	unsigned int last;
-	int ret;
-
-	data = buf->vbuf.m.userptr;
-	first = (data & PAGE_MASK) >> PAGE_SHIFT;
-	last = ((data + buf->vbuf.length - 1) & PAGE_MASK) >> PAGE_SHIFT;
-
-	buf->offset = data & ~PAGE_MASK;
-	buf->npages = last - first + 1;
-	buf->pages = vmalloc(buf->npages * sizeof(buf->pages[0]));
-	if (buf->pages == NULL)
-		return -ENOMEM;
-
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm, data & PAGE_MASK,
-			     buf->npages,
-			     buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE, 0,
-			     buf->pages, NULL);
-	up_read(&current->mm->mmap_sem);
-
-	if (ret != buf->npages) {
-		buf->npages = ret < 0 ? 0 : ret;
-		isp_video_buffer_cleanup(buf);
-		return -EFAULT;
-	}
-
-	ret = isp_video_buffer_lock_vma(buf, 1);
-	if (ret < 0)
-		isp_video_buffer_cleanup(buf);
-
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare_pfnmap - Validate a VM_PFNMAP userspace buffer
- *
- * Userspace VM_PFNMAP buffers are supported only if they are contiguous in
- * memory and if they span a single VMA.
- *
- * Return 0 if the buffer is valid, or -EFAULT otherwise.
- */
-static int isp_video_buffer_prepare_pfnmap(struct isp_video_buffer *buf)
-{
-	struct vm_area_struct *vma;
-	unsigned long prev_pfn;
-	unsigned long this_pfn;
-	unsigned long start;
-	unsigned long end;
-	dma_addr_t pa = 0;
-	int ret = -EFAULT;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	buf->offset = start & ~PAGE_MASK;
-	buf->npages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
-	buf->pages = NULL;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, start);
-	if (vma == NULL || vma->vm_end < end)
-		goto done;
-
-	for (prev_pfn = 0; start <= end; start += PAGE_SIZE) {
-		ret = follow_pfn(vma, start, &this_pfn);
-		if (ret)
-			goto done;
-
-		if (prev_pfn == 0)
-			pa = this_pfn << PAGE_SHIFT;
-		else if (this_pfn != prev_pfn + 1) {
-			ret = -EFAULT;
-			goto done;
-		}
-
-		prev_pfn = this_pfn;
-	}
-
-	buf->paddr = pa + buf->offset;
-	ret = 0;
-
-done:
-	up_read(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare_vm_flags - Get VMA flags for a userspace address
- *
- * This function locates the VMAs for the buffer's userspace address and checks
- * that their flags match. The only flag that we need to care for at the moment
- * is VM_PFNMAP.
- *
- * The buffer vm_flags field is set to the first VMA flags.
- *
- * Return -EFAULT if no VMA can be found for part of the buffer, or if the VMAs
- * have incompatible flags.
- */
-static int isp_video_buffer_prepare_vm_flags(struct isp_video_buffer *buf)
-{
-	struct vm_area_struct *vma;
-	pgprot_t uninitialized_var(vm_page_prot);
-	unsigned long start;
-	unsigned long end;
-	int ret = -EFAULT;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	down_read(&current->mm->mmap_sem);
-
-	do {
-		vma = find_vma(current->mm, start);
-		if (vma == NULL)
-			goto done;
-
-		if (start == buf->vbuf.m.userptr) {
-			buf->vm_flags = vma->vm_flags;
-			vm_page_prot = vma->vm_page_prot;
-		}
-
-		if ((buf->vm_flags ^ vma->vm_flags) & VM_PFNMAP)
-			goto done;
-
-		if (vm_page_prot != vma->vm_page_prot)
-			goto done;
-
-		start = vma->vm_end + 1;
-	} while (vma->vm_end < end);
-
-	/* Skip cache management to enhance performances for non-cached or
-	 * write-combining buffers.
-	 */
-	if (vm_page_prot == pgprot_noncached(vm_page_prot) ||
-	    vm_page_prot == pgprot_writecombine(vm_page_prot))
-		buf->skip_cache = true;
-
-	ret = 0;
-
-done:
-	up_read(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare - Make a buffer ready for operation
- *
- * Preparing a buffer involves:
- *
- * - validating VMAs (userspace buffers only)
- * - locking pages and VMAs into memory (userspace buffers only)
- * - building page and scatter-gather lists
- * - mapping buffers for DMA operation
- * - performing driver-specific preparation
- *
- * The function must be called in userspace context with a valid mm context
- * (this excludes cleanup paths such as sys_close when the userspace process
- * segfaults).
- */
-static int isp_video_buffer_prepare(struct isp_video_buffer *buf)
-{
-	enum dma_data_direction direction;
-	int ret;
-
-	switch (buf->vbuf.memory) {
-	case V4L2_MEMORY_MMAP:
-		ret = isp_video_buffer_sglist_kernel(buf);
-		break;
-
-	case V4L2_MEMORY_USERPTR:
-		ret = isp_video_buffer_prepare_vm_flags(buf);
-		if (ret < 0)
-			return ret;
-
-		if (buf->vm_flags & VM_PFNMAP) {
-			ret = isp_video_buffer_prepare_pfnmap(buf);
-			if (ret < 0)
-				return ret;
-
-			ret = isp_video_buffer_sglist_pfnmap(buf);
-		} else {
-			ret = isp_video_buffer_prepare_user(buf);
-			if (ret < 0)
-				return ret;
-
-			ret = isp_video_buffer_sglist_user(buf);
-		}
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	if (ret < 0)
-		goto done;
-
-	if (!(buf->vm_flags & VM_PFNMAP)) {
-		direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE
-			  ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		ret = dma_map_sg(buf->queue->dev, buf->sglist, buf->sglen,
-				 direction);
-		if (ret != buf->sglen) {
-			ret = -EFAULT;
-			goto done;
-		}
-	}
-
-	if (buf->queue->ops->buffer_prepare)
-		ret = buf->queue->ops->buffer_prepare(buf);
-
-done:
-	if (ret < 0) {
-		isp_video_buffer_cleanup(buf);
-		return ret;
-	}
-
-	return ret;
-}
-
-/*
- * isp_video_queue_query - Query the status of a given buffer
- *
- * Locking: must be called with the queue lock held.
- */
-static void isp_video_buffer_query(struct isp_video_buffer *buf,
-				   struct v4l2_buffer *vbuf)
-{
-	memcpy(vbuf, &buf->vbuf, sizeof(*vbuf));
-
-	if (buf->vma_use_count)
-		vbuf->flags |= V4L2_BUF_FLAG_MAPPED;
-
-	switch (buf->state) {
-	case ISP_BUF_STATE_ERROR:
-		vbuf->flags |= V4L2_BUF_FLAG_ERROR;
-		/* Fallthrough */
-	case ISP_BUF_STATE_DONE:
-		vbuf->flags |= V4L2_BUF_FLAG_DONE;
-		break;
-	case ISP_BUF_STATE_QUEUED:
-	case ISP_BUF_STATE_ACTIVE:
-		vbuf->flags |= V4L2_BUF_FLAG_QUEUED;
-		break;
-	case ISP_BUF_STATE_IDLE:
-	default:
-		break;
-	}
-}
-
-/*
- * isp_video_buffer_wait - Wait for a buffer to be ready
- *
- * In non-blocking mode, return immediately with 0 if the buffer is ready or
- * -EAGAIN if the buffer is in the QUEUED or ACTIVE state.
- *
- * In blocking mode, wait (interruptibly but with no timeout) on the buffer wait
- * queue using the same condition.
- */
-static int isp_video_buffer_wait(struct isp_video_buffer *buf, int nonblocking)
-{
-	if (nonblocking) {
-		return (buf->state != ISP_BUF_STATE_QUEUED &&
-			buf->state != ISP_BUF_STATE_ACTIVE)
-			? 0 : -EAGAIN;
-	}
-
-	return wait_event_interruptible(buf->wait,
-		buf->state != ISP_BUF_STATE_QUEUED &&
-		buf->state != ISP_BUF_STATE_ACTIVE);
-}
-
-/* -----------------------------------------------------------------------------
- * Queue management
- */
-
-/*
- * isp_video_queue_free - Free video buffers memory
- *
- * Buffers can only be freed if the queue isn't streaming and if no buffer is
- * mapped to userspace. Return -EBUSY if those conditions aren't satisfied.
- *
- * This function must be called with the queue lock held.
- */
-static int isp_video_queue_free(struct isp_video_queue *queue)
-{
-	unsigned int i;
-
-	if (queue->streaming)
-		return -EBUSY;
-
-	for (i = 0; i < queue->count; ++i) {
-		if (queue->buffers[i]->vma_use_count != 0)
-			return -EBUSY;
-	}
-
-	for (i = 0; i < queue->count; ++i) {
-		struct isp_video_buffer *buf = queue->buffers[i];
-
-		isp_video_buffer_cleanup(buf);
-
-		vfree(buf->vaddr);
-		buf->vaddr = NULL;
-
-		kfree(buf);
-		queue->buffers[i] = NULL;
-	}
-
-	INIT_LIST_HEAD(&queue->queue);
-	queue->count = 0;
-	return 0;
-}
-
-/*
- * isp_video_queue_alloc - Allocate video buffers memory
- *
- * This function must be called with the queue lock held.
- */
-static int isp_video_queue_alloc(struct isp_video_queue *queue,
-				 unsigned int nbuffers,
-				 unsigned int size, enum v4l2_memory memory)
-{
-	struct isp_video_buffer *buf;
-	unsigned int i;
-	void *mem;
-	int ret;
-
-	/* Start by freeing the buffers. */
-	ret = isp_video_queue_free(queue);
-	if (ret < 0)
-		return ret;
-
-	/* Bail out if no buffers should be allocated. */
-	if (nbuffers == 0)
-		return 0;
-
-	/* Initialize the allocated buffers. */
-	for (i = 0; i < nbuffers; ++i) {
-		buf = kzalloc(queue->bufsize, GFP_KERNEL);
-		if (buf == NULL)
-			break;
-
-		if (memory == V4L2_MEMORY_MMAP) {
-			/* Allocate video buffers memory for mmap mode. Align
-			 * the size to the page size.
-			 */
-			mem = vmalloc_32_user(PAGE_ALIGN(size));
-			if (mem == NULL) {
-				kfree(buf);
-				break;
-			}
-
-			buf->vbuf.m.offset = i * PAGE_ALIGN(size);
-			buf->vaddr = mem;
-		}
-
-		buf->vbuf.index = i;
-		buf->vbuf.length = size;
-		buf->vbuf.type = queue->type;
-		buf->vbuf.flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-		buf->vbuf.field = V4L2_FIELD_NONE;
-		buf->vbuf.memory = memory;
-
-		buf->queue = queue;
-		init_waitqueue_head(&buf->wait);
-
-		queue->buffers[i] = buf;
-	}
-
-	if (i == 0)
-		return -ENOMEM;
-
-	queue->count = i;
-	return nbuffers;
-}
-
-/**
- * omap3isp_video_queue_cleanup - Clean up the video buffers queue
- * @queue: Video buffers queue
- *
- * Free all allocated resources and clean up the video buffers queue. The queue
- * must not be busy (no ongoing video stream) and buffers must have been
- * unmapped.
- *
- * Return 0 on success or -EBUSY if the queue is busy or buffers haven't been
- * unmapped.
- */
-int omap3isp_video_queue_cleanup(struct isp_video_queue *queue)
-{
-	return isp_video_queue_free(queue);
-}
-
-/**
- * omap3isp_video_queue_init - Initialize the video buffers queue
- * @queue: Video buffers queue
- * @type: V4L2 buffer type (capture or output)
- * @ops: Driver-specific queue operations
- * @dev: Device used for DMA operations
- * @bufsize: Size of the driver-specific buffer structure
- *
- * Initialize the video buffers queue with the supplied parameters.
- *
- * The queue type must be one of V4L2_BUF_TYPE_VIDEO_CAPTURE or
- * V4L2_BUF_TYPE_VIDEO_OUTPUT. Other buffer types are not supported yet.
- *
- * Buffer objects will be allocated using the given buffer size to allow room
- * for driver-specific fields. Driver-specific buffer structures must start
- * with a struct isp_video_buffer field. Drivers with no driver-specific buffer
- * structure must pass the size of the isp_video_buffer structure in the bufsize
- * parameter.
- *
- * Return 0 on success.
- */
-int omap3isp_video_queue_init(struct isp_video_queue *queue,
-			      enum v4l2_buf_type type,
-			      const struct isp_video_queue_operations *ops,
-			      struct device *dev, unsigned int bufsize)
-{
-	INIT_LIST_HEAD(&queue->queue);
-	mutex_init(&queue->lock);
-	spin_lock_init(&queue->irqlock);
-
-	queue->type = type;
-	queue->ops = ops;
-	queue->dev = dev;
-	queue->bufsize = bufsize;
-
-	return 0;
-}
-
-/* -----------------------------------------------------------------------------
- * V4L2 operations
- */
-
-/**
- * omap3isp_video_queue_reqbufs - Allocate video buffers memory
- *
- * This function is intended to be used as a VIDIOC_REQBUFS ioctl handler. It
- * allocated video buffer objects and, for MMAP buffers, buffer memory.
- *
- * If the number of buffers is 0, all buffers are freed and the function returns
- * without performing any allocation.
- *
- * If the number of buffers is not 0, currently allocated buffers (if any) are
- * freed and the requested number of buffers are allocated. Depending on
- * driver-specific requirements and on memory availability, a number of buffer
- * smaller or bigger than requested can be allocated. This isn't considered as
- * an error.
- *
- * Return 0 on success or one of the following error codes:
- *
- * -EINVAL if the buffer type or index are invalid
- * -EBUSY if the queue is busy (streaming or buffers mapped)
- * -ENOMEM if the buffers can't be allocated due to an out-of-memory condition
- */
-int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue,
-				 struct v4l2_requestbuffers *rb)
-{
-	unsigned int nbuffers = rb->count;
-	unsigned int size;
-	int ret;
-
-	if (rb->type != queue->type)
-		return -EINVAL;
-
-	queue->ops->queue_prepare(queue, &nbuffers, &size);
-	if (size == 0)
-		return -EINVAL;
-
-	nbuffers = min_t(unsigned int, nbuffers, ISP_VIDEO_MAX_BUFFERS);
-
-	mutex_lock(&queue->lock);
-
-	ret = isp_video_queue_alloc(queue, nbuffers, size, rb->memory);
-	if (ret < 0)
-		goto done;
-
-	rb->count = ret;
-	ret = 0;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_querybuf - Query the status of a buffer in a queue
- *
- * This function is intended to be used as a VIDIOC_QUERYBUF ioctl handler. It
- * returns the status of a given video buffer.
- *
- * Return 0 on success or -EINVAL if the buffer type or index are invalid.
- */
-int omap3isp_video_queue_querybuf(struct isp_video_queue *queue,
-				  struct v4l2_buffer *vbuf)
-{
-	struct isp_video_buffer *buf;
-	int ret = 0;
-
-	if (vbuf->type != queue->type)
-		return -EINVAL;
-
-	mutex_lock(&queue->lock);
-
-	if (vbuf->index >= queue->count) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	buf = queue->buffers[vbuf->index];
-	isp_video_buffer_query(buf, vbuf);
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_qbuf - Queue a buffer
- *
- * This function is intended to be used as a VIDIOC_QBUF ioctl handler.
- *
- * The v4l2_buffer structure passed from userspace is first sanity tested. If
- * sane, the buffer is then processed and added to the main queue and, if the
- * queue is streaming, to the IRQ queue.
- *
- * Before being enqueued, USERPTR buffers are checked for address changes. If
- * the buffer has a different userspace address, the old memory area is unlocked
- * and the new memory area is locked.
- */
-int omap3isp_video_queue_qbuf(struct isp_video_queue *queue,
-			      struct v4l2_buffer *vbuf)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-	int ret = -EINVAL;
-
-	if (vbuf->type != queue->type)
-		goto done;
-
-	mutex_lock(&queue->lock);
-
-	if (vbuf->index >= queue->count)
-		goto done;
-
-	buf = queue->buffers[vbuf->index];
-
-	if (vbuf->memory != buf->vbuf.memory)
-		goto done;
-
-	if (buf->state != ISP_BUF_STATE_IDLE)
-		goto done;
-
-	if (vbuf->memory == V4L2_MEMORY_USERPTR &&
-	    vbuf->length < buf->vbuf.length)
-		goto done;
-
-	if (vbuf->memory == V4L2_MEMORY_USERPTR &&
-	    vbuf->m.userptr != buf->vbuf.m.userptr) {
-		isp_video_buffer_cleanup(buf);
-		buf->vbuf.m.userptr = vbuf->m.userptr;
-		buf->prepared = 0;
-	}
-
-	if (!buf->prepared) {
-		ret = isp_video_buffer_prepare(buf);
-		if (ret < 0)
-			goto done;
-		buf->prepared = 1;
-	}
-
-	isp_video_buffer_cache_sync(buf);
-
-	buf->state = ISP_BUF_STATE_QUEUED;
-	list_add_tail(&buf->stream, &queue->queue);
-
-	if (queue->streaming) {
-		spin_lock_irqsave(&queue->irqlock, flags);
-		queue->ops->buffer_queue(buf);
-		spin_unlock_irqrestore(&queue->irqlock, flags);
-	}
-
-	ret = 0;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_dqbuf - Dequeue a buffer
- *
- * This function is intended to be used as a VIDIOC_DQBUF ioctl handler.
- *
- * Wait until a buffer is ready to be dequeued, remove it from the queue and
- * copy its information to the v4l2_buffer structure.
- *
- * If the nonblocking argument is not zero and no buffer is ready, return
- * -EAGAIN immediately instead of waiting.
- *
- * If no buffer has been enqueued, or if the requested buffer type doesn't match
- * the queue type, return -EINVAL.
- */
-int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue,
-			       struct v4l2_buffer *vbuf, int nonblocking)
-{
-	struct isp_video_buffer *buf;
-	int ret;
-
-	if (vbuf->type != queue->type)
-		return -EINVAL;
-
-	mutex_lock(&queue->lock);
-
-	if (list_empty(&queue->queue)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream);
-	ret = isp_video_buffer_wait(buf, nonblocking);
-	if (ret < 0)
-		goto done;
-
-	list_del(&buf->stream);
-
-	isp_video_buffer_query(buf, vbuf);
-	buf->state = ISP_BUF_STATE_IDLE;
-	vbuf->flags &= ~V4L2_BUF_FLAG_QUEUED;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_streamon - Start streaming
- *
- * This function is intended to be used as a VIDIOC_STREAMON ioctl handler. It
- * starts streaming on the queue and calls the buffer_queue operation for all
- * queued buffers.
- *
- * Return 0 on success.
- */
-int omap3isp_video_queue_streamon(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-
-	mutex_lock(&queue->lock);
-
-	if (queue->streaming)
-		goto done;
-
-	queue->streaming = 1;
-
-	spin_lock_irqsave(&queue->irqlock, flags);
-	list_for_each_entry(buf, &queue->queue, stream)
-		queue->ops->buffer_queue(buf);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
-
-done:
-	mutex_unlock(&queue->lock);
-	return 0;
-}
-
-/**
- * omap3isp_video_queue_streamoff - Stop streaming
- *
- * This function is intended to be used as a VIDIOC_STREAMOFF ioctl handler. It
- * stops streaming on the queue and wakes up all the buffers.
- *
- * Drivers must stop the hardware and synchronize with interrupt handlers and/or
- * delayed works before calling this function to make sure no buffer will be
- * touched by the driver and/or hardware.
- */
-void omap3isp_video_queue_streamoff(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-	unsigned int i;
-
-	mutex_lock(&queue->lock);
-
-	if (!queue->streaming)
-		goto done;
-
-	queue->streaming = 0;
-
-	spin_lock_irqsave(&queue->irqlock, flags);
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-
-		if (buf->state == ISP_BUF_STATE_ACTIVE)
-			wake_up(&buf->wait);
-
-		buf->state = ISP_BUF_STATE_IDLE;
-	}
-	spin_unlock_irqrestore(&queue->irqlock, flags);
-
-	INIT_LIST_HEAD(&queue->queue);
-
-done:
-	mutex_unlock(&queue->lock);
-}
-
-/**
- * omap3isp_video_queue_discard_done - Discard all buffers marked as DONE
- *
- * This function is intended to be used with suspend/resume operations. It
- * discards all 'done' buffers as they would be too old to be requested after
- * resume.
- *
- * Drivers must stop the hardware and synchronize with interrupt handlers and/or
- * delayed works before calling this function to make sure no buffer will be
- * touched by the driver and/or hardware.
- */
-void omap3isp_video_queue_discard_done(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned int i;
-
-	mutex_lock(&queue->lock);
-
-	if (!queue->streaming)
-		goto done;
-
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-
-		if (buf->state == ISP_BUF_STATE_DONE)
-			buf->state = ISP_BUF_STATE_ERROR;
-	}
-
-done:
-	mutex_unlock(&queue->lock);
-}
-
-static void isp_video_queue_vm_open(struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *buf = vma->vm_private_data;
-
-	buf->vma_use_count++;
-}
-
-static void isp_video_queue_vm_close(struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *buf = vma->vm_private_data;
-
-	buf->vma_use_count--;
-}
-
-static const struct vm_operations_struct isp_video_queue_vm_ops = {
-	.open = isp_video_queue_vm_open,
-	.close = isp_video_queue_vm_close,
-};
-
-/**
- * omap3isp_video_queue_mmap - Map buffers to userspace
- *
- * This function is intended to be used as an mmap() file operation handler. It
- * maps a buffer to userspace based on the VMA offset.
- *
- * Only buffers of memory type MMAP are supported.
- */
-int omap3isp_video_queue_mmap(struct isp_video_queue *queue,
-			 struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *uninitialized_var(buf);
-	unsigned long size;
-	unsigned int i;
-	int ret = 0;
-
-	mutex_lock(&queue->lock);
-
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-		if ((buf->vbuf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff)
-			break;
-	}
-
-	if (i == queue->count) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	size = vma->vm_end - vma->vm_start;
-
-	if (buf->vbuf.memory != V4L2_MEMORY_MMAP ||
-	    size != PAGE_ALIGN(buf->vbuf.length)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	ret = remap_vmalloc_range(vma, buf->vaddr, 0);
-	if (ret < 0)
-		goto done;
-
-	vma->vm_ops = &isp_video_queue_vm_ops;
-	vma->vm_private_data = buf;
-	isp_video_queue_vm_open(vma);
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_poll - Poll video queue state
- *
- * This function is intended to be used as a poll() file operation handler. It
- * polls the state of the video buffer at the front of the queue and returns an
- * events mask.
- *
- * If no buffer is present at the front of the queue, POLLERR is returned.
- */
-unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue,
-				       struct file *file, poll_table *wait)
-{
-	struct isp_video_buffer *buf;
-	unsigned int mask = 0;
-
-	mutex_lock(&queue->lock);
-	if (list_empty(&queue->queue)) {
-		mask |= POLLERR;
-		goto done;
-	}
-	buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream);
-
-	poll_wait(file, &buf->wait, wait);
-	if (buf->state == ISP_BUF_STATE_DONE ||
-	    buf->state == ISP_BUF_STATE_ERROR) {
-		if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-			mask |= POLLIN | POLLRDNORM;
-		else
-			mask |= POLLOUT | POLLWRNORM;
-	}
-
-done:
-	mutex_unlock(&queue->lock);
-	return mask;
-}

diff --git a/drivers/media/platform/omap3isp/ispqueue.h b/drivers/media/platform/omap3isp/ispqueue.h
deleted file mode 100644
index 3e048ad..0000000
--- a/drivers/media/platform/omap3isp/ispqueue.h
+++ /dev/null

@@ -1,188 +0,0 @@
-/*
- * ispqueue.h
- *
- * TI OMAP3 ISP - Video buffers queue handling
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *	     Sakari Ailus <sakari.ailus@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#ifndef OMAP3_ISP_QUEUE_H
-#define OMAP3_ISP_QUEUE_H
-
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mm_types.h>
-#include <linux/mutex.h>
-#include <linux/videodev2.h>
-#include <linux/wait.h>
-
-struct isp_video_queue;
-struct page;
-struct scatterlist;
-
-#define ISP_VIDEO_MAX_BUFFERS		16
-
-/**
- * enum isp_video_buffer_state - ISP video buffer state
- * @ISP_BUF_STATE_IDLE:	The buffer is under userspace control (dequeued
- *	or not queued yet).
- * @ISP_BUF_STATE_QUEUED: The buffer has been queued but isn't used by the
- *	device yet.
- * @ISP_BUF_STATE_ACTIVE: The buffer is in use for an active video transfer.
- * @ISP_BUF_STATE_ERROR: The device is done with the buffer and an error
- *	occurred. For capture device the buffer likely contains corrupted data or
- *	no data at all.
- * @ISP_BUF_STATE_DONE: The device is done with the buffer and no error occurred.
- *	For capture devices the buffer contains valid data.
- */
-enum isp_video_buffer_state {
-	ISP_BUF_STATE_IDLE,
-	ISP_BUF_STATE_QUEUED,
-	ISP_BUF_STATE_ACTIVE,
-	ISP_BUF_STATE_ERROR,
-	ISP_BUF_STATE_DONE,
-};
-
-/**
- * struct isp_video_buffer - ISP video buffer
- * @vma_use_count: Number of times the buffer is mmap'ed to userspace
- * @stream: List head for insertion into main queue
- * @queue: ISP buffers queue this buffer belongs to
- * @prepared: Whether the buffer has been prepared
- * @skip_cache: Whether to skip cache management operations for this buffer
- * @vaddr: Memory virtual address (for kernel buffers)
- * @vm_flags: Buffer VMA flags (for userspace buffers)
- * @offset: Offset inside the first page (for userspace buffers)
- * @npages: Number of pages (for userspace buffers)
- * @pages: Pages table (for userspace non-VM_PFNMAP buffers)
- * @paddr: Memory physical address (for userspace VM_PFNMAP buffers)
- * @sglen: Number of elements in the scatter list (for non-VM_PFNMAP buffers)
- * @sglist: Scatter list (for non-VM_PFNMAP buffers)
- * @vbuf: V4L2 buffer
- * @irqlist: List head for insertion into IRQ queue
- * @state: Current buffer state
- * @wait: Wait queue to signal buffer completion
- */
-struct isp_video_buffer {
-	unsigned long vma_use_count;
-	struct list_head stream;
-	struct isp_video_queue *queue;
-	unsigned int prepared:1;
-	bool skip_cache;
-
-	/* For kernel buffers. */
-	void *vaddr;
-
-	/* For userspace buffers. */
-	vm_flags_t vm_flags;
-	unsigned long offset;
-	unsigned int npages;
-	struct page **pages;
-	dma_addr_t paddr;
-
-	/* For all buffers except VM_PFNMAP. */
-	unsigned int sglen;
-	struct scatterlist *sglist;
-
-	/* Touched by the interrupt handler. */
-	struct v4l2_buffer vbuf;
-	struct list_head irqlist;
-	enum isp_video_buffer_state state;
-	wait_queue_head_t wait;
-};
-
-#define to_isp_video_buffer(vb)	container_of(vb, struct isp_video_buffer, vb)
-
-/**
- * struct isp_video_queue_operations - Driver-specific operations
- * @queue_prepare: Called before allocating buffers. Drivers should clamp the
- *	number of buffers according to their requirements, and must return the
- *	buffer size in bytes.
- * @buffer_prepare: Called the first time a buffer is queued, or after changing
- *	the userspace memory address for a USERPTR buffer, with the queue lock
- *	held. Drivers should perform device-specific buffer preparation (such as
- *	mapping the buffer memory in an IOMMU). This operation is optional.
- * @buffer_queue: Called when a buffer is being added to the queue with the
- *	queue irqlock spinlock held.
- * @buffer_cleanup: Called before freeing buffers, or before changing the
- *	userspace memory address for a USERPTR buffer, with the queue lock held.
- *	Drivers must perform cleanup operations required to undo the
- *	buffer_prepare call. This operation is optional.
- */
-struct isp_video_queue_operations {
-	void (*queue_prepare)(struct isp_video_queue *queue,
-			      unsigned int *nbuffers, unsigned int *size);
-	int  (*buffer_prepare)(struct isp_video_buffer *buf);
-	void (*buffer_queue)(struct isp_video_buffer *buf);
-	void (*buffer_cleanup)(struct isp_video_buffer *buf);
-};
-
-/**
- * struct isp_video_queue - ISP video buffers queue
- * @type: Type of video buffers handled by this queue
- * @ops: Queue operations
- * @dev: Device used for DMA operations
- * @bufsize: Size of a driver-specific buffer object
- * @count: Number of currently allocated buffers
- * @buffers: ISP video buffers
- * @lock: Mutex to protect access to the buffers, main queue and state
- * @irqlock: Spinlock to protect access to the IRQ queue
- * @streaming: Queue state, indicates whether the queue is streaming
- * @queue: List of all queued buffers
- */
-struct isp_video_queue {
-	enum v4l2_buf_type type;
-	const struct isp_video_queue_operations *ops;
-	struct device *dev;
-	unsigned int bufsize;
-
-	unsigned int count;
-	struct isp_video_buffer *buffers[ISP_VIDEO_MAX_BUFFERS];
-	struct mutex lock;
-	spinlock_t irqlock;
-
-	unsigned int streaming:1;
-
-	struct list_head queue;
-};
-
-int omap3isp_video_queue_cleanup(struct isp_video_queue *queue);
-int omap3isp_video_queue_init(struct isp_video_queue *queue,
-			      enum v4l2_buf_type type,
-			      const struct isp_video_queue_operations *ops,
-			      struct device *dev, unsigned int bufsize);
-
-int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue,
-				 struct v4l2_requestbuffers *rb);
-int omap3isp_video_queue_querybuf(struct isp_video_queue *queue,
-				  struct v4l2_buffer *vbuf);
-int omap3isp_video_queue_qbuf(struct isp_video_queue *queue,
-			      struct v4l2_buffer *vbuf);
-int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue,
-			       struct v4l2_buffer *vbuf, int nonblocking);
-int omap3isp_video_queue_streamon(struct isp_video_queue *queue);
-void omap3isp_video_queue_streamoff(struct isp_video_queue *queue);
-void omap3isp_video_queue_discard_done(struct isp_video_queue *queue);
-int omap3isp_video_queue_mmap(struct isp_video_queue *queue,
-			      struct vm_area_struct *vma);
-unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue,
-				       struct file *file, poll_table *wait);
-
-#endif /* OMAP3_ISP_QUEUE_H */

diff --git a/drivers/media/platform/omap3isp/ispresizer.c b/drivers/media/platform/omap3isp/ispresizer.c
index 86369df..6f077c2 100644
--- a/drivers/media/platform/omap3isp/ispresizer.c
+++ b/drivers/media/platform/omap3isp/ispresizer.c

@@ -1040,7 +1040,7 @@
 	 */
 	buffer = omap3isp_video_buffer_next(&res->video_out);
 	if (buffer != NULL) {
-		resizer_set_outaddr(res, buffer->isp_addr);
+		resizer_set_outaddr(res, buffer->dma);
 		restart = 1;
 	}
 
@@ -1049,7 +1049,7 @@
 	if (res->input == RESIZER_INPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&res->video_in);
 		if (buffer != NULL)
-			resizer_set_inaddr(res, buffer->isp_addr);
+			resizer_set_inaddr(res, buffer->dma);
 		pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 	}
 
@@ -1101,7 +1101,7 @@
 	struct isp_res_device *res = &video->isp->isp_res;
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-		resizer_set_inaddr(res, buffer->isp_addr);
+		resizer_set_inaddr(res, buffer->dma);
 
 	/*
 	 * We now have a buffer queued on the output. Despite what the
@@ -1116,7 +1116,7 @@
 	 * continuous mode or when starting the stream.
 	 */
 	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		resizer_set_outaddr(res, buffer->isp_addr);
+		resizer_set_outaddr(res, buffer->dma);
 
 	return 0;
 }

diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5707f85..e6cbc1e 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c

@@ -26,13 +26,12 @@
  */
 
 #include <linux/dma-mapping.h>
-#include <linux/omap-iommu.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
 #include "isp.h"
 
-#define IS_COHERENT_BUF(stat)	((stat)->dma_ch >= 0)
+#define ISP_STAT_USES_DMAENGINE(stat)	((stat)->dma_ch >= 0)
 
 /*
  * MAGIC_SIZE must always be the greatest common divisor of
@@ -77,21 +76,10 @@
 					dma_addr_t, unsigned long, size_t,
 					enum dma_data_direction))
 {
-	struct device *dev = stat->isp->dev;
-	struct page *pg;
-	dma_addr_t dma_addr;
-	u32 offset;
-
-	/* Initial magic words */
-	pg = vmalloc_to_page(buf->virt_addr);
-	dma_addr = pfn_to_dma(dev, page_to_pfn(pg));
-	dma_sync(dev, dma_addr, 0, MAGIC_SIZE, dir);
-
-	/* Final magic words */
-	pg = vmalloc_to_page(buf->virt_addr + buf_size);
-	dma_addr = pfn_to_dma(dev, page_to_pfn(pg));
-	offset = ((u32)buf->virt_addr + buf_size) & ~PAGE_MASK;
-	dma_sync(dev, dma_addr, offset, MAGIC_SIZE, dir);
+	/* Sync the initial and final magic words. */
+	dma_sync(stat->isp->dev, buf->dma_addr, 0, MAGIC_SIZE, dir);
+	dma_sync(stat->isp->dev, buf->dma_addr + (buf_size & PAGE_MASK),
+		 buf_size & ~PAGE_MASK, MAGIC_SIZE, dir);
 }
 
 static void isp_stat_buf_sync_magic_for_device(struct ispstat *stat,
@@ -99,7 +87,7 @@
 					       u32 buf_size,
 					       enum dma_data_direction dir)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
 	__isp_stat_buf_sync_magic(stat, buf, buf_size, dir,
@@ -111,7 +99,7 @@
 					    u32 buf_size,
 					    enum dma_data_direction dir)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
 	__isp_stat_buf_sync_magic(stat, buf, buf_size, dir,
@@ -180,21 +168,21 @@
 static void isp_stat_buf_sync_for_device(struct ispstat *stat,
 					 struct ispstat_buffer *buf)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
-	dma_sync_sg_for_device(stat->isp->dev, buf->iovm->sgt->sgl,
-			       buf->iovm->sgt->nents, DMA_FROM_DEVICE);
+	dma_sync_sg_for_device(stat->isp->dev, buf->sgt.sgl,
+			       buf->sgt.nents, DMA_FROM_DEVICE);
 }
 
 static void isp_stat_buf_sync_for_cpu(struct ispstat *stat,
 				      struct ispstat_buffer *buf)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
-	dma_sync_sg_for_cpu(stat->isp->dev, buf->iovm->sgt->sgl,
-			    buf->iovm->sgt->nents, DMA_FROM_DEVICE);
+	dma_sync_sg_for_cpu(stat->isp->dev, buf->sgt.sgl,
+			    buf->sgt.nents, DMA_FROM_DEVICE);
 }
 
 static void isp_stat_buf_clear(struct ispstat *stat)
@@ -354,29 +342,21 @@
 
 static void isp_stat_bufs_free(struct ispstat *stat)
 {
-	struct isp_device *isp = stat->isp;
-	int i;
+	struct device *dev = ISP_STAT_USES_DMAENGINE(stat)
+			   ? NULL : stat->isp->dev;
+	unsigned int i;
 
 	for (i = 0; i < STAT_MAX_BUFS; i++) {
 		struct ispstat_buffer *buf = &stat->buf[i];
 
-		if (!IS_COHERENT_BUF(stat)) {
-			if (IS_ERR_OR_NULL((void *)buf->iommu_addr))
-				continue;
-			if (buf->iovm)
-				dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl,
-					     buf->iovm->sgt->nents,
-					     DMA_FROM_DEVICE);
-			omap_iommu_vfree(isp->domain, isp->dev,
-							buf->iommu_addr);
-		} else {
-			if (!buf->virt_addr)
-				continue;
-			dma_free_coherent(stat->isp->dev, stat->buf_alloc_size,
-					  buf->virt_addr, buf->dma_addr);
-		}
-		buf->iommu_addr = 0;
-		buf->iovm = NULL;
+		if (!buf->virt_addr)
+			continue;
+
+		sg_free_table(&buf->sgt);
+
+		dma_free_coherent(dev, stat->buf_alloc_size, buf->virt_addr,
+				  buf->dma_addr);
+
 		buf->dma_addr = 0;
 		buf->virt_addr = NULL;
 		buf->empty = 1;
@@ -389,83 +369,51 @@
 	stat->active_buf = NULL;
 }
 
-static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
+static int isp_stat_bufs_alloc_one(struct device *dev,
+				   struct ispstat_buffer *buf,
+				   unsigned int size)
 {
-	struct isp_device *isp = stat->isp;
-	int i;
+	int ret;
 
-	stat->buf_alloc_size = size;
+	buf->virt_addr = dma_alloc_coherent(dev, size, &buf->dma_addr,
+					    GFP_KERNEL | GFP_DMA);
+	if (!buf->virt_addr)
+		return -ENOMEM;
 
-	for (i = 0; i < STAT_MAX_BUFS; i++) {
-		struct ispstat_buffer *buf = &stat->buf[i];
-		struct iovm_struct *iovm;
-
-		WARN_ON(buf->dma_addr);
-		buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
-							size, IOMMU_FLAG);
-		if (IS_ERR((void *)buf->iommu_addr)) {
-			dev_err(stat->isp->dev,
-				 "%s: Can't acquire memory for "
-				 "buffer %d\n", stat->subdev.name, i);
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-
-		iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr);
-		if (!iovm ||
-		    !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents,
-				DMA_FROM_DEVICE)) {
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-		buf->iovm = iovm;
-
-		buf->virt_addr = omap_da_to_va(stat->isp->dev,
-					  (u32)buf->iommu_addr);
-		buf->empty = 1;
-		dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
-			"iommu_addr=0x%08lx virt_addr=0x%08lx",
-			stat->subdev.name, i, buf->iommu_addr,
-			(unsigned long)buf->virt_addr);
+	ret = dma_get_sgtable(dev, &buf->sgt, buf->virt_addr, buf->dma_addr,
+			      size);
+	if (ret < 0) {
+		dma_free_coherent(dev, size, buf->virt_addr, buf->dma_addr);
+		buf->virt_addr = NULL;
+		buf->dma_addr = 0;
+		return ret;
 	}
 
 	return 0;
 }
 
-static int isp_stat_bufs_alloc_dma(struct ispstat *stat, unsigned int size)
-{
-	int i;
-
-	stat->buf_alloc_size = size;
-
-	for (i = 0; i < STAT_MAX_BUFS; i++) {
-		struct ispstat_buffer *buf = &stat->buf[i];
-
-		WARN_ON(buf->iommu_addr);
-		buf->virt_addr = dma_alloc_coherent(stat->isp->dev, size,
-					&buf->dma_addr, GFP_KERNEL | GFP_DMA);
-
-		if (!buf->virt_addr || !buf->dma_addr) {
-			dev_info(stat->isp->dev,
-				 "%s: Can't acquire memory for "
-				 "DMA buffer %d\n", stat->subdev.name, i);
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-		buf->empty = 1;
-
-		dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
-			"dma_addr=0x%08lx virt_addr=0x%08lx\n",
-			stat->subdev.name, i, (unsigned long)buf->dma_addr,
-			(unsigned long)buf->virt_addr);
-	}
-
-	return 0;
-}
-
+/*
+ * The device passed to the DMA API depends on whether the statistics block uses
+ * ISP DMA, external DMA or PIO to transfer data.
+ *
+ * The first case (for the AEWB and AF engines) passes the ISP device, resulting
+ * in the DMA buffers being mapped through the ISP IOMMU.
+ *
+ * The second case (for the histogram engine) should pass the DMA engine device.
+ * As that device isn't accessible through the OMAP DMA engine API the driver
+ * passes NULL instead, resulting in the buffers being mapped directly as
+ * physical pages.
+ *
+ * The third case (for the histogram engine) doesn't require any mapping. The
+ * buffers could be allocated with kmalloc/vmalloc, but we still use
+ * dma_alloc_coherent() for consistency purpose.
+ */
 static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size)
 {
+	struct device *dev = ISP_STAT_USES_DMAENGINE(stat)
+			   ? NULL : stat->isp->dev;
 	unsigned long flags;
+	unsigned int i;
 
 	spin_lock_irqsave(&stat->isp->stat_lock, flags);
 
@@ -489,10 +437,31 @@
 
 	isp_stat_bufs_free(stat);
 
-	if (IS_COHERENT_BUF(stat))
-		return isp_stat_bufs_alloc_dma(stat, size);
-	else
-		return isp_stat_bufs_alloc_iommu(stat, size);
+	stat->buf_alloc_size = size;
+
+	for (i = 0; i < STAT_MAX_BUFS; i++) {
+		struct ispstat_buffer *buf = &stat->buf[i];
+		int ret;
+
+		ret = isp_stat_bufs_alloc_one(dev, buf, size);
+		if (ret < 0) {
+			dev_err(stat->isp->dev,
+				"%s: Failed to allocate DMA buffer %u\n",
+				stat->subdev.name, i);
+			isp_stat_bufs_free(stat);
+			return ret;
+		}
+
+		buf->empty = 1;
+
+		dev_dbg(stat->isp->dev,
+			"%s: buffer[%u] allocated. dma=0x%08lx virt=0x%08lx",
+			stat->subdev.name, i,
+			(unsigned long)buf->dma_addr,
+			(unsigned long)buf->virt_addr);
+	}
+
+	return 0;
 }
 
 static void isp_stat_queue_event(struct ispstat *stat, int err)

diff --git a/drivers/media/platform/omap3isp/ispstat.h b/drivers/media/platform/omap3isp/ispstat.h
index 9a047c9..58d6ac7 100644
--- a/drivers/media/platform/omap3isp/ispstat.h
+++ b/drivers/media/platform/omap3isp/ispstat.h

@@ -46,8 +46,7 @@
 struct ispstat;
 
 struct ispstat_buffer {
-	unsigned long iommu_addr;
-	struct iovm_struct *iovm;
+	struct sg_table sgt;
 	void *virt_addr;
 	dma_addr_t dma_addr;
 	struct timespec ts;

diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index 85b4036..e36bac2 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c

@@ -27,7 +27,6 @@
 #include <linux/clk.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/omap-iommu.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
@@ -35,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <media/v4l2-dev.h>
 #include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
 
 #include "ispvideo.h"
 #include "isp.h"
@@ -326,90 +326,36 @@
 }
 
 /* -----------------------------------------------------------------------------
- * IOMMU management
- */
-
-#define IOMMU_FLAG	(IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8)
-
-/*
- * ispmmu_vmap - Wrapper for Virtual memory mapping of a scatter gather list
- * @isp: Device pointer specific to the OMAP3 ISP.
- * @sglist: Pointer to source Scatter gather list to allocate.
- * @sglen: Number of elements of the scatter-gatter list.
- *
- * Returns a resulting mapped device address by the ISP MMU, or -ENOMEM if
- * we ran out of memory.
- */
-static dma_addr_t
-ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen)
-{
-	struct sg_table *sgt;
-	u32 da;
-
-	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
-	if (sgt == NULL)
-		return -ENOMEM;
-
-	sgt->sgl = (struct scatterlist *)sglist;
-	sgt->nents = sglen;
-	sgt->orig_nents = sglen;
-
-	da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG);
-	if (IS_ERR_VALUE(da))
-		kfree(sgt);
-
-	return da;
-}
-
-/*
- * ispmmu_vunmap - Unmap a device address from the ISP MMU
- * @isp: Device pointer specific to the OMAP3 ISP.
- * @da: Device address generated from a ispmmu_vmap call.
- */
-static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da)
-{
-	struct sg_table *sgt;
-
-	sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da);
-	kfree(sgt);
-}
-
-/* -----------------------------------------------------------------------------
  * Video queue operations
  */
 
-static void isp_video_queue_prepare(struct isp_video_queue *queue,
-				    unsigned int *nbuffers, unsigned int *size)
+static int isp_video_queue_setup(struct vb2_queue *queue,
+				 const struct v4l2_format *fmt,
+				 unsigned int *count, unsigned int *num_planes,
+				 unsigned int sizes[], void *alloc_ctxs[])
 {
-	struct isp_video_fh *vfh =
-		container_of(queue, struct isp_video_fh, queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(queue);
 	struct isp_video *video = vfh->video;
 
-	*size = vfh->format.fmt.pix.sizeimage;
-	if (*size == 0)
-		return;
+	*num_planes = 1;
 
-	*nbuffers = min(*nbuffers, video->capture_mem / PAGE_ALIGN(*size));
+	sizes[0] = vfh->format.fmt.pix.sizeimage;
+	if (sizes[0] == 0)
+		return -EINVAL;
+
+	alloc_ctxs[0] = video->alloc_ctx;
+
+	*count = min(*count, video->capture_mem / PAGE_ALIGN(sizes[0]));
+
+	return 0;
 }
 
-static void isp_video_buffer_cleanup(struct isp_video_buffer *buf)
+static int isp_video_buffer_prepare(struct vb2_buffer *buf)
 {
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue);
 	struct isp_buffer *buffer = to_isp_buffer(buf);
 	struct isp_video *video = vfh->video;
-
-	if (buffer->isp_addr) {
-		ispmmu_vunmap(video->isp, buffer->isp_addr);
-		buffer->isp_addr = 0;
-	}
-}
-
-static int isp_video_buffer_prepare(struct isp_video_buffer *buf)
-{
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
-	struct isp_buffer *buffer = to_isp_buffer(buf);
-	struct isp_video *video = vfh->video;
-	unsigned long addr;
+	dma_addr_t addr;
 
 	/* Refuse to prepare the buffer is the video node has registered an
 	 * error. We don't need to take any lock here as the operation is
@@ -420,19 +366,16 @@
 	if (unlikely(video->error))
 		return -EIO;
 
-	addr = ispmmu_vmap(video->isp, buf->sglist, buf->sglen);
-	if (IS_ERR_VALUE(addr))
-		return -EIO;
-
+	addr = vb2_dma_contig_plane_dma_addr(buf, 0);
 	if (!IS_ALIGNED(addr, 32)) {
-		dev_dbg(video->isp->dev, "Buffer address must be "
-			"aligned to 32 bytes boundary.\n");
-		ispmmu_vunmap(video->isp, buffer->isp_addr);
+		dev_dbg(video->isp->dev,
+			"Buffer address must be aligned to 32 bytes boundary.\n");
 		return -EINVAL;
 	}
 
-	buf->vbuf.bytesused = vfh->format.fmt.pix.sizeimage;
-	buffer->isp_addr = addr;
+	vb2_set_plane_payload(&buffer->vb, 0, vfh->format.fmt.pix.sizeimage);
+	buffer->dma = addr;
+
 	return 0;
 }
 
@@ -445,9 +388,9 @@
  * If the pipeline is busy, it will be restarted in the output module interrupt
  * handler.
  */
-static void isp_video_buffer_queue(struct isp_video_buffer *buf)
+static void isp_video_buffer_queue(struct vb2_buffer *buf)
 {
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue);
 	struct isp_buffer *buffer = to_isp_buffer(buf);
 	struct isp_video *video = vfh->video;
 	struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity);
@@ -456,14 +399,18 @@
 	unsigned int empty;
 	unsigned int start;
 
+	spin_lock_irqsave(&video->irqlock, flags);
+
 	if (unlikely(video->error)) {
-		buf->state = ISP_BUF_STATE_ERROR;
-		wake_up(&buf->wait);
+		vb2_buffer_done(&buffer->vb, VB2_BUF_STATE_ERROR);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		return;
 	}
 
 	empty = list_empty(&video->dmaqueue);
-	list_add_tail(&buffer->buffer.irqlist, &video->dmaqueue);
+	list_add_tail(&buffer->irqlist, &video->dmaqueue);
+
+	spin_unlock_irqrestore(&video->irqlock, flags);
 
 	if (empty) {
 		if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
@@ -487,23 +434,22 @@
 	}
 }
 
-static const struct isp_video_queue_operations isp_video_queue_ops = {
-	.queue_prepare = &isp_video_queue_prepare,
-	.buffer_prepare = &isp_video_buffer_prepare,
-	.buffer_queue = &isp_video_buffer_queue,
-	.buffer_cleanup = &isp_video_buffer_cleanup,
+static const struct vb2_ops isp_video_queue_ops = {
+	.queue_setup = isp_video_queue_setup,
+	.buf_prepare = isp_video_buffer_prepare,
+	.buf_queue = isp_video_buffer_queue,
 };
 
 /*
  * omap3isp_video_buffer_next - Complete the current buffer and return the next
  * @video: ISP video object
  *
- * Remove the current video buffer from the DMA queue and fill its timestamp,
- * field count and state fields before waking up its completion handler.
+ * Remove the current video buffer from the DMA queue and fill its timestamp and
+ * field count before handing it back to videobuf2.
  *
- * For capture video nodes the buffer state is set to ISP_BUF_STATE_DONE if no
- * error has been flagged in the pipeline, or to ISP_BUF_STATE_ERROR otherwise.
- * For video output nodes the buffer state is always set to ISP_BUF_STATE_DONE.
+ * For capture video nodes the buffer state is set to VB2_BUF_STATE_DONE if no
+ * error has been flagged in the pipeline, or to VB2_BUF_STATE_ERROR otherwise.
+ * For video output nodes the buffer state is always set to VB2_BUF_STATE_DONE.
  *
  * The DMA queue is expected to contain at least one buffer.
  *
@@ -513,26 +459,25 @@
 struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video)
 {
 	struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity);
-	struct isp_video_queue *queue = video->queue;
 	enum isp_pipeline_state state;
-	struct isp_video_buffer *buf;
+	struct isp_buffer *buf;
 	unsigned long flags;
 	struct timespec ts;
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irqsave(&video->irqlock, flags);
 	if (WARN_ON(list_empty(&video->dmaqueue))) {
-		spin_unlock_irqrestore(&queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		return NULL;
 	}
 
-	buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer,
+	buf = list_first_entry(&video->dmaqueue, struct isp_buffer,
 			       irqlist);
 	list_del(&buf->irqlist);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irqrestore(&video->irqlock, flags);
 
 	ktime_get_ts(&ts);
-	buf->vbuf.timestamp.tv_sec = ts.tv_sec;
-	buf->vbuf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+	buf->vb.v4l2_buf.timestamp.tv_sec = ts.tv_sec;
+	buf->vb.v4l2_buf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
 
 	/* Do frame number propagation only if this is the output video node.
 	 * Frame number either comes from the CSI receivers or it gets
@@ -541,22 +486,27 @@
 	 * first, so the input number might lag behind by 1 in some cases.
 	 */
 	if (video == pipe->output && !pipe->do_propagation)
-		buf->vbuf.sequence = atomic_inc_return(&pipe->frame_number);
+		buf->vb.v4l2_buf.sequence =
+			atomic_inc_return(&pipe->frame_number);
 	else
-		buf->vbuf.sequence = atomic_read(&pipe->frame_number);
+		buf->vb.v4l2_buf.sequence = atomic_read(&pipe->frame_number);
 
 	/* Report pipeline errors to userspace on the capture device side. */
-	if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) {
-		buf->state = ISP_BUF_STATE_ERROR;
+	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) {
+		state = VB2_BUF_STATE_ERROR;
 		pipe->error = false;
 	} else {
-		buf->state = ISP_BUF_STATE_DONE;
+		state = VB2_BUF_STATE_DONE;
 	}
 
-	wake_up(&buf->wait);
+	vb2_buffer_done(&buf->vb, state);
+
+	spin_lock_irqsave(&video->irqlock, flags);
 
 	if (list_empty(&video->dmaqueue)) {
-		if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		spin_unlock_irqrestore(&video->irqlock, flags);
+
+		if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
 			state = ISP_PIPELINE_QUEUE_OUTPUT
 			      | ISP_PIPELINE_STREAM;
 		else
@@ -571,16 +521,19 @@
 		return NULL;
 	}
 
-	if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) {
-		spin_lock_irqsave(&pipe->lock, flags);
+	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) {
+		spin_lock(&pipe->lock);
 		pipe->state &= ~ISP_PIPELINE_STREAM;
-		spin_unlock_irqrestore(&pipe->lock, flags);
+		spin_unlock(&pipe->lock);
 	}
 
-	buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer,
+	buf = list_first_entry(&video->dmaqueue, struct isp_buffer,
 			       irqlist);
-	buf->state = ISP_BUF_STATE_ACTIVE;
-	return to_isp_buffer(buf);
+	buf->vb.state = VB2_BUF_STATE_ACTIVE;
+
+	spin_unlock_irqrestore(&video->irqlock, flags);
+
+	return buf;
 }
 
 /*
@@ -592,25 +545,22 @@
  */
 void omap3isp_video_cancel_stream(struct isp_video *video)
 {
-	struct isp_video_queue *queue = video->queue;
 	unsigned long flags;
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irqsave(&video->irqlock, flags);
 
 	while (!list_empty(&video->dmaqueue)) {
-		struct isp_video_buffer *buf;
+		struct isp_buffer *buf;
 
 		buf = list_first_entry(&video->dmaqueue,
-				       struct isp_video_buffer, irqlist);
+				       struct isp_buffer, irqlist);
 		list_del(&buf->irqlist);
-
-		buf->state = ISP_BUF_STATE_ERROR;
-		wake_up(&buf->wait);
+		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
 	}
 
 	video->error = true;
 
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irqrestore(&video->irqlock, flags);
 }
 
 /*
@@ -627,12 +577,15 @@
 {
 	struct isp_buffer *buf = NULL;
 
-	if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		omap3isp_video_queue_discard_done(video->queue);
+	if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+		mutex_lock(&video->queue_lock);
+		vb2_discard_done(video->queue);
+		mutex_unlock(&video->queue_lock);
+	}
 
 	if (!list_empty(&video->dmaqueue)) {
 		buf = list_first_entry(&video->dmaqueue,
-				       struct isp_buffer, buffer.irqlist);
+				       struct isp_buffer, irqlist);
 		video->ops->queue(video, buf);
 		video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_QUEUED;
 	} else {
@@ -840,33 +793,56 @@
 isp_video_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers *rb)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_reqbufs(&vfh->queue, rb);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_reqbufs(&vfh->queue, rb);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_querybuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_querybuf(&vfh->queue, b);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_querybuf(&vfh->queue, b);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_qbuf(&vfh->queue, b);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_qbuf(&vfh->queue, b);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_dqbuf(&vfh->queue, b,
-					  file->f_flags & O_NONBLOCK);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_dqbuf(&vfh->queue, b, file->f_flags & O_NONBLOCK);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int isp_video_check_external_subdevs(struct isp_video *video,
@@ -1006,11 +982,6 @@
 
 	mutex_lock(&video->stream_lock);
 
-	if (video->streaming) {
-		mutex_unlock(&video->stream_lock);
-		return -EBUSY;
-	}
-
 	/* Start streaming on the pipeline. No link touching an entity in the
 	 * pipeline can be activated or deactivated once streaming is started.
 	 */
@@ -1069,7 +1040,9 @@
 	INIT_LIST_HEAD(&video->dmaqueue);
 	atomic_set(&pipe->frame_number, -1);
 
-	ret = omap3isp_video_queue_streamon(&vfh->queue);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_streamon(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 	if (ret < 0)
 		goto err_check_format;
 
@@ -1082,19 +1055,19 @@
 					      ISP_PIPELINE_STREAM_CONTINUOUS);
 		if (ret < 0)
 			goto err_set_stream;
-		spin_lock_irqsave(&video->queue->irqlock, flags);
+		spin_lock_irqsave(&video->irqlock, flags);
 		if (list_empty(&video->dmaqueue))
 			video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_UNDERRUN;
-		spin_unlock_irqrestore(&video->queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 	}
 
-	video->streaming = 1;
-
 	mutex_unlock(&video->stream_lock);
 	return 0;
 
 err_set_stream:
-	omap3isp_video_queue_streamoff(&vfh->queue);
+	mutex_lock(&video->queue_lock);
+	vb2_streamoff(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 err_check_format:
 	media_entity_pipeline_stop(&video->video.entity);
 err_pipeline_start:
@@ -1130,9 +1103,9 @@
 	mutex_lock(&video->stream_lock);
 
 	/* Make sure we're not streaming yet. */
-	mutex_lock(&vfh->queue.lock);
-	streaming = vfh->queue.streaming;
-	mutex_unlock(&vfh->queue.lock);
+	mutex_lock(&video->queue_lock);
+	streaming = vb2_is_streaming(&vfh->queue);
+	mutex_unlock(&video->queue_lock);
 
 	if (!streaming)
 		goto done;
@@ -1151,9 +1124,12 @@
 
 	/* Stop the stream. */
 	omap3isp_pipeline_set_stream(pipe, ISP_PIPELINE_STREAM_STOPPED);
-	omap3isp_video_queue_streamoff(&vfh->queue);
+	omap3isp_video_cancel_stream(video);
+
+	mutex_lock(&video->queue_lock);
+	vb2_streamoff(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 	video->queue = NULL;
-	video->streaming = 0;
 	video->error = false;
 
 	if (video->isp->pdata->set_constraints)
@@ -1223,6 +1199,7 @@
 {
 	struct isp_video *video = video_drvdata(file);
 	struct isp_video_fh *handle;
+	struct vb2_queue *queue;
 	int ret = 0;
 
 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
@@ -1244,9 +1221,20 @@
 		goto done;
 	}
 
-	omap3isp_video_queue_init(&handle->queue, video->type,
-				  &isp_video_queue_ops, video->isp->dev,
-				  sizeof(struct isp_buffer));
+	queue = &handle->queue;
+	queue->type = video->type;
+	queue->io_modes = VB2_MMAP | VB2_USERPTR;
+	queue->drv_priv = handle;
+	queue->ops = &isp_video_queue_ops;
+	queue->mem_ops = &vb2_dma_contig_memops;
+	queue->buf_struct_size = sizeof(struct isp_buffer);
+	queue->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+
+	ret = vb2_queue_init(&handle->queue);
+	if (ret < 0) {
+		omap3isp_put(video->isp);
+		goto done;
+	}
 
 	memset(&handle->format, 0, sizeof(handle->format));
 	handle->format.type = video->type;
@@ -1273,9 +1261,9 @@
 	/* Disable streaming and free the buffers queue resources. */
 	isp_video_streamoff(file, vfh, video->type);
 
-	mutex_lock(&handle->queue.lock);
-	omap3isp_video_queue_cleanup(&handle->queue);
-	mutex_unlock(&handle->queue.lock);
+	mutex_lock(&video->queue_lock);
+	vb2_queue_release(&handle->queue);
+	mutex_unlock(&video->queue_lock);
 
 	omap3isp_pipeline_pm_use(&video->video.entity, 0);
 
@@ -1292,16 +1280,27 @@
 static unsigned int isp_video_poll(struct file *file, poll_table *wait)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(file->private_data);
-	struct isp_video_queue *queue = &vfh->queue;
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_poll(queue, file, wait);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_poll(&vfh->queue, file, wait);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int isp_video_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(file->private_data);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_mmap(&vfh->queue, vma);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_mmap(&vfh->queue, vma);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static struct v4l2_file_operations isp_video_fops = {
@@ -1342,15 +1341,23 @@
 		return -EINVAL;
 	}
 
+	video->alloc_ctx = vb2_dma_contig_init_ctx(video->isp->dev);
+	if (IS_ERR(video->alloc_ctx))
+		return PTR_ERR(video->alloc_ctx);
+
 	ret = media_entity_init(&video->video.entity, 1, &video->pad, 0);
-	if (ret < 0)
+	if (ret < 0) {
+		vb2_dma_contig_cleanup_ctx(video->alloc_ctx);
 		return ret;
+	}
 
 	mutex_init(&video->mutex);
 	atomic_set(&video->active, 0);
 
 	spin_lock_init(&video->pipe.lock);
 	mutex_init(&video->stream_lock);
+	mutex_init(&video->queue_lock);
+	spin_lock_init(&video->irqlock);
 
 	/* Initialize the video device. */
 	if (video->ops == NULL)
@@ -1371,7 +1378,9 @@
 
 void omap3isp_video_cleanup(struct isp_video *video)
 {
+	vb2_dma_contig_cleanup_ctx(video->alloc_ctx);
 	media_entity_cleanup(&video->video.entity);
+	mutex_destroy(&video->queue_lock);
 	mutex_destroy(&video->stream_lock);
 	mutex_destroy(&video->mutex);
 }

diff --git a/drivers/media/platform/omap3isp/ispvideo.h b/drivers/media/platform/omap3isp/ispvideo.h
index 4e19407..7d2e821 100644
--- a/drivers/media/platform/omap3isp/ispvideo.h
+++ b/drivers/media/platform/omap3isp/ispvideo.h

@@ -30,8 +30,7 @@
 #include <media/media-entity.h>
 #include <media/v4l2-dev.h>
 #include <media/v4l2-fh.h>
-
-#include "ispqueue.h"
+#include <media/videobuf2-core.h>
 
 #define ISP_VIDEO_DRIVER_NAME		"ispvideo"
 #define ISP_VIDEO_DRIVER_VERSION	"0.0.2"
@@ -124,17 +123,19 @@
 			       ISP_PIPELINE_IDLE_OUTPUT);
 }
 
-/*
- * struct isp_buffer - ISP buffer
- * @buffer: ISP video buffer
- * @isp_addr: MMU mapped address (a.k.a. device address) of the buffer.
+/**
+ * struct isp_buffer - ISP video buffer
+ * @vb: videobuf2 buffer
+ * @irqlist: List head for insertion into IRQ queue
+ * @dma: DMA address
  */
 struct isp_buffer {
-	struct isp_video_buffer buffer;
-	dma_addr_t isp_addr;
+	struct vb2_buffer vb;
+	struct list_head irqlist;
+	dma_addr_t dma;
 };
 
-#define to_isp_buffer(buf)	container_of(buf, struct isp_buffer, buffer)
+#define to_isp_buffer(buf)	container_of(buf, struct isp_buffer, vb)
 
 enum isp_video_dmaqueue_flags {
 	/* Set if DMA queue becomes empty when ISP_PIPELINE_STREAM_CONTINUOUS */
@@ -172,16 +173,16 @@
 	unsigned int bpl_value;		/* bytes per line value */
 	unsigned int bpl_padding;	/* padding at end of line */
 
-	/* Entity video node streaming */
-	unsigned int streaming:1;
-
 	/* Pipeline state */
 	struct isp_pipeline pipe;
 	struct mutex stream_lock;	/* pipeline and stream states */
 	bool error;
 
 	/* Video buffers queue */
-	struct isp_video_queue *queue;
+	void *alloc_ctx;
+	struct vb2_queue *queue;
+	struct mutex queue_lock;	/* protects the queue */
+	spinlock_t irqlock;		/* protects dmaqueue */
 	struct list_head dmaqueue;
 	enum isp_video_dmaqueue_flags dmaqueue_flags;
 
@@ -193,7 +194,7 @@
 struct isp_video_fh {
 	struct v4l2_fh vfh;
 	struct isp_video *video;
-	struct isp_video_queue queue;
+	struct vb2_queue queue;
 	struct v4l2_format format;
 	struct v4l2_fract timeperframe;
 };

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 349e659..7c4489c 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c

@@ -1200,6 +1200,30 @@
 EXPORT_SYMBOL_GPL(vb2_buffer_done);
 
 /**
+ * vb2_discard_done() - discard all buffers marked as DONE
+ * @q:		videobuf2 queue
+ *
+ * This function is intended to be used with suspend/resume operations. It
+ * flags every buffer on the queue's done list with VB2_BUF_STATE_ERROR, as
+ * those buffers would be too old to be requested after resume.
+ *
+ * Drivers must stop the hardware and synchronize with interrupt handlers and/or
+ * delayed works before calling this function to make sure no buffer will be
+ * touched by the driver and/or hardware.
+ */
+void vb2_discard_done(struct vb2_queue *q)
+{
+	struct vb2_buffer *vb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->done_lock, flags);
+	list_for_each_entry(vb, &q->done_list, done_entry)
+		vb->state = VB2_BUF_STATE_ERROR;
+	spin_unlock_irqrestore(&q->done_lock, flags);
+}
+EXPORT_SYMBOL_GPL(vb2_discard_done);
+
+/**
  * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a
  * v4l2_buffer by the userspace. The caller has already verified that struct
  * v4l2_buffer has a valid number of planes.

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 570b18a..ebc0af7 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c

@@ -1037,7 +1037,7 @@
 		goto out;
 	/* signature to know if this mf is freed */
 	mf->u.frame.linkage.arg1 = cpu_to_le32(0xdeadbeaf);
-	list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ);
+	list_add(&mf->u.frame.linkage.list, &ioc->FreeQ);
 #ifdef MFCNT
 	ioc->mfcnt--;
 #endif

diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index dcc8385..8a050e8 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c

@@ -2432,9 +2432,9 @@
 	int			rc, cim_rev;
 	ToolboxIstwiReadWriteRequest_t	*IstwiRWRequest;
 	MPT_FRAME_HDR		*mf = NULL;
-	MPIHeader_t		*mpi_hdr;
 	unsigned long		timeleft;
 	int			retval;
+	u32			msgcontext;
 
 	/* Reset long to int. Should affect IA64 and SPARC only
 	 */
@@ -2581,11 +2581,11 @@
 	}
 
 	IstwiRWRequest = (ToolboxIstwiReadWriteRequest_t *)mf;
-	mpi_hdr = (MPIHeader_t *) mf;
+	msgcontext = IstwiRWRequest->MsgContext;
 	memset(IstwiRWRequest,0,sizeof(ToolboxIstwiReadWriteRequest_t));
+	IstwiRWRequest->MsgContext = msgcontext;
 	IstwiRWRequest->Function = MPI_FUNCTION_TOOLBOX;
 	IstwiRWRequest->Tool = MPI_TOOLBOX_ISTWI_READ_WRITE_TOOL;
-	IstwiRWRequest->MsgContext = mpi_hdr->MsgContext;
 	IstwiRWRequest->Flags = MPI_TB_ISTWI_FLAGS_READ;
 	IstwiRWRequest->NumAddressBytes = 0x01;
 	IstwiRWRequest->DataLength = cpu_to_le16(0x04);

diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index fd75108..02a3eef 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c

@@ -649,7 +649,7 @@
 }
 
 static int
-mptfc_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptfc_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt)
 {
 	struct mptfc_rport_info	*ri;
 	struct fc_rport	*rport = starget_to_rport(scsi_target(SCpnt->device));
@@ -658,14 +658,14 @@
 
 	if (!vdevice || !vdevice->vtarget) {
 		SCpnt->result = DID_NO_CONNECT << 16;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
 	err = fc_remote_port_chkready(rport);
 	if (unlikely(err)) {
 		SCpnt->result = err;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
@@ -673,15 +673,13 @@
 	ri = *((struct mptfc_rport_info **)rport->dd_data);
 	if (unlikely(!ri)) {
 		SCpnt->result = DID_IMM_RETRY << 16;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
-	return mptscsih_qcmd(SCpnt,done);
+	return mptscsih_qcmd(SCpnt);
 }
 
-static DEF_SCSI_QCMD(mptfc_qcmd)
-
 /*
  *	mptfc_display_port_link_speed - displaying link speed
  *	@ioc: Pointer to MPT_ADAPTER structure

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 00d339c..711fcb5 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c

@@ -1896,7 +1896,7 @@
 }
 
 static int
-mptsas_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptsas_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt)
 {
 	MPT_SCSI_HOST	*hd;
 	MPT_ADAPTER	*ioc;
@@ -1904,11 +1904,11 @@
 
 	if (!vdevice || !vdevice->vtarget || vdevice->vtarget->deleted) {
 		SCpnt->result = DID_NO_CONNECT << 16;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
-	hd = shost_priv(SCpnt->device->host);
+	hd = shost_priv(shost);
 	ioc = hd->ioc;
 
 	if (ioc->sas_discovery_quiesce_io)
@@ -1917,11 +1917,9 @@
 	if (ioc->debug_level & MPT_DEBUG_SCSI)
 		scsi_print_command(SCpnt);
 
-	return mptscsih_qcmd(SCpnt,done);
+	return mptscsih_qcmd(SCpnt);
 }
 
-static DEF_SCSI_QCMD(mptsas_qcmd)
-
 /**
  *	mptsas_mptsas_eh_timed_out - resets the scsi_cmnd timeout
  *		if the device under question is currently in the

diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 727819c..2a1c6f2 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c

@@ -1304,7 +1304,6 @@
 /**
  *	mptscsih_qcmd - Primary Fusion MPT SCSI initiator IO start routine.
  *	@SCpnt: Pointer to scsi_cmnd structure
- *	@done: Pointer SCSI mid-layer IO completion function
  *
  *	(linux scsi_host_template.queuecommand routine)
  *	This is the primary SCSI IO start routine.  Create a MPI SCSIIORequest
@@ -1313,7 +1312,7 @@
  *	Returns 0. (rtn value discarded by linux scsi mid-layer)
  */
 int
-mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptscsih_qcmd(struct scsi_cmnd *SCpnt)
 {
 	MPT_SCSI_HOST		*hd;
 	MPT_FRAME_HDR		*mf;
@@ -1329,10 +1328,9 @@
 
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
-	SCpnt->scsi_done = done;
 
-	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p, done()=%p\n",
-		ioc->name, SCpnt, done));
+	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p\n",
+		ioc->name, SCpnt));
 
 	if (ioc->taskmgmt_quiesce_io)
 		return SCSI_MLQUEUE_HOST_BUSY;

diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 83f5031..99e3390 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h

@@ -113,7 +113,7 @@
 #endif
 extern int mptscsih_show_info(struct seq_file *, struct Scsi_Host *);
 extern const char * mptscsih_info(struct Scsi_Host *SChost);
-extern int mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *));
+extern int mptscsih_qcmd(struct scsi_cmnd *SCpnt);
 extern int mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel,
 	u8 id, int lun, int ctx2abort, ulong timeout);
 extern void mptscsih_slave_destroy(struct scsi_device *device);

diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 5653e50..49d1133 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c

@@ -780,33 +780,31 @@
 }
 
 static int
-mptspi_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptspi_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt)
 {
-	struct _MPT_SCSI_HOST *hd = shost_priv(SCpnt->device->host);
+	struct _MPT_SCSI_HOST *hd = shost_priv(shost);
 	VirtDevice	*vdevice = SCpnt->device->hostdata;
 	MPT_ADAPTER *ioc = hd->ioc;
 
 	if (!vdevice || !vdevice->vtarget) {
 		SCpnt->result = DID_NO_CONNECT << 16;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
 	if (SCpnt->device->channel == 1 &&
 		mptscsih_is_phys_disk(ioc, 0, SCpnt->device->id) == 0) {
 		SCpnt->result = DID_NO_CONNECT << 16;
-		done(SCpnt);
+		SCpnt->scsi_done(SCpnt);
 		return 0;
 	}
 
 	if (spi_dv_pending(scsi_target(SCpnt->device)))
 		ddvprintk(ioc, scsi_print_command(SCpnt));
 
-	return mptscsih_qcmd(SCpnt,done);
+	return mptscsih_qcmd(SCpnt);
 }
 
-static DEF_SCSI_QCMD(mptspi_qcmd)
-
 static void mptspi_slave_destroy(struct scsi_device *sdev)
 {
 	struct scsi_target *starget = scsi_target(sdev);

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 96162b6..3bc969a 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c

@@ -29,13 +29,21 @@
 #include <linux/i2c/twl.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <asm/mach-types.h>
 
 static u8 twl4030_start_script_address = 0x2b;
 
-#define PWR_P1_SW_EVENTS	0x10
-#define PWR_DEVOFF		(1 << 0)
+/* Register bits for P1, P2 and P3_SW_EVENTS */
+#define PWR_STOPON_PRWON	BIT(6)
+#define PWR_STOPON_SYSEN	BIT(5)
+#define PWR_ENABLE_WARMRESET	BIT(4)
+#define PWR_LVL_WAKEUP		BIT(3)
+#define PWR_DEVACT		BIT(2)
+#define PWR_DEVSLP		BIT(1)
+#define PWR_DEVOFF		BIT(0)
+
 #define SEQ_OFFSYNC		(1 << 0)
 
 #define PHY_TO_OFF_PM_MASTER(p)		(p - 0x36)
@@ -52,10 +60,6 @@
 #define R_CFG_P2_TRANSITION	PHY_TO_OFF_PM_MASTER(0x37)
 #define R_CFG_P3_TRANSITION	PHY_TO_OFF_PM_MASTER(0x38)
 
-#define LVL_WAKEUP	0x08
-
-#define ENABLE_WARMRESET (1<<4)
-
 #define END_OF_SCRIPT		0x3f
 
 #define R_SEQ_ADD_A2S		PHY_TO_OFF_PM_MASTER(0x55)
@@ -125,6 +129,53 @@
 	[RES_MAIN_REF]	= 0x94,
 };
 
+/*
+ * Usable values for .remap_sleep and .remap_off, based on table "5.3.3
+ * Resource Operating modes". Two names double as function-like macros below.
+ */
+enum {
+	TWL_REMAP_OFF = 0,
+	TWL_REMAP_SLEEP = 8,
+	TWL_REMAP_ACTIVE = 9,
+};
+
+/*
+ * Initializer macros for the PM script entries and resource remap tables.
+ * Note that we can make MSG_SINGULAR etc private to this driver once
+ * omap3 has been made DT only.
+ */
+#define TWL_DFLT_DELAY		2	/* typically two 32 KiHz cycles */
+#define TWL_DEV_GRP_P123	(DEV_GRP_P1 | DEV_GRP_P2 | DEV_GRP_P3)
+#define TWL_RESOURCE_SET(res, state)					\
+	{ MSG_SINGULAR(DEV_GRP_NULL, (res), (state)), TWL_DFLT_DELAY }
+#define TWL_RESOURCE_ON(res)	TWL_RESOURCE_SET(res, RES_STATE_ACTIVE)
+#define TWL_RESOURCE_OFF(res)	TWL_RESOURCE_SET(res, RES_STATE_OFF)
+#define TWL_RESOURCE_RESET(res)	TWL_RESOURCE_SET(res, RES_STATE_WRST)
+/*
+ * It seems that type1 and type2 are just the resource init order
+ * numbers for the type1 and type2 groups.
+ */
+#define TWL_RESOURCE_SET_ACTIVE(res, state)			       	\
+	{ MSG_SINGULAR(DEV_GRP_NULL, (res), RES_STATE_ACTIVE), (state) }
+#define TWL_RESOURCE_GROUP_RESET(group, type1, type2)			\
+	{ MSG_BROADCAST(DEV_GRP_NULL, (group), (type1), (type2),	\
+		RES_STATE_WRST), TWL_DFLT_DELAY }
+#define TWL_RESOURCE_GROUP_SLEEP(group, type, type2)			\
+	{ MSG_BROADCAST(DEV_GRP_NULL, (group), (type), (type2),		\
+		RES_STATE_SLEEP), TWL_DFLT_DELAY }
+#define TWL_RESOURCE_GROUP_ACTIVE(group, type, type2)			\
+	{ MSG_BROADCAST(DEV_GRP_NULL, (group), (type), (type2),		\
+		RES_STATE_ACTIVE), TWL_DFLT_DELAY }
+#define TWL_REMAP_SLEEP(res, devgrp, typ, typ2)				\
+	{ .resource = (res), .devgroup = (devgrp),			\
+	  .type = (typ), .type2 = (typ2),				\
+	  .remap_off = TWL_REMAP_OFF,					\
+	  .remap_sleep = TWL_REMAP_SLEEP, }
+#define TWL_REMAP_OFF(res, devgrp, typ, typ2)				\
+	{ .resource = (res), .devgroup = (devgrp),			\
+	  .type = (typ), .type2 = (typ2),				\
+	  .remap_off = TWL_REMAP_OFF, .remap_sleep = TWL_REMAP_OFF, }
+
 static int twl4030_write_script_byte(u8 address, u8 byte)
 {
 	int err;
@@ -196,7 +247,7 @@
 	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &data, R_P3_SW_EVENTS);
 	if (err)
 		goto out;
-	data |= LVL_WAKEUP;
+	data |= PWR_LVL_WAKEUP;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P3_SW_EVENTS);
 out:
 	if (err)
@@ -219,7 +270,7 @@
 	if (err)
 		goto out;
 
-	data |= LVL_WAKEUP;
+	data |= PWR_LVL_WAKEUP;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
@@ -228,7 +279,7 @@
 	if (err)
 		goto out;
 
-	data |= LVL_WAKEUP;
+	data |= PWR_LVL_WAKEUP;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
@@ -281,7 +332,7 @@
 	if (err)
 		goto out;
 
-	rd_data |= ENABLE_WARMRESET;
+	rd_data |= PWR_ENABLE_WARMRESET;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
@@ -290,7 +341,7 @@
 	if (err)
 		goto out;
 
-	rd_data |= ENABLE_WARMRESET;
+	rd_data |= PWR_ENABLE_WARMRESET;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
@@ -299,7 +350,7 @@
 	if (err)
 		goto out;
 
-	rd_data |= ENABLE_WARMRESET;
+	rd_data |= PWR_ENABLE_WARMRESET;
 	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P3_SW_EVENTS);
 out:
 	if (err)
@@ -421,6 +472,12 @@
 			goto out;
 	}
 	if (tscript->flags & TWL4030_WAKEUP12_SCRIPT) {
+		/* Reset any existing sleep script to avoid hangs on reboot */
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, END_OF_SCRIPT,
+				       R_SEQ_ADD_A2S);
+		if (err)
+			goto out;
+
 		err = twl4030_config_wakeup12_sequence(address);
 		if (err)
 			goto out;
@@ -493,7 +550,8 @@
 	return err;
 }
 
-static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata)
+static int
+twl4030_power_configure_scripts(const struct twl4030_power_data *pdata)
 {
 	int err;
 	int i;
@@ -509,12 +567,34 @@
 	return 0;
 }
 
-static int twl4030_power_configure_resources(struct twl4030_power_data *pdata)
+/* Overlay board-specific entries onto matching entries of the common table. */
+static void twl4030_patch_rconfig(struct twl4030_resconfig *common,
+				  struct twl4030_resconfig *board)
+{
+	struct twl4030_resconfig *b;
+
+	/* Both tables end at the first entry whose .resource is zero. */
+	for (; common->resource; common++) {
+		for (b = board; b->resource; b++) {
+			if (b->resource != common->resource)
+				continue;
+			*common = *b;
+			break;
+		}
+	}
+}
+
+static int
+twl4030_power_configure_resources(const struct twl4030_power_data *pdata)
 {
 	struct twl4030_resconfig *resconfig = pdata->resource_config;
+	struct twl4030_resconfig *boardconf = pdata->board_config;
 	int err;
 
 	if (resconfig) {
+		if (boardconf)
+			twl4030_patch_rconfig(resconfig, boardconf);
+
 		while (resconfig->resource) {
 			err = twl4030_configure_resource(resconfig);
 			if (err)
@@ -541,7 +621,7 @@
 		pr_err("TWL4030 Unable to power off\n");
 }
 
-static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata,
+static bool twl4030_power_use_poweroff(const struct twl4030_power_data *pdata,
 					struct device_node *node)
 {
 	if (pdata && pdata->use_poweroff)
@@ -553,10 +633,170 @@
 	return false;
 }
 
+#ifdef CONFIG_OF
+
+/* Generic warm reset configuration for omap3 */
+
+static struct twl4030_ins omap3_wrst_seq[] = {
+	TWL_RESOURCE_OFF(RES_NRES_PWRON),
+	TWL_RESOURCE_OFF(RES_RESET),
+	TWL_RESOURCE_RESET(RES_MAIN_REF),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2),
+	TWL_RESOURCE_RESET(RES_VUSB_3V1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0),
+	TWL_RESOURCE_ON(RES_RESET),
+	TWL_RESOURCE_ON(RES_NRES_PWRON),
+};
+
+static struct twl4030_script omap3_wrst_script = {
+	.script	= omap3_wrst_seq,
+	.size	= ARRAY_SIZE(omap3_wrst_seq),
+	.flags	= TWL4030_WRST_SCRIPT,
+};
+
+static struct twl4030_script *omap3_reset_scripts[] = {
+	&omap3_wrst_script,
+};
+
+static struct twl4030_resconfig omap3_rconfig[] = {
+	TWL_REMAP_SLEEP(RES_HFCLKOUT, DEV_GRP_P3, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD1, DEV_GRP_P1, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD2, DEV_GRP_P1, -1, -1),
+	{ 0, 0 },
+};
+
+static struct twl4030_power_data omap3_reset = {
+	.scripts		= omap3_reset_scripts,
+	.num			= ARRAY_SIZE(omap3_reset_scripts),
+	.resource_config	= omap3_rconfig,
+};
+
+/* Recommended generic default idle configuration for off-idle */
+
+/* Broadcast message to put res to sleep */
+static struct twl4030_ins omap3_idle_sleep_on_seq[] = {
+	TWL_RESOURCE_GROUP_SLEEP(RES_GRP_ALL, RES_TYPE_ALL, 0),
+};
+
+static struct twl4030_script omap3_idle_sleep_on_script = {
+	.script	= omap3_idle_sleep_on_seq,
+	.size	= ARRAY_SIZE(omap3_idle_sleep_on_seq),
+	.flags	= TWL4030_SLEEP_SCRIPT,
+};
+
+/* Broadcast message to put res to active */
+static struct twl4030_ins omap3_idle_wakeup_p12_seq[] = {
+	TWL_RESOURCE_GROUP_ACTIVE(RES_GRP_ALL, RES_TYPE_ALL, 0),
+};
+
+static struct twl4030_script omap3_idle_wakeup_p12_script = {
+	.script	= omap3_idle_wakeup_p12_seq,
+	.size	= ARRAY_SIZE(omap3_idle_wakeup_p12_seq),
+	.flags	= TWL4030_WAKEUP12_SCRIPT,
+};
+
+/* Set RES_CLKEN active, then broadcast message to put res to active */
+static struct twl4030_ins omap3_idle_wakeup_p3_seq[] = {
+	TWL_RESOURCE_SET_ACTIVE(RES_CLKEN, 0x37),
+	TWL_RESOURCE_GROUP_ACTIVE(RES_GRP_ALL, RES_TYPE_ALL, 0),
+};
+
+static struct twl4030_script omap3_idle_wakeup_p3_script = {
+	.script	= omap3_idle_wakeup_p3_seq,
+	.size	= ARRAY_SIZE(omap3_idle_wakeup_p3_seq),
+	.flags	= TWL4030_WAKEUP3_SCRIPT,
+};
+
+static struct twl4030_script *omap3_idle_scripts[] = {
+	&omap3_idle_wakeup_p12_script,
+	&omap3_idle_wakeup_p3_script,
+	&omap3_wrst_script,
+	&omap3_idle_sleep_on_script,
+};
+
+/*
+ * Recommended configuration based on "Recommended Sleep
+ * Sequences for the Zoom Platform":
+ * http://omappedia.com/wiki/File:Recommended_Sleep_Sequences_Zoom.pdf
+ * Note that the type1 and type2 seem to be just the init order number
+ * for type1 and type2 groups as specified in the document mentioned
+ * above.
+ */
+static struct twl4030_resconfig omap3_idle_rconfig[] = {
+	TWL_REMAP_SLEEP(RES_VAUX1, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VAUX2, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VAUX3, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VAUX4, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VMMC1, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VMMC2, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_OFF(RES_VPLL1, DEV_GRP_P1, 3, 1),
+	TWL_REMAP_SLEEP(RES_VPLL2, DEV_GRP_P1, 0, 0),
+	TWL_REMAP_SLEEP(RES_VSIM, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VDAC, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VINTANA1, TWL_DEV_GRP_P123, 1, 2),
+	TWL_REMAP_SLEEP(RES_VINTANA2, TWL_DEV_GRP_P123, 0, 2),
+	TWL_REMAP_SLEEP(RES_VINTDIG, TWL_DEV_GRP_P123, 1, 2),
+	TWL_REMAP_SLEEP(RES_VIO, TWL_DEV_GRP_P123, 2, 2),
+	TWL_REMAP_OFF(RES_VDD1, DEV_GRP_P1, 4, 1),
+	TWL_REMAP_OFF(RES_VDD2, DEV_GRP_P1, 3, 1),
+	TWL_REMAP_SLEEP(RES_VUSB_1V5, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VUSB_1V8, DEV_GRP_NULL, 0, 0),
+	TWL_REMAP_SLEEP(RES_VUSB_3V1, TWL_DEV_GRP_P123, 0, 0),
+	/* Resource #20 USB charge pump skipped */
+	TWL_REMAP_SLEEP(RES_REGEN, TWL_DEV_GRP_P123, 2, 1),
+	TWL_REMAP_SLEEP(RES_NRES_PWRON, TWL_DEV_GRP_P123, 0, 1),
+	TWL_REMAP_SLEEP(RES_CLKEN, TWL_DEV_GRP_P123, 3, 2),
+	TWL_REMAP_SLEEP(RES_SYSEN, TWL_DEV_GRP_P123, 6, 1),
+	TWL_REMAP_SLEEP(RES_HFCLKOUT, DEV_GRP_P3, 0, 2),
+	TWL_REMAP_SLEEP(RES_32KCLKOUT, TWL_DEV_GRP_P123, 0, 0),
+	TWL_REMAP_SLEEP(RES_RESET, TWL_DEV_GRP_P123, 6, 0),
+	TWL_REMAP_SLEEP(RES_MAIN_REF, TWL_DEV_GRP_P123, 0, 0),
+	{ /* Terminator */ },
+};
+
+static struct twl4030_power_data omap3_idle = {
+	.scripts		= omap3_idle_scripts,
+	.num			= ARRAY_SIZE(omap3_idle_scripts),
+	.resource_config	= omap3_idle_rconfig,
+};
+
+/* Disable 32 KiHz oscillator during idle */
+static struct twl4030_resconfig osc_off_rconfig[] = {
+	TWL_REMAP_OFF(RES_CLKEN, DEV_GRP_P1 | DEV_GRP_P3, 3, 2),
+	{ /* Terminator */ },
+};
+
+static struct twl4030_power_data osc_off_idle = {
+	.scripts		= omap3_idle_scripts,
+	.num			= ARRAY_SIZE(omap3_idle_scripts),
+	.resource_config	= omap3_idle_rconfig,
+	.board_config		= osc_off_rconfig,
+};
+
+/*
+ * OF match table. "ti,twl4030-power" is the compatible this driver matched
+ * before the generic configurations were added; it carries no .data, so
+ * probe keeps whatever platform data was supplied. The remaining entries
+ * select one of the built-in configurations through .data.
+ */
+static const struct of_device_id twl4030_power_of_match[] = {
+	{ .compatible = "ti,twl4030-power", },
+	{ .compatible = "ti,twl4030-power-reset", .data = &omap3_reset, },
+	{ .compatible = "ti,twl4030-power-idle", .data = &omap3_idle, },
+	{
+		.compatible = "ti,twl4030-power-idle-osc-off",
+		.data = &osc_off_idle,
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
+#endif	/* CONFIG_OF */
+
 static int twl4030_power_probe(struct platform_device *pdev)
 {
-	struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
+	const struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
 	int err = 0;
 	int err2 = 0;
 	u8 val;
@@ -577,8 +817,12 @@
 		return err;
 	}
 
+	match = of_match_device(of_match_ptr(twl4030_power_of_match),
+				&pdev->dev);
+	if (match && match->data)
+		pdata = match->data;
+
 	if (pdata) {
-		/* TODO: convert to device tree */
 		err = twl4030_power_configure_scripts(pdata);
 		if (err) {
 			pr_err("TWL4030 failed to load scripts\n");
@@ -628,14 +872,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id twl4030_power_of_match[] = {
-	{.compatible = "ti,twl4030-power", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
-#endif
-
 static struct platform_driver twl4030_power_driver = {
 	.driver = {
 		.name	= "twl4030_power",

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 8246448..d2dbf02 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c

@@ -341,16 +341,17 @@
 	if (mmc_host_is_spi(card->host)) {
 		pr_info("%s: new %s%s%s card on SPI\n",
 			mmc_hostname(card->host),
-			mmc_card_highspeed(card) ? "high speed " : "",
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_hs(card) ? "high speed " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			type);
 	} else {
 		pr_info("%s: new %s%s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
-			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs(card) ? "high speed " : ""),
+			mmc_card_hs400(card) ? "HS400 " :
 			(mmc_card_hs200(card) ? "HS200 " : ""),
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			uhs_bus_speed_mode, type, card->rca);
 	}
 

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index acbc3f2..7dc0c85 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c

@@ -800,6 +800,10 @@
 			data->timeout_ns = limit_us * 1000;
 			data->timeout_clks = 0;
 		}
+
+		/* assign limit value if invalid */
+		if (timeout_us == 0)
+			data->timeout_ns = limit_us * 1000;
 	}
 
 	/*
@@ -1310,31 +1314,38 @@
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
 
+#endif /* CONFIG_REGULATOR */
+
 int mmc_regulator_get_supply(struct mmc_host *mmc)
 {
 	struct device *dev = mmc_dev(mmc);
-	struct regulator *supply;
 	int ret;
 
-	supply = devm_regulator_get(dev, "vmmc");
-	mmc->supply.vmmc = supply;
+	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
 	mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
 
-	if (IS_ERR(supply))
-		return PTR_ERR(supply);
+	if (IS_ERR(mmc->supply.vmmc)) {
+		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vmmc regulator found\n");
+	} else {
+		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+		if (ret > 0)
+			mmc->ocr_avail = ret;
+		else
+			dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
+	}
 
-	ret = mmc_regulator_get_ocrmask(supply);
-	if (ret > 0)
-		mmc->ocr_avail = ret;
-	else
-		dev_warn(mmc_dev(mmc), "Failed getting OCR mask: %d\n", ret);
+	if (IS_ERR(mmc->supply.vqmmc)) {
+		if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vqmmc regulator found\n");
+	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
 
-#endif /* CONFIG_REGULATOR */
-
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
@@ -1533,8 +1544,13 @@
 	host->ios.timing = MMC_TIMING_LEGACY;
 	mmc_set_ios(host);
 
-	/* Set signal voltage to 3.3V */
-	__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330);
+	/* Try to set signal voltage to 3.3V but fall back to 1.8v or 1.2v */
+	if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330) == 0)
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 3.3v\n");
+	else if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180) == 0)
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.8v\n");
+	else if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120) == 0)
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.2v\n");
 
 	/*
 	 * This delay should be sufficient to allow the power supply
@@ -2183,7 +2199,7 @@
 {
 	struct mmc_command cmd = {0};
 
-	if (mmc_card_blockaddr(card) || mmc_card_ddr_mode(card))
+	if (mmc_card_blockaddr(card) || mmc_card_ddr52(card))
 		return 0;
 
 	cmd.opcode = MMC_SET_BLOCKLEN;
@@ -2263,7 +2279,6 @@
 		}
 	}
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_DDR);
 	if (mmc_host_is_spi(host)) {
 		host->ios.chip_select = MMC_CS_HIGH;
 		host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
@@ -2403,6 +2418,11 @@
 		container_of(work, struct mmc_host, detect.work);
 	int i;
 
+	if (host->trigger_card_event && host->ops->card_event) {
+		host->ops->card_event(host);
+		host->trigger_card_event = false;
+	}
+
 	if (host->rescan_disable)
 		return;
 

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 54829c0..91eb162 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c

@@ -135,8 +135,14 @@
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_DDR52:
+		str = "mmc DDR52";
+		break;
 	case MMC_TIMING_MMC_HS200:
-		str = "mmc high-speed SDR200";
+		str = "mmc HS200";
+		break;
+	case MMC_TIMING_MMC_HS400:
+		str = "mmc HS400";
 		break;
 	default:
 		str = "invalid";

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index fdea825..95cceae 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c

@@ -447,6 +447,10 @@
 		host->caps2 |= MMC_CAP2_HS200_1_8V_SDR;
 	if (of_find_property(np, "mmc-hs200-1_2v", &len))
 		host->caps2 |= MMC_CAP2_HS200_1_2V_SDR;
+	if (of_find_property(np, "mmc-hs400-1_8v", &len))
+		host->caps2 |= MMC_CAP2_HS400_1_8V | MMC_CAP2_HS200_1_8V_SDR;
+	if (of_find_property(np, "mmc-hs400-1_2v", &len))
+		host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR;
 
 	return 0;
 

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1ab5f3a..793c6f7 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c

@@ -240,31 +240,62 @@
 static void mmc_select_card_type(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
+	u8 card_type = card->ext_csd.raw_card_type;
 	u32 caps = host->caps, caps2 = host->caps2;
-	unsigned int hs_max_dtr = 0;
-
-	if (card_type & EXT_CSD_CARD_TYPE_26)
-		hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+	unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
+	unsigned int avail_type = 0;
 
 	if (caps & MMC_CAP_MMC_HIGHSPEED &&
-			card_type & EXT_CSD_CARD_TYPE_52)
+	    card_type & EXT_CSD_CARD_TYPE_HS_26) {
+		hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_26;
+	}
+
+	if (caps & MMC_CAP_MMC_HIGHSPEED &&
+	    card_type & EXT_CSD_CARD_TYPE_HS_52) {
 		hs_max_dtr = MMC_HIGH_52_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_52;
+	}
 
-	if ((caps & MMC_CAP_1_8V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) ||
-	    (caps & MMC_CAP_1_2V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_2V))
+	if (caps & MMC_CAP_1_8V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) {
 		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_8V;
+	}
 
-	if ((caps2 & MMC_CAP2_HS200_1_8V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) ||
-	    (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_2V))
-		hs_max_dtr = MMC_HS200_MAX_DTR;
+	if (caps & MMC_CAP_1_2V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_2V;
+	}
+
+	if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
+	}
+
+	if (caps2 & MMC_CAP2_HS400_1_8V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS400_1_2V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_2V;
+	}
 
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
-	card->ext_csd.card_type = card_type;
+	card->ext_csd.hs200_max_dtr = hs200_max_dtr;
+	card->mmc_avail_type = avail_type;
 }
 
 /*
@@ -480,6 +511,8 @@
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_195];
 		card->ext_csd.raw_pwr_cl_ddr_52_360 =
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_360];
+		card->ext_csd.raw_pwr_cl_ddr_200_360 =
+			ext_csd[EXT_CSD_PWR_CL_DDR_200_360];
 	}
 
 	if (card->ext_csd.rev >= 5) {
@@ -646,7 +679,10 @@
 		(card->ext_csd.raw_pwr_cl_ddr_52_195 ==
 			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_195]) &&
 		(card->ext_csd.raw_pwr_cl_ddr_52_360 ==
-			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]));
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]) &&
+		(card->ext_csd.raw_pwr_cl_ddr_200_360 ==
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_200_360]));
+
 	if (err)
 		err = -EINVAL;
 
@@ -694,18 +730,10 @@
 	&dev_attr_rel_sectors.attr,
 	NULL,
 };
-
-static struct attribute_group mmc_std_attr_group = {
-	.attrs = mmc_std_attrs,
-};
-
-static const struct attribute_group *mmc_attr_groups[] = {
-	&mmc_std_attr_group,
-	NULL,
-};
+ATTRIBUTE_GROUPS(mmc_std);
 
 static struct device_type mmc_type = {
-	.groups = mmc_attr_groups,
+	.groups = mmc_std_groups,
 };
 
 /*
@@ -714,17 +742,13 @@
  * extended CSD register, select it by executing the
  * mmc_switch command.
  */
-static int mmc_select_powerclass(struct mmc_card *card,
-		unsigned int bus_width)
+static int __mmc_select_powerclass(struct mmc_card *card,
+				   unsigned int bus_width)
 {
-	int err = 0;
+	struct mmc_host *host = card->host;
+	struct mmc_ext_csd *ext_csd = &card->ext_csd;
 	unsigned int pwrclass_val = 0;
-	struct mmc_host *host;
-
-	BUG_ON(!card);
-
-	host = card->host;
-	BUG_ON(!host);
+	int err = 0;
 
 	/* Power class selection is supported for versions >= 4.0 */
 	if (card->csd.mmca_vsn < CSD_SPEC_VER_4)
@@ -736,14 +760,14 @@
 
 	switch (1 << host->ios.vdd) {
 	case MMC_VDD_165_195:
-		if (host->ios.clock <= 26000000)
-			pwrclass_val = card->ext_csd.raw_pwr_cl_26_195;
-		else if	(host->ios.clock <= 52000000)
+		if (host->ios.clock <= MMC_HIGH_26_MAX_DTR)
+			pwrclass_val = ext_csd->raw_pwr_cl_26_195;
+		else if (host->ios.clock <= MMC_HIGH_52_MAX_DTR)
 			pwrclass_val = (bus_width <= EXT_CSD_BUS_WIDTH_8) ?
-				card->ext_csd.raw_pwr_cl_52_195 :
-				card->ext_csd.raw_pwr_cl_ddr_52_195;
-		else if (host->ios.clock <= 200000000)
-			pwrclass_val = card->ext_csd.raw_pwr_cl_200_195;
+				ext_csd->raw_pwr_cl_52_195 :
+				ext_csd->raw_pwr_cl_ddr_52_195;
+		else if (host->ios.clock <= MMC_HS200_MAX_DTR)
+			pwrclass_val = ext_csd->raw_pwr_cl_200_195;
 		break;
 	case MMC_VDD_27_28:
 	case MMC_VDD_28_29:
@@ -754,14 +778,16 @@
 	case MMC_VDD_33_34:
 	case MMC_VDD_34_35:
 	case MMC_VDD_35_36:
-		if (host->ios.clock <= 26000000)
-			pwrclass_val = card->ext_csd.raw_pwr_cl_26_360;
-		else if	(host->ios.clock <= 52000000)
+		if (host->ios.clock <= MMC_HIGH_26_MAX_DTR)
+			pwrclass_val = ext_csd->raw_pwr_cl_26_360;
+		else if (host->ios.clock <= MMC_HIGH_52_MAX_DTR)
 			pwrclass_val = (bus_width <= EXT_CSD_BUS_WIDTH_8) ?
-				card->ext_csd.raw_pwr_cl_52_360 :
-				card->ext_csd.raw_pwr_cl_ddr_52_360;
-		else if (host->ios.clock <= 200000000)
-			pwrclass_val = card->ext_csd.raw_pwr_cl_200_360;
+				ext_csd->raw_pwr_cl_52_360 :
+				ext_csd->raw_pwr_cl_ddr_52_360;
+		else if (host->ios.clock <= MMC_HS200_MAX_DTR)
+			pwrclass_val = (bus_width == EXT_CSD_DDR_BUS_WIDTH_8) ?
+				ext_csd->raw_pwr_cl_ddr_200_360 :
+				ext_csd->raw_pwr_cl_200_360;
 		break;
 	default:
 		pr_warning("%s: Voltage range not supported "
@@ -787,40 +813,79 @@
 	return err;
 }
 
-/*
- * Selects the desired buswidth and switch to the HS200 mode
- * if bus width set without error
- */
-static int mmc_select_hs200(struct mmc_card *card)
+static int mmc_select_powerclass(struct mmc_card *card)
 {
-	int idx, err = -EINVAL;
-	struct mmc_host *host;
+	struct mmc_host *host = card->host;
+	u32 bus_width, ext_csd_bits;
+	int err, ddr;
+
+	/* Power class selection is supported for versions >= 4.0 */
+	if (card->csd.mmca_vsn < CSD_SPEC_VER_4)
+		return 0;
+
+	bus_width = host->ios.bus_width;
+	/* Power class values are defined only for 4/8 bit bus */
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
+
+	ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
+	if (ddr)
+		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+			EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+	else
+		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+			EXT_CSD_BUS_WIDTH_8 :  EXT_CSD_BUS_WIDTH_4;
+
+	err = __mmc_select_powerclass(card, ext_csd_bits);
+	if (err)
+		pr_warn("%s: power class selection to bus width %d ddr %d failed\n",
+			mmc_hostname(host), 1 << bus_width, ddr);
+
+	return err;
+}
+
+/*
+ * Set the host clock to the max_dtr limit of the card's current speed mode.
+ */
+static void mmc_set_bus_speed(struct mmc_card *card)
+{
+	unsigned int max_dtr = (unsigned int)-1; /* UINT_MAX: no limit chosen yet */
+
+	if ((mmc_card_hs200(card) || mmc_card_hs400(card)) &&
+	     max_dtr > card->ext_csd.hs200_max_dtr)
+		max_dtr = card->ext_csd.hs200_max_dtr;
+	else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
+		max_dtr = card->ext_csd.hs_max_dtr;
+	else if (max_dtr > card->csd.max_dtr)
+		max_dtr = card->csd.max_dtr;
+
+	mmc_set_clock(card->host, max_dtr);
+}
+
+/*
+ * Select the bus width among 4-bit and 8-bit (SDR).
+ * If the bus width is changed successfully, return the selected width value.
+ * Zero is returned instead of error value if the wide width is not supported.
+ */
+static int mmc_select_bus_width(struct mmc_card *card)
+{
 	static unsigned ext_csd_bits[] = {
-		EXT_CSD_BUS_WIDTH_4,
 		EXT_CSD_BUS_WIDTH_8,
+		EXT_CSD_BUS_WIDTH_4,
 	};
 	static unsigned bus_widths[] = {
-		MMC_BUS_WIDTH_4,
 		MMC_BUS_WIDTH_8,
+		MMC_BUS_WIDTH_4,
 	};
+	struct mmc_host *host = card->host;
+	unsigned idx, bus_width = 0;
+	int err = 0;
 
-	BUG_ON(!card);
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4) &&
+	    !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
+		return 0;
 
-	host = card->host;
-
-	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
-			host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
-
-	if (err && card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_8V &&
-			host->caps2 & MMC_CAP2_HS200_1_8V_SDR)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
-
-	/* If fails try again during next card power cycle */
-	if (err)
-		goto err;
-
-	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 0 : 1;
 
 	/*
 	 * Unlike SD, MMC cards dont have a configuration register to notify
@@ -828,8 +893,7 @@
 	 * the supported bus width or compare the ext csd values of current
 	 * bus width and ext csd values of 1 bit mode read earlier.
 	 */
-	for (; idx >= 0; idx--) {
-
+	for (; idx < ARRAY_SIZE(bus_widths); idx++) {
 		/*
 		 * Host is capable of 8bit transfer, then switch
 		 * the device to work in 8bit transfer mode. If the
@@ -844,27 +908,266 @@
 		if (err)
 			continue;
 
-		mmc_set_bus_width(card->host, bus_widths[idx]);
+		bus_width = bus_widths[idx];
+		mmc_set_bus_width(host, bus_width);
 
+		/*
+		 * If controller can't handle bus width test,
+		 * compare ext_csd previously read in 1 bit mode
+		 * against ext_csd at new bus width
+		 */
 		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+			err = mmc_compare_ext_csds(card, bus_width);
 		else
-			err = mmc_bus_test(card, bus_widths[idx]);
-		if (!err)
+			err = mmc_bus_test(card, bus_width);
+
+		if (!err) {
+			err = bus_width;
 			break;
+		} else {
+			pr_warn("%s: switch to bus width %d failed\n",
+				mmc_hostname(host), ext_csd_bits[idx]);
+		}
 	}
 
-	/* switch to HS200 mode if bus width set successfully */
+	return err;
+}
+
+/*
+ * Switch to the high-speed mode
+ */
+static int mmc_select_hs(struct mmc_card *card)
+{
+	int err;
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
 	if (!err)
+		mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+	return err;
+}
+
+/*
+ * Activate wide bus and DDR if supported.
+ */
+static int mmc_select_hs_ddr(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	u32 bus_width, ext_csd_bits;
+	int err = 0;
+
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52))
+		return 0;
+
+	bus_width = host->ios.bus_width;
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
+
+	ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+		EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			EXT_CSD_BUS_WIDTH,
+			ext_csd_bits,
+			card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width %d ddr failed\n",
+			mmc_hostname(host), 1 << bus_width);
+		return err;
+	}
+
+	/*
+	 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
+	 * signaling.
+	 *
+	 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
+	 *
+	 * 1.8V vccq at 3.3V core voltage (vcc) is not required
+	 * in the JEDEC spec for DDR.
+	 *
+	 * Do not force change in vccq since we are obviously
+	 * working and no change to vccq is needed.
+	 *
+	 * WARNING: eMMC rules are NOT the same as SD DDR
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		err = __mmc_set_signal_voltage(host,
+				MMC_SIGNAL_VOLTAGE_120);
+		if (err)
+			return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
+
+	return err;
+}
+
+static int mmc_select_hs400(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	/*
+	 * HS400 mode requires 8-bit bus width
+	 */
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	      host->ios.bus_width == MMC_BUS_WIDTH_8))
+		return 0;
+
+	/*
+	 * Before switching to dual data rate operation for HS400,
+	 * it is required to convert from HS200 mode to HS mode.
+	 */
+	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+	mmc_set_bus_speed(card);
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to high-speed from hs200 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_BUS_WIDTH,
+			 EXT_CSD_DDR_BUS_WIDTH_8,
+			 card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width for hs400 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS400,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to hs400 failed, err:%d\n",
+			 mmc_hostname(host), err);
+		return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
+	mmc_set_bus_speed(card);
+
+	return 0;
+}
+
+/*
+ * For device supporting HS200 mode, the following sequence
+ * should be done before executing the tuning process.
+ * 1. set the desired bus width(4-bit or 8-bit, 1-bit is not supported)
+ * 2. switch to HS200 mode
+ * 3. set the clock to > 52MHz and <= 200MHz
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = -EINVAL;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
+
+	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+
+	/* If fails try again during next card power cycle */
+	if (err)
+		goto err;
+
+	/*
+	 * Set the bus width(4 or 8) with host's support and
+	 * switch to HS200 mode if bus width is set successfully.
+	 */
+	err = mmc_select_bus_width(card);
+	if (!IS_ERR_VALUE(err)) {
 		err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_HS_TIMING, 2,
-				card->ext_csd.generic_cmd6_time,
-				true, true, true);
+				   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS200,
+				   card->ext_csd.generic_cmd6_time,
+				   true, true, true);
+		if (!err)
+			mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+	}
 err:
 	return err;
 }
 
 /*
+ * Activate High Speed or HS200 mode if supported.
+ */
+static int mmc_select_timing(struct mmc_card *card)
+{
+	int err = 0;
+
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4 &&
+	     card->ext_csd.hs_max_dtr == 0))
+		goto bus_speed;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
+		err = mmc_select_hs200(card);
+	else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
+		err = mmc_select_hs(card);
+
+	if (err && err != -EBADMSG)
+		return err;
+
+	if (err) {
+		pr_warn("%s: switch to %s failed\n",
+			mmc_hostname(card->host),
+			(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) ?
+			"hs200" : "high-speed");
+		err = 0;	/* -EBADMSG is not fatal: carry on without the switch */
+	}
+
+bus_speed:
+	/*
+	 * Program the clock for whichever timing is now active; when no
+	 * faster timing was selected, the backward-compatible rate is used.
+	 */
+	mmc_set_bus_speed(card);
+	return err;
+}
+
+/*
+ * Run the host's execute_tuning hook, if any, with the HS200 tuning
+ * opcode (CMD21). Returns the hook's error code, or 0 without a hook.
+ */
+static int mmc_hs200_tuning(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	/*
+	 * When HS400 is available on an 8-bit bus, let the host adjust its
+	 * timing to the HS400 target frequency before tuning starts.
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	    host->ios.bus_width == MMC_BUS_WIDTH_8)
+		if (host->ops->prepare_hs400_tuning)
+			host->ops->prepare_hs400_tuning(host, &host->ios);
+
+	if (host->ops->execute_tuning) {
+		mmc_host_clk_hold(host);
+		err = host->ops->execute_tuning(host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		mmc_host_clk_release(host);
+
+		if (err)
+			pr_warn("%s: tuning execution failed\n",
+				mmc_hostname(host));
+	}
+
+	return err;
+}
+
+/*
  * Handle the detection and initialisation of a card.
  *
  * In the case of a resume, "oldcard" will contain the card
@@ -874,9 +1177,8 @@
 	struct mmc_card *oldcard)
 {
 	struct mmc_card *card;
-	int err, ddr = 0;
+	int err;
 	u32 cid[4];
-	unsigned int max_dtr;
 	u32 rocr;
 	u8 *ext_csd = NULL;
 
@@ -1068,206 +1370,34 @@
 	}
 
 	/*
-	 * Activate high speed (if supported)
+	 * Select timing interface
 	 */
-	if (card->ext_csd.hs_max_dtr != 0) {
-		err = 0;
-		if (card->ext_csd.hs_max_dtr > 52000000 &&
-		    host->caps2 & MMC_CAP2_HS200)
-			err = mmc_select_hs200(card);
-		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
-			err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_HS_TIMING, 1,
-					card->ext_csd.generic_cmd6_time,
-					true, true, true);
+	err = mmc_select_timing(card);
+	if (err)
+		goto free_card;
 
-		if (err && err != -EBADMSG)
-			goto free_card;
-
-		if (err) {
-			pr_warning("%s: switch to highspeed failed\n",
-			       mmc_hostname(card->host));
-			err = 0;
-		} else {
-			if (card->ext_csd.hs_max_dtr > 52000000 &&
-			    host->caps2 & MMC_CAP2_HS200) {
-				mmc_card_set_hs200(card);
-				mmc_set_timing(card->host,
-					       MMC_TIMING_MMC_HS200);
-			} else {
-				mmc_card_set_highspeed(card);
-				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-			}
-		}
-	}
-
-	/*
-	 * Compute bus speed.
-	 */
-	max_dtr = (unsigned int)-1;
-
-	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
-		if (max_dtr > card->ext_csd.hs_max_dtr)
-			max_dtr = card->ext_csd.hs_max_dtr;
-		if (mmc_card_highspeed(card) && (max_dtr > 52000000))
-			max_dtr = 52000000;
-	} else if (max_dtr > card->csd.max_dtr) {
-		max_dtr = card->csd.max_dtr;
-	}
-
-	mmc_set_clock(host, max_dtr);
-
-	/*
-	 * Indicate DDR mode (if supported).
-	 */
-	if (mmc_card_highspeed(card)) {
-		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
-			&& (host->caps & MMC_CAP_1_8V_DDR))
-				ddr = MMC_1_8V_DDR_MODE;
-		else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)
-			&& (host->caps & MMC_CAP_1_2V_DDR))
-				ddr = MMC_1_2V_DDR_MODE;
-	}
-
-	/*
-	 * Indicate HS200 SDR mode (if supported).
-	 */
 	if (mmc_card_hs200(card)) {
-		u32 ext_csd_bits;
-		u32 bus_width = card->host->ios.bus_width;
-
-		/*
-		 * For devices supporting HS200 mode, the bus width has
-		 * to be set before executing the tuning function. If
-		 * set before tuning, then device will respond with CRC
-		 * errors for responses on CMD line. So for HS200 the
-		 * sequence will be
-		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
-		 * 2. switch to HS200 mode
-		 * 3. set the clock to > 52Mhz <=200MHz and
-		 * 4. execute tuning for HS200
-		 */
-		if ((host->caps2 & MMC_CAP2_HS200) &&
-		    card->host->ops->execute_tuning) {
-			mmc_host_clk_hold(card->host);
-			err = card->host->ops->execute_tuning(card->host,
-				MMC_SEND_TUNING_BLOCK_HS200);
-			mmc_host_clk_release(card->host);
-		}
-		if (err) {
-			pr_warning("%s: tuning execution failed\n",
-				   mmc_hostname(card->host));
-			goto err;
-		}
-
-		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
-				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
-		err = mmc_select_powerclass(card, ext_csd_bits);
+		err = mmc_hs200_tuning(card);
 		if (err)
-			pr_warning("%s: power class selection to bus width %d"
-				   " failed\n", mmc_hostname(card->host),
-				   1 << bus_width);
+			goto err;
+
+		err = mmc_select_hs400(card);
+		if (err)
+			goto err;
+	} else if (mmc_card_hs(card)) {
+		/* Select the desired bus width optionally */
+		err = mmc_select_bus_width(card);
+		if (!IS_ERR_VALUE(err)) {
+			err = mmc_select_hs_ddr(card);
+			if (err)
+				goto err;
+		}
 	}
 
 	/*
-	 * Activate wide bus and DDR (if supported).
+	 * Choose the power class with selected bus interface
 	 */
-	if (!mmc_card_hs200(card) &&
-	    (card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
-	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
-		static unsigned ext_csd_bits[][2] = {
-			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
-			{ EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
-			{ EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
-		};
-		static unsigned bus_widths[] = {
-			MMC_BUS_WIDTH_8,
-			MMC_BUS_WIDTH_4,
-			MMC_BUS_WIDTH_1
-		};
-		unsigned idx, bus_width = 0;
-
-		if (host->caps & MMC_CAP_8_BIT_DATA)
-			idx = 0;
-		else
-			idx = 1;
-		for (; idx < ARRAY_SIZE(bus_widths); idx++) {
-			bus_width = bus_widths[idx];
-			if (bus_width == MMC_BUS_WIDTH_1)
-				ddr = 0; /* no DDR for 1-bit width */
-			err = mmc_select_powerclass(card, ext_csd_bits[idx][0]);
-			if (err)
-				pr_warning("%s: power class selection to "
-					   "bus width %d failed\n",
-					   mmc_hostname(card->host),
-					   1 << bus_width);
-
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][0],
-					 card->ext_csd.generic_cmd6_time);
-			if (!err) {
-				mmc_set_bus_width(card->host, bus_width);
-
-				/*
-				 * If controller can't handle bus width test,
-				 * compare ext_csd previously read in 1 bit mode
-				 * against ext_csd at new bus width
-				 */
-				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-					err = mmc_compare_ext_csds(card,
-						bus_width);
-				else
-					err = mmc_bus_test(card, bus_width);
-				if (!err)
-					break;
-			}
-		}
-
-		if (!err && ddr) {
-			err = mmc_select_powerclass(card, ext_csd_bits[idx][1]);
-			if (err)
-				pr_warning("%s: power class selection to "
-					   "bus width %d ddr %d failed\n",
-					   mmc_hostname(card->host),
-					   1 << bus_width, ddr);
-
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][1],
-					 card->ext_csd.generic_cmd6_time);
-		}
-		if (err) {
-			pr_warning("%s: switch to bus width %d ddr %d "
-				"failed\n", mmc_hostname(card->host),
-				1 << bus_width, ddr);
-			goto free_card;
-		} else if (ddr) {
-			/*
-			 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
-			 * signaling.
-			 *
-			 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
-			 *
-			 * 1.8V vccq at 3.3V core voltage (vcc) is not required
-			 * in the JEDEC spec for DDR.
-			 *
-			 * Do not force change in vccq since we are obviously
-			 * working and no change to vccq is needed.
-			 *
-			 * WARNING: eMMC rules are NOT the same as SD DDR
-			 */
-			if (ddr == MMC_1_2V_DDR_MODE) {
-				err = __mmc_set_signal_voltage(host,
-					MMC_SIGNAL_VOLTAGE_120);
-				if (err)
-					goto err;
-			}
-			mmc_card_set_ddr_mode(card);
-			mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50);
-			mmc_set_bus_width(card->host, bus_width);
-		}
-	}
+	mmc_select_powerclass(card);
 
 	/*
 	 * Enable HPI feature (if supported)
@@ -1507,7 +1637,6 @@
 		err = mmc_sleep(host);
 	else if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 
 	if (!err) {
 		mmc_power_off(host);
@@ -1637,7 +1766,6 @@
 {
 	int ret;
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 2dd359d..0c44510 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c

@@ -707,18 +707,10 @@
 	&dev_attr_serial.attr,
 	NULL,
 };
-
-static struct attribute_group sd_std_attr_group = {
-	.attrs = sd_std_attrs,
-};
-
-static const struct attribute_group *sd_attr_groups[] = {
-	&sd_std_attr_group,
-	NULL,
-};
+ATTRIBUTE_GROUPS(sd_std);
 
 struct device_type sd_type = {
-	.groups = sd_attr_groups,
+	.groups = sd_std_groups,
 };
 
 /*
@@ -895,7 +887,7 @@
 {
 	unsigned max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		if (max_dtr > card->sw_caps.hs_max_dtr)
 			max_dtr = card->sw_caps.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -905,12 +897,6 @@
 	return max_dtr;
 }
 
-void mmc_sd_go_highspeed(struct mmc_card *card)
-{
-	mmc_card_set_highspeed(card);
-	mmc_set_timing(card->host, MMC_TIMING_SD_HS);
-}
-
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -985,16 +971,13 @@
 		err = mmc_sd_init_uhs_card(card);
 		if (err)
 			goto free_card;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
 		 */
 		err = mmc_sd_switch_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto free_card;
 
@@ -1089,7 +1072,7 @@
 
 	if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+
 	if (!err) {
 		mmc_power_off(host);
 		mmc_card_set_suspended(host->card);
@@ -1198,7 +1181,6 @@
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_claim_host(host);
 	ret = mmc_sd_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);

diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h
index 4b34b24..aab824a 100644
--- a/drivers/mmc/core/sd.h
+++ b/drivers/mmc/core/sd.h

@@ -12,6 +12,5 @@
 	bool reinit);
 unsigned mmc_sd_get_max_clock(struct mmc_card *card);
 int mmc_sd_switch_hs(struct mmc_card *card);
-void mmc_sd_go_highspeed(struct mmc_card *card);
 
 #endif

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 4d721c6..e636d9e 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c

@@ -363,7 +363,7 @@
 {
 	unsigned max_dtr;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		/*
 		 * The SDIO specification doesn't mention how
 		 * the CIS transfer speed register relates to
@@ -733,7 +733,6 @@
 		mmc_set_clock(host, card->cis.max_dtr);
 
 		if (card->cccr.high_speed) {
-			mmc_card_set_highspeed(card);
 			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		}
 
@@ -792,16 +791,13 @@
 		err = mmc_sdio_init_uhs_card(card);
 		if (err)
 			goto remove;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Switch to high-speed (if supported).
 		 */
 		err = sdio_enable_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto remove;
 
@@ -943,40 +939,21 @@
  */
 static int mmc_sdio_suspend(struct mmc_host *host)
 {
-	int i, err = 0;
-
-	for (i = 0; i < host->card->sdio_funcs; i++) {
-		struct sdio_func *func = host->card->sdio_func[i];
-		if (func && sdio_func_present(func) && func->dev.driver) {
-			const struct dev_pm_ops *pmops = func->dev.driver->pm;
-			err = pmops->suspend(&func->dev);
-			if (err)
-				break;
-		}
-	}
-	while (err && --i >= 0) {
-		struct sdio_func *func = host->card->sdio_func[i];
-		if (func && sdio_func_present(func) && func->dev.driver) {
-			const struct dev_pm_ops *pmops = func->dev.driver->pm;
-			pmops->resume(&func->dev);
-		}
-	}
-
-	if (!err && mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
+	if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		mmc_claim_host(host);
 		sdio_disable_wide(host->card);
 		mmc_release_host(host);
 	}
 
-	if (!err && !mmc_card_keep_power(host))
+	if (!mmc_card_keep_power(host))
 		mmc_power_off(host);
 
-	return err;
+	return 0;
 }
 
 static int mmc_sdio_resume(struct mmc_host *host)
 {
-	int i, err = 0;
+	int err = 0;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -1019,24 +996,6 @@
 		wake_up_process(host->sdio_irq_thread);
 	mmc_release_host(host);
 
-	/*
-	 * If the card looked to be the same as before suspending, then
-	 * we proceed to resume all card functions.  If one of them returns
-	 * an error then we simply return that error to the core and the
-	 * card will be redetected as new.  It is the responsibility of
-	 * the function driver to perform further tests with the extra
-	 * knowledge it has of the card to confirm the card is indeed the
-	 * same as before suspending (same MAC address for network cards,
-	 * etc.) and return an error otherwise.
-	 */
-	for (i = 0; !err && i < host->card->sdio_funcs; i++) {
-		struct sdio_func *func = host->card->sdio_func[i];
-		if (func && sdio_func_present(func) && func->dev.driver) {
-			const struct dev_pm_ops *pmops = func->dev.driver->pm;
-			err = pmops->resume(&func->dev);
-		}
-	}
-
 	host->pm_flags &= ~MMC_PM_KEEP_POWER;
 	return err;
 }

diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 92d1ba8..4fa8fef9 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c

@@ -197,20 +197,8 @@
 
 #ifdef CONFIG_PM
 
-#ifdef CONFIG_PM_SLEEP
-static int pm_no_operation(struct device *dev)
-{
-	/*
-	 * Prevent the PM core from calling SDIO device drivers' suspend
-	 * callback routines, which it is not supposed to do, by using this
-	 * empty function as the bus type suspend callaback for SDIO.
-	 */
-	return 0;
-}
-#endif
-
 static const struct dev_pm_ops sdio_bus_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(pm_no_operation, pm_no_operation)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_generic_suspend, pm_generic_resume)
 	SET_RUNTIME_PM_OPS(
 		pm_generic_runtime_suspend,
 		pm_generic_runtime_resume,

diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index aaa9046..5cc13c8 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c

@@ -90,6 +90,15 @@
 	return ret;
 }
 
+/*
+ * sdio_run_irqs - flag an SDIO interrupt as pending and process it
+ * @host: host to process
+ *
+ * Claims @host, sets host->sdio_irq_pending, calls
+ * process_sdio_pending_irqs() and releases @host again.  Exported
+ * (GPL-only) for use outside this file.
+ */
+void sdio_run_irqs(struct mmc_host *host)
+{
+	mmc_claim_host(host);
+	host->sdio_irq_pending = true;
+	process_sdio_pending_irqs(host);
+	mmc_release_host(host);
+}
+EXPORT_SYMBOL_GPL(sdio_run_irqs);
+
 static int sdio_irq_thread(void *_host)
 {
 	struct mmc_host *host = _host;
@@ -189,14 +198,20 @@
 	WARN_ON(!host->claimed);
 
 	if (!host->sdio_irqs++) {
-		atomic_set(&host->sdio_irq_thread_abort, 0);
-		host->sdio_irq_thread =
-			kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
-				mmc_hostname(host));
-		if (IS_ERR(host->sdio_irq_thread)) {
-			int err = PTR_ERR(host->sdio_irq_thread);
-			host->sdio_irqs--;
-			return err;
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 0);
+			host->sdio_irq_thread =
+				kthread_run(sdio_irq_thread, host,
+					    "ksdioirqd/%s", mmc_hostname(host));
+			if (IS_ERR(host->sdio_irq_thread)) {
+				int err = PTR_ERR(host->sdio_irq_thread);
+				host->sdio_irqs--;
+				return err;
+			}
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
 		}
 	}
 
@@ -211,8 +226,14 @@
 	BUG_ON(host->sdio_irqs < 1);
 
 	if (!--host->sdio_irqs) {
-		atomic_set(&host->sdio_irq_thread_abort, 1);
-		kthread_stop(host->sdio_irq_thread);
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 1);
+			kthread_stop(host->sdio_irq_thread);
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 0);
+			mmc_host_clk_release(host);
+		}
 	}
 
 	return 0;

diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index f7650b8..5f89cb8 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c

@@ -32,9 +32,7 @@
 	/* Schedule a card detection after a debounce timeout */
 	struct mmc_host *host = dev_id;
 
-	if (host->ops->card_event)
-		host->ops->card_event(host);
-
+	host->trigger_card_event = true;
 	mmc_detect_change(host, msecs_to_jiffies(200));
 
 	return IRQ_HANDLED;

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 779368b..a565254 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig

@@ -168,7 +168,7 @@
 
 config MMC_SDHCI_DOVE
 	tristate "SDHCI support on Marvell's Dove SoC"
-	depends on ARCH_DOVE
+	depends on ARCH_DOVE || MACH_DOVE
 	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
 	help
@@ -216,8 +216,7 @@
 config MMC_SDHCI_PXAV3
 	tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
 	depends on CLKDEV_LOOKUP
-	select MMC_SDHCI
-	select MMC_SDHCI_PLTFM
+	depends on MMC_SDHCI_PLTFM
 	default CPU_MMP2
 	help
 	  This selects the Marvell(R) PXAV3 SD Host Controller.
@@ -229,8 +228,7 @@
 config MMC_SDHCI_PXAV2
 	tristate "Marvell PXA9XX SD Host Controller support (PXAV2)"
 	depends on CLKDEV_LOOKUP
-	select MMC_SDHCI
-	select MMC_SDHCI_PLTFM
+	depends on MMC_SDHCI_PLTFM
 	default CPU_PXA910
 	help
 	  This selects the Marvell(R) PXAV2 SD Host Controller.
@@ -264,7 +262,7 @@
 config MMC_SDHCI_BCM_KONA
 	tristate "SDHCI support on Broadcom KONA platform"
 	depends on ARCH_BCM_MOBILE
-	select MMC_SDHCI_PLTFM
+	depends on MMC_SDHCI_PLTFM
 	help
 	  This selects the Broadcom Kona Secure Digital Host Controller
 	  Interface(SDHCI) support.
@@ -283,10 +281,19 @@
 
 	  If unsure, say N.
 
+config MMC_MOXART
+	tristate "MOXART SD/MMC Host Controller support"
+	depends on ARCH_MOXART && MMC
+	help
+	  This selects support for the MOXART SD/MMC Host Controller.
+	  MOXA provides one multi-functional card reader which can
+	  be found on some embedded hardware such as UC-7112-LX.
+	  If you have a controller with this interface, say Y here.
+
 config MMC_OMAP
 	tristate "TI OMAP Multimedia Card Interface support"
 	depends on ARCH_OMAP
-	select TPS65010 if MACH_OMAP_H2
+	depends on TPS65010 || !MACH_OMAP_H2
 	help
 	  This selects the TI OMAP Multimedia card Interface.
 	  If you have an OMAP board with a Multimedia Card slot,
@@ -688,6 +695,12 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called wmt-sdmmc.
 
+config MMC_USDHI6ROL0
+	tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
+	help
+	  This selects support for the Renesas USDHI6ROL0 SD/SDIO
+	  Host Controller
+
 config MMC_REALTEK_PCI
 	tristate "Realtek PCI-E SD/MMC Card Interface Driver"
 	depends on MFD_RTSX_PCI

diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 61cbc24..7f81ddf 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile

@@ -50,7 +50,9 @@
 obj-$(CONFIG_MMC_VUB300)	+= vub300.o
 obj-$(CONFIG_MMC_USHC)		+= ushc.o
 obj-$(CONFIG_MMC_WMT)		+= wmt-sdmmc.o
+obj-$(CONFIG_MMC_MOXART)	+= moxart-mmc.o
 obj-$(CONFIG_MMC_SUNXI)		+= sunxi-mmc.o
+obj-$(CONFIG_MMC_USDHI6ROL0)	+= usdhi6rol0.o
 
 obj-$(CONFIG_MMC_REALTEK_PCI)	+= rtsx_pci_sdmmc.o
 obj-$(CONFIG_MMC_REALTEK_USB)	+= rtsx_usb_sdmmc.o

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 42706ea..bb585d9 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c

@@ -37,6 +37,7 @@
 #include <linux/atmel-mci.h>
 #include <linux/atmel_pdc.h>
 
+#include <asm/cacheflush.h>
 #include <asm/io.h>
 #include <asm/unaligned.h>
 
@@ -820,16 +821,9 @@
 
 	atmci_pdc_cleanup(host);
 
-	/*
-	 * If the card was removed, data will be NULL. No point trying
-	 * to send the stop command or waiting for NBUSY in this case.
-	 */
-	if (host->data) {
-		dev_dbg(&host->pdev->dev,
-		        "(%s) set pending xfer complete\n", __func__);
-		atmci_set_pending(host, EVENT_XFER_COMPLETE);
-		tasklet_schedule(&host->tasklet);
-	}
+	dev_dbg(&host->pdev->dev, "(%s) set pending xfer complete\n", __func__);
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
+	tasklet_schedule(&host->tasklet);
 }
 
 static void atmci_dma_cleanup(struct atmel_mci *host)

diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 3423c5e..0fbc53a 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c

@@ -187,7 +187,7 @@
 	unsigned long actual;
 	u8 div = priv->ciu_div + 1;
 
-	if (ios->timing == MMC_TIMING_UHS_DDR50) {
+	if (ios->timing == MMC_TIMING_MMC_DDR52) {
 		mci_writel(host, CLKSEL, priv->ddr_timing);
 		/* Should be double rate for DDR mode */
 		if (ios->bus_width == MMC_BUS_WIDTH_8)
@@ -386,8 +386,7 @@
 
 /* Common capabilities of Exynos4/Exynos5 SoC */
 static unsigned long exynos_dwmmc_caps[4] = {
-	MMC_CAP_UHS_DDR50 | MMC_CAP_1_8V_DDR |
-		MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
+	MMC_CAP_1_8V_DDR | MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
 	MMC_CAP_CMD23,
 	MMC_CAP_CMD23,
 	MMC_CAP_CMD23,
@@ -426,7 +425,7 @@
 	return dw_mci_pltfm_register(pdev, drv_data);
 }
 
-const struct dev_pm_ops dw_mci_exynos_pmops = {
+static const struct dev_pm_ops dw_mci_exynos_pmops = {
 	SET_SYSTEM_SLEEP_PM_OPS(dw_mci_exynos_suspend, dw_mci_exynos_resume)
 	.resume_noirq = dw_mci_exynos_resume_noirq,
 	.thaw_noirq = dw_mci_exynos_resume_noirq,

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index cced599..1ac227c 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c

@@ -235,12 +235,6 @@
 }
 #endif /* defined(CONFIG_DEBUG_FS) */
 
-static void dw_mci_set_timeout(struct dw_mci *host)
-{
-	/* timeout (maximum) */
-	mci_writel(host, TMOUT, 0xffffffff);
-}
-
 static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 {
 	struct mmc_data	*data;
@@ -257,9 +251,8 @@
 	    (cmd->opcode == SD_IO_RW_DIRECT &&
 	     ((cmd->arg >> 9) & 0x1FFFF) == SDIO_CCCR_ABORT))
 		cmdr |= SDMMC_CMD_STOP;
-	else
-		if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
-			cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+	else if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
+		cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
 
 	if (cmd->flags & MMC_RSP_PRESENT) {
 		/* We expect a response, so set this bit */
@@ -850,8 +843,6 @@
 	u32 cmdflags;
 
 	mrq = slot->mrq;
-	if (host->pdata->select_slot)
-		host->pdata->select_slot(slot->id);
 
 	host->cur_slot = slot;
 	host->mrq = mrq;
@@ -864,7 +855,7 @@
 
 	data = cmd->data;
 	if (data) {
-		dw_mci_set_timeout(host);
+		mci_writel(host, TMOUT, 0xFFFFFFFF);
 		mci_writel(host, BYTCNT, data->blksz*data->blocks);
 		mci_writel(host, BLKSIZ, data->blksz);
 	}
@@ -962,7 +953,7 @@
 	regs = mci_readl(slot->host, UHS_REG);
 
 	/* DDR mode set */
-	if (ios->timing == MMC_TIMING_UHS_DDR50)
+	if (ios->timing == MMC_TIMING_MMC_DDR52)
 		regs |= ((0x1 << slot->id) << 16);
 	else
 		regs &= ~((0x1 << slot->id) << 16);
@@ -985,17 +976,11 @@
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
-		/* Power up slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, mmc->ocr_avail);
 		regs = mci_readl(slot->host, PWREN);
 		regs |= (1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
 		break;
 	case MMC_POWER_OFF:
-		/* Power down slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, 0);
 		regs = mci_readl(slot->host, PWREN);
 		regs &= ~(1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
@@ -1009,15 +994,13 @@
 {
 	int read_only;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
-	struct dw_mci_board *brd = slot->host->pdata;
+	int gpio_ro = mmc_gpio_get_ro(mmc);
 
 	/* Use platform get_ro function, else try on board write protect */
 	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
 		read_only = 0;
-	else if (brd->get_ro)
-		read_only = brd->get_ro(slot->id);
-	else if (gpio_is_valid(slot->wp_gpio))
-		read_only = gpio_get_value(slot->wp_gpio);
+	else if (!IS_ERR_VALUE(gpio_ro))
+		read_only = gpio_ro;
 	else
 		read_only =
 			mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0;
@@ -1039,8 +1022,6 @@
 	/* Use platform get_cd function, else try onboard card detect */
 	if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
 		present = 1;
-	else if (brd->get_cd)
-		present = !brd->get_cd(slot->id);
 	else if (!IS_ERR_VALUE(gpio_cd))
 		present = gpio_cd;
 	else
@@ -1248,7 +1229,7 @@
 			data->error = -EIO;
 		}
 
-		dev_err(host->dev, "data error, status 0x%08x\n", status);
+		dev_dbg(host->dev, "data error, status 0x%08x\n", status);
 
 		/*
 		 * After an error, there may be data lingering
@@ -2045,86 +2026,15 @@
 
 	return quirks;
 }
-
-/* find out bus-width for a given slot */
-static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
-{
-	struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
-	u32 bus_wd = 1;
-
-	if (!np)
-		return 1;
-
-	if (of_property_read_u32(np, "bus-width", &bus_wd))
-		dev_err(dev, "bus-width property not found, assuming width"
-			       " as 1\n");
-	return bus_wd;
-}
-
-/* find the write protect gpio for a given slot; or -1 if none specified */
-static int dw_mci_of_get_wp_gpio(struct device *dev, u8 slot)
-{
-	struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
-	int gpio;
-
-	if (!np)
-		return -EINVAL;
-
-	gpio = of_get_named_gpio(np, "wp-gpios", 0);
-
-	/* Having a missing entry is valid; return silently */
-	if (!gpio_is_valid(gpio))
-		return -EINVAL;
-
-	if (devm_gpio_request(dev, gpio, "dw-mci-wp")) {
-		dev_warn(dev, "gpio [%d] request failed\n", gpio);
-		return -EINVAL;
-	}
-
-	return gpio;
-}
-
-/* find the cd gpio for a given slot */
-static void dw_mci_of_get_cd_gpio(struct device *dev, u8 slot,
-					struct mmc_host *mmc)
-{
-	struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
-	int gpio;
-
-	if (!np)
-		return;
-
-	gpio = of_get_named_gpio(np, "cd-gpios", 0);
-
-	/* Having a missing entry is valid; return silently */
-	if (!gpio_is_valid(gpio))
-		return;
-
-	if (mmc_gpio_request_cd(mmc, gpio, 0))
-		dev_warn(dev, "gpio [%d] request failed\n", gpio);
-}
 #else /* CONFIG_OF */
 static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
 {
 	return 0;
 }
-static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
-{
-	return 1;
-}
 static struct device_node *dw_mci_of_find_slot_node(struct device *dev, u8 slot)
 {
 	return NULL;
 }
-static int dw_mci_of_get_wp_gpio(struct device *dev, u8 slot)
-{
-	return -EINVAL;
-}
-static void dw_mci_of_get_cd_gpio(struct device *dev, u8 slot,
-					struct mmc_host *mmc)
-{
-	return;
-}
 #endif /* CONFIG_OF */
 
 static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
@@ -2134,7 +2044,6 @@
 	const struct dw_mci_drv_data *drv_data = host->drv_data;
 	int ctrl_id, ret;
 	u32 freq[2];
-	u8 bus_width;
 
 	mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), host->dev);
 	if (!mmc)
@@ -2158,17 +2067,7 @@
 		mmc->f_max = freq[1];
 	}
 
-	if (host->pdata->get_ocr)
-		mmc->ocr_avail = host->pdata->get_ocr(id);
-	else
-		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-
-	/*
-	 * Start with slot power disabled, it will be enabled when a card
-	 * is detected.
-	 */
-	if (host->pdata->setpower)
-		host->pdata->setpower(id, 0);
+	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	if (host->pdata->caps)
 		mmc->caps = host->pdata->caps;
@@ -2189,19 +2088,7 @@
 	if (host->pdata->caps2)
 		mmc->caps2 = host->pdata->caps2;
 
-	if (host->pdata->get_bus_wd)
-		bus_width = host->pdata->get_bus_wd(slot->id);
-	else if (host->dev->of_node)
-		bus_width = dw_mci_of_get_bus_wd(host->dev, slot->id);
-	else
-		bus_width = 1;
-
-	switch (bus_width) {
-	case 8:
-		mmc->caps |= MMC_CAP_8_BIT_DATA;
-	case 4:
-		mmc->caps |= MMC_CAP_4_BIT_DATA;
-	}
+	mmc_of_parse(mmc);
 
 	if (host->pdata->blk_settings) {
 		mmc->max_segs = host->pdata->blk_settings->max_segs;
@@ -2226,8 +2113,10 @@
 #endif /* CONFIG_MMC_DW_IDMAC */
 	}
 
-	slot->wp_gpio = dw_mci_of_get_wp_gpio(host->dev, slot->id);
-	dw_mci_of_get_cd_gpio(host->dev, slot->id, mmc);
+	if (dw_mci_get_cd(mmc))
+		set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+	else
+		clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
 
 	ret = mmc_add_host(mmc);
 	if (ret)
@@ -2249,10 +2138,6 @@
 
 static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
 {
-	/* Shutdown detect IRQ */
-	if (slot->host->pdata->exit)
-		slot->host->pdata->exit(id);
-
 	/* Debugfs stuff is cleaned up by mmc core */
 	mmc_remove_host(slot->mmc);
 	slot->host->slot[id] = NULL;
@@ -2399,24 +2284,9 @@
 			return ERR_PTR(ret);
 	}
 
-	if (of_find_property(np, "keep-power-in-suspend", NULL))
-		pdata->pm_caps |= MMC_PM_KEEP_POWER;
-
-	if (of_find_property(np, "enable-sdio-wakeup", NULL))
-		pdata->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
-
 	if (of_find_property(np, "supports-highspeed", NULL))
 		pdata->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
 
-	if (of_find_property(np, "caps2-mmc-hs200-1_8v", NULL))
-		pdata->caps2 |= MMC_CAP2_HS200_1_8V_SDR;
-
-	if (of_find_property(np, "caps2-mmc-hs200-1_2v", NULL))
-		pdata->caps2 |= MMC_CAP2_HS200_1_2V_SDR;
-
-	if (of_get_property(np, "cd-inverted", NULL))
-		pdata->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
-
 	return pdata;
 }
 
@@ -2442,9 +2312,9 @@
 		}
 	}
 
-	if (!host->pdata->select_slot && host->pdata->num_slots > 1) {
+	if (host->pdata->num_slots > 1) {
 		dev_err(host->dev,
-			"Platform data must supply select_slot function\n");
+			"Platform data must supply num_slots.\n");
 		return -ENODEV;
 	}
 
@@ -2474,12 +2344,19 @@
 			ret = clk_set_rate(host->ciu_clk, host->pdata->bus_hz);
 			if (ret)
 				dev_warn(host->dev,
-					 "Unable to set bus rate to %ul\n",
+					 "Unable to set bus rate to %uHz\n",
 					 host->pdata->bus_hz);
 		}
 		host->bus_hz = clk_get_rate(host->ciu_clk);
 	}
 
+	if (!host->bus_hz) {
+		dev_err(host->dev,
+			"Platform data must supply bus speed\n");
+		ret = -ENODEV;
+		goto err_clk_ciu;
+	}
+
 	if (drv_data && drv_data->init) {
 		ret = drv_data->init(host);
 		if (ret) {
@@ -2516,13 +2393,6 @@
 		}
 	}
 
-	if (!host->bus_hz) {
-		dev_err(host->dev,
-			"Platform data must supply bus speed\n");
-		ret = -ENODEV;
-		goto err_regulator;
-	}
-
 	host->quirks = host->pdata->quirks;
 
 	spin_lock_init(&host->lock);
@@ -2666,8 +2536,6 @@
 err_dmaunmap:
 	if (host->use_dma && host->dma_ops->exit)
 		host->dma_ops->exit(host);
-
-err_regulator:
 	if (host->vmmc)
 		regulator_disable(host->vmmc);
 

diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 6834977..738fa24 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h

@@ -195,7 +195,6 @@
  * @mmc: The mmc_host representing this slot.
  * @host: The MMC controller this slot is using.
  * @quirks: Slot-level quirks (DW_MCI_SLOT_QUIRK_XXX)
- * @wp_gpio: If gpio_is_valid() we'll use this to read write protect.
  * @ctype: Card type for this slot.
  * @mrq: mmc_request currently being processed or waiting to be
  *	processed, or NULL when the slot is idle.
@@ -214,7 +213,6 @@
 	struct dw_mci		*host;
 
 	int			quirks;
-	int			wp_gpio;
 
 	u32			ctype;
 

diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index de2139c..537d6c7 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c

@@ -515,10 +515,13 @@
 
 		jz4740_mmc_send_command(host, req->stop);
 
-		timeout = jz4740_mmc_poll_irq(host, JZ_MMC_IRQ_PRG_DONE);
-		if (timeout) {
-			host->state = JZ4740_MMC_STATE_DONE;
-			break;
+		if (mmc_resp_type(req->stop) & MMC_RSP_BUSY) {
+			timeout = jz4740_mmc_poll_irq(host,
+						      JZ_MMC_IRQ_PRG_DONE);
+			if (timeout) {
+				host->state = JZ4740_MMC_STATE_DONE;
+				break;
+			}
 		}
 	case JZ4740_MMC_STATE_DONE:
 		break;

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 0a87e56..cc8d4a6 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c

@@ -448,7 +448,6 @@
 {
 	struct scratch		*data = host->data;
 	u8			*cp = data->status;
-	u32			arg = cmd->arg;
 	int			status;
 	struct spi_transfer	*t;
 
@@ -465,14 +464,12 @@
 	 * We init the whole buffer to all-ones, which is what we need
 	 * to write while we're reading (later) response data.
 	 */
-	memset(cp++, 0xff, sizeof(data->status));
+	memset(cp, 0xff, sizeof(data->status));
 
-	*cp++ = 0x40 | cmd->opcode;
-	*cp++ = (u8)(arg >> 24);
-	*cp++ = (u8)(arg >> 16);
-	*cp++ = (u8)(arg >> 8);
-	*cp++ = (u8)arg;
-	*cp++ = (crc7(0, &data->status[1], 5) << 1) | 0x01;
+	cp[1] = 0x40 | cmd->opcode;
+	put_unaligned_be32(cmd->arg, cp+2);
+	cp[6] = crc7_be(0, cp+1, 5) | 0x01;
+	cp += 7;
 
 	/* Then, read up to 13 bytes (while writing all-ones):
 	 *  - N(CR) (== 1..8) bytes of all-ones
@@ -711,10 +708,7 @@
 	 * so we have to cope with this situation and check the response
 	 * bit-by-bit. Arggh!!!
 	 */
-	pattern  = scratch->status[0] << 24;
-	pattern |= scratch->status[1] << 16;
-	pattern |= scratch->status[2] << 8;
-	pattern |= scratch->status[3];
+	pattern = get_unaligned_be32(scratch->status);
 
 	/* First 3 bit of pattern are undefined */
 	pattern |= 0xE0000000;

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index a084edd..7ad463e 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c

@@ -301,7 +301,8 @@
 	if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
 		clk |= MCI_ST_8BIT_BUS;
 
-	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50)
+	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
+	    host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
 		clk |= MCI_ST_UX500_NEG_EDGE;
 
 	mmci_write_clkreg(host, clk);
@@ -764,7 +765,8 @@
 			mmci_write_clkreg(host, clk);
 		}
 
-	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50)
+	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
+	    host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
 		datactrl |= MCI_ST_DPSM_DDRMODE;
 
 	/*

diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
new file mode 100644
index 0000000..74924a0
--- /dev/null
+++ b/drivers/mmc/host/moxart-mmc.c

@@ -0,0 +1,730 @@
+/*
+ * MOXA ART MMC host driver.
+ *
+ * Copyright (C) 2014 Jonas Jensen
+ *
+ * Jonas Jensen <jonas.jensen@gmail.com>
+ *
+ * Based on code from
+ * Moxa Technologies Co., Ltd. <www.moxa.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/sd.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+#include <linux/of_dma.h>
+#include <linux/spinlock.h>
+
+#define REG_COMMAND		0
+#define REG_ARGUMENT		4
+#define REG_RESPONSE0		8
+#define REG_RESPONSE1		12
+#define REG_RESPONSE2		16
+#define REG_RESPONSE3		20
+#define REG_RESPONSE_COMMAND	24
+#define REG_DATA_CONTROL	28
+#define REG_DATA_TIMER		32
+#define REG_DATA_LENGTH		36
+#define REG_STATUS		40
+#define REG_CLEAR		44
+#define REG_INTERRUPT_MASK	48
+#define REG_POWER_CONTROL	52
+#define REG_CLOCK_CONTROL	56
+#define REG_BUS_WIDTH		60
+#define REG_DATA_WINDOW		64
+#define REG_FEATURE		68
+#define REG_REVISION		72
+
+/* REG_COMMAND */
+#define CMD_SDC_RESET		BIT(10)
+#define CMD_EN			BIT(9)
+#define CMD_APP_CMD		BIT(8)
+#define CMD_LONG_RSP		BIT(7)
+#define CMD_NEED_RSP		BIT(6)
+#define CMD_IDX_MASK		0x3f
+
+/* REG_RESPONSE_COMMAND */
+#define RSP_CMD_APP		BIT(6)
+#define RSP_CMD_IDX_MASK	0x3f
+
+/* REG_DATA_CONTROL */
+#define DCR_DATA_FIFO_RESET     BIT(8)
+#define DCR_DATA_THRES          BIT(7)
+#define DCR_DATA_EN		BIT(6)
+#define DCR_DMA_EN		BIT(5)
+#define DCR_DATA_WRITE		BIT(4)
+#define DCR_BLK_SIZE		0x0f
+
+/* REG_DATA_LENGTH */
+#define DATA_LEN_MASK		0xffffff
+
+/* REG_STATUS */
+#define WRITE_PROT		BIT(12)
+#define CARD_DETECT		BIT(11)
+/* Bits 0-10 below also apply to the interrupt mask and clear registers. */
+#define CARD_CHANGE		BIT(10)
+#define FIFO_ORUN		BIT(9)
+#define FIFO_URUN		BIT(8)
+#define DATA_END		BIT(7)
+#define CMD_SENT		BIT(6)
+#define DATA_CRC_OK		BIT(5)
+#define RSP_CRC_OK		BIT(4)
+#define DATA_TIMEOUT		BIT(3)
+#define RSP_TIMEOUT		BIT(2)
+#define DATA_CRC_FAIL		BIT(1)
+#define RSP_CRC_FAIL		BIT(0)
+
+#define MASK_RSP		(RSP_TIMEOUT | RSP_CRC_FAIL | \
+				 RSP_CRC_OK  | CARD_DETECT  | CMD_SENT)
+
+#define MASK_DATA		(DATA_CRC_OK   | DATA_END | \
+				 DATA_CRC_FAIL | DATA_TIMEOUT)
+
+#define MASK_INTR_PIO		(FIFO_URUN | FIFO_ORUN | CARD_CHANGE)
+
+/* REG_POWER_CONTROL */
+#define SD_POWER_ON		BIT(4)
+#define SD_POWER_MASK		0x0f
+
+/* REG_CLOCK_CONTROL */
+#define CLK_HISPD		BIT(9)
+#define CLK_OFF			BIT(8)
+#define CLK_SD			BIT(7)
+#define CLK_DIV_MASK		0x7f
+
+/* REG_BUS_WIDTH */
+#define BUS_WIDTH_8		BIT(2)
+#define BUS_WIDTH_4		BIT(1)
+#define BUS_WIDTH_1		BIT(0)
+
+#define MMC_VDD_360		23
+#define MIN_POWER		(MMC_VDD_360 - SD_POWER_MASK)
+#define MAX_RETRIES		500000
+
+/*
+ * Per-controller driver state.
+ *
+ * @lock:         taken with spin_lock_irqsave() by the request path and the
+ *                interrupt handler
+ * @base:         MMIO base that the REG_* offsets are added to
+ * @reg_phys:     NOTE(review): presumably the physical register address used
+ *                for DMA slave configuration - its use is not in this part
+ *                of the file, confirm
+ * @dma_chan_tx:  DMA channel used for MMC_DATA_WRITE transfers
+ * @dma_chan_rx:  DMA channel used for all other (read) transfers
+ * @tx_desc:      descriptor most recently submitted by moxart_transfer_dma()
+ * @mmc:          the mmc_host this state belongs to
+ * @mrq:          request being processed; cleared by the interrupt handler
+ *                on a card-change event
+ * @cur_sg:       scatterlist entry currently being moved by PIO
+ * @dma_complete: completed by moxart_dma_complete()
+ * @pio_complete: completed by moxart_transfer_pio()
+ * @num_sg:       scatterlist entries left, including @cur_sg
+ * @data_remain:  bytes of @cur_sg still to be moved
+ * @data_len:     total bytes of the data phase (blocks * blksz)
+ * @fifo_width:   upper bound, in bytes, on what PIO moves per FIFO status
+ *                event; transfers larger than this use DMA when available
+ * @timeout:      timeout handed to wait_for_completion_interruptible_timeout()
+ * @rate:         value written to REG_DATA_TIMER for each data transfer
+ * @sysclk:       NOTE(review): not referenced in this part of the file
+ * @have_dma:     true when DMA may be used
+ * @is_removed:   CARD_DETECT status bit as sampled at the last card-change
+ *                interrupt
+ */
+struct moxart_host {
+	spinlock_t			lock;
+
+	void __iomem			*base;
+
+	phys_addr_t			reg_phys;
+
+	struct dma_chan			*dma_chan_tx;
+	struct dma_chan                 *dma_chan_rx;
+	struct dma_async_tx_descriptor	*tx_desc;
+	struct mmc_host			*mmc;
+	struct mmc_request		*mrq;
+	struct scatterlist		*cur_sg;
+	struct completion		dma_complete;
+	struct completion		pio_complete;
+
+	u32				num_sg;
+	u32				data_remain;
+	u32				data_len;
+	u32				fifo_width;
+	u32				timeout;
+	u32				rate;
+
+	long				sysclk;
+
+	bool				have_dma;
+	bool				is_removed;
+};
+
+/*
+ * Begin scatterlist bookkeeping for @data: select its first entry and
+ * record how many bytes of that entry belong to the transfer, which is
+ * never more than host->data_len.
+ */
+static inline void moxart_init_sg(struct moxart_host *host,
+				  struct mmc_data *data)
+{
+	struct scatterlist *first = data->sg;
+	u32 chunk = first->length;
+
+	if (chunk > host->data_len)
+		chunk = host->data_len;
+
+	host->cur_sg = first;
+	host->num_sg = data->sg_len;
+	host->data_remain = chunk;
+}
+
+/*
+ * Advance to the next scatterlist entry of the request in flight.
+ *
+ * The list is walked with sg_next() instead of pointer arithmetic so that
+ * chained scatterlists are followed correctly.  While entries remain,
+ * data_remain is reloaded with the new entry's length, capped at the
+ * number of bytes the transfer still has to move.
+ *
+ * Returns the number of scatterlist entries left, counting the one just
+ * selected.
+ */
+static inline int moxart_next_sg(struct moxart_host *host)
+{
+	int remain;
+	struct mmc_data *data = host->mrq->cmd->data;
+
+	host->cur_sg = sg_next(host->cur_sg);
+	host->num_sg--;
+
+	if (host->num_sg > 0) {
+		host->data_remain = host->cur_sg->length;
+		remain = host->data_len - data->bytes_xfered;
+		if (remain > 0 && remain < host->data_remain)
+			host->data_remain = remain;
+	}
+
+	return host->num_sg;
+}
+
+/*
+ * Poll REG_STATUS until any bit in @mask is set, for at most MAX_RETRIES
+ * reads with a 5 us delay after each unsuccessful one.
+ *
+ * The last value read is always stored in @status.  On a match the
+ * matching bits are written to REG_CLEAR and 0 is returned; otherwise an
+ * error is logged and -ETIMEDOUT is returned.
+ */
+static int moxart_wait_for_status(struct moxart_host *host,
+				  u32 mask, u32 *status)
+{
+	u32 tries;
+
+	for (tries = MAX_RETRIES; tries; tries--) {
+		u32 val = readl(host->base + REG_STATUS);
+
+		*status = val;
+		if (val & mask) {
+			writel(val & mask, host->base + REG_CLEAR);
+			return 0;
+		}
+		udelay(5);
+	}
+
+	dev_err(mmc_dev(host->mmc), "timed out waiting for status\n");
+
+	return -ETIMEDOUT;
+}
+
+
+/*
+ * Issue @cmd on the controller and poll for its outcome.
+ *
+ * On return cmd->error is -ETIMEDOUT (status poll or response timed out)
+ * or -EIO (response CRC failure) when something went wrong; when the
+ * response CRC was reported good, cmd->resp[] holds the response.
+ */
+static void moxart_send_command(struct moxart_host *host,
+	struct mmc_command *cmd)
+{
+	u32 status, cmdctrl;
+
+	/* Drop response status left over from an earlier command. */
+	writel(RSP_TIMEOUT  | RSP_CRC_OK |
+	       RSP_CRC_FAIL | CMD_SENT, host->base + REG_CLEAR);
+	writel(cmd->arg, host->base + REG_ARGUMENT);
+
+	/*
+	 * These opcodes are flagged to the controller as application
+	 * commands.  NOTE(review): the match is on the opcode number alone;
+	 * confirm that ordinary commands sharing one of these numbers are
+	 * not flagged by mistake.
+	 */
+	cmdctrl = cmd->opcode & CMD_IDX_MASK;
+	if (cmdctrl == SD_APP_SET_BUS_WIDTH    || cmdctrl == SD_APP_OP_COND   ||
+	    cmdctrl == SD_APP_SEND_SCR         || cmdctrl == SD_APP_SD_STATUS ||
+	    cmdctrl == SD_APP_SEND_NUM_WR_BLKS)
+		cmdctrl |= CMD_APP_CMD;
+
+	if (cmd->flags & MMC_RSP_PRESENT)
+		cmdctrl |= CMD_NEED_RSP;
+
+	if (cmd->flags & MMC_RSP_136)
+		cmdctrl |= CMD_LONG_RSP;
+
+	/* Writing the command word with CMD_EN set is the last setup step. */
+	writel(cmdctrl | CMD_EN, host->base + REG_COMMAND);
+
+	/*
+	 * MASK_RSP also contains CARD_DETECT, so the poll ends as well when
+	 * that status bit is set.
+	 */
+	if (moxart_wait_for_status(host, MASK_RSP, &status) == -ETIMEDOUT)
+		cmd->error = -ETIMEDOUT;
+
+	if (status & RSP_TIMEOUT) {
+		cmd->error = -ETIMEDOUT;
+		return;
+	}
+	if (status & RSP_CRC_FAIL) {
+		cmd->error = -EIO;
+		return;
+	}
+	if (status & RSP_CRC_OK) {
+		if (cmd->flags & MMC_RSP_136) {
+			/* The four response registers map to resp[] in reverse. */
+			cmd->resp[3] = readl(host->base + REG_RESPONSE0);
+			cmd->resp[2] = readl(host->base + REG_RESPONSE1);
+			cmd->resp[1] = readl(host->base + REG_RESPONSE2);
+			cmd->resp[0] = readl(host->base + REG_RESPONSE3);
+		} else {
+			cmd->resp[0] = readl(host->base + REG_RESPONSE0);
+		}
+	}
+}
+
+/*
+ * Descriptor callback installed by moxart_transfer_dma(); @param is the
+ * moxart_host whose dma_complete completion is signalled.
+ */
+static void moxart_dma_complete(void *param)
+{
+	struct completion *done = &((struct moxart_host *)param)->dma_complete;
+
+	complete(done);
+}
+
+/*
+ * Move the data phase of @data through the DMA engine and wait for it.
+ *
+ * The channel is chosen from the transfer direction, the request's whole
+ * scatterlist is mapped and submitted as a single slave transfer, and the
+ * caller is blocked until moxart_dma_complete() fires or host->timeout
+ * expires.
+ *
+ * If the scatterlist cannot be mapped or no descriptor can be prepared,
+ * data->error is set to -EIO and the function returns without waiting,
+ * because no completion can ever arrive.  The list is unmapped only if it
+ * was actually mapped.
+ */
+static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
+{
+	u32 len, dir_data, dir_slave;
+	struct dma_async_tx_descriptor *desc;
+	struct dma_chan *dma_chan;
+
+	/* Nothing left to move. */
+	if (host->data_len == data->bytes_xfered)
+		return;
+
+	if (data->flags & MMC_DATA_WRITE) {
+		dma_chan = host->dma_chan_tx;
+		dir_data = DMA_TO_DEVICE;
+		dir_slave = DMA_MEM_TO_DEV;
+	} else {
+		dma_chan = host->dma_chan_rx;
+		dir_data = DMA_FROM_DEVICE;
+		dir_slave = DMA_DEV_TO_MEM;
+	}
+
+	len = dma_map_sg(dma_chan->device->dev, data->sg,
+			 data->sg_len, dir_data);
+	if (!len) {
+		dev_err(mmc_dev(host->mmc), "dma_map_sg returned zero length\n");
+		data->error = -EIO;
+		return;
+	}
+
+	desc = dmaengine_prep_slave_sg(dma_chan, data->sg,
+				       len, dir_slave,
+				       DMA_PREP_INTERRUPT |
+				       DMA_CTRL_ACK);
+	if (!desc) {
+		data->error = -EIO;
+		goto unmap;
+	}
+
+	host->tx_desc = desc;
+	desc->callback = moxart_dma_complete;
+	desc->callback_param = host;
+	dmaengine_submit(desc);
+	dma_async_issue_pending(dma_chan);
+
+	/*
+	 * The result of the wait is not inspected here; the caller's poll of
+	 * the controller's data status decides whether the transfer worked.
+	 * NOTE(review): consider terminating the channel on timeout before
+	 * the buffers are unmapped.
+	 */
+	wait_for_completion_interruptible_timeout(&host->dma_complete,
+						  host->timeout);
+
+	/*
+	 * The submitted transfer spans the whole request (data_len is what
+	 * moxart_prepare_data() programs into REG_DATA_LENGTH), not just the
+	 * first scatterlist entry tracked by data_remain.
+	 */
+	data->bytes_xfered = host->data_len;
+
+unmap:
+	dma_unmap_sg(dma_chan->device->dev,
+		     data->sg, data->sg_len,
+		     dir_data);
+}
+
+
+/*
+ * Move the current scatterlist entry through the data window by PIO.
+ *
+ * For each burst the function polls for FIFO_URUN (writes) or FIFO_ORUN
+ * (reads) and then transfers up to host->fifo_width bytes, one 32-bit
+ * word at a time.  A poll timeout sets data->error to -ETIMEDOUT and
+ * completes pio_complete.  Once the entry is done the next entry is
+ * selected, or pio_complete is completed when the whole transfer has been
+ * accounted for.
+ *
+ * NOTE(review): data is moved in whole words, so a length that is not a
+ * multiple of four still touches the bytes up to the next word boundary.
+ */
+static void moxart_transfer_pio(struct moxart_host *host)
+{
+	struct mmc_data *data = host->mrq->cmd->data;
+	u32 *sgp, len = 0, remain, status;
+
+	if (host->data_len == data->bytes_xfered)
+		return;
+
+	sgp = sg_virt(host->cur_sg);
+	remain = host->data_remain;
+
+	if (data->flags & MMC_DATA_WRITE) {
+		while (remain > 0) {
+			if (moxart_wait_for_status(host, FIFO_URUN, &status)
+			     == -ETIMEDOUT) {
+				data->error = -ETIMEDOUT;
+				complete(&host->pio_complete);
+				return;
+			}
+			for (len = 0; len < remain && len < host->fifo_width;) {
+				iowrite32(*sgp, host->base + REG_DATA_WINDOW);
+				sgp++;
+				len += 4;
+			}
+			/*
+			 * len advances in whole words and may overshoot a
+			 * trailing partial word; clamp so the unsigned
+			 * counter cannot wrap and keep the loop running.
+			 */
+			remain = (len < remain) ? remain - len : 0;
+		}
+	} else {
+		while (remain > 0) {
+			if (moxart_wait_for_status(host, FIFO_ORUN, &status)
+			    == -ETIMEDOUT) {
+				data->error = -ETIMEDOUT;
+				complete(&host->pio_complete);
+				return;
+			}
+			for (len = 0; len < remain && len < host->fifo_width;) {
+				/* SCR data must be read in big endian. */
+				if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
+					*sgp = ioread32be(host->base +
+							  REG_DATA_WINDOW);
+				else
+					*sgp = ioread32(host->base +
+							REG_DATA_WINDOW);
+				sgp++;
+				len += 4;
+			}
+			/* Same clamp as on the write side. */
+			remain = (len < remain) ? remain - len : 0;
+		}
+	}
+
+	data->bytes_xfered += host->data_remain - remain;
+	host->data_remain = remain;
+
+	if (host->data_len != data->bytes_xfered)
+		moxart_next_sg(host);
+	else
+		complete(&host->pio_complete);
+}
+
+/*
+ * Program the controller for the data phase of host->mrq, if it has one.
+ *
+ * Records the total length in host->data_len, starts the scatterlist
+ * bookkeeping and writes the data timer, length and control registers.
+ * Does nothing for commands without data.
+ */
+static void moxart_prepare_data(struct moxart_host *host)
+{
+	struct mmc_data *data = host->mrq->cmd->data;
+	u32 datactrl;
+	int blksz_bits;
+
+	if (!data)
+		return;
+
+	host->data_len = data->blocks * data->blksz;
+	/* The block size is encoded as its log2; it must be a power of two. */
+	blksz_bits = ffs(data->blksz) - 1;
+	BUG_ON(1 << blksz_bits != data->blksz);
+
+	moxart_init_sg(host, data);
+
+	/* Only the low four bits (DCR_BLK_SIZE) of blksz_bits are kept. */
+	datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE);
+
+	if (data->flags & MMC_DATA_WRITE)
+		datactrl |= DCR_DATA_WRITE;
+
+	/* Same DMA-versus-PIO condition that moxart_request() applies. */
+	if ((host->data_len > host->fifo_width) && host->have_dma)
+		datactrl |= DCR_DMA_EN;
+
+	/* Reset the FIFO and clear stale data status before arming. */
+	writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL);
+	writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR);
+	writel(host->rate, host->base + REG_DATA_TIMER);
+	writel(host->data_len, host->base + REG_DATA_LENGTH);
+	writel(datactrl, host->base + REG_DATA_CONTROL);
+}
+
+/*
+ * Carry out @mrq from start to finish and report it through
+ * mmc_request_done() before returning.
+ *
+ * Sequence: fail the request with -ETIMEDOUT if the CARD_DETECT status bit
+ * is set; otherwise program the data registers, issue the command and,
+ * for requests with data, run the DMA or PIO transfer, poll for the data
+ * status and finally issue the stop command if one is attached.
+ *
+ * host->lock is held throughout, except around the DMA transfer and the
+ * wait for PIO completion.
+ */
+static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct moxart_host *host = mmc_priv(mmc);
+	unsigned long pio_time, flags;
+	u32 status;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	init_completion(&host->dma_complete);
+	init_completion(&host->pio_complete);
+
+	host->mrq = mrq;
+
+	/* CARD_DETECT set is treated as "card removed" (see moxart_irq()). */
+	if (readl(host->base + REG_STATUS) & CARD_DETECT) {
+		mrq->cmd->error = -ETIMEDOUT;
+		goto request_done;
+	}
+
+	moxart_prepare_data(host);
+	moxart_send_command(host, host->mrq->cmd);
+
+	if (mrq->cmd->data) {
+		if ((host->data_len > host->fifo_width) && host->have_dma) {
+
+			/* DMA path: leave only the card-change interrupt on. */
+			writel(CARD_CHANGE, host->base + REG_INTERRUPT_MASK);
+
+			spin_unlock_irqrestore(&host->lock, flags);
+
+			moxart_transfer_dma(mrq->cmd->data, host);
+
+			spin_lock_irqsave(&host->lock, flags);
+		} else {
+
+			/* PIO path: also enable the FIFO over/underrun interrupts. */
+			writel(MASK_INTR_PIO, host->base + REG_INTERRUPT_MASK);
+
+			spin_unlock_irqrestore(&host->lock, flags);
+
+			/* PIO transfers start from interrupt. */
+			/* The wait result lands in pio_time but is not examined. */
+			pio_time = wait_for_completion_interruptible_timeout(
+				   &host->pio_complete, host->timeout);
+
+			spin_lock_irqsave(&host->lock, flags);
+		}
+
+		/* is_removed is updated by moxart_irq() on card-change events. */
+		if (host->is_removed) {
+			dev_err(mmc_dev(host->mmc), "card removed\n");
+			mrq->cmd->error = -ETIMEDOUT;
+			goto request_done;
+		}
+
+		if (moxart_wait_for_status(host, MASK_DATA, &status)
+		    == -ETIMEDOUT) {
+			mrq->cmd->data->error = -ETIMEDOUT;
+			goto request_done;
+		}
+
+		/* A data CRC failure is reported with the timeout error code. */
+		if (status & DATA_CRC_FAIL)
+			mrq->cmd->data->error = -ETIMEDOUT;
+
+		if (mrq->cmd->data->stop)
+			moxart_send_command(host, mrq->cmd->data->stop);
+	}
+
+request_done:
+	spin_unlock_irqrestore(&host->lock, flags);
+	mmc_request_done(host->mmc, mrq);
+}
+
+/*
+ * Interrupt handler for the controller.
+ *
+ * A card change event updates host->is_removed, aborts any DMA when the
+ * card is gone, drops the current request pointer and notifies the MMC
+ * core. A FIFO overrun/underrun with a request still pending services
+ * the PIO transfer. Always reports the interrupt as handled.
+ */
+static irqreturn_t moxart_irq(int irq, void *devid)
+{
+	struct moxart_host *host = devid;
+	unsigned long flags;
+	u32 status;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	status = readl(host->base + REG_STATUS);
+
+	if (status & CARD_CHANGE) {
+		host->is_removed = status & CARD_DETECT;
+
+		/* Stop both DMA directions once the card has been pulled. */
+		if (host->is_removed && host->have_dma) {
+			dmaengine_terminate_all(host->dma_chan_tx);
+			dmaengine_terminate_all(host->dma_chan_rx);
+		}
+
+		host->mrq = NULL;
+		writel(MASK_INTR_PIO, host->base + REG_CLEAR);
+		writel(CARD_CHANGE, host->base + REG_INTERRUPT_MASK);
+		mmc_detect_change(host->mmc, 0);
+	}
+
+	/* PIO is serviced only while a request is still attached. */
+	if ((status & (FIFO_ORUN | FIFO_URUN)) && host->mrq)
+		moxart_transfer_pio(host);
+
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * mmc_host_ops.set_ios handler: apply the clock, power and bus width
+ * settings requested in @ios. All register accesses are made with
+ * host->lock held.
+ */
+static void moxart_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct moxart_host *host = mmc_priv(mmc);
+	unsigned long flags;
+	u32 clk_ctrl, width;
+	u8 div, power;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	if (ios->clock) {
+		/* First divider whose output does not exceed the request. */
+		div = 0;
+		while (div < CLK_DIV_MASK &&
+		       ios->clock < host->sysclk / (2 * (div + 1)))
+			div++;
+
+		host->rate = host->sysclk / (2 * (div + 1));
+
+		clk_ctrl = CLK_SD | div;
+		/*
+		 * NOTE(review): rate is sysclk divided by at least 2, so this
+		 * comparison looks like it can never be true - confirm intent.
+		 */
+		if (host->rate > host->sysclk)
+			clk_ctrl |= CLK_HISPD;
+		writel(clk_ctrl, host->base + REG_CLOCK_CONTROL);
+	}
+
+	if (ios->power_mode != MMC_POWER_OFF) {
+		/* The power field is the vdd value relative to MIN_POWER. */
+		power = (ios->vdd < MIN_POWER) ? 0 : ios->vdd - MIN_POWER;
+		writel(SD_POWER_ON | (u32) power,
+		       host->base + REG_POWER_CONTROL);
+	} else {
+		writel(readl(host->base + REG_POWER_CONTROL) & ~SD_POWER_ON,
+		       host->base + REG_POWER_CONTROL);
+	}
+
+	/* Anything other than 4 or 8 bit falls back to a 1 bit bus. */
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_4:
+		width = BUS_WIDTH_4;
+		break;
+	case MMC_BUS_WIDTH_8:
+		width = BUS_WIDTH_8;
+		break;
+	default:
+		width = BUS_WIDTH_1;
+		break;
+	}
+	writel(width, host->base + REG_BUS_WIDTH);
+
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
+
+/*
+ * mmc_host_ops.get_ro handler: returns 1 when the WRITE_PROT bit is set
+ * in the status register, 0 otherwise.
+ */
+static int moxart_get_ro(struct mmc_host *mmc)
+{
+	struct moxart_host *host = mmc_priv(mmc);
+	u32 status = readl(host->base + REG_STATUS);
+
+	return (status & WRITE_PROT) ? 1 : 0;
+}
+
+/*
+ * Host operations handed to the MMC core. The table is never modified
+ * after initialisation, so it is declared const.
+ */
+static const struct mmc_host_ops moxart_ops = {
+	.request = moxart_request,
+	.set_ios = moxart_set_ios,
+	.get_ro = moxart_get_ro,
+};
+
+/*
+ * Probe one controller instance described by the device tree.
+ *
+ * Allocates the mmc host, maps the register window, looks up the IRQ and
+ * clock, optionally sets up the "tx"/"rx" DMA channels (falling back to
+ * PIO when either is unavailable), resets the controller and registers
+ * the host with the MMC core.
+ *
+ * Returns 0 on success or a negative errno. On failure any DMA channels
+ * obtained so far are released and the mmc host is freed; the register
+ * mapping, clock reference and IRQ are device-managed.
+ */
+static int moxart_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct resource res_mmc;
+	struct mmc_host *mmc;
+	struct moxart_host *host = NULL;
+	/* Zeroed so that fields not set below are not passed on as garbage. */
+	struct dma_slave_config cfg = { 0 };
+	struct clk *clk;
+	void __iomem *reg_mmc;
+	int irq, ret;
+	u32 i;
+
+	mmc = mmc_alloc_host(sizeof(struct moxart_host), dev);
+	if (!mmc) {
+		dev_err(dev, "mmc_alloc_host failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = of_address_to_resource(node, 0, &res_mmc);
+	if (ret) {
+		dev_err(dev, "of_address_to_resource failed\n");
+		goto out;
+	}
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq <= 0) {
+		dev_err(dev, "irq_of_parse_and_map failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Device-managed so the clock reference is dropped automatically. */
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "devm_clk_get failed\n");
+		ret = PTR_ERR(clk);
+		goto out;
+	}
+
+	reg_mmc = devm_ioremap_resource(dev, &res_mmc);
+	if (IS_ERR(reg_mmc)) {
+		ret = PTR_ERR(reg_mmc);
+		goto out;
+	}
+
+	ret = mmc_of_parse(mmc);
+	if (ret)
+		goto out;
+
+	host = mmc_priv(mmc);
+	host->mmc = mmc;
+	host->base = reg_mmc;
+	host->reg_phys = res_mmc.start;
+	host->timeout = msecs_to_jiffies(1000);
+	host->sysclk = clk_get_rate(clk);
+	host->fifo_width = readl(host->base + REG_FEATURE) << 2;
+	host->dma_chan_tx = of_dma_request_slave_channel(node, "tx");
+	host->dma_chan_rx = of_dma_request_slave_channel(node, "rx");
+
+	spin_lock_init(&host->lock);
+
+	mmc->ops = &moxart_ops;
+	mmc->f_max = DIV_ROUND_CLOSEST(host->sysclk, 2);
+	mmc->f_min = DIV_ROUND_CLOSEST(host->sysclk, CLK_DIV_MASK * 2);
+	mmc->ocr_avail = 0xffff00;	/* Support 2.0v - 3.6v power. */
+
+	if (IS_ERR(host->dma_chan_tx) || IS_ERR(host->dma_chan_rx)) {
+		dev_dbg(dev, "PIO mode transfer enabled\n");
+		host->have_dma = false;
+	} else {
+		dev_dbg(dev, "DMA channels found (%p,%p)\n",
+			 host->dma_chan_tx, host->dma_chan_rx);
+		host->have_dma = true;
+
+		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+		/* Both directions target the controller's data window. */
+		cfg.direction = DMA_MEM_TO_DEV;
+		cfg.src_addr = 0;
+		cfg.dst_addr = host->reg_phys + REG_DATA_WINDOW;
+		dmaengine_slave_config(host->dma_chan_tx, &cfg);
+
+		cfg.direction = DMA_DEV_TO_MEM;
+		cfg.src_addr = host->reg_phys + REG_DATA_WINDOW;
+		cfg.dst_addr = 0;
+		dmaengine_slave_config(host->dma_chan_rx, &cfg);
+	}
+
+	/* Advertise the bus widths reported by the bus width register. */
+	switch ((readl(host->base + REG_BUS_WIDTH) >> 3) & 3) {
+	case 1:
+		mmc->caps |= MMC_CAP_4_BIT_DATA;
+		break;
+	case 2:
+		mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA;
+		break;
+	default:
+		break;
+	}
+
+	/* Mask all interrupts before resetting the controller. */
+	writel(0, host->base + REG_INTERRUPT_MASK);
+
+	/* Poll until the reset bit clears, giving up after MAX_RETRIES. */
+	writel(CMD_SDC_RESET, host->base + REG_COMMAND);
+	for (i = 0; i < MAX_RETRIES; i++) {
+		if (!(readl(host->base + REG_COMMAND) & CMD_SDC_RESET))
+			break;
+		udelay(5);
+	}
+
+	ret = devm_request_irq(dev, irq, moxart_irq, 0, "moxart-mmc", host);
+	if (ret)
+		goto out;
+
+	dev_set_drvdata(dev, mmc);
+	ret = mmc_add_host(mmc);
+	if (ret)
+		goto out;
+
+	dev_dbg(dev, "IRQ=%d, FIFO is %d bytes\n", irq, host->fifo_width);
+
+	return 0;
+
+out:
+	/* host is only non-NULL once the DMA channels have been requested. */
+	if (host) {
+		if (!IS_ERR_OR_NULL(host->dma_chan_tx))
+			dma_release_channel(host->dma_chan_tx);
+		if (!IS_ERR_OR_NULL(host->dma_chan_rx))
+			dma_release_channel(host->dma_chan_rx);
+	}
+	if (mmc)
+		mmc_free_host(mmc);
+	return ret;
+}
+
+/*
+ * Tear down a controller instance: release the DMA channels, unregister
+ * the host from the MMC core, quiesce the hardware and finally free the
+ * mmc host.
+ *
+ * The driver state (struct moxart_host) lives inside the allocation made
+ * by mmc_alloc_host(), so it must not be freed separately and must not be
+ * touched after mmc_free_host(). Always returns 0.
+ */
+static int moxart_remove(struct platform_device *pdev)
+{
+	struct mmc_host *mmc = dev_get_drvdata(&pdev->dev);
+	struct moxart_host *host;
+
+	dev_set_drvdata(&pdev->dev, NULL);
+
+	if (!mmc)
+		return 0;
+
+	host = mmc_priv(mmc);
+
+	if (!IS_ERR_OR_NULL(host->dma_chan_tx))
+		dma_release_channel(host->dma_chan_tx);
+	if (!IS_ERR_OR_NULL(host->dma_chan_rx))
+		dma_release_channel(host->dma_chan_rx);
+	mmc_remove_host(mmc);
+
+	/* Mask interrupts, cut power and gate the clock while host is valid. */
+	writel(0, host->base + REG_INTERRUPT_MASK);
+	writel(0, host->base + REG_POWER_CONTROL);
+	writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
+	       host->base + REG_CLOCK_CONTROL);
+
+	/* Frees host as well; nothing may dereference it past this point. */
+	mmc_free_host(mmc);
+
+	return 0;
+}
+
+/* Device tree compatible strings handled by this driver. */
+static const struct of_device_id moxart_mmc_match[] = {
+	{ .compatible = "moxa,moxart-mmc" },
+	{ .compatible = "faraday,ftsdc010" },
+	{ }
+};
+/* Export the table so the module can be autoloaded from an OF match. */
+MODULE_DEVICE_TABLE(of, moxart_mmc_match);
+
+/*
+ * Platform driver glue: binds moxart_probe()/moxart_remove() to devices
+ * matched through moxart_mmc_match. module_platform_driver() generates
+ * the module init/exit boilerplate that registers this driver.
+ */
+static struct platform_driver moxart_mmc_driver = {
+	.probe      = moxart_probe,
+	.remove     = moxart_remove,
+	.driver     = {
+		.name		= "mmc-moxart",
+		.owner		= THIS_MODULE,
+		.of_match_table	= moxart_mmc_match,
+	},
+};
+module_platform_driver(moxart_mmc_driver);
+
+MODULE_ALIAS("platform:mmc-moxart");
+MODULE_DESCRIPTION("MOXA ART MMC driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>");

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 45aa220..6b4c5ad 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c

@@ -79,11 +79,11 @@
 		unsigned long t = jiffies + HZ;
 		unsigned int hw_state,  count = 0;
 		do {
+			hw_state = mvsd_read(MVSD_HW_STATE);
 			if (time_after(jiffies, t)) {
 				dev_warn(host->dev, "FIFO_EMPTY bit missing\n");
 				break;
 			}
-			hw_state = mvsd_read(MVSD_HW_STATE);
 			count++;
 		} while (!(hw_state & (1 << 13)));
 		dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit "
@@ -354,6 +354,20 @@
 		intr_status, mvsd_read(MVSD_NOR_INTR_EN),
 		mvsd_read(MVSD_HW_STATE));
 
+	/*
+	 * It looks like, SDIO IP can issue one late, spurious irq
+	 * although all irqs should be disabled. To work around this,
+	 * bail out early, if we didn't expect any irqs to occur.
+	 */
+	if (!mvsd_read(MVSD_NOR_INTR_EN) && !mvsd_read(MVSD_ERR_INTR_EN)) {
+		dev_dbg(host->dev, "spurious irq detected intr 0x%04x intr_en 0x%04x erri 0x%04x erri_en 0x%04x\n",
+			mvsd_read(MVSD_NOR_INTR_STATUS),
+			mvsd_read(MVSD_NOR_INTR_EN),
+			mvsd_read(MVSD_ERR_INTR_STATUS),
+			mvsd_read(MVSD_ERR_INTR_EN));
+		return IRQ_HANDLED;
+	}
+
 	spin_lock(&host->lock);
 
 	/* PIO handling, if needed. Messy business... */
@@ -801,10 +815,10 @@
 		goto out;
 
 	if (!(mmc->caps & MMC_CAP_NEEDS_POLL))
-		dev_notice(&pdev->dev, "using GPIO for card detection\n");
+		dev_dbg(&pdev->dev, "using GPIO for card detection\n");
 	else
-		dev_notice(&pdev->dev,
-			   "lacking card detect (fall back to polling)\n");
+		dev_dbg(&pdev->dev, "lacking card detect (fall back to polling)\n");
+
 	return 0;
 
 out:

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index f7199c8..ed1cb93 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c

@@ -124,9 +124,8 @@
 
 struct mxcmci_host {
 	struct mmc_host		*mmc;
-	struct resource		*res;
 	void __iomem		*base;
-	int			irq;
+	dma_addr_t		phys_base;
 	int			detect_irq;
 	struct dma_chan		*dma;
 	struct dma_async_tx_descriptor *desc;
@@ -154,8 +153,6 @@
 	struct work_struct	datawork;
 	spinlock_t		lock;
 
-	struct regulator	*vcc;
-
 	int			burstlen;
 	int			dmareq;
 	struct dma_slave_config dma_slave_config;
@@ -241,37 +238,15 @@
 
 static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
 
-static inline void mxcmci_init_ocr(struct mxcmci_host *host)
+static void mxcmci_set_power(struct mxcmci_host *host, unsigned int vdd)
 {
-	host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc");
-
-	if (IS_ERR(host->vcc)) {
-		host->vcc = NULL;
-	} else {
-		host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc);
-		if (host->pdata && host->pdata->ocr_avail)
-			dev_warn(mmc_dev(host->mmc),
-				"pdata->ocr_avail will not be used\n");
-	}
-
-	if (host->vcc == NULL) {
-		/* fall-back to platform data */
-		if (host->pdata && host->pdata->ocr_avail)
-			host->mmc->ocr_avail = host->pdata->ocr_avail;
-		else
-			host->mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	}
-}
-
-static inline void mxcmci_set_power(struct mxcmci_host *host,
-				    unsigned char power_mode,
-				    unsigned int vdd)
-{
-	if (host->vcc) {
-		if (power_mode == MMC_POWER_UP)
-			mmc_regulator_set_ocr(host->mmc, host->vcc, vdd);
-		else if (power_mode == MMC_POWER_OFF)
-			mmc_regulator_set_ocr(host->mmc, host->vcc, 0);
+	if (!IS_ERR(host->mmc->supply.vmmc)) {
+		if (host->power_mode == MMC_POWER_UP)
+			mmc_regulator_set_ocr(host->mmc,
+					      host->mmc->supply.vmmc, vdd);
+		else if (host->power_mode == MMC_POWER_OFF)
+			mmc_regulator_set_ocr(host->mmc,
+					      host->mmc->supply.vmmc, 0);
 	}
 
 	if (host->pdata && host->pdata->setpower)
@@ -299,7 +274,6 @@
 
 	mxcmci_writew(host, 0xff, MMC_REG_RES_TO);
 }
-static int mxcmci_setup_dma(struct mmc_host *mmc);
 
 #if IS_ENABLED(CONFIG_PPC_MPC512x)
 static inline void buffer_swap32(u32 *buf, int len)
@@ -868,8 +842,8 @@
 	struct mxcmci_host *host = mmc_priv(mmc);
 	struct dma_slave_config *config = &host->dma_slave_config;
 
-	config->dst_addr = host->res->start + MMC_REG_BUFFER_ACCESS;
-	config->src_addr = host->res->start + MMC_REG_BUFFER_ACCESS;
+	config->dst_addr = host->phys_base + MMC_REG_BUFFER_ACCESS;
+	config->src_addr = host->phys_base + MMC_REG_BUFFER_ACCESS;
 	config->dst_addr_width = 4;
 	config->src_addr_width = 4;
 	config->dst_maxburst = host->burstlen;
@@ -911,8 +885,8 @@
 		host->cmdat &= ~CMD_DAT_CONT_BUS_WIDTH_4;
 
 	if (host->power_mode != ios->power_mode) {
-		mxcmci_set_power(host, ios->power_mode, ios->vdd);
 		host->power_mode = ios->power_mode;
+		mxcmci_set_power(host, ios->vdd);
 
 		if (ios->power_mode == MMC_POWER_ON)
 			host->cmdat |= CMD_DAT_CONT_INIT;
@@ -1040,8 +1014,8 @@
 static int mxcmci_probe(struct platform_device *pdev)
 {
 	struct mmc_host *mmc;
-	struct mxcmci_host *host = NULL;
-	struct resource *iores, *r;
+	struct mxcmci_host *host;
+	struct resource *res;
 	int ret = 0, irq;
 	bool dat3_card_detect = false;
 	dma_cap_mask_t mask;
@@ -1052,21 +1026,25 @@
 
 	of_id = of_match_device(mxcmci_of_match, &pdev->dev);
 
-	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
-	if (!iores || irq < 0)
+	if (irq < 0)
 		return -EINVAL;
 
-	r = request_mem_region(iores->start, resource_size(iores), pdev->name);
-	if (!r)
-		return -EBUSY;
+	mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
+	if (!mmc)
+		return -ENOMEM;
 
-	mmc = mmc_alloc_host(sizeof(struct mxcmci_host), &pdev->dev);
-	if (!mmc) {
-		ret = -ENOMEM;
-		goto out_release_mem;
+	host = mmc_priv(mmc);
+
+	host->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(host->base)) {
+		ret = PTR_ERR(host->base);
+		goto out_free;
 	}
 
+	host->phys_base = res->start;
+
 	ret = mmc_of_parse(mmc);
 	if (ret)
 		goto out_free;
@@ -1084,13 +1062,6 @@
 	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
 	mmc->max_seg_size = mmc->max_req_size;
 
-	host = mmc_priv(mmc);
-	host->base = ioremap(r->start, resource_size(r));
-	if (!host->base) {
-		ret = -ENOMEM;
-		goto out_free;
-	}
-
 	if (of_id) {
 		const struct platform_device_id *id_entry = of_id->data;
 		host->devtype = id_entry->driver_data;
@@ -1112,7 +1083,14 @@
 			&& !of_property_read_bool(pdev->dev.of_node, "cd-gpios"))
 		dat3_card_detect = true;
 
-	mxcmci_init_ocr(host);
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret) {
+		if (pdata && ret != -EPROBE_DEFER)
+			mmc->ocr_avail = pdata->ocr_avail ? :
+				MMC_VDD_32_33 | MMC_VDD_33_34;
+		else
+			goto out_free;
+	}
 
 	if (dat3_card_detect)
 		host->default_irq_mask =
@@ -1120,19 +1098,16 @@
 	else
 		host->default_irq_mask = 0;
 
-	host->res = r;
-	host->irq = irq;
-
 	host->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
 	if (IS_ERR(host->clk_ipg)) {
 		ret = PTR_ERR(host->clk_ipg);
-		goto out_iounmap;
+		goto out_free;
 	}
 
 	host->clk_per = devm_clk_get(&pdev->dev, "per");
 	if (IS_ERR(host->clk_per)) {
 		ret = PTR_ERR(host->clk_per);
-		goto out_iounmap;
+		goto out_free;
 	}
 
 	clk_prepare_enable(host->clk_per);
@@ -1159,9 +1134,9 @@
 	if (!host->pdata) {
 		host->dma = dma_request_slave_channel(&pdev->dev, "rx-tx");
 	} else {
-		r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-		if (r) {
-			host->dmareq = r->start;
+		res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+		if (res) {
+			host->dmareq = res->start;
 			host->dma_data.peripheral_type = IMX_DMATYPE_SDHC;
 			host->dma_data.priority = DMA_PRIO_LOW;
 			host->dma_data.dma_request = host->dmareq;
@@ -1178,7 +1153,8 @@
 
 	INIT_WORK(&host->datawork, mxcmci_datawork);
 
-	ret = request_irq(host->irq, mxcmci_irq, 0, DRIVER_NAME, host);
+	ret = devm_request_irq(&pdev->dev, irq, mxcmci_irq, 0,
+			       dev_name(&pdev->dev), host);
 	if (ret)
 		goto out_free_dma;
 
@@ -1188,7 +1164,7 @@
 		ret = host->pdata->init(&pdev->dev, mxcmci_detect_irq,
 				host->mmc);
 		if (ret)
-			goto out_free_irq;
+			goto out_free_dma;
 	}
 
 	init_timer(&host->watchdog);
@@ -1199,20 +1175,17 @@
 
 	return 0;
 
-out_free_irq:
-	free_irq(host->irq, host);
 out_free_dma:
 	if (host->dma)
 		dma_release_channel(host->dma);
+
 out_clk_put:
 	clk_disable_unprepare(host->clk_per);
 	clk_disable_unprepare(host->clk_ipg);
-out_iounmap:
-	iounmap(host->base);
+
 out_free:
 	mmc_free_host(mmc);
-out_release_mem:
-	release_mem_region(iores->start, resource_size(iores));
+
 	return ret;
 }
 
@@ -1223,30 +1196,21 @@
 
 	mmc_remove_host(mmc);
 
-	if (host->vcc)
-		regulator_put(host->vcc);
-
 	if (host->pdata && host->pdata->exit)
 		host->pdata->exit(&pdev->dev, mmc);
 
-	free_irq(host->irq, host);
-	iounmap(host->base);
-
 	if (host->dma)
 		dma_release_channel(host->dma);
 
 	clk_disable_unprepare(host->clk_per);
 	clk_disable_unprepare(host->clk_ipg);
 
-	release_mem_region(host->res->start, resource_size(host->res));
-
 	mmc_free_host(mmc);
 
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int mxcmci_suspend(struct device *dev)
+static int __maybe_unused mxcmci_suspend(struct device *dev)
 {
 	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct mxcmci_host *host = mmc_priv(mmc);
@@ -1256,7 +1220,7 @@
 	return 0;
 }
 
-static int mxcmci_resume(struct device *dev)
+static int __maybe_unused mxcmci_resume(struct device *dev)
 {
 	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct mxcmci_host *host = mmc_priv(mmc);
@@ -1266,11 +1230,7 @@
 	return 0;
 }
 
-static const struct dev_pm_ops mxcmci_pm_ops = {
-	.suspend	= mxcmci_suspend,
-	.resume		= mxcmci_resume,
-};
-#endif
+static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
 
 static struct platform_driver mxcmci_driver = {
 	.probe		= mxcmci_probe,
@@ -1279,9 +1239,7 @@
 	.driver		= {
 		.name		= DRIVER_NAME,
 		.owner		= THIS_MODULE,
-#ifdef CONFIG_PM
 		.pm	= &mxcmci_pm_ops,
-#endif
 		.of_match_table	= mxcmci_of_match,
 	}
 };

diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 073e871..babfea0 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c

@@ -70,6 +70,7 @@
 	unsigned char			bus_width;
 	spinlock_t			lock;
 	int				sdio_irq_en;
+	bool				broken_cd;
 };
 
 static int mxs_mmc_get_cd(struct mmc_host *mmc)
@@ -78,6 +79,9 @@
 	struct mxs_ssp *ssp = &host->ssp;
 	int present, ret;
 
+	if (host->broken_cd)
+		return -ENOSYS;
+
 	ret = mmc_gpio_get_cd(mmc);
 	if (ret >= 0)
 		return ret;
@@ -568,6 +572,7 @@
 {
 	const struct of_device_id *of_id =
 			of_match_device(mxs_mmc_dt_ids, &pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
 	struct mxs_mmc_host *host;
 	struct mmc_host *mmc;
 	struct resource *iores;
@@ -634,6 +639,8 @@
 	mmc->caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
 		    MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL;
 
+	host->broken_cd = of_property_read_bool(np, "broken-cd");
+
 	mmc->f_min = 400000;
 	mmc->f_max = 288000000;
 

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 5c2e58b..81974ec 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c

@@ -177,7 +177,7 @@
 	unsigned long tick_ns;
 
 	if (slot != NULL && slot->host->fclk_enabled && slot->fclk_freq > 0) {
-		tick_ns = (1000000000 + slot->fclk_freq - 1) / slot->fclk_freq;
+		tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, slot->fclk_freq);
 		ndelay(8 * tick_ns);
 	}
 }
@@ -435,7 +435,7 @@
 	struct mmc_data *data = host->stop_data;
 	unsigned long tick_ns;
 
-	tick_ns = (1000000000 + slot->fclk_freq - 1)/slot->fclk_freq;
+	tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, slot->fclk_freq);
 	ndelay(8*tick_ns);
 
 	mmc_omap_start_command(host, data->stop);
@@ -477,7 +477,7 @@
 	u16 stat = 0;
 
 	/* Sending abort takes 80 clocks. Have some extra and round up */
-	timeout = (120*1000000 + slot->fclk_freq - 1)/slot->fclk_freq;
+	timeout = DIV_ROUND_UP(120 * USEC_PER_SEC, slot->fclk_freq);
 	restarts = 0;
 	while (restarts < maxloops) {
 		OMAP_MMC_WRITE(host, STAT, 0xFFFF);
@@ -677,8 +677,8 @@
 	if (n > host->buffer_bytes_left)
 		n = host->buffer_bytes_left;
 
-	nwords = n / 2;
-	nwords += n & 1; /* handle odd number of bytes to transfer */
+	/* Round up to handle odd number of bytes to transfer */
+	nwords = DIV_ROUND_UP(n, 2);
 
 	host->buffer_bytes_left -= n;
 	host->total_bytes_left -= n;

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e91ee21..6b7b755 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c

@@ -31,7 +31,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
-#include <linux/omap-dma.h>
+#include <linux/omap-dmaengine.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
@@ -582,7 +582,7 @@
 	 *	- MMC/SD clock coming out of controller > 25MHz
 	 */
 	if ((mmc_slot(host).features & HSMMC_HAS_HSPE_SUPPORT) &&
-	    (ios->timing != MMC_TIMING_UHS_DDR50) &&
+	    (ios->timing != MMC_TIMING_MMC_DDR52) &&
 	    ((OMAP_HSMMC_READ(host->base, CAPA) & HSS) == HSS)) {
 		regval = OMAP_HSMMC_READ(host->base, HCTL);
 		if (clkdiv && (clk_get_rate(host->fclk)/clkdiv) > 25000000)
@@ -602,7 +602,7 @@
 	u32 con;
 
 	con = OMAP_HSMMC_READ(host->base, CON);
-	if (ios->timing == MMC_TIMING_UHS_DDR50)
+	if (ios->timing == MMC_TIMING_MMC_DDR52)
 		con |= DDR;	/* configure in DDR mode */
 	else
 		con &= ~DDR;
@@ -920,16 +920,17 @@
 static void
 omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd)
 {
-	host->cmd = NULL;
-
 	if (host->mrq->sbc && (host->cmd == host->mrq->sbc) &&
 	    !host->mrq->sbc->error && !(host->flags & AUTO_CMD23)) {
+		host->cmd = NULL;
 		omap_hsmmc_start_dma_transfer(host);
 		omap_hsmmc_start_command(host, host->mrq->cmd,
 						host->mrq->data);
 		return;
 	}
 
+	host->cmd = NULL;
+
 	if (cmd->flags & MMC_RSP_PRESENT) {
 		if (cmd->flags & MMC_RSP_136) {
 			/* response type 2 */
@@ -1851,6 +1852,7 @@
 	unsigned tx_req, rx_req;
 	struct pinctrl *pinctrl;
 	const struct omap_mmc_of_data *data;
+	void __iomem *base;
 
 	match = of_match_device(of_match_ptr(omap_mmc_of_match), &pdev->dev);
 	if (match) {
@@ -1881,9 +1883,9 @@
 	if (res == NULL || irq < 0)
 		return -ENXIO;
 
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL)
-		return -EBUSY;
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
 	ret = omap_hsmmc_gpio_init(pdata);
 	if (ret)
@@ -1904,7 +1906,7 @@
 	host->irq	= irq;
 	host->slot_id	= 0;
 	host->mapbase	= res->start + pdata->reg_offset;
-	host->base	= ioremap(host->mapbase, SZ_4K);
+	host->base	= base + pdata->reg_offset;
 	host->power_mode = MMC_POWER_OFF;
 	host->next_data.cookie = 1;
 	host->pbias_enabled = 0;
@@ -1922,7 +1924,7 @@
 
 	spin_lock_init(&host->irq_lock);
 
-	host->fclk = clk_get(&pdev->dev, "fck");
+	host->fclk = devm_clk_get(&pdev->dev, "fck");
 	if (IS_ERR(host->fclk)) {
 		ret = PTR_ERR(host->fclk);
 		host->fclk = NULL;
@@ -1941,7 +1943,7 @@
 
 	omap_hsmmc_context_save(host);
 
-	host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
+	host->dbclk = devm_clk_get(&pdev->dev, "mmchsdb_fck");
 	/*
 	 * MMC can still work without debounce clock.
 	 */
@@ -1949,7 +1951,6 @@
 		host->dbclk = NULL;
 	} else if (clk_prepare_enable(host->dbclk) != 0) {
 		dev_warn(mmc_dev(host->mmc), "Failed to enable debounce clk\n");
-		clk_put(host->dbclk);
 		host->dbclk = NULL;
 	}
 
@@ -2018,7 +2019,7 @@
 	}
 
 	/* Request IRQ for MMC operations */
-	ret = request_irq(host->irq, omap_hsmmc_irq, 0,
+	ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0,
 			mmc_hostname(mmc), host);
 	if (ret) {
 		dev_err(mmc_dev(host->mmc), "Unable to grab HSMMC IRQ\n");
@@ -2029,7 +2030,7 @@
 		if (pdata->init(&pdev->dev) != 0) {
 			dev_err(mmc_dev(host->mmc),
 				"Unable to configure MMC IRQs\n");
-			goto err_irq_cd_init;
+			goto err_irq;
 		}
 	}
 
@@ -2044,9 +2045,9 @@
 
 	/* Request IRQ for card detect */
 	if ((mmc_slot(host).card_detect_irq)) {
-		ret = request_threaded_irq(mmc_slot(host).card_detect_irq,
-					   NULL,
-					   omap_hsmmc_detect,
+		ret = devm_request_threaded_irq(&pdev->dev,
+						mmc_slot(host).card_detect_irq,
+						NULL, omap_hsmmc_detect,
 					   IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 					   mmc_hostname(mmc), host);
 		if (ret) {
@@ -2089,15 +2090,12 @@
 
 err_slot_name:
 	mmc_remove_host(mmc);
-	free_irq(mmc_slot(host).card_detect_irq, host);
 err_irq_cd:
 	if (host->use_reg)
 		omap_hsmmc_reg_put(host);
 err_reg:
 	if (host->pdata->cleanup)
 		host->pdata->cleanup(&pdev->dev);
-err_irq_cd_init:
-	free_irq(host->irq, host);
 err_irq:
 	if (host->tx_chan)
 		dma_release_channel(host->tx_chan);
@@ -2105,27 +2103,19 @@
 		dma_release_channel(host->rx_chan);
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
-	clk_put(host->fclk);
-	if (host->dbclk) {
+	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
-		clk_put(host->dbclk);
-	}
 err1:
-	iounmap(host->base);
 	mmc_free_host(mmc);
 err_alloc:
 	omap_hsmmc_gpio_free(pdata);
 err:
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
 	return ret;
 }
 
 static int omap_hsmmc_remove(struct platform_device *pdev)
 {
 	struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
-	struct resource *res;
 
 	pm_runtime_get_sync(host->dev);
 	mmc_remove_host(host->mmc);
@@ -2133,9 +2123,6 @@
 		omap_hsmmc_reg_put(host);
 	if (host->pdata->cleanup)
 		host->pdata->cleanup(&pdev->dev);
-	free_irq(host->irq, host);
-	if (mmc_slot(host).card_detect_irq)
-		free_irq(mmc_slot(host).card_detect_irq, host);
 
 	if (host->tx_chan)
 		dma_release_channel(host->tx_chan);
@@ -2144,20 +2131,12 @@
 
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
-	clk_put(host->fclk);
-	if (host->dbclk) {
+	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
-		clk_put(host->dbclk);
-	}
 
 	omap_hsmmc_gpio_free(host->pdata);
-	iounmap(host->base);
 	mmc_free_host(host->mmc);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
 	return 0;
 }
 

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 0b9ded1..0d51964 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c

@@ -236,6 +236,9 @@
 	case MMC_RSP_R1:
 		rsp_type = SD_RSP_TYPE_R1;
 		break;
+	case MMC_RSP_R1 & ~MMC_RSP_CRC:
+		rsp_type = SD_RSP_TYPE_R1 | SD_NO_CHECK_CRC7;
+		break;
 	case MMC_RSP_R1B:
 		rsp_type = SD_RSP_TYPE_R1b;
 		break;
@@ -816,6 +819,7 @@
 		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, CLK_LOW_FREQ, 0);
 		break;
 
+	case MMC_TIMING_MMC_DDR52:
 	case MMC_TIMING_UHS_DDR50:
 		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_CFG1,
 				0x0C | SD_ASYNC_FIFO_NOT_RST,
@@ -896,6 +900,7 @@
 		host->vpclk = true;
 		host->double_clk = false;
 		break;
+	case MMC_TIMING_MMC_DDR52:
 	case MMC_TIMING_UHS_DDR50:
 	case MMC_TIMING_UHS_SDR25:
 		host->ssc_depth = RTSX_SSC_DEPTH_1M;

diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index e11fafa..5d3766e 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c

@@ -34,7 +34,8 @@
 #include <linux/mfd/rtsx_usb.h>
 #include <asm/unaligned.h>
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \
+		defined(CONFIG_MMC_REALTEK_USB_MODULE))
 #include <linux/leds.h>
 #include <linux/workqueue.h>
 #define RTSX_USB_USE_LEDS_CLASS
@@ -59,7 +60,7 @@
 
 	unsigned char		power_mode;
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef RTSX_USB_USE_LEDS_CLASS
 	struct led_classdev	led;
 	char			led_name[32];
 	struct work_struct	led_work;

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index ebb3f39..8ce3c28 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c

@@ -102,11 +102,19 @@
 }
 
 static const struct sdhci_ops sdhci_acpi_ops_dflt = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static const struct sdhci_ops sdhci_acpi_ops_int = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 	.hw_reset   = sdhci_acpi_int_hw_reset,
 };
 

diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 6f166e6..dd780c3 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c

@@ -206,9 +206,13 @@
 }
 
 static struct sdhci_ops sdhci_bcm_kona_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_bcm_kona_get_max_clk,
 	.get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
 	.platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 	.card_event = sdhci_bcm_kona_card_event,
 };
 

diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index f6d8d67..46af9a4 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c

@@ -131,8 +131,12 @@
 	.read_l = bcm2835_sdhci_readl,
 	.read_w = bcm2835_sdhci_readw,
 	.read_b = bcm2835_sdhci_readb,
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_min_clock = bcm2835_sdhci_get_min_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data bcm2835_sdhci_pdata = {

diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index f2cc266..14b7407 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c

@@ -30,13 +30,12 @@
 	u16 clk;
 	unsigned long timeout;
 
-	if (clock == host->clock)
-		return;
+	host->mmc->actual_clock = 0;
 
 	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
 
 	if (clock == 0)
-		goto out;
+		return;
 
 	while (host->max_clk / div > clock) {
 		/*
@@ -75,13 +74,14 @@
 
 	clk |= SDHCI_CLOCK_CARD_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
-out:
-	host->clock = clock;
 }
 
 static const struct sdhci_ops sdhci_cns3xxx_ops = {
 	.get_max_clock	= sdhci_cns3xxx_get_max_clk,
 	.set_clock	= sdhci_cns3xxx_set_clock,
+	.set_bus_width	= sdhci_set_bus_width,
+	.reset          = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
@@ -90,8 +90,7 @@
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
 		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
-		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
-		  SDHCI_QUIRK_NONSTANDARD_CLOCK,
+		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
 static int sdhci_cns3xxx_probe(struct platform_device *pdev)

diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 736d7a2..e6278ec 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c

@@ -21,28 +21,17 @@
 
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/io.h>
 #include <linux/mmc/host.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
 #include "sdhci-pltfm.h"
 
 struct sdhci_dove_priv {
 	struct clk *clk;
-	int gpio_cd;
 };
 
-static irqreturn_t sdhci_dove_carddetect_irq(int irq, void *data)
-{
-	struct sdhci_host *host = data;
-
-	tasklet_schedule(&host->card_tasklet);
-	return IRQ_HANDLED;
-}
-
 static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
 {
 	u16 ret;
@@ -60,8 +49,6 @@
 
 static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
 {
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_dove_priv *priv = pltfm_host->priv;
 	u32 ret;
 
 	ret = readl(host->ioaddr + reg);
@@ -71,14 +58,6 @@
 		/* Mask the support for 3.0V */
 		ret &= ~SDHCI_CAN_VDD_300;
 		break;
-	case SDHCI_PRESENT_STATE:
-		if (gpio_is_valid(priv->gpio_cd)) {
-			if (gpio_get_value(priv->gpio_cd) == 0)
-				ret |= SDHCI_CARD_PRESENT;
-			else
-				ret &= ~SDHCI_CARD_PRESENT;
-		}
-		break;
 	}
 	return ret;
 }
@@ -86,6 +65,10 @@
 static const struct sdhci_ops sdhci_dove_ops = {
 	.read_w	= sdhci_dove_readw,
 	.read_l	= sdhci_dove_readl,
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data sdhci_dove_pdata = {
@@ -113,28 +96,9 @@
 
 	priv->clk = devm_clk_get(&pdev->dev, NULL);
 
-	if (pdev->dev.of_node) {
-		priv->gpio_cd = of_get_named_gpio(pdev->dev.of_node,
-						  "cd-gpios", 0);
-	} else {
-		priv->gpio_cd = -EINVAL;
-	}
-
-	if (gpio_is_valid(priv->gpio_cd)) {
-		ret = gpio_request(priv->gpio_cd, "sdhci-cd");
-		if (ret) {
-			dev_err(&pdev->dev, "card detect gpio request failed: %d\n",
-				ret);
-			return ret;
-		}
-		gpio_direction_input(priv->gpio_cd);
-	}
-
 	host = sdhci_pltfm_init(pdev, &sdhci_dove_pdata, 0);
-	if (IS_ERR(host)) {
-		ret = PTR_ERR(host);
-		goto err_sdhci_pltfm_init;
-	}
+	if (IS_ERR(host))
+		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
 	pltfm_host->priv = priv;
@@ -142,39 +106,20 @@
 	if (!IS_ERR(priv->clk))
 		clk_prepare_enable(priv->clk);
 
-	sdhci_get_of_property(pdev);
+	ret = mmc_of_parse(host->mmc);
+	if (ret)
+		goto err_sdhci_add;
 
 	ret = sdhci_add_host(host);
 	if (ret)
 		goto err_sdhci_add;
 
-	/*
-	 * We must request the IRQ after sdhci_add_host(), as the tasklet only
-	 * gets setup in sdhci_add_host() and we oops.
-	 */
-	if (gpio_is_valid(priv->gpio_cd)) {
-		ret = request_irq(gpio_to_irq(priv->gpio_cd),
-				  sdhci_dove_carddetect_irq,
-				  IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				  mmc_hostname(host->mmc), host);
-		if (ret) {
-			dev_err(&pdev->dev, "card detect irq request failed: %d\n",
-				ret);
-			goto err_request_irq;
-		}
-	}
-
 	return 0;
 
-err_request_irq:
-	sdhci_remove_host(host, 0);
 err_sdhci_add:
 	if (!IS_ERR(priv->clk))
 		clk_disable_unprepare(priv->clk);
 	sdhci_pltfm_free(pdev);
-err_sdhci_pltfm_init:
-	if (gpio_is_valid(priv->gpio_cd))
-		gpio_free(priv->gpio_cd);
 	return ret;
 }
 
@@ -186,11 +131,6 @@
 
 	sdhci_pltfm_unregister(pdev);
 
-	if (gpio_is_valid(priv->gpio_cd)) {
-		free_irq(gpio_to_irq(priv->gpio_cd), host);
-		gpio_free(priv->gpio_cd);
-	}
-
 	if (!IS_ERR(priv->clk))
 		clk_disable_unprepare(priv->clk);
 

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index b841bb7..ccec0e3 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c

@@ -160,7 +160,6 @@
 		MULTIBLK_IN_PROCESS, /* exact multiblock cmd in process */
 		WAIT_FOR_INT,        /* sent CMD12, waiting for response INT */
 	} multiblock_status;
-	u32 uhs_mode;
 	u32 is_ddr;
 };
 
@@ -382,7 +381,6 @@
 		if (val & ESDHC_MIX_CTRL_SMPCLK_SEL)
 			ret |= SDHCI_CTRL_TUNED_CLK;
 
-		ret |= (imx_data->uhs_mode & SDHCI_CTRL_UHS_MASK);
 		ret &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
 
 		return ret;
@@ -429,7 +427,6 @@
 		else
 			new_val &= ~ESDHC_VENDOR_SPEC_VSELECT;
 		writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
-		imx_data->uhs_mode = val & SDHCI_CTRL_UHS_MASK;
 		if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) {
 			new_val = readl(host->ioaddr + ESDHC_MIX_CTRL);
 			if (val & SDHCI_CTRL_TUNED_CLK)
@@ -600,12 +597,14 @@
 	u32 temp, val;
 
 	if (clock == 0) {
+		host->mmc->actual_clock = 0;
+
 		if (esdhc_is_usdhc(imx_data)) {
 			val = readl(host->ioaddr + ESDHC_VENDOR_SPEC);
 			writel(val & ~ESDHC_VENDOR_SPEC_FRC_SDCLK_ON,
 					host->ioaddr + ESDHC_VENDOR_SPEC);
 		}
-		goto out;
+		return;
 	}
 
 	if (esdhc_is_usdhc(imx_data) && !imx_data->is_ddr)
@@ -645,8 +644,6 @@
 	}
 
 	mdelay(1);
-out:
-	host->clock = clock;
 }
 
 static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
@@ -668,7 +665,7 @@
 	return -ENOSYS;
 }
 
-static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
+static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
 {
 	u32 ctrl;
 
@@ -686,8 +683,6 @@
 
 	esdhc_clrset_le(host, ESDHC_CTRL_BUSWIDTH_MASK, ctrl,
 			SDHCI_HOST_CONTROL);
-
-	return 0;
 }
 
 static void esdhc_prepare_tuning(struct sdhci_host *host, u32 val)
@@ -697,6 +692,7 @@
 	/* FIXME: delay a bit for card to be ready for next tuning due to errors */
 	mdelay(1);
 
+	/* This is balanced by the runtime put in sdhci_tasklet_finish */
 	pm_runtime_get_sync(host->mmc->parent);
 	reg = readl(host->ioaddr + ESDHC_MIX_CTRL);
 	reg |= ESDHC_MIX_CTRL_EXE_TUNE | ESDHC_MIX_CTRL_SMPCLK_SEL |
@@ -713,13 +709,12 @@
 	complete(&mrq->completion);
 }
 
-static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode)
+static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode,
+				 struct scatterlist *sg)
 {
 	struct mmc_command cmd = {0};
 	struct mmc_request mrq = {NULL};
 	struct mmc_data data = {0};
-	struct scatterlist sg;
-	char tuning_pattern[ESDHC_TUNING_BLOCK_PATTERN_LEN];
 
 	cmd.opcode = opcode;
 	cmd.arg = 0;
@@ -728,11 +723,9 @@
 	data.blksz = ESDHC_TUNING_BLOCK_PATTERN_LEN;
 	data.blocks = 1;
 	data.flags = MMC_DATA_READ;
-	data.sg = &sg;
+	data.sg = sg;
 	data.sg_len = 1;
 
-	sg_init_one(&sg, tuning_pattern, sizeof(tuning_pattern));
-
 	mrq.cmd = &cmd;
 	mrq.cmd->mrq = &mrq;
 	mrq.data = &data;
@@ -742,14 +735,12 @@
 	mrq.done = esdhc_request_done;
 	init_completion(&(mrq.completion));
 
-	disable_irq(host->irq);
-	spin_lock(&host->lock);
+	spin_lock_irq(&host->lock);
 	host->mrq = &mrq;
 
 	sdhci_send_command(host, mrq.cmd);
 
-	spin_unlock(&host->lock);
-	enable_irq(host->irq);
+	spin_unlock_irq(&host->lock);
 
 	wait_for_completion(&mrq.completion);
 
@@ -772,13 +763,21 @@
 
 static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
 {
+	struct scatterlist sg;
+	char *tuning_pattern;
 	int min, max, avg, ret;
 
+	tuning_pattern = kmalloc(ESDHC_TUNING_BLOCK_PATTERN_LEN, GFP_KERNEL);
+	if (!tuning_pattern)
+		return -ENOMEM;
+
+	sg_init_one(&sg, tuning_pattern, ESDHC_TUNING_BLOCK_PATTERN_LEN);
+
 	/* find the mininum delay first which can pass tuning */
 	min = ESDHC_TUNE_CTRL_MIN;
 	while (min < ESDHC_TUNE_CTRL_MAX) {
 		esdhc_prepare_tuning(host, min);
-		if (!esdhc_send_tuning_cmd(host, opcode))
+		if (!esdhc_send_tuning_cmd(host, opcode, &sg))
 			break;
 		min += ESDHC_TUNE_CTRL_STEP;
 	}
@@ -787,7 +786,7 @@
 	max = min + ESDHC_TUNE_CTRL_STEP;
 	while (max < ESDHC_TUNE_CTRL_MAX) {
 		esdhc_prepare_tuning(host, max);
-		if (esdhc_send_tuning_cmd(host, opcode)) {
+		if (esdhc_send_tuning_cmd(host, opcode, &sg)) {
 			max -= ESDHC_TUNE_CTRL_STEP;
 			break;
 		}
@@ -797,9 +796,11 @@
 	/* use average delay to get the best timing */
 	avg = (min + max) / 2;
 	esdhc_prepare_tuning(host, avg);
-	ret = esdhc_send_tuning_cmd(host, opcode);
+	ret = esdhc_send_tuning_cmd(host, opcode, &sg);
 	esdhc_post_tuning(host);
 
+	kfree(tuning_pattern);
+
 	dev_dbg(mmc_dev(host->mmc), "tunning %s at 0x%x ret %d\n",
 		ret ? "failed" : "passed", avg, ret);
 
@@ -837,28 +838,21 @@
 	return pinctrl_select_state(imx_data->pinctrl, pinctrl);
 }
 
-static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
 	struct esdhc_platform_data *boarddata = &imx_data->boarddata;
 
-	switch (uhs) {
+	switch (timing) {
 	case MMC_TIMING_UHS_SDR12:
-		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR12;
-		break;
 	case MMC_TIMING_UHS_SDR25:
-		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR25;
-		break;
 	case MMC_TIMING_UHS_SDR50:
-		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR50;
-		break;
 	case MMC_TIMING_UHS_SDR104:
 	case MMC_TIMING_MMC_HS200:
-		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR104;
 		break;
 	case MMC_TIMING_UHS_DDR50:
-		imx_data->uhs_mode = SDHCI_CTRL_UHS_DDR50;
+	case MMC_TIMING_MMC_DDR52:
 		writel(readl(host->ioaddr + ESDHC_MIX_CTRL) |
 				ESDHC_MIX_CTRL_DDREN,
 				host->ioaddr + ESDHC_MIX_CTRL);
@@ -875,7 +869,15 @@
 		break;
 	}
 
-	return esdhc_change_pinstate(host, uhs);
+	esdhc_change_pinstate(host, timing);
+}
+
+static void esdhc_reset(struct sdhci_host *host, u8 mask)
+{
+	sdhci_reset(host, mask);
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static struct sdhci_ops sdhci_esdhc_ops = {
@@ -888,8 +890,9 @@
 	.get_max_clock = esdhc_pltfm_get_max_clock,
 	.get_min_clock = esdhc_pltfm_get_min_clock,
 	.get_ro = esdhc_pltfm_get_ro,
-	.platform_bus_width = esdhc_pltfm_bus_width,
+	.set_bus_width = esdhc_pltfm_set_bus_width,
 	.set_uhs_signaling = esdhc_set_uhs_signaling,
+	.reset = esdhc_reset,
 };
 
 static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
@@ -1170,8 +1173,10 @@
 
 	ret = sdhci_runtime_suspend_host(host);
 
-	clk_disable_unprepare(imx_data->clk_per);
-	clk_disable_unprepare(imx_data->clk_ipg);
+	if (!sdhci_sdio_irq_enabled(host)) {
+		clk_disable_unprepare(imx_data->clk_per);
+		clk_disable_unprepare(imx_data->clk_ipg);
+	}
 	clk_disable_unprepare(imx_data->clk_ahb);
 
 	return ret;
@@ -1183,8 +1188,10 @@
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
 
-	clk_prepare_enable(imx_data->clk_per);
-	clk_prepare_enable(imx_data->clk_ipg);
+	if (!sdhci_sdio_irq_enabled(host)) {
+		clk_prepare_enable(imx_data->clk_per);
+		clk_prepare_enable(imx_data->clk_ipg);
+	}
 	clk_prepare_enable(imx_data->clk_ahb);
 
 	return sdhci_runtime_resume_host(host);

diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index a7d9f95..3497cfa 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h

@@ -20,10 +20,8 @@
 
 #define ESDHC_DEFAULT_QUIRKS	(SDHCI_QUIRK_FORCE_BLK_SZ_2048 | \
 				SDHCI_QUIRK_NO_BUSY_IRQ | \
-				SDHCI_QUIRK_NONSTANDARD_CLOCK | \
 				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
-				SDHCI_QUIRK_PIO_NEEDS_DELAY | \
-				SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
+				SDHCI_QUIRK_PIO_NEEDS_DELAY)
 
 #define ESDHC_SYSTEM_CONTROL	0x2c
 #define ESDHC_CLOCK_MASK	0x0000fff0

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index acb0e9e..40573a5 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c

@@ -468,6 +468,10 @@
 
 static struct sdhci_ops sdhci_msm_ops = {
 	.platform_execute_tuning = sdhci_msm_execute_tuning,
+	.reset = sdhci_reset,
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static int sdhci_msm_probe(struct platform_device *pdev)

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index f7c7cf6..5bd1092 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c

@@ -52,8 +52,12 @@
 }
 
 static struct sdhci_ops sdhci_arasan_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_timeout_clock = sdhci_arasan_get_timeout_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static struct sdhci_pltfm_data sdhci_arasan_pdata = {

diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 0b24997..8be4dcf 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c

@@ -199,13 +199,14 @@
 
 static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
 {
-
 	int pre_div = 2;
 	int div = 1;
 	u32 temp;
 
+	host->mmc->actual_clock = 0;
+
 	if (clock == 0)
-		goto out;
+		return;
 
 	/* Workaround to reduce the clock frequency for p1010 esdhc */
 	if (of_find_compatible_node(NULL, NULL, "fsl,p1010-esdhc")) {
@@ -238,24 +239,8 @@
 		| (pre_div << ESDHC_PREDIV_SHIFT));
 	sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
 	mdelay(1);
-out:
-	host->clock = clock;
 }
 
-#ifdef CONFIG_PM
-static u32 esdhc_proctl;
-static void esdhc_of_suspend(struct sdhci_host *host)
-{
-	esdhc_proctl = sdhci_be32bs_readl(host, SDHCI_HOST_CONTROL);
-}
-
-static void esdhc_of_resume(struct sdhci_host *host)
-{
-	esdhc_of_enable_dma(host);
-	sdhci_be32bs_writel(host, esdhc_proctl, SDHCI_HOST_CONTROL);
-}
-#endif
-
 static void esdhc_of_platform_init(struct sdhci_host *host)
 {
 	u32 vvn;
@@ -269,7 +254,7 @@
 		host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ;
 }
 
-static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
+static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
 {
 	u32 ctrl;
 
@@ -289,8 +274,6 @@
 
 	clrsetbits_be32(host->ioaddr + SDHCI_HOST_CONTROL,
 			ESDHC_CTRL_BUSWIDTH_MASK, ctrl);
-
-	return 0;
 }
 
 static const struct sdhci_ops sdhci_esdhc_ops = {
@@ -305,14 +288,47 @@
 	.get_max_clock = esdhc_of_get_max_clock,
 	.get_min_clock = esdhc_of_get_min_clock,
 	.platform_init = esdhc_of_platform_init,
-#ifdef CONFIG_PM
-	.platform_suspend = esdhc_of_suspend,
-	.platform_resume = esdhc_of_resume,
-#endif
 	.adma_workaround = esdhci_of_adma_workaround,
-	.platform_bus_width = esdhc_pltfm_bus_width,
+	.set_bus_width = esdhc_pltfm_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
+#ifdef CONFIG_PM
+
+static u32 esdhc_proctl;
+static int esdhc_of_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+
+	esdhc_proctl = sdhci_be32bs_readl(host, SDHCI_HOST_CONTROL);
+
+	return sdhci_suspend_host(host);
+}
+
+static int esdhc_of_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	int ret = sdhci_resume_host(host);
+
+	if (ret == 0) {
+		/* Isn't this already done by sdhci_resume_host() ? --rmk */
+		esdhc_of_enable_dma(host);
+		sdhci_be32bs_writel(host, esdhc_proctl, SDHCI_HOST_CONTROL);
+	}
+
+	return ret;
+}
+
+static const struct dev_pm_ops esdhc_pmops = {
+	.suspend	= esdhc_of_suspend,
+	.resume		= esdhc_of_resume,
+};
+#define ESDHC_PMOPS (&esdhc_pmops)
+#else
+#define ESDHC_PMOPS NULL
+#endif
+
 static const struct sdhci_pltfm_data sdhci_esdhc_pdata = {
 	/*
 	 * card detection could be handled via GPIO
@@ -374,7 +390,7 @@
 		.name = "sdhci-esdhc",
 		.owner = THIS_MODULE,
 		.of_match_table = sdhci_esdhc_of_match,
-		.pm = SDHCI_PLTFM_PMOPS,
+		.pm = ESDHC_PMOPS,
 	},
 	.probe = sdhci_esdhc_probe,
 	.remove = sdhci_esdhc_remove,

diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 57c514a..b341661 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c

@@ -58,6 +58,10 @@
 	.write_l = sdhci_hlwd_writel,
 	.write_w = sdhci_hlwd_writew,
 	.write_b = sdhci_hlwd_writeb,
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data sdhci_hlwd_pdata = {

diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index f49666b..5670e38 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c

@@ -21,6 +21,45 @@
 #include "sdhci-pci.h"
 #include "sdhci-pci-o2micro.h"
 
+static void o2_pci_set_baseclk(struct sdhci_pci_chip *chip, u32 value)
+{
+	u32 scratch_32;
+	pci_read_config_dword(chip->pdev,
+			      O2_SD_PLL_SETTING, &scratch_32);
+
+	scratch_32 &= 0x0000FFFF;
+	scratch_32 |= value;
+
+	pci_write_config_dword(chip->pdev,
+			       O2_SD_PLL_SETTING, scratch_32);
+}
+
+static void o2_pci_led_enable(struct sdhci_pci_chip *chip)
+{
+	int ret;
+	u32 scratch_32;
+
+	/* Enable the LED function of the SD host */
+	ret = pci_read_config_dword(chip->pdev,
+				    O2_SD_FUNC_REG0, &scratch_32);
+	if (ret)
+		return;
+
+	scratch_32 &= ~O2_SD_FREG0_LEDOFF;
+	pci_write_config_dword(chip->pdev,
+			       O2_SD_FUNC_REG0, scratch_32);
+
+	ret = pci_read_config_dword(chip->pdev,
+				    O2_SD_TEST_REG, &scratch_32);
+	if (ret)
+		return;
+
+	scratch_32 |= O2_SD_LED_ENABLE;
+	pci_write_config_dword(chip->pdev,
+			       O2_SD_TEST_REG, scratch_32);
+
+}
+
 void sdhci_pci_o2_fujin2_pci_init(struct sdhci_pci_chip *chip)
 {
 	u32 scratch_32;
@@ -216,6 +255,40 @@
 		scratch &= 0x7f;
 		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
 
+		/* Extra setup for DevId 8520 chips with subId 0x11 or 0x12 */
+		if (chip->pdev->device == PCI_DEVICE_ID_O2_FUJIN2) {
+			ret = pci_read_config_dword(chip->pdev,
+						    O2_SD_FUNC_REG0,
+						    &scratch_32);
+			scratch_32 = ((scratch_32 & 0xFF000000) >> 24);
+
+			/* Check Whether subId is 0x11 or 0x12 */
+			if ((scratch_32 == 0x11) || (scratch_32 == 0x12)) {
+				scratch_32 = 0x2c280000;
+
+				/* Set Base Clock to 208MHz */
+				o2_pci_set_baseclk(chip, scratch_32);
+				ret = pci_read_config_dword(chip->pdev,
+							    O2_SD_FUNC_REG4,
+							    &scratch_32);
+
+				/* Enable Base Clk setting change */
+				scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET;
+				pci_write_config_dword(chip->pdev,
+						       O2_SD_FUNC_REG4,
+						       scratch_32);
+
+				/* Set Tuning Window to 4 */
+				pci_write_config_byte(chip->pdev,
+						      O2_SD_TUNING_CTRL, 0x44);
+
+				break;
+			}
+		}
+
+		/* Enable 8520 led function */
+		o2_pci_led_enable(chip);
+
 		/* Set timeout CLK */
 		ret = pci_read_config_dword(chip->pdev,
 					    O2_SD_CLK_SETTING, &scratch_32);
@@ -276,7 +349,7 @@
 		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
 
 		ret = pci_read_config_dword(chip->pdev,
-					    O2_SD_FUNC_REG0, &scratch_32);
+					    O2_SD_PLL_SETTING, &scratch_32);
 
 		if ((scratch_32 & 0xff000000) == 0x01000000) {
 			scratch_32 &= 0x0000FFFF;
@@ -299,6 +372,9 @@
 					       O2_SD_FUNC_REG4, scratch_32);
 		}
 
+		/* Set Tuning Window to 5 */
+		pci_write_config_byte(chip->pdev,
+				O2_SD_TUNING_CTRL, 0x55);
 		/* Lock WP */
 		ret = pci_read_config_byte(chip->pdev,
 					   O2_SD_LOCK_WP, &scratch);

diff --git a/drivers/mmc/host/sdhci-pci-o2micro.h b/drivers/mmc/host/sdhci-pci-o2micro.h
index dbec4c9..f7ffc90 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.h
+++ b/drivers/mmc/host/sdhci-pci-o2micro.h

@@ -57,6 +57,9 @@
 #define O2_SD_UHS2_L1_CTRL	0x35C
 #define O2_SD_FUNC_REG3		0x3E0
 #define O2_SD_FUNC_REG4		0x3E4
+#define O2_SD_LED_ENABLE	BIT(6)
+#define O2_SD_FREG0_LEDOFF	BIT(13)
+#define O2_SD_FREG4_ENABLE_CLK_SET	BIT(22)
 
 #define O2_SD_VENDOR_SETTING	0x110
 #define O2_SD_VENDOR_SETTING2	0x1C8

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index fdc6121..52c42fc 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c

@@ -1031,7 +1031,7 @@
 	return 0;
 }
 
-static int sdhci_pci_bus_width(struct sdhci_host *host, int width)
+static void sdhci_pci_set_bus_width(struct sdhci_host *host, int width)
 {
 	u8 ctrl;
 
@@ -1052,8 +1052,6 @@
 	}
 
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-
-	return 0;
 }
 
 static void sdhci_pci_gpio_hw_reset(struct sdhci_host *host)
@@ -1080,8 +1078,11 @@
 }
 
 static const struct sdhci_ops sdhci_pci_ops = {
+	.set_clock	= sdhci_set_clock,
 	.enable_dma	= sdhci_pci_enable_dma,
-	.platform_bus_width	= sdhci_pci_bus_width,
+	.set_bus_width	= sdhci_pci_set_bus_width,
+	.reset		= sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
 };
 

diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index bef250e..7e834fb 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c

@@ -45,6 +45,10 @@
 EXPORT_SYMBOL_GPL(sdhci_pltfm_clk_get_max_clock);
 
 static const struct sdhci_ops sdhci_pltfm_ops = {
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 #ifdef CONFIG_OF

diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index d51e061..3c0f3c0 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c

@@ -51,11 +51,13 @@
 #define MMC_CARD		0x1000
 #define MMC_WIDTH		0x0100
 
-static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask)
+static void pxav2_reset(struct sdhci_host *host, u8 mask)
 {
 	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
 	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
 
+	sdhci_reset(host, mask);
+
 	if (mask == SDHCI_RESET_ALL) {
 		u16 tmp = 0;
 
@@ -88,7 +90,7 @@
 	}
 }
 
-static int pxav2_mmc_set_width(struct sdhci_host *host, int width)
+static void pxav2_mmc_set_bus_width(struct sdhci_host *host, int width)
 {
 	u8 ctrl;
 	u16 tmp;
@@ -107,14 +109,14 @@
 	}
 	writew(tmp, host->ioaddr + SD_CE_ATA_2);
 	writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
-
-	return 0;
 }
 
 static const struct sdhci_ops pxav2_sdhci_ops = {
+	.set_clock     = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
-	.platform_reset_exit = pxav2_set_private_registers,
-	.platform_bus_width = pxav2_mmc_set_width,
+	.set_bus_width = pxav2_mmc_set_bus_width,
+	.reset         = pxav2_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 #ifdef CONFIG_OF

diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 2fd73b3..f4f1289 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c

@@ -112,11 +112,13 @@
 	return 0;
 }
 
-static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask)
+static void pxav3_reset(struct sdhci_host *host, u8 mask)
 {
 	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
 	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
 
+	sdhci_reset(host, mask);
+
 	if (mask == SDHCI_RESET_ALL) {
 		/*
 		 * tune timing of read data/command when crc error happen
@@ -184,7 +186,7 @@
 	pxa->power_mode = power_mode;
 }
 
-static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+static void pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 {
 	u16 ctrl_2;
 
@@ -218,15 +220,16 @@
 	dev_dbg(mmc_dev(host->mmc),
 		"%s uhs = %d, ctrl_2 = %04X\n",
 		__func__, uhs, ctrl_2);
-
-	return 0;
 }
 
 static const struct sdhci_ops pxav3_sdhci_ops = {
-	.platform_reset_exit = pxav3_set_private_registers,
+	.set_clock = sdhci_set_clock,
 	.set_uhs_signaling = pxav3_set_uhs_signaling,
 	.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = pxav3_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static struct sdhci_pltfm_data sdhci_pxav3_pdata = {

diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index d61eb5a..fa5954a 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c

@@ -33,9 +33,6 @@
 
 #define MAX_BUS_CLK	(4)
 
-/* Number of gpio's used is max data bus width + command and clock lines */
-#define NUM_GPIOS(x)	(x + 2)
-
 /**
  * struct sdhci_s3c - S3C SDHCI instance
  * @host: The SDHCI host created
@@ -58,6 +55,8 @@
 	struct clk		*clk_io;
 	struct clk		*clk_bus[MAX_BUS_CLK];
 	unsigned long		clk_rates[MAX_BUS_CLK];
+
+	bool			no_divider;
 };
 
 /**
@@ -70,6 +69,7 @@
  */
 struct sdhci_s3c_drv_data {
 	unsigned int	sdhci_quirks;
+	bool		no_divider;
 };
 
 static inline struct sdhci_s3c *to_s3c(struct sdhci_host *host)
@@ -119,7 +119,7 @@
 	 * If controller uses a non-standard clock division, find the best clock
 	 * speed possible with selected clock source and skip the division.
 	 */
-	if (ourhost->host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (ourhost->no_divider) {
 		rate = clk_round_rate(clksrc, wanted);
 		return wanted - rate;
 	}
@@ -161,9 +161,13 @@
 	int src;
 	u32 ctrl;
 
+	host->mmc->actual_clock = 0;
+
 	/* don't bother if the clock is going off. */
-	if (clock == 0)
+	if (clock == 0) {
+		sdhci_set_clock(host, clock);
 		return;
+	}
 
 	for (src = 0; src < MAX_BUS_CLK; src++) {
 		delta = sdhci_s3c_consider_clock(ourhost, src, clock);
@@ -215,6 +219,8 @@
 	if (clock < 25 * 1000000)
 		ctrl |= (S3C_SDHCI_CTRL3_FCSEL3 | S3C_SDHCI_CTRL3_FCSEL2);
 	writel(ctrl, host->ioaddr + S3C_SDHCI_CONTROL3);
+
+	sdhci_set_clock(host, clock);
 }
 
 /**
@@ -295,10 +301,11 @@
 	unsigned long timeout;
 	u16 clk = 0;
 
+	host->mmc->actual_clock = 0;
+
 	/* If the clock is going off, set to 0 at clock control register */
 	if (clock == 0) {
 		sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
-		host->clock = clock;
 		return;
 	}
 
@@ -306,8 +313,6 @@
 
 	clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock);
 
-	host->clock = clock;
-
 	clk = SDHCI_CLOCK_INT_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
@@ -329,14 +334,14 @@
 }
 
 /**
- * sdhci_s3c_platform_bus_width - support 8bit buswidth
+ * sdhci_s3c_set_bus_width - support 8bit buswidth
  * @host: The SDHCI host being queried
  * @width: MMC_BUS_WIDTH_ macro for the bus width being requested
  *
  * We have 8-bit width support but is not a v3 controller.
  * So we add platform_bus_width() and support 8bit width.
  */
-static int sdhci_s3c_platform_bus_width(struct sdhci_host *host, int width)
+static void sdhci_s3c_set_bus_width(struct sdhci_host *host, int width)
 {
 	u8 ctrl;
 
@@ -358,93 +363,23 @@
 	}
 
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-
-	return 0;
 }
 
 static struct sdhci_ops sdhci_s3c_ops = {
 	.get_max_clock		= sdhci_s3c_get_max_clk,
 	.set_clock		= sdhci_s3c_set_clock,
 	.get_min_clock		= sdhci_s3c_get_min_clock,
-	.platform_bus_width	= sdhci_s3c_platform_bus_width,
+	.set_bus_width		= sdhci_s3c_set_bus_width,
+	.reset			= sdhci_reset,
+	.set_uhs_signaling	= sdhci_set_uhs_signaling,
 };
 
-static void sdhci_s3c_notify_change(struct platform_device *dev, int state)
-{
-	struct sdhci_host *host = platform_get_drvdata(dev);
-#ifdef CONFIG_PM_RUNTIME
-	struct sdhci_s3c *sc = sdhci_priv(host);
-#endif
-	unsigned long flags;
-
-	if (host) {
-		spin_lock_irqsave(&host->lock, flags);
-		if (state) {
-			dev_dbg(&dev->dev, "card inserted.\n");
-#ifdef CONFIG_PM_RUNTIME
-			clk_prepare_enable(sc->clk_io);
-#endif
-			host->flags &= ~SDHCI_DEVICE_DEAD;
-			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-		} else {
-			dev_dbg(&dev->dev, "card removed.\n");
-			host->flags |= SDHCI_DEVICE_DEAD;
-			host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-#ifdef CONFIG_PM_RUNTIME
-			clk_disable_unprepare(sc->clk_io);
-#endif
-		}
-		tasklet_schedule(&host->card_tasklet);
-		spin_unlock_irqrestore(&host->lock, flags);
-	}
-}
-
-static irqreturn_t sdhci_s3c_gpio_card_detect_thread(int irq, void *dev_id)
-{
-	struct sdhci_s3c *sc = dev_id;
-	int status = gpio_get_value(sc->ext_cd_gpio);
-	if (sc->pdata->ext_cd_gpio_invert)
-		status = !status;
-	sdhci_s3c_notify_change(sc->pdev, status);
-	return IRQ_HANDLED;
-}
-
-static void sdhci_s3c_setup_card_detect_gpio(struct sdhci_s3c *sc)
-{
-	struct s3c_sdhci_platdata *pdata = sc->pdata;
-	struct device *dev = &sc->pdev->dev;
-
-	if (devm_gpio_request(dev, pdata->ext_cd_gpio, "SDHCI EXT CD") == 0) {
-		sc->ext_cd_gpio = pdata->ext_cd_gpio;
-		sc->ext_cd_irq = gpio_to_irq(pdata->ext_cd_gpio);
-		if (sc->ext_cd_irq &&
-		    request_threaded_irq(sc->ext_cd_irq, NULL,
-					 sdhci_s3c_gpio_card_detect_thread,
-					 IRQF_TRIGGER_RISING |
-					 IRQF_TRIGGER_FALLING |
-					 IRQF_ONESHOT,
-					 dev_name(dev), sc) == 0) {
-			int status = gpio_get_value(sc->ext_cd_gpio);
-			if (pdata->ext_cd_gpio_invert)
-				status = !status;
-			sdhci_s3c_notify_change(sc->pdev, status);
-		} else {
-			dev_warn(dev, "cannot request irq for card detect\n");
-			sc->ext_cd_irq = 0;
-		}
-	} else {
-		dev_err(dev, "cannot request gpio for card detect\n");
-	}
-}
-
 #ifdef CONFIG_OF
 static int sdhci_s3c_parse_dt(struct device *dev,
 		struct sdhci_host *host, struct s3c_sdhci_platdata *pdata)
 {
 	struct device_node *node = dev->of_node;
-	struct sdhci_s3c *ourhost = to_s3c(host);
 	u32 max_width;
-	int gpio;
 
 	/* if the bus-width property is not specified, assume width as 1 */
 	if (of_property_read_u32(node, "bus-width", &max_width))
@@ -462,18 +397,8 @@
 		return 0;
 	}
 
-	gpio = of_get_named_gpio(node, "cd-gpios", 0);
-	if (gpio_is_valid(gpio)) {
-		pdata->cd_type = S3C_SDHCI_CD_GPIO;
-		pdata->ext_cd_gpio = gpio;
-		ourhost->ext_cd_gpio = -1;
-		if (of_get_property(node, "cd-inverted", NULL))
-			pdata->ext_cd_gpio_invert = 1;
+	if (of_get_named_gpio(node, "cd-gpios", 0))
 		return 0;
-	} else if (gpio != -ENOENT) {
-		dev_err(dev, "invalid card detect gpio specified\n");
-		return -EINVAL;
-	}
 
 	/* assuming internal card detect that will be configured by pinctrl */
 	pdata->cd_type = S3C_SDHCI_CD_INTERNAL;
@@ -606,8 +531,10 @@
 	/* Setup quirks for the controller */
 	host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
 	host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT;
-	if (drv_data)
+	if (drv_data) {
 		host->quirks |= drv_data->sdhci_quirks;
+		sc->no_divider = drv_data->no_divider;
+	}
 
 #ifndef CONFIG_MMC_SDHCI_S3C_DMA
 
@@ -656,7 +583,7 @@
 	 * If controller does not have internal clock divider,
 	 * we can use overriding functions instead of default.
 	 */
-	if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (sc->no_divider) {
 		sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
 		sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
 		sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
@@ -674,6 +601,8 @@
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_suspend_ignore_children(&pdev->dev, 1);
 
+	mmc_of_parse(host->mmc);
+
 	ret = sdhci_add_host(host);
 	if (ret) {
 		dev_err(dev, "sdhci_add_host() failed\n");
@@ -682,15 +611,6 @@
 		goto err_req_regs;
 	}
 
-	/* The following two methods of card detection might call
-	   sdhci_s3c_notify_change() immediately, so they can be called
-	   only after sdhci_add_host(). Setup errors are ignored. */
-	if (pdata->cd_type == S3C_SDHCI_CD_EXTERNAL && pdata->ext_cd_init)
-		pdata->ext_cd_init(&sdhci_s3c_notify_change);
-	if (pdata->cd_type == S3C_SDHCI_CD_GPIO &&
-	    gpio_is_valid(pdata->ext_cd_gpio))
-		sdhci_s3c_setup_card_detect_gpio(sc);
-
 #ifdef CONFIG_PM_RUNTIME
 	if (pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
 		clk_disable_unprepare(sc->clk_io);
@@ -711,16 +631,12 @@
 {
 	struct sdhci_host *host =  platform_get_drvdata(pdev);
 	struct sdhci_s3c *sc = sdhci_priv(host);
-	struct s3c_sdhci_platdata *pdata = sc->pdata;
-
-	if (pdata->cd_type == S3C_SDHCI_CD_EXTERNAL && pdata->ext_cd_cleanup)
-		pdata->ext_cd_cleanup(&sdhci_s3c_notify_change);
 
 	if (sc->ext_cd_irq)
 		free_irq(sc->ext_cd_irq, sc);
 
 #ifdef CONFIG_PM_RUNTIME
-	if (pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
+	if (sc->pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
 		clk_prepare_enable(sc->clk_io);
 #endif
 	sdhci_remove_host(host, 1);
@@ -797,7 +713,7 @@
 
 #if defined(CONFIG_CPU_EXYNOS4210) || defined(CONFIG_SOC_EXYNOS4212)
 static struct sdhci_s3c_drv_data exynos4_sdhci_drv_data = {
-	.sdhci_quirks = SDHCI_QUIRK_NONSTANDARD_CLOCK,
+	.no_divider = true,
 };
 #define EXYNOS4_SDHCI_DRV_DATA ((kernel_ulong_t)&exynos4_sdhci_drv_data)
 #else

diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 696122c..1700453 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c

@@ -28,7 +28,11 @@
 }
 
 static struct sdhci_ops sdhci_sirf_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock	= sdhci_sirf_get_max_clk,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 static struct sdhci_pltfm_data sdhci_sirf_pdata = {

diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 0316dec..9d535c7 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c

@@ -38,7 +38,10 @@
 
 /* sdhci ops */
 static const struct sdhci_ops sdhci_pltfm_ops = {
-	/* Nothing to do for now. */
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
 #ifdef CONFIG_OF

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index a835898..d93a063 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c

@@ -32,11 +32,17 @@
 
 /* Tegra SDHOST controller vendor register definitions */
 #define SDHCI_TEGRA_VENDOR_MISC_CTRL		0x120
+#define SDHCI_MISC_CTRL_ENABLE_SDR104		0x8
+#define SDHCI_MISC_CTRL_ENABLE_SDR50		0x10
 #define SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300	0x20
+#define SDHCI_MISC_CTRL_ENABLE_DDR50		0x200
 
 #define NVQUIRK_FORCE_SDHCI_SPEC_200	BIT(0)
 #define NVQUIRK_ENABLE_BLOCK_GAP_DET	BIT(1)
 #define NVQUIRK_ENABLE_SDHCI_SPEC_300	BIT(2)
+#define NVQUIRK_DISABLE_SDR50		BIT(3)
+#define NVQUIRK_DISABLE_SDR104		BIT(4)
+#define NVQUIRK_DISABLE_DDR50		BIT(5)
 
 struct sdhci_tegra_soc_data {
 	const struct sdhci_pltfm_data *pdata;
@@ -48,19 +54,6 @@
 	int power_gpio;
 };
 
-static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
-{
-	u32 val;
-
-	if (unlikely(reg == SDHCI_PRESENT_STATE)) {
-		/* Use wp_gpio here instead? */
-		val = readl(host->ioaddr + reg);
-		return val | SDHCI_WRITE_PROTECT;
-	}
-
-	return readl(host->ioaddr + reg);
-}
-
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -108,26 +101,33 @@
 	return mmc_gpio_get_ro(host->mmc);
 }
 
-static void tegra_sdhci_reset_exit(struct sdhci_host *host, u8 mask)
+static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_tegra *tegra_host = pltfm_host->priv;
 	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+	u32 misc_ctrl;
+
+	sdhci_reset(host, mask);
 
 	if (!(mask & SDHCI_RESET_ALL))
 		return;
 
+	misc_ctrl = sdhci_readw(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
 	/* Erratum: Enable SDHCI spec v3.00 support */
-	if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300) {
-		u32 misc_ctrl;
-
-		misc_ctrl = sdhci_readb(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+	if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
 		misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
-		sdhci_writeb(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
-	}
+	/* Don't advertise UHS modes which aren't supported yet */
+	if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR50)
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR50;
+	if (soc_data->nvquirks & NVQUIRK_DISABLE_DDR50)
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_DDR50;
+	if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR104)
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR104;
+	sdhci_writew(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
 }
 
-static int tegra_sdhci_buswidth(struct sdhci_host *host, int bus_width)
+static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
 {
 	u32 ctrl;
 
@@ -144,23 +144,25 @@
 			ctrl &= ~SDHCI_CTRL_4BITBUS;
 	}
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-	return 0;
 }
 
 static const struct sdhci_ops tegra_sdhci_ops = {
 	.get_ro     = tegra_sdhci_get_ro,
-	.read_l     = tegra_sdhci_readl,
 	.read_w     = tegra_sdhci_readw,
 	.write_l    = tegra_sdhci_writel,
-	.platform_bus_width = tegra_sdhci_buswidth,
-	.platform_reset_exit = tegra_sdhci_reset_exit,
+	.set_clock  = sdhci_set_clock,
+	.set_bus_width = tegra_sdhci_set_bus_width,
+	.reset      = tegra_sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
+	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
 	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
 		  SDHCI_QUIRK_NO_HISPD_BIT |
-		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
 	.ops  = &tegra_sdhci_ops,
 };
 
@@ -175,13 +177,16 @@
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
 		  SDHCI_QUIRK_NO_HISPD_BIT |
-		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
 	.ops  = &tegra_sdhci_ops,
 };
 
 static struct sdhci_tegra_soc_data soc_data_tegra30 = {
 	.pdata = &sdhci_tegra30_pdata,
-	.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300,
+	.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
+		    NVQUIRK_DISABLE_SDR50 |
+		    NVQUIRK_DISABLE_SDR104,
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
@@ -189,12 +194,16 @@
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
 		  SDHCI_QUIRK_NO_HISPD_BIT |
-		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
 	.ops  = &tegra_sdhci_ops,
 };
 
 static struct sdhci_tegra_soc_data soc_data_tegra114 = {
 	.pdata = &sdhci_tegra114_pdata,
+	.nvquirks = NVQUIRK_DISABLE_SDR50 |
+		    NVQUIRK_DISABLE_DDR50 |
+		    NVQUIRK_DISABLE_SDR104,
 };
 
 static const struct of_device_id sdhci_tegra_dt_match[] = {

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9a79fc4..47055f3 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c

@@ -44,6 +44,8 @@
 
 #define MAX_TUNING_LOOP 40
 
+#define ADMA_SIZE	((128 * 2 + 1) * 4)
+
 static unsigned int debug_quirks = 0;
 static unsigned int debug_quirks2;
 
@@ -131,43 +133,26 @@
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_clear_set_irqs(struct sdhci_host *host, u32 clear, u32 set)
-{
-	u32 ier;
-
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	ier &= ~clear;
-	ier |= set;
-	sdhci_writel(host, ier, SDHCI_INT_ENABLE);
-	sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
-}
-
-static void sdhci_unmask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, 0, irqs);
-}
-
-static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, irqs, 0);
-}
-
 static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 {
-	u32 present, irqs;
+	u32 present;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
 	    (host->mmc->caps & MMC_CAP_NONREMOVABLE))
 		return;
 
-	present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-			      SDHCI_CARD_PRESENT;
-	irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
+	if (enable) {
+		present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+				      SDHCI_CARD_PRESENT;
 
-	if (enable)
-		sdhci_unmask_irqs(host, irqs);
-	else
-		sdhci_mask_irqs(host, irqs);
+		host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+				       SDHCI_INT_CARD_INSERT;
+	} else {
+		host->ier &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT);
+	}
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_enable_card_detection(struct sdhci_host *host)
@@ -180,22 +165,9 @@
 	sdhci_set_card_detection(host, false);
 }
 
-static void sdhci_reset(struct sdhci_host *host, u8 mask)
+void sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	unsigned long timeout;
-	u32 uninitialized_var(ier);
-
-	if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
-		if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
-			SDHCI_CARD_PRESENT))
-			return;
-	}
-
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-
-	if (host->ops->platform_reset_enter)
-		host->ops->platform_reset_enter(host, mask);
 
 	sdhci_writeb(host, mask, SDHCI_SOFTWARE_RESET);
 
@@ -220,16 +192,27 @@
 		timeout--;
 		mdelay(1);
 	}
+}
+EXPORT_SYMBOL_GPL(sdhci_reset);
 
-	if (host->ops->platform_reset_exit)
-		host->ops->platform_reset_exit(host, mask);
+static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
+{
+	if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
+		if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
+			SDHCI_CARD_PRESENT))
+			return;
+	}
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier);
+	host->ops->reset(host, mask);
 
-	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
-		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
-			host->ops->enable_dma(host);
+	if (mask & SDHCI_RESET_ALL) {
+		if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
+			if (host->ops->enable_dma)
+				host->ops->enable_dma(host);
+		}
+
+		/* A full reset clears many settings; preset enable is off */
+		host->preset_enabled = false;
 	}
 }
 
@@ -238,15 +221,18 @@
 static void sdhci_init(struct sdhci_host *host, int soft)
 {
 	if (soft)
-		sdhci_reset(host, SDHCI_RESET_CMD|SDHCI_RESET_DATA);
+		sdhci_do_reset(host, SDHCI_RESET_CMD|SDHCI_RESET_DATA);
 	else
-		sdhci_reset(host, SDHCI_RESET_ALL);
+		sdhci_do_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK,
-		SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
-		SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX |
-		SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
-		SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE);
+	host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
+		    SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
+		    SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC |
+		    SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_END |
+		    SDHCI_INT_RESPONSE;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
 	if (soft) {
 		/* force clock reconfiguration */
@@ -502,11 +488,6 @@
 	else
 		direction = DMA_TO_DEVICE;
 
-	/*
-	 * The ADMA descriptor table is mapped further down as we
-	 * need to fill it with data first.
-	 */
-
 	host->align_addr = dma_map_single(mmc_dev(host->mmc),
 		host->align_buffer, 128 * 4, direction);
 	if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
@@ -567,7 +548,7 @@
 		 * If this triggers then we have a calculation bug
 		 * somewhere. :/
 		 */
-		WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4);
+		WARN_ON((desc - host->adma_desc) > ADMA_SIZE);
 	}
 
 	if (host->quirks & SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC) {
@@ -595,17 +576,8 @@
 			host->align_addr, 128 * 4, direction);
 	}
 
-	host->adma_addr = dma_map_single(mmc_dev(host->mmc),
-		host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
-	if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr))
-		goto unmap_entries;
-	BUG_ON(host->adma_addr & 0x3);
-
 	return 0;
 
-unmap_entries:
-	dma_unmap_sg(mmc_dev(host->mmc), data->sg,
-		data->sg_len, direction);
 unmap_align:
 	dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
 		128 * 4, direction);
@@ -623,19 +595,25 @@
 	u8 *align;
 	char *buffer;
 	unsigned long flags;
+	bool has_unaligned;
 
 	if (data->flags & MMC_DATA_READ)
 		direction = DMA_FROM_DEVICE;
 	else
 		direction = DMA_TO_DEVICE;
 
-	dma_unmap_single(mmc_dev(host->mmc), host->adma_addr,
-		(128 * 2 + 1) * 4, DMA_TO_DEVICE);
-
 	dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
 		128 * 4, direction);
 
-	if (data->flags & MMC_DATA_READ) {
+	/* Do a quick scan of the SG list for any unaligned mappings */
+	has_unaligned = false;
+	for_each_sg(data->sg, sg, host->sg_count, i)
+		if (sg_dma_address(sg) & 3) {
+			has_unaligned = true;
+			break;
+		}
+
+	if (has_unaligned && data->flags & MMC_DATA_READ) {
 		dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
 			data->sg_len, direction);
 
@@ -721,9 +699,12 @@
 	u32 dma_irqs = SDHCI_INT_DMA_END | SDHCI_INT_ADMA_ERROR;
 
 	if (host->flags & SDHCI_REQ_USE_DMA)
-		sdhci_clear_set_irqs(host, pio_irqs, dma_irqs);
+		host->ier = (host->ier & ~pio_irqs) | dma_irqs;
 	else
-		sdhci_clear_set_irqs(host, dma_irqs, pio_irqs);
+		host->ier = (host->ier & ~dma_irqs) | pio_irqs;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
@@ -976,8 +957,8 @@
 		 * upon error conditions.
 		 */
 		if (data->error) {
-			sdhci_reset(host, SDHCI_RESET_CMD);
-			sdhci_reset(host, SDHCI_RESET_DATA);
+			sdhci_do_reset(host, SDHCI_RESET_CMD);
+			sdhci_do_reset(host, SDHCI_RESET_DATA);
 		}
 
 		sdhci_send_command(host, data->stop);
@@ -1107,24 +1088,23 @@
 
 static u16 sdhci_get_preset_value(struct sdhci_host *host)
 {
-	u16 ctrl, preset = 0;
+	u16 preset = 0;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
-	switch (ctrl & SDHCI_CTRL_UHS_MASK) {
-	case SDHCI_CTRL_UHS_SDR12:
+	switch (host->timing) {
+	case MMC_TIMING_UHS_SDR12:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
 		break;
-	case SDHCI_CTRL_UHS_SDR25:
+	case MMC_TIMING_UHS_SDR25:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25);
 		break;
-	case SDHCI_CTRL_UHS_SDR50:
+	case MMC_TIMING_UHS_SDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50);
 		break;
-	case SDHCI_CTRL_UHS_SDR104:
+	case MMC_TIMING_UHS_SDR104:
+	case MMC_TIMING_MMC_HS200:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104);
 		break;
-	case SDHCI_CTRL_UHS_DDR50:
+	case MMC_TIMING_UHS_DDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50);
 		break;
 	default:
@@ -1136,32 +1116,22 @@
 	return preset;
 }
 
-static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
 	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
-	if (clock && clock == host->clock)
-		return;
-
 	host->mmc->actual_clock = 0;
 
-	if (host->ops->set_clock) {
-		host->ops->set_clock(host, clock);
-		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
-			return;
-	}
-
 	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
 
 	if (clock == 0)
-		goto out;
+		return;
 
 	if (host->version >= SDHCI_SPEC_300) {
-		if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
-			SDHCI_CTRL_PRESET_VAL_ENABLE) {
+		if (host->preset_enabled) {
 			u16 pre_val;
 
 			clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
@@ -1247,26 +1217,16 @@
 
 	clk |= SDHCI_CLOCK_CARD_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
-
-out:
-	host->clock = clock;
 }
+EXPORT_SYMBOL_GPL(sdhci_set_clock);
 
-static inline void sdhci_update_clock(struct sdhci_host *host)
-{
-	unsigned int clock;
-
-	clock = host->clock;
-	host->clock = 0;
-	sdhci_set_clock(host, clock);
-}
-
-static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
+static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
+			    unsigned short vdd)
 {
 	u8 pwr = 0;
 
-	if (power != (unsigned short)-1) {
-		switch (1 << power) {
+	if (mode != MMC_POWER_OFF) {
+		switch (1 << vdd) {
 		case MMC_VDD_165_195:
 			pwr = SDHCI_POWER_180;
 			break;
@@ -1284,7 +1244,7 @@
 	}
 
 	if (host->pwr == pwr)
-		return -1;
+		return;
 
 	host->pwr = pwr;
 
@@ -1292,38 +1252,43 @@
 		sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
 		if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
 			sdhci_runtime_pm_bus_off(host);
-		return 0;
-	}
+		vdd = 0;
+	} else {
+		/*
+		 * Spec says that we should clear the power reg before setting
+		 * a new value. Some controllers don't seem to like this though.
+		 */
+		if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
+			sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
 
-	/*
-	 * Spec says that we should clear the power reg before setting
-	 * a new value. Some controllers don't seem to like this though.
-	 */
-	if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
-		sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
+		/*
+		 * At least the Marvell CaFe chip gets confused if we set the
+		 * voltage and set turn on power at the same time, so set the
+		 * voltage first.
+		 */
+		if (host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)
+			sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
 
-	/*
-	 * At least the Marvell CaFe chip gets confused if we set the voltage
-	 * and set turn on power at the same time, so set the voltage first.
-	 */
-	if (host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)
+		pwr |= SDHCI_POWER_ON;
+
 		sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
 
-	pwr |= SDHCI_POWER_ON;
+		if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
+			sdhci_runtime_pm_bus_on(host);
 
-	sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+		/*
+		 * Some controllers need an extra delay of 10ms before
+		 * they can apply clock after applying power
+		 */
+		if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
+			mdelay(10);
+	}
 
-	if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
-		sdhci_runtime_pm_bus_on(host);
-
-	/*
-	 * Some controllers need an extra 10ms delay of 10ms before they
-	 * can apply clock after applying power
-	 */
-	if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
-		mdelay(10);
-
-	return power;
+	if (host->vmmc) {
+		spin_unlock_irq(&host->lock);
+		mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
+		spin_lock_irq(&host->lock);
+	}
 }
 
 /*****************************************************************************\
@@ -1427,10 +1392,53 @@
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
+void sdhci_set_bus_width(struct sdhci_host *host, int width)
+{
+	u8 ctrl;
+
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+	if (width == MMC_BUS_WIDTH_8) {
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		if (host->version >= SDHCI_SPEC_300)
+			ctrl |= SDHCI_CTRL_8BITBUS;
+	} else {
+		if (host->version >= SDHCI_SPEC_300)
+			ctrl &= ~SDHCI_CTRL_8BITBUS;
+		if (width == MMC_BUS_WIDTH_4)
+			ctrl |= SDHCI_CTRL_4BITBUS;
+		else
+			ctrl &= ~SDHCI_CTRL_4BITBUS;
+	}
+	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+}
+EXPORT_SYMBOL_GPL(sdhci_set_bus_width);
+
+void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
+{
+	u16 ctrl_2;
+
+	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+	/* Select Bus Speed Mode for host */
+	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+	if ((timing == MMC_TIMING_MMC_HS200) ||
+	    (timing == MMC_TIMING_UHS_SDR104))
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+	else if (timing == MMC_TIMING_UHS_SDR12)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+	else if (timing == MMC_TIMING_UHS_SDR25)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+	else if (timing == MMC_TIMING_UHS_SDR50)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
+	else if ((timing == MMC_TIMING_UHS_DDR50) ||
+		 (timing == MMC_TIMING_MMC_DDR52))
+		ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
+
 static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 {
 	unsigned long flags;
-	int vdd_bit = -1;
 	u8 ctrl;
 
 	spin_lock_irqsave(&host->lock, flags);
@@ -1456,45 +1464,17 @@
 		!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN))
 		sdhci_enable_preset_value(host, false);
 
-	sdhci_set_clock(host, ios->clock);
-
-	if (ios->power_mode == MMC_POWER_OFF)
-		vdd_bit = sdhci_set_power(host, -1);
-	else
-		vdd_bit = sdhci_set_power(host, ios->vdd);
-
-	if (host->vmmc && vdd_bit != -1) {
-		spin_unlock_irqrestore(&host->lock, flags);
-		mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd_bit);
-		spin_lock_irqsave(&host->lock, flags);
+	if (!ios->clock || ios->clock != host->clock) {
+		host->ops->set_clock(host, ios->clock);
+		host->clock = ios->clock;
 	}
 
+	sdhci_set_power(host, ios->power_mode, ios->vdd);
+
 	if (host->ops->platform_send_init_74_clocks)
 		host->ops->platform_send_init_74_clocks(host, ios->power_mode);
 
-	/*
-	 * If your platform has 8-bit width support but is not a v3 controller,
-	 * or if it requires special setup code, you should implement that in
-	 * platform_bus_width().
-	 */
-	if (host->ops->platform_bus_width) {
-		host->ops->platform_bus_width(host, ios->bus_width);
-	} else {
-		ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
-		if (ios->bus_width == MMC_BUS_WIDTH_8) {
-			ctrl &= ~SDHCI_CTRL_4BITBUS;
-			if (host->version >= SDHCI_SPEC_300)
-				ctrl |= SDHCI_CTRL_8BITBUS;
-		} else {
-			if (host->version >= SDHCI_SPEC_300)
-				ctrl &= ~SDHCI_CTRL_8BITBUS;
-			if (ios->bus_width == MMC_BUS_WIDTH_4)
-				ctrl |= SDHCI_CTRL_4BITBUS;
-			else
-				ctrl &= ~SDHCI_CTRL_4BITBUS;
-		}
-		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-	}
+	host->ops->set_bus_width(host, ios->bus_width);
 
 	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
 
@@ -1510,19 +1490,20 @@
 
 		/* In case of UHS-I modes, set High Speed Enable */
 		if ((ios->timing == MMC_TIMING_MMC_HS200) ||
+		    (ios->timing == MMC_TIMING_MMC_DDR52) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR104) ||
 		    (ios->timing == MMC_TIMING_UHS_DDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR25))
 			ctrl |= SDHCI_CTRL_HISPD;
 
-		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-		if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+		if (!host->preset_enabled) {
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 			/*
 			 * We only need to set Driver Strength if the
 			 * preset value enable is not set.
 			 */
+			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK;
 			if (ios->drv_type == MMC_SET_DRIVER_TYPE_A)
 				ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A;
@@ -1546,7 +1527,7 @@
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
 			/* Re-enable SD Clock */
-			sdhci_update_clock(host);
+			host->ops->set_clock(host, host->clock);
 		}
 
 
@@ -1555,25 +1536,8 @@
 		clk &= ~SDHCI_CLOCK_CARD_EN;
 		sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
-		if (host->ops->set_uhs_signaling)
-			host->ops->set_uhs_signaling(host, ios->timing);
-		else {
-			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-			/* Select Bus Speed Mode for host */
-			ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-			if ((ios->timing == MMC_TIMING_MMC_HS200) ||
-			    (ios->timing == MMC_TIMING_UHS_SDR104))
-				ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
-			else if (ios->timing == MMC_TIMING_UHS_SDR12)
-				ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
-			else if (ios->timing == MMC_TIMING_UHS_SDR25)
-				ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
-			else if (ios->timing == MMC_TIMING_UHS_SDR50)
-				ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
-			else if (ios->timing == MMC_TIMING_UHS_DDR50)
-				ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
-			sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
-		}
+		host->ops->set_uhs_signaling(host, ios->timing);
+		host->timing = ios->timing;
 
 		if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
 				((ios->timing == MMC_TIMING_UHS_SDR12) ||
@@ -1590,7 +1554,7 @@
 		}
 
 		/* Re-enable SD Clock */
-		sdhci_update_clock(host);
+		host->ops->set_clock(host, host->clock);
 	} else
 		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
@@ -1600,7 +1564,7 @@
 	 * it on each ios seems to solve the problem.
 	 */
 	if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS)
-		sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+		sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
 
 	mmiowb();
 	spin_unlock_irqrestore(&host->lock, flags);
@@ -1709,24 +1673,16 @@
 
 static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 {
-	if (host->flags & SDHCI_DEVICE_DEAD)
-		goto out;
+	if (!(host->flags & SDHCI_DEVICE_DEAD)) {
+		if (enable)
+			host->ier |= SDHCI_INT_CARD_INT;
+		else
+			host->ier &= ~SDHCI_INT_CARD_INT;
 
-	if (enable)
-		host->flags |= SDHCI_SDIO_IRQ_ENABLED;
-	else
-		host->flags &= ~SDHCI_SDIO_IRQ_ENABLED;
-
-	/* SDIO IRQ will be enabled as appropriate in runtime resume */
-	if (host->runtime_suspended)
-		goto out;
-
-	if (enable)
-		sdhci_unmask_irqs(host, SDHCI_INT_CARD_INT);
-	else
-		sdhci_mask_irqs(host, SDHCI_INT_CARD_INT);
-out:
-	mmiowb();
+		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+		mmiowb();
+	}
 }
 
 static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
@@ -1734,9 +1690,18 @@
 	struct sdhci_host *host = mmc_priv(mmc);
 	unsigned long flags;
 
+	sdhci_runtime_pm_get(host);
+
 	spin_lock_irqsave(&host->lock, flags);
+	if (enable)
+		host->flags |= SDHCI_SDIO_IRQ_ENABLED;
+	else
+		host->flags &= ~SDHCI_SDIO_IRQ_ENABLED;
+
 	sdhci_enable_sdio_irq_nolock(host, enable);
 	spin_unlock_irqrestore(&host->lock, flags);
+
+	sdhci_runtime_pm_put(host);
 }
 
 static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
@@ -1855,22 +1820,15 @@
 
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
-	struct sdhci_host *host;
+	struct sdhci_host *host = mmc_priv(mmc);
 	u16 ctrl;
-	u32 ier;
 	int tuning_loop_counter = MAX_TUNING_LOOP;
-	unsigned long timeout;
 	int err = 0;
-	bool requires_tuning_nonuhs = false;
 	unsigned long flags;
 
-	host = mmc_priv(mmc);
-
 	sdhci_runtime_pm_get(host);
 	spin_lock_irqsave(&host->lock, flags);
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
 	/*
 	 * The Host Controller needs tuning only in case of SDR104 mode
 	 * and for SDR50 mode when Use Tuning for SDR50 is set in the
@@ -1878,15 +1836,18 @@
 	 * If the Host Controller supports the HS200 mode then the
 	 * tuning function has to be executed.
 	 */
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
-	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
-	     host->flags & SDHCI_SDR104_NEEDS_TUNING))
-		requires_tuning_nonuhs = true;
+	switch (host->timing) {
+	case MMC_TIMING_MMC_HS200:
+	case MMC_TIMING_UHS_SDR104:
+		break;
 
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
-	    requires_tuning_nonuhs)
-		ctrl |= SDHCI_CTRL_EXEC_TUNING;
-	else {
+	case MMC_TIMING_UHS_SDR50:
+		if (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
+		    host->flags & SDHCI_SDR104_NEEDS_TUNING)
+			break;
+		/* FALLTHROUGH */
+
+	default:
 		spin_unlock_irqrestore(&host->lock, flags);
 		sdhci_runtime_pm_put(host);
 		return 0;
@@ -1899,6 +1860,8 @@
 		return err;
 	}
 
+	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+	ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 
 	/*
@@ -1911,21 +1874,17 @@
 	 * to make sure we don't hit a controller bug, we _only_
 	 * enable Buffer Read Ready interrupt here.
 	 */
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_INT_ENABLE);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_SIGNAL_ENABLE);
 
 	/*
 	 * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number
 	 * of loops reaches 40 times or a timeout of 150ms occurs.
 	 */
-	timeout = 150;
 	do {
 		struct mmc_command cmd = {0};
 		struct mmc_request mrq = {NULL};
 
-		if (!tuning_loop_counter && !timeout)
-			break;
-
 		cmd.opcode = opcode;
 		cmd.arg = 0;
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -1933,6 +1892,9 @@
 		cmd.data = NULL;
 		cmd.error = 0;
 
+		if (tuning_loop_counter-- == 0)
+			break;
+
 		mrq.cmd = &cmd;
 		host->mrq = &mrq;
 
@@ -1990,26 +1952,25 @@
 		host->tuning_done = 0;
 
 		ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-		tuning_loop_counter--;
-		timeout--;
-		mdelay(1);
+
+		/* eMMC spec does not require a delay between tuning cycles */
+		if (opcode == MMC_SEND_TUNING_BLOCK)
+			mdelay(1);
 	} while (ctrl & SDHCI_CTRL_EXEC_TUNING);
 
 	/*
 	 * The Host Driver has exhausted the maximum number of loops allowed,
 	 * so use fixed sampling frequency.
 	 */
-	if (!tuning_loop_counter || !timeout) {
+	if (tuning_loop_counter < 0) {
 		ctrl &= ~SDHCI_CTRL_TUNED_CLK;
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+	}
+	if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
+		pr_info(DRIVER_NAME ": Tuning procedure"
+			" failed, falling back to fixed sampling"
+			" clock\n");
 		err = -EIO;
-	} else {
-		if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
-			pr_info(DRIVER_NAME ": Tuning procedure"
-				" failed, falling back to fixed sampling"
-				" clock\n");
-			err = -EIO;
-		}
 	}
 
 out:
@@ -2044,7 +2005,8 @@
 	if (err && (host->flags & SDHCI_USING_RETUNING_TIMER))
 		err = 0;
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 	sdhci_runtime_pm_put(host);
 
@@ -2054,26 +2016,30 @@
 
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
 {
-	u16 ctrl;
-
 	/* Host Controller v3.00 defines preset value registers */
 	if (host->version < SDHCI_SPEC_300)
 		return;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
 	/*
 	 * We only enable or disable Preset Value if they are not already
 	 * enabled or disabled respectively. Otherwise, we bail out.
 	 */
-	if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+	if (host->preset_enabled != enable) {
+		u16 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+		if (enable)
+			ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+		else
+			ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags |= SDHCI_PV_ENABLED;
-	} else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
-		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags &= ~SDHCI_PV_ENABLED;
+
+		if (enable)
+			host->flags |= SDHCI_PV_ENABLED;
+		else
+			host->flags &= ~SDHCI_PV_ENABLED;
+
+		host->preset_enabled = enable;
 	}
 }
 
@@ -2095,8 +2061,8 @@
 		pr_err("%s: Resetting controller.\n",
 			mmc_hostname(host->mmc));
 
-		sdhci_reset(host, SDHCI_RESET_CMD);
-		sdhci_reset(host, SDHCI_RESET_DATA);
+		sdhci_do_reset(host, SDHCI_RESET_CMD);
+		sdhci_do_reset(host, SDHCI_RESET_DATA);
 
 		host->mrq->cmd->error = -ENOMEDIUM;
 		tasklet_schedule(&host->finish_tasklet);
@@ -2124,15 +2090,6 @@
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_tasklet_card(unsigned long param)
-{
-	struct sdhci_host *host = (struct sdhci_host*)param;
-
-	sdhci_card_event(host->mmc);
-
-	mmc_detect_change(host->mmc, msecs_to_jiffies(200));
-}
-
 static void sdhci_tasklet_finish(unsigned long param)
 {
 	struct sdhci_host *host;
@@ -2169,12 +2126,12 @@
 		/* Some controllers need this kick or reset won't work here */
 		if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
 			/* This is to force an update */
-			sdhci_update_clock(host);
+			host->ops->set_clock(host, host->clock);
 
 		/* Spec says we should do both at the same time, but Ricoh
 		   controllers do not like that. */
-		sdhci_reset(host, SDHCI_RESET_CMD);
-		sdhci_reset(host, SDHCI_RESET_DATA);
+		sdhci_do_reset(host, SDHCI_RESET_CMD);
+		sdhci_do_reset(host, SDHCI_RESET_DATA);
 	}
 
 	host->mrq = NULL;
@@ -2424,101 +2381,94 @@
 
 static irqreturn_t sdhci_irq(int irq, void *dev_id)
 {
-	irqreturn_t result;
+	irqreturn_t result = IRQ_NONE;
 	struct sdhci_host *host = dev_id;
-	u32 intmask, unexpected = 0;
-	int cardint = 0, max_loops = 16;
+	u32 intmask, mask, unexpected = 0;
+	int max_loops = 16;
 
 	spin_lock(&host->lock);
 
-	if (host->runtime_suspended) {
+	if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
 		spin_unlock(&host->lock);
 		return IRQ_NONE;
 	}
 
 	intmask = sdhci_readl(host, SDHCI_INT_STATUS);
-
 	if (!intmask || intmask == 0xffffffff) {
 		result = IRQ_NONE;
 		goto out;
 	}
 
-again:
-	DBG("*** %s got interrupt: 0x%08x\n",
-		mmc_hostname(host->mmc), intmask);
+	do {
+		/* Clear selected interrupts. */
+		mask = intmask & (SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
+				  SDHCI_INT_BUS_POWER);
+		sdhci_writel(host, mask, SDHCI_INT_STATUS);
 
-	if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
-		u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-			      SDHCI_CARD_PRESENT;
+		DBG("*** %s got interrupt: 0x%08x\n",
+			mmc_hostname(host->mmc), intmask);
 
-		/*
-		 * There is a observation on i.mx esdhc.  INSERT bit will be
-		 * immediately set again when it gets cleared, if a card is
-		 * inserted.  We have to mask the irq to prevent interrupt
-		 * storm which will freeze the system.  And the REMOVE gets
-		 * the same situation.
-		 *
-		 * More testing are needed here to ensure it works for other
-		 * platforms though.
-		 */
-		sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
-						SDHCI_INT_CARD_REMOVE);
-		sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
-						  SDHCI_INT_CARD_INSERT);
+		if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+			u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+				      SDHCI_CARD_PRESENT;
 
-		sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
-			     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
-		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
-		tasklet_schedule(&host->card_tasklet);
-	}
+			/*
+			 * There is a observation on i.mx esdhc.  INSERT
+			 * bit will be immediately set again when it gets
+			 * cleared, if a card is inserted.  We have to mask
+			 * the irq to prevent interrupt storm which will
+			 * freeze the system.  And the REMOVE gets the
+			 * same situation.
+			 *
+			 * More testing are needed here to ensure it works
+			 * for other platforms though.
+			 */
+			host->ier &= ~(SDHCI_INT_CARD_INSERT |
+				       SDHCI_INT_CARD_REMOVE);
+			host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+					       SDHCI_INT_CARD_INSERT;
+			sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+			sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
-	if (intmask & SDHCI_INT_CMD_MASK) {
-		sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK,
-			SDHCI_INT_STATUS);
-		sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
-	}
+			sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
+				     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
 
-	if (intmask & SDHCI_INT_DATA_MASK) {
-		sdhci_writel(host, intmask & SDHCI_INT_DATA_MASK,
-			SDHCI_INT_STATUS);
-		sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
-	}
+			host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
+						       SDHCI_INT_CARD_REMOVE);
+			result = IRQ_WAKE_THREAD;
+		}
 
-	intmask &= ~(SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK);
+		if (intmask & SDHCI_INT_CMD_MASK)
+			sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
 
-	intmask &= ~SDHCI_INT_ERROR;
+		if (intmask & SDHCI_INT_DATA_MASK)
+			sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
 
-	if (intmask & SDHCI_INT_BUS_POWER) {
-		pr_err("%s: Card is consuming too much power!\n",
-			mmc_hostname(host->mmc));
-		sdhci_writel(host, SDHCI_INT_BUS_POWER, SDHCI_INT_STATUS);
-	}
+		if (intmask & SDHCI_INT_BUS_POWER)
+			pr_err("%s: Card is consuming too much power!\n",
+				mmc_hostname(host->mmc));
 
-	intmask &= ~SDHCI_INT_BUS_POWER;
+		if (intmask & SDHCI_INT_CARD_INT) {
+			sdhci_enable_sdio_irq_nolock(host, false);
+			host->thread_isr |= SDHCI_INT_CARD_INT;
+			result = IRQ_WAKE_THREAD;
+		}
 
-	if (intmask & SDHCI_INT_CARD_INT)
-		cardint = 1;
+		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
+			     SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
+			     SDHCI_INT_ERROR | SDHCI_INT_BUS_POWER |
+			     SDHCI_INT_CARD_INT);
 
-	intmask &= ~SDHCI_INT_CARD_INT;
+		if (intmask) {
+			unexpected |= intmask;
+			sdhci_writel(host, intmask, SDHCI_INT_STATUS);
+		}
 
-	if (intmask) {
-		unexpected |= intmask;
-		sdhci_writel(host, intmask, SDHCI_INT_STATUS);
-	}
+		if (result == IRQ_NONE)
+			result = IRQ_HANDLED;
 
-	result = IRQ_HANDLED;
-
-	intmask = sdhci_readl(host, SDHCI_INT_STATUS);
-
-	/*
-	 * If we know we'll call the driver to signal SDIO IRQ, disregard
-	 * further indications of Card Interrupt in the status to avoid a
-	 * needless loop.
-	 */
-	if (cardint)
-		intmask &= ~SDHCI_INT_CARD_INT;
-	if (intmask && --max_loops)
-		goto again;
+		intmask = sdhci_readl(host, SDHCI_INT_STATUS);
+	} while (intmask && --max_loops);
 out:
 	spin_unlock(&host->lock);
 
@@ -2527,15 +2477,38 @@
 			   mmc_hostname(host->mmc), unexpected);
 		sdhci_dumpregs(host);
 	}
-	/*
-	 * We have to delay this as it calls back into the driver.
-	 */
-	if (cardint)
-		mmc_signal_sdio_irq(host->mmc);
 
 	return result;
 }
 
+static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
+{
+	struct sdhci_host *host = dev_id;
+	unsigned long flags;
+	u32 isr;
+	/* Threaded half of sdhci_irq(): handle the events it queued in host->thread_isr */
+	spin_lock_irqsave(&host->lock, flags);
+	isr = host->thread_isr;
+	host->thread_isr = 0;	/* consumed; sdhci_irq() ORs new events in */
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+		sdhci_card_event(host->mmc);
+		mmc_detect_change(host->mmc, msecs_to_jiffies(200));
+	}
+
+	if (isr & SDHCI_INT_CARD_INT) {
+		sdio_run_irqs(host->mmc);
+
+		spin_lock_irqsave(&host->lock, flags);
+		if (host->flags & SDHCI_SDIO_IRQ_ENABLED)	/* sdhci_irq() disabled it */
+			sdhci_enable_sdio_irq_nolock(host, true);
+		spin_unlock_irqrestore(&host->lock, flags);
+	}
+
+	return isr ? IRQ_HANDLED : IRQ_NONE;	/* IRQ_NONE: nothing was queued */
+}
+
 /*****************************************************************************\
  *                                                                           *
  * Suspend/resume                                                            *
@@ -2572,9 +2545,6 @@
 
 int sdhci_suspend_host(struct sdhci_host *host)
 {
-	if (host->ops->platform_suspend)
-		host->ops->platform_suspend(host);
-
 	sdhci_disable_card_detection(host);
 
 	/* Disable tuning since we are suspending */
@@ -2584,7 +2554,9 @@
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+		host->ier = 0;
+		sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+		sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 		free_irq(host->irq, host);
 	} else {
 		sdhci_enable_irq_wakeups(host);
@@ -2605,8 +2577,9 @@
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-				  mmc_hostname(host->mmc), host);
+		ret = request_threaded_irq(host->irq, sdhci_irq,
+					   sdhci_thread_irq, IRQF_SHARED,
+					   mmc_hostname(host->mmc), host);
 		if (ret)
 			return ret;
 	} else {
@@ -2628,9 +2601,6 @@
 
 	sdhci_enable_card_detection(host);
 
-	if (host->ops->platform_resume)
-		host->ops->platform_resume(host);
-
 	/* Set the re-tuning expiration flag */
 	if (host->flags & SDHCI_USING_RETUNING_TIMER)
 		host->flags |= SDHCI_NEEDS_RETUNING;
@@ -2682,10 +2652,12 @@
 	}
 
 	spin_lock_irqsave(&host->lock, flags);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	host->ier &= SDHCI_INT_CARD_INT;
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	synchronize_irq(host->irq);
+	synchronize_hardirq(host->irq);
 
 	spin_lock_irqsave(&host->lock, flags);
 	host->runtime_suspended = true;
@@ -2729,7 +2701,7 @@
 	host->runtime_suspended = false;
 
 	/* Enable SDIO IRQ */
-	if ((host->flags & SDHCI_SDIO_IRQ_ENABLED))
+	if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
 		sdhci_enable_sdio_irq_nolock(host, true);
 
 	/* Enable Card Detection */
@@ -2788,7 +2760,7 @@
 	if (debug_quirks2)
 		host->quirks2 = debug_quirks2;
 
-	sdhci_reset(host, SDHCI_RESET_ALL);
+	sdhci_do_reset(host, SDHCI_RESET_ALL);
 
 	host->version = sdhci_readw(host, SDHCI_HOST_VERSION);
 	host->version = (host->version & SDHCI_SPEC_VER_MASK)
@@ -2848,15 +2820,29 @@
 		 * (128) and potentially one alignment transfer for
 		 * each of those entries.
 		 */
-		host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL);
+		host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc),
+						     ADMA_SIZE, &host->adma_addr,
+						     GFP_KERNEL);
 		host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
 		if (!host->adma_desc || !host->align_buffer) {
-			kfree(host->adma_desc);
+			dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+					  host->adma_desc, host->adma_addr);
 			kfree(host->align_buffer);
 			pr_warning("%s: Unable to allocate ADMA "
 				"buffers. Falling back to standard DMA.\n",
 				mmc_hostname(mmc));
 			host->flags &= ~SDHCI_USE_ADMA;
+			host->adma_desc = NULL;
+			host->align_buffer = NULL;
+		} else if (host->adma_addr & 3) {
+			pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
+				   mmc_hostname(mmc));
+			host->flags &= ~SDHCI_USE_ADMA;
+			dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+					  host->adma_desc, host->adma_addr);
+			kfree(host->align_buffer);
+			host->adma_desc = NULL;
+			host->align_buffer = NULL;
 		}
 	}
 
@@ -2941,6 +2927,7 @@
 	mmc->max_busy_timeout = (1 << 27) / host->timeout_clk;
 
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
 		host->flags |= SDHCI_AUTO_CMD12;
@@ -3212,8 +3199,6 @@
 	/*
 	 * Init tasklets.
 	 */
-	tasklet_init(&host->card_tasklet,
-		sdhci_tasklet_card, (unsigned long)host);
 	tasklet_init(&host->finish_tasklet,
 		sdhci_tasklet_finish, (unsigned long)host);
 
@@ -3230,8 +3215,8 @@
 
 	sdhci_init(host, 0);
 
-	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-		mmc_hostname(mmc), host);
+	ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
+				   IRQF_SHARED,	mmc_hostname(mmc), host);
 	if (ret) {
 		pr_err("%s: Failed to request IRQ %d: %d\n",
 		       mmc_hostname(mmc), host->irq, ret);
@@ -3273,12 +3258,12 @@
 
 #ifdef SDHCI_USE_LEDS_CLASS
 reset:
-	sdhci_reset(host, SDHCI_RESET_ALL);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_do_reset(host, SDHCI_RESET_ALL);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 #endif
 untasklet:
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	return ret;
@@ -3315,14 +3300,14 @@
 #endif
 
 	if (!dead)
-		sdhci_reset(host, SDHCI_RESET_ALL);
+		sdhci_do_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 
 	del_timer_sync(&host->timer);
 
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	if (host->vmmc) {
@@ -3335,7 +3320,9 @@
 		regulator_put(host->vqmmc);
 	}
 
-	kfree(host->adma_desc);
+	if (host->adma_desc)
+		dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+				  host->adma_desc, host->adma_addr);
 	kfree(host->align_buffer);
 
 	host->adma_desc = NULL;

diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0a3ed01..4a5cd5e 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h

@@ -281,18 +281,14 @@
 	unsigned int	(*get_max_clock)(struct sdhci_host *host);
 	unsigned int	(*get_min_clock)(struct sdhci_host *host);
 	unsigned int	(*get_timeout_clock)(struct sdhci_host *host);
-	int		(*platform_bus_width)(struct sdhci_host *host,
-					       int width);
+	void		(*set_bus_width)(struct sdhci_host *host, int width);
 	void (*platform_send_init_74_clocks)(struct sdhci_host *host,
 					     u8 power_mode);
 	unsigned int    (*get_ro)(struct sdhci_host *host);
-	void	(*platform_reset_enter)(struct sdhci_host *host, u8 mask);
-	void	(*platform_reset_exit)(struct sdhci_host *host, u8 mask);
+	void		(*reset)(struct sdhci_host *host, u8 mask);
 	int	(*platform_execute_tuning)(struct sdhci_host *host, u32 opcode);
-	int	(*set_uhs_signaling)(struct sdhci_host *host, unsigned int uhs);
+	void	(*set_uhs_signaling)(struct sdhci_host *host, unsigned int uhs);
 	void	(*hw_reset)(struct sdhci_host *host);
-	void	(*platform_suspend)(struct sdhci_host *host);
-	void	(*platform_resume)(struct sdhci_host *host);
 	void    (*adma_workaround)(struct sdhci_host *host, u32 intmask);
 	void	(*platform_init)(struct sdhci_host *host);
 	void    (*card_event)(struct sdhci_host *host);
@@ -397,6 +393,16 @@
 extern void sdhci_send_command(struct sdhci_host *host,
 				struct mmc_command *cmd);
 
+static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host)
+{
+	return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);	/* true iff the flag bit is set */
+}
+
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
+void sdhci_set_bus_width(struct sdhci_host *host, int width);
+void sdhci_reset(struct sdhci_host *host, u8 mask);
+void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing);
+
 #ifdef CONFIG_PM
 extern int sdhci_suspend_host(struct sdhci_host *host);
 extern int sdhci_resume_host(struct sdhci_host *host);

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 54730f4..656fbba 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c

@@ -803,12 +803,13 @@
 			break;
 		}
 		switch (host->timing) {
-		case MMC_TIMING_UHS_DDR50:
+		case MMC_TIMING_MMC_DDR52:
 			/*
 			 * MMC core will only set this timing, if the host
-			 * advertises the MMC_CAP_UHS_DDR50 capability. MMCIF
-			 * implementations with this capability, e.g. sh73a0,
-			 * will have to set it in their platform data.
+			 * advertises the MMC_CAP_1_8V_DDR/MMC_CAP_1_2V_DDR
+			 * capability. MMCIF implementations with this
+			 * capability, e.g. sh73a0, will have to set it
+			 * in their platform data.
 			 */
 			tmp |= CMD_SET_DARS;
 			break;

diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
new file mode 100644
index 0000000..f0a39eb
--- /dev/null
+++ b/drivers/mmc/host/usdhi6rol0.c

@@ -0,0 +1,1847 @@
+/*
+ * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/sdio.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/virtio.h>
+#include <linux/workqueue.h>
+/* Register offsets, added to host->base by the usdhi6_read*()/usdhi6_write*() accessors */
+#define USDHI6_SD_CMD		0x0000
+#define USDHI6_SD_PORT_SEL	0x0004
+#define USDHI6_SD_ARG		0x0008
+#define USDHI6_SD_STOP		0x0010
+#define USDHI6_SD_SECCNT	0x0014
+#define USDHI6_SD_RSP10		0x0018
+#define USDHI6_SD_RSP32		0x0020
+#define USDHI6_SD_RSP54		0x0028
+#define USDHI6_SD_RSP76		0x0030
+#define USDHI6_SD_INFO1		0x0038
+#define USDHI6_SD_INFO2		0x003c
+#define USDHI6_SD_INFO1_MASK	0x0040
+#define USDHI6_SD_INFO2_MASK	0x0044
+#define USDHI6_SD_CLK_CTRL	0x0048
+#define USDHI6_SD_SIZE		0x004c
+#define USDHI6_SD_OPTION	0x0050
+#define USDHI6_SD_ERR_STS1	0x0058
+#define USDHI6_SD_ERR_STS2	0x005c
+#define USDHI6_SD_BUF0		0x0060
+#define USDHI6_SDIO_MODE	0x0068
+#define USDHI6_SDIO_INFO1	0x006c
+#define USDHI6_SDIO_INFO1_MASK	0x0070
+#define USDHI6_CC_EXT_MODE	0x01b0
+#define USDHI6_SOFT_RST		0x01c0
+#define USDHI6_VERSION		0x01c4
+#define USDHI6_HOST_MODE	0x01c8
+#define USDHI6_SDIF_MODE	0x01cc
+
+#define USDHI6_SD_CMD_APP		0x0040
+#define USDHI6_SD_CMD_MODE_RSP_AUTO	0x0000
+#define USDHI6_SD_CMD_MODE_RSP_NONE	0x0300
+#define USDHI6_SD_CMD_MODE_RSP_R1	0x0400	/* Also R5, R6, R7 */
+#define USDHI6_SD_CMD_MODE_RSP_R1B	0x0500	/* R1b */
+#define USDHI6_SD_CMD_MODE_RSP_R2	0x0600
+#define USDHI6_SD_CMD_MODE_RSP_R3	0x0700	/* Also R4 */
+#define USDHI6_SD_CMD_DATA		0x0800
+#define USDHI6_SD_CMD_READ		0x1000
+#define USDHI6_SD_CMD_MULTI		0x2000
+#define USDHI6_SD_CMD_CMD12_AUTO_OFF	0x4000
+
+#define USDHI6_CC_EXT_MODE_SDRW		BIT(1)
+
+#define USDHI6_SD_INFO1_RSP_END		BIT(0)
+#define USDHI6_SD_INFO1_ACCESS_END	BIT(2)
+#define USDHI6_SD_INFO1_CARD_OUT	BIT(3)
+#define USDHI6_SD_INFO1_CARD_IN		BIT(4)
+#define USDHI6_SD_INFO1_CD		BIT(5)
+#define USDHI6_SD_INFO1_WP		BIT(7)
+#define USDHI6_SD_INFO1_D3_CARD_OUT	BIT(8)
+#define USDHI6_SD_INFO1_D3_CARD_IN	BIT(9)
+
+#define USDHI6_SD_INFO2_CMD_ERR		BIT(0)
+#define USDHI6_SD_INFO2_CRC_ERR		BIT(1)
+#define USDHI6_SD_INFO2_END_ERR		BIT(2)
+#define USDHI6_SD_INFO2_TOUT		BIT(3)
+#define USDHI6_SD_INFO2_IWA_ERR		BIT(4)
+#define USDHI6_SD_INFO2_IRA_ERR		BIT(5)
+#define USDHI6_SD_INFO2_RSP_TOUT	BIT(6)
+#define USDHI6_SD_INFO2_SDDAT0		BIT(7)
+#define USDHI6_SD_INFO2_BRE		BIT(8)
+#define USDHI6_SD_INFO2_BWE		BIT(9)
+#define USDHI6_SD_INFO2_SCLKDIVEN	BIT(13)
+#define USDHI6_SD_INFO2_CBSY		BIT(14)
+#define USDHI6_SD_INFO2_ILA		BIT(15)
+
+#define USDHI6_SD_INFO1_CARD_INSERT (USDHI6_SD_INFO1_CARD_IN | USDHI6_SD_INFO1_D3_CARD_IN)
+#define USDHI6_SD_INFO1_CARD_EJECT (USDHI6_SD_INFO1_CARD_OUT | USDHI6_SD_INFO1_D3_CARD_OUT)
+#define USDHI6_SD_INFO1_CARD (USDHI6_SD_INFO1_CARD_INSERT | USDHI6_SD_INFO1_CARD_EJECT)
+#define USDHI6_SD_INFO1_CARD_CD (USDHI6_SD_INFO1_CARD_IN | USDHI6_SD_INFO1_CARD_OUT)
+/* Composite masks built from the single-bit INFO1/INFO2 definitions above */
+#define USDHI6_SD_INFO2_ERR	(USDHI6_SD_INFO2_CMD_ERR |	\
+	USDHI6_SD_INFO2_CRC_ERR | USDHI6_SD_INFO2_END_ERR |	\
+	USDHI6_SD_INFO2_TOUT | USDHI6_SD_INFO2_IWA_ERR |	\
+	USDHI6_SD_INFO2_IRA_ERR | USDHI6_SD_INFO2_RSP_TOUT |	\
+	USDHI6_SD_INFO2_ILA)
+/* The *_IRQ sets are the sources usdhi6_irq_enable() masks or unmasks */
+#define USDHI6_SD_INFO1_IRQ	(USDHI6_SD_INFO1_RSP_END | USDHI6_SD_INFO1_ACCESS_END | \
+				 USDHI6_SD_INFO1_CARD)
+/* 0x0800 (bit 11) has no symbolic name in this file; ILA is already part of _ERR */
+#define USDHI6_SD_INFO2_IRQ	(USDHI6_SD_INFO2_ERR | USDHI6_SD_INFO2_BRE | \
+				 USDHI6_SD_INFO2_BWE | 0x0800 | USDHI6_SD_INFO2_ILA)
+
+#define USDHI6_SD_CLK_CTRL_SCLKEN	BIT(8)
+
+#define USDHI6_SD_STOP_STP		BIT(0)
+#define USDHI6_SD_STOP_SEC		BIT(8)
+
+#define USDHI6_SDIO_INFO1_IOIRQ		BIT(0)
+#define USDHI6_SDIO_INFO1_EXPUB52	BIT(14)
+#define USDHI6_SDIO_INFO1_EXWT		BIT(15)
+
+#define USDHI6_SD_ERR_STS1_CRC_NO_ERROR	BIT(13)
+
+#define USDHI6_SOFT_RST_RESERVED	(BIT(1) | BIT(2))
+#define USDHI6_SOFT_RST_RESET		BIT(0)
+
+#define USDHI6_SD_OPTION_TIMEOUT_SHIFT	4
+#define USDHI6_SD_OPTION_TIMEOUT_MASK	(0xf << USDHI6_SD_OPTION_TIMEOUT_SHIFT)
+#define USDHI6_SD_OPTION_WIDTH_1	BIT(15)
+
+#define USDHI6_SD_PORT_SEL_PORTS_SHIFT	8
+
+#define USDHI6_SD_CLK_CTRL_DIV_MASK	0xff
+/* The literal 3 covers bits 0-1; bit 0 is also USDHI6_SDIO_INFO1_IOIRQ */
+#define USDHI6_SDIO_INFO1_IRQ	(USDHI6_SDIO_INFO1_IOIRQ | 3 | \
+				 USDHI6_SDIO_INFO1_EXPUB52 | USDHI6_SDIO_INFO1_EXWT)
+
+#define USDHI6_MIN_DMA 64
+/* Value kept in host->wait; the error logs print it as "state" */
+enum usdhi6_wait_for {
+	USDHI6_WAIT_FOR_REQUEST,
+	USDHI6_WAIT_FOR_CMD,
+	USDHI6_WAIT_FOR_MREAD,
+	USDHI6_WAIT_FOR_MWRITE,
+	USDHI6_WAIT_FOR_READ,
+	USDHI6_WAIT_FOR_WRITE,
+	USDHI6_WAIT_FOR_DATA_END,	/* set by usdhi6_sg_advance() after the last SG segment */
+	USDHI6_WAIT_FOR_STOP,
+	USDHI6_WAIT_FOR_DMA,
+};
+/* A page taken from a scatterlist together with the address kmap() returned for it */
+struct usdhi6_page {
+	struct page *page;	/* NULL when nothing is mapped */
+	void *mapped;		/* mapped page */
+};
+/* Per-controller driver state */
+struct usdhi6_host {
+	struct mmc_host *mmc;
+	struct mmc_request *mrq;
+	void __iomem *base;	/* register accessors add the offset to this */
+	struct clk *clk;
+
+	/* SG memory handling */
+
+	/* Common for multiple and single block requests */
+	struct usdhi6_page pg;	/* current page from an SG */
+	void *blk_page;		/* either a mapped page, or the bounce buffer */
+	size_t offset;		/* offset within a page, including sg->offset */
+
+	/* Blocks, crossing a page boundary */
+	size_t head_len;	/* bytes of such a block that lie in the first page */
+	struct usdhi6_page head_pg;	/* first page; .page is non-NULL only while bouncing */
+
+	/* A bounce buffer for unaligned blocks or blocks, crossing a page boundary */
+	struct scatterlist bounce_sg;
+	u8 bounce_buf[512];
+
+	/* Multiple block requests only */
+	struct scatterlist *sg;	/* current SG segment */
+	int page_idx;		/* page index within an SG segment */
+
+	enum usdhi6_wait_for wait;
+	u32 status_mask;	/* USDHI6_SD_INFO1_MASK value set by usdhi6_irq_enable() */
+	u32 status2_mask;	/* USDHI6_SD_INFO2_MASK value set by usdhi6_irq_enable() */
+	u32 sdio_mask;
+	u32 io_error;
+	u32 irq_status;
+	unsigned long imclk;
+	unsigned long rate;
+	bool app_cmd;
+
+	/* Timeout handling */
+	struct delayed_work timeout_work;
+	unsigned long timeout;
+
+	/* DMA support */
+	struct dma_chan *chan_rx;
+	struct dma_chan *chan_tx;
+	bool dma_active;	/* set once the SG list is DMA-mapped, see usdhi6_dma_setup() */
+};
+
+/*			I/O primitives					*/
+/* Write 32-bit @data to the register at offset @reg and trace it via dev_vdbg() */
+static void usdhi6_write(struct usdhi6_host *host, u32 reg, u32 data)
+{
+	iowrite32(data, host->base + reg);
+	dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+		host->base, reg, data);
+}
+/* Write 16-bit @data to the register at offset @reg and trace it via dev_vdbg() */
+static void usdhi6_write16(struct usdhi6_host *host, u32 reg, u16 data)
+{
+	iowrite16(data, host->base + reg);
+	dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+		host->base, reg, data);
+}
+/* Read the 32-bit register at offset @reg, trace it via dev_vdbg() and return the value */
+static u32 usdhi6_read(struct usdhi6_host *host, u32 reg)
+{
+	u32 data = ioread32(host->base + reg);
+	dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+		host->base, reg, data);
+	return data;
+}
+/* Read the 16-bit register at offset @reg, trace it via dev_vdbg() and return the value */
+static u16 usdhi6_read16(struct usdhi6_host *host, u32 reg)
+{
+	u16 data = ioread16(host->base + reg);
+	dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+		host->base, reg, data);
+	return data;
+}
+/* Unmask the @info1/@info2 sources; every other bit of the *_IRQ sets is written as masked */
+static void usdhi6_irq_enable(struct usdhi6_host *host, u32 info1, u32 info2)
+{
+	host->status_mask = USDHI6_SD_INFO1_IRQ & ~info1;	/* set bit = masked source */
+	host->status2_mask = USDHI6_SD_INFO2_IRQ & ~info2;
+	usdhi6_write(host, USDHI6_SD_INFO1_MASK, host->status_mask);
+	usdhi6_write(host, USDHI6_SD_INFO2_MASK, host->status2_mask);
+}
+/* Enable the response-end, access-end, card-detect and all INFO2 error interrupts */
+static void usdhi6_wait_for_resp(struct usdhi6_host *host)
+{
+	usdhi6_irq_enable(host, USDHI6_SD_INFO1_RSP_END |
+			  USDHI6_SD_INFO1_ACCESS_END | USDHI6_SD_INFO1_CARD_CD,
+			  USDHI6_SD_INFO2_ERR);
+}
+/* Enable access-end, card-detect, error and BRE (if @read) or BWE (otherwise) interrupts */
+static void usdhi6_wait_for_brwe(struct usdhi6_host *host, bool read)
+{
+	usdhi6_irq_enable(host, USDHI6_SD_INFO1_ACCESS_END |
+			  USDHI6_SD_INFO1_CARD_CD, USDHI6_SD_INFO2_ERR |
+			  (read ? USDHI6_SD_INFO2_BRE : USDHI6_SD_INFO2_BWE));
+}
+/* Leave only the CARD_IN/CARD_OUT interrupts (USDHI6_SD_INFO1_CARD_CD) enabled */
+static void usdhi6_only_cd(struct usdhi6_host *host)
+{
+	/* Mask all except card hotplug */
+	usdhi6_irq_enable(host, USDHI6_SD_INFO1_CARD_CD, 0);
+}
+/* Mask every source in USDHI6_SD_INFO1_IRQ and USDHI6_SD_INFO2_IRQ */
+static void usdhi6_mask_all(struct usdhi6_host *host)
+{
+	usdhi6_irq_enable(host, 0, 0);	/* nothing enabled, so both mask values are the full sets */
+}
+/* Write STP to SD_STOP, log the error status and return -ETIMEDOUT, -EILSEQ or -EIO */
+static int usdhi6_error_code(struct usdhi6_host *host)
+{
+	u32 err;
+
+	usdhi6_write(host, USDHI6_SD_STOP, USDHI6_SD_STOP_STP);
+
+	if (host->io_error &
+	    (USDHI6_SD_INFO2_RSP_TOUT | USDHI6_SD_INFO2_TOUT)) {
+		u32 rsp54 = usdhi6_read(host, USDHI6_SD_RSP54);
+		int opc = host->mrq ? host->mrq->cmd->opcode : -1;	/* -1: host->mrq is NULL */
+
+		err = usdhi6_read(host, USDHI6_SD_ERR_STS2);
+		/* Response timeout is often normal, don't spam the log */
+		if (host->wait == USDHI6_WAIT_FOR_CMD)
+			dev_dbg(mmc_dev(host->mmc),
+				"T-out sts 0x%x, resp 0x%x, state %u, CMD%d\n",
+				err, rsp54, host->wait, opc);
+		else
+			dev_warn(mmc_dev(host->mmc),
+				 "T-out sts 0x%x, resp 0x%x, state %u, CMD%d\n",
+				 err, rsp54, host->wait, opc);
+		return -ETIMEDOUT;
+	}
+
+	err = usdhi6_read(host, USDHI6_SD_ERR_STS1);
+	if (err != USDHI6_SD_ERR_STS1_CRC_NO_ERROR)	/* anything else in ERR_STS1 is logged */
+		dev_warn(mmc_dev(host->mmc), "Err sts 0x%x, state %u, CMD%d\n",
+			 err, host->wait, host->mrq ? host->mrq->cmd->opcode : -1);
+	if (host->io_error & USDHI6_SD_INFO2_ILA)
+		return -EILSEQ;
+
+	return -EIO;	/* neither a timeout nor ILA */
+}
+
+/*			Scatter-Gather management			*/
+
+/*
+ * In PIO mode we have to map each page separately, using kmap(). That way
+ * adjacent pages are mapped to non-adjacent virtual addresses. That's why we
+ * have to use a bounce buffer for blocks crossing page boundaries. Such blocks
+ * have been observed with an SDIO WiFi card (b43 driver).
+ */
+static void usdhi6_blk_bounce(struct usdhi6_host *host,
+			      struct scatterlist *sg)
+{
+	struct mmc_data *data = host->mrq->data;
+	size_t blk_head = host->head_len;	/* part of the block inside the first page */
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): CMD%u of %u SG: %ux%u @ 0x%x\n",
+		__func__, host->mrq->cmd->opcode, data->sg_len,
+		data->blksz, data->blocks, sg->offset);
+
+	host->head_pg.page	= host->pg.page;
+	host->head_pg.mapped	= host->pg.mapped;
+	host->pg.page		= nth_page(host->pg.page, 1);	/* page holding the block's tail */
+	host->pg.mapped		= kmap(host->pg.page);
+
+	host->blk_page = host->bounce_buf;	/* IO for this block goes via the bounce buffer */
+	host->offset = 0;
+
+	if (data->flags & MMC_DATA_READ)
+		return;	/* reads: usdhi6_sg_unmap() copies the data out afterwards */
+
+	memcpy(host->bounce_buf, host->head_pg.mapped + PAGE_SIZE - blk_head,
+	       blk_head);	/* last blk_head bytes of the first page */
+	memcpy(host->bounce_buf + blk_head, host->pg.mapped,
+	       data->blksz - blk_head);	/* remainder from the start of the next page */
+}
+
+/* Only called for multiple block IO: program the block count, start at the first SG segment */
+static void usdhi6_sg_prep(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_data *data = mrq->data;
+
+	usdhi6_write(host, USDHI6_SD_SECCNT, data->blocks);
+
+	host->sg = data->sg;
+	/* TODO: if we always map, this is redundant */
+	host->offset = host->sg->offset;
+}
+
+/* Map the first page in an SG segment: common for multiple and single block IO */
+static void *usdhi6_sg_map(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	struct scatterlist *sg = data->sg_len > 1 ? host->sg : data->sg;
+	size_t head = PAGE_SIZE - sg->offset;	/* bytes from sg->offset to the page end */
+	size_t blk_head = head % data->blksz;	/* partial block left before the page end */
+
+	WARN(host->pg.page, "%p not properly unmapped!\n", host->pg.page);
+	if (WARN(sg_dma_len(sg) % data->blksz,
+		 "SG size %u isn't a multiple of block size %u\n",
+		 sg_dma_len(sg), data->blksz))
+		return NULL;	/* nothing has been mapped at this point */
+
+	host->pg.page = sg_page(sg);
+	host->pg.mapped = kmap(host->pg.page);
+	host->offset = sg->offset;
+
+	/*
+	 * Block size must be a power of 2 for multi-block transfers,
+	 * therefore blk_head is equal for all pages in this SG
+	 */
+	host->head_len = blk_head;
+
+	if (head < data->blksz)
+		/*
+		 * The first block in the SG crosses a page boundary.
+		 * Max blksz = 512, so blocks can only span 2 pages
+		 */
+		usdhi6_blk_bounce(host, sg);
+	else
+		host->blk_page = host->pg.mapped;
+
+	dev_dbg(mmc_dev(host->mmc), "Mapped %p (%lx) at %p + %u for CMD%u @ 0x%p\n",
+		host->pg.page, page_to_pfn(host->pg.page), host->pg.mapped,
+		sg->offset, host->mrq->cmd->opcode, host->mrq);
+
+	return host->blk_page + host->offset;	/* bounce buffer start if the block was bounced */
+}
+
+/* Unmap the current page: common for multiple and single block IO */
+static void usdhi6_sg_unmap(struct usdhi6_host *host, bool force)
+{
+	struct mmc_data *data = host->mrq->data;
+	struct page *page = host->head_pg.page;	/* set by usdhi6_blk_bounce(), cleared below */
+
+	if (page) {
+		/* Previous block was cross-page boundary */
+		struct scatterlist *sg = data->sg_len > 1 ?
+			host->sg : data->sg;
+		size_t blk_head = host->head_len;
+
+		if (!data->error && data->flags & MMC_DATA_READ) {
+			memcpy(host->head_pg.mapped + PAGE_SIZE - blk_head,
+			       host->bounce_buf, blk_head);	/* head goes to the end of the first page */
+			memcpy(host->pg.mapped, host->bounce_buf + blk_head,
+			       data->blksz - blk_head);	/* tail goes to the start of the next page */
+		}
+
+		flush_dcache_page(page);
+		kunmap(page);
+
+		host->head_pg.page = NULL;
+
+		if (!force && sg_dma_len(sg) + sg->offset >
+		    (host->page_idx << PAGE_SHIFT) + data->blksz - blk_head)
+			/* More blocks in this SG, don't unmap the next page */
+			return;
+	}
+
+	page = host->pg.page;
+	if (!page)
+		return;	/* nothing mapped */
+
+	flush_dcache_page(page);
+	kunmap(page);
+
+	host->pg.page = NULL;	/* usdhi6_sg_map() WARNs if this is still set */
+}
+
+/* Called from MMC_WRITE_MULTIPLE_BLOCK or MMC_READ_MULTIPLE_BLOCK */
+static void usdhi6_sg_advance(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	size_t done, total;	/* byte positions measured from the start of the SG's first page */
+
+	/* New offset: set at the end of the previous block */
+	if (host->head_pg.page) {
+		/* Finished a cross-page block, jump to the new page */
+		host->page_idx++;
+		host->offset = data->blksz - host->head_len;	/* bytes already used in the new page */
+		host->blk_page = host->pg.mapped;
+		usdhi6_sg_unmap(host, false);
+	} else {
+		host->offset += data->blksz;
+		/* The completed block didn't cross a page boundary */
+		if (host->offset == PAGE_SIZE) {
+			/* If required, we'll map the page below */
+			host->offset = 0;
+			host->page_idx++;
+		}
+	}
+
+	/*
+	 * Now host->blk_page + host->offset point at the end of our last block
+	 * and host->page_idx is the index of the page, in which our new block
+	 * is located, if any
+	 */
+
+	done = (host->page_idx << PAGE_SHIFT) + host->offset;
+	total = host->sg->offset + sg_dma_len(host->sg);
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): %zu of %zu @ %zu\n", __func__,
+		done, total, host->offset);
+
+	if (done < total && host->offset) {
+		/* More blocks in this page */
+		if (host->offset + data->blksz > PAGE_SIZE)
+			/* We approached at a block, that spans 2 pages */
+			usdhi6_blk_bounce(host, host->sg);
+
+		return;
+	}
+
+	/* Finished current page or an SG segment */
+	usdhi6_sg_unmap(host, false);
+
+	if (done == total) {
+		/*
+		 * End of an SG segment or the complete SG: jump to the next
+		 * segment, we'll map it later in usdhi6_blk_read() or
+		 * usdhi6_blk_write()
+		 */
+		struct scatterlist *next = sg_next(host->sg);
+
+		host->page_idx = 0;
+
+		if (!next)
+			host->wait = USDHI6_WAIT_FOR_DATA_END;	/* no segment left */
+		host->sg = next;
+
+		if (WARN(next && sg_dma_len(next) % data->blksz,
+			 "SG size %u isn't a multiple of block size %u\n",
+			 sg_dma_len(next), data->blksz))
+			data->error = -EINVAL;
+
+		return;
+	}
+
+	/* We cannot get here after crossing a page border */
+
+	/* Next page in the same SG */
+	host->pg.page = nth_page(sg_page(host->sg), host->page_idx);
+	host->pg.mapped = kmap(host->pg.page);
+	host->blk_page = host->pg.mapped;
+
+	dev_dbg(mmc_dev(host->mmc), "Mapped %p (%lx) at %p for CMD%u @ 0x%p\n",
+		host->pg.page, page_to_pfn(host->pg.page), host->pg.mapped,
+		host->mrq->cmd->opcode, host->mrq);
+}
+
+/*			DMA handling					*/
+/* Release both DMA channels, if held; usdhi6_dma_start() returns -ENODEV afterwards */
+static void usdhi6_dma_release(struct usdhi6_host *host)
+{
+	host->dma_active = false;
+	if (host->chan_tx) {
+		struct dma_chan *chan = host->chan_tx;
+		host->chan_tx = NULL;	/* clear the pointer before releasing the channel */
+		dma_release_channel(chan);
+	}
+	if (host->chan_rx) {
+		struct dma_chan *chan = host->chan_rx;
+		host->chan_rx = NULL;
+		dma_release_channel(chan);
+	}
+}
+/* If DMA is marked active: clear USDHI6_CC_EXT_MODE and unmap the request's SG list */
+static void usdhi6_dma_stop_unmap(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+
+	if (!host->dma_active)
+		return;	/* nothing mapped, or already unmapped */
+
+	usdhi6_write(host, USDHI6_CC_EXT_MODE, 0);
+	host->dma_active = false;
+
+	if (data->flags & MMC_DATA_READ)
+		dma_unmap_sg(host->chan_rx->device->dev, data->sg,
+			     data->sg_len, DMA_FROM_DEVICE);
+	else
+		dma_unmap_sg(host->chan_tx->device->dev, data->sg,
+			     data->sg_len, DMA_TO_DEVICE);
+}
+/* Completion callback installed by usdhi6_dma_setup(); @arg is the struct usdhi6_host */
+static void usdhi6_dma_complete(void *arg)
+{
+	struct usdhi6_host *host = arg;
+	struct mmc_request *mrq = host->mrq;
+
+	if (WARN(!mrq || !mrq->data, "%s: NULL data in DMA completion for %p!\n",
+		 dev_name(mmc_dev(host->mmc)), mrq))
+		return;	/* no data request to finish */
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): CMD%u DMA completed\n", __func__,
+		mrq->cmd->opcode);
+
+	usdhi6_dma_stop_unmap(host);
+	usdhi6_wait_for_brwe(host, mrq->data->flags & MMC_DATA_READ);
+}
+/* Map the request's SG list and submit it on @chan; returns the cookie, negative on failure */
+static int usdhi6_dma_setup(struct usdhi6_host *host, struct dma_chan *chan,
+			    enum dma_transfer_direction dir)
+{
+	struct mmc_data *data = host->mrq->data;
+	struct scatterlist *sg = data->sg;
+	struct dma_async_tx_descriptor *desc = NULL;
+	dma_cookie_t cookie = -EINVAL;	/* stays negative unless dmaengine_submit() runs */
+	enum dma_data_direction data_dir;
+	int ret;
+
+	switch (dir) {
+	case DMA_MEM_TO_DEV:
+		data_dir = DMA_TO_DEVICE;
+		break;
+	case DMA_DEV_TO_MEM:
+		data_dir = DMA_FROM_DEVICE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = dma_map_sg(chan->device->dev, sg, data->sg_len, data_dir);
+	if (ret > 0) {
+		host->dma_active = true;	/* tells usdhi6_dma_stop_unmap() there is a mapping */
+		desc = dmaengine_prep_slave_sg(chan, sg, ret, dir,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = usdhi6_dma_complete;
+		desc->callback_param = host;
+		cookie = dmaengine_submit(desc);
+	}
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): mapped %d -> %d, cookie %d @ %p\n",
+		__func__, data->sg_len, ret, cookie, desc);
+
+	if (cookie < 0) {
+		/* DMA failed, fall back to PIO. NOTE(review): SG may still be mapped here - confirm */
+		if (ret >= 0)
+			ret = cookie;	/* ret is only used in the warning below */
+		usdhi6_dma_release(host);
+		dev_warn(mmc_dev(host->mmc),
+			 "DMA failed: %d, falling back to PIO\n", ret);
+	}
+
+	return cookie;
+}
+
+/*
+ * Start DMA for the current request on the channel matching its direction.
+ * Returns -ENODEV unless both channels are available, otherwise the result
+ * of usdhi6_dma_setup().
+ */
+static int usdhi6_dma_start(struct usdhi6_host *host)
+{
+	enum dma_transfer_direction dir;
+	struct dma_chan *chan;
+
+	if (!host->chan_rx || !host->chan_tx)
+		return -ENODEV;
+
+	if (host->mrq->data->flags & MMC_DATA_READ) {
+		chan = host->chan_rx;
+		dir = DMA_DEV_TO_MEM;
+	} else {
+		chan = host->chan_tx;
+		dir = DMA_MEM_TO_DEV;
+	}
+
+	return usdhi6_dma_setup(host, chan, dir);
+}
+
+/* Terminate all transfers on the channel used by the current request */
+static void usdhi6_dma_kill(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	struct dma_chan *chan;
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): SG of %u: %ux%u\n",
+		__func__, data->sg_len, data->blocks, data->blksz);
+
+	/* Reads use the RX channel, writes the TX channel */
+	chan = data->flags & MMC_DATA_READ ? host->chan_rx : host->chan_tx;
+
+	/* Abort DMA */
+	dmaengine_terminate_all(chan);
+}
+
+/*
+ * Evaluate the outcome of a DMA transfer. On an I/O error the data error
+ * code is recorded, the byte count is zeroed, DMA is aborted and both
+ * channels are released. Without an error only an unexpected RSP_END status
+ * is reported.
+ */
+static void usdhi6_dma_check_error(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): IO error %d, status 0x%x\n",
+		__func__, host->io_error, usdhi6_read(host, USDHI6_SD_INFO1));
+
+	if (!host->io_error) {
+		/*
+		 * According to the datasheet a card response should be checked
+		 * here, although responses belong to the command phase and not
+		 * to the data phase. Check for one regardless.
+		 */
+		if (host->irq_status & USDHI6_SD_INFO1_RSP_END)
+			dev_warn(mmc_dev(host->mmc), "Unexpected response received!\n");
+		return;
+	}
+
+	data->error = usdhi6_error_code(host);
+	data->bytes_xfered = 0;
+	usdhi6_dma_kill(host);
+	usdhi6_dma_release(host);
+	dev_warn(mmc_dev(host->mmc),
+		 "DMA failed: %d, falling back to PIO\n", data->error);
+}
+
+/* Issue pending descriptors on the channel used by the current request */
+static void usdhi6_dma_kick(struct usdhi6_host *host)
+{
+	bool read = host->mrq->data->flags & MMC_DATA_READ;
+
+	dma_async_issue_pending(read ? host->chan_rx : host->chan_tx);
+}
+
+/*
+ * Request and configure the "tx" and "rx" slave DMA channels. @start is the
+ * physical base address to which the USDHI6_SD_BUF0 offset is added to form
+ * the device-side DMA address. Either both channels end up configured, or
+ * both host->chan_tx and host->chan_rx are left NULL.
+ */
+static void usdhi6_dma_request(struct usdhi6_host *host, phys_addr_t start)
+{
+	struct device *dev = mmc_dev(host->mmc);
+	struct dma_slave_config tx_cfg = {
+		.direction = DMA_MEM_TO_DEV,
+		.src_addr = 0,
+		.dst_addr = start + USDHI6_SD_BUF0,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_maxburst = 128,	/* 128 words * 4 bytes = 512 bytes */
+	};
+	struct dma_slave_config rx_cfg = {
+		.direction = DMA_DEV_TO_MEM,
+		.src_addr = start + USDHI6_SD_BUF0,
+		.dst_addr = 0,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.src_maxburst = 128,	/* 128 words * 4 bytes = 512 bytes */
+		.dst_maxburst = 128,
+	};
+
+	host->chan_tx = dma_request_slave_channel(dev, "tx");
+	dev_dbg(dev, "%s: TX: got channel %p\n", __func__,
+		host->chan_tx);
+	if (!host->chan_tx)
+		return;
+
+	if (dmaengine_slave_config(host->chan_tx, &tx_cfg) < 0)
+		goto drop_tx;
+
+	host->chan_rx = dma_request_slave_channel(dev, "rx");
+	dev_dbg(dev, "%s: RX: got channel %p\n", __func__,
+		host->chan_rx);
+	if (!host->chan_rx)
+		goto drop_tx;
+
+	if (dmaengine_slave_config(host->chan_rx, &rx_cfg) >= 0)
+		return;
+
+	/* RX configuration failed: give up on both channels */
+	dma_release_channel(host->chan_rx);
+	host->chan_rx = NULL;
+drop_tx:
+	dma_release_channel(host->chan_tx);
+	host->chan_tx = NULL;
+}
+
+/*			API helpers					*/
+
+/*
+ * Program the SD clock divider for the rate requested in @ios->clock.
+ *
+ * The function first polls USDHI6_SD_INFO2 up to 1000 times for the SCLKDIVEN
+ * bit and gives up with an error message if it never shows up. A requested
+ * rate of 0 stops the clock and resets host->rate to 0. If the computed rate
+ * equals the cached host->rate, the register is left untouched.
+ */
+static void usdhi6_clk_set(struct usdhi6_host *host, struct mmc_ios *ios)
+{
+	unsigned long rate = ios->clock;
+	u32 val;
+	unsigned int i;
+
+	/* Wait until the divider may be changed */
+	for (i = 1000; i; i--) {
+		if (usdhi6_read(host, USDHI6_SD_INFO2) & USDHI6_SD_INFO2_SCLKDIVEN)
+			break;
+		usleep_range(10, 100);
+	}
+
+	if (!i) {
+		dev_err(mmc_dev(host->mmc), "SD bus busy, clock set aborted\n");
+		return;
+	}
+
+	/* Start from the current control value with the divider field cleared */
+	val = usdhi6_read(host, USDHI6_SD_CLK_CTRL) & ~USDHI6_SD_CLK_CTRL_DIV_MASK;
+
+	if (rate) {
+		unsigned long new_rate;
+
+		if (host->imclk <= rate) {
+			/* Requested rate is at or above the input clock */
+			if (ios->timing != MMC_TIMING_UHS_DDR50) {
+				/* Cannot have 1-to-1 clock in DDR mode */
+				new_rate = host->imclk;
+				val |= 0xff;
+			} else {
+				new_rate = host->imclk / 2;
+			}
+		} else {
+			/* Smallest power-of-two divider not exceeding the rate */
+			unsigned long div =
+				roundup_pow_of_two(DIV_ROUND_UP(host->imclk, rate));
+			val |= div >> 2;
+			new_rate = host->imclk / div;
+		}
+
+		/* Nothing to do if the resulting rate is already in effect */
+		if (host->rate == new_rate)
+			return;
+
+		host->rate = new_rate;
+
+		dev_dbg(mmc_dev(host->mmc), "target %lu, div %u, set %lu\n",
+			rate, (val & 0xff) << 2, new_rate);
+	}
+
+	/*
+	 * if old or new rate is equal to input rate, have to switch the clock
+	 * off before changing and on after
+	 */
+	if (host->imclk == rate || host->imclk == host->rate || !rate)
+		usdhi6_write(host, USDHI6_SD_CLK_CTRL,
+			     val & ~USDHI6_SD_CLK_CTRL_SCLKEN);
+
+	/* A zero rate only stops the clock */
+	if (!rate) {
+		host->rate = 0;
+		return;
+	}
+
+	usdhi6_write(host, USDHI6_SD_CLK_CTRL, val);
+
+	/* Switch the clock (back) on if it was stopped above or was not running */
+	if (host->imclk == rate || host->imclk == host->rate ||
+	    !(val & USDHI6_SD_CLK_CTRL_SCLKEN))
+		usdhi6_write(host, USDHI6_SD_CLK_CTRL,
+			     val | USDHI6_SD_CLK_CTRL_SCLKEN);
+}
+
+/*
+ * Apply the voltage selected in @ios to the vmmc supply, or 0 when the power
+ * mode is 0. Does nothing if there is no valid vmmc regulator; the result of
+ * the regulator call is ignored.
+ */
+static void usdhi6_set_power(struct usdhi6_host *host, struct mmc_ios *ios)
+{
+	struct mmc_host *mmc = host->mmc;
+	unsigned short vdd_bit = ios->power_mode ? ios->vdd : 0;
+
+	if (IS_ERR(mmc->supply.vmmc))
+		return;
+
+	/* Errors ignored... */
+	mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd_bit);
+}
+
+/*
+ * Pulse the soft reset register: write it without and then with the RESET
+ * bit, and poll up to 1000 times until the RESET bit reads back as set.
+ * Returns 0 on success or -ETIMEDOUT if the bit never appears.
+ */
+static int usdhi6_reset(struct usdhi6_host *host)
+{
+	int tries = 1000;
+
+	usdhi6_write(host, USDHI6_SOFT_RST, USDHI6_SOFT_RST_RESERVED);
+	cpu_relax();
+	usdhi6_write(host, USDHI6_SOFT_RST, USDHI6_SOFT_RST_RESERVED | USDHI6_SOFT_RST_RESET);
+
+	while (tries &&
+	       !(usdhi6_read(host, USDHI6_SOFT_RST) & USDHI6_SOFT_RST_RESET))
+		tries--;
+
+	return tries ? 0 : -ETIMEDOUT;
+}
+
+/*
+ * .set_ios() host operation: handle the power mode requested in @ios, then
+ * update the clock if the cached rate differs from the requested one.
+ */
+static void usdhi6_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct usdhi6_host *host = mmc_priv(mmc);
+	u32 option, mode;
+	bool ddr;
+
+	dev_dbg(mmc_dev(mmc), "%uHz, OCR: %u, power %u, bus-width %u, timing %u\n",
+		ios->clock, ios->vdd, ios->power_mode, ios->bus_width, ios->timing);
+
+	switch (ios->power_mode) {
+	case MMC_POWER_ON:
+		ddr = ios->timing == MMC_TIMING_UHS_DDR50;
+		option = usdhi6_read(host, USDHI6_SD_OPTION);
+		/*
+		 * eMMC only permits 4 or 8 bits in DDR mode and SD cards
+		 * probably behave the same. The datasheet explicitly demands
+		 * 4 bits for DDR, so complain about 1-bit DDR and keep DDR
+		 * off in that case.
+		 */
+		if (ios->bus_width != MMC_BUS_WIDTH_1) {
+			option &= ~USDHI6_SD_OPTION_WIDTH_1;
+			mode = ddr;
+		} else {
+			if (ddr)
+				dev_err(mmc_dev(mmc),
+					"4 bits are required for DDR\n");
+			option |= USDHI6_SD_OPTION_WIDTH_1;
+			mode = 0;
+		}
+		usdhi6_write(host, USDHI6_SD_OPTION, option);
+		usdhi6_write(host, USDHI6_SDIF_MODE, mode);
+		break;
+	case MMC_POWER_UP:
+		/*
+		 * USDHI6_SD_OPTION is otherwise only touched from .request(),
+		 * which cannot race with MMC_POWER_UP
+		 */
+		if (usdhi6_reset(host) < 0) {
+			dev_err(mmc_dev(mmc), "Cannot reset the interface!\n");
+			break;
+		}
+		usdhi6_set_power(host, ios);
+		usdhi6_only_cd(host);
+		break;
+	case MMC_POWER_OFF:
+		usdhi6_set_power(host, ios);
+		usdhi6_only_cd(host);
+		break;
+	}
+
+	if (host->rate != ios->clock)
+		usdhi6_clk_set(host, ios);
+}
+
+/*
+ * Program the data timeout field of USDHI6_SD_OPTION for the current
+ * request. The response timeout is fixed to 640 clock cycles and is not
+ * affected. The tick count is derived from the data timeout if the request
+ * carries data, otherwise from the command's busy timeout.
+ */
+static void usdhi6_timeout_set(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_data *data = mrq->data;
+	unsigned long ticks;
+	u32 val, option;
+
+	if (data)
+		ticks = host->rate / 1000000 * (data->timeout_ns / 1000) +
+			data->timeout_clks;
+	else
+		ticks = host->rate / 1000 * mrq->cmd->busy_timeout;
+
+	if (ticks && ticks < 1 << 13)
+		/* Min timeout */
+		val = 0;
+	else if (!ticks || ticks > 1 << 27)
+		/* Max timeout */
+		val = 14;
+	else
+		val = order_base_2(ticks) - 13;
+
+	dev_dbg(mmc_dev(host->mmc), "Set %s timeout %lu ticks @ %lu Hz\n",
+		data ? "data" : "cmd", ticks, host->rate);
+
+	/* Timeout Counter mask: 0xf0 */
+	option = usdhi6_read(host, USDHI6_SD_OPTION) & ~USDHI6_SD_OPTION_TIMEOUT_MASK;
+	usdhi6_write(host, USDHI6_SD_OPTION,
+		     (val << USDHI6_SD_OPTION_TIMEOUT_SHIFT) | option);
+}
+
+/*
+ * Finish the current request: warn about and unmap pages that are still
+ * mapped, log any recorded error, disable DMA, return the state machine to
+ * USDHI6_WAIT_FOR_REQUEST and hand the request back to the MMC core.
+ */
+static void usdhi6_request_done(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_data *data = mrq->data;
+	bool failed;
+
+	if (WARN(host->pg.page || host->head_pg.page,
+		 "Page %p or %p not unmapped: wait %u, CMD%d(%c) @ +0x%zx %ux%u in SG%u!\n",
+		 host->pg.page, host->head_pg.page, host->wait, mrq->cmd->opcode,
+		 data ? (data->flags & MMC_DATA_READ ? 'R' : 'W') : '-',
+		 data ? host->offset : 0, data ? data->blocks : 0,
+		 data ? data->blksz : 0, data ? data->sg_len : 0))
+		usdhi6_sg_unmap(host, true);
+
+	/* Any of command, data or stop may carry an error */
+	failed = mrq->cmd->error ||
+		 (data && data->error) ||
+		 (mrq->stop && mrq->stop->error);
+	if (failed)
+		dev_dbg(mmc_dev(host->mmc), "%s(CMD%d: %ux%u): err %d %d %d\n",
+			__func__, mrq->cmd->opcode, data ? data->blocks : 0,
+			data ? data->blksz : 0,
+			mrq->cmd->error,
+			data ? data->error : 1,
+			mrq->stop ? mrq->stop->error : 1);
+
+	/* Disable DMA */
+	usdhi6_write(host, USDHI6_CC_EXT_MODE, 0);
+
+	/* Detach the request from the host before completing it */
+	host->wait = USDHI6_WAIT_FOR_REQUEST;
+	host->mrq = NULL;
+
+	mmc_request_done(host->mmc, mrq);
+}
+
+/*
+ * Build the value for the USDHI6_SD_CMD register from the current request:
+ * the opcode, the APP flag if the previous command was flagged as an
+ * application command prefix, and - for data requests only - the data,
+ * direction, multi-block and response mode bits.
+ *
+ * Returns the register value, or -EINVAL for a data request with an
+ * unrecognised response type.
+ */
+static int usdhi6_cmd_flags(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_command *cmd = mrq->cmd;
+	struct mmc_data *data = mrq->data;
+	u16 opc = cmd->opcode;
+	bool multi;
+
+	if (host->app_cmd) {
+		/* The APP flag applies to a single command only */
+		host->app_cmd = false;
+		opc |= USDHI6_SD_CMD_APP;
+	}
+
+	if (!data)
+		return opc;
+
+	opc |= USDHI6_SD_CMD_DATA;
+
+	if (data->flags & MMC_DATA_READ)
+		opc |= USDHI6_SD_CMD_READ;
+
+	multi = cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+		cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+		(cmd->opcode == SD_IO_RW_EXTENDED && data->blocks > 1);
+	if (multi) {
+		opc |= USDHI6_SD_CMD_MULTI;
+		if (!mrq->stop)
+			opc |= USDHI6_SD_CMD_CMD12_AUTO_OFF;
+	}
+
+	switch (mmc_resp_type(cmd)) {
+	case MMC_RSP_NONE:
+		opc |= USDHI6_SD_CMD_MODE_RSP_NONE;
+		break;
+	case MMC_RSP_R1:
+		opc |= USDHI6_SD_CMD_MODE_RSP_R1;
+		break;
+	case MMC_RSP_R1B:
+		opc |= USDHI6_SD_CMD_MODE_RSP_R1B;
+		break;
+	case MMC_RSP_R2:
+		opc |= USDHI6_SD_CMD_MODE_RSP_R2;
+		break;
+	case MMC_RSP_R3:
+		opc |= USDHI6_SD_CMD_MODE_RSP_R3;
+		break;
+	default:
+		dev_warn(mmc_dev(host->mmc),
+			 "Unknown response type %d\n",
+			 mmc_resp_type(cmd));
+		return -EINVAL;
+	}
+
+	return opc;
+}
+
+/*
+ * Start executing the request stored in host->mrq.
+ *
+ * Validates block sizes for multi-block transfers, prepares the scatterlist,
+ * tries to set up DMA for sufficiently large, 4-byte-multiple blocks, arms the
+ * timeout work and finally writes the command register to kick execution.
+ *
+ * Returns 0 once the command has been issued, a negative value from
+ * usdhi6_cmd_flags(), -EAGAIN if the controller stays busy, or -EINVAL for an
+ * unsupported block size.
+ */
+static int usdhi6_rq_start(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_command *cmd = mrq->cmd;
+	struct mmc_data *data = mrq->data;
+	int opc = usdhi6_cmd_flags(host);
+	int i;
+
+	if (opc < 0)
+		return opc;
+
+	/* Poll up to 1000 times for the CBSY bit to clear */
+	for (i = 1000; i; i--) {
+		if (!(usdhi6_read(host, USDHI6_SD_INFO2) & USDHI6_SD_INFO2_CBSY))
+			break;
+		usleep_range(10, 100);
+	}
+
+	if (!i) {
+		dev_dbg(mmc_dev(host->mmc), "Command active, request aborted\n");
+		return -EAGAIN;
+	}
+
+	if (data) {
+		bool use_dma;
+		int ret = 0;
+
+		host->page_idx = 0;
+
+		/*
+		 * Multi-block SDIO: 512 bytes is always accepted, 32..256 only
+		 * without a stop command. Multi-block memory transfers require
+		 * 512-byte blocks.
+		 */
+		if (cmd->opcode == SD_IO_RW_EXTENDED && data->blocks > 1) {
+			switch (data->blksz) {
+			case 512:
+				break;
+			case 32:
+			case 64:
+			case 128:
+			case 256:
+				if (mrq->stop)
+					ret = -EINVAL;
+				break;
+			default:
+				ret = -EINVAL;
+			}
+		} else if ((cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+			    cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK) &&
+			   data->blksz != 512) {
+			ret = -EINVAL;
+		}
+
+		if (ret < 0) {
+			dev_warn(mmc_dev(host->mmc), "%s(): %u blocks of %u bytes\n",
+				 __func__, data->blocks, data->blksz);
+			return -EINVAL;
+		}
+
+		/* Only multi-block transfers get the scatterlist prepared here */
+		if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+		    cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+		    (cmd->opcode == SD_IO_RW_EXTENDED &&
+		     data->blocks > 1))
+			usdhi6_sg_prep(host);
+
+		usdhi6_write(host, USDHI6_SD_SIZE, data->blksz);
+
+		/* Debug note only: block size or first SG offset not 4-byte aligned */
+		if ((data->blksz >= USDHI6_MIN_DMA ||
+		     data->blocks > 1) &&
+		    (data->blksz % 4 ||
+		     data->sg->offset % 4))
+			dev_dbg(mmc_dev(host->mmc),
+				"Bad SG of %u: %ux%u @ %u\n", data->sg_len,
+				data->blksz, data->blocks, data->sg->offset);
+
+		/* Enable DMA for USDHI6_MIN_DMA bytes or more */
+		use_dma = data->blksz >= USDHI6_MIN_DMA &&
+			!(data->blksz % 4) &&
+			usdhi6_dma_start(host) >= DMA_MIN_COOKIE;
+
+		if (use_dma)
+			usdhi6_write(host, USDHI6_CC_EXT_MODE, USDHI6_CC_EXT_MODE_SDRW);
+
+		dev_dbg(mmc_dev(host->mmc),
+			"%s(): request opcode %u, %u blocks of %u bytes in %u segments, %s %s @+0x%x%s\n",
+			__func__, cmd->opcode, data->blocks, data->blksz,
+			data->sg_len, use_dma ? "DMA" : "PIO",
+			data->flags & MMC_DATA_READ ? "read" : "write",
+			data->sg->offset, mrq->stop ? " + stop" : "");
+	} else {
+		dev_dbg(mmc_dev(host->mmc), "%s(): request opcode %u\n",
+			__func__, cmd->opcode);
+	}
+
+	/* We have to get a command completion interrupt with DMA too */
+	usdhi6_wait_for_resp(host);
+
+	/* Enter the command state and arm the software timeout */
+	host->wait = USDHI6_WAIT_FOR_CMD;
+	schedule_delayed_work(&host->timeout_work, host->timeout);
+
+	/* SEC bit is required to enable block counting by the core */
+	usdhi6_write(host, USDHI6_SD_STOP,
+		     data && data->blocks > 1 ? USDHI6_SD_STOP_SEC : 0);
+	usdhi6_write(host, USDHI6_SD_ARG, cmd->arg);
+
+	/* Kick command execution */
+	usdhi6_write(host, USDHI6_SD_CMD, opc);
+
+	return 0;
+}
+
+/*
+ * .request() host operation: cancel a pending timeout, attach @mrq to the
+ * host, program the timeout and start the request. If starting fails, the
+ * error is stored in the command and the request is completed at once.
+ */
+static void usdhi6_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct usdhi6_host *host = mmc_priv(mmc);
+	int err;
+
+	cancel_delayed_work_sync(&host->timeout_work);
+
+	host->mrq = mrq;
+	host->sg = NULL;
+
+	usdhi6_timeout_set(host);
+
+	err = usdhi6_rq_start(host);
+	if (err >= 0)
+		return;
+
+	mrq->cmd->error = err;
+	usdhi6_request_done(host);
+}
+
+/*
+ * .get_cd() host operation. Returns 1 when exactly one of the CD status bit
+ * and the MMC_CAP2_CD_ACTIVE_HIGH capability is set, 0 otherwise:
+ *
+ *	level	status.CD	CD_ACTIVE_HIGH	card present
+ *	1	0		0		0
+ *	1	0		1		1
+ *	0	1		0		1
+ *	0	1		1		0
+ */
+static int usdhi6_get_cd(struct mmc_host *mmc)
+{
+	struct usdhi6_host *host = mmc_priv(mmc);
+	/* Read is atomic, no need to lock */
+	bool cd_bit = usdhi6_read(host, USDHI6_SD_INFO1) & USDHI6_SD_INFO1_CD;
+	bool active_high = mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH;
+
+	return cd_bit != active_high;
+}
+
+/*
+ * .get_ro() host operation. Returns 1 when exactly one of the WP status bit
+ * and the MMC_CAP2_RO_ACTIVE_HIGH capability is set, 0 otherwise:
+ *
+ *	level	status.WP	RO_ACTIVE_HIGH	card read-only
+ *	1	0		0		0
+ *	1	0		1		1
+ *	0	1		0		1
+ *	0	1		1		0
+ */
+static int usdhi6_get_ro(struct mmc_host *mmc)
+{
+	struct usdhi6_host *host = mmc_priv(mmc);
+	/* A single register read, done without locking */
+	bool wp_bit = usdhi6_read(host, USDHI6_SD_INFO1) & USDHI6_SD_INFO1_WP;
+	bool active_high = mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH;
+
+	return wp_bit != active_high;
+}
+
+/*
+ * .enable_sdio_irq() host operation. Enabling unmasks IOIRQ and then sets
+ * USDHI6_SDIO_MODE to 1; disabling clears the mode first and then masks all
+ * SDIO interrupts again. host->sdio_mask mirrors the mask register.
+ */
+static void usdhi6_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct usdhi6_host *host = mmc_priv(mmc);
+
+	dev_dbg(mmc_dev(mmc), "%s(): %sable\n", __func__, enable ? "en" : "dis");
+
+	if (!enable) {
+		usdhi6_write(host, USDHI6_SDIO_MODE, 0);
+		usdhi6_write(host, USDHI6_SDIO_INFO1_MASK, USDHI6_SDIO_INFO1_IRQ);
+		host->sdio_mask = USDHI6_SDIO_INFO1_IRQ;
+		return;
+	}
+
+	host->sdio_mask = USDHI6_SDIO_INFO1_IRQ & ~USDHI6_SDIO_INFO1_IOIRQ;
+	usdhi6_write(host, USDHI6_SDIO_INFO1_MASK, host->sdio_mask);
+	usdhi6_write(host, USDHI6_SDIO_MODE, 1);
+}
+
+/* Host operations; installed as mmc->ops by usdhi6_probe() */
+static struct mmc_host_ops usdhi6_ops = {
+	.request	= usdhi6_request,
+	.set_ios	= usdhi6_set_ios,
+	.get_cd		= usdhi6_get_cd,
+	.get_ro		= usdhi6_get_ro,
+	.enable_sdio_irq = usdhi6_enable_sdio_irq,
+};
+
+/*			State machine handlers				*/
+
+/* Store the content of USDHI6_SD_RSP10 as the response of the stop command */
+static void usdhi6_resp_cmd12(struct usdhi6_host *host)
+{
+	struct mmc_command *stop = host->mrq->stop;
+	u32 rsp10 = usdhi6_read(host, USDHI6_SD_RSP10);
+
+	stop->resp[0] = rsp10;
+}
+
+/*
+ * Copy the card's response for the current command from the response
+ * registers into cmd->resp[]. Nothing is read if the command expects no
+ * response, or if the RSP_END status bit is missing (which is reported).
+ */
+static void usdhi6_resp_read(struct usdhi6_host *host)
+{
+	struct mmc_command *cmd = host->mrq->cmd;
+	u32 *rsp = cmd->resp, tmp = 0;
+	int i;
+
+/*
+ * RSP10	39-8
+ * RSP32	71-40
+ * RSP54	103-72
+ * RSP76	127-104
+ * R2-type response:
+ * resp[0]	= r[127..96]
+ * resp[1]	= r[95..64]
+ * resp[2]	= r[63..32]
+ * resp[3]	= r[31..0]
+ * Other responses:
+ * resp[0]	= r[39..8]
+ */
+
+	if (mmc_resp_type(cmd) == MMC_RSP_NONE)
+		return;
+
+	if (!(host->irq_status & USDHI6_SD_INFO1_RSP_END)) {
+		dev_err(mmc_dev(host->mmc),
+			"CMD%d: response expected but is missing!\n", cmd->opcode);
+		return;
+	}
+
+	if (mmc_resp_type(cmd) & MMC_RSP_136)
+		for (i = 0; i < 4; i++) {
+			/*
+			 * Each word starts with the top byte of the previous
+			 * register. For the first word tmp is still 0, so
+			 * resp[3] is fully overwritten instead of being OR-ed
+			 * into whatever the caller left there.
+			 */
+			rsp[3 - i] = tmp >> 24;
+			tmp = usdhi6_read(host, USDHI6_SD_RSP10 + i * 8);
+			rsp[3 - i] |= tmp << 8;
+		}
+	else if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+		 cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+		/* Read RSP54 to avoid conflict with auto CMD12 */
+		rsp[0] = usdhi6_read(host, USDHI6_SD_RSP54);
+	else
+		rsp[0] = usdhi6_read(host, USDHI6_SD_RSP10);
+
+	dev_dbg(mmc_dev(host->mmc), "Response 0x%x\n", rsp[0]);
+}
+
+/*
+ * PIO-read one block of data->blksz bytes from USDHI6_SD_BUF0 into the
+ * currently mapped page (mapping one first if none is mapped). Whole 32-bit
+ * words are read first, a remainder of 1..3 bytes is fetched with 16-bit
+ * reads.
+ *
+ * Returns 0 on success. On an I/O error or a mapping failure the error is
+ * stored in data->error, the state is reset to USDHI6_WAIT_FOR_REQUEST and
+ * the error is returned.
+ */
+static int usdhi6_blk_read(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	unsigned int words;
+	int rest;
+	u32 *p;
+	u8 *tail;
+
+	if (host->io_error) {
+		data->error = usdhi6_error_code(host);
+		goto error;
+	}
+
+	if (host->pg.page) {
+		p = host->blk_page + host->offset;
+	} else {
+		p = usdhi6_sg_map(host);
+		if (!p) {
+			data->error = -ENOMEM;
+			goto error;
+		}
+	}
+
+	for (words = data->blksz / 4; words; words--)
+		*p++ = usdhi6_read(host, USDHI6_SD_BUF0);
+
+	/* Trailing bytes: two per 16-bit read, only one for the very last */
+	tail = (u8 *)p;
+	for (rest = data->blksz % 4; rest > 0; rest -= 2) {
+		u16 d = usdhi6_read16(host, USDHI6_SD_BUF0);
+
+		*tail++ = ((u8 *)&d)[0];
+		if (rest > 1)
+			*tail++ = ((u8 *)&d)[1];
+	}
+
+	return 0;
+
+error:
+	dev_dbg(mmc_dev(host->mmc), "%s(): %d\n", __func__, data->error);
+	host->wait = USDHI6_WAIT_FOR_REQUEST;
+	return data->error;
+}
+
+/*
+ * PIO-write one block of data->blksz bytes from the currently mapped page
+ * (mapping one first if none is mapped) to USDHI6_SD_BUF0. Whole 32-bit
+ * words are written first, a remainder of 1..3 bytes goes out in 16-bit
+ * writes, padded with a zero byte where needed.
+ *
+ * Returns 0 on success. On an I/O error or a mapping failure the error is
+ * stored in data->error, the state is reset to USDHI6_WAIT_FOR_REQUEST and
+ * the error is returned.
+ */
+static int usdhi6_blk_write(struct usdhi6_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	unsigned int words;
+	int rest;
+	u32 *p;
+	u8 *tail;
+
+	if (host->io_error) {
+		data->error = usdhi6_error_code(host);
+		goto error;
+	}
+
+	if (host->pg.page) {
+		p = host->blk_page + host->offset;
+	} else {
+		p = usdhi6_sg_map(host);
+		if (!p) {
+			data->error = -ENOMEM;
+			goto error;
+		}
+	}
+
+	for (words = data->blksz / 4; words; words--)
+		usdhi6_write(host, USDHI6_SD_BUF0, *p++);
+
+	/* Trailing bytes: two per 16-bit write, the odd last one zero-padded */
+	tail = (u8 *)p;
+	for (rest = data->blksz % 4; rest > 0; rest -= 2) {
+		u16 d;
+
+		((u8 *)&d)[0] = *tail++;
+		if (rest > 1)
+			((u8 *)&d)[1] = *tail++;
+		else
+			((u8 *)&d)[1] = 0;
+		usdhi6_write16(host, USDHI6_SD_BUF0, d);
+	}
+
+	return 0;
+
+error:
+	dev_dbg(mmc_dev(host->mmc), "%s(): %d\n", __func__, data->error);
+	host->wait = USDHI6_WAIT_FOR_REQUEST;
+	return data->error;
+}
+
+/*
+ * Switch the state machine to USDHI6_WAIT_FOR_STOP if the request is a
+ * multi-block read or write whose stop command is MMC_STOP_TRANSMISSION.
+ * Returns 0 in that case. Any other combination is reported, flagged with
+ * -EOPNOTSUPP in the stop command and -EOPNOTSUPP is returned.
+ * The caller must make sure that mrq->stop is set.
+ */
+static int usdhi6_stop_cmd(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	u32 opcode = mrq->cmd->opcode;
+	bool multi_block = opcode == MMC_READ_MULTIPLE_BLOCK ||
+			   opcode == MMC_WRITE_MULTIPLE_BLOCK;
+
+	if (multi_block && mrq->stop->opcode == MMC_STOP_TRANSMISSION) {
+		host->wait = USDHI6_WAIT_FOR_STOP;
+		return 0;
+	}
+
+	/* Unsupported STOP command */
+	dev_err(mmc_dev(host->mmc),
+		"unsupported stop CMD%d for CMD%d\n",
+		mrq->stop->opcode, mrq->cmd->opcode);
+	mrq->stop->error = -EOPNOTSUPP;
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Handle completion of the command phase: record an I/O error or read the
+ * response, then select the next state for a data phase.
+ *
+ * Returns true if the state machine has to keep waiting for data, false if
+ * the request is finished (error, no data, or an unsupported stop command).
+ */
+static bool usdhi6_end_cmd(struct usdhi6_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_command *cmd = mrq->cmd;
+	struct mmc_data *data;
+	bool multi_sdio;
+
+	if (host->io_error) {
+		cmd->error = usdhi6_error_code(host);
+		return false;
+	}
+
+	usdhi6_resp_read(host);
+
+	data = mrq->data;
+	if (!data)
+		return false;
+
+	if (host->dma_active) {
+		usdhi6_dma_kick(host);
+		if (mrq->stop)
+			return usdhi6_stop_cmd(host) >= 0;
+		host->wait = USDHI6_WAIT_FOR_DMA;
+		return true;
+	}
+
+	/* PIO: pick the single- or multi-block state for the direction */
+	multi_sdio = cmd->opcode == SD_IO_RW_EXTENDED && data->blocks > 1;
+
+	if (data->flags & MMC_DATA_READ) {
+		if (multi_sdio || cmd->opcode == MMC_READ_MULTIPLE_BLOCK)
+			host->wait = USDHI6_WAIT_FOR_MREAD;
+		else
+			host->wait = USDHI6_WAIT_FOR_READ;
+	} else {
+		if (multi_sdio || cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+			host->wait = USDHI6_WAIT_FOR_MWRITE;
+		else
+			host->wait = USDHI6_WAIT_FOR_WRITE;
+	}
+
+	return true;
+}
+
+/*
+ * Read the only block of a single-block transfer. Returns true and moves to
+ * USDHI6_WAIT_FOR_DATA_END on success, false if reading the block failed.
+ */
+static bool usdhi6_read_block(struct usdhi6_host *host)
+{
+	/* ACCESS_END IRQ is already unmasked */
+	bool ok = usdhi6_blk_read(host) >= 0;
+
+	/*
+	 * Unmapping must be forced for both pages: a single block may cross a
+	 * page boundary, and for single-block IO host->page_idx == 0, so
+	 * without forcing the second page would stay mapped.
+	 */
+	usdhi6_sg_unmap(host, true);
+
+	if (ok)
+		host->wait = USDHI6_WAIT_FOR_DATA_END;
+
+	return ok;
+}
+
+/*
+ * Read one block of a multi-block transfer and advance within the
+ * scatterlist. Returns false if reading failed, if a data error is recorded,
+ * or if the state is USDHI6_WAIT_FOR_DATA_END and a stop command is pending;
+ * true otherwise.
+ */
+static bool usdhi6_mread_block(struct usdhi6_host *host)
+{
+	if (usdhi6_blk_read(host) < 0)
+		return false;
+
+	usdhi6_sg_advance(host);
+
+	if (host->mrq->data->error)
+		return false;
+
+	return host->wait != USDHI6_WAIT_FOR_DATA_END || !host->mrq->stop;
+}
+
+/*
+ * Write the only block of a single-block transfer. Returns true and moves to
+ * USDHI6_WAIT_FOR_DATA_END on success, false if writing the block failed.
+ */
+static bool usdhi6_write_block(struct usdhi6_host *host)
+{
+	bool ok = usdhi6_blk_write(host) >= 0;
+
+	/* See comment in usdhi6_read_block() */
+	usdhi6_sg_unmap(host, true);
+
+	if (ok)
+		host->wait = USDHI6_WAIT_FOR_DATA_END;
+
+	return ok;
+}
+
+/*
+ * Write one block of a multi-block transfer and advance within the
+ * scatterlist. Returns false if writing failed, if a data error is recorded,
+ * or if the state is USDHI6_WAIT_FOR_DATA_END and a stop command is pending;
+ * true otherwise.
+ */
+static bool usdhi6_mwrite_block(struct usdhi6_host *host)
+{
+	if (usdhi6_blk_write(host) < 0)
+		return false;
+
+	usdhi6_sg_advance(host);
+
+	if (host->mrq->data->error)
+		return false;
+
+	return host->wait != USDHI6_WAIT_FOR_DATA_END || !host->mrq->stop;
+}
+
+/*			Interrupt & timeout handlers			*/
+
+/*
+ * Threaded part of the SD interrupt (registered together with usdhi6_sd() in
+ * usdhi6_probe()). Runs one step of the request state machine selected by
+ * host->wait: either keeps waiting for more data, issues a stop command, or
+ * completes the request. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t usdhi6_sd_bh(int irq, void *dev_id)
+{
+	struct usdhi6_host *host = dev_id;
+	struct mmc_request *mrq;
+	struct mmc_command *cmd;
+	struct mmc_data *data;
+	bool io_wait = false;
+
+	/* An interrupt arrived, so the pending timeout is no longer wanted */
+	cancel_delayed_work_sync(&host->timeout_work);
+
+	mrq = host->mrq;
+	if (!mrq)
+		return IRQ_HANDLED;
+
+	cmd = mrq->cmd;
+	data = mrq->data;
+
+	/* io_wait is set by the handlers that expect further data interrupts */
+	switch (host->wait) {
+	case USDHI6_WAIT_FOR_REQUEST:
+		/* We're too late, the timeout has already kicked in */
+		return IRQ_HANDLED;
+	case USDHI6_WAIT_FOR_CMD:
+		/* Wait for data? */
+		io_wait = usdhi6_end_cmd(host);
+		break;
+	case USDHI6_WAIT_FOR_MREAD:
+		/* Wait for more data? */
+		io_wait = usdhi6_mread_block(host);
+		break;
+	case USDHI6_WAIT_FOR_READ:
+		/* Wait for data end? */
+		io_wait = usdhi6_read_block(host);
+		break;
+	case USDHI6_WAIT_FOR_MWRITE:
+		/* Wait data to write? */
+		io_wait = usdhi6_mwrite_block(host);
+		break;
+	case USDHI6_WAIT_FOR_WRITE:
+		/* Wait for data end? */
+		io_wait = usdhi6_write_block(host);
+		break;
+	case USDHI6_WAIT_FOR_DMA:
+		usdhi6_dma_check_error(host);
+		break;
+	case USDHI6_WAIT_FOR_STOP:
+		usdhi6_write(host, USDHI6_SD_STOP, 0);
+		if (host->io_error) {
+			/* Attribute the error to the stop command if there is one */
+			int ret = usdhi6_error_code(host);
+			if (mrq->stop)
+				mrq->stop->error = ret;
+			else
+				mrq->data->error = ret;
+			dev_warn(mmc_dev(host->mmc), "%s(): %d\n", __func__, ret);
+			break;
+		}
+		usdhi6_resp_cmd12(host);
+		mrq->stop->error = 0;
+		break;
+	case USDHI6_WAIT_FOR_DATA_END:
+		if (host->io_error) {
+			mrq->data->error = usdhi6_error_code(host);
+			dev_warn(mmc_dev(host->mmc), "%s(): %d\n", __func__,
+				 mrq->data->error);
+		}
+		break;
+	default:
+		cmd->error = -EFAULT;
+		dev_err(mmc_dev(host->mmc), "Invalid state %u\n", host->wait);
+		usdhi6_request_done(host);
+		return IRQ_HANDLED;
+	}
+
+	if (io_wait) {
+		/* More interrupts are expected: re-arm the timeout */
+		schedule_delayed_work(&host->timeout_work, host->timeout);
+		/* Wait for more data or ACCESS_END */
+		if (!host->dma_active)
+			usdhi6_wait_for_brwe(host, mrq->data->flags & MMC_DATA_READ);
+		return IRQ_HANDLED;
+	}
+
+	if (!cmd->error) {
+		if (data) {
+			if (!data->error) {
+				/*
+				 * Data phase done without error: send the stop
+				 * command now, unless it has just been handled
+				 * or is already marked as failed.
+				 */
+				if (host->wait != USDHI6_WAIT_FOR_STOP &&
+				    host->mrq->stop &&
+				    !host->mrq->stop->error &&
+				    !usdhi6_stop_cmd(host)) {
+					/* Sending STOP */
+					usdhi6_wait_for_resp(host);
+
+					schedule_delayed_work(&host->timeout_work,
+							      host->timeout);
+
+					return IRQ_HANDLED;
+				}
+
+				data->bytes_xfered = data->blocks * data->blksz;
+			} else {
+				/* Data error: might need to unmap the last page */
+				dev_warn(mmc_dev(host->mmc), "%s(): data error %d\n",
+					 __func__, data->error);
+				usdhi6_sg_unmap(host, true);
+			}
+		} else if (cmd->opcode == MMC_APP_CMD) {
+			/* Remembered for usdhi6_cmd_flags() on the next command */
+			host->app_cmd = true;
+		}
+	}
+
+	usdhi6_request_done(host);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hard-IRQ part of the SD interrupt. Reads the unmasked, non-card bits of
+ * INFO1 and the unmasked bits of INFO2, acknowledges them, stores the error
+ * bits and INFO1 status in the host and wakes the handler thread.
+ * Returns IRQ_NONE if no status bit of interest is set.
+ */
+static irqreturn_t usdhi6_sd(int irq, void *dev_id)
+{
+	struct usdhi6_host *host = dev_id;
+	u16 info1, info2, err;
+
+	info1 = usdhi6_read(host, USDHI6_SD_INFO1) & ~host->status_mask &
+		~USDHI6_SD_INFO1_CARD;
+	info2 = usdhi6_read(host, USDHI6_SD_INFO2) & ~host->status2_mask;
+
+	usdhi6_only_cd(host);
+
+	dev_dbg(mmc_dev(host->mmc),
+		"IRQ status = 0x%08x, status2 = 0x%08x\n", info1, info2);
+
+	if (!info1 && !info2)
+		return IRQ_NONE;
+
+	err = info2 & USDHI6_SD_INFO2_ERR;
+
+	/* Ack / clear interrupts */
+	if (USDHI6_SD_INFO1_IRQ & info1)
+		usdhi6_write(host, USDHI6_SD_INFO1,
+			     0xffff & ~(USDHI6_SD_INFO1_IRQ & info1));
+
+	if (USDHI6_SD_INFO2_IRQ & info2) {
+		/* In error cases BWE and BRE aren't cleared automatically */
+		if (err)
+			info2 |= USDHI6_SD_INFO2_BWE | USDHI6_SD_INFO2_BRE;
+
+		usdhi6_write(host, USDHI6_SD_INFO2,
+			     0xffff & ~(USDHI6_SD_INFO2_IRQ & info2));
+	}
+
+	host->io_error = err;
+	host->irq_status = info1;
+
+	if (err) {
+		/* Don't pollute the log with unsupported command timeouts */
+		bool cmd_timeout = host->wait == USDHI6_WAIT_FOR_CMD &&
+				   err == USDHI6_SD_INFO2_RSP_TOUT;
+
+		if (cmd_timeout)
+			dev_dbg(mmc_dev(host->mmc),
+				"%s(): INFO2 error bits 0x%08x\n",
+				__func__, err);
+		else
+			dev_warn(mmc_dev(host->mmc),
+				 "%s(): INFO2 error bits 0x%08x\n",
+				 __func__, err);
+	}
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * SDIO interrupt handler: acknowledge the unmasked SDIO status bits and
+ * signal the SDIO interrupt to the MMC core. Returns IRQ_NONE if none of the
+ * unmasked bits is set.
+ */
+static irqreturn_t usdhi6_sdio(int irq, void *dev_id)
+{
+	struct usdhi6_host *host = dev_id;
+	u32 pending;
+
+	pending = usdhi6_read(host, USDHI6_SDIO_INFO1) & ~host->sdio_mask;
+
+	dev_dbg(mmc_dev(host->mmc), "%s(): status 0x%x\n", __func__, pending);
+
+	if (!pending)
+		return IRQ_NONE;
+
+	/* Ack by writing back the complement of the handled bits */
+	usdhi6_write(host, USDHI6_SDIO_INFO1, ~pending);
+
+	mmc_signal_sdio_irq(host->mmc);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Card-detect interrupt handler. Acknowledges the unmasked card insert /
+ * eject status bits and schedules a card detection run if the event
+ * contradicts the current mmc->card state and no detection is pending yet.
+ * Returns IRQ_NONE if none of the card status bits is set.
+ */
+static irqreturn_t usdhi6_cd(int irq, void *dev_id)
+{
+	struct usdhi6_host *host = dev_id;
+	struct mmc_host *mmc = host->mmc;
+	u16 status;
+
+	/* We're only interested in hotplug events here */
+	status = usdhi6_read(host, USDHI6_SD_INFO1) & ~host->status_mask &
+		USDHI6_SD_INFO1_CARD;
+
+	if (!status)
+		return IRQ_NONE;
+
+	/*
+	 * Ack only the card bits handled here, the same way usdhi6_sd() acks
+	 * its bits: write the complement. A logical negation would always
+	 * write 0 at this point and thereby hit every bit of the register.
+	 */
+	usdhi6_write(host, USDHI6_SD_INFO1, 0xffff & ~status);
+
+	if (!work_pending(&mmc->detect.work) &&
+	    (((status & USDHI6_SD_INFO1_CARD_INSERT) &&
+	      !mmc->card) ||
+	     ((status & USDHI6_SD_INFO1_CARD_EJECT) &&
+	      mmc->card)))
+		mmc_detect_change(mmc, msecs_to_jiffies(100));
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Actually this should not be needed, if the built-in timeout works reliably in
+ * the both PIO cases and DMA never fails. But if DMA does fail, a timeout
+ * handler might be the only way to catch the error.
+ *
+ * Delayed-work handler for host->timeout_work: logs the controller status,
+ * aborts and unmaps an active DMA transfer, records -ETIMEDOUT in the part of
+ * the request matching the current state and completes the request, if any.
+ */
+static void usdhi6_timeout_work(struct work_struct *work)
+{
+	struct delayed_work *d = container_of(work, struct delayed_work, work);
+	struct usdhi6_host *host = container_of(d, struct usdhi6_host, timeout_work);
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_data *data = mrq ? mrq->data : NULL;
+
+	dev_warn(mmc_dev(host->mmc),
+		 "%s timeout wait %u CMD%d: IRQ 0x%08x:0x%08x, last IRQ 0x%08x\n",
+		 host->dma_active ? "DMA" : "PIO",
+		 host->wait, mrq ? mrq->cmd->opcode : -1,
+		 usdhi6_read(host, USDHI6_SD_INFO1),
+		 usdhi6_read(host, USDHI6_SD_INFO2), host->irq_status);
+
+	if (host->dma_active) {
+		usdhi6_dma_kill(host);
+		usdhi6_dma_stop_unmap(host);
+	}
+
+	switch (host->wait) {
+	default:
+		dev_err(mmc_dev(host->mmc), "Invalid state %u\n", host->wait);
+		/* mrq can be NULL in this actually impossible case */
+		/* fall through */
+	case USDHI6_WAIT_FOR_CMD:
+		usdhi6_error_code(host);
+		if (mrq)
+			mrq->cmd->error = -ETIMEDOUT;
+		break;
+	case USDHI6_WAIT_FOR_STOP:
+		usdhi6_error_code(host);
+		mrq->stop->error = -ETIMEDOUT;
+		break;
+	case USDHI6_WAIT_FOR_DMA:
+	case USDHI6_WAIT_FOR_MREAD:
+	case USDHI6_WAIT_FOR_MWRITE:
+	case USDHI6_WAIT_FOR_READ:
+	case USDHI6_WAIT_FOR_WRITE:
+		dev_dbg(mmc_dev(host->mmc),
+			"%c: page #%u @ +0x%zx %ux%u in SG%u. Current SG %u bytes @ %u\n",
+			data->flags & MMC_DATA_READ ? 'R' : 'W', host->page_idx,
+			host->offset, data->blocks, data->blksz, data->sg_len,
+			sg_dma_len(host->sg), host->sg->offset);
+		usdhi6_sg_unmap(host, true);
+		/*
+		 * If USDHI6_WAIT_FOR_DATA_END times out, we have already unmapped
+		 * the page
+		 */
+		/* fall through */
+	case USDHI6_WAIT_FOR_DATA_END:
+		usdhi6_error_code(host);
+		data->error = -ETIMEDOUT;
+	}
+
+	if (mrq)
+		usdhi6_request_done(host);
+}
+
+/*			 Probe / release				*/
+
+/* Device tree compatible strings handled by this driver */
+static const struct of_device_id usdhi6_of_match[] = {
+	{.compatible = "renesas,usdhi6rol0"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, usdhi6_of_match);
+
+/*
+ * Probe one controller instance described in the device tree.
+ *
+ * Allocates the MMC host, maps the registers, enables the clock, checks the
+ * version register, requests the interrupts and (optionally) DMA channels
+ * and registers the host with the MMC core.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * acquired here that is not device-managed is released again.
+ */
+static int usdhi6_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mmc_host *mmc;
+	struct usdhi6_host *host;
+	struct resource *res;
+	int irq_cd, irq_sd, irq_sdio;
+	u32 version;
+	int ret;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	/* The card detect interrupt is optional, the other two are required */
+	irq_cd = platform_get_irq_byname(pdev, "card detect");
+	irq_sd = platform_get_irq_byname(pdev, "data");
+	irq_sdio = platform_get_irq_byname(pdev, "SDIO");
+	if (irq_sd < 0 || irq_sdio < 0)
+		return -ENODEV;
+
+	mmc = mmc_alloc_host(sizeof(struct usdhi6_host), dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	ret = mmc_of_parse(mmc);
+	if (ret < 0)
+		goto e_free_mmc;
+
+	mmc_regulator_get_supply(mmc);
+
+	host		= mmc_priv(mmc);
+	host->mmc	= mmc;
+	host->wait	= USDHI6_WAIT_FOR_REQUEST;
+	host->timeout	= msecs_to_jiffies(4000);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	host->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(host->base)) {
+		ret = PTR_ERR(host->base);
+		goto e_free_mmc;
+	}
+
+	host->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(host->clk)) {
+		/* ret still holds 0 here: report the real error */
+		ret = PTR_ERR(host->clk);
+		goto e_free_mmc;
+	}
+
+	host->imclk = clk_get_rate(host->clk);
+
+	ret = clk_prepare_enable(host->clk);
+	if (ret < 0)
+		goto e_free_mmc;
+
+	version = usdhi6_read(host, USDHI6_VERSION);
+	if ((version & 0xfff) != 0xa0d) {
+		dev_err(dev, "Version not recognized %x\n", version);
+		/* Must not return success after freeing the host */
+		ret = -ENODEV;
+		goto e_clk_off;
+	}
+
+	dev_info(dev, "A USDHI6ROL0 SD host detected with %d ports\n",
+		 usdhi6_read(host, USDHI6_SD_PORT_SEL) >> USDHI6_SD_PORT_SEL_PORTS_SHIFT);
+
+	usdhi6_mask_all(host);
+
+	if (irq_cd >= 0) {
+		ret = devm_request_irq(dev, irq_cd, usdhi6_cd, 0,
+				       dev_name(dev), host);
+		if (ret < 0)
+			goto e_clk_off;
+	} else {
+		/* No card detect interrupt: ask the core to poll */
+		mmc->caps |= MMC_CAP_NEEDS_POLL;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq_sd, usdhi6_sd, usdhi6_sd_bh, 0,
+			       dev_name(dev), host);
+	if (ret < 0)
+		goto e_clk_off;
+
+	ret = devm_request_irq(dev, irq_sdio, usdhi6_sdio, 0,
+			       dev_name(dev), host);
+	if (ret < 0)
+		goto e_clk_off;
+
+	INIT_DELAYED_WORK(&host->timeout_work, usdhi6_timeout_work);
+
+	/* DMA is optional: without channels the driver uses PIO */
+	usdhi6_dma_request(host, res->start);
+
+	mmc->ops = &usdhi6_ops;
+	mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
+		MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_SDIO_IRQ;
+	/* Set .max_segs to some random number. Feel free to adjust. */
+	mmc->max_segs = 32;
+	mmc->max_blk_size = 512;
+	mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+	mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
+	/*
+	 * Setting .max_seg_size to 1 page would simplify our page-mapping code,
+	 * But OTOH, having large segments makes DMA more efficient. We could
+	 * check, whether we managed to get DMA and fall back to 1 page
+	 * segments, but if we do manage to obtain DMA and then it fails at
+	 * run-time and we fall back to PIO, we will continue getting large
+	 * segments. So, we wouldn't be able to get rid of the code anyway.
+	 */
+	mmc->max_seg_size = mmc->max_req_size;
+	if (!mmc->f_max)
+		mmc->f_max = host->imclk;
+	mmc->f_min = host->imclk / 512;
+
+	platform_set_drvdata(pdev, host);
+
+	ret = mmc_add_host(mmc);
+	if (ret < 0)
+		goto e_release_dma;
+
+	return 0;
+
+e_release_dma:
+	/* DMA channels are not device-managed, give them back explicitly */
+	usdhi6_dma_release(host);
+e_clk_off:
+	clk_disable_unprepare(host->clk);
+e_free_mmc:
+	mmc_free_host(mmc);
+
+	return ret;
+}
+
+/*
+ * Undo usdhi6_probe(): unregister the host from the MMC core, mask all
+ * interrupts, cancel the timeout work, release the DMA channels, switch the
+ * clock off and free the host. Always returns 0.
+ */
+static int usdhi6_remove(struct platform_device *pdev)
+{
+	struct usdhi6_host *host = platform_get_drvdata(pdev);
+	struct mmc_host *mmc = host->mmc;
+
+	mmc_remove_host(mmc);
+
+	/* Quiesce the hardware and pending work before dropping resources */
+	usdhi6_mask_all(host);
+	cancel_delayed_work_sync(&host->timeout_work);
+
+	usdhi6_dma_release(host);
+	clk_disable_unprepare(host->clk);
+
+	/* host lives inside mmc's private area, so this goes last */
+	mmc_free_host(mmc);
+
+	return 0;
+}
+
+/*
+ * Platform driver glue: binds usdhi6_probe() / usdhi6_remove() to devices
+ * matching usdhi6_of_match or the "usdhi6rol0" platform name.
+ */
+static struct platform_driver usdhi6_driver = {
+	.probe		= usdhi6_probe,
+	.remove		= usdhi6_remove,
+	.driver		= {
+		.name	= "usdhi6rol0",
+		.owner	= THIS_MODULE,
+		.of_match_table = usdhi6_of_match,
+	},
+};
+
+module_platform_driver(usdhi6_driver);
+
+MODULE_DESCRIPTION("Renesas usdhi6rol0 SD/SDIO host driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:usdhi6rol0");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");

diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 498d1f7..282891a 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c

@@ -840,7 +840,7 @@
 	priv->dma_desc_buffer = dma_alloc_coherent(&pdev->dev,
 						   mmc->max_blk_count * 16,
 						   &priv->dma_desc_device_addr,
-						   208);
+						   GFP_KERNEL);
 	if (!priv->dma_desc_buffer) {
 		dev_err(&pdev->dev, "DMA alloc fail\n");
 		ret = -EPERM;

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 5d49a21..94b8210 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig

@@ -321,6 +321,8 @@
 
 source "drivers/mtd/lpddr/Kconfig"
 
+source "drivers/mtd/spi-nor/Kconfig"
+
 source "drivers/mtd/ubi/Kconfig"
 
 endif # MTD

diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 4cfb31e..99bb9a1 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile

@@ -32,4 +32,5 @@
 
 obj-y		+= chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
 
+obj-$(CONFIG_MTD_SPI_NOR)	+= spi-nor/
 obj-$(CONFIG_MTD_UBI)		+= ubi/

diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index e4696b3..9f02c28 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig

@@ -169,33 +169,33 @@
 	  in the programming of OTP bits will waste them.
 
 config MTD_CFI_INTELEXT
-	tristate "Support for Intel/Sharp flash chips"
+	tristate "Support for CFI command set 0001 (Intel/Sharp chips)"
 	depends on MTD_GEN_PROBE
 	select MTD_CFI_UTIL
 	help
 	  The Common Flash Interface defines a number of different command
 	  sets which a CFI-compliant chip may claim to implement. This code
-	  provides support for one of those command sets, used on Intel
-	  StrataFlash and other parts.
+	  provides support for command set 0001, used on Intel StrataFlash
+	  and other parts.
 
 config MTD_CFI_AMDSTD
-	tristate "Support for AMD/Fujitsu/Spansion flash chips"
+	tristate "Support for CFI command set 0002 (AMD/Fujitsu/Spansion chips)"
 	depends on MTD_GEN_PROBE
 	select MTD_CFI_UTIL
 	help
 	  The Common Flash Interface defines a number of different command
 	  sets which a CFI-compliant chip may claim to implement. This code
-	  provides support for one of those command sets, used on chips
-	  including the AMD Am29LV320.
+	  provides support for command set 0002, used on chips including
+	  the AMD Am29LV320.
 
 config MTD_CFI_STAA
-	tristate "Support for ST (Advanced Architecture) flash chips"
+	tristate "Support for CFI command set 0020 (ST (Advanced Architecture) chips)"
 	depends on MTD_GEN_PROBE
 	select MTD_CFI_UTIL
 	help
 	  The Common Flash Interface defines a number of different command
 	  sets which a CFI-compliant chip may claim to implement. This code
-	  provides support for one of those command sets.
+	  provides support for command set 0020.
 
 config MTD_CFI_UTIL
 	tristate

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 6293855..423666b 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c

@@ -961,7 +961,7 @@
 			chipnum++;
 
 			if (chipnum >= cfi->numchips)
-			break;
+				break;
 		}
 	}
 
@@ -1170,7 +1170,7 @@
 			chipnum++;
 
 			if (chipnum >= cfi->numchips)
-			break;
+				break;
 		}
 	}
 	return 0;

diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 08049f6..09c79bd 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c

@@ -239,7 +239,7 @@
 			chipnum++;
 
 			if (chipnum >= cfi->numchips)
-			break;
+				break;
 		}
 	}
 

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 1210bc2..c49d0b1 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig

@@ -80,7 +80,7 @@
 
 config MTD_M25P80
 	tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
-	depends on SPI_MASTER
+	depends on SPI_MASTER && MTD_SPI_NOR
 	help
 	  This enables access to most modern SPI flash chips, used for
 	  program and data storage.   Series supported include Atmel AT26DF,
@@ -212,7 +212,7 @@
 
 config MTD_ST_SPI_FSM
 	tristate "ST Microelectronics SPI FSM Serial Flash Controller"
-	depends on ARM || SH
+	depends on ARCH_STI
 	help
 	  This provides an MTD device driver for the ST Microelectronics
 	  SPI Fast Sequence Mode (FSM) Serial Flash Controller and support

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index dd5e101..91a169c 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c

@@ -1608,8 +1608,8 @@
 #define FLOOR_SYSFS(id) { \
 	__ATTR(f##id##_dps0_is_keylocked, S_IRUGO, dps0_is_key_locked, NULL), \
 	__ATTR(f##id##_dps1_is_keylocked, S_IRUGO, dps1_is_key_locked, NULL), \
-	__ATTR(f##id##_dps0_protection_key, S_IWUGO, NULL, dps0_insert_key), \
-	__ATTR(f##id##_dps1_protection_key, S_IWUGO, NULL, dps1_insert_key), \
+	__ATTR(f##id##_dps0_protection_key, S_IWUSR|S_IWGRP, NULL, dps0_insert_key), \
+	__ATTR(f##id##_dps1_protection_key, S_IWUSR|S_IWGRP, NULL, dps1_insert_key), \
 }
 
 static struct device_attribute doc_sys_attrs[DOC_MAX_NBFLOORS][4] = {

diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
index 1fd4a0f..7df8694 100644
--- a/drivers/mtd/devices/elm.c
+++ b/drivers/mtd/devices/elm.c

@@ -213,6 +213,28 @@
 				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
 				elm_write_reg(info, offset, val);
 				break;
+			case BCH16_ECC:
+				val = cpu_to_be32(*(u32 *) &ecc[22]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[18]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[14]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[10]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[6]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[2]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+				elm_write_reg(info, offset, val);
+				break;
 			default:
 				pr_err("invalid config bch_type\n");
 			}
@@ -418,6 +440,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 /**
  * elm_context_save
  * saves ELM configurations to preserve them across Hardware powered-down
@@ -435,6 +458,13 @@
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			regs->elm_syndrome_fragment_6[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_6 + offset);
+			regs->elm_syndrome_fragment_5[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_5 + offset);
+			regs->elm_syndrome_fragment_4[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_4 + offset);
 		case BCH8_ECC:
 			regs->elm_syndrome_fragment_3[i] = elm_read_reg(info,
 					ELM_SYNDROME_FRAGMENT_3 + offset);
@@ -473,6 +503,13 @@
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_6 + offset,
+					regs->elm_syndrome_fragment_6[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_5 + offset,
+					regs->elm_syndrome_fragment_5[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_4 + offset,
+					regs->elm_syndrome_fragment_4[i]);
 		case BCH8_ECC:
 			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset,
 					regs->elm_syndrome_fragment_3[i]);
@@ -509,6 +546,7 @@
 	elm_context_restore(info);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(elm_pm_ops, elm_suspend, elm_resume);
 

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 524dab3..ed7e0a1b 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c

@@ -19,485 +19,98 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/math64.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mod_devicetable.h>
 
-#include <linux/mtd/cfi.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
-#include <linux/of_platform.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
+#include <linux/mtd/spi-nor.h>
 
-/* Flash opcodes. */
-#define	OPCODE_WREN		0x06	/* Write enable */
-#define	OPCODE_RDSR		0x05	/* Read status register */
-#define	OPCODE_WRSR		0x01	/* Write status register 1 byte */
-#define	OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ        0x3b    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ        0x6b    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
-#define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
-#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
-#define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
-#define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
-#define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
-#define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
-#define	OPCODE_RDCR             0x35    /* Read configuration register */
-
-/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ_4B	0x3c    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ_4B	0x6c    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
-#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
-
-/* Used for SST flashes only. */
-#define	OPCODE_BP		0x02	/* Byte program */
-#define	OPCODE_WRDI		0x04	/* Write disable */
-#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
-
-/* Used for Macronix and Winbond flashes. */
-#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
-#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
-
-/* Used for Spansion flashes only. */
-#define	OPCODE_BRWR		0x17	/* Bank register write */
-
-/* Status Register bits. */
-#define	SR_WIP			1	/* Write in progress */
-#define	SR_WEL			2	/* Write enable latch */
-/* meaning of other SR_* bits may differ between vendors */
-#define	SR_BP0			4	/* Block protect 0 */
-#define	SR_BP1			8	/* Block protect 1 */
-#define	SR_BP2			0x10	/* Block protect 2 */
-#define	SR_SRWD			0x80	/* SR write protect */
-
-#define SR_QUAD_EN_MX           0x40    /* Macronix Quad I/O */
-
-/* Configuration Register bits. */
-#define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
-
-/* Define max times to check status register before we give up. */
-#define	MAX_READY_WAIT_JIFFIES	(40 * HZ)	/* M25P16 specs 40s max chip erase */
 #define	MAX_CMD_SIZE		6
-
-#define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
-
-/****************************************************************************/
-
-enum read_type {
-	M25P80_NORMAL = 0,
-	M25P80_FAST,
-	M25P80_DUAL,
-	M25P80_QUAD,
-};
-
 struct m25p {
 	struct spi_device	*spi;
-	struct mutex		lock;
+	struct spi_nor		spi_nor;
 	struct mtd_info		mtd;
-	u16			page_size;
-	u16			addr_width;
-	u8			erase_opcode;
-	u8			read_opcode;
-	u8			program_opcode;
-	u8			*command;
-	enum read_type		flash_read;
+	u8			command[MAX_CMD_SIZE];
 };
 
-static inline struct m25p *mtd_to_m25p(struct mtd_info *mtd)
+static int m25p80_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
 {
-	return container_of(mtd, struct m25p, mtd);
-}
-
-/****************************************************************************/
-
-/*
- * Internal helper functions
- */
-
-/*
- * Read the status register, returning its value in the location
- * Return the status register value.
- * Returns negative if error occurred.
- */
-static int read_sr(struct m25p *flash)
-{
-	ssize_t retval;
-	u8 code = OPCODE_RDSR;
-	u8 val;
-
-	retval = spi_write_then_read(flash->spi, &code, 1, &val, 1);
-
-	if (retval < 0) {
-		dev_err(&flash->spi->dev, "error %d reading SR\n",
-				(int) retval);
-		return retval;
-	}
-
-	return val;
-}
-
-/*
- * Read configuration register, returning its value in the
- * location. Return the configuration register value.
- * Returns negative if error occured.
- */
-static int read_cr(struct m25p *flash)
-{
-	u8 code = OPCODE_RDCR;
+	struct m25p *flash = nor->priv;
+	struct spi_device *spi = flash->spi;
 	int ret;
-	u8 val;
 
-	ret = spi_write_then_read(flash->spi, &code, 1, &val, 1);
-	if (ret < 0) {
-		dev_err(&flash->spi->dev, "error %d reading CR\n", ret);
-		return ret;
-	}
+	ret = spi_write_then_read(spi, &code, 1, val, len);
+	if (ret < 0)
+		dev_err(&spi->dev, "error %d reading %x\n", ret, code);
 
-	return val;
+	return ret;
 }
 
-/*
- * Write status register 1 byte
- * Returns negative if error occurred.
- */
-static int write_sr(struct m25p *flash, u8 val)
-{
-	flash->command[0] = OPCODE_WRSR;
-	flash->command[1] = val;
-
-	return spi_write(flash->spi, flash->command, 2);
-}
-
-/*
- * Set write enable latch with Write Enable command.
- * Returns negative if error occurred.
- */
-static inline int write_enable(struct m25p *flash)
-{
-	u8	code = OPCODE_WREN;
-
-	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
-}
-
-/*
- * Send write disble instruction to the chip.
- */
-static inline int write_disable(struct m25p *flash)
-{
-	u8	code = OPCODE_WRDI;
-
-	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
-}
-
-/*
- * Enable/disable 4-byte addressing mode.
- */
-static inline int set_4byte(struct m25p *flash, u32 jedec_id, int enable)
-{
-	int status;
-	bool need_wren = false;
-
-	switch (JEDEC_MFR(jedec_id)) {
-	case CFI_MFR_ST: /* Micron, actually */
-		/* Some Micron need WREN command; all will accept it */
-		need_wren = true;
-	case CFI_MFR_MACRONIX:
-	case 0xEF /* winbond */:
-		if (need_wren)
-			write_enable(flash);
-
-		flash->command[0] = enable ? OPCODE_EN4B : OPCODE_EX4B;
-		status = spi_write(flash->spi, flash->command, 1);
-
-		if (need_wren)
-			write_disable(flash);
-
-		return status;
-	default:
-		/* Spansion style */
-		flash->command[0] = OPCODE_BRWR;
-		flash->command[1] = enable << 7;
-		return spi_write(flash->spi, flash->command, 2);
-	}
-}
-
-/*
- * Service routine to read status register until ready, or timeout occurs.
- * Returns non-zero if error.
- */
-static int wait_till_ready(struct m25p *flash)
-{
-	unsigned long deadline;
-	int sr;
-
-	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
-
-	do {
-		if ((sr = read_sr(flash)) < 0)
-			break;
-		else if (!(sr & SR_WIP))
-			return 0;
-
-		cond_resched();
-
-	} while (!time_after_eq(jiffies, deadline));
-
-	return 1;
-}
-
-/*
- * Write status Register and configuration register with 2 bytes
- * The first byte will be written to the status register, while the
- * second byte will be written to the configuration register.
- * Return negative if error occured.
- */
-static int write_sr_cr(struct m25p *flash, u16 val)
-{
-	flash->command[0] = OPCODE_WRSR;
-	flash->command[1] = val & 0xff;
-	flash->command[2] = (val >> 8);
-
-	return spi_write(flash->spi, flash->command, 3);
-}
-
-static int macronix_quad_enable(struct m25p *flash)
-{
-	int ret, val;
-	u8 cmd[2];
-	cmd[0] = OPCODE_WRSR;
-
-	val = read_sr(flash);
-	cmd[1] = val | SR_QUAD_EN_MX;
-	write_enable(flash);
-
-	spi_write(flash->spi, &cmd, 2);
-
-	if (wait_till_ready(flash))
-		return 1;
-
-	ret = read_sr(flash);
-	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
-		dev_err(&flash->spi->dev, "Macronix Quad bit not set\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int spansion_quad_enable(struct m25p *flash)
-{
-	int ret;
-	int quad_en = CR_QUAD_EN_SPAN << 8;
-
-	write_enable(flash);
-
-	ret = write_sr_cr(flash, quad_en);
-	if (ret < 0) {
-		dev_err(&flash->spi->dev,
-			"error while writing configuration register\n");
-		return -EINVAL;
-	}
-
-	/* read back and check it */
-	ret = read_cr(flash);
-	if (!(ret > 0 && (ret & CR_QUAD_EN_SPAN))) {
-		dev_err(&flash->spi->dev, "Spansion Quad bit not set\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int set_quad_mode(struct m25p *flash, u32 jedec_id)
-{
-	int status;
-
-	switch (JEDEC_MFR(jedec_id)) {
-	case CFI_MFR_MACRONIX:
-		status = macronix_quad_enable(flash);
-		if (status) {
-			dev_err(&flash->spi->dev,
-				"Macronix quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	default:
-		status = spansion_quad_enable(flash);
-		if (status) {
-			dev_err(&flash->spi->dev,
-				"Spansion quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	}
-}
-
-/*
- * Erase the whole flash memory
- *
- * Returns 0 if successful, non-zero otherwise.
- */
-static int erase_chip(struct m25p *flash)
-{
-	pr_debug("%s: %s %lldKiB\n", dev_name(&flash->spi->dev), __func__,
-			(long long)(flash->mtd.size >> 10));
-
-	/* Wait until finished previous write command. */
-	if (wait_till_ready(flash))
-		return 1;
-
-	/* Send write enable, then erase commands. */
-	write_enable(flash);
-
-	/* Set up command buffer. */
-	flash->command[0] = OPCODE_CHIP_ERASE;
-
-	spi_write(flash->spi, flash->command, 1);
-
-	return 0;
-}
-
-static void m25p_addr2cmd(struct m25p *flash, unsigned int addr, u8 *cmd)
+static void m25p_addr2cmd(struct spi_nor *nor, unsigned int addr, u8 *cmd)
 {
 	/* opcode is in cmd[0] */
-	cmd[1] = addr >> (flash->addr_width * 8 -  8);
-	cmd[2] = addr >> (flash->addr_width * 8 - 16);
-	cmd[3] = addr >> (flash->addr_width * 8 - 24);
-	cmd[4] = addr >> (flash->addr_width * 8 - 32);
+	cmd[1] = addr >> (nor->addr_width * 8 -  8);
+	cmd[2] = addr >> (nor->addr_width * 8 - 16);
+	cmd[3] = addr >> (nor->addr_width * 8 - 24);
+	cmd[4] = addr >> (nor->addr_width * 8 - 32);
 }
 
-static int m25p_cmdsz(struct m25p *flash)
+static int m25p_cmdsz(struct spi_nor *nor)
 {
-	return 1 + flash->addr_width;
+	return 1 + nor->addr_width;
 }
 
-/*
- * Erase one sector of flash memory at offset ``offset'' which is any
- * address within the sector which should be erased.
- *
- * Returns 0 if successful, non-zero otherwise.
- */
-static int erase_sector(struct m25p *flash, u32 offset)
+static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
+			int wr_en)
 {
-	pr_debug("%s: %s %dKiB at 0x%08x\n", dev_name(&flash->spi->dev),
-			__func__, flash->mtd.erasesize / 1024, offset);
+	struct m25p *flash = nor->priv;
+	struct spi_device *spi = flash->spi;
 
-	/* Wait until finished previous write command. */
-	if (wait_till_ready(flash))
-		return 1;
+	flash->command[0] = opcode;
+	if (buf)
+		memcpy(&flash->command[1], buf, len);
 
-	/* Send write enable, then erase commands. */
-	write_enable(flash);
-
-	/* Set up command buffer. */
-	flash->command[0] = flash->erase_opcode;
-	m25p_addr2cmd(flash, offset, flash->command);
-
-	spi_write(flash->spi, flash->command, m25p_cmdsz(flash));
-
-	return 0;
+	return spi_write(spi, flash->command, len + 1);
 }
 
-/****************************************************************************/
-
-/*
- * MTD implementation
- */
-
-/*
- * Erase an address range on the flash chip.  The address range may extend
- * one or more erase sectors.  Return an error is there is a problem erasing.
- */
-static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
+static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
+			size_t *retlen, const u_char *buf)
 {
-	struct m25p *flash = mtd_to_m25p(mtd);
-	u32 addr,len;
-	uint32_t rem;
+	struct m25p *flash = nor->priv;
+	struct spi_device *spi = flash->spi;
+	struct spi_transfer t[2] = {};
+	struct spi_message m;
+	int cmd_sz = m25p_cmdsz(nor);
 
-	pr_debug("%s: %s at 0x%llx, len %lld\n", dev_name(&flash->spi->dev),
-			__func__, (long long)instr->addr,
-			(long long)instr->len);
+	spi_message_init(&m);
 
-	div_u64_rem(instr->len, mtd->erasesize, &rem);
-	if (rem)
-		return -EINVAL;
+	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
+		cmd_sz = 1;
 
-	addr = instr->addr;
-	len = instr->len;
+	flash->command[0] = nor->program_opcode;
+	m25p_addr2cmd(nor, to, flash->command);
 
-	mutex_lock(&flash->lock);
+	t[0].tx_buf = flash->command;
+	t[0].len = cmd_sz;
+	spi_message_add_tail(&t[0], &m);
 
-	/* whole-chip erase? */
-	if (len == flash->mtd.size) {
-		if (erase_chip(flash)) {
-			instr->state = MTD_ERASE_FAILED;
-			mutex_unlock(&flash->lock);
-			return -EIO;
-		}
+	t[1].tx_buf = buf;
+	t[1].len = len;
+	spi_message_add_tail(&t[1], &m);
 
-	/* REVISIT in some cases we could speed up erasing large regions
-	 * by using OPCODE_SE instead of OPCODE_BE_4K.  We may have set up
-	 * to use "small sector erase", but that's not always optimal.
-	 */
+	spi_sync(spi, &m);
 
-	/* "sector"-at-a-time erase */
-	} else {
-		while (len) {
-			if (erase_sector(flash, addr)) {
-				instr->state = MTD_ERASE_FAILED;
-				mutex_unlock(&flash->lock);
-				return -EIO;
-			}
-
-			addr += mtd->erasesize;
-			len -= mtd->erasesize;
-		}
-	}
-
-	mutex_unlock(&flash->lock);
-
-	instr->state = MTD_ERASE_DONE;
-	mtd_erase_callback(instr);
-
-	return 0;
+	*retlen += m.actual_length - cmd_sz;
 }
 
-/*
- * Dummy Cycle calculation for different type of read.
- * It can be used to support more commands with
- * different dummy cycle requirements.
- */
-static inline int m25p80_dummy_cycles_read(struct m25p *flash)
+static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor)
 {
-	switch (flash->flash_read) {
-	case M25P80_FAST:
-	case M25P80_DUAL:
-	case M25P80_QUAD:
-		return 1;
-	case M25P80_NORMAL:
-		return 0;
-	default:
-		dev_err(&flash->spi->dev, "No valid read type supported\n");
-		return -1;
-	}
-}
-
-static inline unsigned int m25p80_rx_nbits(const struct m25p *flash)
-{
-	switch (flash->flash_read) {
-	case M25P80_DUAL:
+	switch (nor->flash_read) {
+	case SPI_NOR_DUAL:
 		return 2;
-	case M25P80_QUAD:
+	case SPI_NOR_QUAD:
 		return 4;
 	default:
 		return 0;
@@ -505,590 +118,72 @@
 }
 
 /*
- * Read an address range from the flash chip.  The address range
+ * Read an address range from the nor chip.  The address range
  * may be any size provided it is within the physical boundaries.
  */
-static int m25p80_read(struct mtd_info *mtd, loff_t from, size_t len,
-	size_t *retlen, u_char *buf)
+static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
+			size_t *retlen, u_char *buf)
 {
-	struct m25p *flash = mtd_to_m25p(mtd);
+	struct m25p *flash = nor->priv;
+	struct spi_device *spi = flash->spi;
 	struct spi_transfer t[2];
 	struct spi_message m;
-	uint8_t opcode;
-	int dummy;
+	int dummy = nor->read_dummy;
+	int ret;
 
-	pr_debug("%s: %s from 0x%08x, len %zd\n", dev_name(&flash->spi->dev),
-			__func__, (u32)from, len);
+	/* Wait till previous write/erase is done. */
+	ret = nor->wait_till_ready(nor);
+	if (ret)
+		return ret;
 
 	spi_message_init(&m);
 	memset(t, 0, (sizeof t));
 
-	dummy =  m25p80_dummy_cycles_read(flash);
-	if (dummy < 0) {
-		dev_err(&flash->spi->dev, "No valid read command supported\n");
-		return -EINVAL;
-	}
+	flash->command[0] = nor->read_opcode;
+	m25p_addr2cmd(nor, from, flash->command);
 
 	t[0].tx_buf = flash->command;
-	t[0].len = m25p_cmdsz(flash) + dummy;
+	t[0].len = m25p_cmdsz(nor) + dummy;
 	spi_message_add_tail(&t[0], &m);
 
 	t[1].rx_buf = buf;
-	t[1].rx_nbits = m25p80_rx_nbits(flash);
+	t[1].rx_nbits = m25p80_rx_nbits(nor);
 	t[1].len = len;
 	spi_message_add_tail(&t[1], &m);
 
-	mutex_lock(&flash->lock);
+	spi_sync(spi, &m);
 
-	/* Wait till previous write/erase is done. */
-	if (wait_till_ready(flash)) {
-		/* REVISIT status return?? */
-		mutex_unlock(&flash->lock);
-		return 1;
-	}
-
-	/* Set up the write data buffer. */
-	opcode = flash->read_opcode;
-	flash->command[0] = opcode;
-	m25p_addr2cmd(flash, from, flash->command);
-
-	spi_sync(flash->spi, &m);
-
-	*retlen = m.actual_length - m25p_cmdsz(flash) - dummy;
-
-	mutex_unlock(&flash->lock);
-
+	*retlen = m.actual_length - m25p_cmdsz(nor) - dummy;
 	return 0;
 }
 
-/*
- * Write an address range to the flash chip.  Data must be written in
- * FLASH_PAGESIZE chunks.  The address range may be any size provided
- * it is within the physical boundaries.
- */
-static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
-	size_t *retlen, const u_char *buf)
+static int m25p80_erase(struct spi_nor *nor, loff_t offset)
 {
-	struct m25p *flash = mtd_to_m25p(mtd);
-	u32 page_offset, page_size;
-	struct spi_transfer t[2];
-	struct spi_message m;
+	struct m25p *flash = nor->priv;
+	int ret;
 
-	pr_debug("%s: %s to 0x%08x, len %zd\n", dev_name(&flash->spi->dev),
-			__func__, (u32)to, len);
-
-	spi_message_init(&m);
-	memset(t, 0, (sizeof t));
-
-	t[0].tx_buf = flash->command;
-	t[0].len = m25p_cmdsz(flash);
-	spi_message_add_tail(&t[0], &m);
-
-	t[1].tx_buf = buf;
-	spi_message_add_tail(&t[1], &m);
-
-	mutex_lock(&flash->lock);
+	dev_dbg(nor->dev, "%dKiB at 0x%08x\n",
+		flash->mtd.erasesize / 1024, (u32)offset);
 
 	/* Wait until finished previous write command. */
-	if (wait_till_ready(flash)) {
-		mutex_unlock(&flash->lock);
-		return 1;
-	}
+	ret = nor->wait_till_ready(nor);
+	if (ret)
+		return ret;
 
-	write_enable(flash);
+	/* Send write enable, then erase commands. */
+	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
+	if (ret)
+		return ret;
 
-	/* Set up the opcode in the write buffer. */
-	flash->command[0] = flash->program_opcode;
-	m25p_addr2cmd(flash, to, flash->command);
+	/* Set up command buffer. */
+	flash->command[0] = nor->erase_opcode;
+	m25p_addr2cmd(nor, offset, flash->command);
 
-	page_offset = to & (flash->page_size - 1);
-
-	/* do all the bytes fit onto one page? */
-	if (page_offset + len <= flash->page_size) {
-		t[1].len = len;
-
-		spi_sync(flash->spi, &m);
-
-		*retlen = m.actual_length - m25p_cmdsz(flash);
-	} else {
-		u32 i;
-
-		/* the size of data remaining on the first page */
-		page_size = flash->page_size - page_offset;
-
-		t[1].len = page_size;
-		spi_sync(flash->spi, &m);
-
-		*retlen = m.actual_length - m25p_cmdsz(flash);
-
-		/* write everything in flash->page_size chunks */
-		for (i = page_size; i < len; i += page_size) {
-			page_size = len - i;
-			if (page_size > flash->page_size)
-				page_size = flash->page_size;
-
-			/* write the next page to flash */
-			m25p_addr2cmd(flash, to + i, flash->command);
-
-			t[1].tx_buf = buf + i;
-			t[1].len = page_size;
-
-			wait_till_ready(flash);
-
-			write_enable(flash);
-
-			spi_sync(flash->spi, &m);
-
-			*retlen += m.actual_length - m25p_cmdsz(flash);
-		}
-	}
-
-	mutex_unlock(&flash->lock);
+	spi_write(flash->spi, flash->command, m25p_cmdsz(nor));
 
 	return 0;
 }
 
-static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
-		size_t *retlen, const u_char *buf)
-{
-	struct m25p *flash = mtd_to_m25p(mtd);
-	struct spi_transfer t[2];
-	struct spi_message m;
-	size_t actual;
-	int cmd_sz, ret;
-
-	pr_debug("%s: %s to 0x%08x, len %zd\n", dev_name(&flash->spi->dev),
-			__func__, (u32)to, len);
-
-	spi_message_init(&m);
-	memset(t, 0, (sizeof t));
-
-	t[0].tx_buf = flash->command;
-	t[0].len = m25p_cmdsz(flash);
-	spi_message_add_tail(&t[0], &m);
-
-	t[1].tx_buf = buf;
-	spi_message_add_tail(&t[1], &m);
-
-	mutex_lock(&flash->lock);
-
-	/* Wait until finished previous write command. */
-	ret = wait_till_ready(flash);
-	if (ret)
-		goto time_out;
-
-	write_enable(flash);
-
-	actual = to % 2;
-	/* Start write from odd address. */
-	if (actual) {
-		flash->command[0] = OPCODE_BP;
-		m25p_addr2cmd(flash, to, flash->command);
-
-		/* write one byte. */
-		t[1].len = 1;
-		spi_sync(flash->spi, &m);
-		ret = wait_till_ready(flash);
-		if (ret)
-			goto time_out;
-		*retlen += m.actual_length - m25p_cmdsz(flash);
-	}
-	to += actual;
-
-	flash->command[0] = OPCODE_AAI_WP;
-	m25p_addr2cmd(flash, to, flash->command);
-
-	/* Write out most of the data here. */
-	cmd_sz = m25p_cmdsz(flash);
-	for (; actual < len - 1; actual += 2) {
-		t[0].len = cmd_sz;
-		/* write two bytes. */
-		t[1].len = 2;
-		t[1].tx_buf = buf + actual;
-
-		spi_sync(flash->spi, &m);
-		ret = wait_till_ready(flash);
-		if (ret)
-			goto time_out;
-		*retlen += m.actual_length - cmd_sz;
-		cmd_sz = 1;
-		to += 2;
-	}
-	write_disable(flash);
-	ret = wait_till_ready(flash);
-	if (ret)
-		goto time_out;
-
-	/* Write out trailing byte if it exists. */
-	if (actual != len) {
-		write_enable(flash);
-		flash->command[0] = OPCODE_BP;
-		m25p_addr2cmd(flash, to, flash->command);
-		t[0].len = m25p_cmdsz(flash);
-		t[1].len = 1;
-		t[1].tx_buf = buf + actual;
-
-		spi_sync(flash->spi, &m);
-		ret = wait_till_ready(flash);
-		if (ret)
-			goto time_out;
-		*retlen += m.actual_length - m25p_cmdsz(flash);
-		write_disable(flash);
-	}
-
-time_out:
-	mutex_unlock(&flash->lock);
-	return ret;
-}
-
-static int m25p80_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
-{
-	struct m25p *flash = mtd_to_m25p(mtd);
-	uint32_t offset = ofs;
-	uint8_t status_old, status_new;
-	int res = 0;
-
-	mutex_lock(&flash->lock);
-	/* Wait until finished previous command */
-	if (wait_till_ready(flash)) {
-		res = 1;
-		goto err;
-	}
-
-	status_old = read_sr(flash);
-
-	if (offset < flash->mtd.size-(flash->mtd.size/2))
-		status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0;
-	else if (offset < flash->mtd.size-(flash->mtd.size/4))
-		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
-	else if (offset < flash->mtd.size-(flash->mtd.size/8))
-		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
-	else if (offset < flash->mtd.size-(flash->mtd.size/16))
-		status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2;
-	else if (offset < flash->mtd.size-(flash->mtd.size/32))
-		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
-	else if (offset < flash->mtd.size-(flash->mtd.size/64))
-		status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1;
-	else
-		status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0;
-
-	/* Only modify protection if it will not unlock other areas */
-	if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) >
-					(status_old&(SR_BP2|SR_BP1|SR_BP0))) {
-		write_enable(flash);
-		if (write_sr(flash, status_new) < 0) {
-			res = 1;
-			goto err;
-		}
-	}
-
-err:	mutex_unlock(&flash->lock);
-	return res;
-}
-
-static int m25p80_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
-{
-	struct m25p *flash = mtd_to_m25p(mtd);
-	uint32_t offset = ofs;
-	uint8_t status_old, status_new;
-	int res = 0;
-
-	mutex_lock(&flash->lock);
-	/* Wait until finished previous command */
-	if (wait_till_ready(flash)) {
-		res = 1;
-		goto err;
-	}
-
-	status_old = read_sr(flash);
-
-	if (offset+len > flash->mtd.size-(flash->mtd.size/64))
-		status_new = status_old & ~(SR_BP2|SR_BP1|SR_BP0);
-	else if (offset+len > flash->mtd.size-(flash->mtd.size/32))
-		status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0;
-	else if (offset+len > flash->mtd.size-(flash->mtd.size/16))
-		status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1;
-	else if (offset+len > flash->mtd.size-(flash->mtd.size/8))
-		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
-	else if (offset+len > flash->mtd.size-(flash->mtd.size/4))
-		status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2;
-	else if (offset+len > flash->mtd.size-(flash->mtd.size/2))
-		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
-	else
-		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
-
-	/* Only modify protection if it will not lock other areas */
-	if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) <
-					(status_old&(SR_BP2|SR_BP1|SR_BP0))) {
-		write_enable(flash);
-		if (write_sr(flash, status_new) < 0) {
-			res = 1;
-			goto err;
-		}
-	}
-
-err:	mutex_unlock(&flash->lock);
-	return res;
-}
-
-/****************************************************************************/
-
-/*
- * SPI device driver setup and teardown
- */
-
-struct flash_info {
-	/* JEDEC id zero means "no ID" (most older chips); otherwise it has
-	 * a high byte of zero plus three data bytes: the manufacturer id,
-	 * then a two byte device id.
-	 */
-	u32		jedec_id;
-	u16             ext_id;
-
-	/* The size listed here is what works with OPCODE_SE, which isn't
-	 * necessarily called a "sector" by the vendor.
-	 */
-	unsigned	sector_size;
-	u16		n_sectors;
-
-	u16		page_size;
-	u16		addr_width;
-
-	u16		flags;
-#define	SECT_4K		0x01		/* OPCODE_BE_4K works uniformly */
-#define	M25P_NO_ERASE	0x02		/* No erase command needed */
-#define	SST_WRITE	0x04		/* use SST byte programming */
-#define	M25P_NO_FR	0x08		/* Can't do fastread */
-#define	SECT_4K_PMC	0x10		/* OPCODE_BE_4K_PMC works uniformly */
-#define	M25P80_DUAL_READ	0x20    /* Flash supports Dual Read */
-#define	M25P80_QUAD_READ	0x40    /* Flash supports Quad Read */
-};
-
-#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
-	((kernel_ulong_t)&(struct flash_info) {				\
-		.jedec_id = (_jedec_id),				\
-		.ext_id = (_ext_id),					\
-		.sector_size = (_sector_size),				\
-		.n_sectors = (_n_sectors),				\
-		.page_size = 256,					\
-		.flags = (_flags),					\
-	})
-
-#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags)	\
-	((kernel_ulong_t)&(struct flash_info) {				\
-		.sector_size = (_sector_size),				\
-		.n_sectors = (_n_sectors),				\
-		.page_size = (_page_size),				\
-		.addr_width = (_addr_width),				\
-		.flags = (_flags),					\
-	})
-
-/* NOTE: double check command sets and memory organization when you add
- * more flash chips.  This current list focusses on newer chips, which
- * have been converging on command sets which including JEDEC ID.
- */
-static const struct spi_device_id m25p_ids[] = {
-	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
-	{ "at25fs010",  INFO(0x1f6601, 0, 32 * 1024,   4, SECT_4K) },
-	{ "at25fs040",  INFO(0x1f6604, 0, 64 * 1024,   8, SECT_4K) },
-
-	{ "at25df041a", INFO(0x1f4401, 0, 64 * 1024,   8, SECT_4K) },
-	{ "at25df321a", INFO(0x1f4701, 0, 64 * 1024,  64, SECT_4K) },
-	{ "at25df641",  INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) },
-
-	{ "at26f004",   INFO(0x1f0400, 0, 64 * 1024,  8, SECT_4K) },
-	{ "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) },
-	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
-	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
-
-	{ "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) },
-
-	/* EON -- en25xxx */
-	{ "en25f32",    INFO(0x1c3116, 0, 64 * 1024,   64, SECT_4K) },
-	{ "en25p32",    INFO(0x1c2016, 0, 64 * 1024,   64, 0) },
-	{ "en25q32b",   INFO(0x1c3016, 0, 64 * 1024,   64, 0) },
-	{ "en25p64",    INFO(0x1c2017, 0, 64 * 1024,  128, 0) },
-	{ "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
-	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
-
-	/* ESMT */
-	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
-
-	/* Everspin */
-	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, M25P_NO_ERASE | M25P_NO_FR) },
-	{ "mr25h10",  CAT25_INFO(128 * 1024, 1, 256, 3, M25P_NO_ERASE | M25P_NO_FR) },
-
-	/* GigaDevice */
-	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
-
-	/* Intel/Numonyx -- xxxs33b */
-	{ "160s33b",  INFO(0x898911, 0, 64 * 1024,  32, 0) },
-	{ "320s33b",  INFO(0x898912, 0, 64 * 1024,  64, 0) },
-	{ "640s33b",  INFO(0x898913, 0, 64 * 1024, 128, 0) },
-
-	/* Macronix */
-	{ "mx25l2005a",  INFO(0xc22012, 0, 64 * 1024,   4, SECT_4K) },
-	{ "mx25l4005a",  INFO(0xc22013, 0, 64 * 1024,   8, SECT_4K) },
-	{ "mx25l8005",   INFO(0xc22014, 0, 64 * 1024,  16, 0) },
-	{ "mx25l1606e",  INFO(0xc22015, 0, 64 * 1024,  32, SECT_4K) },
-	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
-	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
-	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
-	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
-	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
-	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
-	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
-	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, M25P80_QUAD_READ) },
-	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, M25P80_QUAD_READ) },
-
-	/* Micron */
-	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, 0) },
-	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, 0) },
-	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
-	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K) },
-	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) },
-
-	/* PMC */
-	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
-	{ "pm25lv010",   INFO(0,        0, 32 * 1024,    4, SECT_4K_PMC) },
-	{ "pm25lq032",   INFO(0x7f9d46, 0, 64 * 1024,   64, SECT_4K) },
-
-	/* Spansion -- single (large) sector size only, at least
-	 * for the chips listed here (without boot sectors).
-	 */
-	{ "s25sl032p",  INFO(0x010215, 0x4d00,  64 * 1024,  64, 0) },
-	{ "s25sl064p",  INFO(0x010216, 0x4d00,  64 * 1024, 128, 0) },
-	{ "s25fl256s0", INFO(0x010219, 0x4d00, 256 * 1024, 128, 0) },
-	{ "s25fl256s1", INFO(0x010219, 0x4d01,  64 * 1024, 512, M25P80_DUAL_READ | M25P80_QUAD_READ) },
-	{ "s25fl512s",  INFO(0x010220, 0x4d00, 256 * 1024, 256, M25P80_DUAL_READ | M25P80_QUAD_READ) },
-	{ "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
-	{ "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
-	{ "s25sl12801", INFO(0x012018, 0x0301,  64 * 1024, 256, 0) },
-	{ "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024,  64, 0) },
-	{ "s25fl129p1", INFO(0x012018, 0x4d01,  64 * 1024, 256, 0) },
-	{ "s25sl004a",  INFO(0x010212,      0,  64 * 1024,   8, 0) },
-	{ "s25sl008a",  INFO(0x010213,      0,  64 * 1024,  16, 0) },
-	{ "s25sl016a",  INFO(0x010214,      0,  64 * 1024,  32, 0) },
-	{ "s25sl032a",  INFO(0x010215,      0,  64 * 1024,  64, 0) },
-	{ "s25sl064a",  INFO(0x010216,      0,  64 * 1024, 128, 0) },
-	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K) },
-	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K) },
-	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
-
-	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
-	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
-	{ "sst25vf080b", INFO(0xbf258e, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
-	{ "sst25vf016b", INFO(0xbf2541, 0, 64 * 1024, 32, SECT_4K | SST_WRITE) },
-	{ "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) },
-	{ "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) },
-	{ "sst25wf512",  INFO(0xbf2501, 0, 64 * 1024,  1, SECT_4K | SST_WRITE) },
-	{ "sst25wf010",  INFO(0xbf2502, 0, 64 * 1024,  2, SECT_4K | SST_WRITE) },
-	{ "sst25wf020",  INFO(0xbf2503, 0, 64 * 1024,  4, SECT_4K | SST_WRITE) },
-	{ "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
-
-	/* ST Microelectronics -- newer production may have feature updates */
-	{ "m25p05",  INFO(0x202010,  0,  32 * 1024,   2, 0) },
-	{ "m25p10",  INFO(0x202011,  0,  32 * 1024,   4, 0) },
-	{ "m25p20",  INFO(0x202012,  0,  64 * 1024,   4, 0) },
-	{ "m25p40",  INFO(0x202013,  0,  64 * 1024,   8, 0) },
-	{ "m25p80",  INFO(0x202014,  0,  64 * 1024,  16, 0) },
-	{ "m25p16",  INFO(0x202015,  0,  64 * 1024,  32, 0) },
-	{ "m25p32",  INFO(0x202016,  0,  64 * 1024,  64, 0) },
-	{ "m25p64",  INFO(0x202017,  0,  64 * 1024, 128, 0) },
-	{ "m25p128", INFO(0x202018,  0, 256 * 1024,  64, 0) },
-	{ "n25q032", INFO(0x20ba16,  0,  64 * 1024,  64, 0) },
-
-	{ "m25p05-nonjedec",  INFO(0, 0,  32 * 1024,   2, 0) },
-	{ "m25p10-nonjedec",  INFO(0, 0,  32 * 1024,   4, 0) },
-	{ "m25p20-nonjedec",  INFO(0, 0,  64 * 1024,   4, 0) },
-	{ "m25p40-nonjedec",  INFO(0, 0,  64 * 1024,   8, 0) },
-	{ "m25p80-nonjedec",  INFO(0, 0,  64 * 1024,  16, 0) },
-	{ "m25p16-nonjedec",  INFO(0, 0,  64 * 1024,  32, 0) },
-	{ "m25p32-nonjedec",  INFO(0, 0,  64 * 1024,  64, 0) },
-	{ "m25p64-nonjedec",  INFO(0, 0,  64 * 1024, 128, 0) },
-	{ "m25p128-nonjedec", INFO(0, 0, 256 * 1024,  64, 0) },
-
-	{ "m45pe10", INFO(0x204011,  0, 64 * 1024,    2, 0) },
-	{ "m45pe80", INFO(0x204014,  0, 64 * 1024,   16, 0) },
-	{ "m45pe16", INFO(0x204015,  0, 64 * 1024,   32, 0) },
-
-	{ "m25pe20", INFO(0x208012,  0, 64 * 1024,  4,       0) },
-	{ "m25pe80", INFO(0x208014,  0, 64 * 1024, 16,       0) },
-	{ "m25pe16", INFO(0x208015,  0, 64 * 1024, 32, SECT_4K) },
-
-	{ "m25px16",    INFO(0x207115,  0, 64 * 1024, 32, SECT_4K) },
-	{ "m25px32",    INFO(0x207116,  0, 64 * 1024, 64, SECT_4K) },
-	{ "m25px32-s0", INFO(0x207316,  0, 64 * 1024, 64, SECT_4K) },
-	{ "m25px32-s1", INFO(0x206316,  0, 64 * 1024, 64, SECT_4K) },
-	{ "m25px64",    INFO(0x207117,  0, 64 * 1024, 128, 0) },
-
-	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
-	{ "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
-	{ "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
-	{ "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
-	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
-	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
-	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
-	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
-	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
-	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
-	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
-	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
-	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
-
-	/* Catalyst / On Semiconductor -- non-JEDEC */
-	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, M25P_NO_ERASE | M25P_NO_FR) },
-	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2, M25P_NO_ERASE | M25P_NO_FR) },
-	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2, M25P_NO_ERASE | M25P_NO_FR) },
-	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2, M25P_NO_ERASE | M25P_NO_FR) },
-	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, M25P_NO_ERASE | M25P_NO_FR) },
-	{ },
-};
-MODULE_DEVICE_TABLE(spi, m25p_ids);
-
-static const struct spi_device_id *jedec_probe(struct spi_device *spi)
-{
-	int			tmp;
-	u8			code = OPCODE_RDID;
-	u8			id[5];
-	u32			jedec;
-	u16                     ext_jedec;
-	struct flash_info	*info;
-
-	/* JEDEC also defines an optional "extended device information"
-	 * string for after vendor-specific data, after the three bytes
-	 * we use here.  Supporting some chips might require using it.
-	 */
-	tmp = spi_write_then_read(spi, &code, 1, id, 5);
-	if (tmp < 0) {
-		pr_debug("%s: error %d reading JEDEC ID\n",
-				dev_name(&spi->dev), tmp);
-		return ERR_PTR(tmp);
-	}
-	jedec = id[0];
-	jedec = jedec << 8;
-	jedec |= id[1];
-	jedec = jedec << 8;
-	jedec |= id[2];
-
-	ext_jedec = id[3] << 8 | id[4];
-
-	for (tmp = 0; tmp < ARRAY_SIZE(m25p_ids) - 1; tmp++) {
-		info = (void *)m25p_ids[tmp].driver_data;
-		if (info->jedec_id == jedec) {
-			if (info->ext_id == 0 || info->ext_id == ext_jedec)
-				return &m25p_ids[tmp];
-		}
-	}
-	dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
-	return ERR_PTR(-ENODEV);
-}
-
-
 /*
  * board specific setup should have ensured the SPI clock used here
  * matches what the READ command supports, at least until this driver
@@ -1096,231 +191,45 @@
  */
 static int m25p_probe(struct spi_device *spi)
 {
-	const struct spi_device_id	*id = spi_get_device_id(spi);
-	struct flash_platform_data	*data;
-	struct m25p			*flash;
-	struct flash_info		*info;
-	unsigned			i;
 	struct mtd_part_parser_data	ppdata;
-	struct device_node *np = spi->dev.of_node;
+	struct flash_platform_data	*data;
+	struct m25p *flash;
+	struct spi_nor *nor;
+	enum read_mode mode = SPI_NOR_NORMAL;
 	int ret;
 
-	/* Platform data helps sort out which chip type we have, as
-	 * well as how this board partitions it.  If we don't have
-	 * a chip ID, try the JEDEC id commands; they'll work for most
-	 * newer chips, even if we don't recognize the particular chip.
-	 */
-	data = dev_get_platdata(&spi->dev);
-	if (data && data->type) {
-		const struct spi_device_id *plat_id;
-
-		for (i = 0; i < ARRAY_SIZE(m25p_ids) - 1; i++) {
-			plat_id = &m25p_ids[i];
-			if (strcmp(data->type, plat_id->name))
-				continue;
-			break;
-		}
-
-		if (i < ARRAY_SIZE(m25p_ids) - 1)
-			id = plat_id;
-		else
-			dev_warn(&spi->dev, "unrecognized id %s\n", data->type);
-	}
-
-	info = (void *)id->driver_data;
-
-	if (info->jedec_id) {
-		const struct spi_device_id *jid;
-
-		jid = jedec_probe(spi);
-		if (IS_ERR(jid)) {
-			return PTR_ERR(jid);
-		} else if (jid != id) {
-			/*
-			 * JEDEC knows better, so overwrite platform ID. We
-			 * can't trust partitions any longer, but we'll let
-			 * mtd apply them anyway, since some partitions may be
-			 * marked read-only, and we don't want to lose that
-			 * information, even if it's not 100% accurate.
-			 */
-			dev_warn(&spi->dev, "found %s, expected %s\n",
-				 jid->name, id->name);
-			id = jid;
-			info = (void *)jid->driver_data;
-		}
-	}
-
 	flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL);
 	if (!flash)
 		return -ENOMEM;
 
-	flash->command = devm_kzalloc(&spi->dev, MAX_CMD_SIZE, GFP_KERNEL);
-	if (!flash->command)
-		return -ENOMEM;
+	nor = &flash->spi_nor;
 
-	flash->spi = spi;
-	mutex_init(&flash->lock);
+	/* install the hooks */
+	nor->read = m25p80_read;
+	nor->write = m25p80_write;
+	nor->erase = m25p80_erase;
+	nor->write_reg = m25p80_write_reg;
+	nor->read_reg = m25p80_read_reg;
+
+	nor->dev = &spi->dev;
+	nor->mtd = &flash->mtd;
+	nor->priv = flash;
+
 	spi_set_drvdata(spi, flash);
+	flash->mtd.priv = nor;
+	flash->spi = spi;
 
-	/*
-	 * Atmel, SST and Intel/Numonyx serial flash tend to power
-	 * up with the software protection bits set
-	 */
+	if (spi->mode & SPI_RX_QUAD)
+		mode = SPI_NOR_QUAD;
+	else if (spi->mode & SPI_RX_DUAL)
+		mode = SPI_NOR_DUAL;
+	ret = spi_nor_scan(nor, spi_get_device_id(spi), mode);
+	if (ret)
+		return ret;
 
-	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL ||
-	    JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL ||
-	    JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) {
-		write_enable(flash);
-		write_sr(flash, 0);
-	}
-
-	if (data && data->name)
-		flash->mtd.name = data->name;
-	else
-		flash->mtd.name = dev_name(&spi->dev);
-
-	flash->mtd.type = MTD_NORFLASH;
-	flash->mtd.writesize = 1;
-	flash->mtd.flags = MTD_CAP_NORFLASH;
-	flash->mtd.size = info->sector_size * info->n_sectors;
-	flash->mtd._erase = m25p80_erase;
-	flash->mtd._read = m25p80_read;
-
-	/* flash protection support for STmicro chips */
-	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
-		flash->mtd._lock = m25p80_lock;
-		flash->mtd._unlock = m25p80_unlock;
-	}
-
-	/* sst flash chips use AAI word program */
-	if (info->flags & SST_WRITE)
-		flash->mtd._write = sst_write;
-	else
-		flash->mtd._write = m25p80_write;
-
-	/* prefer "small sector" erase if possible */
-	if (info->flags & SECT_4K) {
-		flash->erase_opcode = OPCODE_BE_4K;
-		flash->mtd.erasesize = 4096;
-	} else if (info->flags & SECT_4K_PMC) {
-		flash->erase_opcode = OPCODE_BE_4K_PMC;
-		flash->mtd.erasesize = 4096;
-	} else {
-		flash->erase_opcode = OPCODE_SE;
-		flash->mtd.erasesize = info->sector_size;
-	}
-
-	if (info->flags & M25P_NO_ERASE)
-		flash->mtd.flags |= MTD_NO_ERASE;
-
+	data = dev_get_platdata(&spi->dev);
 	ppdata.of_node = spi->dev.of_node;
-	flash->mtd.dev.parent = &spi->dev;
-	flash->page_size = info->page_size;
-	flash->mtd.writebufsize = flash->page_size;
 
-	if (np) {
-		/* If we were instantiated by DT, use it */
-		if (of_property_read_bool(np, "m25p,fast-read"))
-			flash->flash_read = M25P80_FAST;
-		else
-			flash->flash_read = M25P80_NORMAL;
-	} else {
-		/* If we weren't instantiated by DT, default to fast-read */
-		flash->flash_read = M25P80_FAST;
-	}
-
-	/* Some devices cannot do fast-read, no matter what DT tells us */
-	if (info->flags & M25P_NO_FR)
-		flash->flash_read = M25P80_NORMAL;
-
-	/* Quad/Dual-read mode takes precedence over fast/normal */
-	if (spi->mode & SPI_RX_QUAD && info->flags & M25P80_QUAD_READ) {
-		ret = set_quad_mode(flash, info->jedec_id);
-		if (ret) {
-			dev_err(&flash->spi->dev, "quad mode not supported\n");
-			return ret;
-		}
-		flash->flash_read = M25P80_QUAD;
-	} else if (spi->mode & SPI_RX_DUAL && info->flags & M25P80_DUAL_READ) {
-		flash->flash_read = M25P80_DUAL;
-	}
-
-	/* Default commands */
-	switch (flash->flash_read) {
-	case M25P80_QUAD:
-		flash->read_opcode = OPCODE_QUAD_READ;
-		break;
-	case M25P80_DUAL:
-		flash->read_opcode = OPCODE_DUAL_READ;
-		break;
-	case M25P80_FAST:
-		flash->read_opcode = OPCODE_FAST_READ;
-		break;
-	case M25P80_NORMAL:
-		flash->read_opcode = OPCODE_NORM_READ;
-		break;
-	default:
-		dev_err(&flash->spi->dev, "No Read opcode defined\n");
-		return -EINVAL;
-	}
-
-	flash->program_opcode = OPCODE_PP;
-
-	if (info->addr_width)
-		flash->addr_width = info->addr_width;
-	else if (flash->mtd.size > 0x1000000) {
-		/* enable 4-byte addressing if the device exceeds 16MiB */
-		flash->addr_width = 4;
-		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
-			/* Dedicated 4-byte command set */
-			switch (flash->flash_read) {
-			case M25P80_QUAD:
-				flash->read_opcode = OPCODE_QUAD_READ_4B;
-				break;
-			case M25P80_DUAL:
-				flash->read_opcode = OPCODE_DUAL_READ_4B;
-				break;
-			case M25P80_FAST:
-				flash->read_opcode = OPCODE_FAST_READ_4B;
-				break;
-			case M25P80_NORMAL:
-				flash->read_opcode = OPCODE_NORM_READ_4B;
-				break;
-			}
-			flash->program_opcode = OPCODE_PP_4B;
-			/* No small sector erase for 4-byte command set */
-			flash->erase_opcode = OPCODE_SE_4B;
-			flash->mtd.erasesize = info->sector_size;
-		} else
-			set_4byte(flash, info->jedec_id, 1);
-	} else {
-		flash->addr_width = 3;
-	}
-
-	dev_info(&spi->dev, "%s (%lld Kbytes)\n", id->name,
-			(long long)flash->mtd.size >> 10);
-
-	pr_debug("mtd .name = %s, .size = 0x%llx (%lldMiB) "
-			".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
-		flash->mtd.name,
-		(long long)flash->mtd.size, (long long)(flash->mtd.size >> 20),
-		flash->mtd.erasesize, flash->mtd.erasesize / 1024,
-		flash->mtd.numeraseregions);
-
-	if (flash->mtd.numeraseregions)
-		for (i = 0; i < flash->mtd.numeraseregions; i++)
-			pr_debug("mtd.eraseregions[%d] = { .offset = 0x%llx, "
-				".erasesize = 0x%.8x (%uKiB), "
-				".numblocks = %d }\n",
-				i, (long long)flash->mtd.eraseregions[i].offset,
-				flash->mtd.eraseregions[i].erasesize,
-				flash->mtd.eraseregions[i].erasesize / 1024,
-				flash->mtd.eraseregions[i].numblocks);
-
-
-	/* partitions should match sector boundaries; and it may be good to
-	 * use readonly partitions for writeprotected sectors (BP2..BP0).
-	 */
 	return mtd_device_parse_register(&flash->mtd, NULL, &ppdata,
 			data ? data->parts : NULL,
 			data ? data->nr_parts : 0);
@@ -1341,7 +250,7 @@
 		.name	= "m25p80",
 		.owner	= THIS_MODULE,
 	},
-	.id_table	= m25p_ids,
+	.id_table	= spi_nor_ids,
 	.probe	= m25p_probe,
 	.remove	= m25p_remove,
 

diff --git a/drivers/mtd/devices/serial_flash_cmds.h b/drivers/mtd/devices/serial_flash_cmds.h
index 4f0c2c7..f59a125 100644
--- a/drivers/mtd/devices/serial_flash_cmds.h
+++ b/drivers/mtd/devices/serial_flash_cmds.h

@@ -13,43 +13,23 @@
 #define _MTD_SERIAL_FLASH_CMDS_H
 
 /* Generic Flash Commands/OPCODEs */
-#define FLASH_CMD_WREN		0x06
-#define FLASH_CMD_WRDI		0x04
-#define FLASH_CMD_RDID		0x9f
-#define FLASH_CMD_RDSR		0x05
-#define FLASH_CMD_RDSR2		0x35
-#define FLASH_CMD_WRSR		0x01
-#define FLASH_CMD_SE_4K		0x20
-#define FLASH_CMD_SE_32K	0x52
-#define FLASH_CMD_SE		0xd8
-#define FLASH_CMD_CHIPERASE	0xc7
-#define FLASH_CMD_WRVCR		0x81
-#define FLASH_CMD_RDVCR		0x85
+#define SPINOR_OP_RDSR2		0x35
+#define SPINOR_OP_WRVCR		0x81
+#define SPINOR_OP_RDVCR		0x85
 
 /* JEDEC Standard - Serial Flash Discoverable Parmeters (SFDP) Commands */
-#define FLASH_CMD_READ		0x03	/* READ */
-#define FLASH_CMD_READ_FAST	0x0b	/* FAST READ */
-#define FLASH_CMD_READ_1_1_2	0x3b	/* DUAL OUTPUT READ */
-#define FLASH_CMD_READ_1_2_2	0xbb	/* DUAL I/O READ */
-#define FLASH_CMD_READ_1_1_4	0x6b	/* QUAD OUTPUT READ */
-#define FLASH_CMD_READ_1_4_4	0xeb	/* QUAD I/O READ */
+#define SPINOR_OP_READ_1_2_2	0xbb	/* DUAL I/O READ */
+#define SPINOR_OP_READ_1_4_4	0xeb	/* QUAD I/O READ */
 
-#define FLASH_CMD_WRITE		0x02	/* PAGE PROGRAM */
-#define FLASH_CMD_WRITE_1_1_2	0xa2	/* DUAL INPUT PROGRAM */
-#define FLASH_CMD_WRITE_1_2_2	0xd2	/* DUAL INPUT EXT PROGRAM */
-#define FLASH_CMD_WRITE_1_1_4	0x32	/* QUAD INPUT PROGRAM */
-#define FLASH_CMD_WRITE_1_4_4	0x12	/* QUAD INPUT EXT PROGRAM */
-
-#define FLASH_CMD_EN4B_ADDR	0xb7	/* Enter 4-byte address mode */
-#define FLASH_CMD_EX4B_ADDR	0xe9	/* Exit 4-byte address mode */
+#define SPINOR_OP_WRITE		0x02	/* PAGE PROGRAM */
+#define SPINOR_OP_WRITE_1_1_2	0xa2	/* DUAL INPUT PROGRAM */
+#define SPINOR_OP_WRITE_1_2_2	0xd2	/* DUAL INPUT EXT PROGRAM */
+#define SPINOR_OP_WRITE_1_1_4	0x32	/* QUAD INPUT PROGRAM */
+#define SPINOR_OP_WRITE_1_4_4	0x12	/* QUAD INPUT EXT PROGRAM */
 
 /* READ commands with 32-bit addressing */
-#define FLASH_CMD_READ4		0x13
-#define FLASH_CMD_READ4_FAST	0x0c
-#define FLASH_CMD_READ4_1_1_2	0x3c
-#define FLASH_CMD_READ4_1_2_2	0xbc
-#define FLASH_CMD_READ4_1_1_4	0x6c
-#define FLASH_CMD_READ4_1_4_4	0xec
+#define SPINOR_OP_READ4_1_2_2	0xbc
+#define SPINOR_OP_READ4_1_4_4	0xec
 
 /* Configuration flags */
 #define FLASH_FLAG_SINGLE	0x000000ff

diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 5a5cd2a..2fc4957c 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c

@@ -280,14 +280,11 @@
 static int __init init_slram(void)
 {
 	char *devname;
-	int i;
 
 #ifndef MODULE
 	char *devstart;
 	char *devlength;
 
-	i = 0;
-
 	if (!map) {
 		E("slram: not enough parameters.\n");
 		return(-EINVAL);
@@ -314,6 +311,7 @@
 	}
 #else
 	int count;
+	int i;
 
 	for (count = 0; count < SLRAM_MAX_DEVICES_PARAMS && map[count];
 			count++) {

diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c
index 1957d7c..d252514 100644
--- a/drivers/mtd/devices/st_spi_fsm.c
+++ b/drivers/mtd/devices/st_spi_fsm.c

@@ -19,6 +19,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/mtd/spi-nor.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -201,44 +202,6 @@
 
 #define STFSM_MAX_WAIT_SEQ_MS  1000     /* FSM execution time */
 
-/* Flash Commands */
-#define FLASH_CMD_WREN         0x06
-#define FLASH_CMD_WRDI         0x04
-#define FLASH_CMD_RDID         0x9f
-#define FLASH_CMD_RDSR         0x05
-#define FLASH_CMD_RDSR2                0x35
-#define FLASH_CMD_WRSR         0x01
-#define FLASH_CMD_SE_4K                0x20
-#define FLASH_CMD_SE_32K       0x52
-#define FLASH_CMD_SE           0xd8
-#define FLASH_CMD_CHIPERASE    0xc7
-#define FLASH_CMD_WRVCR                0x81
-#define FLASH_CMD_RDVCR                0x85
-
-#define FLASH_CMD_READ         0x03    /* READ */
-#define FLASH_CMD_READ_FAST    0x0b    /* FAST READ */
-#define FLASH_CMD_READ_1_1_2   0x3b    /* DUAL OUTPUT READ */
-#define FLASH_CMD_READ_1_2_2   0xbb    /* DUAL I/O READ */
-#define FLASH_CMD_READ_1_1_4   0x6b    /* QUAD OUTPUT READ */
-#define FLASH_CMD_READ_1_4_4   0xeb    /* QUAD I/O READ */
-
-#define FLASH_CMD_WRITE                0x02    /* PAGE PROGRAM */
-#define FLASH_CMD_WRITE_1_1_2  0xa2    /* DUAL INPUT PROGRAM */
-#define FLASH_CMD_WRITE_1_2_2  0xd2    /* DUAL INPUT EXT PROGRAM */
-#define FLASH_CMD_WRITE_1_1_4  0x32    /* QUAD INPUT PROGRAM */
-#define FLASH_CMD_WRITE_1_4_4  0x12    /* QUAD INPUT EXT PROGRAM */
-
-#define FLASH_CMD_EN4B_ADDR    0xb7    /* Enter 4-byte address mode */
-#define FLASH_CMD_EX4B_ADDR    0xe9    /* Exit 4-byte address mode */
-
-/* READ commands with 32-bit addressing (N25Q256 and S25FLxxxS) */
-#define FLASH_CMD_READ4                0x13
-#define FLASH_CMD_READ4_FAST   0x0c
-#define FLASH_CMD_READ4_1_1_2  0x3c
-#define FLASH_CMD_READ4_1_2_2  0xbc
-#define FLASH_CMD_READ4_1_1_4  0x6c
-#define FLASH_CMD_READ4_1_4_4  0xec
-
 /* S25FLxxxS commands */
 #define S25FL_CMD_WRITE4_1_1_4 0x34
 #define S25FL_CMD_SE4          0xdc
@@ -246,7 +209,7 @@
 #define S25FL_CMD_DYBWR                0xe1
 #define S25FL_CMD_DYBRD                0xe0
 #define S25FL_CMD_WRITE4       0x12    /* Note, opcode clashes with
-					* 'FLASH_CMD_WRITE_1_4_4'
+					* 'SPINOR_OP_WRITE_1_4_4'
 					* as found on N25Qxxx devices! */
 
 /* Status register */
@@ -261,6 +224,12 @@
 #define S25FL_STATUS_E_ERR     0x20
 #define S25FL_STATUS_P_ERR     0x40
 
+#define N25Q_CMD_WRVCR         0x81
+#define N25Q_CMD_RDVCR         0x85
+#define N25Q_CMD_RDVECR        0x65
+#define N25Q_CMD_RDNVCR        0xb5
+#define N25Q_CMD_WRNVCR        0xb1
+
 #define FLASH_PAGESIZE         256			/* In Bytes    */
 #define FLASH_PAGESIZE_32      (FLASH_PAGESIZE / 4)	/* In uint32_t */
 #define FLASH_MAX_BUSY_WAIT    (300 * HZ)	/* Maximum 'CHIPERASE' time */
@@ -270,7 +239,6 @@
  */
 #define CFG_READ_TOGGLE_32BIT_ADDR     0x00000001
 #define CFG_WRITE_TOGGLE_32BIT_ADDR    0x00000002
-#define CFG_WRITE_EX_32BIT_ADDR_DELAY  0x00000004
 #define CFG_ERASESEC_TOGGLE_32BIT_ADDR 0x00000008
 #define CFG_S25FL_CHECK_ERROR_FLAGS    0x00000010
 
@@ -329,7 +297,7 @@
 	u32             jedec_id;
 	u16             ext_id;
 	/*
-	 * The size listed here is what works with FLASH_CMD_SE, which isn't
+	 * The size listed here is what works with SPINOR_OP_SE, which isn't
 	 * necessarily called a "sector" by the vendor.
 	 */
 	unsigned        sector_size;
@@ -369,17 +337,26 @@
 	{ "m25px32", 0x207116, 0,  64 * 1024,  64, M25PX_FLAG, 75, NULL },
 	{ "m25px64", 0x207117, 0,  64 * 1024, 128, M25PX_FLAG, 75, NULL },
 
+	/* Macronix MX25xxx
+	 *     - Support for 'FLASH_FLAG_WRITE_1_4_4' is omitted for devices
+	 *       where operating frequency must be reduced.
+	 */
 #define MX25_FLAG (FLASH_FLAG_READ_WRITE       |	\
 		   FLASH_FLAG_READ_FAST         |	\
 		   FLASH_FLAG_READ_1_1_2        |	\
 		   FLASH_FLAG_READ_1_2_2        |	\
 		   FLASH_FLAG_READ_1_1_4        |	\
-		   FLASH_FLAG_READ_1_4_4        |	\
 		   FLASH_FLAG_SE_4K             |	\
 		   FLASH_FLAG_SE_32K)
+	{ "mx25l3255e",  0xc29e16, 0, 64 * 1024, 64,
+	  (MX25_FLAG | FLASH_FLAG_WRITE_1_4_4), 86,
+	  stfsm_mx25_config},
 	{ "mx25l25635e", 0xc22019, 0, 64*1024, 512,
 	  (MX25_FLAG | FLASH_FLAG_32BIT_ADDR | FLASH_FLAG_RESET), 70,
 	  stfsm_mx25_config },
+	{ "mx25l25655e", 0xc22619, 0, 64*1024, 512,
+	  (MX25_FLAG | FLASH_FLAG_32BIT_ADDR | FLASH_FLAG_RESET), 70,
+	  stfsm_mx25_config},
 
 #define N25Q_FLAG (FLASH_FLAG_READ_WRITE       |	\
 		   FLASH_FLAG_READ_FAST         |	\
@@ -407,6 +384,8 @@
 			FLASH_FLAG_READ_1_4_4   |	\
 			FLASH_FLAG_WRITE_1_1_4  |	\
 			FLASH_FLAG_READ_FAST)
+	{ "s25fl032p",  0x010215, 0x4d00,  64 * 1024,  64, S25FLXXXP_FLAG, 80,
+	  stfsm_s25fl_config},
 	{ "s25fl129p0", 0x012018, 0x4d00, 256 * 1024,  64, S25FLXXXP_FLAG, 80,
 	  stfsm_s25fl_config },
 	{ "s25fl129p1", 0x012018, 0x4d01,  64 * 1024, 256, S25FLXXXP_FLAG, 80,
@@ -473,22 +452,22 @@
 
 /* Default READ configurations, in order of preference */
 static struct seq_rw_config default_read_configs[] = {
-	{FLASH_FLAG_READ_1_4_4, FLASH_CMD_READ_1_4_4,	0, 4, 4, 0x00, 2, 4},
-	{FLASH_FLAG_READ_1_1_4, FLASH_CMD_READ_1_1_4,	0, 1, 4, 0x00, 4, 0},
-	{FLASH_FLAG_READ_1_2_2, FLASH_CMD_READ_1_2_2,	0, 2, 2, 0x00, 4, 0},
-	{FLASH_FLAG_READ_1_1_2, FLASH_CMD_READ_1_1_2,	0, 1, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_FAST,	FLASH_CMD_READ_FAST,	0, 1, 1, 0x00, 0, 8},
-	{FLASH_FLAG_READ_WRITE, FLASH_CMD_READ,		0, 1, 1, 0x00, 0, 0},
+	{FLASH_FLAG_READ_1_4_4, SPINOR_OP_READ_1_4_4,	0, 4, 4, 0x00, 2, 4},
+	{FLASH_FLAG_READ_1_1_4, SPINOR_OP_READ_1_1_4,	0, 1, 4, 0x00, 4, 0},
+	{FLASH_FLAG_READ_1_2_2, SPINOR_OP_READ_1_2_2,	0, 2, 2, 0x00, 4, 0},
+	{FLASH_FLAG_READ_1_1_2, SPINOR_OP_READ_1_1_2,	0, 1, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_FAST,	SPINOR_OP_READ_FAST,	0, 1, 1, 0x00, 0, 8},
+	{FLASH_FLAG_READ_WRITE, SPINOR_OP_READ,		0, 1, 1, 0x00, 0, 0},
 	{0x00,			0,			0, 0, 0, 0x00, 0, 0},
 };
 
 /* Default WRITE configurations */
 static struct seq_rw_config default_write_configs[] = {
-	{FLASH_FLAG_WRITE_1_4_4, FLASH_CMD_WRITE_1_4_4, 1, 4, 4, 0x00, 0, 0},
-	{FLASH_FLAG_WRITE_1_1_4, FLASH_CMD_WRITE_1_1_4, 1, 1, 4, 0x00, 0, 0},
-	{FLASH_FLAG_WRITE_1_2_2, FLASH_CMD_WRITE_1_2_2, 1, 2, 2, 0x00, 0, 0},
-	{FLASH_FLAG_WRITE_1_1_2, FLASH_CMD_WRITE_1_1_2, 1, 1, 2, 0x00, 0, 0},
-	{FLASH_FLAG_READ_WRITE,  FLASH_CMD_WRITE,       1, 1, 1, 0x00, 0, 0},
+	{FLASH_FLAG_WRITE_1_4_4, SPINOR_OP_WRITE_1_4_4, 1, 4, 4, 0x00, 0, 0},
+	{FLASH_FLAG_WRITE_1_1_4, SPINOR_OP_WRITE_1_1_4, 1, 1, 4, 0x00, 0, 0},
+	{FLASH_FLAG_WRITE_1_2_2, SPINOR_OP_WRITE_1_2_2, 1, 2, 2, 0x00, 0, 0},
+	{FLASH_FLAG_WRITE_1_1_2, SPINOR_OP_WRITE_1_1_2, 1, 1, 2, 0x00, 0, 0},
+	{FLASH_FLAG_READ_WRITE,  SPINOR_OP_WRITE,       1, 1, 1, 0x00, 0, 0},
 	{0x00,			 0,			0, 0, 0, 0x00, 0, 0},
 };
 
@@ -511,12 +490,12 @@
  * cycles.
  */
 static struct seq_rw_config n25q_read3_configs[] = {
-	{FLASH_FLAG_READ_1_4_4, FLASH_CMD_READ_1_4_4,	0, 4, 4, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_1_4, FLASH_CMD_READ_1_1_4,	0, 1, 4, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_2_2, FLASH_CMD_READ_1_2_2,	0, 2, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_1_2, FLASH_CMD_READ_1_1_2,	0, 1, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_FAST,	FLASH_CMD_READ_FAST,	0, 1, 1, 0x00, 0, 8},
-	{FLASH_FLAG_READ_WRITE, FLASH_CMD_READ,	        0, 1, 1, 0x00, 0, 0},
+	{FLASH_FLAG_READ_1_4_4, SPINOR_OP_READ_1_4_4,	0, 4, 4, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_1_4, SPINOR_OP_READ_1_1_4,	0, 1, 4, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_2_2, SPINOR_OP_READ_1_2_2,	0, 2, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_1_2, SPINOR_OP_READ_1_1_2,	0, 1, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_FAST,	SPINOR_OP_READ_FAST,	0, 1, 1, 0x00, 0, 8},
+	{FLASH_FLAG_READ_WRITE, SPINOR_OP_READ,	        0, 1, 1, 0x00, 0, 0},
 	{0x00,			0,			0, 0, 0, 0x00, 0, 0},
 };
 
@@ -526,12 +505,12 @@
  *	- 'FAST' variants configured for 8 dummy cycles (see note above.)
  */
 static struct seq_rw_config n25q_read4_configs[] = {
-	{FLASH_FLAG_READ_1_4_4, FLASH_CMD_READ4_1_4_4,	0, 4, 4, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_1_4, FLASH_CMD_READ4_1_1_4,	0, 1, 4, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_2_2, FLASH_CMD_READ4_1_2_2,	0, 2, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_1_2, FLASH_CMD_READ4_1_1_2,	0, 1, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_FAST,	FLASH_CMD_READ4_FAST,	0, 1, 1, 0x00, 0, 8},
-	{FLASH_FLAG_READ_WRITE, FLASH_CMD_READ4,	0, 1, 1, 0x00, 0, 0},
+	{FLASH_FLAG_READ_1_4_4, SPINOR_OP_READ4_1_4_4,	0, 4, 4, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_1_4, SPINOR_OP_READ4_1_1_4,	0, 1, 4, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_2_2, SPINOR_OP_READ4_1_2_2,	0, 2, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_1_2, SPINOR_OP_READ4_1_1_2,	0, 1, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_FAST,	SPINOR_OP_READ4_FAST,	0, 1, 1, 0x00, 0, 8},
+	{FLASH_FLAG_READ_WRITE, SPINOR_OP_READ4,	0, 1, 1, 0x00, 0, 0},
 	{0x00,			0,			0, 0, 0, 0x00, 0, 0},
 };
 
@@ -544,7 +523,7 @@
 {
 	seq->seq_opc[0] = (SEQ_OPC_PADS_1 |
 			   SEQ_OPC_CYCLES(8) |
-			   SEQ_OPC_OPCODE(FLASH_CMD_EN4B_ADDR) |
+			   SEQ_OPC_OPCODE(SPINOR_OP_EN4B) |
 			   SEQ_OPC_CSDEASSERT);
 
 	seq->seq[0] = STFSM_INST_CMD1;
@@ -572,12 +551,12 @@
  * entering a state that is incompatible with the SPIBoot Controller.
  */
 static struct seq_rw_config stfsm_s25fl_read4_configs[] = {
-	{FLASH_FLAG_READ_1_4_4,  FLASH_CMD_READ4_1_4_4,  0, 4, 4, 0x00, 2, 4},
-	{FLASH_FLAG_READ_1_1_4,  FLASH_CMD_READ4_1_1_4,  0, 1, 4, 0x00, 0, 8},
-	{FLASH_FLAG_READ_1_2_2,  FLASH_CMD_READ4_1_2_2,  0, 2, 2, 0x00, 4, 0},
-	{FLASH_FLAG_READ_1_1_2,  FLASH_CMD_READ4_1_1_2,  0, 1, 2, 0x00, 0, 8},
-	{FLASH_FLAG_READ_FAST,   FLASH_CMD_READ4_FAST,   0, 1, 1, 0x00, 0, 8},
-	{FLASH_FLAG_READ_WRITE,  FLASH_CMD_READ4,        0, 1, 1, 0x00, 0, 0},
+	{FLASH_FLAG_READ_1_4_4,  SPINOR_OP_READ4_1_4_4,  0, 4, 4, 0x00, 2, 4},
+	{FLASH_FLAG_READ_1_1_4,  SPINOR_OP_READ4_1_1_4,  0, 1, 4, 0x00, 0, 8},
+	{FLASH_FLAG_READ_1_2_2,  SPINOR_OP_READ4_1_2_2,  0, 2, 2, 0x00, 4, 0},
+	{FLASH_FLAG_READ_1_1_2,  SPINOR_OP_READ4_1_1_2,  0, 1, 2, 0x00, 0, 8},
+	{FLASH_FLAG_READ_FAST,   SPINOR_OP_READ4_FAST,   0, 1, 1, 0x00, 0, 8},
+	{FLASH_FLAG_READ_WRITE,  SPINOR_OP_READ4,        0, 1, 1, 0x00, 0, 0},
 	{0x00,                   0,                      0, 0, 0, 0x00, 0, 0},
 };
 
@@ -590,13 +569,13 @@
 /*
  * [W25Qxxx] Configuration
  */
-#define W25Q_STATUS_QE			(0x1 << 9)
+#define W25Q_STATUS_QE			(0x1 << 1)
 
 static struct stfsm_seq stfsm_seq_read_jedec = {
 	.data_size = TRANSFER_SIZE(8),
 	.seq_opc[0] = (SEQ_OPC_PADS_1 |
 		       SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_RDID)),
+		       SEQ_OPC_OPCODE(SPINOR_OP_RDID)),
 	.seq = {
 		STFSM_INST_CMD1,
 		STFSM_INST_DATA_READ,
@@ -612,7 +591,7 @@
 	.data_size = TRANSFER_SIZE(4),
 	.seq_opc[0] = (SEQ_OPC_PADS_1 |
 		       SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_RDSR)),
+		       SEQ_OPC_OPCODE(SPINOR_OP_RDSR)),
 	.seq = {
 		STFSM_INST_CMD1,
 		STFSM_INST_DATA_READ,
@@ -628,10 +607,10 @@
 	/* 'addr_cfg' configured during initialisation */
 	.seq_opc = {
 		(SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		 SEQ_OPC_OPCODE(FLASH_CMD_WREN) | SEQ_OPC_CSDEASSERT),
+		 SEQ_OPC_OPCODE(SPINOR_OP_WREN) | SEQ_OPC_CSDEASSERT),
 
 		(SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		 SEQ_OPC_OPCODE(FLASH_CMD_SE)),
+		 SEQ_OPC_OPCODE(SPINOR_OP_SE)),
 	},
 	.seq = {
 		STFSM_INST_CMD1,
@@ -649,10 +628,10 @@
 static struct stfsm_seq stfsm_seq_erase_chip = {
 	.seq_opc = {
 		(SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		 SEQ_OPC_OPCODE(FLASH_CMD_WREN) | SEQ_OPC_CSDEASSERT),
+		 SEQ_OPC_OPCODE(SPINOR_OP_WREN) | SEQ_OPC_CSDEASSERT),
 
 		(SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		 SEQ_OPC_OPCODE(FLASH_CMD_CHIPERASE) | SEQ_OPC_CSDEASSERT),
+		 SEQ_OPC_OPCODE(SPINOR_OP_CHIP_ERASE) | SEQ_OPC_CSDEASSERT),
 	},
 	.seq = {
 		STFSM_INST_CMD1,
@@ -669,26 +648,9 @@
 
 static struct stfsm_seq stfsm_seq_write_status = {
 	.seq_opc[0] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_WREN) | SEQ_OPC_CSDEASSERT),
+		       SEQ_OPC_OPCODE(SPINOR_OP_WREN) | SEQ_OPC_CSDEASSERT),
 	.seq_opc[1] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_WRSR)),
-	.seq = {
-		STFSM_INST_CMD1,
-		STFSM_INST_CMD2,
-		STFSM_INST_STA_WR1,
-		STFSM_INST_STOP,
-	},
-	.seq_cfg = (SEQ_CFG_PADS_1 |
-		    SEQ_CFG_READNOTWRITE |
-		    SEQ_CFG_CSDEASSERT |
-		    SEQ_CFG_STARTSEQ),
-};
-
-static struct stfsm_seq stfsm_seq_wrvcr = {
-	.seq_opc[0] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_WREN) | SEQ_OPC_CSDEASSERT),
-	.seq_opc[1] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-		       SEQ_OPC_OPCODE(FLASH_CMD_WRVCR)),
+		       SEQ_OPC_OPCODE(SPINOR_OP_WRSR)),
 	.seq = {
 		STFSM_INST_CMD1,
 		STFSM_INST_CMD2,
@@ -704,9 +666,9 @@
 static int stfsm_n25q_en_32bit_addr_seq(struct stfsm_seq *seq)
 {
 	seq->seq_opc[0] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-			   SEQ_OPC_OPCODE(FLASH_CMD_EN4B_ADDR));
+			   SEQ_OPC_OPCODE(SPINOR_OP_EN4B));
 	seq->seq_opc[1] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-			   SEQ_OPC_OPCODE(FLASH_CMD_WREN) |
+			   SEQ_OPC_OPCODE(SPINOR_OP_WREN) |
 			   SEQ_OPC_CSDEASSERT);
 
 	seq->seq[0] = STFSM_INST_CMD2;
@@ -793,7 +755,7 @@
 
 	dev_dbg(fsm->dev, "Reading %d bytes from FIFO\n", size);
 
-	BUG_ON((((uint32_t)buf) & 0x3) || (size & 0x3));
+	BUG_ON((((uintptr_t)buf) & 0x3) || (size & 0x3));
 
 	while (remaining) {
 		for (;;) {
@@ -817,7 +779,7 @@
 
 	dev_dbg(fsm->dev, "writing %d bytes to FIFO\n", size);
 
-	BUG_ON((((uint32_t)buf) & 0x3) || (size & 0x3));
+	BUG_ON((((uintptr_t)buf) & 0x3) || (size & 0x3));
 
 	writesl(fsm->base + SPI_FAST_SEQ_DATA_REG, buf, words);
 
@@ -827,7 +789,7 @@
 static int stfsm_enter_32bit_addr(struct stfsm *fsm, int enter)
 {
 	struct stfsm_seq *seq = &fsm->stfsm_seq_en_32bit_addr;
-	uint32_t cmd = enter ? FLASH_CMD_EN4B_ADDR : FLASH_CMD_EX4B_ADDR;
+	uint32_t cmd = enter ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
 
 	seq->seq_opc[0] = (SEQ_OPC_PADS_1 |
 			   SEQ_OPC_CYCLES(8) |
@@ -851,7 +813,7 @@
 	/* Use RDRS1 */
 	seq->seq_opc[0] = (SEQ_OPC_PADS_1 |
 			   SEQ_OPC_CYCLES(8) |
-			   SEQ_OPC_OPCODE(FLASH_CMD_RDSR));
+			   SEQ_OPC_OPCODE(SPINOR_OP_RDSR));
 
 	/* Load read_status sequence */
 	stfsm_load_seq(fsm, seq);
@@ -889,59 +851,56 @@
 }
 
 static int stfsm_read_status(struct stfsm *fsm, uint8_t cmd,
-			   uint8_t *status)
+			     uint8_t *data, int bytes)
 {
 	struct stfsm_seq *seq = &stfsm_seq_read_status_fifo;
 	uint32_t tmp;
+	uint8_t *t = (uint8_t *)&tmp;
+	int i;
 
-	dev_dbg(fsm->dev, "reading STA[%s]\n",
-		(cmd == FLASH_CMD_RDSR) ? "1" : "2");
+	dev_dbg(fsm->dev, "read 'status' register [0x%02x], %d byte(s)\n",
+		cmd, bytes);
 
-	seq->seq_opc[0] = (SEQ_OPC_PADS_1 |
-			   SEQ_OPC_CYCLES(8) |
+	BUG_ON(bytes != 1 && bytes != 2);
+
+	seq->seq_opc[0] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
 			   SEQ_OPC_OPCODE(cmd)),
 
 	stfsm_load_seq(fsm, seq);
 
 	stfsm_read_fifo(fsm, &tmp, 4);
 
-	*status = (uint8_t)(tmp >> 24);
+	for (i = 0; i < bytes; i++)
+		data[i] = t[i];
 
 	stfsm_wait_seq(fsm);
 
 	return 0;
 }
 
-static int stfsm_write_status(struct stfsm *fsm, uint16_t status,
-			       int sta_bytes)
+static int stfsm_write_status(struct stfsm *fsm, uint8_t cmd,
+			    uint16_t data, int bytes, int wait_busy)
 {
 	struct stfsm_seq *seq = &stfsm_seq_write_status;
 
-	dev_dbg(fsm->dev, "writing STA[%s] 0x%04x\n",
-		(sta_bytes == 1) ? "1" : "1+2", status);
+	dev_dbg(fsm->dev,
+		"write 'status' register [0x%02x], %d byte(s), 0x%04x\n"
+		" %s wait-busy\n", cmd, bytes, data, wait_busy ? "with" : "no");
 
-	seq->status = (uint32_t)status | STA_PADS_1 | STA_CSDEASSERT;
-	seq->seq[2] = (sta_bytes == 1) ?
-		STFSM_INST_STA_WR1 : STFSM_INST_STA_WR1_2;
+	BUG_ON(bytes != 1 && bytes != 2);
+
+	seq->seq_opc[1] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
+			   SEQ_OPC_OPCODE(cmd));
+
+	seq->status = (uint32_t)data | STA_PADS_1 | STA_CSDEASSERT;
+	seq->seq[2] = (bytes == 1) ? STFSM_INST_STA_WR1 : STFSM_INST_STA_WR1_2;
 
 	stfsm_load_seq(fsm, seq);
 
 	stfsm_wait_seq(fsm);
 
-	return 0;
-};
-
-static int stfsm_wrvcr(struct stfsm *fsm, uint8_t data)
-{
-	struct stfsm_seq *seq = &stfsm_seq_wrvcr;
-
-	dev_dbg(fsm->dev, "writing VCR 0x%02x\n", data);
-
-	seq->status = (STA_DATA_BYTE1(data) | STA_PADS_1 | STA_CSDEASSERT);
-
-	stfsm_load_seq(fsm, seq);
-
-	stfsm_wait_seq(fsm);
+	if (wait_busy)
+		stfsm_wait_busy(fsm);
 
 	return 0;
 }
@@ -1027,7 +986,7 @@
 	if (cfg->write)
 		seq->seq_opc[i++] = (SEQ_OPC_PADS_1 |
 				     SEQ_OPC_CYCLES(8) |
-				     SEQ_OPC_OPCODE(FLASH_CMD_WREN) |
+				     SEQ_OPC_OPCODE(SPINOR_OP_WREN) |
 				     SEQ_OPC_CSDEASSERT);
 
 	/* Address configuration (24 or 32-bit addresses) */
@@ -1149,31 +1108,36 @@
 		stfsm_mx25_en_32bit_addr_seq(&fsm->stfsm_seq_en_32bit_addr);
 
 		soc_reset = stfsm_can_handle_soc_reset(fsm);
-		if (soc_reset || !fsm->booted_from_spi) {
+		if (soc_reset || !fsm->booted_from_spi)
 			/* If we can handle SoC resets, we enable 32-bit address
 			 * mode pervasively */
 			stfsm_enter_32bit_addr(fsm, 1);
 
-		} else {
+		else
 			/* Else, enable/disable 32-bit addressing before/after
 			 * each operation */
 			fsm->configuration = (CFG_READ_TOGGLE_32BIT_ADDR |
 					      CFG_WRITE_TOGGLE_32BIT_ADDR |
 					      CFG_ERASESEC_TOGGLE_32BIT_ADDR);
-			/* It seems a small delay is required after exiting
-			 * 32-bit mode following a write operation.  The issue
-			 * is under investigation.
-			 */
-			fsm->configuration |= CFG_WRITE_EX_32BIT_ADDR_DELAY;
-		}
 	}
 
-	/* For QUAD mode, set 'QE' STATUS bit */
+	/* Check status of 'QE' bit, update if required. */
+	stfsm_read_status(fsm, SPINOR_OP_RDSR, &sta, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
-		stfsm_read_status(fsm, FLASH_CMD_RDSR, &sta);
-		sta |= MX25_STATUS_QE;
-		stfsm_write_status(fsm, sta, 1);
+		if (!(sta & MX25_STATUS_QE)) {
+			/* Set 'QE' */
+			sta |= MX25_STATUS_QE;
+
+			stfsm_write_status(fsm, SPINOR_OP_WRSR, sta, 1, 1);
+		}
+	} else {
+		if (sta & MX25_STATUS_QE) {
+			/* Clear 'QE' */
+			sta &= ~MX25_STATUS_QE;
+
+			stfsm_write_status(fsm, SPINOR_OP_WRSR, sta, 1, 1);
+		}
 	}
 
 	return 0;
@@ -1239,7 +1203,7 @@
 	 */
 	vcr = (N25Q_VCR_DUMMY_CYCLES(8) | N25Q_VCR_XIP_DISABLED |
 	       N25Q_VCR_WRAP_CONT);
-	stfsm_wrvcr(fsm, vcr);
+	stfsm_write_status(fsm, N25Q_CMD_WRVCR, vcr, 1, 0);
 
 	return 0;
 }
@@ -1297,7 +1261,7 @@
 {
 	struct stfsm_seq seq = {
 		.seq_opc[0] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
-			       SEQ_OPC_OPCODE(FLASH_CMD_WREN) |
+			       SEQ_OPC_OPCODE(SPINOR_OP_WREN) |
 			       SEQ_OPC_CSDEASSERT),
 		.seq_opc[1] = (SEQ_OPC_PADS_1 | SEQ_OPC_CYCLES(8) |
 			       SEQ_OPC_OPCODE(S25FL_CMD_DYBWR)),
@@ -1337,7 +1301,7 @@
 			       SEQ_OPC_CSDEASSERT),
 		.seq_opc[1] = (SEQ_OPC_PADS_1 |
 			       SEQ_OPC_CYCLES(8) |
-			       SEQ_OPC_OPCODE(FLASH_CMD_WRDI) |
+			       SEQ_OPC_OPCODE(SPINOR_OP_WRDI) |
 			       SEQ_OPC_CSDEASSERT),
 		.seq = {
 			STFSM_INST_CMD1,
@@ -1367,6 +1331,7 @@
 	uint32_t offs;
 	uint16_t sta_wr;
 	uint8_t sr1, cr1, dyb;
+	int update_sr = 0;
 	int ret;
 
 	if (flags & FLASH_FLAG_32BIT_ADDR) {
@@ -1414,34 +1379,28 @@
 		}
 	}
 
-	/* Check status of 'QE' bit */
+	/* Check status of 'QE' bit, update if required. */
+	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &cr1, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
-	stfsm_read_status(fsm, FLASH_CMD_RDSR2, &cr1);
 	if (data_pads == 4) {
 		if (!(cr1 & STFSM_S25FL_CONFIG_QE)) {
 			/* Set 'QE' */
 			cr1 |= STFSM_S25FL_CONFIG_QE;
 
-			stfsm_read_status(fsm, FLASH_CMD_RDSR, &sr1);
-			sta_wr = ((uint16_t)cr1  << 8) | sr1;
-
-			stfsm_write_status(fsm, sta_wr, 2);
-
-			stfsm_wait_busy(fsm);
+			update_sr = 1;
 		}
 	} else {
-		if ((cr1 & STFSM_S25FL_CONFIG_QE)) {
+		if (cr1 & STFSM_S25FL_CONFIG_QE) {
 			/* Clear 'QE' */
 			cr1 &= ~STFSM_S25FL_CONFIG_QE;
 
-			stfsm_read_status(fsm, FLASH_CMD_RDSR, &sr1);
-			sta_wr = ((uint16_t)cr1  << 8) | sr1;
-
-			stfsm_write_status(fsm, sta_wr, 2);
-
-			stfsm_wait_busy(fsm);
+			update_sr = 1;
 		}
-
+	}
+	if (update_sr) {
+		stfsm_read_status(fsm, SPINOR_OP_RDSR, &sr1, 1);
+		sta_wr = ((uint16_t)cr1  << 8) | sr1;
+		stfsm_write_status(fsm, SPINOR_OP_WRSR, sta_wr, 2, 1);
 	}
 
 	/*
@@ -1456,27 +1415,36 @@
 static int stfsm_w25q_config(struct stfsm *fsm)
 {
 	uint32_t data_pads;
-	uint16_t sta_wr;
-	uint8_t sta1, sta2;
+	uint8_t sr1, sr2;
+	uint16_t sr_wr;
+	int update_sr = 0;
 	int ret;
 
 	ret = stfsm_prepare_rwe_seqs_default(fsm);
 	if (ret)
 		return ret;
 
-	/* If using QUAD mode, set QE STATUS bit */
+	/* Check status of 'QE' bit, update if required. */
+	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &sr2, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
-		stfsm_read_status(fsm, FLASH_CMD_RDSR, &sta1);
-		stfsm_read_status(fsm, FLASH_CMD_RDSR2, &sta2);
-
-		sta_wr = ((uint16_t)sta2 << 8) | sta1;
-
-		sta_wr |= W25Q_STATUS_QE;
-
-		stfsm_write_status(fsm, sta_wr, 2);
-
-		stfsm_wait_busy(fsm);
+		if (!(sr2 & W25Q_STATUS_QE)) {
+			/* Set 'QE' */
+			sr2 |= W25Q_STATUS_QE;
+			update_sr = 1;
+		}
+	} else {
+		if (sr2 & W25Q_STATUS_QE) {
+			/* Clear 'QE' */
+			sr2 &= ~W25Q_STATUS_QE;
+			update_sr = 1;
+		}
+	}
+	if (update_sr) {
+		/* Write status register */
+		stfsm_read_status(fsm, SPINOR_OP_RDSR, &sr1, 1);
+		sr_wr = ((uint16_t)sr2 << 8) | sr1;
+		stfsm_write_status(fsm, SPINOR_OP_WRSR, sr_wr, 2, 1);
 	}
 
 	return 0;
@@ -1506,7 +1474,7 @@
 	read_mask = (data_pads << 2) - 1;
 
 	/* Handle non-aligned buf */
-	p = ((uint32_t)buf & 0x3) ? (uint8_t *)page_buf : buf;
+	p = ((uintptr_t)buf & 0x3) ? (uint8_t *)page_buf : buf;
 
 	/* Handle non-aligned size */
 	size_ub = (size + read_mask) & ~read_mask;
@@ -1528,7 +1496,7 @@
 	}
 
 	/* Handle non-aligned buf */
-	if ((uint32_t)buf & 0x3)
+	if ((uintptr_t)buf & 0x3)
 		memcpy(buf, page_buf, size);
 
 	/* Wait for sequence to finish */
@@ -1570,7 +1538,7 @@
 	write_mask = (data_pads << 2) - 1;
 
 	/* Handle non-aligned buf */
-	if ((uint32_t)buf & 0x3) {
+	if ((uintptr_t)buf & 0x3) {
 		memcpy(page_buf, buf, size);
 		p = (uint8_t *)page_buf;
 	} else {
@@ -1628,11 +1596,8 @@
 		stfsm_s25fl_clear_status_reg(fsm);
 
 	/* Exit 32-bit address mode, if required */
-	if (fsm->configuration & CFG_WRITE_TOGGLE_32BIT_ADDR) {
+	if (fsm->configuration & CFG_WRITE_TOGGLE_32BIT_ADDR)
 		stfsm_enter_32bit_addr(fsm, 0);
-		if (fsm->configuration & CFG_WRITE_EX_32BIT_ADDR_DELAY)
-			udelay(1);
-	}
 
 	return 0;
 }
@@ -1736,7 +1701,7 @@
 
 	while (len) {
 		/* Write up to page boundary */
-		bytes = min(FLASH_PAGESIZE - page_offs, len);
+		bytes = min_t(size_t, FLASH_PAGESIZE - page_offs, len);
 
 		ret = stfsm_write(fsm, b, bytes, to);
 		if (ret)
@@ -1935,6 +1900,13 @@
 	       fsm->base + SPI_CONFIGDATA);
 	writel(STFSM_DEFAULT_WR_TIME, fsm->base + SPI_STATUS_WR_TIME_REG);
 
+	/*
+	 * Set the FSM 'WAIT' delay to the minimum workable value.  Note, for
+	 * our purposes, the WAIT instruction is used purely to achieve
+	 * "sequence validity" rather than actually implement a delay.
+	 */
+	writel(0x00000001, fsm->base + SPI_PROGRAM_ERASE_TIME);
+
 	/* Clear FIFO, just in case */
 	stfsm_clear_fifo(fsm);
 
@@ -2086,7 +2058,7 @@
 	return mtd_device_unregister(&fsm->mtd);
 }
 
-static struct of_device_id stfsm_match[] = {
+static const struct of_device_id stfsm_match[] = {
 	{ .compatible = "st,spi-fsm", },
 	{},
 };

diff --git a/drivers/mtd/lpddr/Kconfig b/drivers/mtd/lpddr/Kconfig
index 265f969..3a19cbe 100644
--- a/drivers/mtd/lpddr/Kconfig
+++ b/drivers/mtd/lpddr/Kconfig

@@ -1,5 +1,5 @@
-menu "LPDDR flash memory drivers"
-	depends on MTD!=n
+menu "LPDDR & LPDDR2 PCM memory drivers"
+	depends on MTD
 
 config MTD_LPDDR
 	tristate "Support for LPDDR flash chips"
@@ -17,4 +17,13 @@
 	    Window QINFO interface, permits software to be used for entire
 	    families of devices. This serves similar purpose of CFI on legacy
 	    Flash products
+
+config MTD_LPDDR2_NVM
+	# ARM dependency is only for writel_relaxed()
+	depends on MTD && ARM
+	tristate "Support for LPDDR2-NVM flash chips"
+	help
+	  This option enables support of PCM memories with a LPDDR2-NVM
+	  (Low power double data rate 2) interface.
+
 endmenu

diff --git a/drivers/mtd/lpddr/Makefile b/drivers/mtd/lpddr/Makefile
index da48e46..881d440 100644
--- a/drivers/mtd/lpddr/Makefile
+++ b/drivers/mtd/lpddr/Makefile

@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_MTD_QINFO_PROBE)	+= qinfo_probe.o
 obj-$(CONFIG_MTD_LPDDR)	+= lpddr_cmds.o
+obj-$(CONFIG_MTD_LPDDR2_NVM) += lpddr2_nvm.o

diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c
new file mode 100644
index 0000000..063cec4
--- /dev/null
+++ b/drivers/mtd/lpddr/lpddr2_nvm.c

@@ -0,0 +1,507 @@
+/*
+ * LPDDR2-NVM MTD driver. This module provides read, write, erase, lock/unlock
+ * support for LPDDR2-NVM PCM memories
+ *
+ * Copyright © 2012 Micron Technology, Inc.
+ *
+ * Vincenzo Aliberti <vincenzo.aliberti@gmail.com>
+ * Domenico Manna <domenico.manna@gmail.com>
+ * Many thanks to Andrea Vigilante for initial enabling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/err.h>
+
+/* Parameters */
+#define ERASE_BLOCKSIZE			(0x00020000/2)	/* in Word */
+#define WRITE_BUFFSIZE			(0x00000400/2)	/* in Word */
+#define OW_BASE_ADDRESS			0x00000000	/* OW offset */
+#define BUS_WIDTH			0x00000020	/* x32 devices */
+
+/* PFOW symbols address offset */
+#define PFOW_QUERY_STRING_P		(0x0000/2)	/* in Word */
+#define PFOW_QUERY_STRING_F		(0x0002/2)	/* in Word */
+#define PFOW_QUERY_STRING_O		(0x0004/2)	/* in Word */
+#define PFOW_QUERY_STRING_W		(0x0006/2)	/* in Word */
+
+/* OW registers address */
+#define CMD_CODE_OFS			(0x0080/2)	/* in Word */
+#define CMD_DATA_OFS			(0x0084/2)	/* in Word */
+#define CMD_ADD_L_OFS			(0x0088/2)	/* in Word */
+#define CMD_ADD_H_OFS			(0x008A/2)	/* in Word */
+#define MPR_L_OFS			(0x0090/2)	/* in Word */
+#define MPR_H_OFS			(0x0092/2)	/* in Word */
+#define CMD_EXEC_OFS			(0x00C0/2)	/* in Word */
+#define STATUS_REG_OFS			(0x00CC/2)	/* in Word */
+#define PRG_BUFFER_OFS			(0x0010/2)	/* in Word */
+
+/* Datamask */
+#define MR_CFGMASK			0x8000
+#define SR_OK_DATAMASK			0x0080
+
+/* LPDDR2-NVM Commands */
+#define LPDDR2_NVM_LOCK			0x0061
+#define LPDDR2_NVM_UNLOCK		0x0062
+#define LPDDR2_NVM_SW_PROGRAM		0x0041
+#define LPDDR2_NVM_SW_OVERWRITE		0x0042
+#define LPDDR2_NVM_BUF_PROGRAM		0x00E9
+#define LPDDR2_NVM_BUF_OVERWRITE	0x00EA
+#define LPDDR2_NVM_ERASE		0x0020
+
+/* LPDDR2-NVM Registers offset */
+#define LPDDR2_MODE_REG_DATA		0x0040
+#define LPDDR2_MODE_REG_CFG		0x0050
+
+/*
+ * Internal Type Definitions
+ * pcm_int_data contains memory controller details:
+ * @ctl_regs : remapped base of the memory controller registers; the
+ *             LPDDR2_MODE_REG_DATA/LPDDR2_MODE_REG_CFG offsets are added to it
+ * @bus_width: memory bus-width (eg: x16 2 Bytes, x32 4 Bytes)
+ */
+struct pcm_int_data {
+	void __iomem *ctl_regs;
+	int bus_width;
+};
+
+static DEFINE_MUTEX(lpdd2_nvm_mutex);
+
+/*
+ * Wrap @myword in a map_word: x[0] carries the value, the rest stays zero
+ */
+static inline map_word build_map_word(u_long myword)
+{
+	map_word word = { { myword } };
+
+	return word;
+}
+
+/*
+ * Mode Register configuration mask for the given bus-width: MR_CFGMASK as
+ * is, or moved to the upper 16 bits when @bus_width is 4 (x32 device)
+ */
+static inline u_int build_mr_cfgmask(u_int bus_width)
+{
+	const u_int x32_width = 0x0004;
+
+	if (bus_width != x32_width)
+		return MR_CFGMASK;
+	return (u_int)MR_CFGMASK << 16;
+}
+
+/*
+ * Status Register "OK" mask for the given bus-width: SR_OK_DATAMASK once,
+ * or replicated in both 16-bit halves when @bus_width is 4 (x32 device)
+ */
+static inline u_int build_sr_ok_datamask(u_int bus_width)
+{
+	const u_int mask = SR_OK_DATAMASK;
+
+	if (bus_width == 0x0004)	/* x32 device */
+		return (mask << 16) + mask;
+	return mask;
+}
+
+/*
+ * Address of an Overlay Window control register: @offset (in words) is
+ * scaled by the bus-width and added to the window base (map->pfow_base)
+ */
+static inline u_long ow_reg_add(struct map_info *map, u_long offset)
+{
+	struct pcm_int_data *priv = map->fldrv_priv;
+	u_long scaled;
+
+	scaled = offset * priv->bus_width;
+	return map->pfow_base + scaled;
+}
+
+/*
+ * Enable lpddr2-nvm Overlay Window
+ * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers
+ * used by device commands as well as user-visible resources like Device Status
+ * Register, Device ID, etc. Enabling writes 0x01 to LPDDR2_MODE_REG_DATA.
+ */
+static inline void ow_enable(struct map_info *map)
+{
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+
+	writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18,
+		pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG);
+	writel_relaxed(0x01, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA);
+}
+
+/*
+ * Disable lpddr2-nvm Overlay Window
+ * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers
+ * used by device commands as well as user-visible resources like Device Status
+ * Register, Device ID, etc. Disabling writes 0x02 to LPDDR2_MODE_REG_DATA.
+ */
+static inline void ow_disable(struct map_info *map)
+{
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+
+	writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18,
+		pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG);
+	writel_relaxed(0x02, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA);
+}
+
+/*
+ * Execute one lpddr2-nvm command; @buf is read only by the BUF_* commands
+ */
+static int lpddr2_nvm_do_op(struct map_info *map, u_long cmd_code,
+	u_long cmd_data, u_long cmd_add, u_long cmd_mpr, u_char *buf)
+{
+	map_word add_l = { {0} }, add_h = { {0} }, mpr_l = { {0} },
+		mpr_h = { {0} }, data_l = { {0} }, cmd = { {0} },
+		exec_cmd = { {0} }, sr;
+	map_word data_h = { {0} };	/* only for 2x x16 devices stacked */
+	u_long i, status_reg, prg_buff_ofs;
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+	u_int sr_ok_datamask = build_sr_ok_datamask(pcm_data->bus_width);
+
+	/* Split each argument into 16-bit low/high OW Control Register words */
+	add_l.x[0]	= cmd_add & 0x0000FFFF;
+	add_h.x[0]	= (cmd_add >> 16) & 0x0000FFFF;
+	mpr_l.x[0]	= cmd_mpr & 0x0000FFFF;
+	mpr_h.x[0]	= (cmd_mpr >> 16) & 0x0000FFFF;
+	cmd.x[0]	= cmd_code & 0x0000FFFF;
+	exec_cmd.x[0]	= 0x0001;
+	data_l.x[0]	= cmd_data & 0x0000FFFF;
+	data_h.x[0]	= (cmd_data >> 16) & 0x0000FFFF; /* only for 2x x16 */
+
+	/* Set Overlay Window Control Registers */
+	map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS));
+	map_write(map, data_l, ow_reg_add(map, CMD_DATA_OFS));
+	map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS));
+	map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS));
+	map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS));
+	map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS));
+	if (pcm_data->bus_width == 0x0004) {	/* 2x16 devices stacked */
+		map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS) + 2);
+		map_write(map, data_h, ow_reg_add(map, CMD_DATA_OFS) + 2);
+		map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS) + 2);
+		map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS) + 2);
+		map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS) + 2);
+		map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS) + 2);
+	}
+
+	/* Fill Program Buffer: cmd_mpr doubles as the byte count of buf */
+	if ((cmd_code == LPDDR2_NVM_BUF_PROGRAM) ||
+		(cmd_code == LPDDR2_NVM_BUF_OVERWRITE)) {
+		prg_buff_ofs = (map_read(map,
+			ow_reg_add(map, PRG_BUFFER_OFS))).x[0];
+		for (i = 0; i < cmd_mpr; i++) {
+			map_write(map, build_map_word(buf[i]), map->pfow_base +
+			prg_buff_ofs + i);
+		}
+	}
+
+	/* Command Execute */
+	map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS));
+	if (pcm_data->bus_width == 0x0004)	/* 2x16 devices stacked */
+		map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS) + 2);
+
+	/* Status Register Check: spins, no timeout, until all OK bits are set */
+	do {
+		sr = map_read(map, ow_reg_add(map, STATUS_REG_OFS));
+		status_reg = sr.x[0];
+		if (pcm_data->bus_width == 0x0004) {/* 2x16 devices stacked */
+			sr = map_read(map, ow_reg_add(map,
+				STATUS_REG_OFS) + 2);
+			status_reg += sr.x[0] << 16;
+		}
+	} while ((status_reg & sr_ok_datamask) != sr_ok_datamask);
+
+	return (((status_reg & sr_ok_datamask) == sr_ok_datamask) ? 0 : -EIO);
+}
+
+/*
+ * Issue @block_op at start_add, then every mtd->erasesize up to start_add+len
+ */
+static int lpddr2_nvm_do_block_op(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len, u_char block_op)
+{
+	struct map_info *map = mtd->priv;
+	u_long add, end_add;
+	int ret = 0;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	add = start_add;
+	end_add = add + len;
+
+	do {
+		ret = lpddr2_nvm_do_op(map, block_op, 0x00, add, add, NULL);
+		if (ret)
+			goto out;
+		add += mtd->erasesize;
+	} while (add < end_add);
+
+out:
+	ow_disable(map);
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return ret;
+}
+
+/*
+ * Verify presence of the PFOW string in the Overlay Window (1 found, 0 not)
+ */
+static int lpddr2_nvm_pfow_present(struct map_info *map)
+{
+	map_word pfow_val[4];
+	unsigned int found = 1;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	/* Read the four query-string words from the Overlay Window */
+	pfow_val[0] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_P));
+	pfow_val[1] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_F));
+	pfow_val[2] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_O));
+	pfow_val[3] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_W));
+
+	/* Verify the string loaded vs expected */
+	if (!map_word_equal(map, build_map_word('P'), pfow_val[0]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('F'), pfow_val[1]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('O'), pfow_val[2]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('W'), pfow_val[3]))
+		found = 0;
+
+	ow_disable(map);
+
+	mutex_unlock(&lpdd2_nvm_mutex);
+
+	return found;
+}
+
+/*
+ * lpddr2_nvm driver read method: copies @len bytes from the map, returns 0
+ */
+static int lpddr2_nvm_read(struct mtd_info *mtd, loff_t start_add,
+				size_t len, size_t *retlen, u_char *buf)
+{
+	struct map_info *map = mtd->priv;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	*retlen = len;
+
+	map_copy_from(map, buf, start_add, *retlen);
+
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return 0;
+}
+
+/*
+ * lpddr2_nvm driver write method; *retlen is set to the bytes written so far
+ */
+static int lpddr2_nvm_write(struct mtd_info *mtd, loff_t start_add,
+				size_t len, size_t *retlen, const u_char *buf)
+{
+	struct map_info *map = mtd->priv;
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+	u_long add, current_len, tot_len, target_len, my_data;
+	u_char *write_buf = (u_char *)buf;
+	int ret = 0;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	/* Set start value for the variables */
+	add = start_add;
+	target_len = len;
+	tot_len = 0;
+
+	while (tot_len < target_len) {
+		if (!(IS_ALIGNED(add, mtd->writesize))) { /* do sw program */
+			my_data = write_buf[tot_len];
+			my_data += (write_buf[tot_len+1]) << 8;
+			if (pcm_data->bus_width == 0x0004) {/* 2x16 devices */
+				my_data += (write_buf[tot_len+2]) << 16;
+				my_data += (write_buf[tot_len+3]) << 24;
+			}
+			ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_SW_OVERWRITE,
+				my_data, add, 0x00, NULL);
+			if (ret)
+				goto out;
+
+			add += pcm_data->bus_width;
+			tot_len += pcm_data->bus_width;
+		} else {		/* do buffer program */
+			current_len = min(target_len - tot_len,
+				(u_long) mtd->writesize);
+			ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_BUF_OVERWRITE,
+				0x00, add, current_len, write_buf + tot_len);
+			if (ret)
+				goto out;
+
+			add += current_len;
+			tot_len += current_len;
+		}
+	}
+
+out:
+	*retlen = tot_len;
+	ow_disable(map);
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return ret;
+}
+
+/*
+ * lpddr2_nvm driver erase method; on success marks @instr MTD_ERASE_DONE
+ */
+static int lpddr2_nvm_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	int ret = lpddr2_nvm_do_block_op(mtd, instr->addr, instr->len,
+		LPDDR2_NVM_ERASE);
+	if (!ret) {
+		instr->state = MTD_ERASE_DONE;
+		mtd_erase_callback(instr);
+	}
+
+	return ret;
+}
+
+/*
+ * lpddr2_nvm driver unlock method: LPDDR2_NVM_UNLOCK over the given range
+ */
+static int lpddr2_nvm_unlock(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len)
+{
+	return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_UNLOCK);
+}
+
+/*
+ * lpddr2_nvm driver lock method: LPDDR2_NVM_LOCK over the given range
+ */
+static int lpddr2_nvm_lock(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len)
+{
+	return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_LOCK);
+}
+
+/*
+ * lpddr2_nvm driver probe method: remaps MEM resource 0 (memory array) and
+ * MEM resource 1 (controller registers), checks for the PFOW signature and
+ * registers the MTD device. Returns 0 on success or a negative errno.
+ */
+static int lpddr2_nvm_probe(struct platform_device *pdev)
+{
+	struct map_info *map;
+	struct mtd_info *mtd;
+	struct resource *add_range, *control_regs;
+	struct pcm_int_data *pcm_data;
+
+	/* Allocate memory control_regs data structures */
+	pcm_data = devm_kzalloc(&pdev->dev, sizeof(*pcm_data), GFP_KERNEL);
+	if (!pcm_data)
+		return -ENOMEM;
+	pcm_data->bus_width = BUS_WIDTH;
+
+	/* Allocate memory for map_info & mtd_info data structures */
+	map = devm_kzalloc(&pdev->dev, sizeof(*map), GFP_KERNEL);
+	mtd = devm_kzalloc(&pdev->dev, sizeof(*mtd), GFP_KERNEL);
+	if (!map || !mtd)
+		return -ENOMEM;
+
+	/* lpddr2_nvm address range: dereferenced below, so it must exist */
+	add_range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!add_range)
+		return -ENODEV;
+
+	/* Populate map_info data structure */
+	*map = (struct map_info) {
+		.virt		= devm_ioremap_resource(&pdev->dev, add_range),
+		.name		= pdev->dev.init_name,
+		.phys		= add_range->start,
+		.size		= resource_size(add_range),
+		.bankwidth	= pcm_data->bus_width / 2,
+		.pfow_base	= OW_BASE_ADDRESS,
+		.fldrv_priv	= pcm_data,
+	};
+	if (IS_ERR(map->virt))
+		return PTR_ERR(map->virt);
+
+	simple_map_init(map);	/* fill with default methods */
+
+	control_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	pcm_data->ctl_regs = devm_ioremap_resource(&pdev->dev, control_regs);
+	if (IS_ERR(pcm_data->ctl_regs))
+		return PTR_ERR(pcm_data->ctl_regs);
+
+	/* Populate mtd_info data structure */
+	*mtd = (struct mtd_info) {
+		.name		= pdev->dev.init_name,
+		.type		= MTD_RAM,
+		.priv		= map,
+		.size		= resource_size(add_range),
+		.erasesize	= ERASE_BLOCKSIZE * pcm_data->bus_width,
+		.writesize	= 1,
+		.writebufsize	= WRITE_BUFFSIZE * pcm_data->bus_width,
+		.flags		= (MTD_CAP_NVRAM | MTD_POWERUP_LOCK),
+		._read		= lpddr2_nvm_read,
+		._write		= lpddr2_nvm_write,
+		._erase		= lpddr2_nvm_erase,
+		._unlock	= lpddr2_nvm_unlock,
+		._lock		= lpddr2_nvm_lock,
+	};
+
+	/* Verify the presence of the device looking for PFOW string */
+	if (!lpddr2_nvm_pfow_present(map)) {
+		pr_err("device not recognized\n");
+		return -EINVAL;
+	}
+	platform_set_drvdata(pdev, mtd);	/* read by lpddr2_nvm_remove() */
+	/* Parse partitions and register the MTD device */
+	return mtd_device_parse_register(mtd, NULL, NULL, NULL, 0);
+}
+
+/*
+ * lpddr2_nvm driver remove method; NOTE(review): needs drvdata == mtd_info
+ */
+static int lpddr2_nvm_remove(struct platform_device *pdev)
+{
+	return mtd_device_unregister(dev_get_drvdata(&pdev->dev));
+}
+
+/* platform_driver for lpddr2_nvm: driver name plus probe/remove callbacks */
+static struct platform_driver lpddr2_nvm_drv = {
+	.driver		= {
+		.name	= "lpddr2_nvm",
+	},
+	.probe		= lpddr2_nvm_probe,
+	.remove		= lpddr2_nvm_remove,
+};
+
+module_platform_driver(lpddr2_nvm_drv);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vincenzo Aliberti <vincenzo.aliberti@gmail.com>");
+MODULE_DESCRIPTION("MTD driver for LPDDR2-NVM PCM memories");

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index fce23fe..21b2874 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig

@@ -108,7 +108,7 @@
 
 config MTD_SC520CDP
 	tristate "CFI Flash device mapped on AMD SC520 CDP"
-	depends on X86 && MTD_CFI
+	depends on (MELAN || COMPILE_TEST) && MTD_CFI
 	help
 	  The SC520 CDP board has two banks of CFI-compliant chips and one
 	  Dual-in-line JEDEC chip. This 'mapping' driver supports that
@@ -116,7 +116,7 @@
 
 config MTD_NETSC520
 	tristate "CFI Flash device mapped on AMD NetSc520"
-	depends on X86 && MTD_CFI
+	depends on (MELAN || COMPILE_TEST) && MTD_CFI
 	help
 	  This enables access routines for the flash chips on the AMD NetSc520
 	  demonstration board. If you have one of these boards and would like

diff --git a/drivers/mtd/maps/sc520cdp.c b/drivers/mtd/maps/sc520cdp.c
index 8fead8e..093edd5 100644
--- a/drivers/mtd/maps/sc520cdp.c
+++ b/drivers/mtd/maps/sc520cdp.c

@@ -183,7 +183,7 @@
 
 static void sc520cdp_setup_par(void)
 {
-	volatile unsigned long __iomem *mmcr;
+	unsigned long __iomem *mmcr;
 	unsigned long mmcr_val;
 	int i, j;
 
@@ -203,11 +203,11 @@
 	*/
 	for(i = 0; i < NUM_FLASH_BANKS; i++) {		/* for each par_table entry  */
 		for(j = 0; j < NUM_SC520_PAR; j++) {	/* for each PAR register     */
-			mmcr_val = mmcr[SC520_PAR(j)];
+			mmcr_val = readl(&mmcr[SC520_PAR(j)]);
 			/* if target device field matches, reprogram the PAR */
 			if((mmcr_val & SC520_PAR_TRGDEV) == par_table[i].trgdev)
 			{
-				mmcr[SC520_PAR(j)] = par_table[i].new_par;
+				writel(par_table[i].new_par, &mmcr[SC520_PAR(j)]);
 				break;
 			}
 		}

diff --git a/drivers/mtd/maps/solutionengine.c b/drivers/mtd/maps/solutionengine.c
index 83a7a70..bb580bc 100644
--- a/drivers/mtd/maps/solutionengine.c
+++ b/drivers/mtd/maps/solutionengine.c

@@ -33,28 +33,6 @@
 
 static const char * const probes[] = { "RedBoot", "cmdlinepart", NULL };
 
-#ifdef CONFIG_MTD_SUPERH_RESERVE
-static struct mtd_partition superh_se_partitions[] = {
-	/* Reserved for boot code, read-only */
-	{
-		.name = "flash_boot",
-		.offset = 0x00000000,
-		.size = CONFIG_MTD_SUPERH_RESERVE,
-		.mask_flags = MTD_WRITEABLE,
-	},
-	/* All else is writable (e.g. JFFS) */
-	{
-		.name = "Flash FS",
-		.offset = MTDPART_OFS_NXTBLK,
-		.size = MTDPART_SIZ_FULL,
-	}
-};
-#define NUM_PARTITIONS ARRAY_SIZE(superh_se_partitions)
-#else
-#define superh_se_partitions NULL
-#define NUM_PARTITIONS 0
-#endif /* CONFIG_MTD_SUPERH_RESERVE */
-
 static int __init init_soleng_maps(void)
 {
 	/* First probe at offset 0 */
@@ -92,8 +70,7 @@
 		mtd_device_register(eprom_mtd, NULL, 0);
 	}
 
-	mtd_device_parse_register(flash_mtd, probes, NULL,
-				  superh_se_partitions, NUM_PARTITIONS);
+	mtd_device_parse_register(flash_mtd, probes, NULL, NULL, 0);
 
 	return 0;
 }

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 4dbfaee..43e3099 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c

@@ -87,6 +87,9 @@
 	if (req->cmd_type != REQ_TYPE_FS)
 		return -EIO;
 
+	if (req->cmd_flags & REQ_FLUSH)
+		return tr->flush(dev);
+
 	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
 	    get_capacity(req->rq_disk))
 		return -EIO;
@@ -407,6 +410,9 @@
 	if (!new->rq)
 		goto error3;
 
+	if (tr->flush)
+		blk_queue_flush(new->rq, REQ_FLUSH);
+
 	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 7d4e7b9..a0f54e8 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c

@@ -568,13 +568,18 @@
 {
 	struct mtd_write_req req;
 	struct mtd_oob_ops ops;
-	void __user *usr_data, *usr_oob;
+	const void __user *usr_data, *usr_oob;
 	int ret;
 
-	if (copy_from_user(&req, argp, sizeof(req)) ||
-			!access_ok(VERIFY_READ, req.usr_data, req.len) ||
-			!access_ok(VERIFY_READ, req.usr_oob, req.ooblen))
+	if (copy_from_user(&req, argp, sizeof(req)))
 		return -EFAULT;
+
+	usr_data = (const void __user *)(uintptr_t)req.usr_data;
+	usr_oob = (const void __user *)(uintptr_t)req.usr_oob;
+	if (!access_ok(VERIFY_READ, usr_data, req.len) ||
+	    !access_ok(VERIFY_READ, usr_oob, req.ooblen))
+		return -EFAULT;
+
 	if (!mtd->_write_oob)
 		return -EOPNOTSUPP;
 
@@ -583,10 +588,7 @@
 	ops.ooblen = (size_t)req.ooblen;
 	ops.ooboffs = 0;
 
-	usr_data = (void __user *)(uintptr_t)req.usr_data;
-	usr_oob = (void __user *)(uintptr_t)req.usr_oob;
-
-	if (req.usr_data) {
+	if (usr_data) {
 		ops.datbuf = memdup_user(usr_data, ops.len);
 		if (IS_ERR(ops.datbuf))
 			return PTR_ERR(ops.datbuf);
@@ -594,7 +596,7 @@
 		ops.datbuf = NULL;
 	}
 
-	if (req.usr_oob) {
+	if (usr_oob) {
 		ops.oobbuf = memdup_user(usr_oob, ops.ooblen);
 		if (IS_ERR(ops.oobbuf)) {
 			kfree(ops.datbuf);

diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c
index b7a2494..722898a 100644
--- a/drivers/mtd/nand/bf5xx_nand.c
+++ b/drivers/mtd/nand/bf5xx_nand.c

@@ -679,9 +679,6 @@
 	peripheral_free_list(bfin_nfc_pin_req);
 	bf5xx_nand_dma_remove(info);
 
-	/* free the common resources */
-	kfree(info);
-
 	return 0;
 }
 
@@ -742,10 +739,10 @@
 		return -EFAULT;
 	}
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (info == NULL) {
 		err = -ENOMEM;
-		goto out_err_kzalloc;
+		goto out_err;
 	}
 
 	platform_set_drvdata(pdev, info);
@@ -790,7 +787,7 @@
 	/* initialise the hardware */
 	err = bf5xx_nand_hw_init(info);
 	if (err)
-		goto out_err_hw_init;
+		goto out_err;
 
 	/* setup hardware ECC data struct */
 	if (hardware_ecc) {
@@ -827,9 +824,7 @@
 
 out_err_nand_scan:
 	bf5xx_nand_dma_remove(info);
-out_err_hw_init:
-	kfree(info);
-out_err_kzalloc:
+out_err:
 	peripheral_free_list(bfin_nfc_pin_req);
 
 	return err;

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index c07cd57..9f2012a 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c

@@ -1233,7 +1233,7 @@
 	return status;
 }
 
-static void denali_erase(struct mtd_info *mtd, int page)
+static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
@@ -1250,8 +1250,7 @@
 	irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP |
 					INTR_STATUS__ERASE_FAIL);
 
-	denali->status = (irq_status & INTR_STATUS__ERASE_FAIL) ?
-						NAND_STATUS_FAIL : PASS;
+	return (irq_status & INTR_STATUS__ERASE_FAIL) ? NAND_STATUS_FAIL : PASS;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
@@ -1584,7 +1583,7 @@
 	denali->nand.ecc.write_page_raw = denali_write_page_raw;
 	denali->nand.ecc.read_oob = denali_read_oob;
 	denali->nand.ecc.write_oob = denali_write_oob;
-	denali->nand.erase_cmd = denali_erase;
+	denali->nand.erase = denali_erase;
 
 	if (nand_scan_tail(&denali->mtd)) {
 		ret = -ENXIO;

diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 1b0265e..ce24637 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c

@@ -872,7 +872,7 @@
 	return 0;
 }
 
-static void docg4_erase_block(struct mtd_info *mtd, int page)
+static int docg4_erase_block(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *nand = mtd->priv;
 	struct docg4_priv *doc = nand->priv;
@@ -916,6 +916,8 @@
 	write_nop(docptr);
 	poll_status(doc);
 	write_nop(docptr);
+
+	return nand->waitfunc(mtd, nand);
 }
 
 static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
@@ -1236,7 +1238,7 @@
 	nand->block_markbad = docg4_block_markbad;
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
-	nand->erase_cmd = docg4_erase_block;
+	nand->erase = docg4_erase_block;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index ec549cd..545a5c0 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c

@@ -723,6 +723,19 @@
 	return 0;
 }
 
+/* ECC will be calculated automatically, and errors will be detected in
+ * waitfunc.
+ */
+static int fsl_elbc_write_subpage(struct mtd_info *mtd, struct nand_chip *chip,
+				uint32_t offset, uint32_t data_len,
+				const uint8_t *buf, int oob_required)
+{
+	fsl_elbc_write_buf(mtd, buf, mtd->writesize);
+	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	return 0;
+}
+
 static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 {
 	struct fsl_lbc_ctrl *ctrl = priv->ctrl;
@@ -761,6 +774,7 @@
 
 	chip->ecc.read_page = fsl_elbc_read_page;
 	chip->ecc.write_page = fsl_elbc_write_page;
+	chip->ecc.write_subpage = fsl_elbc_write_subpage;
 
 	/* If CS Base Register selects full hardware ECC then use it */
 	if ((in_be32(&lbc->bank[priv->bank].br) & BR_DECC) ==

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index cb45d2f..2338124 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c

@@ -56,7 +56,7 @@
 	struct nand_hw_control controller;
 	struct fsl_ifc_mtd *chips[FSL_IFC_BANK_COUNT];
 
-	u8 __iomem *addr;	/* Address of assigned IFC buffer	*/
+	void __iomem *addr;	/* Address of assigned IFC buffer	*/
 	unsigned int page;	/* Last page written to / read from	*/
 	unsigned int read_bytes;/* Number of bytes read during command	*/
 	unsigned int column;	/* Saved column from SEQIN		*/
@@ -591,7 +591,10 @@
 		 * The chip always seems to report that it is
 		 * write-protected, even when it is not.
 		 */
-		setbits8(ifc_nand_ctrl->addr, NAND_STATUS_WP);
+		if (chip->options & NAND_BUSWIDTH_16)
+			setbits16(ifc_nand_ctrl->addr, NAND_STATUS_WP);
+		else
+			setbits8(ifc_nand_ctrl->addr, NAND_STATUS_WP);
 		return;
 
 	case NAND_CMD_RESET:
@@ -636,7 +639,7 @@
 		len = bufsize - ifc_nand_ctrl->index;
 	}
 
-	memcpy_toio(&ifc_nand_ctrl->addr[ifc_nand_ctrl->index], buf, len);
+	memcpy_toio(ifc_nand_ctrl->addr + ifc_nand_ctrl->index, buf, len);
 	ifc_nand_ctrl->index += len;
 }
 
@@ -648,13 +651,16 @@
 {
 	struct nand_chip *chip = mtd->priv;
 	struct fsl_ifc_mtd *priv = chip->priv;
+	unsigned int offset;
 
 	/*
 	 * If there are still bytes in the IFC buffer, then use the
 	 * next byte.
 	 */
-	if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes)
-		return in_8(&ifc_nand_ctrl->addr[ifc_nand_ctrl->index++]);
+	if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes) {
+		offset = ifc_nand_ctrl->index++;
+		return in_8(ifc_nand_ctrl->addr + offset);
+	}
 
 	dev_err(priv->dev, "%s: beyond end of buffer\n", __func__);
 	return ERR_BYTE;
@@ -675,8 +681,7 @@
 	 * next byte.
 	 */
 	if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes) {
-		data = in_be16((uint16_t __iomem *)&ifc_nand_ctrl->
-			       addr[ifc_nand_ctrl->index]);
+		data = in_be16(ifc_nand_ctrl->addr + ifc_nand_ctrl->index);
 		ifc_nand_ctrl->index += 2;
 		return (uint8_t) data;
 	}
@@ -701,7 +706,7 @@
 
 	avail = min((unsigned int)len,
 			ifc_nand_ctrl->read_bytes - ifc_nand_ctrl->index);
-	memcpy_fromio(buf, &ifc_nand_ctrl->addr[ifc_nand_ctrl->index], avail);
+	memcpy_fromio(buf, ifc_nand_ctrl->addr + ifc_nand_ctrl->index, avail);
 	ifc_nand_ctrl->index += avail;
 
 	if (len > avail)

diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h
index 588f537..05bb91f 100644
--- a/drivers/mtd/nand/gpmi-nand/bch-regs.h
+++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h

@@ -54,7 +54,7 @@
 #define MX6Q_BP_BCH_FLASH0LAYOUT0_ECC0		11
 #define MX6Q_BM_BCH_FLASH0LAYOUT0_ECC0	(0x1f << MX6Q_BP_BCH_FLASH0LAYOUT0_ECC0)
 #define BF_BCH_FLASH0LAYOUT0_ECC0(v, x)				\
-	(GPMI_IS_MX6Q(x)					\
+	(GPMI_IS_MX6(x)					\
 		? (((v) << MX6Q_BP_BCH_FLASH0LAYOUT0_ECC0)	\
 			& MX6Q_BM_BCH_FLASH0LAYOUT0_ECC0)	\
 		: (((v) << BP_BCH_FLASH0LAYOUT0_ECC0)		\
@@ -65,7 +65,7 @@
 #define MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14			\
 				(0x1 << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14)
 #define BF_BCH_FLASH0LAYOUT0_GF(v, x)				\
-	((GPMI_IS_MX6Q(x) && ((v) == 14))			\
+	((GPMI_IS_MX6(x) && ((v) == 14))			\
 		? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14)	\
 			& MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14)	\
 		: 0						\
@@ -77,7 +77,7 @@
 #define MX6Q_BM_BCH_FLASH0LAYOUT0_DATA0_SIZE	\
 			(0x3ff << BP_BCH_FLASH0LAYOUT0_DATA0_SIZE)
 #define BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(v, x)				\
-	(GPMI_IS_MX6Q(x)						\
+	(GPMI_IS_MX6(x)						\
 		? (((v) >> 2) & MX6Q_BM_BCH_FLASH0LAYOUT0_DATA0_SIZE)	\
 		: ((v) & BM_BCH_FLASH0LAYOUT0_DATA0_SIZE)		\
 	)
@@ -96,7 +96,7 @@
 #define MX6Q_BP_BCH_FLASH0LAYOUT1_ECCN		11
 #define MX6Q_BM_BCH_FLASH0LAYOUT1_ECCN	(0x1f << MX6Q_BP_BCH_FLASH0LAYOUT1_ECCN)
 #define BF_BCH_FLASH0LAYOUT1_ECCN(v, x)				\
-	(GPMI_IS_MX6Q(x)					\
+	(GPMI_IS_MX6(x)					\
 		? (((v) << MX6Q_BP_BCH_FLASH0LAYOUT1_ECCN)	\
 			& MX6Q_BM_BCH_FLASH0LAYOUT1_ECCN)	\
 		: (((v) << BP_BCH_FLASH0LAYOUT1_ECCN)		\
@@ -107,7 +107,7 @@
 #define MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14			\
 				(0x1 << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14)
 #define BF_BCH_FLASH0LAYOUT1_GF(v, x)				\
-	((GPMI_IS_MX6Q(x) && ((v) == 14))			\
+	((GPMI_IS_MX6(x) && ((v) == 14))			\
 		? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14)	\
 			& MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14)	\
 		: 0						\
@@ -119,7 +119,7 @@
 #define MX6Q_BM_BCH_FLASH0LAYOUT1_DATAN_SIZE	\
 			(0x3ff << BP_BCH_FLASH0LAYOUT1_DATAN_SIZE)
 #define BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(v, x)				\
-	(GPMI_IS_MX6Q(x)						\
+	(GPMI_IS_MX6(x)						\
 		? (((v) >> 2) & MX6Q_BM_BCH_FLASH0LAYOUT1_DATAN_SIZE)	\
 		: ((v) & BM_BCH_FLASH0LAYOUT1_DATAN_SIZE)		\
 	)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index dd1df60..87e658c 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c

@@ -861,7 +861,7 @@
 	struct resources *r = &this->resources;
 	unsigned long rate = clk_get_rate(r->clock[0]);
 	int mode = this->timing_mode;
-	int dll_threshold = 16; /* in ns */
+	int dll_threshold = this->devdata->max_chain_delay;
 	unsigned long delay;
 	unsigned long clk_period;
 	int t_rea;
@@ -886,9 +886,6 @@
 	/* [3] for GPMI_HW_GPMI_CTRL1 */
 	hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
 
-	if (GPMI_IS_MX6Q(this))
-		dll_threshold = 12;
-
 	/*
 	 * Enlarge 10 times for the numerator and denominator in {3}.
 	 * This make us to get more accurate result.
@@ -974,7 +971,7 @@
 	struct nand_chip *chip = &this->nand;
 
 	/* Enable the asynchronous EDO feature. */
-	if (GPMI_IS_MX6Q(this) && chip->onfi_version) {
+	if (GPMI_IS_MX6(this) && chip->onfi_version) {
 		int mode = onfi_get_async_timing_mode(chip);
 
 		/* We only support the timing mode 4 and mode 5. */
@@ -1096,12 +1093,12 @@
 	if (GPMI_IS_MX23(this)) {
 		mask = MX23_BM_GPMI_DEBUG_READY0 << chip;
 		reg = readl(r->gpmi_regs + HW_GPMI_DEBUG);
-	} else if (GPMI_IS_MX28(this) || GPMI_IS_MX6Q(this)) {
+	} else if (GPMI_IS_MX28(this) || GPMI_IS_MX6(this)) {
 		/*
 		 * In the imx6, all the ready/busy pins are bound
 		 * together. So we only need to check chip 0.
 		 */
-		if (GPMI_IS_MX6Q(this))
+		if (GPMI_IS_MX6(this))
 			chip = 0;
 
 		/* MX28 shares the same R/B register as MX6Q. */

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index bb77f75..f638cd8 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c

@@ -53,6 +53,30 @@
 	.oobfree = { {.offset = 0, .length = 0} }
 };
 
+static const struct gpmi_devdata gpmi_devdata_imx23 = {
+	.type = IS_MX23,
+	.bch_max_ecc_strength = 20,
+	.max_chain_delay = 16,
+};
+
+static const struct gpmi_devdata gpmi_devdata_imx28 = {
+	.type = IS_MX28,
+	.bch_max_ecc_strength = 20,
+	.max_chain_delay = 16,
+};
+
+static const struct gpmi_devdata gpmi_devdata_imx6q = {
+	.type = IS_MX6Q,
+	.bch_max_ecc_strength = 40,
+	.max_chain_delay = 12,
+};
+
+static const struct gpmi_devdata gpmi_devdata_imx6sx = {
+	.type = IS_MX6SX,
+	.bch_max_ecc_strength = 62,
+	.max_chain_delay = 12,
+};
+
 static irqreturn_t bch_irq(int irq, void *cookie)
 {
 	struct gpmi_nand_data *this = cookie;
@@ -102,14 +126,8 @@
 		/* The mx23/mx28 only support the GF13. */
 		if (geo->gf_len == 14)
 			return false;
-
-		if (geo->ecc_strength > MXS_ECC_STRENGTH_MAX)
-			return false;
-	} else if (GPMI_IS_MX6Q(this)) {
-		if (geo->ecc_strength > MX6_ECC_STRENGTH_MAX)
-			return false;
 	}
-	return true;
+	return geo->ecc_strength <= this->devdata->bch_max_ecc_strength;
 }
 
 /*
@@ -270,8 +288,7 @@
 			"We can not support this nand chip."
 			" Its required ecc strength(%d) is beyond our"
 			" capability(%d).\n", geo->ecc_strength,
-			(GPMI_IS_MX6Q(this) ? MX6_ECC_STRENGTH_MAX
-					: MXS_ECC_STRENGTH_MAX));
+			this->devdata->bch_max_ecc_strength);
 		return -EINVAL;
 	}
 
@@ -572,7 +589,7 @@
 	}
 
 	/* Get extra clocks */
-	if (GPMI_IS_MX6Q(this))
+	if (GPMI_IS_MX6(this))
 		extra_clks = extra_clks_for_mx6q;
 	if (!extra_clks)
 		return 0;
@@ -590,9 +607,9 @@
 		r->clock[i] = clk;
 	}
 
-	if (GPMI_IS_MX6Q(this))
+	if (GPMI_IS_MX6(this))
 		/*
-		 * Set the default value for the gpmi clock in mx6q:
+		 * Set the default value for the gpmi clock.
 		 *
 		 * If you want to use the ONFI nand which is in the
 		 * Synchronous Mode, you should change the clock as you need.
@@ -1655,7 +1672,7 @@
 	 *  (1) the chip is imx6, and
 	 *  (2) the size of the ECC parity is byte aligned.
 	 */
-	if (GPMI_IS_MX6Q(this) &&
+	if (GPMI_IS_MX6(this) &&
 		((bch_geo->gf_len * bch_geo->ecc_strength) % 8) == 0) {
 		ecc->read_subpage = gpmi_ecc_read_subpage;
 		chip->options |= NAND_SUBPAGE_READ;
@@ -1711,7 +1728,7 @@
 	if (ret)
 		goto err_out;
 
-	ret = nand_scan_ident(mtd, GPMI_IS_MX6Q(this) ? 2 : 1, NULL);
+	ret = nand_scan_ident(mtd, GPMI_IS_MX6(this) ? 2 : 1, NULL);
 	if (ret)
 		goto err_out;
 
@@ -1740,23 +1757,19 @@
 	return ret;
 }
 
-static const struct platform_device_id gpmi_ids[] = {
-	{ .name = "imx23-gpmi-nand", .driver_data = IS_MX23, },
-	{ .name = "imx28-gpmi-nand", .driver_data = IS_MX28, },
-	{ .name = "imx6q-gpmi-nand", .driver_data = IS_MX6Q, },
-	{}
-};
-
 static const struct of_device_id gpmi_nand_id_table[] = {
 	{
 		.compatible = "fsl,imx23-gpmi-nand",
-		.data = (void *)&gpmi_ids[IS_MX23],
+		.data = (void *)&gpmi_devdata_imx23,
 	}, {
 		.compatible = "fsl,imx28-gpmi-nand",
-		.data = (void *)&gpmi_ids[IS_MX28],
+		.data = (void *)&gpmi_devdata_imx28,
 	}, {
 		.compatible = "fsl,imx6q-gpmi-nand",
-		.data = (void *)&gpmi_ids[IS_MX6Q],
+		.data = (void *)&gpmi_devdata_imx6q,
+	}, {
+		.compatible = "fsl,imx6sx-gpmi-nand",
+		.data = (void *)&gpmi_devdata_imx6sx,
 	}, {}
 };
 MODULE_DEVICE_TABLE(of, gpmi_nand_id_table);
@@ -1767,18 +1780,18 @@
 	const struct of_device_id *of_id;
 	int ret;
 
+	this = devm_kzalloc(&pdev->dev, sizeof(*this), GFP_KERNEL);
+	if (!this)
+		return -ENOMEM;
+
 	of_id = of_match_device(gpmi_nand_id_table, &pdev->dev);
 	if (of_id) {
-		pdev->id_entry = of_id->data;
+		this->devdata = of_id->data;
 	} else {
 		dev_err(&pdev->dev, "Failed to find the right device id.\n");
 		return -ENODEV;
 	}
 
-	this = devm_kzalloc(&pdev->dev, sizeof(*this), GFP_KERNEL);
-	if (!this)
-		return -ENOMEM;
-
 	platform_set_drvdata(pdev, this);
 	this->pdev  = pdev;
 	this->dev   = &pdev->dev;
@@ -1823,7 +1836,6 @@
 	},
 	.probe   = gpmi_nand_probe,
 	.remove  = gpmi_nand_remove,
-	.id_table = gpmi_ids,
 };
 module_platform_driver(gpmi_nand_driver);
 

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 4c801fa..32c6ba4 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h

@@ -119,11 +119,25 @@
 	int8_t  tRHOH_in_ns;
 };
 
+enum gpmi_type {
+	IS_MX23,
+	IS_MX28,
+	IS_MX6Q,
+	IS_MX6SX
+};
+
+struct gpmi_devdata {
+	enum gpmi_type type;
+	int bch_max_ecc_strength;
+	int max_chain_delay; /* See the async EDO mode */
+};
+
 struct gpmi_nand_data {
 	/* flags */
 #define GPMI_ASYNC_EDO_ENABLED	(1 << 0)
 #define GPMI_TIMING_INIT_OK	(1 << 1)
 	int			flags;
+	const struct gpmi_devdata *devdata;
 
 	/* System Interface */
 	struct device		*dev;
@@ -281,15 +295,11 @@
 #define STATUS_ERASED		0xff
 #define STATUS_UNCORRECTABLE	0xfe
 
-/* BCH's bit correction capability. */
-#define MXS_ECC_STRENGTH_MAX	20	/* mx23 and mx28 */
-#define MX6_ECC_STRENGTH_MAX	40
+/* Use the devdata to distinguish different Archs. */
+#define GPMI_IS_MX23(x)		((x)->devdata->type == IS_MX23)
+#define GPMI_IS_MX28(x)		((x)->devdata->type == IS_MX28)
+#define GPMI_IS_MX6Q(x)		((x)->devdata->type == IS_MX6Q)
+#define GPMI_IS_MX6SX(x)	((x)->devdata->type == IS_MX6SX)
 
-/* Use the platform_id to distinguish different Archs. */
-#define IS_MX23			0x0
-#define IS_MX28			0x1
-#define IS_MX6Q			0x2
-#define GPMI_IS_MX23(x)		((x)->pdev->id_entry->driver_data == IS_MX23)
-#define GPMI_IS_MX28(x)		((x)->pdev->id_entry->driver_data == IS_MX28)
-#define GPMI_IS_MX6Q(x)		((x)->pdev->id_entry->driver_data == IS_MX6Q)
+#define GPMI_IS_MX6(x)		(GPMI_IS_MX6Q(x) || GPMI_IS_MX6SX(x))
 #endif

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 9d01c4d..41167e9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c

@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -1204,8 +1205,7 @@
 	 * ecc.pos. Let's make sure that there are no gaps in ECC positions.
 	 */
 	for (i = 0; i < eccfrag_len - 1; i++) {
-		if (eccpos[i + start_step * chip->ecc.bytes] + 1 !=
-			eccpos[i + start_step * chip->ecc.bytes + 1]) {
+		if (eccpos[i + index] + 1 != eccpos[i + index + 1]) {
 			gaps = 1;
 			break;
 		}
@@ -1501,6 +1501,7 @@
 		mtd->oobavail : mtd->oobsize;
 
 	uint8_t *bufpoi, *oob, *buf;
+	int use_bufpoi;
 	unsigned int max_bitflips = 0;
 	int retry_mode = 0;
 	bool ecc_fail = false;
@@ -1523,9 +1524,20 @@
 		bytes = min(mtd->writesize - col, readlen);
 		aligned = (bytes == mtd->writesize);
 
+		if (!aligned)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
+
 		/* Is the current page in the buffer? */
 		if (realpage != chip->pagebuf || oob) {
-			bufpoi = aligned ? buf : chip->buffers->databuf;
+			bufpoi = use_bufpoi ? chip->buffers->databuf : buf;
+
+			if (use_bufpoi && aligned)
+				pr_debug("%s: using read bounce buffer for buf@%p\n",
+						 __func__, buf);
 
 read_retry:
 			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
@@ -1547,7 +1559,7 @@
 				ret = chip->ecc.read_page(mtd, chip, bufpoi,
 							  oob_required, page);
 			if (ret < 0) {
-				if (!aligned)
+				if (use_bufpoi)
 					/* Invalidate page cache */
 					chip->pagebuf = -1;
 				break;
@@ -1556,7 +1568,7 @@
 			max_bitflips = max_t(unsigned int, max_bitflips, ret);
 
 			/* Transfer not aligned data */
-			if (!aligned) {
+			if (use_bufpoi) {
 				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
 				    !(mtd->ecc_stats.failed - ecc_failures) &&
 				    (ops->mode != MTD_OPS_RAW)) {
@@ -2376,11 +2388,23 @@
 		int bytes = mtd->writesize;
 		int cached = writelen > bytes && page != blockmask;
 		uint8_t *wbuf = buf;
+		int use_bufpoi;
+		int part_pagewr = (column || writelen < (mtd->writesize - 1));
 
-		/* Partial page write? */
-		if (unlikely(column || writelen < (mtd->writesize - 1))) {
+		if (part_pagewr)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
+
+		/* Partial page write, or need to use the bounce buffer? */
+		if (use_bufpoi) {
+			pr_debug("%s: using write bounce buffer for buf@%p\n",
+					 __func__, buf);
 			cached = 0;
-			bytes = min_t(int, bytes - column, (int) writelen);
+			if (part_pagewr)
+				bytes = min_t(int, bytes - column, writelen);
 			chip->pagebuf = -1;
 			memset(chip->buffers->databuf, 0xff, mtd->writesize);
 			memcpy(&chip->buffers->databuf[column], buf, bytes);
@@ -2618,18 +2642,20 @@
 }
 
 /**
- * single_erase_cmd - [GENERIC] NAND standard block erase command function
+ * single_erase - [GENERIC] NAND standard block erase command function
  * @mtd: MTD device structure
  * @page: the page address of the block which will be erased
  *
- * Standard erase command for NAND chips.
+ * Standard erase command for NAND chips. Returns NAND status.
  */
-static void single_erase_cmd(struct mtd_info *mtd, int page)
+static int single_erase(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *chip = mtd->priv;
 	/* Send commands to erase a block */
 	chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page);
 	chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1);
+
+	return chip->waitfunc(mtd, chip);
 }
 
 /**
@@ -2710,9 +2736,7 @@
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		chip->erase_cmd(mtd, page & chip->pagemask);
-
-		status = chip->waitfunc(mtd, chip);
+		status = chip->erase(mtd, page & chip->pagemask);
 
 		/*
 		 * See if operation failed and additional status checks are
@@ -3607,7 +3631,7 @@
 
 	chip->onfi_version = 0;
 	if (!type->name || !type->pagesize) {
-		/* Check is chip is ONFI compliant */
+		/* Check if the chip is ONFI compliant */
 		if (nand_flash_detect_onfi(mtd, chip, &busw))
 			goto ident_done;
 
@@ -3685,7 +3709,7 @@
 	}
 
 	chip->badblockbits = 8;
-	chip->erase_cmd = single_erase_cmd;
+	chip->erase = single_erase;
 
 	/* Do not replace user supplied command function! */
 	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
@@ -3770,6 +3794,39 @@
 }
 EXPORT_SYMBOL(nand_scan_ident);
 
+/*
+ * Check if the chip configuration meets the datasheet requirements.
+ *
+ * If our configuration corrects A bits per B bytes and the minimum
+ * required correction level is X bits per Y bytes, then we must ensure
+ * both of the following are true:
+ *
+ * (1) A / B >= X / Y
+ * (2) A >= X
+ *
+ * Requirement (1) ensures we can correct for the required bitflip density.
+ * Requirement (2) ensures we can correct even when all bitflips are clumped
+ * in the same sector.
+ */
+static bool nand_ecc_strength_good(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int corr, ds_corr;
+
+	if (ecc->size == 0 || chip->ecc_step_ds == 0)
+		/* Not enough information */
+		return true;
+
+	/*
+	 * We get the number of correctable bits per page to compare
+	 * the correction density.
+	 */
+	corr = (mtd->writesize * ecc->strength) / ecc->size;
+	ds_corr = (mtd->writesize * chip->ecc_strength_ds) / chip->ecc_step_ds;
+
+	return corr >= ds_corr && ecc->strength >= chip->ecc_strength_ds;
+}
 
 /**
  * nand_scan_tail - [NAND Interface] Scan for the NAND device
@@ -3990,6 +4047,9 @@
 		ecc->layout->oobavail += ecc->layout->oobfree[i].length;
 	mtd->oobavail = ecc->layout->oobavail;
 
+	/* ECC sanity check: warn noisily if it's too weak */
+	WARN_ON(!nand_ecc_strength_good(mtd));
+
 	/*
 	 * Set the number of read / write steps for one page depending on ECC
 	 * mode.
@@ -4023,8 +4083,16 @@
 	chip->pagebuf = -1;
 
 	/* Large page NAND with SOFT_ECC should support subpage reads */
-	if ((ecc->mode == NAND_ECC_SOFT) && (chip->page_shift > 9))
-		chip->options |= NAND_SUBPAGE_READ;
+	switch (ecc->mode) {
+	case NAND_ECC_SOFT:
+	case NAND_ECC_SOFT_BCH:
+		if (chip->page_shift > 9)
+			chip->options |= NAND_SUBPAGE_READ;
+		break;
+
+	default:
+		break;
+	}
 
 	/* Fill in remaining MTD driver data */
 	mtd->type = nand_is_slc(chip) ? MTD_NANDFLASH : MTD_MLCNANDFLASH;

diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index c0615d1..7f0c3b4 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c

@@ -528,7 +528,7 @@
 {
 	struct nand_chip *this = mtd->priv;
 	int i, chips;
-	int bits, startblock, block, dir;
+	int startblock, block, dir;
 	int scanlen = mtd->writesize + mtd->oobsize;
 	int bbtblocks;
 	int blocktopage = this->bbt_erase_shift - this->page_shift;
@@ -552,9 +552,6 @@
 		bbtblocks = mtd->size >> this->bbt_erase_shift;
 	}
 
-	/* Number of bits for each erase block in the bbt */
-	bits = td->options & NAND_BBT_NRBITS_MSK;
-
 	for (i = 0; i < chips; i++) {
 		/* Reset version information */
 		td->version[i] = 0;
@@ -1285,6 +1282,7 @@
 int nand_default_bbt(struct mtd_info *mtd)
 {
 	struct nand_chip *this = mtd->priv;
+	int ret;
 
 	/* Is a flash based bad block table requested? */
 	if (this->bbt_options & NAND_BBT_USE_FLASH) {
@@ -1303,8 +1301,11 @@
 		this->bbt_md = NULL;
 	}
 
-	if (!this->badblock_pattern)
-		nand_create_badblock_pattern(this);
+	if (!this->badblock_pattern) {
+		ret = nand_create_badblock_pattern(this);
+		if (ret)
+			return ret;
+	}
 
 	return nand_scan_bbt(mtd, this->badblock_pattern);
 }

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 053c9a2..97c4c02 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c

@@ -506,7 +506,7 @@
 	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
 		return 1;	/* error in ECC data; no action needed */
 
-	pr_err("%s: uncorrectable ECC error", __func__);
+	pr_err("%s: uncorrectable ECC error\n", __func__);
 	return -1;
 }
 EXPORT_SYMBOL(__nand_correct_data);

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 42e8a77..4f0d836 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c

@@ -575,12 +575,12 @@
 		cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
 		if (IS_ERR(cfile))
 			return PTR_ERR(cfile);
-		if (!cfile->f_op->read && !cfile->f_op->aio_read) {
+		if (!(cfile->f_mode & FMODE_CAN_READ)) {
 			NS_ERR("alloc_device: cache file not readable\n");
 			err = -EINVAL;
 			goto err_close;
 		}
-		if (!cfile->f_op->write && !cfile->f_op->aio_write) {
+		if (!(cfile->f_mode & FMODE_CAN_WRITE)) {
 			NS_ERR("alloc_device: cache file not writeable\n");
 			err = -EINVAL;
 			goto err_close;

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 1ff49b8..f0ed92e 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c

@@ -137,6 +137,10 @@
 #define BADBLOCK_MARKER_LENGTH		2
 
 #ifdef CONFIG_MTD_NAND_OMAP_BCH
+static u_char bch16_vector[] = {0xf5, 0x24, 0x1c, 0xd0, 0x61, 0xb3, 0xf1, 0x55,
+				0x2e, 0x2c, 0x86, 0xa3, 0xed, 0x36, 0x1b, 0x78,
+				0x48, 0x76, 0xa9, 0x3b, 0x97, 0xd1, 0x7a, 0x93,
+				0x07, 0x0e};
 static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
 	0xac, 0x6b, 0xff, 0x99, 0x7b};
 static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
@@ -1114,6 +1118,19 @@
 			ecc_size1 = BCH_ECC_SIZE1;
 		}
 		break;
+	case OMAP_ECC_BCH16_CODE_HW:
+		bch_type = 0x2;
+		nsectors = chip->ecc.steps;
+		if (mode == NAND_ECC_READ) {
+			wr_mode	  = 0x01;
+			ecc_size0 = 52; /* ECC bits in nibbles per sector */
+			ecc_size1 = 0;  /* non-ECC bits in nibbles per sector */
+		} else {
+			wr_mode	  = 0x01;
+			ecc_size0 = 0;  /* extra bits in nibbles per sector */
+			ecc_size1 = 52; /* OOB bits in nibbles per sector */
+		}
+		break;
 	default:
 		return;
 	}
@@ -1162,7 +1179,8 @@
 	struct gpmc_nand_regs	*gpmc_regs = &info->reg;
 	u8 *ecc_code;
 	unsigned long nsectors, bch_val1, bch_val2, bch_val3, bch_val4;
-	int i;
+	u32 val;
+	int i, j;
 
 	nsectors = ((readl(info->reg.gpmc_ecc_config) >> 4) & 0x7) + 1;
 	for (i = 0; i < nsectors; i++) {
@@ -1201,6 +1219,41 @@
 			*ecc_code++ = ((bch_val1 >> 4) & 0xFF);
 			*ecc_code++ = ((bch_val1 & 0xF) << 4);
 			break;
+		case OMAP_ECC_BCH16_CODE_HW:
+			val = readl(gpmc_regs->gpmc_bch_result6[i]);
+			ecc_code[0]  = ((val >>  8) & 0xFF);
+			ecc_code[1]  = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result5[i]);
+			ecc_code[2]  = ((val >> 24) & 0xFF);
+			ecc_code[3]  = ((val >> 16) & 0xFF);
+			ecc_code[4]  = ((val >>  8) & 0xFF);
+			ecc_code[5]  = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result4[i]);
+			ecc_code[6]  = ((val >> 24) & 0xFF);
+			ecc_code[7]  = ((val >> 16) & 0xFF);
+			ecc_code[8]  = ((val >>  8) & 0xFF);
+			ecc_code[9]  = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result3[i]);
+			ecc_code[10] = ((val >> 24) & 0xFF);
+			ecc_code[11] = ((val >> 16) & 0xFF);
+			ecc_code[12] = ((val >>  8) & 0xFF);
+			ecc_code[13] = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result2[i]);
+			ecc_code[14] = ((val >> 24) & 0xFF);
+			ecc_code[15] = ((val >> 16) & 0xFF);
+			ecc_code[16] = ((val >>  8) & 0xFF);
+			ecc_code[17] = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result1[i]);
+			ecc_code[18] = ((val >> 24) & 0xFF);
+			ecc_code[19] = ((val >> 16) & 0xFF);
+			ecc_code[20] = ((val >>  8) & 0xFF);
+			ecc_code[21] = ((val >>  0) & 0xFF);
+			val = readl(gpmc_regs->gpmc_bch_result0[i]);
+			ecc_code[22] = ((val >> 24) & 0xFF);
+			ecc_code[23] = ((val >> 16) & 0xFF);
+			ecc_code[24] = ((val >>  8) & 0xFF);
+			ecc_code[25] = ((val >>  0) & 0xFF);
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -1210,8 +1263,8 @@
 		case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW:
 			/* Add constant polynomial to remainder, so that
 			 * ECC of blank pages results in 0x0 on reading back */
-			for (i = 0; i < eccbytes; i++)
-				ecc_calc[i] ^= bch4_polynomial[i];
+			for (j = 0; j < eccbytes; j++)
+				ecc_calc[j] ^= bch4_polynomial[j];
 			break;
 		case OMAP_ECC_BCH4_CODE_HW:
 			/* Set  8th ECC byte as 0x0 for ROM compatibility */
@@ -1220,13 +1273,15 @@
 		case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW:
 			/* Add constant polynomial to remainder, so that
 			 * ECC of blank pages results in 0x0 on reading back */
-			for (i = 0; i < eccbytes; i++)
-				ecc_calc[i] ^= bch8_polynomial[i];
+			for (j = 0; j < eccbytes; j++)
+				ecc_calc[j] ^= bch8_polynomial[j];
 			break;
 		case OMAP_ECC_BCH8_CODE_HW:
 			/* Set 14th ECC byte as 0x0 for ROM compatibility */
 			ecc_calc[eccbytes - 1] = 0x0;
 			break;
+		case OMAP_ECC_BCH16_CODE_HW:
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -1237,6 +1292,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
 /**
  * erased_sector_bitflips - count bit flips
  * @data:	data sector buffer
@@ -1276,7 +1332,6 @@
 	return flip_bits;
 }
 
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 /**
  * omap_elm_correct_data - corrects page data area in case error reported
  * @mtd:	MTD device structure
@@ -1318,6 +1373,10 @@
 		actual_eccbytes = ecc->bytes - 1;
 		erased_ecc_vec = bch8_vector;
 		break;
+	case OMAP_ECC_BCH16_CODE_HW:
+		actual_eccbytes = ecc->bytes;
+		erased_ecc_vec = bch16_vector;
+		break;
 	default:
 		pr_err("invalid driver configuration\n");
 		return -EINVAL;
@@ -1382,7 +1441,7 @@
 
 	/* Check if any error reported */
 	if (!is_error_reported)
-		return 0;
+		return stat;
 
 	/* Decode BCH error using ELM module */
 	elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec);
@@ -1401,6 +1460,7 @@
 						BCH4_BIT_PAD;
 					break;
 				case OMAP_ECC_BCH8_CODE_HW:
+				case OMAP_ECC_BCH16_CODE_HW:
 					pos = err_vec[i].error_loc[j];
 					break;
 				default:
@@ -1912,6 +1972,40 @@
 		goto return_error;
 #endif
 
+	case OMAP_ECC_BCH16_CODE_HW:
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
+		pr_info("using OMAP_ECC_BCH16_CODE_HW ECC scheme\n");
+		nand_chip->ecc.mode		= NAND_ECC_HW;
+		nand_chip->ecc.size		= 512;
+		nand_chip->ecc.bytes		= 26;
+		nand_chip->ecc.strength		= 16;
+		nand_chip->ecc.hwctl		= omap_enable_hwecc_bch;
+		nand_chip->ecc.correct		= omap_elm_correct_data;
+		nand_chip->ecc.calculate	= omap_calculate_ecc_bch;
+		nand_chip->ecc.read_page	= omap_read_page_bch;
+		nand_chip->ecc.write_page	= omap_write_page_bch;
+		/* This ECC scheme requires ELM H/W block */
+		err = is_elm_present(info, pdata->elm_of_node, BCH16_ECC);
+		if (err < 0) {
+			pr_err("ELM is required for this ECC scheme\n");
+			goto return_error;
+		}
+		/* define ECC layout */
+		ecclayout->eccbytes		= nand_chip->ecc.bytes *
+							(mtd->writesize /
+							nand_chip->ecc.size);
+		oob_index			= BADBLOCK_MARKER_LENGTH;
+		for (i = 0; i < ecclayout->eccbytes; i++, oob_index++)
+			ecclayout->eccpos[i]	= oob_index;
+		/* reserved marker already included in ecclayout->eccbytes */
+		ecclayout->oobfree->offset	=
+				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
+		break;
+#else
+		pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n");
+		err = -EINVAL;
+		goto return_error;
+#endif
 	default:
 		pr_err("nand: error: invalid or unsupported ECC scheme\n");
 		err = -EINVAL;

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index dd7fe81..471b4df 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c

@@ -214,7 +214,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id orion_nand_of_match_table[] = {
+static const struct of_device_id orion_nand_of_match_table[] = {
 	{ .compatible = "marvell,orion-nand", },
 	{},
 };

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 7588fe2..96b0b1d 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c

@@ -127,10 +127,10 @@
 
 /* macros for registers read/write */
 #define nand_writel(info, off, val)	\
-	__raw_writel((val), (info)->mmio_base + (off))
+	writel_relaxed((val), (info)->mmio_base + (off))
 
 #define nand_readl(info, off)		\
-	__raw_readl((info)->mmio_base + (off))
+	readl_relaxed((info)->mmio_base + (off))
 
 /* error code and state */
 enum {
@@ -337,7 +337,7 @@
 /* convert nano-seconds to nand flash controller clock cycles */
 #define ns2cycle(ns, clk)	(int)((ns) * (clk / 1000000) / 1000)
 
-static struct of_device_id pxa3xx_nand_dt_ids[] = {
+static const struct of_device_id pxa3xx_nand_dt_ids[] = {
 	{
 		.compatible = "marvell,pxa3xx-nand",
 		.data       = (void *)PXA3XX_NAND_VARIANT_PXA,
@@ -1354,7 +1354,6 @@
 		ecc->mode = NAND_ECC_HW;
 		ecc->size = 512;
 		ecc->strength = 1;
-		return 1;
 
 	} else if (strength == 1 && ecc_stepsize == 512 && page_size == 512) {
 		info->chunk_size = 512;
@@ -1363,7 +1362,6 @@
 		ecc->mode = NAND_ECC_HW;
 		ecc->size = 512;
 		ecc->strength = 1;
-		return 1;
 
 	/*
 	 * Required ECC: 4-bit correction per 512 bytes
@@ -1378,7 +1376,6 @@
 		ecc->size = info->chunk_size;
 		ecc->layout = &ecc_layout_2KB_bch4bit;
 		ecc->strength = 16;
-		return 1;
 
 	} else if (strength == 4 && ecc_stepsize == 512 && page_size == 4096) {
 		info->ecc_bch = 1;
@@ -1389,7 +1386,6 @@
 		ecc->size = info->chunk_size;
 		ecc->layout = &ecc_layout_4KB_bch4bit;
 		ecc->strength = 16;
-		return 1;
 
 	/*
 	 * Required ECC: 8-bit correction per 512 bytes
@@ -1404,8 +1400,15 @@
 		ecc->size = info->chunk_size;
 		ecc->layout = &ecc_layout_4KB_bch8bit;
 		ecc->strength = 16;
-		return 1;
+	} else {
+		dev_err(&info->pdev->dev,
+			"ECC strength %d at page size %d is not supported\n",
+			strength, page_size);
+		return -ENODEV;
 	}
+
+	dev_info(&info->pdev->dev, "ECC strength %d, ECC step size %d\n",
+		 ecc->strength, ecc->size);
 	return 0;
 }
 
@@ -1516,8 +1519,13 @@
 		}
 	}
 
-	ecc_strength = chip->ecc_strength_ds;
-	ecc_step = chip->ecc_step_ds;
+	if (pdata->ecc_strength && pdata->ecc_step_size) {
+		ecc_strength = pdata->ecc_strength;
+		ecc_step = pdata->ecc_step_size;
+	} else {
+		ecc_strength = chip->ecc_strength_ds;
+		ecc_step = chip->ecc_step_ds;
+	}
 
 	/* Set default ECC strength requirements on non-ONFI devices */
 	if (ecc_strength < 1 && ecc_step < 1) {
@@ -1527,12 +1535,8 @@
 
 	ret = pxa_ecc_init(info, &chip->ecc, ecc_strength,
 			   ecc_step, mtd->writesize);
-	if (!ret) {
-		dev_err(&info->pdev->dev,
-			"ECC strength %d at page size %d is not supported\n",
-			ecc_strength, mtd->writesize);
-		return -ENODEV;
-	}
+	if (ret)
+		return ret;
 
 	/* calculate addressing information */
 	if (mtd->writesize >= 2048)
@@ -1730,6 +1734,14 @@
 	of_property_read_u32(np, "num-cs", &pdata->num_cs);
 	pdata->flash_bbt = of_get_nand_on_flash_bbt(np);
 
+	pdata->ecc_strength = of_get_nand_ecc_strength(np);
+	if (pdata->ecc_strength < 0)
+		pdata->ecc_strength = 0;
+
+	pdata->ecc_step_size = of_get_nand_ecc_step_size(np);
+	if (pdata->ecc_step_size < 0)
+		pdata->ecc_step_size = 0;
+
 	pdev->dev.platform_data = pdata;
 
 	return 0;

diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index 325930d..baea83f 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c

@@ -245,7 +245,7 @@
 	}
 
 	/* write DWORD chinks - faster */
-	while (len) {
+	while (len >= 4) {
 		reg = buf[0] | buf[1] << 8 | buf[2] << 16 | buf[3] << 24;
 		r852_write_reg_dword(dev, R852_DATALINE, reg);
 		buf += 4;
@@ -254,8 +254,10 @@
 	}
 
 	/* write rest */
-	while (len)
+	while (len > 0) {
 		r852_write_reg(dev, R852_DATALINE, *buf++);
+		len--;
+	}
 }
 
 /*

diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
index b1a792f..efb819c 100644
--- a/drivers/mtd/onenand/samsung.c
+++ b/drivers/mtd/onenand/samsung.c

@@ -537,9 +537,9 @@
 	return 0;
 }
 
-static int (*s5pc110_dma_ops)(void *dst, void *src, size_t count, int direction);
+static int (*s5pc110_dma_ops)(dma_addr_t dst, dma_addr_t src, size_t count, int direction);
 
-static int s5pc110_dma_poll(void *dst, void *src, size_t count, int direction)
+static int s5pc110_dma_poll(dma_addr_t dst, dma_addr_t src, size_t count, int direction)
 {
 	void __iomem *base = onenand->dma_addr;
 	int status;
@@ -605,7 +605,7 @@
 	return IRQ_HANDLED;
 }
 
-static int s5pc110_dma_irq(void *dst, void *src, size_t count, int direction)
+static int s5pc110_dma_irq(dma_addr_t dst, dma_addr_t src, size_t count, int direction)
 {
 	void __iomem *base = onenand->dma_addr;
 	int status;
@@ -686,7 +686,7 @@
 		dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count);
 		goto normal;
 	}
-	err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src,
+	err = s5pc110_dma_ops(dma_dst, dma_src,
 			count, S5PC110_DMA_DIR_READ);
 
 	if (page_dma)

diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
new file mode 100644
index 0000000..f8acfa4
--- /dev/null
+++ b/drivers/mtd/spi-nor/Kconfig

@@ -0,0 +1,17 @@
+menuconfig MTD_SPI_NOR
+	tristate "SPI-NOR device support"
+	depends on MTD
+	help
+	  This is the framework for the SPI NOR which can be used by the SPI
+	  device drivers and the SPI-NOR device driver.
+
+if MTD_SPI_NOR
+
+config SPI_FSL_QUADSPI
+	tristate "Freescale Quad SPI controller"
+	depends on ARCH_MXC
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  Only SPI NOR flash attached to this controller is supported for now.
+
+endif # MTD_SPI_NOR

diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
new file mode 100644
index 0000000..6a7ce14
--- /dev/null
+++ b/drivers/mtd/spi-nor/Makefile

@@ -0,0 +1,2 @@
+obj-$(CONFIG_MTD_SPI_NOR)	+= spi-nor.o
+obj-$(CONFIG_SPI_FSL_QUADSPI)	+= fsl-quadspi.o

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
new file mode 100644
index 0000000..8d659a2
--- /dev/null
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c

@@ -0,0 +1,1009 @@
+/*
+ * Freescale QuadSPI driver.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/timer.h>
+#include <linux/jiffies.h>
+#include <linux/completion.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/spi-nor.h>
+
+/* The registers */
+#define QUADSPI_MCR			0x00
+#define QUADSPI_MCR_RESERVED_SHIFT	16
+#define QUADSPI_MCR_RESERVED_MASK	(0xF << QUADSPI_MCR_RESERVED_SHIFT)
+#define QUADSPI_MCR_MDIS_SHIFT		14
+#define QUADSPI_MCR_MDIS_MASK		(1 << QUADSPI_MCR_MDIS_SHIFT)
+#define QUADSPI_MCR_CLR_TXF_SHIFT	11
+#define QUADSPI_MCR_CLR_TXF_MASK	(1 << QUADSPI_MCR_CLR_TXF_SHIFT)
+#define QUADSPI_MCR_CLR_RXF_SHIFT	10
+#define QUADSPI_MCR_CLR_RXF_MASK	(1 << QUADSPI_MCR_CLR_RXF_SHIFT)
+#define QUADSPI_MCR_DDR_EN_SHIFT	7
+#define QUADSPI_MCR_DDR_EN_MASK		(1 << QUADSPI_MCR_DDR_EN_SHIFT)
+#define QUADSPI_MCR_END_CFG_SHIFT	2
+#define QUADSPI_MCR_END_CFG_MASK	(3 << QUADSPI_MCR_END_CFG_SHIFT)
+#define QUADSPI_MCR_SWRSTHD_SHIFT	1
+#define QUADSPI_MCR_SWRSTHD_MASK	(1 << QUADSPI_MCR_SWRSTHD_SHIFT)
+#define QUADSPI_MCR_SWRSTSD_SHIFT	0
+#define QUADSPI_MCR_SWRSTSD_MASK	(1 << QUADSPI_MCR_SWRSTSD_SHIFT)
+
+#define QUADSPI_IPCR			0x08
+#define QUADSPI_IPCR_SEQID_SHIFT	24
+#define QUADSPI_IPCR_SEQID_MASK		(0xF << QUADSPI_IPCR_SEQID_SHIFT)
+
+#define QUADSPI_BUF0CR			0x10
+#define QUADSPI_BUF1CR			0x14
+#define QUADSPI_BUF2CR			0x18
+#define QUADSPI_BUFXCR_INVALID_MSTRID	0xe
+
+#define QUADSPI_BUF3CR			0x1c
+#define QUADSPI_BUF3CR_ALLMST_SHIFT	31
+#define QUADSPI_BUF3CR_ALLMST		(1 << QUADSPI_BUF3CR_ALLMST_SHIFT)
+
+#define QUADSPI_BFGENCR			0x20
+#define QUADSPI_BFGENCR_PAR_EN_SHIFT	16
+#define QUADSPI_BFGENCR_PAR_EN_MASK	(1 << (QUADSPI_BFGENCR_PAR_EN_SHIFT))
+#define QUADSPI_BFGENCR_SEQID_SHIFT	12
+#define QUADSPI_BFGENCR_SEQID_MASK	(0xF << QUADSPI_BFGENCR_SEQID_SHIFT)
+
+#define QUADSPI_BUF0IND			0x30
+#define QUADSPI_BUF1IND			0x34
+#define QUADSPI_BUF2IND			0x38
+#define QUADSPI_SFAR			0x100
+
+#define QUADSPI_SMPR			0x108
+#define QUADSPI_SMPR_DDRSMP_SHIFT	16
+#define QUADSPI_SMPR_DDRSMP_MASK	(7 << QUADSPI_SMPR_DDRSMP_SHIFT)
+#define QUADSPI_SMPR_FSDLY_SHIFT	6
+#define QUADSPI_SMPR_FSDLY_MASK		(1 << QUADSPI_SMPR_FSDLY_SHIFT)
+#define QUADSPI_SMPR_FSPHS_SHIFT	5
+#define QUADSPI_SMPR_FSPHS_MASK		(1 << QUADSPI_SMPR_FSPHS_SHIFT)
+#define QUADSPI_SMPR_HSENA_SHIFT	0
+#define QUADSPI_SMPR_HSENA_MASK		(1 << QUADSPI_SMPR_HSENA_SHIFT)
+
+#define QUADSPI_RBSR			0x10c
+#define QUADSPI_RBSR_RDBFL_SHIFT	8
+#define QUADSPI_RBSR_RDBFL_MASK		(0x3F << QUADSPI_RBSR_RDBFL_SHIFT)
+
+#define QUADSPI_RBCT			0x110
+#define QUADSPI_RBCT_WMRK_MASK		0x1F
+#define QUADSPI_RBCT_RXBRD_SHIFT	8
+#define QUADSPI_RBCT_RXBRD_USEIPS	(0x1 << QUADSPI_RBCT_RXBRD_SHIFT)
+
+#define QUADSPI_TBSR			0x150
+#define QUADSPI_TBDR			0x154
+#define QUADSPI_SR			0x15c
+#define QUADSPI_SR_IP_ACC_SHIFT		1
+#define QUADSPI_SR_IP_ACC_MASK		(0x1 << QUADSPI_SR_IP_ACC_SHIFT)
+#define QUADSPI_SR_AHB_ACC_SHIFT	2
+#define QUADSPI_SR_AHB_ACC_MASK		(0x1 << QUADSPI_SR_AHB_ACC_SHIFT)
+
+#define QUADSPI_FR			0x160
+#define QUADSPI_FR_TFF_MASK		0x1
+
+#define QUADSPI_SFA1AD			0x180
+#define QUADSPI_SFA2AD			0x184
+#define QUADSPI_SFB1AD			0x188
+#define QUADSPI_SFB2AD			0x18c
+#define QUADSPI_RBDR			0x200
+
+#define QUADSPI_LUTKEY			0x300
+#define QUADSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define QUADSPI_LCKCR			0x304
+#define QUADSPI_LCKER_LOCK		0x1
+#define QUADSPI_LCKER_UNLOCK		0x2
+
+#define QUADSPI_RSER			0x164
+#define QUADSPI_RSER_TFIE		(0x1 << 0)
+
+#define QUADSPI_LUT_BASE		0x310
+
+/*
+ * The definition of the LUT register shows below:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define OPRND0_SHIFT		0
+#define PAD0_SHIFT		8
+#define INSTR0_SHIFT		10
+#define OPRND1_SHIFT		16
+
+/* Instruction set for the LUT register. */
+#define LUT_STOP		0
+#define LUT_CMD			1
+#define LUT_ADDR		2
+#define LUT_DUMMY		3
+#define LUT_MODE		4
+#define LUT_MODE2		5
+#define LUT_MODE4		6
+#define LUT_READ		7
+#define LUT_WRITE		8
+#define LUT_JMP_ON_CS		9
+#define LUT_ADDR_DDR		10
+#define LUT_MODE_DDR		11
+#define LUT_MODE2_DDR		12
+#define LUT_MODE4_DDR		13
+#define LUT_READ_DDR		14
+#define LUT_WRITE_DDR		15
+#define LUT_DATA_LEARN		16
+
+/*
+ * The PAD definitions for the LUT register.
+ *
+ * The pad field selects how many of the IO[0:3] lines are used.
+ * For example, a Quad read needs four IO lines, so it has to
+ * use LUT_PAD4.
+ */
+#define LUT_PAD1		0
+#define LUT_PAD2		1
+#define LUT_PAD4		2
+
+/* Operands for the LUT register. */
+#define ADDR24BIT		0x18
+#define ADDR32BIT		0x20
+
+/* Macros for constructing the LUT register. */
+#define LUT0(ins, pad, opr)						\
+		(((opr) << OPRND0_SHIFT) | ((LUT_##pad) << PAD0_SHIFT) | \
+		((LUT_##ins) << INSTR0_SHIFT))
+
+#define LUT1(ins, pad, opr)	(LUT0(ins, pad, opr) << OPRND1_SHIFT)
+
+/* other macros for LUT register. */
+#define QUADSPI_LUT(x)          (QUADSPI_LUT_BASE + (x) * 4)
+#define QUADSPI_LUT_NUM		64
+
+/* SEQID -- we can have 16 seqids at most. */
+#define SEQID_QUAD_READ		0
+#define SEQID_WREN		1
+#define SEQID_WRDI		2
+#define SEQID_RDSR		3
+#define SEQID_SE		4
+#define SEQID_CHIP_ERASE	5
+#define SEQID_PP		6
+#define SEQID_RDID		7
+#define SEQID_WRSR		8
+#define SEQID_RDCR		9
+#define SEQID_EN4B		10
+#define SEQID_BRWR		11
+
+/* Controller variants handled by this driver. */
+enum fsl_qspi_devtype {
+	FSL_QUADSPI_VYBRID,
+	FSL_QUADSPI_IMX6SX,
+};
+
+/*
+ * Per-variant parameters, attached to the OF match table entries.
+ * @rxfifo is used as the READ operand of the quad-read LUT sequence;
+ * @txfifo caps the flash page size used for page programming.
+ */
+struct fsl_qspi_devtype_data {
+	enum fsl_qspi_devtype devtype;
+	int rxfifo;
+	int txfifo;
+};
+
+/* Parameters for the Vybrid variant. */
+static struct fsl_qspi_devtype_data vybrid_data = {
+	.devtype = FSL_QUADSPI_VYBRID,
+	.rxfifo = 128,
+	.txfifo = 64
+};
+
+/* Parameters for the i.MX6SX variant. */
+static struct fsl_qspi_devtype_data imx6sx_data = {
+	.devtype = FSL_QUADSPI_IMX6SX,
+	.rxfifo = 128,
+	.txfifo = 512
+};
+
+/* Number of flash chip slots (entries in the mtd[] and nor[] arrays). */
+#define FSL_QSPI_MAX_CHIP	4
+/* Driver state for one QuadSPI controller instance. */
+struct fsl_qspi {
+	/* One MTD device and one SPI NOR handle per chip slot. */
+	struct mtd_info mtd[FSL_QSPI_MAX_CHIP];
+	struct spi_nor nor[FSL_QSPI_MAX_CHIP];
+	void __iomem *iobase;	/* Mapping of the "QuadSPI" register resource */
+	void __iomem *ahb_base; /* Mapping of "QuadSPI-memory", used for AHB reads */
+	u32 memmap_phy;		/* Start address of the "QuadSPI-memory" resource */
+	struct clk *clk, *clk_en;	/* The "qspi" and "qspi_en" clocks */
+	struct device *dev;
+	struct completion c;	/* Completed by the IRQ handler when TFF is set */
+	struct fsl_qspi_devtype_data *devtype_data;
+	u32 nor_size;		/* Size of the first scanned chip */
+	u32 nor_num;		/* Number of child nodes of the controller node */
+	u32 clk_rate;		/* Taken from the "spi-max-frequency" property */
+	/* Offset of the currently selected chip: nor_size * chip index. */
+	unsigned int chip_base_addr; /* We may support two chips. */
+};
+
+/* Returns non-zero when the controller is the Vybrid variant. */
+static inline int is_vybrid_qspi(struct fsl_qspi *q)
+{
+	return q->devtype_data->devtype == FSL_QUADSPI_VYBRID;
+}
+
+/* Returns non-zero when the controller is the i.MX6SX variant. */
+static inline int is_imx6sx_qspi(struct fsl_qspi *q)
+{
+	return q->devtype_data->devtype == FSL_QUADSPI_IMX6SX;
+}
+
+/*
+ * An IC bug forces us to re-arrange the 32-bit data on the Vybrid.
+ * Later chips, such as the IMX6SLX, have fixed this bug, so the value
+ * is passed through unchanged for them.
+ */
+static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a)
+{
+	if (is_vybrid_qspi(q))
+		return __swab32(a);
+
+	return a;
+}
+
+/* Unlock the LUT: write the key first, then the unlock command. */
+static inline void fsl_qspi_unlock_lut(struct fsl_qspi *q)
+{
+	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	writel(QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
+}
+
+/* Lock the LUT again: write the key first, then the lock command. */
+static inline void fsl_qspi_lock_lut(struct fsl_qspi *q)
+{
+	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	writel(QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
+}
+
+/*
+ * Interrupt handler: clear the pending flags and, when the TFF flag was
+ * set, complete the completion that fsl_qspi_runcmd() waits on.
+ */
+static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
+{
+	struct fsl_qspi *q = dev_id;
+	void __iomem *fr = q->iobase + QUADSPI_FR;
+	u32 flags = readl(fr);
+
+	/* clear interrupt by writing the flags back */
+	writel(flags, fr);
+
+	if (flags & QUADSPI_FR_TFF_MASK)
+		complete(&q->c);
+
+	dev_dbg(q->dev, "QUADSPI_FR : 0x%.8x:0x%.8x\n", q->chip_base_addr, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Program the look-up table with one command sequence per SEQID_* slot;
+ * each slot starts at LUT register index SEQID * 4. The table is unlocked
+ * for the update, cleared, filled and locked again.
+ *
+ * Sequences that carry an address use a 24-bit address operand for chips
+ * of up to 16 MiB and a 32-bit one for larger chips.
+ */
+static void fsl_qspi_init_lut(struct fsl_qspi *q)
+{
+	void __iomem *base = q->iobase;
+	int rxfifo = q->devtype_data->rxfifo;
+	u32 lut_base;
+	u8 addrlen;
+	int i;
+
+	/* use the 4-byte address for chips larger than 16 MiB */
+	addrlen = (q->nor_size <= SZ_16M) ? ADDR24BIT : ADDR32BIT;
+
+	fsl_qspi_unlock_lut(q);
+
+	/* Clear all the LUT table */
+	for (i = 0; i < QUADSPI_LUT_NUM; i++)
+		writel(0, base + QUADSPI_LUT(i));
+
+	/* Quad Read: command, address, 8 dummy cycles, then the data */
+	lut_base = SEQID_QUAD_READ * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) |
+			LUT1(ADDR, PAD1, addrlen),
+			base + QUADSPI_LUT(lut_base));
+	writel(LUT0(DUMMY, PAD1, 8) | LUT1(READ, PAD4, rxfifo),
+			base + QUADSPI_LUT(lut_base + 1));
+
+	/* Write enable */
+	lut_base = SEQID_WREN * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base));
+
+	/* Page Program: command, address, then the data */
+	lut_base = SEQID_PP * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
+			base + QUADSPI_LUT(lut_base));
+	writel(LUT0(WRITE, PAD1, 0), base + QUADSPI_LUT(lut_base + 1));
+
+	/* Read Status */
+	lut_base = SEQID_RDSR * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(READ, PAD1, 0x1),
+			base + QUADSPI_LUT(lut_base));
+
+	/* Erase a sector */
+	lut_base = SEQID_SE * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_SE) | LUT1(ADDR, PAD1, addrlen),
+			base + QUADSPI_LUT(lut_base));
+
+	/* Erase the whole chip */
+	lut_base = SEQID_CHIP_ERASE * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
+			base + QUADSPI_LUT(lut_base));
+
+	/* READ ID */
+	lut_base = SEQID_RDID * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(READ, PAD1, 0x8),
+			base + QUADSPI_LUT(lut_base));
+
+	/* Write Register */
+	lut_base = SEQID_WRSR * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(WRITE, PAD1, 0x2),
+			base + QUADSPI_LUT(lut_base));
+
+	/* Read Configuration Register */
+	lut_base = SEQID_RDCR * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(READ, PAD1, 0x1),
+			base + QUADSPI_LUT(lut_base));
+
+	/* Write disable */
+	lut_base = SEQID_WRDI * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base));
+
+	/* Enter 4 Byte Mode (Micron) */
+	lut_base = SEQID_EN4B * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base));
+
+	/* Enter 4 Byte Mode (Spansion) */
+	lut_base = SEQID_BRWR * 4;
+	writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base));
+
+	fsl_qspi_lock_lut(q);
+}
+
+/*
+ * Translate a SPI NOR opcode into the SEQID of its LUT sequence.
+ * Returns -EINVAL (after logging the opcode) when no sequence exists.
+ */
+static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
+{
+	int seqid = -EINVAL;
+
+	switch (cmd) {
+	case SPINOR_OP_READ_1_1_4:
+		seqid = SEQID_QUAD_READ;
+		break;
+	case SPINOR_OP_WREN:
+		seqid = SEQID_WREN;
+		break;
+	case SPINOR_OP_WRDI:
+		seqid = SEQID_WRDI;
+		break;
+	case SPINOR_OP_RDSR:
+		seqid = SEQID_RDSR;
+		break;
+	case SPINOR_OP_SE:
+		seqid = SEQID_SE;
+		break;
+	case SPINOR_OP_CHIP_ERASE:
+		seqid = SEQID_CHIP_ERASE;
+		break;
+	case SPINOR_OP_PP:
+		seqid = SEQID_PP;
+		break;
+	case SPINOR_OP_RDID:
+		seqid = SEQID_RDID;
+		break;
+	case SPINOR_OP_WRSR:
+		seqid = SEQID_WRSR;
+		break;
+	case SPINOR_OP_RDCR:
+		seqid = SEQID_RDCR;
+		break;
+	case SPINOR_OP_EN4B:
+		seqid = SEQID_EN4B;
+		break;
+	case SPINOR_OP_BRWR:
+		seqid = SEQID_BRWR;
+		break;
+	default:
+		dev_err(q->dev, "Unsupported cmd 0x%.2x\n", cmd);
+		break;
+	}
+
+	return seqid;
+}
+
+/*
+ * Run the LUT sequence for @cmd at flash offset @addr with a data length
+ * of @len bytes, and wait up to one second for the completion interrupt.
+ *
+ * Returns 0 on success, -ETIMEDOUT when the interrupt does not arrive, or
+ * a negative error code when @cmd has no LUT sequence.
+ */
+static int
+fsl_qspi_runcmd(struct fsl_qspi *q, u8 cmd, unsigned int addr, int len)
+{
+	void __iomem *base = q->iobase;
+	int seqid;
+	u32 reg, reg2;
+	int err;
+
+	/*
+	 * Resolve the LUT sequence before touching the hardware: an
+	 * unsupported opcode yields a negative value which must never be
+	 * shifted into the IPCR register.
+	 */
+	seqid = fsl_qspi_get_seqid(q, cmd);
+	if (seqid < 0)
+		return seqid;
+
+	init_completion(&q->c);
+	dev_dbg(q->dev, "to 0x%.8x:0x%.8x, len:%d, cmd:%.2x\n",
+			q->chip_base_addr, addr, len, cmd);
+
+	/* save the reg */
+	reg = readl(base + QUADSPI_MCR);
+
+	writel(q->memmap_phy + q->chip_base_addr + addr, base + QUADSPI_SFAR);
+	writel(QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+			base + QUADSPI_RBCT);
+	writel(reg | QUADSPI_MCR_CLR_RXF_MASK, base + QUADSPI_MCR);
+
+	/* Poll until neither an IP nor an AHB access is in progress. */
+	do {
+		reg2 = readl(base + QUADSPI_SR);
+		if (reg2 & (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
+			udelay(1);
+			dev_dbg(q->dev, "The controller is busy, 0x%x\n", reg2);
+			continue;
+		}
+		break;
+	} while (1);
+
+	/* trigger the LUT now */
+	writel((seqid << QUADSPI_IPCR_SEQID_SHIFT) | len, base + QUADSPI_IPCR);
+
+	/* Wait for the interrupt. */
+	err = wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000));
+	if (!err) {
+		dev_err(q->dev,
+			"cmd 0x%.2x timeout, addr@%.8x, FR:0x%.8x, SR:0x%.8x\n",
+			cmd, addr, readl(base + QUADSPI_FR),
+			readl(base + QUADSPI_SR));
+		err = -ETIMEDOUT;
+	} else {
+		err = 0;
+	}
+
+	/* restore the MCR */
+	writel(reg, base + QUADSPI_MCR);
+
+	return err;
+}
+
+/*
+ * Read out @len bytes from the QUADSPI_RBDR buffer registers into @rxbuf.
+ *
+ * @rxbuf is a plain byte buffer with no alignment guarantee, so every
+ * word is copied out with memcpy() rather than stored through a u32
+ * pointer. The last word is truncated to the bytes still wanted.
+ */
+static void fsl_qspi_read_data(struct fsl_qspi *q, int len, u8 *rxbuf)
+{
+	u32 tmp;
+	int i = 0;
+
+	while (len > 0) {
+		int chunk = len < 4 ? len : 4;
+
+		tmp = readl(q->iobase + QUADSPI_RBDR + i * 4);
+		tmp = fsl_qspi_endian_xchg(q, tmp);
+		dev_dbg(q->dev, "chip addr:0x%.8x, rcv:0x%.8x\n",
+				q->chip_base_addr, tmp);
+
+		memcpy(rxbuf, &tmp, chunk);
+
+		rxbuf += chunk;
+		len -= chunk;
+		i++;
+	}
+}
+
+/*
+ * If we have changed the content of the flash by writing or erasing,
+ * we need to invalidate the AHB buffer. If we do not do so, we may read out
+ * the wrong data. The spec tells us to reset the AHB domain and the Serial
+ * Flash domain at the same time.
+ */
+static inline void fsl_qspi_invalid(struct fsl_qspi *q)
+{
+	u32 reg;
+
+	/* Assert both software reset bits in one write. */
+	reg = readl(q->iobase + QUADSPI_MCR);
+	reg |= QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK;
+	writel(reg, q->iobase + QUADSPI_MCR);
+
+	/*
+	 * The minimum delay : 1 AHB + 2 SFCK clocks.
+	 * Delay 1 us is enough.
+	 */
+	udelay(1);
+
+	/* Release both reset bits again. */
+	reg &= ~(QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK);
+	writel(reg, q->iobase + QUADSPI_MCR);
+}
+
+/*
+ * Push @count bytes from @txbuf into the TX FIFO and run @opcode at flash
+ * offset @to.
+ *
+ * The data is consumed byte-wise: @txbuf is usually a cast byte buffer, so
+ * it may be unaligned and @count need not be a multiple of four. The last
+ * FIFO word is zero padded instead of reading past the end of the buffer.
+ *
+ * Returns the result of fsl_qspi_runcmd(); on success @count is added to
+ * *@retlen when @retlen is not NULL.
+ */
+static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor,
+				u8 opcode, unsigned int to, u32 *txbuf,
+				unsigned count, size_t *retlen)
+{
+	const u8 *src = (const u8 *)txbuf;
+	unsigned int left = count;
+	int ret;
+	u32 tmp;
+
+	dev_dbg(q->dev, "to 0x%.8x:0x%.8x, len : %d\n",
+		q->chip_base_addr, to, count);
+
+	/* clear the TX FIFO. */
+	tmp = readl(q->iobase + QUADSPI_MCR);
+	writel(tmp | QUADSPI_MCR_CLR_TXF_MASK, q->iobase + QUADSPI_MCR);
+
+	/* fill the TX data to the FIFO, one 32-bit word at a time */
+	while (left) {
+		unsigned int chunk = left < 4 ? left : 4;
+
+		tmp = 0;
+		memcpy(&tmp, src, chunk);
+		tmp = fsl_qspi_endian_xchg(q, tmp);
+		writel(tmp, q->iobase + QUADSPI_TBDR);
+
+		src += chunk;
+		left -= chunk;
+	}
+
+	/* Trigger it */
+	ret = fsl_qspi_runcmd(q, opcode, to, count);
+
+	if (ret == 0 && retlen)
+		*retlen += count;
+
+	return ret;
+}
+
+/*
+ * Program the four flash region address registers (SFA1AD, SFA2AD, SFB1AD,
+ * SFB2AD) with consecutive multiples of the chip size above memmap_phy.
+ * The arithmetic is done in u32, like the fields it is derived from, so a
+ * large chip size cannot overflow a signed int.
+ */
+static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
+{
+	u32 nor_size = q->nor_size;
+	void __iomem *base = q->iobase;
+
+	writel(nor_size + q->memmap_phy, base + QUADSPI_SFA1AD);
+	writel(nor_size * 2 + q->memmap_phy, base + QUADSPI_SFA2AD);
+	writel(nor_size * 3 + q->memmap_phy, base + QUADSPI_SFB1AD);
+	writel(nor_size * 4 + q->memmap_phy, base + QUADSPI_SFB2AD);
+}
+
+/*
+ * There are two different ways to read out the data from the flash:
+ *  the "IP Command Read" and the "AHB Command Read".
+ *
+ * The IC guy suggests we use the "AHB Command Read" which is faster
+ * than the "IP Command Read". (What's more is that there is a bug in
+ * the "IP Command Read" in the Vybrid.)
+ *
+ * After we set up the registers for the "AHB Command Read", we can use
+ * the memcpy to read the data directly. A "missed" access to the buffer
+ * causes the controller to clear the buffer, and use the sequence pointed
+ * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
+ */
+static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
+{
+	void __iomem *base = q->iobase;
+	int seqid;
+
+	/* AHB configuration for access buffer 0/1/2. */
+	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
+	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF1CR);
+	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
+	writel(QUADSPI_BUF3CR_ALLMST, base + QUADSPI_BUF3CR);
+
+	/* We only use the buffer3 */
+	writel(0, base + QUADSPI_BUF0IND);
+	writel(0, base + QUADSPI_BUF1IND);
+	writel(0, base + QUADSPI_BUF2IND);
+
+	/*
+	 * Set the default lut sequence for AHB Read, taken from the read
+	 * opcode of the first chip.
+	 * NOTE(review): a negative seqid (unsupported opcode) is not checked
+	 * before it is shifted into the register.
+	 */
+	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
+	writel(seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
+		q->iobase + QUADSPI_BFGENCR);
+}
+
+/*
+ * We use this function to do some basic init for spi_nor_scan().
+ *
+ * Sets a default clock rate, programs the LUT, clears the sampling
+ * configuration bits while the module is disabled, re-enables the module
+ * and turns on the TFIE interrupt.
+ *
+ * Returns 0 on success or the error code from clk_set_rate().
+ */
+static int fsl_qspi_nor_setup(struct fsl_qspi *q)
+{
+	void __iomem *base = q->iobase;
+	u32 reg;
+	int ret;
+
+	/* the default frequency, we will change it in the future. */
+	ret = clk_set_rate(q->clk, 66000000);
+	if (ret)
+		return ret;
+
+	/* Init the LUT table. */
+	fsl_qspi_init_lut(q);
+
+	/* Disable the module */
+	writel(QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+			base + QUADSPI_MCR);
+
+	/* Clear the FSDLY, FSPHS, HSENA and DDRSMP fields of SMPR. */
+	reg = readl(base + QUADSPI_SMPR);
+	writel(reg & ~(QUADSPI_SMPR_FSDLY_MASK
+			| QUADSPI_SMPR_FSPHS_MASK
+			| QUADSPI_SMPR_HSENA_MASK
+			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
+
+	/* Enable the module */
+	writel(QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+			base + QUADSPI_MCR);
+
+	/* enable the interrupt */
+	writel(QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+
+	return 0;
+}
+
+/*
+ * Final setup step, run once the chips have been scanned: switch the
+ * clock to the configured rate (multiplied by four on the i.MX6SX),
+ * rebuild the LUT and set up AHB reads.
+ *
+ * Returns 0 on success or the error code from clk_set_rate().
+ */
+static int fsl_qspi_nor_setup_last(struct fsl_qspi *q)
+{
+	unsigned long rate = q->clk_rate;
+	int err;
+
+	if (is_imx6sx_qspi(q))
+		rate = rate * 4;
+
+	err = clk_set_rate(q->clk, rate);
+	if (!err) {
+		/* Init the LUT table again. */
+		fsl_qspi_init_lut(q);
+
+		/* Init for AHB read */
+		fsl_qspi_init_abh_read(q);
+	}
+
+	return err;
+}
+
+/* Maps each supported compatible string to its per-variant data. */
+static const struct of_device_id fsl_qspi_dt_ids[] = {
+	{ .compatible = "fsl,vf610-qspi", .data = (void *)&vybrid_data, },
+	{ .compatible = "fsl,imx6sx-qspi", .data = (void *)&imx6sx_data, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
+
+/* Select @nor: its base offset is the chip size times its slot index. */
+static void fsl_qspi_set_base_addr(struct fsl_qspi *q, struct spi_nor *nor)
+{
+	unsigned int slot = nor - q->nor;
+
+	q->chip_base_addr = q->nor_size * slot;
+}
+
+/*
+ * spi_nor read_reg hook: run @opcode and copy @len bytes of the response
+ * into @buf. Returns 0 on success or the error from fsl_qspi_runcmd().
+ */
+static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+{
+	struct fsl_qspi *q = nor->priv;
+	int err = fsl_qspi_runcmd(q, opcode, 0, len);
+
+	if (!err)
+		fsl_qspi_read_data(q, len, buf);
+
+	return err;
+}
+
+/*
+ * spi_nor write_reg hook.
+ *
+ * With a data buffer the payload is sent through the TX FIFO; @len must
+ * be positive. Without a buffer only the command is issued, and after a
+ * chip erase the AHB buffer is invalidated. @write_enable is unused.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
+			int write_enable)
+{
+	struct fsl_qspi *q = nor->priv;
+	int err;
+
+	if (buf) {
+		if (len <= 0) {
+			dev_err(q->dev, "invalid cmd %d\n", opcode);
+			return -EINVAL;
+		}
+
+		return fsl_qspi_nor_write(q, nor, opcode, 0,
+					(u32 *)buf, len, NULL);
+	}
+
+	err = fsl_qspi_runcmd(q, opcode, 0, 1);
+	if (err)
+		return err;
+
+	if (opcode == SPINOR_OP_CHIP_ERASE)
+		fsl_qspi_invalid(q);
+
+	return 0;
+}
+
+/*
+ * spi_nor write hook: program @len bytes from @buf at offset @to with the
+ * chip's program opcode. The hook returns nothing, so a failed write is
+ * only visible through *@retlen not being advanced.
+ */
+static void fsl_qspi_write(struct spi_nor *nor, loff_t to,
+		size_t len, size_t *retlen, const u_char *buf)
+{
+	struct fsl_qspi *q = nor->priv;
+	u32 *txbuf = (u32 *)buf;
+
+	fsl_qspi_nor_write(q, nor, nor->program_opcode, to,
+				txbuf, len, retlen);
+
+	/* The flash content changed: drop the data in the AHB buffer. */
+	fsl_qspi_invalid(q);
+}
+
+/*
+ * spi_nor read hook: wait for the flash to become ready, then copy @len
+ * bytes starting at offset @from of the selected chip out of the AHB
+ * mapping. Adds @len to *@retlen.
+ *
+ * Returns 0 on success or the error from nor->wait_till_ready().
+ */
+static int fsl_qspi_read(struct spi_nor *nor, loff_t from,
+		size_t len, size_t *retlen, u_char *buf)
+{
+	struct fsl_qspi *q = nor->priv;
+	u8 cmd = nor->read_opcode;
+	int ret;
+
+	/* len is a size_t, so it needs %zu rather than %d. */
+	dev_dbg(q->dev, "cmd [%x],read from (0x%p, 0x%.8x, 0x%.8x),len:%zu\n",
+		cmd, q->ahb_base, q->chip_base_addr, (unsigned int)from, len);
+
+	/* Wait until the previous command is finished. */
+	ret = nor->wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	/* Read out the data directly from the AHB buffer. */
+	memcpy(buf, q->ahb_base + q->chip_base_addr + from, len);
+
+	*retlen += len;
+	return 0;
+}
+
+/*
+ * spi_nor erase hook: erase the block at offset @offs with the chip's
+ * erase opcode and invalidate the AHB buffer afterwards.
+ *
+ * Returns 0 on success or the first error met along the way.
+ */
+static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs)
+{
+	struct fsl_qspi *q = nor->priv;
+	int err;
+
+	dev_dbg(nor->dev, "%dKiB at 0x%08x:0x%08x\n",
+		nor->mtd->erasesize / 1024, q->chip_base_addr, (u32)offs);
+
+	/*
+	 * Three steps, each run only if the previous one succeeded: wait
+	 * for the previous write command, send write enable, then erase.
+	 */
+	err = nor->wait_till_ready(nor);
+	if (!err)
+		err = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
+	if (!err)
+		err = fsl_qspi_runcmd(q, nor->erase_opcode, offs, 0);
+	if (err)
+		return err;
+
+	fsl_qspi_invalid(q);
+	return 0;
+}
+
+/*
+ * spi_nor prepare hook: enable both clocks and select @nor as the chip
+ * that following commands address. If the second clock cannot be
+ * enabled, the first one is disabled again.
+ *
+ * Returns 0 on success or the error from clk_enable().
+ */
+static int fsl_qspi_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct fsl_qspi *q = nor->priv;
+	int err;
+
+	err = clk_enable(q->clk_en);
+	if (err)
+		return err;
+
+	err = clk_enable(q->clk);
+	if (err)
+		goto err_disable_clk_en;
+
+	fsl_qspi_set_base_addr(q, nor);
+	return 0;
+
+err_disable_clk_en:
+	clk_disable(q->clk_en);
+	return err;
+}
+
+/*
+ * spi_nor unprepare hook: disable the two clocks that fsl_qspi_prep()
+ * enabled, in reverse order.
+ */
+static void fsl_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct fsl_qspi *q = nor->priv;
+
+	clk_disable(q->clk);
+	clk_disable(q->clk_en);
+}
+
+/*
+ * Probe one QuadSPI controller: map its resources, enable the clocks,
+ * request the interrupt, then scan and register one SPI NOR MTD device
+ * per available child node.
+ *
+ * Error unwinding is layered:
+ *   chip_failed      - left the child loop early: drop the node reference
+ *   last_init_failed - unregister whatever MTD devices were registered
+ *   irq_failed       - disable and unprepare both clocks
+ *   map_failed       - nothing to undo beyond devm-managed resources
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int fsl_qspi_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct mtd_part_parser_data ppdata;
+	struct device *dev = &pdev->dev;
+	struct fsl_qspi *q;
+	struct resource *res;
+	struct spi_nor *nor;
+	struct mtd_info *mtd;
+	int ret, i = 0;
+	bool has_second_chip = false;
+	const struct of_device_id *of_id =
+			of_match_device(fsl_qspi_dt_ids, &pdev->dev);
+
+	/* Without a match entry there is no per-variant data to use. */
+	if (!of_id)
+		return -ENODEV;
+
+	q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
+	if (!q)
+		return -ENOMEM;
+
+	/*
+	 * Everything the interrupt handler touches must be valid before
+	 * the interrupt is requested.
+	 */
+	q->dev = dev;
+	q->devtype_data = (struct fsl_qspi_devtype_data *)of_id->data;
+	init_completion(&q->c);
+
+	q->nor_num = of_get_child_count(dev->of_node);
+	if (!q->nor_num || q->nor_num > FSL_QSPI_MAX_CHIP)
+		return -ENODEV;
+
+	/* find the resources */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI");
+	q->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->iobase)) {
+		ret = PTR_ERR(q->iobase);
+		goto map_failed;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					"QuadSPI-memory");
+	q->ahb_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->ahb_base)) {
+		ret = PTR_ERR(q->ahb_base);
+		goto map_failed;
+	}
+	q->memmap_phy = res->start;
+
+	/* find the clocks */
+	q->clk_en = devm_clk_get(dev, "qspi_en");
+	if (IS_ERR(q->clk_en)) {
+		ret = PTR_ERR(q->clk_en);
+		goto map_failed;
+	}
+
+	q->clk = devm_clk_get(dev, "qspi");
+	if (IS_ERR(q->clk)) {
+		ret = PTR_ERR(q->clk);
+		goto map_failed;
+	}
+
+	ret = clk_prepare_enable(q->clk_en);
+	if (ret) {
+		dev_err(dev, "can not enable the qspi_en clock\n");
+		goto map_failed;
+	}
+
+	ret = clk_prepare_enable(q->clk);
+	if (ret) {
+		clk_disable_unprepare(q->clk_en);
+		dev_err(dev, "can not enable the qspi clock\n");
+		goto map_failed;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get the irq\n");
+		goto irq_failed;
+	}
+
+	ret = devm_request_irq(dev, ret,
+			fsl_qspi_irq_handler, 0, pdev->name, q);
+	if (ret) {
+		dev_err(dev, "failed to request irq.\n");
+		goto irq_failed;
+	}
+
+	platform_set_drvdata(pdev, q);
+
+	ret = fsl_qspi_nor_setup(q);
+	if (ret)
+		goto irq_failed;
+
+	if (of_get_property(np, "fsl,qspi-has-second-chip", NULL))
+		has_second_chip = true;
+
+	/* iterate the subnodes. */
+	for_each_available_child_of_node(dev->of_node, np) {
+		const struct spi_device_id *id;
+		char modalias[40];
+
+		/* skip the holes */
+		if (!has_second_chip)
+			i *= 2;
+
+		/* Skipping holes must not run past the chip arrays. */
+		if (i >= FSL_QSPI_MAX_CHIP) {
+			dev_err(dev, "too many flash chips for this layout\n");
+			ret = -EINVAL;
+			goto chip_failed;
+		}
+
+		nor = &q->nor[i];
+		mtd = &q->mtd[i];
+
+		nor->mtd = mtd;
+		nor->dev = dev;
+		nor->priv = q;
+		mtd->priv = nor;
+
+		/* fill the hooks */
+		nor->read_reg = fsl_qspi_read_reg;
+		nor->write_reg = fsl_qspi_write_reg;
+		nor->read = fsl_qspi_read;
+		nor->write = fsl_qspi_write;
+		nor->erase = fsl_qspi_erase;
+
+		nor->prepare = fsl_qspi_prep;
+		nor->unprepare = fsl_qspi_unprep;
+
+		/* Every failure below must leave with ret set to an error. */
+		ret = of_modalias_node(np, modalias, sizeof(modalias));
+		if (ret < 0)
+			goto chip_failed;
+
+		id = spi_nor_match_id(modalias);
+		if (!id) {
+			ret = -ENODEV;
+			goto chip_failed;
+		}
+
+		ret = of_property_read_u32(np, "spi-max-frequency",
+				&q->clk_rate);
+		if (ret < 0)
+			goto chip_failed;
+
+		/* set the chip address for READID */
+		fsl_qspi_set_base_addr(q, nor);
+
+		ret = spi_nor_scan(nor, id, SPI_NOR_QUAD);
+		if (ret)
+			goto chip_failed;
+
+		ppdata.of_node = np;
+		ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
+		if (ret)
+			goto chip_failed;
+
+		/* Set the correct NOR size now. */
+		if (q->nor_size == 0) {
+			q->nor_size = mtd->size;
+
+			/* Map the SPI NOR to an accessible address */
+			fsl_qspi_set_map_addr(q);
+		}
+
+		/*
+		 * The TX FIFO is 64 bytes in the Vybrid, but the Page Program
+		 * may write 256 bytes at a time. The write works in units of
+		 * the TX FIFO, not in units of the SPI NOR's page size.
+		 *
+		 * So shrink the spi_nor->page_size if it is larger than the
+		 * TX FIFO.
+		 */
+		if (nor->page_size > q->devtype_data->txfifo)
+			nor->page_size = q->devtype_data->txfifo;
+
+		i++;
+	}
+
+	/* finish the rest init. */
+	ret = fsl_qspi_nor_setup_last(q);
+	if (ret)
+		goto last_init_failed;
+
+	/* Keep the clocks prepared; the prepare hook enables them on demand. */
+	clk_disable(q->clk);
+	clk_disable(q->clk_en);
+	dev_info(dev, "QuadSPI SPI NOR flash driver\n");
+	return 0;
+
+chip_failed:
+	/* The child iterator was left early: drop its node reference. */
+	of_node_put(np);
+last_init_failed:
+	/* Walk every slot: with holes the chips are not stored contiguously. */
+	for (i = 0; i < FSL_QSPI_MAX_CHIP; i++)
+		mtd_device_unregister(&q->mtd[i]);
+
+irq_failed:
+	clk_disable_unprepare(q->clk);
+	clk_disable_unprepare(q->clk_en);
+map_failed:
+	dev_err(dev, "Freescale QuadSPI probe failed\n");
+	return ret;
+}
+
+/*
+ * Remove the controller: unregister the MTD devices, disable the
+ * hardware and its interrupt, and unprepare the clocks.
+ */
+static int fsl_qspi_remove(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+	int i;
+
+	/*
+	 * Walk every slot rather than the first nor_num ones: when probe
+	 * skips holes, the chips are not stored contiguously.
+	 */
+	for (i = 0; i < FSL_QSPI_MAX_CHIP; i++)
+		mtd_device_unregister(&q->mtd[i]);
+
+	/* disable the hardware */
+	writel(QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	writel(0x0, q->iobase + QUADSPI_RSER);
+
+	clk_unprepare(q->clk);
+	clk_unprepare(q->clk_en);
+	return 0;
+}
+
+/* Platform driver glue; devices are matched through fsl_qspi_dt_ids. */
+static struct platform_driver fsl_qspi_driver = {
+	.driver = {
+		.name	= "fsl-quadspi",
+		.bus	= &platform_bus_type,
+		.owner	= THIS_MODULE,
+		.of_match_table = fsl_qspi_dt_ids,
+	},
+	.probe          = fsl_qspi_probe,
+	.remove		= fsl_qspi_remove,
+};
+module_platform_driver(fsl_qspi_driver);
+
+MODULE_DESCRIPTION("Freescale QuadSPI Controller Driver");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
new file mode 100644
index 0000000..c713c86
--- /dev/null
+++ b/drivers/mtd/spi-nor/spi-nor.c

@@ -0,0 +1,1107 @@
+/*
+ * Based on m25p80.c, by Mike Lavender (mike@steroidmicros.com), with
+ * influence from lart.c (Abraham Van Der Merwe) and mtd_dataflash.c
+ *
+ * Copyright (C) 2005, Intec Automation Inc.
+ * Copyright (C) 2014, Freescale Semiconductor, Inc.
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/math64.h>
+
+#include <linux/mtd/cfi.h>
+#include <linux/mtd/mtd.h>
+#include <linux/of_platform.h>
+#include <linux/spi/flash.h>
+#include <linux/mtd/spi-nor.h>
+
+/*
+ * Upper bound, in jiffies, on how long the status register is polled
+ * before giving up: 40 * HZ, i.e. 40 seconds.
+ */
+#define	MAX_READY_WAIT_JIFFIES	(40 * HZ) /* M25P16 specs 40s max chip erase */
+
+/* Drop the low 16 bits of a JEDEC id; the result is used as the maker code. */
+#define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
+
+/*
+ * Read the status register.
+ *
+ * Issues SPINOR_OP_RDSR through nor->read_reg() and reads back one byte.
+ * Returns the status register value on success, or the negative value
+ * returned by nor->read_reg() on failure.
+ */
+static int read_sr(struct spi_nor *nor)
+{
+	int ret;
+	u8 val;
+
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
+	if (ret < 0) {
+		/* Log against the flash device, the same way read_cr() does. */
+		dev_err(nor->dev, "error %d reading SR\n", ret);
+		return ret;
+	}
+
+	return val;
+}
+
+/*
+ * Read the configuration register.
+ * Returns the register value on success, or the negative value reported
+ * by nor->read_reg() if the read failed.
+ */
+static int read_cr(struct spi_nor *nor)
+{
+	u8 cr;
+	int err = nor->read_reg(nor, SPINOR_OP_RDCR, &cr, 1);
+
+	if (err >= 0)
+		return cr;
+
+	dev_err(nor->dev, "error %d reading CR\n", err);
+	return err;
+}
+
+/*
+ * Dummy value for the currently selected read mode: 1 for fast, dual and
+ * quad reads, 0 for normal reads (and for any other value).  Kept as a
+ * helper so further read commands with other requirements can be added.
+ */
+static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
+{
+	if (nor->flash_read == SPI_NOR_FAST ||
+	    nor->flash_read == SPI_NOR_DUAL ||
+	    nor->flash_read == SPI_NOR_QUAD)
+		return 1;
+
+	/* SPI_NOR_NORMAL, or anything unrecognized */
+	return 0;
+}
+
+/*
+ * Write one byte to the status register (SPINOR_OP_WRSR).
+ * The byte is staged in nor->cmd_buf[0].
+ * Returns negative if error occurred.
+ */
+static inline int write_sr(struct spi_nor *nor, u8 val)
+{
+	int rc;
+
+	nor->cmd_buf[0] = val;
+	rc = nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
+
+	return rc;
+}
+
+/*
+ * Send the Write Enable command (SPINOR_OP_WREN, no payload) to set the
+ * write enable latch.
+ * Returns negative if error occurred.
+ */
+static inline int write_enable(struct spi_nor *nor)
+{
+	int rc = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
+
+	return rc;
+}
+
+/*
+ * Send the write disable instruction (SPINOR_OP_WRDI, no payload) to the
+ * chip.  Returns whatever nor->write_reg() returns.
+ */
+static inline int write_disable(struct spi_nor *nor)
+{
+	int rc = nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0, 0);
+
+	return rc;
+}
+
+/* Recover the spi_nor that an mtd_info carries in its ->priv pointer. */
+static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
+{
+	struct spi_nor *nor = mtd->priv;
+
+	return nor;
+}
+
+/*
+ * Enable/disable 4-byte addressing mode.
+ *
+ * ST/Micron, Macronix and Winbond (0xEF) parts get the EN4B/EX4B opcode;
+ * for ST/Micron it is wrapped in write-enable/write-disable.  Every other
+ * manufacturer is handled "Spansion style" by writing enable << 7 with
+ * SPINOR_OP_BRWR.  Returns the result of the mode-changing write_reg().
+ */
+static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
+{
+	u32 mfr = JEDEC_MFR(jedec_id);
+	bool wren_needed;
+	u8 opcode;
+	int rc;
+
+	if (mfr != CFI_MFR_ST && mfr != CFI_MFR_MACRONIX && mfr != 0xEF) {
+		/* Spansion style */
+		nor->cmd_buf[0] = enable << 7;
+		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0);
+	}
+
+	/* Some Micron need WREN command; all will accept it */
+	wren_needed = (mfr == CFI_MFR_ST);
+
+	if (wren_needed)
+		write_enable(nor);
+
+	opcode = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
+	rc = nor->write_reg(nor, opcode, NULL, 0, 0);
+
+	if (wren_needed)
+		write_disable(nor);
+
+	return rc;
+}
+
+/*
+ * Default wait_till_ready implementation: poll the status register until
+ * the SR_WIP bit clears.
+ *
+ * Returns 0 once SR_WIP is clear, the negative error from read_sr() if the
+ * status register cannot be read, or -ETIMEDOUT if SR_WIP is still set
+ * after MAX_READY_WAIT_JIFFIES.
+ */
+static int spi_nor_wait_till_ready(struct spi_nor *nor)
+{
+	unsigned long deadline;
+	int sr;
+
+	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
+
+	do {
+		cond_resched();
+
+		sr = read_sr(nor);
+		if (sr < 0)
+			return sr;	/* an I/O failure is not a timeout */
+		if (!(sr & SR_WIP))
+			return 0;
+	} while (!time_after_eq(jiffies, deadline));
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Wait for the flash to become ready by delegating to the
+ * nor->wait_till_ready hook.
+ * Returns non-zero if error.
+ */
+static int wait_till_ready(struct spi_nor *nor)
+{
+	int rc = nor->wait_till_ready(nor);
+
+	return rc;
+}
+
+/*
+ * Erase the whole flash memory.
+ *
+ * Only issues the chip-erase command; it does not wait for the erase to
+ * complete.  Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_chip(struct spi_nor *nor)
+{
+	int err;
+
+	dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10));
+
+	/* Let any previous write command finish first. */
+	err = wait_till_ready(nor);
+	if (!err) {
+		/* Write enable goes out ahead of the erase opcode. */
+		write_enable(nor);
+		err = nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0);
+	}
+
+	return err;
+}
+
+/*
+ * Take nor->lock and, if the driver supplied one, run its ->prepare hook
+ * for @ops.  On hook failure the lock is released again and the hook's
+ * error is returned; on success (0) the caller holds the lock.
+ */
+static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	int err;
+
+	mutex_lock(&nor->lock);
+
+	if (!nor->prepare)
+		return 0;
+
+	err = nor->prepare(nor, ops);
+	if (err) {
+		dev_err(nor->dev, "failed in the preparation.\n");
+		mutex_unlock(&nor->lock);
+	}
+
+	return err;
+}
+
+/*
+ * Counterpart of spi_nor_lock_and_prep(): run the optional ->unprepare
+ * hook for @ops, then drop nor->lock.
+ */
+static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	if (nor->unprepare != NULL)
+		nor->unprepare(nor, ops);
+
+	mutex_unlock(&nor->lock);
+}
+
+/*
+ * Erase an address range on the nor chip.  The address range may cover
+ * one or more erase sectors.  Returns -EINVAL if the length is not a
+ * multiple of the erase size, -EIO if an erase command fails, or the
+ * error from spi_nor_lock_and_prep(); 0 on success.
+ */
+static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t remainder;
+	u32 offset, remaining;
+	int ret;
+
+	dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr,
+			(long long)instr->len);
+
+	/* Only whole erase blocks can be erased. */
+	div_u64_rem(instr->len, mtd->erasesize, &remainder);
+	if (remainder != 0)
+		return -EINVAL;
+
+	offset = instr->addr;
+	remaining = instr->len;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_ERASE);
+	if (ret)
+		return ret;
+
+	if (remaining == mtd->size) {
+		/* whole-chip erase */
+		if (erase_chip(nor))
+			ret = -EIO;
+	} else {
+		/*
+		 * "sector"-at-a-time erase
+		 *
+		 * REVISIT in some cases we could speed up erasing large
+		 * regions by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K.
+		 * We may have set up to use "small sector erase", but
+		 * that's not always optimal.
+		 */
+		for (; remaining; remaining -= mtd->erasesize,
+				  offset += mtd->erasesize) {
+			if (nor->erase(nor, offset)) {
+				ret = -EIO;
+				break;
+			}
+		}
+	}
+
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
+
+	if (ret) {
+		instr->state = MTD_ERASE_FAILED;
+		return ret;
+	}
+
+	instr->state = MTD_ERASE_DONE;
+	mtd_erase_callback(instr);
+
+	return ret;
+}
+
+/*
+ * MTD ->_lock handler: raise the SR_BP2..SR_BP0 bits in the status
+ * register according to where @ofs falls in the device.
+ *
+ * The new BP value is picked by comparing @ofs against mtd->size minus
+ * size/2, size/4 ... size/64; @len is not consulted.  The status register
+ * is rewritten only when the new BP field is numerically larger than the
+ * current one, so that no other area gets unlocked.
+ *
+ * Returns 0 on success, otherwise the error from spi_nor_lock_and_prep(),
+ * wait_till_ready(), read_sr() or write_sr().
+ */
+static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int sr;
+	int ret;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous command */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto err;
+
+	/*
+	 * A failed read must not be truncated into a bogus status byte and
+	 * then written back to the chip.
+	 */
+	sr = read_sr(nor);
+	if (sr < 0) {
+		ret = sr;
+		goto err;
+	}
+	status_old = sr;
+
+	if (offset < mtd->size - (mtd->size / 2))
+		status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+	else if (offset < mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset < mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 64))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+
+	/* Only modify protection if it will not unlock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) >
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+	}
+
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
+	return ret;
+}
+
+/*
+ * MTD ->_unlock handler: lower the SR_BP2..SR_BP0 bits in the status
+ * register according to where the end of the range (@ofs + @len) falls.
+ *
+ * The new BP value is picked by comparing @ofs + @len against mtd->size
+ * minus size/64, size/32 ... size/2.  The status register is rewritten
+ * only when the new BP field is numerically smaller than the current one,
+ * so that no other area gets locked.
+ *
+ * Returns 0 on success, otherwise the error from spi_nor_lock_and_prep(),
+ * wait_till_ready(), read_sr() or write_sr().
+ */
+static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int sr;
+	int ret;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous command */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto err;
+
+	/*
+	 * A failed read must not be truncated into a bogus status byte and
+	 * then written back to the chip.
+	 */
+	sr = read_sr(nor);
+	if (sr < 0) {
+		ret = sr;
+		goto err;
+	}
+	status_old = sr;
+
+	if (offset+len > mtd->size - (mtd->size / 64))
+		status_new = status_old & ~(SR_BP2 | SR_BP1 | SR_BP0);
+	else if (offset+len > mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else if (offset+len > mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset+len > mtd->size - (mtd->size / 2))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+
+	/* Only modify protection if it will not lock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) <
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+	}
+
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
+	return ret;
+}
+
+struct flash_info {
+	/* JEDEC id zero means "no ID" (most older chips); otherwise it has
+	 * a high byte of zero plus three data bytes: the manufacturer id,
+	 * then a two byte device id.
+	 *
+	 * ext_id is matched against the fourth and fifth bytes of the RDID
+	 * response; an ext_id of zero matches any value.
+	 */
+	u32		jedec_id;
+	u16             ext_id;
+
+	/* The size listed here is what works with SPINOR_OP_SE, which isn't
+	 * necessarily called a "sector" by the vendor.  The device size is
+	 * computed as sector_size * n_sectors.
+	 */
+	unsigned	sector_size;
+	u16		n_sectors;
+
+	/* page_size is copied to nor->page_size; a non-zero addr_width
+	 * overrides the address width otherwise derived from the size.
+	 */
+	u16		page_size;
+	u16		addr_width;
+
+	/* Bitwise OR of the flag values defined below. */
+	u16		flags;
+#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
+#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
+#define	SST_WRITE		0x04	/* use SST byte programming */
+#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
+#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
+#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
+#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
+};
+
+/*
+ * Build the driver_data value for a flash table entry: the address of an
+ * anonymous struct flash_info, cast to kernel_ulong_t.  page_size is fixed
+ * at 256 and addr_width is left zero.
+ */
+#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.jedec_id = (_jedec_id),				\
+		.ext_id = (_ext_id),					\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = 256,					\
+		.flags = (_flags),					\
+	})
+
+/*
+ * Same as INFO() but for entries without a JEDEC id (jedec_id and ext_id
+ * stay zero); page size and address width are given explicitly.
+ */
+#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = (_page_size),				\
+		.addr_width = (_addr_width),				\
+		.flags = (_flags),					\
+	})
+
+/* NOTE: double check command sets and memory organization when you add
+ * more nor chips.  This current list focuses on newer chips, which
+ * have been converging on command sets that include JEDEC ID.
+ *
+ * Lookups take the first matching entry, so each name should appear
+ * only once; the list ends with an empty sentinel entry.
+ */
+const struct spi_device_id spi_nor_ids[] = {
+	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
+	{ "at25fs010",  INFO(0x1f6601, 0, 32 * 1024,   4, SECT_4K) },
+	{ "at25fs040",  INFO(0x1f6604, 0, 64 * 1024,   8, SECT_4K) },
+
+	{ "at25df041a", INFO(0x1f4401, 0, 64 * 1024,   8, SECT_4K) },
+	{ "at25df321a", INFO(0x1f4701, 0, 64 * 1024,  64, SECT_4K) },
+	{ "at25df641",  INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) },
+
+	{ "at26f004",   INFO(0x1f0400, 0, 64 * 1024,  8, SECT_4K) },
+	{ "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) },
+	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
+	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
+
+	{ "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) },
+
+	/* EON -- en25xxx */
+	{ "en25f32",    INFO(0x1c3116, 0, 64 * 1024,   64, SECT_4K) },
+	{ "en25p32",    INFO(0x1c2016, 0, 64 * 1024,   64, 0) },
+	{ "en25q32b",   INFO(0x1c3016, 0, 64 * 1024,   64, 0) },
+	{ "en25p64",    INFO(0x1c2017, 0, 64 * 1024,  128, 0) },
+	{ "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
+	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
+
+	/* ESMT */
+	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
+
+	/* Everspin */
+	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "mr25h10",  CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+
+	/* GigaDevice */
+	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
+
+	/* Intel/Numonyx -- xxxs33b */
+	{ "160s33b",  INFO(0x898911, 0, 64 * 1024,  32, 0) },
+	{ "320s33b",  INFO(0x898912, 0, 64 * 1024,  64, 0) },
+	{ "640s33b",  INFO(0x898913, 0, 64 * 1024, 128, 0) },
+
+	/* Macronix */
+	{ "mx25l2005a",  INFO(0xc22012, 0, 64 * 1024,   4, SECT_4K) },
+	{ "mx25l4005a",  INFO(0xc22013, 0, 64 * 1024,   8, SECT_4K) },
+	{ "mx25l8005",   INFO(0xc22014, 0, 64 * 1024,  16, 0) },
+	{ "mx25l1606e",  INFO(0xc22015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
+	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
+	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
+	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
+	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
+	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
+
+	/* Micron */
+	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, 0) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, 0) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
+	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K) },
+	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) },
+
+	/* PMC */
+	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
+	{ "pm25lv010",   INFO(0,        0, 32 * 1024,    4, SECT_4K_PMC) },
+	{ "pm25lq032",   INFO(0x7f9d46, 0, 64 * 1024,   64, SECT_4K) },
+
+	/* Spansion -- single (large) sector size only, at least
+	 * for the chips listed here (without boot sectors).
+	 */
+	{ "s25sl032p",  INFO(0x010215, 0x4d00,  64 * 1024,  64, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s25sl064p",  INFO(0x010216, 0x4d00,  64 * 1024, 128, 0) },
+	{ "s25fl256s0", INFO(0x010219, 0x4d00, 256 * 1024, 128, 0) },
+	{ "s25fl256s1", INFO(0x010219, 0x4d01,  64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s25fl512s",  INFO(0x010220, 0x4d00, 256 * 1024, 256, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
+	{ "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
+	{ "s25sl12801", INFO(0x012018, 0x0301,  64 * 1024, 256, 0) },
+	{ "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024,  64, 0) },
+	{ "s25fl129p1", INFO(0x012018, 0x4d01,  64 * 1024, 256, 0) },
+	{ "s25sl004a",  INFO(0x010212,      0,  64 * 1024,   8, 0) },
+	{ "s25sl008a",  INFO(0x010213,      0,  64 * 1024,  16, 0) },
+	{ "s25sl016a",  INFO(0x010214,      0,  64 * 1024,  32, 0) },
+	{ "s25sl032a",  INFO(0x010215,      0,  64 * 1024,  64, 0) },
+	{ "s25sl064a",  INFO(0x010216,      0,  64 * 1024, 128, 0) },
+	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K) },
+	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K) },
+	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+
+	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
+	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+	{ "sst25vf080b", INFO(0xbf258e, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
+	{ "sst25vf016b", INFO(0xbf2541, 0, 64 * 1024, 32, SECT_4K | SST_WRITE) },
+	{ "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) },
+	{ "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) },
+	{ "sst25wf512",  INFO(0xbf2501, 0, 64 * 1024,  1, SECT_4K | SST_WRITE) },
+	{ "sst25wf010",  INFO(0xbf2502, 0, 64 * 1024,  2, SECT_4K | SST_WRITE) },
+	{ "sst25wf020",  INFO(0xbf2503, 0, 64 * 1024,  4, SECT_4K | SST_WRITE) },
+	{ "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+
+	/* ST Microelectronics -- newer production may have feature updates */
+	{ "m25p05",  INFO(0x202010,  0,  32 * 1024,   2, 0) },
+	{ "m25p10",  INFO(0x202011,  0,  32 * 1024,   4, 0) },
+	{ "m25p20",  INFO(0x202012,  0,  64 * 1024,   4, 0) },
+	{ "m25p40",  INFO(0x202013,  0,  64 * 1024,   8, 0) },
+	{ "m25p80",  INFO(0x202014,  0,  64 * 1024,  16, 0) },
+	{ "m25p16",  INFO(0x202015,  0,  64 * 1024,  32, 0) },
+	{ "m25p32",  INFO(0x202016,  0,  64 * 1024,  64, 0) },
+	{ "m25p64",  INFO(0x202017,  0,  64 * 1024, 128, 0) },
+	{ "m25p128", INFO(0x202018,  0, 256 * 1024,  64, 0) },
+	{ "n25q032", INFO(0x20ba16,  0,  64 * 1024,  64, 0) },
+
+	{ "m25p05-nonjedec",  INFO(0, 0,  32 * 1024,   2, 0) },
+	{ "m25p10-nonjedec",  INFO(0, 0,  32 * 1024,   4, 0) },
+	{ "m25p20-nonjedec",  INFO(0, 0,  64 * 1024,   4, 0) },
+	{ "m25p40-nonjedec",  INFO(0, 0,  64 * 1024,   8, 0) },
+	{ "m25p80-nonjedec",  INFO(0, 0,  64 * 1024,  16, 0) },
+	{ "m25p16-nonjedec",  INFO(0, 0,  64 * 1024,  32, 0) },
+	{ "m25p32-nonjedec",  INFO(0, 0,  64 * 1024,  64, 0) },
+	{ "m25p64-nonjedec",  INFO(0, 0,  64 * 1024, 128, 0) },
+	{ "m25p128-nonjedec", INFO(0, 0, 256 * 1024,  64, 0) },
+
+	{ "m45pe10", INFO(0x204011,  0, 64 * 1024,    2, 0) },
+	{ "m45pe80", INFO(0x204014,  0, 64 * 1024,   16, 0) },
+	{ "m45pe16", INFO(0x204015,  0, 64 * 1024,   32, 0) },
+
+	{ "m25pe20", INFO(0x208012,  0, 64 * 1024,  4,       0) },
+	{ "m25pe80", INFO(0x208014,  0, 64 * 1024, 16,       0) },
+	{ "m25pe16", INFO(0x208015,  0, 64 * 1024, 32, SECT_4K) },
+
+	{ "m25px16",    INFO(0x207115,  0, 64 * 1024, 32, SECT_4K) },
+	{ "m25px32",    INFO(0x207116,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s0", INFO(0x207316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s1", INFO(0x206316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px64",    INFO(0x207117,  0, 64 * 1024, 128, 0) },
+
+	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
+	{ "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
+	{ "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
+	{ "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
+	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
+	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+
+	/* Catalyst / On Semiconductor -- non-JEDEC */
+	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ },
+};
+EXPORT_SYMBOL_GPL(spi_nor_ids);
+
+/*
+ * Default read_id implementation: read five RDID bytes and look them up
+ * in spi_nor_ids.
+ *
+ * The first three bytes form the JEDEC id, the last two the extended id.
+ * An entry matches when its jedec_id is equal and its ext_id is either
+ * zero or equal.  Returns the first matching entry, ERR_PTR() of the
+ * read_reg error, or ERR_PTR(-ENODEV) when nothing matches.
+ */
+static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
+{
+	const struct spi_device_id *entry;
+	struct flash_info *info;
+	u8 raw[5];
+	u32 jedec;
+	u16 ext_jedec;
+	int rc, i;
+
+	rc = nor->read_reg(nor, SPINOR_OP_RDID, raw, 5);
+	if (rc < 0) {
+		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", rc);
+		return ERR_PTR(rc);
+	}
+
+	jedec = ((u32)raw[0] << 16) | ((u32)raw[1] << 8) | raw[2];
+	ext_jedec = raw[3] << 8 | raw[4];
+
+	/* The "- 1" skips the empty sentinel that ends the table. */
+	for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) {
+		entry = &spi_nor_ids[i];
+		info = (void *)entry->driver_data;
+
+		if (info->jedec_id != jedec)
+			continue;
+		if (info->ext_id == 0 || info->ext_id == ext_jedec)
+			return entry;
+	}
+
+	dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec);
+	return ERR_PTR(-ENODEV);
+}
+
+/* Identify the chip by delegating to the nor->read_id hook. */
+static const struct spi_device_id *jedec_probe(struct spi_nor *nor)
+{
+	const struct spi_device_id *found = nor->read_id(nor);
+
+	return found;
+}
+
+/*
+ * MTD ->_read handler: run nor->read() between spi_nor_lock_and_prep()
+ * and spi_nor_unlock_and_unprep().  Returns the preparation error, or
+ * whatever nor->read() returns.
+ */
+static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t *retlen, u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	int err;
+
+	dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len);
+
+	err = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_READ);
+	if (!err) {
+		err = nor->read(nor, from, len, retlen, buf);
+		spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_READ);
+	}
+
+	return err;
+}
+
+/*
+ * MTD ->_write handler used for chips flagged SST_WRITE.
+ *
+ * A leading byte at an odd address and a trailing leftover byte are
+ * written one at a time with SPINOR_OP_BP; everything in between is
+ * written two bytes at a time with SPINOR_OP_AAI_WP.  nor->sst_write_second
+ * is false for the first two-byte transfer and true for the following
+ * ones, and is reset to false when the loop ends.
+ *
+ * Returns 0 on success, or the error from spi_nor_lock_and_prep() or
+ * wait_till_ready().
+ */
+static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
+		size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	size_t actual;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	write_enable(nor);
+
+	nor->sst_write_second = false;
+
+	/* actual is the index into buf; it starts at 1 for an odd address */
+	actual = to % 2;
+	/* Start write from odd address. */
+	if (actual) {
+		nor->program_opcode = SPINOR_OP_BP;
+
+		/* write one byte. */
+		nor->write(nor, to, 1, retlen, buf);
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+	}
+	to += actual;
+
+	/* Write out most of the data here. */
+	for (; actual < len - 1; actual += 2) {
+		nor->program_opcode = SPINOR_OP_AAI_WP;
+
+		/* write two bytes. */
+		nor->write(nor, to, 2, retlen, buf + actual);
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		to += 2;
+		nor->sst_write_second = true;
+	}
+	nor->sst_write_second = false;
+
+	write_disable(nor);
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	/* Write out trailing byte if it exists. */
+	if (actual != len) {
+		write_enable(nor);
+
+		nor->program_opcode = SPINOR_OP_BP;
+		nor->write(nor, to, 1, retlen, buf + actual);
+
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		write_disable(nor);
+	}
+	/* Reached on success as well as on error; ret holds the result. */
+time_out:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return ret;
+}
+
+/*
+ * Write an address range to the nor chip.  Data is written in chunks of
+ * at most nor->page_size bytes, the first chunk ending on a page
+ * boundary.  The address range may be any size provided it is within the
+ * physical boundaries.
+ *
+ * Returns 0 on success, or the error from spi_nor_lock_and_prep() or
+ * wait_till_ready().  When a wait fails, no further chunk is written.
+ */
+static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
+	size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	u32 page_offset, page_size, i;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto write_err;
+
+	write_enable(nor);
+
+	/* NOTE(review): the mask assumes page_size is a power of two. */
+	page_offset = to & (nor->page_size - 1);
+
+	/* do all the bytes fit onto one page? */
+	if (page_offset + len <= nor->page_size) {
+		nor->write(nor, to, len, retlen, buf);
+	} else {
+		/* the size of data remaining on the first page */
+		page_size = nor->page_size - page_offset;
+		nor->write(nor, to, page_size, retlen, buf);
+
+		/* write everything in nor->page_size chunks */
+		for (i = page_size; i < len; i += page_size) {
+			page_size = len - i;
+			if (page_size > nor->page_size)
+				page_size = nor->page_size;
+
+			/* Stop if the previous page never completed. */
+			ret = wait_till_ready(nor);
+			if (ret)
+				goto write_err;
+
+			write_enable(nor);
+
+			nor->write(nor, to + i, page_size, retlen, buf + i);
+		}
+	}
+
+write_err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return ret;
+}
+
+/*
+ * Set the SR_QUAD_EN_MX bit in the status register and read it back.
+ *
+ * Returns 0 when the bit reads back set, -EINVAL when it does not, or the
+ * error from read_sr(), nor->write_reg() or wait_till_ready().
+ */
+static int macronix_quad_enable(struct spi_nor *nor)
+{
+	int ret, val;
+
+	/* Never OR the quad bit into a failed (negative) read. */
+	val = read_sr(nor);
+	if (val < 0)
+		return val;
+
+	write_enable(nor);
+
+	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
+	ret = nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	/* read back and check it */
+	ret = read_sr(nor);
+	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
+		dev_err(nor->dev, "Macronix Quad bit not set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the status register and the configuration register in one
+ * two-byte SPINOR_OP_WRSR command: the low byte of @val goes to the
+ * status register, the high byte to the configuration register.
+ * Returns negative if an error occurred.
+ */
+static int write_sr_cr(struct spi_nor *nor, u16 val)
+{
+	int rc;
+
+	nor->cmd_buf[1] = (val >> 8);
+	nor->cmd_buf[0] = val & 0xff;
+
+	rc = nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2, 0);
+	return rc;
+}
+
+/*
+ * Write CR_QUAD_EN_SPAN into the configuration register (with a zero
+ * status byte) and check that the bit reads back set.
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+static int spansion_quad_enable(struct spi_nor *nor)
+{
+	int sr_cr = CR_QUAD_EN_SPAN << 8;
+	int rc;
+
+	write_enable(nor);
+
+	rc = write_sr_cr(nor, sr_cr);
+	if (rc < 0) {
+		dev_err(nor->dev,
+			"error while writing configuration register\n");
+		return -EINVAL;
+	}
+
+	/* read back and check it */
+	rc = read_cr(nor);
+	if (rc <= 0 || !(rc & CR_QUAD_EN_SPAN)) {
+		dev_err(nor->dev, "Spansion Quad bit not set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable quad mode using the Macronix procedure for Macronix parts and
+ * the Spansion procedure for everything else.
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int set_quad_mode(struct spi_nor *nor, u32 jedec_id)
+{
+	int status;
+
+	if (JEDEC_MFR(jedec_id) == CFI_MFR_MACRONIX) {
+		status = macronix_quad_enable(nor);
+		if (status) {
+			dev_err(nor->dev, "Macronix quad-read not enabled\n");
+			return -EINVAL;
+		}
+	} else {
+		status = spansion_quad_enable(nor);
+		if (status) {
+			dev_err(nor->dev, "Spansion quad-read not enabled\n");
+			return -EINVAL;
+		}
+	}
+
+	return status;
+}
+
+/*
+ * Validate the hooks a driver must provide (dev, read, write, read_reg,
+ * write_reg, erase) and install the default read_id / wait_till_ready
+ * implementations when the driver left them unset.
+ * Returns 0, or -EINVAL if a mandatory field is missing.
+ */
+static int spi_nor_check(struct spi_nor *nor)
+{
+	bool complete = nor->dev && nor->read && nor->write &&
+			nor->read_reg && nor->write_reg && nor->erase;
+
+	if (!complete) {
+		pr_err("spi-nor: please fill all the necessary fields!\n");
+		return -EINVAL;
+	}
+
+	if (nor->read_id == NULL)
+		nor->read_id = spi_nor_read_id;
+	if (nor->wait_till_ready == NULL)
+		nor->wait_till_ready = spi_nor_wait_till_ready;
+
+	return 0;
+}
+
+/*
+ * Identify the flash behind @nor and fill in @nor and nor->mtd.
+ *
+ * @nor:  framework object; the mandatory hooks are validated by
+ *        spi_nor_check() before anything else is touched
+ * @id:   table entry the caller expects; replaced by a platform-data
+ *        match on data->type, and by the JEDEC probe result when the
+ *        entry carries a JEDEC id and the probe finds a different chip
+ * @mode: read mode requested by the caller; quad/dual are used only when
+ *        the chip's flags advertise them
+ *
+ * Returns 0 on success, -EINVAL for a bad @nor or a quad-enable failure,
+ * -ENOENT when no table entry is available, or the error from the JEDEC
+ * probe.
+ */
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode)
+{
+	struct flash_info		*info;
+	struct flash_platform_data	*data;
+	struct device *dev = nor->dev;
+	struct mtd_info *mtd = nor->mtd;
+	struct device_node *np;
+	int ret;
+	int i;
+
+	ret = spi_nor_check(nor);
+	if (ret)
+		return ret;
+
+	/* Only now is nor->dev known to be non-NULL. */
+	np = dev->of_node;
+
+	/* Platform data helps sort out which chip type we have, as
+	 * well as how this board partitions it.  If we don't have
+	 * a chip ID, try the JEDEC id commands; they'll work for most
+	 * newer chips, even if we don't recognize the particular chip.
+	 */
+	data = dev_get_platdata(dev);
+	if (data && data->type) {
+		const struct spi_device_id *plat_id;
+
+		for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) {
+			plat_id = &spi_nor_ids[i];
+			if (strcmp(data->type, plat_id->name))
+				continue;
+			break;
+		}
+
+		if (i < ARRAY_SIZE(spi_nor_ids) - 1)
+			id = plat_id;
+		else
+			dev_warn(dev, "unrecognized id %s\n", data->type);
+	}
+
+	/* Without a table entry there is nothing to describe the chip. */
+	if (!id) {
+		dev_err(dev, "no flash id given\n");
+		return -ENOENT;
+	}
+
+	info = (void *)id->driver_data;
+
+	if (info->jedec_id) {
+		const struct spi_device_id *jid;
+
+		jid = jedec_probe(nor);
+		if (IS_ERR(jid)) {
+			return PTR_ERR(jid);
+		} else if (jid != id) {
+			/*
+			 * JEDEC knows better, so overwrite platform ID. We
+			 * can't trust partitions any longer, but we'll let
+			 * mtd apply them anyway, since some partitions may be
+			 * marked read-only, and we don't want to lose that
+			 * information, even if it's not 100% accurate.
+			 */
+			dev_warn(dev, "found %s, expected %s\n",
+				 jid->name, id->name);
+			id = jid;
+			info = (void *)jid->driver_data;
+		}
+	}
+
+	mutex_init(&nor->lock);
+
+	/*
+	 * Atmel, SST and Intel/Numonyx serial nor tend to power
+	 * up with the software protection bits set
+	 */
+
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) {
+		write_enable(nor);
+		write_sr(nor, 0);
+	}
+
+	if (data && data->name)
+		mtd->name = data->name;
+	else
+		mtd->name = dev_name(dev);
+
+	mtd->type = MTD_NORFLASH;
+	mtd->writesize = 1;
+	mtd->flags = MTD_CAP_NORFLASH;
+	mtd->size = info->sector_size * info->n_sectors;
+	mtd->_erase = spi_nor_erase;
+	mtd->_read = spi_nor_read;
+
+	/* nor protection support for STmicro chips */
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
+		mtd->_lock = spi_nor_lock;
+		mtd->_unlock = spi_nor_unlock;
+	}
+
+	/* sst nor chips use AAI word program */
+	if (info->flags & SST_WRITE)
+		mtd->_write = sst_write;
+	else
+		mtd->_write = spi_nor_write;
+
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = SPINOR_OP_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else {
+		nor->erase_opcode = SPINOR_OP_SE;
+		mtd->erasesize = info->sector_size;
+	}
+
+	if (info->flags & SPI_NOR_NO_ERASE)
+		mtd->flags |= MTD_NO_ERASE;
+
+	mtd->dev.parent = dev;
+	nor->page_size = info->page_size;
+	mtd->writebufsize = nor->page_size;
+
+	if (np) {
+		/* If we were instantiated by DT, use it */
+		if (of_property_read_bool(np, "m25p,fast-read"))
+			nor->flash_read = SPI_NOR_FAST;
+		else
+			nor->flash_read = SPI_NOR_NORMAL;
+	} else {
+		/* If we weren't instantiated by DT, default to fast-read */
+		nor->flash_read = SPI_NOR_FAST;
+	}
+
+	/* Some devices cannot do fast-read, no matter what DT tells us */
+	if (info->flags & SPI_NOR_NO_FR)
+		nor->flash_read = SPI_NOR_NORMAL;
+
+	/* Quad/Dual-read mode takes precedence over fast/normal */
+	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
+		ret = set_quad_mode(nor, info->jedec_id);
+		if (ret) {
+			dev_err(dev, "quad mode not supported\n");
+			return ret;
+		}
+		nor->flash_read = SPI_NOR_QUAD;
+	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
+		nor->flash_read = SPI_NOR_DUAL;
+	}
+
+	/* Default commands */
+	switch (nor->flash_read) {
+	case SPI_NOR_QUAD:
+		nor->read_opcode = SPINOR_OP_READ_1_1_4;
+		break;
+	case SPI_NOR_DUAL:
+		nor->read_opcode = SPINOR_OP_READ_1_1_2;
+		break;
+	case SPI_NOR_FAST:
+		nor->read_opcode = SPINOR_OP_READ_FAST;
+		break;
+	case SPI_NOR_NORMAL:
+		nor->read_opcode = SPINOR_OP_READ;
+		break;
+	default:
+		dev_err(dev, "No Read opcode defined\n");
+		return -EINVAL;
+	}
+
+	nor->program_opcode = SPINOR_OP_PP;
+
+	if (info->addr_width)
+		nor->addr_width = info->addr_width;
+	else if (mtd->size > 0x1000000) {
+		/* enable 4-byte addressing if the device exceeds 16MiB */
+		nor->addr_width = 4;
+		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
+			/* Dedicated 4-byte command set */
+			switch (nor->flash_read) {
+			case SPI_NOR_QUAD:
+				nor->read_opcode = SPINOR_OP_READ4_1_1_4;
+				break;
+			case SPI_NOR_DUAL:
+				nor->read_opcode = SPINOR_OP_READ4_1_1_2;
+				break;
+			case SPI_NOR_FAST:
+				nor->read_opcode = SPINOR_OP_READ4_FAST;
+				break;
+			case SPI_NOR_NORMAL:
+				nor->read_opcode = SPINOR_OP_READ4;
+				break;
+			}
+			nor->program_opcode = SPINOR_OP_PP_4B;
+			/* No small sector erase for 4-byte command set */
+			nor->erase_opcode = SPINOR_OP_SE_4B;
+			mtd->erasesize = info->sector_size;
+		} else
+			set_4byte(nor, info->jedec_id, 1);
+	} else {
+		nor->addr_width = 3;
+	}
+
+	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
+
+	dev_info(dev, "%s (%lld Kbytes)\n", id->name,
+			(long long)mtd->size >> 10);
+
+	dev_dbg(dev,
+		"mtd .name = %s, .size = 0x%llx (%lldMiB), "
+		".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
+		mtd->name, (long long)mtd->size, (long long)(mtd->size >> 20),
+		mtd->erasesize, mtd->erasesize / 1024, mtd->numeraseregions);
+
+	if (mtd->numeraseregions)
+		for (i = 0; i < mtd->numeraseregions; i++)
+			dev_dbg(dev,
+				"mtd.eraseregions[%d] = { .offset = 0x%llx, "
+				".erasesize = 0x%.8x (%uKiB), "
+				".numblocks = %d }\n",
+				i, (long long)mtd->eraseregions[i].offset,
+				mtd->eraseregions[i].erasesize,
+				mtd->eraseregions[i].erasesize / 1024,
+				mtd->eraseregions[i].numblocks);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_nor_scan);
+
+/*
+ * Look up @name in spi_nor_ids.  The walk stops at the first entry whose
+ * name is empty.  Returns the matching entry, or NULL if there is none.
+ */
+const struct spi_device_id *spi_nor_match_id(char *name)
+{
+	const struct spi_device_id *entry;
+
+	for (entry = spi_nor_ids; entry->name[0]; entry++)
+		if (strcmp(name, entry->name) == 0)
+			return entry;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(spi_nor_match_id);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
+MODULE_AUTHOR("Mike Lavender");
+MODULE_DESCRIPTION("framework for SPI NOR");

diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c
index 2e9e2d1..f19ab1a 100644
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c

@@ -69,8 +69,8 @@
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
+	prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt);
 	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
-		prandom_bytes_state(&rnd_state, writebuf, use_len);
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
@@ -78,7 +78,7 @@
 		ops.oobretlen = 0;
 		ops.ooboffs   = use_offset;
 		ops.datbuf    = NULL;
-		ops.oobbuf    = writebuf;
+		ops.oobbuf    = writebuf + (use_len_max * i) + use_offset;
 		err = mtd_write_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
 			pr_err("error: writeoob failed at %#llx\n",
@@ -122,8 +122,8 @@
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
+	prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt);
 	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
-		prandom_bytes_state(&rnd_state, writebuf, use_len);
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
@@ -139,7 +139,8 @@
 			errcnt += 1;
 			return err ? err : -1;
 		}
-		if (memcmp(readbuf, writebuf, use_len)) {
+		if (memcmp(readbuf, writebuf + (use_len_max * i) + use_offset,
+			   use_len)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
@@ -166,7 +167,9 @@
 				errcnt += 1;
 				return err ? err : -1;
 			}
-			if (memcmp(readbuf + use_offset, writebuf, use_len)) {
+			if (memcmp(readbuf + use_offset,
+				   writebuf + (use_len_max * i) + use_offset,
+				   use_len)) {
 				pr_err("error: verify failed at %#llx\n",
 						(long long)addr);
 				errcnt += 1;
@@ -566,8 +569,8 @@
 		if (bbt[i] || bbt[i + 1])
 			continue;
 		addr = (i + 1) * mtd->erasesize - mtd->writesize;
+		prandom_bytes_state(&rnd_state, writebuf, sz * cnt);
 		for (pg = 0; pg < cnt; ++pg) {
-			prandom_bytes_state(&rnd_state, writebuf, sz);
 			ops.mode      = MTD_OPS_AUTO_OOB;
 			ops.len       = 0;
 			ops.retlen    = 0;
@@ -575,7 +578,7 @@
 			ops.oobretlen = 0;
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
-			ops.oobbuf    = writebuf;
+			ops.oobbuf    = writebuf + pg * sz;
 			err = mtd_write_oob(mtd, addr, &ops);
 			if (err)
 				goto out;

diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 20a667c..8457df7 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c

@@ -432,8 +432,10 @@
 	 * Rembember workqueues are cheap, they're not threads.
 	 */
 	dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name);
-	if (!dev->wq)
+	if (!dev->wq) {
+		ret = -ENOMEM;
 		goto out_free_queue;
+	}
 	INIT_WORK(&dev->work, ubiblock_do_work);
 
 	mutex_lock(&devices_mutex);

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index f54562a..7646220 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c

@@ -731,7 +731,7 @@
 			goto out_free;
 		}
 
-		re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+		re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_READWRITE);
 		if (IS_ERR(re->desc)) {
 			err = PTR_ERR(re->desc);
 			ubi_err("cannot open volume %d, error %d", vol_id, err);

diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index c5dad65..b04e7d0 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c

@@ -125,9 +125,9 @@
 		parent = *p;
 		av = rb_entry(parent, struct ubi_ainf_volume, rb);
 
-		if (vol_id > av->vol_id)
+		if (vol_id < av->vol_id)
 			p = &(*p)->rb_left;
-		else if (vol_id > av->vol_id)
+		else
 			p = &(*p)->rb_right;
 	}
 

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index b667a51..0dfeaf5 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c

@@ -157,7 +157,7 @@
 
 	rcu_read_lock();
 	first_slave = bond_first_slave_rcu(bond);
-	agg = first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL;
+	agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL;
 	rcu_read_unlock();
 
 	return agg;
@@ -192,7 +192,7 @@
 {
 	struct slave *slave = port->slave;
 
-	if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev))
+	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave))
 		bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);
 }
 
@@ -241,7 +241,7 @@
  */
 static inline void __get_state_machine_lock(struct port *port)
 {
-	spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
+	spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
 }
 
 /**
@@ -250,7 +250,7 @@
  */
 static inline void __release_state_machine_lock(struct port *port)
 {
-	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
+	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
 }
 
 /**
@@ -350,7 +350,7 @@
 static inline void __initialize_port_locks(struct slave *slave)
 {
 	/* make sure it isn't called twice */
-	spin_lock_init(&(SLAVE_AD_INFO(slave).state_machine_lock));
+	spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock));
 }
 
 /* Conversions */
@@ -688,8 +688,8 @@
 	struct slave *slave;
 
 	bond_for_each_slave_rcu(bond, slave, iter)
-		if (SLAVE_AD_INFO(slave).aggregator.is_active)
-			return &(SLAVE_AD_INFO(slave).aggregator);
+		if (SLAVE_AD_INFO(slave)->aggregator.is_active)
+			return &(SLAVE_AD_INFO(slave)->aggregator);
 
 	return NULL;
 }
@@ -1293,7 +1293,7 @@
 	}
 	/* search on all aggregators for a suitable aggregator for this port */
 	bond_for_each_slave(bond, slave, iter) {
-		aggregator = &(SLAVE_AD_INFO(slave).aggregator);
+		aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
 
 		/* keep a free aggregator for later use(if needed) */
 		if (!aggregator->lag_ports) {
@@ -1504,7 +1504,7 @@
 	best = (active && agg_device_up(active)) ? active : NULL;
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		agg = &(SLAVE_AD_INFO(slave).aggregator);
+		agg = &(SLAVE_AD_INFO(slave)->aggregator);
 
 		agg->is_active = 0;
 
@@ -1549,7 +1549,7 @@
 			 best->slave ? best->slave->dev->name : "NULL");
 
 		bond_for_each_slave_rcu(bond, slave, iter) {
-			agg = &(SLAVE_AD_INFO(slave).aggregator);
+			agg = &(SLAVE_AD_INFO(slave)->aggregator);
 
 			pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
 				 agg->aggregator_identifier, agg->num_of_ports,
@@ -1840,16 +1840,16 @@
 	struct aggregator *aggregator;
 
 	/* check that the slave has not been initialized yet. */
-	if (SLAVE_AD_INFO(slave).port.slave != slave) {
+	if (SLAVE_AD_INFO(slave)->port.slave != slave) {
 
 		/* port initialization */
-		port = &(SLAVE_AD_INFO(slave).port);
+		port = &(SLAVE_AD_INFO(slave)->port);
 
 		ad_initialize_port(port, bond->params.lacp_fast);
 
 		__initialize_port_locks(slave);
 		port->slave = slave;
-		port->actor_port_number = SLAVE_AD_INFO(slave).id;
+		port->actor_port_number = SLAVE_AD_INFO(slave)->id;
 		/* key is determined according to the link speed, duplex and user key(which
 		 * is yet not supported)
 		 */
@@ -1874,7 +1874,7 @@
 		__disable_port(port);
 
 		/* aggregator initialization */
-		aggregator = &(SLAVE_AD_INFO(slave).aggregator);
+		aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
 
 		ad_initialize_agg(aggregator);
 
@@ -1903,8 +1903,8 @@
 	struct slave *slave_iter;
 	struct list_head *iter;
 
-	aggregator = &(SLAVE_AD_INFO(slave).aggregator);
-	port = &(SLAVE_AD_INFO(slave).port);
+	aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
+	port = &(SLAVE_AD_INFO(slave)->port);
 
 	/* if slave is null, the whole port is not initialized */
 	if (!port->slave) {
@@ -1932,7 +1932,7 @@
 		    (aggregator->lag_ports->next_port_in_aggregator)) {
 			/* find new aggregator for the related port(s) */
 			bond_for_each_slave(bond, slave_iter, iter) {
-				new_aggregator = &(SLAVE_AD_INFO(slave_iter).aggregator);
+				new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);
 				/* if the new aggregator is empty, or it is
 				 * connected to our port only
 				 */
@@ -2010,7 +2010,7 @@
 
 	/* find the aggregator that this port is connected to */
 	bond_for_each_slave(bond, slave_iter, iter) {
-		temp_aggregator = &(SLAVE_AD_INFO(slave_iter).aggregator);
+		temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);
 		prev_port = NULL;
 		/* search the port in the aggregator's related ports */
 		for (temp_port = temp_aggregator->lag_ports; temp_port;
@@ -2076,7 +2076,7 @@
 	if (BOND_AD_INFO(bond).agg_select_timer &&
 	    !(--BOND_AD_INFO(bond).agg_select_timer)) {
 		slave = bond_first_slave_rcu(bond);
-		port = slave ? &(SLAVE_AD_INFO(slave).port) : NULL;
+		port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL;
 
 		/* select the active aggregator for the bond */
 		if (port) {
@@ -2094,7 +2094,7 @@
 
 	/* for each port run the state machines */
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		port = &(SLAVE_AD_INFO(slave).port);
+		port = &(SLAVE_AD_INFO(slave)->port);
 		if (!port->slave) {
 			pr_warn_ratelimited("%s: Warning: Found an uninitialized port\n",
 					    bond->dev->name);
@@ -2155,7 +2155,7 @@
 
 	if (length >= sizeof(struct lacpdu)) {
 
-		port = &(SLAVE_AD_INFO(slave).port);
+		port = &(SLAVE_AD_INFO(slave)->port);
 
 		if (!port->slave) {
 			pr_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n",
@@ -2212,7 +2212,7 @@
 {
 	struct port *port;
 
-	port = &(SLAVE_AD_INFO(slave).port);
+	port = &(SLAVE_AD_INFO(slave)->port);
 
 	/* if slave is null, the whole port is not initialized */
 	if (!port->slave) {
@@ -2245,7 +2245,7 @@
 {
 	struct port *port;
 
-	port = &(SLAVE_AD_INFO(slave).port);
+	port = &(SLAVE_AD_INFO(slave)->port);
 
 	/* if slave is null, the whole port is not initialized */
 	if (!port->slave) {
@@ -2279,7 +2279,7 @@
 {
 	struct port *port;
 
-	port = &(SLAVE_AD_INFO(slave).port);
+	port = &(SLAVE_AD_INFO(slave)->port);
 
 	/* if slave is null, the whole port is not initialized */
 	if (!port->slave) {
@@ -2347,7 +2347,7 @@
 		ret = 0;
 		goto out;
 	}
-	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave).aggregator));
+	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
 	if (active) {
 		/* are enough slaves available to consider link up? */
 		if (active->num_of_ports < bond->params.min_links) {
@@ -2384,7 +2384,7 @@
 	struct port *port;
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		port = &(SLAVE_AD_INFO(slave).port);
+		port = &(SLAVE_AD_INFO(slave)->port);
 		if (port->aggregator && port->aggregator->is_active) {
 			aggregator = port->aggregator;
 			break;
@@ -2440,22 +2440,22 @@
 		goto err_free;
 	}
 
-	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
+	slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
 	first_ok_slave = NULL;
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		agg = SLAVE_AD_INFO(slave).port.aggregator;
+		agg = SLAVE_AD_INFO(slave)->port.aggregator;
 		if (!agg || agg->aggregator_identifier != agg_id)
 			continue;
 
 		if (slave_agg_no >= 0) {
-			if (!first_ok_slave && SLAVE_IS_OK(slave))
+			if (!first_ok_slave && bond_slave_can_tx(slave))
 				first_ok_slave = slave;
 			slave_agg_no--;
 			continue;
 		}
 
-		if (SLAVE_IS_OK(slave)) {
+		if (bond_slave_can_tx(slave)) {
 			bond_dev_queue_xmit(bond, skb, slave->dev);
 			goto out;
 		}
@@ -2522,7 +2522,7 @@
 
 	lacp_fast = bond->params.lacp_fast;
 	bond_for_each_slave(bond, slave, iter) {
-		port = &(SLAVE_AD_INFO(slave).port);
+		port = &(SLAVE_AD_INFO(slave)->port);
 		__get_state_machine_lock(port);
 		if (lacp_fast)
 			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 93580a4..76c0dad 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c

@@ -229,7 +229,7 @@
 
 	/* Find the slave with the largest gap */
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		if (SLAVE_IS_OK(slave)) {
+		if (bond_slave_can_tx(slave)) {
 			long long gap = compute_gap(slave);
 
 			if (max_gap < gap) {
@@ -384,7 +384,7 @@
 	bool found = false;
 
 	bond_for_each_slave(bond, slave, iter) {
-		if (!SLAVE_IS_OK(slave))
+		if (!bond_slave_can_tx(slave))
 			continue;
 		if (!found) {
 			if (!before || before->speed < slave->speed)
@@ -417,7 +417,7 @@
 	bool found = false;
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		if (!SLAVE_IS_OK(slave))
+		if (!bond_slave_can_tx(slave))
 			continue;
 		if (!found) {
 			if (!before || before->speed < slave->speed)
@@ -755,7 +755,7 @@
 	/* Don't modify or load balance ARPs that do not originate locally
 	 * (e.g.,arrive via a bridge).
 	 */
-	if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
+	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
 		return NULL;
 
 	if (arp->op_code == htons(ARPOP_REPLY)) {
@@ -1039,11 +1039,14 @@
 	struct bonding *bond = bond_get_bond_by_slave(slave);
 	struct net_device *upper;
 	struct list_head *iter;
+	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP];
 
 	/* send untagged */
 	alb_send_lp_vid(slave, mac_addr, 0, 0);
 
-	/* loop through vlans and send one packet for each */
+	/* loop through all devices and see if we need to send a packet
+	 * for that device.
+	 */
 	rcu_read_lock();
 	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 		if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) {
@@ -1059,6 +1062,16 @@
 						vlan_dev_vlan_id(upper));
 			}
 		}
+
+		/* If this is a macvlan device, then only send updates
+		 * when strict_match is turned off.
+		 */
+		if (netif_is_macvlan(upper) && !strict_match) {
+			memset(tags, 0, sizeof(tags));
+			bond_verify_device_path(bond->dev, upper, tags);
+			alb_send_lp_vid(slave, upper->dev_addr,
+					tags[0].vlan_proto, tags[0].vlan_id);
+		}
 	}
 	rcu_read_unlock();
 }
@@ -1068,7 +1081,7 @@
 	struct net_device *dev = slave->dev;
 	struct sockaddr s_addr;
 
-	if (slave->bond->params.mode == BOND_MODE_TLB) {
+	if (BOND_MODE(slave->bond) == BOND_MODE_TLB) {
 		memcpy(dev->dev_addr, addr, dev->addr_len);
 		return 0;
 	}
@@ -1111,13 +1124,13 @@
 static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
 				struct slave *slave2)
 {
-	int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2));
+	int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2));
 	struct slave *disabled_slave = NULL;
 
 	ASSERT_RTNL();
 
 	/* fasten the change in the switch */
-	if (SLAVE_IS_OK(slave1)) {
+	if (bond_slave_can_tx(slave1)) {
 		alb_send_learning_packets(slave1, slave1->dev->dev_addr, false);
 		if (bond->alb_info.rlb_enabled) {
 			/* inform the clients that the mac address
@@ -1129,7 +1142,7 @@
 		disabled_slave = slave1;
 	}
 
-	if (SLAVE_IS_OK(slave2)) {
+	if (bond_slave_can_tx(slave2)) {
 		alb_send_learning_packets(slave2, slave2->dev->dev_addr, false);
 		if (bond->alb_info.rlb_enabled) {
 			/* inform the clients that the mac address
@@ -1358,6 +1371,77 @@
 		rlb_deinitialize(bond);
 }
 
+static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
+		struct slave *tx_slave)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct ethhdr *eth_data = eth_hdr(skb);
+
+	if (!tx_slave) {
+		/* unbalanced or unassigned, send through primary */
+		tx_slave = rcu_dereference(bond->curr_active_slave);
+		if (bond->params.tlb_dynamic_lb)
+			bond_info->unbalanced_load += skb->len;
+	}
+
+	if (tx_slave && bond_slave_can_tx(tx_slave)) {
+		if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
+			ether_addr_copy(eth_data->h_source,
+					tx_slave->dev->dev_addr);
+		}
+
+		bond_dev_queue_xmit(bond, skb, tx_slave->dev);
+		goto out;
+	}
+
+	if (tx_slave && bond->params.tlb_dynamic_lb) {
+		_lock_tx_hashtbl(bond);
+		__tlb_clear_slave(bond, tx_slave, 0);
+		_unlock_tx_hashtbl(bond);
+	}
+
+	/* no suitable interface, frame not sent */
+	dev_kfree_skb_any(skb);
+out:
+	return NETDEV_TX_OK;
+}
+
+int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct ethhdr *eth_data;
+	struct slave *tx_slave = NULL;
+	u32 hash_index;
+
+	skb_reset_mac_header(skb);
+	eth_data = eth_hdr(skb);
+
+	/* Do not TX balance any multicast or broadcast */
+	if (!is_multicast_ether_addr(eth_data->h_dest)) {
+		switch (skb->protocol) {
+		case htons(ETH_P_IP):
+		case htons(ETH_P_IPX):
+		    /* In case of IPX, it will fall back to L2 hash */
+		case htons(ETH_P_IPV6):
+			hash_index = bond_xmit_hash(bond, skb);
+			if (bond->params.tlb_dynamic_lb) {
+				tx_slave = tlb_choose_channel(bond,
+							      hash_index & 0xFF,
+							      skb->len);
+			} else {
+				struct list_head *iter;
+				int idx = hash_index % bond->slave_cnt;
+
+				bond_for_each_slave_rcu(bond, tx_slave, iter)
+					if (--idx < 0)
+						break;
+			}
+			break;
+		}
+	}
+	return bond_do_alb_xmit(skb, bond, tx_slave);
+}
+
 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
@@ -1366,7 +1450,7 @@
 	struct slave *tx_slave = NULL;
 	static const __be32 ip_bcast = htonl(0xffffffff);
 	int hash_size = 0;
-	int do_tx_balance = 1;
+	bool do_tx_balance = true;
 	u32 hash_index = 0;
 	const u8 *hash_start = NULL;
 	struct ipv6hdr *ip6hdr;
@@ -1381,7 +1465,7 @@
 		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) ||
 		    (iph->daddr == ip_bcast) ||
 		    (iph->protocol == IPPROTO_IGMP)) {
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 		hash_start = (char *)&(iph->daddr);
@@ -1393,7 +1477,7 @@
 		 * that here just in case.
 		 */
 		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) {
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 
@@ -1401,7 +1485,7 @@
 		 * broadcasts in IPv4.
 		 */
 		if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) {
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 
@@ -1411,7 +1495,7 @@
 		 */
 		ip6hdr = ipv6_hdr(skb);
 		if (ipv6_addr_any(&ip6hdr->saddr)) {
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 
@@ -1421,7 +1505,7 @@
 	case ETH_P_IPX:
 		if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) {
 			/* something is wrong with this packet */
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 
@@ -1430,7 +1514,7 @@
 			 * this family since it has an "ARP" like
 			 * mechanism
 			 */
-			do_tx_balance = 0;
+			do_tx_balance = false;
 			break;
 		}
 
@@ -1438,12 +1522,12 @@
 		hash_size = ETH_ALEN;
 		break;
 	case ETH_P_ARP:
-		do_tx_balance = 0;
+		do_tx_balance = false;
 		if (bond_info->rlb_enabled)
 			tx_slave = rlb_arp_xmit(skb, bond);
 		break;
 	default:
-		do_tx_balance = 0;
+		do_tx_balance = false;
 		break;
 	}
 
@@ -1452,32 +1536,7 @@
 		tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
 	}
 
-	if (!tx_slave) {
-		/* unbalanced or unassigned, send through primary */
-		tx_slave = rcu_dereference(bond->curr_active_slave);
-		bond_info->unbalanced_load += skb->len;
-	}
-
-	if (tx_slave && SLAVE_IS_OK(tx_slave)) {
-		if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
-			ether_addr_copy(eth_data->h_source,
-					tx_slave->dev->dev_addr);
-		}
-
-		bond_dev_queue_xmit(bond, skb, tx_slave->dev);
-		goto out;
-	}
-
-	if (tx_slave) {
-		_lock_tx_hashtbl(bond);
-		__tlb_clear_slave(bond, tx_slave, 0);
-		_unlock_tx_hashtbl(bond);
-	}
-
-	/* no suitable interface, frame not sent */
-	dev_kfree_skb_any(skb);
-out:
-	return NETDEV_TX_OK;
+	return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 
 void bond_alb_monitor(struct work_struct *work)
@@ -1514,8 +1573,10 @@
 			/* If updating current_active, use all currently
 			 * user mac addreses (!strict_match).  Otherwise, only
 			 * use mac of the slave device.
+			 * In RLB mode, we always use strict matches.
 			 */
-			strict_match = (slave != bond->curr_active_slave);
+			strict_match = (slave != bond->curr_active_slave ||
+					bond_info->rlb_enabled);
 			alb_send_learning_packets(slave, slave->dev->dev_addr,
 						  strict_match);
 		}
@@ -1719,7 +1780,7 @@
 	/* in TLB mode, the slave might flip down/up with the old dev_addr,
 	 * and thus filter bond->dev_addr's packets, so force bond's mac
 	 */
-	if (bond->params.mode == BOND_MODE_TLB) {
+	if (BOND_MODE(bond) == BOND_MODE_TLB) {
 		struct sockaddr sa;
 		u8 tmp_addr[ETH_ALEN];
 

diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index e09dd4bf..5fc76c0 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h

@@ -175,6 +175,7 @@
 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
 void bond_alb_monitor(struct work_struct *);
 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);

diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 2d3f7fa..658e761 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c

@@ -23,7 +23,7 @@
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	if (bond->params.mode != BOND_MODE_ALB)
+	if (BOND_MODE(bond) != BOND_MODE_ALB)
 		return 0;
 
 	seq_printf(m, "SourceIP        DestinationIP   "

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d3a6789..04f35f9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c

@@ -343,7 +343,7 @@
 	if (!bond_has_slaves(bond))
 		goto down;
 
-	if (bond->params.mode == BOND_MODE_8023AD)
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		return bond_3ad_set_carrier(bond);
 
 	bond_for_each_slave(bond, slave, iter) {
@@ -497,7 +497,7 @@
 	struct list_head *iter;
 	int err = 0;
 
-	if (USES_PRIMARY(bond->params.mode)) {
+	if (bond_uses_primary(bond)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave) {
 			err = dev_set_promiscuity(bond->curr_active_slave->dev,
@@ -523,7 +523,7 @@
 	struct list_head *iter;
 	int err = 0;
 
-	if (USES_PRIMARY(bond->params.mode)) {
+	if (bond_uses_primary(bond)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave) {
 			err = dev_set_allmulti(bond->curr_active_slave->dev,
@@ -574,7 +574,7 @@
 	dev_uc_unsync(slave_dev, bond_dev);
 	dev_mc_unsync(slave_dev, bond_dev);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		/* del lacpdu mc addr from mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
@@ -585,8 +585,8 @@
 /*--------------------------- Active slave change ---------------------------*/
 
 /* Update the hardware address list and promisc/allmulti for the new and
- * old active slaves (if any).  Modes that are !USES_PRIMARY keep all
- * slaves up date at all times; only the USES_PRIMARY modes need to call
+ * old active slaves (if any).  Modes that are not using primary keep all
+ * slaves up to date at all times; only the modes that use primary need to call
  * this function to swap these settings during a failover.
  */
 static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
@@ -747,7 +747,7 @@
 	bond_for_each_slave(bond, slave, iter) {
 		if (slave->link == BOND_LINK_UP)
 			return slave;
-		if (slave->link == BOND_LINK_BACK && IS_UP(slave->dev) &&
+		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) &&
 		    slave->delay < mintime) {
 			mintime = slave->delay;
 			bestslave = slave;
@@ -801,7 +801,7 @@
 		new_active->last_link_up = jiffies;
 
 		if (new_active->link == BOND_LINK_BACK) {
-			if (USES_PRIMARY(bond->params.mode)) {
+			if (bond_uses_primary(bond)) {
 				pr_info("%s: making interface %s the new active one %d ms earlier\n",
 					bond->dev->name, new_active->dev->name,
 					(bond->params.updelay - new_active->delay) * bond->params.miimon);
@@ -810,20 +810,20 @@
 			new_active->delay = 0;
 			new_active->link = BOND_LINK_UP;
 
-			if (bond->params.mode == BOND_MODE_8023AD)
+			if (BOND_MODE(bond) == BOND_MODE_8023AD)
 				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
 
 			if (bond_is_lb(bond))
 				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
 		} else {
-			if (USES_PRIMARY(bond->params.mode)) {
+			if (bond_uses_primary(bond)) {
 				pr_info("%s: making interface %s the new active one\n",
 					bond->dev->name, new_active->dev->name);
 			}
 		}
 	}
 
-	if (USES_PRIMARY(bond->params.mode))
+	if (bond_uses_primary(bond))
 		bond_hw_addr_swap(bond, new_active, old_active);
 
 	if (bond_is_lb(bond)) {
@@ -838,7 +838,7 @@
 		rcu_assign_pointer(bond->curr_active_slave, new_active);
 	}
 
-	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
+	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
 		if (old_active)
 			bond_set_slave_inactive_flags(old_active,
 						      BOND_SLAVE_NOTIFY_NOW);
@@ -876,8 +876,8 @@
 	 * resend only if bond is brought up with the affected
 	 * bonding modes and the retransmission is enabled */
 	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
-	    ((USES_PRIMARY(bond->params.mode) && new_active) ||
-	     bond->params.mode == BOND_MODE_ROUNDROBIN)) {
+	    ((bond_uses_primary(bond) && new_active) ||
+	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
 		bond->igmp_retrans = bond->params.resend_igmp;
 		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
 	}
@@ -958,7 +958,7 @@
 	struct slave *slave;
 
 	bond_for_each_slave(bond, slave, iter)
-		if (IS_UP(slave->dev))
+		if (bond_slave_is_up(slave))
 			slave_disable_netpoll(slave);
 }
 
@@ -1038,6 +1038,7 @@
 
 	if (!bond_has_slaves(bond))
 		goto done;
+	vlan_features &= NETIF_F_ALL_FOR_ALL;
 
 	bond_for_each_slave(bond, slave, iter) {
 		vlan_features = netdev_increment_features(vlan_features,
@@ -1084,7 +1085,7 @@
 					    struct bonding *bond)
 {
 	if (bond_is_slave_inactive(slave)) {
-		if (bond->params.mode == BOND_MODE_ALB &&
+		if (BOND_MODE(bond) == BOND_MODE_ALB &&
 		    skb->pkt_type != PACKET_BROADCAST &&
 		    skb->pkt_type != PACKET_MULTICAST)
 			return false;
@@ -1126,7 +1127,7 @@
 
 	skb->dev = bond->dev;
 
-	if (bond->params.mode == BOND_MODE_ALB &&
+	if (BOND_MODE(bond) == BOND_MODE_ALB &&
 	    bond->dev->priv_flags & IFF_BRIDGE_PORT &&
 	    skb->pkt_type == PACKET_HOST) {
 
@@ -1163,6 +1164,35 @@
 	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 }
 
+static struct slave *bond_alloc_slave(struct bonding *bond)
+{
+	struct slave *slave = NULL;
+
+	slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
+	if (!slave)
+		return NULL;
+
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info),
+					       GFP_KERNEL);
+		if (!SLAVE_AD_INFO(slave)) {
+			kfree(slave);
+			return NULL;
+		}
+	}
+	return slave;
+}
+
+static void bond_free_slave(struct slave *slave)
+{
+	struct bonding *bond = bond_get_bond_by_slave(slave);
+
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
+		kfree(SLAVE_AD_INFO(slave));
+
+	kfree(slave);
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
@@ -1269,7 +1299,7 @@
 		if (!bond_has_slaves(bond)) {
 			pr_warn("%s: Warning: The first slave device specified does not support setting the MAC address\n",
 				bond_dev->name);
-			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
+			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
 				bond->params.fail_over_mac = BOND_FOM_ACTIVE;
 				pr_warn("%s: Setting fail_over_mac to active for active-backup mode\n",
 					bond_dev->name);
@@ -1290,11 +1320,14 @@
 	    bond->dev->addr_assign_type == NET_ADDR_RANDOM)
 		bond_set_dev_addr(bond->dev, slave_dev);
 
-	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
+	new_slave = bond_alloc_slave(bond);
 	if (!new_slave) {
 		res = -ENOMEM;
 		goto err_undo_flags;
 	}
+
+	new_slave->bond = bond;
+	new_slave->dev = slave_dev;
 	/*
 	 * Set the new_slave's queue_id to be zero.  Queue ID mapping
 	 * is set via sysfs or module option if desired.
@@ -1317,7 +1350,7 @@
 	ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr);
 
 	if (!bond->params.fail_over_mac ||
-	    bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
+	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
 		/*
 		 * Set slave to master's mac address.  The application already
 		 * set the master's mac address to that of the first slave
@@ -1338,8 +1371,6 @@
 		goto err_restore_mac;
 	}
 
-	new_slave->bond = bond;
-	new_slave->dev = slave_dev;
 	slave_dev->priv_flags |= IFF_BONDING;
 
 	if (bond_is_lb(bond)) {
@@ -1351,10 +1382,10 @@
 			goto err_close;
 	}
 
-	/* If the mode USES_PRIMARY, then the following is handled by
+	/* If the mode uses primary, then the following is handled by
 	 * bond_change_active_slave().
 	 */
-	if (!USES_PRIMARY(bond->params.mode)) {
+	if (!bond_uses_primary(bond)) {
 		/* set promiscuity level to new slave */
 		if (bond_dev->flags & IFF_PROMISC) {
 			res = dev_set_promiscuity(slave_dev, 1);
@@ -1377,7 +1408,7 @@
 		netif_addr_unlock_bh(bond_dev);
 	}
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		/* add lacpdu mc addr to mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
@@ -1450,7 +1481,7 @@
 		 new_slave->link == BOND_LINK_DOWN ? "DOWN" :
 		 (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
 
-	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
+	if (bond_uses_primary(bond) && bond->params.primary[0]) {
 		/* if there is a primary slave, remember it */
 		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
 			bond->primary_slave = new_slave;
@@ -1458,7 +1489,7 @@
 		}
 	}
 
-	switch (bond->params.mode) {
+	switch (BOND_MODE(bond)) {
 	case BOND_MODE_ACTIVEBACKUP:
 		bond_set_slave_inactive_flags(new_slave,
 					      BOND_SLAVE_NOTIFY_NOW);
@@ -1471,14 +1502,14 @@
 		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
 		/* if this is the first slave */
 		if (!prev_slave) {
-			SLAVE_AD_INFO(new_slave).id = 1;
+			SLAVE_AD_INFO(new_slave)->id = 1;
 			/* Initialize AD with the number of times that the AD timer is called in 1 second
 			 * can be called only after the mac address of the bond is set
 			 */
 			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);
 		} else {
-			SLAVE_AD_INFO(new_slave).id =
-				SLAVE_AD_INFO(prev_slave).id + 1;
+			SLAVE_AD_INFO(new_slave)->id =
+				SLAVE_AD_INFO(prev_slave)->id + 1;
 		}
 
 		bond_3ad_bind_slave(new_slave);
@@ -1539,7 +1570,7 @@
 	bond_compute_features(bond);
 	bond_set_carrier(bond);
 
-	if (USES_PRIMARY(bond->params.mode)) {
+	if (bond_uses_primary(bond)) {
 		block_netpoll_tx();
 		write_lock_bh(&bond->curr_slave_lock);
 		bond_select_active_slave(bond);
@@ -1563,7 +1594,7 @@
 	netdev_rx_handler_unregister(slave_dev);
 
 err_detach:
-	if (!USES_PRIMARY(bond->params.mode))
+	if (!bond_uses_primary(bond))
 		bond_hw_addr_flush(bond_dev, slave_dev);
 
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
@@ -1585,7 +1616,7 @@
 
 err_restore_mac:
 	if (!bond->params.fail_over_mac ||
-	    bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
+	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
 		/* XXX TODO - fom follow mode needs to change master's
 		 * MAC if this slave's MAC is in use by the bond, or at
 		 * least print a warning.
@@ -1599,7 +1630,7 @@
 	dev_set_mtu(slave_dev, new_slave->original_mtu);
 
 err_free:
-	kfree(new_slave);
+	bond_free_slave(new_slave);
 
 err_undo_flags:
 	/* Enslave of first slave has failed and we need to fix master's mac */
@@ -1661,7 +1692,7 @@
 	write_lock_bh(&bond->lock);
 
 	/* Inform AD package of unbinding of slave. */
-	if (bond->params.mode == BOND_MODE_8023AD)
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		bond_3ad_unbind_slave(slave);
 
 	write_unlock_bh(&bond->lock);
@@ -1676,7 +1707,7 @@
 	bond->current_arp_slave = NULL;
 
 	if (!all && (!bond->params.fail_over_mac ||
-		     bond->params.mode != BOND_MODE_ACTIVEBACKUP)) {
+		     BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) {
 		if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
 		    bond_has_slaves(bond))
 			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n",
@@ -1748,10 +1779,10 @@
 	/* must do this from outside any spinlocks */
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
 
-	/* If the mode USES_PRIMARY, then this cases was handled above by
+	/* If the mode uses primary, then this case was handled above by
 	 * bond_change_active_slave(..., NULL)
 	 */
-	if (!USES_PRIMARY(bond->params.mode)) {
+	if (!bond_uses_primary(bond)) {
 		/* unset promiscuity level from slave
 		 * NOTE: The NETDEV_CHANGEADDR call above may change the value
 		 * of the IFF_PROMISC flag in the bond_dev, but we need the
@@ -1775,7 +1806,7 @@
 	dev_close(slave_dev);
 
 	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
-	    bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
+	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
 		/* restore original ("permanent") mac address */
 		ether_addr_copy(addr.sa_data, slave->perm_hwaddr);
 		addr.sa_family = slave_dev->type;
@@ -1786,7 +1817,7 @@
 
 	slave_dev->priv_flags &= ~IFF_BONDING;
 
-	kfree(slave);
+	bond_free_slave(slave);
 
 	return 0;  /* deletion OK */
 }
@@ -1821,7 +1852,7 @@
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	info->bond_mode = bond->params.mode;
+	info->bond_mode = BOND_MODE(bond);
 	info->miimon = bond->params.miimon;
 
 	info->num_slaves = bond->slave_cnt;
@@ -1877,7 +1908,7 @@
 			if (slave->delay) {
 				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms\n",
 					bond->dev->name,
-					(bond->params.mode ==
+					(BOND_MODE(bond) ==
 					 BOND_MODE_ACTIVEBACKUP) ?
 					(bond_is_active_slave(slave) ?
 					 "active " : "backup ") : "",
@@ -1968,10 +1999,10 @@
 			slave->link = BOND_LINK_UP;
 			slave->last_link_up = jiffies;
 
-			if (bond->params.mode == BOND_MODE_8023AD) {
+			if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 				/* prevent it from being the active one */
 				bond_set_backup_slave(slave);
-			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
+			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
 				/* make it immediately active */
 				bond_set_active_slave(slave);
 			} else if (slave != bond->primary_slave) {
@@ -1985,7 +2016,7 @@
 				slave->duplex ? "full" : "half");
 
 			/* notify ad that the link status has changed */
-			if (bond->params.mode == BOND_MODE_8023AD)
+			if (BOND_MODE(bond) == BOND_MODE_8023AD)
 				bond_3ad_handle_link_change(slave, BOND_LINK_UP);
 
 			if (bond_is_lb(bond))
@@ -2004,15 +2035,15 @@
 
 			slave->link = BOND_LINK_DOWN;
 
-			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
-			    bond->params.mode == BOND_MODE_8023AD)
+			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
+			    BOND_MODE(bond) == BOND_MODE_8023AD)
 				bond_set_slave_inactive_flags(slave,
 							      BOND_SLAVE_NOTIFY_NOW);
 
 			pr_info("%s: link status definitely down for interface %s, disabling it\n",
 				bond->dev->name, slave->dev->name);
 
-			if (bond->params.mode == BOND_MODE_8023AD)
+			if (BOND_MODE(bond) == BOND_MODE_8023AD)
 				bond_3ad_handle_link_change(slave,
 							    BOND_LINK_DOWN);
 
@@ -2175,9 +2206,9 @@
  * When the path is validated, collect any vlan information in the
  * path.
  */
-static bool bond_verify_device_path(struct net_device *start_dev,
-				    struct net_device *end_dev,
-				    struct bond_vlan_tag *tags)
+bool bond_verify_device_path(struct net_device *start_dev,
+			     struct net_device *end_dev,
+			     struct bond_vlan_tag *tags)
 {
 	struct net_device *upper;
 	struct list_head  *iter;
@@ -2287,8 +2318,8 @@
 	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
 
 	if (!slave_do_arp_validate(bond, slave)) {
-		if ((slave_do_arp_validate_only(bond, slave) && is_arp) ||
-		    !slave_do_arp_validate_only(bond, slave))
+		if ((slave_do_arp_validate_only(bond) && is_arp) ||
+		    !slave_do_arp_validate_only(bond))
 			slave->last_rx = jiffies;
 		return RX_HANDLER_ANOTHER;
 	} else if (!is_arp) {
@@ -2456,7 +2487,7 @@
 		 * do - all replies will be rx'ed on same link causing slaves
 		 * to be unstable during low/no traffic periods
 		 */
-		if (IS_UP(slave->dev))
+		if (bond_slave_is_up(slave))
 			bond_arp_send_all(bond, slave);
 	}
 
@@ -2678,10 +2709,10 @@
 	bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER);
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
-		if (!found && !before && IS_UP(slave->dev))
+		if (!found && !before && bond_slave_is_up(slave))
 			before = slave;
 
-		if (found && !new_slave && IS_UP(slave->dev))
+		if (found && !new_slave && bond_slave_is_up(slave))
 			new_slave = slave;
 		/* if the link state is up at this point, we
 		 * mark it down - this can happen if we have
@@ -2690,7 +2721,7 @@
 		 * one the current slave so it is still marked
 		 * up when it is actually down
 		 */
-		if (!IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
+		if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
 			slave->link = BOND_LINK_DOWN;
 			if (slave->link_failure_count < UINT_MAX)
 				slave->link_failure_count++;
@@ -2853,7 +2884,7 @@
 
 		bond_update_speed_duplex(slave);
 
-		if (bond->params.mode == BOND_MODE_8023AD) {
+		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 			if (old_speed != slave->speed)
 				bond_3ad_adapter_speed_changed(slave);
 			if (old_duplex != slave->duplex)
@@ -2881,7 +2912,7 @@
 		break;
 	case NETDEV_CHANGENAME:
 		/* we don't care if we don't have primary set */
-		if (!USES_PRIMARY(bond->params.mode) ||
+		if (!bond_uses_primary(bond) ||
 		    !bond->params.primary[0])
 			break;
 
@@ -3011,20 +3042,18 @@
  * bond_xmit_hash - generate a hash value based on the xmit policy
  * @bond: bonding device
  * @skb: buffer to use for headers
- * @count: modulo value
  *
  * This function will extract the necessary headers from the skb buffer and use
  * them to generate a hash based on the xmit_policy set in the bonding device
- * which will be reduced modulo count before returning.
  */
-int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 {
 	struct flow_keys flow;
 	u32 hash;
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
 	    !bond_flow_dissect(bond, skb, &flow))
-		return bond_eth_hash(skb) % count;
+		return bond_eth_hash(skb);
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
 	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
@@ -3035,7 +3064,7 @@
 	hash ^= (hash >> 16);
 	hash ^= (hash >> 8);
 
-	return hash % count;
+	return hash;
 }
 
 /*-------------------------- Device entry points ----------------------------*/
@@ -3046,7 +3075,7 @@
 			  bond_resend_igmp_join_requests_delayed);
 	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
 	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
-	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
 		INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon);
 	else
 		INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
@@ -3073,7 +3102,7 @@
 	if (bond_has_slaves(bond)) {
 		read_lock(&bond->curr_slave_lock);
 		bond_for_each_slave(bond, slave, iter) {
-			if (USES_PRIMARY(bond->params.mode)
+			if (bond_uses_primary(bond)
 				&& (slave != bond->curr_active_slave)) {
 				bond_set_slave_inactive_flags(slave,
 							      BOND_SLAVE_NOTIFY_NOW);
@@ -3092,9 +3121,10 @@
 		/* bond_alb_initialize must be called before the timer
 		 * is started.
 		 */
-		if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB)))
+		if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))
 			return -ENOMEM;
-		queue_delayed_work(bond->wq, &bond->alb_work, 0);
+		if (bond->params.tlb_dynamic_lb)
+			queue_delayed_work(bond->wq, &bond->alb_work, 0);
 	}
 
 	if (bond->params.miimon)  /* link check interval, in milliseconds. */
@@ -3105,7 +3135,7 @@
 		bond->recv_probe = bond_arp_rcv;
 	}
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		queue_delayed_work(bond->wq, &bond->ad_work, 0);
 		/* register to receive LACPDUs */
 		bond->recv_probe = bond_3ad_lacpdu_recv;
@@ -3310,7 +3340,7 @@
 
 
 	rcu_read_lock();
-	if (USES_PRIMARY(bond->params.mode)) {
+	if (bond_uses_primary(bond)) {
 		slave = rcu_dereference(bond->curr_active_slave);
 		if (slave) {
 			dev_uc_sync(slave->dev, bond_dev);
@@ -3464,7 +3494,7 @@
 	struct list_head *iter;
 	int res = 0;
 
-	if (bond->params.mode == BOND_MODE_ALB)
+	if (BOND_MODE(bond) == BOND_MODE_ALB)
 		return bond_alb_set_mac_address(bond_dev, addr);
 
 
@@ -3475,7 +3505,7 @@
 	 * Returning an error causes ifenslave to fail.
 	 */
 	if (bond->params.fail_over_mac &&
-	    bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+	    BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
 		return 0;
 
 	if (!is_valid_ether_addr(sa->sa_data))
@@ -3555,7 +3585,7 @@
 	/* Here we start from the slave with slave_id */
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0) {
-			if (slave_can_tx(slave)) {
+			if (bond_slave_can_tx(slave)) {
 				bond_dev_queue_xmit(bond, skb, slave->dev);
 				return;
 			}
@@ -3567,7 +3597,7 @@
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0)
 			break;
-		if (slave_can_tx(slave)) {
+		if (bond_slave_can_tx(slave)) {
 			bond_dev_queue_xmit(bond, skb, slave->dev);
 			return;
 		}
@@ -3624,7 +3654,7 @@
 	 */
 	if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
 		slave = rcu_dereference(bond->curr_active_slave);
-		if (slave && slave_can_tx(slave))
+		if (slave && bond_slave_can_tx(slave))
 			bond_dev_queue_xmit(bond, skb, slave->dev);
 		else
 			bond_xmit_slave_id(bond, skb, 0);
@@ -3662,7 +3692,7 @@
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
+	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);
 
 	return NETDEV_TX_OK;
 }
@@ -3677,7 +3707,7 @@
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (bond_is_last_slave(bond, slave))
 			break;
-		if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
+		if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 			if (!skb2) {
@@ -3689,7 +3719,7 @@
 			bond_dev_queue_xmit(bond, skb2, slave->dev);
 		}
 	}
-	if (slave && IS_UP(slave->dev) && slave->link == BOND_LINK_UP)
+	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
 		bond_dev_queue_xmit(bond, skb, slave->dev);
 	else
 		dev_kfree_skb_any(skb);
@@ -3714,7 +3744,7 @@
 	/* Find out if any slaves have the same mapping as this skb. */
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (slave->queue_id == skb->queue_mapping) {
-			if (slave_can_tx(slave)) {
+			if (bond_slave_can_tx(slave)) {
 				bond_dev_queue_xmit(bond, skb, slave->dev);
 				return 0;
 			}
@@ -3755,12 +3785,11 @@
 {
 	struct bonding *bond = netdev_priv(dev);
 
-	if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
-		if (!bond_slave_override(bond, skb))
-			return NETDEV_TX_OK;
-	}
+	if (bond_should_override_tx_queue(bond) &&
+	    !bond_slave_override(bond, skb))
+		return NETDEV_TX_OK;
 
-	switch (bond->params.mode) {
+	switch (BOND_MODE(bond)) {
 	case BOND_MODE_ROUNDROBIN:
 		return bond_xmit_roundrobin(skb, dev);
 	case BOND_MODE_ACTIVEBACKUP:
@@ -3772,12 +3801,13 @@
 	case BOND_MODE_8023AD:
 		return bond_3ad_xmit_xor(skb, dev);
 	case BOND_MODE_ALB:
-	case BOND_MODE_TLB:
 		return bond_alb_xmit(skb, dev);
+	case BOND_MODE_TLB:
+		return bond_tlb_xmit(skb, dev);
 	default:
 		/* Should never happen, mode already checked */
 		pr_err("%s: Error: Unknown bonding mode %d\n",
-		       dev->name, bond->params.mode);
+		       dev->name, BOND_MODE(bond));
 		WARN_ON_ONCE(1);
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -3817,14 +3847,14 @@
 	ecmd->duplex = DUPLEX_UNKNOWN;
 	ecmd->port = PORT_OTHER;
 
-	/* Since SLAVE_IS_OK returns false for all inactive or down slaves, we
+	/* Since bond_slave_can_tx returns false for all inactive or down slaves, we
 	 * do not need to check mode.  Though link speed might not represent
 	 * the true receive or transmit bandwidth (not all modes are symmetric)
 	 * this is an accurate maximum.
 	 */
 	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
-		if (SLAVE_IS_OK(slave)) {
+		if (bond_slave_can_tx(slave)) {
 			if (slave->speed != SPEED_UNKNOWN)
 				speed += slave->speed;
 			if (ecmd->duplex == DUPLEX_UNKNOWN &&
@@ -3915,7 +3945,7 @@
 	/* Initialize the device options */
 	bond_dev->tx_queue_len = 0;
 	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
-	bond_dev->priv_flags |= IFF_BONDING;
+	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
 	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
 	/* At first, we block adding VLANs. That's the only way to
@@ -3994,7 +4024,8 @@
 
 	if (xmit_hash_policy) {
 		if ((bond_mode != BOND_MODE_XOR) &&
-		    (bond_mode != BOND_MODE_8023AD)) {
+		    (bond_mode != BOND_MODE_8023AD) &&
+		    (bond_mode != BOND_MODE_TLB)) {
 			pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
 				bond_mode_name(bond_mode));
 		} else {
@@ -4079,7 +4110,7 @@
 	}
 
 	/* reset values for 802.3ad/TLB/ALB */
-	if (BOND_NO_USES_ARP(bond_mode)) {
+	if (!bond_mode_uses_arp(bond_mode)) {
 		if (!miimon) {
 			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n");
 			pr_warn("Forcing miimon to 100msec\n");
@@ -4161,7 +4192,7 @@
 		   catch mistakes */
 		__be32 ip;
 		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
-		    IS_IP_TARGET_UNUSABLE_ADDRESS(ip)) {
+		    !bond_is_ip_target_ok(ip)) {
 			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
 				arp_ip_target[i]);
 			arp_interval = 0;
@@ -4234,7 +4265,7 @@
 		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");
 	}
 
-	if (primary && !USES_PRIMARY(bond_mode)) {
+	if (primary && !bond_mode_uses_primary(bond_mode)) {
 		/* currently, using a primary only makes sense
 		 * in active backup, TLB or ALB modes
 		 */
@@ -4300,6 +4331,7 @@
 	params->min_links = min_links;
 	params->lp_interval = lp_interval;
 	params->packets_per_slave = packets_per_slave;
+	params->tlb_dynamic_lb = 1; /* Default value */
 	if (packets_per_slave > 0) {
 		params->reciprocal_packets_per_slave =
 			reciprocal_value(packets_per_slave);

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index f847e16..5ab3c18 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c

@@ -56,10 +56,10 @@
 	if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, slave->queue_id))
 		goto nla_put_failure;
 
-	if (slave->bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
 		const struct aggregator *agg;
 
-		agg = SLAVE_AD_INFO(slave).port.aggregator;
+		agg = SLAVE_AD_INFO(slave)->port.aggregator;
 		if (agg)
 			if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
 					agg->aggregator_identifier))
@@ -407,7 +407,7 @@
 	unsigned int packets_per_slave;
 	int i, targets_added;
 
-	if (nla_put_u8(skb, IFLA_BOND_MODE, bond->params.mode))
+	if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond)))
 		goto nla_put_failure;
 
 	if (slave_dev &&
@@ -505,7 +505,7 @@
 		       bond->params.ad_select))
 		goto nla_put_failure;
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info info;
 
 		if (!bond_3ad_get_active_agg_info(bond, &info)) {

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 8320702..540e016 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c

@@ -70,6 +70,8 @@
 				const struct bond_opt_value *newval);
 static int bond_option_slaves_set(struct bonding *bond,
 				  const struct bond_opt_value *newval);
+static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
+				  const struct bond_opt_value *newval);
 
 
 static const struct bond_opt_value bond_mode_tbl[] = {
@@ -180,6 +182,12 @@
 	{ NULL,      -1,      0},
 };
 
+static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { /* tlb_dynamic_lb choices */
+	{ "off", 0,  0},
+	{ "on",  1,  BOND_VALFLAG_DEFAULT},	/* carries the default flag */
+	{ NULL,  -1, 0}	/* same closing entry as the other value tables */
+};
+
 static const struct bond_option bond_opts[] = {
 	[BOND_OPT_MODE] = {
 		.id = BOND_OPT_MODE,
@@ -200,7 +208,7 @@
 	[BOND_OPT_XMIT_HASH] = {
 		.id = BOND_OPT_XMIT_HASH,
 		.name = "xmit_hash_policy",
-		.desc = "balance-xor and 802.3ad hashing method",
+		.desc = "balance-xor, 802.3ad, and tlb hashing method",
 		.values = bond_xmit_hashtype_tbl,
 		.set = bond_option_xmit_hash_policy_set
 	},
@@ -365,9 +373,33 @@
 		.flags = BOND_OPTFLAG_RAWVAL,
 		.set = bond_option_slaves_set
 	},
+	[BOND_OPT_TLB_DYNAMIC_LB] = {
+		.id = BOND_OPT_TLB_DYNAMIC_LB,
+		.name = "tlb_dynamic_lb",
+		.desc = "Enable dynamic flow shuffling",
+		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)),
+		.values = bond_tlb_dynamic_lb_tbl,
+		.flags = BOND_OPTFLAG_IFDOWN,
+		.set = bond_option_tlb_dynamic_lb_set,
+	},
 	{ }
 };
 
+/* Look up an option by its name; returns NULL when no option matches */
+const struct bond_option *bond_opt_get_by_name(const char *name)
+{
+	const struct bond_option *opt;
+	int option;
+
+	for (option = 0; option < BOND_OPT_LAST; option++) {
+		opt = bond_opt_get(option);	/* result is NULL-checked below */
+		if (opt && !strcmp(opt->name, name))	/* exact name match */
+			return opt;
+	}
+
+	return NULL;
+}
+
 /* Searches for a value in opt's values[] table */
 const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val)
 {
@@ -641,7 +673,7 @@
 
 int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval)
 {
-	if (BOND_NO_USES_ARP(newval->value) && bond->params.arp_interval) {
+	if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) {
 		pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n",
 			bond->dev->name, newval->string);
 		/* disable arp monitoring */
@@ -662,7 +694,7 @@
 static struct net_device *__bond_option_active_slave_get(struct bonding *bond,
 							 struct slave *slave)
 {
-	return USES_PRIMARY(bond->params.mode) && slave ? slave->dev : NULL;
+	return bond_uses_primary(bond) && slave ? slave->dev : NULL;
 }
 
 struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond)
@@ -727,7 +759,7 @@
 				bond->dev->name, new_active->dev->name);
 		} else {
 			if (old_active && (new_active->link == BOND_LINK_UP) &&
-			    IS_UP(new_active->dev)) {
+			    bond_slave_is_up(new_active)) {
 				pr_info("%s: Setting %s as active slave\n",
 					bond->dev->name, new_active->dev->name);
 				bond_change_active_slave(bond, new_active);
@@ -746,6 +778,10 @@
 	return ret;
 }
 
+/* There are two tricky bits here.  First, if MII monitoring is activated, then
+ * we must disable ARP monitoring.  Second, if the timer isn't running, we must
+ * start it.
+ */
 static int bond_option_miimon_set(struct bonding *bond,
 				  const struct bond_opt_value *newval)
 {
@@ -784,6 +820,10 @@
 	return 0;
 }
 
+/* Set up and down delays. These must be multiples of the
+ * MII monitoring value, and are stored internally as the multiplier.
+ * Thus, we must translate to MS for the real world.
+ */
 static int bond_option_updelay_set(struct bonding *bond,
 				   const struct bond_opt_value *newval)
 {
@@ -842,6 +882,10 @@
 	return 0;
 }
 
+/* There are two tricky bits here.  First, if ARP monitoring is activated, then
+ * we must disable MII monitoring.  Second, if the ARP timer isn't running,
+ * we must start it.
+ */
 static int bond_option_arp_interval_set(struct bonding *bond,
 					const struct bond_opt_value *newval)
 {
@@ -899,7 +943,7 @@
 	__be32 *targets = bond->params.arp_targets;
 	int ind;
 
-	if (IS_IP_TARGET_UNUSABLE_ADDRESS(target)) {
+	if (!bond_is_ip_target_ok(target)) {
 		pr_err("%s: invalid ARP target %pI4 specified for addition\n",
 		       bond->dev->name, &target);
 		return -EINVAL;
@@ -944,7 +988,7 @@
 	unsigned long *targets_rx;
 	int ind, i;
 
-	if (IS_IP_TARGET_UNUSABLE_ADDRESS(target)) {
+	if (!bond_is_ip_target_ok(target)) {
 		pr_err("%s: invalid ARP target %pI4 specified for removal\n",
 		       bond->dev->name, &target);
 		return -EINVAL;
@@ -1338,3 +1382,13 @@
 	ret = -EPERM;
 	goto out;
 }
+
+static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
+					  const struct bond_opt_value *newval)
+{
+	pr_info("%s: Setting dynamic-lb to %s (%llu)\n",
+		bond->dev->name, newval->string, newval->value);
+	bond->params.tlb_dynamic_lb = newval->value; /* record new value */
+
+	return 0;	/* no failure path in this setter */
+}

diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
index 12be9e1..17ded5b 100644
--- a/drivers/net/bonding/bond_options.h
+++ b/drivers/net/bonding/bond_options.h

@@ -62,6 +62,7 @@
 	BOND_OPT_RESEND_IGMP,
 	BOND_OPT_LP_INTERVAL,
 	BOND_OPT_SLAVES,
+	BOND_OPT_TLB_DYNAMIC_LB,
 	BOND_OPT_LAST
 };
 
@@ -104,6 +105,7 @@
 const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt,
 					    struct bond_opt_value *val);
 const struct bond_option *bond_opt_get(unsigned int option);
+const struct bond_option *bond_opt_get_by_name(const char *name);
 const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val);
 
 /* This helper is used to initialize a bond_opt_value structure for parameter

diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 013fdd0..b215b47 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c

@@ -72,9 +72,9 @@
 	curr = rcu_dereference(bond->curr_active_slave);
 
 	seq_printf(seq, "Bonding Mode: %s",
-		   bond_mode_name(bond->params.mode));
+		   bond_mode_name(BOND_MODE(bond)));
 
-	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
+	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
 	    bond->params.fail_over_mac) {
 		optval = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC,
 					  bond->params.fail_over_mac);
@@ -83,15 +83,15 @@
 
 	seq_printf(seq, "\n");
 
-	if (bond->params.mode == BOND_MODE_XOR ||
-		bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_XOR ||
+		BOND_MODE(bond) == BOND_MODE_8023AD) {
 		optval = bond_opt_get_val(BOND_OPT_XMIT_HASH,
 					  bond->params.xmit_policy);
 		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
 			   optval->string, bond->params.xmit_policy);
 	}
 
-	if (USES_PRIMARY(bond->params.mode)) {
+	if (bond_uses_primary(bond)) {
 		seq_printf(seq, "Primary Slave: %s",
 			   (bond->primary_slave) ?
 			   bond->primary_slave->dev->name : "None");
@@ -134,7 +134,7 @@
 		seq_printf(seq, "\n");
 	}
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 
 		seq_puts(seq, "\n802.3ad info\n");
@@ -188,9 +188,9 @@
 
 	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		const struct aggregator *agg
-			= SLAVE_AD_INFO(slave).port.aggregator;
+			= SLAVE_AD_INFO(slave)->port.aggregator;
 
 		if (agg)
 			seq_printf(seq, "Aggregator ID: %d\n",

diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 5f6babc..daed52f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c

@@ -45,8 +45,7 @@
 #define to_dev(obj)	container_of(obj, struct device, kobj)
 #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd))))
 
-/*
- * "show" function for the bond_masters attribute.
+/* "show" function for the bond_masters attribute.
  * The class parameter is ignored.
  */
 static ssize_t bonding_show_bonds(struct class *cls,
@@ -88,14 +87,12 @@
 	return NULL;
 }
 
-/*
- * "store" function for the bond_masters attribute.  This is what
+/* "store" function for the bond_masters attribute.  This is what
  * creates and deletes entire bonds.
  *
  * The class parameter is ignored.
  *
  */
-
 static ssize_t bonding_store_bonds(struct class *cls,
 				   struct class_attribute *attr,
 				   const char *buffer, size_t count)
@@ -158,9 +155,26 @@
 	.store = bonding_store_bonds,
 };
 
-/*
- * Show the slaves in the current bond.
- */
+/* Generic sysfs "store": set the bond option named like the attribute */
+static ssize_t bonding_sysfs_store_option(struct device *d,
+					  struct device_attribute *attr,
+					  const char *buffer, size_t count)
+{
+	struct bonding *bond = to_bond(d);
+	const struct bond_option *opt;
+	int ret;
+
+	opt = bond_opt_get_by_name(attr->attr.name);
+	if (WARN_ON(!opt))	/* no option carries this attribute's name */
+		return -ENOENT;
+	ret = bond_opt_tryset_rtnl(bond, opt->id, (char *)buffer);
+	if (!ret)	/* zero from the setter: hand back count instead */
+		ret = count;
+
+	return ret;
+}
+
+/* Show the slaves in the current bond. */
 static ssize_t bonding_show_slaves(struct device *d,
 				   struct device_attribute *attr, char *buf)
 {
@@ -190,62 +204,24 @@
 
 	return res;
 }
-
-/*
- * Set the slaves in the current bond.
- * This is supposed to be only thin wrapper for bond_enslave and bond_release.
- * All hard work should be done there.
- */
-static ssize_t bonding_store_slaves(struct device *d,
-				    struct device_attribute *attr,
-				    const char *buffer, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_SLAVES, (char *)buffer);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves,
-		   bonding_store_slaves);
+		   bonding_sysfs_store_option);
 
-/*
- * Show and set the bonding mode.  The bond interface must be down to
- * change the mode.
- */
+/* Show the bonding mode. */
 static ssize_t bonding_show_mode(struct device *d,
 				 struct device_attribute *attr, char *buf)
 {
 	struct bonding *bond = to_bond(d);
 	const struct bond_opt_value *val;
 
-	val = bond_opt_get_val(BOND_OPT_MODE, bond->params.mode);
+	val = bond_opt_get_val(BOND_OPT_MODE, BOND_MODE(bond));
 
-	return sprintf(buf, "%s %d\n", val->string, bond->params.mode);
-}
-
-static ssize_t bonding_store_mode(struct device *d,
-				  struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_MODE, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
+	return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));
 }
 static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
-		   bonding_show_mode, bonding_store_mode);
+		   bonding_show_mode, bonding_sysfs_store_option);
 
-/*
- * Show and set the bonding transmit hash method.
- */
+/* Show the bonding transmit hash method. */
 static ssize_t bonding_show_xmit_hash(struct device *d,
 				      struct device_attribute *attr,
 				      char *buf)
@@ -257,26 +233,10 @@
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);
 }
-
-static ssize_t bonding_store_xmit_hash(struct device *d,
-				       struct device_attribute *attr,
-				       const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_XMIT_HASH, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR,
-		   bonding_show_xmit_hash, bonding_store_xmit_hash);
+		   bonding_show_xmit_hash, bonding_sysfs_store_option);
 
-/*
- * Show and set arp_validate.
- */
+/* Show arp_validate. */
 static ssize_t bonding_show_arp_validate(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -289,26 +249,10 @@
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);
 }
-
-static ssize_t bonding_store_arp_validate(struct device *d,
-					  struct device_attribute *attr,
-					  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ARP_VALIDATE, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-
 static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
-		   bonding_store_arp_validate);
-/*
- * Show and set arp_all_targets.
- */
+		   bonding_sysfs_store_option);
+
+/* Show arp_all_targets. */
 static ssize_t bonding_show_arp_all_targets(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -321,28 +265,10 @@
 	return sprintf(buf, "%s %d\n",
 		       val->string, bond->params.arp_all_targets);
 }
-
-static ssize_t bonding_store_arp_all_targets(struct device *d,
-					  struct device_attribute *attr,
-					  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ARP_ALL_TARGETS, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-
 static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
-		   bonding_show_arp_all_targets, bonding_store_arp_all_targets);
+		   bonding_show_arp_all_targets, bonding_sysfs_store_option);
 
-/*
- * Show and store fail_over_mac.  User only allowed to change the
- * value when there are no slaves.
- */
+/* Show fail_over_mac. */
 static ssize_t bonding_show_fail_over_mac(struct device *d,
 					  struct device_attribute *attr,
 					  char *buf)
@@ -355,30 +281,10 @@
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);
 }
-
-static ssize_t bonding_store_fail_over_mac(struct device *d,
-					   struct device_attribute *attr,
-					   const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_FAIL_OVER_MAC, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-
 static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR,
-		   bonding_show_fail_over_mac, bonding_store_fail_over_mac);
+		   bonding_show_fail_over_mac, bonding_sysfs_store_option);
 
-/*
- * Show and set the arp timer interval.  There are two tricky bits
- * here.  First, if ARP monitoring is activated, then we must disable
- * MII monitoring.  Second, if the ARP timer isn't running, we must
- * start it.
- */
+/* Show the arp timer interval. */
 static ssize_t bonding_show_arp_interval(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -387,26 +293,10 @@
 
 	return sprintf(buf, "%d\n", bond->params.arp_interval);
 }
-
-static ssize_t bonding_store_arp_interval(struct device *d,
-					  struct device_attribute *attr,
-					  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ARP_INTERVAL, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
-		   bonding_show_arp_interval, bonding_store_arp_interval);
+		   bonding_show_arp_interval, bonding_sysfs_store_option);
 
-/*
- * Show and set the arp targets.
- */
+/* Show the arp targets. */
 static ssize_t bonding_show_arp_targets(struct device *d,
 					struct device_attribute *attr,
 					char *buf)
@@ -424,27 +314,10 @@
 
 	return res;
 }
+static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR,
+		   bonding_show_arp_targets, bonding_sysfs_store_option);
 
-static ssize_t bonding_store_arp_targets(struct device *d,
-					 struct device_attribute *attr,
-					 const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ARP_TARGETS, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
-
-/*
- * Show and set the up and down delays.  These must be multiples of the
- * MII monitoring value, and are stored internally as the multiplier.
- * Thus, we must translate to MS for the real world.
- */
+/* Show the up and down delays. */
 static ssize_t bonding_show_downdelay(struct device *d,
 				      struct device_attribute *attr,
 				      char *buf)
@@ -453,22 +326,8 @@
 
 	return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
 }
-
-static ssize_t bonding_store_downdelay(struct device *d,
-				       struct device_attribute *attr,
-				       const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_DOWNDELAY, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR,
-		   bonding_show_downdelay, bonding_store_downdelay);
+		   bonding_show_downdelay, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_updelay(struct device *d,
 				    struct device_attribute *attr,
@@ -479,27 +338,10 @@
 	return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
 
 }
-
-static ssize_t bonding_store_updelay(struct device *d,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_UPDELAY, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR,
-		   bonding_show_updelay, bonding_store_updelay);
+		   bonding_show_updelay, bonding_sysfs_store_option);
 
-/*
- * Show and set the LACP interval.  Interface must be down, and the mode
- * must be set to 802.3ad mode.
- */
+/* Show the LACP interval. */
 static ssize_t bonding_show_lacp(struct device *d,
 				 struct device_attribute *attr,
 				 char *buf)
@@ -511,22 +353,8 @@
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
 }
-
-static ssize_t bonding_store_lacp(struct device *d,
-				  struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_LACP_RATE, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR,
-		   bonding_show_lacp, bonding_store_lacp);
+		   bonding_show_lacp, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_min_links(struct device *d,
 				      struct device_attribute *attr,
@@ -536,22 +364,8 @@
 
 	return sprintf(buf, "%u\n", bond->params.min_links);
 }
-
-static ssize_t bonding_store_min_links(struct device *d,
-				       struct device_attribute *attr,
-				       const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_MINLINKS, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR,
-		   bonding_show_min_links, bonding_store_min_links);
+		   bonding_show_min_links, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_ad_select(struct device *d,
 				      struct device_attribute *attr,
@@ -564,27 +378,10 @@
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);
 }
-
-
-static ssize_t bonding_store_ad_select(struct device *d,
-				       struct device_attribute *attr,
-				       const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_AD_SELECT, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
-		   bonding_show_ad_select, bonding_store_ad_select);
+		   bonding_show_ad_select, bonding_sysfs_store_option);
 
-/*
- * Show and set the number of peer notifications to send after a failover event.
- */
+/* Show and set the number of peer notifications to send after a failover event. */
 static ssize_t bonding_show_num_peer_notif(struct device *d,
 					   struct device_attribute *attr,
 					   char *buf)
@@ -611,12 +408,7 @@
 static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
 		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
 
-/*
- * Show and set the MII monitor interval.  There are two tricky bits
- * here.  First, if MII monitoring is activated, then we must disable
- * ARP monitoring.  Second, if the timer isn't running, we must
- * start it.
- */
+/* Show the MII monitor interval. */
 static ssize_t bonding_show_miimon(struct device *d,
 				   struct device_attribute *attr,
 				   char *buf)
@@ -625,30 +417,10 @@
 
 	return sprintf(buf, "%d\n", bond->params.miimon);
 }
-
-static ssize_t bonding_store_miimon(struct device *d,
-				    struct device_attribute *attr,
-				    const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_MIIMON, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,
-		   bonding_show_miimon, bonding_store_miimon);
+		   bonding_show_miimon, bonding_sysfs_store_option);
 
-/*
- * Show and set the primary slave.  The store function is much
- * simpler than bonding_store_slaves function because it only needs to
- * handle one interface name.
- * The bond must be a mode that supports a primary for this be
- * set.
- */
+/* Show the primary slave. */
 static ssize_t bonding_show_primary(struct device *d,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -661,26 +433,10 @@
 
 	return count;
 }
-
-static ssize_t bonding_store_primary(struct device *d,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_PRIMARY, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
-		   bonding_show_primary, bonding_store_primary);
+		   bonding_show_primary, bonding_sysfs_store_option);
 
-/*
- * Show and set the primary_reselect flag.
- */
+/* Show the primary_reselect flag. */
 static ssize_t bonding_show_primary_reselect(struct device *d,
 					     struct device_attribute *attr,
 					     char *buf)
@@ -694,28 +450,10 @@
 	return sprintf(buf, "%s %d\n",
 		       val->string, bond->params.primary_reselect);
 }
-
-static ssize_t bonding_store_primary_reselect(struct device *d,
-					      struct device_attribute *attr,
-					      const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_PRIMARY_RESELECT,
-				   (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
-		   bonding_show_primary_reselect,
-		   bonding_store_primary_reselect);
+		   bonding_show_primary_reselect, bonding_sysfs_store_option);
 
-/*
- * Show and set the use_carrier flag.
- */
+/* Show the use_carrier flag. */
 static ssize_t bonding_show_carrier(struct device *d,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -724,27 +462,11 @@
 
 	return sprintf(buf, "%d\n", bond->params.use_carrier);
 }
-
-static ssize_t bonding_store_carrier(struct device *d,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_USE_CARRIER, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
-		   bonding_show_carrier, bonding_store_carrier);
+		   bonding_show_carrier, bonding_sysfs_store_option);
 
 
-/*
- * Show and set currently active_slave.
- */
+/* Show currently active_slave. */
 static ssize_t bonding_show_active_slave(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -761,27 +483,10 @@
 
 	return count;
 }
-
-static ssize_t bonding_store_active_slave(struct device *d,
-					  struct device_attribute *attr,
-					  const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ACTIVE_SLAVE, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
-		   bonding_show_active_slave, bonding_store_active_slave);
+		   bonding_show_active_slave, bonding_sysfs_store_option);
 
-
-/*
- * Show link status of the bond interface.
- */
+/* Show link status of the bond interface. */
 static ssize_t bonding_show_mii_status(struct device *d,
 				       struct device_attribute *attr,
 				       char *buf)
@@ -792,9 +497,7 @@
 }
 static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
 
-/*
- * Show current 802.3ad aggregator ID.
- */
+/* Show current 802.3ad aggregator ID. */
 static ssize_t bonding_show_ad_aggregator(struct device *d,
 					  struct device_attribute *attr,
 					  char *buf)
@@ -802,7 +505,7 @@
 	int count = 0;
 	struct bonding *bond = to_bond(d);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
@@ -814,9 +517,7 @@
 static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL);
 
 
-/*
- * Show number of active 802.3ad ports.
- */
+/* Show number of active 802.3ad ports. */
 static ssize_t bonding_show_ad_num_ports(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -824,7 +525,7 @@
 	int count = 0;
 	struct bonding *bond = to_bond(d);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
@@ -836,9 +537,7 @@
 static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL);
 
 
-/*
- * Show current 802.3ad actor key.
- */
+/* Show current 802.3ad actor key. */
 static ssize_t bonding_show_ad_actor_key(struct device *d,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -846,7 +545,7 @@
 	int count = 0;
 	struct bonding *bond = to_bond(d);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
@@ -858,9 +557,7 @@
 static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL);
 
 
-/*
- * Show current 802.3ad partner key.
- */
+/* Show current 802.3ad partner key. */
 static ssize_t bonding_show_ad_partner_key(struct device *d,
 					   struct device_attribute *attr,
 					   char *buf)
@@ -868,7 +565,7 @@
 	int count = 0;
 	struct bonding *bond = to_bond(d);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
@@ -880,9 +577,7 @@
 static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL);
 
 
-/*
- * Show current 802.3ad partner mac.
- */
+/* Show current 802.3ad partner mac. */
 static ssize_t bonding_show_ad_partner_mac(struct device *d,
 					   struct device_attribute *attr,
 					   char *buf)
@@ -890,7 +585,7 @@
 	int count = 0;
 	struct bonding *bond = to_bond(d);
 
-	if (bond->params.mode == BOND_MODE_8023AD) {
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 		if (!bond_3ad_get_active_agg_info(bond, &ad_info))
 			count = sprintf(buf, "%pM\n", ad_info.partner_system);
@@ -900,9 +595,7 @@
 }
 static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
 
-/*
- * Show the queue_ids of the slaves in the current bond.
- */
+/* Show the queue_ids of the slaves in the current bond. */
 static ssize_t bonding_show_queue_id(struct device *d,
 				     struct device_attribute *attr,
 				     char *buf)
@@ -933,31 +626,11 @@
 
 	return res;
 }
-
-/*
- * Set the queue_ids of the  slaves in the current bond.  The bond
- * interface must be enslaved for this to work.
- */
-static ssize_t bonding_store_queue_id(struct device *d,
-				      struct device_attribute *attr,
-				      const char *buffer, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_QUEUE_ID, (char *)buffer);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
-		   bonding_store_queue_id);
+		   bonding_sysfs_store_option);
 
 
-/*
- * Show and set the all_slaves_active flag.
- */
+/* Show the all_slaves_active flag. */
 static ssize_t bonding_show_slaves_active(struct device *d,
 					  struct device_attribute *attr,
 					  char *buf)
@@ -966,27 +639,10 @@
 
 	return sprintf(buf, "%d\n", bond->params.all_slaves_active);
 }
-
-static ssize_t bonding_store_slaves_active(struct device *d,
-					   struct device_attribute *attr,
-					   const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_ALL_SLAVES_ACTIVE,
-				   (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
-		   bonding_show_slaves_active, bonding_store_slaves_active);
+		   bonding_show_slaves_active, bonding_sysfs_store_option);
 
-/*
- * Show and set the number of IGMP membership reports to send on link failure
- */
+/* Show the number of IGMP membership reports to send on link failure */
 static ssize_t bonding_show_resend_igmp(struct device *d,
 					struct device_attribute *attr,
 					char *buf)
@@ -995,23 +651,8 @@
 
 	return sprintf(buf, "%d\n", bond->params.resend_igmp);
 }
-
-static ssize_t bonding_store_resend_igmp(struct device *d,
-					 struct device_attribute *attr,
-					 const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_RESEND_IGMP, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-
 static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
-		   bonding_show_resend_igmp, bonding_store_resend_igmp);
+		   bonding_show_resend_igmp, bonding_sysfs_store_option);
 
 
 static ssize_t bonding_show_lp_interval(struct device *d,
@@ -1019,25 +660,21 @@
 					char *buf)
 {
 	struct bonding *bond = to_bond(d);
+
 	return sprintf(buf, "%d\n", bond->params.lp_interval);
 }
+static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
+		   bonding_show_lp_interval, bonding_sysfs_store_option);
 
-static ssize_t bonding_store_lp_interval(struct device *d,
-					 struct device_attribute *attr,
-					 const char *buf, size_t count)
+static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
+					   struct device_attribute *attr,
+					   char *buf)
 {
 	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_LP_INTERVAL, (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
+	return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
 }
-
-static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
-		   bonding_show_lp_interval, bonding_store_lp_interval);
+static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR,
+		   bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_packets_per_slave(struct device *d,
 					      struct device_attribute *attr,
@@ -1045,27 +682,11 @@
 {
 	struct bonding *bond = to_bond(d);
 	unsigned int packets_per_slave = bond->params.packets_per_slave;
+
 	return sprintf(buf, "%u\n", packets_per_slave);
 }
-
-static ssize_t bonding_store_packets_per_slave(struct device *d,
-					       struct device_attribute *attr,
-					       const char *buf, size_t count)
-{
-	struct bonding *bond = to_bond(d);
-	int ret;
-
-	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_PACKETS_PER_SLAVE,
-				   (char *)buf);
-	if (!ret)
-		ret = count;
-
-	return ret;
-}
-
 static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR,
-		   bonding_show_packets_per_slave,
-		   bonding_store_packets_per_slave);
+		   bonding_show_packets_per_slave, bonding_sysfs_store_option);
 
 static struct attribute *per_bond_attrs[] = {
 	&dev_attr_slaves.attr,
@@ -1099,6 +720,7 @@
 	&dev_attr_min_links.attr,
 	&dev_attr_lp_interval.attr,
 	&dev_attr_packets_per_slave.attr,
+	&dev_attr_tlb_dynamic_lb.attr,
 	NULL,
 };
 
@@ -1107,8 +729,7 @@
 	.attrs = per_bond_attrs,
 };
 
-/*
- * Initialize sysfs.  This sets up the bonding_masters file in
+/* Initialize sysfs.  This sets up the bonding_masters file in
  * /sys/class/net.
  */
 int bond_create_sysfs(struct bond_net *bn)
@@ -1120,8 +741,7 @@
 
 	ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters,
 					  bn->net);
-	/*
-	 * Permit multiple loads of the module by ignoring failures to
+	/* Permit multiple loads of the module by ignoring failures to
 	 * create the bonding_masters sysfs file.  Bonding devices
 	 * created by second or subsequent loads of the module will
 	 * not be listed in, or controllable by, bonding_masters, but
@@ -1144,16 +764,13 @@
 
 }
 
-/*
- * Remove /sys/class/net/bonding_masters.
- */
+/* Remove /sys/class/net/bonding_masters. */
 void bond_destroy_sysfs(struct bond_net *bn)
 {
 	netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);
 }
 
-/*
- * Initialize sysfs for each bond.  This sets up and registers
+/* Initialize sysfs for each bond.  This sets up and registers
  * the 'bondctl' directory for each individual bond under /sys/class/net.
  */
 void bond_prepare_sysfs_group(struct bonding *bond)

diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index 2e4eec5..198677f 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c

@@ -69,8 +69,8 @@
 {
 	const struct aggregator *agg;
 
-	if (slave->bond->params.mode == BOND_MODE_8023AD) {
-		agg = SLAVE_AD_INFO(slave).port.aggregator;
+	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+		agg = SLAVE_AD_INFO(slave)->port.aggregator;
 		if (agg)
 			return sprintf(buf, "%d\n",
 				       agg->aggregator_identifier);

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 00bea32..0b4d9cd 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h

@@ -41,42 +41,6 @@
 
 #define BOND_DEFAULT_MIIMON	100
 
-#define IS_UP(dev)					   \
-	      ((((dev)->flags & IFF_UP) == IFF_UP)	&& \
-	       netif_running(dev)			&& \
-	       netif_carrier_ok(dev))
-
-/*
- * Checks whether slave is ready for transmit.
- */
-#define SLAVE_IS_OK(slave)			        \
-		    (((slave)->dev->flags & IFF_UP)  && \
-		     netif_running((slave)->dev)     && \
-		     ((slave)->link == BOND_LINK_UP) && \
-		     bond_is_active_slave(slave))
-
-
-#define USES_PRIMARY(mode)				\
-		(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\
-		 ((mode) == BOND_MODE_TLB)          ||	\
-		 ((mode) == BOND_MODE_ALB))
-
-#define BOND_NO_USES_ARP(mode)				\
-		(((mode) == BOND_MODE_8023AD)	||	\
-		 ((mode) == BOND_MODE_TLB)	||	\
-		 ((mode) == BOND_MODE_ALB))
-
-#define TX_QUEUE_OVERRIDE(mode)				\
-			(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\
-			 ((mode) == BOND_MODE_ROUNDROBIN))
-
-#define BOND_MODE_IS_LB(mode)			\
-		(((mode) == BOND_MODE_TLB) ||	\
-		 ((mode) == BOND_MODE_ALB))
-
-#define IS_IP_TARGET_UNUSABLE_ADDRESS(a)	\
-	((htonl(INADDR_BROADCAST) == a) ||	\
-	 ipv4_is_zeronet(a))
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -90,6 +54,8 @@
 	set_fs(fs);			\
 	res; })
 
+#define BOND_MODE(bond) ((bond)->params.mode)
+
 /* slave list primitives */
 #define bond_slave_list(bond) (&(bond)->dev->adj_list.lower)
 
@@ -175,6 +141,7 @@
 	int resend_igmp;
 	int lp_interval;
 	int packets_per_slave;
+	int tlb_dynamic_lb;
 	struct reciprocal_value reciprocal_packets_per_slave;
 };
 
@@ -183,8 +150,6 @@
 	int mode;
 };
 
-#define BOND_MAX_MODENAME_LEN 20
-
 struct slave {
 	struct net_device *dev; /* first - useful for panic debug */
 	struct bonding *bond; /* our master */
@@ -205,7 +170,7 @@
 	u32    speed;
 	u16    queue_id;
 	u8     perm_hwaddr[ETH_ALEN];
-	struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
+	struct ad_slave_info *ad_info;
 	struct tlb_slave_info tlb_info;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll *np;
@@ -285,14 +250,41 @@
 
 static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
 {
-	if (!slave || !slave->bond)
-		return NULL;
 	return slave->bond;
 }
 
+static inline bool bond_should_override_tx_queue(struct bonding *bond)
+{
+	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
+	       BOND_MODE(bond) == BOND_MODE_ROUNDROBIN;
+}
+
 static inline bool bond_is_lb(const struct bonding *bond)
 {
-	return BOND_MODE_IS_LB(bond->params.mode);
+	return BOND_MODE(bond) == BOND_MODE_TLB ||
+	       BOND_MODE(bond) == BOND_MODE_ALB;
+}
+
+static inline bool bond_mode_uses_arp(int mode)
+{
+	return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB &&
+	       mode != BOND_MODE_ALB;
+}
+
+static inline bool bond_mode_uses_primary(int mode)
+{
+	return mode == BOND_MODE_ACTIVEBACKUP || mode == BOND_MODE_TLB ||
+	       mode == BOND_MODE_ALB;
+}
+
+static inline bool bond_uses_primary(struct bonding *bond)
+{
+	return bond_mode_uses_primary(BOND_MODE(bond));
+}
+
+static inline bool bond_slave_is_up(struct slave *slave)
+{
+	return netif_running(slave->dev) && netif_carrier_ok(slave->dev);
 }
 
 static inline void bond_set_active_slave(struct slave *slave)
@@ -365,6 +357,12 @@
 	return !bond_slave_state(slave);
 }
 
+static inline bool bond_slave_can_tx(struct slave *slave)
+{
+	return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP &&
+	       bond_is_active_slave(slave);
+}
+
 #define BOND_PRI_RESELECT_ALWAYS	0
 #define BOND_PRI_RESELECT_BETTER	1
 #define BOND_PRI_RESELECT_FAILURE	2
@@ -396,12 +394,16 @@
 	return bond->params.arp_validate & (1 << bond_slave_state(slave));
 }
 
-static inline int slave_do_arp_validate_only(struct bonding *bond,
-					     struct slave *slave)
+static inline int slave_do_arp_validate_only(struct bonding *bond)
 {
 	return bond->params.arp_validate & BOND_ARP_FILTER;
 }
 
+static inline int bond_is_ip_target_ok(__be32 addr)
+{
+	return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr);
+}
+
 /* Get the oldest arp which we've received on this slave for bond's
  * arp_targets.
  */
@@ -479,16 +481,14 @@
 	return addr;
 }
 
-static inline bool slave_can_tx(struct slave *slave)
-{
-	if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP &&
-	    bond_is_active_slave(slave))
-		return true;
-	else
-		return false;
-}
-
-struct bond_net;
+struct bond_net {
+	struct net		*net;	/* Associated network namespace */
+	struct list_head	dev_list;
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc_dir;
+#endif
+	struct class_attribute	class_attr_bonding_masters;
+};
 
 int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
 void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
@@ -500,7 +500,7 @@
 void bond_sysfs_slave_del(struct slave *slave);
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
-int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
 void bond_create_debugfs(void);
@@ -516,15 +516,9 @@
 struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond);
 struct net_device *bond_option_active_slave_get(struct bonding *bond);
 const char *bond_slave_link_status(s8 link);
-
-struct bond_net {
-	struct net *		net;	/* Associated network namespace */
-	struct list_head	dev_list;
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry *	proc_dir;
-#endif
-	struct class_attribute	class_attr_bonding_masters;
-};
+bool bond_verify_device_path(struct net_device *start_dev,
+			     struct net_device *end_dev,
+			     struct bond_vlan_tag *tags);
 
 #ifdef CONFIG_PROC_FS
 void bond_create_proc_entry(struct bonding *bond);
@@ -576,6 +570,27 @@
 	return NULL;
 }
 
+/* Caller must hold rcu_read_lock() for read */
+static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+{
+	struct list_head *iter;
+	struct slave *tmp;
+	struct netdev_hw_addr *ha;
+
+	bond_for_each_slave_rcu(bond, tmp, iter)
+		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+			return true;
+
+	if (netdev_uc_empty(bond->dev))
+		return false;
+
+	netdev_for_each_uc_addr(ha, bond->dev)
+		if (ether_addr_equal_64bits(mac, ha->addr))
+			return true;
+
+	return false;
+}
+
 /* Check if the ip is present in arp ip list, or first free slot if ip == 0
  * Returns -1 if not found, index if found
  */

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 9e7d95d..4168822 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig

@@ -65,7 +65,7 @@
 
 config CAN_AT91
 	tristate "Atmel AT91 onchip CAN controller"
-	depends on ARM
+	depends on ARCH_AT91 || COMPILE_TEST
 	---help---
 	  This is a driver for the SoC CAN controller in Atmel's AT91SAM9263
 	  and AT91SAM9X5 processors.
@@ -77,12 +77,6 @@
 	  Driver for TI HECC (High End CAN Controller) module found on many
 	  TI devices. The device specifications are available from www.ti.com
 
-config CAN_MCP251X
-	tristate "Microchip MCP251x SPI CAN controllers"
-	depends on SPI && HAS_DMA
-	---help---
-	  Driver for the Microchip MCP251x SPI CAN controllers.
-
 config CAN_BFIN
 	depends on BF534 || BF536 || BF537 || BF538 || BF539 || BF54x
 	tristate "Analog Devices Blackfin on-chip CAN"
@@ -110,7 +104,7 @@
 
 config PCH_CAN
 	tristate "Intel EG20T PCH CAN controller"
-	depends on PCI
+	depends on PCI && (X86_32 || COMPILE_TEST)
 	---help---
 	  This driver is for PCH CAN of Topcliff (Intel EG20T PCH) which
 	  is an IOH for x86 embedded processor (Intel Atom E6xx series).
@@ -125,6 +119,24 @@
 	  endian syntheses of the cores would need some modifications on
 	  the hardware level to work.
 
+config CAN_RCAR
+	tristate "Renesas R-Car CAN controller"
+	depends on ARM
+	---help---
+	  Say Y here if you want to use CAN controller found on Renesas R-Car
+	  SoCs.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called rcar_can.
+
+config CAN_XILINXCAN
+	tristate "Xilinx CAN"
+	depends on ARCH_ZYNQ || MICROBLAZE || COMPILE_TEST
+	depends on COMMON_CLK && HAS_IOMEM
+	---help---
+	  Xilinx CAN driver. This driver supports both soft AXI CAN IP and
+	  Zynq CANPS IP.
+
 source "drivers/net/can/mscan/Kconfig"
 
 source "drivers/net/can/sja1000/Kconfig"
@@ -133,6 +145,8 @@
 
 source "drivers/net/can/cc770/Kconfig"
 
+source "drivers/net/can/spi/Kconfig"
+
 source "drivers/net/can/usb/Kconfig"
 
 source "drivers/net/can/softing/Kconfig"

diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index c744039..1697f22 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile

@@ -10,6 +10,7 @@
 
 can-dev-$(CONFIG_CAN_LEDS)	+= led.o
 
+obj-y				+= spi/
 obj-y				+= usb/
 obj-y				+= softing/
 
@@ -19,11 +20,12 @@
 obj-$(CONFIG_CAN_CC770)		+= cc770/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
 obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
-obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
 obj-$(CONFIG_CAN_BFIN)		+= bfin_can.o
 obj-$(CONFIG_CAN_JANZ_ICAN3)	+= janz-ican3.o
 obj-$(CONFIG_CAN_FLEXCAN)	+= flexcan.o
 obj-$(CONFIG_PCH_CAN)		+= pch_can.o
 obj-$(CONFIG_CAN_GRCAN)		+= grcan.o
+obj-$(CONFIG_CAN_RCAR)		+= rcar_can.o
+obj-$(CONFIG_CAN_XILINXCAN)	+= xilinx_can.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 95e04e2..8e78bb4 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c

@@ -252,8 +252,7 @@
 	struct c_can_priv *priv = netdev_priv(dev);
 	int cnt, reg = C_CAN_IFACE(COMREQ_REG, iface);
 
-	priv->write_reg(priv, reg + 1, cmd);
-	priv->write_reg(priv, reg, obj);
+	priv->write_reg32(priv, reg, (cmd << 16) | obj);
 
 	for (cnt = MIN_TIMEOUT_VALUE; cnt; cnt--) {
 		if (!(priv->read_reg(priv, reg) & IF_COMR_BUSY))
@@ -328,8 +327,7 @@
 		change_bit(idx, &priv->tx_dir);
 	}
 
-	priv->write_reg(priv, C_CAN_IFACE(ARB1_REG, iface), arb);
-	priv->write_reg(priv, C_CAN_IFACE(ARB2_REG, iface), arb >> 16);
+	priv->write_reg32(priv, C_CAN_IFACE(ARB1_REG, iface), arb);
 
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
 
@@ -391,8 +389,7 @@
 
 	frame->can_dlc = get_can_dlc(ctrl & 0x0F);
 
-	arb = priv->read_reg(priv, C_CAN_IFACE(ARB1_REG, iface));
-	arb |= priv->read_reg(priv, C_CAN_IFACE(ARB2_REG, iface)) << 16;
+	arb = priv->read_reg32(priv, C_CAN_IFACE(ARB1_REG, iface));
 
 	if (arb & IF_ARB_MSGXTD)
 		frame->can_id = (arb & CAN_EFF_MASK) | CAN_EFF_FLAG;
@@ -424,12 +421,10 @@
 	struct c_can_priv *priv = netdev_priv(dev);
 
 	mask |= BIT(29);
-	priv->write_reg(priv, C_CAN_IFACE(MASK1_REG, iface), mask);
-	priv->write_reg(priv, C_CAN_IFACE(MASK2_REG, iface), mask >> 16);
+	priv->write_reg32(priv, C_CAN_IFACE(MASK1_REG, iface), mask);
 
 	id |= IF_ARB_MSGVAL;
-	priv->write_reg(priv, C_CAN_IFACE(ARB1_REG, iface), id);
-	priv->write_reg(priv, C_CAN_IFACE(ARB2_REG, iface), id >> 16);
+	priv->write_reg32(priv, C_CAN_IFACE(ARB1_REG, iface), id);
 
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), mcont);
 	c_can_object_put(dev, iface, obj, IF_COMM_RCV_SETUP);

diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index c56f1b1..99ad1aa 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h

@@ -78,6 +78,7 @@
 	C_CAN_INTPND2_REG,
 	C_CAN_MSGVAL1_REG,
 	C_CAN_MSGVAL2_REG,
+	C_CAN_FUNCTION_REG,
 };
 
 static const u16 reg_map_c_can[] = {
@@ -129,6 +130,7 @@
 	[C_CAN_BRPEXT_REG]	= 0x0E,
 	[C_CAN_INT_REG]		= 0x10,
 	[C_CAN_TEST_REG]	= 0x14,
+	[C_CAN_FUNCTION_REG]	= 0x18,
 	[C_CAN_TXRQST1_REG]	= 0x88,
 	[C_CAN_TXRQST2_REG]	= 0x8A,
 	[C_CAN_NEWDAT1_REG]	= 0x9C,
@@ -176,8 +178,10 @@
 	atomic_t tx_active;
 	unsigned long tx_dir;
 	int last_status;
-	u16 (*read_reg) (struct c_can_priv *priv, enum reg index);
-	void (*write_reg) (struct c_can_priv *priv, enum reg index, u16 val);
+	u16 (*read_reg) (const struct c_can_priv *priv, enum reg index);
+	void (*write_reg) (const struct c_can_priv *priv, enum reg index, u16 val);
+	u32 (*read_reg32) (const struct c_can_priv *priv, enum reg index);
+	void (*write_reg32) (const struct c_can_priv *priv, enum reg index, u32 val);
 	void __iomem *base;
 	const u16 *regs;
 	void *priv;		/* for board-specific data */

diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index fe5f630..5d11e0e 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c

@@ -19,9 +19,13 @@
 
 #include "c_can.h"
 
+#define PCI_DEVICE_ID_PCH_CAN	0x8818
+#define PCH_PCI_SOFT_RESET	0x01fc
+
 enum c_can_pci_reg_align {
 	C_CAN_REG_ALIGN_16,
 	C_CAN_REG_ALIGN_32,
+	C_CAN_REG_32,
 };
 
 struct c_can_pci_data {
@@ -31,6 +35,10 @@
 	enum c_can_pci_reg_align reg_align;
 	/* Set the frequency */
 	unsigned int freq;
+	/* PCI bar number */
+	int bar;
+	/* Callback for reset */
+	void (*init)(const struct c_can_priv *priv, bool enable);
 };
 
 /*
@@ -39,30 +47,70 @@
  * registers can be aligned to a 16-bit boundary or 32-bit boundary etc.
  * Handle the same by providing a common read/write interface.
  */
-static u16 c_can_pci_read_reg_aligned_to_16bit(struct c_can_priv *priv,
+static u16 c_can_pci_read_reg_aligned_to_16bit(const struct c_can_priv *priv,
 						enum reg index)
 {
 	return readw(priv->base + priv->regs[index]);
 }
 
-static void c_can_pci_write_reg_aligned_to_16bit(struct c_can_priv *priv,
+static void c_can_pci_write_reg_aligned_to_16bit(const struct c_can_priv *priv,
 						enum reg index, u16 val)
 {
 	writew(val, priv->base + priv->regs[index]);
 }
 
-static u16 c_can_pci_read_reg_aligned_to_32bit(struct c_can_priv *priv,
+static u16 c_can_pci_read_reg_aligned_to_32bit(const struct c_can_priv *priv,
 						enum reg index)
 {
 	return readw(priv->base + 2 * priv->regs[index]);
 }
 
-static void c_can_pci_write_reg_aligned_to_32bit(struct c_can_priv *priv,
+static void c_can_pci_write_reg_aligned_to_32bit(const struct c_can_priv *priv,
 						enum reg index, u16 val)
 {
 	writew(val, priv->base + 2 * priv->regs[index]);
 }
 
+/* 32-bit read at twice the map offset; only bits 15:0 are returned. */
+static u16 c_can_pci_read_reg_32bit(const struct c_can_priv *priv, enum reg index)
+{
+	return ioread32(priv->base + 2 * priv->regs[index]) & 0xffff;
+}
+
+static void c_can_pci_write_reg_32bit(const struct c_can_priv *priv,
+				      enum reg index, u16 val)
+{
+	iowrite32(val, priv->base + 2 * priv->regs[index]); /* zero-extended */
+}
+
+/* Compose a u32 from two 16-bit reads done through priv->read_reg(). */
+static u32 c_can_pci_read_reg32(const struct c_can_priv *priv, enum reg index)
+{
+	/* Low half first, then the next enum entry as the high half. */
+	u32 lo = priv->read_reg(priv, index);
+	u32 hi = priv->read_reg(priv, index + 1);
+
+	return lo | (hi << 16);
+}
+
+static void c_can_pci_write_reg32(const struct c_can_priv *priv,
+				  enum reg index, u32 val)
+{
+	priv->write_reg(priv, index + 1, (u16)(val >> 16)); /* high half */
+	priv->write_reg(priv, index, (u16)val);		     /* then low half */
+}
+
+/* Pulse the PCH soft-reset register (write 1, then 0) when enable is set. */
+static void c_can_pci_reset_pch(const struct c_can_priv *priv, bool enable)
+{
+	void __iomem *reset_reg = priv->base + PCH_PCI_SOFT_RESET;
+
+	if (!enable)
+		return;
+	iowrite32(1, reset_reg);
+	iowrite32(0, reset_reg);
+}
+
 static int c_can_pci_probe(struct pci_dev *pdev,
 			   const struct pci_device_id *ent)
 {
@@ -90,7 +138,8 @@
 		pci_set_master(pdev);
 	}
 
-	addr = pci_iomap(pdev, 0, pci_resource_len(pdev, 0));
+	addr = pci_iomap(pdev, c_can_pci_data->bar,
+			 pci_resource_len(pdev, c_can_pci_data->bar));
 	if (!addr) {
 		dev_err(&pdev->dev,
 			"device has no PCI memory resources, "
@@ -147,10 +196,18 @@
 		priv->read_reg = c_can_pci_read_reg_aligned_to_16bit;
 		priv->write_reg = c_can_pci_write_reg_aligned_to_16bit;
 		break;
+	case C_CAN_REG_32:
+		priv->read_reg = c_can_pci_read_reg_32bit;
+		priv->write_reg = c_can_pci_write_reg_32bit;
+		break;
 	default:
 		ret = -EINVAL;
 		goto out_free_c_can;
 	}
+	priv->read_reg32 = c_can_pci_read_reg32;
+	priv->write_reg32 = c_can_pci_write_reg32;
+
+	priv->raminit = c_can_pci_data->init;
 
 	ret = register_c_can_dev(dev);
 	if (ret) {
@@ -198,6 +255,15 @@
 	.type = BOSCH_C_CAN,
 	.reg_align = C_CAN_REG_ALIGN_32,
 	.freq = 52000000, /* 52 Mhz */
+	.bar = 0,
+};
+
+static struct c_can_pci_data c_can_pch = {
+	.type = BOSCH_C_CAN,
+	.reg_align = C_CAN_REG_32,
+	.freq = 50000000, /* 50 MHz */
+	.init = c_can_pci_reset_pch,
+	.bar = 1,
 };
 
 #define C_CAN_ID(_vend, _dev, _driverdata) {		\
@@ -207,6 +273,8 @@
 static DEFINE_PCI_DEVICE_TABLE(c_can_pci_tbl) = {
 	C_CAN_ID(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_CAN,
 		 c_can_sta2x11),
+	C_CAN_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PCH_CAN,
+		 c_can_pch),
 	{},
 };
 static struct pci_driver c_can_pci_driver = {

diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 1df0b32..824108c 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c

@@ -40,6 +40,7 @@
 #define CAN_RAMINIT_START_MASK(i)	(0x001 << (i))
 #define CAN_RAMINIT_DONE_MASK(i)	(0x100 << (i))
 #define CAN_RAMINIT_ALL_MASK(i)		(0x101 << (i))
+#define DCAN_RAM_INIT_BIT		(1 << 3)
 static DEFINE_SPINLOCK(raminit_lock);
 /*
  * 16-bit c_can registers can be arranged differently in the memory
@@ -47,31 +48,31 @@
  * registers can be aligned to a 16-bit boundary or 32-bit boundary etc.
  * Handle the same by providing a common read/write interface.
  */
-static u16 c_can_plat_read_reg_aligned_to_16bit(struct c_can_priv *priv,
+static u16 c_can_plat_read_reg_aligned_to_16bit(const struct c_can_priv *priv,
 						enum reg index)
 {
 	return readw(priv->base + priv->regs[index]);
 }
 
-static void c_can_plat_write_reg_aligned_to_16bit(struct c_can_priv *priv,
+static void c_can_plat_write_reg_aligned_to_16bit(const struct c_can_priv *priv,
 						enum reg index, u16 val)
 {
 	writew(val, priv->base + priv->regs[index]);
 }
 
-static u16 c_can_plat_read_reg_aligned_to_32bit(struct c_can_priv *priv,
+static u16 c_can_plat_read_reg_aligned_to_32bit(const struct c_can_priv *priv,
 						enum reg index)
 {
 	return readw(priv->base + 2 * priv->regs[index]);
 }
 
-static void c_can_plat_write_reg_aligned_to_32bit(struct c_can_priv *priv,
+static void c_can_plat_write_reg_aligned_to_32bit(const struct c_can_priv *priv,
 						enum reg index, u16 val)
 {
 	writew(val, priv->base + 2 * priv->regs[index]);
 }
 
-static void c_can_hw_raminit_wait(const struct c_can_priv *priv, u32 mask,
+static void c_can_hw_raminit_wait_ti(const struct c_can_priv *priv, u32 mask,
 				  u32 val)
 {
 	/* We look only at the bits of our instance. */
@@ -80,7 +81,7 @@
 		udelay(1);
 }
 
-static void c_can_hw_raminit(const struct c_can_priv *priv, bool enable)
+static void c_can_hw_raminit_ti(const struct c_can_priv *priv, bool enable)
 {
 	u32 mask = CAN_RAMINIT_ALL_MASK(priv->instance);
 	u32 ctrl;
@@ -96,18 +97,68 @@
 	ctrl |= CAN_RAMINIT_DONE_MASK(priv->instance);
 	writel(ctrl, priv->raminit_ctrlreg);
 	ctrl &= ~CAN_RAMINIT_DONE_MASK(priv->instance);
-	c_can_hw_raminit_wait(priv, ctrl, mask);
+	c_can_hw_raminit_wait_ti(priv, ctrl, mask);
 
 	if (enable) {
 		/* Set start bit and wait for the done bit. */
 		ctrl |= CAN_RAMINIT_START_MASK(priv->instance);
 		writel(ctrl, priv->raminit_ctrlreg);
 		ctrl |= CAN_RAMINIT_DONE_MASK(priv->instance);
-		c_can_hw_raminit_wait(priv, ctrl, mask);
+		c_can_hw_raminit_wait_ti(priv, ctrl, mask);
 	}
 	spin_unlock(&raminit_lock);
 }
 
+/* Compose a u32 from two 16-bit reads done through priv->read_reg(). */
+static u32 c_can_plat_read_reg32(const struct c_can_priv *priv, enum reg index)
+{
+	/* Low half first, then the next enum entry as the high half. */
+	u32 lo = priv->read_reg(priv, index);
+	u32 hi = priv->read_reg(priv, index + 1);
+
+	return lo | (hi << 16);
+}
+
+static void c_can_plat_write_reg32(const struct c_can_priv *priv,
+				   enum reg index, u32 val)
+{
+	priv->write_reg(priv, index + 1, (u16)(val >> 16)); /* high half */
+	priv->write_reg(priv, index, (u16)val);		     /* then low half */
+}
+
+static u32 d_can_plat_read_reg32(const struct c_can_priv *priv, enum reg index)
+{
+	return readl(priv->base + priv->regs[index]);	/* one 32-bit access */
+}
+
+static void d_can_plat_write_reg32(const struct c_can_priv *priv, enum reg index,
+		u32 val)
+{
+	writel(val, priv->base + priv->regs[index]);	/* one 32-bit access */
+}
+
+static void c_can_hw_raminit_wait(const struct c_can_priv *priv, u32 mask)
+{
+	while (priv->read_reg32(priv, C_CAN_FUNCTION_REG) & mask)
+		udelay(1);	/* no timeout: spins until the masked bits read 0 */
+}
+
+/* Clear DCAN_RAM_INIT_BIT in the function register; set it again if enable. */
+static void c_can_hw_raminit(const struct c_can_priv *priv, bool enable)
+{
+	u32 func = priv->read_reg32(priv, C_CAN_FUNCTION_REG);
+
+	func &= ~DCAN_RAM_INIT_BIT;
+	priv->write_reg32(priv, C_CAN_FUNCTION_REG, func);
+	c_can_hw_raminit_wait(priv, func);	/* whole value is the mask */
+	if (!enable)
+		return;
+
+	func |= DCAN_RAM_INIT_BIT;
+	priv->write_reg32(priv, C_CAN_FUNCTION_REG, func);
+	c_can_hw_raminit_wait(priv, func);
+}
+
 static struct platform_device_id c_can_id_table[] = {
 	[BOSCH_C_CAN_PLATFORM] = {
 		.name = KBUILD_MODNAME,
@@ -201,11 +252,15 @@
 		case IORESOURCE_MEM_32BIT:
 			priv->read_reg = c_can_plat_read_reg_aligned_to_32bit;
 			priv->write_reg = c_can_plat_write_reg_aligned_to_32bit;
+			priv->read_reg32 = c_can_plat_read_reg32;
+			priv->write_reg32 = c_can_plat_write_reg32;
 			break;
 		case IORESOURCE_MEM_16BIT:
 		default:
 			priv->read_reg = c_can_plat_read_reg_aligned_to_16bit;
 			priv->write_reg = c_can_plat_write_reg_aligned_to_16bit;
+			priv->read_reg32 = c_can_plat_read_reg32;
+			priv->write_reg32 = c_can_plat_write_reg32;
 			break;
 		}
 		break;
@@ -214,6 +269,8 @@
 		priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
 		priv->read_reg = c_can_plat_read_reg_aligned_to_16bit;
 		priv->write_reg = c_can_plat_write_reg_aligned_to_16bit;
+		priv->read_reg32 = d_can_plat_read_reg32;
+		priv->write_reg32 = d_can_plat_write_reg32;
 
 		if (pdev->dev.of_node)
 			priv->instance = of_alias_get_id(pdev->dev.of_node, "d_can");
@@ -221,11 +278,20 @@
 			priv->instance = pdev->id;
 
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		/* Not all D_CAN modules have a separate register for the D_CAN
+		 * RAM initialization. Use default RAM init bit in D_CAN module
+		 * if not specified in DT.
+		 */
+		if (!res) {
+			priv->raminit = c_can_hw_raminit;
+			break;
+		}
+
 		priv->raminit_ctrlreg = devm_ioremap_resource(&pdev->dev, res);
 		if (IS_ERR(priv->raminit_ctrlreg) || priv->instance < 0)
 			dev_info(&pdev->dev, "control memory is not used for raminit\n");
 		else
-			priv->raminit = c_can_hw_raminit;
+			priv->raminit = c_can_hw_raminit_ti;
 		break;
 	default:
 		ret = -EINVAL;

diff --git a/drivers/net/can/mscan/Kconfig b/drivers/net/can/mscan/Kconfig
index f19be52..81c7117 100644
--- a/drivers/net/can/mscan/Kconfig
+++ b/drivers/net/can/mscan/Kconfig

@@ -1,5 +1,5 @@
 config CAN_MSCAN
-	depends on PPC || M68K
+	depends on PPC
 	tristate "Support for Freescale MSCAN based chips"
 	---help---
 	  The Motorola Scalable Controller Area Network (MSCAN) definition

diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
new file mode 100644
index 0000000..5268d21
--- /dev/null
+++ b/drivers/net/can/rcar_can.c

@@ -0,0 +1,876 @@
+/* Renesas R-Car CAN device driver
+ *
+ * Copyright (C) 2013 Cogent Embedded, Inc. <source@cogentembedded.com>
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/can/led.h>
+#include <linux/can/dev.h>
+#include <linux/clk.h>
+#include <linux/can/platform/rcar_can.h>
+
+#define RCAR_CAN_DRV_NAME	"rcar_can"
+
+/* Mailbox configuration:
+ * mailbox 60 - 63 - Rx FIFO mailboxes
+ * mailbox 56 - 59 - Tx FIFO mailboxes
+ * non-FIFO mailboxes are not used
+ */
+#define RCAR_CAN_N_MBX		64 /* Number of mailboxes in non-FIFO mode */
+#define RCAR_CAN_RX_FIFO_MBX	60 /* Mailbox - window to Rx FIFO */
+#define RCAR_CAN_TX_FIFO_MBX	56 /* Mailbox - window to Tx FIFO */
+#define RCAR_CAN_FIFO_DEPTH	4
+
+/* Layout of one mailbox register window (16 bytes of registers) */
+struct rcar_can_mbox_regs {
+	u32 id;		/* IDE and RTR bits, SID and EID */
+	u8 stub;	/* Not used */
+	u8 dlc;		/* Data Length Code - bits [0..3] */
+	u8 data[8];	/* Data Bytes */
+	u8 tsh;		/* Time Stamp Higher Byte */
+	u8 tsl;		/* Time Stamp Lower Byte */
+};
+
+struct rcar_can_regs {	/* Register map; offsets follow from field order */
+	struct rcar_can_mbox_regs mb[RCAR_CAN_N_MBX]; /* Mailbox registers */
+	u32 mkr_2_9[8];	/* Mask Registers 2-9 */
+	u32 fidcr[2];	/* FIFO Received ID Compare Register */
+	u32 mkivlr1;	/* Mask Invalid Register 1 */
+	u32 mier1;	/* Mailbox Interrupt Enable Register 1 */
+	u32 mkr_0_1[2];	/* Mask Registers 0-1 */
+	u32 mkivlr0;    /* Mask Invalid Register 0 */
+	u32 mier0;      /* Mailbox Interrupt Enable Register 0 */
+	u8 pad_440[0x3c0];	/* Gap: offsets 0x440 - 0x7ff */
+	u8 mctl[64];	/* Message Control Registers */
+	u16 ctlr;	/* Control Register */
+	u16 str;	/* Status register */
+	u8 bcr[3];	/* Bit Configuration Register */
+	u8 clkr;	/* Clock Select Register */
+	u8 rfcr;	/* Receive FIFO Control Register */
+	u8 rfpcr;	/* Receive FIFO Pointer Control Register */
+	u8 tfcr;	/* Transmit FIFO Control Register */
+	u8 tfpcr;       /* Transmit FIFO Pointer Control Register */
+	u8 eier;	/* Error Interrupt Enable Register */
+	u8 eifr;	/* Error Interrupt Factor Judge Register */
+	u8 recr;	/* Receive Error Count Register */
+	u8 tecr;        /* Transmit Error Count Register */
+	u8 ecsr;	/* Error Code Store Register */
+	u8 cssr;	/* Channel Search Support Register */
+	u8 mssr;	/* Mailbox Search Status Register */
+	u8 msmr;	/* Mailbox Search Mode Register */
+	u16 tsr;	/* Time Stamp Register */
+	u8 afsr;	/* Acceptance Filter Support Register */
+	u8 pad_857;	/* Gap at offset 0x857 */
+	u8 tcr;		/* Test Control Register */
+	u8 pad_859[7];	/* Gap: offsets 0x859 - 0x85f */
+	u8 ier;		/* Interrupt Enable Register */
+	u8 isr;		/* Interrupt Status Register */
+	u8 pad_862;	/* Gap at offset 0x862 */
+	u8 mbsmr;	/* Mailbox Search Mask Register */
+};
+
+struct rcar_can_priv {
+	struct can_priv can;	/* Must be the first member! */
+	struct net_device *ndev;	/* Back-pointer used by NAPI and logging */
+	struct napi_struct napi;	/* Rx polling context (rcar_can_rx_poll) */
+	struct rcar_can_regs __iomem *regs;	/* Controller register block */
+	struct clk *clk;	/* Enabled in open, released in close */
+	u8 tx_dlc[RCAR_CAN_FIFO_DEPTH];	/* DLC of each frame in flight */
+	u32 tx_head;	/* Frames queued so far (free-running counter) */
+	u32 tx_tail;	/* Frames completed so far (free-running counter) */
+	u8 clock_select;	/* Written to the CLKR byte together with BCR */
+	u8 ier;		/* Software copy of the IER register */
+};
+
+static const struct can_bittiming_const rcar_can_bittiming_const = {
+	.name = RCAR_CAN_DRV_NAME,
+	.tseg1_min = 4,
+	.tseg1_max = 16,	/* stored as value - 1 in the 4-bit TSEG1 field */
+	.tseg2_min = 2,
+	.tseg2_max = 8,		/* stored as value - 1 in the 3-bit TSEG2 field */
+	.sjw_max = 4,		/* stored as value - 1 in the 2-bit SJW field */
+	.brp_min = 1,
+	.brp_max = 1024,	/* stored as value - 1 in the 10-bit BPR field */
+	.brp_inc = 1,
+};
+
+/* Control Register bits */
+#define RCAR_CAN_CTLR_BOM	(3 << 11) /* Bus-Off Recovery Mode Bits */
+#define RCAR_CAN_CTLR_BOM_ENT	(1 << 11) /* Entry to halt mode */
+					/* at bus-off entry */
+#define RCAR_CAN_CTLR_SLPM	(1 << 10)
+#define RCAR_CAN_CTLR_CANM	(3 << 8) /* Operating Mode Select Bit */
+#define RCAR_CAN_CTLR_CANM_HALT	(1 << 9)
+#define RCAR_CAN_CTLR_CANM_RESET (1 << 8)
+#define RCAR_CAN_CTLR_CANM_FORCE_RESET (3 << 8)
+#define RCAR_CAN_CTLR_MLM	(1 << 3) /* Message Lost Mode Select */
+#define RCAR_CAN_CTLR_IDFM	(3 << 1) /* ID Format Mode Select Bits */
+#define RCAR_CAN_CTLR_IDFM_MIXED (1 << 2) /* Mixed ID mode */
+#define RCAR_CAN_CTLR_MBM	(1 << 0) /* Mailbox Mode select */
+
+/* Status Register bits */
+#define RCAR_CAN_STR_RSTST	(1 << 8) /* Reset Status Bit */
+
+/* FIFO Received ID Compare Registers 0 and 1 bits (unsigned: bit 31 in use) */
+#define RCAR_CAN_FIDCR_IDE	(1U << 31) /* ID Extension Bit */
+#define RCAR_CAN_FIDCR_RTR	(1U << 30) /* Remote Transmission Request Bit */
+
+/* Receive FIFO Control Register bits */
+#define RCAR_CAN_RFCR_RFEST	(1 << 7) /* Receive FIFO Empty Status Flag */
+#define RCAR_CAN_RFCR_RFE	(1 << 0) /* Receive FIFO Enable */
+
+/* Transmit FIFO Control Register bits */
+#define RCAR_CAN_TFCR_TFUST	(7 << 1) /* Transmit FIFO Unsent Message */
+					/* Number Status Bits */
+#define RCAR_CAN_TFCR_TFUST_SHIFT 1	/* Offset of Transmit FIFO Unsent */
+					/* Message Number Status Bits */
+#define RCAR_CAN_TFCR_TFE	(1 << 0) /* Transmit FIFO Enable */
+
+#define RCAR_CAN_N_RX_MKREGS1	2	/* Number of mask registers */
+					/* for Rx mailboxes 0-31 */
+#define RCAR_CAN_N_RX_MKREGS2	8
+
+/* Bit Configuration Register settings */
+#define RCAR_CAN_BCR_TSEG1(x)	(((x) & 0x0f) << 20)
+#define RCAR_CAN_BCR_BPR(x)	(((x) & 0x3ff) << 8)
+#define RCAR_CAN_BCR_SJW(x)	(((x) & 0x3) << 4)
+#define RCAR_CAN_BCR_TSEG2(x)	((x) & 0x07)
+
+/* Mailbox and Mask Registers bits (unsigned so that bit 31 is well defined) */
+#define RCAR_CAN_IDE		(1U << 31)	/* Extended ID frame */
+#define RCAR_CAN_RTR		(1U << 30)	/* Remote transmission request */
+#define RCAR_CAN_SID_SHIFT	18		/* Position of the standard ID */
+
+/* Mailbox Interrupt Enable Register 1 bits */
+#define RCAR_CAN_MIER1_RXFIE	(1 << 28) /* Receive  FIFO Interrupt Enable */
+#define RCAR_CAN_MIER1_TXFIE	(1 << 24) /* Transmit FIFO Interrupt Enable */
+
+/* Interrupt Enable Register bits */
+#define RCAR_CAN_IER_ERSIE	(1 << 5) /* Error (ERS) Interrupt Enable Bit */
+#define RCAR_CAN_IER_RXFIE	(1 << 4) /* Reception FIFO Interrupt */
+					/* Enable Bit */
+#define RCAR_CAN_IER_TXFIE	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Enable Bit */
+/* Interrupt Status Register bits */
+#define RCAR_CAN_ISR_ERSF	(1 << 5) /* Error (ERS) Interrupt Status Bit */
+#define RCAR_CAN_ISR_RXFF	(1 << 4) /* Reception FIFO Interrupt */
+					/* Status Bit */
+#define RCAR_CAN_ISR_TXFF	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Status Bit */
+
+/* Error Interrupt Enable Register bits */
+#define RCAR_CAN_EIER_BLIE	(1 << 7) /* Bus Lock Interrupt Enable */
+#define RCAR_CAN_EIER_OLIE	(1 << 6) /* Overload Frame Transmit */
+					/* Interrupt Enable */
+#define RCAR_CAN_EIER_ORIE	(1 << 5) /* Receive Overrun  Interrupt Enable */
+#define RCAR_CAN_EIER_BORIE	(1 << 4) /* Bus-Off Recovery Interrupt Enable */
+#define RCAR_CAN_EIER_BOEIE	(1 << 3) /* Bus-Off Entry Interrupt Enable */
+#define RCAR_CAN_EIER_EPIE	(1 << 2) /* Error Passive Interrupt Enable */
+#define RCAR_CAN_EIER_EWIE	(1 << 1) /* Error Warning Interrupt Enable */
+#define RCAR_CAN_EIER_BEIE	(1 << 0) /* Bus Error Interrupt Enable */
+
+/* Error Interrupt Factor Judge Register bits */
+#define RCAR_CAN_EIFR_BLIF	(1 << 7) /* Bus Lock Detect Flag */
+#define RCAR_CAN_EIFR_OLIF	(1 << 6) /* Overload Frame Transmission */
+					 /* Detect Flag */
+#define RCAR_CAN_EIFR_ORIF	(1 << 5) /* Receive Overrun Detect Flag */
+#define RCAR_CAN_EIFR_BORIF	(1 << 4) /* Bus-Off Recovery Detect Flag */
+#define RCAR_CAN_EIFR_BOEIF	(1 << 3) /* Bus-Off Entry Detect Flag */
+#define RCAR_CAN_EIFR_EPIF	(1 << 2) /* Error Passive Detect Flag */
+#define RCAR_CAN_EIFR_EWIF	(1 << 1) /* Error Warning Detect Flag */
+#define RCAR_CAN_EIFR_BEIF	(1 << 0) /* Bus Error Detect Flag */
+
+/* Error Code Store Register bits */
+#define RCAR_CAN_ECSR_EDPM	(1 << 7) /* Error Display Mode Select Bit */
+#define RCAR_CAN_ECSR_ADEF	(1 << 6) /* ACK Delimiter Error Flag */
+#define RCAR_CAN_ECSR_BE0F	(1 << 5) /* Bit Error (dominant) Flag */
+#define RCAR_CAN_ECSR_BE1F	(1 << 4) /* Bit Error (recessive) Flag */
+#define RCAR_CAN_ECSR_CEF	(1 << 3) /* CRC Error Flag */
+#define RCAR_CAN_ECSR_AEF	(1 << 2) /* ACK Error Flag */
+#define RCAR_CAN_ECSR_FEF	(1 << 1) /* Form Error Flag */
+#define RCAR_CAN_ECSR_SEF	(1 << 0) /* Stuff Error Flag */
+
+#define RCAR_CAN_NAPI_WEIGHT	4
+#define MAX_STR_READS		0x100
+
+static void tx_failure_cleanup(struct net_device *ndev)
+{
+	int slot = 0;
+
+	while (slot < RCAR_CAN_FIFO_DEPTH)	/* every echo skb index */
+		can_free_echo_skb(ndev, slot++);
+}
+
+static void rcar_can_error(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 eifr, txerr = 0, rxerr = 0;
+
+	/* Error frame for the CAN stack; may be NULL, flags are still handled */
+	skb = alloc_can_err_skb(ndev, &cf);
+
+	eifr = readb(&priv->regs->eifr);	/* snapshot of pending causes */
+	if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) {
+		txerr = readb(&priv->regs->tecr);
+		rxerr = readb(&priv->regs->recr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[6] = txerr;
+			cf->data[7] = rxerr;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_BEIF) {
+		int rx_errors = 0, tx_errors = 0;
+		u8 ecsr;
+
+		netdev_dbg(priv->ndev, "Bus error interrupt:\n");
+		if (skb) {
+			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+			cf->data[2] = CAN_ERR_PROT_UNSPEC;
+		}
+		ecsr = readb(&priv->regs->ecsr); /* error code flags */
+		if (ecsr & RCAR_CAN_ECSR_ADEF) {
+			netdev_dbg(priv->ndev, "ACK Delimiter Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE0F) {
+			netdev_dbg(priv->ndev, "Bit Error (dominant)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE0F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT0;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE1F) {
+			netdev_dbg(priv->ndev, "Bit Error (recessive)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE1F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT1;
+		}
+		if (ecsr & RCAR_CAN_ECSR_CEF) {
+			netdev_dbg(priv->ndev, "CRC Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		}
+		if (ecsr & RCAR_CAN_ECSR_AEF) {
+			netdev_dbg(priv->ndev, "ACK Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr);
+			if (skb) {
+				cf->can_id |= CAN_ERR_ACK;
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+			}
+		}
+		if (ecsr & RCAR_CAN_ECSR_FEF) {
+			netdev_dbg(priv->ndev, "Form Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_FEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_FORM;
+		}
+		if (ecsr & RCAR_CAN_ECSR_SEF) {
+			netdev_dbg(priv->ndev, "Stuff Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_SEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_STUFF;
+		}
+
+		priv->can.can_stats.bus_error++;
+		ndev->stats.rx_errors += rx_errors;
+		ndev->stats.tx_errors += tx_errors;
+		writeb(~RCAR_CAN_EIFR_BEIF, &priv->regs->eifr);
+	}
+	if (eifr & RCAR_CAN_EIFR_EWIF) {
+		netdev_dbg(priv->ndev, "Error warning interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_WARNING;
+		priv->can.can_stats.error_warning++;
+		/* Clear interrupt condition (flag bit written as 0) */
+		writeb(~RCAR_CAN_EIFR_EWIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_WARNING :
+					      CAN_ERR_CRTL_RX_WARNING;
+	}
+	if (eifr & RCAR_CAN_EIFR_EPIF) {
+		netdev_dbg(priv->ndev, "Error passive interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_PASSIVE;
+		priv->can.can_stats.error_passive++;
+		/* Clear interrupt condition (flag bit written as 0) */
+		writeb(~RCAR_CAN_EIFR_EPIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_PASSIVE :
+					      CAN_ERR_CRTL_RX_PASSIVE;
+	}
+	if (eifr & RCAR_CAN_EIFR_BOEIF) {
+		netdev_dbg(priv->ndev, "Bus-off entry interrupt\n");
+		tx_failure_cleanup(ndev);
+		priv->ier = RCAR_CAN_IER_ERSIE;
+		writeb(priv->ier, &priv->regs->ier);
+		priv->can.state = CAN_STATE_BUS_OFF;
+		/* Clear interrupt condition (flag bit written as 0) */
+		writeb(~RCAR_CAN_EIFR_BOEIF, &priv->regs->eifr);
+		can_bus_off(ndev);
+		if (skb)
+			cf->can_id |= CAN_ERR_BUSOFF;
+	}
+	if (eifr & RCAR_CAN_EIFR_ORIF) {
+		netdev_dbg(priv->ndev, "Receive overrun error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_ORIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_OLIF) {
+		netdev_dbg(priv->ndev,
+			   "Overload Frame Transmission error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_OLIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_PROT;
+			cf->data[2] |= CAN_ERR_PROT_OVERLOAD;
+		}
+	}
+
+	if (skb) {	/* the error frame is counted as a received packet */
+		stats->rx_packets++;
+		stats->rx_bytes += cf->can_dlc;
+		netif_rx(skb);
+	}
+}
+
+/* Tx FIFO interrupt: complete every frame the FIFO no longer reports unsent */
+static void rcar_can_tx_done(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+
+	for (;;) {
+		u8 tfcr = readb(&priv->regs->tfcr);
+		u32 pending = (tfcr & RCAR_CAN_TFCR_TFUST) >>
+			      RCAR_CAN_TFCR_TFUST_SHIFT;
+		u32 slot = priv->tx_tail % RCAR_CAN_FIFO_DEPTH;
+
+		/* Stop once our in-flight count matches the unsent count */
+		if (priv->tx_head - priv->tx_tail <= pending)
+			break;
+		stats->tx_packets++;
+		stats->tx_bytes += priv->tx_dlc[slot];
+		priv->tx_dlc[slot] = 0;
+		can_get_echo_skb(ndev, slot);
+		priv->tx_tail++;
+		netif_wake_queue(ndev);
+	}
+	/* Clear interrupt: write ISR back with the TXFF bit zeroed */
+	writeb(readb(&priv->regs->isr) & ~RCAR_CAN_ISR_TXFF, &priv->regs->isr);
+	can_led_event(ndev, CAN_LED_EVENT_TX);
+}
+
+static irqreturn_t rcar_can_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u8 isr;
+
+	isr = readb(&priv->regs->isr);
+	if (!(isr & priv->ier))
+		return IRQ_NONE;
+
+	if (isr & RCAR_CAN_ISR_ERSF)
+		rcar_can_error(ndev);
+
+	if (isr & RCAR_CAN_ISR_TXFF)
+		rcar_can_tx_done(ndev);
+
+	if (isr & RCAR_CAN_ISR_RXFF) {
+		if (napi_schedule_prep(&priv->napi)) {
+			/* Disable Rx FIFO interrupts */
+			priv->ier &= ~RCAR_CAN_IER_RXFIE;
+			writeb(priv->ier, &priv->regs->ier);
+			__napi_schedule(&priv->napi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void rcar_can_set_bittiming(struct net_device *dev)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	const struct can_bittiming *bt = &priv->can.bittiming;
+	u32 bcr = RCAR_CAN_BCR_TSEG1(bt->phase_seg1 + bt->prop_seg - 1);
+
+	bcr |= RCAR_CAN_BCR_BPR(bt->brp - 1);
+	bcr |= RCAR_CAN_BCR_SJW(bt->sjw - 1);
+	bcr |= RCAR_CAN_BCR_TSEG2(bt->phase_seg2 - 1);
+	/* Program BCR with one 32-bit store. That store also covers CLKR,
+	 * so the clock selection goes into the low byte. Registers are
+	 * big-endian but get byte-swapped on 32-bit (not 8-bit) accesses.
+	 */
+	writel((bcr << 8) | priv->clock_select, &priv->regs->bcr);
+}
+
+static void rcar_can_start(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int i;
+
+	/* Set controller to known mode:
+	 * - FIFO mailbox mode
+	 * - accept all messages
+	 * - overrun mode
+	 * CAN is in sleep mode after MCU hardware or software reset.
+	 */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	/* Go to reset mode; the poll below is bounded, a timeout is ignored */
+	ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	writew(ctlr, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {
+		if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	rcar_can_set_bittiming(ndev);
+	ctlr |= RCAR_CAN_CTLR_IDFM_MIXED; /* Select mixed ID mode */
+	ctlr |= RCAR_CAN_CTLR_BOM_ENT;	/* Entry to halt mode automatically */
+					/* at bus-off */
+	ctlr |= RCAR_CAN_CTLR_MBM;	/* Select FIFO mailbox mode */
+	ctlr |= RCAR_CAN_CTLR_MLM;	/* Overrun mode */
+	writew(ctlr, &priv->regs->ctlr);
+
+	/* Accept all SID and EID */
+	writel(0, &priv->regs->mkr_2_9[6]);
+	writel(0, &priv->regs->mkr_2_9[7]);
+	/* In FIFO mailbox mode, write "0" to bits 24 to 31 */
+	writel(0, &priv->regs->mkivlr1);
+	/* Accept all frames */
+	writel(0, &priv->regs->fidcr[0]);
+	writel(RCAR_CAN_FIDCR_IDE | RCAR_CAN_FIDCR_RTR, &priv->regs->fidcr[1]);
+	/* Enable and configure FIFO mailbox interrupts */
+	writel(RCAR_CAN_MIER1_RXFIE | RCAR_CAN_MIER1_TXFIE, &priv->regs->mier1);
+
+	priv->ier = RCAR_CAN_IER_ERSIE | RCAR_CAN_IER_RXFIE |
+		    RCAR_CAN_IER_TXFIE;
+	writeb(priv->ier, &priv->regs->ier);
+
+	/* Accumulate error codes */
+	writeb(RCAR_CAN_ECSR_EDPM, &priv->regs->ecsr);
+	/* Enable error interrupts; bus errors only if BERR reporting is on */
+	writeb(RCAR_CAN_EIER_EWIE | RCAR_CAN_EIER_EPIE | RCAR_CAN_EIER_BOEIE |
+	       (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING ?
+	       RCAR_CAN_EIER_BEIE : 0) | RCAR_CAN_EIER_ORIE |
+	       RCAR_CAN_EIER_OLIE, &priv->regs->eier);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	/* Go to operation mode; again a bounded poll, a timeout is ignored */
+	writew(ctlr & ~RCAR_CAN_CTLR_CANM, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {
+		if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST))
+			break;
+	}
+	/* Enable Rx and Tx FIFO */
+	writeb(RCAR_CAN_RFCR_RFE, &priv->regs->rfcr);
+	writeb(RCAR_CAN_TFCR_TFE, &priv->regs->tfcr);
+}
+
+/* ndo_open: enable clock, open candev, enable NAPI, request IRQ, start HW */
+static int rcar_can_open(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	int err = clk_prepare_enable(priv->clk);
+
+	if (err) {
+		netdev_err(ndev, "clk_prepare_enable() failed, error %d\n",
+			   err);
+		return err;
+	}
+	err = open_candev(ndev);
+	if (err) {
+		netdev_err(ndev, "open_candev() failed, error %d\n", err);
+		goto out_clock;
+	}
+	napi_enable(&priv->napi);
+	err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev);
+	if (err) {
+		netdev_err(ndev, "request_irq(%d) failed, error %d\n",
+			   ndev->irq, err);
+		goto out_close;
+	}
+	can_led_event(ndev, CAN_LED_EVENT_OPEN);
+	rcar_can_start(ndev);
+	netif_start_queue(ndev);
+	return 0;
+out_close:
+	napi_disable(&priv->napi);
+	close_candev(ndev);
+out_clock:
+	clk_disable_unprepare(priv->clk);
+	return err;
+}
+
+/* Force reset mode, mask every interrupt source, then request sleep mode */
+static void rcar_can_stop(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct rcar_can_regs __iomem *regs = priv->regs;
+	u16 ctlr = readw(&regs->ctlr) | RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	int tries = 0;
+
+	writew(ctlr, &regs->ctlr);
+	/* Bounded poll for the reset status bit; a timeout is not reported */
+	while (tries++ < MAX_STR_READS) {
+		if (readw(&regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	/* Mailbox, top-level and error interrupt enables all go to zero */
+	writel(0, &regs->mier0);
+	writel(0, &regs->mier1);
+	writeb(0, &regs->ier);
+	writeb(0, &regs->eier);
+	/* Go to sleep mode, keeping the reset mode bits set */
+	writew(ctlr | RCAR_CAN_CTLR_SLPM, &regs->ctlr);
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+static int rcar_can_close(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	rcar_can_stop(ndev);	/* HW interrupt enables are zeroed in here */
+	free_irq(ndev->irq, ndev);
+	napi_disable(&priv->napi);
+	clk_disable_unprepare(priv->clk);	/* pairs with rcar_can_open() */
+	close_candev(ndev);
+	can_led_event(ndev, CAN_LED_EVENT_STOP);
+	return 0;
+}
+
+/* ndo_start_xmit: load one frame into the Tx FIFO window and kick the FIFO */
+static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb,
+				       struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct rcar_can_mbox_regs __iomem *mbox;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 id, n, slot;
+
+	if (can_dropped_invalid_skb(ndev, skb))
+		return NETDEV_TX_OK;
+
+	mbox = &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX];
+	slot = priv->tx_head % RCAR_CAN_FIFO_DEPTH;
+	/* Extended IDs are stored as is; standard IDs are shifted up */
+	id = (cf->can_id & CAN_EFF_FLAG) ?
+	     (cf->can_id & CAN_EFF_MASK) | RCAR_CAN_IDE :
+	     (cf->can_id & CAN_SFF_MASK) << RCAR_CAN_SID_SHIFT;
+
+	/* Remote transmission requests carry no payload, only the RTR bit */
+	if (cf->can_id & CAN_RTR_FLAG) {
+		id |= RCAR_CAN_RTR;
+	} else {
+		for (n = 0; n < cf->can_dlc; n++)
+			writeb(cf->data[n], &mbox->data[n]);
+	}
+
+	writel(id, &mbox->id);
+	writeb(cf->can_dlc, &mbox->dlc);
+
+	priv->tx_dlc[slot] = cf->can_dlc;
+	can_put_echo_skb(skb, ndev, slot);
+	priv->tx_head++;
+	/* Start Tx: 0xff to TFPCR advances the CPU-side Tx FIFO pointer */
+	writeb(0xff, &priv->regs->tfpcr);
+	/* Stop the queue if we've filled all FIFO entries */
+	if (priv->tx_head - priv->tx_tail >= RCAR_CAN_FIFO_DEPTH)
+		netif_stop_queue(ndev);
+
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops rcar_can_netdev_ops = {
+	.ndo_open = rcar_can_open,		/* clock, IRQ, NAPI, HW start */
+	.ndo_stop = rcar_can_close,		/* tears down what open set up */
+	.ndo_start_xmit = rcar_can_start_xmit,	/* one frame into the Tx FIFO */
+};
+
+/* Copy the frame in the Rx FIFO window into a fresh skb and deliver it */
+static void rcar_can_rx_pkt(struct rcar_can_priv *priv)
+{
+	struct rcar_can_mbox_regs __iomem *mbox;
+	struct net_device *ndev = priv->ndev;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 id;
+	u8 dlc, n;
+
+	skb = alloc_can_skb(ndev, &cf);
+	if (!skb) {
+		ndev->stats.rx_dropped++;
+		return;
+	}
+
+	mbox = &priv->regs->mb[RCAR_CAN_RX_FIFO_MBX];
+	id = readl(&mbox->id);
+	cf->can_id = (id & RCAR_CAN_IDE) ?
+		     (id & CAN_EFF_MASK) | CAN_EFF_FLAG :
+		     (id >> RCAR_CAN_SID_SHIFT) & CAN_SFF_MASK;
+
+	dlc = readb(&mbox->dlc);
+	cf->can_dlc = get_can_dlc(dlc);
+	if (id & RCAR_CAN_RTR) {
+		cf->can_id |= CAN_RTR_FLAG;
+	} else {
+		for (n = 0; n < cf->can_dlc; n++)
+			cf->data[n] = readb(&mbox->data[n]);
+	}
+
+	can_led_event(ndev, CAN_LED_EVENT_RX);
+	ndev->stats.rx_bytes += cf->can_dlc;
+	ndev->stats.rx_packets++;
+	netif_receive_skb(skb);
+}
+
+/* NAPI poll: drain up to quota frames, then re-arm the Rx FIFO interrupt */
+static int rcar_can_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct rcar_can_priv *priv = container_of(napi,
+						  struct rcar_can_priv, napi);
+	struct rcar_can_regs __iomem *regs = priv->regs;
+	int done = 0;
+
+	while (done < quota) {
+		u8 isr = readb(&regs->isr);
+
+		/* Clear the Rx FIFO interrupt bit before checking the FIFO */
+		if (isr & RCAR_CAN_ISR_RXFF)
+			writeb(isr & ~RCAR_CAN_ISR_RXFF, &regs->isr);
+		/* Empty status flag set: nothing left to fetch */
+		if (readb(&regs->rfcr) & RCAR_CAN_RFCR_RFEST)
+			break;
+		rcar_can_rx_pkt(priv);
+		/* 0xff to RFPCR advances the CPU-side Rx FIFO pointer */
+		writeb(0xff, &regs->rfpcr);
+		done++;
+	}
+	/* Budget not exhausted: finish NAPI and unmask Rx FIFO interrupts */
+	if (done < quota) {
+		napi_complete(napi);
+		priv->ier |= RCAR_CAN_IER_RXFIE;
+		writeb(priv->ier, &regs->ier);
+	}
+
+	return done;
+}
+
+/* CAN core mode-change callback. Only CAN_MODE_START is supported: it
+ * restarts the controller and wakes the transmit queue. Every other mode
+ * yields -EOPNOTSUPP.
+ */
+static int rcar_can_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	if (mode != CAN_MODE_START)
+		return -EOPNOTSUPP;
+
+	rcar_can_start(ndev);
+	netif_wake_queue(ndev);
+	return 0;
+}
+
+static int rcar_can_get_berr_counter(const struct net_device *dev,
+				     struct can_berr_counter *bec)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		return err;
+	bec->txerr = readb(&priv->regs->tecr);
+	bec->rxerr = readb(&priv->regs->recr);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+/* Bind the driver to an R-Car CAN platform device.
+ *
+ * Looks up platform data, IRQ, register window and clock, allocates the CAN
+ * net device, wires up the driver callbacks and the NAPI context and
+ * registers the device.
+ *
+ * Returns 0 on success or a negative errno. On failure the net device and
+ * NAPI context set up here are released again; the register mapping and
+ * the clock are devm-managed.
+ */
+static int rcar_can_probe(struct platform_device *pdev)
+{
+	struct rcar_can_platform_data *pdata;
+	struct rcar_can_priv *priv;
+	struct net_device *ndev;
+	struct resource *mem;
+	void __iomem *addr;
+	int err = -ENODEV;
+	int irq;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data provided!\n");
+		goto fail;
+	}
+
+	/* platform_get_irq() reports failure as a negative errno, so test
+	 * for that and pass the code on. An IRQ of 0 is rejected as well
+	 * and keeps the default -ENODEV.
+	 */
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		if (irq < 0)
+			err = irq;
+		goto fail;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(addr)) {
+		err = PTR_ERR(addr);
+		goto fail;
+	}
+
+	ndev = alloc_candev(sizeof(struct rcar_can_priv), RCAR_CAN_FIFO_DEPTH);
+	if (!ndev) {
+		dev_err(&pdev->dev, "alloc_candev() failed\n");
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	priv = netdev_priv(ndev);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		err = PTR_ERR(priv->clk);
+		dev_err(&pdev->dev, "cannot get clock: %d\n", err);
+		goto fail_clk;
+	}
+
+	ndev->netdev_ops = &rcar_can_netdev_ops;
+	ndev->irq = irq;
+	ndev->flags |= IFF_ECHO;
+	priv->ndev = ndev;
+	priv->regs = addr;
+	priv->clock_select = pdata->clock_select;
+	priv->can.clock.freq = clk_get_rate(priv->clk);
+	priv->can.bittiming_const = &rcar_can_bittiming_const;
+	priv->can.do_set_mode = rcar_can_do_set_mode;
+	priv->can.do_get_berr_counter = rcar_can_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING;
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	netif_napi_add(ndev, &priv->napi, rcar_can_rx_poll,
+		       RCAR_CAN_NAPI_WEIGHT);
+	err = register_candev(ndev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed, error %d\n",
+			err);
+		goto fail_candev;
+	}
+
+	devm_can_led_init(ndev);
+
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n",
+		 priv->regs, ndev->irq);
+
+	return 0;
+fail_candev:
+	netif_napi_del(&priv->napi);
+fail_clk:
+	free_candev(ndev);
+fail:
+	return err;
+}
+
+/* Unbind the driver: undo rcar_can_probe() in reverse order by
+ * unregistering the CAN device, removing the NAPI context and freeing
+ * the net device. Always returns 0.
+ */
+static int rcar_can_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	unregister_candev(ndev);
+	netif_napi_del(&priv->napi);
+	free_candev(ndev);
+	return 0;
+}
+
+/* System suspend hook: quiesce the interface (if it is running), put the
+ * controller to sleep and disable its clock. Always returns 0.
+ */
+static int __maybe_unused rcar_can_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+
+	if (netif_running(ndev)) {
+		netif_stop_queue(ndev);
+		netif_device_detach(ndev);
+	}
+	/* The halt request and the sleep bit are written to CTLR in two
+	 * separate steps, halt first
+	 */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_CANM_HALT;
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_SLEEPING;
+
+	clk_disable(priv->clk);
+	return 0;
+}
+
+/* System resume hook: re-enable the clock, wake the controller up and
+ * re-attach the interface if it was running.
+ * Returns 0 on success or the error from clk_enable().
+ */
+static int __maybe_unused rcar_can_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int err;
+
+	err = clk_enable(priv->clk);
+	if (err) {
+		netdev_err(ndev, "clk_enable() failed, error %d\n", err);
+		return err;
+	}
+
+	/* Mirror image of the suspend sequence: clear the sleep bit first,
+	 * then the CANM mode field, each with its own CTLR write
+	 */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_CANM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	if (netif_running(ndev)) {
+		netif_device_attach(ndev);
+		netif_start_queue(ndev);
+	}
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rcar_can_pm_ops, rcar_can_suspend, rcar_can_resume);
+
+/* Platform driver glue: matched by name (RCAR_CAN_DRV_NAME), with
+ * suspend/resume handled through rcar_can_pm_ops.
+ */
+static struct platform_driver rcar_can_driver = {
+	.driver = {
+		.name = RCAR_CAN_DRV_NAME,
+		.owner = THIS_MODULE,
+		.pm = &rcar_can_pm_ops,
+	},
+	.probe = rcar_can_probe,
+	.remove = rcar_can_remove,
+};
+
+module_platform_driver(rcar_can_driver);
+
+MODULE_AUTHOR("Cogent Embedded, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CAN driver for Renesas R-Car SoC");
+MODULE_ALIAS("platform:" RCAR_CAN_DRV_NAME);

diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 7d8c8f3..bacd236 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c

@@ -556,15 +556,6 @@
 /*
  * netdev sysfs
  */
-static ssize_t show_channel(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct net_device *ndev = to_net_dev(dev);
-	struct softing_priv *priv = netdev2softing(ndev);
-
-	return sprintf(buf, "%i\n", priv->index);
-}
-
 static ssize_t show_chip(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
@@ -609,12 +600,10 @@
 	return count;
 }
 
-static const DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
 static const DEVICE_ATTR(chip, S_IRUGO, show_chip, NULL);
 static const DEVICE_ATTR(output, S_IRUGO | S_IWUSR, show_output, store_output);
 
 static const struct attribute *const netdev_sysfs_attrs[] = {
-	&dev_attr_channel.attr,
 	&dev_attr_chip.attr,
 	&dev_attr_output.attr,
 	NULL,
@@ -679,17 +668,20 @@
 {
 	int ret;
 
-	netdev->sysfs_groups[0] = &netdev_sysfs_group;
 	ret = register_candev(netdev);
 	if (ret) {
 		dev_alert(&netdev->dev, "register failed\n");
 		return ret;
 	}
+	if (sysfs_create_group(&netdev->dev.kobj, &netdev_sysfs_group) < 0)
+		netdev_alert(netdev, "sysfs group failed\n");
+
 	return 0;
 }
 
 static void softing_netdev_cleanup(struct net_device *netdev)
 {
+	sysfs_remove_group(&netdev->dev.kobj, &netdev_sysfs_group);
 	unregister_candev(netdev);
 	free_candev(netdev);
 }
@@ -721,8 +713,6 @@
 DEV_ATTR_RO_STR(hardware, pdat->name);
 DEV_ATTR_RO(hardware_version, id.hw_version);
 DEV_ATTR_RO(license, id.license);
-DEV_ATTR_RO(frequency, id.freq);
-DEV_ATTR_RO(txpending, tx.pending);
 
 static struct attribute *softing_pdev_attrs[] = {
 	&dev_attr_serial.attr,
@@ -731,8 +721,6 @@
 	&dev_attr_hardware.attr,
 	&dev_attr_hardware_version.attr,
 	&dev_attr_license.attr,
-	&dev_attr_frequency.attr,
-	&dev_attr_txpending.attr,
 	NULL,
 };
 

diff --git a/drivers/net/can/spi/Kconfig b/drivers/net/can/spi/Kconfig
new file mode 100644
index 0000000..148cae5
--- /dev/null
+++ b/drivers/net/can/spi/Kconfig

@@ -0,0 +1,10 @@
+menu "CAN SPI interfaces"
+	depends on SPI
+
+config CAN_MCP251X
+	tristate "Microchip MCP251x SPI CAN controllers"
+	depends on HAS_DMA
+	---help---
+	  Driver for the Microchip MCP251x SPI CAN controllers.
+
+endmenu

diff --git a/drivers/net/can/spi/Makefile b/drivers/net/can/spi/Makefile
new file mode 100644
index 0000000..90bcacf
--- /dev/null
+++ b/drivers/net/can/spi/Makefile

@@ -0,0 +1,8 @@
+#
+#  Makefile for the Linux Controller Area Network SPI drivers.
+#
+
+
+obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
+
+ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG

diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/spi/mcp251x.c
similarity index 96%
rename from drivers/net/can/mcp251x.c
rename to drivers/net/can/spi/mcp251x.c
index 28c11f8..5df239e 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c

@@ -214,6 +214,8 @@
 
 #define TX_ECHO_SKB_MAX	1
 
+#define MCP251X_OST_DELAY_MS	(5)
+
 #define DEVICE_NAME "mcp251x"
 
 static int mcp251x_enable_dma; /* Enable SPI DMA. Default: 0 (Off) */
@@ -624,50 +626,45 @@
 static int mcp251x_hw_reset(struct spi_device *spi)
 {
 	struct mcp251x_priv *priv = spi_get_drvdata(spi);
+	u8 reg;
 	int ret;
-	unsigned long timeout;
+
+	/* Wait for oscillator startup timer after power up */
+	mdelay(MCP251X_OST_DELAY_MS);
 
 	priv->spi_tx_buf[0] = INSTRUCTION_RESET;
-	ret = spi_write(spi, priv->spi_tx_buf, 1);
-	if (ret) {
-		dev_err(&spi->dev, "reset failed: ret = %d\n", ret);
-		return -EIO;
-	}
+	ret = mcp251x_spi_trans(spi, 1);
+	if (ret)
+		return ret;
 
-	/* Wait for reset to finish */
-	timeout = jiffies + HZ;
-	mdelay(10);
-	while ((mcp251x_read_reg(spi, CANSTAT) & CANCTRL_REQOP_MASK)
-	       != CANCTRL_REQOP_CONF) {
-		schedule();
-		if (time_after(jiffies, timeout)) {
-			dev_err(&spi->dev, "MCP251x didn't"
-				" enter in conf mode after reset\n");
-			return -EBUSY;
-		}
-	}
+	/* Wait for oscillator startup timer after reset */
+	mdelay(MCP251X_OST_DELAY_MS);
+	
+	reg = mcp251x_read_reg(spi, CANSTAT);
+	if ((reg & CANCTRL_REQOP_MASK) != CANCTRL_REQOP_CONF)
+		return -ENODEV;
+
 	return 0;
 }
 
 static int mcp251x_hw_probe(struct spi_device *spi)
 {
-	int st1, st2;
+	u8 ctrl;
+	int ret;
 
-	mcp251x_hw_reset(spi);
+	ret = mcp251x_hw_reset(spi);
+	if (ret)
+		return ret;
 
-	/*
-	 * Please note that these are "magic values" based on after
-	 * reset defaults taken from data sheet which allows us to see
-	 * if we really have a chip on the bus (we avoid common all
-	 * zeroes or all ones situations)
-	 */
-	st1 = mcp251x_read_reg(spi, CANSTAT) & 0xEE;
-	st2 = mcp251x_read_reg(spi, CANCTRL) & 0x17;
+	ctrl = mcp251x_read_reg(spi, CANCTRL);
 
-	dev_dbg(&spi->dev, "CANSTAT 0x%02x CANCTRL 0x%02x\n", st1, st2);
+	dev_dbg(&spi->dev, "CANCTRL 0x%02x\n", ctrl);
 
-	/* Check for power up default values */
-	return (st1 == 0x80 && st2 == 0x07) ? 1 : 0;
+	/* Check for power up default value */
+	if ((ctrl & 0x17) != 0x07)
+		return -ENODEV;
+
+	return 0;
 }
 
 static int mcp251x_power_enable(struct regulator *reg, int enable)
@@ -776,7 +773,6 @@
 
 	mutex_lock(&priv->mcp_lock);
 	if (priv->after_suspend) {
-		mdelay(10);
 		mcp251x_hw_reset(spi);
 		mcp251x_setup(net, priv, spi);
 		if (priv->after_suspend & AFTER_SUSPEND_RESTART) {
@@ -955,7 +951,7 @@
 	priv->tx_len = 0;
 
 	ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist,
-				   flags, DEVICE_NAME, priv);
+				   flags | IRQF_ONESHOT, DEVICE_NAME, priv);
 	if (ret) {
 		dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq);
 		mcp251x_power_enable(priv->transceiver, 0);
@@ -1032,8 +1028,8 @@
 	struct mcp251x_platform_data *pdata = dev_get_platdata(&spi->dev);
 	struct net_device *net;
 	struct mcp251x_priv *priv;
-	int freq, ret = -ENODEV;
 	struct clk *clk;
+	int freq, ret;
 
 	clk = devm_clk_get(&spi->dev, NULL);
 	if (IS_ERR(clk)) {
@@ -1076,6 +1072,18 @@
 	priv->net = net;
 	priv->clk = clk;
 
+	spi_set_drvdata(spi, priv);
+
+	/* Configure the SPI bus */
+	spi->bits_per_word = 8;
+	if (mcp251x_is_2510(spi))
+		spi->max_speed_hz = spi->max_speed_hz ? : 5 * 1000 * 1000;
+	else
+		spi->max_speed_hz = spi->max_speed_hz ? : 10 * 1000 * 1000;
+	ret = spi_setup(spi);
+	if (ret)
+		goto out_clk;
+
 	priv->power = devm_regulator_get(&spi->dev, "vdd");
 	priv->transceiver = devm_regulator_get(&spi->dev, "xceiver");
 	if ((PTR_ERR(priv->power) == -EPROBE_DEFER) ||
@@ -1088,8 +1096,6 @@
 	if (ret)
 		goto out_clk;
 
-	spi_set_drvdata(spi, priv);
-
 	priv->spi = spi;
 	mutex_init(&priv->mcp_lock);
 
@@ -1134,20 +1140,11 @@
 
 	SET_NETDEV_DEV(net, &spi->dev);
 
-	/* Configure the SPI bus */
-	spi->mode = spi->mode ? : SPI_MODE_0;
-	if (mcp251x_is_2510(spi))
-		spi->max_speed_hz = spi->max_speed_hz ? : 5 * 1000 * 1000;
-	else
-		spi->max_speed_hz = spi->max_speed_hz ? : 10 * 1000 * 1000;
-	spi->bits_per_word = 8;
-	spi_setup(spi);
-
 	/* Here is OK to not lock the MCP, no one knows about it yet */
-	if (!mcp251x_hw_probe(spi)) {
-		ret = -ENODEV;
+	ret = mcp251x_hw_probe(spi);
+	if (ret)
 		goto error_probe;
-	}
+
 	mcp251x_hw_sleep(spi);
 
 	ret = register_candev(net);
@@ -1156,7 +1153,7 @@
 
 	devm_can_led_init(net);
 
-	return ret;
+	return 0;
 
 error_probe:
 	if (mcp251x_enable_dma)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index fc96a3d..a77db919 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig

@@ -13,13 +13,21 @@
           This driver supports the CAN-USB/2 interface
           from esd electronic system design gmbh (http://www.esd.eu).
 
+config CAN_GS_USB
+	tristate "Geschwister Schneider UG interfaces"
+	---help---
+	  This driver supports the Geschwister Schneider USB/CAN devices.
+	  If unsure choose N,
+	  choose Y for built in support,
+	  M to compile as module (module will be named: gs_usb).
+
 config CAN_KVASER_USB
 	tristate "Kvaser CAN/USB interface"
 	---help---
 	  This driver adds support for Kvaser CAN/USB devices like Kvaser
 	  Leaf Light.
 
-	  The driver gives support for the following devices:
+	  The driver provides support for the following devices:
 	    - Kvaser Leaf Light
 	    - Kvaser Leaf Professional HS
 	    - Kvaser Leaf SemiPro HS
@@ -36,6 +44,8 @@
 	    - Kvaser Leaf Light "China"
 	    - Kvaser BlackBird SemiPro
 	    - Kvaser USBcan R
+	    - Kvaser Leaf Light v2
+	    - Kvaser Mini PCI Express HS
 
 	  If unsure, say N.
 

diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile
index becef46..7b9a393 100644
--- a/drivers/net/can/usb/Makefile
+++ b/drivers/net/can/usb/Makefile

@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
 obj-$(CONFIG_CAN_ESD_USB2) += esd_usb2.o
+obj-$(CONFIG_CAN_GS_USB) += gs_usb.o
 obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb.o
 obj-$(CONFIG_CAN_PEAK_USB) += peak_usb/
 obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
new file mode 100644
index 0000000..04b0f84
--- /dev/null
+++ b/drivers/net/can/usb/gs_usb.c

@@ -0,0 +1,971 @@
+/* CAN driver for Geschwister Schneider USB/CAN devices.
+ *
+ * Copyright (C) 2013 Geschwister Schneider Technologie-,
+ * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt).
+ *
+ * Many thanks to all socketcan devs!
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+
+/* Device specific constants */
+#define USB_GSUSB_1_VENDOR_ID      0x1d50
+#define USB_GSUSB_1_PRODUCT_ID     0x606f
+
+#define GSUSB_ENDPOINT_IN          1
+#define GSUSB_ENDPOINT_OUT         2
+
+/* Vendor control request codes, passed as the request argument of
+ * usb_control_msg(). This file uses MODE, BITTIMING and BT_CONST in the
+ * code shown here; the payload of the other requests is presumably one of
+ * the gs_* structures below -- confirm against their callers.
+ */
+enum gs_usb_breq {
+	GS_USB_BREQ_HOST_FORMAT = 0,
+	GS_USB_BREQ_BITTIMING,
+	GS_USB_BREQ_MODE,
+	GS_USB_BREQ_BERR,
+	GS_USB_BREQ_BT_CONST,
+	GS_USB_BREQ_DEVICE_CONFIG
+};
+
+/* Values for gs_device_mode.mode, sent with GS_USB_BREQ_MODE */
+enum gs_can_mode {
+	/* reset a channel. turns it off */
+	GS_CAN_MODE_RESET = 0,
+	/* starts a channel */
+	GS_CAN_MODE_START
+};
+
+/* Channel states as seen by the device.
+ * NOTE(review): presumably the values carried in gs_device_state.state;
+ * no user is visible in this part of the file -- confirm.
+ */
+enum gs_can_state {
+	GS_CAN_STATE_ERROR_ACTIVE = 0,
+	GS_CAN_STATE_ERROR_WARNING,
+	GS_CAN_STATE_ERROR_PASSIVE,
+	GS_CAN_STATE_BUS_OFF,
+	GS_CAN_STATE_STOPPED,
+	GS_CAN_STATE_SLEEPING
+};
+
+/* data types passed between host and device */
+
+/* Tells the device which byte order the host uses */
+struct gs_host_config {
+	u32 byte_order;
+} __packed;
+/* All data exchanged between host and device is exchanged in host byte order,
+ * thanks to the struct gs_host_config byte_order member, which is sent first
+ * to indicate the desired byte order.
+ */
+
+/* Device description reported by the hardware.
+ * NOTE(review): no user is visible in this part of the file; icount
+ * presumably relates to the number of CAN interfaces and the version
+ * fields to firmware/hardware revisions -- confirm against the probe code.
+ */
+struct gs_device_config {
+	u8 reserved1;
+	u8 reserved2;
+	u8 reserved3;
+	u8 icount;
+	u32 sw_version;
+	u32 hw_version;
+} __packed;
+
+/* Bits for gs_device_mode.flags; gs_can_open() derives them from the
+ * CAN_CTRLMODE_* settings of the interface.
+ */
+#define GS_CAN_MODE_NORMAL               0
+#define GS_CAN_MODE_LISTEN_ONLY          (1<<0)
+#define GS_CAN_MODE_LOOP_BACK            (1<<1)
+#define GS_CAN_MODE_TRIPLE_SAMPLE        (1<<2)
+#define GS_CAN_MODE_ONE_SHOT             (1<<3)
+
+/* Payload of the GS_USB_BREQ_MODE request */
+struct gs_device_mode {
+	u32 mode;	/* enum gs_can_mode */
+	u32 flags;	/* GS_CAN_MODE_* bits */
+} __packed;
+
+/* Channel state together with receive and transmit error counters.
+ * NOTE(review): no user is visible in this part of the file; presumably
+ * the payload of GS_USB_BREQ_BERR -- confirm.
+ */
+struct gs_device_state {
+	u32 state;
+	u32 rxerr;
+	u32 txerr;
+} __packed;
+
+/* Payload of the GS_USB_BREQ_BITTIMING request; gs_usb_set_bittiming()
+ * copies each field from the identically named struct can_bittiming member.
+ */
+struct gs_device_bittiming {
+	u32 prop_seg;
+	u32 phase_seg1;
+	u32 phase_seg2;
+	u32 sjw;
+	u32 brp;
+} __packed;
+
+/* Bits of gs_device_bt_const.feature; gs_make_candev() translates each one
+ * into the matching CAN_CTRLMODE_* bit of ctrlmode_supported.
+ */
+#define GS_CAN_FEATURE_LISTEN_ONLY      (1<<0)
+#define GS_CAN_FEATURE_LOOP_BACK        (1<<1)
+#define GS_CAN_FEATURE_TRIPLE_SAMPLE    (1<<2)
+#define GS_CAN_FEATURE_ONE_SHOT         (1<<3)
+
+/* Reply to the GS_USB_BREQ_BT_CONST request: supported features, CAN clock
+ * and the bit timing limits copied into struct can_bittiming_const.
+ */
+struct gs_device_bt_const {
+	u32 feature;	/* GS_CAN_FEATURE_* bits */
+	u32 fclk_can;	/* becomes can.clock.freq */
+	u32 tseg1_min;
+	u32 tseg1_max;
+	u32 tseg2_min;
+	u32 tseg2_max;
+	u32 sjw_max;
+	u32 brp_min;
+	u32 brp_max;
+	u32 brp_inc;
+} __packed;
+
+/* Bit in gs_host_frame.flags; the receive path answers it with an
+ * rx-overflow error frame.
+ */
+#define GS_CAN_FLAG_OVERFLOW 1
+
+/* One CAN frame as exchanged with the device over the bulk endpoints */
+struct gs_host_frame {
+	u32 echo_id;	/* tx context index for echoes, -1 for received frames */
+	u32 can_id;
+
+	u8 can_dlc;
+	u8 channel;	/* index into gs_usb.canch[] */
+	u8 flags;	/* GS_CAN_FLAG_* */
+	u8 reserved;
+
+	u8 data[8];
+} __packed;
+/* The GS USB devices make use of the same flags and masks as in
+ * linux/can.h and linux/can/error.h, and no additional mapping is necessary.
+ */
+
+/* Only send a max of GS_MAX_TX_URBS frames per channel at a time. */
+#define GS_MAX_TX_URBS 10
+/* Only launch a max of GS_MAX_RX_URBS usb requests at a time. */
+#define GS_MAX_RX_URBS 30
+/* Maximum number of interfaces the driver supports per device.
+ * Current hardware only supports 2 interfaces. The future may vary.
+ */
+#define GS_MAX_INTF 2
+
+/* Bookkeeping slot for one frame handed to the device for transmission.
+ * A slot is free while echo_id equals GS_MAX_TX_URBS; while in use,
+ * echo_id holds the slot's own index in gs_can.tx_context[].
+ */
+struct gs_tx_context {
+	struct gs_can *dev;
+	unsigned int echo_id;
+};
+
+/* Per-channel driver state, stored as the private area of the net device */
+struct gs_can {
+	struct can_priv can; /* must be the first member */
+
+	struct gs_usb *parent;
+
+	struct net_device *netdev;
+	struct usb_device *udev;
+	struct usb_interface *iface;
+
+	struct can_bittiming_const bt_const;
+	unsigned int channel;	/* channel number */
+
+	/* This lock prevents a race condition between xmit and receive. */
+	spinlock_t tx_ctx_lock;
+	struct gs_tx_context tx_context[GS_MAX_TX_URBS];
+
+	/* in-flight transmit URBs and their count */
+	struct usb_anchor tx_submitted;
+	atomic_t active_tx_urbs;
+};
+
+/* usb interface struct: state shared by all channels of one device */
+struct gs_usb {
+	/* per-channel state, indexed by channel number; may hold NULL */
+	struct gs_can *canch[GS_MAX_INTF];
+	/* receive URBs, shared by all channels */
+	struct usb_anchor rx_submitted;
+	/* number of open channels: the first open submits the receive
+	 * URBs, the last close kills them
+	 */
+	atomic_t active_channels;
+	struct usb_device *udev;
+};
+
+/* Claim a free tx context slot.
+ * A slot is free while its echo_id equals GS_MAX_TX_URBS; claiming it
+ * stores the slot index in echo_id. The search runs under tx_ctx_lock.
+ * Returns the claimed slot, or NULL when every slot is in use.
+ */
+static struct gs_tx_context *gs_alloc_tx_context(struct gs_can *dev)
+{
+	struct gs_tx_context *found = NULL;
+	unsigned long irqflags;
+	int slot;
+
+	spin_lock_irqsave(&dev->tx_ctx_lock, irqflags);
+
+	for (slot = 0; slot < GS_MAX_TX_URBS; slot++) {
+		if (dev->tx_context[slot].echo_id != GS_MAX_TX_URBS)
+			continue;
+
+		dev->tx_context[slot].echo_id = slot;
+		found = &dev->tx_context[slot];
+		break;
+	}
+
+	spin_unlock_irqrestore(&dev->tx_ctx_lock, irqflags);
+	return found;
+}
+
+/* Releases a tx context by resetting its echo_id to the "free" marker
+ * GS_MAX_TX_URBS. Note that this does not take tx_ctx_lock itself.
+ */
+static void gs_free_tx_context(struct gs_tx_context *txc)
+{
+	txc->echo_id = GS_MAX_TX_URBS;
+}
+
+/* Look up the tx context that is currently in use under echo id @id.
+ * Returns NULL when @id is out of range or the slot is not claimed with
+ * that id. The slot is inspected under tx_ctx_lock.
+ */
+static struct gs_tx_context *gs_get_tx_context(struct gs_can *dev, unsigned int id)
+{
+	struct gs_tx_context *txc = NULL;
+	unsigned long irqflags;
+
+	if (id >= GS_MAX_TX_URBS)
+		return NULL;
+
+	spin_lock_irqsave(&dev->tx_ctx_lock, irqflags);
+	if (dev->tx_context[id].echo_id == id)
+		txc = &dev->tx_context[id];
+	spin_unlock_irqrestore(&dev->tx_ctx_lock, irqflags);
+
+	return txc;
+}
+
+/* Send a GS_USB_BREQ_MODE request with GS_CAN_MODE_RESET for the channel
+ * of @gsdev, which turns that channel off.
+ *
+ * Returns the usb_control_msg() result (negative errno on failure) or
+ * -ENOMEM if the request buffer cannot be allocated. @gsusb is not used.
+ */
+static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev)
+{
+	struct gs_device_mode *dm;
+	struct usb_interface *intf = gsdev->iface;
+	int rc;
+
+	/* The buffer handed to usb_control_msg() is heap allocated here and
+	 * zeroed, so flags is 0
+	 */
+	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+	if (!dm)
+		return -ENOMEM;
+
+	dm->mode = GS_CAN_MODE_RESET;
+
+	rc = usb_control_msg(interface_to_usbdev(intf),
+			     usb_sndctrlpipe(interface_to_usbdev(intf), 0),
+			     GS_USB_BREQ_MODE,
+			     USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     gsdev->channel,
+			     0,
+			     dm,
+			     sizeof(*dm),
+			     1000);
+
+	/* The transfer is synchronous; release the buffer on every path */
+	kfree(dm);
+
+	return rc;
+}
+
+static void gs_update_state(struct gs_can *dev, struct can_frame *cf)
+{
+	struct can_device_stats *can_stats = &dev->can.can_stats;
+
+	if (cf->can_id & CAN_ERR_RESTARTED) {
+		dev->can.state = CAN_STATE_ERROR_ACTIVE;
+		can_stats->restarts++;
+	} else if (cf->can_id & CAN_ERR_BUSOFF) {
+		dev->can.state = CAN_STATE_BUS_OFF;
+		can_stats->bus_off++;
+	} else if (cf->can_id & CAN_ERR_CRTL) {
+		if ((cf->data[1] & CAN_ERR_CRTL_TX_WARNING) ||
+		    (cf->data[1] & CAN_ERR_CRTL_RX_WARNING)) {
+			dev->can.state = CAN_STATE_ERROR_WARNING;
+			can_stats->error_warning++;
+		} else if ((cf->data[1] & CAN_ERR_CRTL_TX_PASSIVE) ||
+			   (cf->data[1] & CAN_ERR_CRTL_RX_PASSIVE)) {
+			dev->can.state = CAN_STATE_ERROR_PASSIVE;
+			can_stats->error_passive++;
+		} else {
+			dev->can.state = CAN_STATE_ERROR_ACTIVE;
+		}
+	}
+}
+
+/* Completion handler for the bulk-in URBs shared by all channels.
+ *
+ * A frame with echo_id == -1 is a received CAN frame and is passed up the
+ * stack; any other echo_id acknowledges a frame sent earlier, which
+ * releases its tx context, loops the echo skb back and wakes the queue.
+ * An overflow flag additionally produces an error frame. The URB is then
+ * resubmitted; if that fails with -ENODEV all interfaces are detached.
+ *
+ * URBs that completed with an error status, or that arrive while the
+ * target interface is detached, are not resubmitted.
+ */
+static void gs_usb_recieve_bulk_callback(struct urb *urb)
+{
+	struct gs_usb *usbcan = urb->context;
+	struct gs_can *dev;
+	struct net_device *netdev;
+	int rc;
+	int i;
+	struct net_device_stats *stats;
+	struct gs_host_frame *hf = urb->transfer_buffer;
+	struct gs_tx_context *txc;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+
+	BUG_ON(!usbcan);
+
+	switch (urb->status) {
+	case 0: /* success */
+		break;
+	case -ENOENT:
+	case -ESHUTDOWN:
+		return;
+	default:
+		/* do not resubmit aborted urbs. eg: when device goes down */
+		return;
+	}
+
+	/* device reports out of range channel id */
+	if (hf->channel >= GS_MAX_INTF)
+		goto resubmit_urb;
+
+	/* canch[] slots may be NULL: the channel id is in range but no
+	 * interface is registered for it
+	 */
+	dev = usbcan->canch[hf->channel];
+	if (!dev)
+		goto resubmit_urb;
+
+	netdev = dev->netdev;
+	stats = &netdev->stats;
+
+	if (!netif_device_present(netdev))
+		return;
+
+	if (hf->echo_id == -1) { /* normal rx */
+		skb = alloc_can_skb(dev->netdev, &cf);
+		/* Drop the frame but keep the URB in circulation */
+		if (!skb)
+			goto resubmit_urb;
+
+		cf->can_id = hf->can_id;
+
+		cf->can_dlc = get_can_dlc(hf->can_dlc);
+		memcpy(cf->data, hf->data, 8);
+
+		/* ERROR frames tell us information about the controller */
+		if (hf->can_id & CAN_ERR_FLAG)
+			gs_update_state(dev, cf);
+
+		/* Count the sanitised length, not the raw device value */
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += cf->can_dlc;
+
+		netif_rx(skb);
+	} else { /* echo_id == hf->echo_id */
+		if (hf->echo_id >= GS_MAX_TX_URBS) {
+			netdev_err(netdev,
+				   "Unexpected out of range echo id %u\n",
+				   hf->echo_id);
+			goto resubmit_urb;
+		}
+
+		txc = gs_get_tx_context(dev, hf->echo_id);
+
+		/* bad devices send bad echo_ids. */
+		if (!txc) {
+			netdev_err(netdev,
+				   "Unexpected unused echo id %u\n",
+				   hf->echo_id);
+			goto resubmit_urb;
+		}
+
+		/* Only echoes matching an outstanding context count as sent */
+		netdev->stats.tx_packets++;
+		netdev->stats.tx_bytes += get_can_dlc(hf->can_dlc);
+
+		can_get_echo_skb(netdev, hf->echo_id);
+
+		gs_free_tx_context(txc);
+
+		netif_wake_queue(netdev);
+	}
+
+	if (hf->flags & GS_CAN_FLAG_OVERFLOW) {
+		skb = alloc_can_err_skb(netdev, &cf);
+		if (!skb)
+			goto resubmit_urb;
+
+		cf->can_id |= CAN_ERR_CRTL;
+		cf->can_dlc = CAN_ERR_DLC;
+		cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+		stats->rx_over_errors++;
+		stats->rx_errors++;
+		netif_rx(skb);
+	}
+
+ resubmit_urb:
+	usb_fill_bulk_urb(urb,
+			  usbcan->udev,
+			  usb_rcvbulkpipe(usbcan->udev, GSUSB_ENDPOINT_IN),
+			  hf,
+			  sizeof(struct gs_host_frame),
+			  gs_usb_recieve_bulk_callback,
+			  usbcan
+			  );
+
+	rc = usb_submit_urb(urb, GFP_ATOMIC);
+
+	/* USB failure take down all interfaces */
+	if (rc == -ENODEV) {
+		for (i = 0; i < GS_MAX_INTF; i++) {
+			if (usbcan->canch[i])
+				netif_device_detach(usbcan->canch[i]->netdev);
+		}
+	}
+}
+
+/* CAN core do_set_bittiming callback: send the bit timing currently stored
+ * in can.bittiming to the device with a GS_USB_BREQ_BITTIMING request.
+ *
+ * Returns 0 on success, -ENOMEM if the request buffer cannot be allocated,
+ * or the negative errno from usb_control_msg().
+ */
+static int gs_usb_set_bittiming(struct net_device *netdev)
+{
+	struct gs_can *dev = netdev_priv(netdev);
+	struct can_bittiming *bt = &dev->can.bittiming;
+	struct usb_interface *intf = dev->iface;
+	int rc;
+	struct gs_device_bittiming *dbt;
+
+	dbt = kmalloc(sizeof(*dbt), GFP_KERNEL);
+	if (!dbt)
+		return -ENOMEM;
+
+	dbt->prop_seg = bt->prop_seg;
+	dbt->phase_seg1 = bt->phase_seg1;
+	dbt->phase_seg2 = bt->phase_seg2;
+	dbt->sjw = bt->sjw;
+	dbt->brp = bt->brp;
+
+	/* request bit timings */
+	rc = usb_control_msg(interface_to_usbdev(intf),
+			     usb_sndctrlpipe(interface_to_usbdev(intf), 0),
+			     GS_USB_BREQ_BITTIMING,
+			     USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     dev->channel,
+			     0,
+			     dbt,
+			     sizeof(*dbt),
+			     1000);
+
+	kfree(dbt);
+
+	if (rc < 0)
+		dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
+			rc);
+
+	/* usb_control_msg() returns the number of bytes transferred on
+	 * success, but the caller treats any non-zero value as an error
+	 */
+	return (rc > 0) ? 0 : rc;
+}
+
+/* Completion handler for transmit URBs: logs a failed transfer, releases
+ * the frame buffer, drops the in-flight count and restarts a stopped queue
+ * as long as the interface is still present.
+ */
+static void gs_usb_xmit_callback(struct urb *urb)
+{
+	struct gs_tx_context *ctx = urb->context;
+	struct gs_can *chan = ctx->dev;
+	struct net_device *netdev = chan->netdev;
+
+	if (urb->status)
+		netdev_info(netdev, "usb xmit fail %d\n", ctx->echo_id);
+
+	usb_free_coherent(urb->dev, urb->transfer_buffer_length,
+			  urb->transfer_buffer, urb->transfer_dma);
+
+	atomic_dec(&chan->active_tx_urbs);
+
+	if (netif_device_present(netdev) && netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+}
+
+/* ndo_start_xmit: hand one CAN frame to the device.
+ *
+ * Claims a tx context (its index doubles as the echo id), copies the frame
+ * into a freshly allocated USB buffer and submits it on the bulk-out
+ * endpoint. The skb is kept as echo skb until the device echoes the frame
+ * back. The queue is stopped once GS_MAX_TX_URBS URBs are in flight.
+ *
+ * Returns NETDEV_TX_BUSY when no tx context is free; otherwise
+ * NETDEV_TX_OK, with the frame counted as dropped if it could not be sent.
+ */
+static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct gs_can *dev = netdev_priv(netdev);
+	struct net_device_stats *stats = &dev->netdev->stats;
+	struct urb *urb;
+	struct gs_host_frame *hf;
+	struct can_frame *cf;
+	int rc;
+	unsigned int idx;
+	struct gs_tx_context *txc;
+
+	if (can_dropped_invalid_skb(netdev, skb))
+		return NETDEV_TX_OK;
+
+	/* find an empty context to keep track of transmission */
+	txc = gs_alloc_tx_context(dev);
+	if (!txc)
+		return NETDEV_TX_BUSY;
+
+	/* create a URB, and a buffer for it */
+	urb = usb_alloc_urb(0, GFP_ATOMIC);
+	if (!urb) {
+		netdev_err(netdev, "No memory left for URB\n");
+		goto nomem_urb;
+	}
+
+	hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC,
+				&urb->transfer_dma);
+	if (!hf) {
+		netdev_err(netdev, "No memory left for USB buffer\n");
+		goto nomem_hf;
+	}
+
+	idx = txc->echo_id;
+
+	if (idx >= GS_MAX_TX_URBS) {
+		netdev_err(netdev, "Invalid tx context %d\n", idx);
+		goto badidx;
+	}
+
+	/* The whole frame goes to the device, so clear it first: flags,
+	 * reserved and the unused tail of data[] are not set below
+	 */
+	memset(hf, 0, sizeof(*hf));
+
+	hf->echo_id = idx;
+	hf->channel = dev->channel;
+
+	cf = (struct can_frame *)skb->data;
+
+	hf->can_id = cf->can_id;
+	hf->can_dlc = cf->can_dlc;
+	memcpy(hf->data, cf->data, cf->can_dlc);
+
+	usb_fill_bulk_urb(urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, GSUSB_ENDPOINT_OUT),
+			  hf,
+			  sizeof(*hf),
+			  gs_usb_xmit_callback,
+			  txc);
+
+	urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	usb_anchor_urb(urb, &dev->tx_submitted);
+
+	can_put_echo_skb(skb, netdev, idx);
+
+	atomic_inc(&dev->active_tx_urbs);
+
+	rc = usb_submit_urb(urb, GFP_ATOMIC);
+	if (unlikely(rc)) {			/* usb send failed */
+		atomic_dec(&dev->active_tx_urbs);
+
+		can_free_echo_skb(netdev, idx);
+		gs_free_tx_context(txc);
+
+		usb_unanchor_urb(urb);
+		usb_free_coherent(dev->udev,
+				  sizeof(*hf),
+				  hf,
+				  urb->transfer_dma);
+
+
+		if (rc == -ENODEV) {
+			netif_device_detach(netdev);
+		} else {
+			netdev_err(netdev, "usb_submit failed (err=%d)\n", rc);
+			stats->tx_dropped++;
+		}
+	} else {
+		/* Slow down tx path */
+		if (atomic_read(&dev->active_tx_urbs) >= GS_MAX_TX_URBS)
+			netif_stop_queue(netdev);
+	}
+
+	/* let usb core take care of this urb */
+	usb_free_urb(urb);
+
+	return NETDEV_TX_OK;
+
+ badidx:
+	usb_free_coherent(dev->udev,
+			  sizeof(*hf),
+			  hf,
+			  urb->transfer_dma);
+ nomem_hf:
+	usb_free_urb(urb);
+
+ nomem_urb:
+	gs_free_tx_context(txc);
+	dev_kfree_skb(skb);
+	stats->tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+/* ndo_open: bring one CAN channel up.
+ *
+ * The first channel opened on a device also allocates and submits the
+ * shared pool of GS_MAX_RX_URBS receive URBs. The channel is then started
+ * with a GS_USB_BREQ_MODE request carrying the flags derived from the
+ * configured control modes.
+ *
+ * Returns 0 on success or a negative errno. On failure the channel count
+ * is dropped again (killing the receive URBs if this was the only open
+ * channel) and the candev is closed, so a later open starts from scratch.
+ */
+static int gs_can_open(struct net_device *netdev)
+{
+	struct gs_can *dev = netdev_priv(netdev);
+	struct gs_usb *parent = dev->parent;
+	int rc, i;
+	struct gs_device_mode *dm;
+	u32 ctrlmode;
+
+	rc = open_candev(netdev);
+	if (rc)
+		return rc;
+
+	if (atomic_add_return(1, &parent->active_channels) == 1) {
+		for (i = 0; i < GS_MAX_RX_URBS; i++) {
+			struct urb *urb;
+			u8 *buf;
+
+			/* alloc rx urb */
+			urb = usb_alloc_urb(0, GFP_KERNEL);
+			if (!urb) {
+				netdev_err(netdev,
+					   "No memory left for URB\n");
+				rc = -ENOMEM;
+				goto out_close;
+			}
+
+			/* alloc rx buffer */
+			buf = usb_alloc_coherent(dev->udev,
+						 sizeof(struct gs_host_frame),
+						 GFP_KERNEL,
+						 &urb->transfer_dma);
+			if (!buf) {
+				netdev_err(netdev,
+					   "No memory left for USB buffer\n");
+				usb_free_urb(urb);
+				rc = -ENOMEM;
+				goto out_close;
+			}
+
+			/* fill, anchor, and submit rx urb */
+			usb_fill_bulk_urb(urb,
+					  dev->udev,
+					  usb_rcvbulkpipe(dev->udev,
+							  GSUSB_ENDPOINT_IN),
+					  buf,
+					  sizeof(struct gs_host_frame),
+					  gs_usb_recieve_bulk_callback,
+					  parent);
+			urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+			usb_anchor_urb(urb, &parent->rx_submitted);
+
+			rc = usb_submit_urb(urb, GFP_KERNEL);
+			if (rc) {
+				if (rc == -ENODEV)
+					netif_device_detach(dev->netdev);
+
+				netdev_err(netdev,
+					   "usb_submit failed (err=%d)\n",
+					   rc);
+
+				/* This URB never went out: release it and
+				 * its buffer, then carry on with the URBs
+				 * submitted so far
+				 */
+				usb_unanchor_urb(urb);
+				usb_free_coherent(dev->udev,
+						  sizeof(struct gs_host_frame),
+						  buf,
+						  urb->transfer_dma);
+				usb_free_urb(urb);
+				break;
+			}
+
+			/* Drop reference,
+			 * USB core will take care of freeing it
+			 */
+			usb_free_urb(urb);
+		}
+	}
+
+	dm = kmalloc(sizeof(*dm), GFP_KERNEL);
+	if (!dm) {
+		rc = -ENOMEM;
+		goto out_close;
+	}
+
+	/* flags */
+	ctrlmode = dev->can.ctrlmode;
+	dm->flags = 0;
+
+	if (ctrlmode & CAN_CTRLMODE_LOOPBACK)
+		dm->flags |= GS_CAN_MODE_LOOP_BACK;
+	else if (ctrlmode & CAN_CTRLMODE_LISTENONLY)
+		dm->flags |= GS_CAN_MODE_LISTEN_ONLY;
+
+	/* Controller is not allowed to retry TX
+	 * this mode is unavailable on atmels uc3c hardware
+	 */
+	if (ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+		dm->flags |= GS_CAN_MODE_ONE_SHOT;
+
+	if (ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		dm->flags |= GS_CAN_MODE_TRIPLE_SAMPLE;
+
+	/* finally start device */
+	dm->mode = GS_CAN_MODE_START;
+	rc = usb_control_msg(interface_to_usbdev(dev->iface),
+			     usb_sndctrlpipe(interface_to_usbdev(dev->iface), 0),
+			     GS_USB_BREQ_MODE,
+			     USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     dev->channel,
+			     0,
+			     dm,
+			     sizeof(*dm),
+			     1000);
+
+	kfree(dm);
+
+	if (rc < 0) {
+		netdev_err(netdev, "Couldn't start device (err=%d)\n", rc);
+		goto out_close;
+	}
+
+	dev->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
+		netif_start_queue(netdev);
+
+	return 0;
+
+out_close:
+	/* ndo_stop is not called after a failed open, so undo the channel
+	 * accounting and open_candev() here
+	 */
+	if (atomic_dec_and_test(&parent->active_channels))
+		usb_kill_anchored_urbs(&parent->rx_submitted);
+	close_candev(netdev);
+	return rc;
+}
+
+/* ndo_stop: take one CAN channel down.
+ *
+ * Stops the queue, kills the shared receive URBs if this was the last open
+ * channel, kills this channel's transmit URBs, resets the channel on the
+ * device, marks every tx context free and closes the candev.
+ * Always returns 0; a failed device reset is only logged.
+ */
+static int gs_can_close(struct net_device *netdev)
+{
+	int rc;
+	struct gs_can *dev = netdev_priv(netdev);
+	struct gs_usb *parent = dev->parent;
+
+	netif_stop_queue(netdev);
+
+	/* Stop polling */
+	if (atomic_dec_and_test(&parent->active_channels))
+		usb_kill_anchored_urbs(&parent->rx_submitted);
+
+	/* Stop sending URBs */
+	usb_kill_anchored_urbs(&dev->tx_submitted);
+	atomic_set(&dev->active_tx_urbs, 0);
+
+	/* reset the device */
+	rc = gs_cmd_reset(parent, dev);
+	if (rc < 0)
+		netdev_warn(netdev, "Couldn't shutdown device (err=%d)", rc);
+
+	/* reset tx contexts (rc is reused as the loop index from here on) */
+	for (rc = 0; rc < GS_MAX_TX_URBS; rc++) {
+		dev->tx_context[rc].dev = dev;
+		dev->tx_context[rc].echo_id = GS_MAX_TX_URBS;
+	}
+
+	/* close the netdev */
+	close_candev(netdev);
+
+	return 0;
+}
+
+/* Net device operations for each CAN channel: open, stop and transmit */
+static const struct net_device_ops gs_usb_netdev_ops = {
+	.ndo_open = gs_can_open,
+	.ndo_stop = gs_can_close,
+	.ndo_start_xmit = gs_can_start_xmit,
+};
+
+/* Create and register the candev for one channel of a gs_usb interface.
+ *
+ * @channel: channel index; sent to the device as wValue of the
+ *           GS_USB_BREQ_BT_CONST request and stored in the new gs_can
+ * @intf:    USB interface the channel belongs to
+ *
+ * Reads the bit timing constants and feature flags from the device, then
+ * allocates, initialises and registers a CAN net_device.
+ *
+ * Return: the gs_can private data of the registered netdev, or an ERR_PTR()
+ * on failure.  The ->parent member is not set here.
+ */
+static struct gs_can *gs_make_candev(unsigned int channel, struct usb_interface *intf)
+{
+	struct gs_can *dev;
+	struct net_device *netdev;
+	int rc;
+	unsigned int i;
+	struct gs_device_bt_const *bt_const;
+
+	bt_const = kmalloc(sizeof(*bt_const), GFP_KERNEL);
+	if (!bt_const)
+		return ERR_PTR(-ENOMEM);
+
+	/* fetch bit timing constants */
+	rc = usb_control_msg(interface_to_usbdev(intf),
+			     usb_rcvctrlpipe(interface_to_usbdev(intf), 0),
+			     GS_USB_BREQ_BT_CONST,
+			     USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     channel,
+			     0,
+			     bt_const,
+			     sizeof(*bt_const),
+			     1000);
+
+	/* bt_const comes from kmalloc(), so a short transfer would leave
+	 * uninitialised bytes in the fields consumed below; treat it as an
+	 * I/O error.
+	 */
+	if (rc >= 0 && rc < (int)sizeof(*bt_const))
+		rc = -EIO;
+
+	if (rc < 0) {
+		dev_err(&intf->dev,
+			"Couldn't get bit timing const for channel (err=%d)\n",
+			rc);
+		kfree(bt_const);
+		return ERR_PTR(rc);
+	}
+
+	/* create netdev */
+	netdev = alloc_candev(sizeof(struct gs_can), GS_MAX_TX_URBS);
+	if (!netdev) {
+		dev_err(&intf->dev, "Couldn't allocate candev\n");
+		kfree(bt_const);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* dev lives inside the netdev allocation */
+	dev = netdev_priv(netdev);
+
+	netdev->netdev_ops = &gs_usb_netdev_ops;
+
+	netdev->flags |= IFF_ECHO; /* we support full roundtrip echo */
+
+	/* dev setup */
+	strcpy(dev->bt_const.name, "gs_usb");
+	dev->bt_const.tseg1_min = bt_const->tseg1_min;
+	dev->bt_const.tseg1_max = bt_const->tseg1_max;
+	dev->bt_const.tseg2_min = bt_const->tseg2_min;
+	dev->bt_const.tseg2_max = bt_const->tseg2_max;
+	dev->bt_const.sjw_max = bt_const->sjw_max;
+	dev->bt_const.brp_min = bt_const->brp_min;
+	dev->bt_const.brp_max = bt_const->brp_max;
+	dev->bt_const.brp_inc = bt_const->brp_inc;
+
+	dev->udev = interface_to_usbdev(intf);
+	dev->iface = intf;
+	dev->netdev = netdev;
+	dev->channel = channel;
+
+	init_usb_anchor(&dev->tx_submitted);
+	atomic_set(&dev->active_tx_urbs, 0);
+	spin_lock_init(&dev->tx_ctx_lock);
+	for (i = 0; i < GS_MAX_TX_URBS; i++) {
+		dev->tx_context[i].dev = dev;
+		dev->tx_context[i].echo_id = GS_MAX_TX_URBS;
+	}
+
+	/* can setup */
+	dev->can.state = CAN_STATE_STOPPED;
+	dev->can.clock.freq = bt_const->fclk_can;
+	dev->can.bittiming_const = &dev->bt_const;
+	dev->can.do_set_bittiming = gs_usb_set_bittiming;
+
+	/* advertise only the control modes the device reports */
+	dev->can.ctrlmode_supported = 0;
+
+	if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY)
+		dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
+
+	if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK)
+		dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK;
+
+	if (bt_const->feature & GS_CAN_FEATURE_TRIPLE_SAMPLE)
+		dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
+
+	if (bt_const->feature & GS_CAN_FEATURE_ONE_SHOT)
+		dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT;
+
+	kfree(bt_const);
+
+	SET_NETDEV_DEV(netdev, &intf->dev);
+
+	rc = register_candev(dev->netdev);
+	if (rc) {
+		/* dev is freed together with the netdev; don't touch it after */
+		free_candev(dev->netdev);
+		dev_err(&intf->dev, "Couldn't register candev (err=%d)\n", rc);
+		return ERR_PTR(rc);
+	}
+
+	return dev;
+}
+
+/* Unregister and free one CAN channel.
+ *
+ * @dev is the netdev_priv() area of dev->netdev (see gs_make_candev()), so
+ * it is released by free_candev(): it must not be touched after that call
+ * and must not be passed to kfree() on its own.  The anchored TX URBs are
+ * therefore killed before the netdev is freed.
+ */
+static void gs_destroy_candev(struct gs_can *dev)
+{
+	unregister_candev(dev->netdev);
+	usb_kill_anchored_urbs(&dev->tx_submitted);
+	free_candev(dev->netdev);
+}
+
+/* USB probe: configure the device and create one candev per channel.
+ *
+ * Sends the host byte-order marker, reads the device configuration to learn
+ * the channel count (icount + 1), then allocates the per-interface gs_usb
+ * state and calls gs_make_candev() for every channel.  If any channel fails,
+ * the channels created so far are destroyed again.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
+{
+	struct gs_usb *dev;
+	int rc = -ENOMEM;
+	unsigned int icount, i;
+	struct gs_host_config *hconf;
+	struct gs_device_config *dconf;
+
+	hconf = kmalloc(sizeof(*hconf), GFP_KERNEL);
+	if (!hconf)
+		return -ENOMEM;
+
+	hconf->byte_order = 0x0000beef;
+
+	/* send host config */
+	rc = usb_control_msg(interface_to_usbdev(intf),
+			     usb_sndctrlpipe(interface_to_usbdev(intf), 0),
+			     GS_USB_BREQ_HOST_FORMAT,
+			     USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     1,
+			     intf->altsetting[0].desc.bInterfaceNumber,
+			     hconf,
+			     sizeof(*hconf),
+			     1000);
+
+	kfree(hconf);
+
+	if (rc < 0) {
+		dev_err(&intf->dev, "Couldn't send data format (err=%d)\n",
+			rc);
+		return rc;
+	}
+
+	dconf = kmalloc(sizeof(*dconf), GFP_KERNEL);
+	if (!dconf)
+		return -ENOMEM;
+
+	/* read device config */
+	rc = usb_control_msg(interface_to_usbdev(intf),
+			     usb_rcvctrlpipe(interface_to_usbdev(intf), 0),
+			     GS_USB_BREQ_DEVICE_CONFIG,
+			     USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE,
+			     1,
+			     intf->altsetting[0].desc.bInterfaceNumber,
+			     dconf,
+			     sizeof(*dconf),
+			     1000);
+	if (rc < 0) {
+		dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n",
+			rc);
+
+		kfree(dconf);
+
+		return rc;
+	}
+
+	icount = dconf->icount+1;
+
+	kfree(dconf);
+
+	dev_info(&intf->dev, "Configuring for %d interfaces\n", icount);
+
+	if (icount > GS_MAX_INTF) {
+		dev_err(&intf->dev,
+			"Driver cannot handle more that %d CAN interfaces\n",
+			GS_MAX_INTF);
+		return -EINVAL;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	init_usb_anchor(&dev->rx_submitted);
+
+	atomic_set(&dev->active_channels, 0);
+
+	usb_set_intfdata(intf, dev);
+	dev->udev = interface_to_usbdev(intf);
+
+	for (i = 0; i < icount; i++) {
+		dev->canch[i] = gs_make_candev(i, intf);
+		if (IS_ERR_OR_NULL(dev->canch[i])) {
+			/* rc still holds the non-negative result of the last
+			 * control transfer; report the real failure instead.
+			 */
+			rc = dev->canch[i] ? PTR_ERR(dev->canch[i]) : -ENOMEM;
+
+			/* on failure destroy previously created candevs */
+			icount = i;
+			for (i = 0; i < icount; i++) {
+				gs_destroy_candev(dev->canch[i]);
+				dev->canch[i] = NULL;
+			}
+
+			/* drop the reference to the state freed below */
+			usb_set_intfdata(intf, NULL);
+			kfree(dev);
+			return rc;
+		}
+		dev->canch[i]->parent = dev;
+	}
+
+	return 0;
+}
+
+/* USB disconnect: tear down every channel and release the interface state.
+ *
+ * Destroys each candev created by gs_usb_probe(), kills the anchored RX
+ * URBs and frees the gs_usb structure that probe allocated with kzalloc().
+ */
+static void gs_usb_disconnect(struct usb_interface *intf)
+{
+	unsigned int i;
+	struct gs_usb *dev = usb_get_intfdata(intf);
+
+	usb_set_intfdata(intf, NULL);
+
+	if (!dev) {
+		dev_err(&intf->dev, "Disconnect (nodata)\n");
+		return;
+	}
+
+	for (i = 0; i < GS_MAX_INTF; i++) {
+		struct gs_can *can = dev->canch[i];
+
+		/* unused slots are NULL (dev was zero-allocated in probe) */
+		if (!can)
+			continue;
+
+		gs_destroy_candev(can);
+	}
+
+	usb_kill_anchored_urbs(&dev->rx_submitted);
+
+	/* nothing refers to the per-interface state any more */
+	kfree(dev);
+}
+
+/* USB vendor/product IDs this driver binds to */
+static const struct usb_device_id gs_usb_table[] = {
+	{ USB_DEVICE(USB_GSUSB_1_VENDOR_ID, USB_GSUSB_1_PRODUCT_ID) },
+	{ } /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, gs_usb_table);
+
+/* USB driver description: probe/disconnect entry points and ID table */
+static struct usb_driver gs_usb_driver = {
+	.name		= "gs_usb",
+	.id_table	= gs_usb_table,
+	.probe		= gs_usb_probe,
+	.disconnect	= gs_usb_disconnect,
+};
+
+module_usb_driver(gs_usb_driver);
+
+/* Module metadata */
+MODULE_AUTHOR("Maximilian Schneider <mws@schneidersoft.net>");
+MODULE_DESCRIPTION("Socket CAN device driver for Geschwister Schneider Technologie-, "
+		   "Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces.");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 4ca46ed..541fb7a 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c

@@ -53,6 +53,8 @@
 #define USB_OEM_MERCURY_PRODUCT_ID	34
 #define USB_OEM_LEAF_PRODUCT_ID		35
 #define USB_CAN_R_PRODUCT_ID		39
+#define USB_LEAF_LITE_V2_PRODUCT_ID	288
+#define USB_MINI_PCIE_HS_PRODUCT_ID	289
 
 /* USB devices features */
 #define KVASER_HAS_SILENT_MODE		BIT(0)
@@ -356,6 +358,8 @@
 		.driver_info = KVASER_HAS_TXRX_ERRORS },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID),
 		.driver_info = KVASER_HAS_TXRX_ERRORS },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
@@ -379,38 +383,43 @@
 	void *buf;
 	int actual_len;
 	int err;
-	int pos = 0;
+	int pos;
+	unsigned long to = jiffies + msecs_to_jiffies(USB_RECV_TIMEOUT);
 
 	buf = kzalloc(RX_BUFFER_SIZE, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-	err = usb_bulk_msg(dev->udev,
-			   usb_rcvbulkpipe(dev->udev,
-					   dev->bulk_in->bEndpointAddress),
-			   buf, RX_BUFFER_SIZE, &actual_len,
-			   USB_RECV_TIMEOUT);
-	if (err < 0)
-		goto end;
-
-	while (pos <= actual_len - MSG_HEADER_LEN) {
-		tmp = buf + pos;
-
-		if (!tmp->len)
-			break;
-
-		if (pos + tmp->len > actual_len) {
-			dev_err(dev->udev->dev.parent, "Format error\n");
-			break;
-		}
-
-		if (tmp->id == id) {
-			memcpy(msg, tmp, tmp->len);
+	do {
+		err = usb_bulk_msg(dev->udev,
+				   usb_rcvbulkpipe(dev->udev,
+					dev->bulk_in->bEndpointAddress),
+				   buf, RX_BUFFER_SIZE, &actual_len,
+				   USB_RECV_TIMEOUT);
+		if (err < 0)
 			goto end;
-		}
 
-		pos += tmp->len;
-	}
+		pos = 0;
+		while (pos <= actual_len - MSG_HEADER_LEN) {
+			tmp = buf + pos;
+
+			if (!tmp->len)
+				break;
+
+			if (pos + tmp->len > actual_len) {
+				dev_err(dev->udev->dev.parent,
+					"Format error\n");
+				break;
+			}
+
+			if (tmp->id == id) {
+				memcpy(msg, tmp, tmp->len);
+				goto end;
+			}
+
+			pos += tmp->len;
+		}
+	} while (time_before(jiffies, to));
 
 	err = -EINVAL;
 

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
new file mode 100644
index 0000000..5e8b560
--- /dev/null
+++ b/drivers/net/can/xilinx_can.c

@@ -0,0 +1,1208 @@
+/* Xilinx CAN device driver
+ *
+ * Copyright (C) 2012 - 2014 Xilinx, Inc.
+ * Copyright (C) 2009 PetaLogix. All rights reserved.
+ *
+ * Description:
+ * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/led.h>
+
+#define DRIVER_NAME	"xilinx_can"
+
+/* Register offsets, relative to the ioremapped register base */
+enum xcan_reg {
+	/* control, timing and status */
+	XCAN_SRR_OFFSET		= 0x00, /* Software reset */
+	XCAN_MSR_OFFSET		= 0x04, /* Mode select */
+	XCAN_BRPR_OFFSET	= 0x08, /* Baud rate prescaler */
+	XCAN_BTR_OFFSET		= 0x0c, /* Bit timing */
+	XCAN_ECR_OFFSET		= 0x10, /* Error counter */
+	XCAN_ESR_OFFSET		= 0x14, /* Error status */
+	XCAN_SR_OFFSET		= 0x18, /* Status */
+	/* interrupts */
+	XCAN_ISR_OFFSET		= 0x1c, /* Interrupt status */
+	XCAN_IER_OFFSET		= 0x20, /* Interrupt enable */
+	XCAN_ICR_OFFSET		= 0x24, /* Interrupt clear */
+	/* TX FIFO */
+	XCAN_TXFIFO_ID_OFFSET	= 0x30, /* TX FIFO ID */
+	XCAN_TXFIFO_DLC_OFFSET	= 0x34, /* TX FIFO DLC */
+	XCAN_TXFIFO_DW1_OFFSET	= 0x38, /* TX FIFO Data Word 1 */
+	XCAN_TXFIFO_DW2_OFFSET	= 0x3c, /* TX FIFO Data Word 2 */
+	/* RX FIFO */
+	XCAN_RXFIFO_ID_OFFSET	= 0x50, /* RX FIFO ID */
+	XCAN_RXFIFO_DLC_OFFSET	= 0x54, /* RX FIFO DLC */
+	XCAN_RXFIFO_DW1_OFFSET	= 0x58, /* RX FIFO Data Word 1 */
+	XCAN_RXFIFO_DW2_OFFSET	= 0x5c, /* RX FIFO Data Word 2 */
+};
+
+/* CAN register bit masks - XCAN_<REG>_<BIT>_MASK
+ *
+ * The XCAN_IXR_* masks are used by this driver with all three interrupt
+ * registers (ISR, IER and ICR).  The XCAN_IDR_* masks describe the ID word
+ * written to / read from the TX and RX FIFOs.
+ */
+#define XCAN_SRR_CEN_MASK		0x00000002 /* CAN enable */
+#define XCAN_SRR_RESET_MASK		0x00000001 /* Soft Reset the CAN core */
+#define XCAN_MSR_LBACK_MASK		0x00000002 /* Loop back mode select */
+#define XCAN_MSR_SLEEP_MASK		0x00000001 /* Sleep mode select */
+#define XCAN_BRPR_BRP_MASK		0x000000FF /* Baud rate prescaler */
+#define XCAN_BTR_SJW_MASK		0x00000180 /* Synchronous jump width */
+#define XCAN_BTR_TS2_MASK		0x00000070 /* Time segment 2 */
+#define XCAN_BTR_TS1_MASK		0x0000000F /* Time segment 1 */
+#define XCAN_ECR_REC_MASK		0x0000FF00 /* Receive error counter */
+#define XCAN_ECR_TEC_MASK		0x000000FF /* Transmit error counter */
+#define XCAN_ESR_ACKER_MASK		0x00000010 /* ACK error */
+#define XCAN_ESR_BERR_MASK		0x00000008 /* Bit error */
+#define XCAN_ESR_STER_MASK		0x00000004 /* Stuff error */
+#define XCAN_ESR_FMER_MASK		0x00000002 /* Form error */
+#define XCAN_ESR_CRCER_MASK		0x00000001 /* CRC error */
+#define XCAN_SR_TXFLL_MASK		0x00000400 /* TX FIFO is full */
+#define XCAN_SR_ESTAT_MASK		0x00000180 /* Error status */
+#define XCAN_SR_ERRWRN_MASK		0x00000040 /* Error warning */
+#define XCAN_SR_NORMAL_MASK		0x00000008 /* Normal mode */
+#define XCAN_SR_LBACK_MASK		0x00000002 /* Loop back mode */
+#define XCAN_SR_CONFIG_MASK		0x00000001 /* Configuration mode */
+#define XCAN_IXR_TXFEMP_MASK		0x00004000 /* TX FIFO Empty */
+#define XCAN_IXR_WKUP_MASK		0x00000800 /* Wake up interrupt */
+#define XCAN_IXR_SLP_MASK		0x00000400 /* Sleep interrupt */
+#define XCAN_IXR_BSOFF_MASK		0x00000200 /* Bus off interrupt */
+#define XCAN_IXR_ERROR_MASK		0x00000100 /* Error interrupt */
+#define XCAN_IXR_RXNEMP_MASK		0x00000080 /* RX FIFO NotEmpty intr */
+#define XCAN_IXR_RXOFLW_MASK		0x00000040 /* RX FIFO Overflow intr */
+#define XCAN_IXR_RXOK_MASK		0x00000010 /* Message received intr */
+#define XCAN_IXR_TXFLL_MASK		0x00000004 /* Tx FIFO Full intr */
+#define XCAN_IXR_TXOK_MASK		0x00000002 /* TX successful intr */
+#define XCAN_IXR_ARBLST_MASK		0x00000001 /* Arbitration lost intr */
+#define XCAN_IDR_ID1_MASK		0xFFE00000 /* Standard msg identifier */
+#define XCAN_IDR_SRR_MASK		0x00100000 /* Substitute remote TXreq */
+#define XCAN_IDR_IDE_MASK		0x00080000 /* Identifier extension */
+#define XCAN_IDR_ID2_MASK		0x0007FFFE /* Extended message ident */
+#define XCAN_IDR_RTR_MASK		0x00000001 /* Remote TX request */
+#define XCAN_DLCR_DLC_MASK		0xF0000000 /* Data length code */
+
+/* Interrupt sources enabled by xcan_chip_start() and masked again by
+ * xcan_chip_stop().  Note that RX FIFO overflow, TX FIFO full and TX FIFO
+ * empty are not part of this set.
+ */
+#define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
+				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
+				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
+				 XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK)
+
+/* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
+#define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */
+#define XCAN_BTR_TS2_SHIFT		4  /* Time segment 2 */
+#define XCAN_IDR_ID1_SHIFT		21 /* Standard Messg Identifier */
+#define XCAN_IDR_ID2_SHIFT		1  /* Extended Message Identifier */
+#define XCAN_DLCR_DLC_SHIFT		28 /* Data length code */
+/* NOTE: despite the ESR prefix, this shift is applied to the value read
+ * from the error counter register (ECR) in xcan_err_interrupt().
+ */
+#define XCAN_ESR_REC_SHIFT		8  /* Rx Error Count */
+
+/* CAN frame length constants */
+#define XCAN_FRAME_MAX_DATA_LEN		8
+/* Timeout (in jiffies, i.e. one second) used when polling the status
+ * register for a mode change.
+ */
+#define XCAN_TIMEOUT			(1 * HZ)
+
+/**
+ * struct xcan_priv - Per-device private data of the Xilinx CAN driver
+ * @can:			CAN private data structure.
+ * @tx_head:			Running count of frames handed to the TX FIFO
+ *				(incremented in xcan_start_xmit())
+ * @tx_tail:			Running count of frames successfully sent
+ *				(incremented in xcan_tx_interrupt())
+ * @tx_max:			Maximum number of packets the driver can have
+ *				in flight; also the size of the echo skb ring
+ * @napi:			NAPI structure
+ * @read_reg:			For reading data from CAN registers
+ * @write_reg:			For writing data to CAN registers
+ * @dev:			Network device data structure
+ * @reg_base:			Ioremapped address to registers
+ * @irq_flags:			For request_irq()
+ * @bus_clk:			Pointer to struct clk
+ * @can_clk:			Pointer to struct clk
+ */
+struct xcan_priv {
+	struct can_priv can;
+	unsigned int tx_head;
+	unsigned int tx_tail;
+	unsigned int tx_max;
+	struct napi_struct napi;
+	u32 (*read_reg)(const struct xcan_priv *priv, enum xcan_reg reg);
+	void (*write_reg)(const struct xcan_priv *priv, enum xcan_reg reg,
+			u32 val);
+	struct net_device *dev;
+	void __iomem *reg_base;
+	unsigned long irq_flags;
+	struct clk *bus_clk;
+	struct clk *can_clk;
+};
+
+/* Bit timing limits of the controller, as per the Xilinx CAN specs */
+static const struct can_bittiming_const xcan_bittiming_const = {
+	.name		= DRIVER_NAME,
+	/* time segments */
+	.tseg1_min	= 1,
+	.tseg1_max	= 16,
+	.tseg2_min	= 1,
+	.tseg2_max	= 8,
+	/* synchronisation jump width */
+	.sjw_max	= 4,
+	/* baud rate prescaler */
+	.brp_min	= 1,
+	.brp_max	= 256,
+	.brp_inc	= 1,
+};
+
+/**
+ * xcan_write_reg_le - Write a value to the device register little endian
+ * @priv:	Driver private data structure
+ * @reg:	Register offset
+ * @val:	Value to write at the Register offset
+ *
+ * Writes @val to the register at offset @reg from the register base
+ * using iowrite32().
+ */
+static void xcan_write_reg_le(const struct xcan_priv *priv, enum xcan_reg reg,
+			      u32 val)
+{
+	void __iomem *addr = priv->reg_base + reg;
+
+	iowrite32(val, addr);
+}
+
+/**
+ * xcan_read_reg_le - Read a value from the device register little endian
+ * @priv:	Driver private data structure
+ * @reg:	Register offset
+ *
+ * Reads the register at offset @reg from the register base using
+ * ioread32().
+ * Return: value read from the CAN register
+ */
+static u32 xcan_read_reg_le(const struct xcan_priv *priv, enum xcan_reg reg)
+{
+	void __iomem *addr = priv->reg_base + reg;
+
+	return ioread32(addr);
+}
+
+/**
+ * xcan_write_reg_be - Write a value to the device register big endian
+ * @priv:	Driver private data structure
+ * @reg:	Register offset
+ * @val:	Value to write at the Register offset
+ *
+ * Writes @val to the register at offset @reg from the register base
+ * using iowrite32be().
+ */
+static void xcan_write_reg_be(const struct xcan_priv *priv, enum xcan_reg reg,
+			      u32 val)
+{
+	void __iomem *addr = priv->reg_base + reg;
+
+	iowrite32be(val, addr);
+}
+
+/**
+ * xcan_read_reg_be - Read a value from the device register big endian
+ * @priv:	Driver private data structure
+ * @reg:	Register offset
+ *
+ * Reads the register at offset @reg from the register base using
+ * ioread32be().
+ * Return: value read from the CAN register
+ */
+static u32 xcan_read_reg_be(const struct xcan_priv *priv, enum xcan_reg reg)
+{
+	void __iomem *addr = priv->reg_base + reg;
+
+	return ioread32be(addr);
+}
+
+/**
+ * set_reset_mode - Resets the CAN device mode
+ * @ndev:	Pointer to net_device structure
+ *
+ * Requests a software reset and then polls the status register until the
+ * controller reports configuration mode, sleeping between polls and giving
+ * up after XCAN_TIMEOUT.
+ *
+ * Return: 0 on success, -ETIMEDOUT if configuration mode is not reached
+ */
+static int set_reset_mode(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	unsigned long deadline;
+
+	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+
+	deadline = jiffies + XCAN_TIMEOUT;
+	for (;;) {
+		u32 status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+		if (status & XCAN_SR_CONFIG_MASK)
+			break;
+
+		if (time_after(jiffies, deadline)) {
+			netdev_warn(ndev, "timed out for config mode\n");
+			return -ETIMEDOUT;
+		}
+		usleep_range(500, 10000);
+	}
+
+	return 0;
+}
+
+/**
+ * xcan_set_bittiming - CAN set bit timing routine
+ * @ndev:	Pointer to net_device structure
+ *
+ * Programs the baud rate prescaler and bit timing registers from the
+ * bittiming stored in the CAN private data. The controller has to be in
+ * configuration mode.
+ * Return: 0 on success, -EPERM if the controller is not in config mode
+ */
+static int xcan_set_bittiming(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	struct can_bittiming *timing = &priv->can.bittiming;
+	u32 brpr, btr;
+
+	/* Bit timing can only be changed while the status register reports
+	 * configuration mode.
+	 */
+	if (!(priv->read_reg(priv, XCAN_SR_OFFSET) & XCAN_SR_CONFIG_MASK)) {
+		netdev_alert(ndev,
+		     "BUG! Cannot set bittiming - CAN is not in config mode\n");
+		return -EPERM;
+	}
+
+	/* Baud rate prescaler value for the BRPR register */
+	brpr = timing->brp - 1;
+
+	/* BTR register: time segment 1, time segment 2 and sync jump width */
+	btr = (timing->prop_seg + timing->phase_seg1 - 1) |
+	      ((timing->phase_seg2 - 1) << XCAN_BTR_TS2_SHIFT) |
+	      ((timing->sjw - 1) << XCAN_BTR_SJW_SHIFT);
+
+	priv->write_reg(priv, XCAN_BRPR_OFFSET, brpr);
+	priv->write_reg(priv, XCAN_BTR_OFFSET, btr);
+
+	netdev_dbg(ndev, "BRPR=0x%08x, BTR=0x%08x\n",
+			priv->read_reg(priv, XCAN_BRPR_OFFSET),
+			priv->read_reg(priv, XCAN_BTR_OFFSET));
+
+	return 0;
+}
+
+/**
+ * xcan_chip_start - This the drivers start routine
+ * @ndev:	Pointer to net_device structure
+ *
+ * Resets the controller into configuration mode, programs the bit timing,
+ * enables the XCAN_INTR_ALL interrupts, selects loopback or normal mode
+ * according to the configured ctrlmode and waits (up to XCAN_TIMEOUT) for
+ * the status register to report that mode.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xcan_chip_start(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 reg_msr, reg_sr_mask;
+	unsigned long timeout;
+	/* must be signed: the helpers below report failure as negative errno */
+	int err;
+
+	/* Check if it is in reset mode */
+	err = set_reset_mode(ndev);
+	if (err < 0)
+		return err;
+
+	err = xcan_set_bittiming(ndev);
+	if (err < 0)
+		return err;
+
+	/* Enable interrupts */
+	priv->write_reg(priv, XCAN_IER_OFFSET, XCAN_INTR_ALL);
+
+	/* Check whether it is loopback mode or normal mode  */
+	if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
+		reg_msr = XCAN_MSR_LBACK_MASK;
+		reg_sr_mask = XCAN_SR_LBACK_MASK;
+	} else {
+		reg_msr = 0x0;
+		reg_sr_mask = XCAN_SR_NORMAL_MASK;
+	}
+
+	priv->write_reg(priv, XCAN_MSR_OFFSET, reg_msr);
+	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
+
+	/* busy-poll until the status register reports the requested mode */
+	timeout = jiffies + XCAN_TIMEOUT;
+	while (!(priv->read_reg(priv, XCAN_SR_OFFSET) & reg_sr_mask)) {
+		if (time_after(jiffies, timeout)) {
+			netdev_warn(ndev,
+				"timed out for correct mode\n");
+			return -ETIMEDOUT;
+		}
+	}
+	netdev_dbg(ndev, "status:#x%08x\n",
+			priv->read_reg(priv, XCAN_SR_OFFSET));
+
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	return 0;
+}
+
+/**
+ * xcan_do_set_mode - This sets the mode of the driver
+ * @ndev:	Pointer to net_device structure
+ * @mode:	Tells the mode of the driver
+ *
+ * Only CAN_MODE_START is handled: the chip is (re)started and the TX
+ * queue is woken. Every other mode is rejected.
+ *
+ * Return: 0 on success, -EOPNOTSUPP for an unsupported mode, or the error
+ * returned by xcan_chip_start()
+ */
+static int xcan_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	int err;
+
+	if (mode != CAN_MODE_START)
+		return -EOPNOTSUPP;
+
+	err = xcan_chip_start(ndev);
+	if (err < 0) {
+		netdev_err(ndev, "xcan_chip_start failed!\n");
+		return err;
+	}
+
+	netif_wake_queue(ndev);
+
+	return err;
+}
+
+/**
+ * xcan_start_xmit - Starts the transmission
+ * @skb:	sk_buff pointer that contains data to be Txed
+ * @ndev:	Pointer to net_device structure
+ *
+ * This function is invoked from upper layers to initiate transmission. It
+ * converts the socketCAN frame into the TX FIFO layout (ID, DLC and two
+ * data words), stores the skb for echo and writes the frame to the TX FIFO.
+ *
+ * Return: NETDEV_TX_OK when the frame was queued (or dropped as invalid),
+ * NETDEV_TX_BUSY when the TX FIFO is reported full
+ */
+static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 id, dlc, data[2] = {0, 0};
+
+	if (can_dropped_invalid_skb(ndev, skb))
+		return NETDEV_TX_OK;
+
+	/* Check if the TX buffer is full */
+	if (unlikely(priv->read_reg(priv, XCAN_SR_OFFSET) &
+			XCAN_SR_TXFLL_MASK)) {
+		netif_stop_queue(ndev);
+		netdev_err(ndev, "BUG!, TX FIFO full when queue awake!\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Watch carefully on the bit sequence */
+	if (cf->can_id & CAN_EFF_FLAG) {
+		/* Extended CAN ID format: the low bits go into ID2, the
+		 * upper CAN_SFF_ID_BITS bits go into ID1
+		 */
+		id = ((cf->can_id & CAN_EFF_MASK) << XCAN_IDR_ID2_SHIFT) &
+			XCAN_IDR_ID2_MASK;
+		id |= (((cf->can_id & CAN_EFF_MASK) >>
+			(CAN_EFF_ID_BITS-CAN_SFF_ID_BITS)) <<
+			XCAN_IDR_ID1_SHIFT) & XCAN_IDR_ID1_MASK;
+
+		/* The substitute remote TX request bit should be "1"
+		 * for extended frames as in the Xilinx CAN datasheet
+		 */
+		id |= XCAN_IDR_IDE_MASK | XCAN_IDR_SRR_MASK;
+
+		if (cf->can_id & CAN_RTR_FLAG)
+			/* Extended frames remote TX request */
+			id |= XCAN_IDR_RTR_MASK;
+	} else {
+		/* Standard CAN ID format */
+		id = ((cf->can_id & CAN_SFF_MASK) << XCAN_IDR_ID1_SHIFT) &
+			XCAN_IDR_ID1_MASK;
+
+		if (cf->can_id & CAN_RTR_FLAG)
+			/* Standard frames remote TX request */
+			id |= XCAN_IDR_SRR_MASK;
+	}
+
+	dlc = cf->can_dlc << XCAN_DLCR_DLC_SHIFT;
+
+	/* The payload is packed big endian into the two data words */
+	if (cf->can_dlc > 0)
+		data[0] = be32_to_cpup((__be32 *)(cf->data + 0));
+	if (cf->can_dlc > 4)
+		data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
+
+	/* Echo slots are used as a ring of tx_max entries */
+	can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
+	priv->tx_head++;
+
+	/* Write the Frame to Xilinx CAN TX FIFO */
+	priv->write_reg(priv, XCAN_TXFIFO_ID_OFFSET, id);
+	/* If the CAN frame is RTR frame this write triggers transmission */
+	priv->write_reg(priv, XCAN_TXFIFO_DLC_OFFSET, dlc);
+	if (!(cf->can_id & CAN_RTR_FLAG)) {
+		priv->write_reg(priv, XCAN_TXFIFO_DW1_OFFSET, data[0]);
+		/* If the CAN frame is Standard/Extended frame this
+		 * write triggers transmission
+		 */
+		priv->write_reg(priv, XCAN_TXFIFO_DW2_OFFSET, data[1]);
+		stats->tx_bytes += cf->can_dlc;
+	}
+
+	/* Stop the queue once tx_max frames are in flight */
+	if ((priv->tx_head - priv->tx_tail) == priv->tx_max)
+		netif_stop_queue(ndev);
+
+	return NETDEV_TX_OK;
+}
+
+/**
+ * xcan_rx -  Is called from CAN isr to complete the received
+ *		frame  processing
+ * @ndev:	Pointer to net_device structure
+ *
+ * This function is invoked from the NAPI poll routine to process one Rx
+ * frame. It converts the frame from the RX FIFO layout into a socketCAN
+ * frame and hands it to the stack with netif_receive_skb().
+ *
+ * Return: 1 on success and 0 on failure.
+ */
+static int xcan_rx(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 id_xcan, dlc, data[2] = {0, 0};
+
+	skb = alloc_can_skb(ndev, &cf);
+	if (unlikely(!skb)) {
+		stats->rx_dropped++;
+		return 0;
+	}
+
+	/* Read a frame from Xilinx zynq CANPS */
+	id_xcan = priv->read_reg(priv, XCAN_RXFIFO_ID_OFFSET);
+	dlc = priv->read_reg(priv, XCAN_RXFIFO_DLC_OFFSET) >>
+				XCAN_DLCR_DLC_SHIFT;
+
+	/* Change Xilinx CAN data length format to socketCAN data format */
+	cf->can_dlc = get_can_dlc(dlc);
+
+	/* Change Xilinx CAN ID format to socketCAN ID format */
+	if (id_xcan & XCAN_IDR_IDE_MASK) {
+		/* The received frame is an Extended format frame.
+		 * ID1 sits in bits 31..21 and has to end up in bits 28..18
+		 * of the socketCAN ID, hence the shift by 3.
+		 */
+		cf->can_id = (id_xcan & XCAN_IDR_ID1_MASK) >> 3;
+		cf->can_id |= (id_xcan & XCAN_IDR_ID2_MASK) >>
+				XCAN_IDR_ID2_SHIFT;
+		cf->can_id |= CAN_EFF_FLAG;
+		if (id_xcan & XCAN_IDR_RTR_MASK)
+			cf->can_id |= CAN_RTR_FLAG;
+	} else {
+		/* The received frame is a standard format frame */
+		cf->can_id = (id_xcan & XCAN_IDR_ID1_MASK) >>
+				XCAN_IDR_ID1_SHIFT;
+		if (id_xcan & XCAN_IDR_SRR_MASK)
+			cf->can_id |= CAN_RTR_FLAG;
+	}
+
+	/* Only data frames carry payload. Test the decoded RTR flag rather
+	 * than the raw SRR bit: SRR marks a remote frame only for standard
+	 * frames, while extended frames carry it as a substitute bit that is
+	 * set to 1, so testing SRR would drop the payload of every extended
+	 * data frame.
+	 */
+	if (!(cf->can_id & CAN_RTR_FLAG)) {
+		data[0] = priv->read_reg(priv, XCAN_RXFIFO_DW1_OFFSET);
+		data[1] = priv->read_reg(priv, XCAN_RXFIFO_DW2_OFFSET);
+
+		/* Change Xilinx CAN data format to socketCAN data format */
+		if (cf->can_dlc > 0)
+			*(__be32 *)(cf->data) = cpu_to_be32(data[0]);
+		if (cf->can_dlc > 4)
+			*(__be32 *)(cf->data + 4) = cpu_to_be32(data[1]);
+	}
+
+	stats->rx_bytes += cf->can_dlc;
+	stats->rx_packets++;
+	netif_receive_skb(skb);
+
+	return 1;
+}
+
+/**
+ * xcan_err_interrupt - error frame Isr
+ * @ndev:	net_device pointer
+ * @isr:	interrupt status register value
+ *
+ * This is the CAN error interrupt handler. It checks the type of error,
+ * updates the CAN state and statistics and, if an error skb could be
+ * allocated, forwards an error frame to the upper layers.
+ */
+static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 err_status, status, txerr = 0, rxerr = 0;
+
+	/* skb may be NULL; state and counters are still updated below and
+	 * every access to cf is guarded by "if (skb)"
+	 */
+	skb = alloc_can_err_skb(ndev, &cf);
+
+	err_status = priv->read_reg(priv, XCAN_ESR_OFFSET);
+	/* NOTE(review): writing the value back presumably clears the latched
+	 * error bits - confirm against the datasheet
+	 */
+	priv->write_reg(priv, XCAN_ESR_OFFSET, err_status);
+	txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
+	rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
+			XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
+	status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+	/* State change: bus-off, error passive or error warning */
+	if (isr & XCAN_IXR_BSOFF_MASK) {
+		priv->can.state = CAN_STATE_BUS_OFF;
+		priv->can.can_stats.bus_off++;
+		/* Leave device in Config Mode in bus-off state */
+		priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+		can_bus_off(ndev);
+		if (skb)
+			cf->can_id |= CAN_ERR_BUSOFF;
+	} else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) {
+		priv->can.state = CAN_STATE_ERROR_PASSIVE;
+		priv->can.can_stats.error_passive++;
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = (rxerr > 127) ?
+					CAN_ERR_CRTL_RX_PASSIVE :
+					CAN_ERR_CRTL_TX_PASSIVE;
+			cf->data[6] = txerr;
+			cf->data[7] = rxerr;
+		}
+	} else if (status & XCAN_SR_ERRWRN_MASK) {
+		priv->can.state = CAN_STATE_ERROR_WARNING;
+		priv->can.can_stats.error_warning++;
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] |= (txerr > rxerr) ?
+					CAN_ERR_CRTL_TX_WARNING :
+					CAN_ERR_CRTL_RX_WARNING;
+			cf->data[6] = txerr;
+			cf->data[7] = rxerr;
+		}
+	}
+
+	/* Check for Arbitration lost interrupt */
+	if (isr & XCAN_IXR_ARBLST_MASK) {
+		priv->can.can_stats.arbitration_lost++;
+		if (skb) {
+			cf->can_id |= CAN_ERR_LOSTARB;
+			cf->data[0] = CAN_ERR_LOSTARB_UNSPEC;
+		}
+	}
+
+	/* Check for RX FIFO Overflow interrupt; the core is reset here */
+	if (isr & XCAN_IXR_RXOFLW_MASK) {
+		stats->rx_over_errors++;
+		stats->rx_errors++;
+		priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
+		}
+	}
+
+	/* Check for error interrupt; the error type comes from the ESR value
+	 * latched at the top of this function
+	 */
+	if (isr & XCAN_IXR_ERROR_MASK) {
+		if (skb) {
+			cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+		}
+
+		/* Check for Ack error interrupt */
+		if (err_status & XCAN_ESR_ACKER_MASK) {
+			stats->tx_errors++;
+			if (skb) {
+				cf->can_id |= CAN_ERR_ACK;
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+			}
+		}
+
+		/* Check for Bit error interrupt.
+		 * Note: data[2] is assigned, not OR-ed, here and below, so
+		 * the last matching error type wins.
+		 */
+		if (err_status & XCAN_ESR_BERR_MASK) {
+			stats->tx_errors++;
+			if (skb) {
+				cf->can_id |= CAN_ERR_PROT;
+				cf->data[2] = CAN_ERR_PROT_BIT;
+			}
+		}
+
+		/* Check for Stuff error interrupt */
+		if (err_status & XCAN_ESR_STER_MASK) {
+			stats->rx_errors++;
+			if (skb) {
+				cf->can_id |= CAN_ERR_PROT;
+				cf->data[2] = CAN_ERR_PROT_STUFF;
+			}
+		}
+
+		/* Check for Form error interrupt */
+		if (err_status & XCAN_ESR_FMER_MASK) {
+			stats->rx_errors++;
+			if (skb) {
+				cf->can_id |= CAN_ERR_PROT;
+				cf->data[2] = CAN_ERR_PROT_FORM;
+			}
+		}
+
+		/* Check for CRC error interrupt */
+		if (err_status & XCAN_ESR_CRCER_MASK) {
+			stats->rx_errors++;
+			if (skb) {
+				cf->can_id |= CAN_ERR_PROT;
+				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ |
+						CAN_ERR_PROT_LOC_CRC_DEL;
+			}
+		}
+			priv->can.can_stats.bus_error++;
+	}
+
+	/* Error frames are accounted as received packets */
+	if (skb) {
+		stats->rx_packets++;
+		stats->rx_bytes += cf->can_dlc;
+		netif_rx(skb);
+	}
+
+	netdev_dbg(ndev, "%s: error status register:0x%x\n",
+			__func__, priv->read_reg(priv, XCAN_ESR_OFFSET));
+}
+
+/**
+ * xcan_state_interrupt - It will check the state of the CAN device
+ * @ndev:	net_device pointer
+ * @isr:	interrupt status register value
+ *
+ * Updates the CAN state from the sleep and wake-up interrupt bits. When
+ * both bits are set the wake-up state is the one that remains.
+ */
+static void xcan_state_interrupt(struct net_device *ndev, u32 isr)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	const bool sleep_irq = isr & XCAN_IXR_SLP_MASK;
+	const bool wakeup_irq = isr & XCAN_IXR_WKUP_MASK;
+
+	/* Sleep interrupt: the device entered sleep state */
+	if (sleep_irq)
+		priv->can.state = CAN_STATE_SLEEPING;
+
+	/* Wake up interrupt: the device is active again */
+	if (wakeup_irq)
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+/**
+ * xcan_rx_poll - Poll routine for rx packets (NAPI)
+ * @napi:	napi structure pointer
+ * @quota:	Max number of rx packets to be processed.
+ *
+ * This is the poll routine for the rx part.
+ * It processes at most @quota packets and, when it finishes below the
+ * quota, completes NAPI and re-enables the RX interrupts.
+ *
+ * Return: number of packets received
+ */
+static int xcan_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct net_device *ndev = napi->dev;
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 isr, ier;
+	int work_done = 0;
+
+	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+	while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
+		if (isr & XCAN_IXR_RXOK_MASK) {
+			/* ack RXOK, then fetch one frame from the RX FIFO */
+			priv->write_reg(priv, XCAN_ICR_OFFSET,
+				XCAN_IXR_RXOK_MASK);
+			work_done += xcan_rx(ndev);
+		} else {
+			/* RXNEMP without RXOK: ack it and stop polling */
+			priv->write_reg(priv, XCAN_ICR_OFFSET,
+				XCAN_IXR_RXNEMP_MASK);
+			break;
+		}
+		/* ack RXNEMP and re-read ISR to see if more frames wait */
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
+		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+	}
+
+	if (work_done)
+		can_led_event(ndev, CAN_LED_EVENT_RX);
+
+	if (work_done < quota) {
+		napi_complete(napi);
+		/* re-enable the RX interrupts masked in xcan_interrupt() */
+		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
+		ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK);
+		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
+	}
+	return work_done;
+}
+
+/**
+ * xcan_tx_interrupt - Tx Done Isr
+ * @ndev:	net_device pointer
+ * @isr:	Interrupt status register value
+ *
+ * Completes echo skbs for as long as frames are outstanding and the TXOK
+ * interrupt bit is set, then signals the TX LED event and wakes the queue.
+ */
+static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+
+	/* tx_head/tx_tail are unsigned, so "outstanding" means "not equal" */
+	while (priv->tx_head != priv->tx_tail) {
+		unsigned int echo_idx;
+
+		if (!(isr & XCAN_IXR_TXOK_MASK))
+			break;
+
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+
+		echo_idx = priv->tx_tail % priv->tx_max;
+		can_get_echo_skb(ndev, echo_idx);
+		priv->tx_tail++;
+		stats->tx_packets++;
+
+		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+	}
+
+	can_led_event(ndev, CAN_LED_EVENT_TX);
+	netif_wake_queue(ndev);
+}
+
+/**
+ * xcan_interrupt - CAN Isr
+ * @irq:	irq number
+ * @dev_id:	device id pointer (the net_device)
+ *
+ * This is the xilinx CAN Isr. It checks for the type of interrupt
+ * and invokes the corresponding ISR.
+ *
+ * Return:
+ * IRQ_NONE - If the interrupt status register reads zero, IRQ_HANDLED
+ * otherwise
+ */
+static irqreturn_t xcan_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 isr, ier;
+
+	/* Get the interrupt status from Xilinx CAN */
+	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+	if (!isr)
+		return IRQ_NONE;
+
+	/* Check for the type of interrupt and Processing it */
+	if (isr & (XCAN_IXR_SLP_MASK | XCAN_IXR_WKUP_MASK)) {
+		priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_SLP_MASK |
+				XCAN_IXR_WKUP_MASK));
+		xcan_state_interrupt(ndev, isr);
+	}
+
+	/* Check for Tx interrupt and Processing it */
+	if (isr & XCAN_IXR_TXOK_MASK)
+		xcan_tx_interrupt(ndev, isr);
+
+	/* Check for the type of error interrupt and Processing it */
+	if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+			XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) {
+		priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK |
+				XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK |
+				XCAN_IXR_ARBLST_MASK));
+		xcan_err_interrupt(ndev, isr);
+	}
+
+	/* Check for the type of receive interrupt and Processing it:
+	 * mask the RX interrupts and let the NAPI poll routine do the work
+	 */
+	if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) {
+		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
+		ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK);
+		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
+		napi_schedule(&priv->napi);
+	}
+	return IRQ_HANDLED;
+}
+
+/**
+ * xcan_chip_stop - Driver stop routine
+ * @ndev:	Pointer to net_device structure
+ *
+ * Masks every interrupt covered by XCAN_INTR_ALL, writes the reset bit to
+ * the software reset register and records the STOPPED state.
+ */
+static void xcan_chip_stop(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 enabled = priv->read_reg(priv, XCAN_IER_OFFSET);
+
+	/* Clear only the XCAN_INTR_ALL bits; other enable bits are kept */
+	enabled &= ~XCAN_INTR_ALL;
+	priv->write_reg(priv, XCAN_IER_OFFSET, enabled);
+
+	/* Reset the core; this leaves the can in configuration mode */
+	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+/**
+ * xcan_open - Driver open routine
+ * @ndev:	Pointer to net_device structure
+ *
+ * Requests the interrupt line, enables the device and bus clocks, puts the
+ * controller into reset mode, runs the common CAN open and starts the chip.
+ * On failure the steps already taken are undone in reverse order.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xcan_open(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	int err;
+
+	err = request_irq(ndev->irq, xcan_interrupt, priv->irq_flags,
+			  ndev->name, ndev);
+	if (err < 0) {
+		netdev_err(ndev, "irq allocation for CAN failed\n");
+		return err;
+	}
+
+	err = clk_prepare_enable(priv->can_clk);
+	if (err) {
+		netdev_err(ndev, "unable to enable device clock\n");
+		goto out_free_irq;
+	}
+
+	err = clk_prepare_enable(priv->bus_clk);
+	if (err) {
+		netdev_err(ndev, "unable to enable bus clock\n");
+		goto out_disable_can_clk;
+	}
+
+	/* Set chip into reset mode */
+	err = set_reset_mode(ndev);
+	if (err < 0) {
+		netdev_err(ndev, "mode resetting failed!\n");
+		goto out_disable_bus_clk;
+	}
+
+	/* Common open */
+	err = open_candev(ndev);
+	if (err)
+		goto out_disable_bus_clk;
+
+	err = xcan_chip_start(ndev);
+	if (err < 0) {
+		netdev_err(ndev, "xcan_chip_start failed!\n");
+		goto out_close_candev;
+	}
+
+	/* Device is up: signal the LED, enable polling and allow Tx */
+	can_led_event(ndev, CAN_LED_EVENT_OPEN);
+	napi_enable(&priv->napi);
+	netif_start_queue(ndev);
+
+	return 0;
+
+out_close_candev:
+	close_candev(ndev);
+out_disable_bus_clk:
+	clk_disable_unprepare(priv->bus_clk);
+out_disable_can_clk:
+	clk_disable_unprepare(priv->can_clk);
+out_free_irq:
+	free_irq(ndev->irq, ndev);
+	return err;
+}
+
+/**
+ * xcan_close - Driver close routine
+ * @ndev:	Pointer to net_device structure
+ *
+ * Stops the Tx queue and NAPI polling, stops the chip, disables both
+ * clocks, releases the interrupt line and runs the common CAN close.
+ *
+ * Return: 0 always
+ */
+static int xcan_close(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+
+	/* Quiesce the software side before touching the hardware */
+	netif_stop_queue(ndev);
+	napi_disable(&priv->napi);
+	xcan_chip_stop(ndev);
+	/* Balance the clk_prepare_enable() calls made in xcan_open() */
+	clk_disable_unprepare(priv->bus_clk);
+	clk_disable_unprepare(priv->can_clk);
+	free_irq(ndev->irq, ndev);
+	close_candev(ndev);
+
+	can_led_event(ndev, CAN_LED_EVENT_STOP);
+
+	return 0;
+}
+
+/**
+ * xcan_get_berr_counter - error counter routine
+ * @ndev:	Pointer to net_device structure
+ * @bec:	Pointer to can_berr_counter structure
+ *
+ * Enables the device and bus clocks, reads the error counter register
+ * and fills in the Tx and Rx error counts, then disables the clocks again.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xcan_get_berr_counter(const struct net_device *ndev,
+					struct can_berr_counter *bec)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 ecr;
+	int ret;
+
+	ret = clk_prepare_enable(priv->can_clk);
+	if (ret)
+		goto err;
+
+	ret = clk_prepare_enable(priv->bus_clk);
+	if (ret)
+		goto err_clk;
+
+	/*
+	 * Read the register once so that both counters are taken from the
+	 * same sample instead of two reads that may straddle an update.
+	 */
+	ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
+	bec->txerr = ecr & XCAN_ECR_TEC_MASK;
+	bec->rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
+
+	clk_disable_unprepare(priv->bus_clk);
+	clk_disable_unprepare(priv->can_clk);
+
+	return 0;
+
+err_clk:
+	clk_disable_unprepare(priv->can_clk);
+err:
+	return ret;
+}
+
+
+/* net_device callbacks implemented by this driver */
+static const struct net_device_ops xcan_netdev_ops = {
+	.ndo_start_xmit	= xcan_start_xmit,	/* transmit path */
+	.ndo_open	= xcan_open,		/* interface up */
+	.ndo_stop	= xcan_close,		/* interface down */
+};
+
+/**
+ * xcan_suspend - Suspend method for the driver
+ * @dev:	Address of the device structure
+ *
+ * Put the driver into low power mode: detach a running interface, request
+ * sleep mode from the controller and disable both clocks.
+ * Return: 0 always
+ */
+static int __maybe_unused xcan_suspend(struct device *dev)
+{
+	/*
+	 * xcan_probe() stores the net_device with platform_set_drvdata(),
+	 * so the driver data of @dev is the net_device itself, not a
+	 * platform_device.
+	 */
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct xcan_priv *priv = netdev_priv(ndev);
+
+	if (netif_running(ndev)) {
+		netif_stop_queue(ndev);
+		netif_device_detach(ndev);
+	}
+
+	priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK);
+	priv->can.state = CAN_STATE_SLEEPING;
+
+	/*
+	 * NOTE(review): the clocks are only enabled between xcan_open() and
+	 * xcan_close(); confirm that suspending with the interface down does
+	 * not unbalance the clock enable count.
+	 */
+	clk_disable(priv->bus_clk);
+	clk_disable(priv->can_clk);
+
+	return 0;
+}
+
+/**
+ * xcan_resume - Resume from suspend
+ * @dev:	Address of the device structure
+ *
+ * Resume operation after suspend: re-enable both clocks, clear the sleep
+ * request, enable the controller and re-attach a running interface.
+ * Return: 0 on success and failure value on error
+ */
+static int __maybe_unused xcan_resume(struct device *dev)
+{
+	/*
+	 * xcan_probe() stores the net_device with platform_set_drvdata(),
+	 * so the driver data of @dev is the net_device itself, not a
+	 * platform_device.
+	 */
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct xcan_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	ret = clk_enable(priv->bus_clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable clock.\n");
+		return ret;
+	}
+	ret = clk_enable(priv->can_clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable clock.\n");
+		/* Only clk_enable() was done above, so only undo that */
+		clk_disable(priv->bus_clk);
+		return ret;
+	}
+
+	priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
+	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	if (netif_running(ndev)) {
+		netif_device_attach(ndev);
+		netif_start_queue(ndev);
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
+
+static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
+
+/**
+ * xcan_probe - Platform registration call
+ * @pdev:	Handle to the platform device structure
+ *
+ * This function does all the memory allocation and registration for the CAN
+ * device: it maps the registers, reads the FIFO depths from the device
+ * tree, looks up the interrupt and the clocks, selects the register
+ * accessors and registers the CAN network device. Both clocks are left
+ * disabled on success.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xcan_probe(struct platform_device *pdev)
+{
+	struct resource *res; /* IO mem resources */
+	struct net_device *ndev;
+	struct xcan_priv *priv;
+	const char *bus_clk_name;
+	void __iomem *addr;
+	u32 rx_max, tx_max;	/* of_property_read_u32() expects u32 storage */
+	int ret, irq;
+
+	/* Get the virtual base address for the device */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
+
+	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max);
+	if (ret < 0)
+		return ret;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "rx-fifo-depth", &rx_max);
+	if (ret < 0)
+		return ret;
+
+	/* Get IRQ for the device; fail here rather than later in open */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	/* Create a CAN device instance */
+	ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
+	if (!ndev)
+		return -ENOMEM;
+
+	priv = netdev_priv(ndev);
+	priv->dev = ndev;
+	priv->can.bittiming_const = &xcan_bittiming_const;
+	priv->can.do_set_mode = xcan_do_set_mode;
+	priv->can.do_get_berr_counter = xcan_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
+					CAN_CTRLMODE_BERR_REPORTING;
+	priv->reg_base = addr;
+	priv->tx_max = tx_max;
+
+	ndev->irq = irq;
+	ndev->flags |= IFF_ECHO;	/* We support local echo */
+
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ndev->netdev_ops = &xcan_netdev_ops;
+
+	/* Getting the CAN can_clk info */
+	priv->can_clk = devm_clk_get(&pdev->dev, "can_clk");
+	if (IS_ERR(priv->can_clk)) {
+		dev_err(&pdev->dev, "Device clock not found.\n");
+		ret = PTR_ERR(priv->can_clk);
+		goto err_free;
+	}
+
+	/* The bus clock name depends on the type of CAN device */
+	if (of_device_is_compatible(pdev->dev.of_node, "xlnx,zynq-can-1.0"))
+		bus_clk_name = "pclk";
+	else
+		bus_clk_name = "s_axi_aclk";
+
+	priv->bus_clk = devm_clk_get(&pdev->dev, bus_clk_name);
+	if (IS_ERR(priv->bus_clk)) {
+		dev_err(&pdev->dev, "bus clock not found\n");
+		ret = PTR_ERR(priv->bus_clk);
+		goto err_free;
+	}
+
+	ret = clk_prepare_enable(priv->can_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable device clock\n");
+		goto err_free;
+	}
+
+	ret = clk_prepare_enable(priv->bus_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable bus clock\n");
+		goto err_unprepare_disable_dev;
+	}
+
+	/*
+	 * Start with the little-endian accessors; if the status register
+	 * does not read back as XCAN_SR_CONFIG_MASK through them, switch to
+	 * the big-endian accessors.
+	 */
+	priv->write_reg = xcan_write_reg_le;
+	priv->read_reg = xcan_read_reg_le;
+
+	if (priv->read_reg(priv, XCAN_SR_OFFSET) != XCAN_SR_CONFIG_MASK) {
+		priv->write_reg = xcan_write_reg_be;
+		priv->read_reg = xcan_read_reg_be;
+	}
+
+	priv->can.clock.freq = clk_get_rate(priv->can_clk);
+
+	/* The Rx FIFO depth is used as the NAPI weight */
+	netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
+
+	ret = register_candev(ndev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register CAN device (err=%d)\n",
+			ret);
+		goto err_unprepare_disable_busclk;
+	}
+
+	devm_can_led_init(ndev);
+	/* Clocks were only needed for probing; xcan_open() enables them again */
+	clk_disable_unprepare(priv->bus_clk);
+	clk_disable_unprepare(priv->can_clk);
+	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
+			priv->reg_base, ndev->irq, priv->can.clock.freq,
+			priv->tx_max);
+
+	return 0;
+
+err_unprepare_disable_busclk:
+	clk_disable_unprepare(priv->bus_clk);
+err_unprepare_disable_dev:
+	clk_disable_unprepare(priv->can_clk);
+err_free:
+	free_candev(ndev);
+	return ret;
+}
+
+/**
+ * xcan_remove - Unregister the device after releasing the resources
+ * @pdev:	Handle to the platform device structure
+ *
+ * Puts the controller into reset mode, unregisters the CAN device, removes
+ * the NAPI context and frees the net_device. A failure to enter reset mode
+ * is only logged; removal continues regardless.
+ * Return: 0 always
+ */
+static int xcan_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct xcan_priv *priv = netdev_priv(ndev);
+
+	/*
+	 * NOTE(review): xcan_probe() leaves both clocks disabled and only
+	 * xcan_open() enables them; confirm set_reset_mode() is safe to call
+	 * here when the interface is down.
+	 */
+	if (set_reset_mode(ndev) < 0)
+		netdev_err(ndev, "mode resetting failed!\n");
+
+	unregister_candev(ndev);
+	netif_napi_del(&priv->napi);
+	free_candev(ndev);
+
+	return 0;
+}
+
+/*
+ * Match table for OF platform binding. The table is never modified, so it
+ * is declared const.
+ */
+static const struct of_device_id xcan_of_match[] = {
+	{ .compatible = "xlnx,zynq-can-1.0", },
+	{ .compatible = "xlnx,axi-can-1.00.a", },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(of, xcan_of_match);
+
+/* Platform driver glue: probe/remove entry points, PM ops and OF matching */
+static struct platform_driver xcan_driver = {
+	.driver	= {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= xcan_of_match,
+		.pm = &xcan_dev_pm_ops,
+	},
+	.probe = xcan_probe,
+	.remove	= xcan_remove,
+};
+
+/* Generates the module init/exit functions that (un)register xcan_driver */
+module_platform_driver(xcan_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Xilinx Inc");
+MODULE_DESCRIPTION("Xilinx CAN interface");

diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index 41ee5b6..69c4251 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c

@@ -289,7 +289,7 @@
 
 static int mv88e6123_61_65_setup(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int i;
 	int ret;
 

diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c
index dadfafb..953bc6a 100644
--- a/drivers/net/dsa/mv88e6131.c
+++ b/drivers/net/dsa/mv88e6131.c

@@ -155,7 +155,7 @@
 
 static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int addr = REG_PORT(p);
 	u16 val;
 
@@ -274,7 +274,7 @@
 
 static int mv88e6131_setup(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int i;
 	int ret;
 

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 17314ed..9ce2146 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c

@@ -74,7 +74,7 @@
 
 int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
@@ -118,7 +118,7 @@
 
 int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
@@ -256,7 +256,7 @@
 
 static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int ret;
 
 	mutex_lock(&ps->ppu_mutex);
@@ -283,7 +283,7 @@
 
 static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
 	/* Schedule a timer to re-enable the PHY polling unit. */
 	mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10));
@@ -292,7 +292,7 @@
 
 void mv88e6xxx_ppu_state_init(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
 	mutex_init(&ps->ppu_mutex);
 	INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work);
@@ -463,7 +463,7 @@
 				 int nr_stats, struct mv88e6xxx_hw_stat *stats,
 				 int port, uint64_t *data)
 {
-	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int ret;
 	int i;
 

diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 35df0b9..a968654 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c

@@ -534,7 +534,7 @@
 	/* The EL3-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	err = register_netdev(dev);
 	if (err) {

diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 063557e..f18647c 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c

@@ -218,7 +218,7 @@
 	dev->netdev_ops = &el3_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 
 	return tc589_config(link);
 }

diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 465cc71..e13b046 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c

@@ -2435,7 +2435,7 @@
 	netif_napi_add(dev, &tp->napi, typhoon_poll, 16);
 	dev->watchdog_timeo	= TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &typhoon_ethtool_ops);
+	dev->ethtool_ops = &typhoon_ethtool_ops;
 
 	/* We can handle scatter gather, up to 16 entries, and
 	 * we can do IP checksumming (only version 4, doh...)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 455d4c3..1d162cc 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c

@@ -157,7 +157,7 @@
 
 	/* This check _should_not_ be necessary, omit eventually. */
 	while ((ei_inb(addr + EN0_ISR) & ENISR_RESET) == 0) {
-		if (jiffies - reset_start_time > 2 * HZ / 100) {
+		if (time_after(jiffies, reset_start_time + 2 * HZ / 100)) {
 			netdev_warn(dev, "%s: did not complete.\n", __func__);
 			break;
 		}
@@ -293,7 +293,7 @@
 	dma_start = jiffies;
 
 	while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) {
-		if (jiffies - dma_start > 2 * HZ / 100) {		/* 20ms */
+		if (time_after(jiffies, dma_start + 2 * HZ / 100)) { /* 20ms */
 			netdev_warn(dev, "timeout waiting for Tx RDC.\n");
 			ax_reset_8390(dev);
 			ax_NS8390_init(dev, 1);

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 0513494..edb7186 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig

@@ -68,6 +68,7 @@
 source "drivers/net/ethernet/faraday/Kconfig"
 source "drivers/net/ethernet/freescale/Kconfig"
 source "drivers/net/ethernet/fujitsu/Kconfig"
+source "drivers/net/ethernet/hisilicon/Kconfig"
 source "drivers/net/ethernet/hp/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"

diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 35190e3..58de333 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile

@@ -31,6 +31,7 @@
 obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/
 obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
 obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
+obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
 obj-$(CONFIG_NET_VENDOR_HP) += hp/
 obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/

diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 171d73c..40dbbf7 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c

@@ -784,7 +784,7 @@
 
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work);
 

diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 1517e9df..9a6991b 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c

@@ -476,7 +476,7 @@
 	dev->watchdog_timeo = 5*HZ;
 
 	dev->netdev_ops = &ace_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ace_ethtool_ops);
+	dev->ethtool_ops = &ace_ethtool_ops;
 
 	/* we only display this string ONCE */
 	if (!boards_found)

diff --git a/drivers/net/ethernet/altera/altera_sgdma.c b/drivers/net/ethernet/altera/altera_sgdma.c
index 99cc56f..580553d 100644
--- a/drivers/net/ethernet/altera/altera_sgdma.c
+++ b/drivers/net/ethernet/altera/altera_sgdma.c

@@ -353,7 +353,6 @@
 
 	struct sgdma_descrip __iomem *cdesc = &descbase[0];
 	struct sgdma_descrip __iomem *ndesc = &descbase[1];
-
 	struct tse_buffer *rxbuffer = NULL;
 
 	if (!sgdma_rxbusy(priv)) {

diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c
index 54c25ef..be72e1e 100644
--- a/drivers/net/ethernet/altera/altera_tse_ethtool.c
+++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c

@@ -271,5 +271,5 @@
 
 void altera_tse_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops);
+	netdev->ethtool_ops = &tse_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 562df46..bbaf36d 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig

@@ -7,7 +7,7 @@
 	default y
 	depends on DIO || MACH_DECSTATION || MVME147 || ATARI || SUN3 || \
 		   SUN3X || SBUS || PCI || ZORRO || (ISA && ISA_DMA_API) || \
-		   (ARM && ARCH_EBSA110) || ISA || EISA || PCMCIA
+		   (ARM && ARCH_EBSA110) || ISA || EISA || PCMCIA || ARM64
 	---help---
 	  If you have a network (Ethernet) chipset belonging to this class,
 	  say Y.
@@ -177,4 +177,16 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called sunlance.
 
+config AMD_XGBE
+	tristate "AMD 10GbE Ethernet driver"
+	depends on OF_NET
+	select PHYLIB
+	select AMD_XGBE_PHY
+	---help---
+	  This driver supports the AMD 10GbE Ethernet device found on an
+	  AMD SoC.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called amd-xgbe.
+
 endif # NET_VENDOR_AMD

diff --git a/drivers/net/ethernet/amd/Makefile b/drivers/net/ethernet/amd/Makefile
index cdd4301..a38a2dc 100644
--- a/drivers/net/ethernet/amd/Makefile
+++ b/drivers/net/ethernet/amd/Makefile

@@ -17,3 +17,4 @@
 obj-$(CONFIG_PCNET32) += pcnet32.o
 obj-$(CONFIG_SUN3LANCE) += sun3lance.o
 obj-$(CONFIG_SUNLANCE) += sunlance.o
+obj-$(CONFIG_AMD_XGBE) += xgbe/

diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 26efaaa..068dc7c 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c

@@ -1900,7 +1900,7 @@
 
 	/* Initialize driver entry points */
 	dev->netdev_ops = &amd8111e_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->irq =pdev->irq;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
 	netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);

diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index b08101b..968b7bf 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c

@@ -718,7 +718,6 @@
 	unsigned long mem_start = board + ARIADNE_RAM;
 	struct resource *r1, *r2;
 	struct net_device *dev;
-	struct ariadne_private *priv;
 	u32 serial;
 	int err;
 
@@ -738,8 +737,6 @@
 		return -ENOMEM;
 	}
 
-	priv = netdev_priv(dev);
-
 	r1->name = dev->name;
 	r2->name = dev->name;
 

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index a2bd91e..a78e4c1 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c

@@ -1229,7 +1229,7 @@
 	dev->base_addr = base->start;
 	dev->irq = irq;
 	dev->netdev_ops = &au1000_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops);
+	dev->ethtool_ops = &au1000_ethtool_ops;
 	dev->watchdog_timeo = ETH_TX_TIMEOUT;
 
 	/*

diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c
index 47ce57c..6c9de11 100644
--- a/drivers/net/ethernet/amd/hplance.c
+++ b/drivers/net/ethernet/amd/hplance.c

@@ -27,9 +27,9 @@
 
 #include "hplance.h"
 
-/* We have 16834 bytes of RAM for the init block and buffers. This places
+/* We have 16392 bytes of RAM for the init block and buffers. This places
  * an upper limit on the number of buffers we can use. NetBSD uses 8 Rx
- * buffers and 2 Tx buffers.
+ * buffers and 2 Tx buffers, it takes (8 + 2) * 1544 bytes.
  */
 #define LANCE_LOG_TX_BUFFERS 1
 #define LANCE_LOG_RX_BUFFERS 3

diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c
index 0e8399d..0660ac5 100644
--- a/drivers/net/ethernet/amd/mvme147.c
+++ b/drivers/net/ethernet/amd/mvme147.c

@@ -26,9 +26,9 @@
 #include <asm/pgtable.h>
 #include <asm/mvme147hw.h>
 
-/* We have 16834 bytes of RAM for the init block and buffers. This places
+/* We have 32K of RAM for the init block and buffers. This places
  * an upper limit on the number of buffers we can use. NetBSD uses 8 Rx
- * buffers and 2 Tx buffers.
+ * buffers and 2 Tx buffers, it takes (8 + 2) * 1544 bytes.
  */
 #define LANCE_LOG_TX_BUFFERS 1
 #define LANCE_LOG_RX_BUFFERS 3
@@ -111,7 +111,7 @@
 	       dev->dev_addr);
 
 	lp = netdev_priv(dev);
-	lp->ram = __get_dma_pages(GFP_ATOMIC, 3);	/* 16K */
+	lp->ram = __get_dma_pages(GFP_ATOMIC, 3);	/* 32K */
 	if (!lp->ram) {
 		printk("%s: No memory for LANCE buffers\n", dev->name);
 		free_netdev(dev);

diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 08569fe..abf3b15 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c

@@ -457,7 +457,7 @@
     lp->tx_free_frames=AM2150_MAX_TX_FRAMES;
 
     dev->netdev_ops = &mace_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     return nmclan_config(link);

diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile
new file mode 100644
index 0000000..26cf9af
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/Makefile

@@ -0,0 +1,6 @@
+obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o
+
+amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \
+		 xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o
+
+amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
new file mode 100644
index 0000000..bf462ee8
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h

@@ -0,0 +1,1007 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __XGBE_COMMON_H__
+#define __XGBE_COMMON_H__
+
+/* DMA register offsets */
+#define DMA_MR				0x3000
+#define DMA_SBMR			0x3004
+#define DMA_ISR				0x3008
+#define DMA_AXIARCR			0x3010
+#define DMA_AXIAWCR			0x3018
+#define DMA_DSR0			0x3020
+#define DMA_DSR1			0x3024
+#define DMA_DSR2			0x3028
+#define DMA_DSR3			0x302c
+#define DMA_DSR4			0x3030
+
+/* DMA register entry bit positions and sizes */
+#define DMA_AXIARCR_DRC_INDEX		0
+#define DMA_AXIARCR_DRC_WIDTH		4
+#define DMA_AXIARCR_DRD_INDEX		4
+#define DMA_AXIARCR_DRD_WIDTH		2
+#define DMA_AXIARCR_TEC_INDEX		8
+#define DMA_AXIARCR_TEC_WIDTH		4
+#define DMA_AXIARCR_TED_INDEX		12
+#define DMA_AXIARCR_TED_WIDTH		2
+#define DMA_AXIARCR_THC_INDEX		16
+#define DMA_AXIARCR_THC_WIDTH		4
+#define DMA_AXIARCR_THD_INDEX		20
+#define DMA_AXIARCR_THD_WIDTH		2
+#define DMA_AXIAWCR_DWC_INDEX		0
+#define DMA_AXIAWCR_DWC_WIDTH		4
+#define DMA_AXIAWCR_DWD_INDEX		4
+#define DMA_AXIAWCR_DWD_WIDTH		2
+#define DMA_AXIAWCR_RPC_INDEX		8
+#define DMA_AXIAWCR_RPC_WIDTH		4
+#define DMA_AXIAWCR_RPD_INDEX		12
+#define DMA_AXIAWCR_RPD_WIDTH		2
+#define DMA_AXIAWCR_RHC_INDEX		16
+#define DMA_AXIAWCR_RHC_WIDTH		4
+#define DMA_AXIAWCR_RHD_INDEX		20
+#define DMA_AXIAWCR_RHD_WIDTH		2
+#define DMA_AXIAWCR_TDC_INDEX		24
+#define DMA_AXIAWCR_TDC_WIDTH		4
+#define DMA_AXIAWCR_TDD_INDEX		28
+#define DMA_AXIAWCR_TDD_WIDTH		2
+#define DMA_DSR0_RPS_INDEX		8
+#define DMA_DSR0_RPS_WIDTH		4
+#define DMA_DSR0_TPS_INDEX		12
+#define DMA_DSR0_TPS_WIDTH		4
+#define DMA_ISR_MACIS_INDEX		17
+#define DMA_ISR_MACIS_WIDTH		1
+#define DMA_ISR_MTLIS_INDEX		16
+#define DMA_ISR_MTLIS_WIDTH		1
+#define DMA_MR_SWR_INDEX		0
+#define DMA_MR_SWR_WIDTH		1
+#define DMA_SBMR_EAME_INDEX		11
+#define DMA_SBMR_EAME_WIDTH		1
+#define DMA_SBMR_UNDEF_INDEX		0
+#define DMA_SBMR_UNDEF_WIDTH		1
+
+/* DMA channel register offsets
+ *   Multiple channels can be active.  The first channel has registers
+ *   that begin at 0x3100.  Each subsequent channel has registers that
+ *   are accessed using an offset of 0x80 from the previous channel.
+ */
+#define DMA_CH_BASE			0x3100
+#define DMA_CH_INC			0x80
+
+#define DMA_CH_CR			0x00
+#define DMA_CH_TCR			0x04
+#define DMA_CH_RCR			0x08
+#define DMA_CH_TDLR_HI			0x10
+#define DMA_CH_TDLR_LO			0x14
+#define DMA_CH_RDLR_HI			0x18
+#define DMA_CH_RDLR_LO			0x1c
+#define DMA_CH_TDTR_LO			0x24
+#define DMA_CH_RDTR_LO			0x2c
+#define DMA_CH_TDRLR			0x30
+#define DMA_CH_RDRLR			0x34
+#define DMA_CH_IER			0x38
+#define DMA_CH_RIWT			0x3c
+#define DMA_CH_CATDR_LO			0x44
+#define DMA_CH_CARDR_LO			0x4c
+#define DMA_CH_CATBR_HI			0x50
+#define DMA_CH_CATBR_LO			0x54
+#define DMA_CH_CARBR_HI			0x58
+#define DMA_CH_CARBR_LO			0x5c
+#define DMA_CH_SR			0x60
+
+/* DMA channel register entry bit positions and sizes */
+#define DMA_CH_CR_PBLX8_INDEX		16
+#define DMA_CH_CR_PBLX8_WIDTH		1
+#define DMA_CH_IER_AIE_INDEX		15
+#define DMA_CH_IER_AIE_WIDTH		1
+#define DMA_CH_IER_FBEE_INDEX		12
+#define DMA_CH_IER_FBEE_WIDTH		1
+#define DMA_CH_IER_NIE_INDEX		16
+#define DMA_CH_IER_NIE_WIDTH		1
+#define DMA_CH_IER_RBUE_INDEX		7
+#define DMA_CH_IER_RBUE_WIDTH		1
+#define DMA_CH_IER_RIE_INDEX		6
+#define DMA_CH_IER_RIE_WIDTH		1
+#define DMA_CH_IER_RSE_INDEX		8
+#define DMA_CH_IER_RSE_WIDTH		1
+#define DMA_CH_IER_TBUE_INDEX		2
+#define DMA_CH_IER_TBUE_WIDTH		1
+#define DMA_CH_IER_TIE_INDEX		0
+#define DMA_CH_IER_TIE_WIDTH		1
+#define DMA_CH_IER_TXSE_INDEX		1
+#define DMA_CH_IER_TXSE_WIDTH		1
+#define DMA_CH_RCR_PBL_INDEX		16
+#define DMA_CH_RCR_PBL_WIDTH		6
+#define DMA_CH_RCR_RBSZ_INDEX		1
+#define DMA_CH_RCR_RBSZ_WIDTH		14
+#define DMA_CH_RCR_SR_INDEX		0
+#define DMA_CH_RCR_SR_WIDTH		1
+#define DMA_CH_RIWT_RWT_INDEX		0
+#define DMA_CH_RIWT_RWT_WIDTH		8
+#define DMA_CH_SR_FBE_INDEX		12
+#define DMA_CH_SR_FBE_WIDTH		1
+#define DMA_CH_SR_RBU_INDEX		7
+#define DMA_CH_SR_RBU_WIDTH		1
+#define DMA_CH_SR_RI_INDEX		6
+#define DMA_CH_SR_RI_WIDTH		1
+#define DMA_CH_SR_RPS_INDEX		8
+#define DMA_CH_SR_RPS_WIDTH		1
+#define DMA_CH_SR_TBU_INDEX		2
+#define DMA_CH_SR_TBU_WIDTH		1
+#define DMA_CH_SR_TI_INDEX		0
+#define DMA_CH_SR_TI_WIDTH		1
+#define DMA_CH_SR_TPS_INDEX		1
+#define DMA_CH_SR_TPS_WIDTH		1
+#define DMA_CH_TCR_OSP_INDEX		4
+#define DMA_CH_TCR_OSP_WIDTH		1
+#define DMA_CH_TCR_PBL_INDEX		16
+#define DMA_CH_TCR_PBL_WIDTH		6
+#define DMA_CH_TCR_ST_INDEX		0
+#define DMA_CH_TCR_ST_WIDTH		1
+#define DMA_CH_TCR_TSE_INDEX		12
+#define DMA_CH_TCR_TSE_WIDTH		1
+
+/* DMA channel register values */
+#define DMA_OSP_DISABLE			0x00
+#define DMA_OSP_ENABLE			0x01
+#define DMA_PBL_1			1
+#define DMA_PBL_2			2
+#define DMA_PBL_4			4
+#define DMA_PBL_8			8
+#define DMA_PBL_16			16
+#define DMA_PBL_32			32
+#define DMA_PBL_64			64      /* 8 x 8 */
+#define DMA_PBL_128			128     /* 8 x 16 */
+#define DMA_PBL_256			256     /* 8 x 32 */
+#define DMA_PBL_X8_DISABLE		0x00
+#define DMA_PBL_X8_ENABLE		0x01
+
+
+/* MAC register offsets */
+#define MAC_TCR				0x0000
+#define MAC_RCR				0x0004
+#define MAC_PFR				0x0008
+#define MAC_WTR				0x000c
+#define MAC_HTR0			0x0010
+#define MAC_HTR1			0x0014
+#define MAC_HTR2			0x0018
+#define MAC_HTR3			0x001c
+#define MAC_HTR4			0x0020
+#define MAC_HTR5			0x0024
+#define MAC_HTR6			0x0028
+#define MAC_HTR7			0x002c
+#define MAC_VLANTR			0x0050
+#define MAC_VLANHTR			0x0058
+#define MAC_VLANIR			0x0060
+#define MAC_IVLANIR			0x0064
+#define MAC_RETMR			0x006c
+#define MAC_Q0TFCR			0x0070
+#define MAC_RFCR			0x0090
+#define MAC_RQC0R			0x00a0
+#define MAC_RQC1R			0x00a4
+#define MAC_RQC2R			0x00a8
+#define MAC_RQC3R			0x00ac
+#define MAC_ISR				0x00b0
+#define MAC_IER				0x00b4
+#define MAC_RTSR			0x00b8
+#define MAC_PMTCSR			0x00c0
+#define MAC_RWKPFR			0x00c4
+#define MAC_LPICSR			0x00d0
+#define MAC_LPITCR			0x00d4
+#define MAC_VR				0x0110
+#define MAC_DR				0x0114
+#define MAC_HWF0R			0x011c
+#define MAC_HWF1R			0x0120
+#define MAC_HWF2R			0x0124
+#define MAC_GPIOCR			0x0278
+#define MAC_GPIOSR			0x027c
+#define MAC_MACA0HR			0x0300
+#define MAC_MACA0LR			0x0304
+#define MAC_MACA1HR			0x0308
+#define MAC_MACA1LR			0x030c
+
+#define MAC_QTFCR_INC			4
+#define MAC_MACA_INC			4
+
+/* MAC register entry bit positions and sizes */
+#define MAC_HWF0R_ADDMACADRSEL_INDEX	18
+#define MAC_HWF0R_ADDMACADRSEL_WIDTH	5
+#define MAC_HWF0R_ARPOFFSEL_INDEX	9
+#define MAC_HWF0R_ARPOFFSEL_WIDTH	1
+#define MAC_HWF0R_EEESEL_INDEX		13
+#define MAC_HWF0R_EEESEL_WIDTH		1
+#define MAC_HWF0R_GMIISEL_INDEX		1
+#define MAC_HWF0R_GMIISEL_WIDTH		1
+#define MAC_HWF0R_MGKSEL_INDEX		7
+#define MAC_HWF0R_MGKSEL_WIDTH		1
+#define MAC_HWF0R_MMCSEL_INDEX		8
+#define MAC_HWF0R_MMCSEL_WIDTH		1
+#define MAC_HWF0R_RWKSEL_INDEX		6
+#define MAC_HWF0R_RWKSEL_WIDTH		1
+#define MAC_HWF0R_RXCOESEL_INDEX	16
+#define MAC_HWF0R_RXCOESEL_WIDTH	1
+#define MAC_HWF0R_SAVLANINS_INDEX	27
+#define MAC_HWF0R_SAVLANINS_WIDTH	1
+#define MAC_HWF0R_SMASEL_INDEX		5
+#define MAC_HWF0R_SMASEL_WIDTH		1
+#define MAC_HWF0R_TSSEL_INDEX		12
+#define MAC_HWF0R_TSSEL_WIDTH		1
+#define MAC_HWF0R_TSSTSSEL_INDEX	25
+#define MAC_HWF0R_TSSTSSEL_WIDTH	2
+#define MAC_HWF0R_TXCOESEL_INDEX	14
+#define MAC_HWF0R_TXCOESEL_WIDTH	1
+#define MAC_HWF0R_VLHASH_INDEX		4
+#define MAC_HWF0R_VLHASH_WIDTH		1
+#define MAC_HWF1R_ADVTHWORD_INDEX	13
+#define MAC_HWF1R_ADVTHWORD_WIDTH	1
+#define MAC_HWF1R_DBGMEMA_INDEX		19
+#define MAC_HWF1R_DBGMEMA_WIDTH		1
+#define MAC_HWF1R_DCBEN_INDEX		16
+#define MAC_HWF1R_DCBEN_WIDTH		1
+#define MAC_HWF1R_HASHTBLSZ_INDEX	24
+#define MAC_HWF1R_HASHTBLSZ_WIDTH	3
+#define MAC_HWF1R_L3L4FNUM_INDEX	27
+#define MAC_HWF1R_L3L4FNUM_WIDTH	4
+#define MAC_HWF1R_RSSEN_INDEX		20
+#define MAC_HWF1R_RSSEN_WIDTH		1
+#define MAC_HWF1R_RXFIFOSIZE_INDEX	0
+#define MAC_HWF1R_RXFIFOSIZE_WIDTH	5
+#define MAC_HWF1R_SPHEN_INDEX		17
+#define MAC_HWF1R_SPHEN_WIDTH		1
+#define MAC_HWF1R_TSOEN_INDEX		18
+#define MAC_HWF1R_TSOEN_WIDTH		1
+#define MAC_HWF1R_TXFIFOSIZE_INDEX	6
+#define MAC_HWF1R_TXFIFOSIZE_WIDTH	5
+#define MAC_HWF2R_AUXSNAPNUM_INDEX	28
+#define MAC_HWF2R_AUXSNAPNUM_WIDTH	3
+#define MAC_HWF2R_PPSOUTNUM_INDEX	24
+#define MAC_HWF2R_PPSOUTNUM_WIDTH	3
+#define MAC_HWF2R_RXCHCNT_INDEX		12
+#define MAC_HWF2R_RXCHCNT_WIDTH		4
+#define MAC_HWF2R_RXQCNT_INDEX		0
+#define MAC_HWF2R_RXQCNT_WIDTH		4
+#define MAC_HWF2R_TXCHCNT_INDEX		18
+#define MAC_HWF2R_TXCHCNT_WIDTH		4
+#define MAC_HWF2R_TXQCNT_INDEX		6
+#define MAC_HWF2R_TXQCNT_WIDTH		4
+#define MAC_ISR_MMCRXIS_INDEX		9
+#define MAC_ISR_MMCRXIS_WIDTH		1
+#define MAC_ISR_MMCTXIS_INDEX		10
+#define MAC_ISR_MMCTXIS_WIDTH		1
+#define MAC_ISR_PMTIS_INDEX		4
+#define MAC_ISR_PMTIS_WIDTH		1
+#define MAC_MACA1HR_AE_INDEX		31
+#define MAC_MACA1HR_AE_WIDTH		1
+#define MAC_PFR_HMC_INDEX		2
+#define MAC_PFR_HMC_WIDTH		1
+#define MAC_PFR_HUC_INDEX		1
+#define MAC_PFR_HUC_WIDTH		1
+#define MAC_PFR_PM_INDEX		4
+#define MAC_PFR_PM_WIDTH		1
+#define MAC_PFR_PR_INDEX		0
+#define MAC_PFR_PR_WIDTH		1
+#define MAC_PMTCSR_MGKPKTEN_INDEX	1
+#define MAC_PMTCSR_MGKPKTEN_WIDTH	1
+#define MAC_PMTCSR_PWRDWN_INDEX		0
+#define MAC_PMTCSR_PWRDWN_WIDTH		1
+#define MAC_PMTCSR_RWKFILTRST_INDEX	31
+#define MAC_PMTCSR_RWKFILTRST_WIDTH	1
+#define MAC_PMTCSR_RWKPKTEN_INDEX	2
+#define MAC_PMTCSR_RWKPKTEN_WIDTH	1
+#define MAC_Q0TFCR_PT_INDEX		16
+#define MAC_Q0TFCR_PT_WIDTH		16
+#define MAC_Q0TFCR_TFE_INDEX		1
+#define MAC_Q0TFCR_TFE_WIDTH		1
+#define MAC_RCR_ACS_INDEX		1
+#define MAC_RCR_ACS_WIDTH		1
+#define MAC_RCR_CST_INDEX		2
+#define MAC_RCR_CST_WIDTH		1
+#define MAC_RCR_DCRCC_INDEX		3
+#define MAC_RCR_DCRCC_WIDTH		1
+#define MAC_RCR_IPC_INDEX		9
+#define MAC_RCR_IPC_WIDTH		1
+#define MAC_RCR_JE_INDEX		8
+#define MAC_RCR_JE_WIDTH		1
+#define MAC_RCR_LM_INDEX		10
+#define MAC_RCR_LM_WIDTH		1
+#define MAC_RCR_RE_INDEX		0
+#define MAC_RCR_RE_WIDTH		1
+#define MAC_RFCR_RFE_INDEX		0
+#define MAC_RFCR_RFE_WIDTH		1
+#define MAC_RQC0R_RXQ0EN_INDEX		0
+#define MAC_RQC0R_RXQ0EN_WIDTH		2
+#define MAC_TCR_SS_INDEX		29
+#define MAC_TCR_SS_WIDTH		2
+#define MAC_TCR_TE_INDEX		0
+#define MAC_TCR_TE_WIDTH		1
+#define MAC_VLANTR_DOVLTC_INDEX		20
+#define MAC_VLANTR_DOVLTC_WIDTH		1
+#define MAC_VLANTR_ERSVLM_INDEX		19
+#define MAC_VLANTR_ERSVLM_WIDTH		1
+#define MAC_VLANTR_ESVL_INDEX		18
+#define MAC_VLANTR_ESVL_WIDTH		1
+#define MAC_VLANTR_EVLS_INDEX		21
+#define MAC_VLANTR_EVLS_WIDTH		2
+#define MAC_VLANTR_EVLRXS_INDEX		24
+#define MAC_VLANTR_EVLRXS_WIDTH		1
+#define MAC_VR_DEVID_INDEX		8
+#define MAC_VR_DEVID_WIDTH		8
+#define MAC_VR_SNPSVER_INDEX		0
+#define MAC_VR_SNPSVER_WIDTH		8
+#define MAC_VR_USERVER_INDEX		16
+#define MAC_VR_USERVER_WIDTH		8
+
+/* MMC register offsets */
+#define MMC_CR				0x0800
+#define MMC_RISR			0x0804
+#define MMC_TISR			0x0808
+#define MMC_RIER			0x080c
+#define MMC_TIER			0x0810
+#define MMC_TXOCTETCOUNT_GB_LO		0x0814
+#define MMC_TXOCTETCOUNT_GB_HI		0x0818
+#define MMC_TXFRAMECOUNT_GB_LO		0x081c
+#define MMC_TXFRAMECOUNT_GB_HI		0x0820
+#define MMC_TXBROADCASTFRAMES_G_LO	0x0824
+#define MMC_TXBROADCASTFRAMES_G_HI	0x0828
+#define MMC_TXMULTICASTFRAMES_G_LO	0x082c
+#define MMC_TXMULTICASTFRAMES_G_HI	0x0830
+#define MMC_TX64OCTETS_GB_LO		0x0834
+#define MMC_TX64OCTETS_GB_HI		0x0838
+#define MMC_TX65TO127OCTETS_GB_LO	0x083c
+#define MMC_TX65TO127OCTETS_GB_HI	0x0840
+#define MMC_TX128TO255OCTETS_GB_LO	0x0844
+#define MMC_TX128TO255OCTETS_GB_HI	0x0848
+#define MMC_TX256TO511OCTETS_GB_LO	0x084c
+#define MMC_TX256TO511OCTETS_GB_HI	0x0850
+#define MMC_TX512TO1023OCTETS_GB_LO	0x0854
+#define MMC_TX512TO1023OCTETS_GB_HI	0x0858
+#define MMC_TX1024TOMAXOCTETS_GB_LO	0x085c
+#define MMC_TX1024TOMAXOCTETS_GB_HI	0x0860
+#define MMC_TXUNICASTFRAMES_GB_LO	0x0864
+#define MMC_TXUNICASTFRAMES_GB_HI	0x0868
+#define MMC_TXMULTICASTFRAMES_GB_LO	0x086c
+#define MMC_TXMULTICASTFRAMES_GB_HI	0x0870
+#define MMC_TXBROADCASTFRAMES_GB_LO	0x0874
+#define MMC_TXBROADCASTFRAMES_GB_HI	0x0878
+#define MMC_TXUNDERFLOWERROR_LO		0x087c
+#define MMC_TXUNDERFLOWERROR_HI		0x0880
+#define MMC_TXOCTETCOUNT_G_LO		0x0884
+#define MMC_TXOCTETCOUNT_G_HI		0x0888
+#define MMC_TXFRAMECOUNT_G_LO		0x088c
+#define MMC_TXFRAMECOUNT_G_HI		0x0890
+#define MMC_TXPAUSEFRAMES_LO		0x0894
+#define MMC_TXPAUSEFRAMES_HI		0x0898
+#define MMC_TXVLANFRAMES_G_LO		0x089c
+#define MMC_TXVLANFRAMES_G_HI		0x08a0
+#define MMC_RXFRAMECOUNT_GB_LO		0x0900
+#define MMC_RXFRAMECOUNT_GB_HI		0x0904
+#define MMC_RXOCTETCOUNT_GB_LO		0x0908
+#define MMC_RXOCTETCOUNT_GB_HI		0x090c
+#define MMC_RXOCTETCOUNT_G_LO		0x0910
+#define MMC_RXOCTETCOUNT_G_HI		0x0914
+#define MMC_RXBROADCASTFRAMES_G_LO	0x0918
+#define MMC_RXBROADCASTFRAMES_G_HI	0x091c
+#define MMC_RXMULTICASTFRAMES_G_LO	0x0920
+#define MMC_RXMULTICASTFRAMES_G_HI	0x0924
+#define MMC_RXCRCERROR_LO		0x0928
+#define MMC_RXCRCERROR_HI		0x092c
+#define MMC_RXRUNTERROR			0x0930
+#define MMC_RXJABBERERROR		0x0934
+#define MMC_RXUNDERSIZE_G		0x0938
+#define MMC_RXOVERSIZE_G		0x093c
+#define MMC_RX64OCTETS_GB_LO		0x0940
+#define MMC_RX64OCTETS_GB_HI		0x0944
+#define MMC_RX65TO127OCTETS_GB_LO	0x0948
+#define MMC_RX65TO127OCTETS_GB_HI	0x094c
+#define MMC_RX128TO255OCTETS_GB_LO	0x0950
+#define MMC_RX128TO255OCTETS_GB_HI	0x0954
+#define MMC_RX256TO511OCTETS_GB_LO	0x0958
+#define MMC_RX256TO511OCTETS_GB_HI	0x095c
+#define MMC_RX512TO1023OCTETS_GB_LO	0x0960
+#define MMC_RX512TO1023OCTETS_GB_HI	0x0964
+#define MMC_RX1024TOMAXOCTETS_GB_LO	0x0968
+#define MMC_RX1024TOMAXOCTETS_GB_HI	0x096c
+#define MMC_RXUNICASTFRAMES_G_LO	0x0970
+#define MMC_RXUNICASTFRAMES_G_HI	0x0974
+#define MMC_RXLENGTHERROR_LO		0x0978
+#define MMC_RXLENGTHERROR_HI		0x097c
+#define MMC_RXOUTOFRANGETYPE_LO		0x0980
+#define MMC_RXOUTOFRANGETYPE_HI		0x0984
+#define MMC_RXPAUSEFRAMES_LO		0x0988
+#define MMC_RXPAUSEFRAMES_HI		0x098c
+#define MMC_RXFIFOOVERFLOW_LO		0x0990
+#define MMC_RXFIFOOVERFLOW_HI		0x0994
+#define MMC_RXVLANFRAMES_GB_LO		0x0998
+#define MMC_RXVLANFRAMES_GB_HI		0x099c
+#define MMC_RXWATCHDOGERROR		0x09a0
+
+/* MMC register entry bit positions and sizes */
+#define MMC_CR_CR_INDEX				0
+#define MMC_CR_CR_WIDTH				1
+#define MMC_CR_CSR_INDEX			1
+#define MMC_CR_CSR_WIDTH			1
+#define MMC_CR_ROR_INDEX			2
+#define MMC_CR_ROR_WIDTH			1
+#define MMC_CR_MCF_INDEX			3
+#define MMC_CR_MCF_WIDTH			1
+#define MMC_CR_MCT_INDEX			4
+#define MMC_CR_MCT_WIDTH			2
+#define MMC_RIER_ALL_INTERRUPTS_INDEX		0
+#define MMC_RIER_ALL_INTERRUPTS_WIDTH		23
+#define MMC_RISR_RXFRAMECOUNT_GB_INDEX		0
+#define MMC_RISR_RXFRAMECOUNT_GB_WIDTH		1
+#define MMC_RISR_RXOCTETCOUNT_GB_INDEX		1
+#define MMC_RISR_RXOCTETCOUNT_GB_WIDTH		1
+#define MMC_RISR_RXOCTETCOUNT_G_INDEX		2
+#define MMC_RISR_RXOCTETCOUNT_G_WIDTH		1
+#define MMC_RISR_RXBROADCASTFRAMES_G_INDEX	3
+#define MMC_RISR_RXBROADCASTFRAMES_G_WIDTH	1
+#define MMC_RISR_RXMULTICASTFRAMES_G_INDEX	4
+#define MMC_RISR_RXMULTICASTFRAMES_G_WIDTH	1
+#define MMC_RISR_RXCRCERROR_INDEX		5
+#define MMC_RISR_RXCRCERROR_WIDTH		1
+#define MMC_RISR_RXRUNTERROR_INDEX		6
+#define MMC_RISR_RXRUNTERROR_WIDTH		1
+#define MMC_RISR_RXJABBERERROR_INDEX		7
+#define MMC_RISR_RXJABBERERROR_WIDTH		1
+#define MMC_RISR_RXUNDERSIZE_G_INDEX		8
+#define MMC_RISR_RXUNDERSIZE_G_WIDTH		1
+#define MMC_RISR_RXOVERSIZE_G_INDEX		9
+#define MMC_RISR_RXOVERSIZE_G_WIDTH		1
+#define MMC_RISR_RX64OCTETS_GB_INDEX		10
+#define MMC_RISR_RX64OCTETS_GB_WIDTH		1
+#define MMC_RISR_RX65TO127OCTETS_GB_INDEX	11
+#define MMC_RISR_RX65TO127OCTETS_GB_WIDTH	1
+#define MMC_RISR_RX128TO255OCTETS_GB_INDEX	12
+#define MMC_RISR_RX128TO255OCTETS_GB_WIDTH	1
+#define MMC_RISR_RX256TO511OCTETS_GB_INDEX	13
+#define MMC_RISR_RX256TO511OCTETS_GB_WIDTH	1
+#define MMC_RISR_RX512TO1023OCTETS_GB_INDEX	14
+#define MMC_RISR_RX512TO1023OCTETS_GB_WIDTH	1
+#define MMC_RISR_RX1024TOMAXOCTETS_GB_INDEX	15
+#define MMC_RISR_RX1024TOMAXOCTETS_GB_WIDTH	1
+#define MMC_RISR_RXUNICASTFRAMES_G_INDEX	16
+#define MMC_RISR_RXUNICASTFRAMES_G_WIDTH	1
+#define MMC_RISR_RXLENGTHERROR_INDEX		17
+#define MMC_RISR_RXLENGTHERROR_WIDTH		1
+#define MMC_RISR_RXOUTOFRANGETYPE_INDEX		18
+#define MMC_RISR_RXOUTOFRANGETYPE_WIDTH		1
+#define MMC_RISR_RXPAUSEFRAMES_INDEX		19
+#define MMC_RISR_RXPAUSEFRAMES_WIDTH		1
+#define MMC_RISR_RXFIFOOVERFLOW_INDEX		20
+#define MMC_RISR_RXFIFOOVERFLOW_WIDTH		1
+#define MMC_RISR_RXVLANFRAMES_GB_INDEX		21
+#define MMC_RISR_RXVLANFRAMES_GB_WIDTH		1
+#define MMC_RISR_RXWATCHDOGERROR_INDEX		22
+#define MMC_RISR_RXWATCHDOGERROR_WIDTH		1
+#define MMC_TIER_ALL_INTERRUPTS_INDEX		0
+#define MMC_TIER_ALL_INTERRUPTS_WIDTH		18
+#define MMC_TISR_TXOCTETCOUNT_GB_INDEX		0
+#define MMC_TISR_TXOCTETCOUNT_GB_WIDTH		1
+#define MMC_TISR_TXFRAMECOUNT_GB_INDEX		1
+#define MMC_TISR_TXFRAMECOUNT_GB_WIDTH		1
+#define MMC_TISR_TXBROADCASTFRAMES_G_INDEX	2
+#define MMC_TISR_TXBROADCASTFRAMES_G_WIDTH	1
+#define MMC_TISR_TXMULTICASTFRAMES_G_INDEX	3
+#define MMC_TISR_TXMULTICASTFRAMES_G_WIDTH	1
+#define MMC_TISR_TX64OCTETS_GB_INDEX		4
+#define MMC_TISR_TX64OCTETS_GB_WIDTH		1
+#define MMC_TISR_TX65TO127OCTETS_GB_INDEX	5
+#define MMC_TISR_TX65TO127OCTETS_GB_WIDTH	1
+#define MMC_TISR_TX128TO255OCTETS_GB_INDEX	6
+#define MMC_TISR_TX128TO255OCTETS_GB_WIDTH	1
+#define MMC_TISR_TX256TO511OCTETS_GB_INDEX	7
+#define MMC_TISR_TX256TO511OCTETS_GB_WIDTH	1
+#define MMC_TISR_TX512TO1023OCTETS_GB_INDEX	8
+#define MMC_TISR_TX512TO1023OCTETS_GB_WIDTH	1
+#define MMC_TISR_TX1024TOMAXOCTETS_GB_INDEX	9
+#define MMC_TISR_TX1024TOMAXOCTETS_GB_WIDTH	1
+#define MMC_TISR_TXUNICASTFRAMES_GB_INDEX	10
+#define MMC_TISR_TXUNICASTFRAMES_GB_WIDTH	1
+#define MMC_TISR_TXMULTICASTFRAMES_GB_INDEX	11
+#define MMC_TISR_TXMULTICASTFRAMES_GB_WIDTH	1
+#define MMC_TISR_TXBROADCASTFRAMES_GB_INDEX	12
+#define MMC_TISR_TXBROADCASTFRAMES_GB_WIDTH	1
+#define MMC_TISR_TXUNDERFLOWERROR_INDEX		13
+#define MMC_TISR_TXUNDERFLOWERROR_WIDTH		1
+#define MMC_TISR_TXOCTETCOUNT_G_INDEX		14
+#define MMC_TISR_TXOCTETCOUNT_G_WIDTH		1
+#define MMC_TISR_TXFRAMECOUNT_G_INDEX		15
+#define MMC_TISR_TXFRAMECOUNT_G_WIDTH		1
+#define MMC_TISR_TXPAUSEFRAMES_INDEX		16
+#define MMC_TISR_TXPAUSEFRAMES_WIDTH		1
+#define MMC_TISR_TXVLANFRAMES_G_INDEX		17
+#define MMC_TISR_TXVLANFRAMES_G_WIDTH		1
+
+/* MTL register offsets */
+#define MTL_OMR				0x1000
+#define MTL_FDCR			0x1008
+#define MTL_FDSR			0x100c
+#define MTL_FDDR			0x1010
+#define MTL_ISR				0x1020
+#define MTL_RQDCM0R			0x1030
+#define MTL_TCPM0R			0x1040
+#define MTL_TCPM1R			0x1044
+
+#define MTL_RQDCM_INC			4
+#define MTL_RQDCM_Q_PER_REG		4
+
+/* MTL register entry bit positions and sizes */
+#define MTL_OMR_ETSALG_INDEX		5
+#define MTL_OMR_ETSALG_WIDTH		2
+#define MTL_OMR_RAA_INDEX		2
+#define MTL_OMR_RAA_WIDTH		1
+
+/* MTL queue register offsets
+ *   Multiple queues can be active.  The first queue has registers
+ *   that begin at 0x1100.  Each subsequent queue has registers that
+ *   are accessed using an offset of 0x80 from the previous queue.
+ */
+#define MTL_Q_BASE			0x1100
+#define MTL_Q_INC			0x80
+
+#define MTL_Q_TQOMR			0x00
+#define MTL_Q_TQUR			0x04
+#define MTL_Q_TQDR			0x08
+#define MTL_Q_TCECR			0x10
+#define MTL_Q_TCESR			0x14
+#define MTL_Q_TCQWR			0x18
+#define MTL_Q_RQOMR			0x40
+#define MTL_Q_RQMPOCR			0x44
+#define MTL_Q_RQDR			0x4c
+#define MTL_Q_IER			0x70
+#define MTL_Q_ISR			0x74
+
+/* MTL queue register entry bit positions and sizes */
+#define MTL_Q_TCQWR_QW_INDEX		0
+#define MTL_Q_TCQWR_QW_WIDTH		21
+#define MTL_Q_RQOMR_EHFC_INDEX		7
+#define MTL_Q_RQOMR_EHFC_WIDTH		1
+#define MTL_Q_RQOMR_RFA_INDEX		8
+#define MTL_Q_RQOMR_RFA_WIDTH		3
+#define MTL_Q_RQOMR_RFD_INDEX		13
+#define MTL_Q_RQOMR_RFD_WIDTH		3
+#define MTL_Q_RQOMR_RQS_INDEX		16
+#define MTL_Q_RQOMR_RQS_WIDTH		9
+#define MTL_Q_RQOMR_RSF_INDEX		5
+#define MTL_Q_RQOMR_RSF_WIDTH		1
+#define MTL_Q_RQOMR_RTC_INDEX		0
+#define MTL_Q_RQOMR_RTC_WIDTH		2
+#define MTL_Q_TQOMR_FTQ_INDEX		0
+#define MTL_Q_TQOMR_FTQ_WIDTH		1
+#define MTL_Q_TQOMR_TQS_INDEX		16
+#define MTL_Q_TQOMR_TQS_WIDTH		10
+#define MTL_Q_TQOMR_TSF_INDEX		1
+#define MTL_Q_TQOMR_TSF_WIDTH		1
+#define MTL_Q_TQOMR_TTC_INDEX		4
+#define MTL_Q_TQOMR_TTC_WIDTH		3
+#define MTL_Q_TQOMR_TXQEN_INDEX		2
+#define MTL_Q_TQOMR_TXQEN_WIDTH		2
+
+/* MTL queue register value */
+#define MTL_RSF_DISABLE			0x00
+#define MTL_RSF_ENABLE			0x01
+#define MTL_TSF_DISABLE			0x00
+#define MTL_TSF_ENABLE			0x01
+
+#define MTL_RX_THRESHOLD_64		0x00
+#define MTL_RX_THRESHOLD_96		0x02
+#define MTL_RX_THRESHOLD_128		0x03
+#define MTL_TX_THRESHOLD_32		0x01
+#define MTL_TX_THRESHOLD_64		0x00
+#define MTL_TX_THRESHOLD_96		0x02
+#define MTL_TX_THRESHOLD_128		0x03
+#define MTL_TX_THRESHOLD_192		0x04
+#define MTL_TX_THRESHOLD_256		0x05
+#define MTL_TX_THRESHOLD_384		0x06
+#define MTL_TX_THRESHOLD_512		0x07
+
+#define MTL_ETSALG_WRR			0x00
+#define MTL_ETSALG_WFQ			0x01
+#define MTL_ETSALG_DWRR			0x02
+#define MTL_RAA_SP			0x00
+#define MTL_RAA_WSP			0x01
+
+#define MTL_Q_DISABLED			0x00
+#define MTL_Q_ENABLED			0x02
+
+
+/* MTL traffic class register offsets
+ *   Multiple traffic classes can be active.  The first class has registers
+ *   that begin at 0x1100.  Each subsequent class has registers that
+ *   are accessed using an offset of 0x80 from the previous class.
+ */
+#define MTL_TC_BASE			MTL_Q_BASE
+#define MTL_TC_INC			MTL_Q_INC
+
+#define MTL_TC_ETSCR			0x10
+
+/* MTL traffic class register entry bit positions and sizes */
+#define MTL_TC_ETSCR_TSA_INDEX		0
+#define MTL_TC_ETSCR_TSA_WIDTH		2
+
+/* MTL traffic class register value */
+#define MTL_TSA_SP			0x00
+#define MTL_TSA_ETS			0x02
+
+
+/* PCS MMD select register offset
+ *  The MMD select register is used for accessing PCS registers
+ *  when the underlying APB3 interface is using indirect addressing.
+ *  Indirect addressing requires accessing registers in two phases,
+ *  an address phase and a data phase.  The address phase requires
+ *  writing an address selection value to the MMD select register.
+ */
+#define PCS_MMD_SELECT			0xff
+
+
+/* Descriptor/Packet entry bit positions and sizes */
+#define RX_PACKET_ERRORS_CRC_INDEX		2
+#define RX_PACKET_ERRORS_CRC_WIDTH		1
+#define RX_PACKET_ERRORS_FRAME_INDEX		3
+#define RX_PACKET_ERRORS_FRAME_WIDTH		1
+#define RX_PACKET_ERRORS_LENGTH_INDEX		0
+#define RX_PACKET_ERRORS_LENGTH_WIDTH		1
+#define RX_PACKET_ERRORS_OVERRUN_INDEX		1
+#define RX_PACKET_ERRORS_OVERRUN_WIDTH		1
+
+#define RX_PACKET_ATTRIBUTES_CSUM_DONE_INDEX	0
+#define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH	1
+#define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX	1
+#define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH	1
+#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX	2
+#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH	1
+
+#define RX_NORMAL_DESC0_OVT_INDEX		0
+#define RX_NORMAL_DESC0_OVT_WIDTH		16
+#define RX_NORMAL_DESC3_ES_INDEX		15
+#define RX_NORMAL_DESC3_ES_WIDTH		1
+#define RX_NORMAL_DESC3_ETLT_INDEX		16
+#define RX_NORMAL_DESC3_ETLT_WIDTH		4
+#define RX_NORMAL_DESC3_INTE_INDEX		30
+#define RX_NORMAL_DESC3_INTE_WIDTH		1
+#define RX_NORMAL_DESC3_LD_INDEX		28
+#define RX_NORMAL_DESC3_LD_WIDTH		1
+#define RX_NORMAL_DESC3_OWN_INDEX		31
+#define RX_NORMAL_DESC3_OWN_WIDTH		1
+#define RX_NORMAL_DESC3_PL_INDEX		0
+#define RX_NORMAL_DESC3_PL_WIDTH		14
+
+#define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_INDEX	0
+#define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_WIDTH	1
+#define TX_PACKET_ATTRIBUTES_TSO_ENABLE_INDEX	1
+#define TX_PACKET_ATTRIBUTES_TSO_ENABLE_WIDTH	1
+#define TX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX	2
+#define TX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH	1
+
+#define TX_CONTEXT_DESC2_MSS_INDEX		0
+#define TX_CONTEXT_DESC2_MSS_WIDTH		15
+#define TX_CONTEXT_DESC3_CTXT_INDEX		30
+#define TX_CONTEXT_DESC3_CTXT_WIDTH		1
+#define TX_CONTEXT_DESC3_TCMSSV_INDEX		26
+#define TX_CONTEXT_DESC3_TCMSSV_WIDTH		1
+#define TX_CONTEXT_DESC3_VLTV_INDEX		16
+#define TX_CONTEXT_DESC3_VLTV_WIDTH		1
+#define TX_CONTEXT_DESC3_VT_INDEX		0
+#define TX_CONTEXT_DESC3_VT_WIDTH		16
+
+#define TX_NORMAL_DESC2_HL_B1L_INDEX		0
+#define TX_NORMAL_DESC2_HL_B1L_WIDTH		14
+#define TX_NORMAL_DESC2_IC_INDEX		31
+#define TX_NORMAL_DESC2_IC_WIDTH		1
+#define TX_NORMAL_DESC2_VTIR_INDEX		14
+#define TX_NORMAL_DESC2_VTIR_WIDTH		2
+#define TX_NORMAL_DESC3_CIC_INDEX		16
+#define TX_NORMAL_DESC3_CIC_WIDTH		2
+#define TX_NORMAL_DESC3_CPC_INDEX		26
+#define TX_NORMAL_DESC3_CPC_WIDTH		2
+#define TX_NORMAL_DESC3_CTXT_INDEX		30
+#define TX_NORMAL_DESC3_CTXT_WIDTH		1
+#define TX_NORMAL_DESC3_FD_INDEX		29
+#define TX_NORMAL_DESC3_FD_WIDTH		1
+#define TX_NORMAL_DESC3_FL_INDEX		0
+#define TX_NORMAL_DESC3_FL_WIDTH		15
+#define TX_NORMAL_DESC3_LD_INDEX		28
+#define TX_NORMAL_DESC3_LD_WIDTH		1
+#define TX_NORMAL_DESC3_OWN_INDEX		31
+#define TX_NORMAL_DESC3_OWN_WIDTH		1
+#define TX_NORMAL_DESC3_TCPHDRLEN_INDEX		19
+#define TX_NORMAL_DESC3_TCPHDRLEN_WIDTH		4
+#define TX_NORMAL_DESC3_TCPPL_INDEX		0
+#define TX_NORMAL_DESC3_TCPPL_WIDTH		18
+#define TX_NORMAL_DESC3_TSE_INDEX		18
+#define TX_NORMAL_DESC3_TSE_WIDTH		1
+
+#define TX_NORMAL_DESC2_VLAN_INSERT		0x2
+
+/* MDIO undefined or vendor specific registers */
+#ifndef MDIO_AN_COMP_STAT
+#define MDIO_AN_COMP_STAT		0x0030
+#endif
+
+
+/* Bit setting and getting macros
+ *  The get macros extract the _width-bit field starting at bit _index of
+ *  the variable; the _LE forms treat the variable as a little-endian value
+ *
+ *  The set macros clear that field and then store the low _width bits of
+ *  the specified value in it.  Masks are built from an unsigned constant
+ *  so a field ending at bit 31 never shifts into the sign bit of an int
+ */
+#define GET_BITS(_var, _index, _width)					\
+	(((_var) >> (_index)) & ((0x1U << (_width)) - 1))
+
+#define SET_BITS(_var, _index, _width, _val)				\
+do {									\
+	(_var) &= ~(((0x1U << (_width)) - 1) << (_index));		\
+	(_var) |= (((_val) & ((0x1U << (_width)) - 1)) << (_index));	\
+} while (0)
+
+#define GET_BITS_LE(_var, _index, _width)				\
+	((le32_to_cpu((_var)) >> (_index)) & ((0x1U << (_width)) - 1))
+
+#define SET_BITS_LE(_var, _index, _width, _val)				\
+do {									\
+	(_var) &= cpu_to_le32(~(((0x1U << (_width)) - 1) << (_index)));	\
+	(_var) |= cpu_to_le32((((_val) &				\
+			      ((0x1U << (_width)) - 1)) << (_index)));	\
+} while (0)
+
+
+/* Bit setting and getting macros based on register fields
+ *  The get macro uses the bit field definitions formed using the input
+ *  names to extract the current bit field value from within the
+ *  variable
+ *
+ *  The set macro uses the bit field definitions formed using the input
+ *  names to set the bit field of the variable to the specified value
+ *
+ *  _prefix and _field are pasted together, so for example
+ *  XGMAC_GET_BITS(v, TX_NORMAL_DESC3, OWN) expands to
+ *  GET_BITS((v), TX_NORMAL_DESC3_OWN_INDEX, TX_NORMAL_DESC3_OWN_WIDTH).
+ *  Both names must therefore be given literally, not as expressions.
+ *
+ *  The _LE variants forward to GET_BITS_LE/SET_BITS_LE and are meant
+ *  for variables stored in little-endian byte order.
+ */
+#define XGMAC_GET_BITS(_var, _prefix, _field)				\
+	GET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH)
+
+#define XGMAC_SET_BITS(_var, _prefix, _field, _val)			\
+	SET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH, (_val))
+
+#define XGMAC_GET_BITS_LE(_var, _prefix, _field)			\
+	GET_BITS_LE((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH)
+
+#define XGMAC_SET_BITS_LE(_var, _prefix, _field, _val)			\
+	SET_BITS_LE((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH, (_val))
+
+
+/* Macros for reading or writing registers
+ *  The ioread macros will get bit fields or full values using the
+ *  register definitions formed using the input names
+ *
+ *  The iowrite macros will set bit fields or full values using the
+ *  register definitions formed using the input names
+ *
+ *  The register offset _reg is added to the xgmac_regs base of _pdata.
+ *  The _BITS variants also paste _reg and _field together to find the
+ *  <reg>_<field>_INDEX and <reg>_<field>_WIDTH definitions, so for those
+ *  variants _reg must be the register's macro name.
+ *
+ *  XGMAC_IOWRITE_BITS is a read-modify-write sequence (read the
+ *  register, update the field, write the register back) that declares a
+ *  local variable named reg_val; _val must not refer to a caller
+ *  variable of that name.
+ */
+#define XGMAC_IOREAD(_pdata, _reg)					\
+	ioread32((_pdata)->xgmac_regs + _reg)
+
+#define XGMAC_IOREAD_BITS(_pdata, _reg, _field)				\
+	GET_BITS(XGMAC_IOREAD((_pdata), _reg),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XGMAC_IOWRITE(_pdata, _reg, _val)				\
+	iowrite32((_val), (_pdata)->xgmac_regs + _reg)
+
+#define XGMAC_IOWRITE_BITS(_pdata, _reg, _field, _val)			\
+do {									\
+	u32 reg_val = XGMAC_IOREAD((_pdata), _reg);			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XGMAC_IOWRITE((_pdata), _reg, reg_val);				\
+} while (0)
+
+
+/* Macros for reading or writing MTL queue or traffic class registers
+ *  Similar to the standard read and write macros except that the
+ *  base register value is calculated by the queue or traffic class number
+ *
+ *  The accessed address is
+ *  xgmac_regs + MTL_Q_BASE + (_n * MTL_Q_INC) + _reg, where _n is the
+ *  queue or traffic class number.
+ *
+ *  XGMAC_MTL_IOWRITE_BITS is a read-modify-write sequence that declares
+ *  a local variable named reg_val; _val must not refer to a caller
+ *  variable of that name.
+ */
+#define XGMAC_MTL_IOREAD(_pdata, _n, _reg)				\
+	ioread32((_pdata)->xgmac_regs +					\
+		 MTL_Q_BASE + ((_n) * MTL_Q_INC) + _reg)
+
+#define XGMAC_MTL_IOREAD_BITS(_pdata, _n, _reg, _field)			\
+	GET_BITS(XGMAC_MTL_IOREAD((_pdata), (_n), _reg),		\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XGMAC_MTL_IOWRITE(_pdata, _n, _reg, _val)			\
+	iowrite32((_val), (_pdata)->xgmac_regs +			\
+		  MTL_Q_BASE + ((_n) * MTL_Q_INC) + _reg)
+
+#define XGMAC_MTL_IOWRITE_BITS(_pdata, _n, _reg, _field, _val)		\
+do {									\
+	u32 reg_val = XGMAC_MTL_IOREAD((_pdata), (_n), _reg);		\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XGMAC_MTL_IOWRITE((_pdata), (_n), _reg, reg_val);		\
+} while (0)
+
+
+/* Macros for reading or writing DMA channel registers
+ *  Similar to the standard read and write macros except that the
+ *  base register value is taken from the channel (its dma_regs member)
+ *  rather than from the device private data
+ *
+ *  XGMAC_DMA_IOWRITE_BITS is a read-modify-write sequence that declares
+ *  a local variable named reg_val; _val must not refer to a caller
+ *  variable of that name.
+ */
+#define XGMAC_DMA_IOREAD(_channel, _reg)				\
+	ioread32((_channel)->dma_regs + _reg)
+
+#define XGMAC_DMA_IOREAD_BITS(_channel, _reg, _field)			\
+	GET_BITS(XGMAC_DMA_IOREAD((_channel), _reg),			\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XGMAC_DMA_IOWRITE(_channel, _reg, _val)				\
+	iowrite32((_val), (_channel)->dma_regs + _reg)
+
+#define XGMAC_DMA_IOWRITE_BITS(_channel, _reg, _field, _val)		\
+do {									\
+	u32 reg_val = XGMAC_DMA_IOREAD((_channel), _reg);		\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XGMAC_DMA_IOWRITE((_channel), _reg, reg_val);			\
+} while (0)
+
+
+/* Macros for reading or writing whole 32-bit XPCS registers.
+ *  _off is the offset added to the xpcs_regs base of _pdata.  No bit
+ *  field variants are provided here.
+ */
+#define XPCS_IOWRITE(_pdata, _off, _val)				\
+	iowrite32(_val, (_pdata)->xpcs_regs + (_off))
+
+#define XPCS_IOREAD(_pdata, _off)					\
+	ioread32((_pdata)->xpcs_regs + (_off))
+
+
+/* Macros for building, reading or writing register values or bits
+ * using MDIO.  Different from above because of the use of standardized
+ * Linux include values.  No shifting is performed with the bit
+ * operations, everything works on mask values.
+ *
+ * The register address handed to the hw_if read_mmd_regs/write_mmd_regs
+ * callbacks is MII_ADDR_C45 | (_mmd << 16) | (_reg & 0xffff).  Every
+ * macro argument is parenthesized so that compound expressions (for
+ * example a mask built as A | B) are evaluated as a unit.
+ *
+ * XMDIO_WRITE_BITS is a read-modify-write sequence that declares a
+ * local variable named mmd_val; _val must not refer to a caller
+ * variable of that name.
+ */
+#define XMDIO_READ(_pdata, _mmd, _reg)					\
+	((_pdata)->hw_if.read_mmd_regs((_pdata), 0,			\
+		MII_ADDR_C45 | ((_mmd) << 16) | ((_reg) & 0xffff)))
+
+#define XMDIO_READ_BITS(_pdata, _mmd, _reg, _mask)			\
+	(XMDIO_READ((_pdata), (_mmd), (_reg)) & (_mask))
+
+#define XMDIO_WRITE(_pdata, _mmd, _reg, _val)				\
+	((_pdata)->hw_if.write_mmd_regs((_pdata), 0,			\
+		MII_ADDR_C45 | ((_mmd) << 16) | ((_reg) & 0xffff), (_val)))
+
+#define XMDIO_WRITE_BITS(_pdata, _mmd, _reg, _mask, _val)		\
+do {									\
+	u32 mmd_val = XMDIO_READ((_pdata), (_mmd), (_reg));		\
+	mmd_val &= ~(_mask);						\
+	mmd_val |= (_val);						\
+	XMDIO_WRITE((_pdata), (_mmd), (_reg), mmd_val);			\
+} while (0)
+
+#endif

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
new file mode 100644
index 0000000..6bb76d5
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c

@@ -0,0 +1,375 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+/* Format @value as "0x%08x\n" and hand it to a userspace reader.
+ *
+ * Only a read that starts at offset 0 produces data; any other offset
+ * returns 0.  Returns -ENOMEM if the temporary string cannot be
+ * allocated, -ENOSPC if @count is smaller than the formatted string,
+ * and otherwise the result of simple_read_from_buffer().
+ */
+static ssize_t xgbe_common_read(char __user *buffer, size_t count,
+				loff_t *ppos, unsigned int value)
+{
+	ssize_t copied;
+	size_t text_len;
+	char *text;
+
+	if (*ppos)
+		return 0;
+
+	text = kasprintf(GFP_KERNEL, "0x%08x\n", value);
+	if (text == NULL)
+		return -ENOMEM;
+
+	text_len = strlen(text);
+	if (count >= text_len)
+		copied = simple_read_from_buffer(buffer, count, ppos,
+						 text, text_len);
+	else
+		copied = -ENOSPC;
+
+	/* The temporary string is released on every path */
+	kfree(text);
+
+	return copied;
+}
+
+/* Parse a hexadecimal number written by userspace and store it in
+ * @value.
+ *
+ * The whole number must arrive in a single write starting at offset 0
+ * and must fit, with its terminator, in the 32 byte work area.
+ *
+ * Returns the number of bytes consumed on success.  Returns -EINVAL for
+ * a write at a non-zero offset, -ENOSPC if @count does not fit in the
+ * work area, the error from simple_write_to_buffer() if the copy from
+ * userspace fails, and -EIO if the text is not a valid hexadecimal
+ * unsigned int.  @value is only modified on success.
+ */
+static ssize_t xgbe_common_write(const char __user *buffer, size_t count,
+				 loff_t *ppos, unsigned int *value)
+{
+	char workarea[32];
+	ssize_t len;
+	unsigned int scan_value;
+
+	/* Returning 0 here would look like "nothing written, no error":
+	 * callers that only test for a negative result would carry on
+	 * with an unset value and userspace may retry the write forever.
+	 */
+	if (*ppos != 0)
+		return -EINVAL;
+
+	if (count >= sizeof(workarea))
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(workarea, sizeof(workarea) - 1, ppos,
+				     buffer, count);
+	if (len < 0)
+		return len;
+
+	workarea[len] = '\0';
+
+	/* kstrtouint() accepts an optional "0x" prefix and one trailing
+	 * newline but, unlike sscanf("%x"), rejects trailing garbage and
+	 * out-of-range values instead of silently truncating them.
+	 */
+	if (kstrtouint(workarea, 16, &scan_value))
+		return -EIO;
+
+	*value = scan_value;
+
+	return len;
+}
+
+/* debugfs read handler: report the XGMAC register offset currently
+ * stored in debugfs_xgmac_reg.
+ */
+static ssize_t xgmac_reg_addr_read(struct file *filp, char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int reg = pdata->debugfs_xgmac_reg;
+
+	return xgbe_common_read(buffer, count, ppos, reg);
+}
+
+/* debugfs write handler: parse the written text and store the result
+ * in debugfs_xgmac_reg, the XGMAC register offset used by the value
+ * handlers.
+ */
+static ssize_t xgmac_reg_addr_write(struct file *filp,
+				    const char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int *reg = &pdata->debugfs_xgmac_reg;
+
+	return xgbe_common_write(buffer, count, ppos, reg);
+}
+
+/* debugfs read handler: read the XGMAC register at the offset stored
+ * in debugfs_xgmac_reg and report its value.  The hardware register is
+ * read on every call, before the file offset is examined.
+ */
+static ssize_t xgmac_reg_value_read(struct file *filp, char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int reg_contents;
+
+	reg_contents = XGMAC_IOREAD(pdata, pdata->debugfs_xgmac_reg);
+	return xgbe_common_read(buffer, count, ppos, reg_contents);
+}
+
+/* debugfs write handler: parse the written text and write the result
+ * to the XGMAC register at the offset stored in debugfs_xgmac_reg.
+ *
+ * Returns the result of xgbe_common_write().  The register is only
+ * written when a value was actually parsed.
+ */
+static ssize_t xgmac_reg_value_write(struct file *filp,
+				     const char __user *buffer,
+				     size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int value;
+	ssize_t len;
+
+	len = xgbe_common_write(buffer, count, ppos, &value);
+
+	/* A result of zero or less means that nothing was parsed and
+	 * value is still uninitialized; it must not reach the hardware.
+	 */
+	if (len <= 0)
+		return len;
+
+	XGMAC_IOWRITE(pdata, pdata->debugfs_xgmac_reg, value);
+
+	return len;
+}
+
+/* File operations for the "xgmac_register" debugfs file (see
+ * xgbe_debugfs_init()): selects the XGMAC register offset.
+ * simple_open stores the inode's private data in filp->private_data,
+ * which the handlers use as the struct xgbe_prv_data pointer.
+ */
+static const struct file_operations xgmac_reg_addr_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xgmac_reg_addr_read,
+	.write = xgmac_reg_addr_write,
+};
+
+/* File operations for the "xgmac_register_value" debugfs file: reads
+ * or writes the register selected through "xgmac_register".
+ */
+static const struct file_operations xgmac_reg_value_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xgmac_reg_value_read,
+	.write = xgmac_reg_value_write,
+};
+
+/* debugfs read handler: report the MMD number currently stored in
+ * debugfs_xpcs_mmd.
+ */
+static ssize_t xpcs_mmd_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int mmd = pdata->debugfs_xpcs_mmd;
+
+	return xgbe_common_read(buffer, count, ppos, mmd);
+}
+
+/* debugfs write handler: parse the written text and store the result
+ * in debugfs_xpcs_mmd, the MMD number used by the XPCS value handlers.
+ */
+static ssize_t xpcs_mmd_write(struct file *filp, const char __user *buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int *mmd = &pdata->debugfs_xpcs_mmd;
+
+	return xgbe_common_write(buffer, count, ppos, mmd);
+}
+
+/* debugfs read handler: report the XPCS register number currently
+ * stored in debugfs_xpcs_reg.
+ */
+static ssize_t xpcs_reg_addr_read(struct file *filp, char __user *buffer,
+				  size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int reg = pdata->debugfs_xpcs_reg;
+
+	return xgbe_common_read(buffer, count, ppos, reg);
+}
+
+/* debugfs write handler: parse the written text and store the result
+ * in debugfs_xpcs_reg, the register number used by the XPCS value
+ * handlers.
+ */
+static ssize_t xpcs_reg_addr_write(struct file *filp, const char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int *reg = &pdata->debugfs_xpcs_reg;
+
+	return xgbe_common_write(buffer, count, ppos, reg);
+}
+
+/* debugfs read handler: read the register selected by debugfs_xpcs_mmd
+ * and debugfs_xpcs_reg through the hw_if read_mmd_regs callback and
+ * report its value.  The callback runs on every call, before the file
+ * offset is examined.
+ */
+static ssize_t xpcs_reg_value_read(struct file *filp, char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int reg_contents;
+
+	reg_contents = pdata->hw_if.read_mmd_regs(pdata,
+						  pdata->debugfs_xpcs_mmd,
+						  pdata->debugfs_xpcs_reg);
+	return xgbe_common_read(buffer, count, ppos, reg_contents);
+}
+
+/* debugfs write handler: parse the written text and write the result,
+ * through the hw_if write_mmd_regs callback, to the register selected
+ * by debugfs_xpcs_mmd and debugfs_xpcs_reg.
+ *
+ * Returns the result of xgbe_common_write().  The register is only
+ * written when a value was actually parsed.
+ */
+static ssize_t xpcs_reg_value_write(struct file *filp,
+				    const char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int value;
+	ssize_t len;
+
+	len = xgbe_common_write(buffer, count, ppos, &value);
+
+	/* A result of zero or less means that nothing was parsed and
+	 * value is still uninitialized; it must not reach the hardware.
+	 */
+	if (len <= 0)
+		return len;
+
+	pdata->hw_if.write_mmd_regs(pdata, pdata->debugfs_xpcs_mmd,
+				    pdata->debugfs_xpcs_reg, value);
+
+	return len;
+}
+
+/* File operations for the "xpcs_mmd" debugfs file (see
+ * xgbe_debugfs_init()): selects the MMD used for XPCS accesses.
+ */
+static const struct file_operations xpcs_mmd_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xpcs_mmd_read,
+	.write = xpcs_mmd_write,
+};
+
+/* File operations for the "xpcs_register" debugfs file: selects the
+ * XPCS register number.
+ */
+static const struct file_operations xpcs_reg_addr_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xpcs_reg_addr_read,
+	.write = xpcs_reg_addr_write,
+};
+
+/* File operations for the "xpcs_register_value" debugfs file: reads or
+ * writes the register selected through "xpcs_mmd" and "xpcs_register".
+ */
+static const struct file_operations xpcs_reg_value_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xpcs_reg_value_read,
+	.write = xpcs_reg_value_write,
+};
+
+/* Create the debugfs directory "amd-xgbe-<netdev name>" at the debugfs
+ * root and populate it with the register access files:
+ *   xgmac_register / xgmac_register_value
+ *   xpcs_mmd / xpcs_register / xpcs_register_value
+ *
+ * The selectors start out as XGMAC register 0, XPCS MMD 1 and XPCS
+ * register 0.  Failures are reported with netdev_err() but are not
+ * returned to the caller: if the directory cannot be created nothing
+ * else is attempted, while a failure to create one file does not stop
+ * the remaining files from being created.
+ */
+void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
+{
+	struct dentry *pfile;
+	char *buf;
+
+	/* Set defaults */
+	pdata->debugfs_xgmac_reg = 0;
+	pdata->debugfs_xpcs_mmd = 1;
+	pdata->debugfs_xpcs_reg = 0;
+
+	buf = kasprintf(GFP_KERNEL, "amd-xgbe-%s", pdata->netdev->name);
+	if (!buf) {
+		/* Never hand a NULL name to debugfs_create_dir() */
+		pdata->xgbe_debugfs = NULL;
+		netdev_err(pdata->netdev, "debugfs name allocation failed\n");
+		return;
+	}
+
+	pdata->xgbe_debugfs = debugfs_create_dir(buf, NULL);
+	if (pdata->xgbe_debugfs == NULL) {
+		netdev_err(pdata->netdev, "debugfs_create_dir failed\n");
+		/* The name buffer must be released on this path as well */
+		kfree(buf);
+		return;
+	}
+
+	pfile = debugfs_create_file("xgmac_register", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xgmac_reg_addr_fops);
+	if (!pfile)
+		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+
+	pfile = debugfs_create_file("xgmac_register_value", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xgmac_reg_value_fops);
+	if (!pfile)
+		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+
+	pfile = debugfs_create_file("xpcs_mmd", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xpcs_mmd_fops);
+	if (!pfile)
+		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+
+	pfile = debugfs_create_file("xpcs_register", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xpcs_reg_addr_fops);
+	if (!pfile)
+		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+
+	pfile = debugfs_create_file("xpcs_register_value", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xpcs_reg_value_fops);
+	if (!pfile)
+		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+
+	kfree(buf);
+}
+
+/* Remove the device's debugfs directory together with everything in
+ * it and forget the stored dentry.
+ */
+void xgbe_debugfs_exit(struct xgbe_prv_data *pdata)
+{
+	struct dentry *root = pdata->xgbe_debugfs;
+
+	debugfs_remove_recursive(root);
+	pdata->xgbe_debugfs = NULL;
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
new file mode 100644
index 0000000..6f1c859
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c

@@ -0,0 +1,556 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+static void xgbe_unmap_skb(struct xgbe_prv_data *, struct xgbe_ring_data *);
+
+/* Release everything owned by @ring; a NULL @ring is ignored.
+ *
+ * Every descriptor data entry is passed to xgbe_unmap_skb() before the
+ * rdata array is freed, then the coherent descriptor memory is
+ * released.  Both pointers are cleared once freed, so a ring that has
+ * already been freed is skipped.
+ */
+static void xgbe_free_ring(struct xgbe_prv_data *pdata,
+			   struct xgbe_ring *ring)
+{
+	unsigned int idx;
+
+	if (!ring)
+		return;
+
+	if (ring->rdata) {
+		for (idx = 0; idx < ring->rdesc_count; idx++)
+			xgbe_unmap_skb(pdata, GET_DESC_DATA(ring, idx));
+
+		kfree(ring->rdata);
+		ring->rdata = NULL;
+	}
+
+	if (ring->rdesc) {
+		size_t desc_bytes = sizeof(struct xgbe_ring_desc) *
+				    ring->rdesc_count;
+
+		dma_free_coherent(pdata->dev, desc_bytes,
+				  ring->rdesc, ring->rdesc_dma);
+		ring->rdesc = NULL;
+	}
+}
+
+/* Free the Tx and Rx rings of every channel of the device. */
+static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
+{
+	unsigned int idx;
+
+	DBGPR("-->xgbe_free_ring_resources\n");
+
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		struct xgbe_channel *channel = pdata->channel + idx;
+
+		xgbe_free_ring(pdata, channel->tx_ring);
+		xgbe_free_ring(pdata, channel->rx_ring);
+	}
+
+	DBGPR("<--xgbe_free_ring_resources\n");
+}
+
+/* Allocate the memory backing one ring; a NULL @ring is accepted and
+ * treated as success.
+ *
+ * Allocates @rdesc_count hardware descriptors from coherent DMA memory
+ * and a zeroed array of @rdesc_count descriptor data entries.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails.  Nothing
+ * is released here on failure: the descriptors stay attached to @ring
+ * for the caller to free.
+ */
+static int xgbe_init_ring(struct xgbe_prv_data *pdata,
+			  struct xgbe_ring *ring, unsigned int rdesc_count)
+{
+	size_t desc_bytes = sizeof(struct xgbe_ring_desc) * rdesc_count;
+
+	DBGPR("-->xgbe_init_ring\n");
+
+	if (ring == NULL)
+		return 0;
+
+	/* Hardware descriptors */
+	ring->rdesc_count = rdesc_count;
+	ring->rdesc = dma_alloc_coherent(pdata->dev, desc_bytes,
+					 &ring->rdesc_dma, GFP_KERNEL);
+	if (ring->rdesc == NULL)
+		return -ENOMEM;
+
+	/* Per-descriptor driver data */
+	ring->rdata = kcalloc(rdesc_count, sizeof(struct xgbe_ring_data),
+			      GFP_KERNEL);
+	if (ring->rdata == NULL)
+		return -ENOMEM;
+
+	DBGPR("    rdesc=0x%p, rdesc_dma=0x%llx, rdata=0x%p\n",
+	      ring->rdesc, ring->rdesc_dma, ring->rdata);
+
+	DBGPR("<--xgbe_init_ring\n");
+
+	return 0;
+}
+
+/* Allocate the Tx and Rx descriptor rings of every channel.
+ *
+ * Each ring is set up through xgbe_init_ring() using the descriptor
+ * counts stored in @pdata.  If any ring fails to initialize, everything
+ * allocated so far is released through xgbe_free_ring_resources() and
+ * the error from xgbe_init_ring() is returned.  Returns 0 on success.
+ */
+static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int i;
+	int ret;
+
+	DBGPR("-->xgbe_alloc_ring_resources\n");
+
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		DBGPR("  %s - tx_ring:\n", channel->name);
+		ret = xgbe_init_ring(pdata, channel->tx_ring,
+				     pdata->tx_desc_count);
+		if (ret) {
+			netdev_alert(pdata->netdev,
+				     "error initializing Tx ring\n");
+			goto err_ring;
+		}
+
+		DBGPR("  %s - rx_ring:\n", channel->name);
+		ret = xgbe_init_ring(pdata, channel->rx_ring,
+				     pdata->rx_desc_count);
+		if (ret) {
+			/* Name the ring that actually failed */
+			netdev_alert(pdata->netdev,
+				     "error initializing Rx ring\n");
+			goto err_ring;
+		}
+	}
+
+	DBGPR("<--xgbe_alloc_ring_resources\n");
+
+	return 0;
+
+err_ring:
+	/* Also frees a ring that was only partially initialized */
+	xgbe_free_ring_resources(pdata);
+
+	return ret;
+}
+
+/* Prepare the Tx ring of every channel for use.
+ *
+ * Each descriptor data entry is linked to its hardware descriptor
+ * (virtual and DMA address), the ring indices and the queue_stopped
+ * flag are reset, and the hw_if tx_desc_init callback is invoked for
+ * the channel.  Processing stops at the first channel that has no Tx
+ * ring.
+ */
+static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int ch, n;
+
+	DBGPR("-->xgbe_wrapper_tx_descriptor_init\n");
+
+	for (ch = 0; ch < pdata->channel_count; ch++) {
+		struct xgbe_channel *channel = pdata->channel + ch;
+		struct xgbe_ring *ring = channel->tx_ring;
+		struct xgbe_ring_desc *desc;
+		struct xgbe_ring_data *rdata;
+		dma_addr_t desc_dma;
+
+		if (!ring)
+			break;
+
+		/* Walk the descriptor array and its DMA address together */
+		desc = ring->rdesc;
+		desc_dma = ring->rdesc_dma;
+		for (n = 0; n < ring->rdesc_count; n++) {
+			rdata = GET_DESC_DATA(ring, n);
+
+			rdata->rdesc = desc++;
+			rdata->rdesc_dma = desc_dma;
+			desc_dma += sizeof(struct xgbe_ring_desc);
+		}
+
+		ring->cur = 0;
+		ring->dirty = 0;
+		ring->tx.queue_stopped = 0;
+
+		hw_if->tx_desc_init(channel);
+	}
+
+	DBGPR("<--xgbe_wrapper_tx_descriptor_init\n");
+}
+
+/* Prepare the Rx ring of every channel for use.
+ *
+ * For each descriptor data entry the matching hardware descriptor is
+ * recorded (virtual and DMA address), an skb of rx_buf_size bytes is
+ * allocated and its data area is DMA-mapped for device-to-memory
+ * transfers.  Afterwards the ring indices and the realloc bookkeeping
+ * are reset and the hw_if rx_desc_init callback is invoked for the
+ * channel.  Processing stops at the first channel without an Rx ring.
+ *
+ * NOTE(review): an skb allocation or DMA mapping failure only ends the
+ * inner loop; the remaining entries of that ring keep no rdesc/skb and
+ * rx_desc_init is still called - confirm that the callback copes with
+ * a partially populated ring.  No error is reported to the caller.
+ */
+static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_channel *channel;
+	struct xgbe_ring *ring;
+	struct xgbe_ring_desc *rdesc;
+	struct xgbe_ring_data *rdata;
+	dma_addr_t rdesc_dma, skb_dma;
+	struct sk_buff *skb = NULL;
+	unsigned int i, j;
+
+	DBGPR("-->xgbe_wrapper_rx_descriptor_init\n");
+
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		ring = channel->rx_ring;
+		if (!ring)
+			break;
+
+		/* Walk the descriptor array and its DMA address together */
+		rdesc = ring->rdesc;
+		rdesc_dma = ring->rdesc_dma;
+
+		for (j = 0; j < ring->rdesc_count; j++) {
+			rdata = GET_DESC_DATA(ring, j);
+
+			rdata->rdesc = rdesc;
+			rdata->rdesc_dma = rdesc_dma;
+
+			/* Allocate skb & assign to each rdesc */
+			skb = dev_alloc_skb(pdata->rx_buf_size);
+			if (skb == NULL)
+				break;
+			skb_dma = dma_map_single(pdata->dev, skb->data,
+						 pdata->rx_buf_size,
+						 DMA_FROM_DEVICE);
+			if (dma_mapping_error(pdata->dev, skb_dma)) {
+				netdev_alert(pdata->netdev,
+					     "failed to do the dma map\n");
+				/* The unmapped skb is not kept in rdata */
+				dev_kfree_skb_any(skb);
+				break;
+			}
+			rdata->skb = skb;
+			rdata->skb_dma = skb_dma;
+			rdata->skb_dma_len = pdata->rx_buf_size;
+
+			rdesc++;
+			rdesc_dma += sizeof(struct xgbe_ring_desc);
+		}
+
+		ring->cur = 0;
+		ring->dirty = 0;
+		ring->rx.realloc_index = 0;
+		ring->rx.realloc_threshold = 0;
+
+		hw_if->rx_desc_init(channel);
+	}
+
+	DBGPR("<--xgbe_wrapper_rx_descriptor_init\n");
+}
+
+/* Detach and release whatever buffer is attached to @rdata.
+ *
+ * A non-zero skb_dma is unmapped (as a page or as a single mapping,
+ * depending on mapped_as_page), an attached skb is freed, and the
+ * per-descriptor bookkeeping fields are cleared.
+ *
+ * NOTE(review): the unmap always uses DMA_TO_DEVICE, yet this helper is
+ * also reached for Rx rings through xgbe_free_ring(), and Rx buffers
+ * are mapped with DMA_FROM_DEVICE in
+ * xgbe_wrapper_rx_descriptor_init() - confirm the direction is correct
+ * on that path.
+ */
+static void xgbe_unmap_skb(struct xgbe_prv_data *pdata,
+			   struct xgbe_ring_data *rdata)
+{
+	if (rdata->skb_dma) {
+		if (!rdata->mapped_as_page)
+			dma_unmap_single(pdata->dev, rdata->skb_dma,
+					 rdata->skb_dma_len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(pdata->dev, rdata->skb_dma,
+				       rdata->skb_dma_len, DMA_TO_DEVICE);
+
+		rdata->skb_dma = 0;
+		rdata->skb_dma_len = 0;
+	}
+
+	if (rdata->skb) {
+		dev_kfree_skb_any(rdata->skb);
+		rdata->skb = NULL;
+	}
+
+	/* Reset the remaining per-descriptor state */
+	rdata->mapped_as_page = 0;
+	rdata->interrupt = 0;
+	rdata->len = 0;
+	rdata->tso_header = 0;
+}
+
+/*
+ * DMA-map an skb for transmission, recording each mapping in consecutive
+ * ring entries starting at ring->cur.
+ *
+ * One entry is skipped up front when a context descriptor will be needed
+ * (new MSS for a TSO packet, or new VLAN tag).  For TSO packets the header
+ * is mapped into its own entry.  The linear data and every page fragment
+ * are then mapped in pieces of at most TX_MAX_BUF_SIZE bytes.
+ *
+ * The skb pointer is stored in the last entry that was actually used, so
+ * that it is released together with the final piece of the packet.
+ *
+ * Returns the number of ring entries consumed, or 0 if any mapping failed,
+ * in which case every entry touched so far has been unmapped again.
+ */
+static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_ring *ring = channel->tx_ring;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_packet_data *packet;
+	struct skb_frag_struct *frag;
+	dma_addr_t skb_dma;
+	unsigned int start_index, cur_index;
+	unsigned int offset, tso, vlan, datalen, len;
+	unsigned int i;
+
+	DBGPR("-->xgbe_map_tx_skb: cur = %d\n", ring->cur);
+
+	offset = 0;
+	start_index = ring->cur;
+	cur_index = ring->cur;
+
+	packet = &ring->packet_data;
+	packet->rdesc_count = 0;
+	packet->length = 0;
+
+	tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			     TSO_ENABLE);
+	vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			      VLAN_CTAG);
+
+	/* Save space for a context descriptor if needed */
+	if ((tso && (packet->mss != ring->tx.cur_mss)) ||
+	    (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag)))
+		cur_index++;
+	rdata = GET_DESC_DATA(ring, cur_index);
+
+	if (tso) {
+		DBGPR("  TSO packet\n");
+
+		/* Map the TSO header */
+		skb_dma = dma_map_single(pdata->dev, skb->data,
+					 packet->header_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(pdata->dev, skb_dma)) {
+			netdev_alert(pdata->netdev, "dma_map_single failed\n");
+			goto err_out;
+		}
+		rdata->skb_dma = skb_dma;
+		rdata->skb_dma_len = packet->header_len;
+		rdata->tso_header = 1;
+
+		offset = packet->header_len;
+
+		packet->length += packet->header_len;
+
+		cur_index++;
+		rdata = GET_DESC_DATA(ring, cur_index);
+	}
+
+	/* Map the (remainder of the) packet */
+	for (datalen = skb_headlen(skb) - offset; datalen; ) {
+		len = min_t(unsigned int, datalen, TX_MAX_BUF_SIZE);
+
+		skb_dma = dma_map_single(pdata->dev, skb->data + offset, len,
+					 DMA_TO_DEVICE);
+		if (dma_mapping_error(pdata->dev, skb_dma)) {
+			netdev_alert(pdata->netdev, "dma_map_single failed\n");
+			goto err_out;
+		}
+		rdata->skb_dma = skb_dma;
+		rdata->skb_dma_len = len;
+		/* dma_addr_t width varies by config; widen it for %llx */
+		DBGPR("  skb data: index=%u, dma=0x%llx, len=%u\n",
+		      cur_index, (unsigned long long)skb_dma, len);
+
+		datalen -= len;
+		offset += len;
+
+		packet->length += len;
+
+		cur_index++;
+		rdata = GET_DESC_DATA(ring, cur_index);
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		DBGPR("  mapping frag %u\n", i);
+
+		frag = &skb_shinfo(skb)->frags[i];
+		offset = 0;
+
+		for (datalen = skb_frag_size(frag); datalen; ) {
+			len = min_t(unsigned int, datalen, TX_MAX_BUF_SIZE);
+
+			skb_dma = skb_frag_dma_map(pdata->dev, frag, offset,
+						   len, DMA_TO_DEVICE);
+			if (dma_mapping_error(pdata->dev, skb_dma)) {
+				netdev_alert(pdata->netdev,
+					     "skb_frag_dma_map failed\n");
+				goto err_out;
+			}
+			rdata->skb_dma = skb_dma;
+			rdata->skb_dma_len = len;
+			rdata->mapped_as_page = 1;
+			/* dma_addr_t width varies by config; widen for %llx */
+			DBGPR("  skb data: index=%u, dma=0x%llx, len=%u\n",
+			      cur_index, (unsigned long long)skb_dma, len);
+
+			datalen -= len;
+			offset += len;
+
+			packet->length += len;
+
+			cur_index++;
+			rdata = GET_DESC_DATA(ring, cur_index);
+		}
+	}
+
+	/* Save the skb address in the last entry that was used.  After the
+	 * mapping loops rdata has already been advanced one entry past the
+	 * final mapped piece, so step back to cur_index - 1; otherwise the
+	 * skb would be attached to an entry that belongs to the next packet.
+	 */
+	rdata = GET_DESC_DATA(ring, cur_index - 1);
+	rdata->skb = skb;
+
+	/* Save the number of descriptor entries used */
+	packet->rdesc_count = cur_index - start_index;
+
+	DBGPR("<--xgbe_map_tx_skb: count=%u\n", packet->rdesc_count);
+
+	return packet->rdesc_count;
+
+err_out:
+	/* Undo every mapping made before the failure */
+	while (start_index < cur_index) {
+		rdata = GET_DESC_DATA(ring, start_index++);
+		xgbe_unmap_skb(pdata, rdata);
+	}
+
+	DBGPR("<--xgbe_map_tx_skb: count=0\n");
+
+	return 0;
+}
+
+/*
+ * Attach fresh receive buffers to consumed Rx ring entries.
+ *
+ * Walks ring->dirty entries starting at ring->rx.realloc_index.  Each entry
+ * is first cleaned with xgbe_unmap_skb(), then given a newly allocated and
+ * DMA-mapped skb of pdata->rx_buf_size bytes, and finally handed back to
+ * the hardware through hw_if->rx_desc_reset().
+ *
+ * If an allocation or mapping fails the walk stops early; ring->dirty is
+ * reset to zero in every case.
+ */
+static void xgbe_realloc_skb(struct xgbe_channel *channel)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_ring *ring = channel->rx_ring;
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_ring_data *entry;
+	struct sk_buff *new_skb = NULL;
+	dma_addr_t mapping;
+	int i;
+
+	DBGPR("-->xgbe_realloc_skb: rx_ring->rx.realloc_index = %u\n",
+	      ring->rx.realloc_index);
+
+	for (i = 0; i < ring->dirty; i++) {
+		entry = GET_DESC_DATA(ring, ring->rx.realloc_index);
+
+		/* Drop anything still attached to this entry */
+		xgbe_unmap_skb(pdata, entry);
+
+		/* Get a replacement buffer */
+		new_skb = dev_alloc_skb(pdata->rx_buf_size);
+		if (!new_skb) {
+			netdev_alert(pdata->netdev,
+				     "failed to allocate skb\n");
+			break;
+		}
+
+		/* Make it visible to the device */
+		mapping = dma_map_single(pdata->dev, new_skb->data,
+					 pdata->rx_buf_size, DMA_FROM_DEVICE);
+		if (dma_mapping_error(pdata->dev, mapping)) {
+			netdev_alert(pdata->netdev,
+				     "failed to do the dma map\n");
+			dev_kfree_skb_any(new_skb);
+			break;
+		}
+
+		entry->skb = new_skb;
+		entry->skb_dma = mapping;
+		entry->skb_dma_len = pdata->rx_buf_size;
+
+		/* Rewrite the descriptor for the new buffer */
+		hw_if->rx_desc_reset(entry);
+
+		ring->rx.realloc_index++;
+	}
+	ring->dirty = 0;
+
+	DBGPR("<--xgbe_realloc_skb\n");
+}
+
+/*
+ * Fill in the descriptor-interface operations table with the handlers
+ * implemented in this file.
+ */
+void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *desc_if)
+{
+	DBGPR("-->xgbe_init_function_ptrs_desc\n");
+
+	/* Ring allocation and teardown */
+	desc_if->alloc_ring_resources = xgbe_alloc_ring_resources;
+	desc_if->free_ring_resources = xgbe_free_ring_resources;
+
+	/* Descriptor initialisation wrappers */
+	desc_if->wrapper_tx_desc_init = xgbe_wrapper_tx_descriptor_init;
+	desc_if->wrapper_rx_desc_init = xgbe_wrapper_rx_descriptor_init;
+
+	/* Buffer mapping, replenishing and release */
+	desc_if->map_tx_skb = xgbe_map_tx_skb;
+	desc_if->realloc_skb = xgbe_realloc_skb;
+	desc_if->unmap_skb = xgbe_unmap_skb;
+
+	DBGPR("<--xgbe_init_function_ptrs_desc\n");
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
new file mode 100644
index 0000000..002293b
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c

@@ -0,0 +1,2182 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/phy.h>
+#include <linux/clk.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+/*
+ * Convert a time in microseconds to a receive interrupt watchdog timer
+ * value, based on the current rate of pdata->sysclock.
+ */
+static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata,
+				      unsigned int usec)
+{
+	unsigned long cycles_per_usec;
+	unsigned int riwt;
+
+	DBGPR("-->xgbe_usec_to_riwt\n");
+
+	/* Whole clock cycles per microsecond (rate is in Hz) */
+	cycles_per_usec = clk_get_rate(pdata->sysclock) / 1000000;
+
+	/*
+	 * Each watchdog timer unit is equivalent to 256 clock cycles, so:
+	 *   riwt = ( usec * ( system_clock_hz / 10^6 ) ) / 256
+	 */
+	riwt = (usec * cycles_per_usec) / 256;
+
+	DBGPR("<--xgbe_usec_to_riwt\n");
+
+	return riwt;
+}
+
+/*
+ * Convert a receive interrupt watchdog timer value to microseconds, based
+ * on the current rate of pdata->sysclock.
+ *
+ * NOTE(review): a clock rate below 1 MHz makes the divisor zero.
+ */
+static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata,
+				      unsigned int riwt)
+{
+	unsigned long cycles_per_usec;
+	unsigned int cycles;
+	unsigned int usec;
+
+	DBGPR("-->xgbe_riwt_to_usec\n");
+
+	/* Whole clock cycles per microsecond (rate is in Hz) */
+	cycles_per_usec = clk_get_rate(pdata->sysclock) / 1000000;
+
+	/*
+	 * Each watchdog timer unit is equivalent to 256 clock cycles, so:
+	 *   usec = ( riwt * 256 ) / ( system_clock_hz / 10^6 )
+	 */
+	cycles = riwt * 256;
+	usec = cycles / cycles_per_usec;
+
+	DBGPR("<--xgbe_riwt_to_usec\n");
+
+	return usec;
+}
+
+/*
+ * Write pdata->pblx8 into the PBLX8 field of DMA_CH_CR on every channel.
+ * Always returns 0.
+ */
+static int xgbe_config_pblx8(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_CR, PBLX8, pdata->pblx8);
+
+	return 0;
+}
+
+/* Read back the PBL field of DMA_CH_TCR from the first channel. */
+static int xgbe_get_tx_pbl_val(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *first = pdata->channel;
+
+	return XGMAC_DMA_IOREAD_BITS(first, DMA_CH_TCR, PBL);
+}
+
+/*
+ * Write pdata->tx_pbl into the PBL field of DMA_CH_TCR on each channel,
+ * stopping at the first channel that has no Tx ring.  Always returns 0.
+ */
+static int xgbe_config_tx_pbl_val(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->tx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_TCR, PBL, pdata->tx_pbl);
+
+	return 0;
+}
+
+/* Read back the PBL field of DMA_CH_RCR from the first channel. */
+static int xgbe_get_rx_pbl_val(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *first = pdata->channel;
+
+	return XGMAC_DMA_IOREAD_BITS(first, DMA_CH_RCR, PBL);
+}
+
+/*
+ * Write pdata->rx_pbl into the PBL field of DMA_CH_RCR on each channel,
+ * stopping at the first channel that has no Rx ring.  Always returns 0.
+ */
+static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->rx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_RCR, PBL, pdata->rx_pbl);
+
+	return 0;
+}
+
+/*
+ * Write pdata->tx_osp_mode into the OSP field of DMA_CH_TCR on each
+ * channel, stopping at the first channel that has no Tx ring.
+ * Always returns 0.
+ */
+static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->tx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_TCR, OSP,
+				       pdata->tx_osp_mode);
+
+	return 0;
+}
+
+/*
+ * Write val into the RSF field of MTL_Q_RQOMR for every Rx queue reported
+ * in pdata->hw_feat.rx_q_cnt.  Always returns 0.
+ */
+static int xgbe_config_rsf_mode(struct xgbe_prv_data *pdata, unsigned int val)
+{
+	unsigned int queue = 0;
+
+	while (queue < pdata->hw_feat.rx_q_cnt) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_RQOMR, RSF, val);
+		queue++;
+	}
+
+	return 0;
+}
+
+/*
+ * Write val into the TSF field of MTL_Q_TQOMR for every Tx queue reported
+ * in pdata->hw_feat.tx_q_cnt.  Always returns 0.
+ */
+static int xgbe_config_tsf_mode(struct xgbe_prv_data *pdata, unsigned int val)
+{
+	unsigned int queue = 0;
+
+	while (queue < pdata->hw_feat.tx_q_cnt) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, TSF, val);
+		queue++;
+	}
+
+	return 0;
+}
+
+/*
+ * Write val into the RTC field of MTL_Q_RQOMR for every Rx queue reported
+ * in pdata->hw_feat.rx_q_cnt.  Always returns 0.
+ */
+static int xgbe_config_rx_threshold(struct xgbe_prv_data *pdata,
+				    unsigned int val)
+{
+	unsigned int queue = 0;
+
+	while (queue < pdata->hw_feat.rx_q_cnt) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_RQOMR, RTC, val);
+		queue++;
+	}
+
+	return 0;
+}
+
+/*
+ * Write val into the TTC field of MTL_Q_TQOMR for every Tx queue reported
+ * in pdata->hw_feat.tx_q_cnt.  Always returns 0.
+ */
+static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata,
+				    unsigned int val)
+{
+	unsigned int queue = 0;
+
+	while (queue < pdata->hw_feat.tx_q_cnt) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, TTC, val);
+		queue++;
+	}
+
+	return 0;
+}
+
+/*
+ * Write pdata->rx_riwt into the RWT field of DMA_CH_RIWT on each channel,
+ * stopping at the first channel that has no Rx ring.  Always returns 0.
+ */
+static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->rx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_RIWT, RWT, pdata->rx_riwt);
+
+	return 0;
+}
+
+/* Tx coalescing hook: performs no register access and always returns 0. */
+static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata)
+{
+	return 0;
+}
+
+/*
+ * Write pdata->rx_buf_size into the RBSZ field of DMA_CH_RCR on each
+ * channel, stopping at the first channel that has no Rx ring.
+ */
+static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->rx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_RCR, RBSZ,
+				       pdata->rx_buf_size);
+}
+
+/*
+ * Set the TSE field of DMA_CH_TCR to 1 on each channel, stopping at the
+ * first channel that has no Tx ring.
+ */
+static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count && ch->tx_ring; n++, ch++)
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_TCR, TSE, 1);
+}
+
+/*
+ * Turn transmit flow control off: clear EHFC in MTL_Q_RQOMR for every Rx
+ * queue, then clear TFE in each MAC flow control register, for at most
+ * XGMAC_MAX_FLOW_CONTROL_QUEUES queues.  Always returns 0.
+ */
+static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
+{
+	unsigned int q_count;
+	unsigned int reg, reg_val;
+	unsigned int i;
+
+	/* Clear MTL flow control */
+	for (i = 0; i < pdata->hw_feat.rx_q_cnt; i++)
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0);
+
+	/* Clear MAC flow control, one register per queue */
+	q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt,
+			XGMAC_MAX_FLOW_CONTROL_QUEUES);
+	for (i = 0, reg = MAC_Q0TFCR; i < q_count; i++, reg += MAC_QTFCR_INC) {
+		reg_val = XGMAC_IOREAD(pdata, reg);
+		XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 0);
+		XGMAC_IOWRITE(pdata, reg, reg_val);
+	}
+
+	return 0;
+}
+
+/*
+ * Turn transmit flow control on: set EHFC in MTL_Q_RQOMR for every Rx
+ * queue, then set TFE and a pause time of 0xffff in each MAC flow control
+ * register, for at most XGMAC_MAX_FLOW_CONTROL_QUEUES queues.
+ * Always returns 0.
+ */
+static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
+{
+	unsigned int q_count;
+	unsigned int reg, reg_val;
+	unsigned int i;
+
+	/* Set MTL flow control */
+	for (i = 0; i < pdata->hw_feat.rx_q_cnt; i++)
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 1);
+
+	/* Set MAC flow control, one register per queue */
+	q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt,
+			XGMAC_MAX_FLOW_CONTROL_QUEUES);
+	for (i = 0, reg = MAC_Q0TFCR; i < q_count; i++, reg += MAC_QTFCR_INC) {
+		reg_val = XGMAC_IOREAD(pdata, reg);
+
+		/* Enable transmit flow control */
+		XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 1);
+		/* Set pause time */
+		XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, PT, 0xffff);
+
+		XGMAC_IOWRITE(pdata, reg, reg_val);
+	}
+
+	return 0;
+}
+
+/* Clear the RFE field of MAC_RFCR.  Always returns 0. */
+static int xgbe_disable_rx_flow_control(struct xgbe_prv_data *pdata)
+{
+	const unsigned int rfe = 0;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, rfe);
+
+	return 0;
+}
+
+/* Set the RFE field of MAC_RFCR.  Always returns 0. */
+static int xgbe_enable_rx_flow_control(struct xgbe_prv_data *pdata)
+{
+	const unsigned int rfe = 1;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, rfe);
+
+	return 0;
+}
+
+/*
+ * Enable or disable transmit flow control according to pdata->tx_pause.
+ * Always returns 0.
+ */
+static int xgbe_config_tx_flow_control(struct xgbe_prv_data *pdata)
+{
+	if (!pdata->tx_pause) {
+		xgbe_disable_tx_flow_control(pdata);
+		return 0;
+	}
+
+	xgbe_enable_tx_flow_control(pdata);
+
+	return 0;
+}
+
+/*
+ * Enable or disable receive flow control according to pdata->rx_pause.
+ * Always returns 0.
+ */
+static int xgbe_config_rx_flow_control(struct xgbe_prv_data *pdata)
+{
+	if (!pdata->rx_pause) {
+		xgbe_disable_rx_flow_control(pdata);
+		return 0;
+	}
+
+	xgbe_enable_rx_flow_control(pdata);
+
+	return 0;
+}
+
+/* Apply the Tx and then the Rx flow control settings held in pdata. */
+static void xgbe_config_flow_control(struct xgbe_prv_data *pdata)
+{
+	xgbe_config_tx_flow_control(pdata);
+	xgbe_config_rx_flow_control(pdata);
+}
+
+/*
+ * For every DMA channel: acknowledge whatever is currently flagged in
+ * DMA_CH_SR by writing the value read back to it, then program DMA_CH_IER
+ * with the set of interrupts to enable.
+ */
+static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch = pdata->channel;
+	unsigned int pending, enable;
+	unsigned int n;
+
+	for (n = 0; n < pdata->channel_count; n++, ch++) {
+		/* Clear all the interrupts which are set */
+		pending = XGMAC_DMA_IOREAD(ch, DMA_CH_SR);
+		XGMAC_DMA_IOWRITE(ch, DMA_CH_SR, pending);
+
+		/* Start from an empty enable mask */
+		enable = 0;
+
+		/* Always enabled:
+		 *   NIE  - Normal Interrupt Summary Enable
+		 *   AIE  - Abnormal Interrupt Summary Enable
+		 *   FBEE - Fatal Bus Error Enable
+		 */
+		XGMAC_SET_BITS(enable, DMA_CH_IER, NIE, 1);
+		XGMAC_SET_BITS(enable, DMA_CH_IER, AIE, 1);
+		XGMAC_SET_BITS(enable, DMA_CH_IER, FBEE, 1);
+
+		/* Channels with a Tx ring:
+		 *   TIE  - Transmit Interrupt Enable (unless polling)
+		 */
+		if (ch->tx_ring)
+			XGMAC_SET_BITS(enable, DMA_CH_IER, TIE, 1);
+
+		/* Channels with an Rx ring:
+		 *   RBUE - Receive Buffer Unavailable Enable
+		 *   RIE  - Receive Interrupt Enable
+		 */
+		if (ch->rx_ring) {
+			XGMAC_SET_BITS(enable, DMA_CH_IER, RBUE, 1);
+			XGMAC_SET_BITS(enable, DMA_CH_IER, RIE, 1);
+		}
+
+		XGMAC_DMA_IOWRITE(ch, DMA_CH_IER, enable);
+	}
+}
+
+/*
+ * For every MTL queue (the larger of the Tx and Rx queue counts):
+ * acknowledge whatever is flagged in MTL_Q_ISR by writing the value read
+ * back to it, then write 0 to the queue's interrupt enable register so
+ * that no MTL interrupt is enabled.
+ */
+static void xgbe_enable_mtl_interrupts(struct xgbe_prv_data *pdata)
+{
+	unsigned int mtl_q_isr;
+	unsigned int q_count, i;
+
+	q_count = max(pdata->hw_feat.tx_q_cnt, pdata->hw_feat.rx_q_cnt);
+	for (i = 0; i < q_count; i++) {
+		/* Clear all the interrupts which are set */
+		mtl_q_isr = XGMAC_MTL_IOREAD(pdata, i, MTL_Q_ISR);
+		XGMAC_MTL_IOWRITE(pdata, i, MTL_Q_ISR, mtl_q_isr);
+
+		/* No MTL interrupts to be enabled: this must target the
+		 * enable register (IER), not the status register (ISR)
+		 * that was just acknowledged above.
+		 */
+		XGMAC_MTL_IOWRITE(pdata, i, MTL_Q_IER, 0);
+	}
+}
+
+/*
+ * Write 0 to MAC_IER, then write 0xff to the ALL_INTERRUPTS field of both
+ * MMC_RIER and MMC_TIER.
+ */
+static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata)
+{
+	const unsigned int all_counters = 0xff;
+
+	/* No MAC interrupts to be enabled */
+	XGMAC_IOWRITE(pdata, MAC_IER, 0);
+
+	/* Enable all counter interrupts */
+	XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, all_counters);
+	XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, all_counters);
+}
+
+/* Write 0x3 to the SS field of MAC_TCR.  Always returns 0. */
+static int xgbe_set_gmii_speed(struct xgbe_prv_data *pdata)
+{
+	const unsigned int ss = 0x3;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
+
+	return 0;
+}
+
+/* Write 0x2 to the SS field of MAC_TCR.  Always returns 0. */
+static int xgbe_set_gmii_2500_speed(struct xgbe_prv_data *pdata)
+{
+	const unsigned int ss = 0x2;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
+
+	return 0;
+}
+
+/* Write 0 to the SS field of MAC_TCR.  Always returns 0. */
+static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata)
+{
+	const unsigned int ss = 0;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
+
+	return 0;
+}
+
+/*
+ * Set the PR field of MAC_PFR to 1 when enable is non-zero, 0 otherwise.
+ * The register is only written when the field actually has to change.
+ * Always returns 0.
+ */
+static int xgbe_set_promiscuous_mode(struct xgbe_prv_data *pdata,
+				     unsigned int enable)
+{
+	unsigned int wanted = !!enable;
+
+	if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) != wanted) {
+		DBGPR("  %s promiscuous mode\n",
+		      enable ? "entering" : "leaving");
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, wanted);
+	}
+
+	return 0;
+}
+
+/*
+ * Set the PM field of MAC_PFR to 1 when enable is non-zero, 0 otherwise.
+ * The register is only written when the field actually has to change.
+ * Always returns 0.
+ */
+static int xgbe_set_all_multicast_mode(struct xgbe_prv_data *pdata,
+				       unsigned int enable)
+{
+	unsigned int wanted = !!enable;
+
+	if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PM) != wanted) {
+		DBGPR("  %s allmulti mode\n",
+		      enable ? "entering" : "leaving");
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PM, wanted);
+	}
+
+	return 0;
+}
+
+/*
+ * Program one additional MAC address filter entry.
+ *
+ * The address is packed with explicit shifts, exactly as
+ * xgbe_set_mac_address() does, so the register values do not depend on the
+ * byte order of the host.  The AE bit is set in the high word, then the
+ * high and low words are written to two consecutive registers starting at
+ * mac_reg.  Returns the register offset that follows this entry.
+ */
+static unsigned int xgbe_program_addn_mac(struct xgbe_prv_data *pdata,
+					  unsigned int mac_reg,
+					  const u8 *addr)
+{
+	unsigned int mac_addr_hi, mac_addr_lo;
+
+	mac_addr_lo = ((u32)addr[3] << 24) | ((u32)addr[2] << 16) |
+		      ((u32)addr[1] <<  8) | ((u32)addr[0] <<  0);
+	mac_addr_hi = ((u32)addr[5] <<  8) | ((u32)addr[4] <<  0);
+
+	XGMAC_SET_BITS(mac_addr_hi, MAC_MACA1HR, AE, 1);
+
+	XGMAC_IOWRITE(pdata, mac_reg, mac_addr_hi);
+	mac_reg += MAC_MACA_INC;
+	XGMAC_IOWRITE(pdata, mac_reg, mac_addr_lo);
+	mac_reg += MAC_MACA_INC;
+
+	return mac_reg;
+}
+
+/*
+ * Load the additional MAC address filter entries, starting at MAC_MACA1HR.
+ *
+ * The HUC and HMC fields of MAC_PFR are cleared first.  Every unicast
+ * address of the net device is then programmed, followed by every
+ * multicast address unless am_mode is non-zero.  Entries left over, up to
+ * pdata->hw_feat.addn_mac, are zeroed.  Always returns 0.
+ *
+ * NOTE(review): the address lists are not bounded by hw_feat.addn_mac
+ * here; presumably the caller guarantees that they fit - confirm.
+ */
+static int xgbe_set_addn_mac_addrs(struct xgbe_prv_data *pdata,
+				   unsigned int am_mode)
+{
+	struct netdev_hw_addr *ha;
+	unsigned int mac_reg;
+	unsigned int i;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HUC, 0);
+	XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HMC, 0);
+
+	i = 0;
+	mac_reg = MAC_MACA1HR;
+
+	netdev_for_each_uc_addr(ha, pdata->netdev) {
+		DBGPR("  adding unicast address %pM at 0x%04x\n",
+		      ha->addr, mac_reg);
+
+		mac_reg = xgbe_program_addn_mac(pdata, mac_reg, ha->addr);
+
+		i++;
+	}
+
+	if (!am_mode) {
+		netdev_for_each_mc_addr(ha, pdata->netdev) {
+			DBGPR("  adding multicast address %pM at 0x%04x\n",
+			      ha->addr, mac_reg);
+
+			mac_reg = xgbe_program_addn_mac(pdata, mac_reg,
+							ha->addr);
+
+			i++;
+		}
+	}
+
+	/* Clear remaining additional MAC address entries */
+	for (; i < pdata->hw_feat.addn_mac; i++) {
+		XGMAC_IOWRITE(pdata, mac_reg, 0);
+		mac_reg += MAC_MACA_INC;
+		XGMAC_IOWRITE(pdata, mac_reg, 0);
+		mac_reg += MAC_MACA_INC;
+	}
+
+	return 0;
+}
+
+/*
+ * Program the primary MAC address: bytes 4-5 of addr go into MAC_MACA0HR
+ * and bytes 0-3 into MAC_MACA0LR, lowest byte in the least significant
+ * position.  addr must point to at least 6 bytes.  Always returns 0.
+ */
+static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, u8 *addr)
+{
+	unsigned int mac_addr_hi, mac_addr_lo;
+
+	/* Widen each byte to u32 before shifting: a plain u8 is promoted
+	 * to (signed) int, and shifting a value >= 0x80 left by 24 would
+	 * move a bit into the sign position.
+	 */
+	mac_addr_hi = ((u32)addr[5] <<  8) | ((u32)addr[4] <<  0);
+	mac_addr_lo = ((u32)addr[3] << 24) | ((u32)addr[2] << 16) |
+		      ((u32)addr[1] <<  8) | ((u32)addr[0] <<  0);
+
+	XGMAC_IOWRITE(pdata, MAC_MACA0HR, mac_addr_hi);
+	XGMAC_IOWRITE(pdata, MAC_MACA0LR, mac_addr_lo);
+
+	return 0;
+}
+
+/*
+ * Read one MMD register through the memory-mapped PCS window.
+ *
+ * When mmd_reg carries the MII_ADDR_C45 flag the remaining bits are used
+ * as the full address; otherwise the address is built from
+ * pdata->mdio_mmd (upper 16 bits) and the low 16 bits of mmd_reg.
+ * prtad is not used.  Returns the value read.
+ */
+static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+			      int mmd_reg)
+{
+	unsigned int mmd_address;
+	int value;
+
+	mmd_address = (mmd_reg & MII_ADDR_C45) ?
+		      (mmd_reg & ~MII_ADDR_C45) :
+		      ((pdata->mdio_mmd << 16) | (mmd_reg & 0xffff));
+
+	/* The PCS registers are reached via mmio, while the underlying APB3
+	 * management interface addresses the MMD register sets indirectly.
+	 * An access therefore takes two phases: first select the window
+	 * (address phase), then touch the register inside it (data phase).
+	 *
+	 * The mmio interface works on 32-bit offsets and values, so every
+	 * register offset is shifted left by 2 bits and 32 bits of data
+	 * are read.  The mutex keeps both phases together.
+	 */
+	mutex_lock(&pdata->xpcs_mutex);
+	XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
+	value = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2);
+	mutex_unlock(&pdata->xpcs_mutex);
+
+	return value;
+}
+
+/*
+ * Write one MMD register through the memory-mapped PCS window.
+ *
+ * When mmd_reg carries the MII_ADDR_C45 flag the remaining bits are used
+ * as the full address; otherwise the address is built from
+ * pdata->mdio_mmd (upper 16 bits) and the low 16 bits of mmd_reg.
+ * prtad is not used.
+ */
+static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+				int mmd_reg, int mmd_data)
+{
+	unsigned int mmd_address;
+
+	mmd_address = (mmd_reg & MII_ADDR_C45) ?
+		      (mmd_reg & ~MII_ADDR_C45) :
+		      ((pdata->mdio_mmd << 16) | (mmd_reg & 0xffff));
+
+	/* The PCS registers are reached via mmio, while the underlying APB3
+	 * management interface addresses the MMD register sets indirectly.
+	 * An access therefore takes two phases: first select the window
+	 * (address phase), then touch the register inside it (data phase).
+	 *
+	 * The mmio interface works on 32-bit offsets and values, so every
+	 * register offset is shifted left by 2 bits and 32 bits of data
+	 * are written.  The mutex keeps both phases together.
+	 */
+	mutex_lock(&pdata->xpcs_mutex);
+	XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
+	XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data);
+	mutex_unlock(&pdata->xpcs_mutex);
+}
+
+/*
+ * Report whether a Tx descriptor is finished: returns 1 when the OWN bit
+ * in desc3 is clear and 0 while it is still set.
+ */
+static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc)
+{
+	if (XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN))
+		return 0;
+
+	return 1;
+}
+
+/* Clear the IPC field of MAC_RCR.  Always returns 0. */
+static int xgbe_disable_rx_csum(struct xgbe_prv_data *pdata)
+{
+	const unsigned int ipc = 0;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, IPC, ipc);
+
+	return 0;
+}
+
+/* Set the IPC field of MAC_RCR.  Always returns 0. */
+static int xgbe_enable_rx_csum(struct xgbe_prv_data *pdata)
+{
+	const unsigned int ipc = 1;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, IPC, ipc);
+
+	return 0;
+}
+
+/*
+ * Configure MAC_VLANTR for VLAN tag stripping on receive by programming
+ * its EVLRXS, DOVLTC, ERSVLM, ESVL and EVLS fields, one register update
+ * per field, in that order.  Always returns 0.
+ */
+static int xgbe_enable_rx_vlan_stripping(struct xgbe_prv_data *pdata)
+{
+	/* Put the VLAN tag in the Rx descriptor */
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLRXS, 1);
+
+	/* Don't check the VLAN type */
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, DOVLTC, 1);
+
+	/* Check only C-TAG (0x8100) packets */
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ERSVLM, 0);
+
+	/* Don't consider an S-TAG (0x88A8) packet as a VLAN packet */
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ESVL, 0);
+
+	/* Enable VLAN tag stripping */
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0x3);
+
+	return 0;
+}
+
+/* Write 0 to the EVLS field of MAC_VLANTR.  Always returns 0. */
+static int xgbe_disable_rx_vlan_stripping(struct xgbe_prv_data *pdata)
+{
+	const unsigned int evls = 0;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, evls);
+
+	return 0;
+}
+
+/*
+ * Zero all four words of the Tx descriptor that belongs to the given ring
+ * entry, which clears its buffer address and every control bit.
+ */
+static void xgbe_tx_desc_reset(struct xgbe_ring_data *rdata)
+{
+	struct xgbe_ring_desc *rdesc = rdata->rdesc;
+
+	/* Reset the Tx descriptor
+	 *   Set buffer 1 (lo) address to zero
+	 *   Set buffer 1 (hi) address to zero
+	 *   Reset all other control bits (IC, TTSE, B2L & B1L)
+	 *   Reset all other control bits (OWN, CTXT, FD, LD, CPC, CIC, etc)
+	 */
+	rdesc->desc0 = 0;
+	rdesc->desc1 = 0;
+	rdesc->desc2 = 0;
+	rdesc->desc3 = 0;
+}
+
+/*
+ * Prepare a channel's Tx descriptor ring for use: zero every descriptor,
+ * then tell the hardware the ring length (DMA_CH_TDRLR) and the DMA
+ * address of the descriptor at ring->cur (DMA_CH_TDLR_HI/LO).
+ */
+static void xgbe_tx_desc_init(struct xgbe_channel *channel)
+{
+	struct xgbe_ring *ring = channel->tx_ring;
+	struct xgbe_ring_data *first;
+	int first_index = ring->cur;
+	int idx;
+
+	DBGPR("-->tx_desc_init\n");
+
+	/* Initialize all descriptors: zero buffer addresses and clear
+	 * every control bit (IC, TTSE, B2L, B1L, OWN, CTXT, FD, LD, CPC,
+	 * CIC, etc)
+	 */
+	for (idx = 0; idx < ring->rdesc_count; idx++)
+		xgbe_tx_desc_reset(GET_DESC_DATA(ring, idx));
+
+	/* Make sure everything is written to the descriptor(s) before
+	 * telling the device about them
+	 */
+	wmb();
+
+	/* Update the total number of Tx descriptors */
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDRLR, ring->rdesc_count - 1);
+
+	/* Update the starting address of descriptor ring */
+	first = GET_DESC_DATA(ring, first_index);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_HI,
+			  upper_32_bits(first->rdesc_dma));
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_LO,
+			  lower_32_bits(first->rdesc_dma));
+
+	DBGPR("<--tx_desc_init\n");
+}
+
+/*
+ * Re-arm the Rx descriptor of one ring entry: point it at the entry's
+ * current DMA buffer (rdata->skb_dma), restore the INTE bit if the entry
+ * is marked to interrupt, and set the OWN bit last, with write barriers
+ * around that final store.
+ */
+static void xgbe_rx_desc_reset(struct xgbe_ring_data *rdata)
+{
+	struct xgbe_ring_desc *rdesc = rdata->rdesc;
+
+	/* Reset the Rx descriptor
+	 *   Set buffer 1 (lo) address to dma address (lo)
+	 *   Set buffer 1 (hi) address to dma address (hi)
+	 *   Set buffer 2 (lo) address to zero
+	 *   Set buffer 2 (hi) address to zero and set control bits
+	 *     OWN and INTE
+	 */
+	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->skb_dma));
+	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->skb_dma));
+	rdesc->desc2 = 0;
+
+	rdesc->desc3 = 0;
+	if (rdata->interrupt)
+		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, 1);
+
+	/* Since the Rx DMA engine is likely running, make sure everything
+	 * is written to the descriptor(s) before setting the OWN bit
+	 * for the descriptor
+	 */
+	wmb();
+
+	XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN, 1);
+
+	/* Make sure ownership is written to the descriptor */
+	wmb();
+}
+
+/*
+ * Prepare a channel's Rx descriptor ring for use.
+ *
+ * Every descriptor is pointed at its entry's DMA buffer and marked with
+ * the OWN bit.  INTE is set on every descriptor when Rx coalescing is off
+ * (both pdata->rx_riwt and pdata->rx_frames are zero); otherwise it is set
+ * only on each rx_frames-th descriptor, and on none when rx_frames is
+ * zero.  The choice is mirrored in rdata->interrupt.
+ *
+ * The hardware is then given the ring length (DMA_CH_RDRLR), the DMA
+ * address of the descriptor at ring->cur (DMA_CH_RDLR_HI/LO) and the tail
+ * pointer (DMA_CH_RDTR_LO).
+ */
+static void xgbe_rx_desc_init(struct xgbe_channel *channel)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_ring *ring = channel->rx_ring;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_ring_desc *rdesc;
+	unsigned int first_index = ring->cur;
+	unsigned int rx_frames = pdata->rx_frames;
+	unsigned int coalescing, inte;
+	unsigned int idx;
+
+	DBGPR("-->rx_desc_init\n");
+
+	coalescing = (pdata->rx_riwt || pdata->rx_frames);
+
+	/* Initialize all descriptors */
+	for (idx = 0; idx < ring->rdesc_count; idx++) {
+		rdata = GET_DESC_DATA(ring, idx);
+		rdesc = rdata->rdesc;
+
+		/* With coalescing, interrupt on completion only for every
+		 * rx_frames-th descriptor
+		 */
+		if (coalescing && (!rx_frames || ((idx + 1) % rx_frames)))
+			inte = 0;
+		else
+			inte = 1;
+
+		/* Initialize Rx descriptor
+		 *   Set buffer 1 (lo) address to dma address (lo)
+		 *   Set buffer 1 (hi) address to dma address (hi)
+		 *   Set buffer 2 (lo) address to zero
+		 *   Set buffer 2 (hi) address to zero and set control
+		 *     bits OWN and INTE appropriately
+		 */
+		rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->skb_dma));
+		rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->skb_dma));
+		rdesc->desc2 = 0;
+		rdesc->desc3 = 0;
+		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN, 1);
+		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, inte);
+		rdata->interrupt = inte;
+	}
+
+	/* Make sure everything is written to the descriptors before
+	 * telling the device about them
+	 */
+	wmb();
+
+	/* Update the total number of Rx descriptors */
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_RDRLR, ring->rdesc_count - 1);
+
+	/* Update the starting address of descriptor ring */
+	rdata = GET_DESC_DATA(ring, first_index);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_HI,
+			  upper_32_bits(rdata->rdesc_dma));
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_LO,
+			  lower_32_bits(rdata->rdesc_dma));
+
+	/* Update the Rx Descriptor Tail Pointer */
+	rdata = GET_DESC_DATA(ring, first_index + ring->rdesc_count - 1);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO,
+			  lower_32_bits(rdata->rdesc_dma));
+
+	DBGPR("<--rx_desc_init\n");
+}
+
+/*
+ * Program the Tx descriptors for the packet described by
+ * ring->packet_data, starting at ring->cur, then hand them to the
+ * hardware and write the Tx tail pointer register.
+ *
+ * An optional context descriptor (new MSS and/or VLAN tag) is written
+ * first, followed by the first data descriptor and then the remaining
+ * data descriptors up to packet->rdesc_count. The OWN bit of the
+ * descriptor at start_index is deliberately set last, after a write
+ * barrier, so every other descriptor is fully written before the
+ * first one is marked as owned by the hardware.
+ *
+ * ring->cur is advanced past every descriptor that is used.
+ */
+static void xgbe_pre_xmit(struct xgbe_channel *channel)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_ring *ring = channel->tx_ring;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_ring_desc *rdesc;
+	struct xgbe_packet_data *packet = &ring->packet_data;
+	unsigned int csum, tso, vlan;
+	unsigned int tso_context, vlan_context;
+	unsigned int tx_coalesce, tx_frames;
+	int start_index = ring->cur;
+	int i;
+
+	DBGPR("-->xgbe_pre_xmit\n");
+
+	/* Pull the offload requests out of the packet attributes */
+	csum = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			      CSUM_ENABLE);
+	tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			     TSO_ENABLE);
+	vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			      VLAN_CTAG);
+
+	/* A context descriptor is only needed when the MSS or the VLAN tag
+	 * differs from the value last recorded for this ring
+	 */
+	if (tso && (packet->mss != ring->tx.cur_mss))
+		tso_context = 1;
+	else
+		tso_context = 0;
+
+	if (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag))
+		vlan_context = 1;
+	else
+		vlan_context = 0;
+
+	/* Coalescing is on when either a time or a frame limit is set; the
+	 * descriptor count restarts whenever the Tx timer is not running
+	 */
+	tx_coalesce = (pdata->tx_usecs || pdata->tx_frames) ? 1 : 0;
+	tx_frames = pdata->tx_frames;
+	if (tx_coalesce && !channel->tx_timer_active)
+		ring->coalesce_count = 0;
+
+	rdata = GET_DESC_DATA(ring, ring->cur);
+	rdesc = rdata->rdesc;
+
+	/* Create a context descriptor if the MSS and/or VLAN tag changed */
+	if (tso_context || vlan_context) {
+		if (tso_context) {
+			DBGPR("  TSO context descriptor, mss=%u\n",
+			      packet->mss);
+
+			/* Set the MSS size */
+			XGMAC_SET_BITS_LE(rdesc->desc2, TX_CONTEXT_DESC2,
+					  MSS, packet->mss);
+
+			/* Mark it as a CONTEXT descriptor */
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3,
+					  CTXT, 1);
+
+			/* Indicate this descriptor contains the MSS */
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3,
+					  TCMSSV, 1);
+
+			ring->tx.cur_mss = packet->mss;
+		}
+
+		if (vlan_context) {
+			DBGPR("  VLAN context descriptor, ctag=%u\n",
+			      packet->vlan_ctag);
+
+			/* Mark it as a CONTEXT descriptor */
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3,
+					  CTXT, 1);
+
+			/* Set the VLAN tag */
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3,
+					  VT, packet->vlan_ctag);
+
+			/* Indicate this descriptor contains the VLAN tag */
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3,
+					  VLTV, 1);
+
+			ring->tx.cur_vlan_ctag = packet->vlan_ctag;
+		}
+
+		/* The context descriptor is done; move on to the first
+		 * data descriptor
+		 */
+		ring->cur++;
+		rdata = GET_DESC_DATA(ring, ring->cur);
+		rdesc = rdata->rdesc;
+	}
+
+	/* Update buffer address (for TSO this is the header) */
+	rdesc->desc0 =  cpu_to_le32(lower_32_bits(rdata->skb_dma));
+	rdesc->desc1 =  cpu_to_le32(upper_32_bits(rdata->skb_dma));
+
+	/* Update the buffer length */
+	XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, HL_B1L,
+			  rdata->skb_dma_len);
+
+	/* VLAN tag insertion check */
+	if (vlan)
+		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, VTIR,
+				  TX_NORMAL_DESC2_VLAN_INSERT);
+
+	/* Set IC bit based on Tx coalescing settings: with coalescing on,
+	 * IC stays set only when the running descriptor count reaches a
+	 * multiple of tx_frames (and never when tx_frames is zero)
+	 */
+	XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
+	if (tx_coalesce && (!tx_frames ||
+			    (++ring->coalesce_count % tx_frames)))
+		/* Clear IC bit */
+		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 0);
+
+	/* Mark it as First Descriptor */
+	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, FD, 1);
+
+	/* Mark it as a NORMAL descriptor */
+	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CTXT, 0);
+
+	/* Set OWN bit if not the first descriptor (i.e. a context
+	 * descriptor was written at start_index)
+	 */
+	if (ring->cur != start_index)
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1);
+
+	if (tso) {
+		/* Enable TSO */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, TSE, 1);
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, TCPPL,
+				  packet->tcp_payload_len);
+		/* The header length is programmed in units of 4 bytes */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, TCPHDRLEN,
+				  packet->tcp_header_len / 4);
+	} else {
+		/* Enable CRC and Pad Insertion */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CPC, 0);
+
+		/* Enable HW CSUM */
+		if (csum)
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3,
+					  CIC, 0x3);
+
+		/* Set the total length to be transmitted */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, FL,
+				  packet->length);
+	}
+
+	/* Fill in the remaining descriptors; i starts at the number of
+	 * descriptors already written (context, if any, plus the first)
+	 */
+	for (i = ring->cur - start_index + 1; i < packet->rdesc_count; i++) {
+		ring->cur++;
+		rdata = GET_DESC_DATA(ring, ring->cur);
+		rdesc = rdata->rdesc;
+
+		/* Update buffer address */
+		rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->skb_dma));
+		rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->skb_dma));
+
+		/* Update the buffer length */
+		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, HL_B1L,
+				  rdata->skb_dma_len);
+
+		/* Set IC bit based on Tx coalescing settings */
+		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
+		if (tx_coalesce && (!tx_frames ||
+				    (++ring->coalesce_count % tx_frames)))
+			/* Clear IC bit */
+			XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 0);
+
+		/* Set OWN bit */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1);
+
+		/* Mark it as NORMAL descriptor */
+		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CTXT, 0);
+
+		/* Enable HW CSUM */
+		if (csum)
+			XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3,
+					  CIC, 0x3);
+	}
+
+	/* Set LAST bit for the last descriptor */
+	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, LD, 1);
+
+	/* In case the Tx DMA engine is running, make sure everything
+	 * is written to the descriptor(s) before setting the OWN bit
+	 * for the first descriptor
+	 */
+	wmb();
+
+	/* Set OWN bit for the first descriptor */
+	rdata = GET_DESC_DATA(ring, start_index);
+	rdesc = rdata->rdesc;
+	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1);
+
+#ifdef XGMAC_ENABLE_TX_DESC_DUMP
+	xgbe_dump_tx_desc(ring, start_index, packet->rdesc_count, 1);
+#endif
+
+	/* Make sure ownership is written to the descriptor */
+	wmb();
+
+	/* Issue a poll command to Tx DMA by writing address
+	 * of next immediate free descriptor */
+	ring->cur++;
+	rdata = GET_DESC_DATA(ring, ring->cur);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
+			  lower_32_bits(rdata->rdesc_dma));
+
+	/* Start the Tx coalescing timer */
+	if (tx_coalesce && !channel->tx_timer_active) {
+		channel->tx_timer_active = 1;
+		hrtimer_start(&channel->tx_timer,
+			      ktime_set(0, pdata->tx_usecs * NSEC_PER_USEC),
+			      HRTIMER_MODE_REL);
+	}
+
+	DBGPR("  %s: descriptors %u to %u written\n",
+	      channel->name, start_index & (ring->rdesc_count - 1),
+	      (ring->cur - 1) & (ring->rdesc_count - 1));
+
+	DBGPR("<--xgbe_pre_xmit\n");
+}
+
+/*
+ * Examine the Rx descriptor at ring->cur and record what it says in
+ * ring->packet_data.
+ *
+ * Returns 1 when the descriptor still has its OWN bit set (no data is
+ * available yet), otherwise 0. When the descriptor is not the last one
+ * of a packet only the INCOMPLETE attribute is set.
+ */
+static int xgbe_dev_read(struct xgbe_channel *channel)
+{
+	struct xgbe_ring *ring = channel->rx_ring;
+	struct xgbe_packet_data *packet = &ring->packet_data;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_ring_desc *rdesc;
+	unsigned int err, etlt;
+
+	DBGPR("-->xgbe_dev_read: cur = %d\n", ring->cur);
+
+	rdata = GET_DESC_DATA(ring, ring->cur);
+	rdesc = rdata->rdesc;
+
+	/* Nothing to do while the OWN bit is still set */
+	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN))
+		return 1;
+
+#ifdef XGMAC_ENABLE_RX_DESC_DUMP
+	xgbe_dump_rx_desc(ring, rdesc, ring->cur);
+#endif
+
+	/* Record the length reported by the descriptor */
+	rdata->len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+
+	/* More descriptors follow for this packet: flag it and stop here */
+	if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       INCOMPLETE, 1);
+		return 0;
+	}
+
+	/* Last descriptor: the packet is complete */
+	XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+		       INCOMPLETE, 0);
+
+	/* Report checksum as done when Rx checksum offload is enabled */
+	if (channel->pdata->netdev->features & NETIF_F_RXCSUM)
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       CSUM_DONE, 1);
+
+	/* Error summary and type are only valid in the last descriptor */
+	err = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ES);
+	etlt = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ETLT);
+	DBGPR("  err=%u, etlt=%#x\n", err, etlt);
+
+	if (err && etlt) {
+		/* Types 0x05/0x06 only invalidate the checksum result;
+		 * anything else is reported as a frame error
+		 */
+		if ((etlt == 0x05) || (etlt == 0x06)) {
+			XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+				       CSUM_DONE, 0);
+		} else {
+			XGMAC_SET_BITS(packet->errors, RX_PACKET_ERRORS,
+				       FRAME, 1);
+		}
+	} else if (etlt == 0x09) {
+		/* Type 0x09: pick up the VLAN tag from desc0 */
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       VLAN_CTAG, 1);
+		packet->vlan_ctag = XGMAC_GET_BITS_LE(rdesc->desc0,
+						      RX_NORMAL_DESC0,
+						      OVT);
+		DBGPR("  vlan-ctag=0x%04x\n", packet->vlan_ctag);
+	}
+
+	DBGPR("<--xgbe_dev_read: %s - descriptor=%u (cur=%d)\n", channel->name,
+	      ring->cur & (ring->rdesc_count - 1), ring->cur);
+
+	return 0;
+}
+
+/* Return the CTXT bit of the descriptor's desc3 word. */
+static int xgbe_is_context_desc(struct xgbe_ring_desc *rdesc)
+{
+	int ctxt;
+
+	/* CTXT is shared by Rx and Tx, so the TDES3 definition is used */
+	ctxt = XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CTXT);
+
+	return ctxt;
+}
+
+/* Return the LD (last descriptor) bit of the descriptor's desc3 word. */
+static int xgbe_is_last_desc(struct xgbe_ring_desc *rdesc)
+{
+	int last;
+
+	/* LD is shared by Rx and Tx, so the TDES3 definition is used */
+	last = XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, LD);
+
+	return last;
+}
+
+/*
+ * Save or restore the channel's DMA interrupt enables.
+ *
+ * XGMAC_INT_STATE_SAVE stores the DMA_INTERRUPT_MASK bits of DMA_CH_IER
+ * in channel->saved_ier; any other state ORs the saved bits back into
+ * DMA_CH_IER.
+ */
+static void xgbe_save_interrupt_status(struct xgbe_channel *channel,
+				       enum xgbe_int_state int_state)
+{
+	unsigned int ier;
+
+	if (int_state != XGMAC_INT_STATE_SAVE) {
+		/* Restore: re-enable whatever was saved earlier */
+		ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER);
+		ier |= channel->saved_ier;
+		XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, ier);
+		return;
+	}
+
+	/* Save: keep only the bits covered by DMA_INTERRUPT_MASK */
+	channel->saved_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER);
+	channel->saved_ier &= DMA_INTERRUPT_MASK;
+}
+
+/*
+ * Enable one DMA channel interrupt source in DMA_CH_IER, or restore
+ * the previously saved set for XGMAC_INT_DMA_ALL.
+ *
+ * Returns 0 on success, -1 when int_id is not recognised.
+ */
+static int xgbe_enable_int(struct xgbe_channel *channel,
+			   enum xgbe_int int_id)
+{
+	switch (int_id) {
+	case XGMAC_INT_DMA_ISR_DC0IS:
+	case XGMAC_INT_DMA_CH_SR_TI:
+		/* Both identifiers map to the Tx interrupt enable */
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TIE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_TPS:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TXSE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_TBU:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TBUE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RI:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RIE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RBU:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RBUE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RPS:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RSE, 1);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_FBE:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, FBEE, 1);
+		break;
+
+	case XGMAC_INT_DMA_ALL:
+		/* Put back everything that was saved on disable */
+		xgbe_save_interrupt_status(channel, XGMAC_INT_STATE_RESTORE);
+		break;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable one DMA channel interrupt source in DMA_CH_IER. For
+ * XGMAC_INT_DMA_ALL the current enables are saved first and then every
+ * bit in DMA_INTERRUPT_MASK is cleared.
+ *
+ * Returns 0 on success, -1 when int_id is not recognised.
+ */
+static int xgbe_disable_int(struct xgbe_channel *channel,
+			    enum xgbe_int int_id)
+{
+	unsigned int ier;
+
+	switch (int_id) {
+	case XGMAC_INT_DMA_ISR_DC0IS:
+	case XGMAC_INT_DMA_CH_SR_TI:
+		/* Both identifiers map to the Tx interrupt enable */
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TIE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_TPS:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TXSE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_TBU:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, TBUE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RI:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RIE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RBU:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RBUE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_RPS:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, RSE, 0);
+		break;
+
+	case XGMAC_INT_DMA_CH_SR_FBE:
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_IER, FBEE, 0);
+		break;
+
+	case XGMAC_INT_DMA_ALL:
+		/* Remember what was enabled so it can be restored later */
+		xgbe_save_interrupt_status(channel, XGMAC_INT_STATE_SAVE);
+
+		ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER);
+		ier &= ~DMA_INTERRUPT_MASK;
+		XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, ier);
+		break;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Issue a software reset through DMA_MR.SWR and wait for the bit to
+ * read back as zero.
+ *
+ * Returns 0 once the bit has cleared, or -EBUSY when it is still set
+ * after the polling budget is used up.
+ */
+static int xgbe_exit(struct xgbe_prv_data *pdata)
+{
+	unsigned int count = 2000;
+
+	DBGPR("-->xgbe_exit\n");
+
+	/* Issue a software reset */
+	XGMAC_IOWRITE_BITS(pdata, DMA_MR, SWR, 1);
+	usleep_range(10, 15);
+
+	/* Poll until the reset bit clears. Pre-decrement so that count is
+	 * exactly zero when (and only when) the budget is exhausted; a
+	 * post-decrement would wrap the unsigned counter past zero on
+	 * timeout and the check below would never fire.
+	 */
+	while (--count && XGMAC_IOREAD_BITS(pdata, DMA_MR, SWR))
+		usleep_range(500, 600);
+
+	if (!count)
+		return -EBUSY;
+
+	DBGPR("<--xgbe_exit\n");
+
+	return 0;
+}
+
+/*
+ * Request a flush of every Tx queue (MTL_Q_TQOMR.FTQ) and wait for
+ * each queue's FTQ bit to read back as zero.
+ *
+ * Returns 0 when all queues have finished, or -EBUSY as soon as one
+ * queue still has FTQ set after its polling budget is used up.
+ */
+static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
+{
+	unsigned int i, count;
+
+	/* Kick off the flush on all queues before waiting on any of them */
+	for (i = 0; i < pdata->hw_feat.tx_q_cnt; i++)
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, FTQ, 1);
+
+	/* Poll each queue until its flush bit clears. Pre-decrement so
+	 * that count is exactly zero when (and only when) the budget is
+	 * exhausted; a post-decrement would wrap the unsigned counter on
+	 * timeout and the check below would never fire.
+	 */
+	for (i = 0; i < pdata->hw_feat.tx_q_cnt; i++) {
+		count = 2000;
+		while (--count && XGMAC_MTL_IOREAD_BITS(pdata, i,
+							MTL_Q_TQOMR, FTQ))
+			usleep_range(500, 600);
+
+		if (!count)
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* Program the DMA system bus mode register (DMA_SBMR). */
+static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata)
+{
+	/* Set enhanced addressing mode */
+	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1);
+
+	/* Set the System Bus mode (the UNDEF field of DMA_SBMR) */
+	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1);
+}
+
+/*
+ * Build and write the AXI read (DMA_AXIARCR) and write (DMA_AXIAWCR)
+ * cache control registers. Every cache field gets the same cache
+ * setting and every domain field the same domain setting.
+ */
+static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata)
+{
+	unsigned int arcache = 0;
+	unsigned int awcache = 0;
+
+	/* Read channel: cache attributes, then domain attributes */
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, DMA_ARCACHE_SETTING);
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, DMA_ARCACHE_SETTING);
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, DMA_ARCACHE_SETTING);
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, DMA_ARDOMAIN_SETTING);
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, DMA_ARDOMAIN_SETTING);
+	XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, DMA_ARDOMAIN_SETTING);
+
+	/* Write channel: cache attributes, then domain attributes */
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, DMA_AWCACHE_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, DMA_AWCACHE_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, DMA_AWCACHE_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, DMA_AWCACHE_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, DMA_AWDOMAIN_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, DMA_AWDOMAIN_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, DMA_AWDOMAIN_SETTING);
+	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, DMA_AWDOMAIN_SETTING);
+
+	/* Commit both values, read control first */
+	XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache);
+	XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache);
+}
+
+/*
+ * Select the MTL scheduling/arbitration algorithms: the ETS algorithm
+ * and Rx arbitration in MTL_OMR, and the transmission selection
+ * algorithm of each of the XGBE_TC_CNT traffic classes.
+ */
+static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
+{
+	unsigned int i;
+
+	/* Set Tx to weighted round robin scheduling algorithm (when
+	 * traffic class is using ETS algorithm)
+	 */
+	XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_WRR);
+
+	/* Set Tx traffic classes to strict priority algorithm */
+	for (i = 0; i < XGBE_TC_CNT; i++)
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, MTL_TSA_SP);
+
+	/* Set Rx to strict priority algorithm */
+	XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP);
+}
+
+/*
+ * Work out the per-queue fifo size setting.
+ *
+ * @fifo_size:   fifo size code (0..11 map to 128 bytes .. 256 KB; any
+ *               other value is treated as a size of zero)
+ * @queue_count: number of queues sharing the fifo
+ *
+ * The total size is divided evenly between the queues and rounded down
+ * to the largest programmable size that fits. Returns the matching
+ * XGMAC_MTL_FIFO_SIZE_* value, with XGMAC_MTL_FIFO_SIZE_256 as the
+ * floor.
+ */
+static unsigned int xgbe_calculate_per_queue_fifo(unsigned long fifo_size,
+						  unsigned char queue_count)
+{
+	/* Total fifo size in bytes, indexed by the fifo_size code */
+	const unsigned int total_bytes[] = {
+		FIFO_SIZE_B(128),
+		FIFO_SIZE_B(256),
+		FIFO_SIZE_B(512),
+		FIFO_SIZE_KB(1),
+		FIFO_SIZE_KB(2),
+		FIFO_SIZE_KB(4),
+		FIFO_SIZE_KB(8),
+		FIFO_SIZE_KB(16),
+		FIFO_SIZE_KB(32),
+		FIFO_SIZE_KB(64),
+		FIFO_SIZE_KB(128),
+		FIFO_SIZE_KB(256),
+	};
+	/* Programmable per-queue sizes, largest first */
+	const struct {
+		unsigned int bytes;
+		enum xgbe_mtl_fifo_size setting;
+	} steps[] = {
+		{ FIFO_SIZE_KB(256), XGMAC_MTL_FIFO_SIZE_256K },
+		{ FIFO_SIZE_KB(128), XGMAC_MTL_FIFO_SIZE_128K },
+		{ FIFO_SIZE_KB(64), XGMAC_MTL_FIFO_SIZE_64K },
+		{ FIFO_SIZE_KB(32), XGMAC_MTL_FIFO_SIZE_32K },
+		{ FIFO_SIZE_KB(16), XGMAC_MTL_FIFO_SIZE_16K },
+		{ FIFO_SIZE_KB(8), XGMAC_MTL_FIFO_SIZE_8K },
+		{ FIFO_SIZE_KB(4), XGMAC_MTL_FIFO_SIZE_4K },
+		{ FIFO_SIZE_KB(2), XGMAC_MTL_FIFO_SIZE_2K },
+		{ FIFO_SIZE_KB(1), XGMAC_MTL_FIFO_SIZE_1K },
+		{ FIFO_SIZE_B(512), XGMAC_MTL_FIFO_SIZE_512 },
+		{ FIFO_SIZE_B(256), XGMAC_MTL_FIFO_SIZE_256 },
+	};
+	enum xgbe_mtl_fifo_size p_fifo = XGMAC_MTL_FIFO_SIZE_256;
+	unsigned int share = 0;
+	unsigned int idx;
+
+	/* Calculate Tx/Rx fifo share per queue */
+	if (fifo_size < sizeof(total_bytes) / sizeof(total_bytes[0]))
+		share = total_bytes[fifo_size];
+	share = share / queue_count;
+
+	/* Pick the largest programmable size that the share can hold */
+	for (idx = 0; idx < sizeof(steps) / sizeof(steps[0]); idx++) {
+		if (share >= steps[idx].bytes) {
+			p_fifo = steps[idx].setting;
+			break;
+		}
+	}
+
+	return p_fifo;
+}
+
+/*
+ * Split the Tx fifo evenly between the Tx queues, program the
+ * resulting size into each queue's MTL_Q_TQOMR.TQS field and log it.
+ */
+static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mtl_fifo_size fifo_size;
+	unsigned int queue;
+
+	fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size,
+						  pdata->hw_feat.tx_q_cnt);
+
+	/* Every Tx queue gets the same share */
+	for (queue = 0; queue < pdata->hw_feat.tx_q_cnt; queue++) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, TQS,
+				       fifo_size);
+	}
+
+	/* The logged byte count is (setting + 1) * 256 */
+	netdev_notice(pdata->netdev, "%d Tx queues, %d byte fifo per queue\n",
+		      pdata->hw_feat.tx_q_cnt, ((fifo_size + 1) * 256));
+}
+
+/*
+ * Split the Rx fifo evenly between the Rx queues, program the
+ * resulting size into each queue's MTL_Q_RQOMR.RQS field and log it.
+ */
+static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mtl_fifo_size fifo_size;
+	unsigned int queue;
+
+	fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size,
+						  pdata->hw_feat.rx_q_cnt);
+
+	/* Every Rx queue gets the same share */
+	for (queue = 0; queue < pdata->hw_feat.rx_q_cnt; queue++) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_RQOMR, RQS,
+				       fifo_size);
+	}
+
+	/* The logged byte count is (setting + 1) * 256 */
+	netdev_notice(pdata->netdev, "%d Rx queues, %d byte fifo per queue\n",
+		      pdata->hw_feat.rx_q_cnt, ((fifo_size + 1) * 256));
+}
+
+/*
+ * Select dynamic mapping of MTL Rx queue to DMA Rx channel for every
+ * Rx queue.
+ *
+ * Each mapping register holds MTL_RQDCM_Q_PER_REG queues, one byte per
+ * queue; 0x80 is set in each queue's byte. Registers are written
+ * starting at MTL_RQDCM0R and stepping by MTL_RQDCM_INC.
+ */
+static void xgbe_config_rx_queue_mapping(struct xgbe_prv_data *pdata)
+{
+	unsigned int q_count = pdata->hw_feat.rx_q_cnt;
+	unsigned int reg = MTL_RQDCM0R;
+	unsigned int reg_val = 0;
+	unsigned int queue, done;
+
+	for (queue = 0; queue < q_count; queue++) {
+		/* Set the flag in this queue's byte of the current register */
+		reg_val |= (0x80 << ((queue % MTL_RQDCM_Q_PER_REG) << 3));
+
+		/* Write out once the register is full or no queues remain */
+		done = queue + 1;
+		if (!(done % MTL_RQDCM_Q_PER_REG) || (done == q_count)) {
+			XGMAC_IOWRITE(pdata, reg, reg_val);
+
+			reg += MTL_RQDCM_INC;
+			reg_val = 0;
+		}
+	}
+}
+
+/*
+ * Program the flow control activate (RFA) and de-activate (RFD)
+ * thresholds in MTL_Q_RQOMR for every Rx queue.
+ */
+static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata)
+{
+	unsigned int queue;
+
+	for (queue = 0; queue < pdata->hw_feat.rx_q_cnt; queue++) {
+		/* RFA: flow control kicks in with less than 4k of fifo left */
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_RQOMR, RFA, 2);
+
+		/* RFD: flow control is released with more than 6k left */
+		XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_RQOMR, RFD, 4);
+	}
+}
+
+/* Program the hardware with the net device's current address. */
+static void xgbe_config_mac_address(struct xgbe_prv_data *pdata)
+{
+	struct net_device *netdev = pdata->netdev;
+
+	xgbe_set_mac_address(pdata, netdev->dev_addr);
+}
+
+/*
+ * Set MAC_RCR.JE when the device MTU is larger than
+ * XGMAC_STD_PACKET_MTU, clear it otherwise.
+ */
+static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata)
+{
+	unsigned int jumbo = 0;
+
+	if (pdata->netdev->mtu > XGMAC_STD_PACKET_MTU)
+		jumbo = 1;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, JE, jumbo);
+}
+
+/* Make Rx checksum handling follow the NETIF_F_RXCSUM feature flag. */
+static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata)
+{
+	if (!(pdata->netdev->features & NETIF_F_RXCSUM)) {
+		xgbe_disable_rx_csum(pdata);
+		return;
+	}
+
+	xgbe_enable_rx_csum(pdata);
+}
+
+/*
+ * Make Rx VLAN stripping follow the NETIF_F_HW_VLAN_CTAG_RX feature
+ * flag.
+ */
+static void xgbe_config_vlan_support(struct xgbe_prv_data *pdata)
+{
+	if (!(pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
+		xgbe_disable_rx_vlan_stripping(pdata);
+		return;
+	}
+
+	xgbe_enable_rx_vlan_stripping(pdata);
+}
+
+/*
+ * Handle an MMC transmit interrupt: read MMC_TISR once and, for every
+ * counter flagged in it, add the current value of that counter's
+ * register to the matching field of pdata->mmc_stats.
+ */
+static void xgbe_tx_mmc_int(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_mmc_stats *stats = &pdata->mmc_stats;
+	unsigned int mmc_isr = XGMAC_IOREAD(pdata, MMC_TISR);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXOCTETCOUNT_GB))
+		stats->txoctetcount_gb +=
+			XGMAC_IOREAD(pdata, MMC_TXOCTETCOUNT_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXFRAMECOUNT_GB))
+		stats->txframecount_gb +=
+			XGMAC_IOREAD(pdata, MMC_TXFRAMECOUNT_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXBROADCASTFRAMES_G))
+		stats->txbroadcastframes_g +=
+			XGMAC_IOREAD(pdata, MMC_TXBROADCASTFRAMES_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXMULTICASTFRAMES_G))
+		stats->txmulticastframes_g +=
+			XGMAC_IOREAD(pdata, MMC_TXMULTICASTFRAMES_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX64OCTETS_GB))
+		stats->tx64octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX64OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX65TO127OCTETS_GB))
+		stats->tx65to127octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX65TO127OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX128TO255OCTETS_GB))
+		stats->tx128to255octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX128TO255OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX256TO511OCTETS_GB))
+		stats->tx256to511octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX256TO511OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX512TO1023OCTETS_GB))
+		stats->tx512to1023octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX512TO1023OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX1024TOMAXOCTETS_GB))
+		stats->tx1024tomaxoctets_gb +=
+			XGMAC_IOREAD(pdata, MMC_TX1024TOMAXOCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXUNICASTFRAMES_GB))
+		stats->txunicastframes_gb +=
+			XGMAC_IOREAD(pdata, MMC_TXUNICASTFRAMES_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXMULTICASTFRAMES_GB))
+		stats->txmulticastframes_gb +=
+			XGMAC_IOREAD(pdata, MMC_TXMULTICASTFRAMES_GB_LO);
+
+	/* The _GB counter belongs in the _gb statistic; adding it to
+	 * txbroadcastframes_g would count those frames twice there.
+	 */
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXBROADCASTFRAMES_GB))
+		stats->txbroadcastframes_gb +=
+			XGMAC_IOREAD(pdata, MMC_TXBROADCASTFRAMES_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXUNDERFLOWERROR))
+		stats->txunderflowerror +=
+			XGMAC_IOREAD(pdata, MMC_TXUNDERFLOWERROR_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXOCTETCOUNT_G))
+		stats->txoctetcount_g +=
+			XGMAC_IOREAD(pdata, MMC_TXOCTETCOUNT_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXFRAMECOUNT_G))
+		stats->txframecount_g +=
+			XGMAC_IOREAD(pdata, MMC_TXFRAMECOUNT_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXPAUSEFRAMES))
+		stats->txpauseframes +=
+			XGMAC_IOREAD(pdata, MMC_TXPAUSEFRAMES_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXVLANFRAMES_G))
+		stats->txvlanframes_g +=
+			XGMAC_IOREAD(pdata, MMC_TXVLANFRAMES_G_LO);
+}
+
+/*
+ * Handle an MMC receive interrupt: read MMC_RISR once and, for every
+ * counter flagged in it, add the current value of that counter's
+ * register to the matching field of pdata->mmc_stats.
+ *
+ * NOTE(review): for counters that have a _LO register only that
+ * register is read here - presumably the low half of a wider counter;
+ * confirm whether the upper half can be ignored.
+ */
+static void xgbe_rx_mmc_int(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_mmc_stats *stats = &pdata->mmc_stats;
+	unsigned int mmc_isr = XGMAC_IOREAD(pdata, MMC_RISR);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXFRAMECOUNT_GB))
+		stats->rxframecount_gb +=
+			XGMAC_IOREAD(pdata, MMC_RXFRAMECOUNT_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOCTETCOUNT_GB))
+		stats->rxoctetcount_gb +=
+			XGMAC_IOREAD(pdata, MMC_RXOCTETCOUNT_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOCTETCOUNT_G))
+		stats->rxoctetcount_g +=
+			XGMAC_IOREAD(pdata, MMC_RXOCTETCOUNT_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXBROADCASTFRAMES_G))
+		stats->rxbroadcastframes_g +=
+			XGMAC_IOREAD(pdata, MMC_RXBROADCASTFRAMES_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXMULTICASTFRAMES_G))
+		stats->rxmulticastframes_g +=
+			XGMAC_IOREAD(pdata, MMC_RXMULTICASTFRAMES_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXCRCERROR))
+		stats->rxcrcerror +=
+			XGMAC_IOREAD(pdata, MMC_RXCRCERROR_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXRUNTERROR))
+		stats->rxrunterror +=
+			XGMAC_IOREAD(pdata, MMC_RXRUNTERROR);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXJABBERERROR))
+		stats->rxjabbererror +=
+			XGMAC_IOREAD(pdata, MMC_RXJABBERERROR);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXUNDERSIZE_G))
+		stats->rxundersize_g +=
+			XGMAC_IOREAD(pdata, MMC_RXUNDERSIZE_G);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOVERSIZE_G))
+		stats->rxoversize_g +=
+			XGMAC_IOREAD(pdata, MMC_RXOVERSIZE_G);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX64OCTETS_GB))
+		stats->rx64octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX64OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX65TO127OCTETS_GB))
+		stats->rx65to127octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX65TO127OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX128TO255OCTETS_GB))
+		stats->rx128to255octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX128TO255OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX256TO511OCTETS_GB))
+		stats->rx256to511octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX256TO511OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX512TO1023OCTETS_GB))
+		stats->rx512to1023octets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX512TO1023OCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX1024TOMAXOCTETS_GB))
+		stats->rx1024tomaxoctets_gb +=
+			XGMAC_IOREAD(pdata, MMC_RX1024TOMAXOCTETS_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXUNICASTFRAMES_G))
+		stats->rxunicastframes_g +=
+			XGMAC_IOREAD(pdata, MMC_RXUNICASTFRAMES_G_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXLENGTHERROR))
+		stats->rxlengtherror +=
+			XGMAC_IOREAD(pdata, MMC_RXLENGTHERROR_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOUTOFRANGETYPE))
+		stats->rxoutofrangetype +=
+			XGMAC_IOREAD(pdata, MMC_RXOUTOFRANGETYPE_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXPAUSEFRAMES))
+		stats->rxpauseframes +=
+			XGMAC_IOREAD(pdata, MMC_RXPAUSEFRAMES_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXFIFOOVERFLOW))
+		stats->rxfifooverflow +=
+			XGMAC_IOREAD(pdata, MMC_RXFIFOOVERFLOW_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXVLANFRAMES_GB))
+		stats->rxvlanframes_gb +=
+			XGMAC_IOREAD(pdata, MMC_RXVLANFRAMES_GB_LO);
+
+	if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXWATCHDOGERROR))
+		stats->rxwatchdogerror +=
+			XGMAC_IOREAD(pdata, MMC_RXWATCHDOGERROR);
+}
+
+/*
+ * Fold every MMC hardware counter into pdata->mmc_stats.
+ *
+ * The counters are frozen (MMC_CR.MCF) for the duration of the reads
+ * and un-frozen afterwards. Each register value is added to the
+ * running software total rather than stored, which matches the
+ * reset-on-read mode selected in xgbe_config_mmc().
+ */
+static void xgbe_read_mmc_stats(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_mmc_stats *stats = &pdata->mmc_stats;
+
+	/* Freeze counters */
+	XGMAC_IOWRITE_BITS(pdata, MMC_CR, MCF, 1);
+
+	stats->txoctetcount_gb +=
+		XGMAC_IOREAD(pdata, MMC_TXOCTETCOUNT_GB_LO);
+
+	stats->txframecount_gb +=
+		XGMAC_IOREAD(pdata, MMC_TXFRAMECOUNT_GB_LO);
+
+	stats->txbroadcastframes_g +=
+		XGMAC_IOREAD(pdata, MMC_TXBROADCASTFRAMES_G_LO);
+
+	stats->txmulticastframes_g +=
+		XGMAC_IOREAD(pdata, MMC_TXMULTICASTFRAMES_G_LO);
+
+	stats->tx64octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX64OCTETS_GB_LO);
+
+	stats->tx65to127octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX65TO127OCTETS_GB_LO);
+
+	stats->tx128to255octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX128TO255OCTETS_GB_LO);
+
+	stats->tx256to511octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX256TO511OCTETS_GB_LO);
+
+	stats->tx512to1023octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX512TO1023OCTETS_GB_LO);
+
+	stats->tx1024tomaxoctets_gb +=
+		XGMAC_IOREAD(pdata, MMC_TX1024TOMAXOCTETS_GB_LO);
+
+	stats->txunicastframes_gb +=
+		XGMAC_IOREAD(pdata, MMC_TXUNICASTFRAMES_GB_LO);
+
+	stats->txmulticastframes_gb +=
+		XGMAC_IOREAD(pdata, MMC_TXMULTICASTFRAMES_GB_LO);
+
+	/* The _GB counter belongs in the _gb statistic; adding it to
+	 * txbroadcastframes_g would count those frames twice there.
+	 */
+	stats->txbroadcastframes_gb +=
+		XGMAC_IOREAD(pdata, MMC_TXBROADCASTFRAMES_GB_LO);
+
+	stats->txunderflowerror +=
+		XGMAC_IOREAD(pdata, MMC_TXUNDERFLOWERROR_LO);
+
+	stats->txoctetcount_g +=
+		XGMAC_IOREAD(pdata, MMC_TXOCTETCOUNT_G_LO);
+
+	stats->txframecount_g +=
+		XGMAC_IOREAD(pdata, MMC_TXFRAMECOUNT_G_LO);
+
+	stats->txpauseframes +=
+		XGMAC_IOREAD(pdata, MMC_TXPAUSEFRAMES_LO);
+
+	stats->txvlanframes_g +=
+		XGMAC_IOREAD(pdata, MMC_TXVLANFRAMES_G_LO);
+
+	stats->rxframecount_gb +=
+		XGMAC_IOREAD(pdata, MMC_RXFRAMECOUNT_GB_LO);
+
+	stats->rxoctetcount_gb +=
+		XGMAC_IOREAD(pdata, MMC_RXOCTETCOUNT_GB_LO);
+
+	stats->rxoctetcount_g +=
+		XGMAC_IOREAD(pdata, MMC_RXOCTETCOUNT_G_LO);
+
+	stats->rxbroadcastframes_g +=
+		XGMAC_IOREAD(pdata, MMC_RXBROADCASTFRAMES_G_LO);
+
+	stats->rxmulticastframes_g +=
+		XGMAC_IOREAD(pdata, MMC_RXMULTICASTFRAMES_G_LO);
+
+	stats->rxcrcerror +=
+		XGMAC_IOREAD(pdata, MMC_RXCRCERROR_LO);
+
+	stats->rxrunterror +=
+		XGMAC_IOREAD(pdata, MMC_RXRUNTERROR);
+
+	stats->rxjabbererror +=
+		XGMAC_IOREAD(pdata, MMC_RXJABBERERROR);
+
+	stats->rxundersize_g +=
+		XGMAC_IOREAD(pdata, MMC_RXUNDERSIZE_G);
+
+	stats->rxoversize_g +=
+		XGMAC_IOREAD(pdata, MMC_RXOVERSIZE_G);
+
+	stats->rx64octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX64OCTETS_GB_LO);
+
+	stats->rx65to127octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX65TO127OCTETS_GB_LO);
+
+	stats->rx128to255octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX128TO255OCTETS_GB_LO);
+
+	stats->rx256to511octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX256TO511OCTETS_GB_LO);
+
+	stats->rx512to1023octets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX512TO1023OCTETS_GB_LO);
+
+	stats->rx1024tomaxoctets_gb +=
+		XGMAC_IOREAD(pdata, MMC_RX1024TOMAXOCTETS_GB_LO);
+
+	stats->rxunicastframes_g +=
+		XGMAC_IOREAD(pdata, MMC_RXUNICASTFRAMES_G_LO);
+
+	stats->rxlengtherror +=
+		XGMAC_IOREAD(pdata, MMC_RXLENGTHERROR_LO);
+
+	stats->rxoutofrangetype +=
+		XGMAC_IOREAD(pdata, MMC_RXOUTOFRANGETYPE_LO);
+
+	stats->rxpauseframes +=
+		XGMAC_IOREAD(pdata, MMC_RXPAUSEFRAMES_LO);
+
+	stats->rxfifooverflow +=
+		XGMAC_IOREAD(pdata, MMC_RXFIFOOVERFLOW_LO);
+
+	stats->rxvlanframes_gb +=
+		XGMAC_IOREAD(pdata, MMC_RXVLANFRAMES_GB_LO);
+
+	stats->rxwatchdogerror +=
+		XGMAC_IOREAD(pdata, MMC_RXWATCHDOGERROR);
+
+	/* Un-freeze counters */
+	XGMAC_IOWRITE_BITS(pdata, MMC_CR, MCF, 0);
+}
+
+/*
+ * Configure the MMC counter block through MMC_CR: select reset-on-read
+ * behaviour and then reset the counters.
+ */
+static void xgbe_config_mmc(struct xgbe_prv_data *pdata)
+{
+	/* Set counters to reset on read */
+	XGMAC_IOWRITE_BITS(pdata, MMC_CR, ROR, 1);
+
+	/* Reset the counters */
+	XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1);
+}
+
+/*
+ * Turn on the transmit path: Tx DMA channels first, then the Tx
+ * queues, and finally the MAC transmitter.
+ */
+static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Start Tx DMA; stop at the first channel without a Tx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->tx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+	}
+
+	/* Mark every Tx queue as enabled */
+	for (idx = 0; idx < pdata->hw_feat.tx_q_cnt; idx++) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, idx, MTL_Q_TQOMR, TXQEN,
+				       MTL_Q_ENABLED);
+	}
+
+	/* Finally let the MAC transmit */
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1);
+}
+
+/*
+ * Turn off the transmit path in the reverse order of enabling: MAC
+ * transmitter first, then the Tx queues, then the Tx DMA channels.
+ */
+static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Stop the MAC transmitter */
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
+
+	/* Clear the enable field of every Tx queue */
+	for (idx = 0; idx < pdata->hw_feat.tx_q_cnt; idx++) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, idx, MTL_Q_TQOMR, TXQEN, 0);
+	}
+
+	/* Stop Tx DMA; stop at the first channel without a Tx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->tx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+	}
+}
+
+/*
+ * Turn on the receive path: Rx DMA channels first, then the Rx queues
+ * (MAC_RQC0R), and finally the MAC receiver options and enable bit.
+ */
+static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int reg_val = 0;
+	unsigned int idx;
+
+	/* Start Rx DMA; stop at the first channel without an Rx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->rx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+	}
+
+	/* Each queue owns two bits of MAC_RQC0R; write 0x02 into each */
+	for (idx = 0; idx < pdata->hw_feat.rx_q_cnt; idx++)
+		reg_val |= (0x02 << (2 * idx));
+	XGMAC_IOWRITE(pdata, MAC_RQC0R, reg_val);
+
+	/* Set the MAC Rx options, enabling the receiver last */
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, DCRCC, 1);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, CST, 1);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, ACS, 1);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, RE, 1);
+}
+
+/*
+ * Turn off the receive path: MAC receiver options and enable bit
+ * first, then the Rx queues (MAC_RQC0R), then the Rx DMA channels.
+ */
+static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Clear the MAC Rx options, disabling the receiver last */
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, DCRCC, 0);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, CST, 0);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, ACS, 0);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, RE, 0);
+
+	/* A zero queue control value disables every Rx queue */
+	XGMAC_IOWRITE(pdata, MAC_RQC0R, 0);
+
+	/* Stop Rx DMA; stop at the first channel without an Rx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->rx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+	}
+}
+
+/*
+ * Restart transmission: start the Tx DMA channels and then enable the
+ * MAC transmitter. The Tx queue enables are not touched here.
+ */
+static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Start Tx DMA; stop at the first channel without a Tx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->tx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+	}
+
+	/* Let the MAC transmit again */
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1);
+}
+
+/*
+ * Halt transmission: disable the MAC transmitter and then stop the Tx
+ * DMA channels. The Tx queue enables are not touched here.
+ */
+static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Stop the MAC transmitter first */
+	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
+
+	/* Stop Tx DMA; stop at the first channel without a Tx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->tx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+	}
+}
+
+/* Restart reception by starting the Rx DMA channels. */
+static void xgbe_powerup_rx(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int idx;
+
+	/* Start Rx DMA; stop at the first channel without an Rx ring */
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		channel = pdata->channel + idx;
+		if (!channel->rx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+	}
+}
+
+/* Stop Rx DMA (DMA_CH_RCR.SR = 0) on each channel that has an Rx ring. */
+static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata)
+{
+	unsigned int idx = 0;
+
+	while (idx < pdata->channel_count) {
+		struct xgbe_channel *ch = &pdata->channel[idx];
+
+		/* The walk ends at the first channel without an Rx ring */
+		if (!ch->rx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(ch, DMA_CH_RCR, SR, 0);
+		idx++;
+	}
+}
+
+/*
+ * Program the device for operation.
+ *
+ * The Tx queues are flushed first; if that fails its error code is returned
+ * and nothing else is configured.  Otherwise the DMA, MTL and MAC blocks are
+ * configured in that order, each group ending with the enabling of its
+ * interrupts, and 0 is returned.
+ */
+static int xgbe_init(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	int ret;
+
+	DBGPR("-->xgbe_init\n");
+
+	/* Flush Tx queues */
+	ret = xgbe_flush_tx_queues(pdata);
+	if (ret)
+		return ret;
+
+	/*
+	 * Initialize DMA related features
+	 */
+	xgbe_config_dma_bus(pdata);
+	xgbe_config_dma_cache(pdata);
+	xgbe_config_osp_mode(pdata);
+	xgbe_config_pblx8(pdata);
+	xgbe_config_tx_pbl_val(pdata);
+	xgbe_config_rx_pbl_val(pdata);
+	xgbe_config_rx_coalesce(pdata);
+	xgbe_config_tx_coalesce(pdata);
+	xgbe_config_rx_buffer_size(pdata);
+	xgbe_config_tso_mode(pdata);
+	/* Descriptor rings are set up through the descriptor interface */
+	desc_if->wrapper_tx_desc_init(pdata);
+	desc_if->wrapper_rx_desc_init(pdata);
+	xgbe_enable_dma_interrupts(pdata);
+
+	/*
+	 * Initialize MTL related features
+	 */
+	xgbe_config_mtl_mode(pdata);
+	xgbe_config_rx_queue_mapping(pdata);
+	/*TODO: Program the priorities mapped to the Selected Traffic Classes
+		in MTL_TC_Prty_Map0-3 registers */
+	xgbe_config_tsf_mode(pdata, pdata->tx_sf_mode);
+	xgbe_config_rsf_mode(pdata, pdata->rx_sf_mode);
+	xgbe_config_tx_threshold(pdata, pdata->tx_threshold);
+	xgbe_config_rx_threshold(pdata, pdata->rx_threshold);
+	xgbe_config_tx_fifo_size(pdata);
+	xgbe_config_rx_fifo_size(pdata);
+	xgbe_config_flow_control_threshold(pdata);
+	/*TODO: Queue to Traffic Class Mapping (Q2TCMAP) */
+	/*TODO: Error Packet and undersized good Packet forwarding enable
+		(FEP and FUP)
+	 */
+	xgbe_enable_mtl_interrupts(pdata);
+
+	/* Transmit Class Weight */
+	XGMAC_IOWRITE_BITS(pdata, MTL_Q_TCQWR, QW, 0x10);
+
+	/*
+	 * Initialize MAC related features
+	 */
+	xgbe_config_mac_address(pdata);
+	xgbe_config_jumbo_enable(pdata);
+	xgbe_config_flow_control(pdata);
+	xgbe_config_checksum_offload(pdata);
+	xgbe_config_vlan_support(pdata);
+	xgbe_config_mmc(pdata);
+	xgbe_enable_mac_interrupts(pdata);
+
+	DBGPR("<--xgbe_init\n");
+
+	return 0;
+}
+
+/*
+ * Populate the hardware interface operations table with the device-level
+ * implementations defined in this file.
+ */
+void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
+{
+	DBGPR("-->xgbe_init_function_ptrs\n");
+
+	/* Device bring-up and teardown */
+	hw_if->init = xgbe_init;
+	hw_if->exit = xgbe_exit;
+
+	/* MAC address and receive filtering */
+	hw_if->set_mac_address = xgbe_set_mac_address;
+	hw_if->set_addn_mac_addrs = xgbe_set_addn_mac_addrs;
+	hw_if->set_promiscuous_mode = xgbe_set_promiscuous_mode;
+	hw_if->set_all_multicast_mode = xgbe_set_all_multicast_mode;
+
+	/* Rx checksum and VLAN stripping control */
+	hw_if->enable_rx_csum = xgbe_enable_rx_csum;
+	hw_if->disable_rx_csum = xgbe_disable_rx_csum;
+	hw_if->enable_rx_vlan_stripping = xgbe_enable_rx_vlan_stripping;
+	hw_if->disable_rx_vlan_stripping = xgbe_disable_rx_vlan_stripping;
+
+	/* MMD register access */
+	hw_if->read_mmd_regs = xgbe_read_mmd_regs;
+	hw_if->write_mmd_regs = xgbe_write_mmd_regs;
+
+	/* Link speed selection */
+	hw_if->set_gmii_speed = xgbe_set_gmii_speed;
+	hw_if->set_gmii_2500_speed = xgbe_set_gmii_2500_speed;
+	hw_if->set_xgmii_speed = xgbe_set_xgmii_speed;
+
+	/* Tx/Rx enable and disable */
+	hw_if->enable_tx = xgbe_enable_tx;
+	hw_if->disable_tx = xgbe_disable_tx;
+	hw_if->enable_rx = xgbe_enable_rx;
+	hw_if->disable_rx = xgbe_disable_rx;
+
+	/* Tx/Rx power up and power down */
+	hw_if->powerup_tx = xgbe_powerup_tx;
+	hw_if->powerdown_tx = xgbe_powerdown_tx;
+	hw_if->powerup_rx = xgbe_powerup_rx;
+	hw_if->powerdown_rx = xgbe_powerdown_rx;
+
+	/* Channel interrupt control */
+	hw_if->enable_int = xgbe_enable_int;
+	hw_if->disable_int = xgbe_disable_int;
+
+	/* Transmit and receive data path */
+	hw_if->pre_xmit = xgbe_pre_xmit;
+	hw_if->tx_complete = xgbe_tx_complete;
+	hw_if->dev_read = xgbe_dev_read;
+
+	/* Descriptor handling */
+	hw_if->tx_desc_init = xgbe_tx_desc_init;
+	hw_if->rx_desc_init = xgbe_rx_desc_init;
+	hw_if->tx_desc_reset = xgbe_tx_desc_reset;
+	hw_if->rx_desc_reset = xgbe_rx_desc_reset;
+	hw_if->is_last_desc = xgbe_is_last_desc;
+	hw_if->is_context_desc = xgbe_is_context_desc;
+
+	/* Flow control */
+	hw_if->config_tx_flow_control = xgbe_config_tx_flow_control;
+	hw_if->config_rx_flow_control = xgbe_config_rx_flow_control;
+
+	/* Rx and Tx coalescing */
+	hw_if->config_rx_coalesce = xgbe_config_rx_coalesce;
+	hw_if->config_tx_coalesce = xgbe_config_tx_coalesce;
+	hw_if->usec_to_riwt = xgbe_usec_to_riwt;
+	hw_if->riwt_to_usec = xgbe_riwt_to_usec;
+
+	/* Rx and Tx thresholds */
+	hw_if->config_rx_threshold = xgbe_config_rx_threshold;
+	hw_if->config_tx_threshold = xgbe_config_tx_threshold;
+
+	/* Rx and Tx Store and Forward mode */
+	hw_if->config_rsf_mode = xgbe_config_rsf_mode;
+	hw_if->config_tsf_mode = xgbe_config_tsf_mode;
+
+	/* Tx DMA Operate on Second Frame */
+	hw_if->config_osp_mode = xgbe_config_osp_mode;
+
+	/* Rx and Tx PBL */
+	hw_if->config_rx_pbl_val = xgbe_config_rx_pbl_val;
+	hw_if->get_rx_pbl_val = xgbe_get_rx_pbl_val;
+	hw_if->config_tx_pbl_val = xgbe_config_tx_pbl_val;
+	hw_if->get_tx_pbl_val = xgbe_get_tx_pbl_val;
+	hw_if->config_pblx8 = xgbe_config_pblx8;
+
+	/* MMC statistics */
+	hw_if->tx_mmc_int = xgbe_tx_mmc_int;
+	hw_if->rx_mmc_int = xgbe_rx_mmc_int;
+	hw_if->read_mmc_stats = xgbe_read_mmc_stats;
+
+	DBGPR("<--xgbe_init_function_ptrs\n");
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
new file mode 100644
index 0000000..cfe3d93
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c

@@ -0,0 +1,1351 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <linux/phy.h>
+#include <net/busy_poll.h>
+#include <linux/clk.h>
+#include <linux/if_ether.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+static int xgbe_poll(struct napi_struct *, int);
+static void xgbe_set_rx_mode(struct net_device *);
+
+/*
+ * Number of free descriptors in @ring: the ring size minus the distance
+ * between the cur and dirty indexes.
+ */
+static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
+{
+	unsigned int in_use = ring->cur - ring->dirty;
+
+	return ring->rdesc_count - in_use;
+}
+
+/*
+ * Work out the Rx buffer size for a given MTU.
+ *
+ * Returns -EINVAL (after logging an alert) when @mtu is larger than
+ * XGMAC_JUMBO_PACKET_MTU.  Otherwise returns the MTU plus Ethernet header,
+ * FCS and VLAN header lengths, raised to at least RX_MIN_BUF_SIZE and then
+ * rounded up to a multiple of RX_BUF_ALIGN.
+ */
+static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
+{
+	unsigned int frame_len;
+
+	if (mtu > XGMAC_JUMBO_PACKET_MTU) {
+		netdev_alert(netdev, "MTU exceeds maximum supported value\n");
+		return -EINVAL;
+	}
+
+	/* Largest frame the buffer has to hold, with a lower bound */
+	frame_len = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	if (frame_len < RX_MIN_BUF_SIZE)
+		frame_len = RX_MIN_BUF_SIZE;
+
+	/* Round up to the buffer alignment */
+	frame_len = frame_len + RX_BUF_ALIGN - 1;
+	frame_len = frame_len & ~(RX_BUF_ALIGN - 1);
+
+	return frame_len;
+}
+
+/*
+ * Enable the Tx (TI) and Rx (RI) DMA channel interrupts on every channel,
+ * for whichever of the two rings the channel actually has.
+ */
+static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int idx;
+
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		struct xgbe_channel *ch = &pdata->channel[idx];
+
+		if (ch->tx_ring)
+			hw_if->enable_int(ch, XGMAC_INT_DMA_CH_SR_TI);
+
+		if (ch->rx_ring)
+			hw_if->enable_int(ch, XGMAC_INT_DMA_CH_SR_RI);
+	}
+}
+
+/*
+ * Disable the Tx (TI) and Rx (RI) DMA channel interrupts on every channel,
+ * for whichever of the two rings the channel actually has.
+ */
+static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_channel *ch;
+	unsigned int idx = 0;
+
+	while (idx < pdata->channel_count) {
+		ch = pdata->channel + idx;
+
+		if (ch->tx_ring)
+			hw_if->disable_int(ch, XGMAC_INT_DMA_CH_SR_TI);
+
+		if (ch->rx_ring)
+			hw_if->disable_int(ch, XGMAC_INT_DMA_CH_SR_RI);
+
+		idx++;
+	}
+}
+
+/*
+ * Device interrupt handler; @data is the driver private data.
+ *
+ * Reads DMA_ISR and, for each channel whose bit is set, reads that channel's
+ * DMA_CH_SR and writes the same value back to clear it.  A Tx or Rx
+ * interrupt (TI/RI) schedules NAPI, disabling the channel Tx/Rx interrupts
+ * first; a Fatal Bus Error (FBE) queues the restart work.  MMC Tx/Rx
+ * interrupts reported through MAC_ISR are passed to the hw_if MMC handlers.
+ *
+ * NOTE(review): IRQ_HANDLED is returned even when DMA_ISR reads as zero.
+ * That matches the non-shared request made in xgbe_open(), but would need
+ * revisiting if the line were ever requested as shared - confirm.
+ */
+static irqreturn_t xgbe_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = data;
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_channel *channel;
+	unsigned int dma_isr, dma_ch_isr;
+	unsigned int mac_isr;
+	unsigned int i;
+
+	/* The DMA interrupt status register also reports MAC and MTL
+	 * interrupts. So for polling mode, we just need to check for
+	 * this register to be non-zero
+	 */
+	dma_isr = XGMAC_IOREAD(pdata, DMA_ISR);
+	if (!dma_isr)
+		goto isr_done;
+
+	DBGPR("-->xgbe_isr\n");
+
+	DBGPR("  DMA_ISR = %08x\n", dma_isr);
+	DBGPR("  DMA_DS0 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR0));
+	DBGPR("  DMA_DS1 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR1));
+
+	/* Bit i of DMA_ISR flags a pending interrupt on channel i */
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!(dma_isr & (1 << i)))
+			continue;
+
+		channel = pdata->channel + i;
+
+		dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
+		DBGPR("  DMA_CH%u_ISR = %08x\n", i, dma_ch_isr);
+
+		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, TI) ||
+		    XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RI)) {
+			/* A single NAPI context serves all channels */
+			if (napi_schedule_prep(&pdata->napi)) {
+				/* Disable Tx and Rx interrupts */
+				xgbe_disable_rx_tx_ints(pdata);
+
+				/* Turn on polling */
+				__napi_schedule(&pdata->napi);
+			}
+		}
+
+		/* Restart the device on a Fatal Bus Error */
+		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE))
+			schedule_work(&pdata->restart_work);
+
+		/* Clear all interrupt signals */
+		XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
+	}
+
+	/* MAC interrupts: only the MMC Tx/Rx sources are handled here */
+	if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) {
+		mac_isr = XGMAC_IOREAD(pdata, MAC_ISR);
+
+		if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS))
+			hw_if->tx_mmc_int(pdata);
+
+		if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCRXIS))
+			hw_if->rx_mmc_int(pdata);
+	}
+
+	DBGPR("  DMA_ISR = %08x\n", XGMAC_IOREAD(pdata, DMA_ISR));
+
+	DBGPR("<--xgbe_isr\n");
+
+isr_done:
+	return IRQ_HANDLED;
+}
+
+/*
+ * hrtimer callback for a channel's Tx timer.
+ *
+ * With the Tx ring lock held it tries to schedule NAPI (disabling the
+ * channel Tx/Rx interrupts first when the poll could be scheduled) and
+ * marks the channel's timer as inactive.  The timer is not re-armed here.
+ */
+static enum hrtimer_restart xgbe_tx_timer(struct hrtimer *timer)
+{
+	struct xgbe_channel *channel;
+	struct xgbe_prv_data *pdata;
+	struct xgbe_ring *tx_ring;
+	unsigned long irq_flags;
+
+	channel = container_of(timer, struct xgbe_channel, tx_timer);
+	tx_ring = channel->tx_ring;
+	pdata = channel->pdata;
+
+	DBGPR("-->xgbe_tx_timer\n");
+
+	spin_lock_irqsave(&tx_ring->lock, irq_flags);
+
+	/* Switch to polling: interrupts off, then queue the NAPI poll */
+	if (napi_schedule_prep(&pdata->napi)) {
+		xgbe_disable_rx_tx_ints(pdata);
+		__napi_schedule(&pdata->napi);
+	}
+
+	channel->tx_timer_active = 0;
+
+	spin_unlock_irqrestore(&tx_ring->lock, irq_flags);
+
+	DBGPR("<--xgbe_tx_timer\n");
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Initialise the Tx hrtimer of each channel that has a Tx ring and point it
+ * at xgbe_tx_timer().  The walk ends at the first channel without one.
+ */
+static void xgbe_init_tx_timers(struct xgbe_prv_data *pdata)
+{
+	unsigned int idx;
+
+	DBGPR("-->xgbe_init_tx_timers\n");
+
+	for (idx = 0; idx < pdata->channel_count; idx++) {
+		struct xgbe_channel *ch = pdata->channel + idx;
+
+		if (!ch->tx_ring)
+			break;
+
+		DBGPR("  %s adding tx timer\n", ch->name);
+		hrtimer_init(&ch->tx_timer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_REL);
+		ch->tx_timer.function = xgbe_tx_timer;
+	}
+
+	DBGPR("<--xgbe_init_tx_timers\n");
+}
+
+/*
+ * Mark the Tx timer of each channel that has a Tx ring as inactive and
+ * cancel it.  The walk ends at the first channel without a Tx ring.
+ */
+static void xgbe_stop_tx_timers(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *ch;
+	unsigned int idx = 0;
+
+	DBGPR("-->xgbe_stop_tx_timers\n");
+
+	while (idx < pdata->channel_count) {
+		ch = &pdata->channel[idx];
+		if (!ch->tx_ring)
+			break;
+
+		DBGPR("  %s deleting tx timer\n", ch->name);
+		ch->tx_timer_active = 0;
+		hrtimer_cancel(&ch->tx_timer);
+
+		idx++;
+	}
+
+	DBGPR("<--xgbe_stop_tx_timers\n");
+}
+
+/*
+ * Read the three MAC hardware feature registers and decode them into
+ * pdata->hw_feat.  The structure is zeroed before being filled in, so any
+ * field that is not decoded here is left as zero.
+ */
+void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_features *feat = &pdata->hw_feat;
+	unsigned int hwf0, hwf1, hwf2;
+
+	DBGPR("-->xgbe_get_all_hw_features\n");
+
+	/* Snapshot the raw register contents */
+	hwf0 = XGMAC_IOREAD(pdata, MAC_HWF0R);
+	hwf1 = XGMAC_IOREAD(pdata, MAC_HWF1R);
+	hwf2 = XGMAC_IOREAD(pdata, MAC_HWF2R);
+
+	memset(feat, 0, sizeof(*feat));
+
+	/* Fields of MAC_HWF0R */
+	feat->gmii = XGMAC_GET_BITS(hwf0, MAC_HWF0R, GMIISEL);
+	feat->vlhash = XGMAC_GET_BITS(hwf0, MAC_HWF0R, VLHASH);
+	feat->sma = XGMAC_GET_BITS(hwf0, MAC_HWF0R, SMASEL);
+	feat->rwk = XGMAC_GET_BITS(hwf0, MAC_HWF0R, RWKSEL);
+	feat->mgk = XGMAC_GET_BITS(hwf0, MAC_HWF0R, MGKSEL);
+	feat->mmc = XGMAC_GET_BITS(hwf0, MAC_HWF0R, MMCSEL);
+	feat->aoe = XGMAC_GET_BITS(hwf0, MAC_HWF0R, ARPOFFSEL);
+	feat->ts = XGMAC_GET_BITS(hwf0, MAC_HWF0R, TSSEL);
+	feat->eee = XGMAC_GET_BITS(hwf0, MAC_HWF0R, EEESEL);
+	feat->tx_coe = XGMAC_GET_BITS(hwf0, MAC_HWF0R, TXCOESEL);
+	feat->rx_coe = XGMAC_GET_BITS(hwf0, MAC_HWF0R, RXCOESEL);
+	feat->addn_mac = XGMAC_GET_BITS(hwf0, MAC_HWF0R, ADDMACADRSEL);
+	feat->ts_src = XGMAC_GET_BITS(hwf0, MAC_HWF0R, TSSTSSEL);
+	feat->sa_vlan_ins = XGMAC_GET_BITS(hwf0, MAC_HWF0R, SAVLANINS);
+
+	/* Fields of MAC_HWF1R */
+	feat->rx_fifo_size = XGMAC_GET_BITS(hwf1, MAC_HWF1R, RXFIFOSIZE);
+	feat->tx_fifo_size = XGMAC_GET_BITS(hwf1, MAC_HWF1R, TXFIFOSIZE);
+	feat->dcb = XGMAC_GET_BITS(hwf1, MAC_HWF1R, DCBEN);
+	feat->sph = XGMAC_GET_BITS(hwf1, MAC_HWF1R, SPHEN);
+	feat->tso = XGMAC_GET_BITS(hwf1, MAC_HWF1R, TSOEN);
+	feat->dma_debug = XGMAC_GET_BITS(hwf1, MAC_HWF1R, DBGMEMA);
+	feat->hash_table_size = XGMAC_GET_BITS(hwf1, MAC_HWF1R, HASHTBLSZ);
+	feat->l3l4_filter_num = XGMAC_GET_BITS(hwf1, MAC_HWF1R, L3L4FNUM);
+
+	/* Fields of MAC_HWF2R */
+	feat->rx_q_cnt = XGMAC_GET_BITS(hwf2, MAC_HWF2R, RXQCNT);
+	feat->tx_q_cnt = XGMAC_GET_BITS(hwf2, MAC_HWF2R, TXQCNT);
+	feat->rx_ch_cnt = XGMAC_GET_BITS(hwf2, MAC_HWF2R, RXCHCNT);
+	feat->tx_ch_cnt = XGMAC_GET_BITS(hwf2, MAC_HWF2R, TXCHCNT);
+	feat->pps_out_num = XGMAC_GET_BITS(hwf2, MAC_HWF2R, PPSOUTNUM);
+	feat->aux_snap_num = XGMAC_GET_BITS(hwf2, MAC_HWF2R, AUXSNAPNUM);
+
+	/* The hardware reports queue and channel counts zero based;
+	 * convert them to actual counts
+	 */
+	feat->rx_q_cnt += 1;
+	feat->tx_q_cnt += 1;
+	feat->rx_ch_cnt += 1;
+	feat->tx_ch_cnt += 1;
+
+	DBGPR("<--xgbe_get_all_hw_features\n");
+}
+
+/*
+ * Enable the device's NAPI context.  When @add is non-zero the context is
+ * first registered with the net device, using xgbe_poll() as the poll
+ * routine and NAPI_POLL_WEIGHT as its weight.
+ */
+static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
+{
+	struct napi_struct *napi = &pdata->napi;
+
+	if (add)
+		netif_napi_add(pdata->netdev, napi, xgbe_poll,
+			       NAPI_POLL_WEIGHT);
+
+	napi_enable(napi);
+}
+
+/* Disable the device's NAPI context; it is not unregistered here. */
+static void xgbe_napi_disable(struct xgbe_prv_data *pdata)
+{
+	struct napi_struct *napi = &pdata->napi;
+
+	napi_disable(napi);
+}
+
+/*
+ * Load the initial Tx coalescing settings (usecs and frames) into @pdata
+ * and apply them through the hardware interface.
+ */
+void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
+{
+	DBGPR("-->xgbe_init_tx_coalesce\n");
+
+	pdata->tx_usecs = XGMAC_INIT_DMA_TX_USECS;
+	pdata->tx_frames = XGMAC_INIT_DMA_TX_FRAMES;
+
+	pdata->hw_if.config_tx_coalesce(pdata);
+
+	DBGPR("<--xgbe_init_tx_coalesce\n");
+}
+
+/*
+ * Load the initial Rx coalescing settings into @pdata: the initial usec
+ * value converted to a RIWT value via the hardware interface, plus the
+ * initial frame count.  The settings are then applied to the hardware.
+ */
+void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw = &pdata->hw_if;
+	unsigned int riwt;
+
+	DBGPR("-->xgbe_init_rx_coalesce\n");
+
+	riwt = hw->usec_to_riwt(pdata, XGMAC_INIT_DMA_RX_USECS);
+	pdata->rx_riwt = riwt;
+	pdata->rx_frames = XGMAC_INIT_DMA_RX_FRAMES;
+
+	hw->config_rx_coalesce(pdata);
+
+	DBGPR("<--xgbe_init_rx_coalesce\n");
+}
+
+/*
+ * Call the descriptor interface's unmap_skb() on every descriptor entry of
+ * every Tx ring.  The walk ends at the first channel without a Tx ring.
+ */
+static void xgbe_free_tx_skbuff(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_ring *tx_ring;
+	unsigned int ch_idx, desc_idx;
+
+	DBGPR("-->xgbe_free_tx_skbuff\n");
+
+	for (ch_idx = 0; ch_idx < pdata->channel_count; ch_idx++) {
+		tx_ring = pdata->channel[ch_idx].tx_ring;
+		if (!tx_ring)
+			break;
+
+		for (desc_idx = 0; desc_idx < tx_ring->rdesc_count; desc_idx++)
+			desc_if->unmap_skb(pdata,
+					   GET_DESC_DATA(tx_ring, desc_idx));
+	}
+
+	DBGPR("<--xgbe_free_tx_skbuff\n");
+}
+
+/*
+ * Call the descriptor interface's unmap_skb() on every descriptor entry of
+ * every Rx ring.  The walk ends at the first channel without an Rx ring.
+ */
+static void xgbe_free_rx_skbuff(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_ring *rx_ring;
+	unsigned int ch_idx, desc_idx;
+
+	DBGPR("-->xgbe_free_rx_skbuff\n");
+
+	for (ch_idx = 0; ch_idx < pdata->channel_count; ch_idx++) {
+		rx_ring = pdata->channel[ch_idx].rx_ring;
+		if (!rx_ring)
+			break;
+
+		desc_idx = 0;
+		while (desc_idx < rx_ring->rdesc_count) {
+			desc_if->unmap_skb(pdata,
+					   GET_DESC_DATA(rx_ring, desc_idx));
+			desc_idx++;
+		}
+	}
+
+	DBGPR("<--xgbe_free_rx_skbuff\n");
+}
+
+/*
+ * Put the device into its powered-down state.
+ *
+ * @caller is XGMAC_DRIVER_CONTEXT or XGMAC_IOCTL_CONTEXT.  Returns -EINVAL
+ * if the interface is not running, or if called from the ioctl context
+ * while power_down is already set; returns 0 otherwise.
+ *
+ * The PHY is stopped, then under pdata->lock the Tx queues and NAPI are
+ * stopped, Tx/Rx are powered down through the hardware interface and
+ * power_down is set.  Only the driver context detaches the net device.
+ *
+ * NOTE(review): napi_disable() is documented as able to sleep, yet it is
+ * called here with pdata->lock held and interrupts disabled - confirm this
+ * is safe or move it outside the locked region.
+ */
+int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned long flags;
+
+	DBGPR("-->xgbe_powerdown\n");
+
+	/* Note: the same message is logged when the interface is not running */
+	if (!netif_running(netdev) ||
+	    (caller == XGMAC_IOCTL_CONTEXT && pdata->power_down)) {
+		netdev_alert(netdev, "Device is already powered down\n");
+		DBGPR("<--xgbe_powerdown\n");
+		return -EINVAL;
+	}
+
+	phy_stop(pdata->phydev);
+
+	spin_lock_irqsave(&pdata->lock, flags);
+
+	if (caller == XGMAC_DRIVER_CONTEXT)
+		netif_device_detach(netdev);
+
+	netif_tx_stop_all_queues(netdev);
+	xgbe_napi_disable(pdata);
+
+	/* Powerdown Tx/Rx */
+	hw_if->powerdown_tx(pdata);
+	hw_if->powerdown_rx(pdata);
+
+	pdata->power_down = 1;
+
+	spin_unlock_irqrestore(&pdata->lock, flags);
+
+	DBGPR("<--xgbe_powerdown\n");
+
+	return 0;
+}
+
+/*
+ * Bring the device back from its powered-down state.
+ *
+ * @caller is XGMAC_DRIVER_CONTEXT or XGMAC_IOCTL_CONTEXT.  Returns -EINVAL
+ * if the interface is not running, or if called from the ioctl context
+ * while power_down is not set; returns 0 otherwise.
+ *
+ * Under pdata->lock: power_down is cleared, the PHY is started, Tx/Rx are
+ * powered up through the hardware interface, NAPI is re-enabled (without
+ * re-adding it) and the Tx queues are started.  Only the driver context
+ * re-attaches the net device.
+ *
+ * NOTE(review): phy_start() is called with pdata->lock held and interrupts
+ * disabled; if it takes a sleeping lock this is unsafe - confirm.
+ */
+int xgbe_powerup(struct net_device *netdev, unsigned int caller)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned long flags;
+
+	DBGPR("-->xgbe_powerup\n");
+
+	/* Note: the same message is logged when the interface is not running */
+	if (!netif_running(netdev) ||
+	    (caller == XGMAC_IOCTL_CONTEXT && !pdata->power_down)) {
+		netdev_alert(netdev, "Device is already powered up\n");
+		DBGPR("<--xgbe_powerup\n");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pdata->lock, flags);
+
+	pdata->power_down = 0;
+
+	phy_start(pdata->phydev);
+
+	/* Enable Tx/Rx */
+	hw_if->powerup_tx(pdata);
+	hw_if->powerup_rx(pdata);
+
+	if (caller == XGMAC_DRIVER_CONTEXT)
+		netif_device_attach(netdev);
+
+	xgbe_napi_enable(pdata, 0);
+	netif_tx_start_all_queues(netdev);
+
+	spin_unlock_irqrestore(&pdata->lock, flags);
+
+	DBGPR("<--xgbe_powerup\n");
+
+	return 0;
+}
+
+/*
+ * Configure and start the device.
+ *
+ * Applies the current Rx filtering mode, runs the hardware init sequence,
+ * starts the PHY, enables Tx and Rx, initialises the Tx timers, adds and
+ * enables NAPI and finally starts the Tx queues.
+ *
+ * Returns 0 on success, or the error reported by the hardware init
+ * sequence; in that case nothing past the init step has been started.
+ *
+ * NOTE(review): NAPI is added (netif_napi_add) on every call, including
+ * the stop/start cycle done by xgbe_restart_dev(), while xgbe_stop() only
+ * disables it - confirm that repeated adds are acceptable.
+ */
+static int xgbe_start(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct net_device *netdev = pdata->netdev;
+	int ret;
+
+	DBGPR("-->xgbe_start\n");
+
+	xgbe_set_rx_mode(netdev);
+
+	/* Do not enable Tx/Rx on hardware that failed to initialise */
+	ret = hw_if->init(pdata);
+	if (ret) {
+		netdev_alert(netdev, "hardware initialization failed\n");
+		DBGPR("<--xgbe_start\n");
+		return ret;
+	}
+
+	phy_start(pdata->phydev);
+
+	hw_if->enable_tx(pdata);
+	hw_if->enable_rx(pdata);
+
+	xgbe_init_tx_timers(pdata);
+
+	xgbe_napi_enable(pdata, 1);
+	netif_tx_start_all_queues(netdev);
+
+	DBGPR("<--xgbe_start\n");
+
+	return 0;
+}
+
+/*
+ * Stop the device: PHY first, then the Tx queues and NAPI, then the Tx
+ * timers, and finally Tx and Rx through the hardware interface.
+ */
+static void xgbe_stop(struct xgbe_prv_data *pdata)
+{
+	struct net_device *ndev = pdata->netdev;
+	struct xgbe_hw_if *hw = &pdata->hw_if;
+
+	DBGPR("-->xgbe_stop\n");
+
+	phy_stop(pdata->phydev);
+
+	/* Quiesce the software side before touching the hardware */
+	netif_tx_stop_all_queues(ndev);
+	xgbe_napi_disable(pdata);
+	xgbe_stop_tx_timers(pdata);
+
+	hw->disable_tx(pdata);
+	hw->disable_rx(pdata);
+
+	DBGPR("<--xgbe_stop\n");
+}
+
+/*
+ * Stop and restart a running device, releasing the Tx and Rx ring buffers
+ * in between.  When @reset is non-zero the hardware interface's exit
+ * routine (software reset) is run before the device is started again.
+ * Nothing is done if the interface is not running.
+ */
+static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
+{
+	DBGPR("-->xgbe_restart_dev\n");
+
+	/* A device that is down gets "restarted" by the next open */
+	if (netif_running(pdata->netdev)) {
+		xgbe_stop(pdata);
+		synchronize_irq(pdata->irq_number);
+
+		xgbe_free_tx_skbuff(pdata);
+		xgbe_free_rx_skbuff(pdata);
+
+		/* Optional software reset of the device */
+		if (reset)
+			pdata->hw_if.exit(pdata);
+
+		xgbe_start(pdata);
+
+		DBGPR("<--xgbe_restart_dev\n");
+	}
+}
+
+/*
+ * Work handler for pdata->restart_work: restarts the device, with a
+ * software reset, while holding the RTNL lock.
+ */
+static void xgbe_restart(struct work_struct *work)
+{
+	struct xgbe_prv_data *pdata;
+
+	pdata = container_of(work, struct xgbe_prv_data, restart_work);
+
+	rtnl_lock();
+	xgbe_restart_dev(pdata, 1);
+	rtnl_unlock();
+}
+
+/* Record the skb's VLAN tag in @packet, if the skb carries one. */
+static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data *packet)
+{
+	if (!vlan_tx_tag_present(skb))
+		return;
+
+	packet->vlan_ctag = vlan_tx_tag_get(skb);
+}
+
+/*
+ * Fill in the TSO related fields of @packet.
+ *
+ * Does nothing and returns 0 when the packet is not marked TSO_ENABLE.
+ * Otherwise the skb header is made writable with skb_cow_head(); if that
+ * fails its error is returned.  On success the header, TCP header and TCP
+ * payload lengths and the MSS are recorded and 0 is returned.
+ */
+static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet)
+{
+	unsigned int tcp_len;
+	int err;
+
+	if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			    TSO_ENABLE))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err)
+		return err;
+
+	tcp_len = tcp_hdrlen(skb);
+
+	/* Everything up to and including the TCP header */
+	packet->header_len = skb_transport_offset(skb) + tcp_len;
+	packet->tcp_header_len = tcp_len;
+	packet->tcp_payload_len = skb->len - packet->header_len;
+	packet->mss = skb_shinfo(skb)->gso_size;
+
+	DBGPR("  packet->header_len=%u\n", packet->header_len);
+	DBGPR("  packet->tcp_header_len=%u, packet->tcp_payload_len=%u\n",
+	      packet->tcp_header_len, packet->tcp_payload_len);
+	DBGPR("  packet->mss=%u\n", packet->mss);
+
+	return 0;
+}
+
+/*
+ * Return 1 if @skb is to be sent using TSO, i.e. it needs checksum
+ * completion (CHECKSUM_PARTIAL) and is a GSO packet; 0 otherwise.
+ */
+static int xgbe_is_tso(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		DBGPR("  TSO packet to be processed\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Gather preliminary transmit information for @skb into @packet: the
+ * TSO/checksum/VLAN attribute bits and the number of descriptors the
+ * packet will consume on @ring.
+ */
+static void xgbe_packet_info(struct xgbe_ring *ring, struct sk_buff *skb,
+			     struct xgbe_packet_data *packet)
+{
+	struct skb_frag_struct *frag;
+	unsigned int need_context = 0;
+	unsigned int remaining;
+	unsigned int frag_idx;
+
+	packet->rdesc_count = 0;
+
+	if (xgbe_is_tso(skb)) {
+		/* A change of MSS costs one context descriptor */
+		if (skb_shinfo(skb)->gso_size != ring->tx.cur_mss) {
+			need_context = 1;
+			packet->rdesc_count++;
+		}
+
+		/* One more descriptor carries the TSO header */
+		packet->rdesc_count++;
+
+		XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			       TSO_ENABLE, 1);
+		XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			       CSUM_ENABLE, 1);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			       CSUM_ENABLE, 1);
+	}
+
+	if (vlan_tx_tag_present(skb)) {
+		/* A change of VLAN tag also needs a context descriptor,
+		 * unless the TSO one above can be shared
+		 */
+		if (!need_context &&
+		    vlan_tx_tag_get(skb) != ring->tx.cur_vlan_ctag) {
+			need_context = 1;
+			packet->rdesc_count++;
+		}
+
+		XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+			       VLAN_CTAG, 1);
+	}
+
+	/* Linear data: one descriptor per TX_MAX_BUF_SIZE sized piece */
+	remaining = skb_headlen(skb);
+	while (remaining) {
+		packet->rdesc_count++;
+		remaining -= min_t(unsigned int, remaining, TX_MAX_BUF_SIZE);
+	}
+
+	/* Page fragments are split up the same way */
+	for (frag_idx = 0; frag_idx < skb_shinfo(skb)->nr_frags; frag_idx++) {
+		frag = &skb_shinfo(skb)->frags[frag_idx];
+
+		remaining = skb_frag_size(frag);
+		while (remaining) {
+			packet->rdesc_count++;
+			remaining -= min_t(unsigned int, remaining,
+					   TX_MAX_BUF_SIZE);
+		}
+	}
+}
+
+/*
+ * Open the network device.
+ *
+ * Enables the system clock, sizes the Rx buffers for the current MTU,
+ * allocates the descriptor rings, sets up the restart work item, requests
+ * the device interrupt and starts the device.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * acquired up to that point is released again.
+ */
+static int xgbe_open(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	int ret;
+
+	DBGPR("-->xgbe_open\n");
+
+	/* Enable the clock */
+	ret = clk_prepare_enable(pdata->sysclock);
+	if (ret) {
+		netdev_alert(netdev, "clk_prepare_enable failed\n");
+		return ret;
+	}
+
+	/* Calculate the Rx buffer size before allocating rings */
+	ret = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
+	if (ret < 0)
+		goto err_clk;
+	pdata->rx_buf_size = ret;
+
+	/* Allocate the ring descriptors and buffers */
+	ret = desc_if->alloc_ring_resources(pdata);
+	if (ret)
+		goto err_clk;
+
+	/* Initialize the device restart work struct */
+	INIT_WORK(&pdata->restart_work, xgbe_restart);
+
+	/* Request interrupts */
+	ret = devm_request_irq(pdata->dev, netdev->irq, xgbe_isr, 0,
+			       netdev->name, pdata);
+	if (ret) {
+		/* pdata->irq_number is only recorded once the request has
+		 * succeeded, so report the interrupt that was asked for
+		 */
+		netdev_alert(netdev, "error requesting irq %d\n",
+			     netdev->irq);
+		goto err_irq;
+	}
+	pdata->irq_number = netdev->irq;
+
+	ret = xgbe_start(pdata);
+	if (ret)
+		goto err_start;
+
+	DBGPR("<--xgbe_open\n");
+
+	return 0;
+
+err_start:
+	/* Software reset of the device, then give the interrupt back */
+	hw_if->exit(pdata);
+
+	devm_free_irq(pdata->dev, pdata->irq_number, pdata);
+	pdata->irq_number = 0;
+
+err_irq:
+	desc_if->free_ring_resources(pdata);
+
+err_clk:
+	clk_disable_unprepare(pdata->sysclock);
+
+	return ret;
+}
+
+/*
+ * Close the network device: stop it, software-reset the hardware, free the
+ * descriptor rings, release the interrupt if one is held and disable the
+ * system clock.  Always returns 0.
+ */
+static int xgbe_close(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	DBGPR("-->xgbe_close\n");
+
+	xgbe_stop(pdata);
+
+	/* Software reset of the device */
+	pdata->hw_if.exit(pdata);
+
+	/* Ring descriptors and buffers */
+	pdata->desc_if.free_ring_resources(pdata);
+
+	/* irq_number is zero when no interrupt is currently requested */
+	if (pdata->irq_number) {
+		devm_free_irq(pdata->dev, pdata->irq_number, pdata);
+		pdata->irq_number = 0;
+	}
+
+	clk_disable_unprepare(pdata->sysclock);
+
+	DBGPR("<--xgbe_close\n");
+
+	return 0;
+}
+
+/*
+ * Transmit entry point for @skb.
+ *
+ * The channel, and with it the Tx ring, is selected by skb->queue_mapping;
+ * all work is done with that ring's lock held.
+ *
+ * Return value:
+ *   NETDEV_TX_OK   - the packet was queued, or was dropped and freed because
+ *                    it was empty
+ *   NETDEV_TX_BUSY - not enough free descriptors; the subqueue is stopped
+ *                    and the skb is NOT freed
+ *   other          - xgbe_prep_tso() failed; the skb is freed and that
+ *                    error code is returned
+ * A map_tx_skb() failure frees the skb and returns the (zero) value left in
+ * ret by xgbe_prep_tso().
+ */
+static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_channel *channel;
+	struct xgbe_ring *ring;
+	struct xgbe_packet_data *packet;
+	unsigned long flags;
+	int ret;
+
+	DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
+
+	channel = pdata->channel + skb->queue_mapping;
+	ring = channel->tx_ring;
+	/* Per-ring scratch area describing the packet being queued */
+	packet = &ring->packet_data;
+
+	ret = NETDEV_TX_OK;
+
+	spin_lock_irqsave(&ring->lock, flags);
+
+	if (skb->len == 0) {
+		netdev_err(netdev, "empty skb received from stack\n");
+		dev_kfree_skb_any(skb);
+		goto tx_netdev_return;
+	}
+
+	/* Calculate preliminary packet info */
+	memset(packet, 0, sizeof(*packet));
+	xgbe_packet_info(ring, skb, packet);
+
+	/* Check that there are enough descriptors available */
+	if (packet->rdesc_count > xgbe_tx_avail_desc(ring)) {
+		DBGPR("  Tx queue stopped, not enough descriptors available\n");
+		netif_stop_subqueue(netdev, channel->queue_index);
+		ring->tx.queue_stopped = 1;
+		ret = NETDEV_TX_BUSY;
+		goto tx_netdev_return;
+	}
+
+	/* From here on ret holds the xgbe_prep_tso() result */
+	ret = xgbe_prep_tso(skb, packet);
+	if (ret) {
+		netdev_err(netdev, "error processing TSO packet\n");
+		dev_kfree_skb_any(skb);
+		goto tx_netdev_return;
+	}
+	xgbe_prep_vlan(skb, packet);
+
+	/* A zero return from map_tx_skb() is treated as failure */
+	if (!desc_if->map_tx_skb(channel, skb)) {
+		dev_kfree_skb_any(skb);
+		goto tx_netdev_return;
+	}
+
+	/* Configure required descriptor fields for transmission */
+	hw_if->pre_xmit(channel);
+
+#ifdef XGMAC_ENABLE_TX_PKT_DUMP
+	xgbe_print_pkt(netdev, skb, true);
+#endif
+
+tx_netdev_return:
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	DBGPR("<--xgbe_xmit\n");
+
+	return ret;
+}
+
+/*
+ * ndo_set_rx_mode handler: derive the promiscuous and all-multicast
+ * settings from the interface flags and the address list sizes, then
+ * program them (and, when not promiscuous, the additional MAC addresses).
+ */
+static void xgbe_set_rx_mode(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int promisc, allmulti;
+
+	DBGPR("-->xgbe_set_rx_mode\n");
+
+	/* Start from what the interface flags request */
+	promisc = (netdev->flags & IFF_PROMISC) ? 1 : 0;
+	allmulti = (netdev->flags & IFF_ALLMULTI) ? 1 : 0;
+
+	/* More multicast addresses than hw_feat.addn_mac: take all multicast */
+	if (netdev_mc_count(netdev) > pdata->hw_feat.addn_mac)
+		allmulti = 1;
+
+	/* Unicast alone, or unicast plus multicast, above hw_feat.addn_mac
+	 * forces promiscuous mode
+	 */
+	if ((netdev_uc_count(netdev) > pdata->hw_feat.addn_mac) ||
+	    ((netdev_uc_count(netdev) + netdev_mc_count(netdev)) >
+	     pdata->hw_feat.addn_mac))
+		promisc = 1;
+
+	hw_if->set_promiscuous_mode(pdata, promisc);
+	hw_if->set_all_multicast_mode(pdata, allmulti);
+
+	/* The additional addresses are only programmed when not promiscuous */
+	if (promisc == 0)
+		hw_if->set_addn_mac_addrs(pdata, allmulti);
+
+	DBGPR("<--xgbe_set_rx_mode\n");
+}
+
+/*
+ * ndo_set_mac_address handler.  "addr" is interpreted as a struct sockaddr.
+ * Returns -EADDRNOTAVAIL when the address is not a valid Ethernet address;
+ * otherwise stores it in the net_device, programs the hardware and
+ * returns 0.
+ */
+static int xgbe_set_mac_address(struct net_device *netdev, void *addr)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct sockaddr *sa = addr;
+
+	DBGPR("-->xgbe_set_mac_address\n");
+
+	if (!is_valid_ether_addr(sa->sa_data))
+		return -EADDRNOTAVAIL;
+
+	/* Record the new address first, then push that copy to the hardware */
+	memcpy(netdev->dev_addr, sa->sa_data, netdev->addr_len);
+	pdata->hw_if.set_mac_address(pdata, netdev->dev_addr);
+
+	DBGPR("<--xgbe_set_mac_address\n");
+
+	return 0;
+}
+
+/*
+ * ndo_change_mtu handler.  Computes the Rx buffer size for the requested
+ * MTU; a negative result is returned unchanged as the error.  Otherwise the
+ * new size and MTU are recorded, the device is restarted and 0 is returned.
+ */
+static int xgbe_change_mtu(struct net_device *netdev, int mtu)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	int rx_buf_size;
+
+	DBGPR("-->xgbe_change_mtu\n");
+
+	rx_buf_size = xgbe_calc_rx_buf_size(netdev, mtu);
+	if (rx_buf_size < 0)
+		return rx_buf_size;
+
+	netdev->mtu = mtu;
+	pdata->rx_buf_size = rx_buf_size;
+
+	/* Restart so that the new buffer size takes effect */
+	xgbe_restart_dev(pdata, 0);
+
+	DBGPR("<--xgbe_change_mtu\n");
+
+	return 0;
+}
+
+/*
+ * ndo_get_stats64 handler: refresh the MMC counters and translate them into
+ * the rtnl_link_stats64 structure supplied by the caller, which is also the
+ * return value.
+ */
+static struct rtnl_link_stats64 *xgbe_get_stats64(struct net_device *netdev,
+						  struct rtnl_link_stats64 *s)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_mmc_stats *mmc = &pdata->mmc_stats;
+
+	DBGPR("-->%s\n", __func__);
+
+	/* Bring pdata->mmc_stats up to date before reading it */
+	pdata->hw_if.read_mmc_stats(pdata);
+
+	/* Transmit side */
+	s->tx_packets = mmc->txframecount_gb;
+	s->tx_bytes = mmc->txoctetcount_gb;
+	s->tx_errors = mmc->txframecount_gb - mmc->txframecount_g;
+	s->tx_dropped = netdev->stats.tx_dropped;
+
+	/* Receive side */
+	s->rx_packets = mmc->rxframecount_gb;
+	s->rx_bytes = mmc->rxoctetcount_gb;
+	s->multicast = mmc->rxmulticastframes_g;
+
+	/* Frames in the overall count that appear in none of the broadcast,
+	 * multicast or unicast "_g" counters are reported as errors
+	 */
+	s->rx_errors = mmc->rxframecount_gb -
+		       mmc->rxbroadcastframes_g -
+		       mmc->rxmulticastframes_g -
+		       mmc->rxunicastframes_g;
+	s->rx_length_errors = mmc->rxlengtherror;
+	s->rx_crc_errors = mmc->rxcrcerror;
+	s->rx_fifo_errors = mmc->rxfifooverflow;
+
+	DBGPR("<--%s\n", __func__);
+
+	return s;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * ndo_poll_controller handler: run the interrupt service routine by hand
+ * while the device interrupt line is disabled.
+ */
+static void xgbe_poll_controller(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	DBGPR("-->xgbe_poll_controller\n");
+
+	/* Keep the real interrupt out of the way for the manual ISR call */
+	disable_irq(pdata->irq_number);
+	xgbe_isr(pdata->irq_number, pdata);
+	enable_irq(pdata->irq_number);
+
+	DBGPR("<--xgbe_poll_controller\n");
+}
+#endif /* End CONFIG_NET_POLL_CONTROLLER */
+
+/*
+ * ndo_set_features handler.
+ *
+ * Compares the requested feature set with the one last recorded in
+ * pdata->netdev_features and toggles Rx checksum offload and Rx VLAN
+ * stripping in hardware when they differ.  The requested set is then
+ * recorded as the current one.  Always returns 0.
+ */
+static int xgbe_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int rxcsum_enabled, rxvlan_enabled;
+
+	/* Entry trace, matching the exit trace below and the other handlers */
+	DBGPR("-->xgbe_set_features\n");
+
+	/* Current state, as recorded by the previous call */
+	rxcsum_enabled = !!(pdata->netdev_features & NETIF_F_RXCSUM);
+	rxvlan_enabled = !!(pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX);
+
+	/* Only touch the hardware when the requested state differs */
+	if ((features & NETIF_F_RXCSUM) && !rxcsum_enabled) {
+		hw_if->enable_rx_csum(pdata);
+		netdev_alert(netdev, "state change - rxcsum enabled\n");
+	} else if (!(features & NETIF_F_RXCSUM) && rxcsum_enabled) {
+		hw_if->disable_rx_csum(pdata);
+		netdev_alert(netdev, "state change - rxcsum disabled\n");
+	}
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan_enabled) {
+		hw_if->enable_rx_vlan_stripping(pdata);
+		netdev_alert(netdev, "state change - rxvlan enabled\n");
+	} else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && rxvlan_enabled) {
+		hw_if->disable_rx_vlan_stripping(pdata);
+		netdev_alert(netdev, "state change - rxvlan disabled\n");
+	}
+
+	/* Remember the full requested set for the next comparison */
+	pdata->netdev_features = features;
+
+	DBGPR("<--xgbe_set_features\n");
+
+	return 0;
+}
+
+/* net_device callbacks provided by this driver */
+static const struct net_device_ops xgbe_netdev_ops = {
+	/* Bring-up and tear-down */
+	.ndo_open		= xgbe_open,
+	.ndo_stop		= xgbe_close,
+
+	/* Data path and statistics */
+	.ndo_start_xmit		= xgbe_xmit,
+	.ndo_get_stats64	= xgbe_get_stats64,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= xgbe_poll_controller,
+#endif
+
+	/* Configuration */
+	.ndo_set_rx_mode	= xgbe_set_rx_mode,
+	.ndo_set_mac_address	= xgbe_set_mac_address,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= xgbe_change_mtu,
+	.ndo_set_features	= xgbe_set_features,
+};
+
+/*
+ * Accessor for the driver's net_device_ops table.  The table is defined
+ * const; the qualifier is cast away because this function returns a
+ * non-const pointer.
+ */
+struct net_device_ops *xgbe_get_netdev_ops(void)
+{
+	const struct net_device_ops *ops = &xgbe_netdev_ops;
+
+	return (struct net_device_ops *)ops;
+}
+
+/*
+ * Reclaim completed Tx descriptors for one channel.
+ *
+ * Walks the Tx ring from ring->dirty towards ring->cur under the ring lock,
+ * stopping at the first descriptor the hardware has not completed or after
+ * TX_DESC_MAX_PROC descriptors.  Each reclaimed descriptor has its skb
+ * unmapped and is reset for re-use.  If the queue had been stopped and more
+ * than TX_DESC_MIN_FREE descriptors are available again, it is woken.
+ *
+ * Returns the number of descriptors reclaimed (0 when the channel has no
+ * Tx ring).
+ */
+static int xgbe_tx_poll(struct xgbe_channel *channel)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_ring *ring = channel->tx_ring;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_ring_desc *rdesc;
+	struct net_device *netdev = pdata->netdev;
+	unsigned long flags;
+	int processed = 0;
+
+	DBGPR("-->xgbe_tx_poll\n");
+
+	/* Nothing to do if there isn't a Tx ring for this channel */
+	if (!ring)
+		return 0;
+
+	spin_lock_irqsave(&ring->lock, flags);
+
+	/* ring->dirty and ring->cur are only ever incremented, never reduced
+	 * to the ring size, so they eventually wrap around the integer range.
+	 * Test for inequality rather than "dirty < cur": once cur has wrapped
+	 * and dirty has not, a less-than test would stop reclaiming
+	 * descriptors.  dirty trails cur, so "!=" means work is outstanding.
+	 */
+	while ((processed < TX_DESC_MAX_PROC) && (ring->dirty != ring->cur)) {
+		rdata = GET_DESC_DATA(ring, ring->dirty);
+		rdesc = rdata->rdesc;
+
+		/* Stop at the first descriptor not yet completed */
+		if (!hw_if->tx_complete(rdesc))
+			break;
+
+#ifdef XGMAC_ENABLE_TX_DESC_DUMP
+		xgbe_dump_tx_desc(ring, ring->dirty, 1, 0);
+#endif
+
+		/* Free the SKB and reset the descriptor for re-use */
+		desc_if->unmap_skb(pdata, rdata);
+		hw_if->tx_desc_reset(rdata);
+
+		processed++;
+		ring->dirty++;
+	}
+
+	/* Restart a queue that xgbe_xmit stopped for lack of descriptors */
+	if ((ring->tx.queue_stopped == 1) &&
+	    (xgbe_tx_avail_desc(ring) > TX_DESC_MIN_FREE)) {
+		ring->tx.queue_stopped = 0;
+		netif_wake_subqueue(netdev, channel->queue_index);
+	}
+
+	DBGPR("<--xgbe_tx_poll: processed=%d\n", processed);
+
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	return processed;
+}
+
+/*
+ * Receive processing for one channel.
+ *
+ * Consumes Rx descriptors until "budget" is reached or hw_if->dev_read()
+ * returns nonzero.  A packet may span several descriptors: while the
+ * INCOMPLETE attribute is set the code jumps back to read_again and appends
+ * the next descriptor's data to the skb being assembled.  Completed packets
+ * are handed to the stack with napi_gro_receive().
+ *
+ * Returns the number of descriptors consumed (0 when the channel has no Rx
+ * ring).  Descriptors drained for errored packets are counted as well.  The
+ * budget is only tested at the top of the outer loop; the read_again path
+ * keeps consuming descriptors without re-checking it.
+ */
+static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_ring *ring = channel->rx_ring;
+	struct xgbe_ring_data *rdata;
+	struct xgbe_packet_data *packet;
+	struct net_device *netdev = pdata->netdev;
+	struct sk_buff *skb;
+	unsigned int incomplete, error;
+	unsigned int cur_len, put_len, max_len;
+	int received = 0;
+
+	DBGPR("-->xgbe_rx_poll: budget=%d\n", budget);
+
+	/* Nothing to do if there isn't a Rx ring for this channel */
+	if (!ring)
+		return 0;
+
+	packet = &ring->packet_data;
+	while (received < budget) {
+		DBGPR("  cur = %d\n", ring->cur);
+
+		/* Clear the packet data information */
+		memset(packet, 0, sizeof(*packet));
+		skb = NULL;
+		error = 0;
+		cur_len = 0;
+
+		/* Re-entered once per additional descriptor of the same
+		 * packet; skb, error and cur_len carry over between passes
+		 */
+read_again:
+		rdata = GET_DESC_DATA(ring, ring->cur);
+
+		/* A nonzero return ends the poll - presumably no further
+		 * descriptor is ready (TODO confirm against dev_read)
+		 */
+		if (hw_if->dev_read(channel))
+			break;
+
+		received++;
+		ring->cur++;
+		ring->dirty++;
+
+		dma_unmap_single(pdata->dev, rdata->skb_dma,
+				 rdata->skb_dma_len, DMA_FROM_DEVICE);
+		rdata->skb_dma = 0;
+
+		incomplete = XGMAC_GET_BITS(packet->attributes,
+					    RX_PACKET_ATTRIBUTES,
+					    INCOMPLETE);
+
+		/* Earlier error, just drain the remaining data */
+		if (incomplete && error)
+			goto read_again;
+
+		/* Last descriptor of an errored packet: drop what was
+		 * assembled so far (skb may still be NULL here)
+		 */
+		if (error || packet->errors) {
+			if (packet->errors)
+				DBGPR("Error in received packet\n");
+			dev_kfree_skb(skb);
+			continue;
+		}
+
+		/* rdata->len is treated as the running packet length, so
+		 * only the part beyond what was already copied is new
+		 */
+		put_len = rdata->len - cur_len;
+		if (skb) {
+			/* Continuation descriptor: grow the tailroom of the
+			 * skb being assembled and copy the new data into it
+			 */
+			if (pskb_expand_head(skb, 0, put_len, GFP_ATOMIC)) {
+				DBGPR("pskb_expand_head error\n");
+				if (incomplete) {
+					error = 1;
+					goto read_again;
+				}
+
+				dev_kfree_skb(skb);
+				continue;
+			}
+			/* NOTE(review): rdata->skb stays attached to the
+			 * descriptor after this copy - presumably released or
+			 * re-used by realloc_skb below; verify
+			 */
+			memcpy(skb_tail_pointer(skb), rdata->skb->data,
+			       put_len);
+		} else {
+			/* First descriptor: take over the descriptor's skb */
+			skb = rdata->skb;
+			rdata->skb = NULL;
+		}
+		skb_put(skb, put_len);
+		cur_len += put_len;
+
+		if (incomplete)
+			goto read_again;
+
+		/* Be sure we don't exceed the configured MTU */
+		max_len = netdev->mtu + ETH_HLEN;
+		/* NOTE(review): skb->protocol is only assigned by
+		 * eth_type_trans() further down, so this test reads the value
+		 * the skb was set up with - confirm this is intended
+		 */
+		if (!(netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+		    (skb->protocol == htons(ETH_P_8021Q)))
+			max_len += VLAN_HLEN;
+
+		if (skb->len > max_len) {
+			DBGPR("packet length exceeds configured MTU\n");
+			dev_kfree_skb(skb);
+			continue;
+		}
+
+#ifdef XGMAC_ENABLE_RX_PKT_DUMP
+		xgbe_print_pkt(netdev, skb, false);
+#endif
+
+		/* Mark the checksum as verified only when the packet
+		 * attributes carry CSUM_DONE
+		 */
+		skb_checksum_none_assert(skb);
+		if (XGMAC_GET_BITS(packet->attributes,
+				   RX_PACKET_ATTRIBUTES, CSUM_DONE))
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		/* Attach the VLAN tag recorded in the packet data, if any */
+		if (XGMAC_GET_BITS(packet->attributes,
+				   RX_PACKET_ATTRIBUTES, VLAN_CTAG))
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       packet->vlan_ctag);
+
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, netdev);
+		skb_record_rx_queue(skb, channel->queue_index);
+		skb_mark_napi_id(skb, &pdata->napi);
+
+		netdev->last_rx = jiffies;
+		napi_gro_receive(&pdata->napi, skb);
+	}
+
+	if (received) {
+		/* Refill the descriptors that were consumed above */
+		desc_if->realloc_skb(channel);
+
+		/* Update the Rx Tail Pointer Register with address of
+		 * the last cleaned entry */
+		rdata = GET_DESC_DATA(ring, ring->rx.realloc_index - 1);
+		XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO,
+				  lower_32_bits(rdata->rdesc_dma));
+	}
+
+	DBGPR("<--xgbe_rx_poll: received = %d\n", received);
+
+	return received;
+}
+
+/*
+ * NAPI poll callback.  Reclaims Tx descriptors on every channel, then
+ * processes Rx on every channel, each channel receiving whatever is left of
+ * "budget".  When less than the full budget was used, polling is completed
+ * and the Tx/Rx interrupts are enabled again.  Returns the Rx work done.
+ */
+static int xgbe_poll(struct napi_struct *napi, int budget)
+{
+	struct xgbe_prv_data *pdata = container_of(napi, struct xgbe_prv_data,
+						   napi);
+	int rx_done = 0;
+	unsigned int i;
+
+	DBGPR("-->xgbe_poll: budget=%d\n", budget);
+
+	/* Tx cleanup comes first; its result does not count against budget */
+	for (i = 0; i < pdata->channel_count; i++)
+		xgbe_tx_poll(&pdata->channel[i]);
+
+	/* Rx next, handing each channel only the remaining budget */
+	for (i = 0; i < pdata->channel_count; i++)
+		rx_done += xgbe_rx_poll(&pdata->channel[i], budget - rx_done);
+
+	/* Budget not exhausted: leave polled mode and re-arm the interrupts */
+	if (rx_done < budget) {
+		napi_complete(napi);
+		xgbe_enable_rx_tx_ints(pdata);
+	}
+
+	DBGPR("<--xgbe_poll: received = %d\n", rx_done);
+
+	return rx_done;
+}
+
+/*
+ * Debug helper: print "count" consecutive Tx descriptors starting at ring
+ * index "idx".  A "flag" of 1 labels them as queued for transmission; any
+ * other value labels them as transmitted by the device.
+ */
+void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx,
+		       unsigned int count, unsigned int flag)
+{
+	struct xgbe_ring_desc *rdesc;
+	unsigned int n;
+
+	for (n = 0; n < count; n++, idx++) {
+		rdesc = GET_DESC_DATA(ring, idx)->rdesc;
+		DBGPR("TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx,
+		      (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE",
+		      le32_to_cpu(rdesc->desc0), le32_to_cpu(rdesc->desc1),
+		      le32_to_cpu(rdesc->desc2), le32_to_cpu(rdesc->desc3));
+	}
+}
+
+/*
+ * Debug helper: print the four words of one Rx descriptor, converted from
+ * little-endian to CPU order.  "idx" only labels the output and "ring" is
+ * not used.
+ */
+void xgbe_dump_rx_desc(struct xgbe_ring *ring, struct xgbe_ring_desc *desc,
+		       unsigned int idx)
+{
+	DBGPR("RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n",
+	      idx,
+	      le32_to_cpu(desc->desc0),
+	      le32_to_cpu(desc->desc1),
+	      le32_to_cpu(desc->desc2),
+	      le32_to_cpu(desc->desc3));
+}
+
+/*
+ * Debug helper: dump an skb to the log at alert level.
+ *
+ * Prints the direction ("TX" when tx_rx is true, "RX" otherwise), the
+ * length, the Ethernet header fields and then the data as hex, 32 bytes per
+ * output line with a space after every 4 bytes and two spaces after 16.
+ */
+void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+	unsigned char *buf = skb->data;
+	unsigned char buffer[128];
+	unsigned int i, j;
+
+	netdev_alert(netdev, "\n************** SKB dump ****************\n");
+
+	netdev_alert(netdev, "%s packet of %d bytes\n",
+		     (tx_rx ? "TX" : "RX"), skb->len);
+
+	netdev_alert(netdev, "Dst MAC addr: %pM\n", eth->h_dest);
+	netdev_alert(netdev, "Src MAC addr: %pM\n", eth->h_source);
+	netdev_alert(netdev, "Protocol: 0x%04hx\n", ntohs(eth->h_proto));
+
+	/* i counts bytes consumed, j is the write position in buffer.  A
+	 * full line needs 64 hex digits plus 8 separator characters, well
+	 * within the 128 byte buffer.
+	 */
+	for (i = 0, j = 0; i < skb->len;) {
+		j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx",
+			      buf[i++]);
+
+		if ((i % 32) == 0) {
+			/* Line complete; snprintf left it NUL terminated */
+			netdev_alert(netdev, "  0x%04x: %s\n", i - 32, buffer);
+			j = 0;
+		} else if ((i % 16) == 0) {
+			buffer[j++] = ' ';
+			buffer[j++] = ' ';
+		} else if ((i % 4) == 0) {
+			buffer[j++] = ' ';
+		}
+	}
+	if (i % 32) {
+		/* The separators above overwrite the terminator written by
+		 * snprintf, so terminate explicitly before printing the
+		 * final partial line.  Otherwise a length that is a multiple
+		 * of 4 (but not of 32) prints leftover bytes from an earlier
+		 * line or uninitialised stack contents.
+		 */
+		buffer[j] = '\0';
+		netdev_alert(netdev, "  0x%04x: %s\n", i - (i % 32), buffer);
+	}
+
+	netdev_alert(netdev, "\n************** SKB dump ****************\n");
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
new file mode 100644
index 0000000..8909f2b
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c

@@ -0,0 +1,510 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/phy.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+/*
+ * Describes one statistic exported through ethtool.  The field order is
+ * relied upon by the positional initializer in XGMAC_MMC_STAT.
+ */
+struct xgbe_stats {
+	/* Name reported for the ETH_SS_STATS string set */
+	char stat_string[ETH_GSTRING_LEN];
+	/* Size in bytes of the counter field in struct xgbe_mmc_stats */
+	int stat_size;
+	/* Byte offset of the counter from the start of struct xgbe_prv_data */
+	int stat_offset;
+};
+
+/*
+ * Build one struct xgbe_stats entry for the MMC counter "_var", reported
+ * under the name "_string".
+ */
+#define XGMAC_MMC_STAT(_string, _var)					\
+	{								\
+		.stat_string = _string,					\
+		.stat_size = FIELD_SIZEOF(struct xgbe_mmc_stats, _var),	\
+		.stat_offset = offsetof(struct xgbe_prv_data,		\
+					mmc_stats._var),		\
+	}
+
+/*
+ * Statistics exported through ethtool, transmit counters first and receive
+ * counters second.  xgbe_get_strings() and xgbe_get_ethtool_stats() both
+ * walk this table in order, so the position of an entry determines where
+ * its name and value appear in the output.
+ */
+static const struct xgbe_stats xgbe_gstring_stats[] = {
+	XGMAC_MMC_STAT("tx_bytes", txoctetcount_gb),
+	XGMAC_MMC_STAT("tx_packets", txframecount_gb),
+	XGMAC_MMC_STAT("tx_unicast_packets", txunicastframes_gb),
+	XGMAC_MMC_STAT("tx_broadcast_packets", txbroadcastframes_gb),
+	XGMAC_MMC_STAT("tx_multicast_packets", txmulticastframes_gb),
+	XGMAC_MMC_STAT("tx_vlan_packets", txvlanframes_g),
+	XGMAC_MMC_STAT("tx_64_byte_packets", tx64octets_gb),
+	XGMAC_MMC_STAT("tx_65_to_127_byte_packets", tx65to127octets_gb),
+	XGMAC_MMC_STAT("tx_128_to_255_byte_packets", tx128to255octets_gb),
+	XGMAC_MMC_STAT("tx_256_to_511_byte_packets", tx256to511octets_gb),
+	XGMAC_MMC_STAT("tx_512_to_1023_byte_packets", tx512to1023octets_gb),
+	XGMAC_MMC_STAT("tx_1024_to_max_byte_packets", tx1024tomaxoctets_gb),
+	XGMAC_MMC_STAT("tx_underflow_errors", txunderflowerror),
+	XGMAC_MMC_STAT("tx_pause_frames", txpauseframes),
+
+	XGMAC_MMC_STAT("rx_bytes", rxoctetcount_gb),
+	XGMAC_MMC_STAT("rx_packets", rxframecount_gb),
+	XGMAC_MMC_STAT("rx_unicast_packets", rxunicastframes_g),
+	XGMAC_MMC_STAT("rx_broadcast_packets", rxbroadcastframes_g),
+	XGMAC_MMC_STAT("rx_multicast_packets", rxmulticastframes_g),
+	XGMAC_MMC_STAT("rx_vlan_packets", rxvlanframes_gb),
+	XGMAC_MMC_STAT("rx_64_byte_packets", rx64octets_gb),
+	XGMAC_MMC_STAT("rx_65_to_127_byte_packets", rx65to127octets_gb),
+	XGMAC_MMC_STAT("rx_128_to_255_byte_packets", rx128to255octets_gb),
+	XGMAC_MMC_STAT("rx_256_to_511_byte_packets", rx256to511octets_gb),
+	XGMAC_MMC_STAT("rx_512_to_1023_byte_packets", rx512to1023octets_gb),
+	XGMAC_MMC_STAT("rx_1024_to_max_byte_packets", rx1024tomaxoctets_gb),
+	XGMAC_MMC_STAT("rx_undersize_packets", rxundersize_g),
+	XGMAC_MMC_STAT("rx_oversize_packets", rxoversize_g),
+	XGMAC_MMC_STAT("rx_crc_errors", rxcrcerror),
+	XGMAC_MMC_STAT("rx_crc_errors_small_packets", rxrunterror),
+	XGMAC_MMC_STAT("rx_crc_errors_giant_packets", rxjabbererror),
+	XGMAC_MMC_STAT("rx_length_errors", rxlengtherror),
+	XGMAC_MMC_STAT("rx_out_of_range_errors", rxoutofrangetype),
+	XGMAC_MMC_STAT("rx_fifo_overflow_errors", rxfifooverflow),
+	XGMAC_MMC_STAT("rx_watchdog_errors", rxwatchdogerror),
+	XGMAC_MMC_STAT("rx_pause_frames", rxpauseframes),
+};
+/* Number of entries in the table above */
+#define XGBE_STATS_COUNT	ARRAY_SIZE(xgbe_gstring_stats)
+
+/*
+ * ethtool get_strings handler.  For ETH_SS_STATS, copies the name of every
+ * statistic into "data" as consecutive ETH_GSTRING_LEN sized slots; any
+ * other string set leaves "data" untouched.
+ */
+static void xgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	int i;
+
+	DBGPR("-->%s\n", __func__);
+
+	if (stringset == ETH_SS_STATS) {
+		/* Slot i holds the name of table entry i */
+		for (i = 0; i < XGBE_STATS_COUNT; i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       xgbe_gstring_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+	}
+
+	DBGPR("<--%s\n", __func__);
+}
+
+/*
+ * ethtool get_ethtool_stats handler.  Refreshes the MMC counters and copies
+ * one u64 per table entry into "data", in table order.  Each counter is
+ * located by its recorded byte offset inside the private data structure.
+ */
+static void xgbe_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	u8 *base = (u8 *)pdata;
+	int i;
+
+	DBGPR("-->%s\n", __func__);
+
+	pdata->hw_if.read_mmc_stats(pdata);
+
+	/* Every counter is read as a u64, regardless of stat_size */
+	for (i = 0; i < XGBE_STATS_COUNT; i++)
+		data[i] = *(u64 *)(base + xgbe_gstring_stats[i].stat_offset);
+
+	DBGPR("<--%s\n", __func__);
+}
+
+/*
+ * ethtool get_sset_count handler.  Returns the number of statistics for
+ * ETH_SS_STATS and -EOPNOTSUPP for every other string set.
+ */
+static int xgbe_get_sset_count(struct net_device *netdev, int stringset)
+{
+	int count;
+
+	DBGPR("-->%s\n", __func__);
+
+	if (stringset == ETH_SS_STATS)
+		count = XGBE_STATS_COUNT;
+	else
+		count = -EOPNOTSUPP;
+
+	DBGPR("<--%s\n", __func__);
+
+	return count;
+}
+
+/*
+ * ethtool get_pauseparam handler: report the pause settings currently
+ * recorded in the private data.
+ */
+static void xgbe_get_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	DBGPR("-->xgbe_get_pauseparam\n");
+
+	pause->rx_pause = pdata->rx_pause;
+	pause->tx_pause = pdata->tx_pause;
+	pause->autoneg = pdata->pause_autoneg;
+
+	DBGPR("<--xgbe_get_pauseparam\n");
+}
+
+/*
+ * ethtool set_pauseparam handler.
+ *
+ * Records whether pause is auto-negotiated.  With auto-negotiation the
+ * pause capabilities are added to the PHY advertisement; without it they
+ * are removed and the requested Tx/Rx pause settings are stored.  If the
+ * interface is running, auto-negotiation is restarted.
+ *
+ * Returns -ENODEV when no PHY is attached, otherwise 0 or the result of
+ * phy_start_aneg().
+ */
+static int xgbe_set_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct phy_device *phydev = pdata->phydev;
+	int ret = 0;
+
+	DBGPR("-->xgbe_set_pauseparam\n");
+
+	/* Same guard as xgbe_get_settings and xgbe_set_settings: the PHY is
+	 * dereferenced below, so refuse before changing any state
+	 */
+	if (!phydev)
+		return -ENODEV;
+
+	DBGPR("  autoneg = %d, tx_pause = %d, rx_pause = %d\n",
+	      pause->autoneg, pause->tx_pause, pause->rx_pause);
+
+	pdata->pause_autoneg = pause->autoneg;
+	if (pause->autoneg) {
+		phydev->advertising |= ADVERTISED_Pause;
+		phydev->advertising |= ADVERTISED_Asym_Pause;
+
+	} else {
+		phydev->advertising &= ~ADVERTISED_Pause;
+		phydev->advertising &= ~ADVERTISED_Asym_Pause;
+
+		/* Forced settings are only stored when not negotiating */
+		pdata->tx_pause = pause->tx_pause;
+		pdata->rx_pause = pause->rx_pause;
+	}
+
+	if (netif_running(netdev))
+		ret = phy_start_aneg(phydev);
+
+	DBGPR("<--xgbe_set_pauseparam\n");
+
+	return ret;
+}
+
+/*
+ * ethtool get_settings handler.  Returns -ENODEV when no PHY is attached.
+ * Otherwise fills "cmd" from the PHY under the driver lock, reports the
+ * transceiver as external and returns the phy_ethtool_gset() result.
+ */
+static int xgbe_get_settings(struct net_device *netdev,
+			     struct ethtool_cmd *cmd)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	int rc;
+
+	DBGPR("-->xgbe_get_settings\n");
+
+	if (pdata->phydev == NULL)
+		return -ENODEV;
+
+	spin_lock_irq(&pdata->lock);
+	rc = phy_ethtool_gset(pdata->phydev, cmd);
+	/* Override whatever transceiver type the PHY layer filled in */
+	cmd->transceiver = XCVR_EXTERNAL;
+	spin_unlock_irq(&pdata->lock);
+
+	DBGPR("<--xgbe_get_settings\n");
+
+	return rc;
+}
+
+/*
+ * ethtool set_settings handler.
+ *
+ * Validates the request (PHY address, autoneg value, and for forced mode a
+ * speed of 1000 or 10000 at full duplex), adjusts the PHY's supported mask
+ * for forced versus negotiated operation, then stores the new autoneg,
+ * speed, duplex and advertising values in the PHY device.  If the interface
+ * is running, auto-negotiation is restarted.
+ *
+ * Returns -ENODEV when no PHY is attached, -EINVAL for a rejected request,
+ * otherwise 0 or the result of phy_start_aneg().
+ */
+static int xgbe_set_settings(struct net_device *netdev,
+			     struct ethtool_cmd *cmd)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct phy_device *phydev = pdata->phydev;
+	bool restart_aneg = false;
+	u32 speed;
+	int ret;
+
+	DBGPR("-->xgbe_set_settings\n");
+
+	if (!pdata->phydev)
+		return -ENODEV;
+
+	spin_lock_irq(&pdata->lock);
+
+	speed = ethtool_cmd_speed(cmd);
+
+	/* Every validation failure below leaves with -EINVAL */
+	ret = -EINVAL;
+	if (cmd->phy_address != phydev->addr)
+		goto unlock;
+
+	if ((cmd->autoneg != AUTONEG_ENABLE) &&
+	    (cmd->autoneg != AUTONEG_DISABLE))
+		goto unlock;
+
+	/* Forced mode only accepts 1000 or 10000 at full duplex */
+	if ((cmd->autoneg == AUTONEG_DISABLE) &&
+	    (((speed != SPEED_10000) && (speed != SPEED_1000)) ||
+	     (cmd->duplex != DUPLEX_FULL)))
+		goto unlock;
+
+	if (cmd->autoneg == AUTONEG_ENABLE) {
+		/* Clear settings needed to force speeds */
+		phydev->supported &= ~SUPPORTED_1000baseT_Full;
+		phydev->supported &= ~SUPPORTED_10000baseT_Full;
+	} else {
+		/* Add settings needed to force speed */
+		phydev->supported |= SUPPORTED_1000baseT_Full;
+		phydev->supported |= SUPPORTED_10000baseT_Full;
+	}
+
+	/* Never advertise more than is supported; negotiating with nothing
+	 * left to advertise is rejected
+	 */
+	cmd->advertising &= phydev->supported;
+	if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising)
+		goto unlock;
+
+	ret = 0;
+	phydev->autoneg = cmd->autoneg;
+	phydev->speed = speed;
+	phydev->duplex = cmd->duplex;
+	phydev->advertising = cmd->advertising;
+
+	if (cmd->autoneg == AUTONEG_ENABLE)
+		phydev->advertising |= ADVERTISED_Autoneg;
+	else
+		phydev->advertising &= ~ADVERTISED_Autoneg;
+
+	/* Only reached on success: note whether negotiation must restart */
+	restart_aneg = netif_running(netdev);
+
+unlock:
+	spin_unlock_irq(&pdata->lock);
+
+	/* phy_start_aneg() acquires the PHY device mutex and may sleep, so
+	 * it must run after the spinlock is dropped and interrupts are back
+	 * on rather than inside the locked region
+	 */
+	if (restart_aneg)
+		ret = phy_start_aneg(phydev);
+
+	DBGPR("<--xgbe_set_settings\n");
+
+	return ret;
+}
+
+/*
+ * ethtool get_drvinfo handler: report the driver name and version, the
+ * device name as bus info, a firmware version string assembled from three
+ * fields of the MAC_VR register, and the number of exported statistics.
+ */
+static void xgbe_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	drvinfo->n_stats = XGBE_STATS_COUNT;
+
+	strlcpy(drvinfo->driver, XGBE_DRV_NAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, XGBE_DRV_VERSION, sizeof(drvinfo->version));
+	strlcpy(drvinfo->bus_info, dev_name(pdata->dev),
+		sizeof(drvinfo->bus_info));
+
+	/* Formatted as USERVER.DEVID.SNPSVER */
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%d",
+		 XGMAC_IOREAD_BITS(pdata, MAC_VR, USERVER),
+		 XGMAC_IOREAD_BITS(pdata, MAC_VR, DEVID),
+		 XGMAC_IOREAD_BITS(pdata, MAC_VR, SNPSVER));
+}
+
+/*
+ * ethtool get_coalesce handler.  Clears "ec" and fills in the four values
+ * the driver keeps: Rx usecs (converted from the stored rx_riwt value), Rx
+ * frames, Tx usecs and Tx frames.  Always returns 0.
+ */
+static int xgbe_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	DBGPR("-->xgbe_get_coalesce\n");
+
+	/* Everything not set below is reported as zero */
+	memset(ec, 0, sizeof(*ec));
+
+	ec->rx_coalesce_usecs = pdata->hw_if.riwt_to_usec(pdata,
+							  pdata->rx_riwt);
+	ec->rx_max_coalesced_frames = pdata->rx_frames;
+
+	ec->tx_coalesce_usecs = pdata->tx_usecs;
+	ec->tx_max_coalesced_frames = pdata->tx_frames;
+
+	DBGPR("<--xgbe_get_coalesce\n");
+
+	return 0;
+}
+
+/*
+ * ethtool set_coalesce handler.
+ *
+ * Only rx-usecs, rx-frames, tx-usecs and tx-frames are supported; any other
+ * non-zero parameter yields -EOPNOTSUPP.  Returns -EINVAL when rx-frames
+ * would change while the interface is running, when the converted Rx timer
+ * exceeds XGMAC_MAX_DMA_RIWT, or when a frame count exceeds the descriptor
+ * count of the first channel's ring.  On success the values are stored and
+ * programmed into the hardware, and 0 is returned.
+ */
+static int xgbe_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int new_rx_frames, new_rx_riwt, max_rx_usecs;
+	unsigned int new_tx_frames, new_tx_usecs;
+
+	DBGPR("-->xgbe_set_coalesce\n");
+
+	/* Reject every parameter this driver has no support for */
+	if (ec->rx_coalesce_usecs_irq ||
+	    ec->rx_max_coalesced_frames_irq ||
+	    ec->tx_coalesce_usecs_irq ||
+	    ec->tx_max_coalesced_frames_irq ||
+	    ec->stats_block_coalesce_usecs ||
+	    ec->use_adaptive_rx_coalesce ||
+	    ec->use_adaptive_tx_coalesce ||
+	    ec->pkt_rate_low ||
+	    ec->rx_coalesce_usecs_low ||
+	    ec->rx_max_coalesced_frames_low ||
+	    ec->tx_coalesce_usecs_low ||
+	    ec->tx_max_coalesced_frames_low ||
+	    ec->pkt_rate_high ||
+	    ec->rx_coalesce_usecs_high ||
+	    ec->rx_max_coalesced_frames_high ||
+	    ec->tx_coalesce_usecs_high ||
+	    ec->tx_max_coalesced_frames_high ||
+	    ec->rate_sample_interval)
+		return -EOPNOTSUPP;
+
+	/* Can only change rx-frames when interface is down (see
+	 * rx_descriptor_init in xgbe-dev.c)
+	 */
+	new_rx_frames = pdata->rx_frames;
+	if (netif_running(netdev) &&
+	    new_rx_frames != ec->rx_max_coalesced_frames) {
+		netdev_alert(netdev,
+			     "interface must be down to change rx-frames\n");
+		return -EINVAL;
+	}
+
+	new_rx_frames = ec->rx_max_coalesced_frames;
+	new_rx_riwt = hw_if->usec_to_riwt(pdata, ec->rx_coalesce_usecs);
+
+	/* A non-zero request that converts to zero gets the smallest value */
+	if (!new_rx_riwt && ec->rx_coalesce_usecs)
+		new_rx_riwt = 1;
+
+	/* Rx limits: timer value first, then frame count */
+	if (new_rx_riwt > XGMAC_MAX_DMA_RIWT) {
+		max_rx_usecs = hw_if->riwt_to_usec(pdata, XGMAC_MAX_DMA_RIWT);
+		netdev_alert(netdev, "rx-usec is limited to %d usecs\n",
+			     max_rx_usecs);
+		return -EINVAL;
+	}
+	if (new_rx_frames > pdata->channel->rx_ring->rdesc_count) {
+		netdev_alert(netdev, "rx-frames is limited to %d frames\n",
+			     pdata->channel->rx_ring->rdesc_count);
+		return -EINVAL;
+	}
+
+	new_tx_usecs = ec->tx_coalesce_usecs;
+	new_tx_frames = ec->tx_max_coalesced_frames;
+
+	/* Tx limit: frame count only */
+	if (new_tx_frames > pdata->channel->tx_ring->rdesc_count) {
+		netdev_alert(netdev, "tx-frames is limited to %d frames\n",
+			     pdata->channel->tx_ring->rdesc_count);
+		return -EINVAL;
+	}
+
+	/* All checks passed: store and program Rx, then Tx */
+	pdata->rx_riwt = new_rx_riwt;
+	pdata->rx_frames = new_rx_frames;
+	hw_if->config_rx_coalesce(pdata);
+
+	pdata->tx_usecs = new_tx_usecs;
+	pdata->tx_frames = new_tx_frames;
+	hw_if->config_tx_coalesce(pdata);
+
+	DBGPR("<--xgbe_set_coalesce\n");
+
+	return 0;
+}
+
+/* ethtool callbacks provided by this driver */
+static const struct ethtool_ops xgbe_ethtool_ops = {
+	/* Driver and link information */
+	.get_drvinfo = xgbe_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+
+	/* Link settings and flow control */
+	.get_settings = xgbe_get_settings,
+	.set_settings = xgbe_set_settings,
+	.get_pauseparam = xgbe_get_pauseparam,
+	.set_pauseparam = xgbe_set_pauseparam,
+
+	/* Interrupt coalescing */
+	.get_coalesce = xgbe_get_coalesce,
+	.set_coalesce = xgbe_set_coalesce,
+
+	/* Statistics */
+	.get_sset_count = xgbe_get_sset_count,
+	.get_strings = xgbe_get_strings,
+	.get_ethtool_stats = xgbe_get_ethtool_stats,
+};
+
+/*
+ * Accessor for the driver's ethtool_ops table.  The table is defined const;
+ * the qualifier is cast away because this function returns a non-const
+ * pointer.
+ */
+struct ethtool_ops *xgbe_get_ethtool_ops(void)
+{
+	const struct ethtool_ops *ops = &xgbe_ethtool_ops;
+
+	return (struct ethtool_ops *)ops;
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
new file mode 100644
index 0000000..c83584a
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c

@@ -0,0 +1,512 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/clk.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(XGBE_DRV_VERSION);
+MODULE_DESCRIPTION(XGBE_DRV_DESC);
+
+/*
+ * Allocate the DMA channel array plus the Tx and Rx ring arrays and wire
+ * each channel up to its rings.
+ *
+ * The number of channels is the larger of the Tx and Rx ring counts, so a
+ * channel may end up with only a Tx ring or only an Rx ring.  All memory
+ * comes from devm_kcalloc(), hence there is no explicit free on the
+ * failure paths.
+ *
+ * Returns the channel array (and stores the channel count in
+ * pdata->channel_count), or NULL if any allocation fails.
+ */
+static struct xgbe_channel *xgbe_alloc_rings(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel_mem, *channel;
+	struct xgbe_ring *tx_ring, *rx_ring;
+	unsigned int count, i;
+
+	DBGPR("-->xgbe_alloc_rings\n");
+
+	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+
+	channel_mem = devm_kcalloc(pdata->dev, count,
+				   sizeof(struct xgbe_channel), GFP_KERNEL);
+	if (!channel_mem)
+		return NULL;
+
+	tx_ring = devm_kcalloc(pdata->dev, pdata->tx_ring_count,
+			       sizeof(struct xgbe_ring), GFP_KERNEL);
+	if (!tx_ring)
+		return NULL;
+
+	rx_ring = devm_kcalloc(pdata->dev, pdata->rx_ring_count,
+			       sizeof(struct xgbe_ring), GFP_KERNEL);
+	if (!rx_ring)
+		return NULL;
+
+	for (i = 0, channel = channel_mem; i < count; i++, channel++) {
+		snprintf(channel->name, sizeof(channel->name), "channel-%d", i);
+		channel->pdata = pdata;
+		channel->queue_index = i;
+		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
+				    (DMA_CH_INC * i);
+
+		if (i < pdata->tx_ring_count) {
+			spin_lock_init(&tx_ring->lock);
+			channel->tx_ring = tx_ring++;
+		}
+
+		if (i < pdata->rx_ring_count) {
+			/*
+			 * Initialize the Rx ring's own lock.  tx_ring has
+			 * already been advanced above, so touching it here
+			 * would hit the wrong ring (or run past the array).
+			 */
+			spin_lock_init(&rx_ring->lock);
+			channel->rx_ring = rx_ring++;
+		}
+
+		DBGPR("  %s - queue_index=%u, dma_regs=%p, tx=%p, rx=%p\n",
+		      channel->name, channel->queue_index, channel->dma_regs,
+		      channel->tx_ring, channel->rx_ring);
+	}
+
+	pdata->channel_count = count;
+
+	DBGPR("<--xgbe_alloc_rings\n");
+
+	return channel_mem;
+}
+
+/* Load the driver's built-in default settings into the private data */
+static void xgbe_default_config(struct xgbe_prv_data *pdata)
+{
+	DBGPR("-->xgbe_default_config\n");
+
+	/* DMA burst settings */
+	pdata->pblx8 = DMA_PBL_X8_ENABLE;
+	pdata->tx_pbl = DMA_PBL_16;
+	pdata->rx_pbl = DMA_PBL_16;
+
+	/* Tx path */
+	pdata->tx_sf_mode = MTL_TSF_ENABLE;
+	pdata->tx_threshold = MTL_TX_THRESHOLD_64;
+	pdata->tx_osp_mode = DMA_OSP_ENABLE;
+
+	/* Rx path */
+	pdata->rx_sf_mode = MTL_RSF_DISABLE;
+	pdata->rx_threshold = MTL_RX_THRESHOLD_64;
+
+	/* Flow control */
+	pdata->pause_autoneg = 1;
+	pdata->tx_pause = 1;
+	pdata->rx_pause = 1;
+
+	/* Power state and link defaults */
+	pdata->power_down = 0;
+	pdata->default_autoneg = AUTONEG_ENABLE;
+	pdata->default_speed = SPEED_10000;
+
+	DBGPR("<--xgbe_default_config\n");
+}
+
+/* Fill in the hardware and descriptor function pointer tables */
+static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_hw_if *hw_ops = &pdata->hw_if;
+	struct xgbe_desc_if *desc_ops = &pdata->desc_if;
+
+	xgbe_init_function_ptrs_dev(hw_ops);
+	xgbe_init_function_ptrs_desc(desc_ops);
+}
+
+/*
+ * Platform probe callback.
+ *
+ * Allocates the multi-queue net device, obtains the clock, MMIO regions
+ * and IRQ, resets the hardware and reads its feature set, pulls the MAC
+ * address and PHY mode from the device tree, sizes and allocates the DMA
+ * rings, registers with MDIO and finally registers the net device.
+ *
+ * Returns 0 on success or a negative errno.  The error labels at the end
+ * unwind in reverse order of setup.
+ */
+static int xgbe_probe(struct platform_device *pdev)
+{
+	struct xgbe_prv_data *pdata;
+	struct xgbe_hw_if *hw_if;
+	struct net_device *netdev;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	const u8 *mac_addr;
+	int ret;
+
+	DBGPR("--> xgbe_probe\n");
+
+	netdev = alloc_etherdev_mq(sizeof(struct xgbe_prv_data),
+				   XGBE_MAX_DMA_CHANNELS);
+	if (!netdev) {
+		dev_err(dev, "alloc_etherdev failed\n");
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+	SET_NETDEV_DEV(netdev, dev);
+	pdata = netdev_priv(netdev);
+	pdata->netdev = netdev;
+	pdata->pdev = pdev;
+	pdata->dev = dev;
+	platform_set_drvdata(pdev, netdev);
+
+	spin_lock_init(&pdata->lock);
+	mutex_init(&pdata->xpcs_mutex);
+
+	/* Set and validate the number of descriptors for a ring */
+	BUILD_BUG_ON_NOT_POWER_OF_2(TX_DESC_CNT);
+	pdata->tx_desc_count = TX_DESC_CNT;
+	if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) {
+		dev_err(dev, "tx descriptor count (%d) is not valid\n",
+			pdata->tx_desc_count);
+		ret = -EINVAL;
+		goto err_io;
+	}
+	BUILD_BUG_ON_NOT_POWER_OF_2(RX_DESC_CNT);
+	pdata->rx_desc_count = RX_DESC_CNT;
+	if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) {
+		dev_err(dev, "rx descriptor count (%d) is not valid\n",
+			pdata->rx_desc_count);
+		ret = -EINVAL;
+		goto err_io;
+	}
+
+	/* Obtain the system clock setting */
+	pdata->sysclock = devm_clk_get(dev, NULL);
+	if (IS_ERR(pdata->sysclock)) {
+		dev_err(dev, "devm_clk_get failed\n");
+		ret = PTR_ERR(pdata->sysclock);
+		goto err_io;
+	}
+
+	/* Obtain the mmio areas for the device */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pdata->xgmac_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->xgmac_regs)) {
+		dev_err(dev, "xgmac ioremap failed\n");
+		ret = PTR_ERR(pdata->xgmac_regs);
+		goto err_io;
+	}
+	DBGPR("  xgmac_regs = %p\n", pdata->xgmac_regs);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	pdata->xpcs_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->xpcs_regs)) {
+		dev_err(dev, "xpcs ioremap failed\n");
+		ret = PTR_ERR(pdata->xpcs_regs);
+		goto err_io;
+	}
+	DBGPR("  xpcs_regs  = %p\n", pdata->xpcs_regs);
+
+	/* Set the DMA mask */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+	*(dev->dma_mask) = DMA_BIT_MASK(40);
+	dev->coherent_dma_mask = DMA_BIT_MASK(40);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "platform_get_irq failed\n");
+		goto err_io;
+	}
+	netdev->irq = ret;
+	netdev->base_addr = (unsigned long)pdata->xgmac_regs;
+
+	/* Set all the function pointers */
+	xgbe_init_all_fptrs(pdata);
+	hw_if = &pdata->hw_if;
+
+	/* Issue software reset to device */
+	hw_if->exit(pdata);
+
+	/* Populate the hardware features */
+	xgbe_get_all_hw_features(pdata);
+
+	/* Retrieve the MAC address */
+	mac_addr = of_get_mac_address(dev->of_node);
+	if (!mac_addr) {
+		dev_err(dev, "invalid mac address for this device\n");
+		ret = -EINVAL;
+		goto err_io;
+	}
+	memcpy(netdev->dev_addr, mac_addr, netdev->addr_len);
+
+	/* Retrieve the PHY mode - it must be "xgmii" */
+	pdata->phy_mode = of_get_phy_mode(dev->of_node);
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) {
+		dev_err(dev, "invalid phy-mode specified for this device\n");
+		ret = -EINVAL;
+		goto err_io;
+	}
+
+	/* Set default configuration data */
+	xgbe_default_config(pdata);
+
+	/* Calculate the number of Tx and Rx rings to be created */
+	pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
+				     pdata->hw_feat.tx_ch_cnt);
+	/*
+	 * Capture the result in ret: at this point ret still holds the
+	 * (non-negative) IRQ number, which must not leak out as the probe
+	 * return value on failure.
+	 */
+	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
+	if (ret) {
+		dev_err(dev, "error setting real tx queue count\n");
+		goto err_io;
+	}
+
+	pdata->rx_ring_count = min_t(unsigned int,
+				     netif_get_num_default_rss_queues(),
+				     pdata->hw_feat.rx_ch_cnt);
+	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
+	if (ret) {
+		dev_err(dev, "error setting real rx queue count\n");
+		goto err_io;
+	}
+
+	/* Allocate the rings for the DMA channels */
+	pdata->channel = xgbe_alloc_rings(pdata);
+	if (!pdata->channel) {
+		dev_err(dev, "ring allocation failed\n");
+		ret = -ENOMEM;
+		goto err_io;
+	}
+
+	/* Prepare to register with MDIO */
+	pdata->mii_bus_id = kasprintf(GFP_KERNEL, "%s", pdev->name);
+	if (!pdata->mii_bus_id) {
+		dev_err(dev, "failed to allocate mii bus id\n");
+		ret = -ENOMEM;
+		goto err_io;
+	}
+	ret = xgbe_mdio_register(pdata);
+	if (ret)
+		goto err_bus_id;
+
+	/* Set network and ethtool operations */
+	netdev->netdev_ops = xgbe_get_netdev_ops();
+	netdev->ethtool_ops = xgbe_get_ethtool_ops();
+
+	/* Set device features */
+	netdev->hw_features = NETIF_F_SG |
+			      NETIF_F_IP_CSUM |
+			      NETIF_F_IPV6_CSUM |
+			      NETIF_F_RXCSUM |
+			      NETIF_F_TSO |
+			      NETIF_F_TSO6 |
+			      NETIF_F_GRO |
+			      NETIF_F_HW_VLAN_CTAG_RX |
+			      NETIF_F_HW_VLAN_CTAG_TX;
+
+	netdev->vlan_features |= NETIF_F_SG |
+				 NETIF_F_IP_CSUM |
+				 NETIF_F_IPV6_CSUM |
+				 NETIF_F_TSO |
+				 NETIF_F_TSO6;
+
+	netdev->features |= netdev->hw_features;
+	pdata->netdev_features = netdev->features;
+
+	xgbe_init_rx_coalesce(pdata);
+	xgbe_init_tx_coalesce(pdata);
+
+	netif_carrier_off(netdev);
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(dev, "net device registration failed\n");
+		goto err_reg_netdev;
+	}
+
+	xgbe_debugfs_init(pdata);
+
+	netdev_notice(netdev, "net device enabled\n");
+
+	DBGPR("<-- xgbe_probe\n");
+
+	return 0;
+
+err_reg_netdev:
+	xgbe_mdio_unregister(pdata);
+
+err_bus_id:
+	kfree(pdata->mii_bus_id);
+
+err_io:
+	free_netdev(netdev);
+
+err_alloc:
+	dev_notice(dev, "net device not enabled\n");
+
+	return ret;
+}
+
+/*
+ * Platform remove callback.  Undoes what xgbe_probe() set up: the debugfs
+ * entries, the registered net device, the MDIO bus/PHY connection, the
+ * MII bus id string and finally the net device allocation itself.
+ *
+ * Always returns 0.
+ */
+static int xgbe_remove(struct platform_device *pdev)
+{
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	DBGPR("-->xgbe_remove\n");
+
+	xgbe_debugfs_exit(pdata);
+
+	unregister_netdev(netdev);
+
+	xgbe_mdio_unregister(pdata);
+
+	/* Allocated with kasprintf() in xgbe_probe() */
+	kfree(pdata->mii_bus_id);
+
+	/* pdata lives inside netdev, so it must not be used after this */
+	free_netdev(netdev);
+
+	DBGPR("<--xgbe_remove\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * System suspend callback: power the device down if the interface is up.
+ *
+ * An interface that is not running needs no work.  That case is reported
+ * as success, because a non-zero return from a suspend callback makes the
+ * PM core abort the whole system suspend.
+ *
+ * Returns 0 or the error from xgbe_powerdown().
+ */
+static int xgbe_suspend(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	int ret = 0;
+
+	DBGPR("-->xgbe_suspend\n");
+
+	if (netif_running(netdev))
+		ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+
+	DBGPR("<--xgbe_suspend\n");
+
+	return ret;
+}
+
+/*
+ * System resume callback: power the device back up if the interface is
+ * up.
+ *
+ * An interface that is not running has nothing to restore, so that case
+ * is reported as success rather than as an error.
+ *
+ * Returns 0 or the error from xgbe_powerup().
+ */
+static int xgbe_resume(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	int ret = 0;
+
+	DBGPR("-->xgbe_resume\n");
+
+	if (netif_running(netdev))
+		ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+
+	DBGPR("<--xgbe_resume\n");
+
+	return ret;
+}
+#endif /* CONFIG_PM */
+
+/* Device tree "compatible" strings this driver binds to */
+static const struct of_device_id xgbe_of_match[] = {
+	{ .compatible = "amd,xgbe-seattle-v1a", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, xgbe_of_match);
+/* Power management operations built from xgbe_suspend()/xgbe_resume() */
+static SIMPLE_DEV_PM_OPS(xgbe_pm_ops, xgbe_suspend, xgbe_resume);
+
+/* Platform driver description tying together probe/remove, OF match and PM */
+static struct platform_driver xgbe_driver = {
+	.driver = {
+		.name = "amd-xgbe",
+		.of_match_table = xgbe_of_match,
+		.pm = &xgbe_pm_ops,
+	},
+	.probe = xgbe_probe,
+	.remove = xgbe_remove,
+};
+
+module_platform_driver(xgbe_driver);

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
new file mode 100644
index 0000000..ea7a5d6
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c

@@ -0,0 +1,433 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/spinlock.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+
+/*
+ * MDIO bus read callback: fetch one MMD register through the hardware
+ * interface and return the result unchanged.
+ */
+static int xgbe_mdio_read(struct mii_bus *mii, int prtad, int mmd_reg)
+{
+	struct xgbe_prv_data *pdata = mii->priv;
+	int value;
+
+	DBGPR_MDIO("-->xgbe_mdio_read: prtad=%#x mmd_reg=%#x\n",
+		   prtad, mmd_reg);
+
+	value = pdata->hw_if.read_mmd_regs(pdata, prtad, mmd_reg);
+
+	DBGPR_MDIO("<--xgbe_mdio_read: mmd_data=%#x\n", value);
+
+	return value;
+}
+
+/*
+ * MDIO bus write callback: store one MMD register through the hardware
+ * interface.  Always returns 0.
+ */
+static int xgbe_mdio_write(struct mii_bus *mii, int prtad, int mmd_reg,
+			   u16 mmd_val)
+{
+	struct xgbe_prv_data *pdata = mii->priv;
+	int value = mmd_val;
+
+	DBGPR_MDIO("-->xgbe_mdio_write: prtad=%#x mmd_reg=%#x mmd_data=%#x\n",
+		   prtad, mmd_reg, value);
+
+	pdata->hw_if.write_mmd_regs(pdata, prtad, mmd_reg, value);
+
+	DBGPR_MDIO("<--xgbe_mdio_write\n");
+
+	return 0;
+}
+
+/*
+ * Link change handler passed to phy_connect_direct().
+ *
+ * Under pdata->lock, brings the cached PHY state in pdata (pause
+ * settings, speed, link) in line with what the PHY device reports,
+ * reprogramming flow control and MAC speed where they changed, and
+ * prints the PHY status whenever link or speed changed.
+ */
+static void xgbe_adjust_link(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct phy_device *phydev = pdata->phydev;
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned long irq_flags;
+	int changed = 0;
+
+	if (!phydev)
+		return;
+
+	DBGPR_MDIO("-->xgbe_adjust_link: address=%d, newlink=%d, curlink=%d\n",
+		   phydev->addr, phydev->link, pdata->phy_link);
+
+	spin_lock_irqsave(&pdata->lock, irq_flags);
+
+	if (!phydev->link) {
+		/* Link is down: only react if it was recorded as up */
+		if (pdata->phy_link) {
+			changed = 1;
+			pdata->phy_link = 0;
+			pdata->phy_speed = SPEED_UNKNOWN;
+		}
+	} else {
+		/* Flow control: follow the PHY when pause autoneg is on */
+		if (pdata->pause_autoneg) {
+			if (!phydev->pause && !phydev->asym_pause) {
+				pdata->tx_pause = 0;
+				pdata->rx_pause = 0;
+			} else {
+				pdata->tx_pause = 1;
+				pdata->rx_pause = 1;
+			}
+		}
+
+		/* Reprogram only the directions whose setting changed */
+		if (pdata->phy_tx_pause != pdata->tx_pause) {
+			hw_if->config_tx_flow_control(pdata);
+			pdata->phy_tx_pause = pdata->tx_pause;
+		}
+
+		if (pdata->phy_rx_pause != pdata->rx_pause) {
+			hw_if->config_rx_flow_control(pdata);
+			pdata->phy_rx_pause = pdata->rx_pause;
+		}
+
+		/* Speed: other values leave the MAC speed untouched */
+		if (pdata->phy_speed != phydev->speed) {
+			changed = 1;
+
+			if (phydev->speed == SPEED_10000)
+				hw_if->set_xgmii_speed(pdata);
+			else if (phydev->speed == SPEED_2500)
+				hw_if->set_gmii_2500_speed(pdata);
+			else if (phydev->speed == SPEED_1000)
+				hw_if->set_gmii_speed(pdata);
+
+			pdata->phy_speed = phydev->speed;
+		}
+
+		/* Link came up */
+		if (phydev->link != pdata->phy_link) {
+			changed = 1;
+			pdata->phy_link = 1;
+		}
+	}
+
+	if (changed)
+		phy_print_status(phydev);
+
+	spin_unlock_irqrestore(&pdata->lock, irq_flags);
+
+	DBGPR_MDIO("<--xgbe_adjust_link\n");
+}
+
+/*
+ * Debug helper: print a set of PCS and auto-negotiation MMD registers,
+ * followed by the Clause 45 device mask and per-MMD ids of the PHY at
+ * XGBE_PRTAD, using dev_alert().
+ *
+ * NOTE(review): phydev is taken from pdata->mii->phy_map[XGBE_PRTAD] and
+ * dereferenced without a NULL check - presumably callers only invoke this
+ * once the PHY has been registered; confirm.
+ */
+void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata)
+{
+	struct device *dev = pdata->dev;
+	struct phy_device *phydev = pdata->mii->phy_map[XGBE_PRTAD];
+	int i;
+
+	dev_alert(dev, "\n************* PHY Reg dump **********************\n");
+
+	/* PCS MMD registers */
+	dev_alert(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1));
+	dev_alert(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1));
+	dev_alert(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1));
+	dev_alert(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2));
+	dev_alert(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1));
+	dev_alert(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2,
+		  XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2));
+
+	/* Auto-negotiation MMD registers */
+	dev_alert(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1));
+	dev_alert(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1));
+	dev_alert(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n",
+		  MDIO_AN_ADVERTISE,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE));
+	dev_alert(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n",
+		  MDIO_AN_ADVERTISE + 1,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1));
+	dev_alert(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n",
+		  MDIO_AN_ADVERTISE + 2,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2));
+	dev_alert(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n",
+		  MDIO_AN_COMP_STAT,
+		  XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT));
+
+	/* Clause 45 identification data cached in the PHY device */
+	dev_alert(dev, "MMD Device Mask = %#x\n",
+		  phydev->c45_ids.devices_in_package);
+	for (i = 0; i < ARRAY_SIZE(phydev->c45_ids.device_ids); i++)
+		dev_alert(dev, "  MMD %d: ID = %#08x\n", i,
+			  phydev->c45_ids.device_ids[i]);
+
+	dev_alert(dev, "\n*************************************************\n");
+}
+
+/*
+ * Create and register the MDIO bus for this device, probe the PCS at
+ * XGBE_PRTAD using Clause 45, register it as a PHY device, pin the PHY
+ * driver module and connect the PHY to the net device.
+ *
+ * On success pdata->mii, pdata->phydev and pdata->phy_module are set and
+ * 0 is returned.  On failure a negative errno is returned and the error
+ * labels at the end unwind what was set up.
+ */
+int xgbe_mdio_register(struct xgbe_prv_data *pdata)
+{
+	struct net_device *netdev = pdata->netdev;
+	struct device_node *phy_node;
+	struct mii_bus *mii;
+	struct phy_device *phydev;
+	int ret = 0;
+
+	DBGPR("-->xgbe_mdio_register\n");
+
+	/* Retrieve the phy-handle */
+	phy_node = of_parse_phandle(pdata->dev->of_node, "phy-handle", 0);
+	if (!phy_node) {
+		dev_err(pdata->dev, "unable to parse phy-handle\n");
+		return -EINVAL;
+	}
+
+	/* Register with the MDIO bus */
+	mii = mdiobus_alloc();
+	if (mii == NULL) {
+		dev_err(pdata->dev, "mdiobus_alloc failed\n");
+		ret = -ENOMEM;
+		goto err_node_get;
+	}
+
+	/* Register on the MDIO bus (don't probe any PHYs) */
+	mii->name = XGBE_PHY_NAME;
+	mii->read = xgbe_mdio_read;
+	mii->write = xgbe_mdio_write;
+	snprintf(mii->id, sizeof(mii->id), "%s", pdata->mii_bus_id);
+	mii->priv = pdata;
+	mii->phy_mask = ~0;
+	mii->parent = pdata->dev;
+	ret = mdiobus_register(mii);
+	if (ret) {
+		dev_err(pdata->dev, "mdiobus_register failed\n");
+		goto err_mdiobus_alloc;
+	}
+	DBGPR("  mdiobus_register succeeded for %s\n", pdata->mii_bus_id);
+
+	/* Probe the PCS using Clause 45 */
+	phydev = get_phy_device(mii, XGBE_PRTAD, true);
+	if (IS_ERR(phydev) || !phydev) {
+		dev_err(pdata->dev, "get_phy_device failed\n");
+		ret = phydev ? PTR_ERR(phydev) : -ENOLINK;
+		goto err_mdiobus_register;
+	}
+	if (!phydev->c45_ids.device_ids[MDIO_MMD_PCS]) {
+		/*
+		 * A device was returned but it reports no PCS id.  phydev
+		 * is a valid pointer here, so PTR_ERR() must not be applied
+		 * to it, and the not yet registered device has to be
+		 * released again.
+		 */
+		dev_err(pdata->dev, "get_phy_device failed\n");
+		ret = -ENOLINK;
+		goto err_phy_device;
+	}
+	request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT,
+		       MDIO_ID_ARGS(phydev->c45_ids.device_ids[MDIO_MMD_PCS]));
+
+	/* Extra node reference for phydev; ours is dropped separately */
+	of_node_get(phy_node);
+	phydev->dev.of_node = phy_node;
+	ret = phy_device_register(phydev);
+	if (ret) {
+		dev_err(pdata->dev, "phy_device_register failed\n");
+		of_node_put(phy_node);
+		goto err_phy_device;
+	}
+
+	/* Add a reference to the PHY driver so it can't be unloaded */
+	pdata->phy_module = phydev->dev.driver ?
+			    phydev->dev.driver->owner : NULL;
+	if (!try_module_get(pdata->phy_module)) {
+		dev_err(pdata->dev, "try_module_get failed\n");
+		ret = -EIO;
+		goto err_phy_device;
+	}
+
+	pdata->mii = mii;
+	pdata->mdio_mmd = MDIO_MMD_PCS;
+
+	pdata->phy_link = -1;
+	pdata->phy_speed = SPEED_UNKNOWN;
+	pdata->phy_tx_pause = pdata->tx_pause;
+	pdata->phy_rx_pause = pdata->rx_pause;
+
+	ret = phy_connect_direct(netdev, phydev, &xgbe_adjust_link,
+				 pdata->phy_mode);
+	if (ret) {
+		netdev_err(netdev, "phy_connect_direct failed\n");
+		goto err_phy_module;
+	}
+
+	if (!phydev->drv || (phydev->drv->phy_id == 0)) {
+		netdev_err(netdev, "phy_id not valid\n");
+		ret = -ENODEV;
+		goto err_phy_connect;
+	}
+	DBGPR("  phy_connect_direct succeeded for PHY %s, link=%d\n",
+	      dev_name(&phydev->dev), phydev->link);
+
+	phydev->autoneg = pdata->default_autoneg;
+	if (phydev->autoneg == AUTONEG_DISABLE) {
+		/* Add settings needed to force speed */
+		phydev->supported |= SUPPORTED_1000baseT_Full;
+		phydev->supported |= SUPPORTED_10000baseT_Full;
+
+		phydev->speed = pdata->default_speed;
+		phydev->duplex = DUPLEX_FULL;
+
+		phydev->advertising &= ~ADVERTISED_Autoneg;
+	}
+
+	pdata->phydev = phydev;
+
+	/* Drop the reference obtained from of_parse_phandle() */
+	of_node_put(phy_node);
+
+	DBGPHY_REGS(pdata);
+
+	DBGPR("<--xgbe_mdio_register\n");
+
+	return 0;
+
+err_phy_connect:
+	phy_disconnect(phydev);
+
+err_phy_module:
+	/* Give back the PHY driver module reference taken above */
+	module_put(pdata->phy_module);
+	pdata->phy_module = NULL;
+
+err_phy_device:
+	/*
+	 * NOTE(review): this label is also reached after
+	 * phy_device_register() succeeded; confirm that phy_device_free()
+	 * followed by mdiobus_unregister() is the right teardown for an
+	 * already registered PHY device.
+	 */
+	phy_device_free(phydev);
+
+err_mdiobus_register:
+	mdiobus_unregister(mii);
+
+err_mdiobus_alloc:
+	mdiobus_free(mii);
+
+err_node_get:
+	of_node_put(phy_node);
+
+	return ret;
+}
+
+/*
+ * Tear down what xgbe_mdio_register() set up: disconnect the PHY, drop
+ * the reference held on the PHY driver module, then unregister and free
+ * the MDIO bus.  The cached pointers in pdata are cleared along the way.
+ */
+void xgbe_mdio_unregister(struct xgbe_prv_data *pdata)
+{
+	DBGPR("-->xgbe_mdio_unregister\n");
+
+	phy_disconnect(pdata->phydev);
+	pdata->phydev = NULL;
+
+	/* Matches the try_module_get() in xgbe_mdio_register() */
+	module_put(pdata->phy_module);
+	pdata->phy_module = NULL;
+
+	mdiobus_unregister(pdata->mii);
+	/* Break the bus -> pdata back-reference before the bus is freed */
+	pdata->mii->priv = NULL;
+
+	mdiobus_free(pdata->mii);
+	pdata->mii = NULL;
+
+	DBGPR("<--xgbe_mdio_unregister\n");
+}

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
new file mode 100644
index 0000000..ab06271
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h

@@ -0,0 +1,676 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __XGBE_H__
+#define __XGBE_H__
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/phy.h>
+
+
+#define XGBE_DRV_NAME		"amd-xgbe"
+#define XGBE_DRV_VERSION	"1.0.0-a"
+#define XGBE_DRV_DESC		"AMD 10 Gigabit Ethernet Driver"
+
+/* Descriptor related defines */
+#define TX_DESC_CNT		512
+#define TX_DESC_MIN_FREE	(TX_DESC_CNT >> 3)
+#define TX_DESC_MAX_PROC	(TX_DESC_CNT >> 1)
+#define RX_DESC_CNT		512
+
+#define TX_MAX_BUF_SIZE		(0x3fff & ~(64 - 1))
+
+#define RX_MIN_BUF_SIZE		(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
+#define RX_BUF_ALIGN		64
+
+#define XGBE_MAX_DMA_CHANNELS	16
+#define DMA_ARDOMAIN_SETTING	0x2
+#define DMA_ARCACHE_SETTING	0xb
+#define DMA_AWDOMAIN_SETTING	0x2
+#define DMA_AWCACHE_SETTING	0x7
+#define DMA_INTERRUPT_MASK	0x31c7
+
+#define XGMAC_MIN_PACKET	60
+#define XGMAC_STD_PACKET_MTU	1500
+#define XGMAC_MAX_STD_PACKET	1518
+#define XGMAC_JUMBO_PACKET_MTU	9000
+#define XGMAC_MAX_JUMBO_PACKET	9018
+
+#define MAX_MULTICAST_LIST	14
+#define TX_FLAGS_IP_PKT		0x00000001
+#define TX_FLAGS_TCP_PKT	0x00000002
+
+/* MDIO bus phy name */
+#define XGBE_PHY_NAME		"amd_xgbe_phy"
+#define XGBE_PRTAD		0
+
+/* Driver PMT macros */
+#define XGMAC_DRIVER_CONTEXT	1
+#define XGMAC_IOCTL_CONTEXT	2
+
+#define FIFO_SIZE_B(x)		(x)
+#define FIFO_SIZE_KB(x)		((x) * 1024)	/* KB to bytes; argument parenthesized so expressions scale as a whole */
+
+#define XGBE_TC_CNT		2
+
+/* Helper macro for descriptor handling
+ *  Always use GET_DESC_DATA to access the descriptor data
+ *  since the index is free-running and needs to be and-ed
+ *  with the descriptor count value of the ring to index to
+ *  the proper descriptor data.
+ */
+#define GET_DESC_DATA(_ring, _idx)				\
+	((_ring)->rdata +					\
+	 ((_idx) & ((_ring)->rdesc_count - 1)))
+
+
+/* Default coalescing parameters */
+#define XGMAC_INIT_DMA_TX_USECS		100
+#define XGMAC_INIT_DMA_TX_FRAMES	16
+
+#define XGMAC_MAX_DMA_RIWT		0xff
+#define XGMAC_INIT_DMA_RX_USECS		100
+#define XGMAC_INIT_DMA_RX_FRAMES	16
+
+/* Flow control queue count */
+#define XGMAC_MAX_FLOW_CONTROL_QUEUES	8
+
+
+struct xgbe_prv_data;
+
+struct xgbe_packet_data {
+	unsigned int attributes;
+
+	unsigned int errors;
+
+	unsigned int rdesc_count;
+	unsigned int length;
+
+	unsigned int header_len;
+	unsigned int tcp_header_len;
+	unsigned int tcp_payload_len;
+	unsigned short mss;
+
+	unsigned short vlan_ctag;
+};
+
+/* Common Rx and Tx descriptor mapping */
+struct xgbe_ring_desc {
+	unsigned int desc0;
+	unsigned int desc1;
+	unsigned int desc2;
+	unsigned int desc3;
+};
+
+/* Structure used to hold information related to the descriptor
+ * and the packet associated with the descriptor (always use
+ * the GET_DESC_DATA macro to access this data from the ring)
+ */
+struct xgbe_ring_data {
+	struct xgbe_ring_desc *rdesc;	/* Virtual address of descriptor */
+	dma_addr_t rdesc_dma;		/* DMA address of descriptor */
+
+	struct sk_buff *skb;		/* Virtual address of SKB */
+	dma_addr_t skb_dma;		/* DMA address of SKB data */
+	unsigned int skb_dma_len;	/* Length of SKB DMA area */
+	unsigned int tso_header;        /* TSO header indicator */
+
+	unsigned short len;		/* Length of received Rx packet */
+
+	unsigned int interrupt;		/* Interrupt indicator */
+
+	unsigned int mapped_as_page;
+};
+
+struct xgbe_ring {
+	/* Ring lock - used just for TX rings at the moment */
+	spinlock_t lock;
+
+	/* Per packet related information */
+	struct xgbe_packet_data packet_data;
+
+	/* Virtual/DMA addresses and count of allocated descriptor memory */
+	struct xgbe_ring_desc *rdesc;
+	dma_addr_t rdesc_dma;
+	unsigned int rdesc_count;
+
+	/* Array of descriptor data corresponding to the descriptor memory
+	 * (always use the GET_DESC_DATA macro to access this data)
+	 */
+	struct xgbe_ring_data *rdata;
+
+	/* Ring index values
+	 *  cur   - Tx: index of descriptor to be used for current transfer
+	 *          Rx: index of descriptor to check for packet availability
+	 *  dirty - Tx: index of descriptor to check for transfer complete
+	 *          Rx: count of descriptors in which a packet has been received
+	 *              (used with rx.realloc_index to refresh the ring)
+	 */
+	unsigned int cur;
+	unsigned int dirty;
+
+	/* Coalesce frame count used for interrupt bit setting */
+	unsigned int coalesce_count;
+
+	union {
+		struct {
+			unsigned int queue_stopped;
+			unsigned short cur_mss;
+			unsigned short cur_vlan_ctag;
+		} tx;
+
+		struct {
+			unsigned int realloc_index;
+			unsigned int realloc_threshold;
+		} rx;
+	};
+} ____cacheline_aligned;
+
+/* Structure used to describe the descriptor rings associated with
+ * a DMA channel.
+ */
+struct xgbe_channel {
+	char name[16];
+
+	/* Address of private data area for device */
+	struct xgbe_prv_data *pdata;
+
+	/* Queue index and base address of queue's DMA registers */
+	unsigned int queue_index;
+	void __iomem *dma_regs;
+
+	unsigned int saved_ier;
+
+	unsigned int tx_timer_active;
+	struct hrtimer tx_timer;
+
+	struct xgbe_ring *tx_ring;
+	struct xgbe_ring *rx_ring;
+} ____cacheline_aligned;
+
+enum xgbe_int {
+	XGMAC_INT_DMA_ISR_DC0IS,
+	XGMAC_INT_DMA_CH_SR_TI,
+	XGMAC_INT_DMA_CH_SR_TPS,
+	XGMAC_INT_DMA_CH_SR_TBU,
+	XGMAC_INT_DMA_CH_SR_RI,
+	XGMAC_INT_DMA_CH_SR_RBU,
+	XGMAC_INT_DMA_CH_SR_RPS,
+	XGMAC_INT_DMA_CH_SR_FBE,
+	XGMAC_INT_DMA_ALL,
+};
+
+enum xgbe_int_state {
+	XGMAC_INT_STATE_SAVE,
+	XGMAC_INT_STATE_RESTORE,
+};
+
+enum xgbe_mtl_fifo_size {
+	XGMAC_MTL_FIFO_SIZE_256  = 0x00,
+	XGMAC_MTL_FIFO_SIZE_512  = 0x01,
+	XGMAC_MTL_FIFO_SIZE_1K   = 0x03,
+	XGMAC_MTL_FIFO_SIZE_2K   = 0x07,
+	XGMAC_MTL_FIFO_SIZE_4K   = 0x0f,
+	XGMAC_MTL_FIFO_SIZE_8K   = 0x1f,
+	XGMAC_MTL_FIFO_SIZE_16K  = 0x3f,
+	XGMAC_MTL_FIFO_SIZE_32K  = 0x7f,
+	XGMAC_MTL_FIFO_SIZE_64K  = 0xff,
+	XGMAC_MTL_FIFO_SIZE_128K = 0x1ff,
+	XGMAC_MTL_FIFO_SIZE_256K = 0x3ff,
+};
+
+struct xgbe_mmc_stats {
+	/* Tx Stats */
+	u64 txoctetcount_gb;
+	u64 txframecount_gb;
+	u64 txbroadcastframes_g;
+	u64 txmulticastframes_g;
+	u64 tx64octets_gb;
+	u64 tx65to127octets_gb;
+	u64 tx128to255octets_gb;
+	u64 tx256to511octets_gb;
+	u64 tx512to1023octets_gb;
+	u64 tx1024tomaxoctets_gb;
+	u64 txunicastframes_gb;
+	u64 txmulticastframes_gb;
+	u64 txbroadcastframes_gb;
+	u64 txunderflowerror;
+	u64 txoctetcount_g;
+	u64 txframecount_g;
+	u64 txpauseframes;
+	u64 txvlanframes_g;
+
+	/* Rx Stats */
+	u64 rxframecount_gb;
+	u64 rxoctetcount_gb;
+	u64 rxoctetcount_g;
+	u64 rxbroadcastframes_g;
+	u64 rxmulticastframes_g;
+	u64 rxcrcerror;
+	u64 rxrunterror;
+	u64 rxjabbererror;
+	u64 rxundersize_g;
+	u64 rxoversize_g;
+	u64 rx64octets_gb;
+	u64 rx65to127octets_gb;
+	u64 rx128to255octets_gb;
+	u64 rx256to511octets_gb;
+	u64 rx512to1023octets_gb;
+	u64 rx1024tomaxoctets_gb;
+	u64 rxunicastframes_g;
+	u64 rxlengtherror;
+	u64 rxoutofrangetype;
+	u64 rxpauseframes;
+	u64 rxfifooverflow;
+	u64 rxvlanframes_gb;
+	u64 rxwatchdogerror;
+};
+
+struct xgbe_hw_if {
+	int (*tx_complete)(struct xgbe_ring_desc *);
+
+	int (*set_promiscuous_mode)(struct xgbe_prv_data *, unsigned int);
+	int (*set_all_multicast_mode)(struct xgbe_prv_data *, unsigned int);
+	int (*set_addn_mac_addrs)(struct xgbe_prv_data *, unsigned int);
+	int (*set_mac_address)(struct xgbe_prv_data *, u8 *addr);
+
+	int (*enable_rx_csum)(struct xgbe_prv_data *);
+	int (*disable_rx_csum)(struct xgbe_prv_data *);
+
+	int (*enable_rx_vlan_stripping)(struct xgbe_prv_data *);
+	int (*disable_rx_vlan_stripping)(struct xgbe_prv_data *);
+
+	int (*read_mmd_regs)(struct xgbe_prv_data *, int, int);
+	void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int);
+	int (*set_gmii_speed)(struct xgbe_prv_data *);
+	int (*set_gmii_2500_speed)(struct xgbe_prv_data *);
+	int (*set_xgmii_speed)(struct xgbe_prv_data *);
+
+	void (*enable_tx)(struct xgbe_prv_data *);
+	void (*disable_tx)(struct xgbe_prv_data *);
+	void (*enable_rx)(struct xgbe_prv_data *);
+	void (*disable_rx)(struct xgbe_prv_data *);
+
+	void (*powerup_tx)(struct xgbe_prv_data *);
+	void (*powerdown_tx)(struct xgbe_prv_data *);
+	void (*powerup_rx)(struct xgbe_prv_data *);
+	void (*powerdown_rx)(struct xgbe_prv_data *);
+
+	int (*init)(struct xgbe_prv_data *);
+	int (*exit)(struct xgbe_prv_data *);
+
+	int (*enable_int)(struct xgbe_channel *, enum xgbe_int);
+	int (*disable_int)(struct xgbe_channel *, enum xgbe_int);
+	void (*pre_xmit)(struct xgbe_channel *);
+	int (*dev_read)(struct xgbe_channel *);
+	void (*tx_desc_init)(struct xgbe_channel *);
+	void (*rx_desc_init)(struct xgbe_channel *);
+	void (*rx_desc_reset)(struct xgbe_ring_data *);
+	void (*tx_desc_reset)(struct xgbe_ring_data *);
+	int (*is_last_desc)(struct xgbe_ring_desc *);
+	int (*is_context_desc)(struct xgbe_ring_desc *);
+
+	/* For FLOW ctrl */
+	int (*config_tx_flow_control)(struct xgbe_prv_data *);
+	int (*config_rx_flow_control)(struct xgbe_prv_data *);
+
+	/* For RX and TX coalescing */
+	int (*config_rx_coalesce)(struct xgbe_prv_data *);
+	int (*config_tx_coalesce)(struct xgbe_prv_data *);
+	unsigned int (*usec_to_riwt)(struct xgbe_prv_data *, unsigned int);
+	unsigned int (*riwt_to_usec)(struct xgbe_prv_data *, unsigned int);
+
+	/* For RX and TX threshold config */
+	int (*config_rx_threshold)(struct xgbe_prv_data *, unsigned int);
+	int (*config_tx_threshold)(struct xgbe_prv_data *, unsigned int);
+
+	/* For RX and TX Store and Forward Mode config */
+	int (*config_rsf_mode)(struct xgbe_prv_data *, unsigned int);
+	int (*config_tsf_mode)(struct xgbe_prv_data *, unsigned int);
+
+	/* For TX DMA Operate on Second Frame config */
+	int (*config_osp_mode)(struct xgbe_prv_data *);
+
+	/* For RX and TX PBL config */
+	int (*config_rx_pbl_val)(struct xgbe_prv_data *);
+	int (*get_rx_pbl_val)(struct xgbe_prv_data *);
+	int (*config_tx_pbl_val)(struct xgbe_prv_data *);
+	int (*get_tx_pbl_val)(struct xgbe_prv_data *);
+	int (*config_pblx8)(struct xgbe_prv_data *);
+
+	/* For MMC statistics */
+	void (*rx_mmc_int)(struct xgbe_prv_data *);
+	void (*tx_mmc_int)(struct xgbe_prv_data *);
+	void (*read_mmc_stats)(struct xgbe_prv_data *);
+};
+
+struct xgbe_desc_if {
+	int (*alloc_ring_resources)(struct xgbe_prv_data *);
+	void (*free_ring_resources)(struct xgbe_prv_data *);
+	int (*map_tx_skb)(struct xgbe_channel *, struct sk_buff *);
+	void (*realloc_skb)(struct xgbe_channel *);
+	void (*unmap_skb)(struct xgbe_prv_data *, struct xgbe_ring_data *);
+	void (*wrapper_tx_desc_init)(struct xgbe_prv_data *);
+	void (*wrapper_rx_desc_init)(struct xgbe_prv_data *);
+};
+
+/* This structure contains flags that indicate what hardware features
+ * or configurations are present in the device.
+ */
+struct xgbe_hw_features {
+	/* HW Feature Register0 */
+	unsigned int gmii;		/* 1000 Mbps support */
+	unsigned int vlhash;		/* VLAN Hash Filter */
+	unsigned int sma;		/* SMA(MDIO) Interface */
+	unsigned int rwk;		/* PMT remote wake-up packet */
+	unsigned int mgk;		/* PMT magic packet */
+	unsigned int mmc;		/* RMON module */
+	unsigned int aoe;		/* ARP Offload */
+	unsigned int ts;		/* IEEE 1588-2008 Advanced Timestamp */
+	unsigned int eee;		/* Energy Efficient Ethernet */
+	unsigned int tx_coe;		/* Tx Checksum Offload */
+	unsigned int rx_coe;		/* Rx Checksum Offload */
+	unsigned int addn_mac;		/* Additional MAC Addresses */
+	unsigned int ts_src;		/* Timestamp Source */
+	unsigned int sa_vlan_ins;	/* Source Address or VLAN Insertion */
+
+	/* HW Feature Register1 */
+	unsigned int rx_fifo_size;	/* MTL Receive FIFO Size */
+	unsigned int tx_fifo_size;	/* MTL Transmit FIFO Size */
+	unsigned int adv_ts_hi;		/* Advance Timestamping High Word */
+	unsigned int dcb;		/* DCB Feature */
+	unsigned int sph;		/* Split Header Feature */
+	unsigned int tso;		/* TCP Segmentation Offload */
+	unsigned int dma_debug;		/* DMA Debug Registers */
+	unsigned int rss;		/* Receive Side Scaling */
+	unsigned int hash_table_size;	/* Hash Table Size */
+	unsigned int l3l4_filter_num;	/* Number of L3-L4 Filters */
+
+	/* HW Feature Register2 */
+	unsigned int rx_q_cnt;		/* Number of MTL Receive Queues */
+	unsigned int tx_q_cnt;		/* Number of MTL Transmit Queues */
+	unsigned int rx_ch_cnt;		/* Number of DMA Receive Channels */
+	unsigned int tx_ch_cnt;		/* Number of DMA Transmit Channels */
+	unsigned int pps_out_num;	/* Number of PPS outputs */
+	unsigned int aux_snap_num;	/* Number of Aux snapshot inputs */
+};
+
+struct xgbe_prv_data {
+	struct net_device *netdev;
+	struct platform_device *pdev;
+	struct device *dev;
+
+	/* XGMAC/XPCS related mmio registers */
+	void __iomem *xgmac_regs;	/* XGMAC CSRs */
+	void __iomem *xpcs_regs;	/* XPCS MMD registers */
+
+	/* Overall device lock */
+	spinlock_t lock;
+
+	/* XPCS indirect addressing mutex */
+	struct mutex xpcs_mutex;
+
+	int irq_number;
+
+	struct xgbe_hw_if hw_if;
+	struct xgbe_desc_if desc_if;
+
+	/* Rings for Tx/Rx on a DMA channel */
+	struct xgbe_channel *channel;
+	unsigned int channel_count;
+	unsigned int tx_ring_count;
+	unsigned int tx_desc_count;
+	unsigned int rx_ring_count;
+	unsigned int rx_desc_count;
+
+	/* Tx/Rx common settings */
+	unsigned int pblx8;
+
+	/* Tx settings */
+	unsigned int tx_sf_mode;
+	unsigned int tx_threshold;
+	unsigned int tx_pbl;
+	unsigned int tx_osp_mode;
+
+	/* Rx settings */
+	unsigned int rx_sf_mode;
+	unsigned int rx_threshold;
+	unsigned int rx_pbl;
+
+	/* Tx coalescing settings */
+	unsigned int tx_usecs;
+	unsigned int tx_frames;
+
+	/* Rx coalescing settings */
+	unsigned int rx_riwt;
+	unsigned int rx_frames;
+
+	/* Current MTU */
+	unsigned int rx_buf_size;
+
+	/* Flow control settings */
+	unsigned int pause_autoneg;
+	unsigned int tx_pause;
+	unsigned int rx_pause;
+
+	/* MDIO settings */
+	struct module *phy_module;
+	char *mii_bus_id;
+	struct mii_bus *mii;
+	int mdio_mmd;
+	struct phy_device *phydev;
+	int default_autoneg;
+	int default_speed;
+
+	/* Current PHY settings */
+	phy_interface_t phy_mode;
+	int phy_link;
+	int phy_speed;
+	unsigned int phy_tx_pause;
+	unsigned int phy_rx_pause;
+
+	/* Netdev related settings */
+	netdev_features_t netdev_features;
+	struct napi_struct napi;
+	struct xgbe_mmc_stats mmc_stats;
+
+	/* System clock value used for Rx watchdog */
+	struct clk *sysclock;
+
+	/* Hardware features of the device */
+	struct xgbe_hw_features hw_feat;
+
+	/* Device restart work structure */
+	struct work_struct restart_work;
+
+	/* Keeps track of power mode */
+	unsigned int power_down;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *xgbe_debugfs;
+
+	unsigned int debugfs_xgmac_reg;
+
+	unsigned int debugfs_xpcs_mmd;
+	unsigned int debugfs_xpcs_reg;
+#endif
+};
+
+/* Function prototypes */
+
+void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *);
+void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *);
+struct net_device_ops *xgbe_get_netdev_ops(void);
+struct ethtool_ops *xgbe_get_ethtool_ops(void);
+
+int xgbe_mdio_register(struct xgbe_prv_data *);
+void xgbe_mdio_unregister(struct xgbe_prv_data *);
+void xgbe_dump_phy_registers(struct xgbe_prv_data *);
+void xgbe_dump_tx_desc(struct xgbe_ring *, unsigned int, unsigned int,
+		       unsigned int);
+void xgbe_dump_rx_desc(struct xgbe_ring *, struct xgbe_ring_desc *,
+		       unsigned int);
+void xgbe_print_pkt(struct net_device *, struct sk_buff *, bool);
+void xgbe_get_all_hw_features(struct xgbe_prv_data *);
+int xgbe_powerup(struct net_device *, unsigned int);
+int xgbe_powerdown(struct net_device *, unsigned int);
+void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
+void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
+
+#ifdef CONFIG_DEBUG_FS
+void xgbe_debugfs_init(struct xgbe_prv_data *);
+void xgbe_debugfs_exit(struct xgbe_prv_data *);
+#else
+static inline void xgbe_debugfs_init(struct xgbe_prv_data *pdata) {}
+static inline void xgbe_debugfs_exit(struct xgbe_prv_data *pdata) {}
+#endif /* CONFIG_DEBUG_FS */
+
+/* NOTE: Uncomment for TX and RX DESCRIPTOR DUMP in KERNEL LOG */
+#if 0
+#define XGMAC_ENABLE_TX_DESC_DUMP
+#define XGMAC_ENABLE_RX_DESC_DUMP
+#endif
+
+/* NOTE: Uncomment for TX and RX PACKET DUMP in KERNEL LOG */
+#if 0
+#define XGMAC_ENABLE_TX_PKT_DUMP
+#define XGMAC_ENABLE_RX_PKT_DUMP
+#endif
+
+/* NOTE: Uncomment for function trace log messages in KERNEL LOG */
+#if 0
+#define YDEBUG
+#define YDEBUG_MDIO
+#endif
+
+/* For debug prints */
+#ifdef YDEBUG
+#define DBGPR(x...) pr_alert(x)
+#define DBGPHY_REGS(x...) xgbe_dump_phy_registers(x)
+#else
+#define DBGPR(x...) do { } while (0)
+#define DBGPHY_REGS(x...) do { } while (0)
+#endif
+
+#ifdef YDEBUG_MDIO
+#define DBGPR_MDIO(x...) pr_alert(x)
+#else
+#define DBGPR_MDIO(x...) do { } while (0)
+#endif
+
+#endif

diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index d647a7d..18e2fac 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c

@@ -13,6 +13,7 @@
  *		Vineet Gupta
  */
 
+#include <linux/crc32.h>
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -362,6 +363,15 @@
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void arc_emac_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);		/* IRQ stays disabled around the direct call below */
+	arc_emac_intr(dev->irq, dev);	/* call arc_emac_intr() by hand, same arguments as for a real IRQ */
+	enable_irq(dev->irq);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
 /**
  * arc_emac_open - Open the network device.
  * @ndev:	Pointer to the network device.
@@ -451,6 +461,41 @@
 }
 
 /**
+ * arc_emac_set_rx_mode - Change the receive filtering mode.
+ * @ndev:	Pointer to the network device.
+ *
+ * This function enables/disables promiscuous or all-multicast mode
+ * and updates the multicast filtering list of the network device.
+ */
+static void arc_emac_set_rx_mode(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+
+	if (ndev->flags & IFF_PROMISC) {
+		arc_reg_or(priv, R_CTRL, PROM_MASK);
+	} else {
+		arc_reg_clr(priv, R_CTRL, PROM_MASK);
+
+		if (ndev->flags & IFF_ALLMULTI) {
+			arc_reg_set(priv, R_LAFL, ~0);	/* all filter bits set */
+			arc_reg_set(priv, R_LAFH, ~0);
+		} else {
+			struct netdev_hw_addr *ha;
+			unsigned int filter[2] = { 0, 0 };	/* [0] -> R_LAFL, [1] -> R_LAFH */
+			int bit;
+
+			netdev_for_each_mc_addr(ha, ndev) {
+				bit = ether_crc_le(ETH_ALEN, ha->addr) >> 26;	/* CRC shifted right by 26 picks the bit */
+				filter[bit >> 5] |= 1U << (bit & 31);	/* 1U: shifting signed 1 by 31 is undefined */
+			}
+
+			arc_reg_set(priv, R_LAFL, filter[0]);
+			arc_reg_set(priv, R_LAFH, filter[1]);
+		}
+	}
+}
+
+/**
  * arc_emac_stop - Close the network device.
  * @ndev:	Pointer to the network device.
  *
@@ -620,6 +665,10 @@
 	.ndo_start_xmit		= arc_emac_tx,
 	.ndo_set_mac_address	= arc_emac_set_address,
 	.ndo_get_stats		= arc_emac_stats,
+	.ndo_set_rx_mode	= arc_emac_set_rx_mode,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= arc_emac_poll_controller,
+#endif
 };
 
 static int arc_emac_probe(struct platform_device *pdev)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 17bb9ce..49faa97 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c

@@ -1302,7 +1302,7 @@
 	}
 
 	netdev->netdev_ops = &alx_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &alx_ethtool_ops);
+	netdev->ethtool_ops = &alx_ethtool_ops;
 	netdev->irq = pdev->irq;
 	netdev->watchdog_timeo = ALX_WATCHDOG_TIME;
 

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
index 859ea84..48694c2 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c

@@ -56,8 +56,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_ENABLE;
@@ -305,5 +305,5 @@
 
 void atl1c_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1c_ethtool_ops);
+	netdev->ethtool_ops = &atl1c_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
index 82b2386..1be072f 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c

@@ -57,8 +57,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_ENABLE;
@@ -388,5 +388,5 @@
 
 void atl1e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1e_ethtool_ops);
+	netdev->ethtool_ops = &atl1e_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index dfd0e91..b460db7 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c

@@ -3258,8 +3258,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 	if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR ||
 	    hw->media_type == MEDIA_TYPE_1000M_FULL)

diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 78befb5..6746bd7 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c

@@ -1396,7 +1396,7 @@
 	atl2_setup_pcicmd(pdev);
 
 	netdev->netdev_ops = &atl2_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &atl2_ethtool_ops);
+	netdev->ethtool_ops = &atl2_ethtool_ops;
 	netdev->watchdog_timeo = 5 * HZ;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
@@ -1769,8 +1769,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_ENABLE;

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 85dbddd..3e48809 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig

@@ -150,4 +150,15 @@
 	  In case of using this driver on BCM4706 it's also requires to enable
 	  BCMA_DRIVER_GMAC_CMN to make it work.
 
+config SYSTEMPORT
+	tristate "Broadcom SYSTEMPORT internal MAC support"
+	depends on OF
+	select MII
+	select PHYLIB
+	select FIXED_PHY if SYSTEMPORT=y
+	help
+	  This driver supports the built-in Ethernet MACs found in the
+	  Broadcom BCM7xxx Set Top Box family chipset using an internal
+	  Ethernet switch.
+
 endif # NET_VENDOR_BROADCOM

diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index fd639a0..e2a958a 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile

@@ -11,3 +11,4 @@
 obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o
 obj-$(CONFIG_TIGON3) += tg3.o
 obj-$(CONFIG_BGMAC) += bgmac.o
+obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 05ba625..ca5a20a 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c

@@ -2380,7 +2380,7 @@
 	netif_napi_add(dev, &bp->napi, b44_poll, 64);
 	dev->watchdog_timeo = B44_TX_TIMEOUT;
 	dev->irq = sdev->irq;
-	SET_ETHTOOL_OPS(dev, &b44_ethtool_ops);
+	dev->ethtool_ops = &b44_ethtool_ops;
 
 	err = ssb_bus_powerup(sdev->bus, 0);
 	if (err) {

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index a7d11f5..3e8d1a8 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c

@@ -1315,8 +1315,7 @@
 
 };
 
-#define BCM_ENET_STATS_LEN	\
-	(sizeof(bcm_enet_gstrings_stats) / sizeof(struct bcm_enet_stats))
+#define BCM_ENET_STATS_LEN	ARRAY_SIZE(bcm_enet_gstrings_stats)
 
 static const u32 unused_mib_regs[] = {
 	ETH_MIB_TX_ALL_OCTETS,
@@ -1898,7 +1897,7 @@
 	dev->netdev_ops = &bcm_enet_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
 
-	SET_ETHTOOL_OPS(dev, &bcm_enet_ethtool_ops);
+	dev->ethtool_ops = &bcm_enet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	ret = register_netdev(dev);
@@ -2784,7 +2783,7 @@
 	/* register netdevice */
 	dev->netdev_ops = &bcm_enetsw_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
-	SET_ETHTOOL_OPS(dev, &bcm_enetsw_ethtool_ops);
+	dev->ethtool_ops = &bcm_enetsw_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	spin_lock_init(&priv->enetsw_mdio_lock);

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
new file mode 100644
index 0000000..141160e
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c

@@ -0,0 +1,1654 @@
+/*
+ * Broadcom BCM7xxx System Port Ethernet MAC driver
+ *
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+#include "bcmsysport.h"
+
+/* Generate the <name>_readl()/<name>_writel() register accessors for one
+ * register block. Both helpers add the block's offset to priv->base and go
+ * through __raw_readl()/__raw_writel().
+ */
+#define BCM_SYSPORT_IO_MACRO(name, offset) \
+static inline u32 name##_readl(struct bcm_sysport_priv *priv, u32 off)	\
+{									\
+	return __raw_readl(priv->base + offset + off);			\
+}									\
+static inline void name##_writel(struct bcm_sysport_priv *priv,		\
+				  u32 val, u32 off)			\
+{									\
+	__raw_writel(val, priv->base + offset + off);			\
+}
+
+/* Instantiate one readl/writel accessor pair per System Port register block,
+ * e.g. umac_readl()/umac_writel() for the block at SYS_PORT_UMAC_OFFSET.
+ */
+BCM_SYSPORT_IO_MACRO(intrl2_0, SYS_PORT_INTRL2_0_OFFSET);
+BCM_SYSPORT_IO_MACRO(intrl2_1, SYS_PORT_INTRL2_1_OFFSET);
+BCM_SYSPORT_IO_MACRO(umac, SYS_PORT_UMAC_OFFSET);
+BCM_SYSPORT_IO_MACRO(tdma, SYS_PORT_TDMA_OFFSET);
+BCM_SYSPORT_IO_MACRO(rdma, SYS_PORT_RDMA_OFFSET);
+BCM_SYSPORT_IO_MACRO(rxchk, SYS_PORT_RXCHK_OFFSET);
+BCM_SYSPORT_IO_MACRO(txchk, SYS_PORT_TXCHK_OFFSET);
+BCM_SYSPORT_IO_MACRO(rbuf, SYS_PORT_RBUF_OFFSET);
+BCM_SYSPORT_IO_MACRO(tbuf, SYS_PORT_TBUF_OFFSET);
+BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET);
+
+/* L2 interrupt masking/unmasking helpers. Each helper writes the hardware
+ * mask set/clear register and mirrors the change in a software copy
+ * (priv->irq<N>_mask) to avoid CPU_MASK_STATUS reads in hot-paths.
+ */
+#define BCM_SYSPORT_INTR_L2(which)	\
+static inline void intrl2_##which##_mask_set(struct bcm_sysport_priv *priv, \
+						u32 mask)		\
+{									\
+	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_SET);	\
+	priv->irq##which##_mask |= (mask);				\
+}									\
+static inline void intrl2_##which##_mask_clear(struct bcm_sysport_priv *priv, \
+						u32 mask)		\
+{									\
+	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
+	priv->irq##which##_mask &= ~(mask);				\
+}
+
+BCM_SYSPORT_INTR_L2(0)
+BCM_SYSPORT_INTR_L2(1)
+
+/* Program a buffer address into the hardware descriptor located at @d.
+ *
+ * Register accesses to GISB/RBUS registers are expensive (few hundred
+ * nanoseconds), so the upper address word is only written when physical
+ * addresses are 64-bit; this saves one register write per-packet on
+ * 32-bits platforms.
+ */
+static inline void dma_desc_set_addr(struct bcm_sysport_priv *priv,
+				     void __iomem *d,
+				     dma_addr_t addr)
+{
+	void __iomem *lo_reg = d + DESC_ADDR_LO;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	void __iomem *hi_reg = d + DESC_ADDR_HI_STATUS_LEN;
+
+	__raw_writel(upper_32_bits(addr) & DESC_ADDR_HI_MASK, hi_reg);
+#endif
+	__raw_writel(lower_32_bits(addr), lo_reg);
+}
+
+/* Push the two words of @desc into the TDMA write port of ring @port */
+static inline void tdma_port_write_desc_addr(struct bcm_sysport_priv *priv,
+						struct dma_desc *desc,
+						unsigned int port)
+{
+	const u32 hi_word = desc->addr_status_len;
+	const u32 lo_word = desc->addr_lo;
+
+	/* Ports are latched, so write upper address first */
+	tdma_writel(priv, hi_word, TDMA_WRITE_PORT_HI(port));
+	tdma_writel(priv, lo_word, TDMA_WRITE_PORT_LO(port));
+}
+
+/* Ethtool operations */
+
+/* Apply ethtool link settings through the attached PHY.
+ * Returns -EINVAL while the interface is not running.
+ */
+static int bcm_sysport_set_settings(struct net_device *dev,
+				    struct ethtool_cmd *cmd)
+{
+	struct bcm_sysport_priv *priv;
+
+	if (netif_running(dev)) {
+		priv = netdev_priv(dev);
+		return phy_ethtool_sset(priv->phydev, cmd);
+	}
+
+	return -EINVAL;
+}
+
+/* Report ethtool link settings from the attached PHY.
+ * Returns -EINVAL while the interface is not running.
+ */
+static int bcm_sysport_get_settings(struct net_device *dev,
+					struct ethtool_cmd *cmd)
+{
+	struct bcm_sysport_priv *priv;
+
+	if (netif_running(dev)) {
+		priv = netdev_priv(dev);
+		return phy_ethtool_gset(priv->phydev, cmd);
+	}
+
+	return -EINVAL;
+}
+
+/* Enable or disable the receive checksum engine according to the
+ * NETIF_F_RXCSUM bit of @wanted. Always returns 0.
+ */
+static int bcm_sysport_set_rx_csum(struct net_device *dev,
+					netdev_features_t wanted)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	u32 ctrl;
+
+	priv->rx_csum_en = !!(wanted & NETIF_F_RXCSUM);
+
+	/* Start from both bits cleared and only set what is needed */
+	ctrl = rxchk_readl(priv, RXCHK_CONTROL);
+	ctrl &= ~RXCHK_EN;
+	ctrl &= ~RXCHK_SKIP_FCS;
+
+	if (priv->rx_csum_en) {
+		ctrl |= RXCHK_EN;
+
+		/* If UniMAC forwards CRC, we need to skip over it to get
+		 * a valid CHK bit to be set in the per-packet status word
+		 */
+		if (priv->crc_fwd)
+			ctrl |= RXCHK_SKIP_FCS;
+	}
+
+	rxchk_writel(priv, ctrl, RXCHK_CONTROL);
+
+	return 0;
+}
+
+/* Enable or disable the Transmit Status Block according to the TX checksum
+ * bits (NETIF_F_IP_CSUM / NETIF_F_IPV6_CSUM) of @wanted. Always returns 0.
+ */
+static int bcm_sysport_set_tx_csum(struct net_device *dev,
+					netdev_features_t wanted)
+{
+	const netdev_features_t csum_bits = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	u32 ctrl;
+
+	/* Hardware transmit checksum requires us to enable the Transmit status
+	 * block prepended to the packet contents
+	 */
+	priv->tsb_en = !!(wanted & csum_bits);
+
+	ctrl = tdma_readl(priv, TDMA_CONTROL);
+	ctrl &= ~TSB_EN;
+	if (priv->tsb_en)
+		ctrl |= TSB_EN;
+	tdma_writel(priv, ctrl, TDMA_CONTROL);
+
+	return 0;
+}
+
+/* ndo_set_features: apply RX/TX checksum offload changes to the hardware.
+ *
+ * @features is the feature set being applied; it is compared against
+ * dev->features to find out which offloads toggled, and it is also the
+ * value handed to the RX/TX checksum helpers.
+ *
+ * Returns 0 on success or the first error reported by a helper.
+ */
+static int bcm_sysport_set_features(struct net_device *dev,
+					netdev_features_t features)
+{
+	netdev_features_t changed = features ^ dev->features;
+	int ret = 0;
+
+	/* Program the hardware from the feature set we were asked to apply,
+	 * not from dev->wanted_features, which may still contain bits that
+	 * were dropped before this callback was invoked.
+	 */
+	if (changed & NETIF_F_RXCSUM)
+		ret = bcm_sysport_set_rx_csum(dev, features);
+	if (!ret && (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
+		ret = bcm_sysport_set_tx_csum(dev, features);
+
+	return ret;
+}
+
+/* Table of the statistics exported through ethtool.
+ *
+ * Hardware counters must be kept in sync because the order/offset
+ * is important here (order in structure declaration = order in hardware).
+ * bcm_sysport_update_mib_counters() walks this table in order and derives
+ * the UniMAC MIB register offset from the accumulated stat_sizeof of the
+ * preceding non-netdev entries, so entries must not be reordered.
+ */
+static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = {
+	/* general stats */
+	STAT_NETDEV(rx_packets),
+	STAT_NETDEV(tx_packets),
+	STAT_NETDEV(rx_bytes),
+	STAT_NETDEV(tx_bytes),
+	STAT_NETDEV(rx_errors),
+	STAT_NETDEV(tx_errors),
+	STAT_NETDEV(rx_dropped),
+	STAT_NETDEV(tx_dropped),
+	STAT_NETDEV(multicast),
+	/* UniMAC RSV counters */
+	STAT_MIB_RX("rx_64_octets", mib.rx.pkt_cnt.cnt_64),
+	STAT_MIB_RX("rx_65_127_oct", mib.rx.pkt_cnt.cnt_127),
+	STAT_MIB_RX("rx_128_255_oct", mib.rx.pkt_cnt.cnt_255),
+	STAT_MIB_RX("rx_256_511_oct", mib.rx.pkt_cnt.cnt_511),
+	STAT_MIB_RX("rx_512_1023_oct", mib.rx.pkt_cnt.cnt_1023),
+	STAT_MIB_RX("rx_1024_1518_oct", mib.rx.pkt_cnt.cnt_1518),
+	STAT_MIB_RX("rx_vlan_1519_1522_oct", mib.rx.pkt_cnt.cnt_mgv),
+	STAT_MIB_RX("rx_1522_2047_oct", mib.rx.pkt_cnt.cnt_2047),
+	STAT_MIB_RX("rx_2048_4095_oct", mib.rx.pkt_cnt.cnt_4095),
+	STAT_MIB_RX("rx_4096_9216_oct", mib.rx.pkt_cnt.cnt_9216),
+	STAT_MIB_RX("rx_pkts", mib.rx.pkt),
+	STAT_MIB_RX("rx_bytes", mib.rx.bytes),
+	STAT_MIB_RX("rx_multicast", mib.rx.mca),
+	STAT_MIB_RX("rx_broadcast", mib.rx.bca),
+	STAT_MIB_RX("rx_fcs", mib.rx.fcs),
+	STAT_MIB_RX("rx_control", mib.rx.cf),
+	STAT_MIB_RX("rx_pause", mib.rx.pf),
+	STAT_MIB_RX("rx_unknown", mib.rx.uo),
+	STAT_MIB_RX("rx_align", mib.rx.aln),
+	STAT_MIB_RX("rx_outrange", mib.rx.flr),
+	STAT_MIB_RX("rx_code", mib.rx.cde),
+	STAT_MIB_RX("rx_carrier", mib.rx.fcr),
+	STAT_MIB_RX("rx_oversize", mib.rx.ovr),
+	STAT_MIB_RX("rx_jabber", mib.rx.jbr),
+	STAT_MIB_RX("rx_mtu_err", mib.rx.mtue),
+	STAT_MIB_RX("rx_good_pkts", mib.rx.pok),
+	STAT_MIB_RX("rx_unicast", mib.rx.uc),
+	STAT_MIB_RX("rx_ppp", mib.rx.ppp),
+	STAT_MIB_RX("rx_crc", mib.rx.rcrc),
+	/* UniMAC TSV counters */
+	STAT_MIB_TX("tx_64_octets", mib.tx.pkt_cnt.cnt_64),
+	STAT_MIB_TX("tx_65_127_oct", mib.tx.pkt_cnt.cnt_127),
+	STAT_MIB_TX("tx_128_255_oct", mib.tx.pkt_cnt.cnt_255),
+	STAT_MIB_TX("tx_256_511_oct", mib.tx.pkt_cnt.cnt_511),
+	STAT_MIB_TX("tx_512_1023_oct", mib.tx.pkt_cnt.cnt_1023),
+	STAT_MIB_TX("tx_1024_1518_oct", mib.tx.pkt_cnt.cnt_1518),
+	STAT_MIB_TX("tx_vlan_1519_1522_oct", mib.tx.pkt_cnt.cnt_mgv),
+	STAT_MIB_TX("tx_1522_2047_oct", mib.tx.pkt_cnt.cnt_2047),
+	STAT_MIB_TX("tx_2048_4095_oct", mib.tx.pkt_cnt.cnt_4095),
+	STAT_MIB_TX("tx_4096_9216_oct", mib.tx.pkt_cnt.cnt_9216),
+	STAT_MIB_TX("tx_pkts", mib.tx.pkts),
+	STAT_MIB_TX("tx_multicast", mib.tx.mca),
+	STAT_MIB_TX("tx_broadcast", mib.tx.bca),
+	STAT_MIB_TX("tx_pause", mib.tx.pf),
+	STAT_MIB_TX("tx_control", mib.tx.cf),
+	STAT_MIB_TX("tx_fcs_err", mib.tx.fcs),
+	STAT_MIB_TX("tx_oversize", mib.tx.ovr),
+	STAT_MIB_TX("tx_defer", mib.tx.drf),
+	STAT_MIB_TX("tx_excess_defer", mib.tx.edf),
+	STAT_MIB_TX("tx_single_col", mib.tx.scl),
+	STAT_MIB_TX("tx_multi_col", mib.tx.mcl),
+	STAT_MIB_TX("tx_late_col", mib.tx.lcl),
+	STAT_MIB_TX("tx_excess_col", mib.tx.ecl),
+	STAT_MIB_TX("tx_frags", mib.tx.frg),
+	STAT_MIB_TX("tx_total_col", mib.tx.ncl),
+	STAT_MIB_TX("tx_jabber", mib.tx.jbr),
+	STAT_MIB_TX("tx_bytes", mib.tx.bytes),
+	STAT_MIB_TX("tx_good_pkts", mib.tx.pok),
+	STAT_MIB_TX("tx_unicast", mib.tx.uc),
+	/* UniMAC RUNT counters */
+	STAT_RUNT("rx_runt_pkts", mib.rx_runt_cnt),
+	STAT_RUNT("rx_runt_valid_fcs", mib.rx_runt_fcs),
+	STAT_RUNT("rx_runt_inval_fcs_align", mib.rx_runt_fcs_align),
+	STAT_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
+	/* RXCHK misc statistics */
+	STAT_RXCHK("rxchk_bad_csum", mib.rxchk_bad_csum, RXCHK_BAD_CSUM_CNTR),
+	STAT_RXCHK("rxchk_other_pkt_disc", mib.rxchk_other_pkt_disc,
+			RXCHK_OTHER_DISC_CNTR),
+	/* RBUF misc statistics */
+	STAT_RBUF("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, RBUF_OVFL_DISC_CNTR),
+	STAT_RBUF("rbuf_err_cnt", mib.rbuf_err_cnt, RBUF_ERR_PKT_CNTR),
+};
+
+/* Number of entries in the statistics table above */
+#define BCM_SYSPORT_STATS_LEN	ARRAY_SIZE(bcm_sysport_gstrings_stats)
+
+/* ethtool get_drvinfo: report driver name, version, bus and stats count */
+static void bcm_sysport_get_drvinfo(struct net_device *dev,
+					struct ethtool_drvinfo *info)
+{
+	info->n_stats = BCM_SYSPORT_STATS_LEN;
+
+	strlcpy(info->bus_info, "platform", sizeof(info->bus_info));
+	strlcpy(info->version, "0.1", sizeof(info->version));
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+}
+
+/* ethtool get_msglevel: return the driver's message enable bitmap */
+static u32 bcm_sysport_get_msglvl(struct net_device *dev)
+{
+	const struct bcm_sysport_priv *sysport = netdev_priv(dev);
+
+	return sysport->msg_enable;
+}
+
+/* ethtool set_msglevel: store the new message enable bitmap */
+static void bcm_sysport_set_msglvl(struct net_device *dev, u32 enable)
+{
+	struct bcm_sysport_priv *sysport = netdev_priv(dev);
+
+	sysport->msg_enable = enable;
+}
+
+/* ethtool get_sset_count: only the statistics string set is supported;
+ * any other set yields -EOPNOTSUPP.
+ */
+static int bcm_sysport_get_sset_count(struct net_device *dev, int string_set)
+{
+	if (string_set == ETH_SS_STATS)
+		return BCM_SYSPORT_STATS_LEN;
+
+	return -EOPNOTSUPP;
+}
+
+/* ethtool get_strings: copy the statistics names into @data, one
+ * ETH_GSTRING_LEN-sized slot per entry. Other string sets are ignored.
+ */
+static void bcm_sysport_get_strings(struct net_device *dev,
+					u32 stringset, u8 *data)
+{
+	const struct bcm_sysport_stats *s = bcm_sysport_gstrings_stats;
+	int i;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < BCM_SYSPORT_STATS_LEN; i++, s++) {
+		memcpy(data, s->stat_string, ETH_GSTRING_LEN);
+		data += ETH_GSTRING_LEN;
+	}
+}
+
+/* Refresh the software copy of the hardware counters kept in @priv.
+ *
+ * The statistics table is walked in order. A running byte offset selects
+ * the UniMAC MIB register for RX/TX/RUNT entries, while RXCHK and RBUF
+ * entries carry their own register offset. RXCHK/RBUF counters that read
+ * back as all-ones are written back to zero.
+ */
+static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv)
+{
+	const struct bcm_sysport_stats *s = bcm_sysport_gstrings_stats;
+	int i, j = 0;
+
+	for (i = 0; i < BCM_SYSPORT_STATS_LEN; i++, s++) {
+		u8 offset = 0;
+		u32 val = 0;
+
+		/* netdev entries are not backed by a hardware register and
+		 * do not advance the MIB offset
+		 */
+		if (s->type == BCM_SYSPORT_STAT_NETDEV)
+			continue;
+
+		if (s->type == BCM_SYSPORT_STAT_MIB_RX ||
+		    s->type == BCM_SYSPORT_STAT_MIB_TX ||
+		    s->type == BCM_SYSPORT_STAT_RUNT) {
+			if (s->type != BCM_SYSPORT_STAT_MIB_RX)
+				offset = UMAC_MIB_STAT_OFFSET;
+			val = umac_readl(priv, UMAC_MIB_START + j + offset);
+		} else if (s->type == BCM_SYSPORT_STAT_RXCHK) {
+			val = rxchk_readl(priv, s->reg_offset);
+			if (val == ~0)
+				rxchk_writel(priv, 0, s->reg_offset);
+		} else if (s->type == BCM_SYSPORT_STAT_RBUF) {
+			val = rbuf_readl(priv, s->reg_offset);
+			if (val == ~0)
+				rbuf_writel(priv, 0, s->reg_offset);
+		}
+
+		j += s->stat_sizeof;
+		*(u32 *)((char *)priv + s->stat_offset) = val;
+	}
+
+	netif_dbg(priv, hw, priv->netdev, "updated MIB counters\n");
+}
+
+/* ethtool get_ethtool_stats: fill @data with one value per table entry.
+ *
+ * Hardware counters are refreshed first when the interface is running.
+ * netdev entries are read from dev->stats, all others from the private
+ * structure, each as a 32-bit value at the entry's stat_offset.
+ */
+static void bcm_sysport_get_stats(struct net_device *dev,
+					struct ethtool_stats *stats, u64 *data)
+{
+	const struct bcm_sysport_stats *s = bcm_sysport_gstrings_stats;
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (netif_running(dev))
+		bcm_sysport_update_mib_counters(priv);
+
+	for (i = 0; i < BCM_SYSPORT_STATS_LEN; i++, s++) {
+		char *base;
+
+		if (s->type == BCM_SYSPORT_STAT_NETDEV)
+			base = (char *)&dev->stats;
+		else
+			base = (char *)priv;
+
+		data[i] = *(u32 *)(base + s->stat_offset);
+	}
+}
+
+/* Release the SKB attached to @cb and reset the control block's SKB pointer
+ * and recorded DMA address.
+ */
+static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb)
+{
+	struct sk_buff *old_skb = cb->skb;
+
+	cb->skb = NULL;
+	dma_unmap_addr_set(cb, dma_addr, 0);
+	dev_kfree_skb_any(old_skb);
+}
+
+/* Attach a freshly allocated, DMA-mapped receive buffer to @cb and program
+ * its address into the next RX buffer descriptor to assign.
+ *
+ * Returns 0 on success, -ENOMEM if no SKB could be allocated, or the
+ * dma_mapping_error() result if the mapping failed. On failure cb->skb is
+ * left NULL.
+ */
+static int bcm_sysport_rx_refill(struct bcm_sysport_priv *priv,
+				 struct bcm_sysport_cb *cb)
+{
+	struct net_device *ndev = priv->netdev;
+	struct device *kdev = &priv->pdev->dev;
+	dma_addr_t mapping;
+	int err;
+
+	cb->skb = netdev_alloc_skb(ndev, RX_BUF_LENGTH);
+	if (!cb->skb) {
+		netif_err(priv, rx_err, ndev, "SKB alloc failed\n");
+		return -ENOMEM;
+	}
+
+	mapping = dma_map_single(kdev, cb->skb->data,
+				RX_BUF_LENGTH, DMA_FROM_DEVICE);
+	err = dma_mapping_error(kdev, mapping);
+	if (err) {
+		bcm_sysport_free_cb(cb);
+		netif_err(priv, rx_err, ndev, "DMA mapping failure\n");
+		return err;
+	}
+
+	dma_unmap_addr_set(cb, dma_addr, mapping);
+	dma_desc_set_addr(priv, priv->rx_bd_assign_ptr, mapping);
+
+	/* Advance to the next descriptor, wrapping at num_rx_bds */
+	priv->rx_bd_assign_index++;
+	priv->rx_bd_assign_index &= (priv->num_rx_bds - 1);
+	priv->rx_bd_assign_ptr = priv->rx_bds +
+		(priv->rx_bd_assign_index * DESC_SIZE);
+
+	netif_dbg(priv, rx_status, ndev, "RX refill\n");
+
+	return 0;
+}
+
+/* Populate the RX ring with receive buffers, one refill attempt per
+ * descriptor. Stops at, and returns, the first refill error; returns 0
+ * otherwise.
+ */
+static int bcm_sysport_alloc_rx_bufs(struct bcm_sysport_priv *priv)
+{
+	unsigned int n;
+	int err;
+
+	for (n = 0; n < priv->num_rx_bds; n++) {
+		struct bcm_sysport_cb *cb;
+
+		cb = &priv->rx_cbs[priv->rx_bd_assign_index];
+		if (!cb->skb) {
+			err = bcm_sysport_rx_refill(priv, cb);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Poll the hardware for up to budget packets to process.
+ *
+ * Returns the number of ring entries consumed, including those whose
+ * packet was dropped. Every consumed control block is handed to
+ * bcm_sysport_rx_refill() so it gets a new buffer.
+ */
+static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
+					unsigned int budget)
+{
+	struct device *kdev = &priv->pdev->dev;
+	struct net_device *ndev = priv->netdev;
+	unsigned int processed = 0, to_process;
+	struct bcm_sysport_cb *cb;
+	struct sk_buff *skb;
+	unsigned int p_index;
+	u16 len, status;
+	struct bcm_rsb *rsb;
+
+	/* Determine how much we should process since last call */
+	p_index = rdma_readl(priv, RDMA_PROD_INDEX);
+	p_index &= RDMA_PROD_INDEX_MASK;
+
+	/* Account for the producer index having wrapped around */
+	if (p_index < priv->rx_c_index)
+		to_process = (RDMA_CONS_INDEX_MASK + 1) -
+			priv->rx_c_index + p_index;
+	else
+		to_process = p_index - priv->rx_c_index;
+
+	netif_dbg(priv, rx_status, ndev,
+			"p_index=%d rx_c_index=%d to_process=%d\n",
+			p_index, priv->rx_c_index, to_process);
+
+	while ((processed < to_process) &&
+		(processed < budget)) {
+
+		cb = &priv->rx_cbs[priv->rx_read_ptr];
+		skb = cb->skb;
+
+		processed++;
+		priv->rx_read_ptr++;
+		if (priv->rx_read_ptr == priv->num_rx_bds)
+			priv->rx_read_ptr = 0;
+
+		/* An earlier refill failure leaves this control block without
+		 * an SKB, and therefore without a live DMA mapping. Handle
+		 * that before unmapping anything or looking at the Receive
+		 * Status Block, which lives in the SKB data.
+		 */
+		if (unlikely(!skb)) {
+			netif_err(priv, rx_err, ndev, "out of memory!\n");
+			ndev->stats.rx_dropped++;
+			ndev->stats.rx_errors++;
+			goto refill;
+		}
+
+		dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
+				RX_BUF_LENGTH, DMA_FROM_DEVICE);
+
+		/* Extract the Receive Status Block prepended */
+		rsb = (struct bcm_rsb *)skb->data;
+		len = (rsb->rx_status_len >> DESC_LEN_SHIFT) & DESC_LEN_MASK;
+		status = (rsb->rx_status_len >> DESC_STATUS_SHIFT) &
+			DESC_STATUS_MASK;
+
+		netif_dbg(priv, rx_status, ndev,
+				"p=%d, c=%d, rd_ptr=%d, len=%d, flag=0x%04x\n",
+				p_index, priv->rx_c_index, priv->rx_read_ptr,
+				len, status);
+
+		/* Only packets contained in a single buffer are accepted */
+		if (unlikely(!(status & DESC_EOP) || !(status & DESC_SOP))) {
+			netif_err(priv, rx_status, ndev, "fragmented packet!\n");
+			ndev->stats.rx_dropped++;
+			ndev->stats.rx_errors++;
+			bcm_sysport_free_cb(cb);
+			goto refill;
+		}
+
+		if (unlikely(status & (RX_STATUS_ERR | RX_STATUS_OVFLOW))) {
+			netif_err(priv, rx_err, ndev, "error packet\n");
+			if (status & RX_STATUS_OVFLOW)
+				ndev->stats.rx_over_errors++;
+			ndev->stats.rx_dropped++;
+			ndev->stats.rx_errors++;
+			bcm_sysport_free_cb(cb);
+			goto refill;
+		}
+
+		skb_put(skb, len);
+
+		/* Hardware validated our checksum */
+		if (likely(status & DESC_L4_CSUM))
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		/* Hardware pre-pends packets with 2bytes before Ethernet
+		 * header plus we have the Receive Status Block, strip off all
+		 * of this from the SKB.
+		 */
+		skb_pull(skb, sizeof(*rsb) + 2);
+		len -= (sizeof(*rsb) + 2);
+
+		/* UniMAC may forward CRC */
+		if (priv->crc_fwd) {
+			skb_trim(skb, len - ETH_FCS_LEN);
+			len -= ETH_FCS_LEN;
+		}
+
+		skb->protocol = eth_type_trans(skb, ndev);
+		ndev->stats.rx_packets++;
+		ndev->stats.rx_bytes += len;
+
+		napi_gro_receive(&priv->napi, skb);
+refill:
+		/* Best effort: a failed refill leaves cb->skb NULL, which is
+		 * caught by the check above the next time this entry is
+		 * visited.
+		 */
+		bcm_sysport_rx_refill(priv, cb);
+	}
+
+	return processed;
+}
+
+/* Release one transmitted control block.
+ *
+ * A control block carrying an SKB is unmapped, freed and added to the
+ * completion counters passed in @bytes_compl / @pkts_compl. A control
+ * block without an SKB but with a recorded DMA address is treated as an
+ * SKB fragment and only unmapped.
+ */
+static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_priv *priv,
+					struct bcm_sysport_cb *cb,
+					unsigned int *bytes_compl,
+					unsigned int *pkts_compl)
+{
+	struct net_device *ndev = priv->netdev;
+	struct device *kdev = &priv->pdev->dev;
+
+	if (cb->skb) {
+		ndev->stats.tx_bytes += cb->skb->len;
+		*bytes_compl += cb->skb->len;
+		dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
+				dma_unmap_len(cb, dma_len),
+				DMA_TO_DEVICE);
+		ndev->stats.tx_packets++;
+		(*pkts_compl)++;
+		bcm_sysport_free_cb(cb);
+		return;
+	}
+
+	/* SKB fragment */
+	if (!dma_unmap_addr(cb, dma_addr))
+		return;
+
+	ndev->stats.tx_bytes += dma_unmap_len(cb, dma_len);
+	dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr),
+			dma_unmap_len(cb, dma_len), DMA_TO_DEVICE);
+	dma_unmap_addr_set(cb, dma_addr, 0);
+}
+
+/* Reclaim queued SKBs for transmission completion, lockless version.
+ *
+ * Reads the ring's hardware producer/consumer index register, releases
+ * every control block between the software and hardware consumer indexes
+ * and wakes the queue if it was stopped and at least one packet completed.
+ * Returns the number of packets completed.
+ */
+static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
+					     struct bcm_sysport_tx_ring *ring)
+{
+	struct net_device *ndev = priv->netdev;
+	struct netdev_queue *txq = netdev_get_tx_queue(ndev, ring->index);
+	unsigned int hw_c_index, sw_c_index, pending, ring_size;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
+	u32 hw_ind;
+
+	/* Compute how many descriptors have been processed since last call */
+	hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index));
+	hw_c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK;
+	ring->p_index = (hw_ind & RING_PROD_INDEX_MASK);
+
+	sw_c_index = ring->c_index;
+	ring_size = ring->size;
+
+	hw_c_index &= (ring_size - 1);
+
+	/* Handle the hardware index having wrapped past the ring end */
+	pending = hw_c_index - sw_c_index;
+	if (hw_c_index < sw_c_index)
+		pending = ring_size - sw_c_index + hw_c_index;
+
+	netif_dbg(priv, tx_done, ndev,
+			"ring=%d c_index=%d last_tx_cn=%d last_c_index=%d\n",
+			ring->index, hw_c_index, pending, sw_c_index);
+
+	for (; pending > 0; pending--) {
+		bcm_sysport_tx_reclaim_one(priv, &ring->cbs[sw_c_index],
+					   &bytes_compl, &pkts_compl);
+
+		ring->desc_count++;
+		sw_c_index++;
+		sw_c_index &= (ring_size - 1);
+	}
+
+	ring->c_index = hw_c_index;
+
+	if (netif_tx_queue_stopped(txq) && pkts_compl)
+		netif_tx_wake_queue(txq);
+
+	netif_dbg(priv, tx_done, ndev,
+			"ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n",
+			ring->index, ring->c_index, pkts_compl, bytes_compl);
+
+	return pkts_compl;
+}
+
+/* Locked version of the per-ring TX reclaim routine: takes the ring lock
+ * with interrupts disabled around __bcm_sysport_tx_reclaim() and returns
+ * its result.
+ */
+static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
+					   struct bcm_sysport_tx_ring *ring)
+{
+	unsigned long irq_flags;
+	unsigned int reclaimed;
+
+	spin_lock_irqsave(&ring->lock, irq_flags);
+	reclaimed = __bcm_sysport_tx_reclaim(priv, ring);
+	spin_unlock_irqrestore(&ring->lock, irq_flags);
+
+	return reclaimed;
+}
+
+/* NAPI poll handler for one TX ring: reclaim completed packets.
+ *
+ * The reclaim routine is not bounded by @budget, so the number of packets
+ * it reports may exceed it. A poll handler must never report more work
+ * than its budget: when the budget is reached or exceeded we stay
+ * scheduled and report exactly @budget; otherwise polling is completed
+ * and the ring's TX interrupt is unmasked again.
+ */
+static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
+{
+	struct bcm_sysport_tx_ring *ring =
+		container_of(napi, struct bcm_sysport_tx_ring, napi);
+	unsigned int work_done;
+
+	work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
+
+	if (work_done < (unsigned int)budget) {
+		napi_complete(napi);
+		/* re-enable TX interrupt */
+		intrl2_1_mask_clear(ring->priv, BIT(ring->index));
+		return work_done;
+	}
+
+	return budget;
+}
+
+/* Run the locked TX reclaim on every TX queue of the device */
+static void bcm_sysport_tx_reclaim_all(struct bcm_sysport_priv *priv)
+{
+	unsigned int ring_idx = 0;
+
+	while (ring_idx < priv->netdev->num_tx_queues) {
+		bcm_sysport_tx_reclaim(priv, &priv->tx_rings[ring_idx]);
+		ring_idx++;
+	}
+}
+
+/* NAPI poll handler for reception: process up to @budget packets, publish
+ * the new consumer index to the hardware and, when the budget was not
+ * exhausted, complete polling and unmask the RX done interrupt.
+ */
+static int bcm_sysport_poll(struct napi_struct *napi, int budget)
+{
+	struct bcm_sysport_priv *priv =
+		container_of(napi, struct bcm_sysport_priv, napi);
+	unsigned int work_done;
+
+	work_done = bcm_sysport_desc_rx(priv, budget);
+
+	/* Advance the software consumer index and tell the hardware */
+	priv->rx_c_index += work_done;
+	priv->rx_c_index &= RDMA_CONS_INDEX_MASK;
+	rdma_writel(priv, priv->rx_c_index, RDMA_CONS_INDEX);
+
+	if (work_done >= budget)
+		return work_done;
+
+	napi_complete(napi);
+	/* re-enable RX interrupts */
+	intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
+
+	return work_done;
+}
+
+
+/* RX and misc interrupt routine.
+ *
+ * Latches the pending, unmasked INTRL2_0 sources into priv->irq0_stat and
+ * acknowledges them. An RX done event schedules the RX NAPI context with
+ * the RX interrupt masked; a TX ring full event triggers a reclaim of all
+ * TX rings. Returns IRQ_NONE when no unmasked source was pending.
+ */
+static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+
+	/* Keep only the sources that are both pending and not masked */
+	priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) &
+			  ~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
+	/* Acknowledge exactly the sources we are about to handle */
+	intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+
+	if (unlikely(priv->irq0_stat == 0)) {
+		netdev_warn(priv->netdev, "spurious RX interrupt\n");
+		return IRQ_NONE;
+	}
+
+	if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
+		if (likely(napi_schedule_prep(&priv->napi))) {
+			/* disable RX interrupts */
+			intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
+			__napi_schedule(&priv->napi);
+		}
+	}
+
+	/* TX ring is full, perform a full reclaim since we do not know
+	 * which one would trigger this interrupt
+	 */
+	if (priv->irq0_stat & INTRL2_0_TX_RING_FULL)
+		bcm_sysport_tx_reclaim_all(priv);
+
+	return IRQ_HANDLED;
+}
+
+/* TX interrupt service routine.
+ *
+ * Latches the pending, unmasked INTRL2_1 sources into priv->irq1_stat,
+ * then schedules the NAPI context of every TX ring whose bit is set, with
+ * that ring's interrupt masked. Returns IRQ_NONE when no unmasked source
+ * was pending.
+ */
+static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct bcm_sysport_tx_ring *txr;
+	unsigned int ring;
+
+	/* Keep only the sources that are both pending and not masked */
+	priv->irq1_stat = intrl2_1_readl(priv, INTRL2_CPU_STATUS) &
+				~intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
+	/* Note: every source is cleared here, not only the latched ones */
+	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
+
+	if (unlikely(priv->irq1_stat == 0)) {
+		netdev_warn(priv->netdev, "spurious TX interrupt\n");
+		return IRQ_NONE;
+	}
+
+	/* Bit N of the status corresponds to TX ring N */
+	for (ring = 0; ring < dev->num_tx_queues; ring++) {
+		if (!(priv->irq1_stat & BIT(ring)))
+			continue;
+
+		txr = &priv->tx_rings[ring];
+
+		if (likely(napi_schedule_prep(&txr->napi))) {
+			intrl2_1_mask_set(priv, BIT(ring));
+			__napi_schedule(&txr->napi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Prepend a zeroed Transmit Status Block (TSB) to @skb and, for
+ * CHECKSUM_PARTIAL IPv4/IPv6 packets, fill in the L4 checksum offload
+ * information.
+ *
+ * Returns 0 on success. Returns -ENOMEM if headroom for the TSB could not
+ * be obtained; in that case @skb has been freed and the TX error/drop
+ * counters have been incremented, so the caller must not touch it again.
+ */
+static int bcm_sysport_insert_tsb(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bcm_tsb *tsb;
+	u32 csum_info;
+	u8 ip_proto;
+	u16 csum_start;
+	u16 ip_ver;
+
+	/* Make room for the TSB in place. skb_cow_head() reallocates the
+	 * buffer head if needed but keeps the same struct sk_buff, so the
+	 * pointer held by the caller remains valid; swapping in a new SKB
+	 * here would leave the caller transmitting a freed one.
+	 */
+	if (unlikely(skb_cow_head(skb, sizeof(*tsb)))) {
+		/* May run with interrupts disabled, use the _any variant */
+		dev_kfree_skb_any(skb);
+		dev->stats.tx_errors++;
+		dev->stats.tx_dropped++;
+		return -ENOMEM;
+	}
+
+	tsb = (struct bcm_tsb *)skb_push(skb, sizeof(*tsb));
+	/* Zero-out TSB by default */
+	memset(tsb, 0, sizeof(*tsb));
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/* skb->protocol is in network byte order */
+		ip_ver = ntohs(skb->protocol);
+		switch (ip_ver) {
+		case ETH_P_IP:
+			ip_proto = ip_hdr(skb)->protocol;
+			break;
+		case ETH_P_IPV6:
+			ip_proto = ipv6_hdr(skb)->nexthdr;
+			break;
+		default:
+			/* Not IP: leave the TSB zeroed */
+			return 0;
+		}
+
+		/* Get the checksum offset and the L4 (transport) offset */
+		csum_start = skb_checksum_start_offset(skb) - sizeof(*tsb);
+		csum_info = (csum_start + skb->csum_offset) & L4_CSUM_PTR_MASK;
+		csum_info |= (csum_start << L4_PTR_SHIFT);
+
+		if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) {
+			csum_info |= L4_LENGTH_VALID;
+			if (ip_proto == IPPROTO_UDP && ip_ver == ETH_P_IP)
+				csum_info |= L4_UDP;
+		} else
+			csum_info = 0;
+
+		tsb->l4_ptr_dest_map = csum_info;
+	}
+
+	return 0;
+}
+
+/* ndo_start_xmit: queue @skb on the TX ring selected by its queue mapping.
+ *
+ * Returns NETDEV_TX_BUSY (with the queue stopped) when the ring has no
+ * free descriptor, NETDEV_TX_OK otherwise. Whenever NETDEV_TX_OK is
+ * returned the SKB has either been handed to the hardware or been freed.
+ */
+static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct device *kdev = &priv->pdev->dev;
+	struct bcm_sysport_tx_ring *ring;
+	struct bcm_sysport_cb *cb;
+	struct netdev_queue *txq;
+	struct dma_desc *desc;
+	unsigned int skb_len;
+	unsigned long flags;
+	dma_addr_t mapping;
+	u32 len_status;
+	u16 queue;
+	int ret;
+
+	queue = skb_get_queue_mapping(skb);
+	txq = netdev_get_tx_queue(dev, queue);
+	ring = &priv->tx_rings[queue];
+
+	/* lock against tx reclaim in BH context and TX ring full interrupt */
+	spin_lock_irqsave(&ring->lock, flags);
+	if (unlikely(ring->desc_count == 0)) {
+		netif_tx_stop_queue(txq);
+		netdev_err(dev, "queue %d awake and ring full!\n", queue);
+		ret = NETDEV_TX_BUSY;
+		goto out;
+	}
+
+	/* Insert TSB and checksum infos */
+	if (priv->tsb_en) {
+		ret = bcm_sysport_insert_tsb(skb, dev);
+		if (ret) {
+			/* The SKB was already freed by the helper */
+			ret = NETDEV_TX_OK;
+			goto out;
+		}
+	}
+
+	/* The Ethernet switch we are interfaced with needs packets to be at
+	 * least 64 bytes (including FCS) otherwise they will be discarded when
+	 * they enter the switch port logic. When Broadcom tags are enabled, we
+	 * need to make sure that packets are at least 68 bytes
+	 * (including FCS and tag) because the length verification is done after
+	 * the Broadcom tag is stripped off the ingress packet.
+	 */
+	if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
+		/* skb_padto() frees the SKB on failure */
+		ret = NETDEV_TX_OK;
+		goto out;
+	}
+
+	/* Use the padded length for short packets */
+	skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ?
+			ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len;
+
+	mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(kdev, mapping)) {
+		netif_err(priv, tx_err, dev, "DMA map failed at %p (len=%d)\n",
+				skb->data, skb_len);
+		/* We report the packet as consumed, so it must be released
+		 * here or it would be leaked.
+		 */
+		dev_kfree_skb_any(skb);
+		ret = NETDEV_TX_OK;
+		goto out;
+	}
+
+	/* Remember the SKB for future freeing */
+	cb = &ring->cbs[ring->curr_desc];
+	cb->skb = skb;
+	dma_unmap_addr_set(cb, dma_addr, mapping);
+	dma_unmap_len_set(cb, dma_len, skb_len);
+
+	/* Fetch a descriptor entry from our pool */
+	desc = ring->desc_cpu;
+
+	desc->addr_lo = lower_32_bits(mapping);
+	len_status = upper_32_bits(mapping) & DESC_ADDR_HI_MASK;
+	len_status |= (skb_len << DESC_LEN_SHIFT);
+	len_status |= (DESC_SOP | DESC_EOP | TX_STATUS_APP_CRC) <<
+			DESC_STATUS_SHIFT;
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		len_status |= (DESC_L4_CSUM << DESC_STATUS_SHIFT);
+
+	ring->curr_desc++;
+	if (ring->curr_desc == ring->size)
+		ring->curr_desc = 0;
+	ring->desc_count--;
+
+	/* Ensure write completion of the descriptor status/length
+	 * in DRAM before the System Port WRITE_PORT register latches
+	 * the value
+	 */
+	wmb();
+	desc->addr_status_len = len_status;
+	wmb();
+
+	/* Write this descriptor address to the RING write port */
+	tdma_port_write_desc_addr(priv, desc, ring->index);
+
+	/* Check ring space and update SW control flow */
+	if (ring->desc_count == 0)
+		netif_tx_stop_queue(txq);
+
+	netif_dbg(priv, tx_queued, dev, "ring=%d desc_count=%d, curr_desc=%d\n",
+			ring->index, ring->desc_count, ring->curr_desc);
+
+	ret = NETDEV_TX_OK;
+out:
+	spin_unlock_irqrestore(&ring->lock, flags);
+	return ret;
+}
+
+/* ndo_tx_timeout: log the event, count it as a TX error, refresh the
+ * transmit timestamp and wake every TX queue.
+ */
+static void bcm_sysport_tx_timeout(struct net_device *dev)
+{
+	netdev_warn(dev, "transmit timeout!\n");
+
+	dev->stats.tx_errors++;
+	dev->trans_start = jiffies;
+
+	netif_tx_wake_all_queues(dev);
+}
+
+/* phylib adjust link callback.
+ *
+ * Compares the PHY's link, duplex and pause state with the cached copies
+ * and, if any of them changed, reprograms the speed, half-duplex and
+ * pause-ignore bits of UMAC_CMD and prints the new link status.
+ */
+static void bcm_sysport_adj_link(struct net_device *dev)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+	unsigned int changed = 0;
+	u32 cmd_bits = 0, reg;
+
+	/* Detect what changed and refresh the cached state */
+	if (priv->old_link != phydev->link) {
+		priv->old_link = phydev->link;
+		changed = 1;
+	}
+
+	if (priv->old_duplex != phydev->duplex) {
+		priv->old_duplex = phydev->duplex;
+		changed = 1;
+	}
+
+	if (priv->old_pause != phydev->pause) {
+		priv->old_pause = phydev->pause;
+		changed = 1;
+	}
+
+	/* Translate the PHY state into UMAC_CMD bits */
+	if (phydev->speed == SPEED_2500)
+		cmd_bits = CMD_SPEED_2500;
+	else if (phydev->speed == SPEED_1000)
+		cmd_bits = CMD_SPEED_1000;
+	else if (phydev->speed == SPEED_100)
+		cmd_bits = CMD_SPEED_100;
+	else if (phydev->speed == SPEED_10)
+		cmd_bits = CMD_SPEED_10;
+	cmd_bits <<= CMD_SPEED_SHIFT;
+
+	if (phydev->duplex == DUPLEX_HALF)
+		cmd_bits |= CMD_HD_EN;
+
+	if (!phydev->pause)
+		cmd_bits |= CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE;
+
+	if (!changed)
+		return;
+
+	reg = umac_readl(priv, UMAC_CMD);
+	reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) |
+		CMD_HD_EN | CMD_RX_PAUSE_IGNORE |
+		CMD_TX_PAUSE_IGNORE);
+	reg |= cmd_bits;
+	umac_writel(priv, reg, UMAC_CMD);
+
+	phy_print_status(priv->phydev);
+}
+
+/* Set up the software state and hardware registers of TX ring @index and
+ * enable its NAPI context.
+ *
+ * Returns 0 on success or -ENOMEM if either the DMA descriptor or the
+ * control block array cannot be allocated; nothing stays allocated on
+ * failure.
+ */
+static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
+				    unsigned int index)
+{
+	struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
+	struct device *kdev = &priv->pdev->dev;
+	size_t size;
+	void *p;
+	u32 reg;
+
+	/* Simple descriptors partitioning for now */
+	size = 256;
+
+	/* We just need one DMA descriptor which is DMA-able, since writing to
+	 * the port will allocate a new descriptor in its internal linked-list
+	 */
+	p = dma_zalloc_coherent(kdev, sizeof(struct dma_desc), &ring->desc_dma,
+				GFP_KERNEL);
+	if (!p) {
+		netif_err(priv, hw, priv->netdev, "DMA alloc failed\n");
+		return -ENOMEM;
+	}
+
+	ring->cbs = kzalloc(sizeof(struct bcm_sysport_cb) * size, GFP_KERNEL);
+	if (!ring->cbs) {
+		netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
+		/* Do not leak the descriptor allocated above */
+		dma_free_coherent(kdev, sizeof(struct dma_desc), p,
+				  ring->desc_dma);
+		ring->desc_dma = 0;
+		return -ENOMEM;
+	}
+
+	/* Initialize SW view of the ring */
+	spin_lock_init(&ring->lock);
+	ring->priv = priv;
+	netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64);
+	ring->index = index;
+	ring->size = size;
+	ring->alloc_size = ring->size;
+	ring->desc_cpu = p;
+	ring->desc_count = ring->size;
+	ring->curr_desc = 0;
+
+	/* Initialize HW ring */
+	tdma_writel(priv, RING_EN, TDMA_DESC_RING_HEAD_TAIL_PTR(index));
+	tdma_writel(priv, 0, TDMA_DESC_RING_COUNT(index));
+	tdma_writel(priv, 1, TDMA_DESC_RING_INTR_CONTROL(index));
+	tdma_writel(priv, 0, TDMA_DESC_RING_PROD_CONS_INDEX(index));
+	tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index));
+	tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index));
+
+	/* Program the number of descriptors as MAX_THRESHOLD and a value of
+	 * 1 in the hysteresis threshold field
+	 */
+	tdma_writel(priv, ring->size |
+			1 << RING_HYST_THRESH_SHIFT,
+			TDMA_DESC_RING_MAX_HYST(index));
+
+	/* Enable the ring queue in the arbiter */
+	reg = tdma_readl(priv, TDMA_TIER1_ARB_0_QUEUE_EN);
+	reg |= (1 << index);
+	tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
+
+	napi_enable(&ring->napi);
+
+	netif_dbg(priv, hw, priv->netdev,
+			"TDMA cfg, size=%d, desc_cpu=%p\n",
+			ring->size, ring->desc_cpu);
+
+	return 0;
+}
+
+/* Tear down TX ring @index: stop its NAPI context, reclaim what the
+ * hardware completed and release the control blocks and the DMA
+ * descriptor. A warning is printed if the TDMA engine is not reported as
+ * disabled on entry.
+ */
+static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
+					unsigned int index)
+{
+	struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
+	struct device *kdev = &priv->pdev->dev;
+	u32 reg;
+
+	/* Caller should stop the TDMA engine */
+	reg = tdma_readl(priv, TDMA_STATUS);
+	if (!(reg & TDMA_DISABLED))
+		netdev_warn(priv->netdev, "TDMA not stopped!\n");
+
+	napi_disable(&ring->napi);
+	netif_napi_del(&ring->napi);
+
+	bcm_sysport_tx_reclaim(priv, ring);
+
+	kfree(ring->cbs);
+	ring->cbs = NULL;
+
+	if (ring->desc_dma) {
+		/* Size must match the allocation in the init routine */
+		dma_free_coherent(kdev, sizeof(struct dma_desc),
+				  ring->desc_cpu, ring->desc_dma);
+		ring->desc_dma = 0;
+	}
+	ring->size = 0;
+	ring->alloc_size = 0;
+
+	netif_dbg(priv, hw, priv->netdev, "TDMA fini done\n");
+}
+
+/* RDMA helper: set or clear RDMA_EN in RDMA_CONTROL, then poll RDMA_STATUS
+ * until the RDMA_DISABLED bit reflects the request.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the status never matched.
+ */
+static inline int rdma_enable_set(struct bcm_sysport_priv *priv,
+					unsigned int enable)
+{
+	unsigned int retries = 1000;
+	u32 reg;
+
+	reg = rdma_readl(priv, RDMA_CONTROL);
+	reg &= ~RDMA_EN;
+	if (enable)
+		reg |= RDMA_EN;
+	rdma_writel(priv, reg, RDMA_CONTROL);
+
+	/* Poll for RDMA enabling/disabling completion */
+	for (;;) {
+		reg = rdma_readl(priv, RDMA_STATUS);
+		if (!!(reg & RDMA_DISABLED) != !!enable)
+			return 0;
+		usleep_range(1000, 2000);
+		if (retries-- == 0)
+			break;
+	}
+
+	netdev_err(priv->netdev, "timeout waiting for RDMA to finish\n");
+
+	return -ETIMEDOUT;
+}
+
+/* TDMA helper: set or clear TDMA_EN in TDMA_CONTROL, then poll TDMA_STATUS
+ * until the TDMA_DISABLED bit reflects the request.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the status never matched.
+ */
+static inline int tdma_enable_set(struct bcm_sysport_priv *priv,
+					unsigned int enable)
+{
+	unsigned int retries = 1000;
+	u32 reg;
+
+	reg = tdma_readl(priv, TDMA_CONTROL);
+	reg &= ~TDMA_EN;
+	if (enable)
+		reg |= TDMA_EN;
+	tdma_writel(priv, reg, TDMA_CONTROL);
+
+	/* Poll for TDMA enabling/disabling completion */
+	for (;;) {
+		reg = tdma_readl(priv, TDMA_STATUS);
+		if (!!(reg & TDMA_DISABLED) != !!enable)
+			return 0;
+		usleep_range(1000, 2000);
+		if (retries-- == 0)
+			break;
+	}
+
+	netdev_err(priv->netdev, "timeout waiting for TDMA to finish\n");
+
+	return -ETIMEDOUT;
+}
+
+/* Set up the single RX ring: reset the software indexes, allocate one
+ * control block per descriptor, populate the ring with receive buffers and
+ * program the RDMA registers.
+ *
+ * Returns 0 on success or a negative errno. On failure priv->rx_cbs and
+ * any buffers already attached are left in place; the visible caller
+ * (bcm_sysport_open) releases them through bcm_sysport_fini_rx_ring().
+ */
+static int bcm_sysport_init_rx_ring(struct bcm_sysport_priv *priv)
+{
+	u32 reg;
+	int ret;
+
+	/* Initialize SW view of the RX ring */
+	priv->num_rx_bds = NUM_RX_DESC;
+	priv->rx_bds = priv->base + SYS_PORT_RDMA_OFFSET;
+	priv->rx_bd_assign_ptr = priv->rx_bds;
+	priv->rx_bd_assign_index = 0;
+	priv->rx_c_index = 0;
+	priv->rx_read_ptr = 0;
+	/* kcalloc() zeroes the array and checks the count * size product */
+	priv->rx_cbs = kcalloc(priv->num_rx_bds, sizeof(*priv->rx_cbs),
+				GFP_KERNEL);
+	if (!priv->rx_cbs) {
+		netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
+		return -ENOMEM;
+	}
+
+	ret = bcm_sysport_alloc_rx_bufs(priv);
+	if (ret) {
+		netif_err(priv, hw, priv->netdev, "SKB allocation failed\n");
+		return ret;
+	}
+
+	/* Initialize HW, ensure RDMA is disabled */
+	reg = rdma_readl(priv, RDMA_STATUS);
+	if (!(reg & RDMA_DISABLED))
+		rdma_enable_set(priv, 0);
+
+	rdma_writel(priv, 0, RDMA_WRITE_PTR_LO);
+	rdma_writel(priv, 0, RDMA_WRITE_PTR_HI);
+	rdma_writel(priv, 0, RDMA_PROD_INDEX);
+	rdma_writel(priv, 0, RDMA_CONS_INDEX);
+	rdma_writel(priv, priv->num_rx_bds << RDMA_RING_SIZE_SHIFT |
+			  RX_BUF_LENGTH, RDMA_RING_BUF_SIZE);
+	/* Operate the queue in ring mode */
+	rdma_writel(priv, 0, RDMA_START_ADDR_HI);
+	rdma_writel(priv, 0, RDMA_START_ADDR_LO);
+	rdma_writel(priv, 0, RDMA_END_ADDR_HI);
+	rdma_writel(priv, NUM_HW_RX_DESC_WORDS - 1, RDMA_END_ADDR_LO);
+
+	rdma_writel(priv, 1, RDMA_MBDONE_INTR);
+
+	netif_dbg(priv, hw, priv->netdev,
+			"RDMA cfg, num_rx_bds=%d, rx_bds=%p\n",
+			priv->num_rx_bds, priv->rx_bds);
+
+	return 0;
+}
+
+/* Release the RX ring software state: unmap and free every receive buffer
+ * tracked by a control block, then free the control block array. RDMA is
+ * expected to be disabled by the caller; this is only checked (with a
+ * warning), not enforced.
+ *
+ * Safe to call when the control block array was never allocated, which is
+ * what happens when bcm_sysport_open() unwinds after
+ * bcm_sysport_init_rx_ring() failed its allocation.
+ */
+static void bcm_sysport_fini_rx_ring(struct bcm_sysport_priv *priv)
+{
+	struct bcm_sysport_cb *cb;
+	unsigned int i;
+	u32 reg;
+
+	/* Caller should ensure RDMA is disabled */
+	reg = rdma_readl(priv, RDMA_STATUS);
+	if (!(reg & RDMA_DISABLED))
+		netdev_warn(priv->netdev, "RDMA not stopped!\n");
+
+	/* Nothing to release; walking the array would dereference NULL */
+	if (!priv->rx_cbs)
+		return;
+
+	for (i = 0; i < priv->num_rx_bds; i++) {
+		cb = &priv->rx_cbs[i];
+		if (dma_unmap_addr(cb, dma_addr))
+			dma_unmap_single(&priv->pdev->dev,
+					dma_unmap_addr(cb, dma_addr),
+					RX_BUF_LENGTH, DMA_FROM_DEVICE);
+		bcm_sysport_free_cb(cb);
+	}
+
+	kfree(priv->rx_cbs);
+	priv->rx_cbs = NULL;
+
+	netif_dbg(priv, hw, priv->netdev, "RDMA fini done\n");
+}
+
+/* ndo_set_rx_mode handler: mirror IFF_PROMISC into the UniMAC command
+ * register. IFF_ALLMULTI is not supported and is ignored.
+ */
+static void bcm_sysport_set_rx_mode(struct net_device *dev)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	u32 cmd = umac_readl(priv, UMAC_CMD);
+
+	cmd = (dev->flags & IFF_PROMISC) ? (cmd | CMD_PROMISC) :
+					   (cmd & ~CMD_PROMISC);
+	umac_writel(priv, cmd, UMAC_CMD);
+}
+
+/* Turn the UniMAC receiver and transmitter on or off together. */
+static inline void umac_enable_set(struct bcm_sysport_priv *priv,
+					unsigned int enable)
+{
+	const u32 rxtx = CMD_RX_EN | CMD_TX_EN;
+	u32 cmd = umac_readl(priv, UMAC_CMD);
+
+	if (enable) {
+		umac_writel(priv, cmd | rxtx, UMAC_CMD);
+		return;
+	}
+
+	umac_writel(priv, cmd & ~rxtx, UMAC_CMD);
+
+	/* UniMAC stops on a packet boundary: give it the time needed to
+	 * finish a full-sized packet (1 msec) before returning.
+	 */
+	usleep_range(1000, 2000);
+}
+
+/* Write 0 to UMAC_CMD and wait for CMD_SW_RESET to read back as clear.
+ *
+ * The register is polled up to 1000 times, 1 usec apart. Returns 0 as soon
+ * as the bit is clear, -ETIMEDOUT if it is still set after the last poll.
+ *
+ * The previous implementation compared a post-incremented counter against
+ * 1000 after the loop: a genuine timeout left the counter at 1001 and was
+ * silently reported as success, while success on the very last poll was
+ * reported as a timeout.
+ */
+static inline int umac_reset(struct bcm_sysport_priv *priv)
+{
+	unsigned int timeout;
+
+	umac_writel(priv, 0, UMAC_CMD);
+	for (timeout = 0; timeout < 1000; timeout++) {
+		if (!(umac_readl(priv, UMAC_CMD) & CMD_SW_RESET))
+			return 0;
+
+		udelay(1);
+	}
+
+	dev_err(&priv->pdev->dev,
+		"timeout waiting for MAC to come out of reset\n");
+
+	return -ETIMEDOUT;
+}
+
+/* Program the 6-byte station address @addr into the UniMAC: bytes 0-3 go
+ * to UMAC_MAC0 (byte 0 in the most significant position), bytes 4-5 to the
+ * low 16 bits of UMAC_MAC1.
+ */
+static void umac_set_hw_addr(struct bcm_sysport_priv *priv,
+				unsigned char *addr)
+{
+	/* Widen to u32 before shifting: an unsigned char promotes to int,
+	 * and shifting a byte >= 0x80 left by 24 would overflow it.
+	 */
+	u32 mac0 = ((u32)addr[0] << 24) | ((u32)addr[1] << 16) |
+		   ((u32)addr[2] << 8) | addr[3];
+	u32 mac1 = ((u32)addr[4] << 8) | addr[5];
+
+	umac_writel(priv, mac0, UMAC_MAC0);
+	umac_writel(priv, mac1, UMAC_MAC1);
+}
+
+/* Pulse the RX and TX flush controls at TOPCTRL level: assert both flush
+ * bits, busy-wait 1 msec, then clear them again.
+ */
+static void topctrl_flush(struct bcm_sysport_priv *priv)
+{
+	topctrl_writel(priv, RX_FLUSH, RX_FLUSH_CNTL);
+	topctrl_writel(priv, TX_FLUSH, TX_FLUSH_CNTL);
+	mdelay(1);
+	topctrl_writel(priv, 0, RX_FLUSH_CNTL);
+	topctrl_writel(priv, 0, TX_FLUSH_CNTL);
+}
+
+/* ndo_open handler: reset and configure the UniMAC, attach to the PHY,
+ * request both interrupt lines, initialize the TX rings and the RX ring,
+ * then enable RDMA, TDMA, NAPI, the MAC and finally the TX queues.
+ *
+ * Returns 0 on success or a negative errno; on failure the labels at the
+ * end of the function undo the steps taken so far in reverse order.
+ */
+static int bcm_sysport_open(struct net_device *dev)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	unsigned int i;
+	u32 reg;
+	int ret;
+
+	/* Reset UniMAC */
+	ret = umac_reset(priv);
+	if (ret) {
+		netdev_err(dev, "UniMAC reset failed\n");
+		return ret;
+	}
+
+	/* Flush TX and RX FIFOs at TOPCTRL level */
+	topctrl_flush(priv);
+
+	/* Disable the UniMAC RX/TX */
+	umac_enable_set(priv, 0);
+
+	/* Enable RBUF alignment (RBUF_4B_ALGN) and Receive Status Block */
+	reg = rbuf_readl(priv, RBUF_CONTROL);
+	reg |= RBUF_4B_ALGN | RBUF_RSB_EN;
+	rbuf_writel(priv, reg, RBUF_CONTROL);
+
+	/* Set maximum frame length */
+	umac_writel(priv, UMAC_MAX_MTU_SIZE, UMAC_MAX_FRAME_LEN);
+
+	/* Set MAC address */
+	umac_set_hw_addr(priv, dev->dev_addr);
+
+	/* Read CRC forward */
+	priv->crc_fwd = !!(umac_readl(priv, UMAC_CMD) & CMD_CRC_FWD);
+
+	priv->phydev = of_phy_connect(dev, priv->phy_dn, bcm_sysport_adj_link,
+					0, priv->phy_interface);
+	if (!priv->phydev) {
+		netdev_err(dev, "could not attach to PHY\n");
+		return -ENODEV;
+	}
+
+	/* Reset house keeping link status */
+	priv->old_duplex = -1;
+	priv->old_link = -1;
+	priv->old_pause = -1;
+
+	/* mask all interrupts and request them */
+	intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+	intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
+	intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
+	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
+	intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
+
+	ret = request_irq(priv->irq0, bcm_sysport_rx_isr, 0, dev->name, dev);
+	if (ret) {
+		netdev_err(dev, "failed to request RX interrupt\n");
+		goto out_phy_disconnect;
+	}
+
+	ret = request_irq(priv->irq1, bcm_sysport_tx_isr, 0, dev->name, dev);
+	if (ret) {
+		netdev_err(dev, "failed to request TX interrupt\n");
+		goto out_free_irq0;
+	}
+
+	/* Initialize both hardware and software ring */
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		ret = bcm_sysport_init_tx_ring(priv, i);
+		if (ret) {
+			netdev_err(dev, "failed to initialize TX ring %d\n",
+					i);
+			goto out_free_tx_ring;
+		}
+	}
+
+	/* Initialize linked-list */
+	tdma_writel(priv, TDMA_LL_RAM_INIT_BUSY, TDMA_STATUS);
+
+	/* Initialize RX ring */
+	ret = bcm_sysport_init_rx_ring(priv);
+	if (ret) {
+		netdev_err(dev, "failed to initialize RX ring\n");
+		goto out_free_rx_ring;
+	}
+
+	/* Turn on RDMA */
+	ret = rdma_enable_set(priv, 1);
+	if (ret)
+		goto out_free_rx_ring;
+
+	/* Enable RX interrupt and TX ring full interrupt */
+	intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE | INTRL2_0_TX_RING_FULL);
+
+	/* Turn on TDMA */
+	ret = tdma_enable_set(priv, 1);
+	if (ret)
+		goto out_clear_rx_int;
+
+	/* Enable NAPI */
+	napi_enable(&priv->napi);
+
+	/* Turn on UniMAC TX/RX */
+	umac_enable_set(priv, 1);
+
+	phy_start(priv->phydev);
+
+	/* Enable TX interrupts for the 32 TXQs */
+	intrl2_1_mask_clear(priv, 0xffffffff);
+
+	/* Last call before we start the real business */
+	netif_tx_start_all_queues(dev);
+
+	return 0;
+
+	/* Error unwinding, in reverse order of the setup above */
+out_clear_rx_int:
+	intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE | INTRL2_0_TX_RING_FULL);
+out_free_rx_ring:
+	bcm_sysport_fini_rx_ring(priv);
+out_free_tx_ring:
+	/* NOTE(review): this walks every TX queue, including rings whose
+	 * init was never attempted when we arrive here from a failure in
+	 * the middle of the init loop - confirm that
+	 * bcm_sysport_fini_tx_ring() is safe on such rings.
+	 */
+	for (i = 0; i < dev->num_tx_queues; i++)
+		bcm_sysport_fini_tx_ring(priv, i);
+	free_irq(priv->irq1, dev);
+out_free_irq0:
+	free_irq(priv->irq0, dev);
+out_phy_disconnect:
+	phy_disconnect(priv->phydev);
+	return ret;
+}
+
+/* ndo_stop handler: stop the TX queues, NAPI and the PHY, mask and clear
+ * all interrupts, disable the MAC receiver, stop TDMA then RDMA, disable
+ * the MAC transmitter, and finally release the rings, both IRQs and the
+ * PHY connection.
+ *
+ * Returns 0 on success or the negative errno from the DMA helper that
+ * timed out.
+ */
+static int bcm_sysport_stop(struct net_device *dev)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	unsigned int i;
+	u32 reg;
+	int ret;
+
+	/* stop all software from updating hardware */
+	netif_tx_stop_all_queues(dev);
+	napi_disable(&priv->napi);
+	phy_stop(priv->phydev);
+
+	/* mask all interrupts */
+	intrl2_0_mask_set(priv, 0xffffffff);
+	intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
+	intrl2_1_mask_set(priv, 0xffffffff);
+	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
+
+	/* Disable UniMAC RX */
+	reg = umac_readl(priv, UMAC_CMD);
+	reg &= ~CMD_RX_EN;
+	umac_writel(priv, reg, UMAC_CMD);
+
+	/* NOTE(review): the early returns below skip the ring, IRQ and PHY
+	 * release at the end of this function - confirm that is intended.
+	 */
+	ret = tdma_enable_set(priv, 0);
+	if (ret) {
+		/* This message used to name RDMA although TDMA failed */
+		netdev_err(dev, "timeout disabling TDMA\n");
+		return ret;
+	}
+
+	/* Wait for a maximum packet size to be drained */
+	usleep_range(2000, 3000);
+
+	ret = rdma_enable_set(priv, 0);
+	if (ret) {
+		/* This message used to name TDMA although RDMA failed */
+		netdev_err(dev, "timeout disabling RDMA\n");
+		return ret;
+	}
+
+	/* Disable UniMAC TX */
+	reg = umac_readl(priv, UMAC_CMD);
+	reg &= ~CMD_TX_EN;
+	umac_writel(priv, reg, UMAC_CMD);
+
+	/* Free RX/TX rings SW structures */
+	for (i = 0; i < dev->num_tx_queues; i++)
+		bcm_sysport_fini_tx_ring(priv, i);
+	bcm_sysport_fini_rx_ring(priv);
+
+	free_irq(priv->irq0, dev);
+	free_irq(priv->irq1, dev);
+
+	/* Disconnect from PHY */
+	phy_disconnect(priv->phydev);
+
+	return 0;
+}
+
+/* ethtool operations for this driver. The table is never modified at
+ * run time, so it is declared const, like bcm_sysport_netdev_ops.
+ */
+static const struct ethtool_ops bcm_sysport_ethtool_ops = {
+	.get_settings		= bcm_sysport_get_settings,
+	.set_settings		= bcm_sysport_set_settings,
+	.get_drvinfo		= bcm_sysport_get_drvinfo,
+	.get_msglevel		= bcm_sysport_get_msglvl,
+	.set_msglevel		= bcm_sysport_set_msglvl,
+	.get_link		= ethtool_op_get_link,
+	.get_strings		= bcm_sysport_get_strings,
+	.get_ethtool_stats	= bcm_sysport_get_stats,
+	.get_sset_count		= bcm_sysport_get_sset_count,
+};
+
+/* net_device callbacks; installed on the netdev in bcm_sysport_probe() */
+static const struct net_device_ops bcm_sysport_netdev_ops = {
+	.ndo_start_xmit		= bcm_sysport_xmit,
+	.ndo_tx_timeout		= bcm_sysport_tx_timeout,
+	.ndo_open		= bcm_sysport_open,
+	.ndo_stop		= bcm_sysport_stop,
+	.ndo_set_features	= bcm_sysport_set_features,
+	.ndo_set_rx_mode	= bcm_sysport_set_rx_mode,
+};
+
+#define REV_FMT	"v%2x.%02x"
+
+/* Platform probe: read the queue counts from the device tree, allocate the
+ * net_device, fetch the two interrupt lines and the register window, set
+ * up PHY and MAC address information, then register the net_device.
+ *
+ * Returns 0 on success or a negative errno. The net_device is freed on
+ * every failure path taken after it has been allocated.
+ */
+static int bcm_sysport_probe(struct platform_device *pdev)
+{
+	struct bcm_sysport_priv *priv;
+	struct device_node *dn;
+	struct net_device *dev;
+	const void *macaddr;
+	struct resource *r;
+	u32 txq, rxq;
+	int phy_mode;
+	int ret;
+
+	dn = pdev->dev.of_node;
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	/* Read the Transmit/Receive Queue properties */
+	if (of_property_read_u32(dn, "systemport,num-txq", &txq))
+		txq = TDMA_NUM_RINGS;
+	if (of_property_read_u32(dn, "systemport,num-rxq", &rxq))
+		rxq = 1;
+
+	/* priv->tx_rings[] holds TDMA_NUM_RINGS entries and is indexed by
+	 * every TX queue number, so reject a device tree value that would
+	 * run past it (or that requests no queue at all).
+	 */
+	if (!txq || txq > TDMA_NUM_RINGS) {
+		dev_err(&pdev->dev, "invalid number of TX queues: %u\n", txq);
+		return -EINVAL;
+	}
+
+	dev = alloc_etherdev_mqs(sizeof(*priv), txq, rxq);
+	if (!dev)
+		return -ENOMEM;
+
+	/* Initialize private members */
+	priv = netdev_priv(dev);
+
+	priv->irq0 = platform_get_irq(pdev, 0);
+	priv->irq1 = platform_get_irq(pdev, 1);
+	if (priv->irq0 <= 0 || priv->irq1 <= 0) {
+		dev_err(&pdev->dev, "invalid interrupts\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	priv->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto err;
+	}
+
+	priv->netdev = dev;
+	priv->pdev = pdev;
+
+	/* Test the lookup result in a signed local: priv->phy_interface is
+	 * an enum, which the compiler may treat as unsigned, making a
+	 * "< 0" test on the field itself always false.
+	 */
+	phy_mode = of_get_phy_mode(dn);
+	/* Default to GMII interface mode */
+	if (phy_mode < 0)
+		phy_mode = PHY_INTERFACE_MODE_GMII;
+	priv->phy_interface = phy_mode;
+
+	/* In the case of a fixed PHY, the DT node associated
+	 * to the PHY is the Ethernet MAC DT node.
+	 */
+	if (of_phy_is_fixed_link(dn)) {
+		ret = of_phy_register_fixed_link(dn);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to register fixed PHY\n");
+			goto err;
+		}
+
+		priv->phy_dn = dn;
+	}
+
+	/* Initialize netdevice members */
+	macaddr = of_get_mac_address(dn);
+	if (!macaddr || !is_valid_ether_addr(macaddr)) {
+		dev_warn(&pdev->dev, "using random Ethernet MAC\n");
+		random_ether_addr(dev->dev_addr);
+	} else {
+		ether_addr_copy(dev->dev_addr, macaddr);
+	}
+
+	SET_NETDEV_DEV(dev, &pdev->dev);
+	dev_set_drvdata(&pdev->dev, dev);
+	dev->ethtool_ops = &bcm_sysport_ethtool_ops;
+	dev->netdev_ops = &bcm_sysport_netdev_ops;
+	netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64);
+
+	/* HW supported features, none enabled by default */
+	dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_HIGHDMA |
+				NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+	/* Set the needed headroom once and for all */
+	BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
+	dev->needed_headroom += sizeof(struct bcm_tsb);
+
+	/* We are interfaced to a switch which handles the multicast
+	 * filtering for us, so we do not support programming any
+	 * multicast hash table in this Ethernet MAC.
+	 */
+	dev->flags &= ~IFF_MULTICAST;
+
+	/* libphy will adjust the link state accordingly */
+	netif_carrier_off(dev);
+
+	ret = register_netdev(dev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register net_device\n");
+		goto err;
+	}
+
+	priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
+	dev_info(&pdev->dev,
+		"Broadcom SYSTEMPORT" REV_FMT
+		" at 0x%p (irqs: %d, %d, TXQs: %d, RXQs: %d)\n",
+		(priv->rev >> 8) & 0xff, priv->rev & 0xff,
+		priv->base, priv->irq0, priv->irq1, txq, rxq);
+
+	return 0;
+err:
+	free_netdev(dev);
+	return ret;
+}
+
+/* Platform remove: unregister and free the net_device stored as driver
+ * data, then clear the driver data pointer. The register mapping was
+ * obtained through a managed (devm_) call in probe and needs no explicit
+ * release here.
+ */
+static int bcm_sysport_remove(struct platform_device *pdev)
+{
+	struct device *kdev = &pdev->dev;
+	struct net_device *ndev = dev_get_drvdata(kdev);
+
+	unregister_netdev(ndev);
+	free_netdev(ndev);
+	dev_set_drvdata(kdev, NULL);
+
+	return 0;
+}
+
+/* Device tree compatible strings handled by this driver */
+static const struct of_device_id bcm_sysport_of_match[] = {
+	{ .compatible = "brcm,systemport-v1.00" },
+	{ .compatible = "brcm,systemport" },
+	{ /* sentinel */ }
+};
+/* Export the table so the module can be autoloaded on an OF match */
+MODULE_DEVICE_TABLE(of, bcm_sysport_of_match);
+
+/* Platform driver glue: binds probe/remove to devices matching
+ * bcm_sysport_of_match; module_platform_driver() generates the module
+ * init/exit functions that register and unregister it.
+ */
+static struct platform_driver bcm_sysport_driver = {
+	.probe	= bcm_sysport_probe,
+	.remove	= bcm_sysport_remove,
+	.driver =  {
+		.name = "brcm-systemport",
+		.owner = THIS_MODULE,
+		.of_match_table = bcm_sysport_of_match,
+	},
+};
+module_platform_driver(bcm_sysport_driver);
+
+MODULE_AUTHOR("Broadcom Corporation");
+MODULE_DESCRIPTION("Broadcom System Port Ethernet MAC driver");
+MODULE_ALIAS("platform:brcm-systemport");
+MODULE_LICENSE("GPL");

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
new file mode 100644
index 0000000..281c082
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h

@@ -0,0 +1,678 @@
+/*
+ * Broadcom BCM7xxx System Port Ethernet MAC driver
+ *
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __BCM_SYSPORT_H
+#define __BCM_SYSPORT_H
+
+#include <linux/if_vlan.h>
+
+/* Receive/transmit descriptor format */
+#define DESC_ADDR_HI_STATUS_LEN	0x00
+#define  DESC_ADDR_HI_SHIFT	0
+#define  DESC_ADDR_HI_MASK	0xff
+#define  DESC_STATUS_SHIFT	8
+#define  DESC_STATUS_MASK	0x3ff
+#define  DESC_LEN_SHIFT		18
+#define  DESC_LEN_MASK		0x7fff
+#define DESC_ADDR_LO		0x04
+
+/* HW supports 40-bit addressing hence the */
+#define DESC_SIZE		(WORDS_PER_DESC * sizeof(u32))
+
+/* Default RX buffer allocation size */
+#define RX_BUF_LENGTH		2048
+
+/* Body(1500) + EH_SIZE(14) + VLANTAG(4) + BRCMTAG(4) + FCS(4) = 1526.
+ * 1536 is multiple of 256 bytes
+ */
+#define ENET_BRCM_TAG_LEN	4
+#define ENET_PAD		10
+#define UMAC_MAX_MTU_SIZE	(ETH_DATA_LEN + ETH_HLEN + VLAN_HLEN + \
+				 ENET_BRCM_TAG_LEN + ETH_FCS_LEN + ENET_PAD)
+
+/* Transmit status block */
+struct bcm_tsb {
+	u32 pcp_dei_vid;
+#define PCP_DEI_MASK		0xf
+#define VID_SHIFT		4
+#define VID_MASK		0xfff
+	u32 l4_ptr_dest_map;
+#define L4_CSUM_PTR_MASK	0x1ff
+#define L4_PTR_SHIFT		9
+#define L4_PTR_MASK		0x1ff
+#define L4_UDP			(1 << 18)
+#define L4_LENGTH_VALID		(1 << 19)
+#define DEST_MAP_SHIFT		20
+#define DEST_MAP_MASK		0x1ff
+};
+
+/* Receive status block uses the same
+ * definitions as the DMA descriptor
+ */
+struct bcm_rsb {
+	u32 rx_status_len;
+	u32 brcm_egress_tag;
+};
+
+/* Common Receive/Transmit status bits */
+#define DESC_L4_CSUM		(1 << 7)
+#define DESC_SOP		(1 << 8)
+#define DESC_EOP		(1 << 9)
+
+/* Receive Status bits */
+#define RX_STATUS_UCAST			0
+#define RX_STATUS_BCAST			0x04
+#define RX_STATUS_MCAST			0x08
+#define RX_STATUS_L2_MCAST		0x0c
+#define RX_STATUS_ERR			(1 << 4)
+#define RX_STATUS_OVFLOW		(1 << 5)
+#define RX_STATUS_PARSE_FAIL		(1 << 6)
+
+/* Transmit Status bits */
+#define TX_STATUS_VLAN_NO_ACT		0x00
+#define TX_STATUS_VLAN_PCP_TSB		0x01
+#define TX_STATUS_VLAN_QUEUE		0x02
+#define TX_STATUS_VLAN_VID_TSB		0x03
+#define TX_STATUS_OWR_CRC		(1 << 2)
+#define TX_STATUS_APP_CRC		(1 << 3)
+#define TX_STATUS_BRCM_TAG_NO_ACT	0
+#define TX_STATUS_BRCM_TAG_ZERO		0x10
+#define TX_STATUS_BRCM_TAG_ONE_QUEUE	0x20
+#define TX_STATUS_BRCM_TAG_ONE_TSB	0x30
+#define TX_STATUS_SKIP_BYTES		(1 << 6)
+
+/* Specific register definitions */
+#define SYS_PORT_TOPCTRL_OFFSET		0
+#define REV_CNTL			0x00
+#define  REV_MASK			0xffff
+
+#define RX_FLUSH_CNTL			0x04
+#define  RX_FLUSH			(1 << 0)
+
+#define TX_FLUSH_CNTL			0x08
+#define  TX_FLUSH			(1 << 0)
+
+#define MISC_CNTL			0x0c
+#define  SYS_CLK_SEL			(1 << 0)
+#define  TDMA_EOP_SEL			(1 << 1)
+
+/* Level-2 Interrupt controller offsets and defines */
+#define SYS_PORT_INTRL2_0_OFFSET	0x200
+#define SYS_PORT_INTRL2_1_OFFSET	0x240
+#define INTRL2_CPU_STATUS		0x00
+#define INTRL2_CPU_SET			0x04
+#define INTRL2_CPU_CLEAR		0x08
+#define INTRL2_CPU_MASK_STATUS		0x0c
+#define INTRL2_CPU_MASK_SET		0x10
+#define INTRL2_CPU_MASK_CLEAR		0x14
+
+/* Level-2 instance 0 interrupt bits */
+#define INTRL2_0_GISB_ERR		(1 << 0)
+#define INTRL2_0_RBUF_OVFLOW		(1 << 1)
+#define INTRL2_0_TBUF_UNDFLOW		(1 << 2)
+#define INTRL2_0_MPD			(1 << 3)
+#define INTRL2_0_BRCM_MATCH_TAG		(1 << 4)
+#define INTRL2_0_RDMA_MBDONE		(1 << 5)
+#define INTRL2_0_OVER_MAX_THRESH	(1 << 6)
+#define INTRL2_0_BELOW_HYST_THRESH	(1 << 7)
+#define INTRL2_0_FREE_LIST_EMPTY	(1 << 8)
+#define INTRL2_0_TX_RING_FULL		(1 << 9)
+#define INTRL2_0_DESC_ALLOC_ERR		(1 << 10)
+#define INTRL2_0_UNEXP_PKTSIZE_ACK	(1 << 11)
+
+/* RXCHK offset and defines */
+#define SYS_PORT_RXCHK_OFFSET		0x300
+
+#define RXCHK_CONTROL			0x00
+#define  RXCHK_EN			(1 << 0)
+#define  RXCHK_SKIP_FCS			(1 << 1)
+#define  RXCHK_BAD_CSUM_DIS		(1 << 2)
+#define  RXCHK_BRCM_TAG_EN		(1 << 3)
+#define  RXCHK_BRCM_TAG_MATCH_SHIFT	4
+#define  RXCHK_BRCM_TAG_MATCH_MASK	0xff
+#define  RXCHK_PARSE_TNL		(1 << 12)
+#define  RXCHK_VIOL_EN			(1 << 13)
+#define  RXCHK_VIOL_DIS			(1 << 14)
+#define  RXCHK_INCOM_PKT		(1 << 15)
+#define  RXCHK_V6_DUPEXT_EN		(1 << 16)
+#define  RXCHK_V6_DUPEXT_DIS		(1 << 17)
+#define  RXCHK_ETHERTYPE_DIS		(1 << 18)
+#define  RXCHK_L2_HDR_DIS		(1 << 19)
+#define  RXCHK_L3_HDR_DIS		(1 << 20)
+#define  RXCHK_MAC_RX_ERR_DIS		(1 << 21)
+#define  RXCHK_PARSE_AUTH		(1 << 22)
+
+#define RXCHK_BRCM_TAG0			0x04
+#define RXCHK_BRCM_TAG(i)		((i) * RXCHK_BRCM_TAG0)
+#define RXCHK_BRCM_TAG0_MASK		0x24
+#define RXCHK_BRCM_TAG_MASK(i)		((i) * RXCHK_BRCM_TAG0_MASK)
+#define RXCHK_BRCM_TAG_MATCH_STATUS	0x44
+#define RXCHK_ETHERTYPE			0x48
+#define RXCHK_BAD_CSUM_CNTR		0x4C
+#define RXCHK_OTHER_DISC_CNTR		0x50
+
+/* TXCHCK offsets and defines */
+#define SYS_PORT_TXCHK_OFFSET		0x380
+#define TXCHK_PKT_RDY_THRESH		0x00
+
+/* Receive buffer offset and defines */
+#define SYS_PORT_RBUF_OFFSET		0x400
+
+#define RBUF_CONTROL			0x00
+#define  RBUF_RSB_EN			(1 << 0)
+#define  RBUF_4B_ALGN			(1 << 1)
+#define  RBUF_BRCM_TAG_STRIP		(1 << 2)
+#define  RBUF_BAD_PKT_DISC		(1 << 3)
+#define  RBUF_RESUME_THRESH_SHIFT	4
+#define  RBUF_RESUME_THRESH_MASK	0xff
+#define  RBUF_OK_TO_SEND_SHIFT		12
+#define  RBUF_OK_TO_SEND_MASK		0xff
+#define  RBUF_CRC_REPLACE		(1 << 20)
+#define  RBUF_OK_TO_SEND_MODE		(1 << 21)
+#define  RBUF_RSB_SWAP			(1 << 22)
+#define  RBUF_ACPI_EN			(1 << 23)
+
+#define RBUF_PKT_RDY_THRESH		0x04
+
+#define RBUF_STATUS			0x08
+#define  RBUF_WOL_MODE			(1 << 0)
+#define  RBUF_MPD			(1 << 1)
+#define  RBUF_ACPI			(1 << 2)
+
+#define RBUF_OVFL_DISC_CNTR		0x0c
+#define RBUF_ERR_PKT_CNTR		0x10
+
+/* Transmit buffer offset and defines */
+#define SYS_PORT_TBUF_OFFSET		0x600
+
+#define TBUF_CONTROL			0x00
+#define  TBUF_BP_EN			(1 << 0)
+#define  TBUF_MAX_PKT_THRESH_SHIFT	1
+#define  TBUF_MAX_PKT_THRESH_MASK	0x1f
+#define  TBUF_FULL_THRESH_SHIFT		8
+#define  TBUF_FULL_THRESH_MASK		0x1f
+
+/* UniMAC offset and defines */
+#define SYS_PORT_UMAC_OFFSET		0x800
+
+#define UMAC_CMD			0x008
+#define  CMD_TX_EN			(1 << 0)
+#define  CMD_RX_EN			(1 << 1)
+#define  CMD_SPEED_SHIFT		2
+#define  CMD_SPEED_10			0
+#define  CMD_SPEED_100			1
+#define  CMD_SPEED_1000			2
+#define  CMD_SPEED_2500			3
+#define  CMD_SPEED_MASK			3
+#define  CMD_PROMISC			(1 << 4)
+#define  CMD_PAD_EN			(1 << 5)
+#define  CMD_CRC_FWD			(1 << 6)
+#define  CMD_PAUSE_FWD			(1 << 7)
+#define  CMD_RX_PAUSE_IGNORE		(1 << 8)
+#define  CMD_TX_ADDR_INS		(1 << 9)
+#define  CMD_HD_EN			(1 << 10)
+#define  CMD_SW_RESET			(1 << 13)
+#define  CMD_LCL_LOOP_EN		(1 << 15)
+#define  CMD_AUTO_CONFIG		(1 << 22)
+#define  CMD_CNTL_FRM_EN		(1 << 23)
+#define  CMD_NO_LEN_CHK			(1 << 24)
+#define  CMD_RMT_LOOP_EN		(1 << 25)
+#define  CMD_PRBL_EN			(1 << 27)
+#define  CMD_TX_PAUSE_IGNORE		(1 << 28)
+#define  CMD_TX_RX_EN			(1 << 29)
+#define  CMD_RUNT_FILTER_DIS		(1 << 30)
+
+#define UMAC_MAC0			0x00c
+#define UMAC_MAC1			0x010
+#define UMAC_MAX_FRAME_LEN		0x014
+
+#define UMAC_TX_FLUSH			0x334
+
+#define UMAC_MIB_START			0x400
+
+/* There is a 0xC gap between the end of RX and beginning of TX stats and then
+ * between the end of TX stats and the beginning of the RX RUNT
+ */
+#define UMAC_MIB_STAT_OFFSET		0xc
+
+#define UMAC_MIB_CTRL			0x580
+#define  MIB_RX_CNT_RST			(1 << 0)
+#define  MIB_RUNT_CNT_RST		(1 << 1)
+#define  MIB_TX_CNT_RST			(1 << 2)
+#define UMAC_MDF_CTRL			0x650
+#define UMAC_MDF_ADDR			0x654
+
+/* Receive DMA offset and defines */
+#define SYS_PORT_RDMA_OFFSET		0x2000
+
+#define RDMA_CONTROL			0x1000
+#define  RDMA_EN			(1 << 0)
+#define  RDMA_RING_CFG			(1 << 1)
+#define  RDMA_DISC_EN			(1 << 2)
+#define  RDMA_BUF_DATA_OFFSET_SHIFT	4
+#define  RDMA_BUF_DATA_OFFSET_MASK	0x3ff
+
+#define RDMA_STATUS			0x1004
+#define  RDMA_DISABLED			(1 << 0)
+#define  RDMA_DESC_RAM_INIT_BUSY	(1 << 1)
+#define  RDMA_BP_STATUS			(1 << 2)
+
+#define RDMA_SCB_BURST_SIZE		0x1008
+
+#define RDMA_RING_BUF_SIZE		0x100c
+#define  RDMA_RING_SIZE_SHIFT		16
+
+#define RDMA_WRITE_PTR_HI		0x1010
+#define RDMA_WRITE_PTR_LO		0x1014
+#define RDMA_PROD_INDEX			0x1018
+#define  RDMA_PROD_INDEX_MASK		0xffff
+
+#define RDMA_CONS_INDEX			0x101c
+#define  RDMA_CONS_INDEX_MASK		0xffff
+
+#define RDMA_START_ADDR_HI		0x1020
+#define RDMA_START_ADDR_LO		0x1024
+#define RDMA_END_ADDR_HI		0x1028
+#define RDMA_END_ADDR_LO		0x102c
+
+#define RDMA_MBDONE_INTR		0x1030
+#define  RDMA_INTR_THRESH_MASK		0xff
+#define  RDMA_TIMEOUT_SHIFT		16
+#define  RDMA_TIMEOUT_MASK		0xffff
+
+#define RDMA_XON_XOFF_THRESH		0x1034
+#define  RDMA_XON_XOFF_THRESH_MASK	0xffff
+#define  RDMA_XOFF_THRESH_SHIFT		16
+
+#define RDMA_READ_PTR_HI		0x1038
+#define RDMA_READ_PTR_LO		0x103c
+
+#define RDMA_OVERRIDE			0x1040
+#define  RDMA_LE_MODE			(1 << 0)
+#define  RDMA_REG_MODE			(1 << 1)
+
+#define RDMA_TEST			0x1044
+#define  RDMA_TP_OUT_SEL		(1 << 0)
+#define  RDMA_MEM_SEL			(1 << 1)
+
+#define RDMA_DEBUG			0x1048
+
+/* Transmit DMA offset and defines */
+#define TDMA_NUM_RINGS			32	/* rings = queues */
+#define TDMA_PORT_SIZE			DESC_SIZE /* two 32-bits words */
+
+#define SYS_PORT_TDMA_OFFSET		0x4000
+#define TDMA_WRITE_PORT_OFFSET		0x0000
+#define TDMA_WRITE_PORT_HI(i)		(TDMA_WRITE_PORT_OFFSET + \
+					(i) * TDMA_PORT_SIZE)
+#define TDMA_WRITE_PORT_LO(i)		(TDMA_WRITE_PORT_OFFSET + \
+					sizeof(u32) + (i) * TDMA_PORT_SIZE)
+
+#define TDMA_READ_PORT_OFFSET		(TDMA_WRITE_PORT_OFFSET + \
+					(TDMA_NUM_RINGS * TDMA_PORT_SIZE))
+#define TDMA_READ_PORT_HI(i)		(TDMA_READ_PORT_OFFSET + \
+					(i) * TDMA_PORT_SIZE)
+#define TDMA_READ_PORT_LO(i)		(TDMA_READ_PORT_OFFSET + \
+					sizeof(u32) + (i) * TDMA_PORT_SIZE)
+
+#define TDMA_READ_PORT_CMD_OFFSET	(TDMA_READ_PORT_OFFSET + \
+					(TDMA_NUM_RINGS * TDMA_PORT_SIZE))
+#define TDMA_READ_PORT_CMD(i)		(TDMA_READ_PORT_CMD_OFFSET + \
+					(i) * sizeof(u32))
+
+#define TDMA_DESC_RING_00_BASE		(TDMA_READ_PORT_CMD_OFFSET + \
+					(TDMA_NUM_RINGS * sizeof(u32)))
+
+/* Register offsets and defines relatives to a specific ring number */
+#define RING_HEAD_TAIL_PTR		0x00
+#define  RING_HEAD_MASK			0x7ff
+#define  RING_TAIL_SHIFT		11
+#define  RING_TAIL_MASK			0x7ff
+#define  RING_FLUSH			(1 << 24)
+#define  RING_EN			(1 << 25)
+
+#define RING_COUNT			0x04
+#define  RING_COUNT_MASK		0x7ff
+#define  RING_BUFF_DONE_SHIFT		11
+#define  RING_BUFF_DONE_MASK		0x7ff
+
+#define RING_MAX_HYST			0x08
+#define  RING_MAX_THRESH_MASK		0x7ff
+#define  RING_HYST_THRESH_SHIFT		11
+#define  RING_HYST_THRESH_MASK		0x7ff
+
+#define RING_INTR_CONTROL		0x0c
+#define  RING_INTR_THRESH_MASK		0x7ff
+#define  RING_EMPTY_INTR_EN		(1 << 15)
+#define  RING_TIMEOUT_SHIFT		16
+#define  RING_TIMEOUT_MASK		0xffff
+
+#define RING_PROD_CONS_INDEX		0x10
+#define  RING_PROD_INDEX_MASK		0xffff
+#define  RING_CONS_INDEX_SHIFT		16
+#define  RING_CONS_INDEX_MASK		0xffff
+
+#define RING_MAPPING			0x14
+#define  RING_QID_MASK			0x3
+#define  RING_PORT_ID_SHIFT		3
+#define  RING_PORT_ID_MASK		0x7
+#define  RING_IGNORE_STATUS		(1 << 6)
+#define  RING_FAILOVER_EN		(1 << 7)
+#define  RING_CREDIT_SHIFT		8
+#define  RING_CREDIT_MASK		0xffff
+
+#define RING_PCP_DEI_VID		0x18
+#define  RING_VID_MASK			0x7ff
+#define  RING_DEI			(1 << 12)
+#define  RING_PCP_SHIFT			13
+#define  RING_PCP_MASK			0x7
+#define  RING_PKT_SIZE_ADJ_SHIFT	16
+#define  RING_PKT_SIZE_ADJ_MASK		0xf
+
+#define TDMA_DESC_RING_SIZE		28
+
+/* Definition for a given TX ring base address */
+#define TDMA_DESC_RING_BASE(i)		(TDMA_DESC_RING_00_BASE + \
+					((i) * TDMA_DESC_RING_SIZE))
+
+/* Ring indexed register addresses */
+#define TDMA_DESC_RING_HEAD_TAIL_PTR(i)	(TDMA_DESC_RING_BASE(i) + \
+					RING_HEAD_TAIL_PTR)
+#define TDMA_DESC_RING_COUNT(i)		(TDMA_DESC_RING_BASE(i) + \
+					RING_COUNT)
+#define TDMA_DESC_RING_MAX_HYST(i)	(TDMA_DESC_RING_BASE(i) + \
+					RING_MAX_HYST)
+#define TDMA_DESC_RING_INTR_CONTROL(i)	(TDMA_DESC_RING_BASE(i) + \
+					RING_INTR_CONTROL)
+#define TDMA_DESC_RING_PROD_CONS_INDEX(i) \
+					(TDMA_DESC_RING_BASE(i) + \
+					RING_PROD_CONS_INDEX)
+#define TDMA_DESC_RING_MAPPING(i)	(TDMA_DESC_RING_BASE(i) + \
+					RING_MAPPING)
+#define TDMA_DESC_RING_PCP_DEI_VID(i)	(TDMA_DESC_RING_BASE(i) + \
+					RING_PCP_DEI_VID)
+
+#define TDMA_CONTROL			0x600
+#define  TDMA_EN			(1 << 0)
+#define  TSB_EN				(1 << 1)
+#define  TSB_SWAP			(1 << 2)
+#define  ACB_ALGO			(1 << 3)
+#define  BUF_DATA_OFFSET_SHIFT		4
+#define  BUF_DATA_OFFSET_MASK		0x3ff
+#define  VLAN_EN			(1 << 14)
+#define  SW_BRCM_TAG			(1 << 15)
+#define  WNC_KPT_SIZE_UPDATE		(1 << 16)
+#define  SYNC_PKT_SIZE			(1 << 17)
+#define  ACH_TXDONE_DELAY_SHIFT		18
+#define  ACH_TXDONE_DELAY_MASK		0xff
+
+#define TDMA_STATUS			0x604
+#define  TDMA_DISABLED			(1 << 0)
+#define  TDMA_LL_RAM_INIT_BUSY		(1 << 1)
+
+#define TDMA_SCB_BURST_SIZE		0x608
+#define TDMA_OVER_MAX_THRESH_STATUS	0x60c
+#define TDMA_OVER_HYST_THRESH_STATUS	0x610
+#define TDMA_TPID			0x614
+
+#define TDMA_FREE_LIST_HEAD_TAIL_PTR	0x618
+#define  TDMA_FREE_HEAD_MASK		0x7ff
+#define  TDMA_FREE_TAIL_SHIFT		11
+#define  TDMA_FREE_TAIL_MASK		0x7ff
+
+#define TDMA_FREE_LIST_COUNT		0x61c
+#define  TDMA_FREE_LIST_COUNT_MASK	0x7ff
+
+#define TDMA_TIER2_ARB_CTRL		0x620
+#define  TDMA_ARB_MODE_RR		0
+#define  TDMA_ARB_MODE_WEIGHT_RR	0x1
+#define  TDMA_ARB_MODE_STRICT		0x2
+#define  TDMA_ARB_MODE_DEFICIT_RR	0x3
+#define  TDMA_CREDIT_SHIFT		4
+#define  TDMA_CREDIT_MASK		0xffff
+
+#define TDMA_TIER1_ARB_0_CTRL		0x624
+#define  TDMA_ARB_EN			(1 << 0)
+
+#define TDMA_TIER1_ARB_0_QUEUE_EN	0x628
+#define TDMA_TIER1_ARB_1_CTRL		0x62c
+#define TDMA_TIER1_ARB_1_QUEUE_EN	0x630
+#define TDMA_TIER1_ARB_2_CTRL		0x634
+#define TDMA_TIER1_ARB_2_QUEUE_EN	0x638
+#define TDMA_TIER1_ARB_3_CTRL		0x63c
+#define TDMA_TIER1_ARB_3_QUEUE_EN	0x640
+
+#define TDMA_SCB_ENDIAN_OVERRIDE	0x644
+#define  TDMA_LE_MODE			(1 << 0)
+#define  TDMA_REG_MODE			(1 << 1)
+
+#define TDMA_TEST			0x648
+#define  TDMA_TP_OUT_SEL		(1 << 0)
+#define  TDMA_MEM_TM			(1 << 1)
+
+#define TDMA_DEBUG			0x64c
+
+/* Transmit/Receive descriptor: two 32-bit words, matching the
+ * DESC_ADDR_HI_STATUS_LEN (0x00) and DESC_ADDR_LO (0x04) offsets.
+ */
+struct dma_desc {
+	/* Upper address bits, status and length; see the DESC_ADDR_HI_*,
+	 * DESC_STATUS_* and DESC_LEN_* shift/mask definitions
+	 */
+	u32	addr_status_len;
+	/* Lower 32 bits of the buffer address */
+	u32	addr_lo;
+};
+
+/* Number of Receive hardware descriptor words */
+#define NUM_HW_RX_DESC_WORDS		1024
+/* Real number of usable descriptors */
+#define NUM_RX_DESC			(NUM_HW_RX_DESC_WORDS / WORDS_PER_DESC)
+
+/* Internal linked-list RAM has up to 1536 entries */
+#define NUM_TX_DESC			1536
+
+#define WORDS_PER_DESC			(sizeof(struct dma_desc) / sizeof(u32))
+
+/* Rx/Tx common counter group.*/
+struct bcm_sysport_pkt_counters {
+	u32	cnt_64;		/* RO Received/Transmited 64 bytes packet */
+	u32	cnt_127;	/* RO Rx/Tx 127 bytes packet */
+	u32	cnt_255;	/* RO Rx/Tx 65-255 bytes packet */
+	u32	cnt_511;	/* RO Rx/Tx 256-511 bytes packet */
+	u32	cnt_1023;	/* RO Rx/Tx 512-1023 bytes packet */
+	u32	cnt_1518;	/* RO Rx/Tx 1024-1518 bytes packet */
+	u32	cnt_mgv;	/* RO Rx/Tx 1519-1522 good VLAN packet */
+	u32	cnt_2047;	/* RO Rx/Tx 1522-2047 bytes packet*/
+	u32	cnt_4095;	/* RO Rx/Tx 2048-4095 bytes packet*/
+	u32	cnt_9216;	/* RO Rx/Tx 4096-9216 bytes packet*/
+};
+
+/* RSV, Receive Status Vector */
+struct bcm_sysport_rx_counters {
+	struct  bcm_sysport_pkt_counters pkt_cnt;
+	u32	pkt;		/* RO (0x428) Received pkt count*/
+	u32	bytes;		/* RO Received byte count */
+	u32	mca;		/* RO # of Received multicast pkt */
+	u32	bca;		/* RO # of Receive broadcast pkt */
+	u32	fcs;		/* RO # of Received FCS error  */
+	u32	cf;		/* RO # of Received control frame pkt*/
+	u32	pf;		/* RO # of Received pause frame pkt */
+	u32	uo;		/* RO # of unknown op code pkt */
+	u32	aln;		/* RO # of alignment error count */
+	u32	flr;		/* RO # of frame length out of range count */
+	u32	cde;		/* RO # of code error pkt */
+	u32	fcr;		/* RO # of carrier sense error pkt */
+	u32	ovr;		/* RO # of oversize pkt*/
+	u32	jbr;		/* RO # of jabber count */
+	u32	mtue;		/* RO # of MTU error pkt*/
+	u32	pok;		/* RO # of Received good pkt */
+	u32	uc;		/* RO # of unicast pkt */
+	u32	ppp;		/* RO # of PPP pkt */
+	u32	rcrc;		/* RO (0x470),# of CRC match pkt */
+};
+
+/* TSV, Transmit Status Vector */
+struct bcm_sysport_tx_counters {
+	struct bcm_sysport_pkt_counters pkt_cnt;
+	u32	pkts;		/* RO (0x4a8) Transmitted pkt */
+	u32	mca;		/* RO # of xmited multicast pkt */
+	u32	bca;		/* RO # of xmited broadcast pkt */
+	u32	pf;		/* RO # of xmited pause frame count */
+	u32	cf;		/* RO # of xmited control frame count */
+	u32	fcs;		/* RO # of xmited FCS error count */
+	u32	ovr;		/* RO # of xmited oversize pkt */
+	u32	drf;		/* RO # of xmited deferral pkt */
+	u32	edf;		/* RO # of xmited Excessive deferral pkt*/
+	u32	scl;		/* RO # of xmited single collision pkt */
+	u32	mcl;		/* RO # of xmited multiple collision pkt*/
+	u32	lcl;		/* RO # of xmited late collision pkt */
+	u32	ecl;		/* RO # of xmited excessive collision pkt*/
+	u32	frg;		/* RO # of xmited fragments pkt*/
+	u32	ncl;		/* RO # of xmited total collision count */
+	u32	jbr;		/* RO # of xmited jabber count*/
+	u32	bytes;		/* RO # of xmited byte count */
+	u32	pok;		/* RO # of xmited good pkt */
+	u32	uc;		/* RO (0x4f0) # of xmited unicast pkt */
+};
+
+struct bcm_sysport_mib {
+	struct bcm_sysport_rx_counters rx;
+	struct bcm_sysport_tx_counters tx;
+	u32 rx_runt_cnt;
+	u32 rx_runt_fcs;
+	u32 rx_runt_fcs_align;
+	u32 rx_runt_bytes;
+	u32 rxchk_bad_csum;
+	u32 rxchk_other_pkt_disc;
+	u32 rbuf_ovflow_cnt;
+	u32 rbuf_err_cnt;
+};
+
+/* HW maintains a large list of counters */
+enum bcm_sysport_stat_type {
+	BCM_SYSPORT_STAT_NETDEV = -1,
+	BCM_SYSPORT_STAT_MIB_RX,
+	BCM_SYSPORT_STAT_MIB_TX,
+	BCM_SYSPORT_STAT_RUNT,
+	BCM_SYSPORT_STAT_RXCHK,
+	BCM_SYSPORT_STAT_RBUF,
+};
+
+/* Macros to help define ethtool statistics */
+#define STAT_NETDEV(m) { \
+	.stat_string = __stringify(m), \
+	.stat_sizeof = sizeof(((struct net_device_stats *)0)->m), \
+	.stat_offset = offsetof(struct net_device_stats, m), \
+	.type = BCM_SYSPORT_STAT_NETDEV, \
+}
+
+#define STAT_MIB(str, m, _type) { \
+	.stat_string = str, \
+	.stat_sizeof = sizeof(((struct bcm_sysport_priv *)0)->m), \
+	.stat_offset = offsetof(struct bcm_sysport_priv, m), \
+	.type = _type, \
+}
+
+#define STAT_MIB_RX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_RX)
+#define STAT_MIB_TX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_TX)
+#define STAT_RUNT(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_RUNT)
+
+#define STAT_RXCHK(str, m, ofs) { \
+	.stat_string = str, \
+	.stat_sizeof = sizeof(((struct bcm_sysport_priv *)0)->m), \
+	.stat_offset = offsetof(struct bcm_sysport_priv, m), \
+	.type = BCM_SYSPORT_STAT_RXCHK, \
+	.reg_offset = ofs, \
+}
+
+#define STAT_RBUF(str, m, ofs) { \
+	.stat_string = str, \
+	.stat_sizeof = sizeof(((struct bcm_sysport_priv *)0)->m), \
+	.stat_offset = offsetof(struct bcm_sysport_priv, m), \
+	.type = BCM_SYSPORT_STAT_RBUF, \
+	.reg_offset = ofs, \
+}
+
+struct bcm_sysport_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int stat_sizeof;
+	int stat_offset;
+	enum bcm_sysport_stat_type type;
+	/* reg offset from UMAC base for misc counters */
+	u16 reg_offset;
+};
+
+/* Software house keeping helper structure (control block). The same type
+ * backs both the RX control blocks (priv->rx_cbs) and the per-ring TX
+ * control blocks (ring->cbs).
+ */
+struct bcm_sysport_cb {
+	struct sk_buff	*skb;		/* SKB tracked by this control block */
+	/* Buffer descriptor address. NOTE(review): this is an __iomem
+	 * pointer, i.e. a mapped register address rather than a physical
+	 * address - confirm against the users.
+	 */
+	void __iomem	*bd_addr;
+
+	/* DMA mapping of the buffer, kept for the later unmap */
+	DEFINE_DMA_UNMAP_ADDR(dma_addr);
+	DEFINE_DMA_UNMAP_LEN(dma_len);
+};
+
+/* Software view of the TX ring */
+struct bcm_sysport_tx_ring {
+	spinlock_t	lock;		/* Ring lock for tx reclaim/xmit */
+	struct napi_struct napi;	/* NAPI per tx queue */
+	dma_addr_t	desc_dma;	/* DMA cookie */
+	unsigned int	index;		/* Ring index */
+	unsigned int	size;		/* Ring current size */
+	unsigned int	alloc_size;	/* Ring one-time allocated size */
+	unsigned int	desc_count;	/* Number of descriptors */
+	unsigned int	curr_desc;	/* Current descriptor */
+	unsigned int	c_index;	/* Last consumer index */
+	unsigned int	p_index;	/* Current producer index */
+	struct bcm_sysport_cb *cbs;	/* Transmit control blocks */
+	struct dma_desc	*desc_cpu;	/* CPU view of the descriptor */
+	struct bcm_sysport_priv *priv;	/* private context backpointer */
+};
+
+/* Driver private structure */
+struct bcm_sysport_priv {
+	void __iomem		*base;
+	u32			irq0_stat;
+	u32			irq0_mask;
+	u32			irq1_stat;
+	u32			irq1_mask;
+	struct napi_struct	napi ____cacheline_aligned;
+	struct net_device	*netdev;
+	struct platform_device	*pdev;
+	int			irq0;
+	int			irq1;
+
+	/* Transmit rings */
+	struct bcm_sysport_tx_ring tx_rings[TDMA_NUM_RINGS];
+
+	/* Receive queue */
+	void __iomem		*rx_bds;
+	void __iomem		*rx_bd_assign_ptr;
+	unsigned int		rx_bd_assign_index;
+	struct bcm_sysport_cb	*rx_cbs;
+	unsigned int		num_rx_bds;
+	unsigned int		rx_read_ptr;
+	unsigned int		rx_c_index;
+
+	/* PHY device */
+	struct device_node	*phy_dn;
+	struct phy_device	*phydev;
+	phy_interface_t		phy_interface;
+	int			old_pause;
+	int			old_link;
+	int			old_duplex;
+
+	/* Misc fields */
+	unsigned int		rx_csum_en:1;
+	unsigned int		tsb_en:1;
+	unsigned int		crc_fwd:1;
+	u16			rev;
+
+	/* MIB related fields */
+	struct bcm_sysport_mib	mib;
+
+	/* Ethtool */
+	u32			msg_enable;
+};
+#endif /* __BCM_SYSPORT_H */

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 0297a79..05c6af6 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c

@@ -1436,7 +1436,7 @@
 		return -ENOMEM;
 	net_dev->netdev_ops = &bgmac_netdev_ops;
 	net_dev->irq = core->irq;
-	SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops);
+	net_dev->ethtool_ops = &bgmac_ethtool_ops;
 	bgmac = netdev_priv(net_dev);
 	bgmac->net_dev = net_dev;
 	bgmac->core = core;

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 0ab8370..67d2b00 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c

@@ -6916,8 +6916,8 @@
 		}
 	}
 	else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 	spin_unlock_bh(&bp->phy_lock);
 

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4d8f8ab..4cab09d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index dd57c7c..47c5814 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman
@@ -906,6 +906,18 @@
 		bd_prod = RX_BD(bd_prod);
 		bd_cons = RX_BD(bd_cons);
 
+		/* A rmb() is required to ensure that the CQE is not read
+		 * before it is written by the adapter DMA.  PCI ordering
+		 * rules will make sure the other fields are written before
+		 * the marker at the end of struct eth_fast_path_rx_cqe
+		 * but without rmb() a weakly ordered processor can process
+		 * stale data.  Without the barrier TPA state-machine might
+		 * enter inconsistent state and kernel stack might be
+		 * provided with incorrect packet description - these lead
+		 * to various kernel crashes.
+		 */
+		rmb();
+
 		cqe_fp_flags = cqe_fp->type_error_flags;
 		cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
 

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 3448cc0..571427c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 97ea542..51a952c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c

@@ -12,7 +12,7 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Dmitry Kravkov
  *
  */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
index 804b8f6..c6939ec 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h

@@ -12,7 +12,7 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Dmitry Kravkov
  *
  */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index b6de05e..bd0600c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman
@@ -3316,7 +3316,7 @@
 	return T_ETH_INDIRECTION_TABLE_SIZE;
 }
 
-static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
@@ -3340,14 +3340,15 @@
 	return 0;
 }
 
-static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir,
+			  const u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	size_t i;
 
 	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) {
 		/*
-		 * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy()
+		 * The same as in bnx2x_get_rxfh: we can't use a memcpy()
 		 * as an internal storage of an indirection table is a u8 array
 		 * while indir->ring_index points to an array of u32.
 		 *
@@ -3471,8 +3472,8 @@
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 	.get_module_info	= bnx2x_get_module_info,
@@ -3498,16 +3499,14 @@
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 };
 
 void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev)
 {
-	if (IS_PF(bp))
-		SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops);
-	else /* vf */
-		SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops);
+	netdev->ethtool_ops = (IS_PF(bp)) ?
+		&bnx2x_ethtool_ops : &bnx2x_vf_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_file_hdr.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_file_hdr.h
index f572ae1..8aafd9b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_file_hdr.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_file_hdr.h

@@ -6,8 +6,8 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Vladislav Zolotarov <vladz@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Vladislav Zolotarov
  * Based on the original idea of John Wright <john.wright@hp.com>.
  */
 

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
index c2dfea7..bd90e50 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h

@@ -7,9 +7,9 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
- * Modified by: Vladislav Zolotarov <vladz@broadcom.com>
+ * Modified by: Vladislav Zolotarov
  */
 
 #ifndef BNX2X_INIT_H

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
index 8ab0dd9..5669ed2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h

@@ -8,8 +8,8 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Vladislav Zolotarov <vladz@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Vladislav Zolotarov
  */
 
 #ifndef BNX2X_INIT_OPS_H

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 9b6b3d7..53fb4fa 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c

@@ -2218,7 +2218,6 @@
 	 */
 	u32 val;
 	struct bnx2x *bp = params->bp;
-	int bnx2x_status = 0;
 	u8 bmac_loopback = (params->loopback_mode == LOOPBACK_BMAC);
 
 	if (params->feature_config_flags & FEATURE_CONFIG_PFC_ENABLED)
@@ -2232,7 +2231,7 @@
 	bnx2x_update_pfc_nig(params, vars, pfc_params);
 
 	if (!vars->link_up)
-		return bnx2x_status;
+		return 0;
 
 	DP(NETIF_MSG_LINK, "About to update PFC in BMAC\n");
 
@@ -2246,7 +2245,7 @@
 		    == 0) {
 			DP(NETIF_MSG_LINK, "About to update PFC in EMAC\n");
 			bnx2x_emac_enable(params, vars, 0);
-			return bnx2x_status;
+			return 0;
 		}
 		if (CHIP_IS_E2(bp))
 			bnx2x_update_pfc_bmac2(params, vars, bmac_loopback);
@@ -2260,7 +2259,7 @@
 			val = 1;
 		REG_WR(bp, NIG_REG_BMAC0_PAUSE_OUT_EN + params->port*4, val);
 	}
-	return bnx2x_status;
+	return 0;
 }
 
 static int bnx2x_bmac1_enable(struct link_params *params,
@@ -3703,7 +3702,8 @@
 static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 					struct link_params *params,
 					struct link_vars *vars) {
-	u16 lane, i, cl72_ctrl, an_adv = 0;
+	u16 lane, i, cl72_ctrl, an_adv = 0, val;
+	u32 wc_lane_config;
 	struct bnx2x *bp = params->bp;
 	static struct bnx2x_reg_set reg_set[] = {
 		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
@@ -3822,15 +3822,27 @@
 		/* Enable Auto-Detect to support 1G over CL37 as well */
 		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 				 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, 0x10);
-
+		wc_lane_config = REG_RD(bp, params->shmem_base +
+					offsetof(struct shmem_region, dev_info.
+					shared_hw_config.wc_lane_config));
+		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
+				MDIO_WC_REG_RX0_PCI_CTRL + (lane << 4), &val);
 		/* Force cl48 sync_status LOW to avoid getting stuck in CL73
 		 * parallel-detect loop when CL73 and CL37 are enabled.
 		 */
-		CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
-				  MDIO_AER_BLOCK_AER_REG, 0);
+		val |= 1 << 11;
+
+		/* Restore Polarity settings in case it was run over by
+		 * previous link owner
+		 */
+		if (wc_lane_config &
+		    (SHARED_HW_CFG_RX_LANE0_POL_FLIP_ENABLED << lane))
+			val |= 3 << 2;
+		else
+			val &= ~(3 << 2);
 		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-				 MDIO_WC_REG_RXB_ANA_RX_CONTROL_PCI, 0x0800);
-		bnx2x_set_aer_mmd(params, phy);
+				 MDIO_WC_REG_RX0_PCI_CTRL + (lane << 4),
+				 val);
 
 		bnx2x_disable_kr2(params, vars, phy);
 	}
@@ -6473,7 +6485,6 @@
 static int bnx2x_link_initialize(struct link_params *params,
 				 struct link_vars *vars)
 {
-	int rc = 0;
 	u8 phy_index, non_ext_phy;
 	struct bnx2x *bp = params->bp;
 	/* In case of external phy existence, the line speed would be the
@@ -6546,7 +6557,7 @@
 			NIG_STATUS_XGXS0_LINK_STATUS |
 			NIG_STATUS_SERDES0_LINK_STATUS |
 			NIG_MASK_MI_INT));
-	return rc;
+	return 0;
 }
 
 static void bnx2x_int_link_reset(struct bnx2x_phy *phy,
@@ -12461,6 +12472,7 @@
 	u32 dont_clear_stat, lfa_sts;
 	struct bnx2x *bp = params->bp;
 
+	bnx2x_set_mdio_emac_per_phy(bp, params);
 	/* Sync the link parameters */
 	bnx2x_link_status_update(params, vars);
 

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 3a8e51e..2887034 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman
@@ -10053,6 +10053,24 @@
 #define BCM_5710_UNDI_FW_MF_VERS	(0x05)
 #define BNX2X_PREV_UNDI_MF_PORT(p) (BAR_TSTRORM_INTMEM + 0x150c + ((p) << 4))
 #define BNX2X_PREV_UNDI_MF_FUNC(f) (BAR_TSTRORM_INTMEM + 0x184c + ((f) << 4))
+
+static bool bnx2x_prev_is_after_undi(struct bnx2x *bp)
+{
+	/* UNDI marks its presence in DORQ by setting the normal bell CID
+	 * offset to 0x7; DORQ is only read when its RESET_REG_1 bit is set
+	 */
+	if (!(REG_RD(bp, MISC_REG_RESET_REG_1) &
+	    MISC_REGISTERS_RESET_REG_1_RST_DORQ))
+		return false;
+
+	if (REG_RD(bp, DORQ_REG_NORM_CID_OFST) == 0x7) {
+		BNX2X_DEV_INFO("UNDI previously loaded\n");
+		return true;
+	}
+
+	return false;
+}
+
 static bool bnx2x_prev_unload_undi_fw_supports_mf(struct bnx2x *bp)
 {
 	u8 major, minor, version;
@@ -10302,6 +10320,10 @@
 
 	BNX2X_DEV_INFO("Path is unmarked\n");
 
+	/* Cannot proceed with FLR if UNDI is loaded, since FW does not match */
+	if (bnx2x_prev_is_after_undi(bp))
+		goto out;
+
 	/* If function has FLR capabilities, and existing FW version matches
 	 * the one required, then FLR will be sufficient to clean any residue
 	 * left by previous driver
@@ -10322,6 +10344,7 @@
 
 	BNX2X_DEV_INFO("Could not FLR\n");
 
+out:
 	/* Close the MCP request, return failure*/
 	rc = bnx2x_prev_mcp_done(bp);
 	if (!rc)
@@ -10360,19 +10383,13 @@
 		/* close LLH filters towards the BRB */
 		bnx2x_set_rx_filter(&bp->link_params, 0);
 
-		/* Check if the UNDI driver was previously loaded
-		 * UNDI driver initializes CID offset for normal bell to 0x7
-		 */
-		if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) {
-			tmp_reg = REG_RD(bp, DORQ_REG_NORM_CID_OFST);
-			if (tmp_reg == 0x7) {
-				BNX2X_DEV_INFO("UNDI previously loaded\n");
-				prev_undi = true;
-				/* clear the UNDI indication */
-				REG_WR(bp, DORQ_REG_NORM_CID_OFST, 0);
-				/* clear possible idle check errors */
-				REG_RD(bp, NIG_REG_NIG_INT_STS_CLR_0);
-			}
+		/* Check if the UNDI driver was previously loaded */
+		if (bnx2x_prev_is_after_undi(bp)) {
+			prev_undi = true;
+			/* clear the UNDI indication */
+			REG_WR(bp, DORQ_REG_NORM_CID_OFST, 0);
+			/* clear possible idle check errors */
+			REG_RD(bp, NIG_REG_NIG_INT_STS_CLR_0);
 		}
 		if (!CHIP_IS_E1x(bp))
 			/* block FW from writing to host */
@@ -13283,8 +13300,8 @@
 	netdev_reset_tc(bp->dev);
 
 	del_timer_sync(&bp->timer);
-	cancel_delayed_work(&bp->sp_task);
-	cancel_delayed_work(&bp->period_task);
+	cancel_delayed_work_sync(&bp->sp_task);
+	cancel_delayed_work_sync(&bp->period_task);
 
 	spin_lock_bh(&bp->stats_lock);
 	bp->stats_state = STATS_STATE_DISABLED;

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index d725317..b193604 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c

@@ -12,7 +12,7 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Vladislav Zolotarov
  *
  */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 80f6c79..718ecd2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h

@@ -12,7 +12,7 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Vladislav Zolotarov
  *
  */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index faf0148..eda8583 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c

@@ -12,9 +12,9 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Shmulik Ravid <shmulikr@broadcom.com>
- *	       Ariel Elior <ariele@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Shmulik Ravid
+ *	       Ariel Elior <ariel.elior@qlogic.com>
  *
  */
 #include "bnx2x.h"
@@ -1071,8 +1071,10 @@
 	REG_WR(bp, DORQ_REG_VF_TYPE_MIN_MCID_0, 0);
 	REG_WR(bp, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff);
 
-	/* set the VF doorbell threshold */
-	REG_WR(bp, DORQ_REG_VF_USAGE_CT_LIMIT, 4);
+	/* set the VF doorbell threshold. This threshold represents the amount
+	 * of doorbells allowed in the main DORQ fifo for a specific VF.
+	 */
+	REG_WR(bp, DORQ_REG_VF_USAGE_CT_LIMIT, 64);
 }
 
 void bnx2x_iov_init_dmae(struct bnx2x *bp)
@@ -2576,7 +2578,8 @@
 
 	ivi->vf = vfidx;
 	ivi->qos = 0;
-	ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->min_tx_rate = 0;
 	ivi->spoofchk = 1; /*always enabled */
 	if (vf->state == VF_ENABLED) {
 		/* mac and vlan are in vlan_mac objects */

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 6929adb..96c575e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h

@@ -12,9 +12,9 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Shmulik Ravid <shmulikr@broadcom.com>
- *	       Ariel Elior <ariele@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Shmulik Ravid
+ *	       Ariel Elior <ariel.elior@qlogic.com>
  */
 #ifndef BNX2X_SRIOV_H
 #define BNX2X_SRIOV_H
@@ -571,7 +571,7 @@
 	return NULL;
 }
 
-static inline void bnx2x_vf_pci_dealloc(struct bnx2 *bp) {return 0; }
+static inline void bnx2x_vf_pci_dealloc(struct bnx2x *bp) {}
 static inline int bnx2x_vf_pci_alloc(struct bnx2x *bp) {return 0; }
 static inline void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) {}
 static inline int bnx2x_sriov_configure(struct pci_dev *dev, int num_vfs) {return 0; }

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index 3b75070..ca47665 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
index f358450..2beceae 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h

@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 784c715..d712d0d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

@@ -12,9 +12,9 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Shmulik Ravid <shmulikr@broadcom.com>
- *	       Ariel Elior <ariele@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Shmulik Ravid
+ *	       Ariel Elior <ariel.elior@qlogic.com>
  */
 
 #include "bnx2x.h"

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index c922b81..e21e706 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h

@@ -12,8 +12,8 @@
  * license other than the GPL, without Broadcom's express prior written
  * consent.
  *
- * Maintained by: Eilon Greenstein <eilong@broadcom.com>
- * Written by: Ariel Elior <ariele@broadcom.com>
+ * Maintained by: Ariel Elior <ariel.elior@qlogic.com>
+ * Written by: Ariel Elior <ariel.elior@qlogic.com>
  */
 #ifndef VF_PF_IF_H
 #define VF_PF_IF_H

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 4dd48d2..8244e2b 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c

@@ -608,6 +608,10 @@
 		pr_err("%s: Bad type %d\n", __func__, ulp_type);
 		return -EINVAL;
 	}
+
+	if (ulp_type == CNIC_ULP_ISCSI)
+		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
+
 	mutex_lock(&cnic_lock);
 	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
 		RCU_INIT_POINTER(cp->ulp_ops[ulp_type], NULL);
@@ -620,9 +624,7 @@
 	}
 	mutex_unlock(&cnic_lock);
 
-	if (ulp_type == CNIC_ULP_ISCSI)
-		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
-	else if (ulp_type == CNIC_ULP_FCOE)
+	if (ulp_type == CNIC_ULP_FCOE)
 		dev->fcoe_cap = NULL;
 
 	synchronize_rcu();
@@ -1039,21 +1041,17 @@
 	struct cnic_local *cp = dev->cnic_priv;
 	struct cnic_uio_dev *udev;
 
-	read_lock(&cnic_dev_lock);
 	list_for_each_entry(udev, &cnic_udev_list, list) {
 		if (udev->pdev == dev->pcidev) {
 			udev->dev = dev;
 			if (__cnic_alloc_uio_rings(udev, pages)) {
 				udev->dev = NULL;
-				read_unlock(&cnic_dev_lock);
 				return -ENOMEM;
 			}
 			cp->udev = udev;
-			read_unlock(&cnic_dev_lock);
 			return 0;
 		}
 	}
-	read_unlock(&cnic_dev_lock);
 
 	udev = kzalloc(sizeof(struct cnic_uio_dev), GFP_ATOMIC);
 	if (!udev)
@@ -1067,9 +1065,7 @@
 	if (__cnic_alloc_uio_rings(udev, pages))
 		goto err_udev;
 
-	write_lock(&cnic_dev_lock);
 	list_add(&udev->list, &cnic_udev_list);
-	write_unlock(&cnic_dev_lock);
 
 	pci_dev_get(udev->pdev);
 
@@ -5624,20 +5620,27 @@
 {
 	int if_type;
 
-	rcu_read_lock();
 	for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
 		struct cnic_ulp_ops *ulp_ops;
 		void *ctx;
 
-		ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
-		if (!ulp_ops || !ulp_ops->indicate_netevent)
+		mutex_lock(&cnic_lock);
+		ulp_ops = rcu_dereference_protected(cp->ulp_ops[if_type],
+						lockdep_is_held(&cnic_lock));
+		if (!ulp_ops || !ulp_ops->indicate_netevent) {
+			mutex_unlock(&cnic_lock);
 			continue;
+		}
 
 		ctx = cp->ulp_handle[if_type];
 
+		set_bit(ULP_F_CALL_PENDING, &cp->ulp_flags[if_type]);
+		mutex_unlock(&cnic_lock);
+
 		ulp_ops->indicate_netevent(ctx, event, vlan_id);
+
+		clear_bit(ULP_F_CALL_PENDING, &cp->ulp_flags[if_type]);
 	}
-	rcu_read_unlock();
 }
 
 /* netdev event handler */

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0966bd0..5ba1cfb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c

@@ -2481,7 +2481,7 @@
 	dev_set_drvdata(&pdev->dev, dev);
 	ether_addr_copy(dev->dev_addr, macaddr);
 	dev->watchdog_timeo = 2 * HZ;
-	SET_ETHTOOL_OPS(dev, &bcmgenet_ethtool_ops);
+	dev->ethtool_ops = &bcmgenet_ethtool_ops;
 	dev->netdev_ops = &bcmgenet_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64);
 

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 4608673..add8d85 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c

@@ -298,6 +298,7 @@
 static int bcmgenet_mii_probe(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device_node *dn = priv->pdev->dev.of_node;
 	struct phy_device *phydev;
 	unsigned int phy_flags;
 	int ret;
@@ -307,15 +308,19 @@
 		return 0;
 	}
 
-	if (priv->phy_dn)
-		phydev = of_phy_connect(dev, priv->phy_dn,
-					bcmgenet_mii_setup, 0,
-					priv->phy_interface);
-	else
-		phydev = of_phy_connect_fixed_link(dev,
-					bcmgenet_mii_setup,
-					priv->phy_interface);
+	/* In the case of a fixed PHY, the DT node associated
+	 * with the PHY is the Ethernet MAC DT node.
+	 */
+	if (of_phy_is_fixed_link(dn)) {
+		ret = of_phy_register_fixed_link(dn);
+		if (ret)
+			return ret;
 
+		priv->phy_dn = dn;
+	}
+
+	phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, 0,
+				priv->phy_interface);
 	if (!phydev) {
 		pr_err("could not attach to PHY\n");
 		return -ENODEV;

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index e5d95c5..df2792d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c

@@ -4,7 +4,7 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2005-2013 Broadcom Corporation.
+ * Copyright (C) 2005-2014 Broadcom Corporation.
  *
  * Firmware is:
  *	Derived from proprietary unpublished source code,
@@ -94,10 +94,10 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			136
+#define TG3_MIN_NUM			137
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"Jan 03, 2014"
+#define DRV_MODULE_RELDATE	"May 11, 2014"
 
 #define RESET_KIND_SHUTDOWN	0
 #define RESET_KIND_INIT		1
@@ -3224,7 +3224,7 @@
 	return 0;
 }
 
-#define NVRAM_CMD_TIMEOUT 10000
+#define NVRAM_CMD_TIMEOUT 100
 
 static int tg3_nvram_exec_cmd(struct tg3 *tp, u32 nvram_cmd)
 {
@@ -7871,9 +7871,7 @@
 	return NETDEV_TX_OK;
 }
 
-/* hard_start_xmit for devices that have the 4G bug and/or 40-bit bug and
- * support TG3_FLAG_HW_TSO_1 or firmware TSO only.
- */
+/* hard_start_xmit for all devices */
 static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
@@ -7884,6 +7882,10 @@
 	struct tg3_napi *tnapi;
 	struct netdev_queue *txq;
 	unsigned int last;
+	struct iphdr *iph = NULL;
+	struct tcphdr *tcph = NULL;
+	__sum16 tcp_csum = 0, ip_csum = 0;
+	__be16 ip_tot_len = 0;
 
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 	tnapi = &tp->napi[skb_get_queue_mapping(skb)];
@@ -7915,7 +7917,6 @@
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss) {
-		struct iphdr *iph;
 		u32 tcp_opt_len, hdr_len;
 
 		if (skb_cow_head(skb, 0))
@@ -7927,27 +7928,31 @@
 		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN;
 
 		if (!skb_is_gso_v6(skb)) {
+			if (unlikely((ETH_HLEN + hdr_len) > 80) &&
+			    tg3_flag(tp, TSO_BUG))
+				return tg3_tso_bug(tp, skb);
+
+			ip_csum = iph->check;
+			ip_tot_len = iph->tot_len;
 			iph->check = 0;
 			iph->tot_len = htons(mss + hdr_len);
 		}
 
-		if (unlikely((ETH_HLEN + hdr_len) > 80) &&
-		    tg3_flag(tp, TSO_BUG))
-			return tg3_tso_bug(tp, skb);
-
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
+		tcph = tcp_hdr(skb);
+		tcp_csum = tcph->check;
+
 		if (tg3_flag(tp, HW_TSO_1) ||
 		    tg3_flag(tp, HW_TSO_2) ||
 		    tg3_flag(tp, HW_TSO_3)) {
-			tcp_hdr(skb)->check = 0;
+			tcph->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
-		} else
-			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
-								 iph->daddr, 0,
-								 IPPROTO_TCP,
-								 0);
+		} else {
+			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 0, IPPROTO_TCP, 0);
+		}
 
 		if (tg3_flag(tp, HW_TSO_3)) {
 			mss |= (hdr_len & 0xc) << 12;
@@ -8047,6 +8052,18 @@
 	if (would_hit_hwbug) {
 		tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 
+		if (mss) {
+			/* If it's a TSO packet, do GSO instead of
+			 * allocating and copying to a large linear SKB
+			 */
+			if (ip_tot_len) {
+				iph->check = ip_csum;
+				iph->tot_len = ip_tot_len;
+			}
+			tcph->check = tcp_csum;
+			return tg3_tso_bug(tp, skb);
+		}
+
 		/* If the workaround fails due to memory/mapping
 		 * failure, silently drop this packet.
 		 */
@@ -11876,9 +11893,9 @@
 static int tg3_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
 {
 	struct tg3 *tp = netdev_priv(dev);
-	int ret;
+	int ret, cpmu_restore = 0;
 	u8  *pd;
-	u32 i, offset, len, b_offset, b_count;
+	u32 i, offset, len, b_offset, b_count, cpmu_val = 0;
 	__be32 val;
 
 	if (tg3_flag(tp, NO_NVRAM))
@@ -11890,6 +11907,19 @@
 
 	eeprom->magic = TG3_EEPROM_MAGIC;
 
+	/* Override clock, link aware and link idle modes */
+	if (tg3_flag(tp, CPMU_PRESENT)) {
+		cpmu_val = tr32(TG3_CPMU_CTRL);
+		if (cpmu_val & (CPMU_CTRL_LINK_AWARE_MODE |
+				CPMU_CTRL_LINK_IDLE_MODE)) {
+			tw32(TG3_CPMU_CTRL, cpmu_val &
+					    ~(CPMU_CTRL_LINK_AWARE_MODE |
+					     CPMU_CTRL_LINK_IDLE_MODE));
+			cpmu_restore = 1;
+		}
+	}
+	tg3_override_clk(tp);
+
 	if (offset & 3) {
 		/* adjustments to start on required 4 byte boundary */
 		b_offset = offset & 3;
@@ -11900,7 +11930,7 @@
 		}
 		ret = tg3_nvram_read_be32(tp, offset-b_offset, &val);
 		if (ret)
-			return ret;
+			goto eeprom_done;
 		memcpy(data, ((char *)&val) + b_offset, b_count);
 		len -= b_count;
 		offset += b_count;
@@ -11912,10 +11942,20 @@
 	for (i = 0; i < (len - (len & 3)); i += 4) {
 		ret = tg3_nvram_read_be32(tp, offset + i, &val);
 		if (ret) {
+			if (i)
+				i -= 4;
 			eeprom->len += i;
-			return ret;
+			goto eeprom_done;
 		}
 		memcpy(pd + i, &val, 4);
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				eeprom->len += i;
+				ret = -EINTR;
+				goto eeprom_done;
+			}
+			cond_resched();
+		}
 	}
 	eeprom->len += i;
 
@@ -11926,11 +11966,19 @@
 		b_offset = offset + len - b_count;
 		ret = tg3_nvram_read_be32(tp, b_offset, &val);
 		if (ret)
-			return ret;
+			goto eeprom_done;
 		memcpy(pd, &val, b_count);
 		eeprom->len += b_count;
 	}
-	return 0;
+	ret = 0;
+
+eeprom_done:
+	/* Restore clock, link aware and link idle modes */
+	tg3_restore_clk(tp);
+	if (cpmu_restore)
+		tw32(TG3_CPMU_CTRL, cpmu_val);
+
+	return ret;
 }
 
 static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
@@ -12484,7 +12532,7 @@
 	return size;
 }
 
-static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int i;
@@ -12495,7 +12543,7 @@
 	return 0;
 }
 
-static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	size_t i;
@@ -14027,8 +14075,8 @@
 	.get_sset_count		= tg3_get_sset_count,
 	.get_rxnfc		= tg3_get_rxnfc,
 	.get_rxfh_indir_size    = tg3_get_rxfh_indir_size,
-	.get_rxfh_indir		= tg3_get_rxfh_indir,
-	.set_rxfh_indir		= tg3_set_rxfh_indir,
+	.get_rxfh		= tg3_get_rxfh,
+	.set_rxfh		= tg3_set_rxfh,
 	.get_channels		= tg3_get_channels,
 	.set_channels		= tg3_set_channels,
 	.get_ts_info		= tg3_get_ts_info,

diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 04321e5..461acca 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h

@@ -4,7 +4,7 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2007-2013 Broadcom Corporation.
+ * Copyright (C) 2007-2014 Broadcom Corporation.
  */
 
 #ifndef _T3_H

diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index f9e1508..882cad7 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c

@@ -266,8 +266,8 @@
 		ethtool_cmd_speed_set(cmd, SPEED_10000);
 		cmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 	cmd->transceiver = XCVR_EXTERNAL;
 	cmd->maxtxpkt = 0;
@@ -1137,5 +1137,5 @@
 void
 bnad_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops);
+	netdev->ethtool_ops = &bnad_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 521dfea..25d6b2a 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c

@@ -1737,7 +1737,7 @@
 	platform_set_drvdata(pdev, ndev);
 	ether_setup(ndev);
 	ndev->netdev_ops = &xgmac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops);
+	ndev->ethtool_ops = &xgmac_ethtool_ops;
 	spin_lock_init(&priv->stats_lock);
 	INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work);
 

diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 05613a8..186566b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c

@@ -580,8 +580,8 @@
 		ethtool_cmd_speed_set(cmd, p->link_config.speed);
 		cmd->duplex = p->link_config.duplex;
 	} else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
@@ -1100,7 +1100,7 @@
 
 		netif_napi_add(netdev, &adapter->napi, t1_poll, 64);
 
-		SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops);
+		netdev->ethtool_ops = &t1_ethtool_ops;
 	}
 
 	if (t1_init_sw_modules(adapter, bi) < 0) {

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 07bbb71..5d9cce0 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c

@@ -1809,8 +1809,8 @@
 		ethtool_cmd_speed_set(cmd, p->link_config.speed);
 		cmd->duplex = p->link_config.duplex;
 	} else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
@@ -3291,7 +3291,7 @@
 			netdev->features |= NETIF_F_HIGHDMA;
 
 		netdev->netdev_ops = &cxgb_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index c0a9dd5..b0cbb2b 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c

@@ -185,7 +185,7 @@
 		if (ether_addr_equal(dev->dev_addr, mac)) {
 			rcu_read_lock();
 			if (vlan && vlan != VLAN_VID_MASK) {
-				dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan);
+				dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), vlan);
 			} else if (netif_is_bond_slave(dev)) {
 				struct net_device *upper_dev;
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 32db377..f503dce 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h

@@ -357,11 +357,17 @@
 	MAX_OFLD_QSETS = 16,          /* # of offload Tx/Rx queue sets */
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
 	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
+	MAX_RDMA_CIQS = NCHAN,        /* # of  RDMA concentrator IQs */
+	MAX_ISCSI_QUEUES = NCHAN,     /* # of streaming iSCSI Rx queues */
 };
 
 enum {
-	MAX_EGRQ = 128,         /* max # of egress queues, including FLs */
-	MAX_INGQ = 64           /* max # of interrupt-capable ingress queues */
+	INGQ_EXTRAS = 2,        /* firmware event queue and */
+				/*   forwarded interrupts */
+	MAX_EGRQ = MAX_ETH_QSETS*2 + MAX_OFLD_QSETS*2
+		   + MAX_CTRL_QUEUES + MAX_RDMA_QUEUES + MAX_ISCSI_QUEUES,
+	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES
+		   + MAX_RDMA_CIQS + MAX_ISCSI_QUEUES + INGQ_EXTRAS,
 };
 
 struct adapter;
@@ -538,6 +544,7 @@
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
 	struct sge_ofld_rxq ofldrxq[MAX_OFLD_QSETS];
 	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
+	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
 
 	struct sge_rspq intrq ____cacheline_aligned_in_smp;
@@ -548,8 +555,10 @@
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
 	u16 ofldqsets;              /* # of active offload queue sets */
 	u16 rdmaqs;                 /* # of available RDMA Rx queues */
+	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
 	u16 ofld_rxq[MAX_OFLD_QSETS];
 	u16 rdma_rxq[NCHAN];
+	u16 rdma_ciq[NCHAN];
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u32 fl_pg_order;            /* large page allocation size */
@@ -577,6 +586,7 @@
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
 #define for_each_ofldrxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
 #define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
+#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
 
 struct l2t_data;
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 24e16e3..2f8d6b9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

@@ -818,12 +818,17 @@
 	for_each_rdmarxq(&adap->sge, i)
 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
 			 adap->port[0]->name, i);
+
+	for_each_rdmaciq(&adap->sge, i)
+		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
+			 adap->port[0]->name, i);
 }
 
 static int request_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi_index = 2;
+	int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
+	int msi_index = 2;
 
 	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
 			  adap->msix_info[1].desc, &s->fw_evtq);
@@ -857,9 +862,21 @@
 			goto unwind;
 		msi_index++;
 	}
+	for_each_rdmaciq(s, rdmaciqqidx) {
+		err = request_irq(adap->msix_info[msi_index].vec,
+				  t4_sge_intr_msix, 0,
+				  adap->msix_info[msi_index].desc,
+				  &s->rdmaciq[rdmaciqqidx].rspq);
+		if (err)
+			goto unwind;
+		msi_index++;
+	}
 	return 0;
 
 unwind:
+	while (--rdmaciqqidx >= 0)
+		free_irq(adap->msix_info[--msi_index].vec,
+			 &s->rdmaciq[rdmaciqqidx].rspq);
 	while (--rdmaqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->rdmarxq[rdmaqidx].rspq);
@@ -885,6 +902,8 @@
 		free_irq(adap->msix_info[msi_index++].vec, &s->ofldrxq[i].rspq);
 	for_each_rdmarxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
+	for_each_rdmaciq(s, i)
+		free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
 }
 
 /**
@@ -1047,7 +1066,8 @@
 		if (msi_idx > 0)
 			msi_idx++;
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
-				       &q->fl, uldrx_handler);
+				       q->fl.size ? &q->fl : NULL,
+				       uldrx_handler);
 		if (err)
 			goto freeout;
 		memset(&q->stats, 0, sizeof(q->stats));
@@ -1064,13 +1084,28 @@
 		if (msi_idx > 0)
 			msi_idx++;
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
-				       msi_idx, &q->fl, uldrx_handler);
+				       msi_idx, q->fl.size ? &q->fl : NULL,
+				       uldrx_handler);
 		if (err)
 			goto freeout;
 		memset(&q->stats, 0, sizeof(q->stats));
 		s->rdma_rxq[i] = q->rspq.abs_id;
 	}
 
+	for_each_rdmaciq(s, i) {
+		struct sge_ofld_rxq *q = &s->rdmaciq[i];
+
+		if (msi_idx > 0)
+			msi_idx++;
+		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
+				       msi_idx, q->fl.size ? &q->fl : NULL,
+				       uldrx_handler);
+		if (err)
+			goto freeout;
+		memset(&q->stats, 0, sizeof(q->stats));
+		s->rdma_ciq[i] = q->rspq.abs_id;
+	}
+
 	for_each_port(adap, i) {
 		/*
 		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
@@ -2252,12 +2287,19 @@
 	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
 		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
 		cmd->port = PORT_FIBRE;
-	else if (p->port_type == FW_PORT_TYPE_SFP) {
-		if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
-		    p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
+	else if (p->port_type == FW_PORT_TYPE_SFP ||
+		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
+		 p->port_type == FW_PORT_TYPE_QSFP) {
+		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
+		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
+		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
+		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
+			cmd->port = PORT_FIBRE;
+		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
+			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
 			cmd->port = PORT_DA;
 		else
-			cmd->port = PORT_FIBRE;
+			cmd->port = PORT_OTHER;
 	} else
 		cmd->port = PORT_OTHER;
 
@@ -2461,8 +2503,7 @@
 }
 
 /**
- *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
- *	@adap: the adapter
+ *	set_rspq_intr_params - set a queue's interrupt holdoff parameters
  *	@q: the Rx queue
  *	@us: the hold-off time in us, or 0 to disable timer
  *	@cnt: the hold-off packet count, or 0 to disable counter
@@ -2470,9 +2511,11 @@
  *	Sets an Rx queue's interrupt hold-off time and packet count.  At least
  *	one of the two needs to be enabled for the queue to generate interrupts.
  */
-static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
-			       unsigned int us, unsigned int cnt)
+static int set_rspq_intr_params(struct sge_rspq *q,
+				unsigned int us, unsigned int cnt)
 {
+	struct adapter *adap = q->adap;
+
 	if ((us | cnt) == 0)
 		cnt = 1;
 
@@ -2499,24 +2542,34 @@
 	return 0;
 }
 
+/**
+ * set_rx_intr_params - set a net device's RX interrupt holdoff parameters
+ * @dev: the network device
+ * @us: the hold-off time in us, or 0 to disable timer
+ * @cnt: the hold-off packet count, or 0 to disable counter
+ *
+ * Set the RX interrupt hold-off parameters for a network device.
+ */
+static int set_rx_intr_params(struct net_device *dev,
+			      unsigned int us, unsigned int cnt)
+{
+	int i, err;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];
+
+	for (i = 0; i < pi->nqsets; i++, q++) {
+		err = set_rspq_intr_params(&q->rspq, us, cnt);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 {
-	const struct port_info *pi = netdev_priv(dev);
-	struct adapter *adap = pi->adapter;
-	struct sge_rspq *q;
-	int i;
-	int r = 0;
-
-	for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++) {
-		q = &adap->sge.ethrxq[i].rspq;
-		r = set_rxq_intr_params(adap, q, c->rx_coalesce_usecs,
-			c->rx_max_coalesced_frames);
-		if (r) {
-			dev_err(&dev->dev, "failed to set coalesce %d\n", r);
-			break;
-		}
-	}
-	return r;
+	return set_rx_intr_params(dev, c->rx_coalesce_usecs,
+				  c->rx_max_coalesced_frames);
 }
 
 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
@@ -2732,7 +2785,7 @@
 	return pi->rss_size;
 }
 
-static int get_rss_table(struct net_device *dev, u32 *p)
+static int get_rss_table(struct net_device *dev, u32 *p, u8 *key)
 {
 	const struct port_info *pi = netdev_priv(dev);
 	unsigned int n = pi->rss_size;
@@ -2742,7 +2795,7 @@
 	return 0;
 }
 
-static int set_rss_table(struct net_device *dev, const u32 *p)
+static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	struct port_info *pi = netdev_priv(dev);
@@ -2844,8 +2897,8 @@
 	.set_wol           = set_wol,
 	.get_rxnfc         = get_rxnfc,
 	.get_rxfh_indir_size = get_rss_table_size,
-	.get_rxfh_indir    = get_rss_table,
-	.set_rxfh_indir    = set_rss_table,
+	.get_rxfh	   = get_rss_table,
+	.set_rxfh	   = set_rss_table,
 	.flash_device      = set_flash,
 };
 
@@ -3386,6 +3439,77 @@
 EXPORT_SYMBOL(cxgb4_best_mtu);
 
 /**
+ *     cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned
+ *     @mtus: the HW MTU table
+ *     @header_size: Header Size
+ *     @data_size_max: maximum Data Segment Size
+ *     @data_size_align: desired Data Segment Size Alignment (2^N)
+ *     @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL)
+ *
+ *     Similar to cxgb4_best_mtu() but instead of searching the Hardware
+ *     MTU Table based solely on a Maximum MTU parameter, we break that
+ *     parameter up into a Header Size and Maximum Data Segment Size, and
+ *     provide a desired Data Segment Size Alignment.  If we find an MTU in
+ *     the Hardware MTU Table which will result in a Data Segment Size with
+ *     the requested alignment _and_ that MTU isn't "too far" from the
+ *     closest MTU, then we'll return that rather than the closest MTU.
+ */
+unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
+				    unsigned short header_size,
+				    unsigned short data_size_max,
+				    unsigned short data_size_align,
+				    unsigned int *mtu_idxp)
+{
+	unsigned short max_mtu = header_size + data_size_max;
+	unsigned short data_size_align_mask = data_size_align - 1;
+	int mtu_idx, aligned_mtu_idx;
+
+	/* Scan the MTU Table till we find an MTU which is larger than our
+	 * Maximum MTU or we reach the end of the table.  Along the way,
+	 * record the last MTU found, if any, which will result in a Data
+	 * Segment Length matching the requested alignment.
+	 */
+	for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) {
+		unsigned short data_size = mtus[mtu_idx] - header_size;
+
+		/* If this MTU minus the Header Size would result in a
+		 * Data Segment Size of the desired alignment, remember it.
+		 */
+		if ((data_size & data_size_align_mask) == 0)
+			aligned_mtu_idx = mtu_idx;
+
+		/* If we're not at the end of the Hardware MTU Table and the
+		 * next element is larger than our Maximum MTU, drop out of
+		 * the loop.
+		 */
+		if (mtu_idx+1 < NMTUS && mtus[mtu_idx+1] > max_mtu)
+			break;
+	}
+
+	/* If we fell out of the loop because we ran to the end of the table,
+	 * then we just have to use the last [largest] entry.
+	 */
+	if (mtu_idx == NMTUS)
+		mtu_idx--;
+
+	/* If we found an MTU which resulted in the requested Data Segment
+	 * Length alignment and that's "not far" from the largest MTU which is
+	 * less than or equal to the maximum MTU, then use that.
+	 */
+	if (aligned_mtu_idx >= 0 &&
+	    mtu_idx - aligned_mtu_idx <= 1)
+		mtu_idx = aligned_mtu_idx;
+
+	/* If the caller has passed in an MTU Index pointer, pass the
+	 * MTU Index back.  Return the MTU value.
+	 */
+	if (mtu_idxp)
+		*mtu_idxp = mtu_idx;
+	return mtus[mtu_idx];
+}
+EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
+
+/**
  *	cxgb4_port_chan - get the HW channel of a port
  *	@dev: the net device for the port
  *
@@ -3782,7 +3906,9 @@
 	lli.mtus = adap->params.mtus;
 	if (uld == CXGB4_ULD_RDMA) {
 		lli.rxq_ids = adap->sge.rdma_rxq;
+		lli.ciq_ids = adap->sge.rdma_ciq;
 		lli.nrxq = adap->sge.rdmaqs;
+		lli.nciq = adap->sge.rdmaciqs;
 	} else if (uld == CXGB4_ULD_ISCSI) {
 		lli.rxq_ids = adap->sge.ofld_rxq;
 		lli.nrxq = adap->sge.ofldqsets;
@@ -4061,7 +4187,7 @@
 
 	/* Parse all bond and vlan devices layered on top of the physical dev */
 	for (i = 0; i < VLAN_N_VID; i++) {
-		root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i);
+		root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
 		if (!root_dev)
 			continue;
 
@@ -5528,13 +5654,41 @@
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
 
-	/*
-	 * These are finalized by FW initialization, load their values now.
+	/* The MTU/MSS Table is initialized by now, so load their values.  If
+	 * we're initializing the adapter, then we'll make any modifications
+	 * we want to the MTU/MSS Table and also initialize the congestion
+	 * parameters.
 	 */
 	t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
-	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
-		     adap->params.b_wnd);
+	if (state != DEV_STATE_INIT) {
+		int i;
 
+		/* The default MTU Table contains values 1492 and 1500.
+		 * However, for TCP, it's better to have two values which are
+		 * a multiple of 8 +/- 4 bytes apart near this popular MTU.
+		 * This allows us to have a TCP Data Payload which is a
+		 * multiple of 8 regardless of what combination of TCP Options
+		 * are in use (always a multiple of 4 bytes) which is
+		 * important for performance reasons.  For instance, if no
+		 * options are in use, then we have a 20-byte IP header and a
+		 * 20-byte TCP header.  In this case, a 1500-byte MSS would
+		 * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
+		 * which is not a multiple of 8.  So using an MSS of 1488 in
+		 * this case results in a TCP Data Payload of 1448 bytes which
+		 * is a multiple of 8.  On the other hand, if 12-byte TCP Time
+		 * Stamps have been negotiated, then an MTU of 1500 bytes
+		 * results in a TCP Data Payload of 1448 bytes which, as
+		 * above, is a multiple of 8 bytes ...
+		 */
+		for (i = 0; i < NMTUS; i++)
+			if (adap->params.mtus[i] == 1492) {
+				adap->params.mtus[i] = 1488;
+				break;
+			}
+
+		t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
+			     adap->params.b_wnd);
+	}
 	t4_init_tp_params(adap);
 	adap->flags |= FW_OK;
 	return 0;
@@ -5669,12 +5823,12 @@
 	       (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
 }
 
-static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
+static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+			     unsigned int us, unsigned int cnt,
 			     unsigned int size, unsigned int iqe_size)
 {
-	q->intr_params = QINTR_TIMER_IDX(timer_idx) |
-			 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
-	q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
+	q->adap = adap;
+	set_rspq_intr_params(q, us, cnt);
 	q->iqe_len = iqe_size;
 	q->size = size;
 }
@@ -5688,6 +5842,7 @@
 {
 	struct sge *s = &adap->sge;
 	int i, q10g = 0, n10g = 0, qidx = 0;
+	int ciq_size;
 
 	for_each_port(adap, i)
 		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
@@ -5726,12 +5881,13 @@
 			s->ofldqsets = adap->params.nports;
 		/* For RDMA one Rx queue per channel suffices */
 		s->rdmaqs = adap->params.nports;
+		s->rdmaciqs = adap->params.nports;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
 		struct sge_eth_rxq *r = &s->ethrxq[i];
 
-		init_rspq(&r->rspq, 0, 0, 1024, 64);
+		init_rspq(adap, &r->rspq, 5, 10, 1024, 64);
 		r->fl.size = 72;
 	}
 
@@ -5747,7 +5903,7 @@
 	for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
 		struct sge_ofld_rxq *r = &s->ofldrxq[i];
 
-		init_rspq(&r->rspq, 0, 0, 1024, 64);
+		init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
 		r->rspq.uld = CXGB4_ULD_ISCSI;
 		r->fl.size = 72;
 	}
@@ -5755,13 +5911,26 @@
 	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
 		struct sge_ofld_rxq *r = &s->rdmarxq[i];
 
-		init_rspq(&r->rspq, 0, 0, 511, 64);
+		init_rspq(adap, &r->rspq, 5, 1, 511, 64);
 		r->rspq.uld = CXGB4_ULD_RDMA;
 		r->fl.size = 72;
 	}
 
-	init_rspq(&s->fw_evtq, 6, 0, 512, 64);
-	init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
+	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
+	if (ciq_size > SGE_MAX_IQ_SIZE) {
+		CH_WARN(adap, "CIQ size too small for available IQs\n");
+		ciq_size = SGE_MAX_IQ_SIZE;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
+		struct sge_ofld_rxq *r = &s->rdmaciq[i];
+
+		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
+		r->rspq.uld = CXGB4_ULD_RDMA;
+	}
+
+	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
+	init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
 }
 
 /*
@@ -5808,9 +5977,9 @@
 
 	want = s->max_ethqsets + EXTRA_VECS;
 	if (is_offload(adap)) {
-		want += s->rdmaqs + s->ofldqsets;
+		want += s->rdmaqs + s->rdmaciqs + s->ofldqsets;
 		/* need nchan for each possible ULD */
-		ofld_need = 2 * nchan;
+		ofld_need = 3 * nchan;
 	}
 	need = adap->params.nports + EXTRA_VECS + ofld_need;
 
@@ -6076,7 +6245,7 @@
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index e274a04..55e9daf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h

@@ -232,8 +232,10 @@
 	const struct cxgb4_virt_res *vr;     /* assorted HW resources */
 	const unsigned short *mtus;          /* MTU table */
 	const unsigned short *rxq_ids;       /* the ULD's Rx queue ids */
+	const unsigned short *ciq_ids;       /* the ULD's concentrator IQ ids */
 	unsigned short nrxq;                 /* # of Rx queues */
 	unsigned short ntxq;                 /* # of Tx queues */
+	unsigned short nciq;		     /* # of concentrator IQ */
 	unsigned char nchan:4;               /* # of channels */
 	unsigned char nports:4;              /* # of ports */
 	unsigned char wr_cred;               /* WR 16-byte credits */
@@ -274,6 +276,11 @@
 unsigned int cxgb4_port_idx(const struct net_device *dev);
 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
 			    unsigned int *idx);
+unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
+				    unsigned short header_size,
+				    unsigned short data_size_max,
+				    unsigned short data_size_align,
+				    unsigned int *mtu_idxp);
 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
 			 struct tp_tcp_stats *v6);
 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index e249528..dd4355d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c

@@ -1697,7 +1697,8 @@
 		return handle_trace_pkt(q->adap, si);
 
 	pkt = (const struct cpl_rx_pkt *)rsp;
-	csum_ok = pkt->csum_calc && !pkt->err_vec;
+	csum_ok = pkt->csum_calc && !pkt->err_vec &&
+		  (q->netdev->features & NETIF_F_RXCSUM);
 	if ((pkt->l2info & htonl(RXF_TCP)) &&
 	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
 		do_gro(rxq, si, pkt);
@@ -1720,8 +1721,7 @@
 
 	rxq->stats.pkts++;
 
-	if (csum_ok && (q->netdev->features & NETIF_F_RXCSUM) &&
-	    (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) {
+	if (csum_ok && (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) {
 		if (!pkt->ip_frag) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			rxq->stats.rx_cso++;
@@ -2215,7 +2215,6 @@
 	iq->cntxt_id = ntohs(c.iqid);
 	iq->abs_id = ntohs(c.physiqid);
 	iq->size--;                           /* subtract status entry */
-	iq->adap = adap;
 	iq->netdev = dev;
 	iq->handler = hnd;
 
@@ -2515,6 +2514,10 @@
 		if (oq->rspq.desc)
 			free_rspq_fl(adap, &oq->rspq, &oq->fl);
 	}
+	for (i = 0, oq = adap->sge.rdmaciq; i < adap->sge.rdmaciqs; i++, oq++) {
+		if (oq->rspq.desc)
+			free_rspq_fl(adap, &oq->rspq, &oq->fl);
+	}
 
 	/* clean up offload Tx queues */
 	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 1d1623b..71b799b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h

@@ -68,6 +68,7 @@
 	SGE_MAX_WR_LEN = 512,     /* max WR size in bytes */
 	SGE_NTIMERS = 6,          /* # of interrupt holdoff timer values */
 	SGE_NCOUNTERS = 4,        /* # of interrupt packet counter values */
+	SGE_MAX_IQ_SIZE = 65520,
 
 	SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */
 	SGE_TIMER_UPD_CIDX = 7,   /* update cidx only */

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index f2738c7..973eb11 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h

@@ -227,6 +227,7 @@
 #define DELACK(x)     ((x) << 5)
 #define ULP_MODE(x)   ((x) << 8)
 #define RCV_BUFSIZ(x) ((x) << 12)
+#define RCV_BUFSIZ_MASK 0x3FFU
 #define DSCP(x)       ((x) << 22)
 #define SMAC_SEL(x)   ((u64)(x) << 28)
 #define L2T_IDX(x)    ((u64)(x) << 36)
@@ -278,6 +279,15 @@
 	__be64 opt0;
 };
 
+struct cpl_t5_pass_accept_rpl {
+	WR_HDR;
+	union opcode_tid ot;
+	__be32 opt2;
+	__be64 opt0;
+	__be32 iss;
+	__be32 rsvd;
+};
+
 struct cpl_act_open_req {
 	WR_HDR;
 	union opcode_tid ot;

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 5285928..ff1cdd1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

@@ -2664,7 +2664,7 @@
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
+		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
 
 		/*
 		 * Initialize the hardware/software state for the port.

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 9d88c1d..bdfa80c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c

@@ -1510,7 +1510,8 @@
 {
 	struct sk_buff *skb;
 	const struct cpl_rx_pkt *pkt = (void *)rsp;
-	bool csum_ok = pkt->csum_calc && !pkt->err_vec;
+	bool csum_ok = pkt->csum_calc && !pkt->err_vec &&
+		       (rspq->netdev->features & NETIF_F_RXCSUM);
 	struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
 
 	/*
@@ -1538,8 +1539,8 @@
 	skb_record_rx_queue(skb, rspq->idx);
 	rxq->stats.pkts++;
 
-	if (csum_ok && (rspq->netdev->features & NETIF_F_RXCSUM) &&
-	    !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP|RXF_TCP))) {
+	if (csum_ok && !pkt->err_vec &&
+	    (be32_to_cpu(pkt->l2info) & (RXF_UDP|RXF_TCP))) {
 		if (!pkt->ip_frag)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else {

diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index e35c8e0..14f465f 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h

@@ -43,6 +43,8 @@
 #define ENIC_CQ_MAX		(ENIC_WQ_MAX + ENIC_RQ_MAX)
 #define ENIC_INTR_MAX		(ENIC_CQ_MAX + 2)
 
+#define ENIC_AIC_LARGE_PKT_DIFF	3
+
 struct enic_msix_entry {
 	int requested;
 	char devname[IFNAMSIZ];
@@ -50,6 +52,33 @@
 	void *devid;
 };
 
+/* Store only the lower range.  Higher range is given by fw. */
+struct enic_intr_mod_range {
+	u32 small_pkt_range_start;
+	u32 large_pkt_range_start;
+};
+
+struct enic_intr_mod_table {
+	u32 rx_rate;
+	u32 range_percent;
+};
+
+#define ENIC_MAX_LINK_SPEEDS		3
+#define ENIC_LINK_SPEED_10G		10000
+#define ENIC_LINK_SPEED_4G		4000
+#define ENIC_LINK_40G_INDEX		2
+#define ENIC_LINK_10G_INDEX		1
+#define ENIC_LINK_4G_INDEX		0
+#define ENIC_RX_COALESCE_RANGE_END	125
+#define ENIC_AIC_TS_BREAK		100
+
+struct enic_rx_coal {
+	u32 small_pkt_range_start;
+	u32 large_pkt_range_start;
+	u32 range_end;
+	u32 use_adaptive_rx_coalesce;
+};
+
 /* priv_flags */
 #define ENIC_SRIOV_ENABLED		(1 << 0)
 
@@ -85,13 +114,12 @@
 	u32 msg_enable;
 	spinlock_t devcmd_lock;
 	u8 mac_addr[ETH_ALEN];
-	u8 mc_addr[ENIC_MULTICAST_PERFECT_FILTERS][ETH_ALEN];
-	u8 uc_addr[ENIC_UNICAST_PERFECT_FILTERS][ETH_ALEN];
 	unsigned int flags;
 	unsigned int priv_flags;
 	unsigned int mc_count;
 	unsigned int uc_count;
 	u32 port_mtu;
+	struct enic_rx_coal rx_coalesce_setting;
 	u32 rx_coalesce_usecs;
 	u32 tx_coalesce_usecs;
 #ifdef CONFIG_PCI_IOV

diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c
index 4b6e569..3e27df5 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.c

@@ -88,7 +88,7 @@
 	return err;
 }
 
-int enic_dev_add_addr(struct enic *enic, u8 *addr)
+int enic_dev_add_addr(struct enic *enic, const u8 *addr)
 {
 	int err;
 
@@ -99,7 +99,7 @@
 	return err;
 }
 
-int enic_dev_del_addr(struct enic *enic, u8 *addr)
+int enic_dev_del_addr(struct enic *enic, const u8 *addr)
 {
 	int err;
 

diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h
index 129b14a..36ea1ab 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.h

@@ -45,8 +45,8 @@
 int enic_dev_del_station_addr(struct enic *enic);
 int enic_dev_packet_filter(struct enic *enic, int directed, int multicast,
 	int broadcast, int promisc, int allmulti);
-int enic_dev_add_addr(struct enic *enic, u8 *addr);
-int enic_dev_del_addr(struct enic *enic, u8 *addr);
+int enic_dev_add_addr(struct enic *enic, const u8 *addr);
+int enic_dev_del_addr(struct enic *enic, const u8 *addr);
 int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid);
 int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid);
 int enic_dev_notify_unset(struct enic *enic);

diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 47e3562..2e50b54 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c

@@ -79,6 +79,17 @@
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+void enic_intr_coal_set_rx(struct enic *enic, u32 timer)
+{
+	int i;
+	int intr;
+
+	for (i = 0; i < enic->rq_count; i++) {
+		intr = enic_msix_rq_intr(enic, i);
+		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+	}
+}
+
 static int enic_get_settings(struct net_device *netdev,
 	struct ethtool_cmd *ecmd)
 {
@@ -93,8 +104,8 @@
 		ethtool_cmd_speed_set(ecmd, vnic_dev_port_speed(enic->vdev));
 		ecmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_DISABLE;
@@ -178,9 +189,14 @@
 	struct ethtool_coalesce *ecmd)
 {
 	struct enic *enic = netdev_priv(netdev);
+	struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
 
 	ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
 	ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
+	if (rxcoal->use_adaptive_rx_coalesce)
+		ecmd->use_adaptive_rx_coalesce = 1;
+	ecmd->rx_coalesce_usecs_low = rxcoal->small_pkt_range_start;
+	ecmd->rx_coalesce_usecs_high = rxcoal->range_end;
 
 	return 0;
 }
@@ -191,17 +207,31 @@
 	struct enic *enic = netdev_priv(netdev);
 	u32 tx_coalesce_usecs;
 	u32 rx_coalesce_usecs;
+	u32 rx_coalesce_usecs_low;
+	u32 rx_coalesce_usecs_high;
+	u32 coalesce_usecs_max;
 	unsigned int i, intr;
+	struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
 
+	coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
 	tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
-		vnic_dev_get_intr_coal_timer_max(enic->vdev));
+				  coalesce_usecs_max);
 	rx_coalesce_usecs = min_t(u32, ecmd->rx_coalesce_usecs,
-		vnic_dev_get_intr_coal_timer_max(enic->vdev));
+				  coalesce_usecs_max);
+
+	rx_coalesce_usecs_low = min_t(u32, ecmd->rx_coalesce_usecs_low,
+				      coalesce_usecs_max);
+	rx_coalesce_usecs_high = min_t(u32, ecmd->rx_coalesce_usecs_high,
+				       coalesce_usecs_max);
 
 	switch (vnic_dev_get_intr_mode(enic->vdev)) {
 	case VNIC_DEV_INTR_MODE_INTX:
 		if (tx_coalesce_usecs != rx_coalesce_usecs)
 			return -EINVAL;
+		if (ecmd->use_adaptive_rx_coalesce	||
+		    ecmd->rx_coalesce_usecs_low		||
+		    ecmd->rx_coalesce_usecs_high)
+			return -EOPNOTSUPP;
 
 		intr = enic_legacy_io_intr();
 		vnic_intr_coalescing_timer_set(&enic->intr[intr],
@@ -210,6 +240,10 @@
 	case VNIC_DEV_INTR_MODE_MSI:
 		if (tx_coalesce_usecs != rx_coalesce_usecs)
 			return -EINVAL;
+		if (ecmd->use_adaptive_rx_coalesce	||
+		    ecmd->rx_coalesce_usecs_low		||
+		    ecmd->rx_coalesce_usecs_high)
+			return -EOPNOTSUPP;
 
 		vnic_intr_coalescing_timer_set(&enic->intr[0],
 			tx_coalesce_usecs);
@@ -221,12 +255,27 @@
 				tx_coalesce_usecs);
 		}
 
-		for (i = 0; i < enic->rq_count; i++) {
-			intr = enic_msix_rq_intr(enic, i);
-			vnic_intr_coalescing_timer_set(&enic->intr[intr],
-				rx_coalesce_usecs);
+		if (rxcoal->use_adaptive_rx_coalesce) {
+			if (!ecmd->use_adaptive_rx_coalesce) {
+				rxcoal->use_adaptive_rx_coalesce = 0;
+				enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
+			}
+		} else {
+			if (ecmd->use_adaptive_rx_coalesce)
+				rxcoal->use_adaptive_rx_coalesce = 1;
+			else
+				enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
 		}
 
+		if (ecmd->rx_coalesce_usecs_high) {
+			if (rx_coalesce_usecs_high <
+			    (rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
+				return -EINVAL;
+			rxcoal->range_end = rx_coalesce_usecs_high;
+			rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
+			rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
+							ENIC_AIC_LARGE_PKT_DIFF;
+		}
 		break;
 	default:
 		break;
@@ -253,5 +302,5 @@
 
 void enic_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &enic_ethtool_ops);
+	netdev->ethtool_ops = &enic_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 2945718..f32f828 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c

@@ -38,6 +38,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/prefetch.h>
 #include <net/ip6_checksum.h>
+#include <linux/ktime.h>
 
 #include "cq_enet_desc.h"
 #include "vnic_dev.h"
@@ -72,6 +73,35 @@
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, enic_id_table);
 
+#define ENIC_LARGE_PKT_THRESHOLD		1000
+#define ENIC_MAX_COALESCE_TIMERS		10
+/*  Interrupt moderation table, which will be used to decide the
+ *  coalescing timer values
+ *  {rx_rate in Mbps, mapping percentage of the range}
+ */
+struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
+	{4000,  0},
+	{4400, 10},
+	{5060, 20},
+	{5230, 30},
+	{5540, 40},
+	{5820, 50},
+	{6120, 60},
+	{6435, 70},
+	{6745, 80},
+	{7000, 90},
+	{0xFFFFFFFF, 100}
+};
+
+/* This table helps the driver to pick different ranges for rx coalescing
+ * timer depending on the link speed.
+ */
+struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
+	{0,  0}, /* 0  - 4  Gbps */
+	{0,  3}, /* 4  - 10 Gbps */
+	{3,  6}, /* 10 - 40 Gbps */
+};
+
 int enic_is_dynamic(struct enic *enic)
 {
 	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
@@ -586,8 +616,71 @@
 	return net_stats;
 }
 
+static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
+		unsigned int mc_count = netdev_mc_count(netdev);
+
+		netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
+			    ENIC_MULTICAST_PERFECT_FILTERS, mc_count);
+
+		return -ENOSPC;
+	}
+
+	enic_dev_add_addr(enic, mc_addr);
+	enic->mc_count++;
+
+	return 0;
+}
+
+static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	enic_dev_del_addr(enic, mc_addr);
+	enic->mc_count--;
+
+	return 0;
+}
+
+static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
+		unsigned int uc_count = netdev_uc_count(netdev);
+
+		netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
+			    ENIC_UNICAST_PERFECT_FILTERS, uc_count);
+
+		return -ENOSPC;
+	}
+
+	enic_dev_add_addr(enic, uc_addr);
+	enic->uc_count++;
+
+	return 0;
+}
+
+static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	enic_dev_del_addr(enic, uc_addr);
+	enic->uc_count--;
+
+	return 0;
+}
+
 void enic_reset_addr_lists(struct enic *enic)
 {
+	struct net_device *netdev = enic->netdev;
+
+	__dev_uc_unsync(netdev, NULL);
+	__dev_mc_unsync(netdev, NULL);
+
 	enic->mc_count = 0;
 	enic->uc_count = 0;
 	enic->flags = 0;
@@ -654,112 +747,6 @@
 	return enic_dev_add_station_addr(enic);
 }
 
-static void enic_update_multicast_addr_list(struct enic *enic)
-{
-	struct net_device *netdev = enic->netdev;
-	struct netdev_hw_addr *ha;
-	unsigned int mc_count = netdev_mc_count(netdev);
-	u8 mc_addr[ENIC_MULTICAST_PERFECT_FILTERS][ETH_ALEN];
-	unsigned int i, j;
-
-	if (mc_count > ENIC_MULTICAST_PERFECT_FILTERS) {
-		netdev_warn(netdev, "Registering only %d out of %d "
-			"multicast addresses\n",
-			ENIC_MULTICAST_PERFECT_FILTERS, mc_count);
-		mc_count = ENIC_MULTICAST_PERFECT_FILTERS;
-	}
-
-	/* Is there an easier way?  Trying to minimize to
-	 * calls to add/del multicast addrs.  We keep the
-	 * addrs from the last call in enic->mc_addr and
-	 * look for changes to add/del.
-	 */
-
-	i = 0;
-	netdev_for_each_mc_addr(ha, netdev) {
-		if (i == mc_count)
-			break;
-		memcpy(mc_addr[i++], ha->addr, ETH_ALEN);
-	}
-
-	for (i = 0; i < enic->mc_count; i++) {
-		for (j = 0; j < mc_count; j++)
-			if (ether_addr_equal(enic->mc_addr[i], mc_addr[j]))
-				break;
-		if (j == mc_count)
-			enic_dev_del_addr(enic, enic->mc_addr[i]);
-	}
-
-	for (i = 0; i < mc_count; i++) {
-		for (j = 0; j < enic->mc_count; j++)
-			if (ether_addr_equal(mc_addr[i], enic->mc_addr[j]))
-				break;
-		if (j == enic->mc_count)
-			enic_dev_add_addr(enic, mc_addr[i]);
-	}
-
-	/* Save the list to compare against next time
-	 */
-
-	for (i = 0; i < mc_count; i++)
-		memcpy(enic->mc_addr[i], mc_addr[i], ETH_ALEN);
-
-	enic->mc_count = mc_count;
-}
-
-static void enic_update_unicast_addr_list(struct enic *enic)
-{
-	struct net_device *netdev = enic->netdev;
-	struct netdev_hw_addr *ha;
-	unsigned int uc_count = netdev_uc_count(netdev);
-	u8 uc_addr[ENIC_UNICAST_PERFECT_FILTERS][ETH_ALEN];
-	unsigned int i, j;
-
-	if (uc_count > ENIC_UNICAST_PERFECT_FILTERS) {
-		netdev_warn(netdev, "Registering only %d out of %d "
-			"unicast addresses\n",
-			ENIC_UNICAST_PERFECT_FILTERS, uc_count);
-		uc_count = ENIC_UNICAST_PERFECT_FILTERS;
-	}
-
-	/* Is there an easier way?  Trying to minimize to
-	 * calls to add/del unicast addrs.  We keep the
-	 * addrs from the last call in enic->uc_addr and
-	 * look for changes to add/del.
-	 */
-
-	i = 0;
-	netdev_for_each_uc_addr(ha, netdev) {
-		if (i == uc_count)
-			break;
-		memcpy(uc_addr[i++], ha->addr, ETH_ALEN);
-	}
-
-	for (i = 0; i < enic->uc_count; i++) {
-		for (j = 0; j < uc_count; j++)
-			if (ether_addr_equal(enic->uc_addr[i], uc_addr[j]))
-				break;
-		if (j == uc_count)
-			enic_dev_del_addr(enic, enic->uc_addr[i]);
-	}
-
-	for (i = 0; i < uc_count; i++) {
-		for (j = 0; j < enic->uc_count; j++)
-			if (ether_addr_equal(uc_addr[i], enic->uc_addr[j]))
-				break;
-		if (j == enic->uc_count)
-			enic_dev_add_addr(enic, uc_addr[i]);
-	}
-
-	/* Save the list to compare against next time
-	 */
-
-	for (i = 0; i < uc_count; i++)
-		memcpy(enic->uc_addr[i], uc_addr[i], ETH_ALEN);
-
-	enic->uc_count = uc_count;
-}
-
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_rx_mode(struct net_device *netdev)
 {
@@ -782,9 +769,9 @@
 	}
 
 	if (!promisc) {
-		enic_update_unicast_addr_list(enic);
+		__dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
 		if (!allmulti)
-			enic_update_multicast_addr_list(enic);
+			__dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
 	}
 }
 
@@ -979,6 +966,15 @@
 	return 0;
 }
 
+static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
+				      u32 pkt_len)
+{
+	if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
+		pkt_size->large_pkt_bytes_cnt += pkt_len;
+	else
+		pkt_size->small_pkt_bytes_cnt += pkt_len;
+}
+
 static void enic_rq_indicate_buf(struct vnic_rq *rq,
 	struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
 	int skipped, void *opaque)
@@ -986,6 +982,7 @@
 	struct enic *enic = vnic_dev_priv(rq->vdev);
 	struct net_device *netdev = enic->netdev;
 	struct sk_buff *skb;
+	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
 
 	u8 type, color, eop, sop, ingress_port, vlan_stripped;
 	u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
@@ -1056,6 +1053,9 @@
 			napi_gro_receive(&enic->napi[q_number], skb);
 		else
 			netif_receive_skb(skb);
+		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+			enic_intr_update_pkt_size(&cq->pkt_size_counter,
+						  bytes_written);
 	} else {
 
 		/* Buffer overflow
@@ -1134,6 +1134,64 @@
 	return rq_work_done;
 }
 
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+	unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+	u32 timer = cq->tobe_rx_coal_timeval;
+
+	if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
+		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+		cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+	}
+}
+
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+	struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+	int index;
+	u32 timer;
+	u32 range_start;
+	u32 traffic;
+	u64 delta;
+	ktime_t now = ktime_get();
+
+	delta = ktime_us_delta(now, cq->prev_ts);
+	if (delta < ENIC_AIC_TS_BREAK)
+		return;
+	cq->prev_ts = now;
+
+	traffic = pkt_size_counter->large_pkt_bytes_cnt +
+		  pkt_size_counter->small_pkt_bytes_cnt;
+	/* The table takes Mbps
+	 * traffic *= 8    => bits
+	 * traffic *= (10^6 / delta)    => bps
+	 * traffic /= 10^6     => Mbps
+	 *
+	 * Combining, traffic *= (8 / delta)
+	 */
+
+	traffic <<= 3;
+	traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
+
+	for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+		if (traffic < mod_table[index].rx_rate)
+			break;
+	range_start = (pkt_size_counter->small_pkt_bytes_cnt >
+		       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
+		      rx_coal->small_pkt_range_start :
+		      rx_coal->large_pkt_range_start;
+	timer = range_start + ((rx_coal->range_end - range_start) *
+			       mod_table[index].range_percent / 100);
+	/* Damping */
+	cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
+
+	pkt_size_counter->large_pkt_bytes_cnt = 0;
+	pkt_size_counter->small_pkt_bytes_cnt = 0;
+}
+
 static int enic_poll_msix(struct napi_struct *napi, int budget)
 {
 	struct net_device *netdev = napi->dev;
@@ -1171,6 +1229,13 @@
 
 	if (err)
 		work_done = work_to_do;
+	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+		/* Call the function which refreshes
+		 * the intr coalescing timer value based on
+		 * the traffic.  This is supported only in
+		 * the case of MSI-x mode
+		 */
+		enic_calc_int_moderation(enic, &enic->rq[rq]);
 
 	if (work_done < work_to_do) {
 
@@ -1179,6 +1244,8 @@
 		 */
 
 		napi_complete(napi);
+		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+			enic_set_int_moderation(enic, &enic->rq[rq]);
 		vnic_intr_unmask(&enic->intr[intr]);
 	}
 
@@ -1314,6 +1381,42 @@
 	}
 }
 
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+	unsigned int speed;
+	int index = -1;
+	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+	/* If intr mode is not MSIX, do not do adaptive coalescing */
+	if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
+		netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
+		return;
+	}
+
+	/* 1. Read the link speed from fw
+	 * 2. Pick the default range for the speed
+	 * 3. Update it in enic->rx_coalesce_setting
+	 */
+	speed = vnic_dev_port_speed(enic->vdev);
+	if (ENIC_LINK_SPEED_10G < speed)
+		index = ENIC_LINK_40G_INDEX;
+	else if (ENIC_LINK_SPEED_4G < speed)
+		index = ENIC_LINK_10G_INDEX;
+	else
+		index = ENIC_LINK_4G_INDEX;
+
+	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+	/* Start with the value provided by UCSM */
+	for (index = 0; index < enic->rq_count; index++)
+		enic->cq[index].cur_rx_coal_timeval =
+				enic->config.intr_timer_usec;
+
+	rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
 static int enic_dev_notify_set(struct enic *enic)
 {
 	int err;
@@ -2231,6 +2334,7 @@
 	enic->notify_timer.function = enic_notify_timer;
 	enic->notify_timer.data = (unsigned long)enic;
 
+	enic_set_rx_coal_setting(enic);
 	INIT_WORK(&enic->reset, enic_reset);
 	INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
 
@@ -2250,6 +2354,9 @@
 	}
 
 	enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
+	/* rx coalesce time already got initialized. This gets used
+	 * if adaptive coal is turned off
+	 */
 	enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;
 
 	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))

diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.h b/drivers/net/ethernet/cisco/enic/vnic_cq.h
index 579315c..4e6aa65 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_cq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_cq.h

@@ -50,6 +50,11 @@
 	u32 pad10;
 };
 
+struct vnic_rx_bytes_counter {
+	unsigned int small_pkt_bytes_cnt;
+	unsigned int large_pkt_bytes_cnt;
+};
+
 struct vnic_cq {
 	unsigned int index;
 	struct vnic_dev *vdev;
@@ -58,6 +63,10 @@
 	unsigned int to_clean;
 	unsigned int last_color;
 	unsigned int interrupt_offset;
+	struct vnic_rx_bytes_counter pkt_size_counter;
+	unsigned int cur_rx_coal_timeval;
+	unsigned int tobe_rx_coal_timeval;
+	ktime_t prev_ts;
 };
 
 static inline unsigned int vnic_cq_service(struct vnic_cq *cq,

diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 69dd925..e86a45c 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c

@@ -657,7 +657,7 @@
 	return err;
 }
 
-int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr)
+int vnic_dev_add_addr(struct vnic_dev *vdev, const u8 *addr)
 {
 	u64 a0 = 0, a1 = 0;
 	int wait = 1000;
@@ -674,7 +674,7 @@
 	return err;
 }
 
-int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr)
+int vnic_dev_del_addr(struct vnic_dev *vdev, const u8 *addr)
 {
 	u64 a0 = 0, a1 = 0;
 	int wait = 1000;

diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h
index e670029..1f3b301 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h

@@ -95,8 +95,8 @@
 int vnic_dev_hang_notify(struct vnic_dev *vdev);
 int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
 	int broadcast, int promisc, int allmulti);
-int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr);
-int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr);
+int vnic_dev_add_addr(struct vnic_dev *vdev, const u8 *addr);
+int vnic_dev_del_addr(struct vnic_dev *vdev, const u8 *addr);
 int vnic_dev_get_mac_addr(struct vnic_dev *vdev, u8 *mac_addr);
 int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr);
 int vnic_dev_notify_unset(struct vnic_dev *vdev);

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 8c4b93b..13723c9 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c

@@ -109,6 +109,7 @@
 	u8		imr_all;
 
 	unsigned int	flags;
+	unsigned int	in_timeout:1;
 	unsigned int	in_suspend:1;
 	unsigned int	wake_supported:1;
 
@@ -187,13 +188,13 @@
 	 * The essential point is that we have to do a double reset, and the
 	 * instruction is to set LBK into MAC internal loopback mode.
 	 */
-	iow(db, DM9000_NCR, 0x03);
+	iow(db, DM9000_NCR, NCR_RST | NCR_MAC_LBK);
 	udelay(100); /* Application note says at least 20 us */
 	if (ior(db, DM9000_NCR) & 1)
 		dev_err(db->dev, "dm9000 did not respond to first reset\n");
 
 	iow(db, DM9000_NCR, 0);
-	iow(db, DM9000_NCR, 0x03);
+	iow(db, DM9000_NCR, NCR_RST | NCR_MAC_LBK);
 	udelay(100);
 	if (ior(db, DM9000_NCR) & 1)
 		dev_err(db->dev, "dm9000 did not respond to second reset\n");
@@ -273,7 +274,7 @@
  */
 static void dm9000_msleep(board_info_t *db, unsigned int ms)
 {
-	if (db->in_suspend)
+	if (db->in_suspend || db->in_timeout)
 		mdelay(ms);
 	else
 		msleep(ms);
@@ -334,7 +335,8 @@
 	unsigned long reg_save;
 
 	dm9000_dbg(db, 5, "phy_write[%02x] = %04x\n", reg, value);
-	mutex_lock(&db->addr_lock);
+	if (!db->in_timeout)
+		mutex_lock(&db->addr_lock);
 
 	spin_lock_irqsave(&db->lock, flags);
 
@@ -365,7 +367,8 @@
 	writeb(reg_save, db->io_addr);
 
 	spin_unlock_irqrestore(&db->lock, flags);
-	mutex_unlock(&db->addr_lock);
+	if (!db->in_timeout)
+		mutex_unlock(&db->addr_lock);
 }
 
 /* dm9000_set_io
@@ -882,6 +885,18 @@
 	spin_unlock_irqrestore(&db->lock, flags);
 }
 
+static void
+dm9000_mask_interrupts(board_info_t *db)
+{
+	iow(db, DM9000_IMR, IMR_PAR);
+}
+
+static void
+dm9000_unmask_interrupts(board_info_t *db)
+{
+	iow(db, DM9000_IMR, db->imr_all);
+}
+
 /*
  * Initialize dm9000 board
  */
@@ -894,6 +909,9 @@
 
 	dm9000_dbg(db, 1, "entering %s\n", __func__);
 
+	dm9000_reset(db);
+	dm9000_mask_interrupts(db);
+
 	/* I/O mode */
 	db->io_mode = ior(db, DM9000_ISR) >> 6;	/* ISR bit7:6 keeps I/O mode */
 
@@ -941,9 +959,6 @@
 
 	db->imr_all = imr;
 
-	/* Enable TX/RX interrupt mask */
-	iow(db, DM9000_IMR, imr);
-
 	/* Init Driver variable */
 	db->tx_pkt_cnt = 0;
 	db->queue_pkt_len = 0;
@@ -959,17 +974,19 @@
 
 	/* Save previous register address */
 	spin_lock_irqsave(&db->lock, flags);
+	db->in_timeout = 1;
 	reg_save = readb(db->io_addr);
 
 	netif_stop_queue(dev);
-	dm9000_reset(db);
 	dm9000_init_dm9000(dev);
+	dm9000_unmask_interrupts(db);
 	/* We can accept TX packets again */
 	dev->trans_start = jiffies; /* prevent tx timeout */
 	netif_wake_queue(dev);
 
 	/* Restore previous register address */
 	writeb(reg_save, db->io_addr);
+	db->in_timeout = 0;
 	spin_unlock_irqrestore(&db->lock, flags);
 }
 
@@ -1093,7 +1110,6 @@
 		if (rxbyte & DM9000_PKT_ERR) {
 			dev_warn(db->dev, "status check fail: %d\n", rxbyte);
 			iow(db, DM9000_RCR, 0x00);	/* Stop Device */
-			iow(db, DM9000_ISR, IMR_PAR);	/* Stop INT request */
 			return;
 		}
 
@@ -1193,9 +1209,7 @@
 	/* Save previous register address */
 	reg_save = readb(db->io_addr);
 
-	/* Disable all interrupts */
-	iow(db, DM9000_IMR, IMR_PAR);
-
+	dm9000_mask_interrupts(db);
 	/* Got DM9000 interrupt status */
 	int_status = ior(db, DM9000_ISR);	/* Got ISR */
 	iow(db, DM9000_ISR, int_status);	/* Clear ISR status */
@@ -1218,9 +1232,7 @@
 		}
 	}
 
-	/* Re-enable interrupt mask */
-	iow(db, DM9000_IMR, db->imr_all);
-
+	dm9000_unmask_interrupts(db);
 	/* Restore previous register address */
 	writeb(reg_save, db->io_addr);
 
@@ -1292,6 +1304,9 @@
 	 * may work, and tell the user that this is a problem */
 
 	if (irqflags == IRQF_TRIGGER_NONE)
+		irqflags = irq_get_trigger_type(dev->irq);
+
+	if (irqflags == IRQF_TRIGGER_NONE)
 		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
 	irqflags |= IRQF_SHARED;
@@ -1301,11 +1316,14 @@
 	mdelay(1); /* delay needs by DM9000B */
 
 	/* Initialize DM9000 board */
-	dm9000_reset(db);
 	dm9000_init_dm9000(dev);
 
 	if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
 		return -EAGAIN;
+	/* Now that we have an interrupt handler hooked up we can unmask
+	 * our interrupts
+	 */
+	dm9000_unmask_interrupts(db);
 
 	/* Init driver variable */
 	db->dbug_cnt = 0;
@@ -1313,7 +1331,8 @@
 	mii_check_media(&db->mii, netif_msg_link(db), 1);
 	netif_start_queue(dev);
 
-	dm9000_schedule_poll(db);
+	/* Poll initial link status */
+	schedule_delayed_work(&db->phy_poll, 1);
 
 	return 0;
 }
@@ -1326,7 +1345,7 @@
 	/* RESET device */
 	dm9000_phy_write(dev, 0, MII_BMCR, BMCR_RESET);	/* PHY RESET */
 	iow(db, DM9000_GPR, 0x01);	/* Power-Down PHY */
-	iow(db, DM9000_IMR, IMR_PAR);	/* Disable all interrupt */
+	dm9000_mask_interrupts(db);
 	iow(db, DM9000_RCR, 0x00);	/* Disable RX */
 }
 
@@ -1547,12 +1566,7 @@
 	db->flags |= DM9000_PLATF_SIMPLE_PHY;
 #endif
 
-	/* Fixing bug on dm9000_probe, takeover dm9000_reset(db),
-	 * Need 'NCR_MAC_LBK' bit to indeed stable our DM9000 fifo
-	 * while probe stage.
-	 */
-
-	iow(db, DM9000_NCR, NCR_MAC_LBK | NCR_RST);
+	dm9000_reset(db);
 
 	/* try multiple times, DM9000 sometimes gets the read wrong */
 	for (i = 0; i < 8; i++) {
@@ -1695,8 +1709,8 @@
 			/* reset if we were not in wake mode to ensure if
 			 * the device was powered off it is in a known state */
 			if (!db->wake_state) {
-				dm9000_reset(db);
 				dm9000_init_dm9000(ndev);
+				dm9000_unmask_interrupts(db);
 			}
 
 			netif_device_attach(ndev);

diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 1642de7..8616608 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c

@@ -1703,7 +1703,7 @@
 #ifdef CONFIG_TULIP_NAPI
 	netif_napi_add(dev, &tp->napi, tulip_poll, 16);
 #endif
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 
 	if (register_netdev(dev))
 		goto err_out_free_ring;

diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index aa801a6..80afec3 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c

@@ -962,8 +962,8 @@
 	}
 	if(db->link_failed)
 	{
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	if (db->media_mode & ULI526X_AUTO)

diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 4fb756d..1274b6f 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c

@@ -227,7 +227,7 @@
 	}
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 #if 0
 	dev->features = NETIF_F_IP_CSUM;
 #endif
@@ -1185,8 +1185,8 @@
 		ethtool_cmd_speed_set(cmd, np->speed);
 		cmd->duplex = np->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 	if ( np->an_enable)
 		cmd->autoneg = AUTONEG_ENABLE;

diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index d9e5ca0..433c1e1 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c

@@ -577,7 +577,7 @@
 
 	/* The chip-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	pci_set_drvdata(pdev, dev);

diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 4884205..056b44b 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c

@@ -134,17 +134,17 @@
 
 	struct pci_dev *dev;
 
-	void * __iomem io;
-	void * __iomem dma_io;
+	void __iomem *io;
+	void __iomem *dma_io;
 
 	struct hrtimer hrtimer;
 
 	int tx_dma_chan;
 	int rx_dma_chan;
-	void * __iomem ec_io;
-	void * __iomem fifo_io;
-	void * __iomem mii_io;
-	void * __iomem mac_io;
+	void __iomem *ec_io;
+	void __iomem *fifo_io;
+	void __iomem *mii_io;
+	void __iomem *mac_io;
 
 	struct bhf_dma rx_buf;
 	struct rx_desc *rx_descs;
@@ -297,7 +297,7 @@
 {
 	struct device *dev = PRIV_TO_DEV(priv);
 	unsigned block_count, i;
-	void * __iomem ec_info;
+	void __iomem *ec_info;
 
 	dev_dbg(dev, "Info block:\n");
 	dev_dbg(dev, "Type of function: %x\n", (unsigned)ioread16(priv->io));
@@ -569,8 +569,8 @@
 {
 	struct net_device *net_dev;
 	struct ec_bhf_priv *priv;
-	void * __iomem dma_io;
-	void * __iomem io;
+	void __iomem *dma_io;
+	void __iomem *io;
 	int err = 0;
 
 	err = pci_enable_device(dev);
@@ -615,7 +615,7 @@
 	}
 
 	net_dev = alloc_etherdev(sizeof(struct ec_bhf_priv));
-	if (net_dev == 0) {
+	if (net_dev == NULL) {
 		err = -ENOMEM;
 		goto err_unmap_dma_io;
 	}

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 97db5a7..2e7c555 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h

@@ -120,6 +120,9 @@
 #define MAX_VFS			30 /* Max VFs supported by BE3 FW */
 #define FW_VER_LEN		32
 
+#define	RSS_INDIR_TABLE_LEN	128
+#define RSS_HASH_KEY_LEN	40
+
 struct be_dma_mem {
 	void *va;
 	dma_addr_t dma;
@@ -371,6 +374,7 @@
 #define BE_FLAGS_LINK_STATUS_INIT		1
 #define BE_FLAGS_WORKER_SCHEDULED		(1 << 3)
 #define BE_FLAGS_VLAN_PROMISC			(1 << 4)
+#define BE_FLAGS_MCAST_PROMISC			(1 << 5)
 #define BE_FLAGS_NAPI_ENABLED			(1 << 9)
 #define BE_FLAGS_QNQ_ASYNC_EVT_RCVD		(1 << 11)
 #define BE_FLAGS_VXLAN_OFFLOADS			(1 << 12)
@@ -409,6 +413,13 @@
 	u32 if_cap_flags;
 };
 
+struct rss_info {
+	u64 rss_flags;
+	u8 rsstable[RSS_INDIR_TABLE_LEN];
+	u8 rss_queue[RSS_INDIR_TABLE_LEN];
+	u8 rss_hkey[RSS_HASH_KEY_LEN];
+};
+
 struct be_adapter {
 	struct pci_dev *pdev;
 	struct net_device *netdev;
@@ -445,7 +456,7 @@
 	struct be_drv_stats drv_stats;
 	struct be_aic_obj aic_obj[MAX_EVT_QS];
 	u16 vlans_added;
-	u8 vlan_tag[VLAN_N_VID];
+	unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)];
 	u8 vlan_prio_bmap;	/* Available Priority BitMap */
 	u16 recommended_prio;	/* Recommended Priority */
 	struct be_dma_mem rx_filter; /* Cmd DMA mem for rx-filter */
@@ -507,7 +518,7 @@
 	u32 msg_enable;
 	int be_get_temp_freq;
 	u8 pf_number;
-	u64 rss_flags;
+	struct rss_info rss_info;
 };
 
 #define be_physfn(adapter)		(!adapter->virtfn)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index d1ec15a..f4ea349 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c

@@ -52,8 +52,7 @@
 	}
 };
 
-static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode,
-			   u8 subsystem)
+static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem)
 {
 	int i;
 	int num_entries = sizeof(cmd_priv_map)/sizeof(struct be_cmd_priv_map);
@@ -120,22 +119,29 @@
 	return (void *)addr;
 }
 
-static int be_mcc_compl_process(struct be_adapter *adapter,
-				struct be_mcc_compl *compl)
+static bool be_skip_err_log(u8 opcode, u16 base_status, u16 addl_status)
 {
-	u16 compl_status, extd_status;
-	struct be_cmd_resp_hdr *resp_hdr;
+	if (base_status == MCC_STATUS_NOT_SUPPORTED ||
+	    base_status == MCC_STATUS_ILLEGAL_REQUEST ||
+	    addl_status == MCC_ADDL_STATUS_TOO_MANY_INTERFACES ||
+	    (opcode == OPCODE_COMMON_WRITE_FLASHROM &&
+	    (base_status == MCC_STATUS_ILLEGAL_FIELD ||
+	     addl_status == MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH)))
+		return true;
+	else
+		return false;
+}
+
+/* Place holder for all the async MCC cmds wherein the caller is not in a busy
+ * loop (has not issued be_mcc_notify_wait())
+ */
+static void be_async_cmd_process(struct be_adapter *adapter,
+				 struct be_mcc_compl *compl,
+				 struct be_cmd_resp_hdr *resp_hdr)
+{
+	enum mcc_base_status base_status = base_status(compl->status);
 	u8 opcode = 0, subsystem = 0;
 
-	/* Just swap the status to host endian; mcc tag is opaquely copied
-	 * from mcc_wrb */
-	be_dws_le_to_cpu(compl, 4);
-
-	compl_status = (compl->status >> CQE_STATUS_COMPL_SHIFT) &
-				CQE_STATUS_COMPL_MASK;
-
-	resp_hdr = be_decode_resp_hdr(compl->tag0, compl->tag1);
-
 	if (resp_hdr) {
 		opcode = resp_hdr->opcode;
 		subsystem = resp_hdr->subsystem;
@@ -144,61 +150,86 @@
 	if (opcode == OPCODE_LOWLEVEL_LOOPBACK_TEST &&
 	    subsystem == CMD_SUBSYSTEM_LOWLEVEL) {
 		complete(&adapter->et_cmd_compl);
-		return 0;
+		return;
 	}
 
-	if (((opcode == OPCODE_COMMON_WRITE_FLASHROM) ||
-	     (opcode == OPCODE_COMMON_WRITE_OBJECT)) &&
-	    (subsystem == CMD_SUBSYSTEM_COMMON)) {
-		adapter->flash_status = compl_status;
+	if ((opcode == OPCODE_COMMON_WRITE_FLASHROM ||
+	     opcode == OPCODE_COMMON_WRITE_OBJECT) &&
+	    subsystem == CMD_SUBSYSTEM_COMMON) {
+		adapter->flash_status = compl->status;
 		complete(&adapter->et_cmd_compl);
+		return;
 	}
 
-	if (compl_status == MCC_STATUS_SUCCESS) {
-		if (((opcode == OPCODE_ETH_GET_STATISTICS) ||
-		     (opcode == OPCODE_ETH_GET_PPORT_STATS)) &&
-		    (subsystem == CMD_SUBSYSTEM_ETH)) {
-			be_parse_stats(adapter);
-			adapter->stats_cmd_sent = false;
-		}
-		if (opcode == OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES &&
-		    subsystem == CMD_SUBSYSTEM_COMMON) {
+	if ((opcode == OPCODE_ETH_GET_STATISTICS ||
+	     opcode == OPCODE_ETH_GET_PPORT_STATS) &&
+	    subsystem == CMD_SUBSYSTEM_ETH &&
+	    base_status == MCC_STATUS_SUCCESS) {
+		be_parse_stats(adapter);
+		adapter->stats_cmd_sent = false;
+		return;
+	}
+
+	if (opcode == OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES &&
+	    subsystem == CMD_SUBSYSTEM_COMMON) {
+		if (base_status == MCC_STATUS_SUCCESS) {
 			struct be_cmd_resp_get_cntl_addnl_attribs *resp =
-				(void *)resp_hdr;
+							(void *)resp_hdr;
 			adapter->drv_stats.be_on_die_temperature =
-				resp->on_die_temperature;
-		}
-	} else {
-		if (opcode == OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES)
+						resp->on_die_temperature;
+		} else {
 			adapter->be_get_temp_freq = 0;
+		}
+		return;
+	}
+}
 
-		if (compl_status == MCC_STATUS_NOT_SUPPORTED ||
-			compl_status == MCC_STATUS_ILLEGAL_REQUEST)
-			goto done;
+static int be_mcc_compl_process(struct be_adapter *adapter,
+				struct be_mcc_compl *compl)
+{
+	enum mcc_base_status base_status;
+	enum mcc_addl_status addl_status;
+	struct be_cmd_resp_hdr *resp_hdr;
+	u8 opcode = 0, subsystem = 0;
 
-		if (compl_status == MCC_STATUS_UNAUTHORIZED_REQUEST) {
+	/* Just swap the status to host endian; mcc tag is opaquely copied
+	 * from mcc_wrb */
+	be_dws_le_to_cpu(compl, 4);
+
+	base_status = base_status(compl->status);
+	addl_status = addl_status(compl->status);
+
+	resp_hdr = be_decode_resp_hdr(compl->tag0, compl->tag1);
+	if (resp_hdr) {
+		opcode = resp_hdr->opcode;
+		subsystem = resp_hdr->subsystem;
+	}
+
+	be_async_cmd_process(adapter, compl, resp_hdr);
+
+	if (base_status != MCC_STATUS_SUCCESS &&
+	    !be_skip_err_log(opcode, base_status, addl_status)) {
+
+		if (base_status == MCC_STATUS_UNAUTHORIZED_REQUEST) {
 			dev_warn(&adapter->pdev->dev,
 				 "VF is not privileged to issue opcode %d-%d\n",
 				 opcode, subsystem);
 		} else {
-			extd_status = (compl->status >> CQE_STATUS_EXTD_SHIFT) &
-					CQE_STATUS_EXTD_MASK;
 			dev_err(&adapter->pdev->dev,
 				"opcode %d-%d failed:status %d-%d\n",
-				opcode, subsystem, compl_status, extd_status);
-
-			if (extd_status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES)
-				return extd_status;
+				opcode, subsystem, base_status, addl_status);
 		}
 	}
-done:
-	return compl_status;
+	return compl->status;
 }
 
 /* Link state evt is a string of bytes; no need for endian swapping */
 static void be_async_link_state_process(struct be_adapter *adapter,
-		struct be_async_event_link_state *evt)
+					struct be_mcc_compl *compl)
 {
+	struct be_async_event_link_state *evt =
+			(struct be_async_event_link_state *)compl;
+
 	/* When link status changes, link speed must be re-queried from FW */
 	adapter->phy.link_speed = -1;
 
@@ -221,8 +252,11 @@
 
 /* Grp5 CoS Priority evt */
 static void be_async_grp5_cos_priority_process(struct be_adapter *adapter,
-		struct be_async_event_grp5_cos_priority *evt)
+					       struct be_mcc_compl *compl)
 {
+	struct be_async_event_grp5_cos_priority *evt =
+			(struct be_async_event_grp5_cos_priority *)compl;
+
 	if (evt->valid) {
 		adapter->vlan_prio_bmap = evt->available_priority_bmap;
 		adapter->recommended_prio &= ~VLAN_PRIO_MASK;
@@ -233,8 +267,11 @@
 
 /* Grp5 QOS Speed evt: qos_link_speed is in units of 10 Mbps */
 static void be_async_grp5_qos_speed_process(struct be_adapter *adapter,
-		struct be_async_event_grp5_qos_link_speed *evt)
+					    struct be_mcc_compl *compl)
 {
+	struct be_async_event_grp5_qos_link_speed *evt =
+			(struct be_async_event_grp5_qos_link_speed *)compl;
+
 	if (adapter->phy.link_speed >= 0 &&
 	    evt->physical_port == adapter->port_num)
 		adapter->phy.link_speed = le16_to_cpu(evt->qos_link_speed) * 10;
@@ -242,8 +279,11 @@
 
 /*Grp5 PVID evt*/
 static void be_async_grp5_pvid_state_process(struct be_adapter *adapter,
-		struct be_async_event_grp5_pvid_state *evt)
+					     struct be_mcc_compl *compl)
 {
+	struct be_async_event_grp5_pvid_state *evt =
+			(struct be_async_event_grp5_pvid_state *)compl;
+
 	if (evt->enabled) {
 		adapter->pvid = le16_to_cpu(evt->tag) & VLAN_VID_MASK;
 		dev_info(&adapter->pdev->dev, "LPVID: %d\n", adapter->pvid);
@@ -253,26 +293,21 @@
 }
 
 static void be_async_grp5_evt_process(struct be_adapter *adapter,
-		u32 trailer, struct be_mcc_compl *evt)
+				      struct be_mcc_compl *compl)
 {
-	u8 event_type = 0;
-
-	event_type = (trailer >> ASYNC_TRAILER_EVENT_TYPE_SHIFT) &
-		ASYNC_TRAILER_EVENT_TYPE_MASK;
+	u8 event_type = (compl->flags >> ASYNC_EVENT_TYPE_SHIFT) &
+				ASYNC_EVENT_TYPE_MASK;
 
 	switch (event_type) {
 	case ASYNC_EVENT_COS_PRIORITY:
-		be_async_grp5_cos_priority_process(adapter,
-		(struct be_async_event_grp5_cos_priority *)evt);
-	break;
+		be_async_grp5_cos_priority_process(adapter, compl);
+		break;
 	case ASYNC_EVENT_QOS_SPEED:
-		be_async_grp5_qos_speed_process(adapter,
-		(struct be_async_event_grp5_qos_link_speed *)evt);
-	break;
+		be_async_grp5_qos_speed_process(adapter, compl);
+		break;
 	case ASYNC_EVENT_PVID_STATE:
-		be_async_grp5_pvid_state_process(adapter,
-		(struct be_async_event_grp5_pvid_state *)evt);
-	break;
+		be_async_grp5_pvid_state_process(adapter, compl);
+		break;
 	default:
 		dev_warn(&adapter->pdev->dev, "Unknown grp5 event 0x%x!\n",
 			 event_type);
@@ -281,13 +316,13 @@
 }
 
 static void be_async_dbg_evt_process(struct be_adapter *adapter,
-		u32 trailer, struct be_mcc_compl *cmp)
+				     struct be_mcc_compl *cmp)
 {
 	u8 event_type = 0;
 	struct be_async_event_qnq *evt = (struct be_async_event_qnq *) cmp;
 
-	event_type = (trailer >> ASYNC_TRAILER_EVENT_TYPE_SHIFT) &
-		ASYNC_TRAILER_EVENT_TYPE_MASK;
+	event_type = (cmp->flags >> ASYNC_EVENT_TYPE_SHIFT) &
+			ASYNC_EVENT_TYPE_MASK;
 
 	switch (event_type) {
 	case ASYNC_DEBUG_EVENT_TYPE_QNQ:
@@ -302,25 +337,33 @@
 	}
 }
 
-static inline bool is_link_state_evt(u32 trailer)
+static inline bool is_link_state_evt(u32 flags)
 {
-	return ((trailer >> ASYNC_TRAILER_EVENT_CODE_SHIFT) &
-		ASYNC_TRAILER_EVENT_CODE_MASK) ==
-				ASYNC_EVENT_CODE_LINK_STATE;
+	return ((flags >> ASYNC_EVENT_CODE_SHIFT) & ASYNC_EVENT_CODE_MASK) ==
+			ASYNC_EVENT_CODE_LINK_STATE;
 }
 
-static inline bool is_grp5_evt(u32 trailer)
+static inline bool is_grp5_evt(u32 flags)
 {
-	return (((trailer >> ASYNC_TRAILER_EVENT_CODE_SHIFT) &
-		ASYNC_TRAILER_EVENT_CODE_MASK) ==
-				ASYNC_EVENT_CODE_GRP_5);
+	return ((flags >> ASYNC_EVENT_CODE_SHIFT) & ASYNC_EVENT_CODE_MASK) ==
+			ASYNC_EVENT_CODE_GRP_5;
 }
 
-static inline bool is_dbg_evt(u32 trailer)
+static inline bool is_dbg_evt(u32 flags)
 {
-	return (((trailer >> ASYNC_TRAILER_EVENT_CODE_SHIFT) &
-		ASYNC_TRAILER_EVENT_CODE_MASK) ==
-				ASYNC_EVENT_CODE_QNQ);
+	return ((flags >> ASYNC_EVENT_CODE_SHIFT) & ASYNC_EVENT_CODE_MASK) ==
+			ASYNC_EVENT_CODE_QNQ;
+}
+
+static void be_mcc_event_process(struct be_adapter *adapter,
+				 struct be_mcc_compl *compl)
+{
+	if (is_link_state_evt(compl->flags))
+		be_async_link_state_process(adapter, compl);
+	else if (is_grp5_evt(compl->flags))
+		be_async_grp5_evt_process(adapter, compl);
+	else if (is_dbg_evt(compl->flags))
+		be_async_dbg_evt_process(adapter, compl);
 }
 
 static struct be_mcc_compl *be_mcc_compl_get(struct be_adapter *adapter)
@@ -362,21 +405,13 @@
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
 	spin_lock(&adapter->mcc_cq_lock);
+
 	while ((compl = be_mcc_compl_get(adapter))) {
 		if (compl->flags & CQE_FLAGS_ASYNC_MASK) {
-			/* Interpret flags as an async trailer */
-			if (is_link_state_evt(compl->flags))
-				be_async_link_state_process(adapter,
-				(struct be_async_event_link_state *) compl);
-			else if (is_grp5_evt(compl->flags))
-				be_async_grp5_evt_process(adapter,
-				compl->flags, compl);
-			else if (is_dbg_evt(compl->flags))
-				be_async_dbg_evt_process(adapter,
-				compl->flags, compl);
+			be_mcc_event_process(adapter, compl);
 		} else if (compl->flags & CQE_FLAGS_COMPLETED_MASK) {
-				status = be_mcc_compl_process(adapter, compl);
-				atomic_dec(&mcc_obj->q.used);
+			status = be_mcc_compl_process(adapter, compl);
+			atomic_dec(&mcc_obj->q.used);
 		}
 		be_mcc_compl_use(compl);
 		num++;
@@ -436,7 +471,9 @@
 	if (status == -EIO)
 		goto out;
 
-	status = resp->status;
+	status = (resp->base_status |
+		  ((resp->addl_status & CQE_ADDL_STATUS_MASK) <<
+		   CQE_ADDL_STATUS_SHIFT));
 out:
 	return status;
 }
@@ -560,10 +597,8 @@
 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
 	sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
 	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
-		sliport_err1 = ioread32(adapter->db +
-					SLIPORT_ERROR1_OFFSET);
-		sliport_err2 = ioread32(adapter->db +
-					SLIPORT_ERROR2_OFFSET);
+		sliport_err1 = ioread32(adapter->db + SLIPORT_ERROR1_OFFSET);
+		sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET);
 
 		if (sliport_err1 == SLIPORT_ERROR_NO_RESOURCE1 &&
 		    sliport_err2 == SLIPORT_ERROR_NO_RESOURCE2)
@@ -630,8 +665,7 @@
 		if (stage == POST_STAGE_ARMFW_RDY)
 			return 0;
 
-		dev_info(dev, "Waiting for POST, %ds elapsed\n",
-			 timeout);
+		dev_info(dev, "Waiting for POST, %ds elapsed\n", timeout);
 		if (msleep_interruptible(2000)) {
 			dev_err(dev, "Waiting for POST aborted\n");
 			return -EINTR;
@@ -649,8 +683,7 @@
 	return &wrb->payload.sgl[0];
 }
 
-static inline void fill_wrb_tags(struct be_mcc_wrb *wrb,
-				 unsigned long addr)
+static inline void fill_wrb_tags(struct be_mcc_wrb *wrb, unsigned long addr)
 {
 	wrb->tag0 = addr & 0xFFFFFFFF;
 	wrb->tag1 = upper_32_bits(addr);
@@ -659,8 +692,9 @@
 /* Don't touch the hdr after it's prepared */
 /* mem will be NULL for embedded commands */
 static void be_wrb_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr,
-				u8 subsystem, u8 opcode, int cmd_len,
-				struct be_mcc_wrb *wrb, struct be_dma_mem *mem)
+				   u8 subsystem, u8 opcode, int cmd_len,
+				   struct be_mcc_wrb *wrb,
+				   struct be_dma_mem *mem)
 {
 	struct be_sge *sge;
 
@@ -683,7 +717,7 @@
 }
 
 static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages,
-			struct be_dma_mem *mem)
+				      struct be_dma_mem *mem)
 {
 	int i, buf_pages = min(PAGES_4K_SPANNED(mem->va, mem->size), max_pages);
 	u64 dma = (u64)mem->dma;
@@ -868,7 +902,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_EQ_CREATE, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_EQ_CREATE, sizeof(*req), wrb,
+			       NULL);
 
 	/* Support for EQ_CREATEv2 available only SH-R onwards */
 	if (!(BEx_chip(adapter) || lancer_chip(adapter)))
@@ -917,7 +952,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_MAC_QUERY, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_NTWK_MAC_QUERY, sizeof(*req), wrb,
+			       NULL);
 	req->type = MAC_ADDRESS_TYPE_NETWORK;
 	if (permanent) {
 		req->permanent = 1;
@@ -940,7 +976,7 @@
 
 /* Uses synchronous MCCQ */
 int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
-		u32 if_id, u32 *pmac_id, u32 domain)
+		    u32 if_id, u32 *pmac_id, u32 domain)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_pmac_add *req;
@@ -956,7 +992,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_PMAC_ADD, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_NTWK_PMAC_ADD, sizeof(*req), wrb,
+			       NULL);
 
 	req->hdr.domain = domain;
 	req->if_id = cpu_to_le32(if_id);
@@ -1012,7 +1049,7 @@
 
 /* Uses Mbox */
 int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq,
-		struct be_queue_info *eq, bool no_delay, int coalesce_wm)
+		     struct be_queue_info *eq, bool no_delay, int coalesce_wm)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_cq_create *req;
@@ -1028,17 +1065,18 @@
 	ctxt = &req->context;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_CQ_CREATE, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_CQ_CREATE, sizeof(*req), wrb,
+			       NULL);
 
 	req->num_pages =  cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size));
 
 	if (BEx_chip(adapter)) {
 		AMAP_SET_BITS(struct amap_cq_context_be, coalescwm, ctxt,
-								coalesce_wm);
+			      coalesce_wm);
 		AMAP_SET_BITS(struct amap_cq_context_be, nodelay,
-								ctxt, no_delay);
+			      ctxt, no_delay);
 		AMAP_SET_BITS(struct amap_cq_context_be, count, ctxt,
-						__ilog2_u32(cq->len/256));
+			      __ilog2_u32(cq->len / 256));
 		AMAP_SET_BITS(struct amap_cq_context_be, valid, ctxt, 1);
 		AMAP_SET_BITS(struct amap_cq_context_be, eventable, ctxt, 1);
 		AMAP_SET_BITS(struct amap_cq_context_be, eqid, ctxt, eq->id);
@@ -1053,14 +1091,12 @@
 			AMAP_SET_BITS(struct amap_cq_context_v2, coalescwm,
 				      ctxt, coalesce_wm);
 		AMAP_SET_BITS(struct amap_cq_context_v2, nodelay, ctxt,
-								no_delay);
+			      no_delay);
 		AMAP_SET_BITS(struct amap_cq_context_v2, count, ctxt,
-						__ilog2_u32(cq->len/256));
+			      __ilog2_u32(cq->len / 256));
 		AMAP_SET_BITS(struct amap_cq_context_v2, valid, ctxt, 1);
-		AMAP_SET_BITS(struct amap_cq_context_v2, eventable,
-								ctxt, 1);
-		AMAP_SET_BITS(struct amap_cq_context_v2, eqid,
-								ctxt, eq->id);
+		AMAP_SET_BITS(struct amap_cq_context_v2, eventable, ctxt, 1);
+		AMAP_SET_BITS(struct amap_cq_context_v2, eqid, ctxt, eq->id);
 	}
 
 	be_dws_cpu_to_le(ctxt, sizeof(req->context));
@@ -1088,8 +1124,8 @@
 }
 
 static int be_cmd_mccq_ext_create(struct be_adapter *adapter,
-				struct be_queue_info *mccq,
-				struct be_queue_info *cq)
+				  struct be_queue_info *mccq,
+				  struct be_queue_info *cq)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_mcc_ext_create *req;
@@ -1105,13 +1141,14 @@
 	ctxt = &req->context;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_MCC_CREATE_EXT, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_MCC_CREATE_EXT, sizeof(*req), wrb,
+			       NULL);
 
 	req->num_pages = cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size));
 	if (BEx_chip(adapter)) {
 		AMAP_SET_BITS(struct amap_mcc_context_be, valid, ctxt, 1);
 		AMAP_SET_BITS(struct amap_mcc_context_be, ring_size, ctxt,
-						be_encoded_q_len(mccq->len));
+			      be_encoded_q_len(mccq->len));
 		AMAP_SET_BITS(struct amap_mcc_context_be, cq_id, ctxt, cq->id);
 	} else {
 		req->hdr.version = 1;
@@ -1145,8 +1182,8 @@
 }
 
 static int be_cmd_mccq_org_create(struct be_adapter *adapter,
-				struct be_queue_info *mccq,
-				struct be_queue_info *cq)
+				  struct be_queue_info *mccq,
+				  struct be_queue_info *cq)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_mcc_create *req;
@@ -1162,13 +1199,14 @@
 	ctxt = &req->context;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_MCC_CREATE, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_MCC_CREATE, sizeof(*req), wrb,
+			       NULL);
 
 	req->num_pages = cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size));
 
 	AMAP_SET_BITS(struct amap_mcc_context_be, valid, ctxt, 1);
 	AMAP_SET_BITS(struct amap_mcc_context_be, ring_size, ctxt,
-			be_encoded_q_len(mccq->len));
+		      be_encoded_q_len(mccq->len));
 	AMAP_SET_BITS(struct amap_mcc_context_be, cq_id, ctxt, cq->id);
 
 	be_dws_cpu_to_le(ctxt, sizeof(req->context));
@@ -1187,8 +1225,7 @@
 }
 
 int be_cmd_mccq_create(struct be_adapter *adapter,
-			struct be_queue_info *mccq,
-			struct be_queue_info *cq)
+		       struct be_queue_info *mccq, struct be_queue_info *cq)
 {
 	int status;
 
@@ -1213,7 +1250,7 @@
 
 	req = embedded_payload(&wrb);
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-				OPCODE_ETH_TX_CREATE, sizeof(*req), &wrb, NULL);
+			       OPCODE_ETH_TX_CREATE, sizeof(*req), &wrb, NULL);
 
 	if (lancer_chip(adapter)) {
 		req->hdr.version = 1;
@@ -1250,8 +1287,8 @@
 
 /* Uses MCC */
 int be_cmd_rxq_create(struct be_adapter *adapter,
-		struct be_queue_info *rxq, u16 cq_id, u16 frag_size,
-		u32 if_id, u32 rss, u8 *rss_id)
+		      struct be_queue_info *rxq, u16 cq_id, u16 frag_size,
+		      u32 if_id, u32 rss, u8 *rss_id)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_eth_rx_create *req;
@@ -1268,7 +1305,7 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-				OPCODE_ETH_RX_CREATE, sizeof(*req), wrb, NULL);
+			       OPCODE_ETH_RX_CREATE, sizeof(*req), wrb, NULL);
 
 	req->cq_id = cpu_to_le16(cq_id);
 	req->frag_size = fls(frag_size) - 1;
@@ -1295,7 +1332,7 @@
  * Uses Mbox
  */
 int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
-		int queue_type)
+		     int queue_type)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_q_destroy *req;
@@ -1334,7 +1371,7 @@
 	}
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, subsys, opcode, sizeof(*req), wrb,
-				NULL);
+			       NULL);
 	req->id = cpu_to_le16(q->id);
 
 	status = be_mbox_notify_wait(adapter);
@@ -1361,7 +1398,7 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-			OPCODE_ETH_RX_DESTROY, sizeof(*req), wrb, NULL);
+			       OPCODE_ETH_RX_DESTROY, sizeof(*req), wrb, NULL);
 	req->id = cpu_to_le16(q->id);
 
 	status = be_mcc_notify_wait(adapter);
@@ -1384,7 +1421,8 @@
 
 	req = embedded_payload(&wrb);
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_INTERFACE_CREATE, sizeof(*req), &wrb, NULL);
+			       OPCODE_COMMON_NTWK_INTERFACE_CREATE,
+			       sizeof(*req), &wrb, NULL);
 	req->hdr.domain = domain;
 	req->capability_flags = cpu_to_le32(cap_flags);
 	req->enable_flags = cpu_to_le32(en_flags);
@@ -1422,7 +1460,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_INTERFACE_DESTROY, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_NTWK_INTERFACE_DESTROY,
+			       sizeof(*req), wrb, NULL);
 	req->hdr.domain = domain;
 	req->interface_id = cpu_to_le32(interface_id);
 
@@ -1452,7 +1491,8 @@
 	hdr = nonemb_cmd->va;
 
 	be_wrb_cmd_hdr_prepare(hdr, CMD_SUBSYSTEM_ETH,
-		OPCODE_ETH_GET_STATISTICS, nonemb_cmd->size, wrb, nonemb_cmd);
+			       OPCODE_ETH_GET_STATISTICS, nonemb_cmd->size, wrb,
+			       nonemb_cmd);
 
 	/* version 1 of the cmd is not supported only by BE2 */
 	if (BE2_chip(adapter))
@@ -1472,7 +1512,7 @@
 
 /* Lancer Stats */
 int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
-				struct be_dma_mem *nonemb_cmd)
+			       struct be_dma_mem *nonemb_cmd)
 {
 
 	struct be_mcc_wrb *wrb;
@@ -1493,8 +1533,8 @@
 	req = nonemb_cmd->va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-			OPCODE_ETH_GET_PPORT_STATS, nonemb_cmd->size, wrb,
-			nonemb_cmd);
+			       OPCODE_ETH_GET_PPORT_STATS, nonemb_cmd->size,
+			       wrb, nonemb_cmd);
 
 	req->cmd_params.params.pport_num = cpu_to_le16(adapter->hba_port_num);
 	req->cmd_params.params.reset_stats = 0;
@@ -1553,7 +1593,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_LINK_STATUS_QUERY, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_NTWK_LINK_STATUS_QUERY,
+			       sizeof(*req), wrb, NULL);
 
 	/* version 1 of the cmd is not supported only by BE2 */
 	if (!BE2_chip(adapter))
@@ -1598,8 +1639,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES, sizeof(*req),
-		wrb, NULL);
+			       OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES,
+			       sizeof(*req), wrb, NULL);
 
 	be_mcc_notify(adapter);
 
@@ -1625,7 +1666,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_MANAGE_FAT, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_MANAGE_FAT, sizeof(*req), wrb,
+			       NULL);
 	req->fat_operation = cpu_to_le32(QUERY_FAT);
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
@@ -1655,8 +1697,8 @@
 
 	get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
 	get_fat_cmd.va = pci_alloc_consistent(adapter->pdev,
-			get_fat_cmd.size,
-			&get_fat_cmd.dma);
+					      get_fat_cmd.size,
+					      &get_fat_cmd.dma);
 	if (!get_fat_cmd.va) {
 		status = -ENOMEM;
 		dev_err(&adapter->pdev->dev,
@@ -1679,8 +1721,8 @@
 
 		payload_len = sizeof(struct be_cmd_req_get_fat) + buf_size;
 		be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-				OPCODE_COMMON_MANAGE_FAT, payload_len, wrb,
-				&get_fat_cmd);
+				       OPCODE_COMMON_MANAGE_FAT, payload_len,
+				       wrb, &get_fat_cmd);
 
 		req->fat_operation = cpu_to_le32(RETRIEVE_FAT);
 		req->read_log_offset = cpu_to_le32(log_offset);
@@ -1691,8 +1733,8 @@
 		if (!status) {
 			struct be_cmd_resp_get_fat *resp = get_fat_cmd.va;
 			memcpy(buf + offset,
-				resp->data_buffer,
-				le32_to_cpu(resp->read_log_length));
+			       resp->data_buffer,
+			       le32_to_cpu(resp->read_log_length));
 		} else {
 			dev_err(&adapter->pdev->dev, "FAT Table Retrieve error\n");
 			goto err;
@@ -1702,14 +1744,13 @@
 	}
 err:
 	pci_free_consistent(adapter->pdev, get_fat_cmd.size,
-			get_fat_cmd.va,
-			get_fat_cmd.dma);
+			    get_fat_cmd.va, get_fat_cmd.dma);
 	spin_unlock_bh(&adapter->mcc_lock);
 }
 
 /* Uses synchronous mcc */
 int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver,
-			char *fw_on_flash)
+		      char *fw_on_flash)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_get_fw_version *req;
@@ -1726,7 +1767,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_GET_FW_VERSION, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_GET_FW_VERSION, sizeof(*req), wrb,
+			       NULL);
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
 		struct be_cmd_resp_get_fw_version *resp = embedded_payload(wrb);
@@ -1759,7 +1801,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_MODIFY_EQ_DELAY, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_MODIFY_EQ_DELAY, sizeof(*req), wrb,
+			       NULL);
 
 	req->num_eq = cpu_to_le32(num);
 	for (i = 0; i < num; i++) {
@@ -1777,7 +1820,7 @@
 
 /* Uses sycnhronous mcc */
 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
-		       u32 num, bool promiscuous)
+		       u32 num)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_vlan_config *req;
@@ -1793,19 +1836,16 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req),
+			       wrb, NULL);
 
 	req->interface_id = if_id;
-	req->promiscuous = promiscuous;
 	req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;
 	req->num_vlan = num;
-	if (!promiscuous) {
-		memcpy(req->normal_vlan, vtag_array,
-			req->num_vlan * sizeof(vtag_array[0]));
-	}
+	memcpy(req->normal_vlan, vtag_array,
+	       req->num_vlan * sizeof(vtag_array[0]));
 
 	status = be_mcc_notify_wait(adapter);
-
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
@@ -1827,18 +1867,19 @@
 	}
 	memset(req, 0, sizeof(*req));
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-				OPCODE_COMMON_NTWK_RX_FILTER, sizeof(*req),
-				wrb, mem);
+			       OPCODE_COMMON_NTWK_RX_FILTER, sizeof(*req),
+			       wrb, mem);
 
 	req->if_id = cpu_to_le32(adapter->if_handle);
 	if (flags & IFF_PROMISC) {
 		req->if_flags_mask = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS |
-					BE_IF_FLAGS_VLAN_PROMISCUOUS |
-					BE_IF_FLAGS_MCAST_PROMISCUOUS);
+						 BE_IF_FLAGS_VLAN_PROMISCUOUS |
+						 BE_IF_FLAGS_MCAST_PROMISCUOUS);
 		if (value == ON)
-			req->if_flags = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS |
-						BE_IF_FLAGS_VLAN_PROMISCUOUS |
-						BE_IF_FLAGS_MCAST_PROMISCUOUS);
+			req->if_flags =
+				cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS |
+					    BE_IF_FLAGS_VLAN_PROMISCUOUS |
+					    BE_IF_FLAGS_MCAST_PROMISCUOUS);
 	} else if (flags & IFF_ALLMULTI) {
 		req->if_flags_mask = req->if_flags =
 				cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS);
@@ -1867,7 +1908,7 @@
 	}
 
 	if ((req->if_flags_mask & cpu_to_le32(be_if_cap_flags(adapter))) !=
-	     req->if_flags_mask) {
+	    req->if_flags_mask) {
 		dev_warn(&adapter->pdev->dev,
 			 "Cannot set rx filter flags 0x%x\n",
 			 req->if_flags_mask);
@@ -1905,7 +1946,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_SET_FLOW_CONTROL, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_SET_FLOW_CONTROL, sizeof(*req),
+			       wrb, NULL);
 
 	req->tx_flow_control = cpu_to_le16((u16)tx_fc);
 	req->rx_flow_control = cpu_to_le16((u16)rx_fc);
@@ -1938,7 +1980,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_GET_FLOW_CONTROL, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_GET_FLOW_CONTROL, sizeof(*req),
+			       wrb, NULL);
 
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
@@ -1968,7 +2011,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_QUERY_FIRMWARE_CONFIG, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_QUERY_FIRMWARE_CONFIG,
+			       sizeof(*req), wrb, NULL);
 
 	status = be_mbox_notify_wait(adapter);
 	if (!status) {
@@ -2011,7 +2055,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(req, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_FUNCTION_RESET, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_FUNCTION_RESET, sizeof(*req), wrb,
+			       NULL);
 
 	status = be_mbox_notify_wait(adapter);
 
@@ -2020,47 +2065,47 @@
 }
 
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-			u32 rss_hash_opts, u16 table_size)
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_rss_config *req;
-	u32 myhash[10] = {0x15d43fa5, 0x2534685a, 0x5f87693a, 0x5668494e,
-			0x33cf6a53, 0x383334c6, 0x76ac4257, 0x59b242b2,
-			0x3ea83c02, 0x4a110304};
 	int status;
 
 	if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
 		return 0;
 
-	if (mutex_lock_interruptible(&adapter->mbox_lock))
-		return -1;
+	spin_lock_bh(&adapter->mcc_lock);
 
-	wrb = wrb_from_mbox(adapter);
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-		OPCODE_ETH_RSS_CONFIG, sizeof(*req), wrb, NULL);
+			       OPCODE_ETH_RSS_CONFIG, sizeof(*req), wrb, NULL);
 
 	req->if_id = cpu_to_le32(adapter->if_handle);
 	req->enable_rss = cpu_to_le16(rss_hash_opts);
 	req->cpu_table_size_log2 = cpu_to_le16(fls(table_size) - 1);
 
-	if (lancer_chip(adapter) || skyhawk_chip(adapter))
+	if (!BEx_chip(adapter))
 		req->hdr.version = 1;
 
 	memcpy(req->cpu_table, rsstable, table_size);
-	memcpy(req->hash, myhash, sizeof(myhash));
+	memcpy(req->hash, rss_hkey, RSS_HASH_KEY_LEN);
 	be_dws_cpu_to_le(req->hash, sizeof(req->hash));
 
-	status = be_mbox_notify_wait(adapter);
-
-	mutex_unlock(&adapter->mbox_lock);
+	status = be_mcc_notify_wait(adapter);
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
 
 /* Uses sync mcc */
 int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num,
-			u8 bcn, u8 sts, u8 state)
+			    u8 bcn, u8 sts, u8 state)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_enable_disable_beacon *req;
@@ -2076,7 +2121,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_ENABLE_DISABLE_BEACON, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_ENABLE_DISABLE_BEACON,
+			       sizeof(*req), wrb, NULL);
 
 	req->port_num = port_num;
 	req->beacon_state = state;
@@ -2107,7 +2153,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_GET_BEACON_STATE, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_GET_BEACON_STATE, sizeof(*req),
+			       wrb, NULL);
 
 	req->port_num = port_num;
 
@@ -2146,20 +2193,20 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-				OPCODE_COMMON_WRITE_OBJECT,
-				sizeof(struct lancer_cmd_req_write_object), wrb,
-				NULL);
+			       OPCODE_COMMON_WRITE_OBJECT,
+			       sizeof(struct lancer_cmd_req_write_object), wrb,
+			       NULL);
 
 	ctxt = &req->context;
 	AMAP_SET_BITS(struct amap_lancer_write_obj_context,
-			write_length, ctxt, data_size);
+		      write_length, ctxt, data_size);
 
 	if (data_size == 0)
 		AMAP_SET_BITS(struct amap_lancer_write_obj_context,
-				eof, ctxt, 1);
+			      eof, ctxt, 1);
 	else
 		AMAP_SET_BITS(struct amap_lancer_write_obj_context,
-				eof, ctxt, 0);
+			      eof, ctxt, 0);
 
 	be_dws_cpu_to_le(ctxt, sizeof(req->context));
 	req->write_offset = cpu_to_le32(data_offset);
@@ -2167,8 +2214,8 @@
 	req->descriptor_count = cpu_to_le32(1);
 	req->buf_len = cpu_to_le32(data_size);
 	req->addr_low = cpu_to_le32((cmd->dma +
-				sizeof(struct lancer_cmd_req_write_object))
-				& 0xFFFFFFFF);
+				     sizeof(struct lancer_cmd_req_write_object))
+				    & 0xFFFFFFFF);
 	req->addr_high = cpu_to_le32(upper_32_bits(cmd->dma +
 				sizeof(struct lancer_cmd_req_write_object)));
 
@@ -2197,8 +2244,8 @@
 }
 
 int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
-		u32 data_size, u32 data_offset, const char *obj_name,
-		u32 *data_read, u32 *eof, u8 *addn_status)
+			   u32 data_size, u32 data_offset, const char *obj_name,
+			   u32 *data_read, u32 *eof, u8 *addn_status)
 {
 	struct be_mcc_wrb *wrb;
 	struct lancer_cmd_req_read_object *req;
@@ -2216,9 +2263,9 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_READ_OBJECT,
-			sizeof(struct lancer_cmd_req_read_object), wrb,
-			NULL);
+			       OPCODE_COMMON_READ_OBJECT,
+			       sizeof(struct lancer_cmd_req_read_object), wrb,
+			       NULL);
 
 	req->desired_read_len = cpu_to_le32(data_size);
 	req->read_offset = cpu_to_le32(data_offset);
@@ -2244,7 +2291,7 @@
 }
 
 int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd,
-			u32 flash_type, u32 flash_opcode, u32 buf_size)
+			  u32 flash_type, u32 flash_opcode, u32 buf_size)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_write_flashrom *req;
@@ -2261,7 +2308,8 @@
 	req = cmd->va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_WRITE_FLASHROM, cmd->size, wrb, cmd);
+			       OPCODE_COMMON_WRITE_FLASHROM, cmd->size, wrb,
+			       cmd);
 
 	req->params.op_type = cpu_to_le32(flash_type);
 	req->params.op_code = cpu_to_le32(flash_opcode);
@@ -2284,7 +2332,7 @@
 }
 
 int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc,
-			 int offset)
+			  u16 optype, int offset)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_read_flash_crc *req;
@@ -2303,7 +2351,7 @@
 			       OPCODE_COMMON_READ_FLASHROM, sizeof(*req),
 			       wrb, NULL);
 
-	req->params.op_type = cpu_to_le32(OPTYPE_REDBOOT);
+	req->params.op_type = cpu_to_le32(optype);
 	req->params.op_code = cpu_to_le32(FLASHROM_OPER_REPORT);
 	req->params.offset = cpu_to_le32(offset);
 	req->params.data_buf_size = cpu_to_le32(0x4);
@@ -2318,7 +2366,7 @@
 }
 
 int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac,
-				struct be_dma_mem *nonemb_cmd)
+			    struct be_dma_mem *nonemb_cmd)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_acpi_wol_magic_config *req;
@@ -2334,8 +2382,8 @@
 	req = nonemb_cmd->va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
-		OPCODE_ETH_ACPI_WOL_MAGIC_CONFIG, sizeof(*req), wrb,
-		nonemb_cmd);
+			       OPCODE_ETH_ACPI_WOL_MAGIC_CONFIG, sizeof(*req),
+			       wrb, nonemb_cmd);
 	memcpy(req->magic_mac, mac, ETH_ALEN);
 
 	status = be_mcc_notify_wait(adapter);
@@ -2363,8 +2411,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL,
-			OPCODE_LOWLEVEL_SET_LOOPBACK_MODE, sizeof(*req), wrb,
-			NULL);
+			       OPCODE_LOWLEVEL_SET_LOOPBACK_MODE, sizeof(*req),
+			       wrb, NULL);
 
 	req->src_port = port_num;
 	req->dest_port = port_num;
@@ -2378,7 +2426,8 @@
 }
 
 int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
-		u32 loopback_type, u32 pkt_size, u32 num_pkts, u64 pattern)
+			 u32 loopback_type, u32 pkt_size, u32 num_pkts,
+			 u64 pattern)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_loopback_test *req;
@@ -2396,7 +2445,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL,
-			OPCODE_LOWLEVEL_LOOPBACK_TEST, sizeof(*req), wrb, NULL);
+			       OPCODE_LOWLEVEL_LOOPBACK_TEST, sizeof(*req), wrb,
+			       NULL);
 
 	req->hdr.timeout = cpu_to_le32(15);
 	req->pattern = cpu_to_le64(pattern);
@@ -2421,7 +2471,7 @@
 }
 
 int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern,
-				u32 byte_cnt, struct be_dma_mem *cmd)
+			u32 byte_cnt, struct be_dma_mem *cmd)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_ddrdma_test *req;
@@ -2437,7 +2487,8 @@
 	}
 	req = cmd->va;
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL,
-			OPCODE_LOWLEVEL_HOST_DDR_DMA, cmd->size, wrb, cmd);
+			       OPCODE_LOWLEVEL_HOST_DDR_DMA, cmd->size, wrb,
+			       cmd);
 
 	req->pattern = cpu_to_le64(pattern);
 	req->byte_count = cpu_to_le32(byte_cnt);
@@ -2465,7 +2516,7 @@
 }
 
 int be_cmd_get_seeprom_data(struct be_adapter *adapter,
-				struct be_dma_mem *nonemb_cmd)
+			    struct be_dma_mem *nonemb_cmd)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_seeprom_read *req;
@@ -2481,8 +2532,8 @@
 	req = nonemb_cmd->va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_SEEPROM_READ, sizeof(*req), wrb,
-			nonemb_cmd);
+			       OPCODE_COMMON_SEEPROM_READ, sizeof(*req), wrb,
+			       nonemb_cmd);
 
 	status = be_mcc_notify_wait(adapter);
 
@@ -2510,8 +2561,7 @@
 		goto err;
 	}
 	cmd.size = sizeof(struct be_cmd_req_get_phy_info);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size,
-					&cmd.dma);
+	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -2521,8 +2571,8 @@
 	req = cmd.va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_GET_PHY_DETAILS, sizeof(*req),
-			wrb, &cmd);
+			       OPCODE_COMMON_GET_PHY_DETAILS, sizeof(*req),
+			       wrb, &cmd);
 
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
@@ -2544,8 +2594,7 @@
 				BE_SUPPORTED_SPEED_1GBPS;
 		}
 	}
-	pci_free_consistent(adapter->pdev, cmd.size,
-				cmd.va, cmd.dma);
+	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
@@ -2568,7 +2617,7 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_SET_QOS, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_SET_QOS, sizeof(*req), wrb, NULL);
 
 	req->hdr.domain = domain;
 	req->valid_bits = cpu_to_le32(BE_QOS_BITS_NIC);
@@ -2597,10 +2646,9 @@
 	memset(&attribs_cmd, 0, sizeof(struct be_dma_mem));
 	attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs);
 	attribs_cmd.va = pci_alloc_consistent(adapter->pdev, attribs_cmd.size,
-						&attribs_cmd.dma);
+					      &attribs_cmd.dma);
 	if (!attribs_cmd.va) {
-		dev_err(&adapter->pdev->dev,
-				"Memory allocation failure\n");
+		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
 		goto err;
 	}
@@ -2613,8 +2661,8 @@
 	req = attribs_cmd.va;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			 OPCODE_COMMON_GET_CNTL_ATTRIBUTES, payload_len, wrb,
-			&attribs_cmd);
+			       OPCODE_COMMON_GET_CNTL_ATTRIBUTES, payload_len,
+			       wrb, &attribs_cmd);
 
 	status = be_mbox_notify_wait(adapter);
 	if (!status) {
@@ -2649,7 +2697,8 @@
 	req = embedded_payload(wrb);
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-		OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP,
+			       sizeof(*req), wrb, NULL);
 
 	req->valid_cap_flags = cpu_to_le32(CAPABILITY_SW_TIMESTAMPS |
 				CAPABILITY_BE3_NATIVE_ERX_API);
@@ -2762,12 +2811,12 @@
 	memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem));
 	get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list);
 	get_mac_list_cmd.va = pci_alloc_consistent(adapter->pdev,
-			get_mac_list_cmd.size,
-			&get_mac_list_cmd.dma);
+						   get_mac_list_cmd.size,
+						   &get_mac_list_cmd.dma);
 
 	if (!get_mac_list_cmd.va) {
 		dev_err(&adapter->pdev->dev,
-				"Memory allocation failure during GET_MAC_LIST\n");
+			"Memory allocation failure during GET_MAC_LIST\n");
 		return -ENOMEM;
 	}
 
@@ -2831,18 +2880,18 @@
 		/* If no active mac_id found, return first mac addr */
 		*pmac_id_valid = false;
 		memcpy(mac, resp->macaddr_list[0].mac_addr_id.macaddr,
-								ETH_ALEN);
+		       ETH_ALEN);
 	}
 
 out:
 	spin_unlock_bh(&adapter->mcc_lock);
 	pci_free_consistent(adapter->pdev, get_mac_list_cmd.size,
-			get_mac_list_cmd.va, get_mac_list_cmd.dma);
+			    get_mac_list_cmd.va, get_mac_list_cmd.dma);
 	return status;
 }
 
-int be_cmd_get_active_mac(struct be_adapter *adapter, u32 curr_pmac_id, u8 *mac,
-			  u32 if_handle, bool active, u32 domain)
+int be_cmd_get_active_mac(struct be_adapter *adapter, u32 curr_pmac_id,
+			  u8 *mac, u32 if_handle, bool active, u32 domain)
 {
 
 	if (!active)
@@ -2892,7 +2941,7 @@
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_mac_list);
 	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size,
-			&cmd.dma, GFP_KERNEL);
+				    &cmd.dma, GFP_KERNEL);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -2906,8 +2955,8 @@
 
 	req = cmd.va;
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-				OPCODE_COMMON_SET_MAC_LIST, sizeof(*req),
-				wrb, &cmd);
+			       OPCODE_COMMON_SET_MAC_LIST, sizeof(*req),
+			       wrb, &cmd);
 
 	req->hdr.domain = domain;
 	req->mac_count = mac_count;
@@ -2917,8 +2966,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	dma_free_coherent(&adapter->pdev->dev, cmd.size,
-				cmd.va, cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
@@ -2963,7 +3011,8 @@
 	ctxt = &req->context;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_SET_HSW_CONFIG, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_SET_HSW_CONFIG, sizeof(*req), wrb,
+			       NULL);
 
 	req->hdr.domain = domain;
 	AMAP_SET_BITS(struct amap_set_hsw_context, interface_id, ctxt, intf_id);
@@ -3009,7 +3058,8 @@
 	ctxt = &req->context;
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			OPCODE_COMMON_GET_HSW_CONFIG, sizeof(*req), wrb, NULL);
+			       OPCODE_COMMON_GET_HSW_CONFIG, sizeof(*req), wrb,
+			       NULL);
 
 	req->hdr.domain = domain;
 	AMAP_SET_BITS(struct amap_get_hsw_req_context, interface_id,
@@ -3027,10 +3077,9 @@
 	if (!status) {
 		struct be_cmd_resp_get_hsw_config *resp =
 						embedded_payload(wrb);
-		be_dws_le_to_cpu(&resp->context,
-						sizeof(resp->context));
+		be_dws_le_to_cpu(&resp->context, sizeof(resp->context));
 		vid = AMAP_GET_BITS(struct amap_get_hsw_resp_context,
-							pvid, &resp->context);
+				    pvid, &resp->context);
 		if (pvid)
 			*pvid = le16_to_cpu(vid);
 		if (mode)
@@ -3062,11 +3111,9 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size,
-					       &cmd.dma);
+	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
 	if (!cmd.va) {
-		dev_err(&adapter->pdev->dev,
-				"Memory allocation failure\n");
+		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
 		goto err;
 	}
@@ -3349,8 +3396,7 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_func_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size,
-				      &cmd.dma);
+	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -3396,7 +3442,7 @@
 
 /* Uses mbox */
 static int be_cmd_get_profile_config_mbox(struct be_adapter *adapter,
-					u8 domain, struct be_dma_mem *cmd)
+					  u8 domain, struct be_dma_mem *cmd)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_get_profile_config *req;
@@ -3424,7 +3470,7 @@
 
 /* Uses sync mcc */
 static int be_cmd_get_profile_config_mccq(struct be_adapter *adapter,
-					u8 domain, struct be_dma_mem *cmd)
+					  u8 domain, struct be_dma_mem *cmd)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_get_profile_config *req;
@@ -3484,8 +3530,8 @@
 	resp = cmd.va;
 	desc_count = le32_to_cpu(resp->desc_count);
 
-	pcie =  be_get_pcie_desc(adapter->pdev->devfn, resp->func_param,
-				 desc_count);
+	pcie = be_get_pcie_desc(adapter->pdev->devfn, resp->func_param,
+				desc_count);
 	if (pcie)
 		res->max_vfs = le16_to_cpu(pcie->num_vfs);
 
@@ -3548,33 +3594,60 @@
 	nic->cq_count = 0xFFFF;
 	nic->toe_conn_count = 0xFFFF;
 	nic->eq_count = 0xFFFF;
+	nic->iface_count = 0xFFFF;
 	nic->link_param = 0xFF;
+	nic->channel_id_param = cpu_to_le16(0xF000);
 	nic->acpi_params = 0xFF;
 	nic->wol_param = 0x0F;
-	nic->bw_min = 0xFFFFFFFF;
+	nic->tunnel_iface_count = 0xFFFF;
+	nic->direct_tenant_iface_count = 0xFFFF;
 	nic->bw_max = 0xFFFFFFFF;
 }
 
-int be_cmd_config_qos(struct be_adapter *adapter, u32 bps, u8 domain)
+/* Apply a maximum-rate QoS setting for @domain.
+ * BE3 hands max_rate / 10 to be_cmd_set_qos(). Other chips are sent a NIC
+ * resource descriptor through be_cmd_set_profile_config(): Lancer (V0 desc)
+ * gets max_rate / 10, the rest (V1 desc) get max_rate as a percentage of
+ * link_speed, where a max_rate of 0 means 100%.
+ * Returns -EINVAL when that percentage is needed but link_speed is 0,
+ * otherwise the status of the command issued.
+ */
+int be_cmd_config_qos(struct be_adapter *adapter, u32 max_rate, u16 link_speed,
+		      u8 domain)
 {
-	if (lancer_chip(adapter)) {
-		struct be_nic_res_desc nic_desc;
+	struct be_nic_res_desc nic_desc;
+	u32 bw_percent;
+	u16 version = 0;
 
-		be_reset_nic_desc(&nic_desc);
+	if (BE3_chip(adapter))
+		return be_cmd_set_qos(adapter, max_rate / 10, domain);
+
+	be_reset_nic_desc(&nic_desc);
+	nic_desc.pf_num = adapter->pf_number;
+	nic_desc.vf_num = domain;
+	if (lancer_chip(adapter)) {
 		nic_desc.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V0;
 		nic_desc.hdr.desc_len = RESOURCE_DESC_SIZE_V0;
 		nic_desc.flags = (1 << QUN_SHIFT) | (1 << IMM_SHIFT) |
 					(1 << NOSV_SHIFT);
-		nic_desc.pf_num = adapter->pf_number;
-		nic_desc.vf_num = domain;
-		nic_desc.bw_max = cpu_to_le32(bps);
-
-		return be_cmd_set_profile_config(adapter, &nic_desc,
-						 RESOURCE_DESC_SIZE_V0,
-						 0, domain);
+		nic_desc.bw_max = cpu_to_le32(max_rate / 10);
 	} else {
-		return be_cmd_set_qos(adapter, bps, domain);
+		/* bw_max is a percentage of link_speed on this path; a zero
+		 * link_speed would make the division below trap.
+		 */
+		if (max_rate && !link_speed)
+			return -EINVAL;
+
+		version = 1;
+		nic_desc.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V1;
+		nic_desc.hdr.desc_len = RESOURCE_DESC_SIZE_V1;
+		nic_desc.flags = (1 << IMM_SHIFT) | (1 << NOSV_SHIFT);
+		bw_percent = max_rate ? (max_rate * 100) / link_speed : 100;
+		nic_desc.bw_max = cpu_to_le32(bw_percent);
 	}
+
+	return be_cmd_set_profile_config(adapter, &nic_desc,
+					 nic_desc.hdr.desc_len,
+					 version, domain);
 }
 
 int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op)
@@ -3859,7 +3919,7 @@
 }
 
 int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
-			int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
+		    int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
 {
 	struct be_adapter *adapter = netdev_priv(netdev_handle);
 	struct be_mcc_wrb *wrb;

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index b60e4d5..3e0a6b2 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h

@@ -50,7 +50,7 @@
 #define CQE_FLAGS_CONSUMED_MASK 	(1 << 27)
 
 /* Completion Status */
-enum {
+enum mcc_base_status {
 	MCC_STATUS_SUCCESS = 0,
 	MCC_STATUS_FAILED = 1,
 	MCC_STATUS_ILLEGAL_REQUEST = 2,
@@ -60,12 +60,25 @@
 	MCC_STATUS_NOT_SUPPORTED = 66
 };
 
-#define MCC_ADDL_STS_INSUFFICIENT_RESOURCES	0x16
+/* Additional status values (the type addl_status() yields) */
+enum mcc_addl_status {
+	MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES = 0x16,
+	MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a,
+	MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH = 0x4d
+};
 
-#define CQE_STATUS_COMPL_MASK		0xFFFF
-#define CQE_STATUS_COMPL_SHIFT		0	/* bits 0 - 15 */
-#define CQE_STATUS_EXTD_MASK		0xFFFF
-#define CQE_STATUS_EXTD_SHIFT		16	/* bits 16 - 31 */
+#define CQE_BASE_STATUS_MASK		0xFFFF
+#define CQE_BASE_STATUS_SHIFT		0	/* bits 0 - 15 */
+#define CQE_ADDL_STATUS_MASK		0xFF
+#define CQE_ADDL_STATUS_SHIFT		16	/* bits 16 - 31 */
+
+#define base_status(status)	/* bits 0-15 of a +ve status, else 0 */ \
+		((enum mcc_base_status)	\
+			((status) > 0 ? ((status) & CQE_BASE_STATUS_MASK) : 0))
+#define addl_status(status)	/* bits 16-23 of a +ve status, else 0 */ \
+		((enum mcc_addl_status)	\
+			((status) > 0 ? ((status) >> CQE_ADDL_STATUS_SHIFT) & \
+					CQE_ADDL_STATUS_MASK : 0))
 
 struct be_mcc_compl {
 	u32 status;		/* dword 0 */
@@ -74,13 +87,13 @@
 	u32 flags;		/* dword 3 */
 };
 
-/* When the async bit of mcc_compl is set, the last 4 bytes of
- * mcc_compl is interpreted as follows:
+/* When the async bit of mcc_compl flags is set, flags
+ * is interpreted as follows:
  */
-#define ASYNC_TRAILER_EVENT_CODE_SHIFT	8	/* bits 8 - 15 */
-#define ASYNC_TRAILER_EVENT_CODE_MASK	0xFF
-#define ASYNC_TRAILER_EVENT_TYPE_SHIFT	16
-#define ASYNC_TRAILER_EVENT_TYPE_MASK	0xFF
+#define ASYNC_EVENT_CODE_SHIFT		8	/* bits 8 - 15 */
+#define ASYNC_EVENT_CODE_MASK		0xFF
+#define ASYNC_EVENT_TYPE_SHIFT		16
+#define ASYNC_EVENT_TYPE_MASK		0xFF
 #define ASYNC_EVENT_CODE_LINK_STATE	0x1
 #define ASYNC_EVENT_CODE_GRP_5		0x5
 #define ASYNC_EVENT_QOS_SPEED		0x1
@@ -89,10 +102,6 @@
 #define ASYNC_EVENT_CODE_QNQ		0x6
 #define ASYNC_DEBUG_EVENT_TYPE_QNQ	1
 
-struct be_async_event_trailer {
-	u32 code;
-};
-
 enum {
 	LINK_DOWN	= 0x0,
 	LINK_UP		= 0x1
@@ -100,7 +109,7 @@
 #define LINK_STATUS_MASK			0x1
 #define LOGICAL_LINK_STATUS_MASK		0x2
 
-/* When the event code of an async trailer is link-state, the mcc_compl
+/* When the event code of compl->flags is link-state, the mcc_compl
  * must be interpreted as follows
  */
 struct be_async_event_link_state {
@@ -110,10 +119,10 @@
 	u8 port_speed;
 	u8 port_fault;
 	u8 rsvd0[7];
-	struct be_async_event_trailer trailer;
+	u32 flags;
 } __packed;
 
-/* When the event code of an async trailer is GRP-5 and event_type is QOS_SPEED
+/* When the event code of compl->flags is GRP-5 and event_type is QOS_SPEED
  * the mcc_compl must be interpreted as follows
  */
 struct be_async_event_grp5_qos_link_speed {
@@ -121,10 +130,10 @@
 	u8 rsvd[5];
 	u16 qos_link_speed;
 	u32 event_tag;
-	struct be_async_event_trailer trailer;
+	u32 flags;
 } __packed;
 
-/* When the event code of an async trailer is GRP5 and event type is
+/* When the event code of compl->flags is GRP5 and event type is
  * CoS-Priority, the mcc_compl must be interpreted as follows
  */
 struct be_async_event_grp5_cos_priority {
@@ -134,10 +143,10 @@
 	u8 valid;
 	u8 rsvd0;
 	u8 event_tag;
-	struct be_async_event_trailer trailer;
+	u32 flags;
 } __packed;
 
-/* When the event code of an async trailer is GRP5 and event type is
+/* When the event code of compl->flags is GRP5 and event type is
  * PVID state, the mcc_compl must be interpreted as follows
  */
 struct be_async_event_grp5_pvid_state {
@@ -146,7 +155,7 @@
 	u16 tag;
 	u32 event_tag;
 	u32 rsvd1;
-	struct be_async_event_trailer trailer;
+	u32 flags;
 } __packed;
 
 /* async event indicating outer VLAN tag in QnQ */
@@ -156,7 +165,7 @@
 	u16 vlan_tag;
 	u32 event_tag;
 	u8 rsvd1[4];
-	struct be_async_event_trailer trailer;
+	u32 flags;
 } __packed;
 
 struct be_mcc_mailbox {
@@ -258,8 +267,8 @@
 	u8 opcode;		/* dword 0 */
 	u8 subsystem;		/* dword 0 */
 	u8 rsvd[2];		/* dword 0 */
-	u8 status;		/* dword 1 */
-	u8 add_status;		/* dword 1 */
+	u8 base_status;		/* dword 1 */
+	u8 addl_status;		/* dword 1 */
 	u8 rsvd1[2];		/* dword 1 */
 	u32 response_length;	/* dword 2 */
 	u32 actual_resp_len;	/* dword 3 */
@@ -1186,7 +1195,8 @@
 	struct flashrom_params params;
 	u8 crc[4];
 	u8 rsvd[4];
-};
+} __packed;
+
 /**************** Lancer Firmware Flash ************/
 struct amap_lancer_write_obj_context {
 	u8 write_length[24];
@@ -1891,16 +1901,20 @@
 	u16 cq_count;
 	u16 toe_conn_count;
 	u16 eq_count;
-	u32 rsvd5;
+	u16 vlan_id;
+	u16 iface_count;
 	u32 cap_flags;
 	u8 link_param;
-	u8 rsvd6[3];
+	u8 rsvd6;
+	u16 channel_id_param;
 	u32 bw_min;
 	u32 bw_max;
 	u8 acpi_params;
 	u8 wol_param;
 	u16 rsvd7;
-	u32 rsvd8[7];
+	u16 tunnel_iface_count;
+	u16 direct_tenant_iface_count;
+	u32 rsvd8[6];
 } __packed;
 
 /************ Multi-Channel type ***********/
@@ -2060,7 +2074,7 @@
 		      char *fw_on_flash);
 int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num);
 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
-		       u32 num, bool promiscuous);
+		       u32 num);
 int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status);
 int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc);
 int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc);
@@ -2068,7 +2082,7 @@
 			u32 *function_mode, u32 *function_caps, u16 *asic_rev);
 int be_cmd_reset_function(struct be_adapter *adapter);
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-		      u32 rss_hash_opts, u16 table_size);
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey);
 int be_process_mcc(struct be_adapter *adapter);
 int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
 			    u8 status, u8 state);
@@ -2084,7 +2098,7 @@
 			   u32 data_size, u32 data_offset, const char *obj_name,
 			   u32 *data_read, u32 *eof, u8 *addn_status);
 int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc,
-			 int offset);
+			  u16 optype, int offset);
 int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac,
 			    struct be_dma_mem *nonemb_cmd);
 int be_cmd_fw_init(struct be_adapter *adapter);
@@ -2101,7 +2115,8 @@
 int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
 			u8 loopback_type, u8 enable);
 int be_cmd_get_phy_info(struct be_adapter *adapter);
-int be_cmd_config_qos(struct be_adapter *adapter, u32 bps, u8 domain);
+int be_cmd_config_qos(struct be_adapter *adapter, u32 max_rate,
+		      u16 link_speed, u8 domain);
 void be_detect_error(struct be_adapter *adapter);
 int be_cmd_get_die_temperature(struct be_adapter *adapter);
 int be_cmd_get_cntl_attributes(struct be_adapter *adapter);

diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 15ba96c..e2da4d2 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c

@@ -132,6 +132,7 @@
 	{DRVSTAT_RX_INFO(rx_bytes)},/* If moving this member see above note */
 	{DRVSTAT_RX_INFO(rx_pkts)}, /* If moving this member see above note */
 	{DRVSTAT_RX_INFO(rx_compl)},
+	{DRVSTAT_RX_INFO(rx_compl_err)},
 	{DRVSTAT_RX_INFO(rx_mcast_pkts)},
 	/* Number of page allocation failures while posting receive buffers
 	 * to HW.
@@ -181,7 +182,7 @@
 #define BE_NO_LOOPBACK 0xff
 
 static void be_get_drvinfo(struct net_device *netdev,
-				struct ethtool_drvinfo *drvinfo)
+			   struct ethtool_drvinfo *drvinfo)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -201,8 +202,7 @@
 	drvinfo->eedump_len = 0;
 }
 
-static u32
-lancer_cmd_get_file_len(struct be_adapter *adapter, u8 *file_name)
+static u32 lancer_cmd_get_file_len(struct be_adapter *adapter, u8 *file_name)
 {
 	u32 data_read = 0, eof;
 	u8 addn_status;
@@ -212,14 +212,14 @@
 	memset(&data_len_cmd, 0, sizeof(data_len_cmd));
 	/* data_offset and data_size should be 0 to get reg len */
 	status = lancer_cmd_read_object(adapter, &data_len_cmd, 0, 0,
-				file_name, &data_read, &eof, &addn_status);
+					file_name, &data_read, &eof,
+					&addn_status);
 
 	return data_read;
 }
 
-static int
-lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
-		u32 buf_len, void *buf)
+static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
+				u32 buf_len, void *buf)
 {
 	struct be_dma_mem read_cmd;
 	u32 read_len = 0, total_read_len = 0, chunk_size;
@@ -229,11 +229,11 @@
 
 	read_cmd.size = LANCER_READ_FILE_CHUNK;
 	read_cmd.va = pci_alloc_consistent(adapter->pdev, read_cmd.size,
-			&read_cmd.dma);
+					   &read_cmd.dma);
 
 	if (!read_cmd.va) {
 		dev_err(&adapter->pdev->dev,
-				"Memory allocation failure while reading dump\n");
+			"Memory allocation failure while reading dump\n");
 		return -ENOMEM;
 	}
 
@@ -242,8 +242,8 @@
 				LANCER_READ_FILE_CHUNK);
 		chunk_size = ALIGN(chunk_size, 4);
 		status = lancer_cmd_read_object(adapter, &read_cmd, chunk_size,
-				total_read_len, file_name, &read_len,
-				&eof, &addn_status);
+						total_read_len, file_name,
+						&read_len, &eof, &addn_status);
 		if (!status) {
 			memcpy(buf + total_read_len, read_cmd.va, read_len);
 			total_read_len += read_len;
@@ -254,13 +254,12 @@
 		}
 	}
 	pci_free_consistent(adapter->pdev, read_cmd.size, read_cmd.va,
-			read_cmd.dma);
+			    read_cmd.dma);
 
 	return status;
 }
 
-static int
-be_get_reg_len(struct net_device *netdev)
+static int be_get_reg_len(struct net_device *netdev)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	u32 log_size = 0;
@@ -271,7 +270,7 @@
 	if (be_physfn(adapter)) {
 		if (lancer_chip(adapter))
 			log_size = lancer_cmd_get_file_len(adapter,
-					LANCER_FW_DUMP_FILE);
+							   LANCER_FW_DUMP_FILE);
 		else
 			be_cmd_get_reg_len(adapter, &log_size);
 	}
@@ -287,7 +286,7 @@
 		memset(buf, 0, regs->len);
 		if (lancer_chip(adapter))
 			lancer_cmd_read_file(adapter, LANCER_FW_DUMP_FILE,
-					regs->len, buf);
+					     regs->len, buf);
 		else
 			be_cmd_get_regs(adapter, regs->len, buf);
 	}
@@ -337,9 +336,8 @@
 	return 0;
 }
 
-static void
-be_get_ethtool_stats(struct net_device *netdev,
-		struct ethtool_stats *stats, uint64_t *data)
+static void be_get_ethtool_stats(struct net_device *netdev,
+				 struct ethtool_stats *stats, uint64_t *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_rx_obj *rxo;
@@ -390,9 +388,8 @@
 	}
 }
 
-static void
-be_get_stat_strings(struct net_device *netdev, uint32_t stringset,
-		uint8_t *data)
+static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset,
+				uint8_t *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int i, j;
@@ -642,16 +639,15 @@
 	adapter->rx_fc = ecmd->rx_pause;
 
 	status = be_cmd_set_flow_control(adapter,
-					adapter->tx_fc, adapter->rx_fc);
+					 adapter->tx_fc, adapter->rx_fc);
 	if (status)
 		dev_warn(&adapter->pdev->dev, "Pause param set failed.\n");
 
 	return status;
 }
 
-static int
-be_set_phys_id(struct net_device *netdev,
-	       enum ethtool_phys_id_state state)
+static int be_set_phys_id(struct net_device *netdev,
+			  enum ethtool_phys_id_state state)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -708,8 +704,7 @@
 	return status;
 }
 
-static void
-be_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+static void be_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -723,8 +718,7 @@
 	memset(&wol->sopass, 0, sizeof(wol->sopass));
 }
 
-static int
-be_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+static int be_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -744,8 +738,7 @@
 	return 0;
 }
 
-static int
-be_test_ddr_dma(struct be_adapter *adapter)
+static int be_test_ddr_dma(struct be_adapter *adapter)
 {
 	int ret, i;
 	struct be_dma_mem ddrdma_cmd;
@@ -761,7 +754,7 @@
 
 	for (i = 0; i < 2; i++) {
 		ret = be_cmd_ddr_dma_test(adapter, pattern[i],
-					4096, &ddrdma_cmd);
+					  4096, &ddrdma_cmd);
 		if (ret != 0)
 			goto err;
 	}
@@ -773,20 +766,17 @@
 }
 
 static u64 be_loopback_test(struct be_adapter *adapter, u8 loopback_type,
-				u64 *status)
+			    u64 *status)
 {
-	be_cmd_set_loopback(adapter, adapter->hba_port_num,
-				loopback_type, 1);
+	be_cmd_set_loopback(adapter, adapter->hba_port_num, loopback_type, 1);
 	*status = be_cmd_loopback_test(adapter, adapter->hba_port_num,
-				loopback_type, 1500,
-				2, 0xabc);
-	be_cmd_set_loopback(adapter, adapter->hba_port_num,
-				BE_NO_LOOPBACK, 1);
+				       loopback_type, 1500, 2, 0xabc);
+	be_cmd_set_loopback(adapter, adapter->hba_port_num, BE_NO_LOOPBACK, 1);
 	return *status;
 }
 
-static void
-be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
+static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
+			 u64 *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status;
@@ -801,12 +791,10 @@
 	memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
 
 	if (test->flags & ETH_TEST_FL_OFFLINE) {
-		if (be_loopback_test(adapter, BE_MAC_LOOPBACK,
-				     &data[0]) != 0)
+		if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0)
 			test->flags |= ETH_TEST_FL_FAILED;
 
-		if (be_loopback_test(adapter, BE_PHY_LOOPBACK,
-				     &data[1]) != 0)
+		if (be_loopback_test(adapter, BE_PHY_LOOPBACK, &data[1]) != 0)
 			test->flags |= ETH_TEST_FL_FAILED;
 
 		if (test->flags & ETH_TEST_FL_EXTERNAL_LB) {
@@ -832,16 +820,14 @@
 	}
 }
 
-static int
-be_do_flash(struct net_device *netdev, struct ethtool_flash *efl)
+static int be_do_flash(struct net_device *netdev, struct ethtool_flash *efl)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	return be_load_fw(adapter, efl->data);
 }
 
-static int
-be_get_eeprom_len(struct net_device *netdev)
+static int be_get_eeprom_len(struct net_device *netdev)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -851,18 +837,17 @@
 	if (lancer_chip(adapter)) {
 		if (be_physfn(adapter))
 			return lancer_cmd_get_file_len(adapter,
-					LANCER_VPD_PF_FILE);
+						       LANCER_VPD_PF_FILE);
 		else
 			return lancer_cmd_get_file_len(adapter,
-					LANCER_VPD_VF_FILE);
+						       LANCER_VPD_VF_FILE);
 	} else {
 		return BE_READ_SEEPROM_LEN;
 	}
 }
 
-static int
-be_read_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
-			uint8_t *data)
+static int be_read_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, uint8_t *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_dma_mem eeprom_cmd;
@@ -875,10 +860,10 @@
 	if (lancer_chip(adapter)) {
 		if (be_physfn(adapter))
 			return lancer_cmd_read_file(adapter, LANCER_VPD_PF_FILE,
-					eeprom->len, data);
+						    eeprom->len, data);
 		else
 			return lancer_cmd_read_file(adapter, LANCER_VPD_VF_FILE,
-					eeprom->len, data);
+						    eeprom->len, data);
 	}
 
 	eeprom->magic = BE_VENDOR_ID | (adapter->pdev->device<<16);
@@ -933,27 +918,27 @@
 
 	switch (flow_type) {
 	case TCP_V4_FLOW:
-		if (adapter->rss_flags & RSS_ENABLE_IPV4)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4)
 			data |= RXH_IP_DST | RXH_IP_SRC;
-		if (adapter->rss_flags & RSS_ENABLE_TCP_IPV4)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV4)
 			data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 		break;
 	case UDP_V4_FLOW:
-		if (adapter->rss_flags & RSS_ENABLE_IPV4)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4)
 			data |= RXH_IP_DST | RXH_IP_SRC;
-		if (adapter->rss_flags & RSS_ENABLE_UDP_IPV4)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV4)
 			data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 		break;
 	case TCP_V6_FLOW:
-		if (adapter->rss_flags & RSS_ENABLE_IPV6)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6)
 			data |= RXH_IP_DST | RXH_IP_SRC;
-		if (adapter->rss_flags & RSS_ENABLE_TCP_IPV6)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV6)
 			data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 		break;
 	case UDP_V6_FLOW:
-		if (adapter->rss_flags & RSS_ENABLE_IPV6)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6)
 			data |= RXH_IP_DST | RXH_IP_SRC;
-		if (adapter->rss_flags & RSS_ENABLE_UDP_IPV6)
+		if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV6)
 			data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 		break;
 	}
@@ -962,7 +947,7 @@
 }
 
 static int be_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
-		      u32 *rule_locs)
+			u32 *rule_locs)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
@@ -992,7 +977,7 @@
 	struct be_rx_obj *rxo;
 	int status = 0, i, j;
 	u8 rsstable[128];
-	u32 rss_flags = adapter->rss_flags;
+	u32 rss_flags = adapter->rss_info.rss_flags;
 
 	if (cmd->data != L3_RSS_FLAGS &&
 	    cmd->data != (L3_RSS_FLAGS | L4_RSS_FLAGS))
@@ -1039,7 +1024,7 @@
 		return -EINVAL;
 	}
 
-	if (rss_flags == adapter->rss_flags)
+	if (rss_flags == adapter->rss_info.rss_flags)
 		return status;
 
 	if (be_multi_rxq(adapter)) {
@@ -1051,9 +1036,11 @@
 			}
 		}
 	}
-	status = be_cmd_rss_config(adapter, rsstable, rss_flags, 128);
+
+	status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
+				   rss_flags, 128, adapter->rss_info.rss_hkey);
 	if (!status)
-		adapter->rss_flags = rss_flags;
+		adapter->rss_info.rss_flags = rss_flags;
 
 	return status;
 }
@@ -1103,6 +1090,69 @@
 	return be_update_queues(adapter);
 }
 
+static u32 be_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return RSS_INDIR_TABLE_LEN;	/* number of indir[] entries */
+}
+
+static u32 be_get_rxfh_key_size(struct net_device *netdev)
+{
+	return RSS_HASH_KEY_LEN;	/* hash key length in bytes */
+}
+
+/* ethtool get_rxfh: copy out the cached RSS queue table and/or hash key */
+static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+	struct rss_info *rss = &adapter->rss_info;
+	int i;
+
+	if (hkey)
+		memcpy(hkey, rss->rss_hkey, RSS_HASH_KEY_LEN);
+
+	/* a NULL indir skips the loop; else copy every cached queue number */
+	for (i = 0; indir && i < RSS_INDIR_TABLE_LEN; i++)
+		indir[i] = rss->rss_queue[i];
+
+	return 0;
+}
+
+/* ethtool set_rxfh: apply a new RSS indirection table and/or hash key via
+ * be_cmd_rss_config(). A NULL indir/hkey reuses the cached value; the cache
+ * in adapter->rss_info is only updated once the command succeeds (on failure
+ * rss_flags is set to RSS_ENABLE_NONE and -EIO is returned).
+ */
+static int be_set_rxfh(struct net_device *netdev, const u32 *indir,
+		       const u8 *hkey)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+	struct rss_info *rss = &adapter->rss_info;
+	u8 rsstable[RSS_INDIR_TABLE_LEN];
+	int i;
+
+	if (indir) {
+		for (i = 0; i < RSS_INDIR_TABLE_LEN; i++)
+			rsstable[i] = adapter->rx_obj[indir[i]].rss_id;
+	} else {
+		memcpy(rsstable, rss->rsstable, RSS_INDIR_TABLE_LEN);
+	}
+
+	if (!hkey)
+		hkey = rss->rss_hkey;
+
+	if (be_cmd_rss_config(adapter, rsstable, rss->rss_flags,
+			      RSS_INDIR_TABLE_LEN, hkey)) {
+		rss->rss_flags = RSS_ENABLE_NONE;
+		return -EIO;
+	}
+
+	for (i = 0; indir && i < RSS_INDIR_TABLE_LEN; i++)
+		rss->rss_queue[i] = indir[i];
+	if (hkey != rss->rss_hkey)	/* memcpy onto itself is undefined */
+		memcpy(rss->rss_hkey, hkey, RSS_HASH_KEY_LEN);
+	memcpy(rss->rsstable, rsstable, RSS_INDIR_TABLE_LEN);
+	return 0;
+}
+
 const struct ethtool_ops be_ethtool_ops = {
 	.get_settings = be_get_settings,
 	.get_drvinfo = be_get_drvinfo,
@@ -1129,6 +1179,10 @@
 	.self_test = be_self_test,
 	.get_rxnfc = be_get_rxnfc,
 	.set_rxnfc = be_set_rxnfc,
+	.get_rxfh_indir_size = be_get_rxfh_indir_size,
+	.get_rxfh_key_size = be_get_rxfh_key_size,
+	.get_rxfh = be_get_rxfh,
+	.set_rxfh = be_set_rxfh,
 	.get_channels = be_get_channels,
 	.set_channels = be_set_channels
 };

diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index 3bd1985..8840c64 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h

@@ -188,10 +188,14 @@
 #define OPTYPE_FCOE_FW_ACTIVE		10
 #define OPTYPE_FCOE_FW_BACKUP		11
 #define OPTYPE_NCSI_FW			13
+#define OPTYPE_REDBOOT_DIR		18
+#define OPTYPE_REDBOOT_CONFIG		19
+#define OPTYPE_SH_PHY_FW		21
+#define OPTYPE_FLASHISM_JUMPVECTOR	22
+#define OPTYPE_UFI_DIR			23
 #define OPTYPE_PHY_FW			99
 #define TN_8022				13
 
-#define ILLEGAL_IOCTL_REQ		2
 #define FLASHROM_OPER_PHY_FLASH		9
 #define FLASHROM_OPER_PHY_SAVE		10
 #define FLASHROM_OPER_FLASH		1
@@ -250,6 +254,9 @@
 #define IMAGE_FIRMWARE_BACKUP_FCoE	178
 #define IMAGE_FIRMWARE_BACKUP_COMP_FCoE 179
 #define IMAGE_FIRMWARE_PHY		192
+#define IMAGE_REDBOOT_DIR		208
+#define IMAGE_REDBOOT_CONFIG		209
+#define IMAGE_UFI_DIR			210
 #define IMAGE_BOOT_CODE			224
 
 /************* Rx Packet Type Encoding **************/
@@ -534,7 +541,8 @@
 	u32 image_size;
 	u32 cksum;
 	u32 entry_point;
-	u32 rsvd0;
+	u16 optype;
+	u16 rsvd0;
 	u32 rsvd1;
 	u8 ver_data[32];
 } __packed;

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index dc19bc5..6822b3d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c

@@ -134,7 +134,7 @@
 }
 
 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
-		u16 len, u16 entry_size)
+			  u16 len, u16 entry_size)
 {
 	struct be_dma_mem *mem = &q->dma_mem;
 
@@ -154,7 +154,7 @@
 	u32 reg, enabled;
 
 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
-				&reg);
+			      &reg);
 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 
 	if (!enabled && enable)
@@ -165,7 +165,7 @@
 		return;
 
 	pci_write_config_dword(adapter->pdev,
-			PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
+			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 }
 
 static void be_intr_set(struct be_adapter *adapter, bool enable)
@@ -206,12 +206,11 @@
 }
 
 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
-		bool arm, bool clear_int, u16 num_popped)
+			 bool arm, bool clear_int, u16 num_popped)
 {
 	u32 val = 0;
 	val |= qid & DB_EQ_RING_ID_MASK;
-	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) <<
-			DB_EQ_RING_ID_EXT_MASK_SHIFT);
+	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 
 	if (adapter->eeh_error)
 		return;
@@ -477,7 +476,7 @@
 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
-	if (be_roce_supported(adapter))  {
+	if (be_roce_supported(adapter)) {
 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 		drvs->rx_roce_frames = port_stats->roce_frames_received;
@@ -491,8 +490,7 @@
 {
 
 	struct be_drv_stats *drvs = &adapter->drv_stats;
-	struct lancer_pport_stats *pport_stats =
-					pport_stats_from_cmd(adapter);
+	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 
 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
@@ -539,8 +537,7 @@
 }
 
 static void populate_erx_stats(struct be_adapter *adapter,
-			struct be_rx_obj *rxo,
-			u32 erx_stat)
+			       struct be_rx_obj *rxo, u32 erx_stat)
 {
 	if (!BEx_chip(adapter))
 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
@@ -579,7 +576,7 @@
 }
 
 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
-					struct rtnl_link_stats64 *stats)
+						struct rtnl_link_stats64 *stats)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_drv_stats *drvs = &adapter->drv_stats;
@@ -660,7 +657,8 @@
 }
 
 static void be_tx_stats_update(struct be_tx_obj *txo,
-			u32 wrb_cnt, u32 copied, u32 gso_segs, bool stopped)
+			       u32 wrb_cnt, u32 copied, u32 gso_segs,
+			       bool stopped)
 {
 	struct be_tx_stats *stats = tx_stats(txo);
 
@@ -676,7 +674,7 @@
 
 /* Determine number of WRB entries needed to xmit data in an skb */
 static u32 wrb_cnt_for_skb(struct be_adapter *adapter, struct sk_buff *skb,
-								bool *dummy)
+			   bool *dummy)
 {
 	int cnt = (skb->len > skb->data_len);
 
@@ -704,7 +702,7 @@
 }
 
 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
-					struct sk_buff *skb)
+				     struct sk_buff *skb)
 {
 	u8 vlan_prio;
 	u16 vlan_tag;
@@ -733,7 +731,8 @@
 }
 
 static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr,
-		struct sk_buff *skb, u32 wrb_cnt, u32 len, bool skip_hw_vlan)
+			 struct sk_buff *skb, u32 wrb_cnt, u32 len,
+			 bool skip_hw_vlan)
 {
 	u16 vlan_tag, proto;
 
@@ -774,7 +773,7 @@
 }
 
 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
-		bool unmap_single)
+			  bool unmap_single)
 {
 	dma_addr_t dma;
 
@@ -791,8 +790,8 @@
 }
 
 static int make_tx_wrbs(struct be_adapter *adapter, struct be_queue_info *txq,
-		struct sk_buff *skb, u32 wrb_cnt, bool dummy_wrb,
-		bool skip_hw_vlan)
+			struct sk_buff *skb, u32 wrb_cnt, bool dummy_wrb,
+			bool skip_hw_vlan)
 {
 	dma_addr_t busaddr;
 	int i, copied = 0;
@@ -821,8 +820,7 @@
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const struct skb_frag_struct *frag =
-			&skb_shinfo(skb)->frags[i];
+		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
 		busaddr = skb_frag_dma_map(dev, frag, 0,
 					   skb_frag_size(frag), DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, busaddr))
@@ -927,8 +925,7 @@
 	return vlan_tx_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
 }
 
-static int be_ipv6_tx_stall_chk(struct be_adapter *adapter,
-				struct sk_buff *skb)
+static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
 {
 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
 }
@@ -959,7 +956,7 @@
 	 */
 	if (be_pvid_tagging_enabled(adapter) &&
 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
-			*skip_hw_vlan = true;
+		*skip_hw_vlan = true;
 
 	/* HW has a bug wherein it will calculate CSUM for VLAN
 	 * pkts even though it is disabled.
@@ -1077,16 +1074,15 @@
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	if (new_mtu < BE_MIN_MTU ||
-			new_mtu > (BE_MAX_JUMBO_FRAME_SIZE -
-					(ETH_HLEN + ETH_FCS_LEN))) {
+	    new_mtu > (BE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN))) {
 		dev_info(&adapter->pdev->dev,
-			"MTU must be between %d and %d bytes\n",
-			BE_MIN_MTU,
-			(BE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN)));
+			 "MTU must be between %d and %d bytes\n",
+			 BE_MIN_MTU,
+			 (BE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN)));
 		return -EINVAL;
 	}
 	dev_info(&adapter->pdev->dev, "MTU changed from %d to %d bytes\n",
-			netdev->mtu, new_mtu);
+		 netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 	return 0;
 }
@@ -1098,7 +1094,7 @@
 static int be_vid_config(struct be_adapter *adapter)
 {
 	u16 vids[BE_NUM_VLANS_SUPPORTED];
-	u16 num = 0, i;
+	u16 num = 0, i = 0;
 	int status = 0;
 
 	/* No need to further configure vids if in promiscuous mode */
@@ -1109,16 +1105,14 @@
 		goto set_vlan_promisc;
 
 	/* Construct VLAN Table to give to HW */
-	for (i = 0; i < VLAN_N_VID; i++)
-		if (adapter->vlan_tag[i])
-			vids[num++] = cpu_to_le16(i);
+	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
+		vids[num++] = cpu_to_le16(i);
 
-	status = be_cmd_vlan_config(adapter, adapter->if_handle,
-				    vids, num, 0);
-
+	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num);
 	if (status) {
 		/* Set to VLAN promisc mode as setting VLAN filter failed */
-		if (status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES)
+		if (addl_status(status) ==
+				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
 			goto set_vlan_promisc;
 		dev_err(&adapter->pdev->dev,
 			"Setting HW VLAN filtering failed.\n");
@@ -1160,16 +1154,16 @@
 	if (lancer_chip(adapter) && vid == 0)
 		return status;
 
-	if (adapter->vlan_tag[vid])
+	if (test_bit(vid, adapter->vids))
 		return status;
 
-	adapter->vlan_tag[vid] = 1;
+	set_bit(vid, adapter->vids);
 	adapter->vlans_added++;
 
 	status = be_vid_config(adapter);
 	if (status) {
 		adapter->vlans_added--;
-		adapter->vlan_tag[vid] = 0;
+		clear_bit(vid, adapter->vids);
 	}
 
 	return status;
@@ -1184,12 +1178,12 @@
 	if (lancer_chip(adapter) && vid == 0)
 		goto ret;
 
-	adapter->vlan_tag[vid] = 0;
+	clear_bit(vid, adapter->vids);
 	status = be_vid_config(adapter);
 	if (!status)
 		adapter->vlans_added--;
 	else
-		adapter->vlan_tag[vid] = 1;
+		set_bit(vid, adapter->vids);
 ret:
 	return status;
 }
@@ -1197,7 +1191,7 @@
 static void be_clear_promisc(struct be_adapter *adapter)
 {
 	adapter->promiscuous = false;
-	adapter->flags &= ~BE_FLAGS_VLAN_PROMISC;
+	adapter->flags &= ~(BE_FLAGS_VLAN_PROMISC | BE_FLAGS_MCAST_PROMISC);
 
 	be_cmd_rx_filter(adapter, IFF_PROMISC, OFF);
 }
@@ -1222,10 +1216,8 @@
 
 	/* Enable multicast promisc if num configured exceeds what we support */
 	if (netdev->flags & IFF_ALLMULTI ||
-	    netdev_mc_count(netdev) > be_max_mc(adapter)) {
-		be_cmd_rx_filter(adapter, IFF_ALLMULTI, ON);
-		goto done;
-	}
+	    netdev_mc_count(netdev) > be_max_mc(adapter))
+		goto set_mcast_promisc;
 
 	if (netdev_uc_count(netdev) != adapter->uc_macs) {
 		struct netdev_hw_addr *ha;
@@ -1251,13 +1243,22 @@
 	}
 
 	status = be_cmd_rx_filter(adapter, IFF_MULTICAST, ON);
-
-	/* Set to MCAST promisc mode if setting MULTICAST address fails */
-	if (status) {
-		dev_info(&adapter->pdev->dev, "Exhausted multicast HW filters.\n");
-		dev_info(&adapter->pdev->dev, "Disabling HW multicast filtering.\n");
-		be_cmd_rx_filter(adapter, IFF_ALLMULTI, ON);
+	if (!status) {
+		if (adapter->flags & BE_FLAGS_MCAST_PROMISC)
+			adapter->flags &= ~BE_FLAGS_MCAST_PROMISC;
+		goto done;
 	}
+
+set_mcast_promisc:
+	if (adapter->flags & BE_FLAGS_MCAST_PROMISC)
+		return;
+
+	/* Set to MCAST promisc mode if setting MULTICAST address fails
+	 * or if num configured exceeds what we support
+	 */
+	status = be_cmd_rx_filter(adapter, IFF_ALLMULTI, ON);
+	if (!status)
+		adapter->flags |= BE_FLAGS_MCAST_PROMISC;
 done:
 	return;
 }
@@ -1287,7 +1288,7 @@
 
 	if (status)
 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed\n",
-				mac, vf);
+			mac, vf);
 	else
 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
 
@@ -1295,7 +1296,7 @@
 }
 
 static int be_get_vf_config(struct net_device *netdev, int vf,
-			struct ifla_vf_info *vi)
+			    struct ifla_vf_info *vi)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
@@ -1307,7 +1308,8 @@
 		return -EINVAL;
 
 	vi->vf = vf;
-	vi->tx_rate = vf_cfg->tx_rate;
+	vi->max_tx_rate = vf_cfg->tx_rate;
+	vi->min_tx_rate = 0;
 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
@@ -1316,8 +1318,7 @@
 	return 0;
 }
 
-static int be_set_vf_vlan(struct net_device *netdev,
-			int vf, u16 vlan, u8 qos)
+static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
@@ -1348,11 +1349,14 @@
 	return status;
 }
 
-static int be_set_vf_tx_rate(struct net_device *netdev,
-			int vf, int rate)
+static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	int status = 0;
+	struct device *dev = &adapter->pdev->dev;
+	int percent_rate, status = 0;
+	u16 link_speed = 0;
+	u8 link_status;
 
 	if (!sriov_enabled(adapter))
 		return -EPERM;
@@ -1360,18 +1364,50 @@
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	if (rate < 100 || rate > 10000) {
-		dev_err(&adapter->pdev->dev,
-			"tx rate must be between 100 and 10000 Mbps\n");
+	if (min_tx_rate)
 		return -EINVAL;
+
+	if (!max_tx_rate)
+		goto config_qos;
+
+	status = be_cmd_link_status_query(adapter, &link_speed,
+					  &link_status, 0);
+	if (status)
+		goto err;
+
+	if (!link_status) {
+		dev_err(dev, "TX-rate setting not allowed when link is down\n");
+		status = -EPERM;
+		goto err;
 	}
 
-	status = be_cmd_config_qos(adapter, rate / 10, vf + 1);
+	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
+		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
+			link_speed);
+		status = -EINVAL;
+		goto err;
+	}
+
+	/* On Skyhawk the QOS setting must be done only as a % value */
+	percent_rate = link_speed / 100;
+	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
+		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
+			percent_rate);
+		status = -EINVAL;
+		goto err;
+	}
+
+config_qos:
+	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
 	if (status)
-		dev_err(&adapter->pdev->dev,
-				"tx rate %d on VF %d failed\n", rate, vf);
-	else
-		adapter->vf_cfg[vf].tx_rate = rate;
+		goto err;
+
+	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
+	return 0;
+
+err:
+	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
+		max_tx_rate, vf);
 	return status;
 }
 static int be_set_vf_link_state(struct net_device *netdev, int vf,
@@ -1469,7 +1505,7 @@
 }
 
 static void be_rx_stats_update(struct be_rx_obj *rxo,
-		struct be_rx_compl_info *rxcp)
+			       struct be_rx_compl_info *rxcp)
 {
 	struct be_rx_stats *stats = rx_stats(rxo);
 
@@ -1566,7 +1602,8 @@
 		skb_frag_set_page(skb, 0, page_info->page);
 		skb_shinfo(skb)->frags[0].page_offset =
 					page_info->page_offset + hdr_len;
-		skb_frag_size_set(&skb_shinfo(skb)->frags[0], curr_frag_len - hdr_len);
+		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
+				  curr_frag_len - hdr_len);
 		skb->data_len = curr_frag_len - hdr_len;
 		skb->truesize += rx_frag_size;
 		skb->tail += hdr_len;
@@ -1725,8 +1762,8 @@
 	if (rxcp->vlanf) {
 		rxcp->qnq = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, qnq,
 					  compl);
-		rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vlan_tag,
-					       compl);
+		rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v1,
+					       vlan_tag, compl);
 	}
 	rxcp->port = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, port, compl);
 	rxcp->tunneled =
@@ -1757,8 +1794,8 @@
 	if (rxcp->vlanf) {
 		rxcp->qnq = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, qnq,
 					  compl);
-		rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vlan_tag,
-					       compl);
+		rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v0,
+					       vlan_tag, compl);
 	}
 	rxcp->port = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, port, compl);
 	rxcp->ip_frag = AMAP_GET_BITS(struct amap_eth_rx_compl_v0,
@@ -1799,7 +1836,7 @@
 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
 
 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
-		    !adapter->vlan_tag[rxcp->vlan_tag])
+		    !test_bit(rxcp->vlan_tag, adapter->vids))
 			rxcp->vlanf = 0;
 	}
 
@@ -1915,7 +1952,7 @@
 }
 
 static u16 be_tx_compl_process(struct be_adapter *adapter,
-		struct be_tx_obj *txo, u16 last_index)
+			       struct be_tx_obj *txo, u16 last_index)
 {
 	struct be_queue_info *txq = &txo->q;
 	struct be_eth_wrb *wrb;
@@ -2122,7 +2159,7 @@
 
 		eq = &eqo->q;
 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
-					sizeof(struct be_eq_entry));
+				    sizeof(struct be_eq_entry));
 		if (rc)
 			return rc;
 
@@ -2155,7 +2192,7 @@
 
 	cq = &adapter->mcc_obj.cq;
 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
-			sizeof(struct be_mcc_compl)))
+			   sizeof(struct be_mcc_compl)))
 		goto err;
 
 	/* Use the default EQ for MCC completions */
@@ -2275,7 +2312,7 @@
 		rxo->adapter = adapter;
 		cq = &rxo->cq;
 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
-				sizeof(struct be_eth_rx_compl));
+				    sizeof(struct be_eth_rx_compl));
 		if (rc)
 			return rc;
 
@@ -2339,7 +2376,7 @@
 }
 
 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
-			int budget, int polling)
+			 int budget, int polling)
 {
 	struct be_adapter *adapter = rxo->adapter;
 	struct be_queue_info *rx_cq = &rxo->cq;
@@ -2365,7 +2402,7 @@
 		 * promiscuous mode on some skews
 		 */
 		if (unlikely(rxcp->port != adapter->port_num &&
-				!lancer_chip(adapter))) {
+			     !lancer_chip(adapter))) {
 			be_rx_compl_discard(rxo, rxcp);
 			goto loop_continue;
 		}
@@ -2405,8 +2442,9 @@
 		if (!txcp)
 			break;
 		num_wrbs += be_tx_compl_process(adapter, txo,
-				AMAP_GET_BITS(struct amap_eth_tx_compl,
-					wrb_index, txcp));
+						AMAP_GET_BITS(struct
+							      amap_eth_tx_compl,
+							      wrb_index, txcp));
 	}
 
 	if (work_done) {
@@ -2416,7 +2454,7 @@
 		/* As Tx wrbs have been freed up, wake up netdev queue
 		 * if it was stopped due to lack of tx wrbs.  */
 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
-			atomic_read(&txo->q.used) < txo->q.len / 2) {
+		    atomic_read(&txo->q.used) < txo->q.len / 2) {
 			netif_wake_subqueue(adapter->netdev, idx);
 		}
 
@@ -2510,9 +2548,9 @@
 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
 			sliport_err1 = ioread32(adapter->db +
-					SLIPORT_ERROR1_OFFSET);
+						SLIPORT_ERROR1_OFFSET);
 			sliport_err2 = ioread32(adapter->db +
-					SLIPORT_ERROR2_OFFSET);
+						SLIPORT_ERROR2_OFFSET);
 			adapter->hw_error = true;
 			/* Do not log error messages if its a FW reset */
 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
@@ -2531,13 +2569,13 @@
 		}
 	} else {
 		pci_read_config_dword(adapter->pdev,
-				PCICFG_UE_STATUS_LOW, &ue_lo);
+				      PCICFG_UE_STATUS_LOW, &ue_lo);
 		pci_read_config_dword(adapter->pdev,
-				PCICFG_UE_STATUS_HIGH, &ue_hi);
+				      PCICFG_UE_STATUS_HIGH, &ue_hi);
 		pci_read_config_dword(adapter->pdev,
-				PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
+				      PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
 		pci_read_config_dword(adapter->pdev,
-				PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
+				      PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
 
 		ue_lo = (ue_lo & ~ue_lo_mask);
 		ue_hi = (ue_hi & ~ue_hi_mask);
@@ -2624,7 +2662,7 @@
 }
 
 static inline int be_msix_vec_get(struct be_adapter *adapter,
-				struct be_eq_obj *eqo)
+				  struct be_eq_obj *eqo)
 {
 	return adapter->msix_entries[eqo->msix_idx].vector;
 }
@@ -2648,7 +2686,7 @@
 	for (i--, eqo = &adapter->eq_obj[i]; i >= 0; i--, eqo--)
 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
-		status);
+		 status);
 	be_msix_disable(adapter);
 	return status;
 }
@@ -2774,7 +2812,8 @@
 {
 	struct be_rx_obj *rxo;
 	int rc, i, j;
-	u8 rsstable[128];
+	u8 rss_hkey[RSS_HASH_KEY_LEN];
+	struct rss_info *rss = &adapter->rss_info;
 
 	for_all_rx_queues(adapter, rxo, i) {
 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
@@ -2799,31 +2838,36 @@
 	}
 
 	if (be_multi_rxq(adapter)) {
-		for (j = 0; j < 128; j += adapter->num_rx_qs - 1) {
+		for (j = 0; j < RSS_INDIR_TABLE_LEN;
+			j += adapter->num_rx_qs - 1) {
 			for_all_rss_queues(adapter, rxo, i) {
-				if ((j + i) >= 128)
+				if ((j + i) >= RSS_INDIR_TABLE_LEN)
 					break;
-				rsstable[j + i] = rxo->rss_id;
+				rss->rsstable[j + i] = rxo->rss_id;
+				rss->rss_queue[j + i] = i;
 			}
 		}
-		adapter->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
-					RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
+		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
+			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
 
 		if (!BEx_chip(adapter))
-			adapter->rss_flags |= RSS_ENABLE_UDP_IPV4 |
-						RSS_ENABLE_UDP_IPV6;
+			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
+				RSS_ENABLE_UDP_IPV6;
 	} else {
 		/* Disable RSS, if only default RX Q is created */
-		adapter->rss_flags = RSS_ENABLE_NONE;
+		rss->rss_flags = RSS_ENABLE_NONE;
 	}
 
-	rc = be_cmd_rss_config(adapter, rsstable, adapter->rss_flags,
-			       128);
+	get_random_bytes(rss_hkey, RSS_HASH_KEY_LEN);
+	rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
+			       128, rss_hkey);
 	if (rc) {
-		adapter->rss_flags = RSS_ENABLE_NONE;
+		rss->rss_flags = RSS_ENABLE_NONE;
 		return rc;
 	}
 
+	memcpy(rss->rss_hkey, rss_hkey, RSS_HASH_KEY_LEN);
+
 	/* First time posting */
 	for_all_rx_queues(adapter, rxo, i)
 		be_post_rx_frags(rxo, GFP_KERNEL);
@@ -2896,7 +2940,8 @@
 
 	if (enable) {
 		status = pci_write_config_dword(adapter->pdev,
-			PCICFG_PM_CONTROL_OFFSET, PCICFG_PM_CONTROL_MASK);
+						PCICFG_PM_CONTROL_OFFSET,
+						PCICFG_PM_CONTROL_MASK);
 		if (status) {
 			dev_err(&adapter->pdev->dev,
 				"Could not enable Wake-on-lan\n");
@@ -2905,7 +2950,8 @@
 			return status;
 		}
 		status = be_cmd_enable_magic_wol(adapter,
-				adapter->netdev->dev_addr, &cmd);
+						 adapter->netdev->dev_addr,
+						 &cmd);
 		pci_enable_wake(adapter->pdev, PCI_D3hot, 1);
 		pci_enable_wake(adapter->pdev, PCI_D3cold, 1);
 	} else {
@@ -2944,7 +2990,8 @@
 
 		if (status)
 			dev_err(&adapter->pdev->dev,
-			"Mac address assignment failed for VF %d\n", vf);
+				"Mac address assignment failed for VF %d\n",
+				vf);
 		else
 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
 
@@ -3086,9 +3133,11 @@
 
 		/* If a FW profile exists, then cap_flags are updated */
 		en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
-			   BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST);
-		status = be_cmd_if_create(adapter, cap_flags, en_flags,
-					  &vf_cfg->if_handle, vf + 1);
+					BE_IF_FLAGS_BROADCAST |
+					BE_IF_FLAGS_MULTICAST);
+		status =
+		    be_cmd_if_create(adapter, cap_flags, en_flags,
+				     &vf_cfg->if_handle, vf + 1);
 		if (status)
 			goto err;
 	}
@@ -3119,7 +3168,6 @@
 	struct be_vf_cfg *vf_cfg;
 	int status, old_vfs, vf;
 	u32 privileges;
-	u16 lnk_speed;
 
 	old_vfs = pci_num_vf(adapter->pdev);
 	if (old_vfs) {
@@ -3175,16 +3223,9 @@
 					 vf);
 		}
 
-		/* BE3 FW, by default, caps VF TX-rate to 100mbps.
-		 * Allow full available bandwidth
-		 */
-		if (BE3_chip(adapter) && !old_vfs)
-			be_cmd_config_qos(adapter, 1000, vf + 1);
-
-		status = be_cmd_link_status_query(adapter, &lnk_speed,
-						  NULL, vf + 1);
-		if (!status)
-			vf_cfg->tx_rate = lnk_speed;
+		/* Allow full available bandwidth */
+		if (!old_vfs)
+			be_cmd_config_qos(adapter, 0, 0, vf + 1);
 
 		if (!old_vfs) {
 			be_cmd_enable_vf(adapter, vf + 1);
@@ -3590,35 +3631,7 @@
 }
 #endif
 
-#define FW_FILE_HDR_SIGN 	"ServerEngines Corp. "
-static char flash_cookie[2][16] =      {"*** SE FLAS", "H DIRECTORY *** "};
-
-static bool be_flash_redboot(struct be_adapter *adapter,
-			const u8 *p, u32 img_start, int image_size,
-			int hdr_size)
-{
-	u32 crc_offset;
-	u8 flashed_crc[4];
-	int status;
-
-	crc_offset = hdr_size + img_start + image_size - 4;
-
-	p += crc_offset;
-
-	status = be_cmd_get_flash_crc(adapter, flashed_crc,
-			(image_size - 4));
-	if (status) {
-		dev_err(&adapter->pdev->dev,
-		"could not get crc from flash, not flashing redboot\n");
-		return false;
-	}
-
-	/*update redboot only if crc does not match*/
-	if (!memcmp(flashed_crc, p, 4))
-		return false;
-	else
-		return true;
-}
+static char flash_cookie[2][16] = {"*** SE FLAS", "H DIRECTORY *** "};
 
 static bool phy_flashing_required(struct be_adapter *adapter)
 {
@@ -3649,8 +3662,8 @@
 }
 
 static struct flash_section_info *get_fsec_info(struct be_adapter *adapter,
-					 int header_size,
-					 const struct firmware *fw)
+						int header_size,
+						const struct firmware *fw)
 {
 	struct flash_section_info *fsec = NULL;
 	const u8 *p = fw->data;
@@ -3665,12 +3678,35 @@
 	return NULL;
 }
 
-static int be_flash(struct be_adapter *adapter, const u8 *img,
-		struct be_dma_mem *flash_cmd, int optype, int img_size)
+static int be_check_flash_crc(struct be_adapter *adapter, const u8 *p,
+			      u32 img_offset, u32 img_size, int hdr_size,
+			      u16 img_optype, bool *crc_match)
 {
-	u32 total_bytes = 0, flash_op, num_bytes = 0;
-	int status = 0;
+	u32 crc_offset;
+	int status;
+	u8 crc[4];
+
+	status = be_cmd_get_flash_crc(adapter, crc, img_optype, img_size - 4);
+	if (status)
+		return status;
+
+	crc_offset = hdr_size + img_offset + img_size - 4;
+
+	/* Skip flashing, if crc of flashed region matches */
+	if (!memcmp(crc, p + crc_offset, 4))
+		*crc_match = true;
+	else
+		*crc_match = false;
+
+	return status;
+}
+
+static int be_flash(struct be_adapter *adapter, const u8 *img,
+		    struct be_dma_mem *flash_cmd, int optype, int img_size)
+{
 	struct be_cmd_write_flashrom *req = flash_cmd->va;
+	u32 total_bytes, flash_op, num_bytes;
+	int status;
 
 	total_bytes = img_size;
 	while (total_bytes) {
@@ -3693,32 +3729,28 @@
 		memcpy(req->data_buf, img, num_bytes);
 		img += num_bytes;
 		status = be_cmd_write_flashrom(adapter, flash_cmd, optype,
-						flash_op, num_bytes);
-		if (status) {
-			if (status == ILLEGAL_IOCTL_REQ &&
-			    optype == OPTYPE_PHY_FW)
-				break;
-			dev_err(&adapter->pdev->dev,
-				"cmd to write to flash rom failed.\n");
+					       flash_op, num_bytes);
+		if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST &&
+		    optype == OPTYPE_PHY_FW)
+			break;
+		else if (status)
 			return status;
-		}
 	}
 	return 0;
 }
 
 /* For BE2, BE3 and BE3-R */
 static int be_flash_BEx(struct be_adapter *adapter,
-			 const struct firmware *fw,
-			 struct be_dma_mem *flash_cmd,
-			 int num_of_images)
-
+			const struct firmware *fw,
+			struct be_dma_mem *flash_cmd, int num_of_images)
 {
-	int status = 0, i, filehdr_size = 0;
 	int img_hdrs_size = (num_of_images * sizeof(struct image_hdr));
-	const u8 *p = fw->data;
-	const struct flash_comp *pflashcomp;
-	int num_comp, redboot;
+	struct device *dev = &adapter->pdev->dev;
 	struct flash_section_info *fsec = NULL;
+	int status, i, filehdr_size, num_comp;
+	const struct flash_comp *pflashcomp;
+	bool crc_match;
+	const u8 *p;
 
 	struct flash_comp gen3_flash_types[] = {
 		{ FLASH_iSCSI_PRIMARY_IMAGE_START_g3, OPTYPE_ISCSI_ACTIVE,
@@ -3775,8 +3807,7 @@
 	/* Get flash section info*/
 	fsec = get_fsec_info(adapter, filehdr_size + img_hdrs_size, fw);
 	if (!fsec) {
-		dev_err(&adapter->pdev->dev,
-			"Invalid Cookie. UFI corrupted ?\n");
+		dev_err(dev, "Invalid Cookie. FW image may be corrupted\n");
 		return -1;
 	}
 	for (i = 0; i < num_comp; i++) {
@@ -3792,23 +3823,32 @@
 				continue;
 
 		if (pflashcomp[i].optype == OPTYPE_REDBOOT) {
-			redboot = be_flash_redboot(adapter, fw->data,
-				pflashcomp[i].offset, pflashcomp[i].size,
-				filehdr_size + img_hdrs_size);
-			if (!redboot)
+			status = be_check_flash_crc(adapter, fw->data,
+						    pflashcomp[i].offset,
+						    pflashcomp[i].size,
+						    filehdr_size +
+						    img_hdrs_size,
+						    OPTYPE_REDBOOT, &crc_match);
+			if (status) {
+				dev_err(dev,
+					"Could not get CRC for 0x%x region\n",
+					pflashcomp[i].optype);
+				continue;
+			}
+
+			if (crc_match)
 				continue;
 		}
 
-		p = fw->data;
-		p += filehdr_size + pflashcomp[i].offset + img_hdrs_size;
+		p = fw->data + filehdr_size + pflashcomp[i].offset +
+			img_hdrs_size;
 		if (p + pflashcomp[i].size > fw->data + fw->size)
 			return -1;
 
 		status = be_flash(adapter, p, flash_cmd, pflashcomp[i].optype,
-					pflashcomp[i].size);
+				  pflashcomp[i].size);
 		if (status) {
-			dev_err(&adapter->pdev->dev,
-				"Flashing section type %d failed.\n",
+			dev_err(dev, "Flashing section type 0x%x failed\n",
 				pflashcomp[i].img_type);
 			return status;
 		}
@@ -3816,80 +3856,142 @@
 	return 0;
 }
 
-static int be_flash_skyhawk(struct be_adapter *adapter,
-		const struct firmware *fw,
-		struct be_dma_mem *flash_cmd, int num_of_images)
+static u16 be_get_img_optype(struct flash_section_entry fsec_entry)
 {
-	int status = 0, i, filehdr_size = 0;
-	int img_offset, img_size, img_optype, redboot;
+	u32 img_type = le32_to_cpu(fsec_entry.type);
+	u16 img_optype = le16_to_cpu(fsec_entry.optype);
+
+	if (img_optype != 0xFFFF)
+		return img_optype;
+
+	switch (img_type) {
+	case IMAGE_FIRMWARE_iSCSI:
+		img_optype = OPTYPE_ISCSI_ACTIVE;
+		break;
+	case IMAGE_BOOT_CODE:
+		img_optype = OPTYPE_REDBOOT;
+		break;
+	case IMAGE_OPTION_ROM_ISCSI:
+		img_optype = OPTYPE_BIOS;
+		break;
+	case IMAGE_OPTION_ROM_PXE:
+		img_optype = OPTYPE_PXE_BIOS;
+		break;
+	case IMAGE_OPTION_ROM_FCoE:
+		img_optype = OPTYPE_FCOE_BIOS;
+		break;
+	case IMAGE_FIRMWARE_BACKUP_iSCSI:
+		img_optype = OPTYPE_ISCSI_BACKUP;
+		break;
+	case IMAGE_NCSI:
+		img_optype = OPTYPE_NCSI_FW;
+		break;
+	case IMAGE_FLASHISM_JUMPVECTOR:
+		img_optype = OPTYPE_FLASHISM_JUMPVECTOR;
+		break;
+	case IMAGE_FIRMWARE_PHY:
+		img_optype = OPTYPE_SH_PHY_FW;
+		break;
+	case IMAGE_REDBOOT_DIR:
+		img_optype = OPTYPE_REDBOOT_DIR;
+		break;
+	case IMAGE_REDBOOT_CONFIG:
+		img_optype = OPTYPE_REDBOOT_CONFIG;
+		break;
+	case IMAGE_UFI_DIR:
+		img_optype = OPTYPE_UFI_DIR;
+		break;
+	default:
+		break;
+	}
+
+	return img_optype;
+}
+
+static int be_flash_skyhawk(struct be_adapter *adapter,
+			    const struct firmware *fw,
+			    struct be_dma_mem *flash_cmd, int num_of_images)
+{
 	int img_hdrs_size = num_of_images * sizeof(struct image_hdr);
-	const u8 *p = fw->data;
+	struct device *dev = &adapter->pdev->dev;
 	struct flash_section_info *fsec = NULL;
+	u32 img_offset, img_size, img_type;
+	int status, i, filehdr_size;
+	bool crc_match, old_fw_img;
+	u16 img_optype;
+	const u8 *p;
 
 	filehdr_size = sizeof(struct flash_file_hdr_g3);
 	fsec = get_fsec_info(adapter, filehdr_size + img_hdrs_size, fw);
 	if (!fsec) {
-		dev_err(&adapter->pdev->dev,
-			"Invalid Cookie. UFI corrupted ?\n");
+		dev_err(dev, "Invalid Cookie. FW image may be corrupted\n");
 		return -1;
 	}
 
 	for (i = 0; i < le32_to_cpu(fsec->fsec_hdr.num_images); i++) {
 		img_offset = le32_to_cpu(fsec->fsec_entry[i].offset);
 		img_size   = le32_to_cpu(fsec->fsec_entry[i].pad_size);
+		img_type   = le32_to_cpu(fsec->fsec_entry[i].type);
+		img_optype = be_get_img_optype(fsec->fsec_entry[i]);
+		old_fw_img = fsec->fsec_entry[i].optype == 0xFFFF;
 
-		switch (le32_to_cpu(fsec->fsec_entry[i].type)) {
-		case IMAGE_FIRMWARE_iSCSI:
-			img_optype = OPTYPE_ISCSI_ACTIVE;
-			break;
-		case IMAGE_BOOT_CODE:
-			img_optype = OPTYPE_REDBOOT;
-			break;
-		case IMAGE_OPTION_ROM_ISCSI:
-			img_optype = OPTYPE_BIOS;
-			break;
-		case IMAGE_OPTION_ROM_PXE:
-			img_optype = OPTYPE_PXE_BIOS;
-			break;
-		case IMAGE_OPTION_ROM_FCoE:
-			img_optype = OPTYPE_FCOE_BIOS;
-			break;
-		case IMAGE_FIRMWARE_BACKUP_iSCSI:
-			img_optype = OPTYPE_ISCSI_BACKUP;
-			break;
-		case IMAGE_NCSI:
-			img_optype = OPTYPE_NCSI_FW;
-			break;
-		default:
+		if (img_optype == 0xFFFF)
 			continue;
+		/* Don't bother verifying CRC if an old FW image is being
+		 * flashed
+		 */
+		if (old_fw_img)
+			goto flash;
+
+		status = be_check_flash_crc(adapter, fw->data, img_offset,
+					    img_size, filehdr_size +
+					    img_hdrs_size, img_optype,
+					    &crc_match);
+		/* The current FW image on the card does not recognize the new
+		 * FLASH op_type. The FW download is partially complete.
+		 * Reboot the server now to enable FW image to recognize the
+		 * new FLASH op_type. To complete the remaining process,
+		 * download the same FW again after the reboot.
+		 */
+		if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST ||
+		    base_status(status) == MCC_STATUS_ILLEGAL_FIELD) {
+			dev_err(dev, "Flash incomplete. Reset the server\n");
+			dev_err(dev, "Download FW image again after reset\n");
+			return -EAGAIN;
+		} else if (status) {
+			dev_err(dev, "Could not get CRC for 0x%x region\n",
+				img_optype);
+			return -EFAULT;
 		}
 
-		if (img_optype == OPTYPE_REDBOOT) {
-			redboot = be_flash_redboot(adapter, fw->data,
-					img_offset, img_size,
-					filehdr_size + img_hdrs_size);
-			if (!redboot)
-				continue;
-		}
+		if (crc_match)
+			continue;
 
-		p = fw->data;
-		p += filehdr_size + img_offset + img_hdrs_size;
+flash:
+		p = fw->data + filehdr_size + img_offset + img_hdrs_size;
 		if (p + img_size > fw->data + fw->size)
 			return -1;
 
 		status = be_flash(adapter, p, flash_cmd, img_optype, img_size);
-		if (status) {
-			dev_err(&adapter->pdev->dev,
-				"Flashing section type %d failed.\n",
-				fsec->fsec_entry[i].type);
-			return status;
+		/* For old FW images ignore ILLEGAL_FIELD error or errors on
+		 * UFI_DIR region
+		 */
+		if (old_fw_img &&
+		    (base_status(status) == MCC_STATUS_ILLEGAL_FIELD ||
+		     (img_optype == OPTYPE_UFI_DIR &&
+		      base_status(status) == MCC_STATUS_FAILED))) {
+			continue;
+		} else if (status) {
+			dev_err(dev, "Flashing section type 0x%x failed\n",
+				img_type);
+			return -EFAULT;
 		}
 	}
 	return 0;
 }
 
 static int lancer_fw_download(struct be_adapter *adapter,
-				const struct firmware *fw)
+			      const struct firmware *fw)
 {
 #define LANCER_FW_DOWNLOAD_CHUNK      (32 * 1024)
 #define LANCER_FW_DOWNLOAD_LOCATION   "/prg"
@@ -3955,7 +4057,7 @@
 	}
 
 	dma_free_coherent(&adapter->pdev->dev, flash_cmd.size, flash_cmd.va,
-				flash_cmd.dma);
+			  flash_cmd.dma);
 	if (status) {
 		dev_err(&adapter->pdev->dev,
 			"Firmware load error. "
@@ -3976,9 +4078,8 @@
 			goto lancer_fw_exit;
 		}
 	} else if (change_status != LANCER_NO_RESET_NEEDED) {
-			dev_err(&adapter->pdev->dev,
-				"System reboot required for new FW"
-				" to be active\n");
+		dev_err(&adapter->pdev->dev,
+			"System reboot required for new FW to be active\n");
 	}
 
 	dev_info(&adapter->pdev->dev, "Firmware flashed successfully\n");
@@ -4042,7 +4143,7 @@
 			switch (ufi_type) {
 			case UFI_TYPE4:
 				status = be_flash_skyhawk(adapter, fw,
-							&flash_cmd, num_imgs);
+							  &flash_cmd, num_imgs);
 				break;
 			case UFI_TYPE3R:
 				status = be_flash_BEx(adapter, fw, &flash_cmd,
@@ -4112,8 +4213,7 @@
 	return status;
 }
 
-static int be_ndo_bridge_setlink(struct net_device *dev,
-				    struct nlmsghdr *nlh)
+static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
 	struct be_adapter *adapter = netdev_priv(dev);
 	struct nlattr *attr, *br_spec;
@@ -4155,8 +4255,7 @@
 }
 
 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				    struct net_device *dev,
-				    u32 filter_mask)
+				 struct net_device *dev, u32 filter_mask)
 {
 	struct be_adapter *adapter = netdev_priv(dev);
 	int status = 0;
@@ -4254,7 +4353,7 @@
 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
 	.ndo_set_vf_mac		= be_set_vf_mac,
 	.ndo_set_vf_vlan	= be_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= be_set_vf_tx_rate,
+	.ndo_set_vf_rate	= be_set_vf_tx_rate,
 	.ndo_get_vf_config	= be_get_vf_config,
 	.ndo_set_vf_link_state  = be_set_vf_link_state,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -4301,7 +4400,7 @@
 
 	netdev->netdev_ops = &be_netdev_ops;
 
-	SET_ETHTOOL_OPS(netdev, &be_ethtool_ops);
+	netdev->ethtool_ops = &be_ethtool_ops;
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
@@ -4870,7 +4969,7 @@
 }
 
 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
-				pci_channel_state_t state)
+					    pci_channel_state_t state)
 {
 	struct be_adapter *adapter = pci_get_drvdata(pdev);
 	struct net_device *netdev =  adapter->netdev;

diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 8b70ca7..f3658bd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c

@@ -769,11 +769,6 @@
 	return phy_mii_ioctl(phy, ifr, cmd);
 }
 
-static int ethoc_config(struct net_device *dev, struct ifmap *map)
-{
-	return -ENOSYS;
-}
-
 static void ethoc_do_set_mac_address(struct net_device *dev)
 {
 	struct ethoc *priv = netdev_priv(dev);
@@ -995,7 +990,6 @@
 	.ndo_open = ethoc_open,
 	.ndo_stop = ethoc_stop,
 	.ndo_do_ioctl = ethoc_ioctl,
-	.ndo_set_config = ethoc_config,
 	.ndo_set_mac_address = ethoc_set_mac_address,
 	.ndo_set_rx_mode = ethoc_set_multicast_list,
 	.ndo_change_mtu = ethoc_change_mtu,

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 68069ea..c77fa4a 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c

@@ -1210,7 +1210,7 @@
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	SET_ETHTOOL_OPS(netdev, &ftgmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftgmac100_ethtool_ops;
 	netdev->netdev_ops = &ftgmac100_netdev_ops;
 	netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO;
 

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8be5b40..4ff1adc 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c

@@ -1085,7 +1085,7 @@
 	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &ftmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftmac100_ethtool_ops;
 	netdev->netdev_ops = &ftmac100_netdev_ops;
 
 	platform_set_drvdata(pdev, netdev);

diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 6048dc8..2703083 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig

@@ -67,6 +67,7 @@
 	tristate "Freescale XGMAC MDIO"
 	depends on FSL_SOC
 	select PHYLIB
+	select OF_MDIO
 	---help---
 	  This driver supports the MDIO bus on the Fman 10G Ethernet MACs.
 

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 3b8d6d1..671d080 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h

@@ -221,7 +221,7 @@
 #define BD_ENET_TX_RCMASK       ((ushort)0x003c)
 #define BD_ENET_TX_UN           ((ushort)0x0002)
 #define BD_ENET_TX_CSL          ((ushort)0x0001)
-#define BD_ENET_TX_STATS        ((ushort)0x03ff)        /* All status bits */
+#define BD_ENET_TX_STATS        ((ushort)0x0fff)        /* All status bits */
 
 /*enhanced buffer descriptor control/status used by Ethernet transmit*/
 #define BD_ENET_TX_INT          0x40000000
@@ -246,8 +246,8 @@
 #define RX_RING_SIZE		(FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
 #define FEC_ENET_TX_FRSIZE	2048
 #define FEC_ENET_TX_FRPPG	(PAGE_SIZE / FEC_ENET_TX_FRSIZE)
-#define TX_RING_SIZE		16	/* Must be power of two */
-#define TX_RING_MOD_MASK	15	/*   for this to work */
+#define TX_RING_SIZE		512	/* Must be power of two */
+#define TX_RING_MOD_MASK	511	/*   for this to work */
 
 #define BD_ENET_RX_INT          0x00800000
 #define BD_ENET_RX_PTP          ((ushort)0x0400)
@@ -296,8 +296,15 @@
 	/* The ring entries to be free()ed */
 	struct bufdesc	*dirty_tx;
 
+	unsigned short bufdesc_size;
 	unsigned short tx_ring_size;
 	unsigned short rx_ring_size;
+	unsigned short tx_stop_threshold;
+	unsigned short tx_wake_threshold;
+
+	/* Software TSO */
+	char *tso_hdrs;
+	dma_addr_t tso_hdrs_dma;
 
 	struct	platform_device *pdev;
 

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 8d69e43..38d9d27 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c

@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -54,6 +55,7 @@
 #include <linux/of_net.h>
 #include <linux/regulator/consumer.h>
 #include <linux/if_vlan.h>
+#include <linux/pinctrl/consumer.h>
 
 #include <asm/cacheflush.h>
 
@@ -172,10 +174,6 @@
 #endif
 #endif /* CONFIG_M5272 */
 
-#if (((RX_RING_SIZE + TX_RING_SIZE) * 32) > PAGE_SIZE)
-#error "FEC: descriptor ring size constants too large"
-#endif
-
 /* Interrupt events/masks. */
 #define FEC_ENET_HBERR	((uint)0x80000000)	/* Heartbeat error */
 #define FEC_ENET_BABR	((uint)0x40000000)	/* Babbling receiver */
@@ -231,6 +229,15 @@
 #define FEC_PAUSE_FLAG_AUTONEG	0x1
 #define FEC_PAUSE_FLAG_ENABLE	0x2
 
+#define TSO_HEADER_SIZE		128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS	100
+#define FEC_MAX_SKB_DESCS	(FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	(addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -286,6 +293,22 @@
 		return (new_bd < base) ? (new_bd + ring_size) : new_bd;
 }
 
+static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
+				struct fec_enet_private *fep)
+{
+	return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
+}
+
+static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
+{
+	int entries;
+
+	entries = ((const char *)fep->dirty_tx -
+			(const char *)fep->cur_tx) / fep->bufdesc_size - 1;
+
+	return entries > 0 ? entries : entries + fep->tx_ring_size;
+}
+
 static void *swap_buffer(void *bufaddr, int len)
 {
 	int i;
@@ -307,115 +330,18 @@
 	if (unlikely(skb_cow_head(skb, 0)))
 		return -1;
 
+	ip_hdr(skb)->check = 0;
 	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = 0;
 
 	return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static void
+fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
 {
-	struct fec_enet_private *fep = netdev_priv(ndev);
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
-	struct bufdesc *bdp, *bdp_pre;
-	void *bufaddr;
-	unsigned short	status;
-	unsigned int index;
-
-	/* Fill in a Tx ring entry */
-	bdp = fep->cur_tx;
-
-	status = bdp->cbd_sc;
-
-	if (status & BD_ENET_TX_READY) {
-		/* Ooops.  All transmit buffers are full.  Bail out.
-		 * This should not happen, since ndev->tbusy should be set.
-		 */
-		netdev_err(ndev, "tx queue full!\n");
-		return NETDEV_TX_BUSY;
-	}
-
-	/* Protocol checksum off-load for TCP and UDP. */
-	if (fec_enet_clear_csum(skb, ndev)) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	/* Clear all of the status flags */
-	status &= ~BD_ENET_TX_STATS;
-
-	/* Set buffer length and buffer pointer */
-	bufaddr = skb->data;
-	bdp->cbd_datlen = skb->len;
-
-	/*
-	 * On some FEC implementations data must be aligned on
-	 * 4-byte boundaries. Use bounce buffers to copy data
-	 * and get it aligned. Ugh.
-	 */
-	if (fep->bufdesc_ex)
-		index = (struct bufdesc_ex *)bdp -
-			(struct bufdesc_ex *)fep->tx_bd_base;
-	else
-		index = bdp - fep->tx_bd_base;
-
-	if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
-		memcpy(fep->tx_bounce[index], skb->data, skb->len);
-		bufaddr = fep->tx_bounce[index];
-	}
-
-	/*
-	 * Some design made an incorrect assumption on endian mode of
-	 * the system that it's running on. As the result, driver has to
-	 * swap every frame going to and coming from the controller.
-	 */
-	if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
-		swap_buffer(bufaddr, skb->len);
-
-	/* Save skb pointer */
-	fep->tx_skbuff[index] = skb;
-
-	/* Push the data cache so the CPM does not get stale memory
-	 * data.
-	 */
-	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
-			skb->len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
-		bdp->cbd_bufaddr = 0;
-		fep->tx_skbuff[index] = NULL;
-		dev_kfree_skb_any(skb);
-		if (net_ratelimit())
-			netdev_err(ndev, "Tx DMA memory map failed\n");
-		return NETDEV_TX_OK;
-	}
-
-	if (fep->bufdesc_ex) {
-
-		struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-		ebdp->cbd_bdu = 0;
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-			fep->hwts_tx_en)) {
-			ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
-			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		} else {
-			ebdp->cbd_esc = BD_ENET_TX_INT;
-
-			/* Enable protocol checksum flags
-			 * We do not bother with the IP Checksum bits as they
-			 * are done by the kernel
-			 */
-			if (skb->ip_summed == CHECKSUM_PARTIAL)
-				ebdp->cbd_esc |= BD_ENET_TX_PINS;
-		}
-	}
-
-	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
-	 * it's the last BD of the frame, and to put the CRC on the end.
-	 */
-	status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
-			| BD_ENET_TX_LAST | BD_ENET_TX_TC);
-	bdp->cbd_sc = status;
+	struct bufdesc *bdp_pre;
 
 	bdp_pre = fec_enet_get_prevdesc(bdp, fep);
 	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
@@ -424,20 +350,415 @@
 		schedule_delayed_work(&(fep->delay_work.delay_work),
 					msecs_to_jiffies(1));
 	}
+}
+
+static int
+fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc *bdp = fep->cur_tx;
+	struct bufdesc_ex *ebdp;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int frag, frag_len;
+	unsigned short status;
+	unsigned int estatus = 0;
+	skb_frag_t *this_frag;
+	unsigned int index;
+	void *bufaddr;
+	int i;
+
+	for (frag = 0; frag < nr_frags; frag++) {
+		this_frag = &skb_shinfo(skb)->frags[frag];
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+		ebdp = (struct bufdesc_ex *)bdp;
+
+		status = bdp->cbd_sc;
+		status &= ~BD_ENET_TX_STATS;
+		status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+		frag_len = skb_shinfo(skb)->frags[frag].size;
+
+		/* Handle the last BD specially */
+		if (frag == nr_frags - 1) {
+			status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+			if (fep->bufdesc_ex) {
+				estatus |= BD_ENET_TX_INT;
+				if (unlikely(skb_shinfo(skb)->tx_flags &
+					SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+					estatus |= BD_ENET_TX_TS;
+			}
+		}
+
+		if (fep->bufdesc_ex) {
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
+				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+			ebdp->cbd_bdu = 0;
+			ebdp->cbd_esc = estatus;
+		}
+
+		bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+			id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+			memcpy(fep->tx_bounce[index], bufaddr, frag_len);
+			bufaddr = fep->tx_bounce[index];
+
+			if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+				swap_buffer(bufaddr, frag_len);
+		}
+
+		bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+						frag_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			goto dma_mapping_error;
+		}
+
+		bdp->cbd_datlen = frag_len;
+		bdp->cbd_sc = status;
+	}
+
+	fep->cur_tx = bdp;
+
+	return 0;
+
+dma_mapping_error:
+	bdp = fep->cur_tx;
+	for (i = 0; i < frag; i++) {
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+				bdp->cbd_datlen, DMA_TO_DEVICE);
+	}
+	return NETDEV_TX_OK;
+}
+
+static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	struct bufdesc *bdp, *last_bdp;
+	void *bufaddr;
+	unsigned short status;
+	unsigned short buflen;
+	unsigned int estatus = 0;
+	unsigned int index;
+	int entries_free;
+	int ret;
+
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free < MAX_SKB_FRAGS + 1) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for SG!\n");
+		return NETDEV_TX_OK;
+	}
+
+	/* Protocol checksum off-load for TCP and UDP. */
+	if (fec_enet_clear_csum(skb, ndev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Fill in a Tx ring entry */
+	bdp = fep->cur_tx;
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+
+	/* Set buffer length and buffer pointer */
+	bufaddr = skb->data;
+	buflen = skb_headlen(skb);
+
+	index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, buflen);
+		bufaddr = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, buflen);
+	}
+
+	/* Push the data cache so the CPM does not get stale memory
+	 * data.
+	 */
+	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+					buflen, DMA_TO_DEVICE);
+	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "Tx DMA memory map failed\n");
+		return NETDEV_TX_OK;
+	}
+
+	if (nr_frags) {
+		ret = fec_enet_txq_submit_frag_skb(skb, ndev);
+		if (ret)
+			return ret;
+	} else {
+		status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+		if (fep->bufdesc_ex) {
+			estatus = BD_ENET_TX_INT;
+			if (unlikely(skb_shinfo(skb)->tx_flags &
+				SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+				estatus |= BD_ENET_TX_TS;
+		}
+	}
+
+	if (fep->bufdesc_ex) {
+
+		struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+			fep->hwts_tx_en))
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	last_bdp = fep->cur_tx;
+	index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	bdp->cbd_datlen = buflen;
+
+	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
+	 * it's the last BD of the frame, and to put the CRC on the end.
+	 */
+	status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
+	bdp->cbd_sc = status;
+
+	fec_enet_submit_work(bdp, fep);
 
 	/* If this was the last BD in the ring, start at the beginning again. */
-	bdp = fec_enet_get_nextdesc(bdp, fep);
+	bdp = fec_enet_get_nextdesc(last_bdp, fep);
 
 	skb_tx_timestamp(skb);
 
 	fep->cur_tx = bdp;
 
-	if (fep->cur_tx == fep->dirty_tx)
-		netif_stop_queue(ndev);
+	/* Trigger transmission start */
+	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index, char *data,
+			int size, bool last_tcp, bool is_last)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+	bdp->cbd_datlen = size;
+
+	if (((unsigned long) data) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], data, size);
+		data = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(data, size);
+	}
+
+	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+					size, DMA_TO_DEVICE);
+	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "Tx DMA memory map failed\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	/* Handle the last BD specially */
+	if (last_tcp)
+		status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+	if (is_last) {
+		status |= BD_ENET_TX_INTR;
+		if (fep->bufdesc_ex)
+			ebdp->cbd_esc |= BD_ENET_TX_INT;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	void *bufaddr;
+	unsigned long dmabuf;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+		bufaddr = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, hdr_len);
+
+		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+					hdr_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	bdp->cbd_bufaddr = dmabuf;
+	bdp->cbd_datlen = hdr_len;
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int total_len, data_left;
+	struct bufdesc *bdp = fep->cur_tx;
+	struct tso_t tso;
+	unsigned int index = 0;
+	int ret;
+
+	if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for TSO!\n");
+		return NETDEV_TX_OK;
+	}
+
+	/* Protocol checksum off-load for TCP and UDP. */
+	if (fec_enet_clear_csum(skb, ndev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+		if (ret)
+			goto err_release;
+
+		while (data_left > 0) {
+			int size;
+
+			size = min_t(int, tso.size, data_left);
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+			ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+							size, size == data_left,
+							total_len == 0);
+			if (ret)
+				goto err_release;
+
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+	}
+
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	fec_enet_submit_work(bdp, fep);
+
+	skb_tx_timestamp(skb);
+	fep->cur_tx = bdp;
 
 	/* Trigger transmission start */
 	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
 
+	return 0;
+
+err_release:
+	/* TODO: Release all used data descriptors for TSO */
+	return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int entries_free;
+	int ret;
+
+	if (skb_is_gso(skb))
+		ret = fec_enet_txq_submit_tso(skb, ndev);
+	else
+		ret = fec_enet_txq_submit_skb(skb, ndev);
+	if (ret)
+		return ret;
+
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free <= fep->tx_stop_threshold)
+		netif_stop_queue(ndev);
+
 	return NETDEV_TX_OK;
 }
 
@@ -756,6 +1077,7 @@
 	unsigned short status;
 	struct	sk_buff	*skb;
 	int	index = 0;
+	int	entries_free;
 
 	fep = netdev_priv(ndev);
 	bdp = fep->dirty_tx;
@@ -769,16 +1091,17 @@
 		if (bdp == fep->cur_tx)
 			break;
 
-		if (fep->bufdesc_ex)
-			index = (struct bufdesc_ex *)bdp -
-				(struct bufdesc_ex *)fep->tx_bd_base;
-		else
-			index = bdp - fep->tx_bd_base;
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
 		skb = fep->tx_skbuff[index];
-		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
-				DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+					bdp->cbd_datlen, DMA_TO_DEVICE);
 		bdp->cbd_bufaddr = 0;
+		if (!skb) {
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			continue;
+		}
 
 		/* Check for errors. */
 		if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -797,7 +1120,7 @@
 				ndev->stats.tx_carrier_errors++;
 		} else {
 			ndev->stats.tx_packets++;
-			ndev->stats.tx_bytes += bdp->cbd_datlen;
+			ndev->stats.tx_bytes += skb->len;
 		}
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
@@ -834,15 +1157,15 @@
 
 		/* Since we have freed up a buffer, the ring is no longer full
 		 */
-		if (fep->dirty_tx != fep->cur_tx) {
-			if (netif_queue_stopped(ndev))
+		if (netif_queue_stopped(ndev)) {
+			entries_free = fec_enet_get_free_txdesc_num(fep);
+			if (entries_free >= fep->tx_wake_threshold)
 				netif_wake_queue(ndev);
 		}
 	}
 	return;
 }
 
-
 /* During a receive, the cur_rx points to the current incoming buffer.
  * When we update through the ring, if the next incoming buffer has
  * not been given to the system, we just set the empty indicator,
@@ -920,11 +1243,7 @@
 		pkt_len = bdp->cbd_datlen;
 		ndev->stats.rx_bytes += pkt_len;
 
-		if (fep->bufdesc_ex)
-			index = (struct bufdesc_ex *)bdp -
-				(struct bufdesc_ex *)fep->rx_bd_base;
-		else
-			index = bdp - fep->rx_bd_base;
+		index = fec_enet_get_bd_index(fep->rx_bd_base, bdp, fep);
 		data = fep->rx_skbuff[index]->data;
 		dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
 					FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);
@@ -1255,6 +1574,49 @@
 	return 0;
 }
 
+static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int ret;
+
+	if (enable) {
+		ret = clk_prepare_enable(fep->clk_ahb);
+		if (ret)
+			return ret;
+		ret = clk_prepare_enable(fep->clk_ipg);
+		if (ret)
+			goto failed_clk_ipg;
+		if (fep->clk_enet_out) {
+			ret = clk_prepare_enable(fep->clk_enet_out);
+			if (ret)
+				goto failed_clk_enet_out;
+		}
+		if (fep->clk_ptp) {
+			ret = clk_prepare_enable(fep->clk_ptp);
+			if (ret)
+				goto failed_clk_ptp;
+		}
+	} else {
+		clk_disable_unprepare(fep->clk_ahb);
+		clk_disable_unprepare(fep->clk_ipg);
+		if (fep->clk_enet_out)
+			clk_disable_unprepare(fep->clk_enet_out);
+		if (fep->clk_ptp)
+			clk_disable_unprepare(fep->clk_ptp);
+	}
+
+	return 0;
+failed_clk_ptp:
+	if (fep->clk_enet_out)
+		clk_disable_unprepare(fep->clk_enet_out);
+failed_clk_enet_out:
+		clk_disable_unprepare(fep->clk_ipg);
+failed_clk_ipg:
+		clk_disable_unprepare(fep->clk_ahb);
+
+	return ret;
+}
+
 static int fec_enet_mii_probe(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1364,7 +1726,7 @@
 	 * Reference Manual has an error on this, and gets fixed on i.MX6Q
 	 * document.
 	 */
-	fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000);
+	fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
 	if (id_entry->driver_data & FEC_QUIRK_ENET_MAC)
 		fep->phy_speed--;
 	fep->phy_speed <<= 1;
@@ -1773,6 +2135,11 @@
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int ret;
 
+	pinctrl_pm_select_default_state(&fep->pdev->dev);
+	ret = fec_enet_clk_enable(ndev, true);
+	if (ret)
+		return ret;
+
 	/* I should reset the ring buffers here, but I don't yet know
 	 * a simple way to do that.
 	 */
@@ -1811,6 +2178,8 @@
 		phy_disconnect(fep->phy_dev);
 	}
 
+	fec_enet_clk_enable(ndev, false);
+	pinctrl_pm_select_sleep_state(&fep->pdev->dev);
 	fec_enet_free_buffers(ndev);
 
 	return 0;
@@ -1988,13 +2357,35 @@
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
 	struct bufdesc *cbd_base;
+	int bd_size;
+
+	/* init the tx & rx ring size */
+	fep->tx_ring_size = TX_RING_SIZE;
+	fep->rx_ring_size = RX_RING_SIZE;
+
+	fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+	fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
+	if (fep->bufdesc_ex)
+		fep->bufdesc_size = sizeof(struct bufdesc_ex);
+	else
+		fep->bufdesc_size = sizeof(struct bufdesc);
+	bd_size = (fep->tx_ring_size + fep->rx_ring_size) *
+			fep->bufdesc_size;
 
 	/* Allocate memory for buffer descriptors. */
-	cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma,
+	cbd_base = dma_alloc_coherent(NULL, bd_size, &fep->bd_dma,
 				      GFP_KERNEL);
 	if (!cbd_base)
 		return -ENOMEM;
 
+	fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+						&fep->tso_hdrs_dma, GFP_KERNEL);
+	if (!fep->tso_hdrs) {
+		dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+		return -ENOMEM;
+	}
+
 	memset(cbd_base, 0, PAGE_SIZE);
 
 	fep->netdev = ndev;
@@ -2004,10 +2395,6 @@
 	/* make sure MAC we just acquired is programmed into the hw */
 	fec_set_mac_address(ndev, NULL);
 
-	/* init the tx & rx ring size */
-	fep->tx_ring_size = TX_RING_SIZE;
-	fep->rx_ring_size = RX_RING_SIZE;
-
 	/* Set receive and transmit descriptor base. */
 	fep->rx_bd_base = cbd_base;
 	if (fep->bufdesc_ex)
@@ -2024,21 +2411,21 @@
 	writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
 	netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);
 
-	if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) {
+	if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN)
 		/* enable hw VLAN support */
 		ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
-		ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-	}
 
 	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
 		/* enable hw accelerator */
 		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM);
-		ndev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM);
+				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
 		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
 	}
 
+	ndev->hw_features = ndev->features;
+
 	fec_restart(ndev, 0);
 
 	return 0;
@@ -2114,6 +2501,9 @@
 		fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG;
 #endif
 
+	/* Select default pin state */
+	pinctrl_pm_select_default_state(&pdev->dev);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	fep->hwp = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(fep->hwp)) {
@@ -2164,26 +2554,10 @@
 		fep->bufdesc_ex = 0;
 	}
 
-	ret = clk_prepare_enable(fep->clk_ahb);
+	ret = fec_enet_clk_enable(ndev, true);
 	if (ret)
 		goto failed_clk;
 
-	ret = clk_prepare_enable(fep->clk_ipg);
-	if (ret)
-		goto failed_clk_ipg;
-
-	if (fep->clk_enet_out) {
-		ret = clk_prepare_enable(fep->clk_enet_out);
-		if (ret)
-			goto failed_clk_enet_out;
-	}
-
-	if (fep->clk_ptp) {
-		ret = clk_prepare_enable(fep->clk_ptp);
-		if (ret)
-			goto failed_clk_ptp;
-	}
-
 	fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
 	if (!IS_ERR(fep->reg_phy)) {
 		ret = regulator_enable(fep->reg_phy);
@@ -2225,6 +2599,8 @@
 
 	/* Carrier starts down, phylib will bring it up */
 	netif_carrier_off(ndev);
+	fec_enet_clk_enable(ndev, false);
+	pinctrl_pm_select_sleep_state(&pdev->dev);
 
 	ret = register_netdev(ndev);
 	if (ret)
@@ -2244,15 +2620,7 @@
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
 failed_regulator:
-	if (fep->clk_ptp)
-		clk_disable_unprepare(fep->clk_ptp);
-failed_clk_ptp:
-	if (fep->clk_enet_out)
-		clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
-	clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
-	clk_disable_unprepare(fep->clk_ahb);
+	fec_enet_clk_enable(ndev, false);
 failed_clk:
 failed_ioremap:
 	free_netdev(ndev);
@@ -2272,14 +2640,9 @@
 	del_timer_sync(&fep->time_keep);
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
-	if (fep->clk_ptp)
-		clk_disable_unprepare(fep->clk_ptp);
 	if (fep->ptp_clock)
 		ptp_clock_unregister(fep->ptp_clock);
-	if (fep->clk_enet_out)
-		clk_disable_unprepare(fep->clk_enet_out);
-	clk_disable_unprepare(fep->clk_ipg);
-	clk_disable_unprepare(fep->clk_ahb);
+	fec_enet_clk_enable(ndev, false);
 	free_netdev(ndev);
 
 	return 0;
@@ -2296,12 +2659,8 @@
 		fec_stop(ndev);
 		netif_device_detach(ndev);
 	}
-	if (fep->clk_ptp)
-		clk_disable_unprepare(fep->clk_ptp);
-	if (fep->clk_enet_out)
-		clk_disable_unprepare(fep->clk_enet_out);
-	clk_disable_unprepare(fep->clk_ipg);
-	clk_disable_unprepare(fep->clk_ahb);
+	fec_enet_clk_enable(ndev, false);
+	pinctrl_pm_select_sleep_state(&fep->pdev->dev);
 
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
@@ -2322,25 +2681,10 @@
 			return ret;
 	}
 
-	ret = clk_prepare_enable(fep->clk_ahb);
+	pinctrl_pm_select_default_state(&fep->pdev->dev);
+	ret = fec_enet_clk_enable(ndev, true);
 	if (ret)
-		goto failed_clk_ahb;
-
-	ret = clk_prepare_enable(fep->clk_ipg);
-	if (ret)
-		goto failed_clk_ipg;
-
-	if (fep->clk_enet_out) {
-		ret = clk_prepare_enable(fep->clk_enet_out);
-		if (ret)
-			goto failed_clk_enet_out;
-	}
-
-	if (fep->clk_ptp) {
-		ret = clk_prepare_enable(fep->clk_ptp);
-		if (ret)
-			goto failed_clk_ptp;
-	}
+		goto failed_clk;
 
 	if (netif_running(ndev)) {
 		fec_restart(ndev, fep->full_duplex);
@@ -2349,14 +2693,7 @@
 
 	return 0;
 
-failed_clk_ptp:
-	if (fep->clk_enet_out)
-		clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
-	clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
-	clk_disable_unprepare(fep->clk_ahb);
-failed_clk_ahb:
+failed_clk:
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
 	return ret;

diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index dc80db4..cfaf17b 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c

@@ -792,10 +792,6 @@
 	phydev = of_phy_connect(dev, fep->fpi->phy_node, &fs_adjust_link, 0,
 				iface);
 	if (!phydev) {
-		phydev = of_phy_connect_fixed_link(dev, &fs_adjust_link,
-						   iface);
-	}
-	if (!phydev) {
 		dev_err(&dev->dev, "Could not attach to PHY\n");
 		return -ENODEV;
 	}
@@ -1029,9 +1025,16 @@
 	fpi->use_napi = 1;
 	fpi->napi_weight = 17;
 	fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0);
-	if ((!fpi->phy_node) && (!of_get_property(ofdev->dev.of_node, "fixed-link",
-						  NULL)))
-		goto out_free_fpi;
+	if (!fpi->phy_node && of_phy_is_fixed_link(ofdev->dev.of_node)) {
+		err = of_phy_register_fixed_link(ofdev->dev.of_node);
+		if (err)
+			goto out_free_fpi;
+
+		/* In the case of a fixed PHY, the DT node associated
+		 * to the PHY is the Ethernet MAC DT node.
+		 */
+		fpi->phy_node = ofdev->dev.of_node;
+	}
 
 	if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) {
 		phy_connection_type = of_get_property(ofdev->dev.of_node,

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index ee6ddbd..a6cf40e 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c

@@ -889,6 +889,17 @@
 
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
 
+	/* In the case of a fixed PHY, the DT node associated
+	 * to the PHY is the Ethernet MAC DT node.
+	 */
+	if (of_phy_is_fixed_link(np)) {
+		err = of_phy_register_fixed_link(np);
+		if (err)
+			goto err_grp_init;
+
+		priv->phy_node = np;
+	}
+
 	/* Find the TBI PHY.  If it's not there, we don't support SGMII */
 	priv->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
 
@@ -1231,7 +1242,7 @@
 		gfar_write_isrg(priv);
 }
 
-static void __init gfar_init_addr_hash_table(struct gfar_private *priv)
+static void gfar_init_addr_hash_table(struct gfar_private *priv)
 {
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 
@@ -1373,6 +1384,9 @@
 
 	gfar_hw_init(priv);
 
+	/* Carrier starts down, phylib will bring it up */
+	netif_carrier_off(dev);
+
 	err = register_netdev(dev);
 
 	if (err) {
@@ -1380,9 +1394,6 @@
 		goto register_fail;
 	}
 
-	/* Carrier starts down, phylib will bring it up */
-	netif_carrier_off(dev);
-
 	device_init_wakeup(&dev->dev,
 			   priv->device_flags &
 			   FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
@@ -1660,9 +1671,6 @@
 
 	priv->phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
 				      interface);
-	if (!priv->phydev)
-		priv->phydev = of_phy_connect_fixed_link(dev, &adjust_link,
-							 interface);
 	if (!priv->phydev) {
 		dev_err(&dev->dev, "could not attach to PHY\n");
 		return -ENODEV;

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index c8299c3..fab39e2 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c

@@ -1728,9 +1728,6 @@
 
 	phydev = of_phy_connect(dev, ug_info->phy_node, &adjust_link, 0,
 				priv->phy_interface);
-	if (!phydev)
-		phydev = of_phy_connect_fixed_link(dev, &adjust_link,
-						   priv->phy_interface);
 	if (!phydev) {
 		dev_err(&dev->dev, "Could not attach to PHY\n");
 		return -ENODEV;
@@ -3790,6 +3787,17 @@
 	ug_info->uf_info.irq = irq_of_parse_and_map(np, 0);
 
 	ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!ug_info->phy_node) {
+		/* In the case of a fixed PHY, the DT node associated
+		 * to the PHY is the Ethernet MAC DT node.
+		 */
+		if (of_phy_is_fixed_link(np)) {
+			err = of_phy_register_fixed_link(np);
+			if (err)
+				return err;
+		}
+		ug_info->phy_node = np;
+	}
 
 	/* Find the TBI PHY node.  If it's not there, we don't support SGMII */
 	ug_info->tbi_node = of_parse_phandle(np, "tbi-handle", 0);

diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 413329e..cc83350 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c

@@ -417,5 +417,5 @@
 
 void uec_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops);
+	netdev->ethtool_ops = &uec_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index d449fcb..0c9d55c 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c

@@ -162,7 +162,9 @@
 
 	/* Return all Fs if nothing was there */
 	if (in_be32(&regs->mdio_stat) & MDIO_STAT_RD_ER) {
-		dev_err(&bus->dev, "MDIO read error\n");
+		dev_err(&bus->dev,
+			"Error while reading PHY%d reg at %d.%d\n",
+			phy_id, dev_addr, regnum);
 		return 0xffff;
 	}
 

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 7becab1..cfe7a74 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c

@@ -256,7 +256,7 @@
     dev->netdev_ops = &fjn_netdev_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
 
     return fmvj18x_config(link);
 } /* fmvj18x_attach */

diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
new file mode 100644
index 0000000..e942173
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/Kconfig

@@ -0,0 +1,27 @@
+#
+# HISILICON device configuration
+#
+
+config NET_VENDOR_HISILICON
+	bool "Hisilicon devices"
+	default y
+	depends on ARM
+	---help---
+	  If you have a network (Ethernet) card belonging to this class, say Y
+	  and read the Ethernet-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Hisilicon devices. If you say Y, you will be asked
+	  for your specific card in the following questions.
+
+if NET_VENDOR_HISILICON
+
+config HIX5HD2_GMAC
+	tristate "Hisilicon HIX5HD2 Family Network Device Support"
+	select PHYLIB
+	help
+	  This selects the hix5hd2 mac family network device.
+
+endif # NET_VENDOR_HISILICON

diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
new file mode 100644
index 0000000..9175e846
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/Makefile

@@ -0,0 +1,5 @@
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o

diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
new file mode 100644
index 0000000..0ffdcd3
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c

@@ -0,0 +1,1066 @@
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/clk.h>
+#include <linux/circ_buf.h>
+
+#define STATION_ADDR_LOW		0x0000
+#define STATION_ADDR_HIGH		0x0004
+#define MAC_DUPLEX_HALF_CTRL		0x0008
+#define MAX_FRM_SIZE			0x003c
+#define PORT_MODE			0x0040
+#define PORT_EN				0x0044
+#define BITS_TX_EN			BIT(2)
+#define BITS_RX_EN			BIT(1)
+#define REC_FILT_CONTROL		0x0064
+#define BIT_CRC_ERR_PASS		BIT(5)
+#define BIT_PAUSE_FRM_PASS		BIT(4)
+#define BIT_VLAN_DROP_EN		BIT(3)
+#define BIT_BC_DROP_EN			BIT(2)
+#define BIT_MC_MATCH_EN			BIT(1)
+#define BIT_UC_MATCH_EN			BIT(0)
+#define PORT_MC_ADDR_LOW		0x0068
+#define PORT_MC_ADDR_HIGH		0x006C
+#define CF_CRC_STRIP			0x01b0
+#define MODE_CHANGE_EN			0x01b4
+#define BIT_MODE_CHANGE_EN		BIT(0)
+#define COL_SLOT_TIME			0x01c0
+#define RECV_CONTROL			0x01e0
+#define BIT_STRIP_PAD_EN		BIT(3)
+#define BIT_RUNT_PKT_EN			BIT(4)
+#define CONTROL_WORD			0x0214
+#define MDIO_SINGLE_CMD			0x03c0
+#define MDIO_SINGLE_DATA		0x03c4
+#define MDIO_CTRL			0x03cc
+#define MDIO_RDATA_STATUS		0x03d0
+
+#define MDIO_START			BIT(20)
+#define MDIO_R_VALID			BIT(0)
+#define MDIO_READ			(BIT(17) | MDIO_START)
+#define MDIO_WRITE			(BIT(16) | MDIO_START)
+
+#define RX_FQ_START_ADDR		0x0500
+#define RX_FQ_DEPTH			0x0504
+#define RX_FQ_WR_ADDR			0x0508
+#define RX_FQ_RD_ADDR			0x050c
+#define RX_FQ_VLDDESC_CNT		0x0510
+#define RX_FQ_ALEMPTY_TH		0x0514
+#define RX_FQ_REG_EN			0x0518
+#define BITS_RX_FQ_START_ADDR_EN	BIT(2)
+#define BITS_RX_FQ_DEPTH_EN		BIT(1)
+#define BITS_RX_FQ_RD_ADDR_EN		BIT(0)
+#define RX_FQ_ALFULL_TH			0x051c
+#define RX_BQ_START_ADDR		0x0520
+#define RX_BQ_DEPTH			0x0524
+#define RX_BQ_WR_ADDR			0x0528
+#define RX_BQ_RD_ADDR			0x052c
+#define RX_BQ_FREE_DESC_CNT		0x0530
+#define RX_BQ_ALEMPTY_TH		0x0534
+#define RX_BQ_REG_EN			0x0538
+#define BITS_RX_BQ_START_ADDR_EN	BIT(2)
+#define BITS_RX_BQ_DEPTH_EN		BIT(1)
+#define BITS_RX_BQ_WR_ADDR_EN		BIT(0)
+#define RX_BQ_ALFULL_TH			0x053c
+#define TX_BQ_START_ADDR		0x0580
+#define TX_BQ_DEPTH			0x0584
+#define TX_BQ_WR_ADDR			0x0588
+#define TX_BQ_RD_ADDR			0x058c
+#define TX_BQ_VLDDESC_CNT		0x0590
+#define TX_BQ_ALEMPTY_TH		0x0594
+#define TX_BQ_REG_EN			0x0598
+#define BITS_TX_BQ_START_ADDR_EN	BIT(2)
+#define BITS_TX_BQ_DEPTH_EN		BIT(1)
+#define BITS_TX_BQ_RD_ADDR_EN		BIT(0)
+#define TX_BQ_ALFULL_TH			0x059c
+#define TX_RQ_START_ADDR		0x05a0
+#define TX_RQ_DEPTH			0x05a4
+#define TX_RQ_WR_ADDR			0x05a8
+#define TX_RQ_RD_ADDR			0x05ac
+#define TX_RQ_FREE_DESC_CNT		0x05b0
+#define TX_RQ_ALEMPTY_TH		0x05b4
+#define TX_RQ_REG_EN			0x05b8
+#define BITS_TX_RQ_START_ADDR_EN	BIT(2)
+#define BITS_TX_RQ_DEPTH_EN		BIT(1)
+#define BITS_TX_RQ_WR_ADDR_EN		BIT(0)
+#define TX_RQ_ALFULL_TH			0x05bc
+#define RAW_PMU_INT			0x05c0
+#define ENA_PMU_INT			0x05c4
+#define STATUS_PMU_INT			0x05c8
+#define MAC_FIFO_ERR_IN			BIT(30)
+#define TX_RQ_IN_TIMEOUT_INT		BIT(29)
+#define RX_BQ_IN_TIMEOUT_INT		BIT(28)
+#define TXOUTCFF_FULL_INT		BIT(27)
+#define TXOUTCFF_EMPTY_INT		BIT(26)
+#define TXCFF_FULL_INT			BIT(25)
+#define TXCFF_EMPTY_INT			BIT(24)
+#define RXOUTCFF_FULL_INT		BIT(23)
+#define RXOUTCFF_EMPTY_INT		BIT(22)
+#define RXCFF_FULL_INT			BIT(21)
+#define RXCFF_EMPTY_INT			BIT(20)
+#define TX_RQ_IN_INT			BIT(19)
+#define TX_BQ_OUT_INT			BIT(18)
+#define RX_BQ_IN_INT			BIT(17)
+#define RX_FQ_OUT_INT			BIT(16)
+#define TX_RQ_EMPTY_INT			BIT(15)
+#define TX_RQ_FULL_INT			BIT(14)
+#define TX_RQ_ALEMPTY_INT		BIT(13)
+#define TX_RQ_ALFULL_INT		BIT(12)
+#define TX_BQ_EMPTY_INT			BIT(11)
+#define TX_BQ_FULL_INT			BIT(10)
+#define TX_BQ_ALEMPTY_INT		BIT(9)
+#define TX_BQ_ALFULL_INT		BIT(8)
+#define RX_BQ_EMPTY_INT			BIT(7)
+#define RX_BQ_FULL_INT			BIT(6)
+#define RX_BQ_ALEMPTY_INT		BIT(5)
+#define RX_BQ_ALFULL_INT		BIT(4)
+#define RX_FQ_EMPTY_INT			BIT(3)
+#define RX_FQ_FULL_INT			BIT(2)
+#define RX_FQ_ALEMPTY_INT		BIT(1)
+#define RX_FQ_ALFULL_INT		BIT(0)
+
+#define DEF_INT_MASK			(RX_BQ_IN_INT | RX_BQ_IN_TIMEOUT_INT | \
+					TX_RQ_IN_INT | TX_RQ_IN_TIMEOUT_INT)
+
+#define DESC_WR_RD_ENA			0x05cc
+#define IN_QUEUE_TH			0x05d8
+#define OUT_QUEUE_TH			0x05dc
+#define QUEUE_TX_BQ_SHIFT		16
+#define RX_BQ_IN_TIMEOUT_TH		0x05e0
+#define TX_RQ_IN_TIMEOUT_TH		0x05e4
+#define STOP_CMD			0x05e8
+#define BITS_TX_STOP			BIT(1)
+#define BITS_RX_STOP			BIT(0)
+#define FLUSH_CMD			0x05eC
+#define BITS_TX_FLUSH_CMD		BIT(5)
+#define BITS_RX_FLUSH_CMD		BIT(4)
+#define BITS_TX_FLUSH_FLAG_DOWN		BIT(3)
+#define BITS_TX_FLUSH_FLAG_UP		BIT(2)
+#define BITS_RX_FLUSH_FLAG_DOWN		BIT(1)
+#define BITS_RX_FLUSH_FLAG_UP		BIT(0)
+#define RX_CFF_NUM_REG			0x05f0
+#define PMU_FSM_REG			0x05f8
+#define RX_FIFO_PKT_IN_NUM		0x05fc
+#define RX_FIFO_PKT_OUT_NUM		0x0600
+
+#define RGMII_SPEED_1000		0x2c
+#define RGMII_SPEED_100			0x2f
+#define RGMII_SPEED_10			0x2d
+#define MII_SPEED_100			0x0f
+#define MII_SPEED_10			0x0d
+#define GMAC_SPEED_1000			0x05
+#define GMAC_SPEED_100			0x01
+#define GMAC_SPEED_10			0x00
+#define GMAC_FULL_DUPLEX		BIT(4)
+
+#define RX_BQ_INT_THRESHOLD		0x01
+#define TX_RQ_INT_THRESHOLD		0x01
+#define RX_BQ_IN_TIMEOUT		0x10000
+#define TX_RQ_IN_TIMEOUT		0x50000
+
+#define MAC_MAX_FRAME_SIZE		1600
+#define DESC_SIZE			32
+#define RX_DESC_NUM			1024
+#define TX_DESC_NUM			1024
+
+#define DESC_VLD_FREE			0
+#define DESC_VLD_BUSY			0x80000000
+#define DESC_FL_MID			0
+#define DESC_FL_LAST			0x20000000
+#define DESC_FL_FIRST			0x40000000
+#define DESC_FL_FULL			0x60000000
+#define DESC_DATA_LEN_OFF		16
+#define DESC_BUFF_LEN_OFF		0
+#define DESC_DATA_MASK			0x7ff
+
+/* DMA descriptor ring helpers; the shift by 5 corresponds to DESC_SIZE (32) */
+#define dma_ring_incr(n, s)		(((n) + 1) & ((s) - 1)) /* wraps correctly only for power-of-two s */
+#define dma_cnt(n)			((n) >> 5) /* n / 32: register value to ring index */
+#define dma_byte(n)			((n) << 5) /* n * 32: ring index to register value */
+
+struct hix5hd2_desc {	/* one descriptor as stored in the DMA rings */
+	__le32 buff_addr;	/* little-endian DMA address of the data buffer */
+	__le32 cmd;		/* little-endian DESC_VLD_xxx and DESC_FL_xxx flags plus length fields */
+} __aligned(32);	/* aligned to 32 bytes */
+
+struct hix5hd2_desc_sw {	/* software bookkeeping for one descriptor ring */
+	struct hix5hd2_desc *desc;	/* CPU address returned by dma_alloc_coherent() */
+	dma_addr_t	phys_addr;	/* DMA address written to the queue START_ADDR register */
+	unsigned int	count;		/* number of descriptors in the ring */
+	unsigned int	size;		/* size of the allocation in bytes */
+};
+
+#define QUEUE_NUMS	4	/* rx_fq, rx_bq, tx_bq and tx_rq */
+struct hix5hd2_priv {	/* per-device driver state, stored in netdev_priv() */
+	struct hix5hd2_desc_sw pool[QUEUE_NUMS];
+#define rx_fq		pool[0]	/* filled with empty buffers by hix5hd2_rx_refill() */
+#define rx_bq		pool[1]	/* received frames, drained by hix5hd2_rx() */
+#define tx_bq		pool[2]	/* frames to send, filled by hix5hd2_net_xmit() */
+#define tx_rq		pool[3]	/* sent frames, drained by hix5hd2_xmit_reclaim() */
+
+	void __iomem *base;	/* MEM resource 0: MAC, MDIO and queue registers */
+	void __iomem *ctrl_base;	/* MEM resource 1: written by hix5hd2_config_port() */
+
+	struct sk_buff *tx_skb[TX_DESC_NUM];	/* skb owned by each TX slot, NULL when free */
+	struct sk_buff *rx_skb[RX_DESC_NUM];	/* skb owned by each RX slot, NULL when free */
+
+	struct device *dev;	/* platform device, used for DMA mapping */
+	struct net_device *netdev;
+
+	struct phy_device *phy;	/* set by of_phy_connect() in hix5hd2_net_open() */
+	struct device_node *phy_node;	/* node referenced by "phy-handle" */
+	phy_interface_t	phy_mode;
+
+	unsigned int speed;	/* last speed programmed by hix5hd2_config_port() */
+	unsigned int duplex;	/* last duplex programmed by hix5hd2_config_port() */
+
+	struct clk *clk;
+	struct mii_bus *bus;
+	struct napi_struct napi;
+	struct work_struct tx_timeout_task;	/* restarts the interface after a TX timeout */
+};
+
+/* Program the MAC for the given link speed and duplex.
+ *
+ * The interface-specific value goes to the control register block first;
+ * the port mode register is then updated while MODE_CHANGE_EN is set, and
+ * finally the duplex value is written to MAC_DUPLEX_HALF_CTRL.
+ */
+static void hix5hd2_config_port(struct net_device *dev, u32 speed, u32 duplex)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	u32 ctrl, port_mode;
+
+	/* remember the settings so hix5hd2_adjust_link() can detect changes */
+	priv->speed = speed;
+	priv->duplex = duplex;
+
+	if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) {
+		if (speed == SPEED_1000)
+			ctrl = RGMII_SPEED_1000;
+		else if (speed == SPEED_100)
+			ctrl = RGMII_SPEED_100;
+		else
+			ctrl = RGMII_SPEED_10;
+	} else if (priv->phy_mode == PHY_INTERFACE_MODE_MII) {
+		ctrl = (speed == SPEED_100) ? MII_SPEED_100 : MII_SPEED_10;
+	} else {
+		netdev_warn(dev, "not supported mode\n");
+		ctrl = MII_SPEED_10;
+	}
+
+	if (duplex)
+		ctrl |= GMAC_FULL_DUPLEX;
+
+	if (speed == SPEED_1000)
+		port_mode = GMAC_SPEED_1000;
+	else if (speed == SPEED_100)
+		port_mode = GMAC_SPEED_100;
+	else
+		port_mode = GMAC_SPEED_10;
+
+	writel_relaxed(ctrl, priv->ctrl_base);
+
+	/* PORT_MODE is written between setting and clearing MODE_CHANGE_EN */
+	writel_relaxed(BIT_MODE_CHANGE_EN, priv->base + MODE_CHANGE_EN);
+	writel_relaxed(port_mode, priv->base + PORT_MODE);
+	writel_relaxed(0, priv->base + MODE_CHANGE_EN);
+
+	writel_relaxed(duplex, priv->base + MAC_DUPLEX_HALF_CTRL);
+}
+
+/* Program the depth of all four descriptor queues.
+ *
+ * Each depth register is written while the matching DEPTH_EN bit is set in
+ * the queue's REG_EN register, which is cleared again afterwards.  The
+ * descriptor counts are shifted left by 3 before being written.
+ */
+static void hix5hd2_set_desc_depth(struct hix5hd2_priv *priv, int rx, int tx)
+{
+	void __iomem *regs = priv->base;
+	u32 rx_depth = rx << 3;
+	u32 tx_depth = tx << 3;
+
+	/* RX free queue */
+	writel_relaxed(BITS_RX_FQ_DEPTH_EN, regs + RX_FQ_REG_EN);
+	writel_relaxed(rx_depth, regs + RX_FQ_DEPTH);
+	writel_relaxed(0, regs + RX_FQ_REG_EN);
+
+	/* RX buffer queue */
+	writel_relaxed(BITS_RX_BQ_DEPTH_EN, regs + RX_BQ_REG_EN);
+	writel_relaxed(rx_depth, regs + RX_BQ_DEPTH);
+	writel_relaxed(0, regs + RX_BQ_REG_EN);
+
+	/* TX buffer queue */
+	writel_relaxed(BITS_TX_BQ_DEPTH_EN, regs + TX_BQ_REG_EN);
+	writel_relaxed(tx_depth, regs + TX_BQ_DEPTH);
+	writel_relaxed(0, regs + TX_BQ_REG_EN);
+
+	/* TX reclaim queue */
+	writel_relaxed(BITS_TX_RQ_DEPTH_EN, regs + TX_RQ_REG_EN);
+	writel_relaxed(tx_depth, regs + TX_RQ_DEPTH);
+	writel_relaxed(0, regs + TX_RQ_REG_EN);
+}
+
+/* Write the RX free queue start address while its START_ADDR_EN bit is set */
+static void hix5hd2_set_rx_fq(struct hix5hd2_priv *priv, dma_addr_t phy_addr)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(BITS_RX_FQ_START_ADDR_EN, regs + RX_FQ_REG_EN);
+	writel_relaxed(phy_addr, regs + RX_FQ_START_ADDR);
+	writel_relaxed(0, regs + RX_FQ_REG_EN);
+}
+
+/* Write the RX buffer queue start address while its START_ADDR_EN bit is set */
+static void hix5hd2_set_rx_bq(struct hix5hd2_priv *priv, dma_addr_t phy_addr)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(BITS_RX_BQ_START_ADDR_EN, regs + RX_BQ_REG_EN);
+	writel_relaxed(phy_addr, regs + RX_BQ_START_ADDR);
+	writel_relaxed(0, regs + RX_BQ_REG_EN);
+}
+
+/* Write the TX buffer queue start address while its START_ADDR_EN bit is set */
+static void hix5hd2_set_tx_bq(struct hix5hd2_priv *priv, dma_addr_t phy_addr)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(BITS_TX_BQ_START_ADDR_EN, regs + TX_BQ_REG_EN);
+	writel_relaxed(phy_addr, regs + TX_BQ_START_ADDR);
+	writel_relaxed(0, regs + TX_BQ_REG_EN);
+}
+
+/* Write the TX reclaim queue start address while its START_ADDR_EN bit is set */
+static void hix5hd2_set_tx_rq(struct hix5hd2_priv *priv, dma_addr_t phy_addr)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(BITS_TX_RQ_START_ADDR_EN, regs + TX_RQ_REG_EN);
+	writel_relaxed(phy_addr, regs + TX_RQ_START_ADDR);
+	writel_relaxed(0, regs + TX_RQ_REG_EN);
+}
+
+/* Hand the DMA addresses of all four descriptor rings to the hardware */
+static void hix5hd2_set_desc_addr(struct hix5hd2_priv *priv)
+{
+	dma_addr_t rx_fq_addr = priv->rx_fq.phys_addr;
+	dma_addr_t rx_bq_addr = priv->rx_bq.phys_addr;
+	dma_addr_t tx_rq_addr = priv->tx_rq.phys_addr;
+	dma_addr_t tx_bq_addr = priv->tx_bq.phys_addr;
+
+	hix5hd2_set_rx_fq(priv, rx_fq_addr);
+	hix5hd2_set_rx_bq(priv, rx_bq_addr);
+	hix5hd2_set_tx_rq(priv, tx_rq_addr);
+	hix5hd2_set_tx_bq(priv, tx_bq_addr);
+}
+
+/* Bring the MAC to its initial state: interrupts masked and cleared, receive
+ * filter and frame size set, interrupt thresholds and timeouts programmed,
+ * and the descriptor rings (depth and address) handed to the hardware.
+ */
+static void hix5hd2_hw_init(struct hix5hd2_priv *priv)
+{
+	void __iomem *regs = priv->base;
+	u32 thresholds;
+
+	/* disable and clear all interrupts */
+	writel_relaxed(0, regs + ENA_PMU_INT);
+	writel_relaxed(~0, regs + RAW_PMU_INT);
+
+	writel_relaxed(BIT_CRC_ERR_PASS, regs + REC_FILT_CONTROL);
+	writel_relaxed(MAC_MAX_FRAME_SIZE, regs + CONTROL_WORD);
+	writel_relaxed(0, regs + COL_SLOT_TIME);
+
+	/* RX threshold in the low half, TX threshold shifted into the high half */
+	thresholds = RX_BQ_INT_THRESHOLD |
+		     (TX_RQ_INT_THRESHOLD << QUEUE_TX_BQ_SHIFT);
+	writel_relaxed(thresholds, regs + IN_QUEUE_TH);
+
+	writel_relaxed(RX_BQ_IN_TIMEOUT, regs + RX_BQ_IN_TIMEOUT_TH);
+	writel_relaxed(TX_RQ_IN_TIMEOUT, regs + TX_RQ_IN_TIMEOUT_TH);
+
+	hix5hd2_set_desc_depth(priv, RX_DESC_NUM, TX_DESC_NUM);
+	hix5hd2_set_desc_addr(priv);
+}
+
+/* Enable the interrupt sources listed in DEF_INT_MASK */
+static void hix5hd2_irq_enable(struct hix5hd2_priv *priv)
+{
+	void __iomem *int_enable = priv->base + ENA_PMU_INT;
+
+	writel_relaxed(DEF_INT_MASK, int_enable);
+}
+
+/* Write zero to the interrupt enable register, disabling every source */
+static void hix5hd2_irq_disable(struct hix5hd2_priv *priv)
+{
+	void __iomem *int_enable = priv->base + ENA_PMU_INT;
+
+	writel_relaxed(0, int_enable);
+}
+
+/* Write 0xf to DESC_WR_RD_ENA, then set the RX and TX enable bits in PORT_EN */
+static void hix5hd2_port_enable(struct hix5hd2_priv *priv)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(0xf, regs + DESC_WR_RD_ENA);
+	writel_relaxed(BITS_RX_EN | BITS_TX_EN, regs + PORT_EN);
+}
+
+/* Clear the RX and TX enable bits in PORT_EN, then zero DESC_WR_RD_ENA.
+ *
+ * NOTE(review): PORT_EN is written with the complement of the enable bits
+ * rather than via read-modify-write, so every other bit is written as 1;
+ * confirm against the datasheet that those bits are don't-care.
+ */
+static void hix5hd2_port_disable(struct hix5hd2_priv *priv)
+{
+	void __iomem *regs = priv->base;
+
+	writel_relaxed(~(BITS_RX_EN | BITS_TX_EN), regs + PORT_EN);
+	writel_relaxed(0, regs + DESC_WR_RD_ENA);
+}
+
+/* Copy dev->dev_addr into the station address registers: bytes 0-1 go to
+ * STATION_ADDR_HIGH and bytes 2-5 to STATION_ADDR_LOW, most significant
+ * byte first.
+ */
+static void hix5hd2_hw_set_mac_addr(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	unsigned char *addr = dev->dev_addr;
+	u32 high, low;
+
+	high = (addr[0] << 8) | addr[1];
+	low = (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5];
+
+	writel_relaxed(high, priv->base + STATION_ADDR_HIGH);
+	writel_relaxed(low, priv->base + STATION_ADDR_LOW);
+}
+
+/* ndo_set_mac_address: let eth_mac_addr() update the netdev and, if that
+ * succeeds, mirror the new address into the hardware.
+ *
+ * Returns the result of eth_mac_addr().
+ */
+static int hix5hd2_net_set_mac_address(struct net_device *dev, void *p)
+{
+	int err = eth_mac_addr(dev, p);
+
+	if (err)
+		return err;
+
+	hix5hd2_hw_set_mac_addr(dev);
+	return 0;
+}
+
+/* phylib link-change callback: reprogram the port only when the PHY's speed
+ * or duplex differs from the values last written to the MAC.
+ */
+static void hix5hd2_adjust_link(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phy;
+
+	if (priv->speed == phydev->speed && priv->duplex == phydev->duplex)
+		return;
+
+	hix5hd2_config_port(dev, phydev->speed, phydev->duplex);
+	phy_print_status(phydev);
+}
+
+/* Top up the RX free queue with freshly allocated, DMA-mapped buffers.
+ *
+ * Filling starts at the software write pointer and stops when the ring has
+ * no more space, a slot still holds an skb, or an allocation or mapping
+ * fails.  The write pointer register is updated only if at least one
+ * descriptor was added.
+ */
+static void hix5hd2_rx_refill(struct hix5hd2_priv *priv)
+{
+	const u32 buf_len = MAC_MAX_FRAME_SIZE;
+	struct hix5hd2_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+	u32 wr, rd, space, pos;
+
+	/* software write pointer */
+	wr = dma_cnt(readl_relaxed(priv->base + RX_FQ_WR_ADDR));
+	/* logic read pointer */
+	rd = dma_cnt(readl_relaxed(priv->base + RX_FQ_RD_ADDR));
+	space = CIRC_SPACE(wr, rd, RX_DESC_NUM);
+
+	for (pos = wr; space > 0; space--) {
+		/* slot still owns a buffer: nothing more to do */
+		if (priv->rx_skb[pos])
+			break;
+
+		skb = netdev_alloc_skb_ip_align(priv->netdev, buf_len);
+		if (unlikely(!skb))
+			break;
+
+		mapping = dma_map_single(priv->dev, skb->data, buf_len,
+					 DMA_FROM_DEVICE);
+		if (dma_mapping_error(priv->dev, mapping)) {
+			dev_kfree_skb_any(skb);
+			break;
+		}
+
+		desc = &priv->rx_fq.desc[pos];
+		desc->buff_addr = cpu_to_le32(mapping);
+		priv->rx_skb[pos] = skb;
+		desc->cmd = cpu_to_le32(DESC_VLD_FREE |
+					(buf_len - 1) << DESC_BUFF_LEN_OFF);
+		pos = dma_ring_incr(pos, RX_DESC_NUM);
+	}
+
+	/* ensure desc updated */
+	wmb();
+
+	if (pos != wr)
+		writel_relaxed(dma_byte(pos), priv->base + RX_FQ_WR_ADDR);
+}
+
+/* Receive up to @limit frames from the RX buffer queue and pass them to GRO.
+ *
+ * Walks the descriptors between the software read pointer and the hardware
+ * write pointer, unmaps each buffer and hands the skb to the stack.  Frames
+ * whose reported length exceeds MAC_MAX_FRAME_SIZE are dropped and counted
+ * as length errors.  Afterwards the read pointer is advanced and the free
+ * queue is refilled.
+ *
+ * Returns the number of pending descriptors, capped at @limit.
+ */
+static int hix5hd2_rx(struct net_device *dev, int limit)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct hix5hd2_desc *desc;
+	dma_addr_t addr;
+	u32 start, end, num, pos, i, len;
+
+	/* software read pointer */
+	start = dma_cnt(readl_relaxed(priv->base + RX_BQ_RD_ADDR));
+	/* logic write pointer */
+	end = dma_cnt(readl_relaxed(priv->base + RX_BQ_WR_ADDR));
+	num = CIRC_CNT(end, start, RX_DESC_NUM);
+	if (num > limit)
+		num = limit;
+
+	/* ensure get updated desc */
+	rmb();
+	for (i = 0, pos = start; i < num; i++) {
+		skb = priv->rx_skb[pos];
+		if (unlikely(!skb)) {
+			netdev_err(dev, "inconsistent rx_skb\n");
+			break;
+		}
+		priv->rx_skb[pos] = NULL;
+
+		desc = priv->rx_bq.desc + pos;
+		len = (le32_to_cpu(desc->cmd) >> DESC_DATA_LEN_OFF) &
+		       DESC_DATA_MASK;
+		addr = le32_to_cpu(desc->buff_addr);
+		dma_unmap_single(priv->dev, addr, MAC_MAX_FRAME_SIZE,
+				 DMA_FROM_DEVICE);
+
+		/* Validate the length before skb_put(): the descriptor field
+		 * can report up to DESC_DATA_MASK bytes, more than the buffer
+		 * that was allocated, and skb_put() would then overrun it.
+		 */
+		if (len > MAC_MAX_FRAME_SIZE) {
+			netdev_err(dev, "rcv len err, len = %d\n", len);
+			dev->stats.rx_errors++;
+			dev->stats.rx_length_errors++;
+			dev_kfree_skb_any(skb);
+			goto next;
+		}
+		skb_put(skb, len);
+
+		skb->protocol = eth_type_trans(skb, dev);
+		/* napi_gro_receive() takes ownership of the skb and may free
+		 * it, so sample the length for the statistics beforehand.
+		 */
+		len = skb->len;
+		napi_gro_receive(&priv->napi, skb);
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += len;
+		dev->last_rx = jiffies;
+next:
+		pos = dma_ring_incr(pos, RX_DESC_NUM);
+	}
+
+	if (pos != start)
+		writel_relaxed(dma_byte(pos), priv->base + RX_BQ_RD_ADDR);
+
+	hix5hd2_rx_refill(priv);
+
+	return num;
+}
+
+/* Release the skbs of transmitted frames.
+ *
+ * Under the TX lock, walks the TX reclaim queue from the software read
+ * pointer to the hardware write pointer, unmapping and freeing each skb,
+ * then advances the read pointer.  Completed work is reported to the byte
+ * queue limits code and a stopped queue is woken if anything was reclaimed.
+ */
+static void hix5hd2_xmit_reclaim(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	unsigned int done_bytes = 0, done_pkts = 0;
+	struct hix5hd2_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+	u32 rd, wr, pending, pos;
+
+	netif_tx_lock(dev);
+
+	/* software read */
+	rd = dma_cnt(readl_relaxed(priv->base + TX_RQ_RD_ADDR));
+	/* logic write */
+	wr = dma_cnt(readl_relaxed(priv->base + TX_RQ_WR_ADDR));
+	pending = CIRC_CNT(wr, rd, TX_DESC_NUM);
+
+	for (pos = rd; pending > 0; pending--) {
+		skb = priv->tx_skb[pos];
+		if (unlikely(!skb)) {
+			netdev_err(dev, "inconsistent tx_skb\n");
+			break;
+		}
+
+		done_pkts++;
+		done_bytes += skb->len;
+
+		desc = &priv->tx_rq.desc[pos];
+		mapping = le32_to_cpu(desc->buff_addr);
+		dma_unmap_single(priv->dev, mapping, skb->len, DMA_TO_DEVICE);
+
+		priv->tx_skb[pos] = NULL;
+		dev_consume_skb_any(skb);
+		pos = dma_ring_incr(pos, TX_DESC_NUM);
+	}
+
+	if (pos != rd)
+		writel_relaxed(dma_byte(pos), priv->base + TX_RQ_RD_ADDR);
+
+	netif_tx_unlock(dev);
+
+	if (done_pkts || done_bytes)
+		netdev_completed_queue(dev, done_pkts, done_bytes);
+
+	if (unlikely(netif_queue_stopped(priv->netdev)) && done_pkts)
+		netif_wake_queue(priv->netdev);
+}
+
+/* NAPI poll handler.
+ *
+ * Repeatedly reclaims transmitted buffers and receives frames until the
+ * budget is used up, a pass receives nothing, or no interrupt from
+ * DEF_INT_MASK is pending any more.  When less than the full budget was
+ * consumed, polling is completed and interrupts are re-enabled.
+ *
+ * Returns the amount of RX work done.
+ */
+static int hix5hd2_poll(struct napi_struct *napi, int budget)
+{
+	struct hix5hd2_priv *priv = container_of(napi,
+				struct hix5hd2_priv, napi);
+	struct net_device *dev = priv->netdev;
+	int remaining = budget;
+	int work_done = 0;
+	int pending, rcvd;
+
+	for (;;) {
+		hix5hd2_xmit_reclaim(dev);
+
+		rcvd = hix5hd2_rx(dev, remaining);
+		work_done += rcvd;
+		remaining -= rcvd;
+		if (rcvd == 0 || work_done >= budget)
+			break;
+
+		/* read the raw status and write it back to acknowledge it */
+		pending = readl_relaxed(priv->base + RAW_PMU_INT);
+		writel_relaxed(pending, priv->base + RAW_PMU_INT);
+		if (!(pending & DEF_INT_MASK))
+			break;
+	}
+
+	if (work_done < budget) {
+		napi_complete(napi);
+		hix5hd2_irq_enable(priv);
+	}
+
+	return work_done;
+}
+
+/* Interrupt handler: acknowledge the raw status and, if any source from
+ * DEF_INT_MASK fired, disable interrupts and defer the work to NAPI.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t hix5hd2_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	int status;
+
+	status = readl_relaxed(priv->base + RAW_PMU_INT);
+	writel_relaxed(status, priv->base + RAW_PMU_INT);
+
+	if (unlikely(!(status & DEF_INT_MASK)))
+		return IRQ_HANDLED;
+
+	hix5hd2_irq_disable(priv);
+	napi_schedule(&priv->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* ndo_start_xmit: queue one frame on the TX buffer queue.
+ *
+ * Returns NETDEV_TX_BUSY (after stopping the queue) when the slot at the
+ * software write pointer still holds an skb; otherwise NETDEV_TX_OK.  A
+ * frame whose DMA mapping fails is freed and reported as NETDEV_TX_OK.
+ */
+static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	struct hix5hd2_desc *desc;
+	dma_addr_t mapping;
+	u32 slot, cmd;
+
+	/* software write pointer */
+	slot = dma_cnt(readl_relaxed(priv->base + TX_BQ_WR_ADDR));
+	if (unlikely(priv->tx_skb[slot])) {
+		dev->stats.tx_dropped++;
+		dev->stats.tx_fifo_errors++;
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	mapping = dma_map_single(priv->dev, skb->data, skb->len,
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->dev, mapping)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* the frame length is stored in both the data and buffer length fields */
+	cmd = DESC_VLD_BUSY | DESC_FL_FULL |
+	      ((skb->len & DESC_DATA_MASK) << DESC_DATA_LEN_OFF) |
+	      ((skb->len & DESC_DATA_MASK) << DESC_BUFF_LEN_OFF);
+
+	desc = &priv->tx_bq.desc[slot];
+	desc->buff_addr = cpu_to_le32(mapping);
+	priv->tx_skb[slot] = skb;
+	desc->cmd = cpu_to_le32(cmd);
+
+	/* ensure desc updated */
+	wmb();
+
+	slot = dma_ring_incr(slot, TX_DESC_NUM);
+	writel_relaxed(dma_byte(slot), priv->base + TX_BQ_WR_ADDR);
+
+	dev->trans_start = jiffies;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+	netdev_sent_queue(dev, skb->len);
+
+	return NETDEV_TX_OK;
+}
+
+/* Unmap and free every skb still attached to an RX or TX slot.  The DMA
+ * address of each buffer is read back from the rx_fq / tx_rq descriptor
+ * with the same index.
+ */
+static void hix5hd2_free_dma_desc_rings(struct hix5hd2_priv *priv)
+{
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+	int idx;
+
+	for (idx = 0; idx < RX_DESC_NUM; idx++) {
+		skb = priv->rx_skb[idx];
+		if (!skb)
+			continue;
+
+		mapping = le32_to_cpu(priv->rx_fq.desc[idx].buff_addr);
+		dma_unmap_single(priv->dev, mapping,
+				 MAC_MAX_FRAME_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->rx_skb[idx] = NULL;
+	}
+
+	for (idx = 0; idx < TX_DESC_NUM; idx++) {
+		skb = priv->tx_skb[idx];
+		if (!skb)
+			continue;
+
+		mapping = le32_to_cpu(priv->tx_rq.desc[idx].buff_addr);
+		dma_unmap_single(priv->dev, mapping, skb->len, DMA_TO_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->tx_skb[idx] = NULL;
+	}
+}
+
+/* ndo_open: enable the clock, attach and start the PHY, initialise the MAC
+ * and its RX buffers, then enable NAPI, the port and interrupts.
+ *
+ * Returns 0 on success, the clk_prepare_enable() error if the clock cannot
+ * be enabled, or -ENODEV if the PHY cannot be connected.
+ */
+static int hix5hd2_net_open(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret < 0) {
+		netdev_err(dev, "failed to enable clk %d\n", ret);
+		return ret;
+	}
+
+	priv->phy = of_phy_connect(dev, priv->phy_node,
+				   &hix5hd2_adjust_link, 0, priv->phy_mode);
+	if (!priv->phy) {
+		/* balance the clk_prepare_enable() above; ndo_stop is not
+		 * called when ndo_open fails
+		 */
+		clk_disable_unprepare(priv->clk);
+		return -ENODEV;
+	}
+
+	phy_start(priv->phy);
+	hix5hd2_hw_init(priv);
+	hix5hd2_rx_refill(priv);
+
+	netdev_reset_queue(dev);
+	netif_start_queue(dev);
+	napi_enable(&priv->napi);
+
+	hix5hd2_port_enable(priv);
+	hix5hd2_irq_enable(priv);
+
+	return 0;
+}
+
+/* ndo_stop: quiesce the hardware, NAPI and the TX queue, drop all buffers
+ * still held in the rings, detach the PHY and disable the clock.
+ *
+ * Always returns 0.
+ */
+static int hix5hd2_net_close(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev;
+
+	/* stop the hardware first, then the software side */
+	hix5hd2_port_disable(priv);
+	hix5hd2_irq_disable(priv);
+	napi_disable(&priv->napi);
+	netif_stop_queue(dev);
+	hix5hd2_free_dma_desc_rings(priv);
+
+	phydev = priv->phy;
+	if (phydev) {
+		phy_stop(phydev);
+		phy_disconnect(phydev);
+	}
+
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+/* Work item scheduled on a TX timeout: restart the interface by closing
+ * and reopening it.
+ */
+static void hix5hd2_tx_timeout_task(struct work_struct *work)
+{
+	struct hix5hd2_priv *priv = container_of(work, struct hix5hd2_priv,
+						 tx_timeout_task);
+	struct net_device *ndev = priv->netdev;
+
+	hix5hd2_net_close(ndev);
+	hix5hd2_net_open(ndev);
+}
+
+/* ndo_tx_timeout: defer the restart to hix5hd2_tx_timeout_task() */
+static void hix5hd2_net_timeout(struct net_device *dev)
+{
+	struct hix5hd2_priv *priv = netdev_priv(dev);
+	struct work_struct *restart = &priv->tx_timeout_task;
+
+	schedule_work(restart);
+}
+
+static const struct net_device_ops hix5hd2_netdev_ops = {	/* installed in hix5hd2_dev_probe() */
+	.ndo_open		= hix5hd2_net_open,
+	.ndo_stop		= hix5hd2_net_close,
+	.ndo_start_xmit		= hix5hd2_net_xmit,
+	.ndo_tx_timeout		= hix5hd2_net_timeout,	/* schedules tx_timeout_task */
+	.ndo_set_mac_address	= hix5hd2_net_set_mac_address,
+};
+
+/* ethtool get_settings: forward to phylib.
+ *
+ * Returns -ENODEV when no PHY is attached, otherwise the result of
+ * phy_ethtool_gset().
+ */
+static int hix5hd2_get_settings(struct net_device *net_dev,
+				struct ethtool_cmd *cmd)
+{
+	struct hix5hd2_priv *priv = netdev_priv(net_dev);
+	struct phy_device *phydev = priv->phy;
+
+	return phydev ? phy_ethtool_gset(phydev, cmd) : -ENODEV;
+}
+
+/* ethtool set_settings: forward to phylib.
+ *
+ * Returns -ENODEV when no PHY is attached, otherwise the result of
+ * phy_ethtool_sset().
+ */
+static int hix5hd2_set_settings(struct net_device *net_dev,
+				struct ethtool_cmd *cmd)
+{
+	struct hix5hd2_priv *priv = netdev_priv(net_dev);
+	struct phy_device *phydev = priv->phy;
+
+	return phydev ? phy_ethtool_sset(phydev, cmd) : -ENODEV;
+}
+
+/* ethtool operations; link settings are delegated to the attached PHY.
+ * The table is never modified, so it is const like hix5hd2_netdev_ops.
+ */
+static const struct ethtool_ops hix5hd2_ethtools_ops = {
+	.get_link		= ethtool_op_get_link,
+	.get_settings		= hix5hd2_get_settings,
+	.set_settings		= hix5hd2_set_settings,
+};
+
+/* Wait for the MDIO_START bit in MDIO_SINGLE_CMD to clear.
+ *
+ * Sleeps 10-20us between reads and gives up after 10000 sleeps.
+ *
+ * Returns 0 once the bit is clear, -ETIMEDOUT otherwise.
+ */
+static int hix5hd2_mdio_wait_ready(struct mii_bus *bus)
+{
+	struct hix5hd2_priv *priv = bus->priv;
+	void __iomem *cmd_reg = priv->base + MDIO_SINGLE_CMD;
+	const int max_tries = 10000;
+	int tries = 0;
+
+	while (readl_relaxed(cmd_reg) & MDIO_START) {
+		if (tries++ == max_tries)
+			return -ETIMEDOUT;
+		usleep_range(10, 20);
+	}
+
+	return 0;
+}
+
+/* mii_bus read callback: read register @reg of the PHY at address @phy.
+ *
+ * Returns the 16-bit value taken from the upper half of MDIO_SINGLE_DATA,
+ * -ETIMEDOUT if the controller stays busy, or -ENODEV if MDIO_R_VALID is
+ * set in MDIO_RDATA_STATUS after the transfer.
+ *
+ * NOTE(review): the error path triggers when MDIO_R_VALID is set, so the bit
+ * appears to flag an invalid read despite its name - confirm against the
+ * datasheet.
+ */
+static int hix5hd2_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct hix5hd2_priv *priv = bus->priv;
+	void __iomem *base = priv->base;
+	int status, data, err;
+
+	err = hix5hd2_mdio_wait_ready(bus);
+	if (err < 0)
+		return err;
+
+	writel_relaxed(MDIO_READ | (phy << 8) | reg, base + MDIO_SINGLE_CMD);
+	err = hix5hd2_mdio_wait_ready(bus);
+	if (err < 0)
+		return err;
+
+	status = readl_relaxed(base + MDIO_RDATA_STATUS);
+	if (status & MDIO_R_VALID) {
+		dev_err(bus->parent, "SMI bus read not valid\n");
+		return -ENODEV;
+	}
+
+	data = readl_relaxed(priv->base + MDIO_SINGLE_DATA);
+	return (data >> 16) & 0xFFFF;
+}
+
+/* mii_bus write callback: write @val to register @reg of the PHY at
+ * address @phy.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the controller stays busy before
+ * or after the command is issued.
+ */
+static int hix5hd2_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
+{
+	struct hix5hd2_priv *priv = bus->priv;
+	void __iomem *base = priv->base;
+	int err;
+
+	err = hix5hd2_mdio_wait_ready(bus);
+	if (err < 0)
+		return err;
+
+	/* load the data register first, then issue the write command */
+	writel_relaxed(val, base + MDIO_SINGLE_DATA);
+	writel_relaxed(MDIO_WRITE | (phy << 8) | reg, base + MDIO_SINGLE_CMD);
+
+	return hix5hd2_mdio_wait_ready(bus);
+}
+
+/* Free the coherent memory of every descriptor ring that was allocated and
+ * clear its pointer, so the function can safely be called more than once.
+ */
+static void hix5hd2_destroy_hw_desc_queue(struct hix5hd2_priv *priv)
+{
+	struct hix5hd2_desc_sw *ring;
+	int idx;
+
+	for (idx = 0; idx < QUEUE_NUMS; idx++) {
+		ring = &priv->pool[idx];
+		if (!ring->desc)
+			continue;
+
+		dma_free_coherent(priv->dev, ring->size, ring->desc,
+				  ring->phys_addr);
+		ring->desc = NULL;
+	}
+}
+
+/* Allocate and zero the coherent memory for all four descriptor rings.
+ *
+ * Returns 0 on success.  If any allocation fails, the rings allocated so
+ * far are released and -ENOMEM is returned.
+ */
+static int hix5hd2_init_hw_desc_queue(struct hix5hd2_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct hix5hd2_desc_sw *ring;
+	struct hix5hd2_desc *cpu_addr;
+	dma_addr_t dma_addr;
+	int bytes, idx;
+
+	priv->rx_fq.count = RX_DESC_NUM;
+	priv->rx_bq.count = RX_DESC_NUM;
+	priv->tx_bq.count = TX_DESC_NUM;
+	priv->tx_rq.count = TX_DESC_NUM;
+
+	for (idx = 0; idx < QUEUE_NUMS; idx++) {
+		ring = &priv->pool[idx];
+		bytes = ring->count * sizeof(struct hix5hd2_desc);
+
+		cpu_addr = dma_alloc_coherent(dev, bytes, &dma_addr,
+					      GFP_KERNEL);
+		if (!cpu_addr) {
+			hix5hd2_destroy_hw_desc_queue(priv);
+			return -ENOMEM;
+		}
+
+		memset(cpu_addr, 0, bytes);
+		ring->size = bytes;
+		ring->desc = cpu_addr;
+		ring->phys_addr = dma_addr;
+	}
+
+	return 0;
+}
+
+/* Platform probe: map the two register regions, get and enable the clock,
+ * register the MDIO bus, read "phy-mode", "phy-handle", the IRQ and the MAC
+ * address from the device tree, allocate the descriptor rings and register
+ * the net device.
+ *
+ * The clock is only needed while probing; it is disabled again before
+ * returning (on success and on every error path) and re-enabled by
+ * hix5hd2_net_open().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int hix5hd2_dev_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct net_device *ndev;
+	struct hix5hd2_priv *priv;
+	struct resource *res;
+	struct mii_bus *bus;
+	const char *mac_addr;
+	int ret;
+
+	ndev = alloc_etherdev(sizeof(struct hix5hd2_priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ndev);
+
+	priv = netdev_priv(ndev);
+	priv->dev = dev;
+	priv->netdev = ndev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto out_free_netdev;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	priv->ctrl_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->ctrl_base)) {
+		ret = PTR_ERR(priv->ctrl_base);
+		goto out_free_netdev;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		netdev_err(ndev, "failed to get clk\n");
+		ret = -ENODEV;
+		goto out_free_netdev;
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret < 0) {
+		netdev_err(ndev, "failed to enable clk %d\n", ret);
+		goto out_free_netdev;
+	}
+
+	/* from here on every failure must also disable the clock */
+	bus = mdiobus_alloc();
+	if (bus == NULL) {
+		ret = -ENOMEM;
+		goto out_disable_clk;
+	}
+
+	bus->priv = priv;
+	bus->name = "hix5hd2_mii_bus";
+	bus->read = hix5hd2_mdio_read;
+	bus->write = hix5hd2_mdio_write;
+	bus->parent = &pdev->dev;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev));
+	priv->bus = bus;
+
+	ret = of_mdiobus_register(bus, node);
+	if (ret)
+		goto err_free_mdio;
+
+	priv->phy_mode = of_get_phy_mode(node);
+	/* phy_interface_t is an enum that may be unsigned; compare as int
+	 * so a negative errno from of_get_phy_mode() is actually detected
+	 */
+	if ((int)priv->phy_mode < 0) {
+		netdev_err(ndev, "not find phy-mode\n");
+		ret = -EINVAL;
+		goto err_mdiobus;
+	}
+
+	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
+	if (!priv->phy_node) {
+		netdev_err(ndev, "not find phy-handle\n");
+		ret = -EINVAL;
+		goto err_mdiobus;
+	}
+
+	ndev->irq = platform_get_irq(pdev, 0);
+	if (ndev->irq <= 0) {
+		netdev_err(ndev, "No irq resource\n");
+		ret = -EINVAL;
+		goto out_phy_node;
+	}
+
+	ret = devm_request_irq(dev, ndev->irq, hix5hd2_interrupt,
+			       0, pdev->name, ndev);
+	if (ret) {
+		netdev_err(ndev, "devm_request_irq failed\n");
+		goto out_phy_node;
+	}
+
+	/* fall back to a random address when the DT provides no valid one */
+	mac_addr = of_get_mac_address(node);
+	if (mac_addr)
+		ether_addr_copy(ndev->dev_addr, mac_addr);
+	if (!is_valid_ether_addr(ndev->dev_addr)) {
+		eth_hw_addr_random(ndev);
+		netdev_warn(ndev, "using random MAC address %pM\n",
+			    ndev->dev_addr);
+	}
+
+	INIT_WORK(&priv->tx_timeout_task, hix5hd2_tx_timeout_task);
+	ndev->watchdog_timeo = 6 * HZ;
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+	ndev->netdev_ops = &hix5hd2_netdev_ops;
+	ndev->ethtool_ops = &hix5hd2_ethtools_ops;
+	SET_NETDEV_DEV(ndev, dev);
+
+	ret = hix5hd2_init_hw_desc_queue(priv);
+	if (ret)
+		goto out_phy_node;
+
+	netif_napi_add(ndev, &priv->napi, hix5hd2_poll, NAPI_POLL_WEIGHT);
+	ret = register_netdev(priv->netdev);
+	if (ret) {
+		netdev_err(ndev, "register_netdev failed!\n");
+		goto out_destroy_queue;
+	}
+
+	clk_disable_unprepare(priv->clk);
+
+	return ret;
+
+out_destroy_queue:
+	netif_napi_del(&priv->napi);
+	hix5hd2_destroy_hw_desc_queue(priv);
+out_phy_node:
+	of_node_put(priv->phy_node);
+err_mdiobus:
+	mdiobus_unregister(bus);
+err_free_mdio:
+	mdiobus_free(bus);
+out_disable_clk:
+	clk_disable_unprepare(priv->clk);
+out_free_netdev:
+	free_netdev(ndev);
+
+	return ret;
+}
+
+/* Platform remove: unregister the net device and the MDIO bus, free the
+ * descriptor rings, drop the PHY node reference, make sure no TX timeout
+ * work is pending and free the net device.
+ *
+ * Always returns 0.
+ */
+static int hix5hd2_dev_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct hix5hd2_priv *priv = netdev_priv(ndev);
+	struct mii_bus *bus = priv->bus;
+
+	netif_napi_del(&priv->napi);
+	unregister_netdev(ndev);
+
+	mdiobus_unregister(bus);
+	mdiobus_free(bus);
+
+	hix5hd2_destroy_hw_desc_queue(priv);
+	of_node_put(priv->phy_node);
+
+	/* priv lives inside ndev, so the work must be idle before freeing */
+	cancel_work_sync(&priv->tx_timeout_task);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static const struct of_device_id hix5hd2_of_match[] = {	/* device tree match table */
+	{.compatible = "hisilicon,hix5hd2-gmac",},
+	{},	/* sentinel */
+};
+
+MODULE_DEVICE_TABLE(of, hix5hd2_of_match);
+
+static struct platform_driver hix5hd2_dev_driver = {	/* registered by module_platform_driver() */
+	.driver = {
+		.name = "hix5hd2-gmac",	/* same name as in MODULE_ALIAS("platform:hix5hd2-gmac") */
+		.of_match_table = hix5hd2_of_match,
+	},
+	.probe = hix5hd2_dev_probe,
+	.remove = hix5hd2_dev_remove,
+};
+
+module_platform_driver(hix5hd2_dev_driver);
+
+MODULE_DESCRIPTION("HISILICON HIX5HD2 Ethernet driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hix5hd2-gmac");

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
index 95837b9..85a3866 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c

@@ -63,8 +63,8 @@
 		cmd->duplex = port->full_duplex == 1 ?
 						     DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		speed = ~0;
-		cmd->duplex = -1;
+		speed = SPEED_UNKNOWN;
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 	ethtool_cmd_speed_set(cmd, speed);
 
@@ -278,5 +278,5 @@
 
 void ehea_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops);
+	netdev->ethtool_ops = &ehea_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 538903b..a0b418e 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c

@@ -28,6 +28,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/device.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
@@ -3273,7 +3274,7 @@
 		return -EINVAL;
 	}
 
-	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	adapter = devm_kzalloc(&dev->dev, sizeof(*adapter), GFP_KERNEL);
 	if (!adapter) {
 		ret = -ENOMEM;
 		dev_err(&dev->dev, "no mem for ehea_adapter\n");
@@ -3359,7 +3360,6 @@
 
 out_free_ad:
 	list_del(&adapter->list);
-	kfree(adapter);
 
 out:
 	ehea_update_firmware_handles();
@@ -3386,7 +3386,6 @@
 	ehea_destroy_eq(adapter->neq);
 	ehea_remove_adapter_mr(adapter);
 	list_del(&adapter->list);
-	kfree(adapter);
 
 	ehea_update_firmware_handles();
 

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
index 9b03033..a0820f7 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c

@@ -103,12 +103,14 @@
 
 static void hw_queue_dtor(struct hw_queue *queue)
 {
-	int pages_per_kpage = PAGE_SIZE / queue->pagesize;
+	int pages_per_kpage;
 	int i, nr_pages;
 
 	if (!queue || !queue->queue_pages)
 		return;
 
+	pages_per_kpage = PAGE_SIZE / queue->pagesize;
+
 	nr_pages = queue->queue_length / queue->pagesize;
 
 	for (i = 0; i < nr_pages; i += pages_per_kpage)

diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index ae342fd..87bd953 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c

@@ -2879,7 +2879,7 @@
 		dev->commac.ops = &emac_commac_sg_ops;
 	} else
 		ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops);
+	ndev->ethtool_ops = &emac_ethtool_ops;
 
 	netif_carrier_off(ndev);
 

diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
index 25045ae..5727779 100644
--- a/drivers/net/ethernet/icplus/ipg.c
+++ b/drivers/net/ethernet/icplus/ipg.c

@@ -2245,7 +2245,7 @@
 	 */
 	dev->netdev_ops = &ipg_netdev_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
-	SET_ETHTOOL_OPS(dev, &ipg_ethtool_ops);
+	dev->ethtool_ops = &ipg_ethtool_ops;
 
 	rc = pci_request_regions(pdev, DRV_NAME);
 	if (rc)

diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index b56461c..9d979d7 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c

@@ -2854,7 +2854,7 @@
 	netdev->hw_features |= NETIF_F_RXALL;
 
 	netdev->netdev_ops = &e100_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
+	netdev->ethtool_ops = &e100_ethtool_ops;
 	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 

diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 73a8aee..d50f78a 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c

@@ -168,8 +168,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) ||
@@ -1460,7 +1460,8 @@
 			 * enough time to complete the receives, if it's
 			 * exceeded, break and error off
 			 */
-		} while (good_cnt < 64 && jiffies < (time + 20));
+		} while (good_cnt < 64 && time_after(time + 20, jiffies));
+
 		if (good_cnt != 64) {
 			ret_val = 13; /* ret_val is the same as mis-compare */
 			break;
@@ -1905,5 +1906,5 @@
 
 void e1000_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index c1d3fdb..e9b07cc 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c

@@ -4877,10 +4877,10 @@
 	 * since the test for a multicast frame will test positive on
 	 * a broadcast frame.
 	 */
-	if ((mac_addr[0] == (u8) 0xff) && (mac_addr[1] == (u8) 0xff))
+	if (is_broadcast_ether_addr(mac_addr))
 		/* Broadcast packet */
 		stats->bprc++;
-	else if (*mac_addr & 0x01)
+	else if (is_multicast_ether_addr(mac_addr))
 		/* Multicast packet */
 		stats->mprc++;
 

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 27058df..660971f 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c

@@ -3105,11 +3105,6 @@
 	 */
 	tx_ring = adapter->tx_ring;
 
-	if (unlikely(skb->len <= 0)) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	/* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
 	 * packets may get corrupted during padding by HW.
 	 * To WA this issue, pad all small packets manually.

diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index a5f6b11..08f22f3 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c

@@ -1365,6 +1365,7 @@
 	.setup_led		= e1000e_setup_led_generic,
 	.config_collision_dist	= e1000e_config_collision_dist_generic,
 	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
 };
 
 static const struct e1000_phy_operations es2_phy_ops = {

diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index e0aa7f1..218481e 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c

@@ -1896,6 +1896,7 @@
 	.config_collision_dist	= e1000e_config_collision_dist_generic,
 	.read_mac_addr		= e1000_read_mac_addr_82571,
 	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
 };
 
 static const struct e1000_phy_operations e82_phy_ops_igp = {

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 1471c54..7785240 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h

@@ -265,10 +265,10 @@
 	u32 tx_hwtstamp_timeouts;
 
 	/* Rx */
-	bool (*clean_rx) (struct e1000_ring *ring, int *work_done,
-			  int work_to_do) ____cacheline_aligned_in_smp;
-	void (*alloc_rx_buf) (struct e1000_ring *ring, int cleaned_count,
-			      gfp_t gfp);
+	bool (*clean_rx)(struct e1000_ring *ring, int *work_done,
+			 int work_to_do) ____cacheline_aligned_in_smp;
+	void (*alloc_rx_buf)(struct e1000_ring *ring, int cleaned_count,
+			     gfp_t gfp);
 	struct e1000_ring *rx_ring;
 
 	u32 rx_int_delay;
@@ -391,6 +391,8 @@
  * 25MHz	46-bit	2^46 / 10^9 / 3600 = 19.55 hours
  */
 #define E1000_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 60 * 4)
+#define E1000_MAX_82574_SYSTIM_REREADS	50
+#define E1000_82574_SYSTIM_EPSILON	(1ULL << 35ULL)
 
 /* hardware capability, feature, and workaround flags */
 #define FLAG_HAS_AMT                      (1 << 0)
@@ -573,35 +575,8 @@
 
 #define er32(reg)	__er32(hw, E1000_##reg)
 
-/**
- * __ew32_prepare - prepare to write to MAC CSR register on certain parts
- * @hw: pointer to the HW structure
- *
- * When updating the MAC CSR registers, the Manageability Engine (ME) could
- * be accessing the registers at the same time.  Normally, this is handled in
- * h/w by an arbiter but on some parts there is a bug that acknowledges Host
- * accesses later than it should which could result in the register to have
- * an incorrect value.  Workaround this by checking the FWSM register which
- * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set
- * and try again a number of times.
- **/
-static inline s32 __ew32_prepare(struct e1000_hw *hw)
-{
-	s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT;
-
-	while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i)
-		udelay(50);
-
-	return i;
-}
-
-static inline void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
-{
-	if (hw->adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
-		__ew32_prepare(hw);
-
-	writel(val, hw->hw_addr + reg);
-}
+s32 __ew32_prepare(struct e1000_hw *hw);
+void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val);
 
 #define ew32(reg, val)	__ew32(hw, E1000_##reg, (val))
 

diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index cad250b..815e26c 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c

@@ -159,8 +159,8 @@
 		ecmd->transceiver = XCVR_EXTERNAL;
 	}
 
-	speed = -1;
-	ecmd->duplex = -1;
+	speed = SPEED_UNKNOWN;
+	ecmd->duplex = DUPLEX_UNKNOWN;
 
 	if (netif_running(netdev)) {
 		if (netif_carrier_ok(netdev)) {
@@ -169,6 +169,7 @@
 		}
 	} else if (!pm_runtime_suspended(netdev->dev.parent)) {
 		u32 status = er32(STATUS);
+
 		if (status & E1000_STATUS_LU) {
 			if (status & E1000_STATUS_SPEED_1000)
 				speed = SPEED_1000;
@@ -783,25 +784,26 @@
 			      reg + (offset << 2), val,
 			      (test[pat] & write & mask));
 			*data = reg;
-			return 1;
+			return true;
 		}
 	}
-	return 0;
+	return false;
 }
 
 static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data,
 			      int reg, u32 mask, u32 write)
 {
 	u32 val;
+
 	__ew32(&adapter->hw, reg, write & mask);
 	val = __er32(&adapter->hw, reg);
 	if ((write & mask) != (val & mask)) {
 		e_err("set/check test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n",
 		      reg, (val & mask), (write & mask));
 		*data = reg;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
 
 #define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write)                       \
@@ -1717,6 +1719,7 @@
 	*data = 0;
 	if (hw->phy.media_type == e1000_media_type_internal_serdes) {
 		int i = 0;
+
 		hw->mac.serdes_has_link = false;
 
 		/* On some blade server designs, link establishment
@@ -2315,5 +2318,5 @@
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 6b3de5f..72f5475 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h

@@ -469,8 +469,9 @@
 	s32  (*setup_led)(struct e1000_hw *);
 	void (*write_vfta)(struct e1000_hw *, u32, u32);
 	void (*config_collision_dist)(struct e1000_hw *);
-	void (*rar_set)(struct e1000_hw *, u8 *, u32);
+	int  (*rar_set)(struct e1000_hw *, u8 *, u32);
 	s32  (*read_mac_addr)(struct e1000_hw *);
+	u32  (*rar_get_count)(struct e1000_hw *);
 };
 
 /* When to use various PHY register access functions:

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index f0bbd42..8894ab8 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c

@@ -139,8 +139,9 @@
 static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw);
-static void e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index);
-static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index);
+static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index);
+static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index);
+static u32 e1000_rar_get_count_pch_lpt(struct e1000_hw *hw);
 static s32 e1000_k1_workaround_lv(struct e1000_hw *hw);
 static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate);
 static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force);
@@ -704,6 +705,7 @@
 		mac->ops.rar_set = e1000_rar_set_pch_lpt;
 		mac->ops.setup_physical_interface =
 		    e1000_setup_copper_link_pch_lpt;
+		mac->ops.rar_get_count = e1000_rar_get_count_pch_lpt;
 	}
 
 	/* Enable PCS Lock-loss workaround for ICH8 */
@@ -1334,6 +1336,7 @@
 	if (((hw->mac.type == e1000_pch2lan) ||
 	     (hw->mac.type == e1000_pch_lpt)) && link) {
 		u32 reg;
+
 		reg = er32(STATUS);
 		if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
 			u16 emi_addr;
@@ -1634,9 +1637,9 @@
 	u32 fwsm;
 
 	fwsm = er32(FWSM);
-	return ((fwsm & E1000_ICH_FWSM_FW_VALID) &&
+	return (fwsm & E1000_ICH_FWSM_FW_VALID) &&
 		((fwsm & E1000_FWSM_MODE_MASK) ==
-		 (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)));
+		 (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT));
 }
 
 /**
@@ -1667,7 +1670,7 @@
  *  contain the MAC address but RAR[1-6] are reserved for manageability (ME).
  *  Use SHRA[0-3] in place of those reserved for ME.
  **/
-static void e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index)
+static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index)
 {
 	u32 rar_low, rar_high;
 
@@ -1689,7 +1692,7 @@
 		e1e_flush();
 		ew32(RAH(index), rar_high);
 		e1e_flush();
-		return;
+		return 0;
 	}
 
 	/* RAR[1-6] are owned by manageability.  Skip those and program the
@@ -1712,7 +1715,7 @@
 		/* verify the register updates */
 		if ((er32(SHRAL(index - 1)) == rar_low) &&
 		    (er32(SHRAH(index - 1)) == rar_high))
-			return;
+			return 0;
 
 		e_dbg("SHRA[%d] might be locked by ME - FWSM=0x%8.8x\n",
 		      (index - 1), er32(FWSM));
@@ -1720,6 +1723,43 @@
 
 out:
 	e_dbg("Failed to write receive address at index %d\n", index);
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_rar_get_count_pch_lpt - Get the number of available SHRA
+ *  @hw: pointer to the HW structure
+ *
+ *  Get the number of available receive registers that the Host can
+ *  program. SHRA[0-10] are the shared receive address registers
+ *  that are shared between the Host and manageability engine (ME).
+ *  ME can reserve any number of addresses and the host needs to be
+ *  able to tell how many available registers it has access to.
+ **/
+static u32 e1000_rar_get_count_pch_lpt(struct e1000_hw *hw)
+{
+	/* Extract the WLOCK_MAC field from FWSM */
+	u32 locked = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >>
+		     E1000_FWSM_WLOCK_MAC_SHIFT;
+
+	/* 0: all SHRA[0..10] and RAR[0] are available */
+	if (locked == 0)
+		return hw->mac.rar_entry_count;
+
+	/* 1: only RAR[0] is available */
+	if (locked == 1)
+		return 1;
+
+	/* otherwise: SHRA[0..(locked - 1)] plus RAR[0] */
+	return locked + 1;
+}
 
 /**
@@ -1733,7 +1773,7 @@
  *  contain the MAC address. SHRA[0-10] are the shared receive address
  *  registers that are shared between the Host and manageability engine (ME).
  **/
-static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index)
+static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index)
 {
 	u32 rar_low, rar_high;
 	u32 wlock_mac;
@@ -1755,7 +1795,7 @@
 		e1e_flush();
 		ew32(RAH(index), rar_high);
 		e1e_flush();
-		return;
+		return 0;
 	}
 
 	/* The manageability engine (ME) can lock certain SHRAR registers that
@@ -1787,12 +1827,13 @@
 			/* verify the register updates */
 			if ((er32(SHRAL_PCH_LPT(index - 1)) == rar_low) &&
 			    (er32(SHRAH_PCH_LPT(index - 1)) == rar_high))
-				return;
+				return 0;
 		}
 	}
 
 out:
 	e_dbg("Failed to write receive address at index %d\n", index);
+	return -E1000_ERR_CONFIG;
 }
 
 /**
@@ -4976,6 +5017,7 @@
 	/* id_led_init dependent on mac type */
 	.config_collision_dist	= e1000e_config_collision_dist_generic,
 	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
 };
 
 static const struct e1000_phy_operations ich8_phy_ops = {

diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index baa0a46..8c386f3a 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c

@@ -211,6 +211,11 @@
 	return 0;
 }
 
+/**
+ *  e1000e_rar_get_count_generic - Get the number of receive address entries
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns hw->mac.rar_entry_count unchanged.
+ **/
+u32 e1000e_rar_get_count_generic(struct e1000_hw *hw)
+{
+	return hw->mac.rar_entry_count;
+}
+
 /**
  *  e1000e_rar_set_generic - Set receive address register
  *  @hw: pointer to the HW structure
@@ -220,7 +225,7 @@
  *  Sets the receive address array register at index to the address passed
  *  in by addr.
  **/
-void e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
+int e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
 {
 	u32 rar_low, rar_high;
 
@@ -244,6 +249,8 @@
 	e1e_flush();
 	ew32(RAH(index), rar_high);
 	e1e_flush();
+
+	return 0;
 }
 
 /**

diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h
index 4e81c28..0513d90 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.h
+++ b/drivers/net/ethernet/intel/e1000e/mac.h

@@ -61,7 +61,8 @@
 void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
 
 void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
-void e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
+u32 e1000e_rar_get_count_generic(struct e1000_hw *hw);
+int e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
 void e1000e_config_collision_dist_generic(struct e1000_hw *hw);
 
 #endif

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3e69386..201cc93 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c

@@ -124,6 +124,36 @@
 };
 
 /**
+ * __ew32_prepare - prepare to write to MAC CSR register on certain parts
+ * @hw: pointer to the HW structure
+ *
+ * When updating the MAC CSR registers, the Manageability Engine (ME) could
+ * be accessing the registers at the same time.  Normally, this is handled in
+ * h/w by an arbiter but on some parts there is a bug that acknowledges Host
+ * accesses later than it should which could result in the register to have
+ * an incorrect value.  Workaround this by checking the FWSM register which
+ * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set
+ * and try again a number of times.
+ **/
+s32 __ew32_prepare(struct e1000_hw *hw)
+{
+	s32 remaining = E1000_ICH_FWSM_PCIM2PCI_COUNT;
+
+	/* Poll FWSM while the PCIM2PCI bit is set; give up (returning 0)
+	 * once the retry budget is used up, without a final delay.
+	 */
+	while (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) {
+		if (--remaining == 0)
+			break;
+		udelay(50);
+	}
+
+	return remaining;
+}
+
+/**
+ * __ew32 - write a 32-bit value to a MAC CSR register
+ * @hw: pointer to the HW structure
+ * @reg: register offset, added to hw->hw_addr
+ * @val: value to write
+ *
+ * On adapters with FLAG2_PCIM2PCI_ARBITER_WA set, __ew32_prepare() is run
+ * first; its result is ignored and the write is issued regardless.
+ **/
+void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
+{
+	struct e1000_adapter *adapter = hw->adapter;
+
+	if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+		__ew32_prepare(hw);
+
+	writel(val, hw->hw_addr + reg);
+}
+
+/**
  * e1000_regdump - register printout routine
  * @hw: pointer to the HW structure
  * @reginfo: pointer to the register info table
@@ -599,6 +629,7 @@
 
 	if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) {
 		u32 rctl = er32(RCTL);
+
 		ew32(RCTL, rctl & ~E1000_RCTL_EN);
 		e_err("ME firmware caused invalid RDT - resetting\n");
 		schedule_work(&adapter->reset_task);
@@ -615,6 +646,7 @@
 
 	if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) {
 		u32 tctl = er32(TCTL);
+
 		ew32(TCTL, tctl & ~E1000_TCTL_EN);
 		e_err("ME firmware caused invalid TDT - resetting\n");
 		schedule_work(&adapter->reset_task);
@@ -1198,6 +1230,7 @@
 	while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
 	       (count < tx_ring->count)) {
 		bool cleaned = false;
+
 		rmb();		/* read buffer_info after eop_desc */
 		for (; !cleaned; count++) {
 			tx_desc = E1000_TX_DESC(*tx_ring, i);
@@ -1753,6 +1786,7 @@
 		    adapter->flags & FLAG_RX_NEEDS_RESTART) {
 			/* disable receives */
 			u32 rctl = er32(RCTL);
+
 			ew32(RCTL, rctl & ~E1000_RCTL_EN);
 			adapter->flags |= FLAG_RESTART_NOW;
 		}
@@ -1960,6 +1994,7 @@
 	/* Workaround issue with spurious interrupts on 82574 in MSI-X mode */
 	if (hw->mac.type == e1000_82574) {
 		u32 rfctl = er32(RFCTL);
+
 		rfctl |= E1000_RFCTL_ACK_DIS;
 		ew32(RFCTL, rfctl);
 	}
@@ -2204,6 +2239,7 @@
 
 	if (adapter->msix_entries) {
 		int i;
+
 		for (i = 0; i < adapter->num_vectors; i++)
 			synchronize_irq(adapter->msix_entries[i].vector);
 	} else {
@@ -2921,6 +2957,7 @@
 
 	if (adapter->flags2 & FLAG2_DMA_BURST) {
 		u32 txdctl = er32(TXDCTL(0));
+
 		txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH |
 			    E1000_TXDCTL_WTHRESH);
 		/* set up some performance related parameters to encourage the
@@ -3239,6 +3276,7 @@
 
 		if (adapter->flags & FLAG_IS_ICH) {
 			u32 rxdctl = er32(RXDCTL(0));
+
 			ew32(RXDCTL(0), rxdctl | 0x3);
 		}
 
@@ -3303,9 +3341,11 @@
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	unsigned int rar_entries = hw->mac.rar_entry_count;
+	unsigned int rar_entries;
 	int count = 0;
 
+	rar_entries = hw->mac.ops.rar_get_count(hw);
+
 	/* save a rar entry for our hardware address */
 	rar_entries--;
 
@@ -3324,9 +3364,13 @@
 		 * combining
 		 */
 		netdev_for_each_uc_addr(ha, netdev) {
+			int rval;
+
 			if (!rar_entries)
 				break;
-			hw->mac.ops.rar_set(hw, ha->addr, rar_entries--);
+			rval = hw->mac.ops.rar_set(hw, ha->addr, rar_entries--);
+			if (rval < 0)
+				return -ENOMEM;
 			count++;
 		}
 	}
@@ -4085,12 +4129,37 @@
 	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
 						     cc);
 	struct e1000_hw *hw = &adapter->hw;
-	cycle_t systim;
+	cycle_t systim, systim_next;
 
 	/* latch SYSTIMH on read of SYSTIML */
 	systim = (cycle_t)er32(SYSTIML);
 	systim |= (cycle_t)er32(SYSTIMH) << 32;
 
+	if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
+		u64 incvalue, time_delta, rem, temp;
+		int i;
+
+		/* errata for 82574/82583 possible bad bits read from SYSTIMH/L
+		 * check to see that the time is incrementing at a reasonable
+		 * rate and is a multiple of incvalue
+		 */
+		incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+		for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+			/* latch SYSTIMH on read of SYSTIML */
+			systim_next = (cycle_t)er32(SYSTIML);
+			systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+			time_delta = systim_next - systim;
+			temp = time_delta;
+			rem = do_div(temp, incvalue);
+
+			systim = systim_next;
+
+			if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
+			    (rem == 0))
+				break;
+		}
+	}
 	return systim;
 }
 
@@ -4491,7 +4560,7 @@
 	e1000_get_phy_info(hw);
 
 	/* Enable EEE on 82579 after link up */
-	if (hw->phy.type == e1000_phy_82579)
+	if (hw->phy.type >= e1000_phy_82579)
 		e1000_set_eee_pchlan(hw);
 }
 
@@ -4695,6 +4764,7 @@
 	/* Correctable ECC Errors */
 	if (hw->mac.type == e1000_pch_lpt) {
 		u32 pbeccsts = er32(PBECCSTS);
+
 		adapter->corr_errors +=
 		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
 		adapter->uncorr_errors +=
@@ -4808,6 +4878,7 @@
 	    (adapter->flags & FLAG_RESTART_NOW)) {
 		struct e1000_hw *hw = &adapter->hw;
 		u32 rctl = er32(RCTL);
+
 		ew32(RCTL, rctl | E1000_RCTL_EN);
 		adapter->flags &= ~FLAG_RESTART_NOW;
 	}
@@ -4930,6 +5001,7 @@
 			if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) &&
 			    !txb2b) {
 				u32 tarc0;
+
 				tarc0 = er32(TARC(0));
 				tarc0 &= ~SPEED_MODE_BIT;
 				ew32(TARC(0), tarc0);
@@ -5170,7 +5242,7 @@
 	__be16 protocol;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+		return false;
 
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q))
 		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -5215,7 +5287,7 @@
 		i = 0;
 	tx_ring->next_to_use = i;
 
-	return 1;
+	return true;
 }
 
 static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb,
@@ -6209,6 +6281,7 @@
 		e1e_wphy(&adapter->hw, BM_WUS, ~0);
 	} else {
 		u32 wus = er32(WUS);
+
 		if (wus) {
 			e_info("MAC Wakeup cause - %s\n",
 			       wus & E1000_WUS_EX ? "Unicast Packet" :
@@ -7027,7 +7100,7 @@
 	.resume = e1000_io_resume,
 };
 
-static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = {
+static const struct pci_device_id e1000_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 },
@@ -7144,6 +7217,7 @@
 static int __init e1000_init_module(void)
 {
 	int ret;
+
 	pr_info("Intel(R) PRO/1000 Network Driver - %s\n",
 		e1000e_driver_version);
 	pr_info("Copyright(c) 1999 - 2014 Intel Corporation.\n");

diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index a9a976f..b1f212b 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c

@@ -398,6 +398,7 @@
 		/* Loop to allow for up to whole page write of eeprom */
 		while (widx < words) {
 			u16 word_out = data[widx];
+
 			word_out = (word_out >> 8) | (word_out << 8);
 			e1000_shift_out_eec_bits(hw, word_out, 16);
 			widx++;

diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index d0ac0f3..aa1923f 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c

@@ -436,6 +436,7 @@
 
 		if (num_IntMode > bd) {
 			unsigned int int_mode = IntMode[bd];
+
 			e1000_validate_option(&int_mode, &opt, adapter);
 			adapter->int_mode = int_mode;
 		} else {
@@ -457,6 +458,7 @@
 
 		if (num_SmartPowerDownEnable > bd) {
 			unsigned int spd = SmartPowerDownEnable[bd];
+
 			e1000_validate_option(&spd, &opt, adapter);
 			if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) && spd)
 				adapter->flags |= FLAG_SMART_POWER_DOWN;
@@ -473,6 +475,7 @@
 
 		if (num_CrcStripping > bd) {
 			unsigned int crc_stripping = CrcStripping[bd];
+
 			e1000_validate_option(&crc_stripping, &opt, adapter);
 			if (crc_stripping == OPTION_ENABLED) {
 				adapter->flags2 |= FLAG2_CRC_STRIPPING;
@@ -495,6 +498,7 @@
 
 		if (num_KumeranLockLoss > bd) {
 			unsigned int kmrn_lock_loss = KumeranLockLoss[bd];
+
 			e1000_validate_option(&kmrn_lock_loss, &opt, adapter);
 			enabled = kmrn_lock_loss;
 		}

diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 00b3fc9..b2005e1 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c

@@ -2896,6 +2896,7 @@
 		    (hw->phy.addr == 2) &&
 		    !(MAX_PHY_REG_ADDRESS & reg) && (data & (1 << 11))) {
 			u16 data2 = 0x7EFF;
+
 			ret_val = e1000_access_phy_debug_regs_hv(hw,
 								 (1 << 6) | 0x3,
 								 &data2, false);

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index beb7b43..6598584 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h

@@ -72,6 +72,7 @@
 #define I40E_MIN_NUM_DESCRIPTORS      64
 #define I40E_MIN_MSIX                 2
 #define I40E_DEFAULT_NUM_VMDQ_VSI     8 /* max 256 VSIs */
+#define I40E_MIN_VSI_ALLOC            51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */
 #define I40E_DEFAULT_QUEUES_PER_VMDQ  2 /* max 16 qps */
 #define I40E_DEFAULT_QUEUES_PER_VF    4
 #define I40E_DEFAULT_QUEUES_PER_TC    1 /* should be a power of 2 */
@@ -97,10 +98,6 @@
 #define STRINGIFY(foo)  #foo
 #define XSTRINGIFY(bar) STRINGIFY(bar)
 
-#ifndef ARCH_HAS_PREFETCH
-#define prefetch(X)
-#endif
-
 #define I40E_RX_DESC(R, i)			\
 	((ring_is_16byte_desc_enabled(R))	\
 		? (union i40e_32byte_rx_desc *)	\
@@ -157,11 +154,23 @@
 #define I40E_FDIR_BUFFER_FULL_MARGIN	10
 #define I40E_FDIR_BUFFER_HEAD_ROOM	200
 
+/* Per-PF FD stat slots (presumably flow director statistics counters -
+ * confirm).  I40E_FD_STAT_PF_COUNT is the number of slots per PF and is
+ * used as the stride by I40E_FD_STAT_PF_IDX().
+ */
+enum i40e_fd_stat_idx {
+	I40E_FD_STAT_ATR,
+	I40E_FD_STAT_SB,
+	I40E_FD_STAT_PF_COUNT
+};
+/* Index of the first slot belonging to PF pf_id */
+#define I40E_FD_STAT_PF_IDX(pf_id) ((pf_id) * I40E_FD_STAT_PF_COUNT)
+/* Index of PF pf_id's ATR slot */
+#define I40E_FD_ATR_STAT_IDX(pf_id) \
+			(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_ATR)
+/* Index of PF pf_id's SB slot */
+#define I40E_FD_SB_STAT_IDX(pf_id)  \
+			(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_SB)
+
 struct i40e_fdir_filter {
 	struct hlist_node fdir_node;
 	/* filter ipnut set */
 	u8 flow_type;
 	u8 ip4_proto;
+	/* TX packet view of src and dst */
 	__be32 dst_ip[4];
 	__be32 src_ip[4];
 	__be16 src_port;
@@ -205,7 +214,6 @@
 	unsigned long state;
 	unsigned long link_check_timeout;
 	struct msix_entry *msix_entries;
-	u16 num_msix_entries;
 	bool fc_autoneg_status;
 
 	u16 eeprom_version;
@@ -220,11 +228,14 @@
 	u16 rss_size;              /* num queues in the RSS array */
 	u16 rss_size_max;          /* HW defined max RSS queues */
 	u16 fdir_pf_filter_count;  /* num of guaranteed filters for this PF */
+	u16 num_alloc_vsi;         /* num VSIs this driver supports */
 	u8 atr_sample_rate;
 	bool wol_en;
 
 	struct hlist_head fdir_filter_list;
 	u16 fdir_pf_active_filters;
+	u16 fd_sb_cnt_idx;
+	u16 fd_atr_cnt_idx;
 
 #ifdef CONFIG_I40E_VXLAN
 	__be16  vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
@@ -266,6 +277,7 @@
 #ifdef CONFIG_I40E_VXLAN
 #define I40E_FLAG_VXLAN_FILTER_SYNC            (u64)(1 << 27)
 #endif
+#define I40E_FLAG_DCB_CAPABLE                  (u64)(1 << 29)
 
 	/* tracks features that get auto disabled by errors */
 	u64 auto_disable_flags;
@@ -300,7 +312,6 @@
 	u16 pf_seid;
 	u16 main_vsi_seid;
 	u16 mac_seid;
-	struct i40e_aqc_get_switch_config_data *sw_config;
 	struct kobject *switch_kobj;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *i40e_dbg_pf;
@@ -329,9 +340,7 @@
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_caps;
 	struct sk_buff *ptp_tx_skb;
-	struct work_struct ptp_tx_work;
 	struct hwtstamp_config tstamp_config;
-	unsigned long ptp_tx_start;
 	unsigned long last_rx_ptp_check;
 	spinlock_t tmreg_lock; /* Used to protect the device time registers. */
 	u64 ptp_base_adj;
@@ -420,6 +429,7 @@
 	struct i40e_q_vector **q_vectors;
 	int num_q_vectors;
 	int base_vector;
+	bool irqs_ready;
 
 	u16 seid;            /* HW index of this VSI (absolute index) */
 	u16 id;              /* VSI number */
@@ -540,6 +550,15 @@
 		(qw >> I40E_RX_PROG_STATUS_DESC_LENGTH_SHIFT);
 }
 
+/**
+ * i40e_get_fd_cnt_all - get the total FD filter space available
+ * @pf: pointer to the pf struct
+ *
+ * Returns the sum of pf->hw.fdir_shared_filter_count and
+ * pf->fdir_pf_filter_count.
+ **/
+static inline int i40e_get_fd_cnt_all(struct i40e_pf *pf)
+{
+	return pf->hw.fdir_shared_filter_count + pf->fdir_pf_filter_count;
+}
+
 /* needed by i40e_ethtool.c */
 int i40e_up(struct i40e_vsi *vsi);
 void i40e_down(struct i40e_vsi *vsi);

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index ed3902b..7a02749 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c

@@ -33,6 +33,16 @@
 static void i40e_resume_aq(struct i40e_hw *hw);
 
 /**
+ * i40e_is_nvm_update_op - return true if this is an NVM update operation
+ * @desc: API request descriptor
+ **/
+static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc)
+{
+	/* NOTE(review): opcode is compared directly with the opcode
+	 * constants; if the descriptor field is stored little-endian a
+	 * cpu_to_le16() conversion would be needed - confirm against
+	 * struct i40e_aq_desc.
+	 */
+	if (desc->opcode == i40e_aqc_opc_nvm_erase)
+		return true;
+
+	return desc->opcode == i40e_aqc_opc_nvm_update;
+}
+
+/**
  *  i40e_adminq_init_regs - Initialize AdminQ registers
  *  @hw: pointer to the hardware structure
  *
@@ -281,8 +291,11 @@
  *
  *  Configure base address and length registers for the transmit queue
  **/
-static void i40e_config_asq_regs(struct i40e_hw *hw)
+static i40e_status i40e_config_asq_regs(struct i40e_hw *hw)
 {
+	i40e_status ret_code = 0;
+	u32 reg = 0;
+
 	if (hw->mac.type == I40E_MAC_VF) {
 		/* configure the transmit queue */
 		wr32(hw, I40E_VF_ATQBAH1,
@@ -291,6 +304,7 @@
 		    lower_32_bits(hw->aq.asq.desc_buf.pa));
 		wr32(hw, I40E_VF_ATQLEN1, (hw->aq.num_asq_entries |
 					  I40E_VF_ATQLEN1_ATQENABLE_MASK));
+		reg = rd32(hw, I40E_VF_ATQBAL1);
 	} else {
 		/* configure the transmit queue */
 		wr32(hw, I40E_PF_ATQBAH,
@@ -299,7 +313,14 @@
 		    lower_32_bits(hw->aq.asq.desc_buf.pa));
 		wr32(hw, I40E_PF_ATQLEN, (hw->aq.num_asq_entries |
 					  I40E_PF_ATQLEN_ATQENABLE_MASK));
+		reg = rd32(hw, I40E_PF_ATQBAL);
 	}
+
+	/* Check one register to verify that config was applied */
+	if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa))
+		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
 }
 
 /**
@@ -308,8 +329,11 @@
  *
  * Configure base address and length registers for the receive (event queue)
  **/
-static void i40e_config_arq_regs(struct i40e_hw *hw)
+static i40e_status i40e_config_arq_regs(struct i40e_hw *hw)
 {
+	i40e_status ret_code = 0;
+	u32 reg = 0;
+
 	if (hw->mac.type == I40E_MAC_VF) {
 		/* configure the receive queue */
 		wr32(hw, I40E_VF_ARQBAH1,
@@ -318,6 +342,7 @@
 		    lower_32_bits(hw->aq.arq.desc_buf.pa));
 		wr32(hw, I40E_VF_ARQLEN1, (hw->aq.num_arq_entries |
 					  I40E_VF_ARQLEN1_ARQENABLE_MASK));
+		reg = rd32(hw, I40E_VF_ARQBAL1);
 	} else {
 		/* configure the receive queue */
 		wr32(hw, I40E_PF_ARQBAH,
@@ -326,10 +351,17 @@
 		    lower_32_bits(hw->aq.arq.desc_buf.pa));
 		wr32(hw, I40E_PF_ARQLEN, (hw->aq.num_arq_entries |
 					  I40E_PF_ARQLEN_ARQENABLE_MASK));
+		reg = rd32(hw, I40E_PF_ARQBAL);
 	}
 
 	/* Update tail in the HW to post pre-allocated buffers */
 	wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+
+	/* Check one register to verify that config was applied */
+	if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa))
+		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
 }
 
 /**
@@ -377,7 +409,9 @@
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	i40e_config_asq_regs(hw);
+	ret_code = i40e_config_asq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
 
 	/* success! */
 	goto init_adminq_exit;
@@ -434,7 +468,9 @@
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	i40e_config_arq_regs(hw);
+	ret_code = i40e_config_arq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
 
 	/* success! */
 	goto init_adminq_exit;
@@ -577,14 +613,14 @@
 	i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, &eetrack_hi);
 	hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
 
-	if (hw->aq.api_maj_ver != I40E_FW_API_VERSION_MAJOR ||
-	    hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) {
+	if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
 		ret_code = I40E_ERR_FIRMWARE_API_VERSION;
 		goto init_adminq_free_arq;
 	}
 
 	/* pre-emptive resource lock release */
 	i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+	hw->aq.nvm_busy = false;
 
 	ret_code = i40e_aq_set_hmc_resource_profile(hw,
 						    I40E_HMC_PROFILE_DEFAULT,
@@ -708,6 +744,12 @@
 		goto asq_send_command_exit;
 	}
 
+	if (i40e_is_nvm_update_op(desc) && hw->aq.nvm_busy) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: NVM busy.\n");
+		status = I40E_ERR_NVM;
+		goto asq_send_command_exit;
+	}
+
 	details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
 	if (cmd_details) {
 		*details = *cmd_details;
@@ -835,6 +877,9 @@
 		hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
 	}
 
+	if (i40e_is_nvm_update_op(desc))
+		hw->aq.nvm_busy = true;
+
 	/* update the error if time out occurred */
 	if ((!cmd_completed) &&
 	    (!details->async && !details->postpone)) {
@@ -929,6 +974,9 @@
 			       e->msg_size);
 	}
 
+	if (i40e_is_nvm_update_op(&e->desc))
+		hw->aq.nvm_busy = false;
+
 	/* Restore the original datalen and buffer address in the desc,
 	 * FW updates datalen to indicate the event message
 	 * size

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 993f768..b1552fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h

@@ -90,6 +90,7 @@
 	u16 fw_min_ver;                 /* firmware minor version */
 	u16 api_maj_ver;                /* api major version */
 	u16 api_min_ver;                /* api minor version */
+	bool nvm_busy;
 
 	struct mutex asq_mutex; /* Send queue lock */
 	struct mutex arq_mutex; /* Receive queue lock */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 7b6374a..15f289f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h

@@ -34,7 +34,7 @@
  */
 
 #define I40E_FW_API_VERSION_MAJOR  0x0001
-#define I40E_FW_API_VERSION_MINOR  0x0001
+#define I40E_FW_API_VERSION_MINOR  0x0002
 
 struct i40e_aq_desc {
 	__le16 flags;
@@ -123,6 +123,7 @@
 	i40e_aqc_opc_get_version      = 0x0001,
 	i40e_aqc_opc_driver_version   = 0x0002,
 	i40e_aqc_opc_queue_shutdown   = 0x0003,
+	i40e_aqc_opc_set_pf_context   = 0x0004,
 
 	/* resource ownership */
 	i40e_aqc_opc_request_resource = 0x0008,
@@ -182,9 +183,6 @@
 	i40e_aqc_opc_add_mirror_rule    = 0x0260,
 	i40e_aqc_opc_delete_mirror_rule = 0x0261,
 
-	i40e_aqc_opc_set_storm_control_config = 0x0280,
-	i40e_aqc_opc_get_storm_control_config = 0x0281,
-
 	/* DCB commands */
 	i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
 	i40e_aqc_opc_dcb_updated    = 0x0302,
@@ -207,6 +205,7 @@
 	i40e_aqc_opc_query_switching_comp_bw_config        = 0x041A,
 	i40e_aqc_opc_suspend_port_tx                       = 0x041B,
 	i40e_aqc_opc_resume_port_tx                        = 0x041C,
+	i40e_aqc_opc_configure_partition_bw                = 0x041D,
 
 	/* hmc */
 	i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
@@ -224,13 +223,15 @@
 	i40e_aqc_opc_get_partner_advt    = 0x0616,
 	i40e_aqc_opc_set_lb_modes        = 0x0618,
 	i40e_aqc_opc_get_phy_wol_caps    = 0x0621,
-	i40e_aqc_opc_set_phy_reset       = 0x0622,
+	i40e_aqc_opc_set_phy_debug	 = 0x0622,
 	i40e_aqc_opc_upload_ext_phy_fm   = 0x0625,
 
 	/* NVM commands */
-	i40e_aqc_opc_nvm_read   = 0x0701,
-	i40e_aqc_opc_nvm_erase  = 0x0702,
-	i40e_aqc_opc_nvm_update = 0x0703,
+	i40e_aqc_opc_nvm_read         = 0x0701,
+	i40e_aqc_opc_nvm_erase        = 0x0702,
+	i40e_aqc_opc_nvm_update       = 0x0703,
+	i40e_aqc_opc_nvm_config_read  = 0x0704,
+	i40e_aqc_opc_nvm_config_write = 0x0705,
 
 	/* virtualization commands */
 	i40e_aqc_opc_send_msg_to_pf   = 0x0801,
@@ -272,8 +273,6 @@
 	i40e_aqc_opc_debug_set_mode         = 0xFF01,
 	i40e_aqc_opc_debug_read_reg         = 0xFF03,
 	i40e_aqc_opc_debug_write_reg        = 0xFF04,
-	i40e_aqc_opc_debug_read_reg_sg      = 0xFF05,
-	i40e_aqc_opc_debug_write_reg_sg     = 0xFF06,
 	i40e_aqc_opc_debug_modify_reg       = 0xFF07,
 	i40e_aqc_opc_debug_dump_internals   = 0xFF08,
 	i40e_aqc_opc_debug_modify_internals = 0xFF09,
@@ -341,6 +340,14 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
 
+/* Set PF context (0x0004, direct): opcode i40e_aqc_opc_set_pf_context */
+struct i40e_aqc_set_pf_context {
+	u8	pf_id;
+	u8	reserved[15];	/* with pf_id: 16 bytes in total */
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_pf_context);
+
 /* Request resource ownership (direct 0x0008)
  * Release resource ownership (direct 0x0009)
  */
@@ -1289,27 +1296,6 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
 
-/* Set Storm Control Configuration (direct 0x0280)
- * Get Storm Control Configuration (direct 0x0281)
- *    the command and response use the same descriptor structure
- */
-struct i40e_aqc_set_get_storm_control_config {
-	__le32 broadcast_threshold;
-	__le32 multicast_threshold;
-	__le32 control_flags;
-#define I40E_AQC_STORM_CONTROL_MDIPW            0x01
-#define I40E_AQC_STORM_CONTROL_MDICW            0x02
-#define I40E_AQC_STORM_CONTROL_BDIPW            0x04
-#define I40E_AQC_STORM_CONTROL_BDICW            0x08
-#define I40E_AQC_STORM_CONTROL_BIDU             0x10
-#define I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT   8
-#define I40E_AQC_STORM_CONTROL_INTERVAL_MASK    (0x3FF << \
-					I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT)
-	u8     reserved[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_get_storm_control_config);
-
 /* DCB 0x03xx*/
 
 /* PFC Ignore (direct 0x0301)
@@ -1427,11 +1413,12 @@
 struct i40e_aqc_configure_switching_comp_ets_data {
 	u8     reserved[4];
 	u8     tc_valid_bits;
-	u8     reserved1;
+	u8     seepage;
+#define I40E_AQ_ETS_SEEPAGE_EN_MASK     0x1
 	u8     tc_strict_priority_flags;
-	u8     reserved2[17];
+	u8     reserved1[17];
 	u8     tc_bw_share_credits[8];
-	u8     reserved3[96];
+	u8     reserved2[96];
 };
 
 /* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
@@ -1499,6 +1486,15 @@
  * (direct 0x041B and 0x041C) uses the generic SEID struct
  */
 
+/* Configure partition BW
+ * (indirect 0x041D)
+ */
+struct i40e_aqc_configure_partition_bw_data {
+	__le16 pf_valid_bits;   /* NOTE(review): likely 1 bit per entry below */
+	u8     min_bw[16];      /* guaranteed bandwidth */
+	u8     max_bw[16];      /* bandwidth limit */
+};
+
 /* Get and set the active HMC resource profile and status.
  * (direct 0x0500) and (direct 0x0501)
  */
@@ -1539,6 +1535,8 @@
 	I40E_PHY_TYPE_XLPPI			= 0x9,
 	I40E_PHY_TYPE_40GBASE_CR4_CU		= 0xA,
 	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
+	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
+	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
 	I40E_PHY_TYPE_100BASE_TX		= 0x11,
 	I40E_PHY_TYPE_1000BASE_T		= 0x12,
 	I40E_PHY_TYPE_10GBASE_T			= 0x13,
@@ -1549,7 +1547,10 @@
 	I40E_PHY_TYPE_40GBASE_CR4		= 0x18,
 	I40E_PHY_TYPE_40GBASE_SR4		= 0x19,
 	I40E_PHY_TYPE_40GBASE_LR4		= 0x1A,
-	I40E_PHY_TYPE_20GBASE_KR2		= 0x1B,
+	I40E_PHY_TYPE_1000BASE_SX		= 0x1B,
+	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
+	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
+	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
 	I40E_PHY_TYPE_MAX
 };
 
@@ -1583,11 +1584,8 @@
 #define I40E_AQ_PHY_FLAG_PAUSE_TX         0x01
 #define I40E_AQ_PHY_FLAG_PAUSE_RX         0x02
 #define I40E_AQ_PHY_FLAG_LOW_POWER        0x04
-#define I40E_AQ_PHY_FLAG_AN_SHIFT         3
-#define I40E_AQ_PHY_FLAG_AN_MASK          (0x3 << I40E_AQ_PHY_FLAG_AN_SHIFT)
-#define I40E_AQ_PHY_FLAG_AN_OFF           0x00 /* link forced on */
-#define I40E_AQ_PHY_FLAG_AN_OFF_LINK_DOWN 0x01
-#define I40E_AQ_PHY_FLAG_AN_ON            0x02
+#define I40E_AQ_PHY_LINK_ENABLED		  0x08
+#define I40E_AQ_PHY_AN_ENABLED			  0x10
 #define I40E_AQ_PHY_FLAG_MODULE_QUAL      0x20
 	__le16 eee_capability;
 #define I40E_AQ_EEE_100BASE_TX       0x0002
@@ -1696,6 +1694,7 @@
 #define I40E_AQ_LINK_TX_ACTIVE       0x00
 #define I40E_AQ_LINK_TX_DRAINED      0x01
 #define I40E_AQ_LINK_TX_FLUSHED      0x03
+#define I40E_AQ_LINK_FORCED_40G      0x10
 	u8     loopback;         /* use defines from i40e_aqc_set_lb_mode */
 	__le16 max_frame_size;
 	u8     config;
@@ -1747,14 +1746,21 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
 
-/* Set PHY Reset command (0x0622) */
-struct i40e_aqc_set_phy_reset {
-	u8     reset_flags;
-#define I40E_AQ_PHY_RESET_REQUEST  0x02
+/* Set PHY Debug command (0x0622) */
+struct i40e_aqc_set_phy_debug {
+	u8     command_flags;
+#define I40E_AQ_PHY_DEBUG_RESET_INTERNAL	0x02
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT	2
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_MASK	(0x03 << \
+					I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT)
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_NONE	0x00
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_HARD	0x01
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SOFT	0x02
+#define I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW	0x10
 	u8     reserved[15];
 };
 
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_reset);
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_debug);
 
 enum i40e_aq_phy_reg_type {
 	I40E_AQC_PHY_REG_INTERNAL         = 0x1,
@@ -1779,6 +1785,47 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
 
+/* NVM Config Read (indirect 0x0704) */
+struct i40e_aqc_nvm_config_read {
+	__le16 cmd_flags;
+#define ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
+#define ANVM_READ_SINGLE_FEATURE		0
+#define ANVM_READ_MULTIPLE_FEATURES		1
+	__le16 element_count;
+	__le16 element_id;		/* Feature/field ID */
+	u8     reserved[2];
+	__le32 address_high;
+	__le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_read);
+
+/* NVM Config Write (indirect 0x0705) */
+struct i40e_aqc_nvm_config_write {
+	__le16 cmd_flags;
+	__le16 element_count;
+	u8     reserved[4];
+	__le32 address_high;
+	__le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
+
+struct i40e_aqc_nvm_config_data_feature {
+	__le16 feature_id;
+	__le16 instance_id;
+	__le16 feature_options;
+	__le16 feature_selection;
+};
+
+struct i40e_aqc_nvm_config_data_immediate_field {
+#define ANVM_FEATURE_OR_IMMEDIATE_MASK	0x2
+	__le16 field_id;
+	__le16 instance_id;
+	__le16 field_options;
+	__le16 field_value;
+};
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 922cdcc..6e65f19 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c

@@ -43,12 +43,10 @@
 	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
 		switch (hw->device_id) {
 		case I40E_DEV_ID_SFP_XL710:
-		case I40E_DEV_ID_SFP_X710:
 		case I40E_DEV_ID_QEMU:
 		case I40E_DEV_ID_KX_A:
 		case I40E_DEV_ID_KX_B:
 		case I40E_DEV_ID_KX_C:
-		case I40E_DEV_ID_KX_D:
 		case I40E_DEV_ID_QSFP_A:
 		case I40E_DEV_ID_QSFP_B:
 		case I40E_DEV_ID_QSFP_C:
@@ -133,7 +131,11 @@
  **/
 bool i40e_check_asq_alive(struct i40e_hw *hw)
 {
-	return !!(rd32(hw, hw->aq.asq.len) & I40E_PF_ATQLEN_ATQENABLE_MASK);
+	/* nothing to read until asq.len holds a (non-zero) register offset */
+	if (!hw->aq.asq.len)
+		return false;
+	return !!(rd32(hw, hw->aq.asq.len) &
+		  I40E_PF_ATQLEN_ATQENABLE_MASK);
 }
 
 /**
@@ -653,6 +655,36 @@
 }
 
 /**
+ * i40e_pre_tx_queue_cfg - pre tx queue configure
+ * @hw: pointer to the HW structure
+ * @queue: target pf queue index (func_caps.base_queue is added to it)
+ * @enable: true requests CLEAR_QDIS, false requests SET_QDIS
+ *
+ * Handles hw requirement to indicate intention to enable or disable the
+ * target queue: a read-modify-write of GLLAN_TXPRE_QDIS(absolute index / 128).
+ **/
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
+{
+	u32 abs_queue_idx = hw->func_caps.base_queue + queue;
+	u32 reg_block = abs_queue_idx / 128;
+	u32 reg_val;
+
+	/* QINDX is relative to the 128-queue block chosen by reg_block */
+	abs_queue_idx %= 128;
+
+	reg_val = rd32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block));
+	reg_val &= ~I40E_GLLAN_TXPRE_QDIS_QINDX_MASK;
+	reg_val |= (abs_queue_idx << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT);
+
+	if (enable)
+		reg_val |= I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK;
+	else
+		reg_val |= I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK;
+
+	wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val);
+}
+
+/**
  * i40e_get_media_type - Gets media type
  * @hw: pointer to the hardware structure
  **/
@@ -699,7 +731,7 @@
 }
 
 #define I40E_PF_RESET_WAIT_COUNT_A0	200
-#define I40E_PF_RESET_WAIT_COUNT	10
+#define I40E_PF_RESET_WAIT_COUNT	100
 /**
  * i40e_pf_reset - Reset the PF
  * @hw: pointer to the hardware structure
@@ -789,6 +821,9 @@
 {
 	u32 reg;
 
+	if (i40e_check_asq_alive(hw))
+		i40e_aq_clear_pxe_mode(hw, NULL);
+
 	/* Clear single descriptor fetch/write-back mode */
 	reg = rd32(hw, I40E_GLLAN_RCTL_0);
 
@@ -907,6 +942,33 @@
 /* Admin command wrappers */
 
 /**
+ * i40e_aq_clear_pxe_mode - send the "clear PXE mode" admin queue command
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Tell the firmware that the driver is taking over from PXE.  Register
+ * GLLAN_RCTL_0 is written with 0x1 afterwards whatever the command status;
+ * that status is what gets returned.
+ **/
+i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_clear_pxe *pxe;
+	struct i40e_aq_desc desc;
+	i40e_status ret;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_clear_pxe_mode);
+	pxe = (struct i40e_aqc_clear_pxe *)&desc.params.raw;
+	pxe->rx_cnt = 0x2;
+
+	ret = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	wr32(hw, I40E_GLLAN_RCTL_0, 0x1);
+
+	return ret;
+}
+
+/**
  * i40e_aq_set_link_restart_an
  * @hw: pointer to the hw struct
  * @cmd_details: pointer to command details structure or NULL
@@ -975,6 +1037,13 @@
 	hw_link_info->an_info = resp->an_info;
 	hw_link_info->ext_info = resp->ext_info;
 	hw_link_info->loopback = resp->loopback;
+	hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size);
+	hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK;
+
+	if (resp->config & I40E_AQ_CONFIG_CRC_ENA)
+		hw_link_info->crc_enable = true;
+	else
+		hw_link_info->crc_enable = false;
 
 	if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_ENABLE))
 		hw_link_info->lse_enable = true;
@@ -1021,8 +1090,6 @@
 	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
 
 	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
-	if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
 
 	status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
 				    sizeof(vsi_ctx->info), cmd_details);
@@ -1163,8 +1230,6 @@
 	cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid);
 
 	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
-	if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
 
 	status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
 				    sizeof(vsi_ctx->info), NULL);
@@ -1203,8 +1268,6 @@
 	cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid);
 
 	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
-	if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
 
 	status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
 				    sizeof(vsi_ctx->info), cmd_details);
@@ -1300,6 +1363,7 @@
 	struct i40e_aqc_driver_version *cmd =
 		(struct i40e_aqc_driver_version *)&desc.params.raw;
 	i40e_status status;
+	u16 len;
 
 	if (dv == NULL)
 		return I40E_ERR_PARAM;
@@ -1311,7 +1375,14 @@
 	cmd->driver_minor_ver = dv->minor_version;
 	cmd->driver_build_ver = dv->build_version;
 	cmd->driver_subbuild_ver = dv->subbuild_version;
-	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	len = 0;
+	while (len < sizeof(dv->driver_string) &&
+	       (dv->driver_string[len] < 0x80) &&
+	       dv->driver_string[len])
+		len++;
+	status = i40e_asq_send_command(hw, &desc, dv->driver_string,
+				       len, cmd_details);
 
 	return status;
 }
@@ -1900,6 +1971,12 @@
 		}
 	}
 
+	/* Software override ensuring FCoE is disabled if npar or mfp
+	 * mode because it is not supported in these modes.
+	 */
+	if (p->npar_enable || p->mfp_mode_1)
+		p->fcoe = false;
+
 	/* additional HW specific goodies that might
 	 * someday be HW version specific
 	 */
@@ -2094,8 +2171,8 @@
  * @cmd_details: pointer to command details structure or NULL
  **/
 i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
-				u16 udp_port, u8 header_len,
-				u8 protocol_index, u8 *filter_index,
+				u16 udp_port, u8 protocol_index,
+				u8 *filter_index,
 				struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
@@ -2253,6 +2330,35 @@
 }
 
 /**
+ * i40e_aq_config_vsi_bw_limit - send the configure-VSI-BW-limit AQ command
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @credit: BW limit credits (0 = disabled)
+ * @max_credit: Max BW limit credits
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the status of i40e_asq_send_command(); no data buffer is attached.
+ **/
+i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+				u16 seid, u16 credit, u8 max_credit,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_configure_vsi_bw_limit *bw_limit;
+	struct i40e_aq_desc desc;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_configure_vsi_bw_limit);
+
+	/* multi-byte descriptor fields are converted to little-endian */
+	bw_limit = (struct i40e_aqc_configure_vsi_bw_limit *)&desc.params.raw;
+	bw_limit->max_credit = max_credit;
+	bw_limit->credit = cpu_to_le16(credit);
+	bw_limit->vsi_seid = cpu_to_le16(seid);
+
+	return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
  * i40e_aq_config_vsi_tc_bw - Config VSI BW Allocation per TC
  * @hw: pointer to the hw struct
  * @seid: VSI seid
@@ -2405,7 +2511,7 @@
 {
 	u32 fcoe_cntx_size, fcoe_filt_size;
 	u32 pe_cntx_size, pe_filt_size;
-	u32 fcoe_fmax, pe_fmax;
+	u32 fcoe_fmax;
 	u32 val;
 
 	/* Validate FCoE settings passed */
@@ -2480,13 +2586,6 @@
 	if (fcoe_filt_size + fcoe_cntx_size >  fcoe_fmax)
 		return I40E_ERR_INVALID_SIZE;
 
-	/* PEHSIZE + PEDSIZE should not be greater than PMPEXFMAX */
-	val = rd32(hw, I40E_GLHMC_PEXFMAX);
-	pe_fmax = (val & I40E_GLHMC_PEXFMAX_PMPEXFMAX_MASK)
-		   >> I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT;
-	if (pe_filt_size + pe_cntx_size >  pe_fmax)
-		return I40E_ERR_INVALID_SIZE;
-
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 6e8103a..00bc0cd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c

@@ -232,7 +232,7 @@
 			      struct i40e_ieee_app_priority_table *app)
 {
 	int v, err;
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] && pf->vsi[v]->netdev) {
 			err = i40e_dcbnl_vsi_del_app(pf->vsi[v], app);
 			if (err)
@@ -302,8 +302,8 @@
 	struct net_device *dev = vsi->netdev;
 	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
 
-	/* DCB not enabled */
-	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+	/* Not DCB capable */
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
 		return;
 
 	/* Do not setup DCB NL ops for MFP mode */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 3c37386..cffdfc2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c

@@ -45,7 +45,7 @@
 	if (seid < 0)
 		dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
 	else
-		for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+		for (i = 0; i < pf->num_alloc_vsi; i++)
 			if (pf->vsi[i] && (pf->vsi[i]->seid == seid))
 				return pf->vsi[i];
 
@@ -843,7 +843,7 @@
 {
 	int i;
 
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i])
 			dev_info(&pf->pdev->dev, "dump vsi[%d]: %d\n",
 				 i, pf->vsi[i]->seid);
@@ -862,12 +862,11 @@
 		 "    rx_bytes = \t%lld \trx_unicast = \t\t%lld \trx_multicast = \t%lld\n",
 		estats->rx_bytes, estats->rx_unicast, estats->rx_multicast);
 	dev_info(&pf->pdev->dev,
-		 "    rx_broadcast = \t%lld \trx_discards = \t\t%lld \trx_errors = \t%lld\n",
-		 estats->rx_broadcast, estats->rx_discards, estats->rx_errors);
+		 "    rx_broadcast = \t%lld \trx_discards = \t\t%lld\n",
+		 estats->rx_broadcast, estats->rx_discards);
 	dev_info(&pf->pdev->dev,
-		 "    rx_missed = \t%lld \trx_unknown_protocol = \t%lld \ttx_bytes = \t%lld\n",
-		 estats->rx_missed, estats->rx_unknown_protocol,
-		 estats->tx_bytes);
+		 "    rx_unknown_protocol = \t%lld \ttx_bytes = \t%lld\n",
+		 estats->rx_unknown_protocol, estats->tx_bytes);
 	dev_info(&pf->pdev->dev,
 		 "    tx_unicast = \t%lld \ttx_multicast = \t\t%lld \ttx_broadcast = \t%lld\n",
 		 estats->tx_unicast, estats->tx_multicast, estats->tx_broadcast);
@@ -1527,7 +1526,7 @@
 			cnt = sscanf(&cmd_buf[15], "%i", &vsi_seid);
 			if (cnt == 0) {
 				int i;
-				for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+				for (i = 0; i < pf->num_alloc_vsi; i++)
 					i40e_vsi_reset_stats(pf->vsi[i]);
 				dev_info(&pf->pdev->dev, "vsi clear stats called for all vsi's\n");
 			} else if (cnt == 1) {
@@ -1744,10 +1743,6 @@
 		i40e_dbg_cmd_fd_ctrl(pf, I40E_FLAG_FD_ATR_ENABLED, false);
 	} else if (strncmp(cmd_buf, "fd-atr on", 9) == 0) {
 		i40e_dbg_cmd_fd_ctrl(pf, I40E_FLAG_FD_ATR_ENABLED, true);
-	} else if (strncmp(cmd_buf, "fd-sb off", 9) == 0) {
-		i40e_dbg_cmd_fd_ctrl(pf, I40E_FLAG_FD_SB_ENABLED, false);
-	} else if (strncmp(cmd_buf, "fd-sb on", 8) == 0) {
-		i40e_dbg_cmd_fd_ctrl(pf, I40E_FLAG_FD_SB_ENABLED, true);
 	} else if (strncmp(cmd_buf, "lldp", 4) == 0) {
 		if (strncmp(&cmd_buf[5], "stop", 4) == 0) {
 			int ret;
@@ -1967,8 +1962,6 @@
 		dev_info(&pf->pdev->dev, "  rem fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
 		dev_info(&pf->pdev->dev, "  fd-atr off\n");
 		dev_info(&pf->pdev->dev, "  fd-atr on\n");
-		dev_info(&pf->pdev->dev, "  fd-sb off\n");
-		dev_info(&pf->pdev->dev, "  fd-sb on\n");
 		dev_info(&pf->pdev->dev, "  lldp start\n");
 		dev_info(&pf->pdev->dev, "  lldp stop\n");
 		dev_info(&pf->pdev->dev, "  lldp get local\n");

diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index b2380da..56438bd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c

@@ -67,17 +67,25 @@
 
 struct i40e_diag_reg_test_info i40e_reg_list[] = {
 	/* offset               mask         elements   stride */
-	{I40E_QTX_CTL(0),       0x0000FFBF,   4, I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
-	{I40E_PFINT_ITR0(0),    0x00000FFF,   3, I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)},
-	{I40E_PFINT_ITRN(0, 0), 0x00000FFF,   8, I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)},
-	{I40E_PFINT_ITRN(1, 0), 0x00000FFF,   8, I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)},
-	{I40E_PFINT_ITRN(2, 0), 0x00000FFF,   8, I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)},
-	{I40E_PFINT_STAT_CTL0,  0x0000000C,   1, 0},
-	{I40E_PFINT_LNKLST0,    0x00001FFF,   1, 0},
-	{I40E_PFINT_LNKLSTN(0), 0x000007FF,  64, I40E_PFINT_LNKLSTN(1) - I40E_PFINT_LNKLSTN(0)},
-	{I40E_QINT_TQCTL(0),    0x000000FF,  64, I40E_QINT_TQCTL(1) - I40E_QINT_TQCTL(0)},
-	{I40E_QINT_RQCTL(0),    0x000000FF,  64, I40E_QINT_RQCTL(1) - I40E_QINT_RQCTL(0)},
-	{I40E_PFINT_ICR0_ENA,   0xF7F20000,   1, 0},
+	{I40E_QTX_CTL(0),       0x0000FFBF, 1,
+		I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
+	{I40E_PFINT_ITR0(0),    0x00000FFF, 3,
+		I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)},
+	{I40E_PFINT_ITRN(0, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)},
+	{I40E_PFINT_ITRN(1, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)},
+	{I40E_PFINT_ITRN(2, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)},
+	{I40E_PFINT_STAT_CTL0,  0x0000000C, 1, 0},
+	{I40E_PFINT_LNKLST0,    0x00001FFF, 1, 0},
+	{I40E_PFINT_LNKLSTN(0), 0x000007FF, 1,
+		I40E_PFINT_LNKLSTN(1) - I40E_PFINT_LNKLSTN(0)},
+	{I40E_QINT_TQCTL(0),    0x000000FF, 1,
+		I40E_QINT_TQCTL(1) - I40E_QINT_TQCTL(0)},
+	{I40E_QINT_RQCTL(0),    0x000000FF, 1,
+		I40E_QINT_RQCTL(1) - I40E_QINT_RQCTL(0)},
+	{I40E_PFINT_ICR0_ENA,   0xF7F20000, 1, 0},
 	{ 0 }
 };
 
@@ -93,9 +101,25 @@
 	u32 reg, mask;
 	u32 i, j;
 
-	for (i = 0; (i40e_reg_list[i].offset != 0) && !ret_code; i++) {
+	for (i = 0; i40e_reg_list[i].offset != 0 &&
+					     !ret_code; i++) {
+
+		/* set actual reg range for dynamically allocated resources */
+		if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) &&
+		    hw->func_caps.num_tx_qp != 0)
+			i40e_reg_list[i].elements = hw->func_caps.num_tx_qp;
+		if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) ||
+		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) ||
+		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) ||
+		     i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) ||
+		     i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) &&
+		    hw->func_caps.num_msix_vectors != 0)
+			i40e_reg_list[i].elements =
+				hw->func_caps.num_msix_vectors - 1;
+
+		/* test register access */
 		mask = i40e_reg_list[i].mask;
-		for (j = 0; (j < i40e_reg_list[i].elements) && !ret_code; j++) {
+		for (j = 0; j < i40e_reg_list[i].elements && !ret_code; j++) {
 			reg = i40e_reg_list[i].offset +
 			      (j * i40e_reg_list[i].stride);
 			ret_code = i40e_diag_reg_pattern_test(hw, reg, mask);

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 03d99cb..4a488ff 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c

@@ -46,6 +46,8 @@
 		I40E_STAT(struct i40e_pf, _name, _stat)
 #define I40E_VSI_STAT(_name, _stat) \
 		I40E_STAT(struct i40e_vsi, _name, _stat)
+#define I40E_VEB_STAT(_name, _stat) \
+		I40E_STAT(struct i40e_veb, _name, _stat)
 
 static const struct i40e_stats i40e_gstrings_net_stats[] = {
 	I40E_NETDEV_STAT(rx_packets),
@@ -56,12 +58,36 @@
 	I40E_NETDEV_STAT(tx_errors),
 	I40E_NETDEV_STAT(rx_dropped),
 	I40E_NETDEV_STAT(tx_dropped),
-	I40E_NETDEV_STAT(multicast),
 	I40E_NETDEV_STAT(collisions),
 	I40E_NETDEV_STAT(rx_length_errors),
 	I40E_NETDEV_STAT(rx_crc_errors),
 };
 
+static const struct i40e_stats i40e_gstrings_veb_stats[] = {
+	I40E_VEB_STAT("rx_bytes", stats.rx_bytes),
+	I40E_VEB_STAT("tx_bytes", stats.tx_bytes),
+	I40E_VEB_STAT("rx_unicast", stats.rx_unicast),
+	I40E_VEB_STAT("tx_unicast", stats.tx_unicast),
+	I40E_VEB_STAT("rx_multicast", stats.rx_multicast),
+	I40E_VEB_STAT("tx_multicast", stats.tx_multicast),
+	I40E_VEB_STAT("rx_broadcast", stats.rx_broadcast),
+	I40E_VEB_STAT("tx_broadcast", stats.tx_broadcast),
+	I40E_VEB_STAT("rx_discards", stats.rx_discards),
+	I40E_VEB_STAT("tx_discards", stats.tx_discards),
+	I40E_VEB_STAT("tx_errors", stats.tx_errors),
+	I40E_VEB_STAT("rx_unknown_protocol", stats.rx_unknown_protocol),
+};
+
+static const struct i40e_stats i40e_gstrings_misc_stats[] = {
+	I40E_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+	I40E_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+	I40E_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+	I40E_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	I40E_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+	I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+};
+
 static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
 				 struct ethtool_rxnfc *cmd);
 
@@ -78,7 +104,12 @@
 static struct i40e_stats i40e_gstrings_stats[] = {
 	I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes),
 	I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes),
-	I40E_PF_STAT("rx_errors", stats.eth.rx_errors),
+	I40E_PF_STAT("rx_unicast", stats.eth.rx_unicast),
+	I40E_PF_STAT("tx_unicast", stats.eth.tx_unicast),
+	I40E_PF_STAT("rx_multicast", stats.eth.rx_multicast),
+	I40E_PF_STAT("tx_multicast", stats.eth.tx_multicast),
+	I40E_PF_STAT("rx_broadcast", stats.eth.rx_broadcast),
+	I40E_PF_STAT("tx_broadcast", stats.eth.tx_broadcast),
 	I40E_PF_STAT("tx_errors", stats.eth.tx_errors),
 	I40E_PF_STAT("rx_dropped", stats.eth.rx_discards),
 	I40E_PF_STAT("tx_dropped", stats.eth.tx_discards),
@@ -88,6 +119,7 @@
 	I40E_PF_STAT("mac_local_faults", stats.mac_local_faults),
 	I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
 	I40E_PF_STAT("tx_timeout", tx_timeout_count),
+	I40E_PF_STAT("rx_csum_bad", hw_csum_rx_error),
 	I40E_PF_STAT("rx_length_errors", stats.rx_length_errors),
 	I40E_PF_STAT("link_xon_rx", stats.link_xon_rx),
 	I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
@@ -112,8 +144,10 @@
 	I40E_PF_STAT("rx_oversize", stats.rx_oversize),
 	I40E_PF_STAT("rx_jabber", stats.rx_jabber),
 	I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests),
-	I40E_PF_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
 	I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	I40E_PF_STAT("fdir_atr_match", stats.fd_atr_match),
+	I40E_PF_STAT("fdir_sb_match", stats.fd_sb_match),
+
 	/* LPI stats */
 	I40E_PF_STAT("tx_lpi_status", stats.tx_lpi_status),
 	I40E_PF_STAT("rx_lpi_status", stats.rx_lpi_status),
@@ -122,11 +156,14 @@
 };
 
 #define I40E_QUEUE_STATS_LEN(n) \
-  ((((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs + \
-    ((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs) * 2)
+	(((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs \
+	    * 2 /* Tx and Rx together */                                     \
+	    * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
 #define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
 #define I40E_NETDEV_STATS_LEN   ARRAY_SIZE(i40e_gstrings_net_stats)
+#define I40E_MISC_STATS_LEN	ARRAY_SIZE(i40e_gstrings_misc_stats)
 #define I40E_VSI_STATS_LEN(n)   (I40E_NETDEV_STATS_LEN + \
+				 I40E_MISC_STATS_LEN + \
 				 I40E_QUEUE_STATS_LEN((n)))
 #define I40E_PFC_STATS_LEN ( \
 		(FIELD_SIZEOF(struct i40e_pf, stats.priority_xoff_rx) + \
@@ -135,6 +172,7 @@
 		 FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_tx) + \
 		 FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_2_xoff)) \
 		 / sizeof(u64))
+#define I40E_VEB_STATS_LEN	ARRAY_SIZE(i40e_gstrings_veb_stats)
 #define I40E_PF_STATS_LEN(n)	(I40E_GLOBAL_STATS_LEN + \
 				 I40E_PFC_STATS_LEN + \
 				 I40E_VSI_STATS_LEN((n)))
@@ -620,10 +658,15 @@
 	case ETH_SS_TEST:
 		return I40E_TEST_LEN;
 	case ETH_SS_STATS:
-		if (vsi == pf->vsi[pf->lan_vsi])
-			return I40E_PF_STATS_LEN(netdev);
-		else
+		if (vsi == pf->vsi[pf->lan_vsi]) {
+			int len = I40E_PF_STATS_LEN(netdev);
+
+			if (pf->lan_veb != I40E_NO_VEB)
+				len += I40E_VEB_STATS_LEN;
+			return len;
+		} else {
 			return I40E_VSI_STATS_LEN(netdev);
+		}
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -633,6 +676,7 @@
 				   struct ethtool_stats *stats, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	int i = 0;
@@ -648,10 +692,14 @@
 		data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
+	for (j = 0; j < I40E_MISC_STATS_LEN; j++) {
+		p = (char *)vsi + i40e_gstrings_misc_stats[j].stat_offset;
+		data[i++] = (i40e_gstrings_misc_stats[j].sizeof_stat ==
+			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
 	rcu_read_lock();
-	for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) {
-		struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
-		struct i40e_ring *rx_ring;
+	for (j = 0; j < vsi->num_queue_pairs; j++) {
+		tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
 
 		if (!tx_ring)
 			continue;
@@ -662,33 +710,45 @@
 			data[i] = tx_ring->stats.packets;
 			data[i + 1] = tx_ring->stats.bytes;
 		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
+		i += 2;
 
 		/* Rx ring is the 2nd half of the queue pair */
 		rx_ring = &tx_ring[1];
 		do {
 			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			data[i + 2] = rx_ring->stats.packets;
-			data[i + 3] = rx_ring->stats.bytes;
+			data[i] = rx_ring->stats.packets;
+			data[i + 1] = rx_ring->stats.bytes;
 		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
+		i += 2;
 	}
 	rcu_read_unlock();
-	if (vsi == pf->vsi[pf->lan_vsi]) {
-		for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
-			p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
-			data[i++] = (i40e_gstrings_stats[j].sizeof_stat ==
-				   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	if (vsi != pf->vsi[pf->lan_vsi])
+		return;
+
+	if (pf->lan_veb != I40E_NO_VEB) {
+		struct i40e_veb *veb = pf->veb[pf->lan_veb];
+		for (j = 0; j < I40E_VEB_STATS_LEN; j++) {
+			p = (char *)veb;
+			p += i40e_gstrings_veb_stats[j].stat_offset;
+			data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat ==
+				     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 		}
-		for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-			data[i++] = pf->stats.priority_xon_tx[j];
-			data[i++] = pf->stats.priority_xoff_tx[j];
-		}
-		for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-			data[i++] = pf->stats.priority_xon_rx[j];
-			data[i++] = pf->stats.priority_xoff_rx[j];
-		}
-		for (j = 0; j < I40E_MAX_USER_PRIORITY; j++)
-			data[i++] = pf->stats.priority_xon_2_xoff[j];
 	}
+	for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
+		p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
+		data[i++] = (i40e_gstrings_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_tx[j];
+		data[i++] = pf->stats.priority_xoff_tx[j];
+	}
+	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_rx[j];
+		data[i++] = pf->stats.priority_xoff_rx[j];
+	}
+	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++)
+		data[i++] = pf->stats.priority_xon_2_xoff[j];
 }
 
 static void i40e_get_strings(struct net_device *netdev, u32 stringset,
@@ -713,6 +773,11 @@
 				 i40e_gstrings_net_stats[i].stat_string);
 			p += ETH_GSTRING_LEN;
 		}
+		for (i = 0; i < I40E_MISC_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 i40e_gstrings_misc_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
 			p += ETH_GSTRING_LEN;
@@ -723,33 +788,41 @@
 			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
-		if (vsi == pf->vsi[pf->lan_vsi]) {
-			for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
-				snprintf(p, ETH_GSTRING_LEN, "port.%s",
-					 i40e_gstrings_stats[i].stat_string);
+		if (vsi != pf->vsi[pf->lan_vsi])
+			return;
+
+		if (pf->lan_veb != I40E_NO_VEB) {
+			for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
+				snprintf(p, ETH_GSTRING_LEN, "veb.%s",
+					i40e_gstrings_veb_stats[i].stat_string);
 				p += ETH_GSTRING_LEN;
 			}
-			for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-				snprintf(p, ETH_GSTRING_LEN,
-					 "port.tx_priority_%u_xon", i);
-				p += ETH_GSTRING_LEN;
-				snprintf(p, ETH_GSTRING_LEN,
-					 "port.tx_priority_%u_xoff", i);
-				p += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-				snprintf(p, ETH_GSTRING_LEN,
-					 "port.rx_priority_%u_xon", i);
-				p += ETH_GSTRING_LEN;
-				snprintf(p, ETH_GSTRING_LEN,
-					 "port.rx_priority_%u_xoff", i);
-				p += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-				snprintf(p, ETH_GSTRING_LEN,
-					 "port.rx_priority_%u_xon_2_xoff", i);
-				p += ETH_GSTRING_LEN;
-			}
+		}
+		for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "port.%s",
+				 i40e_gstrings_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "port.tx_priority_%u_xon", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "port.tx_priority_%u_xoff", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "port.rx_priority_%u_xon", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "port.rx_priority_%u_xoff", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "port.rx_priority_%u_xon_2_xoff", i);
+			p += ETH_GSTRING_LEN;
 		}
 		/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
@@ -1007,14 +1080,13 @@
 	ec->rx_max_coalesced_frames_irq = vsi->work_limit;
 
 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
-		ec->rx_coalesce_usecs = 1;
-	else
-		ec->rx_coalesce_usecs = vsi->rx_itr_setting;
+		ec->use_adaptive_rx_coalesce = 1;
 
 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
-		ec->tx_coalesce_usecs = 1;
-	else
-		ec->tx_coalesce_usecs = vsi->tx_itr_setting;
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
 
 	return 0;
 }
@@ -1033,37 +1105,27 @@
 	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
 		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
-	switch (ec->rx_coalesce_usecs) {
-	case 0:
-		vsi->rx_itr_setting = 0;
-		break;
-	case 1:
-		vsi->rx_itr_setting = (I40E_ITR_DYNAMIC |
-				       ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
-		break;
-	default:
-		if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		    (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)))
-			return -EINVAL;
+	if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+	    (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
 		vsi->rx_itr_setting = ec->rx_coalesce_usecs;
-		break;
-	}
+	else
+		return -EINVAL;
 
-	switch (ec->tx_coalesce_usecs) {
-	case 0:
-		vsi->tx_itr_setting = 0;
-		break;
-	case 1:
-		vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
-				       ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
-		break;
-	default:
-		if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		    (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)))
-			return -EINVAL;
+	if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+	    (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
 		vsi->tx_itr_setting = ec->tx_coalesce_usecs;
-		break;
-	}
+	else
+		return -EINVAL;
+
+	if (ec->use_adaptive_rx_coalesce)
+		vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	if (ec->use_adaptive_tx_coalesce)
+		vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	vector = vsi->base_vector;
 	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
@@ -1140,8 +1202,7 @@
 	int cnt = 0;
 
 	/* report total rule count */
-	cmd->data = pf->hw.fdir_shared_filter_count +
-		    pf->fdir_pf_filter_count;
+	cmd->data = i40e_get_fd_cnt_all(pf);
 
 	hlist_for_each_entry_safe(rule, node2,
 				  &pf->fdir_filter_list, fdir_node) {
@@ -1175,10 +1236,6 @@
 	struct i40e_fdir_filter *rule = NULL;
 	struct hlist_node *node2;
 
-	/* report total rule count */
-	cmd->data = pf->hw.fdir_shared_filter_count +
-		    pf->fdir_pf_filter_count;
-
 	hlist_for_each_entry_safe(rule, node2,
 				  &pf->fdir_filter_list, fdir_node) {
 		if (fsp->location <= rule->fd_id)
@@ -1189,11 +1246,24 @@
 		return -EINVAL;
 
 	fsp->flow_type = rule->flow_type;
-	fsp->h_u.tcp_ip4_spec.psrc = rule->src_port;
-	fsp->h_u.tcp_ip4_spec.pdst = rule->dst_port;
-	fsp->h_u.tcp_ip4_spec.ip4src = rule->src_ip[0];
-	fsp->h_u.tcp_ip4_spec.ip4dst = rule->dst_ip[0];
-	fsp->ring_cookie = rule->q_index;
+	if (fsp->flow_type == IP_USER_FLOW) {
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+	}
+
+	/* Reverse the src and dest notion, since the HW views them from
+	 * Tx perspective whereas the user expects it from Rx filter view.
+	 */
+	fsp->h_u.tcp_ip4_spec.psrc = rule->dst_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->src_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip[0];
+
+	if (rule->dest_ctl == I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
 
 	return 0;
 }
@@ -1223,6 +1293,8 @@
 		break;
 	case ETHTOOL_GRXCLSRLCNT:
 		cmd->rule_cnt = pf->fdir_pf_active_filters;
+		/* report total rule count */
+		cmd->data = i40e_get_fd_cnt_all(pf);
 		ret = 0;
 		break;
 	case ETHTOOL_GRXCLSRULE:
@@ -1291,16 +1363,12 @@
 	case UDP_V4_FLOW:
 		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
 		case 0:
-			hena &=
-			~(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+			hena &= ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+				  ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
 			break;
 		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			hena |=
-			(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP)  |
-			((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+			hena |= (((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+				  ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
 			break;
 		default:
 			return -EINVAL;
@@ -1309,16 +1377,12 @@
 	case UDP_V6_FLOW:
 		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
 		case 0:
-			hena &=
-			~(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+			hena &= ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+				  ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
 			break;
 		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			hena |=
-			(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP)  |
-			((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) |
-			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+			hena |= (((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+				 ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
 			break;
 		default:
 			return -EINVAL;
@@ -1503,7 +1567,8 @@
 		return -EINVAL;
 	}
 
-	if (fsp->ring_cookie >= vsi->num_queue_pairs)
+	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+	    (fsp->ring_cookie >= vsi->num_queue_pairs))
 		return -EINVAL;
 
 	input = kzalloc(sizeof(*input), GFP_KERNEL);
@@ -1524,13 +1589,17 @@
 	input->pctype = 0;
 	input->dest_vsi = vsi->id;
 	input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
-	input->cnt_index = 0;
+	input->cnt_index  = pf->fd_sb_cnt_idx;
 	input->flow_type = fsp->flow_type;
 	input->ip4_proto = fsp->h_u.usr_ip4_spec.proto;
-	input->src_port = fsp->h_u.tcp_ip4_spec.psrc;
-	input->dst_port = fsp->h_u.tcp_ip4_spec.pdst;
-	input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
-	input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+
+	/* Reverse the src and dest notion, since the HW expects them to be from
+	 * Tx perspective whereas the input from user is from Rx filter view.
+	 */
+	input->dst_port = fsp->h_u.tcp_ip4_spec.psrc;
+	input->src_port = fsp->h_u.tcp_ip4_spec.pdst;
+	input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
 
 	ret = i40e_add_del_fdir(vsi, input, true);
 	if (ret)
@@ -1692,5 +1761,5 @@
 
 void i40e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops);
+	netdev->ethtool_ops = &i40e_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index bf2d4cc..9b987cc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c

@@ -201,7 +201,7 @@
  **/
 i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
 					struct i40e_hmc_info *hmc_info,
-					u32 idx, bool is_pf)
+					u32 idx)
 {
 	i40e_status ret_code = 0;
 	struct i40e_hmc_pd_entry *pd_entry;
@@ -237,10 +237,7 @@
 	pd_addr = (u64 *)pd_table->pd_page_addr.va;
 	pd_addr += rel_pd_idx;
 	memset(pd_addr, 0, sizeof(u64));
-	if (is_pf)
-		I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
-	else
-		I40E_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx, hmc_info->hmc_fn_id);
+	I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
 
 	/* free memory here */
 	ret_code = i40e_free_dma_mem(hw, &(pd_entry->bp.addr));

diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
index 0cd4701..b45d8fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h

@@ -163,11 +163,6 @@
 	    (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		\
 	     ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
 
-#define I40E_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id)	   \
-	wr32((hw), I40E_GLHMC_VFPDINV((hmc_fn_id) - I40E_FIRST_VF_FPM_ID), \
-	     (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		   \
-	      ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
 /**
  * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
  * @hmc_info: pointer to the HMC configuration information structure
@@ -226,7 +221,7 @@
 					      u32 pd_index);
 i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
 					struct i40e_hmc_info *hmc_info,
-					u32 idx, bool is_pf);
+					u32 idx);
 i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
 					     u32 idx);
 i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,

diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index d5d98fe..870ab1e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c

@@ -397,7 +397,7 @@
 				/* remove the backing pages from pd_idx1 to i */
 				while (i && (i > pd_idx1)) {
 					i40e_remove_pd_bp(hw, info->hmc_info,
-							  (i - 1), true);
+							  (i - 1));
 					i--;
 				}
 			}
@@ -433,11 +433,7 @@
 				      ((j - 1) * I40E_HMC_MAX_BP_COUNT));
 			pd_lmt1 = min(pd_lmt, (j * I40E_HMC_MAX_BP_COUNT));
 			for (i = pd_idx1; i < pd_lmt1; i++) {
-				i40e_remove_pd_bp(
-					hw,
-					info->hmc_info,
-					i,
-					true);
+				i40e_remove_pd_bp(hw, info->hmc_info, i);
 			}
 			i40e_remove_pd_page(hw, info->hmc_info, (j - 1));
 			break;
@@ -616,8 +612,7 @@
 		pd_table =
 			&info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
 		if (pd_table->pd_entry[rel_pd_idx].valid) {
-			ret_code = i40e_remove_pd_bp(hw, info->hmc_info,
-						     j, true);
+			ret_code = i40e_remove_pd_bp(hw, info->hmc_info, j);
 			if (ret_code)
 				goto exit;
 		}
@@ -747,6 +742,7 @@
 	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphdata_ena),  1,	195 },
 	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphhead_ena),  1,	196 },
 	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, lrxqthresh),   3,	198 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, prefena),      1,	201 },
 	{ 0 }
 };
 

diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
index 341de92..eb65fe2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h

@@ -56,6 +56,7 @@
 	u8  tphdata_ena;
 	u8  tphhead_ena;
 	u8  lrxqthresh;
+	u8  prefena;	/* NOTE: normally must be set to 1 at init */
 };
 
 /* Tx queue context data */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2e72449..275ca9a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c

@@ -38,8 +38,8 @@
 #define DRV_KERN "-k"
 
 #define DRV_VERSION_MAJOR 0
-#define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 36
+#define DRV_VERSION_MINOR 4
+#define DRV_VERSION_BUILD 10
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -67,12 +67,10 @@
  */
 static DEFINE_PCI_DEVICE_TABLE(i40e_pci_tbl) = {
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_XL710), 0},
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X710), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QEMU), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_A), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_B), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_C), 0},
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_D), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
@@ -356,6 +354,7 @@
 					     struct rtnl_link_stats64 *stats)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
 	int i;
@@ -368,7 +367,6 @@
 
 	rcu_read_lock();
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
-		struct i40e_ring *tx_ring, *rx_ring;
 		u64 bytes, packets;
 		unsigned int start;
 
@@ -397,7 +395,7 @@
 	}
 	rcu_read_unlock();
 
-	/* following stats updated by ixgbe_watchdog_task() */
+	/* following stats updated by i40e_watchdog_subtask() */
 	stats->multicast	= vsi_stats->multicast;
 	stats->tx_errors	= vsi_stats->tx_errors;
 	stats->tx_dropped	= vsi_stats->tx_dropped;
@@ -530,6 +528,12 @@
 	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx),
 			   vsi->stat_offsets_loaded,
 			   &oes->rx_discards, &es->rx_discards);
+	i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
+	i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_errors, &es->tx_errors);
 
 	i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
 			   I40E_GLV_GORCL(stat_idx),
@@ -648,10 +652,10 @@
 		return;
 
 	/* Clear the __I40E_HANG_CHECK_ARMED bit for all Tx rings */
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		struct i40e_vsi *vsi = pf->vsi[v];
 
-		if (!vsi)
+		if (!vsi || !vsi->tx_rings[0])
 			continue;
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -702,10 +706,10 @@
 	}
 
 	/* Clear the __I40E_HANG_CHECK_ARMED bit for Tx rings */
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		struct i40e_vsi *vsi = pf->vsi[v];
 
-		if (!vsi)
+		if (!vsi || !vsi->tx_rings[0])
 			continue;
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -720,19 +724,18 @@
 }
 
 /**
- * i40e_update_stats - Update the board statistics counters.
+ * i40e_update_vsi_stats - Update the vsi statistics counters.
  * @vsi: the VSI to be updated
  *
  * There are a few instances where we store the same stat in a
  * couple of different structs.  This is partly because we have
  * the netdev stats that need to be filled out, which is slightly
  * different from the "eth_stats" defined by the chip and used in
- * VF communications.  We sort it all out here in a central place.
+ * VF communications.  We sort it out here.
  **/
-void i40e_update_stats(struct i40e_vsi *vsi)
+static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
-	struct i40e_hw *hw = &pf->hw;
 	struct rtnl_link_stats64 *ons;
 	struct rtnl_link_stats64 *ns;   /* netdev stats */
 	struct i40e_eth_stats *oes;
@@ -741,8 +744,6 @@
 	u32 rx_page, rx_buf;
 	u64 rx_p, rx_b;
 	u64 tx_p, tx_b;
-	u32 val;
-	int i;
 	u16 q;
 
 	if (test_bit(__I40E_DOWN, &vsi->state) ||
@@ -804,196 +805,256 @@
 	ns->tx_packets = tx_p;
 	ns->tx_bytes = tx_b;
 
-	i40e_update_eth_stats(vsi);
 	/* update netdev stats from eth stats */
-	ons->rx_errors = oes->rx_errors;
-	ns->rx_errors = es->rx_errors;
+	i40e_update_eth_stats(vsi);
 	ons->tx_errors = oes->tx_errors;
 	ns->tx_errors = es->tx_errors;
 	ons->multicast = oes->rx_multicast;
 	ns->multicast = es->rx_multicast;
+	ons->rx_dropped = oes->rx_discards;
+	ns->rx_dropped = es->rx_discards;
 	ons->tx_dropped = oes->tx_discards;
 	ns->tx_dropped = es->tx_discards;
 
-	/* Get the port data only if this is the main PF VSI */
+	/* pull in a couple PF stats if this is the main vsi */
 	if (vsi == pf->vsi[pf->lan_vsi]) {
-		struct i40e_hw_port_stats *nsd = &pf->stats;
-		struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+		ns->rx_crc_errors = pf->stats.crc_errors;
+		ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes;
+		ns->rx_length_errors = pf->stats.rx_length_errors;
+	}
+}
 
-		i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
-				   I40E_GLPRT_GORCL(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
-		i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
-				   I40E_GLPRT_GOTCL(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
-		i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->eth.rx_discards,
-				   &nsd->eth.rx_discards);
-		i40e_stat_update32(hw, I40E_GLPRT_TDPC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->eth.tx_discards,
-				   &nsd->eth.tx_discards);
-		i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
-				   I40E_GLPRT_MPRCL(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->eth.rx_multicast,
-				   &nsd->eth.rx_multicast);
+/**
+ * i40e_update_pf_stats - Update the pf statistics counters.
+ * @pf: the PF to be updated
+ **/
+static void i40e_update_pf_stats(struct i40e_pf *pf)
+{
+	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+	struct i40e_hw_port_stats *nsd = &pf->stats;
+	struct i40e_hw *hw = &pf->hw;
+	u32 val;
+	int i;
 
-		i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->tx_dropped_link_down,
-				   &nsd->tx_dropped_link_down);
+	i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
+			   I40E_GLPRT_GORCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
+	i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
+			   I40E_GLPRT_GOTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
+	i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_discards,
+			   &nsd->eth.rx_discards);
+	i40e_stat_update32(hw, I40E_GLPRT_TDPC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_discards,
+			   &nsd->eth.tx_discards);
 
-		i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->crc_errors, &nsd->crc_errors);
-		ns->rx_crc_errors = nsd->crc_errors;
+	i40e_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port),
+			   I40E_GLPRT_UPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_unicast,
+			   &nsd->eth.rx_unicast);
+	i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
+			   I40E_GLPRT_MPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_multicast,
+			   &nsd->eth.rx_multicast);
+	i40e_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port),
+			   I40E_GLPRT_BPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_broadcast,
+			   &nsd->eth.rx_broadcast);
+	i40e_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port),
+			   I40E_GLPRT_UPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_unicast,
+			   &nsd->eth.tx_unicast);
+	i40e_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port),
+			   I40E_GLPRT_MPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_multicast,
+			   &nsd->eth.tx_multicast);
+	i40e_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port),
+			   I40E_GLPRT_BPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_broadcast,
+			   &nsd->eth.tx_broadcast);
 
-		i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->illegal_bytes, &nsd->illegal_bytes);
-		ns->rx_errors = nsd->crc_errors
-				+ nsd->illegal_bytes;
+	i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_dropped_link_down,
+			   &nsd->tx_dropped_link_down);
 
-		i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->mac_local_faults,
-				   &nsd->mac_local_faults);
-		i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->mac_remote_faults,
-				   &nsd->mac_remote_faults);
+	i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->crc_errors, &nsd->crc_errors);
 
-		i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_length_errors,
-				   &nsd->rx_length_errors);
-		ns->rx_length_errors = nsd->rx_length_errors;
+	i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->illegal_bytes, &nsd->illegal_bytes);
 
-		i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->link_xon_rx, &nsd->link_xon_rx);
-		i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->link_xon_tx, &nsd->link_xon_tx);
-		i40e_update_prio_xoff_rx(pf);  /* handles I40E_GLPRT_LXOFFRXC */
-		i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->link_xoff_tx, &nsd->link_xoff_tx);
+	i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->mac_local_faults,
+			   &nsd->mac_local_faults);
+	i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->mac_remote_faults,
+			   &nsd->mac_remote_faults);
 
-		for (i = 0; i < 8; i++) {
-			i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
-					   pf->stat_offsets_loaded,
-					   &osd->priority_xon_rx[i],
-					   &nsd->priority_xon_rx[i]);
-			i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
-					   pf->stat_offsets_loaded,
-					   &osd->priority_xon_tx[i],
-					   &nsd->priority_xon_tx[i]);
-			i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
-					   pf->stat_offsets_loaded,
-					   &osd->priority_xoff_tx[i],
-					   &nsd->priority_xoff_tx[i]);
-			i40e_stat_update32(hw,
-					   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
-					   pf->stat_offsets_loaded,
-					   &osd->priority_xon_2_xoff[i],
-					   &nsd->priority_xon_2_xoff[i]);
-		}
+	i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_length_errors,
+			   &nsd->rx_length_errors);
 
-		i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
-				   I40E_GLPRT_PRC64L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_64, &nsd->rx_size_64);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
-				   I40E_GLPRT_PRC127L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_127, &nsd->rx_size_127);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
-				   I40E_GLPRT_PRC255L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_255, &nsd->rx_size_255);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
-				   I40E_GLPRT_PRC511L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_511, &nsd->rx_size_511);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
-				   I40E_GLPRT_PRC1023L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_1023, &nsd->rx_size_1023);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
-				   I40E_GLPRT_PRC1522L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_1522, &nsd->rx_size_1522);
-		i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
-				   I40E_GLPRT_PRC9522L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_size_big, &nsd->rx_size_big);
+	i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xon_rx, &nsd->link_xon_rx);
+	i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xon_tx, &nsd->link_xon_tx);
+	i40e_update_prio_xoff_rx(pf);  /* handles I40E_GLPRT_LXOFFRXC */
+	i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
 
-		i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
-				   I40E_GLPRT_PTC64L(hw->port),
+	for (i = 0; i < 8; i++) {
+		i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
 				   pf->stat_offsets_loaded,
-				   &osd->tx_size_64, &nsd->tx_size_64);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
-				   I40E_GLPRT_PTC127L(hw->port),
+				   &osd->priority_xon_rx[i],
+				   &nsd->priority_xon_rx[i]);
+		i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
 				   pf->stat_offsets_loaded,
-				   &osd->tx_size_127, &nsd->tx_size_127);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
-				   I40E_GLPRT_PTC255L(hw->port),
+				   &osd->priority_xon_tx[i],
+				   &nsd->priority_xon_tx[i]);
+		i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
 				   pf->stat_offsets_loaded,
-				   &osd->tx_size_255, &nsd->tx_size_255);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
-				   I40E_GLPRT_PTC511L(hw->port),
+				   &osd->priority_xoff_tx[i],
+				   &nsd->priority_xoff_tx[i]);
+		i40e_stat_update32(hw,
+				   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
 				   pf->stat_offsets_loaded,
-				   &osd->tx_size_511, &nsd->tx_size_511);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
-				   I40E_GLPRT_PTC1023L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->tx_size_1023, &nsd->tx_size_1023);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
-				   I40E_GLPRT_PTC1522L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->tx_size_1522, &nsd->tx_size_1522);
-		i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
-				   I40E_GLPRT_PTC9522L(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->tx_size_big, &nsd->tx_size_big);
-
-		i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_undersize, &nsd->rx_undersize);
-		i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_fragments, &nsd->rx_fragments);
-		i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_oversize, &nsd->rx_oversize);
-		i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
-				   pf->stat_offsets_loaded,
-				   &osd->rx_jabber, &nsd->rx_jabber);
-
-		val = rd32(hw, I40E_PRTPM_EEE_STAT);
-		nsd->tx_lpi_status =
-			       (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >>
-				I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT;
-		nsd->rx_lpi_status =
-			       (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >>
-				I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT;
-		i40e_stat_update32(hw, I40E_PRTPM_TLPIC,
-				   pf->stat_offsets_loaded,
-				   &osd->tx_lpi_count, &nsd->tx_lpi_count);
-		i40e_stat_update32(hw, I40E_PRTPM_RLPIC,
-				   pf->stat_offsets_loaded,
-				   &osd->rx_lpi_count, &nsd->rx_lpi_count);
+				   &osd->priority_xon_2_xoff[i],
+				   &nsd->priority_xon_2_xoff[i]);
 	}
 
+	i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
+			   I40E_GLPRT_PRC64L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_64, &nsd->rx_size_64);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
+			   I40E_GLPRT_PRC127L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_127, &nsd->rx_size_127);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
+			   I40E_GLPRT_PRC255L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_255, &nsd->rx_size_255);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
+			   I40E_GLPRT_PRC511L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_511, &nsd->rx_size_511);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
+			   I40E_GLPRT_PRC1023L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_1023, &nsd->rx_size_1023);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
+			   I40E_GLPRT_PRC1522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_1522, &nsd->rx_size_1522);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
+			   I40E_GLPRT_PRC9522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_big, &nsd->rx_size_big);
+
+	i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
+			   I40E_GLPRT_PTC64L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_64, &nsd->tx_size_64);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
+			   I40E_GLPRT_PTC127L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_127, &nsd->tx_size_127);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
+			   I40E_GLPRT_PTC255L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_255, &nsd->tx_size_255);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
+			   I40E_GLPRT_PTC511L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_511, &nsd->tx_size_511);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
+			   I40E_GLPRT_PTC1023L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_1023, &nsd->tx_size_1023);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
+			   I40E_GLPRT_PTC1522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_1522, &nsd->tx_size_1522);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
+			   I40E_GLPRT_PTC9522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_big, &nsd->tx_size_big);
+
+	i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_undersize, &nsd->rx_undersize);
+	i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_fragments, &nsd->rx_fragments);
+	i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_oversize, &nsd->rx_oversize);
+	i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_jabber, &nsd->rx_jabber);
+
+	/* FDIR stats */
+	i40e_stat_update32(hw, I40E_GLQF_PCNT(pf->fd_atr_cnt_idx),
+			   pf->stat_offsets_loaded,
+			   &osd->fd_atr_match, &nsd->fd_atr_match);
+	i40e_stat_update32(hw, I40E_GLQF_PCNT(pf->fd_sb_cnt_idx),
+			   pf->stat_offsets_loaded,
+			   &osd->fd_sb_match, &nsd->fd_sb_match);
+
+	val = rd32(hw, I40E_PRTPM_EEE_STAT);
+	nsd->tx_lpi_status =
+		       (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >>
+			I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT;
+	nsd->rx_lpi_status =
+		       (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >>
+			I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT;
+	i40e_stat_update32(hw, I40E_PRTPM_TLPIC,
+			   pf->stat_offsets_loaded,
+			   &osd->tx_lpi_count, &nsd->tx_lpi_count);
+	i40e_stat_update32(hw, I40E_PRTPM_RLPIC,
+			   pf->stat_offsets_loaded,
+			   &osd->rx_lpi_count, &nsd->rx_lpi_count);
+
 	pf->stat_offsets_loaded = true;
 }
 
 /**
+ * i40e_update_stats - Update the various statistics counters.
+ * @vsi: the VSI to be updated
+ *
+ * Update the various stats for this VSI and its related entities.
+ **/
+void i40e_update_stats(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	if (vsi == pf->vsi[pf->lan_vsi])
+		i40e_update_pf_stats(pf);
+
+	i40e_update_vsi_stats(vsi);
+}
+
+/**
  * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
@@ -1101,6 +1162,30 @@
 }
 
 /**
+ * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
+ * @vsi: the PF Main VSI - inappropriate for any other VSI
+ * @macaddr: the MAC address
+ **/
+static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
+{
+	struct i40e_aqc_remove_macvlan_element_data element;
+	struct i40e_pf *pf = vsi->back;
+	i40e_status aq_ret;
+
+	/* Only appropriate for the PF main VSI */
+	if (vsi->type != I40E_VSI_MAIN)
+		return;
+
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
+			I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+	aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
+	if (aq_ret)
+		dev_err(&pf->pdev->dev, "Could not remove default MAC-VLAN\n");
+}
+
+/**
  * i40e_add_filter - Add a mac/vlan filter to the VSI
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
@@ -1125,7 +1210,7 @@
 		if (!f)
 			goto add_filter_out;
 
-		memcpy(f->macaddr, macaddr, ETH_ALEN);
+		ether_addr_copy(f->macaddr, macaddr);
 		f->vlan = vlan;
 		f->changed = true;
 
@@ -1249,7 +1334,7 @@
 			return -EADDRNOTAVAIL;
 		}
 
-		memcpy(vsi->back->hw.mac.addr, addr->sa_data, netdev->addr_len);
+		ether_addr_copy(vsi->back->hw.mac.addr, addr->sa_data);
 	}
 
 	/* In order to be sure to not drop any packets, add the new address
@@ -1263,7 +1348,7 @@
 	i40e_del_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY, false, false);
 	i40e_sync_vsi_filters(vsi);
 
-	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 
 	return 0;
 }
@@ -1313,7 +1398,7 @@
 	vsi->tc_config.numtc = numtc;
 	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
 	/* Number of queues per enabled TC */
-	num_tc_qps = rounddown_pow_of_two(vsi->alloc_queue_pairs/numtc);
+	num_tc_qps = vsi->alloc_queue_pairs/numtc;
 	num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC);
 
 	/* Setup queue offset/count for all TCs for given VSI */
@@ -1520,8 +1605,7 @@
 			cmd_flags = 0;
 
 			/* add to delete list */
-			memcpy(del_list[num_del].mac_addr,
-			       f->macaddr, ETH_ALEN);
+			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
 			del_list[num_del].vlan_tag =
 				cpu_to_le16((u16)(f->vlan ==
 					    I40E_VLAN_ANY ? 0 : f->vlan));
@@ -1542,7 +1626,9 @@
 				num_del = 0;
 				memset(del_list, 0, sizeof(*del_list));
 
-				if (aq_ret)
+				if (aq_ret &&
+				    pf->hw.aq.asq_last_status !=
+							      I40E_AQ_RC_ENOENT)
 					dev_info(&pf->pdev->dev,
 						 "ignoring delete macvlan error, err %d, aq_err %d while flushing a full buffer\n",
 						 aq_ret,
@@ -1554,7 +1640,8 @@
 						     del_list, num_del, NULL);
 			num_del = 0;
 
-			if (aq_ret)
+			if (aq_ret &&
+			    pf->hw.aq.asq_last_status != I40E_AQ_RC_ENOENT)
 				dev_info(&pf->pdev->dev,
 					 "ignoring delete macvlan error, err %d, aq_err %d\n",
 					 aq_ret, pf->hw.aq.asq_last_status);
@@ -1583,8 +1670,7 @@
 			cmd_flags = 0;
 
 			/* add to add array */
-			memcpy(add_list[num_add].mac_addr,
-			       f->macaddr, ETH_ALEN);
+			ether_addr_copy(add_list[num_add].mac_addr, f->macaddr);
 			add_list[num_add].vlan_tag =
 				cpu_to_le16(
 				 (u16)(f->vlan == I40E_VLAN_ANY ? 0 : f->vlan));
@@ -1681,7 +1767,7 @@
 		return;
 	pf->flags &= ~I40E_FLAG_FILTER_SYNC;
 
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
 		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED))
 			i40e_sync_vsi_filters(pf->vsi[v]);
@@ -1698,7 +1784,7 @@
 static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct i40e_vsi *vsi = np->vsi;
 
 	/* MTU < 68 is an error and causes problems on some kernels */
@@ -2312,6 +2398,8 @@
 	rx_ctx.crcstrip = 1;
 	rx_ctx.l2tsel = 1;
 	rx_ctx.showiv = 1;
+	/* set the prefena field to 1 because the manual says to */
+	rx_ctx.prefena = 1;
 
 	/* clear the context in the HMC */
 	err = i40e_clear_lan_rx_queue_context(hw, pf_q);
@@ -2413,6 +2501,7 @@
  **/
 static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
 {
+	struct i40e_ring *tx_ring, *rx_ring;
 	u16 qoffset, qcount;
 	int i, n;
 
@@ -2426,8 +2515,8 @@
 		qoffset = vsi->tc_config.tc_info[n].qoffset;
 		qcount = vsi->tc_config.tc_info[n].qcount;
 		for (i = qoffset; i < (qoffset + qcount); i++) {
-			struct i40e_ring *rx_ring = vsi->rx_rings[i];
-			struct i40e_ring *tx_ring = vsi->tx_rings[i];
+			rx_ring = vsi->rx_rings[i];
+			tx_ring = vsi->tx_rings[i];
 			rx_ring->dcb_tc = n;
 			tx_ring->dcb_tc = n;
 		}
@@ -2565,7 +2654,6 @@
 	      I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
 	      I40E_PFINT_ICR0_ENA_GPIO_MASK          |
 	      I40E_PFINT_ICR0_ENA_TIMESYNC_MASK      |
-	      I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK  |
 	      I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
 	      I40E_PFINT_ICR0_ENA_VFLR_MASK          |
 	      I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
@@ -2733,6 +2821,7 @@
 				      &q_vector->affinity_mask);
 	}
 
+	vsi->irqs_ready = true;
 	return 0;
 
 free_queue_irqs:
@@ -3152,6 +3241,12 @@
 
 	pf_q = vsi->base_queue;
 	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+
+		/* warn the TX unit of coming changes */
+		i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable);
+		if (!enable)
+			udelay(10);
+
 		for (j = 0; j < 50; j++) {
 			tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
 			if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) ==
@@ -3160,9 +3255,7 @@
 			usleep_range(1000, 2000);
 		}
 		/* Skip if the queue is already in the requested state */
-		if (enable && (tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
-			continue;
-		if (!enable && !(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+		if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
 			continue;
 
 		/* turn on/off the queue */
@@ -3178,13 +3271,8 @@
 		/* wait for the change to finish */
 		for (j = 0; j < 10; j++) {
 			tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
-			if (enable) {
-				if ((tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
-					break;
-			} else {
-				if (!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
-					break;
-			}
+			if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+				break;
 
 			udelay(10);
 		}
@@ -3223,15 +3311,9 @@
 			usleep_range(1000, 2000);
 		}
 
-		if (enable) {
-			/* is STAT set ? */
-			if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
-				continue;
-		} else {
-			/* is !STAT set ? */
-			if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
-				continue;
-		}
+		/* Skip if the queue is already in the requested state */
+		if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+			continue;
 
 		/* turn on/off the queue */
 		if (enable)
@@ -3244,13 +3326,8 @@
 		for (j = 0; j < 10; j++) {
 			rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
 
-			if (enable) {
-				if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
-					break;
-			} else {
-				if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
-					break;
-			}
+			if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+				break;
 
 			udelay(10);
 		}
@@ -3304,6 +3381,10 @@
 		if (!vsi->q_vectors)
 			return;
 
+		if (!vsi->irqs_ready)
+			return;
+
+		vsi->irqs_ready = false;
 		for (i = 0; i < vsi->num_q_vectors; i++) {
 			u16 vector = i + base;
 
@@ -3476,7 +3557,7 @@
 	int i;
 
 	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i])
 			i40e_vsi_free_q_vectors(pf->vsi[i]);
 	i40e_reset_interrupt_capability(pf);
@@ -3513,6 +3594,19 @@
 }
 
 /**
+ * i40e_vsi_close - Shut down a VSI
+ * @vsi: the vsi to be quelled
+ **/
+static void i40e_vsi_close(struct i40e_vsi *vsi)
+{
+	if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
+		i40e_down(vsi);
+	i40e_vsi_free_irq(vsi);
+	i40e_vsi_free_tx_resources(vsi);
+	i40e_vsi_free_rx_resources(vsi);
+}
+
+/**
  * i40e_quiesce_vsi - Pause a given VSI
  * @vsi: the VSI being paused
  **/
@@ -3525,8 +3619,7 @@
 	if (vsi->netdev && netif_running(vsi->netdev)) {
 		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
 	} else {
-		set_bit(__I40E_DOWN, &vsi->state);
-		i40e_down(vsi);
+		i40e_vsi_close(vsi);
 	}
 }
 
@@ -3543,7 +3636,7 @@
 	if (vsi->netdev && netif_running(vsi->netdev))
 		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
 	else
-		i40e_up(vsi);   /* this clears the DOWN bit */
+		i40e_vsi_open(vsi);   /* this clears the DOWN bit */
 }
 
 /**
@@ -3554,7 +3647,7 @@
 {
 	int v;
 
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v])
 			i40e_quiesce_vsi(pf->vsi[v]);
 	}
@@ -3568,7 +3661,7 @@
 {
 	int v;
 
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v])
 			i40e_unquiesce_vsi(pf->vsi[v]);
 	}
@@ -4009,7 +4102,7 @@
 	}
 
 	/* Update each VSI */
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (!pf->vsi[v])
 			continue;
 
@@ -4028,6 +4121,8 @@
 				 pf->vsi[v]->seid);
 			/* Will try to configure as many components */
 		} else {
+			/* Re-configure VSI vectors based on updated TC map */
+			i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
 			if (pf->vsi[v]->netdev)
 				i40e_dcbnl_set_all(pf->vsi[v]);
 		}
@@ -4065,14 +4160,69 @@
 			/* When status is not DISABLED then DCBX in FW */
 			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
 				       DCB_CAP_DCBX_VER_IEEE;
-			pf->flags |= I40E_FLAG_DCB_ENABLED;
+
+			pf->flags |= I40E_FLAG_DCB_CAPABLE;
+			/* Enable DCB tagging only when more than one TC */
+			if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+				pf->flags |= I40E_FLAG_DCB_ENABLED;
 		}
+	} else {
+		dev_info(&pf->pdev->dev, "AQ Querying DCB configuration failed: %d\n",
+			 pf->hw.aq.asq_last_status);
 	}
 
 out:
 	return err;
 }
 #endif /* CONFIG_I40E_DCB */
+#define SPEED_SIZE 14
+#define FC_SIZE 8
+/**
+ * i40e_print_link_message - print link up or down
+ * @vsi: the VSI for which link needs a message
+ */
+static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+{
+	char speed[SPEED_SIZE] = "Unknown";
+	char fc[FC_SIZE] = "RX/TX";
+
+	if (!isup) {
+		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	switch (vsi->back->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		strncpy(speed, "40 Gbps", SPEED_SIZE);
+		break;
+	case I40E_LINK_SPEED_10GB:
+		strncpy(speed, "10 Gbps", SPEED_SIZE);
+		break;
+	case I40E_LINK_SPEED_1GB:
+		strncpy(speed, "1000 Mbps", SPEED_SIZE);
+		break;
+	default:
+		break;
+	}
+
+	switch (vsi->back->hw.fc.current_mode) {
+	case I40E_FC_FULL:
+		strncpy(fc, "RX/TX", FC_SIZE);
+		break;
+	case I40E_FC_TX_PAUSE:
+		strncpy(fc, "TX", FC_SIZE);
+		break;
+	case I40E_FC_RX_PAUSE:
+		strncpy(fc, "RX", FC_SIZE);
+		break;
+	default:
+		strncpy(fc, "None", FC_SIZE);
+		break;
+	}
+
+	netdev_info(vsi->netdev, "NIC Link is Up %s Full Duplex, Flow Control: %s\n",
+		    speed, fc);
+}
 
 /**
  * i40e_up_complete - Finish the last steps of bringing up a connection
@@ -4099,11 +4249,11 @@
 
 	if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
 	    (vsi->netdev)) {
-		netdev_info(vsi->netdev, "NIC Link is Up\n");
+		i40e_print_link_message(vsi, true);
 		netif_tx_start_all_queues(vsi->netdev);
 		netif_carrier_on(vsi->netdev);
 	} else if (vsi->netdev) {
-		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		i40e_print_link_message(vsi, false);
 	}
 
 	/* replay FDIR SB filters */
@@ -4309,24 +4459,32 @@
 	if (err)
 		goto err_setup_rx;
 
-	if (!vsi->netdev) {
-		err = EINVAL;
+	if (vsi->netdev) {
+		snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+			 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+		err = i40e_vsi_request_irq(vsi, int_name);
+		if (err)
+			goto err_setup_rx;
+
+		/* Notify the stack of the actual queue counts. */
+		err = netif_set_real_num_tx_queues(vsi->netdev,
+						   vsi->num_queue_pairs);
+		if (err)
+			goto err_set_queues;
+
+		err = netif_set_real_num_rx_queues(vsi->netdev,
+						   vsi->num_queue_pairs);
+		if (err)
+			goto err_set_queues;
+
+	} else if (vsi->type == I40E_VSI_FDIR) {
+		snprintf(int_name, sizeof(int_name) - 1, "%s-fdir",
+			 dev_driver_string(&pf->pdev->dev));
+		err = i40e_vsi_request_irq(vsi, int_name);
+	} else {
+		err = -EINVAL;
 		goto err_setup_rx;
 	}
-	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
-		 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
-	err = i40e_vsi_request_irq(vsi, int_name);
-	if (err)
-		goto err_setup_rx;
-
-	/* Notify the stack of the actual queue counts. */
-	err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_queue_pairs);
-	if (err)
-		goto err_set_queues;
-
-	err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_queue_pairs);
-	if (err)
-		goto err_set_queues;
 
 	err = i40e_up_complete(vsi);
 	if (err)
@@ -4383,14 +4541,7 @@
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 
-	if (test_and_set_bit(__I40E_DOWN, &vsi->state))
-		return 0;
-
-	i40e_down(vsi);
-	i40e_vsi_free_irq(vsi);
-
-	i40e_vsi_free_tx_resources(vsi);
-	i40e_vsi_free_rx_resources(vsi);
+	i40e_vsi_close(vsi);
 
 	return 0;
 }
@@ -4410,6 +4561,9 @@
 
 	WARN_ON(in_interrupt());
 
+	if (i40e_check_asq_alive(&pf->hw))
+		i40e_vc_notify_reset(pf);
+
 	/* do the biggest reset indicated */
 	if (reset_flags & (1 << __I40E_GLOBAL_RESET_REQUESTED)) {
 
@@ -4475,7 +4629,7 @@
 		/* Find the VSI(s) that requested a re-init */
 		dev_info(&pf->pdev->dev,
 			 "VSI reinit requested\n");
-		for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+		for (v = 0; v < pf->num_alloc_vsi; v++) {
 			struct i40e_vsi *vsi = pf->vsi[v];
 			if (vsi != NULL &&
 			    test_bit(__I40E_REINIT_REQUESTED, &vsi->state)) {
@@ -4565,6 +4719,10 @@
 	int ret = 0;
 	u8 type;
 
+	/* Not DCB capable or capability disabled */
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		return ret;
+
 	/* Ignore if event is not for Nearest Bridge */
 	type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
 		& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
@@ -4606,6 +4764,12 @@
 	if (!need_reconfig)
 		goto exit;
 
+	/* Enable DCB tagging only when more than one TC */
+	if (i40e_dcb_get_num_tc(dcbx_cfg) > 1)
+		pf->flags |= I40E_FLAG_DCB_ENABLED;
+	else
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+
 	/* Reconfiguration needed quiesce all VSIs */
 	i40e_pf_quiesce_all_vsi(pf);
 
@@ -4709,8 +4873,7 @@
 	    (pf->flags & I40E_FLAG_FD_SB_ENABLED))
 		return;
 	fcnt_prog = i40e_get_current_fd_count(pf);
-	fcnt_avail = pf->hw.fdir_shared_filter_count +
-					       pf->fdir_pf_filter_count;
+	fcnt_avail = i40e_get_fd_cnt_all(pf);
 	if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) {
 		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
 		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
@@ -4803,7 +4966,7 @@
 			i40e_veb_link_event(pf->veb[i], link_up);
 
 	/* ... now the local VSIs */
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
 			i40e_vsi_link_event(pf->vsi[i], link_up);
 }
@@ -4821,10 +4984,8 @@
 
 	if (new_link == old_link)
 		return;
-
 	if (!test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
-		netdev_info(pf->vsi[pf->lan_vsi]->netdev,
-			    "NIC Link is %s\n", (new_link ? "Up" : "Down"));
+		i40e_print_link_message(pf->vsi[pf->lan_vsi], new_link);
 
 	/* Notify the base of the switch tree connected to
 	 * the link.  Floating VEBs are not notified.
@@ -4862,7 +5023,7 @@
 	 *     for each q_vector
 	 *         force an interrupt
 	 */
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		struct i40e_vsi *vsi = pf->vsi[v];
 		int armed = 0;
 
@@ -4912,7 +5073,7 @@
 	/* Update the stats for active netdevs so the network stack
 	 * can look at updated numbers whenever it cares to
 	 */
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i] && pf->vsi[i]->netdev)
 			i40e_update_stats(pf->vsi[i]);
 
@@ -5018,11 +5179,47 @@
 	u16 pending, i = 0;
 	i40e_status ret;
 	u16 opcode;
+	u32 oldval;
 	u32 val;
 
 	if (!test_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state))
 		return;
 
+	/* check for error indications */
+	val = rd32(&pf->hw, pf->hw.aq.arq.len);
+	oldval = val;
+	if (val & I40E_PF_ARQLEN_ARQVFE_MASK) {
+		dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQVFE_MASK;
+	}
+	if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) {
+		dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
+	}
+	if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
+		dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(&pf->hw, pf->hw.aq.arq.len, val);
+
+	val = rd32(&pf->hw, pf->hw.aq.asq.len);
+	oldval = val;
+	if (val & I40E_PF_ATQLEN_ATQVFE_MASK) {
+		dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQVFE_MASK;
+	}
+	if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) {
+		dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK;
+	}
+	if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) {
+		dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(&pf->hw, pf->hw.aq.asq.len, val);
+
 	event.msg_size = I40E_MAX_AQ_BUF_SIZE;
 	event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
 	if (!event.msg_buf)
@@ -5128,7 +5325,7 @@
 	int ret;
 
 	/* build VSI that owns this VEB, temporarily attached to base VEB */
-	for (v = 0; v < pf->hw.func_caps.num_vsis && !ctl_vsi; v++) {
+	for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) {
 		if (pf->vsi[v] &&
 		    pf->vsi[v]->veb_idx == veb->idx &&
 		    pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
@@ -5158,7 +5355,7 @@
 		goto end_reconstitute;
 
 	/* create the remaining VSIs attached to this VEB */
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
 			continue;
 
@@ -5226,9 +5423,6 @@
 		}
 	} while (err);
 
-	/* increment MSI-X count because current FW skips one */
-	pf->hw.func_caps.num_msix_vectors++;
-
 	if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) ||
 	    (pf->hw.aq.fw_maj_ver < 2)) {
 		pf->hw.func_caps.num_msix_vectors++;
@@ -5267,15 +5461,14 @@
 static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 {
 	struct i40e_vsi *vsi;
-	bool new_vsi = false;
-	int err, i;
+	int i;
 
 	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
 		return;
 
 	/* find existing VSI and see if it needs configuring */
 	vsi = NULL;
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
 			vsi = pf->vsi[i];
 			break;
@@ -5288,47 +5481,12 @@
 				     pf->vsi[pf->lan_vsi]->seid, 0);
 		if (!vsi) {
 			dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
-			goto err_vsi;
+			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			return;
 		}
-		new_vsi = true;
 	}
+
 	i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
-
-	err = i40e_vsi_setup_tx_resources(vsi);
-	if (err)
-		goto err_setup_tx;
-	err = i40e_vsi_setup_rx_resources(vsi);
-	if (err)
-		goto err_setup_rx;
-
-	if (new_vsi) {
-		char int_name[IFNAMSIZ + 9];
-		err = i40e_vsi_configure(vsi);
-		if (err)
-			goto err_setup_rx;
-		snprintf(int_name, sizeof(int_name) - 1, "%s-fdir",
-			 dev_driver_string(&pf->pdev->dev));
-		err = i40e_vsi_request_irq(vsi, int_name);
-		if (err)
-			goto err_setup_rx;
-		err = i40e_up_complete(vsi);
-		if (err)
-			goto err_up_complete;
-		clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
-	}
-
-	return;
-
-err_up_complete:
-	i40e_down(vsi);
-	i40e_vsi_free_irq(vsi);
-err_setup_rx:
-	i40e_vsi_free_rx_resources(vsi);
-err_setup_tx:
-	i40e_vsi_free_tx_resources(vsi);
-err_vsi:
-	pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
-	i40e_vsi_clear(vsi);
 }
 
 /**
@@ -5340,7 +5498,7 @@
 	int i;
 
 	i40e_fdir_filter_exit(pf);
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
 			i40e_vsi_release(pf->vsi[i]);
 			break;
@@ -5357,7 +5515,7 @@
 static int i40e_prep_for_reset(struct i40e_pf *pf)
 {
 	struct i40e_hw *hw = &pf->hw;
-	i40e_status ret;
+	i40e_status ret = 0;
 	u32 v;
 
 	clear_bit(__I40E_RESET_INTR_RECEIVED, &pf->state);
@@ -5366,13 +5524,10 @@
 
 	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
 
-	if (i40e_check_asq_alive(hw))
-		i40e_vc_notify_reset(pf);
-
 	/* quiesce the VSIs and their queues that are not already DOWN */
 	i40e_pf_quiesce_all_vsi(pf);
 
-	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v])
 			pf->vsi[v]->seid = 0;
 	}
@@ -5380,22 +5535,40 @@
 	i40e_shutdown_adminq(&pf->hw);
 
 	/* call shutdown HMC */
-	ret = i40e_shutdown_lan_hmc(hw);
-	if (ret) {
-		dev_info(&pf->pdev->dev, "shutdown_lan_hmc failed: %d\n", ret);
-		clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
+	if (hw->hmc.hmc_obj) {
+		ret = i40e_shutdown_lan_hmc(hw);
+		if (ret) {
+			dev_warn(&pf->pdev->dev,
+				 "shutdown_lan_hmc failed: %d\n", ret);
+			clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
+		}
 	}
 	return ret;
 }
 
 /**
+ * i40e_send_version - update firmware with driver version
+ * @pf: PF struct
+ */
+static void i40e_send_version(struct i40e_pf *pf)
+{
+	struct i40e_driver_version dv;
+
+	dv.major_version = DRV_VERSION_MAJOR;
+	dv.minor_version = DRV_VERSION_MINOR;
+	dv.build_version = DRV_VERSION_BUILD;
+	dv.subbuild_version = 0;
+	strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
+	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
+}
+
+/**
  * i40e_reset_and_rebuild - reset and rebuild using a saved config
  * @pf: board private structure
  * @reinit: if the Main VSI needs to re-initialized.
  **/
 static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
 {
-	struct i40e_driver_version dv;
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status ret;
 	u32 v;
@@ -5405,8 +5578,10 @@
 	 * because the reset will make them disappear.
 	 */
 	ret = i40e_pf_reset(hw);
-	if (ret)
+	if (ret) {
 		dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
+		goto end_core_reset;
+	}
 	pf->pfr_count++;
 
 	if (test_bit(__I40E_DOWN, &pf->state))
@@ -5426,6 +5601,7 @@
 		i40e_verify_eeprom(pf);
 	}
 
+	i40e_clear_pxe_mode(hw);
 	ret = i40e_get_capabilities(pf);
 	if (ret) {
 		dev_info(&pf->pdev->dev, "i40e_get_capabilities failed, %d\n",
@@ -5526,13 +5702,7 @@
 	}
 
 	/* tell the firmware that we're starting */
-	dv.major_version = DRV_VERSION_MAJOR;
-	dv.minor_version = DRV_VERSION_MINOR;
-	dv.build_version = DRV_VERSION_BUILD;
-	dv.subbuild_version = 0;
-	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
-
-	dev_info(&pf->pdev->dev, "reset complete\n");
+	i40e_send_version(pf);
 
 end_core_reset:
 	clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
@@ -5642,7 +5812,6 @@
  **/
 static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf)
 {
-	const int vxlan_hdr_qwords = 4;
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status ret;
 	u8 filter_index;
@@ -5660,7 +5829,6 @@
 			port = pf->vxlan_ports[i];
 			ret = port ?
 			      i40e_aq_add_udp_tunnel(hw, ntohs(port),
-						     vxlan_hdr_qwords,
 						     I40E_AQC_TUNNEL_TYPE_VXLAN,
 						     &filter_index, NULL)
 			      : i40e_aq_del_udp_tunnel(hw, i, NULL);
@@ -5839,15 +6007,15 @@
 	 * find next empty vsi slot, looping back around if necessary
 	 */
 	i = pf->next_vsi;
-	while (i < pf->hw.func_caps.num_vsis && pf->vsi[i])
+	while (i < pf->num_alloc_vsi && pf->vsi[i])
 		i++;
-	if (i >= pf->hw.func_caps.num_vsis) {
+	if (i >= pf->num_alloc_vsi) {
 		i = 0;
 		while (i < pf->next_vsi && pf->vsi[i])
 			i++;
 	}
 
-	if (i < pf->hw.func_caps.num_vsis && !pf->vsi[i]) {
+	if (i < pf->num_alloc_vsi && !pf->vsi[i]) {
 		vsi_idx = i;             /* Found one! */
 	} else {
 		ret = -ENODEV;
@@ -5870,6 +6038,7 @@
 	vsi->netdev_registered = false;
 	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
 	INIT_LIST_HEAD(&vsi->mac_filter_list);
+	vsi->irqs_ready = false;
 
 	ret = i40e_set_num_rings_in_vsi(vsi);
 	if (ret)
@@ -5987,14 +6156,12 @@
  **/
 static int i40e_alloc_rings(struct i40e_vsi *vsi)
 {
+	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_pf *pf = vsi->back;
 	int i;
 
 	/* Set basic values in the rings to be used later during open() */
 	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
-		struct i40e_ring *tx_ring;
-		struct i40e_ring *rx_ring;
-
 		/* allocate space for both Tx and Rx in one shot */
 		tx_ring = kzalloc(sizeof(struct i40e_ring) * 2, GFP_KERNEL);
 		if (!tx_ring)
@@ -6052,8 +6219,6 @@
 		vectors = 0;
 	}
 
-	pf->num_msix_entries = vectors;
-
 	return vectors;
 }
 
@@ -6107,6 +6272,16 @@
 	for (i = 0; i < v_budget; i++)
 		pf->msix_entries[i].entry = i;
 	vec = i40e_reserve_msix_vectors(pf, v_budget);
+
+	if (vec != v_budget) {
+		/* If we have limited resources, we will start with no vectors
+		 * for the special features and then allocate vectors to some
+		 * of these features based on the policy and at the end disable
+		 * the features that did not get any vectors.
+		 */
+		pf->num_vmdq_msix = 0;
+	}
+
 	if (vec < I40E_MIN_MSIX) {
 		pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
 		kfree(pf->msix_entries);
@@ -6115,27 +6290,25 @@
 
 	} else if (vec == I40E_MIN_MSIX) {
 		/* Adjust for minimal MSIX use */
-		dev_info(&pf->pdev->dev, "Features disabled, not enough MSI-X vectors\n");
-		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
 		pf->num_vmdq_vsis = 0;
 		pf->num_vmdq_qps = 0;
-		pf->num_vmdq_msix = 0;
 		pf->num_lan_qps = 1;
 		pf->num_lan_msix = 1;
 
 	} else if (vec != v_budget) {
+		/* reserve the misc vector */
+		vec--;
+
 		/* Scale vector usage down */
 		pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
-		vec--;                    /* reserve the misc vector */
+		pf->num_vmdq_vsis = 1;
 
 		/* partition out the remaining vectors */
 		switch (vec) {
 		case 2:
-			pf->num_vmdq_vsis = 1;
 			pf->num_lan_msix = 1;
 			break;
 		case 3:
-			pf->num_vmdq_vsis = 1;
 			pf->num_lan_msix = 2;
 			break;
 		default:
@@ -6147,6 +6320,11 @@
 		}
 	}
 
+	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
+	    (pf->num_vmdq_msix == 0)) {
+		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
+	}
 	return err;
 }
 
@@ -6171,7 +6349,7 @@
 	cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
 	if (vsi->netdev)
 		netif_napi_add(vsi->netdev, &q_vector->napi,
-			       i40e_napi_poll, vsi->work_limit);
+			       i40e_napi_poll, NAPI_POLL_WEIGHT);
 
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
@@ -6231,7 +6409,7 @@
 		if (err) {
 			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
 				       I40E_FLAG_RSS_ENABLED	|
-				       I40E_FLAG_DCB_ENABLED	|
+				       I40E_FLAG_DCB_CAPABLE	|
 				       I40E_FLAG_SRIOV_ENABLED	|
 				       I40E_FLAG_FD_SB_ENABLED	|
 				       I40E_FLAG_FD_ATR_ENABLED	|
@@ -6364,7 +6542,6 @@
 		return 0;
 
 	queue_count = min_t(int, queue_count, pf->rss_size_max);
-	queue_count = rounddown_pow_of_two(queue_count);
 
 	if (queue_count != pf->rss_size) {
 		i40e_prep_for_reset(pf);
@@ -6407,6 +6584,10 @@
 		    I40E_FLAG_MSIX_ENABLED    |
 		    I40E_FLAG_RX_1BUF_ENABLED;
 
+	/* Set default ITR */
+	pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
+	pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+
 	/* Depending on PF configurations, it is possible that the RSS
 	 * maximum might end up larger than the available queues
 	 */
@@ -6416,7 +6597,6 @@
 	if (pf->hw.func_caps.rss) {
 		pf->flags |= I40E_FLAG_RSS_ENABLED;
 		pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus());
-		pf->rss_size = rounddown_pow_of_two(pf->rss_size);
 	} else {
 		pf->rss_size = 1;
 	}
@@ -6432,8 +6612,12 @@
 	    (pf->hw.func_caps.fd_filters_best_effort > 0)) {
 		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 		pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
+		/* Setup a counter for fd_atr per pf */
+		pf->fd_atr_cnt_idx = I40E_FD_ATR_STAT_IDX(pf->hw.pf_id);
 		if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) {
 			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+			/* Setup a counter for fd_sb per pf */
+			pf->fd_sb_cnt_idx = I40E_FD_SB_STAT_IDX(pf->hw.pf_id);
 		} else {
 			dev_info(&pf->pdev->dev,
 				 "Flow Director Sideband mode Disabled in MFP mode\n");
@@ -6649,6 +6833,96 @@
 }
 
 #endif
+#ifdef HAVE_FDB_OPS
+/**
+ * i40e_ndo_fdb_add - ndo_fdb_add handler: add a static FDB address
+ * @ndm: netlink neighbour message; only ndm_state is examined
+ * @tb: netlink attributes, unused (USE_CONST_DEV_UC_CHAR prototype only)
+ * @dev: netdev whose unicast or multicast list receives the address
+ * @addr: MAC address to add
+ * @flags: netlink flags; a duplicate is an error only with NLM_F_EXCL
+ *
+ * Returns 0, -EOPNOTSUPP (no SR-IOV), -EINVAL or a dev_*_add_excl() error.
+ **/
+#ifdef USE_CONST_DEV_UC_CHAR
+static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			    struct net_device *dev,
+			    const unsigned char *addr, u16 flags)
+#else
+static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct net_device *dev,
+			    unsigned char *addr, u16 flags)
+#endif
+{
+	struct i40e_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_pf *pf = priv->vsi->back;
+	int ret;
+
+	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
+		return -EOPNOTSUPP;
+
+	/* hardware cannot age entries: an explicit state must be permanent */
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_info(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		ret = dev_uc_add_excl(dev, addr);
+	else
+		ret = is_multicast_ether_addr(addr) ?
+		      dev_mc_add_excl(dev, addr) : -EINVAL;
+
+	return (ret == -EEXIST && !(flags & NLM_F_EXCL)) ? 0 : ret;
+}
+
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+/**
+ * i40e_ndo_fdb_del - ndo_fdb_del handler: remove a static FDB address
+ * @ndm: netlink neighbour message; an explicit state must be NUD_PERMANENT
+ * @dev: netdev whose unicast or multicast list holds the address
+ * @addr: MAC address to remove
+ *
+ * Returns -EINVAL, -EOPNOTSUPP or the dev_uc_del()/dev_mc_del() result.
+ **/
+#ifdef USE_CONST_DEV_UC_CHAR
+static int i40e_ndo_fdb_del(struct ndmsg *ndm, struct net_device *dev,
+			    const unsigned char *addr)
+#else
+static int i40e_ndo_fdb_del(struct ndmsg *ndm, struct net_device *dev,
+			    unsigned char *addr)
+#endif
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_info(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
+		return -EOPNOTSUPP;
+	if (is_unicast_ether_addr(addr))
+		return dev_uc_del(dev, addr);
+	return is_multicast_ether_addr(addr) ? dev_mc_del(dev, addr) : -EINVAL;
+}
+
+/* ndo_fdb_dump handler: defer to ndo_dflt_fdb_dump() when the PF has
+ * I40E_FLAG_SRIOV_ENABLED set, otherwise hand @idx back untouched.
+ */
+static int i40e_ndo_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			     struct net_device *dev, int idx)
+{
+	struct i40e_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_pf *pf = priv->vsi->back;
+
+	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
+		return idx;
+	return ndo_dflt_fdb_dump(skb, cb, dev, idx);
+}
+
+#endif /* USE_DEFAULT_FDB_DEL_DUMP */
+#endif /* HAVE_FDB_OPS */
 static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_open		= i40e_open,
 	.ndo_stop		= i40e_close,
@@ -6669,13 +6943,21 @@
 	.ndo_set_features	= i40e_set_features,
 	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
-	.ndo_set_vf_tx_rate	= i40e_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
 	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
 	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
+	.ndo_set_vf_spoofchk	= i40e_ndo_set_vf_spoofck,
 #ifdef CONFIG_I40E_VXLAN
 	.ndo_add_vxlan_port	= i40e_add_vxlan_port,
 	.ndo_del_vxlan_port	= i40e_del_vxlan_port,
 #endif
+#ifdef HAVE_FDB_OPS
+	.ndo_fdb_add		= i40e_ndo_fdb_add,
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+	.ndo_fdb_del		= i40e_ndo_fdb_del,
+	.ndo_fdb_dump		= i40e_ndo_fdb_dump,
+#endif
+#endif
 };
 
 /**
@@ -6720,16 +7002,26 @@
 			   NETIF_F_TSO_ECN	       |
 			   NETIF_F_TSO6		       |
 			   NETIF_F_RXCSUM	       |
-			   NETIF_F_NTUPLE	       |
 			   NETIF_F_RXHASH	       |
 			   0;
 
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		netdev->features |= NETIF_F_NTUPLE;
+
 	/* copy netdev features into list of user selectable features */
 	netdev->hw_features |= netdev->features;
 
 	if (vsi->type == I40E_VSI_MAIN) {
 		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
-		memcpy(mac_addr, hw->mac.perm_addr, ETH_ALEN);
+		ether_addr_copy(mac_addr, hw->mac.perm_addr);
+		/* The following two steps are necessary to prevent reception
+		 * of tagged packets - by default the NVM loads a MAC-VLAN
+		 * filter that will accept any tagged packet.  This is to
+		 * prevent that during normal operations until a specific
+		 * VLAN tag filter has been set.
+		 */
+		i40e_rm_default_mac_filter(vsi, mac_addr);
+		i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, true);
 	} else {
 		/* relate the VSI_VMDQ name to the VSI_MAIN name */
 		snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
@@ -6739,8 +7031,8 @@
 	}
 	i40e_add_filter(vsi, brdcast, I40E_VLAN_ANY, false, false);
 
-	memcpy(netdev->dev_addr, mac_addr, ETH_ALEN);
-	memcpy(netdev->perm_addr, mac_addr, ETH_ALEN);
+	ether_addr_copy(netdev->dev_addr, mac_addr);
+	ether_addr_copy(netdev->perm_addr, mac_addr);
 	/* vlan gets same features (except vlan offload)
 	 * after any tweaks for specific VSI types
 	 */
@@ -6772,7 +7064,6 @@
 		return;
 
 	i40e_aq_delete_element(&vsi->back->hw, vsi->seid, NULL);
-	return;
 }
 
 /**
@@ -6898,6 +7189,13 @@
 
 		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
 		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
+		if (pf->vf[vsi->vf_id].spoofchk) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+			ctxt.info.sec_flags |=
+				(I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
+				 I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
+		}
 		/* Setup the VSI tx/rx queue map for TC0 only for now */
 		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
 		break;
@@ -6982,11 +7280,7 @@
 				unregister_netdev(vsi->netdev);
 			}
 		} else {
-			if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
-				i40e_down(vsi);
-			i40e_vsi_free_irq(vsi);
-			i40e_vsi_free_tx_resources(vsi);
-			i40e_vsi_free_rx_resources(vsi);
+			i40e_vsi_close(vsi);
 		}
 		i40e_vsi_disable_irq(vsi);
 	}
@@ -7013,7 +7307,7 @@
 	 * the orphan VEBs yet.  We'll wait for an explicit remove request
 	 * from up the network stack.
 	 */
-	for (n = 0, i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i] &&
 		    pf->vsi[i]->uplink_seid == uplink_seid &&
 		    (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
@@ -7192,7 +7486,7 @@
 
 	if (!veb && uplink_seid != pf->mac_seid) {
 
-		for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+		for (i = 0; i < pf->num_alloc_vsi; i++) {
 			if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
 				vsi = pf->vsi[i];
 				break;
@@ -7435,7 +7729,7 @@
 	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
 	 *       the VEB itself, so don't use (*branch) after this loop.
 	 */
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (!pf->vsi[i])
 			continue;
 		if (pf->vsi[i]->uplink_seid == branch_seid &&
@@ -7487,7 +7781,7 @@
 	pf = veb->pf;
 
 	/* find the remaining VSI and check for extras */
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
 			n++;
 			vsi = pf->vsi[i];
@@ -7516,8 +7810,6 @@
 
 	i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
 	i40e_veb_clear(veb);
-
-	return;
 }
 
 /**
@@ -7601,10 +7893,10 @@
 	}
 
 	/* make sure there is such a vsi and uplink */
-	for (vsi_idx = 0; vsi_idx < pf->hw.func_caps.num_vsis; vsi_idx++)
+	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
 		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
 			break;
-	if (vsi_idx >= pf->hw.func_caps.num_vsis && vsi_seid != 0) {
+	if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) {
 		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
 			 vsi_seid);
 		return NULL;
@@ -7639,6 +7931,8 @@
 	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
 	if (ret)
 		goto err_veb;
+	if (vsi_idx == pf->lan_vsi)
+		pf->lan_veb = veb->idx;
 
 	return veb;
 
@@ -7774,15 +8068,6 @@
 				 "header: %d reported %d total\n",
 				 num_reported, num_total);
 
-		if (num_reported) {
-			int sz = sizeof(*sw_config) * num_reported;
-
-			kfree(pf->sw_config);
-			pf->sw_config = kzalloc(sz, GFP_KERNEL);
-			if (pf->sw_config)
-				memcpy(pf->sw_config, sw_config, sz);
-		}
-
 		for (i = 0; i < num_reported; i++) {
 			struct i40e_aqc_switch_config_element_resp *ele =
 				&sw_config->element[i];
@@ -7949,9 +8234,7 @@
 	queues_left = pf->hw.func_caps.num_tx_qp;
 
 	if ((queues_left == 1) ||
-	    !(pf->flags & I40E_FLAG_MSIX_ENABLED) ||
-	    !(pf->flags & (I40E_FLAG_RSS_ENABLED | I40E_FLAG_FD_SB_ENABLED |
-			   I40E_FLAG_DCB_ENABLED))) {
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
 		/* one qp for PF, no queues for anything else */
 		queues_left = 0;
 		pf->rss_size = pf->num_lan_qps = 1;
@@ -7960,14 +8243,27 @@
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
 			       I40E_FLAG_FD_SB_ENABLED	|
 			       I40E_FLAG_FD_ATR_ENABLED	|
-			       I40E_FLAG_DCB_ENABLED	|
+			       I40E_FLAG_DCB_CAPABLE	|
 			       I40E_FLAG_SRIOV_ENABLED	|
 			       I40E_FLAG_VMDQ_ENABLED);
+	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
+				  I40E_FLAG_FD_SB_ENABLED |
+				  I40E_FLAG_FD_ATR_ENABLED |
+				  I40E_FLAG_DCB_CAPABLE))) {
+		/* one qp for PF */
+		pf->rss_size = pf->num_lan_qps = 1;
+		queues_left -= pf->num_lan_qps;
+
+		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_FD_SB_ENABLED	|
+			       I40E_FLAG_FD_ATR_ENABLED	|
+			       I40E_FLAG_DCB_ENABLED	|
+			       I40E_FLAG_VMDQ_ENABLED);
 	} else {
 		/* Not enough queues for all TCs */
-		if ((pf->flags & I40E_FLAG_DCB_ENABLED) &&
+		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
 		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
-			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
 			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
 		}
 		pf->num_lan_qps = pf->rss_size_max;
@@ -7998,7 +8294,6 @@
 	}
 
 	pf->queues_left = queues_left;
-	return;
 }
 
 /**
@@ -8055,12 +8350,13 @@
 
 	if (pf->flags & I40E_FLAG_RSS_ENABLED)
 		buf += sprintf(buf, "RSS ");
-	buf += sprintf(buf, "FDir ");
 	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
-		buf += sprintf(buf, "ATR ");
-	if (pf->flags & I40E_FLAG_FD_SB_ENABLED)
+		buf += sprintf(buf, "FD_ATR ");
+	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+		buf += sprintf(buf, "FD_SB ");
 		buf += sprintf(buf, "NTUPLE ");
-	if (pf->flags & I40E_FLAG_DCB_ENABLED)
+	}
+	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
 		buf += sprintf(buf, "DCB ");
 	if (pf->flags & I40E_FLAG_PTP)
 		buf += sprintf(buf, "PTP ");
@@ -8083,13 +8379,13 @@
  **/
 static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	struct i40e_driver_version dv;
 	struct i40e_pf *pf;
 	struct i40e_hw *hw;
 	static u16 pfs_found;
 	u16 link_status;
 	int err = 0;
 	u32 len;
+	u32 i;
 
 	err = pci_enable_device_mem(pdev);
 	if (err)
@@ -8201,6 +8497,10 @@
 
 	i40e_verify_eeprom(pf);
 
+	/* Rev 0 hardware was never productized */
+	if (hw->revision_id < 1)
+		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
+
 	i40e_clear_pxe_mode(hw);
 	err = i40e_get_capabilities(pf);
 	if (err)
@@ -8234,7 +8534,7 @@
 		goto err_mac_addr;
 	}
 	dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
-	memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
+	ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
 
 	pci_set_drvdata(pdev, pf);
 	pci_save_state(pdev);
@@ -8242,8 +8542,8 @@
 	err = i40e_init_pf_dcb(pf);
 	if (err) {
 		dev_info(&pdev->dev, "init_pf_dcb failed: %d\n", err);
-		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
-		goto err_init_dcb;
+		pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+		/* Continue without DCB enabled */
 	}
 #endif /* CONFIG_I40E_DCB */
 
@@ -8264,10 +8564,18 @@
 	i40e_determine_queue_usage(pf);
 	i40e_init_interrupt_scheme(pf);
 
-	/* Set up the *vsi struct based on the number of VSIs in the HW,
-	 * and set up our local tracking of the MAIN PF vsi.
+	/* The number of VSIs reported by the FW is the minimum guaranteed
+	 * to us; HW supports far more and we share the remaining pool with
+	 * the other PFs. We allocate space for more than the guarantee with
+	 * the understanding that we might not get them all later.
 	 */
-	len = sizeof(struct i40e_vsi *) * pf->hw.func_caps.num_vsis;
+	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
+		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
+	else
+		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
+
+	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
+	len = sizeof(struct i40e_vsi *) * pf->num_alloc_vsi;
 	pf->vsi = kzalloc(len, GFP_KERNEL);
 	if (!pf->vsi) {
 		err = -ENOMEM;
@@ -8279,6 +8587,13 @@
 		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
 		goto err_vsis;
 	}
+	/* if FDIR VSI was set up, start it now */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
+			i40e_vsi_open(pf->vsi[i]);
+			break;
+		}
+	}
 
 	/* The main driver is (mostly) up and happy. We need to set this state
 	 * before setting up the misc vector or we get a race and the vector
@@ -8300,6 +8615,7 @@
 		}
 	}
 
+#ifdef CONFIG_PCI_IOV
 	/* prep for VF support */
 	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
 	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
@@ -8322,17 +8638,14 @@
 					 err);
 		}
 	}
+#endif /* CONFIG_PCI_IOV */
 
 	pfs_found++;
 
 	i40e_dbg_pf_init(pf);
 
 	/* tell the firmware that we're starting */
-	dv.major_version = DRV_VERSION_MAJOR;
-	dv.minor_version = DRV_VERSION_MINOR;
-	dv.build_version = DRV_VERSION_BUILD;
-	dv.subbuild_version = 0;
-	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
+	i40e_send_version(pf);
 
 	/* since everything's happy, start the service_task timer */
 	mod_timer(&pf->service_timer,
@@ -8373,9 +8686,6 @@
 err_switch_setup:
 	i40e_reset_interrupt_capability(pf);
 	del_timer_sync(&pf->service_timer);
-#ifdef CONFIG_I40E_DCB
-err_init_dcb:
-#endif /* CONFIG_I40E_DCB */
 err_mac_addr:
 err_configure_lan_hmc:
 	(void)i40e_shutdown_lan_hmc(hw);
@@ -8456,10 +8766,13 @@
 	}
 
 	/* shutdown and destroy the HMC */
-	ret_code = i40e_shutdown_lan_hmc(&pf->hw);
-	if (ret_code)
-		dev_warn(&pdev->dev,
-			 "Failed to destroy the HMC resources: %d\n", ret_code);
+	if (pf->hw.hmc.hmc_obj) {
+		ret_code = i40e_shutdown_lan_hmc(&pf->hw);
+		if (ret_code)
+			dev_warn(&pdev->dev,
+				 "Failed to destroy the HMC resources: %d\n",
+				 ret_code);
+	}
 
 	/* shutdown the adminq */
 	ret_code = i40e_shutdown_adminq(&pf->hw);
@@ -8470,7 +8783,7 @@
 
 	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
 	i40e_clear_interrupt_scheme(pf);
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i]) {
 			i40e_vsi_clear_rings(pf->vsi[i]);
 			i40e_vsi_clear(pf->vsi[i]);
@@ -8485,7 +8798,6 @@
 
 	kfree(pf->qp_pile);
 	kfree(pf->irq_pile);
-	kfree(pf->sw_config);
 	kfree(pf->vsi);
 
 	/* force a PF reset to clean anything leftover */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 9cd57e6..a430699 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h

@@ -70,10 +70,12 @@
 				u16 *fw_major_version, u16 *fw_minor_version,
 				u16 *api_major_version, u16 *api_minor_version,
 				struct i40e_asq_cmd_details *cmd_details);
-i40e_status i40e_aq_set_phy_reset(struct i40e_hw *hw,
+i40e_status i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
 				struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
@@ -157,8 +159,8 @@
 i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
-				u16 udp_port, u8 header_len,
-				u8 protocol_index, u8 *filter_index,
+				u16 udp_port, u8 protocol_index,
+				u8 *filter_index,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
 				struct i40e_asq_cmd_details *cmd_details);
@@ -167,6 +169,9 @@
 i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
 				    u16 flags, u8 *mac_addr,
 				    struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+				u16 seid, u16 credit, u8 max_credit,
+				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw,
@@ -216,6 +221,7 @@
 i40e_status i40e_get_mac_addr(struct i40e_hw *hw,
 						u8 *mac_addr);
 i40e_status i40e_validate_mac_addr(u8 *mac_addr);
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
 /* prototype for functions used for NVM access */
 i40e_status i40e_init_nvm(struct i40e_hw *hw);
 i40e_status i40e_acquire_nvm(struct i40e_hw *hw,

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index e61e637..101f439 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c

@@ -48,7 +48,6 @@
 					I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2  (0x2 << \
 					I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
-#define I40E_PTP_TX_TIMEOUT  (HZ * 15)
 
 /**
  * i40e_ptp_read - Read the PHC time from the device
@@ -217,40 +216,6 @@
 }
 
 /**
- * i40e_ptp_tx_work
- * @work: pointer to work struct
- *
- * This work function polls the PRTTSYN_STAT_0.TXTIME bit to determine when a
- * Tx timestamp event has occurred, in order to pass the Tx timestamp value up
- * the stack in the skb.
- */
-static void i40e_ptp_tx_work(struct work_struct *work)
-{
-	struct i40e_pf *pf = container_of(work, struct i40e_pf,
-					  ptp_tx_work);
-	struct i40e_hw *hw = &pf->hw;
-	u32 prttsyn_stat_0;
-
-	if (!pf->ptp_tx_skb)
-		return;
-
-	if (time_is_before_jiffies(pf->ptp_tx_start +
-				   I40E_PTP_TX_TIMEOUT)) {
-		dev_kfree_skb_any(pf->ptp_tx_skb);
-		pf->ptp_tx_skb = NULL;
-		pf->tx_hwtstamp_timeouts++;
-		dev_warn(&pf->pdev->dev, "clearing Tx timestamp hang\n");
-		return;
-	}
-
-	prttsyn_stat_0 = rd32(hw, I40E_PRTTSYN_STAT_0);
-	if (prttsyn_stat_0 & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
-		i40e_ptp_tx_hwtstamp(pf);
-	else
-		schedule_work(&pf->ptp_tx_work);
-}
-
-/**
  * i40e_ptp_enable - Enable/disable ancillary features of the PHC subsystem
  * @ptp: The PTP clock structure
  * @rq: The requested feature to change
@@ -608,7 +573,6 @@
 		u32 regval;
 
 		spin_lock_init(&pf->tmreg_lock);
-		INIT_WORK(&pf->ptp_tx_work, i40e_ptp_tx_work);
 
 		dev_info(&pf->pdev->dev, "%s: added PHC on %s\n", __func__,
 			 netdev->name);
@@ -647,7 +611,6 @@
 	pf->ptp_tx = false;
 	pf->ptp_rx = false;
 
-	cancel_work_sync(&pf->ptp_tx_work);
 	if (pf->ptp_tx_skb) {
 		dev_kfree_skb_any(pf->ptp_tx_skb);
 		pf->ptp_tx_skb = NULL;

diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 1d40f42..947de98 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h

@@ -1340,8 +1340,6 @@
 #define I40E_PFINT_ICR0_GPIO_MASK (0x1 << I40E_PFINT_ICR0_GPIO_SHIFT)
 #define I40E_PFINT_ICR0_TIMESYNC_SHIFT 23
 #define I40E_PFINT_ICR0_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_TIMESYNC_SHIFT)
-#define I40E_PFINT_ICR0_STORM_DETECT_SHIFT 24
-#define I40E_PFINT_ICR0_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_STORM_DETECT_SHIFT)
 #define I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
 #define I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
 #define I40E_PFINT_ICR0_HMC_ERR_SHIFT 26
@@ -1367,8 +1365,6 @@
 #define I40E_PFINT_ICR0_ENA_GPIO_MASK (0x1 << I40E_PFINT_ICR0_ENA_GPIO_SHIFT)
 #define I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT 23
 #define I40E_PFINT_ICR0_ENA_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT)
-#define I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT 24
-#define I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT)
 #define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
 #define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
 #define I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT 26
@@ -1589,6 +1585,14 @@
 #define I40E_GLLAN_TSOMSK_M 0x000442DC
 #define I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT 0
 #define I40E_GLLAN_TSOMSK_M_TCPMSKM_MASK (0xFFF << I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS(_i) (0x000E6500 + ((_i) * 4)) /* i=0..11 */
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT 0
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_MASK (0x7FF << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK (0x1 << I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK (0x1 << I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+
 #define I40E_PFLAN_QALLOC 0x001C0400
 #define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
 #define I40E_PFLAN_QALLOC_FIRSTQ_MASK (0x7FF << I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9478ddc..e49f31d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c

@@ -24,6 +24,7 @@
  *
  ******************************************************************************/
 
+#include <linux/prefetch.h>
 #include "i40e.h"
 #include "i40e_prototype.h"
 
@@ -61,7 +62,7 @@
 
 	/* find existing FDIR VSI */
 	vsi = NULL;
-	for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
 			vsi = pf->vsi[i];
 	if (!vsi)
@@ -120,7 +121,7 @@
 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
 		dcc |= ((u32)fdir_data->cnt_index <<
 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
-		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 	}
 
 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
@@ -183,7 +184,6 @@
 	struct iphdr *ip;
 	bool err = false;
 	int ret;
-	int i;
 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -199,21 +199,17 @@
 	ip->saddr = fd_data->src_ip[0];
 	udp->source = fd_data->src_port;
 
-	for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
-	     i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
-		fd_data->pctype = i;
-		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
-
-		if (ret) {
-			dev_info(&pf->pdev->dev,
-				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-			err = true;
-		} else {
-			dev_info(&pf->pdev->dev,
-				 "Filter OK for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-		}
+	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
+			 fd_data->pctype, ret);
+		err = true;
+	} else {
+		dev_info(&pf->pdev->dev,
+			 "Filter OK for PCTYPE %d (ret = %d)\n",
+			 fd_data->pctype, ret);
 	}
 
 	return err ? -EOPNOTSUPP : 0;
@@ -262,7 +258,7 @@
 		}
 	}
 
-	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
+	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
 
 	if (ret) {
@@ -455,22 +451,20 @@
 
 		/* filter programming failed most likely due to table full */
 		fcnt_prog = i40e_get_current_fd_count(pf);
-		fcnt_avail = pf->hw.fdir_shared_filter_count +
-						       pf->fdir_pf_filter_count;
-
+		fcnt_avail = i40e_get_fd_cnt_all(pf);
 		/* If ATR is running fcnt_prog can quickly change,
 		 * if we are very close to full, it makes sense to disable
 		 * FD ATR/SB and then re-enable it when there is room.
 		 */
 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
 			/* Turn off ATR first */
-			if (pf->flags | I40E_FLAG_FD_ATR_ENABLED) {
+			if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
 				pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 				dev_warn(&pdev->dev, "FD filter space full, ATR for further flows will be turned off\n");
 				pf->auto_disable_flags |=
 						       I40E_FLAG_FD_ATR_ENABLED;
 				pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT;
-			} else if (pf->flags | I40E_FLAG_FD_SB_ENABLED) {
+			} else if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
 				pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
 				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
 				pf->auto_disable_flags |=
@@ -1199,10 +1193,12 @@
 				    u32 rx_error,
 				    u16 rx_ptype)
 {
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
+	bool ipv4 = false, ipv6 = false;
 	bool ipv4_tunnel, ipv6_tunnel;
 	__wsum rx_udp_csum;
-	__sum16 csum;
 	struct iphdr *iph;
+	__sum16 csum;
 
 	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
 		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
@@ -1213,29 +1209,57 @@
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Rx csum enabled and ip headers found? */
-	if (!(vsi->netdev->features & NETIF_F_RXCSUM &&
-	      rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
 		return;
 
+	/* did the hardware decode the packet and checksum? */
+	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+		return;
+
+	/* both known and outer_ip must be set for the below code to work */
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
+		ipv4 = true;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
+		ipv6 = true;
+
+	if (ipv4 &&
+	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
+			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+		goto checksum_fail;
+
 	/* likely incorrect csum if alternate IP extension headers found */
-	if (rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+	if (ipv6 &&
+	    decoded.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP &&
+	    rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT) &&
+	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+		/* don't increment checksum err here, non-fatal err */
 		return;
 
-	/* IP or L4 or outmost IP checksum error */
-	if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
-			(1 << I40E_RX_DESC_ERROR_L4E_SHIFT) |
-			(1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) {
-		vsi->back->hw_csum_rx_error++;
-		return;
-	}
+	/* there was some L4 error, count error and punt packet to the stack */
+	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+		goto checksum_fail;
 
+	/* handle packets that were not able to be checksummed due
+	 * to arrival speed, in this case the stack can compute
+	 * the csum.
+	 */
+	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
+		return;
+
+	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
+	 * it in the driver, hardware does not do it for us.
+	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
+	 * so the total length of IPv4 header is IHL*4 bytes
+	 * The UDP_0 bit *may* be set if the *inner* header is UDP
+	 */
 	if (ipv4_tunnel &&
+	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
 	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
-		/* If VXLAN traffic has an outer UDPv4 checksum we need to check
-		 * it in the driver, hardware does not do it for us.
-		 * Since L3L4P bit was set we assume a valid IHL value (>=5)
-		 * so the total length of IPv4 header is IHL*4 bytes
-		 */
 		skb->transport_header = skb->mac_header +
 					sizeof(struct ethhdr) +
 					(ip_hdr(skb)->ihl * 4);
@@ -1252,13 +1276,16 @@
 				(skb->len - skb_transport_offset(skb)),
 				IPPROTO_UDP, rx_udp_csum);
 
-		if (udp_hdr(skb)->check != csum) {
-			vsi->back->hw_csum_rx_error++;
-			return;
-		}
+		if (udp_hdr(skb)->check != csum)
+			goto checksum_fail;
 	}
 
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
 }
 
 /**
@@ -1435,6 +1462,9 @@
 		/* ERR_MASK will only have valid bits if EOP set */
 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
 			dev_kfree_skb_any(skb);
+			/* TODO: shouldn't we increment a counter indicating the
+			 * drop?
+			 */
 			goto next_desc;
 		}
 
@@ -1665,6 +1695,11 @@
 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
 
+	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+	dtype_cmd |=
+		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+
 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
 }
@@ -1825,9 +1860,6 @@
 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
 				I40E_TXD_CTX_QW1_CMD_SHIFT;
 
-	pf->ptp_tx_start = jiffies;
-	schedule_work(&pf->ptp_tx_work);
-
 	return 1;
 }
 
@@ -2179,9 +2211,7 @@
 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 				      struct i40e_ring *tx_ring)
 {
-#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
 	unsigned int f;
-#endif
 	int count = 0;
 
 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
@@ -2190,12 +2220,9 @@
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-#else
-	count += skb_shinfo(skb)->nr_frags;
-#endif
+
 	count += TXD_USE_COUNT(skb_headlen(skb));
 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
 		tx_ring->tx_stats.tx_busy++;

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index d534969..0277894 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h

@@ -27,7 +27,7 @@
 #ifndef _I40E_TXRX_H_
 #define _I40E_TXRX_H_
 
-/* Interrupt Throttling and Rate Limiting (storm control) Goodies */
+/* Interrupt Throttling and Rate Limiting Goodies */
 
 #define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
 #define I40E_MIN_ITR               0x0004  /* reg uses 2 usec resolution */
@@ -69,16 +69,11 @@
 
 /* Supported RSS offloads */
 #define I40E_DEFAULT_RSS_HENA ( \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
 	((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \
@@ -122,11 +117,11 @@
 #define i40e_rx_desc i40e_32byte_rx_desc
 
 #define I40E_MIN_TX_LEN		17
-#define I40E_MAX_DATA_PER_TXD	16383	/* aka 16kB - 1 */
+#define I40E_MAX_DATA_PER_TXD	8192
 
 /* Tx Descriptors needed, worst case */
 #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), I40E_MAX_DATA_PER_TXD)
-#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
 
 #define I40E_TX_FLAGS_CSUM		(u32)(1)
 #define I40E_TX_FLAGS_HW_VLAN		(u32)(1 << 1)
@@ -184,7 +179,6 @@
 	__I40E_TX_DETECT_HANG,
 	__I40E_HANG_CHECK_ARMED,
 	__I40E_RX_PS_ENABLED,
-	__I40E_RX_LRO_ENABLED,
 	__I40E_RX_16BYTE_DESC_ENABLED,
 };
 
@@ -200,12 +194,6 @@
 	set_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
 #define clear_check_for_tx_hang(ring) \
 	clear_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
-#define ring_is_lro_enabled(ring) \
-	test_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
-#define set_ring_lro_enabled(ring) \
-	set_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
-#define clear_ring_lro_enabled(ring) \
-	clear_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
 #define ring_is_16byte_desc_enabled(ring) \
 	test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
 #define set_ring_16byte_desc_enabled(ring) \

diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 71a968f..9d39ff2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h

@@ -36,12 +36,10 @@
 
 /* Device IDs */
 #define I40E_DEV_ID_SFP_XL710		0x1572
-#define I40E_DEV_ID_SFP_X710		0x1573
 #define I40E_DEV_ID_QEMU		0x1574
 #define I40E_DEV_ID_KX_A		0x157F
 #define I40E_DEV_ID_KX_B		0x1580
 #define I40E_DEV_ID_KX_C		0x1581
-#define I40E_DEV_ID_KX_D		0x1582
 #define I40E_DEV_ID_QSFP_A		0x1583
 #define I40E_DEV_ID_QSFP_B		0x1584
 #define I40E_DEV_ID_QSFP_C		0x1585
@@ -60,8 +58,8 @@
 /* Max default timeout in ms, */
 #define I40E_MAX_NVM_TIMEOUT		18000
 
-/* Switch from mc to the 2usec global time (this is the GTIME resolution) */
-#define I40E_MS_TO_GTIME(time)		(((time) * 1000) / 2)
+/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
+#define I40E_MS_TO_GTIME(time)		((time) * 1000)
 
 /* forward declaration */
 struct i40e_hw;
@@ -167,6 +165,9 @@
 	u8 loopback;
 	/* is Link Status Event notification to SW enabled */
 	bool lse_enable;
+	u16 max_frame_size;
+	bool crc_enable;
+	u8 pacing;
 };
 
 struct i40e_phy_info {
@@ -409,6 +410,7 @@
 	u8 minor_version;
 	u8 build_version;
 	u8 subbuild_version;
+	u8 driver_string[32];
 };
 
 /* RX Descriptors */
@@ -488,9 +490,6 @@
 	} wb;  /* writeback */
 };
 
-#define I40E_RXD_QW1_STATUS_SHIFT	0
-#define I40E_RXD_QW1_STATUS_MASK	(0x7FFFUL << I40E_RXD_QW1_STATUS_SHIFT)
-
 enum i40e_rx_desc_status_bits {
 	/* Note: These are predefined bit offsets */
 	I40E_RX_DESC_STATUS_DD_SHIFT		= 0,
@@ -507,9 +506,14 @@
 	I40E_RX_DESC_STATUS_LPBK_SHIFT		= 14,
 	I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
 	I40E_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
-	I40E_RX_DESC_STATUS_UDP_0_SHIFT		= 18
+	I40E_RX_DESC_STATUS_UDP_0_SHIFT		= 18,
+	I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
 };
 
+#define I40E_RXD_QW1_STATUS_SHIFT	0
+#define I40E_RXD_QW1_STATUS_MASK	(((1 << I40E_RX_DESC_STATUS_LAST) - 1) \
+					 << I40E_RXD_QW1_STATUS_SHIFT)
+
 #define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT   I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
 #define I40E_RXD_QW1_STATUS_TSYNINDX_MASK	(0x3UL << \
 					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
@@ -537,7 +541,8 @@
 	I40E_RX_DESC_ERROR_IPE_SHIFT		= 3,
 	I40E_RX_DESC_ERROR_L4E_SHIFT		= 4,
 	I40E_RX_DESC_ERROR_EIPE_SHIFT		= 5,
-	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6
+	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
+	I40E_RX_DESC_ERROR_PPRS_SHIFT		= 7
 };
 
 enum i40e_rx_desc_error_l3l4e_fcoe_masks {
@@ -658,7 +663,6 @@
 	I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
 	I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
 	I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
-	I40E_RX_DESC_EXT_STATUS_FTYPE_SHIFT	= 6, /* 3 BITS */
 	I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
 	I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
 	I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
@@ -862,18 +866,14 @@
 
 /* Packet Classifier Types for filters */
 enum i40e_filter_pctype {
-	/* Note: Values 0-28 are reserved for future use */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
+	/* Note: Values 0-30 are reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
-	I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN		= 32,
+	/* Note: Value 32 is reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
 	I40E_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
 	I40E_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
 	I40E_FILTER_PCTYPE_FRAG_IPV4			= 36,
-	/* Note: Values 37-38 are reserved for future use */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
+	/* Note: Values 37-40 are reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
 	I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN		= 42,
 	I40E_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
@@ -955,6 +955,16 @@
 	struct i40e_aqc_vsi_properties_data info;
 };
 
+struct i40e_veb_context {
+	u16 seid;
+	u16 uplink_seid;
+	u16 veb_number;
+	u16 vebs_allocated;
+	u16 vebs_unallocated;
+	u16 flags;
+	struct i40e_aqc_get_veb_parameters_completion info;
+};
+
 /* Statistics collected by each port, VSI, VEB, and S-channel */
 struct i40e_eth_stats {
 	u64 rx_bytes;			/* gorc */
@@ -962,8 +972,6 @@
 	u64 rx_multicast;		/* mprc */
 	u64 rx_broadcast;		/* bprc */
 	u64 rx_discards;		/* rdpc */
-	u64 rx_errors;			/* repc */
-	u64 rx_missed;			/* rmpc */
 	u64 rx_unknown_protocol;	/* rupp */
 	u64 tx_bytes;			/* gotc */
 	u64 tx_unicast;			/* uptc */
@@ -1015,9 +1023,12 @@
 	u64 tx_size_big;		/* ptc9522 */
 	u64 mac_short_packet_dropped;	/* mspdc */
 	u64 checksum_error;		/* xec */
+	/* flow director stats */
+	u64 fd_atr_match;
+	u64 fd_sb_match;
 	/* EEE LPI */
-	bool tx_lpi_status;
-	bool rx_lpi_status;
+	u32 tx_lpi_status;
+	u32 rx_lpi_status;
 	u64 tx_lpi_count;		/* etlpic */
 	u64 rx_lpi_count;		/* erlpic */
 };

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index 22a1b69..70951d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h

@@ -341,10 +341,6 @@
 	int severity;
 };
 
-/* The following are TBD, not necessary for LAN functionality.
- * I40E_VIRTCHNL_OP_FCOE
- */
-
 /* VF reset states - these are written into the RSTAT register:
  * I40E_VFGEN_RSTAT1 on the PF
  * I40E_VFGEN_RSTAT on the VF

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 02c11a7..f5b9d20 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

@@ -29,6 +29,24 @@
 /***********************misc routines*****************************/
 
 /**
+ * i40e_vc_disable_vf
+ * @pf: pointer to the pf info
+ * @vf: pointer to the vf info
+ *
+ * Disable the VF through a SW reset
+ **/
+static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 trigger = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+
+	/* raise the software-reset bit in this VF's reset trigger register */
+	trigger |= I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+	wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), trigger);
+	i40e_flush(hw);
+}
+
+/**
  * i40e_vc_isvalid_vsi_id
  * @vf: pointer to the vf info
  * @vsi_id: vf relative vsi id
@@ -230,9 +248,8 @@
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
-	tx_ctx.head_wb_ena = 1;
-	tx_ctx.head_wb_addr = info->dma_ring_addr +
-			      (info->ring_len * sizeof(struct i40e_tx_desc));
+	tx_ctx.head_wb_ena = info->headwb_enabled;
+	tx_ctx.head_wb_addr = info->dma_headwb_addr;
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
@@ -336,6 +353,7 @@
 	rx_ctx.tphhead_ena = 1;
 	rx_ctx.lrxqthresh = 2;
 	rx_ctx.crcstrip = 1;
+	rx_ctx.prefena = 1;
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_rx_queue_context(hw, pf_queue_id);
@@ -416,6 +434,15 @@
 	if (ret)
 		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
 
+	/* Set VF bandwidth if specified */
+	if (vf->tx_rate) {
+		ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
+						  vf->tx_rate / 50, 0, NULL);
+		if (ret)
+			dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
+				vf->vf_id, ret);
+	}
+
 error_alloc_vsi_res:
 	return ret;
 }
@@ -815,6 +842,10 @@
 	kfree(pf->vf);
 	pf->vf = NULL;
 
+	/* This check is for when the driver is unloaded while VFs are
+	 * assigned. Setting the number of VFs to 0 through sysfs is caught
+	 * before this function ever gets called.
+	 */
 	if (!i40e_vfs_are_assigned(pf)) {
 		pci_disable_sriov(pf->pdev);
 		/* Acknowledge VFLR for all VFS. Without this, VFs will fail to
@@ -867,6 +898,7 @@
 		ret = -ENOMEM;
 		goto err_alloc;
 	}
+	pf->vf = vfs;
 
 	/* apply default profile */
 	for (i = 0; i < num_alloc_vfs; i++) {
@@ -876,13 +908,13 @@
 
 		/* assign default capabilities */
 		set_bit(I40E_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
+		vfs[i].spoofchk = true;
 		/* vf resources get allocated during reset */
 		i40e_reset_vf(&vfs[i], false);
 
 		/* enable vf vplan_qtable mappings */
 		i40e_enable_vf_mappings(&vfs[i]);
 	}
-	pf->vf = vfs;
 	pf->num_alloc_vfs = num_alloc_vfs;
 
 	i40e_enable_pf_switch_lb(pf);
@@ -951,7 +983,12 @@
 	if (num_vfs)
 		return i40e_pci_sriov_enable(pdev, num_vfs);
 
-	i40e_free_vfs(pf);
+	if (!i40e_vfs_are_assigned(pf)) {
+		i40e_free_vfs(pf);
+	} else {
+		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
+		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -2022,16 +2059,14 @@
 	}
 
 	/* delete the temporary mac address */
-	i40e_del_filter(vsi, vf->default_lan_addr.addr, 0, true, false);
+	i40e_del_filter(vsi, vf->default_lan_addr.addr, vf->port_vlan_id,
+			true, false);
 
-	/* add the new mac address */
-	f = i40e_add_filter(vsi, mac, 0, true, false);
-	if (!f) {
-		dev_err(&pf->pdev->dev,
-			"Unable to add VF ucast filter\n");
-		ret = -ENOMEM;
-		goto error_param;
-	}
+	/* Delete all the filters for this VSI - we're going to kill it
+	 * anyway.
+	 */
+	list_for_each_entry(f, &vsi->mac_filter_list, list)
+		i40e_del_filter(vsi, f->macaddr, f->vlan, true, false);
 
 	dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
 	/* program mac filter */
@@ -2040,7 +2075,7 @@
 		ret = -EIO;
 		goto error_param;
 	}
-	memcpy(vf->default_lan_addr.addr, mac, ETH_ALEN);
+	ether_addr_copy(vf->default_lan_addr.addr, mac);
 	vf->pf_set_mac = true;
 	dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
 	ret = 0;
@@ -2088,18 +2123,28 @@
 		goto error_pvid;
 	}
 
-	if (vsi->info.pvid == 0 && i40e_is_vsi_in_vlan(vsi))
+	if (vsi->info.pvid == 0 && i40e_is_vsi_in_vlan(vsi)) {
 		dev_err(&pf->pdev->dev,
 			"VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
 			vf_id);
+		/* Administrator Error - knock the VF offline until he does
+		 * the right thing by reconfiguring his network correctly
+		 * and then reloading the VF driver.
+		 */
+		i40e_vc_disable_vf(pf, vf);
+	}
 
 	/* Check for condition where there was already a port VLAN ID
 	 * filter set and now it is being deleted by setting it to zero.
+	 * Additionally check for the condition where there was a port
+	 * VLAN but now there is a new and different port VLAN being set.
 	 * Before deleting all the old VLAN filters we must add new ones
 	 * with -1 (I40E_VLAN_ANY) or otherwise we're left with all our
 	 * MAC addresses deleted.
 	 */
-	if (!(vlan_id || qos) && vsi->info.pvid)
+	if ((!(vlan_id || qos) ||
+	    (vlan_id | qos) != le16_to_cpu(vsi->info.pvid)) &&
+	    vsi->info.pvid)
 		ret = i40e_vsi_add_vlan(vsi, I40E_VLAN_ANY);
 
 	if (vsi->info.pvid) {
@@ -2150,6 +2195,8 @@
 	return ret;
 }
 
+#define I40E_BW_CREDIT_DIVISOR 50     /* 50Mbps per BW credit */
+#define I40E_MAX_BW_INACTIVE_ACCUM 4  /* device can accumulate 4 credits max */
 /**
  * i40e_ndo_set_vf_bw
  * @netdev: network interface device structure
@@ -2158,9 +2205,76 @@
  *
  * configure vf tx rate
  **/
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate)
 {
-	return -EOPNOTSUPP;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+	int speed = 0;	/* link speed in Mbps; stays 0 unless 40G/10G/1G */
+	int ret = 0;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d.\n", vf_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if (min_tx_rate) {
+		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for vf %d.\n",
+			min_tx_rate, vf_id);
+		return -EINVAL;
+	}
+
+	vf = &(pf->vf[vf_id]);
+	vsi = pf->vsi[vf->lan_vsi_index];
+	if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "Uninitialized VF %d.\n", vf_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	switch (pf->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case I40E_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case I40E_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	default:
+		break;
+	}
+
+	if (max_tx_rate > speed) {
+		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for vf %d.\n",
+			max_tx_rate, vf->vf_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if ((max_tx_rate < I40E_BW_CREDIT_DIVISOR) && (max_tx_rate > 0)) {
+		dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n");
+		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
+	}
+
+	/* Tx rate credits are in units of 50Mbps; 0 credits disables the limit */
+	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
+					  max_tx_rate / I40E_BW_CREDIT_DIVISOR,
+					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
+			ret);
+		ret = -EIO;
+		goto error;
+	}
+	vf->tx_rate = max_tx_rate;
+error:
+	return ret;
 }
 
 /**
@@ -2200,10 +2314,18 @@
 
 	memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN);
 
-	ivi->tx_rate = 0;
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK;
 	ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >>
 		   I40E_VLAN_PRIORITY_SHIFT;
+	if (vf->link_forced == false)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vf->link_up == true)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+	ivi->spoofchk = vf->spoofchk;
 	ret = 0;
 
 error_param:
@@ -2270,3 +2392,50 @@
 error_out:
 	return ret;
 }
+
+/**
+ * i40e_ndo_set_vf_spoofck
+ * @netdev: network interface device structure
+ * @vf_id: vf identifier
+ * @enable: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking.  Returns 0 on success or when the
+ * requested state is already set, -EINVAL for a bad VF id, -EIO on AQ failure.
+ **/
+int i40e_ndo_set_vf_spoofck(struct net_device *netdev, int vf_id, bool enable)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi_context ctxt;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vf = &(pf->vf[vf_id]);
+	if (enable == vf->spoofchk)
+		goto out;
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.seid = pf->vsi[vf->lan_vsi_index]->seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+	if (enable)
+		ctxt.info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Error %d updating VSI parameters\n",
+			ret);
+		ret = -EIO;
+		goto out;
+	}
+	vf->spoofchk = enable;	/* cache only what the update accepted */
+out:
+	return ret;
+}

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 389c47f..63e7e0d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

@@ -98,8 +98,10 @@
 
 	unsigned long vf_caps;	/* vf's adv. capabilities */
 	unsigned long vf_states;	/* vf's runtime states */
+	unsigned int tx_rate;	/* Tx bandwidth limit in Mbps */
 	bool link_forced;
 	bool link_up;		/* only valid if vf link is forced */
+	bool spoofchk;
 };
 
 void i40e_free_vfs(struct i40e_pf *pf);
@@ -115,10 +117,12 @@
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
 int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
 			      int vf_id, u16 vlan_id, u8 qos);
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate);
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate);
 int i40e_ndo_get_vf_config(struct net_device *netdev,
 			   int vf_id, struct ifla_vf_info *ivi);
 int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
+int i40e_ndo_set_vf_spoofck(struct net_device *netdev, int vf_id, bool enable);
 
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);

diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
index e09be37..3a42383 100644
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ b/drivers/net/ethernet/intel/i40evf/Makefile

@@ -1,7 +1,7 @@
 ################################################################################
 #
 # Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
-# Copyright(c) 2013 Intel Corporation.
+# Copyright(c) 2013 - 2014 Intel Corporation.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
+# You should have received a copy of the GNU General Public License along
+# with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 # The full GNU General Public License is included in this distribution in
 # the file called "COPYING".
 #

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index 5470ce9..eb67cce 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -28,6 +31,16 @@
 #include "i40e_prototype.h"
 
 /**
+ * i40e_is_nvm_update_op - return true if this is an NVM update operation
+ * @desc: API request descriptor (opcode compared in little-endian form)
+ **/
+static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc)
+{
+	return (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_erase)) ||
+	       (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_update));
+}
+
+/**
  *  i40e_adminq_init_regs - Initialize AdminQ registers
  *  @hw: pointer to the hardware structure
  *
@@ -276,8 +289,11 @@
  *
  *  Configure base address and length registers for the transmit queue
  **/
-static void i40e_config_asq_regs(struct i40e_hw *hw)
+static i40e_status i40e_config_asq_regs(struct i40e_hw *hw)
 {
+	i40e_status ret_code = 0;
+	u32 reg = 0;
+
 	if (hw->mac.type == I40E_MAC_VF) {
 		/* configure the transmit queue */
 		wr32(hw, I40E_VF_ATQBAH1,
@@ -286,6 +302,7 @@
 		    lower_32_bits(hw->aq.asq.desc_buf.pa));
 		wr32(hw, I40E_VF_ATQLEN1, (hw->aq.num_asq_entries |
 					  I40E_VF_ATQLEN1_ATQENABLE_MASK));
+		reg = rd32(hw, I40E_VF_ATQBAL1);
 	} else {
 		/* configure the transmit queue */
 		wr32(hw, I40E_PF_ATQBAH,
@@ -294,7 +311,14 @@
 		    lower_32_bits(hw->aq.asq.desc_buf.pa));
 		wr32(hw, I40E_PF_ATQLEN, (hw->aq.num_asq_entries |
 					  I40E_PF_ATQLEN_ATQENABLE_MASK));
+		reg = rd32(hw, I40E_PF_ATQBAL);
 	}
+
+	/* Check one register to verify that config was applied */
+	if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa))
+		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
 }
 
 /**
@@ -303,8 +327,11 @@
  *
  * Configure base address and length registers for the receive (event queue)
  **/
-static void i40e_config_arq_regs(struct i40e_hw *hw)
+static i40e_status i40e_config_arq_regs(struct i40e_hw *hw)
 {
+	i40e_status ret_code = 0;
+	u32 reg = 0;
+
 	if (hw->mac.type == I40E_MAC_VF) {
 		/* configure the receive queue */
 		wr32(hw, I40E_VF_ARQBAH1,
@@ -313,6 +340,7 @@
 		    lower_32_bits(hw->aq.arq.desc_buf.pa));
 		wr32(hw, I40E_VF_ARQLEN1, (hw->aq.num_arq_entries |
 					  I40E_VF_ARQLEN1_ARQENABLE_MASK));
+		reg = rd32(hw, I40E_VF_ARQBAL1);
 	} else {
 		/* configure the receive queue */
 		wr32(hw, I40E_PF_ARQBAH,
@@ -321,10 +349,17 @@
 		    lower_32_bits(hw->aq.arq.desc_buf.pa));
 		wr32(hw, I40E_PF_ARQLEN, (hw->aq.num_arq_entries |
 					  I40E_PF_ARQLEN_ARQENABLE_MASK));
+		reg = rd32(hw, I40E_PF_ARQBAL);
 	}
 
 	/* Update tail in the HW to post pre-allocated buffers */
 	wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+
+	/* Check one register to verify that config was applied */
+	if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa))
+		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
 }
 
 /**
@@ -372,7 +407,9 @@
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	i40e_config_asq_regs(hw);
+	ret_code = i40e_config_asq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
 
 	/* success! */
 	goto init_adminq_exit;
@@ -429,7 +466,9 @@
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	i40e_config_arq_regs(hw);
+	ret_code = i40e_config_arq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
 
 	/* success! */
 	goto init_adminq_exit;
@@ -659,6 +698,12 @@
 		goto asq_send_command_exit;
 	}
 
+	if (i40e_is_nvm_update_op(desc) && hw->aq.nvm_busy) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: NVM busy.\n");
+		status = I40E_ERR_NVM;
+		goto asq_send_command_exit;
+	}
+
 	details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
 	if (cmd_details) {
 		*details = *cmd_details;
@@ -786,6 +831,9 @@
 		hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
 	}
 
+	if (i40e_is_nvm_update_op(desc))
+		hw->aq.nvm_busy = true;
+
 	/* update the error if time out occurred */
 	if ((!cmd_completed) &&
 	    (!details->async && !details->postpone)) {
@@ -880,6 +928,9 @@
 			       e->msg_size);
 	}
 
+	if (i40e_is_nvm_update_op(&e->desc))
+		hw->aq.nvm_busy = false;
+
 	/* Restore the original datalen and buffer address in the desc,
 	 * FW updates datalen to indicate the event message
 	 * size

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index 8f72c31d..e3472c6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -87,6 +90,7 @@
 	u16 fw_min_ver;                 /* firmware minor version */
 	u16 api_maj_ver;                /* api major version */
 	u16 api_min_ver;                /* api minor version */
+	bool nvm_busy;
 
 	struct mutex asq_mutex; /* Send queue lock */
 	struct mutex arq_mutex; /* Receive queue lock */

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 97662b6..e656ea7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -31,7 +34,7 @@
  */
 
 #define I40E_FW_API_VERSION_MAJOR  0x0001
-#define I40E_FW_API_VERSION_MINOR  0x0001
+#define I40E_FW_API_VERSION_MINOR  0x0002
 #define I40E_FW_API_VERSION_A0_MINOR  0x0000
 
 struct i40e_aq_desc {
@@ -121,6 +124,7 @@
 	i40e_aqc_opc_get_version      = 0x0001,
 	i40e_aqc_opc_driver_version   = 0x0002,
 	i40e_aqc_opc_queue_shutdown   = 0x0003,
+	i40e_aqc_opc_set_pf_context   = 0x0004,
 
 	/* resource ownership */
 	i40e_aqc_opc_request_resource = 0x0008,
@@ -180,9 +184,6 @@
 	i40e_aqc_opc_add_mirror_rule    = 0x0260,
 	i40e_aqc_opc_delete_mirror_rule = 0x0261,
 
-	i40e_aqc_opc_set_storm_control_config = 0x0280,
-	i40e_aqc_opc_get_storm_control_config = 0x0281,
-
 	/* DCB commands */
 	i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
 	i40e_aqc_opc_dcb_updated    = 0x0302,
@@ -205,6 +206,7 @@
 	i40e_aqc_opc_query_switching_comp_bw_config        = 0x041A,
 	i40e_aqc_opc_suspend_port_tx                       = 0x041B,
 	i40e_aqc_opc_resume_port_tx                        = 0x041C,
+	i40e_aqc_opc_configure_partition_bw                = 0x041D,
 
 	/* hmc */
 	i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
@@ -222,13 +224,15 @@
 	i40e_aqc_opc_get_partner_advt    = 0x0616,
 	i40e_aqc_opc_set_lb_modes        = 0x0618,
 	i40e_aqc_opc_get_phy_wol_caps    = 0x0621,
-	i40e_aqc_opc_set_phy_reset       = 0x0622,
+	i40e_aqc_opc_set_phy_debug	 = 0x0622,
 	i40e_aqc_opc_upload_ext_phy_fm   = 0x0625,
 
 	/* NVM commands */
-	i40e_aqc_opc_nvm_read   = 0x0701,
-	i40e_aqc_opc_nvm_erase  = 0x0702,
-	i40e_aqc_opc_nvm_update = 0x0703,
+	i40e_aqc_opc_nvm_read         = 0x0701,
+	i40e_aqc_opc_nvm_erase        = 0x0702,
+	i40e_aqc_opc_nvm_update       = 0x0703,
+	i40e_aqc_opc_nvm_config_read  = 0x0704,
+	i40e_aqc_opc_nvm_config_write = 0x0705,
 
 	/* virtualization commands */
 	i40e_aqc_opc_send_msg_to_pf   = 0x0801,
@@ -270,8 +274,6 @@
 	i40e_aqc_opc_debug_set_mode         = 0xFF01,
 	i40e_aqc_opc_debug_read_reg         = 0xFF03,
 	i40e_aqc_opc_debug_write_reg        = 0xFF04,
-	i40e_aqc_opc_debug_read_reg_sg      = 0xFF05,
-	i40e_aqc_opc_debug_write_reg_sg     = 0xFF06,
 	i40e_aqc_opc_debug_modify_reg       = 0xFF07,
 	i40e_aqc_opc_debug_dump_internals   = 0xFF08,
 	i40e_aqc_opc_debug_modify_internals = 0xFF09,
@@ -339,6 +341,14 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
 
+/* Set PF context (0x0004, direct) */
+struct i40e_aqc_set_pf_context {
+	u8	pf_id;
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_pf_context);
+
 /* Request resource ownership (direct 0x0008)
  * Release resource ownership (direct 0x0009)
  */
@@ -678,7 +688,6 @@
 #define I40E_AQ_VSI_TYPE_PF             0x2
 #define I40E_AQ_VSI_TYPE_EMP_MNG        0x3
 #define I40E_AQ_VSI_FLAG_CASCADED_PV    0x4
-#define I40E_AQ_VSI_FLAG_CLOUD_VSI      0x8
 	__le32 addr_high;
 	__le32 addr_low;
 };
@@ -1040,7 +1049,9 @@
 #define I40E_AQC_SET_VSI_PROMISC_VLAN        0x10
 	__le16 seid;
 #define I40E_AQC_VSI_PROM_CMD_SEID_MASK      0x3FF
-	u8     reserved[10];
+	__le16 vlan_tag;
+#define I40E_AQC_SET_VSI_VLAN_VALID          0x8000
+	u8     reserved[8];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
@@ -1289,27 +1300,6 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
 
-/* Set Storm Control Configuration (direct 0x0280)
- * Get Storm Control Configuration (direct 0x0281)
- *    the command and response use the same descriptor structure
- */
-struct i40e_aqc_set_get_storm_control_config {
-	__le32 broadcast_threshold;
-	__le32 multicast_threshold;
-	__le32 control_flags;
-#define I40E_AQC_STORM_CONTROL_MDIPW            0x01
-#define I40E_AQC_STORM_CONTROL_MDICW            0x02
-#define I40E_AQC_STORM_CONTROL_BDIPW            0x04
-#define I40E_AQC_STORM_CONTROL_BDICW            0x08
-#define I40E_AQC_STORM_CONTROL_BIDU             0x10
-#define I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT   8
-#define I40E_AQC_STORM_CONTROL_INTERVAL_MASK    (0x3FF << \
-					I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT)
-	u8     reserved[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_get_storm_control_config);
-
 /* DCB 0x03xx*/
 
 /* PFC Ignore (direct 0x0301)
@@ -1427,11 +1417,12 @@
 struct i40e_aqc_configure_switching_comp_ets_data {
 	u8     reserved[4];
 	u8     tc_valid_bits;
-	u8     reserved1;
+	u8     seepage;
+#define I40E_AQ_ETS_SEEPAGE_EN_MASK     0x1
 	u8     tc_strict_priority_flags;
-	u8     reserved2[17];
+	u8     reserved1[17];
 	u8     tc_bw_share_credits[8];
-	u8     reserved3[96];
+	u8     reserved2[96];
 };
 
 /* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
@@ -1499,6 +1490,15 @@
  * (direct 0x041B and 0x041C) uses the generic SEID struct
  */
 
+/* Configure partition BW
+ * (indirect 0x041D)
+ */
+struct i40e_aqc_configure_partition_bw_data {
+	__le16 pf_valid_bits;
+	u8     min_bw[16];      /* guaranteed bandwidth */
+	u8     max_bw[16];      /* bandwidth limit */
+};
+
 /* Get and set the active HMC resource profile and status.
  * (direct 0x0500) and (direct 0x0501)
  */
@@ -1539,6 +1539,8 @@
 	I40E_PHY_TYPE_XLPPI			= 0x9,
 	I40E_PHY_TYPE_40GBASE_CR4_CU		= 0xA,
 	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
+	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
+	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
 	I40E_PHY_TYPE_100BASE_TX		= 0x11,
 	I40E_PHY_TYPE_1000BASE_T		= 0x12,
 	I40E_PHY_TYPE_10GBASE_T			= 0x13,
@@ -1549,7 +1551,10 @@
 	I40E_PHY_TYPE_40GBASE_CR4		= 0x18,
 	I40E_PHY_TYPE_40GBASE_SR4		= 0x19,
 	I40E_PHY_TYPE_40GBASE_LR4		= 0x1A,
-	I40E_PHY_TYPE_20GBASE_KR2		= 0x1B,
+	I40E_PHY_TYPE_1000BASE_SX		= 0x1B,
+	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
+	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
+	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
 	I40E_PHY_TYPE_MAX
 };
 
@@ -1583,11 +1588,8 @@
 #define I40E_AQ_PHY_FLAG_PAUSE_TX         0x01
 #define I40E_AQ_PHY_FLAG_PAUSE_RX         0x02
 #define I40E_AQ_PHY_FLAG_LOW_POWER        0x04
-#define I40E_AQ_PHY_FLAG_AN_SHIFT         3
-#define I40E_AQ_PHY_FLAG_AN_MASK          (0x3 << I40E_AQ_PHY_FLAG_AN_SHIFT)
-#define I40E_AQ_PHY_FLAG_AN_OFF           0x00 /* link forced on */
-#define I40E_AQ_PHY_FLAG_AN_OFF_LINK_DOWN 0x01
-#define I40E_AQ_PHY_FLAG_AN_ON            0x02
+#define I40E_AQ_PHY_LINK_ENABLED		  0x08
+#define I40E_AQ_PHY_AN_ENABLED			  0x10
 #define I40E_AQ_PHY_FLAG_MODULE_QUAL      0x20
 	__le16 eee_capability;
 #define I40E_AQ_EEE_100BASE_TX       0x0002
@@ -1696,6 +1698,7 @@
 #define I40E_AQ_LINK_TX_ACTIVE       0x00
 #define I40E_AQ_LINK_TX_DRAINED      0x01
 #define I40E_AQ_LINK_TX_FLUSHED      0x03
+#define I40E_AQ_LINK_FORCED_40G      0x10
 	u8     loopback;         /* use defines from i40e_aqc_set_lb_mode */
 	__le16 max_frame_size;
 	u8     config;
@@ -1747,14 +1750,21 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
 
-/* Set PHY Reset command (0x0622) */
-struct i40e_aqc_set_phy_reset {
-	u8     reset_flags;
-#define I40E_AQ_PHY_RESET_REQUEST  0x02
+/* Set PHY Debug command (0x0622) */
+struct i40e_aqc_set_phy_debug {
+	u8     command_flags;
+#define I40E_AQ_PHY_DEBUG_RESET_INTERNAL	0x02
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT	2
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_MASK	(0x03 << \
+					I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT)
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_NONE	0x00
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_HARD	0x01
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SOFT	0x02
+#define I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW	0x10
 	u8     reserved[15];
 };
 
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_reset);
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_debug);
 
 enum i40e_aq_phy_reg_type {
 	I40E_AQC_PHY_REG_INTERNAL         = 0x1,
@@ -1779,6 +1789,47 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
 
+/* NVM Config Read (indirect 0x0704) */
+struct i40e_aqc_nvm_config_read {
+	__le16 cmd_flags;
+#define ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
+#define ANVM_READ_SINGLE_FEATURE		0
+#define ANVM_READ_MULTIPLE_FEATURES		1
+	__le16 element_count;
+	__le16 element_id;		/* Feature/field ID */
+	u8     reserved[2];
+	__le32 address_high;
+	__le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_read);
+
+/* NVM Config Write (indirect 0x0705) */
+struct i40e_aqc_nvm_config_write {
+	__le16 cmd_flags;
+	__le16 element_count;
+	u8     reserved[4];
+	__le32 address_high;
+	__le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
+
+struct i40e_aqc_nvm_config_data_feature {
+	__le16 feature_id;
+	__le16 instance_id;
+	__le16 feature_options;
+	__le16 feature_selection;
+};
+
+struct i40e_aqc_nvm_config_data_immediate_field {
+#define ANVM_FEATURE_OR_IMMEDIATE_MASK	0x2
+	__le16 field_id;
+	__le16 instance_id;
+	__le16 field_options;
+	__le16 field_value;
+};
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)
@@ -1948,19 +1999,12 @@
 /* Add Udp Tunnel command and completion (direct 0x0B00) */
 struct i40e_aqc_add_udp_tunnel {
 	__le16 udp_port;
-	u8     header_len; /* in DWords, 1 to 15 */
+	u8     reserved0[3];
 	u8     protocol_type;
-#define I40E_AQC_TUNNEL_TYPE_TEREDO	0x0
-#define I40E_AQC_TUNNEL_TYPE_VXLAN	0x2
-#define I40E_AQC_TUNNEL_TYPE_NGE	0x3
-	u8     variable_udp_length;
-#define I40E_AQC_TUNNEL_FIXED_UDP_LENGTH	0x0
-#define I40E_AQC_TUNNEL_VARIABLE_UDP_LENGTH	0x1
-	u8		udp_key_index;
-#define I40E_AQC_TUNNEL_KEY_INDEX_VXLAN			0x0
-#define I40E_AQC_TUNNEL_KEY_INDEX_NGE			0x1
-#define I40E_AQC_TUNNEL_KEY_INDEX_PROPRIETARY_UDP	0x2
-	u8		reserved[10];
+#define I40E_AQC_TUNNEL_TYPE_VXLAN	0x00
+#define I40E_AQC_TUNNEL_TYPE_NGE	0x01
+#define I40E_AQC_TUNNEL_TYPE_TEREDO	0x10
+	u8     reserved1[10];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
index d8654fb..8e6a6dd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index ae08437..a43155a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -40,12 +43,10 @@
 	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
 		switch (hw->device_id) {
 		case I40E_DEV_ID_SFP_XL710:
-		case I40E_DEV_ID_SFP_X710:
 		case I40E_DEV_ID_QEMU:
 		case I40E_DEV_ID_KX_A:
 		case I40E_DEV_ID_KX_B:
 		case I40E_DEV_ID_KX_C:
-		case I40E_DEV_ID_KX_D:
 		case I40E_DEV_ID_QSFP_A:
 		case I40E_DEV_ID_QSFP_B:
 		case I40E_DEV_ID_QSFP_C:
@@ -130,7 +131,11 @@
  **/
 bool i40evf_check_asq_alive(struct i40e_hw *hw)
 {
-	return !!(rd32(hw, hw->aq.asq.len) & I40E_PF_ATQLEN_ATQENABLE_MASK);
+	if (hw->aq.asq.len)
+		return !!(rd32(hw, hw->aq.asq.len) &
+			  I40E_PF_ATQLEN_ATQENABLE_MASK);
+	else
+		return false;
 }
 
 /**

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
index cb97b3e..a2ad9a4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -160,11 +163,6 @@
 	    (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		\
 	     ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
 
-#define I40E_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id)	   \
-	wr32((hw), I40E_GLHMC_VFPDINV((hmc_fn_id) - I40E_FIRST_VF_FPM_ID), \
-	     (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		   \
-	      ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
 /**
  * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
  * @hmc_info: pointer to the HMC configuration information structure
@@ -223,7 +221,7 @@
 					      u32 pd_index);
 i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
 					struct i40e_hmc_info *hmc_info,
-					u32 idx, bool is_pf);
+					u32 idx);
 i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
 					     u32 idx);
 i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
index 17e42ca..d6f7622 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -53,6 +56,7 @@
 	u8  tphdata_ena;
 	u8  tphhead_ena;
 	u8  lrxqthresh;
+	u8  prefena;	/* NOTE: normally must be set to 1 at init */
 };
 
 /* Tx queue context data */

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index 622f373..21a91b1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 97ab8c2..849edcc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
index 30af953..3698396 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -1337,8 +1340,6 @@
 #define I40E_PFINT_ICR0_GPIO_MASK (0x1 << I40E_PFINT_ICR0_GPIO_SHIFT)
 #define I40E_PFINT_ICR0_TIMESYNC_SHIFT 23
 #define I40E_PFINT_ICR0_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_TIMESYNC_SHIFT)
-#define I40E_PFINT_ICR0_STORM_DETECT_SHIFT 24
-#define I40E_PFINT_ICR0_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_STORM_DETECT_SHIFT)
 #define I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
 #define I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
 #define I40E_PFINT_ICR0_HMC_ERR_SHIFT 26
@@ -1364,8 +1365,6 @@
 #define I40E_PFINT_ICR0_ENA_GPIO_MASK (0x1 << I40E_PFINT_ICR0_ENA_GPIO_SHIFT)
 #define I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT 23
 #define I40E_PFINT_ICR0_ENA_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT)
-#define I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT 24
-#define I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT)
 #define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
 #define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
 #define I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT 26
@@ -1586,6 +1585,14 @@
 #define I40E_GLLAN_TSOMSK_M 0x000442DC
 #define I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT 0
 #define I40E_GLLAN_TSOMSK_M_TCPMSKM_MASK (0xFFF << I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS(_i) (0x000E6500 + ((_i) * 4)) /* i=0..11 */
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT 0
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_MASK (0x7FF << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK (0x1 << I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK (0x1 << I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+
 #define I40E_PFLAN_QALLOC 0x001C0400
 #define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
 #define I40E_PFLAN_QALLOC_FIRSTQ_MASK (0x7FF << I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 7c08cc2..7fa7a41 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b9f50f4..48ebb6c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -725,10 +728,12 @@
 				    u32 rx_error,
 				    u16 rx_ptype)
 {
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
+	bool ipv4 = false, ipv6 = false;
 	bool ipv4_tunnel, ipv6_tunnel;
 	__wsum rx_udp_csum;
-	__sum16 csum;
 	struct iphdr *iph;
+	__sum16 csum;
 
 	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
 		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
@@ -739,29 +744,57 @@
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Rx csum enabled and ip headers found? */
-	if (!(vsi->netdev->features & NETIF_F_RXCSUM &&
-	      rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
 		return;
 
+	/* did the hardware decode the packet and checksum? */
+	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+		return;
+
+	/* both known and outer_ip must be set for the below code to work */
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
+		ipv4 = true;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
+		ipv6 = true;
+
+	if (ipv4 &&
+	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
+			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+		goto checksum_fail;
+
 	/* likely incorrect csum if alternate IP extension headers found */
-	if (rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+	if (ipv6 &&
+	    decoded.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP &&
+	    rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT) &&
+	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+		/* don't increment checksum err here, non-fatal err */
 		return;
 
-	/* IP or L4 or outmost IP checksum error */
-	if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
-			(1 << I40E_RX_DESC_ERROR_L4E_SHIFT) |
-			(1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) {
-		vsi->back->hw_csum_rx_error++;
-		return;
-	}
+	/* there was some L4 error, count error and punt packet to the stack */
+	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+		goto checksum_fail;
 
+	/* handle packets that were not able to be checksummed due
+	 * to arrival speed, in this case the stack can compute
+	 * the csum.
+	 */
+	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
+		return;
+
+	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
+	 * it in the driver, hardware does not do it for us.
+	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
+	 * so the total length of IPv4 header is IHL*4 bytes
+	 * The UDP_0 bit *may* be set if the *inner* header is UDP
+	 */
 	if (ipv4_tunnel &&
+	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
 	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
-		/* If VXLAN traffic has an outer UDPv4 checksum we need to check
-		 * it in the driver, hardware does not do it for us.
-		 * Since L3L4P bit was set we assume a valid IHL value (>=5)
-		 * so the total length of IPv4 header is IHL*4 bytes
-		 */
 		skb->transport_header = skb->mac_header +
 					sizeof(struct ethhdr) +
 					(ip_hdr(skb)->ihl * 4);
@@ -778,13 +811,16 @@
 				(skb->len - skb_transport_offset(skb)),
 				IPPROTO_UDP, rx_udp_csum);
 
-		if (udp_hdr(skb)->check != csum) {
-			vsi->back->hw_csum_rx_error++;
-			return;
-		}
+		if (udp_hdr(skb)->check != csum)
+			goto checksum_fail;
 	}
 
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
 }
 
 /**
@@ -953,6 +989,9 @@
 		/* ERR_MASK will only have valid bits if EOP set */
 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
 			dev_kfree_skb_any(skb);
+			/* TODO: shouldn't we increment a counter indicating the
+			 * drop?
+			 */
 			goto next_desc;
 		}
 
@@ -1508,9 +1547,7 @@
 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 				      struct i40e_ring *tx_ring)
 {
-#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
 	unsigned int f;
-#endif
 	int count = 0;
 
 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
@@ -1519,12 +1556,9 @@
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-#else
-	count += skb_shinfo(skb)->nr_frags;
-#endif
+
 	count += TXD_USE_COUNT(skb_headlen(skb));
 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
 		tx_ring->tx_stats.tx_busy++;

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 10bf49e..30d248b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -24,7 +27,7 @@
 #ifndef _I40E_TXRX_H_
 #define _I40E_TXRX_H_
 
-/* Interrupt Throttling and Rate Limiting (storm control) Goodies */
+/* Interrupt Throttling and Rate Limiting Goodies */
 
 #define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
 #define I40E_MIN_ITR               0x0004  /* reg uses 2 usec resolution */
@@ -66,16 +69,11 @@
 
 /* Supported RSS offloads */
 #define I40E_DEFAULT_RSS_HENA ( \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
 	((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
-	((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN) | \
 	((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \
@@ -119,11 +117,11 @@
 #define i40e_rx_desc i40e_32byte_rx_desc
 
 #define I40E_MIN_TX_LEN		17
-#define I40E_MAX_DATA_PER_TXD	16383	/* aka 16kB - 1 */
+#define I40E_MAX_DATA_PER_TXD	8192
 
 /* Tx Descriptors needed, worst case */
 #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), I40E_MAX_DATA_PER_TXD)
-#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
 
 #define I40E_TX_FLAGS_CSUM		(u32)(1)
 #define I40E_TX_FLAGS_HW_VLAN		(u32)(1 << 1)
@@ -180,7 +178,6 @@
 	__I40E_TX_DETECT_HANG,
 	__I40E_HANG_CHECK_ARMED,
 	__I40E_RX_PS_ENABLED,
-	__I40E_RX_LRO_ENABLED,
 	__I40E_RX_16BYTE_DESC_ENABLED,
 };
 
@@ -196,12 +193,6 @@
 	set_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
 #define clear_check_for_tx_hang(ring) \
 	clear_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
-#define ring_is_lro_enabled(ring) \
-	test_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
-#define set_ring_lro_enabled(ring) \
-	set_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
-#define clear_ring_lro_enabled(ring) \
-	clear_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
 #define ring_is_16byte_desc_enabled(ring) \
 	test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
 #define set_ring_16byte_desc_enabled(ring) \

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 4673b33..d3cf5a6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -32,13 +35,11 @@
 #include "i40e_lan_hmc.h"
 
 /* Device IDs */
-#define I40E_DEV_ID_SFP_XL710	0x1572
-#define I40E_DEV_ID_SFP_X710		0x1573
+#define I40E_DEV_ID_SFP_XL710		0x1572
 #define I40E_DEV_ID_QEMU		0x1574
 #define I40E_DEV_ID_KX_A		0x157F
 #define I40E_DEV_ID_KX_B		0x1580
 #define I40E_DEV_ID_KX_C		0x1581
-#define I40E_DEV_ID_KX_D		0x1582
 #define I40E_DEV_ID_QSFP_A		0x1583
 #define I40E_DEV_ID_QSFP_B		0x1584
 #define I40E_DEV_ID_QSFP_C		0x1585
@@ -57,8 +58,8 @@
 /* Max default timeout in ms, */
 #define I40E_MAX_NVM_TIMEOUT		18000
 
-/* Switch from mc to the 2usec global time (this is the GTIME resolution) */
-#define I40E_MS_TO_GTIME(time)		(((time) * 1000) / 2)
+/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
+#define I40E_MS_TO_GTIME(time)		((time) * 1000)
 
 /* forward declaration */
 struct i40e_hw;
@@ -101,15 +102,6 @@
 	I40E_DEBUG_ALL			= 0xFFFFFFFF
 };
 
-/* PCI Bus Info */
-#define I40E_PCI_LINK_WIDTH_1		0x10
-#define I40E_PCI_LINK_WIDTH_2		0x20
-#define I40E_PCI_LINK_WIDTH_4		0x40
-#define I40E_PCI_LINK_WIDTH_8		0x80
-#define I40E_PCI_LINK_SPEED_2500	0x1
-#define I40E_PCI_LINK_SPEED_5000	0x2
-#define I40E_PCI_LINK_SPEED_8000	0x3
-
 /* These are structs for managing the hardware information and the operations.
  * The structures of function pointers are filled out at init time when we
  * know for sure exactly which hardware we're working with.  This gives us the
@@ -173,6 +165,9 @@
 	u8 loopback;
 	/* is Link Status Event notification to SW enabled */
 	bool lse_enable;
+	u16 max_frame_size;
+	bool crc_enable;
+	u8 pacing;
 };
 
 struct i40e_phy_info {
@@ -415,6 +410,7 @@
 	u8 minor_version;
 	u8 build_version;
 	u8 subbuild_version;
+	u8 driver_string[32];
 };
 
 /* RX Descriptors */
@@ -494,9 +490,6 @@
 	} wb;  /* writeback */
 };
 
-#define I40E_RXD_QW1_STATUS_SHIFT	0
-#define I40E_RXD_QW1_STATUS_MASK	(0x7FFFUL << I40E_RXD_QW1_STATUS_SHIFT)
-
 enum i40e_rx_desc_status_bits {
 	/* Note: These are predefined bit offsets */
 	I40E_RX_DESC_STATUS_DD_SHIFT		= 0,
@@ -513,9 +506,14 @@
 	I40E_RX_DESC_STATUS_LPBK_SHIFT		= 14,
 	I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
 	I40E_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
-	I40E_RX_DESC_STATUS_UDP_0_SHIFT		= 18
+	I40E_RX_DESC_STATUS_UDP_0_SHIFT		= 18,
+	I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
 };
 
+#define I40E_RXD_QW1_STATUS_SHIFT	0
+#define I40E_RXD_QW1_STATUS_MASK	(((1 << I40E_RX_DESC_STATUS_LAST) - 1) \
+					 << I40E_RXD_QW1_STATUS_SHIFT)
+
 #define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT   I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
 #define I40E_RXD_QW1_STATUS_TSYNINDX_MASK	(0x3UL << \
 					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
@@ -543,7 +541,8 @@
 	I40E_RX_DESC_ERROR_IPE_SHIFT		= 3,
 	I40E_RX_DESC_ERROR_L4E_SHIFT		= 4,
 	I40E_RX_DESC_ERROR_EIPE_SHIFT		= 5,
-	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6
+	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
+	I40E_RX_DESC_ERROR_PPRS_SHIFT		= 7
 };
 
 enum i40e_rx_desc_error_l3l4e_fcoe_masks {
@@ -664,7 +663,6 @@
 	I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
 	I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
 	I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
-	I40E_RX_DESC_EXT_STATUS_FTYPE_SHIFT	= 6, /* 3 BITS */
 	I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
 	I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
 	I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
@@ -868,18 +866,14 @@
 
 /* Packet Classifier Types for filters */
 enum i40e_filter_pctype {
-	/* Note: Values 0-28 are reserved for future use */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
+	/* Note: Values 0-30 are reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
-	I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN		= 32,
+	/* Note: Value 32 is reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
 	I40E_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
 	I40E_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
 	I40E_FILTER_PCTYPE_FRAG_IPV4			= 36,
-	/* Note: Values 37-38 are reserved for future use */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
+	/* Note: Values 37-40 are reserved for future use */
 	I40E_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
 	I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN		= 42,
 	I40E_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
@@ -961,6 +955,16 @@
 	struct i40e_aqc_vsi_properties_data info;
 };
 
+struct i40e_veb_context {
+	u16 seid;
+	u16 uplink_seid;
+	u16 veb_number;
+	u16 vebs_allocated;
+	u16 vebs_unallocated;
+	u16 flags;
+	struct i40e_aqc_get_veb_parameters_completion info;
+};
+
 /* Statistics collected by each port, VSI, VEB, and S-channel */
 struct i40e_eth_stats {
 	u64 rx_bytes;			/* gorc */
@@ -968,8 +972,6 @@
 	u64 rx_multicast;		/* mprc */
 	u64 rx_broadcast;		/* bprc */
 	u64 rx_discards;		/* rdpc */
-	u64 rx_errors;			/* repc */
-	u64 rx_missed;			/* rmpc */
 	u64 rx_unknown_protocol;	/* rupp */
 	u64 tx_bytes;			/* gotc */
 	u64 tx_unicast;			/* uptc */
@@ -1021,9 +1023,12 @@
 	u64 tx_size_big;		/* ptc9522 */
 	u64 mac_short_packet_dropped;	/* mspdc */
 	u64 checksum_error;		/* xec */
+	/* flow director stats */
+	u64 fd_atr_match;
+	u64 fd_sb_match;
 	/* EEE LPI */
-	bool tx_lpi_status;
-	bool rx_lpi_status;
+	u32 tx_lpi_status;
+	u32 rx_lpi_status;
 	u64 tx_lpi_count;		/* etlpic */
 	u64 rx_lpi_count;		/* erlpic */
 };

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index ccf45d0..cd18d56 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h

@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -338,10 +341,6 @@
 	int severity;
 };
 
-/* The following are TBD, not necessary for LAN functionality.
- * I40E_VIRTCHNL_OP_FCOE
- */
-
 /* VF reset states - these are written into the RSTAT register:
  * I40E_VFGEN_RSTAT1 on the PF
  * I40E_VFGEN_RSTAT on the VF

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 807807d..30ef519 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -77,7 +80,7 @@
 #define I40EVF_MIN_TXD       64
 #define I40EVF_MAX_RXD       4096
 #define I40EVF_MIN_RXD       64
-#define I40EVF_REQ_DESCRIPTOR_MULTIPLE  8
+#define I40EVF_REQ_DESCRIPTOR_MULTIPLE  32
 
 /* Supported Rx Buffer Sizes */
 #define I40EVF_RXBUFFER_64    64     /* Used for packet split */
@@ -193,10 +196,12 @@
 	struct i40e_ring *tx_rings[I40E_MAX_VSI_QP];
 	u32 tx_timeout_count;
 	struct list_head mac_filter_list;
+	u32 tx_desc_count;
 
 	/* RX */
 	struct i40e_ring *rx_rings[I40E_MAX_VSI_QP];
 	u64 hw_csum_rx_error;
+	u32 rx_desc_count;
 	int num_msix_vectors;
 	struct msix_entry *msix_entries;
 

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 8b0db1c..60407a9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -44,8 +47,6 @@
 	I40EVF_STAT("rx_multicast", current_stats.rx_multicast),
 	I40EVF_STAT("rx_broadcast", current_stats.rx_broadcast),
 	I40EVF_STAT("rx_discards", current_stats.rx_discards),
-	I40EVF_STAT("rx_errors", current_stats.rx_errors),
-	I40EVF_STAT("rx_missed", current_stats.rx_missed),
 	I40EVF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol),
 	I40EVF_STAT("tx_bytes", current_stats.tx_bytes),
 	I40EVF_STAT("tx_unicast", current_stats.tx_unicast),
@@ -56,10 +57,12 @@
 };
 
 #define I40EVF_GLOBAL_STATS_LEN ARRAY_SIZE(i40evf_gstrings_stats)
-#define I40EVF_QUEUE_STATS_LEN \
+#define I40EVF_QUEUE_STATS_LEN(_dev) \
 	(((struct i40evf_adapter *) \
-		netdev_priv(netdev))->vsi_res->num_queue_pairs * 4)
-#define I40EVF_STATS_LEN (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN)
+		netdev_priv(_dev))->vsi_res->num_queue_pairs \
+		  * 2 * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
+#define I40EVF_STATS_LEN(_dev) \
+	(I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
 
 /**
  * i40evf_get_settings - Get Link Speed and Duplex settings
@@ -75,7 +78,7 @@
 	/* In the future the VF will be able to query the PF for
 	 * some information - for now use a dummy value
 	 */
-	ecmd->supported = SUPPORTED_10000baseT_Full;
+	ecmd->supported = 0;
 	ecmd->autoneg = AUTONEG_DISABLE;
 	ecmd->transceiver = XCVR_DUMMY1;
 	ecmd->port = PORT_NONE;
@@ -94,9 +97,9 @@
 static int i40evf_get_sset_count(struct net_device *netdev, int sset)
 {
 	if (sset == ETH_SS_STATS)
-		return I40EVF_STATS_LEN;
+		return I40EVF_STATS_LEN(netdev);
 	else
-		return -ENOTSUPP;
+		return -EINVAL;
 }
 
 /**
@@ -219,13 +222,11 @@
 				  struct ethtool_ringparam *ring)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_ring *tx_ring = adapter->tx_rings[0];
-	struct i40e_ring *rx_ring = adapter->rx_rings[0];
 
 	ring->rx_max_pending = I40EVF_MAX_RXD;
 	ring->tx_max_pending = I40EVF_MAX_TXD;
-	ring->rx_pending = rx_ring->count;
-	ring->tx_pending = tx_ring->count;
+	ring->rx_pending = adapter->rx_desc_count;
+	ring->tx_pending = adapter->tx_desc_count;
 }
 
 /**
@@ -241,7 +242,6 @@
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	u32 new_rx_count, new_tx_count;
-	int i;
 
 	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
 		return -EINVAL;
@@ -257,17 +257,16 @@
 	new_rx_count = ALIGN(new_rx_count, I40EVF_REQ_DESCRIPTOR_MULTIPLE);
 
 	/* if nothing to do return success */
-	if ((new_tx_count == adapter->tx_rings[0]->count) &&
-	    (new_rx_count == adapter->rx_rings[0]->count))
+	if ((new_tx_count == adapter->tx_desc_count) &&
+	    (new_rx_count == adapter->rx_desc_count))
 		return 0;
 
-	for (i = 0; i < adapter->vsi_res->num_queue_pairs; i++) {
-		adapter->tx_rings[0]->count = new_tx_count;
-		adapter->rx_rings[0]->count = new_rx_count;
-	}
+	adapter->tx_desc_count = new_tx_count;
+	adapter->rx_desc_count = new_rx_count;
 
 	if (netif_running(netdev))
 		i40evf_reinit_locked(adapter);
+
 	return 0;
 }
 
@@ -290,14 +289,13 @@
 	ec->rx_max_coalesced_frames = vsi->work_limit;
 
 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
-		ec->rx_coalesce_usecs = 1;
-	else
-		ec->rx_coalesce_usecs = vsi->rx_itr_setting;
+		ec->use_adaptive_rx_coalesce = 1;
 
 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
-		ec->tx_coalesce_usecs = 1;
-	else
-		ec->tx_coalesce_usecs = vsi->tx_itr_setting;
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
 
 	return 0;
 }
@@ -318,40 +316,34 @@
 	struct i40e_q_vector *q_vector;
 	int i;
 
-	if (ec->tx_max_coalesced_frames || ec->rx_max_coalesced_frames)
-		vsi->work_limit = ec->tx_max_coalesced_frames;
+	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
-	switch (ec->rx_coalesce_usecs) {
-	case 0:
-		vsi->rx_itr_setting = 0;
-		break;
-	case 1:
-		vsi->rx_itr_setting = (I40E_ITR_DYNAMIC
-				       | ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
-		break;
-	default:
-		if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		    (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)))
-			return -EINVAL;
+	if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+	    (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
 		vsi->rx_itr_setting = ec->rx_coalesce_usecs;
-		break;
-	}
 
-	switch (ec->tx_coalesce_usecs) {
-	case 0:
-		vsi->tx_itr_setting = 0;
-		break;
-	case 1:
-		vsi->tx_itr_setting = (I40E_ITR_DYNAMIC
-				       | ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
-		break;
-	default:
-		if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		    (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)))
-			return -EINVAL;
+	else
+		return -EINVAL;
+
+	if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+	    (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
 		vsi->tx_itr_setting = ec->tx_coalesce_usecs;
-		break;
-	}
+	else if (ec->use_adaptive_tx_coalesce)
+		vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
+				       ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
+	else
+		return -EINVAL;
+
+	if (ec->use_adaptive_rx_coalesce)
+		vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	if (ec->use_adaptive_tx_coalesce)
+		vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
 		q_vector = adapter->q_vector[i];
@@ -365,7 +357,320 @@
 	return 0;
 }
 
-static struct ethtool_ops i40evf_ethtool_ops = {
+/**
+ * i40evf_get_rss_hash_opts - Get RSS hash Input Set for each flow type
+ * @adapter: board private structure
+ * @cmd: ethtool rxnfc command; @cmd->data receives the RXH_* field mask
+ *
+ * Returns 0 if the flow type is recognised, else -EINVAL with @cmd->data = 0.
+ **/
+static int i40evf_get_rss_hash_opts(struct i40evf_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct i40e_hw *hw = &adapter->hw;
+	u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) |
+		   ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32);
+
+	/* We always hash on IP src and dest addresses */
+	cmd->data = RXH_IP_SRC | RXH_IP_DST;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		if (hena & ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case UDP_V4_FLOW:
+		if (hena & ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		break;
+
+	case TCP_V6_FLOW:
+		if (hena & ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case UDP_V6_FLOW:
+		if (hena & ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		break;
+	default:
+		cmd->data = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40evf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 or a negative errno (-EOPNOTSUPP if unhandled); @rule_locs unused.
+ **/
+static int i40evf_get_rxnfc(struct net_device *netdev,
+			    struct ethtool_rxnfc *cmd,
+			    u32 *rule_locs)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->vsi_res->num_queue_pairs;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		ret = i40evf_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * i40evf_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @adapter: board private structure
+ * @nfc: ethtool rxnfc command
+ *
+ * Returns 0 if the flow input set is supported, else -EINVAL.
+ **/
+static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter,
+				   struct ethtool_rxnfc *nfc)
+{
+	struct i40e_hw *hw = &adapter->hw;
+
+	u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) |
+		   ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32);
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	/* We need at least the IP SRC and DEST fields for hashing */
+	if (!(nfc->data & RXH_IP_SRC) ||
+	    !(nfc->data & RXH_IP_DST))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			hena &= ~((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case TCP_V6_FLOW:
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			hena &= ~((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V4_FLOW:
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			hena &= ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+				  ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			hena |= (((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+				 ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			hena &= ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+				  ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			hena |= (((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+				 ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
+		break;
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
+		break;
+	case IPV4_FLOW:
+		hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
+			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4);
+		break;
+	case IPV6_FLOW:
+		hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
+			((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wr32(hw, I40E_VFQF_HENA(0), (u32)hena);
+	wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32));
+	i40e_flush(hw);
+
+	return 0;
+}
+
+/**
+ * i40evf_set_rxnfc - command to set RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns the ETHTOOL_SRXFH result, or -EOPNOTSUPP for any other command.
+ **/
+static int i40evf_set_rxnfc(struct net_device *netdev,
+			    struct ethtool_rxnfc *cmd)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = i40evf_set_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * i40evf_get_channels - get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * For the purposes of our device, we only use combined channels, i.e. a tx/rx
+ * queue pair. Report NONQ_VECS "other" channels for the non-queue vectors.
+ **/
+static void i40evf_get_channels(struct net_device *netdev,
+				struct ethtool_channels *ch)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = adapter->vsi_res->num_queue_pairs;
+
+	ch->max_other = NONQ_VECS;
+	ch->other_count = NONQ_VECS;
+
+	ch->combined_count = adapter->vsi_res->num_queue_pairs;
+}
+
+/**
+ * i40evf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size in entries: four per HLUT register.
+ **/
+static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4;
+}
+
+/**
+ * i40evf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table, (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4 entries
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
+ *
+ * Reads every HLUT register (four entries each) from the hardware. Returns 0.
+ **/
+static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_hw *hw = &adapter->hw;
+	u32 hlut_val;
+	int i, j;
+
+	for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
+		hlut_val = rd32(hw, I40E_VFQF_HLUT(i));
+		indir[j++] = hlut_val & 0xff;
+		indir[j++] = (hlut_val >> 8) & 0xff;
+		indir[j++] = (hlut_val >> 16) & 0xff;
+		indir[j++] = (hlut_val >> 24) & 0xff;
+	}
+	return 0;
+}
+
+/**
+ * i40evf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
+ *
+ * Packs four table entries into each HLUT register and always returns 0.
+ * NOTE(review): queue ids are not range-checked here - confirm callers do so.
+ **/
+static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			   const u8 *key)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_hw *hw = &adapter->hw;
+	u32 hlut_val;
+	int i, j;
+
+	for (i = 0, j = 0; i < I40E_VFQF_HLUT_MAX_INDEX + 1; i++) {
+		hlut_val = indir[j++];
+		hlut_val |= indir[j++] << 8;
+		hlut_val |= indir[j++] << 16;
+		hlut_val |= indir[j++] << 24;
+		wr32(hw, I40E_VFQF_HLUT(i), hlut_val);
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops i40evf_ethtool_ops = {
 	.get_settings		= i40evf_get_settings,
 	.get_drvinfo		= i40evf_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -378,6 +683,12 @@
 	.set_msglevel		= i40evf_set_msglevel,
 	.get_coalesce		= i40evf_get_coalesce,
 	.set_coalesce		= i40evf_set_coalesce,
+	.get_rxnfc		= i40evf_get_rxnfc,
+	.set_rxnfc		= i40evf_set_rxnfc,
+	.get_rxfh_indir_size	= i40evf_get_rxfh_indir_size,
+	.get_rxfh		= i40evf_get_rxfh,
+	.set_rxfh		= i40evf_set_rxfh,
+	.get_channels		= i40evf_get_channels,
 };
 
 /**
@@ -389,5 +700,5 @@
  **/
 void i40evf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40evf_ethtool_ops);
+	netdev->ethtool_ops = &i40evf_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 2797548..7fc5f3b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -25,13 +28,15 @@
 #include "i40e_prototype.h"
 static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter);
 static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter);
+static void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter);
+static void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter);
 static int i40evf_close(struct net_device *netdev);
 
 char i40evf_driver_name[] = "i40evf";
 static const char i40evf_driver_string[] =
 	"Intel(R) XL710 X710 Virtual Function Network Driver";
 
-#define DRV_VERSION "0.9.16"
+#define DRV_VERSION "0.9.34"
 const char i40evf_driver_version[] = DRV_VERSION;
 static const char i40evf_copyright[] =
 	"Copyright (c) 2013 - 2014 Intel Corporation.";
@@ -167,7 +172,6 @@
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
 	adapter->tx_timeout_count++;
-	dev_info(&adapter->pdev->dev, "TX timeout detected.\n");
 	if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
 		adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
 		schedule_work(&adapter->reset_task);
@@ -657,12 +661,9 @@
 	f = i40evf_find_vlan(adapter, vlan);
 	if (NULL == f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
-		if (NULL == f) {
-			dev_info(&adapter->pdev->dev,
-				 "%s: no memory for new VLAN filter\n",
-				 __func__);
+		if (NULL == f)
 			return NULL;
-		}
+
 		f->vlan = vlan;
 
 		INIT_LIST_HEAD(&f->list);
@@ -688,7 +689,6 @@
 		f->remove = true;
 		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
 	}
-	return;
 }
 
 /**
@@ -767,14 +767,12 @@
 	if (NULL == f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
 		if (NULL == f) {
-			dev_info(&adapter->pdev->dev,
-				 "%s: no memory for new filter\n", __func__);
 			clear_bit(__I40EVF_IN_CRITICAL_TASK,
 				  &adapter->crit_section);
 			return NULL;
 		}
 
-		memcpy(f->macaddr, macaddr, ETH_ALEN);
+		ether_addr_copy(f->macaddr, macaddr);
 
 		list_add(&f->list, &adapter->mac_filter_list);
 		f->add = true;
@@ -807,9 +805,8 @@
 
 	f = i40evf_add_filter(adapter, addr->sa_data);
 	if (f) {
-		memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
-		memcpy(netdev->dev_addr, adapter->hw.mac.addr,
-		       netdev->addr_len);
+		ether_addr_copy(hw->mac.addr, addr->sa_data);
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
 	}
 
 	return (f == NULL) ? -ENOMEM : 0;
@@ -841,7 +838,7 @@
 	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
 		bool found = false;
 
-		if (f->macaddr[0] & 0x01) {
+		if (is_multicast_ether_addr(f->macaddr)) {
 			netdev_for_each_mc_addr(mca, netdev) {
 				if (ether_addr_equal(mca->addr, f->macaddr)) {
 					found = true;
@@ -970,6 +967,9 @@
 	struct net_device *netdev = adapter->netdev;
 	struct i40evf_mac_filter *f;
 
+	if (adapter->state == __I40EVF_DOWN)
+		return;
+
 	/* remove all MAC filters */
 	list_for_each_entry(f, &adapter->mac_filter_list, list) {
 		f->remove = true;
@@ -1027,30 +1027,21 @@
 	 * Right now, we simply care about how many we'll get; we'll
 	 * set them up later while requesting irq's.
 	 */
-	while (vectors >= vector_threshold) {
-		err = pci_enable_msix(adapter->pdev, adapter->msix_entries,
-				      vectors);
-		if (!err) /* Success in acquiring all requested vectors. */
-			break;
-		else if (err < 0)
-			vectors = 0; /* Nasty failure, quit now */
-		else /* err == number of vectors we should try again with */
-			vectors = err;
-	}
-
-	if (vectors < vector_threshold) {
-		dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts.\n");
+	err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+				    vector_threshold, vectors);
+	if (err < 0) {
+		dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n");
 		kfree(adapter->msix_entries);
 		adapter->msix_entries = NULL;
-		err = -EIO;
-	} else {
-		/* Adjust for only the vectors we'll use, which is minimum
-		 * of max_msix_q_vectors + NONQ_VECS, or the number of
-		 * vectors we were allocated.
-		 */
-		adapter->num_msix_vectors = vectors;
+		return err;
 	}
-	return err;
+
+	/* Adjust for only the vectors we'll use, which is minimum
+	 * of max_msix_q_vectors + NONQ_VECS, or the number of
+	 * vectors we were allocated.
+	 */
+	adapter->num_msix_vectors = err;
+	return 0;
 }
 
 /**
@@ -1096,14 +1087,14 @@
 		tx_ring->queue_index = i;
 		tx_ring->netdev = adapter->netdev;
 		tx_ring->dev = &adapter->pdev->dev;
-		tx_ring->count = I40EVF_DEFAULT_TXD;
+		tx_ring->count = adapter->tx_desc_count;
 		adapter->tx_rings[i] = tx_ring;
 
 		rx_ring = &tx_ring[1];
 		rx_ring->queue_index = i;
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
-		rx_ring->count = I40EVF_DEFAULT_RXD;
+		rx_ring->count = adapter->rx_desc_count;
 		adapter->rx_rings[i] = rx_ring;
 	}
 
@@ -1141,9 +1132,6 @@
 	v_budget = min_t(int, pairs, (int)(num_online_cpus() * 2)) + NONQ_VECS;
 	v_budget = min_t(int, v_budget, (int)adapter->vf_res->max_vectors);
 
-	/* A failure in MSI-X entry allocation isn't fatal, but it does
-	 * mean we disable MSI-X capabilities of the adapter.
-	 */
 	adapter->msix_entries = kcalloc(v_budget,
 					sizeof(struct msix_entry), GFP_KERNEL);
 	if (!adapter->msix_entries) {
@@ -1183,7 +1171,7 @@
 		q_vector->vsi = &adapter->vsi;
 		q_vector->v_idx = q_idx;
 		netif_napi_add(adapter->netdev, &q_vector->napi,
-				       i40evf_napi_poll, 64);
+				       i40evf_napi_poll, NAPI_POLL_WEIGHT);
 		adapter->q_vector[q_idx] = q_vector;
 	}
 
@@ -1236,8 +1224,6 @@
 	pci_disable_msix(adapter->pdev);
 	kfree(adapter->msix_entries);
 	adapter->msix_entries = NULL;
-
-	return;
 }
 
 /**
@@ -1309,7 +1295,6 @@
 		goto restart_watchdog;
 
 	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
-		dev_info(&adapter->pdev->dev, "Checking for redemption\n");
 		if ((rd32(hw, I40E_VFGEN_RSTAT) & 0x3) == I40E_VFR_VFACTIVE) {
 			/* A chance for redemption! */
 			dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
@@ -1340,8 +1325,7 @@
 	    (rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) {
 		adapter->state = __I40EVF_RESETTING;
 		adapter->flags |= I40EVF_FLAG_RESET_PENDING;
-		dev_err(&adapter->pdev->dev, "Hardware reset detected.\n");
-		dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
 		schedule_work(&adapter->reset_task);
 		adapter->aq_pending = 0;
 		adapter->aq_required = 0;
@@ -1413,7 +1397,7 @@
 }
 
 /**
- * i40evf_configure_rss - increment to next available tx queue
+ * next_queue - increment to next available tx queue
  * @adapter: board private structure
  * @j: queue counter
  *
@@ -1504,15 +1488,12 @@
 	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
 		rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if (rstat_val != I40E_VFR_VFACTIVE) {
-			dev_info(&adapter->pdev->dev, "Reset now occurring\n");
+		if (rstat_val != I40E_VFR_VFACTIVE)
 			break;
-		} else {
+		else
 			msleep(I40EVF_RESET_WAIT_MS);
-		}
 	}
 	if (i == I40EVF_RESET_WAIT_COUNT) {
-		dev_err(&adapter->pdev->dev, "Reset was not detected\n");
 		adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
 		goto continue_reset; /* act like the reset happened */
 	}
@@ -1521,22 +1502,24 @@
 	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
 		rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if (rstat_val == I40E_VFR_VFACTIVE) {
-			dev_info(&adapter->pdev->dev, "Reset is complete. Reinitializing.\n");
+		if (rstat_val == I40E_VFR_VFACTIVE)
 			break;
-		} else {
+		else
 			msleep(I40EVF_RESET_WAIT_MS);
-		}
 	}
 	if (i == I40EVF_RESET_WAIT_COUNT) {
 		/* reset never finished */
-		dev_err(&adapter->pdev->dev, "Reset never finished (%x). PF driver is dead, and so am I.\n",
+		dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
 			rstat_val);
 		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
 
-		if (netif_running(adapter->netdev))
-			i40evf_close(adapter->netdev);
-
+		if (netif_running(adapter->netdev)) {
+			set_bit(__I40E_DOWN, &adapter->vsi.state);
+			i40evf_down(adapter);
+			i40evf_free_traffic_irqs(adapter);
+			i40evf_free_all_tx_resources(adapter);
+			i40evf_free_all_rx_resources(adapter);
+		}
 		i40evf_free_misc_irq(adapter);
 		i40evf_reset_interrupt_capability(adapter);
 		i40evf_free_queues(adapter);
@@ -1591,7 +1574,7 @@
 	}
 	return;
 reset_err:
-	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit.\n");
+	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
 	i40evf_close(adapter->netdev);
 }
 
@@ -1607,6 +1590,7 @@
 	struct i40e_arq_event_info event;
 	struct i40e_virtchnl_msg *v_msg;
 	i40e_status ret;
+	u32 val, oldval;
 	u16 pending;
 
 	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
@@ -1614,11 +1598,9 @@
 
 	event.msg_size = I40EVF_MAX_AQ_BUF_SIZE;
 	event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
-	if (!event.msg_buf) {
-		dev_info(&adapter->pdev->dev, "%s: no memory for ARQ clean\n",
-				 __func__);
+	if (!event.msg_buf)
 		return;
-	}
+
 	v_msg = (struct i40e_virtchnl_msg *)&event.desc;
 	do {
 		ret = i40evf_clean_arq_element(hw, &event, &pending);
@@ -1636,6 +1618,41 @@
 		}
 	} while (pending);
 
+	/* check for error indications */
+	val = rd32(hw, hw->aq.arq.len);
+	oldval = val;
+	if (val & I40E_VF_ARQLEN_ARQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n");
+		val &= ~I40E_VF_ARQLEN_ARQVFE_MASK;
+	}
+	if (val & I40E_VF_ARQLEN_ARQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n");
+		val &= ~I40E_VF_ARQLEN_ARQOVFL_MASK;
+	}
+	if (val & I40E_VF_ARQLEN_ARQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n");
+		val &= ~I40E_VF_ARQLEN_ARQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.arq.len, val);
+
+	val = rd32(hw, hw->aq.asq.len);
+	oldval = val;
+	if (val & I40E_VF_ATQLEN_ATQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n");
+		val &= ~I40E_VF_ATQLEN_ATQVFE_MASK;
+	}
+	if (val & I40E_VF_ATQLEN_ATQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n");
+		val &= ~I40E_VF_ATQLEN_ATQOVFL_MASK;
+	}
+	if (val & I40E_VF_ATQLEN_ATQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n");
+		val &= ~I40E_VF_ATQLEN_ATQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.asq.len, val);
+
 	/* re-enable Admin queue interrupt cause */
 	i40evf_misc_irq_enable(adapter);
 
@@ -1673,6 +1690,7 @@
 	int i, err = 0;
 
 	for (i = 0; i < adapter->vsi_res->num_queue_pairs; i++) {
+		adapter->tx_rings[i]->count = adapter->tx_desc_count;
 		err = i40evf_setup_tx_descriptors(adapter->tx_rings[i]);
 		if (!err)
 			continue;
@@ -1700,6 +1718,7 @@
 	int i, err = 0;
 
 	for (i = 0; i < adapter->vsi_res->num_queue_pairs; i++) {
+		adapter->rx_rings[i]->count = adapter->rx_desc_count;
 		err = i40evf_setup_rx_descriptors(adapter->rx_rings[i]);
 		if (!err)
 			continue;
@@ -1804,12 +1823,11 @@
 	if (adapter->state <= __I40EVF_DOWN)
 		return 0;
 
-	/* signal that we are down to the interrupt handler */
-	adapter->state = __I40EVF_DOWN;
 
 	set_bit(__I40E_DOWN, &adapter->vsi.state);
 
 	i40evf_down(adapter);
+	adapter->state = __I40EVF_DOWN;
 	i40evf_free_traffic_irqs(adapter);
 
 	i40evf_free_all_tx_resources(adapter);
@@ -1848,8 +1866,6 @@
 
 	WARN_ON(in_interrupt());
 
-	adapter->state = __I40EVF_RESETTING;
-
 	i40evf_down(adapter);
 
 	/* allocate transmit descriptors */
@@ -1872,7 +1888,7 @@
 	return;
 
 err_reinit:
-	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit.\n");
+	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
 	i40evf_close(netdev);
 }
 
@@ -1967,7 +1983,7 @@
 		}
 		err = i40evf_check_reset_complete(hw);
 		if (err) {
-			dev_err(&pdev->dev, "Device is still in reset (%d)\n",
+			dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
 				err);
 			goto err;
 		}
@@ -1993,14 +2009,14 @@
 		break;
 	case __I40EVF_INIT_VERSION_CHECK:
 		if (!i40evf_asq_done(hw)) {
-			dev_err(&pdev->dev, "Admin queue command never completed.\n");
+			dev_err(&pdev->dev, "Admin queue command never completed\n");
 			goto err;
 		}
 
 		/* aq msg sent, awaiting reply */
 		err = i40evf_verify_api_ver(adapter);
 		if (err) {
-			dev_err(&pdev->dev, "Unable to verify API version (%d)\n",
+			dev_info(&pdev->dev, "Unable to verify API version (%d), retrying\n",
 				err);
 			goto err;
 		}
@@ -2074,12 +2090,12 @@
 	netdev->hw_features &= ~NETIF_F_RXCSUM;
 
 	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
-		dev_info(&pdev->dev, "Invalid MAC address %pMAC, using random\n",
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
 			 adapter->hw.mac.addr);
 		random_ether_addr(adapter->hw.mac.addr);
 	}
-	memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len);
-	memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len);
+	ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+	ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
@@ -2087,7 +2103,7 @@
 	if (NULL == f)
 		goto err_sw_init;
 
-	memcpy(f->macaddr, adapter->hw.mac.addr, ETH_ALEN);
+	ether_addr_copy(f->macaddr, adapter->hw.mac.addr);
 	f->add = true;
 	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 
@@ -2098,6 +2114,8 @@
 	adapter->watchdog_timer.data = (unsigned long)adapter;
 	mod_timer(&adapter->watchdog_timer, jiffies + 1);
 
+	adapter->tx_desc_count = I40EVF_DEFAULT_TXD;
+	adapter->rx_desc_count = I40EVF_DEFAULT_RXD;
 	err = i40evf_init_interrupt_scheme(adapter);
 	if (err)
 		goto err_sw_init;
@@ -2114,8 +2132,10 @@
 	adapter->vsi.back = adapter;
 	adapter->vsi.base_vector = 1;
 	adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
-	adapter->vsi.rx_itr_setting = I40E_ITR_DYNAMIC;
-	adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
+	adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC |
+				       ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
+	adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC |
+				       ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
 	adapter->vsi.netdev = adapter->netdev;
 
 	if (!adapter->netdev_registered) {
@@ -2128,7 +2148,7 @@
 
 	netif_tx_stop_all_queues(netdev);
 
-	dev_info(&pdev->dev, "MAC address: %pMAC\n", adapter->hw.mac.addr);
+	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
 	if (netdev->features & NETIF_F_GRO)
 		dev_info(&pdev->dev, "GRO is enabled\n");
 
@@ -2152,12 +2172,11 @@
 err:
 	/* Things went into the weeds, so try again later */
 	if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
-		dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n");
+		dev_err(&pdev->dev, "Failed to communicate with PF; giving up\n");
 		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
 		return; /* do not reschedule */
 	}
 	schedule_delayed_work(&adapter->init_task, HZ * 3);
-	return;
 }
 
 /**

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index e294f01..2dc0bac 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c

@@ -12,6 +12,9 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
@@ -216,11 +219,9 @@
 	len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) +
 		       (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs);
 	vqci = kzalloc(len, GFP_ATOMIC);
-	if (!vqci) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!vqci)
 		return;
-	}
+
 	vqci->vsi_id = adapter->vsi_res->vsi_id;
 	vqci->num_queue_pairs = pairs;
 	vqpi = vqci->qpair;
@@ -232,6 +233,9 @@
 		vqpi->txq.queue_id = i;
 		vqpi->txq.ring_len = adapter->tx_rings[i]->count;
 		vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma;
+		vqpi->txq.headwb_enabled = 1;
+		vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr +
+		    (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc));
 
 		vqpi->rxq.vsi_id = vqci->vsi_id;
 		vqpi->rxq.queue_id = i;
@@ -329,11 +333,8 @@
 	      (adapter->num_msix_vectors *
 		sizeof(struct i40e_virtchnl_vector_map));
 	vimi = kzalloc(len, GFP_ATOMIC);
-	if (!vimi) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!vimi)
 		return;
-	}
 
 	vimi->num_vectors = adapter->num_msix_vectors;
 	/* Queue vectors first */
@@ -390,7 +391,7 @@
 	len = sizeof(struct i40e_virtchnl_ether_addr_list) +
 	      (count * sizeof(struct i40e_virtchnl_ether_addr));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
-		dev_warn(&adapter->pdev->dev, "%s: Too many MAC address changes in one request.\n",
+		dev_warn(&adapter->pdev->dev, "%s: Too many MAC address changes in one request\n",
 			__func__);
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
@@ -399,16 +400,14 @@
 	}
 
 	veal = kzalloc(len, GFP_ATOMIC);
-	if (!veal) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!veal)
 		return;
-	}
+
 	veal->vsi_id = adapter->vsi_res->vsi_id;
 	veal->num_elements = count;
 	list_for_each_entry(f, &adapter->mac_filter_list, list) {
 		if (f->add) {
-			memcpy(veal->list[i].addr, f->macaddr, ETH_ALEN);
+			ether_addr_copy(veal->list[i].addr, f->macaddr);
 			i++;
 			f->add = false;
 		}
@@ -454,7 +453,7 @@
 	len = sizeof(struct i40e_virtchnl_ether_addr_list) +
 	      (count * sizeof(struct i40e_virtchnl_ether_addr));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
-		dev_warn(&adapter->pdev->dev, "%s: Too many MAC address changes in one request.\n",
+		dev_warn(&adapter->pdev->dev, "%s: Too many MAC address changes in one request\n",
 			__func__);
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
@@ -462,16 +461,14 @@
 		len = I40EVF_MAX_AQ_BUF_SIZE;
 	}
 	veal = kzalloc(len, GFP_ATOMIC);
-	if (!veal) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!veal)
 		return;
-	}
+
 	veal->vsi_id = adapter->vsi_res->vsi_id;
 	veal->num_elements = count;
 	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
 		if (f->remove) {
-			memcpy(veal->list[i].addr, f->macaddr, ETH_ALEN);
+			ether_addr_copy(veal->list[i].addr, f->macaddr);
 			i++;
 			list_del(&f->list);
 			kfree(f);
@@ -518,7 +515,7 @@
 	len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
-		dev_warn(&adapter->pdev->dev, "%s: Too many VLAN changes in one request.\n",
+		dev_warn(&adapter->pdev->dev, "%s: Too many VLAN changes in one request\n",
 			__func__);
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
@@ -526,11 +523,9 @@
 		len = I40EVF_MAX_AQ_BUF_SIZE;
 	}
 	vvfl = kzalloc(len, GFP_ATOMIC);
-	if (!vvfl) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!vvfl)
 		return;
-	}
+
 	vvfl->vsi_id = adapter->vsi_res->vsi_id;
 	vvfl->num_elements = count;
 	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
@@ -580,7 +575,7 @@
 	len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
-		dev_warn(&adapter->pdev->dev, "%s: Too many VLAN changes in one request.\n",
+		dev_warn(&adapter->pdev->dev, "%s: Too many VLAN changes in one request\n",
 			__func__);
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
@@ -588,11 +583,9 @@
 		len = I40EVF_MAX_AQ_BUF_SIZE;
 	}
 	vvfl = kzalloc(len, GFP_ATOMIC);
-	if (!vvfl) {
-		dev_err(&adapter->pdev->dev, "%s: unable to allocate memory\n",
-			__func__);
+	if (!vvfl)
 		return;
-	}
+
 	vvfl->vsi_id = adapter->vsi_res->vsi_id;
 	vvfl->num_elements = count;
 	list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
@@ -721,7 +714,7 @@
 		return;
 	}
 	if (v_opcode != adapter->current_op) {
-		dev_err(&adapter->pdev->dev, "%s: Pending op is %d, received %d.\n",
+		dev_err(&adapter->pdev->dev, "%s: Pending op is %d, received %d\n",
 			__func__, adapter->current_op, v_opcode);
 		/* We're probably completely screwed at this point, but clear
 		 * the current op and try to carry on....
@@ -730,7 +723,7 @@
 		return;
 	}
 	if (v_retval) {
-		dev_err(&adapter->pdev->dev, "%s: PF returned error %d to our request %d!\n",
+		dev_err(&adapter->pdev->dev, "%s: PF returned error %d to our request %d\n",
 			__func__, v_retval, v_opcode);
 	}
 	switch (v_opcode) {
@@ -745,9 +738,8 @@
 						 stats->tx_broadcast;
 		adapter->net_stats.rx_bytes = stats->rx_bytes;
 		adapter->net_stats.tx_bytes = stats->tx_bytes;
-		adapter->net_stats.rx_errors = stats->rx_errors;
 		adapter->net_stats.tx_errors = stats->tx_errors;
-		adapter->net_stats.rx_dropped = stats->rx_missed;
+		adapter->net_stats.rx_dropped = stats->rx_discards;
 		adapter->net_stats.tx_dropped = stats->tx_discards;
 		adapter->current_stats = *stats;
 		}
@@ -781,7 +773,7 @@
 		adapter->aq_pending &= ~(I40EVF_FLAG_AQ_MAP_VECTORS);
 		break;
 	default:
-		dev_warn(&adapter->pdev->dev, "%s: Received unexpected message %d from PF.\n",
+		dev_warn(&adapter->pdev->dev, "%s: Received unexpected message %d from PF\n",
 			__func__, v_opcode);
 		break;
 	} /* switch v_opcode */

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index fa36fe1..a2db388 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 /* e1000_82575
  * e1000_82576
@@ -73,9 +70,8 @@
 static s32  igb_update_nvm_checksum_82580(struct e1000_hw *hw);
 static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw);
 static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw);
-static const u16 e1000_82580_rxpbs_table[] =
-	{ 36, 72, 144, 1, 2, 4, 8, 16,
-	  35, 70, 140 };
+static const u16 e1000_82580_rxpbs_table[] = {
+	36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 };
 
 /**
  *  igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO
@@ -159,7 +155,7 @@
 		ret_val = igb_check_for_link_82575(hw);
 	}
 
-	return E1000_SUCCESS;
+	return 0;
 }
 
 /**
@@ -526,7 +522,7 @@
 static s32 igb_get_invariants_82575(struct e1000_hw *hw)
 {
 	struct e1000_mac_info *mac = &hw->mac;
-	struct e1000_dev_spec_82575 * dev_spec = &hw->dev_spec._82575;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
 	s32 ret_val;
 	u32 ctrl_ext = 0;
 	u32 link_mode = 0;
@@ -1008,7 +1004,6 @@
 static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active)
 {
 	struct e1000_phy_info *phy = &hw->phy;
-	s32 ret_val = 0;
 	u16 data;
 
 	data = rd32(E1000_82580_PHY_POWER_MGMT);
@@ -1032,7 +1027,7 @@
 			data &= ~E1000_82580_PM_SPD; }
 
 	wr32(E1000_82580_PHY_POWER_MGMT, data);
-	return ret_val;
+	return 0;
 }
 
 /**
@@ -1052,7 +1047,6 @@
 static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active)
 {
 	struct e1000_phy_info *phy = &hw->phy;
-	s32 ret_val = 0;
 	u16 data;
 
 	data = rd32(E1000_82580_PHY_POWER_MGMT);
@@ -1077,7 +1071,7 @@
 	}
 
 	wr32(E1000_82580_PHY_POWER_MGMT, data);
-	return ret_val;
+	return 0;
 }
 
 /**
@@ -1180,8 +1174,8 @@
 {
 	u32 swfw_sync;
 
-	while (igb_get_hw_semaphore(hw) != 0);
-	/* Empty */
+	while (igb_get_hw_semaphore(hw) != 0)
+		; /* Empty */
 
 	swfw_sync = rd32(E1000_SW_FW_SYNC);
 	swfw_sync &= ~mask;
@@ -1203,7 +1197,6 @@
 static s32 igb_get_cfg_done_82575(struct e1000_hw *hw)
 {
 	s32 timeout = PHY_CFG_TIMEOUT;
-	s32 ret_val = 0;
 	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
 
 	if (hw->bus.func == 1)
@@ -1216,7 +1209,7 @@
 	while (timeout) {
 		if (rd32(E1000_EEMNGCTL) & mask)
 			break;
-		msleep(1);
+		usleep_range(1000, 2000);
 		timeout--;
 	}
 	if (!timeout)
@@ -1227,7 +1220,7 @@
 	    (hw->phy.type == e1000_phy_igp_3))
 		igb_phy_init_script_igp3(hw);
 
-	return ret_val;
+	return 0;
 }
 
 /**
@@ -1269,7 +1262,7 @@
 
 	if (hw->phy.media_type != e1000_media_type_copper) {
 		ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed,
-		                                             &duplex);
+							     &duplex);
 		/* Use this flag to determine if link needs to be checked or
 		 * not.  If  we have link clear the flag so that we do not
 		 * continue to check for link.
@@ -1316,7 +1309,7 @@
 
 	/* flush the write to verify completion */
 	wrfl();
-	msleep(1);
+	usleep_range(1000, 2000);
 }
 
 /**
@@ -1411,7 +1404,7 @@
 
 		/* flush the write to verify completion */
 		wrfl();
-		msleep(1);
+		usleep_range(1000, 2000);
 	}
 }
 
@@ -1436,9 +1429,8 @@
 
 	/* set the completion timeout for interface */
 	ret_val = igb_set_pcie_completion_timeout(hw);
-	if (ret_val) {
+	if (ret_val)
 		hw_dbg("PCI-E Set completion timeout has failed.\n");
-	}
 
 	hw_dbg("Masking off all interrupts\n");
 	wr32(E1000_IMC, 0xffffffff);
@@ -1447,7 +1439,7 @@
 	wr32(E1000_TCTL, E1000_TCTL_PSP);
 	wrfl();
 
-	msleep(10);
+	usleep_range(10000, 20000);
 
 	ctrl = rd32(E1000_CTRL);
 
@@ -1622,7 +1614,7 @@
 {
 	u32 ctrl_ext, ctrl_reg, reg, anadv_reg;
 	bool pcs_autoneg;
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 	u16 data;
 
 	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
@@ -1676,7 +1668,7 @@
 		    hw->mac.type == e1000_82576) {
 			ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data);
 			if (ret_val) {
-				printk(KERN_DEBUG "NVM Read Error\n\n");
+				hw_dbg("NVM Read Error\n\n");
 				return ret_val;
 			}
 
@@ -1689,7 +1681,7 @@
 		 * link either autoneg or be forced to 1000/Full
 		 */
 		ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD |
-		            E1000_CTRL_FD | E1000_CTRL_FRCDPX;
+				E1000_CTRL_FD | E1000_CTRL_FRCDPX;
 
 		/* set speed of 1000/Full if speed/duplex is forced */
 		reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL;
@@ -1925,7 +1917,7 @@
 	}
 	/* Poll all queues to verify they have shut down */
 	for (ms_wait = 0; ms_wait < 10; ms_wait++) {
-		msleep(1);
+		usleep_range(1000, 2000);
 		rx_enabled = 0;
 		for (i = 0; i < 4; i++)
 			rx_enabled |= rd32(E1000_RXDCTL(i));
@@ -1953,7 +1945,7 @@
 	wr32(E1000_RCTL, temp_rctl);
 	wr32(E1000_RCTL, temp_rctl | E1000_RCTL_EN);
 	wrfl();
-	msleep(2);
+	usleep_range(2000, 3000);
 
 	/* Enable RX queues that were previously enabled and restore our
 	 * previous state
@@ -2005,14 +1997,14 @@
 	 * 16ms to 55ms
 	 */
 	ret_val = igb_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
-	                                &pcie_devctl2);
+					&pcie_devctl2);
 	if (ret_val)
 		goto out;
 
 	pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
 
 	ret_val = igb_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
-	                                 &pcie_devctl2);
+					 &pcie_devctl2);
 out:
 	/* disable completion timeout resend */
 	gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
@@ -2241,7 +2233,7 @@
 	wr32(E1000_TCTL, E1000_TCTL_PSP);
 	wrfl();
 
-	msleep(10);
+	usleep_range(10000, 11000);
 
 	/* Determine whether or not a global dev reset is requested */
 	if (global_device_reset &&
@@ -2259,7 +2251,7 @@
 
 	/* Add delay to insure DEV_RST has time to complete */
 	if (global_device_reset)
-		msleep(5);
+		usleep_range(5000, 6000);
 
 	ret_val = igb_get_auto_rd_done(hw);
 	if (ret_val) {
@@ -2436,8 +2428,7 @@
 
 	ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
 	if (ret_val) {
-		hw_dbg("NVM Read Error while updating checksum"
-			" compatibility bit.\n");
+		hw_dbg("NVM Read Error while updating checksum compatibility bit.\n");
 		goto out;
 	}
 
@@ -2447,8 +2438,7 @@
 		ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1,
 					&nvm_data);
 		if (ret_val) {
-			hw_dbg("NVM Write Error while updating checksum"
-				" compatibility bit.\n");
+			hw_dbg("NVM Write Error while updating checksum compatibility bit.\n");
 			goto out;
 		}
 	}
@@ -2525,7 +2515,7 @@
 static s32 __igb_access_emi_reg(struct e1000_hw *hw, u16 address,
 				  u16 *data, bool read)
 {
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 
 	ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address);
 	if (ret_val)
@@ -2559,7 +2549,6 @@
  **/
 s32 igb_set_eee_i350(struct e1000_hw *hw)
 {
-	s32 ret_val = 0;
 	u32 ipcnfg, eeer;
 
 	if ((hw->mac.type < e1000_i350) ||
@@ -2593,7 +2582,7 @@
 	rd32(E1000_EEER);
 out:
 
-	return ret_val;
+	return 0;
 }
 
 /**
@@ -2720,7 +2709,6 @@
  **/
 static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)
 {
-	s32 status = E1000_SUCCESS;
 	u16 ets_offset;
 	u16 ets_cfg;
 	u16 ets_sensor;
@@ -2738,7 +2726,7 @@
 	/* Return the internal sensor only if ETS is unsupported */
 	hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset);
 	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
-		return status;
+		return 0;
 
 	hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
 	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
@@ -2762,7 +2750,7 @@
 					E1000_I2C_THERMAL_SENSOR_ADDR,
 					&data->sensor[i].temp);
 	}
-	return status;
+	return 0;
 }
 
 /**
@@ -2774,7 +2762,6 @@
  **/
 static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
 {
-	s32 status = E1000_SUCCESS;
 	u16 ets_offset;
 	u16 ets_cfg;
 	u16 ets_sensor;
@@ -2800,7 +2787,7 @@
 	/* Return the internal sensor only if ETS is unsupported */
 	hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset);
 	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
-		return status;
+		return 0;
 
 	hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
 	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
@@ -2831,7 +2818,7 @@
 							low_thresh_delta;
 		}
 	}
-	return status;
+	return 0;
 }
 
 #endif

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index 09d78be..b407c55 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_82575_H_
 #define _E1000_82575_H_
@@ -37,9 +34,9 @@
 		       u8 data);
 
 #define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \
-                                     (ID_LED_DEF1_DEF2 <<  8) | \
-                                     (ID_LED_DEF1_DEF2 <<  4) | \
-                                     (ID_LED_OFF1_ON2))
+				     (ID_LED_DEF1_DEF2 <<  8) | \
+				     (ID_LED_DEF1_DEF2 <<  4) | \
+				     (ID_LED_OFF1_ON2))
 
 #define E1000_RAR_ENTRIES_82575        16
 #define E1000_RAR_ENTRIES_82576        24
@@ -67,16 +64,16 @@
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX    0x01000000
 
 #define E1000_EICR_TX_QUEUE ( \
-    E1000_EICR_TX_QUEUE0 |    \
-    E1000_EICR_TX_QUEUE1 |    \
-    E1000_EICR_TX_QUEUE2 |    \
-    E1000_EICR_TX_QUEUE3)
+	E1000_EICR_TX_QUEUE0 |    \
+	E1000_EICR_TX_QUEUE1 |    \
+	E1000_EICR_TX_QUEUE2 |    \
+	E1000_EICR_TX_QUEUE3)
 
 #define E1000_EICR_RX_QUEUE ( \
-    E1000_EICR_RX_QUEUE0 |    \
-    E1000_EICR_RX_QUEUE1 |    \
-    E1000_EICR_RX_QUEUE2 |    \
-    E1000_EICR_RX_QUEUE3)
+	E1000_EICR_RX_QUEUE0 |    \
+	E1000_EICR_RX_QUEUE1 |    \
+	E1000_EICR_RX_QUEUE2 |    \
+	E1000_EICR_RX_QUEUE3)
 
 /* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
 #define E1000_IMIREXT_SIZE_BP     0x00001000  /* Packet size bypass */
@@ -92,8 +89,7 @@
 		struct {
 			struct {
 				__le16 pkt_info;   /* RSS type, Packet type */
-				__le16 hdr_info;   /* Split Header,
-						    * header buffer length */
+				__le16 hdr_info;   /* Split Header, buf len */
 			} lo_dword;
 			union {
 				__le32 rss;          /* RSS Hash */

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index b05bf92..2a8bb35 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_DEFINES_H_
 #define _E1000_DEFINES_H_
@@ -101,11 +98,11 @@
 
 /* Same mask, but for extended and packet split descriptors */
 #define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
-    E1000_RXDEXT_STATERR_CE  |            \
-    E1000_RXDEXT_STATERR_SE  |            \
-    E1000_RXDEXT_STATERR_SEQ |            \
-    E1000_RXDEXT_STATERR_CXE |            \
-    E1000_RXDEXT_STATERR_RXE)
+	E1000_RXDEXT_STATERR_CE  |            \
+	E1000_RXDEXT_STATERR_SE  |            \
+	E1000_RXDEXT_STATERR_SEQ |            \
+	E1000_RXDEXT_STATERR_CXE |            \
+	E1000_RXDEXT_STATERR_RXE)
 
 #define E1000_MRQC_RSS_FIELD_IPV4_TCP          0x00010000
 #define E1000_MRQC_RSS_FIELD_IPV4              0x00020000
@@ -307,39 +304,34 @@
 #define E1000_TCTL_RTLC   0x01000000    /* Re-transmit on late collision */
 
 /* DMA Coalescing register fields */
-#define E1000_DMACR_DMACWT_MASK         0x00003FFF /* DMA Coalescing
-							* Watchdog Timer */
-#define E1000_DMACR_DMACTHR_MASK        0x00FF0000 /* DMA Coalescing Receive
-							* Threshold */
+#define E1000_DMACR_DMACWT_MASK         0x00003FFF /* DMA Coal Watchdog Timer */
+#define E1000_DMACR_DMACTHR_MASK        0x00FF0000 /* DMA Coal Rx Threshold */
 #define E1000_DMACR_DMACTHR_SHIFT       16
-#define E1000_DMACR_DMAC_LX_MASK        0x30000000 /* Lx when no PCIe
-							* transactions */
+#define E1000_DMACR_DMAC_LX_MASK        0x30000000 /* Lx when no PCIe trans */
 #define E1000_DMACR_DMAC_LX_SHIFT       28
 #define E1000_DMACR_DMAC_EN             0x80000000 /* Enable DMA Coalescing */
 /* DMA Coalescing BMC-to-OS Watchdog Enable */
 #define E1000_DMACR_DC_BMC2OSW_EN	0x00008000
 
-#define E1000_DMCTXTH_DMCTTHR_MASK      0x00000FFF /* DMA Coalescing Transmit
-							* Threshold */
+#define E1000_DMCTXTH_DMCTTHR_MASK      0x00000FFF /* DMA Coal Tx Threshold */
 
 #define E1000_DMCTLX_TTLX_MASK          0x00000FFF /* Time to LX request */
 
-#define E1000_DMCRTRH_UTRESH_MASK       0x0007FFFF /* Receive Traffic Rate
-							* Threshold */
-#define E1000_DMCRTRH_LRPRCW            0x80000000 /* Rcv packet rate in
-							* current window */
+#define E1000_DMCRTRH_UTRESH_MASK       0x0007FFFF /* Rx Traffic Rate Thresh */
+#define E1000_DMCRTRH_LRPRCW            0x80000000 /* Rx pkt rate curr window */
 
-#define E1000_DMCCNT_CCOUNT_MASK        0x01FFFFFF /* DMA Coal Rcv Traffic
-							* Current Cnt */
+#define E1000_DMCCNT_CCOUNT_MASK        0x01FFFFFF /* DMA Coal Rx Current Cnt */
 
-#define E1000_FCRTC_RTH_COAL_MASK       0x0003FFF0 /* Flow ctrl Rcv Threshold
-							* High val */
+#define E1000_FCRTC_RTH_COAL_MASK       0x0003FFF0 /* FC Rx Thresh High val */
 #define E1000_FCRTC_RTH_COAL_SHIFT      4
 #define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision */
 
 /* Timestamp in Rx buffer */
 #define E1000_RXPBS_CFG_TS_EN           0x80000000
 
+#define I210_RXPBSIZE_DEFAULT		0x000000A2 /* RXPBSIZE default */
+#define I210_TXPBSIZE_DEFAULT		0x04000014 /* TXPBSIZE default */
+
 /* SerDes Control */
 #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
 
@@ -406,12 +398,12 @@
  *   o LSC    = Link Status Change
  */
 #define IMS_ENABLE_MASK ( \
-    E1000_IMS_RXT0   |    \
-    E1000_IMS_TXDW   |    \
-    E1000_IMS_RXDMT0 |    \
-    E1000_IMS_RXSEQ  |    \
-    E1000_IMS_LSC    |    \
-    E1000_IMS_DOUTSYNC)
+	E1000_IMS_RXT0   |    \
+	E1000_IMS_TXDW   |    \
+	E1000_IMS_RXDMT0 |    \
+	E1000_IMS_RXSEQ  |    \
+	E1000_IMS_LSC    |    \
+	E1000_IMS_DOUTSYNC)
 
 /* Interrupt Mask Set */
 #define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
@@ -467,7 +459,6 @@
 #define E1000_RAH_POOL_1 0x00040000
 
 /* Error Codes */
-#define E1000_SUCCESS      0
 #define E1000_ERR_NVM      1
 #define E1000_ERR_PHY      2
 #define E1000_ERR_CONFIG   3
@@ -1011,8 +1002,7 @@
 #define E1000_VFTA_ENTRY_BIT_SHIFT_MASK      0x1F
 
 /* DMA Coalescing register fields */
-#define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision based
-                                                      on DMA coal */
+#define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power on DMA coal */
 
 /* Tx Rate-Scheduler Config fields */
 #define E1000_RTTBCNRC_RS_ENA		0x80000000

diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 10741d1..89925e4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h

@@ -1,28 +1,24 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_HW_H_
 #define _E1000_HW_H_
@@ -320,15 +316,15 @@
 #include "e1000_mbx.h"
 
 struct e1000_mac_operations {
-	s32  (*check_for_link)(struct e1000_hw *);
-	s32  (*reset_hw)(struct e1000_hw *);
-	s32  (*init_hw)(struct e1000_hw *);
+	s32 (*check_for_link)(struct e1000_hw *);
+	s32 (*reset_hw)(struct e1000_hw *);
+	s32 (*init_hw)(struct e1000_hw *);
 	bool (*check_mng_mode)(struct e1000_hw *);
-	s32  (*setup_physical_interface)(struct e1000_hw *);
+	s32 (*setup_physical_interface)(struct e1000_hw *);
 	void (*rar_set)(struct e1000_hw *, u8 *, u32);
-	s32  (*read_mac_addr)(struct e1000_hw *);
-	s32  (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *);
-	s32  (*acquire_swfw_sync)(struct e1000_hw *, u16);
+	s32 (*read_mac_addr)(struct e1000_hw *);
+	s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *);
+	s32 (*acquire_swfw_sync)(struct e1000_hw *, u16);
 	void (*release_swfw_sync)(struct e1000_hw *, u16);
 #ifdef CONFIG_IGB_HWMON
 	s32 (*get_thermal_sensor_data)(struct e1000_hw *);
@@ -338,31 +334,31 @@
 };
 
 struct e1000_phy_operations {
-	s32  (*acquire)(struct e1000_hw *);
-	s32  (*check_polarity)(struct e1000_hw *);
-	s32  (*check_reset_block)(struct e1000_hw *);
-	s32  (*force_speed_duplex)(struct e1000_hw *);
-	s32  (*get_cfg_done)(struct e1000_hw *hw);
-	s32  (*get_cable_length)(struct e1000_hw *);
-	s32  (*get_phy_info)(struct e1000_hw *);
-	s32  (*read_reg)(struct e1000_hw *, u32, u16 *);
+	s32 (*acquire)(struct e1000_hw *);
+	s32 (*check_polarity)(struct e1000_hw *);
+	s32 (*check_reset_block)(struct e1000_hw *);
+	s32 (*force_speed_duplex)(struct e1000_hw *);
+	s32 (*get_cfg_done)(struct e1000_hw *hw);
+	s32 (*get_cable_length)(struct e1000_hw *);
+	s32 (*get_phy_info)(struct e1000_hw *);
+	s32 (*read_reg)(struct e1000_hw *, u32, u16 *);
 	void (*release)(struct e1000_hw *);
-	s32  (*reset)(struct e1000_hw *);
-	s32  (*set_d0_lplu_state)(struct e1000_hw *, bool);
-	s32  (*set_d3_lplu_state)(struct e1000_hw *, bool);
-	s32  (*write_reg)(struct e1000_hw *, u32, u16);
+	s32 (*reset)(struct e1000_hw *);
+	s32 (*set_d0_lplu_state)(struct e1000_hw *, bool);
+	s32 (*set_d3_lplu_state)(struct e1000_hw *, bool);
+	s32 (*write_reg)(struct e1000_hw *, u32, u16);
 	s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *);
 	s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8);
 };
 
 struct e1000_nvm_operations {
-	s32  (*acquire)(struct e1000_hw *);
-	s32  (*read)(struct e1000_hw *, u16, u16, u16 *);
+	s32 (*acquire)(struct e1000_hw *);
+	s32 (*read)(struct e1000_hw *, u16, u16, u16 *);
 	void (*release)(struct e1000_hw *);
-	s32  (*write)(struct e1000_hw *, u16, u16, u16 *);
-	s32  (*update)(struct e1000_hw *);
-	s32  (*validate)(struct e1000_hw *);
-	s32  (*valid_led_default)(struct e1000_hw *, u16 *);
+	s32 (*write)(struct e1000_hw *, u16, u16, u16 *);
+	s32 (*update)(struct e1000_hw *);
+	s32 (*validate)(struct e1000_hw *);
+	s32 (*valid_led_default)(struct e1000_hw *, u16 *);
 };
 
 #define E1000_MAX_SENSORS		3

diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index f67f8a1..337161f 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 /* e1000_i210
  * e1000_i211
@@ -100,7 +97,7 @@
 		return -E1000_ERR_NVM;
 	}
 
-	return E1000_SUCCESS;
+	return 0;
 }
 
 /**
@@ -142,7 +139,7 @@
 	u32 swfw_sync;
 	u32 swmask = mask;
 	u32 fwmask = mask << 16;
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 	s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
 
 	while (i < timeout) {
@@ -187,7 +184,7 @@
 {
 	u32 swfw_sync;
 
-	while (igb_get_hw_semaphore_i210(hw) != E1000_SUCCESS)
+	while (igb_get_hw_semaphore_i210(hw))
 		; /* Empty */
 
 	swfw_sync = rd32(E1000_SW_FW_SYNC);
@@ -210,7 +207,7 @@
 static s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words,
 				  u16 *data)
 {
-	s32 status = E1000_SUCCESS;
+	s32 status = 0;
 	u16 i, count;
 
 	/* We cannot hold synchronization semaphores for too long,
@@ -220,7 +217,7 @@
 	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
 		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
 			E1000_EERD_EEWR_MAX_COUNT : (words - i);
-		if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+		if (!(hw->nvm.ops.acquire(hw))) {
 			status = igb_read_nvm_eerd(hw, offset, count,
 						     data + i);
 			hw->nvm.ops.release(hw);
@@ -228,7 +225,7 @@
 			status = E1000_ERR_SWFW_SYNC;
 		}
 
-		if (status != E1000_SUCCESS)
+		if (status)
 			break;
 	}
 
@@ -253,7 +250,7 @@
 	struct e1000_nvm_info *nvm = &hw->nvm;
 	u32 i, k, eewr = 0;
 	u32 attempts = 100000;
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 
 	/* A check for invalid values:  offset too large, too many words,
 	 * too many words for the offset, and not enough words.
@@ -275,13 +272,13 @@
 		for (k = 0; k < attempts; k++) {
 			if (E1000_NVM_RW_REG_DONE &
 			    rd32(E1000_SRWR)) {
-				ret_val = E1000_SUCCESS;
+				ret_val = 0;
 				break;
 			}
 			udelay(5);
 	}
 
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			hw_dbg("Shadow RAM write EEWR timed out\n");
 			break;
 		}
@@ -310,7 +307,7 @@
 static s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words,
 				   u16 *data)
 {
-	s32 status = E1000_SUCCESS;
+	s32 status = 0;
 	u16 i, count;
 
 	/* We cannot hold synchronization semaphores for too long,
@@ -320,7 +317,7 @@
 	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
 		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
 			E1000_EERD_EEWR_MAX_COUNT : (words - i);
-		if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+		if (!(hw->nvm.ops.acquire(hw))) {
 			status = igb_write_nvm_srwr(hw, offset, count,
 						      data + i);
 			hw->nvm.ops.release(hw);
@@ -328,7 +325,7 @@
 			status = E1000_ERR_SWFW_SYNC;
 		}
 
-		if (status != E1000_SUCCESS)
+		if (status)
 			break;
 	}
 
@@ -367,12 +364,12 @@
 				*data = INVM_DWORD_TO_WORD_DATA(invm_dword);
 				hw_dbg("Read INVM Word 0x%02x = %x\n",
 					  address, *data);
-				status = E1000_SUCCESS;
+				status = 0;
 				break;
 			}
 		}
 	}
-	if (status != E1000_SUCCESS)
+	if (status)
 		hw_dbg("Requested word 0x%02x not found in OTP\n", address);
 	return status;
 }
@@ -388,7 +385,7 @@
 static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset,
 				u16 words __always_unused, u16 *data)
 {
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 
 	/* Only the MAC addr is required to be present in the iNVM */
 	switch (offset) {
@@ -398,43 +395,44 @@
 						     &data[1]);
 		ret_val |= igb_read_invm_word_i210(hw, (u8)offset+2,
 						     &data[2]);
-		if (ret_val != E1000_SUCCESS)
+		if (ret_val)
 			hw_dbg("MAC Addr not found in iNVM\n");
 		break;
 	case NVM_INIT_CTRL_2:
 		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			*data = NVM_INIT_CTRL_2_DEFAULT_I211;
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 		}
 		break;
 	case NVM_INIT_CTRL_4:
 		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			*data = NVM_INIT_CTRL_4_DEFAULT_I211;
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 		}
 		break;
 	case NVM_LED_1_CFG:
 		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			*data = NVM_LED_1_CFG_DEFAULT_I211;
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 		}
 		break;
 	case NVM_LED_0_2_CFG:
 		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			*data = NVM_LED_0_2_CFG_DEFAULT_I211;
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 		}
 		break;
 	case NVM_ID_LED_SETTINGS:
 		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			*data = ID_LED_RESERVED_FFFF;
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 		}
+		break;
 	case NVM_SUB_DEV_ID:
 		*data = hw->subsystem_device_id;
 		break;
@@ -488,14 +486,14 @@
 		/* Check if we have first version location used */
 		if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) {
 			version = 0;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 		/* Check if we have second version location used */
 		else if ((i == 1) &&
 			 ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
 			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 		/* Check if we have odd version location
@@ -506,7 +504,7 @@
 			 (i != 1))) {
 			version = (*next_record & E1000_INVM_VER_FIELD_TWO)
 				  >> 13;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 		/* Check if we have even version location
@@ -515,12 +513,12 @@
 		else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
 			 ((*record & 0x3) == 0)) {
 			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 	}
 
-	if (status == E1000_SUCCESS) {
+	if (!status) {
 		invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
 					>> E1000_INVM_MAJOR_SHIFT;
 		invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
@@ -533,7 +531,7 @@
 		/* Check if we have image type in first location used */
 		if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) {
 			invm_ver->invm_img_type = 0;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 		/* Check if we have image type in first location used */
@@ -542,7 +540,7 @@
 			 ((((*record & 0x3) != 0) && (i != 1)))) {
 			invm_ver->invm_img_type =
 				(*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
-			status = E1000_SUCCESS;
+			status = 0;
 			break;
 		}
 	}
@@ -558,10 +556,10 @@
  **/
 static s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw)
 {
-	s32 status = E1000_SUCCESS;
+	s32 status = 0;
 	s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *);
 
-	if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+	if (!(hw->nvm.ops.acquire(hw))) {
 
 		/* Replace the read function with semaphore grabbing with
 		 * the one that skips this for a while.
@@ -593,7 +591,7 @@
  **/
 static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw)
 {
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 	u16 checksum = 0;
 	u16 i, nvm_data;
 
@@ -602,12 +600,12 @@
 	 * EEPROM read fails
 	 */
 	ret_val = igb_read_nvm_eerd(hw, 0, 1, &nvm_data);
-	if (ret_val != E1000_SUCCESS) {
+	if (ret_val) {
 		hw_dbg("EEPROM read failed\n");
 		goto out;
 	}
 
-	if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+	if (!(hw->nvm.ops.acquire(hw))) {
 		/* Do not use hw->nvm.ops.write, hw->nvm.ops.read
 		 * because we do not want to take the synchronization
 		 * semaphores twice here.
@@ -625,7 +623,7 @@
 		checksum = (u16) NVM_SUM - checksum;
 		ret_val = igb_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
 						&checksum);
-		if (ret_val != E1000_SUCCESS) {
+		if (ret_val) {
 			hw->nvm.ops.release(hw);
 			hw_dbg("NVM Write Error while updating checksum.\n");
 			goto out;
@@ -654,7 +652,7 @@
 	for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) {
 		reg = rd32(E1000_EECD);
 		if (reg & E1000_EECD_FLUDONE_I210) {
-			ret_val = E1000_SUCCESS;
+			ret_val = 0;
 			break;
 		}
 		udelay(5);
@@ -687,7 +685,7 @@
  **/
 static s32 igb_update_flash_i210(struct e1000_hw *hw)
 {
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 	u32 flup;
 
 	ret_val = igb_pool_flash_update_done_i210(hw);
@@ -700,7 +698,7 @@
 	wr32(E1000_EECD, flup);
 
 	ret_val = igb_pool_flash_update_done_i210(hw);
-	if (ret_val == E1000_SUCCESS)
+	if (!ret_val)
 		hw_dbg("Flash update complete\n");
 	else
 		hw_dbg("Flash update time out\n");
@@ -753,7 +751,7 @@
 static s32 __igb_access_xmdio_reg(struct e1000_hw *hw, u16 address,
 				  u8 dev_addr, u16 *data, bool read)
 {
-	s32 ret_val = E1000_SUCCESS;
+	s32 ret_val = 0;
 
 	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr);
 	if (ret_val)

diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index 907fe99..9f34976 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_I210_H_
 #define _E1000_I210_H_

diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 1e0c404..2a88595 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
@@ -442,7 +439,7 @@
  *  The caller must have a packed mc_addr_list of multicast addresses.
  **/
 void igb_update_mc_addr_list(struct e1000_hw *hw,
-                             u8 *mc_addr_list, u32 mc_addr_count)
+			     u8 *mc_addr_list, u32 mc_addr_count)
 {
 	u32 hash_value, hash_bit, hash_reg;
 	int i;
@@ -866,8 +863,7 @@
 			goto out;
 
 		if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
-			hw_dbg("Copper PHY and Auto Neg "
-				 "has not completed.\n");
+			hw_dbg("Copper PHY and Auto Neg has not completed.\n");
 			goto out;
 		}
 
@@ -1265,7 +1261,7 @@
 	while (i < AUTO_READ_DONE_TIMEOUT) {
 		if (rd32(E1000_EECD) & E1000_EECD_AUTO_RD)
 			break;
-		msleep(1);
+		usleep_range(1000, 2000);
 		i++;
 	}
 
@@ -1298,7 +1294,7 @@
 	}
 
 	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
-		switch(hw->phy.media_type) {
+		switch (hw->phy.media_type) {
 		case e1000_media_type_internal_serdes:
 			*data = ID_LED_DEFAULT_82575_SERDES;
 			break;

diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
index 99299ba..ea24961b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_MAC_H_
 #define _E1000_MAC_H_

diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index d5b1217..162cc49 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #include "e1000_mbx.h"
 

diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
index f52f551..d20af6b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_MBX_H_
 #define _E1000_MBX_H_

diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index 9abf829..e8280d0 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c

@@ -1,28 +1,24 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
@@ -480,6 +476,7 @@
 		/* Loop to allow for up to whole page write of eeprom */
 		while (widx < words) {
 			u16 word_out = data[widx];
+
 			word_out = (word_out >> 8) | (word_out << 8);
 			igb_shift_out_eec_bits(hw, word_out, 16);
 			widx++;
@@ -801,5 +798,4 @@
 		fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
 			| eeprom_verl;
 	}
-	return;
 }

diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
index 5b10117..febc9cd 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.h
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_NVM_H_
 #define _E1000_NVM_H_
@@ -32,7 +29,7 @@
 s32  igb_read_mac_addr(struct e1000_hw *hw);
 s32  igb_read_part_num(struct e1000_hw *hw, u32 *part_num);
 s32  igb_read_part_string(struct e1000_hw *hw, u8 *part_num,
-                          u32 part_num_size);
+			  u32 part_num_size);
 s32  igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
 s32  igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
 s32  igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);

diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 4009bba..c1bb64d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
@@ -924,8 +921,7 @@
 	if (phy->autoneg_wait_to_complete) {
 		ret_val = igb_wait_autoneg(hw);
 		if (ret_val) {
-			hw_dbg("Error while waiting for "
-			       "autoneg to complete\n");
+			hw_dbg("Error while waiting for autoneg to complete\n");
 			goto out;
 		}
 	}
@@ -2208,16 +2204,10 @@
 void igb_power_up_phy_copper(struct e1000_hw *hw)
 {
 	u16 mii_reg = 0;
-	u16 power_reg = 0;
 
 	/* The PHY will retain its settings across a power down/up cycle */
 	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
 	mii_reg &= ~MII_CR_POWER_DOWN;
-	if (hw->phy.type == e1000_phy_i210) {
-		hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
-		power_reg &= ~GS40G_CS_POWER_DOWN;
-		hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
-	}
 	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
 }
 
@@ -2231,20 +2221,12 @@
 void igb_power_down_phy_copper(struct e1000_hw *hw)
 {
 	u16 mii_reg = 0;
-	u16 power_reg = 0;
 
 	/* The PHY will retain its settings across a power down/up cycle */
 	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
 	mii_reg |= MII_CR_POWER_DOWN;
-
-	/* i210 Phy requires an additional bit for power up/down */
-	if (hw->phy.type == e1000_phy_i210) {
-		hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
-		power_reg |= GS40G_CS_POWER_DOWN;
-		hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
-	}
 	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
-	msleep(1);
+	usleep_range(1000, 2000);
 }
 
 /**

diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 4c2c36c..7af4ffa 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_PHY_H_
 #define _E1000_PHY_H_
@@ -154,7 +151,6 @@
 #define GS40G_MAC_LB			0x4140
 #define GS40G_MAC_SPEED_1G		0X0006
 #define GS40G_COPPER_SPEC		0x0010
-#define GS40G_CS_POWER_DOWN		0x0002
 #define GS40G_LINE_LB			0x4000
 
 /* SFP modules ID memory locations */

diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index bdb246e..1cc4b1a7 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #ifndef _E1000_REGS_H_
 #define _E1000_REGS_H_
@@ -195,6 +192,10 @@
 				    : (0x0E038 + ((_n) * 0x40)))
 #define E1000_TDWBAH(_n)  ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) \
 				    : (0x0E03C + ((_n) * 0x40)))
+
+#define E1000_RXPBS	0x02404  /* Rx Packet Buffer Size - RW */
+#define E1000_TXPBS	0x03404  /* Tx Packet Buffer Size - RW */
+
 #define E1000_TDFH     0x03410  /* TX Data FIFO Head - RW */
 #define E1000_TDFT     0x03418  /* TX Data FIFO Tail - RW */
 #define E1000_TDFHS    0x03420  /* TX Data FIFO Head Saved - RW */
@@ -301,9 +302,9 @@
 #define E1000_RA2      0x054E0  /* 2nd half of Rx address array - RW Array */
 #define E1000_PSRTYPE(_i)       (0x05480 + ((_i) * 4))
 #define E1000_RAL(_i)  (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
-                                       (0x054E0 + ((_i - 16) * 8)))
+					(0x054E0 + ((_i - 16) * 8)))
 #define E1000_RAH(_i)  (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
-                                       (0x054E4 + ((_i - 16) * 8)))
+					(0x054E4 + ((_i - 16) * 8)))
 #define E1000_IP4AT_REG(_i)     (0x05840 + ((_i) * 8))
 #define E1000_IP6AT_REG(_i)     (0x05880 + ((_i) * 4))
 #define E1000_WUPM_REG(_i)      (0x05A00 + ((_i) * 4))
@@ -358,8 +359,7 @@
 #define E1000_VMBMEM(_n)       (0x00800 + (64 * (_n)))
 #define E1000_VMOLR(_n)        (0x05AD0 + (4 * (_n)))
 #define E1000_DVMOLR(_n)       (0x0C038 + (64 * (_n)))
-#define E1000_VLVF(_n)         (0x05D00 + (4 * (_n))) /* VLAN Virtual Machine
-                                                       * Filter - RW */
+#define E1000_VLVF(_n)         (0x05D00 + (4 * (_n))) /* VLAN VM Filter */
 #define E1000_VMVIR(_n)        (0x03700 + (4 * (_n)))
 
 struct e1000_hw;

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 2713006..06102d1 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h

@@ -1,29 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 /* Linux PRO/1000 Ethernet Driver main header file */
 
@@ -198,6 +194,7 @@
 	unsigned int bytecount;
 	u16 gso_segs;
 	__be16 protocol;
+
 	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
 	u32 tx_flags;

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index e5570ac..c737d1f 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 /* ethtool support for igb */
 
@@ -144,6 +141,7 @@
 	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
 	struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags;
 	u32 status;
+	u32 speed;
 
 	status = rd32(E1000_STATUS);
 	if (hw->phy.media_type == e1000_media_type_copper) {
@@ -218,13 +216,13 @@
 	if (status & E1000_STATUS_LU) {
 		if ((status & E1000_STATUS_2P5_SKU) &&
 		    !(status & E1000_STATUS_2P5_SKU_OVER)) {
-			ecmd->speed = SPEED_2500;
+			speed = SPEED_2500;
 		} else if (status & E1000_STATUS_SPEED_1000) {
-			ecmd->speed = SPEED_1000;
+			speed = SPEED_1000;
 		} else if (status & E1000_STATUS_SPEED_100) {
-			ecmd->speed = SPEED_100;
+			speed = SPEED_100;
 		} else {
-			ecmd->speed = SPEED_10;
+			speed = SPEED_10;
 		}
 		if ((status & E1000_STATUS_FD) ||
 		    hw->phy.media_type != e1000_media_type_copper)
@@ -232,9 +230,10 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ecmd->speed = -1;
-		ecmd->duplex = -1;
+		speed = SPEED_UNKNOWN;
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
+	ethtool_cmd_speed_set(ecmd, speed);
 	if ((hw->phy.media_type == e1000_media_type_fiber) ||
 	    hw->mac.autoneg)
 		ecmd->autoneg = AUTONEG_ENABLE;
@@ -286,7 +285,7 @@
 	}
 
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
-		msleep(1);
+		usleep_range(1000, 2000);
 
 	if (ecmd->autoneg == AUTONEG_ENABLE) {
 		hw->mac.autoneg = 1;
@@ -399,7 +398,7 @@
 	adapter->fc_autoneg = pause->autoneg;
 
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
-		msleep(1);
+		usleep_range(1000, 2000);
 
 	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
 		hw->fc.requested_mode = e1000_fc_default;
@@ -886,7 +885,7 @@
 	}
 
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
-		msleep(1);
+		usleep_range(1000, 2000);
 
 	if (!netif_running(adapter->netdev)) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
@@ -1060,8 +1059,8 @@
 	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
 	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
 	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
 						0xFFFFFFFF, 0xFFFFFFFF },
@@ -1103,8 +1102,8 @@
 	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
 	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
 	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
 						0xFFFFFFFF, 0xFFFFFFFF },
@@ -1132,8 +1131,10 @@
 	{ E1000_RDBAH(4),  0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ E1000_RDLEN(4),  0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
 	/* Enable all RX queues before testing. */
-	{ E1000_RXDCTL(0), 0x100, 4,  WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
-	{ E1000_RXDCTL(4), 0x40, 12,  WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
+	{ E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
 	/* RDH is read-only for 82576, only test RDT. */
 	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_RDT(4),	   0x40, 12,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
@@ -1149,14 +1150,14 @@
 	{ E1000_TDBAH(4),  0x40, 12,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ E1000_TDLEN(4),  0x40, 12,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
 	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
-	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
 	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
 	{ E1000_RA,	   0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ E1000_RA,	   0, 16, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF },
 	{ E1000_RA2,	   0, 8, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ E1000_RA2,	   0, 8, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF },
-	{ E1000_MTA,	   0, 128,TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ 0, 0, 0, 0 }
 };
 
@@ -1170,7 +1171,8 @@
 	{ E1000_RDBAH(0),  0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
 	{ E1000_RDLEN(0),  0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
 	/* Enable all four RX queues before testing. */
-	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
 	/* RDH is read-only for 82575, only test RDT. */
 	{ E1000_RDT(0),    0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
 	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
@@ -1196,8 +1198,8 @@
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 pat, val;
-	static const u32 _test[] =
-		{0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+	static const u32 _test[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
 	for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {
 		wr32(reg, (_test[pat] & write));
 		val = rd32(reg) & mask;
@@ -1206,11 +1208,11 @@
 				"pattern test reg %04X failed: got 0x%08X expected 0x%08X\n",
 				reg, val, (_test[pat] & write & mask));
 			*data = reg;
-			return 1;
+			return true;
 		}
 	}
 
-	return 0;
+	return false;
 }
 
 static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data,
@@ -1218,17 +1220,18 @@
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 val;
+
 	wr32(reg, write & mask);
 	val = rd32(reg);
 	if ((write & mask) != (val & mask)) {
 		dev_err(&adapter->pdev->dev,
-			"set/check reg %04X test failed: got 0x%08X expected 0x%08X\n", reg,
-			(val & mask), (write & mask));
+			"set/check reg %04X test failed: got 0x%08X expected 0x%08X\n",
+			reg, (val & mask), (write & mask));
 		*data = reg;
-		return 1;
+		return true;
 	}
 
-	return 0;
+	return false;
 }
 
 #define REG_PATTERN_TEST(reg, mask, write) \
@@ -1387,14 +1390,14 @@
 	/* Hook up test interrupt handler just for this test */
 	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
 		if (request_irq(adapter->msix_entries[0].vector,
-		                igb_test_intr, 0, netdev->name, adapter)) {
+				igb_test_intr, 0, netdev->name, adapter)) {
 			*data = 1;
 			return -1;
 		}
 	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
 		shared_int = false;
 		if (request_irq(irq,
-		                igb_test_intr, 0, netdev->name, adapter)) {
+				igb_test_intr, 0, netdev->name, adapter)) {
 			*data = 1;
 			return -1;
 		}
@@ -1412,7 +1415,7 @@
 	/* Disable all the interrupts */
 	wr32(E1000_IMC, ~0);
 	wrfl();
-	msleep(10);
+	usleep_range(10000, 11000);
 
 	/* Define all writable bits for ICS */
 	switch (hw->mac.type) {
@@ -1459,7 +1462,7 @@
 			wr32(E1000_IMC, mask);
 			wr32(E1000_ICS, mask);
 			wrfl();
-			msleep(10);
+			usleep_range(10000, 11000);
 
 			if (adapter->test_icr & mask) {
 				*data = 3;
@@ -1481,7 +1484,7 @@
 		wr32(E1000_IMS, mask);
 		wr32(E1000_ICS, mask);
 		wrfl();
-		msleep(10);
+		usleep_range(10000, 11000);
 
 		if (!(adapter->test_icr & mask)) {
 			*data = 4;
@@ -1503,7 +1506,7 @@
 			wr32(E1000_IMC, ~mask);
 			wr32(E1000_ICS, ~mask);
 			wrfl();
-			msleep(10);
+			usleep_range(10000, 11000);
 
 			if (adapter->test_icr & mask) {
 				*data = 5;
@@ -1515,7 +1518,7 @@
 	/* Disable all the interrupts */
 	wr32(E1000_IMC, ~0);
 	wrfl();
-	msleep(10);
+	usleep_range(10000, 11000);
 
 	/* Unhook test interrupt handler */
 	if (adapter->flags & IGB_FLAG_HAS_MSIX)
@@ -1664,8 +1667,8 @@
 		(hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
 		(hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
 		(hw->device_id == E1000_DEV_ID_DH89XXCC_SFP) ||
-		(hw->device_id == E1000_DEV_ID_I354_SGMII)) {
-
+		(hw->device_id == E1000_DEV_ID_I354_SGMII) ||
+		(hw->device_id == E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)) {
 			/* Enable DH89xxCC MPHY for near end loopback */
 			reg = rd32(E1000_MPHY_ADDR_CTL);
 			reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
@@ -1949,6 +1952,7 @@
 	*data = 0;
 	if (hw->phy.media_type == e1000_media_type_internal_serdes) {
 		int i = 0;
+
 		hw->mac.serdes_has_link = false;
 
 		/* On some blade server designs, link establishment
@@ -2413,9 +2417,11 @@
 	switch (cmd->flow_type) {
 	case TCP_V4_FLOW:
 		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
 	case UDP_V4_FLOW:
 		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
 			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
 	case SCTP_V4_FLOW:
 	case AH_ESP_V4_FLOW:
 	case AH_V4_FLOW:
@@ -2425,9 +2431,11 @@
 		break;
 	case TCP_V6_FLOW:
 		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
 	case UDP_V6_FLOW:
 		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
 			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
 	case SCTP_V6_FLOW:
 	case AH_ESP_V6_FLOW:
 	case AH_V6_FLOW:
@@ -2730,7 +2738,7 @@
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	u32 status = E1000_SUCCESS;
+	u32 status = 0;
 	u16 sff8472_rev, addr_mode;
 	bool page_swap = false;
 
@@ -2740,12 +2748,12 @@
 
 	/* Check whether we support SFF-8472 or not */
 	status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_COMP, &sff8472_rev);
-	if (status != E1000_SUCCESS)
+	if (status)
 		return -EIO;
 
 	/* addressing mode is not supported */
 	status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_SWAP, &addr_mode);
-	if (status != E1000_SUCCESS)
+	if (status)
 		return -EIO;
 
 	/* addressing mode is not supported */
@@ -2772,7 +2780,7 @@
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	u32 status = E1000_SUCCESS;
+	u32 status = 0;
 	u16 *dataword;
 	u16 first_word, last_word;
 	int i = 0;
@@ -2791,7 +2799,7 @@
 	/* Read EEPROM block, SFF-8079/SFF-8472, word at a time */
 	for (i = 0; i < last_word - first_word + 1; i++) {
 		status = igb_read_phy_reg_i2c(hw, first_word + i, &dataword[i]);
-		if (status != E1000_SUCCESS) {
+		if (status) {
 			/* Error occurred while reading module */
 			kfree(dataword);
 			return -EIO;
@@ -2824,7 +2832,7 @@
 	return IGB_RETA_SIZE;
 }
 
-static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	int i;
@@ -2870,7 +2878,8 @@
 	}
 }
 
-static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -3019,8 +3028,8 @@
 	.get_module_info	= igb_get_module_info,
 	.get_module_eeprom	= igb_get_module_eeprom,
 	.get_rxfh_indir_size	= igb_get_rxfh_indir_size,
-	.get_rxfh_indir		= igb_get_rxfh_indir,
-	.set_rxfh_indir		= igb_set_rxfh_indir,
+	.get_rxfh		= igb_get_rxfh,
+	.set_rxfh		= igb_set_rxfh,
 	.get_channels		= igb_get_channels,
 	.set_channels		= igb_set_channels,
 	.begin			= igb_ethtool_begin,
@@ -3029,5 +3038,5 @@
 
 void igb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
+	netdev->ethtool_ops = &igb_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index 8333f67..44b6a68 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #include "igb.h"
 #include "e1000_82575.h"

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 16430a8..f145adb 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c

@@ -1,28 +1,25 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Intel(R) Gigabit Ethernet Linux driver
+ * Copyright(c) 2007-2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -75,7 +72,7 @@
 	[board_82575] = &e1000_82575_info,
 };
 
-static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
+static const struct pci_device_id igb_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
@@ -117,7 +114,6 @@
 
 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
 
-void igb_reset(struct igb_adapter *);
 static int igb_setup_all_tx_resources(struct igb_adapter *);
 static int igb_setup_all_rx_resources(struct igb_adapter *);
 static void igb_free_all_tx_resources(struct igb_adapter *);
@@ -141,7 +137,7 @@
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
-						 struct rtnl_link_stats64 *stats);
+					  struct rtnl_link_stats64 *stats);
 static int igb_change_mtu(struct net_device *, int);
 static int igb_set_mac(struct net_device *, void *);
 static void igb_set_uta(struct igb_adapter *adapter);
@@ -159,7 +155,8 @@
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
-static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
+static void igb_vlan_mode(struct net_device *netdev,
+			  netdev_features_t features);
 static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16);
 static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16);
 static void igb_restore_vlan(struct igb_adapter *);
@@ -172,7 +169,7 @@
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 			       int vf, u16 vlan, u8 qos);
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
@@ -215,10 +212,9 @@
 static void igb_netpoll(struct net_device *);
 #endif
 #ifdef CONFIG_PCI_IOV
-static unsigned int max_vfs = 0;
+static unsigned int max_vfs;
 module_param(max_vfs, uint, 0);
-MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
-                 "per physical function");
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function");
 #endif /* CONFIG_PCI_IOV */
 
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
@@ -384,8 +380,7 @@
 	/* Print netdevice Info */
 	if (netdev) {
 		dev_info(&adapter->pdev->dev, "Net device Info\n");
-		pr_info("Device Name     state            trans_start      "
-			"last_rx\n");
+		pr_info("Device Name     state            trans_start      last_rx\n");
 		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
 			netdev->state, netdev->trans_start, netdev->last_rx);
 	}
@@ -438,9 +433,7 @@
 		pr_info("------------------------------------\n");
 		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
 		pr_info("------------------------------------\n");
-		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
-			"[bi->dma       ] leng  ntw timestamp        "
-			"bi->skb\n");
+		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
 
 		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
 			const char *next_desc;
@@ -458,9 +451,8 @@
 			else
 				next_desc = "";
 
-			pr_info("T [0x%03X]    %016llX %016llX %016llX"
-				" %04X  %p %016llX %p%s\n", i,
-				le64_to_cpu(u0->a),
+			pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
+				i, le64_to_cpu(u0->a),
 				le64_to_cpu(u0->b),
 				(u64)dma_unmap_addr(buffer_info, dma),
 				dma_unmap_len(buffer_info, len),
@@ -519,10 +511,8 @@
 		pr_info("------------------------------------\n");
 		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
 		pr_info("------------------------------------\n");
-		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
-			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
-		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
-			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
+		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
+		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
 
 		for (i = 0; i < rx_ring->count; i++) {
 			const char *next_desc;
@@ -584,7 +574,7 @@
 	struct e1000_hw *hw = &adapter->hw;
 	s32 i2cctl = rd32(E1000_I2CPARAMS);
 
-	return ((i2cctl & E1000_I2C_DATA_IN) != 0);
+	return !!(i2cctl & E1000_I2C_DATA_IN);
 }
 
 /**
@@ -648,7 +638,7 @@
 	struct e1000_hw *hw = &adapter->hw;
 	s32 i2cctl = rd32(E1000_I2CPARAMS);
 
-	return ((i2cctl & E1000_I2C_CLK_IN) != 0);
+	return !!(i2cctl & E1000_I2C_CLK_IN);
 }
 
 static const struct i2c_algo_bit_data igb_i2c_algo = {
@@ -681,9 +671,9 @@
 static int __init igb_init_module(void)
 {
 	int ret;
+
 	pr_info("%s - version %s\n",
 	       igb_driver_string, igb_driver_version);
-
 	pr_info("%s\n", igb_copyright);
 
 #ifdef CONFIG_IGB_DCA
@@ -736,12 +726,14 @@
 				adapter->rx_ring[i]->reg_idx = rbase_offset +
 							       Q_IDX_82576(i);
 		}
+		/* Fall through */
 	case e1000_82575:
 	case e1000_82580:
 	case e1000_i350:
 	case e1000_i354:
 	case e1000_i210:
 	case e1000_i211:
+		/* Fall through */
 	default:
 		for (; i < adapter->num_rx_queues; i++)
 			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
@@ -1292,8 +1284,7 @@
 		if (adapter->hw.mac.type >= e1000_82576)
 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
 
-		/*
-		 * On i350, i354, i210, and i211, loopback VLAN packets
+		/* On i350, i354, i210, and i211, loopback VLAN packets
 		 * have the tag byte-swapped.
 		 */
 		if (adapter->hw.mac.type >= e1000_i350)
@@ -1345,6 +1336,7 @@
 	for (; v_idx < q_vectors; v_idx++) {
 		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
 		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
 		err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
 					 tqpv, txr_idx, rqpv, rxr_idx);
 
@@ -1484,6 +1476,7 @@
 	 */
 	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
 		u32 regval = rd32(E1000_EIAM);
+
 		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
 		wr32(E1000_EIMC, adapter->eims_enable_mask);
 		regval = rd32(E1000_EIAC);
@@ -1495,6 +1488,7 @@
 	wrfl();
 	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
 		int i;
+
 		for (i = 0; i < adapter->num_q_vectors; i++)
 			synchronize_irq(adapter->msix_entries[i].vector);
 	} else {
@@ -1513,6 +1507,7 @@
 	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
 		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
 		u32 regval = rd32(E1000_EIAC);
+
 		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
 		regval = rd32(E1000_EIAM);
 		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
@@ -1745,6 +1740,7 @@
 	/* notify VFs that reset has been completed */
 	if (adapter->vfs_allocated_count) {
 		u32 reg_data = rd32(E1000_CTRL_EXT);
+
 		reg_data |= E1000_CTRL_EXT_PFRSTD;
 		wr32(E1000_CTRL_EXT, reg_data);
 	}
@@ -1787,7 +1783,7 @@
 	wr32(E1000_TCTL, tctl);
 	/* flush both disables and wait for them to finish */
 	wrfl();
-	msleep(10);
+	usleep_range(10000, 11000);
 
 	igb_irq_disable(adapter);
 
@@ -1827,7 +1823,7 @@
 {
 	WARN_ON(in_interrupt());
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
-		msleep(1);
+		usleep_range(1000, 2000);
 	igb_down(adapter);
 	igb_up(adapter);
 	clear_bit(__IGB_RESETTING, &adapter->state);
@@ -1960,6 +1956,7 @@
 	/* disable receive for all VFs and wait one second */
 	if (adapter->vfs_allocated_count) {
 		int i;
+
 		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
 			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
 
@@ -2087,7 +2084,7 @@
 	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
 	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2142,7 +2139,6 @@
 		}
 		break;
 	}
-	return;
 }
 
 /**
@@ -2203,11 +2199,11 @@
  **/
 static s32 igb_init_i2c(struct igb_adapter *adapter)
 {
-	s32 status = E1000_SUCCESS;
+	s32 status = 0;
 
 	/* I2C interface supported on i350 devices */
 	if (adapter->hw.mac.type != e1000_i350)
-		return E1000_SUCCESS;
+		return 0;
 
 	/* Initialize the i2c bus which is controlled by the registers.
 	 * This bus will use the i2c_algo_bit structue that implements
@@ -2437,6 +2433,12 @@
 	/* get firmware version for ethtool -i */
 	igb_set_fw_version(adapter);
 
+	/* configure RXPBSIZE and TXPBSIZE */
+	if (hw->mac.type == e1000_i210) {
+		wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+	}
+
 	setup_timer(&adapter->watchdog_timer, igb_watchdog,
 		    (unsigned long) adapter);
 	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
@@ -2529,7 +2531,8 @@
 	}
 
 	/* let the f/w know that the h/w is now under the control of the
-	 * driver. */
+	 * driver.
+	 */
 	igb_get_hw_control(adapter);
 
 	strcpy(netdev->name, "eth%d");
@@ -3077,6 +3080,7 @@
 	/* notify VFs that reset has been completed */
 	if (adapter->vfs_allocated_count) {
 		u32 reg_data = rd32(E1000_CTRL_EXT);
+
 		reg_data |= E1000_CTRL_EXT_PFRSTD;
 		wr32(E1000_CTRL_EXT, reg_data);
 	}
@@ -3248,7 +3252,7 @@
  *  Configure a transmit ring after a reset.
  **/
 void igb_configure_tx_ring(struct igb_adapter *adapter,
-                           struct igb_ring *ring)
+			   struct igb_ring *ring)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 txdctl = 0;
@@ -3389,7 +3393,8 @@
 
 	if (adapter->rss_indir_tbl_init != num_rx_queues) {
 		for (j = 0; j < IGB_RETA_SIZE; j++)
-			adapter->rss_indir_tbl[j] = (j * num_rx_queues) / IGB_RETA_SIZE;
+			adapter->rss_indir_tbl[j] =
+			(j * num_rx_queues) / IGB_RETA_SIZE;
 		adapter->rss_indir_tbl_init = num_rx_queues;
 	}
 	igb_write_rss_indir_tbl(adapter);
@@ -3430,6 +3435,7 @@
 		if (hw->mac.type > e1000_82575) {
 			/* Set the default pool for the PF's first queue */
 			u32 vtctl = rd32(E1000_VT_CTL);
+
 			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
 				   E1000_VT_CTL_DISABLE_DEF_POOL);
 			vtctl |= adapter->vfs_allocated_count <<
@@ -3511,7 +3517,7 @@
 }
 
 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
-                                   int vfn)
+				   int vfn)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 vmolr;
@@ -4058,7 +4064,8 @@
 	switch (hw->mac.type) {
 	case e1000_82576:
 	case e1000_i350:
-		if (!(wvbr = rd32(E1000_WVBR)))
+		wvbr = rd32(E1000_WVBR);
+		if (!wvbr)
 			return;
 		break;
 	default:
@@ -4077,7 +4084,7 @@
 	if (!adapter->wvbr)
 		return;
 
-	for(j = 0; j < adapter->vfs_allocated_count; j++) {
+	for (j = 0; j < adapter->vfs_allocated_count; j++) {
 		if (adapter->wvbr & (1 << j) ||
 		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
 			dev_warn(&adapter->pdev->dev,
@@ -4209,14 +4216,15 @@
 
 		if (!netif_carrier_ok(netdev)) {
 			u32 ctrl;
+
 			hw->mac.ops.get_speed_and_duplex(hw,
 							 &adapter->link_speed,
 							 &adapter->link_duplex);
 
 			ctrl = rd32(E1000_CTRL);
 			/* Links status message must follow this format */
-			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
-			       "Duplex, Flow Control: %s\n",
+			netdev_info(netdev,
+			       "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
 			       netdev->name,
 			       adapter->link_speed,
 			       adapter->link_duplex == FULL_DUPLEX ?
@@ -4242,11 +4250,8 @@
 
 			/* check for thermal sensor event */
 			if (igb_thermal_sensor_event(hw,
-			    E1000_THSTAT_LINK_THROTTLE)) {
-				netdev_info(netdev, "The network adapter link "
-					    "speed was downshifted because it "
-					    "overheated\n");
-			}
+			    E1000_THSTAT_LINK_THROTTLE))
+				netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");
 
 			/* adjust timeout factor according to speed/duplex */
 			adapter->tx_timeout_factor = 1;
@@ -4277,12 +4282,11 @@
 			/* check for thermal sensor event */
 			if (igb_thermal_sensor_event(hw,
 			    E1000_THSTAT_PWR_DOWN)) {
-				netdev_err(netdev, "The network adapter was "
-					   "stopped because it overheated\n");
+				netdev_err(netdev, "The network adapter was stopped because it overheated\n");
 			}
 
 			/* Links status message must follow this format */
-			printk(KERN_INFO "igb: %s NIC Link is Down\n",
+			netdev_info(netdev, "igb: %s NIC Link is Down\n",
 			       netdev->name);
 			netif_carrier_off(netdev);
 
@@ -4344,6 +4348,7 @@
 	/* Cause software interrupt to ensure Rx ring is cleaned */
 	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
 		u32 eics = 0;
+
 		for (i = 0; i < adapter->num_q_vectors; i++)
 			eics |= adapter->q_vector[i]->eims_value;
 		wr32(E1000_EICS, eics);
@@ -4483,13 +4488,12 @@
 	case low_latency:  /* 50 usec aka 20000 ints/s */
 		if (bytes > 10000) {
 			/* this if handles the TSO accounting */
-			if (bytes/packets > 8000) {
+			if (bytes/packets > 8000)
 				itrval = bulk_latency;
-			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
+			else if ((packets < 10) || ((bytes/packets) > 1200))
 				itrval = bulk_latency;
-			} else if ((packets > 35)) {
+			else if ((packets > 35))
 				itrval = lowest_latency;
-			}
 		} else if (bytes/packets > 2000) {
 			itrval = bulk_latency;
 		} else if (packets <= 2 && bytes < 512) {
@@ -4675,6 +4679,7 @@
 			return;
 	} else {
 		u8 l4_hdr = 0;
+
 		switch (first->protocol) {
 		case htons(ETH_P_IP):
 			vlan_macip_lens |= skb_network_header_len(skb);
@@ -4962,6 +4967,7 @@
 	 */
 	if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) {
 		unsigned short f;
+
 		for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 			count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
 	} else {
@@ -5140,7 +5146,7 @@
 		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
 
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
-		msleep(1);
+		usleep_range(1000, 2000);
 
 	/* igb_down has a dependency on max_frame_size */
 	adapter->max_frame_size = max_frame;
@@ -5621,6 +5627,7 @@
 			vmolr |= E1000_VMOLR_MPME;
 		} else if (vf_data->num_vf_mc_hashes) {
 			int j;
+
 			vmolr |= E1000_VMOLR_ROMPE;
 			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
 				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
@@ -5672,6 +5679,7 @@
 
 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
 		u32 vmolr = rd32(E1000_VMOLR(i));
+
 		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
 
 		vf_data = &adapter->vf_data[i];
@@ -5770,6 +5778,7 @@
 
 			if (!adapter->vf_data[vf].vlans_enabled) {
 				u32 size;
+
 				reg = rd32(E1000_VMOLR(vf));
 				size = reg & E1000_VMOLR_RLPML_MASK;
 				size += 4;
@@ -5798,6 +5807,7 @@
 			adapter->vf_data[vf].vlans_enabled--;
 			if (!adapter->vf_data[vf].vlans_enabled) {
 				u32 size;
+
 				reg = rd32(E1000_VMOLR(vf));
 				size = reg & E1000_VMOLR_RLPML_MASK;
 				size -= 4;
@@ -5902,8 +5912,8 @@
 	 */
 	if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
 		u32 vlvf, bits;
-
 		int regndx = igb_find_vlvf_entry(adapter, vid);
+
 		if (regndx < 0)
 			goto out;
 		/* See if any other pools are set for this VLAN filter
@@ -6494,7 +6504,7 @@
 	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
 	/* transfer page from old buffer to new buffer */
-	memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
+	*new_buff = *old_buff;
 
 	/* sync the buffer for use by the device */
 	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
@@ -6963,6 +6973,7 @@
 	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
 		u16 vid;
+
 		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
 		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
 			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
@@ -7051,7 +7062,7 @@
 	if (cleaned_count)
 		igb_alloc_rx_buffers(rx_ring, cleaned_count);
 
-	return (total_packets < budget);
+	return total_packets < budget;
 }
 
 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
@@ -7172,7 +7183,7 @@
 		break;
 	case SIOCGMIIREG:
 		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
-		                     &data->val_out))
+				     &data->val_out))
 			return -EIO;
 		break;
 	case SIOCSMIIREG:
@@ -7873,7 +7884,8 @@
 	}
 }
 
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -7882,15 +7894,19 @@
 	if (hw->mac.type != e1000_82576)
 		return -EOPNOTSUPP;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	actual_link_speed = igb_link_mbps(adapter->link_speed);
 	if ((vf >= adapter->vfs_allocated_count) ||
 	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
-	    (tx_rate < 0) || (tx_rate > actual_link_speed))
+	    (max_tx_rate < 0) ||
+	    (max_tx_rate > actual_link_speed))
 		return -EINVAL;
 
 	adapter->vf_rate_link_speed = actual_link_speed;
-	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
-	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+	adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
+	igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
 
 	return 0;
 }
@@ -7919,7 +7935,7 @@
 	wr32(reg_offset, reg_val);
 
 	adapter->vf_data[vf].spoofchk_enabled = setting;
-	return E1000_SUCCESS;
+	return 0;
 }
 
 static int igb_ndo_get_vf_config(struct net_device *netdev,
@@ -7930,7 +7946,8 @@
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
 	ivi->qos = adapter->vf_data[vf].pf_qos;
 	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
@@ -7955,11 +7972,13 @@
 		reg = rd32(E1000_DTXCTL);
 		reg |= E1000_DTXCTL_VLAN_ADDED;
 		wr32(E1000_DTXCTL, reg);
+		/* Fall through */
 	case e1000_82580:
 		/* enable replication vlan tag stripping */
 		reg = rd32(E1000_RPLOLR);
 		reg |= E1000_RPLOLR_STRVLAN;
 		wr32(E1000_RPLOLR, reg);
+		/* Fall through */
 	case e1000_i350:
 		/* none of the above registers are supported by i350 */
 		break;
@@ -8049,6 +8068,7 @@
 		} /* endif adapter->dmac is not disabled */
 	} else if (hw->mac.type == e1000_82580) {
 		u32 reg = rd32(E1000_PCIEMISC);
+
 		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
 		wr32(E1000_DMACR, 0);
 	}
@@ -8077,8 +8097,7 @@
 
 	swfw_mask = E1000_SWFW_PHY0_SM;
 
-	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
-	    != E1000_SUCCESS)
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
 		return E1000_ERR_SWFW_SYNC;
 
 	status = i2c_smbus_read_byte_data(this_client, byte_offset);
@@ -8088,7 +8107,7 @@
 		return E1000_ERR_I2C;
 	else {
 		*data = status;
-		return E1000_SUCCESS;
+		return 0;
 	}
 }
 
@@ -8113,7 +8132,7 @@
 	if (!this_client)
 		return E1000_ERR_I2C;
 
-	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS)
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
 		return E1000_ERR_SWFW_SYNC;
 	status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
 	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
@@ -8121,7 +8140,7 @@
 	if (status)
 		return E1000_ERR_I2C;
 	else
-		return E1000_SUCCESS;
+		return 0;
 
 }
 

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index ab25e49..794c139 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c

@@ -360,8 +360,8 @@
 	return 0;
 }
 
-static int igb_ptp_enable(struct ptp_clock_info *ptp,
-			  struct ptp_clock_request *rq, int on)
+static int igb_ptp_feature_enable(struct ptp_clock_info *ptp,
+				  struct ptp_clock_request *rq, int on)
 {
 	return -EOPNOTSUPP;
 }
@@ -559,10 +559,11 @@
 	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
 		-EFAULT : 0;
 }
+
 /**
- * igb_ptp_set_ts_config - control hardware time stamping
- * @netdev:
- * @ifreq:
+ * igb_ptp_set_timestamp_mode - setup hardware for timestamping
+ * @adapter: networking device structure
+ * @config: hwtstamp configuration
  *
  * Outgoing time stamping can be enabled and disabled. Play nice and
  * disable it when requested, although it shouldn't case any overhead
@@ -575,12 +576,11 @@
  * type has to be specified. Matching the kind of event packet is
  * not supported, with the exception of "all V2 events regardless of
  * level 2 or 4".
- **/
-int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
+ */
+static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
+				      struct hwtstamp_config *config)
 {
-	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct hwtstamp_config *config = &adapter->tstamp_config;
 	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
 	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
 	u32 tsync_rx_cfg = 0;
@@ -588,9 +588,6 @@
 	bool is_l2 = false;
 	u32 regval;
 
-	if (copy_from_user(config, ifr->ifr_data, sizeof(*config)))
-		return -EFAULT;
-
 	/* reserved for future extensions */
 	if (config->flags)
 		return -EINVAL;
@@ -725,7 +722,33 @@
 	regval = rd32(E1000_RXSTMPL);
 	regval = rd32(E1000_RXSTMPH);
 
-	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+	return 0;
+}
+
+/**
+ * igb_ptp_set_ts_config - set hardware time stamping config
+ * @netdev: network interface device structure
+ * @ifr: interface request carrying the user-space hwtstamp_config
+ *
+ **/
+int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = igb_ptp_set_timestamp_mode(adapter, &config);
+	if (err)
+		return err;
+
+	/* save these settings for future reference */
+	memcpy(&adapter->tstamp_config, &config,
+	       sizeof(adapter->tstamp_config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
 		-EFAULT : 0;
 }
 
@@ -745,7 +768,7 @@
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
 		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
 		adapter->ptp_caps.settime = igb_ptp_settime_82576;
-		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable;
 		adapter->cc.read = igb_ptp_read_82576;
 		adapter->cc.mask = CLOCKSOURCE_MASK(64);
 		adapter->cc.mult = 1;
@@ -765,7 +788,7 @@
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
 		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
 		adapter->ptp_caps.settime = igb_ptp_settime_82576;
-		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable;
 		adapter->cc.read = igb_ptp_read_82580;
 		adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
 		adapter->cc.mult = 1;
@@ -784,7 +807,7 @@
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
 		adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
 		adapter->ptp_caps.settime = igb_ptp_settime_i210;
-		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable;
 		/* Enable the timer functions by clearing bit 31. */
 		wr32(E1000_TSAUXC, 0x0);
 		break;
@@ -820,6 +843,9 @@
 		wr32(E1000_IMS, E1000_IMS_TS);
 	}
 
+	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
 	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
 						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
@@ -884,7 +910,7 @@
 		return;
 
 	/* reset the tstamp_config */
-	memset(&adapter->tstamp_config, 0, sizeof(adapter->tstamp_config));
+	igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
 
 	switch (adapter->hw.mac.type) {
 	case e1000_82576:

diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 90eef07..2178f87 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c

@@ -101,8 +101,8 @@
 		else
 			ecmd->duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_DISABLE;
@@ -119,7 +119,6 @@
 static void igbvf_get_pauseparam(struct net_device *netdev,
                                  struct ethtool_pauseparam *pause)
 {
-	return;
 }
 
 static int igbvf_set_pauseparam(struct net_device *netdev,
@@ -476,5 +475,5 @@
 
 void igbvf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops);
+	netdev->ethtool_ops = &igbvf_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index dbb7dd2..b311e9e 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c

@@ -107,8 +107,8 @@
 		ethtool_cmd_speed_set(ecmd, SPEED_10000);
 		ecmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	ecmd->autoneg = AUTONEG_DISABLE;
@@ -656,5 +656,5 @@
 
 void ixgb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgb_ethtool_ops);
+	netdev->ethtool_ops = &ixgb_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index c6c4ca7..ac9f214 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h

@@ -155,7 +155,6 @@
 struct vf_macvlans {
 	struct list_head l;
 	int vf;
-	int rar_entry;
 	bool free;
 	bool is_macvlan;
 	u8 vf_macvlan[ETH_ALEN];
@@ -363,7 +362,7 @@
 	for (pos = (head).ring; pos != NULL; pos = pos->next)
 
 #define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
-                              ? 8 : 1)
+			      ? 8 : 1)
 #define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
 
 /* MAX_Q_VECTORS of these are allocated,
@@ -613,6 +612,15 @@
 #define MAX_MSIX_VECTORS_82598 18
 #define MAX_Q_VECTORS_82598 16
 
+struct ixgbe_mac_addr {
+	u8 addr[ETH_ALEN];
+	u16 queue;
+	u16 state; /* bitmask */
+};
+#define IXGBE_MAC_STATE_DEFAULT		0x1
+#define IXGBE_MAC_STATE_MODIFIED	0x2
+#define IXGBE_MAC_STATE_IN_USE		0x4
+
 #define MAX_Q_VECTORS MAX_Q_VECTORS_82599
 #define MAX_MSIX_COUNT MAX_MSIX_VECTORS_82599
 
@@ -785,6 +793,7 @@
 
 	u32 timer_event_accumulator;
 	u32 vferr_refcount;
+	struct ixgbe_mac_addr *mac_table;
 	struct kobject *info_kobj;
 #ifdef CONFIG_IXGBE_HWMON
 	struct hwmon_buff *ixgbe_hwmon_buff;
@@ -863,6 +872,13 @@
 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
 int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
 			       u16 subdevice_id);
+#ifdef CONFIG_PCI_IOV
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
+#endif
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+			 u8 *addr, u16 queue);
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+			 u8 *addr, u16 queue);
 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
 netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *,
 				  struct ixgbe_ring *);
@@ -941,6 +957,7 @@
 }
 
 void ixgbe_ptp_init(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_stop(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 4c78ea8..1560933 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c

@@ -41,10 +41,10 @@
 #define IXGBE_82598_RX_PB_SIZE	 512
 
 static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
-                                         ixgbe_link_speed speed,
-                                         bool autoneg_wait_to_complete);
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete);
 static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
-                                       u8 *eeprom_data);
+				       u8 *eeprom_data);
 
 /**
  *  ixgbe_set_pcie_completion_timeout - set pci-e completion timeout
@@ -140,7 +140,7 @@
 		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
 		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
 		phy->ops.get_firmware_version =
-		             &ixgbe_get_phy_firmware_version_tnx;
+			     &ixgbe_get_phy_firmware_version_tnx;
 		break;
 	case ixgbe_phy_nl:
 		phy->ops.reset = &ixgbe_reset_phy_nl;
@@ -156,8 +156,8 @@
 
 		/* Check to see if SFP+ module is supported */
 		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw,
-		                                            &list_offset,
-		                                            &data_offset);
+							    &list_offset,
+							    &data_offset);
 		if (ret_val != 0) {
 			ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
 			goto out;
@@ -219,8 +219,8 @@
  *  Determines the link capabilities by reading the AUTOC register.
  **/
 static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
-                                             ixgbe_link_speed *speed,
-                                             bool *autoneg)
+					     ixgbe_link_speed *speed,
+					     bool *autoneg)
 {
 	s32 status = 0;
 	u32 autoc = 0;
@@ -337,19 +337,25 @@
 	int i;
 	bool link_up;
 
-	/*
-	 * Validate the water mark configuration for packet buffer 0.  Zero
-	 * water marks indicate that the packet buffer was not configured
-	 * and the watermarks for packet buffer 0 should always be configured.
-	 */
-	if (!hw->fc.low_water ||
-	    !hw->fc.high_water[0] ||
-	    !hw->fc.pause_time) {
-		hw_dbg(hw, "Invalid water mark configuration\n");
+	/* Validate the pause time; water marks are checked per class below */
+	if (!hw->fc.pause_time) {
 		ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
 		goto out;
 	}
 
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+				goto out;
+			}
+		}
+	}
+
 	/*
 	 * On 82598 having Rx FC on causes resets while doing 1G
 	 * so if it's on turn it off once we know link_speed. For
@@ -432,12 +438,11 @@
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg);
 	IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg);
 
-	fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE;
-
 	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
 	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
 		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
 		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
 			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
 			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
 			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth);
@@ -468,7 +473,7 @@
  *  Restarts the link.  Performs autonegotiation if needed.
  **/
 static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
-                                      bool autoneg_wait_to_complete)
+				      bool autoneg_wait_to_complete)
 {
 	u32 autoc_reg;
 	u32 links_reg;
@@ -550,8 +555,8 @@
  *  Reads the links register to determine if link is up and the current speed
  **/
 static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
-                                      ixgbe_link_speed *speed, bool *link_up,
-                                      bool link_up_wait_to_complete)
+				      ixgbe_link_speed *speed, bool *link_up,
+				      bool link_up_wait_to_complete)
 {
 	u32 links_reg;
 	u32 i;
@@ -567,7 +572,7 @@
 		hw->phy.ops.read_reg(hw, 0xC79F, MDIO_MMD_PMAPMD, &link_reg);
 		hw->phy.ops.read_reg(hw, 0xC79F, MDIO_MMD_PMAPMD, &link_reg);
 		hw->phy.ops.read_reg(hw, 0xC00C, MDIO_MMD_PMAPMD,
-		                     &adapt_comp_reg);
+				     &adapt_comp_reg);
 		if (link_up_wait_to_complete) {
 			for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
 				if ((link_reg & 1) &&
@@ -579,11 +584,11 @@
 				}
 				msleep(100);
 				hw->phy.ops.read_reg(hw, 0xC79F,
-				                     MDIO_MMD_PMAPMD,
-				                     &link_reg);
+						     MDIO_MMD_PMAPMD,
+						     &link_reg);
 				hw->phy.ops.read_reg(hw, 0xC00C,
-				                     MDIO_MMD_PMAPMD,
-				                     &adapt_comp_reg);
+						     MDIO_MMD_PMAPMD,
+						     &adapt_comp_reg);
 			}
 		} else {
 			if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0))
@@ -656,7 +661,7 @@
 
 	/* Set KX4/KX support according to speed requested */
 	else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN ||
-	         link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+		 link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
 		autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK;
 		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
 			autoc |= IXGBE_AUTOC_KX4_SUPP;
@@ -689,14 +694,14 @@
  *  Sets the link speed in the AUTOC register in the MAC and restarts link.
  **/
 static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
-                                               ixgbe_link_speed speed,
-                                               bool autoneg_wait_to_complete)
+					       ixgbe_link_speed speed,
+					       bool autoneg_wait_to_complete)
 {
 	s32 status;
 
 	/* Setup the PHY according to input speed */
 	status = hw->phy.ops.setup_link_speed(hw, speed,
-	                                      autoneg_wait_to_complete);
+					      autoneg_wait_to_complete);
 	/* Set up MAC */
 	ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
 
@@ -735,28 +740,28 @@
 	if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) {
 		/* Enable Tx Atlas so packets can be transmitted again */
 		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
-		                             &analog_val);
+					     &analog_val);
 		analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN;
 		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
-		                              analog_val);
+					      analog_val);
 
 		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
-		                             &analog_val);
+					     &analog_val);
 		analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
 		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
-		                              analog_val);
+					      analog_val);
 
 		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
-		                             &analog_val);
+					     &analog_val);
 		analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
 		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
-		                              analog_val);
+					      analog_val);
 
 		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
-		                             &analog_val);
+					     &analog_val);
 		analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
 		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
-		                              analog_val);
+					      analog_val);
 	}
 
 	/* Reset PHY */
@@ -955,7 +960,7 @@
 	for (vlanbyte = 0; vlanbyte < 4; vlanbyte++)
 		for (offset = 0; offset < hw->mac.vft_size; offset++)
 			IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset),
-			                0);
+					0);
 
 	return 0;
 }
@@ -973,7 +978,7 @@
 	u32  atlas_ctl;
 
 	IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL,
-	                IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
+			IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
 	IXGBE_WRITE_FLUSH(hw);
 	udelay(10);
 	atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
@@ -1273,8 +1278,6 @@
 	/* Setup Tx packet buffer sizes */
 	for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++)
 		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB);
-
-	return;
 }
 
 static struct ixgbe_mac_operations mac_ops_82598 = {

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index f32b3dd..bc7c924 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c

@@ -48,17 +48,17 @@
 						 ixgbe_link_speed speed,
 						 bool autoneg_wait_to_complete);
 static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
-                                           ixgbe_link_speed speed,
-                                           bool autoneg_wait_to_complete);
+					   ixgbe_link_speed speed,
+					   bool autoneg_wait_to_complete);
 static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
 static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
 				      bool autoneg_wait_to_complete);
 static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
-                               ixgbe_link_speed speed,
-                               bool autoneg_wait_to_complete);
+			       ixgbe_link_speed speed,
+			       bool autoneg_wait_to_complete);
 static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
-                                         ixgbe_link_speed speed,
-                                         bool autoneg_wait_to_complete);
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete);
 static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
 static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
 				     u8 dev_addr, u8 *data);
@@ -96,9 +96,9 @@
 	if ((mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
 	    !ixgbe_mng_enabled(hw)) {
 		mac->ops.disable_tx_laser =
-		                       &ixgbe_disable_tx_laser_multispeed_fiber;
+				       &ixgbe_disable_tx_laser_multispeed_fiber;
 		mac->ops.enable_tx_laser =
-		                        &ixgbe_enable_tx_laser_multispeed_fiber;
+					&ixgbe_enable_tx_laser_multispeed_fiber;
 		mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber;
 	} else {
 		mac->ops.disable_tx_laser = NULL;
@@ -132,13 +132,13 @@
 		hw->phy.ops.reset = NULL;
 
 		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
-		                                              &data_offset);
+							      &data_offset);
 		if (ret_val != 0)
 			goto setup_sfp_out;
 
 		/* PHY config will finish before releasing the semaphore */
 		ret_val = hw->mac.ops.acquire_swfw_sync(hw,
-		                                        IXGBE_GSSR_MAC_CSR_SM);
+							IXGBE_GSSR_MAC_CSR_SM);
 		if (ret_val != 0) {
 			ret_val = IXGBE_ERR_SWFW_SYNC;
 			goto setup_sfp_out;
@@ -334,7 +334,7 @@
 		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
 		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
 		phy->ops.get_firmware_version =
-		             &ixgbe_get_phy_firmware_version_tnx;
+			     &ixgbe_get_phy_firmware_version_tnx;
 		break;
 	default:
 		break;
@@ -352,7 +352,7 @@
  *  Determines the link capabilities by reading the AUTOC register.
  **/
 static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
-                                             ixgbe_link_speed *speed,
+					     ixgbe_link_speed *speed,
 					     bool *autoneg)
 {
 	s32 status = 0;
@@ -543,7 +543,7 @@
  *  Restarts the link.  Performs autonegotiation if needed.
  **/
 static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
-                               bool autoneg_wait_to_complete)
+			       bool autoneg_wait_to_complete)
 {
 	u32 autoc_reg;
 	u32 links_reg;
@@ -672,8 +672,8 @@
  *  Set the link speed in the AUTOC register and restarts link.
  **/
 static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
-                                          ixgbe_link_speed speed,
-                                          bool autoneg_wait_to_complete)
+					  ixgbe_link_speed speed,
+					  bool autoneg_wait_to_complete)
 {
 	s32 status = 0;
 	ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
@@ -820,8 +820,8 @@
 	 */
 	if (speedcnt > 1)
 		status = ixgbe_setup_mac_link_multispeed_fiber(hw,
-		                                               highest_link_speed,
-		                                               autoneg_wait_to_complete);
+							       highest_link_speed,
+							       autoneg_wait_to_complete);
 
 out:
 	/* Set autoneg_advertised value based on input link speed */
@@ -1009,8 +1009,8 @@
 		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
 			autoc |= IXGBE_AUTOC_KX_SUPP;
 	} else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
-	           (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
-	            link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
+		   (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
+		    link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
 		/* Switch from 1G SFI to 10G SFI if requested */
 		if ((speed == IXGBE_LINK_SPEED_10GB_FULL) &&
 		    (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) {
@@ -1018,7 +1018,7 @@
 			autoc |= IXGBE_AUTOC_LMS_10G_SERIAL;
 		}
 	} else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) &&
-	           (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
+		   (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
 		/* Switch from 10G SFI to 1G SFI if requested */
 		if ((speed == IXGBE_LINK_SPEED_1GB_FULL) &&
 		    (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) {
@@ -1051,7 +1051,7 @@
 				}
 				if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
 					status =
-					        IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+						IXGBE_ERR_AUTONEG_NOT_COMPLETE;
 					hw_dbg(hw, "Autoneg did not complete.\n");
 				}
 			}
@@ -1074,14 +1074,14 @@
  *  Restarts link on PHY and MAC based on settings passed in.
  **/
 static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
-                                         ixgbe_link_speed speed,
-                                         bool autoneg_wait_to_complete)
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete)
 {
 	s32 status;
 
 	/* Setup the PHY according to input speed */
 	status = hw->phy.ops.setup_link_speed(hw, speed,
-	                                      autoneg_wait_to_complete);
+					      autoneg_wait_to_complete);
 	/* Set up MAC */
 	ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete);
 
@@ -1224,7 +1224,7 @@
 		    (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
 			autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
 			autoc2 |= (hw->mac.orig_autoc2 &
-			           IXGBE_AUTOC2_UPPER_MASK);
+				   IXGBE_AUTOC2_UPPER_MASK);
 			IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
 		}
 	}
@@ -1246,7 +1246,7 @@
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
 	if (is_valid_ether_addr(hw->mac.san_addr)) {
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
-		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
 		/* Save the SAN MAC RAR index */
 		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
@@ -1257,7 +1257,7 @@
 
 	/* Store the alternative WWNN/WWPN prefix */
 	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
-	                               &hw->mac.wwpn_prefix);
+				       &hw->mac.wwpn_prefix);
 
 reset_hw_out:
 	return status;
@@ -1271,6 +1271,7 @@
 {
 	int i;
 	u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
+
 	fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
 
 	/*
@@ -1284,8 +1285,7 @@
 		udelay(10);
 	}
 	if (i >= IXGBE_FDIRCMD_CMD_POLL) {
-		hw_dbg(hw, "Flow Director previous command isn't complete, "
-		       "aborting table re-initialization.\n");
+		hw_dbg(hw, "Flow Director previous command isn't complete, aborting table re-initialization.\n");
 		return IXGBE_ERR_FDIR_REINIT_FAILED;
 	}
 
@@ -1299,12 +1299,12 @@
 	 * - write 0 to bit 8 of FDIRCMD register
 	 */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
-	                (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
-	                 IXGBE_FDIRCMD_CLEARHT));
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
+			 IXGBE_FDIRCMD_CLEARHT));
 	IXGBE_WRITE_FLUSH(hw);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
-	                (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
-	                 ~IXGBE_FDIRCMD_CLEARHT));
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+			 ~IXGBE_FDIRCMD_CLEARHT));
 	IXGBE_WRITE_FLUSH(hw);
 	/*
 	 * Clear FDIR Hash register to clear any leftover hashes
@@ -1319,7 +1319,7 @@
 	/* Poll init-done after we write FDIRCTRL register */
 	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
 		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
-		                   IXGBE_FDIRCTRL_INIT_DONE)
+				   IXGBE_FDIRCTRL_INIT_DONE)
 			break;
 		usleep_range(1000, 2000);
 	}
@@ -1368,7 +1368,7 @@
 	IXGBE_WRITE_FLUSH(hw);
 	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
 		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
-		                   IXGBE_FDIRCTRL_INIT_DONE)
+				   IXGBE_FDIRCTRL_INIT_DONE)
 			break;
 		usleep_range(1000, 2000);
 	}
@@ -1453,7 +1453,7 @@
 		bucket_hash ^= hi_hash_dword >> n; \
 	else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
 		sig_hash ^= hi_hash_dword << (16 - n); \
-} while (0);
+} while (0)
 
 /**
  *  ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
@@ -1529,9 +1529,9 @@
  *  @queue: queue index to direct traffic to
  **/
 s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
-                                          union ixgbe_atr_hash_dword input,
-                                          union ixgbe_atr_hash_dword common,
-                                          u8 queue)
+					  union ixgbe_atr_hash_dword input,
+					  union ixgbe_atr_hash_dword common,
+					  u8 queue)
 {
 	u64  fdirhashcmd;
 	u32  fdircmd;
@@ -1555,7 +1555,7 @@
 
 	/* configure FDIRCMD register */
 	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
-	          IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
 	fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
 	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
 
@@ -1579,7 +1579,7 @@
 		bucket_hash ^= lo_hash_dword >> n; \
 	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
 		bucket_hash ^= hi_hash_dword >> n; \
-} while (0);
+} while (0)
 
 /**
  *  ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
@@ -1651,6 +1651,7 @@
 static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
 {
 	u32 mask = ntohs(input_mask->formatted.dst_port);
+
 	mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
 	mask |= ntohs(input_mask->formatted.src_port);
 	mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
@@ -1885,7 +1886,7 @@
 	u32  core_ctl;
 
 	IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD |
-	                (reg << 8));
+			(reg << 8));
 	IXGBE_WRITE_FLUSH(hw);
 	udelay(10);
 	core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 981b8a7..4e5385a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c

@@ -41,7 +41,7 @@
 static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
 static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
 static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
-                                        u16 count);
+					u16 count);
 static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count);
 static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
 static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
@@ -271,6 +271,7 @@
  **/
 s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 {
+	s32 ret_val;
 	u32 ctrl_ext;
 
 	/* Set the media type */
@@ -292,12 +293,15 @@
 	IXGBE_WRITE_FLUSH(hw);
 
 	/* Setup flow control */
-	ixgbe_setup_fc(hw);
+	ret_val = ixgbe_setup_fc(hw);
+	if (ret_val)
+		goto out;
 
 	/* Clear adapter stopped flag */
 	hw->adapter_stopped = false;
 
-	return 0;
+out:
+	return ret_val;
 }
 
 /**
@@ -481,7 +485,7 @@
  *  Reads the part number string from the EEPROM.
  **/
 s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
-                                  u32 pba_num_size)
+				  u32 pba_num_size)
 {
 	s32 ret_val;
 	u16 data;
@@ -814,9 +818,8 @@
 			eeprom->address_bits = 16;
 		else
 			eeprom->address_bits = 8;
-		hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: "
-			  "%d\n", eeprom->type, eeprom->word_size,
-			  eeprom->address_bits);
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: %d\n",
+		       eeprom->type, eeprom->word_size, eeprom->address_bits);
 	}
 
 	return 0;
@@ -1388,8 +1391,7 @@
 	}
 
 	if (i == timeout) {
-		hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore "
-		       "not granted.\n");
+		hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore not granted.\n");
 		/*
 		 * this release is particularly important because our attempts
 		 * above to get the semaphore may have succeeded, and if there
@@ -1434,14 +1436,12 @@
 		 * was not granted because we don't have access to the EEPROM
 		 */
 		if (i >= timeout) {
-			hw_dbg(hw, "SWESMBI Software EEPROM semaphore "
-			       "not granted.\n");
+			hw_dbg(hw, "SWESMBI Software EEPROM semaphore not granted.\n");
 			ixgbe_release_eeprom_semaphore(hw);
 			status = IXGBE_ERR_EEPROM;
 		}
 	} else {
-		hw_dbg(hw, "Software semaphore SMBI between device drivers "
-		       "not granted.\n");
+		hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n");
 	}
 
 	return status;
@@ -1483,7 +1483,7 @@
 	 */
 	for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) {
 		ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI,
-		                            IXGBE_EEPROM_OPCODE_BITS);
+					    IXGBE_EEPROM_OPCODE_BITS);
 		spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8);
 		if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI))
 			break;
@@ -1532,7 +1532,7 @@
  *  @count: number of bits to shift out
  **/
 static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
-                                        u16 count)
+					u16 count)
 {
 	u32 eec;
 	u32 mask;
@@ -1736,7 +1736,7 @@
  *  caller does not need checksum_val, the value can be NULL.
  **/
 s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
-                                           u16 *checksum_val)
+					   u16 *checksum_val)
 {
 	s32 status;
 	u16 checksum;
@@ -1809,7 +1809,7 @@
  *  Puts an ethernet address into a receive address register.
  **/
 s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
-                          u32 enable_addr)
+			  u32 enable_addr)
 {
 	u32 rar_low, rar_high;
 	u32 rar_entries = hw->mac.num_rar_entries;
@@ -2053,7 +2053,7 @@
 
 	if (hw->addr_ctrl.mta_in_use > 0)
 		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL,
-		                IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
+				IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
 
 	hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n");
 	return 0;
@@ -2071,7 +2071,7 @@
 
 	if (a->mta_in_use > 0)
 		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE |
-		                hw->mac.mc_filter_type);
+				hw->mac.mc_filter_type);
 
 	return 0;
 }
@@ -2106,19 +2106,25 @@
 	u32 fcrtl, fcrth;
 	int i;
 
-	/*
-	 * Validate the water mark configuration for packet buffer 0.  Zero
-	 * water marks indicate that the packet buffer was not configured
-	 * and the watermarks for packet buffer 0 should always be configured.
-	 */
-	if (!hw->fc.low_water ||
-	    !hw->fc.high_water[0] ||
-	    !hw->fc.pause_time) {
-		hw_dbg(hw, "Invalid water mark configuration\n");
+	/* Validate the water mark configuration. */
+	if (!hw->fc.pause_time) {
 		ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
 		goto out;
 	}
 
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+				goto out;
+			}
+		}
+	}
+
 	/* Negotiate the fc mode to use */
 	ixgbe_fc_autoneg(hw);
 
@@ -2181,12 +2187,11 @@
 	IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
 	IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
 
-	fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE;
-
 	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
 	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
 		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
 		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
 			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
 			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
 		} else {
@@ -2654,8 +2659,7 @@
 
 	/* For informational purposes only */
 	if (i >= IXGBE_MAX_SECRX_POLL)
-		hw_dbg(hw, "Rx unit being enabled before security "
-		       "path fully disabled.  Continuing with init.\n");
+		hw_dbg(hw, "Rx unit being enabled before security path fully disabled. Continuing with init.\n");
 
 	return 0;
 
@@ -2782,7 +2786,7 @@
  *  get and set mac_addr routines.
  **/
 static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
-                                        u16 *san_mac_offset)
+					u16 *san_mac_offset)
 {
 	s32 ret_val;
 
@@ -2828,7 +2832,7 @@
 	hw->mac.ops.set_lan_id(hw);
 	/* apply the port offset to the address offset */
 	(hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
-	                 (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+			 (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
 	for (i = 0; i < 3; i++) {
 		ret_val = hw->eeprom.ops.read(hw, san_mac_offset,
 					      &san_mac_data);
@@ -3068,7 +3072,7 @@
  *  Turn on/off specified VLAN in the VLAN filter table.
  **/
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
-                           bool vlan_on)
+			   bool vlan_on)
 {
 	s32 regindex;
 	u32 bitindex;
@@ -3190,9 +3194,9 @@
 				 * Ignore it. */
 				vfta_changed = false;
 			}
-		}
-		else
+		} else {
 			IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
+		}
 	}
 
 	if (vfta_changed)
@@ -3292,7 +3296,7 @@
  *  block to check the support for the alternative WWNN/WWPN prefix support.
  **/
 s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
-                                        u16 *wwpn_prefix)
+					u16 *wwpn_prefix)
 {
 	u16 offset, caps;
 	u16 alt_san_mac_blk_offset;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index f12c40f..2ae5d4b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h

@@ -39,7 +39,7 @@
 s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
 s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
 s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
-                                  u32 pba_num_size);
+				  u32 pba_num_size);
 s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
 enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status);
 enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status);
@@ -61,16 +61,16 @@
 s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
 				    u16 words, u16 *data);
 s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
-                                       u16 *data);
+				       u16 *data);
 s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
 					      u16 words, u16 *data);
 u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
 s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
-                                           u16 *checksum_val);
+					   u16 *checksum_val);
 s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
-                          u32 enable_addr);
+			  u32 enable_addr);
 s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
 s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
 s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
@@ -92,13 +92,13 @@
 s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
 s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
-                           u32 vind, bool vlan_on);
+			   u32 vind, bool vlan_on);
 s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
 s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
-                                 ixgbe_link_speed *speed,
-                                 bool *link_up, bool link_up_wait_to_complete);
+				 ixgbe_link_speed *speed,
+				 bool *link_up, bool link_up_wait_to_complete);
 s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
-                                 u16 *wwpn_prefix);
+				 u16 *wwpn_prefix);
 
 s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *, u32 *reg_val);
 s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked);
@@ -141,8 +141,6 @@
 	return unlikely(!addr);
 }
 
-void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg);
-
 static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value)
 {
 	u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
@@ -172,18 +170,7 @@
 }
 #define IXGBE_WRITE_REG64(a, reg, value) ixgbe_write_reg64((a), (reg), (value))
 
-static inline u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
-{
-	u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
-	u32 value;
-
-	if (ixgbe_removed(reg_addr))
-		return IXGBE_FAILED_READ_REG;
-	value = readl(reg_addr + reg);
-	if (unlikely(value == IXGBE_FAILED_READ_REG))
-		ixgbe_check_remove(hw, reg);
-	return value;
-}
+u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg);
 #define IXGBE_READ_REG(a, reg) ixgbe_read_reg((a), (reg))
 
 #define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) \

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
index e055e00..a689ee0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c

@@ -267,7 +267,7 @@
  * Configure dcb settings and enable dcb mode.
  */
 s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
-                        struct ixgbe_dcb_config *dcb_config)
+			struct ixgbe_dcb_config *dcb_config)
 {
 	s32 ret = 0;
 	u8 pfc_en;
@@ -389,7 +389,6 @@
 	for (i = 0; i < MAX_USER_PRIORITY; i++)
 		map[i] = IXGBE_RTRUP2TC_UP_MASK &
 			(reg >> (i * IXGBE_RTRUP2TC_UP_SHIFT));
-	return;
 }
 
 void ixgbe_dcb_read_rtrup2tc(struct ixgbe_hw *hw, u8 *map)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index 7a77f37..d3ba63f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c

@@ -208,7 +208,6 @@
 
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
 
-	fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE;
 	/* Configure PFC Tx thresholds per TC */
 	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
 		if (!(pfc_en & (1 << i))) {
@@ -217,6 +216,7 @@
 			continue;
 		}
 
+		fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
 		reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
 		IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
 		IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
index bdb99b3..3b932fe 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c

@@ -242,7 +242,6 @@
 			max_tc = prio_tc[i];
 	}
 
-	fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE;
 
 	/* Configure PFC Tx thresholds per TC */
 	for (i = 0; i <= max_tc; i++) {
@@ -257,6 +256,7 @@
 
 		if (enabled) {
 			reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
 			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
 		} else {
 			reg = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
index d5a1e3d..90c3702 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h

@@ -31,17 +31,17 @@
 
 /* DCB register definitions */
 #define IXGBE_RTTDCS_TDPAC      0x00000001 /* 0 Round Robin,
-                                            * 1 WSP - Weighted Strict Priority
-                                            */
+					    * 1 WSP - Weighted Strict Priority
+					    */
 #define IXGBE_RTTDCS_VMPAC      0x00000002 /* 0 Round Robin,
-                                            * 1 WRR - Weighted Round Robin
-                                            */
+					    * 1 WRR - Weighted Round Robin
+					    */
 #define IXGBE_RTTDCS_TDRM       0x00000010 /* Transmit Recycle Mode */
 #define IXGBE_RTTDCS_ARBDIS     0x00000040 /* DCB arbiter disable */
 #define IXGBE_RTTDCS_BDPM       0x00400000 /* Bypass Data Pipe - must clear! */
 #define IXGBE_RTTDCS_BPBFSM     0x00800000 /* Bypass PB Free Space - must
-                                             * clear!
-                                             */
+					     * clear!
+					     */
 #define IXGBE_RTTDCS_SPEED_CHG  0x80000000 /* Link speed change */
 
 /* Receive UP2TC mapping */
@@ -56,11 +56,11 @@
 #define IXGBE_RTRPT4C_LSP       0x80000000 /* LSP enable bit */
 
 #define IXGBE_RDRXCTL_MPBEN     0x00000010 /* DMA config for multiple packet
-                                            * buffers enable
-                                            */
+					    * buffers enable
+					    */
 #define IXGBE_RDRXCTL_MCEN      0x00000040 /* DMA config for multiple cores
-                                            * (RSS) enable
-                                            */
+					    * (RSS) enable
+					    */
 
 /* RTRPCS Bit Masks */
 #define IXGBE_RTRPCS_RRM        0x00000002 /* Receive Recycle Mode enable */
@@ -81,8 +81,8 @@
 
 /* RTTPCS Bit Masks */
 #define IXGBE_RTTPCS_TPPAC      0x00000020 /* 0 Round Robin,
-                                            * 1 SP - Strict Priority
-                                            */
+					    * 1 SP - Strict Priority
+					    */
 #define IXGBE_RTTPCS_ARBDIS     0x00000040 /* Arbiter disable */
 #define IXGBE_RTTPCS_TPRM       0x00000100 /* Transmit Recycle Mode enable */
 #define IXGBE_RTTPCS_ARBD_SHIFT 22

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index edd89a1..5172b6b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c

@@ -192,8 +192,8 @@
 }
 
 static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
-                                         u8 prio, u8 bwg_id, u8 bw_pct,
-                                         u8 up_map)
+					 u8 prio, u8 bwg_id, u8 bw_pct,
+					 u8 up_map)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -210,7 +210,7 @@
 }
 
 static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
-                                          u8 bw_pct)
+					  u8 bw_pct)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -218,8 +218,8 @@
 }
 
 static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
-                                         u8 prio, u8 bwg_id, u8 bw_pct,
-                                         u8 up_map)
+					 u8 prio, u8 bwg_id, u8 bw_pct,
+					 u8 up_map)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -236,7 +236,7 @@
 }
 
 static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
-                                          u8 bw_pct)
+					  u8 bw_pct)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -244,8 +244,8 @@
 }
 
 static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
-                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
-                                         u8 *up_map)
+					 u8 *prio, u8 *bwg_id, u8 *bw_pct,
+					 u8 *up_map)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -256,7 +256,7 @@
 }
 
 static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
-                                          u8 *bw_pct)
+					  u8 *bw_pct)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -264,8 +264,8 @@
 }
 
 static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
-                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
-                                         u8 *up_map)
+					 u8 *prio, u8 *bwg_id, u8 *bw_pct,
+					 u8 *up_map)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -276,7 +276,7 @@
 }
 
 static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
-                                          u8 *bw_pct)
+					  u8 *bw_pct)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -284,7 +284,7 @@
 }
 
 static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
-                                    u8 setting)
+				    u8 setting)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -295,7 +295,7 @@
 }
 
 static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
-                                    u8 *setting)
+				    u8 *setting)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
index 472b0f4..5e2c1e3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c

@@ -253,8 +253,7 @@
  **/
 void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter)
 {
-	if (adapter->ixgbe_dbg_adapter)
-		debugfs_remove_recursive(adapter->ixgbe_dbg_adapter);
+	debugfs_remove_recursive(adapter->ixgbe_dbg_adapter);
 	adapter->ixgbe_dbg_adapter = NULL;
 }
 

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 6c55c14..a452730 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c

@@ -141,8 +141,8 @@
 			 sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
 			/ sizeof(u64))
 #define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
-                         IXGBE_PB_STATS_LEN + \
-                         IXGBE_QUEUE_STATS_LEN)
+			 IXGBE_PB_STATS_LEN + \
+			 IXGBE_QUEUE_STATS_LEN)
 
 static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 	"Register test  (offline)", "Eeprom test    (offline)",
@@ -152,7 +152,7 @@
 #define IXGBE_TEST_LEN sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN
 
 static int ixgbe_get_settings(struct net_device *netdev,
-                              struct ethtool_cmd *ecmd)
+			      struct ethtool_cmd *ecmd)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -161,13 +161,6 @@
 	bool autoneg = false;
 	bool link_up;
 
-	/* SFP type is needed for get_link_capabilities */
-	if (hw->phy.media_type & (ixgbe_media_type_fiber |
-				  ixgbe_media_type_fiber_qsfp)) {
-		if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
-				hw->phy.ops.identify_sfp(hw);
-	}
-
 	hw->mac.ops.get_link_capabilities(hw, &supported_link, &autoneg);
 
 	/* set the supported link speeds */
@@ -303,15 +296,15 @@
 		}
 		ecmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	return 0;
 }
 
 static int ixgbe_set_settings(struct net_device *netdev,
-                              struct ethtool_cmd *ecmd)
+			      struct ethtool_cmd *ecmd)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -368,7 +361,7 @@
 }
 
 static void ixgbe_get_pauseparam(struct net_device *netdev,
-                                 struct ethtool_pauseparam *pause)
+				 struct ethtool_pauseparam *pause)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -390,7 +383,7 @@
 }
 
 static int ixgbe_set_pauseparam(struct net_device *netdev,
-                                struct ethtool_pauseparam *pause)
+				struct ethtool_pauseparam *pause)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -450,7 +443,7 @@
 #define IXGBE_GET_STAT(_A_, _R_) _A_->stats._R_
 
 static void ixgbe_get_regs(struct net_device *netdev,
-                           struct ethtool_regs *regs, void *p)
+			   struct ethtool_regs *regs, void *p)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -812,7 +805,7 @@
 }
 
 static int ixgbe_get_eeprom(struct net_device *netdev,
-                            struct ethtool_eeprom *eeprom, u8 *bytes)
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -918,7 +911,7 @@
 }
 
 static void ixgbe_get_drvinfo(struct net_device *netdev,
-                              struct ethtool_drvinfo *drvinfo)
+			      struct ethtool_drvinfo *drvinfo)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	u32 nvm_track_id;
@@ -940,7 +933,7 @@
 }
 
 static void ixgbe_get_ringparam(struct net_device *netdev,
-                                struct ethtool_ringparam *ring)
+				struct ethtool_ringparam *ring)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_ring *tx_ring = adapter->tx_ring[0];
@@ -953,7 +946,7 @@
 }
 
 static int ixgbe_set_ringparam(struct net_device *netdev,
-                               struct ethtool_ringparam *ring)
+			       struct ethtool_ringparam *ring)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_ring *temp_ring;
@@ -1082,7 +1075,7 @@
 }
 
 static void ixgbe_get_ethtool_stats(struct net_device *netdev,
-                                    struct ethtool_stats *stats, u64 *data)
+				    struct ethtool_stats *stats, u64 *data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct rtnl_link_stats64 temp;
@@ -1110,7 +1103,7 @@
 		}
 
 		data[i] = (ixgbe_gstrings_stats[i].sizeof_stat ==
-		           sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+			   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	for (j = 0; j < netdev->num_tx_queues; j++) {
 		ring = adapter->tx_ring[j];
@@ -1180,7 +1173,7 @@
 }
 
 static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
-                              u8 *data)
+			      u8 *data)
 {
 	char *p = (char *)data;
 	int i;
@@ -1357,8 +1350,7 @@
 		ixgbe_write_reg(&adapter->hw, reg, test_pattern[pat] & write);
 		val = ixgbe_read_reg(&adapter->hw, reg);
 		if (val != (test_pattern[pat] & write & mask)) {
-			e_err(drv, "pattern test reg %04X failed: got "
-			      "0x%08X expected 0x%08X\n",
+			e_err(drv, "pattern test reg %04X failed: got 0x%08X expected 0x%08X\n",
 			      reg, val, (test_pattern[pat] & write & mask));
 			*data = reg;
 			ixgbe_write_reg(&adapter->hw, reg, before);
@@ -1382,8 +1374,8 @@
 	ixgbe_write_reg(&adapter->hw, reg, write & mask);
 	val = ixgbe_read_reg(&adapter->hw, reg);
 	if ((write & mask) != (val & mask)) {
-		e_err(drv, "set/check reg %04X test failed: got 0x%08X "
-		      "expected 0x%08X\n", reg, (val & mask), (write & mask));
+		e_err(drv, "set/check reg %04X test failed: got 0x%08X expected 0x%08X\n",
+		      reg, (val & mask), (write & mask));
 		*data = reg;
 		ixgbe_write_reg(&adapter->hw, reg, before);
 		return true;
@@ -1430,8 +1422,8 @@
 	ixgbe_write_reg(&adapter->hw, IXGBE_STATUS, toggle);
 	after = ixgbe_read_reg(&adapter->hw, IXGBE_STATUS) & toggle;
 	if (value != after) {
-		e_err(drv, "failed STATUS register test got: 0x%08X "
-		      "expected: 0x%08X\n", after, value);
+		e_err(drv, "failed STATUS register test got: 0x%08X expected: 0x%08X\n",
+		      after, value);
 		*data = 1;
 		return 1;
 	}
@@ -1533,10 +1525,10 @@
 			return -1;
 		}
 	} else if (!request_irq(irq, ixgbe_test_intr, IRQF_PROBE_SHARED,
-	                        netdev->name, netdev)) {
+				netdev->name, netdev)) {
 		shared_int = false;
 	} else if (request_irq(irq, ixgbe_test_intr, IRQF_SHARED,
-	                       netdev->name, netdev)) {
+			       netdev->name, netdev)) {
 		*data = 1;
 		return -1;
 	}
@@ -1563,9 +1555,9 @@
 			 */
 			adapter->test_icr = 0;
 			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
-			                ~mask & 0x00007FFF);
+					~mask & 0x00007FFF);
 			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
-			                ~mask & 0x00007FFF);
+					~mask & 0x00007FFF);
 			IXGBE_WRITE_FLUSH(&adapter->hw);
 			usleep_range(10000, 20000);
 
@@ -1587,7 +1579,7 @@
 		IXGBE_WRITE_FLUSH(&adapter->hw);
 		usleep_range(10000, 20000);
 
-		if (!(adapter->test_icr &mask)) {
+		if (!(adapter->test_icr & mask)) {
 			*data = 4;
 			break;
 		}
@@ -1602,9 +1594,9 @@
 			 */
 			adapter->test_icr = 0;
 			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
-			                ~mask & 0x00007FFF);
+					~mask & 0x00007FFF);
 			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
-			                ~mask & 0x00007FFF);
+					~mask & 0x00007FFF);
 			IXGBE_WRITE_FLUSH(&adapter->hw);
 			usleep_range(10000, 20000);
 
@@ -1964,7 +1956,7 @@
 }
 
 static void ixgbe_diag_test(struct net_device *netdev,
-                            struct ethtool_test *eth_test, u64 *data)
+			    struct ethtool_test *eth_test, u64 *data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	bool if_running = netif_running(netdev);
@@ -1987,10 +1979,7 @@
 			int i;
 			for (i = 0; i < adapter->num_vfs; i++) {
 				if (adapter->vfinfo[i].clear_to_send) {
-					netdev_warn(netdev, "%s",
-						    "offline diagnostic is not "
-						    "supported when VFs are "
-						    "present\n");
+					netdev_warn(netdev, "offline diagnostic is not supported when VFs are present\n");
 					data[0] = 1;
 					data[1] = 1;
 					data[2] = 1;
@@ -2037,8 +2026,7 @@
 		 * loopback diagnostic. */
 		if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
 				      IXGBE_FLAG_VMDQ_ENABLED)) {
-			e_info(hw, "Skip MAC loopback diagnostic in VT "
-			       "mode\n");
+			e_info(hw, "Skip MAC loopback diagnostic in VT mode\n");
 			data[3] = 0;
 			goto skip_loopback;
 		}
@@ -2078,7 +2066,7 @@
 }
 
 static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
-                               struct ethtool_wolinfo *wol)
+			       struct ethtool_wolinfo *wol)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int retval = 0;
@@ -2094,12 +2082,12 @@
 }
 
 static void ixgbe_get_wol(struct net_device *netdev,
-                          struct ethtool_wolinfo *wol)
+			  struct ethtool_wolinfo *wol)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	wol->supported = WAKE_UCAST | WAKE_MCAST |
-	                 WAKE_BCAST | WAKE_MAGIC;
+			 WAKE_BCAST | WAKE_MAGIC;
 	wol->wolopts = 0;
 
 	if (ixgbe_wol_exclusion(adapter, wol) ||
@@ -2181,7 +2169,7 @@
 }
 
 static int ixgbe_get_coalesce(struct net_device *netdev,
-                              struct ethtool_coalesce *ec)
+			      struct ethtool_coalesce *ec)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -2222,8 +2210,7 @@
 	    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
 		if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
 			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
-			e_info(probe, "rx-usecs value high enough "
-				      "to re-enable RSC\n");
+			e_info(probe, "rx-usecs value high enough to re-enable RSC\n");
 			return true;
 		}
 	/* if interrupt rate is too high then disable RSC */
@@ -2236,7 +2223,7 @@
 }
 
 static int ixgbe_set_coalesce(struct net_device *netdev,
-                              struct ethtool_coalesce *ec)
+			      struct ethtool_coalesce *ec)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_q_vector *q_vector;
@@ -2421,9 +2408,11 @@
 	switch (cmd->flow_type) {
 	case TCP_V4_FLOW:
 		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fallthrough */
 	case UDP_V4_FLOW:
 		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
 			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fallthrough */
 	case SCTP_V4_FLOW:
 	case AH_ESP_V4_FLOW:
 	case AH_V4_FLOW:
@@ -2433,9 +2422,11 @@
 		break;
 	case TCP_V6_FLOW:
 		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fallthrough */
 	case UDP_V6_FLOW:
 		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
 			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fallthrough */
 	case SCTP_V6_FLOW:
 	case AH_ESP_V6_FLOW:
 	case AH_V6_FLOW:
@@ -2787,8 +2778,7 @@
 
 		if ((flags2 & UDP_RSS_FLAGS) &&
 		    !(adapter->flags2 & UDP_RSS_FLAGS))
-			e_warn(drv, "enabling UDP RSS: fragmented packets"
-			       " may arrive out of order to the stack above\n");
+			e_warn(drv, "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
 
 		adapter->flags2 = flags2;
 
@@ -3099,5 +3089,5 @@
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
+	netdev->ethtool_ops = &ixgbe_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index b16cc78..0772b77 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h

@@ -81,9 +81,7 @@
 	void *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
 	unsigned long mode;
-#ifdef CONFIG_IXGBE_DCB
 	u8 up;
-#endif
 };
 
 #endif /* _IXGBE_FCOE_H */

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 2067d39..2d9451e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c

@@ -1113,8 +1113,8 @@
 	err = pci_enable_msi(adapter->pdev);
 	if (err) {
 		netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev,
-			     "Unable to allocate MSI interrupt, "
-			     "falling back to legacy.  Error: %d\n", err);
+			     "Unable to allocate MSI interrupt, falling back to legacy.  Error: %d\n",
+			     err);
 		return;
 	}
 	adapter->flags |= IXGBE_FLAG_MSI_ENABLED;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c047c3e..f5aa331 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

@@ -301,7 +301,7 @@
 		ixgbe_service_event_schedule(adapter);
 }
 
-void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
 {
 	u32 value;
 
@@ -320,6 +320,32 @@
 		ixgbe_remove_adapter(hw);
 }
 
+/**
+ * ixgbe_read_reg - read a 32-bit device register, tolerating device removal
+ * @hw: hw specific details
+ * @reg: offset of register to read
+ *
+ * Returns: the value read, or IXGBE_FAILED_READ_REG if the adapter is
+ * already known to be removed
+ *
+ * The mapped base address is sampled once. If it indicates that a removal
+ * was detected earlier, the hardware is not accessed at all. Otherwise the
+ * register is read, and a result equal to IXGBE_FAILED_READ_REG (all ones)
+ * is handed to ixgbe_check_remove() for confirmation before the raw value
+ * is returned to the caller.
+ */
+u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
+{
+	u8 __iomem *base = ACCESS_ONCE(hw->hw_addr);
+	u32 val;
+
+	if (ixgbe_removed(base))
+		return IXGBE_FAILED_READ_REG;
+
+	val = readl(base + reg);
+
+	/* all ones is ambiguous: let ixgbe_check_remove() sort it out */
+	if (unlikely(val == IXGBE_FAILED_READ_REG))
+		ixgbe_check_remove(hw, reg);
+
+	return val;
+}
+
 static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev)
 {
 	u16 value;
@@ -3743,35 +3769,6 @@
 }
 
 /**
- * ixgbe_vlan_filter_disable - helper to disable hw vlan filtering
- * @adapter: driver data
- */
-static void ixgbe_vlan_filter_disable(struct ixgbe_adapter *adapter)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	u32 vlnctrl;
-
-	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-	vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
-	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
-}
-
-/**
- * ixgbe_vlan_filter_enable - helper to enable hw vlan filtering
- * @adapter: driver data
- */
-static void ixgbe_vlan_filter_enable(struct ixgbe_adapter *adapter)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	u32 vlnctrl;
-
-	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-	vlnctrl |= IXGBE_VLNCTRL_VFE;
-	vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
-	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
-}
-
-/**
  * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping
  * @adapter: driver data
  */
@@ -3850,6 +3847,158 @@
 }
 
 /**
+ * ixgbe_write_mc_addr_list - push the netdev multicast list to the MTA
+ * @netdev: network interface device structure
+ *
+ * Hands the multicast list to the MAC's update_mc_addr_list operation and,
+ * when CONFIG_PCI_IOV is set, calls ixgbe_restore_vf_multicasts() afterwards.
+ * Returns: -ENOMEM if the MAC has no update_mc_addr_list operation
+ *                0 if the interface is not running or has no addresses
+ *                X, the multicast address count of @netdev, otherwise
+ **/
+static int ixgbe_write_mc_addr_list(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* nothing is programmed while the interface is down */
+	if (!netif_running(netdev))
+		return 0;
+
+	if (!hw->mac.ops.update_mc_addr_list)
+		return -ENOMEM;
+
+	hw->mac.ops.update_mc_addr_list(hw, netdev);
+#ifdef CONFIG_PCI_IOV
+	ixgbe_restore_vf_multicasts(adapter);
+#endif
+
+	return netdev_mc_count(netdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * ixgbe_full_sync_mac_table - write every software MAC table entry to hardware
+ * @adapter: board private structure
+ *
+ * Walks all RAR slots regardless of their MODIFIED flag: entries marked
+ * IXGBE_MAC_STATE_IN_USE are written with set_rar, every other slot is
+ * cleared with clear_rar. The MODIFIED flag is dropped on each entry.
+ */
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *entry;
+	int idx;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++) {
+		entry = &adapter->mac_table[idx];
+
+		if (entry->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, idx, entry->addr,
+					    entry->queue, IXGBE_RAH_AV);
+		else
+			hw->mac.ops.clear_rar(hw, idx);
+
+		entry->state &= ~(IXGBE_MAC_STATE_MODIFIED);
+	}
+}
+#endif
+
+/**
+ * ixgbe_sync_mac_table - write modified software MAC table entries to hardware
+ * @adapter: board private structure
+ *
+ * Only slots flagged IXGBE_MAC_STATE_MODIFIED are touched. A modified slot
+ * that is in use is written with set_rar, otherwise it is cleared with
+ * clear_rar; in both cases the MODIFIED flag is then dropped.
+ */
+static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *entry;
+	int idx;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++) {
+		entry = &adapter->mac_table[idx];
+
+		/* unchanged slots need no register access */
+		if (!(entry->state & IXGBE_MAC_STATE_MODIFIED))
+			continue;
+
+		if (entry->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, idx, entry->addr,
+					    entry->queue, IXGBE_RAH_AV);
+		else
+			hw->mac.ops.clear_rar(hw, idx);
+
+		entry->state &= ~(IXGBE_MAC_STATE_MODIFIED);
+	}
+}
+
+/**
+ * ixgbe_flush_sw_mac_table - empty the software MAC table and sync it
+ * @adapter: board private structure
+ *
+ * Every slot is marked modified and no longer in use, its address is
+ * zeroed and its queue reset to 0. ixgbe_sync_mac_table() is then called
+ * once for the whole table.
+ */
+static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *entry;
+	int idx;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++) {
+		entry = &adapter->mac_table[idx];
+
+		entry->state |= IXGBE_MAC_STATE_MODIFIED;
+		entry->state &= ~IXGBE_MAC_STATE_IN_USE;
+		memset(entry->addr, 0, ETH_ALEN);
+		entry->queue = 0;
+	}
+
+	ixgbe_sync_mac_table(adapter);
+}
+
+/**
+ * ixgbe_available_rars - count software MAC table slots with no state set
+ * @adapter: board private structure
+ *
+ * Returns the number of entries, out of hw->mac.num_rar_entries, whose
+ * state field is exactly 0.
+ */
+static int ixgbe_available_rars(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int free_slots = 0;
+	int idx;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++)
+		if (!adapter->mac_table[idx].state)
+			free_slots++;
+
+	return free_slots;
+}
+
+/**
+ * ixgbe_mac_set_default_filter - install @addr as MAC table entry 0
+ * @adapter: board private structure
+ * @addr: ETH_ALEN byte address to install
+ *
+ * This function destroys the first RAR entry: slot 0 of the software table
+ * is overwritten with @addr, queue VMDQ_P(0) and the DEFAULT | IN_USE state,
+ * and the result is written straight to RAR 0 with set_rar.
+ */
+static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter,
+					 u8 *addr)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *def = &adapter->mac_table[0];
+
+	memcpy(def->addr, addr, ETH_ALEN);
+	def->queue = VMDQ_P(0);
+	def->state = (IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE);
+
+	hw->mac.ops.set_rar(hw, 0, def->addr, def->queue, IXGBE_RAH_AV);
+}
+
+/**
+ * ixgbe_add_mac_filter - place an address in the first unused MAC table slot
+ * @adapter: board private structure
+ * @addr: address to add
+ * @queue: queue value stored with the entry
+ *
+ * Returns: -EINVAL if @addr is the all-zero address
+ *          -ENOMEM if every slot is already in use
+ *          otherwise the index of the slot that was filled, after the
+ *          table has been synced with ixgbe_sync_mac_table()
+ */
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *slot;
+	int idx;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++) {
+		slot = &adapter->mac_table[idx];
+
+		if (!(slot->state & IXGBE_MAC_STATE_IN_USE)) {
+			slot->state |= (IXGBE_MAC_STATE_MODIFIED |
+					IXGBE_MAC_STATE_IN_USE);
+			ether_addr_copy(slot->addr, addr);
+			slot->queue = queue;
+			ixgbe_sync_mac_table(adapter);
+			return idx;
+		}
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_del_mac_filter - remove an address/queue pair from the MAC table
+ * @adapter: board private structure
+ * @addr: address to remove
+ * @queue: queue value the entry was stored with
+ *
+ * The first slot whose address and queue both match is marked modified and
+ * not in use, zeroed, and the table is synced.
+ *
+ * Returns: -EINVAL if @addr is the all-zero address
+ *          -ENOMEM if no matching entry exists
+ *          0 once a matching entry has been cleared
+ */
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mac_addr *entry;
+	int idx;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	for (idx = 0; idx < hw->mac.num_rar_entries; idx++) {
+		entry = &adapter->mac_table[idx];
+
+		if (!ether_addr_equal(addr, entry->addr) ||
+		    entry->queue != queue)
+			continue;
+
+		entry->state |= IXGBE_MAC_STATE_MODIFIED;
+		entry->state &= ~IXGBE_MAC_STATE_IN_USE;
+		memset(entry->addr, 0, ETH_ALEN);
+		entry->queue = 0;
+		ixgbe_sync_mac_table(adapter);
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+/**
  * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
  * @netdev: network interface device structure
  *
@@ -3858,39 +4007,23 @@
  *                0 on no addresses written
  *                X on writing X addresses to the RAR table
  **/
-static int ixgbe_write_uc_addr_list(struct net_device *netdev)
+static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned int rar_entries = hw->mac.num_rar_entries - 1;
 	int count = 0;
 
-	/* In SR-IOV/VMDQ modes significantly less RAR entries are available */
-	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
-		rar_entries = IXGBE_MAX_PF_MACVLANS - 1;
-
 	/* return ENOMEM indicating insufficient memory for addresses */
-	if (netdev_uc_count(netdev) > rar_entries)
+	if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
 		return -ENOMEM;
 
 	if (!netdev_uc_empty(netdev)) {
 		struct netdev_hw_addr *ha;
-		/* return error if we do not support writing to RAR table */
-		if (!hw->mac.ops.set_rar)
-			return -ENOMEM;
-
 		netdev_for_each_uc_addr(ha, netdev) {
-			if (!rar_entries)
-				break;
-			hw->mac.ops.set_rar(hw, rar_entries--, ha->addr,
-					    VMDQ_P(0), IXGBE_RAH_AV);
+			ixgbe_del_mac_filter(adapter, ha->addr, vfn);
+			ixgbe_add_mac_filter(adapter, ha->addr, vfn);
 			count++;
 		}
 	}
-	/* write the addresses in reverse order to avoid write combining */
-	for (; rar_entries > 0 ; rar_entries--)
-		hw->mac.ops.clear_rar(hw, rar_entries);
-
 	return count;
 }
 
@@ -3908,11 +4041,12 @@
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
+	u32 vlnctrl;
 	int count;
 
 	/* Check for Promiscuous and All Multicast modes */
-
 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
 
 	/* set all bits that we expect to always be set */
 	fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */
@@ -3922,26 +4056,24 @@
 
 	/* clear the bits we are changing the status of */
 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
-
+	vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
 	if (netdev->flags & IFF_PROMISC) {
 		hw->addr_ctrl.user_set_promisc = true;
 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
-		vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_MPE);
+		vmolr |= IXGBE_VMOLR_MPE;
 		/* Only disable hardware filter vlans in promiscuous mode
 		 * if SR-IOV and VMDQ are disabled - otherwise ensure
 		 * that hardware VLAN filters remain enabled.
 		 */
 		if (!(adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED |
 					IXGBE_FLAG_SRIOV_ENABLED)))
-			ixgbe_vlan_filter_disable(adapter);
-		else
-			ixgbe_vlan_filter_enable(adapter);
+			vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
 	} else {
 		if (netdev->flags & IFF_ALLMULTI) {
 			fctrl |= IXGBE_FCTRL_MPE;
 			vmolr |= IXGBE_VMOLR_MPE;
 		}
-		ixgbe_vlan_filter_enable(adapter);
+		vlnctrl |= IXGBE_VLNCTRL_VFE;
 		hw->addr_ctrl.user_set_promisc = false;
 	}
 
@@ -3950,7 +4082,7 @@
 	 * sufficient space to store all the addresses then enable
 	 * unicast promiscuous mode
 	 */
-	count = ixgbe_write_uc_addr_list(netdev);
+	count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0));
 	if (count < 0) {
 		fctrl |= IXGBE_FCTRL_UPE;
 		vmolr |= IXGBE_VMOLR_ROPE;
@@ -3960,11 +4092,13 @@
 	 * then we should just turn on promiscuous mode so
 	 * that we can at least receive multicast traffic
 	 */
-	hw->mac.ops.update_mc_addr_list(hw, netdev);
-	vmolr |= IXGBE_VMOLR_ROMPE;
-
-	if (adapter->num_vfs)
-		ixgbe_restore_vf_multicasts(adapter);
+	count = ixgbe_write_mc_addr_list(netdev);
+	if (count < 0) {
+		fctrl |= IXGBE_FCTRL_MPE;
+		vmolr |= IXGBE_VMOLR_MPE;
+	} else if (count) {
+		vmolr |= IXGBE_VMOLR_ROMPE;
+	}
 
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) &
@@ -3985,6 +4119,7 @@
 		/* NOTE:  VLAN filtering is disabled by setting PROMISC */
 	}
 
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 
 	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -4101,8 +4236,8 @@
 	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
 	    (pb == ixgbe_fcoe_get_tc(adapter)))
 		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
-
 #endif
+
 	/* Calculate delay value for device */
 	switch (hw->mac.type) {
 	case ixgbe_mac_X540:
@@ -4143,7 +4278,7 @@
  * @adapter: board private structure to calculate for
  * @pb: packet buffer to calculate
  */
-static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter)
+static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct net_device *dev = adapter->netdev;
@@ -4153,6 +4288,14 @@
 	/* Calculate max LAN frame size */
 	tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
 
+#ifdef IXGBE_FCOE
+	/* FCoE traffic class uses FCOE jumbo frames */
+	if ((dev->features & NETIF_F_FCOE_MTU) &&
+	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
+	    (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up)))
+		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+#endif
+
 	/* Calculate delay value for device */
 	switch (hw->mac.type) {
 	case ixgbe_mac_X540:
@@ -4179,15 +4322,17 @@
 	if (!num_tc)
 		num_tc = 1;
 
-	hw->fc.low_water = ixgbe_lpbthresh(adapter);
-
 	for (i = 0; i < num_tc; i++) {
 		hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
+		hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
 
 		/* Low water marks must not be larger than high water marks */
-		if (hw->fc.low_water > hw->fc.high_water[i])
-			hw->fc.low_water = 0;
+		if (hw->fc.low_water[i] > hw->fc.high_water[i])
+			hw->fc.low_water[i] = 0;
 	}
+
+	for (; i < MAX_TRAFFIC_CLASS; i++)
+		hw->fc.high_water[i] = 0;
 }
 
 static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
@@ -4249,20 +4394,10 @@
 		vmolr |= IXGBE_VMOLR_ROMPE;
 		hw->mac.ops.update_mc_addr_list(hw, dev);
 	}
-	ixgbe_write_uc_addr_list(adapter->netdev);
+	ixgbe_write_uc_addr_list(adapter->netdev, pool);
 	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
 }
 
-static void ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
-				 u8 *addr, u16 pool)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned int entry;
-
-	entry = hw->mac.num_rar_entries - pool;
-	hw->mac.ops.set_rar(hw, entry, addr, VMDQ_P(pool), IXGBE_RAH_AV);
-}
-
 static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter)
 {
 	struct ixgbe_adapter *adapter = vadapter->real_adapter;
@@ -4521,6 +4656,8 @@
 	case ixgbe_phy_qsfp_active_unknown:
 	case ixgbe_phy_qsfp_intel:
 	case ixgbe_phy_qsfp_unknown:
+	/* ixgbe_phy_none is set when no SFP module is present */
+	case ixgbe_phy_none:
 		return true;
 	case ixgbe_phy_nl:
 		if (hw->mac.type == ixgbe_mac_82598EB)
@@ -4742,7 +4879,9 @@
 void ixgbe_reset(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
 	int err;
+	u8 old_addr[ETH_ALEN];
 
 	if (ixgbe_removed(hw->hw_addr))
 		return;
@@ -4778,9 +4917,10 @@
 	}
 
 	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
-
-	/* reprogram the RAR[0] in case user changed it. */
-	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV);
+	/* do not flush user set addresses */
+	memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len);
+	ixgbe_flush_sw_mac_table(adapter);
+	ixgbe_mac_set_default_filter(adapter, old_addr);
 
 	/* update SAN MAC vmdq pool selection */
 	if (hw->mac.san_mac_rar_index)
@@ -5026,6 +5166,10 @@
 #endif /* CONFIG_IXGBE_DCB */
 #endif /* IXGBE_FCOE */
 
+	adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
+				     hw->mac.num_rar_entries,
+				     GFP_ATOMIC);
+
 	/* Set MAC specific capability flags and exceptions */
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
@@ -5517,6 +5661,17 @@
 	return err;
 }
 
+static void ixgbe_close_suspend(struct ixgbe_adapter *adapter)
+{	/* Teardown sequence used by ixgbe_close() and by the rtnl-locked
+	 * netif_device_detach() path when the interface is running:
+	 * suspend PTP, bring the adapter down, release its IRQs, then
+	 * free all Tx and Rx ring resources.
+	 */
+	ixgbe_ptp_suspend(adapter);
+
+	ixgbe_down(adapter);
+	ixgbe_free_irq(adapter);
+
+	ixgbe_free_all_tx_resources(adapter);
+	ixgbe_free_all_rx_resources(adapter);
+}
+
 /**
  * ixgbe_close - Disables a network interface
  * @netdev: network interface device structure
@@ -5534,14 +5689,10 @@
 
 	ixgbe_ptp_stop(adapter);
 
-	ixgbe_down(adapter);
-	ixgbe_free_irq(adapter);
+	ixgbe_close_suspend(adapter);
 
 	ixgbe_fdir_filter_exit(adapter);
 
-	ixgbe_free_all_tx_resources(adapter);
-	ixgbe_free_all_rx_resources(adapter);
-
 	ixgbe_release_hw_control(adapter);
 
 	return 0;
@@ -5608,12 +5759,8 @@
 	netif_device_detach(netdev);
 
 	rtnl_lock();
-	if (netif_running(netdev)) {
-		ixgbe_down(adapter);
-		ixgbe_free_irq(adapter);
-		ixgbe_free_all_tx_resources(adapter);
-		ixgbe_free_all_rx_resources(adapter);
-	}
+	if (netif_running(netdev))
+		ixgbe_close_suspend(adapter);
 	rtnl_unlock();
 
 	ixgbe_clear_interrupt_scheme(adapter);
@@ -5945,7 +6092,7 @@
 	if (ixgbe_reinit_fdir_tables_82599(hw) == 0) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
-			        &(adapter->tx_ring[i]->state));
+				&(adapter->tx_ring[i]->state));
 		/* re-enable flow director interrupts */
 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
 	} else {
@@ -7172,16 +7319,17 @@
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct sockaddr *addr = p;
+	int ret;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+	ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 
-	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV);
-
-	return 0;
+	ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
+	return ret > 0 ? 0 : ret;
 }
 
 static int
@@ -7783,7 +7931,7 @@
 	.ndo_do_ioctl		= ixgbe_ioctl,
 	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= ixgbe_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
 	.ndo_get_stats64	= ixgbe_get_stats64,
@@ -8187,6 +8335,8 @@
 		goto err_sw_init;
 	}
 
+	ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr);
+
 	setup_timer(&adapter->service_timer, &ixgbe_service_timer,
 		    (unsigned long) adapter);
 
@@ -8242,7 +8392,7 @@
 	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
 		e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
 			   hw->mac.type, hw->phy.type, hw->phy.sfp_type,
-		           part_str);
+			   part_str);
 	else
 		e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
 			   hw->mac.type, hw->phy.type, part_str);
@@ -8304,8 +8454,8 @@
 
 	ixgbe_dbg_adapter_init(adapter);
 
-	/* Need link setup for MNG FW, else wait for IXGBE_UP */
-	if (ixgbe_mng_enabled(hw) && hw->mac.ops.setup_link)
+	/* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
+	if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
 		hw->mac.ops.setup_link(hw,
 			IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
 			true);
@@ -8319,6 +8469,7 @@
 	ixgbe_disable_sriov(adapter);
 	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
 	iounmap(adapter->io_addr);
+	kfree(adapter->mac_table);
 err_ioremap:
 	free_netdev(netdev);
 err_alloc_etherdev:
@@ -8392,6 +8543,7 @@
 
 	e_dev_info("complete\n");
 
+	kfree(adapter->mac_table);
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
index f5c6af2..1918e0a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c

@@ -223,7 +223,7 @@
  *  received an ack to that message within delay * timeout period
  **/
 static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
-                           u16 mbx_id)
+			   u16 mbx_id)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
 	s32 ret_val = IXGBE_ERR_MBX;
@@ -269,7 +269,7 @@
 	u32 vf_bit = vf_number % 16;
 
 	if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFREQ_VF1 << vf_bit,
-	                            index)) {
+				    index)) {
 		ret_val = 0;
 		hw->mbx.stats.reqs++;
 	}
@@ -291,7 +291,7 @@
 	u32 vf_bit = vf_number % 16;
 
 	if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFACK_VF1 << vf_bit,
-	                            index)) {
+				    index)) {
 		ret_val = 0;
 		hw->mbx.stats.acks++;
 	}
@@ -366,7 +366,7 @@
  *  returns SUCCESS if it successfully copied message into the buffer
  **/
 static s32 ixgbe_write_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
-                              u16 vf_number)
+			      u16 vf_number)
 {
 	s32 ret_val;
 	u16 i;
@@ -407,7 +407,7 @@
  *  a message due to a VF request so no polling for message is needed.
  **/
 static s32 ixgbe_read_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
-                             u16 vf_number)
+			     u16 vf_number)
 {
 	s32 ret_val;
 	u16 i;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index a9b9ad6..a5cb755 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h

@@ -54,11 +54,11 @@
  * Message ACK's are the value or'd with 0xF0000000
  */
 #define IXGBE_VT_MSGTYPE_ACK      0x80000000  /* Messages below or'd with
-                                               * this are the ACK */
+					       * this are the ACK */
 #define IXGBE_VT_MSGTYPE_NACK     0x40000000  /* Messages below or'd with
-                                               * this are the NACK */
+					       * this are the NACK */
 #define IXGBE_VT_MSGTYPE_CTS      0x20000000  /* Indicates that VF is still
-                                                 clear to send requests */
+						 clear to send requests */
 #define IXGBE_VT_MSGINFO_SHIFT    16
 /* bits 23:16 are used for exra info for certain messages */
 #define IXGBE_VT_MSGINFO_MASK     (0xFF << IXGBE_VT_MSGINFO_SHIFT)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index a76af8e2..ff68b7a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c

@@ -67,7 +67,7 @@
 			if (mdio45_probe(&hw->phy.mdio, phy_addr) == 0) {
 				ixgbe_get_phy_id(hw);
 				hw->phy.type =
-				        ixgbe_get_phy_type_from_id(hw->phy.id);
+					ixgbe_get_phy_type_from_id(hw->phy.id);
 
 				if (hw->phy.type == ixgbe_phy_unknown) {
 					hw->phy.ops.read_reg(hw,
@@ -136,12 +136,12 @@
 	u16 phy_id_low = 0;
 
 	status = hw->phy.ops.read_reg(hw, MDIO_DEVID1, MDIO_MMD_PMAPMD,
-	                              &phy_id_high);
+				      &phy_id_high);
 
 	if (status == 0) {
 		hw->phy.id = (u32)(phy_id_high << 16);
 		status = hw->phy.ops.read_reg(hw, MDIO_DEVID2, MDIO_MMD_PMAPMD,
-		                              &phy_id_low);
+					      &phy_id_low);
 		hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
 		hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
 	}
@@ -318,7 +318,7 @@
  *  @phy_data: Pointer to read data from PHY register
  **/
 s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
-                               u32 device_type, u16 *phy_data)
+			       u32 device_type, u16 *phy_data)
 {
 	s32 status;
 	u16 gssr;
@@ -421,7 +421,7 @@
  *  @phy_data: Data to write to the PHY register
  **/
 s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
-                                u32 device_type, u16 phy_data)
+				u32 device_type, u16 phy_data)
 {
 	s32 status;
 	u16 gssr;
@@ -548,8 +548,8 @@
  *  @speed: new link speed
  **/
 s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
-                                       ixgbe_link_speed speed,
-                                       bool autoneg_wait_to_complete)
+				       ixgbe_link_speed speed,
+				       bool autoneg_wait_to_complete)
 {
 
 	/*
@@ -582,8 +582,8 @@
  * Determines the link capabilities by reading the AUTOC register.
  */
 s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
-                                               ixgbe_link_speed *speed,
-                                               bool *autoneg)
+					       ixgbe_link_speed *speed,
+					       bool *autoneg)
 {
 	s32 status = IXGBE_ERR_LINK_SETUP;
 	u16 speed_ability;
@@ -592,7 +592,7 @@
 	*autoneg = true;
 
 	status = hw->phy.ops.read_reg(hw, MDIO_SPEED, MDIO_MMD_PMAPMD,
-	                              &speed_ability);
+				      &speed_ability);
 
 	if (status == 0) {
 		if (speed_ability & MDIO_SPEED_10G)
@@ -806,11 +806,11 @@
 
 	/* reset the PHY and poll for completion */
 	hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS,
-	                      (phy_data | MDIO_CTRL1_RESET));
+			      (phy_data | MDIO_CTRL1_RESET));
 
 	for (i = 0; i < 100; i++) {
 		hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS,
-		                     &phy_data);
+				     &phy_data);
 		if ((phy_data & MDIO_CTRL1_RESET) == 0)
 			break;
 		usleep_range(10000, 20000);
@@ -824,7 +824,7 @@
 
 	/* Get init offsets */
 	ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
-	                                              &data_offset);
+						      &data_offset);
 	if (ret_val != 0)
 		goto out;
 
@@ -838,7 +838,7 @@
 		if (ret_val)
 			goto err_eeprom;
 		control = (eword & IXGBE_CONTROL_MASK_NL) >>
-		           IXGBE_CONTROL_SHIFT_NL;
+			   IXGBE_CONTROL_SHIFT_NL;
 		edata = eword & IXGBE_DATA_MASK_NL;
 		switch (control) {
 		case IXGBE_DELAY_NL:
@@ -859,7 +859,7 @@
 				if (ret_val)
 					goto err_eeprom;
 				hw->phy.ops.write_reg(hw, phy_offset,
-				                      MDIO_MMD_PMAPMD, eword);
+						      MDIO_MMD_PMAPMD, eword);
 				hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
 				       phy_offset);
 				data_offset++;
@@ -1010,10 +1010,10 @@
 			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
 				if (hw->bus.lan_id == 0)
 					hw->phy.sfp_type =
-					             ixgbe_sfp_type_da_cu_core0;
+						     ixgbe_sfp_type_da_cu_core0;
 				else
 					hw->phy.sfp_type =
-					             ixgbe_sfp_type_da_cu_core1;
+						     ixgbe_sfp_type_da_cu_core1;
 			} else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) {
 				hw->phy.ops.read_i2c_eeprom(
 						hw, IXGBE_SFF_CABLE_SPEC_COMP,
@@ -1035,10 +1035,10 @@
 				    IXGBE_SFF_10GBASELR_CAPABLE)) {
 				if (hw->bus.lan_id == 0)
 					hw->phy.sfp_type =
-					              ixgbe_sfp_type_srlr_core0;
+						      ixgbe_sfp_type_srlr_core0;
 				else
 					hw->phy.sfp_type =
-					              ixgbe_sfp_type_srlr_core1;
+						      ixgbe_sfp_type_srlr_core1;
 			} else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) {
 				if (hw->bus.lan_id == 0)
 					hw->phy.sfp_type =
@@ -1087,15 +1087,15 @@
 				goto err_read_i2c_eeprom;
 
 			status = hw->phy.ops.read_i2c_eeprom(hw,
-			                            IXGBE_SFF_VENDOR_OUI_BYTE1,
-			                            &oui_bytes[1]);
+						    IXGBE_SFF_VENDOR_OUI_BYTE1,
+						    &oui_bytes[1]);
 
 			if (status != 0)
 				goto err_read_i2c_eeprom;
 
 			status = hw->phy.ops.read_i2c_eeprom(hw,
-			                            IXGBE_SFF_VENDOR_OUI_BYTE2,
-			                            &oui_bytes[2]);
+						    IXGBE_SFF_VENDOR_OUI_BYTE2,
+						    &oui_bytes[2]);
 
 			if (status != 0)
 				goto err_read_i2c_eeprom;
@@ -1403,8 +1403,8 @@
  *  so it returns the offsets to the phy init sequence block.
  **/
 s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
-                                        u16 *list_offset,
-                                        u16 *data_offset)
+					u16 *list_offset,
+					u16 *data_offset)
 {
 	u16 sfp_id;
 	u16 sfp_type = hw->phy.sfp_type;
@@ -1493,11 +1493,11 @@
  *  Performs byte read operation to SFP module's EEPROM over I2C interface.
  **/
 s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                  u8 *eeprom_data)
+				  u8 *eeprom_data)
 {
 	return hw->phy.ops.read_i2c_byte(hw, byte_offset,
-	                                 IXGBE_I2C_EEPROM_DEV_ADDR,
-	                                 eeprom_data);
+					 IXGBE_I2C_EEPROM_DEV_ADDR,
+					 eeprom_data);
 }
 
 /**
@@ -1525,11 +1525,11 @@
  *  Performs byte write operation to SFP module's EEPROM over I2C interface.
  **/
 s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                   u8 eeprom_data)
+				   u8 eeprom_data)
 {
 	return hw->phy.ops.write_i2c_byte(hw, byte_offset,
-	                                  IXGBE_I2C_EEPROM_DEV_ADDR,
-	                                  eeprom_data);
+					  IXGBE_I2C_EEPROM_DEV_ADDR,
+					  eeprom_data);
 }
 
 /**
@@ -1542,7 +1542,7 @@
  *  a specified device address.
  **/
 s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                u8 dev_addr, u8 *data)
+				u8 dev_addr, u8 *data)
 {
 	s32 status = 0;
 	u32 max_retry = 10;
@@ -1631,7 +1631,7 @@
  *  a specified device address.
  **/
 s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                 u8 dev_addr, u8 data)
+				 u8 dev_addr, u8 data)
 {
 	s32 status = 0;
 	u32 max_retry = 1;
@@ -2046,7 +2046,7 @@
 
 	/* Check that the LASI temp alarm status was triggered */
 	hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG,
-	                     MDIO_MMD_PMAPMD, &phy_data);
+			     MDIO_MMD_PMAPMD, &phy_data);
 
 	if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM))
 		goto out;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index 0bb047f..54071ed 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h

@@ -114,47 +114,47 @@
 s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
 s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
 s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
-                               u32 device_type, u16 *phy_data);
+			       u32 device_type, u16 *phy_data);
 s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
-                                u32 device_type, u16 phy_data);
+				u32 device_type, u16 phy_data);
 s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
 			   u32 device_type, u16 *phy_data);
 s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
 			    u32 device_type, u16 phy_data);
 s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
 s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
-                                       ixgbe_link_speed speed,
-                                       bool autoneg_wait_to_complete);
+				       ixgbe_link_speed speed,
+				       bool autoneg_wait_to_complete);
 s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
-                                               ixgbe_link_speed *speed,
-                                               bool *autoneg);
+					       ixgbe_link_speed *speed,
+					       bool *autoneg);
 bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw);
 
 /* PHY specific */
 s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
-                             ixgbe_link_speed *speed,
-                             bool *link_up);
+			     ixgbe_link_speed *speed,
+			     bool *link_up);
 s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
 s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
-                                       u16 *firmware_version);
+				       u16 *firmware_version);
 s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
-                                           u16 *firmware_version);
+					   u16 *firmware_version);
 
 s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
 s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
 s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
 s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
-                                        u16 *list_offset,
-                                        u16 *data_offset);
+					u16 *list_offset,
+					u16 *data_offset);
 s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
 s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                u8 dev_addr, u8 *data);
+				u8 dev_addr, u8 *data);
 s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                 u8 dev_addr, u8 data);
+				 u8 dev_addr, u8 data);
 s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                  u8 *eeprom_data);
+				  u8 *eeprom_data);
 s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
 				   u8 *sff8472_data);
 s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
-                                   u8 eeprom_data);
+				   u8 eeprom_data);
 #endif /* _IXGBE_PHY_H_ */

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 8902ae6..68f87ec 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c

@@ -26,7 +26,6 @@
 
 *******************************************************************************/
 #include "ixgbe.h"
-#include <linux/export.h>
 #include <linux/ptp_classify.h>
 
 /*
@@ -334,7 +333,7 @@
 }
 
 /**
- * ixgbe_ptp_enable
+ * ixgbe_ptp_feature_enable
  * @ptp: the ptp clock structure
  * @rq: the requested feature to change
  * @on: whether to enable or disable the feature
@@ -342,8 +341,8 @@
  * enable (or disable) ancillary features of the phc subsystem.
  * our driver only supports the PPS feature on the X540
  */
-static int ixgbe_ptp_enable(struct ptp_clock_info *ptp,
-			    struct ptp_clock_request *rq, int on)
+static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp,
+				    struct ptp_clock_request *rq, int on)
 {
 	struct ixgbe_adapter *adapter =
 		container_of(ptp, struct ixgbe_adapter, ptp_caps);
@@ -570,9 +569,9 @@
 }
 
 /**
- * ixgbe_ptp_set_ts_config - control hardware time stamping
- * @adapter: pointer to adapter struct
- * @ifreq: ioctl data
+ * ixgbe_ptp_set_timestamp_mode - setup the hardware for the requested mode
+ * @adapter: the private ixgbe adapter structure
+ * @config: the hwtstamp configuration requested
  *
  * Outgoing time stamping can be enabled and disabled. Play nice and
  * disable it when requested, although it shouldn't cause any overhead
@@ -590,25 +589,25 @@
  * packets, regardless of the type specified in the register, only use V2
  * Event mode. This more accurately tells the user what the hardware is going
  * to do anyways.
+ *
+ * Note: this may modify the hwtstamp configuration towards a more general
+ * mode, if required to support the specifically requested mode.
  */
-int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
+static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+				 struct hwtstamp_config *config)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct hwtstamp_config config;
 	u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
 	u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
 	u32 tsync_rx_mtrl = PTP_EV_PORT << 16;
 	bool is_l2 = false;
 	u32 regval;
 
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
 	/* reserved for future extensions */
-	if (config.flags)
+	if (config->flags)
 		return -EINVAL;
 
-	switch (config.tx_type) {
+	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
 		tsync_tx_ctl = 0;
 	case HWTSTAMP_TX_ON:
@@ -617,7 +616,7 @@
 		return -ERANGE;
 	}
 
-	switch (config.rx_filter) {
+	switch (config->rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
 		tsync_rx_ctl = 0;
 		tsync_rx_mtrl = 0;
@@ -641,7 +640,7 @@
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
 		is_l2 = true;
-		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_ALL:
@@ -652,7 +651,7 @@
 		 * Delay_Req messages and hardware does not support
 		 * timestamping all packets => return error
 		 */
-		config.rx_filter = HWTSTAMP_FILTER_NONE;
+		config->rx_filter = HWTSTAMP_FILTER_NONE;
 		return -ERANGE;
 	}
 
@@ -671,7 +670,6 @@
 	else
 		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), 0);
 
-
 	/* enable/disable TX */
 	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
 	regval &= ~IXGBE_TSYNCTXCTL_ENABLED;
@@ -693,6 +691,29 @@
 	regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
 	regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
 
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_set_ts_config - user entry point for timestamp mode
+ * @adapter: pointer to adapter struct
+ * @ifr: ioctl data
+ *
+ * Set hardware to requested mode. If unsupported, return an error with no
+ * changes. Otherwise, store the mode for future reference.
+ */
+int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = ixgbe_ptp_set_timestamp_mode(adapter, &config);
+	if (err)
+		return err;
+
 	/* save these settings for future reference */
 	memcpy(&adapter->tstamp_config, &config,
 	       sizeof(adapter->tstamp_config));
@@ -790,9 +811,13 @@
  * ixgbe_ptp_reset
  * @adapter: the ixgbe private board structure
  *
- * When the MAC resets, all timesync features are reset. This function should be
- * called to re-enable the PTP clock structure. It will re-init the timecounter
- * structure based on the kernel time as well as setup the cycle counter data.
+ * When the MAC resets, all the hardware bits for timesync are reset. This
+ * function is used to re-enable the device for PTP based on current settings.
+ * We do lose the current clock time, so just reset the cyclecounter to the
+ * system real clock time.
+ *
+ * This function will maintain hwtstamp_config settings, and reset the SDP
+ * output if it was enabled.
  */
 void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
 {
@@ -804,8 +829,8 @@
 	IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000);
 	IXGBE_WRITE_FLUSH(hw);
 
-	/* Reset the saved tstamp_config */
-	memset(&adapter->tstamp_config, 0, sizeof(adapter->tstamp_config));
+	/* reset the hardware timestamping mode */
+	ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
 
 	ixgbe_ptp_start_cyclecounter(adapter);
 
@@ -825,16 +850,23 @@
 }
 
 /**
- * ixgbe_ptp_init
+ * ixgbe_ptp_create_clock
  * @adapter: the ixgbe private adapter structure
  *
- * This function performs the required steps for enabling ptp
- * support. If ptp support has already been loaded it simply calls the
- * cyclecounter init routine and exits.
+ * This function performs setup of the user entry point function table and
+ * initializes the PTP clock device, which is used to access the clock-like
+ * features of the PTP core. It will be called by ixgbe_ptp_init, only if
+ * there isn't already a clock device (such as after a suspend/resume cycle,
+ * where the clock device wasn't destroyed).
  */
-void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
+static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	long err;
+
+	/* do nothing if we already have a clock device */
+	if (!IS_ERR_OR_NULL(adapter->ptp_clock))
+		return 0;
 
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_X540:
@@ -851,7 +883,7 @@
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
 		adapter->ptp_caps.gettime = ixgbe_ptp_gettime;
 		adapter->ptp_caps.settime = ixgbe_ptp_settime;
-		adapter->ptp_caps.enable = ixgbe_ptp_enable;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
 		break;
 	case ixgbe_mac_82599EB:
 		snprintf(adapter->ptp_caps.name,
@@ -867,24 +899,57 @@
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
 		adapter->ptp_caps.gettime = ixgbe_ptp_gettime;
 		adapter->ptp_caps.settime = ixgbe_ptp_settime;
-		adapter->ptp_caps.enable = ixgbe_ptp_enable;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
 		break;
 	default:
 		adapter->ptp_clock = NULL;
-		return;
+		return -EOPNOTSUPP;
 	}
 
-	spin_lock_init(&adapter->tmreg_lock);
-	INIT_WORK(&adapter->ptp_tx_work, ixgbe_ptp_tx_hwtstamp_work);
-
 	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
 						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
+		err = PTR_ERR(adapter->ptp_clock);
 		adapter->ptp_clock = NULL;
 		e_dev_err("ptp_clock_register failed\n");
+		return err;
 	} else
 		e_dev_info("registered PHC device on %s\n", netdev->name);
 
+	/* set default timestamp mode to disabled here. We do this in
+	 * create_clock instead of init, because we don't want to override the
+	 * previous settings during a resume cycle.
+	 */
+	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_init
+ * @adapter: the ixgbe private adapter structure
+ *
+ * This function performs the required steps for enabling PTP
+ * support. If PTP support has already been loaded it simply calls the
+ * cyclecounter init routine and exits.
+ */
+void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
+{
+	/* initialize the spin lock first since we can't control when a user
+	 * will call the entry functions once we have initialized the clock
+	 * device
+	 */
+	spin_lock_init(&adapter->tmreg_lock);
+
+	/* obtain a PTP device, or re-use an existing device */
+	if (ixgbe_ptp_create_clock(adapter))
+		return;
+
+	/* we have a clock so we can initialize work now */
+	INIT_WORK(&adapter->ptp_tx_work, ixgbe_ptp_tx_hwtstamp_work);
+
+	/* reset the PTP related hardware bits */
 	ixgbe_ptp_reset(adapter);
 
 	/* enter the IXGBE_PTP_RUNNING state */
@@ -894,28 +959,45 @@
 }
 
 /**
- * ixgbe_ptp_stop - disable ptp device and stop the overflow check
- * @adapter: pointer to adapter struct
+ * ixgbe_ptp_suspend - stop PTP work items
+ * @adapter: pointer to adapter struct
  *
- * this function stops the ptp support, and cancels the delayed work.
+ * this function suspends PTP activity, and prevents more PTP work from being
+ * generated, but does not destroy the PTP clock device.
  */
-void ixgbe_ptp_stop(struct ixgbe_adapter *adapter)
+void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter)
 {
 	/* Leave the IXGBE_PTP_RUNNING state. */
 	if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state))
 		return;
 
-	/* stop the PPS signal */
-	adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
-	ixgbe_ptp_setup_sdp(adapter);
+	/* since this might be called in suspend, we don't clear the state,
+	 * but simply reset the auxiliary PPS signal control register
+	 */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0);
 
+	/* ensure that we cancel any pending PTP Tx work item in progress */
 	cancel_work_sync(&adapter->ptp_tx_work);
 	if (adapter->ptp_tx_skb) {
 		dev_kfree_skb_any(adapter->ptp_tx_skb);
 		adapter->ptp_tx_skb = NULL;
 		clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
 	}
+}
 
+/**
+ * ixgbe_ptp_stop - close the PTP device
+ * @adapter: pointer to adapter struct
+ *
+ * completely destroy the PTP device, should only be called when the device is
+ * being fully closed.
+ */
+void ixgbe_ptp_stop(struct ixgbe_adapter *adapter)
+{
+	/* first, suspend PTP activity */
+	ixgbe_ptp_suspend(adapter);
+
+	/* disable the PTP clock device */
 	if (adapter->ptp_clock) {
 		ptp_clock_unregister(adapter->ptp_clock);
 		adapter->ptp_clock = NULL;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index e6c68d3..16b3a1c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c

@@ -72,8 +72,6 @@
 		for (i = 0; i < num_vf_macvlans; i++) {
 			mv_list->vf = -1;
 			mv_list->free = true;
-			mv_list->rar_entry = hw->mac.num_rar_entries -
-				(i + adapter->num_vfs + 1);
 			list_add(&mv_list->l, &adapter->vf_mvs.l);
 			mv_list++;
 		}
@@ -327,6 +325,7 @@
 	u32 vector_bit;
 	u32 vector_reg;
 	u32 mta_reg;
+	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
 	/* only so many hash values supported */
 	entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
@@ -353,25 +352,13 @@
 		mta_reg |= (1 << vector_bit);
 		IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
 	}
+	vmolr |= IXGBE_VMOLR_ROMPE;
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
 
 	return 0;
 }
 
-static void ixgbe_restore_vf_macvlans(struct ixgbe_adapter *adapter)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	struct list_head *pos;
-	struct vf_macvlans *entry;
-
-	list_for_each(pos, &adapter->vf_mvs.l) {
-		entry = list_entry(pos, struct vf_macvlans, l);
-		if (!entry->free)
-			hw->mac.ops.set_rar(hw, entry->rar_entry,
-					    entry->vf_macvlan,
-					    entry->vf, IXGBE_RAH_AV);
-	}
-}
-
+#ifdef CONFIG_PCI_IOV
 void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -382,6 +369,7 @@
 	u32 mta_reg;
 
 	for (i = 0; i < adapter->num_vfs; i++) {
+		u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i));
 		vfinfo = &adapter->vfinfo[i];
 		for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) {
 			hw->addr_ctrl.mta_in_use++;
@@ -391,11 +379,18 @@
 			mta_reg |= (1 << vector_bit);
 			IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
 		}
+
+		if (vfinfo->num_vf_mc_hashes)
+			vmolr |= IXGBE_VMOLR_ROMPE;
+		else
+			vmolr &= ~IXGBE_VMOLR_ROMPE;
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
 	}
 
 	/* Restore any VF macvlans */
-	ixgbe_restore_vf_macvlans(adapter);
+	ixgbe_full_sync_mac_table(adapter);
 }
+#endif
 
 static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
 			     u32 vf)
@@ -495,8 +490,7 @@
 static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe)
 {
 	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
-	vmolr |= (IXGBE_VMOLR_ROMPE |
-		  IXGBE_VMOLR_BAM);
+	vmolr |= IXGBE_VMOLR_BAM;
 	if (aupe)
 		vmolr |= IXGBE_VMOLR_AUPE;
 	else
@@ -514,7 +508,6 @@
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
-	int rar_entry = hw->mac.num_rar_entries - (vf + 1);
 	u8 num_tcs = netdev_get_num_tc(adapter->netdev);
 
 	/* add PF assigned VLAN or VLAN 0 */
@@ -544,7 +537,7 @@
 	/* Flush and reset the mta with the new values */
 	ixgbe_set_rx_mode(adapter->netdev);
 
-	hw->mac.ops.clear_rar(hw, rar_entry);
+	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
 
 	/* reset VF api back to unknown */
 	adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10;
@@ -553,11 +546,9 @@
 static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
 			    int vf, unsigned char *mac_addr)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
-	int rar_entry = hw->mac.num_rar_entries - (vf + 1);
-
+	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
 	memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
-	hw->mac.ops.set_rar(hw, rar_entry, mac_addr, vf, IXGBE_RAH_AV);
+	ixgbe_add_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
 
 	return 0;
 }
@@ -565,7 +556,6 @@
 static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
 				int vf, int index, unsigned char *mac_addr)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
 	struct list_head *pos;
 	struct vf_macvlans *entry;
 
@@ -576,7 +566,8 @@
 				entry->vf = -1;
 				entry->free = true;
 				entry->is_macvlan = false;
-				hw->mac.ops.clear_rar(hw, entry->rar_entry);
+				ixgbe_del_mac_filter(adapter,
+						     entry->vf_macvlan, vf);
 			}
 		}
 	}
@@ -612,7 +603,7 @@
 	entry->vf = vf;
 	memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
 
-	hw->mac.ops.set_rar(hw, entry->rar_entry, mac_addr, vf, IXGBE_RAH_AV);
+	ixgbe_add_mac_filter(adapter, mac_addr, vf);
 
 	return 0;
 }
@@ -1138,9 +1129,9 @@
 			adapter->vfinfo[vf].vlan_count--;
 		adapter->vfinfo[vf].pf_vlan = 0;
 		adapter->vfinfo[vf].pf_qos = 0;
-       }
+	}
 out:
-       return err;
+	return err;
 }
 
 static int ixgbe_link_mbps(struct ixgbe_adapter *adapter)
@@ -1231,7 +1222,8 @@
 	}
 }
 
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	int link_speed;
@@ -1249,13 +1241,16 @@
 	if (link_speed != 10000)
 		return -EINVAL;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	/* rate limit cannot be less than 10Mbs or greater than link speed */
-	if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed)))
+	if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed)))
 		return -EINVAL;
 
 	/* store values */
 	adapter->vf_rate_link_speed = link_speed;
-	adapter->vfinfo[vf].tx_rate = tx_rate;
+	adapter->vfinfo[vf].tx_rate = max_tx_rate;
 
 	/* update hardware configuration */
 	ixgbe_set_vf_rate_limit(adapter, vf);
@@ -1297,7 +1292,8 @@
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vfinfo[vf].pf_vlan;
 	ivi->qos = adapter->vfinfo[vf].pf_qos;
 	ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 139eadd..32c26d5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h

@@ -34,7 +34,9 @@
  */
 #define IXGBE_MAX_VFS_DRV_LIMIT  (IXGBE_MAX_VF_FUNCTIONS - 1)
 
+#ifdef CONFIG_PCI_IOV
 void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
+#endif
 void ixgbe_msg_task(struct ixgbe_adapter *adapter);
 int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
 void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
@@ -42,7 +44,8 @@
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
 			   u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate);
 int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 			    int vf, struct ifla_vf_info *ivi);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 8a6ff24..9a89f98 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h

@@ -160,7 +160,7 @@
 #define IXGBE_MAX_EITR     0x00000FF8
 #define IXGBE_MIN_EITR     8
 #define IXGBE_EITR(_i)  (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \
-                         (0x012300 + (((_i) - 24) * 4)))
+			 (0x012300 + (((_i) - 24) * 4)))
 #define IXGBE_EITR_ITR_INT_MASK 0x00000FF8
 #define IXGBE_EITR_LLI_MOD      0x00008000
 #define IXGBE_EITR_CNT_WDIS     0x80000000
@@ -213,7 +213,7 @@
  * 64-127: 0x0D014 + (n-64)*0x40
  */
 #define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \
-                          (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
+			  (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
 			  (0x0D014 + (((_i) - 64) * 0x40))))
 /*
  * Rx DCA Control Register:
@@ -222,11 +222,11 @@
  * 64-127: 0x0D00C + (n-64)*0x40
  */
 #define IXGBE_DCA_RXCTRL(_i)    (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \
-                                 (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
+				 (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
 				 (0x0D00C + (((_i) - 64) * 0x40))))
 #define IXGBE_RDRXCTL           0x02F00
 #define IXGBE_RXPBSIZE(_i)      (0x03C00 + ((_i) * 4))
-                                             /* 8 of these 0x03C00 - 0x03C1C */
+					     /* 8 of these 0x03C00 - 0x03C1C */
 #define IXGBE_RXCTRL    0x03000
 #define IXGBE_DROPEN    0x03D04
 #define IXGBE_RXPBSIZE_SHIFT 10
@@ -239,14 +239,14 @@
 /* Multicast Table Array - 128 entries */
 #define IXGBE_MTA(_i)   (0x05200 + ((_i) * 4))
 #define IXGBE_RAL(_i)   (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
-                         (0x0A200 + ((_i) * 8)))
+			 (0x0A200 + ((_i) * 8)))
 #define IXGBE_RAH(_i)   (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
-                         (0x0A204 + ((_i) * 8)))
+			 (0x0A204 + ((_i) * 8)))
 #define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8))
 #define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8))
 /* Packet split receive type */
 #define IXGBE_PSRTYPE(_i)    (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \
-                              (0x0EA00 + ((_i) * 4)))
+			      (0x0EA00 + ((_i) * 4)))
 /* array of 4096 1-bit vlan filters */
 #define IXGBE_VFTA(_i)  (0x0A000 + ((_i) * 4))
 /*array of 4096 4-bit vlan vmdq indices */
@@ -696,7 +696,7 @@
 
 #define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4))
 #define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \
-                         (0x08600 + ((_i) * 4)))
+			 (0x08600 + ((_i) * 4)))
 #define IXGBE_TQSM(_i)  (0x08600 + ((_i) * 4))
 
 #define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */
@@ -820,7 +820,7 @@
 #define IXGBE_GCR_EXT_VT_MODE_32        0x00000002
 #define IXGBE_GCR_EXT_VT_MODE_64        0x00000003
 #define IXGBE_GCR_EXT_SRIOV             (IXGBE_GCR_EXT_MSIX_EN | \
-                                         IXGBE_GCR_EXT_VT_MODE_64)
+					 IXGBE_GCR_EXT_VT_MODE_64)
 
 /* Time Sync Registers */
 #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */
@@ -1396,10 +1396,10 @@
 #define IXGBE_EIMC_OTHER        IXGBE_EICR_OTHER     /* INT Cause Active */
 
 #define IXGBE_EIMS_ENABLE_MASK ( \
-                                IXGBE_EIMS_RTX_QUEUE       | \
-                                IXGBE_EIMS_LSC             | \
-                                IXGBE_EIMS_TCP_TIMER       | \
-                                IXGBE_EIMS_OTHER)
+				IXGBE_EIMS_RTX_QUEUE       | \
+				IXGBE_EIMS_LSC             | \
+				IXGBE_EIMS_TCP_TIMER       | \
+				IXGBE_EIMS_OTHER)
 
 /* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
 #define IXGBE_IMIR_PORT_IM_EN     0x00010000  /* TCP port enable */
@@ -2161,18 +2161,18 @@
 
 /* Masks to determine if packets should be dropped due to frame errors */
 #define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
-                                      IXGBE_RXD_ERR_CE | \
-                                      IXGBE_RXD_ERR_LE | \
-                                      IXGBE_RXD_ERR_PE | \
-                                      IXGBE_RXD_ERR_OSE | \
-                                      IXGBE_RXD_ERR_USE)
+				      IXGBE_RXD_ERR_CE | \
+				      IXGBE_RXD_ERR_LE | \
+				      IXGBE_RXD_ERR_PE | \
+				      IXGBE_RXD_ERR_OSE | \
+				      IXGBE_RXD_ERR_USE)
 
 #define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
-                                      IXGBE_RXDADV_ERR_CE | \
-                                      IXGBE_RXDADV_ERR_LE | \
-                                      IXGBE_RXDADV_ERR_PE | \
-                                      IXGBE_RXDADV_ERR_OSE | \
-                                      IXGBE_RXDADV_ERR_USE)
+				      IXGBE_RXDADV_ERR_CE | \
+				      IXGBE_RXDADV_ERR_LE | \
+				      IXGBE_RXDADV_ERR_PE | \
+				      IXGBE_RXDADV_ERR_OSE | \
+				      IXGBE_RXDADV_ERR_USE)
 
 /* Multicast bit mask */
 #define IXGBE_MCSTCTRL_MFE      0x4
@@ -2393,9 +2393,9 @@
 #define IXGBE_ADVTXD_CC         0x00000080 /* Check Context */
 #define IXGBE_ADVTXD_POPTS_SHIFT      8  /* Adv desc POPTS shift */
 #define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \
-                                 IXGBE_ADVTXD_POPTS_SHIFT)
+				 IXGBE_ADVTXD_POPTS_SHIFT)
 #define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
-                                 IXGBE_ADVTXD_POPTS_SHIFT)
+				 IXGBE_ADVTXD_POPTS_SHIFT)
 #define IXGBE_ADVTXD_POPTS_ISCO_1ST  0x00000000 /* 1st TSO of iSCSI PDU */
 #define IXGBE_ADVTXD_POPTS_ISCO_MDL  0x00000800 /* Middle TSO of iSCSI PDU */
 #define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
@@ -2435,10 +2435,10 @@
 #define IXGBE_LINK_SPEED_1GB_FULL  0x0020
 #define IXGBE_LINK_SPEED_10GB_FULL 0x0080
 #define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \
-                                        IXGBE_LINK_SPEED_10GB_FULL)
+					IXGBE_LINK_SPEED_10GB_FULL)
 #define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \
-                                        IXGBE_LINK_SPEED_1GB_FULL | \
-                                        IXGBE_LINK_SPEED_10GB_FULL)
+					IXGBE_LINK_SPEED_1GB_FULL | \
+					IXGBE_LINK_SPEED_10GB_FULL)
 
 
 /* Physical layer type */
@@ -2746,7 +2746,7 @@
 /* Flow control parameters */
 struct ixgbe_fc_info {
 	u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */
-	u32 low_water; /* Flow Control Low-water */
+	u32 low_water[MAX_TRAFFIC_CLASS]; /* Flow Control Low-water */
 	u16 pause_time; /* Flow Control Pause timer */
 	bool send_xon; /* Flow control send XON */
 	bool strict_ieee; /* Strict IEEE mode */
@@ -2840,7 +2840,7 @@
 
 /* iterator type for walking multicast address lists */
 typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
-                                  u32 *vmdq);
+				  u32 *vmdq);
 
 /* Function pointer table */
 struct ixgbe_eeprom_operations {
@@ -2887,7 +2887,7 @@
 	s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
 	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
 	s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
-	                             bool *);
+				     bool *);
 
 	/* Packet Buffer Manipulation */
 	void (*set_rxpba)(struct ixgbe_hw *, int, u32, int);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 188a597..40dd798 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c

@@ -81,7 +81,7 @@
 				     bool autoneg_wait_to_complete)
 {
 	return hw->phy.ops.setup_link_speed(hw, speed,
-	                                    autoneg_wait_to_complete);
+					    autoneg_wait_to_complete);
 }
 
 /**
@@ -155,7 +155,7 @@
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
 	if (is_valid_ether_addr(hw->mac.san_addr)) {
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
-		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
 		/* Save the SAN MAC RAR index */
 		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
@@ -166,7 +166,7 @@
 
 	/* Store the alternative WWNN/WWPN prefix */
 	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
-	                           &hw->mac.wwpn_prefix);
+				   &hw->mac.wwpn_prefix);
 
 reset_hw_out:
 	return status;
@@ -237,9 +237,9 @@
 
 		eec = IXGBE_READ_REG(hw, IXGBE_EEC);
 		eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
-		                    IXGBE_EEC_SIZE_SHIFT);
+				    IXGBE_EEC_SIZE_SHIFT);
 		eeprom->word_size = 1 << (eeprom_size +
-		                          IXGBE_EEPROM_WORD_SIZE_SHIFT);
+					  IXGBE_EEPROM_WORD_SIZE_SHIFT);
 
 		hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
 		       eeprom->type, eeprom->word_size);
@@ -712,8 +712,7 @@
 			udelay(50);
 		}
 	} else {
-		hw_dbg(hw, "Software semaphore SMBI between device drivers "
-		           "not granted.\n");
+		hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n");
 	}
 
 	return status;
@@ -813,7 +812,7 @@
 	.clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
 	.get_media_type         = &ixgbe_get_media_type_X540,
 	.get_supported_physical_layer =
-                                  &ixgbe_get_supported_physical_layer_X540,
+				  &ixgbe_get_supported_physical_layer_X540,
 	.enable_rx_dma          = &ixgbe_enable_rx_dma_generic,
 	.get_mac_addr           = &ixgbe_get_mac_addr_generic,
 	.get_san_mac_addr       = &ixgbe_get_san_mac_addr_generic,

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 1baecb6..d420f12 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c

@@ -135,8 +135,8 @@
 		ethtool_cmd_speed_set(ecmd, speed);
 		ecmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	return 0;
@@ -813,5 +813,5 @@
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbevf_ethtool_ops);
+	netdev->ethtool_ops = &ixgbevf_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index de2793b..75467f8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c

@@ -85,7 +85,7 @@
 MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl);
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
-MODULE_DESCRIPTION("Intel(R) 82599 Virtual Function Driver");
+MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index b7b8d74..b151a94 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c

@@ -42,6 +42,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/etherdevice.h>
@@ -179,10 +180,18 @@
  * Misc definitions.
  */
 #define DEFAULT_RX_QUEUE_SIZE	128
-#define DEFAULT_TX_QUEUE_SIZE	256
+#define DEFAULT_TX_QUEUE_SIZE	512
 #define SKB_DMA_REALIGN		((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
 
+#define TSO_HEADER_SIZE		128
 
+/* Max number of allowed TCP segments for software TSO */
+#define MV643XX_MAX_TSO_SEGS 100
+#define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+/* True if addr falls inside txq's TSO header DMA buffer (args unparenthesized) */
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	 (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
 /*
  * RX/TX descriptors.
  */
@@ -250,6 +259,7 @@
 #define GEN_TCP_UDP_CHECKSUM		0x00020000
 #define UDP_FRAME			0x00010000
 #define MAC_HDR_EXTRA_4_BYTES		0x00008000
+#define GEN_TCP_UDP_CHK_FULL		0x00000400
 #define MAC_HDR_EXTRA_8_BYTES		0x00000200
 
 #define TX_IHL_SHIFT			11
@@ -345,6 +355,12 @@
 	int tx_curr_desc;
 	int tx_used_desc;
 
+	int tx_stop_threshold;
+	int tx_wake_threshold;
+
+	char *tso_hdrs;
+	dma_addr_t tso_hdrs_dma;
+
 	struct tx_desc *tx_desc_area;
 	dma_addr_t tx_desc_dma;
 	int tx_desc_area_size;
@@ -491,7 +507,7 @@
 
 	if (netif_tx_queue_stopped(nq)) {
 		__netif_tx_lock(nq, smp_processor_id());
-		if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1)
+		if (txq->tx_desc_count <= txq->tx_wake_threshold)
 			netif_tx_wake_queue(nq);
 		__netif_tx_unlock(nq);
 	}
@@ -661,6 +677,198 @@
 	return 0;
 }
 
+static inline __be16 sum16_as_be(__sum16 sum)
+{
+	return (__force __be16)sum;	/* plain cast to __be16; no conversion helper is called */
+}
+/* Compute TX checksum command bits for skb into *command; returns 0 or an error */
+static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb,
+		       u16 *l4i_chk, u32 *command, int length)
+{
+	int ret;
+	u32 cmd = 0;	/* command bits accumulated below */
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		int hdr_len;
+		int tag_bytes;
+
+		BUG_ON(skb->protocol != htons(ETH_P_IP) &&
+		       skb->protocol != htons(ETH_P_8021Q));
+
+		hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;	/* bytes ahead of the IP header */
+		tag_bytes = hdr_len - ETH_HLEN;	/* bytes beyond a plain Ethernet header */
+
+		if (length - hdr_len > mp->shared->tx_csum_limit ||
+		    unlikely(tag_bytes & ~12)) {	/* only the 4 and 8 bits can be encoded below */
+			ret = skb_checksum_help(skb);	/* fallback when HW offload is not usable */
+			if (!ret)
+				goto no_csum;
+			return ret;	/* *command is left unwritten on this path */
+		}
+
+		if (tag_bytes & 4)
+			cmd |= MAC_HDR_EXTRA_4_BYTES;
+		if (tag_bytes & 8)
+			cmd |= MAC_HDR_EXTRA_8_BYTES;
+
+		cmd |= GEN_TCP_UDP_CHECKSUM | GEN_TCP_UDP_CHK_FULL |
+			   GEN_IP_V4_CHECKSUM   |
+			   ip_hdr(skb)->ihl << TX_IHL_SHIFT;
+
+		/* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL
+		 * it seems we don't need to pass the initial checksum. */
+		switch (ip_hdr(skb)->protocol) {
+		case IPPROTO_UDP:
+			cmd |= UDP_FRAME;
+			*l4i_chk = 0;
+			break;
+		case IPPROTO_TCP:
+			*l4i_chk = 0;
+			break;
+		default:
+			WARN(1, "protocol not supported");	/* cmd keeps the bits set above */
+		}
+	} else {
+no_csum:
+		/* Errata BTS #50, IHL must be 5 if no HW checksum */
+		cmd |= 5 << TX_IHL_SHIFT;
+	}
+	*command = cmd;
+	return 0;
+}
+/* Fill the next TX descriptor with one DMA-mapped chunk of TSO payload */
+static inline int
+txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
+		 struct sk_buff *skb, char *data, int length,	/* skb is not referenced in the body */
+		 bool last_tcp, bool is_last)
+{
+	int tx_index;
+	u32 cmd_sts;
+	struct tx_desc *desc;
+
+	tx_index = txq->tx_curr_desc++;	/* take the current slot and advance */
+	if (txq->tx_curr_desc == txq->tx_ring_size)
+		txq->tx_curr_desc = 0;	/* wrap to the start of the ring */
+	desc = &txq->tx_desc_area[tx_index];
+
+	desc->l4i_chk = 0;
+	desc->byte_cnt = length;
+	desc->buf_ptr = dma_map_single(dev->dev.parent, data,
+				       length, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
+		WARN(1, "dma_map_single failed!\n");
+		return -ENOMEM;	/* NOTE(review): tx_curr_desc stays advanced on this path */
+	}
+
+	cmd_sts = BUFFER_OWNED_BY_DMA;
+	if (last_tcp) {
+		/* last descriptor in the TCP packet */
+		cmd_sts |= ZERO_PADDING | TX_LAST_DESC;
+		/* last descriptor in SKB */
+		if (is_last)
+			cmd_sts |= TX_ENABLE_INTERRUPT;
+	}
+	desc->cmd_sts = cmd_sts;	/* written after byte_cnt and buf_ptr */
+	return 0;
+}
+/* Point the current TX descriptor at this slot's TSO header buffer and advance */
+static inline void
+txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
+{
+	struct mv643xx_eth_private *mp = txq_to_mp(txq);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);	/* used as byte_cnt below */
+	int tx_index;
+	struct tx_desc *desc;
+	int ret;
+	u32 cmd_csum = 0;
+	u16 l4i_chk = 0;
+
+	tx_index = txq->tx_curr_desc;
+	desc = &txq->tx_desc_area[tx_index];
+
+	ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_csum, length);
+	if (ret)
+		WARN(1, "failed to prepare checksum!");	/* warn only; the descriptor is still filled in */
+
+	/* Should we set this? Can't use the value from skb_tx_csum()
+	 * as it's not the correct initial L4 checksum to use. */
+	desc->l4i_chk = 0;
+
+	desc->byte_cnt = hdr_len;
+	desc->buf_ptr = txq->tso_hdrs_dma +
+			txq->tx_curr_desc * TSO_HEADER_SIZE;	/* header slot indexed by ring position */
+	desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA  | TX_FIRST_DESC |
+				   GEN_CRC;
+
+	txq->tx_curr_desc++;
+	if (txq->tx_curr_desc == txq->tx_ring_size)
+		txq->tx_curr_desc = 0;	/* wrap to the start of the ring */
+}
+/* Software TSO: queue a header descriptor plus data descriptors per segment */
+static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
+			  struct net_device *dev)
+{
+	struct mv643xx_eth_private *mp = txq_to_mp(txq);
+	int total_len, data_left, ret;
+	int desc_count = 0;
+	struct tso_t tso;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	/* Count needed descriptors */
+	if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
+		netdev_dbg(dev, "not enough descriptors for TSO!\n");
+		return -EBUSY;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;	/* payload bytes not yet queued */
+	while (total_len > 0) {
+		char *hdr;
+
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);	/* payload of this segment */
+		total_len -= data_left;
+		desc_count++;	/* one descriptor for the header */
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		txq_put_hdr_tso(skb, txq, data_left);
+
+		while (data_left > 0) {
+			int size;
+			desc_count++;	/* one descriptor per data chunk */
+
+			size = min_t(int, tso.size, data_left);
+			ret = txq_put_data_tso(dev, txq, skb, tso.data, size,
+					       size == data_left,	/* last_tcp */
+					       total_len == 0);	/* is_last */
+			if (ret)
+				goto err_release;
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+	}
+
+	__skb_queue_tail(&txq->tx_skb, skb);
+	skb_tx_timestamp(skb);
+
+	/* clear TX_END status */
+	mp->work_tx_end &= ~(1 << txq->index);
+
+	/* ensure all descriptors are written before poking hardware */
+	wmb();
+	txq_enable(txq);
+	txq->tx_desc_count += desc_count;
+	return 0;
+err_release:
+	/* TODO: Release all used data descriptors; header descriptors must not
+	 * be DMA-unmapped.
+	 */
+	return ret;	/* NOTE(review): tx_curr_desc is not rewound and tx_desc_count is not updated here */
+}
+
 static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 {
 	struct mv643xx_eth_private *mp = txq_to_mp(txq);
@@ -671,8 +879,10 @@
 		skb_frag_t *this_frag;
 		int tx_index;
 		struct tx_desc *desc;
+		void *addr;
 
 		this_frag = &skb_shinfo(skb)->frags[frag];
+		addr = page_address(this_frag->page.p) + this_frag->page_offset;
 		tx_index = txq->tx_curr_desc++;
 		if (txq->tx_curr_desc == txq->tx_ring_size)
 			txq->tx_curr_desc = 0;
@@ -692,19 +902,13 @@
 
 		desc->l4i_chk = 0;
 		desc->byte_cnt = skb_frag_size(this_frag);
-		desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
-						 this_frag, 0,
-						 skb_frag_size(this_frag),
-						 DMA_TO_DEVICE);
+		desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
+					       desc->byte_cnt, DMA_TO_DEVICE);
 	}
 }
 
-static inline __be16 sum16_as_be(__sum16 sum)
-{
-	return (__force __be16)sum;
-}
-
-static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
+static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb,
+			  struct net_device *dev)
 {
 	struct mv643xx_eth_private *mp = txq_to_mp(txq);
 	int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -712,54 +916,22 @@
 	struct tx_desc *desc;
 	u32 cmd_sts;
 	u16 l4i_chk;
-	int length;
+	int length, ret;
 
-	cmd_sts = TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
+	cmd_sts = 0;
 	l4i_chk = 0;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int hdr_len;
-		int tag_bytes;
-
-		BUG_ON(skb->protocol != htons(ETH_P_IP) &&
-		       skb->protocol != htons(ETH_P_8021Q));
-
-		hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
-		tag_bytes = hdr_len - ETH_HLEN;
-		if (skb->len - hdr_len > mp->shared->tx_csum_limit ||
-		    unlikely(tag_bytes & ~12)) {
-			if (skb_checksum_help(skb) == 0)
-				goto no_csum;
-			dev_kfree_skb_any(skb);
-			return 1;
-		}
-
-		if (tag_bytes & 4)
-			cmd_sts |= MAC_HDR_EXTRA_4_BYTES;
-		if (tag_bytes & 8)
-			cmd_sts |= MAC_HDR_EXTRA_8_BYTES;
-
-		cmd_sts |= GEN_TCP_UDP_CHECKSUM |
-			   GEN_IP_V4_CHECKSUM   |
-			   ip_hdr(skb)->ihl << TX_IHL_SHIFT;
-
-		switch (ip_hdr(skb)->protocol) {
-		case IPPROTO_UDP:
-			cmd_sts |= UDP_FRAME;
-			l4i_chk = ntohs(sum16_as_be(udp_hdr(skb)->check));
-			break;
-		case IPPROTO_TCP:
-			l4i_chk = ntohs(sum16_as_be(tcp_hdr(skb)->check));
-			break;
-		default:
-			BUG();
-		}
-	} else {
-no_csum:
-		/* Errata BTS #50, IHL must be 5 if no HW checksum */
-		cmd_sts |= 5 << TX_IHL_SHIFT;
+	if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
+		if (net_ratelimit())
+			netdev_err(dev, "tx queue full?!\n");
+		return -EBUSY;
 	}
 
+	ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_sts, skb->len);
+	if (ret)
+		return ret;
+	cmd_sts |= TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
+
 	tx_index = txq->tx_curr_desc++;
 	if (txq->tx_curr_desc == txq->tx_ring_size)
 		txq->tx_curr_desc = 0;
@@ -801,7 +973,7 @@
 static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
-	int length, queue;
+	int length, queue, ret;
 	struct tx_queue *txq;
 	struct netdev_queue *nq;
 
@@ -810,30 +982,26 @@
 	nq = netdev_get_tx_queue(dev, queue);
 
 	if (has_tiny_unaligned_frags(skb) && __skb_linearize(skb)) {
-		txq->tx_dropped++;
 		netdev_printk(KERN_DEBUG, dev,
 			      "failed to linearize skb with tiny unaligned fragment\n");
 		return NETDEV_TX_BUSY;
 	}
 
-	if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
-		if (net_ratelimit())
-			netdev_err(dev, "tx queue full?!\n");
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	length = skb->len;
 
-	if (!txq_submit_skb(txq, skb)) {
-		int entries_left;
-
+	if (skb_is_gso(skb))
+		ret = txq_submit_tso(txq, skb, dev);
+	else
+		ret = txq_submit_skb(txq, skb, dev);
+	if (!ret) {
 		txq->tx_bytes += length;
 		txq->tx_packets++;
 
-		entries_left = txq->tx_ring_size - txq->tx_desc_count;
-		if (entries_left < MAX_SKB_FRAGS + 1)
+		if (txq->tx_desc_count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
+	} else {
+		txq->tx_dropped++;
+		dev_kfree_skb_any(skb);
 	}
 
 	return NETDEV_TX_OK;
@@ -907,14 +1075,9 @@
 			mp->dev->stats.tx_errors++;
 		}
 
-		if (cmd_sts & TX_FIRST_DESC) {
+		if (!IS_TSO_HEADER(txq, desc->buf_ptr))
 			dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
 					 desc->byte_cnt, DMA_TO_DEVICE);
-		} else {
-			dma_unmap_page(mp->dev->dev.parent, desc->buf_ptr,
-				       desc->byte_cnt, DMA_TO_DEVICE);
-		}
-
 		dev_kfree_skb(skb);
 	}
 
@@ -1010,8 +1173,9 @@
 
 
 /* mii management interface *************************************************/
-static void mv643xx_adjust_pscr(struct mv643xx_eth_private *mp)
+static void mv643xx_eth_adjust_link(struct net_device *dev)
 {
+	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	u32 pscr = rdlp(mp, PORT_SERIAL_CONTROL);
 	u32 autoneg_disable = FORCE_LINK_PASS |
 	             DISABLE_AUTO_NEG_SPEED_GMII |
@@ -1387,7 +1551,7 @@
 
 	ret = phy_ethtool_sset(mp->phy, cmd);
 	if (!ret)
-		mv643xx_adjust_pscr(mp);
+		mv643xx_eth_adjust_link(dev);
 	return ret;
 }
 
@@ -1456,7 +1620,11 @@
 		return -EINVAL;
 
 	mp->rx_ring_size = er->rx_pending < 4096 ? er->rx_pending : 4096;
-	mp->tx_ring_size = er->tx_pending < 4096 ? er->tx_pending : 4096;
+	mp->tx_ring_size = clamp_t(unsigned int, er->tx_pending,
+				   MV643XX_MAX_SKB_DESCS * 2, 4096);
+	if (mp->tx_ring_size != er->tx_pending)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    mp->tx_ring_size, er->tx_pending);
 
 	if (netif_running(dev)) {
 		mv643xx_eth_stop(dev);
@@ -1832,6 +2000,13 @@
 
 	txq->tx_ring_size = mp->tx_ring_size;
 
+	/* A queue must always have room for at least one skb.
+	 * Therefore, stop the queue when the number of free entries
+	 * drops to the maximum number of descriptors per skb.
+	 */
+	txq->tx_stop_threshold = txq->tx_ring_size - MV643XX_MAX_SKB_DESCS;
+	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
+
 	txq->tx_desc_count = 0;
 	txq->tx_curr_desc = 0;
 	txq->tx_used_desc = 0;
@@ -1871,6 +2046,15 @@
 					nexti * sizeof(struct tx_desc);
 	}
 
+	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+	txq->tso_hdrs = dma_alloc_coherent(mp->dev->dev.parent,
+					   txq->tx_ring_size * TSO_HEADER_SIZE,
+					   &txq->tso_hdrs_dma, GFP_KERNEL);
+	if (txq->tso_hdrs == NULL) {
+		dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
+				  txq->tx_desc_area, txq->tx_desc_dma);
+		return -ENOMEM;
+	}
 	skb_queue_head_init(&txq->tx_skb);
 
 	return 0;
@@ -1891,6 +2075,10 @@
 	else
 		dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
 				  txq->tx_desc_area, txq->tx_desc_dma);
+	if (txq->tso_hdrs)
+		dma_free_coherent(mp->dev->dev.parent,
+				  txq->tx_ring_size * TSO_HEADER_SIZE,
+				  txq->tso_hdrs, txq->tso_hdrs_dma);
 }
 
 
@@ -2303,7 +2491,7 @@
 
 	ret = phy_mii_ioctl(mp->phy, ifr, cmd);
 	if (!ret)
-		mv643xx_adjust_pscr(mp);
+		mv643xx_eth_adjust_link(dev);
 	return ret;
 }
 
@@ -2678,6 +2866,7 @@
 		       struct mv643xx_eth_platform_data *pd)
 {
 	struct net_device *dev = mp->dev;
+	unsigned int tx_ring_size;
 
 	if (is_valid_ether_addr(pd->mac_addr))
 		memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN);
@@ -2692,22 +2881,22 @@
 
 	mp->rxq_count = pd->rx_queue_count ? : 1;
 
-	mp->tx_ring_size = DEFAULT_TX_QUEUE_SIZE;
+	tx_ring_size = DEFAULT_TX_QUEUE_SIZE;
 	if (pd->tx_queue_size)
-		mp->tx_ring_size = pd->tx_queue_size;
+		tx_ring_size = pd->tx_queue_size;
+
+	mp->tx_ring_size = clamp_t(unsigned int, tx_ring_size,
+				   MV643XX_MAX_SKB_DESCS * 2, 4096);
+	if (mp->tx_ring_size != tx_ring_size)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    mp->tx_ring_size, tx_ring_size);
+
 	mp->tx_desc_sram_addr = pd->tx_sram_addr;
 	mp->tx_desc_sram_size = pd->tx_sram_size;
 
 	mp->txq_count = pd->tx_queue_count ? : 1;
 }
 
-static void mv643xx_eth_adjust_link(struct net_device *dev)
-{
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
-
-	mv643xx_adjust_pscr(mp);
-}
-
 static struct phy_device *phy_scan(struct mv643xx_eth_private *mp,
 				   int phy_addr)
 {
@@ -2889,7 +3078,7 @@
 	if (err)
 		goto out;
 
-	SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops);
+	dev->ethtool_ops = &mv643xx_eth_ethtool_ops;
 
 	init_pscr(mp, pd->speed, pd->duplex);
 
@@ -2921,11 +3110,14 @@
 	dev->watchdog_timeo = 2 * HZ;
 	dev->base_addr = 0;
 
-	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+	dev->vlan_features = dev->features;
+
+	dev->features |= NETIF_F_RXCSUM;
+	dev->hw_features = dev->features;
 
 	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->gso_max_segs = MV643XX_MAX_TSO_SEGS;
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 

diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 9d5ced2..fc2fb25 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c

@@ -195,11 +195,10 @@
 		return -ENODEV;
 	}
 
-	bus = mdiobus_alloc_size(sizeof(struct orion_mdio_dev));
-	if (!bus) {
-		dev_err(&pdev->dev, "Cannot allocate MDIO bus\n");
+	bus = devm_mdiobus_alloc_size(&pdev->dev,
+				      sizeof(struct orion_mdio_dev));
+	if (!bus)
 		return -ENOMEM;
-	}
 
 	bus->name = "orion_mdio_bus";
 	bus->read = orion_mdio_read;
@@ -208,11 +207,10 @@
 		 dev_name(&pdev->dev));
 	bus->parent = &pdev->dev;
 
-	bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
-	if (!bus->irq) {
-		mdiobus_free(bus);
+	bus->irq = devm_kmalloc_array(&pdev->dev, PHY_MAX_ADDR, sizeof(int),
+				      GFP_KERNEL);
+	if (!bus->irq)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < PHY_MAX_ADDR; i++)
 		bus->irq[i] = PHY_POLL;
@@ -264,8 +262,6 @@
 out_mdio:
 	if (!IS_ERR(dev->clk))
 		clk_disable_unprepare(dev->clk);
-	kfree(bus->irq);
-	mdiobus_free(bus);
 	return ret;
 }
 
@@ -276,8 +272,6 @@
 
 	writel(0, dev->regs + MVMDIO_ERR_INT_MASK);
 	mdiobus_unregister(bus);
-	kfree(bus->irq);
-	mdiobus_free(bus);
 	if (!IS_ERR(dev->clk))
 		clk_disable_unprepare(dev->clk);
 

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 14786c8..45beca1 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c

@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <linux/io.h>
+#include <net/tso.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -218,9 +219,6 @@
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
-/* Napi polling weight */
-#define MVNETA_RX_POLL_WEIGHT		64
-
 /* The two bytes Marvell header. Either contains a special value used
  * by Marvell switches when a specific hardware mode is enabled (not
  * supported by this driver) or is filled automatically by zeroes on
@@ -244,12 +242,20 @@
 
 #define MVNETA_TX_MTU_MAX		0x3ffff
 
+/* TSO header size */
+#define TSO_HEADER_SIZE 128
+
 /* Max number of Rx descriptors */
 #define MVNETA_MAX_RXD 128
 
 /* Max number of Tx descriptors */
 #define MVNETA_MAX_TXD 532
 
+/* Max number of allowed TCP segments for software TSO */
+#define MVNETA_MAX_TSO_SEGS 100
+
+#define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
 
@@ -258,6 +264,10 @@
 	      ETH_HLEN + ETH_FCS_LEN,			     \
 	      MVNETA_CPU_D_CACHE_LINE_SIZE)
 
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_phys) && \
+	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE)) /* addr in TSO header range? */
+
 #define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
 
 struct mvneta_pcpu_stats {
@@ -279,9 +289,6 @@
 	u32 cause_rx_tx;
 	struct napi_struct napi;
 
-	/* Napi weight */
-	int weight;
-
 	/* Core clock */
 	struct clk *clk;
 	u8 mcast_count[256];
@@ -390,6 +397,8 @@
 	 * descriptor ring
 	 */
 	int count;
+	int tx_stop_threshold;
+	int tx_wake_threshold;
 
 	/* Array of transmitted skb */
 	struct sk_buff **tx_skb;
@@ -413,6 +422,12 @@
 
 	/* Index of the next TX DMA descriptor to process */
 	int next_desc_to_proc;
+
+	/* DMA buffers for TSO headers */
+	char *tso_hdrs;
+
+	/* DMA address of TSO headers */
+	dma_addr_t tso_hdrs_phys;
 };
 
 struct mvneta_rx_queue {
@@ -441,7 +456,10 @@
 	int next_desc_to_proc;
 };
 
-static int rxq_number = 8;
+/* The hardware supports eight (8) rx queues, but we are only allowing
+ * the first one to be used. Therefore, let's just allocate one queue.
+ */
+static int rxq_number = 1;
 static int txq_number = 8;
 
 static int rxq_def;
@@ -1277,11 +1295,12 @@
 
 		mvneta_txq_inc_get(txq);
 
+		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size, DMA_TO_DEVICE);
 		if (!skb)
 			continue;
-
-		dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr,
-				 tx_desc->data_size, DMA_TO_DEVICE);
 		dev_kfree_skb_any(skb);
 	}
 }
@@ -1302,7 +1321,7 @@
 	txq->count -= tx_done;
 
 	if (netif_tx_queue_stopped(nq)) {
-		if (txq->size - txq->count >= MAX_SKB_FRAGS + 1)
+		if (txq->count <= txq->tx_wake_threshold)
 			netif_tx_wake_queue(nq);
 	}
 }
@@ -1519,14 +1538,134 @@
 	return rx_done;
 }
 
+/* Queue the descriptor for one segment's header, which lives in the TSO
+ * header slot selected by the current put index; no skb is attached to it.
+ */
+static inline void
+mvneta_tso_put_hdr(struct sk_buff *skb,
+		   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
+{
+	int slot = txq->txq_put_index;
+	struct mvneta_tx_desc *desc = mvneta_txq_next_desc_get(txq);
+
+	txq->tx_skb[slot] = NULL;
+	desc->data_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	desc->command = mvneta_skb_tx_csum(pp, skb) | MVNETA_TXD_F_DESC;
+	desc->buf_phys_addr = txq->tso_hdrs_phys + slot * TSO_HEADER_SIZE;
+	mvneta_txq_inc_put(txq);
+}
+
+/* Map @size bytes at @data for DMA and queue them as one TX descriptor.
+ * @last_tcp marks the final descriptor of a segment, @is_last the final
+ * segment of @skb. Returns 0, or -ENOMEM when the DMA mapping fails; in
+ * that case the descriptor is released via mvneta_txq_desc_put().
+ */
+static inline int
+mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
+		    struct sk_buff *skb, char *data, int size,
+		    bool last_tcp, bool is_last)
+{
+	struct device *parent = dev->dev.parent;
+	struct mvneta_tx_desc *desc = mvneta_txq_next_desc_get(txq);
+
+	desc->data_size = size;
+	desc->buf_phys_addr = dma_map_single(parent, data, size,
+					     DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(parent, desc->buf_phys_addr))) {
+		mvneta_txq_desc_put(txq);
+		return -ENOMEM;
+	}
+
+	/* Only a segment's final descriptor carries the "last" flag */
+	desc->command = last_tcp ? MVNETA_TXD_L_DESC : 0;
+
+	/* The skb is attached to its very last descriptor; every other
+	 * slot is cleared.
+	 */
+	txq->tx_skb[txq->txq_put_index] = (last_tcp && is_last) ? skb : NULL;
+	mvneta_txq_inc_put(txq);
+	return 0;
+}
+
+/* Send a GSO skb as a series of TCP segments: each one gets a descriptor for
+ * its header, built in txq->tso_hdrs, followed by DMA-mapped payload
+ * descriptors. Returns the number of descriptors used, or 0 on failure.
+ */
+static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
+			 struct mvneta_tx_queue *txq)
+{
+	int total_len, data_left, i, desc_count = 0;
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct tso_t tso;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int first_desc = txq->next_desc_to_proc;
+
+	/* Count needed descriptors */
+	if ((txq->count + tso_count_descs(skb)) >= txq->size)
+		return 0;
+
+	if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
+		pr_info("*** Is this even  possible???!?!?\n");
+		return 0;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+		desc_count++;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		mvneta_tso_put_hdr(skb, pp, txq);
+
+		while (data_left > 0) {
+			int size = min_t(int, tso.size, data_left);
+
+			desc_count++;
+			if (mvneta_tso_put_data(dev, txq, skb, tso.data, size,
+						size == data_left,
+						total_len == 0))
+				goto err_release;
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+	}
+
+	return desc_count;
+
+err_release:
+	/* mvneta_tso_put_data() has already given back the descriptor it
+	 * failed to map, so desc_count - 1 descriptors are still held,
+	 * starting at ring index first_desc. Header descriptors must not
+	 * be DMA-unmapped.
+	 */
+	for (i = 0; i < desc_count - 1; i++) {
+		struct mvneta_tx_desc *tx_desc = txq->descs +
+						 (first_desc + i) % txq->size;
+		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size, DMA_TO_DEVICE);
+		mvneta_txq_desc_put(txq);
+	}
+	return 0;
+}
+
 /* Handle tx fragmentation processing */
 static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 				  struct mvneta_tx_queue *txq)
 {
 	struct mvneta_tx_desc *tx_desc;
-	int i;
+	int i, nr_frags = skb_shinfo(skb)->nr_frags;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+	for (i = 0; i < nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		void *addr = page_address(frag->page.p) + frag->page_offset;
 
@@ -1543,20 +1682,16 @@
 			goto error;
 		}
 
-		if (i == (skb_shinfo(skb)->nr_frags - 1)) {
+		if (i == nr_frags - 1) {
 			/* Last descriptor */
 			tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
-
 			txq->tx_skb[txq->txq_put_index] = skb;
-
-			mvneta_txq_inc_put(txq);
 		} else {
 			/* Descriptor in the middle: Not First, Not Last */
 			tx_desc->command = 0;
-
 			txq->tx_skb[txq->txq_put_index] = NULL;
-			mvneta_txq_inc_put(txq);
 		}
+		mvneta_txq_inc_put(txq);
 	}
 
 	return 0;
@@ -1584,15 +1719,18 @@
 	u16 txq_id = skb_get_queue_mapping(skb);
 	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
 	struct mvneta_tx_desc *tx_desc;
-	struct netdev_queue *nq;
 	int frags = 0;
 	u32 tx_cmd;
 
 	if (!netif_running(dev))
 		goto out;
 
+	if (skb_is_gso(skb)) {
+		frags = mvneta_tx_tso(skb, dev, txq);
+		goto out;
+	}
+
 	frags = skb_shinfo(skb)->nr_frags + 1;
-	nq    = netdev_get_tx_queue(dev, txq_id);
 
 	/* Get a descriptor for the first part of the packet */
 	tx_desc = mvneta_txq_next_desc_get(txq);
@@ -1635,15 +1773,16 @@
 		}
 	}
 
-	txq->count += frags;
-	mvneta_txq_pend_desc_add(pp, txq, frags);
-
-	if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
-		netif_tx_stop_queue(nq);
-
 out:
 	if (frags > 0) {
 		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+		txq->count += frags;
+		mvneta_txq_pend_desc_add(pp, txq, frags);
+
+		if (txq->count >= txq->tx_stop_threshold)
+			netif_tx_stop_queue(nq);
 
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
@@ -2003,7 +2142,7 @@
 {
 	int queue;
 
-	/* free the skb's in the hal tx ring */
+	/* free the skb's in the tx ring */
 	for (queue = 0; queue < txq_number; queue++)
 		mvneta_txq_done_force(pp, &pp->txqs[queue]);
 
@@ -2081,6 +2220,14 @@
 {
 	txq->size = pp->tx_ring_size;
 
+	/* A queue must always have room for at least one skb.
+	 * Therefore, stop the queue when the number of free entries
+	 * drops to the maximum number of descriptors per skb.
+	 */
+	txq->tx_stop_threshold = txq->size - MVNETA_MAX_SKB_DESCS;
+	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
+
+
 	/* Allocate memory for TX descriptors */
 	txq->descs = dma_alloc_coherent(pp->dev->dev.parent,
 					txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2109,6 +2256,18 @@
 				  txq->descs, txq->descs_phys);
 		return -ENOMEM;
 	}
+
+	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+	txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+					   txq->size * TSO_HEADER_SIZE,
+					   &txq->tso_hdrs_phys, GFP_KERNEL);
+	if (txq->tso_hdrs == NULL) {
+		kfree(txq->tx_skb);
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
+				  txq->descs, txq->descs_phys);
+		return -ENOMEM;
+	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
 	return 0;
@@ -2120,6 +2279,10 @@
 {
 	kfree(txq->tx_skb);
 
+	if (txq->tso_hdrs)
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * TSO_HEADER_SIZE,
+				  txq->tso_hdrs, txq->tso_hdrs_phys);
 	if (txq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2279,24 +2442,28 @@
 		return 0;
 
 	/* The interface is running, so we have to force a
-	 * reallocation of the RXQs
+	 * reallocation of the queues
 	 */
 	mvneta_stop_dev(pp);
 
 	mvneta_cleanup_txqs(pp);
 	mvneta_cleanup_rxqs(pp);
 
-	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
+	pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
 	                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	ret = mvneta_setup_rxqs(pp);
 	if (ret) {
-		netdev_err(pp->dev, "unable to setup rxqs after MTU change\n");
+		netdev_err(dev, "unable to setup rxqs after MTU change\n");
 		return ret;
 	}
 
-	mvneta_setup_txqs(pp);
+	ret = mvneta_setup_txqs(pp);
+	if (ret) {
+		netdev_err(dev, "unable to setup txqs after MTU change\n");
+		return ret;
+	}
 
 	mvneta_start_dev(pp);
 	mvneta_port_up(pp);
@@ -2323,22 +2490,19 @@
 static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	u8 *mac = addr + 2;
-	int i;
+	struct sockaddr *sockaddr = addr;
+	int ret;
 
-	if (netif_running(dev))
-		return -EBUSY;
-
+	ret = eth_prepare_mac_addr_change(dev, addr);
+	if (ret < 0)
+		return ret;
 	/* Remove previous address table entry */
 	mvneta_mac_addr_set(pp, dev->dev_addr, -1);
 
 	/* Set new addr in hw */
-	mvneta_mac_addr_set(pp, mac, rxq_def);
+	mvneta_mac_addr_set(pp, sockaddr->sa_data, rxq_def);
 
-	/* Set addr in the device */
-	for (i = 0; i < ETH_ALEN; i++)
-		dev->dev_addr[i] = mac[i];
-
+	eth_commit_mac_addr_change(dev, addr);
 	return 0;
 }
 
@@ -2433,8 +2597,6 @@
 	struct mvneta_port *pp = netdev_priv(dev);
 	int ret;
 
-	mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def);
-
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
 	                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -2600,8 +2762,12 @@
 		return -EINVAL;
 	pp->rx_ring_size = ring->rx_pending < MVNETA_MAX_RXD ?
 		ring->rx_pending : MVNETA_MAX_RXD;
-	pp->tx_ring_size = ring->tx_pending < MVNETA_MAX_TXD ?
-		ring->tx_pending : MVNETA_MAX_TXD;
+
+	pp->tx_ring_size = clamp_t(u16, ring->tx_pending,
+				   MVNETA_MAX_SKB_DESCS * 2, MVNETA_MAX_TXD);
+	if (pp->tx_ring_size != ring->tx_pending)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    pp->tx_ring_size, ring->tx_pending);
 
 	if (netif_running(dev)) {
 		mvneta_stop(dev);
@@ -2638,7 +2804,7 @@
 };
 
 /* Initialize hw */
-static int mvneta_init(struct mvneta_port *pp, int phy_addr)
+static int mvneta_init(struct device *dev, struct mvneta_port *pp)
 {
 	int queue;
 
@@ -2648,8 +2814,8 @@
 	/* Set port default values */
 	mvneta_defaults_set(pp);
 
-	pp->txqs = kzalloc(txq_number * sizeof(struct mvneta_tx_queue),
-			   GFP_KERNEL);
+	pp->txqs = devm_kcalloc(dev, txq_number, sizeof(struct mvneta_tx_queue),
+				GFP_KERNEL);
 	if (!pp->txqs)
 		return -ENOMEM;
 
@@ -2661,12 +2827,10 @@
 		txq->done_pkts_coal = MVNETA_TXDONE_COAL_PKTS;
 	}
 
-	pp->rxqs = kzalloc(rxq_number * sizeof(struct mvneta_rx_queue),
-			   GFP_KERNEL);
-	if (!pp->rxqs) {
-		kfree(pp->txqs);
+	pp->rxqs = devm_kcalloc(dev, rxq_number, sizeof(struct mvneta_rx_queue),
+				GFP_KERNEL);
+	if (!pp->rxqs)
 		return -ENOMEM;
-	}
 
 	/* Create Rx descriptor rings */
 	for (queue = 0; queue < rxq_number; queue++) {
@@ -2680,12 +2844,6 @@
 	return 0;
 }
 
-static void mvneta_deinit(struct mvneta_port *pp)
-{
-	kfree(pp->txqs);
-	kfree(pp->rxqs);
-}
-
 /* platform glue : initialize decoding windows */
 static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 				     const struct mbus_dram_target_info *dram)
@@ -2768,7 +2926,6 @@
 	struct resource *res;
 	struct device_node *dn = pdev->dev.of_node;
 	struct device_node *phy_node;
-	u32 phy_addr;
 	struct mvneta_port *pp;
 	struct net_device *dev;
 	const char *dt_mac_addr;
@@ -2797,9 +2954,22 @@
 
 	phy_node = of_parse_phandle(dn, "phy", 0);
 	if (!phy_node) {
-		dev_err(&pdev->dev, "no associated PHY\n");
-		err = -ENODEV;
-		goto err_free_irq;
+		if (!of_phy_is_fixed_link(dn)) {
+			dev_err(&pdev->dev, "no PHY specified\n");
+			err = -ENODEV;
+			goto err_free_irq;
+		}
+
+		err = of_phy_register_fixed_link(dn);
+		if (err < 0) {
+			dev_err(&pdev->dev, "cannot register fixed PHY\n");
+			goto err_free_irq;
+		}
+
+		/* In the case of a fixed PHY, the DT node associated
+		 * to the PHY is the Ethernet MAC DT node.
+		 */
+		phy_node = dn;
 	}
 
 	phy_mode = of_get_phy_mode(dn);
@@ -2813,11 +2983,9 @@
 	dev->watchdog_timeo = 5 * HZ;
 	dev->netdev_ops = &mvneta_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops);
+	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
 	pp = netdev_priv(dev);
-
-	pp->weight = MVNETA_RX_POLL_WEIGHT;
 	pp->phy_node = phy_node;
 	pp->phy_interface = phy_mode;
 
@@ -2864,33 +3032,32 @@
 	pp->dev = dev;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	err = mvneta_init(pp, phy_addr);
-	if (err < 0) {
-		dev_err(&pdev->dev, "can't init eth hal\n");
+	err = mvneta_init(&pdev->dev, pp);
+	if (err < 0)
 		goto err_free_stats;
-	}
 
 	err = mvneta_port_power_up(pp, phy_mode);
 	if (err < 0) {
 		dev_err(&pdev->dev, "can't power up port\n");
-		goto err_deinit;
+		goto err_free_stats;
 	}
 
 	dram_target_info = mv_mbus_dram_info();
 	if (dram_target_info)
 		mvneta_conf_mbus_windows(pp, dram_target_info);
 
-	netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
+	netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT);
 
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
-	dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
-	dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+	dev->hw_features |= dev->features;
+	dev->vlan_features |= dev->features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->gso_max_segs = MVNETA_MAX_TSO_SEGS;
 
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register\n");
-		goto err_deinit;
+		goto err_free_stats;
 	}
 
 	netdev_info(dev, "Using %s mac address %pM\n", mac_from,
@@ -2900,8 +3067,6 @@
 
 	return 0;
 
-err_deinit:
-	mvneta_deinit(pp);
 err_free_stats:
 	free_percpu(pp->stats);
 err_clk:
@@ -2920,7 +3085,6 @@
 	struct mvneta_port *pp = netdev_priv(dev);
 
 	unregister_netdev(dev);
-	mvneta_deinit(pp);
 	clk_disable_unprepare(pp->clk);
 	free_percpu(pp->stats);
 	irq_dispose_mapping(dev->irq);

diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index b358c2f..8f5aa7c 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c

@@ -1488,7 +1488,7 @@
 	dev->netdev_ops = &pxa168_eth_netdev_ops;
 	dev->watchdog_timeo = 2 * HZ;
 	dev->base_addr = 0;
-	SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops);
+	dev->ethtool_ops = &pxa168_ethtool_ops;
 
 	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
 

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index b811064..6969338 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c

@@ -4760,7 +4760,7 @@
 
 	SET_NETDEV_DEV(dev, &hw->pdev->dev);
 	dev->irq = hw->pdev->irq;
-	SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops);
+	dev->ethtool_ops = &sky2_ethtool_ops;
 	dev->watchdog_timeo = TX_WATCHDOG;
 	dev->netdev_ops = &sky2_netdev_ops[port];
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index c3ad464..b0297da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c

@@ -171,7 +171,7 @@
  */
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf)
+		   struct mlx4_buf *buf, gfp_t gfp)
 {
 	dma_addr_t t;
 
@@ -180,7 +180,7 @@
 		buf->npages       = 1;
 		buf->page_shift   = get_order(size) + PAGE_SHIFT;
 		buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
-						       size, &t, GFP_KERNEL);
+						       size, &t, gfp);
 		if (!buf->direct.buf)
 			return -ENOMEM;
 
@@ -200,14 +200,14 @@
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					   GFP_KERNEL);
+					   gfp);
 		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
 				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-						   &t, GFP_KERNEL);
+						   &t, gfp);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
@@ -218,7 +218,7 @@
 
 		if (BITS_PER_LONG == 64) {
 			struct page **pages;
-			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+			pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
 			if (!pages)
 				goto err_free;
 			for (i = 0; i < buf->nbufs; ++i)
@@ -260,11 +260,12 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
+						 gfp_t gfp)
 {
 	struct mlx4_db_pgdir *pgdir;
 
-	pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+	pgdir = kzalloc(sizeof *pgdir, gfp);
 	if (!pgdir)
 		return NULL;
 
@@ -272,7 +273,7 @@
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
 	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					    &pgdir->db_dma, GFP_KERNEL);
+					    &pgdir->db_dma, gfp);
 	if (!pgdir->db_page) {
 		kfree(pgdir);
 		return NULL;
@@ -312,7 +313,7 @@
 	return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_db_pgdir *pgdir;
@@ -324,7 +325,7 @@
 		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
 			goto out;
 
-	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev));
+	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
 	if (!pgdir) {
 		ret = -ENOMEM;
 		goto out;
@@ -376,13 +377,13 @@
 {
 	int err;
 
-	err = mlx4_db_alloc(dev, &wqres->db, 1);
+	err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
 	if (err)
 		return err;
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf);
+	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_db;
 
@@ -391,7 +392,7 @@
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 92d3249..5d940a2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c

@@ -212,8 +212,7 @@
 
 	/* First, verify that the master reports correct status */
 	if (comm_pending(dev)) {
-		mlx4_warn(dev, "Communication channel is not idle."
-			  "my toggle is %d (cmd:0x%x)\n",
+		mlx4_warn(dev, "Communication channel is not idle - my toggle is %d (cmd:0x%x)\n",
 			  priv->cmd.comm_toggle, cmd);
 		return -EAGAIN;
 	}
@@ -422,9 +421,8 @@
 					*out_param =
 						be64_to_cpu(vhcr->out_param);
 				else {
-					mlx4_err(dev, "response expected while"
-						 "output mailbox is NULL for "
-						 "command 0x%x\n", op);
+					mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+						 op);
 					vhcr->status = CMD_STAT_BAD_PARAM;
 				}
 			}
@@ -439,16 +437,15 @@
 					*out_param =
 						be64_to_cpu(vhcr->out_param);
 				else {
-					mlx4_err(dev, "response expected while"
-						 "output mailbox is NULL for "
-						 "command 0x%x\n", op);
+					mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+						 op);
 					vhcr->status = CMD_STAT_BAD_PARAM;
 				}
 			}
 			ret = mlx4_status_to_errno(vhcr->status);
 		} else
-			mlx4_err(dev, "failed execution of VHCR_POST command"
-				 "opcode 0x%x\n", op);
+			mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n",
+				 op);
 	}
 
 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
@@ -476,6 +473,13 @@
 		goto out;
 	}
 
+	if (out_is_imm && !out_param) {
+		mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+			 op);
+		err = -EINVAL;
+		goto out;
+	}
+
 	err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
 			    in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
 	if (err)
@@ -554,6 +558,13 @@
 	cmd->free_head = context->next;
 	spin_unlock(&cmd->context_lock);
 
+	if (out_is_imm && !out_param) {
+		mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+			 op);
+		err = -EINVAL;
+		goto out;
+	}
+
 	init_completion(&context->done);
 
 	mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
@@ -625,9 +636,8 @@
 
 	if ((slave_addr & 0xfff) | (master_addr & 0xfff) |
 	    (slave & ~0x7f) | (size & 0xff)) {
-		mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx "
-			      "master_addr:0x%llx slave_id:%d size:%d\n",
-			      slave_addr, master_addr, slave, size);
+		mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx master_addr:0x%llx slave_id:%d size:%d\n",
+			 slave_addr, master_addr, slave, size);
 		return -EINVAL;
 	}
 
@@ -705,20 +715,28 @@
 	struct ib_smp *smp = inbox->buf;
 	u32 index;
 	u8 port;
+	u8 opcode_modifier;
 	u16 *table;
 	int err;
 	int vidx, pidx;
+	int network_view;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct ib_smp *outsmp = outbox->buf;
 	__be16 *outtab = (__be16 *)(outsmp->data);
 	__be32 slave_cap_mask;
 	__be64 slave_node_guid;
+
 	port = vhcr->in_modifier;
 
+	/* network-view bit is for driver use only, and should not be passed to FW */
+	opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+	network_view = !!(vhcr->op_modifier & 0x8);
+
 	if (smp->base_version == 1 &&
 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
 	    smp->class_version == 1) {
-		if (smp->method	== IB_MGMT_METHOD_GET) {
+		/* host view is paravirtualized */
+		if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
 			if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
 				index = be32_to_cpu(smp->attr_mod);
 				if (port < 1 || port > dev->caps.num_ports)
@@ -743,7 +761,7 @@
 				/*get the slave specific caps:*/
 				/*do the command */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					    vhcr->in_modifier, vhcr->op_modifier,
+					    vhcr->in_modifier, opcode_modifier,
 					    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				/* modify the response for slaves */
 				if (!err && slave != mlx4_master_func_num(dev)) {
@@ -760,7 +778,7 @@
 				smp->attr_mod = cpu_to_be32(slave / 8);
 				/* execute cmd */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					/* if needed, move slave gid to index 0 */
@@ -774,7 +792,7 @@
 			}
 			if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					slave_node_guid =  mlx4_get_slave_node_guid(dev, slave);
@@ -784,19 +802,24 @@
 			}
 		}
 	}
+
+	/* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+	 * These are the MADs used by ib verbs (such as ib_query_gids).
+	 */
 	if (slave != mlx4_master_func_num(dev) &&
-	    ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
-	     (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
-	      smp->method == IB_MGMT_METHOD_SET))) {
-		mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
-			 "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
-			 slave, smp->method, smp->mgmt_class,
-			 be16_to_cpu(smp->attr_id));
-		return -EPERM;
+	    !mlx4_vf_smi_enabled(dev, slave, port)) {
+		if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+		      smp->method == IB_MGMT_METHOD_GET) || network_view) {
+			mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+				 slave, smp->mgmt_class, smp->method,
+				 network_view ? "Network" : "Host",
+				 be16_to_cpu(smp->attr_id));
+			return -EPERM;
+		}
 	}
-	/*default:*/
+
 	return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-				    vhcr->in_modifier, vhcr->op_modifier,
+				    vhcr->in_modifier, opcode_modifier,
 				    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 }
 
@@ -1409,8 +1432,8 @@
 				      ALIGN(sizeof(struct mlx4_vhcr_cmd),
 					    MLX4_ACCESS_MEM_ALIGN), 1);
 		if (ret) {
-			mlx4_err(dev, "%s:Failed reading vhcr"
-				 "ret: 0x%x\n", __func__, ret);
+			mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
+				 __func__, ret);
 			kfree(vhcr);
 			return ret;
 		}
@@ -1461,9 +1484,8 @@
 
 	/* Apply permission and bound checks if applicable */
 	if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
-		mlx4_warn(dev, "Command:0x%x from slave: %d failed protection "
-			  "checks for resource_id:%d\n", vhcr->op, slave,
-			  vhcr->in_modifier);
+		mlx4_warn(dev, "Command:0x%x from slave: %d failed protection checks for resource_id:%d\n",
+			  vhcr->op, slave, vhcr->in_modifier);
 		vhcr_cmd->status = CMD_STAT_BAD_OP;
 		goto out_status;
 	}
@@ -1502,8 +1524,7 @@
 	}
 
 	if (err) {
-		mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with"
-			  " error:%d, status %d\n",
+		mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
 			  vhcr->op, slave, vhcr->errno, err);
 		vhcr_cmd->status = mlx4_errno_to_status(err);
 		goto out_status;
@@ -1537,8 +1558,8 @@
 				 __func__);
 		else if (vhcr->e_bit &&
 			 mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
-				mlx4_warn(dev, "Failed to generate command completion "
-					  "eqe for slave %d\n", slave);
+				mlx4_warn(dev, "Failed to generate command completion eqe for slave %d\n",
+					  slave);
 	}
 
 out:
@@ -1577,8 +1598,9 @@
 
 	mlx4_dbg(dev, "updating immediately admin params slave %d port %d\n",
 		 slave, port);
-	mlx4_dbg(dev, "vlan %d QoS %d link down %d\n", vp_admin->default_vlan,
-		 vp_admin->default_qos, vp_admin->link_state);
+	mlx4_dbg(dev, "vlan %d QoS %d link down %d\n",
+		 vp_admin->default_vlan, vp_admin->default_qos,
+		 vp_admin->link_state);
 
 	work = kzalloc(sizeof(*work), GFP_KERNEL);
 	if (!work)
@@ -1591,7 +1613,7 @@
 						   &admin_vlan_ix);
 			if (err) {
 				kfree(work);
-				mlx4_warn((&priv->dev),
+				mlx4_warn(&priv->dev,
 					  "No vlan resources slave %d, port %d\n",
 					  slave, port);
 				return err;
@@ -1600,7 +1622,7 @@
 			admin_vlan_ix = NO_INDX;
 		}
 		work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN;
-		mlx4_dbg((&(priv->dev)),
+		mlx4_dbg(&priv->dev,
 			 "alloc vlan %d idx  %d slave %d port %d\n",
 			 (int)(vp_admin->default_vlan),
 			 admin_vlan_ix, slave, port);
@@ -1653,6 +1675,8 @@
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			priv->mfunc.master.vf_admin[slave].enable_smi[port];
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 		vp_oper->state = *vp_admin;
@@ -1661,12 +1685,12 @@
 						   vp_admin->default_vlan, &(vp_oper->vlan_idx));
 			if (err) {
 				vp_oper->vlan_idx = NO_INDX;
-				mlx4_warn((&priv->dev),
+				mlx4_warn(&priv->dev,
 					  "No vlan resorces slave %d, port %d\n",
 					  slave, port);
 				return err;
 			}
-			mlx4_dbg((&(priv->dev)), "alloc vlan %d idx  %d slave %d port %d\n",
+			mlx4_dbg(&priv->dev, "alloc vlan %d idx  %d slave %d port %d\n",
 				 (int)(vp_oper->state.default_vlan),
 				 vp_oper->vlan_idx, slave, port);
 		}
@@ -1677,12 +1701,12 @@
 			if (0 > vp_oper->mac_idx) {
 				err = vp_oper->mac_idx;
 				vp_oper->mac_idx = NO_INDX;
-				mlx4_warn((&priv->dev),
+				mlx4_warn(&priv->dev,
 					  "No mac resorces slave %d, port %d\n",
 					  slave, port);
 				return err;
 			}
-			mlx4_dbg((&(priv->dev)), "alloc mac %llx idx  %d slave %d port %d\n",
+			mlx4_dbg(&priv->dev, "alloc mac %llx idx  %d slave %d port %d\n",
 				 vp_oper->state.mac, vp_oper->mac_idx, slave, port);
 		}
 	}
@@ -1704,6 +1728,8 @@
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			MLX4_VF_SMI_DISABLED;
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		if (NO_INDX != vp_oper->vlan_idx) {
 			__mlx4_unregister_vlan(&priv->dev,
@@ -1731,8 +1757,8 @@
 	slave_state[slave].comm_toggle ^= 1;
 	reply = (u32) slave_state[slave].comm_toggle << 31;
 	if (toggle != slave_state[slave].comm_toggle) {
-		mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER"
-			  "STATE COMPROMISIED ***\n", toggle, slave);
+		mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER STATE COMPROMISED ***\n",
+			  toggle, slave);
 		goto reset_slave;
 	}
 	if (cmd == MLX4_COMM_CMD_RESET) {
@@ -1759,8 +1785,8 @@
 	/*command from slave in the middle of FLR*/
 	if (cmd != MLX4_COMM_CMD_RESET &&
 	    MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) {
-		mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) "
-			  "in the middle of FLR\n", slave, cmd);
+		mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) in the middle of FLR\n",
+			  slave, cmd);
 		return;
 	}
 
@@ -1798,8 +1824,8 @@
 
 		mutex_lock(&priv->cmd.slave_cmd_mutex);
 		if (mlx4_master_process_vhcr(dev, slave, NULL)) {
-			mlx4_err(dev, "Failed processing vhcr for slave:%d,"
-				 " resetting slave.\n", slave);
+			mlx4_err(dev, "Failed processing vhcr for slave:%d, resetting slave\n",
+				 slave);
 			mutex_unlock(&priv->cmd.slave_cmd_mutex);
 			goto reset_slave;
 		}
@@ -1816,8 +1842,7 @@
 		is_going_down = 1;
 	spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 	if (is_going_down) {
-		mlx4_warn(dev, "Slave is going down aborting command(%d)"
-			  " executing from slave:%d\n",
+		mlx4_warn(dev, "Slave is going down aborting command(%d) executing from slave:%d\n",
 			  cmd, slave);
 		return;
 	}
@@ -1880,10 +1905,9 @@
 			if (toggle != slt) {
 				if (master->slave_state[slave].comm_toggle
 				    != slt) {
-					printk(KERN_INFO "slave %d out of sync."
-					       " read toggle %d, state toggle %d. "
-					       "Resynching.\n", slave, slt,
-					       master->slave_state[slave].comm_toggle);
+					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
+						slave, slt,
+						master->slave_state[slave].comm_toggle);
 					master->slave_state[slave].comm_toggle =
 						slt;
 				}
@@ -1896,8 +1920,7 @@
 	}
 
 	if (reported && reported != served)
-		mlx4_warn(dev, "Got command event with bitmask from %d slaves"
-			  " but %d were served\n",
+		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
 			  reported, served);
 
 	if (mlx4_ARM_COMM_CHANNEL(dev))
@@ -1953,7 +1976,7 @@
 		ioremap(pci_resource_start(dev->pdev, 2) +
 			MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
 	if (!priv->mfunc.comm) {
-		mlx4_err(dev, "Couldn't map communication vector.\n");
+		mlx4_err(dev, "Couldn't map communication vector\n");
 		goto err_vhcr;
 	}
 
@@ -2080,7 +2103,7 @@
 		priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
 					MLX4_HCR_BASE, MLX4_HCR_SIZE);
 		if (!priv->cmd.hcr) {
-			mlx4_err(dev, "Couldn't map command register.\n");
+			mlx4_err(dev, "Couldn't map command register\n");
 			return -ENOMEM;
 		}
 	}
@@ -2481,11 +2504,12 @@
 	ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff);
 	ivf->mac[5] = ((s_info->mac)  & 0xff);
 
-	ivf->vlan	= s_info->default_vlan;
-	ivf->qos	= s_info->default_qos;
-	ivf->tx_rate	= s_info->tx_rate;
-	ivf->spoofchk	= s_info->spoofchk;
-	ivf->linkstate	= s_info->link_state;
+	ivf->vlan		= s_info->default_vlan;
+	ivf->qos		= s_info->default_qos;
+	ivf->max_tx_rate	= s_info->tx_rate;
+	ivf->min_tx_rate	= 0;
+	ivf->spoofchk		= s_info->spoofchk;
+	ivf->linkstate		= s_info->link_state;
 
 	return 0;
 }
@@ -2537,3 +2561,50 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+/* Is SMI enabled in the vf_oper state of a valid @slave/@port pair? */
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave >= 1 && slave < dev->num_slaves &&
+	    port >= 1 && port <= MLX4_MAX_PORTS)
+		return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+			MLX4_VF_SMI_ENABLED;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+
+/* Admin SMI setting; the master reads as 1, out-of-range @slave/@port as 0 */
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave == mlx4_master_func_num(dev))
+		return 1;
+
+	if (slave >= 1 && slave < dev->num_slaves &&
+	    port >= 1 && port <= MLX4_MAX_PORTS)
+		return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+			MLX4_VF_SMI_ENABLED;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+
+/* Store the admin SMI setting (0 or 1) of @slave on @port. A no-op for the
+ * master function; -EINVAL for an out-of-range @slave, @port or @enabled.
+ */
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enabled)
+{
+	bool slave_ok = slave >= 1 && slave < dev->num_slaves;
+	bool port_ok = port >= 1 && port <= MLX4_MAX_PORTS;
+
+	if (slave == mlx4_master_func_num(dev))
+		return 0;
+	if (!slave_ok || !port_ok || (enabled != 0 && enabled != 1))
+		return -EINVAL;
+	mlx4_priv(dev)->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121..80f7252 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c

@@ -173,11 +173,11 @@
 	if (*cqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &cq_table->table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;
@@ -293,6 +293,9 @@
 	atomic_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 
+	cq->irq = priv->eq_table.eq[cq->vector].irq;
+	cq->irq_affinity_change = false;
+
 	return 0;
 
 err_radix:

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index c2cd8d3..4b21307 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c

@@ -125,8 +125,7 @@
 						   &cq->vector)) {
 					cq->vector = (cq->ring + 1 + priv->port)
 					    % mdev->dev->caps.num_comp_vectors;
-					mlx4_warn(mdev, "Failed Assigning an EQ to "
-						  "%s ,Falling back to legacy EQ's\n",
+					mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
 						  name);
 				}
 			}
@@ -164,6 +163,13 @@
 		netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
 			       NAPI_POLL_WEIGHT);
 	} else {
+		struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
+
+		err = irq_set_affinity_hint(cq->mcq.irq,
+					    ring->affinity_mask);
+		if (err)
+			mlx4_warn(mdev, "Failed setting affinity hint\n");
+
 		netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
 		napi_hash_add(&cq->napi);
 	}
@@ -180,8 +186,11 @@
 
 	mlx4_en_unmap_buffer(&cq->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
-	if (priv->mdev->dev->caps.comp_pool && cq->vector)
+	if (priv->mdev->dev->caps.comp_pool && cq->vector) {
+		if (!cq->is_tx)
+			irq_set_affinity_hint(cq->mcq.irq, NULL);
 		mlx4_release_eq(priv->mdev->dev, cq->vector);
+	}
 	cq->vector = 0;
 	cq->buf_size = 0;
 	cq->buf = NULL;

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 3e8d336..fa1a069 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c

@@ -378,8 +378,8 @@
 		ethtool_cmd_speed_set(cmd, priv->port_state.link_speed);
 		cmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(cmd, -1);
-		cmd->duplex = -1;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->duplex = DUPLEX_UNKNOWN;
 	}
 
 	if (trans_type > 0 && trans_type <= 0xC) {
@@ -564,7 +564,7 @@
 	return priv->rx_ring_num;
 }
 
-static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index)
+static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
@@ -582,8 +582,8 @@
 	return err;
 }
 
-static int mlx4_en_set_rxfh_indir(struct net_device *dev,
-		const u32 *ring_index)
+static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
+			    const u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -925,13 +925,13 @@
 		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
 	} else {
 		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
-			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n",
+			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
 				cmd->fs.ring_cookie);
 			return -EINVAL;
 		}
 		qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn;
 		if (!qpn) {
-			en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n",
+			en_warn(priv, "rxnfc: RX ring (%llu) is inactive\n",
 				cmd->fs.ring_cookie);
 			return -EINVAL;
 		}
@@ -956,7 +956,7 @@
 	}
 	err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id);
 	if (err) {
-		en_err(priv, "Fail to attach network rule at location %d.\n",
+		en_err(priv, "Fail to attach network rule at location %d\n",
 		       cmd->fs.location);
 		goto out_free_list;
 	}
@@ -1121,7 +1121,7 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int port_up;
+	int port_up = 0;
 	int err = 0;
 
 	if (channel->other_count || channel->combined_count ||
@@ -1151,7 +1151,8 @@
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
-	mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
+	if (dev->num_tc)
+		mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
 
 	en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num);
 	en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
@@ -1223,8 +1224,8 @@
 	.get_rxnfc = mlx4_en_get_rxnfc,
 	.set_rxnfc = mlx4_en_set_rxnfc,
 	.get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
-	.get_rxfh_indir = mlx4_en_get_rxfh_indir,
-	.set_rxfh_indir = mlx4_en_set_rxfh_indir,
+	.get_rxfh = mlx4_en_get_rxfh,
+	.set_rxfh = mlx4_en_set_rxfh,
 	.get_channels = mlx4_en_get_channels,
 	.set_channels = mlx4_en_set_channels,
 	.get_ts_info = mlx4_en_get_ts_info,

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 0c59d4f..f953c1d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c

@@ -133,7 +133,7 @@
 			MLX4_EN_MAX_TX_RING_P_UP);
 	if (params->udp_rss && !(mdev->dev->caps.flags
 					& MLX4_DEV_CAP_FLAG_UDP_RSS)) {
-		mlx4_warn(mdev, "UDP RSS is not supported on this device.\n");
+		mlx4_warn(mdev, "UDP RSS is not supported on this device\n");
 		params->udp_rss = 0;
 	}
 	for (i = 1; i <= MLX4_MAX_PORTS; i++) {
@@ -251,8 +251,7 @@
 
 	mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
 	if (!mdev->LSO_support)
-		mlx4_warn(mdev, "LSO not supported, please upgrade to later "
-				"FW version to enable LSO\n");
+		mlx4_warn(mdev, "LSO not supported, please upgrade to later FW version to enable LSO\n");
 
 	if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
 			 MLX4_PERM_LOCAL_WRITE |  MLX4_PERM_LOCAL_READ,
@@ -268,7 +267,7 @@
 	/* Build device profile according to supplied module parameters */
 	err = mlx4_en_get_profile(mdev);
 	if (err) {
-		mlx4_err(mdev, "Bad module parameters, aborting.\n");
+		mlx4_err(mdev, "Bad module parameters, aborting\n");
 		goto err_mr;
 	}
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 7e4b172..7d4fb7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

@@ -130,7 +130,7 @@
 	case IPPROTO_TCP:
 		return MLX4_NET_TRANS_RULE_ID_TCP;
 	default:
-		return -EPROTONOSUPPORT;
+		return MLX4_NET_TRANS_RULE_NUM;
 	}
 };
 
@@ -177,7 +177,7 @@
 	int rc;
 	__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
 
-	if (spec_tcp_udp.id < 0) {
+	if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) {
 		en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n",
 			filter->ip_proto);
 		goto ignore;
@@ -770,11 +770,12 @@
 					  priv->dev->dev_addr, priv->prev_mac);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
-		memcpy(priv->prev_mac, priv->dev->dev_addr,
-		       sizeof(priv->prev_mac));
 	} else
 		en_dbg(HW, priv, "Port is down while registering mac, exiting...\n");
 
+	memcpy(priv->prev_mac, priv->dev->dev_addr,
+	       sizeof(priv->prev_mac));
+
 	return err;
 }
 
@@ -788,9 +789,8 @@
 	if (!is_valid_ether_addr(saddr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-
 	mutex_lock(&mdev->state_lock);
+	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
 	err = mlx4_en_do_set_mac(priv);
 	mutex_unlock(&mdev->state_lock);
 
@@ -1526,6 +1526,27 @@
 	mutex_unlock(&mdev->state_lock);
 }
 
+/* Allocate and fill the affinity_mask of RX ring @ring_idx; 0 on success. */
+static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
+	int node = priv->mdev->dev->numa_node;
+	int err;
+
+	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
+		return -ENOMEM;
+	err = cpumask_set_cpu_local_first(ring_idx, node, ring->affinity_mask);
+	if (!err)
+		return 0;
+	free_cpumask_var(ring->affinity_mask);
+	return err;
+}
+
+/* Free the affinity_mask that belongs to RX ring @ring_idx. */
+static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
+}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1567,17 +1588,25 @@
 
 		mlx4_en_cq_init_lock(cq);
 
+		err = mlx4_en_init_affinity_hint(priv, i);
+		if (err) {
+			en_err(priv, "Failed preparing IRQ affinity hint\n");
+			goto cq_err;
+		}
+
 		err = mlx4_en_activate_cq(priv, cq, i);
 		if (err) {
 			en_err(priv, "Failed activating Rx CQ\n");
+			mlx4_en_free_affinity_hint(priv, i);
 			goto cq_err;
 		}
 		for (j = 0; j < cq->size; j++)
 			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
 		err = mlx4_en_set_cq_moder(priv, cq);
 		if (err) {
-			en_err(priv, "Failed setting cq moderation parameters");
+			en_err(priv, "Failed setting cq moderation parameters\n");
 			mlx4_en_deactivate_cq(priv, cq);
+			mlx4_en_free_affinity_hint(priv, i);
 			goto cq_err;
 		}
 		mlx4_en_arm_cq(priv, cq);
@@ -1615,7 +1644,7 @@
 		}
 		err = mlx4_en_set_cq_moder(priv, cq);
 		if (err) {
-			en_err(priv, "Failed setting cq moderation parameters");
+			en_err(priv, "Failed setting cq moderation parameters\n");
 			mlx4_en_deactivate_cq(priv, cq);
 			goto tx_err;
 		}
@@ -1715,8 +1744,10 @@
 mac_err:
 	mlx4_en_put_qp(priv);
 cq_err:
-	while (rx_index--)
+	while (rx_index--) {	/* same index for the CQ and its hint */
 		mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]);
+		mlx4_en_free_affinity_hint(priv, rx_index);
+	}
 	for (i = 0; i < priv->rx_ring_num; i++)
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 
@@ -1847,6 +1878,8 @@
 			msleep(1);
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 		mlx4_en_deactivate_cq(priv, cq);
+
+		mlx4_en_free_affinity_hint(priv, i);
 	}
 }
 
@@ -2539,7 +2572,7 @@
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
-	SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops);
+	dev->ethtool_ops = &mlx4_en_ethtool_ops;
 
 	/*
 	 * Set driver features
@@ -2594,8 +2627,8 @@
 				    prof->tx_pause, prof->tx_ppp,
 				    prof->rx_pause, prof->rx_ppp);
 	if (err) {
-		en_err(priv, "Failed setting port general configurations "
-		       "for port %d, with error %d\n", priv->port, err);
+		en_err(priv, "Failed setting port general configurations for port %d, with error %d\n",
+		       priv->port, err);
 		goto out;
 	}
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ba049ae..d2d4157 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c

@@ -270,13 +270,11 @@
 						    ring->actual_size,
 						    GFP_KERNEL)) {
 				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
-					en_err(priv, "Failed to allocate "
-						     "enough rx buffers\n");
+					en_err(priv, "Failed to allocate enough rx buffers\n");
 					return -ENOMEM;
 				} else {
 					new_size = rounddown_pow_of_two(ring->actual_size);
-					en_warn(priv, "Only %d buffers allocated "
-						      "reducing ring size to %d",
+					en_warn(priv, "Only %d buffers allocated reducing ring size to %d\n",
 						ring->actual_size, new_size);
 					goto reduce_rings;
 				}
@@ -685,10 +683,9 @@
 		/* Drop packet on bad receive or bad checksum */
 		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 						MLX4_CQE_OPCODE_ERROR)) {
-			en_err(priv, "CQE completed in error - vendor "
-				  "syndrom:%d syndrom:%d\n",
-				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
-				  ((struct mlx4_err_cqe *) cqe)->syndrome);
+			en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n",
+			       ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
+			       ((struct mlx4_err_cqe *)cqe)->syndrome);
 			goto next;
 		}
 		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
@@ -898,10 +895,17 @@
 	mlx4_en_cq_unlock_napi(cq);
 
 	/* If we used up all the quota - we're probably not done yet... */
-	if (done == budget)
+	if (done == budget) {
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
-	else {
+		if (unlikely(cq->mcq.irq_affinity_change)) {
+			cq->mcq.irq_affinity_change = false;
+			napi_complete(napi);
+			mlx4_en_arm_cq(priv, cq);
+			return 0;
+		}
+	} else {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 	}
@@ -944,8 +948,8 @@
 	priv->rx_skb_size = eff_mtu;
 	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc));
 
-	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
-		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
+	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
+	       eff_mtu, priv->num_frags);
 	for (i = 0; i < priv->num_frags; i++) {
 		en_err(priv,
 		       "  frag:%d - size:%d prefix:%d align:%d stride:%d\n",
@@ -972,7 +976,7 @@
 	if (!context)
 		return -ENOMEM;
 
-	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
 		goto out;
@@ -1012,7 +1016,7 @@
 		en_err(priv, "Failed reserving drop qpn\n");
 		return err;
 	}
-	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating drop qp\n");
 		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1071,7 +1075,7 @@
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index dd1f6d3..8be7483 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c

@@ -108,12 +108,12 @@
 
 	ring->buf = ring->wqres.buf.direct.buf;
 
-	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
-	       "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
-	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
+	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
+	       ring, ring->buf, ring->size, ring->buf_size,
+	       (unsigned long long) ring->wqres.buf.direct.map);
 
 	ring->qpn = qpn;
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_map;
@@ -122,7 +122,7 @@
 
 	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 	if (err) {
-		en_dbg(DRV, priv, "working without blueflame (%d)", err);
+		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
 		ring->bf.uar = &mdev->priv_uar;
 		ring->bf.uar->map = mdev->uar_map;
 		ring->bf_enabled = false;
@@ -474,9 +474,15 @@
 	/* If we used up all the quota - we're probably not done yet... */
 	if (done < budget) {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 		return done;
+	} else if (unlikely(cq->mcq.irq_affinity_change)) {
+		cq->mcq.irq_affinity_change = false;
+		napi_complete(napi);
+		mlx4_en_arm_cq(priv, cq);
+		return 0;
 	}
 	return budget;
 }

diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index d501a2b..d954ec1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c

@@ -53,6 +53,11 @@
 	MLX4_EQ_ENTRY_SIZE	= 0x20
 };
 
+struct mlx4_irq_notify {
+	void *arg;	/* set to the struct mlx4_priv * when registered */
+	struct irq_affinity_notify notify;	/* container_of() anchor */
+};
+
 #define MLX4_EQ_STATUS_OK	   ( 0 << 28)
 #define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
 #define MLX4_EQ_OWNER_SW	   ( 0 << 24)
@@ -152,14 +157,13 @@
 				if (i != dev->caps.function &&
 				    master->slave_state[i].active)
 					if (mlx4_GEN_EQE(dev, i, eqe))
-						mlx4_warn(dev, "Failed to "
-							  " generate event "
-							  "for slave %d\n", i);
+						mlx4_warn(dev, "Failed to generate event for slave %d\n",
+							  i);
 			}
 		} else {
 			if (mlx4_GEN_EQE(dev, slave, eqe))
-				mlx4_warn(dev, "Failed to generate event "
-					       "for slave %d\n", slave);
+				mlx4_warn(dev, "Failed to generate event for slave %d\n",
+					  slave);
 		}
 		++slave_eq->cons;
 	}
@@ -177,8 +181,8 @@
 	s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
 	if ((!!(s_eqe->owner & 0x80)) ^
 	    (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
-		mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
-			  "No free EQE on slave events queue\n", slave);
+		mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. No free EQE on slave events queue\n",
+			  slave);
 		spin_unlock_irqrestore(&slave_eq->event_lock, flags);
 		return;
 	}
@@ -375,9 +379,9 @@
 		}
 		break;
 	default:
-		pr_err("%s: BUG!!! UNKNOWN state: "
-		       "slave:%d, port:%d\n", __func__, slave, port);
-			goto out;
+		pr_err("%s: BUG!!! UNKNOWN state: slave:%d, port:%d\n",
+		       __func__, slave, port);
+		goto out;
 	}
 	ret = mlx4_get_slave_port_state(dev, slave, port);
 
@@ -425,8 +429,8 @@
 	for (i = 0 ; i < dev->num_slaves; i++) {
 
 		if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
-			mlx4_dbg(dev, "mlx4_handle_slave_flr: "
-				 "clean slave: %d\n", i);
+			mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
+				 i);
 
 			mlx4_delete_all_resources_for_slave(dev, i);
 			/*return the slave to running mode*/
@@ -438,8 +442,8 @@
 			err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
 				       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 			if (err)
-				mlx4_warn(dev, "Failed to notify FW on "
-					  "FLR done (slave:%d)\n", i);
+				mlx4_warn(dev, "Failed to notify FW on FLR done (slave:%d)\n",
+					  i);
 		}
 	}
 }
@@ -490,9 +494,7 @@
 						be32_to_cpu(eqe->event.qp.qpn)
 						& 0xffffff, &slave);
 				if (ret && ret != -ENOENT) {
-					mlx4_dbg(dev, "QP event %02x(%02x) on "
-						 "EQ %d at index %u: could "
-						 "not get slave id (%d)\n",
+					mlx4_dbg(dev, "QP event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
 						 eqe->type, eqe->subtype,
 						 eq->eqn, eq->cons_index, ret);
 					break;
@@ -520,23 +522,19 @@
 						& 0xffffff,
 						&slave);
 				if (ret && ret != -ENOENT) {
-					mlx4_warn(dev, "SRQ event %02x(%02x) "
-						  "on EQ %d at index %u: could"
-						  " not get slave id (%d)\n",
+					mlx4_warn(dev, "SRQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
 						  eqe->type, eqe->subtype,
 						  eq->eqn, eq->cons_index, ret);
 					break;
 				}
-				mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x,"
-					  " event: %02x(%02x)\n", __func__,
-					  slave,
+				mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
+					  __func__, slave,
 					  be32_to_cpu(eqe->event.srq.srqn),
 					  eqe->type, eqe->subtype);
 
 				if (!ret && slave != dev->caps.function) {
-					mlx4_warn(dev, "%s: sending event "
-						  "%02x(%02x) to slave:%d\n",
-						   __func__, eqe->type,
+					mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n",
+						  __func__, eqe->type,
 						  eqe->subtype, slave);
 					mlx4_slave_event(dev, slave, eqe);
 					break;
@@ -569,8 +567,7 @@
 					if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
 						if (i == mlx4_master_func_num(dev))
 							continue;
-						mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
-							 " to slave: %d, port:%d\n",
+						mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n",
 							 __func__, i, port);
 						s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state;
 						if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) {
@@ -634,11 +631,9 @@
 					be32_to_cpu(eqe->event.cq_err.cqn)
 					& 0xffffff, &slave);
 				if (ret && ret != -ENOENT) {
-					mlx4_dbg(dev, "CQ event %02x(%02x) on "
-						 "EQ %d at index %u: could "
-						  "not get slave id (%d)\n",
-						  eqe->type, eqe->subtype,
-						  eq->eqn, eq->cons_index, ret);
+					mlx4_dbg(dev, "CQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
+						 eqe->type, eqe->subtype,
+						 eq->eqn, eq->cons_index, ret);
 					break;
 				}
 
@@ -667,8 +662,7 @@
 
 		case MLX4_EVENT_TYPE_COMM_CHANNEL:
 			if (!mlx4_is_master(dev)) {
-				mlx4_warn(dev, "Received comm channel event "
-					       "for non master device\n");
+				mlx4_warn(dev, "Received comm channel event for non master device\n");
 				break;
 			}
 			memcpy(&priv->mfunc.master.comm_arm_bit_vector,
@@ -681,8 +675,7 @@
 		case MLX4_EVENT_TYPE_FLR_EVENT:
 			flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
 			if (!mlx4_is_master(dev)) {
-				mlx4_warn(dev, "Non-master function received"
-					       "FLR event\n");
+				mlx4_warn(dev, "Non-master function received FLR event\n");
 				break;
 			}
 
@@ -711,22 +704,17 @@
 			if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
 				if (mlx4_is_master(dev))
 					for (i = 0; i < dev->num_slaves; i++) {
-						mlx4_dbg(dev, "%s: Sending "
-							"MLX4_FATAL_WARNING_SUBTYPE_WARMING"
-							" to slave: %d\n", __func__, i);
+						mlx4_dbg(dev, "%s: Sending MLX4_FATAL_WARNING_SUBTYPE_WARMING to slave: %d\n",
+							 __func__, i);
 						if (i == dev->caps.function)
 							continue;
 						mlx4_slave_event(dev, i, eqe);
 					}
-				mlx4_err(dev, "Temperature Threshold was reached! "
-					"Threshold: %d celsius degrees; "
-					"Current Temperature: %d\n",
-					be16_to_cpu(eqe->event.warming.warning_threshold),
-					be16_to_cpu(eqe->event.warming.current_temperature));
+				mlx4_err(dev, "Temperature Threshold was reached! Threshold: %d celsius degrees; Current Temperature: %d\n",
+					 be16_to_cpu(eqe->event.warming.warning_threshold),
+					 be16_to_cpu(eqe->event.warming.current_temperature));
 			} else
-				mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
-					  "subtype %02x on EQ %d at index %u. owner=%x, "
-					  "nent=0x%x, slave=%x, ownership=%s\n",
+				mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), subtype %02x on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n",
 					  eqe->type, eqe->subtype, eq->eqn,
 					  eq->cons_index, eqe->owner, eq->nent,
 					  eqe->slave_id,
@@ -743,9 +731,7 @@
 		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
 		case MLX4_EVENT_TYPE_ECC_DETECT:
 		default:
-			mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at "
-				  "index %u. owner=%x, nent=0x%x, slave=%x, "
-				  "ownership=%s\n",
+			mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n",
 				  eqe->type, eqe->subtype, eq->eqn,
 				  eq->cons_index, eqe->owner, eq->nent,
 				  eqe->slave_id,
@@ -1088,7 +1074,7 @@
 	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
 				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
 	if (!priv->clr_base) {
-		mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
+		mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n");
 		return -ENOMEM;
 	}
 
@@ -1102,6 +1088,57 @@
 	iounmap(priv->clr_base);
 }
 
+/* Notifier callback: flag every CQ in cq_table.tree that uses notify->irq. */
+static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct mlx4_irq_notify *owner =
+		container_of(notify, struct mlx4_irq_notify, notify);
+	struct mlx4_priv *priv = owner->arg;
+	struct radix_tree_iter it;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &priv->cq_table.tree, &it, 0) {
+		struct mlx4_cq *cq = *slot;
+
+		if (cq->irq == notify->irq)
+			cq->irq_affinity_change = true;
+	}
+}
+
+/* Release hook stored in notify.release; @ref is the kref embedded in
+ * a struct mlx4_irq_notify, and that containing object is freed here. */
+static void mlx4_release_irq_notifier(struct kref *ref)
+{
+	kfree(container_of(ref, struct mlx4_irq_notify, notify.kref));
+}
+
+/* Allocate an mlx4_irq_notify for @irq and hand it to
+ * irq_set_affinity_notifier(); failures are only warned about. */
+static void mlx4_assign_irq_notifier(struct mlx4_priv *priv,
+				     struct mlx4_dev *dev, int irq)
+{
+	struct mlx4_irq_notify *n = kzalloc(sizeof(*n), GFP_KERNEL);
+
+	if (!n) {
+		mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n",
+			  irq);
+		return;
+	}
+
+	n->notify.irq = irq;
+	n->notify.notify = mlx4_irq_notifier_notify;
+	n->notify.release = mlx4_release_irq_notifier;
+	n->arg = priv;
+	if (!irq_set_affinity_notifier(irq, &n->notify))
+		return;
+
+	/* registration failed: free the notifier here, then warn */
+	kfree(n);
+	mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq);
+}
+
+
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1372,6 +1409,9 @@
 				continue;
 				/*we dont want to break here*/
 			}
+			mlx4_assign_irq_notifier(priv, dev,
+						 priv->eq_table.eq[vec].irq);
+
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
 	}
@@ -1398,6 +1438,9 @@
 		  Belonging to a legacy EQ*/
 		mutex_lock(&priv->msix_ctl.pool_lock);
 		if (priv->msix_ctl.pool_bm & 1ULL << i) {
+			irq_set_affinity_notifier(
+				priv->eq_table.eq[vec].irq,
+				NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d16a4d1..688e1ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -178,8 +178,8 @@
 				struct mlx4_cmd_info *cmd)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	u8	field;
-	u32	size;
+	u8	field, port;
+	u32	size, proxy_qp, qkey;
 	int	err = 0;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET		0x0
@@ -209,6 +209,7 @@
 
 /* when opcode modifier = 1 */
 #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET		0x3
+#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET	0x4
 #define QUERY_FUNC_CAP_FLAGS0_OFFSET		0x8
 #define QUERY_FUNC_CAP_FLAGS1_OFFSET		0xc
 
@@ -221,6 +222,7 @@
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC		0x40
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN	0x80
 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO			0x10
+#define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 
@@ -234,28 +236,35 @@
 			return -EINVAL;
 
 		vhcr->in_modifier = converted_port;
-		/* Set nic_info bit to mark new fields support */
-		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
-		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
-
 		/* phys-port = logical-port */
 		field = vhcr->in_modifier -
 			find_first_bit(actv_ports.ports, dev->caps.num_ports);
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
 
-		field = vhcr->in_modifier;
+		port = vhcr->in_modifier;
+		proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+
+		/* Set nic_info bit to mark new fields support */
+		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+
+		if (mlx4_vf_smi_enabled(dev, slave, port) &&
+		    !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) {
+			field |= QUERY_FUNC_CAP_VF_ENABLE_QP0;
+			MLX4_PUT(outbox->buf, qkey,
+				 QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		}
+		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
 		/* size is now the QP number */
-		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
 
 		size += 2;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
 
-		size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
-
-		size += 2;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY);
+		proxy_qp += 2;
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY);
 
 		MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
 			 QUERY_FUNC_CAP_PHYS_PORT_ID);
@@ -326,7 +335,7 @@
 	struct mlx4_cmd_mailbox *mailbox;
 	u32			*outbox;
 	u8			field, op_modifier;
-	u32			size;
+	u32			size, qkey;
 	int			err = 0, quotas = 0;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
@@ -414,7 +423,7 @@
 
 	MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
 	if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
-		if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_OFFSET) {
+		if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) {
 			mlx4_err(dev, "VLAN is enforced on this port\n");
 			err = -EPROTONOSUPPORT;
 			goto out;
@@ -428,8 +437,7 @@
 	} else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
 		MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 		if (field & QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) {
-			mlx4_err(dev, "phy_wqe_gid is "
-				 "enforced on this ib port\n");
+			mlx4_err(dev, "phy_wqe_gid is enforced on this ib port\n");
 			err = -EPROTONOSUPPORT;
 			goto out;
 		}
@@ -442,6 +450,13 @@
 		goto out;
 	}
 
+	if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
+		MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		func_cap->qp0_qkey = qkey;
+	} else {
+		func_cap->qp0_qkey = 0;
+	}
+
 	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
 	func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
 
@@ -1054,10 +1069,10 @@
 		 */
 		lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1;
 		if (lg < MLX4_ICM_PAGE_SHIFT) {
-			mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
-				   MLX4_ICM_PAGE_SIZE,
-				   (unsigned long long) mlx4_icm_addr(&iter),
-				   mlx4_icm_size(&iter));
+			mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx)\n",
+				  MLX4_ICM_PAGE_SIZE,
+				  (unsigned long long) mlx4_icm_addr(&iter),
+				  mlx4_icm_size(&iter));
 			err = -EINVAL;
 			goto out;
 		}
@@ -1093,14 +1108,14 @@
 
 	switch (op) {
 	case MLX4_CMD_MAP_FA:
-		mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
+		mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW\n", tc, ts);
 		break;
 	case MLX4_CMD_MAP_ICM_AUX:
-		mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
+		mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux\n", tc, ts);
 		break;
 	case MLX4_CMD_MAP_ICM:
-		mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
-			  tc, ts, (unsigned long long) virt - (ts << 10));
+		mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM\n",
+			 tc, ts, (unsigned long long) virt - (ts << 10));
 		break;
 	}
 
@@ -1186,14 +1201,13 @@
 	MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET);
 	if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV ||
 	    cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) {
-		mlx4_err(dev, "Installed FW has unsupported "
-			 "command interface revision %d.\n",
+		mlx4_err(dev, "Installed FW has unsupported command interface revision %d\n",
 			 cmd_if_rev);
 		mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n",
 			 (int) (dev->caps.fw_ver >> 32),
 			 (int) (dev->caps.fw_ver >> 16) & 0xffff,
 			 (int) dev->caps.fw_ver & 0xffff);
-		mlx4_err(dev, "This driver version supports only revisions %d to %d.\n",
+		mlx4_err(dev, "This driver version supports only revisions %d to %d\n",
 			 MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV);
 		err = -ENODEV;
 		goto out;

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 6811ee0..1fce03e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h

@@ -134,6 +134,7 @@
 	int	max_eq;
 	int	reserved_eq;
 	int	mcg_quota;
+	u32	qp0_qkey;
 	u32	qp0_tunnel_qpn;
 	u32	qp0_proxy_qpn;
 	u32	qp1_tunnel_qpn;

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 5fbf492..97c9b1d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c

@@ -245,7 +245,8 @@
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   gfp_t gfp)
 {
 	u32 i = (obj & (table->num_obj - 1)) /
 			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -259,7 +260,7 @@
 	}
 
 	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+				       (table->lowmem ? gfp : GFP_HIGHUSER) |
 				       __GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
@@ -356,7 +357,7 @@
 	u32 i;
 
 	for (i = start; i <= end; i += inc) {
-		err = mlx4_table_get(dev, table, i);
+		err = mlx4_table_get(dev, table, i, GFP_KERNEL);
 		if (err)
 			goto fail;
 	}

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index dee67fa..0c73645 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h

@@ -71,7 +71,8 @@
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   gfp_t gfp);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 u32 start, u32 end);

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index c187d74..5f42f6d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c

@@ -104,8 +104,6 @@
 MODULE_PARM_DESC(enable_64b_cqe_eqe,
 		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
 
-#define HCA_GLOBAL_CAP_MASK            0
-
 #define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
 
 static char mlx4_version[] =
@@ -134,8 +132,7 @@
 
 static bool use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
-		  "(0/1, default 0)");
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
 
 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
@@ -163,8 +160,7 @@
 	for (i = 0; i < dev->caps.num_ports - 1; i++) {
 		if (port_type[i] != port_type[i + 1]) {
 			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
-				mlx4_err(dev, "Only same port types supported "
-					 "on this HCA, aborting.\n");
+				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
 				return -EINVAL;
 			}
 		}
@@ -172,8 +168,8 @@
 
 	for (i = 0; i < dev->caps.num_ports; i++) {
 		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
-			mlx4_err(dev, "Requested port type for port %d is not "
-				      "supported on this HCA\n", i + 1);
+			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
+				 i + 1);
 			return -EINVAL;
 		}
 	}
@@ -195,26 +191,23 @@
 
 	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
 	if (err) {
-		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 		return err;
 	}
 
 	if (dev_cap->min_page_sz > PAGE_SIZE) {
-		mlx4_err(dev, "HCA minimum page size of %d bigger than "
-			 "kernel PAGE_SIZE of %ld, aborting.\n",
+		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
 			 dev_cap->min_page_sz, PAGE_SIZE);
 		return -ENODEV;
 	}
 	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
-		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
-			 "aborting.\n",
+		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
 			 dev_cap->num_ports, MLX4_MAX_PORTS);
 		return -ENODEV;
 	}
 
 	if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
-		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
-			 "PCI resource 2 size of 0x%llx, aborting.\n",
+		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
 			 dev_cap->uar_size,
 			 (unsigned long long) pci_resource_len(dev->pdev, 2));
 		return -ENODEV;
@@ -296,7 +289,6 @@
 
 	dev->caps.log_num_macs  = log_num_mac;
 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
-	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
@@ -347,14 +339,12 @@
 
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
-			mlx4_warn(dev, "Requested number of MACs is too much "
-				  "for port %d, reducing to %d.\n",
+			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
 				  i, 1 << dev->caps.log_num_macs);
 		}
 		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
 			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
-			mlx4_warn(dev, "Requested number of VLANs is too much "
-				  "for port %d, reducing to %d.\n",
+			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
 				  i, 1 << dev->caps.log_num_vlans);
 		}
 	}
@@ -366,7 +356,6 @@
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
 		(1 << dev->caps.log_num_macs) *
 		(1 << dev->caps.log_num_vlans) *
-		(1 << dev->caps.log_num_prios) *
 		dev->caps.num_ports;
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
 
@@ -584,13 +573,14 @@
 	memset(&hca_param, 0, sizeof(hca_param));
 	err = mlx4_QUERY_HCA(dev, &hca_param);
 	if (err) {
-		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
 		return err;
 	}
 
-	/*fail if the hca has an unknown capability */
-	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
-	    HCA_GLOBAL_CAP_MASK) {
+	/* Fail if the HCA reports any global capability: none are known
+	 * to this driver at this time, so global_caps must be zero.
+	 */
+	if (hca_param.global_caps) {
 		mlx4_err(dev, "Unknown hca global capabilities\n");
 		return -ENOSYS;
 	}
@@ -603,19 +593,18 @@
 	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
 	err = mlx4_dev_cap(dev, &dev_cap);
 	if (err) {
-		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 		return err;
 	}
 
 	err = mlx4_QUERY_FW(dev);
 	if (err)
-		mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");
+		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
 
 	page_size = ~dev->caps.page_size_cap + 1;
 	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
 	if (page_size > PAGE_SIZE) {
-		mlx4_err(dev, "HCA minimum page size of %d bigger than "
-			 "kernel PAGE_SIZE of %ld, aborting.\n",
+		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
 			 page_size, PAGE_SIZE);
 		return -ENODEV;
 	}
@@ -633,8 +622,8 @@
 	memset(&func_cap, 0, sizeof(func_cap));
 	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
 	if (err) {
-		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
-			  err);
+		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
+			 err);
 		return err;
 	}
 
@@ -661,18 +650,20 @@
 	dev->caps.num_amgms             = 0;
 
 	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
-		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
-			 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
+			 dev->caps.num_ports, MLX4_MAX_PORTS);
 		return -ENODEV;
 	}
 
+	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
 	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 
 	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
-	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
+	    !dev->caps.qp0_qkey) {
 		err = -ENOMEM;
 		goto err_mem;
 	}
@@ -680,10 +671,11 @@
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
 		if (err) {
-			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
-				 " port %d, aborting (%d).\n", i, err);
+			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
+				 i, err);
 			goto err_mem;
 		}
+		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
 		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
 		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
 		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
@@ -699,8 +691,7 @@
 	if (dev->caps.uar_page_size * (dev->caps.num_uars -
 				       dev->caps.reserved_uars) >
 				       pci_resource_len(dev->pdev, 2)) {
-		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
-			 "PCI resource 2 size of 0x%llx, aborting.\n",
+		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
 			 dev->caps.uar_page_size * dev->caps.num_uars,
 			 (unsigned long long) pci_resource_len(dev->pdev, 2));
 		goto err_mem;
@@ -722,19 +713,23 @@
 	}
 
 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
-	mlx4_warn(dev, "Timestamping is not supported in slave mode.\n");
+	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
 
 	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
 
 	return 0;
 
 err_mem:
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
 	kfree(dev->caps.qp1_proxy);
-	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
-		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+	dev->caps.qp0_qkey = NULL;
+	dev->caps.qp0_tunnel = NULL;
+	dev->caps.qp0_proxy = NULL;
+	dev->caps.qp1_tunnel = NULL;
+	dev->caps.qp1_proxy = NULL;
 
 	return err;
 }
@@ -784,8 +779,8 @@
 			dev->caps.port_type[port] = port_types[port - 1];
 			err = mlx4_SET_PORT(dev, port, -1);
 			if (err) {
-				mlx4_err(dev, "Failed to set port %d, "
-					      "aborting\n", port);
+				mlx4_err(dev, "Failed to set port %d, aborting\n",
+					 port);
 				goto out;
 			}
 		}
@@ -868,9 +863,7 @@
 		}
 	}
 	if (err) {
-		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
-			       "Set only 'eth' or 'ib' for both ports "
-			       "(should be the same)\n");
+		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
 		goto out;
 	}
 
@@ -975,8 +968,8 @@
 		mlx4_CLOSE_PORT(mdev, port);
 		err = mlx4_SET_PORT(mdev, port, -1);
 		if (err) {
-			mlx4_err(mdev, "Failed to set port %d, "
-				      "aborting\n", port);
+			mlx4_err(mdev, "Failed to set port %d, aborting\n",
+				 port);
 			goto err_set_port;
 		}
 	}
@@ -995,19 +988,19 @@
 	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
 					 GFP_HIGHUSER | __GFP_NOWARN, 0);
 	if (!priv->fw.fw_icm) {
-		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
+		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
 		return -ENOMEM;
 	}
 
 	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
 	if (err) {
-		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
+		mlx4_err(dev, "MAP_FA command failed, aborting\n");
 		goto err_free;
 	}
 
 	err = mlx4_RUN_FW(dev);
 	if (err) {
-		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
+		mlx4_err(dev, "RUN_FW command failed, aborting\n");
 		goto err_unmap_fa;
 	}
 
@@ -1091,30 +1084,30 @@
 
 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
 	if (err) {
-		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
+		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
 		return err;
 	}
 
-	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
+	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
 		 (unsigned long long) icm_size >> 10,
 		 (unsigned long long) aux_pages << 2);
 
 	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
 					  GFP_HIGHUSER | __GFP_NOWARN, 0);
 	if (!priv->fw.aux_icm) {
-		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
+		mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
 		return -ENOMEM;
 	}
 
 	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
 	if (err) {
-		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
+		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
 		goto err_free_aux;
 	}
 
 	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
 	if (err) {
-		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
 		goto err_unmap_aux;
 	}
 
@@ -1125,7 +1118,7 @@
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
 				  num_eqs, num_eqs, 0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
 		goto err_unmap_cmpt;
 	}
 
@@ -1146,7 +1139,7 @@
 				  dev->caps.num_mtts,
 				  dev->caps.reserved_mtts, 1, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
 		goto err_unmap_eq;
 	}
 
@@ -1156,7 +1149,7 @@
 				  dev->caps.num_mpts,
 				  dev->caps.reserved_mrws, 1, 1);
 	if (err) {
-		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
 		goto err_unmap_mtt;
 	}
 
@@ -1167,7 +1160,7 @@
 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
 				  0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map QP context memory, aborting\n");
 		goto err_unmap_dmpt;
 	}
 
@@ -1178,7 +1171,7 @@
 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
 				  0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
 		goto err_unmap_qp;
 	}
 
@@ -1189,7 +1182,7 @@
 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
 				  0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
 		goto err_unmap_auxc;
 	}
 
@@ -1210,7 +1203,7 @@
 				  dev->caps.num_cqs,
 				  dev->caps.reserved_cqs, 0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
 		goto err_unmap_rdmarc;
 	}
 
@@ -1220,7 +1213,7 @@
 				  dev->caps.num_srqs,
 				  dev->caps.reserved_srqs, 0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
 		goto err_unmap_cq;
 	}
 
@@ -1238,7 +1231,7 @@
 				  dev->caps.num_mgms + dev->caps.num_amgms,
 				  0, 0);
 	if (err) {
-		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
+		mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
 		goto err_unmap_srq;
 	}
 
@@ -1315,7 +1308,7 @@
 
 	mutex_lock(&priv->cmd.slave_cmd_mutex);
 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
-		mlx4_warn(dev, "Failed to close slave function.\n");
+		mlx4_warn(dev, "Failed to close slave function\n");
 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
 }
 
@@ -1413,7 +1406,7 @@
 	u32 cmd_channel_ver;
 
 	if (atomic_read(&pf_loading)) {
-		mlx4_warn(dev, "PF is not ready. Deferring probe\n");
+		mlx4_warn(dev, "PF is not ready - Deferring probe\n");
 		return -EPROBE_DEFER;
 	}
 
@@ -1426,8 +1419,7 @@
 	 * NUM_OF_RESET_RETRIES times before leaving.*/
 	if (ret_from_reset) {
 		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
-			mlx4_warn(dev, "slave is currently in the "
-				  "middle of FLR. Deferring probe.\n");
+			mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
 			mutex_unlock(&priv->cmd.slave_cmd_mutex);
 			return -EPROBE_DEFER;
 		} else
@@ -1441,8 +1433,7 @@
 
 	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
 		MLX4_COMM_GET_IF_REV(slave_read)) {
-		mlx4_err(dev, "slave driver version is not supported"
-			 " by the master\n");
+		mlx4_err(dev, "slave driver version is not supported by the master\n");
 		goto err;
 	}
 
@@ -1520,8 +1511,7 @@
 
 			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
 			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
-				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
-					  "set to use B0 steering. Falling back to A0 steering mode.\n");
+				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
 		}
 		dev->oper_log_mgm_entry_size =
 			mlx4_log_num_mgm_entry_size > 0 ?
@@ -1529,8 +1519,7 @@
 			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
 	}
-	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
-		 "modparam log_num_mgm_entry_size = %d\n",
+	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
 		 mlx4_steering_mode_str(dev->caps.steering_mode),
 		 dev->oper_log_mgm_entry_size,
 		 mlx4_log_num_mgm_entry_size);
@@ -1564,15 +1553,15 @@
 		err = mlx4_QUERY_FW(dev);
 		if (err) {
 			if (err == -EACCES)
-				mlx4_info(dev, "non-primary physical function, skipping.\n");
+				mlx4_info(dev, "non-primary physical function, skipping\n");
 			else
-				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+				mlx4_err(dev, "QUERY_FW command failed, aborting\n");
 			return err;
 		}
 
 		err = mlx4_load_fw(dev);
 		if (err) {
-			mlx4_err(dev, "Failed to start FW, aborting.\n");
+			mlx4_err(dev, "Failed to start FW, aborting\n");
 			return err;
 		}
 
@@ -1584,7 +1573,7 @@
 
 		err = mlx4_dev_cap(dev, &dev_cap);
 		if (err) {
-			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 			goto err_stop_fw;
 		}
 
@@ -1625,7 +1614,7 @@
 
 		err = mlx4_INIT_HCA(dev, &init_hca);
 		if (err) {
-			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
 			goto err_free_icm;
 		}
 		/*
@@ -1636,7 +1625,7 @@
 			memset(&init_hca, 0, sizeof(init_hca));
 			err = mlx4_QUERY_HCA(dev, &init_hca);
 			if (err) {
-				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
+				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 			} else {
 				dev->caps.hca_core_clock =
@@ -1649,14 +1638,14 @@
 			if (!dev->caps.hca_core_clock) {
 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 				mlx4_err(dev,
-					 "HCA frequency is 0. Timestamping is not supported.");
+					 "HCA frequency is 0 - timestamping is not supported\n");
 			} else if (map_internal_clock(dev)) {
 				/*
 				 * Map internal clock,
 				 * in case of failure disable timestamping
 				 */
 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
-				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
+				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
 			}
 		}
 	} else {
@@ -1683,7 +1672,7 @@
 
 	err = mlx4_QUERY_ADAPTER(dev, &adapter);
 	if (err) {
-		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
+		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
 		goto unmap_bf;
 	}
 
@@ -1696,6 +1685,14 @@
 	unmap_internal_clock(dev);
 	unmap_bf_area(dev);
 
+	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
+		kfree(dev->caps.qp0_tunnel);
+		kfree(dev->caps.qp0_proxy);
+		kfree(dev->caps.qp1_tunnel);
+		kfree(dev->caps.qp1_proxy);
+	}
+
 err_close:
 	if (mlx4_is_slave(dev))
 		mlx4_slave_exit(dev);
@@ -1793,79 +1790,69 @@
 
 	err = mlx4_init_uar_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "user access region table, aborting.\n");
-		return err;
+		mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
+		return err;
 	}
 
 	err = mlx4_uar_alloc(dev, &priv->driver_uar);
 	if (err) {
-		mlx4_err(dev, "Failed to allocate driver access region, "
-			 "aborting.\n");
+		mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
 		goto err_uar_table_free;
 	}
 
 	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
 	if (!priv->kar) {
-		mlx4_err(dev, "Couldn't map kernel access region, "
-			 "aborting.\n");
+		mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
 		err = -ENOMEM;
 		goto err_uar_free;
 	}
 
 	err = mlx4_init_pd_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "protection domain table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
 		goto err_kar_unmap;
 	}
 
 	err = mlx4_init_xrcd_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "reliable connection domain table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
 		goto err_pd_table_free;
 	}
 
 	err = mlx4_init_mr_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "memory region table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
 		goto err_xrcd_table_free;
 	}
 
 	if (!mlx4_is_slave(dev)) {
 		err = mlx4_init_mcg_table(dev);
 		if (err) {
-			mlx4_err(dev, "Failed to initialize multicast group table, aborting.\n");
+			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
 			goto err_mr_table_free;
 		}
 	}
 
 	err = mlx4_init_eq_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "event queue table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
 		goto err_mcg_table_free;
 	}
 
 	err = mlx4_cmd_use_events(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to switch to event-driven "
-			 "firmware commands, aborting.\n");
+		mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
 		goto err_eq_table_free;
 	}
 
 	err = mlx4_NOP(dev);
 	if (err) {
 		if (dev->flags & MLX4_FLAG_MSI_X) {
-			mlx4_warn(dev, "NOP command failed to generate MSI-X "
-				  "interrupt IRQ %d).\n",
+			mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
 				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
-			mlx4_warn(dev, "Trying again without MSI-X.\n");
+			mlx4_warn(dev, "Trying again without MSI-X\n");
 		} else {
-			mlx4_err(dev, "NOP command failed to generate interrupt "
-				 "(IRQ %d), aborting.\n",
+			mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
 				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
 		}
@@ -1877,28 +1864,25 @@
 
 	err = mlx4_init_cq_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "completion queue table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
 		goto err_cmd_poll;
 	}
 
 	err = mlx4_init_srq_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "shared receive queue table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
 		goto err_cq_table_free;
 	}
 
 	err = mlx4_init_qp_table(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "queue pair table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
 		goto err_srq_table_free;
 	}
 
 	err = mlx4_init_counters_table(dev);
 	if (err && err != -ENOENT) {
-		mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
+		mlx4_err(dev, "Failed to initialize counters table, aborting\n");
 		goto err_qp_table_free;
 	}
 
@@ -1908,9 +1892,8 @@
 			err = mlx4_get_port_ib_caps(dev, port,
 						    &ib_port_default_caps);
 			if (err)
-				mlx4_warn(dev, "failed to get port %d default "
-					  "ib capabilities (%d). Continuing "
-					  "with caps = 0\n", port, err);
+				mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
+					  port, err);
 			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
 
 			/* initialize per-slave default ib port capabilities */
@@ -1920,7 +1903,7 @@
 					if (i == mlx4_master_func_num(dev))
 						continue;
 					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
-							ib_port_default_caps;
+						ib_port_default_caps;
 				}
 			}
 
@@ -1933,7 +1916,7 @@
 					    dev->caps.pkey_table_len[port] : -1);
 			if (err) {
 				mlx4_err(dev, "Failed to set port %d, aborting\n",
-					port);
+					 port);
 				goto err_counters_table_free;
 			}
 		}
@@ -2009,7 +1992,7 @@
 			kfree(entries);
 			goto no_msi;
 		} else if (nreq < MSIX_LEGACY_SZ +
-				  dev->caps.num_ports * MIN_MSIX_P_PORT) {
+			   dev->caps.num_ports * MIN_MSIX_P_PORT) {
 			/*Working in legacy mode , all EQ's shared*/
 			dev->caps.comp_pool           = 0;
 			dev->caps.num_comp_vectors = nreq - 1;
@@ -2210,8 +2193,7 @@
 
 	err = pci_enable_device(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device, "
-			"aborting.\n");
+		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
 		return err;
 	}
 
@@ -2258,14 +2240,13 @@
 	 */
 	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
 	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
-		dev_err(&pdev->dev, "Missing DCS, aborting."
-			"(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
+		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
 			pci_dev_data, pci_resource_flags(pdev, 0));
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
-		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
+		dev_err(&pdev->dev, "Missing UAR, aborting\n");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -2280,21 +2261,19 @@
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
 			goto err_release_regions;
 		}
 	}
 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
-			 "consistent PCI DMA mask.\n");
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
-				"aborting.\n");
+			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
 			goto err_release_regions;
 		}
 	}
@@ -2325,7 +2304,7 @@
 		if (total_vfs) {
 			unsigned vfs_offset = 0;
 			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
-			     vfs_offset + nvfs[i] < extended_func_num(pdev);
+				     vfs_offset + nvfs[i] < extended_func_num(pdev);
 			     vfs_offset += nvfs[i], i++)
 				;
 			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
@@ -2351,8 +2330,7 @@
 			if (err < 0)
 				goto err_free_dev;
 			else {
-				mlx4_warn(dev, "Multiple PFs not yet supported."
-					  " Skipping PF.\n");
+				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
 				err = -EINVAL;
 				goto err_free_dev;
 			}
@@ -2362,8 +2340,8 @@
 			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n",
 				  total_vfs);
 			dev->dev_vfs = kzalloc(
-					total_vfs * sizeof(*dev->dev_vfs),
-					GFP_KERNEL);
+				total_vfs * sizeof(*dev->dev_vfs),
+				GFP_KERNEL);
 			if (NULL == dev->dev_vfs) {
 				mlx4_err(dev, "Failed to allocate memory for VFs\n");
 				err = 0;
@@ -2371,14 +2349,14 @@
 				atomic_inc(&pf_loading);
 				err = pci_enable_sriov(pdev, total_vfs);
 				if (err) {
-					mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
+					mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
 						 err);
 					atomic_dec(&pf_loading);
 					err = 0;
 				} else {
 					mlx4_warn(dev, "Running in master mode\n");
 					dev->flags |= MLX4_FLAG_SRIOV |
-						      MLX4_FLAG_MASTER;
+						MLX4_FLAG_MASTER;
 					dev->num_vfs = total_vfs;
 					sriov_initialized = 1;
 				}
@@ -2395,7 +2373,7 @@
 		 */
 		err = mlx4_reset(dev);
 		if (err) {
-			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+			mlx4_err(dev, "Failed to reset HCA, aborting\n");
 			goto err_rel_own;
 		}
 	}
@@ -2403,7 +2381,7 @@
 slave_start:
 	err = mlx4_cmd_init(dev);
 	if (err) {
-		mlx4_err(dev, "Failed to init command interface, aborting.\n");
+		mlx4_err(dev, "Failed to init command interface, aborting\n");
 		goto err_sriov;
 	}
 
@@ -2417,8 +2395,7 @@
 			dev->num_slaves = 0;
 			err = mlx4_multi_func_init(dev);
 			if (err) {
-				mlx4_err(dev, "Failed to init slave mfunc"
-					 " interface, aborting.\n");
+				mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
 				goto err_cmd;
 			}
 		}
@@ -2450,8 +2427,7 @@
 		unsigned sum = 0;
 		err = mlx4_multi_func_init(dev);
 		if (err) {
-			mlx4_err(dev, "Failed to init master mfunc"
-				 "interface, aborting.\n");
+			mlx4_err(dev, "Failed to init master mfunc interface, aborting\n");
 			goto err_close;
 		}
 		if (sriov_initialized) {
@@ -2462,10 +2438,7 @@
 			if (ib_ports &&
 			    (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
 				mlx4_err(dev,
-					 "Invalid syntax of num_vfs/probe_vfs "
-					 "with IB port. Single port VFs syntax"
-					 " is only supported when all ports "
-					 "are configured as ethernet\n");
+					 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
 				goto err_close;
 			}
 			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); i++) {
@@ -2491,8 +2464,7 @@
 	if ((mlx4_is_mfunc(dev)) &&
 	    !(dev->flags & MLX4_FLAG_MSI_X)) {
 		err = -ENOSYS;
-		mlx4_err(dev, "INTx is not supported in multi-function mode."
-			 " aborting.\n");
+		mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
 		goto err_free_eq;
 	}
 
@@ -2566,6 +2538,14 @@
 	if (mlx4_is_master(dev))
 		mlx4_multi_func_cleanup(dev);
 
+	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
+		kfree(dev->caps.qp0_tunnel);
+		kfree(dev->caps.qp0_proxy);
+		kfree(dev->caps.qp1_tunnel);
+		kfree(dev->caps.qp1_proxy);
+	}
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
@@ -2637,7 +2617,7 @@
 	/* in SRIOV it is not allowed to unload the pf's
 	 * driver while there are alive vf's */
 	if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
-		printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
+		pr_warn("Removing PF when there are assigned VF's !!!\n");
 	mlx4_stop_sense(dev);
 	mlx4_unregister_device(dev);
 
@@ -2689,6 +2669,7 @@
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
@@ -2800,7 +2781,7 @@
 	.name		= DRV_NAME,
 	.id_table	= mlx4_pci_table,
 	.probe		= mlx4_init_one,
-	.shutdown	= mlx4_remove_one,
+	.shutdown	= __mlx4_remove_one,
 	.remove		= mlx4_remove_one,
 	.err_handler    = &mlx4_err_handler,
 };
@@ -2808,33 +2789,36 @@
 static int __init mlx4_verify_params(void)
 {
 	if ((log_num_mac < 0) || (log_num_mac > 7)) {
-		pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
+		pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
 		return -1;
 	}
 
 	if (log_num_vlan != 0)
-		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
-			   MLX4_LOG_NUM_VLANS);
+		pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+			MLX4_LOG_NUM_VLANS);
+
+	if (use_prio != 0)
+		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
 
 	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
-		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+		pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
+			log_mtts_per_seg);
 		return -1;
 	}
 
 	/* Check if module param for ports type has legal combination */
 	if (port_type_array[0] == false && port_type_array[1] == true) {
-		printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+		pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
 		port_type_array[0] = true;
 	}
 
 	if (mlx4_log_num_mgm_entry_size != -1 &&
 	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
 	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
-		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
-			   "in legal range (-1 or %d..%d)\n",
-			   mlx4_log_num_mgm_entry_size,
-			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
-			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+		pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-1 or %d..%d)\n",
+			mlx4_log_num_mgm_entry_size,
+			MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+			MLX4_MAX_MGM_LOG_ENTRY_SIZE);
 		return -1;
 	}
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 80ccb4e..4c36def 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c

@@ -638,7 +638,7 @@
 
 		if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
 			if (*index != hash) {
-				mlx4_err(dev, "Found zero MGID in AMGM.\n");
+				mlx4_err(dev, "Found zero MGID in AMGM\n");
 				err = -EINVAL;
 			}
 			return err;
@@ -874,7 +874,7 @@
 	mlx4_err(dev, "%s", buf);
 
 	if (len >= BUF_SIZE)
-		mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n");
+		mlx4_err(dev, "Network rule error message was truncated, print buffer is too small\n");
 }
 
 int mlx4_flow_attach(struct mlx4_dev *dev,
@@ -897,7 +897,7 @@
 		ret = parse_trans_rule(dev, cur, mailbox->buf + size);
 		if (ret < 0) {
 			mlx4_free_cmd_mailbox(dev, mailbox);
-			return -EINVAL;
+			return ret;
 		}
 		size += ret;
 	}
@@ -905,10 +905,10 @@
 	ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id);
 	if (ret == -ENOMEM)
 		mlx4_err_rule(dev,
-			      "mcg table is full. Fail to register network rule.\n",
+			      "mcg table is full. Fail to register network rule\n",
 			      rule);
 	else if (ret)
-		mlx4_err_rule(dev, "Fail to register network rule.\n", rule);
+		mlx4_err_rule(dev, "Fail to register network rule\n", rule);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
@@ -994,7 +994,7 @@
 
 	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
 	if (members_count == dev->caps.num_qp_per_mgm) {
-		mlx4_err(dev, "MGM at index %x is full.\n", index);
+		mlx4_err(dev, "MGM at index %x is full\n", index);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -1042,7 +1042,7 @@
 	}
 	if (err && link && index != -1) {
 		if (index < dev->caps.num_mgms)
-			mlx4_warn(dev, "Got AMGM index %d < %d",
+			mlx4_warn(dev, "Got AMGM index %d < %d\n",
 				  index, dev->caps.num_mgms);
 		else
 			mlx4_bitmap_free(&priv->mcg_table.bitmap,
@@ -1133,7 +1133,7 @@
 
 		if (amgm_index) {
 			if (amgm_index < dev->caps.num_mgms)
-				mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d",
+				mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d\n",
 					  index, amgm_index, dev->caps.num_mgms);
 			else
 				mlx4_bitmap_free(&priv->mcg_table.bitmap,
@@ -1153,7 +1153,7 @@
 			goto out;
 
 		if (index < dev->caps.num_mgms)
-			mlx4_warn(dev, "entry %d had next AMGM index %d < %d",
+			mlx4_warn(dev, "entry %d had next AMGM index %d < %d\n",
 				  prev, index, dev->caps.num_mgms);
 		else
 			mlx4_bitmap_free(&priv->mcg_table.bitmap,

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 8e9eb02..1d8af73 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h

@@ -133,6 +133,11 @@
 	MLX4_COMM_CMD_FLR = 254
 };
 
+enum {
+	MLX4_VF_SMI_DISABLED,
+	MLX4_VF_SMI_ENABLED
+};
+
 /*The flag indicates that the slave should delay the RESET cmd*/
 #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
 /*indicates how many retries will be done if we are in the middle of FLR*/
@@ -216,18 +221,19 @@
 #define mlx4_debug_level	(0)
 #endif /* CONFIG_MLX4_DEBUG */
 
-#define mlx4_dbg(mdev, format, arg...)					\
+#define mlx4_dbg(mdev, format, ...)					\
 do {									\
 	if (mlx4_debug_level)						\
-		dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ##arg); \
+		dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format,	\
+			   ##__VA_ARGS__);				\
 } while (0)
 
-#define mlx4_err(mdev, format, arg...) \
-	dev_err(&mdev->pdev->dev, format, ##arg)
-#define mlx4_info(mdev, format, arg...) \
-	dev_info(&mdev->pdev->dev, format, ##arg)
-#define mlx4_warn(mdev, format, arg...) \
-	dev_warn(&mdev->pdev->dev, format, ##arg)
+#define mlx4_err(mdev, format, ...)					\
+	dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
+#define mlx4_info(mdev, format, ...)					\
+	dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
+#define mlx4_warn(mdev, format, ...)					\
+	dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
 
 extern int mlx4_log_num_mgm_entry_size;
 extern int log_mtts_per_seg;
@@ -488,6 +494,7 @@
 
 struct mlx4_vf_admin_state {
 	struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+	u8 enable_smi[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_vport_oper_state {
@@ -495,8 +502,10 @@
 	int mac_idx;
 	int vlan_idx;
 };
+
 struct mlx4_vf_oper_state {
 	struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+	u8 smi_enabled[MLX4_MAX_PORTS + 1];
 };
 
 struct slave_list {
@@ -895,7 +904,7 @@
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -903,7 +912,7 @@
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 04d9b6fe..0e15295 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

@@ -313,6 +313,7 @@
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	int hwtstamp_rx_filter;
+	cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
@@ -830,26 +831,26 @@
 int en_print(const char *level, const struct mlx4_en_priv *priv,
 	     const char *format, ...);
 
-#define en_dbg(mlevel, priv, format, arg...)			\
-do {								\
-	if (NETIF_MSG_##mlevel & priv->msg_enable)		\
-		en_print(KERN_DEBUG, priv, format, ##arg);	\
+#define en_dbg(mlevel, priv, format, ...)				\
+do {									\
+	if (NETIF_MSG_##mlevel & (priv)->msg_enable)			\
+		en_print(KERN_DEBUG, priv, format, ##__VA_ARGS__);	\
 } while (0)
-#define en_warn(priv, format, arg...)			\
-	en_print(KERN_WARNING, priv, format, ##arg)
-#define en_err(priv, format, arg...)			\
-	en_print(KERN_ERR, priv, format, ##arg)
-#define en_info(priv, format, arg...)			\
-	en_print(KERN_INFO, priv, format, ## arg)
+#define en_warn(priv, format, ...)					\
+	en_print(KERN_WARNING, priv, format, ##__VA_ARGS__)
+#define en_err(priv, format, ...)					\
+	en_print(KERN_ERR, priv, format, ##__VA_ARGS__)
+#define en_info(priv, format, ...)					\
+	en_print(KERN_INFO, priv, format, ##__VA_ARGS__)
 
-#define mlx4_err(mdev, format, arg...)			\
-	pr_err("%s %s: " format, DRV_NAME,		\
-	       dev_name(&mdev->pdev->dev), ##arg)
-#define mlx4_info(mdev, format, arg...)			\
-	pr_info("%s %s: " format, DRV_NAME,		\
-		dev_name(&mdev->pdev->dev), ##arg)
-#define mlx4_warn(mdev, format, arg...)			\
-	pr_warning("%s %s: " format, DRV_NAME,		\
-		   dev_name(&mdev->pdev->dev), ##arg)
+#define mlx4_err(mdev, format, ...)					\
+	pr_err(DRV_NAME " %s: " format,					\
+	       dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
+#define mlx4_info(mdev, format, ...)					\
+	pr_info(DRV_NAME " %s: " format,				\
+		dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
+#define mlx4_warn(mdev, format, ...)					\
+	pr_warn(DRV_NAME " %s: " format,				\
+		dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
 
 #endif

diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2483585..2839abb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c

@@ -250,8 +250,8 @@
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_WRAPPED);
 		if (err)
-			mlx4_warn(dev, "Failed to free mtt range at:"
-				  "%d order:%d\n", offset, order);
+			mlx4_warn(dev, "Failed to free mtt range at:%d order:%d\n",
+				  offset, order);
 		return;
 	}
 	 __mlx4_free_mtt_range(dev, offset, order);
@@ -364,14 +364,14 @@
 	__mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	u64 param = 0;
 
@@ -382,7 +382,7 @@
 							MLX4_CMD_TIME_CLASS_A,
 							MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_mpt_alloc_icm(dev, index);
+	return __mlx4_mpt_alloc_icm(dev, index, gfp);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -436,8 +436,8 @@
 				     key_to_hw_index(mr->key) &
 				     (dev->caps.num_mpts - 1));
 		if (err) {
-			mlx4_warn(dev, "HW2SW_MPT failed (%d),", err);
-			mlx4_warn(dev, "MR has MWs bound to it.\n");
+			mlx4_warn(dev, "HW2SW_MPT failed (%d), MR has MWs bound to it\n",
+				  err);
 			return err;
 		}
 
@@ -469,7 +469,7 @@
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
 	if (err)
 		return err;
 
@@ -627,13 +627,14 @@
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf)
+		       struct mlx4_buf *buf, gfp_t gfp)
 {
 	u64 *page_list;
 	int err;
 	int i;
 
-	page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
+	page_list = kmalloc(buf->npages * sizeof *page_list,
+			    gfp);
 	if (!page_list)
 		return -ENOMEM;
 
@@ -680,7 +681,7 @@
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
 	if (err)
 		return err;
 
@@ -773,7 +774,7 @@
 			mlx4_alloc_mtt_range(dev,
 					     fls(dev->caps.reserved_mtts - 1));
 		if (priv->reserved_mtts < 0) {
-			mlx4_warn(dev, "MTT table of order %u is too small.\n",
+			mlx4_warn(dev, "MTT table of order %u is too small\n",
 				  mr_table->mtt_buddy.max_order);
 			err = -ENOMEM;
 			goto err_reserve_mtts;
@@ -954,8 +955,7 @@
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox)) {
 		err = PTR_ERR(mailbox);
-		printk(KERN_WARNING "mlx4_ib: mlx4_alloc_cmd_mailbox"
-		       " failed (%d)\n", err);
+		pr_warn("mlx4_ib: mlx4_alloc_cmd_mailbox failed (%d)\n", err);
 		return;
 	}
 
@@ -964,8 +964,7 @@
 			     (dev->caps.num_mpts - 1));
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	if (err) {
-		printk(KERN_WARNING "mlx4_ib: mlx4_HW2SW_MPT failed (%d)\n",
-		       err);
+		pr_warn("mlx4_ib: mlx4_HW2SW_MPT failed (%d)\n", err);
 		return;
 	}
 	fmr->mr.enabled = MLX4_MPT_EN_SW;

diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 5ec6f20..7ab9717 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c

@@ -254,8 +254,8 @@
 	if (validate_index(dev, table, index))
 		goto out;
 	if (--table->refs[index]) {
-		mlx4_dbg(dev, "Have more references for index %d,"
-			 "no need to modify mac table\n", index);
+		mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n",
+			 index);
 		goto out;
 	}
 
@@ -453,9 +453,8 @@
 	}
 
 	if (--table->refs[index]) {
-		mlx4_dbg(dev, "Have %d more references for index %d,"
-			 "no need to modify vlan table\n", table->refs[index],
-			 index);
+		mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n",
+			 table->refs[index], index);
 		goto out;
 	}
 	table->entries[index] = 0;
@@ -796,8 +795,7 @@
 					if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
 						    sizeof(gid_entry_tbl->raw))) {
 						/* found duplicate */
-						mlx4_warn(dev, "requested gid entry for slave:%d "
-							  "is a duplicate of gid at index %d\n",
+						mlx4_warn(dev, "requested gid entry for slave:%d is a duplicate of gid at index %d\n",
 							  slave, i);
 						mutex_unlock(&(priv->port[port].gid_table.mutex));
 						return -EINVAL;

diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 8e0c3cc..14089d9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c

@@ -164,18 +164,17 @@
 		}
 
 		if (total_size > dev_cap->max_icm_sz) {
-			mlx4_err(dev, "Profile requires 0x%llx bytes; "
-				  "won't fit in 0x%llx bytes of context memory.\n",
-				  (unsigned long long) total_size,
-				  (unsigned long long) dev_cap->max_icm_sz);
+			mlx4_err(dev, "Profile requires 0x%llx bytes; won't fit in 0x%llx bytes of context memory\n",
+				 (unsigned long long) total_size,
+				 (unsigned long long) dev_cap->max_icm_sz);
 			kfree(profile);
 			return -ENOMEM;
 		}
 
 		if (profile[i].size)
-			mlx4_dbg(dev, "  profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, "
-				  "size 0x%10llx\n",
-				 i, res_name[profile[i].type], profile[i].log_num,
+			mlx4_dbg(dev, "  profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, size 0x%10llx\n",
+				 i, res_name[profile[i].type],
+				 profile[i].log_num,
 				 (unsigned long long) profile[i].start,
 				 (unsigned long long) profile[i].size);
 	}

diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index fbd32af..0dc31d8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c

@@ -264,37 +264,37 @@
 			       MLX4_CMD_FREE_RES,
 			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 		if (err) {
-			mlx4_warn(dev, "Failed to release qp range"
-				  " base:%d cnt:%d\n", base_qpn, cnt);
+			mlx4_warn(dev, "Failed to release qp range base:%d cnt:%d\n",
+				  base_qpn, cnt);
 		}
 	} else
 		 __mlx4_qp_release_range(dev, base_qpn, cnt);
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
 	if (err)
 		goto err_put_qp;
 
-	err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
 	if (err)
 		goto err_put_auxc;
 
-	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
 	if (err)
 		goto err_put_altc;
 
-	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
 	if (err)
 		goto err_put_rdmarc;
 
@@ -316,7 +316,7 @@
 	return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
 	u64 param = 0;
 
@@ -326,7 +326,7 @@
 				    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_qp_alloc_icm(dev, qpn);
+	return __mlx4_qp_alloc_icm(dev, qpn, gfp);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -355,7 +355,7 @@
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -366,7 +366,7 @@
 
 	qp->qpn = qpn;
 
-	err = mlx4_qp_alloc_icm(dev, qpn);
+	err = mlx4_qp_alloc_icm(dev, qpn, gfp);
 	if (err)
 		return err;
 
@@ -612,8 +612,7 @@
 		err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
 				     context, 0, 0, qp);
 		if (err) {
-			mlx4_err(dev, "Failed to bring QP to state: "
-				 "%d with error: %d\n",
+			mlx4_err(dev, "Failed to bring QP to state: %d with error: %d\n",
 				 states[i + 1], err);
 			return err;
 		}

diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c
index dd1b509..ea1c6d0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/reset.c
+++ b/drivers/net/ethernet/mellanox/mlx4/reset.c

@@ -72,8 +72,7 @@
 	hca_header = kmalloc(256, GFP_KERNEL);
 	if (!hca_header) {
 		err = -ENOMEM;
-		mlx4_err(dev, "Couldn't allocate memory to save HCA "
-			  "PCI header, aborting.\n");
+		mlx4_err(dev, "Couldn't allocate memory to save HCA PCI header, aborting\n");
 		goto out;
 	}
 
@@ -84,8 +83,7 @@
 			continue;
 		if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) {
 			err = -ENODEV;
-			mlx4_err(dev, "Couldn't save HCA "
-				  "PCI header, aborting.\n");
+			mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n");
 			goto out;
 		}
 	}
@@ -94,7 +92,7 @@
 			MLX4_RESET_SIZE);
 	if (!reset) {
 		err = -ENOMEM;
-		mlx4_err(dev, "Couldn't map HCA reset register, aborting.\n");
+		mlx4_err(dev, "Couldn't map HCA reset register, aborting\n");
 		goto out;
 	}
 
@@ -133,8 +131,7 @@
 
 	if (vendor == 0xffff) {
 		err = -ENODEV;
-		mlx4_err(dev, "PCI device did not come back after reset, "
-			  "aborting.\n");
+		mlx4_err(dev, "PCI device did not come back after reset, aborting\n");
 		goto out;
 	}
 
@@ -144,16 +141,14 @@
 		if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL,
 					       devctl)) {
 			err = -ENODEV;
-			mlx4_err(dev, "Couldn't restore HCA PCI Express "
-				 "Device Control register, aborting.\n");
+			mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n");
 			goto out;
 		}
 		linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
 		if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL,
 					       linkctl)) {
 			err = -ENODEV;
-			mlx4_err(dev, "Couldn't restore HCA PCI Express "
-				 "Link control register, aborting.\n");
+			mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n");
 			goto out;
 		}
 	}
@@ -164,8 +159,8 @@
 
 		if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) {
 			err = -ENODEV;
-			mlx4_err(dev, "Couldn't restore HCA reg %x, "
-				  "aborting.\n", i);
+			mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n",
+				 i);
 			goto out;
 		}
 	}
@@ -173,8 +168,7 @@
 	if (pci_write_config_dword(dev->pdev, PCI_COMMAND,
 				   hca_header[PCI_COMMAND / 4])) {
 		err = -ENODEV;
-		mlx4_err(dev, "Couldn't restore HCA COMMAND, "
-			  "aborting.\n");
+		mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n");
 		goto out;
 	}
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index f16e539..0efc136 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c

@@ -279,7 +279,7 @@
 };
 
 /* For Debug uses */
-static const char *ResourceType(enum mlx4_resource rt)
+static const char *resource_str(enum mlx4_resource rt)
 {
 	switch (rt) {
 	case RES_QP: return "RES_QP";
@@ -307,6 +307,7 @@
 		&priv->mfunc.master.res_tracker.res_alloc[res_type];
 	int err = -EINVAL;
 	int allocated, free, reserved, guaranteed, from_free;
+	int from_rsvd;
 
 	if (slave > dev->num_vfs)
 		return -EINVAL;
@@ -321,11 +322,16 @@
 		res_alloc->res_reserved;
 	guaranteed = res_alloc->guaranteed[slave];
 
-	if (allocated + count > res_alloc->quota[slave])
+	if (allocated + count > res_alloc->quota[slave]) {
+		mlx4_warn(dev, "VF %d port %d res %s: quota exceeded, count %d alloc %d quota %d\n",
+			  slave, port, resource_str(res_type), count,
+			  allocated, res_alloc->quota[slave]);
 		goto out;
+	}
 
 	if (allocated + count <= guaranteed) {
 		err = 0;
+		from_rsvd = count;
 	} else {
 		/* portion may need to be obtained from free area */
 		if (guaranteed - allocated > 0)
@@ -333,8 +339,14 @@
 		else
 			from_free = count;
 
-		if (free - from_free > reserved)
+		from_rsvd = count - from_free;
+
+		if (free - from_free >= reserved)
 			err = 0;
+		else
+			mlx4_warn(dev, "VF %d port %d res %s: free pool empty, free %d from_free %d rsvd %d\n",
+				  slave, port, resource_str(res_type), free,
+				  from_free, reserved);
 	}
 
 	if (!err) {
@@ -342,9 +354,11 @@
 		if (port > 0) {
 			res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
 			res_alloc->res_port_free[port - 1] -= count;
+			res_alloc->res_port_rsvd[port - 1] -= from_rsvd;
 		} else {
 			res_alloc->allocated[slave] += count;
 			res_alloc->res_free -= count;
+			res_alloc->res_reserved -= from_rsvd;
 		}
 	}
 
@@ -360,17 +374,36 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct resource_allocator *res_alloc =
 		&priv->mfunc.master.res_tracker.res_alloc[res_type];
+	int allocated, guaranteed, from_rsvd;
 
 	if (slave > dev->num_vfs)
 		return;
 
 	spin_lock(&res_alloc->alloc_lock);
+
+	allocated = (port > 0) ?
+		res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+		res_alloc->allocated[slave];
+	guaranteed = res_alloc->guaranteed[slave];
+
+	if (allocated - count >= guaranteed) {
+		from_rsvd = 0;
+	} else {
+		/* portion may need to be returned to reserved area */
+		if (allocated - guaranteed > 0)
+			from_rsvd = count - (allocated - guaranteed);
+		else
+			from_rsvd = count;
+	}
+
 	if (port > 0) {
 		res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
 		res_alloc->res_port_free[port - 1] += count;
+		res_alloc->res_port_rsvd[port - 1] += from_rsvd;
 	} else {
 		res_alloc->allocated[slave] -= count;
 		res_alloc->res_free += count;
+		res_alloc->res_reserved += from_rsvd;
 	}
 
 	spin_unlock(&res_alloc->alloc_lock);
@@ -963,7 +996,7 @@
 		ret = alloc_srq_tr(id);
 		break;
 	case RES_MAC:
-		printk(KERN_ERR "implementation missing\n");
+		pr_err("implementation missing\n");
 		return NULL;
 	case RES_COUNTER:
 		ret = alloc_counter_tr(id);
@@ -1057,10 +1090,10 @@
 {
 	if (res->com.state == RES_MTT_BUSY ||
 	    atomic_read(&res->ref_count)) {
-		printk(KERN_DEBUG "%s-%d: state %s, ref_count %d\n",
-		       __func__, __LINE__,
-		       mtt_states_str(res->com.state),
-		       atomic_read(&res->ref_count));
+		pr_devel("%s-%d: state %s, ref_count %d\n",
+			 __func__, __LINE__,
+			 mtt_states_str(res->com.state),
+			 atomic_read(&res->ref_count));
 		return -EBUSY;
 	} else if (res->com.state != RES_MTT_ALLOCATED)
 		return -EPERM;
@@ -1533,7 +1566,7 @@
 			return err;
 
 		if (!fw_reserved(dev, qpn)) {
-			err = __mlx4_qp_alloc_icm(dev, qpn);
+			err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
 			if (err) {
 				res_abort_move(dev, slave, RES_QP, qpn);
 				return err;
@@ -1620,7 +1653,7 @@
 		if (err)
 			return err;
 
-		err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+		err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
 		if (err) {
 			res_abort_move(dev, slave, RES_MPT, id);
 			return err;
@@ -2828,10 +2861,12 @@
 }
 
 static int verify_qp_parameters(struct mlx4_dev *dev,
+				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
 				enum qp_transition transition, u8 slave)
 {
 	u32			qp_type;
+	u32			qpn;
 	struct mlx4_qp_context	*qp_ctx;
 	enum mlx4_qp_optpar	optpar;
 	int port;
@@ -2874,8 +2909,22 @@
 		default:
 			break;
 		}
-
 		break;
+
+	case MLX4_QP_ST_MLX:
+		qpn = vhcr->in_modifier & 0x7fffff;
+		port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+		if (transition == QP_TRANS_INIT2RTR &&
+		    slave != mlx4_master_func_num(dev) &&
+		    mlx4_is_qp_reserved(dev, qpn) &&
+		    !mlx4_vf_smi_enabled(dev, slave, port)) {
+			/* only enabled VFs may create MLX proxy QPs */
+			mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n",
+				 __func__, slave, port);
+			return -EPERM;
+		}
+		break;
+
 	default:
 		break;
 	}
@@ -3455,7 +3504,7 @@
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave);
 	if (err)
 		return err;
 
@@ -3509,7 +3558,7 @@
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave);
 	if (err)
 		return err;
 
@@ -3531,7 +3580,7 @@
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave);
 	if (err)
 		return err;
 
@@ -3568,7 +3617,7 @@
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave);
 	if (err)
 		return err;
 
@@ -3590,7 +3639,7 @@
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave);
 	if (err)
 		return err;
 
@@ -3881,7 +3930,7 @@
 		}
 	}
 	if (!be_mac) {
-		pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d .\n",
+		pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d\n",
 		       port);
 		return -EINVAL;
 	}
@@ -3978,7 +4027,7 @@
 	qpn = be32_to_cpu(ctrl->qpn) & 0xffffff;
 	err = get_res(dev, slave, qpn, RES_QP, &rqp);
 	if (err) {
-		pr_err("Steering rule with qpn 0x%x rejected.\n", qpn);
+		pr_err("Steering rule with qpn 0x%x rejected\n", qpn);
 		return err;
 	}
 	rule_header = (struct _rule_hw *)(ctrl + 1);
@@ -3996,7 +4045,7 @@
 	case MLX4_NET_TRANS_RULE_ID_IPV4:
 	case MLX4_NET_TRANS_RULE_ID_TCP:
 	case MLX4_NET_TRANS_RULE_ID_UDP:
-		pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n");
+		pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n");
 		if (add_eth_header(dev, slave, inbox, rlist, header_id)) {
 			err = -EINVAL;
 			goto err_put;
@@ -4005,7 +4054,7 @@
 			sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
 		break;
 	default:
-		pr_err("Corrupted mailbox.\n");
+		pr_err("Corrupted mailbox\n");
 		err = -EINVAL;
 		goto err_put;
 	}
@@ -4019,7 +4068,7 @@
 
 	err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn);
 	if (err) {
-		mlx4_err(dev, "Fail to add flow steering resources.\n ");
+		mlx4_err(dev, "Fail to add flow steering resources\n");
 		/* detach rule*/
 		mlx4_cmd(dev, vhcr->out_param, 0, 0,
 			 MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
@@ -4057,7 +4106,7 @@
 
 	err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0);
 	if (err) {
-		mlx4_err(dev, "Fail to remove flow steering resources.\n ");
+		mlx4_err(dev, "Fail to remove flow steering resources\n");
 		goto out;
 	}
 
@@ -4135,7 +4184,7 @@
 					if (print)
 						mlx4_dbg(dev,
 							 "%s id 0x%llx is busy\n",
-							  ResourceType(type),
+							  resource_str(type),
 							  r->res_id);
 					++busy;
 				} else {
@@ -4186,8 +4235,8 @@
 
 	err = move_all_busy(dev, slave, RES_QP);
 	if (err)
-		mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy"
-			  "for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
@@ -4225,10 +4274,8 @@
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_NATIVE);
 					if (err)
-						mlx4_dbg(dev, "rem_slave_qps: failed"
-							 " to move slave %d qpn %d to"
-							 " reset\n", slave,
-							 qp->local_qpn);
+						mlx4_dbg(dev, "rem_slave_qps: failed to move slave %d qpn %d to reset\n",
+							 slave, qp->local_qpn);
 					atomic_dec(&qp->rcq->ref_count);
 					atomic_dec(&qp->scq->ref_count);
 					atomic_dec(&qp->mtt->ref_count);
@@ -4262,8 +4309,8 @@
 
 	err = move_all_busy(dev, slave, RES_SRQ);
 	if (err)
-		mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(srq, tmp, srq_list, com.list) {
@@ -4293,9 +4340,7 @@
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_NATIVE);
 					if (err)
-						mlx4_dbg(dev, "rem_slave_srqs: failed"
-							 " to move slave %d srq %d to"
-							 " SW ownership\n",
+						mlx4_dbg(dev, "rem_slave_srqs: failed to move slave %d srq %d to SW ownership\n",
 							 slave, srqn);
 
 					atomic_dec(&srq->mtt->ref_count);
@@ -4330,8 +4375,8 @@
 
 	err = move_all_busy(dev, slave, RES_CQ);
 	if (err)
-		mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(cq, tmp, cq_list, com.list) {
@@ -4361,9 +4406,7 @@
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_NATIVE);
 					if (err)
-						mlx4_dbg(dev, "rem_slave_cqs: failed"
-							 " to move slave %d cq %d to"
-							 " SW ownership\n",
+						mlx4_dbg(dev, "rem_slave_cqs: failed to move slave %d cq %d to SW ownership\n",
 							 slave, cqn);
 					atomic_dec(&cq->mtt->ref_count);
 					state = RES_CQ_ALLOCATED;
@@ -4395,8 +4438,8 @@
 
 	err = move_all_busy(dev, slave, RES_MPT);
 	if (err)
-		mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) {
@@ -4431,9 +4474,7 @@
 						     MLX4_CMD_TIME_CLASS_A,
 						     MLX4_CMD_NATIVE);
 					if (err)
-						mlx4_dbg(dev, "rem_slave_mrs: failed"
-							 " to move slave %d mpt %d to"
-							 " SW ownership\n",
+						mlx4_dbg(dev, "rem_slave_mrs: failed to move slave %d mpt %d to SW ownership\n",
 							 slave, mptn);
 					if (mpt->mtt)
 						atomic_dec(&mpt->mtt->ref_count);
@@ -4465,8 +4506,8 @@
 
 	err = move_all_busy(dev, slave, RES_MTT);
 	if (err)
-		mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts  - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) {
@@ -4568,8 +4609,8 @@
 
 	err = move_all_busy(dev, slave, RES_EQ);
 	if (err)
-		mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(eq, tmp, eq_list, com.list) {
@@ -4601,9 +4642,8 @@
 							   MLX4_CMD_TIME_CLASS_A,
 							   MLX4_CMD_NATIVE);
 					if (err)
-						mlx4_dbg(dev, "rem_slave_eqs: failed"
-							 " to move slave %d eqs %d to"
-							 " SW ownership\n", slave, eqn);
+						mlx4_dbg(dev, "rem_slave_eqs: failed to move slave %d eqs %d to SW ownership\n",
+							 slave, eqn);
 					mlx4_free_cmd_mailbox(dev, mailbox);
 					atomic_dec(&eq->mtt->ref_count);
 					state = RES_EQ_RESERVED;
@@ -4632,8 +4672,8 @@
 
 	err = move_all_busy(dev, slave, RES_COUNTER);
 	if (err)
-		mlx4_warn(dev, "rem_slave_counters: Could not move all counters to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
@@ -4663,8 +4703,8 @@
 
 	err = move_all_busy(dev, slave, RES_XRCD);
 	if (err)
-		mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns to "
-			  "busy for slave %d\n", slave);
+		mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns - too busy for slave %d\n",
+			  slave);
 
 	spin_lock_irq(mlx4_tlock(dev));
 	list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) {
@@ -4809,10 +4849,8 @@
 				       0, MLX4_CMD_UPDATE_QP,
 				       MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 			if (err) {
-				mlx4_info(dev, "UPDATE_QP failed for slave %d, "
-					  "port %d, qpn %d (%d)\n",
-					  work->slave, port, qp->local_qpn,
-					  err);
+				mlx4_info(dev, "UPDATE_QP failed for slave %d, port %d, qpn %d (%d)\n",
+					  work->slave, port, qp->local_qpn, err);
 				errors++;
 			}
 		}

diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 98faf87..6714662 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c

@@ -103,11 +103,11 @@
 	if (*srqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &srq_table->table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 405c4fb..87d1b01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c

@@ -620,8 +620,8 @@
 			       mlx5_command_str(msg_to_opcode(ent->in)),
 			       msg_to_opcode(ent->in));
 	}
-	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err,
-		      deliv_status_to_str(ent->status), ent->status);
+	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
+		      err, deliv_status_to_str(ent->status), ent->status);
 
 	return err;
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 64a61b2..7f39ebc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c

@@ -208,7 +208,8 @@
 		 */
 		rmb();
 
-		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type));
+		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
+			      eq->eqn, eqe_type_str(eqe->type));
 		switch (eqe->type) {
 		case MLX5_EVENT_TYPE_COMP:
 			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
@@ -270,14 +271,16 @@
 				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
 				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
 
-				mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
+				mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+					      func_id, npages);
 				mlx5_core_req_pages_handler(dev, func_id, npages);
 			}
 			break;
 
 
 		default:
-			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn);
+			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
+				       eqe->type, eq->eqn);
 			break;
 		}
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c3eee5f..ee24f13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c

@@ -66,10 +66,10 @@
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
 			return err;
 		}
 	}
@@ -77,11 +77,11 @@
 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (err) {
 		dev_warn(&pdev->dev,
-			 "Warning: couldn't set 64-bit consistent PCI DMA mask.\n");
+			 "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
 			dev_err(&pdev->dev,
-				"Can't set consistent PCI DMA mask, aborting.\n");
+				"Can't set consistent PCI DMA mask, aborting\n");
 			return err;
 		}
 	}
@@ -95,7 +95,7 @@
 	int err = 0;
 
 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
-		dev_err(&pdev->dev, "Missing registers BAR, aborting.\n");
+		dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
 		return -ENODEV;
 	}
 
@@ -319,13 +319,13 @@
 
 	err = pci_enable_device(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n");
+		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
 		goto err_dbg;
 	}
 
 	err = request_bar(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "error requesting BARs, aborting.\n");
+		dev_err(&pdev->dev, "error requesting BARs, aborting\n");
 		goto err_disable;
 	}
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 68b74e1..f0c9f9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h

@@ -39,24 +39,26 @@
 
 extern int mlx5_core_debug_mask;
 
-#define mlx5_core_dbg(dev, format, arg...)				       \
-pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,   \
-	 current->pid, ##arg)
+#define mlx5_core_dbg(dev, format, ...)					\
+	pr_debug("%s:%s:%d:(pid %d): " format,				\
+		 (dev)->priv.name, __func__, __LINE__, current->pid,	\
+		 ##__VA_ARGS__)
 
-#define mlx5_core_dbg_mask(dev, mask, format, arg...)			       \
-do {									       \
-	if ((mask) & mlx5_core_debug_mask)				       \
-		pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name,       \
-			 __func__, __LINE__, current->pid, ##arg);	       \
+#define mlx5_core_dbg_mask(dev, mask, format, ...)			\
+do {									\
+	if ((mask) & mlx5_core_debug_mask)				\
+		mlx5_core_dbg(dev, format, ##__VA_ARGS__);		\
 } while (0)
 
-#define mlx5_core_err(dev, format, arg...) \
-pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,     \
-	current->pid, ##arg)
+#define mlx5_core_err(dev, format, ...)					\
+	pr_err("%s:%s:%d:(pid %d): " format,				\
+	       (dev)->priv.name, __func__, __LINE__, current->pid,	\
+	       ##__VA_ARGS__)
 
-#define mlx5_core_warn(dev, format, arg...) \
-pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,    \
-	current->pid, ##arg)
+#define mlx5_core_warn(dev, format, ...)				\
+	pr_warn("%s:%s:%d:(pid %d): " format,				\
+		(dev)->priv.name, __func__, __LINE__, current->pid,	\
+		##__VA_ARGS__)
 
 enum {
 	MLX5_CMD_DATA, /* print command payload only */

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 4cc9276..ba0401d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c

@@ -73,7 +73,7 @@
 	}
 
 	if (err) {
-		mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
+		mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
 		return err;
 	}
 
@@ -82,7 +82,11 @@
 		return mlx5_cmd_status_to_err(&lout.hdr);
 	}
 
+	mr->iova = be64_to_cpu(in->seg.start_addr);
+	mr->size = be64_to_cpu(in->seg.len);
 	mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+	mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      be32_to_cpu(lout.mkey), key, mr->key);
 
@@ -191,7 +195,8 @@
 	}
 
 	if (out.hdr.status) {
-		mlx5_core_err(dev, "create_psv bad status %d\n", out.hdr.status);
+		mlx5_core_err(dev, "create_psv bad status %d\n",
+			      out.hdr.status);
 		return mlx5_cmd_status_to_err(&out.hdr);
 	}
 
@@ -220,7 +225,8 @@
 	}
 
 	if (out.hdr.status) {
-		mlx5_core_err(dev, "destroy_psv bad status %d\n", out.hdr.status);
+		mlx5_core_err(dev, "destroy_psv bad status %d\n",
+			      out.hdr.status);
 		err = mlx5_cmd_status_to_err(&out.hdr);
 		goto out;
 	}

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index d59790a..c2a953e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

@@ -311,7 +311,8 @@
 	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	if (err) {
-		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
+		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
+			       func_id, npages, err);
 		goto out_alloc;
 	}
 	dev->priv.fw_pages += npages;
@@ -319,7 +320,8 @@
 	if (out.hdr.status) {
 		err = mlx5_cmd_status_to_err(&out.hdr);
 		if (err) {
-			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status);
+			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
+				       func_id, npages, out.hdr.status);
 			goto out_alloc;
 		}
 	}
@@ -378,7 +380,7 @@
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
 	if (err) {
-		mlx5_core_err(dev, "failed recliaming pages\n");
+		mlx5_core_err(dev, "failed reclaiming pages\n");
 		goto out_free;
 	}
 	dev->priv.fw_pages -= npages;
@@ -414,8 +416,8 @@
 		err = give_pages(dev, req->func_id, req->npages, 1);
 
 	if (err)
-		mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ?
-			       "reclaim" : "give", err);
+		mlx5_core_warn(dev, "%s fail %d\n",
+			       req->npages < 0 ? "reclaim" : "give", err);
 
 	kfree(req);
 }
@@ -487,7 +489,8 @@
 					    optimal_reclaimed_pages(),
 					    &nclaimed);
 			if (err) {
-				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err);
+				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
+					       err);
 				return err;
 			}
 			if (nclaimed)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 5105762..8145b46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c

@@ -79,7 +79,7 @@
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	if (err) {
-		mlx5_core_warn(dev, "ret %d", err);
+		mlx5_core_warn(dev, "ret %d\n", err);
 		return err;
 	}
 
@@ -96,7 +96,7 @@
 	err = radix_tree_insert(&table->tree, qp->qpn, qp);
 	spin_unlock_irq(&table->lock);
 	if (err) {
-		mlx5_core_warn(dev, "err %d", err);
+		mlx5_core_warn(dev, "err %d\n", err);
 		goto err_cmd;
 	}
 

diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c
index 16435b3..6c7c78ba 100644
--- a/drivers/net/ethernet/micrel/ks8695net.c
+++ b/drivers/net/ethernet/micrel/ks8695net.c

@@ -1504,15 +1504,15 @@
 	if (ksp->phyiface_regs && ksp->link_irq == -1) {
 		ks8695_init_switch(ksp);
 		ksp->dtype = KS8695_DTYPE_LAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	} else if (ksp->phyiface_regs && ksp->link_irq != -1) {
 		ks8695_init_wan_phy(ksp);
 		ksp->dtype = KS8695_DTYPE_WAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_wan_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_wan_ethtool_ops;
 	} else {
 		/* No initialisation since HPNA does not have a PHY */
 		ksp->dtype = KS8695_DTYPE_HPNA;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	}
 
 	/* And bring up the net_device with the net core */

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index e0c92e0..66d4ab7 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c

@@ -26,6 +26,8 @@
 #include <linux/regulator/consumer.h>
 
 #include <linux/spi/spi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
 
 #include "ks8851.h"
 
@@ -85,6 +87,8 @@
  * @eeprom_size: Companion eeprom size in Bytes, 0 if no eeprom
  * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM.
  * @vdd_reg:	Optional regulator supplying the chip
+ * @vdd_io: Optional digital power supply for IO
+ * @gpio: Optional reset_n gpio
  *
  * The @lock ensures that the chip is protected when certain operations are
  * in progress. When the read or write packet transfer is in progress, most
@@ -133,6 +137,8 @@
 
 	struct eeprom_93cx6	eeprom;
 	struct regulator	*vdd_reg;
+	struct regulator	*vdd_io;
+	int			gpio;
 };
 
 static int msg_enable;
@@ -1404,6 +1410,7 @@
 	struct ks8851_net *ks;
 	int ret;
 	unsigned cider;
+	int gpio;
 
 	ndev = alloc_etherdev(sizeof(struct ks8851_net));
 	if (!ndev)
@@ -1417,20 +1424,53 @@
 	ks->spidev = spi;
 	ks->tx_space = 6144;
 
-	ks->vdd_reg = regulator_get_optional(&spi->dev, "vdd");
-	if (IS_ERR(ks->vdd_reg)) {
-		ret = PTR_ERR(ks->vdd_reg);
-		if (ret == -EPROBE_DEFER)
-			goto err_reg;
-	} else {
-		ret = regulator_enable(ks->vdd_reg);
+	gpio = of_get_named_gpio_flags(spi->dev.of_node, "reset-gpios",
+				       0, NULL);
+	if (gpio == -EPROBE_DEFER) {
+		ret = gpio;
+		goto err_gpio;
+	}
+
+	ks->gpio = gpio;
+	if (gpio_is_valid(gpio)) {
+		ret = devm_gpio_request_one(&spi->dev, gpio,
+					    GPIOF_OUT_INIT_LOW, "ks8851_rst_n");
 		if (ret) {
-			dev_err(&spi->dev, "regulator enable fail: %d\n",
-				ret);
-			goto err_reg_en;
+			dev_err(&spi->dev, "reset gpio request failed\n");
+			goto err_gpio;
 		}
 	}
 
+	ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io");
+	if (IS_ERR(ks->vdd_io)) {
+		ret = PTR_ERR(ks->vdd_io);
+		goto err_reg_io;
+	}
+
+	ret = regulator_enable(ks->vdd_io);
+	if (ret) {
+		dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
+			ret);
+		goto err_reg_io;
+	}
+
+	ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd");
+	if (IS_ERR(ks->vdd_reg)) {
+		ret = PTR_ERR(ks->vdd_reg);
+		goto err_reg;
+	}
+
+	ret = regulator_enable(ks->vdd_reg);
+	if (ret) {
+		dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
+			ret);
+		goto err_reg;
+	}
+
+	if (gpio_is_valid(gpio)) {
+		usleep_range(10000, 11000);
+		gpio_set_value(gpio, 1);
+	}
 
 	mutex_init(&ks->lock);
 	spin_lock_init(&ks->statelock);
@@ -1471,7 +1511,7 @@
 
 	skb_queue_head_init(&ks->txq);
 
-	SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops);
+	ndev->ethtool_ops = &ks8851_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &spi->dev);
 
 	spi_set_drvdata(spi, ks);
@@ -1527,13 +1567,14 @@
 	free_irq(ndev->irq, ks);
 
 err_irq:
+	if (gpio_is_valid(gpio))
+		gpio_set_value(gpio, 0);
 err_id:
-	if (!IS_ERR(ks->vdd_reg))
-		regulator_disable(ks->vdd_reg);
-err_reg_en:
-	if (!IS_ERR(ks->vdd_reg))
-		regulator_put(ks->vdd_reg);
+	regulator_disable(ks->vdd_reg);
 err_reg:
+	regulator_disable(ks->vdd_io);
+err_reg_io:
+err_gpio:
 	free_netdev(ndev);
 	return ret;
 }
@@ -1547,18 +1588,24 @@
 
 	unregister_netdev(priv->netdev);
 	free_irq(spi->irq, priv);
-	if (!IS_ERR(priv->vdd_reg)) {
-		regulator_disable(priv->vdd_reg);
-		regulator_put(priv->vdd_reg);
-	}
+	if (gpio_is_valid(priv->gpio))
+		gpio_set_value(priv->gpio, 0);
+	regulator_disable(priv->vdd_reg);
+	regulator_disable(priv->vdd_io);
 	free_netdev(priv->netdev);
 
 	return 0;
 }
 
+static const struct of_device_id ks8851_match_table[] = {
+	{ .compatible = "micrel,ks8851" },
+	{ }
+};
+
 static struct spi_driver ks8851_driver = {
 	.driver = {
 		.name = "ks8851",
+		.of_match_table = ks8851_match_table,
 		.owner = THIS_MODULE,
 		.pm = &ks8851_pm_ops,
 	},

diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 14ac0e2..064a48d 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c

@@ -4930,7 +4930,7 @@
 		 * Only reset the hardware if time between calls is long
 		 * enough.
 		 */
-		if (jiffies - last_reset <= dev->watchdog_timeo)
+		if (time_before_eq(jiffies, last_reset + dev->watchdog_timeo))
 			hw_priv = NULL;
 	}
 
@@ -7072,6 +7072,7 @@
 		dev = alloc_etherdev(sizeof(struct dev_priv));
 		if (!dev)
 			goto pcidev_init_reg_err;
+		SET_NETDEV_DEV(dev, &pdev->dev);
 		info->netdev[i] = dev;
 
 		priv = netdev_priv(dev);
@@ -7106,7 +7107,7 @@
 		}
 
 		dev->netdev_ops = &netdev_ops;
-		SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+		dev->ethtool_ops = &netdev_ethtool_ops;
 		if (register_netdev(dev))
 			goto pcidev_init_reg_err;
 		port_set_power_saving(port, true);

diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index c7b40aa..b1b5f66 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c

@@ -1593,7 +1593,7 @@
 	dev->irq = spi->irq;
 	dev->netdev_ops = &enc28j60_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &enc28j60_ethtool_ops);
+	dev->ethtool_ops = &enc28j60_ethtool_ops;
 
 	enc28j60_lowpower(priv, true);
 

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 130f6b2..f3d5d79 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c

@@ -4112,7 +4112,7 @@
 	setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer,
 		    (unsigned long)mgp);
 
-	SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
+	netdev->ethtool_ops = &myri10ge_ethtool_ops;
 	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
 	status = register_netdev(netdev);
 	if (status != 0) {

diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 64ec2a4..291fba8 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c

@@ -927,7 +927,7 @@
 	dev->netdev_ops = &natsemi_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	if (mtu)
 		dev->mtu = mtu;

diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index dbccf1d..19bb824 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c

@@ -2030,7 +2030,7 @@
 		pci_dev->subsystem_vendor, pci_dev->subsystem_device);
 
 	ndev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ops);
+	ndev->ethtool_ops = &ops;
 	ndev->watchdog_timeo = 5 * HZ;
 	pci_set_drvdata(pci_dev, ndev);
 

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index a2844ff..be58764 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c

@@ -534,15 +534,6 @@
 	netif_tx_start_all_queues(sp->dev);
 }
 
-static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
-{
-	if (!sp->config.multiq)
-		sp->mac_control.fifos[fifo_no].queue_state =
-			FIFO_QUEUE_START;
-
-	netif_tx_start_all_queues(sp->dev);
-}
-
 static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp)
 {
 	if (!sp->config.multiq) {
@@ -5369,8 +5360,8 @@
 		ethtool_cmd_speed_set(info, SPEED_10000);
 		info->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(info, -1);
-		info->duplex = -1;
+		ethtool_cmd_speed_set(info, SPEED_UNKNOWN);
+		info->duplex = DUPLEX_UNKNOWN;
 	}
 
 	info->autoneg = AUTONEG_DISABLE;
@@ -7919,7 +7910,7 @@
 
 	/*  Driver entry points */
 	dev->netdev_ops = &s2io_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6 |
 		NETIF_F_RXCSUM | NETIF_F_LRO;

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c
index 089b713..2bbd01f 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c

@@ -120,7 +120,6 @@
 {
 	u64 val64;
 	u32 i = 0;
-	enum vxge_hw_status ret = VXGE_HW_FAIL;
 
 	udelay(10);
 
@@ -139,7 +138,7 @@
 		mdelay(1);
 	} while (++i <= max_millis);
 
-	return ret;
+	return VXGE_HW_FAIL;
 }
 
 static inline enum vxge_hw_status
@@ -1682,12 +1681,10 @@
 			struct __vxge_hw_device *hldev,
 			struct vxge_hw_device_stats_sw_info *sw_stats)
 {
-	enum vxge_hw_status status = VXGE_HW_OK;
-
 	memcpy(sw_stats, &hldev->stats.sw_dev_info_stats,
 		sizeof(struct vxge_hw_device_stats_sw_info));
 
-	return status;
+	return VXGE_HW_OK;
 }
 
 /*
@@ -3228,7 +3225,6 @@
 vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask)
 {
 	struct vxge_hw_vpmgmt_reg       __iomem *vpmgmt_reg;
-	enum vxge_hw_status status = VXGE_HW_OK;
 	int i = 0, j = 0;
 
 	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
@@ -3241,7 +3237,7 @@
 				return VXGE_HW_FAIL;
 		}
 	}
-	return status;
+	return VXGE_HW_OK;
 }
 /*
  * vxge_hw_mgmt_reg_Write - Write Titan register.
@@ -3979,7 +3975,6 @@
 {
 	u32 i, mtu = 0, max_pyld = 0;
 	u64 val64;
-	enum vxge_hw_status status = VXGE_HW_OK;
 
 	for (i = 0; i < VXGE_HW_MAC_MAX_MAC_PORT_ID; i++) {
 
@@ -4009,7 +4004,7 @@
 	else
 		VXGE_HW_DEVICE_LINK_STATE_SET(vpath->hldev, VXGE_HW_LINK_DOWN);
 
-	return status;
+	return VXGE_HW_OK;
 }
 
 /*
@@ -4039,14 +4034,13 @@
 __vxge_hw_vpath_reset(struct __vxge_hw_device *hldev, u32 vp_id)
 {
 	u64 val64;
-	enum vxge_hw_status status = VXGE_HW_OK;
 
 	val64 = VXGE_HW_CMN_RSTHDLR_CFG0_SW_RESET_VPATH(1 << (16 - vp_id));
 
 	__vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(val64, 0, 32),
 				&hldev->common_reg->cmn_rsthdlr_cfg0);
 
-	return status;
+	return VXGE_HW_OK;
 }
 
 /*
@@ -4227,7 +4221,6 @@
 __vxge_hw_vpath_mac_configure(struct __vxge_hw_device *hldev, u32 vp_id)
 {
 	u64 val64;
-	enum vxge_hw_status status = VXGE_HW_OK;
 	struct __vxge_hw_virtualpath *vpath;
 	struct vxge_hw_vp_config *vp_config;
 	struct vxge_hw_vpath_reg __iomem *vp_reg;
@@ -4283,7 +4276,7 @@
 
 		writeq(val64, &vp_reg->rxmac_vcfg1);
 	}
-	return status;
+	return VXGE_HW_OK;
 }
 
 /*
@@ -4295,7 +4288,6 @@
 __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
 {
 	u64 val64;
-	enum vxge_hw_status status = VXGE_HW_OK;
 	struct __vxge_hw_virtualpath *vpath;
 	struct vxge_hw_vpath_reg __iomem *vp_reg;
 	struct vxge_hw_vp_config *config;
@@ -4545,7 +4537,7 @@
 	val64 |= VXGE_HW_TIM_WRKLD_CLC_CNT_RX_TX(3);
 	writeq(val64, &vp_reg->tim_wrkld_clc);
 
-	return status;
+	return VXGE_HW_OK;
 }
 
 /*

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
index f8f0738..b07d552 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c

@@ -62,8 +62,8 @@
 		ethtool_cmd_speed_set(info, SPEED_10000);
 		info->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(info, -1);
-		info->duplex = -1;
+		ethtool_cmd_speed_set(info, SPEED_UNKNOWN);
+		info->duplex = DUPLEX_UNKNOWN;
 	}
 
 	info->autoneg = AUTONEG_DISABLE;
@@ -1128,5 +1128,5 @@
 
 void vxge_initialize_ethtool_ops(struct net_device *ndev)
 {
-	SET_ETHTOOL_OPS(ndev, &vxge_ethtool_ops);
+	ndev->ethtool_ops = &vxge_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index d107bcb..7a0dead 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c

@@ -2122,7 +2122,7 @@
 static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
 {
 	fifo->interrupt_count++;
-	if (jiffies > fifo->jiffies + HZ / 100) {
+	if (time_before(fifo->jiffies + HZ / 100, jiffies)) {
 		struct __vxge_hw_fifo *hw_fifo = fifo->handle;
 
 		fifo->jiffies = jiffies;
@@ -2150,7 +2150,7 @@
 static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
 {
 	ring->interrupt_count++;
-	if (jiffies > ring->jiffies + HZ / 100) {
+	if (time_before(ring->jiffies + HZ / 100, jiffies)) {
 		struct __vxge_hw_ring *hw_ring = ring->handle;
 
 		ring->jiffies = jiffies;

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index fddb464..9afc536 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c

@@ -406,7 +406,7 @@
 
 #define NV_RX_DESCRIPTORVALID	(1<<16)
 #define NV_RX_MISSEDFRAME	(1<<17)
-#define NV_RX_SUBSTRACT1	(1<<18)
+#define NV_RX_SUBTRACT1		(1<<18)
 #define NV_RX_ERROR1		(1<<23)
 #define NV_RX_ERROR2		(1<<24)
 #define NV_RX_ERROR3		(1<<25)
@@ -423,7 +423,7 @@
 #define NV_RX2_CHECKSUM_IP_TCP	(0x14000000)
 #define NV_RX2_CHECKSUM_IP_UDP	(0x18000000)
 #define NV_RX2_DESCRIPTORVALID	(1<<29)
-#define NV_RX2_SUBSTRACT1	(1<<25)
+#define NV_RX2_SUBTRACT1	(1<<25)
 #define NV_RX2_ERROR1		(1<<18)
 #define NV_RX2_ERROR2		(1<<19)
 #define NV_RX2_ERROR3		(1<<20)
@@ -2832,7 +2832,7 @@
 					}
 					/* framing errors are soft errors */
 					else if ((flags & NV_RX_ERROR_MASK) == NV_RX_FRAMINGERR) {
-						if (flags & NV_RX_SUBSTRACT1)
+						if (flags & NV_RX_SUBTRACT1)
 							len--;
 					}
 					/* the rest are hard errors */
@@ -2863,7 +2863,7 @@
 					}
 					/* framing errors are soft errors */
 					else if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_FRAMINGERR) {
-						if (flags & NV_RX2_SUBSTRACT1)
+						if (flags & NV_RX2_SUBTRACT1)
 							len--;
 					}
 					/* the rest are hard errors */
@@ -2937,7 +2937,7 @@
 				}
 				/* framing errors are soft errors */
 				else if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_FRAMINGERR) {
-					if (flags & NV_RX2_SUBSTRACT1)
+					if (flags & NV_RX2_SUBTRACT1)
 						len--;
 				}
 				/* the rest are hard errors */
@@ -4285,8 +4285,8 @@
 		if (np->duplex)
 			ecmd->duplex = DUPLEX_FULL;
 	} else {
-		speed = -1;
-		ecmd->duplex = -1;
+		speed = SPEED_UNKNOWN;
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 	ethtool_cmd_speed_set(ecmd, speed);
 	ecmd->autoneg = np->autoneg;
@@ -5766,7 +5766,7 @@
 		dev->netdev_ops = &nv_netdev_ops_optimized;
 
 	netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->watchdog_timeo = NV_WATCHDOG_TIMEO;
 
 	pci_set_drvdata(pci_dev, dev);

diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 422d9b5..8706c0d 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c

@@ -1361,7 +1361,7 @@
 	__lpc_eth_clock_enable(pldat, true);
 
 	/* Map IO space */
-	pldat->net_base = ioremap(res->start, res->end - res->start + 1);
+	pldat->net_base = ioremap(res->start, resource_size(res));
 	if (!pldat->net_base) {
 		dev_err(&pdev->dev, "failed to map registers\n");
 		ret = -ENOMEM;
@@ -1417,10 +1417,8 @@
 	}
 	pldat->dma_buff_base_p = dma_handle;
 
-	netdev_dbg(ndev, "IO address start     :0x%08x\n",
-			res->start);
-	netdev_dbg(ndev, "IO address size      :%d\n",
-			res->end - res->start + 1);
+	netdev_dbg(ndev, "IO address space     :%pR\n", res);
+	netdev_dbg(ndev, "IO address size      :%zu\n", (size_t)resource_size(res));
 	netdev_dbg(ndev, "IO address (mapped)  :0x%p\n",
 			pldat->net_base);
 	netdev_dbg(ndev, "IRQ number           :%d\n", ndev->irq);

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index a588ffd..44c8be1 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig

@@ -4,7 +4,7 @@
 
 config PCH_GBE
 	tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
-	depends on PCI && (X86 || COMPILE_TEST)
+	depends on PCI && (X86_32 || COMPILE_TEST)
 	select MII
 	select PTP_1588_CLOCK_PCH
 	---help---

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 826f0cc..4fe8ea9 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c

@@ -91,7 +91,7 @@
 	ecmd->advertising &= ~(ADVERTISED_TP | ADVERTISED_1000baseT_Half);
 
 	if (!netif_carrier_ok(adapter->netdev))
-		ethtool_cmd_speed_set(ecmd, -1);
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
 	return ret;
 }
 
@@ -508,5 +508,5 @@
 
 void pch_gbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &pch_gbe_ethtool_ops);
+	netdev->ethtool_ops = &pch_gbe_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index b6bdeb3..9a997e4 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c

@@ -724,10 +724,8 @@
 
 	/* The Hamachi-specific entries in the device structure. */
 	dev->netdev_ops = &hamachi_netdev_ops;
-	if (chip_tbl[hmp->chip_id].flags & CanHaveMII)
-		SET_ETHTOOL_OPS(dev, &ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(dev, &ethtool_ops_no_mii);
+	dev->ethtool_ops = (chip_tbl[hmp->chip_id].flags & CanHaveMII) ?
+		&ethtool_ops : &ethtool_ops_no_mii;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	if (mtu)
 		dev->mtu = mtu;

diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 9a6cb48..69a8dc0 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c

@@ -472,7 +472,7 @@
 
 	/* The Yellowfin-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	if (mtu)

diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index c14bd31..d49cba1 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig

@@ -66,6 +66,17 @@
 	  Say Y here if you want to enable hardware offload support for
 	  Virtual eXtensible Local Area Network (VXLAN) in the driver.
 
+config QLCNIC_HWMON
+	bool "QLOGIC QLCNIC 82XX and 83XX family HWMON support"
+	depends on QLCNIC && HWMON && !(QLCNIC=y && HWMON=m)
+	default y
+	---help---
+	  This configuration parameter can be used to read the
+	  board temperature in Converged Ethernet devices
+	  supported by qlcnic.
+
+	  This data is available via the hwmon sysfs interface.
+
 config QLGE
 	tristate "QLogic QLGE 10Gb Ethernet Driver Support"
 	depends on PCI

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index f09c35d..5bf0581 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c

@@ -1373,7 +1373,7 @@
 
 	netxen_nic_change_mtu(netdev, netdev->mtu);
 
-	SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops);
+	netdev->ethtool_ops = &netxen_nic_ethtool_ops;
 
 	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
 	                      NETIF_F_RXCSUM;

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2eabd44..b5d6bc1 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c

@@ -3838,7 +3838,7 @@
 
 	/* Set driver entry points */
 	ndev->netdev_ops = &ql3xxx_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ql3xxx_ethtool_ops);
+	ndev->ethtool_ops = &ql3xxx_ethtool_ops;
 	ndev->watchdog_timeo = 5 * HZ;
 
 	netif_napi_add(ndev, &qdev->napi, ql_poll, 64);

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index f785d01..be618b9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h

@@ -39,8 +39,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 57
-#define QLCNIC_LINUX_VERSIONID  "5.3.57"
+#define _QLCNIC_LINUX_SUBVERSION 60
+#define QLCNIC_LINUX_VERSIONID  "5.3.60"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
@@ -441,6 +441,8 @@
 	u32	rsvd1[0];
 };
 
+#define QLC_PEX_DMA_READ_SIZE	(PAGE_SIZE * 16)
+
 struct qlcnic_fw_dump {
 	u8	clr;	/* flag to indicate if dump is cleared */
 	bool	enable; /* enable/disable dump */
@@ -537,6 +539,7 @@
 	u8 phys_port_id[ETH_ALEN];
 	u8 lb_mode;
 	u16 vxlan_port;
+	struct device *hwmon_dev;
 };
 
 struct qlcnic_adapter_stats {
@@ -1018,6 +1021,8 @@
 #define QLCNIC_DEL_VXLAN_PORT		0x200000
 #endif
 
+#define QLCNIC_VLAN_FILTERING		0x800000
+
 #define QLCNIC_IS_MSI_FAMILY(adapter) \
 	((adapter)->flags & (QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED))
 #define QLCNIC_IS_TSO_CAPABLE(adapter)  \
@@ -1316,6 +1321,7 @@
 #define QL_STATUS_INVALID_PARAM	-1
 
 #define MAX_BW			100	/* % of link speed */
+#define MIN_BW			1	/* % of link speed */
 #define MAX_VLAN_ID		4095
 #define MIN_VLAN_ID		2
 #define DEFAULT_MAC_LEARN	1
@@ -1692,7 +1698,7 @@
 int qlcnic_setup_netdev(struct qlcnic_adapter *, struct net_device *, int);
 void qlcnic_set_netdev_features(struct qlcnic_adapter *,
 				struct qlcnic_esw_func_cfg *);
-void qlcnic_sriov_vf_schedule_multi(struct net_device *);
+void qlcnic_sriov_vf_set_multi(struct net_device *);
 int qlcnic_is_valid_nic_func(struct qlcnic_adapter *, u8);
 int qlcnic_get_pci_func_type(struct qlcnic_adapter *, u16, u16 *, u16 *,
 			     u16 *);
@@ -2338,6 +2344,16 @@
 	return (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X) ? true : false;
 }
 
+/* Report whether the adapter passes the SR-IOV PF check or the VF check. */
+static inline bool qlcnic_sriov_check(struct qlcnic_adapter *adapter)
+{
+	if (qlcnic_sriov_pf_check(adapter))
+		return true;
+	if (qlcnic_sriov_vf_check(adapter))
+		return true;
+	return false;
+}
+
 static inline u32 qlcnic_get_vnic_func_count(struct qlcnic_adapter *adapter)
 {
 	if (qlcnic_84xx_check(adapter))
@@ -2345,4 +2361,18 @@
 	else
 		return QLC_DEFAULT_VNIC_COUNT;
 }
+
+#ifdef CONFIG_QLCNIC_HWMON
+void qlcnic_register_hwmon_dev(struct qlcnic_adapter *);
+void qlcnic_unregister_hwmon_dev(struct qlcnic_adapter *);
+#else
+/* CONFIG_QLCNIC_HWMON is not set: registering is a no-op. */
+static inline void qlcnic_register_hwmon_dev(struct qlcnic_adapter *adapter)
+{
+}
+/* CONFIG_QLCNIC_HWMON is not set: unregistering is a no-op. */
+static inline void qlcnic_unregister_hwmon_dev(struct qlcnic_adapter *adapter)
+{
+}
+#endif
 #endif				/* __QLCNIC_H_ */

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index b7cffb4..a4a4ec0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c

@@ -33,6 +33,7 @@
 #define RSS_HASHTYPE_IP_TCP		0x3
 #define QLC_83XX_FW_MBX_CMD		0
 #define QLC_SKIP_INACTIVE_PCI_REGS	7
+#define QLC_MAX_LEGACY_FUNC_SUPP	8
 
 static const struct qlcnic_mailbox_metadata qlcnic_83xx_mbx_tbl[] = {
 	{QLCNIC_CMD_CONFIGURE_IP_ADDR, 6, 1},
@@ -357,8 +358,15 @@
 	if (!ahw->intr_tbl)
 		return -ENOMEM;
 
-	if (!(adapter->flags & QLCNIC_MSIX_ENABLED))
+	if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) {
+		if (adapter->ahw->pci_func >= QLC_MAX_LEGACY_FUNC_SUPP) {
+			dev_err(&adapter->pdev->dev, "PCI function number 8 and higher are not supported with legacy interrupt, func 0x%x\n",
+				ahw->pci_func);
+			return -EOPNOTSUPP;
+		}
+
 		qlcnic_83xx_enable_legacy(adapter);
+	}
 
 	for (i = 0; i < num_msix; i++) {
 		if (adapter->flags & QLCNIC_MSIX_ENABLED)
@@ -879,6 +887,9 @@
 			return 0;
 		}
 	}
+
+	dev_err(&adapter->pdev->dev, "%s: Invalid mailbox command opcode 0x%x\n",
+		__func__, type);
 	return -EINVAL;
 }
 
@@ -3026,19 +3037,18 @@
 	QLCRDX(adapter->ahw, QLC_83XX_DRV_UNLOCK);
 }
 
-int qlcnic_83xx_ms_mem_write128(struct qlcnic_adapter *adapter, u64 addr,
+int qlcnic_ms_mem_write128(struct qlcnic_adapter *adapter, u64 addr,
 				u32 *data, u32 count)
 {
 	int i, j, ret = 0;
 	u32 temp;
-	int err = 0;
 
 	/* Check alignment */
 	if (addr & 0xF)
 		return -EIO;
 
 	mutex_lock(&adapter->ahw->mem_lock);
-	qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_ADDR_HI, 0);
+	qlcnic_ind_wr(adapter, QLCNIC_MS_ADDR_HI, 0);
 
 	for (i = 0; i < count; i++, addr += 16) {
 		if (!((ADDR_IN_RANGE(addr, QLCNIC_ADDR_QDR_NET,
@@ -3049,26 +3059,16 @@
 			return -EIO;
 		}
 
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_ADDR_LO, addr);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_WRTDATA_LO,
-					     *data++);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_WRTDATA_HI,
-					     *data++);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_WRTDATA_ULO,
-					     *data++);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_WRTDATA_UHI,
-					     *data++);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_CTRL,
-					     QLCNIC_TA_WRITE_ENABLE);
-		qlcnic_83xx_wrt_reg_indirect(adapter, QLCNIC_MS_CTRL,
-					     QLCNIC_TA_WRITE_START);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_ADDR_LO, addr);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_WRTDATA_LO, *data++);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_WRTDATA_HI, *data++);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_WRTDATA_ULO, *data++);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_WRTDATA_UHI, *data++);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_CTRL, QLCNIC_TA_WRITE_ENABLE);
+		qlcnic_ind_wr(adapter, QLCNIC_MS_CTRL, QLCNIC_TA_WRITE_START);
 
 		for (j = 0; j < MAX_CTL_CHECK; j++) {
-			temp = QLCRD32(adapter, QLCNIC_MS_CTRL, &err);
-			if (err == -EIO) {
-				mutex_unlock(&adapter->ahw->mem_lock);
-				return err;
-			}
+			temp = qlcnic_ind_rd(adapter, QLCNIC_MS_CTRL);
 
 			if ((temp & TA_CTL_BUSY) == 0)
 				break;

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index 88d809c..2bf101a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h

@@ -418,7 +418,6 @@
 #define QLC_83XX_GET_FUNC_MODE_FROM_NPAR_INFO(val)	(val & 0x80000000)
 #define QLC_83XX_GET_LRO_CAPABILITY(val)		(val & 0x20)
 #define QLC_83XX_GET_LSO_CAPABILITY(val)		(val & 0x40)
-#define QLC_83XX_GET_LSO_CAPABILITY(val)		(val & 0x40)
 #define QLC_83XX_GET_HW_LRO_CAPABILITY(val)		(val & 0x400)
 #define QLC_83XX_GET_VLAN_ALIGN_CAPABILITY(val)	(val & 0x4000)
 #define QLC_83XX_GET_FW_LRO_MSS_CAPABILITY(val)	(val & 0x20000)
@@ -560,7 +559,7 @@
 void qlcnic_83xx_napi_enable(struct qlcnic_adapter *);
 void qlcnic_83xx_napi_disable(struct qlcnic_adapter *);
 int qlcnic_83xx_config_led(struct qlcnic_adapter *, u32, u32);
-void qlcnic_ind_wr(struct qlcnic_adapter *, u32, u32);
+int qlcnic_ind_wr(struct qlcnic_adapter *, u32, u32);
 int qlcnic_ind_rd(struct qlcnic_adapter *, u32);
 int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *);
 int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *,
@@ -617,7 +616,6 @@
 int qlcnic_83xx_lock_driver(struct qlcnic_adapter *);
 void qlcnic_83xx_unlock_driver(struct qlcnic_adapter *);
 int qlcnic_83xx_set_default_offload_settings(struct qlcnic_adapter *);
-int qlcnic_83xx_ms_mem_write128(struct qlcnic_adapter *, u64, u32 *, u32);
 int qlcnic_83xx_idc_vnic_pf_entry(struct qlcnic_adapter *);
 int qlcnic_83xx_disable_vnic_mode(struct qlcnic_adapter *, int);
 int qlcnic_83xx_config_vnic_opmode(struct qlcnic_adapter *);
@@ -659,4 +657,5 @@
 u32 qlcnic_83xx_get_cap_size(void *, int);
 void qlcnic_83xx_set_sys_info(void *, int, u32);
 void qlcnic_83xx_store_cap_mask(void *, u32);
+int qlcnic_ms_mem_write128(struct qlcnic_adapter *, u64, u32 *, u32);
 #endif

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index ba20c72..f33559b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c

@@ -1363,8 +1363,8 @@
 		return ret;
 	}
 	/* 16 byte write to MS memory */
-	ret = qlcnic_83xx_ms_mem_write128(adapter, dest, (u32 *)p_cache,
-					  size / 16);
+	ret = qlcnic_ms_mem_write128(adapter, dest, (u32 *)p_cache,
+				     size / 16);
 	if (ret) {
 		vfree(p_cache);
 		return ret;
@@ -1389,8 +1389,8 @@
 	p_cache = (u32 *)fw->data;
 	addr = (u64)dest;
 
-	ret = qlcnic_83xx_ms_mem_write128(adapter, addr,
-					  p_cache, size / 16);
+	ret = qlcnic_ms_mem_write128(adapter, addr,
+				     p_cache, size / 16);
 	if (ret) {
 		dev_err(&adapter->pdev->dev, "MS memory write failed\n");
 		release_firmware(fw);
@@ -1405,8 +1405,8 @@
 			data[i] = fw->data[size + i];
 		for (; i < 16; i++)
 			data[i] = 0;
-		ret = qlcnic_83xx_ms_mem_write128(adapter, addr,
-						  (u32 *)data, 1);
+		ret = qlcnic_ms_mem_write128(adapter, addr,
+					     (u32 *)data, 1);
 		if (ret) {
 			dev_err(&adapter->pdev->dev,
 				"MS memory write failed\n");
@@ -2181,6 +2181,8 @@
 		max_sds_rings = QLCNIC_MAX_SDS_RINGS;
 		max_tx_rings = QLCNIC_MAX_TX_RINGS;
 	} else {
+		dev_err(&adapter->pdev->dev, "%s: Invalid opmode %d\n",
+			__func__, ret);
 		return -EIO;
 	}
 

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index c1e11f5..304e247 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c

@@ -1027,8 +1027,11 @@
 	u32 arg1;
 
 	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC ||
-	    !(adapter->eswitch[id].flags & QLCNIC_SWITCH_ENABLE))
+	    !(adapter->eswitch[id].flags & QLCNIC_SWITCH_ENABLE)) {
+		dev_err(&adapter->pdev->dev, "%s: Not a management function\n",
+			__func__);
 		return err;
+	}
 
 	arg1 = id | (enable_mirroring ? BIT_4 : 0);
 	arg1 |= pci_func << 8;
@@ -1318,8 +1321,12 @@
 	u32 arg1, arg2 = 0;
 	u8 pci_func;
 
-	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC) {
+		dev_err(&adapter->pdev->dev, "%s: Not a management function\n",
+			__func__);
 		return err;
+	}
+
 	pci_func = esw_cfg->pci_func;
 	index = qlcnic_is_valid_nic_func(adapter, pci_func);
 	if (index < 0)
@@ -1363,6 +1370,8 @@
 			arg1 &= ~(0x0ffff << 16);
 			break;
 	default:
+		dev_err(&adapter->pdev->dev, "%s: Invalid opmode 0x%x\n",
+			__func__, esw_cfg->op_mode);
 		return err;
 	}
 

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 5bacf52..1b7f3db 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c

@@ -726,6 +726,11 @@
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 	int err;
 
+	if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) {
+		netdev_err(dev, "No RSS/TSS support in non MSI-X mode\n");
+		return -EINVAL;
+	}
+
 	if (channel->other_count || channel->combined_count)
 		return -EINVAL;
 
@@ -734,7 +739,7 @@
 	if (err)
 		return err;
 
-	if (channel->rx_count) {
+	if (adapter->drv_sds_rings != channel->rx_count) {
 		err = qlcnic_validate_rings(adapter, channel->rx_count,
 					    QLCNIC_RX_QUEUE);
 		if (err) {
@@ -745,7 +750,7 @@
 		adapter->drv_rss_rings = channel->rx_count;
 	}
 
-	if (channel->tx_count) {
+	if (adapter->drv_tx_rings != channel->tx_count) {
 		err = qlcnic_validate_rings(adapter, channel->tx_count,
 					    QLCNIC_TX_QUEUE);
 		if (err) {

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 9f3adf4..851cb4a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c

@@ -373,12 +373,16 @@
 	return data;
 }
 
-void qlcnic_ind_wr(struct qlcnic_adapter *adapter, u32 addr, u32 data)
+int qlcnic_ind_wr(struct qlcnic_adapter *adapter, u32 addr, u32 data)
 {
+	int ret = 0;
+
 	if (qlcnic_82xx_check(adapter))
 		qlcnic_write_window_reg(addr, adapter->ahw->pci_base0, data);
 	else
-		qlcnic_83xx_wrt_reg_indirect(adapter, addr, data);
+		ret = qlcnic_83xx_wrt_reg_indirect(adapter, addr, data);
+
+	return ret;
 }
 
 static int
@@ -567,28 +571,14 @@
 void qlcnic_set_multi(struct net_device *netdev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	struct qlcnic_mac_vlan_list *cur;
-	struct netdev_hw_addr *ha;
-	size_t temp;
 
 	if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state))
 		return;
-	if (qlcnic_sriov_vf_check(adapter)) {
-		if (!netdev_mc_empty(netdev)) {
-			netdev_for_each_mc_addr(ha, netdev) {
-				temp = sizeof(struct qlcnic_mac_vlan_list);
-				cur = kzalloc(temp, GFP_ATOMIC);
-				if (cur == NULL)
-					break;
-				memcpy(cur->mac_addr,
-				       ha->addr, ETH_ALEN);
-				list_add_tail(&cur->list, &adapter->vf_mc_list);
-			}
-		}
-		qlcnic_sriov_vf_schedule_multi(adapter->netdev);
-		return;
-	}
-	__qlcnic_set_multi(netdev, 0);
+
+	if (qlcnic_sriov_vf_check(adapter))
+		qlcnic_sriov_vf_set_multi(netdev);
+	else
+		__qlcnic_set_multi(netdev, 0);
 }
 
 int qlcnic_82xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32 mode)
@@ -630,7 +620,7 @@
 	struct hlist_node *n;
 	struct hlist_head *head;
 	int i;
-	unsigned long time;
+	unsigned long expires;
 	u8 cmd;
 
 	for (i = 0; i < adapter->fhash.fbucket_size; i++) {
@@ -638,8 +628,8 @@
 		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			cmd =  tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL :
 						  QLCNIC_MAC_DEL;
-			time = tmp_fil->ftime;
-			if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) {
+			expires = tmp_fil->ftime + QLCNIC_FILTER_AGE * HZ;
+			if (time_before(expires, jiffies)) {
 				qlcnic_sre_macaddr_change(adapter,
 							  tmp_fil->faddr,
 							  tmp_fil->vlan_id,
@@ -657,8 +647,8 @@
 
 		hlist_for_each_entry_safe(tmp_fil, n, head, fnode)
 		{
-			time = tmp_fil->ftime;
-			if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) {
+			expires = tmp_fil->ftime + QLCNIC_FILTER_AGE * HZ;
+			if (time_before(expires, jiffies)) {
 				spin_lock_bh(&adapter->rx_mac_learn_lock);
 				adapter->rx_fhash.fnum--;
 				hlist_del(&tmp_fil->fnode);

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 173b3d1..e45bf09 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c

@@ -305,7 +305,6 @@
 {
 	struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data);
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
-	struct net_device *netdev = adapter->netdev;
 	u16 protocol = ntohs(skb->protocol);
 	struct qlcnic_filter *fil, *tmp_fil;
 	struct hlist_head *head;
@@ -314,27 +313,16 @@
 	u16 vlan_id = 0;
 	u8 hindex, hval;
 
-	if (!qlcnic_sriov_pf_check(adapter)) {
-		if (ether_addr_equal(phdr->h_source, adapter->mac_addr))
-			return;
-	} else {
+	if (ether_addr_equal(phdr->h_source, adapter->mac_addr))
+		return;
+
+	if (adapter->flags & QLCNIC_VLAN_FILTERING) {
 		if (protocol == ETH_P_8021Q) {
 			vh = (struct vlan_ethhdr *)skb->data;
 			vlan_id = ntohs(vh->h_vlan_TCI);
 		} else if (vlan_tx_tag_present(skb)) {
 			vlan_id = vlan_tx_tag_get(skb);
 		}
-
-		if (ether_addr_equal(phdr->h_source, adapter->mac_addr) &&
-		    !vlan_id)
-			return;
-	}
-
-	if (adapter->fhash.fnum >= adapter->fhash.fmax) {
-		adapter->stats.mac_filter_limit_overrun++;
-		netdev_info(netdev, "Can not add more than %d mac-vlan filters, configured %d\n",
-			    adapter->fhash.fmax, adapter->fhash.fnum);
-		return;
 	}
 
 	memcpy(&src_addr, phdr->h_source, ETH_ALEN);
@@ -353,6 +341,11 @@
 		}
 	}
 
+	if (unlikely(adapter->fhash.fnum >= adapter->fhash.fmax)) {
+		adapter->stats.mac_filter_limit_overrun++;
+		return;
+	}
+
 	fil = kzalloc(sizeof(struct qlcnic_filter), GFP_ATOMIC);
 	if (!fil)
 		return;
@@ -1216,8 +1209,7 @@
 	if (!skb)
 		return buffer;
 
-	if (adapter->drv_mac_learn &&
-	    (adapter->flags & QLCNIC_ESWITCH_ENABLED)) {
+	if (adapter->rx_mac_learn) {
 		t_vid = 0;
 		is_lb_pkt = qlcnic_82xx_is_lb_pkt(sts_data0);
 		qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid);
@@ -1293,8 +1285,7 @@
 	if (!skb)
 		return buffer;
 
-	if (adapter->drv_mac_learn &&
-	    (adapter->flags & QLCNIC_ESWITCH_ENABLED)) {
+	if (adapter->rx_mac_learn) {
 		t_vid = 0;
 		is_lb_pkt = qlcnic_82xx_is_lb_pkt(sts_data0);
 		qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid);

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 7e55e88..4fc1867 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c

@@ -378,7 +378,8 @@
 	if (!adapter->fdb_mac_learn)
 		return ndo_dflt_fdb_del(ndm, tb, netdev, addr);
 
-	if (adapter->flags & QLCNIC_ESWITCH_ENABLED) {
+	if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
+	    qlcnic_sriov_check(adapter)) {
 		if (is_unicast_ether_addr(addr)) {
 			err = dev_uc_del(netdev, addr);
 			if (!err)
@@ -402,7 +403,8 @@
 	if (!adapter->fdb_mac_learn)
 		return ndo_dflt_fdb_add(ndm, tb, netdev, addr, flags);
 
-	if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)) {
+	if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED) &&
+	    !qlcnic_sriov_check(adapter)) {
 		pr_info("%s: FDB e-switch is not enabled\n", __func__);
 		return -EOPNOTSUPP;
 	}
@@ -432,7 +434,8 @@
 	if (!adapter->fdb_mac_learn)
 		return ndo_dflt_fdb_dump(skb, ncb, netdev, idx);
 
-	if (adapter->flags & QLCNIC_ESWITCH_ENABLED)
+	if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
+	    qlcnic_sriov_check(adapter))
 		idx = ndo_dflt_fdb_dump(skb, ncb, netdev, idx);
 
 	return idx;
@@ -522,7 +525,7 @@
 #endif
 #ifdef CONFIG_QLCNIC_SRIOV
 	.ndo_set_vf_mac		= qlcnic_sriov_set_vf_mac,
-	.ndo_set_vf_tx_rate	= qlcnic_sriov_set_vf_tx_rate,
+	.ndo_set_vf_rate	= qlcnic_sriov_set_vf_tx_rate,
 	.ndo_get_vf_config	= qlcnic_sriov_get_vf_config,
 	.ndo_set_vf_vlan	= qlcnic_sriov_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= qlcnic_sriov_set_vf_spoofchk,
@@ -690,10 +693,10 @@
 		adapter->msix_entries[vector].entry = vector;
 
 restore:
-	err = pci_enable_msix(pdev, adapter->msix_entries, num_msix);
-	if (err > 0) {
+	err = pci_enable_msix_exact(pdev, adapter->msix_entries, num_msix);
+	if (err == -ENOSPC) {
 		if (!adapter->drv_tss_rings && !adapter->drv_rss_rings)
-			return -ENOSPC;
+			return err;
 
 		netdev_info(adapter->netdev,
 			    "Unable to allocate %d MSI-X vectors, Available vectors %d\n",
@@ -1014,6 +1017,8 @@
 
 		if (pfn >= ahw->max_vnic_func) {
 			ret = QL_STATUS_INVALID_PARAM;
+			dev_err(&adapter->pdev->dev, "%s: Invalid function 0x%x, max 0x%x\n",
+				__func__, pfn, ahw->max_vnic_func);
 			goto err_eswitch;
 		}
 
@@ -1915,8 +1920,6 @@
 	if (!test_and_clear_bit(__QLCNIC_DEV_UP, &adapter->state))
 		return;
 
-	if (qlcnic_sriov_vf_check(adapter))
-		qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc);
 	smp_mb();
 	netif_carrier_off(netdev);
 	adapter->ahw->linkup = 0;
@@ -1928,6 +1931,8 @@
 		qlcnic_delete_lb_filters(adapter);
 
 	qlcnic_nic_set_promisc(adapter, QLCNIC_NIU_NON_PROMISC_MODE);
+	if (qlcnic_sriov_vf_check(adapter))
+		qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc);
 
 	qlcnic_napi_disable(adapter);
 
@@ -2052,6 +2057,7 @@
 
 static int qlcnic_alloc_adapter_resources(struct qlcnic_adapter *adapter)
 {
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
 	int err = 0;
 
 	adapter->recv_ctx = kzalloc(sizeof(struct qlcnic_recv_context),
@@ -2061,6 +2067,18 @@
 		goto err_out;
 	}
 
+	if (qlcnic_83xx_check(adapter)) {
+		ahw->coal.type = QLCNIC_INTR_COAL_TYPE_RX_TX;
+		ahw->coal.tx_time_us = QLCNIC_DEF_INTR_COALESCE_TX_TIME_US;
+		ahw->coal.tx_packets = QLCNIC_DEF_INTR_COALESCE_TX_PACKETS;
+		ahw->coal.rx_time_us = QLCNIC_DEF_INTR_COALESCE_RX_TIME_US;
+		ahw->coal.rx_packets = QLCNIC_DEF_INTR_COALESCE_RX_PACKETS;
+	} else {
+		ahw->coal.type = QLCNIC_INTR_COAL_TYPE_RX;
+		ahw->coal.rx_time_us = QLCNIC_DEF_INTR_COALESCE_RX_TIME_US;
+		ahw->coal.rx_packets = QLCNIC_DEF_INTR_COALESCE_RX_PACKETS;
+	}
+
 	/* clear stats */
 	memset(&adapter->stats, 0, sizeof(adapter->stats));
 err_out:
@@ -2069,12 +2087,20 @@
 
 static void qlcnic_free_adapter_resources(struct qlcnic_adapter *adapter)
 {
+	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
+
 	kfree(adapter->recv_ctx);
 	adapter->recv_ctx = NULL;
 
-	if (adapter->ahw->fw_dump.tmpl_hdr) {
-		vfree(adapter->ahw->fw_dump.tmpl_hdr);
-		adapter->ahw->fw_dump.tmpl_hdr = NULL;
+	if (fw_dump->tmpl_hdr) {
+		vfree(fw_dump->tmpl_hdr);
+		fw_dump->tmpl_hdr = NULL;
+	}
+
+	if (fw_dump->dma_buffer) {
+		dma_free_coherent(&adapter->pdev->dev, QLC_PEX_DMA_READ_SIZE,
+				  fw_dump->dma_buffer, fw_dump->phys_addr);
+		fw_dump->dma_buffer = NULL;
 	}
 
 	kfree(adapter->ahw->reset.buff);
@@ -2247,10 +2273,8 @@
 
 	qlcnic_change_mtu(netdev, netdev->mtu);
 
-	if (qlcnic_sriov_vf_check(adapter))
-		SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops);
+	netdev->ethtool_ops = (qlcnic_sriov_vf_check(adapter)) ?
+		&qlcnic_sriov_vf_ethtool_ops : &qlcnic_ethtool_ops;
 
 	netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			     NETIF_F_IPV6_CSUM | NETIF_F_GRO |
@@ -2417,9 +2441,6 @@
 	int err, pci_using_dac = -1;
 	char board_name[QLCNIC_MAX_BOARD_NAME_LEN + 19]; /* MAC + ": " + name */
 
-	if (pdev->is_virtfn)
-		return -ENODEV;
-
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
@@ -2552,9 +2573,11 @@
 			case -ENOMEM:
 				dev_err(&pdev->dev, "Adapter initialization failed. Please reboot\n");
 				goto err_out_free_hw;
+			case -EOPNOTSUPP:
+				dev_err(&pdev->dev, "Adapter initialization failed\n");
+				goto err_out_free_hw;
 			default:
-				dev_err(&pdev->dev, "Adapter initialization failed. A reboot may be required to recover from this failure\n");
-				dev_err(&pdev->dev, "If reboot does not help to recover from this failure, try a flash update of the adapter\n");
+				dev_err(&pdev->dev, "Adapter initialization failed. Driver will load in maintenance mode to recover the adapter using the application\n");
 				goto err_out_maintenance_mode;
 			}
 		}
@@ -2628,7 +2651,7 @@
 		qlcnic_alloc_lb_filters_mem(adapter);
 
 	qlcnic_add_sysfs(adapter);
-
+	qlcnic_register_hwmon_dev(adapter);
 	return 0;
 
 err_out_disable_mbx_intr:
@@ -2665,7 +2688,7 @@
 err_out_maintenance_mode:
 	set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state);
 	netdev->netdev_ops = &qlcnic_netdev_failed_ops;
-	SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops);
+	netdev->ethtool_ops = &qlcnic_ethtool_failed_ops;
 	ahw->port_type = QLCNIC_XGBE;
 
 	if (qlcnic_83xx_check(adapter))
@@ -2698,9 +2721,9 @@
 		return;
 
 	netdev = adapter->netdev;
-	qlcnic_sriov_pf_disable(adapter);
 
 	qlcnic_cancel_idc_work(adapter);
+	qlcnic_sriov_pf_disable(adapter);
 	ahw = adapter->ahw;
 
 	unregister_netdev(netdev);
@@ -2735,6 +2758,8 @@
 
 	qlcnic_remove_sysfs(adapter);
 
+	qlcnic_unregister_hwmon_dev(adapter);
+
 	qlcnic_cleanup_pci_map(adapter->ahw);
 
 	qlcnic_release_firmware(adapter);
@@ -2828,6 +2853,8 @@
 	return 0;
 }
 
+#define QLCNIC_VF_LB_BUCKET_SIZE 1
+
 void qlcnic_alloc_lb_filters_mem(struct qlcnic_adapter *adapter)
 {
 	void *head;
@@ -2843,7 +2870,10 @@
 	spin_lock_init(&adapter->mac_learn_lock);
 	spin_lock_init(&adapter->rx_mac_learn_lock);
 
-	if (qlcnic_82xx_check(adapter)) {
+	if (qlcnic_sriov_vf_check(adapter)) {
+		filter_size = QLCNIC_83XX_SRIOV_VF_MAX_MAC - 1;
+		adapter->fhash.fbucket_size = QLCNIC_VF_LB_BUCKET_SIZE;
+	} else if (qlcnic_82xx_check(adapter)) {
 		filter_size = QLCNIC_LB_MAX_FILTERS;
 		adapter->fhash.fbucket_size = QLCNIC_LB_BUCKET_SIZE;
 	} else {
@@ -3973,16 +4003,6 @@
 		strcpy(buf, "Tx");
 	}
 
-	if (!QLCNIC_IS_MSI_FAMILY(adapter)) {
-		netdev_err(netdev, "No RSS/TSS support in INT-x mode\n");
-		return -EINVAL;
-	}
-
-	if (adapter->flags & QLCNIC_MSI_ENABLED) {
-		netdev_err(netdev, "No RSS/TSS support in MSI mode\n");
-		return -EINVAL;
-	}
-
 	if (!is_power_of_2(ring_cnt)) {
 		netdev_err(netdev, "%s rings value should be a power of 2\n",
 			   buf);
@@ -4122,7 +4142,7 @@
 
 	rcu_read_lock();
 	for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) {
-		dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid);
+		dev = __vlan_find_dev_deep_rcu(netdev, htons(ETH_P_8021Q), vid);
 		if (!dev)
 			continue;
 		qlcnic_config_indev_addr(adapter, dev, event);

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index 37b979b..e46fc39 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c

@@ -238,6 +238,8 @@
 
 	hdr->drv_cap_mask = hdr->cap_mask;
 	fw_dump->cap_mask = hdr->cap_mask;
+
+	fw_dump->use_pex_dma = (hdr->capabilities & BIT_0) ? true : false;
 }
 
 inline u32 qlcnic_82xx_get_cap_size(void *t_hdr, int index)
@@ -276,6 +278,8 @@
 	hdr->saved_state[index] = value;
 }
 
+#define QLCNIC_TEMPLATE_VERSION (0x20001)
+
 void qlcnic_83xx_cache_tmpl_hdr_values(struct qlcnic_fw_dump *fw_dump)
 {
 	struct qlcnic_83xx_dump_template_hdr *hdr;
@@ -288,6 +292,9 @@
 
 	hdr->drv_cap_mask = hdr->cap_mask;
 	fw_dump->cap_mask = hdr->cap_mask;
+
+	fw_dump->use_pex_dma = (fw_dump->version & 0xfffff) >=
+			       QLCNIC_TEMPLATE_VERSION;
 }
 
 inline u32 qlcnic_83xx_get_cap_size(void *t_hdr, int index)
@@ -653,34 +660,31 @@
 #define QLC_DMA_CMD_BUFF_ADDR_HI	4
 #define QLC_DMA_CMD_STATUS_CTRL		8
 
-#define QLC_PEX_DMA_READ_SIZE		(PAGE_SIZE * 16)
-
 static int qlcnic_start_pex_dma(struct qlcnic_adapter *adapter,
 				struct __mem *mem)
 {
-	struct qlcnic_83xx_dump_template_hdr *tmpl_hdr;
 	struct device *dev = &adapter->pdev->dev;
 	u32 dma_no, dma_base_addr, temp_addr;
 	int i, ret, dma_sts;
+	void *tmpl_hdr;
 
 	tmpl_hdr = adapter->ahw->fw_dump.tmpl_hdr;
-	dma_no = tmpl_hdr->saved_state[QLC_83XX_DMA_ENGINE_INDEX];
+	dma_no = qlcnic_get_saved_state(adapter, tmpl_hdr,
+					QLC_83XX_DMA_ENGINE_INDEX);
 	dma_base_addr = QLC_DMA_REG_BASE_ADDR(dma_no);
 
 	temp_addr = dma_base_addr + QLC_DMA_CMD_BUFF_ADDR_LOW;
-	ret = qlcnic_83xx_wrt_reg_indirect(adapter, temp_addr,
-					   mem->desc_card_addr);
+	ret = qlcnic_ind_wr(adapter, temp_addr, mem->desc_card_addr);
 	if (ret)
 		return ret;
 
 	temp_addr = dma_base_addr + QLC_DMA_CMD_BUFF_ADDR_HI;
-	ret = qlcnic_83xx_wrt_reg_indirect(adapter, temp_addr, 0);
+	ret = qlcnic_ind_wr(adapter, temp_addr, 0);
 	if (ret)
 		return ret;
 
 	temp_addr = dma_base_addr + QLC_DMA_CMD_STATUS_CTRL;
-	ret = qlcnic_83xx_wrt_reg_indirect(adapter, temp_addr,
-					   mem->start_dma_cmd);
+	ret = qlcnic_ind_wr(adapter, temp_addr, mem->start_dma_cmd);
 	if (ret)
 		return ret;
 
@@ -710,15 +714,16 @@
 	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
 	u32 temp, dma_base_addr, size = 0, read_size = 0;
 	struct qlcnic_pex_dma_descriptor *dma_descr;
-	struct qlcnic_83xx_dump_template_hdr *tmpl_hdr;
 	struct device *dev = &adapter->pdev->dev;
 	dma_addr_t dma_phys_addr;
 	void *dma_buffer;
+	void *tmpl_hdr;
 
 	tmpl_hdr = fw_dump->tmpl_hdr;
 
 	/* Check if DMA engine is available */
-	temp = tmpl_hdr->saved_state[QLC_83XX_DMA_ENGINE_INDEX];
+	temp = qlcnic_get_saved_state(adapter, tmpl_hdr,
+				      QLC_83XX_DMA_ENGINE_INDEX);
 	dma_base_addr = QLC_DMA_REG_BASE_ADDR(temp);
 	temp = qlcnic_ind_rd(adapter,
 			     dma_base_addr + QLC_DMA_CMD_STATUS_CTRL);
@@ -764,8 +769,8 @@
 
 		/* Write DMA descriptor to MS memory*/
 		temp = sizeof(struct qlcnic_pex_dma_descriptor) / 16;
-		*ret = qlcnic_83xx_ms_mem_write128(adapter, mem->desc_card_addr,
-						   (u32 *)dma_descr, temp);
+		*ret = qlcnic_ms_mem_write128(adapter, mem->desc_card_addr,
+					      (u32 *)dma_descr, temp);
 		if (*ret) {
 			dev_info(dev, "Failed to write DMA descriptor to MS memory at address 0x%x\n",
 				 mem->desc_card_addr);
@@ -1141,8 +1146,6 @@
 	return err;
 }
 
-#define QLCNIC_TEMPLATE_VERSION (0x20001)
-
 int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_hardware_context *ahw;
@@ -1150,6 +1153,7 @@
 	u32 version, csum, *tmp_buf;
 	u8 use_flash_temp = 0;
 	u32 temp_size = 0;
+	void *temp_buffer;
 	int err;
 
 	ahw = adapter->ahw;
@@ -1199,16 +1203,23 @@
 
 	qlcnic_cache_tmpl_hdr_values(adapter, fw_dump);
 
+	if (fw_dump->use_pex_dma) {
+		fw_dump->dma_buffer = NULL;
+		temp_buffer = dma_alloc_coherent(&adapter->pdev->dev,
+						 QLC_PEX_DMA_READ_SIZE,
+						 &fw_dump->phys_addr,
+						 GFP_KERNEL);
+		if (!temp_buffer)
+			fw_dump->use_pex_dma = false;
+		else
+			fw_dump->dma_buffer = temp_buffer;
+	}
+
+
 	dev_info(&adapter->pdev->dev,
 		 "Default minidump capture mask 0x%x\n",
 		 fw_dump->cap_mask);
 
-	if (qlcnic_83xx_check(adapter) &&
-	    (fw_dump->version & 0xfffff) >= QLCNIC_TEMPLATE_VERSION)
-		fw_dump->use_pex_dma = true;
-	else
-		fw_dump->use_pex_dma = false;
-
 	qlcnic_enable_fw_dump_state(adapter);
 
 	return 0;
@@ -1224,7 +1235,7 @@
 	struct device *dev = &adapter->pdev->dev;
 	struct qlcnic_hardware_context *ahw;
 	struct qlcnic_dump_entry *entry;
-	void *temp_buffer, *tmpl_hdr;
+	void *tmpl_hdr;
 	u32 ocm_window;
 	__le32 *buffer;
 	char mesg[64];
@@ -1268,16 +1279,6 @@
 	qlcnic_set_sys_info(adapter, tmpl_hdr, 0, QLCNIC_DRIVER_VERSION);
 	qlcnic_set_sys_info(adapter, tmpl_hdr, 1, adapter->fw_version);
 
-	if (fw_dump->use_pex_dma) {
-		temp_buffer = dma_alloc_coherent(dev, QLC_PEX_DMA_READ_SIZE,
-						 &fw_dump->phys_addr,
-						 GFP_KERNEL);
-		if (!temp_buffer)
-			fw_dump->use_pex_dma = false;
-		else
-			fw_dump->dma_buffer = temp_buffer;
-	}
-
 	if (qlcnic_82xx_check(adapter)) {
 		ops_cnt = ARRAY_SIZE(qlcnic_fw_dump_ops);
 		fw_dump_ops = qlcnic_fw_dump_ops;
@@ -1335,10 +1336,6 @@
 	/* Send a udev event to notify availability of FW dump */
 	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, msg);
 
-	if (fw_dump->use_pex_dma)
-		dma_free_coherent(dev, QLC_PEX_DMA_READ_SIZE,
-				  fw_dump->dma_buffer, fw_dump->phys_addr);
-
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 396bd1f..4677b2e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h

@@ -52,6 +52,7 @@
 	QLCNIC_BC_CMD_CFG_GUEST_VLAN = 0x3,
 };
 
+#define QLCNIC_83XX_SRIOV_VF_MAX_MAC 2
 #define QLC_BC_CMD 1
 
 struct qlcnic_trans_list {
@@ -151,13 +152,14 @@
 	struct qlcnic_trans_list	rcv_pend;
 	struct qlcnic_adapter		*adapter;
 	struct qlcnic_vport		*vp;
-	struct mutex			vlan_list_lock;	/* Lock for VLAN list */
+	spinlock_t			vlan_list_lock;	/* Lock for VLAN list */
 };
 
 struct qlcnic_async_work_list {
 	struct list_head	list;
 	struct work_struct	work;
 	void			*ptr;
+	struct qlcnic_cmd_args	*cmd;
 };
 
 struct qlcnic_back_channel {
@@ -231,7 +233,7 @@
 void qlcnic_sriov_pf_reset(struct qlcnic_adapter *);
 int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *);
 int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *);
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int);
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
 			       struct ifla_vf_info *);
 int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 6afe9c1..1659c80 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c

@@ -39,6 +39,8 @@
 static void qlcnic_sriov_process_bc_cmd(struct work_struct *);
 static int qlcnic_sriov_vf_shutdown(struct pci_dev *);
 static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *);
+static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *,
+					struct qlcnic_cmd_args *);
 
 static struct qlcnic_hardware_ops qlcnic_sriov_vf_hw_ops = {
 	.read_crb			= qlcnic_83xx_read_crb,
@@ -181,7 +183,7 @@
 		vf->adapter = adapter;
 		vf->pci_func = qlcnic_sriov_virtid_fn(adapter, i);
 		mutex_init(&vf->send_cmd_lock);
-		mutex_init(&vf->vlan_list_lock);
+		spin_lock_init(&vf->vlan_list_lock);
 		INIT_LIST_HEAD(&vf->rcv_act.wait_list);
 		INIT_LIST_HEAD(&vf->rcv_pend.wait_list);
 		spin_lock_init(&vf->rcv_act.lock);
@@ -197,8 +199,10 @@
 				goto qlcnic_destroy_async_wq;
 			}
 			sriov->vf_info[i].vp = vp;
+			vp->vlan_mode = QLC_GUEST_VLAN_MODE;
 			vp->max_tx_bw = MAX_BW;
-			vp->spoofchk = true;
+			vp->min_tx_bw = MIN_BW;
+			vp->spoofchk = false;
 			random_ether_addr(vp->mac);
 			dev_info(&adapter->pdev->dev,
 				 "MAC Address %pM is configured for VF %d\n",
@@ -454,6 +458,7 @@
 	struct qlcnic_cmd_args cmd;
 	int ret = 0;
 
+	memset(&cmd, 0, sizeof(cmd));
 	ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, QLCNIC_BC_CMD_GET_ACL);
 	if (ret)
 		return ret;
@@ -515,6 +520,8 @@
 {
 	int err;
 
+	adapter->flags |= QLCNIC_VLAN_FILTERING;
+	adapter->ahw->total_nic_func = 1;
 	INIT_LIST_HEAD(&adapter->vf_mc_list);
 	if (!qlcnic_use_msi_x && !!qlcnic_use_msi)
 		dev_warn(&adapter->pdev->dev,
@@ -770,6 +777,7 @@
 		cmd->req.arg = (u32 *)trans->req_pay;
 		cmd->rsp.arg = (u32 *)trans->rsp_pay;
 		cmd_op = cmd->req.arg[0] & 0xff;
+		cmd->cmd_op = cmd_op;
 		remainder = (trans->rsp_pay_size) % (bc_pay_sz);
 		num_frags = (trans->rsp_pay_size) / (bc_pay_sz);
 		if (remainder)
@@ -1356,7 +1364,7 @@
 	return -EIO;
 }
 
-static int qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
+static int __qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
 				  struct qlcnic_cmd_args *cmd)
 {
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
@@ -1408,12 +1416,17 @@
 	    (mbx_err_code == QLCNIC_MBX_PORT_RSP_OK)) {
 		rsp = QLCNIC_RCODE_SUCCESS;
 	} else {
-		rsp = mbx_err_code;
-		if (!rsp)
-			rsp = 1;
-		dev_err(dev,
-			"MBX command 0x%x failed with err:0x%x for VF %d\n",
-			opcode, mbx_err_code, func);
+		if (cmd->type == QLC_83XX_MBX_CMD_NO_WAIT) {
+			rsp = QLCNIC_RCODE_SUCCESS;
+		} else {
+			rsp = mbx_err_code;
+			if (!rsp)
+				rsp = 1;
+
+			dev_err(dev,
+				"MBX command 0x%x failed with err:0x%x for VF %d\n",
+				opcode, mbx_err_code, func);
+		}
 	}
 
 err_out:
@@ -1435,12 +1448,23 @@
 	return rsp;
 }
 
+/* NO_WAIT commands take the async (workqueue) path; others run directly */
+static int qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
+				  struct qlcnic_cmd_args *cmd)
+{
+	if (cmd->type != QLC_83XX_MBX_CMD_NO_WAIT)
+		return __qlcnic_sriov_issue_cmd(adapter, cmd);
+
+	return qlcnic_sriov_async_issue_cmd(adapter, cmd);
+}
+
 static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_op)
 {
 	struct qlcnic_cmd_args cmd;
 	struct qlcnic_vf_info *vf = &adapter->ahw->sriov->vf_info[0];
 	int ret;
 
+	memset(&cmd, 0, sizeof(cmd));
 	if (qlcnic_sriov_alloc_bc_mbx_args(&cmd, cmd_op))
 		return -ENOMEM;
 
@@ -1465,58 +1489,28 @@
 	return ret;
 }
 
-static void qlcnic_vf_add_mc_list(struct net_device *netdev)
+static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
-	struct qlcnic_mac_vlan_list *cur;
-	struct list_head *head, tmp_list;
 	struct qlcnic_vf_info *vf;
 	u16 vlan_id;
 	int i;
 
-	static const u8 bcast_addr[ETH_ALEN] = {
-		0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-	};
-
 	vf = &adapter->ahw->sriov->vf_info[0];
-	INIT_LIST_HEAD(&tmp_list);
-	head = &adapter->vf_mc_list;
-	netif_addr_lock_bh(netdev);
 
-	while (!list_empty(head)) {
-		cur = list_entry(head->next, struct qlcnic_mac_vlan_list, list);
-		list_move(&cur->list, &tmp_list);
-	}
-
-	netif_addr_unlock_bh(netdev);
-
-	while (!list_empty(&tmp_list)) {
-		cur = list_entry((&tmp_list)->next,
-				 struct qlcnic_mac_vlan_list, list);
-		if (!qlcnic_sriov_check_any_vlan(vf)) {
-			qlcnic_nic_add_mac(adapter, bcast_addr, 0);
-			qlcnic_nic_add_mac(adapter, cur->mac_addr, 0);
-		} else {
-			mutex_lock(&vf->vlan_list_lock);
-			for (i = 0; i < sriov->num_allowed_vlans; i++) {
-				vlan_id = vf->sriov_vlans[i];
-				if (vlan_id) {
-					qlcnic_nic_add_mac(adapter, bcast_addr,
-							   vlan_id);
-					qlcnic_nic_add_mac(adapter,
-							   cur->mac_addr,
-							   vlan_id);
-				}
-			}
-			mutex_unlock(&vf->vlan_list_lock);
-			if (qlcnic_84xx_check(adapter)) {
-				qlcnic_nic_add_mac(adapter, bcast_addr, 0);
-				qlcnic_nic_add_mac(adapter, cur->mac_addr, 0);
-			}
+	if (!qlcnic_sriov_check_any_vlan(vf)) {
+		qlcnic_nic_add_mac(adapter, mac, 0);
+	} else {
+		spin_lock(&vf->vlan_list_lock);
+		for (i = 0; i < sriov->num_allowed_vlans; i++) {
+			vlan_id = vf->sriov_vlans[i];
+			if (vlan_id)
+				qlcnic_nic_add_mac(adapter, mac, vlan_id);
 		}
-		list_del(&cur->list);
-		kfree(cur);
+		spin_unlock(&vf->vlan_list_lock);
+		if (qlcnic_84xx_check(adapter))
+			qlcnic_nic_add_mac(adapter, mac, 0);
 	}
 }
 
@@ -1525,6 +1519,7 @@
 	struct list_head *head = &bc->async_list;
 	struct qlcnic_async_work_list *entry;
 
+	flush_workqueue(bc->bc_async_wq);
 	while (!list_empty(head)) {
 		entry = list_entry(head->next, struct qlcnic_async_work_list,
 				   list);
@@ -1534,10 +1529,14 @@
 	}
 }
 
-static void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
+void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
+	static const u8 bcast_addr[ETH_ALEN] = {
+		0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	};
+	struct netdev_hw_addr *ha;
 	u32 mode = VPORT_MISS_MODE_DROP;
 
 	if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state))
@@ -1549,23 +1548,49 @@
 	} else if ((netdev->flags & IFF_ALLMULTI) ||
 		   (netdev_mc_count(netdev) > ahw->max_mc_count)) {
 		mode = VPORT_MISS_MODE_ACCEPT_MULTI;
+	} else {
+		qlcnic_vf_add_mc_list(netdev, bcast_addr);
+		if (!netdev_mc_empty(netdev)) {
+			netdev_for_each_mc_addr(ha, netdev)
+				qlcnic_vf_add_mc_list(netdev, ha->addr);
+		}
 	}
 
-	if (qlcnic_sriov_vf_check(adapter))
-		qlcnic_vf_add_mc_list(netdev);
+	/* configure unicast MAC address, if there is not sufficient space
+	 * to store all the unicast addresses then enable promiscuous mode
+	 */
+	if (netdev_uc_count(netdev) > ahw->max_uc_count) {
+		mode = VPORT_MISS_MODE_ACCEPT_ALL;
+	} else if (!netdev_uc_empty(netdev)) {
+		netdev_for_each_uc_addr(ha, netdev)
+			qlcnic_vf_add_mc_list(netdev, ha->addr);
+	}
+
+	if (adapter->pdev->is_virtfn) {
+		if (mode == VPORT_MISS_MODE_ACCEPT_ALL &&
+		    !adapter->fdb_mac_learn) {
+			qlcnic_alloc_lb_filters_mem(adapter);
+			adapter->drv_mac_learn = 1;
+			adapter->rx_mac_learn = true;
+		} else {
+			adapter->drv_mac_learn = 0;
+			adapter->rx_mac_learn = false;
+		}
+	}
 
 	qlcnic_nic_set_promisc(adapter, mode);
 }
 
-static void qlcnic_sriov_handle_async_multi(struct work_struct *work)
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
 {
 	struct qlcnic_async_work_list *entry;
-	struct net_device *netdev;
+	struct qlcnic_adapter *adapter;
+	struct qlcnic_cmd_args *cmd;
 
 	entry = container_of(work, struct qlcnic_async_work_list, work);
-	netdev = (struct net_device *)entry->ptr;
-
-	qlcnic_sriov_vf_set_multi(netdev);
+	adapter = entry->ptr;
+	cmd = entry->cmd;
+	__qlcnic_sriov_issue_cmd(adapter, cmd);
 	return;
 }
 
@@ -1595,8 +1620,9 @@
 	return entry;
 }
 
-static void qlcnic_sriov_schedule_bc_async_work(struct qlcnic_back_channel *bc,
-						work_func_t func, void *data)
+static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
+					    work_func_t func, void *data,
+					    struct qlcnic_cmd_args *cmd)
 {
 	struct qlcnic_async_work_list *entry = NULL;
 
@@ -1605,21 +1631,23 @@
 		return;
 
 	entry->ptr = data;
+	entry->cmd = cmd;
 	INIT_WORK(&entry->work, func);
 	queue_work(bc->bc_async_wq, &entry->work);
 }
 
-void qlcnic_sriov_vf_schedule_multi(struct net_device *netdev)
+static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
+					struct qlcnic_cmd_args *cmd)
 {
 
-	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_back_channel *bc = &adapter->ahw->sriov->bc;
 
 	if (adapter->need_fw_reset)
-		return;
+		return -EIO;
 
-	qlcnic_sriov_schedule_bc_async_work(bc, qlcnic_sriov_handle_async_multi,
-					    netdev);
+	qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
+					adapter, cmd);
+	return 0;
 }
 
 static int qlcnic_sriov_vf_reinit_driver(struct qlcnic_adapter *adapter)
@@ -1843,6 +1871,12 @@
 	return 0;
 }
 
+static void qlcnic_sriov_vf_periodic_tasks(struct qlcnic_adapter *adapter)
+{
+	if (adapter->fhash.fnum)
+		qlcnic_prune_lb_filters(adapter);
+}
+
 static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *work)
 {
 	struct qlcnic_adapter *adapter;
@@ -1874,6 +1908,8 @@
 	}
 
 	idc->prev_state = idc->curr_state;
+	qlcnic_sriov_vf_periodic_tasks(adapter);
+
 	if (!ret && test_bit(QLC_83XX_MODULE_LOADED, &idc->status))
 		qlcnic_schedule_work(adapter, qlcnic_sriov_vf_poll_dev_state,
 				     idc->delay);
@@ -1897,7 +1933,7 @@
 	if (!vf->sriov_vlans)
 		return err;
 
-	mutex_lock(&vf->vlan_list_lock);
+	spin_lock_bh(&vf->vlan_list_lock);
 
 	for (i = 0; i < sriov->num_allowed_vlans; i++) {
 		if (vf->sriov_vlans[i] == vlan_id) {
@@ -1906,7 +1942,7 @@
 		}
 	}
 
-	mutex_unlock(&vf->vlan_list_lock);
+	spin_unlock_bh(&vf->vlan_list_lock);
 	return err;
 }
 
@@ -1915,12 +1951,12 @@
 {
 	int err = 0;
 
-	mutex_lock(&vf->vlan_list_lock);
+	spin_lock_bh(&vf->vlan_list_lock);
 
 	if (vf->num_vlan >= sriov->num_allowed_vlans)
 		err = -EINVAL;
 
-	mutex_unlock(&vf->vlan_list_lock);
+	spin_unlock_bh(&vf->vlan_list_lock);
 	return err;
 }
 
@@ -1973,7 +2009,7 @@
 	if (!vf->sriov_vlans)
 		return;
 
-	mutex_lock(&vf->vlan_list_lock);
+	spin_lock_bh(&vf->vlan_list_lock);
 
 	switch (opcode) {
 	case QLC_VLAN_ADD:
@@ -1986,7 +2022,7 @@
 		netdev_err(adapter->netdev, "Invalid VLAN operation\n");
 	}
 
-	mutex_unlock(&vf->vlan_list_lock);
+	spin_unlock_bh(&vf->vlan_list_lock);
 	return;
 }
 
@@ -1994,10 +2030,12 @@
 				   u16 vid, u8 enable)
 {
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
+	struct net_device *netdev = adapter->netdev;
 	struct qlcnic_vf_info *vf;
 	struct qlcnic_cmd_args cmd;
 	int ret;
 
+	memset(&cmd, 0, sizeof(cmd));
 	if (vid == 0)
 		return 0;
 
@@ -2019,14 +2057,18 @@
 		dev_err(&adapter->pdev->dev,
 			"Failed to configure guest VLAN, err=%d\n", ret);
 	} else {
+		netif_addr_lock_bh(netdev);
 		qlcnic_free_mac_list(adapter);
+		netif_addr_unlock_bh(netdev);
 
 		if (enable)
 			qlcnic_sriov_vlan_operation(vf, vid, QLC_VLAN_ADD);
 		else
 			qlcnic_sriov_vlan_operation(vf, vid, QLC_VLAN_DELETE);
 
-		qlcnic_set_multi(adapter->netdev);
+		netif_addr_lock_bh(netdev);
+		qlcnic_set_multi(netdev);
+		netif_addr_unlock_bh(netdev);
 	}
 
 	qlcnic_free_mbx_args(&cmd);
@@ -2157,11 +2199,11 @@
 {
 	bool err = false;
 
-	mutex_lock(&vf->vlan_list_lock);
+	spin_lock_bh(&vf->vlan_list_lock);
 
 	if (vf->num_vlan)
 		err = true;
 
-	mutex_unlock(&vf->vlan_list_lock);
+	spin_unlock_bh(&vf->vlan_list_lock);
 	return err;
 }

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 2801379..a29538b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c

@@ -16,6 +16,7 @@
 #define QLC_VF_FLOOD_BIT	BIT_16
 #define QLC_FLOOD_MODE		0x5
 #define QLC_SRIOV_ALLOW_VLAN0	BIT_19
+#define QLC_INTR_COAL_TYPE_MASK	0x7
 
 static int qlcnic_sriov_pf_get_vport_handle(struct qlcnic_adapter *, u8);
 
@@ -83,7 +84,7 @@
 	info->max_tx_ques = res->num_tx_queues / max;
 
 	if (qlcnic_83xx_pf_check(adapter))
-		num_macs = 1;
+		num_macs = QLCNIC_83XX_SRIOV_VF_MAX_MAC;
 
 	info->max_rx_mcast_mac_filters = res->num_rx_mcast_mac_filters;
 
@@ -337,9 +338,12 @@
 
 	cmd.req.arg[1] = 0x4;
 	if (enable) {
+		adapter->flags |= QLCNIC_VLAN_FILTERING;
 		cmd.req.arg[1] |= BIT_16;
 		if (qlcnic_84xx_check(adapter))
 			cmd.req.arg[1] |= QLC_SRIOV_ALLOW_VLAN0;
+	} else {
+		adapter->flags &= ~QLCNIC_VLAN_FILTERING;
 	}
 
 	err = qlcnic_issue_cmd(adapter, &cmd);
@@ -471,12 +475,12 @@
 		return -EPERM;
 	}
 
+	qlcnic_sriov_pf_disable(adapter);
+
 	rtnl_lock();
 	if (netif_running(netdev))
 		__qlcnic_down(adapter, netdev);
 
-	qlcnic_sriov_pf_disable(adapter);
-
 	qlcnic_sriov_free_vlans(adapter);
 
 	qlcnic_sriov_pf_cleanup(adapter);
@@ -595,7 +599,6 @@
 
 	qlcnic_sriov_alloc_vlans(adapter);
 
-	err = qlcnic_sriov_pf_enable(adapter, num_vfs);
 	return err;
 
 del_flr_queue:
@@ -626,25 +629,36 @@
 		__qlcnic_down(adapter, netdev);
 
 	err = __qlcnic_pci_sriov_enable(adapter, num_vfs);
-	if (err) {
-		netdev_info(netdev, "Failed to enable SR-IOV on port %d\n",
-			    adapter->portnum);
+	if (err)
+		goto error;
 
-		err = -EIO;
-		if (qlcnic_83xx_configure_opmode(adapter))
-			goto error;
-	} else {
+	if (netif_running(netdev))
+		__qlcnic_up(adapter, netdev);
+
+	rtnl_unlock();
+	err = qlcnic_sriov_pf_enable(adapter, num_vfs);
+	if (!err) {
 		netdev_info(netdev,
 			    "SR-IOV is enabled successfully on port %d\n",
 			    adapter->portnum);
 		/* Return number of vfs enabled */
-		err = num_vfs;
+		return num_vfs;
 	}
+
+	rtnl_lock();
 	if (netif_running(netdev))
-		__qlcnic_up(adapter, netdev);
+		__qlcnic_down(adapter, netdev);
 
 error:
+	if (!qlcnic_83xx_configure_opmode(adapter)) {
+		if (netif_running(netdev))
+			__qlcnic_up(adapter, netdev);
+	}
+
 	rtnl_unlock();
+	netdev_info(netdev, "Failed to enable SR-IOV on port %d\n",
+		    adapter->portnum);
+
 	return err;
 }
 
@@ -773,7 +787,7 @@
 				       struct qlcnic_vf_info *vf,
 				       u16 vlan, u8 op)
 {
-	struct qlcnic_cmd_args cmd;
+	struct qlcnic_cmd_args *cmd;
 	struct qlcnic_macvlan_mbx mv;
 	struct qlcnic_vport *vp;
 	u8 *addr;
@@ -783,21 +797,27 @@
 
 	vp = vf->vp;
 
-	if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIG_MAC_VLAN))
+	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
+	if (!cmd)
 		return -ENOMEM;
 
+	err = qlcnic_alloc_mbx_args(cmd, adapter, QLCNIC_CMD_CONFIG_MAC_VLAN);
+	if (err)
+		goto free_cmd;
+
+	cmd->type = QLC_83XX_MBX_CMD_NO_WAIT;
 	vpid = qlcnic_sriov_pf_get_vport_handle(adapter, vf->pci_func);
 	if (vpid < 0) {
 		err = -EINVAL;
-		goto out;
+		goto free_args;
 	}
 
 	if (vlan)
 		op = ((op == QLCNIC_MAC_ADD || op == QLCNIC_MAC_VLAN_ADD) ?
 		      QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_VLAN_DEL);
 
-	cmd.req.arg[1] = op | (1 << 8) | (3 << 6);
-	cmd.req.arg[1] |= ((vpid & 0xffff) << 16) | BIT_31;
+	cmd->req.arg[1] = op | (1 << 8) | (3 << 6);
+	cmd->req.arg[1] |= ((vpid & 0xffff) << 16) | BIT_31;
 
 	addr = vp->mac;
 	mv.vlan = vlan;
@@ -807,18 +827,18 @@
 	mv.mac_addr3 = addr[3];
 	mv.mac_addr4 = addr[4];
 	mv.mac_addr5 = addr[5];
-	buf = &cmd.req.arg[2];
+	buf = &cmd->req.arg[2];
 	memcpy(buf, &mv, sizeof(struct qlcnic_macvlan_mbx));
 
-	err = qlcnic_issue_cmd(adapter, &cmd);
+	err = qlcnic_issue_cmd(adapter, cmd);
 
-	if (err)
-		dev_err(&adapter->pdev->dev,
-			"MAC-VLAN %s to CAM failed, err=%d.\n",
-			((op == 1) ? "add " : "delete "), err);
+	if (!err)
+		return err;
 
-out:
-	qlcnic_free_mbx_args(&cmd);
+free_args:
+	qlcnic_free_mbx_args(cmd);
+free_cmd:
+	kfree(cmd);
 	return err;
 }
 
@@ -840,7 +860,7 @@
 
 	sriov = adapter->ahw->sriov;
 
-	mutex_lock(&vf->vlan_list_lock);
+	spin_lock_bh(&vf->vlan_list_lock);
 	if (vf->num_vlan) {
 		for (i = 0; i < sriov->num_allowed_vlans; i++) {
 			vlan = vf->sriov_vlans[i];
@@ -849,7 +869,7 @@
 							    opcode);
 		}
 	}
-	mutex_unlock(&vf->vlan_list_lock);
+	spin_unlock_bh(&vf->vlan_list_lock);
 
 	if (vf->vp->vlan_mode != QLC_PVID_MODE) {
 		if (qlcnic_83xx_pf_check(adapter) &&
@@ -1178,19 +1198,41 @@
 {
 	struct qlcnic_nic_intr_coalesce *coal = &adapter->ahw->coal;
 	u16 ctx_id, pkts, time;
+	int err = -EINVAL;
+	u8 type;
 
+	type = cmd->req.arg[1] & QLC_INTR_COAL_TYPE_MASK;
 	ctx_id = cmd->req.arg[1] >> 16;
 	pkts = cmd->req.arg[2] & 0xffff;
 	time = cmd->req.arg[2] >> 16;
 
-	if (ctx_id != vf->rx_ctx_id)
-		return -EINVAL;
-	if (pkts > coal->rx_packets)
-		return -EINVAL;
-	if (time < coal->rx_time_us)
-		return -EINVAL;
+	switch (type) {
+	case QLCNIC_INTR_COAL_TYPE_RX:
+		if (ctx_id != vf->rx_ctx_id || pkts > coal->rx_packets ||
+		    time < coal->rx_time_us)
+			goto err_label;
+		break;
+	case QLCNIC_INTR_COAL_TYPE_TX:
+		if (ctx_id != vf->tx_ctx_id || pkts > coal->tx_packets ||
+		    time < coal->tx_time_us)
+			goto err_label;
+		break;
+	default:
+		netdev_err(adapter->netdev, "Invalid coalescing type 0x%x received\n",
+			   type);
+		return err;
+	}
 
 	return 0;
+
+err_label:
+	netdev_err(adapter->netdev, "Expected: rx_ctx_id 0x%x rx_packets 0x%x rx_time_us 0x%x tx_ctx_id 0x%x tx_packets 0x%x tx_time_us 0x%x\n",
+		   vf->rx_ctx_id, coal->rx_packets, coal->rx_time_us,
+		   vf->tx_ctx_id, coal->tx_packets, coal->tx_time_us);
+	netdev_err(adapter->netdev, "Received: ctx_id 0x%x packets 0x%x time_us 0x%x type 0x%x\n",
+		   ctx_id, pkts, time, type);
+
+	return err;
 }
 
 static int qlcnic_sriov_pf_cfg_intrcoal_cmd(struct qlcnic_bc_trans *tran,
@@ -1214,7 +1256,6 @@
 					     struct qlcnic_vf_info *vf,
 					     struct qlcnic_cmd_args *cmd)
 {
-	struct qlcnic_macvlan_mbx *macvlan;
 	struct qlcnic_vport *vp = vf->vp;
 	u8 op, new_op;
 
@@ -1224,14 +1265,6 @@
 	cmd->req.arg[1] |= (vf->vp->handle << 16);
 	cmd->req.arg[1] |= BIT_31;
 
-	macvlan = (struct qlcnic_macvlan_mbx *)&cmd->req.arg[2];
-	if (!(macvlan->mac_addr0 & BIT_0)) {
-		dev_err(&adapter->pdev->dev,
-			"MAC address change is not allowed from VF %d",
-			vf->pci_func);
-		return -EINVAL;
-	}
-
 	if (vp->vlan_mode == QLC_PVID_MODE) {
 		op = cmd->req.arg[1] & 0x7;
 		cmd->req.arg[1] &= ~0x7;
@@ -1815,7 +1848,8 @@
 	return 0;
 }
 
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate)
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf,
+				int min_tx_rate, int max_tx_rate)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1830,35 +1864,52 @@
 	if (vf >= sriov->num_vfs)
 		return -EINVAL;
 
-	if (tx_rate >= 10000 || tx_rate < 100) {
-		netdev_err(netdev,
-			   "Invalid Tx rate, allowed range is [%d - %d]",
-			   QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE);
-		return -EINVAL;
-	}
-
-	if (tx_rate == 0)
-		tx_rate = 10000;
-
 	vf_info = &sriov->vf_info[vf];
 	vp = vf_info->vp;
 	vpid = vp->handle;
 
+	if (!min_tx_rate)
+		min_tx_rate = QLC_VF_MIN_TX_RATE;
+
+	if (max_tx_rate &&
+	    (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) {
+		netdev_err(netdev,
+			   "Invalid max Tx rate, allowed range is [%d - %d]",
+			   min_tx_rate, QLC_VF_MAX_TX_RATE);
+		return -EINVAL;
+	}
+
+	if (!max_tx_rate)
+		max_tx_rate = 10000;
+
+	if (min_tx_rate &&
+	    (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) {
+		netdev_err(netdev,
+			   "Invalid min Tx rate, allowed range is [%d - %d]",
+			   QLC_VF_MIN_TX_RATE, max_tx_rate);
+		return -EINVAL;
+	}
+
 	if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) {
 		if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 
-		nic_info.max_tx_bw = tx_rate / 100;
+		nic_info.max_tx_bw = max_tx_rate / 100;
+		nic_info.min_tx_bw = min_tx_rate / 100;
 		nic_info.bit_offsets = BIT_0;
 
 		if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 	}
 
-	vp->max_tx_bw = tx_rate / 100;
+	vp->max_tx_bw = max_tx_rate / 100;
 	netdev_info(netdev,
-		    "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
-		    tx_rate, vp->max_tx_bw, vf);
+		    "Setting Max Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    max_tx_rate, vp->max_tx_bw, vf);
+	vp->min_tx_bw = min_tx_rate / 100;
+	netdev_info(netdev,
+		    "Setting Min Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    min_tx_rate, vp->min_tx_bw, vf);
 	return 0;
 }
 
@@ -1957,9 +2008,13 @@
 	ivi->qos = vp->qos;
 	ivi->spoofchk = vp->spoofchk;
 	if (vp->max_tx_bw == MAX_BW)
-		ivi->tx_rate = 0;
+		ivi->max_tx_rate = 0;
 	else
-		ivi->tx_rate = vp->max_tx_bw * 100;
+		ivi->max_tx_rate = vp->max_tx_bw * 100;
+	if (vp->min_tx_bw == MIN_BW)
+		ivi->min_tx_rate = 0;
+	else
+		ivi->min_tx_rate = vp->min_tx_bw * 100;
 
 	ivi->vf = vf;
 	return 0;

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index cd346e2..f5786d5 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c

@@ -19,6 +19,10 @@
 #include <linux/sysfs.h>
 #include <linux/aer.h>
 #include <linux/log2.h>
+#ifdef CONFIG_QLCNIC_HWMON
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#endif
 
 #define QLC_STATUS_UNSUPPORTED_CMD	-2
 
@@ -358,6 +362,8 @@
 		if (adapter->npars[i].pci_func == pci_func)
 			return i;
 	}
+
+	dev_err(&adapter->pdev->dev, "%s: Invalid nic function\n", __func__);
 	return -EINVAL;
 }
 
@@ -1243,6 +1249,68 @@
 	.write = qlcnic_83xx_sysfs_flash_write_handler,
 };
 
+#ifdef CONFIG_QLCNIC_HWMON
+/* sysfs show handler for temp1_input: ASIC temperature in millidegrees */
+static ssize_t qlcnic_hwmon_show_temp(struct device *dev,
+				      struct device_attribute *dev_attr,
+				      char *buf)
+{
+	struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
+	unsigned int raw = 0, mdeg;
+
+	if (qlcnic_83xx_check(adapter))
+		raw = QLCRDX(adapter->ahw, QLC_83XX_ASIC_TEMP);
+	else if (qlcnic_82xx_check(adapter))
+		raw = QLC_SHARED_REG_RD32(adapter, QLCNIC_ASIC_TEMP);
+
+	/* report the value in millidegree Celsius */
+	mdeg = qlcnic_get_temp_val(raw);
+	mdeg *= 1000;
+	return sprintf(buf, "%u\n", mdeg);
+}
+
+/* hwmon-sysfs attributes */
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+			  qlcnic_hwmon_show_temp, NULL, 1);
+
+static struct attribute *qlcnic_hwmon_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(qlcnic_hwmon);
+
+/* Register a hwmon device; ahw->hwmon_dev is left NULL for VFs and on error */
+void qlcnic_register_hwmon_dev(struct qlcnic_adapter *adapter)
+{
+	struct device *dev = &adapter->pdev->dev;
+	struct device *hdev = NULL;
+
+	/* Skip hwmon registration for a VF device */
+	if (qlcnic_sriov_vf_check(adapter))
+		goto out;
+
+	hdev = hwmon_device_register_with_groups(dev, qlcnic_driver_name,
+						 adapter, qlcnic_hwmon_groups);
+	if (IS_ERR(hdev)) {
+		dev_err(dev, "Cannot register with hwmon, err=%ld\n",
+			PTR_ERR(hdev));
+		hdev = NULL;
+	}
+out:
+	adapter->ahw->hwmon_dev = hdev;
+}
+
+void qlcnic_unregister_hwmon_dev(struct qlcnic_adapter *adapter)
+{
+	/* NULL when running as a VF or when registration failed */
+	if (adapter->ahw->hwmon_dev) {
+		hwmon_device_unregister(adapter->ahw->hwmon_dev);
+		adapter->ahw->hwmon_dev = NULL;
+	}
+}
+#endif
+
 void qlcnic_create_sysfs_entries(struct qlcnic_adapter *adapter)
 {
 	struct device *dev = &adapter->pdev->dev;

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 0a1d76a..b40050e 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c

@@ -3595,7 +3595,7 @@
 	}
 	return status;
 err_irq:
-	netif_err(qdev, ifup, qdev->ndev, "Failed to get the interrupts!!!/n");
+	netif_err(qdev, ifup, qdev->ndev, "Failed to get the interrupts!!!\n");
 	ql_free_irq(qdev);
 	return status;
 }
@@ -4770,7 +4770,7 @@
 	ndev->irq = pdev->irq;
 
 	ndev->netdev_ops = &qlge_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &qlge_ethtool_ops);
+	ndev->ethtool_ops = &qlge_ethtool_ops;
 	ndev->watchdog_timeo = 10 * HZ;
 
 	err = register_netdev(ndev);

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index aa1c079..be425ad 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c

@@ -7125,7 +7125,7 @@
 	for (i = 0; i < ETH_ALEN; i++)
 		dev->dev_addr[i] = RTL_R8(MAC0 + i);
 
-	SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops);
+	dev->ethtool_ops = &rtl8169_ethtool_ops;
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 
 	netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 6a9509c..7622213 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c

@@ -307,6 +307,27 @@
 };
 
 static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
+	[EDMR]		= 0x0000,
+	[EDTRR]		= 0x0004,
+	[EDRRR]		= 0x0008,
+	[TDLAR]		= 0x000c,
+	[RDLAR]		= 0x0010,
+	[EESR]		= 0x0014,
+	[EESIPR]	= 0x0018,
+	[TRSCER]	= 0x001c,
+	[RMFCR]		= 0x0020,
+	[TFTR]		= 0x0024,
+	[FDR]		= 0x0028,
+	[RMCR]		= 0x002c,
+	[EDOCR]		= 0x0030,
+	[FCFTR]		= 0x0034,
+	[RPADIR]	= 0x0038,
+	[TRIMD]		= 0x003c,
+	[RBWAR]		= 0x0040,
+	[RDFAR]		= 0x0044,
+	[TBRAR]		= 0x004c,
+	[TDFAR]		= 0x0050,
+
 	[ECMR]		= 0x0160,
 	[ECSR]		= 0x0164,
 	[ECSIPR]	= 0x0168,
@@ -546,7 +567,6 @@
 	.register_type	= SH_ETH_REG_FAST_SH4,
 
 	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
-	.rmcr_value	= RMCR_RNC,
 
 	.tx_check	= EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
 	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
@@ -624,7 +644,6 @@
 			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
 			  EESR_TDE | EESR_ECI,
 	.fdr_value	= 0x0000072f,
-	.rmcr_value	= RMCR_RNC,
 
 	.irq_flags	= IRQF_SHARED,
 	.apr		= 1,
@@ -752,7 +771,6 @@
 			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
 			  EESR_TDE | EESR_ECI,
 	.fdr_value	= 0x0000070f,
-	.rmcr_value	= RMCR_RNC,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -784,7 +802,6 @@
 			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
 			  EESR_TDE | EESR_ECI,
 	.fdr_value	= 0x0000070f,
-	.rmcr_value	= RMCR_RNC,
 
 	.no_psr		= 1,
 	.apr		= 1,
@@ -833,9 +850,6 @@
 	if (!cd->fdr_value)
 		cd->fdr_value = DEFAULT_FDR_INIT;
 
-	if (!cd->rmcr_value)
-		cd->rmcr_value = DEFAULT_RMCR_VALUE;
-
 	if (!cd->tx_check)
 		cd->tx_check = DEFAULT_TX_CHECK;
 
@@ -1287,8 +1301,8 @@
 	sh_eth_write(ndev, mdp->cd->fdr_value, FDR);
 	sh_eth_write(ndev, 0, TFTR);
 
-	/* Frame recv control */
-	sh_eth_write(ndev, mdp->cd->rmcr_value, RMCR);
+	/* Frame recv control (enable multiple-packets per rx irq) */
+	sh_eth_write(ndev, RMCR_RNC, RMCR);
 
 	sh_eth_write(ndev, DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2, TRSCER);
 
@@ -1385,7 +1399,6 @@
 	int entry = mdp->cur_rx % mdp->num_rx_ring;
 	int boguscnt = (mdp->dirty_rx + mdp->num_rx_ring) - mdp->cur_rx;
 	struct sk_buff *skb;
-	int exceeded = 0;
 	u16 pkt_len = 0;
 	u32 desc_status;
 
@@ -1397,10 +1410,9 @@
 		if (--boguscnt < 0)
 			break;
 
-		if (*quota <= 0) {
-			exceeded = 1;
+		if (*quota <= 0)
 			break;
-		}
+
 		(*quota)--;
 
 		if (!(desc_status & RDFEND))
@@ -1448,7 +1460,6 @@
 			ndev->stats.rx_packets++;
 			ndev->stats.rx_bytes += pkt_len;
 		}
-		rxdesc->status |= cpu_to_edmac(mdp, RD_RACT);
 		entry = (++mdp->cur_rx) % mdp->num_rx_ring;
 		rxdesc = &mdp->rx_ring[entry];
 	}
@@ -1494,7 +1505,7 @@
 		sh_eth_write(ndev, EDRRR_R, EDRRR);
 	}
 
-	return exceeded;
+	return *quota <= 0;
 }
 
 static void sh_eth_rcv_snd_disable(struct net_device *ndev)
@@ -2627,8 +2638,8 @@
 		 pdev->name, pdev->id);
 
 	/* PHY IRQ */
-	mdp->mii_bus->irq = devm_kzalloc(dev, sizeof(int) * PHY_MAX_ADDR,
-					 GFP_KERNEL);
+	mdp->mii_bus->irq = devm_kmalloc_array(dev, PHY_MAX_ADDR, sizeof(int),
+					       GFP_KERNEL);
 	if (!mdp->mii_bus->irq) {
 		ret = -ENOMEM;
 		goto out_free_bus;
@@ -2843,7 +2854,7 @@
 		ndev->netdev_ops = &sh_eth_netdev_ops_tsu;
 	else
 		ndev->netdev_ops = &sh_eth_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops);
+	ndev->ethtool_ops = &sh_eth_ethtool_ops;
 	ndev->watchdog_timeo = TX_TIMEOUT;
 
 	/* debug message level */

diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index d55e37c..b37c427 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h

@@ -319,7 +319,6 @@
 enum RMCR_BIT {
 	RMCR_RNC = 0x00000001,
 };
-#define DEFAULT_RMCR_VALUE	0x00000000
 
 /* ECMR */
 enum FELIC_MODE_BIT {
@@ -466,7 +465,6 @@
 	unsigned long fdr_value;
 	unsigned long fcftr_value;
 	unsigned long rpadir_value;
-	unsigned long rmcr_value;
 
 	/* interrupt checking mask */
 	unsigned long tx_check;

diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 0415fa5..c0981ae 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c

@@ -520,5 +520,5 @@
 
 void sxgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops);
+	netdev->ethtool_ops = &sxgbe_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 82a9a98..6984944 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c

@@ -425,8 +425,8 @@
  * @rx_rsize: ring size
  * Description:  this function initializes the DMA RX descriptor
  */
-void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring,
-		  int rx_rsize)
+static void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring,
+			 int rx_rsize)
 {
 	dma_free_coherent(dev, rx_rsize * sizeof(struct sxgbe_rx_norm_desc),
 			  rx_ring->dma_rx, rx_ring->dma_rx_phy);
@@ -519,8 +519,8 @@
  * @tx_rsize: ring size
  * Description:  this function initializes the DMA TX descriptor
  */
-void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring,
-		  int tx_rsize)
+static void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring,
+			 int tx_rsize)
 {
 	dma_free_coherent(dev, tx_rsize * sizeof(struct sxgbe_tx_norm_desc),
 			  tx_ring->dma_tx, tx_ring->dma_tx_phy);
@@ -1221,11 +1221,10 @@
 
 	return 0;
 }
-
 /* Prepare first Tx descriptor for doing TSO operation */
-void sxgbe_tso_prepare(struct sxgbe_priv_data *priv,
-		       struct sxgbe_tx_norm_desc *first_desc,
-		       struct sk_buff *skb)
+static void sxgbe_tso_prepare(struct sxgbe_priv_data *priv,
+			      struct sxgbe_tx_norm_desc *first_desc,
+			      struct sk_buff *skb)
 {
 	unsigned int total_hdr_len, tcp_hdr_len;
 
@@ -1914,40 +1913,6 @@
 		   readl(ioaddr + SXGBE_HASH_LOW));
 }
 
-/**
- * sxgbe_config - entry point for changing configuration mode passed on by
- * ifconfig
- * @dev : pointer to the device structure
- * @map : pointer to the device mapping structure
- * Description:
- * This function is a driver entry point which gets called by the kernel
- * whenever some device configuration is changed.
- * Return value:
- * This function returns 0 if success and appropriate error otherwise.
- */
-static int sxgbe_config(struct net_device *dev, struct ifmap *map)
-{
-	struct sxgbe_priv_data *priv = netdev_priv(dev);
-
-	/* Can't act on a running interface */
-	if (dev->flags & IFF_UP)
-		return -EBUSY;
-
-	/* Don't allow changing the I/O address */
-	if (map->base_addr != (unsigned long)priv->ioaddr) {
-		netdev_warn(dev, "can't change I/O address\n");
-		return -EOPNOTSUPP;
-	}
-
-	/* Don't allow changing the IRQ */
-	if (map->irq != priv->irq) {
-		netdev_warn(dev, "not change IRQ number %d\n", priv->irq);
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /**
  * sxgbe_poll_controller - entry point for polling receive by device
@@ -2009,7 +1974,6 @@
 	.ndo_set_rx_mode	= sxgbe_set_rx_mode,
 	.ndo_tx_timeout		= sxgbe_tx_timeout,
 	.ndo_do_ioctl		= sxgbe_ioctl,
-	.ndo_set_config		= sxgbe_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= sxgbe_poll_controller,
 #endif

diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h
index 56f8bf5..81437d9 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h

@@ -188,7 +188,6 @@
 
 /* L3/L4 function registers */
 #define SXGBE_CORE_L34_ADDCTL_REG	0x0C00
-#define SXGBE_CORE_L34_ADDCTL_REG	0x0C00
 #define SXGBE_CORE_L34_DATA_REG		0x0C04
 
 /* ARP registers */

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 63d595f..1e27404 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c

@@ -2248,7 +2248,7 @@
 	} else {
 		net_dev->netdev_ops = &efx_farch_netdev_ops;
 	}
-	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
+	net_dev->ethtool_ops = &efx_ethtool_ops;
 	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
 
 	rtnl_lock();

diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 0de8b07..74739c4 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c

@@ -1033,7 +1033,7 @@
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
-static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
+static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1041,8 +1041,8 @@
 	return 0;
 }
 
-static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
-				      const u32 *indir)
+static int efx_ethtool_set_rxfh(struct net_device *net_dev,
+				const u32 *indir, const u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1125,8 +1125,8 @@
 	.get_rxnfc		= efx_ethtool_get_rxnfc,
 	.set_rxnfc		= efx_ethtool_set_rxnfc,
 	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
-	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
-	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
+	.get_rxfh		= efx_ethtool_get_rxfh,
+	.set_rxfh		= efx_ethtool_set_rxfh,
 	.get_ts_info		= efx_ethtool_get_ts_info,
 	.get_module_info	= efx_ethtool_get_module_info,
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,

diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h
index 4d3f119..afb94aa 100644
--- a/drivers/net/ethernet/sfc/io.h
+++ b/drivers/net/ethernet/sfc/io.h

@@ -66,10 +66,17 @@
 #define EFX_USE_QWORD_IO 1
 #endif
 
+/* Hardware issue requires that only 64-bit naturally aligned writes
+ * are seen by hardware. It's not strictly necessary to restrict to
+ * x86_64 arch, but done for safety since unusual write combining behaviour
+ * can break PIO.
+ */
+#ifdef CONFIG_X86_64
 /* PIO is a win only if write-combining is possible */
 #ifdef ARCH_HAS_IOREMAP_WC
 #define EFX_USE_PIO 1
 #endif
+#endif
 
 #ifdef EFX_USE_QWORD_IO
 static inline void _efx_writeq(struct efx_nic *efx, __le64 value,

diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index 9a9205e..43d2e64 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c

@@ -1633,7 +1633,8 @@
 
 	ivi->vf = vf_i;
 	ether_addr_copy(ivi->mac, vf->addr.mac_addr);
-	ivi->tx_rate = 0;
+	ivi->max_tx_rate = 0;
+	ivi->min_tx_rate = 0;
 	tci = ntohs(vf->addr.tci);
 	ivi->vlan = tci & VLAN_VID_MASK;
 	ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;

diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index fa94753..ede8dcc 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c

@@ -189,6 +189,18 @@
 	u8 buf[L1_CACHE_BYTES];
 };
 
+/* Copy in explicit 64-bit writes; any len % 8 tail bytes are not copied. */
+static void efx_memcpy_64(void __iomem *dest, void *src, size_t len)
+{
+	u64 *src64 = src;
+	u64 __iomem *dest64 = dest;
+	size_t l64 = len / 8;	/* number of whole 64-bit words in len */
+	size_t i;
+
+	for (i = 0; i < l64; i++)
+		writeq(src64[i], &dest64[i]);	/* one writeq() per word */
+}
+
 /* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
  * Advances piobuf pointer. Leaves additional data in the copy buffer.
  */
@@ -198,7 +210,7 @@
 {
 	int block_len = len & ~(sizeof(copy_buf->buf) - 1);
 
-	memcpy_toio(*piobuf, data, block_len);
+	efx_memcpy_64(*piobuf, data, block_len);
 	*piobuf += block_len;
 	len -= block_len;
 
@@ -230,7 +242,7 @@
 		if (copy_buf->used < sizeof(copy_buf->buf))
 			return;
 
-		memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+		efx_memcpy_64(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
 		*piobuf += sizeof(copy_buf->buf);
 		data += copy_to_buf;
 		len -= copy_to_buf;
@@ -245,7 +257,7 @@
 {
 	/* if there's anything in it, write the whole buffer, including junk */
 	if (copy_buf->used)
-		memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+		efx_memcpy_64(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
 }
 
 /* Traverse skb structure and copy fragments in to PIO buffer.
@@ -304,8 +316,8 @@
 		 */
 		BUILD_BUG_ON(L1_CACHE_BYTES >
 			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
-		memcpy_toio(tx_queue->piobuf, skb->data,
-			    ALIGN(skb->len, L1_CACHE_BYTES));
+		efx_memcpy_64(tx_queue->piobuf, skb->data,
+			      ALIGN(skb->len, L1_CACHE_BYTES));
 	}
 
 	EFX_POPULATE_QWORD_5(buffer->option,

diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index acbbe48..a863399 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c

@@ -1877,7 +1877,7 @@
 
 	dev->netdev_ops = &sis190_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops);
+	dev->ethtool_ops = &sis190_ethtool_ops;
 	dev->watchdog_timeo = SIS190_TX_TIMEOUT;
 
 	spin_lock_init(&tp->lock);

diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index c7a4868..6b33127 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c

@@ -318,7 +318,7 @@
 
     /* The SMC91c92-specific entries in the device structure. */
     dev->netdev_ops = &smc_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &ethtool_ops);
+    dev->ethtool_ops = &ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     smc->mii_if.dev = dev;

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index a0fc151..5e13fa5 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c

@@ -2477,6 +2477,8 @@
 		goto out_disable_resources;
 	}
 
+	netif_carrier_off(dev);
+
 	retval = register_netdev(dev);
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error %i registering device", retval);

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5f9cb8..c62e67f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c

@@ -322,9 +322,7 @@
 		return -EBUSY;
 	}
 	cmd->transceiver = XCVR_INTERNAL;
-	spin_lock_irq(&priv->lock);
 	rc = phy_ethtool_gset(phy, cmd);
-	spin_unlock_irq(&priv->lock);
 	return rc;
 }
 
@@ -431,8 +429,6 @@
 	if (priv->pcs)	/* FIXME */
 		return;
 
-	spin_lock(&priv->lock);
-
 	pause->rx_pause = 0;
 	pause->tx_pause = 0;
 	pause->autoneg = priv->phydev->autoneg;
@@ -442,7 +438,6 @@
 	if (priv->flow_ctrl & FLOW_TX)
 		pause->tx_pause = 1;
 
-	spin_unlock(&priv->lock);
 }
 
 static int
@@ -457,8 +452,6 @@
 	if (priv->pcs)	/* FIXME */
 		return -EOPNOTSUPP;
 
-	spin_lock(&priv->lock);
-
 	if (pause->rx_pause)
 		new_pause |= FLOW_RX;
 	if (pause->tx_pause)
@@ -473,7 +466,6 @@
 	} else
 		priv->hw->mac->flow_ctrl(priv->ioaddr, phy->duplex,
 					 priv->flow_ctrl, priv->pause);
-	spin_unlock(&priv->lock);
 	return ret;
 }
 
@@ -784,5 +776,5 @@
 
 void stmmac_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops);
+	netdev->ethtool_ops = &stmmac_ethtool_ops;
 }

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0f4841d..057a120 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

@@ -1753,7 +1753,7 @@
 	}
 
 	/* Request the IRQ lines */
-	if (priv->lpi_irq != -ENXIO) {
+	if (priv->lpi_irq > 0) {
 		ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
 				  dev->name, dev);
 		if (unlikely(ret < 0)) {
@@ -1813,7 +1813,7 @@
 	free_irq(dev->irq, dev);
 	if (priv->wol_irq != dev->irq)
 		free_irq(priv->wol_irq, dev);
-	if (priv->lpi_irq != -ENXIO)
+	if (priv->lpi_irq > 0)
 		free_irq(priv->lpi_irq, dev);
 
 	/* Stop TX/RX DMA and clear the descriptors */
@@ -2212,27 +2212,6 @@
 	stmmac_tx_err(priv);
 }
 
-/* Configuration changes (passed on by ifconfig) */
-static int stmmac_config(struct net_device *dev, struct ifmap *map)
-{
-	if (dev->flags & IFF_UP)	/* can't act on a running interface */
-		return -EBUSY;
-
-	/* Don't allow changing the I/O address */
-	if (map->base_addr != dev->base_addr) {
-		pr_warn("%s: can't change I/O address\n", dev->name);
-		return -EOPNOTSUPP;
-	}
-
-	/* Don't allow changing the IRQ */
-	if (map->irq != dev->irq) {
-		pr_warn("%s: not change IRQ number %d\n", dev->name, dev->irq);
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
 /**
  *  stmmac_set_rx_mode - entry point for multicast addressing
  *  @dev : pointer to the device structure
@@ -2598,7 +2577,6 @@
 	.ndo_set_rx_mode = stmmac_set_rx_mode,
 	.ndo_tx_timeout = stmmac_tx_timeout,
 	.ndo_do_ioctl = stmmac_ioctl,
-	.ndo_set_config = stmmac_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = stmmac_poll_controller,
 #endif

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index a468eb1..a5b1e1b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c

@@ -205,10 +205,13 @@
 	if (new_bus == NULL)
 		return -ENOMEM;
 
-	if (mdio_bus_data->irqs)
+	if (mdio_bus_data->irqs) {
 		irqlist = mdio_bus_data->irqs;
-	else
+	} else {
+		for (addr = 0; addr < PHY_MAX_ADDR; addr++)
+			priv->mii_irq[addr] = PHY_POLL;
 		irqlist = priv->mii_irq;
+	}
 
 #ifdef CONFIG_OF
 	if (priv->device->of_node)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 46aef510..ea7a65b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c

@@ -237,10 +237,12 @@
 
 	/* Get the MAC information */
 	priv->dev->irq = platform_get_irq_byname(pdev, "macirq");
-	if (priv->dev->irq == -ENXIO) {
-		pr_err("%s: ERROR: MAC IRQ configuration "
-		       "information not found\n", __func__);
-		return -ENXIO;
+	if (priv->dev->irq < 0) {
+		if (priv->dev->irq != -EPROBE_DEFER) {
+			netdev_err(priv->dev,
+				   "MAC IRQ configuration information not found\n");
+		}
+		return priv->dev->irq;
 	}
 
 	/*
@@ -252,10 +254,15 @@
 	 * so the driver will continue to use the mac irq (ndev->irq)
 	 */
 	priv->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
-	if (priv->wol_irq == -ENXIO)
+	if (priv->wol_irq < 0) {
+		if (priv->wol_irq == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
 		priv->wol_irq = priv->dev->irq;
+	}
 
 	priv->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
+	if (priv->lpi_irq == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	platform_set_drvdata(pdev, priv->dev);
 

diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 2ead877..38da73a 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c

@@ -2413,7 +2413,7 @@
 		.get_ethtool_stats = bdx_get_ethtool_stats,
 	};
 
-	SET_ETHTOOL_OPS(netdev, &bdx_ethtool_ops);
+	netdev->ethtool_ops = &bdx_ethtool_ops;
 }
 
 /**

diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 73f74f3..7399a52 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c

@@ -313,19 +313,6 @@
 
 static struct mii_bus *cpmac_mii;
 
-static int cpmac_config(struct net_device *dev, struct ifmap *map)
-{
-	if (dev->flags & IFF_UP)
-		return -EBUSY;
-
-	/* Don't allow changing the I/O address */
-	if (map->base_addr != dev->base_addr)
-		return -EOPNOTSUPP;
-
-	/* ignore other fields */
-	return 0;
-}
-
 static void cpmac_set_multicast_list(struct net_device *dev)
 {
 	struct netdev_hw_addr *ha;
@@ -1100,7 +1087,6 @@
 	.ndo_tx_timeout		= cpmac_tx_timeout,
 	.ndo_set_rx_mode	= cpmac_set_multicast_list,
 	.ndo_do_ioctl		= cpmac_ioctl,
-	.ndo_set_config		= cpmac_config,
 	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 148da9a..aa8bf45 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c

@@ -29,6 +29,8 @@
 #define AM33XX_GMII_SEL_RMII2_IO_CLK_EN	BIT(7)
 #define AM33XX_GMII_SEL_RMII1_IO_CLK_EN	BIT(6)
 
+#define GMII_SEL_MODE_MASK		0x3
+
 struct cpsw_phy_sel_priv {
 	struct device	*dev;
 	u32 __iomem	*gmii_sel;
@@ -65,7 +67,7 @@
 		break;
 	};
 
-	mask = 0x3 << (slave * 2) | BIT(slave + 6);
+	mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6);
 	mode <<= slave * 2;
 
 	if (priv->rmii_clock_external) {
@@ -81,6 +83,55 @@
 	writel(reg, priv->gmii_sel);
 }
 
+static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv,
+				 phy_interface_t phy_mode, int slave)
+{
+	u32 reg;
+	u32 mask;
+	u32 mode = 0;
+
+	reg = readl(priv->gmii_sel);	/* start of read-modify-write */
+
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		mode = AM33XX_GMII_SEL_MODE_RMII;
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		mode = AM33XX_GMII_SEL_MODE_RGMII;	/* all RGMII variants */
+		break;
+
+	case PHY_INTERFACE_MODE_MII:
+	default:	/* any other phy_mode is programmed as MII */
+		mode = AM33XX_GMII_SEL_MODE_MII;
+		break;
+	};
+
+	switch (slave) {
+	case 0:	/* slave 0: unshifted mode mask */
+		mask = GMII_SEL_MODE_MASK;
+		break;
+	case 1:	/* slave 1: mask and mode both shifted left by 4 */
+		mask = GMII_SEL_MODE_MASK << 4;
+		mode <<= 4;
+		break;
+	default:	/* other slave numbers: log and leave the register alone */
+		dev_err(priv->dev, "invalid slave number...\n");
+		return;
+	}
+
+	if (priv->rmii_clock_external)	/* logged only; the write below still runs */
+		dev_err(priv->dev, "RMII External clock is not supported\n");
+
+	reg &= ~mask;	/* clear this slave's mode bits ... */
+	reg |= mode;	/* ... then set the newly selected mode */
+
+	writel(reg, priv->gmii_sel);
+}
+
 static struct platform_driver cpsw_phy_sel_driver;
 static int match(struct device *dev, void *data)
 {
@@ -112,6 +163,14 @@
 		.compatible	= "ti,am3352-cpsw-phy-sel",
 		.data		= &cpsw_gmii_sel_am3352,
 	},
+	{
+		.compatible	= "ti,dra7xx-cpsw-phy-sel",
+		.data		= &cpsw_gmii_sel_dra7xx,
+	},
+	{
+		.compatible	= "ti,am43xx-cpsw-phy-sel",
+		.data		= &cpsw_gmii_sel_am3352,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, cpsw_phy_sel_id_table);
@@ -132,6 +191,7 @@
 		return -ENOMEM;
 	}
 
+	priv->dev = &pdev->dev;
 	priv->cpsw_phy_sel = of_id->data;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gmii-sel");

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c331b7e..ff380da 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c

@@ -143,13 +143,13 @@
 		u32 i;		\
 		for (i = 0; i < priv->num_irqs; i++) \
 			enable_irq(priv->irqs_table[i]); \
-	} while (0);
+	} while (0)
 #define cpsw_disable_irq(priv)	\
 	do {			\
 		u32 i;		\
 		for (i = 0; i < priv->num_irqs; i++) \
 			disable_irq_nosync(priv->irqs_table[i]); \
-	} while (0);
+	} while (0)
 
 #define cpsw_slave_index(priv)				\
 		((priv->data.dual_emac) ? priv->emac_port :	\
@@ -248,20 +248,31 @@
 #define TS_131              (1<<11) /* Time Sync Dest IP Addr 131 enable */
 #define TS_130              (1<<10) /* Time Sync Dest IP Addr 130 enable */
 #define TS_129              (1<<9)  /* Time Sync Dest IP Addr 129 enable */
-#define TS_BIT8             (1<<8)  /* ts_ttl_nonzero? */
+#define TS_TTL_NONZERO      (1<<8)  /* Time Sync Time To Live Non-zero enable */
+#define TS_ANNEX_F_EN       (1<<6)  /* Time Sync Annex F enable */
 #define TS_ANNEX_D_EN       (1<<4)  /* Time Sync Annex D enable */
 #define TS_LTYPE2_EN        (1<<3)  /* Time Sync LTYPE 2 enable */
 #define TS_LTYPE1_EN        (1<<2)  /* Time Sync LTYPE 1 enable */
 #define TS_TX_EN            (1<<1)  /* Time Sync Transmit Enable */
 #define TS_RX_EN            (1<<0)  /* Time Sync Receive Enable */
 
-#define CTRL_TS_BITS \
-	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 | TS_BIT8 | \
-	 TS_ANNEX_D_EN | TS_LTYPE1_EN)
+#define CTRL_V2_TS_BITS \
+	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+	 TS_TTL_NONZERO  | TS_ANNEX_D_EN | TS_LTYPE1_EN)
 
-#define CTRL_ALL_TS_MASK (CTRL_TS_BITS | TS_TX_EN | TS_RX_EN)
-#define CTRL_TX_TS_BITS  (CTRL_TS_BITS | TS_TX_EN)
-#define CTRL_RX_TS_BITS  (CTRL_TS_BITS | TS_RX_EN)
+#define CTRL_V2_ALL_TS_MASK (CTRL_V2_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V2_TX_TS_BITS  (CTRL_V2_TS_BITS | TS_TX_EN)
+#define CTRL_V2_RX_TS_BITS  (CTRL_V2_TS_BITS | TS_RX_EN)
+
+
+#define CTRL_V3_TS_BITS \
+	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+	 TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
+	 TS_LTYPE1_EN)
+
+#define CTRL_V3_ALL_TS_MASK (CTRL_V3_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V3_TX_TS_BITS  (CTRL_V3_TS_BITS | TS_TX_EN)
+#define CTRL_V3_RX_TS_BITS  (CTRL_V3_TS_BITS | TS_RX_EN)
 
 /* Bit definitions for the CPSW2_TS_SEQ_MTYPE register */
 #define TS_SEQ_ID_OFFSET_SHIFT   (16)    /* Time Sync Sequence ID Offset */
@@ -1376,13 +1387,27 @@
 		slave = &priv->slaves[priv->data.active_slave];
 
 	ctrl = slave_read(slave, CPSW2_CONTROL);
-	ctrl &= ~CTRL_ALL_TS_MASK;
+	switch (priv->version) {
+	case CPSW_VERSION_2:
+		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-	if (priv->cpts->tx_enable)
-		ctrl |= CTRL_TX_TS_BITS;
+		if (priv->cpts->tx_enable)
+			ctrl |= CTRL_V2_TX_TS_BITS;
 
-	if (priv->cpts->rx_enable)
-		ctrl |= CTRL_RX_TS_BITS;
+		if (priv->cpts->rx_enable)
+			ctrl |= CTRL_V2_RX_TS_BITS;
+	break;
+	case CPSW_VERSION_3:
+	default:
+		ctrl &= ~CTRL_V3_ALL_TS_MASK;
+
+		if (priv->cpts->tx_enable)
+			ctrl |= CTRL_V3_TX_TS_BITS;
+
+		if (priv->cpts->rx_enable)
+			ctrl |= CTRL_V3_RX_TS_BITS;
+	break;
+	}
 
 	mtype = (30 << TS_SEQ_ID_OFFSET_SHIFT) | EVENT_MSG_BITS;
 
@@ -1398,7 +1423,8 @@
 	struct hwtstamp_config cfg;
 
 	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2)
+	    priv->version != CPSW_VERSION_2 &&
+	    priv->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -1443,6 +1469,7 @@
 		cpsw_hwtstamp_v1(priv);
 		break;
 	case CPSW_VERSION_2:
+	case CPSW_VERSION_3:
 		cpsw_hwtstamp_v2(priv);
 		break;
 	default:
@@ -1459,7 +1486,8 @@
 	struct hwtstamp_config cfg;
 
 	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2)
+	    priv->version != CPSW_VERSION_2 &&
+	    priv->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
@@ -1780,25 +1808,25 @@
 		return -EINVAL;
 
 	if (of_property_read_u32(node, "slaves", &prop)) {
-		pr_err("Missing slaves property in the DT.\n");
+		dev_err(&pdev->dev, "Missing slaves property in the DT.\n");
 		return -EINVAL;
 	}
 	data->slaves = prop;
 
 	if (of_property_read_u32(node, "active_slave", &prop)) {
-		pr_err("Missing active_slave property in the DT.\n");
+		dev_err(&pdev->dev, "Missing active_slave property in the DT.\n");
 		return -EINVAL;
 	}
 	data->active_slave = prop;
 
 	if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
-		pr_err("Missing cpts_clock_mult property in the DT.\n");
+		dev_err(&pdev->dev, "Missing cpts_clock_mult property in the DT.\n");
 		return -EINVAL;
 	}
 	data->cpts_clock_mult = prop;
 
 	if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
-		pr_err("Missing cpts_clock_shift property in the DT.\n");
+		dev_err(&pdev->dev, "Missing cpts_clock_shift property in the DT.\n");
 		return -EINVAL;
 	}
 	data->cpts_clock_shift = prop;
@@ -1810,31 +1838,31 @@
 		return -ENOMEM;
 
 	if (of_property_read_u32(node, "cpdma_channels", &prop)) {
-		pr_err("Missing cpdma_channels property in the DT.\n");
+		dev_err(&pdev->dev, "Missing cpdma_channels property in the DT.\n");
 		return -EINVAL;
 	}
 	data->channels = prop;
 
 	if (of_property_read_u32(node, "ale_entries", &prop)) {
-		pr_err("Missing ale_entries property in the DT.\n");
+		dev_err(&pdev->dev, "Missing ale_entries property in the DT.\n");
 		return -EINVAL;
 	}
 	data->ale_entries = prop;
 
 	if (of_property_read_u32(node, "bd_ram_size", &prop)) {
-		pr_err("Missing bd_ram_size property in the DT.\n");
+		dev_err(&pdev->dev, "Missing bd_ram_size property in the DT.\n");
 		return -EINVAL;
 	}
 	data->bd_ram_size = prop;
 
 	if (of_property_read_u32(node, "rx_descs", &prop)) {
-		pr_err("Missing rx_descs property in the DT.\n");
+		dev_err(&pdev->dev, "Missing rx_descs property in the DT.\n");
 		return -EINVAL;
 	}
 	data->rx_descs = prop;
 
 	if (of_property_read_u32(node, "mac_control", &prop)) {
-		pr_err("Missing mac_control property in the DT.\n");
+		dev_err(&pdev->dev, "Missing mac_control property in the DT.\n");
 		return -EINVAL;
 	}
 	data->mac_control = prop;
@@ -1848,7 +1876,7 @@
 	ret = of_platform_populate(node, NULL, NULL, &pdev->dev);
 	/* We do not want to force this, as in some cases may not have child */
 	if (ret)
-		pr_warn("Doesn't have any child node\n");
+		dev_warn(&pdev->dev, "Doesn't have any child node\n");
 
 	for_each_child_of_node(node, slave_node) {
 		struct cpsw_slave_data *slave_data = data->slave_data + i;
@@ -1865,7 +1893,7 @@
 
 		parp = of_get_property(slave_node, "phy_id", &lenp);
 		if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) {
-			pr_err("Missing slave[%d] phy_id property\n", i);
+			dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i);
 			return -EINVAL;
 		}
 		mdio_node = of_find_node_by_phandle(be32_to_cpup(parp));
@@ -1885,18 +1913,18 @@
 
 		slave_data->phy_if = of_get_phy_mode(slave_node);
 		if (slave_data->phy_if < 0) {
-			pr_err("Missing or malformed slave[%d] phy-mode property\n",
-			       i);
+			dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
+				i);
 			return slave_data->phy_if;
 		}
 
 		if (data->dual_emac) {
 			if (of_property_read_u32(slave_node, "dual_emac_res_vlan",
 						 &prop)) {
-				pr_err("Missing dual_emac_res_vlan in DT.\n");
+				dev_err(&pdev->dev, "Missing dual_emac_res_vlan in DT.\n");
 				slave_data->dual_emac_res_vlan = i+1;
-				pr_err("Using %d as Reserved VLAN for %d slave\n",
-				       slave_data->dual_emac_res_vlan, i);
+				dev_err(&pdev->dev, "Using %d as Reserved VLAN for %d slave\n",
+					slave_data->dual_emac_res_vlan, i);
 			} else {
 				slave_data->dual_emac_res_vlan = prop;
 			}
@@ -1920,7 +1948,7 @@
 
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
-		pr_err("cpsw: error allocating net_device\n");
+		dev_err(&pdev->dev, "cpsw: error allocating net_device\n");
 		return -ENOMEM;
 	}
 
@@ -1936,10 +1964,10 @@
 	if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
 		memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
 			ETH_ALEN);
-		pr_info("cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
 	} else {
 		random_ether_addr(priv_sl2->mac_addr);
-		pr_info("cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
 	}
 	memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
 
@@ -1970,14 +1998,14 @@
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	ret = register_netdev(ndev);
 	if (ret) {
-		pr_err("cpsw: error registering net device\n");
+		dev_err(&pdev->dev, "cpsw: error registering net device\n");
 		free_netdev(ndev);
 		ret = -ENODEV;
 	}
@@ -1999,7 +2027,7 @@
 
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
-		pr_err("error allocating net_device\n");
+		dev_err(&pdev->dev, "error allocating net_device\n");
 		return -ENOMEM;
 	}
 
@@ -2014,7 +2042,7 @@
 	priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
 	priv->irq_enabled = true;
 	if (!priv->cpts) {
-		pr_err("error allocating cpts\n");
+		dev_err(&pdev->dev, "error allocating cpts\n");
 		goto clean_ndev_ret;
 	}
 
@@ -2027,7 +2055,7 @@
 	pinctrl_pm_select_default_state(&pdev->dev);
 
 	if (cpsw_probe_dt(&priv->data, pdev)) {
-		pr_err("cpsw: platform data missing\n");
+		dev_err(&pdev->dev, "cpsw: platform data missing\n");
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
@@ -2035,10 +2063,10 @@
 
 	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
 		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
-		pr_info("Detected MACID = %pM\n", priv->mac_addr);
+		dev_info(&pdev->dev, "Detected MACID = %pM\n", priv->mac_addr);
 	} else {
 		eth_random_addr(priv->mac_addr);
-		pr_info("Random MACID = %pM\n", priv->mac_addr);
+		dev_info(&pdev->dev, "Random MACID = %pM\n", priv->mac_addr);
 	}
 
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
@@ -2199,7 +2227,7 @@
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 2435139..6b56f85 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c

@@ -236,13 +236,11 @@
 	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 }
 
-#define CPTS_REF_CLOCK_NAME "cpsw_cpts_rft_clk"
-
-static void cpts_clk_init(struct cpts *cpts)
+static void cpts_clk_init(struct device *dev, struct cpts *cpts)
 {
-	cpts->refclk = clk_get(NULL, CPTS_REF_CLOCK_NAME);
+	cpts->refclk = devm_clk_get(dev, "cpts");
 	if (IS_ERR(cpts->refclk)) {
-		pr_err("Failed to clk_get %s\n", CPTS_REF_CLOCK_NAME);
+		dev_err(dev, "Failed to get cpts refclk\n");
 		cpts->refclk = NULL;
 		return;
 	}
@@ -252,7 +250,6 @@
 static void cpts_clk_release(struct cpts *cpts)
 {
 	clk_disable(cpts->refclk);
-	clk_put(cpts->refclk);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -390,7 +387,7 @@
 	for (i = 0; i < CPTS_MAX_EVENTS; i++)
 		list_add(&cpts->pool_data[i].list, &cpts->pool);
 
-	cpts_clk_init(cpts);
+	cpts_clk_init(dev, cpts);
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
 

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 88ef270..4a000f6 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c

@@ -158,9 +158,9 @@
 	int bitmap_size;
 	struct cpdma_desc_pool *pool;
 
-	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	pool = devm_kzalloc(dev, sizeof(*pool), GFP_KERNEL);
 	if (!pool)
-		return NULL;
+		goto fail;
 
 	spin_lock_init(&pool->lock);
 
@@ -170,7 +170,7 @@
 	pool->num_desc	= size / pool->desc_size;
 
 	bitmap_size  = (pool->num_desc / BITS_PER_LONG) * sizeof(long);
-	pool->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	pool->bitmap = devm_kzalloc(dev, bitmap_size, GFP_KERNEL);
 	if (!pool->bitmap)
 		goto fail;
 
@@ -187,10 +187,7 @@
 
 	if (pool->iomap)
 		return pool;
-
 fail:
-	kfree(pool->bitmap);
-	kfree(pool);
 	return NULL;
 }
 
@@ -203,7 +200,6 @@
 
 	spin_lock_irqsave(&pool->lock, flags);
 	WARN_ON(pool->used_desc);
-	kfree(pool->bitmap);
 	if (pool->cpumap) {
 		dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
 				  pool->phys);
@@ -211,7 +207,6 @@
 		iounmap(pool->iomap);
 	}
 	spin_unlock_irqrestore(&pool->lock, flags);
-	kfree(pool);
 }
 
 static inline dma_addr_t desc_phys(struct cpdma_desc_pool *pool,
@@ -276,7 +271,7 @@
 {
 	struct cpdma_ctlr *ctlr;
 
-	ctlr = kzalloc(sizeof(*ctlr), GFP_KERNEL);
+	ctlr = devm_kzalloc(params->dev, sizeof(*ctlr), GFP_KERNEL);
 	if (!ctlr)
 		return NULL;
 
@@ -290,10 +285,8 @@
 					    ctlr->params.desc_hw_addr,
 					    ctlr->params.desc_mem_size,
 					    ctlr->params.desc_align);
-	if (!ctlr->pool) {
-		kfree(ctlr);
+	if (!ctlr->pool)
 		return NULL;
-	}
 
 	if (WARN_ON(ctlr->num_chan > CPDMA_MAX_CHANNELS))
 		ctlr->num_chan = CPDMA_MAX_CHANNELS;
@@ -468,7 +461,6 @@
 
 	cpdma_desc_pool_destroy(ctlr->pool);
 	spin_unlock_irqrestore(&ctlr->lock, flags);
-	kfree(ctlr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
@@ -507,21 +499,22 @@
 				     cpdma_handler_fn handler)
 {
 	struct cpdma_chan *chan;
-	int ret, offset = (chan_num % CPDMA_MAX_CHANNELS) * 4;
+	int offset = (chan_num % CPDMA_MAX_CHANNELS) * 4;
 	unsigned long flags;
 
 	if (__chan_linear(chan_num) >= ctlr->num_chan)
 		return NULL;
 
-	ret = -ENOMEM;
-	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	chan = devm_kzalloc(ctlr->dev, sizeof(*chan), GFP_KERNEL);
 	if (!chan)
-		goto err_chan_alloc;
+		return ERR_PTR(-ENOMEM);
 
 	spin_lock_irqsave(&ctlr->lock, flags);
-	ret = -EBUSY;
-	if (ctlr->channels[chan_num])
-		goto err_chan_busy;
+	if (ctlr->channels[chan_num]) {
+		spin_unlock_irqrestore(&ctlr->lock, flags);
+		devm_kfree(ctlr->dev, chan);
+		return ERR_PTR(-EBUSY);
+	}
 
 	chan->ctlr	= ctlr;
 	chan->state	= CPDMA_STATE_IDLE;
@@ -551,12 +544,6 @@
 	ctlr->channels[chan_num] = chan;
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return chan;
-
-err_chan_busy:
-	spin_unlock_irqrestore(&ctlr->lock, flags);
-	kfree(chan);
-err_chan_alloc:
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_create);
 

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 8f0e69c..35a139e 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c

@@ -1567,7 +1567,6 @@
 	while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ,
 					    res_num))) {
 		for (irq_num = res->start; irq_num <= res->end; irq_num++) {
-			dev_err(emac_dev, "Request IRQ %d\n", irq_num);
 			if (request_irq(irq_num, emac_irq, 0, ndev->name,
 					ndev)) {
 				dev_err(emac_dev,
@@ -1865,7 +1864,6 @@
 	struct emac_priv *priv;
 	unsigned long hw_ram_addr;
 	struct emac_platform_data *pdata;
-	struct device *emac_dev;
 	struct cpdma_params dma_params;
 	struct clk *emac_clk;
 	unsigned long emac_bus_frequency;
@@ -1911,7 +1909,6 @@
 	priv->coal_intvl = 0;
 	priv->bus_freq_mhz = (u32)(emac_bus_frequency / 1000000);
 
-	emac_dev = &ndev->dev;
 	/* Get EMAC platform data */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
@@ -1930,7 +1927,7 @@
 		hw_ram_addr = (u32 __force)res->start + pdata->ctrl_ram_offset;
 
 	memset(&dma_params, 0, sizeof(dma_params));
-	dma_params.dev			= emac_dev;
+	dma_params.dev			= &pdev->dev;
 	dma_params.dmaregs		= priv->emac_base;
 	dma_params.rxthresh		= priv->emac_base + 0x120;
 	dma_params.rxfree		= priv->emac_base + 0x140;
@@ -1980,7 +1977,7 @@
 	}
 
 	ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ethtool_ops);
+	ndev->ethtool_ops = &ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT);
 
 	/* register the network device */
@@ -1994,7 +1991,7 @@
 
 
 	if (netif_msg_probe(priv)) {
-		dev_notice(emac_dev, "DaVinci EMAC Probe found device "\
+		dev_notice(&pdev->dev, "DaVinci EMAC Probe found device "
 			   "(regs: %p, irq: %d)\n",
 			   (void *)priv->emac_base_phys, ndev->irq);
 	}

diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 0cca9de..735dc53 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c

@@ -303,7 +303,7 @@
 		return -EINVAL;
 
 	if (of_property_read_u32(node, "bus_freq", &prop)) {
-		pr_err("Missing bus_freq property in the DT.\n");
+		dev_err(&pdev->dev, "Missing bus_freq property in the DT.\n");
 		return -EINVAL;
 	}
 	data->bus_freq = prop;
@@ -321,15 +321,14 @@
 	struct phy_device *phy;
 	int ret, addr;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	data->bus = mdiobus_alloc();
+	data->bus = devm_mdiobus_alloc(dev);
 	if (!data->bus) {
 		dev_err(dev, "failed to alloc mii bus\n");
-		ret = -ENOMEM;
-		goto bail_out;
+		return -ENOMEM;
 	}
 
 	if (dev->of_node) {
@@ -349,12 +348,9 @@
 	data->bus->parent	= dev;
 	data->bus->priv		= data;
 
-	/* Select default pin state */
-	pinctrl_pm_select_default_state(&pdev->dev);
-
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
-	data->clk = clk_get(&pdev->dev, "fck");
+	data->clk = devm_clk_get(dev, "fck");
 	if (IS_ERR(data->clk)) {
 		dev_err(dev, "failed to get device clock\n");
 		ret = PTR_ERR(data->clk);
@@ -367,24 +363,9 @@
 	spin_lock_init(&data->lock);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "could not find register map resource\n");
-		ret = -ENOENT;
-		goto bail_out;
-	}
-
-	res = devm_request_mem_region(dev, res->start, resource_size(res),
-					    dev_name(dev));
-	if (!res) {
-		dev_err(dev, "could not allocate register map resource\n");
-		ret = -ENXIO;
-		goto bail_out;
-	}
-
-	data->regs = devm_ioremap_nocache(dev, res->start, resource_size(res));
-	if (!data->regs) {
-		dev_err(dev, "could not map mdio registers\n");
-		ret = -ENOMEM;
+	data->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(data->regs)) {
+		ret = PTR_ERR(data->regs);
 		goto bail_out;
 	}
 
@@ -406,16 +387,9 @@
 	return 0;
 
 bail_out:
-	if (data->bus)
-		mdiobus_free(data->bus);
-
-	if (data->clk)
-		clk_put(data->clk);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	kfree(data);
-
 	return ret;
 }
 
@@ -423,18 +397,12 @@
 {
 	struct davinci_mdio_data *data = platform_get_drvdata(pdev);
 
-	if (data->bus) {
+	if (data->bus)
 		mdiobus_unregister(data->bus);
-		mdiobus_free(data->bus);
-	}
 
-	if (data->clk)
-		clk_put(data->clk);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	kfree(data);
-
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 449011b..14389f8 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c

@@ -2192,7 +2192,6 @@
 {
 	int ret;
 	int i;
-	int nz_addr = 0;
 	struct net_device *dev;
 	struct tile_net_priv *priv;
 
@@ -2212,7 +2211,6 @@
 
 	/* Initialize "priv". */
 	priv = netdev_priv(dev);
-	memset(priv, 0, sizeof(*priv));
 	priv->dev = dev;
 	priv->channel = -1;
 	priv->loopify_channel = -1;
@@ -2223,15 +2221,10 @@
 	 * be done before the device is opened.  If the MAC is all zeroes,
 	 * we use a random address, since we're probably on the simulator.
 	 */
-	for (i = 0; i < 6; i++)
-		nz_addr |= mac[i];
-
-	if (nz_addr) {
-		memcpy(dev->dev_addr, mac, ETH_ALEN);
-		dev->addr_len = 6;
-	} else {
+	if (!is_zero_ether_addr(mac))
+		ether_addr_copy(dev->dev_addr, mac);
+	else
 		eth_hw_addr_random(dev);
-	}
 
 	/* Register the network device. */
 	ret = register_netdev(dev);

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index d899d00..bb79928 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

@@ -1561,7 +1561,7 @@
 	 * alloc netdev
 	 */
 	*netdev = alloc_etherdev(sizeof(struct gelic_port));
-	if (!netdev) {
+	if (!*netdev) {
 		kfree(card->unalign);
 		return NULL;
 	}

diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index 8a049a2..f66ddae 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig

@@ -19,7 +19,7 @@
 
 config VIA_RHINE
 	tristate "VIA Rhine support"
-	depends on PCI
+	depends on (PCI || USE_OF)
 	select CRC32
 	select MII
 	---help---

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index f61dc2b..2d72f96 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c

@@ -94,6 +94,10 @@
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -116,13 +120,6 @@
 static const char version[] =
 	"v1.10-LK" DRV_VERSION " " DRV_RELDATE " Written by Donald Becker";
 
-/* This driver was written to use PCI memory space. Some early versions
-   of the Rhine may only work correctly with I/O space accesses. */
-#ifdef CONFIG_VIA_RHINE_MMIO
-#define USE_MMIO
-#else
-#endif
-
 MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
 MODULE_LICENSE("GPL");
@@ -260,6 +257,12 @@
 	rq6patterns	= 0x0040,	/* 6 instead of 4 patterns for WOL */
 	rqStatusWBRace	= 0x0080,	/* Tx Status Writeback Error possible */
 	rqRhineI	= 0x0100,	/* See comment below */
+	rqIntPHY	= 0x0200,	/* Integrated PHY */
+	rqMgmt		= 0x0400,	/* Management adapter */
+	rqNeedEnMMIO	= 0x0800,	/* Whether the core needs to be
+					 * switched from PIO mode to MMIO
+					 * (only applies to PCI)
+					 */
 };
 /*
  * rqRhineI: VT86C100A (aka Rhine-I) uses different bits to enable
@@ -279,6 +282,15 @@
 };
 MODULE_DEVICE_TABLE(pci, rhine_pci_tbl);
 
+/* OpenFirmware identifiers for platform-bus devices
+ * The .data field is currently only used to store quirks
+ */
+static u32 vt8500_quirks = rqWOL | rqForceReset | rq6patterns;
+static struct of_device_id rhine_of_tbl[] = {
+	{ .compatible = "via,vt8500-rhine", .data = &vt8500_quirks },
+	{ }	/* terminate list */
+};
+MODULE_DEVICE_TABLE(of, rhine_of_tbl);
 
 /* Offsets to the device registers. */
 enum register_offsets {
@@ -338,13 +350,11 @@
 	BCR1_MED1=0x80,		/* for VT6102 */
 };
 
-#ifdef USE_MMIO
 /* Registers we check that mmio and reg are the same. */
 static const int mmio_verify_registers[] = {
 	RxConfig, TxConfig, IntrEnable, ConfigA, ConfigB, ConfigC, ConfigD,
 	0
 };
-#endif
 
 /* Bits in the interrupt status/mask registers. */
 enum intr_status_bits {
@@ -446,7 +456,7 @@
 	unsigned char *tx_bufs;
 	dma_addr_t tx_bufs_dma;
 
-	struct pci_dev *pdev;
+	int irq;
 	long pioaddr;
 	struct net_device *dev;
 	struct napi_struct napi;
@@ -649,20 +659,46 @@
 		   "failed" : "succeeded");
 }
 
-#ifdef USE_MMIO
 static void enable_mmio(long pioaddr, u32 quirks)
 {
 	int n;
-	if (quirks & rqRhineI) {
-		/* More recent docs say that this bit is reserved ... */
-		n = inb(pioaddr + ConfigA) | 0x20;
-		outb(n, pioaddr + ConfigA);
-	} else {
-		n = inb(pioaddr + ConfigD) | 0x80;
-		outb(n, pioaddr + ConfigD);
+
+	if (quirks & rqNeedEnMMIO) {
+		if (quirks & rqRhineI) {
+			/* More recent docs say that this bit is reserved */
+			n = inb(pioaddr + ConfigA) | 0x20;
+			outb(n, pioaddr + ConfigA);
+		} else {
+			n = inb(pioaddr + ConfigD) | 0x80;
+			outb(n, pioaddr + ConfigD);
+		}
 	}
 }
-#endif
+
+static inline int verify_mmio(struct device *hwdev,
+			      long pioaddr,
+			      void __iomem *ioaddr,
+			      u32 quirks)
+{
+	if (quirks & rqNeedEnMMIO) {
+		int i = 0;
+
+		/* Check that selected MMIO registers match the PIO ones */
+		while (mmio_verify_registers[i]) {
+			int reg = mmio_verify_registers[i++];
+			unsigned char a = inb(pioaddr+reg);
+			unsigned char b = readb(ioaddr+reg);
+
+			if (a != b) {
+				dev_err(hwdev,
+					"MMIO do not match PIO [%02x] (%02x != %02x)\n",
+					reg, a, b);
+				return -EIO;
+			}
+		}
+	}
+	return 0;
+}
 
 /*
  * Loads bytes 0x00-0x05, 0x6E-0x6F, 0x78-0x7B from EEPROM
@@ -682,14 +718,12 @@
 	if (i > 512)
 		pr_info("%4d cycles used @ %s:%d\n", i, __func__, __LINE__);
 
-#ifdef USE_MMIO
 	/*
 	 * Reloading from EEPROM overwrites ConfigA-D, so we must re-enable
 	 * MMIO. If reloading EEPROM was done first this could be avoided, but
 	 * it is not known if that still works with the "win98-reboot" problem.
 	 */
 	enable_mmio(pioaddr, rp->quirks);
-#endif
 
 	/* Turn off EEPROM-controlled wake-up (magic packet) */
 	if (rp->quirks & rqWOL)
@@ -701,7 +735,7 @@
 static void rhine_poll(struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
-	const int irq = rp->pdev->irq;
+	const int irq = rp->irq;
 
 	disable_irq(irq);
 	rhine_interrupt(irq, dev);
@@ -846,7 +880,8 @@
 		msleep(5);
 
 	/* Reload EEPROM controlled bytes cleared by soft reset */
-	rhine_reload_eeprom(pioaddr, dev);
+	if (dev_is_pci(dev->dev.parent))
+		rhine_reload_eeprom(pioaddr, dev);
 }
 
 static const struct net_device_ops rhine_netdev_ops = {
@@ -867,125 +902,37 @@
 #endif
 };
 
-static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int rhine_init_one_common(struct device *hwdev, u32 quirks,
+				 long pioaddr, void __iomem *ioaddr, int irq)
 {
 	struct net_device *dev;
 	struct rhine_private *rp;
-	int i, rc;
-	u32 quirks;
-	long pioaddr;
-	long memaddr;
-	void __iomem *ioaddr;
-	int io_size, phy_id;
+	int i, rc, phy_id;
 	const char *name;
-#ifdef USE_MMIO
-	int bar = 1;
-#else
-	int bar = 0;
-#endif
-
-/* when built into the kernel, we only print version if device is found */
-#ifndef MODULE
-	pr_info_once("%s\n", version);
-#endif
-
-	io_size = 256;
-	phy_id = 0;
-	quirks = 0;
-	name = "Rhine";
-	if (pdev->revision < VTunknown0) {
-		quirks = rqRhineI;
-		io_size = 128;
-	}
-	else if (pdev->revision >= VT6102) {
-		quirks = rqWOL | rqForceReset;
-		if (pdev->revision < VT6105) {
-			name = "Rhine II";
-			quirks |= rqStatusWBRace;	/* Rhine-II exclusive */
-		}
-		else {
-			phy_id = 1;	/* Integrated PHY, phy_id fixed to 1 */
-			if (pdev->revision >= VT6105_B0)
-				quirks |= rq6patterns;
-			if (pdev->revision < VT6105M)
-				name = "Rhine III";
-			else
-				name = "Rhine III (Management Adapter)";
-		}
-	}
-
-	rc = pci_enable_device(pdev);
-	if (rc)
-		goto err_out;
 
 	/* this should always be supported */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_mask(hwdev, DMA_BIT_MASK(32));
 	if (rc) {
-		dev_err(&pdev->dev,
-			"32-bit PCI DMA addresses not supported by the card!?\n");
-		goto err_out_pci_disable;
+		dev_err(hwdev, "32-bit DMA addresses not supported by the card!?\n");
+		goto err_out;
 	}
 
-	/* sanity check */
-	if ((pci_resource_len(pdev, 0) < io_size) ||
-	    (pci_resource_len(pdev, 1) < io_size)) {
-		rc = -EIO;
-		dev_err(&pdev->dev, "Insufficient PCI resources, aborting\n");
-		goto err_out_pci_disable;
-	}
-
-	pioaddr = pci_resource_start(pdev, 0);
-	memaddr = pci_resource_start(pdev, 1);
-
-	pci_set_master(pdev);
-
 	dev = alloc_etherdev(sizeof(struct rhine_private));
 	if (!dev) {
 		rc = -ENOMEM;
-		goto err_out_pci_disable;
+		goto err_out;
 	}
-	SET_NETDEV_DEV(dev, &pdev->dev);
+	SET_NETDEV_DEV(dev, hwdev);
 
 	rp = netdev_priv(dev);
 	rp->dev = dev;
 	rp->quirks = quirks;
 	rp->pioaddr = pioaddr;
-	rp->pdev = pdev;
+	rp->base = ioaddr;
+	rp->irq = irq;
 	rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT);
 
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_out_free_netdev;
-
-	ioaddr = pci_iomap(pdev, bar, io_size);
-	if (!ioaddr) {
-		rc = -EIO;
-		dev_err(&pdev->dev,
-			"ioremap failed for device %s, region 0x%X @ 0x%lX\n",
-			pci_name(pdev), io_size, memaddr);
-		goto err_out_free_res;
-	}
-
-#ifdef USE_MMIO
-	enable_mmio(pioaddr, quirks);
-
-	/* Check that selected MMIO registers match the PIO ones */
-	i = 0;
-	while (mmio_verify_registers[i]) {
-		int reg = mmio_verify_registers[i++];
-		unsigned char a = inb(pioaddr+reg);
-		unsigned char b = readb(ioaddr+reg);
-		if (a != b) {
-			rc = -EIO;
-			dev_err(&pdev->dev,
-				"MMIO do not match PIO [%02x] (%02x != %02x)\n",
-				reg, a, b);
-			goto err_out_unmap;
-		}
-	}
-#endif /* USE_MMIO */
-
-	rp->base = ioaddr;
+	phy_id = rp->quirks & rqIntPHY ? 1 : 0;
 
 	u64_stats_init(&rp->tx_stats.syncp);
 	u64_stats_init(&rp->rx_stats.syncp);
@@ -1030,7 +977,7 @@
 	if (rp->quirks & rqRhineI)
 		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
 
-	if (pdev->revision >= VT6105M)
+	if (rp->quirks & rqMgmt)
 		dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
 				 NETIF_F_HW_VLAN_CTAG_RX |
 				 NETIF_F_HW_VLAN_CTAG_FILTER;
@@ -1038,18 +985,21 @@
 	/* dev->name not defined before register_netdev()! */
 	rc = register_netdev(dev);
 	if (rc)
-		goto err_out_unmap;
+		goto err_out_free_netdev;
+
+	if (rp->quirks & rqRhineI)
+		name = "Rhine";
+	else if (rp->quirks & rqStatusWBRace)
+		name = "Rhine II";
+	else if (rp->quirks & rqMgmt)
+		name = "Rhine III (Management Adapter)";
+	else
+		name = "Rhine III";
 
 	netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n",
-		    name,
-#ifdef USE_MMIO
-		    memaddr,
-#else
-		    (long)ioaddr,
-#endif
-		    dev->dev_addr, pdev->irq);
+		    name, (long)ioaddr, dev->dev_addr, rp->irq);
 
-	pci_set_drvdata(pdev, dev);
+	dev_set_drvdata(hwdev, dev);
 
 	{
 		u16 mii_cmd;
@@ -1078,41 +1028,158 @@
 
 	return 0;
 
+err_out_free_netdev:
+	free_netdev(dev);
+err_out:
+	return rc;
+}
+
+static int rhine_init_one_pci(struct pci_dev *pdev,
+			      const struct pci_device_id *ent)
+{
+	struct device *hwdev = &pdev->dev;
+	int rc;
+	long pioaddr, memaddr;
+	void __iomem *ioaddr;
+	int io_size = pdev->revision < VTunknown0 ? 128 : 256;
+
+/* This driver was written to use PCI memory space. Some early versions
+ * of the Rhine may only work correctly with I/O space accesses.
+ * TODO: determine for which revisions this is true and assign the flag
+ *	 in code as opposed to this Kconfig option (???)
+ */
+#ifdef CONFIG_VIA_RHINE_MMIO
+	u32 quirks = rqNeedEnMMIO;
+#else
+	u32 quirks = 0;
+#endif
+
+/* when built into the kernel, we only print version if device is found */
+#ifndef MODULE
+	pr_info_once("%s\n", version);
+#endif
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_out;
+
+	if (pdev->revision < VTunknown0) {
+		quirks |= rqRhineI;
+	} else if (pdev->revision >= VT6102) {
+		quirks |= rqWOL | rqForceReset;
+		if (pdev->revision < VT6105) {
+			quirks |= rqStatusWBRace;
+		} else {
+			quirks |= rqIntPHY;
+			if (pdev->revision >= VT6105_B0)
+				quirks |= rq6patterns;
+			if (pdev->revision >= VT6105M)
+				quirks |= rqMgmt;
+		}
+	}
+
+	/* sanity check */
+	if ((pci_resource_len(pdev, 0) < io_size) ||
+	    (pci_resource_len(pdev, 1) < io_size)) {
+		rc = -EIO;
+		dev_err(hwdev, "Insufficient PCI resources, aborting\n");
+		goto err_out_pci_disable;
+	}
+
+	pioaddr = pci_resource_start(pdev, 0);
+	memaddr = pci_resource_start(pdev, 1);
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, DRV_NAME);
+	if (rc)
+		goto err_out_pci_disable;
+
+	ioaddr = pci_iomap(pdev, (quirks & rqNeedEnMMIO ? 1 : 0), io_size);
+	if (!ioaddr) {
+		rc = -EIO;
+		dev_err(hwdev,
+			"ioremap failed for device %s, region 0x%X @ 0x%lX\n",
+			dev_name(hwdev), io_size, memaddr);
+		goto err_out_free_res;
+	}
+
+	enable_mmio(pioaddr, quirks);
+
+	rc = verify_mmio(hwdev, pioaddr, ioaddr, quirks);
+	if (rc)
+		goto err_out_unmap;
+
+	rc = rhine_init_one_common(&pdev->dev, quirks,
+				   pioaddr, ioaddr, pdev->irq);
+	if (!rc)
+		return 0;
+
 err_out_unmap:
 	pci_iounmap(pdev, ioaddr);
 err_out_free_res:
 	pci_release_regions(pdev);
-err_out_free_netdev:
-	free_netdev(dev);
 err_out_pci_disable:
 	pci_disable_device(pdev);
 err_out:
 	return rc;
 }
 
+static int rhine_init_one_platform(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	const u32 *quirks;
+	int irq;
+	struct resource *res;
+	void __iomem *ioaddr;
+
+	match = of_match_device(rhine_of_tbl, &pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ioaddr))
+		return PTR_ERR(ioaddr);
+
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!irq)
+		return -EINVAL;
+
+	quirks = match->data;
+	if (!quirks)
+		return -EINVAL;
+
+	return rhine_init_one_common(&pdev->dev, *quirks,
+				     (long)ioaddr, ioaddr, irq);
+}
+
 static int alloc_ring(struct net_device* dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	void *ring;
 	dma_addr_t ring_dma;
 
-	ring = pci_alloc_consistent(rp->pdev,
-				    RX_RING_SIZE * sizeof(struct rx_desc) +
-				    TX_RING_SIZE * sizeof(struct tx_desc),
-				    &ring_dma);
+	ring = dma_alloc_coherent(hwdev,
+				  RX_RING_SIZE * sizeof(struct rx_desc) +
+				  TX_RING_SIZE * sizeof(struct tx_desc),
+				  &ring_dma,
+				  GFP_ATOMIC);
 	if (!ring) {
 		netdev_err(dev, "Could not allocate DMA memory\n");
 		return -ENOMEM;
 	}
 	if (rp->quirks & rqRhineI) {
-		rp->tx_bufs = pci_alloc_consistent(rp->pdev,
-						   PKT_BUF_SZ * TX_RING_SIZE,
-						   &rp->tx_bufs_dma);
+		rp->tx_bufs = dma_alloc_coherent(hwdev,
+						 PKT_BUF_SZ * TX_RING_SIZE,
+						 &rp->tx_bufs_dma,
+						 GFP_ATOMIC);
 		if (rp->tx_bufs == NULL) {
-			pci_free_consistent(rp->pdev,
-				    RX_RING_SIZE * sizeof(struct rx_desc) +
-				    TX_RING_SIZE * sizeof(struct tx_desc),
-				    ring, ring_dma);
+			dma_free_coherent(hwdev,
+					  RX_RING_SIZE * sizeof(struct rx_desc) +
+					  TX_RING_SIZE * sizeof(struct tx_desc),
+					  ring, ring_dma);
 			return -ENOMEM;
 		}
 	}
@@ -1128,16 +1195,17 @@
 static void free_ring(struct net_device* dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 
-	pci_free_consistent(rp->pdev,
-			    RX_RING_SIZE * sizeof(struct rx_desc) +
-			    TX_RING_SIZE * sizeof(struct tx_desc),
-			    rp->rx_ring, rp->rx_ring_dma);
+	dma_free_coherent(hwdev,
+			  RX_RING_SIZE * sizeof(struct rx_desc) +
+			  TX_RING_SIZE * sizeof(struct tx_desc),
+			  rp->rx_ring, rp->rx_ring_dma);
 	rp->tx_ring = NULL;
 
 	if (rp->tx_bufs)
-		pci_free_consistent(rp->pdev, PKT_BUF_SZ * TX_RING_SIZE,
-				    rp->tx_bufs, rp->tx_bufs_dma);
+		dma_free_coherent(hwdev, PKT_BUF_SZ * TX_RING_SIZE,
+				  rp->tx_bufs, rp->tx_bufs_dma);
 
 	rp->tx_bufs = NULL;
 
@@ -1146,6 +1214,7 @@
 static void alloc_rbufs(struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	dma_addr_t next;
 	int i;
 
@@ -1174,9 +1243,9 @@
 			break;
 
 		rp->rx_skbuff_dma[i] =
-			pci_map_single(rp->pdev, skb->data, rp->rx_buf_sz,
-				       PCI_DMA_FROMDEVICE);
-		if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[i])) {
+			dma_map_single(hwdev, skb->data, rp->rx_buf_sz,
+				       DMA_FROM_DEVICE);
+		if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) {
 			rp->rx_skbuff_dma[i] = 0;
 			dev_kfree_skb(skb);
 			break;
@@ -1190,6 +1259,7 @@
 static void free_rbufs(struct net_device* dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	int i;
 
 	/* Free all the skbuffs in the Rx queue. */
@@ -1197,9 +1267,9 @@
 		rp->rx_ring[i].rx_status = 0;
 		rp->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
 		if (rp->rx_skbuff[i]) {
-			pci_unmap_single(rp->pdev,
+			dma_unmap_single(hwdev,
 					 rp->rx_skbuff_dma[i],
-					 rp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+					 rp->rx_buf_sz, DMA_FROM_DEVICE);
 			dev_kfree_skb(rp->rx_skbuff[i]);
 		}
 		rp->rx_skbuff[i] = NULL;
@@ -1230,6 +1300,7 @@
 static void free_tbufs(struct net_device* dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	int i;
 
 	for (i = 0; i < TX_RING_SIZE; i++) {
@@ -1238,10 +1309,10 @@
 		rp->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
 		if (rp->tx_skbuff[i]) {
 			if (rp->tx_skbuff_dma[i]) {
-				pci_unmap_single(rp->pdev,
+				dma_unmap_single(hwdev,
 						 rp->tx_skbuff_dma[i],
 						 rp->tx_skbuff[i]->len,
-						 PCI_DMA_TODEVICE);
+						 DMA_TO_DEVICE);
 			}
 			dev_kfree_skb(rp->tx_skbuff[i]);
 		}
@@ -1278,8 +1349,9 @@
 		/* autoneg is off: Link is always assumed to be up */
 		if (!netif_carrier_ok(dev))
 			netif_carrier_on(dev);
-	} else	/* Let MMI library update carrier status */
-		rhine_check_media(dev, 0);
+	}
+
+	rhine_check_media(dev, 0);
 
 	netif_info(rp, link, dev, "force_media %d, carrier %d\n",
 		   mii->force_media, netif_carrier_ok(dev));
@@ -1469,7 +1541,7 @@
 
 	rhine_set_rx_mode(dev);
 
-	if (rp->pdev->revision >= VT6105M)
+	if (rp->quirks & rqMgmt)
 		rhine_init_cam_filter(dev);
 
 	napi_enable(&rp->napi);
@@ -1581,16 +1653,15 @@
 	void __iomem *ioaddr = rp->base;
 	int rc;
 
-	rc = request_irq(rp->pdev->irq, rhine_interrupt, IRQF_SHARED, dev->name,
-			dev);
+	rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev);
 	if (rc)
 		return rc;
 
-	netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->pdev->irq);
+	netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq);
 
 	rc = alloc_ring(dev);
 	if (rc) {
-		free_irq(rp->pdev->irq, dev);
+		free_irq(rp->irq, dev);
 		return rc;
 	}
 	alloc_rbufs(dev);
@@ -1659,6 +1730,7 @@
 				  struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	void __iomem *ioaddr = rp->base;
 	unsigned entry;
 
@@ -1695,9 +1767,9 @@
 						       rp->tx_bufs));
 	} else {
 		rp->tx_skbuff_dma[entry] =
-			pci_map_single(rp->pdev, skb->data, skb->len,
-				       PCI_DMA_TODEVICE);
-		if (dma_mapping_error(&rp->pdev->dev, rp->tx_skbuff_dma[entry])) {
+			dma_map_single(hwdev, skb->data, skb->len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(hwdev, rp->tx_skbuff_dma[entry])) {
 			dev_kfree_skb_any(skb);
 			rp->tx_skbuff_dma[entry] = 0;
 			dev->stats.tx_dropped++;
@@ -1788,6 +1860,7 @@
 static void rhine_tx(struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
 
 	/* find and cleanup dirty tx descriptors */
@@ -1831,10 +1904,10 @@
 		}
 		/* Free the original skb. */
 		if (rp->tx_skbuff_dma[entry]) {
-			pci_unmap_single(rp->pdev,
+			dma_unmap_single(hwdev,
 					 rp->tx_skbuff_dma[entry],
 					 rp->tx_skbuff[entry]->len,
-					 PCI_DMA_TODEVICE);
+					 DMA_TO_DEVICE);
 		}
 		dev_consume_skb_any(rp->tx_skbuff[entry]);
 		rp->tx_skbuff[entry] = NULL;
@@ -1863,6 +1936,7 @@
 static int rhine_rx(struct net_device *dev, int limit)
 {
 	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 	int count;
 	int entry = rp->cur_rx % RX_RING_SIZE;
 
@@ -1924,19 +1998,19 @@
 			if (pkt_len < rx_copybreak)
 				skb = netdev_alloc_skb_ip_align(dev, pkt_len);
 			if (skb) {
-				pci_dma_sync_single_for_cpu(rp->pdev,
-							    rp->rx_skbuff_dma[entry],
-							    rp->rx_buf_sz,
-							    PCI_DMA_FROMDEVICE);
+				dma_sync_single_for_cpu(hwdev,
+							rp->rx_skbuff_dma[entry],
+							rp->rx_buf_sz,
+							DMA_FROM_DEVICE);
 
 				skb_copy_to_linear_data(skb,
 						 rp->rx_skbuff[entry]->data,
 						 pkt_len);
 				skb_put(skb, pkt_len);
-				pci_dma_sync_single_for_device(rp->pdev,
-							       rp->rx_skbuff_dma[entry],
-							       rp->rx_buf_sz,
-							       PCI_DMA_FROMDEVICE);
+				dma_sync_single_for_device(hwdev,
+							   rp->rx_skbuff_dma[entry],
+							   rp->rx_buf_sz,
+							   DMA_FROM_DEVICE);
 			} else {
 				skb = rp->rx_skbuff[entry];
 				if (skb == NULL) {
@@ -1945,10 +2019,10 @@
 				}
 				rp->rx_skbuff[entry] = NULL;
 				skb_put(skb, pkt_len);
-				pci_unmap_single(rp->pdev,
+				dma_unmap_single(hwdev,
 						 rp->rx_skbuff_dma[entry],
 						 rp->rx_buf_sz,
-						 PCI_DMA_FROMDEVICE);
+						 DMA_FROM_DEVICE);
 			}
 
 			if (unlikely(desc_length & DescTag))
@@ -1979,10 +2053,11 @@
 			if (skb == NULL)
 				break;	/* Better luck next round. */
 			rp->rx_skbuff_dma[entry] =
-				pci_map_single(rp->pdev, skb->data,
+				dma_map_single(hwdev, skb->data,
 					       rp->rx_buf_sz,
-					       PCI_DMA_FROMDEVICE);
-			if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[entry])) {
+					       DMA_FROM_DEVICE);
+			if (dma_mapping_error(hwdev,
+					      rp->rx_skbuff_dma[entry])) {
 				dev_kfree_skb(skb);
 				rp->rx_skbuff_dma[entry] = 0;
 				break;
@@ -2103,7 +2178,7 @@
 		/* Too many to match, or accept all multicasts. */
 		iowrite32(0xffffffff, ioaddr + MulticastFilter0);
 		iowrite32(0xffffffff, ioaddr + MulticastFilter1);
-	} else if (rp->pdev->revision >= VT6105M) {
+	} else if (rp->quirks & rqMgmt) {
 		int i = 0;
 		u32 mCAMmask = 0;	/* 32 mCAMs (6105M and better) */
 		netdev_for_each_mc_addr(ha, dev) {
@@ -2125,7 +2200,7 @@
 		iowrite32(mc_filter[1], ioaddr + MulticastFilter1);
 	}
 	/* enable/disable VLAN receive filtering */
-	if (rp->pdev->revision >= VT6105M) {
+	if (rp->quirks & rqMgmt) {
 		if (dev->flags & IFF_PROMISC)
 			BYTE_REG_BITS_OFF(BCR1_VIDFR, ioaddr + PCIBusConfig1);
 		else
@@ -2136,11 +2211,11 @@
 
 static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-	struct rhine_private *rp = netdev_priv(dev);
+	struct device *hwdev = dev->dev.parent;
 
 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
-	strlcpy(info->bus_info, pci_name(rp->pdev), sizeof(info->bus_info));
+	strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info));
 }
 
 static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
@@ -2277,7 +2352,7 @@
 	/* Stop the chip's Tx and Rx processes. */
 	iowrite16(CmdStop, ioaddr + ChipCmd);
 
-	free_irq(rp->pdev->irq, dev);
+	free_irq(rp->irq, dev);
 	free_rbufs(dev);
 	free_tbufs(dev);
 	free_ring(dev);
@@ -2286,7 +2361,7 @@
 }
 
 
-static void rhine_remove_one(struct pci_dev *pdev)
+static void rhine_remove_one_pci(struct pci_dev *pdev)
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rhine_private *rp = netdev_priv(dev);
@@ -2300,7 +2375,21 @@
 	pci_disable_device(pdev);
 }
 
-static void rhine_shutdown (struct pci_dev *pdev)
+static int rhine_remove_one_platform(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+	struct rhine_private *rp = netdev_priv(dev);
+
+	unregister_netdev(dev);
+
+	iounmap(rp->base);
+
+	free_netdev(dev);
+
+	return 0;
+}
+
+static void rhine_shutdown_pci(struct pci_dev *pdev)
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rhine_private *rp = netdev_priv(dev);
@@ -2354,8 +2443,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int rhine_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rhine_private *rp = netdev_priv(dev);
 
 	if (!netif_running(dev))
@@ -2367,23 +2455,21 @@
 
 	netif_device_detach(dev);
 
-	rhine_shutdown(pdev);
+	if (dev_is_pci(device))
+		rhine_shutdown_pci(to_pci_dev(device));
 
 	return 0;
 }
 
 static int rhine_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rhine_private *rp = netdev_priv(dev);
 
 	if (!netif_running(dev))
 		return 0;
 
-#ifdef USE_MMIO
 	enable_mmio(rp->pioaddr, rp->quirks);
-#endif
 	rhine_power_init(dev);
 	free_tbufs(dev);
 	free_rbufs(dev);
@@ -2408,15 +2494,26 @@
 
 #endif /* !CONFIG_PM_SLEEP */
 
-static struct pci_driver rhine_driver = {
+static struct pci_driver rhine_driver_pci = {
 	.name		= DRV_NAME,
 	.id_table	= rhine_pci_tbl,
-	.probe		= rhine_init_one,
-	.remove		= rhine_remove_one,
-	.shutdown	= rhine_shutdown,
+	.probe		= rhine_init_one_pci,
+	.remove		= rhine_remove_one_pci,
+	.shutdown	= rhine_shutdown_pci,
 	.driver.pm	= RHINE_PM_OPS,
 };
 
+static struct platform_driver rhine_driver_platform = {
+	.probe		= rhine_init_one_platform,
+	.remove		= rhine_remove_one_platform,
+	.driver = {
+		.name	= DRV_NAME,
+		.owner	= THIS_MODULE,
+		.of_match_table	= rhine_of_tbl,
+		.pm		= RHINE_PM_OPS,
+	}
+};
+
 static struct dmi_system_id rhine_dmi_table[] __initdata = {
 	{
 		.ident = "EPIA-M",
@@ -2437,6 +2534,8 @@
 
 static int __init rhine_init(void)
 {
+	int ret_pci, ret_platform;
+
 /* when a module, this is printed whether or not devices are found in probe */
 #ifdef MODULE
 	pr_info("%s\n", version);
@@ -2449,13 +2548,19 @@
 	else if (avoid_D3)
 		pr_info("avoid_D3 set\n");
 
-	return pci_register_driver(&rhine_driver);
+	ret_pci = pci_register_driver(&rhine_driver_pci);
+	ret_platform = platform_driver_register(&rhine_driver_platform);
+	if ((ret_pci < 0) && (ret_platform < 0))
+		return ret_pci;
+
+	return 0;
 }
 
 
 static void __exit rhine_cleanup(void)
 {
-	pci_unregister_driver(&rhine_driver);
+	platform_driver_unregister(&rhine_driver_platform);
+	pci_unregister_driver(&rhine_driver_pci);
 }
 
 

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index fa193c4..4ef818a 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c

@@ -75,7 +75,7 @@
 	long end = jiffies + 2;
 
 	while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) {
-		if (end - jiffies <= 0) {
+		if (time_before_eq(end, jiffies)) {
 			WARN_ON(1);
 			return -ETIMEDOUT;
 		}

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 64b4639..d4abf47 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c

@@ -22,7 +22,7 @@
 	long end = jiffies + 2;
 	while (!(axienet_ior(lp, XAE_MDIO_MCR_OFFSET) &
 		 XAE_MDIO_MCR_READY_MASK)) {
-		if (end - jiffies <= 0) {
+		if (time_before_eq(end, jiffies)) {
 			WARN_ON(1);
 			return -ETIMEDOUT;
 		}

diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 0d87c67..8c4aed3 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c

@@ -702,7 +702,7 @@
 	*/
 	while (__raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) &
 			XEL_MDIOCTRL_MDIOSTS_MASK) {
-		if (end - jiffies <= 0) {
+		if (time_before_eq(end, jiffies)) {
 			WARN_ON(1);
 			return -ETIMEDOUT;
 		}

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d18f711d..6cc37c1 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h

@@ -28,50 +28,119 @@
 #include <linux/hyperv.h>
 #include <linux/rndis.h>
 
-/* Fwd declaration */
-struct hv_netvsc_packet;
-struct ndis_tcp_ip_checksum_info;
+/* RSS related */
+#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */
+#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */
 
-/* Represent the xfer page packet which contains 1 or more netvsc packet */
-struct xferpage_packet {
-	struct list_head list_ent;
-	u32 status;
+#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
+#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
 
-	/* # of netvsc packets this xfer packet contains */
-	u32 count;
+#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
+#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
+
+struct ndis_obj_header {
+	u8 type;
+	u8 rev;
+	u16 size;
+} __packed;
+
+/* ndis_recv_scale_cap/cap_flag */
+#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR       0x02000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC       0x04000000
+#define NDIS_RSS_CAPS_USING_MSI_X                 0x08000000
+#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS      0x10000000
+#define NDIS_RSS_CAPS_SUPPORTS_MSI_X              0x20000000
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4          0x00000100
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6          0x00000200
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX       0x00000400
+
+struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
+	struct ndis_obj_header hdr;
+	u32 cap_flag;
+	u32 num_int_msg;
+	u32 num_recv_que;
+	u16 num_indirect_tabent;
+} __packed;
+
+
+/* ndis_recv_scale_param flags */
+#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED     0x0001
+#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED    0x0002
+#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED       0x0004
+#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED     0x0008
+#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS            0x0010
+
+/* Hash info bits */
+#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
+#define NDIS_HASH_IPV4          0x00000100
+#define NDIS_HASH_TCP_IPV4      0x00000200
+#define NDIS_HASH_IPV6          0x00000400
+#define NDIS_HASH_IPV6_EX       0x00000800
+#define NDIS_HASH_TCP_IPV6      0x00001000
+#define NDIS_HASH_TCP_IPV6_EX   0x00002000
+
+#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
+
+#define ITAB_NUM 128
+#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+extern u8 netvsc_hash_key[];
+
+struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
+	struct ndis_obj_header hdr;
+
+	/* Qualifies the rest of the information */
+	u16 flag;
+
+	/* The base CPU number to do receive processing. not used */
+	u16 base_cpu_number;
+
+	/* This describes the hash function and type being enabled */
+	u32 hashinfo;
+
+	/* The size of indirection table array */
+	u16 indirect_tabsize;
+
+	/* The offset of the indirection table from the beginning of this
+	 * structure
+	 */
+	u32 indirect_taboffset;
+
+	/* The size of the hash secret key */
+	u16 hashkey_size;
+
+	/* The offset of the secret key from the beginning of this structure */
+	u32 kashkey_offset;
+
+	u32 processor_masks_offset;
+	u32 num_processor_masks;
+	u32 processor_masks_entry_size;
 };
 
+/* Fwd declaration */
+struct ndis_tcp_ip_checksum_info;
+
 /*
  * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
  * within the RNDIS
  */
 struct hv_netvsc_packet {
 	/* Bookkeeping stuff */
-	struct list_head list_ent;
 	u32 status;
 
 	struct hv_device *device;
 	bool is_data_pkt;
 	u16 vlan_tci;
 
-	/*
-	 * Valid only for receives when we break a xfer page packet
-	 * into multiple netvsc packets
-	 */
-	struct xferpage_packet *xfer_page_pkt;
+	u16 q_idx;
+	struct vmbus_channel *channel;
 
-	union {
-		struct {
-			u64 recv_completion_tid;
-			void *recv_completion_ctx;
-			void (*recv_completion)(void *context);
-		} recv;
-		struct {
-			u64 send_completion_tid;
-			void *send_completion_ctx;
-			void (*send_completion)(void *context);
-		} send;
-	} completion;
+	u64 send_completion_tid;
+	void *send_completion_ctx;
+	void (*send_completion)(void *context);
+
+	u32 send_buf_index;
 
 	/* This points to the memory after page_buf */
 	struct rndis_message *rndis_msg;
@@ -120,6 +189,7 @@
 int netvsc_recv_callback(struct hv_device *device_obj,
 			struct hv_netvsc_packet *packet,
 			struct ndis_tcp_ip_checksum_info *csum_info);
+void netvsc_channel_cb(void *context);
 int rndis_filter_open(struct hv_device *dev);
 int rndis_filter_close(struct hv_device *dev);
 int rndis_filter_device_add(struct hv_device *dev,
@@ -514,14 +584,16 @@
 
 #define NETVSC_RECEIVE_BUFFER_SIZE		(1024*1024*16)	/* 16MB */
 #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY	(1024*1024*15)  /* 15MB */
+#define NETVSC_SEND_BUFFER_SIZE			(1024 * 1024)   /* 1MB */
+#define NETVSC_INVALID_INDEX			(-1)
+
 
 #define NETVSC_RECEIVE_BUFFER_ID		0xcafe
 
-/* Preallocated receive packets */
-#define NETVSC_RECEIVE_PACKETLIST_COUNT		256
-
 #define NETVSC_PACKET_SIZE                      2048
 
+#define VRSS_SEND_TAB_SIZE 16
+
 /* Per netvsc channel-specific */
 struct netvsc_device {
 	struct hv_device *dev;
@@ -532,12 +604,6 @@
 	wait_queue_head_t wait_drain;
 	bool start_remove;
 	bool destroy;
-	/*
-	 * List of free preallocated hv_netvsc_packet to represent receive
-	 * packet
-	 */
-	struct list_head recv_pkt_list;
-	spinlock_t recv_pkt_list_lock;
 
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
@@ -546,6 +612,15 @@
 	u32 recv_section_cnt;
 	struct nvsp_1_receive_buffer_section *recv_section;
 
+	/* Send buffer allocated by us */
+	void *send_buf;
+	u32 send_buf_size;
+	u32 send_buf_gpadl_handle;
+	u32 send_section_cnt;
+	u32 send_section_size;
+	unsigned long *send_section_map;
+	int map_words;
+
 	/* Used for NetVSP initialization protocol */
 	struct completion channel_init_wait;
 	struct nvsp_message channel_init_pkt;
@@ -555,10 +630,20 @@
 
 	struct net_device *ndev;
 
+	struct vmbus_channel *chn_table[NR_CPUS];
+	u32 send_table[VRSS_SEND_TAB_SIZE];
+	u32 num_chn;
+	atomic_t queue_sends[NR_CPUS];
+
 	/* Holds rndis device info */
 	void *extension;
-	/* The recive buffer for this device */
+
+	int ring_size;
+
+	/* The primary channel callback buffer */
 	unsigned char cb_buffer[NETVSC_PACKET_SIZE];
+	/* The sub channel callback buffer */
+	unsigned char *sub_cb_buf;
 };
 
 /* NdisInitialize message */
@@ -706,6 +791,7 @@
 	IEEE_8021Q_INFO,
 	ORIGINAL_PKTINFO,
 	PACKET_CANCEL_ID,
+	NBL_HASH_VALUE = PACKET_CANCEL_ID,
 	ORIGINAL_NET_BUFLIST,
 	CACHED_NET_BUFLIST,
 	SHORT_PKT_PADINFO,
@@ -852,6 +938,9 @@
 #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
 		sizeof(struct ndis_tcp_lso_info))
 
+#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+		sizeof(u32))
+
 /* Format of Information buffer passed in a SetRequest for the OID */
 /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
 struct rndis_config_parameter_info {

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index f7629ec..c041f63 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c

@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/if_ether.h>
+#include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
 
@@ -80,7 +81,7 @@
 }
 
 
-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+static int netvsc_destroy_buf(struct netvsc_device *net_device)
 {
 	struct nvsp_message *revoke_packet;
 	int ret = 0;
@@ -146,10 +147,62 @@
 		net_device->recv_section = NULL;
 	}
 
+	/* Deal with the send buffer we may have setup.
+	 * If we got a send section size, it means we received a
+	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie we sent a
+	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
+	 * to send a revoke msg here
+	 */
+	if (net_device->send_section_size) {
+		/* Send the revoke send buffer */
+		revoke_packet = &net_device->revoke_packet;
+		memset(revoke_packet, 0, sizeof(struct nvsp_message));
+
+		revoke_packet->hdr.msg_type =
+			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
+		revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
+
+		ret = vmbus_sendpacket(net_device->dev->channel,
+				       revoke_packet,
+				       sizeof(struct nvsp_message),
+				       (unsigned long)revoke_packet,
+				       VM_PKT_DATA_INBAND, 0);
+		/* If we failed here, we might as well return and
+		 * have a leak rather than continue and a bugchk
+		 */
+		if (ret != 0) {
+			netdev_err(ndev, "unable to send "
+				   "revoke send buffer to netvsp\n");
+			return ret;
+		}
+	}
+	/* Teardown the gpadl on the vsp end */
+	if (net_device->send_buf_gpadl_handle) {
+		ret = vmbus_teardown_gpadl(net_device->dev->channel,
+					   net_device->send_buf_gpadl_handle);
+
+		/* If we failed here, we might as well return and have a leak
+		 * rather than continue and a bugchk
+		 */
+		if (ret != 0) {
+			netdev_err(ndev,
+				   "unable to teardown send buffer's gpadl\n");
+			return ret;
+		}
+		net_device->send_buf_gpadl_handle = 0;
+	}
+	if (net_device->send_buf) {
+		/* Free up the send buffer */
+		free_pages((unsigned long)net_device->send_buf,
+			   get_order(net_device->send_buf_size));
+		net_device->send_buf = NULL;
+	}
+	kfree(net_device->send_section_map);
+
 	return ret;
 }
 
-static int netvsc_init_recv_buf(struct hv_device *device)
+static int netvsc_init_buf(struct hv_device *device)
 {
 	int ret = 0;
 	int t;
@@ -248,10 +301,90 @@
 		goto cleanup;
 	}
 
+	/* Now setup the send buffer. */
+	net_device->send_buf =
+		(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
+					 get_order(net_device->send_buf_size));
+	if (!net_device->send_buf) {
+		netdev_err(ndev, "unable to allocate send "
+			   "buffer of size %d\n", net_device->send_buf_size);
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	/* Establish the gpadl handle for this buffer on this
+	 * channel.  Note: This call uses the vmbus connection rather
+	 * than the channel to establish the gpadl handle.
+	 */
+	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
+				    net_device->send_buf_size,
+				    &net_device->send_buf_gpadl_handle);
+	if (ret != 0) {
+		netdev_err(ndev,
+			   "unable to establish send buffer's gpadl\n");
+		goto cleanup;
+	}
+
+	/* Notify the NetVsp of the gpadl handle */
+	init_packet = &net_device->channel_init_pkt;
+	memset(init_packet, 0, sizeof(struct nvsp_message));
+	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
+	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
+		net_device->send_buf_gpadl_handle;
+	init_packet->msg.v1_msg.send_recv_buf.id = 0;
+
+	/* Send the gpadl notification request */
+	ret = vmbus_sendpacket(device->channel, init_packet,
+			       sizeof(struct nvsp_message),
+			       (unsigned long)init_packet,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		netdev_err(ndev,
+			   "unable to send send buffer's gpadl to netvsp\n");
+		goto cleanup;
+	}
+
+	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+	BUG_ON(t == 0);
+
+	/* Check the response */
+	if (init_packet->msg.v1_msg.
+	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
+		netdev_err(ndev, "Unable to complete send buffer "
+			   "initialization with NetVsp - status %d\n",
+			   init_packet->msg.v1_msg.
+			   send_send_buf_complete.status);
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	/* Parse the response */
+	net_device->send_section_size = init_packet->msg.
+				v1_msg.send_send_buf_complete.section_size;
+
+	/* Section count is simply the size divided by the section size. */
+	net_device->send_section_cnt =
+		net_device->send_buf_size/net_device->send_section_size;
+
+	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
+		 net_device->send_section_size, net_device->send_section_cnt);
+
+	/* Setup state for managing the send buffer. */
+	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
+					     BITS_PER_LONG);
+
+	net_device->send_section_map =
+		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+	if (net_device->send_section_map == NULL) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
 	goto exit;
 
 cleanup:
-	netvsc_destroy_recv_buf(net_device);
+	netvsc_destroy_buf(net_device);
 
 exit:
 	return ret;
@@ -369,8 +502,9 @@
 		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
 	else
 		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
 
-	ret = netvsc_init_recv_buf(device);
+	ret = netvsc_init_buf(device);
 
 cleanup:
 	return ret;
@@ -378,7 +512,7 @@
 
 static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
 {
-	netvsc_destroy_recv_buf(net_device);
+	netvsc_destroy_buf(net_device);
 }
 
 /*
@@ -387,7 +521,6 @@
 int netvsc_device_remove(struct hv_device *device)
 {
 	struct netvsc_device *net_device;
-	struct hv_netvsc_packet *netvsc_packet, *pos;
 	unsigned long flags;
 
 	net_device = hv_get_drvdata(device);
@@ -416,11 +549,8 @@
 	vmbus_close(device->channel);
 
 	/* Release all resources */
-	list_for_each_entry_safe(netvsc_packet, pos,
-				 &net_device->recv_pkt_list, list_ent) {
-		list_del(&netvsc_packet->list_ent);
-		kfree(netvsc_packet);
-	}
+	if (net_device->sub_cb_buf)
+		vfree(net_device->sub_cb_buf);
 
 	kfree(net_device);
 	return 0;
@@ -444,6 +574,12 @@
 	return avail_write * 100 / ring_info->ring_datasize;
 }
 
+static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
+					 u32 index)
+{
+	sync_change_bit(index, net_device->send_section_map);
+}
+
 static void netvsc_send_completion(struct netvsc_device *net_device,
 				   struct hv_device *device,
 				   struct vmpacket_descriptor *packet)
@@ -451,6 +587,7 @@
 	struct nvsp_message *nvsp_packet;
 	struct hv_netvsc_packet *nvsc_packet;
 	struct net_device *ndev;
+	u32 send_index;
 
 	ndev = net_device->ndev;
 
@@ -461,7 +598,9 @@
 	    (nvsp_packet->hdr.msg_type ==
 	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
 	    (nvsp_packet->hdr.msg_type ==
-	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
+	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
+	    (nvsp_packet->hdr.msg_type ==
+	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
 		/* Copy the response back */
 		memcpy(&net_device->channel_init_pkt, nvsp_packet,
 		       sizeof(struct nvsp_message));
@@ -469,28 +608,39 @@
 	} else if (nvsp_packet->hdr.msg_type ==
 		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
 		int num_outstanding_sends;
+		u16 q_idx = 0;
+		struct vmbus_channel *channel = device->channel;
+		int queue_sends;
 
 		/* Get the send context */
 		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
 			packet->trans_id;
 
 		/* Notify the layer above us */
-		if (nvsc_packet)
-			nvsc_packet->completion.send.send_completion(
-				nvsc_packet->completion.send.
-				send_completion_ctx);
+		if (nvsc_packet) {
+			send_index = nvsc_packet->send_buf_index;
+			if (send_index != NETVSC_INVALID_INDEX)
+				netvsc_free_send_slot(net_device, send_index);
+			q_idx = nvsc_packet->q_idx;
+			channel = nvsc_packet->channel;
+			nvsc_packet->send_completion(nvsc_packet->
+						     send_completion_ctx);
+		}
 
 		num_outstanding_sends =
 			atomic_dec_return(&net_device->num_outstanding_sends);
+		queue_sends = atomic_dec_return(&net_device->
+						queue_sends[q_idx]);
 
 		if (net_device->destroy && num_outstanding_sends == 0)
 			wake_up(&net_device->wait_drain);
 
-		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
-			(hv_ringbuf_avail_percent(&device->channel->outbound)
-			> RING_AVAIL_PERCENT_HIWATER ||
-			num_outstanding_sends < 1))
-				netif_wake_queue(ndev);
+		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+		    !net_device->start_remove &&
+		    (hv_ringbuf_avail_percent(&channel->outbound) >
+		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
+				netif_tx_wake_queue(netdev_get_tx_queue(
+						    ndev, q_idx));
 	} else {
 		netdev_err(ndev, "Unknown send completion packet type- "
 			   "%d received!!\n", nvsp_packet->hdr.msg_type);
@@ -498,6 +648,52 @@
 
 }
 
+static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
+{
+	unsigned long index;
+	u32 max_words = net_device->map_words;
+	unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
+	u32 section_cnt = net_device->send_section_cnt;
+	int ret_val = NETVSC_INVALID_INDEX;
+	int i;
+	int prev_val;
+
+	for (i = 0; i < max_words; i++) {
+		if (!~(map_addr[i]))
+			continue;
+		index = ffz(map_addr[i]);
+		prev_val = sync_test_and_set_bit(index, &map_addr[i]);
+		if (prev_val)
+			continue;
+		if ((index + (i * BITS_PER_LONG)) >= section_cnt)
+			break;
+		ret_val = (index + (i * BITS_PER_LONG));
+		break;
+	}
+	return ret_val;
+}
+
+u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+			    unsigned int section_index,
+			    struct hv_netvsc_packet *packet)
+{
+	char *start = net_device->send_buf;
+	char *dest = (start + (section_index * net_device->send_section_size));
+	int i;
+	u32 msg_size = 0;
+
+	for (i = 0; i < packet->page_buf_cnt; i++) {
+		char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
+		u32 offset = packet->page_buf[i].offset;
+		u32 len = packet->page_buf[i].len;
+
+		memcpy(dest, (src + offset), len);
+		msg_size += len;
+		dest += len;
+	}
+	return msg_size;
+}
+
 int netvsc_send(struct hv_device *device,
 			struct hv_netvsc_packet *packet)
 {
@@ -505,7 +701,12 @@
 	int ret = 0;
 	struct nvsp_message sendMessage;
 	struct net_device *ndev;
+	struct vmbus_channel *out_channel = NULL;
 	u64 req_id;
+	unsigned int section_index = NETVSC_INVALID_INDEX;
+	u32 msg_size = 0;
+	struct sk_buff *skb;
+
 
 	net_device = get_outbound_net_device(device);
 	if (!net_device)
@@ -521,25 +722,46 @@
 		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
 	}
 
-	/* Not using send buffer section */
-	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
-		0xFFFFFFFF;
-	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+	/* Attempt to send via sendbuf */
+	if (packet->total_data_buflen < net_device->send_section_size) {
+		section_index = netvsc_get_next_send_section(net_device);
+		if (section_index != NETVSC_INVALID_INDEX) {
+			msg_size = netvsc_copy_to_send_buf(net_device,
+							   section_index,
+							   packet);
+			skb = (struct sk_buff *)
+			      (unsigned long)packet->send_completion_tid;
+			if (skb)
+				dev_kfree_skb_any(skb);
+			packet->page_buf_cnt = 0;
+		}
+	}
+	packet->send_buf_index = section_index;
 
-	if (packet->completion.send.send_completion)
+
+	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
+		section_index;
+	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
+
+	if (packet->send_completion)
 		req_id = (ulong)packet;
 	else
 		req_id = 0;
 
+	out_channel = net_device->chn_table[packet->q_idx];
+	if (out_channel == NULL)
+		out_channel = device->channel;
+	packet->channel = out_channel;
+
 	if (packet->page_buf_cnt) {
-		ret = vmbus_sendpacket_pagebuffer(device->channel,
+		ret = vmbus_sendpacket_pagebuffer(out_channel,
 						  packet->page_buf,
 						  packet->page_buf_cnt,
 						  &sendMessage,
 						  sizeof(struct nvsp_message),
 						  req_id);
 	} else {
-		ret = vmbus_sendpacket(device->channel, &sendMessage,
+		ret = vmbus_sendpacket(out_channel, &sendMessage,
 				sizeof(struct nvsp_message),
 				req_id,
 				VM_PKT_DATA_INBAND,
@@ -548,17 +770,24 @@
 
 	if (ret == 0) {
 		atomic_inc(&net_device->num_outstanding_sends);
-		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
+		atomic_inc(&net_device->queue_sends[packet->q_idx]);
+
+		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
 			RING_AVAIL_PERCENT_LOWATER) {
-			netif_stop_queue(ndev);
+			netif_tx_stop_queue(netdev_get_tx_queue(
+					    ndev, packet->q_idx));
+
 			if (atomic_read(&net_device->
-				num_outstanding_sends) < 1)
-				netif_wake_queue(ndev);
+				queue_sends[packet->q_idx]) < 1)
+				netif_tx_wake_queue(netdev_get_tx_queue(
+						    ndev, packet->q_idx));
 		}
 	} else if (ret == -EAGAIN) {
-		netif_stop_queue(ndev);
-		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
-			netif_wake_queue(ndev);
+		netif_tx_stop_queue(netdev_get_tx_queue(
+				    ndev, packet->q_idx));
+		if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
+			netif_tx_wake_queue(netdev_get_tx_queue(
+					    ndev, packet->q_idx));
 			ret = -ENOSPC;
 		}
 	} else {
@@ -570,6 +799,7 @@
 }
 
 static void netvsc_send_recv_completion(struct hv_device *device,
+					struct vmbus_channel *channel,
 					struct netvsc_device *net_device,
 					u64 transaction_id, u32 status)
 {
@@ -587,7 +817,7 @@
 
 retry_send_cmplt:
 	/* Send the completion */
-	ret = vmbus_sendpacket(device->channel, &recvcompMessage,
+	ret = vmbus_sendpacket(channel, &recvcompMessage,
 			       sizeof(struct nvsp_message), transaction_id,
 			       VM_PKT_COMP, 0);
 	if (ret == 0) {
@@ -613,76 +843,20 @@
 	}
 }
 
-/* Send a receive completion packet to RNDIS device (ie NetVsp) */
-static void netvsc_receive_completion(void *context)
-{
-	struct hv_netvsc_packet *packet = context;
-	struct hv_device *device = packet->device;
-	struct netvsc_device *net_device;
-	u64 transaction_id = 0;
-	bool fsend_receive_comp = false;
-	unsigned long flags;
-	struct net_device *ndev;
-	u32 status = NVSP_STAT_NONE;
-
-	/*
-	 * Even though it seems logical to do a GetOutboundNetDevice() here to
-	 * send out receive completion, we are using GetInboundNetDevice()
-	 * since we may have disable outbound traffic already.
-	 */
-	net_device = get_inbound_net_device(device);
-	if (!net_device)
-		return;
-	ndev = net_device->ndev;
-
-	/* Overloading use of the lock. */
-	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
-
-	if (packet->status != NVSP_STAT_SUCCESS)
-		packet->xfer_page_pkt->status = NVSP_STAT_FAIL;
-
-	packet->xfer_page_pkt->count--;
-
-	/*
-	 * Last one in the line that represent 1 xfer page packet.
-	 * Return the xfer page packet itself to the freelist
-	 */
-	if (packet->xfer_page_pkt->count == 0) {
-		fsend_receive_comp = true;
-		transaction_id = packet->completion.recv.recv_completion_tid;
-		status = packet->xfer_page_pkt->status;
-		list_add_tail(&packet->xfer_page_pkt->list_ent,
-			      &net_device->recv_pkt_list);
-
-	}
-
-	/* Put the packet back */
-	list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
-	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
-
-	/* Send a receive completion for the xfer page packet */
-	if (fsend_receive_comp)
-		netvsc_send_recv_completion(device, net_device, transaction_id,
-					status);
-
-}
-
 static void netvsc_receive(struct netvsc_device *net_device,
+			struct vmbus_channel *channel,
 			struct hv_device *device,
 			struct vmpacket_descriptor *packet)
 {
 	struct vmtransfer_page_packet_header *vmxferpage_packet;
 	struct nvsp_message *nvsp_packet;
-	struct hv_netvsc_packet *netvsc_packet = NULL;
-	/* struct netvsc_driver *netvscDriver; */
-	struct xferpage_packet *xferpage_packet = NULL;
+	struct hv_netvsc_packet nv_pkt;
+	struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
+	u32 status = NVSP_STAT_SUCCESS;
 	int i;
 	int count = 0;
-	unsigned long flags;
 	struct net_device *ndev;
 
-	LIST_HEAD(listHead);
-
 	ndev = net_device->ndev;
 
 	/*
@@ -715,77 +889,14 @@
 		return;
 	}
 
-	/*
-	 * Grab free packets (range count + 1) to represent this xfer
-	 * page packet. +1 to represent the xfer page packet itself.
-	 * We grab it here so that we know exactly how many we can
-	 * fulfil
-	 */
-	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
-	while (!list_empty(&net_device->recv_pkt_list)) {
-		list_move_tail(net_device->recv_pkt_list.next, &listHead);
-		if (++count == vmxferpage_packet->range_cnt + 1)
-			break;
-	}
-	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
-
-	/*
-	 * We need at least 2 netvsc pkts (1 to represent the xfer
-	 * page and at least 1 for the range) i.e. we can handled
-	 * some of the xfer page packet ranges...
-	 */
-	if (count < 2) {
-		netdev_err(ndev, "Got only %d netvsc pkt...needed "
-			"%d pkts. Dropping this xfer page packet completely!\n",
-			count, vmxferpage_packet->range_cnt + 1);
-
-		/* Return it to the freelist */
-		spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
-		for (i = count; i != 0; i--) {
-			list_move_tail(listHead.next,
-				       &net_device->recv_pkt_list);
-		}
-		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
-				       flags);
-
-		netvsc_send_recv_completion(device, net_device,
-					    vmxferpage_packet->d.trans_id,
-					    NVSP_STAT_FAIL);
-
-		return;
-	}
-
-	/* Remove the 1st packet to represent the xfer page packet itself */
-	xferpage_packet = (struct xferpage_packet *)listHead.next;
-	list_del(&xferpage_packet->list_ent);
-	xferpage_packet->status = NVSP_STAT_SUCCESS;
-
-	/* This is how much we can satisfy */
-	xferpage_packet->count = count - 1;
-
-	if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
-		netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
-			"this xfer page...got %d\n",
-			vmxferpage_packet->range_cnt, xferpage_packet->count);
-	}
+	count = vmxferpage_packet->range_cnt;
+	netvsc_packet->device = device;
+	netvsc_packet->channel = channel;
 
 	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
-	for (i = 0; i < (count - 1); i++) {
-		netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
-		list_del(&netvsc_packet->list_ent);
-
+	for (i = 0; i < count; i++) {
 		/* Initialize the netvsc packet */
 		netvsc_packet->status = NVSP_STAT_SUCCESS;
-		netvsc_packet->xfer_page_pkt = xferpage_packet;
-		netvsc_packet->completion.recv.recv_completion =
-					netvsc_receive_completion;
-		netvsc_packet->completion.recv.recv_completion_ctx =
-					netvsc_packet;
-		netvsc_packet->device = device;
-		/* Save this so that we can send it back */
-		netvsc_packet->completion.recv.recv_completion_tid =
-					vmxferpage_packet->d.trans_id;
-
 		netvsc_packet->data = (void *)((unsigned long)net_device->
 			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
 		netvsc_packet->total_data_buflen =
@@ -794,16 +905,53 @@
 		/* Pass it to the upper layer */
 		rndis_filter_receive(device, netvsc_packet);
 
-		netvsc_receive_completion(netvsc_packet->
-				completion.recv.recv_completion_ctx);
+		if (netvsc_packet->status != NVSP_STAT_SUCCESS)
+			status = NVSP_STAT_FAIL;
 	}
 
+	netvsc_send_recv_completion(device, channel, net_device,
+				    vmxferpage_packet->d.trans_id, status);
 }
 
-static void netvsc_channel_cb(void *context)
+
+static void netvsc_send_table(struct hv_device *hdev,
+			      struct vmpacket_descriptor *vmpkt)
+{
+	struct netvsc_device *nvscdev;
+	struct net_device *ndev;
+	struct nvsp_message *nvmsg;
+	int i;
+	u32 count, *tab;
+
+	nvscdev = get_outbound_net_device(hdev);
+	if (!nvscdev)
+		return;
+	ndev = nvscdev->ndev;
+
+	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
+					(vmpkt->offset8 << 3));
+
+	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
+		return;
+
+	count = nvmsg->msg.v5_msg.send_table.count;
+	if (count != VRSS_SEND_TAB_SIZE) {
+		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
+		return;
+	}
+
+	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
+		      nvmsg->msg.v5_msg.send_table.offset);
+
+	for (i = 0; i < count; i++)
+		nvscdev->send_table[i] = tab[i];
+}
+
+void netvsc_channel_cb(void *context)
 {
 	int ret;
-	struct hv_device *device = context;
+	struct vmbus_channel *channel = (struct vmbus_channel *)context;
+	struct hv_device *device;
 	struct netvsc_device *net_device;
 	u32 bytes_recvd;
 	u64 request_id;
@@ -812,14 +960,19 @@
 	int bufferlen = NETVSC_PACKET_SIZE;
 	struct net_device *ndev;
 
+	if (channel->primary_channel != NULL)
+		device = channel->primary_channel->device_obj;
+	else
+		device = channel->device_obj;
+
 	net_device = get_inbound_net_device(device);
 	if (!net_device)
 		return;
 	ndev = net_device->ndev;
-	buffer = net_device->cb_buffer;
+	buffer = get_per_channel_state(channel);
 
 	do {
-		ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
+		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
 					   &bytes_recvd, &request_id);
 		if (ret == 0) {
 			if (bytes_recvd > 0) {
@@ -831,8 +984,12 @@
 					break;
 
 				case VM_PKT_DATA_USING_XFER_PAGES:
-					netvsc_receive(net_device,
-							device, desc);
+					netvsc_receive(net_device, channel,
+						       device, desc);
+					break;
+
+				case VM_PKT_DATA_INBAND:
+					netvsc_send_table(device, desc);
 					break;
 
 				default:
@@ -880,11 +1037,9 @@
 int netvsc_device_add(struct hv_device *device, void *additional_info)
 {
 	int ret = 0;
-	int i;
 	int ring_size =
 	((struct netvsc_device_info *)additional_info)->ring_size;
 	struct netvsc_device *net_device;
-	struct hv_netvsc_packet *packet, *pos;
 	struct net_device *ndev;
 
 	net_device = alloc_net_device(device);
@@ -893,6 +1048,8 @@
 		goto cleanup;
 	}
 
+	net_device->ring_size = ring_size;
+
 	/*
 	 * Coming into this function, struct net_device * is
 	 * registered as the driver private data.
@@ -903,24 +1060,14 @@
 	ndev = net_device->ndev;
 
 	/* Initialize the NetVSC channel extension */
-	spin_lock_init(&net_device->recv_pkt_list_lock);
-
-	INIT_LIST_HEAD(&net_device->recv_pkt_list);
-
-	for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
-		packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
-		if (!packet)
-			break;
-
-		list_add_tail(&packet->list_ent,
-			      &net_device->recv_pkt_list);
-	}
 	init_completion(&net_device->channel_init_wait);
 
+	set_per_channel_state(device->channel, net_device->cb_buffer);
+
 	/* Open the channel */
 	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
 			 ring_size * PAGE_SIZE, NULL, 0,
-			 netvsc_channel_cb, device);
+			 netvsc_channel_cb, device->channel);
 
 	if (ret != 0) {
 		netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -930,6 +1077,8 @@
 	/* Channel is opened */
 	pr_info("hv_netvsc channel opened successfully\n");
 
+	net_device->chn_table[0] = device->channel;
+
 	/* Connect with the NetVsp */
 	ret = netvsc_connect_vsp(device);
 	if (ret != 0) {
@@ -946,16 +1095,8 @@
 
 cleanup:
 
-	if (net_device) {
-		list_for_each_entry_safe(packet, pos,
-					 &net_device->recv_pkt_list,
-					 list_ent) {
-			list_del(&packet->list_ent);
-			kfree(packet);
-		}
-
+	if (net_device)
 		kfree(net_device);
-	}
 
 	return ret;
 }

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7918d51..4fd71b7 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c

@@ -101,7 +101,7 @@
 		return ret;
 	}
 
-	netif_start_queue(net);
+	netif_tx_start_all_queues(net);
 
 	nvdev = hv_get_drvdata(device_obj);
 	rdev = nvdev->extension;
@@ -149,15 +149,100 @@
 	return ppi;
 }
 
+union sub_key {
+	u64 k;
+	struct {
+		u8 pad[3];
+		u8 kb;
+		u32 ka;
+	};
+};
+
+/* Toeplitz hash function
+ * data: network byte order
+ * return: host byte order
+ */
+static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
+{
+	union sub_key subk;
+	int k_next = 4;
+	u8 dt;
+	int i, j;
+	u32 ret = 0;
+
+	subk.k = 0;
+	subk.ka = ntohl(*(u32 *)key);
+
+	for (i = 0; i < dlen; i++) {
+		subk.kb = key[k_next];
+		k_next = (k_next + 1) % klen;
+		dt = data[i];
+		for (j = 0; j < 8; j++) {
+			if (dt & 0x80)
+				ret ^= subk.ka;
+			dt <<= 1;
+			subk.k <<= 1;
+		}
+	}
+
+	return ret;
+}
+
+static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
+{
+	struct iphdr *iphdr;
+	int data_len;
+	bool ret = false;
+
+	if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
+		return false;
+
+	iphdr = ip_hdr(skb);
+
+	if (iphdr->version == 4) {
+		if (iphdr->protocol == IPPROTO_TCP)
+			data_len = 12;
+		else
+			data_len = 8;
+		*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
+				  (u8 *)&iphdr->saddr, data_len);
+		ret = true;
+	}
+
+	return ret;
+}
+
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+			void *accel_priv, select_queue_fallback_t fallback)
+{
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
+	struct hv_device *hdev =  net_device_ctx->device_ctx;
+	struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
+	u32 hash;
+	u16 q_idx = 0;
+
+	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+		return 0;
+
+	if (netvsc_set_hash(&hash, skb)) {
+		q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+			ndev->real_num_tx_queues;
+		skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+	}
+
+	return q_idx;
+}
+
 static void netvsc_xmit_completion(void *context)
 {
 	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
 	struct sk_buff *skb = (struct sk_buff *)
-		(unsigned long)packet->completion.send.send_completion_tid;
+		(unsigned long)packet->send_completion_tid;
+	u32 index = packet->send_buf_index;
 
 	kfree(packet);
 
-	if (skb)
+	if (skb && (index == NETVSC_INVALID_INDEX))
 		dev_kfree_skb_any(skb);
 }
 
@@ -301,6 +386,7 @@
 	struct ndis_tcp_lso_info *lso_info;
 	int  hdr_offset;
 	u32 net_trans_info;
+	u32 hash;
 
 
 	/* We will atmost need two pages to describe the rndis
@@ -319,9 +405,8 @@
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
 			 (num_data_pgs * sizeof(struct hv_page_buffer)) +
 			 sizeof(struct rndis_message) +
-			 NDIS_VLAN_PPI_SIZE +
-			 NDIS_CSUM_PPI_SIZE +
-			 NDIS_LSO_PPI_SIZE, GFP_ATOMIC);
+			 NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE +
+			 NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC);
 	if (!packet) {
 		/* out of memory, drop packet */
 		netdev_err(net, "unable to allocate hv_netvsc_packet\n");
@@ -333,6 +418,8 @@
 
 	packet->vlan_tci = skb->vlan_tci;
 
+	packet->q_idx = skb_get_queue_mapping(skb);
+
 	packet->is_data_pkt = true;
 	packet->total_data_buflen = skb->len;
 
@@ -341,9 +428,9 @@
 				(num_data_pgs * sizeof(struct hv_page_buffer)));
 
 	/* Set the completion routine */
-	packet->completion.send.send_completion = netvsc_xmit_completion;
-	packet->completion.send.send_completion_ctx = packet;
-	packet->completion.send.send_completion_tid = (unsigned long)skb;
+	packet->send_completion = netvsc_xmit_completion;
+	packet->send_completion_ctx = packet;
+	packet->send_completion_tid = (unsigned long)skb;
 
 	isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
 
@@ -358,6 +445,14 @@
 
 	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
 
+	hash = skb_get_hash_raw(skb);
+	if (hash != 0 && net->real_num_tx_queues > 1) {
+		rndis_msg_size += NDIS_HASH_PPI_SIZE;
+		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
+				    NBL_HASH_VALUE);
+		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
+	}
+
 	if (isvlan) {
 		struct ndis_pkt_8021q_info *vlan;
 
@@ -558,6 +653,9 @@
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 				       packet->vlan_tci);
 
+	skb_record_rx_queue(skb, packet->channel->
+			    offermsg.offer.sub_channel_index);
+
 	net->stats.rx_packets++;
 	net->stats.rx_bytes += packet->total_data_buflen;
 
@@ -606,7 +704,7 @@
 	hv_set_drvdata(hdev, ndev);
 	device_info.ring_size = ring_size;
 	rndis_filter_device_add(hdev, &device_info);
-	netif_wake_queue(ndev);
+	netif_tx_wake_all_queues(ndev);
 
 	return 0;
 }
@@ -652,6 +750,7 @@
 	.ndo_change_mtu =		netvsc_change_mtu,
 	.ndo_validate_addr =		eth_validate_addr,
 	.ndo_set_mac_address =		netvsc_set_mac_addr,
+	.ndo_select_queue =		netvsc_select_queue,
 };
 
 /*
@@ -698,9 +797,11 @@
 	struct net_device *net = NULL;
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device_info device_info;
+	struct netvsc_device *nvdev;
 	int ret;
 
-	net = alloc_etherdev(sizeof(struct net_device_context));
+	net = alloc_etherdev_mq(sizeof(struct net_device_context),
+				num_online_cpus());
 	if (!net)
 		return -ENOMEM;
 
@@ -719,7 +820,7 @@
 	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
 			NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-	SET_ETHTOOL_OPS(net, &ethtool_ops);
+	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
 	/* Notify the netvsc driver of the new device */
@@ -733,6 +834,10 @@
 	}
 	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
+	nvdev = hv_get_drvdata(dev);
+	netif_set_real_num_tx_queues(net, nvdev->num_chn);
+	netif_set_real_num_rx_queues(net, nvdev->num_chn);
+
 	ret = register_netdev(net);
 	if (ret != 0) {
 		pr_err("Unable to register netdev.\n");

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 143a98c..99c527a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c

@@ -31,7 +31,7 @@
 #include "hyperv_net.h"
 
 
-#define RNDIS_EXT_LEN 100
+#define RNDIS_EXT_LEN PAGE_SIZE
 struct rndis_request {
 	struct list_head list_ent;
 	struct completion  wait_event;
@@ -94,6 +94,8 @@
 	rndis_msg->ndis_msg_type = msg_type;
 	rndis_msg->msg_len = msg_len;
 
+	request->pkt.q_idx = 0;
+
 	/*
 	 * Set the request id. This field is always after the rndis header for
 	 * request/response packet types so we just used the SetRequest as a
@@ -234,7 +236,7 @@
 			packet->page_buf[0].len;
 	}
 
-	packet->completion.send.send_completion = NULL;
+	packet->send_completion = NULL;
 
 	ret = netvsc_send(dev->net_dev->dev, packet);
 	return ret;
@@ -399,8 +401,6 @@
 	pkt->total_data_buflen = rndis_pkt->data_len;
 	pkt->data = (void *)((unsigned long)pkt->data + data_offset);
 
-	pkt->is_data_pkt = true;
-
 	vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
 	if (vlan) {
 		pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid |
@@ -509,6 +509,19 @@
 	query->info_buflen = 0;
 	query->dev_vc_handle = 0;
 
+	if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
+		struct ndis_recv_scale_cap *cap;
+
+		request->request_msg.msg_len +=
+			sizeof(struct ndis_recv_scale_cap);
+		query->info_buflen = sizeof(struct ndis_recv_scale_cap);
+		cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
+						     query->info_buf_offset);
+		cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+		cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+		cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
+	}
+
 	ret = rndis_filter_send_request(dev, request);
 	if (ret != 0)
 		goto cleanup;
@@ -695,6 +708,89 @@
 	return ret;
 }
 
+u8 netvsc_hash_key[HASH_KEYLEN] = {	/* key passed to comp_hash() and copied into the RSS set request */
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
+{
+	struct net_device *ndev = rdev->net_dev->ndev;
+	struct rndis_request *request;
+	struct rndis_set_request *set;
+	struct rndis_set_complete *set_complete;
+	u32 extlen = sizeof(struct ndis_recv_scale_param) +
+		     4*ITAB_NUM + HASH_KEYLEN;	/* header + u32 indirection table + hash key */
+	struct ndis_recv_scale_param *rssp;
+	u32 *itab;
+	u8 *keyp;
+	int i, t, ret;
+
+	request = get_rndis_request(
+			rdev, RNDIS_MSG_SET,
+			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+	if (!request)
+		return -ENOMEM;
+
+	set = &request->request_msg.msg.set_req;
+	set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
+	set->info_buflen = extlen;
+	set->info_buf_offset = sizeof(struct rndis_set_request);
+	set->dev_vc_handle = 0;
+
+	rssp = (struct ndis_recv_scale_param *)(set + 1);	/* payload directly follows the set request */
+	rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
+	rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+	rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
+	rssp->flag = 0;
+	rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
+			 NDIS_HASH_TCP_IPV4;
+	rssp->indirect_tabsize = 4*ITAB_NUM;	/* size in bytes: ITAB_NUM u32 entries */
+	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
+	rssp->hashkey_size = HASH_KEYLEN;
+	rssp->kashkey_offset = rssp->indirect_taboffset +
+			       rssp->indirect_tabsize;	/* key is placed right after the table */
+
+	/* Set indirection table entries */
+	itab = (u32 *)(rssp + 1);
+	for (i = 0; i < ITAB_NUM; i++)
+		itab[i] = i % num_queue;	/* cycle through queues 0..num_queue-1 */
+
+	/* Set hash key values */
+	keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+	for (i = 0; i < HASH_KEYLEN; i++)
+		keyp[i] = netvsc_hash_key[i];
+
+
+	ret = rndis_filter_send_request(rdev, request);
+	if (ret != 0)
+		goto cleanup;
+
+	t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+	if (t == 0) {
+		netdev_err(ndev, "timeout before we got a set response...\n");
+		/* can't put_rndis_request, since we may still receive a
+		 * send-completion.
+		 */
+		return -ETIMEDOUT;
+	} else {
+		set_complete = &request->response_msg.msg.set_complete;
+		if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+			netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
+				   set_complete->status);
+			ret = -EINVAL;
+		}
+	}
+
+cleanup:
+	put_rndis_request(rdev, request);	/* reached on send failure and after any response */
+	return ret;
+}
+
+
 static int rndis_filter_query_device_link_status(struct rndis_device *dev)
 {
 	u32 size = sizeof(u32);
@@ -886,6 +982,28 @@
 	return ret;
 }
 
+static void netvsc_sc_open(struct vmbus_channel *new_sc)
+{
+	struct netvsc_device *nvscdev;
+	u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+	int ret;
+
+	nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
+
+	if (chn_index >= nvscdev->num_chn)
+		return;	/* index outside the channel count in use: channel is not opened */
+
+	set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
+			      NETVSC_PACKET_SIZE);	/* slot (chn_index - 1) of sub_cb_buf */
+
+	ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
+			 nvscdev->ring_size * PAGE_SIZE, NULL, 0,
+			 netvsc_channel_cb, new_sc);
+
+	if (ret == 0)
+		nvscdev->chn_table[chn_index] = new_sc;	/* recorded only if vmbus_open() succeeded */
+}
+
 int rndis_filter_device_add(struct hv_device *dev,
 				  void *additional_info)
 {
@@ -894,6 +1012,10 @@
 	struct rndis_device *rndis_device;
 	struct netvsc_device_info *device_info = additional_info;
 	struct ndis_offload_params offloads;
+	struct nvsp_message *init_packet;
+	int t;
+	struct ndis_recv_scale_cap rsscap;
+	u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
 
 	rndis_device = get_rndis_device();
 	if (!rndis_device)
@@ -913,6 +1035,7 @@
 
 	/* Initialize the rndis device */
 	net_device = hv_get_drvdata(dev);
+	net_device->num_chn = 1;
 
 	net_device->extension = rndis_device;
 	rndis_device->net_dev = net_device;
@@ -952,7 +1075,6 @@
 	if (ret)
 		goto err_dev_remv;
 
-
 	rndis_filter_query_device_link_status(rndis_device);
 
 	device_info->link_state = rndis_device->link_state;
@@ -961,7 +1083,66 @@
 		 rndis_device->hw_mac_adr,
 		 device_info->link_state ? "down" : "up");
 
-	return ret;
+	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+		return 0;
+
+	/* vRSS setup */
+	memset(&rsscap, 0, rsscap_size);
+	ret = rndis_filter_query_device(rndis_device,
+					OID_GEN_RECEIVE_SCALE_CAPABILITIES,
+					&rsscap, &rsscap_size);
+	if (ret || rsscap.num_recv_que < 2)
+		goto out;
+
+	net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
+			       num_online_cpus() : rsscap.num_recv_que;
+	if (net_device->num_chn == 1)
+		goto out;
+
+	net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
+					 NETVSC_PACKET_SIZE);
+	if (!net_device->sub_cb_buf) {
+		net_device->num_chn = 1;
+		dev_info(&dev->device, "No memory for subchannels.\n");
+		goto out;
+	}
+
+	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
+
+	init_packet = &net_device->channel_init_pkt;
+	memset(init_packet, 0, sizeof(struct nvsp_message));
+	init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
+	init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
+	init_packet->msg.v5_msg.subchn_req.num_subchannels =
+						net_device->num_chn - 1;
+	ret = vmbus_sendpacket(dev->channel, init_packet,
+			       sizeof(struct nvsp_message),
+			       (unsigned long)init_packet,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		goto out;
+	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+	if (t == 0) {
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+	if (init_packet->msg.v5_msg.subchn_comp.status !=
+	    NVSP_STAT_SUCCESS) {
+		ret = -ENODEV;
+		goto out;
+	}
+	net_device->num_chn = 1 +
+		init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+
+	vmbus_are_subchannels_present(dev->channel);
+
+	ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
+
+out:
+	if (ret)
+		net_device->num_chn = 1;
+	return 0; /* return 0 because primary channel can be used alone */
 
 err_dev_remv:
 	rndis_filter_device_remove(dev);

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index e36f194..4517b14 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c

@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
@@ -692,10 +693,7 @@
 	if (rc < 0)
 		goto err_rx;
 
-	rc = at86rf230_start(dev);
-
-	return rc;
-
+	return at86rf230_start(dev);
 err_rx:
 	at86rf230_start(dev);
 err:
@@ -963,33 +961,24 @@
 	return at86rf230_isr(irq, data);
 }
 
-static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol)
-{
-	return at86rf230_write_subreg(lp, SR_IRQ_POLARITY, pol);
-}
-
 static int at86rf230_hw_init(struct at86rf230_local *lp)
 {
-	struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data;
-	int rc, irq_pol;
-	u8 status;
+	int rc, irq_pol, irq_type;
+	u8 dvdd;
 	u8 csma_seed[2];
 
-	rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
-	if (rc)
-		return rc;
-
 	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_FORCE_TRX_OFF);
 	if (rc)
 		return rc;
 
+	irq_type = irq_get_trigger_type(lp->spi->irq);
 	/* configure irq polarity, defaults to high active */
-	if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
+	if (irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
 		irq_pol = IRQ_ACTIVE_LOW;
 	else
 		irq_pol = IRQ_ACTIVE_HIGH;
 
-	rc = at86rf230_irq_polarity(lp, irq_pol);
+	rc = at86rf230_write_subreg(lp, SR_IRQ_POLARITY, irq_pol);
 	if (rc)
 		return rc;
 
@@ -1017,10 +1006,10 @@
 	/* Wait the next SLEEP cycle */
 	msleep(100);
 
-	rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &status);
+	rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &dvdd);
 	if (rc)
 		return rc;
-	if (!status) {
+	if (!dvdd) {
 		dev_err(&lp->spi->dev, "DVDD error\n");
 		return -EINVAL;
 	}
@@ -1032,7 +1021,6 @@
 at86rf230_get_pdata(struct spi_device *spi)
 {
 	struct at86rf230_platform_data *pdata;
-	const char *irq_type;
 
 	if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node)
 		return spi->dev.platform_data;
@@ -1044,19 +1032,6 @@
 	pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0);
 	pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0);
 
-	pdata->irq_type = IRQF_TRIGGER_RISING;
-	of_property_read_string(spi->dev.of_node, "irq-type", &irq_type);
-	if (!strcmp(irq_type, "level-high"))
-		pdata->irq_type = IRQF_TRIGGER_HIGH;
-	else if (!strcmp(irq_type, "level-low"))
-		pdata->irq_type = IRQF_TRIGGER_LOW;
-	else if (!strcmp(irq_type, "edge-rising"))
-		pdata->irq_type = IRQF_TRIGGER_RISING;
-	else if (!strcmp(irq_type, "edge-falling"))
-		pdata->irq_type = IRQF_TRIGGER_FALLING;
-	else
-		dev_warn(&spi->dev, "wrong irq-type specified using edge-rising\n");
-
 	spi->dev.platform_data = pdata;
 done:
 	return pdata;
@@ -1071,7 +1046,7 @@
 	u8 part = 0, version = 0, status;
 	irq_handler_t irq_handler;
 	work_func_t irq_worker;
-	int rc;
+	int rc, irq_type;
 	const char *chip;
 	struct ieee802154_ops *ops = NULL;
 
@@ -1087,27 +1062,17 @@
 	}
 
 	if (gpio_is_valid(pdata->rstn)) {
-		rc = gpio_request(pdata->rstn, "rstn");
+		rc = devm_gpio_request_one(&spi->dev, pdata->rstn,
+					   GPIOF_OUT_INIT_HIGH, "rstn");
 		if (rc)
 			return rc;
 	}
 
 	if (gpio_is_valid(pdata->slp_tr)) {
-		rc = gpio_request(pdata->slp_tr, "slp_tr");
+		rc = devm_gpio_request_one(&spi->dev, pdata->slp_tr,
+					   GPIOF_OUT_INIT_LOW, "slp_tr");
 		if (rc)
-			goto err_slp_tr;
-	}
-
-	if (gpio_is_valid(pdata->rstn)) {
-		rc = gpio_direction_output(pdata->rstn, 1);
-		if (rc)
-			goto err_gpio_dir;
-	}
-
-	if (gpio_is_valid(pdata->slp_tr)) {
-		rc = gpio_direction_output(pdata->slp_tr, 0);
-		if (rc)
-			goto err_gpio_dir;
+			return rc;
 	}
 
 	/* Reset */
@@ -1121,13 +1086,12 @@
 
 	rc = __at86rf230_detect_device(spi, &man_id, &part, &version);
 	if (rc < 0)
-		goto err_gpio_dir;
+		return rc;
 
 	if (man_id != 0x001f) {
 		dev_err(&spi->dev, "Non-Atmel dev found (MAN_ID %02x %02x)\n",
 			man_id >> 8, man_id & 0xFF);
-		rc = -EINVAL;
-		goto err_gpio_dir;
+		return -EINVAL;
 	}
 
 	switch (part) {
@@ -1154,16 +1118,12 @@
 	}
 
 	dev_info(&spi->dev, "Detected %s chip version %d\n", chip, version);
-	if (!ops) {
-		rc = -ENOTSUPP;
-		goto err_gpio_dir;
-	}
+	if (!ops)
+		return -ENOTSUPP;
 
 	dev = ieee802154_alloc_device(sizeof(*lp), ops);
-	if (!dev) {
-		rc = -ENOMEM;
-		goto err_gpio_dir;
-	}
+	if (!dev)
+		return -ENOMEM;
 
 	lp = dev->priv;
 	lp->dev = dev;
@@ -1176,7 +1136,8 @@
 	dev->extra_tx_headroom = 0;
 	dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK;
 
-	if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
+	irq_type = irq_get_trigger_type(spi->irq);
+	if (irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
 		irq_worker = at86rf230_irqwork;
 		irq_handler = at86rf230_isr;
 	} else {
@@ -1202,75 +1163,65 @@
 	if (rc)
 		goto err_hw_init;
 
-	rc = request_irq(spi->irq, irq_handler,
-			 IRQF_SHARED | pdata->irq_type,
-			 dev_name(&spi->dev), lp);
-	if (rc)
-		goto err_hw_init;
-
 	/* Read irq status register to reset irq line */
 	rc = at86rf230_read_subreg(lp, RG_IRQ_STATUS, 0xff, 0, &status);
 	if (rc)
-		goto err_irq;
+		goto err_hw_init;
+
+	rc = devm_request_irq(&spi->dev, spi->irq, irq_handler, IRQF_SHARED,
+			      dev_name(&spi->dev), lp);
+	if (rc)
+		goto err_hw_init;
 
 	rc = ieee802154_register_device(lp->dev);
 	if (rc)
-		goto err_irq;
+		goto err_hw_init;
 
 	return rc;
 
-err_irq:
-	free_irq(spi->irq, lp);
 err_hw_init:
 	flush_work(&lp->irqwork);
-	spi_set_drvdata(spi, NULL);
 	mutex_destroy(&lp->bmux);
 	ieee802154_free_device(lp->dev);
 
-err_gpio_dir:
-	if (gpio_is_valid(pdata->slp_tr))
-		gpio_free(pdata->slp_tr);
-err_slp_tr:
-	if (gpio_is_valid(pdata->rstn))
-		gpio_free(pdata->rstn);
 	return rc;
 }
 
 static int at86rf230_remove(struct spi_device *spi)
 {
 	struct at86rf230_local *lp = spi_get_drvdata(spi);
-	struct at86rf230_platform_data *pdata = spi->dev.platform_data;
 
 	/* mask all at86rf230 irq's */
 	at86rf230_write_subreg(lp, SR_IRQ_MASK, 0);
 	ieee802154_unregister_device(lp->dev);
-
-	free_irq(spi->irq, lp);
 	flush_work(&lp->irqwork);
-
-	if (gpio_is_valid(pdata->slp_tr))
-		gpio_free(pdata->slp_tr);
-	if (gpio_is_valid(pdata->rstn))
-		gpio_free(pdata->rstn);
-
 	mutex_destroy(&lp->bmux);
 	ieee802154_free_device(lp->dev);
-
 	dev_dbg(&spi->dev, "unregistered at86rf230\n");
+
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_OF)
-static struct of_device_id at86rf230_of_match[] = {
+static const struct of_device_id at86rf230_of_match[] = {
 	{ .compatible = "atmel,at86rf230", },
 	{ .compatible = "atmel,at86rf231", },
 	{ .compatible = "atmel,at86rf233", },
 	{ .compatible = "atmel,at86rf212", },
 	{ },
 };
-#endif
+MODULE_DEVICE_TABLE(of, at86rf230_of_match);
+
+static const struct spi_device_id at86rf230_device_id[] = {	/* same four chip names as at86rf230_of_match */
+	{ .name = "at86rf230", },
+	{ .name = "at86rf231", },
+	{ .name = "at86rf233", },
+	{ .name = "at86rf212", },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, at86rf230_device_id);
 
 static struct spi_driver at86rf230_driver = {
+	.id_table = at86rf230_device_id,
 	.driver = {
 		.of_match_table = of_match_ptr(at86rf230_of_match),
 		.name	= "at86rf230",

diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index b8d2217..27d8320 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c

@@ -26,6 +26,7 @@
 #include <linux/timer.h>
 #include <linux/platform_device.h>
 #include <linux/netdevice.h>
+#include <linux/device.h>
 #include <linux/spinlock.h>
 #include <net/mac802154.h>
 #include <net/wpan-phy.h>
@@ -228,7 +229,8 @@
 	int err = -ENOMEM;
 	int i;
 
-	priv = kzalloc(sizeof(struct fakelb_priv), GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct fakelb_priv),
+			    GFP_KERNEL);
 	if (!priv)
 		goto err_alloc;
 
@@ -248,7 +250,6 @@
 err_slave:
 	list_for_each_entry(dp, &priv->list, list)
 		fakelb_del(dp);
-	kfree(priv);
 err_alloc:
 	return err;
 }
@@ -260,7 +261,6 @@
 
 	list_for_each_entry_safe(dp, temp, &priv->list, list)
 		fakelb_del(dp);
-	kfree(priv);
 
 	return 0;
 }

diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index 78a6552..4048062 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c

@@ -618,12 +618,12 @@
 
 	printk(KERN_INFO "mrf24j40: probe(). IRQ: %d\n", spi->irq);
 
-	devrec = kzalloc(sizeof(struct mrf24j40), GFP_KERNEL);
+	devrec = devm_kzalloc(&spi->dev, sizeof(struct mrf24j40), GFP_KERNEL);
 	if (!devrec)
-		goto err_devrec;
-	devrec->buf = kzalloc(3, GFP_KERNEL);
+		goto err_ret;
+	devrec->buf = devm_kzalloc(&spi->dev, 3, GFP_KERNEL);
 	if (!devrec->buf)
-		goto err_buf;
+		goto err_ret;
 
 	spi->mode = SPI_MODE_0; /* TODO: Is this appropriate for right here? */
 	if (spi->max_speed_hz > MAX_SPI_SPEED_HZ)
@@ -638,7 +638,7 @@
 
 	devrec->dev = ieee802154_alloc_device(0, &mrf24j40_ops);
 	if (!devrec->dev)
-		goto err_alloc_dev;
+		goto err_ret;
 
 	devrec->dev->priv = devrec;
 	devrec->dev->parent = &devrec->spi->dev;
@@ -676,12 +676,13 @@
 	val &= ~0x3; /* Clear RX mode (normal) */
 	write_short_reg(devrec, REG_RXMCR, val);
 
-	ret = request_threaded_irq(spi->irq,
-				   NULL,
-				   mrf24j40_isr,
-				   IRQF_TRIGGER_LOW|IRQF_ONESHOT,
-				   dev_name(&spi->dev),
-				   devrec);
+	ret = devm_request_threaded_irq(&spi->dev,
+					spi->irq,
+					NULL,
+					mrf24j40_isr,
+					IRQF_TRIGGER_LOW|IRQF_ONESHOT,
+					dev_name(&spi->dev),
+					devrec);
 
 	if (ret) {
 		dev_err(printdev(devrec), "Unable to get IRQ");
@@ -695,11 +696,7 @@
 	ieee802154_unregister_device(devrec->dev);
 err_register_device:
 	ieee802154_free_device(devrec->dev);
-err_alloc_dev:
-	kfree(devrec->buf);
-err_buf:
-	kfree(devrec);
-err_devrec:
+err_ret:
 	return ret;
 }
 
@@ -709,15 +706,11 @@
 
 	dev_dbg(printdev(devrec), "remove\n");
 
-	free_irq(spi->irq, devrec);
 	ieee802154_unregister_device(devrec->dev);
 	ieee802154_free_device(devrec->dev);
 	/* TODO: Will ieee802154_free_device() wait until ->xmit() is
 	 * complete? */
 
-	/* Clean up the SPI stuff. */
-	kfree(devrec->buf);
-	kfree(devrec);
 	return 0;
 }
 

diff --git a/drivers/net/irda/Kconfig b/drivers/net/irda/Kconfig
index 3da44d5..8d101d6 100644
--- a/drivers/net/irda/Kconfig
+++ b/drivers/net/irda/Kconfig

@@ -396,7 +396,8 @@
 
 config SH_IRDA
 	tristate "SuperH IrDA driver"
-	depends on IRDA && ARCH_SHMOBILE
+	depends on IRDA
+	depends on ARCH_SHMOBILE || COMPILE_TEST
 	help
 	  Say Y here if your want to enable SuperH IrDA devices.
 

diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 2900af0..998bb89 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c

@@ -510,10 +510,8 @@
  */
 static int via_ircc_read_dongle_id(int iobase)
 {
-	int dongle_id = 9;	/* Default to IBM */
-
 	IRDA_ERROR("via-ircc: dongle probing not supported, please specify dongle_id module parameter.\n");
-	return dongle_id;
+	return 9;	/* Default to IBM */
 }
 
 /*
@@ -926,7 +924,6 @@
 static int via_ircc_dma_xmit_complete(struct via_ircc_cb *self)
 {
 	int iobase;
-	int ret = TRUE;
 	u8 Tx_status;
 
 	IRDA_DEBUG(3, "%s()\n", __func__);
@@ -983,7 +980,7 @@
 	// Tell the network layer, that we can accept more frames 
 	netif_wake_queue(self->netdev);
 //F01   }
-	return ret;
+	return TRUE;
 }
 
 /*

diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index e641bb2..11dbdf3 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c

@@ -62,10 +62,6 @@
 #include "w83977af.h"
 #include "w83977af_ir.h"
 
-#ifdef  CONFIG_ARCH_NETWINDER            /* Adjust to NetWinder differences */
-#undef  CONFIG_NETWINDER_TX_DMA_PROBLEMS /* Not needed */
-#define CONFIG_NETWINDER_RX_DMA_PROBLEMS /* Must have this one! */
-#endif
 #define CONFIG_USE_W977_PNP        /* Currently needed */
 #define PIO_MAX_SPEED       115200 
 
@@ -332,7 +328,7 @@
  		w977_write_reg(0x74, dma+1, efbase[i]);
 #else
  		w977_write_reg(0x74, dma, efbase[i]);   
-#endif /*CONFIG_ARCH_NETWINDER */
+#endif /* CONFIG_ARCH_NETWINDER */
  		w977_write_reg(0x75, 0x04, efbase[i]);  /* Disable Tx DMA */
   	
  		/* Set append hardware CRC, enable IR bank selection */	
@@ -563,10 +559,6 @@
 static void w83977af_dma_write(struct w83977af_ir *self, int iobase)
 {
 	__u8 set;
-#ifdef CONFIG_NETWINDER_TX_DMA_PROBLEMS
-	unsigned long flags;
-	__u8 hcr;
-#endif
         IRDA_DEBUG(4, "%s(), len=%d\n", __func__ , self->tx_buff.len);
 
 	/* Save current set */
@@ -579,30 +571,13 @@
 	/* Choose transmit DMA channel  */ 
 	switch_bank(iobase, SET2);
 	outb(ADCR1_D_CHSW|/*ADCR1_DMA_F|*/ADCR1_ADV_SL, iobase+ADCR1);
-#ifdef CONFIG_NETWINDER_TX_DMA_PROBLEMS
-	spin_lock_irqsave(&self->lock, flags);
-
-	disable_dma(self->io.dma);
-	clear_dma_ff(self->io.dma);
-	set_dma_mode(self->io.dma, DMA_MODE_READ);
-	set_dma_addr(self->io.dma, self->tx_buff_dma);
-	set_dma_count(self->io.dma, self->tx_buff.len);
-#else
 	irda_setup_dma(self->io.dma, self->tx_buff_dma, self->tx_buff.len,
 		       DMA_MODE_WRITE);	
-#endif
 	self->io.direction = IO_XMIT;
 	
 	/* Enable DMA */
  	switch_bank(iobase, SET0);
-#ifdef CONFIG_NETWINDER_TX_DMA_PROBLEMS
-	hcr = inb(iobase+HCR);
-	outb(hcr | HCR_EN_DMA, iobase+HCR);
-	enable_dma(self->io.dma);
-	spin_unlock_irqrestore(&self->lock, flags);
-#else	
 	outb(inb(iobase+HCR) | HCR_EN_DMA | HCR_TX_WT, iobase+HCR);
-#endif
 
 	/* Restore set register */
 	outb(set, iobase+SSR);
@@ -711,7 +686,7 @@
 {
 	int iobase;
 	__u8 set;
-#ifdef CONFIG_NETWINDER_RX_DMA_PROBLEMS
+#ifdef CONFIG_ARCH_NETWINDER
 	unsigned long flags;
 	__u8 hcr;
 #endif
@@ -736,7 +711,7 @@
 	self->io.direction = IO_RECV;
 	self->rx_buff.data = self->rx_buff.head;
 
-#ifdef CONFIG_NETWINDER_RX_DMA_PROBLEMS
+#ifdef CONFIG_ARCH_NETWINDER
 	spin_lock_irqsave(&self->lock, flags);
 
 	disable_dma(self->io.dma);
@@ -759,7 +734,7 @@
 	
 	/* Enable DMA */
 	switch_bank(iobase, SET0);
-#ifdef CONFIG_NETWINDER_RX_DMA_PROBLEMS
+#ifdef CONFIG_ARCH_NETWINDER
 	hcr = inb(iobase+HCR);
 	outb(hcr | HCR_EN_DMA, iobase+HCR);
 	enable_dma(self->io.dma);

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d53e299..958df38 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c

@@ -30,8 +30,10 @@
 #include <linux/if_link.h>
 #include <linux/if_macvlan.h>
 #include <linux/hash.h>
+#include <linux/workqueue.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
+#include <linux/netpoll.h>
 
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
@@ -40,10 +42,19 @@
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct rcu_head		rcu;
+	struct sk_buff_head	bc_queue;
+	struct work_struct	bc_work;
 	bool 			passthru;
-	int			count;
 };
 
+#define MACVLAN_PORT_IS_EMPTY(port)    list_empty(&port->vlans)	/* NOTE(review): port is not parenthesised; pass a plain identifier */
+
+struct macvlan_skb_cb {	/* per-skb data overlaid on skb->cb via MACVLAN_SKB_CB() */
+	const struct macvlan_dev *src;	/* result of the source-MAC lookup; NULL for an external sender */
+};
+
+#define MACVLAN_SKB_CB(__skb) ((struct macvlan_skb_cb *)&((__skb)->cb[0]))
+
 static void macvlan_port_destroy(struct net_device *dev);
 
 static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev)
@@ -120,7 +131,7 @@
 	struct net_device *dev = vlan->dev;
 
 	if (local)
-		return dev_forward_skb(dev, skb);
+		return __dev_forward_skb(dev, skb);
 
 	skb->dev = dev;
 	if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
@@ -128,7 +139,7 @@
 	else
 		skb->pkt_type = PACKET_MULTICAST;
 
-	return netif_rx(skb);
+	return 0;
 }
 
 static u32 macvlan_hash_mix(const struct macvlan_dev *vlan)
@@ -175,13 +186,87 @@
 			if (likely(nskb))
 				err = macvlan_broadcast_one(
 					nskb, vlan, eth,
-					mode == MACVLAN_MODE_BRIDGE);
+					mode == MACVLAN_MODE_BRIDGE) ?:
+				      netif_rx_ni(nskb);
 			macvlan_count_rx(vlan, skb->len + ETH_HLEN,
 					 err == NET_RX_SUCCESS, 1);
 		}
 	}
 }
 
+static void macvlan_process_broadcast(struct work_struct *w)
+{
+	struct macvlan_port *port = container_of(w, struct macvlan_port,
+						 bc_work);
+	struct sk_buff *skb;
+	struct sk_buff_head list;	/* private list, drained without bc_queue.lock */
+
+	skb_queue_head_init(&list);
+
+	spin_lock_bh(&port->bc_queue.lock);
+	skb_queue_splice_tail_init(&port->bc_queue, &list);	/* move the whole backlog, leaving bc_queue empty */
+	spin_unlock_bh(&port->bc_queue.lock);
+
+	while ((skb = __skb_dequeue(&list))) {
+		const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;	/* stored by macvlan_handle_frame() */
+
+		rcu_read_lock();
+
+		if (!src)
+			/* frame comes from an external address */
+			macvlan_broadcast(skb, port, NULL,
+					  MACVLAN_MODE_PRIVATE |
+					  MACVLAN_MODE_VEPA    |
+					  MACVLAN_MODE_PASSTHRU|
+					  MACVLAN_MODE_BRIDGE);
+		else if (src->mode == MACVLAN_MODE_VEPA)
+			/* flood to everyone except source */
+			macvlan_broadcast(skb, port, src->dev,
+					  MACVLAN_MODE_VEPA |
+					  MACVLAN_MODE_BRIDGE);
+		else
+			/*
+			 * flood only to VEPA ports, bridge ports
+			 * already saw the frame on the way out.
+			 */
+			macvlan_broadcast(skb, port, src->dev,
+					  MACVLAN_MODE_VEPA);
+
+		rcu_read_unlock();
+
+		kfree_skb(skb);	/* release the queued skb once it has been flooded */
+	}
+}
+
+static void macvlan_broadcast_enqueue(struct macvlan_port *port,
+				      struct sk_buff *skb)
+{
+	struct sk_buff *nskb;
+	int err = -ENOMEM;	/* remains set when the queue is already full */
+
+	nskb = skb_clone(skb, GFP_ATOMIC);	/* a clone is queued; skb itself is not consumed here */
+	if (!nskb)
+		goto err;
+
+	spin_lock(&port->bc_queue.lock);
+	if (skb_queue_len(&port->bc_queue) < skb->dev->tx_queue_len) {	/* backlog bounded by tx_queue_len */
+		__skb_queue_tail(&port->bc_queue, nskb);
+		err = 0;
+	}
+	spin_unlock(&port->bc_queue.lock);
+
+	if (err)
+		goto free_nskb;
+
+	schedule_work(&port->bc_work);	/* bc_work runs macvlan_process_broadcast() */
+	return;
+
+free_nskb:
+	kfree_skb(nskb);
+err:
+	atomic_long_inc(&skb->dev->rx_dropped);	/* clone failure and full queue both count as RX drops */
+}
+
 /* called under rcu_read_lock() from netif_receive_skb */
 static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 {
@@ -201,32 +286,18 @@
 			return RX_HANDLER_CONSUMED;
 		eth = eth_hdr(skb);
 		src = macvlan_hash_lookup(port, eth->h_source);
-		if (!src)
-			/* frame comes from an external address */
-			macvlan_broadcast(skb, port, NULL,
-					  MACVLAN_MODE_PRIVATE |
-					  MACVLAN_MODE_VEPA    |
-					  MACVLAN_MODE_PASSTHRU|
-					  MACVLAN_MODE_BRIDGE);
-		else if (src->mode == MACVLAN_MODE_VEPA)
-			/* flood to everyone except source */
-			macvlan_broadcast(skb, port, src->dev,
-					  MACVLAN_MODE_VEPA |
-					  MACVLAN_MODE_BRIDGE);
-		else if (src->mode == MACVLAN_MODE_BRIDGE)
-			/*
-			 * flood only to VEPA ports, bridge ports
-			 * already saw the frame on the way out.
-			 */
-			macvlan_broadcast(skb, port, src->dev,
-					  MACVLAN_MODE_VEPA);
-		else {
+		if (src && src->mode != MACVLAN_MODE_VEPA &&
+		    src->mode != MACVLAN_MODE_BRIDGE) {
 			/* forward to original port. */
 			vlan = src;
-			ret = macvlan_broadcast_one(skb, vlan, eth, 0);
+			ret = macvlan_broadcast_one(skb, vlan, eth, 0) ?:
+			      netif_rx(skb);
 			goto out;
 		}
 
+		MACVLAN_SKB_CB(skb)->src = src;
+		macvlan_broadcast_enqueue(port, skb);
+
 		return RX_HANDLER_PASS;
 	}
 
@@ -287,12 +358,26 @@
 	return dev_queue_xmit(skb);
 }
 
+static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)
+		netpoll_send_skb(vlan->netpoll, skb);	/* NOTE(review): skb is neither sent nor freed here when vlan->netpoll is NULL */
+#else
+	BUG();	/* built without CONFIG_NET_POLL_CONTROLLER */
+#endif
+	return NETDEV_TX_OK;	/* returned unconditionally */
+}
+
 static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	unsigned int len = skb->len;
 	int ret;
-	const struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	if (unlikely(netpoll_tx_running(dev)))
+		return macvlan_netpoll_send_skb(vlan, skb);
 
 	if (vlan->fwd_priv) {
 		skb->dev = vlan->lowerdev;
@@ -424,33 +509,47 @@
 	return 0;
 }
 
-static int macvlan_set_mac_address(struct net_device *dev, void *p)
+static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
-	struct sockaddr *addr = p;
 	int err;
 
+	if (!(dev->flags & IFF_UP)) {
+		/* Just copy in the new address */
+		ether_addr_copy(dev->dev_addr, addr);
+	} else {
+		/* Rehash and update the device filters */
+		if (macvlan_addr_busy(vlan->port, addr))
+			return -EBUSY;
+
+		if (!vlan->port->passthru) {
+			err = dev_uc_add(lowerdev, addr);
+			if (err)
+				return err;
+
+			dev_uc_del(lowerdev, dev->dev_addr);
+		}
+
+		macvlan_hash_change_addr(vlan, addr);
+	}
+	return 0;
+}
+
+static int macvlan_set_mac_address(struct net_device *dev, void *p)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct sockaddr *addr = p;
+
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	if (!(dev->flags & IFF_UP)) {
-		/* Just copy in the new address */
-		memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
-	} else {
-		/* Rehash and update the device filters */
-		if (macvlan_addr_busy(vlan->port, addr->sa_data))
-			return -EBUSY;
-
-		err = dev_uc_add(lowerdev, addr->sa_data);
-		if (err)
-			return err;
-
-		dev_uc_del(lowerdev, dev->dev_addr);
-
-		macvlan_hash_change_addr(vlan, addr->sa_data);
+	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+		dev_set_mac_address(vlan->lowerdev, addr);
+		return 0;
 	}
-	return 0;
+
+	return macvlan_sync_address(dev, addr->sa_data);
 }
 
 static void macvlan_change_rx_flags(struct net_device *dev, int change)
@@ -567,8 +666,7 @@
 
 	free_percpu(vlan->pcpu_stats);
 
-	port->count -= 1;
-	if (!port->count)
+	if (MACVLAN_PORT_IS_EMPTY(port))
 		macvlan_port_destroy(port->dev);
 }
 
@@ -705,6 +803,50 @@
 	return features;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Poll-controller hook for macvlan devices; intentionally does nothing. */
+static void macvlan_dev_poll_controller(struct net_device *dev)
+{
+}
+
+/* Allocate a netpoll instance, set it up on the lower device and store it
+ * in the macvlan private data.  Returns 0 on success, -ENOMEM when the
+ * allocation fails, or the error returned by __netpoll_setup().
+ */
+static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct netpoll *np;
+	int rc;
+
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	if (!np)
+		return -ENOMEM;
+
+	rc = __netpoll_setup(np, vlan->lowerdev);
+	if (rc) {
+		/* Setup failed, so the instance was never published */
+		kfree(np);
+		return rc;
+	}
+
+	vlan->netpoll = np;
+
+	return 0;
+}
+
+/* Detach the netpoll instance from the macvlan device, if one is set, and
+ * hand it to __netpoll_free_async().
+ */
+static void macvlan_dev_netpoll_cleanup(struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct netpoll *np = vlan->netpoll;
+
+	if (np) {
+		/* Clear the pointer before releasing the instance */
+		vlan->netpoll = NULL;
+		__netpoll_free_async(np);
+	}
+}
+#endif	/* CONFIG_NET_POLL_CONTROLLER */
+
 static const struct ethtool_ops macvlan_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_settings		= macvlan_ethtool_get_settings,
@@ -730,6 +872,11 @@
 	.ndo_fdb_del		= macvlan_fdb_del,
 	.ndo_fdb_dump		= ndo_dflt_fdb_dump,
 	.ndo_get_lock_subclass  = macvlan_get_nest_level,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= macvlan_dev_poll_controller,
+	.ndo_netpoll_setup	= macvlan_dev_netpoll_setup,
+	.ndo_netpoll_cleanup	= macvlan_dev_netpoll_cleanup,
+#endif
 };
 
 void macvlan_common_setup(struct net_device *dev)
@@ -770,6 +917,9 @@
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 
+	skb_queue_head_init(&port->bc_queue);
+	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
+
 	err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
 	if (err)
 		kfree(port);
@@ -782,6 +932,7 @@
 {
 	struct macvlan_port *port = macvlan_port_get_rtnl(dev);
 
+	cancel_work_sync(&port->bc_work);
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
 	kfree_rcu(port, rcu);
@@ -868,13 +1019,12 @@
 		vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-		if (port->count)
+		if (!MACVLAN_PORT_IS_EMPTY(port))
 			return -EINVAL;
 		port->passthru = true;
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
-	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_port;
@@ -892,8 +1042,7 @@
 unregister_netdev:
 	unregister_netdevice(dev);
 destroy_port:
-	port->count -= 1;
-	if (!port->count)
+	if (MACVLAN_PORT_IS_EMPTY(port))
 		macvlan_port_destroy(lowerdev);
 
 	return err;
@@ -1028,6 +1177,25 @@
 			netdev_update_features(vlan->dev);
 		}
 		break;
+	case NETDEV_CHANGEMTU:
+		list_for_each_entry(vlan, &port->vlans, list) {
+			if (vlan->dev->mtu <= dev->mtu)
+				continue;
+			dev_set_mtu(vlan->dev, dev->mtu);
+		}
+		break;
+	case NETDEV_CHANGEADDR:
+		if (!port->passthru)
+			return NOTIFY_DONE;
+
+		vlan = list_first_entry_or_null(&port->vlans,
+						struct macvlan_dev,
+						list);
+
+		if (macvlan_sync_address(vlan->dev, dev->dev_addr))
+			return NOTIFY_BAD;
+
+		break;
 	case NETDEV_UNREGISTER:
 		/* twiddle thumbs on netns device moves */
 		if (dev->reg_state != NETREG_UNREGISTERING)
@@ -1036,11 +1204,17 @@
 		list_for_each_entry_safe(vlan, next, &port->vlans, list)
 			vlan->dev->rtnl_link_ops->dellink(vlan->dev, &list_kill);
 		unregister_netdevice_many(&list_kill);
-		list_del(&list_kill);
 		break;
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* Forbid underlaying device to change its type. */
 		return NOTIFY_BAD;
+
+	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_BONDING_FAILOVER:
+	case NETDEV_RESEND_IGMP:
+		/* Propagate to all vlans */
+		list_for_each_entry(vlan, &port->vlans, list)
+			call_netdevice_notifiers(event, vlan->dev);
 	}
 	return NOTIFY_DONE;
 }

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 63aa9d9..5a7e639 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c

@@ -298,7 +298,6 @@
 {
 	cmd->supported = SUPPORTED_Backplane;
 	cmd->advertising = ADVERTISED_Backplane;
-	cmd->speed = SPEED_UNKNOWN;
 	ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
 	cmd->duplex = DUPLEX_FULL;
 	cmd->port = PORT_OTHER;
@@ -348,7 +347,7 @@
 	memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len);
 
 	ndev->netdev_ops = &ntb_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ntb_ethtool_ops);
+	ndev->ethtool_ops = &ntb_ethtool_ops;
 
 	dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers);
 	if (!dev->qp) {

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 6a17f92..65de0ca 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig

@@ -24,6 +24,12 @@
 	---help---
 	  Currently supports the am79c874
 
+config AMD_XGBE_PHY
+	tristate "Driver for the AMD 10GbE (amd-xgbe) PHYs"
+	depends on OF
+	---help---
+	  Currently supports the AMD 10GbE PHY
+
 config MARVELL_PHY
 	tristate "Drivers for Marvell PHYs"
 	---help---

diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 07d2402..7dc3d5b 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile

@@ -33,3 +33,4 @@
 obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
 obj-$(CONFIG_MDIO_SUN4I)	+= mdio-sun4i.o
 obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
+obj-$(CONFIG_AMD_XGBE_PHY)	+= amd-xgbe-phy.o

diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c
new file mode 100644
index 0000000..b57c224
--- /dev/null
+++ b/drivers/net/phy/amd-xgbe-phy.c

@@ -0,0 +1,1357 @@
+/*
+ * AMD 10Gb Ethernet PHY driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include <linux/mdio.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/uaccess.h>
+#include <asm/irq.h>
+
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("1.0.0-a");
+MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver");
+
+#define XGBE_PHY_ID	0x000162d0
+#define XGBE_PHY_MASK	0xfffffff0
+
+#define XGBE_AN_INT_CMPLT		0x01
+#define XGBE_AN_INC_LINK		0x02
+#define XGBE_AN_PG_RCV			0x04
+
+#define XNP_MCF_NULL_MESSAGE		0x001
+#define XNP_ACK_PROCESSED		(1 << 12)
+#define XNP_MP_FORMATTED		(1 << 13)
+#define XNP_NP_EXCHANGE			(1 << 15)
+
+#ifndef MDIO_PMA_10GBR_PMD_CTRL
+#define MDIO_PMA_10GBR_PMD_CTRL		0x0096
+#endif
+#ifndef MDIO_PMA_10GBR_FEC_CTRL
+#define MDIO_PMA_10GBR_FEC_CTRL		0x00ab
+#endif
+#ifndef MDIO_AN_XNP
+#define MDIO_AN_XNP			0x0016
+#endif
+
+#ifndef MDIO_AN_INTMASK
+#define MDIO_AN_INTMASK			0x8001
+#endif
+#ifndef MDIO_AN_INT
+#define MDIO_AN_INT			0x8002
+#endif
+
+#ifndef MDIO_CTRL1_SPEED1G
+#define MDIO_CTRL1_SPEED1G		(MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
+#endif
+
+/* SerDes integration register offsets */
+#define SIR0_STATUS			0x0040
+#define SIR1_SPEED			0x0000
+
+/* SerDes integration register entry bit positions and sizes */
+#define SIR0_STATUS_RX_READY_INDEX	0
+#define SIR0_STATUS_RX_READY_WIDTH	1
+#define SIR0_STATUS_TX_READY_INDEX	8
+#define SIR0_STATUS_TX_READY_WIDTH	1
+#define SIR1_SPEED_DATARATE_INDEX	4
+#define SIR1_SPEED_DATARATE_WIDTH	2
+#define SIR1_SPEED_PI_SPD_SEL_INDEX	12
+#define SIR1_SPEED_PI_SPD_SEL_WIDTH	4
+#define SIR1_SPEED_PLLSEL_INDEX		3
+#define SIR1_SPEED_PLLSEL_WIDTH		1
+#define SIR1_SPEED_RATECHANGE_INDEX	6
+#define SIR1_SPEED_RATECHANGE_WIDTH	1
+#define SIR1_SPEED_TXAMP_INDEX		8
+#define SIR1_SPEED_TXAMP_WIDTH		4
+#define SIR1_SPEED_WORDMODE_INDEX	0
+#define SIR1_SPEED_WORDMODE_WIDTH	3
+
+#define SPEED_10000_CDR			0x7
+#define SPEED_10000_PLL			0x1
+#define SPEED_10000_RATE		0x0
+#define SPEED_10000_TXAMP		0xa
+#define SPEED_10000_WORD		0x7
+
+#define SPEED_2500_CDR			0x2
+#define SPEED_2500_PLL			0x0
+#define SPEED_2500_RATE			0x2
+#define SPEED_2500_TXAMP		0xf
+#define SPEED_2500_WORD			0x1
+
+#define SPEED_1000_CDR			0x2
+#define SPEED_1000_PLL			0x0
+#define SPEED_1000_RATE			0x3
+#define SPEED_1000_TXAMP		0xf
+#define SPEED_1000_WORD			0x1
+
+
+/* SerDes RxTx register offsets */
+#define RXTX_REG20			0x0050
+#define RXTX_REG114			0x01c8
+
+/* SerDes RxTx register entry bit positions and sizes */
+#define RXTX_REG20_BLWC_ENA_INDEX	2
+#define RXTX_REG20_BLWC_ENA_WIDTH	1
+#define RXTX_REG114_PQ_REG_INDEX	9
+#define RXTX_REG114_PQ_REG_WIDTH	7
+
+#define RXTX_10000_BLWC			0
+#define RXTX_10000_PQ			0x1e
+
+#define RXTX_2500_BLWC			1
+#define RXTX_2500_PQ			0xa
+
+#define RXTX_1000_BLWC			1
+#define RXTX_1000_PQ			0xa
+
+/* Bit setting and getting macros
+ *  The get macro will extract the current bit field value from within
+ *  the variable
+ *
+ *  The set macro will clear the current bit field value within the
+ *  variable and then set the bit field of the variable to the
+ *  specified value
+ */
+#define GET_BITS(_var, _index, _width)					\
+	(((_var) >> (_index)) & ((0x1 << (_width)) - 1))
+
+#define SET_BITS(_var, _index, _width, _val)				\
+do {									\
+	(_var) &= ~(((0x1 << (_width)) - 1) << (_index));		\
+	(_var) |= (((_val) & ((0x1 << (_width)) - 1)) << (_index));	\
+} while (0)
+
+/* Macros for reading or writing SerDes integration registers
+ *  The ioread macros will get bit fields or full values using the
+ *  register definitions formed using the input names
+ *
+ *  The iowrite macros will set bit fields or full values using the
+ *  register definitions formed using the input names
+ */
+#define XSIR0_IOREAD(_priv, _reg)					\
+	ioread16((_priv)->sir0_regs + _reg)
+
+#define XSIR0_IOREAD_BITS(_priv, _reg, _field)				\
+	GET_BITS(XSIR0_IOREAD((_priv), _reg),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XSIR0_IOWRITE(_priv, _reg, _val)				\
+	iowrite16((_val), (_priv)->sir0_regs + _reg)
+
+#define XSIR0_IOWRITE_BITS(_priv, _reg, _field, _val)			\
+do {									\
+	u16 reg_val = XSIR0_IOREAD((_priv), _reg);			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XSIR0_IOWRITE((_priv), _reg, reg_val);				\
+} while (0)
+
+#define XSIR1_IOREAD(_priv, _reg)					\
+	ioread16((_priv)->sir1_regs + _reg)
+
+#define XSIR1_IOREAD_BITS(_priv, _reg, _field)				\
+	GET_BITS(XSIR1_IOREAD((_priv), _reg),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XSIR1_IOWRITE(_priv, _reg, _val)				\
+	iowrite16((_val), (_priv)->sir1_regs + _reg)
+
+#define XSIR1_IOWRITE_BITS(_priv, _reg, _field, _val)			\
+do {									\
+	u16 reg_val = XSIR1_IOREAD((_priv), _reg);			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XSIR1_IOWRITE((_priv), _reg, reg_val);				\
+} while (0)
+
+
+/* Macros for reading or writing SerDes RxTx registers
+ *  The ioread macros will get bit fields or full values using the
+ *  register definitions formed using the input names
+ *
+ *  The iowrite macros will set bit fields or full values using the
+ *  register definitions formed using the input names
+ */
+#define XRXTX_IOREAD(_priv, _reg)					\
+	ioread16((_priv)->rxtx_regs + _reg)
+
+#define XRXTX_IOREAD_BITS(_priv, _reg, _field)				\
+	GET_BITS(XRXTX_IOREAD((_priv), _reg),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XRXTX_IOWRITE(_priv, _reg, _val)				\
+	iowrite16((_val), (_priv)->rxtx_regs + _reg)
+
+#define XRXTX_IOWRITE_BITS(_priv, _reg, _field, _val)			\
+do {									\
+	u16 reg_val = XRXTX_IOREAD((_priv), _reg);			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XRXTX_IOWRITE((_priv), _reg, reg_val);				\
+} while (0)
+
+
+/* States of the auto-negotiation state machine; used for both the
+ * an_state and an_result fields of struct amd_xgbe_phy_priv.
+ * AMD_XGBE_AN_READY is the value an_state is reset to when the state
+ * machine work function exits.
+ */
+enum amd_xgbe_phy_an {
+	AMD_XGBE_AN_READY = 0,
+	AMD_XGBE_AN_START,
+	AMD_XGBE_AN_EVENT,
+	AMD_XGBE_AN_PAGE_RECEIVED,
+	AMD_XGBE_AN_INCOMPAT_LINK,
+	AMD_XGBE_AN_COMPLETE,
+	AMD_XGBE_AN_NO_LINK,
+	AMD_XGBE_AN_EXIT,
+	AMD_XGBE_AN_ERROR,
+};
+
+/* Page-receive progress, tracked separately for KR and KX mode through
+ * the kr_state and kx_state fields of struct amd_xgbe_phy_priv.
+ * NOTE(review): BPA presumably stands for "base page ability" and XNP for
+ * "extended next page" -- confirm against the hardware documentation.
+ */
+enum amd_xgbe_phy_rx {
+	AMD_XGBE_RX_READY = 0,
+	AMD_XGBE_RX_BPA,
+	AMD_XGBE_RX_XNP,
+	AMD_XGBE_RX_COMPLETE,
+};
+
+/* Current operating mode of the PHY: KR is recorded after switching to
+ * the 10G setup, KX after switching to the 1G or 2.5G setup.
+ */
+enum amd_xgbe_phy_mode {
+	AMD_XGBE_MODE_KR,
+	AMD_XGBE_MODE_KX,
+};
+
+/* Per-PHY driver state, reached through phydev->priv.  The an_* and
+ * k[rx]_state fields are read and written under an_mutex by the
+ * auto-negotiation work function and the PHY callbacks.
+ */
+struct amd_xgbe_phy_priv {
+	struct platform_device *pdev;
+	struct device *dev;
+
+	struct phy_device *phydev;
+
+	/* SerDes related mmio resources */
+	struct resource *rxtx_res;
+	struct resource *sir0_res;
+	struct resource *sir1_res;
+
+	/* SerDes related mmio registers */
+	void __iomem *rxtx_regs;	/* SerDes Rx/Tx CSRs */
+	void __iomem *sir0_regs;	/* SerDes integration registers (1/2) */
+	void __iomem *sir1_regs;	/* SerDes integration registers (2/2) */
+
+	/* Maintain link status for re-starting auto-negotiation */
+	unsigned int link;
+	enum amd_xgbe_phy_mode mode;
+
+	/* Auto-negotiation state machine support */
+	struct mutex an_mutex;
+	enum amd_xgbe_phy_an an_result;
+	enum amd_xgbe_phy_an an_state;
+	enum amd_xgbe_phy_rx kr_state;
+	enum amd_xgbe_phy_rx kx_state;
+	struct work_struct an_work;
+	struct workqueue_struct *an_workqueue;
+};
+
+/* Set bit 0x02 in the PMA/PMD 10GBASE-R PMD control register.
+ * Returns 0, or the negative value from a failed register read.
+ */
+static int amd_xgbe_an_enable_kr_training(struct phy_device *phydev)
+{
+	int pmd_ctrl;
+
+	pmd_ctrl = phy_read_mmd(phydev, MDIO_MMD_PMAPMD,
+				MDIO_PMA_10GBR_PMD_CTRL);
+	if (pmd_ctrl < 0)
+		return pmd_ctrl;
+
+	phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
+		      pmd_ctrl | 0x02);
+
+	return 0;
+}
+
+/* Clear bit 0x02 in the PMA/PMD 10GBASE-R PMD control register.
+ * Returns 0, or the negative value from a failed register read.
+ */
+static int amd_xgbe_an_disable_kr_training(struct phy_device *phydev)
+{
+	int pmd_ctrl;
+
+	pmd_ctrl = phy_read_mmd(phydev, MDIO_MMD_PMAPMD,
+				MDIO_PMA_10GBR_PMD_CTRL);
+	if (pmd_ctrl < 0)
+		return pmd_ctrl;
+
+	phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
+		      pmd_ctrl & ~0x02);
+
+	return 0;
+}
+
+/* Toggle the low-power bit of the PCS control 1 register: set it, wait
+ * 75-100us, then clear it again.  Returns 0, or the negative value from
+ * a failed register read.
+ */
+static int amd_xgbe_phy_pcs_power_cycle(struct phy_device *phydev)
+{
+	int ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+
+	if (ctrl1 < 0)
+		return ctrl1;
+
+	/* Enter low power ... */
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+		      ctrl1 | MDIO_CTRL1_LPOWER);
+
+	usleep_range(75, 100);
+
+	/* ... and leave it again */
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+		      ctrl1 & ~MDIO_CTRL1_LPOWER);
+
+	return 0;
+}
+
+/* Begin a SerDes rate change by setting the RATECHANGE field of the
+ * SIR1_SPEED register (asserts Rx and Tx ratechange).
+ */
+static void amd_xgbe_phy_serdes_start_ratechange(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *serdes = phydev->priv;
+
+	XSIR1_IOWRITE_BITS(serdes, SIR1_SPEED, RATECHANGE, 1);
+}
+
+/* Finish a SerDes rate change: clear the RATECHANGE field of SIR1_SPEED,
+ * then poll SIR0_STATUS until both RX_READY and TX_READY are set.
+ *
+ * NOTE(review): the poll has no timeout, so it does not return if the
+ * hardware never reports ready.
+ */
+static void amd_xgbe_phy_serdes_complete_ratechange(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+
+	/* Release Rx and Tx ratechange */
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, RATECHANGE, 0);
+
+	/* Wait for Rx and Tx ready: keep polling while either side is
+	 * still not ready, so the loop ends only once both are ready.
+	 */
+	while (!XSIR0_IOREAD_BITS(priv, SIR0_STATUS, RX_READY) ||
+	       !XSIR0_IOREAD_BITS(priv, SIR0_STATUS, TX_READY))
+		usleep_range(10, 20);
+}
+
+/* Switch the PHY to the KR/10G setup.
+ *
+ * Enables KR training, selects the 10GBASE-R PCS type and the 10G speed
+ * selection, power-cycles the PCS, then programs the SerDes with the
+ * SPEED_10000_* and RXTX_10000_* values between asserting and releasing
+ * ratechange.  On success priv->mode is set to AMD_XGBE_MODE_KR.
+ *
+ * Returns 0, or a negative value from a failed MDIO read.  Results of
+ * the MDIO writes are not checked here.
+ */
+static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ret;
+
+	/* Enable KR training */
+	ret = amd_xgbe_an_enable_kr_training(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set PCS to KR/10G speed */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_PCS_CTRL2_TYPE;
+	ret |= MDIO_PCS_CTRL2_10GBR;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret);
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_CTRL1_SPEEDSEL;
+	ret |= MDIO_CTRL1_SPEED10G;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	ret = amd_xgbe_phy_pcs_power_cycle(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set SerDes to 10G speed */
+	amd_xgbe_phy_serdes_start_ratechange(phydev);
+
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_10000_RATE);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_10000_WORD);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, SPEED_10000_TXAMP);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_10000_PLL);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PI_SPD_SEL, SPEED_10000_CDR);
+
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, RXTX_10000_BLWC);
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, RXTX_10000_PQ);
+
+	amd_xgbe_phy_serdes_complete_ratechange(phydev);
+
+	priv->mode = AMD_XGBE_MODE_KR;
+
+	return 0;
+}
+
+/* Switch the PHY to the 2.5G setup.
+ *
+ * Disables KR training, selects the 10GBASE-X PCS type and the 1G speed
+ * selection in the PCS control registers, power-cycles the PCS, then
+ * programs the SerDes with the SPEED_2500_* and RXTX_2500_* values
+ * between asserting and releasing ratechange.  On success priv->mode is
+ * set to AMD_XGBE_MODE_KX.
+ *
+ * Returns 0, or a negative value from a failed MDIO read.  Results of
+ * the MDIO writes are not checked here.
+ */
+static int amd_xgbe_phy_gmii_2500_mode(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ret;
+
+	/* Disable KR training */
+	ret = amd_xgbe_an_disable_kr_training(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set PCS to KX/1G speed */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_PCS_CTRL2_TYPE;
+	ret |= MDIO_PCS_CTRL2_10GBX;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret);
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_CTRL1_SPEEDSEL;
+	ret |= MDIO_CTRL1_SPEED1G;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	ret = amd_xgbe_phy_pcs_power_cycle(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set SerDes to 2.5G speed */
+	amd_xgbe_phy_serdes_start_ratechange(phydev);
+
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_2500_RATE);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_2500_WORD);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, SPEED_2500_TXAMP);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_2500_PLL);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PI_SPD_SEL, SPEED_2500_CDR);
+
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, RXTX_2500_BLWC);
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, RXTX_2500_PQ);
+
+	amd_xgbe_phy_serdes_complete_ratechange(phydev);
+
+	priv->mode = AMD_XGBE_MODE_KX;
+
+	return 0;
+}
+
+/* Switch the PHY to the KX/1G setup.
+ *
+ * Disables KR training, selects the 10GBASE-X PCS type and the 1G speed
+ * selection, power-cycles the PCS, then programs the SerDes with the
+ * SPEED_1000_* and RXTX_1000_* values between asserting and releasing
+ * ratechange.  On success priv->mode is set to AMD_XGBE_MODE_KX.
+ *
+ * Returns 0, or a negative value from a failed MDIO read.  Results of
+ * the MDIO writes are not checked here.
+ */
+static int amd_xgbe_phy_gmii_mode(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ret;
+
+	/* Disable KR training */
+	ret = amd_xgbe_an_disable_kr_training(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set PCS to KX/1G speed */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_PCS_CTRL2_TYPE;
+	ret |= MDIO_PCS_CTRL2_10GBX;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret);
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~MDIO_CTRL1_SPEEDSEL;
+	ret |= MDIO_CTRL1_SPEED1G;
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	ret = amd_xgbe_phy_pcs_power_cycle(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Set SerDes to 1G speed */
+	amd_xgbe_phy_serdes_start_ratechange(phydev);
+
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_1000_RATE);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_1000_WORD);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, SPEED_1000_TXAMP);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_1000_PLL);
+	XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PI_SPD_SEL, SPEED_1000_CDR);
+
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, RXTX_1000_BLWC);
+	XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, RXTX_1000_PQ);
+
+	amd_xgbe_phy_serdes_complete_ratechange(phydev);
+
+	priv->mode = AMD_XGBE_MODE_KX;
+
+	return 0;
+}
+
+/* Flip between the two operating modes: from KR go to the 1G (KX) setup,
+ * from anything else go to the 10G (KR) setup.  Returns the result of
+ * the mode-setting helper that was called.
+ */
+static int amd_xgbe_phy_switch_mode(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+
+	if (priv->mode == AMD_XGBE_MODE_KR)
+		return amd_xgbe_phy_gmii_mode(phydev);
+
+	return amd_xgbe_phy_xgmii_mode(phydev);
+}
+
+/* Switch operating mode and tell the state machine to start over:
+ * returns AMD_XGBE_AN_START, or AMD_XGBE_AN_ERROR if the switch failed.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_switch_mode(struct phy_device *phydev)
+{
+	if (amd_xgbe_phy_switch_mode(phydev) < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	return AMD_XGBE_AN_START;
+}
+
+/* Finish page exchange and, in KR mode, start link training.
+ *
+ * *state is always set to AMD_XGBE_RX_COMPLETE.  In KX mode nothing else
+ * is done.  In KR mode bit 0x01 of the FEC control register is set when
+ * both the local advertisement register 3 and the link partner register 3
+ * have any of bits 0xc000 set (cleared otherwise), and then bit 0x01 of
+ * the PMD control register is set to start KR training.
+ *
+ * Returns AMD_XGBE_AN_EVENT, or AMD_XGBE_AN_ERROR if an MDIO read fails.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_tx_training(struct phy_device *phydev,
+						    enum amd_xgbe_phy_rx *state)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ad_reg, lp_reg, ret;
+
+	*state = AMD_XGBE_RX_COMPLETE;
+
+	/* If we're in KX mode then we're done */
+	if (priv->mode == AMD_XGBE_MODE_KX)
+		return AMD_XGBE_AN_EVENT;
+
+	/* Enable/Disable FEC */
+	ad_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+	if (ad_reg < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	lp_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+	if (lp_reg < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FEC_CTRL);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if ((ad_reg & 0xc000) && (lp_reg & 0xc000))
+		ret |= 0x01;
+	else
+		ret &= ~0x01;
+
+	phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FEC_CTRL, ret);
+
+	/* Start KR training */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	ret |= 0x01;
+	phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, ret);
+
+	return AMD_XGBE_AN_EVENT;
+}
+
+/* Send a null-message extended next page: moves *state to
+ * AMD_XGBE_RX_XNP, zeroes the two upper XNP registers and writes the
+ * message word last.  Always returns AMD_XGBE_AN_EVENT.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_tx_xnp(struct phy_device *phydev,
+					       enum amd_xgbe_phy_rx *state)
+{
+	u16 xnp_word = XNP_MCF_NULL_MESSAGE | XNP_MP_FORMATTED;
+
+	*state = AMD_XGBE_RX_XNP;
+
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP + 2, 0);
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP + 1, 0);
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP, xnp_word);
+
+	return AMD_XGBE_AN_EVENT;
+}
+
+/* Handle a received page while in the AMD_XGBE_RX_BPA state.
+ *
+ * If link partner ability register 2 lacks the bit for the current mode
+ * (0x80 in KR, 0x20 in KX) the mode is switched and negotiation restarts.
+ * Otherwise an extended next page is sent when either side has the
+ * XNP_NP_EXCHANGE bit set, else training is started.
+ *
+ * Returns the next state machine state, or AMD_XGBE_AN_ERROR if an MDIO
+ * read fails.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_rx_bpa(struct phy_device *phydev,
+					       enum amd_xgbe_phy_rx *state)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	unsigned int mode_bit;
+	int lp_ability, local_adv, partner_adv;
+
+	/* Read Base Ability register 2 first */
+	lp_ability = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+	if (lp_ability < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	/* Check for a supported mode, otherwise restart in a different one */
+	if (priv->mode == AMD_XGBE_MODE_KR)
+		mode_bit = 0x80;
+	else
+		mode_bit = 0x20;
+
+	if (!(lp_ability & mode_bit))
+		return amd_xgbe_an_switch_mode(phydev);
+
+	/* Check Extended Next Page support */
+	local_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+	if (local_adv < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	partner_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
+	if (partner_adv < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if ((local_adv & XNP_NP_EXCHANGE) || (partner_adv & XNP_NP_EXCHANGE))
+		return amd_xgbe_an_tx_xnp(phydev, state);
+
+	return amd_xgbe_an_tx_training(phydev, state);
+}
+
+/* Handle a received page while in the AMD_XGBE_RX_XNP state: send
+ * another extended next page when either side has the XNP_NP_EXCHANGE
+ * bit set, else start training.  Returns AMD_XGBE_AN_ERROR if an MDIO
+ * read fails.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_rx_xnp(struct phy_device *phydev,
+					       enum amd_xgbe_phy_rx *state)
+{
+	int local_adv, partner_adv;
+
+	/* Check Extended Next Page support */
+	local_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+	if (local_adv < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	partner_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
+	if (partner_adv < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if ((local_adv & XNP_NP_EXCHANGE) || (partner_adv & XNP_NP_EXCHANGE))
+		return amd_xgbe_an_tx_xnp(phydev, state);
+
+	return amd_xgbe_an_tx_training(phydev, state);
+}
+
+/* Program the advertisement registers and (re)start auto-negotiation.
+ *
+ * The receive state of the current mode must still be AMD_XGBE_RX_READY;
+ * if it is not, this mode was already tried and AMD_XGBE_AN_NO_LINK is
+ * returned.  Otherwise the state moves to AMD_XGBE_RX_BPA, the three
+ * advertisement registers are written from phydev->supported in the order
+ * 3, 2, 1, the AN interrupt register is cleared and the enable and
+ * restart bits are set in the AN control register.
+ *
+ * Returns AMD_XGBE_AN_EVENT on success, AMD_XGBE_AN_NO_LINK as described
+ * above, or AMD_XGBE_AN_ERROR if an MDIO read fails.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_start(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ret;
+
+	/* Be sure we aren't looping trying to negotiate */
+	if (priv->mode == AMD_XGBE_MODE_KR) {
+		if (priv->kr_state != AMD_XGBE_RX_READY)
+			return AMD_XGBE_AN_NO_LINK;
+		priv->kr_state = AMD_XGBE_RX_BPA;
+	} else {
+		if (priv->kx_state != AMD_XGBE_RX_READY)
+			return AMD_XGBE_AN_NO_LINK;
+		priv->kx_state = AMD_XGBE_RX_BPA;
+	}
+
+	/* Set up Advertisement register 3 first */
+	ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if (phydev->supported & SUPPORTED_10000baseR_FEC)
+		ret |= 0xc000;
+	else
+		ret &= ~0xc000;
+
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2, ret);
+
+	/* Set up Advertisement register 2 next */
+	ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if (phydev->supported & SUPPORTED_10000baseKR_Full)
+		ret |= 0x80;
+	else
+		ret &= ~0x80;
+
+	if (phydev->supported & SUPPORTED_1000baseKX_Full)
+		ret |= 0x20;
+	else
+		ret &= ~0x20;
+
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1, ret);
+
+	/* Set up Advertisement register 1 last */
+	ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if (phydev->supported & SUPPORTED_Pause)
+		ret |= 0x400;
+	else
+		ret &= ~0x400;
+
+	if (phydev->supported & SUPPORTED_Asym_Pause)
+		ret |= 0x800;
+	else
+		ret &= ~0x800;
+
+	/* We don't intend to perform XNP */
+	ret &= ~XNP_NP_EXCHANGE;
+
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, ret);
+
+	/* Enable and start auto-negotiation */
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0);
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+	if (ret < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	ret |= MDIO_AN_CTRL1_ENABLE;
+	ret |= MDIO_AN_CTRL1_RESTART;
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, ret);
+
+	return AMD_XGBE_AN_EVENT;
+}
+
+/* Poll the AN interrupt register and translate it into the next state.
+ *
+ * Page received takes priority over incompatible link, which takes
+ * priority over completion.  When one of them is pending the interrupt
+ * register is cleared; when none is, AMD_XGBE_AN_EVENT is returned and
+ * the register is left alone.  Returns AMD_XGBE_AN_ERROR if the read
+ * fails.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_event(struct phy_device *phydev)
+{
+	enum amd_xgbe_phy_an next;
+	int an_int;
+
+	an_int = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT);
+	if (an_int < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	if (an_int & XGBE_AN_PG_RCV)
+		next = AMD_XGBE_AN_PAGE_RECEIVED;
+	else if (an_int & XGBE_AN_INC_LINK)
+		next = AMD_XGBE_AN_INCOMPAT_LINK;
+	else if (an_int & XGBE_AN_INT_CMPLT)
+		next = AMD_XGBE_AN_COMPLETE;
+	else
+		return AMD_XGBE_AN_EVENT;
+
+	/* Something was pending: acknowledge it */
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0);
+
+	return next;
+}
+
+/* Dispatch a received page to the handler matching the receive state of
+ * the current mode (kr_state in KR mode, kx_state otherwise).  Any state
+ * other than BPA or XNP yields AMD_XGBE_AN_ERROR.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_page_received(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	enum amd_xgbe_phy_rx *rx_state;
+
+	if (priv->mode == AMD_XGBE_MODE_KR)
+		rx_state = &priv->kr_state;
+	else
+		rx_state = &priv->kx_state;
+
+	switch (*rx_state) {
+	case AMD_XGBE_RX_BPA:
+		return amd_xgbe_an_rx_bpa(phydev, rx_state);
+
+	case AMD_XGBE_RX_XNP:
+		return amd_xgbe_an_rx_xnp(phydev, rx_state);
+
+	default:
+		return AMD_XGBE_AN_ERROR;
+	}
+}
+
+/* Incompatible link reported: switch to the other mode and restart
+ * negotiation (AMD_XGBE_AN_START), or report AMD_XGBE_AN_ERROR if the
+ * mode switch failed.
+ */
+static enum amd_xgbe_phy_an amd_xgbe_an_incompat_link(struct phy_device *phydev)
+{
+	if (amd_xgbe_phy_switch_mode(phydev) < 0)
+		return AMD_XGBE_AN_ERROR;
+
+	return AMD_XGBE_AN_START;
+}
+
+/* Work function driving the auto-negotiation state machine.
+ *
+ * Each loop iteration takes an_mutex, dispatches on priv->an_state and
+ * stores the handler's result back into priv->an_state.  The loop ends
+ * when the state is COMPLETE, NO_LINK or EXIT, or when a handler (or an
+ * unhandled state) produces ERROR, which is also logged.  On exit the
+ * final state is copied to priv->an_result and priv->an_state is reset
+ * to AMD_XGBE_AN_READY, still under the mutex.
+ *
+ * While the next state is AMD_XGBE_AN_EVENT the function sleeps 20-50us
+ * between iterations with the mutex released.
+ */
+static void amd_xgbe_an_state_machine(struct work_struct *work)
+{
+	struct amd_xgbe_phy_priv *priv = container_of(work,
+						      struct amd_xgbe_phy_priv,
+						      an_work);
+	struct phy_device *phydev = priv->phydev;
+	enum amd_xgbe_phy_an cur_state;
+	int sleep;
+
+	while (1) {
+		mutex_lock(&priv->an_mutex);
+
+		/* Remembered only for the error message below */
+		cur_state = priv->an_state;
+
+		switch (priv->an_state) {
+		case AMD_XGBE_AN_START:
+			priv->an_state = amd_xgbe_an_start(phydev);
+			break;
+
+		case AMD_XGBE_AN_EVENT:
+			priv->an_state = amd_xgbe_an_event(phydev);
+			break;
+
+		case AMD_XGBE_AN_PAGE_RECEIVED:
+			priv->an_state = amd_xgbe_an_page_received(phydev);
+			break;
+
+		case AMD_XGBE_AN_INCOMPAT_LINK:
+			priv->an_state = amd_xgbe_an_incompat_link(phydev);
+			break;
+
+		case AMD_XGBE_AN_COMPLETE:
+		case AMD_XGBE_AN_NO_LINK:
+		case AMD_XGBE_AN_EXIT:
+			/* Terminal states: leave with the mutex still held */
+			goto exit_unlock;
+
+		default:
+			priv->an_state = AMD_XGBE_AN_ERROR;
+		}
+
+		if (priv->an_state == AMD_XGBE_AN_ERROR) {
+			netdev_err(phydev->attached_dev,
+				   "error during auto-negotiation, state=%u\n",
+				   cur_state);
+			goto exit_unlock;
+		}
+
+		sleep = (priv->an_state == AMD_XGBE_AN_EVENT) ? 1 : 0;
+
+		mutex_unlock(&priv->an_mutex);
+
+		if (sleep)
+			usleep_range(20, 50);
+	}
+
+exit_unlock:
+	priv->an_result = priv->an_state;
+	priv->an_state = AMD_XGBE_AN_READY;
+
+	mutex_unlock(&priv->an_mutex);
+}
+
+/* Request a PCS reset and wait for the reset bit to clear.
+ *
+ * The bit is polled up to 50 times with a 20ms sleep before each read.
+ * Returns 0 once the bit is clear, -ETIMEDOUT if it is still set after
+ * the last poll, or the negative value from a failed MDIO read.
+ */
+static int amd_xgbe_phy_soft_reset(struct phy_device *phydev)
+{
+	int ctrl1, tries;
+
+	ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ctrl1 < 0)
+		return ctrl1;
+
+	phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+		      ctrl1 | MDIO_CTRL1_RESET);
+
+	for (tries = 50; tries > 0; tries--) {
+		msleep(20);
+
+		ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+		if (ctrl1 < 0)
+			return ctrl1;
+
+		if (!(ctrl1 & MDIO_CTRL1_RESET))
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/* Set the supported/advertised feature masks and quiesce AN interrupts
+ * by writing 0 to the AN interrupt mask and interrupt registers.
+ * Always returns 0.
+ */
+static int amd_xgbe_phy_config_init(struct phy_device *phydev)
+{
+	/* Everything this PHY supports is also advertised */
+	phydev->supported = SUPPORTED_Autoneg |
+			    SUPPORTED_Pause | SUPPORTED_Asym_Pause |
+			    SUPPORTED_Backplane |
+			    SUPPORTED_1000baseKX_Full |
+			    SUPPORTED_2500baseX_Full |
+			    SUPPORTED_10000baseKR_Full |
+			    SUPPORTED_10000baseR_FEC;
+	phydev->advertising = phydev->supported;
+
+	/* Mask first, then clear anything already pending */
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0);
+
+	return 0;
+}
+
+/* Configure a forced (non-negotiated) link.
+ *
+ * Clears the AN enable bit, switches to the setup matching phydev->speed
+ * (10000, 2500 or 1000), requires full duplex and clears the pause
+ * settings.  Returns 0, -EINVAL for an unsupported speed or duplex, or a
+ * negative value from a failed MDIO read or mode switch.
+ */
+static int amd_xgbe_phy_setup_forced(struct phy_device *phydev)
+{
+	int an_ctrl, err;
+
+	/* Disable auto-negotiation */
+	an_ctrl = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+	if (an_ctrl < 0)
+		return an_ctrl;
+
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1,
+		      an_ctrl & ~MDIO_AN_CTRL1_ENABLE);
+
+	/* Validate/Set specified speed */
+	switch (phydev->speed) {
+	case SPEED_10000:
+		err = amd_xgbe_phy_xgmii_mode(phydev);
+		break;
+
+	case SPEED_2500:
+		err = amd_xgbe_phy_gmii_2500_mode(phydev);
+		break;
+
+	case SPEED_1000:
+		err = amd_xgbe_phy_gmii_mode(phydev);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (err < 0)
+		return err;
+
+	/* Only full duplex is accepted */
+	if (phydev->duplex != DUPLEX_FULL)
+		return -EINVAL;
+
+	phydev->pause = 0;
+	phydev->asym_pause = 0;
+
+	return 0;
+}
+
+/*
+ * Start (or restart) auto-negotiation.  With auto-negotiation disabled the
+ * forced settings are applied instead.
+ */
+static int amd_xgbe_phy_config_aneg(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int ret;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return amd_xgbe_phy_setup_forced(phydev);
+
+	/* Auto-negotiation requires the AN MMD to be present */
+	if (!(phydev->c45_ids.devices_in_package & MDIO_DEVS_AN))
+		return -EINVAL;
+
+	/* Read the current speed mode; only a read failure is acted upon */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (ret < 0)
+		return ret;
+
+	/* Reset the state machine bookkeeping under the AN lock ... */
+	mutex_lock(&priv->an_mutex);
+	priv->an_result = AMD_XGBE_AN_READY;
+	priv->an_state = AMD_XGBE_AN_START;
+	priv->kr_state = AMD_XGBE_RX_READY;
+	priv->kx_state = AMD_XGBE_RX_READY;
+	mutex_unlock(&priv->an_mutex);
+
+	/* ... and hand it to the worker */
+	queue_work(priv->an_workqueue, &priv->an_work);
+
+	return 0;
+}
+
+/* Return non-zero when the recorded auto-negotiation result is COMPLETE */
+static int amd_xgbe_phy_aneg_done(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	int done;
+
+	/* an_result is updated under an_mutex, so sample it the same way */
+	mutex_lock(&priv->an_mutex);
+	done = (priv->an_result == AMD_XGBE_AN_COMPLETE);
+	mutex_unlock(&priv->an_mutex);
+
+	return done;
+}
+
+/*
+ * Refresh phydev->link from the PCS status register.
+ *
+ * While the auto-negotiation state is anything other than READY the link
+ * is reported as up without touching the hardware.  Otherwise the latched
+ * status is read; if it shows no link the mode is switched and the status
+ * is read one more time.  A down-to-up transition, relative to the value
+ * cached in priv->link, restarts auto-negotiation.
+ *
+ * Returns 0 on success or a negative error code from an MDIO read or from
+ * amd_xgbe_phy_config_aneg().
+ */
+static int amd_xgbe_phy_update_link(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	enum amd_xgbe_phy_an state;
+	unsigned int check_again, autoneg;
+	int ret;
+
+	/* If we're doing auto-negotiation don't report link down */
+	mutex_lock(&priv->an_mutex);
+	state = priv->an_state;
+	mutex_unlock(&priv->an_mutex);
+
+	if (state != AMD_XGBE_AN_READY) {
+		phydev->link = 1;
+		return 0;
+	}
+
+	/* Since the device can be in the wrong mode when a link is
+	 * (re-)established (cable connected after the interface is
+	 * up, etc.), the link status may report no link. If there
+	 * is no link, try switching modes and checking the status
+	 * again.
+	 */
+	check_again = 1;
+again:
+	/* Link status is latched low, so read once to clear
+	 * and then read again to get current state
+	 */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if (ret < 0)
+		return ret;
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if (ret < 0)
+		return ret;
+
+	phydev->link = (ret & MDIO_STAT1_LSTATUS) ? 1 : 0;
+
+	if (!phydev->link) {
+		/* NOTE(review): the result of the mode switch is never
+		 * checked.  The switch also runs a second time when the
+		 * retry still shows no link - presumably to return to the
+		 * original mode; confirm against amd_xgbe_phy_switch_mode().
+		 */
+		ret = amd_xgbe_phy_switch_mode(phydev);
+		if (check_again) {
+			check_again = 0;
+			goto again;
+		}
+	}
+
+	/* Only a down -> up transition triggers a new negotiation */
+	autoneg = (phydev->link && !priv->link) ? 1 : 0;
+	priv->link = phydev->link;
+	if (autoneg) {
+		/* Link is (back) up, re-start auto-negotiation */
+		ret = amd_xgbe_phy_config_aneg(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Update link, speed, duplex and pause in @phydev.
+ *
+ * With auto-negotiation enabled and complete, the local advertisement is
+ * ANDed with the link partner ability registers to derive pause settings
+ * and speed, and the PCS mode is changed if it does not match the result.
+ * With auto-negotiation disabled, speed is taken from the current PCS type
+ * and pause is cleared.  Duplex is always reported as full.
+ *
+ * Returns 0 on success (including "negotiation not finished yet") or a
+ * negative error code.
+ */
+static int amd_xgbe_phy_read_status(struct phy_device *phydev)
+{
+	u32 mmd_mask = phydev->c45_ids.devices_in_package;
+	int ret, mode, ad_ret, lp_ret;
+
+	ret = amd_xgbe_phy_update_link(phydev);
+	if (ret)
+		return ret;
+
+	/* Current PCS type selection, used below to detect a mode mismatch */
+	mode = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (mode < 0)
+		return mode;
+	mode &= MDIO_PCS_CTRL2_TYPE;
+
+	if (phydev->autoneg == AUTONEG_ENABLE) {
+		if (!(mmd_mask & MDIO_DEVS_AN))
+			return -EINVAL;
+
+		/* Nothing to resolve until negotiation has completed */
+		if (!amd_xgbe_phy_aneg_done(phydev))
+			return 0;
+
+		/* Compare Advertisement and Link Partner register 1 */
+		ad_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+		if (ad_ret < 0)
+			return ad_ret;
+		lp_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
+		if (lp_ret < 0)
+			return lp_ret;
+
+		/* NOTE(review): 0x400 / 0x800 are presumably the pause and
+		 * asymmetric pause ability bits - confirm against the AN
+		 * register definitions.
+		 */
+		ad_ret &= lp_ret;
+		phydev->pause = (ad_ret & 0x400) ? 1 : 0;
+		phydev->asym_pause = (ad_ret & 0x800) ? 1 : 0;
+
+		/* Compare Advertisement and Link Partner register 2 */
+		ad_ret = phy_read_mmd(phydev, MDIO_MMD_AN,
+				      MDIO_AN_ADVERTISE + 1);
+		if (ad_ret < 0)
+			return ad_ret;
+		lp_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+		if (lp_ret < 0)
+			return lp_ret;
+
+		/* NOTE(review): 0x80 is presumably the 10G (KR) ability bit -
+		 * confirm.  When it is not common to both sides the code
+		 * falls back to 1000 Mb/s.
+		 */
+		ad_ret &= lp_ret;
+		if (ad_ret & 0x80) {
+			phydev->speed = SPEED_10000;
+			if (mode != MDIO_PCS_CTRL2_10GBR) {
+				ret = amd_xgbe_phy_xgmii_mode(phydev);
+				if (ret < 0)
+					return ret;
+			}
+		} else {
+			phydev->speed = SPEED_1000;
+			if (mode == MDIO_PCS_CTRL2_10GBR) {
+				ret = amd_xgbe_phy_gmii_mode(phydev);
+				if (ret < 0)
+					return ret;
+			}
+		}
+
+		phydev->duplex = DUPLEX_FULL;
+	} else {
+		/* Forced mode: report what the PCS is currently set to */
+		phydev->speed = (mode == MDIO_PCS_CTRL2_10GBR) ? SPEED_10000
+							       : SPEED_1000;
+		phydev->duplex = DUPLEX_FULL;
+		phydev->pause = 0;
+		phydev->asym_pause = 0;
+	}
+
+	return 0;
+}
+
+/* Set the low-power bit in the PCS control register */
+static int amd_xgbe_phy_suspend(struct phy_device *phydev)
+{
+	int ctrl1, ret = 0;
+
+	mutex_lock(&phydev->lock);
+
+	ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ctrl1 < 0)
+		ret = ctrl1;	/* propagate the read failure */
+	else
+		phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+			      ctrl1 | MDIO_CTRL1_LPOWER);
+
+	mutex_unlock(&phydev->lock);
+
+	return ret;
+}
+
+/* Clear the low-power bit in the PCS control register */
+static int amd_xgbe_phy_resume(struct phy_device *phydev)
+{
+	int ctrl1, ret = 0;
+
+	mutex_lock(&phydev->lock);
+
+	ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (ctrl1 < 0)
+		ret = ctrl1;	/* propagate the read failure */
+	else
+		phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+			      ctrl1 & ~MDIO_CTRL1_LPOWER);
+
+	mutex_unlock(&phydev->lock);
+
+	return ret;
+}
+
+/*
+ * Bind to a PHY described by a device tree node.
+ *
+ * Looks up the platform device behind the PHY's of_node, allocates the
+ * private data, maps the three register areas (memory resources 0-2),
+ * records the current KR/KX mode and creates the single-threaded workqueue
+ * that runs the auto-negotiation state machine.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * acquired so far is released in reverse order through the err_* labels.
+ */
+static int amd_xgbe_phy_probe(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv;
+	struct platform_device *pdev;
+	struct device *dev;
+	char *wq_name;
+	int ret;
+
+	if (!phydev->dev.of_node)
+		return -EINVAL;
+
+	/* The matching of_dev_put() is done on both exit paths below */
+	pdev = of_find_device_by_node(phydev->dev.of_node);
+	if (!pdev)
+		return -EINVAL;
+	dev = &pdev->dev;
+
+	/* Temporary name buffer, freed again before returning */
+	wq_name = kasprintf(GFP_KERNEL, "%s-amd-xgbe-phy", phydev->bus->name);
+	if (!wq_name) {
+		ret = -ENOMEM;
+		goto err_pdev;
+	}
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto err_name;
+	}
+
+	priv->pdev = pdev;
+	priv->dev = dev;
+	priv->phydev = phydev;
+
+	/* Get the device mmio areas */
+	priv->rxtx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->rxtx_regs = devm_ioremap_resource(dev, priv->rxtx_res);
+	if (IS_ERR(priv->rxtx_regs)) {
+		dev_err(dev, "rxtx ioremap failed\n");
+		ret = PTR_ERR(priv->rxtx_regs);
+		goto err_priv;
+	}
+
+	priv->sir0_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	priv->sir0_regs = devm_ioremap_resource(dev, priv->sir0_res);
+	if (IS_ERR(priv->sir0_regs)) {
+		dev_err(dev, "sir0 ioremap failed\n");
+		ret = PTR_ERR(priv->sir0_regs);
+		goto err_rxtx;
+	}
+
+	priv->sir1_res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	priv->sir1_regs = devm_ioremap_resource(dev, priv->sir1_res);
+	if (IS_ERR(priv->sir1_regs)) {
+		dev_err(dev, "sir1 ioremap failed\n");
+		ret = PTR_ERR(priv->sir1_regs);
+		goto err_sir0;
+	}
+
+	/* Cached link state starts out as "up", so the first link-up seen
+	 * by amd_xgbe_phy_update_link() is not treated as a transition.
+	 */
+	priv->link = 1;
+
+	/* Record which mode the PCS is currently configured for */
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2);
+	if (ret < 0)
+		goto err_sir1;
+	if ((ret & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR)
+		priv->mode = AMD_XGBE_MODE_KR;
+	else
+		priv->mode = AMD_XGBE_MODE_KX;
+
+	/* Auto-negotiation runs from its own single-threaded workqueue */
+	mutex_init(&priv->an_mutex);
+	INIT_WORK(&priv->an_work, amd_xgbe_an_state_machine);
+	priv->an_workqueue = create_singlethread_workqueue(wq_name);
+	if (!priv->an_workqueue) {
+		ret = -ENOMEM;
+		goto err_sir1;
+	}
+
+	phydev->priv = priv;
+
+	kfree(wq_name);
+	of_dev_put(pdev);
+
+	return 0;
+
+	/* Error unwind: each label undoes one step, then falls into the next */
+err_sir1:
+	devm_iounmap(dev, priv->sir1_regs);
+	devm_release_mem_region(dev, priv->sir1_res->start,
+				resource_size(priv->sir1_res));
+
+err_sir0:
+	devm_iounmap(dev, priv->sir0_regs);
+	devm_release_mem_region(dev, priv->sir0_res->start,
+				resource_size(priv->sir0_res));
+
+err_rxtx:
+	devm_iounmap(dev, priv->rxtx_regs);
+	devm_release_mem_region(dev, priv->rxtx_res->start,
+				resource_size(priv->rxtx_res));
+
+err_priv:
+	devm_kfree(dev, priv);
+
+err_name:
+	kfree(wq_name);
+
+err_pdev:
+	of_dev_put(pdev);
+
+	return ret;
+}
+
+/*
+ * Undo amd_xgbe_phy_probe(): stop the auto-negotiation worker, destroy its
+ * workqueue and release the register mappings and private data in the
+ * reverse order of their acquisition.
+ */
+static void amd_xgbe_phy_remove(struct phy_device *phydev)
+{
+	struct amd_xgbe_phy_priv *priv = phydev->priv;
+	struct device *dev = priv->dev;
+
+	/* Stop any in process auto-negotiation */
+	mutex_lock(&priv->an_mutex);
+	priv->an_state = AMD_XGBE_AN_EXIT;
+	mutex_unlock(&priv->an_mutex);
+
+	/* Wait for queued work to finish before tearing the queue down */
+	flush_workqueue(priv->an_workqueue);
+	destroy_workqueue(priv->an_workqueue);
+
+	/* Release resources */
+	devm_iounmap(dev, priv->sir1_regs);
+	devm_release_mem_region(dev, priv->sir1_res->start,
+				resource_size(priv->sir1_res));
+
+	devm_iounmap(dev, priv->sir0_regs);
+	devm_release_mem_region(dev, priv->sir0_res->start,
+				resource_size(priv->sir0_res));
+
+	devm_iounmap(dev, priv->rxtx_regs);
+	devm_release_mem_region(dev, priv->rxtx_res->start,
+				resource_size(priv->rxtx_res));
+
+	devm_kfree(dev, priv);
+}
+
+/* A device matches when the identifier of its PCS MMD is XGBE_PHY_ID */
+static int amd_xgbe_match_phy_device(struct phy_device *phydev)
+{
+	u32 pcs_id = phydev->c45_ids.device_ids[MDIO_MMD_PCS];
+
+	return pcs_id == XGBE_PHY_ID;
+}
+
+/* Driver table handed to phy_drivers_register(); it has a single entry */
+static struct phy_driver amd_xgbe_phy_driver[] = {
+	{
+		.phy_id			= XGBE_PHY_ID,
+		.phy_id_mask		= XGBE_PHY_MASK,
+		.name			= "AMD XGBE PHY",
+		/* supported modes are filled in by amd_xgbe_phy_config_init() */
+		.features		= 0,
+		.probe			= amd_xgbe_phy_probe,
+		.remove			= amd_xgbe_phy_remove,
+		.soft_reset		= amd_xgbe_phy_soft_reset,
+		.config_init		= amd_xgbe_phy_config_init,
+		.suspend		= amd_xgbe_phy_suspend,
+		.resume			= amd_xgbe_phy_resume,
+		.config_aneg		= amd_xgbe_phy_config_aneg,
+		.aneg_done		= amd_xgbe_phy_aneg_done,
+		.read_status		= amd_xgbe_phy_read_status,
+		.match_phy_device	= amd_xgbe_match_phy_device,
+		.driver			= {
+			.owner = THIS_MODULE,
+		},
+	},
+};
+
+/* Module entry point: register every entry of amd_xgbe_phy_driver[] */
+static int __init amd_xgbe_phy_init(void)
+{
+	return phy_drivers_register(amd_xgbe_phy_driver,
+				    ARRAY_SIZE(amd_xgbe_phy_driver));
+}
+
+/* Module exit point: unregister what amd_xgbe_phy_init() registered */
+static void __exit amd_xgbe_phy_exit(void)
+{
+	phy_drivers_unregister(amd_xgbe_phy_driver,
+			       ARRAY_SIZE(amd_xgbe_phy_driver));
+}
+
+module_init(amd_xgbe_phy_init);
+module_exit(amd_xgbe_phy_exit);
+
+/* MDIO id table published through MODULE_DEVICE_TABLE(); { } ends the list */
+static struct mdio_device_id __maybe_unused amd_xgbe_phy_ids[] = {
+	{ XGBE_PHY_ID, XGBE_PHY_MASK },
+	{ }
+};
+MODULE_DEVICE_TABLE(mdio, amd_xgbe_phy_ids);

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 643464d..6c622ae 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c

@@ -144,41 +144,11 @@
 
 static int at803x_config_init(struct phy_device *phydev)
 {
-	int val;
 	int ret;
-	u32 features;
 
-	features = SUPPORTED_TP | SUPPORTED_MII | SUPPORTED_AUI |
-		   SUPPORTED_FIBRE | SUPPORTED_BNC;
-
-	val = phy_read(phydev, MII_BMSR);
-	if (val < 0)
-		return val;
-
-	if (val & BMSR_ANEGCAPABLE)
-		features |= SUPPORTED_Autoneg;
-	if (val & BMSR_100FULL)
-		features |= SUPPORTED_100baseT_Full;
-	if (val & BMSR_100HALF)
-		features |= SUPPORTED_100baseT_Half;
-	if (val & BMSR_10FULL)
-		features |= SUPPORTED_10baseT_Full;
-	if (val & BMSR_10HALF)
-		features |= SUPPORTED_10baseT_Half;
-
-	if (val & BMSR_ESTATEN) {
-		val = phy_read(phydev, MII_ESTATUS);
-		if (val < 0)
-			return val;
-
-		if (val & ESTATUS_1000_TFULL)
-			features |= SUPPORTED_1000baseT_Full;
-		if (val & ESTATUS_1000_THALF)
-			features |= SUPPORTED_1000baseT_Half;
-	}
-
-	phydev->supported = features;
-	phydev->advertising = features;
+	ret = genphy_config_init(phydev);
+	if (ret < 0)
+		return ret;
 
 	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
 		ret = phy_write(phydev, AT803X_DEBUG_ADDR,
@@ -283,8 +253,7 @@
 
 static void __exit atheros_exit(void)
 {
-	return phy_drivers_unregister(at803x_driver,
-				      ARRAY_SIZE(at803x_driver));
+	phy_drivers_unregister(at803x_driver, ARRAY_SIZE(at803x_driver));
 }
 
 module_init(atheros_init);

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index ba55adf..d60d875 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c

@@ -21,6 +21,7 @@
 #include <linux/phy_fixed.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #define MII_REGS_NUM 29
 
@@ -31,7 +32,7 @@
 };
 
 struct fixed_phy {
-	int id;
+	int addr;
 	u16 regs[MII_REGS_NUM];
 	struct phy_device *phydev;
 	struct fixed_phy_status status;
@@ -104,8 +105,8 @@
 	if (fp->status.asym_pause)
 		lpa |= LPA_PAUSE_ASYM;
 
-	fp->regs[MII_PHYSID1] = fp->id >> 16;
-	fp->regs[MII_PHYSID2] = fp->id;
+	fp->regs[MII_PHYSID1] = 0;
+	fp->regs[MII_PHYSID2] = 0;
 
 	fp->regs[MII_BMSR] = bmsr;
 	fp->regs[MII_BMCR] = bmcr;
@@ -115,7 +116,7 @@
 	return 0;
 }
 
-static int fixed_mdio_read(struct mii_bus *bus, int phy_id, int reg_num)
+static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
 {
 	struct fixed_mdio_bus *fmb = bus->priv;
 	struct fixed_phy *fp;
@@ -124,7 +125,7 @@
 		return -1;
 
 	list_for_each_entry(fp, &fmb->phys, node) {
-		if (fp->id == phy_id) {
+		if (fp->addr == phy_addr) {
 			/* Issue callback if user registered it. */
 			if (fp->link_update) {
 				fp->link_update(fp->phydev->attached_dev,
@@ -138,7 +139,7 @@
 	return 0xFFFF;
 }
 
-static int fixed_mdio_write(struct mii_bus *bus, int phy_id, int reg_num,
+static int fixed_mdio_write(struct mii_bus *bus, int phy_addr, int reg_num,
 			    u16 val)
 {
 	return 0;
@@ -160,7 +161,7 @@
 		return -EINVAL;
 
 	list_for_each_entry(fp, &fmb->phys, node) {
-		if (fp->id == phydev->phy_id) {
+		if (fp->addr == phydev->addr) {
 			fp->link_update = link_update;
 			fp->phydev = phydev;
 			return 0;
@@ -171,7 +172,7 @@
 }
 EXPORT_SYMBOL_GPL(fixed_phy_set_link_update);
 
-int fixed_phy_add(unsigned int irq, int phy_id,
+int fixed_phy_add(unsigned int irq, int phy_addr,
 		  struct fixed_phy_status *status)
 {
 	int ret;
@@ -184,9 +185,9 @@
 
 	memset(fp->regs, 0xFF,  sizeof(fp->regs[0]) * MII_REGS_NUM);
 
-	fmb->irqs[phy_id] = irq;
+	fmb->irqs[phy_addr] = irq;
 
-	fp->id = phy_id;
+	fp->addr = phy_addr;
 	fp->status = *status;
 
 	ret = fixed_phy_update_regs(fp);
@@ -203,6 +204,66 @@
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+/* Unlink and free the fixed PHY registered at @phy_addr, if there is one */
+void fixed_phy_del(int phy_addr)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct fixed_phy *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &fmb->phys, node) {
+		if (cur->addr != phy_addr)
+			continue;
+
+		/* Only the first match is removed */
+		list_del(&cur->node);
+		kfree(cur);
+		return;
+	}
+}
+EXPORT_SYMBOL_GPL(fixed_phy_del);
+
+static int phy_fixed_addr;
+static DEFINE_SPINLOCK(phy_fixed_addr_lock);
+
+/**
+ * fixed_phy_register - add a fixed PHY at the next free address and register it
+ * @irq:	interrupt for the PHY.  NOTE(review): currently unused, the PHY
+ *		is always added with PHY_POLL - confirm whether that is intended
+ * @status:	link parameters the emulated PHY reports
+ * @np:		device tree node to attach to the PHY device; a reference is
+ *		taken and dropped again if registration fails
+ *
+ * Addresses are handed out sequentially from a counter that is never
+ * rewound, so at most PHY_MAX_ADDR fixed PHYs can be created this way.
+ *
+ * Returns 0 on success, -ENOSPC when the addresses are exhausted, -EINVAL
+ * when the PHY device cannot be created, or the error returned by
+ * fixed_phy_add() / phy_device_register().
+ */
+int fixed_phy_register(unsigned int irq,
+		       struct fixed_phy_status *status,
+		       struct device_node *np)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct phy_device *phy;
+	int phy_addr;
+	int ret;
+
+	/* Get the next available PHY address, up to PHY_MAX_ADDR */
+	spin_lock(&phy_fixed_addr_lock);
+	if (phy_fixed_addr == PHY_MAX_ADDR) {
+		spin_unlock(&phy_fixed_addr_lock);
+		return -ENOSPC;
+	}
+	phy_addr = phy_fixed_addr++;
+	spin_unlock(&phy_fixed_addr_lock);
+
+	ret = fixed_phy_add(PHY_POLL, phy_addr, status);
+	if (ret < 0)
+		return ret;
+
+	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
+	if (!phy || IS_ERR(phy)) {
+		fixed_phy_del(phy_addr);
+		return -EINVAL;
+	}
+
+	of_node_get(np);
+	phy->dev.of_node = np;
+
+	ret = phy_device_register(phy);
+	if (ret) {
+		/* Undo everything done above, in reverse order */
+		phy_device_free(phy);
+		of_node_put(np);
+		fixed_phy_del(phy_addr);
+		return ret;
+	}
+
+	return 0;
+}
+/* Exported like fixed_phy_add()/fixed_phy_del() so modular users can link */
+EXPORT_SYMBOL_GPL(fixed_phy_register);
+
 static int __init fixed_mdio_bus_init(void)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 76f54b3..2e58aa5 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c

@@ -69,6 +69,73 @@
 }
 EXPORT_SYMBOL(mdiobus_alloc_size);
 
+/* devres release callback: free the mii_bus whose pointer is kept in @res */
+static void _devm_mdiobus_free(struct device *dev, void *res)
+{
+	struct mii_bus **busp = res;
+
+	mdiobus_free(*busp);
+}
+
+/* devres match callback: true when the entry @res holds the bus @data */
+static int devm_mdiobus_match(struct device *dev, void *res, void *data)
+{
+	struct mii_bus **busp = res;
+
+	/* An entry without a bus pointer is unexpected: warn and skip it */
+	if (WARN_ON(!busp || !*busp))
+		return 0;
+
+	return *busp == data;
+}
+
+/**
+ * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size()
+ * @dev:		Device to allocate mii_bus for
+ * @sizeof_priv:	Space to allocate for private structure.
+ *
+ * Allocates an mii_bus with mdiobus_alloc_size() and records it in a devres
+ * entry of @dev whose release callback calls mdiobus_free().  To free the
+ * bus separately, devm_mdiobus_free() must be used.
+ *
+ * RETURNS:
+ * Pointer to allocated mii_bus on success, NULL on failure.
+ */
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
+{
+	struct mii_bus **slot, *bus;
+
+	/* devres entry that remembers which bus to free */
+	slot = devres_alloc(_devm_mdiobus_free, sizeof(*slot), GFP_KERNEL);
+	if (!slot)
+		return NULL;
+
+	bus = mdiobus_alloc_size(sizeof_priv);
+	if (!bus) {
+		/* nothing to manage, drop the unused entry again */
+		devres_free(slot);
+		return NULL;
+	}
+
+	*slot = bus;
+	devres_add(dev, slot);
+
+	return bus;
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_alloc_size);
+
+/**
+ * devm_mdiobus_free - Resource-managed mdiobus_free()
+ * @dev:		Device this mii_bus belongs to
+ * @bus:		the mii_bus associated with the device
+ *
+ * Releases the devres entry created by devm_mdiobus_alloc_size() for @bus,
+ * which frees the bus.  Warns if the release reports an error.
+ */
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus)
+{
+	WARN_ON(devres_release(dev, _devm_mdiobus_free,
+			       devm_mdiobus_match, bus));
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_free);
+
 /**
  * mdiobus_release - mii_bus device release callback
  * @d: the target struct device that contains the mii_bus
@@ -233,6 +300,12 @@
 	if (IS_ERR(phydev) || phydev == NULL)
 		return phydev;
 
+	/*
+	 * For DT, see if the auto-probed phy has a corresponding child
+	 * in the bus node, and set the of_node pointer in this case.
+	 */
+	of_mdiobus_link_phydev(bus, phydev);
+
 	err = phy_device_register(phydev);
 	if (err) {
 		phy_device_free(phydev);

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index d849684..bc7c7d2 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c

@@ -283,6 +283,110 @@
 	return 0;
 }
 
+#define MII_KSZ9031RN_MMD_CTRL_REG	0x0d
+#define MII_KSZ9031RN_MMD_REGDATA_REG	0x0e
+#define OP_DATA				1
+#define KSZ9031_PS_TO_REG		60
+
+/* Extended registers */
+#define MII_KSZ9031RN_CONTROL_PAD_SKEW	4
+#define MII_KSZ9031RN_RX_DATA_PAD_SKEW	5
+#define MII_KSZ9031RN_TX_DATA_PAD_SKEW	6
+#define MII_KSZ9031RN_CLK_PAD_SKEW	8
+
+/*
+ * Write @val to extended register @regnum of device @dev_addr through the
+ * MMD control / register-data register pair: the device address is written
+ * to the control register, the register number to the data register, then
+ * the control register is rewritten with @mode in bits 15:14 before the
+ * value itself goes to the data register.
+ *
+ * Returns the result of the final phy_write(); the results of the three
+ * set-up writes are not checked.
+ */
+static int ksz9031_extended_write(struct phy_device *phydev,
+				  u8 mode, u32 dev_addr, u32 regnum, u16 val)
+{
+	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr);
+	phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum);
+	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr);
+	return phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, val);
+}
+
+/*
+ * Read extended register @regnum of device @dev_addr.  The register is
+ * selected with the same three-write sequence as in
+ * ksz9031_extended_write(), then the value is read from the data register.
+ *
+ * Returns the result of the final phy_read(); the results of the set-up
+ * writes are not checked.
+ */
+static int ksz9031_extended_read(struct phy_device *phydev,
+				 u8 mode, u32 dev_addr, u32 regnum)
+{
+	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr);
+	phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum);
+	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr);
+	return phy_read(phydev, MII_KSZ9031RN_MMD_REGDATA_REG);
+}
+
+/*
+ * Merge the skew properties named in @field into pad skew register @reg.
+ *
+ * Field i occupies @field_sz bits starting at bit i * @field_sz and is set
+ * to the property value divided by KSZ9031_PS_TO_REG.  Fields whose
+ * property is absent keep the value read back from the register; if no
+ * property is present at all the register is not touched and 0 is
+ * returned.  Otherwise the result of the register write is returned.
+ */
+static int ksz9031_of_load_skew_values(struct phy_device *phydev,
+				       struct device_node *of_node,
+				       u16 reg, size_t field_sz,
+				       char *field[], u8 numfields)
+{
+	/* distinct negative sentinels mark "property not found" */
+	int val[4] = {-1, -2, -3, -4};
+	int matches = 0;
+	u16 maxval = (field_sz == 4) ? 0xf : 0x1f;
+	u16 newval = 0;
+	u16 mask;
+	int i;
+
+	for (i = 0; i < numfields; i++) {
+		if (of_property_read_u32(of_node, field[i], val + i) == 0)
+			matches++;
+	}
+
+	if (matches == 0)
+		return 0;
+
+	/* only a partial update needs the current register contents */
+	if (matches < numfields)
+		newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg);
+
+	for (i = 0; i < numfields; i++) {
+		unsigned int shift = field_sz * i;
+
+		/* sentinel still in place: leave this field alone */
+		if (val[i] == -(i + 1))
+			continue;
+
+		mask = 0xffff ^ (maxval << shift);
+		newval = (newval & mask) |
+			(((val[i] / KSZ9031_PS_TO_REG) & maxval) << shift);
+	}
+
+	return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval);
+}
+
+/*
+ * Program the clock, control and RX/TX data pad skew registers from the
+ * device tree.  The PHY's own node is used when it has one, otherwise the
+ * node of the parent device.  Always returns 0; the results of the
+ * individual register updates are not checked.
+ */
+static int ksz9031_config_init(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct device_node *of_node = dev->of_node;
+	char *clk_skews[2] = {"rxc-skew-ps", "txc-skew-ps"};
+	char *control_skews[2] = {"txen-skew-ps", "rxdv-skew-ps"};
+	char *rx_data_skews[4] = {
+		"rxd0-skew-ps", "rxd1-skew-ps",
+		"rxd2-skew-ps", "rxd3-skew-ps"
+	};
+	char *tx_data_skews[4] = {
+		"txd0-skew-ps", "txd1-skew-ps",
+		"txd2-skew-ps", "txd3-skew-ps"
+	};
+
+	/* no node of our own: fall back to the parent's (may be NULL too) */
+	if (!of_node)
+		of_node = dev->parent->of_node;
+
+	if (!of_node)
+		return 0;
+
+	/* clock pads use 5-bit fields, all other pads 4-bit fields */
+	ksz9031_of_load_skew_values(phydev, of_node,
+			MII_KSZ9031RN_CLK_PAD_SKEW, 5,
+			clk_skews, 2);
+
+	ksz9031_of_load_skew_values(phydev, of_node,
+			MII_KSZ9031RN_CONTROL_PAD_SKEW, 4,
+			control_skews, 2);
+
+	ksz9031_of_load_skew_values(phydev, of_node,
+			MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4,
+			rx_data_skews, 4);
+
+	ksz9031_of_load_skew_values(phydev, of_node,
+			MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
+			tx_data_skews, 4);
+
+	return 0;
+}
+
 #define KSZ8873MLL_GLOBAL_CONTROL_4	0x06
 #define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX	(1 << 6)
 #define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED	(1 << 4)
@@ -469,7 +573,7 @@
 	.features	= (PHY_GBIT_FEATURES | SUPPORTED_Pause
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
-	.config_init	= kszphy_config_init,
+	.config_init	= ksz9031_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 4987a1c..35d753d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c

@@ -33,6 +33,7 @@
 #include <linux/mdio.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
+#include <linux/of.h>
 
 #include <asm/irq.h>
 
@@ -1067,14 +1068,11 @@
 }
 EXPORT_SYMBOL(genphy_soft_reset);
 
-static int genphy_config_init(struct phy_device *phydev)
+int genphy_config_init(struct phy_device *phydev)
 {
 	int val;
 	u32 features;
 
-	/* For now, I'll claim that the generic driver supports
-	 * all possible port types
-	 */
 	features = (SUPPORTED_TP | SUPPORTED_MII
 			| SUPPORTED_AUI | SUPPORTED_FIBRE |
 			SUPPORTED_BNC);
@@ -1107,8 +1105,8 @@
 			features |= SUPPORTED_1000baseT_Half;
 	}
 
-	phydev->supported = features;
-	phydev->advertising = features;
+	phydev->supported &= features;
+	phydev->advertising &= features;
 
 	return 0;
 }
@@ -1118,6 +1116,7 @@
 	/* Do nothing for now */
 	return 0;
 }
+EXPORT_SYMBOL(genphy_config_init);
 
 static int gen10g_config_init(struct phy_device *phydev)
 {
@@ -1168,6 +1167,38 @@
 	return 0;
 }
 
+/*
+ * Limit phydev->supported according to the optional "max-speed" property
+ * of the PHY's device tree node.  Does nothing when CONFIG_OF_MDIO is
+ * disabled, when the PHY has no node, or when the property is absent.
+ */
+static void of_set_phy_supported(struct phy_device *phydev)
+{
+	struct device_node *node = phydev->dev.of_node;
+	u32 max_speed;
+
+	if (!IS_ENABLED(CONFIG_OF_MDIO))
+		return;
+
+	if (!node)
+		return;
+
+	if (!of_property_read_u32(node, "max-speed", &max_speed)) {
+		/* The default values for phydev->supported are provided by the PHY
+		 * driver "features" member, we want to reset to sane defaults first
+		 * before supporting higher speeds.
+		 */
+		phydev->supported &= PHY_DEFAULT_FEATURES;
+
+		switch (max_speed) {
+		default:
+			/* unrecognised speed: only the defaults remain */
+			return;
+
+		/* each speed also enables every lower speed below it */
+		case SPEED_1000:
+			phydev->supported |= PHY_1000BT_FEATURES;
+			/* fall through */
+		case SPEED_100:
+			phydev->supported |= PHY_100BT_FEATURES;
+			/* fall through */
+		case SPEED_10:
+			phydev->supported |= PHY_10BT_FEATURES;
+		}
+	}
+}
+
 /**
  * phy_probe - probe and init a PHY device
  * @dev: device to probe and init
@@ -1202,7 +1233,8 @@
 	 * or both of these values
 	 */
 	phydev->supported = phydrv->features;
-	phydev->advertising = phydrv->features;
+	of_set_phy_supported(phydev);
+	phydev->advertising = phydev->supported;
 
 	/* Set the state to READY by default */
 	phydev->state = PHY_READY;
@@ -1295,7 +1327,9 @@
 	.name		= "Generic PHY",
 	.soft_reset	= genphy_soft_reset,
 	.config_init	= genphy_config_init,
-	.features	= 0,
+	.features	= PHY_GBIT_FEATURES | SUPPORTED_MII |
+			  SUPPORTED_AUI | SUPPORTED_FIBRE |
+			  SUPPORTED_BNC,
 	.config_aneg	= genphy_config_aneg,
 	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index fa1d69a..45483fd 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c

@@ -64,65 +64,51 @@
 	return err;
 }
 
-/* RTL8201CP */
-static struct phy_driver rtl8201cp_driver = {
-	.phy_id         = 0x00008201,
-	.name           = "RTL8201CP Ethernet",
-	.phy_id_mask    = 0x0000ffff,
-	.features       = PHY_BASIC_FEATURES,
-	.flags          = PHY_HAS_INTERRUPT,
-	.config_aneg    = &genphy_config_aneg,
-	.read_status    = &genphy_read_status,
-	.driver         = { .owner = THIS_MODULE,},
-};
-
-/* RTL8211B */
-static struct phy_driver rtl8211b_driver = {
-	.phy_id		= 0x001cc912,
-	.name		= "RTL8211B Gigabit Ethernet",
-	.phy_id_mask	= 0x001fffff,
-	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT,
-	.config_aneg	= &genphy_config_aneg,
-	.read_status	= &genphy_read_status,
-	.ack_interrupt	= &rtl821x_ack_interrupt,
-	.config_intr	= &rtl8211b_config_intr,
-	.driver		= { .owner = THIS_MODULE,},
-};
-
-/* RTL8211E */
-static struct phy_driver rtl8211e_driver = {
-	.phy_id		= 0x001cc915,
-	.name		= "RTL8211E Gigabit Ethernet",
-	.phy_id_mask	= 0x001fffff,
-	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT,
-	.config_aneg	= &genphy_config_aneg,
-	.read_status	= &genphy_read_status,
-	.ack_interrupt	= &rtl821x_ack_interrupt,
-	.config_intr	= &rtl8211e_config_intr,
-	.suspend	= genphy_suspend,
-	.resume		= genphy_resume,
-	.driver		= { .owner = THIS_MODULE,},
+/* All Realtek PHYs handled by this module, registered as one array */
+static struct phy_driver realtek_drvs[] = {
+	{
+		/* RTL8201CP */
+		.phy_id         = 0x00008201,
+		.name           = "RTL8201CP Ethernet",
+		.phy_id_mask    = 0x0000ffff,
+		.features       = PHY_BASIC_FEATURES,
+		.flags          = PHY_HAS_INTERRUPT,
+		.config_aneg    = &genphy_config_aneg,
+		.read_status    = &genphy_read_status,
+		.driver         = { .owner = THIS_MODULE,},
+	}, {
+		/* RTL8211B */
+		.phy_id		= 0x001cc912,
+		.name		= "RTL8211B Gigabit Ethernet",
+		.phy_id_mask	= 0x001fffff,
+		.features	= PHY_GBIT_FEATURES,
+		.flags		= PHY_HAS_INTERRUPT,
+		.config_aneg	= &genphy_config_aneg,
+		.read_status	= &genphy_read_status,
+		.ack_interrupt	= &rtl821x_ack_interrupt,
+		.config_intr	= &rtl8211b_config_intr,
+		.driver		= { .owner = THIS_MODULE,},
+	}, {
+		/* RTL8211E */
+		.phy_id		= 0x001cc915,
+		.name		= "RTL8211E Gigabit Ethernet",
+		.phy_id_mask	= 0x001fffff,
+		.features	= PHY_GBIT_FEATURES,
+		.flags		= PHY_HAS_INTERRUPT,
+		.config_aneg	= &genphy_config_aneg,
+		.read_status	= &genphy_read_status,
+		.ack_interrupt	= &rtl821x_ack_interrupt,
+		.config_intr	= &rtl8211e_config_intr,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.driver		= { .owner = THIS_MODULE,},
+	},
 };
 
 static int __init realtek_init(void)
 {
-	int ret;
-
-	ret = phy_driver_register(&rtl8201cp_driver);
-	if (ret < 0)
-		return -ENODEV;
-	ret = phy_driver_register(&rtl8211b_driver);
-	if (ret < 0)
-		return -ENODEV;
-	return phy_driver_register(&rtl8211e_driver);
+	return phy_drivers_register(realtek_drvs, ARRAY_SIZE(realtek_drvs));
 }
 
 static void __exit realtek_exit(void)
 {
-	phy_driver_unregister(&rtl8211b_driver);
-	phy_driver_unregister(&rtl8211e_driver);
+	phy_drivers_unregister(realtek_drvs, ARRAY_SIZE(realtek_drvs));
 }
 
 module_init(realtek_init);

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 11f3481..180c494 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c

@@ -249,8 +249,7 @@
 
 static void __exit smsc_exit(void)
 {
-	return phy_drivers_unregister(smsc_phy_driver,
-		ARRAY_SIZE(smsc_phy_driver));
+	phy_drivers_unregister(smsc_phy_driver, ARRAY_SIZE(smsc_phy_driver));
 }
 
 MODULE_DESCRIPTION("SMSC PHY driver");

diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 14372c6..5dc0935d 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c

@@ -319,8 +319,7 @@
 
 static void __exit vsc82xx_exit(void)
 {
-	return phy_drivers_unregister(vsc82xx_driver,
-		ARRAY_SIZE(vsc82xx_driver));
+	phy_drivers_unregister(vsc82xx_driver, ARRAY_SIZE(vsc82xx_driver));
 }
 
 module_init(vsc82xx_init);

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index e3923eb..91d6c12 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c

@@ -757,7 +757,7 @@
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};
@@ -778,7 +778,7 @@
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 0180531..1aff970 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c

@@ -281,7 +281,7 @@
 	nf_reset(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	ip_local_out(skb);

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index a849718..dac7a0d 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c

@@ -494,7 +494,7 @@
 	ndev->mtu = RIO_MAX_MSG_SIZE - 14;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
-	SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops);
+	ndev->ethtool_ops = &rionet_ethtool_ops;
 
 	spin_lock_init(&rnet->lock);
 	spin_lock_init(&rnet->tx_lock);

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ce4989b..b4958c7 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c

@@ -968,7 +968,7 @@
 static void __team_compute_features(struct team *team)
 {
 	struct team_port *port;
-	u32 vlan_features = TEAM_VLAN_FEATURES;
+	u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
 	unsigned short max_hard_header_len = ETH_HLEN;
 	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
 

diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index dbde341..a58dfeb 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c

@@ -49,7 +49,7 @@
 struct lb_priv_ex {
 	struct team *team;
 	struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
-	struct sock_fprog *orig_fprog;
+	struct sock_fprog_kern *orig_fprog;
 	struct {
 		unsigned int refresh_interval; /* in tenths of second */
 		struct delayed_work refresh_dw;
@@ -241,15 +241,15 @@
 	return 0;
 }
 
-static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
+static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len,
 			  const void *data)
 {
-	struct sock_fprog *fprog;
+	struct sock_fprog_kern *fprog;
 	struct sock_filter *filter = (struct sock_filter *) data;
 
 	if (data_len % sizeof(struct sock_filter))
 		return -EINVAL;
-	fprog = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
+	fprog = kmalloc(sizeof(*fprog), GFP_KERNEL);
 	if (!fprog)
 		return -ENOMEM;
 	fprog->filter = kmemdup(filter, data_len, GFP_KERNEL);
@@ -262,7 +262,7 @@
 	return 0;
 }
 
-static void __fprog_destroy(struct sock_fprog *fprog)
+static void __fprog_destroy(struct sock_fprog_kern *fprog)
 {
 	kfree(fprog->filter);
 	kfree(fprog);
@@ -273,7 +273,7 @@
 	struct lb_priv *lb_priv = get_lb_priv(team);
 	struct sk_filter *fp = NULL;
 	struct sk_filter *orig_fp;
-	struct sock_fprog *fprog = NULL;
+	struct sock_fprog_kern *fprog = NULL;
 	int err;
 
 	if (ctx->data.bin_val.len) {

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..98bad1f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c

@@ -498,12 +498,12 @@
 	for (i = 0; i < n; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
 		BUG_ON(!tfile);
-		wake_up_all(&tfile->wq.wait);
+		tfile->socket.sk->sk_data_ready(tfile->socket.sk);
 		RCU_INIT_POINTER(tfile->tun, NULL);
 		--tun->numqueues;
 	}
 	list_for_each_entry(tfile, &tun->disabled, next) {
-		wake_up_all(&tfile->wq.wait);
+		tfile->socket.sk->sk_data_ready(tfile->socket.sk);
 		RCU_INIT_POINTER(tfile->tun, NULL);
 	}
 	BUG_ON(tun->numqueues != 0);
@@ -807,8 +807,7 @@
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
-				   POLLRDNORM | POLLRDBAND);
+	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
 
 	rcu_read_unlock();
 	return NETDEV_TX_OK;
@@ -965,7 +964,7 @@
 
 	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
 
-	poll_wait(file, &tfile->wq.wait, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
@@ -1330,47 +1329,26 @@
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   const struct iovec *iv, ssize_t len, int noblock)
 {
-	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t ret = 0;
+	int peeked, err, off = 0;
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
-	if (unlikely(!noblock))
-		add_wait_queue(&tfile->wq.wait, &wait);
-	while (len) {
-		if (unlikely(!noblock))
-			current->state = TASK_INTERRUPTIBLE;
+	if (!len)
+		return ret;
 
-		/* Read frames from the queue */
-		if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) {
-			if (noblock) {
-				ret = -EAGAIN;
-				break;
-			}
-			if (signal_pending(current)) {
-				ret = -ERESTARTSYS;
-				break;
-			}
-			if (tun->dev->reg_state != NETREG_REGISTERED) {
-				ret = -EIO;
-				break;
-			}
+	if (tun->dev->reg_state != NETREG_REGISTERED)
+		return -EIO;
 
-			/* Nothing to read, let's sleep */
-			schedule();
-			continue;
-		}
-
+	/* Read frames from queue */
+	skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
+				  &peeked, &off, &err);
+	if (skb) {
 		ret = tun_put_user(tun, tfile, skb, iv, len);
 		kfree_skb(skb);
-		break;
-	}
-
-	if (unlikely(!noblock)) {
-		current->state = TASK_RUNNING;
-		remove_wait_queue(&tfile->wq.wait, &wait);
-	}
+	} else
+		ret = err;
 
 	return ret;
 }
@@ -2199,8 +2177,8 @@
 	tfile->flags = 0;
 	tfile->ifindex = 0;
 
-	rcu_assign_pointer(tfile->socket.wq, &tfile->wq);
 	init_waitqueue_head(&tfile->wq.wait);
+	RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
 
 	tfile->socket.file = file;
 	tfile->socket.ops = &tun_socket_ops;

diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 630caf4..8cfc3bb 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c

@@ -793,7 +793,7 @@
 
 	netdev->netdev_ops = &catc_netdev_ops;
 	netdev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	catc->usbdev = usbdev;
 	catc->netdev = netdev;

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 2e025dd..5ee7a1d 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c

@@ -24,13 +24,21 @@
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 
+/* alternative VLAN for IP session 0 if not untagged */
+#define MBIM_IPS0_VID	4094
+
 /* driver specific data - must match cdc_ncm usage */
 struct cdc_mbim_state {
 	struct cdc_ncm_ctx *ctx;
 	atomic_t pmcount;
 	struct usb_driver *subdriver;
-	struct usb_interface *control;
-	struct usb_interface *data;
+	unsigned long _unused;
+	unsigned long flags;
+};
+
+/* flags for the cdc_mbim_state.flags field */
+enum cdc_mbim_flags {
+	FLAG_IPS0_VLAN = 1 << 0,	/* IP session 0 is tagged  */
 };
 
 /* using a counter to merge subdriver requests with our own into a combined state */
@@ -62,16 +70,91 @@
 	return cdc_mbim_manage_power(dev, status);
 }
 
+static int cdc_mbim_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_mbim_state *info = (void *)&dev->data;
+
+	/* creation of this VLAN is a request to tag IP session 0 */
+	if (vid == MBIM_IPS0_VID)
+		info->flags |= FLAG_IPS0_VLAN;
+	else
+		if (vid >= 512)	/* we don't map these to MBIM session */
+			return -EINVAL;
+	return 0;
+}
+
+static int cdc_mbim_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_mbim_state *info = (void *)&dev->data;
+
+	/* this is a request for an untagged IP session 0 */
+	if (vid == MBIM_IPS0_VID)
+		info->flags &= ~FLAG_IPS0_VLAN;
+	return 0;
+}
+
+static const struct net_device_ops cdc_mbim_netdev_ops = {
+	.ndo_open             = usbnet_open,
+	.ndo_stop             = usbnet_stop,
+	.ndo_start_xmit       = usbnet_start_xmit,
+	.ndo_tx_timeout       = usbnet_tx_timeout,
+	.ndo_change_mtu       = usbnet_change_mtu,
+	.ndo_set_mac_address  = eth_mac_addr,
+	.ndo_validate_addr    = eth_validate_addr,
+	.ndo_vlan_rx_add_vid  = cdc_mbim_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = cdc_mbim_rx_kill_vid,
+};
+
+/* Change the control interface altsetting and update the .driver_info
+ * pointer if the matching entry after changing class codes points to
+ * a different struct
+ */
+static int cdc_mbim_set_ctrlalt(struct usbnet *dev, struct usb_interface *intf, u8 alt)
+{
+	struct usb_driver *driver = to_usb_driver(intf->dev.driver);
+	const struct usb_device_id *id;
+	struct driver_info *info;
+	int ret;
+
+	ret = usb_set_interface(dev->udev,
+				intf->cur_altsetting->desc.bInterfaceNumber,
+				alt);
+	if (ret)
+		return ret;
+
+	id = usb_match_id(intf, driver->id_table);
+	if (!id)
+		return -ENODEV;
+
+	info = (struct driver_info *)id->driver_info;
+	if (info != dev->driver_info) {
+		dev_dbg(&intf->dev, "driver_info updated to '%s'\n",
+			info->description);
+		dev->driver_info = info;
+	}
+	return 0;
+}
 
 static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *subdriver = ERR_PTR(-ENODEV);
 	int ret = -ENODEV;
-	u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf);
+	u8 data_altsetting = 1;
 	struct cdc_mbim_state *info = (void *)&dev->data;
 
-	/* Probably NCM, defer for cdc_ncm_bind */
+	/* should we change control altsetting on a NCM/MBIM function? */
+	if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) {
+		data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
+		ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM);
+		if (ret)
+			goto err;
+		ret = -ENODEV;
+	}
+
+	/* we will hit this for NCM/MBIM functions if prefer_mbim is false */
 	if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
 		goto err;
 
@@ -101,7 +184,10 @@
 	dev->net->flags |= IFF_NOARP;
 
 	/* no need to put the VLAN tci in the packet headers */
-	dev->net->features |= NETIF_F_HW_VLAN_CTAG_TX;
+	dev->net->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	/* monitor VLAN additions and removals */
+	dev->net->netdev_ops = &cdc_mbim_netdev_ops;
 err:
 	return ret;
 }
@@ -164,12 +250,24 @@
 			skb_pull(skb, ETH_HLEN);
 		}
 
+		/* Is IP session <0> tagged too? */
+		if (info->flags & FLAG_IPS0_VLAN) {
+			/* drop all untagged packets */
+			if (!tci)
+				goto error;
+			/* map MBIM_IPS0_VID to IPS<0> */
+			if (tci == MBIM_IPS0_VID)
+				tci = 0;
+		}
+
 		/* mapping VLANs to MBIM sessions:
-		 *   no tag     => IPS session <0>
+		 *   no tag     => IPS session <0> if !FLAG_IPS0_VLAN
 		 *   1 - 255    => IPS session <vlanid>
 		 *   256 - 511  => DSS session <vlanid - 256>
-		 *   512 - 4095 => unsupported, drop
+		 *   512 - 4093 => unsupported, drop
+		 *   4094       => IPS session <0> if FLAG_IPS0_VLAN
 		 */
+
 		switch (tci & 0x0f00) {
 		case 0x0000: /* VLAN ID 0 - 255 */
 			if (!is_ip)
@@ -178,6 +276,8 @@
 			c[3] = tci;
 			break;
 		case 0x0100: /* VLAN ID 256 - 511 */
+			if (is_ip)
+				goto error;
 			sign = cpu_to_le32(USB_CDC_MBIM_NDP16_DSS_SIGN);
 			c = (u8 *)&sign;
 			c[3] = tci;
@@ -223,8 +323,8 @@
 	/* need to send the NA on the VLAN dev, if any */
 	rcu_read_lock();
 	if (tci) {
-		netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q),
-					      tci);
+		netdev = __vlan_find_dev_deep_rcu(dev->net, htons(ETH_P_8021Q),
+						  tci);
 		if (!netdev) {
 			rcu_read_unlock();
 			return;
@@ -268,7 +368,7 @@
 	__be16 proto = htons(ETH_P_802_3);
 	struct sk_buff *skb = NULL;
 
-	if (tci < 256) { /* IPS session? */
+	if (tci < 256 || tci == MBIM_IPS0_VID) { /* IPS session? */
 		if (len < sizeof(struct iphdr))
 			goto err;
 
@@ -320,6 +420,7 @@
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 	u8 *c;
 	u16 tci;
 
@@ -338,6 +439,9 @@
 	case cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN):
 		c = (u8 *)&ndp16->dwSignature;
 		tci = c[3];
+		/* tag IPS<0> packets too if MBIM_IPS0_VID exists */
+		if (!tci && info->flags & FLAG_IPS0_VLAN)
+			tci = MBIM_IPS0_VID;
 		break;
 	case cpu_to_le32(USB_CDC_MBIM_NDP16_DSS_SIGN):
 		c = (u8 *)&ndp16->dwSignature;
@@ -379,6 +483,7 @@
 			if (!skb)
 				goto error;
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -387,6 +492,10 @@
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9a2bd11..80a844e 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c

@@ -65,19 +65,384 @@
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
 static struct usb_driver cdc_ncm_driver;
 
-static int cdc_ncm_setup(struct usbnet *dev)
+struct cdc_ncm_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define CDC_NCM_STAT(str, m) { \
+		.stat_string = str, \
+		.sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \
+		.stat_offset = offsetof(struct cdc_ncm_ctx, m) }
+#define CDC_NCM_SIMPLE_STAT(m)	CDC_NCM_STAT(__stringify(m), m)
+
+static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
+	CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_timeout),
+	CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram),
+	CDC_NCM_SIMPLE_STAT(tx_overhead),
+	CDC_NCM_SIMPLE_STAT(tx_ntbs),
+	CDC_NCM_SIMPLE_STAT(rx_overhead),
+	CDC_NCM_SIMPLE_STAT(rx_ntbs),
+};
+
+static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(cdc_ncm_gstrings_stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void cdc_ncm_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats __always_unused *stats,
+				    u64 *data)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	int i;
+	char *p = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+		p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset;
+		data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+}
+
+static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+			memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+	}
+}
+
+static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx);
+
+static const struct ethtool_ops cdc_ncm_ethtool_ops = {
+	.get_settings      = usbnet_get_settings,
+	.set_settings      = usbnet_set_settings,
+	.get_link          = usbnet_get_link,
+	.nway_reset        = usbnet_nway_reset,
+	.get_drvinfo       = usbnet_get_drvinfo,
+	.get_msglevel      = usbnet_get_msglevel,
+	.set_msglevel      = usbnet_set_msglevel,
+	.get_ts_info       = ethtool_op_get_ts_info,
+	.get_sset_count    = cdc_ncm_get_sset_count,
+	.get_strings       = cdc_ncm_get_strings,
+	.get_ethtool_stats = cdc_ncm_get_ethtool_stats,
+};
+
+static u32 cdc_ncm_check_rx_max(struct usbnet *dev, u32 new_rx)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
-	u32 val;
-	u8 flags;
-	u8 iface_no;
-	int err;
-	int eth_hlen;
-	u16 mbim_mtu;
-	u16 ntb_fmt_supported;
-	__le16 max_datagram_size;
+	u32 val, max, min;
 
-	iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
+	/* clamp new_rx to sane values */
+	min = USB_CDC_NCM_NTB_MIN_IN_SIZE;
+	max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_RX, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize));
+
+	/* dwNtbInMaxSize spec violation? Use MIN size for both limits */
+	if (max < min) {
+		dev_warn(&dev->intf->dev, "dwNtbInMaxSize=%u is too small. Using %u\n",
+			 le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize), min);
+		max = min;
+	}
+
+	val = clamp_t(u32, new_rx, min, max);
+	if (val != new_rx)
+		dev_dbg(&dev->intf->dev, "rx_max must be in the [%u, %u] range\n", min, max);
+
+	return val;
+}
+
+static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 val, max, min;
+
+	/* clamp new_tx to sane values */
+	min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16);
+	max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
+
+	/* some devices set dwNtbOutMaxSize too low for the above default */
+	min = min(min, max);
+
+	val = clamp_t(u32, new_tx, min, max);
+	if (val != new_tx)
+		dev_dbg(&dev->intf->dev, "tx_max must be in the [%u, %u] range\n", min, max);
+
+	return val;
+}
+
+static ssize_t cdc_ncm_show_min_tx_pkt(struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	return sprintf(buf, "%u\n", ctx->min_tx_pkt);
+}
+
+static ssize_t cdc_ncm_show_rx_max(struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	return sprintf(buf, "%u\n", ctx->rx_max);
+}
+
+static ssize_t cdc_ncm_show_tx_max(struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	return sprintf(buf, "%u\n", ctx->tx_max);
+}
+
+static ssize_t cdc_ncm_show_tx_timer_usecs(struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	return sprintf(buf, "%u\n", ctx->timer_interval / (u32)NSEC_PER_USEC);
+}
+
+static ssize_t cdc_ncm_store_min_tx_pkt(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	unsigned long val;
+
+	/* no need to restrict values - anything from 0 to infinity is OK */
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	ctx->min_tx_pkt = val;
+	return len;
+}
+
+static ssize_t cdc_ncm_store_rx_max(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) || cdc_ncm_check_rx_max(dev, val) != val)
+		return -EINVAL;
+
+	cdc_ncm_update_rxtx_max(dev, val, ctx->tx_max);
+	return len;
+}
+
+static ssize_t cdc_ncm_store_tx_max(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) || cdc_ncm_check_tx_max(dev, val) != val)
+		return -EINVAL;
+
+	cdc_ncm_update_rxtx_max(dev, ctx->rx_max, val);
+	return len;
+}
+
+static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct usbnet *dev = netdev_priv(to_net_dev(d));
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	ssize_t ret;
+	unsigned long val;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+	if (val && (val < CDC_NCM_TIMER_INTERVAL_MIN || val > CDC_NCM_TIMER_INTERVAL_MAX))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->mtx);
+	ctx->timer_interval = val * NSEC_PER_USEC;
+	if (!ctx->timer_interval)
+		ctx->tx_timer_pending = 0;
+	spin_unlock_bh(&ctx->mtx);
+	return len;
+}
+
+static DEVICE_ATTR(min_tx_pkt, S_IRUGO | S_IWUSR, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
+static DEVICE_ATTR(rx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
+static DEVICE_ATTR(tx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
+static DEVICE_ATTR(tx_timer_usecs, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
+
+#define NCM_PARM_ATTR(name, format, tocpu)				\
+static ssize_t cdc_ncm_show_##name(struct device *d, struct device_attribute *attr, char *buf) \
+{ \
+	struct usbnet *dev = netdev_priv(to_net_dev(d)); \
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; \
+	return sprintf(buf, format "\n", tocpu(ctx->ncm_parm.name));	\
+} \
+static DEVICE_ATTR(name, S_IRUGO, cdc_ncm_show_##name, NULL)
+
+NCM_PARM_ATTR(bmNtbFormatsSupported, "0x%04x", le16_to_cpu);
+NCM_PARM_ATTR(dwNtbInMaxSize, "%u", le32_to_cpu);
+NCM_PARM_ATTR(wNdpInDivisor, "%u", le16_to_cpu);
+NCM_PARM_ATTR(wNdpInPayloadRemainder, "%u", le16_to_cpu);
+NCM_PARM_ATTR(wNdpInAlignment, "%u", le16_to_cpu);
+NCM_PARM_ATTR(dwNtbOutMaxSize, "%u", le32_to_cpu);
+NCM_PARM_ATTR(wNdpOutDivisor, "%u", le16_to_cpu);
+NCM_PARM_ATTR(wNdpOutPayloadRemainder, "%u", le16_to_cpu);
+NCM_PARM_ATTR(wNdpOutAlignment, "%u", le16_to_cpu);
+NCM_PARM_ATTR(wNtbOutMaxDatagrams, "%u", le16_to_cpu);
+
+static struct attribute *cdc_ncm_sysfs_attrs[] = {
+	&dev_attr_min_tx_pkt.attr,
+	&dev_attr_rx_max.attr,
+	&dev_attr_tx_max.attr,
+	&dev_attr_tx_timer_usecs.attr,
+	&dev_attr_bmNtbFormatsSupported.attr,
+	&dev_attr_dwNtbInMaxSize.attr,
+	&dev_attr_wNdpInDivisor.attr,
+	&dev_attr_wNdpInPayloadRemainder.attr,
+	&dev_attr_wNdpInAlignment.attr,
+	&dev_attr_dwNtbOutMaxSize.attr,
+	&dev_attr_wNdpOutDivisor.attr,
+	&dev_attr_wNdpOutPayloadRemainder.attr,
+	&dev_attr_wNdpOutAlignment.attr,
+	&dev_attr_wNtbOutMaxDatagrams.attr,
+	NULL,
+};
+
+static struct attribute_group cdc_ncm_sysfs_attr_group = {
+	.name = "cdc_ncm",
+	.attrs = cdc_ncm_sysfs_attrs,
+};
+
+/* handle rx_max and tx_max changes */
+static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
+	u32 val;
+
+	val = cdc_ncm_check_rx_max(dev, new_rx);
+
+	/* inform device about NTB input size changes */
+	if (val != ctx->rx_max) {
+		__le32 dwNtbInMaxSize = cpu_to_le32(val);
+
+		dev_info(&dev->intf->dev, "setting rx_max = %u\n", val);
+
+		/* tell device to use new size */
+		if (usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE,
+				     USB_TYPE_CLASS | USB_DIR_OUT
+				     | USB_RECIP_INTERFACE,
+				     0, iface_no, &dwNtbInMaxSize, 4) < 0)
+			dev_dbg(&dev->intf->dev, "Setting NTB Input Size failed\n");
+		else
+			ctx->rx_max = val;
+	}
+
+	/* usbnet uses these values for sizing rx queues */
+	if (dev->rx_urb_size != ctx->rx_max) {
+		dev->rx_urb_size = ctx->rx_max;
+		if (netif_running(dev->net))
+			usbnet_unlink_rx_urbs(dev);
+	}
+
+	val = cdc_ncm_check_tx_max(dev, new_tx);
+	if (val != ctx->tx_max)
+		dev_info(&dev->intf->dev, "setting tx_max = %u\n", val);
+
+	/* Adding a pad byte here if necessary simplifies the handling
+	 * in cdc_ncm_fill_tx_frame, making tx_max always represent
+	 * the real skb max size.
+	 *
+	 * We cannot use dev->maxpacket here because this is called from
+	 * .bind which is called before usbnet sets up dev->maxpacket
+	 */
+	if (val != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) &&
+	    val % usb_maxpacket(dev->udev, dev->out, 1) == 0)
+		val++;
+
+	/* we might need to flush any pending tx buffers if running */
+	if (netif_running(dev->net) && val > ctx->tx_max) {
+		netif_tx_lock_bh(dev->net);
+		usbnet_start_xmit(NULL, dev->net);
+		/* make sure tx_curr_skb is reallocated if it was empty */
+		if (ctx->tx_curr_skb) {
+			dev_kfree_skb_any(ctx->tx_curr_skb);
+			ctx->tx_curr_skb = NULL;
+		}
+		ctx->tx_max = val;
+		netif_tx_unlock_bh(dev->net);
+	} else {
+		ctx->tx_max = val;
+	}
+
+	dev->hard_mtu = ctx->tx_max;
+
+	/* max qlen depends on hard_mtu and rx_urb_size */
+	usbnet_update_max_qlen(dev);
+
+	/* never pad more than 3 full USB packets per transfer */
+	ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out, 1),
+				  CDC_NCM_MIN_TX_PKT, ctx->tx_max);
+}
+
+/* helpers for NCM and MBIM differences */
+static u8 cdc_ncm_flags(struct usbnet *dev)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting) && ctx->mbim_desc)
+		return ctx->mbim_desc->bmNetworkCapabilities;
+	if (ctx->func_desc)
+		return ctx->func_desc->bmNetworkCapabilities;
+	return 0;
+}
+
+static int cdc_ncm_eth_hlen(struct usbnet *dev)
+{
+	if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting))
+		return 0;
+	return ETH_HLEN;
+}
+
+static u32 cdc_ncm_min_dgram_size(struct usbnet *dev)
+{
+	if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting))
+		return CDC_MBIM_MIN_DATAGRAM_SIZE;
+	return CDC_NCM_MIN_DATAGRAM_SIZE;
+}
+
+static u32 cdc_ncm_max_dgram_size(struct usbnet *dev)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting) && ctx->mbim_desc)
+		return le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize);
+	if (ctx->ether_desc)
+		return le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+	return CDC_NCM_MAX_DATAGRAM_SIZE;
+}
+
+/* initial one-time device setup.  MUST be called with the data interface
+ * in altsetting 0
+ */
+static int cdc_ncm_init(struct usbnet *dev)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
+	int err;
 
 	err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS,
 			      USB_TYPE_CLASS | USB_DIR_IN
@@ -89,7 +454,36 @@
 		return err; /* GET_NTB_PARAMETERS is required */
 	}
 
-	/* read correct set of parameters according to device mode */
+	/* set CRC Mode */
+	if (cdc_ncm_flags(dev) & USB_CDC_NCM_NCAP_CRC_MODE) {
+		dev_dbg(&dev->intf->dev, "Setting CRC mode off\n");
+		err = usbnet_write_cmd(dev, USB_CDC_SET_CRC_MODE,
+				       USB_TYPE_CLASS | USB_DIR_OUT
+				       | USB_RECIP_INTERFACE,
+				       USB_CDC_NCM_CRC_NOT_APPENDED,
+				       iface_no, NULL, 0);
+		if (err < 0)
+			dev_err(&dev->intf->dev, "SET_CRC_MODE failed\n");
+	}
+
+	/* set NTB format, if both formats are supported.
+	 *
+	 * "The host shall only send this command while the NCM Data
+	 *  Interface is in alternate setting 0."
+	 */
+	if (le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported) &
+						USB_CDC_NCM_NTB32_SUPPORTED) {
+		dev_dbg(&dev->intf->dev, "Setting NTB format to 16-bit\n");
+		err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT,
+				       USB_TYPE_CLASS | USB_DIR_OUT
+				       | USB_RECIP_INTERFACE,
+				       USB_CDC_NCM_NTB16_FORMAT,
+				       iface_no, NULL, 0);
+		if (err < 0)
+			dev_err(&dev->intf->dev, "SET_NTB_FORMAT failed\n");
+	}
+
+	/* set initial device values */
 	ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize);
 	ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize);
 	ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder);
@@ -97,72 +491,79 @@
 	ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment);
 	/* devices prior to NCM Errata shall set this field to zero */
 	ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams);
-	ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported);
-
-	/* there are some minor differences in NCM and MBIM defaults */
-	if (cdc_ncm_comm_intf_is_mbim(ctx->control->cur_altsetting)) {
-		if (!ctx->mbim_desc)
-			return -EINVAL;
-		eth_hlen = 0;
-		flags = ctx->mbim_desc->bmNetworkCapabilities;
-		ctx->max_datagram_size = le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize);
-		if (ctx->max_datagram_size < CDC_MBIM_MIN_DATAGRAM_SIZE)
-			ctx->max_datagram_size = CDC_MBIM_MIN_DATAGRAM_SIZE;
-	} else {
-		if (!ctx->func_desc)
-			return -EINVAL;
-		eth_hlen = ETH_HLEN;
-		flags = ctx->func_desc->bmNetworkCapabilities;
-		ctx->max_datagram_size = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
-		if (ctx->max_datagram_size < CDC_NCM_MIN_DATAGRAM_SIZE)
-			ctx->max_datagram_size = CDC_NCM_MIN_DATAGRAM_SIZE;
-	}
-
-	/* common absolute max for NCM and MBIM */
-	if (ctx->max_datagram_size > CDC_NCM_MAX_DATAGRAM_SIZE)
-		ctx->max_datagram_size = CDC_NCM_MAX_DATAGRAM_SIZE;
 
 	dev_dbg(&dev->intf->dev,
 		"dwNtbInMaxSize=%u dwNtbOutMaxSize=%u wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u wNdpOutAlignment=%u wNtbOutMaxDatagrams=%u flags=0x%x\n",
 		ctx->rx_max, ctx->tx_max, ctx->tx_remainder, ctx->tx_modulus,
-		ctx->tx_ndp_modulus, ctx->tx_max_datagrams, flags);
+		ctx->tx_ndp_modulus, ctx->tx_max_datagrams, cdc_ncm_flags(dev));
 
 	/* max count of tx datagrams */
 	if ((ctx->tx_max_datagrams == 0) ||
 			(ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX))
 		ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
 
-	/* verify maximum size of received NTB in bytes */
-	if (ctx->rx_max < USB_CDC_NCM_NTB_MIN_IN_SIZE) {
-		dev_dbg(&dev->intf->dev, "Using min receive length=%d\n",
-			USB_CDC_NCM_NTB_MIN_IN_SIZE);
-		ctx->rx_max = USB_CDC_NCM_NTB_MIN_IN_SIZE;
+	/* set up maximum NDP size */
+	ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16);
+
+	/* initial coalescing timer interval */
+	ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC;
+
+	return 0;
+}
+
+/* set a new max datagram size */
+static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
+	__le16 max_datagram_size;
+	u16 mbim_mtu;
+	int err;
+
+	/* set default based on descriptors */
+	ctx->max_datagram_size = clamp_t(u32, new_size,
+					 cdc_ncm_min_dgram_size(dev),
+					 CDC_NCM_MAX_DATAGRAM_SIZE);
+
+	/* inform the device about the selected Max Datagram Size? */
+	if (!(cdc_ncm_flags(dev) & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE))
+		goto out;
+
+	/* read current mtu value from device */
+	err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
+			      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
+			      0, iface_no, &max_datagram_size, 2);
+	if (err < 0) {
+		dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
+		goto out;
 	}
 
-	if (ctx->rx_max > CDC_NCM_NTB_MAX_SIZE_RX) {
-		dev_dbg(&dev->intf->dev, "Using default maximum receive length=%d\n",
-			CDC_NCM_NTB_MAX_SIZE_RX);
-		ctx->rx_max = CDC_NCM_NTB_MAX_SIZE_RX;
-	}
+	if (le16_to_cpu(max_datagram_size) == ctx->max_datagram_size)
+		goto out;
 
-	/* inform device about NTB input size changes */
-	if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
-		__le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+	max_datagram_size = cpu_to_le16(ctx->max_datagram_size);
+	err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE,
+			       USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE,
+			       0, iface_no, &max_datagram_size, 2);
+	if (err < 0)
+		dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n");
 
-		err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE,
-				       USB_TYPE_CLASS | USB_DIR_OUT
-				       | USB_RECIP_INTERFACE,
-				       0, iface_no, &dwNtbInMaxSize, 4);
-		if (err < 0)
-			dev_dbg(&dev->intf->dev, "Setting NTB Input Size failed\n");
-	}
+out:
+	/* set MTU to max supported by the device if necessary */
+	dev->net->mtu = min_t(int, dev->net->mtu, ctx->max_datagram_size - cdc_ncm_eth_hlen(dev));
 
-	/* verify maximum size of transmitted NTB in bytes */
-	if (ctx->tx_max > CDC_NCM_NTB_MAX_SIZE_TX) {
-		dev_dbg(&dev->intf->dev, "Using default maximum transmit length=%d\n",
-			CDC_NCM_NTB_MAX_SIZE_TX);
-		ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX;
+	/* do not exceed operator preferred MTU */
+	if (ctx->mbim_extended_desc) {
+		mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU);
+		if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu)
+			dev->net->mtu = mbim_mtu;
 	}
+}
+
+static void cdc_ncm_fix_modulus(struct usbnet *dev)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 val;
 
 	/*
 	 * verify that the structure alignment is:
@@ -199,68 +600,31 @@
 	}
 
 	/* adjust TX-remainder according to NCM specification. */
-	ctx->tx_remainder = ((ctx->tx_remainder - eth_hlen) &
+	ctx->tx_remainder = ((ctx->tx_remainder - cdc_ncm_eth_hlen(dev)) &
 			     (ctx->tx_modulus - 1));
+}
 
-	/* additional configuration */
+static int cdc_ncm_setup(struct usbnet *dev)
+{
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 def_rx, def_tx;
 
-	/* set CRC Mode */
-	if (flags & USB_CDC_NCM_NCAP_CRC_MODE) {
-		err = usbnet_write_cmd(dev, USB_CDC_SET_CRC_MODE,
-				       USB_TYPE_CLASS | USB_DIR_OUT
-				       | USB_RECIP_INTERFACE,
-				       USB_CDC_NCM_CRC_NOT_APPENDED,
-				       iface_no, NULL, 0);
-		if (err < 0)
-			dev_dbg(&dev->intf->dev, "Setting CRC mode off failed\n");
-	}
+	/* be conservative when selecting initial buffer size to
+	 * increase the number of hosts this will work for
+	 */
+	def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize));
+	def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
 
-	/* set NTB format, if both formats are supported */
-	if (ntb_fmt_supported & USB_CDC_NCM_NTH32_SIGN) {
-		err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT,
-				       USB_TYPE_CLASS | USB_DIR_OUT
-				       | USB_RECIP_INTERFACE,
-				       USB_CDC_NCM_NTB16_FORMAT,
-				       iface_no, NULL, 0);
-		if (err < 0)
-			dev_dbg(&dev->intf->dev, "Setting NTB format to 16-bit failed\n");
-	}
+	/* clamp rx_max and tx_max and inform device */
+	cdc_ncm_update_rxtx_max(dev, def_rx, def_tx);
 
-	/* inform the device about the selected Max Datagram Size */
-	if (!(flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE))
-		goto out;
+	/* sanitize the modulus and remainder values */
+	cdc_ncm_fix_modulus(dev);
 
-	/* read current mtu value from device */
-	err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
-			      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
-			      0, iface_no, &max_datagram_size, 2);
-	if (err < 0) {
-		dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
-		goto out;
-	}
-
-	if (le16_to_cpu(max_datagram_size) == ctx->max_datagram_size)
-		goto out;
-
-	max_datagram_size = cpu_to_le16(ctx->max_datagram_size);
-	err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE,
-			       USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE,
-			       0, iface_no, &max_datagram_size, 2);
-	if (err < 0)
-		dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n");
-
-out:
-	/* set MTU to max supported by the device if necessary */
-	if (dev->net->mtu > ctx->max_datagram_size - eth_hlen)
-		dev->net->mtu = ctx->max_datagram_size - eth_hlen;
-
-	/* do not exceed operater preferred MTU */
-	if (ctx->mbim_extended_desc) {
-		mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU);
-		if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu)
-			dev->net->mtu = mbim_mtu;
-	}
-
+	/* set max datagram size */
+	cdc_ncm_set_dgram_size(dev, cdc_ncm_max_dgram_size(dev));
 	return 0;
 }
 
@@ -424,10 +788,21 @@
 	}
 
 	/* check if we got everything */
-	if (!ctx->data || (!ctx->mbim_desc && !ctx->ether_desc)) {
-		dev_dbg(&intf->dev, "CDC descriptors missing\n");
+	if (!ctx->data) {
+		dev_dbg(&intf->dev, "CDC Union missing and no IAD found\n");
 		goto error;
 	}
+	if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) {
+		if (!ctx->mbim_desc) {
+			dev_dbg(&intf->dev, "MBIM functional descriptor missing\n");
+			goto error;
+		}
+	} else {
+		if (!ctx->ether_desc || !ctx->func_desc) {
+			dev_dbg(&intf->dev, "NCM or ECM functional descriptors missing\n");
+			goto error;
+		}
+	}
 
 	/* claim data interface, if different from control */
 	if (ctx->data != ctx->control) {
@@ -447,8 +822,8 @@
 		goto error2;
 	}
 
-	/* initialize data interface */
-	if (cdc_ncm_setup(dev))
+	/* initialize basic device settings */
+	if (cdc_ncm_init(dev))
 		goto error2;
 
 	/* configure data interface */
@@ -477,18 +852,14 @@
 		dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr);
 	}
 
-	/* usbnet use these values for sizing tx/rx queues */
-	dev->hard_mtu = ctx->tx_max;
-	dev->rx_urb_size = ctx->rx_max;
+	/* finish setting up the device specific data */
+	cdc_ncm_setup(dev);
 
-	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
-	 * outside the sane range. Adding a pad byte here if necessary
-	 * simplifies the handling in cdc_ncm_fill_tx_frame, making
-	 * tx_max always represent the real skb max size.
-	 */
-	if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) &&
-	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
-		ctx->tx_max++;
+	/* override ethtool_ops */
+	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
+
+	/* add our sysfs attrs */
+	dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group;
 
 	return 0;
 
@@ -541,10 +912,10 @@
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
 
-/* Select the MBIM altsetting iff it is preferred and available,
- * returning the number of the corresponding data interface altsetting
+/* Return the number of the MBIM control interface altsetting iff it
+ * is preferred and available, otherwise the NCM (or only) altsetting
  */
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf)
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf)
 {
 	struct usb_host_interface *alt;
 
@@ -563,15 +934,15 @@
 	 *   the rules given in section 6 (USB Device Model) of this
 	 *   specification."
 	 */
-	if (prefer_mbim && intf->num_altsetting == 2) {
+	if (intf->num_altsetting < 2)
+		return intf->cur_altsetting->desc.bAlternateSetting;
+
+	if (prefer_mbim) {
 		alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM);
-		if (alt && cdc_ncm_comm_intf_is_mbim(alt) &&
-		    !usb_set_interface(dev->udev,
-				       intf->cur_altsetting->desc.bInterfaceNumber,
-				       CDC_NCM_COMM_ALTSETTING_MBIM))
-			return CDC_NCM_DATA_ALTSETTING_MBIM;
+		if (alt && cdc_ncm_comm_intf_is_mbim(alt))
+			return CDC_NCM_COMM_ALTSETTING_MBIM;
 	}
-	return CDC_NCM_DATA_ALTSETTING_NCM;
+	return CDC_NCM_COMM_ALTSETTING_NCM;
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
 
@@ -580,12 +951,11 @@
 	int ret;
 
 	/* MBIM backwards compatible function? */
-	cdc_ncm_select_altsetting(dev, intf);
-	if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
+	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
 
-	/* NCM data altsetting is always 1 */
-	ret = cdc_ncm_bind_common(dev, intf, 1);
+	/* The NCM data altsetting is fixed */
+	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM);
 
 	/*
 	 * We should get an event when network connection is "connected" or
@@ -628,7 +998,7 @@
 	cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
 
 	/* verify that there is room for the NDP and the datagram (reserve) */
-	if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE)
+	if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
 		return NULL;
 
 	/* link to it */
@@ -638,7 +1008,7 @@
 		nth16->wNdpIndex = cpu_to_le16(skb->len);
 
 	/* push a new empty NDP */
-	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE);
+	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size);
 	ndp16->dwSignature = sign;
 	ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16));
 	return ndp16;
@@ -683,6 +1053,9 @@
 
 		/* count total number of frames in this NTB */
 		ctx->tx_curr_frame_num = 0;
+
+		/* recent payload counter for this skb_out */
+		ctx->tx_curr_frame_payload = 0;
 	}
 
 	for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) {
@@ -720,6 +1093,7 @@
 				ctx->tx_rem_sign = sign;
 				skb = NULL;
 				ready2send = 1;
+				ctx->tx_reason_ntb_full++;	/* count reason for transmitting */
 			}
 			break;
 		}
@@ -733,12 +1107,14 @@
 		ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len);
 		ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16));
 		memcpy(skb_put(skb_out, skb->len), skb->data, skb->len);
+		ctx->tx_curr_frame_payload += skb->len;	/* count real tx payload data */
 		dev_kfree_skb_any(skb);
 		skb = NULL;
 
 		/* send now if this NDP is full */
 		if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) {
 			ready2send = 1;
+			ctx->tx_reason_ndp_full++;	/* count reason for transmitting */
 			break;
 		}
 	}
@@ -758,7 +1134,7 @@
 		ctx->tx_curr_skb = skb_out;
 		goto exit_no_skb;
 
-	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) {
+	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) {
 		/* wait for more frames */
 		/* push variables */
 		ctx->tx_curr_skb = skb_out;
@@ -768,11 +1144,13 @@
 		goto exit_no_skb;
 
 	} else {
+		if (n == ctx->tx_max_datagrams)
+			ctx->tx_reason_max_datagram++;	/* count reason for transmitting */
 		/* frame goes out */
 		/* variables will be reset at next call */
 	}
 
-	/* If collected data size is less or equal CDC_NCM_MIN_TX_PKT
+	/* If collected data size is less than or equal to ctx->min_tx_pkt
 	 * bytes, we send buffers as it is. If we get more data, it
 	 * would be more efficient for USB HS mobile device with DMA
 	 * engine to receive a full size NTB, than canceling DMA
@@ -782,7 +1160,7 @@
 	 * a ZLP after full sized NTBs.
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
-	    skb_out->len > CDC_NCM_MIN_TX_PKT)
+	    skb_out->len > ctx->min_tx_pkt)
 		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0,
 		       ctx->tx_max - skb_out->len);
 	else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0)
@@ -795,11 +1173,22 @@
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
 	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
+
+	/* keep private stats: framing overhead and number of NTBs */
+	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
+	ctx->tx_ntbs++;
+
+	/* usbnet has already counted all the framing overhead.
+	 * Adjust the stats so that the tx_bytes counter show real
+	 * payload data instead.
+	 */
+	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
+
 	return skb_out;
 
 exit_no_skb:
-	/* Start timer, if there is a remaining skb */
-	if (ctx->tx_curr_skb != NULL)
+	/* Start timer, if there is a remaining non-empty skb */
+	if (ctx->tx_curr_skb != NULL && n > 0)
 		cdc_ncm_tx_timeout_start(ctx);
 	return NULL;
 }
@@ -810,7 +1199,7 @@
 	/* start timer, if not already started */
 	if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop)))
 		hrtimer_start(&ctx->tx_timer,
-				ktime_set(0, CDC_NCM_TIMER_INTERVAL),
+				ktime_set(0, ctx->timer_interval),
 				HRTIMER_MODE_REL);
 }
 
@@ -835,6 +1224,7 @@
 		cdc_ncm_tx_timeout_start(ctx);
 		spin_unlock_bh(&ctx->mtx);
 	} else if (dev->net != NULL) {
+		ctx->tx_reason_timeout++;	/* count reason for transmitting */
 		spin_unlock_bh(&ctx->mtx);
 		netif_tx_lock_bh(dev->net);
 		usbnet_start_xmit(NULL, dev->net);
@@ -970,6 +1360,7 @@
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 
 	ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in);
 	if (ndpoffset < 0)
@@ -1015,13 +1406,13 @@
 			break;
 
 		} else {
-			skb = skb_clone(skb_in, GFP_ATOMIC);
+			/* create a fresh copy to reduce truesize */
+			skb = netdev_alloc_skb_ip_align(dev->net,  len);
 			if (!skb)
 				goto error;
-			skb->len = len;
-			skb->data = ((u8 *)skb_in->data) + offset;
-			skb_set_tail_pointer(skb, len);
+			memcpy(skb_put(skb, len), skb_in->data + offset, len);
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -1030,6 +1421,10 @@
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;
@@ -1049,14 +1444,14 @@
 	 */
 	if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
 		netif_info(dev, link, dev->net,
-		       "%u mbit/s downlink %u mbit/s uplink\n",
-		       (unsigned int)(rx_speed / 1000000U),
-		       (unsigned int)(tx_speed / 1000000U));
+			   "%u mbit/s downlink %u mbit/s uplink\n",
+			   (unsigned int)(rx_speed / 1000000U),
+			   (unsigned int)(tx_speed / 1000000U));
 	} else {
 		netif_info(dev, link, dev->net,
-		       "%u kbit/s downlink %u kbit/s uplink\n",
-		       (unsigned int)(rx_speed / 1000U),
-		       (unsigned int)(tx_speed / 1000U));
+			   "%u kbit/s downlink %u kbit/s uplink\n",
+			   (unsigned int)(rx_speed / 1000U),
+			   (unsigned int)(tx_speed / 1000U));
 	}
 }
 
@@ -1086,11 +1481,10 @@
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
-		ctx->connected = le16_to_cpu(event->wValue);
 		netif_info(dev, link, dev->net,
 			   "network connection: %sconnected\n",
-			   ctx->connected ? "" : "dis");
-		usbnet_link_change(dev, ctx->connected, 0);
+			   !!event->wValue ? "" : "dis");
+		usbnet_link_change(dev, !!event->wValue, 0);
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
@@ -1110,23 +1504,11 @@
 	}
 }
 
-static int cdc_ncm_check_connect(struct usbnet *dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)dev->data[0];
-	if (ctx == NULL)
-		return 1;	/* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1140,7 +1522,6 @@
 			| FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1154,7 +1535,6 @@
 			| FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 660bd5e..a3a0586 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c

@@ -2425,7 +2425,7 @@
 	net->type = ARPHRD_NONE;
 	net->mtu = DEFAULT_MTU - 14;
 	net->tx_queue_len = 10;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	/* and initialize the semaphore */
 	spin_lock_init(&hso_net->net_lock);

diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 312178d..f9822bc 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c

@@ -172,24 +172,11 @@
 	return ret;
 }
 
-static int huawei_cdc_ncm_check_connect(struct usbnet *usbnet_dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)usbnet_dev->data[0];
-
-	if (ctx == NULL)
-		return 1; /* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info huawei_cdc_ncm_info = {
 	.description = "Huawei CDC NCM device",
 	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
 	.bind = huawei_cdc_ncm_bind,
 	.unbind = huawei_cdc_ncm_unbind,
-	.check_connect = huawei_cdc_ncm_check_connect,
 	.manage_power = huawei_cdc_ncm_manage_power,
 	.rx_fixup = cdc_ncm_rx_fixup,
 	.tx_fixup = cdc_ncm_tx_fixup,

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 973275f..76465b1 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c

@@ -534,7 +534,7 @@
 	usb_set_intfdata(intf, dev);
 
 	SET_NETDEV_DEV(netdev, &intf->dev);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	retval = register_netdev(netdev);
 	if (retval) {

diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index a359d3b..dcb6d33 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c

@@ -1171,7 +1171,7 @@
 	netdev->netdev_ops = &kaweth_netdev_ops;
 	netdev->watchdog_timeo = KAWETH_TX_TIMEOUT;
 	netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	/* kaweth is zeroed as part of alloc_netdev */
 	INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl);

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 03e8a15..f840802 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c

@@ -1159,7 +1159,7 @@
 
 	net->watchdog_timeo = PEGASUS_TX_TIMEOUT;
 	net->netdev_ops = &pegasus_netdev_ops;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	pegasus->mii.dev = net;
 	pegasus->mii.mdio_read = mdio_read;
 	pegasus->mii.mdio_write = mdio_write;

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index dc4bf06..cf62d7e 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c

@@ -763,7 +763,12 @@
 	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},	/* Telit LE920 */
-	{QMI_FIXED_INTF(0x0b3c, 0xc005, 6)},    /* Olivetti Olicard 200 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc002, 4)},	/* Olivetti Olicard 140 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc004, 6)},	/* Olivetti Olicard 155 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc005, 6)},	/* Olivetti Olicard 200 */
+	{QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)},	/* Olivetti Olicard 160 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)},	/* Olivetti Olicard 500 */
 	{QMI_FIXED_INTF(0x1e2d, 0x0060, 4)},	/* Cinterion PLxx */
 	{QMI_FIXED_INTF(0x1e2d, 0x0053, 4)},	/* Cinterion PHxx,PXxx */

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3fbfb08..2543196 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c

@@ -630,12 +630,10 @@
 	int ret;
 	void *tmp;
 
-	tmp = kmalloc(size, GFP_KERNEL);
+	tmp = kmemdup(data, size, GFP_KERNEL);
 	if (!tmp)
 		return -ENOMEM;
 
-	memcpy(tmp, data, size);
-
 	ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0),
 			       RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE,
 			       value, index, tmp, size, 500);
@@ -3452,7 +3450,7 @@
 			      NETIF_F_TSO | NETIF_F_FRAGLIST |
 			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
 	tp->mii.dev = netdev;

diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index da2c458..6e87e57 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c

@@ -878,7 +878,7 @@
 	dev->netdev = netdev;
 	netdev->netdev_ops = &rtl8150_netdev_ops;
 	netdev->watchdog_timeo = RTL8150_TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	dev->intr_interval = 100;	/* 100ms */
 
 	if (!alloc_all_urbs(dev)) {

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8a852b5..7d9f84a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c

@@ -1646,7 +1646,7 @@
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
-	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
+	dev->ethtool_ops = &virtnet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
@@ -1724,6 +1724,13 @@
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
 		vi->has_cvq = true;
 
+	if (vi->any_header_sg) {
+		if (vi->mergeable_rx_bufs)
+			dev->needed_headroom = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		else
+			dev->needed_headroom = sizeof(struct virtio_net_hdr);
+	}
+
 	/* Use single tx/rx queue pair as default */
 	vi->curr_queue_pairs = 1;
 	vi->max_queue_pairs = max_queue_pairs;

diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 600ab56..40c1c7b 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c

@@ -431,8 +431,8 @@
 		ethtool_cmd_speed_set(ecmd, adapter->link_speed);
 		ecmd->duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
+		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+		ecmd->duplex = DUPLEX_UNKNOWN;
 	}
 	return 0;
 }
@@ -579,7 +579,7 @@
 }
 
 static int
-vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
+vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
@@ -592,7 +592,7 @@
 }
 
 static int
-vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p)
+vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	unsigned long flags;
@@ -628,12 +628,12 @@
 	.get_rxnfc         = vmxnet3_get_rxnfc,
 #ifdef VMXNET3_RSS
 	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
-	.get_rxfh_indir    = vmxnet3_get_rss_indir,
-	.set_rxfh_indir    = vmxnet3_set_rss_indir,
+	.get_rxfh          = vmxnet3_get_rss,
+	.set_rxfh          = vmxnet3_set_rss,
 #endif
 };
 
 void vmxnet3_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops);
+	netdev->ethtool_ops = &vmxnet3_ethtool_ops;
 }

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4dbb2ed..1610d51 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c

@@ -127,6 +127,7 @@
 	struct list_head  next;		/* vxlan's per namespace list */
 	struct vxlan_sock *vn_sock;	/* listening socket */
 	struct net_device *dev;
+	struct net	  *net;		/* netns for packet i/o */
 	struct vxlan_rdst default_dst;	/* default destination */
 	union vxlan_addr  saddr;	/* source address */
 	__be16		  dst_port;
@@ -134,7 +135,7 @@
 	__u16		  port_max;
 	__u8		  tos;		/* TOS override */
 	__u8		  ttl;
-	u32		  flags;	/* VXLAN_F_* below */
+	u32		  flags;	/* VXLAN_F_* in vxlan.h */
 
 	struct work_struct sock_work;
 	struct work_struct igmp_join;
@@ -149,13 +150,6 @@
 	struct hlist_head fdb_head[FDB_HASH_SIZE];
 };
 
-#define VXLAN_F_LEARN	0x01
-#define VXLAN_F_PROXY	0x02
-#define VXLAN_F_RSC	0x04
-#define VXLAN_F_L2MISS	0x08
-#define VXLAN_F_L3MISS	0x10
-#define VXLAN_F_IPV6	0x20 /* internal flag */
-
 /* salt for hash table */
 static u32 vxlan_salt __read_mostly;
 static struct workqueue_struct *vxlan_wq;
@@ -571,6 +565,7 @@
 			goto out;
 	}
 	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
 
 	off_eth = skb_gro_offset(skb);
 	hlen = off_eth + sizeof(*eh);
@@ -605,6 +600,7 @@
 	}
 
 	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
+	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
 	pp = ptype->callbacks.gro_receive(head, skb);
 
 out_unlock:
@@ -1203,6 +1199,7 @@
 
 	remote_ip = &vxlan->default_dst.remote_ip;
 	skb_reset_mac_header(skb);
+	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
 	skb->protocol = eth_type_trans(skb, vxlan->dev);
 
 	/* Ignore packet loops (and multicast echo) */
@@ -1599,18 +1596,11 @@
 }
 EXPORT_SYMBOL_GPL(vxlan_src_port);
 
-static int handle_offloads(struct sk_buff *skb)
+static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
+						    bool udp_csum)
 {
-	if (skb_is_gso(skb)) {
-		int err = skb_unclone(skb, GFP_ATOMIC);
-		if (unlikely(err))
-			return err;
-
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
+	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	return iptunnel_handle_offloads(skb, udp_csum, type);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1618,7 +1608,8 @@
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni)
+			   __be16 src_port, __be16 dst_port, __be32 vni,
+			   bool xnet)
 {
 	struct ipv6hdr *ip6h;
 	struct vxlanhdr *vxh;
@@ -1626,12 +1617,11 @@
 	int min_headroom;
 	int err;
 
-	if (!skb->encapsulation) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+	if (IS_ERR(skb))
+		return -EINVAL;
 
-	skb_scrub_packet(skb, false);
+	skb_scrub_packet(skb, xnet);
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
 			+ VXLAN_HLEN + sizeof(struct ipv6hdr)
@@ -1663,27 +1653,14 @@
 	uh->source = src_port;
 
 	uh->len = htons(skb->len);
-	uh->check = 0;
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_set(skb, dst);
 
-	if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
-		__wsum csum = skb_checksum(skb, 0, skb->len, 0);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
-					    IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_ipv6_magic(saddr, daddr,
-					     skb->len, IPPROTO_UDP, 0);
-	}
+	udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
+		      saddr, daddr, skb->len);
 
 	__skb_push(skb, sizeof(*ip6h));
 	skb_reset_network_header(skb);
@@ -1699,10 +1676,6 @@
 	ip6h->daddr	  = *daddr;
 	ip6h->saddr	  = *saddr;
 
-	err = handle_offloads(skb);
-	if (err)
-		return err;
-
 	ip6tunnel_xmit(skb, dev);
 	return 0;
 }
@@ -1711,17 +1684,16 @@
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni)
+		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	int min_headroom;
 	int err;
 
-	if (!skb->encapsulation) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb))
+		return -EINVAL;
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ VXLAN_HLEN + sizeof(struct iphdr)
@@ -1753,14 +1725,12 @@
 	uh->source = src_port;
 
 	uh->len = htons(skb->len);
-	uh->check = 0;
 
-	err = handle_offloads(skb);
-	if (err)
-		return err;
+	udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
+		     src, dst, skb->len);
 
 	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
-			     tos, ttl, df, false);
+			     tos, ttl, df, xnet);
 }
 EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
 
@@ -1853,7 +1823,7 @@
 		fl4.daddr = dst->sin.sin_addr.s_addr;
 		fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
 
-		rt = ip_route_output_key(dev_net(dev), &fl4);
+		rt = ip_route_output_key(vxlan->net, &fl4);
 		if (IS_ERR(rt)) {
 			netdev_dbg(dev, "no route to %pI4\n",
 				   &dst->sin.sin_addr.s_addr);
@@ -1874,7 +1844,7 @@
 			struct vxlan_dev *dst_vxlan;
 
 			ip_rt_put(rt);
-			dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+			dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1887,7 +1857,8 @@
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
 				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8));
+				     htonl(vni << 8),
+				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 
 		if (err < 0)
 			goto rt_tx_error;
@@ -1927,7 +1898,7 @@
 			struct vxlan_dev *dst_vxlan;
 
 			dst_release(ndst);
-			dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+			dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1938,7 +1909,8 @@
 
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8));
+				      src_port, dst_port, htonl(vni << 8),
+				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 #endif
 	}
 
@@ -2082,7 +2054,7 @@
 static int vxlan_init(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	struct vxlan_sock *vs;
 
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -2090,7 +2062,7 @@
 		return -ENOMEM;
 
 	spin_lock(&vn->sock_lock);
-	vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
+	vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
 	if (vs) {
 		/* If we have a socket with same port already, reuse it */
 		atomic_inc(&vs->refcnt);
@@ -2172,8 +2144,8 @@
 /* Cleanup timer and forwarding table on shutdown */
 static int vxlan_stop(struct net_device *dev)
 {
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 
 	if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
@@ -2202,7 +2174,7 @@
 	struct net_device *lowerdev;
 	int max_mtu;
 
-	lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex);
+	lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
 	if (lowerdev == NULL)
 		return eth_change_mtu(dev, new_mtu);
 
@@ -2285,7 +2257,6 @@
 
 	dev->tx_queue_len = 0;
 	dev->features	|= NETIF_F_LLTX;
-	dev->features	|= NETIF_F_NETNS_LOCAL;
 	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
 	dev->features   |= NETIF_F_RXCSUM;
 	dev->features   |= NETIF_F_GSO_SOFTWARE;
@@ -2401,7 +2372,7 @@
  * could be used for both IPv4 and IPv6 communications, but
  * users may set bindv6only=1.
  */
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
 {
 	struct sock *sk;
 	struct socket *sock;
@@ -2438,18 +2409,25 @@
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
+
+	if (flags & VXLAN_F_UDP_ZERO_CSUM6_TX)
+		udp_set_no_check6_tx(sk, true);
+
+	if (flags & VXLAN_F_UDP_ZERO_CSUM6_RX)
+		udp_set_no_check6_rx(sk, true);
+
 	return sock;
 }
 
 #else
 
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
 {
 		return ERR_PTR(-EPFNOSUPPORT);
 }
 #endif
 
-static struct socket *create_v4_sock(struct net *net, __be16 port)
+static struct socket *create_v4_sock(struct net *net, __be16 port, u32 flags)
 {
 	struct sock *sk;
 	struct socket *sock;
@@ -2482,18 +2460,24 @@
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
+
+	if (!(flags & VXLAN_F_UDP_CSUM))
+		sock->sk->sk_no_check_tx = 1;
+
 	return sock;
 }
 
 /* Create new listen socket if needed */
 static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
-					      vxlan_rcv_t *rcv, void *data, bool ipv6)
+					      vxlan_rcv_t *rcv, void *data,
+					      u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 	struct socket *sock;
 	struct sock *sk;
 	unsigned int h;
+	bool ipv6 = !!(flags & VXLAN_F_IPV6);
 
 	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
 	if (!vs)
@@ -2505,9 +2489,9 @@
 	INIT_WORK(&vs->del_work, vxlan_del_work);
 
 	if (ipv6)
-		sock = create_v6_sock(net, port);
+		sock = create_v6_sock(net, port, flags);
 	else
-		sock = create_v4_sock(net, port);
+		sock = create_v4_sock(net, port, flags);
 	if (IS_ERR(sock)) {
 		kfree(vs);
 		return ERR_CAST(sock);
@@ -2545,12 +2529,12 @@
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
-				  bool no_share, bool ipv6)
+				  bool no_share, u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 
-	vs = vxlan_socket_create(net, port, rcv, data, ipv6);
+	vs = vxlan_socket_create(net, port, rcv, data, flags);
 	if (!IS_ERR(vs))
 		return vs;
 
@@ -2578,12 +2562,12 @@
 static void vxlan_sock_work(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
-	struct net *net = dev_net(vxlan->dev);
+	struct net *net = vxlan->net;
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	__be16 port = vxlan->dst_port;
 	struct vxlan_sock *nvs;
 
-	nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags & VXLAN_F_IPV6);
+	nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
 	spin_lock(&vn->sock_lock);
 	if (!IS_ERR(nvs))
 		vxlan_vs_add_dev(nvs, vxlan);
@@ -2605,6 +2589,8 @@
 	if (!data[IFLA_VXLAN_ID])
 		return -EINVAL;
 
+	vxlan->net = dev_net(dev);
+
 	vni = nla_get_u32(data[IFLA_VXLAN_ID]);
 	dst->remote_vni = vni;
 
@@ -2705,12 +2691,23 @@
 	if (data[IFLA_VXLAN_PORT])
 		vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
 
+	if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+		vxlan->flags |= VXLAN_F_UDP_CSUM;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+
 	if (vxlan_find_vni(net, vni, vxlan->dst_port)) {
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
 	}
 
-	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
+	dev->ethtool_ops = &vxlan_ethtool_ops;
 
 	/* create an fdb entry for a valid default destination */
 	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
@@ -2739,8 +2736,8 @@
 
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 {
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 
 	spin_lock(&vn->sock_lock);
 	if (!hlist_unhashed(&vxlan->hlist))
@@ -2768,7 +2765,10 @@
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
 		nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
-		nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */
+		nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
 		0;
 }
 
@@ -2828,7 +2828,13 @@
 			!!(vxlan->flags & VXLAN_F_L3MISS)) ||
 	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
 	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
-	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port))
+	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
+			!!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
@@ -2905,8 +2911,33 @@
 	return 0;
 }
 
+static void __net_exit vxlan_exit_net(struct net *net)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_dev *vxlan, *next;
+	struct net_device *dev, *aux;
+	LIST_HEAD(list);
+
+	rtnl_lock();
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &vxlan_link_ops)
+			unregister_netdevice_queue(dev, &list);
+
+	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+		/* Devices living in this netns were queued by the loop above;
+		 * only queue the ones whose netdev sits in another netns.
+		 */
+		if (!net_eq(dev_net(vxlan->dev), net))
+			unregister_netdevice_queue(vxlan->dev, &list);
+	}
+
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
 static struct pernet_operations vxlan_net_ops = {
 	.init = vxlan_init_net,
+	.exit = vxlan_exit_net,
 	.id   = &vxlan_net_id,
 	.size = sizeof(struct vxlan_net),
 };

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index bcfff0d..93ace04 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c

@@ -26,6 +26,7 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/if.h>
 #include <linux/hdlc.h>
 #include <asm/io.h>
@@ -678,7 +679,6 @@
 fst_cpureset(struct fst_card_info *card)
 {
 	unsigned char interrupt_line_register;
-	unsigned long j = jiffies + 1;
 	unsigned int regval;
 
 	if (card->family == FST_FAMILY_TXU) {
@@ -696,16 +696,12 @@
 		/*
 		 * We are delaying here to allow the 9054 to reset itself
 		 */
-		j = jiffies + 1;
-		while (jiffies < j)
-			/* Do nothing */ ;
+		usleep_range(10, 20);
 		outw(0x240f, card->pci_conf + CNTRL_9054 + 2);
 		/*
 		 * We are delaying here to allow the 9054 to reload its eeprom
 		 */
-		j = jiffies + 1;
-		while (jiffies < j)
-			/* Do nothing */ ;
+		usleep_range(10, 20);
 		outw(0x040f, card->pci_conf + CNTRL_9054 + 2);
 
 		if (pci_write_config_byte
@@ -886,20 +882,18 @@
  *      Receive a frame through the DMA
  */
 static inline void
-fst_rx_dma(struct fst_card_info *card, dma_addr_t skb,
-	   dma_addr_t mem, int len)
+fst_rx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 {
 	/*
 	 * This routine will setup the DMA and start it
 	 */
 
-	dbg(DBG_RX, "In fst_rx_dma %lx %lx %d\n",
-	    (unsigned long) skb, (unsigned long) mem, len);
+	dbg(DBG_RX, "In fst_rx_dma %x %x %d\n", (u32)dma, mem, len);
 	if (card->dmarx_in_progress) {
 		dbg(DBG_ASS, "In fst_rx_dma while dma in progress\n");
 	}
 
-	outl(skb, card->pci_conf + DMAPADR0);	/* Copy to here */
+	outl(dma, card->pci_conf + DMAPADR0);	/* Copy to here */
 	outl(mem, card->pci_conf + DMALADR0);	/* from here */
 	outl(len, card->pci_conf + DMASIZ0);	/* for this length */
 	outl(0x00000000c, card->pci_conf + DMADPR0);	/* In this direction */
@@ -915,20 +909,19 @@
  *      Send a frame through the DMA
  */
 static inline void
-fst_tx_dma(struct fst_card_info *card, unsigned char *skb,
-	   unsigned char *mem, int len)
+fst_tx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 {
 	/*
 	 * This routine will setup the DMA and start it.
 	 */
 
-	dbg(DBG_TX, "In fst_tx_dma %p %p %d\n", skb, mem, len);
+	dbg(DBG_TX, "In fst_tx_dma %x %x %d\n", (u32)dma, mem, len);
 	if (card->dmatx_in_progress) {
 		dbg(DBG_ASS, "In fst_tx_dma while dma in progress\n");
 	}
 
-	outl((unsigned long) skb, card->pci_conf + DMAPADR1);	/* Copy from here */
-	outl((unsigned long) mem, card->pci_conf + DMALADR1);	/* to here */
+	outl(dma, card->pci_conf + DMAPADR1);	/* Copy from here */
+	outl(mem, card->pci_conf + DMALADR1);	/* to here */
 	outl(len, card->pci_conf + DMASIZ1);	/* for this length */
 	outl(0x000000004, card->pci_conf + DMADPR1);	/* In this direction */
 
@@ -1405,9 +1398,7 @@
 					card->dma_len_tx = skb->len;
 					card->dma_txpos = port->txpos;
 					fst_tx_dma(card,
-						   (char *) card->
-						   tx_dma_handle_card,
-						   (char *)
+						   card->tx_dma_handle_card,
 						   BUF_OFFSET(txBuffer[pi]
 							      [port->txpos][0]),
 						   skb->len);

diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index de3bbf4..cdd45fb 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c

@@ -1322,10 +1322,6 @@
 
 static int sdla_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct frad_local *flp;
-
-	flp = netdev_priv(dev);
-
 	if (netif_running(dev))
 		return -EBUSY;
 

diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index 4a01e5c..4c41790 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c

@@ -1061,7 +1061,7 @@
 		goto error_bad_major;
 	}
 	result = 0;
-	if (minor < I2400M_HDIv_MINOR_2 && minor > I2400M_HDIv_MINOR)
+	if (minor > I2400M_HDIv_MINOR_2 || minor < I2400M_HDIv_MINOR)
 		dev_warn(dev, "untested minor fw version %u.%u.%u\n",
 			 major, minor, branch);
 	/* Yes, we ignore the branch -- we don't have to track it */

diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 9c34d2f..9c78090 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c

@@ -500,26 +500,23 @@
  */
 int i2400m_pre_reset(struct i2400m *i2400m)
 {
-	int result;
 	struct device *dev = i2400m_dev(i2400m);
 
 	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
 	d_printf(1, dev, "pre-reset shut down\n");
 
-	result = 0;
 	mutex_lock(&i2400m->init_mutex);
 	if (i2400m->updown) {
 		netif_tx_disable(i2400m->wimax_dev.net_dev);
 		__i2400m_dev_stop(i2400m);
-		result = 0;
 		/* down't set updown to zero -- this way
 		 * post_reset can restore properly */
 	}
 	mutex_unlock(&i2400m->init_mutex);
 	if (i2400m->bus_release)
 		i2400m->bus_release(i2400m);
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
+	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(i2400m_pre_reset);
 

diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index 99b3bfa..d48776e 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c

@@ -365,15 +365,15 @@
 static int at76_usbdfu_download(struct usb_device *udev, u8 *buf, u32 size,
 				int manifest_sync_timeout)
 {
-	u8 *block;
-	struct dfu_status dfu_stat_buf;
 	int ret = 0;
 	int need_dfu_state = 1;
 	int is_done = 0;
-	u8 dfu_state = 0;
 	u32 dfu_timeout = 0;
 	int bsize = 0;
 	int blockno = 0;
+	struct dfu_status *dfu_stat_buf = NULL;
+	u8 *dfu_state = NULL;
+	u8 *block = NULL;
 
 	at76_dbg(DBG_DFU, "%s( %p, %u, %d)", __func__, buf, size,
 		 manifest_sync_timeout);
@@ -383,13 +383,28 @@
 		return -EINVAL;
 	}
 
+	dfu_stat_buf = kmalloc(sizeof(struct dfu_status), GFP_KERNEL);
+	if (!dfu_stat_buf) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	block = kmalloc(FW_BLOCK_SIZE, GFP_KERNEL);
-	if (!block)
-		return -ENOMEM;
+	if (!block) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	dfu_state = kmalloc(sizeof(u8), GFP_KERNEL);
+	if (!dfu_state) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+	*dfu_state = 0;
 
 	do {
 		if (need_dfu_state) {
-			ret = at76_dfu_get_state(udev, &dfu_state);
+			ret = at76_dfu_get_state(udev, dfu_state);
 			if (ret < 0) {
 				dev_err(&udev->dev,
 					"cannot get DFU state: %d\n", ret);
@@ -398,13 +413,13 @@
 			need_dfu_state = 0;
 		}
 
-		switch (dfu_state) {
+		switch (*dfu_state) {
 		case STATE_DFU_DOWNLOAD_SYNC:
 			at76_dbg(DBG_DFU, "STATE_DFU_DOWNLOAD_SYNC");
-			ret = at76_dfu_get_status(udev, &dfu_stat_buf);
+			ret = at76_dfu_get_status(udev, dfu_stat_buf);
 			if (ret >= 0) {
-				dfu_state = dfu_stat_buf.state;
-				dfu_timeout = at76_get_timeout(&dfu_stat_buf);
+				*dfu_state = dfu_stat_buf->state;
+				dfu_timeout = at76_get_timeout(dfu_stat_buf);
 				need_dfu_state = 0;
 			} else
 				dev_err(&udev->dev,
@@ -447,12 +462,12 @@
 		case STATE_DFU_MANIFEST_SYNC:
 			at76_dbg(DBG_DFU, "STATE_DFU_MANIFEST_SYNC");
 
-			ret = at76_dfu_get_status(udev, &dfu_stat_buf);
+			ret = at76_dfu_get_status(udev, dfu_stat_buf);
 			if (ret < 0)
 				break;
 
-			dfu_state = dfu_stat_buf.state;
-			dfu_timeout = at76_get_timeout(&dfu_stat_buf);
+			*dfu_state = dfu_stat_buf->state;
+			dfu_timeout = at76_get_timeout(dfu_stat_buf);
 			need_dfu_state = 0;
 
 			/* override the timeout from the status response,
@@ -484,14 +499,17 @@
 			break;
 
 		default:
-			at76_dbg(DBG_DFU, "DFU UNKNOWN STATE (%d)", dfu_state);
+			at76_dbg(DBG_DFU, "DFU UNKNOWN STATE (%d)", *dfu_state);
 			ret = -EINVAL;
 			break;
 		}
 	} while (!is_done && (ret >= 0));
 
 exit:
+	kfree(dfu_state);
 	kfree(block);
+	kfree(dfu_stat_buf);
+
 	if (ret >= 0)
 		ret = 0;
 
@@ -1277,6 +1295,7 @@
 			dev_err(&udev->dev,
 				"loading %dth firmware block failed: %d\n",
 				blockno, ret);
+			ret = -EIO;
 			goto exit;
 		}
 		buf += bsize;
@@ -1410,6 +1429,8 @@
 	/* remove BSSID from previous run */
 	memset(priv->bssid, 0, ETH_ALEN);
 
+	priv->scanning = false;
+
 	if (at76_set_radio(priv, 1) == 1)
 		at76_wait_completion(priv, CMD_RADIO_ON);
 
@@ -1483,6 +1504,52 @@
 	mutex_unlock(&priv->mtx);
 }
 
+/* This is a workaround to make scanning work:
+ * currently mac80211 does not process frames with no frequency
+ * information.
+ * However, during a scan the HW performs a sweep by itself, and we
+ * are unable to know where the radio is actually tuned.
+ * This function does its best to guess this information.
+ * During a scan, if the current frame is a beacon or a probe response,
+ * the channel information is extracted from it.
+ * When not scanning, for other frames, or if for whatever reason we
+ * fail to parse beacons and probe responses, this function falls back
+ * to the priv->channel information, which should be correct at least
+ * when we are not scanning.
+ */
+static inline int at76_guess_freq(struct at76_priv *priv)
+{
+	size_t el_off;
+	const u8 *el;
+	int channel = priv->channel;
+	int len = priv->rx_skb->len;
+	struct ieee80211_hdr *hdr = (void *)priv->rx_skb->data;
+
+	if (!priv->scanning)
+		goto exit;
+
+	if (len < 24)
+		goto exit;
+
+	if (ieee80211_is_probe_resp(hdr->frame_control)) {
+		el_off = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
+		el = ((struct ieee80211_mgmt *)hdr)->u.probe_resp.variable;
+	} else if (ieee80211_is_beacon(hdr->frame_control)) {
+		el_off = offsetof(struct ieee80211_mgmt, u.beacon.variable);
+		el = ((struct ieee80211_mgmt *)hdr)->u.beacon.variable;
+	} else {
+		goto exit;
+	}
+	len -= el_off;
+
+	el = cfg80211_find_ie(WLAN_EID_DS_PARAMS, el, len);
+	if (el && el[1] > 0)
+		channel = el[2];
+
+exit:
+	return ieee80211_channel_to_frequency(channel, IEEE80211_BAND_2GHZ);
+}
+
 static void at76_rx_tasklet(unsigned long param)
 {
 	struct urb *urb = (struct urb *)param;
@@ -1523,6 +1590,8 @@
 	rx_status.signal = buf->rssi;
 	rx_status.flag |= RX_FLAG_DECRYPTED;
 	rx_status.flag |= RX_FLAG_IV_STRIPPED;
+	rx_status.band = IEEE80211_BAND_2GHZ;
+	rx_status.freq = at76_guess_freq(priv);
 
 	at76_dbg(DBG_MAC80211, "calling ieee80211_rx_irqsafe(): %d/%d",
 		 priv->rx_skb->len, priv->rx_skb->data_len);
@@ -1875,6 +1944,8 @@
 	if (is_valid_ether_addr(priv->bssid))
 		at76_join(priv);
 
+	priv->scanning = false;
+
 	mutex_unlock(&priv->mtx);
 
 	ieee80211_scan_completed(priv->hw, false);
@@ -1929,6 +2000,7 @@
 		goto exit;
 	}
 
+	priv->scanning = true;
 	ieee80211_queue_delayed_work(priv->hw, &priv->dwork_hw_scan,
 				     SCAN_POLL_INTERVAL);
 
@@ -2020,6 +2092,44 @@
 	ieee80211_queue_work(hw, &priv->work_set_promisc);
 }
 
+static int at76_set_wep(struct at76_priv *priv)
+{
+	int ret = 0;
+	struct mib_mac_wep *mib_data = &priv->mib_buf.data.wep_mib;
+
+	priv->mib_buf.type = MIB_MAC_WEP;
+	priv->mib_buf.size = sizeof(struct mib_mac_wep);
+	priv->mib_buf.index = 0;
+
+	memset(mib_data, 0, sizeof(*mib_data));
+
+	if (priv->wep_enabled) {
+		if (priv->wep_keys_len[priv->wep_key_id] > WEP_SMALL_KEY_LEN)
+			mib_data->encryption_level = 2;
+		else
+			mib_data->encryption_level = 1;
+
+		/* always exclude unencrypted if WEP is active */
+		mib_data->exclude_unencrypted = 1;
+	} else {
+		mib_data->exclude_unencrypted = 0;
+		mib_data->encryption_level = 0;
+	}
+
+	mib_data->privacy_invoked = priv->wep_enabled;
+	mib_data->wep_default_key_id = priv->wep_key_id;
+	memcpy(mib_data->wep_default_keyvalue, priv->wep_keys,
+	       sizeof(priv->wep_keys));
+
+	ret = at76_set_mib(priv, &priv->mib_buf);
+
+	if (ret < 0)
+		wiphy_err(priv->hw->wiphy,
+			  "set_mib (wep) failed: %d\n", ret);
+
+	return ret;
+}
+
 static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 			struct ieee80211_vif *vif, struct ieee80211_sta *sta,
 			struct ieee80211_key_conf *key)
@@ -2062,7 +2172,7 @@
 			priv->wep_enabled = 1;
 	}
 
-	at76_startup_device(priv);
+	at76_set_wep(priv);
 
 	mutex_unlock(&priv->mtx);
 
@@ -2330,16 +2440,22 @@
 	struct usb_device *udev;
 	int op_mode;
 	int need_ext_fw = 0;
-	struct mib_fw_version fwv;
+	struct mib_fw_version *fwv = NULL;
 	int board_type = (int)id->driver_info;
 
 	udev = usb_get_dev(interface_to_usbdev(interface));
 
+	fwv = kmalloc(sizeof(*fwv), GFP_KERNEL);
+	if (!fwv) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	/* Load firmware into kernel memory */
 	fwe = at76_load_firmware(udev, board_type);
 	if (!fwe) {
 		ret = -ENOENT;
-		goto error;
+		goto exit;
 	}
 
 	op_mode = at76_get_op_mode(udev);
@@ -2353,7 +2469,7 @@
 		dev_err(&interface->dev,
 			"cannot handle a device in HW_CONFIG_MODE\n");
 		ret = -EBUSY;
-		goto error;
+		goto exit;
 	}
 
 	if (op_mode != OPMODE_NORMAL_NIC_WITH_FLASH
@@ -2366,10 +2482,10 @@
 			dev_err(&interface->dev,
 				"error %d downloading internal firmware\n",
 				ret);
-			goto error;
+			goto exit;
 		}
 		usb_put_dev(udev);
-		return ret;
+		goto exit;
 	}
 
 	/* Internal firmware already inside the device.  Get firmware
@@ -2382,8 +2498,8 @@
 	 * query the device for the fw version */
 	if ((fwe->fw_version.major > 0 || fwe->fw_version.minor >= 100)
 	    || (op_mode == OPMODE_NORMAL_NIC_WITH_FLASH)) {
-		ret = at76_get_mib(udev, MIB_FW_VERSION, &fwv, sizeof(fwv));
-		if (ret < 0 || (fwv.major | fwv.minor) == 0)
+		ret = at76_get_mib(udev, MIB_FW_VERSION, fwv, sizeof(*fwv));
+		if (ret < 0 || (fwv->major | fwv->minor) == 0)
 			need_ext_fw = 1;
 	} else
 		/* No way to check firmware version, reload to be sure */
@@ -2394,37 +2510,37 @@
 			   "downloading external firmware\n");
 
 		ret = at76_load_external_fw(udev, fwe);
-		if (ret)
-			goto error;
+		if (ret < 0)
+			goto exit;
 
 		/* Re-check firmware version */
-		ret = at76_get_mib(udev, MIB_FW_VERSION, &fwv, sizeof(fwv));
+		ret = at76_get_mib(udev, MIB_FW_VERSION, fwv, sizeof(*fwv));
 		if (ret < 0) {
 			dev_err(&interface->dev,
 				"error %d getting firmware version\n", ret);
-			goto error;
+			goto exit;
 		}
 	}
 
 	priv = at76_alloc_new_device(udev);
 	if (!priv) {
 		ret = -ENOMEM;
-		goto error;
+		goto exit;
 	}
 
 	usb_set_intfdata(interface, priv);
 
-	memcpy(&priv->fw_version, &fwv, sizeof(struct mib_fw_version));
+	memcpy(&priv->fw_version, fwv, sizeof(struct mib_fw_version));
 	priv->board_type = board_type;
 
 	ret = at76_init_new_device(priv, interface);
 	if (ret < 0)
 		at76_delete_device(priv);
 
-	return ret;
-
-error:
-	usb_put_dev(udev);
+exit:
+	kfree(fwv);
+	if (ret < 0)
+		usb_put_dev(udev);
 	return ret;
 }
 

diff --git a/drivers/net/wireless/at76c50x-usb.h b/drivers/net/wireless/at76c50x-usb.h
index f14a654..55090a3 100644
--- a/drivers/net/wireless/at76c50x-usb.h
+++ b/drivers/net/wireless/at76c50x-usb.h

@@ -219,18 +219,6 @@
 	u8 reserved;
 } __packed;
 
-struct set_mib_buffer {
-	u8 type;
-	u8 size;
-	u8 index;
-	u8 reserved;
-	union {
-		u8 byte;
-		__le16 word;
-		u8 addr[ETH_ALEN];
-	} data;
-} __packed;
-
 struct mib_local {
 	u16 reserved0;
 	u8 beacon_enable;
@@ -334,6 +322,19 @@
 	u8 channel_list[14];	/* 0 for invalid channels */
 } __packed;
 
+struct set_mib_buffer {
+	u8 type;
+	u8 size;
+	u8 index;
+	u8 reserved;
+	union {
+		u8 byte;
+		__le16 word;
+		u8 addr[ETH_ALEN];
+		struct mib_mac_wep wep_mib;
+	} data;
+} __packed;
+
 struct at76_fw_header {
 	__le32 crc;		/* CRC32 of the whole image */
 	__le32 board_type;	/* firmware compatibility code */
@@ -417,6 +418,7 @@
 	int scan_max_time;	/* scan max channel time */
 	int scan_mode;		/* SCAN_TYPE_ACTIVE, SCAN_TYPE_PASSIVE */
 	int scan_need_any;	/* if set, need to scan for any ESSID */
+	bool scanning;		/* if set, the scan is running */
 
 	u16 assoc_id;		/* current association ID, if associated */
 

diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 507d9a9..f920506 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c

@@ -1090,7 +1090,8 @@
 	return ret;
 }
 
-static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ar5523_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct ar5523 *ar = hw->priv;
 

diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c
index a1f0996..17d221a 100644
--- a/drivers/net/wireless/ath/ath10k/bmi.c
+++ b/drivers/net/wireless/ath/ath10k/bmi.c

@@ -175,7 +175,7 @@
 	return 0;
 }
 
-int ath10k_bmi_execute(struct ath10k *ar, u32 address, u32 *param)
+int ath10k_bmi_execute(struct ath10k *ar, u32 address, u32 param, u32 *result)
 {
 	struct bmi_cmd cmd;
 	union bmi_resp resp;
@@ -184,7 +184,7 @@
 	int ret;
 
 	ath10k_dbg(ATH10K_DBG_BMI, "bmi execute address 0x%x param 0x%x\n",
-		   address, *param);
+		   address, param);
 
 	if (ar->bmi.done_sent) {
 		ath10k_warn("command disallowed\n");
@@ -193,7 +193,7 @@
 
 	cmd.id            = __cpu_to_le32(BMI_EXECUTE);
 	cmd.execute.addr  = __cpu_to_le32(address);
-	cmd.execute.param = __cpu_to_le32(*param);
+	cmd.execute.param = __cpu_to_le32(param);
 
 	ret = ath10k_hif_exchange_bmi_msg(ar, &cmd, cmdlen, &resp, &resplen);
 	if (ret) {
@@ -204,10 +204,13 @@
 	if (resplen < sizeof(resp.execute)) {
 		ath10k_warn("invalid execute response length (%d)\n",
 			    resplen);
-		return ret;
+		return -EIO;
 	}
 
-	*param = __le32_to_cpu(resp.execute.result);
+	*result = __le32_to_cpu(resp.execute.result);
+
+	ath10k_dbg(ATH10K_DBG_BMI, "bmi execute result 0x%x\n", *result);
+
 	return 0;
 }
 

diff --git a/drivers/net/wireless/ath/ath10k/bmi.h b/drivers/net/wireless/ath/ath10k/bmi.h
index 8d81ce1..111ab70 100644
--- a/drivers/net/wireless/ath/ath10k/bmi.h
+++ b/drivers/net/wireless/ath/ath10k/bmi.h

@@ -201,7 +201,8 @@
 									\
 		addr = host_interest_item_address(HI_ITEM(item));	\
 		ret = ath10k_bmi_read_memory(ar, addr, (u8 *)&tmp, 4); \
-		*val = __le32_to_cpu(tmp);				\
+		if (!ret)						\
+			*val = __le32_to_cpu(tmp);			\
 		ret;							\
 	 })
 
@@ -217,7 +218,7 @@
 		ret;							\
 	})
 
-int ath10k_bmi_execute(struct ath10k *ar, u32 address, u32 *param);
+int ath10k_bmi_execute(struct ath10k *ar, u32 address, u32 param, u32 *result);
 int ath10k_bmi_lz_stream_start(struct ath10k *ar, u32 address);
 int ath10k_bmi_lz_data(struct ath10k *ar, const void *buffer, u32 length);
 int ath10k_bmi_fast_download(struct ath10k *ar, u32 address,

diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index a79499c..d185dc0 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c

@@ -329,6 +329,33 @@
 	return ret;
 }
 
+void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
+{
+	struct ath10k *ar = pipe->ar;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_ce_ring *src_ring = pipe->src_ring;
+	u32 ctrl_addr = pipe->ctrl_addr;
+
+	lockdep_assert_held(&ar_pci->ce_lock);
+
+	/*
+	 * This function must be called only if there is an incomplete
+	 * scatter-gather transfer (before index register is updated)
+	 * that needs to be cleaned up.
+	 */
+	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
+		return;
+
+	if (WARN_ON_ONCE(src_ring->write_index ==
+			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
+		return;
+
+	src_ring->write_index--;
+	src_ring->write_index &= src_ring->nentries_mask;
+
+	src_ring->per_transfer_context[src_ring->write_index] = NULL;
+}
+
 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
 		   void *per_transfer_context,
 		   u32 buffer,
@@ -840,35 +867,17 @@
 
 static int ath10k_ce_init_src_ring(struct ath10k *ar,
 				   unsigned int ce_id,
-				   struct ath10k_ce_pipe *ce_state,
 				   const struct ce_attr *attr)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	struct ath10k_ce_ring *src_ring;
-	unsigned int nentries = attr->src_nentries;
-	unsigned int ce_nbytes;
-	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
-	dma_addr_t base_addr;
-	char *ptr;
+	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
+	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);
 
-	nentries = roundup_pow_of_two(nentries);
+	nentries = roundup_pow_of_two(attr->src_nentries);
 
-	if (ce_state->src_ring) {
-		WARN_ON(ce_state->src_ring->nentries != nentries);
-		return 0;
-	}
-
-	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
-	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
-	if (ptr == NULL)
-		return -ENOMEM;
-
-	ce_state->src_ring = (struct ath10k_ce_ring *)ptr;
-	src_ring = ce_state->src_ring;
-
-	ptr += sizeof(struct ath10k_ce_ring);
-	src_ring->nentries = nentries;
-	src_ring->nentries_mask = nentries - 1;
+	memset(src_ring->per_transfer_context, 0,
+	       nentries * sizeof(*src_ring->per_transfer_context));
 
 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
 	src_ring->sw_index &= src_ring->nentries_mask;
@@ -878,21 +887,87 @@
 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
 	src_ring->write_index &= src_ring->nentries_mask;
 
-	src_ring->per_transfer_context = (void **)ptr;
+	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
+					 src_ring->base_addr_ce_space);
+	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
+	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
+	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
+	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
+	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
+
+	ath10k_dbg(ATH10K_DBG_BOOT,
+		   "boot init ce src ring id %d entries %d base_addr %p\n",
+		   ce_id, nentries, src_ring->base_addr_owner_space);
+
+	return 0;
+}
+
+static int ath10k_ce_init_dest_ring(struct ath10k *ar,
+				    unsigned int ce_id,
+				    const struct ce_attr *attr)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
+	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);
+
+	nentries = roundup_pow_of_two(attr->dest_nentries);
+
+	memset(dest_ring->per_transfer_context, 0,
+	       nentries * sizeof(*dest_ring->per_transfer_context));
+
+	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
+	dest_ring->sw_index &= dest_ring->nentries_mask;
+	dest_ring->write_index =
+		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
+	dest_ring->write_index &= dest_ring->nentries_mask;
+
+	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
+					  dest_ring->base_addr_ce_space);
+	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
+	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
+	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
+	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
+
+	ath10k_dbg(ATH10K_DBG_BOOT,
+		   "boot ce dest ring id %d entries %d base_addr %p\n",
+		   ce_id, nentries, dest_ring->base_addr_owner_space);
+
+	return 0;
+}
+
+static struct ath10k_ce_ring *
+ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
+			 const struct ce_attr *attr)
+{
+	struct ath10k_ce_ring *src_ring;
+	u32 nentries = attr->src_nentries;
+	dma_addr_t base_addr;
+
+	nentries = roundup_pow_of_two(nentries);
+
+	src_ring = kzalloc(sizeof(*src_ring) +
+			   (nentries *
+			    sizeof(*src_ring->per_transfer_context)),
+			   GFP_KERNEL);
+	if (src_ring == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	src_ring->nentries = nentries;
+	src_ring->nentries_mask = nentries - 1;
 
 	/*
 	 * Legacy platforms that do not support cache
 	 * coherent DMA are unsupported
 	 */
 	src_ring->base_addr_owner_space_unaligned =
-		pci_alloc_consistent(ar_pci->pdev,
-				     (nentries * sizeof(struct ce_desc) +
-				      CE_DESC_RING_ALIGN),
-				     &base_addr);
+		dma_alloc_coherent(ar->dev,
+				   (nentries * sizeof(struct ce_desc) +
+				    CE_DESC_RING_ALIGN),
+				   &base_addr, GFP_KERNEL);
 	if (!src_ring->base_addr_owner_space_unaligned) {
-		kfree(ce_state->src_ring);
-		ce_state->src_ring = NULL;
-		return -ENOMEM;
+		kfree(src_ring);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	src_ring->base_addr_ce_space_unaligned = base_addr;
@@ -912,88 +987,54 @@
 		kmalloc((nentries * sizeof(struct ce_desc) +
 			 CE_DESC_RING_ALIGN), GFP_KERNEL);
 	if (!src_ring->shadow_base_unaligned) {
-		pci_free_consistent(ar_pci->pdev,
-				    (nentries * sizeof(struct ce_desc) +
-				     CE_DESC_RING_ALIGN),
-				    src_ring->base_addr_owner_space,
-				    src_ring->base_addr_ce_space);
-		kfree(ce_state->src_ring);
-		ce_state->src_ring = NULL;
-		return -ENOMEM;
+		dma_free_coherent(ar->dev,
+				  (nentries * sizeof(struct ce_desc) +
+				   CE_DESC_RING_ALIGN),
+				  src_ring->base_addr_owner_space,
+				  src_ring->base_addr_ce_space);
+		kfree(src_ring);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	src_ring->shadow_base = PTR_ALIGN(
 			src_ring->shadow_base_unaligned,
 			CE_DESC_RING_ALIGN);
 
-	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
-					 src_ring->base_addr_ce_space);
-	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
-	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
-	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
-	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
-	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
-
-	ath10k_dbg(ATH10K_DBG_BOOT,
-		   "boot ce src ring id %d entries %d base_addr %p\n",
-		   ce_id, nentries, src_ring->base_addr_owner_space);
-
-	return 0;
+	return src_ring;
 }
 
-static int ath10k_ce_init_dest_ring(struct ath10k *ar,
-				    unsigned int ce_id,
-				    struct ath10k_ce_pipe *ce_state,
-				    const struct ce_attr *attr)
+static struct ath10k_ce_ring *
+ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
+			  const struct ce_attr *attr)
 {
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	struct ath10k_ce_ring *dest_ring;
-	unsigned int nentries = attr->dest_nentries;
-	unsigned int ce_nbytes;
-	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
+	u32 nentries;
 	dma_addr_t base_addr;
-	char *ptr;
 
-	nentries = roundup_pow_of_two(nentries);
+	nentries = roundup_pow_of_two(attr->dest_nentries);
 
-	if (ce_state->dest_ring) {
-		WARN_ON(ce_state->dest_ring->nentries != nentries);
-		return 0;
-	}
+	dest_ring = kzalloc(sizeof(*dest_ring) +
+			    (nentries *
+			     sizeof(*dest_ring->per_transfer_context)),
+			    GFP_KERNEL);
+	if (dest_ring == NULL)
+		return ERR_PTR(-ENOMEM);
 
-	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
-	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
-	if (ptr == NULL)
-		return -ENOMEM;
-
-	ce_state->dest_ring = (struct ath10k_ce_ring *)ptr;
-	dest_ring = ce_state->dest_ring;
-
-	ptr += sizeof(struct ath10k_ce_ring);
 	dest_ring->nentries = nentries;
 	dest_ring->nentries_mask = nentries - 1;
 
-	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
-	dest_ring->sw_index &= dest_ring->nentries_mask;
-	dest_ring->write_index =
-		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
-	dest_ring->write_index &= dest_ring->nentries_mask;
-
-	dest_ring->per_transfer_context = (void **)ptr;
-
 	/*
 	 * Legacy platforms that do not support cache
 	 * coherent DMA are unsupported
 	 */
 	dest_ring->base_addr_owner_space_unaligned =
-		pci_alloc_consistent(ar_pci->pdev,
-				     (nentries * sizeof(struct ce_desc) +
-				      CE_DESC_RING_ALIGN),
-				     &base_addr);
+		dma_alloc_coherent(ar->dev,
+				   (nentries * sizeof(struct ce_desc) +
+				    CE_DESC_RING_ALIGN),
+				   &base_addr, GFP_KERNEL);
 	if (!dest_ring->base_addr_owner_space_unaligned) {
-		kfree(ce_state->dest_ring);
-		ce_state->dest_ring = NULL;
-		return -ENOMEM;
+		kfree(dest_ring);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	dest_ring->base_addr_ce_space_unaligned = base_addr;
@@ -1012,39 +1053,7 @@
 			dest_ring->base_addr_ce_space_unaligned,
 			CE_DESC_RING_ALIGN);
 
-	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
-					  dest_ring->base_addr_ce_space);
-	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
-	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
-	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
-	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
-
-	ath10k_dbg(ATH10K_DBG_BOOT,
-		   "boot ce dest ring id %d entries %d base_addr %p\n",
-		   ce_id, nentries, dest_ring->base_addr_owner_space);
-
-	return 0;
-}
-
-static struct ath10k_ce_pipe *ath10k_ce_init_state(struct ath10k *ar,
-					     unsigned int ce_id,
-					     const struct ce_attr *attr)
-{
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
-	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
-
-	spin_lock_bh(&ar_pci->ce_lock);
-
-	ce_state->ar = ar;
-	ce_state->id = ce_id;
-	ce_state->ctrl_addr = ctrl_addr;
-	ce_state->attr_flags = attr->flags;
-	ce_state->src_sz_max = attr->src_sz_max;
-
-	spin_unlock_bh(&ar_pci->ce_lock);
-
-	return ce_state;
+	return dest_ring;
 }
 
 /*
@@ -1054,11 +1063,11 @@
  * initialization. It may be that only one side or the other is
  * initialized by software/firmware.
  */
-struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
-				unsigned int ce_id,
-				const struct ce_attr *attr)
+int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
+			const struct ce_attr *attr)
 {
-	struct ath10k_ce_pipe *ce_state;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
 	int ret;
 
 	/*
@@ -1074,64 +1083,128 @@
 
 	ret = ath10k_pci_wake(ar);
 	if (ret)
-		return NULL;
+		return ret;
 
-	ce_state = ath10k_ce_init_state(ar, ce_id, attr);
-	if (!ce_state) {
-		ath10k_err("Failed to initialize CE state for ID: %d\n", ce_id);
-		goto out;
-	}
+	spin_lock_bh(&ar_pci->ce_lock);
+	ce_state->ar = ar;
+	ce_state->id = ce_id;
+	ce_state->ctrl_addr = ath10k_ce_base_address(ce_id);
+	ce_state->attr_flags = attr->flags;
+	ce_state->src_sz_max = attr->src_sz_max;
+	spin_unlock_bh(&ar_pci->ce_lock);
 
 	if (attr->src_nentries) {
-		ret = ath10k_ce_init_src_ring(ar, ce_id, ce_state, attr);
+		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
 		if (ret) {
 			ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n",
 				   ce_id, ret);
-			ath10k_ce_deinit(ce_state);
-			ce_state = NULL;
 			goto out;
 		}
 	}
 
 	if (attr->dest_nentries) {
-		ret = ath10k_ce_init_dest_ring(ar, ce_id, ce_state, attr);
+		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
 		if (ret) {
 			ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n",
 				   ce_id, ret);
-			ath10k_ce_deinit(ce_state);
-			ce_state = NULL;
 			goto out;
 		}
 	}
 
 out:
 	ath10k_pci_sleep(ar);
-	return ce_state;
+	return ret;
 }
 
-void ath10k_ce_deinit(struct ath10k_ce_pipe *ce_state)
+static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
 {
-	struct ath10k *ar = ce_state->ar;
+	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
+	/* Pass 0 to the src ring base addr, size, dmax and highmark setters. */
+	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0);
+	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
+	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
+	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
+}
+
+static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
+{
+	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
+	/* Pass 0 to the dest ring base addr, size and highmark setters. */
+	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0);
+	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
+	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
+}
+
+void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
+{
+	int ret;	/* result of ath10k_pci_wake(); not passed to the caller */
+
+	ret = ath10k_pci_wake(ar);
+	if (ret)
+		return;	/* wake failed: neither ring is touched */
+
+	ath10k_ce_deinit_src_ring(ar, ce_id);
+	ath10k_ce_deinit_dest_ring(ar, ce_id);
+
+	ath10k_pci_sleep(ar);	/* follows the successful wake above */
+}
+
+int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
+			 const struct ce_attr *attr)
+{
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+	int ret;	/* error code taken from a failed ring allocation */
+
+	if (attr->src_nentries) {	/* src ring only when requested */
+		ce_state->src_ring = ath10k_ce_alloc_src_ring(ar, ce_id, attr);
+		if (IS_ERR(ce_state->src_ring)) {
+			ret = PTR_ERR(ce_state->src_ring);
+			ath10k_err("failed to allocate copy engine source ring %d: %d\n",
+				   ce_id, ret);
+			ce_state->src_ring = NULL;	/* drop the ERR_PTR value */
+			return ret;
+		}
+	}
+
+	if (attr->dest_nentries) {	/* dest ring only when requested */
+		ce_state->dest_ring = ath10k_ce_alloc_dest_ring(ar, ce_id,
+								attr);
+		if (IS_ERR(ce_state->dest_ring)) {
+			ret = PTR_ERR(ce_state->dest_ring);
+			ath10k_err("failed to allocate copy engine destination ring %d: %d\n",
+				   ce_id, ret);
+			ce_state->dest_ring = NULL;	/* drop the ERR_PTR value */
+			return ret;	/* src_ring, if set, is not freed here */
+		}
+	}
+
+	return 0;
+}
+
+void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
 
 	if (ce_state->src_ring) {
 		kfree(ce_state->src_ring->shadow_base_unaligned);
-		pci_free_consistent(ar_pci->pdev,
-				    (ce_state->src_ring->nentries *
-				     sizeof(struct ce_desc) +
-				     CE_DESC_RING_ALIGN),
-				    ce_state->src_ring->base_addr_owner_space,
-				    ce_state->src_ring->base_addr_ce_space);
+		dma_free_coherent(ar->dev,
+				  (ce_state->src_ring->nentries *
+				   sizeof(struct ce_desc) +
+				   CE_DESC_RING_ALIGN),
+				  ce_state->src_ring->base_addr_owner_space,
+				  ce_state->src_ring->base_addr_ce_space);
 		kfree(ce_state->src_ring);
 	}
 
 	if (ce_state->dest_ring) {
-		pci_free_consistent(ar_pci->pdev,
-				    (ce_state->dest_ring->nentries *
-				     sizeof(struct ce_desc) +
-				     CE_DESC_RING_ALIGN),
-				    ce_state->dest_ring->base_addr_owner_space,
-				    ce_state->dest_ring->base_addr_ce_space);
+		dma_free_coherent(ar->dev,
+				  (ce_state->dest_ring->nentries *
+				   sizeof(struct ce_desc) +
+				   CE_DESC_RING_ALIGN),
+				  ce_state->dest_ring->base_addr_owner_space,
+				  ce_state->dest_ring->base_addr_ce_space);
 		kfree(ce_state->dest_ring);
 	}
 

diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h
index 8eb7f99..7a5a36f 100644
--- a/drivers/net/wireless/ath/ath10k/ce.h
+++ b/drivers/net/wireless/ath/ath10k/ce.h

@@ -104,7 +104,8 @@
 	void *shadow_base_unaligned;
 	struct ce_desc *shadow_base;
 
-	void **per_transfer_context;
+	/* keep last */
+	void *per_transfer_context[0];
 };
 
 struct ath10k_ce_pipe {
@@ -159,6 +160,8 @@
 			  unsigned int transfer_id,
 			  unsigned int flags);
 
+void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe);
+
 void ath10k_ce_send_cb_register(struct ath10k_ce_pipe *ce_state,
 				void (*send_cb)(struct ath10k_ce_pipe *),
 				int disable_interrupts);
@@ -210,10 +213,12 @@
 
 /*==================CE Engine Initialization=======================*/
 
-/* Initialize an instance of a CE */
-struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
-				unsigned int ce_id,
-				const struct ce_attr *attr);
+int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
+			const struct ce_attr *attr);
+void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id);
+int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
+			  const struct ce_attr *attr);
+void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id);
 
 /*==================CE Engine Shutdown=======================*/
 /*
@@ -236,8 +241,6 @@
 			       unsigned int *nbytesp,
 			       unsigned int *transfer_idp);
 
-void ath10k_ce_deinit(struct ath10k_ce_pipe *ce_state);
-
 /*==================CE Interrupt Handlers====================*/
 void ath10k_ce_per_engine_service_any(struct ath10k *ar);
 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id);

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index ebc5fc2..82017f5 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c

@@ -58,36 +58,6 @@
 	complete(&ar->target_suspend);
 }
 
-static int ath10k_init_connect_htc(struct ath10k *ar)
-{
-	int status;
-
-	status = ath10k_wmi_connect_htc_service(ar);
-	if (status)
-		goto conn_fail;
-
-	/* Start HTC */
-	status = ath10k_htc_start(&ar->htc);
-	if (status)
-		goto conn_fail;
-
-	/* Wait for WMI event to be ready */
-	status = ath10k_wmi_wait_for_service_ready(ar);
-	if (status <= 0) {
-		ath10k_warn("wmi service ready event not received");
-		status = -ETIMEDOUT;
-		goto timeout;
-	}
-
-	ath10k_dbg(ATH10K_DBG_BOOT, "boot wmi ready\n");
-	return 0;
-
-timeout:
-	ath10k_htc_stop(&ar->htc);
-conn_fail:
-	return status;
-}
-
 static int ath10k_init_configure_target(struct ath10k *ar)
 {
 	u32 param_host;
@@ -249,30 +219,40 @@
 
 static int ath10k_download_and_run_otp(struct ath10k *ar)
 {
-	u32 address = ar->hw_params.patch_load_addr;
-	u32 exec_param;
+	u32 result, address = ar->hw_params.patch_load_addr;
 	int ret;
 
 	/* OTP is optional */
 
-	if (!ar->otp_data || !ar->otp_len)
+	if (!ar->otp_data || !ar->otp_len) {
+		ath10k_warn("Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n",
+			    ar->otp_data, ar->otp_len);
 		return 0;
+	}
+
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd\n",
+		   address, ar->otp_len);
 
 	ret = ath10k_bmi_fast_download(ar, address, ar->otp_data, ar->otp_len);
 	if (ret) {
 		ath10k_err("could not write otp (%d)\n", ret);
-		goto exit;
+		return ret;
 	}
 
-	exec_param = 0;
-	ret = ath10k_bmi_execute(ar, address, &exec_param);
+	ret = ath10k_bmi_execute(ar, address, 0, &result);
 	if (ret) {
 		ath10k_err("could not execute otp (%d)\n", ret);
-		goto exit;
+		return ret;
 	}
 
-exit:
-	return ret;
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot otp execute result %d\n", result);
+
+	if (result != 0) {	/* result was filled in by ath10k_bmi_execute() */
+		ath10k_err("otp calibration failed: %d\n", result);
+		return -EINVAL;	/* non-zero result is reported as failure */
+	}
+
+	return 0;
 }
 
 static int ath10k_download_fw(struct ath10k *ar)
@@ -389,8 +369,8 @@
 	/* first fetch the firmware file (firmware-*.bin) */
 	ar->firmware = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, name);
 	if (IS_ERR(ar->firmware)) {
-		ath10k_err("Could not fetch firmware file '%s': %ld\n",
-			   name, PTR_ERR(ar->firmware));
+		ath10k_err("could not fetch firmware file '%s/%s': %ld\n",
+			   ar->hw_params.fw.dir, name, PTR_ERR(ar->firmware));
 		return PTR_ERR(ar->firmware);
 	}
 
@@ -401,14 +381,14 @@
 	magic_len = strlen(ATH10K_FIRMWARE_MAGIC) + 1;
 
 	if (len < magic_len) {
-		ath10k_err("firmware image too small to contain magic: %zu\n",
-			   len);
+		ath10k_err("firmware file '%s/%s' too small to contain magic: %zu\n",
+			   ar->hw_params.fw.dir, name, len);
 		ret = -EINVAL;
 		goto err;
 	}
 
 	if (memcmp(data, ATH10K_FIRMWARE_MAGIC, magic_len) != 0) {
-		ath10k_err("Invalid firmware magic\n");
+		ath10k_err("invalid firmware magic\n");
 		ret = -EINVAL;
 		goto err;
 	}
@@ -430,7 +410,7 @@
 		data += sizeof(*hdr);
 
 		if (len < ie_len) {
-			ath10k_err("Invalid length for FW IE %d (%zu < %zu)\n",
+			ath10k_err("invalid length for FW IE %d (%zu < %zu)\n",
 				   ie_id, len, ie_len);
 			ret = -EINVAL;
 			goto err;
@@ -513,8 +493,8 @@
 	}
 
 	if (!ar->firmware_data || !ar->firmware_len) {
-		ath10k_warn("No ATH10K_FW_IE_FW_IMAGE found from %s, skipping\n",
-			    name);
+		ath10k_warn("No ATH10K_FW_IE_FW_IMAGE found from '%s/%s', skipping\n",
+			    ar->hw_params.fw.dir, name);
 		ret = -ENOMEDIUM;
 		goto err;
 	}
@@ -531,7 +511,9 @@
 					 ar->hw_params.fw.board);
 	if (IS_ERR(ar->board)) {
 		ret = PTR_ERR(ar->board);
-		ath10k_err("could not fetch board data (%d)\n", ret);
+		ath10k_err("could not fetch board data '%s/%s' (%d)\n",
+			   ar->hw_params.fw.dir, ar->hw_params.fw.board,
+			   ret);
 		goto err;
 	}
 
@@ -549,19 +531,21 @@
 {
 	int ret;
 
+	ar->fw_api = 2;
+	ath10k_dbg(ATH10K_DBG_BOOT, "trying fw api %d\n", ar->fw_api);
+
 	ret = ath10k_core_fetch_firmware_api_n(ar, ATH10K_FW_API2_FILE);
-	if (ret == 0) {
-		ar->fw_api = 2;
-		goto out;
-	}
+	if (ret == 0)
+		goto success;
+
+	ar->fw_api = 1;
+	ath10k_dbg(ATH10K_DBG_BOOT, "trying fw api %d\n", ar->fw_api);
 
 	ret = ath10k_core_fetch_firmware_api_1(ar);
 	if (ret)
 		return ret;
 
-	ar->fw_api = 1;
-
-out:
+success:
 	ath10k_dbg(ATH10K_DBG_BOOT, "using fw api %d\n", ar->fw_api);
 
 	return 0;
@@ -572,16 +556,22 @@
 	int ret;
 
 	ret = ath10k_download_board_data(ar);
-	if (ret)
+	if (ret) {
+		ath10k_err("failed to download board data: %d\n", ret);
 		return ret;
+	}
 
 	ret = ath10k_download_and_run_otp(ar);
-	if (ret)
+	if (ret) {
+		ath10k_err("failed to run otp: %d\n", ret);
 		return ret;
+	}
 
 	ret = ath10k_download_fw(ar);
-	if (ret)
+	if (ret) {
+		ath10k_err("failed to download firmware: %d\n", ret);
 		return ret;
+	}
 
 	return ret;
 }
@@ -660,8 +650,9 @@
 
 	switch (ar->state) {
 	case ATH10K_STATE_ON:
-		ath10k_halt(ar);
 		ar->state = ATH10K_STATE_RESTARTING;
+		del_timer_sync(&ar->scan.timeout);
+		ath10k_reset_scan((unsigned long)ar);
 		ieee80211_restart_hw(ar->hw);
 		break;
 	case ATH10K_STATE_OFF:
@@ -670,6 +661,8 @@
 		ath10k_warn("cannot restart a device that hasn't been started\n");
 		break;
 	case ATH10K_STATE_RESTARTING:
+		/* hw restart might be requested from multiple places */
+		break;
 	case ATH10K_STATE_RESTARTED:
 		ar->state = ATH10K_STATE_WEDGED;
 		/* fall through */
@@ -681,70 +674,6 @@
 	mutex_unlock(&ar->conf_mutex);
 }
 
-struct ath10k *ath10k_core_create(void *hif_priv, struct device *dev,
-				  const struct ath10k_hif_ops *hif_ops)
-{
-	struct ath10k *ar;
-
-	ar = ath10k_mac_create();
-	if (!ar)
-		return NULL;
-
-	ar->ath_common.priv = ar;
-	ar->ath_common.hw = ar->hw;
-
-	ar->p2p = !!ath10k_p2p;
-	ar->dev = dev;
-
-	ar->hif.priv = hif_priv;
-	ar->hif.ops = hif_ops;
-
-	init_completion(&ar->scan.started);
-	init_completion(&ar->scan.completed);
-	init_completion(&ar->scan.on_channel);
-	init_completion(&ar->target_suspend);
-
-	init_completion(&ar->install_key_done);
-	init_completion(&ar->vdev_setup_done);
-
-	setup_timer(&ar->scan.timeout, ath10k_reset_scan, (unsigned long)ar);
-
-	ar->workqueue = create_singlethread_workqueue("ath10k_wq");
-	if (!ar->workqueue)
-		goto err_wq;
-
-	mutex_init(&ar->conf_mutex);
-	spin_lock_init(&ar->data_lock);
-
-	INIT_LIST_HEAD(&ar->peers);
-	init_waitqueue_head(&ar->peer_mapping_wq);
-
-	init_completion(&ar->offchan_tx_completed);
-	INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work);
-	skb_queue_head_init(&ar->offchan_tx_queue);
-
-	INIT_WORK(&ar->wmi_mgmt_tx_work, ath10k_mgmt_over_wmi_tx_work);
-	skb_queue_head_init(&ar->wmi_mgmt_tx_queue);
-
-	INIT_WORK(&ar->restart_work, ath10k_core_restart);
-
-	return ar;
-
-err_wq:
-	ath10k_mac_destroy(ar);
-	return NULL;
-}
-EXPORT_SYMBOL(ath10k_core_create);
-
-void ath10k_core_destroy(struct ath10k *ar)
-{
-	flush_workqueue(ar->workqueue);
-	destroy_workqueue(ar->workqueue);
-
-	ath10k_mac_destroy(ar);
-}
-EXPORT_SYMBOL(ath10k_core_destroy);
-
 int ath10k_core_start(struct ath10k *ar)
 {
 	int status;
@@ -785,10 +714,28 @@
 		goto err;
 	}
 
+	status = ath10k_htt_init(ar);
+	if (status) {
+		ath10k_err("failed to init htt: %d\n", status);
+		goto err_wmi_detach;
+	}
+
+	status = ath10k_htt_tx_alloc(&ar->htt);
+	if (status) {
+		ath10k_err("failed to alloc htt tx: %d\n", status);
+		goto err_wmi_detach;
+	}
+
+	status = ath10k_htt_rx_alloc(&ar->htt);
+	if (status) {
+		ath10k_err("failed to alloc htt rx: %d\n", status);
+		goto err_htt_tx_detach;
+	}
+
 	status = ath10k_hif_start(ar);
 	if (status) {
 		ath10k_err("could not start HIF: %d\n", status);
-		goto err_wmi_detach;
+		goto err_htt_rx_detach;
 	}
 
 	status = ath10k_htc_wait_target(&ar->htc);
@@ -797,15 +744,30 @@
 		goto err_hif_stop;
 	}
 
-	status = ath10k_htt_attach(ar);
+	status = ath10k_htt_connect(&ar->htt);
 	if (status) {
-		ath10k_err("could not attach htt (%d)\n", status);
+		ath10k_err("failed to connect htt (%d)\n", status);
 		goto err_hif_stop;
 	}
 
-	status = ath10k_init_connect_htc(ar);
-	if (status)
-		goto err_htt_detach;
+	status = ath10k_wmi_connect(ar);
+	if (status) {
+		ath10k_err("could not connect wmi: %d\n", status);
+		goto err_hif_stop;
+	}
+
+	status = ath10k_htc_start(&ar->htc);
+	if (status) {
+		ath10k_err("failed to start htc: %d\n", status);
+		goto err_hif_stop;
+	}
+
+	status = ath10k_wmi_wait_for_service_ready(ar);
+	if (status <= 0) {
+		ath10k_warn("wmi service ready event not received");
+		status = -ETIMEDOUT;
+		goto err_htc_stop;
+	}
 
 	ath10k_dbg(ATH10K_DBG_BOOT, "firmware %s booted\n",
 		   ar->hw->wiphy->fw_version);
@@ -813,31 +775,36 @@
 	status = ath10k_wmi_cmd_init(ar);
 	if (status) {
 		ath10k_err("could not send WMI init command (%d)\n", status);
-		goto err_disconnect_htc;
+		goto err_htc_stop;
 	}
 
 	status = ath10k_wmi_wait_for_unified_ready(ar);
 	if (status <= 0) {
 		ath10k_err("wmi unified ready event not received\n");
 		status = -ETIMEDOUT;
-		goto err_disconnect_htc;
+		goto err_htc_stop;
 	}
 
-	status = ath10k_htt_attach_target(&ar->htt);
-	if (status)
-		goto err_disconnect_htc;
+	status = ath10k_htt_setup(&ar->htt);
+	if (status) {
+		ath10k_err("failed to setup htt: %d\n", status);
+		goto err_htc_stop;
+	}
 
 	status = ath10k_debug_start(ar);
 	if (status)
-		goto err_disconnect_htc;
+		goto err_htc_stop;
 
 	ar->free_vdev_map = (1 << TARGET_NUM_VDEVS) - 1;
 	INIT_LIST_HEAD(&ar->arvifs);
 
 	if (!test_bit(ATH10K_FLAG_FIRST_BOOT_DONE, &ar->dev_flags))
-		ath10k_info("%s (0x%x) fw %s api %d htt %d.%d\n",
-			    ar->hw_params.name, ar->target_version,
-			    ar->hw->wiphy->fw_version, ar->fw_api,
+		ath10k_info("%s (0x%08x, 0x%08x) fw %s api %d htt %d.%d\n",
+			    ar->hw_params.name,
+			    ar->target_version,
+			    ar->chip_id,
+			    ar->hw->wiphy->fw_version,
+			    ar->fw_api,
 			    ar->htt.target_version_major,
 			    ar->htt.target_version_minor);
 
@@ -845,12 +812,14 @@
 
 	return 0;
 
-err_disconnect_htc:
+err_htc_stop:
 	ath10k_htc_stop(&ar->htc);
-err_htt_detach:
-	ath10k_htt_detach(&ar->htt);
 err_hif_stop:
 	ath10k_hif_stop(ar);
+err_htt_rx_detach:
+	ath10k_htt_rx_free(&ar->htt);
+err_htt_tx_detach:
+	ath10k_htt_tx_free(&ar->htt);
 err_wmi_detach:
 	ath10k_wmi_detach(ar);
 err:
@@ -885,10 +854,14 @@
 	lockdep_assert_held(&ar->conf_mutex);
 
 	/* try to suspend target */
-	ath10k_wait_for_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR);
+	if (ar->state != ATH10K_STATE_RESTARTING)
+		ath10k_wait_for_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR);
+
 	ath10k_debug_stop(ar);
 	ath10k_htc_stop(&ar->htc);
-	ath10k_htt_detach(&ar->htt);
+	ath10k_hif_stop(ar);
+	ath10k_htt_tx_free(&ar->htt);
+	ath10k_htt_rx_free(&ar->htt);
 	ath10k_wmi_detach(ar);
 }
 EXPORT_SYMBOL(ath10k_core_stop);
@@ -980,22 +953,15 @@
 	return 0;
 }
 
-int ath10k_core_register(struct ath10k *ar, u32 chip_id)
+static void ath10k_core_register_work(struct work_struct *work)
 {
+	struct ath10k *ar = container_of(work, struct ath10k, register_work);
 	int status;
 
-	ar->chip_id = chip_id;
-
-	status = ath10k_core_check_chip_id(ar);
-	if (status) {
-		ath10k_err("Unsupported chip id 0x%08x\n", ar->chip_id);
-		return status;
-	}
-
 	status = ath10k_core_probe_fw(ar);
 	if (status) {
 		ath10k_err("could not probe fw (%d)\n", status);
-		return status;
+		goto err;
 	}
 
 	status = ath10k_mac_register(ar);
@@ -1010,18 +976,43 @@
 		goto err_unregister_mac;
 	}
 
-	return 0;
+	set_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags);
+	return;
 
 err_unregister_mac:
 	ath10k_mac_unregister(ar);
 err_release_fw:
 	ath10k_core_free_firmware_files(ar);
-	return status;
+err:
+	device_release_driver(ar->dev);
+	return;
+}
+
+int ath10k_core_register(struct ath10k *ar, u32 chip_id)
+{
+	int status;
+
+	ar->chip_id = chip_id;
+
+	status = ath10k_core_check_chip_id(ar);
+	if (status) {
+		ath10k_err("Unsupported chip id 0x%08x\n", ar->chip_id);
+		return status;
+	}
+	/* register_work is bound to ath10k_core_register_work(). */
+	queue_work(ar->workqueue, &ar->register_work);
+
+	return 0;	/* work queued; its outcome is not reported here */
 }
 EXPORT_SYMBOL(ath10k_core_register);
 
 void ath10k_core_unregister(struct ath10k *ar)
 {
+	cancel_work_sync(&ar->register_work);
+
+	if (!test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags))
+		return;
+
 	/* We must unregister from mac80211 before we stop HTC and HIF.
 	 * Otherwise we will fail to submit commands to FW and mac80211 will be
 	 * unhappy about callback failures. */
@@ -1033,6 +1024,71 @@
 }
 EXPORT_SYMBOL(ath10k_core_unregister);
 
+struct ath10k *ath10k_core_create(void *hif_priv, struct device *dev,
+				  const struct ath10k_hif_ops *hif_ops)
+{
+	struct ath10k *ar;	/* returned on success, NULL on failure */
+
+	ar = ath10k_mac_create();
+	if (!ar)
+		return NULL;
+
+	ar->ath_common.priv = ar;
+	ar->ath_common.hw = ar->hw;
+
+	ar->p2p = !!ath10k_p2p;	/* !! collapses the value to 0 or 1 */
+	ar->dev = dev;
+
+	ar->hif.priv = hif_priv;
+	ar->hif.ops = hif_ops;
+
+	init_completion(&ar->scan.started);
+	init_completion(&ar->scan.completed);
+	init_completion(&ar->scan.on_channel);
+	init_completion(&ar->target_suspend);
+
+	init_completion(&ar->install_key_done);
+	init_completion(&ar->vdev_setup_done);
+
+	setup_timer(&ar->scan.timeout, ath10k_reset_scan, (unsigned long)ar);
+
+	ar->workqueue = create_singlethread_workqueue("ath10k_wq");
+	if (!ar->workqueue)
+		goto err_wq;	/* only failure path after ar exists */
+
+	mutex_init(&ar->conf_mutex);
+	spin_lock_init(&ar->data_lock);
+
+	INIT_LIST_HEAD(&ar->peers);
+	init_waitqueue_head(&ar->peer_mapping_wq);
+
+	init_completion(&ar->offchan_tx_completed);
+	INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work);
+	skb_queue_head_init(&ar->offchan_tx_queue);
+
+	INIT_WORK(&ar->wmi_mgmt_tx_work, ath10k_mgmt_over_wmi_tx_work);
+	skb_queue_head_init(&ar->wmi_mgmt_tx_queue);
+
+	INIT_WORK(&ar->register_work, ath10k_core_register_work);
+	INIT_WORK(&ar->restart_work, ath10k_core_restart);
+
+	return ar;
+
+err_wq:
+	ath10k_mac_destroy(ar);	/* ar came from ath10k_mac_create() */
+	return NULL;
+}
+EXPORT_SYMBOL(ath10k_core_create);
+
+void ath10k_core_destroy(struct ath10k *ar)
+{
+	flush_workqueue(ar->workqueue);	/* flush before destroying */
+	destroy_workqueue(ar->workqueue);
+	/* ar was obtained from ath10k_mac_create() in ath10k_core_create(). */
+	ath10k_mac_destroy(ar);
+}
+EXPORT_SYMBOL(ath10k_core_destroy);
+
 MODULE_AUTHOR("Qualcomm Atheros");
 MODULE_DESCRIPTION("Core module for QCA988X PCIe devices.");
 MODULE_LICENSE("Dual BSD/GPL");

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 0e71979..68ceef6 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h

@@ -119,6 +119,7 @@
 	u8 peer_macaddr[ETH_ALEN];
 	u32 peer_rssi;
 	u32 peer_tx_rate;
+	u32 peer_rx_rate; /* 10x only */
 };
 
 struct ath10k_target_stats {
@@ -130,6 +131,12 @@
 	u32 cycle_count;
 	u32 phy_err_count;
 	u32 chan_tx_power;
+	u32 ack_rx_bad;
+	u32 rts_bad;
+	u32 rts_good;
+	u32 fcs_bad;
+	u32 no_beacons;
+	u32 mib_int_count;
 
 	/* PDEV TX stats */
 	s32 comp_queued;
@@ -260,6 +267,8 @@
 	u8 fixed_rate;
 	u8 fixed_nss;
 	u8 force_sgi;
+	bool use_cts_prot;
+	int num_legacy_stations;
 };
 
 struct ath10k_vif_iter {
@@ -326,6 +335,7 @@
 	/* Indicates that ath10k device is during CAC phase of DFS */
 	ATH10K_CAC_RUNNING,
 	ATH10K_FLAG_FIRST_BOOT_DONE,
+	ATH10K_FLAG_CORE_REGISTERED,
 };
 
 struct ath10k {
@@ -419,13 +429,24 @@
 	struct cfg80211_chan_def chandef;
 
 	int free_vdev_map;
+	bool promisc;
+	bool monitor;
 	int monitor_vdev_id;
-	bool monitor_enabled;
-	bool monitor_present;
+	bool monitor_started;
 	unsigned int filter_flags;
 	unsigned long dev_flags;
 	u32 dfs_block_radar_events;
 
+	/* protected by conf_mutex */
+	bool radar_enabled;
+	int num_started_vdevs;
+
+	/* protected by conf_mutex */
+	u8 supp_tx_chainmask;
+	u8 supp_rx_chainmask;
+	u8 cfg_tx_chainmask;
+	u8 cfg_rx_chainmask;
+
 	struct wmi_pdev_set_wmm_params_arg wmm_params;
 	struct completion install_key_done;
 
@@ -456,6 +477,7 @@
 
 	enum ath10k_state state;
 
+	struct work_struct register_work;
 	struct work_struct restart_work;
 
 	/* cycle count is reported twice for each visited channel during scan.

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 6debd28..1b7ff4b 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c

@@ -161,7 +161,7 @@
 	u8 *tmp = ev->data;
 	struct ath10k_target_stats *stats;
 	int num_pdev_stats, num_vdev_stats, num_peer_stats;
-	struct wmi_pdev_stats *ps;
+	struct wmi_pdev_stats_10x *ps;
 	int i;
 
 	spin_lock_bh(&ar->data_lock);
@@ -173,7 +173,7 @@
 	num_peer_stats = __le32_to_cpu(ev->num_peer_stats); /* 0 or max peers */
 
 	if (num_pdev_stats) {
-		ps = (struct wmi_pdev_stats *)tmp;
+		ps = (struct wmi_pdev_stats_10x *)tmp;
 
 		stats->ch_noise_floor = __le32_to_cpu(ps->chan_nf);
 		stats->tx_frame_count = __le32_to_cpu(ps->tx_frame_count);
@@ -228,7 +228,18 @@
 		stats->phy_err_drop = __le32_to_cpu(ps->wal.rx.phy_err_drop);
 		stats->mpdu_errs = __le32_to_cpu(ps->wal.rx.mpdu_errs);
 
-		tmp += sizeof(struct wmi_pdev_stats);
+		if (test_bit(ATH10K_FW_FEATURE_WMI_10X,
+			     ar->fw_features)) {
+			stats->ack_rx_bad = __le32_to_cpu(ps->ack_rx_bad);
+			stats->rts_bad = __le32_to_cpu(ps->rts_bad);
+			stats->rts_good = __le32_to_cpu(ps->rts_good);
+			stats->fcs_bad = __le32_to_cpu(ps->fcs_bad);
+			stats->no_beacons = __le32_to_cpu(ps->no_beacons);
+			stats->mib_int_count = __le32_to_cpu(ps->mib_int_count);
+			tmp += sizeof(struct wmi_pdev_stats_10x);
+		} else {
+			tmp += sizeof(struct wmi_pdev_stats_old);
+		}
 	}
 
 	/* 0 or max vdevs */
@@ -243,22 +254,29 @@
 	}
 
 	if (num_peer_stats) {
-		struct wmi_peer_stats *peer_stats;
+		struct wmi_peer_stats_10x *peer_stats;
 		struct ath10k_peer_stat *s;
 
 		stats->peers = num_peer_stats;
 
 		for (i = 0; i < num_peer_stats; i++) {
-			peer_stats = (struct wmi_peer_stats *)tmp;
+			peer_stats = (struct wmi_peer_stats_10x *)tmp;
 			s = &stats->peer_stat[i];
 
-			WMI_MAC_ADDR_TO_CHAR_ARRAY(&peer_stats->peer_macaddr,
-						   s->peer_macaddr);
+			memcpy(s->peer_macaddr, &peer_stats->peer_macaddr.addr,
+			       ETH_ALEN);
 			s->peer_rssi = __le32_to_cpu(peer_stats->peer_rssi);
 			s->peer_tx_rate =
 				__le32_to_cpu(peer_stats->peer_tx_rate);
+			if (test_bit(ATH10K_FW_FEATURE_WMI_10X,
+				     ar->fw_features)) {
+				s->peer_rx_rate =
+					__le32_to_cpu(peer_stats->peer_rx_rate);
+				tmp += sizeof(struct wmi_peer_stats_10x);
 
-			tmp += sizeof(struct wmi_peer_stats);
+			} else {
+				tmp += sizeof(struct wmi_peer_stats_old);
+			}
 		}
 	}
 
@@ -272,7 +290,7 @@
 	struct ath10k *ar = file->private_data;
 	struct ath10k_target_stats *fw_stats;
 	char *buf = NULL;
-	unsigned int len = 0, buf_len = 2500;
+	unsigned int len = 0, buf_len = 8000;
 	ssize_t ret_cnt = 0;
 	long left;
 	int i;
@@ -320,6 +338,16 @@
 			 "Cycle count", fw_stats->cycle_count);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
 			 "PHY error count", fw_stats->phy_err_count);
+	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
+			 "RTS bad count", fw_stats->rts_bad);
+	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
+			 "RTS good count", fw_stats->rts_good);
+	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
+			 "FCS bad count", fw_stats->fcs_bad);
+	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
+			 "No beacon count", fw_stats->no_beacons);
+	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
+			 "MIB int count", fw_stats->mib_int_count);
 
 	len += scnprintf(buf + len, buf_len - len, "\n");
 	len += scnprintf(buf + len, buf_len - len, "%30s\n",
@@ -411,8 +439,8 @@
 			 "MPDU errors (FCS, MIC, ENC)", fw_stats->mpdu_errs);
 
 	len += scnprintf(buf + len, buf_len - len, "\n");
-	len += scnprintf(buf + len, buf_len - len, "%30s\n",
-			 "ath10k PEER stats");
+	len += scnprintf(buf + len, buf_len - len, "%30s (%d)\n",
+			 "ath10k PEER stats", fw_stats->peers);
 	len += scnprintf(buf + len, buf_len - len, "%30s\n\n",
 				 "=================");
 
@@ -425,6 +453,9 @@
 		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
 				 "Peer TX rate",
 				 fw_stats->peer_stat[i].peer_tx_rate);
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "Peer RX rate",
+				 fw_stats->peer_stat[i].peer_rx_rate);
 		len += scnprintf(buf + len, buf_len - len, "\n");
 	}
 	spin_unlock_bh(&ar->data_lock);
@@ -451,27 +482,37 @@
 					     char __user *user_buf,
 					     size_t count, loff_t *ppos)
 {
-	const char buf[] = "To simulate firmware crash write the keyword"
-			   " `crash` to this file.\nThis will force firmware"
-			   " to report a crash to the host system.\n";
+	const char buf[] = "To simulate firmware crash write one of the"
+			   " keywords to this file:\n `soft` - this will send"
+			   " WMI_FORCE_FW_HANG_ASSERT to firmware if FW"
+			   " supports that command.\n `hard` - this will send"
+			   " to firmware command with illegal parameters"
+			   " causing firmware crash.\n";
+
 	return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
 }
 
+/* Simulate firmware crash:
+ * 'soft': Call a WMI command that causes a firmware hang. This hang is
+ * recoverable by a warm firmware reset.
+ * 'hard': Force a firmware crash by setting a vdev parameter on a vdev id
+ * that is not allowed. This is a hard firmware crash because it is
+ * recoverable only by a cold firmware reset.
+ */
 static ssize_t ath10k_write_simulate_fw_crash(struct file *file,
 					      const char __user *user_buf,
 					      size_t count, loff_t *ppos)
 {
 	struct ath10k *ar = file->private_data;
-	char buf[32] = {};
+	char buf[32];
 	int ret;
 
 	mutex_lock(&ar->conf_mutex);
 
 	simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
-	if (strcmp(buf, "crash") && strcmp(buf, "crash\n")) {
-		ret = -EINVAL;
-		goto exit;
-	}
+
+	/* make sure that buf is null terminated */
+	buf[sizeof(buf) - 1] = 0;
 
 	if (ar->state != ATH10K_STATE_ON &&
 	    ar->state != ATH10K_STATE_RESTARTED) {
@@ -479,14 +520,30 @@
 		goto exit;
 	}
 
-	ath10k_info("simulating firmware crash\n");
+	/* drop the possible '\n' from the end */
+	if (buf[count - 1] == '\n') {
+		buf[count - 1] = 0;
+		count--;
+	}
 
-	ret = ath10k_wmi_force_fw_hang(ar, WMI_FORCE_FW_HANG_ASSERT, 0);
-	if (ret)
-		ath10k_warn("failed to force fw hang (%d)\n", ret);
+	if (!strcmp(buf, "soft")) {
+		ath10k_info("simulating soft firmware crash\n");
+		ret = ath10k_wmi_force_fw_hang(ar, WMI_FORCE_FW_HANG_ASSERT, 0);
+	} else if (!strcmp(buf, "hard")) {
+		ath10k_info("simulating hard firmware crash\n");
+		ret = ath10k_wmi_vdev_set_param(ar, TARGET_NUM_VDEVS + 1,
+					ar->wmi.vdev_param->rts_threshold, 0);
+	} else {
+		ret = -EINVAL;
+		goto exit;
+	}
 
-	if (ret == 0)
-		ret = count;
+	if (ret) {
+		ath10k_warn("failed to simulate firmware crash: %d\n", ret);
+		goto exit;
+	}
+
+	ret = count;
 
 exit:
 	mutex_unlock(&ar->conf_mutex);

diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 7f1bccd..e493db4 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c

@@ -157,6 +157,9 @@
 			goto err_pull;
 		}
 		ep->tx_credits -= credits;
+		ath10k_dbg(ATH10K_DBG_HTC,
+			   "htc ep %d consumed %d credits (total %d)\n",
+			   eid, credits, ep->tx_credits);
 		spin_unlock_bh(&htc->tx_lock);
 	}
 
@@ -185,6 +188,9 @@
 	if (ep->tx_credit_flow_enabled) {
 		spin_lock_bh(&htc->tx_lock);
 		ep->tx_credits += credits;
+		ath10k_dbg(ATH10K_DBG_HTC,
+			   "htc ep %d reverted %d credits back (total %d)\n",
+			   eid, credits, ep->tx_credits);
 		spin_unlock_bh(&htc->tx_lock);
 
 		if (ep->ep_ops.ep_tx_credits)
@@ -234,12 +240,12 @@
 		if (report->eid >= ATH10K_HTC_EP_COUNT)
 			break;
 
-		ath10k_dbg(ATH10K_DBG_HTC, "ep %d got %d credits\n",
-			   report->eid, report->credits);
-
 		ep = &htc->endpoint[report->eid];
 		ep->tx_credits += report->credits;
 
+		ath10k_dbg(ATH10K_DBG_HTC, "htc ep %d got %d credits (total %d)\n",
+			   report->eid, report->credits, ep->tx_credits);
+
 		if (ep->ep_ops.ep_tx_credits) {
 			spin_unlock_bh(&htc->tx_lock);
 			ep->ep_ops.ep_tx_credits(htc->ar);
@@ -824,17 +830,11 @@
 	return 0;
 }
 
-/*
- * stop HTC communications, i.e. stop interrupt reception, and flush all
- * queued buffers
- */
 void ath10k_htc_stop(struct ath10k_htc *htc)
 {
 	spin_lock_bh(&htc->tx_lock);
 	htc->stopped = true;
 	spin_unlock_bh(&htc->tx_lock);
-
-	ath10k_hif_stop(htc->ar);
 }
 
 /* registered target arrival callback from the HIF layer */

diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c
index 69697af5..19c12cc 100644
--- a/drivers/net/wireless/ath/ath10k/htt.c
+++ b/drivers/net/wireless/ath/ath10k/htt.c

@@ -22,7 +22,7 @@
 #include "core.h"
 #include "debug.h"
 
-static int ath10k_htt_htc_attach(struct ath10k_htt *htt)
+int ath10k_htt_connect(struct ath10k_htt *htt)
 {
 	struct ath10k_htc_svc_conn_req conn_req;
 	struct ath10k_htc_svc_conn_resp conn_resp;
@@ -48,39 +48,14 @@
 	return 0;
 }
 
-int ath10k_htt_attach(struct ath10k *ar)
+int ath10k_htt_init(struct ath10k *ar)
 {
 	struct ath10k_htt *htt = &ar->htt;
-	int ret;
 
 	htt->ar = ar;
 	htt->max_throughput_mbps = 800;
 
 	/*
-	 * Connect to HTC service.
-	 * This has to be done before calling ath10k_htt_rx_attach,
-	 * since ath10k_htt_rx_attach involves sending a rx ring configure
-	 * message to the target.
-	 */
-	ret = ath10k_htt_htc_attach(htt);
-	if (ret) {
-		ath10k_err("could not attach htt htc (%d)\n", ret);
-		goto err_htc_attach;
-	}
-
-	ret = ath10k_htt_tx_attach(htt);
-	if (ret) {
-		ath10k_err("could not attach htt tx (%d)\n", ret);
-		goto err_htc_attach;
-	}
-
-	ret = ath10k_htt_rx_attach(htt);
-	if (ret) {
-		ath10k_err("could not attach htt rx (%d)\n", ret);
-		goto err_rx_attach;
-	}
-
-	/*
 	 * Prefetch enough data to satisfy target
 	 * classification engine.
 	 * This is for LL chips. HL chips will probably
@@ -93,11 +68,6 @@
 		2; /* ip4 dscp or ip6 priority */
 
 	return 0;
-
-err_rx_attach:
-	ath10k_htt_tx_detach(htt);
-err_htc_attach:
-	return ret;
 }
 
 #define HTT_TARGET_VERSION_TIMEOUT_HZ (3*HZ)
@@ -117,7 +87,7 @@
 	return 0;
 }
 
-int ath10k_htt_attach_target(struct ath10k_htt *htt)
+int ath10k_htt_setup(struct ath10k_htt *htt)
 {
 	int status;
 
@@ -140,9 +110,3 @@
 
 	return ath10k_htt_send_rx_ring_cfg_ll(htt);
 }
-
-void ath10k_htt_detach(struct ath10k_htt *htt)
-{
-	ath10k_htt_rx_detach(htt);
-	ath10k_htt_tx_detach(htt);
-}

diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 654867f..9a26346 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h

@@ -21,6 +21,7 @@
 #include <linux/bug.h>
 #include <linux/interrupt.h>
 #include <linux/dmapool.h>
+#include <net/mac80211.h>
 
 #include "htc.h"
 #include "rx_desc.h"
@@ -1172,23 +1173,6 @@
 	u16 peer_id;
 };
 
-struct htt_rx_info {
-	struct sk_buff *skb;
-	enum htt_rx_mpdu_status status;
-	enum htt_rx_mpdu_encrypt_type encrypt_type;
-	s8 signal;
-	struct {
-		u8 info0;
-		u32 info1;
-		u32 info2;
-	} rate;
-
-	u32 tsf;
-	bool fcs_err;
-	bool amsdu_more;
-	bool mic_err;
-};
-
 struct ath10k_htt_txbuf {
 	struct htt_data_tx_desc_frag frags[2];
 	struct ath10k_htc_hdr htc_hdr;
@@ -1289,6 +1273,9 @@
 	struct tasklet_struct txrx_compl_task;
 	struct sk_buff_head tx_compl_q;
 	struct sk_buff_head rx_compl_q;
+
+	/* rx_status template */
+	struct ieee80211_rx_status rx_status;
 };
 
 #define RX_HTT_HDR_STATUS_LEN 64
@@ -1341,14 +1328,16 @@
 #define HTT_LOG2_MAX_CACHE_LINE_SIZE 7	/* 2^7 = 128 */
 #define HTT_MAX_CACHE_LINE_SIZE_MASK ((1 << HTT_LOG2_MAX_CACHE_LINE_SIZE) - 1)
 
-int ath10k_htt_attach(struct ath10k *ar);
-int ath10k_htt_attach_target(struct ath10k_htt *htt);
-void ath10k_htt_detach(struct ath10k_htt *htt);
+int ath10k_htt_connect(struct ath10k_htt *htt);
+int ath10k_htt_init(struct ath10k *ar);
+int ath10k_htt_setup(struct ath10k_htt *htt);
 
-int ath10k_htt_tx_attach(struct ath10k_htt *htt);
-void ath10k_htt_tx_detach(struct ath10k_htt *htt);
-int ath10k_htt_rx_attach(struct ath10k_htt *htt);
-void ath10k_htt_rx_detach(struct ath10k_htt *htt);
+int ath10k_htt_tx_alloc(struct ath10k_htt *htt);
+void ath10k_htt_tx_free(struct ath10k_htt *htt);
+
+int ath10k_htt_rx_alloc(struct ath10k_htt *htt);
+void ath10k_htt_rx_free(struct ath10k_htt *htt);
+
 void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb);
 void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb);
 int ath10k_htt_h2t_ver_req_msg(struct ath10k_htt *htt);

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index cdcbe2d..6c102b1 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c

@@ -225,10 +225,26 @@
 	ath10k_htt_rx_msdu_buff_replenish(htt);
 }
 
-void ath10k_htt_rx_detach(struct ath10k_htt *htt)
+static void ath10k_htt_rx_ring_clean_up(struct ath10k_htt *htt)
 {
-	int sw_rd_idx = htt->rx_ring.sw_rd_idx.msdu_payld;
+	struct sk_buff *skb;
+	int i;
 
+	for (i = 0; i < htt->rx_ring.size; i++) {
+		skb = htt->rx_ring.netbufs_ring[i];
+		if (!skb)
+			continue;
+
+		dma_unmap_single(htt->ar->dev, ATH10K_SKB_CB(skb)->paddr,
+				 skb->len + skb_tailroom(skb),
+				 DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		htt->rx_ring.netbufs_ring[i] = NULL;
+	}
+}
+
+void ath10k_htt_rx_free(struct ath10k_htt *htt)
+{
 	del_timer_sync(&htt->rx_ring.refill_retry_timer);
 	tasklet_kill(&htt->rx_replenish_task);
 	tasklet_kill(&htt->txrx_compl_task);
@@ -236,18 +252,7 @@
 	skb_queue_purge(&htt->tx_compl_q);
 	skb_queue_purge(&htt->rx_compl_q);
 
-	while (sw_rd_idx != __le32_to_cpu(*(htt->rx_ring.alloc_idx.vaddr))) {
-		struct sk_buff *skb =
-				htt->rx_ring.netbufs_ring[sw_rd_idx];
-		struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb);
-
-		dma_unmap_single(htt->ar->dev, cb->paddr,
-				 skb->len + skb_tailroom(skb),
-				 DMA_FROM_DEVICE);
-		dev_kfree_skb_any(htt->rx_ring.netbufs_ring[sw_rd_idx]);
-		sw_rd_idx++;
-		sw_rd_idx &= htt->rx_ring.size_mask;
-	}
+	ath10k_htt_rx_ring_clean_up(htt);
 
 	dma_free_coherent(htt->ar->dev,
 			  (htt->rx_ring.size *
@@ -277,6 +282,7 @@
 
 	idx = htt->rx_ring.sw_rd_idx.msdu_payld;
 	msdu = htt->rx_ring.netbufs_ring[idx];
+	htt->rx_ring.netbufs_ring[idx] = NULL;
 
 	idx++;
 	idx &= htt->rx_ring.size_mask;
@@ -297,6 +303,7 @@
 	}
 }
 
+/* return: < 0 fatal error, 0 - non chained msdu, 1 chained msdu */
 static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt,
 				   u8 **fw_desc, int *fw_desc_len,
 				   struct sk_buff **head_msdu,
@@ -305,12 +312,13 @@
 	int msdu_len, msdu_chaining = 0;
 	struct sk_buff *msdu;
 	struct htt_rx_desc *rx_desc;
+	bool corrupted = false;
 
 	lockdep_assert_held(&htt->rx_ring.lock);
 
 	if (htt->rx_confused) {
 		ath10k_warn("htt is confused. refusing rx\n");
-		return 0;
+		return -1;
 	}
 
 	msdu = *head_msdu = ath10k_htt_rx_netbuf_pop(htt);
@@ -398,7 +406,6 @@
 		msdu_len = MS(__le32_to_cpu(rx_desc->msdu_start.info0),
 			      RX_MSDU_START_INFO0_MSDU_LENGTH);
 		msdu_chained = rx_desc->frag_info.ring2_more_count;
-		msdu_chaining = msdu_chained;
 
 		if (msdu_len_invalid)
 			msdu_len = 0;
@@ -426,11 +433,15 @@
 
 			msdu->next = next;
 			msdu = next;
+			msdu_chaining = 1;
 		}
 
 		last_msdu = __le32_to_cpu(rx_desc->msdu_end.info0) &
 				RX_MSDU_END_INFO0_LAST_MSDU;
 
+		if (msdu_chaining && !last_msdu)
+			corrupted = true;
+
 		if (last_msdu) {
 			msdu->next = NULL;
 			break;
@@ -442,6 +453,23 @@
 	}
 	*tail_msdu = msdu;
 
+	if (*head_msdu == NULL)
+		msdu_chaining = -1;
+
+	/*
+	 * Apparently FW sometimes reports weird chained MSDU sequences with
+	 * more than one rx descriptor. This seems like a bug but needs more
+	 * analysis. For the time being fix it by dropping such sequences to
+	 * avoid blowing up the host system.
+	 */
+	if (corrupted) {
+		ath10k_warn("failed to pop chained msdus, dropping\n");
+		ath10k_htt_rx_free_msdu_chain(*head_msdu);
+		*head_msdu = NULL;
+		*tail_msdu = NULL;
+		msdu_chaining = -EINVAL;
+	}
+
 	/*
 	 * Don't refill the ring yet.
 	 *
@@ -464,7 +492,7 @@
 	ath10k_htt_rx_msdu_buff_replenish(htt);
 }
 
-int ath10k_htt_rx_attach(struct ath10k_htt *htt)
+int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
 {
 	dma_addr_t paddr;
 	void *vaddr;
@@ -490,7 +518,7 @@
 	htt->rx_ring.fill_level = ath10k_htt_rx_ring_fill_level(htt);
 
 	htt->rx_ring.netbufs_ring =
-		kmalloc(htt->rx_ring.size * sizeof(struct sk_buff *),
+		kzalloc(htt->rx_ring.size * sizeof(struct sk_buff *),
 			GFP_KERNEL);
 	if (!htt->rx_ring.netbufs_ring)
 		goto err_netbuf;
@@ -636,6 +664,203 @@
 	__be16 len;
 } __packed;
 
+static const u8 rx_legacy_rate_idx[] = {
+	3,	/* 0x00  - 11Mbps  */
+	2,	/* 0x01  - 5.5Mbps */
+	1,	/* 0x02  - 2Mbps   */
+	0,	/* 0x03  - 1Mbps   */
+	3,	/* 0x04  - 11Mbps  */
+	2,	/* 0x05  - 5.5Mbps */
+	1,	/* 0x06  - 2Mbps   */
+	0,	/* 0x07  - 1Mbps   */
+	10,	/* 0x08  - 48Mbps  */
+	8,	/* 0x09  - 24Mbps  */
+	6,	/* 0x0A  - 12Mbps  */
+	4,	/* 0x0B  - 6Mbps   */
+	11,	/* 0x0C  - 54Mbps  */
+	9,	/* 0x0D  - 36Mbps  */
+	7,	/* 0x0E  - 18Mbps  */
+	5,	/* 0x0F  - 9Mbps   */
+};
+
+static void ath10k_htt_rx_h_rates(struct ath10k *ar,
+				  enum ieee80211_band band,
+				  u8 info0, u32 info1, u32 info2,
+				  struct ieee80211_rx_status *status)
+{
+	u8 cck, rate, rate_idx, bw, sgi, mcs, nss;
+	u8 preamble = 0;
+
+	/* Check if valid fields */
+	if (!(info0 & HTT_RX_INDICATION_INFO0_START_VALID))
+		return;
+
+	preamble = MS(info1, HTT_RX_INDICATION_INFO1_PREAMBLE_TYPE);
+
+	switch (preamble) {
+	case HTT_RX_LEGACY:
+		cck = info0 & HTT_RX_INDICATION_INFO0_LEGACY_RATE_CCK;
+		rate = MS(info0, HTT_RX_INDICATION_INFO0_LEGACY_RATE);
+		rate_idx = 0;
+
+		if (rate < 0x08 || rate > 0x0F)
+			break;
+
+		switch (band) {
+		case IEEE80211_BAND_2GHZ:
+			if (cck)
+				rate &= ~BIT(3);
+			rate_idx = rx_legacy_rate_idx[rate];
+			break;
+		case IEEE80211_BAND_5GHZ:
+			rate_idx = rx_legacy_rate_idx[rate];
+			/* The same rate table, ath10k_rates[], is used
+			   when registering the HW. In case of 5GHz the
+			   CCK rates are skipped, so subtract 4 here */
+			rate_idx -= 4;
+			break;
+		default:
+			break;
+		}
+
+		status->rate_idx = rate_idx;
+		break;
+	case HTT_RX_HT:
+	case HTT_RX_HT_WITH_TXBF:
+		/* HT-SIG - Table 20-11 in info1 and info2 */
+		mcs = info1 & 0x1F;
+		nss = mcs >> 3;
+		bw = (info1 >> 7) & 1;
+		sgi = (info2 >> 7) & 1;
+
+		status->rate_idx = mcs;
+		status->flag |= RX_FLAG_HT;
+		if (sgi)
+			status->flag |= RX_FLAG_SHORT_GI;
+		if (bw)
+			status->flag |= RX_FLAG_40MHZ;
+		break;
+	case HTT_RX_VHT:
+	case HTT_RX_VHT_WITH_TXBF:
+		/* VHT-SIG-A1 in info1, VHT-SIG-A2 in info2.
+		   TODO: check this */
+		mcs = (info2 >> 4) & 0x0F;
+		nss = ((info1 >> 10) & 0x07) + 1;
+		bw = info1 & 3;
+		sgi = info2 & 1;
+
+		status->rate_idx = mcs;
+		status->vht_nss = nss;
+
+		if (sgi)
+			status->flag |= RX_FLAG_SHORT_GI;
+
+		switch (bw) {
+		/* 20MHZ */
+		case 0:
+			break;
+		/* 40MHZ */
+		case 1:
+			status->flag |= RX_FLAG_40MHZ;
+			break;
+		/* 80MHZ */
+		case 2:
+			status->vht_flag |= RX_VHT_FLAG_80MHZ;
+		}
+
+		status->flag |= RX_FLAG_VHT;
+		break;
+	default:
+		break;
+	}
+}
+
+static void ath10k_htt_rx_h_protected(struct ath10k_htt *htt,
+				      struct ieee80211_rx_status *rx_status,
+				      struct sk_buff *skb,
+				      enum htt_rx_mpdu_encrypt_type enctype,
+				      enum rx_msdu_decap_format fmt,
+				      bool dot11frag)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+
+	rx_status->flag &= ~(RX_FLAG_DECRYPTED |
+			     RX_FLAG_IV_STRIPPED |
+			     RX_FLAG_MMIC_STRIPPED);
+
+	if (enctype == HTT_RX_MPDU_ENCRYPT_NONE)
+		return;
+
+	/*
+	 * There's no explicit rx descriptor flag to indicate whether a given
+	 * frame has been decrypted or not. We're forced to use the decap
+	 * format as an implicit indication. However fragmentation rx is always
+	 * raw and it probably never reports undecrypted raws.
+	 *
+	 * This makes sure sniffed frames are reported as-is without stripping
+	 * the protected flag.
+	 */
+	if (fmt == RX_MSDU_DECAP_RAW && !dot11frag)
+		return;
+
+	rx_status->flag |= RX_FLAG_DECRYPTED |
+			   RX_FLAG_IV_STRIPPED |
+			   RX_FLAG_MMIC_STRIPPED;
+	hdr->frame_control = __cpu_to_le16(__le16_to_cpu(hdr->frame_control) &
+					   ~IEEE80211_FCTL_PROTECTED);
+}
+
+static bool ath10k_htt_rx_h_channel(struct ath10k *ar,
+				    struct ieee80211_rx_status *status)
+{
+	struct ieee80211_channel *ch;
+
+	spin_lock_bh(&ar->data_lock);
+	ch = ar->scan_channel;
+	if (!ch)
+		ch = ar->rx_channel;
+	spin_unlock_bh(&ar->data_lock);
+
+	if (!ch)
+		return false;
+
+	status->band = ch->band;
+	status->freq = ch->center_freq;
+
+	return true;
+}
+
+static void ath10k_process_rx(struct ath10k *ar,
+			      struct ieee80211_rx_status *rx_status,
+			      struct sk_buff *skb)
+{
+	struct ieee80211_rx_status *status;
+
+	status = IEEE80211_SKB_RXCB(skb);
+	*status = *rx_status;
+
+	ath10k_dbg(ATH10K_DBG_DATA,
+		   "rx skb %p len %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%x fcs-err %imic-err %i\n",
+		   skb,
+		   skb->len,
+		   status->flag == 0 ? "legacy" : "",
+		   status->flag & RX_FLAG_HT ? "ht" : "",
+		   status->flag & RX_FLAG_VHT ? "vht" : "",
+		   status->flag & RX_FLAG_40MHZ ? "40" : "",
+		   status->vht_flag & RX_VHT_FLAG_80MHZ ? "80" : "",
+		   status->flag & RX_FLAG_SHORT_GI ? "sgi " : "",
+		   status->rate_idx,
+		   status->vht_nss,
+		   status->freq,
+		   status->band, status->flag,
+		   !!(status->flag & RX_FLAG_FAILED_FCS_CRC),
+		   !!(status->flag & RX_FLAG_MMIC_ERROR));
+	ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "rx skb: ",
+			skb->data, skb->len);
+
+	ieee80211_rx(ar->hw, skb);
+}
+
 static int ath10k_htt_rx_nwifi_hdrlen(struct ieee80211_hdr *hdr)
 {
 	/* nwifi header is padded to 4 bytes. this fixes 4addr rx */
@@ -643,11 +868,12 @@
 }
 
 static void ath10k_htt_rx_amsdu(struct ath10k_htt *htt,
-				struct htt_rx_info *info)
+				struct ieee80211_rx_status *rx_status,
+				struct sk_buff *skb_in)
 {
 	struct htt_rx_desc *rxd;
+	struct sk_buff *skb = skb_in;
 	struct sk_buff *first;
-	struct sk_buff *skb = info->skb;
 	enum rx_msdu_decap_format fmt;
 	enum htt_rx_mpdu_encrypt_type enctype;
 	struct ieee80211_hdr *hdr;
@@ -728,24 +954,28 @@
 			break;
 		}
 
-		info->skb = skb;
-		info->encrypt_type = enctype;
+		skb_in = skb;
+		ath10k_htt_rx_h_protected(htt, rx_status, skb_in, enctype, fmt,
+					  false);
 		skb = skb->next;
-		info->skb->next = NULL;
+		skb_in->next = NULL;
 
 		if (skb)
-			info->amsdu_more = true;
+			rx_status->flag |= RX_FLAG_AMSDU_MORE;
+		else
+			rx_status->flag &= ~RX_FLAG_AMSDU_MORE;
 
-		ath10k_process_rx(htt->ar, info);
+		ath10k_process_rx(htt->ar, rx_status, skb_in);
 	}
 
 	/* FIXME: It might be nice to re-assemble the A-MSDU when there's a
 	 * monitor interface active for sniffing purposes. */
 }
 
-static void ath10k_htt_rx_msdu(struct ath10k_htt *htt, struct htt_rx_info *info)
+static void ath10k_htt_rx_msdu(struct ath10k_htt *htt,
+			       struct ieee80211_rx_status *rx_status,
+			       struct sk_buff *skb)
 {
-	struct sk_buff *skb = info->skb;
 	struct htt_rx_desc *rxd;
 	struct ieee80211_hdr *hdr;
 	enum rx_msdu_decap_format fmt;
@@ -808,66 +1038,9 @@
 		break;
 	}
 
-	info->skb = skb;
-	info->encrypt_type = enctype;
+	ath10k_htt_rx_h_protected(htt, rx_status, skb, enctype, fmt, false);
 
-	ath10k_process_rx(htt->ar, info);
-}
-
-static bool ath10k_htt_rx_has_decrypt_err(struct sk_buff *skb)
-{
-	struct htt_rx_desc *rxd;
-	u32 flags;
-
-	rxd = (void *)skb->data - sizeof(*rxd);
-	flags = __le32_to_cpu(rxd->attention.flags);
-
-	if (flags & RX_ATTENTION_FLAGS_DECRYPT_ERR)
-		return true;
-
-	return false;
-}
-
-static bool ath10k_htt_rx_has_fcs_err(struct sk_buff *skb)
-{
-	struct htt_rx_desc *rxd;
-	u32 flags;
-
-	rxd = (void *)skb->data - sizeof(*rxd);
-	flags = __le32_to_cpu(rxd->attention.flags);
-
-	if (flags & RX_ATTENTION_FLAGS_FCS_ERR)
-		return true;
-
-	return false;
-}
-
-static bool ath10k_htt_rx_has_mic_err(struct sk_buff *skb)
-{
-	struct htt_rx_desc *rxd;
-	u32 flags;
-
-	rxd = (void *)skb->data - sizeof(*rxd);
-	flags = __le32_to_cpu(rxd->attention.flags);
-
-	if (flags & RX_ATTENTION_FLAGS_TKIP_MIC_ERR)
-		return true;
-
-	return false;
-}
-
-static bool ath10k_htt_rx_is_mgmt(struct sk_buff *skb)
-{
-	struct htt_rx_desc *rxd;
-	u32 flags;
-
-	rxd = (void *)skb->data - sizeof(*rxd);
-	flags = __le32_to_cpu(rxd->attention.flags);
-
-	if (flags & RX_ATTENTION_FLAGS_MGMT_TYPE)
-		return true;
-
-	return false;
+	ath10k_process_rx(htt->ar, rx_status, skb);
 }
 
 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb)
@@ -952,21 +1125,73 @@
 	return 0;
 }
 
+static bool ath10k_htt_rx_amsdu_allowed(struct ath10k_htt *htt,
+					struct sk_buff *head,
+					enum htt_rx_mpdu_status status,
+					bool channel_set,
+					u32 attention)
+{
+	if (head->len == 0) {
+		ath10k_dbg(ATH10K_DBG_HTT,
+			   "htt rx dropping due to zero-len\n");
+		return false;
+	}
+
+	if (attention & RX_ATTENTION_FLAGS_DECRYPT_ERR) {
+		ath10k_dbg(ATH10K_DBG_HTT,
+			   "htt rx dropping due to decrypt-err\n");
+		return false;
+	}
+
+	if (!channel_set) {
+		ath10k_warn("no channel configured; ignoring frame!\n");
+		return false;
+	}
+
+	/* Skip mgmt frames here; they are handled in WMI */
+	if (status == HTT_RX_IND_MPDU_STATUS_MGMT_CTRL ||
+	    attention & RX_ATTENTION_FLAGS_MGMT_TYPE) {
+		ath10k_dbg(ATH10K_DBG_HTT, "htt rx mgmt ctrl\n");
+		return false;
+	}
+
+	if (status != HTT_RX_IND_MPDU_STATUS_OK &&
+	    status != HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR &&
+	    status != HTT_RX_IND_MPDU_STATUS_ERR_INV_PEER &&
+	    !htt->ar->monitor_started) {
+		ath10k_dbg(ATH10K_DBG_HTT,
+			   "htt rx ignoring frame w/ status %d\n",
+			   status);
+		return false;
+	}
+
+	if (test_bit(ATH10K_CAC_RUNNING, &htt->ar->dev_flags)) {
+		ath10k_dbg(ATH10K_DBG_HTT,
+			   "htt rx CAC running\n");
+		return false;
+	}
+
+	return true;
+}
+
 static void ath10k_htt_rx_handler(struct ath10k_htt *htt,
 				  struct htt_rx_indication *rx)
 {
-	struct htt_rx_info info;
+	struct ieee80211_rx_status *rx_status = &htt->rx_status;
 	struct htt_rx_indication_mpdu_range *mpdu_ranges;
+	struct htt_rx_desc *rxd;
+	enum htt_rx_mpdu_status status;
 	struct ieee80211_hdr *hdr;
 	int num_mpdu_ranges;
+	u32 attention;
 	int fw_desc_len;
 	u8 *fw_desc;
+	bool channel_set;
 	int i, j;
+	int ret;
 
 	lockdep_assert_held(&htt->rx_ring.lock);
 
-	memset(&info, 0, sizeof(info));
-
 	fw_desc_len = __le16_to_cpu(rx->prefix.fw_rx_desc_bytes);
 	fw_desc = (u8 *)&rx->fw_desc;
 
@@ -974,106 +1199,90 @@
 			     HTT_RX_INDICATION_INFO1_NUM_MPDU_RANGES);
 	mpdu_ranges = htt_rx_ind_get_mpdu_ranges(rx);
 
+	/* Fill this once, since it is per-ppdu */
+	if (rx->ppdu.info0 & HTT_RX_INDICATION_INFO0_START_VALID) {
+		memset(rx_status, 0, sizeof(*rx_status));
+		rx_status->signal  = ATH10K_DEFAULT_NOISE_FLOOR +
+				     rx->ppdu.combined_rssi;
+	}
+
+	if (rx->ppdu.info0 & HTT_RX_INDICATION_INFO0_END_VALID) {
+		/* TSF available only in 32-bit */
+		rx_status->mactime = __le32_to_cpu(rx->ppdu.tsf) & 0xffffffff;
+		rx_status->flag |= RX_FLAG_MACTIME_END;
+	}
+
+	channel_set = ath10k_htt_rx_h_channel(htt->ar, rx_status);
+
+	if (channel_set) {
+		ath10k_htt_rx_h_rates(htt->ar, rx_status->band,
+				      rx->ppdu.info0,
+				      __le32_to_cpu(rx->ppdu.info1),
+				      __le32_to_cpu(rx->ppdu.info2),
+				      rx_status);
+	}
+
 	ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "htt rx ind: ",
 			rx, sizeof(*rx) +
 			(sizeof(struct htt_rx_indication_mpdu_range) *
 				num_mpdu_ranges));
 
 	for (i = 0; i < num_mpdu_ranges; i++) {
-		info.status = mpdu_ranges[i].mpdu_range_status;
+		status = mpdu_ranges[i].mpdu_range_status;
 
 		for (j = 0; j < mpdu_ranges[i].mpdu_count; j++) {
 			struct sk_buff *msdu_head, *msdu_tail;
-			enum htt_rx_mpdu_status status;
-			int msdu_chaining;
 
 			msdu_head = NULL;
 			msdu_tail = NULL;
-			msdu_chaining = ath10k_htt_rx_amsdu_pop(htt,
-							 &fw_desc,
-							 &fw_desc_len,
-							 &msdu_head,
-							 &msdu_tail);
+			ret = ath10k_htt_rx_amsdu_pop(htt,
+						      &fw_desc,
+						      &fw_desc_len,
+						      &msdu_head,
+						      &msdu_tail);
 
-			if (!msdu_head) {
-				ath10k_warn("htt rx no data!\n");
-				continue;
-			}
-
-			if (msdu_head->len == 0) {
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx dropping due to zero-len\n");
+			if (ret < 0) {
+				ath10k_warn("failed to pop amsdu from htt rx ring %d\n",
+					    ret);
 				ath10k_htt_rx_free_msdu_chain(msdu_head);
 				continue;
 			}
 
-			if (ath10k_htt_rx_has_decrypt_err(msdu_head)) {
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx dropping due to decrypt-err\n");
+			rxd = container_of((void *)msdu_head->data,
+					   struct htt_rx_desc,
+					   msdu_payload);
+			attention = __le32_to_cpu(rxd->attention.flags);
+
+			if (!ath10k_htt_rx_amsdu_allowed(htt, msdu_head,
+							 status,
+							 channel_set,
+							 attention)) {
 				ath10k_htt_rx_free_msdu_chain(msdu_head);
 				continue;
 			}
 
-			status = info.status;
-
-			/* Skip mgmt frames while we handle this in WMI */
-			if (status == HTT_RX_IND_MPDU_STATUS_MGMT_CTRL ||
-			    ath10k_htt_rx_is_mgmt(msdu_head)) {
-				ath10k_dbg(ATH10K_DBG_HTT, "htt rx mgmt ctrl\n");
+			if (ret > 0 &&
+			    ath10k_unchain_msdu(msdu_head) < 0) {
 				ath10k_htt_rx_free_msdu_chain(msdu_head);
 				continue;
 			}
 
-			if (status != HTT_RX_IND_MPDU_STATUS_OK &&
-			    status != HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR &&
-			    status != HTT_RX_IND_MPDU_STATUS_ERR_INV_PEER &&
-			    !htt->ar->monitor_enabled) {
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx ignoring frame w/ status %d\n",
-					   status);
-				ath10k_htt_rx_free_msdu_chain(msdu_head);
-				continue;
-			}
+			if (attention & RX_ATTENTION_FLAGS_FCS_ERR)
+				rx_status->flag |= RX_FLAG_FAILED_FCS_CRC;
+			else
+				rx_status->flag &= ~RX_FLAG_FAILED_FCS_CRC;
 
-			if (test_bit(ATH10K_CAC_RUNNING, &htt->ar->dev_flags)) {
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx CAC running\n");
-				ath10k_htt_rx_free_msdu_chain(msdu_head);
-				continue;
-			}
-
-			if (msdu_chaining &&
-			    (ath10k_unchain_msdu(msdu_head) < 0)) {
-				ath10k_htt_rx_free_msdu_chain(msdu_head);
-				continue;
-			}
-
-			info.skb     = msdu_head;
-			info.fcs_err = ath10k_htt_rx_has_fcs_err(msdu_head);
-			info.mic_err = ath10k_htt_rx_has_mic_err(msdu_head);
-
-			if (info.fcs_err)
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx has FCS err\n");
-
-			if (info.mic_err)
-				ath10k_dbg(ATH10K_DBG_HTT,
-					   "htt rx has MIC err\n");
-
-			info.signal  = ATH10K_DEFAULT_NOISE_FLOOR;
-			info.signal += rx->ppdu.combined_rssi;
-
-			info.rate.info0 = rx->ppdu.info0;
-			info.rate.info1 = __le32_to_cpu(rx->ppdu.info1);
-			info.rate.info2 = __le32_to_cpu(rx->ppdu.info2);
-			info.tsf = __le32_to_cpu(rx->ppdu.tsf);
+			if (attention & RX_ATTENTION_FLAGS_TKIP_MIC_ERR)
+				rx_status->flag |= RX_FLAG_MMIC_ERROR;
+			else
+				rx_status->flag &= ~RX_FLAG_MMIC_ERROR;
 
 			hdr = ath10k_htt_rx_skb_get_hdr(msdu_head);
 
 			if (ath10k_htt_rx_hdr_is_amsdu(hdr))
-				ath10k_htt_rx_amsdu(htt, &info);
+				ath10k_htt_rx_amsdu(htt, rx_status, msdu_head);
 			else
-				ath10k_htt_rx_msdu(htt, &info);
+				ath10k_htt_rx_msdu(htt, rx_status, msdu_head);
 		}
 	}
 
@@ -1084,11 +1293,12 @@
 				struct htt_rx_fragment_indication *frag)
 {
 	struct sk_buff *msdu_head, *msdu_tail;
+	enum htt_rx_mpdu_encrypt_type enctype;
 	struct htt_rx_desc *rxd;
 	enum rx_msdu_decap_format fmt;
-	struct htt_rx_info info = {};
+	struct ieee80211_rx_status *rx_status = &htt->rx_status;
 	struct ieee80211_hdr *hdr;
-	int msdu_chaining;
+	int ret;
 	bool tkip_mic_err;
 	bool decrypt_err;
 	u8 *fw_desc;
@@ -1102,24 +1312,21 @@
 	msdu_tail = NULL;
 
 	spin_lock_bh(&htt->rx_ring.lock);
-	msdu_chaining = ath10k_htt_rx_amsdu_pop(htt, &fw_desc, &fw_desc_len,
-						&msdu_head, &msdu_tail);
+	ret = ath10k_htt_rx_amsdu_pop(htt, &fw_desc, &fw_desc_len,
+				      &msdu_head, &msdu_tail);
 	spin_unlock_bh(&htt->rx_ring.lock);
 
 	ath10k_dbg(ATH10K_DBG_HTT_DUMP, "htt rx frag ahead\n");
 
-	if (!msdu_head) {
-		ath10k_warn("htt rx frag no data\n");
-		return;
-	}
-
-	if (msdu_chaining || msdu_head != msdu_tail) {
-		ath10k_warn("aggregation with fragmentation?!\n");
+	if (ret) {
+		ath10k_warn("failed to pop amsdu from httr rx ring for fragmented rx %d\n",
+			    ret);
 		ath10k_htt_rx_free_msdu_chain(msdu_head);
 		return;
 	}
 
 	/* FIXME: implement signal strength */
+	rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
 
 	hdr = (struct ieee80211_hdr *)msdu_head->data;
 	rxd = (void *)msdu_head->data - sizeof(*rxd);
@@ -1136,57 +1343,55 @@
 		goto end;
 	}
 
-	info.skb = msdu_head;
-	info.status = HTT_RX_IND_MPDU_STATUS_OK;
-	info.encrypt_type = MS(__le32_to_cpu(rxd->mpdu_start.info0),
-				RX_MPDU_START_INFO0_ENCRYPT_TYPE);
-	info.skb->ip_summed = ath10k_htt_rx_get_csum_state(info.skb);
+	enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0),
+		     RX_MPDU_START_INFO0_ENCRYPT_TYPE);
+	ath10k_htt_rx_h_protected(htt, rx_status, msdu_head, enctype, fmt,
+				  true);
+	msdu_head->ip_summed = ath10k_htt_rx_get_csum_state(msdu_head);
 
-	if (tkip_mic_err) {
+	if (tkip_mic_err)
 		ath10k_warn("tkip mic error\n");
-		info.status = HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR;
-	}
 
 	if (decrypt_err) {
 		ath10k_warn("decryption err in fragmented rx\n");
-		dev_kfree_skb_any(info.skb);
+		dev_kfree_skb_any(msdu_head);
 		goto end;
 	}
 
-	if (info.encrypt_type != HTT_RX_MPDU_ENCRYPT_NONE) {
+	if (enctype != HTT_RX_MPDU_ENCRYPT_NONE) {
 		hdrlen = ieee80211_hdrlen(hdr->frame_control);
-		paramlen = ath10k_htt_rx_crypto_param_len(info.encrypt_type);
+		paramlen = ath10k_htt_rx_crypto_param_len(enctype);
 
 		/* It is more efficient to move the header than the payload */
-		memmove((void *)info.skb->data + paramlen,
-			(void *)info.skb->data,
+		memmove((void *)msdu_head->data + paramlen,
+			(void *)msdu_head->data,
 			hdrlen);
-		skb_pull(info.skb, paramlen);
-		hdr = (struct ieee80211_hdr *)info.skb->data;
+		skb_pull(msdu_head, paramlen);
+		hdr = (struct ieee80211_hdr *)msdu_head->data;
 	}
 
 	/* remove trailing FCS */
 	trim  = 4;
 
 	/* remove crypto trailer */
-	trim += ath10k_htt_rx_crypto_tail_len(info.encrypt_type);
+	trim += ath10k_htt_rx_crypto_tail_len(enctype);
 
 	/* last fragment of TKIP frags has MIC */
 	if (!ieee80211_has_morefrags(hdr->frame_control) &&
-	    info.encrypt_type == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+	    enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
 		trim += 8;
 
-	if (trim > info.skb->len) {
+	if (trim > msdu_head->len) {
 		ath10k_warn("htt rx fragment: trailer longer than the frame itself? drop\n");
-		dev_kfree_skb_any(info.skb);
+		dev_kfree_skb_any(msdu_head);
 		goto end;
 	}
 
-	skb_trim(info.skb, info.skb->len - trim);
+	skb_trim(msdu_head, msdu_head->len - trim);
 
 	ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "htt rx frag mpdu: ",
-			info.skb->data, info.skb->len);
-	ath10k_process_rx(htt->ar, &info);
+			msdu_head->data, msdu_head->len);
+	ath10k_process_rx(htt->ar, rx_status, msdu_head);
 
 end:
 	if (fw_desc_len > 0) {

diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 7a3e2e4..7064354 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c

@@ -83,7 +83,7 @@
 	__clear_bit(msdu_id, htt->used_msdu_ids);
 }
 
-int ath10k_htt_tx_attach(struct ath10k_htt *htt)
+int ath10k_htt_tx_alloc(struct ath10k_htt *htt)
 {
 	spin_lock_init(&htt->tx_lock);
 	init_waitqueue_head(&htt->empty_tx_wq);
@@ -120,7 +120,7 @@
 	return 0;
 }
 
-static void ath10k_htt_tx_cleanup_pending(struct ath10k_htt *htt)
+static void ath10k_htt_tx_free_pending(struct ath10k_htt *htt)
 {
 	struct htt_tx_done tx_done = {0};
 	int msdu_id;
@@ -141,9 +141,9 @@
 	spin_unlock_bh(&htt->tx_lock);
 }
 
-void ath10k_htt_tx_detach(struct ath10k_htt *htt)
+void ath10k_htt_tx_free(struct ath10k_htt *htt)
 {
-	ath10k_htt_tx_cleanup_pending(htt);
+	ath10k_htt_tx_free_pending(htt);
 	kfree(htt->pending_tx);
 	kfree(htt->used_msdu_ids);
 	dma_pool_destroy(htt->tx_pool);

diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 35fc44e..007e855 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h

@@ -28,6 +28,7 @@
 #define QCA988X_HW_2_0_CHIP_ID_REV	0x2
 #define QCA988X_HW_2_0_FW_DIR		"ath10k/QCA988X/hw2.0"
 #define QCA988X_HW_2_0_FW_FILE		"firmware.bin"
+#define QCA988X_HW_2_0_FW_2_FILE	"firmware-2.bin"
 #define QCA988X_HW_2_0_OTP_FILE		"otp.bin"
 #define QCA988X_HW_2_0_BOARD_DATA_FILE	"board.bin"
 #define QCA988X_HW_2_0_PATCH_LOAD_ADDR	0x1234

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 511a2f8..a210800 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c

@@ -54,7 +54,10 @@
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_CCMP:
 		arg.key_cipher = WMI_CIPHER_AES_CCM;
-		key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
+		if (arvif->vdev_type == WMI_VDEV_TYPE_AP)
+			key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT;
+		else
+			key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
 		arg.key_cipher = WMI_CIPHER_TKIP;
@@ -165,7 +168,7 @@
 			first_errno = ret;
 
 		if (ret)
-			ath10k_warn("could not remove peer wep key %d (%d)\n",
+			ath10k_warn("failed to remove peer wep key %d: %d\n",
 				    i, ret);
 
 		peer->keys[i] = NULL;
@@ -213,7 +216,8 @@
 			first_errno = ret;
 
 		if (ret)
-			ath10k_warn("could not remove key for %pM\n", addr);
+			ath10k_warn("failed to remove key for %pM: %d\n",
+				    addr, ret);
 	}
 
 	return first_errno;
@@ -323,14 +327,14 @@
 
 	ret = ath10k_wmi_peer_create(ar, vdev_id, addr);
 	if (ret) {
-		ath10k_warn("Failed to create wmi peer %pM on vdev %i: %i\n",
+		ath10k_warn("failed to create wmi peer %pM on vdev %i: %i\n",
 			    addr, vdev_id, ret);
 		return ret;
 	}
 
 	ret = ath10k_wait_for_peer_created(ar, vdev_id, addr);
 	if (ret) {
-		ath10k_warn("Failed to wait for created wmi peer %pM on vdev %i: %i\n",
+		ath10k_warn("failed to wait for created wmi peer %pM on vdev %i: %i\n",
 			    addr, vdev_id, ret);
 		return ret;
 	}
@@ -351,7 +355,7 @@
 	ret = ath10k_wmi_pdev_set_param(ar, param,
 					ATH10K_KICKOUT_THRESHOLD);
 	if (ret) {
-		ath10k_warn("Failed to set kickout threshold on vdev %i: %d\n",
+		ath10k_warn("failed to set kickout threshold on vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
@@ -360,7 +364,7 @@
 	ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param,
 					ATH10K_KEEPALIVE_MIN_IDLE);
 	if (ret) {
-		ath10k_warn("Failed to set keepalive minimum idle time on vdev %i : %d\n",
+		ath10k_warn("failed to set keepalive minimum idle time on vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
@@ -369,7 +373,7 @@
 	ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param,
 					ATH10K_KEEPALIVE_MAX_IDLE);
 	if (ret) {
-		ath10k_warn("Failed to set keepalive maximum idle time on vdev %i: %d\n",
+		ath10k_warn("failed to set keepalive maximum idle time on vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
@@ -378,7 +382,7 @@
 	ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param,
 					ATH10K_KEEPALIVE_MAX_UNRESPONSIVE);
 	if (ret) {
-		ath10k_warn("Failed to set keepalive maximum unresponsive time on vdev %i: %d\n",
+		ath10k_warn("failed to set keepalive maximum unresponsive time on vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
@@ -488,6 +492,310 @@
 	return 0;
 }
 
+static bool ath10k_monitor_is_enabled(struct ath10k *ar)
+{
+	lockdep_assert_held(&ar->conf_mutex);
+
+	ath10k_dbg(ATH10K_DBG_MAC,
+		   "mac monitor refs: promisc %d monitor %d cac %d\n",
+		   ar->promisc, ar->monitor,
+		   test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags));
+
+	return ar->promisc || ar->monitor ||
+	       test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
+}
+
+static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id)
+{
+	struct cfg80211_chan_def *chandef = &ar->chandef;
+	struct ieee80211_channel *channel = chandef->chan;
+	struct wmi_vdev_start_request_arg arg = {};
+	int ret, stop_ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	arg.vdev_id = vdev_id;
+	arg.channel.freq = channel->center_freq;
+	arg.channel.band_center_freq1 = chandef->center_freq1;
+
+	/* TODO: set this up dynamically; what if we
+	   don't have any vifs? */
+	arg.channel.mode = chan_to_phymode(chandef);
+	arg.channel.chan_radar =
+			!!(channel->flags & IEEE80211_CHAN_RADAR);
+
+	arg.channel.min_power = 0;
+	arg.channel.max_power = channel->max_power * 2;
+	arg.channel.max_reg_power = channel->max_reg_power * 2;
+	arg.channel.max_antenna_gain = channel->max_antenna_gain * 2;
+
+	ret = ath10k_wmi_vdev_start(ar, &arg);
+	if (ret) {
+		ath10k_warn("failed to request monitor vdev %i start: %d\n",
+			    vdev_id, ret);
+		return ret;
+	}
+
+	ret = ath10k_vdev_setup_sync(ar);
+	if (ret) {
+		ath10k_warn("failed to synchronize setup for monitor vdev %i: %d\n",
+			    vdev_id, ret);
+		return ret;
+	}
+
+	ret = ath10k_wmi_vdev_up(ar, vdev_id, 0, ar->mac_addr);
+	if (ret) {
+		ath10k_warn("failed to put up monitor vdev %i: %d\n",
+			    vdev_id, ret);
+		goto vdev_stop;
+	}
+
+	ar->monitor_vdev_id = vdev_id;
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %i started\n",
+		   ar->monitor_vdev_id);
+	return 0;
+
+vdev_stop:
+	stop_ret = ath10k_wmi_vdev_stop(ar, vdev_id);
+	if (stop_ret)
+		ath10k_warn("failed to stop monitor vdev %i after start failure: %d\n",
+			    vdev_id, stop_ret);
+
+	return ret; /* the vdev_up error; cleanup result is only logged */
+}
+
+static int ath10k_monitor_vdev_stop(struct ath10k *ar)
+{
+	int ret = 0;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	ret = ath10k_wmi_vdev_down(ar, ar->monitor_vdev_id);
+	if (ret)
+		ath10k_warn("failed to put down monitor vdev %i: %d\n",
+			    ar->monitor_vdev_id, ret);
+
+	ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id);
+	if (ret)
+		ath10k_warn("failed to request monitor vdev %i stop: %d\n",
+			    ar->monitor_vdev_id, ret);
+
+	ret = ath10k_vdev_setup_sync(ar);
+	if (ret)
+		ath10k_warn("failed to synchronise monitor vdev %i: %d\n",
+			    ar->monitor_vdev_id, ret);
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %i stopped\n",
+		   ar->monitor_vdev_id);
+	return ret; /* final sync status only; earlier errors are just logged */
+}
+
+static int ath10k_monitor_vdev_create(struct ath10k *ar)
+{
+	int bit, ret = 0;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	bit = ffs(ar->free_vdev_map);
+	if (bit == 0) {
+		ath10k_warn("failed to find free vdev id for monitor vdev\n");
+		return -ENOMEM;
+	}
+
+	ar->monitor_vdev_id = bit - 1;
+	ar->free_vdev_map &= ~(1 << ar->monitor_vdev_id);
+
+	ret = ath10k_wmi_vdev_create(ar, ar->monitor_vdev_id,
+				     WMI_VDEV_TYPE_MONITOR,
+				     0, ar->mac_addr);
+	if (ret) {
+		ath10k_warn("failed to request monitor vdev %i creation: %d\n",
+			    ar->monitor_vdev_id, ret);
+		goto vdev_fail;
+	}
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %d created\n",
+		   ar->monitor_vdev_id);
+
+	return 0;
+
+vdev_fail:
+	/*
+	 * Restore the ID to the global map.
+	 */
+	ar->free_vdev_map |= 1 << (ar->monitor_vdev_id);
+	return ret;
+}
+
+static int ath10k_monitor_vdev_delete(struct ath10k *ar)
+{
+	int ret = 0;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	ret = ath10k_wmi_vdev_delete(ar, ar->monitor_vdev_id);
+	if (ret) {
+		ath10k_warn("failed to request wmi monitor vdev %i removal: %d\n",
+			    ar->monitor_vdev_id, ret);
+		return ret;
+	}
+
+	ar->free_vdev_map |= 1 << (ar->monitor_vdev_id);
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %d deleted\n",
+		   ar->monitor_vdev_id);
+	return ret;
+}
+
+static int ath10k_monitor_start(struct ath10k *ar)
+{
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	if (!ath10k_monitor_is_enabled(ar)) {
+		ath10k_warn("trying to start monitor with no references\n");
+		return 0;
+	}
+
+	if (ar->monitor_started) {
+		ath10k_dbg(ATH10K_DBG_MAC, "mac monitor already started\n");
+		return 0;
+	}
+
+	ret = ath10k_monitor_vdev_create(ar);
+	if (ret) {
+		ath10k_warn("failed to create monitor vdev: %d\n", ret);
+		return ret;
+	}
+
+	ret = ath10k_monitor_vdev_start(ar, ar->monitor_vdev_id);
+	if (ret) {
+		ath10k_warn("failed to start monitor vdev: %d\n", ret);
+		ath10k_monitor_vdev_delete(ar);
+		return ret;
+	}
+
+	ar->monitor_started = true;
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor started\n");
+
+	return 0;
+}
+
+static void ath10k_monitor_stop(struct ath10k *ar)
+{
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	if (ath10k_monitor_is_enabled(ar)) {
+		ath10k_dbg(ATH10K_DBG_MAC,
+			   "mac monitor will be stopped later\n");
+		return;
+	}
+
+	if (!ar->monitor_started) {
+		ath10k_dbg(ATH10K_DBG_MAC,
+			   "mac monitor probably failed to start earlier\n");
+		return;
+	}
+
+	ret = ath10k_monitor_vdev_stop(ar);
+	if (ret)
+		ath10k_warn("failed to stop monitor vdev: %d\n", ret);
+
+	ret = ath10k_monitor_vdev_delete(ar);
+	if (ret)
+		ath10k_warn("failed to delete monitor vdev: %d\n", ret);
+
+	ar->monitor_started = false;
+	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor stopped\n");
+}
+
+static int ath10k_recalc_rtscts_prot(struct ath10k_vif *arvif)
+{
+	struct ath10k *ar = arvif->ar;
+	u32 vdev_param, rts_cts = 0;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	vdev_param = ar->wmi.vdev_param->enable_rtscts;
+
+	if (arvif->use_cts_prot || arvif->num_legacy_stations > 0)
+		rts_cts |= SM(WMI_RTSCTS_ENABLED, WMI_RTSCTS_SET);
+
+	if (arvif->num_legacy_stations > 0)
+		rts_cts |= SM(WMI_RTSCTS_ACROSS_SW_RETRIES,
+			      WMI_RTSCTS_PROFILE);
+
+	return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
+					 rts_cts);
+}
+
+static int ath10k_start_cac(struct ath10k *ar)
+{
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	set_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
+
+	ret = ath10k_monitor_start(ar);
+	if (ret) {
+		ath10k_warn("failed to start monitor (cac): %d\n", ret);
+		clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
+		return ret;
+	}
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac cac start monitor vdev %d\n",
+		   ar->monitor_vdev_id);
+
+	return 0;
+}
+
+static int ath10k_stop_cac(struct ath10k *ar)
+{
+	lockdep_assert_held(&ar->conf_mutex);
+
+	/* CAC is not running - do nothing */
+	if (!test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags))
+		return 0;
+
+	clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
+	ath10k_monitor_stop(ar);
+
+	ath10k_dbg(ATH10K_DBG_MAC, "mac cac finished\n");
+
+	return 0;
+}
+
+static void ath10k_recalc_radar_detection(struct ath10k *ar)
+{
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	ath10k_stop_cac(ar);
+
+	if (!ar->radar_enabled)
+		return;
+
+	if (ar->num_started_vdevs > 0)
+		return;
+
+	ret = ath10k_start_cac(ar);
+	if (ret) {
+		/*
+		 * Not possible to start CAC on current channel so starting
+		 * radiation is not allowed, make this channel DFS_UNAVAILABLE
+		 * by indicating that radar was detected.
+		 */
+		ath10k_warn("failed to start CAC: %d\n", ret);
+		ieee80211_radar_detected(ar->hw);
+	}
+}
+
 static int ath10k_vdev_start(struct ath10k_vif *arvif)
 {
 	struct ath10k *ar = arvif->ar;
@@ -532,18 +840,21 @@
 
 	ret = ath10k_wmi_vdev_start(ar, &arg);
 	if (ret) {
-		ath10k_warn("WMI vdev %i start failed: ret %d\n",
+		ath10k_warn("failed to start WMI vdev %i: %d\n",
 			    arg.vdev_id, ret);
 		return ret;
 	}
 
 	ret = ath10k_vdev_setup_sync(ar);
 	if (ret) {
-		ath10k_warn("vdev %i setup failed %d\n",
+		ath10k_warn("failed to synchronise setup for vdev %i: %d\n",
 			    arg.vdev_id, ret);
 		return ret;
 	}
 
+	ar->num_started_vdevs++;
+	ath10k_recalc_radar_detection(ar);
+
 	return ret;
 }
 
@@ -558,288 +869,28 @@
 
 	ret = ath10k_wmi_vdev_stop(ar, arvif->vdev_id);
 	if (ret) {
-		ath10k_warn("WMI vdev %i stop failed: ret %d\n",
+		ath10k_warn("failed to stop WMI vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
 
 	ret = ath10k_vdev_setup_sync(ar);
 	if (ret) {
-		ath10k_warn("vdev %i setup sync failed %d\n",
+		ath10k_warn("failed to synchronise setup for vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
 
-	return ret;
-}
+	WARN_ON(ar->num_started_vdevs == 0);
 
-static int ath10k_monitor_start(struct ath10k *ar, int vdev_id)
-{
-	struct cfg80211_chan_def *chandef = &ar->chandef;
-	struct ieee80211_channel *channel = chandef->chan;
-	struct wmi_vdev_start_request_arg arg = {};
-	int ret = 0;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	if (!ar->monitor_present) {
-		ath10k_warn("mac montor stop -- monitor is not present\n");
-		return -EINVAL;
+	if (ar->num_started_vdevs != 0) {
+		ar->num_started_vdevs--;
+		ath10k_recalc_radar_detection(ar);
 	}
 
-	arg.vdev_id = vdev_id;
-	arg.channel.freq = channel->center_freq;
-	arg.channel.band_center_freq1 = chandef->center_freq1;
-
-	/* TODO setup this dynamically, what in case we
-	   don't have any vifs? */
-	arg.channel.mode = chan_to_phymode(chandef);
-	arg.channel.chan_radar =
-			!!(channel->flags & IEEE80211_CHAN_RADAR);
-
-	arg.channel.min_power = 0;
-	arg.channel.max_power = channel->max_power * 2;
-	arg.channel.max_reg_power = channel->max_reg_power * 2;
-	arg.channel.max_antenna_gain = channel->max_antenna_gain * 2;
-
-	ret = ath10k_wmi_vdev_start(ar, &arg);
-	if (ret) {
-		ath10k_warn("Monitor vdev %i start failed: ret %d\n",
-			    vdev_id, ret);
-		return ret;
-	}
-
-	ret = ath10k_vdev_setup_sync(ar);
-	if (ret) {
-		ath10k_warn("Monitor vdev %i setup failed %d\n",
-			    vdev_id, ret);
-		return ret;
-	}
-
-	ret = ath10k_wmi_vdev_up(ar, vdev_id, 0, ar->mac_addr);
-	if (ret) {
-		ath10k_warn("Monitor vdev %i up failed: %d\n",
-			    vdev_id, ret);
-		goto vdev_stop;
-	}
-
-	ar->monitor_vdev_id = vdev_id;
-	ar->monitor_enabled = true;
-
-	return 0;
-
-vdev_stop:
-	ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id);
-	if (ret)
-		ath10k_warn("Monitor vdev %i stop failed: %d\n",
-			    ar->monitor_vdev_id, ret);
-
 	return ret;
 }
 
-static int ath10k_monitor_stop(struct ath10k *ar)
-{
-	int ret = 0;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	if (!ar->monitor_present) {
-		ath10k_warn("mac montor stop -- monitor is not present\n");
-		return -EINVAL;
-	}
-
-	if (!ar->monitor_enabled) {
-		ath10k_warn("mac montor stop -- monitor is not enabled\n");
-		return -EINVAL;
-	}
-
-	ret = ath10k_wmi_vdev_down(ar, ar->monitor_vdev_id);
-	if (ret)
-		ath10k_warn("Monitor vdev %i down failed: %d\n",
-			    ar->monitor_vdev_id, ret);
-
-	ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id);
-	if (ret)
-		ath10k_warn("Monitor vdev %i stop failed: %d\n",
-			    ar->monitor_vdev_id, ret);
-
-	ret = ath10k_vdev_setup_sync(ar);
-	if (ret)
-		ath10k_warn("Monitor_down sync failed, vdev %i: %d\n",
-			    ar->monitor_vdev_id, ret);
-
-	ar->monitor_enabled = false;
-	return ret;
-}
-
-static int ath10k_monitor_create(struct ath10k *ar)
-{
-	int bit, ret = 0;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	if (ar->monitor_present) {
-		ath10k_warn("Monitor mode already enabled\n");
-		return 0;
-	}
-
-	bit = ffs(ar->free_vdev_map);
-	if (bit == 0) {
-		ath10k_warn("No free VDEV slots\n");
-		return -ENOMEM;
-	}
-
-	ar->monitor_vdev_id = bit - 1;
-	ar->free_vdev_map &= ~(1 << ar->monitor_vdev_id);
-
-	ret = ath10k_wmi_vdev_create(ar, ar->monitor_vdev_id,
-				     WMI_VDEV_TYPE_MONITOR,
-				     0, ar->mac_addr);
-	if (ret) {
-		ath10k_warn("WMI vdev %i monitor create failed: ret %d\n",
-			    ar->monitor_vdev_id, ret);
-		goto vdev_fail;
-	}
-
-	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %d created\n",
-		   ar->monitor_vdev_id);
-
-	ar->monitor_present = true;
-	return 0;
-
-vdev_fail:
-	/*
-	 * Restore the ID to the global map.
-	 */
-	ar->free_vdev_map |= 1 << (ar->monitor_vdev_id);
-	return ret;
-}
-
-static int ath10k_monitor_destroy(struct ath10k *ar)
-{
-	int ret = 0;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	if (!ar->monitor_present)
-		return 0;
-
-	ret = ath10k_wmi_vdev_delete(ar, ar->monitor_vdev_id);
-	if (ret) {
-		ath10k_warn("WMI vdev %i monitor delete failed: %d\n",
-			    ar->monitor_vdev_id, ret);
-		return ret;
-	}
-
-	ar->free_vdev_map |= 1 << (ar->monitor_vdev_id);
-	ar->monitor_present = false;
-
-	ath10k_dbg(ATH10K_DBG_MAC, "mac monitor vdev %d deleted\n",
-		   ar->monitor_vdev_id);
-	return ret;
-}
-
-static int ath10k_start_cac(struct ath10k *ar)
-{
-	int ret;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	set_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
-
-	ret = ath10k_monitor_create(ar);
-	if (ret) {
-		clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
-		return ret;
-	}
-
-	ret = ath10k_monitor_start(ar, ar->monitor_vdev_id);
-	if (ret) {
-		clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
-		ath10k_monitor_destroy(ar);
-		return ret;
-	}
-
-	ath10k_dbg(ATH10K_DBG_MAC, "mac cac start monitor vdev %d\n",
-		   ar->monitor_vdev_id);
-
-	return 0;
-}
-
-static int ath10k_stop_cac(struct ath10k *ar)
-{
-	lockdep_assert_held(&ar->conf_mutex);
-
-	/* CAC is not running - do nothing */
-	if (!test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags))
-		return 0;
-
-	ath10k_monitor_stop(ar);
-	ath10k_monitor_destroy(ar);
-	clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
-
-	ath10k_dbg(ATH10K_DBG_MAC, "mac cac finished\n");
-
-	return 0;
-}
-
-static const char *ath10k_dfs_state(enum nl80211_dfs_state dfs_state)
-{
-	switch (dfs_state) {
-	case NL80211_DFS_USABLE:
-		return "USABLE";
-	case NL80211_DFS_UNAVAILABLE:
-		return "UNAVAILABLE";
-	case NL80211_DFS_AVAILABLE:
-		return "AVAILABLE";
-	default:
-		WARN_ON(1);
-		return "bug";
-	}
-}
-
-static void ath10k_config_radar_detection(struct ath10k *ar)
-{
-	struct ieee80211_channel *chan = ar->hw->conf.chandef.chan;
-	bool radar = ar->hw->conf.radar_enabled;
-	bool chan_radar = !!(chan->flags & IEEE80211_CHAN_RADAR);
-	enum nl80211_dfs_state dfs_state = chan->dfs_state;
-	int ret;
-
-	lockdep_assert_held(&ar->conf_mutex);
-
-	ath10k_dbg(ATH10K_DBG_MAC,
-		   "mac radar config update: chan %dMHz radar %d chan radar %d chan state %s\n",
-		   chan->center_freq, radar, chan_radar,
-		   ath10k_dfs_state(dfs_state));
-
-	/*
-	 * It's safe to call it even if CAC is not started.
-	 * This call here guarantees changing channel, etc. will stop CAC.
-	 */
-	ath10k_stop_cac(ar);
-
-	if (!radar)
-		return;
-
-	if (!chan_radar)
-		return;
-
-	if (dfs_state != NL80211_DFS_USABLE)
-		return;
-
-	ret = ath10k_start_cac(ar);
-	if (ret) {
-		/*
-		 * Not possible to start CAC on current channel so starting
-		 * radiation is not allowed, make this channel DFS_UNAVAILABLE
-		 * by indicating that radar was detected.
-		 */
-		ath10k_warn("failed to start CAC (%d)\n", ret);
-		ieee80211_radar_detected(ar->hw);
-	}
-}
-
 static void ath10k_control_beaconing(struct ath10k_vif *arvif,
 				struct ieee80211_bss_conf *info)
 {
@@ -880,7 +931,7 @@
 	ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid,
 				 arvif->bssid);
 	if (ret) {
-		ath10k_warn("Failed to bring up vdev %d: %i\n",
+		ath10k_warn("failed to bring up vdev %d: %i\n",
 			    arvif->vdev_id, ret);
 		ath10k_vdev_stop(arvif);
 		return;
@@ -904,7 +955,7 @@
 	if (!info->ibss_joined) {
 		ret = ath10k_peer_delete(arvif->ar, arvif->vdev_id, self_peer);
 		if (ret)
-			ath10k_warn("Failed to delete IBSS self peer:%pM for VDEV:%d ret:%d\n",
+			ath10k_warn("failed to delete IBSS self peer %pM for vdev %d: %d\n",
 				    self_peer, arvif->vdev_id, ret);
 
 		if (is_zero_ether_addr(arvif->bssid))
@@ -913,7 +964,7 @@
 		ret = ath10k_peer_delete(arvif->ar, arvif->vdev_id,
 					 arvif->bssid);
 		if (ret) {
-			ath10k_warn("Failed to delete IBSS BSSID peer:%pM for VDEV:%d ret:%d\n",
+			ath10k_warn("failed to delete IBSS BSSID peer %pM for vdev %d: %d\n",
 				    arvif->bssid, arvif->vdev_id, ret);
 			return;
 		}
@@ -925,7 +976,7 @@
 
 	ret = ath10k_peer_create(arvif->ar, arvif->vdev_id, self_peer);
 	if (ret) {
-		ath10k_warn("Failed to create IBSS self peer:%pM for VDEV:%d ret:%d\n",
+		ath10k_warn("failed to create IBSS self peer %pM for vdev %d: %d\n",
 			    self_peer, arvif->vdev_id, ret);
 		return;
 	}
@@ -934,7 +985,7 @@
 	ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, vdev_param,
 					ATH10K_DEFAULT_ATIM);
 	if (ret)
-		ath10k_warn("Failed to set IBSS ATIM for VDEV:%d ret:%d\n",
+		ath10k_warn("failed to set IBSS ATIM for vdev %d: %d\n",
 			    arvif->vdev_id, ret);
 }
 
@@ -961,7 +1012,7 @@
 		ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param,
 						  conf->dynamic_ps_timeout);
 		if (ret) {
-			ath10k_warn("Failed to set inactivity time for vdev %d: %i\n",
+			ath10k_warn("failed to set inactivity time for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 			return ret;
 		}
@@ -974,8 +1025,8 @@
 
 	ret = ath10k_wmi_set_psmode(ar, arvif->vdev_id, psmode);
 	if (ret) {
-		ath10k_warn("Failed to set PS Mode: %d for VDEV: %d\n",
-			    psmode, arvif->vdev_id);
+		ath10k_warn("failed to set PS Mode %d for vdev %d: %d\n",
+			    psmode, arvif->vdev_id, ret);
 		return ret;
 	}
 
@@ -1429,7 +1480,7 @@
 
 	ap_sta = ieee80211_find_sta(vif, bss_conf->bssid);
 	if (!ap_sta) {
-		ath10k_warn("Failed to find station entry for %pM, vdev %i\n",
+		ath10k_warn("failed to find station entry for bss %pM vdev %i\n",
 			    bss_conf->bssid, arvif->vdev_id);
 		rcu_read_unlock();
 		return;
@@ -1442,7 +1493,7 @@
 	ret = ath10k_peer_assoc_prepare(ar, arvif, ap_sta,
 					bss_conf, &peer_arg);
 	if (ret) {
-		ath10k_warn("Peer assoc prepare failed for %pM vdev %i\n: %d",
+		ath10k_warn("failed to prepare peer assoc for %pM vdev %i: %d\n",
 			    bss_conf->bssid, arvif->vdev_id, ret);
 		rcu_read_unlock();
 		return;
@@ -1452,7 +1503,7 @@
 
 	ret = ath10k_wmi_peer_assoc(ar, &peer_arg);
 	if (ret) {
-		ath10k_warn("Peer assoc failed for %pM vdev %i\n: %d",
+		ath10k_warn("failed to run peer assoc for %pM vdev %i: %d\n",
 			    bss_conf->bssid, arvif->vdev_id, ret);
 		return;
 	}
@@ -1473,7 +1524,7 @@
 
 	ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, arvif->aid, arvif->bssid);
 	if (ret) {
-		ath10k_warn("VDEV: %d up failed: ret %d\n",
+		ath10k_warn("failed to set vdev %d up: %d\n",
 			    arvif->vdev_id, ret);
 		return;
 	}
@@ -1524,7 +1575,7 @@
 }
 
 static int ath10k_station_assoc(struct ath10k *ar, struct ath10k_vif *arvif,
-				struct ieee80211_sta *sta)
+				struct ieee80211_sta *sta, bool reassoc)
 {
 	struct wmi_peer_assoc_complete_arg peer_arg;
 	int ret = 0;
@@ -1533,34 +1584,46 @@
 
 	ret = ath10k_peer_assoc_prepare(ar, arvif, sta, NULL, &peer_arg);
 	if (ret) {
-		ath10k_warn("WMI peer assoc prepare failed for %pM vdev %i: %i\n",
+		ath10k_warn("failed to prepare WMI peer assoc for %pM vdev %i: %i\n",
 			    sta->addr, arvif->vdev_id, ret);
 		return ret;
 	}
 
+	peer_arg.peer_reassoc = reassoc;
 	ret = ath10k_wmi_peer_assoc(ar, &peer_arg);
 	if (ret) {
-		ath10k_warn("Peer assoc failed for STA %pM vdev %i: %d\n",
+		ath10k_warn("failed to run peer assoc for STA %pM vdev %i: %d\n",
 			    sta->addr, arvif->vdev_id, ret);
 		return ret;
 	}
 
 	ret = ath10k_setup_peer_smps(ar, arvif, sta->addr, &sta->ht_cap);
 	if (ret) {
-		ath10k_warn("failed to setup peer SMPS for vdev: %d\n", ret);
+		ath10k_warn("failed to setup peer SMPS for vdev %d: %d\n",
+			    arvif->vdev_id, ret);
 		return ret;
 	}
 
+	if (!sta->wme) {
+		arvif->num_legacy_stations++;
+		ret  = ath10k_recalc_rtscts_prot(arvif);
+		if (ret) {
+			ath10k_warn("failed to recalculate rts/cts prot for vdev %d: %d\n",
+				    arvif->vdev_id, ret);
+			return ret;
+		}
+	}
+
 	ret = ath10k_install_peer_wep_keys(arvif, sta->addr);
 	if (ret) {
-		ath10k_warn("could not install peer wep keys for vdev %i: %d\n",
+		ath10k_warn("failed to install peer wep keys for vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
 
 	ret = ath10k_peer_assoc_qos_ap(ar, arvif, sta);
 	if (ret) {
-		ath10k_warn("could not set qos params for STA %pM for vdev %i: %d\n",
+		ath10k_warn("failed to set qos params for STA %pM for vdev %i: %d\n",
 			    sta->addr, arvif->vdev_id, ret);
 		return ret;
 	}
@@ -1575,9 +1638,19 @@
 
 	lockdep_assert_held(&ar->conf_mutex);
 
+	if (!sta->wme) {
+		arvif->num_legacy_stations--;
+		ret = ath10k_recalc_rtscts_prot(arvif);
+		if (ret) {
+			ath10k_warn("failed to recalculate rts/cts prot for vdev %d: %d\n",
+				    arvif->vdev_id, ret);
+			return ret;
+		}
+	}
+
 	ret = ath10k_clear_peer_keys(arvif, sta->addr);
 	if (ret) {
-		ath10k_warn("could not clear all peer wep keys for vdev %i: %d\n",
+		ath10k_warn("failed to clear all peer wep keys for vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		return ret;
 	}
@@ -1685,19 +1758,44 @@
 	return ret;
 }
 
+static enum wmi_dfs_region
+ath10k_mac_get_dfs_region(enum nl80211_dfs_regions dfs_region)
+{
+	switch (dfs_region) {
+	case NL80211_DFS_UNSET:
+		return WMI_UNINIT_DFS_DOMAIN;
+	case NL80211_DFS_FCC:
+		return WMI_FCC_DFS_DOMAIN;
+	case NL80211_DFS_ETSI:
+		return WMI_ETSI_DFS_DOMAIN;
+	case NL80211_DFS_JP:
+		return WMI_MKK4_DFS_DOMAIN;
+	}
+	return WMI_UNINIT_DFS_DOMAIN;
+}
+
 static void ath10k_regd_update(struct ath10k *ar)
 {
 	struct reg_dmn_pair_mapping *regpair;
 	int ret;
+	enum wmi_dfs_region wmi_dfs_reg;
+	enum nl80211_dfs_regions nl_dfs_reg;
 
 	lockdep_assert_held(&ar->conf_mutex);
 
 	ret = ath10k_update_channel_list(ar);
 	if (ret)
-		ath10k_warn("could not update channel list (%d)\n", ret);
+		ath10k_warn("failed to update channel list: %d\n", ret);
 
 	regpair = ar->ath_common.regulatory.regpair;
 
+	if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) {
+		nl_dfs_reg = ar->dfs_detector->region;
+		wmi_dfs_reg = ath10k_mac_get_dfs_region(nl_dfs_reg);
+	} else {
+		wmi_dfs_reg = WMI_UNINIT_DFS_DOMAIN;
+	}
+
 	/* Target allows setting up per-band regdomain but ath_common provides
 	 * a combined one only */
 	ret = ath10k_wmi_pdev_set_regdomain(ar,
@@ -1705,9 +1803,10 @@
 					    regpair->reg_domain, /* 2ghz */
 					    regpair->reg_domain, /* 5ghz */
 					    regpair->reg_2ghz_ctl,
-					    regpair->reg_5ghz_ctl);
+					    regpair->reg_5ghz_ctl,
+					    wmi_dfs_reg);
 	if (ret)
-		ath10k_warn("could not set pdev regdomain (%d)\n", ret);
+		ath10k_warn("failed to set pdev regdomain: %d\n", ret);
 }
 
 static void ath10k_reg_notifier(struct wiphy *wiphy,
@@ -1725,7 +1824,7 @@
 		result = ar->dfs_detector->set_dfs_domain(ar->dfs_detector,
 							  request->dfs_region);
 		if (!result)
-			ath10k_warn("dfs region 0x%X not supported, will trigger radar for every pulse\n",
+			ath10k_warn("DFS region 0x%X not supported, will trigger radar for every pulse\n",
 				    request->dfs_region);
 	}
 
@@ -1759,10 +1858,10 @@
 	if (info->control.vif)
 		return ath10k_vif_to_arvif(info->control.vif)->vdev_id;
 
-	if (ar->monitor_enabled)
+	if (ar->monitor_started)
 		return ar->monitor_vdev_id;
 
-	ath10k_warn("could not resolve vdev id\n");
+	ath10k_warn("failed to resolve vdev id\n");
 	return 0;
 }
 
@@ -1792,8 +1891,13 @@
 						wep_key_work);
 	int ret, keyidx = arvif->def_wep_key_newidx;
 
+	mutex_lock(&arvif->ar->conf_mutex);
+
+	if (arvif->ar->state != ATH10K_STATE_ON)
+		goto unlock;
+
 	if (arvif->def_wep_key_idx == keyidx)
-		return;
+		goto unlock;
 
 	ath10k_dbg(ATH10K_DBG_MAC, "mac vdev %d set keyidx %d\n",
 		   arvif->vdev_id, keyidx);
@@ -1803,11 +1907,16 @@
 					arvif->ar->wmi.vdev_param->def_keyid,
 					keyidx);
 	if (ret) {
-		ath10k_warn("could not update wep keyidx (%d)\n", ret);
-		return;
+		ath10k_warn("failed to update wep key index for vdev %d: %d\n",
+			    arvif->vdev_id,
+			    ret);
+		goto unlock;
 	}
 
 	arvif->def_wep_key_idx = keyidx;
+
+unlock:
+	mutex_unlock(&arvif->ar->conf_mutex);
 }
 
 static void ath10k_tx_h_update_wep_key(struct sk_buff *skb)
@@ -1879,7 +1988,7 @@
 			     ar->fw_features)) {
 			if (skb_queue_len(&ar->wmi_mgmt_tx_queue) >=
 			    ATH10K_MAX_NUM_MGMT_PENDING) {
-				ath10k_warn("wmi mgmt_tx queue limit reached\n");
+				ath10k_warn("reached WMI management transmit queue limit\n");
 				ret = -EBUSY;
 				goto exit;
 			}
@@ -1903,7 +2012,7 @@
 
 exit:
 	if (ret) {
-		ath10k_warn("tx failed (%d). dropping packet.\n", ret);
+		ath10k_warn("failed to transmit packet, dropping: %d\n", ret);
 		ieee80211_free_txskb(ar->hw, skb);
 	}
 }
@@ -1964,7 +2073,7 @@
 		if (!peer) {
 			ret = ath10k_peer_create(ar, vdev_id, peer_addr);
 			if (ret)
-				ath10k_warn("peer %pM on vdev %d not created (%d)\n",
+				ath10k_warn("failed to create peer %pM on vdev %d: %d\n",
 					    peer_addr, vdev_id, ret);
 		}
 
@@ -1984,7 +2093,7 @@
 		if (!peer) {
 			ret = ath10k_peer_delete(ar, vdev_id, peer_addr);
 			if (ret)
-				ath10k_warn("peer %pM on vdev %d not deleted (%d)\n",
+				ath10k_warn("failed to delete peer %pM on vdev %d: %d\n",
 					    peer_addr, vdev_id, ret);
 		}
 
@@ -2018,7 +2127,8 @@
 
 		ret = ath10k_wmi_mgmt_tx(ar, skb);
 		if (ret) {
-			ath10k_warn("wmi mgmt_tx failed (%d)\n", ret);
+			ath10k_warn("failed to transmit management frame via WMI: %d\n",
+				    ret);
 			ieee80211_free_txskb(ar->hw, skb);
 		}
 	}
@@ -2043,7 +2153,7 @@
 		return;
 	}
 
-	ath10k_warn("scan timeout. resetting. fw issue?\n");
+	ath10k_warn("scan timed out, firmware problem?\n");
 
 	if (ar->scan.is_roc)
 		ieee80211_remain_on_channel_expired(ar->hw);
@@ -2079,7 +2189,7 @@
 
 	ret = ath10k_wmi_stop_scan(ar, &arg);
 	if (ret) {
-		ath10k_warn("could not submit wmi stop scan (%d)\n", ret);
+		ath10k_warn("failed to stop wmi scan: %d\n", ret);
 		spin_lock_bh(&ar->data_lock);
 		ar->scan.in_progress = false;
 		ath10k_offchan_tx_purge(ar);
@@ -2099,7 +2209,7 @@
 
 	spin_lock_bh(&ar->data_lock);
 	if (ar->scan.in_progress) {
-		ath10k_warn("could not stop scan. its still in progress\n");
+		ath10k_warn("failed to stop scan, it's still in progress\n");
 		ar->scan.in_progress = false;
 		ath10k_offchan_tx_purge(ar);
 		ret = -ETIMEDOUT;
@@ -2187,72 +2297,171 @@
 	ath10k_tx_htt(ar, skb);
 }
 
-/*
- * Initialize various parameters with default vaules.
- */
-void ath10k_halt(struct ath10k *ar)
+/* Must not be called with conf_mutex held as workers can use that also. */
+static void ath10k_drain_tx(struct ath10k *ar)
 {
-	lockdep_assert_held(&ar->conf_mutex);
+	/* make sure rcu-protected mac80211 tx path itself is drained */
+	synchronize_net();
 
-	ath10k_stop_cac(ar);
-	del_timer_sync(&ar->scan.timeout);
 	ath10k_offchan_tx_purge(ar);
 	ath10k_mgmt_over_wmi_tx_purge(ar);
+
+	cancel_work_sync(&ar->offchan_tx_work);
+	cancel_work_sync(&ar->wmi_mgmt_tx_work);
+}
+
+void ath10k_halt(struct ath10k *ar)
+{
+	struct ath10k_vif *arvif;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	if (ath10k_monitor_is_enabled(ar)) {
+		clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags);
+		ar->promisc = false;
+		ar->monitor = false;
+		ath10k_monitor_stop(ar);
+	}
+
+	del_timer_sync(&ar->scan.timeout);
+	ath10k_reset_scan((unsigned long)ar);
 	ath10k_peer_cleanup_all(ar);
 	ath10k_core_stop(ar);
 	ath10k_hif_power_down(ar);
 
 	spin_lock_bh(&ar->data_lock);
-	if (ar->scan.in_progress) {
-		del_timer(&ar->scan.timeout);
-		ar->scan.in_progress = false;
-		ieee80211_scan_completed(ar->hw, true);
+	list_for_each_entry(arvif, &ar->arvifs, list) {
+		if (!arvif->beacon)
+			continue;
+
+		dma_unmap_single(arvif->ar->dev,
+				 ATH10K_SKB_CB(arvif->beacon)->paddr,
+				 arvif->beacon->len, DMA_TO_DEVICE);
+		dev_kfree_skb_any(arvif->beacon);
+		arvif->beacon = NULL;
 	}
 	spin_unlock_bh(&ar->data_lock);
 }
 
+static int ath10k_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant)
+{
+	struct ath10k *ar = hw->priv;
+
+	mutex_lock(&ar->conf_mutex);
+
+	if (ar->cfg_tx_chainmask) {
+		*tx_ant = ar->cfg_tx_chainmask;
+		*rx_ant = ar->cfg_rx_chainmask;
+	} else {
+		*tx_ant = ar->supp_tx_chainmask;
+		*rx_ant = ar->supp_rx_chainmask;
+	}
+
+	mutex_unlock(&ar->conf_mutex);
+
+	return 0;
+}
+
+static int __ath10k_set_antenna(struct ath10k *ar, u32 tx_ant, u32 rx_ant)
+{
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	ar->cfg_tx_chainmask = tx_ant;
+	ar->cfg_rx_chainmask = rx_ant;
+
+	if ((ar->state != ATH10K_STATE_ON) &&
+	    (ar->state != ATH10K_STATE_RESTARTED))
+		return 0;
+
+	ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->tx_chain_mask,
+					tx_ant);
+	if (ret) {
+		ath10k_warn("failed to set tx-chainmask: %d, req 0x%x\n",
+			    ret, tx_ant);
+		return ret;
+	}
+
+	ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rx_chain_mask,
+					rx_ant);
+	if (ret) {
+		ath10k_warn("failed to set rx-chainmask: %d, req 0x%x\n",
+			    ret, rx_ant);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ath10k_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
+{
+	struct ath10k *ar = hw->priv;
+	int ret;
+
+	mutex_lock(&ar->conf_mutex);
+	ret = __ath10k_set_antenna(ar, tx_ant, rx_ant);
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
 static int ath10k_start(struct ieee80211_hw *hw)
 {
 	struct ath10k *ar = hw->priv;
 	int ret = 0;
 
+	/*
+	 * This makes sense only when restarting hw. It is harmless to call
+	 * unconditionally. This is necessary to make sure no HTT/WMI tx
+	 * commands will be submitted while restarting.
+	 */
+	ath10k_drain_tx(ar);
+
 	mutex_lock(&ar->conf_mutex);
 
-	if (ar->state != ATH10K_STATE_OFF &&
-	    ar->state != ATH10K_STATE_RESTARTING) {
+	switch (ar->state) {
+	case ATH10K_STATE_OFF:
+		ar->state = ATH10K_STATE_ON;
+		break;
+	case ATH10K_STATE_RESTARTING:
+		ath10k_halt(ar);
+		ar->state = ATH10K_STATE_RESTARTED;
+		break;
+	case ATH10K_STATE_ON:
+	case ATH10K_STATE_RESTARTED:
+	case ATH10K_STATE_WEDGED:
+		WARN_ON(1);
 		ret = -EINVAL;
-		goto exit;
+		goto err;
 	}
 
 	ret = ath10k_hif_power_up(ar);
 	if (ret) {
-		ath10k_err("could not init hif (%d)\n", ret);
-		ar->state = ATH10K_STATE_OFF;
-		goto exit;
+		ath10k_err("Could not init hif: %d\n", ret);
+		goto err_off;
 	}
 
 	ret = ath10k_core_start(ar);
 	if (ret) {
-		ath10k_err("could not init core (%d)\n", ret);
-		ath10k_hif_power_down(ar);
-		ar->state = ATH10K_STATE_OFF;
-		goto exit;
+		ath10k_err("Could not init core: %d\n", ret);
+		goto err_power_down;
 	}
 
-	if (ar->state == ATH10K_STATE_OFF)
-		ar->state = ATH10K_STATE_ON;
-	else if (ar->state == ATH10K_STATE_RESTARTING)
-		ar->state = ATH10K_STATE_RESTARTED;
-
 	ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->pmf_qos, 1);
-	if (ret)
-		ath10k_warn("could not enable WMI_PDEV_PARAM_PMF_QOS (%d)\n",
-			    ret);
+	if (ret) {
+		ath10k_warn("failed to enable PMF QOS: %d\n", ret);
+		goto err_core_stop;
+	}
 
 	ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->dynamic_bw, 1);
-	if (ret)
-		ath10k_warn("could not init WMI_PDEV_PARAM_DYNAMIC_BW (%d)\n",
-			    ret);
+	if (ret) {
+		ath10k_warn("failed to enable dynamic BW: %d\n", ret);
+		goto err_core_stop;
+	}
+
+	if (ar->cfg_tx_chainmask)
+		__ath10k_set_antenna(ar, ar->cfg_tx_chainmask,
+				     ar->cfg_rx_chainmask);
 
 	/*
 	 * By default FW set ARP frames ac to voice (6). In that case ARP
@@ -2266,15 +2475,27 @@
 	ret = ath10k_wmi_pdev_set_param(ar,
 					ar->wmi.pdev_param->arp_ac_override, 0);
 	if (ret) {
-		ath10k_warn("could not set arp ac override parameter: %d\n",
+		ath10k_warn("failed to set arp ac override parameter: %d\n",
 			    ret);
-		goto exit;
+		goto err_core_stop;
 	}
 
+	ar->num_started_vdevs = 0;
 	ath10k_regd_update(ar);
-	ret = 0;
 
-exit:
+	mutex_unlock(&ar->conf_mutex);
+	return 0;
+
+err_core_stop:
+	ath10k_core_stop(ar);
+
+err_power_down:
+	ath10k_hif_power_down(ar);
+
+err_off:
+	ar->state = ATH10K_STATE_OFF;
+
+err:
 	mutex_unlock(&ar->conf_mutex);
 	return ret;
 }
@@ -2283,19 +2504,15 @@
 {
 	struct ath10k *ar = hw->priv;
 
-	mutex_lock(&ar->conf_mutex);
-	if (ar->state == ATH10K_STATE_ON ||
-	    ar->state == ATH10K_STATE_RESTARTED ||
-	    ar->state == ATH10K_STATE_WEDGED)
-		ath10k_halt(ar);
+	ath10k_drain_tx(ar);
 
-	ar->state = ATH10K_STATE_OFF;
+	mutex_lock(&ar->conf_mutex);
+	if (ar->state != ATH10K_STATE_OFF) {
+		ath10k_halt(ar);
+		ar->state = ATH10K_STATE_OFF;
+	}
 	mutex_unlock(&ar->conf_mutex);
 
-	ath10k_mgmt_over_wmi_tx_purge(ar);
-
-	cancel_work_sync(&ar->offchan_tx_work);
-	cancel_work_sync(&ar->wmi_mgmt_tx_work);
 	cancel_work_sync(&ar->restart_work);
 }
 
@@ -2309,7 +2526,7 @@
 	list_for_each_entry(arvif, &ar->arvifs, list) {
 		ret = ath10k_mac_vif_setup_ps(arvif);
 		if (ret) {
-			ath10k_warn("could not setup powersave (%d)\n", ret);
+			ath10k_warn("failed to setup powersave: %d\n", ret);
 			break;
 		}
 	}
@@ -2343,7 +2560,6 @@
 static void ath10k_config_chan(struct ath10k *ar)
 {
 	struct ath10k_vif *arvif;
-	bool monitor_was_enabled;
 	int ret;
 
 	lockdep_assert_held(&ar->conf_mutex);
@@ -2357,10 +2573,8 @@
 
 	/* First stop monitor interface. Some FW versions crash if there's a
 	 * lone monitor interface. */
-	monitor_was_enabled = ar->monitor_enabled;
-
-	if (ar->monitor_enabled)
-		ath10k_monitor_stop(ar);
+	if (ar->monitor_started)
+		ath10k_monitor_vdev_stop(ar);
 
 	list_for_each_entry(arvif, &ar->arvifs, list) {
 		if (!arvif->is_started)
@@ -2371,7 +2585,7 @@
 
 		ret = ath10k_vdev_stop(arvif);
 		if (ret) {
-			ath10k_warn("could not stop vdev %d (%d)\n",
+			ath10k_warn("failed to stop vdev %d: %d\n",
 				    arvif->vdev_id, ret);
 			continue;
 		}
@@ -2388,7 +2602,7 @@
 
 		ret = ath10k_vdev_start(arvif);
 		if (ret) {
-			ath10k_warn("could not start vdev %d (%d)\n",
+			ath10k_warn("failed to start vdev %d: %d\n",
 				    arvif->vdev_id, ret);
 			continue;
 		}
@@ -2399,14 +2613,14 @@
 		ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid,
 					 arvif->bssid);
 		if (ret) {
-			ath10k_warn("could not bring vdev up %d (%d)\n",
+			ath10k_warn("failed to bring vdev up %d: %d\n",
 				    arvif->vdev_id, ret);
 			continue;
 		}
 	}
 
-	if (monitor_was_enabled)
-		ath10k_monitor_start(ar, ar->monitor_vdev_id);
+	if (ath10k_monitor_is_enabled(ar))
+		ath10k_monitor_vdev_start(ar, ar->monitor_vdev_id);
 }
 
 static int ath10k_config(struct ieee80211_hw *hw, u32 changed)
@@ -2420,15 +2634,17 @@
 
 	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
 		ath10k_dbg(ATH10K_DBG_MAC,
-			   "mac config channel %d mhz flags 0x%x\n",
+			   "mac config channel %dMHz flags 0x%x radar %d\n",
 			   conf->chandef.chan->center_freq,
-			   conf->chandef.chan->flags);
+			   conf->chandef.chan->flags,
+			   conf->radar_enabled);
 
 		spin_lock_bh(&ar->data_lock);
 		ar->rx_channel = conf->chandef.chan;
 		spin_unlock_bh(&ar->data_lock);
 
-		ath10k_config_radar_detection(ar);
+		ar->radar_enabled = conf->radar_enabled;
+		ath10k_recalc_radar_detection(ar);
 
 		if (!cfg80211_chandef_identical(&ar->chandef, &conf->chandef)) {
 			ar->chandef = conf->chandef;
@@ -2444,14 +2660,14 @@
 		ret = ath10k_wmi_pdev_set_param(ar, param,
 						hw->conf.power_level * 2);
 		if (ret)
-			ath10k_warn("mac failed to set 2g txpower %d (%d)\n",
+			ath10k_warn("failed to set 2g txpower %d: %d\n",
 				    hw->conf.power_level, ret);
 
 		param = ar->wmi.pdev_param->txpower_limit5g;
 		ret = ath10k_wmi_pdev_set_param(ar, param,
 						hw->conf.power_level * 2);
 		if (ret)
-			ath10k_warn("mac failed to set 5g txpower %d (%d)\n",
+			ath10k_warn("failed to set 5g txpower %d: %d\n",
 				    hw->conf.power_level, ret);
 	}
 
@@ -2459,10 +2675,19 @@
 		ath10k_config_ps(ar);
 
 	if (changed & IEEE80211_CONF_CHANGE_MONITOR) {
-		if (conf->flags & IEEE80211_CONF_MONITOR)
-			ret = ath10k_monitor_create(ar);
-		else
-			ret = ath10k_monitor_destroy(ar);
+		if (conf->flags & IEEE80211_CONF_MONITOR && !ar->monitor) {
+			ar->monitor = true;
+			ret = ath10k_monitor_start(ar);
+			if (ret) {
+				ath10k_warn("failed to start monitor (config): %d\n",
+					    ret);
+				ar->monitor = false;
+			}
+		} else if (!(conf->flags & IEEE80211_CONF_MONITOR) &&
+			   ar->monitor) {
+			ar->monitor = false;
+			ath10k_monitor_stop(ar);
+		}
 	}
 
 	mutex_unlock(&ar->conf_mutex);
@@ -2497,12 +2722,6 @@
 	INIT_WORK(&arvif->wep_key_work, ath10k_tx_wep_key_work);
 	INIT_LIST_HEAD(&arvif->list);
 
-	if ((vif->type == NL80211_IFTYPE_MONITOR) && ar->monitor_present) {
-		ath10k_warn("Only one monitor interface allowed\n");
-		ret = -EBUSY;
-		goto err;
-	}
-
 	bit = ffs(ar->free_vdev_map);
 	if (bit == 0) {
 		ret = -EBUSY;
@@ -2545,7 +2764,7 @@
 	ret = ath10k_wmi_vdev_create(ar, arvif->vdev_id, arvif->vdev_type,
 				     arvif->vdev_subtype, vif->addr);
 	if (ret) {
-		ath10k_warn("WMI vdev %i create failed: ret %d\n",
+		ath10k_warn("failed to create WMI vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 		goto err;
 	}
@@ -2557,7 +2776,7 @@
 	ret = ath10k_wmi_vdev_set_param(ar, 0, vdev_param,
 					arvif->def_wep_key_idx);
 	if (ret) {
-		ath10k_warn("Failed to set vdev %i default keyid: %d\n",
+		ath10k_warn("failed to set vdev %i default key id: %d\n",
 			    arvif->vdev_id, ret);
 		goto err_vdev_delete;
 	}
@@ -2567,7 +2786,7 @@
 					ATH10K_HW_TXRX_NATIVE_WIFI);
 	/* 10.X firmware does not support this VDEV parameter. Do not warn */
 	if (ret && ret != -EOPNOTSUPP) {
-		ath10k_warn("Failed to set vdev %i TX encap: %d\n",
+		ath10k_warn("failed to set vdev %i TX encapsulation: %d\n",
 			    arvif->vdev_id, ret);
 		goto err_vdev_delete;
 	}
@@ -2575,14 +2794,14 @@
 	if (arvif->vdev_type == WMI_VDEV_TYPE_AP) {
 		ret = ath10k_peer_create(ar, arvif->vdev_id, vif->addr);
 		if (ret) {
-			ath10k_warn("Failed to create vdev %i peer for AP: %d\n",
+			ath10k_warn("failed to create vdev %i peer for AP: %d\n",
 				    arvif->vdev_id, ret);
 			goto err_vdev_delete;
 		}
 
 		ret = ath10k_mac_set_kickout(arvif);
 		if (ret) {
-			ath10k_warn("Failed to set vdev %i kickout parameters: %d\n",
+			ath10k_warn("failed to set vdev %i kickout parameters: %d\n",
 				    arvif->vdev_id, ret);
 			goto err_peer_delete;
 		}
@@ -2594,7 +2813,7 @@
 		ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id,
 						  param, value);
 		if (ret) {
-			ath10k_warn("Failed to set vdev %i RX wake policy: %d\n",
+			ath10k_warn("failed to set vdev %i RX wake policy: %d\n",
 				    arvif->vdev_id, ret);
 			goto err_peer_delete;
 		}
@@ -2604,7 +2823,7 @@
 		ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id,
 						  param, value);
 		if (ret) {
-			ath10k_warn("Failed to set vdev %i TX wake thresh: %d\n",
+			ath10k_warn("failed to set vdev %i TX wake thresh: %d\n",
 				    arvif->vdev_id, ret);
 			goto err_peer_delete;
 		}
@@ -2614,7 +2833,7 @@
 		ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id,
 						  param, value);
 		if (ret) {
-			ath10k_warn("Failed to set vdev %i PSPOLL count: %d\n",
+			ath10k_warn("failed to set vdev %i PSPOLL count: %d\n",
 				    arvif->vdev_id, ret);
 			goto err_peer_delete;
 		}
@@ -2622,21 +2841,18 @@
 
 	ret = ath10k_mac_set_rts(arvif, ar->hw->wiphy->rts_threshold);
 	if (ret) {
-		ath10k_warn("failed to set rts threshold for vdev %d (%d)\n",
+		ath10k_warn("failed to set rts threshold for vdev %d: %d\n",
 			    arvif->vdev_id, ret);
 		goto err_peer_delete;
 	}
 
 	ret = ath10k_mac_set_frag(arvif, ar->hw->wiphy->frag_threshold);
 	if (ret) {
-		ath10k_warn("failed to set frag threshold for vdev %d (%d)\n",
+		ath10k_warn("failed to set frag threshold for vdev %d: %d\n",
 			    arvif->vdev_id, ret);
 		goto err_peer_delete;
 	}
 
-	if (arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)
-		ar->monitor_present = true;
-
 	mutex_unlock(&ar->conf_mutex);
 	return 0;
 
@@ -2668,6 +2884,9 @@
 
 	spin_lock_bh(&ar->data_lock);
 	if (arvif->beacon) {
+		dma_unmap_single(arvif->ar->dev,
+				 ATH10K_SKB_CB(arvif->beacon)->paddr,
+				 arvif->beacon->len, DMA_TO_DEVICE);
 		dev_kfree_skb_any(arvif->beacon);
 		arvif->beacon = NULL;
 	}
@@ -2679,7 +2898,7 @@
 	if (arvif->vdev_type == WMI_VDEV_TYPE_AP) {
 		ret = ath10k_peer_delete(arvif->ar, arvif->vdev_id, vif->addr);
 		if (ret)
-			ath10k_warn("Failed to remove peer for AP vdev %i: %d\n",
+			ath10k_warn("failed to remove peer for AP vdev %i: %d\n",
 				    arvif->vdev_id, ret);
 
 		kfree(arvif->u.ap.noa_data);
@@ -2690,12 +2909,9 @@
 
 	ret = ath10k_wmi_vdev_delete(ar, arvif->vdev_id);
 	if (ret)
-		ath10k_warn("WMI vdev %i delete failed: %d\n",
+		ath10k_warn("failed to delete WMI vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 
-	if (arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)
-		ar->monitor_present = false;
-
 	ath10k_peer_cleanup(ar, arvif->vdev_id);
 
 	mutex_unlock(&ar->conf_mutex);
@@ -2728,28 +2944,17 @@
 	*total_flags &= SUPPORTED_FILTERS;
 	ar->filter_flags = *total_flags;
 
-	/* Monitor must not be started if it wasn't created first.
-	 * Promiscuous mode may be started on a non-monitor interface - in
-	 * such case the monitor vdev is not created so starting the
-	 * monitor makes no sense. Since ath10k uses no special RX filters
-	 * (only BSS filter in STA mode) there's no need for any special
-	 * action here. */
-	if ((ar->filter_flags & FIF_PROMISC_IN_BSS) &&
-	    !ar->monitor_enabled && ar->monitor_present) {
-		ath10k_dbg(ATH10K_DBG_MAC, "mac monitor %d start\n",
-			   ar->monitor_vdev_id);
-
-		ret = ath10k_monitor_start(ar, ar->monitor_vdev_id);
-		if (ret)
-			ath10k_warn("Unable to start monitor mode\n");
-	} else if (!(ar->filter_flags & FIF_PROMISC_IN_BSS) &&
-		   ar->monitor_enabled && ar->monitor_present) {
-		ath10k_dbg(ATH10K_DBG_MAC, "mac monitor %d stop\n",
-			   ar->monitor_vdev_id);
-
-		ret = ath10k_monitor_stop(ar);
-		if (ret)
-			ath10k_warn("Unable to stop monitor mode\n");
+	if (ar->filter_flags & FIF_PROMISC_IN_BSS && !ar->promisc) {
+		ar->promisc = true;
+		ret = ath10k_monitor_start(ar);
+		if (ret) {
+			ath10k_warn("failed to start monitor (promisc): %d\n",
+				    ret);
+			ar->promisc = false;
+		}
+	} else if (!(ar->filter_flags & FIF_PROMISC_IN_BSS) && ar->promisc) {
+		ar->promisc = false;
+		ath10k_monitor_stop(ar);
 	}
 
 	mutex_unlock(&ar->conf_mutex);
@@ -2780,7 +2985,7 @@
 			   arvif->vdev_id, arvif->beacon_interval);
 
 		if (ret)
-			ath10k_warn("Failed to set beacon interval for vdev %d: %i\n",
+			ath10k_warn("failed to set beacon interval for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2793,7 +2998,7 @@
 		ret = ath10k_wmi_pdev_set_param(ar, pdev_param,
 						WMI_BEACON_STAGGERED_MODE);
 		if (ret)
-			ath10k_warn("Failed to set beacon mode for vdev %d: %i\n",
+			ath10k_warn("failed to set beacon mode for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2808,7 +3013,7 @@
 		ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
 						arvif->dtim_period);
 		if (ret)
-			ath10k_warn("Failed to set dtim period for vdev %d: %i\n",
+			ath10k_warn("failed to set dtim period for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2820,7 +3025,12 @@
 		arvif->u.ap.hidden_ssid = info->hidden_ssid;
 	}
 
-	if (changed & BSS_CHANGED_BSSID) {
+	/*
+	 * Firmware manages AP self-peer internally so make sure to not create
+	 * it in driver. Otherwise AP self-peer deletion may timeout later.
+	 */
+	if (changed & BSS_CHANGED_BSSID &&
+	    vif->type != NL80211_IFTYPE_AP) {
 		if (!is_zero_ether_addr(info->bssid)) {
 			ath10k_dbg(ATH10K_DBG_MAC,
 				   "mac vdev %d create peer %pM\n",
@@ -2829,7 +3039,7 @@
 			ret = ath10k_peer_create(ar, arvif->vdev_id,
 						 info->bssid);
 			if (ret)
-				ath10k_warn("Failed to add peer %pM for vdev %d when changing bssid: %i\n",
+				ath10k_warn("failed to add peer %pM for vdev %d when changing bssid: %i\n",
 					    info->bssid, arvif->vdev_id, ret);
 
 			if (vif->type == NL80211_IFTYPE_STATION) {
@@ -2868,20 +3078,13 @@
 		ath10k_control_beaconing(arvif, info);
 
 	if (changed & BSS_CHANGED_ERP_CTS_PROT) {
-		u32 cts_prot;
-		if (info->use_cts_prot)
-			cts_prot = 1;
-		else
-			cts_prot = 0;
-
+		arvif->use_cts_prot = info->use_cts_prot;
 		ath10k_dbg(ATH10K_DBG_MAC, "mac vdev %d cts_prot %d\n",
-			   arvif->vdev_id, cts_prot);
+			   arvif->vdev_id, info->use_cts_prot);
 
-		vdev_param = ar->wmi.vdev_param->enable_rtscts;
-		ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
-						cts_prot);
+		ret = ath10k_recalc_rtscts_prot(arvif);
 		if (ret)
-			ath10k_warn("Failed to set CTS prot for vdev %d: %d\n",
+			ath10k_warn("failed to recalculate rts/cts prot for vdev %d: %d\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2900,7 +3103,7 @@
 		ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
 						slottime);
 		if (ret)
-			ath10k_warn("Failed to set erp slot for vdev %d: %i\n",
+			ath10k_warn("failed to set erp slot for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2919,7 +3122,7 @@
 		ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
 						preamble);
 		if (ret)
-			ath10k_warn("Failed to set preamble for vdev %d: %i\n",
+			ath10k_warn("failed to set preamble for vdev %d: %i\n",
 				    arvif->vdev_id, ret);
 	}
 
@@ -2990,7 +3193,7 @@
 
 	ret = ath10k_start_scan(ar, &arg);
 	if (ret) {
-		ath10k_warn("could not start hw scan (%d)\n", ret);
+		ath10k_warn("failed to start hw scan: %d\n", ret);
 		spin_lock_bh(&ar->data_lock);
 		ar->scan.in_progress = false;
 		spin_unlock_bh(&ar->data_lock);
@@ -3010,8 +3213,7 @@
 	mutex_lock(&ar->conf_mutex);
 	ret = ath10k_abort_scan(ar);
 	if (ret) {
-		ath10k_warn("couldn't abort scan (%d). forcefully sending scan completion to mac80211\n",
-			    ret);
+		ath10k_warn("failed to abort scan: %d\n", ret);
 		ieee80211_scan_completed(hw, 1 /* aborted */);
 	}
 	mutex_unlock(&ar->conf_mutex);
@@ -3089,7 +3291,7 @@
 
 	if (!peer) {
 		if (cmd == SET_KEY) {
-			ath10k_warn("cannot install key for non-existent peer %pM\n",
+			ath10k_warn("failed to install key for non-existent peer %pM\n",
 				    peer_addr);
 			ret = -EOPNOTSUPP;
 			goto exit;
@@ -3112,7 +3314,7 @@
 
 	ret = ath10k_install_key(arvif, key, cmd, peer_addr);
 	if (ret) {
-		ath10k_warn("key installation failed for vdev %i peer %pM: %d\n",
+		ath10k_warn("failed to install key for vdev %i peer %pM: %d\n",
 			    arvif->vdev_id, peer_addr, ret);
 		goto exit;
 	}
@@ -3127,7 +3329,7 @@
 		peer->keys[key->keyidx] = NULL;
 	else if (peer == NULL)
 		/* impossible unless FW goes crazy */
-		ath10k_warn("peer %pM disappeared!\n", peer_addr);
+		ath10k_warn("Peer %pM disappeared!\n", peer_addr);
 	spin_unlock_bh(&ar->data_lock);
 
 exit:
@@ -3195,6 +3397,16 @@
 				    sta->addr, smps, err);
 	}
 
+	if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) {
+		ath10k_dbg(ATH10K_DBG_MAC, "mac update sta %pM supp rates\n",
+			   sta->addr);
+
+		err = ath10k_station_assoc(ar, arvif, sta, true);
+		if (err)
+			ath10k_warn("failed to reassociate station: %pM\n",
+				    sta->addr);
+	}
+
 	mutex_unlock(&ar->conf_mutex);
 }
 
@@ -3236,7 +3448,7 @@
 			max_num_peers = TARGET_NUM_PEERS;
 
 		if (ar->num_peers >= max_num_peers) {
-			ath10k_warn("Number of peers exceeded: peers number %d (max peers %d)\n",
+			ath10k_warn("number of peers exceeded: peers number %d (max peers %d)\n",
 				    ar->num_peers, max_num_peers);
 			ret = -ENOBUFS;
 			goto exit;
@@ -3248,7 +3460,7 @@
 
 		ret = ath10k_peer_create(ar, arvif->vdev_id, sta->addr);
 		if (ret)
-			ath10k_warn("Failed to add peer %pM for vdev %d when adding a new sta: %i\n",
+			ath10k_warn("failed to add peer %pM for vdev %d when adding a new sta: %i\n",
 				    sta->addr, arvif->vdev_id, ret);
 	} else if ((old_state == IEEE80211_STA_NONE &&
 		    new_state == IEEE80211_STA_NOTEXIST)) {
@@ -3260,7 +3472,7 @@
 			   arvif->vdev_id, sta->addr);
 		ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr);
 		if (ret)
-			ath10k_warn("Failed to delete peer %pM for vdev %d: %i\n",
+			ath10k_warn("failed to delete peer %pM for vdev %d: %i\n",
 				    sta->addr, arvif->vdev_id, ret);
 
 		if (vif->type == NL80211_IFTYPE_STATION)
@@ -3275,9 +3487,9 @@
 		ath10k_dbg(ATH10K_DBG_MAC, "mac sta %pM associated\n",
 			   sta->addr);
 
-		ret = ath10k_station_assoc(ar, arvif, sta);
+		ret = ath10k_station_assoc(ar, arvif, sta, false);
 		if (ret)
-			ath10k_warn("Failed to associate station %pM for vdev %i: %i\n",
+			ath10k_warn("failed to associate station %pM for vdev %i: %i\n",
 				    sta->addr, arvif->vdev_id, ret);
 	} else if (old_state == IEEE80211_STA_ASSOC &&
 		   new_state == IEEE80211_STA_AUTH &&
@@ -3291,7 +3503,7 @@
 
 		ret = ath10k_station_disassoc(ar, arvif, sta);
 		if (ret)
-			ath10k_warn("Failed to disassociate station: %pM vdev %i ret %i\n",
+			ath10k_warn("failed to disassociate station: %pM vdev %i: %i\n",
 				    sta->addr, arvif->vdev_id, ret);
 	}
 exit:
@@ -3339,7 +3551,7 @@
 					  WMI_STA_PS_PARAM_UAPSD,
 					  arvif->u.sta.uapsd);
 	if (ret) {
-		ath10k_warn("could not set uapsd params %d\n", ret);
+		ath10k_warn("failed to set uapsd params: %d\n", ret);
 		goto exit;
 	}
 
@@ -3352,7 +3564,7 @@
 					  WMI_STA_PS_PARAM_RX_WAKE_POLICY,
 					  value);
 	if (ret)
-		ath10k_warn("could not set rx wake param %d\n", ret);
+		ath10k_warn("failed to set rx wake param: %d\n", ret);
 
 exit:
 	return ret;
@@ -3402,13 +3614,13 @@
 	/* FIXME: FW accepts wmm params per hw, not per vif */
 	ret = ath10k_wmi_pdev_set_wmm_params(ar, &ar->wmm_params);
 	if (ret) {
-		ath10k_warn("could not set wmm params %d\n", ret);
+		ath10k_warn("failed to set wmm params: %d\n", ret);
 		goto exit;
 	}
 
 	ret = ath10k_conf_tx_uapsd(ar, vif, ac, params->uapsd);
 	if (ret)
-		ath10k_warn("could not set sta uapsd %d\n", ret);
+		ath10k_warn("failed to set sta uapsd: %d\n", ret);
 
 exit:
 	mutex_unlock(&ar->conf_mutex);
@@ -3461,7 +3673,7 @@
 
 	ret = ath10k_start_scan(ar, &arg);
 	if (ret) {
-		ath10k_warn("could not start roc scan (%d)\n", ret);
+		ath10k_warn("failed to start roc scan: %d\n", ret);
 		spin_lock_bh(&ar->data_lock);
 		ar->scan.in_progress = false;
 		spin_unlock_bh(&ar->data_lock);
@@ -3470,7 +3682,7 @@
 
 	ret = wait_for_completion_timeout(&ar->scan.on_channel, 3*HZ);
 	if (ret == 0) {
-		ath10k_warn("could not switch to channel for roc scan\n");
+		ath10k_warn("failed to switch to channel for roc scan\n");
 		ath10k_abort_scan(ar);
 		ret = -ETIMEDOUT;
 		goto exit;
@@ -3511,7 +3723,7 @@
 
 		ret = ath10k_mac_set_rts(arvif, value);
 		if (ret) {
-			ath10k_warn("could not set rts threshold for vdev %d (%d)\n",
+			ath10k_warn("failed to set rts threshold for vdev %d: %d\n",
 				    arvif->vdev_id, ret);
 			break;
 		}
@@ -3534,7 +3746,7 @@
 
 		ret = ath10k_mac_set_rts(arvif, value);
 		if (ret) {
-			ath10k_warn("could not set fragmentation threshold for vdev %d (%d)\n",
+			ath10k_warn("failed to set fragmentation threshold for vdev %d: %d\n",
 				    arvif->vdev_id, ret);
 			break;
 		}
@@ -3544,7 +3756,8 @@
 	return ret;
 }
 
-static void ath10k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct ath10k *ar = hw->priv;
 	bool skip;
@@ -3573,7 +3786,7 @@
 		}), ATH10K_FLUSH_TIMEOUT_HZ);
 
 	if (ret <= 0 || skip)
-		ath10k_warn("tx not flushed (skip %i ar-state %i): %i\n",
+		ath10k_warn("failed to flush transmit queue (skip %i ar-state %i): %i\n",
 			    skip, ar->state, ret);
 
 skip:
@@ -3608,7 +3821,7 @@
 
 	ret = ath10k_hif_suspend(ar);
 	if (ret) {
-		ath10k_warn("could not suspend hif (%d)\n", ret);
+		ath10k_warn("failed to suspend hif: %d\n", ret);
 		goto resume;
 	}
 
@@ -3617,7 +3830,7 @@
 resume:
 	ret = ath10k_wmi_pdev_resume_target(ar);
 	if (ret)
-		ath10k_warn("could not resume target (%d)\n", ret);
+		ath10k_warn("failed to resume target: %d\n", ret);
 
 	ret = 1;
 exit:
@@ -3634,14 +3847,14 @@
 
 	ret = ath10k_hif_resume(ar);
 	if (ret) {
-		ath10k_warn("could not resume hif (%d)\n", ret);
+		ath10k_warn("failed to resume hif: %d\n", ret);
 		ret = 1;
 		goto exit;
 	}
 
 	ret = ath10k_wmi_pdev_resume_target(ar);
 	if (ret) {
-		ath10k_warn("could not resume target (%d)\n", ret);
+		ath10k_warn("failed to resume target: %d\n", ret);
 		ret = 1;
 		goto exit;
 	}
@@ -3964,7 +4177,7 @@
 	ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id,
 					vdev_param, fixed_rate);
 	if (ret) {
-		ath10k_warn("Could not set fixed_rate param 0x%02x: %d\n",
+		ath10k_warn("failed to set fixed rate param 0x%02x: %d\n",
 			    fixed_rate, ret);
 		ret = -EINVAL;
 		goto exit;
@@ -3977,7 +4190,7 @@
 					vdev_param, fixed_nss);
 
 	if (ret) {
-		ath10k_warn("Could not set fixed_nss param %d: %d\n",
+		ath10k_warn("failed to set fixed nss param %d: %d\n",
 			    fixed_nss, ret);
 		ret = -EINVAL;
 		goto exit;
@@ -3990,7 +4203,7 @@
 					force_sgi);
 
 	if (ret) {
-		ath10k_warn("Could not set sgi param %d: %d\n",
+		ath10k_warn("failed to set sgi param %d: %d\n",
 			    force_sgi, ret);
 		ret = -EINVAL;
 		goto exit;
@@ -4026,7 +4239,7 @@
 	}
 
 	if (fixed_rate == WMI_FIXED_RATE_NONE && force_sgi) {
-		ath10k_warn("Could not force SGI usage for default rate settings\n");
+		ath10k_warn("failed to force SGI usage for default rate settings\n");
 		return -EINVAL;
 	}
 
@@ -4034,14 +4247,6 @@
 					   fixed_nss, force_sgi);
 }
 
-static void ath10k_channel_switch_beacon(struct ieee80211_hw *hw,
-					 struct ieee80211_vif *vif,
-					 struct cfg80211_chan_def *chandef)
-{
-	/* there's no need to do anything here. vif->csa_active is enough */
-	return;
-}
-
 static void ath10k_sta_rc_update(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_sta *sta,
@@ -4072,8 +4277,8 @@
 			bw = WMI_PEER_CHWIDTH_80MHZ;
 			break;
 		case IEEE80211_STA_RX_BW_160:
-			ath10k_warn("mac sta rc update for %pM: invalid bw %d\n",
-				    sta->addr, sta->bandwidth);
+			ath10k_warn("Invalid bandwith %d in rc update for %pM\n",
+				    sta->bandwidth, sta->addr);
 			bw = WMI_PEER_CHWIDTH_20MHZ;
 			break;
 		}
@@ -4099,8 +4304,8 @@
 			smps = WMI_PEER_SMPS_DYNAMIC;
 			break;
 		case IEEE80211_SMPS_NUM_MODES:
-			ath10k_warn("mac sta rc update for %pM: invalid smps: %d\n",
-				    sta->addr, sta->smps_mode);
+			ath10k_warn("Invalid smps %d in sta rc update for %pM\n",
+				    sta->smps_mode, sta->addr);
 			smps = WMI_PEER_SMPS_PS_NONE;
 			break;
 		}
@@ -4108,15 +4313,6 @@
 		arsta->smps = smps;
 	}
 
-	if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) {
-		/* FIXME: Not implemented. Probably the only way to do it would
-		 * be to re-assoc the peer. */
-		changed &= ~IEEE80211_RC_SUPP_RATES_CHANGED;
-		ath10k_dbg(ATH10K_DBG_MAC,
-			   "mac sta rc update for %pM: changing supported rates not implemented\n",
-			   sta->addr);
-	}
-
 	arsta->changed |= changed;
 
 	spin_unlock_bh(&ar->data_lock);
@@ -4154,10 +4350,11 @@
 	.set_frag_threshold		= ath10k_set_frag_threshold,
 	.flush				= ath10k_flush,
 	.tx_last_beacon			= ath10k_tx_last_beacon,
+	.set_antenna			= ath10k_set_antenna,
+	.get_antenna			= ath10k_get_antenna,
 	.restart_complete		= ath10k_restart_complete,
 	.get_survey			= ath10k_get_survey,
 	.set_bitrate_mask		= ath10k_set_bitrate_mask,
-	.channel_switch_beacon		= ath10k_channel_switch_beacon,
 	.sta_rc_update			= ath10k_sta_rc_update,
 	.get_tsf			= ath10k_get_tsf,
 #ifdef CONFIG_PM
@@ -4503,6 +4700,18 @@
 		BIT(NL80211_IFTYPE_ADHOC) |
 		BIT(NL80211_IFTYPE_AP);
 
+	if (test_bit(ATH10K_FW_FEATURE_WMI_10X, ar->fw_features)) {
+		/* TODO:  Have to deal with 2x2 chips if/when they come out. */
+		ar->supp_tx_chainmask = TARGET_10X_TX_CHAIN_MASK;
+		ar->supp_rx_chainmask = TARGET_10X_RX_CHAIN_MASK;
+	} else {
+		ar->supp_tx_chainmask = TARGET_TX_CHAIN_MASK;
+		ar->supp_rx_chainmask = TARGET_RX_CHAIN_MASK;
+	}
+
+	ar->hw->wiphy->available_antennas_rx = ar->supp_rx_chainmask;
+	ar->hw->wiphy->available_antennas_tx = ar->supp_tx_chainmask;
+
 	if (!test_bit(ATH10K_FW_FEATURE_NO_P2P, ar->fw_features))
 		ar->hw->wiphy->interface_modes |=
 			BIT(NL80211_IFTYPE_P2P_CLIENT) |
@@ -4516,7 +4725,6 @@
 			IEEE80211_HW_REPORTS_TX_ACK_STATUS |
 			IEEE80211_HW_HAS_RATE_CONTROL |
 			IEEE80211_HW_SUPPORTS_STATIC_SMPS |
-			IEEE80211_HW_WANT_MONITOR_VIF |
 			IEEE80211_HW_AP_LINK_PS |
 			IEEE80211_HW_SPECTRUM_MGMT;
 
@@ -4570,19 +4778,19 @@
 							     NL80211_DFS_UNSET);
 
 		if (!ar->dfs_detector)
-			ath10k_warn("dfs pattern detector init failed\n");
+			ath10k_warn("failed to initialise DFS pattern detector\n");
 	}
 
 	ret = ath_regd_init(&ar->ath_common.regulatory, ar->hw->wiphy,
 			    ath10k_reg_notifier);
 	if (ret) {
-		ath10k_err("Regulatory initialization failed: %i\n", ret);
+		ath10k_err("failed to initialise regulatory: %i\n", ret);
 		goto err_free;
 	}
 
 	ret = ieee80211_register_hw(ar->hw);
 	if (ret) {
-		ath10k_err("ieee80211 registration failed: %d\n", ret);
+		ath10k_err("failed to register ieee80211: %d\n", ret);
 		goto err_free;
 	}
 

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 9d242d8..d0004d5 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c

@@ -39,15 +39,28 @@
 	ATH10K_PCI_IRQ_MSI = 2,
 };
 
-static unsigned int ath10k_target_ps;
-static unsigned int ath10k_pci_irq_mode = ATH10K_PCI_IRQ_AUTO;
+enum ath10k_pci_reset_mode {
+	ATH10K_PCI_RESET_AUTO = 0,
+	ATH10K_PCI_RESET_WARM_ONLY = 1,
+};
 
-module_param(ath10k_target_ps, uint, 0644);
-MODULE_PARM_DESC(ath10k_target_ps, "Enable ath10k Target (SoC) PS option");
+static unsigned int ath10k_pci_target_ps;
+static unsigned int ath10k_pci_irq_mode = ATH10K_PCI_IRQ_AUTO;
+static unsigned int ath10k_pci_reset_mode = ATH10K_PCI_RESET_AUTO;
+
+module_param_named(target_ps, ath10k_pci_target_ps, uint, 0644);
+MODULE_PARM_DESC(target_ps, "Enable ath10k Target (SoC) PS option");
 
 module_param_named(irq_mode, ath10k_pci_irq_mode, uint, 0644);
 MODULE_PARM_DESC(irq_mode, "0: auto, 1: legacy, 2: msi (default: 0)");
 
+module_param_named(reset_mode, ath10k_pci_reset_mode, uint, 0644);
+MODULE_PARM_DESC(reset_mode, "0: auto, 1: warm only (default: 0)");
+
+/* how long to wait for target to initialise, in ms */
+#define ATH10K_PCI_TARGET_WAIT 3000
+#define ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS 3
+
 #define QCA988X_2_0_DEVICE_ID	(0x003c)
 
 static DEFINE_PCI_DEVICE_TABLE(ath10k_pci_id_table) = {
@@ -346,9 +359,10 @@
 	 *   2) Buffer in DMA-able space
 	 */
 	orig_nbytes = nbytes;
-	data_buf = (unsigned char *)pci_alloc_consistent(ar_pci->pdev,
-							 orig_nbytes,
-							 &ce_data_base);
+	data_buf = (unsigned char *)dma_alloc_coherent(ar->dev,
+						       orig_nbytes,
+						       &ce_data_base,
+						       GFP_ATOMIC);
 
 	if (!data_buf) {
 		ret = -ENOMEM;
@@ -442,12 +456,12 @@
 				__le32_to_cpu(((__le32 *)data_buf)[i]);
 		}
 	} else
-		ath10k_dbg(ATH10K_DBG_PCI, "%s failure (0x%x)\n",
-			   __func__, address);
+		ath10k_warn("failed to read diag value at 0x%x: %d\n",
+			    address, ret);
 
 	if (data_buf)
-		pci_free_consistent(ar_pci->pdev, orig_nbytes,
-				    data_buf, ce_data_base);
+		dma_free_coherent(ar->dev, orig_nbytes, data_buf,
+				  ce_data_base);
 
 	return ret;
 }
@@ -490,9 +504,10 @@
 	 *   2) Buffer in DMA-able space
 	 */
 	orig_nbytes = nbytes;
-	data_buf = (unsigned char *)pci_alloc_consistent(ar_pci->pdev,
-							 orig_nbytes,
-							 &ce_data_base);
+	data_buf = (unsigned char *)dma_alloc_coherent(ar->dev,
+						       orig_nbytes,
+						       &ce_data_base,
+						       GFP_ATOMIC);
 	if (!data_buf) {
 		ret = -ENOMEM;
 		goto done;
@@ -588,13 +603,13 @@
 
 done:
 	if (data_buf) {
-		pci_free_consistent(ar_pci->pdev, orig_nbytes, data_buf,
-				    ce_data_base);
+		dma_free_coherent(ar->dev, orig_nbytes, data_buf,
+				  ce_data_base);
 	}
 
 	if (ret != 0)
-		ath10k_dbg(ATH10K_DBG_PCI, "%s failure (0x%x)\n", __func__,
-			   address);
+		ath10k_warn("failed to write diag value at 0x%x: %d\n",
+			    address, ret);
 
 	return ret;
 }
@@ -747,17 +762,21 @@
 	struct ath10k_pci_pipe *pci_pipe = &ar_pci->pipe_info[pipe_id];
 	struct ath10k_ce_pipe *ce_pipe = pci_pipe->ce_hdl;
 	struct ath10k_ce_ring *src_ring = ce_pipe->src_ring;
-	unsigned int nentries_mask = src_ring->nentries_mask;
-	unsigned int sw_index = src_ring->sw_index;
-	unsigned int write_index = src_ring->write_index;
-	int err, i;
+	unsigned int nentries_mask;
+	unsigned int sw_index;
+	unsigned int write_index;
+	int err, i = 0;
 
 	spin_lock_bh(&ar_pci->ce_lock);
 
+	nentries_mask = src_ring->nentries_mask;
+	sw_index = src_ring->sw_index;
+	write_index = src_ring->write_index;
+
 	if (unlikely(CE_RING_DELTA(nentries_mask,
 				   write_index, sw_index - 1) < n_items)) {
 		err = -ENOBUFS;
-		goto unlock;
+		goto err;
 	}
 
 	for (i = 0; i < n_items - 1; i++) {
@@ -774,7 +793,7 @@
 					    items[i].transfer_id,
 					    CE_SEND_FLAG_GATHER);
 		if (err)
-			goto unlock;
+			goto err;
 	}
 
 	/* `i` is equal to `n_items -1` after for() */
@@ -792,10 +811,15 @@
 				    items[i].transfer_id,
 				    0);
 	if (err)
-		goto unlock;
+		goto err;
 
-	err = 0;
-unlock:
+	spin_unlock_bh(&ar_pci->ce_lock);
+	return 0;
+
+err:
+	for (; i > 0; i--)
+		__ath10k_ce_send_revert(ce_pipe);
+
 	spin_unlock_bh(&ar_pci->ce_lock);
 	return err;
 }
@@ -803,6 +827,9 @@
 static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+	ath10k_dbg(ATH10K_DBG_PCI, "pci hif get free queue number\n");
+
 	return ath10k_ce_num_free_src_entries(ar_pci->pipe_info[pipe].ce_hdl);
 }
 
@@ -854,6 +881,8 @@
 static void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
 					       int force)
 {
+	ath10k_dbg(ATH10K_DBG_PCI, "pci hif send complete check\n");
+
 	if (!force) {
 		int resources;
 		/*
@@ -880,7 +909,7 @@
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
-	ath10k_dbg(ATH10K_DBG_PCI, "%s\n", __func__);
+	ath10k_dbg(ATH10K_DBG_PCI, "pci hif set callbacks\n");
 
 	memcpy(&ar_pci->msg_callbacks_current, callbacks,
 	       sizeof(ar_pci->msg_callbacks_current));
@@ -938,6 +967,8 @@
 {
 	int ret = 0;
 
+	ath10k_dbg(ATH10K_DBG_PCI, "pci hif map service\n");
+
 	/* polling for received messages not supported */
 	*dl_is_polled = 0;
 
@@ -997,6 +1028,8 @@
 {
 	int ul_is_polled, dl_is_polled;
 
+	ath10k_dbg(ATH10K_DBG_PCI, "pci hif get default pipe\n");
+
 	(void)ath10k_pci_hif_map_service_to_pipe(ar,
 						 ATH10K_HTC_SVC_ID_RSVD_CTRL,
 						 ul_pipe,
@@ -1098,6 +1131,8 @@
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int ret, ret_early;
 
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot hif start\n");
+
 	ath10k_pci_free_early_irq(ar);
 	ath10k_pci_kill_tasklet(ar);
 
@@ -1233,18 +1268,10 @@
 
 static void ath10k_pci_ce_deinit(struct ath10k *ar)
 {
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	struct ath10k_pci_pipe *pipe_info;
-	int pipe_num;
+	int i;
 
-	for (pipe_num = 0; pipe_num < CE_COUNT; pipe_num++) {
-		pipe_info = &ar_pci->pipe_info[pipe_num];
-		if (pipe_info->ce_hdl) {
-			ath10k_ce_deinit(pipe_info->ce_hdl);
-			pipe_info->ce_hdl = NULL;
-			pipe_info->buf_sz = 0;
-		}
-	}
+	for (i = 0; i < CE_COUNT; i++)
+		ath10k_ce_deinit_pipe(ar, i);
 }
 
 static void ath10k_pci_hif_stop(struct ath10k *ar)
@@ -1252,7 +1279,10 @@
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int ret;
 
-	ath10k_dbg(ATH10K_DBG_PCI, "%s\n", __func__);
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot hif stop\n");
+
+	if (WARN_ON(!ar_pci->started))
+		return;
 
 	ret = ath10k_ce_disable_interrupts(ar);
 	if (ret)
@@ -1697,30 +1727,49 @@
 	return 0;
 }
 
+static int ath10k_pci_alloc_ce(struct ath10k *ar)
+{
+	int i, ret;
 
+	for (i = 0; i < CE_COUNT; i++) {
+		ret = ath10k_ce_alloc_pipe(ar, i, &host_ce_config_wlan[i]);
+		if (ret) {
+			ath10k_err("failed to allocate copy engine pipe %d: %d\n",
+				   i, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void ath10k_pci_free_ce(struct ath10k *ar)
+{
+	int i;
+
+	for (i = 0; i < CE_COUNT; i++)
+		ath10k_ce_free_pipe(ar, i);
+}
 
 static int ath10k_pci_ce_init(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	struct ath10k_pci_pipe *pipe_info;
 	const struct ce_attr *attr;
-	int pipe_num;
+	int pipe_num, ret;
 
 	for (pipe_num = 0; pipe_num < CE_COUNT; pipe_num++) {
 		pipe_info = &ar_pci->pipe_info[pipe_num];
+		pipe_info->ce_hdl = &ar_pci->ce_states[pipe_num];
 		pipe_info->pipe_num = pipe_num;
 		pipe_info->hif_ce_state = ar;
 		attr = &host_ce_config_wlan[pipe_num];
 
-		pipe_info->ce_hdl = ath10k_ce_init(ar, pipe_num, attr);
-		if (pipe_info->ce_hdl == NULL) {
-			ath10k_err("failed to initialize CE for pipe: %d\n",
-				   pipe_num);
-
-			/* It is safe to call it here. It checks if ce_hdl is
-			 * valid for each pipe */
-			ath10k_pci_ce_deinit(ar);
-			return -1;
+		ret = ath10k_ce_init_pipe(ar, pipe_num, attr);
+		if (ret) {
+			ath10k_err("failed to initialize copy engine pipe %d: %d\n",
+				   pipe_num, ret);
+			return ret;
 		}
 
 		if (pipe_num == CE_COUNT - 1) {
@@ -1741,16 +1790,15 @@
 static void ath10k_pci_fw_interrupt_handler(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	u32 fw_indicator_address, fw_indicator;
+	u32 fw_indicator;
 
 	ath10k_pci_wake(ar);
 
-	fw_indicator_address = ar_pci->fw_indicator_address;
-	fw_indicator = ath10k_pci_read32(ar, fw_indicator_address);
+	fw_indicator = ath10k_pci_read32(ar, FW_INDICATOR_ADDRESS);
 
 	if (fw_indicator & FW_IND_EVENT_PENDING) {
 		/* ACK: clear Target-side pending event */
-		ath10k_pci_write32(ar, fw_indicator_address,
+		ath10k_pci_write32(ar, FW_INDICATOR_ADDRESS,
 				   fw_indicator & ~FW_IND_EVENT_PENDING);
 
 		if (ar_pci->started) {
@@ -1767,13 +1815,32 @@
 	ath10k_pci_sleep(ar);
 }
 
+/* this function effectively clears target memory controller assert line */
+static void ath10k_pci_warm_reset_si0(struct ath10k *ar)
+{
+	u32 val;
+
+	val = ath10k_pci_soc_read32(ar, SOC_RESET_CONTROL_ADDRESS);
+	ath10k_pci_soc_write32(ar, SOC_RESET_CONTROL_ADDRESS,
+			       val | SOC_RESET_CONTROL_SI0_RST_MASK);
+	val = ath10k_pci_soc_read32(ar, SOC_RESET_CONTROL_ADDRESS);
+
+	msleep(10);
+
+	val = ath10k_pci_soc_read32(ar, SOC_RESET_CONTROL_ADDRESS);
+	ath10k_pci_soc_write32(ar, SOC_RESET_CONTROL_ADDRESS,
+			       val & ~SOC_RESET_CONTROL_SI0_RST_MASK);
+	val = ath10k_pci_soc_read32(ar, SOC_RESET_CONTROL_ADDRESS);
+
+	msleep(10);
+}
+
 static int ath10k_pci_warm_reset(struct ath10k *ar)
 {
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int ret = 0;
 	u32 val;
 
-	ath10k_dbg(ATH10K_DBG_BOOT, "boot performing warm chip reset\n");
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot warm reset\n");
 
 	ret = ath10k_do_pci_wake(ar);
 	if (ret) {
@@ -1801,7 +1868,7 @@
 	msleep(100);
 
 	/* clear fw indicator */
-	ath10k_pci_write32(ar, ar_pci->fw_indicator_address, 0);
+	ath10k_pci_write32(ar, FW_INDICATOR_ADDRESS, 0);
 
 	/* clear target LF timer interrupts */
 	val = ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS +
@@ -1826,6 +1893,8 @@
 				SOC_RESET_CONTROL_ADDRESS);
 	msleep(10);
 
+	ath10k_pci_warm_reset_si0(ar);
+
 	/* debug */
 	val = ath10k_pci_read32(ar, SOC_CORE_BASE_ADDRESS +
 				PCIE_INTR_CAUSE_ADDRESS);
@@ -1934,7 +2003,9 @@
 		irq_mode = "legacy";
 
 	if (!test_bit(ATH10K_FLAG_FIRST_BOOT_DONE, &ar->dev_flags))
-		ath10k_info("pci irq %s\n", irq_mode);
+		ath10k_info("pci irq %s irq_mode %d reset_mode %d\n",
+			    irq_mode, ath10k_pci_irq_mode,
+			    ath10k_pci_reset_mode);
 
 	return 0;
 
@@ -1952,23 +2023,52 @@
 	return ret;
 }
 
+static int ath10k_pci_hif_power_up_warm(struct ath10k *ar)
+{
+	int i, ret;
+
+	/*
+	 * Sometimes warm reset succeeds only after retries.
+	 *
+	 * FIXME: It might be possible to tune ath10k_pci_warm_reset() to work
+	 * at first try.
+	 */
+	for (i = 0; i < ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS; i++) {
+		ret = __ath10k_pci_hif_power_up(ar, false);
+		if (ret == 0)
+			break;
+
+		ath10k_warn("failed to warm reset (attempt %d out of %d): %d\n",
+			    i + 1, ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS, ret);
+	}
+
+	return ret;
+}
+
 static int ath10k_pci_hif_power_up(struct ath10k *ar)
 {
 	int ret;
 
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot hif power up\n");
+
 	/*
 	 * Hardware CUS232 version 2 has some issues with cold reset and the
 	 * preferred (and safer) way to perform a device reset is through a
 	 * warm reset.
 	 *
-	 * Warm reset doesn't always work though (notably after a firmware
-	 * crash) so fall back to cold reset if necessary.
+	 * Warm reset doesn't always work though, so falling back to cold reset
+	 * may be necessary.
 	 */
-	ret = __ath10k_pci_hif_power_up(ar, false);
+	ret = ath10k_pci_hif_power_up_warm(ar);
 	if (ret) {
-		ath10k_warn("failed to power up target using warm reset (%d), trying cold reset\n",
+		ath10k_warn("failed to power up target using warm reset: %d\n",
 			    ret);
 
+		if (ath10k_pci_reset_mode == ATH10K_PCI_RESET_WARM_ONLY)
+			return ret;
+
+		ath10k_warn("trying cold reset\n");
+
 		ret = __ath10k_pci_hif_power_up(ar, true);
 		if (ret) {
 			ath10k_err("failed to power up target using cold reset too (%d)\n",
@@ -1984,12 +2084,14 @@
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot hif power down\n");
+
 	ath10k_pci_free_early_irq(ar);
 	ath10k_pci_kill_tasklet(ar);
 	ath10k_pci_deinit_irq(ar);
+	ath10k_pci_ce_deinit(ar);
 	ath10k_pci_warm_reset(ar);
 
-	ath10k_pci_ce_deinit(ar);
 	if (!test_bit(ATH10K_PCI_FEATURE_SOC_POWER_SAVE, ar_pci->features))
 		ath10k_do_pci_sleep(ar);
 }
@@ -2137,7 +2239,6 @@
 static void ath10k_pci_early_irq_tasklet(unsigned long data)
 {
 	struct ath10k *ar = (struct ath10k *)data;
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	u32 fw_ind;
 	int ret;
 
@@ -2148,14 +2249,11 @@
 		return;
 	}
 
-	fw_ind = ath10k_pci_read32(ar, ar_pci->fw_indicator_address);
+	fw_ind = ath10k_pci_read32(ar, FW_INDICATOR_ADDRESS);
 	if (fw_ind & FW_IND_EVENT_PENDING) {
-		ath10k_pci_write32(ar, ar_pci->fw_indicator_address,
+		ath10k_pci_write32(ar, FW_INDICATOR_ADDRESS,
 				   fw_ind & ~FW_IND_EVENT_PENDING);
-
-		/* Some structures are unavailable during early boot or at
-		 * driver teardown so just print that the device has crashed. */
-		ath10k_warn("device crashed - no diagnostics available\n");
+		ath10k_pci_hif_dump_area(ar);
 	}
 
 	ath10k_pci_sleep(ar);
@@ -2385,33 +2483,69 @@
 static int ath10k_pci_wait_for_target_init(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	int wait_limit = 300; /* 3 sec */
+	unsigned long timeout;
 	int ret;
+	u32 val;
+
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot waiting target to initialise\n");
 
 	ret = ath10k_pci_wake(ar);
 	if (ret) {
-		ath10k_err("failed to wake up target: %d\n", ret);
+		ath10k_err("failed to wake up target for init: %d\n", ret);
 		return ret;
 	}
 
-	while (wait_limit-- &&
-	       !(ioread32(ar_pci->mem + FW_INDICATOR_ADDRESS) &
-		 FW_IND_INITIALIZED)) {
+	timeout = jiffies + msecs_to_jiffies(ATH10K_PCI_TARGET_WAIT);
+
+	do {
+		val = ath10k_pci_read32(ar, FW_INDICATOR_ADDRESS);
+
+		ath10k_dbg(ATH10K_DBG_BOOT, "boot target indicator %x\n", val);
+
+		/* target should never return this */
+		if (val == 0xffffffff)
+			continue;
+
+		/* the device has crashed so don't bother trying anymore */
+		if (val & FW_IND_EVENT_PENDING)
+			break;
+
+		if (val & FW_IND_INITIALIZED)
+			break;
+
 		if (ar_pci->num_msi_intrs == 0)
 			/* Fix potential race by repeating CORE_BASE writes */
-			iowrite32(PCIE_INTR_FIRMWARE_MASK |
-				  PCIE_INTR_CE_MASK_ALL,
-				  ar_pci->mem + (SOC_CORE_BASE_ADDRESS |
-						 PCIE_INTR_ENABLE_ADDRESS));
-		mdelay(10);
-	}
+			ath10k_pci_soc_write32(ar, PCIE_INTR_ENABLE_ADDRESS,
+					       PCIE_INTR_FIRMWARE_MASK |
+					       PCIE_INTR_CE_MASK_ALL);
 
-	if (wait_limit < 0) {
-		ath10k_err("target stalled\n");
+		mdelay(10);
+	} while (time_before(jiffies, timeout));
+
+	if (val == 0xffffffff) {
+		ath10k_err("failed to read device register, device is gone\n");
 		ret = -EIO;
 		goto out;
 	}
 
+	if (val & FW_IND_EVENT_PENDING) {
+		ath10k_warn("device has crashed during init\n");
+		ath10k_pci_write32(ar, FW_INDICATOR_ADDRESS,
+				   val & ~FW_IND_EVENT_PENDING);
+		ath10k_pci_hif_dump_area(ar);
+		ret = -ECOMM;
+		goto out;
+	}
+
+	if (!(val & FW_IND_INITIALIZED)) {
+		ath10k_err("failed to receive initialized event from target: %08x\n",
+			   val);
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot target initialised\n");
+
 out:
 	ath10k_pci_sleep(ar);
 	return ret;
@@ -2422,6 +2556,8 @@
 	int i, ret;
 	u32 val;
 
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot cold reset\n");
+
 	ret = ath10k_do_pci_wake(ar);
 	if (ret) {
 		ath10k_err("failed to wake up target: %d\n",
@@ -2453,6 +2589,9 @@
 	}
 
 	ath10k_do_pci_sleep(ar);
+
+	ath10k_dbg(ATH10K_DBG_BOOT, "boot cold reset complete\n");
+
 	return 0;
 }
 
@@ -2484,7 +2623,7 @@
 	struct ath10k_pci *ar_pci;
 	u32 lcr_val, chip_id;
 
-	ath10k_dbg(ATH10K_DBG_PCI, "%s\n", __func__);
+	ath10k_dbg(ATH10K_DBG_PCI, "pci probe\n");
 
 	ar_pci = kzalloc(sizeof(*ar_pci), GFP_KERNEL);
 	if (ar_pci == NULL)
@@ -2503,7 +2642,7 @@
 		goto err_ar_pci;
 	}
 
-	if (ath10k_target_ps)
+	if (ath10k_pci_target_ps)
 		set_bit(ATH10K_PCI_FEATURE_SOC_POWER_SAVE, ar_pci->features);
 
 	ath10k_pci_dump_features(ar_pci);
@@ -2516,23 +2655,10 @@
 	}
 
 	ar_pci->ar = ar;
-	ar_pci->fw_indicator_address = FW_INDICATOR_ADDRESS;
 	atomic_set(&ar_pci->keep_awake_count, 0);
 
 	pci_set_drvdata(pdev, ar);
 
-	/*
-	 * Without any knowledge of the Host, the Target may have been reset or
-	 * power cycled and its Config Space may no longer reflect the PCI
-	 * address space that was assigned earlier by the PCI infrastructure.
-	 * Refresh it now.
-	 */
-	ret = pci_assign_resource(pdev, BAR_NUM);
-	if (ret) {
-		ath10k_err("failed to assign PCI space: %d\n", ret);
-		goto err_ar;
-	}
-
 	ret = pci_enable_device(pdev);
 	if (ret) {
 		ath10k_err("failed to enable PCI device: %d\n", ret);
@@ -2594,16 +2720,24 @@
 
 	ath10k_do_pci_sleep(ar);
 
+	ret = ath10k_pci_alloc_ce(ar);
+	if (ret) {
+		ath10k_err("failed to allocate copy engine pipes: %d\n", ret);
+		goto err_iomap;
+	}
+
 	ath10k_dbg(ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem);
 
 	ret = ath10k_core_register(ar, chip_id);
 	if (ret) {
 		ath10k_err("failed to register driver core: %d\n", ret);
-		goto err_iomap;
+		goto err_free_ce;
 	}
 
 	return 0;
 
+err_free_ce:
+	ath10k_pci_free_ce(ar);
 err_iomap:
 	pci_iounmap(pdev, mem);
 err_master:
@@ -2626,7 +2760,7 @@
 	struct ath10k *ar = pci_get_drvdata(pdev);
 	struct ath10k_pci *ar_pci;
 
-	ath10k_dbg(ATH10K_DBG_PCI, "%s\n", __func__);
+	ath10k_dbg(ATH10K_DBG_PCI, "pci remove\n");
 
 	if (!ar)
 		return;
@@ -2636,9 +2770,8 @@
 	if (!ar_pci)
 		return;
 
-	tasklet_kill(&ar_pci->msi_fw_err);
-
 	ath10k_core_unregister(ar);
+	ath10k_pci_free_ce(ar);
 
 	pci_iounmap(pdev, ar_pci->mem);
 	pci_release_region(pdev, BAR_NUM);
@@ -2680,6 +2813,5 @@
 MODULE_AUTHOR("Qualcomm Atheros");
 MODULE_DESCRIPTION("Driver support for Atheros QCA988X PCIe devices");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_FIRMWARE(QCA988X_HW_2_0_FW_DIR "/" QCA988X_HW_2_0_FW_FILE);
-MODULE_FIRMWARE(QCA988X_HW_2_0_FW_DIR "/" QCA988X_HW_2_0_OTP_FILE);
+MODULE_FIRMWARE(QCA988X_HW_2_0_FW_DIR "/" QCA988X_HW_2_0_FW_2_FILE);
 MODULE_FIRMWARE(QCA988X_HW_2_0_FW_DIR "/" QCA988X_HW_2_0_BOARD_DATA_FILE);

diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index b43fdb4..dfdebb4 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h

@@ -189,9 +189,6 @@
 
 	struct ath10k_hif_cb msg_callbacks_current;
 
-	/* Target address used to signal a pending firmware event */
-	u32 fw_indicator_address;
-
 	/* Copy Engine used for Diagnostic Accesses */
 	struct ath10k_ce_pipe *ce_diag;
 

diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 0541dd9..82669a7 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c

@@ -100,189 +100,6 @@
 		wake_up(&htt->empty_tx_wq);
 }
 
-static const u8 rx_legacy_rate_idx[] = {
-	3,	/* 0x00  - 11Mbps  */
-	2,	/* 0x01  - 5.5Mbps */
-	1,	/* 0x02  - 2Mbps   */
-	0,	/* 0x03  - 1Mbps   */
-	3,	/* 0x04  - 11Mbps  */
-	2,	/* 0x05  - 5.5Mbps */
-	1,	/* 0x06  - 2Mbps   */
-	0,	/* 0x07  - 1Mbps   */
-	10,	/* 0x08  - 48Mbps  */
-	8,	/* 0x09  - 24Mbps  */
-	6,	/* 0x0A  - 12Mbps  */
-	4,	/* 0x0B  - 6Mbps   */
-	11,	/* 0x0C  - 54Mbps  */
-	9,	/* 0x0D  - 36Mbps  */
-	7,	/* 0x0E  - 18Mbps  */
-	5,	/* 0x0F  - 9Mbps   */
-};
-
-static void process_rx_rates(struct ath10k *ar, struct htt_rx_info *info,
-			     enum ieee80211_band band,
-			     struct ieee80211_rx_status *status)
-{
-	u8 cck, rate, rate_idx, bw, sgi, mcs, nss;
-	u8 info0 = info->rate.info0;
-	u32 info1 = info->rate.info1;
-	u32 info2 = info->rate.info2;
-	u8 preamble = 0;
-
-	/* Check if valid fields */
-	if (!(info0 & HTT_RX_INDICATION_INFO0_START_VALID))
-		return;
-
-	preamble = MS(info1, HTT_RX_INDICATION_INFO1_PREAMBLE_TYPE);
-
-	switch (preamble) {
-	case HTT_RX_LEGACY:
-		cck = info0 & HTT_RX_INDICATION_INFO0_LEGACY_RATE_CCK;
-		rate = MS(info0, HTT_RX_INDICATION_INFO0_LEGACY_RATE);
-		rate_idx = 0;
-
-		if (rate < 0x08 || rate > 0x0F)
-			break;
-
-		switch (band) {
-		case IEEE80211_BAND_2GHZ:
-			if (cck)
-				rate &= ~BIT(3);
-			rate_idx = rx_legacy_rate_idx[rate];
-			break;
-		case IEEE80211_BAND_5GHZ:
-			rate_idx = rx_legacy_rate_idx[rate];
-			/* We are using same rate table registering
-			   HW - ath10k_rates[]. In case of 5GHz skip
-			   CCK rates, so -4 here */
-			rate_idx -= 4;
-			break;
-		default:
-			break;
-		}
-
-		status->rate_idx = rate_idx;
-		break;
-	case HTT_RX_HT:
-	case HTT_RX_HT_WITH_TXBF:
-		/* HT-SIG - Table 20-11 in info1 and info2 */
-		mcs = info1 & 0x1F;
-		nss = mcs >> 3;
-		bw = (info1 >> 7) & 1;
-		sgi = (info2 >> 7) & 1;
-
-		status->rate_idx = mcs;
-		status->flag |= RX_FLAG_HT;
-		if (sgi)
-			status->flag |= RX_FLAG_SHORT_GI;
-		if (bw)
-			status->flag |= RX_FLAG_40MHZ;
-		break;
-	case HTT_RX_VHT:
-	case HTT_RX_VHT_WITH_TXBF:
-		/* VHT-SIG-A1 in info 1, VHT-SIG-A2 in info2
-		   TODO check this */
-		mcs = (info2 >> 4) & 0x0F;
-		nss = ((info1 >> 10) & 0x07) + 1;
-		bw = info1 & 3;
-		sgi = info2 & 1;
-
-		status->rate_idx = mcs;
-		status->vht_nss = nss;
-
-		if (sgi)
-			status->flag |= RX_FLAG_SHORT_GI;
-
-		switch (bw) {
-		/* 20MHZ */
-		case 0:
-			break;
-		/* 40MHZ */
-		case 1:
-			status->flag |= RX_FLAG_40MHZ;
-			break;
-		/* 80MHZ */
-		case 2:
-			status->vht_flag |= RX_VHT_FLAG_80MHZ;
-		}
-
-		status->flag |= RX_FLAG_VHT;
-		break;
-	default:
-		break;
-	}
-}
-
-void ath10k_process_rx(struct ath10k *ar, struct htt_rx_info *info)
-{
-	struct ieee80211_rx_status *status;
-	struct ieee80211_channel *ch;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)info->skb->data;
-
-	status = IEEE80211_SKB_RXCB(info->skb);
-	memset(status, 0, sizeof(*status));
-
-	if (info->encrypt_type != HTT_RX_MPDU_ENCRYPT_NONE) {
-		status->flag |= RX_FLAG_DECRYPTED | RX_FLAG_IV_STRIPPED |
-				RX_FLAG_MMIC_STRIPPED;
-		hdr->frame_control = __cpu_to_le16(
-				__le16_to_cpu(hdr->frame_control) &
-				~IEEE80211_FCTL_PROTECTED);
-	}
-
-	if (info->mic_err)
-		status->flag |= RX_FLAG_MMIC_ERROR;
-
-	if (info->fcs_err)
-		status->flag |= RX_FLAG_FAILED_FCS_CRC;
-
-	if (info->amsdu_more)
-		status->flag |= RX_FLAG_AMSDU_MORE;
-
-	status->signal = info->signal;
-
-	spin_lock_bh(&ar->data_lock);
-	ch = ar->scan_channel;
-	if (!ch)
-		ch = ar->rx_channel;
-	spin_unlock_bh(&ar->data_lock);
-
-	if (!ch) {
-		ath10k_warn("no channel configured; ignoring frame!\n");
-		dev_kfree_skb_any(info->skb);
-		return;
-	}
-
-	process_rx_rates(ar, info, ch->band, status);
-	status->band = ch->band;
-	status->freq = ch->center_freq;
-
-	if (info->rate.info0 & HTT_RX_INDICATION_INFO0_END_VALID) {
-		/* TSF available only in 32-bit */
-		status->mactime = info->tsf & 0xffffffff;
-		status->flag |= RX_FLAG_MACTIME_END;
-	}
-
-	ath10k_dbg(ATH10K_DBG_DATA,
-		   "rx skb %p len %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%x fcs-err %i\n",
-		   info->skb,
-		   info->skb->len,
-		   status->flag == 0 ? "legacy" : "",
-		   status->flag & RX_FLAG_HT ? "ht" : "",
-		   status->flag & RX_FLAG_VHT ? "vht" : "",
-		   status->flag & RX_FLAG_40MHZ ? "40" : "",
-		   status->vht_flag & RX_VHT_FLAG_80MHZ ? "80" : "",
-		   status->flag & RX_FLAG_SHORT_GI ? "sgi " : "",
-		   status->rate_idx,
-		   status->vht_nss,
-		   status->freq,
-		   status->band, status->flag, info->fcs_err);
-	ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "rx skb: ",
-			info->skb->data, info->skb->len);
-
-	ieee80211_rx(ar->hw, info->skb);
-}
-
 struct ath10k_peer *ath10k_peer_find(struct ath10k *ar, int vdev_id,
 				     const u8 *addr)
 {

diff --git a/drivers/net/wireless/ath/ath10k/txrx.h b/drivers/net/wireless/ath/ath10k/txrx.h
index 356dc9c..aee3e20 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.h
+++ b/drivers/net/wireless/ath/ath10k/txrx.h

@@ -21,7 +21,6 @@
 
 void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 			  const struct htt_tx_done *tx_done);
-void ath10k_process_rx(struct ath10k *ar, struct htt_rx_info *info);
 
 struct ath10k_peer *ath10k_peer_find(struct ath10k *ar, int vdev_id,
 				     const u8 *addr);

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index cb1f7b5..4b7782a 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c

@@ -639,6 +639,7 @@
 	struct sk_buff *wmi_skb;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int len;
+	u32 buf_len = skb->len;
 	u16 fc;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
@@ -648,6 +649,15 @@
 		return -EINVAL;
 
 	len = sizeof(cmd->hdr) + skb->len;
+
+	if ((ieee80211_is_action(hdr->frame_control) ||
+	     ieee80211_is_deauth(hdr->frame_control) ||
+	     ieee80211_is_disassoc(hdr->frame_control)) &&
+	     ieee80211_has_protected(hdr->frame_control)) {
+		len += IEEE80211_CCMP_MIC_LEN;
+		buf_len += IEEE80211_CCMP_MIC_LEN;
+	}
+
 	len = round_up(len, 4);
 
 	wmi_skb = ath10k_wmi_alloc_skb(len);
@@ -659,7 +669,7 @@
 	cmd->hdr.vdev_id = __cpu_to_le32(ATH10K_SKB_CB(skb)->vdev_id);
 	cmd->hdr.tx_rate = 0;
 	cmd->hdr.tx_power = 0;
-	cmd->hdr.buf_len = __cpu_to_le32((u32)(skb->len));
+	cmd->hdr.buf_len = __cpu_to_le32(buf_len);
 
 	memcpy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr), ETH_ALEN);
 	memcpy(cmd->buf, skb->data, skb->len);
@@ -957,10 +967,16 @@
 	 * frames with Protected Bit set. */
 	if (ieee80211_has_protected(hdr->frame_control) &&
 	    !ieee80211_is_auth(hdr->frame_control)) {
-		status->flag |= RX_FLAG_DECRYPTED | RX_FLAG_IV_STRIPPED |
-				RX_FLAG_MMIC_STRIPPED;
-		hdr->frame_control = __cpu_to_le16(fc &
+		status->flag |= RX_FLAG_DECRYPTED;
+
+		if (!ieee80211_is_action(hdr->frame_control) &&
+		    !ieee80211_is_deauth(hdr->frame_control) &&
+		    !ieee80211_is_disassoc(hdr->frame_control)) {
+			status->flag |= RX_FLAG_IV_STRIPPED |
+					RX_FLAG_MMIC_STRIPPED;
+			hdr->frame_control = __cpu_to_le16(fc &
 					~IEEE80211_FCTL_PROTECTED);
+		}
 	}
 
 	ath10k_dbg(ATH10K_DBG_MGMT,
@@ -1362,13 +1378,10 @@
 	struct sk_buff *bcn;
 	int ret, vdev_id = 0;
 
-	ath10k_dbg(ATH10K_DBG_MGMT, "WMI_HOST_SWBA_EVENTID\n");
-
 	ev = (struct wmi_host_swba_event *)skb->data;
 	map = __le32_to_cpu(ev->vdev_map);
 
-	ath10k_dbg(ATH10K_DBG_MGMT, "host swba:\n"
-		   "-vdev map 0x%x\n",
+	ath10k_dbg(ATH10K_DBG_MGMT, "mgmt swba vdev_map 0x%x\n",
 		   ev->vdev_map);
 
 	for (; map; map >>= 1, vdev_id++) {
@@ -1385,12 +1398,7 @@
 		bcn_info = &ev->bcn_info[i];
 
 		ath10k_dbg(ATH10K_DBG_MGMT,
-			   "-bcn_info[%d]:\n"
-			   "--tim_len %d\n"
-			   "--tim_mcast %d\n"
-			   "--tim_changed %d\n"
-			   "--tim_num_ps_pending %d\n"
-			   "--tim_bitmap 0x%08x%08x%08x%08x\n",
+			   "mgmt event bcn_info %d tim_len %d mcast %d changed %d num_ps_pending %d bitmap 0x%08x%08x%08x%08x\n",
 			   i,
 			   __le32_to_cpu(bcn_info->tim_info.tim_len),
 			   __le32_to_cpu(bcn_info->tim_info.tim_mcast),
@@ -1439,6 +1447,7 @@
 					 ATH10K_SKB_CB(arvif->beacon)->paddr,
 					 arvif->beacon->len, DMA_TO_DEVICE);
 			dev_kfree_skb_any(arvif->beacon);
+			arvif->beacon = NULL;
 		}
 
 		ATH10K_SKB_CB(bcn)->paddr = dma_map_single(arvif->ar->dev,
@@ -1448,6 +1457,7 @@
 					ATH10K_SKB_CB(bcn)->paddr);
 		if (ret) {
 			ath10k_warn("failed to map beacon: %d\n", ret);
+			dev_kfree_skb_any(bcn);
 			goto skip;
 		}
 
@@ -2365,7 +2375,7 @@
 	ar->wmi.num_mem_chunks = 0;
 }
 
-int ath10k_wmi_connect_htc_service(struct ath10k *ar)
+int ath10k_wmi_connect(struct ath10k *ar)
 {
 	int status;
 	struct ath10k_htc_svc_conn_req conn_req;
@@ -2393,8 +2403,9 @@
 	return 0;
 }
 
-int ath10k_wmi_pdev_set_regdomain(struct ath10k *ar, u16 rd, u16 rd2g,
-				  u16 rd5g, u16 ctl2g, u16 ctl5g)
+static int ath10k_wmi_main_pdev_set_regdomain(struct ath10k *ar, u16 rd,
+					      u16 rd2g, u16 rd5g, u16 ctl2g,
+					      u16 ctl5g)
 {
 	struct wmi_pdev_set_regdomain_cmd *cmd;
 	struct sk_buff *skb;
@@ -2418,6 +2429,46 @@
 				   ar->wmi.cmd->pdev_set_regdomain_cmdid);
 }
 
+static int ath10k_wmi_10x_pdev_set_regdomain(struct ath10k *ar, u16 rd,
+					     u16 rd2g, u16 rd5g,
+					     u16 ctl2g, u16 ctl5g,
+					     enum wmi_dfs_region dfs_reg)
+{
+	struct wmi_pdev_set_regdomain_cmd_10x *cmd;
+	struct sk_buff *skb;
+
+	skb = ath10k_wmi_alloc_skb(sizeof(*cmd));
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_pdev_set_regdomain_cmd_10x *)skb->data;
+	cmd->reg_domain = __cpu_to_le32(rd);
+	cmd->reg_domain_2G = __cpu_to_le32(rd2g);
+	cmd->reg_domain_5G = __cpu_to_le32(rd5g);
+	cmd->conformance_test_limit_2G = __cpu_to_le32(ctl2g);
+	cmd->conformance_test_limit_5G = __cpu_to_le32(ctl5g);
+	cmd->dfs_domain = __cpu_to_le32(dfs_reg);
+
+	ath10k_dbg(ATH10K_DBG_WMI,
+		   "wmi pdev regdomain rd %x rd2g %x rd5g %x ctl2g %x ctl5g %x dfs_region %x\n",
+		   rd, rd2g, rd5g, ctl2g, ctl5g, dfs_reg);
+
+	return ath10k_wmi_cmd_send(ar, skb,
+				   ar->wmi.cmd->pdev_set_regdomain_cmdid);
+}
+
+int ath10k_wmi_pdev_set_regdomain(struct ath10k *ar, u16 rd, u16 rd2g,
+				  u16 rd5g, u16 ctl2g, u16 ctl5g,
+				  enum wmi_dfs_region dfs_reg)
+{
+	if (test_bit(ATH10K_FW_FEATURE_WMI_10X, ar->fw_features))
+		return ath10k_wmi_10x_pdev_set_regdomain(ar, rd, rd2g, rd5g,
+							ctl2g, ctl5g, dfs_reg);
+	else
+		return ath10k_wmi_main_pdev_set_regdomain(ar, rd, rd2g, rd5g,
+							 ctl2g, ctl5g);
+}
+
 int ath10k_wmi_pdev_set_channel(struct ath10k *ar,
 				const struct wmi_channel_arg *arg)
 {
@@ -3456,8 +3507,9 @@
 		__cpu_to_le32(arg->peer_vht_rates.tx_mcs_set);
 
 	ath10k_dbg(ATH10K_DBG_WMI,
-		   "wmi peer assoc vdev %d addr %pM\n",
-		   arg->vdev_id, arg->addr);
+		   "wmi peer assoc vdev %d addr %pM (%s)\n",
+		   arg->vdev_id, arg->addr,
+		   arg->peer_reassoc ? "reassociate" : "new");
 	return ath10k_wmi_cmd_send(ar, skb, ar->wmi.cmd->peer_assoc_cmdid);
 }
 

diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index f51d5ca..e93df2c 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h

@@ -198,16 +198,6 @@
 	} __packed;
 } __packed;
 
-/* macro to convert MAC address from WMI word format to char array */
-#define WMI_MAC_ADDR_TO_CHAR_ARRAY(pwmi_mac_addr, c_macaddr) do { \
-	(c_macaddr)[0] =  ((pwmi_mac_addr)->word0) & 0xff; \
-	(c_macaddr)[1] = (((pwmi_mac_addr)->word0) >> 8) & 0xff; \
-	(c_macaddr)[2] = (((pwmi_mac_addr)->word0) >> 16) & 0xff; \
-	(c_macaddr)[3] = (((pwmi_mac_addr)->word0) >> 24) & 0xff; \
-	(c_macaddr)[4] =  ((pwmi_mac_addr)->word1) & 0xff; \
-	(c_macaddr)[5] = (((pwmi_mac_addr)->word1) >> 8) & 0xff; \
-	} while (0)
-
 struct wmi_cmd_map {
 	u32 init_cmdid;
 	u32 start_scan_cmdid;
@@ -2185,6 +2175,31 @@
 	__le32 conformance_test_limit_5G;
 } __packed;
 
+enum wmi_dfs_region {
+	/* Uninitialized dfs domain */
+	WMI_UNINIT_DFS_DOMAIN = 0,
+
+	/* FCC3 dfs domain */
+	WMI_FCC_DFS_DOMAIN = 1,
+
+	/* ETSI dfs domain */
+	WMI_ETSI_DFS_DOMAIN = 2,
+
+	/* Japan dfs domain */
+	WMI_MKK4_DFS_DOMAIN = 3,
+};
+
+struct wmi_pdev_set_regdomain_cmd_10x {
+	__le32 reg_domain;
+	__le32 reg_domain_2G;
+	__le32 reg_domain_5G;
+	__le32 conformance_test_limit_2G;
+	__le32 conformance_test_limit_5G;
+
+	/* dfs domain from wmi_dfs_region */
+	__le32 dfs_domain;
+} __packed;
+
 /* Command to set/unset chip in quiet mode */
 struct wmi_pdev_set_quiet_cmd {
 	/* period in TUs */
@@ -2210,6 +2225,19 @@
 	ATH10K_PROT_RTSCTS   = 2,    /* RTS-CTS */
 };
 
+enum wmi_rtscts_profile {
+	WMI_RTSCTS_FOR_NO_RATESERIES = 0,
+	WMI_RTSCTS_FOR_SECOND_RATESERIES,
+	WMI_RTSCTS_ACROSS_SW_RETRIES
+};
+
+#define WMI_RTSCTS_ENABLED		1
+#define WMI_RTSCTS_SET_MASK		0x0f
+#define WMI_RTSCTS_SET_LSB		0
+
+#define WMI_RTSCTS_PROFILE_MASK		0xf0
+#define WMI_RTSCTS_PROFILE_LSB		4
+
 enum wmi_beacon_gen_mode {
 	WMI_BEACON_STAGGERED_MODE = 0,
 	WMI_BEACON_BURST_MODE = 1
@@ -2295,9 +2323,9 @@
 #define WMI_PDEV_PARAM_UNSUPPORTED 0
 
 enum wmi_pdev_param {
-	/* TX chian mask */
+	/* TX chain mask */
 	WMI_PDEV_PARAM_TX_CHAIN_MASK = 0x1,
-	/* RX chian mask */
+	/* RX chain mask */
 	WMI_PDEV_PARAM_RX_CHAIN_MASK,
 	/* TX power limit for 2G Radio */
 	WMI_PDEV_PARAM_TXPOWER_LIMIT2G,
@@ -2682,6 +2710,9 @@
 	/* wal pdev resets  */
 	__le32 pdev_resets;
 
+	/* frames dropped due to non-availability of stateless TIDs */
+	__le32 stateless_tid_alloc_failure;
+
 	__le32 phy_underrun;
 
 	/* MPDU is more than txop limit */
@@ -2738,13 +2769,21 @@
 	WMI_REQUEST_AP_STAT	= 0x02
 };
 
+struct wlan_inst_rssi_args {
+	__le16 cfg_retry_count;
+	__le16 retry_count;
+};
+
 struct wmi_request_stats_cmd {
 	__le32 stats_id;
 
-	/*
-	 * Space to add parameters like
-	 * peer mac addr
-	 */
+	__le32 vdev_id;
+
+	/* peer MAC address */
+	struct wmi_mac_addr peer_macaddr;
+
+	/* Instantaneous RSSI arguments */
+	struct wlan_inst_rssi_args inst_rssi_args;
 } __packed;
 
 /* Suspend option */
@@ -2795,7 +2834,7 @@
  * PDEV statistics
  * TODO: add all PDEV stats here
  */
-struct wmi_pdev_stats {
+struct wmi_pdev_stats_old {
 	__le32 chan_nf;        /* Channel noise floor */
 	__le32 tx_frame_count; /* TX frame count */
 	__le32 rx_frame_count; /* RX frame count */
@@ -2806,6 +2845,23 @@
 	struct wal_dbg_stats wal; /* WAL dbg stats */
 } __packed;
 
+struct wmi_pdev_stats_10x {
+	__le32 chan_nf;        /* Channel noise floor */
+	__le32 tx_frame_count; /* TX frame count */
+	__le32 rx_frame_count; /* RX frame count */
+	__le32 rx_clear_count; /* rx clear count */
+	__le32 cycle_count;    /* cycle count */
+	__le32 phy_err_count;  /* Phy error count */
+	__le32 chan_tx_pwr;    /* channel tx power */
+	struct wal_dbg_stats wal; /* WAL dbg stats */
+	__le32 ack_rx_bad;
+	__le32 rts_bad;
+	__le32 rts_good;
+	__le32 fcs_bad;
+	__le32 no_beacons;
+	__le32 mib_int_count;
+} __packed;
+
 /*
  * VDEV statistics
  * TODO: add all VDEV stats here
@@ -2818,12 +2874,19 @@
  * peer statistics.
  * TODO: add more stats
  */
-struct wmi_peer_stats {
+struct wmi_peer_stats_old {
 	struct wmi_mac_addr peer_macaddr;
 	__le32 peer_rssi;
 	__le32 peer_tx_rate;
 } __packed;
 
+struct wmi_peer_stats_10x {
+	struct wmi_mac_addr peer_macaddr;
+	__le32 peer_rssi;
+	__le32 peer_tx_rate;
+	__le32 peer_rx_rate;
+} __packed;
+
 struct wmi_vdev_create_cmd {
 	__le32 vdev_id;
 	__le32 vdev_type;
@@ -4196,13 +4259,14 @@
 int ath10k_wmi_wait_for_service_ready(struct ath10k *ar);
 int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar);
 
-int ath10k_wmi_connect_htc_service(struct ath10k *ar);
+int ath10k_wmi_connect(struct ath10k *ar);
 int ath10k_wmi_pdev_set_channel(struct ath10k *ar,
 				const struct wmi_channel_arg *);
 int ath10k_wmi_pdev_suspend_target(struct ath10k *ar, u32 suspend_opt);
 int ath10k_wmi_pdev_resume_target(struct ath10k *ar);
 int ath10k_wmi_pdev_set_regdomain(struct ath10k *ar, u16 rd, u16 rd2g,
-				  u16 rd5g, u16 ctl2g, u16 ctl5g);
+				  u16 rd5g, u16 ctl2g, u16 ctl5g,
+				  enum wmi_dfs_region dfs_reg);
 int ath10k_wmi_pdev_set_param(struct ath10k *ar, u32 id, u32 value);
 int ath10k_wmi_cmd_init(struct ath10k *ar);
 int ath10k_wmi_start_scan(struct ath10k *ar, const struct wmi_start_scan_arg *);

diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 1a2973b..0fce1c7 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c

@@ -3709,8 +3709,8 @@
 			AR5K_REG_MS(AR5K_TUNE_MAX_TXPOWER, AR5K_TPC_CHIRP),
 			AR5K_TPC);
 	} else {
-		ath5k_hw_reg_write(ah, AR5K_PHY_TXPOWER_RATE_MAX |
-			AR5K_TUNE_MAX_TXPOWER, AR5K_PHY_TXPOWER_RATE_MAX);
+		ath5k_hw_reg_write(ah, AR5K_TUNE_MAX_TXPOWER,
+			AR5K_PHY_TXPOWER_RATE_MAX);
 	}
 
 	return 0;

diff --git a/drivers/net/wireless/ath/ath6kl/Kconfig b/drivers/net/wireless/ath/ath6kl/Kconfig
index e39e586..9c125ff 100644
--- a/drivers/net/wireless/ath/ath6kl/Kconfig
+++ b/drivers/net/wireless/ath/ath6kl/Kconfig

@@ -1,11 +1,19 @@
 config ATH6KL
 	tristate "Atheros mobile chipsets support"
+	depends on CFG80211
+        ---help---
+	  This module adds core support for wireless adapters based on
+	  Atheros AR6003 and AR6004 chipsets. You still need separate
+	  bus drivers for USB and SDIO to be able to use real devices.
+
+	  If you choose to build it as a module, it will be called
+	  ath6kl_core. Please note that AR6002 and AR6001 are not
+	  supported by this driver.
 
 config ATH6KL_SDIO
 	tristate "Atheros ath6kl SDIO support"
 	depends on ATH6KL
 	depends on MMC
-	depends on CFG80211
 	---help---
 	  This module adds support for wireless adapters based on
 	  Atheros AR6003 and AR6004 chipsets running over SDIO. If you
@@ -17,25 +25,31 @@
 	tristate "Atheros ath6kl USB support"
 	depends on ATH6KL
 	depends on USB
-	depends on CFG80211
 	---help---
 	  This module adds support for wireless adapters based on
-	  Atheros AR6004 chipset running over USB. This is still under
-	  implementation and it isn't functional. If you choose to
-	  build it as a module, it will be called ath6kl_usb.
+	  Atheros AR6004 chipset and chipsets based on it running over
+	  USB. If you choose to build it as a module, it will be
+	  called ath6kl_usb.
 
 config ATH6KL_DEBUG
 	bool "Atheros ath6kl debugging"
 	depends on ATH6KL
 	---help---
-	  Enables debug support
+	  Enables ath6kl debug support, including debug messages
+	  enabled with debug_mask module parameter and debugfs
+	  interface.
+
+	  If unsure, say Y to make it easier to debug problems.
 
 config ATH6KL_TRACING
 	bool "Atheros ath6kl tracing support"
 	depends on ATH6KL
 	depends on EVENT_TRACING
 	---help---
+	  Select this to make ath6kl use the tracing infrastructure which,
+	  for example, can be enabled with the help of trace-cmd. All debug
+	  messages and commands are delivered using individually
+	  enablable trace points.
+	  enablable trace points.
 
 	  If unsure, say Y to make it easier to debug problems.
 
@@ -47,3 +61,5 @@
 	  Enabling this makes it possible to change the regdomain in
 	  the firmware. This can be only enabled if regulatory requirements
 	  are taken into account.
+
+	  If unsure, say N.

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index c2c6f46..0e26f4a 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c

@@ -724,8 +724,9 @@
 			ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
 				   "added bss %pM to cfg80211\n", bssid);
 		kfree(ie);
-	} else
+	} else {
 		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "cfg80211 already has a bss\n");
+	}
 
 	return bss;
 }
@@ -970,7 +971,6 @@
 					  ssid_list[i].flag,
 					  ssid_list[i].ssid.ssid_len,
 					  ssid_list[i].ssid.ssid);
-
 	}
 
 	/* Make sure no old entries are left behind */
@@ -1759,7 +1759,7 @@
 }
 
 static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			      const u8 *mac, struct station_info *sinfo)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
@@ -1897,7 +1897,6 @@
 
 	/* Configure the patterns that we received from the user. */
 	for (i = 0; i < wow->n_patterns; i++) {
-
 		/*
 		 * Convert given nl80211 specific mask value to equivalent
 		 * driver specific mask value and send it to the chip along
@@ -2850,8 +2849,9 @@
 	if (p.prwise_crypto_type == 0) {
 		p.prwise_crypto_type = NONE_CRYPT;
 		ath6kl_set_cipher(vif, 0, true);
-	} else if (info->crypto.n_ciphers_pairwise == 1)
+	} else if (info->crypto.n_ciphers_pairwise == 1) {
 		ath6kl_set_cipher(vif, info->crypto.ciphers_pairwise[0], true);
+	}
 
 	switch (info->crypto.cipher_group) {
 	case WLAN_CIPHER_SUITE_WEP40:
@@ -2897,7 +2897,6 @@
 	}
 
 	if (info->inactivity_timeout) {
-
 		inactivity_timeout = info->inactivity_timeout;
 
 		if (ar->hw.flags & ATH6KL_HW_AP_INACTIVITY_MINS)
@@ -2975,7 +2974,7 @@
 static const u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac)
+			      const u8 *mac)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
@@ -2986,7 +2985,8 @@
 }
 
 static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_parameters *params)
+				 const u8 *mac,
+				 struct station_parameters *params)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);

diff --git a/drivers/net/wireless/ath/ath6kl/core.c b/drivers/net/wireless/ath/ath6kl/core.c
index 4b46adb..b0b6520 100644
--- a/drivers/net/wireless/ath/ath6kl/core.c
+++ b/drivers/net/wireless/ath/ath6kl/core.c

@@ -45,9 +45,9 @@
 module_param(recovery_enable, uint, 0644);
 module_param(heart_beat_poll, uint, 0644);
 MODULE_PARM_DESC(recovery_enable, "Enable recovery from firmware error");
-MODULE_PARM_DESC(heart_beat_poll, "Enable fw error detection periodic"   \
-		 "polling. This also specifies the polling interval in"  \
-		 "msecs. Set reocvery_enable for this to be effective");
+MODULE_PARM_DESC(heart_beat_poll,
+		 "Enable fw error detection periodic polling in msecs - Also set recovery_enable for this to be effective");
+
 
 void ath6kl_core_tx_complete(struct ath6kl *ar, struct sk_buff *skb)
 {

diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index dbfd17d..55c4064 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c

@@ -172,7 +172,6 @@
 			   struct ath6kl_irq_proc_registers *irq_proc_reg,
 			   struct ath6kl_irq_enable_reg *irq_enable_reg)
 {
-
 	ath6kl_dbg(ATH6KL_DBG_IRQ, ("<------- Register Table -------->\n"));
 
 	if (irq_proc_reg != NULL) {
@@ -219,7 +218,6 @@
 				   "GMBOX lookahead alias 1:   0x%x\n",
 				   irq_proc_reg->rx_gmbox_lkahd_alias[1]);
 		}
-
 	}
 
 	if (irq_enable_reg != NULL) {
@@ -1396,7 +1394,6 @@
 						const char __user *user_buf,
 						size_t count, loff_t *ppos)
 {
-
 	struct ath6kl *ar = file->private_data;
 	struct ath6kl_vif *vif;
 	char buf[200];
@@ -1575,7 +1572,6 @@
 				const char __user *user_buf,
 				size_t count, loff_t *ppos)
 {
-
 	struct ath6kl *ar = file->private_data;
 	struct ath6kl_vif *vif;
 	char buf[100];

diff --git a/drivers/net/wireless/ath/ath6kl/debug.h b/drivers/net/wireless/ath/ath6kl/debug.h
index ca9ba00..e194c10d 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.h
+++ b/drivers/net/wireless/ath/ath6kl/debug.h

@@ -97,8 +97,8 @@
 		struct ath6kl_irq_proc_registers *irq_proc_reg,
 		struct ath6kl_irq_enable_reg *irq_en_reg)
 {
-
 }
+
 static inline void dump_cred_dist_stats(struct htc_target *target)
 {
 }

diff --git a/drivers/net/wireless/ath/ath6kl/hif.c b/drivers/net/wireless/ath/ath6kl/hif.c
index fea7709..18c0708 100644
--- a/drivers/net/wireless/ath/ath6kl/hif.c
+++ b/drivers/net/wireless/ath/ath6kl/hif.c

@@ -37,7 +37,6 @@
 	buf = req->virt_dma_buf;
 
 	for (i = 0; i < req->scat_entries; i++) {
-
 		if (from_dma)
 			memcpy(req->scat_list[i].buf, buf,
 			       req->scat_list[i].len);
@@ -116,7 +115,6 @@
 			    le32_to_cpu(regdump_val[i + 2]),
 			    le32_to_cpu(regdump_val[i + 3]));
 	}
-
 }
 
 static int ath6kl_hif_proc_dbg_intr(struct ath6kl_device *dev)
@@ -701,5 +699,4 @@
 
 fail_setup:
 	return status;
-
 }

diff --git a/drivers/net/wireless/ath/ath6kl/hif.h b/drivers/net/wireless/ath/ath6kl/hif.h
index 61f6b21..dc6bd8c 100644
--- a/drivers/net/wireless/ath/ath6kl/hif.h
+++ b/drivers/net/wireless/ath/ath6kl/hif.h

@@ -197,9 +197,9 @@
 	/* bounce buffer for upper layers to copy to/from */
 	u8 *virt_dma_buf;
 
-	struct hif_scatter_item scat_list[1];
-
 	u32 scat_q_depth;
+
+	struct hif_scatter_item scat_list[0];
 };
 
 struct ath6kl_irq_proc_registers {

diff --git a/drivers/net/wireless/ath/ath6kl/htc_mbox.c b/drivers/net/wireless/ath/ath6kl/htc_mbox.c
index 65e5b71..e481f14 100644
--- a/drivers/net/wireless/ath/ath6kl/htc_mbox.c
+++ b/drivers/net/wireless/ath/ath6kl/htc_mbox.c

@@ -112,9 +112,9 @@
 		if (cur_ep_dist->endpoint == ENDPOINT_0)
 			continue;
 
-		if (cur_ep_dist->svc_id == WMI_CONTROL_SVC)
+		if (cur_ep_dist->svc_id == WMI_CONTROL_SVC) {
 			cur_ep_dist->cred_norm = cur_ep_dist->cred_per_msg;
-		else {
+		} else {
 			/*
 			 * For the remaining data endpoints, we assume that
 			 * each cred_per_msg are the same. We use a simple
@@ -129,7 +129,6 @@
 			count = (count * 3) >> 2;
 			count = max(count, cur_ep_dist->cred_per_msg);
 			cur_ep_dist->cred_norm = count;
-
 		}
 
 		ath6kl_dbg(ATH6KL_DBG_CREDIT,
@@ -549,7 +548,6 @@
 			     enum htc_endpoint_id eid, unsigned int len,
 			     int *req_cred)
 {
-
 	*req_cred = (len > target->tgt_cred_sz) ?
 		     DIV_ROUND_UP(len, target->tgt_cred_sz) : 1;
 
@@ -608,7 +606,6 @@
 	unsigned int len;
 
 	while (true) {
-
 		flags = 0;
 
 		if (list_empty(&endpoint->txq))
@@ -889,7 +886,6 @@
 		ac = target->dev->ar->ep2ac_map[endpoint->eid];
 
 	while (true) {
-
 		if (list_empty(&endpoint->txq))
 			break;
 
@@ -1190,7 +1186,6 @@
 		list_add_tail(&packet->list, &container);
 		htc_tx_complete(endpoint, &container);
 	}
-
 }
 
 static void ath6kl_htc_flush_txep_all(struct htc_target *target)
@@ -1394,7 +1389,6 @@
 
 	ep_cb = ep->ep_cb;
 	for (j = 0; j < n_msg; j++) {
-
 		/*
 		 * Reset flag, any packets allocated using the
 		 * rx_alloc() API cannot be recycled on
@@ -1424,9 +1418,9 @@
 				}
 			}
 
-			if (list_empty(&ep->rx_bufq))
+			if (list_empty(&ep->rx_bufq)) {
 				packet = NULL;
-			else {
+			} else {
 				packet = list_first_entry(&ep->rx_bufq,
 						struct htc_packet, list);
 				list_del(&packet->list);
@@ -1487,7 +1481,6 @@
 	spin_lock_bh(&target->rx_lock);
 
 	for (i = 0; i < msg; i++) {
-
 		htc_hdr = (struct htc_frame_hdr *)&lk_ahds[i];
 
 		if (htc_hdr->eid >= ENDPOINT_MAX) {
@@ -1708,7 +1701,6 @@
 		lk_ahd = (struct htc_lookahead_report *) record_buf;
 		if ((lk_ahd->pre_valid == ((~lk_ahd->post_valid) & 0xFF)) &&
 		    next_lk_ahds) {
-
 			ath6kl_dbg(ATH6KL_DBG_HTC,
 				   "htc rx lk_ahd found pre_valid 0x%x post_valid 0x%x\n",
 				   lk_ahd->pre_valid, lk_ahd->post_valid);
@@ -1755,7 +1747,6 @@
 	}
 
 	return 0;
-
 }
 
 static int htc_proc_trailer(struct htc_target *target,
@@ -1776,7 +1767,6 @@
 	status = 0;
 
 	while (len > 0) {
-
 		if (len < sizeof(struct htc_record_hdr)) {
 			status = -ENOMEM;
 			break;
@@ -2098,7 +2088,6 @@
 		}
 
 		if (!fetched_pkts) {
-
 			packet = list_first_entry(rx_pktq, struct htc_packet,
 						   list);
 
@@ -2173,7 +2162,6 @@
 	look_aheads[0] = msg_look_ahead;
 
 	while (true) {
-
 		/*
 		 * First lookahead sets the expected endpoint IDs for all
 		 * packets in a bundle.
@@ -2825,8 +2813,9 @@
 			packet->buf = packet->buf_start;
 			packet->endpoint = ENDPOINT_0;
 			list_add_tail(&packet->list, &target->free_ctrl_rxbuf);
-		} else
+		} else {
 			list_add_tail(&packet->list, &target->free_ctrl_txbuf);
+		}
 	}
 
 	return 0;

diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
index 67aa924..756fe52 100644
--- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c

@@ -137,7 +137,6 @@
 			credits_required = 0;
 
 		} else {
-
 			if (ep->cred_dist.credits < credits_required)
 				break;
 
@@ -169,7 +168,6 @@
 		/* queue this packet into the caller's queue */
 		list_add_tail(&packet->list, queue);
 	}
-
 }
 
 static void get_htc_packet(struct htc_target *target,
@@ -279,7 +277,6 @@
 			list_add(&packet->list, pkt_queue);
 			break;
 		}
-
 	}
 
 	if (status != 0) {
@@ -385,7 +382,6 @@
 			 */
 			list_for_each_entry_safe(packet, tmp_pkt,
 						 txq, list) {
-
 				ath6kl_dbg(ATH6KL_DBG_HTC,
 					   "%s: Indicat overflowed TX pkts: %p\n",
 					   __func__, packet);
@@ -403,7 +399,6 @@
 					list_move_tail(&packet->list,
 						       &send_queue);
 				}
-
 			}
 
 			if (list_empty(&send_queue)) {
@@ -454,7 +449,6 @@
 	 * enough transmit resources.
 	 */
 	while (true) {
-
 		if (get_queue_depth(&ep->txq) == 0)
 			break;
 
@@ -495,8 +489,8 @@
 		}
 
 		spin_lock_bh(&target->tx_lock);
-
 	}
+
 	/* done with this endpoint, we can clear the count */
 	ep->tx_proc_cnt = 0;
 	spin_unlock_bh(&target->tx_lock);
@@ -1106,7 +1100,6 @@
 	dev_kfree_skb(skb);
 
 	return status;
-
 }
 
 static void htc_flush_rx_queue(struct htc_target *target,
@@ -1258,7 +1251,6 @@
 		tx_alloc = 0;
 
 	} else {
-
 		tx_alloc = htc_get_credit_alloc(target, conn_req->svc_id);
 		if (tx_alloc == 0) {
 			status = -ENOMEM;

diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c
index 4f316bd..d5ef211 100644
--- a/drivers/net/wireless/ath/ath6kl/init.c
+++ b/drivers/net/wireless/ath/ath6kl/init.c

@@ -1192,7 +1192,6 @@
 
 	if (board_ext_address &&
 	    ar->fw_board_len == (board_data_size + board_ext_data_size)) {
-
 		/* write extended board data */
 		ath6kl_dbg(ATH6KL_DBG_BOOT,
 			   "writing extended board data to 0x%x (%d B)\n",

diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index 5839fc2..d565546 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c

@@ -571,7 +571,6 @@
 
 static int ath6kl_commit_ch_switch(struct ath6kl_vif *vif, u16 channel)
 {
-
 	struct ath6kl *ar = vif->ar;
 
 	vif->profile.ch = cpu_to_le16(channel);
@@ -600,7 +599,6 @@
 
 static void ath6kl_check_ch_switch(struct ath6kl *ar, u16 channel)
 {
-
 	struct ath6kl_vif *vif;
 	int res = 0;
 
@@ -692,9 +690,9 @@
 		cfg80211_michael_mic_failure(vif->ndev, sta->mac,
 					     NL80211_KEYTYPE_PAIRWISE, keyid,
 					     tsc, GFP_KERNEL);
-	} else
+	} else {
 		ath6kl_cfg80211_tkip_micerr_event(vif, keyid, ismcast);
-
+	}
 }
 
 static void ath6kl_update_target_stats(struct ath6kl_vif *vif, u8 *ptr, u32 len)
@@ -1093,8 +1091,9 @@
 	if (test_bit(CONNECTED, &vif->flags)) {
 		netif_carrier_on(dev);
 		netif_wake_queue(dev);
-	} else
+	} else {
 		netif_carrier_off(dev);
+	}
 
 	return 0;
 }
@@ -1146,7 +1145,6 @@
 			dev->features = features | NETIF_F_RXCSUM;
 			return err;
 		}
-
 	}
 
 	return err;

diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c
index 7126bdd..339d89f 100644
--- a/drivers/net/wireless/ath/ath6kl/sdio.c
+++ b/drivers/net/wireless/ath/ath6kl/sdio.c

@@ -348,7 +348,7 @@
 	int i, scat_req_sz, scat_list_sz, size;
 	u8 *virt_buf;
 
-	scat_list_sz = (n_scat_entry - 1) * sizeof(struct hif_scatter_item);
+	scat_list_sz = n_scat_entry * sizeof(struct hif_scatter_item);
 	scat_req_sz = sizeof(*s_req) + scat_list_sz;
 
 	if (!virt_scat)
@@ -425,8 +425,9 @@
 			memcpy(tbuf, buf, len);
 
 		bounced = true;
-	} else
+	} else {
 		tbuf = buf;
+	}
 
 	ret = ath6kl_sdio_io(ar_sdio->func, request, addr, tbuf, len);
 	if ((request & HIF_READ) && bounced)
@@ -441,9 +442,9 @@
 static void __ath6kl_sdio_write_async(struct ath6kl_sdio *ar_sdio,
 				      struct bus_request *req)
 {
-	if (req->scat_req)
+	if (req->scat_req) {
 		ath6kl_sdio_scat_rw(ar_sdio, req);
-	else {
+	} else {
 		void *context;
 		int status;
 
@@ -656,7 +657,6 @@
 	list_add_tail(&s_req->list, &ar_sdio->scat_req);
 
 	spin_unlock_bh(&ar_sdio->scat_lock);
-
 }
 
 /* scatter gather read write request */
@@ -674,9 +674,9 @@
 		   "hif-scatter: total len: %d scatter entries: %d\n",
 		   scat_req->len, scat_req->scat_entries);
 
-	if (request & HIF_SYNCHRONOUS)
+	if (request & HIF_SYNCHRONOUS) {
 		status = ath6kl_sdio_scat_rw(ar_sdio, scat_req->busrequest);
-	else {
+	} else {
 		spin_lock_bh(&ar_sdio->wr_async_lock);
 		list_add_tail(&scat_req->busrequest->list, &ar_sdio->wr_asyncq);
 		spin_unlock_bh(&ar_sdio->wr_async_lock);
@@ -856,7 +856,6 @@
 
 	if (ar->suspend_mode == WLAN_POWER_STATE_WOW ||
 	    (!ar->suspend_mode && wow)) {
-
 		ret = ath6kl_set_sdio_pm_caps(ar);
 		if (ret)
 			goto cut_pwr;
@@ -878,7 +877,6 @@
 
 	if (ar->suspend_mode == WLAN_POWER_STATE_DEEP_SLEEP ||
 	    !ar->suspend_mode || try_deepsleep) {
-
 		flags = sdio_get_host_pm_caps(func);
 		if (!(flags & MMC_PM_KEEP_POWER))
 			goto cut_pwr;
@@ -1061,7 +1059,6 @@
 
 	timeout = jiffies + msecs_to_jiffies(BMI_COMMUNICATION_TIMEOUT);
 	while (time_before(jiffies, timeout) && !ar->bmi.cmd_credits) {
-
 		/*
 		 * Hit the credit counter with a 4-byte access, the first byte
 		 * read will hit the counter and cause a decrement, while the

diff --git a/drivers/net/wireless/ath/ath6kl/target.h b/drivers/net/wireless/ath/ath6kl/target.h
index a580a62..d5eeeae 100644
--- a/drivers/net/wireless/ath/ath6kl/target.h
+++ b/drivers/net/wireless/ath/ath6kl/target.h

@@ -289,7 +289,7 @@
 	u32 hi_hp_rx_traffic_ratio;                    /* 0xd8 */
 
 	/* test applications flags */
-	u32 hi_test_apps_related    ;                  /* 0xdc */
+	u32 hi_test_apps_related;                      /* 0xdc */
 	/* location of test script */
 	u32 hi_ota_testscript;                         /* 0xe0 */
 	/* location of CAL data */

diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c
index ebb2404..40432fe 100644
--- a/drivers/net/wireless/ath/ath6kl/txrx.c
+++ b/drivers/net/wireless/ath/ath6kl/txrx.c

@@ -125,8 +125,9 @@
 		*flags |= WMI_DATA_HDR_FLAGS_UAPSD;
 		spin_unlock_bh(&conn->psq_lock);
 		return false;
-	} else if (!conn->apsd_info)
+	} else if (!conn->apsd_info) {
 		return false;
+	}
 
 	if (test_bit(WMM_ENABLED, &vif->flags)) {
 		ether_type = be16_to_cpu(datap->h_proto);
@@ -316,8 +317,9 @@
 		cookie = NULL;
 		ath6kl_err("wmi ctrl ep full, dropping pkt : 0x%p, len:%d\n",
 			   skb, skb->len);
-	} else
+	} else {
 		cookie = ath6kl_alloc_cookie(ar);
+	}
 
 	if (cookie == NULL) {
 		spin_unlock_bh(&ar->lock);
@@ -359,7 +361,7 @@
 	struct ath6kl_vif *vif = netdev_priv(dev);
 	u32 map_no = 0;
 	u16 htc_tag = ATH6KL_DATA_PKT_TAG;
-	u8 ac = 99 ; /* initialize to unmapped ac */
+	u8 ac = 99; /* initialize to unmapped ac */
 	bool chk_adhoc_ps_mapping = false;
 	int ret;
 	struct wmi_tx_meta_v2 meta_v2;
@@ -449,8 +451,9 @@
 			if (ret)
 				goto fail_tx;
 		}
-	} else
+	} else {
 		goto fail_tx;
+	}
 
 	spin_lock_bh(&ar->lock);
 
@@ -702,7 +705,6 @@
 
 	/* reap completed packets */
 	while (!list_empty(packet_queue)) {
-
 		packet = list_first_entry(packet_queue, struct htc_packet,
 					  list);
 		list_del(&packet->list);
@@ -1089,8 +1091,9 @@
 			else
 				skb_queue_tail(&rxtid->q, node->skb);
 			node->skb = NULL;
-		} else
+		} else {
 			stats->num_hole++;
+		}
 
 		rxtid->seq_next = ATH6KL_NEXT_SEQ_NO(rxtid->seq_next);
 		idx = AGGR_WIN_IDX(rxtid->seq_next, rxtid->hold_q_sz);
@@ -1211,7 +1214,7 @@
 		return is_queued;
 
 	spin_lock_bh(&rxtid->lock);
-	for (idx = 0 ; idx < rxtid->hold_q_sz; idx++) {
+	for (idx = 0; idx < rxtid->hold_q_sz; idx++) {
 		if (rxtid->hold_q[idx].skb) {
 			/*
 			 * There is a frame in the queue and no
@@ -1265,7 +1268,6 @@
 	is_apsdq_empty_at_start = is_apsdq_empty;
 
 	while ((!is_apsdq_empty) && (num_frames_to_deliver)) {
-
 		spin_lock_bh(&conn->psq_lock);
 		skb = skb_dequeue(&conn->apsdq);
 		is_apsdq_empty = skb_queue_empty(&conn->apsdq);
@@ -1606,16 +1608,18 @@
 			if (!conn)
 				return;
 			aggr_conn = conn->aggr_conn;
-		} else
+		} else {
 			aggr_conn = vif->aggr_cntxt->aggr_conn;
+		}
 
 		if (aggr_process_recv_frm(aggr_conn, tid, seq_no,
 					  is_amsdu, skb)) {
 			/* aggregation code will handle the skb */
 			return;
 		}
-	} else if (!is_broadcast_ether_addr(datap->h_dest))
+	} else if (!is_broadcast_ether_addr(datap->h_dest)) {
 		vif->net_stats.multicast++;
+	}
 
 	ath6kl_deliver_frames_to_nw_stack(vif->ndev, skb);
 }
@@ -1710,8 +1714,9 @@
 		sta = ath6kl_find_sta_by_aid(vif->ar, aid);
 		if (sta)
 			aggr_conn = sta->aggr_conn;
-	} else
+	} else {
 		aggr_conn = vif->aggr_cntxt->aggr_conn;
+	}
 
 	if (!aggr_conn)
 		return;
@@ -1766,7 +1771,6 @@
 		skb_queue_head_init(&rxtid->q);
 		spin_lock_init(&rxtid->lock);
 	}
-
 }
 
 struct aggr_info *aggr_init(struct ath6kl_vif *vif)
@@ -1806,8 +1810,9 @@
 		sta = ath6kl_find_sta_by_aid(vif->ar, aid);
 		if (sta)
 			aggr_conn = sta->aggr_conn;
-	} else
+	} else {
 		aggr_conn = vif->aggr_cntxt->aggr_conn;
+	}
 
 	if (!aggr_conn)
 		return;

diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c
index 56c3fd5..3afc5a4 100644
--- a/drivers/net/wireless/ath/ath6kl/usb.c
+++ b/drivers/net/wireless/ath/ath6kl/usb.c

@@ -236,7 +236,6 @@
 			break;
 		kfree(urb_context);
 	}
-
 }
 
 static void ath6kl_usb_cleanup_pipe_resources(struct ath6kl_usb *ar_usb)
@@ -245,7 +244,6 @@
 
 	for (i = 0; i < ATH6KL_USB_PIPE_MAX; i++)
 		ath6kl_usb_free_pipe_resources(&ar_usb->pipes[i]);
-
 }
 
 static u8 ath6kl_usb_get_logical_pipe_num(struct ath6kl_usb *ar_usb,

diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index 8b4ce28..4d7f9e4 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c

@@ -289,8 +289,9 @@
 			   ath6kl_wmi_determine_user_priority(((u8 *) llc_hdr) +
 					sizeof(struct ath6kl_llc_snap_hdr),
 					layer2_priority);
-		} else
+		} else {
 			usr_pri = layer2_priority & 0x7;
+		}
 
 		/*
 		 * Queue the EAPOL frames in the same WMM_AC_VO queue
@@ -359,8 +360,9 @@
 		hdr_size = roundup(sizeof(struct ieee80211_qos_hdr),
 				   sizeof(u32));
 		skb_pull(skb, hdr_size);
-	} else if (sub_type == cpu_to_le16(IEEE80211_STYPE_DATA))
+	} else if (sub_type == cpu_to_le16(IEEE80211_STYPE_DATA)) {
 		skb_pull(skb, sizeof(struct ieee80211_hdr_3addr));
+	}
 
 	datap = skb->data;
 	llc_hdr = (struct ath6kl_llc_snap_hdr *)(datap);
@@ -936,7 +938,6 @@
 
 static void ath6kl_wmi_regdomain_event(struct wmi *wmi, u8 *datap, int len)
 {
-
 	struct ath6kl_wmi_regdomain *ev;
 	struct country_code_to_enum_rd *country = NULL;
 	struct reg_dmn_pair_mapping *regpair = NULL;
@@ -946,10 +947,9 @@
 	ev = (struct ath6kl_wmi_regdomain *) datap;
 	reg_code = le32_to_cpu(ev->reg_code);
 
-	if ((reg_code >> ATH6KL_COUNTRY_RD_SHIFT) & COUNTRY_ERD_FLAG)
+	if ((reg_code >> ATH6KL_COUNTRY_RD_SHIFT) & COUNTRY_ERD_FLAG) {
 		country = ath6kl_regd_find_country((u16) reg_code);
-	else if (!(((u16) reg_code & WORLD_SKU_MASK) == WORLD_SKU_PREFIX)) {
-
+	} else if (!(((u16) reg_code & WORLD_SKU_MASK) == WORLD_SKU_PREFIX)) {
 		regpair = ath6kl_get_regpair((u16) reg_code);
 		country = ath6kl_regd_find_country_by_rd((u16) reg_code);
 		if (regpair)
@@ -1499,7 +1499,6 @@
 
 	if ((reply->cac_indication == CAC_INDICATION_ADMISSION_RESP) &&
 	    (reply->status_code != IEEE80211_TSPEC_STATUS_ADMISS_ACCEPTED)) {
-
 		ts = (struct ieee80211_tspec_ie *) &(reply->tspec_suggestion);
 		tsinfo = le16_to_cpu(ts->tsinfo);
 		tsid = (tsinfo >> IEEE80211_WMM_IE_TSPEC_TID_SHIFT) &
@@ -1530,7 +1529,6 @@
 	 * for delete qos stream from AP
 	 */
 	else if (reply->cac_indication == CAC_INDICATION_DELETE) {
-
 		ts = (struct ieee80211_tspec_ie *) &(reply->tspec_suggestion);
 		tsinfo = le16_to_cpu(ts->tsinfo);
 		ts_id = ((tsinfo >> IEEE80211_WMM_IE_TSPEC_TID_SHIFT) &
@@ -2322,7 +2320,7 @@
 	return ret;
 }
 
-int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk)
+int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk)
 {
 	struct sk_buff *skb;
 	struct wmi_add_krk_cmd *cmd;
@@ -2479,7 +2477,6 @@
 		goto free_data_skb;
 
 	for (index = 0; index < num_pri_streams; index++) {
-
 		if (WARN_ON(!data_sync_bufs[index].skb))
 			goto free_data_skb;
 
@@ -2704,7 +2701,6 @@
 
 	for (i = 0; i < WMM_NUM_AC; i++) {
 		if (stream_exist & (1 << i)) {
-
 			/*
 			 * FIXME: Is this lock & unlock inside
 			 * for loop correct? may need rework.
@@ -2870,8 +2866,9 @@
 	if (host_mode == ATH6KL_HOST_MODE_ASLEEP) {
 		ath6kl_wmi_relinquish_implicit_pstream_credits(wmi);
 		cmd->asleep = cpu_to_le32(1);
-	} else
+	} else {
 		cmd->awake = cpu_to_le32(1);
+	}
 
 	ret = ath6kl_wmi_cmd_send(wmi, if_idx, skb,
 				  WMI_SET_HOST_SLEEP_MODE_CMDID,

diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h
index 5c702ae..bb23fc0 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.h
+++ b/drivers/net/wireless/ath/ath6kl/wmi.h

@@ -898,7 +898,6 @@
  *  flags here
  */
 enum wmi_scan_ctrl_flags_bits {
-
 	/* set if can scan in the connect cmd */
 	CONNECT_SCAN_CTRL_FLAGS = 0x01,
 
@@ -2617,7 +2616,7 @@
 			  u8 *key_material,
 			  u8 key_op_ctrl, u8 *mac_addr,
 			  enum wmi_sync_flag sync_flag);
-int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk);
+int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk);
 int ath6kl_wmi_deletekey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index);
 int ath6kl_wmi_setpmkid_cmd(struct wmi *wmi, u8 if_idx, const u8 *bssid,
 			    const u8 *pmkid, bool set);

diff --git a/drivers/net/wireless/ath/ath9k/Makefile b/drivers/net/wireless/ath/ath9k/Makefile
index 8e1c7b0..8fcd586 100644
--- a/drivers/net/wireless/ath/ath9k/Makefile
+++ b/drivers/net/wireless/ath/ath9k/Makefile

@@ -53,7 +53,8 @@
 obj-$(CONFIG_ATH9K_COMMON) += ath9k_common.o
 ath9k_common-y:=	common.o \
 			common-init.o \
-			common-beacon.o
+			common-beacon.o \
+			common-debug.o
 
 ath9k_htc-y +=	htc_hst.o \
 		hif_usb.o \

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h
index 0a6163e..c38399b 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h

@@ -410,7 +410,7 @@
 	{0x00009e30, 0x06336f77},
 	{0x00009e34, 0x6af6532f},
 	{0x00009e38, 0x0cc80c00},
-	{0x00009e40, 0x0d261820},
+	{0x00009e40, 0x0d261800},
 	{0x00009e4c, 0x00001004},
 	{0x00009e50, 0x00ff03f1},
 	{0x00009e54, 0x00000000},

diff --git a/drivers/net/wireless/ath/ath9k/ar9330_1p1_initvals.h b/drivers/net/wireless/ath/ath9k/ar9330_1p1_initvals.h
index f76139b..2c42ff0 100644
--- a/drivers/net/wireless/ath/ath9k/ar9330_1p1_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9330_1p1_initvals.h

@@ -592,7 +592,7 @@
 	{0x00009e30, 0x06336f77},
 	{0x00009e34, 0x6af6532f},
 	{0x00009e38, 0x0cc80c00},
-	{0x00009e40, 0x0d261820},
+	{0x00009e40, 0x0d261800},
 	{0x00009e4c, 0x00001004},
 	{0x00009e50, 0x00ff03f1},
 	{0x00009fc0, 0x803e4788},

diff --git a/drivers/net/wireless/ath/ath9k/ar9330_1p2_initvals.h b/drivers/net/wireless/ath/ath9k/ar9330_1p2_initvals.h
index 0ac8be9..2154efc 100644
--- a/drivers/net/wireless/ath/ath9k/ar9330_1p2_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9330_1p2_initvals.h

@@ -231,7 +231,7 @@
 	{0x00009e30, 0x06336f77},
 	{0x00009e34, 0x6af6532f},
 	{0x00009e38, 0x0cc80c00},
-	{0x00009e40, 0x0d261820},
+	{0x00009e40, 0x0d261800},
 	{0x00009e4c, 0x00001004},
 	{0x00009e50, 0x00ff03f1},
 	{0x00009fc0, 0x803e4788},

diff --git a/drivers/net/wireless/ath/ath9k/ar9340_initvals.h b/drivers/net/wireless/ath/ath9k/ar9340_initvals.h
index a01f0ed..b995ffe 100644
--- a/drivers/net/wireless/ath/ath9k/ar9340_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9340_initvals.h

@@ -318,7 +318,7 @@
 	{0x00009e30, 0x06336f77},
 	{0x00009e34, 0x6af6532f},
 	{0x00009e38, 0x0cc80c00},
-	{0x00009e40, 0x0d261820},
+	{0x00009e40, 0x0d261800},
 	{0x00009e4c, 0x00001004},
 	{0x00009e50, 0x00ff03f1},
 	{0x00009e54, 0x00000000},
@@ -348,9 +348,9 @@
 	{0x0000a370, 0x00000000},
 	{0x0000a390, 0x00000001},
 	{0x0000a394, 0x00000444},
-	{0x0000a398, 0x00000000},
-	{0x0000a39c, 0x210d0401},
-	{0x0000a3a0, 0xab9a7144},
+	{0x0000a398, 0x001f0e0f},
+	{0x0000a39c, 0x0075393f},
+	{0x0000a3a0, 0xb79f6427},
 	{0x0000a3a4, 0x00000000},
 	{0x0000a3a8, 0xaaaaaaaa},
 	{0x0000a3ac, 0x3c466478},

diff --git a/drivers/net/wireless/ath/ath9k/ar953x_initvals.h b/drivers/net/wireless/ath/ath9k/ar953x_initvals.h
index 3c9113d..8e5c3b9 100644
--- a/drivers/net/wireless/ath/ath9k/ar953x_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar953x_initvals.h

@@ -257,9 +257,9 @@
 	{0x0000a370, 0x00000000},
 	{0x0000a390, 0x00000001},
 	{0x0000a394, 0x00000444},
-	{0x0000a398, 0x1f020503},
-	{0x0000a39c, 0x29180c03},
-	{0x0000a3a0, 0x9a8b6844},
+	{0x0000a398, 0x001f0e0f},
+	{0x0000a39c, 0x0075393f},
+	{0x0000a3a0, 0xb79f6427},
 	{0x0000a3a4, 0x000000ff},
 	{0x0000a3a8, 0x6a6a6a6a},
 	{0x0000a3ac, 0x6a6a6a6a},

diff --git a/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h b/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h
index e6aec2c..a5ca652 100644
--- a/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h

@@ -90,7 +90,7 @@
 	{0x00009e30, 0x06336f77},
 	{0x00009e34, 0x6af6532f},
 	{0x00009e38, 0x0cc80c00},
-	{0x00009e40, 0x0d261820},
+	{0x00009e40, 0x0d261800},
 	{0x00009e4c, 0x00001004},
 	{0x00009e50, 0x00ff03f1},
 	{0x00009e54, 0x00000000},

diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 3ba03dd..2ca8f7e 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h

@@ -23,8 +23,8 @@
 #include <linux/leds.h>
 #include <linux/completion.h>
 
-#include "debug.h"
 #include "common.h"
+#include "debug.h"
 #include "mci.h"
 #include "dfs.h"
 #include "spectral.h"
@@ -114,6 +114,9 @@
 #define ATH_TXFIFO_DEPTH           8
 #define ATH_TX_ERROR               0x01
 
+/* Stop tx traffic 1ms before the GO goes away */
+#define ATH_P2P_PS_STOP_TIME       1000
+
 #define IEEE80211_SEQ_SEQ_SHIFT    4
 #define IEEE80211_SEQ_MAX          4096
 #define IEEE80211_WEP_IVLEN        3
@@ -271,6 +274,7 @@
 #ifdef CONFIG_ATH9K_STATION_STATISTICS
 	struct ath_rx_rate_stats rx_rate_stats;
 #endif
+	u8 key_idx[4];
 };
 
 struct ath_tx_control {
@@ -366,11 +370,15 @@
 /********/
 
 struct ath_vif {
+	struct ieee80211_vif *vif;
 	struct ath_node mcast_node;
 	int av_bslot;
 	bool primary_sta_vif;
 	__le64 tsf_adjust; /* TSF adjustment for staggered beacons */
 	struct ath_buf *av_bcbuf;
+
+	/* P2P Client */
+	struct ieee80211_noa_data noa;
 };
 
 struct ath9k_vif_iter_data {
@@ -463,6 +471,8 @@
 void ath_update_survey_nf(struct ath_softc *sc, int channel);
 void ath9k_queue_reset(struct ath_softc *sc, enum ath_reset_type type);
 void ath_ps_full_sleep(unsigned long data);
+void ath9k_p2p_ps_timer(void *priv);
+void ath9k_update_p2p_ps(struct ath_softc *sc, struct ieee80211_vif *vif);
 
 /**********/
 /* BTCOEX */
@@ -713,6 +723,9 @@
 	struct completion paprd_complete;
 	wait_queue_head_t tx_wait;
 
+	struct ath_gen_timer *p2p_ps_timer;
+	struct ath_vif *p2p_ps_vif;
+
 	unsigned long driver_data;
 
 	u8 gtt_cnt;
@@ -757,6 +770,7 @@
 	struct ath_ant_comb ant_comb;
 	u8 ant_tx, ant_rx;
 	struct dfs_pattern_detector *dfs_detector;
+	u64 dfs_prev_pulse_ts;
 	u32 wow_enabled;
 	/* relay(fs) channel for spectral scan */
 	struct rchan *rfs_chan_spec_scan;

diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
index bd9e634..e387f0b 100644
--- a/drivers/net/wireless/ath/ath9k/beacon.c
+++ b/drivers/net/wireless/ath/ath9k/beacon.c

@@ -537,8 +537,6 @@
 	cur_conf->dtim_period = bss_conf->dtim_period;
 	cur_conf->dtim_count = 1;
 	cur_conf->ibss_creator = bss_conf->ibss_creator;
-	cur_conf->bmiss_timeout =
-		ATH_DEFAULT_BMISS_LIMIT * cur_conf->beacon_interval;
 
 	/*
 	 * It looks like mac80211 may end up using beacon interval of zero in
@@ -549,6 +547,9 @@
 	if (cur_conf->beacon_interval == 0)
 		cur_conf->beacon_interval = 100;
 
+	cur_conf->bmiss_timeout =
+		ATH_DEFAULT_BMISS_LIMIT * cur_conf->beacon_interval;
+
 	/*
 	 * We don't parse dtim period from mac80211 during the driver
 	 * initialization as it breaks association with hidden-ssid

diff --git a/drivers/net/wireless/ath/ath9k/common-debug.c b/drivers/net/wireless/ath/ath9k/common-debug.c
new file mode 100644
index 0000000..3b289f9
--- /dev/null
+++ b/drivers/net/wireless/ath/ath9k/common-debug.c

@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2008-2011 Atheros Communications Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "common.h"
+
+static ssize_t read_file_modal_eeprom(struct file *file, char __user *user_buf,
+				      size_t count, loff_t *ppos)
+{
+	struct ath_hw *ah = file->private_data;
+	u32 len = 0, size = 6000;
+	char *buf;
+	ssize_t retval;	/* signed: the read helper may return -errno */
+
+	buf = kzalloc(size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	len = ah->eep_ops->dump_eeprom(ah, false, buf, len, size);
+
+	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return retval;
+}
+
+static const struct file_operations fops_modal_eeprom = {
+	.read = read_file_modal_eeprom,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+
+void ath9k_cmn_debug_modal_eeprom(struct dentry *debugfs_phy,
+				  struct ath_hw *ah)
+{
+	debugfs_create_file("modal_eeprom", S_IRUSR, debugfs_phy, ah,
+			    &fops_modal_eeprom);
+}
+EXPORT_SYMBOL(ath9k_cmn_debug_modal_eeprom);
+
+static ssize_t read_file_base_eeprom(struct file *file, char __user *user_buf,
+				     size_t count, loff_t *ppos)
+{
+	struct ath_hw *ah = file->private_data;
+	u32 len = 0, size = 1500;
+	ssize_t retval = 0;
+	char *buf;
+
+	buf = kzalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len = ah->eep_ops->dump_eeprom(ah, true, buf, len, size);
+
+	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return retval;
+}
+
+static const struct file_operations fops_base_eeprom = {
+	.read = read_file_base_eeprom,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+void ath9k_cmn_debug_base_eeprom(struct dentry *debugfs_phy,
+				 struct ath_hw *ah)
+{
+	debugfs_create_file("base_eeprom", S_IRUSR, debugfs_phy, ah,
+			    &fops_base_eeprom);
+}
+EXPORT_SYMBOL(ath9k_cmn_debug_base_eeprom);
+
+void ath9k_cmn_debug_stat_rx(struct ath_rx_stats *rxstats,
+			     struct ath_rx_status *rs)
+{
+#define RX_PHY_ERR_INC(c) rxstats->phy_err_stats[c]++
+#define RX_CMN_STAT_INC(c) (rxstats->c++)
+
+	RX_CMN_STAT_INC(rx_pkts_all);
+	rxstats->rx_bytes_all += rs->rs_datalen;
+
+	if (rs->rs_status & ATH9K_RXERR_CRC)
+		RX_CMN_STAT_INC(crc_err);
+	if (rs->rs_status & ATH9K_RXERR_DECRYPT)
+		RX_CMN_STAT_INC(decrypt_crc_err);
+	if (rs->rs_status & ATH9K_RXERR_MIC)
+		RX_CMN_STAT_INC(mic_err);
+	if (rs->rs_status & ATH9K_RX_DELIM_CRC_PRE)
+		RX_CMN_STAT_INC(pre_delim_crc_err);
+	if (rs->rs_status & ATH9K_RX_DELIM_CRC_POST)
+		RX_CMN_STAT_INC(post_delim_crc_err);
+	if (rs->rs_status & ATH9K_RX_DECRYPT_BUSY)
+		RX_CMN_STAT_INC(decrypt_busy_err);
+
+	if (rs->rs_status & ATH9K_RXERR_PHY) {
+		RX_CMN_STAT_INC(phy_err);
+		if (rs->rs_phyerr < ATH9K_PHYERR_MAX)
+			RX_PHY_ERR_INC(rs->rs_phyerr);
+	}
+
+#undef RX_CMN_STAT_INC
+#undef RX_PHY_ERR_INC
+}
+EXPORT_SYMBOL(ath9k_cmn_debug_stat_rx);
+
+static ssize_t read_file_recv(struct file *file, char __user *user_buf,
+			      size_t count, loff_t *ppos)
+{
+#define RXS_ERR(s, e)					\
+	do {						\
+		len += scnprintf(buf + len, size - len,	\
+				 "%18s : %10u\n", s,	\
+				 rxstats->e);		\
+	} while (0)
+
+	struct ath_rx_stats *rxstats = file->private_data;
+	char *buf;
+	unsigned int len = 0, size = 1600;
+	ssize_t retval = 0;
+
+	buf = kzalloc(size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	RXS_ERR("PKTS-ALL", rx_pkts_all);
+	RXS_ERR("BYTES-ALL", rx_bytes_all);
+	RXS_ERR("BEACONS", rx_beacons);
+	RXS_ERR("FRAGS", rx_frags);
+	RXS_ERR("SPECTRAL", rx_spectral);
+
+	RXS_ERR("CRC ERR", crc_err);
+	RXS_ERR("DECRYPT CRC ERR", decrypt_crc_err);
+	RXS_ERR("PHY ERR", phy_err);
+	RXS_ERR("MIC ERR", mic_err);
+	RXS_ERR("PRE-DELIM CRC ERR", pre_delim_crc_err);
+	RXS_ERR("POST-DELIM CRC ERR", post_delim_crc_err);
+	RXS_ERR("DECRYPT BUSY ERR", decrypt_busy_err);
+	RXS_ERR("LENGTH-ERR", rx_len_err);
+	RXS_ERR("OOM-ERR", rx_oom_err);
+	RXS_ERR("RATE-ERR", rx_rate_err);
+	RXS_ERR("TOO-MANY-FRAGS", rx_too_many_frags_err);
+
+	if (len > size)
+		len = size;
+
+	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return retval;
+
+#undef RXS_ERR
+}
+
+static const struct file_operations fops_recv = {
+	.read = read_file_recv,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+void ath9k_cmn_debug_recv(struct dentry *debugfs_phy,
+			  struct ath_rx_stats *rxstats)
+{
+	debugfs_create_file("recv", S_IRUSR, debugfs_phy, rxstats,
+			    &fops_recv);
+}
+EXPORT_SYMBOL(ath9k_cmn_debug_recv);
+
+static ssize_t read_file_phy_err(struct file *file, char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+#define PHY_ERR(s, p) /* single expression: callers supply the ';' */ \
+	(len += scnprintf(buf + len, size - len, "%22s : %10u\n", s, \
+			  rxstats->phy_err_stats[p]))
+
+	struct ath_rx_stats *rxstats = file->private_data;
+	char *buf;
+	unsigned int len = 0, size = 1600;
+	ssize_t retval = 0;
+
+	buf = kzalloc(size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	PHY_ERR("UNDERRUN ERR", ATH9K_PHYERR_UNDERRUN);
+	PHY_ERR("TIMING ERR", ATH9K_PHYERR_TIMING);
+	PHY_ERR("PARITY ERR", ATH9K_PHYERR_PARITY);
+	PHY_ERR("RATE ERR", ATH9K_PHYERR_RATE);
+	PHY_ERR("LENGTH ERR", ATH9K_PHYERR_LENGTH);
+	PHY_ERR("RADAR ERR", ATH9K_PHYERR_RADAR);
+	PHY_ERR("SERVICE ERR", ATH9K_PHYERR_SERVICE);
+	PHY_ERR("TOR ERR", ATH9K_PHYERR_TOR);
+	PHY_ERR("OFDM-TIMING ERR", ATH9K_PHYERR_OFDM_TIMING);
+	PHY_ERR("OFDM-SIGNAL-PARITY ERR", ATH9K_PHYERR_OFDM_SIGNAL_PARITY);
+	PHY_ERR("OFDM-RATE ERR", ATH9K_PHYERR_OFDM_RATE_ILLEGAL);
+	PHY_ERR("OFDM-LENGTH ERR", ATH9K_PHYERR_OFDM_LENGTH_ILLEGAL);
+	PHY_ERR("OFDM-POWER-DROP ERR", ATH9K_PHYERR_OFDM_POWER_DROP);
+	PHY_ERR("OFDM-SERVICE ERR", ATH9K_PHYERR_OFDM_SERVICE);
+	PHY_ERR("OFDM-RESTART ERR", ATH9K_PHYERR_OFDM_RESTART);
+	PHY_ERR("FALSE-RADAR-EXT ERR", ATH9K_PHYERR_FALSE_RADAR_EXT);
+	PHY_ERR("CCK-TIMING ERR", ATH9K_PHYERR_CCK_TIMING);
+	PHY_ERR("CCK-HEADER-CRC ERR", ATH9K_PHYERR_CCK_HEADER_CRC);
+	PHY_ERR("CCK-RATE ERR", ATH9K_PHYERR_CCK_RATE_ILLEGAL);
+	PHY_ERR("CCK-SERVICE ERR", ATH9K_PHYERR_CCK_SERVICE);
+	PHY_ERR("CCK-RESTART ERR", ATH9K_PHYERR_CCK_RESTART);
+	PHY_ERR("CCK-LENGTH ERR", ATH9K_PHYERR_CCK_LENGTH_ILLEGAL);
+	PHY_ERR("CCK-POWER-DROP ERR", ATH9K_PHYERR_CCK_POWER_DROP);
+	PHY_ERR("HT-CRC ERR", ATH9K_PHYERR_HT_CRC_ERROR);
+	PHY_ERR("HT-LENGTH ERR", ATH9K_PHYERR_HT_LENGTH_ILLEGAL);
+	PHY_ERR("HT-RATE ERR", ATH9K_PHYERR_HT_RATE_ILLEGAL);
+
+	if (len > size)
+		len = size;
+
+	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return retval;
+
+#undef PHY_ERR
+}
+
+static const struct file_operations fops_phy_err = {
+	.read = read_file_phy_err,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+void ath9k_cmn_debug_phy_err(struct dentry *debugfs_phy,
+			     struct ath_rx_stats *rxstats)
+{
+	debugfs_create_file("phy_err", S_IRUSR, debugfs_phy, rxstats,
+			    &fops_phy_err);
+}
+EXPORT_SYMBOL(ath9k_cmn_debug_phy_err);

diff --git a/drivers/net/wireless/ath/ath9k/common-debug.h b/drivers/net/wireless/ath/ath9k/common-debug.h
new file mode 100644
index 0000000..7c97884
--- /dev/null
+++ b/drivers/net/wireless/ath/ath9k/common-debug.h

@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2008-2011 Atheros Communications Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+
+/**
+ * struct ath_rx_stats - RX Statistics
+ * @rx_pkts_all:  No. of total frames received, including ones that
+	may have had errors.
+ * @rx_bytes_all:  No. of total bytes received, including ones that
+	may have had errors.
+ * @crc_err: No. of frames with incorrect CRC value
+ * @decrypt_crc_err: No. of frames whose CRC check failed after
+	decryption process completed
+ * @phy_err: No. of frames whose reception failed because the PHY
+	encountered an error
+ * @mic_err: No. of frames that failed TKIP MIC verification
+ * @pre_delim_crc_err: Pre-Frame delimiter CRC error detections
+ * @post_delim_crc_err: Post-Frame delimiter CRC error detections
+ * @decrypt_busy_err: Decryption interruptions counter
+ * @phy_err_stats: Individual PHY error statistics
+ * @rx_len_err:  No. of frames discarded due to bad length.
+ * @rx_oom_err:  No. of frames dropped due to OOM issues.
+ * @rx_rate_err:  No. of frames dropped due to rate errors.
+ * @rx_too_many_frags_err:  Frames dropped due to too-many-frags received.
+ * @rx_beacons:  No. of beacons received.
+ * @rx_frags:  No. of rx-fragments received.
+ * @rx_spectral: No. of spectral packets received.
+ */
+struct ath_rx_stats {
+	u32 rx_pkts_all;
+	u32 rx_bytes_all;
+	u32 crc_err;
+	u32 decrypt_crc_err;
+	u32 phy_err;
+	u32 mic_err;
+	u32 pre_delim_crc_err;
+	u32 post_delim_crc_err;
+	u32 decrypt_busy_err;
+	u32 phy_err_stats[ATH9K_PHYERR_MAX];
+	u32 rx_len_err;
+	u32 rx_oom_err;
+	u32 rx_rate_err;
+	u32 rx_too_many_frags_err;
+	u32 rx_beacons;
+	u32 rx_frags;
+	u32 rx_spectral;
+};
+
+void ath9k_cmn_debug_modal_eeprom(struct dentry *debugfs_phy,
+				  struct ath_hw *ah);
+void ath9k_cmn_debug_base_eeprom(struct dentry *debugfs_phy,
+				 struct ath_hw *ah);
+void ath9k_cmn_debug_stat_rx(struct ath_rx_stats *rxstats,
+			     struct ath_rx_status *rs);
+void ath9k_cmn_debug_recv(struct dentry *debugfs_phy,
+			  struct ath_rx_stats *rxstats);
+void ath9k_cmn_debug_phy_err(struct dentry *debugfs_phy,
+			     struct ath_rx_stats *rxstats);

diff --git a/drivers/net/wireless/ath/ath9k/common.h b/drivers/net/wireless/ath/ath9k/common.h
index ca38116..ffc454b 100644
--- a/drivers/net/wireless/ath/ath9k/common.h
+++ b/drivers/net/wireless/ath/ath9k/common.h

@@ -23,6 +23,7 @@
 
 #include "common-init.h"
 #include "common-beacon.h"
+#include "common-debug.h"
 
 /* Common header for Atheros 802.11n base driver cores */
 

diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c
index 780ff1b..6cc42be 100644
--- a/drivers/net/wireless/ath/ath9k/debug.c
+++ b/drivers/net/wireless/ath/ath9k/debug.c

@@ -948,151 +948,11 @@
 	.llseek = default_llseek,
 };
 
-static ssize_t read_file_recv(struct file *file, char __user *user_buf,
-			      size_t count, loff_t *ppos)
-{
-#define RXS_ERR(s, e)					    \
-	do {						    \
-		len += scnprintf(buf + len, size - len,	    \
-				 "%18s : %10u\n", s,	    \
-				 sc->debug.stats.rxstats.e);\
-	} while (0)
-
-	struct ath_softc *sc = file->private_data;
-	char *buf;
-	unsigned int len = 0, size = 1600;
-	ssize_t retval = 0;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	RXS_ERR("PKTS-ALL", rx_pkts_all);
-	RXS_ERR("BYTES-ALL", rx_bytes_all);
-	RXS_ERR("BEACONS", rx_beacons);
-	RXS_ERR("FRAGS", rx_frags);
-	RXS_ERR("SPECTRAL", rx_spectral);
-
-	RXS_ERR("CRC ERR", crc_err);
-	RXS_ERR("DECRYPT CRC ERR", decrypt_crc_err);
-	RXS_ERR("PHY ERR", phy_err);
-	RXS_ERR("MIC ERR", mic_err);
-	RXS_ERR("PRE-DELIM CRC ERR", pre_delim_crc_err);
-	RXS_ERR("POST-DELIM CRC ERR", post_delim_crc_err);
-	RXS_ERR("DECRYPT BUSY ERR", decrypt_busy_err);
-	RXS_ERR("LENGTH-ERR", rx_len_err);
-	RXS_ERR("OOM-ERR", rx_oom_err);
-	RXS_ERR("RATE-ERR", rx_rate_err);
-	RXS_ERR("TOO-MANY-FRAGS", rx_too_many_frags_err);
-
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-
-#undef RXS_ERR
-}
-
 void ath_debug_stat_rx(struct ath_softc *sc, struct ath_rx_status *rs)
 {
-#define RX_PHY_ERR_INC(c) sc->debug.stats.rxstats.phy_err_stats[c]++
-
-	RX_STAT_INC(rx_pkts_all);
-	sc->debug.stats.rxstats.rx_bytes_all += rs->rs_datalen;
-
-	if (rs->rs_status & ATH9K_RXERR_CRC)
-		RX_STAT_INC(crc_err);
-	if (rs->rs_status & ATH9K_RXERR_DECRYPT)
-		RX_STAT_INC(decrypt_crc_err);
-	if (rs->rs_status & ATH9K_RXERR_MIC)
-		RX_STAT_INC(mic_err);
-	if (rs->rs_status & ATH9K_RX_DELIM_CRC_PRE)
-		RX_STAT_INC(pre_delim_crc_err);
-	if (rs->rs_status & ATH9K_RX_DELIM_CRC_POST)
-		RX_STAT_INC(post_delim_crc_err);
-	if (rs->rs_status & ATH9K_RX_DECRYPT_BUSY)
-		RX_STAT_INC(decrypt_busy_err);
-
-	if (rs->rs_status & ATH9K_RXERR_PHY) {
-		RX_STAT_INC(phy_err);
-		if (rs->rs_phyerr < ATH9K_PHYERR_MAX)
-			RX_PHY_ERR_INC(rs->rs_phyerr);
-	}
-
-#undef RX_PHY_ERR_INC
+	ath9k_cmn_debug_stat_rx(&sc->debug.stats.rxstats, rs);
 }
 
-static const struct file_operations fops_recv = {
-	.read = read_file_recv,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
-static ssize_t read_file_phy_err(struct file *file, char __user *user_buf,
-				 size_t count, loff_t *ppos)
-{
-#define PHY_ERR(s, p) \
-	len += scnprintf(buf + len, size - len, "%22s : %10u\n", s, \
-			 sc->debug.stats.rxstats.phy_err_stats[p]);
-
-	struct ath_softc *sc = file->private_data;
-	char *buf;
-	unsigned int len = 0, size = 1600;
-	ssize_t retval = 0;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	PHY_ERR("UNDERRUN ERR", ATH9K_PHYERR_UNDERRUN);
-	PHY_ERR("TIMING ERR", ATH9K_PHYERR_TIMING);
-	PHY_ERR("PARITY ERR", ATH9K_PHYERR_PARITY);
-	PHY_ERR("RATE ERR", ATH9K_PHYERR_RATE);
-	PHY_ERR("LENGTH ERR", ATH9K_PHYERR_LENGTH);
-	PHY_ERR("RADAR ERR", ATH9K_PHYERR_RADAR);
-	PHY_ERR("SERVICE ERR", ATH9K_PHYERR_SERVICE);
-	PHY_ERR("TOR ERR", ATH9K_PHYERR_TOR);
-	PHY_ERR("OFDM-TIMING ERR", ATH9K_PHYERR_OFDM_TIMING);
-	PHY_ERR("OFDM-SIGNAL-PARITY ERR", ATH9K_PHYERR_OFDM_SIGNAL_PARITY);
-	PHY_ERR("OFDM-RATE ERR", ATH9K_PHYERR_OFDM_RATE_ILLEGAL);
-	PHY_ERR("OFDM-LENGTH ERR", ATH9K_PHYERR_OFDM_LENGTH_ILLEGAL);
-	PHY_ERR("OFDM-POWER-DROP ERR", ATH9K_PHYERR_OFDM_POWER_DROP);
-	PHY_ERR("OFDM-SERVICE ERR", ATH9K_PHYERR_OFDM_SERVICE);
-	PHY_ERR("OFDM-RESTART ERR", ATH9K_PHYERR_OFDM_RESTART);
-	PHY_ERR("FALSE-RADAR-EXT ERR", ATH9K_PHYERR_FALSE_RADAR_EXT);
-	PHY_ERR("CCK-TIMING ERR", ATH9K_PHYERR_CCK_TIMING);
-	PHY_ERR("CCK-HEADER-CRC ERR", ATH9K_PHYERR_CCK_HEADER_CRC);
-	PHY_ERR("CCK-RATE ERR", ATH9K_PHYERR_CCK_RATE_ILLEGAL);
-	PHY_ERR("CCK-SERVICE ERR", ATH9K_PHYERR_CCK_SERVICE);
-	PHY_ERR("CCK-RESTART ERR", ATH9K_PHYERR_CCK_RESTART);
-	PHY_ERR("CCK-LENGTH ERR", ATH9K_PHYERR_CCK_LENGTH_ILLEGAL);
-	PHY_ERR("CCK-POWER-DROP ERR", ATH9K_PHYERR_CCK_POWER_DROP);
-	PHY_ERR("HT-CRC ERR", ATH9K_PHYERR_HT_CRC_ERROR);
-	PHY_ERR("HT-LENGTH ERR", ATH9K_PHYERR_HT_LENGTH_ILLEGAL);
-	PHY_ERR("HT-RATE ERR", ATH9K_PHYERR_HT_RATE_ILLEGAL);
-
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-
-#undef PHY_ERR
-}
-
-static const struct file_operations fops_phy_err = {
-	.read = read_file_phy_err,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
 static ssize_t read_file_regidx(struct file *file, char __user *user_buf,
                                 size_t count, loff_t *ppos)
 {
@@ -1268,62 +1128,6 @@
 	.llseek = default_llseek,
 };
 
-static ssize_t read_file_base_eeprom(struct file *file, char __user *user_buf,
-				     size_t count, loff_t *ppos)
-{
-	struct ath_softc *sc = file->private_data;
-	struct ath_hw *ah = sc->sc_ah;
-	u32 len = 0, size = 1500;
-	ssize_t retval = 0;
-	char *buf;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	len = ah->eep_ops->dump_eeprom(ah, true, buf, len, size);
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-}
-
-static const struct file_operations fops_base_eeprom = {
-	.read = read_file_base_eeprom,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
-static ssize_t read_file_modal_eeprom(struct file *file, char __user *user_buf,
-				      size_t count, loff_t *ppos)
-{
-	struct ath_softc *sc = file->private_data;
-	struct ath_hw *ah = sc->sc_ah;
-	u32 len = 0, size = 6000;
-	char *buf;
-	size_t retval;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	len = ah->eep_ops->dump_eeprom(ah, false, buf, len, size);
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-}
-
-static const struct file_operations fops_modal_eeprom = {
-	.read = read_file_modal_eeprom,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
 #ifdef CONFIG_ATH9K_BTCOEX_SUPPORT
 static ssize_t read_file_btcoex(struct file *file, char __user *user_buf,
 				size_t count, loff_t *ppos)
@@ -1524,10 +1328,10 @@
 			    &fops_misc);
 	debugfs_create_file("reset", S_IRUSR, sc->debug.debugfs_phy, sc,
 			    &fops_reset);
-	debugfs_create_file("recv", S_IRUSR, sc->debug.debugfs_phy, sc,
-			    &fops_recv);
-	debugfs_create_file("phy_err", S_IRUSR, sc->debug.debugfs_phy, sc,
-			    &fops_phy_err);
+
+	ath9k_cmn_debug_recv(sc->debug.debugfs_phy, &sc->debug.stats.rxstats);
+	ath9k_cmn_debug_phy_err(sc->debug.debugfs_phy, &sc->debug.stats.rxstats);
+
 	debugfs_create_u8("rx_chainmask", S_IRUSR, sc->debug.debugfs_phy,
 			  &ah->rxchainmask);
 	debugfs_create_u8("tx_chainmask", S_IRUSR, sc->debug.debugfs_phy,
@@ -1547,10 +1351,10 @@
 			    &fops_regdump);
 	debugfs_create_file("dump_nfcal", S_IRUSR, sc->debug.debugfs_phy, sc,
 			    &fops_dump_nfcal);
-	debugfs_create_file("base_eeprom", S_IRUSR, sc->debug.debugfs_phy, sc,
-			    &fops_base_eeprom);
-	debugfs_create_file("modal_eeprom", S_IRUSR, sc->debug.debugfs_phy, sc,
-			    &fops_modal_eeprom);
+
+	ath9k_cmn_debug_base_eeprom(sc->debug.debugfs_phy, sc->sc_ah);
+	ath9k_cmn_debug_modal_eeprom(sc->debug.debugfs_phy, sc->sc_ah);
+
 	debugfs_create_u32("gpio_mask", S_IRUSR | S_IWUSR,
 			   sc->debug.debugfs_phy, &sc->sc_ah->gpio_mask);
 	debugfs_create_u32("gpio_val", S_IRUSR | S_IWUSR,

diff --git a/drivers/net/wireless/ath/ath9k/debug.h b/drivers/net/wireless/ath/ath9k/debug.h
index 559a68c..53ae15b 100644
--- a/drivers/net/wireless/ath/ath9k/debug.h
+++ b/drivers/net/wireless/ath/ath9k/debug.h

@@ -221,50 +221,6 @@
 	} cck_stats[4];
 };
 
-/**
- * struct ath_rx_stats - RX Statistics
- * @rx_pkts_all:  No. of total frames received, including ones that
-	may have had errors.
- * @rx_bytes_all:  No. of total bytes received, including ones that
-	may have had errors.
- * @crc_err: No. of frames with incorrect CRC value
- * @decrypt_crc_err: No. of frames whose CRC check failed after
-	decryption process completed
- * @phy_err: No. of frames whose reception failed because the PHY
-	encountered an error
- * @mic_err: No. of frames with incorrect TKIP MIC verification failure
- * @pre_delim_crc_err: Pre-Frame delimiter CRC error detections
- * @post_delim_crc_err: Post-Frame delimiter CRC error detections
- * @decrypt_busy_err: Decryption interruptions counter
- * @phy_err_stats: Individual PHY error statistics
- * @rx_len_err:  No. of frames discarded due to bad length.
- * @rx_oom_err:  No. of frames dropped due to OOM issues.
- * @rx_rate_err:  No. of frames dropped due to rate errors.
- * @rx_too_many_frags_err:  Frames dropped due to too-many-frags received.
- * @rx_beacons:  No. of beacons received.
- * @rx_frags:  No. of rx-fragements received.
- * @rx_spectral: No of spectral packets received.
- */
-struct ath_rx_stats {
-	u32 rx_pkts_all;
-	u32 rx_bytes_all;
-	u32 crc_err;
-	u32 decrypt_crc_err;
-	u32 phy_err;
-	u32 mic_err;
-	u32 pre_delim_crc_err;
-	u32 post_delim_crc_err;
-	u32 decrypt_busy_err;
-	u32 phy_err_stats[ATH9K_PHYERR_MAX];
-	u32 rx_len_err;
-	u32 rx_oom_err;
-	u32 rx_rate_err;
-	u32 rx_too_many_frags_err;
-	u32 rx_beacons;
-	u32 rx_frags;
-	u32 rx_spectral;
-};
-
 #define ANT_MAIN 0
 #define ANT_ALT  1
 

diff --git a/drivers/net/wireless/ath/ath9k/dfs.c b/drivers/net/wireless/ath/ath9k/dfs.c
index 857bb28..726271c 100644
--- a/drivers/net/wireless/ath/ath9k/dfs.c
+++ b/drivers/net/wireless/ath/ath9k/dfs.c

@@ -178,12 +178,12 @@
 	pe.ts = mactime;
 	if (ath9k_postprocess_radar_event(sc, &ard, &pe)) {
 		struct dfs_pattern_detector *pd = sc->dfs_detector;
-		static u64 last_ts;
 		ath_dbg(common, DFS,
 			"ath9k_dfs_process_phyerr: channel=%d, ts=%llu, "
 			"width=%d, rssi=%d, delta_ts=%llu\n",
-			pe.freq, pe.ts, pe.width, pe.rssi, pe.ts-last_ts);
-		last_ts = pe.ts;
+			pe.freq, pe.ts, pe.width, pe.rssi,
+			pe.ts - sc->dfs_prev_pulse_ts);
+		sc->dfs_prev_pulse_ts = pe.ts;
 		DFS_STAT_INC(sc, pulses_processed);
 		if (pd != NULL && pd->add_pulse(pd, &pe)) {
 			DFS_STAT_INC(sc, radar_detected);

diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
index dab1f0c..09a5d72 100644
--- a/drivers/net/wireless/ath/ath9k/htc.h
+++ b/drivers/net/wireless/ath/ath9k/htc.h

@@ -325,14 +325,14 @@
 
 #define TX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c++)
 #define TX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c += a)
-#define RX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.rx_stats.c++)
-#define RX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.rx_stats.c += a)
+#define RX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c++)
+#define RX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c += a)
 #define CAB_STAT_INC   priv->debug.tx_stats.cab_queued++
 
 #define TX_QSTAT_INC(q) (priv->debug.tx_stats.queue_stats[q]++)
 
 void ath9k_htc_err_stat_rx(struct ath9k_htc_priv *priv,
-			   struct ath_htc_rx_status *rxs);
+			   struct ath_rx_status *rs);
 
 struct ath_tx_stats {
 	u32 buf_queued;
@@ -345,25 +345,18 @@
 	u32 queue_stats[IEEE80211_NUM_ACS];
 };
 
-struct ath_rx_stats {
+struct ath_skbrx_stats {
 	u32 skb_allocated;
 	u32 skb_completed;
 	u32 skb_completed_bytes;
 	u32 skb_dropped;
-	u32 err_crc;
-	u32 err_decrypt_crc;
-	u32 err_mic;
-	u32 err_pre_delim;
-	u32 err_post_delim;
-	u32 err_decrypt_busy;
-	u32 err_phy;
-	u32 err_phy_stats[ATH9K_PHYERR_MAX];
 };
 
 struct ath9k_debug {
 	struct dentry *debugfs_phy;
 	struct ath_tx_stats tx_stats;
 	struct ath_rx_stats rx_stats;
+	struct ath_skbrx_stats skbrx_stats;
 };
 
 void ath9k_htc_get_et_strings(struct ieee80211_hw *hw,
@@ -385,7 +378,7 @@
 #define TX_QSTAT_INC(c) do { } while (0)
 
 static inline void ath9k_htc_err_stat_rx(struct ath9k_htc_priv *priv,
-					 struct ath_htc_rx_status *rxs)
+					 struct ath_rx_status *rs)
 {
 }
 

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
index fb071ee..8b529e4 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c

@@ -243,39 +243,14 @@
 };
 
 void ath9k_htc_err_stat_rx(struct ath9k_htc_priv *priv,
-			   struct ath_htc_rx_status *rxs)
+			     struct ath_rx_status *rs)
 {
-#define RX_PHY_ERR_INC(c) priv->debug.rx_stats.err_phy_stats[c]++
-
-	if (rxs->rs_status & ATH9K_RXERR_CRC)
-		priv->debug.rx_stats.err_crc++;
-	if (rxs->rs_status & ATH9K_RXERR_DECRYPT)
-		priv->debug.rx_stats.err_decrypt_crc++;
-	if (rxs->rs_status & ATH9K_RXERR_MIC)
-		priv->debug.rx_stats.err_mic++;
-	if (rxs->rs_status & ATH9K_RX_DELIM_CRC_PRE)
-		priv->debug.rx_stats.err_pre_delim++;
-	if (rxs->rs_status & ATH9K_RX_DELIM_CRC_POST)
-		priv->debug.rx_stats.err_post_delim++;
-	if (rxs->rs_status & ATH9K_RX_DECRYPT_BUSY)
-		priv->debug.rx_stats.err_decrypt_busy++;
-
-	if (rxs->rs_status & ATH9K_RXERR_PHY) {
-		priv->debug.rx_stats.err_phy++;
-		if (rxs->rs_phyerr < ATH9K_PHYERR_MAX)
-			RX_PHY_ERR_INC(rxs->rs_phyerr);
-	}
-
-#undef RX_PHY_ERR_INC
+	ath9k_cmn_debug_stat_rx(&priv->debug.rx_stats, rs);
 }
 
-static ssize_t read_file_recv(struct file *file, char __user *user_buf,
+static ssize_t read_file_skb_rx(struct file *file, char __user *user_buf,
 			      size_t count, loff_t *ppos)
 {
-#define PHY_ERR(s, p)							\
-	len += scnprintf(buf + len, size - len, "%20s : %10u\n", s,	\
-			 priv->debug.rx_stats.err_phy_stats[p]);
-
 	struct ath9k_htc_priv *priv = file->private_data;
 	char *buf;
 	unsigned int len = 0, size = 1500;
@@ -287,63 +262,13 @@
 
 	len += scnprintf(buf + len, size - len,
 			 "%20s : %10u\n", "SKBs allocated",
-			 priv->debug.rx_stats.skb_allocated);
+			 priv->debug.skbrx_stats.skb_allocated);
 	len += scnprintf(buf + len, size - len,
 			 "%20s : %10u\n", "SKBs completed",
-			 priv->debug.rx_stats.skb_completed);
+			 priv->debug.skbrx_stats.skb_completed);
 	len += scnprintf(buf + len, size - len,
 			 "%20s : %10u\n", "SKBs Dropped",
-			 priv->debug.rx_stats.skb_dropped);
-
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "CRC ERR",
-			 priv->debug.rx_stats.err_crc);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "DECRYPT CRC ERR",
-			 priv->debug.rx_stats.err_decrypt_crc);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "MIC ERR",
-			 priv->debug.rx_stats.err_mic);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "PRE-DELIM CRC ERR",
-			 priv->debug.rx_stats.err_pre_delim);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "POST-DELIM CRC ERR",
-			 priv->debug.rx_stats.err_post_delim);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "DECRYPT BUSY ERR",
-			 priv->debug.rx_stats.err_decrypt_busy);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10u\n", "TOTAL PHY ERR",
-			 priv->debug.rx_stats.err_phy);
-
-
-	PHY_ERR("UNDERRUN", ATH9K_PHYERR_UNDERRUN);
-	PHY_ERR("TIMING", ATH9K_PHYERR_TIMING);
-	PHY_ERR("PARITY", ATH9K_PHYERR_PARITY);
-	PHY_ERR("RATE", ATH9K_PHYERR_RATE);
-	PHY_ERR("LENGTH", ATH9K_PHYERR_LENGTH);
-	PHY_ERR("RADAR", ATH9K_PHYERR_RADAR);
-	PHY_ERR("SERVICE", ATH9K_PHYERR_SERVICE);
-	PHY_ERR("TOR", ATH9K_PHYERR_TOR);
-	PHY_ERR("OFDM-TIMING", ATH9K_PHYERR_OFDM_TIMING);
-	PHY_ERR("OFDM-SIGNAL-PARITY", ATH9K_PHYERR_OFDM_SIGNAL_PARITY);
-	PHY_ERR("OFDM-RATE", ATH9K_PHYERR_OFDM_RATE_ILLEGAL);
-	PHY_ERR("OFDM-LENGTH", ATH9K_PHYERR_OFDM_LENGTH_ILLEGAL);
-	PHY_ERR("OFDM-POWER-DROP", ATH9K_PHYERR_OFDM_POWER_DROP);
-	PHY_ERR("OFDM-SERVICE", ATH9K_PHYERR_OFDM_SERVICE);
-	PHY_ERR("OFDM-RESTART", ATH9K_PHYERR_OFDM_RESTART);
-	PHY_ERR("FALSE-RADAR-EXT", ATH9K_PHYERR_FALSE_RADAR_EXT);
-	PHY_ERR("CCK-TIMING", ATH9K_PHYERR_CCK_TIMING);
-	PHY_ERR("CCK-HEADER-CRC", ATH9K_PHYERR_CCK_HEADER_CRC);
-	PHY_ERR("CCK-RATE", ATH9K_PHYERR_CCK_RATE_ILLEGAL);
-	PHY_ERR("CCK-SERVICE", ATH9K_PHYERR_CCK_SERVICE);
-	PHY_ERR("CCK-RESTART", ATH9K_PHYERR_CCK_RESTART);
-	PHY_ERR("CCK-LENGTH", ATH9K_PHYERR_CCK_LENGTH_ILLEGAL);
-	PHY_ERR("CCK-POWER-DROP", ATH9K_PHYERR_CCK_POWER_DROP);
-	PHY_ERR("HT-CRC", ATH9K_PHYERR_HT_CRC_ERROR);
-	PHY_ERR("HT-LENGTH", ATH9K_PHYERR_HT_LENGTH_ILLEGAL);
-	PHY_ERR("HT-RATE", ATH9K_PHYERR_HT_RATE_ILLEGAL);
+			 priv->debug.skbrx_stats.skb_dropped);
 
 	if (len > size)
 		len = size;
@@ -352,12 +277,10 @@
 	kfree(buf);
 
 	return retval;
-
-#undef PHY_ERR
 }
 
-static const struct file_operations fops_recv = {
-	.read = read_file_recv,
+static const struct file_operations fops_skb_rx = {
+	.read = read_file_skb_rx,
 	.open = simple_open,
 	.owner = THIS_MODULE,
 	.llseek = default_llseek,
@@ -486,423 +409,6 @@
 	.llseek = default_llseek,
 };
 
-static ssize_t read_file_base_eeprom(struct file *file, char __user *user_buf,
-				     size_t count, loff_t *ppos)
-{
-	struct ath9k_htc_priv *priv = file->private_data;
-	struct ath_common *common = ath9k_hw_common(priv->ah);
-	struct base_eep_header *pBase = NULL;
-	unsigned int len = 0, size = 1500;
-	ssize_t retval = 0;
-	char *buf;
-
-	pBase = ath9k_htc_get_eeprom_base(priv);
-
-	if (pBase == NULL) {
-		ath_err(common, "Unknown EEPROM type\n");
-		return 0;
-	}
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "Major Version",
-			 pBase->version >> 12);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "Minor Version",
-			 pBase->version & 0xFFF);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "Checksum",
-			 pBase->checksum);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "Length",
-			 pBase->length);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "RegDomain1",
-			 pBase->regDmn[0]);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n", "RegDomain2",
-			 pBase->regDmn[1]);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "TX Mask", pBase->txMask);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "RX Mask", pBase->rxMask);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Allow 5GHz",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_11A));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Allow 2GHz",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_11G));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Disable 2GHz HT20",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_N_2G_HT20));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Disable 2GHz HT40",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_N_2G_HT40));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Disable 5Ghz HT20",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_N_5G_HT20));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Disable 5Ghz HT40",
-			 !!(pBase->opCapFlags & AR5416_OPFLAGS_N_5G_HT40));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Big Endian",
-			 !!(pBase->eepMisc & 0x01));
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Cal Bin Major Ver",
-			 (pBase->binBuildNumber >> 24) & 0xFF);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Cal Bin Minor Ver",
-			 (pBase->binBuildNumber >> 16) & 0xFF);
-	len += scnprintf(buf + len, size - len,
-			 "%20s : %10d\n",
-			 "Cal Bin Build",
-			 (pBase->binBuildNumber >> 8) & 0xFF);
-
-	/*
-	 * UB91 specific data.
-	 */
-	if (AR_SREV_9271(priv->ah)) {
-		struct base_eep_header_4k *pBase4k =
-			&priv->ah->eeprom.map4k.baseEepHeader;
-
-		len += scnprintf(buf + len, size - len,
-				 "%20s : %10d\n",
-				 "TX Gain type",
-				 pBase4k->txGainType);
-	}
-
-	/*
-	 * UB95 specific data.
-	 */
-	if (priv->ah->hw_version.usbdev == AR9287_USB) {
-		struct base_eep_ar9287_header *pBase9287 =
-			&priv->ah->eeprom.map9287.baseEepHeader;
-
-		len += scnprintf(buf + len, size - len,
-				 "%20s : %10ddB\n",
-				 "Power Table Offset",
-				 pBase9287->pwrTableOffset);
-
-		len += scnprintf(buf + len, size - len,
-				 "%20s : %10d\n",
-				 "OpenLoop Power Ctrl",
-				 pBase9287->openLoopPwrCntl);
-	}
-
-	len += scnprintf(buf + len, size - len, "%20s : %pM\n", "MacAddress",
-			 pBase->macAddr);
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-}
-
-static const struct file_operations fops_base_eeprom = {
-	.read = read_file_base_eeprom,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
-static ssize_t read_4k_modal_eeprom(struct file *file,
-				    char __user *user_buf,
-				    size_t count, loff_t *ppos)
-{
-#define PR_EEP(_s, _val)						\
-	do {								\
-		len += scnprintf(buf + len, size - len, "%20s : %10d\n",\
-				 _s, (_val));				\
-	} while (0)
-
-	struct ath9k_htc_priv *priv = file->private_data;
-	struct modal_eep_4k_header *pModal = &priv->ah->eeprom.map4k.modalHeader;
-	unsigned int len = 0, size = 2048;
-	ssize_t retval = 0;
-	char *buf;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	PR_EEP("Chain0 Ant. Control", pModal->antCtrlChain[0]);
-	PR_EEP("Ant. Common Control", pModal->antCtrlCommon);
-	PR_EEP("Chain0 Ant. Gain", pModal->antennaGainCh[0]);
-	PR_EEP("Switch Settle", pModal->switchSettling);
-	PR_EEP("Chain0 TxRxAtten", pModal->txRxAttenCh[0]);
-	PR_EEP("Chain0 RxTxMargin", pModal->rxTxMarginCh[0]);
-	PR_EEP("ADC Desired size", pModal->adcDesiredSize);
-	PR_EEP("PGA Desired size", pModal->pgaDesiredSize);
-	PR_EEP("Chain0 xlna Gain", pModal->xlnaGainCh[0]);
-	PR_EEP("txEndToXpaOff", pModal->txEndToXpaOff);
-	PR_EEP("txEndToRxOn", pModal->txEndToRxOn);
-	PR_EEP("txFrameToXpaOn", pModal->txFrameToXpaOn);
-	PR_EEP("CCA Threshold)", pModal->thresh62);
-	PR_EEP("Chain0 NF Threshold", pModal->noiseFloorThreshCh[0]);
-	PR_EEP("xpdGain", pModal->xpdGain);
-	PR_EEP("External PD", pModal->xpd);
-	PR_EEP("Chain0 I Coefficient", pModal->iqCalICh[0]);
-	PR_EEP("Chain0 Q Coefficient", pModal->iqCalQCh[0]);
-	PR_EEP("pdGainOverlap", pModal->pdGainOverlap);
-	PR_EEP("O/D Bias Version", pModal->version);
-	PR_EEP("CCK OutputBias", pModal->ob_0);
-	PR_EEP("BPSK OutputBias", pModal->ob_1);
-	PR_EEP("QPSK OutputBias", pModal->ob_2);
-	PR_EEP("16QAM OutputBias", pModal->ob_3);
-	PR_EEP("64QAM OutputBias", pModal->ob_4);
-	PR_EEP("CCK Driver1_Bias", pModal->db1_0);
-	PR_EEP("BPSK Driver1_Bias", pModal->db1_1);
-	PR_EEP("QPSK Driver1_Bias", pModal->db1_2);
-	PR_EEP("16QAM Driver1_Bias", pModal->db1_3);
-	PR_EEP("64QAM Driver1_Bias", pModal->db1_4);
-	PR_EEP("CCK Driver2_Bias", pModal->db2_0);
-	PR_EEP("BPSK Driver2_Bias", pModal->db2_1);
-	PR_EEP("QPSK Driver2_Bias", pModal->db2_2);
-	PR_EEP("16QAM Driver2_Bias", pModal->db2_3);
-	PR_EEP("64QAM Driver2_Bias", pModal->db2_4);
-	PR_EEP("xPA Bias Level", pModal->xpaBiasLvl);
-	PR_EEP("txFrameToDataStart", pModal->txFrameToDataStart);
-	PR_EEP("txFrameToPaOn", pModal->txFrameToPaOn);
-	PR_EEP("HT40 Power Inc.", pModal->ht40PowerIncForPdadc);
-	PR_EEP("Chain0 bswAtten", pModal->bswAtten[0]);
-	PR_EEP("Chain0 bswMargin", pModal->bswMargin[0]);
-	PR_EEP("HT40 Switch Settle", pModal->swSettleHt40);
-	PR_EEP("Chain0 xatten2Db", pModal->xatten2Db[0]);
-	PR_EEP("Chain0 xatten2Margin", pModal->xatten2Margin[0]);
-	PR_EEP("Ant. Diversity ctl1", pModal->antdiv_ctl1);
-	PR_EEP("Ant. Diversity ctl2", pModal->antdiv_ctl2);
-	PR_EEP("TX Diversity", pModal->tx_diversity);
-
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-
-#undef PR_EEP
-}
-
-static ssize_t read_def_modal_eeprom(struct file *file,
-				     char __user *user_buf,
-				     size_t count, loff_t *ppos)
-{
-#define PR_EEP(_s, _val)						\
-	do {								\
-		if (pBase->opCapFlags & AR5416_OPFLAGS_11G) {		\
-			pModal = &priv->ah->eeprom.def.modalHeader[1];	\
-			len += scnprintf(buf + len, size - len, "%20s : %8d%7s", \
-					 _s, (_val), "|");		\
-		}							\
-		if (pBase->opCapFlags & AR5416_OPFLAGS_11A) {		\
-			pModal = &priv->ah->eeprom.def.modalHeader[0];	\
-			len += scnprintf(buf + len, size - len, "%9d\n",\
-					(_val));			\
-		}							\
-	} while (0)
-
-	struct ath9k_htc_priv *priv = file->private_data;
-	struct base_eep_header *pBase = &priv->ah->eeprom.def.baseEepHeader;
-	struct modal_eep_header *pModal = NULL;
-	unsigned int len = 0, size = 3500;
-	ssize_t retval = 0;
-	char *buf;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	len += scnprintf(buf + len, size - len,
-			 "%31s %15s\n", "2G", "5G");
-	len += scnprintf(buf + len, size - len,
-			 "%32s %16s\n", "====", "====\n");
-
-	PR_EEP("Chain0 Ant. Control", pModal->antCtrlChain[0]);
-	PR_EEP("Chain1 Ant. Control", pModal->antCtrlChain[1]);
-	PR_EEP("Chain2 Ant. Control", pModal->antCtrlChain[2]);
-	PR_EEP("Ant. Common Control", pModal->antCtrlCommon);
-	PR_EEP("Chain0 Ant. Gain", pModal->antennaGainCh[0]);
-	PR_EEP("Chain1 Ant. Gain", pModal->antennaGainCh[1]);
-	PR_EEP("Chain2 Ant. Gain", pModal->antennaGainCh[2]);
-	PR_EEP("Switch Settle", pModal->switchSettling);
-	PR_EEP("Chain0 TxRxAtten", pModal->txRxAttenCh[0]);
-	PR_EEP("Chain1 TxRxAtten", pModal->txRxAttenCh[1]);
-	PR_EEP("Chain2 TxRxAtten", pModal->txRxAttenCh[2]);
-	PR_EEP("Chain0 RxTxMargin", pModal->rxTxMarginCh[0]);
-	PR_EEP("Chain1 RxTxMargin", pModal->rxTxMarginCh[1]);
-	PR_EEP("Chain2 RxTxMargin", pModal->rxTxMarginCh[2]);
-	PR_EEP("ADC Desired size", pModal->adcDesiredSize);
-	PR_EEP("PGA Desired size", pModal->pgaDesiredSize);
-	PR_EEP("Chain0 xlna Gain", pModal->xlnaGainCh[0]);
-	PR_EEP("Chain1 xlna Gain", pModal->xlnaGainCh[1]);
-	PR_EEP("Chain2 xlna Gain", pModal->xlnaGainCh[2]);
-	PR_EEP("txEndToXpaOff", pModal->txEndToXpaOff);
-	PR_EEP("txEndToRxOn", pModal->txEndToRxOn);
-	PR_EEP("txFrameToXpaOn", pModal->txFrameToXpaOn);
-	PR_EEP("CCA Threshold)", pModal->thresh62);
-	PR_EEP("Chain0 NF Threshold", pModal->noiseFloorThreshCh[0]);
-	PR_EEP("Chain1 NF Threshold", pModal->noiseFloorThreshCh[1]);
-	PR_EEP("Chain2 NF Threshold", pModal->noiseFloorThreshCh[2]);
-	PR_EEP("xpdGain", pModal->xpdGain);
-	PR_EEP("External PD", pModal->xpd);
-	PR_EEP("Chain0 I Coefficient", pModal->iqCalICh[0]);
-	PR_EEP("Chain1 I Coefficient", pModal->iqCalICh[1]);
-	PR_EEP("Chain2 I Coefficient", pModal->iqCalICh[2]);
-	PR_EEP("Chain0 Q Coefficient", pModal->iqCalQCh[0]);
-	PR_EEP("Chain1 Q Coefficient", pModal->iqCalQCh[1]);
-	PR_EEP("Chain2 Q Coefficient", pModal->iqCalQCh[2]);
-	PR_EEP("pdGainOverlap", pModal->pdGainOverlap);
-	PR_EEP("Chain0 OutputBias", pModal->ob);
-	PR_EEP("Chain0 DriverBias", pModal->db);
-	PR_EEP("xPA Bias Level", pModal->xpaBiasLvl);
-	PR_EEP("2chain pwr decrease", pModal->pwrDecreaseFor2Chain);
-	PR_EEP("3chain pwr decrease", pModal->pwrDecreaseFor3Chain);
-	PR_EEP("txFrameToDataStart", pModal->txFrameToDataStart);
-	PR_EEP("txFrameToPaOn", pModal->txFrameToPaOn);
-	PR_EEP("HT40 Power Inc.", pModal->ht40PowerIncForPdadc);
-	PR_EEP("Chain0 bswAtten", pModal->bswAtten[0]);
-	PR_EEP("Chain1 bswAtten", pModal->bswAtten[1]);
-	PR_EEP("Chain2 bswAtten", pModal->bswAtten[2]);
-	PR_EEP("Chain0 bswMargin", pModal->bswMargin[0]);
-	PR_EEP("Chain1 bswMargin", pModal->bswMargin[1]);
-	PR_EEP("Chain2 bswMargin", pModal->bswMargin[2]);
-	PR_EEP("HT40 Switch Settle", pModal->swSettleHt40);
-	PR_EEP("Chain0 xatten2Db", pModal->xatten2Db[0]);
-	PR_EEP("Chain1 xatten2Db", pModal->xatten2Db[1]);
-	PR_EEP("Chain2 xatten2Db", pModal->xatten2Db[2]);
-	PR_EEP("Chain0 xatten2Margin", pModal->xatten2Margin[0]);
-	PR_EEP("Chain1 xatten2Margin", pModal->xatten2Margin[1]);
-	PR_EEP("Chain2 xatten2Margin", pModal->xatten2Margin[2]);
-	PR_EEP("Chain1 OutputBias", pModal->ob_ch1);
-	PR_EEP("Chain1 DriverBias", pModal->db_ch1);
-	PR_EEP("LNA Control", pModal->lna_ctl);
-	PR_EEP("XPA Bias Freq0", pModal->xpaBiasLvlFreq[0]);
-	PR_EEP("XPA Bias Freq1", pModal->xpaBiasLvlFreq[1]);
-	PR_EEP("XPA Bias Freq2", pModal->xpaBiasLvlFreq[2]);
-
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-
-#undef PR_EEP
-}
-
-static ssize_t read_9287_modal_eeprom(struct file *file,
-				      char __user *user_buf,
-				      size_t count, loff_t *ppos)
-{
-#define PR_EEP(_s, _val)						\
-	do {								\
-		len += scnprintf(buf + len, size - len, "%20s : %10d\n",\
-				 _s, (_val));				\
-	} while (0)
-
-	struct ath9k_htc_priv *priv = file->private_data;
-	struct modal_eep_ar9287_header *pModal = &priv->ah->eeprom.map9287.modalHeader;
-	unsigned int len = 0, size = 3000;
-	ssize_t retval = 0;
-	char *buf;
-
-	buf = kzalloc(size, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	PR_EEP("Chain0 Ant. Control", pModal->antCtrlChain[0]);
-	PR_EEP("Chain1 Ant. Control", pModal->antCtrlChain[1]);
-	PR_EEP("Ant. Common Control", pModal->antCtrlCommon);
-	PR_EEP("Chain0 Ant. Gain", pModal->antennaGainCh[0]);
-	PR_EEP("Chain1 Ant. Gain", pModal->antennaGainCh[1]);
-	PR_EEP("Switch Settle", pModal->switchSettling);
-	PR_EEP("Chain0 TxRxAtten", pModal->txRxAttenCh[0]);
-	PR_EEP("Chain1 TxRxAtten", pModal->txRxAttenCh[1]);
-	PR_EEP("Chain0 RxTxMargin", pModal->rxTxMarginCh[0]);
-	PR_EEP("Chain1 RxTxMargin", pModal->rxTxMarginCh[1]);
-	PR_EEP("ADC Desired size", pModal->adcDesiredSize);
-	PR_EEP("txEndToXpaOff", pModal->txEndToXpaOff);
-	PR_EEP("txEndToRxOn", pModal->txEndToRxOn);
-	PR_EEP("txFrameToXpaOn", pModal->txFrameToXpaOn);
-	PR_EEP("CCA Threshold)", pModal->thresh62);
-	PR_EEP("Chain0 NF Threshold", pModal->noiseFloorThreshCh[0]);
-	PR_EEP("Chain1 NF Threshold", pModal->noiseFloorThreshCh[1]);
-	PR_EEP("xpdGain", pModal->xpdGain);
-	PR_EEP("External PD", pModal->xpd);
-	PR_EEP("Chain0 I Coefficient", pModal->iqCalICh[0]);
-	PR_EEP("Chain1 I Coefficient", pModal->iqCalICh[1]);
-	PR_EEP("Chain0 Q Coefficient", pModal->iqCalQCh[0]);
-	PR_EEP("Chain1 Q Coefficient", pModal->iqCalQCh[1]);
-	PR_EEP("pdGainOverlap", pModal->pdGainOverlap);
-	PR_EEP("xPA Bias Level", pModal->xpaBiasLvl);
-	PR_EEP("txFrameToDataStart", pModal->txFrameToDataStart);
-	PR_EEP("txFrameToPaOn", pModal->txFrameToPaOn);
-	PR_EEP("HT40 Power Inc.", pModal->ht40PowerIncForPdadc);
-	PR_EEP("Chain0 bswAtten", pModal->bswAtten[0]);
-	PR_EEP("Chain1 bswAtten", pModal->bswAtten[1]);
-	PR_EEP("Chain0 bswMargin", pModal->bswMargin[0]);
-	PR_EEP("Chain1 bswMargin", pModal->bswMargin[1]);
-	PR_EEP("HT40 Switch Settle", pModal->swSettleHt40);
-	PR_EEP("AR92x7 Version", pModal->version);
-	PR_EEP("DriverBias1", pModal->db1);
-	PR_EEP("DriverBias2", pModal->db1);
-	PR_EEP("CCK OutputBias", pModal->ob_cck);
-	PR_EEP("PSK OutputBias", pModal->ob_psk);
-	PR_EEP("QAM OutputBias", pModal->ob_qam);
-	PR_EEP("PAL_OFF OutputBias", pModal->ob_pal_off);
-
-	if (len > size)
-		len = size;
-
-	retval = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-
-	return retval;
-
-#undef PR_EEP
-}
-
-static ssize_t read_file_modal_eeprom(struct file *file, char __user *user_buf,
-				      size_t count, loff_t *ppos)
-{
-	struct ath9k_htc_priv *priv = file->private_data;
-
-	if (AR_SREV_9271(priv->ah))
-		return read_4k_modal_eeprom(file, user_buf, count, ppos);
-	else if (priv->ah->hw_version.usbdev == AR9280_USB)
-		return read_def_modal_eeprom(file, user_buf, count, ppos);
-	else if (priv->ah->hw_version.usbdev == AR9287_USB)
-		return read_9287_modal_eeprom(file, user_buf, count, ppos);
-
-	return 0;
-}
-
-static const struct file_operations fops_modal_eeprom = {
-	.read = read_file_modal_eeprom,
-	.open = simple_open,
-	.owner = THIS_MODULE,
-	.llseek = default_llseek,
-};
-
-
 /* Ethtool support for get-stats */
 #define AMKSTR(nm) #nm "_BE", #nm "_BK", #nm "_VI", #nm "_VO"
 static const char ath9k_htc_gstrings_stats[][ETH_GSTRING_LEN] = {
@@ -947,6 +453,8 @@
 
 #define STXBASE priv->debug.tx_stats
 #define SRXBASE priv->debug.rx_stats
+#define SKBTXBASE priv->debug.tx_stats
+#define SKBRXBASE priv->debug.skbrx_stats
 #define ASTXQ(a)					\
 	data[i++] = STXBASE.a[IEEE80211_AC_BE];		\
 	data[i++] = STXBASE.a[IEEE80211_AC_BK];		\
@@ -960,24 +468,24 @@
 	struct ath9k_htc_priv *priv = hw->priv;
 	int i = 0;
 
-	data[i++] = STXBASE.skb_success;
-	data[i++] = STXBASE.skb_success_bytes;
-	data[i++] = SRXBASE.skb_completed;
-	data[i++] = SRXBASE.skb_completed_bytes;
+	data[i++] = SKBTXBASE.skb_success;
+	data[i++] = SKBTXBASE.skb_success_bytes;
+	data[i++] = SKBRXBASE.skb_completed;
+	data[i++] = SKBRXBASE.skb_completed_bytes;
 
 	ASTXQ(queue_stats);
 
-	data[i++] = SRXBASE.err_crc;
-	data[i++] = SRXBASE.err_decrypt_crc;
-	data[i++] = SRXBASE.err_phy;
-	data[i++] = SRXBASE.err_mic;
-	data[i++] = SRXBASE.err_pre_delim;
-	data[i++] = SRXBASE.err_post_delim;
-	data[i++] = SRXBASE.err_decrypt_busy;
+	data[i++] = SRXBASE.crc_err;
+	data[i++] = SRXBASE.decrypt_crc_err;
+	data[i++] = SRXBASE.phy_err;
+	data[i++] = SRXBASE.mic_err;
+	data[i++] = SRXBASE.pre_delim_crc_err;
+	data[i++] = SRXBASE.post_delim_crc_err;
+	data[i++] = SRXBASE.decrypt_busy_err;
 
-	data[i++] = SRXBASE.err_phy_stats[ATH9K_PHYERR_RADAR];
-	data[i++] = SRXBASE.err_phy_stats[ATH9K_PHYERR_OFDM_TIMING];
-	data[i++] = SRXBASE.err_phy_stats[ATH9K_PHYERR_CCK_TIMING];
+	data[i++] = SRXBASE.phy_err_stats[ATH9K_PHYERR_RADAR];
+	data[i++] = SRXBASE.phy_err_stats[ATH9K_PHYERR_OFDM_TIMING];
+	data[i++] = SRXBASE.phy_err_stats[ATH9K_PHYERR_CCK_TIMING];
 
 	WARN_ON(i != ATH9K_HTC_SSTATS_LEN);
 }
@@ -1001,18 +509,21 @@
 			    priv, &fops_tgt_rx_stats);
 	debugfs_create_file("xmit", S_IRUSR, priv->debug.debugfs_phy,
 			    priv, &fops_xmit);
-	debugfs_create_file("recv", S_IRUSR, priv->debug.debugfs_phy,
-			    priv, &fops_recv);
+	debugfs_create_file("skb_rx", S_IRUSR, priv->debug.debugfs_phy,
+			    priv, &fops_skb_rx);
+
+	ath9k_cmn_debug_recv(priv->debug.debugfs_phy, &priv->debug.rx_stats);
+	ath9k_cmn_debug_phy_err(priv->debug.debugfs_phy, &priv->debug.rx_stats);
+
 	debugfs_create_file("slot", S_IRUSR, priv->debug.debugfs_phy,
 			    priv, &fops_slot);
 	debugfs_create_file("queue", S_IRUSR, priv->debug.debugfs_phy,
 			    priv, &fops_queue);
 	debugfs_create_file("debug", S_IRUSR | S_IWUSR, priv->debug.debugfs_phy,
 			    priv, &fops_debug);
-	debugfs_create_file("base_eeprom", S_IRUSR, priv->debug.debugfs_phy,
-			    priv, &fops_base_eeprom);
-	debugfs_create_file("modal_eeprom", S_IRUSR, priv->debug.debugfs_phy,
-			    priv, &fops_modal_eeprom);
+
+	ath9k_cmn_debug_base_eeprom(priv->debug.debugfs_phy, priv->ah);
+	ath9k_cmn_debug_modal_eeprom(priv->debug.debugfs_phy, priv->ah);
 
 	return 0;
 }

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 289f3d8..bb86eb2 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c

@@ -996,8 +996,6 @@
 		goto rx_next;
 	}
 
-	ath9k_htc_err_stat_rx(priv, rxstatus);
-
 	/* Get the RX status information */
 
 	memset(rx_status, 0, sizeof(struct ieee80211_rx_status));
@@ -1005,6 +1003,7 @@
 	/* Copy everything from ath_htc_rx_status (HTC_RX_FRAME_HEADER).
 	 * After this, we can drop this part of skb. */
 	rx_status_htc_to_ath(&rx_stats, rxstatus);
+	ath9k_htc_err_stat_rx(priv, &rx_stats);
 	rx_status->mactime = be64_to_cpu(rxstatus->rs_tstamp);
 	skb_pull(skb, HTC_RX_FRAME_HEADER_SIZE);
 

diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index c8a9dfa..2a8ed83 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c

@@ -26,7 +26,6 @@
 #include "ar9003_mac.h"
 #include "ar9003_mci.h"
 #include "ar9003_phy.h"
-#include "debug.h"
 #include "ath9k.h"
 
 static bool ath9k_hw_set_reset_reg(struct ath_hw *ah, u32 type);
@@ -246,6 +245,8 @@
 		return;
 	case AR9300_DEVID_AR953X:
 		ah->hw_version.macVersion = AR_SREV_VERSION_9531;
+		if (ah->get_mac_revision)
+			ah->hw_version.macRev = ah->get_mac_revision();
 		return;
 	}
 

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 36ae649..0246b99 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c

@@ -61,6 +61,10 @@
 module_param_named(ps_enable, ath9k_ps_enable, int, 0444);
 MODULE_PARM_DESC(ps_enable, "Enable WLAN PowerSave");
 
+static int ath9k_use_chanctx;
+module_param_named(use_chanctx, ath9k_use_chanctx, int, 0444);
+MODULE_PARM_DESC(use_chanctx, "Enable channel context for concurrency");
+
 bool is_ath9k_unloaded;
 
 #ifdef CONFIG_MAC80211_LEDS
@@ -508,7 +512,7 @@
 	sc->tx99_power = MAX_RATE_POWER + 1;
 	init_waitqueue_head(&sc->tx_wait);
 
-	if (!pdata) {
+	if (!pdata || pdata->use_eeprom) {
 		ah->ah_flags |= AH_USE_EEPROM;
 		sc->sc_ah->led_pin = -1;
 	} else {
@@ -589,6 +593,9 @@
 	if (ret)
 		goto err_btcoex;
 
+	sc->p2p_ps_timer = ath_gen_timer_alloc(sc->sc_ah, ath9k_p2p_ps_timer,
+		NULL, sc, AR_FIRST_NDP_TIMER);
+
 	ath9k_cmn_init_crypto(sc->sc_ah);
 	ath9k_init_misc(sc);
 	ath_fill_led_pin(sc);
@@ -643,17 +650,20 @@
 }
 
 static const struct ieee80211_iface_limit if_limits[] = {
-	{ .max = 2048,	.types = BIT(NL80211_IFTYPE_STATION) |
-				 BIT(NL80211_IFTYPE_P2P_CLIENT) |
-				 BIT(NL80211_IFTYPE_WDS) },
+	{ .max = 2048,	.types = BIT(NL80211_IFTYPE_STATION) },
 	{ .max = 8,	.types =
 #ifdef CONFIG_MAC80211_MESH
 				 BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
-				 BIT(NL80211_IFTYPE_AP) |
+				 BIT(NL80211_IFTYPE_AP) },
+	{ .max = 1,	.types = BIT(NL80211_IFTYPE_P2P_CLIENT) |
 				 BIT(NL80211_IFTYPE_P2P_GO) },
 };
 
+static const struct ieee80211_iface_limit wds_limits[] = {
+	{ .max = 2048,	.types = BIT(NL80211_IFTYPE_WDS) },
+};
+
 static const struct ieee80211_iface_limit if_dfs_limits[] = {
 	{ .max = 1,	.types = BIT(NL80211_IFTYPE_AP) |
 #ifdef CONFIG_MAC80211_MESH
@@ -670,6 +680,13 @@
 		.num_different_channels = 1,
 		.beacon_int_infra_match = true,
 	},
+	{
+		.limits = wds_limits,
+		.n_limits = ARRAY_SIZE(wds_limits),
+		.max_interfaces = 2048,
+		.num_different_channels = 1,
+		.beacon_int_infra_match = true,
+	},
 #ifdef CONFIG_ATH9K_DFS_CERTIFIED
 	{
 		.limits = if_dfs_limits,
@@ -711,19 +728,23 @@
 	if (AR_SREV_9160_10_OR_LATER(sc->sc_ah) || ath9k_modparam_nohwcrypt)
 		hw->flags |= IEEE80211_HW_MFP_CAPABLE;
 
-	hw->wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
+	hw->wiphy->features |= (NL80211_FEATURE_ACTIVE_MONITOR |
+				NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE);
 
 	if (!config_enabled(CONFIG_ATH9K_TX99)) {
 		hw->wiphy->interface_modes =
 			BIT(NL80211_IFTYPE_P2P_GO) |
 			BIT(NL80211_IFTYPE_P2P_CLIENT) |
 			BIT(NL80211_IFTYPE_AP) |
-			BIT(NL80211_IFTYPE_WDS) |
 			BIT(NL80211_IFTYPE_STATION) |
 			BIT(NL80211_IFTYPE_ADHOC) |
 			BIT(NL80211_IFTYPE_MESH_POINT);
 		hw->wiphy->iface_combinations = if_comb;
-		hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
+		if (!ath9k_use_chanctx) {
+			hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
+			hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_WDS);
+		} else
+			hw->wiphy->n_iface_combinations = 1;
 	}
 
 	hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -855,6 +876,9 @@
 {
 	int i = 0;
 
+	if (sc->p2p_ps_timer)
+		ath_gen_timer_free(sc->sc_ah, sc->p2p_ps_timer);
+
 	ath9k_deinit_btcoex(sc);
 
 	for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++)

diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c
index 51ce36f..275205a 100644
--- a/drivers/net/wireless/ath/ath9k/mac.c
+++ b/drivers/net/wireless/ath/ath9k/mac.c

@@ -958,3 +958,34 @@
 	return;
 }
 EXPORT_SYMBOL(ath9k_hw_set_interrupts);
+
+#define ATH9K_HW_MAX_DCU       10
+#define ATH9K_HW_SLICE_PER_DCU 16
+#define ATH9K_HW_BIT_IN_SLICE  16
+
+/**
+ * ath9k_hw_set_tx_filter - write a destination's TX filter bit for each DCU
+ * @ah: hardware instance the AR_D_TXBLK_BASE writes are issued on
+ * @destidx: destination index; slice is destidx / ATH9K_HW_SLICE_PER_DCU,
+ *	     bit is destidx % ATH9K_HW_BIT_IN_SLICE
+ * @set: value placed in the AR_D_TXBLK_WRITE_COMMAND field
+ *
+ * One write command is issued per DCU index in [0, ATH9K_HW_MAX_DCU).
+ */
+void ath9k_hw_set_tx_filter(struct ath_hw *ah, u8 destidx, bool set)
+{
+	int dcu_idx;
+	u32 filter;
+
+	for (dcu_idx = 0; dcu_idx < ATH9K_HW_MAX_DCU; dcu_idx++) {
+		filter = SM(set, AR_D_TXBLK_WRITE_COMMAND);
+		filter |= SM(dcu_idx, AR_D_TXBLK_WRITE_DCU);
+		filter |= SM((destidx / ATH9K_HW_SLICE_PER_DCU),
+			     AR_D_TXBLK_WRITE_SLICE);
+		filter |= BIT(destidx % ATH9K_HW_BIT_IN_SLICE);
+		ath_dbg(ath9k_hw_common(ah), PS,
+			"DCU%d staid %d set %d txfilter %08x\n",
+			dcu_idx, destidx, set, filter);
+		REG_WRITE(ah, AR_D_TXBLK_BASE, filter);
+	}
+}
+EXPORT_SYMBOL(ath9k_hw_set_tx_filter);

diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h
index 89df634..da76867 100644
--- a/drivers/net/wireless/ath/ath9k/mac.h
+++ b/drivers/net/wireless/ath/ath9k/mac.h

@@ -729,6 +729,7 @@
 void ath9k_hw_abortpcurecv(struct ath_hw *ah);
 bool ath9k_hw_stopdmarecv(struct ath_hw *ah, bool *reset);
 int ath9k_hw_beaconq_setup(struct ath_hw *ah);
+void ath9k_hw_set_tx_filter(struct ath_hw *ah, u8 destidx, bool set);
 
 /* Interrupt Handling */
 bool ath9k_hw_intrpend(struct ath_hw *ah);

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index d69853b..62ac95d 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c

@@ -261,6 +261,8 @@
 	sc->gtt_cnt = 0;
 	ieee80211_wake_queues(sc->hw);
 
+	ath9k_p2p_ps_timer(sc);
+
 	return true;
 }
 
@@ -419,6 +421,7 @@
 	an->sc = sc;
 	an->sta = sta;
 	an->vif = vif;
+	memset(&an->key_idx, 0, sizeof(an->key_idx));
 
 	ath_tx_node_init(sc, an);
 }
@@ -1119,6 +1122,8 @@
 	if (ath9k_uses_beacons(vif->type))
 		ath9k_beacon_assign_slot(sc, vif);
 
+	avp->vif = vif;
+
 	an->sc = sc;
 	an->sta = NULL;
 	an->vif = vif;
@@ -1163,6 +1168,29 @@
 	return 0;
 }
 
+static void
+ath9k_update_p2p_ps_timer(struct ath_softc *sc, struct ath_vif *avp)
+{
+	struct ath_hw *ah = sc->sc_ah;
+	s32 tsf, target_tsf;
+	/* Re-arm sc->p2p_ps_timer from avp->noa; no-op without avp or a next TSF. */
+	if (!avp || !avp->noa.has_next_tsf)
+		return; /* NOTE(review): a NULL avp returns before the timer stop below */
+
+	ath9k_hw_gen_timer_stop(ah, sc->p2p_ps_timer);
+
+	tsf = ath9k_hw_gettsf32(sc->sc_ah);
+	/* Target is next_tsf, pulled ATH_P2P_PS_STOP_TIME earlier when not absent. */
+	target_tsf = avp->noa.next_tsf;
+	if (!avp->noa.absent)
+		target_tsf -= ATH_P2P_PS_STOP_TIME;
+	/* Signed compare: never schedule closer than ATH_P2P_PS_STOP_TIME after tsf. */
+	if (target_tsf - tsf < ATH_P2P_PS_STOP_TIME)
+		target_tsf = tsf + ATH_P2P_PS_STOP_TIME;
+
+	ath9k_hw_gen_timer_start(ah, sc->p2p_ps_timer, (u32) target_tsf, 1000000);
+}
+
 static void ath9k_remove_interface(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif)
 {
@@ -1174,6 +1202,13 @@
 
 	mutex_lock(&sc->mutex);
 
+	spin_lock_bh(&sc->sc_pcu_lock);
+	if (avp == sc->p2p_ps_vif) {
+		sc->p2p_ps_vif = NULL;
+		ath9k_update_p2p_ps_timer(sc, NULL);
+	}
+	spin_unlock_bh(&sc->sc_pcu_lock);
+
 	sc->nvifs--;
 	sc->tx99_vif = NULL;
 
@@ -1427,8 +1462,10 @@
 		return 0;
 
 	key = ath_key_config(common, vif, sta, &ps_key);
-	if (key > 0)
+	if (key > 0) {
 		an->ps_key = key;
+		an->key_idx[0] = key;
+	}
 
 	return 0;
 }
@@ -1446,6 +1483,7 @@
 
 	ath_key_delete(common, &ps_key);
 	an->ps_key = 0;
+	an->key_idx[0] = 0;
 }
 
 static int ath9k_sta_remove(struct ieee80211_hw *hw,
@@ -1460,6 +1498,19 @@
 	return 0;
 }
 
+static void ath9k_sta_set_tx_filter(struct ath_hw *ah,
+				    struct ath_node *an,
+				    bool set)
+{
+	int i;
+	/* Forward @set to ath9k_hw_set_tx_filter() for every non-zero an->key_idx entry. */
+	for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) {
+		if (!an->key_idx[i])
+			continue; /* zero entries are skipped */
+		ath9k_hw_set_tx_filter(ah, an->key_idx[i], set);
+	}
+}
+
 static void ath9k_sta_notify(struct ieee80211_hw *hw,
 			 struct ieee80211_vif *vif,
 			 enum sta_notify_cmd cmd,
@@ -1472,8 +1523,10 @@
 	case STA_NOTIFY_SLEEP:
 		an->sleeping = true;
 		ath_tx_aggr_sleep(sta, sc, an);
+		ath9k_sta_set_tx_filter(sc->sc_ah, an, true);
 		break;
 	case STA_NOTIFY_AWAKE:
+		ath9k_sta_set_tx_filter(sc->sc_ah, an, false);
 		an->sleeping = false;
 		ath_tx_aggr_wakeup(sc, an);
 		break;
@@ -1529,7 +1582,8 @@
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
-	int ret = 0;
+	struct ath_node *an = NULL;
+	int ret = 0, i;
 
 	if (ath9k_modparam_nohwcrypt)
 		return -ENOSPC;
@@ -1551,13 +1605,16 @@
 
 	mutex_lock(&sc->mutex);
 	ath9k_ps_wakeup(sc);
-	ath_dbg(common, CONFIG, "Set HW Key\n");
+	ath_dbg(common, CONFIG, "Set HW Key %d\n", cmd);
+	if (sta)
+		an = (struct ath_node *)sta->drv_priv;
 
 	switch (cmd) {
 	case SET_KEY:
 		if (sta)
 			ath9k_del_ps_key(sc, vif, sta);
 
+		key->hw_key_idx = 0;
 		ret = ath_key_config(common, vif, sta, key);
 		if (ret >= 0) {
 			key->hw_key_idx = ret;
@@ -1570,9 +1627,27 @@
 				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
 			ret = 0;
 		}
+		if (an && key->hw_key_idx) {
+			for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) {
+				if (an->key_idx[i])
+					continue;
+				an->key_idx[i] = key->hw_key_idx;
+				break;
+			}
+			WARN_ON(i == ARRAY_SIZE(an->key_idx));
+		}
 		break;
 	case DISABLE_KEY:
 		ath_key_delete(common, key);
+		if (an) {
+			for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) {
+				if (an->key_idx[i] != key->hw_key_idx)
+					continue;
+				an->key_idx[i] = 0;
+				break;
+			}
+		}
+		key->hw_key_idx = 0;
 		break;
 	default:
 		ret = -EINVAL;
@@ -1636,6 +1711,66 @@
 		ath9k_set_assoc_state(sc, vif);
 }
 
+void ath9k_p2p_ps_timer(void *priv)
+{
+	struct ath_softc *sc = priv;
+	struct ath_vif *avp = sc->p2p_ps_vif;
+	struct ieee80211_vif *vif;
+	struct ieee80211_sta *sta;
+	struct ath_node *an;
+	u32 tsf;
+	/* Nothing to do unless a vif is recorded in sc->p2p_ps_vif. */
+	if (!avp)
+		return;
+
+	tsf = ath9k_hw_gettsf32(sc->sc_ah);
+	if (!avp->noa.absent)
+		tsf += ATH_P2P_PS_STOP_TIME; /* evaluate the noa state this far ahead */
+	/* Refresh noa when no next TSF is known or next_tsf - tsf exceeds BIT(31). */
+	if (!avp->noa.has_next_tsf ||
+	    avp->noa.next_tsf - tsf > BIT(31))
+		ieee80211_update_p2p_noa(&avp->noa, tsf);
+
+	ath9k_update_p2p_ps_timer(sc, avp);
+
+	rcu_read_lock();
+	/* Look up the station whose address equals the vif's BSSID. */
+	vif = avp->vif;
+	sta = ieee80211_find_sta(vif, vif->bss_conf.bssid);
+	if (!sta)
+		goto out;
+	/* Leave the station alone if its sleeping flag already matches noa.absent. */
+	an = (void *) sta->drv_priv;
+	if (an->sleeping == !!avp->noa.absent)
+		goto out;
+	/* Copy noa.absent into an->sleeping and call the matching aggr helper. */
+	an->sleeping = avp->noa.absent;
+	if (an->sleeping)
+		ath_tx_aggr_sleep(sta, sc, an);
+	else
+		ath_tx_aggr_wakeup(sc, an);
+
+out:
+	rcu_read_unlock();
+}
+
+void ath9k_update_p2p_ps(struct ath_softc *sc, struct ieee80211_vif *vif)
+{
+	struct ath_vif *avp = (void *)vif->drv_priv;
+	u32 tsf;
+	/* Requires an allocated sc->p2p_ps_timer and a station vif with vif->p2p set. */
+	if (!sc->p2p_ps_timer)
+		return;
+
+	if (vif->type != NL80211_IFTYPE_STATION || !vif->p2p)
+		return;
+	/* Record the vif, parse its p2p_noa_attr at the current TSF, then re-arm the timer. */
+	sc->p2p_ps_vif = avp;
+	tsf = ath9k_hw_gettsf32(sc->sc_ah);
+	ieee80211_parse_p2p_noa(&vif->bss_conf.p2p_noa_attr, &avp->noa, tsf);
+	ath9k_update_p2p_ps_timer(sc, avp);
+}
+
 static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif,
 				   struct ieee80211_bss_conf *bss_conf,
@@ -1650,6 +1785,7 @@
 	struct ath_hw *ah = sc->sc_ah;
 	struct ath_common *common = ath9k_hw_common(ah);
 	struct ath_vif *avp = (void *)vif->drv_priv;
+	unsigned long flags;
 	int slottime;
 
 	ath9k_ps_wakeup(sc);
@@ -1710,6 +1846,15 @@
 		}
 	}
 
+	if (changed & BSS_CHANGED_P2P_PS) {
+		spin_lock_bh(&sc->sc_pcu_lock);
+		spin_lock_irqsave(&sc->sc_pm_lock, flags);
+		if (!(sc->ps_flags & PS_BEACON_SYNC))
+			ath9k_update_p2p_ps(sc, vif);
+		spin_unlock_irqrestore(&sc->sc_pm_lock, flags);
+		spin_unlock_bh(&sc->sc_pcu_lock);
+	}
+
 	if (changed & CHECK_ANI)
 		ath_check_ani(sc);
 
@@ -1883,7 +2028,8 @@
 	return !!npend;
 }
 
-static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath9k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			u32 queues, bool drop)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_hw *ah = sc->sc_ah;
@@ -2084,14 +2230,6 @@
 	clear_bit(ATH_OP_SCANNING, &common->op_flags);
 }
 
-static void ath9k_channel_switch_beacon(struct ieee80211_hw *hw,
-					struct ieee80211_vif *vif,
-					struct cfg80211_chan_def *chandef)
-{
-	/* depend on vif->csa_active only */
-	return;
-}
-
 struct ieee80211_ops ath9k_ops = {
 	.tx 		    = ath9k_tx,
 	.start 		    = ath9k_start,
@@ -2139,5 +2277,4 @@
 #endif
 	.sw_scan_start	    = ath9k_sw_scan_start,
 	.sw_scan_complete   = ath9k_sw_scan_complete,
-	.channel_switch_beacon     = ath9k_channel_switch_beacon,
 };

diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index 914dbc6..4dec09e 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c

@@ -686,7 +686,7 @@
 	struct ath_softc *sc = (struct ath_softc *) common->priv;
 	struct ath9k_platform_data *pdata = sc->dev->platform_data;
 
-	if (pdata) {
+	if (pdata && !pdata->use_eeprom) {
 		if (off >= (ARRAY_SIZE(pdata->eeprom_data))) {
 			ath_err(common,
 				"%s: eeprom read failed, offset %08x is out of range\n",
@@ -914,6 +914,7 @@
 	 */
 	ath9k_stop_btcoex(sc);
 	ath9k_hw_disable(sc->sc_ah);
+	del_timer_sync(&sc->sleep_timer);
 	ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP);
 
 	return 0;

diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 19df969..9105a92 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c

@@ -34,7 +34,8 @@
  * buffer (or rx fifo). This can incorrectly acknowledge packets
  * to a sender if last desc is self-linked.
  */
-static void ath_rx_buf_link(struct ath_softc *sc, struct ath_rxbuf *bf)
+static void ath_rx_buf_link(struct ath_softc *sc, struct ath_rxbuf *bf,
+			    bool flush)
 {
 	struct ath_hw *ah = sc->sc_ah;
 	struct ath_common *common = ath9k_hw_common(ah);
@@ -59,18 +60,19 @@
 			     common->rx_bufsize,
 			     0);
 
-	if (sc->rx.rxlink == NULL)
-		ath9k_hw_putrxbuf(ah, bf->bf_daddr);
-	else
+	if (sc->rx.rxlink)
 		*sc->rx.rxlink = bf->bf_daddr;
+	else if (!flush)
+		ath9k_hw_putrxbuf(ah, bf->bf_daddr);
 
 	sc->rx.rxlink = &ds->ds_link;
 }
 
-static void ath_rx_buf_relink(struct ath_softc *sc, struct ath_rxbuf *bf)
+static void ath_rx_buf_relink(struct ath_softc *sc, struct ath_rxbuf *bf,
+			      bool flush)
 {
 	if (sc->rx.buf_hold)
-		ath_rx_buf_link(sc, sc->rx.buf_hold);
+		ath_rx_buf_link(sc, sc->rx.buf_hold, flush);
 
 	sc->rx.buf_hold = bf;
 }
@@ -442,7 +444,7 @@
 	sc->rx.buf_hold = NULL;
 	sc->rx.rxlink = NULL;
 	list_for_each_entry_safe(bf, tbf, &sc->rx.rxbuf, list) {
-		ath_rx_buf_link(sc, bf);
+		ath_rx_buf_link(sc, bf, false);
 	}
 
 	/* We could have deleted elements so the list may be empty now */
@@ -538,7 +540,10 @@
 		sc->ps_flags &= ~PS_BEACON_SYNC;
 		ath_dbg(common, PS,
 			"Reconfigure beacon timers based on synchronized timestamp\n");
-		ath9k_set_beacon(sc);
+		if (!(WARN_ON_ONCE(sc->cur_beacon_conf.beacon_interval == 0)))
+			ath9k_set_beacon(sc);
+		if (sc->p2p_ps_vif)
+			ath9k_update_p2p_ps(sc, sc->p2p_ps_vif->vif);
 	}
 
 	if (ath_beacon_dtim_pending_cab(skb)) {
@@ -1115,12 +1120,12 @@
 requeue:
 		list_add_tail(&bf->list, &sc->rx.rxbuf);
 
-		if (edma) {
-			ath_rx_edma_buf_link(sc, qtype);
-		} else {
-			ath_rx_buf_relink(sc, bf);
+		if (!edma) {
+			ath_rx_buf_relink(sc, bf, flush);
 			if (!flush)
 				ath9k_hw_rxena(ah);
+		} else if (!flush) {
+			ath_rx_edma_buf_link(sc, qtype);
 		}
 
 		if (!budget--)

diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index b1fd3fa..f1bbce3 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h

@@ -505,9 +505,6 @@
 #define AR_D_QCUMASK         0x000003FF
 #define AR_D_QCUMASK_RESV0   0xFFFFFC00
 
-#define AR_D_TXBLK_CMD  0x1038
-#define AR_D_TXBLK_DATA(i) (AR_D_TXBLK_CMD+(i))
-
 #define AR_D0_LCL_IFS     0x1040
 #define AR_D1_LCL_IFS     0x1044
 #define AR_D2_LCL_IFS     0x1048

diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 4c8cdb0..f8ded84 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c

@@ -1707,7 +1707,9 @@
 	return 0;
 }
 
-static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void carl9170_op_flush(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      u32 queues, bool drop)
 {
 	struct ar9170 *ar = hw->priv;
 	unsigned int vid;

diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index ca115f3..f35c7f3 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c

@@ -1076,8 +1076,14 @@
 
 	carl9170_set_state(ar, CARL9170_STOPPED);
 
-	return request_firmware_nowait(THIS_MODULE, 1, CARL9170FW_NAME,
+	err = request_firmware_nowait(THIS_MODULE, 1, CARL9170FW_NAME,
 		&ar->udev->dev, GFP_KERNEL, ar, carl9170_usb_firmware_step2);
+	if (err) {
+		usb_put_dev(udev);
+		usb_put_dev(udev);
+		carl9170_free(ar);
+	}
+	return err;
 }
 
 static void carl9170_usb_disconnect(struct usb_interface *intf)

diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c
index a1a69c5..650be79 100644
--- a/drivers/net/wireless/ath/dfs_pattern_detector.c
+++ b/drivers/net/wireless/ath/dfs_pattern_detector.c

@@ -73,9 +73,60 @@
 	.radar_types		= etsi_radar_ref_types_v15,
 };
 
-/* for now, we support ETSI radar types, FCC and JP are TODO */
+/*
+ * FCC_PATTERN - positional initializer for one struct radar_detector_specs
+ * entry: ID, pulse width bounds from WIDTH_LOWER(WMIN)/WIDTH_UPPER(WMAX),
+ * PMIN and PMAX * PRF widened by PRI_TOLERANCE, then PRF, PPB * PRF,
+ * PPB_THRESH(PPB) and PRI_TOLERANCE.
+ * Arguments used in arithmetic are parenthesized so that expression
+ * arguments keep their intended precedence.
+ */
+#define FCC_PATTERN(ID, WMIN, WMAX, PMIN, PMAX, PRF, PPB)	\
+{								\
+	ID, WIDTH_LOWER(WMIN), WIDTH_UPPER(WMAX),		\
+	(PMIN) - PRI_TOLERANCE,					\
+	(PMAX) * (PRF) + PRI_TOLERANCE, PRF, (PPB) * (PRF),	\
+	PPB_THRESH(PPB), PRI_TOLERANCE,				\
+}
+
+static const struct radar_detector_specs fcc_radar_ref_types[] = {
+	FCC_PATTERN(0, 0, 1, 1428, 1428, 1, 18),
+	FCC_PATTERN(1, 0, 5, 150, 230, 1, 23),
+	FCC_PATTERN(2, 6, 10, 200, 500, 1, 16),
+	FCC_PATTERN(3, 11, 20, 200, 500, 1, 12),
+	FCC_PATTERN(4, 50, 100, 1000, 2000, 20, 1),
+	FCC_PATTERN(5, 0, 1, 333, 333, 1, 9),
+};
+
+static const struct radar_types fcc_radar_types = {
+	.region			= NL80211_DFS_FCC,
+	.num_radar_types	= ARRAY_SIZE(fcc_radar_ref_types),
+	.radar_types		= fcc_radar_ref_types,
+};
+
+#define JP_PATTERN FCC_PATTERN
+static const struct radar_detector_specs jp_radar_ref_types[] = {
+	JP_PATTERN(0, 0, 1, 1428, 1428, 1, 18),
+	JP_PATTERN(1, 2, 3, 3846, 3846, 1, 18),
+	JP_PATTERN(2, 0, 1, 1388, 1388, 1, 18),
+	JP_PATTERN(3, 1, 2, 4000, 4000, 1, 18),
+	JP_PATTERN(4, 0, 5, 150, 230, 1, 23),
+	JP_PATTERN(5, 6, 10, 200, 500, 1, 16),
+	JP_PATTERN(6, 11, 20, 200, 500, 1, 12),
+	JP_PATTERN(7, 50, 100, 1000, 2000, 20, 1),
+	JP_PATTERN(5, 0, 1, 333, 333, 1, 9), /* TODO confirm: ID 5 reused */
+};
+
+static const struct radar_types jp_radar_types = {
+	.region			= NL80211_DFS_JP,
+	.num_radar_types	= ARRAY_SIZE(jp_radar_ref_types),
+	.radar_types		= jp_radar_ref_types,
+};
+
 static const struct radar_types *dfs_domains[] = {
 	&etsi_radar_types_v15,
+	&fcc_radar_types,
+	&jp_radar_types,
 };
 
 /**

diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 7bf0ef8..6398693 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c

@@ -2068,7 +2068,7 @@
 		if (!msg_ind)
 			goto nomem;
 		msg_ind->msg_len = len;
-		msg_ind->msg = kmalloc(len, GFP_KERNEL);
+		msg_ind->msg = kmemdup(buf, len, GFP_KERNEL);
 		if (!msg_ind->msg) {
 			kfree(msg_ind);
 nomem:
@@ -2080,7 +2080,6 @@
 				    msg_header->msg_type);
 			break;
 		}
-		memcpy(msg_ind->msg, buf, len);
 		mutex_lock(&wcn->hal_ind_mutex);
 		list_add_tail(&msg_ind->list, &wcn->hal_ind_queue);
 		queue_work(wcn->hal_ind_wq, &wcn->hal_ind_work);

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 4806a49..820d4eb 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c

@@ -172,7 +172,7 @@
 
 static int wil_cfg80211_get_station(struct wiphy *wiphy,
 				    struct net_device *ndev,
-				    u8 *mac, struct station_info *sinfo)
+				    const u8 *mac, struct station_info *sinfo)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	int rc;
@@ -288,6 +288,7 @@
 	}
 
 	wil->scan_request = request;
+	mod_timer(&wil->scan_timer, jiffies + WIL6210_SCAN_TO);
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd.num_channels = 0;
@@ -671,7 +672,7 @@
 }
 
 static int wil_cfg80211_del_station(struct wiphy *wiphy,
-				    struct net_device *dev, u8 *mac)
+				    struct net_device *dev, const u8 *mac)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 

diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index ecdabe4..8d4bc4b 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c

@@ -35,7 +35,7 @@
 	void __iomem *x = wmi_addr(wil, vring->hwtail);
 
 	seq_printf(s, "VRING %s = {\n", name);
-	seq_printf(s, "  pa     = 0x%016llx\n", (unsigned long long)vring->pa);
+	seq_printf(s, "  pa     = %pad\n", &vring->pa);
 	seq_printf(s, "  va     = 0x%p\n", vring->va);
 	seq_printf(s, "  size   = %d\n", vring->size);
 	seq_printf(s, "  swtail = %d\n", vring->swtail);
@@ -473,7 +473,7 @@
 			   u[0], u[1], u[2], u[3]);
 		seq_printf(s, "  DMA = 0x%08x 0x%08x 0x%08x 0x%08x\n",
 			   u[4], u[5], u[6], u[7]);
-		seq_printf(s, "  SKB = %p\n", skb);
+		seq_printf(s, "  SKB = 0x%p\n", skb);
 
 		if (skb) {
 			skb_get(skb);

diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index 5824cd4..73593aa 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c

@@ -338,7 +338,7 @@
 	}
 
 	if (isr)
-		wil_err(wil, "un-handled MISC ISR bits 0x%08x\n", isr);
+		wil_dbg_irq(wil, "un-handled MISC ISR bits 0x%08x\n", isr);
 
 	wil->isr_misc = 0;
 

diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 95f4efe..11e6d9d 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c

@@ -81,7 +81,7 @@
 	memset(&sta->stats, 0, sizeof(sta->stats));
 }
 
-static void _wil6210_disconnect(struct wil6210_priv *wil, void *bssid)
+static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid)
 {
 	int cid = -ENOENT;
 	struct net_device *ndev = wil_to_ndev(wil);
@@ -150,6 +150,15 @@
 	schedule_work(&wil->disconnect_worker);
 }
 
+static void wil_scan_timer_fn(ulong x)
+{
+	struct wil6210_priv *wil = (void *)x;
+
+	clear_bit(wil_status_fwready, &wil->status);
+	wil_err(wil, "Scan timeout detected, start fw error recovery\n");
+	schedule_work(&wil->fw_error_worker);
+}
+
 static void wil_fw_error_worker(struct work_struct *work)
 {
 	struct wil6210_priv *wil = container_of(work,
@@ -161,12 +170,30 @@
 	if (no_fw_recovery)
 		return;
 
+	/* increment @recovery_count if less than WIL6210_FW_RECOVERY_TO
+	 * has passed since the last recovery attempt
+	 */
+	if (time_is_after_jiffies(wil->last_fw_recovery +
+				  WIL6210_FW_RECOVERY_TO))
+		wil->recovery_count++;
+	else
+		wil->recovery_count = 1; /* fw was alive for a long time */
+
+	if (wil->recovery_count > WIL6210_FW_RECOVERY_RETRIES) {
+		wil_err(wil, "too many recovery attempts (%d), giving up\n",
+			wil->recovery_count);
+		return;
+	}
+
+	wil->last_fw_recovery = jiffies;
+
 	mutex_lock(&wil->mutex);
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_MONITOR:
-		wil_info(wil, "fw error recovery started...\n");
+		wil_info(wil, "fw error recovery started (try %d)...\n",
+			 wil->recovery_count);
 		wil_reset(wil);
 
 		/* need to re-allocate Rx ring after reset */
@@ -230,6 +257,7 @@
 
 	wil->pending_connect_cid = -1;
 	setup_timer(&wil->connect_timer, wil_connect_timer_fn, (ulong)wil);
+	setup_timer(&wil->scan_timer, wil_scan_timer_fn, (ulong)wil);
 
 	INIT_WORK(&wil->connect_worker, wil_connect_worker);
 	INIT_WORK(&wil->disconnect_worker, wil_disconnect_worker);
@@ -249,10 +277,12 @@
 		return -EAGAIN;
 	}
 
+	wil->last_fw_recovery = jiffies;
+
 	return 0;
 }
 
-void wil6210_disconnect(struct wil6210_priv *wil, void *bssid)
+void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid)
 {
 	del_timer_sync(&wil->connect_timer);
 	_wil6210_disconnect(wil, bssid);
@@ -260,6 +290,7 @@
 
 void wil_priv_deinit(struct wil6210_priv *wil)
 {
+	del_timer_sync(&wil->scan_timer);
 	cancel_work_sync(&wil->disconnect_worker);
 	cancel_work_sync(&wil->fw_error_worker);
 	mutex_lock(&wil->mutex);
@@ -363,8 +394,8 @@
 		wil_err(wil, "Firmware not ready\n");
 		return -ETIME;
 	} else {
-		wil_dbg_misc(wil, "FW ready after %d ms\n",
-			     jiffies_to_msecs(to-left));
+		wil_info(wil, "FW ready after %d ms. HW version 0x%08x\n",
+			 jiffies_to_msecs(to-left), wil->hw_version);
 	}
 	return 0;
 }
@@ -391,6 +422,7 @@
 	if (wil->scan_request) {
 		wil_dbg_misc(wil, "Abort scan_request 0x%p\n",
 			     wil->scan_request);
+		del_timer_sync(&wil->scan_timer);
 		cfg80211_scan_done(wil->scan_request, true);
 		wil->scan_request = NULL;
 	}
@@ -520,6 +552,7 @@
 	napi_disable(&wil->napi_tx);
 
 	if (wil->scan_request) {
+		del_timer_sync(&wil->scan_timer);
 		cfg80211_scan_done(wil->scan_request, true);
 		wil->scan_request = NULL;
 	}

diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index fdcaeb8..106b6dc 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c

@@ -32,12 +32,26 @@
 	return wil_down(wil);
 }
 
+static int wil_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct wil6210_priv *wil = ndev_to_wil(ndev);
+
+	if (new_mtu < 68 || new_mtu > IEEE80211_MAX_DATA_LEN_DMG)
+		return -EINVAL;
+
+	wil_dbg_misc(wil, "change MTU %d -> %d\n", ndev->mtu, new_mtu);
+	ndev->mtu = new_mtu;
+
+	return 0;
+}
+
 static const struct net_device_ops wil_netdev_ops = {
 	.ndo_open		= wil_open,
 	.ndo_stop		= wil_stop,
 	.ndo_start_xmit		= wil_start_xmit,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= wil_change_mtu,
 };
 
 static int wil6210_netdev_poll_rx(struct napi_struct *napi, int budget)

diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index f1e1bb3..1e2e07b 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c

@@ -74,8 +74,6 @@
 	if (rc)
 		goto release_irq;
 
-	wil_info(wil, "HW version: 0x%08x\n", wil->hw_version);
-
 	return 0;
 
  release_irq:
@@ -140,7 +138,7 @@
 		goto err_release_reg;
 	}
 	/* rollback to err_iounmap */
-	dev_info(&pdev->dev, "CSR at %pR -> %p\n", &pdev->resource[0], csr);
+	dev_info(&pdev->dev, "CSR at %pR -> 0x%p\n", &pdev->resource[0], csr);
 
 	wil = wil_if_alloc(dev, csr);
 	if (IS_ERR(wil)) {

diff --git a/drivers/net/wireless/ath/wil6210/rx_reorder.c b/drivers/net/wireless/ath/wil6210/rx_reorder.c
index d04629f..747ae12 100644
--- a/drivers/net/wireless/ath/wil6210/rx_reorder.c
+++ b/drivers/net/wireless/ath/wil6210/rx_reorder.c

@@ -49,10 +49,17 @@
 {
 	int index;
 
-	while (seq_less(r->head_seq_num, hseq)) {
+	/* note: this function is never called with
+	 * hseq preceding r->head_seq_num, i.e. it always holds that
+	 * !seq_less(hseq, r->head_seq_num),
+	 * and thus on loop exit it should be
+	 * r->head_seq_num == hseq
+	 */
+	while (seq_less(r->head_seq_num, hseq) && r->stored_mpdu_num) {
 		index = reorder_index(r, r->head_seq_num);
 		wil_release_reorder_frame(wil, r, index);
 	}
+	r->head_seq_num = hseq;
 }
 
 static void wil_reorder_release(struct wil6210_priv *wil,
@@ -91,6 +98,22 @@
 
 	spin_lock(&r->reorder_lock);
 
+	/* Due to the race between the WMI event reporting BACK
+	 * establishment and data Rx, a few packets may be passed up before
+	 * the reorder buffer gets allocated. Catch up by pretending the SSN
+	 * is what we see in the 1-st Rx packet
+	 */
+	if (r->first_time) {
+		r->first_time = false;
+		if (seq != r->head_seq_num) {
+			wil_err(wil, "Error: 1-st frame with wrong sequence"
+				" %d, should be %d. Fixing...\n", seq,
+				r->head_seq_num);
+			r->head_seq_num = seq;
+			r->ssn = seq;
+		}
+	}
+
 	/* frame with out of date sequence number */
 	if (seq_less(seq, r->head_seq_num)) {
 		dev_kfree_skb(skb);
@@ -162,6 +185,7 @@
 	r->head_seq_num = ssn;
 	r->buf_size = size;
 	r->stored_mpdu_num = 0;
+	r->first_time = true;
 	return r;
 }
 

diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index c8c5474..0784ef3 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c

@@ -64,6 +64,22 @@
 	return vring->size - used - 1;
 }
 
+/**
+ * wil_vring_wmark_low - low watermark for available descriptor space
+ */
+static inline int wil_vring_wmark_low(struct vring *vring)
+{
+	return vring->size/8;
+}
+
+/**
+ * wil_vring_wmark_high - high watermark for available descriptor space
+ */
+static inline int wil_vring_wmark_high(struct vring *vring)
+{
+	return vring->size/4;
+}
+
 static int wil_vring_alloc(struct wil6210_priv *wil, struct vring *vring)
 {
 	struct device *dev = wil_to_dev(wil);
@@ -98,8 +114,8 @@
 		_d->dma.status = TX_DMA_STATUS_DU;
 	}
 
-	wil_dbg_misc(wil, "vring[%d] 0x%p:0x%016llx 0x%p\n", vring->size,
-		     vring->va, (unsigned long long)vring->pa, vring->ctx);
+	wil_dbg_misc(wil, "vring[%d] 0x%p:%pad 0x%p\n", vring->size,
+		     vring->va, &vring->pa, vring->ctx);
 
 	return 0;
 }
@@ -880,8 +896,8 @@
 	pa = dma_map_single(dev, skb->data,
 			skb_headlen(skb), DMA_TO_DEVICE);
 
-	wil_dbg_txrx(wil, "Tx skb %d bytes %p -> %#08llx\n", skb_headlen(skb),
-		     skb->data, (unsigned long long)pa);
+	wil_dbg_txrx(wil, "Tx skb %d bytes 0x%p -> %pad\n", skb_headlen(skb),
+		     skb->data, &pa);
 	wil_hex_dump_txrx("Tx ", DUMP_PREFIX_OFFSET, 16, 1,
 			  skb->data, skb_headlen(skb), false);
 
@@ -1007,7 +1023,7 @@
 	rc = wil_tx_vring(wil, vring, skb);
 
 	/* do we still have enough room in the vring? */
-	if (wil_vring_avail_tx(vring) < vring->size/8)
+	if (wil_vring_avail_tx(vring) < wil_vring_wmark_low(vring))
 		netif_tx_stop_all_queues(wil_to_ndev(wil));
 
 	switch (rc) {
@@ -1116,7 +1132,7 @@
 			done++;
 		}
 	}
-	if (wil_vring_avail_tx(vring) > vring->size/4)
+	if (wil_vring_avail_tx(vring) > wil_vring_wmark_high(vring))
 		netif_tx_wake_all_queues(wil_to_ndev(wil));
 
 	return done;

diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 2a2dec7..e25edc5 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h

@@ -35,11 +35,14 @@
 #define WIL6210_MEM_SIZE (2*1024*1024UL)
 
 #define WIL6210_RX_RING_SIZE	(128)
-#define WIL6210_TX_RING_SIZE	(128)
+#define WIL6210_TX_RING_SIZE	(512)
 #define WIL6210_MAX_TX_RINGS	(24) /* HW limit */
 #define WIL6210_MAX_CID		(8) /* HW limit */
 #define WIL6210_NAPI_BUDGET	(16) /* arbitrary */
 #define WIL6210_ITR_TRSH	(10000) /* arbitrary - about 15 IRQs/msec */
+#define WIL6210_FW_RECOVERY_RETRIES	(5) /* try to recover this many times */
+#define WIL6210_FW_RECOVERY_TO	msecs_to_jiffies(5000)
+#define WIL6210_SCAN_TO		msecs_to_jiffies(10000)
 
 /* Hardware definitions begin */
 
@@ -301,6 +304,7 @@
 	u16 buf_size;
 	u16 timeout;
 	u8 dialog_token;
+	bool first_time; /* is it 1-st time this buffer used? */
 };
 
 struct wil6210_stats {
@@ -360,6 +364,8 @@
 	u32 fw_version;
 	u32 hw_version;
 	u8 n_mids; /* number of additional MIDs as reported by FW */
+	int recovery_count; /* num of FW recovery attempts in a short time */
+	unsigned long last_fw_recovery; /* jiffies of last fw recovery */
 	/* profile */
 	u32 monitor_flags;
 	u32 secure_pcp; /* create secure PCP? */
@@ -381,6 +387,7 @@
 	struct work_struct disconnect_worker;
 	struct work_struct fw_error_worker;	/* for FW error recovery */
 	struct timer_list connect_timer;
+	struct timer_list scan_timer; /* detect scan timeout */
 	int pending_connect_cid;
 	struct list_head pending_wmi_ev;
 	/*
@@ -507,7 +514,7 @@
 int wmi_set_mac_address(struct wil6210_priv *wil, void *addr);
 int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype, u8 chan);
 int wmi_pcp_stop(struct wil6210_priv *wil);
-void wil6210_disconnect(struct wil6210_priv *wil, void *bssid);
+void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid);
 
 int wil_rx_init(struct wil6210_priv *wil);
 void wil_rx_fini(struct wil6210_priv *wil);

diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 2ba56ee..6cc0e18 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c

@@ -192,7 +192,7 @@
 	might_sleep();
 
 	if (!test_bit(wil_status_fwready, &wil->status)) {
-		wil_err(wil, "FW not ready\n");
+		wil_err(wil, "WMI: cannot send command while FW not ready\n");
 		return -EAGAIN;
 	}
 
@@ -276,8 +276,8 @@
 	wil->fw_version = le32_to_cpu(evt->sw_version);
 	wil->n_mids = evt->numof_additional_mids;
 
-	wil_dbg_wmi(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version,
-		    evt->mac, wil->n_mids);
+	wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version,
+		 evt->mac, wil->n_mids);
 
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
 		memcpy(ndev->dev_addr, evt->mac, ETH_ALEN);
@@ -290,7 +290,7 @@
 static void wmi_evt_fw_ready(struct wil6210_priv *wil, int id, void *d,
 			     int len)
 {
-	wil_dbg_wmi(wil, "WMI: FW ready\n");
+	wil_dbg_wmi(wil, "WMI: got FW ready event\n");
 
 	set_bit(wil_status_fwready, &wil->status);
 	/* reuse wmi_ready for the firmware ready indication */
@@ -348,9 +348,10 @@
 {
 	if (wil->scan_request) {
 		struct wmi_scan_complete_event *data = d;
-		bool aborted = (data->status != 0);
+		bool aborted = (data->status != WMI_SCAN_SUCCESS);
 
 		wil_dbg_wmi(wil, "SCAN_COMPLETE(0x%08x)\n", data->status);
+		del_timer_sync(&wil->scan_timer);
 		cfg80211_scan_done(wil->scan_request, aborted);
 		wil->scan_request = NULL;
 	} else {
@@ -658,21 +659,27 @@
 	u8 *cmd;
 	void __iomem *src;
 	ulong flags;
+	unsigned n;
 
 	if (!test_bit(wil_status_reset_done, &wil->status)) {
 		wil_err(wil, "Reset not completed\n");
 		return;
 	}
 
-	for (;;) {
+	for (n = 0;; n++) {
 		u16 len;
 
 		r->head = ioread32(wil->csr + HOST_MBOX +
 				   offsetof(struct wil6210_mbox_ctl, rx.head));
-		if (r->tail == r->head)
+		if (r->tail == r->head) {
+			if (n == 0)
+				wil_dbg_wmi(wil, "No events?\n");
 			return;
+		}
 
-		/* read cmd from tail */
+		wil_dbg_wmi(wil, "Mbox head %08x tail %08x\n",
+			    r->head, r->tail);
+		/* read cmd descriptor from tail */
 		wil_memcpy_fromio_32(&d_tail, wil->csr + HOSTADDR(r->tail),
 				     sizeof(struct wil6210_mbox_ring_desc));
 		if (d_tail.sync == 0) {
@@ -680,13 +687,18 @@
 			return;
 		}
 
+		/* read cmd header from descriptor */
 		if (0 != wmi_read_hdr(wil, d_tail.addr, &hdr)) {
 			wil_err(wil, "Mbox evt at 0x%08x?\n",
 				le32_to_cpu(d_tail.addr));
 			return;
 		}
-
 		len = le16_to_cpu(hdr.len);
+		wil_dbg_wmi(wil, "Mbox evt %04x %04x %04x %02x\n",
+			    le16_to_cpu(hdr.seq), len, le16_to_cpu(hdr.type),
+			    hdr.flags);
+
+		/* read cmd buffer from descriptor */
 		src = wmi_buffer(wil, d_tail.addr) +
 		      sizeof(struct wil6210_mbox_hdr);
 		evt = kmalloc(ALIGN(offsetof(struct pending_wmi_event,
@@ -702,9 +714,6 @@
 		iowrite32(0, wil->csr + HOSTADDR(r->tail) +
 			  offsetof(struct wil6210_mbox_ring_desc, sync));
 		/* indicate */
-		wil_dbg_wmi(wil, "Mbox evt %04x %04x %04x %02x\n",
-			    le16_to_cpu(hdr.seq), len, le16_to_cpu(hdr.type),
-			    hdr.flags);
 		if ((hdr.type == WIL_MBOX_HDR_TYPE_WMI) &&
 		    (len >= sizeof(struct wil6210_mbox_hdr_wmi))) {
 			struct wil6210_mbox_hdr_wmi *wmi = &evt->event.wmi;
@@ -734,6 +743,8 @@
 			wil_dbg_wmi(wil, "queue_work -> %d\n", q);
 		}
 	}
+	if (n > 1)
+		wil_dbg_wmi(wil, "%s -> %d events processed\n", __func__, n);
 }
 
 int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len,
@@ -802,6 +813,7 @@
 		.network_type = wmi_nettype,
 		.disable_sec_offload = 1,
 		.channel = chan - 1,
+		.pcp_max_assoc_sta = WIL6210_MAX_CID,
 	};
 	struct {
 		struct wil6210_mbox_hdr_wmi wmi;

diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h
index 50b8528..17334c8 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h

@@ -28,7 +28,7 @@
 #define __WILOCITY_WMI_H__
 
 /* General */
-
+#define WILOCITY_MAX_ASSOC_STA (8)
 #define WMI_MAC_LEN		(6)
 #define WMI_PROX_RANGE_NUM	(3)
 
@@ -219,15 +219,6 @@
 	__le16 disconnect_reason;
 } __packed;
 
-/*
- * WMI_RECONNECT_CMDID
- */
-struct wmi_reconnect_cmd {
-	u8 channel;			/* hint */
-	u8 reserved;
-	u8 bssid[WMI_MAC_LEN];		/* mandatory if set */
-} __packed;
-
 
 /*
  * WMI_SET_PMK_CMDID
@@ -296,11 +287,13 @@
 	WMI_LONG_SCAN		= 0,
 	WMI_SHORT_SCAN		= 1,
 	WMI_PBC_SCAN		= 2,
+	WMI_ACTIVE_SCAN		= 3,
+	WMI_DIRECT_SCAN		= 4,
 };
 
 struct wmi_start_scan_cmd {
-	u8 reserved[8];
-
+	u8 direct_scan_mac_addr[6];
+	u8 reserved[2];
 	__le32 home_dwell_time;	/* Max duration in the home channel(ms) */
 	__le32 force_scan_interval;	/* Time interval between scans (ms)*/
 	u8 scan_type;		/* wmi_scan_type */
@@ -332,6 +325,7 @@
 	u8 ssid[WMI_MAX_SSID_LEN];
 } __packed;
 
+
 /*
  * WMI_SET_APPIE_CMDID
  * Add Application specified IE to a management frame
@@ -427,7 +421,7 @@
 	__le16 frag_num;
 	__le64 ss_mask;
 	u8 network_type;
-	u8 reserved;
+	u8 pcp_max_assoc_sta;
 	u8 disable_sec_offload;
 	u8 disable_sec;
 } __packed;
@@ -450,7 +444,7 @@
 struct wmi_port_allocate_cmd {
 	u8 mac[WMI_MAC_LEN];
 	u8 port_role;
-	u8 midid;
+	u8 mid;
 } __packed;
 
 /*
@@ -467,6 +461,7 @@
 enum wmi_discovery_mode {
 	WMI_DISCOVERY_MODE_NON_OFFLOAD	= 0,
 	WMI_DISCOVERY_MODE_OFFLOAD	= 1,
+	WMI_DISCOVERY_MODE_PEER2PEER	= 2,
 };
 
 struct wmi_p2p_cfg_cmd {
@@ -493,7 +488,8 @@
  */
 struct wmi_pcp_start_cmd {
 	__le16 bcon_interval;
-	u8 reserved0[10];
+	u8 pcp_max_assoc_sta;
+	u8 reserved0[9];
 	u8 network_type;
 	u8 channel;
 	u8 disable_sec_offload;
@@ -857,6 +853,7 @@
 	WMI_RF_MGMT_STATUS_EVENTID		= 0x1853,
 	WMI_BF_SM_MGMT_DONE_EVENTID		= 0x1838,
 	WMI_RX_MGMT_PACKET_EVENTID		= 0x1840,
+	WMI_TX_MGMT_PACKET_EVENTID		= 0x1841,
 
 	/* Performance monitoring events */
 	WMI_DATA_PORT_OPEN_EVENTID		= 0x1860,
@@ -1040,16 +1037,23 @@
 struct wmi_disconnect_event {
 	__le16 protocol_reason_status;	/* reason code, see 802.11 spec. */
 	u8 bssid[WMI_MAC_LEN];		/* set if known */
-	u8 disconnect_reason;		/* see wmi_disconnect_reason_e */
-	u8 assoc_resp_len;
-	u8 assoc_info[0];
+	u8 disconnect_reason;		/* see wmi_disconnect_reason */
+	u8 assoc_resp_len;		/* not in use */
+	u8 assoc_info[0];		/* not in use */
 } __packed;
 
 /*
  * WMI_SCAN_COMPLETE_EVENTID
  */
+enum scan_status {
+	WMI_SCAN_SUCCESS	= 0,
+	WMI_SCAN_FAILED		= 1,
+	WMI_SCAN_ABORTED	= 2,
+	WMI_SCAN_REJECTED	= 3,
+};
+
 struct wmi_scan_complete_event {
-	__le32 status;
+	__le32 status;	/* scan_status */
 } __packed;
 
 /*
@@ -1256,6 +1260,14 @@
 	u8 channel;	/* From Radio MNGR */
 } __packed;
 
+
+/*
+ * WMI_TX_MGMT_PACKET_EVENTID
+ */
+struct wmi_tx_mgmt_packet_event {
+	u8 payload[0];
+} __packed;
+
 struct wmi_rx_mgmt_packet_event {
 	struct wmi_rx_mgmt_info info;
 	u8 payload[0];

diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index 088d544..e3f67b8 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig

@@ -1,7 +1,8 @@
 config B43
 	tristate "Broadcom 43xx wireless support (mac80211 stack)"
-	depends on SSB_POSSIBLE && MAC80211 && HAS_DMA
-	select SSB
+	depends on (BCMA_POSSIBLE || SSB_POSSIBLE) && MAC80211 && HAS_DMA
+	select BCMA if B43_BCMA
+	select SSB if B43_SSB
 	select FW_LOADER
 	---help---
 	  b43 is a driver for the Broadcom 43xx series wireless devices.
@@ -27,14 +28,33 @@
 	  If unsure, say M.
 
 config B43_BCMA
-	bool "Support for BCMA bus"
-	depends on B43 && (BCMA = y || BCMA = B43)
-	default y
+	bool
 
 config B43_SSB
 	bool
-	depends on B43 && (SSB = y || SSB = B43)
-	default y
+
+choice
+	prompt "Supported bus types"
+	depends on B43
+	default B43_BUSES_BCMA_AND_SSB
+
+config B43_BUSES_BCMA_AND_SSB
+	bool "BCMA and SSB"
+	depends on BCMA_POSSIBLE && SSB_POSSIBLE
+	select B43_BCMA
+	select B43_SSB
+
+config B43_BUSES_BCMA
+	bool "BCMA only"
+	depends on BCMA_POSSIBLE
+	select B43_BCMA
+
+config B43_BUSES_SSB
+	bool "SSB only"
+	depends on SSB_POSSIBLE
+	select B43_SSB
+
+endchoice
 
 # Auto-select SSB PCI-HOST support, if possible
 config B43_PCI_AUTOSELECT
@@ -53,7 +73,7 @@
 
 config B43_PCMCIA
 	bool "Broadcom 43xx PCMCIA device support"
-	depends on B43 && SSB_PCMCIAHOST_POSSIBLE
+	depends on B43 && B43_SSB && SSB_PCMCIAHOST_POSSIBLE
 	select SSB_PCMCIAHOST
 	---help---
 	  Broadcom 43xx PCMCIA device support.
@@ -73,7 +93,7 @@
 
 config B43_SDIO
 	bool "Broadcom 43xx SDIO device support"
-	depends on B43 && SSB_SDIOHOST_POSSIBLE
+	depends on B43 && B43_SSB && SSB_SDIOHOST_POSSIBLE
 	select SSB_SDIOHOST
 	---help---
 	  Broadcom 43xx device support for Soft-MAC SDIO devices.
@@ -98,7 +118,7 @@
 
 config B43_PIO
 	bool
-	depends on B43
+	depends on B43 && B43_SSB
 	select SSB_BLOCKIO
 	default y
 
@@ -116,7 +136,7 @@
 
 config B43_PHY_LP
 	bool "Support for low-power (LP-PHY) devices"
-	depends on B43
+	depends on B43 && B43_SSB
 	default y
 	---help---
 	  Support for the LP-PHY.

diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h
index 54376fd..4113b69 100644
--- a/drivers/net/wireless/b43/b43.h
+++ b/drivers/net/wireless/b43/b43.h

@@ -915,10 +915,6 @@
 	char rng_name[30 + 1];
 #endif /* CONFIG_B43_HWRNG */
 
-	/* List of all wireless devices on this chip */
-	struct list_head devlist;
-	u8 nr_devs;
-
 	bool radiotap_enabled;
 	bool radio_enabled;
 

diff --git a/drivers/net/wireless/b43/bus.h b/drivers/net/wireless/b43/bus.h
index 184c956..f3205c6 100644
--- a/drivers/net/wireless/b43/bus.h
+++ b/drivers/net/wireless/b43/bus.h

@@ -5,7 +5,9 @@
 #ifdef CONFIG_B43_BCMA
 	B43_BUS_BCMA,
 #endif
+#ifdef CONFIG_B43_SSB
 	B43_BUS_SSB,
+#endif
 };
 
 struct b43_bus_dev {
@@ -52,13 +54,21 @@
 
 static inline bool b43_bus_host_is_pcmcia(struct b43_bus_dev *dev)
 {
+#ifdef CONFIG_B43_SSB
 	return (dev->bus_type == B43_BUS_SSB &&
 		dev->sdev->bus->bustype == SSB_BUSTYPE_PCMCIA);
+#else
+	return false;
+#endif
 }
 static inline bool b43_bus_host_is_sdio(struct b43_bus_dev *dev)
 {
+#ifdef CONFIG_B43_SSB
 	return (dev->bus_type == B43_BUS_SSB &&
 		dev->sdev->bus->bustype == SSB_BUSTYPE_SDIO);
+#else
+	return false;
+#endif
 }
 
 struct b43_bus_dev *b43_bus_dev_bcma_init(struct bcma_device *core);

diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 69fc3d6..32538ac 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c

@@ -182,7 +182,7 @@
 #define b43_g_ratetable		(__b43_ratetable + 0)
 #define b43_g_ratetable_size	12
 
-#define CHAN4G(_channel, _freq, _flags) {			\
+#define CHAN2G(_channel, _freq, _flags) {			\
 	.band			= IEEE80211_BAND_2GHZ,		\
 	.center_freq		= (_freq),			\
 	.hw_value		= (_channel),			\
@@ -191,23 +191,31 @@
 	.max_power		= 30,				\
 }
 static struct ieee80211_channel b43_2ghz_chantable[] = {
-	CHAN4G(1, 2412, 0),
-	CHAN4G(2, 2417, 0),
-	CHAN4G(3, 2422, 0),
-	CHAN4G(4, 2427, 0),
-	CHAN4G(5, 2432, 0),
-	CHAN4G(6, 2437, 0),
-	CHAN4G(7, 2442, 0),
-	CHAN4G(8, 2447, 0),
-	CHAN4G(9, 2452, 0),
-	CHAN4G(10, 2457, 0),
-	CHAN4G(11, 2462, 0),
-	CHAN4G(12, 2467, 0),
-	CHAN4G(13, 2472, 0),
-	CHAN4G(14, 2484, 0),
+	CHAN2G(1, 2412, 0),
+	CHAN2G(2, 2417, 0),
+	CHAN2G(3, 2422, 0),
+	CHAN2G(4, 2427, 0),
+	CHAN2G(5, 2432, 0),
+	CHAN2G(6, 2437, 0),
+	CHAN2G(7, 2442, 0),
+	CHAN2G(8, 2447, 0),
+	CHAN2G(9, 2452, 0),
+	CHAN2G(10, 2457, 0),
+	CHAN2G(11, 2462, 0),
+	CHAN2G(12, 2467, 0),
+	CHAN2G(13, 2472, 0),
+	CHAN2G(14, 2484, 0),
 };
-#undef CHAN4G
+#undef CHAN2G
 
+#define CHAN4G(_channel, _flags) {				\
+	.band			= IEEE80211_BAND_5GHZ,		\
+	.center_freq		= 4000 + (5 * (_channel)),	\
+	.hw_value		= (_channel),			\
+	.flags			= (_flags),			\
+	.max_antenna_gain	= 0,				\
+	.max_power		= 30,				\
+}
 #define CHAN5G(_channel, _flags) {				\
 	.band			= IEEE80211_BAND_5GHZ,		\
 	.center_freq		= 5000 + (5 * (_channel)),	\
@@ -217,6 +225,18 @@
 	.max_power		= 30,				\
 }
 static struct ieee80211_channel b43_5ghz_nphy_chantable[] = {
+	CHAN4G(184, 0),		CHAN4G(186, 0),
+	CHAN4G(188, 0),		CHAN4G(190, 0),
+	CHAN4G(192, 0),		CHAN4G(194, 0),
+	CHAN4G(196, 0),		CHAN4G(198, 0),
+	CHAN4G(200, 0),		CHAN4G(202, 0),
+	CHAN4G(204, 0),		CHAN4G(206, 0),
+	CHAN4G(208, 0),		CHAN4G(210, 0),
+	CHAN4G(212, 0),		CHAN4G(214, 0),
+	CHAN4G(216, 0),		CHAN4G(218, 0),
+	CHAN4G(220, 0),		CHAN4G(222, 0),
+	CHAN4G(224, 0),		CHAN4G(226, 0),
+	CHAN4G(228, 0),
 	CHAN5G(32, 0),		CHAN5G(34, 0),
 	CHAN5G(36, 0),		CHAN5G(38, 0),
 	CHAN5G(40, 0),		CHAN5G(42, 0),
@@ -260,18 +280,7 @@
 	CHAN5G(170, 0),		CHAN5G(172, 0),
 	CHAN5G(174, 0),		CHAN5G(176, 0),
 	CHAN5G(178, 0),		CHAN5G(180, 0),
-	CHAN5G(182, 0),		CHAN5G(184, 0),
-	CHAN5G(186, 0),		CHAN5G(188, 0),
-	CHAN5G(190, 0),		CHAN5G(192, 0),
-	CHAN5G(194, 0),		CHAN5G(196, 0),
-	CHAN5G(198, 0),		CHAN5G(200, 0),
-	CHAN5G(202, 0),		CHAN5G(204, 0),
-	CHAN5G(206, 0),		CHAN5G(208, 0),
-	CHAN5G(210, 0),		CHAN5G(212, 0),
-	CHAN5G(214, 0),		CHAN5G(216, 0),
-	CHAN5G(218, 0),		CHAN5G(220, 0),
-	CHAN5G(222, 0),		CHAN5G(224, 0),
-	CHAN5G(226, 0),		CHAN5G(228, 0),
+	CHAN5G(182, 0),
 };
 
 static struct ieee80211_channel b43_5ghz_aphy_chantable[] = {
@@ -295,6 +304,7 @@
 	CHAN5G(208, 0),		CHAN5G(212, 0),
 	CHAN5G(216, 0),
 };
+#undef CHAN4G
 #undef CHAN5G
 
 static struct ieee80211_supported_band b43_band_5GHz_nphy = {
@@ -1175,18 +1185,7 @@
 	bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, flags);
 	udelay(2);
 
-	/* Take PHY out of reset */
-	flags = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
-	flags &= ~B43_BCMA_IOCTL_PHY_RESET;
-	flags |= BCMA_IOCTL_FGC;
-	bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, flags);
-	udelay(1);
-
-	/* Do not force clock anymore */
-	flags = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
-	flags &= ~BCMA_IOCTL_FGC;
-	bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, flags);
-	udelay(1);
+	b43_phy_take_out_of_reset(dev);
 }
 
 static void b43_bcma_wireless_core_reset(struct b43_wldev *dev, bool gmode)
@@ -1195,18 +1194,22 @@
 		  B43_BCMA_CLKCTLST_PHY_PLL_REQ;
 	u32 status = B43_BCMA_CLKCTLST_80211_PLL_ST |
 		     B43_BCMA_CLKCTLST_PHY_PLL_ST;
+	u32 flags;
 
-	b43_device_enable(dev, B43_BCMA_IOCTL_PHY_CLKEN);
+	flags = B43_BCMA_IOCTL_PHY_CLKEN;
+	if (gmode)
+		flags |= B43_BCMA_IOCTL_GMODE;
+	b43_device_enable(dev, flags);
+
 	bcma_core_set_clockmode(dev->dev->bdev, BCMA_CLKMODE_FAST);
 	b43_bcma_phy_reset(dev);
 	bcma_core_pll_ctl(dev->dev->bdev, req, status, true);
 }
 #endif
 
+#ifdef CONFIG_B43_SSB
 static void b43_ssb_wireless_core_reset(struct b43_wldev *dev, bool gmode)
 {
-	struct ssb_device *sdev = dev->dev->sdev;
-	u32 tmslow;
 	u32 flags = 0;
 
 	if (gmode)
@@ -1218,18 +1221,9 @@
 	b43_device_enable(dev, flags);
 	msleep(2);		/* Wait for the PLL to turn on. */
 
-	/* Now take the PHY out of Reset again */
-	tmslow = ssb_read32(sdev, SSB_TMSLOW);
-	tmslow |= SSB_TMSLOW_FGC;
-	tmslow &= ~B43_TMSLOW_PHYRESET;
-	ssb_write32(sdev, SSB_TMSLOW, tmslow);
-	ssb_read32(sdev, SSB_TMSLOW);	/* flush */
-	msleep(1);
-	tmslow &= ~SSB_TMSLOW_FGC;
-	ssb_write32(sdev, SSB_TMSLOW, tmslow);
-	ssb_read32(sdev, SSB_TMSLOW);	/* flush */
-	msleep(1);
+	b43_phy_take_out_of_reset(dev);
 }
+#endif
 
 void b43_wireless_core_reset(struct b43_wldev *dev, bool gmode)
 {
@@ -2704,32 +2698,37 @@
 	struct b43_firmware *fw = &dev->fw;
 	const struct b43_iv *ivals;
 	size_t count;
-	int err;
 
 	hdr = (const struct b43_fw_header *)(fw->initvals.data->data);
 	ivals = (const struct b43_iv *)(fw->initvals.data->data + hdr_len);
 	count = be32_to_cpu(hdr->size);
-	err = b43_write_initvals(dev, ivals, count,
+	return b43_write_initvals(dev, ivals, count,
 				 fw->initvals.data->size - hdr_len);
-	if (err)
-		goto out;
-	if (fw->initvals_band.data) {
-		hdr = (const struct b43_fw_header *)(fw->initvals_band.data->data);
-		ivals = (const struct b43_iv *)(fw->initvals_band.data->data + hdr_len);
-		count = be32_to_cpu(hdr->size);
-		err = b43_write_initvals(dev, ivals, count,
-					 fw->initvals_band.data->size - hdr_len);
-		if (err)
-			goto out;
-	}
-out:
+}
 
-	return err;
+static int b43_upload_initvals_band(struct b43_wldev *dev)
+{
+	const size_t hdr_len = sizeof(struct b43_fw_header);
+	const struct b43_fw_header *hdr;
+	struct b43_firmware *fw = &dev->fw;
+	const struct b43_iv *ivals;
+	size_t count;
+
+	if (!fw->initvals_band.data)
+		return 0;
+
+	hdr = (const struct b43_fw_header *)(fw->initvals_band.data->data);
+	ivals = (const struct b43_iv *)(fw->initvals_band.data->data + hdr_len);
+	count = be32_to_cpu(hdr->size);
+	return b43_write_initvals(dev, ivals, count,
+				  fw->initvals_band.data->size - hdr_len);
 }
 
 /* Initialize the GPIOs
  * http://bcm-specs.sipsolutions.net/GPIO
  */
+
+#ifdef CONFIG_B43_SSB
 static struct ssb_device *b43_ssb_gpio_dev(struct b43_wldev *dev)
 {
 	struct ssb_bus *bus = dev->dev->sdev->bus;
@@ -2740,10 +2739,13 @@
 	return bus->chipco.dev;
 #endif
 }
+#endif
 
 static int b43_gpio_init(struct b43_wldev *dev)
 {
+#ifdef CONFIG_B43_SSB
 	struct ssb_device *gpiodev;
+#endif
 	u32 mask, set;
 
 	b43_maskset32(dev, B43_MMIO_MACCTL, ~B43_MACCTL_GPOUTSMSK, 0);
@@ -2802,7 +2804,9 @@
 /* Turn off all GPIO stuff. Call this on module unload, for example. */
 static void b43_gpio_cleanup(struct b43_wldev *dev)
 {
+#ifdef CONFIG_B43_SSB
 	struct ssb_device *gpiodev;
+#endif
 
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
@@ -3086,6 +3090,10 @@
 	if (err)
 		goto err_gpio_clean;
 
+	err = b43_upload_initvals_band(dev);
+	if (err)
+		goto err_gpio_clean;
+
 	/* Turn the Analog on and initialize the PHY. */
 	phy->ops->switch_analog(dev, 1);
 	err = b43_phy_init(dev);
@@ -3685,37 +3693,6 @@
 	mutex_unlock(&wl->mutex);
 }
 
-static void b43_put_phy_into_reset(struct b43_wldev *dev)
-{
-	u32 tmp;
-
-	switch (dev->dev->bus_type) {
-#ifdef CONFIG_B43_BCMA
-	case B43_BUS_BCMA:
-		b43err(dev->wl,
-		       "Putting PHY into reset not supported on BCMA\n");
-		break;
-#endif
-#ifdef CONFIG_B43_SSB
-	case B43_BUS_SSB:
-		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
-		tmp &= ~B43_TMSLOW_GMODE;
-		tmp |= B43_TMSLOW_PHYRESET;
-		tmp |= SSB_TMSLOW_FGC;
-		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
-		msleep(1);
-
-		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
-		tmp &= ~SSB_TMSLOW_FGC;
-		tmp |= B43_TMSLOW_PHYRESET;
-		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
-		msleep(1);
-
-		break;
-#endif
-	}
-}
-
 static const char *band_to_string(enum ieee80211_band band)
 {
 	switch (band) {
@@ -3731,94 +3708,75 @@
 }
 
 /* Expects wl->mutex locked */
-static int b43_switch_band(struct b43_wl *wl, struct ieee80211_channel *chan)
+static int b43_switch_band(struct b43_wldev *dev,
+			   struct ieee80211_channel *chan)
 {
-	struct b43_wldev *up_dev = NULL;
-	struct b43_wldev *down_dev;
-	struct b43_wldev *d;
-	int err;
-	bool uninitialized_var(gmode);
-	int prev_status;
+	struct b43_phy *phy = &dev->phy;
+	bool gmode;
+	u32 tmp;
 
-	/* Find a device and PHY which supports the band. */
-	list_for_each_entry(d, &wl->devlist, list) {
-		switch (chan->band) {
-		case IEEE80211_BAND_5GHZ:
-			if (d->phy.supports_5ghz) {
-				up_dev = d;
-				gmode = false;
-			}
-			break;
-		case IEEE80211_BAND_2GHZ:
-			if (d->phy.supports_2ghz) {
-				up_dev = d;
-				gmode = true;
-			}
-			break;
-		default:
-			B43_WARN_ON(1);
-			return -EINVAL;
-		}
-		if (up_dev)
-			break;
+	switch (chan->band) {
+	case IEEE80211_BAND_5GHZ:
+		gmode = false;
+		break;
+	case IEEE80211_BAND_2GHZ:
+		gmode = true;
+		break;
+	default:
+		B43_WARN_ON(1);
+		return -EINVAL;
 	}
-	if (!up_dev) {
-		b43err(wl, "Could not find a device for %s-GHz band operation\n",
+
+	if (!((gmode && phy->supports_2ghz) ||
+	      (!gmode && phy->supports_5ghz))) {
+		b43err(dev->wl, "This device doesn't support %s-GHz band\n",
 		       band_to_string(chan->band));
 		return -ENODEV;
 	}
-	if ((up_dev == wl->current_dev) &&
-	    (!!wl->current_dev->phy.gmode == !!gmode)) {
+
+	if (!!phy->gmode == !!gmode) {
 		/* This device is already running. */
 		return 0;
 	}
-	b43dbg(wl, "Switching to %s-GHz band\n",
+
+	b43dbg(dev->wl, "Switching to %s GHz band\n",
 	       band_to_string(chan->band));
-	down_dev = wl->current_dev;
 
-	prev_status = b43_status(down_dev);
-	/* Shutdown the currently running core. */
-	if (prev_status >= B43_STAT_STARTED)
-		down_dev = b43_wireless_core_stop(down_dev);
-	if (prev_status >= B43_STAT_INITIALIZED)
-		b43_wireless_core_exit(down_dev);
+	/* Some new devices don't need disabling radio for band switching */
+	if (!(phy->type == B43_PHYTYPE_N && phy->rev >= 3))
+		b43_software_rfkill(dev, true);
 
-	if (down_dev != up_dev) {
-		/* We switch to a different core, so we put PHY into
-		 * RESET on the old core. */
-		b43_put_phy_into_reset(down_dev);
+	phy->gmode = gmode;
+	b43_phy_put_into_reset(dev);
+	switch (dev->dev->bus_type) {
+#ifdef CONFIG_B43_BCMA
+	case B43_BUS_BCMA:
+		tmp = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
+		if (gmode)
+			tmp |= B43_BCMA_IOCTL_GMODE;
+		else
+			tmp &= ~B43_BCMA_IOCTL_GMODE;
+		bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, tmp);
+		break;
+#endif
+#ifdef CONFIG_B43_SSB
+	case B43_BUS_SSB:
+		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
+		if (gmode)
+			tmp |= B43_TMSLOW_GMODE;
+		else
+			tmp &= ~B43_TMSLOW_GMODE;
+		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
+		break;
+#endif
 	}
+	b43_phy_take_out_of_reset(dev);
 
-	/* Now start the new core. */
-	up_dev->phy.gmode = gmode;
-	if (prev_status >= B43_STAT_INITIALIZED) {
-		err = b43_wireless_core_init(up_dev);
-		if (err) {
-			b43err(wl, "Fatal: Could not initialize device for "
-			       "selected %s-GHz band\n",
-			       band_to_string(chan->band));
-			goto init_failure;
-		}
-	}
-	if (prev_status >= B43_STAT_STARTED) {
-		err = b43_wireless_core_start(up_dev);
-		if (err) {
-			b43err(wl, "Fatal: Could not start device for "
-			       "selected %s-GHz band\n",
-			       band_to_string(chan->band));
-			b43_wireless_core_exit(up_dev);
-			goto init_failure;
-		}
-	}
-	B43_WARN_ON(b43_status(up_dev) != prev_status);
+	b43_upload_initvals_band(dev);
 
-	wl->current_dev = up_dev;
+	b43_phy_init(dev);
 
 	return 0;
-init_failure:
-	/* Whoops, failed to init the new core. No core is operating now. */
-	wl->current_dev = NULL;
-	return err;
 }
 
 /* Write the short and long frame retry limit values. */
@@ -3851,8 +3809,10 @@
 
 	dev = wl->current_dev;
 
+	b43_mac_suspend(dev);
+
 	/* Switch the band (if necessary). This might change the active core. */
-	err = b43_switch_band(wl, conf->chandef.chan);
+	err = b43_switch_band(dev, conf->chandef.chan);
 	if (err)
 		goto out_unlock_mutex;
 
@@ -3871,8 +3831,6 @@
 	else
 		phy->is_40mhz = false;
 
-	b43_mac_suspend(dev);
-
 	if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS)
 		b43_set_retry_limits(dev, conf->short_frame_max_tx_count,
 					  conf->long_frame_max_tx_count);
@@ -4582,8 +4540,12 @@
 	struct ssb_bus *bus;
 	u32 tmp;
 
+#ifdef CONFIG_B43_SSB
 	if (dev->dev->bus_type != B43_BUS_SSB)
 		return;
+#else
+	return;
+#endif
 
 	bus = dev->dev->sdev->bus;
 
@@ -4738,7 +4700,7 @@
 	}
 	if (sprom->boardflags_lo & B43_BFL_XTAL_NOSLOW)
 		hf |= B43_HF_DSCRQ; /* Disable slowclock requests from ucode. */
-#ifdef CONFIG_SSB_DRIVER_PCICORE
+#if defined(CONFIG_B43_SSB) && defined(CONFIG_SSB_DRIVER_PCICORE)
 	if (dev->dev->bus_type == B43_BUS_SSB &&
 	    dev->dev->sdev->bus->bustype == SSB_BUSTYPE_PCI &&
 	    dev->dev->sdev->bus->pcicore.dev->id.revision <= 10)
@@ -5129,10 +5091,82 @@
 	b43_phy_free(dev);
 }
 
+static void b43_supported_bands(struct b43_wldev *dev, bool *have_2ghz_phy,
+				bool *have_5ghz_phy)
+{
+	u16 dev_id = 0;
+
+#ifdef CONFIG_B43_BCMA
+	if (dev->dev->bus_type == B43_BUS_BCMA &&
+	    dev->dev->bdev->bus->hosttype == BCMA_HOSTTYPE_PCI)
+		dev_id = dev->dev->bdev->bus->host_pci->device;
+#endif
+#ifdef CONFIG_B43_SSB
+	if (dev->dev->bus_type == B43_BUS_SSB &&
+	    dev->dev->sdev->bus->bustype == SSB_BUSTYPE_PCI)
+		dev_id = dev->dev->sdev->bus->host_pci->device;
+#endif
+	/* Override with SPROM value if available */
+	if (dev->dev->bus_sprom->dev_id)
+		dev_id = dev->dev->bus_sprom->dev_id;
+
+	/* Note: below IDs can be "virtual" (not matching e.g. real PCI ID) */
+	switch (dev_id) {
+	case 0x4324: /* BCM4306 */
+	case 0x4312: /* BCM4311 */
+	case 0x4319: /* BCM4318 */
+	case 0x4328: /* BCM4321 */
+	case 0x432b: /* BCM4322 */
+	case 0x4350: /* BCM43222 */
+	case 0x4353: /* BCM43224 */
+	case 0x0576: /* BCM43224 */
+	case 0x435f: /* BCM6362 */
+	case 0x4331: /* BCM4331 */
+	case 0x4359: /* BCM43228 */
+	case 0x43a0: /* BCM4360 */
+	case 0x43b1: /* BCM4352 */
+		/* Dual band devices */
+		*have_2ghz_phy = true;
+		*have_5ghz_phy = true;
+		return;
+	case 0x4321: /* BCM4306 */
+	case 0x4313: /* BCM4311 */
+	case 0x431a: /* BCM4318 */
+	case 0x432a: /* BCM4321 */
+	case 0x432d: /* BCM4322 */
+	case 0x4352: /* BCM43222 */
+	case 0x4333: /* BCM4331 */
+	case 0x43a2: /* BCM4360 */
+	case 0x43b3: /* BCM4352 */
+		/* 5 GHz only devices */
+		*have_2ghz_phy = false;
+		*have_5ghz_phy = true;
+		return;
+	}
+
+	/* As a fallback, try to guess using PHY type */
+	switch (dev->phy.type) {
+	case B43_PHYTYPE_A:
+		*have_2ghz_phy = false;
+		*have_5ghz_phy = true;
+		return;
+	case B43_PHYTYPE_G:
+	case B43_PHYTYPE_N:
+	case B43_PHYTYPE_LP:
+	case B43_PHYTYPE_HT:
+	case B43_PHYTYPE_LCN:
+		*have_2ghz_phy = true;
+		*have_5ghz_phy = false;
+		return;
+	}
+
+	B43_WARN_ON(1);
+}
+
 static int b43_wireless_core_attach(struct b43_wldev *dev)
 {
 	struct b43_wl *wl = dev->wl;
-	struct pci_dev *pdev = NULL;
+	struct b43_phy *phy = &dev->phy;
 	int err;
 	u32 tmp;
 	bool have_2ghz_phy = false, have_5ghz_phy = false;
@@ -5144,19 +5178,15 @@
 	 * that in core_init(), too.
 	 */
 
-#ifdef CONFIG_B43_SSB
-	if (dev->dev->bus_type == B43_BUS_SSB &&
-	    dev->dev->sdev->bus->bustype == SSB_BUSTYPE_PCI)
-		pdev = dev->dev->sdev->bus->host_pci;
-#endif
-
 	err = b43_bus_powerup(dev, 0);
 	if (err) {
 		b43err(wl, "Bus powerup failed\n");
 		goto out;
 	}
 
-	/* Get the PHY type. */
+	phy->do_full_init = true;
+
+	/* Try to guess supported bands for the first init needs */
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
@@ -5178,51 +5208,31 @@
 	}
 
 	dev->phy.gmode = have_2ghz_phy;
-	dev->phy.radio_on = true;
 	b43_wireless_core_reset(dev, dev->phy.gmode);
 
+	/* Get the PHY type. */
 	err = b43_phy_versioning(dev);
 	if (err)
 		goto err_powerdown;
-	/* Check if this device supports multiband. */
-	if (!pdev ||
-	    (pdev->device != 0x4312 &&
-	     pdev->device != 0x4319 && pdev->device != 0x4324)) {
-		/* No multiband support. */
-		have_2ghz_phy = false;
+
+	/* Get real info about supported bands */
+	b43_supported_bands(dev, &have_2ghz_phy, &have_5ghz_phy);
+
+	/* We don't support 5 GHz on some PHYs yet */
+	switch (dev->phy.type) {
+	case B43_PHYTYPE_A:
+	case B43_PHYTYPE_N:
+	case B43_PHYTYPE_LP:
+	case B43_PHYTYPE_HT:
+		b43warn(wl, "5 GHz band is unsupported on this PHY\n");
 		have_5ghz_phy = false;
-		switch (dev->phy.type) {
-		case B43_PHYTYPE_A:
-			have_5ghz_phy = true;
-			break;
-		case B43_PHYTYPE_LP: //FIXME not always!
-#if 0 //FIXME enabling 5GHz causes a NULL pointer dereference
-			have_5ghz_phy = 1;
-#endif
-		case B43_PHYTYPE_G:
-		case B43_PHYTYPE_N:
-		case B43_PHYTYPE_HT:
-		case B43_PHYTYPE_LCN:
-			have_2ghz_phy = true;
-			break;
-		default:
-			B43_WARN_ON(1);
-		}
 	}
-	if (dev->phy.type == B43_PHYTYPE_A) {
-		/* FIXME */
-		b43err(wl, "IEEE 802.11a devices are unsupported\n");
+
+	if (!have_2ghz_phy && !have_5ghz_phy) {
+		b43err(wl, "b43 can't support any band on this device\n");
 		err = -EOPNOTSUPP;
 		goto err_powerdown;
 	}
-	if (1 /* disable A-PHY */) {
-		/* FIXME: For now we disable the A-PHY on multi-PHY devices. */
-		if (dev->phy.type != B43_PHYTYPE_N &&
-		    dev->phy.type != B43_PHYTYPE_LP) {
-			have_2ghz_phy = true;
-			have_5ghz_phy = false;
-		}
-	}
 
 	err = b43_phy_allocate(dev);
 	if (err)
@@ -5270,7 +5280,6 @@
 	b43_debugfs_remove_device(wldev);
 	b43_wireless_core_detach(wldev);
 	list_del(&wldev->list);
-	wl->nr_devs--;
 	b43_bus_set_wldev(dev, NULL);
 	kfree(wldev);
 }
@@ -5295,8 +5304,6 @@
 	if (err)
 		goto err_kfree_wldev;
 
-	list_add(&wldev->list, &wl->devlist);
-	wl->nr_devs++;
 	b43_bus_set_wldev(dev, wldev);
 	b43_debugfs_add_device(wldev);
 
@@ -5314,6 +5321,7 @@
 	(pdev->subsystem_vendor == PCI_VENDOR_ID_##_subvendor) &&	\
 	(pdev->subsystem_device == _subdevice)				)
 
+#ifdef CONFIG_B43_SSB
 static void b43_sprom_fixup(struct ssb_bus *bus)
 {
 	struct pci_dev *pdev;
@@ -5345,6 +5353,7 @@
 	ssb_set_devtypedata(dev->sdev, NULL);
 	ieee80211_free_hw(hw);
 }
+#endif
 
 static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev)
 {
@@ -5386,7 +5395,6 @@
 	wl->hw = hw;
 	mutex_init(&wl->mutex);
 	spin_lock_init(&wl->hardirq_lock);
-	INIT_LIST_HEAD(&wl->devlist);
 	INIT_WORK(&wl->beacon_update_trigger, b43_beacon_update_trigger_work);
 	INIT_WORK(&wl->txpower_adjust_work, b43_phy_txpower_adjust_work);
 	INIT_WORK(&wl->tx_work, b43_tx_work);
@@ -5486,39 +5494,42 @@
 	struct b43_bus_dev *dev;
 	struct b43_wl *wl;
 	int err;
-	int first = 0;
 
 	dev = b43_bus_dev_ssb_init(sdev);
 	if (!dev)
 		return -ENOMEM;
 
 	wl = ssb_get_devtypedata(sdev);
-	if (!wl) {
-		/* Probing the first core. Must setup common struct b43_wl */
-		first = 1;
-		b43_sprom_fixup(sdev->bus);
-		wl = b43_wireless_init(dev);
-		if (IS_ERR(wl)) {
-			err = PTR_ERR(wl);
-			goto out;
-		}
-		ssb_set_devtypedata(sdev, wl);
-		B43_WARN_ON(ssb_get_devtypedata(sdev) != wl);
+	if (wl) {
+		b43err(NULL, "Dual-core devices are not supported\n");
+		err = -ENOTSUPP;
+		goto err_ssb_kfree_dev;
 	}
+
+	b43_sprom_fixup(sdev->bus);
+
+	wl = b43_wireless_init(dev);
+	if (IS_ERR(wl)) {
+		err = PTR_ERR(wl);
+		goto err_ssb_kfree_dev;
+	}
+	ssb_set_devtypedata(sdev, wl);
+	B43_WARN_ON(ssb_get_devtypedata(sdev) != wl);
+
 	err = b43_one_core_attach(dev, wl);
 	if (err)
-		goto err_wireless_exit;
+		goto err_ssb_wireless_exit;
 
 	/* setup and start work to load firmware */
 	INIT_WORK(&wl->firmware_load, b43_request_firmware);
 	schedule_work(&wl->firmware_load);
 
-      out:
 	return err;
 
-      err_wireless_exit:
-	if (first)
-		b43_wireless_exit(dev, wl);
+err_ssb_wireless_exit:
+	b43_wireless_exit(dev, wl);
+err_ssb_kfree_dev:
+	kfree(dev);
 	return err;
 }
 
@@ -5546,13 +5557,8 @@
 	/* Unregister HW RNG driver */
 	b43_rng_exit(wl);
 
-	if (list_empty(&wl->devlist)) {
-		b43_leds_unregister(wl);
-		/* Last core on the chip unregistered.
-		 * We can destroy common struct b43_wl.
-		 */
-		b43_wireless_exit(dev, wl);
-	}
+	b43_leds_unregister(wl);
+	b43_wireless_exit(dev, wl);
 }
 
 static struct ssb_driver b43_ssb_driver = {

diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c
index dbaa518..08244b3 100644
--- a/drivers/net/wireless/b43/phy_common.c
+++ b/drivers/net/wireless/b43/phy_common.c

@@ -96,12 +96,16 @@
 
 	phy->channel = ops->get_default_chan(dev);
 
-	ops->software_rfkill(dev, false);
+	phy->ops->switch_analog(dev, true);
+	b43_software_rfkill(dev, false);
+
 	err = ops->init(dev);
 	if (err) {
 		b43err(dev->wl, "PHY init failed\n");
 		goto err_block_rf;
 	}
+	phy->do_full_init = false;
+
 	/* Make sure to switch hardware and firmware (SHM) to
 	 * the default channel. */
 	err = b43_switch_channel(dev, ops->get_default_chan(dev));
@@ -113,10 +117,11 @@
 	return 0;
 
 err_phy_exit:
+	phy->do_full_init = true;
 	if (ops->exit)
 		ops->exit(dev);
 err_block_rf:
-	ops->software_rfkill(dev, true);
+	b43_software_rfkill(dev, true);
 
 	return err;
 }
@@ -125,7 +130,8 @@
 {
 	const struct b43_phy_operations *ops = dev->phy.ops;
 
-	ops->software_rfkill(dev, true);
+	b43_software_rfkill(dev, true);
+	dev->phy.do_full_init = true;
 	if (ops->exit)
 		ops->exit(dev);
 }
@@ -312,6 +318,90 @@
 	}
 }
 
+void b43_phy_put_into_reset(struct b43_wldev *dev)
+{
+	u32 tmp;
+
+	switch (dev->dev->bus_type) {
+#ifdef CONFIG_B43_BCMA
+	case B43_BUS_BCMA:
+		tmp = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
+		tmp &= ~B43_BCMA_IOCTL_GMODE;
+		tmp |= B43_BCMA_IOCTL_PHY_RESET;
+		tmp |= BCMA_IOCTL_FGC;
+		bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, tmp);
+		udelay(1);
+
+		tmp = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
+		tmp &= ~BCMA_IOCTL_FGC;
+		bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, tmp);
+		udelay(1);
+		break;
+#endif
+#ifdef CONFIG_B43_SSB
+	case B43_BUS_SSB:
+		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
+		tmp &= ~B43_TMSLOW_GMODE;
+		tmp |= B43_TMSLOW_PHYRESET;
+		tmp |= SSB_TMSLOW_FGC;
+		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
+		usleep_range(1000, 2000);
+
+		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
+		tmp &= ~SSB_TMSLOW_FGC;
+		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
+		usleep_range(1000, 2000);
+
+		break;
+#endif
+	}
+}
+
+void b43_phy_take_out_of_reset(struct b43_wldev *dev)
+{
+	u32 tmp;
+
+	switch (dev->dev->bus_type) {
+#ifdef CONFIG_B43_BCMA
+	case B43_BUS_BCMA:
+		/* Unset reset bit (with forcing clock) */
+		tmp = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
+		tmp &= ~B43_BCMA_IOCTL_PHY_RESET;
+		tmp &= ~B43_BCMA_IOCTL_PHY_CLKEN;
+		tmp |= BCMA_IOCTL_FGC;
+		bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, tmp);
+		udelay(1);
+
+		/* Do not force clock anymore */
+		tmp = bcma_aread32(dev->dev->bdev, BCMA_IOCTL);
+		tmp &= ~BCMA_IOCTL_FGC;
+		tmp |= B43_BCMA_IOCTL_PHY_CLKEN;
+		bcma_awrite32(dev->dev->bdev, BCMA_IOCTL, tmp);
+		udelay(1);
+		break;
+#endif
+#ifdef CONFIG_B43_SSB
+	case B43_BUS_SSB:
+		/* Unset reset bit (with forcing clock) */
+		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
+		tmp &= ~B43_TMSLOW_PHYRESET;
+		tmp &= ~B43_TMSLOW_PHYCLKEN;
+		tmp |= SSB_TMSLOW_FGC;
+		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
+		ssb_read32(dev->dev->sdev, SSB_TMSLOW); /* flush */
+		usleep_range(1000, 2000);
+
+		tmp = ssb_read32(dev->dev->sdev, SSB_TMSLOW);
+		tmp &= ~SSB_TMSLOW_FGC;
+		tmp |= B43_TMSLOW_PHYCLKEN;
+		ssb_write32(dev->dev->sdev, SSB_TMSLOW, tmp);
+		ssb_read32(dev->dev->sdev, SSB_TMSLOW); /* flush */
+		usleep_range(1000, 2000);
+		break;
+#endif
+	}
+}
+
 int b43_switch_channel(struct b43_wldev *dev, unsigned int new_channel)
 {
 	struct b43_phy *phy = &(dev->phy);

diff --git a/drivers/net/wireless/b43/phy_common.h b/drivers/net/wireless/b43/phy_common.h
index f1b9993..4ad6240 100644
--- a/drivers/net/wireless/b43/phy_common.h
+++ b/drivers/net/wireless/b43/phy_common.h

@@ -231,9 +231,12 @@
 	/* HT info */
 	bool is_40mhz;
 
-	/* GMODE bit enabled? */
+	/* Is GMODE (2 GHz mode) bit enabled? */
 	bool gmode;
 
+	/* After power reset full init has to be performed */
+	bool do_full_init;
+
 	/* Analog Type */
 	u8 analog;
 	/* B43_PHYTYPE_ */
@@ -390,6 +393,9 @@
  */
 void b43_phy_unlock(struct b43_wldev *dev);
 
+void b43_phy_put_into_reset(struct b43_wldev *dev);
+void b43_phy_take_out_of_reset(struct b43_wldev *dev);
+
 /**
  * b43_switch_channel - Switch to another channel
  */

diff --git a/drivers/net/wireless/b43/phy_g.c b/drivers/net/wireless/b43/phy_g.c
index 12f467b..8f5c14b 100644
--- a/drivers/net/wireless/b43/phy_g.c
+++ b/drivers/net/wireless/b43/phy_g.c

@@ -1587,6 +1587,7 @@
 	b43_write16(dev, 0x03E4, (b43_read16(dev, 0x03E4) & 0xFFC0) | 0x0004);
 }
 
+/* http://bcm-v4.sipsolutions.net/802.11/PHY/Init/B6 */
 static void b43_phy_initb6(struct b43_wldev *dev)
 {
 	struct b43_phy *phy = &dev->phy;
@@ -1670,7 +1671,7 @@
 		b43_radio_write16(dev, 0x50, 0x20);
 	}
 	if (phy->radio_rev <= 2) {
-		b43_radio_write16(dev, 0x7C, 0x20);
+		b43_radio_write16(dev, 0x50, 0x20);
 		b43_radio_write16(dev, 0x5A, 0x70);
 		b43_radio_write16(dev, 0x5B, 0x7B);
 		b43_radio_write16(dev, 0x5C, 0xB0);
@@ -1686,9 +1687,8 @@
 		b43_phy_write(dev, 0x2A, 0x8AC0);
 	b43_phy_write(dev, 0x0038, 0x0668);
 	b43_set_txpower_g(dev, &gphy->bbatt, &gphy->rfatt, gphy->tx_control);
-	if (phy->radio_rev <= 5) {
+	if (phy->radio_rev == 4 || phy->radio_rev == 5)
 		b43_phy_maskset(dev, 0x5D, 0xFF80, 0x0003);
-	}
 	if (phy->radio_rev <= 2)
 		b43_radio_write16(dev, 0x005D, 0x000D);
 

diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c
index 24ccbe9..86569f6 100644
--- a/drivers/net/wireless/b43/phy_n.c
+++ b/drivers/net/wireless/b43/phy_n.c

@@ -257,6 +257,72 @@
 	}
 }
 
+static void b43_nphy_rf_ctl_intc_override_rev7(struct b43_wldev *dev,
+					       enum n_intc_override intc_override,
+					       u16 value, u8 core_sel)
+{
+	u16 reg, tmp, tmp2, val;
+	int core;
+
+	for (core = 0; core < 2; core++) {
+		if ((core_sel == 1 && core != 0) ||
+		    (core_sel == 2 && core != 1))
+			continue;
+
+		reg = (core == 0) ? B43_NPHY_RFCTL_INTC1 : B43_NPHY_RFCTL_INTC2;
+
+		switch (intc_override) {
+		case N_INTC_OVERRIDE_OFF:
+			b43_phy_write(dev, reg, 0);
+			b43_nphy_force_rf_sequence(dev, B43_RFSEQ_RESET2RX);
+			break;
+		case N_INTC_OVERRIDE_TRSW:
+			b43_phy_maskset(dev, reg, ~0xC0, value << 6);
+			b43_phy_set(dev, reg, 0x400);
+
+			b43_phy_mask(dev, 0x2ff, ~0xC000 & 0xFFFF);
+			b43_phy_set(dev, 0x2ff, 0x2000);
+			b43_phy_set(dev, 0x2ff, 0x0001);
+			break;
+		case N_INTC_OVERRIDE_PA:
+			tmp = 0x0030;
+			if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ)
+				val = value << 5;
+			else
+				val = value << 4;
+			b43_phy_maskset(dev, reg, ~tmp, val);
+			b43_phy_set(dev, reg, 0x1000);
+			break;
+		case N_INTC_OVERRIDE_EXT_LNA_PU:
+			if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
+				tmp = 0x0001;
+				tmp2 = 0x0004;
+				val = value;
+			} else {
+				tmp = 0x0004;
+				tmp2 = 0x0001;
+				val = value << 2;
+			}
+			b43_phy_maskset(dev, reg, ~tmp, val);
+			b43_phy_mask(dev, reg, ~tmp2);
+			break;
+		case N_INTC_OVERRIDE_EXT_LNA_GAIN:
+			if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
+				tmp = 0x0002;
+				tmp2 = 0x0008;
+				val = value << 1;
+			} else {
+				tmp = 0x0008;
+				tmp2 = 0x0002;
+				val = value << 3;
+			}
+			b43_phy_maskset(dev, reg, ~tmp, val);
+			b43_phy_mask(dev, reg, ~tmp2);
+			break;
+		}
+	}
+}
+
 /* http://bcm-v4.sipsolutions.net/802.11/PHY/N/RFCtrlIntcOverride */
 static void b43_nphy_rf_ctl_intc_override(struct b43_wldev *dev,
 					  enum n_intc_override intc_override,
@@ -265,6 +331,12 @@
 	u8 i, j;
 	u16 reg, tmp, val;
 
+	if (dev->phy.rev >= 7) {
+		b43_nphy_rf_ctl_intc_override_rev7(dev, intc_override, value,
+						   core);
+		return;
+	}
+
 	B43_WARN_ON(dev->phy.rev < 3);
 
 	for (i = 0; i < 2; i++) {
@@ -419,7 +491,8 @@
 		static const u16 clip[] = { 0xFFFF, 0xFFFF };
 		if (nphy->deaf_count++ == 0) {
 			nphy->classifier_state = b43_nphy_classifier(dev, 0, 0);
-			b43_nphy_classifier(dev, 0x7, 0);
+			b43_nphy_classifier(dev, 0x7,
+					    B43_NPHY_CLASSCTL_WAITEDEN);
 			b43_nphy_read_clip_detection(dev, nphy->clip_state);
 			b43_nphy_write_clip_detection(dev, clip);
 		}
@@ -627,13 +700,11 @@
 	b43_radio_mask(dev, R2057_RFPLL_MISC_CAL_RESETN, ~0x78);
 	b43_radio_mask(dev, R2057_XTAL_CONFIG2, ~0x80);
 
-	if (dev->phy.n->init_por) {
+	if (dev->phy.do_full_init) {
 		b43_radio_2057_rcal(dev);
 		b43_radio_2057_rccal(dev);
 	}
 	b43_radio_mask(dev, R2057_RFPLL_MASTER, ~0x8);
-
-	dev->phy.n->init_por = false;
 }
 
 /* http://bcm-v4.sipsolutions.net/802.11/Radio/2057/Init */
@@ -734,9 +805,16 @@
 	u16 bias, cbias;
 	u16 pag_boost, padg_boost, pgag_boost, mixg_boost;
 	u16 paa_boost, pada_boost, pgaa_boost, mixa_boost;
+	bool is_pkg_fab_smic;
 
 	B43_WARN_ON(dev->phy.rev < 3);
 
+	is_pkg_fab_smic =
+		((dev->dev->chip_id == BCMA_CHIP_ID_BCM43224 ||
+		  dev->dev->chip_id == BCMA_CHIP_ID_BCM43225 ||
+		  dev->dev->chip_id == BCMA_CHIP_ID_BCM43421) &&
+		 dev->dev->chip_pkg == BCMA_PKG_ID_BCM43224_FAB_SMIC);
+
 	b43_chantab_radio_2056_upload(dev, e);
 	b2056_upload_syn_pll_cp2(dev, band == IEEE80211_BAND_5GHZ);
 
@@ -744,7 +822,8 @@
 	    b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) {
 		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER1, 0x1F);
 		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER2, 0x1F);
-		if (dev->dev->chip_id == 0x4716) {
+		if (dev->dev->chip_id == BCMA_CHIP_ID_BCM4716 ||
+		    dev->dev->chip_id == BCMA_CHIP_ID_BCM47162) {
 			b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER4, 0x14);
 			b43_radio_write(dev, B2056_SYN_PLL_CP2, 0);
 		} else {
@@ -752,6 +831,13 @@
 			b43_radio_write(dev, B2056_SYN_PLL_CP2, 0x14);
 		}
 	}
+	if (sprom->boardflags2_hi & B43_BFH2_GPLL_WAR2 &&
+	    b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) {
+		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER1, 0x1f);
+		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER2, 0x1f);
+		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER4, 0x0b);
+		b43_radio_write(dev, B2056_SYN_PLL_CP2, 0x20);
+	}
 	if (sprom->boardflags2_lo & B43_BFL2_APLL_WAR &&
 	    b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
 		b43_radio_write(dev, B2056_SYN_PLL_LOOPFILTER1, 0x1F);
@@ -767,7 +853,8 @@
 				b43_radio_write(dev,
 					offset | B2056_TX_PADG_IDAC, 0xcc);
 
-				if (dev->dev->chip_id == 0x4716) {
+				if (dev->dev->chip_id == BCMA_CHIP_ID_BCM4716 ||
+				    dev->dev->chip_id == BCMA_CHIP_ID_BCM47162) {
 					bias = 0x40;
 					cbias = 0x45;
 					pag_boost = 0x5;
@@ -776,6 +863,10 @@
 				} else {
 					bias = 0x25;
 					cbias = 0x20;
+					if (is_pkg_fab_smic) {
+						bias = 0x2a;
+						cbias = 0x38;
+					}
 					pag_boost = 0x4;
 					pgag_boost = 0x03;
 					mixg_boost = 0x65;
@@ -844,6 +935,8 @@
 			mixa_boost = 0xF;
 		}
 
+		cbias = is_pkg_fab_smic ? 0x35 : 0x30;
+
 		for (i = 0; i < 2; i++) {
 			offset = i ? B2056_TX1 : B2056_TX0;
 
@@ -862,11 +955,11 @@
 			b43_radio_write(dev,
 				offset | B2056_TX_PADA_CASCBIAS, 0x03);
 			b43_radio_write(dev,
-				offset | B2056_TX_INTPAA_IAUX_STAT, 0x50);
+				offset | B2056_TX_INTPAA_IAUX_STAT, 0x30);
 			b43_radio_write(dev,
-				offset | B2056_TX_INTPAA_IMAIN_STAT, 0x50);
+				offset | B2056_TX_INTPAA_IMAIN_STAT, 0x30);
 			b43_radio_write(dev,
-				offset | B2056_TX_INTPAA_CASCBIAS, 0x30);
+				offset | B2056_TX_INTPAA_CASCBIAS, cbias);
 		}
 	}
 
@@ -933,7 +1026,7 @@
 	b43_radio_mask(dev, B2056_SYN_COM_RESET, ~0x2);
 	b43_radio_mask(dev, B2056_SYN_PLL_MAST2, ~0xFC);
 	b43_radio_mask(dev, B2056_SYN_RCCAL_CTRL0, ~0x1);
-	if (dev->phy.n->init_por)
+	if (dev->phy.do_full_init)
 		b43_radio_2056_rcal(dev);
 }
 
@@ -946,8 +1039,6 @@
 	b43_radio_init2056_pre(dev);
 	b2056_upload_inittabs(dev, 0, 0);
 	b43_radio_init2056_post(dev);
-
-	dev->phy.n->init_por = false;
 }
 
 /**************************************************
@@ -1164,23 +1255,20 @@
 	u16 seq_mode;
 	u32 tmp;
 
-	if (nphy->hang_avoid)
-		b43_nphy_stay_in_carrier_search(dev, true);
+	b43_nphy_stay_in_carrier_search(dev, true);
 
 	if ((nphy->bb_mult_save & 0x80000000) == 0) {
 		tmp = b43_ntab_read(dev, B43_NTAB16(15, 87));
 		nphy->bb_mult_save = (tmp & 0xFFFF) | 0x80000000;
 	}
 
+	/* TODO: add modify_bbmult argument */
 	if (!dev->phy.is_40mhz)
 		tmp = 0x6464;
 	else
 		tmp = 0x4747;
 	b43_ntab_write(dev, B43_NTAB16(15, 87), tmp);
 
-	if (nphy->hang_avoid)
-		b43_nphy_stay_in_carrier_search(dev, false);
-
 	b43_phy_write(dev, B43_NPHY_SAMP_DEPCNT, (samps - 1));
 
 	if (loops != 0xFFFF)
@@ -1213,6 +1301,8 @@
 		b43err(dev->wl, "run samples timeout\n");
 
 	b43_phy_write(dev, B43_NPHY_RFSEQMODE, seq_mode);
+
+	b43_nphy_stay_in_carrier_search(dev, false);
 }
 
 /**************************************************
@@ -1588,8 +1678,8 @@
 	struct b43_phy_n *nphy = dev->phy.n;
 
 	u16 saved_regs_phy_rfctl[2];
-	u16 saved_regs_phy[13];
-	u16 regs_to_store[] = {
+	u16 saved_regs_phy[22];
+	u16 regs_to_store_rev3[] = {
 		B43_NPHY_AFECTL_OVER1, B43_NPHY_AFECTL_OVER,
 		B43_NPHY_AFECTL_C1, B43_NPHY_AFECTL_C2,
 		B43_NPHY_TXF_40CO_B1S1, B43_NPHY_RFCTL_OVER,
@@ -1598,6 +1688,20 @@
 		B43_NPHY_RFCTL_LUT_TRSW_UP1, B43_NPHY_RFCTL_LUT_TRSW_UP2,
 		B43_NPHY_RFCTL_RSSIO1, B43_NPHY_RFCTL_RSSIO2
 	};
+	u16 regs_to_store_rev7[] = {
+		B43_NPHY_AFECTL_OVER1, B43_NPHY_AFECTL_OVER,
+		B43_NPHY_AFECTL_C1, B43_NPHY_AFECTL_C2,
+		B43_NPHY_TXF_40CO_B1S1, B43_NPHY_RFCTL_OVER,
+		0x342, 0x343, 0x346, 0x347,
+		0x2ff,
+		B43_NPHY_TXF_40CO_B1S0, B43_NPHY_TXF_40CO_B32S1,
+		B43_NPHY_RFCTL_CMD,
+		B43_NPHY_RFCTL_LUT_TRSW_UP1, B43_NPHY_RFCTL_LUT_TRSW_UP2,
+		0x340, 0x341, 0x344, 0x345,
+		B43_NPHY_RFCTL_RSSIO1, B43_NPHY_RFCTL_RSSIO2
+	};
+	u16 *regs_to_store;
+	int regs_amount;
 
 	u16 class;
 
@@ -1617,6 +1721,15 @@
 	u8 rx_core_state;
 	int core, i, j, vcm;
 
+	if (dev->phy.rev >= 7) {
+		regs_to_store = regs_to_store_rev7;
+		regs_amount = ARRAY_SIZE(regs_to_store_rev7);
+	} else {
+		regs_to_store = regs_to_store_rev3;
+		regs_amount = ARRAY_SIZE(regs_to_store_rev3);
+	}
+	BUG_ON(regs_amount > ARRAY_SIZE(saved_regs_phy));
+
 	class = b43_nphy_classifier(dev, 0, 0);
 	b43_nphy_classifier(dev, 7, 4);
 	b43_nphy_read_clip_detection(dev, clip_state);
@@ -1624,22 +1737,29 @@
 
 	saved_regs_phy_rfctl[0] = b43_phy_read(dev, B43_NPHY_RFCTL_INTC1);
 	saved_regs_phy_rfctl[1] = b43_phy_read(dev, B43_NPHY_RFCTL_INTC2);
-	for (i = 0; i < ARRAY_SIZE(regs_to_store); i++)
+	for (i = 0; i < regs_amount; i++)
 		saved_regs_phy[i] = b43_phy_read(dev, regs_to_store[i]);
 
 	b43_nphy_rf_ctl_intc_override(dev, N_INTC_OVERRIDE_OFF, 0, 7);
 	b43_nphy_rf_ctl_intc_override(dev, N_INTC_OVERRIDE_TRSW, 1, 7);
-	b43_nphy_rf_ctl_override(dev, 0x1, 0, 0, false);
-	b43_nphy_rf_ctl_override(dev, 0x2, 1, 0, false);
-	b43_nphy_rf_ctl_override(dev, 0x80, 1, 0, false);
-	b43_nphy_rf_ctl_override(dev, 0x40, 1, 0, false);
 
-	if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
-		b43_nphy_rf_ctl_override(dev, 0x20, 0, 0, false);
-		b43_nphy_rf_ctl_override(dev, 0x10, 1, 0, false);
+	if (dev->phy.rev >= 7) {
+		/* TODO */
+		if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
+		} else {
+		}
 	} else {
-		b43_nphy_rf_ctl_override(dev, 0x10, 0, 0, false);
-		b43_nphy_rf_ctl_override(dev, 0x20, 1, 0, false);
+		b43_nphy_rf_ctl_override(dev, 0x1, 0, 0, false);
+		b43_nphy_rf_ctl_override(dev, 0x2, 1, 0, false);
+		b43_nphy_rf_ctl_override(dev, 0x80, 1, 0, false);
+		b43_nphy_rf_ctl_override(dev, 0x40, 1, 0, false);
+		if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) {
+			b43_nphy_rf_ctl_override(dev, 0x20, 0, 0, false);
+			b43_nphy_rf_ctl_override(dev, 0x10, 1, 0, false);
+		} else {
+			b43_nphy_rf_ctl_override(dev, 0x10, 0, 0, false);
+			b43_nphy_rf_ctl_override(dev, 0x20, 1, 0, false);
+		}
 	}
 
 	rx_core_state = b43_nphy_get_rx_core_state(dev);
@@ -1654,8 +1774,11 @@
 
 		/* Grab RSSI results for every possible VCM */
 		for (vcm = 0; vcm < 8; vcm++) {
-			b43_radio_maskset(dev, r | B2056_RX_RSSI_MISC, 0xE3,
-					vcm << 2);
+			if (dev->phy.rev >= 7)
+				;
+			else
+				b43_radio_maskset(dev, r | B2056_RX_RSSI_MISC,
+						  0xE3, vcm << 2);
 			b43_nphy_poll_rssi(dev, N_RSSI_NB, results[vcm], 8);
 		}
 
@@ -1682,8 +1805,11 @@
 		}
 
 		/* Select the best VCM */
-		b43_radio_maskset(dev, r | B2056_RX_RSSI_MISC, 0xE3,
-				  vcm_final << 2);
+		if (dev->phy.rev >= 7)
+			;
+		else
+			b43_radio_maskset(dev, r | B2056_RX_RSSI_MISC,
+					  0xE3, vcm_final << 2);
 
 		for (i = 0; i < 4; i++) {
 			if (core != i / 2)
@@ -1736,9 +1862,9 @@
 
 	b43_phy_set(dev, B43_NPHY_RFCTL_OVER, 0x1);
 	b43_phy_set(dev, B43_NPHY_RFCTL_CMD, B43_NPHY_RFCTL_CMD_RXTX);
-	b43_phy_mask(dev, B43_NPHY_TXF_40CO_B1S1, ~0x1);
+	b43_phy_mask(dev, B43_NPHY_RFCTL_OVER, ~0x1);
 
-	for (i = 0; i < ARRAY_SIZE(regs_to_store); i++)
+	for (i = 0; i < regs_amount; i++)
 		b43_phy_write(dev, regs_to_store[i], saved_regs_phy[i]);
 
 	/* Store for future configuration */
@@ -2494,8 +2620,8 @@
 	struct ssb_sprom *sprom = dev->dev->bus_sprom;
 
 	/* TX to RX */
-	u8 tx2rx_events[8] = { 0x4, 0x3, 0x6, 0x5, 0x2, 0x1, 0x8, 0x1F };
-	u8 tx2rx_delays[8] = { 8, 4, 2, 2, 4, 4, 6, 1 };
+	u8 tx2rx_events[7] = { 0x4, 0x3, 0x5, 0x2, 0x1, 0x8, 0x1F };
+	u8 tx2rx_delays[7] = { 8, 4, 4, 4, 4, 6, 1 };
 	/* RX to TX */
 	u8 rx2tx_events_ipa[9] = { 0x0, 0x1, 0x2, 0x8, 0x5, 0x6, 0xF, 0x3,
 					0x1F };
@@ -2503,6 +2629,23 @@
 	u8 rx2tx_events[9] = { 0x0, 0x1, 0x2, 0x8, 0x5, 0x6, 0x3, 0x4, 0x1F };
 	u8 rx2tx_delays[9] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 };
 
+	u16 vmids[5][4] = {
+		{ 0xa2, 0xb4, 0xb4, 0x89, }, /* 0 */
+		{ 0xb4, 0xb4, 0xb4, 0x24, }, /* 1 */
+		{ 0xa2, 0xb4, 0xb4, 0x74, }, /* 2 */
+		{ 0xa2, 0xb4, 0xb4, 0x270, }, /* 3 */
+		{ 0xa2, 0xb4, 0xb4, 0x00, }, /* 4 and 5 */
+	};
+	u16 gains[5][4] = {
+		{ 0x02, 0x02, 0x02, 0x00, }, /* 0 */
+		{ 0x02, 0x02, 0x02, 0x02, }, /* 1 */
+		{ 0x02, 0x02, 0x02, 0x04, }, /* 2 */
+		{ 0x02, 0x02, 0x02, 0x00, }, /* 3 */
+		{ 0x02, 0x02, 0x02, 0x00, }, /* 4 and 5 */
+	};
+	u16 *vmid, *gain;
+
+	u8 pdet_range;
 	u16 tmp16;
 	u32 tmp32;
 
@@ -2561,7 +2704,71 @@
 	b43_ntab_write(dev, B43_NTAB16(8, 0), 2);
 	b43_ntab_write(dev, B43_NTAB16(8, 16), 2);
 
-	/* TODO */
+	if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ)
+		pdet_range = sprom->fem.ghz2.pdet_range;
+	else
+		pdet_range = sprom->fem.ghz5.pdet_range;
+	vmid = vmids[min_t(u16, pdet_range, 4)];
+	gain = gains[min_t(u16, pdet_range, 4)];
+	switch (pdet_range) {
+	case 3:
+		if (!(dev->phy.rev >= 4 &&
+		      b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ))
+			break;
+		/* FALL THROUGH */
+	case 0:
+	case 1:
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x08), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x18), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x0c), 4, gain);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x1c), 4, gain);
+		break;
+	case 2:
+		if (dev->phy.rev >= 6) {
+			if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ)
+				vmid[3] = 0x94;
+			else
+				vmid[3] = 0x8e;
+			gain[3] = 3;
+		} else if (dev->phy.rev == 5) {
+			vmid[3] = 0x84;
+			gain[3] = 2;
+		}
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x08), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x18), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x0c), 4, gain);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x1c), 4, gain);
+		break;
+	case 4:
+	case 5:
+		if (b43_current_band(dev->wl) != IEEE80211_BAND_2GHZ) {
+			if (pdet_range == 4) {
+				vmid[3] = 0x8e;
+				tmp16 = 0x96;
+				gain[3] = 0x2;
+			} else {
+				vmid[3] = 0x89;
+				tmp16 = 0x89;
+				gain[3] = 0;
+			}
+		} else {
+			if (pdet_range == 4) {
+				vmid[3] = 0x89;
+				tmp16 = 0x8b;
+				gain[3] = 0x2;
+			} else {
+				vmid[3] = 0x74;
+				tmp16 = 0x70;
+				gain[3] = 0;
+			}
+		}
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x08), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x0c), 4, gain);
+		vmid[3] = tmp16;
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x18), 4, vmid);
+		b43_ntab_write_bulk(dev, B43_NTAB16(8, 0x1c), 4, gain);
+		break;
+	}
 
 	b43_radio_write(dev, B2056_RX0 | B2056_RX_MIXA_MAST_BIAS, 0x00);
 	b43_radio_write(dev, B2056_RX1 | B2056_RX_MIXA_MAST_BIAS, 0x00);
@@ -2600,7 +2807,7 @@
 	/* Dropped probably-always-true condition */
 	b43_phy_write(dev, B43_NPHY_ED_CRS40ASSERTTHRESH0, 0x03eb);
 	b43_phy_write(dev, B43_NPHY_ED_CRS40ASSERTTHRESH1, 0x03eb);
-	b43_phy_write(dev, B43_NPHY_ED_CRS40DEASSERTTHRESH1, 0x0341);
+	b43_phy_write(dev, B43_NPHY_ED_CRS40DEASSERTTHRESH0, 0x0341);
 	b43_phy_write(dev, B43_NPHY_ED_CRS40DEASSERTTHRESH1, 0x0341);
 	b43_phy_write(dev, B43_NPHY_ED_CRS20LASSERTTHRESH0, 0x042b);
 	b43_phy_write(dev, B43_NPHY_ED_CRS20LASSERTTHRESH1, 0x042b);
@@ -3211,6 +3418,20 @@
 	u8 idx, delta;
 	u8 i, stf_mode;
 
+	/* Array adj_pwr_tbl corresponds to the hardware table. It consists of
+	 * 21 groups, each containing 4 entries.
+	 *
+	 * First group has entries for CCK modulation.
+	 * The rest of groups has 1 entry per modulation (SISO, CDD, STBC, SDM).
+	 *
+	 * Group 0 is for CCK
+	 * Groups 1..4 use BPSK (group per coding rate)
+	 * Groups 5..8 use QPSK (group per coding rate)
+	 * Groups 9..12 use 16-QAM (group per coding rate)
+	 * Groups 13..16 use 64-QAM (group per coding rate)
+	 * Groups 17..20 are unknown
+	 */
+
 	for (i = 0; i < 4; i++)
 		nphy->adj_pwr_tbl[i] = nphy->tx_power_offset[i];
 
@@ -3409,10 +3630,8 @@
 	}
 
 	b43_nphy_tx_prepare_adjusted_power_table(dev);
-	/*
 	b43_ntab_write_bulk(dev, B43_NTAB16(26, 64), 84, nphy->adj_pwr_tbl);
 	b43_ntab_write_bulk(dev, B43_NTAB16(27, 64), 84, nphy->adj_pwr_tbl);
-	*/
 
 	if (nphy->hang_avoid)
 		b43_nphy_stay_in_carrier_search(dev, false);
@@ -5124,7 +5343,7 @@
 	b43_phy_write(dev, B43_NPHY_TXMACIF_HOLDOFF, 0x0015);
 	b43_phy_write(dev, B43_NPHY_TXMACDELAY, 0x0320);
 	if (phy->rev >= 3 && phy->rev <= 6)
-		b43_phy_write(dev, B43_NPHY_PLOAD_CSENSE_EXTLEN, 0x0014);
+		b43_phy_write(dev, B43_NPHY_PLOAD_CSENSE_EXTLEN, 0x0032);
 	b43_nphy_tx_lp_fbw(dev);
 	if (phy->rev >= 3)
 		b43_nphy_spur_workaround(dev);
@@ -5338,7 +5557,6 @@
 	nphy->hang_avoid = (phy->rev == 3 || phy->rev == 4);
 	nphy->spur_avoid = (phy->rev >= 3) ?
 				B43_SPUR_AVOID_AUTO : B43_SPUR_AVOID_DISABLE;
-	nphy->init_por = true;
 	nphy->gain_boost = true; /* this way we follow wl, assume it is true */
 	nphy->txrx_chain = 2; /* sth different than 0 and 1 for now */
 	nphy->phyrxchain = 3; /* to avoid b43_nphy_set_rx_core_state like wl */
@@ -5379,8 +5597,6 @@
 		nphy->ipa2g_on = sprom->fem.ghz2.extpa_gain == 2;
 		nphy->ipa5g_on = sprom->fem.ghz5.extpa_gain == 2;
 	}
-
-	nphy->init_por = true;
 }
 
 static void b43_nphy_op_free(struct b43_wldev *dev)
@@ -5441,8 +5657,11 @@
 {
 	/* Register 1 is a 32-bit register. */
 	B43_WARN_ON(reg == 1);
-	/* N-PHY needs 0x100 for read access */
-	reg |= 0x100;
+
+	if (dev->phy.rev >= 7)
+		reg |= 0x200; /* Radio 0x2057 */
+	else
+		reg |= 0x100;
 
 	b43_write16(dev, B43_MMIO_RADIO_CONTROL, reg);
 	return b43_read16(dev, B43_MMIO_RADIO_DATA_LOW);
@@ -5488,10 +5707,12 @@
 		}
 	} else {
 		if (dev->phy.rev >= 7) {
-			b43_radio_2057_init(dev);
+			if (!dev->phy.radio_on)
+				b43_radio_2057_init(dev);
 			b43_switch_channel(dev, dev->phy.channel);
 		} else if (dev->phy.rev >= 3) {
-			b43_radio_init2056(dev);
+			if (!dev->phy.radio_on)
+				b43_radio_init2056(dev);
 			b43_switch_channel(dev, dev->phy.channel);
 		} else {
 			b43_radio_init2055(dev);

diff --git a/drivers/net/wireless/b43/phy_n.h b/drivers/net/wireless/b43/phy_n.h
index 9a5b6bc..ecfbf66 100644
--- a/drivers/net/wireless/b43/phy_n.h
+++ b/drivers/net/wireless/b43/phy_n.h

@@ -931,7 +931,6 @@
 	u16 papd_epsilon_offset[2];
 	s32 preamble_override;
 	u32 bb_mult_save;
-	bool init_por;
 
 	bool gain_boost;
 	bool elna_gain_config;

diff --git a/drivers/net/wireless/b43/radio_2056.c b/drivers/net/wireless/b43/radio_2056.c
index b4fd934..2ce2560 100644
--- a/drivers/net/wireless/b43/radio_2056.c
+++ b/drivers/net/wireless/b43/radio_2056.c

@@ -48,7 +48,7 @@
 	unsigned int rx_length;
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev3_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev3_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -232,7 +232,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev3_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev3_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -380,7 +380,7 @@
 	[B2056_TX_STATUS_TXLPF_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev3_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev3_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -530,7 +530,7 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev4_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev4_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -714,7 +714,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev4_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev4_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -862,7 +862,7 @@
 	[B2056_TX_STATUS_TXLPF_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev4_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_phy_rev4_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1012,7 +1012,7 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev5_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev5_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1196,7 +1196,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev5_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev5_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1352,7 +1352,7 @@
 	[B2056_TX_GMBB_IDAC7]		= { .ghz5 = 0x0075, .ghz2 = 0x0075, UPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev5_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev5_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1502,7 +1502,7 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev6_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev6_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1686,7 +1686,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev6_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev6_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1842,7 +1842,7 @@
 	[B2056_TX_GMBB_IDAC7]		= { .ghz5 = 0x0070, .ghz2 = 0x0070, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev6_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev6_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -1992,7 +1992,7 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev7_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev7_9_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2176,7 +2176,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev7_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev7_9_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2332,7 +2332,7 @@
 	[B2056_TX_GMBB_IDAC7]		= { .ghz5 = 0x0075, .ghz2 = 0x0075, UPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev7_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev7_9_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2482,7 +2482,7 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev8_syn[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev8_syn[] = {
 	[B2056_SYN_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_SYN_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2666,7 +2666,7 @@
 	[B2056_SYN_LOGEN_TX_CMOS_VALID]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev8_tx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev8_tx[] = {
 	[B2056_TX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_TX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2822,7 +2822,7 @@
 	[B2056_TX_GMBB_IDAC7]		= { .ghz5 = 0x0070, .ghz2 = 0x0070, NOUPLOAD, },
 };
 
-static const struct b2056_inittab_entry b2056_inittab_rev8_rx[] = {
+static const struct b2056_inittab_entry b2056_inittab_radio_rev8_rx[] = {
 	[B2056_RX_RESERVED_ADDR2]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR3]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 	[B2056_RX_RESERVED_ADDR4]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
@@ -2972,24 +2972,69 @@
 	[B2056_RX_STATUS_HPC_RC]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, NOUPLOAD, },
 };
 
-#define INITTABSPTS(prefix) \
-	.syn		= prefix##_syn,			\
-	.syn_length	= ARRAY_SIZE(prefix##_syn),	\
-	.tx		= prefix##_tx,			\
-	.tx_length	= ARRAY_SIZE(prefix##_tx),	\
-	.rx		= prefix##_rx,			\
-	.rx_length	= ARRAY_SIZE(prefix##_rx)
-
-static const struct b2056_inittabs_pts b2056_inittabs[] = {
-	[3] = { INITTABSPTS(b2056_inittab_rev3) },
-	[4] = { INITTABSPTS(b2056_inittab_rev4) },
-	[5] = { INITTABSPTS(b2056_inittab_rev5) },
-	[6] = { INITTABSPTS(b2056_inittab_rev6) },
-	[7] = { INITTABSPTS(b2056_inittab_rev7) },
-	[8] = { INITTABSPTS(b2056_inittab_rev8) },
-	[9] = { INITTABSPTS(b2056_inittab_rev7) },
+static const struct b2056_inittab_entry b2056_inittab_radio_rev11_syn[] = {
+	[B2056_SYN_PLL_PFD]		= { .ghz5 = 0x0006, .ghz2 = 0x0006, UPLOAD, },
+	[B2056_SYN_PLL_CP2]		= { .ghz5 = 0x003f, .ghz2 = 0x003f, UPLOAD, },
+	[B2056_SYN_PLL_LOOPFILTER1]	= { .ghz5 = 0x0006, .ghz2 = 0x0006, UPLOAD, },
+	[B2056_SYN_PLL_LOOPFILTER2]	= { .ghz5 = 0x0006, .ghz2 = 0x0006, UPLOAD, },
+	[B2056_SYN_PLL_LOOPFILTER4]	= { .ghz5 = 0x002b, .ghz2 = 0x002b, UPLOAD, },
+	[B2056_SYN_PLL_VCO2]		= { .ghz5 = 0x00f7, .ghz2 = 0x00f7, UPLOAD, },
+	[B2056_SYN_PLL_VCOCAL12]	= { .ghz5 = 0x0007, .ghz2 = 0x0007, UPLOAD, },
+	[B2056_SYN_LOGENBUF2]		= { .ghz5 = 0x008f, .ghz2 = 0x008f, UPLOAD, },
 };
 
+static const struct b2056_inittab_entry b2056_inittab_radio_rev11_tx[] = {
+	[B2056_TX_PA_SPARE2]		= { .ghz5 = 0x00ee, .ghz2 = 0x00ee, UPLOAD, },
+	[B2056_TX_INTPAA_IAUX_STAT]	= { .ghz5 = 0x0050, .ghz2 = 0x0050, UPLOAD, },
+	[B2056_TX_INTPAA_IMAIN_STAT]	= { .ghz5 = 0x0050, .ghz2 = 0x0050, UPLOAD, },
+	[B2056_TX_INTPAA_PASLOPE]	= { .ghz5 = 0x00f0, .ghz2 = 0x00f0, UPLOAD, },
+	[B2056_TX_INTPAG_PASLOPE]	= { .ghz5 = 0x00f0, .ghz2 = 0x00f0, UPLOAD, },
+	[B2056_TX_PADA_IDAC]		= { .ghz5 = 0x00ff, .ghz2 = 0x00ff, UPLOAD, },
+	[B2056_TX_PADA_SLOPE]		= { .ghz5 = 0x0070, .ghz2 = 0x0070, UPLOAD, },
+	[B2056_TX_PADG_SLOPE]		= { .ghz5 = 0x0070, .ghz2 = 0x0070, UPLOAD, },
+	[B2056_TX_PGAA_IDAC]		= { .ghz5 = 0x00ff, .ghz2 = 0x00ff, UPLOAD, },
+	[B2056_TX_PGAA_SLOPE]		= { .ghz5 = 0x0077, .ghz2 = 0x0077, UPLOAD, },
+	[B2056_TX_PGAG_SLOPE]		= { .ghz5 = 0x0077, .ghz2 = 0x0077, UPLOAD, },
+	[B2056_TX_GMBB_IDAC]		= { .ghz5 = 0x0000, .ghz2 = 0x0000, UPLOAD, },
+	[B2056_TX_TXSPARE1]		= { .ghz5 = 0x0030, .ghz2 = 0x0030, UPLOAD, },
+};
+
+static const struct b2056_inittab_entry b2056_inittab_radio_rev11_rx[] = {
+	[B2056_RX_BIASPOLE_LNAA1_IDAC]	= { .ghz5 = 0x0017, .ghz2 = 0x0017, UPLOAD, },
+	[B2056_RX_LNAA2_IDAC]		= { .ghz5 = 0x00ff, .ghz2 = 0x00ff, UPLOAD, },
+	[B2056_RX_BIASPOLE_LNAG1_IDAC]	= { .ghz5 = 0x0017, .ghz2 = 0x0017, UPLOAD, },
+	[B2056_RX_LNAG2_IDAC]		= { .ghz5 = 0x00f0, .ghz2 = 0x00f0, UPLOAD, },
+	[B2056_RX_MIXA_VCM]		= { .ghz5 = 0x0055, .ghz2 = 0x0055, UPLOAD, },
+	[B2056_RX_MIXA_LOB_BIAS]	= { .ghz5 = 0x0088, .ghz2 = 0x0088, UPLOAD, },
+	[B2056_RX_MIXA_BIAS_AUX]	= { .ghz5 = 0x0007, .ghz2 = 0x0007, UPLOAD, },
+	[B2056_RX_MIXG_VCM]		= { .ghz5 = 0x0055, .ghz2 = 0x0055, UPLOAD, },
+	[B2056_RX_TIA_IOPAMP]		= { .ghz5 = 0x0026, .ghz2 = 0x0026, UPLOAD, },
+	[B2056_RX_TIA_QOPAMP]		= { .ghz5 = 0x0026, .ghz2 = 0x0026, UPLOAD, },
+	[B2056_RX_TIA_IMISC]		= { .ghz5 = 0x000f, .ghz2 = 0x000f, UPLOAD, },
+	[B2056_RX_TIA_QMISC]		= { .ghz5 = 0x000f, .ghz2 = 0x000f, UPLOAD, },
+	[B2056_RX_RXLPF_OUTVCM]		= { .ghz5 = 0x0004, .ghz2 = 0x0004, UPLOAD, },
+	[B2056_RX_VGA_BIAS_DCCANCEL]	= { .ghz5 = 0x0000, .ghz2 = 0x0000, UPLOAD, },
+	[B2056_RX_RXSPARE3]		= { .ghz5 = 0x0005, .ghz2 = 0x0005, UPLOAD, },
+};
+
+#define INITTABSPTS(prefix) \
+	static const struct b2056_inittabs_pts prefix = {	\
+		.syn		= prefix##_syn,			\
+		.syn_length	= ARRAY_SIZE(prefix##_syn),	\
+		.tx		= prefix##_tx,			\
+		.tx_length	= ARRAY_SIZE(prefix##_tx),	\
+		.rx		= prefix##_rx,			\
+		.rx_length	= ARRAY_SIZE(prefix##_rx),	\
+	}
+
+INITTABSPTS(b2056_inittab_phy_rev3);
+INITTABSPTS(b2056_inittab_phy_rev4);
+INITTABSPTS(b2056_inittab_radio_rev5);
+INITTABSPTS(b2056_inittab_radio_rev6);
+INITTABSPTS(b2056_inittab_radio_rev7_9);
+INITTABSPTS(b2056_inittab_radio_rev8);
+INITTABSPTS(b2056_inittab_radio_rev11);
+
 #define RADIOREGS3(r00, r01, r02, r03, r04, r05, r06, r07, r08, r09, \
 		   r10, r11, r12, r13, r14, r15, r16, r17, r18, r19, \
 		   r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, \
@@ -3041,7 +3086,7 @@
 	.phy_regs.phy_bw6	= r5
 
 /* http://bcm-v4.sipsolutions.net/802.11/Radio/2056/ChannelTable */
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev3[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_phy_rev3[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -4036,7 +4081,7 @@
   },
 };
 
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev4[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_phy_rev4[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -5031,7 +5076,7 @@
   },
 };
 
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev5[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_radio_rev5[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -6026,7 +6071,7 @@
   },
 };
 
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev6[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_radio_rev6[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -7021,7 +7066,7 @@
   },
 };
 
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev7_9[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_radio_rev7_9[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -8016,7 +8061,7 @@
   },
 };
 
-static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_rev8[] = {
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_radio_rev8[] = {
   {	.freq			= 4920,
 	RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x04,
 		   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
@@ -9011,6 +9056,1154 @@
   },
 };
 
+static const struct b43_nphy_channeltab_entry_rev3 b43_nphy_channeltab_radio_rev11[] = {
+	{
+		.freq			= 4920,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xec, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07b4, 0x07b0, 0x07ac, 0x0214, 0x0215, 0x0216),
+	},
+	{
+		.freq			= 4930,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xed, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07b8, 0x07b4, 0x07b0, 0x0213, 0x0214, 0x0215),
+	},
+	{
+		.freq			= 4940,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xee, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07bc, 0x07b8, 0x07b4, 0x0212, 0x0213, 0x0214),
+	},
+	{
+		.freq			= 4950,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xef, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x00, 0x00, 0x00, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07c0, 0x07bc, 0x07b8, 0x0211, 0x0212, 0x0213),
+	},
+	{
+		.freq			= 4960,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf0, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07c4, 0x07c0, 0x07bc, 0x020f, 0x0211, 0x0212),
+	},
+	{
+		.freq			= 4970,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf1, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07c8, 0x07c4, 0x07c0, 0x020e, 0x020f, 0x0211),
+	},
+	{
+		.freq			= 4980,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf2, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07cc, 0x07c8, 0x07c4, 0x020d, 0x020e, 0x020f),
+	},
+	{
+		.freq			= 4990,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf3, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07d0, 0x07cc, 0x07c8, 0x020c, 0x020d, 0x020e),
+	},
+	{
+		.freq			= 5000,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf4, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07d4, 0x07d0, 0x07cc, 0x020b, 0x020c, 0x020d),
+	},
+	{
+		.freq			= 5010,
+		RADIOREGS3(0xff, 0x01, 0x01, 0x01, 0xf5, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07d8, 0x07d4, 0x07d0, 0x020a, 0x020b, 0x020c),
+	},
+	{
+		.freq			= 5020,
+		RADIOREGS3(0xf7, 0x01, 0x01, 0x01, 0xf6, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07dc, 0x07d8, 0x07d4, 0x0209, 0x020a, 0x020b),
+	},
+	{
+		.freq			= 5030,
+		RADIOREGS3(0xf7, 0x01, 0x01, 0x01, 0xf7, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07e0, 0x07dc, 0x07d8, 0x0208, 0x0209, 0x020a),
+	},
+	{
+		.freq			= 5040,
+		RADIOREGS3(0xef, 0x01, 0x01, 0x01, 0xf8, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07e4, 0x07e0, 0x07dc, 0x0207, 0x0208, 0x0209),
+	},
+	{
+		.freq			= 5050,
+		RADIOREGS3(0xef, 0x01, 0x01, 0x01, 0xf9, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07e8, 0x07e4, 0x07e0, 0x0206, 0x0207, 0x0208),
+	},
+	{
+		.freq			= 5060,
+		RADIOREGS3(0xe6, 0x01, 0x01, 0x01, 0xfa, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfe, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfe, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07ec, 0x07e8, 0x07e4, 0x0205, 0x0206, 0x0207),
+	},
+	{
+		.freq			= 5070,
+		RADIOREGS3(0xe6, 0x01, 0x01, 0x01, 0xfb, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfd, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfd, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07f0, 0x07ec, 0x07e8, 0x0204, 0x0205, 0x0206),
+	},
+	{
+		.freq			= 5080,
+		RADIOREGS3(0xde, 0x01, 0x01, 0x01, 0xfc, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfd, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfd, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07f4, 0x07f0, 0x07ec, 0x0203, 0x0204, 0x0205),
+	},
+	{
+		.freq			= 5090,
+		RADIOREGS3(0xde, 0x01, 0x01, 0x01, 0xfd, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x01, 0x01, 0x01, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfd, 0x00, 0x09, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfd, 0x00, 0x09, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07f8, 0x07f4, 0x07f0, 0x0202, 0x0203, 0x0204),
+	},
+	{
+		.freq			= 5100,
+		RADIOREGS3(0xd6, 0x01, 0x01, 0x01, 0xfe, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfd, 0x00, 0x08, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfd, 0x00, 0x08, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x07fc, 0x07f8, 0x07f4, 0x0201, 0x0202, 0x0203),
+	},
+	{
+		.freq			= 5110,
+		RADIOREGS3(0xd6, 0x01, 0x01, 0x01, 0xff, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfc, 0x00, 0x08, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfc, 0x00, 0x08, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0800, 0x07fc, 0x07f8, 0x0200, 0x0201, 0x0202),
+	},
+	{
+		.freq			= 5120,
+		RADIOREGS3(0xce, 0x01, 0x01, 0x02, 0x00, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfc, 0x00, 0x08, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfc, 0x00, 0x08, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0804, 0x0800, 0x07fc, 0x01ff, 0x0200, 0x0201),
+	},
+	{
+		.freq			= 5130,
+		RADIOREGS3(0xce, 0x01, 0x01, 0x02, 0x01, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfc, 0x00, 0x08, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfc, 0x00, 0x08, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0808, 0x0804, 0x0800, 0x01fe, 0x01ff, 0x0200),
+	},
+	{
+		.freq			= 5140,
+		RADIOREGS3(0xc6, 0x01, 0x01, 0x02, 0x02, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfb, 0x00, 0x08, 0x00, 0x77, 0x00, 0x0f,
+			   0x00, 0x6f, 0x00, 0xfb, 0x00, 0x08, 0x00, 0x77,
+			   0x00, 0x0f, 0x00, 0x6f, 0x00),
+		PHYREGS(0x080c, 0x0808, 0x0804, 0x01fd, 0x01fe, 0x01ff),
+	},
+	{
+		.freq			= 5160,
+		RADIOREGS3(0xbe, 0x01, 0x01, 0x02, 0x04, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfa, 0x00, 0x07, 0x00, 0x77, 0x00, 0x0e,
+			   0x00, 0x6f, 0x00, 0xfa, 0x00, 0x07, 0x00, 0x77,
+			   0x00, 0x0e, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0814, 0x0810, 0x080c, 0x01fb, 0x01fc, 0x01fd),
+	},
+	{
+		.freq			= 5170,
+		RADIOREGS3(0xbe, 0x01, 0x01, 0x02, 0x05, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xfa, 0x00, 0x07, 0x00, 0x77, 0x00, 0x0e,
+			   0x00, 0x6f, 0x00, 0xfa, 0x00, 0x07, 0x00, 0x77,
+			   0x00, 0x0e, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0818, 0x0814, 0x0810, 0x01fa, 0x01fb, 0x01fc),
+	},
+	{
+		.freq			= 5180,
+		RADIOREGS3(0xb6, 0x01, 0x01, 0x02, 0x06, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xf9, 0x00, 0x06, 0x00, 0x77, 0x00, 0x0e,
+			   0x00, 0x6f, 0x00, 0xf9, 0x00, 0x06, 0x00, 0x77,
+			   0x00, 0x0e, 0x00, 0x6f, 0x00),
+		PHYREGS(0x081c, 0x0818, 0x0814, 0x01f9, 0x01fa, 0x01fb),
+	},
+	{
+		.freq			= 5190,
+		RADIOREGS3(0xb6, 0x01, 0x01, 0x02, 0x07, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xf9, 0x00, 0x06, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xf9, 0x00, 0x06, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0820, 0x081c, 0x0818, 0x01f8, 0x01f9, 0x01fa),
+	},
+	{
+		.freq			= 5200,
+		RADIOREGS3(0xaf, 0x01, 0x01, 0x02, 0x08, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xf9, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xf9, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0824, 0x0820, 0x081c, 0x01f7, 0x01f8, 0x01f9),
+	},
+	{
+		.freq			= 5210,
+		RADIOREGS3(0xaf, 0x01, 0x01, 0x02, 0x09, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8f, 0x0f, 0x00,
+			   0xff, 0xf9, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xf9, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0828, 0x0824, 0x0820, 0x01f6, 0x01f7, 0x01f8),
+	},
+	{
+		.freq			= 5220,
+		RADIOREGS3(0xa7, 0x01, 0x01, 0x02, 0x0a, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8e, 0x0f, 0x00,
+			   0xfe, 0xd8, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xd8, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x082c, 0x0828, 0x0824, 0x01f5, 0x01f6, 0x01f7),
+	},
+	{
+		.freq			= 5230,
+		RADIOREGS3(0xa7, 0x01, 0x01, 0x02, 0x0b, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8e, 0x0f, 0x00,
+			   0xee, 0xd8, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xd8, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0830, 0x082c, 0x0828, 0x01f4, 0x01f5, 0x01f6),
+	},
+	{
+		.freq			= 5240,
+		RADIOREGS3(0xa0, 0x01, 0x01, 0x02, 0x0c, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8e, 0x0f, 0x00,
+			   0xee, 0xc8, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xc8, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0834, 0x0830, 0x082c, 0x01f3, 0x01f4, 0x01f5),
+	},
+	{
+		.freq			= 5250,
+		RADIOREGS3(0xa0, 0x01, 0x01, 0x02, 0x0d, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8e, 0x0f, 0x00,
+			   0xed, 0xc7, 0x00, 0x05, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0838, 0x0834, 0x0830, 0x01f2, 0x01f3, 0x01f4),
+	},
+	{
+		.freq			= 5260,
+		RADIOREGS3(0x98, 0x01, 0x01, 0x02, 0x0e, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x02, 0x02, 0x02, 0x8e, 0x0e, 0x00,
+			   0xed, 0xc7, 0x00, 0x04, 0x00, 0x77, 0x00, 0x0d,
+			   0x00, 0x6f, 0x00, 0xc7, 0x00, 0x04, 0x00, 0x77,
+			   0x00, 0x0d, 0x00, 0x6f, 0x00),
+		PHYREGS(0x083c, 0x0838, 0x0834, 0x01f1, 0x01f2, 0x01f3),
+	},
+	{
+		.freq			= 5270,
+		RADIOREGS3(0x98, 0x01, 0x01, 0x02, 0x0f, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8e, 0x0e, 0x00,
+			   0xed, 0xc7, 0x00, 0x04, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xc7, 0x00, 0x04, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0840, 0x083c, 0x0838, 0x01f0, 0x01f1, 0x01f2),
+	},
+	{
+		.freq			= 5280,
+		RADIOREGS3(0x91, 0x01, 0x01, 0x02, 0x10, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0e, 0x00,
+			   0xdc, 0xb7, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xb7, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0844, 0x0840, 0x083c, 0x01f0, 0x01f0, 0x01f1),
+	},
+	{
+		.freq			= 5290,
+		RADIOREGS3(0x91, 0x01, 0x01, 0x02, 0x11, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0e, 0x00,
+			   0xdc, 0xb7, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xb7, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0848, 0x0844, 0x0840, 0x01ef, 0x01f0, 0x01f0),
+	},
+	{
+		.freq			= 5300,
+		RADIOREGS3(0x8a, 0x01, 0x01, 0x02, 0x12, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0e, 0x00,
+			   0xdc, 0xb7, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xb7, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x084c, 0x0848, 0x0844, 0x01ee, 0x01ef, 0x01f0),
+	},
+	{
+		.freq			= 5310,
+		RADIOREGS3(0x8a, 0x01, 0x01, 0x02, 0x13, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0e, 0x00,
+			   0xdc, 0xb7, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xb7, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0850, 0x084c, 0x0848, 0x01ed, 0x01ee, 0x01ef),
+	},
+	{
+		.freq			= 5320,
+		RADIOREGS3(0x83, 0x01, 0x01, 0x02, 0x14, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0e, 0x00,
+			   0xdb, 0xb7, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0c,
+			   0x00, 0x6f, 0x00, 0xb7, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0c, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0854, 0x0850, 0x084c, 0x01ec, 0x01ed, 0x01ee),
+	},
+	{
+		.freq			= 5330,
+		RADIOREGS3(0x83, 0x01, 0x01, 0x02, 0x15, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0d, 0x00,
+			   0xcb, 0xa6, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0b,
+			   0x00, 0x6f, 0x00, 0xa6, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0b, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0858, 0x0854, 0x0850, 0x01eb, 0x01ec, 0x01ed),
+	},
+	{
+		.freq			= 5340,
+		RADIOREGS3(0x7c, 0x01, 0x01, 0x02, 0x16, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8d, 0x0d, 0x00,
+			   0xca, 0xa6, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0b,
+			   0x00, 0x6f, 0x00, 0xa6, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0b, 0x00, 0x6f, 0x00),
+		PHYREGS(0x085c, 0x0858, 0x0854, 0x01ea, 0x01eb, 0x01ec),
+	},
+	{
+		.freq			= 5350,
+		RADIOREGS3(0x7c, 0x01, 0x01, 0x02, 0x17, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0d, 0x00,
+			   0xca, 0xa6, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0b,
+			   0x00, 0x6f, 0x00, 0xa6, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0b, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0860, 0x085c, 0x0858, 0x01e9, 0x01ea, 0x01eb),
+	},
+	{
+		.freq			= 5360,
+		RADIOREGS3(0x75, 0x01, 0x01, 0x02, 0x18, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0d, 0x00,
+			   0xc9, 0x95, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x95, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0864, 0x0860, 0x085c, 0x01e8, 0x01e9, 0x01ea),
+	},
+	{
+		.freq			= 5370,
+		RADIOREGS3(0x75, 0x01, 0x01, 0x02, 0x19, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0d, 0x00,
+			   0xc9, 0x95, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x95, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0868, 0x0864, 0x0860, 0x01e7, 0x01e8, 0x01e9),
+	},
+	{
+		.freq			= 5380,
+		RADIOREGS3(0x6e, 0x01, 0x01, 0x02, 0x1a, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0c, 0x00,
+			   0xb8, 0x95, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x95, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x086c, 0x0868, 0x0864, 0x01e6, 0x01e7, 0x01e8),
+	},
+	{
+		.freq			= 5390,
+		RADIOREGS3(0x6e, 0x01, 0x01, 0x02, 0x1b, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0c, 0x00,
+			   0xb8, 0x84, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0870, 0x086c, 0x0868, 0x01e5, 0x01e6, 0x01e7),
+	},
+	{
+		.freq			= 5400,
+		RADIOREGS3(0x67, 0x01, 0x01, 0x02, 0x1c, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0c, 0x00,
+			   0xb8, 0x84, 0x00, 0x03, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x03, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0874, 0x0870, 0x086c, 0x01e5, 0x01e5, 0x01e6),
+	},
+	{
+		.freq			= 5410,
+		RADIOREGS3(0x67, 0x01, 0x01, 0x02, 0x1d, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0c, 0x00,
+			   0xb7, 0x84, 0x00, 0x02, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x02, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0878, 0x0874, 0x0870, 0x01e4, 0x01e5, 0x01e5),
+	},
+	{
+		.freq			= 5420,
+		RADIOREGS3(0x61, 0x01, 0x01, 0x02, 0x1e, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0c, 0x00,
+			   0xa7, 0x84, 0x00, 0x02, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x02, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x087c, 0x0878, 0x0874, 0x01e3, 0x01e4, 0x01e5),
+	},
+	{
+		.freq			= 5430,
+		RADIOREGS3(0x61, 0x01, 0x01, 0x02, 0x1f, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x03, 0x03, 0x03, 0x8c, 0x0b, 0x00,
+			   0xa6, 0x84, 0x00, 0x02, 0x00, 0x77, 0x00, 0x0a,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x02, 0x00, 0x77,
+			   0x00, 0x0a, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0880, 0x087c, 0x0878, 0x01e2, 0x01e3, 0x01e4),
+	},
+	{
+		.freq			= 5440,
+		RADIOREGS3(0x5a, 0x01, 0x01, 0x02, 0x20, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8b, 0x0b, 0x00,
+			   0xa6, 0x84, 0x00, 0x02, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x02, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0884, 0x0880, 0x087c, 0x01e1, 0x01e2, 0x01e3),
+	},
+	{
+		.freq			= 5450,
+		RADIOREGS3(0x5a, 0x01, 0x01, 0x02, 0x21, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8b, 0x0b, 0x00,
+			   0x95, 0x84, 0x00, 0x01, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x01, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0888, 0x0884, 0x0880, 0x01e0, 0x01e1, 0x01e2),
+	},
+	{
+		.freq			= 5460,
+		RADIOREGS3(0x53, 0x01, 0x01, 0x02, 0x22, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8b, 0x0b, 0x00,
+			   0x95, 0x84, 0x00, 0x01, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x84, 0x00, 0x01, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x088c, 0x0888, 0x0884, 0x01df, 0x01e0, 0x01e1),
+	},
+	{
+		.freq			= 5470,
+		RADIOREGS3(0x53, 0x01, 0x01, 0x02, 0x23, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8b, 0x0b, 0x00,
+			   0x94, 0x73, 0x00, 0x01, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x01, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0890, 0x088c, 0x0888, 0x01de, 0x01df, 0x01e0),
+	},
+	{
+		.freq			= 5480,
+		RADIOREGS3(0x4d, 0x01, 0x01, 0x02, 0x24, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x0a, 0x00,
+			   0x84, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0894, 0x0890, 0x088c, 0x01dd, 0x01de, 0x01df),
+	},
+	{
+		.freq			= 5490,
+		RADIOREGS3(0x4d, 0x01, 0x01, 0x02, 0x25, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x0a, 0x00,
+			   0x83, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x0898, 0x0894, 0x0890, 0x01dd, 0x01dd, 0x01de),
+	},
+	{
+		.freq			= 5500,
+		RADIOREGS3(0x47, 0x01, 0x01, 0x02, 0x26, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x0a, 0x00,
+			   0x82, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x089c, 0x0898, 0x0894, 0x01dc, 0x01dd, 0x01dd),
+	},
+	{
+		.freq			= 5510,
+		RADIOREGS3(0x47, 0x01, 0x01, 0x02, 0x27, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x0a, 0x00,
+			   0x82, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08a0, 0x089c, 0x0898, 0x01db, 0x01dc, 0x01dd),
+	},
+	{
+		.freq			= 5520,
+		RADIOREGS3(0x40, 0x01, 0x01, 0x02, 0x28, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x0a, 0x00,
+			   0x72, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08a4, 0x08a0, 0x089c, 0x01da, 0x01db, 0x01dc),
+	},
+	{
+		.freq			= 5530,
+		RADIOREGS3(0x40, 0x01, 0x01, 0x02, 0x29, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x09, 0x00,
+			   0x72, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08a8, 0x08a4, 0x08a0, 0x01d9, 0x01da, 0x01db),
+	},
+	{
+		.freq			= 5540,
+		RADIOREGS3(0x3a, 0x01, 0x01, 0x02, 0x2a, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x8a, 0x09, 0x00,
+			   0x71, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08ac, 0x08a8, 0x08a4, 0x01d8, 0x01d9, 0x01da),
+	},
+	{
+		.freq			= 5550,
+		RADIOREGS3(0x3a, 0x01, 0x01, 0x02, 0x2b, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x89, 0x09, 0x00,
+			   0x61, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08b0, 0x08ac, 0x08a8, 0x01d7, 0x01d8, 0x01d9),
+	},
+	{
+		.freq			= 5560,
+		RADIOREGS3(0x34, 0x01, 0x01, 0x02, 0x2c, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x89, 0x09, 0x00,
+			   0x61, 0x73, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x73, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08b4, 0x08b0, 0x08ac, 0x01d7, 0x01d7, 0x01d8),
+	},
+	{
+		.freq			= 5570,
+		RADIOREGS3(0x34, 0x01, 0x01, 0x02, 0x2d, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x89, 0x09, 0x00,
+			   0x61, 0x62, 0x00, 0x00, 0x00, 0x77, 0x00, 0x09,
+			   0x00, 0x6f, 0x00, 0x62, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x09, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08b8, 0x08b4, 0x08b0, 0x01d6, 0x01d7, 0x01d7),
+	},
+	{
+		.freq			= 5580,
+		RADIOREGS3(0x2e, 0x01, 0x01, 0x02, 0x2e, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x89, 0x08, 0x00,
+			   0x60, 0x62, 0x00, 0x00, 0x00, 0x77, 0x00, 0x08,
+			   0x00, 0x6f, 0x00, 0x62, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x08, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08bc, 0x08b8, 0x08b4, 0x01d5, 0x01d6, 0x01d7),
+	},
+	{
+		.freq			= 5590,
+		RADIOREGS3(0x2e, 0x01, 0x01, 0x02, 0x2f, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x04, 0x04, 0x04, 0x89, 0x08, 0x00,
+			   0x50, 0x61, 0x00, 0x00, 0x00, 0x77, 0x00, 0x08,
+			   0x00, 0x6f, 0x00, 0x61, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x08, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08c0, 0x08bc, 0x08b8, 0x01d4, 0x01d5, 0x01d6),
+	},
+	{
+		.freq			= 5600,
+		RADIOREGS3(0x28, 0x01, 0x01, 0x02, 0x30, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x89, 0x08, 0x00,
+			   0x50, 0x51, 0x00, 0x00, 0x00, 0x77, 0x00, 0x08,
+			   0x00, 0x6f, 0x00, 0x51, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x08, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08c4, 0x08c0, 0x08bc, 0x01d3, 0x01d4, 0x01d5),
+	},
+	{
+		.freq			= 5610,
+		RADIOREGS3(0x28, 0x01, 0x01, 0x02, 0x31, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x89, 0x08, 0x00,
+			   0x50, 0x51, 0x00, 0x00, 0x00, 0x77, 0x00, 0x08,
+			   0x00, 0x6f, 0x00, 0x51, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x08, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08c8, 0x08c4, 0x08c0, 0x01d2, 0x01d3, 0x01d4),
+	},
+	{
+		.freq			= 5620,
+		RADIOREGS3(0x21, 0x01, 0x01, 0x02, 0x32, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x89, 0x08, 0x00,
+			   0x50, 0x50, 0x00, 0x00, 0x00, 0x77, 0x00, 0x07,
+			   0x00, 0x6f, 0x00, 0x50, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x07, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08cc, 0x08c8, 0x08c4, 0x01d2, 0x01d2, 0x01d3),
+	},
+	{
+		.freq			= 5630,
+		RADIOREGS3(0x21, 0x01, 0x01, 0x02, 0x33, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x88, 0x07, 0x00,
+			   0x50, 0x50, 0x00, 0x00, 0x00, 0x77, 0x00, 0x07,
+			   0x00, 0x6f, 0x00, 0x50, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x07, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08d0, 0x08cc, 0x08c8, 0x01d1, 0x01d2, 0x01d2),
+	},
+	{
+		.freq			= 5640,
+		RADIOREGS3(0x1c, 0x01, 0x01, 0x02, 0x34, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x88, 0x07, 0x00,
+			   0x40, 0x50, 0x00, 0x00, 0x00, 0x77, 0x00, 0x07,
+			   0x00, 0x6f, 0x00, 0x50, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x07, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08d4, 0x08d0, 0x08cc, 0x01d0, 0x01d1, 0x01d2),
+	},
+	{
+		.freq			= 5650,
+		RADIOREGS3(0x1c, 0x01, 0x01, 0x02, 0x35, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x88, 0x07, 0x00,
+			   0x40, 0x40, 0x00, 0x00, 0x00, 0x77, 0x00, 0x07,
+			   0x00, 0x6f, 0x00, 0x40, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x07, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08d8, 0x08d4, 0x08d0, 0x01cf, 0x01d0, 0x01d1),
+	},
+	{
+		.freq			= 5660,
+		RADIOREGS3(0x16, 0x01, 0x01, 0x02, 0x36, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x88, 0x07, 0x00,
+			   0x40, 0x40, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6f, 0x00, 0x40, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08dc, 0x08d8, 0x08d4, 0x01ce, 0x01cf, 0x01d0),
+	},
+	{
+		.freq			= 5670,
+		RADIOREGS3(0x16, 0x01, 0x01, 0x02, 0x37, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x88, 0x07, 0x00,
+			   0x40, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6f, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08e0, 0x08dc, 0x08d8, 0x01ce, 0x01ce, 0x01cf),
+	},
+	{
+		.freq			= 5680,
+		RADIOREGS3(0x10, 0x01, 0x01, 0x02, 0x38, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6f, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08e4, 0x08e0, 0x08dc, 0x01cd, 0x01ce, 0x01ce),
+	},
+	{
+		.freq			= 5690,
+		RADIOREGS3(0x10, 0x01, 0x01, 0x02, 0x39, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6f, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6f, 0x00),
+		PHYREGS(0x08e8, 0x08e4, 0x08e0, 0x01cc, 0x01cd, 0x01ce),
+	},
+	{
+		.freq			= 5700,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3a, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6e, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6e, 0x00),
+		PHYREGS(0x08ec, 0x08e8, 0x08e4, 0x01cb, 0x01cc, 0x01cd),
+	},
+	{
+		.freq			= 5710,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3b, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6e, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6e, 0x00),
+		PHYREGS(0x08f0, 0x08ec, 0x08e8, 0x01ca, 0x01cb, 0x01cc),
+	},
+	{
+		.freq			= 5720,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3c, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6e, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6e, 0x00),
+		PHYREGS(0x08f4, 0x08f0, 0x08ec, 0x01c9, 0x01ca, 0x01cb),
+	},
+	{
+		.freq			= 5725,
+		RADIOREGS3(0x03, 0x01, 0x02, 0x04, 0x79, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x87, 0x06, 0x00,
+			   0x30, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6e, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6e, 0x00),
+		PHYREGS(0x08f6, 0x08f2, 0x08ee, 0x01c9, 0x01ca, 0x01cb),
+	},
+	{
+		.freq			= 5730,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3d, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x20, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6e, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6e, 0x00),
+		PHYREGS(0x08f8, 0x08f4, 0x08f0, 0x01c9, 0x01c9, 0x01ca),
+	},
+	{
+		.freq			= 5735,
+		RADIOREGS3(0x03, 0x01, 0x02, 0x04, 0x7b, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x20, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6d, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6d, 0x00),
+		PHYREGS(0x08fa, 0x08f6, 0x08f2, 0x01c8, 0x01c9, 0x01ca),
+	},
+	{
+		.freq			= 5740,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3e, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x20, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6d, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6d, 0x00),
+		PHYREGS(0x08fc, 0x08f8, 0x08f4, 0x01c8, 0x01c9, 0x01c9),
+	},
+	{
+		.freq			= 5745,
+		RADIOREGS3(0xfe, 0x00, 0x02, 0x04, 0x7d, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x20, 0x30, 0x00, 0x00, 0x00, 0x77, 0x00, 0x06,
+			   0x00, 0x6d, 0x00, 0x30, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x06, 0x00, 0x6d, 0x00),
+		PHYREGS(0x08fe, 0x08fa, 0x08f6, 0x01c8, 0x01c8, 0x01c9),
+	},
+	{
+		.freq			= 5750,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x3f, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x20, 0x20, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6d, 0x00, 0x20, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6d, 0x00),
+		PHYREGS(0x0900, 0x08fc, 0x08f8, 0x01c7, 0x01c8, 0x01c9),
+	},
+	{
+		.freq			= 5755,
+		RADIOREGS3(0xfe, 0x00, 0x02, 0x04, 0x7f, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x87, 0x05, 0x00,
+			   0x10, 0x20, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6c, 0x00, 0x20, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6c, 0x00),
+		PHYREGS(0x0902, 0x08fe, 0x08fa, 0x01c7, 0x01c8, 0x01c8),
+	},
+	{
+		.freq			= 5760,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x40, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x86, 0x05, 0x00,
+			   0x10, 0x20, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6c, 0x00, 0x20, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6c, 0x00),
+		PHYREGS(0x0904, 0x0900, 0x08fc, 0x01c6, 0x01c7, 0x01c8),
+	},
+	{
+		.freq			= 5765,
+		RADIOREGS3(0xf8, 0x00, 0x02, 0x04, 0x81, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x86, 0x05, 0x00,
+			   0x10, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6c, 0x00),
+		PHYREGS(0x0906, 0x0902, 0x08fe, 0x01c6, 0x01c7, 0x01c8),
+	},
+	{
+		.freq			= 5770,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x41, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x86, 0x04, 0x00,
+			   0x10, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x0908, 0x0904, 0x0900, 0x01c6, 0x01c6, 0x01c7),
+	},
+	{
+		.freq			= 5775,
+		RADIOREGS3(0xf8, 0x00, 0x02, 0x04, 0x83, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x05, 0x05, 0x05, 0x86, 0x04, 0x00,
+			   0x10, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x090a, 0x0906, 0x0902, 0x01c5, 0x01c6, 0x01c7),
+	},
+	{
+		.freq			= 5780,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x42, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x05, 0x05, 0x05, 0x86, 0x04, 0x00,
+			   0x10, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x090c, 0x0908, 0x0904, 0x01c5, 0x01c6, 0x01c6),
+	},
+	{
+		.freq			= 5785,
+		RADIOREGS3(0xf2, 0x00, 0x02, 0x04, 0x85, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x090e, 0x090a, 0x0906, 0x01c4, 0x01c5, 0x01c6),
+	},
+	{
+		.freq			= 5790,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x43, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x10, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x10, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x0910, 0x090c, 0x0908, 0x01c4, 0x01c5, 0x01c6),
+	},
+	{
+		.freq			= 5795,
+		RADIOREGS3(0xf2, 0x00, 0x02, 0x04, 0x87, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x0912, 0x090e, 0x090a, 0x01c4, 0x01c4, 0x01c5),
+	},
+	{
+		.freq			= 5800,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x44, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6b, 0x00),
+		PHYREGS(0x0914, 0x0910, 0x090c, 0x01c3, 0x01c4, 0x01c5),
+	},
+	{
+		.freq			= 5805,
+		RADIOREGS3(0xed, 0x00, 0x02, 0x04, 0x89, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6a, 0x00),
+		PHYREGS(0x0916, 0x0912, 0x090e, 0x01c3, 0x01c4, 0x01c4),
+	},
+	{
+		.freq			= 5810,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x45, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6a, 0x00),
+		PHYREGS(0x0918, 0x0914, 0x0910, 0x01c2, 0x01c3, 0x01c4),
+	},
+	{
+		.freq			= 5815,
+		RADIOREGS3(0xed, 0x00, 0x02, 0x04, 0x8b, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6a, 0x00),
+		PHYREGS(0x091a, 0x0916, 0x0912, 0x01c2, 0x01c3, 0x01c4),
+	},
+	{
+		.freq			= 5820,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x46, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x6a, 0x00),
+		PHYREGS(0x091c, 0x0918, 0x0914, 0x01c2, 0x01c2, 0x01c3),
+	},
+	{
+		.freq			= 5825,
+		RADIOREGS3(0xed, 0x00, 0x02, 0x04, 0x8d, 0x05, 0x05, 0x02,
+			   0x15, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x69, 0x00),
+		PHYREGS(0x091e, 0x091a, 0x0916, 0x01c1, 0x01c2, 0x01c3),
+	},
+	{
+		.freq			= 5830,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x47, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05,
+			   0x00, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x05, 0x00, 0x69, 0x00),
+		PHYREGS(0x0920, 0x091c, 0x0918, 0x01c1, 0x01c2, 0x01c2),
+	},
+	{
+		.freq			= 5840,
+		RADIOREGS3(0x0a, 0x01, 0x01, 0x02, 0x48, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x86, 0x04, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x69, 0x00),
+		PHYREGS(0x0924, 0x0920, 0x091c, 0x01c0, 0x01c1, 0x01c2),
+	},
+	{
+		.freq			= 5850,
+		RADIOREGS3(0xe0, 0x00, 0x01, 0x02, 0x49, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x69, 0x00),
+		PHYREGS(0x0928, 0x0924, 0x0920, 0x01bf, 0x01c0, 0x01c1),
+	},
+	{
+		.freq			= 5860,
+		RADIOREGS3(0xde, 0x00, 0x01, 0x02, 0x4a, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x69, 0x00),
+		PHYREGS(0x092c, 0x0928, 0x0924, 0x01bf, 0x01bf, 0x01c0),
+	},
+	{
+		.freq			= 5870,
+		RADIOREGS3(0xdb, 0x00, 0x01, 0x02, 0x4b, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x68, 0x00),
+		PHYREGS(0x0930, 0x092c, 0x0928, 0x01be, 0x01bf, 0x01bf),
+	},
+	{
+		.freq			= 5880,
+		RADIOREGS3(0xd8, 0x00, 0x01, 0x02, 0x4c, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x68, 0x00),
+		PHYREGS(0x0934, 0x0930, 0x092c, 0x01bd, 0x01be, 0x01bf),
+	},
+	{
+		.freq			= 5890,
+		RADIOREGS3(0xd6, 0x00, 0x01, 0x02, 0x4d, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x68, 0x00),
+		PHYREGS(0x0938, 0x0934, 0x0930, 0x01bc, 0x01bd, 0x01be),
+	},
+	{
+		.freq			= 5900,
+		RADIOREGS3(0xd3, 0x00, 0x01, 0x02, 0x4e, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x68, 0x00),
+		PHYREGS(0x093c, 0x0938, 0x0934, 0x01bc, 0x01bc, 0x01bd),
+	},
+	{
+		.freq			= 5910,
+		RADIOREGS3(0xd6, 0x00, 0x01, 0x02, 0x4f, 0x05, 0x05, 0x02,
+			   0x0c, 0x01, 0x06, 0x06, 0x06, 0x85, 0x03, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x04,
+			   0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77,
+			   0x00, 0x04, 0x00, 0x68, 0x00),
+		PHYREGS(0x0940, 0x093c, 0x0938, 0x01bb, 0x01bc, 0x01bc),
+	},
+	{
+		.freq			= 2412,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x6c, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x04, 0x04, 0x04, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x78, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0b, 0x00, 0x0a, 0x00, 0x89, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0b, 0x00, 0x0a),
+		PHYREGS(0x03c9, 0x03c5, 0x03c1, 0x043a, 0x043f, 0x0443),
+	},
+	{
+		.freq			= 2417,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x71, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x78, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0b, 0x00, 0x0a, 0x00, 0x89, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0b, 0x00, 0x0a),
+		PHYREGS(0x03cb, 0x03c7, 0x03c3, 0x0438, 0x043d, 0x0441),
+	},
+	{
+		.freq			= 2422,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x76, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x67, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0b, 0x00, 0x0a, 0x00, 0x89, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0b, 0x00, 0x0a),
+		PHYREGS(0x03cd, 0x03c9, 0x03c5, 0x0436, 0x043a, 0x043f),
+	},
+	{
+		.freq			= 2427,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x7b, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x57, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x0a, 0x00, 0x78, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x0a),
+		PHYREGS(0x03cf, 0x03cb, 0x03c7, 0x0434, 0x0438, 0x043d),
+	},
+	{
+		.freq			= 2432,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x80, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x56, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x0a, 0x00, 0x77, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x0a),
+		PHYREGS(0x03d1, 0x03cd, 0x03c9, 0x0431, 0x0436, 0x043a),
+	},
+	{
+		.freq			= 2437,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x85, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x46, 0x00, 0x03, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x0a, 0x00, 0x76, 0x00, 0x03, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x0a),
+		PHYREGS(0x03d3, 0x03cf, 0x03cb, 0x042f, 0x0434, 0x0438),
+	},
+	{
+		.freq			= 2442,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x8a, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x05, 0x05, 0x05, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x45, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x0a, 0x00, 0x66, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x0a),
+		PHYREGS(0x03d5, 0x03d1, 0x03cd, 0x042d, 0x0431, 0x0436),
+	},
+	{
+		.freq			= 2447,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x8f, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x06, 0x06, 0x06, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x34, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x09, 0x00, 0x55, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x09),
+		PHYREGS(0x03d7, 0x03d3, 0x03cf, 0x042b, 0x042f, 0x0434),
+	},
+	{
+		.freq			= 2452,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x94, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x06, 0x06, 0x06, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x23, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x09, 0x00, 0x45, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x09),
+		PHYREGS(0x03d9, 0x03d5, 0x03d1, 0x0429, 0x042d, 0x0431),
+	},
+	{
+		.freq			= 2457,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x99, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x06, 0x06, 0x06, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x12, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x0a, 0x00, 0x09, 0x00, 0x34, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x0a, 0x00, 0x09),
+		PHYREGS(0x03db, 0x03d7, 0x03d3, 0x0427, 0x042b, 0x042f),
+	},
+	{
+		.freq			= 2462,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0x9e, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x06, 0x06, 0x06, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x09, 0x00, 0x09, 0x00, 0x33, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x09, 0x00, 0x09),
+		PHYREGS(0x03dd, 0x03d9, 0x03d5, 0x0424, 0x0429, 0x042d),
+	},
+	{
+		.freq			= 2467,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0xa3, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x06, 0x06, 0x06, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x09, 0x00, 0x09, 0x00, 0x22, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x09, 0x00, 0x09),
+		PHYREGS(0x03df, 0x03db, 0x03d7, 0x0422, 0x0427, 0x042b),
+	},
+	{
+		.freq			= 2472,
+		RADIOREGS3(0x00, 0x01, 0x03, 0x09, 0xa8, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x07, 0x07, 0x07, 0x8f, 0x30, 0x00,
+			   0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x09, 0x00, 0x09, 0x00, 0x11, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x09, 0x00, 0x09),
+		PHYREGS(0x03e1, 0x03dd, 0x03d9, 0x0420, 0x0424, 0x0429),
+	},
+	{
+		.freq			= 2484,
+		RADIOREGS3(0xff, 0x01, 0x03, 0x09, 0xb4, 0x06, 0x06, 0x04,
+			   0x2b, 0x01, 0x07, 0x07, 0x07, 0x8f, 0x20, 0x00,
+			   0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x70, 0x00,
+			   0x09, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00,
+			   0x70, 0x00, 0x09, 0x00, 0x09),
+		PHYREGS(0x03e6, 0x03e2, 0x03de, 0x041b, 0x041f, 0x0424),
+	},
+};
+
+/* Select the B2056 init tables: by PHY rev for 3 and 4, otherwise by
+ * radio rev. Returns NULL when no table set matches. */
+static const struct b2056_inittabs_pts
+*b43_nphy_get_inittabs_rev3(struct b43_wldev *dev)
+{
+	struct b43_phy *p = &dev->phy;
+
+	if (p->rev == 3)
+		return &b2056_inittab_phy_rev3;
+	if (p->rev == 4)
+		return &b2056_inittab_phy_rev4;
+
+	switch (p->radio_rev) {
+	case 5:
+		return &b2056_inittab_radio_rev5;
+	case 6:
+		return &b2056_inittab_radio_rev6;
+	case 7:
+	case 9:
+		return &b2056_inittab_radio_rev7_9;
+	case 8:
+		return &b2056_inittab_radio_rev8;
+	case 11:
+		return &b2056_inittab_radio_rev11;
+	default:
+		return NULL;
+	}
+}
+
 static void b2056_upload_inittab(struct b43_wldev *dev, bool ghz5,
 				 bool ignore_uploadflag, u16 routing,
 				 const struct b2056_inittab_entry *e,
@@ -9037,11 +10230,11 @@
 {
 	const struct b2056_inittabs_pts *pts;
 
-	if (dev->phy.rev >= ARRAY_SIZE(b2056_inittabs)) {
+	pts = b43_nphy_get_inittabs_rev3(dev);
+	if (!pts) {
 		B43_WARN_ON(1);
 		return;
 	}
-	pts = &b2056_inittabs[dev->phy.rev];
 
 	b2056_upload_inittab(dev, ghz5, ignore_uploadflag,
 				B2056_SYN, pts->syn, pts->syn_length);
@@ -9060,11 +10253,12 @@
 	const struct b2056_inittabs_pts *pts;
 	const struct b2056_inittab_entry *e;
 
-	if (dev->phy.rev >= ARRAY_SIZE(b2056_inittabs)) {
+	pts = b43_nphy_get_inittabs_rev3(dev);
+	if (!pts) {
 		B43_WARN_ON(1);
 		return;
 	}
-	pts = &b2056_inittabs[dev->phy.rev];
+
 	e = &pts->syn[B2056_SYN_PLL_CP2];
 
 	b43_radio_write(dev, B2056_SYN_PLL_CP2, ghz5 ? e->ghz5 : e->ghz2);
@@ -9073,38 +10267,46 @@
 const struct b43_nphy_channeltab_entry_rev3 *
 b43_nphy_get_chantabent_rev3(struct b43_wldev *dev, u16 freq)
 {
+	struct b43_phy *phy = &dev->phy;
 	const struct b43_nphy_channeltab_entry_rev3 *e;
 	unsigned int length, i;
 
-	switch (dev->phy.rev) {
+	switch (phy->rev) {
 	case 3:
-		e = b43_nphy_channeltab_rev3;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev3);
+		e = b43_nphy_channeltab_phy_rev3;
+		length = ARRAY_SIZE(b43_nphy_channeltab_phy_rev3);
 		break;
 	case 4:
-		e = b43_nphy_channeltab_rev4;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev4);
-		break;
-	case 5:
-		e = b43_nphy_channeltab_rev5;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev5);
-		break;
-	case 6:
-		e = b43_nphy_channeltab_rev6;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev6);
-		break;
-	case 7:
-	case 9:
-		e = b43_nphy_channeltab_rev7_9;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev7_9);
-		break;
-	case 8:
-		e = b43_nphy_channeltab_rev8;
-		length = ARRAY_SIZE(b43_nphy_channeltab_rev8);
+		e = b43_nphy_channeltab_phy_rev4;
+		length = ARRAY_SIZE(b43_nphy_channeltab_phy_rev4);
 		break;
 	default:
-		B43_WARN_ON(1);
-		return NULL;
+		switch (phy->radio_rev) {
+		case 5:
+			e = b43_nphy_channeltab_radio_rev5;
+			length = ARRAY_SIZE(b43_nphy_channeltab_radio_rev5);
+			break;
+		case 6:
+			e = b43_nphy_channeltab_radio_rev6;
+			length = ARRAY_SIZE(b43_nphy_channeltab_radio_rev6);
+			break;
+		case 7:
+		case 9:
+			e = b43_nphy_channeltab_radio_rev7_9;
+			length = ARRAY_SIZE(b43_nphy_channeltab_radio_rev7_9);
+			break;
+		case 8:
+			e = b43_nphy_channeltab_radio_rev8;
+			length = ARRAY_SIZE(b43_nphy_channeltab_radio_rev8);
+			break;
+		case 11:
+			e = b43_nphy_channeltab_radio_rev11;
+			length = ARRAY_SIZE(b43_nphy_channeltab_radio_rev11);
+			break;
+		default:
+			B43_WARN_ON(1);
+			return NULL;
+		}
 	}
 
 	for (i = 0; i < length; i++, e++) {

diff --git a/drivers/net/wireless/b43/tables_nphy.c b/drivers/net/wireless/b43/tables_nphy.c
index 94c755f..4047c05 100644
--- a/drivers/net/wireless/b43/tables_nphy.c
+++ b/drivers/net/wireless/b43/tables_nphy.c

@@ -1627,74 +1627,7 @@
 	0xfa58fc00, 0x0b64fc7e, 0x0800f7b6, 0x00f006be,
 };
 
-static const u32 b43_ntab_noisevar0_r3[] = {
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
-};
-
-static const u32 b43_ntab_noisevar1_r3[] = {
+static const u32 b43_ntab_noisevar_r3[] = {
 	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
 	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
 	0x02110211, 0x0000014d, 0x02110211, 0x0000014d,
@@ -3109,31 +3042,32 @@
 		antswlut = sprom->fem.ghz2.antswlut;
 
 	/* Static tables */
-	ntab_upload(dev, B43_NTAB_FRAMESTRUCT_R3, b43_ntab_framestruct_r3);
-	ntab_upload(dev, B43_NTAB_PILOT_R3, b43_ntab_pilot_r3);
-	ntab_upload(dev, B43_NTAB_TMAP_R3, b43_ntab_tmap_r3);
-	ntab_upload(dev, B43_NTAB_INTLEVEL_R3, b43_ntab_intlevel_r3);
-	ntab_upload(dev, B43_NTAB_TDTRN_R3, b43_ntab_tdtrn_r3);
-	ntab_upload(dev, B43_NTAB_NOISEVAR0_R3, b43_ntab_noisevar0_r3);
-	ntab_upload(dev, B43_NTAB_NOISEVAR1_R3, b43_ntab_noisevar1_r3);
-	ntab_upload(dev, B43_NTAB_MCS_R3, b43_ntab_mcs_r3);
-	ntab_upload(dev, B43_NTAB_TDI20A0_R3, b43_ntab_tdi20a0_r3);
-	ntab_upload(dev, B43_NTAB_TDI20A1_R3, b43_ntab_tdi20a1_r3);
-	ntab_upload(dev, B43_NTAB_TDI40A0_R3, b43_ntab_tdi40a0_r3);
-	ntab_upload(dev, B43_NTAB_TDI40A1_R3, b43_ntab_tdi40a1_r3);
-	ntab_upload(dev, B43_NTAB_PILOTLT_R3, b43_ntab_pilotlt_r3);
-	ntab_upload(dev, B43_NTAB_CHANEST_R3, b43_ntab_channelest_r3);
-	ntab_upload(dev, B43_NTAB_FRAMELT_R3, b43_ntab_framelookup_r3);
-	ntab_upload(dev, B43_NTAB_C0_ESTPLT_R3, b43_ntab_estimatepowerlt0_r3);
-	ntab_upload(dev, B43_NTAB_C1_ESTPLT_R3, b43_ntab_estimatepowerlt1_r3);
-	ntab_upload(dev, B43_NTAB_C0_ADJPLT_R3, b43_ntab_adjustpower0_r3);
-	ntab_upload(dev, B43_NTAB_C1_ADJPLT_R3, b43_ntab_adjustpower1_r3);
-	ntab_upload(dev, B43_NTAB_C0_GAINCTL_R3, b43_ntab_gainctl0_r3);
-	ntab_upload(dev, B43_NTAB_C1_GAINCTL_R3, b43_ntab_gainctl1_r3);
-	ntab_upload(dev, B43_NTAB_C0_IQLT_R3, b43_ntab_iqlt0_r3);
-	ntab_upload(dev, B43_NTAB_C1_IQLT_R3, b43_ntab_iqlt1_r3);
-	ntab_upload(dev, B43_NTAB_C0_LOFEEDTH_R3, b43_ntab_loftlt0_r3);
-	ntab_upload(dev, B43_NTAB_C1_LOFEEDTH_R3, b43_ntab_loftlt1_r3);
+	if (dev->phy.do_full_init) {
+		ntab_upload(dev, B43_NTAB_FRAMESTRUCT_R3, b43_ntab_framestruct_r3);
+		ntab_upload(dev, B43_NTAB_PILOT_R3, b43_ntab_pilot_r3);
+		ntab_upload(dev, B43_NTAB_TMAP_R3, b43_ntab_tmap_r3);
+		ntab_upload(dev, B43_NTAB_INTLEVEL_R3, b43_ntab_intlevel_r3);
+		ntab_upload(dev, B43_NTAB_TDTRN_R3, b43_ntab_tdtrn_r3);
+		ntab_upload(dev, B43_NTAB_NOISEVAR_R3, b43_ntab_noisevar_r3);
+		ntab_upload(dev, B43_NTAB_MCS_R3, b43_ntab_mcs_r3);
+		ntab_upload(dev, B43_NTAB_TDI20A0_R3, b43_ntab_tdi20a0_r3);
+		ntab_upload(dev, B43_NTAB_TDI20A1_R3, b43_ntab_tdi20a1_r3);
+		ntab_upload(dev, B43_NTAB_TDI40A0_R3, b43_ntab_tdi40a0_r3);
+		ntab_upload(dev, B43_NTAB_TDI40A1_R3, b43_ntab_tdi40a1_r3);
+		ntab_upload(dev, B43_NTAB_PILOTLT_R3, b43_ntab_pilotlt_r3);
+		ntab_upload(dev, B43_NTAB_CHANEST_R3, b43_ntab_channelest_r3);
+		ntab_upload(dev, B43_NTAB_FRAMELT_R3, b43_ntab_framelookup_r3);
+		ntab_upload(dev, B43_NTAB_C0_ESTPLT_R3, b43_ntab_estimatepowerlt0_r3);
+		ntab_upload(dev, B43_NTAB_C1_ESTPLT_R3, b43_ntab_estimatepowerlt1_r3);
+		ntab_upload(dev, B43_NTAB_C0_ADJPLT_R3, b43_ntab_adjustpower0_r3);
+		ntab_upload(dev, B43_NTAB_C1_ADJPLT_R3, b43_ntab_adjustpower1_r3);
+		ntab_upload(dev, B43_NTAB_C0_GAINCTL_R3, b43_ntab_gainctl0_r3);
+		ntab_upload(dev, B43_NTAB_C1_GAINCTL_R3, b43_ntab_gainctl1_r3);
+		ntab_upload(dev, B43_NTAB_C0_IQLT_R3, b43_ntab_iqlt0_r3);
+		ntab_upload(dev, B43_NTAB_C1_IQLT_R3, b43_ntab_iqlt1_r3);
+		ntab_upload(dev, B43_NTAB_C0_LOFEEDTH_R3, b43_ntab_loftlt0_r3);
+		ntab_upload(dev, B43_NTAB_C1_LOFEEDTH_R3, b43_ntab_loftlt1_r3);
+	}
 
 	/* Volatile tables */
 	if (antswlut < ARRAY_SIZE(b43_ntab_antswctl_r3))
@@ -3146,20 +3080,22 @@
 static void b43_nphy_tables_init_rev0(struct b43_wldev *dev)
 {
 	/* Static tables */
-	ntab_upload(dev, B43_NTAB_FRAMESTRUCT, b43_ntab_framestruct);
-	ntab_upload(dev, B43_NTAB_FRAMELT, b43_ntab_framelookup);
-	ntab_upload(dev, B43_NTAB_TMAP, b43_ntab_tmap);
-	ntab_upload(dev, B43_NTAB_TDTRN, b43_ntab_tdtrn);
-	ntab_upload(dev, B43_NTAB_INTLEVEL, b43_ntab_intlevel);
-	ntab_upload(dev, B43_NTAB_PILOT, b43_ntab_pilot);
-	ntab_upload(dev, B43_NTAB_TDI20A0, b43_ntab_tdi20a0);
-	ntab_upload(dev, B43_NTAB_TDI20A1, b43_ntab_tdi20a1);
-	ntab_upload(dev, B43_NTAB_TDI40A0, b43_ntab_tdi40a0);
-	ntab_upload(dev, B43_NTAB_TDI40A1, b43_ntab_tdi40a1);
-	ntab_upload(dev, B43_NTAB_CHANEST, b43_ntab_channelest);
-	ntab_upload(dev, B43_NTAB_MCS, b43_ntab_mcs);
-	ntab_upload(dev, B43_NTAB_NOISEVAR10, b43_ntab_noisevar10);
-	ntab_upload(dev, B43_NTAB_NOISEVAR11, b43_ntab_noisevar11);
+	if (dev->phy.do_full_init) {
+		ntab_upload(dev, B43_NTAB_FRAMESTRUCT, b43_ntab_framestruct);
+		ntab_upload(dev, B43_NTAB_FRAMELT, b43_ntab_framelookup);
+		ntab_upload(dev, B43_NTAB_TMAP, b43_ntab_tmap);
+		ntab_upload(dev, B43_NTAB_TDTRN, b43_ntab_tdtrn);
+		ntab_upload(dev, B43_NTAB_INTLEVEL, b43_ntab_intlevel);
+		ntab_upload(dev, B43_NTAB_PILOT, b43_ntab_pilot);
+		ntab_upload(dev, B43_NTAB_TDI20A0, b43_ntab_tdi20a0);
+		ntab_upload(dev, B43_NTAB_TDI20A1, b43_ntab_tdi20a1);
+		ntab_upload(dev, B43_NTAB_TDI40A0, b43_ntab_tdi40a0);
+		ntab_upload(dev, B43_NTAB_TDI40A1, b43_ntab_tdi40a1);
+		ntab_upload(dev, B43_NTAB_CHANEST, b43_ntab_channelest);
+		ntab_upload(dev, B43_NTAB_MCS, b43_ntab_mcs);
+		ntab_upload(dev, B43_NTAB_NOISEVAR10, b43_ntab_noisevar10);
+		ntab_upload(dev, B43_NTAB_NOISEVAR11, b43_ntab_noisevar11);
+	}
 
 	/* Volatile tables */
 	ntab_upload(dev, B43_NTAB_BDI, b43_ntab_bdi);

diff --git a/drivers/net/wireless/b43/tables_nphy.h b/drivers/net/wireless/b43/tables_nphy.h
index 9ff33ad..3a58aee 100644
--- a/drivers/net/wireless/b43/tables_nphy.h
+++ b/drivers/net/wireless/b43/tables_nphy.h

@@ -143,8 +143,7 @@
 #define B43_NTAB_TMAP_R3		B43_NTAB32(12,   0) /* TM AP  */
 #define B43_NTAB_INTLEVEL_R3		B43_NTAB32(13,   0) /* INT LV  */
 #define B43_NTAB_TDTRN_R3		B43_NTAB32(14,   0) /* TD TRN  */
-#define B43_NTAB_NOISEVAR0_R3		B43_NTAB32(16,   0) /* noise variance 0  */
-#define B43_NTAB_NOISEVAR1_R3		B43_NTAB32(16, 128) /* noise variance 1  */
+#define B43_NTAB_NOISEVAR_R3		B43_NTAB32(16,   0) /* noise variance */
 #define B43_NTAB_MCS_R3			B43_NTAB16(18,   0) /* MCS  */
 #define B43_NTAB_TDI20A0_R3		B43_NTAB32(19, 128) /* TDI 20/0  */
 #define B43_NTAB_TDI20A1_R3		B43_NTAB32(19, 256) /* TDI 20/1  */

diff --git a/drivers/net/wireless/b43/wa.c b/drivers/net/wireless/b43/wa.c
index 9b1a038..c218c08 100644
--- a/drivers/net/wireless/b43/wa.c
+++ b/drivers/net/wireless/b43/wa.c

@@ -441,7 +441,7 @@
 
 static void b43_wa_tr_ltov(struct b43_wldev *dev) /* TR Lookup Table Original Values */
 {
-	b43_gtab_write(dev, B43_GTAB_ORIGTR, 0, 0xC480);
+	b43_gtab_write(dev, B43_GTAB_ORIGTR, 0, 0x7654);
 }
 
 static void b43_wa_cpll_nonpilot(struct b43_wldev *dev)

diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 31adb8c..4f38f19 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c

@@ -408,7 +408,7 @@
 		mac_ctl |= B43_TXH_MAC_HWSEQ;
 	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		mac_ctl |= B43_TXH_MAC_STMSDU;
-	if (phy->type == B43_PHYTYPE_A)
+	if (!phy->gmode)
 		mac_ctl |= B43_TXH_MAC_5GHZ;
 
 	/* Overwrite rates[0].count to make the retry calculation

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/Makefile b/drivers/net/wireless/brcm80211/brcmfmac/Makefile
index 1d2ceac..98e67c1 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/Makefile
+++ b/drivers/net/wireless/brcm80211/brcmfmac/Makefile

@@ -33,7 +33,7 @@
 		bcdc.o \
 		dhd_common.o \
 		dhd_linux.o \
-		nvram.o \
+		firmware.o \
 		btcoex.o
 brcmfmac-$(CONFIG_BRCMFMAC_SDIO) += \
 		dhd_sdio.o \

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd.h b/drivers/net/wireless/brcm80211/brcmfmac/dhd.h
index 939d6b1..16f9ab2 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd.h
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd.h

@@ -186,7 +186,7 @@
 void brcmf_txflowblock_if(struct brcmf_if *ifp,
 			  enum brcmf_netif_stop_reason reason, bool state);
 u32 brcmf_get_chip_info(struct brcmf_if *ifp);
-void brcmf_txfinalize(struct brcmf_pub *drvr, struct sk_buff *txp,
+void brcmf_txfinalize(struct brcmf_pub *drvr, struct sk_buff *txp, u8 ifidx,
 		      bool success);
 
 /* Sets dongle media info (drv_version, mac address). */

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h b/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h
index c453561..7735328 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h

@@ -63,7 +63,6 @@
  */
 struct brcmf_bus_ops {
 	int (*preinit)(struct device *dev);
-	int (*init)(struct device *dev);
 	void (*stop)(struct device *dev);
 	int (*txdata)(struct device *dev, struct sk_buff *skb);
 	int (*txctl)(struct device *dev, unsigned char *msg, uint len);
@@ -99,6 +98,7 @@
 	unsigned long tx_realloc;
 	u32 chip;
 	u32 chiprev;
+	bool always_use_fws_queue;
 
 	struct brcmf_bus_ops *ops;
 };
@@ -113,11 +113,6 @@
 	return bus->ops->preinit(bus->dev);
 }
 
-static inline int brcmf_bus_init(struct brcmf_bus *bus)
-{
-	return bus->ops->init(bus->dev);
-}
-
 static inline void brcmf_bus_stop(struct brcmf_bus *bus)
 {
 	bus->ops->stop(bus->dev);

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c
index 6a8983a..ed3e32c 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c

@@ -32,6 +32,9 @@
 #define BRCMF_DEFAULT_SCAN_UNASSOC_TIME	40
 #define BRCMF_DEFAULT_PACKET_FILTER	"100 0 0 0 0x01 0x00"
 
+/* boost value for RSSI_DELTA in preferred join selection */
+#define BRCMF_JOIN_PREF_RSSI_BOOST	8
+
 
 bool brcmf_c_prec_enq(struct device *dev, struct pktq *q,
 		      struct sk_buff *pkt, int prec)
@@ -246,6 +249,7 @@
 {
 	s8 eventmask[BRCMF_EVENTING_MASK_LEN];
 	u8 buf[BRCMF_DCMD_SMLEN];
+	struct brcmf_join_pref_params join_pref_params[2];
 	char *ptr;
 	s32 err;
 
@@ -298,6 +302,20 @@
 		goto done;
 	}
 
+	/* Set up join_pref to select target by RSSI (with boost on 5GHz) */
+	join_pref_params[0].type = BRCMF_JOIN_PREF_RSSI_DELTA;
+	join_pref_params[0].len = 2;
+	join_pref_params[0].rssi_gain = BRCMF_JOIN_PREF_RSSI_BOOST;
+	join_pref_params[0].band = WLC_BAND_5G;
+	join_pref_params[1].type = BRCMF_JOIN_PREF_RSSI;
+	join_pref_params[1].len = 2;
+	join_pref_params[1].rssi_gain = 0;
+	join_pref_params[1].band = 0;
+	err = brcmf_fil_iovar_data_set(ifp, "join_pref", join_pref_params,
+				       sizeof(join_pref_params));
+	if (err)
+		brcmf_err("Set join_pref error (%d)\n", err);
+
 	/* Setup event_msgs, enable E_IF */
 	err = brcmf_fil_iovar_data_get(ifp, "event_msgs", eventmask,
 				       BRCMF_EVENTING_MASK_LEN);

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
index 7d28cd3..09dd8c1 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c

@@ -190,7 +190,7 @@
 	int ret;
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	struct brcmf_pub *drvr = ifp->drvr;
-	struct ethhdr *eh;
+	struct ethhdr *eh = (struct ethhdr *)(skb->data);
 
 	brcmf_dbg(DATA, "Enter, idx=%d\n", ifp->bssidx);
 
@@ -236,6 +236,9 @@
 		goto done;
 	}
 
+	if (eh->h_proto == htons(ETH_P_PAE))
+		atomic_inc(&ifp->pend_8021x_cnt);
+
 	ret = brcmf_fws_process_skb(ifp, skb);
 
 done:
@@ -538,31 +541,26 @@
 		brcmf_netif_rx(ifp, skb);
 }
 
-void brcmf_txfinalize(struct brcmf_pub *drvr, struct sk_buff *txp,
+void brcmf_txfinalize(struct brcmf_pub *drvr, struct sk_buff *txp, u8 ifidx,
 		      bool success)
 {
 	struct brcmf_if *ifp;
 	struct ethhdr *eh;
-	u8 ifidx;
 	u16 type;
-	int res;
-
-	res = brcmf_proto_hdrpull(drvr, false, &ifidx, txp);
 
 	ifp = drvr->iflist[ifidx];
 	if (!ifp)
 		goto done;
 
-	if (res == 0) {
-		eh = (struct ethhdr *)(txp->data);
-		type = ntohs(eh->h_proto);
+	eh = (struct ethhdr *)(txp->data);
+	type = ntohs(eh->h_proto);
 
-		if (type == ETH_P_PAE) {
-			atomic_dec(&ifp->pend_8021x_cnt);
-			if (waitqueue_active(&ifp->pend_8021x_wait))
-				wake_up(&ifp->pend_8021x_wait);
-		}
+	if (type == ETH_P_PAE) {
+		atomic_dec(&ifp->pend_8021x_cnt);
+		if (waitqueue_active(&ifp->pend_8021x_wait))
+			wake_up(&ifp->pend_8021x_wait);
 	}
+
 	if (!success)
 		ifp->stats.tx_errors++;
 done:
@@ -573,13 +571,17 @@
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
 	struct brcmf_pub *drvr = bus_if->drvr;
+	u8 ifidx;
 
 	/* await txstatus signal for firmware if active */
 	if (brcmf_fws_fc_active(drvr->fws)) {
 		if (!success)
 			brcmf_fws_bustxfail(drvr->fws, txp);
 	} else {
-		brcmf_txfinalize(drvr, txp, success);
+		if (brcmf_proto_hdrpull(drvr, false, &ifidx, txp))
+			brcmu_pkt_buf_free_skb(txp);
+		else
+			brcmf_txfinalize(drvr, txp, ifidx, success);
 	}
 }
 
@@ -914,13 +916,6 @@
 
 	brcmf_dbg(TRACE, "\n");
 
-	/* Bring up the bus */
-	ret = brcmf_bus_init(bus_if);
-	if (ret != 0) {
-		brcmf_err("brcmf_sdbrcm_bus_init failed %d\n", ret);
-		return ret;
-	}
-
 	/* add primary networking interface */
 	ifp = brcmf_add_if(drvr, 0, 0, "wlan%d", NULL);
 	if (IS_ERR(ifp))

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
index 13c89a0..8fa0dbb 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c

@@ -42,7 +42,7 @@
 #include <soc.h>
 #include "sdio_host.h"
 #include "chip.h"
-#include "nvram.h"
+#include "firmware.h"
 
 #define DCMD_RESP_TIMEOUT  2000	/* In milli second */
 
@@ -632,43 +632,28 @@
 	{ BCM4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) }
 };
 
-
-static const struct firmware *brcmf_sdio_get_fw(struct brcmf_sdio *bus,
-						  enum brcmf_firmware_type type)
+static const char *brcmf_sdio_get_fwname(struct brcmf_chip *ci,
+					 enum brcmf_firmware_type type)
 {
-	const struct firmware *fw;
-	const char *name;
-	int err, i;
+	int i;
 
 	for (i = 0; i < ARRAY_SIZE(brcmf_fwname_data); i++) {
-		if (brcmf_fwname_data[i].chipid == bus->ci->chip &&
-		    brcmf_fwname_data[i].revmsk & BIT(bus->ci->chiprev)) {
+		if (brcmf_fwname_data[i].chipid == ci->chip &&
+		    brcmf_fwname_data[i].revmsk & BIT(ci->chiprev)) {
 			switch (type) {
 			case BRCMF_FIRMWARE_BIN:
-				name = brcmf_fwname_data[i].bin;
-				break;
+				return brcmf_fwname_data[i].bin;
 			case BRCMF_FIRMWARE_NVRAM:
-				name = brcmf_fwname_data[i].nv;
-				break;
+				return brcmf_fwname_data[i].nv;
 			default:
 				brcmf_err("invalid firmware type (%d)\n", type);
 				return NULL;
 			}
-			goto found;
 		}
 	}
 	brcmf_err("Unknown chipid %d [%d]\n",
-		  bus->ci->chip, bus->ci->chiprev);
+		  ci->chip, ci->chiprev);
 	return NULL;
-
-found:
-	err = request_firmware(&fw, name, &bus->sdiodev->func[2]->dev);
-	if ((err) || (!fw)) {
-		brcmf_err("fail to request firmware %s (%d)\n", name, err);
-		return NULL;
-	}
-
-	return fw;
 }
 
 static void pkt_align(struct sk_buff *p, int len, int align)
@@ -3278,20 +3263,13 @@
 }
 
 static int brcmf_sdio_download_nvram(struct brcmf_sdio *bus,
-				     const struct firmware *nv)
+				     void *vars, u32 varsz)
 {
-	void *vars;
-	u32 varsz;
 	int address;
 	int err;
 
 	brcmf_dbg(TRACE, "Enter\n");
 
-	vars = brcmf_nvram_strip(nv, &varsz);
-
-	if (vars == NULL)
-		return -EINVAL;
-
 	address = bus->ci->ramsize - varsz + bus->ci->rambase;
 	err = brcmf_sdiod_ramrw(bus->sdiodev, true, address, vars, varsz);
 	if (err)
@@ -3300,15 +3278,14 @@
 	else if (!brcmf_sdio_verifymemory(bus->sdiodev, address, vars, varsz))
 		err = -EIO;
 
-	brcmf_nvram_free(vars);
-
 	return err;
 }
 
-static int brcmf_sdio_download_firmware(struct brcmf_sdio *bus)
+static int brcmf_sdio_download_firmware(struct brcmf_sdio *bus,
+					const struct firmware *fw,
+					void *nvram, u32 nvlen)
 {
 	int bcmerror = -EFAULT;
-	const struct firmware *fw;
 	u32 rstvec;
 
 	sdio_claim_host(bus->sdiodev->func[1]);
@@ -3317,12 +3294,6 @@
 	/* Keep arm in reset */
 	brcmf_chip_enter_download(bus->ci);
 
-	fw = brcmf_sdio_get_fw(bus, BRCMF_FIRMWARE_BIN);
-	if (fw == NULL) {
-		bcmerror = -ENOENT;
-		goto err;
-	}
-
 	rstvec = get_unaligned_le32(fw->data);
 	brcmf_dbg(SDIO, "firmware rstvec: %x\n", rstvec);
 
@@ -3330,17 +3301,12 @@
 	release_firmware(fw);
 	if (bcmerror) {
 		brcmf_err("dongle image file download failed\n");
+		brcmf_fw_nvram_free(nvram);
 		goto err;
 	}
 
-	fw = brcmf_sdio_get_fw(bus, BRCMF_FIRMWARE_NVRAM);
-	if (fw == NULL) {
-		bcmerror = -ENOENT;
-		goto err;
-	}
-
-	bcmerror = brcmf_sdio_download_nvram(bus, fw);
-	release_firmware(fw);
+	bcmerror = brcmf_sdio_download_nvram(bus, nvram, nvlen);
+	brcmf_fw_nvram_free(nvram);
 	if (bcmerror) {
 		brcmf_err("dongle nvram file download failed\n");
 		goto err;
@@ -3490,97 +3456,6 @@
 	return err;
 }
 
-static int brcmf_sdio_bus_init(struct device *dev)
-{
-	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-	struct brcmf_sdio *bus = sdiodev->bus;
-	int err, ret = 0;
-	u8 saveclk;
-
-	brcmf_dbg(TRACE, "Enter\n");
-
-	/* try to download image and nvram to the dongle */
-	if (bus_if->state == BRCMF_BUS_DOWN) {
-		bus->alp_only = true;
-		err = brcmf_sdio_download_firmware(bus);
-		if (err)
-			return err;
-		bus->alp_only = false;
-	}
-
-	if (!bus->sdiodev->bus_if->drvr)
-		return 0;
-
-	/* Start the watchdog timer */
-	bus->sdcnt.tickcnt = 0;
-	brcmf_sdio_wd_timer(bus, BRCMF_WD_POLL_MS);
-
-	sdio_claim_host(bus->sdiodev->func[1]);
-
-	/* Make sure backplane clock is on, needed to generate F2 interrupt */
-	brcmf_sdio_clkctl(bus, CLK_AVAIL, false);
-	if (bus->clkstate != CLK_AVAIL)
-		goto exit;
-
-	/* Force clocks on backplane to be sure F2 interrupt propagates */
-	saveclk = brcmf_sdiod_regrb(bus->sdiodev,
-				    SBSDIO_FUNC1_CHIPCLKCSR, &err);
-	if (!err) {
-		brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
-				  (saveclk | SBSDIO_FORCE_HT), &err);
-	}
-	if (err) {
-		brcmf_err("Failed to force clock for F2: err %d\n", err);
-		goto exit;
-	}
-
-	/* Enable function 2 (frame transfers) */
-	w_sdreg32(bus, SDPCM_PROT_VERSION << SMB_DATA_VERSION_SHIFT,
-		  offsetof(struct sdpcmd_regs, tosbmailboxdata));
-	err = sdio_enable_func(bus->sdiodev->func[SDIO_FUNC_2]);
-
-
-	brcmf_dbg(INFO, "enable F2: err=%d\n", err);
-
-	/* If F2 successfully enabled, set core and enable interrupts */
-	if (!err) {
-		/* Set up the interrupt mask and enable interrupts */
-		bus->hostintmask = HOSTINTMASK;
-		w_sdreg32(bus, bus->hostintmask,
-			  offsetof(struct sdpcmd_regs, hostintmask));
-
-		brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_WATERMARK, 8, &err);
-	} else {
-		/* Disable F2 again */
-		sdio_disable_func(bus->sdiodev->func[SDIO_FUNC_2]);
-		ret = -ENODEV;
-	}
-
-	if (brcmf_chip_sr_capable(bus->ci)) {
-		brcmf_sdio_sr_init(bus);
-	} else {
-		/* Restore previous clock setting */
-		brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
-				  saveclk, &err);
-	}
-
-	if (ret == 0) {
-		ret = brcmf_sdiod_intr_register(bus->sdiodev);
-		if (ret != 0)
-			brcmf_err("intr register failed:%d\n", ret);
-	}
-
-	/* If we didn't come up, turn off backplane clock */
-	if (ret != 0)
-		brcmf_sdio_clkctl(bus, CLK_NONE, false);
-
-exit:
-	sdio_release_host(bus->sdiodev->func[1]);
-
-	return ret;
-}
-
 void brcmf_sdio_isr(struct brcmf_sdio *bus)
 {
 	brcmf_dbg(TRACE, "Enter\n");
@@ -4020,13 +3895,114 @@
 static struct brcmf_bus_ops brcmf_sdio_bus_ops = {
 	.stop = brcmf_sdio_bus_stop,
 	.preinit = brcmf_sdio_bus_preinit,
-	.init = brcmf_sdio_bus_init,
 	.txdata = brcmf_sdio_bus_txdata,
 	.txctl = brcmf_sdio_bus_txctl,
 	.rxctl = brcmf_sdio_bus_rxctl,
 	.gettxq = brcmf_sdio_bus_gettxq,
 };
 
+static void brcmf_sdio_firmware_callback(struct device *dev,
+					 const struct firmware *code,
+					 void *nvram, u32 nvram_len)
+{
+	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+	struct brcmf_sdio *bus = sdiodev->bus;
+	int err = 0;
+	u8 saveclk;
+
+	brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev));
+
+	/* try to download image and nvram to the dongle */
+	if (bus_if->state == BRCMF_BUS_DOWN) {
+		bus->alp_only = true;
+		err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
+		if (err)
+			goto fail;
+		bus->alp_only = false;
+	}
+
+	if (!bus_if->drvr)
+		return;
+
+	/* Start the watchdog timer */
+	bus->sdcnt.tickcnt = 0;
+	brcmf_sdio_wd_timer(bus, BRCMF_WD_POLL_MS);
+
+	sdio_claim_host(sdiodev->func[1]);
+
+	/* Make sure backplane clock is on, needed to generate F2 interrupt */
+	brcmf_sdio_clkctl(bus, CLK_AVAIL, false);
+	if (bus->clkstate != CLK_AVAIL)
+		goto release;
+
+	/* Force clocks on backplane to be sure F2 interrupt propagates */
+	saveclk = brcmf_sdiod_regrb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, &err);
+	if (!err) {
+		brcmf_sdiod_regwb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+				  (saveclk | SBSDIO_FORCE_HT), &err);
+	}
+	if (err) {
+		brcmf_err("Failed to force clock for F2: err %d\n", err);
+		goto release;
+	}
+
+	/* Enable function 2 (frame transfers) */
+	w_sdreg32(bus, SDPCM_PROT_VERSION << SMB_DATA_VERSION_SHIFT,
+		  offsetof(struct sdpcmd_regs, tosbmailboxdata));
+	err = sdio_enable_func(sdiodev->func[SDIO_FUNC_2]);
+
+
+	brcmf_dbg(INFO, "enable F2: err=%d\n", err);
+
+	/* If F2 successfully enabled, set core and enable interrupts */
+	if (!err) {
+		/* Set up the interrupt mask and enable interrupts */
+		bus->hostintmask = HOSTINTMASK;
+		w_sdreg32(bus, bus->hostintmask,
+			  offsetof(struct sdpcmd_regs, hostintmask));
+
+		brcmf_sdiod_regwb(sdiodev, SBSDIO_WATERMARK, 8, &err);
+	} else {
+		/* Disable F2 again */
+		sdio_disable_func(sdiodev->func[SDIO_FUNC_2]);
+		goto release;
+	}
+
+	if (brcmf_chip_sr_capable(bus->ci)) {
+		brcmf_sdio_sr_init(bus);
+	} else {
+		/* Restore previous clock setting */
+		brcmf_sdiod_regwb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+				  saveclk, &err);
+	}
+
+	if (err == 0) {
+		err = brcmf_sdiod_intr_register(sdiodev);
+		if (err != 0)
+			brcmf_err("intr register failed:%d\n", err);
+	}
+
+	/* If we didn't come up, turn off backplane clock */
+	if (err != 0)
+		brcmf_sdio_clkctl(bus, CLK_NONE, false);
+
+	sdio_release_host(sdiodev->func[1]);
+
+	err = brcmf_bus_start(dev);
+	if (err != 0) {
+		brcmf_err("dongle is not responding\n");
+		goto fail;
+	}
+	return;
+
+release:
+	sdio_release_host(sdiodev->func[1]);
+fail:
+	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
+	device_release_driver(dev);
+}
+
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 {
 	int ret;
@@ -4110,8 +4086,13 @@
 		goto fail;
 	}
 
+	/* Query the F2 block size, set roundup accordingly */
+	bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
+	bus->roundup = min(max_roundup, bus->blocksize);
+
 	/* Allocate buffers */
 	if (bus->sdiodev->bus_if->maxctl) {
+		bus->sdiodev->bus_if->maxctl += bus->roundup;
 		bus->rxblen =
 		    roundup((bus->sdiodev->bus_if->maxctl + SDPCM_HDRLEN),
 			    ALIGNMENT) + bus->head_align;
@@ -4139,10 +4120,6 @@
 	bus->idletime = BRCMF_IDLE_INTERVAL;
 	bus->idleclock = BRCMF_IDLE_ACTIVE;
 
-	/* Query the F2 block size, set roundup accordingly */
-	bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
-	bus->roundup = min(max_roundup, bus->blocksize);
-
 	/* SR state */
 	bus->sleeping = false;
 	bus->sr_enabled = false;
@@ -4150,10 +4127,14 @@
 	brcmf_sdio_debugfs_create(bus);
 	brcmf_dbg(INFO, "completed!!\n");
 
-	/* if firmware path present try to download and bring up bus */
-	ret = brcmf_bus_start(bus->sdiodev->dev);
+	ret = brcmf_fw_get_firmwares(sdiodev->dev, BRCMF_FW_REQUEST_NVRAM,
+				     brcmf_sdio_get_fwname(bus->ci,
+							   BRCMF_FIRMWARE_BIN),
+				     brcmf_sdio_get_fwname(bus->ci,
+							   BRCMF_FIRMWARE_NVRAM),
+				     brcmf_sdio_firmware_callback);
 	if (ret != 0) {
-		brcmf_err("dongle is not responding\n");
+		brcmf_err("async firmware request failed: %d\n", ret);
 		goto fail;
 	}
 
@@ -4173,9 +4154,7 @@
 		/* De-register interrupt handler */
 		brcmf_sdiod_intr_unregister(bus->sdiodev);
 
-		if (bus->sdiodev->bus_if->drvr) {
-			brcmf_detach(bus->sdiodev->dev);
-		}
+		brcmf_detach(bus->sdiodev->dev);
 
 		cancel_work_sync(&bus->datawork);
 		if (bus->brcmf_wq)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/brcm80211/brcmfmac/firmware.c
new file mode 100644
index 0000000..7b7d237
--- /dev/null
+++ b/drivers/net/wireless/brcm80211/brcmfmac/firmware.c

@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2013 Broadcom Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/firmware.h>
+
+#include "dhd_dbg.h"
+#include "firmware.h"
+
+enum nvram_parser_state {
+	IDLE,
+	KEY,
+	VALUE,
+	COMMENT,
+	END
+};
+
+/**
+ * struct nvram_parser - internal info for parser.
+ *
+ * @state: current parser state.
+ * @fwnv: input buffer being parsed.
+ * @nvram: output buffer with parse result.
+ * @nvram_len: length of parse result.
+ * @line: current line.
+ * @column: current column in line.
+ * @pos: byte offset in input buffer.
+ * @entry: start position of key,value entry.
+ */
+struct nvram_parser {
+	enum nvram_parser_state state;
+	const struct firmware *fwnv;
+	u8 *nvram;
+	u32 nvram_len;
+	u32 line;
+	u32 column;
+	u32 pos;
+	u32 entry;
+};
+
+static bool is_nvram_char(char c)
+{
+	/* comment marker excluded */
+	if (c == '#')
+		return false;
+
+	/* key and value may have any other readable character */
+	return (c > 0x20 && c < 0x7f);
+}
+
+static bool is_whitespace(char c)
+{
+	return (c == ' ' || c == '\r' || c == '\n' || c == '\t');
+}
+
+static enum nvram_parser_state brcmf_nvram_handle_idle(struct nvram_parser *nvp)
+{
+	char c;
+
+	c = nvp->fwnv->data[nvp->pos];
+	if (c == '\n')
+		return COMMENT;
+	if (is_whitespace(c))
+		goto proceed;
+	if (c == '#')
+		return COMMENT;
+	if (is_nvram_char(c)) {
+		nvp->entry = nvp->pos;
+		return KEY;
+	}
+	brcmf_dbg(INFO, "warning: ln=%d:col=%d: ignoring invalid character\n",
+		  nvp->line, nvp->column);
+proceed:
+	nvp->column++;
+	nvp->pos++;
+	return IDLE;
+}
+
+static enum nvram_parser_state brcmf_nvram_handle_key(struct nvram_parser *nvp)
+{
+	enum nvram_parser_state st = nvp->state;
+	char c;
+
+	c = nvp->fwnv->data[nvp->pos];
+	if (c == '=') {
+		st = VALUE;
+	} else if (!is_nvram_char(c)) {
+		brcmf_dbg(INFO, "warning: ln=%d:col=%d: '=' expected, skip invalid key entry\n",
+			  nvp->line, nvp->column);
+		return COMMENT;
+	}
+
+	nvp->column++;
+	nvp->pos++;
+	return st;
+}
+
+/* Consume value characters; the first other character completes the entry. */
+static enum nvram_parser_state
+brcmf_nvram_handle_value(struct nvram_parser *nvp)
+{
+	const u8 *skv, *ekv;	/* start and end of the "key=value" text */
+	u32 cplen;
+	char c;
+
+	c = nvp->fwnv->data[nvp->pos];
+	if (!is_nvram_char(c)) {
+		/* key,value pair complete */
+		ekv = &nvp->fwnv->data[nvp->pos];
+		skv = &nvp->fwnv->data[nvp->entry];
+		cplen = ekv - skv;
+		/* copy to output buffer */
+		memcpy(&nvp->nvram[nvp->nvram_len], skv, cplen);
+		nvp->nvram_len += cplen;
+		nvp->nvram[nvp->nvram_len] = '\0';
+		nvp->nvram_len++;
+		return IDLE;
+	}
+	nvp->pos++;
+	nvp->column++;
+	return VALUE;
+}
+
+/* Skip the rest of the current line; END when no newline is left to find. */
+static enum nvram_parser_state
+brcmf_nvram_handle_comment(struct nvram_parser *nvp)
+{
+	char *sol = (char *)&nvp->fwnv->data[nvp->pos];
+	char *eol;
+
+	/* the blob is only ->size bytes long: never search beyond its end */
+	eol = strnchr(sol, nvp->fwnv->size - nvp->pos, '\n');
+	if (eol == NULL)
+		return END;
+	nvp->line++;
+	nvp->column = 1;
+	nvp->pos += (eol - sol) + 1;
+	return IDLE;
+}
+
+static enum nvram_parser_state brcmf_nvram_handle_end(struct nvram_parser *nvp)
+{
+	/* final state */
+	return END;
+}
+
+static enum nvram_parser_state
+(*nv_parser_states[])(struct nvram_parser *nvp) = {
+	brcmf_nvram_handle_idle,
+	brcmf_nvram_handle_key,
+	brcmf_nvram_handle_value,
+	brcmf_nvram_handle_comment,
+	brcmf_nvram_handle_end
+};
+
+static int brcmf_init_nvram_parser(struct nvram_parser *nvp,
+				   const struct firmware *nv)
+{
+	memset(nvp, 0, sizeof(*nvp));
+	nvp->fwnv = nv;
+	/* Alloc for extra 0 byte + roundup by 4 + length field */
+	nvp->nvram = kzalloc(nv->size + 1 + 3 + sizeof(u32), GFP_KERNEL);
+	if (!nvp->nvram)
+		return -ENOMEM;
+
+	nvp->line = 1;
+	nvp->column = 1;
+	return 0;
+}
+
+/* brcmf_fw_nvram_strip: Takes a buffer of "<var>=<value>\n" lines read from a
+ * file and ending in a NUL. Removes carriage returns, empty lines, comment
+ * lines, and converts newlines to NULs. Shortens buffer as needed and pads
+ * with NULs. End of buffer is completed with token identifying buffer length.
+ */
+static void *brcmf_fw_nvram_strip(const struct firmware *nv, u32 *new_length)
+{
+	struct nvram_parser nvp;
+	u32 pad;
+	u32 token;
+	__le32 token_le;
+
+	if (brcmf_init_nvram_parser(&nvp, nv) < 0)
+		return NULL;
+
+	while (nvp.pos < nv->size) {
+		nvp.state = nv_parser_states[nvp.state](&nvp);
+		if (nvp.state == END)
+			break;
+	}
+	pad = nvp.nvram_len;
+	*new_length = roundup(nvp.nvram_len + 1, 4);
+	while (pad != *new_length) {
+		nvp.nvram[pad] = 0;
+		pad++;
+	}
+
+	token = *new_length / 4;
+	token = (~token << 16) | (token & 0x0000FFFF);
+	token_le = cpu_to_le32(token);
+
+	memcpy(&nvp.nvram[*new_length], &token_le, sizeof(token_le));
+	*new_length += sizeof(token_le);
+
+	return nvp.nvram;
+}
+
+void brcmf_fw_nvram_free(void *nvram)
+{
+	kfree(nvram);
+}
+
+struct brcmf_fw {
+	struct device *dev;
+	u16 flags;
+	const struct firmware *code;
+	const char *nvram_name;
+	void (*done)(struct device *dev, const struct firmware *fw,
+		     void *nvram_image, u32 nvram_len);
+};
+
+static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
+{
+	struct brcmf_fw *fwctx = ctx;
+	u32 nvram_length = 0;
+	void *nvram = NULL;
+
+	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
+	if (!fw && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
+		goto fail;
+
+	if (fw) {
+		nvram = brcmf_fw_nvram_strip(fw, &nvram_length);
+		release_firmware(fw);
+		if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
+			goto fail;
+	}
+
+	fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length);
+	kfree(fwctx);
+	return;
+
+fail:
+	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
+	if (fwctx->code)
+		release_firmware(fwctx->code);
+	device_release_driver(fwctx->dev);
+	kfree(fwctx);
+}
+
+static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
+{
+	struct brcmf_fw *fwctx = ctx;
+	int ret;
+
+	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
+	if (!fw)
+		goto fail;
+
+	/* only requested code so done here */
+	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) {
+		fwctx->done(fwctx->dev, fw, NULL, 0);
+		kfree(fwctx);
+		return;
+	}
+	fwctx->code = fw;
+	ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
+				      fwctx->dev, GFP_KERNEL, fwctx,
+				      brcmf_fw_request_nvram_done);
+
+	if (!ret)
+		return;
+
+	/* when nvram is optional call .done() callback here */
+	if (fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL) {
+		fwctx->done(fwctx->dev, fw, NULL, 0);
+		kfree(fwctx);
+		return;
+	}
+
+	/* failed nvram request */
+	release_firmware(fw);
+fail:
+	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
+	device_release_driver(fwctx->dev);
+	kfree(fwctx);
+}
+
+int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
+			   const char *code, const char *nvram,
+			   void (*fw_cb)(struct device *dev,
+					 const struct firmware *fw,
+					 void *nvram_image, u32 nvram_len))
+{
+	struct brcmf_fw *fwctx;
+	int ret;
+
+	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
+	if (!fw_cb || !code || ((flags & BRCMF_FW_REQUEST_NVRAM) && !nvram))
+		return -EINVAL;
+
+	fwctx = kzalloc(sizeof(*fwctx), GFP_KERNEL);
+	if (!fwctx)
+		return -ENOMEM;
+	fwctx->dev = dev;
+	fwctx->flags = flags;
+	fwctx->done = fw_cb;
+	if (flags & BRCMF_FW_REQUEST_NVRAM)
+		fwctx->nvram_name = nvram;
+	/* no completion callback runs on failure, so free the context here */
+	ret = request_firmware_nowait(THIS_MODULE, true, code, dev,
+				      GFP_KERNEL, fwctx,
+				      brcmf_fw_request_code_done);
+	if (ret)
+		kfree(fwctx);
+	return ret;
+}

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/brcm80211/brcmfmac/firmware.h
new file mode 100644
index 0000000..6431bfd
--- /dev/null
+++ b/drivers/net/wireless/brcm80211/brcmfmac/firmware.h

@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013 Broadcom Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef BRCMFMAC_FIRMWARE_H
+#define BRCMFMAC_FIRMWARE_H
+
+#define BRCMF_FW_REQUEST		0x000F
+#define  BRCMF_FW_REQUEST_NVRAM		0x0001
+#define BRCMF_FW_REQ_FLAGS		0x00F0
+#define  BRCMF_FW_REQ_NV_OPTIONAL	0x0010
+
+void brcmf_fw_nvram_free(void *nvram);
+/*
+ * Request firmware(s) asynchronously. When the asynchronous request
+ * fails it will not use the callback, but call device_release_driver()
+ * instead which will call the driver .remove() callback.
+ */
+int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
+			   const char *code, const char *nvram,
+			   void (*fw_cb)(struct device *dev,
+					 const struct firmware *fw,
+					 void *nvram_image, u32 nvram_len));
+
+#endif /* BRCMFMAC_FIRMWARE_H */

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/brcm80211/brcmfmac/fwil_types.h
index 614e488..2bc68a2 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/fwil_types.h
+++ b/drivers/net/wireless/brcm80211/brcmfmac/fwil_types.h

@@ -53,6 +53,14 @@
 #define BRCMF_OBSS_COEX_OFF		0
 #define BRCMF_OBSS_COEX_ON		1
 
+/* join preference types for join_pref iovar */
+enum brcmf_join_pref_types {
+	BRCMF_JOIN_PREF_RSSI = 1,
+	BRCMF_JOIN_PREF_WPA,
+	BRCMF_JOIN_PREF_BAND,
+	BRCMF_JOIN_PREF_RSSI_DELTA,
+};
+
 enum brcmf_fil_p2p_if_types {
 	BRCMF_FIL_P2P_IF_CLIENT,
 	BRCMF_FIL_P2P_IF_GO,
@@ -282,6 +290,22 @@
 	__le16 chanspec_list[1];
 };
 
+/**
+ * struct join_pref params - parameters for preferred join selection.
+ *
+ * @type: preference type (see enum brcmf_join_pref_types).
+ * @len: length of bytes following (currently always 2).
+ * @rssi_gain: signal gain for selection (only when @type is RSSI_DELTA).
+ * @band: band to which selection preference applies.
+ *	This is used if @type is BAND or RSSI_DELTA.
+ */
+struct brcmf_join_pref_params {
+	u8 type;
+	u8 len;
+	u8 rssi_gain;
+	u8 band;
+};
+
 /* used for join with or without a specific bssid and channel list */
 struct brcmf_join_params {
 	struct brcmf_ssid_le ssid_le;

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c
index c3e7d76..699908d 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c

@@ -476,6 +476,7 @@
 	bool bus_flow_blocked;
 	bool creditmap_received;
 	u8 mode;
+	bool avoid_queueing;
 };
 
 /*
@@ -1369,13 +1370,12 @@
 }
 
 static int brcmf_fws_txstatus_suppressed(struct brcmf_fws_info *fws, int fifo,
-					 struct sk_buff *skb, u32 genbit,
-					 u16 seq)
+					 struct sk_buff *skb, u8 ifidx,
+					 u32 genbit, u16 seq)
 {
 	struct brcmf_fws_mac_descriptor *entry = brcmf_skbcb(skb)->mac;
 	u32 hslot;
 	int ret;
-	u8 ifidx;
 
 	hslot = brcmf_skb_htod_tag_get_field(skb, HSLOT);
 
@@ -1389,29 +1389,21 @@
 
 	entry->generation = genbit;
 
-	ret = brcmf_proto_hdrpull(fws->drvr, false, &ifidx, skb);
-	if (ret == 0) {
-		brcmf_skb_htod_tag_set_field(skb, GENERATION, genbit);
-		brcmf_skbcb(skb)->htod_seq = seq;
-		if (brcmf_skb_htod_seq_get_field(skb, FROMFW)) {
-			brcmf_skb_htod_seq_set_field(skb, FROMDRV, 1);
-			brcmf_skb_htod_seq_set_field(skb, FROMFW, 0);
-		} else {
-			brcmf_skb_htod_seq_set_field(skb, FROMDRV, 0);
-		}
-		ret = brcmf_fws_enq(fws, BRCMF_FWS_SKBSTATE_SUPPRESSED, fifo,
-				    skb);
+	brcmf_skb_htod_tag_set_field(skb, GENERATION, genbit);
+	brcmf_skbcb(skb)->htod_seq = seq;
+	if (brcmf_skb_htod_seq_get_field(skb, FROMFW)) {
+		brcmf_skb_htod_seq_set_field(skb, FROMDRV, 1);
+		brcmf_skb_htod_seq_set_field(skb, FROMFW, 0);
+	} else {
+		brcmf_skb_htod_seq_set_field(skb, FROMDRV, 0);
 	}
+	ret = brcmf_fws_enq(fws, BRCMF_FWS_SKBSTATE_SUPPRESSED, fifo, skb);
 
 	if (ret != 0) {
-		/* suppress q is full or hdrpull failed, drop this packet */
-		brcmf_fws_hanger_poppkt(&fws->hanger, hslot, &skb,
-					true);
+		/* suppress q is full drop this packet */
+		brcmf_fws_hanger_poppkt(&fws->hanger, hslot, &skb, true);
 	} else {
-		/*
-		 * Mark suppressed to avoid a double free during
-		 * wlfc cleanup
-		 */
+		/* Mark suppressed to avoid a double free during wlfc cleanup */
 		brcmf_fws_hanger_mark_suppressed(&fws->hanger, hslot);
 	}
 
@@ -1428,6 +1420,7 @@
 	struct sk_buff *skb;
 	struct brcmf_skbuff_cb *skcb;
 	struct brcmf_fws_mac_descriptor *entry = NULL;
+	u8 ifidx;
 
 	brcmf_dbg(DATA, "flags %d\n", flags);
 
@@ -1476,12 +1469,15 @@
 	}
 	brcmf_fws_macdesc_return_req_credit(skb);
 
+	if (brcmf_proto_hdrpull(fws->drvr, false, &ifidx, skb)) {
+		brcmu_pkt_buf_free_skb(skb);
+		return -EINVAL;
+	}
 	if (!remove_from_hanger)
-		ret = brcmf_fws_txstatus_suppressed(fws, fifo, skb, genbit,
-						    seq);
-
+		ret = brcmf_fws_txstatus_suppressed(fws, fifo, skb, ifidx,
+						    genbit, seq);
 	if (remove_from_hanger || ret)
-		brcmf_txfinalize(fws->drvr, skb, true);
+		brcmf_txfinalize(fws->drvr, skb, ifidx, true);
 
 	return 0;
 }
@@ -1868,7 +1864,7 @@
 	struct ethhdr *eh = (struct ethhdr *)(skb->data);
 	int fifo = BRCMF_FWS_FIFO_BCMC;
 	bool multicast = is_multicast_ether_addr(eh->h_dest);
-	bool pae = eh->h_proto == htons(ETH_P_PAE);
+	int rc = 0;
 
 	brcmf_dbg(DATA, "tx proto=0x%X\n", ntohs(eh->h_proto));
 	/* determine the priority */
@@ -1876,8 +1872,13 @@
 		skb->priority = cfg80211_classify8021d(skb, NULL);
 
 	drvr->tx_multicast += !!multicast;
-	if (pae)
-		atomic_inc(&ifp->pend_8021x_cnt);
+
+	if (fws->avoid_queueing) {
+		rc = brcmf_proto_txdata(drvr, ifp->ifidx, 0, skb);
+		if (rc < 0)
+			brcmf_txfinalize(drvr, skb, ifp->ifidx, false);
+		return rc;
+	}
 
 	/* set control buffer information */
 	skcb->if_flags = 0;
@@ -1899,15 +1900,12 @@
 		brcmf_fws_schedule_deq(fws);
 	} else {
 		brcmf_err("drop skb: no hanger slot\n");
-		if (pae) {
-			atomic_dec(&ifp->pend_8021x_cnt);
-			if (waitqueue_active(&ifp->pend_8021x_wait))
-				wake_up(&ifp->pend_8021x_wait);
-		}
-		brcmu_pkt_buf_free_skb(skb);
+		brcmf_txfinalize(drvr, skb, ifp->ifidx, false);
+		rc = -ENOMEM;
 	}
 	brcmf_fws_unlock(fws);
-	return 0;
+
+	return rc;
 }
 
 void brcmf_fws_reset_interface(struct brcmf_if *ifp)
@@ -1982,7 +1980,8 @@
 				ret = brcmf_proto_txdata(drvr, ifidx, 0, skb);
 				brcmf_fws_lock(fws);
 				if (ret < 0)
-					brcmf_txfinalize(drvr, skb, false);
+					brcmf_txfinalize(drvr, skb, ifidx,
+							 false);
 				if (fws->bus_flow_blocked)
 					break;
 			}
@@ -2039,6 +2038,13 @@
 	fws->drvr = drvr;
 	fws->fcmode = fcmode;
 
+	if ((drvr->bus_if->always_use_fws_queue == false) &&
+	    (fcmode == BRCMF_FWS_FCMODE_NONE)) {
+		fws->avoid_queueing = true;
+		brcmf_dbg(INFO, "FWS queueing will be avoided\n");
+		return 0;
+	}
+
 	fws->fws_wq = create_singlethread_workqueue("brcmf_fws_wq");
 	if (fws->fws_wq == NULL) {
 		brcmf_err("workqueue creation failed\n");

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/nvram.c b/drivers/net/wireless/brcm80211/brcmfmac/nvram.c
deleted file mode 100644
index d5ef86d..0000000
--- a/drivers/net/wireless/brcm80211/brcmfmac/nvram.c
+++ /dev/null

@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2013 Broadcom Corporation
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/firmware.h>
-
-#include "nvram.h"
-
-/* brcmf_nvram_strip :Takes a buffer of "<var>=<value>\n" lines read from a file
- * and ending in a NUL. Removes carriage returns, empty lines, comment lines,
- * and converts newlines to NULs. Shortens buffer as needed and pads with NULs.
- * End of buffer is completed with token identifying length of buffer.
- */
-void *brcmf_nvram_strip(const struct firmware *nv, u32 *new_length)
-{
-	u8 *nvram;
-	u32 i;
-	u32 len;
-	u32 column;
-	u8 val;
-	bool comment;
-	u32 token;
-	__le32 token_le;
-
-	/* Alloc for extra 0 byte + roundup by 4 + length field */
-	nvram = kmalloc(nv->size + 1 + 3 + sizeof(token_le), GFP_KERNEL);
-	if (!nvram)
-		return NULL;
-
-	len = 0;
-	column = 0;
-	comment = false;
-	for (i = 0; i < nv->size; i++) {
-		val = nv->data[i];
-		if (val == 0)
-			break;
-		if (val == '\r')
-			continue;
-		if (comment && (val != '\n'))
-			continue;
-		comment = false;
-		if (val == '#') {
-			comment = true;
-			continue;
-		}
-		if (val == '\n') {
-			if (column == 0)
-				continue;
-			nvram[len] = 0;
-			len++;
-			column = 0;
-			continue;
-		}
-		nvram[len] = val;
-		len++;
-		column++;
-	}
-	column = len;
-	*new_length = roundup(len + 1, 4);
-	while (column != *new_length) {
-		nvram[column] = 0;
-		column++;
-	}
-
-	token = *new_length / 4;
-	token = (~token << 16) | (token & 0x0000FFFF);
-	token_le = cpu_to_le32(token);
-
-	memcpy(&nvram[*new_length], &token_le, sizeof(token_le));
-	*new_length += sizeof(token_le);
-
-	return nvram;
-}
-
-void brcmf_nvram_free(void *nvram)
-{
-	kfree(nvram);
-}
-
-

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/nvram.h b/drivers/net/wireless/brcm80211/brcmfmac/nvram.h
deleted file mode 100644
index d454580..0000000
--- a/drivers/net/wireless/brcm80211/brcmfmac/nvram.h
+++ /dev/null

@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2013 Broadcom Corporation
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef BRCMFMAC_NVRAM_H
-#define BRCMFMAC_NVRAM_H
-
-
-void *brcmf_nvram_strip(const struct firmware *nv, u32 *new_length);
-void brcmf_nvram_free(void *nvram);
-
-
-#endif /* BRCMFMAC_NVRAM_H */

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c
index 24f65cd..6db51a6 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c

@@ -25,6 +25,7 @@
 #include <dhd_bus.h>
 #include <dhd_dbg.h>
 
+#include "firmware.h"
 #include "usb_rdl.h"
 #include "usb.h"
 
@@ -61,12 +62,6 @@
 	u8 *image;
 	int image_len;
 };
-static struct list_head fw_image_list;
-
-struct intr_transfer_buf {
-	u32 notification;
-	u32 reserved;
-};
 
 struct brcmf_usbdev_info {
 	struct brcmf_usbdev bus_pub; /* MUST BE FIRST */
@@ -75,7 +70,7 @@
 	struct list_head rx_postq;
 	struct list_head tx_freeq;
 	struct list_head tx_postq;
-	uint rx_pipe, tx_pipe, intr_pipe, rx_pipe2;
+	uint rx_pipe, tx_pipe, rx_pipe2;
 
 	int rx_low_watermark;
 	int tx_low_watermark;
@@ -87,7 +82,7 @@
 	struct brcmf_usbreq *tx_reqs;
 	struct brcmf_usbreq *rx_reqs;
 
-	u8 *image;	/* buffer for combine fw and nvram */
+	const u8 *image;	/* buffer for combine fw and nvram */
 	int image_len;
 
 	struct usb_device *usbdev;
@@ -104,10 +99,6 @@
 	ulong ctl_op;
 
 	struct urb *bulk_urb; /* used for FW download */
-	struct urb *intr_urb; /* URB for interrupt endpoint */
-	int intr_size;          /* Size of interrupt message */
-	int interval;           /* Interrupt polling interval */
-	struct intr_transfer_buf intr; /* Data buffer for interrupt endpoint */
 };
 
 static void brcmf_usb_rx_refill(struct brcmf_usbdev_info *devinfo,
@@ -531,39 +522,6 @@
 	}
 }
 
-static void
-brcmf_usb_intr_complete(struct urb *urb)
-{
-	struct brcmf_usbdev_info *devinfo =
-			(struct brcmf_usbdev_info *)urb->context;
-	int err;
-
-	brcmf_dbg(USB, "Enter, urb->status=%d\n", urb->status);
-
-	if (devinfo == NULL)
-		return;
-
-	if (unlikely(urb->status)) {
-		if (urb->status == -ENOENT ||
-		    urb->status == -ESHUTDOWN ||
-		    urb->status == -ENODEV) {
-			brcmf_usb_state_change(devinfo,
-					       BRCMFMAC_USB_STATE_DOWN);
-		}
-	}
-
-	if (devinfo->bus_pub.state == BRCMFMAC_USB_STATE_DOWN) {
-		brcmf_err("intr cb when DBUS down, ignoring\n");
-		return;
-	}
-
-	if (devinfo->bus_pub.state == BRCMFMAC_USB_STATE_UP) {
-		err = usb_submit_urb(devinfo->intr_urb, GFP_ATOMIC);
-		if (err)
-			brcmf_err("usb_submit_urb, err=%d\n", err);
-	}
-}
-
 static int brcmf_usb_tx(struct device *dev, struct sk_buff *skb)
 {
 	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(dev);
@@ -619,7 +577,6 @@
 {
 	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(dev);
 	u16 ifnum;
-	int ret;
 
 	brcmf_dbg(USB, "Enter\n");
 	if (devinfo->bus_pub.state == BRCMFMAC_USB_STATE_UP)
@@ -628,23 +585,6 @@
 	/* Success, indicate devinfo is fully up */
 	brcmf_usb_state_change(devinfo, BRCMFMAC_USB_STATE_UP);
 
-	if (devinfo->intr_urb) {
-		usb_fill_int_urb(devinfo->intr_urb, devinfo->usbdev,
-			devinfo->intr_pipe,
-			&devinfo->intr,
-			devinfo->intr_size,
-			(usb_complete_t)brcmf_usb_intr_complete,
-			devinfo,
-			devinfo->interval);
-
-		ret = usb_submit_urb(devinfo->intr_urb, GFP_ATOMIC);
-		if (ret) {
-			brcmf_err("USB_SUBMIT_URB failed with status %d\n",
-				  ret);
-			return -EINVAL;
-		}
-	}
-
 	if (devinfo->ctl_urb) {
 		devinfo->ctl_in_pipe = usb_rcvctrlpipe(devinfo->usbdev, 0);
 		devinfo->ctl_out_pipe = usb_sndctrlpipe(devinfo->usbdev, 0);
@@ -681,8 +621,6 @@
 		return;
 
 	brcmf_usb_state_change(devinfo, BRCMFMAC_USB_STATE_DOWN);
-	if (devinfo->intr_urb)
-		usb_kill_urb(devinfo->intr_urb);
 
 	if (devinfo->ctl_urb)
 		usb_kill_urb(devinfo->ctl_urb);
@@ -1021,7 +959,7 @@
 	}
 
 	err = brcmf_usb_dlstart(devinfo,
-		devinfo->image, devinfo->image_len);
+		(u8 *)devinfo->image, devinfo->image_len);
 	if (err == 0)
 		err = brcmf_usb_dlrun(devinfo);
 	return err;
@@ -1036,7 +974,6 @@
 	brcmf_usb_free_q(&devinfo->rx_freeq, false);
 	brcmf_usb_free_q(&devinfo->tx_freeq, false);
 
-	usb_free_urb(devinfo->intr_urb);
 	usb_free_urb(devinfo->ctl_urb);
 	usb_free_urb(devinfo->bulk_urb);
 
@@ -1080,68 +1017,20 @@
 	return -1;
 }
 
-static int brcmf_usb_get_fw(struct brcmf_usbdev_info *devinfo)
+static const char *brcmf_usb_get_fwname(struct brcmf_usbdev_info *devinfo)
 {
-	s8 *fwname;
-	const struct firmware *fw;
-	struct brcmf_usb_image *fw_image;
-	int err;
-
-	brcmf_dbg(USB, "Enter\n");
 	switch (devinfo->bus_pub.devid) {
 	case 43143:
-		fwname = BRCMF_USB_43143_FW_NAME;
-		break;
+		return BRCMF_USB_43143_FW_NAME;
 	case 43235:
 	case 43236:
 	case 43238:
-		fwname = BRCMF_USB_43236_FW_NAME;
-		break;
+		return BRCMF_USB_43236_FW_NAME;
 	case 43242:
-		fwname = BRCMF_USB_43242_FW_NAME;
-		break;
+		return BRCMF_USB_43242_FW_NAME;
 	default:
-		return -EINVAL;
-		break;
+		return NULL;
 	}
-	brcmf_dbg(USB, "Loading FW %s\n", fwname);
-	list_for_each_entry(fw_image, &fw_image_list, list) {
-		if (fw_image->fwname == fwname) {
-			devinfo->image = fw_image->image;
-			devinfo->image_len = fw_image->image_len;
-			return 0;
-		}
-	}
-	/* fw image not yet loaded. Load it now and add to list */
-	err = request_firmware(&fw, fwname, devinfo->dev);
-	if (!fw) {
-		brcmf_err("fail to request firmware %s\n", fwname);
-		return err;
-	}
-	if (check_file(fw->data) < 0) {
-		brcmf_err("invalid firmware %s\n", fwname);
-		return -EINVAL;
-	}
-
-	fw_image = kzalloc(sizeof(*fw_image), GFP_ATOMIC);
-	if (!fw_image)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&fw_image->list);
-	list_add_tail(&fw_image->list, &fw_image_list);
-	fw_image->fwname = fwname;
-	fw_image->image = vmalloc(fw->size);
-	if (!fw_image->image)
-		return -ENOMEM;
-
-	memcpy(fw_image->image, fw->data, fw->size);
-	fw_image->image_len = fw->size;
-
-	release_firmware(fw);
-
-	devinfo->image = fw_image->image;
-	devinfo->image_len = fw_image->image_len;
-
-	return 0;
 }
 
 
@@ -1186,11 +1075,6 @@
 		goto error;
 	devinfo->tx_freecount = ntxq;
 
-	devinfo->intr_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!devinfo->intr_urb) {
-		brcmf_err("usb_alloc_urb (intr) failed\n");
-		goto error;
-	}
 	devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!devinfo->ctl_urb) {
 		brcmf_err("usb_alloc_urb (ctl) failed\n");
@@ -1202,16 +1086,6 @@
 		goto error;
 	}
 
-	if (!brcmf_usb_dlneeded(devinfo))
-		return &devinfo->bus_pub;
-
-	brcmf_dbg(USB, "Start fw downloading\n");
-	if (brcmf_usb_get_fw(devinfo))
-		goto error;
-
-	if (brcmf_usb_fw_download(devinfo))
-		goto error;
-
 	return &devinfo->bus_pub;
 
 error:
@@ -1222,18 +1096,77 @@
 
 static struct brcmf_bus_ops brcmf_usb_bus_ops = {
 	.txdata = brcmf_usb_tx,
-	.init = brcmf_usb_up,
 	.stop = brcmf_usb_down,
 	.txctl = brcmf_usb_tx_ctlpkt,
 	.rxctl = brcmf_usb_rx_ctlpkt,
 };
 
+static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
+{
+	int ret;
+
+	/* Attach to the common driver interface */
+	ret = brcmf_attach(devinfo->dev);
+	if (ret) {
+		brcmf_err("brcmf_attach failed\n");
+		return ret;
+	}
+
+	ret = brcmf_usb_up(devinfo->dev);
+	if (ret)
+		goto fail;
+
+	ret = brcmf_bus_start(devinfo->dev);
+	if (ret)
+		goto fail;
+
+	return 0;
+fail:
+	brcmf_detach(devinfo->dev);
+	return ret;
+}
+
+static void brcmf_usb_probe_phase2(struct device *dev,
+				   const struct firmware *fw,
+				   void *nvram, u32 nvlen)
+{
+	struct brcmf_bus *bus = dev_get_drvdata(dev);
+	struct brcmf_usbdev_info *devinfo;
+	int ret;
+
+	brcmf_dbg(USB, "Start fw downloading\n");
+	ret = check_file(fw->data);
+	if (ret < 0) {
+		brcmf_err("invalid firmware\n");
+		release_firmware(fw);
+		goto error;
+	}
+
+	devinfo = bus->bus_priv.usb->devinfo;
+	devinfo->image = fw->data;
+	devinfo->image_len = fw->size;
+
+	ret = brcmf_usb_fw_download(devinfo);
+	release_firmware(fw);
+	if (ret)
+		goto error;
+
+	ret = brcmf_usb_bus_setup(devinfo);
+	if (ret)
+		goto error;
+
+	return;
+error:
+	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), ret);
+	device_release_driver(dev);
+}
+
 static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 {
 	struct brcmf_bus *bus = NULL;
 	struct brcmf_usbdev *bus_pub = NULL;
-	int ret;
 	struct device *dev = devinfo->dev;
+	int ret;
 
 	brcmf_dbg(USB, "Enter\n");
 	bus_pub = brcmf_usb_attach(devinfo, BRCMF_USB_NRXQ, BRCMF_USB_NTXQ);
@@ -1254,22 +1187,18 @@
 	bus->chip = bus_pub->devid;
 	bus->chiprev = bus_pub->chiprev;
 	bus->proto_type = BRCMF_PROTO_BCDC;
+	bus->always_use_fws_queue = true;
 
-	/* Attach to the common driver interface */
-	ret = brcmf_attach(dev);
-	if (ret) {
-		brcmf_err("brcmf_attach failed\n");
-		goto fail;
+	if (!brcmf_usb_dlneeded(devinfo)) {
+		ret = brcmf_usb_bus_setup(devinfo);
+		if (ret)
+			goto fail;
 	}
-
-	ret = brcmf_bus_start(dev);
-	if (ret) {
-		brcmf_err("dongle is not responding\n");
-		brcmf_detach(dev);
-		goto fail;
-	}
-
+	/* request firmware here */
+	brcmf_fw_get_firmwares(dev, 0, brcmf_usb_get_fwname(devinfo), NULL,
+			       brcmf_usb_probe_phase2);
 	return 0;
+
 fail:
 	/* Release resources in reverse order */
 	kfree(bus);
@@ -1357,9 +1286,6 @@
 		goto fail;
 	}
 
-	endpoint_num = endpoint->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
-	devinfo->intr_pipe = usb_rcvintpipe(usb, endpoint_num);
-
 	devinfo->rx_pipe = 0;
 	devinfo->rx_pipe2 = 0;
 	devinfo->tx_pipe = 0;
@@ -1391,16 +1317,9 @@
 		}
 	}
 
-	/* Allocate interrupt URB and data buffer */
-	/* RNDIS says 8-byte intr, our old drivers used 4-byte */
-	if (IFEPDESC(usb, CONTROL_IF, 0).wMaxPacketSize == cpu_to_le16(16))
-		devinfo->intr_size = 8;
-	else
-		devinfo->intr_size = 4;
-
-	devinfo->interval = IFEPDESC(usb, CONTROL_IF, 0).bInterval;
-
-	if (usb->speed == USB_SPEED_HIGH)
+	if (usb->speed == USB_SPEED_SUPER)
+		brcmf_dbg(USB, "Broadcom super speed USB wireless device detected\n");
+	else if (usb->speed == USB_SPEED_HIGH)
 		brcmf_dbg(USB, "Broadcom high speed USB wireless device detected\n");
 	else
 		brcmf_dbg(USB, "Broadcom full speed USB wireless device detected\n");
@@ -1455,23 +1374,18 @@
 	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(&usb->dev);
 
 	brcmf_dbg(USB, "Enter\n");
-	if (!brcmf_attach(devinfo->dev))
-		return brcmf_bus_start(&usb->dev);
-
-	return 0;
+	return brcmf_usb_bus_setup(devinfo);
 }
 
 static int brcmf_usb_reset_resume(struct usb_interface *intf)
 {
 	struct usb_device *usb = interface_to_usbdev(intf);
 	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(&usb->dev);
-
 	brcmf_dbg(USB, "Enter\n");
 
-	if (!brcmf_usb_fw_download(devinfo))
-		return brcmf_usb_resume(intf);
-
-	return -EIO;
+	return brcmf_fw_get_firmwares(&usb->dev, 0,
+				      brcmf_usb_get_fwname(devinfo), NULL,
+				      brcmf_usb_probe_phase2);
 }
 
 #define BRCMF_USB_VENDOR_ID_BROADCOM	0x0a5c
@@ -1506,16 +1420,6 @@
 	.disable_hub_initiated_lpm = 1,
 };
 
-static void brcmf_release_fw(struct list_head *q)
-{
-	struct brcmf_usb_image *fw_image, *next;
-
-	list_for_each_entry_safe(fw_image, next, q, list) {
-		vfree(fw_image->image);
-		list_del_init(&fw_image->list);
-	}
-}
-
 static int brcmf_usb_reset_device(struct device *dev, void *notused)
 {
 	/* device past is the usb interface so we
@@ -1534,12 +1438,10 @@
 	ret = driver_for_each_device(drv, NULL, NULL,
 				     brcmf_usb_reset_device);
 	usb_deregister(&brcmf_usbdrvr);
-	brcmf_release_fw(&fw_image_list);
 }
 
 void brcmf_usb_register(void)
 {
 	brcmf_dbg(USB, "Enter\n");
-	INIT_LIST_HEAD(&fw_image_list);
 	usb_register(&brcmf_usbdrvr);
 }

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
index be198529..d8fa276 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c

@@ -221,9 +221,9 @@
 		 */
 		REG_RULE(2484-10, 2484+10, 20, 6, 20, 0),
 		/* IEEE 802.11a, channel 36..64 */
-		REG_RULE(5150-10, 5350+10, 40, 6, 20, 0),
+		REG_RULE(5150-10, 5350+10, 80, 6, 20, 0),
 		/* IEEE 802.11a, channel 100..165 */
-		REG_RULE(5470-10, 5850+10, 40, 6, 20, 0), }
+		REG_RULE(5470-10, 5850+10, 80, 6, 20, 0), }
 };
 
 static const u32 __wl_cipher_suites[] = {
@@ -341,6 +341,60 @@
 	return qdbm;
 }
 
+static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf,
+			       struct cfg80211_chan_def *ch)
+{
+	struct brcmu_chan ch_inf;
+	s32 primary_offset;
+
+	brcmf_dbg(TRACE, "chandef: control %d center %d width %d\n",
+		  ch->chan->center_freq, ch->center_freq1, ch->width);
+	ch_inf.chnum = ieee80211_frequency_to_channel(ch->center_freq1);
+	primary_offset = ch->center_freq1 - ch->chan->center_freq;
+	switch (ch->width) {
+	case NL80211_CHAN_WIDTH_20:
+		ch_inf.bw = BRCMU_CHAN_BW_20;
+		WARN_ON(primary_offset != 0);
+		break;
+	case NL80211_CHAN_WIDTH_40:
+		ch_inf.bw = BRCMU_CHAN_BW_40;
+		if (primary_offset < 0)
+			ch_inf.sb = BRCMU_CHAN_SB_U;
+		else
+			ch_inf.sb = BRCMU_CHAN_SB_L;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+		ch_inf.bw = BRCMU_CHAN_BW_80;
+		if (primary_offset < 0) {
+			if (primary_offset < -CH_10MHZ_APART)
+				ch_inf.sb = BRCMU_CHAN_SB_UU;
+			else
+				ch_inf.sb = BRCMU_CHAN_SB_UL;
+		} else {
+			if (primary_offset > CH_10MHZ_APART)
+				ch_inf.sb = BRCMU_CHAN_SB_LL;
+			else
+				ch_inf.sb = BRCMU_CHAN_SB_LU;
+		}
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+	switch (ch->chan->band) {
+	case IEEE80211_BAND_2GHZ:
+		ch_inf.band = BRCMU_CHAN_BAND_2G;
+		break;
+	case IEEE80211_BAND_5GHZ:
+		ch_inf.band = BRCMU_CHAN_BAND_5G;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+	d11inf->encchspec(&ch_inf);
+
+	return ch_inf.chspec;
+}
+
 u16 channel_to_chanspec(struct brcmu_d11inf *d11inf,
 			struct ieee80211_channel *ch)
 {
@@ -586,6 +640,9 @@
 		if (err)
 			brcmf_err("Scan abort  failed\n");
 	}
+
+	brcmf_set_mpc(ifp, 1);
+
 	/*
 	 * e-scan can be initiated by scheduled scan
 	 * which takes precedence.
@@ -595,12 +652,10 @@
 		cfg->sched_escan = false;
 		if (!aborted)
 			cfg80211_sched_scan_results(cfg_to_wiphy(cfg));
-		brcmf_set_mpc(ifp, 1);
 	} else if (scan_request) {
 		brcmf_dbg(SCAN, "ESCAN Completed scan: %s\n",
 			  aborted ? "Aborted" : "Done");
 		cfg80211_scan_done(scan_request, aborted);
-		brcmf_set_mpc(ifp, 1);
 	}
 	if (!test_and_clear_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status))
 		brcmf_dbg(SCAN, "Scan complete, probably P2P scan\n");
@@ -1236,8 +1291,8 @@
 				params->chandef.chan->center_freq);
 		if (params->channel_fixed) {
 			/* adding chanspec */
-			chanspec = channel_to_chanspec(&cfg->d11inf,
-						       params->chandef.chan);
+			chanspec = chandef_to_chanspec(&cfg->d11inf,
+						       &params->chandef);
 			join_params.params_le.chanspec_list[0] =
 				cpu_to_le16(chanspec);
 			join_params.params_le.chanspec_num = cpu_to_le32(1);
@@ -2182,7 +2237,7 @@
 
 static s32
 brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
-			   u8 *mac, struct station_info *sinfo)
+			   const u8 *mac, struct station_info *sinfo)
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	struct brcmf_cfg80211_profile *profile = &ifp->vif->profile;
@@ -3124,7 +3179,7 @@
 	}
 
 	if (!request->n_ssids || !request->n_match_sets) {
-		brcmf_err("Invalid sched scan req!! n_ssids:%d\n",
+		brcmf_dbg(SCAN, "Invalid sched scan req!! n_ssids:%d\n",
 			  request->n_ssids);
 		return -EINVAL;
 	}
@@ -3734,23 +3789,6 @@
 }
 
 static s32
-brcmf_cfg80211_set_channel(struct brcmf_cfg80211_info *cfg,
-			   struct brcmf_if *ifp,
-			   struct ieee80211_channel *channel)
-{
-	u16 chanspec;
-	s32 err;
-
-	brcmf_dbg(TRACE, "band=%d, center_freq=%d\n", channel->band,
-		  channel->center_freq);
-
-	chanspec = channel_to_chanspec(&cfg->d11inf, channel);
-	err = brcmf_fil_iovar_int_set(ifp, "chanspec", chanspec);
-
-	return err;
-}
-
-static s32
 brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 			struct cfg80211_ap_settings *settings)
 {
@@ -3765,11 +3803,12 @@
 	struct brcmf_join_params join_params;
 	enum nl80211_iftype dev_role;
 	struct brcmf_fil_bss_enable_le bss_enable;
+	u16 chanspec;
 
-	brcmf_dbg(TRACE, "channel_type=%d, beacon_interval=%d, dtim_period=%d,\n",
-		  cfg80211_get_chandef_type(&settings->chandef),
-		  settings->beacon_interval,
-		  settings->dtim_period);
+	brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n",
+		  settings->chandef.chan->hw_value,
+		  settings->chandef.center_freq1, settings->chandef.width,
+		  settings->beacon_interval, settings->dtim_period);
 	brcmf_dbg(TRACE, "ssid=%s(%zu), auth_type=%d, inactivity_timeout=%d\n",
 		  settings->ssid, settings->ssid_len, settings->auth_type,
 		  settings->inactivity_timeout);
@@ -3826,9 +3865,10 @@
 
 	brcmf_config_ap_mgmt_ie(ifp->vif, &settings->beacon);
 
-	err = brcmf_cfg80211_set_channel(cfg, ifp, settings->chandef.chan);
+	chanspec = chandef_to_chanspec(&cfg->d11inf, &settings->chandef);
+	err = brcmf_fil_iovar_int_set(ifp, "chanspec", chanspec);
 	if (err < 0) {
-		brcmf_err("Set Channel failed, %d\n", err);
+		brcmf_err("Set Channel failed: chspec=%d, %d\n", chanspec, err);
 		goto exit;
 	}
 
@@ -3975,7 +4015,7 @@
 
 static int
 brcmf_cfg80211_del_station(struct wiphy *wiphy, struct net_device *ndev,
-			   u8 *mac)
+			   const u8 *mac)
 {
 	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct brcmf_scb_val_le scbval;
@@ -4203,7 +4243,7 @@
 }
 
 static int brcmf_cfg80211_tdls_oper(struct wiphy *wiphy,
-				    struct net_device *ndev, u8 *peer,
+				    struct net_device *ndev, const u8 *peer,
 				    enum nl80211_tdls_operation oper)
 {
 	struct brcmf_if *ifp;
@@ -4364,6 +4404,8 @@
 			WIPHY_FLAG_OFFCHAN_TX |
 			WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL |
 			WIPHY_FLAG_SUPPORTS_TDLS;
+	if (!brcmf_roamoff)
+		wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM;
 	wiphy->mgmt_stypes = brcmf_txrx_stypes;
 	wiphy->max_remain_on_channel_duration = 5000;
 	brcmf_wiphy_pno_params(wiphy);
@@ -4685,7 +4727,6 @@
 	struct brcmf_cfg80211_profile *profile = &ifp->vif->profile;
 	struct ieee80211_channel *chan;
 	s32 err = 0;
-	u16 reason;
 
 	if (brcmf_is_apmode(ifp->vif)) {
 		err = brcmf_notify_connect_status_ap(cfg, ndev, e, data);
@@ -4706,16 +4747,6 @@
 		brcmf_dbg(CONN, "Linkdown\n");
 		if (!brcmf_is_ibssmode(ifp->vif)) {
 			brcmf_bss_connect_done(cfg, ndev, e, false);
-			if (test_and_clear_bit(BRCMF_VIF_STATUS_CONNECTED,
-					       &ifp->vif->sme_state)) {
-				reason = 0;
-				if (((e->event_code == BRCMF_E_DEAUTH_IND) ||
-				     (e->event_code == BRCMF_E_DISASSOC_IND)) &&
-				    (e->reason != WLAN_REASON_UNSPECIFIED))
-					reason = e->reason;
-				cfg80211_disconnected(ndev, reason, NULL, 0,
-						      GFP_KERNEL);
-			}
 		}
 		brcmf_link_down(ifp->vif);
 		brcmf_init_prof(ndev_to_prof(ndev));
@@ -5215,6 +5246,9 @@
 		if (!(bw_cap[band] & WLC_BW_40MHZ_BIT) &&
 		    ch.bw == BRCMU_CHAN_BW_40)
 			continue;
+		if (!(bw_cap[band] & WLC_BW_80MHZ_BIT) &&
+		    ch.bw == BRCMU_CHAN_BW_80)
+			continue;
 		update = false;
 		for (j = 0; (j < *n_cnt && (*n_cnt < array_size)); j++) {
 			if (band_chan_arr[j].hw_value == ch.chnum) {
@@ -5231,10 +5265,13 @@
 				ieee80211_channel_to_frequency(ch.chnum, band);
 			band_chan_arr[index].hw_value = ch.chnum;
 
-			if (ch.bw == BRCMU_CHAN_BW_40) {
-				/* assuming the order is HT20, HT40 Upper,
-				 * HT40 lower from chanspecs
-				 */
+			/* assuming the chanspecs order is HT20,
+			 * HT40 upper, HT40 lower, and VHT80.
+			 */
+			if (ch.bw == BRCMU_CHAN_BW_80) {
+				band_chan_arr[index].flags &=
+					~IEEE80211_CHAN_NO_80MHZ;
+			} else if (ch.bw == BRCMU_CHAN_BW_40) {
 				ht40_flag = band_chan_arr[index].flags &
 					    IEEE80211_CHAN_NO_HT40;
 				if (ch.sb == BRCMU_CHAN_SB_U) {
@@ -5255,8 +5292,13 @@
 						    IEEE80211_CHAN_NO_HT40MINUS;
 				}
 			} else {
+				/* disable other bandwidths for now as mentioned
+				 * order assure they are enabled for subsequent
+				 * chanspecs.
+				 */
 				band_chan_arr[index].flags =
-							IEEE80211_CHAN_NO_HT40;
+						IEEE80211_CHAN_NO_HT40 |
+						IEEE80211_CHAN_NO_80MHZ;
 				ch.bw = BRCMU_CHAN_BW_20;
 				cfg->d11inf.encchspec(&ch);
 				channel = ch.chspec;
@@ -5323,13 +5365,63 @@
 	}
 }
 
+static void brcmf_update_ht_cap(struct ieee80211_supported_band *band,
+				u32 bw_cap[2], u32 nchain)
+{
+	band->ht_cap.ht_supported = true;
+	if (bw_cap[band->band] & WLC_BW_40MHZ_BIT) {
+		band->ht_cap.cap |= IEEE80211_HT_CAP_SGI_40;
+		band->ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+	}
+	band->ht_cap.cap |= IEEE80211_HT_CAP_SGI_20;
+	band->ht_cap.cap |= IEEE80211_HT_CAP_DSSSCCK40;
+	band->ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
+	band->ht_cap.ampdu_density = IEEE80211_HT_MPDU_DENSITY_16;
+	memset(band->ht_cap.mcs.rx_mask, 0xff, nchain);
+	band->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
+}
+
+static __le16 brcmf_get_mcs_map(u32 nchain, enum ieee80211_vht_mcs_support supp)
+{
+	u16 mcs_map;
+	int i;
+
+	for (i = 0, mcs_map = 0xFFFF; i < nchain; i++)
+		mcs_map = (mcs_map << 2) | supp;
+
+	return cpu_to_le16(mcs_map);
+}
+
+static void brcmf_update_vht_cap(struct ieee80211_supported_band *band,
+				 u32 bw_cap[2], u32 nchain)
+{
+	__le16 mcs_map;
+
+	/* not allowed in 2.4G band */
+	if (band->band == IEEE80211_BAND_2GHZ)
+		return;
+
+	band->vht_cap.vht_supported = true;
+	/* 80MHz is mandatory */
+	band->vht_cap.cap |= IEEE80211_VHT_CAP_SHORT_GI_80;
+	if (bw_cap[band->band] & WLC_BW_160MHZ_BIT) {
+		band->vht_cap.cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
+		band->vht_cap.cap |= IEEE80211_VHT_CAP_SHORT_GI_160;
+	}
+	/* all support 256-QAM */
+	mcs_map = brcmf_get_mcs_map(nchain, IEEE80211_VHT_MCS_SUPPORT_0_9);
+	band->vht_cap.vht_mcs.rx_mcs_map = mcs_map;
+	band->vht_cap.vht_mcs.tx_mcs_map = mcs_map;
+}
+
 static s32 brcmf_update_wiphybands(struct brcmf_cfg80211_info *cfg)
 {
 	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
 	struct wiphy *wiphy;
 	s32 phy_list;
 	u32 band_list[3];
-	u32 nmode;
+	u32 nmode = 0;
+	u32 vhtmode = 0;
 	u32 bw_cap[2] = { 0, 0 };
 	u32 rxchain;
 	u32 nchain;
@@ -5360,14 +5452,16 @@
 	brcmf_dbg(INFO, "BRCMF_C_GET_BANDLIST reported: 0x%08x 0x%08x 0x%08x phy\n",
 		  band_list[0], band_list[1], band_list[2]);
 
+	(void)brcmf_fil_iovar_int_get(ifp, "vhtmode", &vhtmode);
 	err = brcmf_fil_iovar_int_get(ifp, "nmode", &nmode);
 	if (err) {
 		brcmf_err("nmode error (%d)\n", err);
 	} else {
 		brcmf_get_bwcap(ifp, bw_cap);
 	}
-	brcmf_dbg(INFO, "nmode=%d, bw_cap=(%d, %d)\n", nmode,
-		  bw_cap[IEEE80211_BAND_2GHZ], bw_cap[IEEE80211_BAND_5GHZ]);
+	brcmf_dbg(INFO, "nmode=%d, vhtmode=%d, bw_cap=(%d, %d)\n",
+		  nmode, vhtmode, bw_cap[IEEE80211_BAND_2GHZ],
+		  bw_cap[IEEE80211_BAND_5GHZ]);
 
 	err = brcmf_fil_iovar_int_get(ifp, "rxchain", &rxchain);
 	if (err) {
@@ -5398,17 +5492,10 @@
 		else
 			continue;
 
-		if (bw_cap[band->band] & WLC_BW_40MHZ_BIT) {
-			band->ht_cap.cap |= IEEE80211_HT_CAP_SGI_40;
-			band->ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-		}
-		band->ht_cap.cap |= IEEE80211_HT_CAP_SGI_20;
-		band->ht_cap.cap |= IEEE80211_HT_CAP_DSSSCCK40;
-		band->ht_cap.ht_supported = true;
-		band->ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
-		band->ht_cap.ampdu_density = IEEE80211_HT_MPDU_DENSITY_16;
-		memset(band->ht_cap.mcs.rx_mask, 0xff, nchain);
-		band->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
+		if (nmode)
+			brcmf_update_ht_cap(band, bw_cap, nchain);
+		if (vhtmode)
+			brcmf_update_vht_cap(band, bw_cap, nchain);
 		bands[band->band] = band;
 	}
 

diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index 8c5fa4e..43c71bf 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c

@@ -897,7 +897,8 @@
 	return result;
 }
 
-static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void brcms_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    u32 queues, bool drop)
 {
 	struct brcms_info *wl = hw->priv;
 	int ret;

diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index 9417cb5..af8ba64 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c

@@ -4870,14 +4870,11 @@
 /*
  * low level detach
  */
-static int brcms_b_detach(struct brcms_c_info *wlc)
+static void brcms_b_detach(struct brcms_c_info *wlc)
 {
 	uint i;
 	struct brcms_hw_band *band;
 	struct brcms_hardware *wlc_hw = wlc->hw;
-	int callbacks;
-
-	callbacks = 0;
 
 	brcms_b_detach_dmapio(wlc_hw);
 
@@ -4900,9 +4897,6 @@
 		ai_detach(wlc_hw->sih);
 		wlc_hw->sih = NULL;
 	}
-
-	return callbacks;
-
 }
 
 /*
@@ -4917,14 +4911,15 @@
  */
 uint brcms_c_detach(struct brcms_c_info *wlc)
 {
-	uint callbacks = 0;
+	uint callbacks;
 
 	if (wlc == NULL)
 		return 0;
 
-	callbacks += brcms_b_detach(wlc);
+	brcms_b_detach(wlc);
 
 	/* delete software timers */
+	callbacks = 0;
 	if (!brcms_c_radio_monitor_stop(wlc))
 		callbacks++;
 

diff --git a/drivers/net/wireless/brcm80211/brcmutil/d11.c b/drivers/net/wireless/brcm80211/brcmutil/d11.c
index 30e54e2..2b2522b 100644
--- a/drivers/net/wireless/brcm80211/brcmutil/d11.c
+++ b/drivers/net/wireless/brcm80211/brcmutil/d11.c

@@ -21,19 +21,46 @@
 #include <brcmu_wifi.h>
 #include <brcmu_d11.h>
 
+static u16 d11n_sb(enum brcmu_chan_sb sb)
+{
+	switch (sb) {
+	case BRCMU_CHAN_SB_NONE:
+		return BRCMU_CHSPEC_D11N_SB_N;
+	case BRCMU_CHAN_SB_L:
+		return BRCMU_CHSPEC_D11N_SB_L;
+	case BRCMU_CHAN_SB_U:
+		return BRCMU_CHSPEC_D11N_SB_U;
+	default:
+		WARN_ON(1);
+	}
+	return 0;
+}
+
+static u16 d11n_bw(enum brcmu_chan_bw bw)
+{
+	switch (bw) {
+	case BRCMU_CHAN_BW_20:
+		return BRCMU_CHSPEC_D11N_BW_20;
+	case BRCMU_CHAN_BW_40:
+		return BRCMU_CHSPEC_D11N_BW_40;
+	default:
+		WARN_ON(1);
+	}
+	return 0;
+}
+
 static void brcmu_d11n_encchspec(struct brcmu_chan *ch)
 {
-	ch->chspec = ch->chnum & BRCMU_CHSPEC_CH_MASK;
+	if (ch->bw == BRCMU_CHAN_BW_20)
+		ch->sb = BRCMU_CHAN_SB_NONE;
 
-	switch (ch->bw) {
-	case BRCMU_CHAN_BW_20:
-		ch->chspec |= BRCMU_CHSPEC_D11N_BW_20 | BRCMU_CHSPEC_D11N_SB_N;
-		break;
-	case BRCMU_CHAN_BW_40:
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	ch->chspec = 0;
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_CH_MASK,
+			BRCMU_CHSPEC_CH_SHIFT, ch->chnum);
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_D11N_SB_MASK,
+			0, d11n_sb(ch->sb));
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_D11N_BW_MASK,
+			0, d11n_bw(ch->bw));
 
 	if (ch->chnum <= CH_MAX_2G_CHANNEL)
 		ch->chspec |= BRCMU_CHSPEC_D11N_BND_2G;
@@ -41,23 +68,34 @@
 		ch->chspec |= BRCMU_CHSPEC_D11N_BND_5G;
 }
 
+static u16 d11ac_bw(enum brcmu_chan_bw bw)
+{
+	switch (bw) {
+	case BRCMU_CHAN_BW_20:
+		return BRCMU_CHSPEC_D11AC_BW_20;
+	case BRCMU_CHAN_BW_40:
+		return BRCMU_CHSPEC_D11AC_BW_40;
+	case BRCMU_CHAN_BW_80:
+		return BRCMU_CHSPEC_D11AC_BW_80;
+	default:
+		WARN_ON(1);
+	}
+	return 0;
+}
+
 static void brcmu_d11ac_encchspec(struct brcmu_chan *ch)
 {
-	ch->chspec = ch->chnum & BRCMU_CHSPEC_CH_MASK;
+	if (ch->bw == BRCMU_CHAN_BW_20 || ch->sb == BRCMU_CHAN_SB_NONE)
+		ch->sb = BRCMU_CHAN_SB_L;
 
-	switch (ch->bw) {
-	case BRCMU_CHAN_BW_20:
-		ch->chspec |= BRCMU_CHSPEC_D11AC_BW_20;
-		break;
-	case BRCMU_CHAN_BW_40:
-	case BRCMU_CHAN_BW_80:
-	case BRCMU_CHAN_BW_80P80:
-	case BRCMU_CHAN_BW_160:
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_CH_MASK,
+			BRCMU_CHSPEC_CH_SHIFT, ch->chnum);
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_D11AC_SB_MASK,
+			BRCMU_CHSPEC_D11AC_SB_SHIFT, ch->sb);
+	brcmu_maskset16(&ch->chspec, BRCMU_CHSPEC_D11AC_BW_MASK,
+			0, d11ac_bw(ch->bw));
 
+	ch->chspec &= ~BRCMU_CHSPEC_D11AC_BND_MASK;
 	if (ch->chnum <= CH_MAX_2G_CHANNEL)
 		ch->chspec |= BRCMU_CHSPEC_D11AC_BND_2G;
 	else
@@ -73,6 +111,7 @@
 	switch (ch->chspec & BRCMU_CHSPEC_D11N_BW_MASK) {
 	case BRCMU_CHSPEC_D11N_BW_20:
 		ch->bw = BRCMU_CHAN_BW_20;
+		ch->sb = BRCMU_CHAN_SB_NONE;
 		break;
 	case BRCMU_CHSPEC_D11N_BW_40:
 		ch->bw = BRCMU_CHAN_BW_40;
@@ -112,6 +151,7 @@
 	switch (ch->chspec & BRCMU_CHSPEC_D11AC_BW_MASK) {
 	case BRCMU_CHSPEC_D11AC_BW_20:
 		ch->bw = BRCMU_CHAN_BW_20;
+		ch->sb = BRCMU_CHAN_SB_NONE;
 		break;
 	case BRCMU_CHSPEC_D11AC_BW_40:
 		ch->bw = BRCMU_CHAN_BW_40;
@@ -128,6 +168,25 @@
 		break;
 	case BRCMU_CHSPEC_D11AC_BW_80:
 		ch->bw = BRCMU_CHAN_BW_80;
+		ch->sb = brcmu_maskget16(ch->chspec, BRCMU_CHSPEC_D11AC_SB_MASK,
+					 BRCMU_CHSPEC_D11AC_SB_SHIFT);
+		switch (ch->sb) {
+		case BRCMU_CHAN_SB_LL:
+			ch->chnum -= CH_30MHZ_APART;
+			break;
+		case BRCMU_CHAN_SB_LU:
+			ch->chnum -= CH_10MHZ_APART;
+			break;
+		case BRCMU_CHAN_SB_UL:
+			ch->chnum += CH_10MHZ_APART;
+			break;
+		case BRCMU_CHAN_SB_UU:
+			ch->chnum += CH_30MHZ_APART;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
 		break;
 	case BRCMU_CHSPEC_D11AC_BW_8080:
 	case BRCMU_CHSPEC_D11AC_BW_160:

diff --git a/drivers/net/wireless/brcm80211/include/brcmu_d11.h b/drivers/net/wireless/brcm80211/include/brcmu_d11.h
index 8660a2c..f9745ea 100644
--- a/drivers/net/wireless/brcm80211/include/brcmu_d11.h
+++ b/drivers/net/wireless/brcm80211/include/brcmu_d11.h

@@ -108,13 +108,7 @@
 };
 
 enum brcmu_chan_sb {
-	BRCMU_CHAN_SB_NONE = 0,
-	BRCMU_CHAN_SB_L,
-	BRCMU_CHAN_SB_U,
-	BRCMU_CHAN_SB_LL,
-	BRCMU_CHAN_SB_LU,
-	BRCMU_CHAN_SB_UL,
-	BRCMU_CHAN_SB_UU,
+	BRCMU_CHAN_SB_NONE = -1,
 	BRCMU_CHAN_SB_LLL,
 	BRCMU_CHAN_SB_LLU,
 	BRCMU_CHAN_SB_LUL,
@@ -123,6 +117,12 @@
 	BRCMU_CHAN_SB_ULU,
 	BRCMU_CHAN_SB_UUL,
 	BRCMU_CHAN_SB_UUU,
+	BRCMU_CHAN_SB_L = BRCMU_CHAN_SB_LLL,
+	BRCMU_CHAN_SB_U = BRCMU_CHAN_SB_LLU,
+	BRCMU_CHAN_SB_LL = BRCMU_CHAN_SB_LLL,
+	BRCMU_CHAN_SB_LU = BRCMU_CHAN_SB_LLU,
+	BRCMU_CHAN_SB_UL = BRCMU_CHAN_SB_LUL,
+	BRCMU_CHAN_SB_UU = BRCMU_CHAN_SB_LUU,
 };
 
 struct brcmu_chan {

diff --git a/drivers/net/wireless/brcm80211/include/brcmu_wifi.h b/drivers/net/wireless/brcm80211/include/brcmu_wifi.h
index 74419d4..76b5d3a 100644
--- a/drivers/net/wireless/brcm80211/include/brcmu_wifi.h
+++ b/drivers/net/wireless/brcm80211/include/brcmu_wifi.h

@@ -29,6 +29,7 @@
 #define CH_UPPER_SB			0x01
 #define CH_LOWER_SB			0x02
 #define CH_EWA_VALID			0x04
+#define CH_30MHZ_APART			6
 #define CH_20MHZ_APART			4
 #define CH_10MHZ_APART			2
 #define CH_5MHZ_APART			1 /* 2G band channels are 5 Mhz apart */

diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c
index 103f7bc..cd0cad7 100644
--- a/drivers/net/wireless/cw1200/sta.c
+++ b/drivers/net/wireless/cw1200/sta.c

@@ -936,7 +936,8 @@
 	return ret;
 }
 
-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop)
 {
 	struct cw1200_common *priv = hw->priv;
 

diff --git a/drivers/net/wireless/cw1200/sta.h b/drivers/net/wireless/cw1200/sta.h
index 35babb6..b7e386b 100644
--- a/drivers/net/wireless/cw1200/sta.h
+++ b/drivers/net/wireless/cw1200/sta.h

@@ -40,7 +40,8 @@
 
 int cw1200_set_rts_threshold(struct ieee80211_hw *hw, u32 value);
 
-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop);
 
 u64 cw1200_prepare_multicast(struct ieee80211_hw *hw,
 			     struct netdev_hw_addr_list *mc_list);

diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 67db34e..52919ad 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c

@@ -882,7 +882,7 @@
 	dev->mtu = local->mtu;
 
 
-	SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
+	dev->ethtool_ops = &prism2_ethtool_ops;
 
 }
 

diff --git a/drivers/net/wireless/iwlegacy/3945.c b/drivers/net/wireless/iwlegacy/3945.c
index d37a6fd..b598e28 100644
--- a/drivers/net/wireless/iwlegacy/3945.c
+++ b/drivers/net/wireless/iwlegacy/3945.c

@@ -573,7 +573,7 @@
 		rx_status.flag |= RX_FLAG_SHORTPRE;
 
 	if ((unlikely(rx_stats->phy_count > 20))) {
-		D_DROP("dsp size out of range [0,20]: %d/n",
+		D_DROP("dsp size out of range [0,20]: %d\n",
 		       rx_stats->phy_count);
 		return;
 	}

diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c
index 888ad5c..c159c05 100644
--- a/drivers/net/wireless/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/iwlegacy/4965-mac.c

@@ -670,7 +670,7 @@
 	}
 
 	if ((unlikely(phy_res->cfg_phy_cnt > 20))) {
-		D_DROP("dsp size out of range [0,20]: %d/n",
+		D_DROP("dsp size out of range [0,20]: %d\n",
 		       phy_res->cfg_phy_cnt);
 		return;
 	}

diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c
index 4f42174..ecc6746 100644
--- a/drivers/net/wireless/iwlegacy/common.c
+++ b/drivers/net/wireless/iwlegacy/common.c

@@ -4755,7 +4755,8 @@
 }
 EXPORT_SYMBOL(il_mac_change_interface);
 
-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop)
 {
 	struct il_priv *il = hw->priv;
 	unsigned long timeout = jiffies + msecs_to_jiffies(500);

diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h
index dfb13c7..ea5c0f8 100644
--- a/drivers/net/wireless/iwlegacy/common.h
+++ b/drivers/net/wireless/iwlegacy/common.h

@@ -1723,7 +1723,8 @@
 			     struct ieee80211_vif *vif);
 int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    enum nl80211_iftype newtype, bool newp2p);
-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop);
 int il_alloc_txq_mem(struct il_priv *il);
 void il_free_txq_mem(struct il_priv *il);
 

diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index 74b3b4d..7fd5042 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig

@@ -2,10 +2,6 @@
 	tristate "Intel Wireless WiFi Next Gen AGN - Wireless-N/Advanced-N/Ultimate-N (iwlwifi) "
 	depends on PCI && MAC80211 && HAS_IOMEM
 	select FW_LOADER
-	select NEW_LEDS
-	select LEDS_CLASS
-	select LEDS_TRIGGERS
-	select MAC80211_LEDS
 	---help---
 	  Select to build the driver supporting the:
 
@@ -43,6 +39,14 @@
 	  say M here and read <file:Documentation/kbuild/modules.txt>.  The
 	  module will be called iwlwifi.
 
+config IWLWIFI_LEDS
+	bool
+	depends on IWLWIFI
+	depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI
+	select LEDS_TRIGGERS
+	select MAC80211_LEDS
+	default y
+
 config IWLDVM
 	tristate "Intel Wireless WiFi DVM Firmware support"
 	depends on IWLWIFI
@@ -124,7 +128,6 @@
 	  Enable use of experimental ucode for testing and debugging.
 
 config IWLWIFI_DEVICE_TRACING
-
 	bool "iwlwifi device access tracing"
 	depends on IWLWIFI
 	depends on EVENT_TRACING

diff --git a/drivers/net/wireless/iwlwifi/dvm/Makefile b/drivers/net/wireless/iwlwifi/dvm/Makefile
index dce7ab2..4d19685 100644
--- a/drivers/net/wireless/iwlwifi/dvm/Makefile
+++ b/drivers/net/wireless/iwlwifi/dvm/Makefile

@@ -4,9 +4,10 @@
 iwldvm-objs		+= lib.o calib.o tt.o sta.o rx.o
 
 iwldvm-objs		+= power.o
-iwldvm-objs		+= scan.o led.o
+iwldvm-objs		+= scan.o
 iwldvm-objs		+= rxon.o devices.o
 
+iwldvm-$(CONFIG_IWLWIFI_LEDS) += led.o
 iwldvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o
 
 ccflags-y += -D__CHECK_ENDIAN__ -I$(src)/../

diff --git a/drivers/net/wireless/iwlwifi/dvm/calib.c b/drivers/net/wireless/iwlwifi/dvm/calib.c
index be1086c..20e6aa9 100644
--- a/drivers/net/wireless/iwlwifi/dvm/calib.c
+++ b/drivers/net/wireless/iwlwifi/dvm/calib.c

@@ -94,7 +94,6 @@
 {
 	struct iwl_host_cmd hcmd = {
 		.id = REPLY_PHY_CALIBRATION_CMD,
-		.flags = CMD_SYNC,
 	};
 	struct iwl_calib_result *res;
 

diff --git a/drivers/net/wireless/iwlwifi/dvm/debugfs.c b/drivers/net/wireless/iwlwifi/dvm/debugfs.c
index d2fe259..0ffb6ff 100644
--- a/drivers/net/wireless/iwlwifi/dvm/debugfs.c
+++ b/drivers/net/wireless/iwlwifi/dvm/debugfs.c

@@ -1481,7 +1481,7 @@
 
 	/* make request to uCode to retrieve statistics information */
 	mutex_lock(&priv->mutex);
-	ret = iwl_send_statistics_request(priv, CMD_SYNC, false);
+	ret = iwl_send_statistics_request(priv, 0, false);
 	mutex_unlock(&priv->mutex);
 
 	if (ret)
@@ -1868,7 +1868,7 @@
 
 	/* make request to uCode to retrieve statistics information */
 	mutex_lock(&priv->mutex);
-	iwl_send_statistics_request(priv, CMD_SYNC, true);
+	iwl_send_statistics_request(priv, 0, true);
 	mutex_unlock(&priv->mutex);
 
 	return count;
@@ -2188,7 +2188,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_ECHO,
 		.len = { 0 },
-		.flags = CMD_SYNC,
 	};
 
 	ret = iwl_dvm_send_cmd(priv, &cmd);
@@ -2320,7 +2319,7 @@
 	mutex_lock(&priv->mutex);
 
 	/* take the return value to make compiler happy - it will fail anyway */
-	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_ERROR, CMD_SYNC, 0, NULL);
+	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_ERROR, 0, 0, NULL);
 
 	mutex_unlock(&priv->mutex);
 

diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h
index 3441f70..a6f22c3 100644
--- a/drivers/net/wireless/iwlwifi/dvm/dev.h
+++ b/drivers/net/wireless/iwlwifi/dvm/dev.h

@@ -888,9 +888,11 @@
 
 	struct iwl_event_log event_log;
 
+#ifdef CONFIG_IWLWIFI_LEDS
 	struct led_classdev led;
 	unsigned long blink_on, blink_off;
 	bool led_registered;
+#endif
 
 	/* WoWLAN GTK rekey data */
 	u8 kck[NL80211_KCK_LEN], kek[NL80211_KEK_LEN];

diff --git a/drivers/net/wireless/iwlwifi/dvm/devices.c b/drivers/net/wireless/iwlwifi/dvm/devices.c
index 758c54e..34b41e5 100644
--- a/drivers/net/wireless/iwlwifi/dvm/devices.c
+++ b/drivers/net/wireless/iwlwifi/dvm/devices.c

@@ -417,7 +417,6 @@
 	struct iwl_host_cmd hcmd = {
 		.id = REPLY_CHANNEL_SWITCH,
 		.len = { sizeof(cmd), },
-		.flags = CMD_SYNC,
 		.data = { &cmd, },
 	};
 
@@ -579,7 +578,6 @@
 	struct iwl_host_cmd hcmd = {
 		.id = REPLY_CHANNEL_SWITCH,
 		.len = { sizeof(*cmd), },
-		.flags = CMD_SYNC,
 		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
 	};
 	int err;

diff --git a/drivers/net/wireless/iwlwifi/dvm/led.h b/drivers/net/wireless/iwlwifi/dvm/led.h
index 6a0817d..1c6b225 100644
--- a/drivers/net/wireless/iwlwifi/dvm/led.h
+++ b/drivers/net/wireless/iwlwifi/dvm/led.h

@@ -36,8 +36,20 @@
 #define IWL_LED_ACTIVITY       (0<<1)
 #define IWL_LED_LINK           (1<<1)
 
+#ifdef CONFIG_IWLWIFI_LEDS
 void iwlagn_led_enable(struct iwl_priv *priv);
 void iwl_leds_init(struct iwl_priv *priv);
 void iwl_leds_exit(struct iwl_priv *priv);
+#else
+static inline void iwlagn_led_enable(struct iwl_priv *priv)
+{
+}
+static inline void iwl_leds_init(struct iwl_priv *priv)
+{
+}
+static inline void iwl_leds_exit(struct iwl_priv *priv)
+{
+}
+#endif
 
 #endif /* __iwl_leds_h__ */

diff --git a/drivers/net/wireless/iwlwifi/dvm/lib.c b/drivers/net/wireless/iwlwifi/dvm/lib.c
index 576f7ee..2191621 100644
--- a/drivers/net/wireless/iwlwifi/dvm/lib.c
+++ b/drivers/net/wireless/iwlwifi/dvm/lib.c

@@ -81,7 +81,7 @@
 	else
 		tx_ant_cfg_cmd = REPLY_TX_POWER_DBM_CMD;
 
-	return iwl_dvm_send_cmd_pdu(priv, tx_ant_cfg_cmd, CMD_SYNC,
+	return iwl_dvm_send_cmd_pdu(priv, tx_ant_cfg_cmd, 0,
 			sizeof(tx_power_cmd), &tx_power_cmd);
 }
 
@@ -141,7 +141,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_TXFIFO_FLUSH,
 		.len = { sizeof(struct iwl_txfifo_flush_cmd), },
-		.flags = CMD_SYNC,
 		.data = { &flush_cmd, },
 	};
 
@@ -180,7 +179,7 @@
 		goto done;
 	}
 	IWL_DEBUG_INFO(priv, "wait transmit/flush all frames\n");
-	iwl_trans_wait_tx_queue_empty(priv->trans);
+	iwl_trans_wait_tx_queue_empty(priv->trans, 0xffffffff);
 done:
 	ieee80211_wake_queues(priv->hw);
 	mutex_unlock(&priv->mutex);
@@ -333,12 +332,12 @@
 		memcpy(&bt_cmd_v2.basic, &basic,
 			sizeof(basic));
 		ret = iwl_dvm_send_cmd_pdu(priv, REPLY_BT_CONFIG,
-			CMD_SYNC, sizeof(bt_cmd_v2), &bt_cmd_v2);
+			0, sizeof(bt_cmd_v2), &bt_cmd_v2);
 	} else {
 		memcpy(&bt_cmd_v1.basic, &basic,
 			sizeof(basic));
 		ret = iwl_dvm_send_cmd_pdu(priv, REPLY_BT_CONFIG,
-			CMD_SYNC, sizeof(bt_cmd_v1), &bt_cmd_v1);
+			0, sizeof(bt_cmd_v1), &bt_cmd_v1);
 	}
 	if (ret)
 		IWL_ERR(priv, "failed to send BT Coex Config\n");
@@ -1044,7 +1043,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_WOWLAN_PATTERNS,
 		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
-		.flags = CMD_SYNC,
 	};
 	int i, err;
 
@@ -1201,7 +1199,6 @@
 		if (key_data.use_rsc_tsc) {
 			struct iwl_host_cmd rsc_tsc_cmd = {
 				.id = REPLY_WOWLAN_TSC_RSC_PARAMS,
-				.flags = CMD_SYNC,
 				.data[0] = key_data.rsc_tsc,
 				.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
 				.len[0] = sizeof(*key_data.rsc_tsc),
@@ -1215,7 +1212,7 @@
 		if (key_data.use_tkip) {
 			ret = iwl_dvm_send_cmd_pdu(priv,
 						 REPLY_WOWLAN_TKIP_PARAMS,
-						 CMD_SYNC, sizeof(tkip_cmd),
+						 0, sizeof(tkip_cmd),
 						 &tkip_cmd);
 			if (ret)
 				goto out;
@@ -1231,20 +1228,20 @@
 
 			ret = iwl_dvm_send_cmd_pdu(priv,
 						 REPLY_WOWLAN_KEK_KCK_MATERIAL,
-						 CMD_SYNC, sizeof(kek_kck_cmd),
+						 0, sizeof(kek_kck_cmd),
 						 &kek_kck_cmd);
 			if (ret)
 				goto out;
 		}
 	}
 
-	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_D3_CONFIG, CMD_SYNC,
+	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_D3_CONFIG, 0,
 				     sizeof(d3_cfg_cmd), &d3_cfg_cmd);
 	if (ret)
 		goto out;
 
 	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_WOWLAN_WAKEUP_FILTER,
-				 CMD_SYNC, sizeof(wakeup_filter_cmd),
+				 0, sizeof(wakeup_filter_cmd),
 				 &wakeup_filter_cmd);
 	if (ret)
 		goto out;

diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index dd55c9c..29af7b5 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c

@@ -1091,7 +1091,8 @@
 			FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL;
 }
 
-static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			     u32 queues, bool drop)
 {
 	struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw);
 
@@ -1119,7 +1120,7 @@
 		}
 	}
 	IWL_DEBUG_MAC80211(priv, "wait transmit/flush all frames\n");
-	iwl_trans_wait_tx_queue_empty(priv->trans);
+	iwl_trans_wait_tx_queue_empty(priv->trans, 0xffffffff);
 done:
 	mutex_unlock(&priv->mutex);
 	IWL_DEBUG_MAC80211(priv, "leave\n");

diff --git a/drivers/net/wireless/iwlwifi/dvm/main.c b/drivers/net/wireless/iwlwifi/dvm/main.c
index 6a6df71..0b7f46f 100644
--- a/drivers/net/wireless/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/iwlwifi/dvm/main.c

@@ -128,7 +128,6 @@
 	struct iwl_tx_beacon_cmd *tx_beacon_cmd;
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_TX_BEACON,
-		.flags = CMD_SYNC,
 	};
 	struct ieee80211_tx_info *info;
 	u32 frame_size;
@@ -311,8 +310,7 @@
 					sizeof(struct iwl_statistics_cmd),
 					&statistics_cmd);
 	else
-		return iwl_dvm_send_cmd_pdu(priv, REPLY_STATISTICS_CMD,
-					CMD_SYNC,
+		return iwl_dvm_send_cmd_pdu(priv, REPLY_STATISTICS_CMD, 0,
 					sizeof(struct iwl_statistics_cmd),
 					&statistics_cmd);
 }
@@ -622,7 +620,7 @@
 
 		ret = iwl_dvm_send_cmd_pdu(priv,
 				       REPLY_CT_KILL_CONFIG_CMD,
-				       CMD_SYNC, sizeof(adv_cmd), &adv_cmd);
+				       0, sizeof(adv_cmd), &adv_cmd);
 		if (ret)
 			IWL_ERR(priv, "REPLY_CT_KILL_CONFIG_CMD failed\n");
 		else
@@ -637,7 +635,7 @@
 
 		ret = iwl_dvm_send_cmd_pdu(priv,
 				       REPLY_CT_KILL_CONFIG_CMD,
-				       CMD_SYNC, sizeof(cmd), &cmd);
+				       0, sizeof(cmd), &cmd);
 		if (ret)
 			IWL_ERR(priv, "REPLY_CT_KILL_CONFIG_CMD failed\n");
 		else
@@ -673,9 +671,7 @@
 
 	if (IWL_UCODE_API(priv->fw->ucode_ver) > 1) {
 		IWL_DEBUG_HC(priv, "select valid tx ant: %u\n", valid_tx_ant);
-		return iwl_dvm_send_cmd_pdu(priv,
-					TX_ANT_CONFIGURATION_CMD,
-					CMD_SYNC,
+		return iwl_dvm_send_cmd_pdu(priv, TX_ANT_CONFIGURATION_CMD, 0,
 					sizeof(struct iwl_tx_ant_config_cmd),
 					&tx_ant_cmd);
 	} else {
@@ -703,7 +699,7 @@
 		(bt_cmd.flags == BT_COEX_DISABLE) ? "disable" : "active");
 
 	if (iwl_dvm_send_cmd_pdu(priv, REPLY_BT_CONFIG,
-			     CMD_SYNC, sizeof(struct iwl_bt_cmd), &bt_cmd))
+			     0, sizeof(struct iwl_bt_cmd), &bt_cmd))
 		IWL_ERR(priv, "failed to send BT Coex Config\n");
 }
 
@@ -987,7 +983,7 @@
 			ieee80211_restart_hw(priv->hw);
 		else
 			IWL_ERR(priv,
-				"Cannot request restart before registrating with mac80211");
+				"Cannot request restart before registrating with mac80211\n");
 	} else {
 		WARN_ON(1);
 	}
@@ -1127,7 +1123,6 @@
 static int iwl_eeprom_init_hw_params(struct iwl_priv *priv)
 {
 	struct iwl_nvm_data *data = priv->nvm_data;
-	char *debug_msg;
 
 	if (data->sku_cap_11n_enable &&
 	    !priv->cfg->ht_params) {
@@ -1141,8 +1136,8 @@
 		return -EINVAL;
 	}
 
-	debug_msg = "Device SKU: 24GHz %s %s, 52GHz %s %s, 11.n %s %s\n";
-	IWL_DEBUG_INFO(priv, debug_msg,
+	IWL_DEBUG_INFO(priv,
+		       "Device SKU: 24GHz %s %s, 52GHz %s %s, 11.n %s %s\n",
 		       data->sku_cap_band_24GHz_enable ? "" : "NOT", "enabled",
 		       data->sku_cap_band_52GHz_enable ? "" : "NOT", "enabled",
 		       data->sku_cap_11n_enable ? "" : "NOT", "enabled");
@@ -1350,7 +1345,7 @@
 	iwl_set_hw_params(priv);
 
 	if (!(priv->nvm_data->sku_cap_ipan_enable)) {
-		IWL_DEBUG_INFO(priv, "Your EEPROM disabled PAN");
+		IWL_DEBUG_INFO(priv, "Your EEPROM disabled PAN\n");
 		ucode_flags &= ~IWL_UCODE_TLV_FLAGS_PAN;
 		/*
 		 * if not PAN, then don't support P2P -- might be a uCode
@@ -2019,10 +2014,10 @@
 
 	for (mq = 0; mq < IWLAGN_FIRST_AMPDU_QUEUE; mq++) {
 		if (!test_bit(mq, &priv->transport_queue_stop)) {
-			IWL_DEBUG_TX_QUEUES(priv, "Wake queue %d", mq);
+			IWL_DEBUG_TX_QUEUES(priv, "Wake queue %d\n", mq);
 			ieee80211_wake_queue(priv->hw, mq);
 		} else {
-			IWL_DEBUG_TX_QUEUES(priv, "Don't wake queue %d", mq);
+			IWL_DEBUG_TX_QUEUES(priv, "Don't wake queue %d\n", mq);
 		}
 	}
 
@@ -2053,6 +2048,17 @@
 	return false;
 }
 
+static void iwl_napi_add(struct iwl_op_mode *op_mode,
+			 struct napi_struct *napi,
+			 struct net_device *napi_dev,
+			 int (*poll)(struct napi_struct *, int),
+			 int weight)
+{
+	struct iwl_priv *priv = IWL_OP_MODE_GET_DVM(op_mode);
+
+	ieee80211_napi_add(priv->hw, napi, napi_dev, poll, weight);
+}
+
 static const struct iwl_op_mode_ops iwl_dvm_ops = {
 	.start = iwl_op_mode_dvm_start,
 	.stop = iwl_op_mode_dvm_stop,
@@ -2065,6 +2071,7 @@
 	.cmd_queue_full = iwl_cmd_queue_full,
 	.nic_config = iwl_nic_config,
 	.wimax_active = iwl_wimax_active,
+	.napi_add = iwl_napi_add,
 };
 
 /*****************************************************************************

diff --git a/drivers/net/wireless/iwlwifi/dvm/power.c b/drivers/net/wireless/iwlwifi/dvm/power.c
index b4e6141..f2c1439 100644
--- a/drivers/net/wireless/iwlwifi/dvm/power.c
+++ b/drivers/net/wireless/iwlwifi/dvm/power.c

@@ -278,7 +278,7 @@
 			le32_to_cpu(cmd->sleep_interval[3]),
 			le32_to_cpu(cmd->sleep_interval[4]));
 
-	return iwl_dvm_send_cmd_pdu(priv, POWER_TABLE_CMD, CMD_SYNC,
+	return iwl_dvm_send_cmd_pdu(priv, POWER_TABLE_CMD, 0,
 				sizeof(struct iwl_powertable_cmd), cmd);
 }
 
@@ -361,7 +361,7 @@
 
 		memcpy(&priv->power_data.sleep_cmd, cmd, sizeof(*cmd));
 	} else
-		IWL_ERR(priv, "set power fail, ret = %d", ret);
+		IWL_ERR(priv, "set power fail, ret = %d\n", ret);
 
 	return ret;
 }

diff --git a/drivers/net/wireless/iwlwifi/dvm/rs.c b/drivers/net/wireless/iwlwifi/dvm/rs.c
index aa773a2..32b78a6 100644
--- a/drivers/net/wireless/iwlwifi/dvm/rs.c
+++ b/drivers/net/wireless/iwlwifi/dvm/rs.c

@@ -1453,7 +1453,7 @@
 			tbl->action = IWL_LEGACY_SWITCH_SISO;
 		break;
 	default:
-		IWL_ERR(priv, "Invalid BT load %d", priv->bt_traffic_load);
+		IWL_ERR(priv, "Invalid BT load %d\n", priv->bt_traffic_load);
 		break;
 	}
 
@@ -1628,7 +1628,7 @@
 			tbl->action = IWL_SISO_SWITCH_ANTENNA1;
 		break;
 	default:
-		IWL_ERR(priv, "Invalid BT load %d", priv->bt_traffic_load);
+		IWL_ERR(priv, "Invalid BT load %d\n", priv->bt_traffic_load);
 		break;
 	}
 
@@ -1799,7 +1799,7 @@
 			tbl->action = IWL_MIMO2_SWITCH_SISO_A;
 		break;
 	default:
-		IWL_ERR(priv, "Invalid BT load %d", priv->bt_traffic_load);
+		IWL_ERR(priv, "Invalid BT load %d\n", priv->bt_traffic_load);
 		break;
 	}
 
@@ -1969,7 +1969,7 @@
 			tbl->action = IWL_MIMO3_SWITCH_SISO_A;
 		break;
 	default:
-		IWL_ERR(priv, "Invalid BT load %d", priv->bt_traffic_load);
+		IWL_ERR(priv, "Invalid BT load %d\n", priv->bt_traffic_load);
 		break;
 	}
 
@@ -2709,7 +2709,7 @@
 	rs_set_expected_tpt_table(lq_sta, tbl);
 	rs_fill_link_cmd(NULL, lq_sta, rate);
 	priv->stations[lq_sta->lq.sta_id].lq = &lq_sta->lq;
-	iwl_send_lq_cmd(priv, ctx, &lq_sta->lq, CMD_SYNC, true);
+	iwl_send_lq_cmd(priv, ctx, &lq_sta->lq, 0, true);
 }
 
 static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, void *priv_sta,

diff --git a/drivers/net/wireless/iwlwifi/dvm/rx.c b/drivers/net/wireless/iwlwifi/dvm/rx.c
index cd83773..debec96 100644
--- a/drivers/net/wireless/iwlwifi/dvm/rx.c
+++ b/drivers/net/wireless/iwlwifi/dvm/rx.c

@@ -786,7 +786,7 @@
 
 	memcpy(IEEE80211_SKB_RXCB(skb), stats, sizeof(*stats));
 
-	ieee80211_rx_ni(priv->hw, skb);
+	ieee80211_rx(priv->hw, skb);
 }
 
 static u32 iwlagn_translate_rx_status(struct iwl_priv *priv, u32 decrypt_in)

diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c
index 503a81e..ed50de6 100644
--- a/drivers/net/wireless/iwlwifi/dvm/rxon.c
+++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c

@@ -104,7 +104,7 @@
 
 	send->filter_flags &= ~RXON_FILTER_ASSOC_MSK;
 	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd,
-				CMD_SYNC, sizeof(*send), send);
+				0, sizeof(*send), send);
 
 	send->filter_flags = old_filter;
 
@@ -134,7 +134,7 @@
 	send->filter_flags &= ~RXON_FILTER_ASSOC_MSK;
 	send->dev_type = RXON_DEV_TYPE_P2P;
 	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd,
-				CMD_SYNC, sizeof(*send), send);
+				0, sizeof(*send), send);
 
 	send->filter_flags = old_filter;
 	send->dev_type = old_dev_type;
@@ -160,7 +160,7 @@
 	int ret;
 
 	send->filter_flags &= ~RXON_FILTER_ASSOC_MSK;
-	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd, CMD_SYNC,
+	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd, 0,
 				sizeof(*send), send);
 
 	send->filter_flags = old_filter;
@@ -189,7 +189,7 @@
 		      ctx->qos_data.qos_active,
 		      ctx->qos_data.def_qos_parm.qos_flags);
 
-	ret = iwl_dvm_send_cmd_pdu(priv, ctx->qos_cmd, CMD_SYNC,
+	ret = iwl_dvm_send_cmd_pdu(priv, ctx->qos_cmd, 0,
 			       sizeof(struct iwl_qosparam_cmd),
 			       &ctx->qos_data.def_qos_parm);
 	if (ret)
@@ -353,7 +353,7 @@
 			le16_to_cpu(ctx->timing.atim_window));
 
 	return iwl_dvm_send_cmd_pdu(priv, ctx->rxon_timing_cmd,
-				CMD_SYNC, sizeof(ctx->timing), &ctx->timing);
+				0, sizeof(ctx->timing), &ctx->timing);
 }
 
 static int iwlagn_rxon_disconn(struct iwl_priv *priv,
@@ -495,7 +495,7 @@
 	 * Associated RXON doesn't clear the station table in uCode,
 	 * so we don't need to restore stations etc. after this.
 	 */
-	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd, CMD_SYNC,
+	ret = iwl_dvm_send_cmd_pdu(priv, ctx->rxon_cmd, 0,
 		      sizeof(struct iwl_rxon_cmd), &ctx->staging);
 	if (ret) {
 		IWL_ERR(priv, "Error setting new RXON (%d)\n", ret);
@@ -610,7 +610,7 @@
 	cmd.slots[0].width = cpu_to_le16(slot0);
 	cmd.slots[1].width = cpu_to_le16(slot1);
 
-	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_WIPAN_PARAMS, CMD_SYNC,
+	ret = iwl_dvm_send_cmd_pdu(priv, REPLY_WIPAN_PARAMS, 0,
 			sizeof(cmd), &cmd);
 	if (ret)
 		IWL_ERR(priv, "Error setting PAN parameters (%d)\n", ret);
@@ -823,7 +823,7 @@
 
 	if ((rxon->flags & (RXON_FLG_CCK_MSK | RXON_FLG_AUTO_DETECT_MSK))
 			== (RXON_FLG_CCK_MSK | RXON_FLG_AUTO_DETECT_MSK)) {
-		IWL_WARN(priv, "CCK and auto detect");
+		IWL_WARN(priv, "CCK and auto detect\n");
 		errors |= BIT(8);
 	}
 
@@ -1395,7 +1395,7 @@
 			priv->phy_calib_chain_noise_reset_cmd);
 		ret = iwl_dvm_send_cmd_pdu(priv,
 					REPLY_PHY_CALIBRATION_CMD,
-					CMD_SYNC, sizeof(cmd), &cmd);
+					0, sizeof(cmd), &cmd);
 		if (ret)
 			IWL_ERR(priv,
 				"Could not send REPLY_PHY_CALIBRATION_CMD\n");

diff --git a/drivers/net/wireless/iwlwifi/dvm/scan.c b/drivers/net/wireless/iwlwifi/dvm/scan.c
index be98b91..43bef90 100644
--- a/drivers/net/wireless/iwlwifi/dvm/scan.c
+++ b/drivers/net/wireless/iwlwifi/dvm/scan.c

@@ -59,7 +59,7 @@
 	int ret;
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_SCAN_ABORT_CMD,
-		.flags = CMD_SYNC | CMD_WANT_SKB,
+		.flags = CMD_WANT_SKB,
 	};
 	__le32 *status;
 
@@ -639,7 +639,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_SCAN_CMD,
 		.len = { sizeof(struct iwl_scan_cmd), },
-		.flags = CMD_SYNC,
 	};
 	struct iwl_scan_cmd *scan;
 	struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_BSS];

diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c
index 9cdd91c..6ec86ad 100644
--- a/drivers/net/wireless/iwlwifi/dvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/dvm/sta.c

@@ -39,7 +39,7 @@
 	lockdep_assert_held(&priv->sta_lock);
 
 	if (sta_id >= IWLAGN_STATION_COUNT) {
-		IWL_ERR(priv, "invalid sta_id %u", sta_id);
+		IWL_ERR(priv, "invalid sta_id %u\n", sta_id);
 		return -EINVAL;
 	}
 	if (!(priv->stations[sta_id].used & IWL_STA_DRIVER_ACTIVE))
@@ -165,7 +165,7 @@
 	iwl_free_resp(&cmd);
 
 	if (cmd.handler_status)
-		IWL_ERR(priv, "%s - error in the CMD response %d", __func__,
+		IWL_ERR(priv, "%s - error in the CMD response %d\n", __func__,
 			cmd.handler_status);
 
 	return cmd.handler_status;
@@ -261,7 +261,7 @@
 	cmd.station_flags = flags;
 	cmd.sta.sta_id = sta_id;
 
-	return iwl_send_add_sta(priv, &cmd, CMD_SYNC);
+	return iwl_send_add_sta(priv, &cmd, 0);
 }
 
 static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index,
@@ -413,7 +413,7 @@
 	spin_unlock_bh(&priv->sta_lock);
 
 	/* Add station to device's station table */
-	ret = iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+	ret = iwl_send_add_sta(priv, &sta_cmd, 0);
 	if (ret) {
 		spin_lock_bh(&priv->sta_lock);
 		IWL_ERR(priv, "Adding station %pM failed.\n",
@@ -456,7 +456,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = REPLY_REMOVE_STA,
 		.len = { sizeof(struct iwl_rem_sta_cmd), },
-		.flags = CMD_SYNC,
 		.data = { &rm_sta_cmd, },
 	};
 
@@ -740,7 +739,7 @@
 					send_lq = true;
 			}
 			spin_unlock_bh(&priv->sta_lock);
-			ret = iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+			ret = iwl_send_add_sta(priv, &sta_cmd, 0);
 			if (ret) {
 				spin_lock_bh(&priv->sta_lock);
 				IWL_ERR(priv, "Adding station %pM failed.\n",
@@ -756,8 +755,7 @@
 			 * current LQ command
 			 */
 			if (send_lq)
-				iwl_send_lq_cmd(priv, ctx, &lq,
-						CMD_SYNC, true);
+				iwl_send_lq_cmd(priv, ctx, &lq, 0, true);
 			spin_lock_bh(&priv->sta_lock);
 			priv->stations[i].used &= ~IWL_STA_UCODE_INPROGRESS;
 		}
@@ -968,7 +966,7 @@
 		return -ENOMEM;
 	}
 
-	ret = iwl_send_lq_cmd(priv, ctx, link_cmd, CMD_SYNC, true);
+	ret = iwl_send_lq_cmd(priv, ctx, link_cmd, 0, true);
 	if (ret)
 		IWL_ERR(priv, "Link quality command failed (%d)\n", ret);
 
@@ -999,7 +997,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = ctx->wep_key_cmd,
 		.data = { wep_cmd, },
-		.flags = CMD_SYNC,
 	};
 
 	might_sleep();
@@ -1248,7 +1245,7 @@
 	sta_cmd.sta.modify_mask = STA_MODIFY_KEY_MASK;
 	sta_cmd.mode = STA_CONTROL_MODIFY_MSK;
 
-	return iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+	return iwl_send_add_sta(priv, &sta_cmd, 0);
 }
 
 int iwl_set_dynamic_key(struct iwl_priv *priv,
@@ -1284,13 +1281,13 @@
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
 		ret = iwlagn_send_sta_key(priv, keyconf, sta_id,
-					  seq.tkip.iv32, p1k, CMD_SYNC);
+					  seq.tkip.iv32, p1k, 0);
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_WEP40:
 	case WLAN_CIPHER_SUITE_WEP104:
 		ret = iwlagn_send_sta_key(priv, keyconf, sta_id,
-					  0, NULL, CMD_SYNC);
+					  0, NULL, 0);
 		break;
 	default:
 		IWL_ERR(priv, "Unknown cipher %x\n", keyconf->cipher);
@@ -1409,7 +1406,7 @@
 	memcpy(&sta_cmd, &priv->stations[sta_id].sta, sizeof(struct iwl_addsta_cmd));
 	spin_unlock_bh(&priv->sta_lock);
 
-	return iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+	return iwl_send_add_sta(priv, &sta_cmd, 0);
 }
 
 int iwl_sta_rx_agg_start(struct iwl_priv *priv, struct ieee80211_sta *sta,
@@ -1433,7 +1430,7 @@
 	memcpy(&sta_cmd, &priv->stations[sta_id].sta, sizeof(struct iwl_addsta_cmd));
 	spin_unlock_bh(&priv->sta_lock);
 
-	return iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+	return iwl_send_add_sta(priv, &sta_cmd, 0);
 }
 
 int iwl_sta_rx_agg_stop(struct iwl_priv *priv, struct ieee80211_sta *sta,
@@ -1458,7 +1455,7 @@
 	memcpy(&sta_cmd, &priv->stations[sta_id].sta, sizeof(struct iwl_addsta_cmd));
 	spin_unlock_bh(&priv->sta_lock);
 
-	return iwl_send_add_sta(priv, &sta_cmd, CMD_SYNC);
+	return iwl_send_add_sta(priv, &sta_cmd, 0);
 }
 
 

diff --git a/drivers/net/wireless/iwlwifi/dvm/tt.c b/drivers/net/wireless/iwlwifi/dvm/tt.c
index 058c589..acb981a 100644
--- a/drivers/net/wireless/iwlwifi/dvm/tt.c
+++ b/drivers/net/wireless/iwlwifi/dvm/tt.c

@@ -236,7 +236,7 @@
 {
 	IWL_DEBUG_TEMP(priv, "Prepare to enter IWL_TI_CT_KILL\n");
 	/* make request to retrieve statistics information */
-	iwl_send_statistics_request(priv, CMD_SYNC, false);
+	iwl_send_statistics_request(priv, 0, false);
 	/* Reschedule the ct_kill wait timer */
 	mod_timer(&priv->thermal_throttle.ct_kill_waiting_tm,
 		 jiffies + msecs_to_jiffies(CT_KILL_WAITING_DURATION));

diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c
index 398dd09..3255a17 100644
--- a/drivers/net/wireless/iwlwifi/dvm/tx.c
+++ b/drivers/net/wireless/iwlwifi/dvm/tx.c

@@ -402,10 +402,10 @@
 		/* aggregation is on for this <sta,tid> */
 		if (info->flags & IEEE80211_TX_CTL_AMPDU &&
 		    tid_data->agg.state != IWL_AGG_ON) {
-			IWL_ERR(priv, "TX_CTL_AMPDU while not in AGG:"
-				" Tx flags = 0x%08x, agg.state = %d",
+			IWL_ERR(priv,
+				"TX_CTL_AMPDU while not in AGG: Tx flags = 0x%08x, agg.state = %d\n",
 				info->flags, tid_data->agg.state);
-			IWL_ERR(priv, "sta_id = %d, tid = %d seq_num = %d",
+			IWL_ERR(priv, "sta_id = %d, tid = %d seq_num = %d\n",
 				sta_id, tid,
 				IEEE80211_SEQ_TO_SN(tid_data->seq_number));
 			goto drop_unlock_sta;
@@ -416,7 +416,7 @@
 		 */
 		if (WARN_ONCE(tid_data->agg.state != IWL_AGG_ON &&
 			      tid_data->agg.state != IWL_AGG_OFF,
-		    "Tx while agg.state = %d", tid_data->agg.state))
+			      "Tx while agg.state = %d\n", tid_data->agg.state))
 			goto drop_unlock_sta;
 
 		seq_number = tid_data->seq_number;
@@ -778,8 +778,8 @@
 		/* There are no packets for this RA / TID in the HW any more */
 		if (tid_data->agg.ssn == tid_data->next_reclaimed) {
 			IWL_DEBUG_TX_QUEUES(priv,
-				"Can continue DELBA flow ssn = next_recl ="
-				" %d", tid_data->next_reclaimed);
+				"Can continue DELBA flow ssn = next_recl = %d\n",
+				tid_data->next_reclaimed);
 			iwl_trans_txq_disable(priv->trans,
 					      tid_data->agg.txq_id);
 			iwlagn_dealloc_agg_txq(priv, tid_data->agg.txq_id);
@@ -791,8 +791,8 @@
 		/* There are no packets for this RA / TID in the HW any more */
 		if (tid_data->agg.ssn == tid_data->next_reclaimed) {
 			IWL_DEBUG_TX_QUEUES(priv,
-				"Can continue ADDBA flow ssn = next_recl ="
-				" %d", tid_data->next_reclaimed);
+				"Can continue ADDBA flow ssn = next_recl = %d\n",
+				tid_data->next_reclaimed);
 			tid_data->agg.state = IWL_AGG_STARTING;
 			ieee80211_start_tx_ba_cb_irqsafe(vif, addr, tid);
 		}
@@ -1216,8 +1216,8 @@
 			    ctx->vif->type == NL80211_IFTYPE_STATION) {
 				/* block and stop all queues */
 				priv->passive_no_rx = true;
-				IWL_DEBUG_TX_QUEUES(priv, "stop all queues: "
-						    "passive channel");
+				IWL_DEBUG_TX_QUEUES(priv,
+					"stop all queues: passive channel\n");
 				ieee80211_stop_queues(priv->hw);
 
 				IWL_DEBUG_TX_REPLY(priv,
@@ -1271,7 +1271,7 @@
 
 	while (!skb_queue_empty(&skbs)) {
 		skb = __skb_dequeue(&skbs);
-		ieee80211_tx_status_ni(priv->hw, skb);
+		ieee80211_tx_status(priv->hw, skb);
 	}
 
 	return 0;
@@ -1411,7 +1411,7 @@
 
 	while (!skb_queue_empty(&reclaimed_skbs)) {
 		skb = __skb_dequeue(&reclaimed_skbs);
-		ieee80211_tx_status_ni(priv->hw, skb);
+		ieee80211_tx_status(priv->hw, skb);
 	}
 
 	return 0;

diff --git a/drivers/net/wireless/iwlwifi/dvm/ucode.c b/drivers/net/wireless/iwlwifi/dvm/ucode.c
index cf03ef5..d5cee15 100644
--- a/drivers/net/wireless/iwlwifi/dvm/ucode.c
+++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c

@@ -172,7 +172,7 @@
 	memset(&coex_cmd, 0, sizeof(coex_cmd));
 
 	return iwl_dvm_send_cmd_pdu(priv,
-				COEX_PRIORITY_TABLE_CMD, CMD_SYNC,
+				COEX_PRIORITY_TABLE_CMD, 0,
 				sizeof(coex_cmd), &coex_cmd);
 }
 
@@ -205,7 +205,7 @@
 	memcpy(prio_tbl_cmd.prio_tbl, iwl_bt_prio_tbl,
 		sizeof(iwl_bt_prio_tbl));
 	if (iwl_dvm_send_cmd_pdu(priv,
-				REPLY_BT_COEX_PRIO_TABLE, CMD_SYNC,
+				REPLY_BT_COEX_PRIO_TABLE, 0,
 				sizeof(prio_tbl_cmd), &prio_tbl_cmd))
 		IWL_ERR(priv, "failed to send BT prio tbl command\n");
 }
@@ -218,7 +218,7 @@
 	env_cmd.action = action;
 	env_cmd.type = type;
 	ret = iwl_dvm_send_cmd_pdu(priv,
-			       REPLY_BT_COEX_PROT_ENV, CMD_SYNC,
+			       REPLY_BT_COEX_PROT_ENV, 0,
 			       sizeof(env_cmd), &env_cmd);
 	if (ret)
 		IWL_ERR(priv, "failed to send BT env command\n");

diff --git a/drivers/net/wireless/iwlwifi/iwl-1000.c b/drivers/net/wireless/iwlwifi/iwl-1000.c
index 854ba84..c3817fa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-1000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-1000.c

@@ -62,6 +62,7 @@
 	.led_compensation = 51,
 	.wd_timeout = IWL_WATCHDOG_DISABLED,
 	.max_event_log_size = 128,
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_ht_params iwl1000_ht_params = {

diff --git a/drivers/net/wireless/iwlwifi/iwl-2000.c b/drivers/net/wireless/iwlwifi/iwl-2000.c
index 3e63323..21e5d08 100644
--- a/drivers/net/wireless/iwlwifi/iwl-2000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-2000.c

@@ -75,6 +75,7 @@
 	.wd_timeout = IWL_DEF_WD_TIMEOUT,
 	.max_event_log_size = 512,
 	.shadow_reg_enable = false, /* TODO: fix bugs using this feature */
+	.scd_chain_ext_wa = true,
 };
 
 
@@ -88,6 +89,7 @@
 	.wd_timeout = IWL_LONG_WD_TIMEOUT,
 	.max_event_log_size = 512,
 	.shadow_reg_enable = false, /* TODO: fix bugs using this feature */
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_ht_params iwl2000_ht_params = {

diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 6674f2c..332bbed 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c

@@ -61,6 +61,7 @@
 	.led_compensation = 51,
 	.wd_timeout = IWL_WATCHDOG_DISABLED,
 	.max_event_log_size = 512,
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_ht_params iwl5000_ht_params = {

diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c
index 8048de9..8f2c3c8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-6000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-6000.c

@@ -85,6 +85,7 @@
 	.wd_timeout = IWL_DEF_WD_TIMEOUT,
 	.max_event_log_size = 512,
 	.shadow_reg_enable = false, /* TODO: fix bugs using this feature */
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_base_params iwl6050_base_params = {
@@ -97,6 +98,7 @@
 	.wd_timeout = IWL_DEF_WD_TIMEOUT,
 	.max_event_log_size = 1024,
 	.shadow_reg_enable = false, /* TODO: fix bugs using this feature */
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_base_params iwl6000_g2_base_params = {
@@ -109,6 +111,7 @@
 	.wd_timeout = IWL_LONG_WD_TIMEOUT,
 	.max_event_log_size = 512,
 	.shadow_reg_enable = false, /* TODO: fix bugs using this feature */
+	.scd_chain_ext_wa = true,
 };
 
 static const struct iwl_ht_params iwl6000_ht_params = {

diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c
index 4c2d4ef..4873006 100644
--- a/drivers/net/wireless/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-7000.c

@@ -71,12 +71,12 @@
 #define IWL3160_UCODE_API_MAX	9
 
 /* Oldest version we won't warn about */
-#define IWL7260_UCODE_API_OK	8
-#define IWL3160_UCODE_API_OK	8
+#define IWL7260_UCODE_API_OK	9
+#define IWL3160_UCODE_API_OK	9
 
 /* Lowest firmware API version supported */
-#define IWL7260_UCODE_API_MIN	7
-#define IWL3160_UCODE_API_MIN	7
+#define IWL7260_UCODE_API_MIN	8
+#define IWL3160_UCODE_API_MIN	8
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION		0x0a1d
@@ -98,7 +98,7 @@
 #define NVM_HW_SECTION_NUM_FAMILY_7000		0
 
 static const struct iwl_base_params iwl7000_base_params = {
-	.eeprom_size = OTP_LOW_IMAGE_SIZE,
+	.eeprom_size = OTP_LOW_IMAGE_SIZE_FAMILY_7000,
 	.num_of_queues = IWLAGN_NUM_QUEUES,
 	.pll_cfg_val = 0,
 	.shadow_ram_support = true,
@@ -107,6 +107,7 @@
 	.max_event_log_size = 512,
 	.shadow_reg_enable = true,
 	.pcie_l1_allowed = true,
+	.apmg_wake_up_wa = true,
 };
 
 static const struct iwl_ht_params iwl7000_ht_params = {

diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c
index f5bd82b..51c4153 100644
--- a/drivers/net/wireless/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-8000.c

@@ -83,9 +83,10 @@
 #define IWL8000_MODULE_FIRMWARE(api) IWL8000_FW_PRE __stringify(api) ".ucode"
 
 #define NVM_HW_SECTION_NUM_FAMILY_8000		10
+#define DEFAULT_NVM_FILE_FAMILY_8000		"iwl_nvm_8000.bin"
 
 static const struct iwl_base_params iwl8000_base_params = {
-	.eeprom_size = OTP_LOW_IMAGE_SIZE,
+	.eeprom_size = OTP_LOW_IMAGE_SIZE_FAMILY_8000,
 	.num_of_queues = IWLAGN_NUM_QUEUES,
 	.pll_cfg_val = 0,
 	.shadow_ram_support = true,
@@ -118,6 +119,7 @@
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
+	.default_nvm_file = DEFAULT_NVM_FILE_FAMILY_8000,
 };
 
 const struct iwl_cfg iwl8260_n_cfg = {
@@ -127,6 +129,7 @@
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
+	.default_nvm_file = DEFAULT_NVM_FILE_FAMILY_8000,
 };
 
 MODULE_FIRMWARE(IWL8000_MODULE_FIRMWARE(IWL8000_UCODE_API_OK));

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-hw.h b/drivers/net/wireless/iwlwifi/iwl-agn-hw.h
index 7f37fb8..04a483d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-hw.h
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-hw.h

@@ -102,9 +102,7 @@
 
 /* EEPROM */
 #define IWLAGN_EEPROM_IMG_SIZE		2048
-/* OTP */
-/* lower blocks contain EEPROM image and calibration data */
-#define OTP_LOW_IMAGE_SIZE		(2 * 512 * sizeof(u16)) /* 2 KB */
+
 /* high blocks contain PAPD data */
 #define OTP_HIGH_IMAGE_SIZE_6x00        (6 * 512 * sizeof(u16)) /* 6 KB */
 #define OTP_HIGH_IMAGE_SIZE_1000        (0x200 * sizeof(u16)) /* 1024 bytes */

diff --git a/drivers/net/wireless/iwlwifi/iwl-config.h b/drivers/net/wireless/iwlwifi/iwl-config.h
index 3f17dc3..b7047905 100644
--- a/drivers/net/wireless/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/iwlwifi/iwl-config.h

@@ -146,6 +146,9 @@
  * @wd_timeout: TX queues watchdog timeout
  * @max_event_log_size: size of event log buffer size for ucode event logging
  * @shadow_reg_enable: HW shadow register support
+ * @apmg_wake_up_wa: should the MAC access REQ be asserted when a command
+ *	is in flight. This is due to a HW bug in 7260, 3160 and 7265.
+ * @scd_chain_ext_wa: should the chain extension feature in SCD be disabled.
  */
 struct iwl_base_params {
 	int eeprom_size;
@@ -160,6 +163,8 @@
 	u32 max_event_log_size;
 	const bool shadow_reg_enable;
 	const bool pcie_l1_allowed;
+	const bool apmg_wake_up_wa;
+	const bool scd_chain_ext_wa;
 };
 
 /*
@@ -188,6 +193,11 @@
 #define EEPROM_6000_REG_BAND_24_HT40_CHANNELS	0x80
 #define EEPROM_REGULATORY_BAND_NO_HT40		0
 
+/* lower blocks contain EEPROM image and calibration data */
+#define OTP_LOW_IMAGE_SIZE		(2 * 512 * sizeof(u16)) /* 2 KB */
+#define OTP_LOW_IMAGE_SIZE_FAMILY_7000	(16 * 512 * sizeof(u16)) /* 16 KB */
+#define OTP_LOW_IMAGE_SIZE_FAMILY_8000	(32 * 512 * sizeof(u16)) /* 32 KB */
+
 struct iwl_eeprom_params {
 	const u8 regulatory_bands[7];
 	bool enhanced_txpower;
@@ -264,6 +274,8 @@
 	u8   nvm_hw_section_num;
 	bool lp_xtal_workaround;
 	const struct iwl_pwr_tx_backoff *pwr_tx_backoffs;
+	bool no_power_up_nic_in_init;
+	const char *default_nvm_file;
 };
 
 /*

diff --git a/drivers/net/wireless/iwlwifi/iwl-debug.c b/drivers/net/wireless/iwlwifi/iwl-debug.c
index 8a44f59..09feff4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-debug.c
+++ b/drivers/net/wireless/iwlwifi/iwl-debug.c

@@ -61,8 +61,6 @@
  *
  *****************************************************************************/
 
-#define DEBUG
-
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/export.h>
@@ -128,8 +126,8 @@
 #ifdef CONFIG_IWLWIFI_DEBUG
 	if (iwl_have_debug_level(level) &&
 	    (!limit || net_ratelimit()))
-		dev_dbg(dev, "%c %s %pV", in_interrupt() ? 'I' : 'U',
-			function, &vaf);
+		dev_printk(KERN_DEBUG, dev, "%c %s %pV",
+			   in_interrupt() ? 'I' : 'U', function, &vaf);
 #endif
 	trace_iwlwifi_dbg(level, in_interrupt(), function, &vaf);
 	va_end(args);

diff --git a/drivers/net/wireless/iwlwifi/iwl-debug.h b/drivers/net/wireless/iwlwifi/iwl-debug.h
index c8cbdbe..2950835 100644
--- a/drivers/net/wireless/iwlwifi/iwl-debug.h
+++ b/drivers/net/wireless/iwlwifi/iwl-debug.h

@@ -47,12 +47,32 @@
 void __iwl_info(struct device *dev, const char *fmt, ...) __printf(2, 3);
 void __iwl_crit(struct device *dev, const char *fmt, ...) __printf(2, 3);
 
+/* not all compilers can evaluate strlen() at compile time, so use sizeof() */
+#define CHECK_FOR_NEWLINE(f) BUILD_BUG_ON(f[sizeof(f) - 2] != '\n')
+
 /* No matter what is m (priv, bus, trans), this will work */
-#define IWL_ERR(m, f, a...) __iwl_err((m)->dev, false, false, f, ## a)
-#define IWL_ERR_DEV(d, f, a...) __iwl_err((d), false, false, f, ## a)
-#define IWL_WARN(m, f, a...) __iwl_warn((m)->dev, f, ## a)
-#define IWL_INFO(m, f, a...) __iwl_info((m)->dev, f, ## a)
-#define IWL_CRIT(m, f, a...) __iwl_crit((m)->dev, f, ## a)
+#define IWL_ERR_DEV(d, f, a...)						\
+	do {								\
+		CHECK_FOR_NEWLINE(f);					\
+		__iwl_err((d), false, false, f, ## a);			\
+	} while (0)
+#define IWL_ERR(m, f, a...)						\
+	IWL_ERR_DEV((m)->dev, f, ## a)
+#define IWL_WARN(m, f, a...)						\
+	do {								\
+		CHECK_FOR_NEWLINE(f);					\
+		__iwl_warn((m)->dev, f, ## a);				\
+	} while (0)
+#define IWL_INFO(m, f, a...)						\
+	do {								\
+		CHECK_FOR_NEWLINE(f);					\
+		__iwl_info((m)->dev, f, ## a);				\
+	} while (0)
+#define IWL_CRIT(m, f, a...)						\
+	do {								\
+		CHECK_FOR_NEWLINE(f);					\
+		__iwl_crit((m)->dev, f, ## a);				\
+	} while (0)
 
 #if defined(CONFIG_IWLWIFI_DEBUG) || defined(CONFIG_IWLWIFI_DEVICE_TRACING)
 void __iwl_dbg(struct device *dev,
@@ -72,12 +92,17 @@
 		       DUMP_PREFIX_OFFSET, 16, 1, p, len, 1);		\
 } while (0)
 
+#define __IWL_DEBUG_DEV(dev, level, limit, fmt, args...)		\
+	do {								\
+		CHECK_FOR_NEWLINE(fmt);					\
+		__iwl_dbg(dev, level, limit, __func__, fmt, ##args);	\
+	} while (0)
 #define IWL_DEBUG(m, level, fmt, args...)				\
-	__iwl_dbg((m)->dev, level, false, __func__, fmt, ##args)
+	__IWL_DEBUG_DEV((m)->dev, level, false, fmt, ##args)
 #define IWL_DEBUG_DEV(dev, level, fmt, args...)				\
-	__iwl_dbg((dev), level, false, __func__, fmt, ##args)
+	__IWL_DEBUG_DEV(dev, level, false, fmt, ##args)
 #define IWL_DEBUG_LIMIT(m, level, fmt, args...)				\
-	__iwl_dbg((m)->dev, level, true, __func__, fmt, ##args)
+	__IWL_DEBUG_DEV((m)->dev, level, true, fmt, ##args)
 
 #ifdef CONFIG_IWLWIFI_DEBUG
 #define iwl_print_hex_dump(m, level, p, len)				\

diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c
index 0a3e841..f2a5c12 100644
--- a/drivers/net/wireless/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/iwlwifi/iwl-drv.c

@@ -1243,6 +1243,7 @@
 	.bt_coex_active = true,
 	.power_level = IWL_POWER_INDEX_1,
 	.wd_disable = true,
+	.uapsd_disable = false,
 	/* the rest are 0 by default */
 };
 IWL_EXPORT_SYMBOL(iwlwifi_mod_params);
@@ -1356,6 +1357,10 @@
 module_param_named(nvm_file, iwlwifi_mod_params.nvm_file, charp, S_IRUGO);
 MODULE_PARM_DESC(nvm_file, "NVM file name");
 
+module_param_named(uapsd_disable, iwlwifi_mod_params.uapsd_disable,
+		   bool, S_IRUGO);
+MODULE_PARM_DESC(uapsd_disable, "disable U-APSD functionality (default: N)");
+
 /*
  * set bt_coex_active to true, uCode will do kill/defer
  * every time the priority line is asserted (BT is sending signals on the

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-error-dump.h b/drivers/net/wireless/iwlwifi/iwl-fw-error-dump.h
similarity index 82%
rename from drivers/net/wireless/iwlwifi/mvm/fw-error-dump.h
rename to drivers/net/wireless/iwlwifi/iwl-fw-error-dump.h
index 58c8941..2953ffc 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-error-dump.h
+++ b/drivers/net/wireless/iwlwifi/iwl-fw-error-dump.h

@@ -71,10 +71,15 @@
  * enum iwl_fw_error_dump_type - types of data in the dump file
  * @IWL_FW_ERROR_DUMP_SRAM:
  * @IWL_FW_ERROR_DUMP_REG:
+ * @IWL_FW_ERROR_DUMP_RXF:
+ * @IWL_FW_ERROR_DUMP_TXCMD: last TX command data, structured as
+ *	&struct iwl_fw_error_dump_txcmd packets
  */
 enum iwl_fw_error_dump_type {
 	IWL_FW_ERROR_DUMP_SRAM = 0,
 	IWL_FW_ERROR_DUMP_REG = 1,
+	IWL_FW_ERROR_DUMP_RXF = 2,
+	IWL_FW_ERROR_DUMP_TXCMD = 3,
 
 	IWL_FW_ERROR_DUMP_MAX,
 };
@@ -89,7 +94,7 @@
 	__le32 type;
 	__le32 len;
 	__u8 data[];
-} __packed __aligned(4);
+} __packed;
 
 /**
  * struct iwl_fw_error_dump_file - the layout of the header of the file
@@ -101,6 +106,29 @@
 	__le32 barker;
 	__le32 file_len;
 	u8 data[0];
-} __packed __aligned(4);
+} __packed;
+
+/**
+ * struct iwl_fw_error_dump_txcmd - TX command data
+ * @cmdlen: original length of command
+ * @caplen: captured length of command (may be less)
+ * @data: captured command data, @caplen bytes
+ */
+struct iwl_fw_error_dump_txcmd {
+	__le32 cmdlen;
+	__le32 caplen;
+	u8 data[];
+} __packed;
+
+/**
+ * iwl_mvm_fw_error_next_data - advance fw error dump data pointer
+ * @data: previous data block; its little-endian len field is read
+ * Returns: next data block, located right after the len payload bytes of @data
+ */
+static inline struct iwl_fw_error_dump_data *
+iwl_mvm_fw_error_next_data(struct iwl_fw_error_dump_data *data)
+{
+	return (void *)(data->data + le32_to_cpu(data->len));
+}
 
 #endif /* __fw_error_dump_h__ */

diff --git a/drivers/net/wireless/iwlwifi/iwl-fw.h b/drivers/net/wireless/iwlwifi/iwl-fw.h
index d14f193..0aa7c00 100644
--- a/drivers/net/wireless/iwlwifi/iwl-fw.h
+++ b/drivers/net/wireless/iwlwifi/iwl-fw.h

@@ -74,29 +74,24 @@
  * @IWL_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w).
  * @IWL_UCODE_TLV_FLAGS_P2P: This uCode image supports P2P.
  * @IWL_UCODE_TLV_FLAGS_DW_BC_TABLE: The SCD byte count table is in DWORDS
- * @IWL_UCODE_TLV_FLAGS_UAPSD: This uCode image supports uAPSD
+ * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: This uCode image supports uAPSD
  * @IWL_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan
  *	offload profile config command.
- * @IWL_UCODE_TLV_FLAGS_RX_ENERGY_API: supports rx signal strength api
- * @IWL_UCODE_TLV_FLAGS_TIME_EVENT_API_V2: using the new time event API.
  * @IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS: D3 image supports up to six
  *	(rather than two) IPv6 addresses
- * @IWL_UCODE_TLV_FLAGS_BF_UPDATED: new beacon filtering API
  * @IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID: not sending a probe with the SSID element
  *	from the probe request template.
- * @IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API: modified D3 API to allow keeping
- *	connection when going back to D0
  * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version)
  * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version)
- * @IWL_UCODE_TLV_FLAGS_SCHED_SCAN: this uCode image supports scheduled scan.
- * @IWL_UCODE_TLV_FLAGS_STA_KEY_CMD: new ADD_STA and ADD_STA_KEY command API
- * @IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD: support device wide power command
- *	containing CAM (Continuous Active Mode) indication.
+ * @IWL_UCODE_TLV_FLAGS_P2P_PM: P2P client supports PM as a stand alone MAC
  * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_DCM: support power save on BSS station and
  *	P2P client interfaces simultaneously if they are in different bindings.
+ * @IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM: support power save on BSS station and
+ *	P2P client interfaces simultaneously if they are in the same binding.
  * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save
  * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering.
  * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients
+ * @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS.
  */
 enum iwl_ucode_tlv_flag {
 	IWL_UCODE_TLV_FLAGS_PAN			= BIT(0),
@@ -104,22 +99,16 @@
 	IWL_UCODE_TLV_FLAGS_MFP			= BIT(2),
 	IWL_UCODE_TLV_FLAGS_P2P			= BIT(3),
 	IWL_UCODE_TLV_FLAGS_DW_BC_TABLE		= BIT(4),
-	IWL_UCODE_TLV_FLAGS_NEWBT_COEX		= BIT(5),
-	IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT	= BIT(6),
 	IWL_UCODE_TLV_FLAGS_SHORT_BL		= BIT(7),
-	IWL_UCODE_TLV_FLAGS_RX_ENERGY_API	= BIT(8),
-	IWL_UCODE_TLV_FLAGS_TIME_EVENT_API_V2	= BIT(9),
 	IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS	= BIT(10),
-	IWL_UCODE_TLV_FLAGS_BF_UPDATED		= BIT(11),
 	IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID	= BIT(12),
-	IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API	= BIT(14),
 	IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL	= BIT(15),
 	IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE	= BIT(16),
-	IWL_UCODE_TLV_FLAGS_SCHED_SCAN		= BIT(17),
-	IWL_UCODE_TLV_FLAGS_STA_KEY_CMD		= BIT(19),
-	IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD	= BIT(20),
+	IWL_UCODE_TLV_FLAGS_P2P_PM		= BIT(21),
 	IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM	= BIT(22),
+	IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM	= BIT(23),
 	IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT	= BIT(24),
+	IWL_UCODE_TLV_FLAGS_EBS_SUPPORT		= BIT(25),
 	IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD	= BIT(26),
 	IWL_UCODE_TLV_FLAGS_BCAST_FILTERING	= BIT(29),
 	IWL_UCODE_TLV_FLAGS_GO_UAPSD		= BIT(30),
@@ -128,9 +117,11 @@
 /**
  * enum iwl_ucode_tlv_api - ucode api
  * @IWL_UCODE_TLV_API_WOWLAN_CONFIG_TID: wowlan config includes tid field.
+ * @IWL_UCODE_TLV_API_CSA_FLOW: ucode can do unbind-bind flow for CSA.
  */
 enum iwl_ucode_tlv_api {
 	IWL_UCODE_TLV_API_WOWLAN_CONFIG_TID	= BIT(0),
+	IWL_UCODE_TLV_API_CSA_FLOW		= BIT(4),
 };
 
 /**
@@ -183,6 +174,7 @@
 #define IWL_UCODE_SECTION_MAX 12
 #define IWL_API_ARRAY_SIZE	1
 #define IWL_CAPABILITIES_ARRAY_SIZE	1
+#define CPU1_CPU2_SEPARATOR_SECTION	0xFFFFCCCC
 
 struct iwl_ucode_capabilities {
 	u32 max_probe_length;
@@ -205,6 +197,11 @@
 	bool is_dual_cpus;
 };
 
+struct iwl_sf_region {
+	u32 addr;
+	u32 size;
+};
+
 /* uCode version contains 4 values: Major/Minor/API/Serial */
 #define IWL_UCODE_MAJOR(ver)	(((ver) & 0xFF000000) >> 24)
 #define IWL_UCODE_MINOR(ver)	(((ver) & 0x00FF0000) >> 16)

diff --git a/drivers/net/wireless/iwlwifi/iwl-io.c b/drivers/net/wireless/iwlwifi/iwl-io.c
index 44cc3cf..5eef4ae 100644
--- a/drivers/net/wireless/iwlwifi/iwl-io.c
+++ b/drivers/net/wireless/iwlwifi/iwl-io.c

@@ -33,6 +33,7 @@
 #include "iwl-io.h"
 #include "iwl-csr.h"
 #include "iwl-debug.h"
+#include "iwl-prph.h"
 #include "iwl-fh.h"
 
 #define IWL_POLL_INTERVAL 10	/* microseconds */
@@ -183,6 +184,23 @@
 }
 IWL_EXPORT_SYMBOL(iwl_clear_bits_prph);
 
+void iwl_force_nmi(struct iwl_trans *trans)
+{
+	/*
+	 * In HW previous to the 8000 HW family, and in the 8000 HW family
+	 * itself when the revision step==0, the DEVICE_SET_NMI_REG is used
+	 * to force an NMI. Otherwise, a different register -
+	 * DEVICE_SET_NMI_8000B_REG - is used.
+	 */
+	if ((trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) ||
+	    ((trans->hw_rev & 0xc) == 0x0))
+		iwl_write_prph(trans, DEVICE_SET_NMI_REG, DEVICE_SET_NMI_VAL);
+	else
+		iwl_write_prph(trans, DEVICE_SET_NMI_8000B_REG,
+			       DEVICE_SET_NMI_8000B_VAL);
+}
+IWL_EXPORT_SYMBOL(iwl_force_nmi);
+
 static const char *get_fh_string(int cmd)
 {
 #define IWL_CMD(x) case x: return #x

diff --git a/drivers/net/wireless/iwlwifi/iwl-io.h b/drivers/net/wireless/iwlwifi/iwl-io.h
index 665ddd9..705d12c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-io.h
+++ b/drivers/net/wireless/iwlwifi/iwl-io.h

@@ -80,6 +80,7 @@
 void iwl_set_bits_mask_prph(struct iwl_trans *trans, u32 ofs,
 			    u32 bits, u32 mask);
 void iwl_clear_bits_prph(struct iwl_trans *trans, u32 ofs, u32 mask);
+void iwl_force_nmi(struct iwl_trans *trans);
 
 /* Error handling */
 int iwl_dump_fh(struct iwl_trans *trans, char **buf);

diff --git a/drivers/net/wireless/iwlwifi/iwl-modparams.h b/drivers/net/wireless/iwlwifi/iwl-modparams.h
index d994317..d051857 100644
--- a/drivers/net/wireless/iwlwifi/iwl-modparams.h
+++ b/drivers/net/wireless/iwlwifi/iwl-modparams.h

@@ -119,6 +119,7 @@
 #endif
 	int ant_coupling;
 	char *nvm_file;
+	bool uapsd_disable;
 };
 
 #endif /* #__iwl_modparams_h__ */

diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
index 6be30c6..85eee79 100644
--- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c

@@ -62,6 +62,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/etherdevice.h>
 #include "iwl-drv.h"
 #include "iwl-modparams.h"
 #include "iwl-nvm-parse.h"
@@ -127,19 +128,20 @@
 
 static const u8 iwl_nvm_channels_family_8000[] = {
 	/* 2.4 GHz */
-	1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
 	/* 5 GHz */
 	36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92,
 	96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
 	149, 153, 157, 161, 165, 169, 173, 177, 181
 };
 
-#define IWL_NUM_CHANNELS	ARRAY_SIZE(iwl_nvm_channels)
+#define IWL_NUM_CHANNELS		ARRAY_SIZE(iwl_nvm_channels)
 #define IWL_NUM_CHANNELS_FAMILY_8000	ARRAY_SIZE(iwl_nvm_channels_family_8000)
-#define NUM_2GHZ_CHANNELS	14
-#define FIRST_2GHZ_HT_MINUS	5
-#define LAST_2GHZ_HT_PLUS	9
-#define LAST_5GHZ_HT		161
+#define NUM_2GHZ_CHANNELS		14
+#define NUM_2GHZ_CHANNELS_FAMILY_8000	14
+#define FIRST_2GHZ_HT_MINUS		5
+#define LAST_2GHZ_HT_PLUS		9
+#define LAST_5GHZ_HT			161
 
 #define DEFAULT_MAX_TX_POWER 16
 
@@ -202,21 +204,23 @@
 	struct ieee80211_channel *channel;
 	u16 ch_flags;
 	bool is_5ghz;
-	int num_of_ch;
+	int num_of_ch, num_2ghz_channels;
 	const u8 *nvm_chan;
 
 	if (cfg->device_family != IWL_DEVICE_FAMILY_8000) {
 		num_of_ch = IWL_NUM_CHANNELS;
 		nvm_chan = &iwl_nvm_channels[0];
+		num_2ghz_channels = NUM_2GHZ_CHANNELS;
 	} else {
 		num_of_ch = IWL_NUM_CHANNELS_FAMILY_8000;
 		nvm_chan = &iwl_nvm_channels_family_8000[0];
+		num_2ghz_channels = NUM_2GHZ_CHANNELS_FAMILY_8000;
 	}
 
 	for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
 		ch_flags = __le16_to_cpup(nvm_ch_flags + ch_idx);
 
-		if (ch_idx >= NUM_2GHZ_CHANNELS &&
+		if (ch_idx >= num_2ghz_channels &&
 		    !data->sku_cap_band_52GHz_enable)
 			ch_flags &= ~NVM_CHANNEL_VALID;
 
@@ -225,7 +229,7 @@
 					 "Ch. %d Flags %x [%sGHz] - No traffic\n",
 					 nvm_chan[ch_idx],
 					 ch_flags,
-					 (ch_idx >= NUM_2GHZ_CHANNELS) ?
+					 (ch_idx >= num_2ghz_channels) ?
 					 "5.2" : "2.4");
 			continue;
 		}
@@ -234,7 +238,7 @@
 		n_channels++;
 
 		channel->hw_value = nvm_chan[ch_idx];
-		channel->band = (ch_idx < NUM_2GHZ_CHANNELS) ?
+		channel->band = (ch_idx < num_2ghz_channels) ?
 				IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ;
 		channel->center_freq =
 			ieee80211_channel_to_frequency(
@@ -242,7 +246,7 @@
 
 		/* TODO: Need to be dependent to the NVM */
 		channel->flags = IEEE80211_CHAN_NO_HT40;
-		if (ch_idx < NUM_2GHZ_CHANNELS &&
+		if (ch_idx < num_2ghz_channels &&
 		    (ch_flags & NVM_CHANNEL_40MHZ)) {
 			if (nvm_chan[ch_idx] <= LAST_2GHZ_HT_PLUS)
 				channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
@@ -250,7 +254,7 @@
 				channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
 		} else if (nvm_chan[ch_idx] <= LAST_5GHZ_HT &&
 			   (ch_flags & NVM_CHANNEL_40MHZ)) {
-			if ((ch_idx - NUM_2GHZ_CHANNELS) % 2 == 0)
+			if ((ch_idx - num_2ghz_channels) % 2 == 0)
 				channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
 			else
 				channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
@@ -447,13 +451,7 @@
 			       struct iwl_nvm_data *data,
 			       const __le16 *nvm_sec)
 {
-	u8 hw_addr[ETH_ALEN];
-
-	if (cfg->device_family != IWL_DEVICE_FAMILY_8000)
-		memcpy(hw_addr, nvm_sec + HW_ADDR, ETH_ALEN);
-	else
-		memcpy(hw_addr, nvm_sec + MAC_ADDRESS_OVERRIDE_FAMILY_8000,
-		       ETH_ALEN);
+	const u8 *hw_addr = (const u8 *)(nvm_sec + HW_ADDR);
 
 	/* The byte order is little endian 16 bit, meaning 214365 */
 	data->hw_addr[0] = hw_addr[1];
@@ -464,6 +462,41 @@
 	data->hw_addr[5] = hw_addr[4];
 }
 
+static void iwl_set_hw_address_family_8000(const struct iwl_cfg *cfg,
+					   struct iwl_nvm_data *data,
+					   const __le16 *mac_override,
+					   const __le16 *nvm_hw)
+{
+	const u8 *hw_addr;
+
+	/* Prefer the MAC override section; otherwise fall back to the OTP. */
+	if (mac_override) {
+		hw_addr = (const u8 *)(mac_override +
+				 MAC_ADDRESS_OVERRIDE_FAMILY_8000);
+		/* The byte order is little endian 16 bit, meaning 214365 */
+		data->hw_addr[0] = hw_addr[1];
+		data->hw_addr[1] = hw_addr[0];
+		data->hw_addr[2] = hw_addr[3];
+		data->hw_addr[3] = hw_addr[2];
+		data->hw_addr[4] = hw_addr[5];
+		data->hw_addr[5] = hw_addr[4];
+		/* check the swapped copy: hw_addr is still in raw NVM order */
+		if (is_valid_ether_addr(data->hw_addr))
+			return;
+	}
+
+	/* take the MAC address from the OTP: HW_ADDR0 holds bytes 0-3 */
+	hw_addr = (const u8 *)(nvm_hw + HW_ADDR0_FAMILY_8000);
+	data->hw_addr[0] = hw_addr[3];
+	data->hw_addr[1] = hw_addr[2];
+	data->hw_addr[2] = hw_addr[1];
+	data->hw_addr[3] = hw_addr[0];
+	/* HW_ADDR1 holds bytes 4-5 */
+	hw_addr = (const u8 *)(nvm_hw + HW_ADDR1_FAMILY_8000);
+	data->hw_addr[4] = hw_addr[1];
+	data->hw_addr[5] = hw_addr[0];
+}
+
 struct iwl_nvm_data *
 iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg,
 		   const __le16 *nvm_hw, const __le16 *nvm_sw,
@@ -523,7 +556,7 @@
 				rx_chains);
 	} else {
 		/* MAC address in family 8000 */
-		iwl_set_hw_address(cfg, data, mac_override);
+		iwl_set_hw_address_family_8000(cfg, data, mac_override, nvm_hw);
 
 		iwl_init_sbands(dev, cfg, data, regulatory,
 				sku & NVM_SKU_CAP_11AC_ENABLE, tx_chains,

diff --git a/drivers/net/wireless/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/iwlwifi/iwl-op-mode.h
index ea29504..99785c8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-op-mode.h
+++ b/drivers/net/wireless/iwlwifi/iwl-op-mode.h

@@ -63,6 +63,7 @@
 #ifndef __iwl_op_mode_h__
 #define __iwl_op_mode_h__
 
+#include <linux/netdevice.h>
 #include <linux/debugfs.h>
 
 struct iwl_op_mode;
@@ -112,8 +113,11 @@
  * @stop: stop the op_mode. Must free all the memory allocated.
  *	May sleep
  * @rx: Rx notification to the op_mode. rxb is the Rx buffer itself. Cmd is the
- *	HCMD this Rx responds to.
- *	This callback may sleep, it is called from a threaded IRQ handler.
+ *	HCMD this Rx responds to. Can't sleep.
+ * @napi_add: NAPI initialisation. The transport is fully responsible for NAPI,
+ *	but the higher layers need to know about it (in particular mac80211 to
+ *	be able to call the right NAPI RX functions); this function is needed
+ *	to eventually call netif_napi_add() with higher layer involvement.
  * @queue_full: notifies that a HW queue is full.
  *	Must be atomic and called with BH disabled.
  * @queue_not_full: notifies that a HW queue is not full any more.
@@ -143,6 +147,11 @@
 	void (*stop)(struct iwl_op_mode *op_mode);
 	int (*rx)(struct iwl_op_mode *op_mode, struct iwl_rx_cmd_buffer *rxb,
 		  struct iwl_device_cmd *cmd);
+	void (*napi_add)(struct iwl_op_mode *op_mode,
+			 struct napi_struct *napi,
+			 struct net_device *napi_dev,
+			 int (*poll)(struct napi_struct *, int),
+			 int weight);
 	void (*queue_full)(struct iwl_op_mode *op_mode, int queue);
 	void (*queue_not_full)(struct iwl_op_mode *op_mode, int queue);
 	bool (*hw_rf_kill)(struct iwl_op_mode *op_mode, bool state);
@@ -180,7 +189,6 @@
 				  struct iwl_rx_cmd_buffer *rxb,
 				  struct iwl_device_cmd *cmd)
 {
-	might_sleep();
 	return op_mode->ops->rx(op_mode, rxb, cmd);
 }
 
@@ -249,4 +257,15 @@
 	return op_mode->ops->exit_d0i3(op_mode);
 }
 
+static inline void iwl_op_mode_napi_add(struct iwl_op_mode *op_mode,
+					struct napi_struct *napi,
+					struct net_device *napi_dev,
+					int (*poll)(struct napi_struct *, int),
+					int weight)
+{
+	if (!op_mode->ops->napi_add)	/* optional op; no-op if unset */
+		return;
+	op_mode->ops->napi_add(op_mode, napi, napi_dev, poll, weight);
+}
+
 #endif /* __iwl_op_mode_h__ */

diff --git a/drivers/net/wireless/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/iwlwifi/iwl-phy-db.c
index b761ac4..d4fb5ca 100644
--- a/drivers/net/wireless/iwlwifi/iwl-phy-db.c
+++ b/drivers/net/wireless/iwlwifi/iwl-phy-db.c

@@ -345,7 +345,6 @@
 	struct iwl_phy_db_cmd phy_db_cmd;
 	struct iwl_host_cmd cmd = {
 		.id = PHY_DB_CMD,
-		.flags = CMD_SYNC,
 	};
 
 	IWL_DEBUG_INFO(phy_db->trans,
@@ -393,13 +392,13 @@
 					  entry->data);
 		if (err) {
 			IWL_ERR(phy_db->trans,
-				"Can't SEND phy_db section %d (%d), err %d",
+				"Can't SEND phy_db section %d (%d), err %d\n",
 				type, i, err);
 			return err;
 		}
 
 		IWL_DEBUG_INFO(phy_db->trans,
-			       "Sent PHY_DB HCMD, type = %d num = %d",
+			       "Sent PHY_DB HCMD, type = %d num = %d\n",
 			       type, i);
 	}
 
@@ -451,7 +450,7 @@
 						 IWL_NUM_PAPD_CH_GROUPS);
 	if (err) {
 		IWL_ERR(phy_db->trans,
-			"Cannot send channel specific PAPD groups");
+			"Cannot send channel specific PAPD groups\n");
 		return err;
 	}
 
@@ -461,7 +460,7 @@
 						 IWL_NUM_TXP_CH_GROUPS);
 	if (err) {
 		IWL_ERR(phy_db->trans,
-			"Cannot send channel specific TX power groups");
+			"Cannot send channel specific TX power groups\n");
 		return err;
 	}
 

diff --git a/drivers/net/wireless/iwlwifi/iwl-prph.h b/drivers/net/wireless/iwlwifi/iwl-prph.h
index 5f657c5..4997e27 100644
--- a/drivers/net/wireless/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/iwlwifi/iwl-prph.h

@@ -105,6 +105,9 @@
 
 /* Device NMI register */
 #define DEVICE_SET_NMI_REG 0x00a01c30
+#define DEVICE_SET_NMI_VAL 0x1
+#define DEVICE_SET_NMI_8000B_REG 0x00a01c24
+#define DEVICE_SET_NMI_8000B_VAL 0x1000000
 
 /* Shared registers (0x0..0x3ff, via target indirect or periphery */
 #define SHR_BASE	0x00a10000
@@ -348,4 +351,12 @@
 
 #define LMPM_SECURE_TIME_OUT	(100)
 
+/* Rx FIFO */
+#define RXF_SIZE_ADDR			(0xa00c88)
+#define RXF_SIZE_BYTE_CND_POS		(7)
+#define RXF_SIZE_BYTE_CNT_MSK		(0x3ff << RXF_SIZE_BYTE_CND_POS)
+
+#define RXF_LD_FENCE_OFFSET_ADDR	(0xa00c10)
+#define RXF_FIFO_RD_FENCE_ADDR		(0xa00c0c)
+
 #endif				/* __iwl_prph_h__ */

diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 8cdb0dd..34d49e1 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h

@@ -189,10 +189,9 @@
 /**
  * enum CMD_MODE - how to send the host commands ?
  *
- * @CMD_SYNC: The caller will be stalled until the fw responds to the command
  * @CMD_ASYNC: Return right away and don't wait for the response
- * @CMD_WANT_SKB: valid only with CMD_SYNC. The caller needs the buffer of the
- *	response. The caller needs to call iwl_free_resp when done.
+ * @CMD_WANT_SKB: Not valid with CMD_ASYNC. The caller needs the buffer of
+ *	the response. The caller needs to call iwl_free_resp when done.
  * @CMD_HIGH_PRIO: The command is high priority - it goes to the front of the
  *	command queue, but after other high priority commands. valid only
  *	with CMD_ASYNC.
@@ -202,7 +201,6 @@
  *	(i.e. mark it as non-idle).
  */
 enum CMD_MODE {
-	CMD_SYNC		= 0,
 	CMD_ASYNC		= BIT(0),
 	CMD_WANT_SKB		= BIT(1),
 	CMD_SEND_IN_RFKILL	= BIT(2),
@@ -427,7 +425,7 @@
  * @send_cmd:send a host command. Must return -ERFKILL if RFkill is asserted.
  *	If RFkill is asserted in the middle of a SYNC host command, it must
  *	return -ERFKILL straight away.
- *	May sleep only if CMD_SYNC is set
+ *	May sleep only if CMD_ASYNC is not set
  * @tx: send an skb
  *	Must be atomic
  * @reclaim: free packet until ssn. Returns a list of freed packets.
@@ -437,8 +435,7 @@
  *	this one. The op_mode must not configure the HCMD queue. May sleep.
  * @txq_disable: de-configure a Tx queue to send AMPDUs
  *	Must be atomic
- * @wait_tx_queue_empty: wait until all tx queues are empty
- *	May sleep
+ * @wait_tx_queue_empty: wait until tx queues are empty. May sleep.
  * @dbgfs_register: add the dbgfs files under this directory. Files will be
  *	automatically deleted.
  * @write8: write a u8 to a register at offset ofs from the BAR
@@ -464,6 +461,11 @@
  * @unref: release a reference previously taken with @ref. Note that
  *	initially the reference count is 1, making an initial @unref
  *	necessary to allow low power states.
+ * @dump_data: fill a data dump with debug data, maybe containing last
+ *	TX'ed commands and similar. When called with a NULL buffer and
+ *	zero buffer length, provide only the (estimated) required buffer
+ *	length. Return the used buffer length.
+ *	Note that the transport must fill in the proper file headers.
  */
 struct iwl_trans_ops {
 
@@ -471,6 +473,8 @@
 	void (*op_mode_leave)(struct iwl_trans *iwl_trans);
 	int (*start_fw)(struct iwl_trans *trans, const struct fw_img *fw,
 			bool run_in_rfkill);
+	int (*update_sf)(struct iwl_trans *trans,
+			 struct iwl_sf_region *st_fwrd_space);
 	void (*fw_alive)(struct iwl_trans *trans, u32 scd_addr);
 	void (*stop_device)(struct iwl_trans *trans);
 
@@ -490,7 +494,7 @@
 	void (*txq_disable)(struct iwl_trans *trans, int queue);
 
 	int (*dbgfs_register)(struct iwl_trans *trans, struct dentry* dir);
-	int (*wait_tx_queue_empty)(struct iwl_trans *trans);
+	int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm);
 
 	void (*write8)(struct iwl_trans *trans, u32 ofs, u8 val);
 	void (*write32)(struct iwl_trans *trans, u32 ofs, u32 val);
@@ -512,6 +516,10 @@
 			      u32 value);
 	void (*ref)(struct iwl_trans *trans);
 	void (*unref)(struct iwl_trans *trans);
+
+#ifdef CONFIG_IWLWIFI_DEBUGFS
+	u32 (*dump_data)(struct iwl_trans *trans, void *buf, u32 buflen);
+#endif
 };
 
 /**
@@ -630,6 +638,17 @@
 	return trans->ops->start_fw(trans, fw, run_in_rfkill);
 }
 
+static inline int iwl_trans_update_sf(struct iwl_trans *trans,
+				      struct iwl_sf_region *st_fwrd_space)
+{
+	might_sleep();
+
+	if (trans->ops->update_sf)
+		return trans->ops->update_sf(trans, st_fwrd_space);
+
+	return 0;	/* no update_sf op provided: report success */
+}
+
 static inline void iwl_trans_stop_device(struct iwl_trans *trans)
 {
 	might_sleep();
@@ -665,6 +684,16 @@
 		trans->ops->unref(trans);
 }
 
+#ifdef CONFIG_IWLWIFI_DEBUGFS
+static inline u32 iwl_trans_dump_data(struct iwl_trans *trans,
+				      void *buf, u32 buflen)
+{
+	if (!trans->ops->dump_data)	/* optional op */
+		return 0;		/* no op: zero length reported */
+	return trans->ops->dump_data(trans, buf, buflen);
+}
+#endif
+
 static inline int iwl_trans_send_cmd(struct iwl_trans *trans,
 				     struct iwl_host_cmd *cmd)
 {
@@ -678,7 +707,7 @@
 		return -EIO;
 
 	if (unlikely(trans->state != IWL_TRANS_FW_ALIVE)) {
-		IWL_ERR(trans, "%s bad state = %d", __func__, trans->state);
+		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 		return -EIO;
 	}
 
@@ -720,7 +749,7 @@
 		return -EIO;
 
 	if (unlikely(trans->state != IWL_TRANS_FW_ALIVE))
-		IWL_ERR(trans, "%s bad state = %d", __func__, trans->state);
+		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 
 	return trans->ops->tx(trans, skb, dev_cmd, queue);
 }
@@ -729,7 +758,7 @@
 				     int ssn, struct sk_buff_head *skbs)
 {
 	if (unlikely(trans->state != IWL_TRANS_FW_ALIVE))
-		IWL_ERR(trans, "%s bad state = %d", __func__, trans->state);
+		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 
 	trans->ops->reclaim(trans, queue, ssn, skbs);
 }
@@ -746,7 +775,7 @@
 	might_sleep();
 
 	if (unlikely((trans->state != IWL_TRANS_FW_ALIVE)))
-		IWL_ERR(trans, "%s bad state = %d", __func__, trans->state);
+		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 
 	trans->ops->txq_enable(trans, queue, fifo, sta_id, tid,
 				 frame_limit, ssn);
@@ -759,12 +788,13 @@
 			     IWL_MAX_TID_COUNT, IWL_FRAME_LIMIT, 0);
 }
 
-static inline int iwl_trans_wait_tx_queue_empty(struct iwl_trans *trans)
+static inline int iwl_trans_wait_tx_queue_empty(struct iwl_trans *trans,
+						u32 txq_bm)
 {
 	if (unlikely(trans->state != IWL_TRANS_FW_ALIVE))
-		IWL_ERR(trans, "%s bad state = %d", __func__, trans->state);
+		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 
-	return trans->ops->wait_tx_queue_empty(trans);
+	return trans->ops->wait_tx_queue_empty(trans, txq_bm);
 }
 
 static inline int iwl_trans_dbgfs_register(struct iwl_trans *trans,

diff --git a/drivers/net/wireless/iwlwifi/mvm/Makefile b/drivers/net/wireless/iwlwifi/mvm/Makefile
index ccdd3b7..c30d7f6 100644
--- a/drivers/net/wireless/iwlwifi/mvm/Makefile
+++ b/drivers/net/wireless/iwlwifi/mvm/Makefile

@@ -3,8 +3,9 @@
 iwlmvm-y += utils.o rx.o tx.o binding.o quota.o sta.o sf.o
 iwlmvm-y += scan.o time-event.o rs.o
 iwlmvm-y += power.o coex.o
-iwlmvm-y += led.o tt.o offloading.o
+iwlmvm-y += tt.o offloading.o
 iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o
+iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o
 iwlmvm-$(CONFIG_PM_SLEEP) += d3.o
 
 ccflags-y += -D__CHECK_ENDIAN__ -I$(src)/../

diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c
index 0489314..c8c3b38 100644
--- a/drivers/net/wireless/iwlwifi/mvm/coex.c
+++ b/drivers/net/wireless/iwlwifi/mvm/coex.c

@@ -104,12 +104,9 @@
 #define BT_DISABLE_REDUCED_TXPOWER_THRESHOLD	(-65)
 #define BT_ANTENNA_COUPLING_THRESHOLD		(30)
 
-int iwl_send_bt_prio_tbl(struct iwl_mvm *mvm)
+static int iwl_send_bt_prio_tbl(struct iwl_mvm *mvm)
 {
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_NEWBT_COEX))
-		return 0;
-
-	return iwl_mvm_send_cmd_pdu(mvm, BT_COEX_PRIO_TABLE, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, BT_COEX_PRIO_TABLE, 0,
 				    sizeof(struct iwl_bt_coex_prio_tbl_cmd),
 				    &iwl_bt_prio_tbl);
 }
@@ -127,10 +124,10 @@
 };
 
 static const __le32 iwl_bt_prio_boost[BT_COEX_BOOST_SIZE] = {
-	cpu_to_le32(0xf0f0f0f0),
-	cpu_to_le32(0xc0c0c0c0),
-	cpu_to_le32(0xfcfcfcfc),
-	cpu_to_le32(0xff00ff00),
+	cpu_to_le32(0xf0f0f0f0), /* 50% */
+	cpu_to_le32(0xc0c0c0c0), /* 25% */
+	cpu_to_le32(0xfcfcfcfc), /* 75% */
+	cpu_to_le32(0xfefefefe), /* 87.5% */
 };
 
 static const __le32 iwl_single_shared_ant[BT_COEX_MAX_LUT][BT_COEX_LUT_SIZE] = {
@@ -303,8 +300,8 @@
 };
 
 static const __le32 iwl_bt_mprio_lut[BT_COEX_MULTI_PRIO_LUT_SIZE] = {
-	cpu_to_le32(0x22002200),
-	cpu_to_le32(0x33113311),
+	cpu_to_le32(0x28412201),
+	cpu_to_le32(0x11118451),
 };
 
 struct corunning_block_luts {
@@ -568,13 +565,13 @@
 		.id = BT_CONFIG,
 		.len = { sizeof(*bt_cmd), },
 		.dataflags = { IWL_HCMD_DFL_NOCOPY, },
-		.flags = CMD_SYNC,
 	};
 	int ret;
 	u32 flags;
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_NEWBT_COEX))
-		return 0;
+	ret = iwl_send_bt_prio_tbl(mvm);
+	if (ret)
+		return ret;
 
 	bt_cmd = kzalloc(sizeof(*bt_cmd), GFP_KERNEL);
 	if (!bt_cmd)
@@ -582,10 +579,12 @@
 	cmd.data[0] = bt_cmd;
 
 	bt_cmd->max_kill = 5;
-	bt_cmd->bt4_antenna_isolation_thr = BT_ANTENNA_COUPLING_THRESHOLD,
-	bt_cmd->bt4_antenna_isolation = iwlwifi_mod_params.ant_coupling,
-	bt_cmd->bt4_tx_tx_delta_freq_thr = 15,
-	bt_cmd->bt4_tx_rx_max_freq0 = 15,
+	bt_cmd->bt4_antenna_isolation_thr = BT_ANTENNA_COUPLING_THRESHOLD;
+	bt_cmd->bt4_antenna_isolation = iwlwifi_mod_params.ant_coupling;
+	bt_cmd->bt4_tx_tx_delta_freq_thr = 15;
+	bt_cmd->bt4_tx_rx_max_freq0 = 15;
+	bt_cmd->override_primary_lut = BT_COEX_INVALID_LUT;
+	bt_cmd->override_secondary_lut = BT_COEX_INVALID_LUT;
 
 	flags = iwlwifi_mod_params.bt_coex_active ?
 			BT_COEX_NW : BT_COEX_DISABLE;
@@ -663,7 +662,6 @@
 		.data[0] = &bt_cmd,
 		.len = { sizeof(*bt_cmd), },
 		.dataflags = { IWL_HCMD_DFL_NOCOPY, },
-		.flags = CMD_SYNC,
 	};
 	int ret = 0;
 
@@ -717,7 +715,8 @@
 	return ret;
 }
 
-int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id, bool enable)
+static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id,
+				       bool enable)
 {
 	struct iwl_bt_coex_cmd *bt_cmd;
 	/* Send ASYNC since this can be sent from an atomic context */
@@ -735,8 +734,7 @@
 		return 0;
 
 	/* nothing to do */
-	if (mvmsta->bt_reduced_txpower_dbg ||
-	    mvmsta->bt_reduced_txpower == enable)
+	if (mvmsta->bt_reduced_txpower == enable)
 		return 0;
 
 	bt_cmd = kzalloc(sizeof(*bt_cmd), GFP_ATOMIC);
@@ -803,23 +801,10 @@
 
 	switch (vif->type) {
 	case NL80211_IFTYPE_STATION:
+		/* Count BSSes vifs */
+		data->num_bss_ifaces++;
 		/* default smps_mode for BSS / P2P client is AUTOMATIC */
 		smps_mode = IEEE80211_SMPS_AUTOMATIC;
-		data->num_bss_ifaces++;
-
-		/*
-		 * Count unassoc BSSes, relax SMSP constraints
-		 * and disable reduced Tx Power
-		 */
-		if (!vif->bss_conf.assoc) {
-			iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_BT_COEX,
-					    smps_mode);
-			if (iwl_mvm_bt_coex_reduced_txp(mvm,
-							mvmvif->ap_sta_id,
-							false))
-				IWL_ERR(mvm, "Couldn't send BT_CONFIG cmd\n");
-			return;
-		}
 		break;
 	case NL80211_IFTYPE_AP:
 		/* default smps_mode for AP / GO is OFF */
@@ -845,8 +830,12 @@
 		/* ... relax constraints and disable rssi events */
 		iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_BT_COEX,
 				    smps_mode);
-		if (vif->type == NL80211_IFTYPE_STATION)
+		data->reduced_tx_power = false;
+		if (vif->type == NL80211_IFTYPE_STATION) {
+			iwl_mvm_bt_coex_reduced_txp(mvm, mvmvif->ap_sta_id,
+						    false);
 			iwl_mvm_bt_coex_enable_rssi_event(mvm, vif, false, 0);
+		}
 		return;
 	}
 
@@ -857,6 +846,11 @@
 		smps_mode = vif->type == NL80211_IFTYPE_AP ?
 				IEEE80211_SMPS_OFF :
 				IEEE80211_SMPS_DYNAMIC;
+
+	/* relax SMPS contraints for next association */
+	if (!vif->bss_conf.assoc)
+		smps_mode = IEEE80211_SMPS_AUTOMATIC;
+
 	IWL_DEBUG_COEX(data->mvm,
 		       "mac %d: bt_status %d bt_activity_grading %d smps_req %d\n",
 		       mvmvif->id, data->notif->bt_status, bt_activity_grading,
@@ -903,22 +897,18 @@
 		/* if secondary is not NULL, it might be a GO */
 		data->secondary = chanctx_conf;
 
-	/* don't reduce the Tx power if in loose scheme */
+	/*
+	 * don't reduce the Tx power if one of these is true:
+	 *  we are in LOOSE
+	 *  single share antenna product
+	 *  BT is active
+	 *  we are associated
+	 */
 	if (iwl_get_coex_type(mvm, vif) == BT_COEX_LOOSE_LUT ||
-	    mvm->cfg->bt_shared_single_ant) {
+	    mvm->cfg->bt_shared_single_ant || !vif->bss_conf.assoc ||
+	    !data->notif->bt_status) {
 		data->reduced_tx_power = false;
-		iwl_mvm_bt_coex_enable_rssi_event(mvm, vif, false, 0);
-		return;
-	}
-
-	/* reduced Txpower only if BT is on, so ...*/
-	if (!data->notif->bt_status) {
-		/* ... cancel reduced Tx power ... */
-		if (iwl_mvm_bt_coex_reduced_txp(mvm, mvmvif->ap_sta_id, false))
-			IWL_ERR(mvm, "Couldn't send BT_CONFIG cmd\n");
-		data->reduced_tx_power = false;
-
-		/* ... and there is no need to get reports on RSSI any more. */
+		iwl_mvm_bt_coex_reduced_txp(mvm, mvmvif->ap_sta_id, false);
 		iwl_mvm_bt_coex_enable_rssi_event(mvm, vif, false, 0);
 		return;
 	}
@@ -1022,9 +1012,9 @@
 
 	/* Don't spam the fw with the same command over and over */
 	if (memcmp(&cmd, &mvm->last_bt_ci_cmd, sizeof(cmd))) {
-		if (iwl_mvm_send_cmd_pdu(mvm, BT_COEX_CI, CMD_SYNC,
+		if (iwl_mvm_send_cmd_pdu(mvm, BT_COEX_CI, 0,
 					 sizeof(cmd), &cmd))
-			IWL_ERR(mvm, "Failed to send BT_CI cmd");
+			IWL_ERR(mvm, "Failed to send BT_CI cmd\n");
 		memcpy(&mvm->last_bt_ci_cmd, &cmd, sizeof(cmd));
 	}
 
@@ -1039,7 +1029,6 @@
 		IWL_ERR(mvm, "Failed to update the ctrl_kill_msk\n");
 }
 
-/* upon association, the fw will send in BT Coex notification */
 int iwl_mvm_rx_bt_coex_notif(struct iwl_mvm *mvm,
 			     struct iwl_rx_cmd_buffer *rxb,
 			     struct iwl_device_cmd *dev_cmd)
@@ -1215,6 +1204,17 @@
 	return iwl_get_coex_type(mvm, mvmsta->vif) == BT_COEX_TIGHT_LUT;
 }
 
+bool iwl_mvm_bt_coex_is_tpc_allowed(struct iwl_mvm *mvm,
+				    enum ieee80211_band band)
+{
+	u32 bt_activity = le32_to_cpu(mvm->last_bt_notif.bt_activity_grading);
+
+	if (band != IEEE80211_BAND_2GHZ)
+		return false;
+
+	return bt_activity >= BT_LOW_TRAFFIC;
+}
+
 u8 iwl_mvm_bt_coex_tx_prio(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
 			   struct ieee80211_tx_info *info, u8 ac)
 {
@@ -1249,9 +1249,6 @@
 
 void iwl_mvm_bt_coex_vif_change(struct iwl_mvm *mvm)
 {
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_NEWBT_COEX))
-		return;
-
 	iwl_mvm_bt_coex_notif_handle(mvm);
 }
 
@@ -1270,7 +1267,6 @@
 		.id = BT_CONFIG,
 		.len = { sizeof(*bt_cmd), },
 		.dataflags = { IWL_HCMD_DFL_NOCOPY, },
-		.flags = CMD_SYNC,
 	};
 
 	if (!IWL_MVM_BT_COEX_CORUNNING)

diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c
index e56f5a0..645b3cf 100644
--- a/drivers/net/wireless/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/iwlwifi/mvm/d3.c

@@ -193,8 +193,7 @@
 			wkc.wep_key.key_offset = data->wep_key_idx;
 		}
 
-		ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, CMD_SYNC,
-					   sizeof(wkc), &wkc);
+		ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, 0, sizeof(wkc), &wkc);
 		data->error = ret != 0;
 
 		mvm->ptk_ivlen = key->iv_len;
@@ -341,7 +340,6 @@
 	struct iwl_host_cmd cmd = {
 		.id = WOWLAN_PATTERNS,
 		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
-		.flags = CMD_SYNC,
 	};
 	int i, err;
 
@@ -518,7 +516,6 @@
 		.id = REMOTE_WAKE_CONFIG_CMD,
 		.len = { sizeof(*cfg), },
 		.dataflags = { IWL_HCMD_DFL_NOCOPY, },
-		.flags = CMD_SYNC,
 	};
 	int ret;
 
@@ -666,10 +663,8 @@
 
 	if (WARN_ON(!vif->bss_conf.assoc))
 		return -EINVAL;
-	/* hack */
-	vif->bss_conf.assoc = false;
+
 	ret = iwl_mvm_mac_ctxt_add(mvm, vif);
-	vif->bss_conf.assoc = true;
 	if (ret)
 		return ret;
 
@@ -705,7 +700,7 @@
 		return ret;
 	rcu_assign_pointer(mvm->fw_id_to_mac_id[mvmvif->ap_sta_id], ap_sta);
 
-	ret = iwl_mvm_mac_ctxt_changed(mvm, vif);
+	ret = iwl_mvm_mac_ctxt_changed(mvm, vif, false);
 	if (ret)
 		return ret;
 
@@ -719,7 +714,7 @@
 	for (i = 1; i < MAX_BINDINGS; i++)
 		quota_cmd.quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID);
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, CMD_SYNC,
+	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0,
 				   sizeof(quota_cmd), &quota_cmd);
 	if (ret)
 		IWL_ERR(mvm, "Failed to send quota: %d\n", ret);
@@ -739,15 +734,13 @@
 	};
 	struct iwl_host_cmd cmd = {
 		.id = NON_QOS_TX_COUNTER_CMD,
-		.flags = CMD_SYNC | CMD_WANT_SKB,
+		.flags = CMD_WANT_SKB,
 	};
 	int err;
 	u32 size;
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API) {
-		cmd.data[0] = &query_cmd;
-		cmd.len[0] = sizeof(query_cmd);
-	}
+	cmd.data[0] = &query_cmd;
+	cmd.len[0] = sizeof(query_cmd);
 
 	err = iwl_mvm_send_cmd(mvm, &cmd);
 	if (err)
@@ -758,10 +751,8 @@
 		err = -EINVAL;
 	} else {
 		err = le16_to_cpup((__le16 *)cmd.resp_pkt->data);
-		/* new API returns next, not last-used seqno */
-		if (mvm->fw->ucode_capa.flags &
-				IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API)
-			err = (u16) (err - 0x10);
+		/* firmware returns next, not last-used seqno */
+		err = (u16) (err - 0x10);
 	}
 
 	iwl_free_resp(&cmd);
@@ -785,11 +776,7 @@
 
 	mvmvif->seqno_valid = false;
 
-	if (!(mvm->fw->ucode_capa.flags &
-			IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API))
-		return;
-
-	if (iwl_mvm_send_cmd_pdu(mvm, NON_QOS_TX_COUNTER_CMD, CMD_SYNC,
+	if (iwl_mvm_send_cmd_pdu(mvm, NON_QOS_TX_COUNTER_CMD, 0,
 				 sizeof(query_cmd), &query_cmd))
 		IWL_ERR(mvm, "failed to set non-QoS seqno\n");
 }
@@ -804,7 +791,7 @@
 	if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_WOWLAN_CONFIG_TID)
 		cmd_len = sizeof(*cmd);
 
-	return iwl_mvm_send_cmd_pdu(mvm, WOWLAN_CONFIGURATION, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, WOWLAN_CONFIGURATION, 0,
 				    cmd_len, cmd);
 }
 
@@ -833,7 +820,7 @@
 	};
 	struct iwl_host_cmd d3_cfg_cmd = {
 		.id = D3_CONFIG_CMD,
-		.flags = CMD_SYNC | CMD_WANT_SKB,
+		.flags = CMD_WANT_SKB,
 		.data[0] = &d3_cfg_cmd_data,
 		.len[0] = sizeof(d3_cfg_cmd_data),
 	};
@@ -983,7 +970,6 @@
 		if (key_data.use_rsc_tsc) {
 			struct iwl_host_cmd rsc_tsc_cmd = {
 				.id = WOWLAN_TSC_RSC_PARAM,
-				.flags = CMD_SYNC,
 				.data[0] = key_data.rsc_tsc,
 				.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
 				.len[0] = sizeof(*key_data.rsc_tsc),
@@ -997,7 +983,7 @@
 		if (key_data.use_tkip) {
 			ret = iwl_mvm_send_cmd_pdu(mvm,
 						   WOWLAN_TKIP_PARAM,
-						   CMD_SYNC, sizeof(tkip_cmd),
+						   0, sizeof(tkip_cmd),
 						   &tkip_cmd);
 			if (ret)
 				goto out;
@@ -1014,8 +1000,7 @@
 			kek_kck_cmd.replay_ctr = mvmvif->rekey_data.replay_ctr;
 
 			ret = iwl_mvm_send_cmd_pdu(mvm,
-						   WOWLAN_KEK_KCK_MATERIAL,
-						   CMD_SYNC,
+						   WOWLAN_KEK_KCK_MATERIAL, 0,
 						   sizeof(kek_kck_cmd),
 						   &kek_kck_cmd);
 			if (ret)
@@ -1031,7 +1016,7 @@
 	if (ret)
 		goto out;
 
-	ret = iwl_mvm_send_proto_offload(mvm, vif, false, CMD_SYNC);
+	ret = iwl_mvm_send_proto_offload(mvm, vif, false, 0);
 	if (ret)
 		goto out;
 
@@ -1043,7 +1028,7 @@
 	if (ret)
 		goto out;
 
-	ret = iwl_mvm_power_update_mac(mvm, vif);
+	ret = iwl_mvm_power_update_mac(mvm);
 	if (ret)
 		goto out;
 
@@ -1082,6 +1067,15 @@
 
 int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 {
+	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+
+	if (iwl_mvm_is_d0i3_supported(mvm)) {
+		mutex_lock(&mvm->d0i3_suspend_mutex);
+		__set_bit(D0I3_DEFER_WAKEUP, &mvm->d0i3_suspend_flags);
+		mutex_unlock(&mvm->d0i3_suspend_mutex);
+		return 0;
+	}
+
 	return __iwl_mvm_suspend(hw, wowlan, false);
 }
 
@@ -1277,7 +1271,7 @@
 }
 
 static void iwl_mvm_set_key_rx_seq(struct ieee80211_key_conf *key,
-				   struct iwl_wowlan_status_v6 *status)
+				   struct iwl_wowlan_status *status)
 {
 	union iwl_all_tsc_rsc *rsc = &status->gtk.rsc.all_tsc_rsc;
 
@@ -1294,7 +1288,7 @@
 }
 
 struct iwl_mvm_d3_gtk_iter_data {
-	struct iwl_wowlan_status_v6 *status;
+	struct iwl_wowlan_status *status;
 	void *last_gtk;
 	u32 cipher;
 	bool find_phase, unhandled_cipher;
@@ -1370,7 +1364,7 @@
 
 static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm,
 					  struct ieee80211_vif *vif,
-					  struct iwl_wowlan_status_v6 *status)
+					  struct iwl_wowlan_status *status)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm_d3_gtk_iter_data gtkdata = {
@@ -1465,10 +1459,10 @@
 	} err_info;
 	struct iwl_host_cmd cmd = {
 		.id = WOWLAN_GET_STATUSES,
-		.flags = CMD_SYNC | CMD_WANT_SKB,
+		.flags = CMD_WANT_SKB,
 	};
 	struct iwl_wowlan_status_data status;
-	struct iwl_wowlan_status_v6 *status_v6;
+	struct iwl_wowlan_status *fw_status;
 	int ret, len, status_size, i;
 	bool keep;
 	struct ieee80211_sta *ap_sta;
@@ -1491,7 +1485,7 @@
 	}
 
 	/* only for tracing for now */
-	ret = iwl_mvm_send_cmd_pdu(mvm, OFFLOADS_QUERY_CMD, CMD_SYNC, 0, NULL);
+	ret = iwl_mvm_send_cmd_pdu(mvm, OFFLOADS_QUERY_CMD, 0, 0, NULL);
 	if (ret)
 		IWL_ERR(mvm, "failed to query offload statistics (%d)\n", ret);
 
@@ -1505,10 +1499,7 @@
 	if (!cmd.resp_pkt)
 		goto out_unlock;
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API)
-		status_size = sizeof(struct iwl_wowlan_status_v6);
-	else
-		status_size = sizeof(struct iwl_wowlan_status_v4);
+	status_size = sizeof(*fw_status);
 
 	len = iwl_rx_packet_payload_len(cmd.resp_pkt);
 	if (len < status_size) {
@@ -1516,35 +1507,18 @@
 		goto out_free_resp;
 	}
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_D3_CONTINUITY_API) {
-		status_v6 = (void *)cmd.resp_pkt->data;
+	fw_status = (void *)cmd.resp_pkt->data;
 
-		status.pattern_number = le16_to_cpu(status_v6->pattern_number);
-		for (i = 0; i < 8; i++)
-			status.qos_seq_ctr[i] =
-				le16_to_cpu(status_v6->qos_seq_ctr[i]);
-		status.wakeup_reasons = le32_to_cpu(status_v6->wakeup_reasons);
-		status.wake_packet_length =
-			le32_to_cpu(status_v6->wake_packet_length);
-		status.wake_packet_bufsize =
-			le32_to_cpu(status_v6->wake_packet_bufsize);
-		status.wake_packet = status_v6->wake_packet;
-	} else {
-		struct iwl_wowlan_status_v4 *status_v4;
-		status_v6 = NULL;
-		status_v4 = (void *)cmd.resp_pkt->data;
-
-		status.pattern_number = le16_to_cpu(status_v4->pattern_number);
-		for (i = 0; i < 8; i++)
-			status.qos_seq_ctr[i] =
-				le16_to_cpu(status_v4->qos_seq_ctr[i]);
-		status.wakeup_reasons = le32_to_cpu(status_v4->wakeup_reasons);
-		status.wake_packet_length =
-			le32_to_cpu(status_v4->wake_packet_length);
-		status.wake_packet_bufsize =
-			le32_to_cpu(status_v4->wake_packet_bufsize);
-		status.wake_packet = status_v4->wake_packet;
-	}
+	status.pattern_number = le16_to_cpu(fw_status->pattern_number);
+	for (i = 0; i < 8; i++)
+		status.qos_seq_ctr[i] =
+			le16_to_cpu(fw_status->qos_seq_ctr[i]);
+	status.wakeup_reasons = le32_to_cpu(fw_status->wakeup_reasons);
+	status.wake_packet_length =
+		le32_to_cpu(fw_status->wake_packet_length);
+	status.wake_packet_bufsize =
+		le32_to_cpu(fw_status->wake_packet_bufsize);
+	status.wake_packet = fw_status->wake_packet;
 
 	if (len != status_size + ALIGN(status.wake_packet_bufsize, 4)) {
 		IWL_ERR(mvm, "Invalid WoWLAN status response!\n");
@@ -1571,7 +1545,7 @@
 
 	iwl_mvm_report_wakeup_reasons(mvm, vif, &status);
 
-	keep = iwl_mvm_setup_connection_keep(mvm, vif, status_v6);
+	keep = iwl_mvm_setup_connection_keep(mvm, vif, fw_status);
 
 	iwl_free_resp(&cmd);
 	return keep;
@@ -1674,6 +1648,19 @@
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 
+	if (iwl_mvm_is_d0i3_supported(mvm)) {
+		bool exit_now;
+
+		mutex_lock(&mvm->d0i3_suspend_mutex);
+		__clear_bit(D0I3_DEFER_WAKEUP, &mvm->d0i3_suspend_flags);
+		exit_now = __test_and_clear_bit(D0I3_PENDING_WAKEUP,
+						&mvm->d0i3_suspend_flags);
+		mutex_unlock(&mvm->d0i3_suspend_mutex);
+		if (exit_now)
+			_iwl_mvm_exit_d0i3(mvm);
+		return 0;
+	}
+
 	return __iwl_mvm_resume(mvm, false);
 }
 

diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c
index 9b59e1d..2e90ff7 100644
--- a/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c

@@ -103,10 +103,6 @@
 		IWL_DEBUG_POWER(mvm, "tx_data_timeout=%d\n", val);
 		dbgfs_pm->tx_data_timeout = val;
 		break;
-	case MVM_DEBUGFS_PM_DISABLE_POWER_OFF:
-		IWL_DEBUG_POWER(mvm, "disable_power_off=%d\n", val);
-		dbgfs_pm->disable_power_off = val;
-		break;
 	case MVM_DEBUGFS_PM_LPRX_ENA:
 		IWL_DEBUG_POWER(mvm, "lprx %s\n", val ? "enabled" : "disabled");
 		dbgfs_pm->lprx_ena = val;
@@ -154,12 +150,6 @@
 		if (sscanf(buf + 16, "%d", &val) != 1)
 			return -EINVAL;
 		param = MVM_DEBUGFS_PM_TX_DATA_TIMEOUT;
-	} else if (!strncmp("disable_power_off=", buf, 18) &&
-		   !(mvm->fw->ucode_capa.flags &
-		     IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD)) {
-		if (sscanf(buf + 18, "%d", &val) != 1)
-			return -EINVAL;
-		param = MVM_DEBUGFS_PM_DISABLE_POWER_OFF;
 	} else if (!strncmp("lprx=", buf, 5)) {
 		if (sscanf(buf + 5, "%d", &val) != 1)
 			return -EINVAL;
@@ -185,7 +175,7 @@
 
 	mutex_lock(&mvm->mutex);
 	iwl_dbgfs_update_pm(mvm, vif, param, val);
-	ret = iwl_mvm_power_update_mac(mvm, vif);
+	ret = iwl_mvm_power_update_mac(mvm);
 	mutex_unlock(&mvm->mutex);
 
 	return ret ?: count;
@@ -272,10 +262,9 @@
 			struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;
 
 			pos += scnprintf(buf+pos, bufsz-pos,
-					 "ap_sta_id %d - reduced Tx power %d force %d\n",
+					 "ap_sta_id %d - reduced Tx power %d\n",
 					 ap_sta_id,
-					 mvm_sta->bt_reduced_txpower,
-					 mvm_sta->bt_reduced_txpower_dbg);
+					 mvm_sta->bt_reduced_txpower);
 		}
 	}
 
@@ -293,41 +282,6 @@
 	return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
 }
 
-static ssize_t iwl_dbgfs_reduced_txp_write(struct ieee80211_vif *vif,
-					   char *buf, size_t count,
-					   loff_t *ppos)
-{
-	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-	struct iwl_mvm *mvm = mvmvif->mvm;
-	struct iwl_mvm_sta *mvmsta;
-	bool reduced_tx_power;
-	int ret;
-
-	if (mvmvif->ap_sta_id >= ARRAY_SIZE(mvm->fw_id_to_mac_id))
-		return -ENOTCONN;
-
-	if (strtobool(buf, &reduced_tx_power) != 0)
-		return -EINVAL;
-
-	mutex_lock(&mvm->mutex);
-
-	mvmsta = iwl_mvm_sta_from_staid_protected(mvm, mvmvif->ap_sta_id);
-	if (IS_ERR_OR_NULL(mvmsta)) {
-		mutex_unlock(&mvm->mutex);
-		return -ENOTCONN;
-	}
-
-	mvmsta->bt_reduced_txpower_dbg = false;
-	ret = iwl_mvm_bt_coex_reduced_txp(mvm, mvmvif->ap_sta_id,
-					  reduced_tx_power);
-	if (!ret)
-		mvmsta->bt_reduced_txpower_dbg = true;
-
-	mutex_unlock(&mvm->mutex);
-
-	return ret ? : count;
-}
-
 static void iwl_dbgfs_update_bf(struct ieee80211_vif *vif,
 				enum iwl_dbgfs_bf_mask param, int value)
 {
@@ -462,9 +416,9 @@
 	mutex_lock(&mvm->mutex);
 	iwl_dbgfs_update_bf(vif, param, value);
 	if (param == MVM_DEBUGFS_BF_ENABLE_BEACON_FILTER && !value)
-		ret = iwl_mvm_disable_beacon_filter(mvm, vif, CMD_SYNC);
+		ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
 	else
-		ret = iwl_mvm_enable_beacon_filter(mvm, vif, CMD_SYNC);
+		ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
 	mutex_unlock(&mvm->mutex);
 
 	return ret ?: count;
@@ -568,7 +522,6 @@
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(pm_params, 32);
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(bf_params, 256);
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(low_latency, 10);
-MVM_DEBUGFS_WRITE_FILE_OPS(reduced_txp, 10);
 
 void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
@@ -592,8 +545,7 @@
 		return;
 	}
 
-	if ((mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT) &&
-	    iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
+	if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
 	    ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
 	     (vif->type == NL80211_IFTYPE_STATION && vif->p2p &&
 	      mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)))
@@ -601,7 +553,6 @@
 					 S_IRUSR);
 
 	MVM_DEBUGFS_ADD_FILE_VIF(mac_params, mvmvif->dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(reduced_txp, mvmvif->dbgfs_dir, S_IWUSR);
 	MVM_DEBUGFS_ADD_FILE_VIF(low_latency, mvmvif->dbgfs_dir,
 				 S_IRUSR | S_IWUSR);
 

diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/iwlwifi/mvm/debugfs.c
index 1b52dee..29ca726 100644
--- a/drivers/net/wireless/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/iwlwifi/mvm/debugfs.c

@@ -65,9 +65,8 @@
 #include "mvm.h"
 #include "sta.h"
 #include "iwl-io.h"
-#include "iwl-prph.h"
 #include "debugfs.h"
-#include "fw-error-dump.h"
+#include "iwl-fw-error-dump.h"
 
 static ssize_t iwl_dbgfs_tx_flush_write(struct iwl_mvm *mvm, char *buf,
 					size_t count, loff_t *ppos)
@@ -136,9 +135,6 @@
 
 	file->private_data = mvm->fw_error_dump;
 	mvm->fw_error_dump = NULL;
-	kfree(mvm->fw_error_sram);
-	mvm->fw_error_sram = NULL;
-	mvm->fw_error_sram_len = 0;
 	ret = 0;
 
 out:
@@ -684,7 +680,7 @@
 		mvm->restart_fw++;
 
 	/* take the return value to make compiler happy - it will fail anyway */
-	ret = iwl_mvm_send_cmd_pdu(mvm, REPLY_ERROR, CMD_SYNC, 0, NULL);
+	ret = iwl_mvm_send_cmd_pdu(mvm, REPLY_ERROR, 0, 0, NULL);
 
 	mutex_unlock(&mvm->mutex);
 
@@ -694,7 +690,7 @@
 static ssize_t iwl_dbgfs_fw_nmi_write(struct iwl_mvm *mvm, char *buf,
 				      size_t count, loff_t *ppos)
 {
-	iwl_write_prph(mvm->trans, DEVICE_SET_NMI_REG, 1);
+	iwl_force_nmi(mvm->trans);
 
 	return count;
 }
@@ -841,7 +837,7 @@
 	/* send updated bcast filtering configuration */
 	if (mvm->dbgfs_bcast_filtering.override &&
 	    iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
-		err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, CMD_SYNC,
+		err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
 					   sizeof(cmd), &cmd);
 	mutex_unlock(&mvm->mutex);
 
@@ -913,7 +909,7 @@
 	/* send updated bcast filtering configuration */
 	if (mvm->dbgfs_bcast_filtering.override &&
 	    iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
-		err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, CMD_SYNC,
+		err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
 					   sizeof(cmd), &cmd);
 	mutex_unlock(&mvm->mutex);
 
@@ -1004,6 +1000,7 @@
 	PRINT_MVM_REF(IWL_MVM_REF_P2P_CLIENT);
 	PRINT_MVM_REF(IWL_MVM_REF_AP_IBSS);
 	PRINT_MVM_REF(IWL_MVM_REF_USER);
+	PRINT_MVM_REF(IWL_MVM_REF_EXIT_WORK);
 
 	return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
 }
@@ -1108,9 +1105,9 @@
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(d0i3_refs, 8);
 
 static const struct file_operations iwl_dbgfs_fw_error_dump_ops = {
-        .open = iwl_dbgfs_fw_error_dump_open,
-        .read = iwl_dbgfs_fw_error_dump_read,
-        .release = iwl_dbgfs_fw_error_dump_release,
+	.open = iwl_dbgfs_fw_error_dump_open,
+	.read = iwl_dbgfs_fw_error_dump_read,
+	.release = iwl_dbgfs_fw_error_dump_release,
 };
 
 #ifdef CONFIG_IWLWIFI_BCAST_FILTERING
@@ -1138,9 +1135,8 @@
 	MVM_DEBUGFS_ADD_FILE(fw_error_dump, dbgfs_dir, S_IRUSR);
 	MVM_DEBUGFS_ADD_FILE(bt_notif, dbgfs_dir, S_IRUSR);
 	MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, S_IRUSR);
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD)
-		MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir,
-				     S_IRUSR | S_IWUSR);
+	MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir,
+			     S_IRUSR | S_IWUSR);
 	MVM_DEBUGFS_ADD_FILE(fw_rx_stats, mvm->debugfs_dir, S_IRUSR);
 	MVM_DEBUGFS_ADD_FILE(drv_rx_stats, mvm->debugfs_dir, S_IRUSR);
 	MVM_DEBUGFS_ADD_FILE(fw_restart, mvm->debugfs_dir, S_IWUSR);

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-coex.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-coex.h
index 21877e5..5fe82c29 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-coex.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-coex.h

@@ -141,7 +141,8 @@
 	BT_COEX_TX_DIS_LUT,
 
 	BT_COEX_MAX_LUT,
-};
+	BT_COEX_INVALID_LUT = 0xff,
+}; /* BT_COEX_DECISION_LUT_INDEX_API_E_VER_1 */
 
 #define BT_COEX_LUT_SIZE (12)
 #define BT_COEX_CORUN_LUT_SIZE (32)
@@ -154,19 +155,23 @@
  * @flags:&enum iwl_bt_coex_flags
  * @max_kill:
  * @bt_reduced_tx_power: enum %iwl_bt_reduced_tx_power
- * @bt4_antenna_isolation:
- * @bt4_antenna_isolation_thr:
- * @bt4_tx_tx_delta_freq_thr:
- * @bt4_tx_rx_max_freq0:
- * @bt_prio_boost:
+ * @override_primary_lut: enum %iwl_bt_coex_lut_type: BT_COEX_INVALID_LUT
+ *	should be set by default
+ * @override_secondary_lut: enum %iwl_bt_coex_lut_type: BT_COEX_INVALID_LUT
+ *	should be set by default
+ * @bt4_antenna_isolation: antenna isolation
+ * @bt4_antenna_isolation_thr: antenna threshold value
+ * @bt4_tx_tx_delta_freq_thr: TxTx delta frequency
+ * @bt4_tx_rx_max_freq0: TxRx max frequency
+ * @bt_prio_boost: BT priority boost registers
  * @wifi_tx_prio_boost: SW boost of wifi tx priority
  * @wifi_rx_prio_boost: SW boost of wifi rx priority
- * @kill_ack_msk:
- * @kill_cts_msk:
- * @decision_lut:
- * @bt4_multiprio_lut:
- * @bt4_corun_lut20:
- * @bt4_corun_lut40:
+ * @kill_ack_msk: kill ACK mask. 1 - Tx ACK, 0 - kill Tx of ACK.
+ * @kill_cts_msk: kill CTS mask. 1 - Tx CTS, 0 - kill Tx of CTS.
+ * @decision_lut: PTA decision LUT, per Prio-Ch
+ * @bt4_multiprio_lut: multi priority LUT configuration
+ * @bt4_corun_lut20: co-running 20 MHz LUT configuration
+ * @bt4_corun_lut40: co-running 40 MHz LUT configuration
  * @valid_bit_msk: enum %iwl_bt_coex_valid_bit_msk
  *
  * The structure is used for the BT_COEX command.
@@ -175,7 +180,8 @@
 	__le32 flags;
 	u8 max_kill;
 	u8 bt_reduced_tx_power;
-	u8 reserved[2];
+	u8 override_primary_lut;
+	u8 override_secondary_lut;
 
 	u8 bt4_antenna_isolation;
 	u8 bt4_antenna_isolation_thr;
@@ -194,7 +200,7 @@
 	__le32 bt4_corun_lut40[BT_COEX_CORUN_LUT_SIZE];
 
 	__le32 valid_bit_msk;
-} __packed; /* BT_COEX_CMD_API_S_VER_3 */
+} __packed; /* BT_COEX_CMD_API_S_VER_5 */
 
 /**
  * struct iwl_bt_coex_ci_cmd - bt coex channel inhibition command
@@ -282,7 +288,7 @@
 	BT_ON_NO_CONNECTION	= 1,
 	BT_LOW_TRAFFIC		= 2,
 	BT_HIGH_TRAFFIC		= 3,
-};
+}; /* BT_COEX_BT_ACTIVITY_GRADING_API_E_VER_1 */
 
 /**
  * struct iwl_bt_coex_profile_notif - notification about BT coex
@@ -310,7 +316,7 @@
 	__le32 primary_ch_lut;
 	__le32 secondary_ch_lut;
 	__le32 bt_activity_grading;
-} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_2 */
+} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_3 */
 
 enum iwl_bt_coex_prio_table_event {
 	BT_COEX_PRIO_TBL_EVT_INIT_CALIB1		= 0,

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h
index 10fcc1a..13696fe 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h

@@ -345,21 +345,6 @@
 	IWL_WOWLAN_WAKEUP_BY_REM_WAKE_WAKEUP_PACKET		= BIT(12),
 }; /* WOWLAN_WAKE_UP_REASON_API_E_VER_2 */
 
-struct iwl_wowlan_status_v4 {
-	__le64 replay_ctr;
-	__le16 pattern_number;
-	__le16 non_qos_seq_ctr;
-	__le16 qos_seq_ctr[8];
-	__le32 wakeup_reasons;
-	__le32 rekey_status;
-	__le32 num_of_gtk_rekeys;
-	__le32 transmitted_ndps;
-	__le32 received_beacons;
-	__le32 wake_packet_length;
-	__le32 wake_packet_bufsize;
-	u8 wake_packet[]; /* can be truncated from _length to _bufsize */
-} __packed; /* WOWLAN_STATUSES_API_S_VER_4 */
-
 struct iwl_wowlan_gtk_status {
 	u8 key_index;
 	u8 reserved[3];
@@ -368,7 +353,7 @@
 	struct iwl_wowlan_rsc_tsc_params_cmd rsc;
 } __packed;
 
-struct iwl_wowlan_status_v6 {
+struct iwl_wowlan_status {
 	struct iwl_wowlan_gtk_status gtk;
 	__le64 replay_ctr;
 	__le16 pattern_number;

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-rs.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-rs.h
index 39148b5..8bb5b94 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-rs.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-rs.h

@@ -334,7 +334,7 @@
  */
 struct iwl_lq_cmd {
 	u8 sta_id;
-	u8 reserved1;
+	u8 reduced_tpc;
 	u16 control;
 	/* LINK_QUAL_GENERAL_PARAMS_API_S_VER_1 */
 	u8 flags;

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
index d73a89e..6959fda 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h

@@ -169,8 +169,12 @@
 	SCAN_TYPE_DISCOVERY_FORCED	= 6,
 }; /* SCAN_ACTIVITY_TYPE_E_VER_1 */
 
-/* Maximal number of channels to scan */
-#define MAX_NUM_SCAN_CHANNELS 0x24
+/**
+ * Maximal number of channels to scan
+ * it should be equal to:
+ * max(IWL_NUM_CHANNELS, IWL_NUM_CHANNELS_FAMILY_8000)
+ */
+#define MAX_NUM_SCAN_CHANNELS 50
 
 /**
  * struct iwl_scan_cmd - scan request command
@@ -534,13 +538,16 @@
  *
  * IWL_SCAN_OFFLOAD_FLAG_PASS_ALL: pass all results - no filtering.
  * IWL_SCAN_OFFLOAD_FLAG_CACHED_CHANNEL: add cached channels to partial scan.
- * IWL_SCAN_OFFLOAD_FLAG_ENERGY_SCAN: use energy based scan before partial scan
- *	on A band.
+ * IWL_SCAN_OFFLOAD_FLAG_EBS_QUICK_MODE: EBS duration is 100mSec - typical
+ *	beacon period. Finding channel activity in this mode is not guaranteed.
+ * IWL_SCAN_OFFLOAD_FLAG_EBS_ACCURATE_MODE: EBS duration is 200mSec.
+ *	Assuming beacon period is 100ms finding channel activity is guaranteed.
  */
 enum iwl_scan_offload_flags {
 	IWL_SCAN_OFFLOAD_FLAG_PASS_ALL		= BIT(0),
 	IWL_SCAN_OFFLOAD_FLAG_CACHED_CHANNEL	= BIT(2),
-	IWL_SCAN_OFFLOAD_FLAG_ENERGY_SCAN	= BIT(3),
+	IWL_SCAN_OFFLOAD_FLAG_EBS_QUICK_MODE	= BIT(5),
+	IWL_SCAN_OFFLOAD_FLAG_EBS_ACCURATE_MODE	= BIT(6),
 };
 
 /**
@@ -563,17 +570,24 @@
 	IWL_SCAN_OFFLOAD_ABORTED	= 2,
 };
 
+enum iwl_scan_ebs_status {
+	IWL_SCAN_EBS_SUCCESS,
+	IWL_SCAN_EBS_FAILED,
+	IWL_SCAN_EBS_CHAN_NOT_FOUND,
+};
+
 /**
  * iwl_scan_offload_complete - SCAN_OFFLOAD_COMPLETE_NTF_API_S_VER_1
  * @last_schedule_line:		last schedule line executed (fast or regular)
  * @last_schedule_iteration:	last scan iteration executed before scan abort
  * @status:			enum iwl_scan_offload_compleate_status
+ * @ebs_status: last EBS status, see IWL_SCAN_EBS_*
  */
 struct iwl_scan_offload_complete {
 	u8 last_schedule_line;
 	u8 last_schedule_iteration;
 	u8 status;
-	u8 reserved;
+	u8 ebs_status;
 } __packed;
 
 /**

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-sta.h
index d636478..39cebee 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-sta.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-sta.h

@@ -255,22 +255,19 @@
 } __packed;
 
 /**
- * struct iwl_mvm_add_sta_cmd_v5 - Add/modify a station in the fw's sta table.
+ * struct iwl_mvm_add_sta_cmd - Add/modify a station in the fw's sta table.
  * ( REPLY_ADD_STA = 0x18 )
  * @add_modify: 1: modify existing, 0: add new station
- * @unicast_tx_key_id: unicast tx key id. Relevant only when unicast key sent
- * @multicast_tx_key_id: multicast tx key id. Relevant only when multicast key
- *	sent
+ * @awake_acs:
+ * @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable
+ *	AMPDU for tid x. Set %STA_MODIFY_TID_DISABLE_TX to change this field.
  * @mac_id_n_color: the Mac context this station belongs to
  * @addr[ETH_ALEN]: station's MAC address
  * @sta_id: index of station in uCode's station table
  * @modify_mask: STA_MODIFY_*, selects which parameters to modify vs. leave
  *	alone. 1 - modify, 0 - don't change.
- * @key: look at %iwl_mvm_keyinfo
  * @station_flags: look at %iwl_sta_flags
  * @station_flags_msk: what of %station_flags have changed
- * @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable
- *	AMPDU for tid x. Set %STA_MODIFY_TID_DISABLE_TX to change this field.
  * @add_immediate_ba_tid: tid for which to add block-ack support (Rx)
  *	Set %STA_MODIFY_ADD_BA_TID to use this field, and also set
  *	add_immediate_ba_ssn.
@@ -294,40 +291,7 @@
  * ADD_STA sets up the table entry for one station, either creating a new
  * entry, or modifying a pre-existing one.
  */
-struct iwl_mvm_add_sta_cmd_v5 {
-	u8 add_modify;
-	u8 unicast_tx_key_id;
-	u8 multicast_tx_key_id;
-	u8 reserved1;
-	__le32 mac_id_n_color;
-	u8 addr[ETH_ALEN];
-	__le16 reserved2;
-	u8 sta_id;
-	u8 modify_mask;
-	__le16 reserved3;
-	struct iwl_mvm_keyinfo key;
-	__le32 station_flags;
-	__le32 station_flags_msk;
-	__le16 tid_disable_tx;
-	__le16 reserved4;
-	u8 add_immediate_ba_tid;
-	u8 remove_immediate_ba_tid;
-	__le16 add_immediate_ba_ssn;
-	__le16 sleep_tx_count;
-	__le16 sleep_state_flags;
-	__le16 assoc_id;
-	__le16 beamform_flags;
-	__le32 tfd_queue_msk;
-} __packed; /* ADD_STA_CMD_API_S_VER_5 */
-
-/**
- * struct iwl_mvm_add_sta_cmd_v7 - Add / modify a station
- * VER_7 of this command is quite similar to VER_5 except
- * exclusion of all fields related to the security key installation.
- * It only differs from VER_6 by the "awake_acs" field that is
- * reserved and ignored in VER_6.
- */
-struct iwl_mvm_add_sta_cmd_v7 {
+struct iwl_mvm_add_sta_cmd {
 	u8 add_modify;
 	u8 awake_acs;
 	__le16 tid_disable_tx;

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-tx.h
index 8e122f3..6cc5f52 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-tx.h

@@ -482,7 +482,8 @@
 	u8 pa_integ_res_b[3];
 	u8 pa_integ_res_c[3];
 	__le16 measurement_req_id;
-	__le16 reserved;
+	u8 reduced_tpc;
+	u8 reserved;
 
 	__le32 tfd_info;
 	__le16 seq_ctl;

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
index 6e75b52..309a9b9 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h

@@ -71,6 +71,7 @@
 #include "fw-api-power.h"
 #include "fw-api-d3.h"
 #include "fw-api-coex.h"
+#include "fw-api-scan.h"
 
 /* maximal number of Tx queues in any platform */
 #define IWL_MVM_MAX_QUEUES	20
@@ -604,52 +605,7 @@
 	TE_V1_NOTIF_INTERNAL_FRAG_END = BIT(7),
 }; /* MAC_EVENT_ACTION_API_E_VER_2 */
 
-
-/**
- * struct iwl_time_event_cmd_api_v1 - configuring Time Events
- * with struct MAC_TIME_EVENT_DATA_API_S_VER_1 (see also
- * with version 2. determined by IWL_UCODE_TLV_FLAGS)
- * ( TIME_EVENT_CMD = 0x29 )
- * @id_and_color: ID and color of the relevant MAC
- * @action: action to perform, one of FW_CTXT_ACTION_*
- * @id: this field has two meanings, depending on the action:
- *	If the action is ADD, then it means the type of event to add.
- *	For all other actions it is the unique event ID assigned when the
- *	event was added by the FW.
- * @apply_time: When to start the Time Event (in GP2)
- * @max_delay: maximum delay to event's start (apply time), in TU
- * @depends_on: the unique ID of the event we depend on (if any)
- * @interval: interval between repetitions, in TU
- * @interval_reciprocal: 2^32 / interval
- * @duration: duration of event in TU
- * @repeat: how many repetitions to do, can be TE_REPEAT_ENDLESS
- * @dep_policy: one of TE_V1_INDEPENDENT, TE_V1_DEP_OTHER, TE_V1_DEP_TSF
- *	and TE_V1_EVENT_SOCIOPATHIC
- * @is_present: 0 or 1, are we present or absent during the Time Event
- * @max_frags: maximal number of fragments the Time Event can be divided to
- * @notify: notifications using TE_V1_NOTIF_* (whom to notify when)
- */
-struct iwl_time_event_cmd_v1 {
-	/* COMMON_INDEX_HDR_API_S_VER_1 */
-	__le32 id_and_color;
-	__le32 action;
-	__le32 id;
-	/* MAC_TIME_EVENT_DATA_API_S_VER_1 */
-	__le32 apply_time;
-	__le32 max_delay;
-	__le32 dep_policy;
-	__le32 depends_on;
-	__le32 is_present;
-	__le32 max_frags;
-	__le32 interval;
-	__le32 interval_reciprocal;
-	__le32 duration;
-	__le32 repeat;
-	__le32 notify;
-} __packed; /* MAC_TIME_EVENT_CMD_API_S_VER_1 */
-
-
-/* Time event - defines for command API v2 */
+/* Time event - defines for command API */
 
 /*
  * @TE_V2_FRAG_NONE: fragmentation of the time event is NOT allowed.
@@ -680,7 +636,7 @@
 #define TE_V2_PLACEMENT_POS	12
 #define TE_V2_ABSENCE_POS	15
 
-/* Time event policy values (for time event cmd api v2)
+/* Time event policy values
  * A notification (both event and fragment) includes a status indicating weather
  * the FW was able to schedule the event or not. For fragment start/end
  * notification the status is always success. There is no start/end fragment
@@ -727,7 +683,7 @@
 };
 
 /**
- * struct iwl_time_event_cmd_api_v2 - configuring Time Events
+ * struct iwl_time_event_cmd - configuring Time Events
  * with struct MAC_TIME_EVENT_DATA_API_S_VER_2 (see also
  * with version 1. determined by IWL_UCODE_TLV_FLAGS)
  * ( TIME_EVENT_CMD = 0x29 )
@@ -750,7 +706,7 @@
  *	TE_EVENT_SOCIOPATHIC
  *	using TE_ABSENCE and using TE_NOTIF_*
  */
-struct iwl_time_event_cmd_v2 {
+struct iwl_time_event_cmd {
 	/* COMMON_INDEX_HDR_API_S_VER_1 */
 	__le32 id_and_color;
 	__le32 action;

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index 7ce2006..883e702 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c

@@ -99,7 +99,7 @@
 	};
 
 	IWL_DEBUG_FW(mvm, "select valid tx ant: %u\n", valid_tx_ant);
-	return iwl_mvm_send_cmd_pdu(mvm, TX_ANT_CONFIGURATION_CMD, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, TX_ANT_CONFIGURATION_CMD, 0,
 				    sizeof(tx_ant_cmd), &tx_ant_cmd);
 }
 
@@ -137,6 +137,8 @@
 		alive_data->scd_base_addr = le32_to_cpu(palive2->scd_base_ptr);
 		mvm->umac_error_event_table =
 			le32_to_cpu(palive2->error_info_addr);
+		mvm->sf_space.addr = le32_to_cpu(palive2->st_fwrd_addr);
+		mvm->sf_space.size = le32_to_cpu(palive2->st_fwrd_size);
 
 		alive_data->valid = le16_to_cpu(palive2->status) ==
 				    IWL_ALIVE_STATUS_OK;
@@ -180,6 +182,7 @@
 	int ret, i;
 	enum iwl_ucode_type old_type = mvm->cur_ucode;
 	static const u8 alive_cmd[] = { MVM_ALIVE };
+	struct iwl_sf_region st_fwrd_space;
 
 	fw = iwl_get_ucode_image(mvm, ucode_type);
 	if (WARN_ON(!fw))
@@ -215,6 +218,14 @@
 		return -EIO;
 	}
 
+	/*
+	 * update the sdio allocation according to the pointer we get in the
+	 * alive notification.
+	 */
+	st_fwrd_space.addr = mvm->sf_space.addr;
+	st_fwrd_space.size = mvm->sf_space.size;
+	ret = iwl_trans_update_sf(mvm->trans, &st_fwrd_space);
+
 	iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr);
 
 	/*
@@ -256,7 +267,7 @@
 	IWL_DEBUG_INFO(mvm, "Sending Phy CFG command: 0x%x\n",
 		       phy_cfg_cmd.phy_cfg);
 
-	return iwl_mvm_send_cmd_pdu(mvm, PHY_CONFIGURATION_CMD, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, PHY_CONFIGURATION_CMD, 0,
 				    sizeof(phy_cfg_cmd), &phy_cfg_cmd);
 }
 
@@ -288,14 +299,14 @@
 		goto error;
 	}
 
-	ret = iwl_send_bt_prio_tbl(mvm);
+	ret = iwl_send_bt_init_conf(mvm);
 	if (ret)
 		goto error;
 
 	/* Read the NVM only at driver load time, no need to do this twice */
 	if (read_nvm) {
 		/* Read nvm */
-		ret = iwl_nvm_init(mvm);
+		ret = iwl_nvm_init(mvm, true);
 		if (ret) {
 			IWL_ERR(mvm, "Failed to read NVM: %d\n", ret);
 			goto error;
@@ -303,7 +314,7 @@
 	}
 
 	/* In case we read the NVM from external file, load it to the NIC */
-	if (iwlwifi_mod_params.nvm_file)
+	if (mvm->nvm_file_name)
 		iwl_mvm_load_nvm_to_nic(mvm);
 
 	ret = iwl_nvm_check_version(mvm->nvm_data, mvm->trans);
@@ -424,10 +435,6 @@
 	if (ret)
 		goto error;
 
-	ret = iwl_send_bt_prio_tbl(mvm);
-	if (ret)
-		goto error;
-
 	ret = iwl_send_bt_init_conf(mvm);
 	if (ret)
 		goto error;
@@ -468,12 +475,6 @@
 	/* Initialize tx backoffs to the minimal possible */
 	iwl_mvm_tt_tx_backoff(mvm, 0);
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)) {
-		ret = iwl_power_legacy_set_cam_mode(mvm);
-		if (ret)
-			goto error;
-	}
-
 	ret = iwl_mvm_power_update_device(mvm);
 	if (ret)
 		goto error;

diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c
index 9ccec10..8b53027 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c

@@ -667,12 +667,9 @@
 	if (vif->bss_conf.qos)
 		cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA);
 
-	/* Don't use cts to self as the fw doesn't support it currently. */
 	if (vif->bss_conf.use_cts_prot) {
 		cmd->protection_flags |= cpu_to_le32(MAC_PROT_FLG_TGG_PROTECT);
-		if (IWL_UCODE_API(mvm->fw->ucode_ver) >= 8)
-			cmd->protection_flags |=
-				cpu_to_le32(MAC_PROT_FLG_SELF_CTS_EN);
+		cmd->protection_flags |= cpu_to_le32(MAC_PROT_FLG_SELF_CTS_EN);
 	}
 	IWL_DEBUG_RATE(mvm, "use_cts_prot %d, ht_operation_mode %d\n",
 		       vif->bss_conf.use_cts_prot,
@@ -688,7 +685,7 @@
 static int iwl_mvm_mac_ctxt_send_cmd(struct iwl_mvm *mvm,
 				     struct iwl_mac_ctx_cmd *cmd)
 {
-	int ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, CMD_SYNC,
+	int ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, 0,
 				       sizeof(*cmd), cmd);
 	if (ret)
 		IWL_ERR(mvm, "Failed to send MAC context (action:%d): %d\n",
@@ -696,19 +693,39 @@
 	return ret;
 }
 
-/*
- * Fill the specific data for mac context of type station or p2p client
- */
-static void iwl_mvm_mac_ctxt_cmd_fill_sta(struct iwl_mvm *mvm,
-					  struct ieee80211_vif *vif,
-					  struct iwl_mac_data_sta *ctxt_sta,
-					  bool force_assoc_off)
+static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm,
+				    struct ieee80211_vif *vif,
+				    u32 action, bool force_assoc_off)
 {
+	struct iwl_mac_ctx_cmd cmd = {};
+	struct iwl_mac_data_sta *ctxt_sta;
+
+	WARN_ON(vif->type != NL80211_IFTYPE_STATION);
+
+	/* Fill the common data for all mac context types */
+	iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, action);
+
+	if (vif->p2p) {
+		struct ieee80211_p2p_noa_attr *noa =
+			&vif->bss_conf.p2p_noa_attr;
+
+		cmd.p2p_sta.ctwin = cpu_to_le32(noa->oppps_ctwindow &
+					IEEE80211_P2P_OPPPS_CTWINDOW_MASK);
+		ctxt_sta = &cmd.p2p_sta.sta;
+	} else {
+		ctxt_sta = &cmd.sta;
+	}
+
 	/* We need the dtim_period to set the MAC as associated */
 	if (vif->bss_conf.assoc && vif->bss_conf.dtim_period &&
 	    !force_assoc_off) {
 		u32 dtim_offs;
 
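+		/* Allow beacons to pass through as long as we are not
+		 * associated, or we do not have dtim period information.
+		 *
+		 * NOTE(review): this statement sits in the branch taken when
+		 * we ARE associated and DO have a dtim period, i.e. the
+		 * opposite of the condition described above - confirm the
+		 * intended placement of the MAC_FILTER_IN_BEACON update.
+		 */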
+		cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON);
+
 		/*
 		 * The DTIM count counts down, so when it is N that means N
 		 * more beacon intervals happen until the DTIM TBTT. Therefore
@@ -755,51 +772,6 @@
 
 	ctxt_sta->listen_interval = cpu_to_le32(mvm->hw->conf.listen_interval);
 	ctxt_sta->assoc_id = cpu_to_le32(vif->bss_conf.aid);
-}
-
-static int iwl_mvm_mac_ctxt_cmd_station(struct iwl_mvm *mvm,
-					struct ieee80211_vif *vif,
-					u32 action)
-{
-	struct iwl_mac_ctx_cmd cmd = {};
-
-	WARN_ON(vif->type != NL80211_IFTYPE_STATION || vif->p2p);
-
-	/* Fill the common data for all mac context types */
-	iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, action);
-
-	/* Allow beacons to pass through as long as we are not associated,or we
-	 * do not have dtim period information */
-	if (!vif->bss_conf.assoc || !vif->bss_conf.dtim_period)
-		cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON);
-	else
-		cmd.filter_flags &= ~cpu_to_le32(MAC_FILTER_IN_BEACON);
-
-	/* Fill the data specific for station mode */
-	iwl_mvm_mac_ctxt_cmd_fill_sta(mvm, vif, &cmd.sta,
-				      action == FW_CTXT_ACTION_ADD);
-
-	return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd);
-}
-
-static int iwl_mvm_mac_ctxt_cmd_p2p_client(struct iwl_mvm *mvm,
-					   struct ieee80211_vif *vif,
-					   u32 action)
-{
-	struct iwl_mac_ctx_cmd cmd = {};
-	struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr;
-
-	WARN_ON(vif->type != NL80211_IFTYPE_STATION || !vif->p2p);
-
-	/* Fill the common data for all mac context types */
-	iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, action);
-
-	/* Fill the data specific for station mode */
-	iwl_mvm_mac_ctxt_cmd_fill_sta(mvm, vif, &cmd.p2p_sta.sta,
-				      action == FW_CTXT_ACTION_ADD);
-
-	cmd.p2p_sta.ctwin = cpu_to_le32(noa->oppps_ctwindow &
-					IEEE80211_P2P_OPPPS_CTWINDOW_MASK);
 
 	return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd);
 }
@@ -1137,16 +1109,12 @@
 }
 
 static int iwl_mvm_mac_ctx_send(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-				u32 action)
+				u32 action, bool force_assoc_off)
 {
 	switch (vif->type) {
 	case NL80211_IFTYPE_STATION:
-		if (!vif->p2p)
-			return iwl_mvm_mac_ctxt_cmd_station(mvm, vif,
-							    action);
-		else
-			return iwl_mvm_mac_ctxt_cmd_p2p_client(mvm, vif,
-							       action);
+		return iwl_mvm_mac_ctxt_cmd_sta(mvm, vif, action,
+						force_assoc_off);
 		break;
 	case NL80211_IFTYPE_AP:
 		if (!vif->p2p)
@@ -1176,7 +1144,8 @@
 		      vif->addr, ieee80211_vif_type_p2p(vif)))
 		return -EIO;
 
-	ret = iwl_mvm_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_ADD);
+	ret = iwl_mvm_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_ADD,
+				   true);
 	if (ret)
 		return ret;
 
@@ -1187,7 +1156,8 @@
 	return 0;
 }
 
-int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
+int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+			     bool force_assoc_off)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 
@@ -1195,7 +1165,8 @@
 		      vif->addr, ieee80211_vif_type_p2p(vif)))
 		return -EIO;
 
-	return iwl_mvm_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_MODIFY);
+	return iwl_mvm_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_MODIFY,
+				    force_assoc_off);
 }
 
 int iwl_mvm_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
@@ -1214,7 +1185,7 @@
 							   mvmvif->color));
 	cmd.action = cpu_to_le32(FW_CTXT_ACTION_REMOVE);
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, CMD_SYNC,
+	ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, 0,
 				   sizeof(cmd), &cmd);
 	if (ret) {
 		IWL_ERR(mvm, "Failed to remove MAC context: %d\n", ret);
@@ -1240,11 +1211,23 @@
 	u32 rate __maybe_unused =
 		le32_to_cpu(beacon->beacon_notify_hdr.initial_rate);
 
+	lockdep_assert_held(&mvm->mutex);
+
 	IWL_DEBUG_RX(mvm, "beacon status %#x retries:%d tsf:0x%16llX rate:%d\n",
 		     status & TX_STATUS_MSK,
 		     beacon->beacon_notify_hdr.failure_frame,
 		     le64_to_cpu(beacon->tsf),
 		     rate);
+
+	if (unlikely(mvm->csa_vif && mvm->csa_vif->csa_active)) {
+		if (!ieee80211_csa_is_complete(mvm->csa_vif)) {
+			iwl_mvm_mac_ctxt_beacon_changed(mvm, mvm->csa_vif);
+		} else {
+			ieee80211_csa_finish(mvm->csa_vif);
+			mvm->csa_vif = NULL;
+		}
+	}
+
 	return 0;
 }
 

diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 8735ef1..7215f59 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c

@@ -295,7 +295,9 @@
 	    !iwlwifi_mod_params.sw_crypto)
 		hw->flags |= IEEE80211_HW_MFP_CAPABLE;
 
-	if (0 && mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT) {
+	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT &&
+	    IWL_UCODE_API(mvm->fw->ucode_ver) >= 9 &&
+	    !iwlwifi_mod_params.uapsd_disable) {
 		hw->flags |= IEEE80211_HW_SUPPORTS_UAPSD;
 		hw->uapsd_queues = IWL_UAPSD_AC_INFO;
 		hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP;
@@ -309,11 +311,8 @@
 		BIT(NL80211_IFTYPE_P2P_CLIENT) |
 		BIT(NL80211_IFTYPE_AP) |
 		BIT(NL80211_IFTYPE_P2P_GO) |
-		BIT(NL80211_IFTYPE_P2P_DEVICE);
-
-	/* IBSS has bugs in older versions */
-	if (IWL_UCODE_API(mvm->fw->ucode_ver) >= 8)
-		hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC);
+		BIT(NL80211_IFTYPE_P2P_DEVICE) |
+		BIT(NL80211_IFTYPE_ADHOC);
 
 	hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
 	hw->wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG |
@@ -322,6 +321,9 @@
 	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_GO_UAPSD)
 		hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD;
 
+	if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_CSA_FLOW)
+		hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
+
 	hw->wiphy->iface_combinations = iwl_mvm_iface_combinations;
 	hw->wiphy->n_iface_combinations =
 		ARRAY_SIZE(iwl_mvm_iface_combinations);
@@ -365,14 +367,11 @@
 	else
 		hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_SCHED_SCAN) {
-		hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
-		hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
-		hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
-		/* we create the 802.11 header and zero length SSID IE. */
-		hw->wiphy->max_sched_scan_ie_len =
-					SCAN_OFFLOAD_PROBE_REQ_SIZE - 24 - 2;
-	}
+	hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
+	hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
+	hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
+	/* we create the 802.11 header and zero length SSID IE. */
+	hw->wiphy->max_sched_scan_ie_len = SCAN_OFFLOAD_PROBE_REQ_SIZE - 24 - 2;
 
 	hw->wiphy->features |= NL80211_FEATURE_P2P_GO_CTWIN |
 			       NL80211_FEATURE_P2P_GO_OPPPS;
@@ -386,7 +385,11 @@
 	}
 
 #ifdef CONFIG_PM_SLEEP
-	if (mvm->fw->img[IWL_UCODE_WOWLAN].sec[0].len &&
+	if (iwl_mvm_is_d0i3_supported(mvm) &&
+	    device_can_wakeup(mvm->trans->dev)) {
+		mvm->wowlan.flags = WIPHY_WOWLAN_ANY;
+		hw->wiphy->wowlan = &mvm->wowlan;
+	} else if (mvm->fw->img[IWL_UCODE_WOWLAN].sec[0].len &&
 	    mvm->trans->ops->d3_suspend &&
 	    mvm->trans->ops->d3_resume &&
 	    device_can_wakeup(mvm->trans->dev)) {
@@ -540,13 +543,22 @@
 		return -EACCES;
 
 	/* return from D0i3 before starting a new Tx aggregation */
-	if (action == IEEE80211_AMPDU_TX_START) {
+	switch (action) {
+	case IEEE80211_AMPDU_TX_START:
+	case IEEE80211_AMPDU_TX_STOP_CONT:
+	case IEEE80211_AMPDU_TX_STOP_FLUSH:
+	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
+	case IEEE80211_AMPDU_TX_OPERATIONAL:
 		iwl_mvm_ref(mvm, IWL_MVM_REF_TX_AGG);
 		tx_agg_ref = true;
 
 		/*
-		 * wait synchronously until D0i3 exit to get the correct
-		 * sequence number for the tid
+		 * for tx start, wait synchronously until D0i3 exit to
+		 * get the correct sequence number for the tid.
+		 * additionally, some other ampdu actions use direct
+		 * target access, which is not handled automatically
+		 * by the trans layer (unlike commands), so wait for
+		 * d0i3 exit in these cases as well.
 		 */
 		if (!wait_event_timeout(mvm->d0i3_exit_waitq,
 			  !test_bit(IWL_MVM_STATUS_IN_D0I3, &mvm->status), HZ)) {
@@ -554,6 +566,9 @@
 			iwl_mvm_unref(mvm, IWL_MVM_REF_TX_AGG);
 			return -EIO;
 		}
+		break;
+	default:
+		break;
 	}
 
 	mutex_lock(&mvm->mutex);
@@ -758,7 +773,7 @@
 		.pwr_restriction = cpu_to_le16(tx_power),
 	};
 
-	return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0,
 				    sizeof(reduce_txpwr_cmd),
 				    &reduce_txpwr_cmd);
 }
@@ -817,18 +832,17 @@
 	if (ret)
 		goto out_release;
 
-	ret = iwl_mvm_power_update_mac(mvm, vif);
+	ret = iwl_mvm_power_update_mac(mvm);
 	if (ret)
 		goto out_release;
 
 	/* beacon filtering */
-	ret = iwl_mvm_disable_beacon_filter(mvm, vif, CMD_SYNC);
+	ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
 	if (ret)
 		goto out_remove_mac;
 
-	if (!mvm->bf_allowed_vif && false &&
-	    vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
-	    mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BF_UPDATED){
+	if (!mvm->bf_allowed_vif &&
+	    vif->type == NL80211_IFTYPE_STATION && !vif->p2p) {
 		mvm->bf_allowed_vif = mvmvif;
 		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER |
 				     IEEE80211_VIF_SUPPORTS_CQM_RSSI;
@@ -969,7 +983,7 @@
 	if (mvm->vif_count && vif->type != NL80211_IFTYPE_P2P_DEVICE)
 		mvm->vif_count--;
 
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
 	iwl_mvm_mac_ctxt_remove(mvm, vif);
 
 out_release:
@@ -1223,10 +1237,14 @@
 	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING))
 		return 0;
 
+	/* bcast filtering isn't supported for P2P client */
+	if (vif->p2p)
+		return 0;
+
 	if (!iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
 		return 0;
 
-	return iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
 				    sizeof(cmd), &cmd);
 }
 #else
@@ -1253,7 +1271,7 @@
 	if (changes & BSS_CHANGED_ASSOC && bss_conf->assoc)
 		iwl_mvm_mac_ctxt_recalc_tsf_id(mvm, vif);
 
-	ret = iwl_mvm_mac_ctxt_changed(mvm, vif);
+	ret = iwl_mvm_mac_ctxt_changed(mvm, vif, false);
 	if (ret)
 		IWL_ERR(mvm, "failed to update MAC %pM\n", vif->addr);
 
@@ -1333,10 +1351,10 @@
 		iwl_mvm_remove_time_event(mvm, mvmvif,
 					  &mvmvif->time_event_data);
 		iwl_mvm_sf_update(mvm, vif, false);
-		WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, CMD_SYNC));
+		WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
 	} else if (changes & (BSS_CHANGED_PS | BSS_CHANGED_P2P_PS |
 			      BSS_CHANGED_QOS)) {
-		ret = iwl_mvm_power_update_mac(mvm, vif);
+		ret = iwl_mvm_power_update_mac(mvm);
 		if (ret)
 			IWL_ERR(mvm, "failed to update power mode\n");
 	}
@@ -1347,16 +1365,19 @@
 	}
 
 	if (changes & BSS_CHANGED_CQM) {
-		IWL_DEBUG_MAC80211(mvm, "cqm info_changed");
+		IWL_DEBUG_MAC80211(mvm, "cqm info_changed\n");
 		/* reset cqm events tracking */
 		mvmvif->bf_data.last_cqm_event = 0;
-		ret = iwl_mvm_update_beacon_filter(mvm, vif, false, CMD_SYNC);
-		if (ret)
-			IWL_ERR(mvm, "failed to update CQM thresholds\n");
+		if (mvmvif->bf_data.bf_enabled) {
+			ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+			if (ret)
+				IWL_ERR(mvm,
+					"failed to update CQM thresholds\n");
+		}
 	}
 
 	if (changes & BSS_CHANGED_ARP_FILTER) {
-		IWL_DEBUG_MAC80211(mvm, "arp filter changed");
+		IWL_DEBUG_MAC80211(mvm, "arp filter changed\n");
 		iwl_mvm_configure_bcast_filter(mvm, vif);
 	}
 }
@@ -1402,7 +1423,7 @@
 	mvmvif->ap_ibss_active = true;
 
 	/* power updated needs to be done before quotas */
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
 
 	ret = iwl_mvm_update_quotas(mvm, vif);
 	if (ret)
@@ -1410,7 +1431,7 @@
 
 	/* Need to update the P2P Device MAC (only GO, IBSS is single vif) */
 	if (vif->p2p && mvm->p2p_device_vif)
-		iwl_mvm_mac_ctxt_changed(mvm, mvm->p2p_device_vif);
+		iwl_mvm_mac_ctxt_changed(mvm, mvm->p2p_device_vif, false);
 
 	iwl_mvm_ref(mvm, IWL_MVM_REF_AP_IBSS);
 
@@ -1420,7 +1441,7 @@
 	return 0;
 
 out_quota_failed:
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
 	mvmvif->ap_ibss_active = false;
 	iwl_mvm_send_rm_bcast_sta(mvm, &mvmvif->bcast_sta);
 out_unbind:
@@ -1450,13 +1471,13 @@
 
 	/* Need to update the P2P Device MAC (only GO, IBSS is single vif) */
 	if (vif->p2p && mvm->p2p_device_vif)
-		iwl_mvm_mac_ctxt_changed(mvm, mvm->p2p_device_vif);
+		iwl_mvm_mac_ctxt_changed(mvm, mvm->p2p_device_vif, false);
 
 	iwl_mvm_update_quotas(mvm, NULL);
 	iwl_mvm_send_rm_bcast_sta(mvm, &mvmvif->bcast_sta);
 	iwl_mvm_binding_remove_vif(mvm, vif);
 
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
 
 	iwl_mvm_mac_ctxt_remove(mvm, vif);
 
@@ -1477,7 +1498,7 @@
 
 	if (changes & (BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_HT |
 		       BSS_CHANGED_BANDWIDTH) &&
-	    iwl_mvm_mac_ctxt_changed(mvm, vif))
+	    iwl_mvm_mac_ctxt_changed(mvm, vif, false))
 		IWL_ERR(mvm, "failed to update MAC %pM\n", vif->addr);
 
 	/* Need to send a new beacon template to the FW */
@@ -1495,6 +1516,9 @@
 
 	mutex_lock(&mvm->mutex);
 
+	if (changes & BSS_CHANGED_IDLE && !bss_conf->idle)
+		iwl_mvm_sched_scan_stop(mvm, true);
+
 	switch (vif->type) {
 	case NL80211_IFTYPE_STATION:
 		iwl_mvm_bss_info_changed_station(mvm, vif, bss_conf, changes);
@@ -1525,7 +1549,7 @@
 
 	switch (mvm->scan_status) {
 	case IWL_MVM_SCAN_SCHED:
-		ret = iwl_mvm_sched_scan_stop(mvm);
+		ret = iwl_mvm_sched_scan_stop(mvm, true);
 		if (ret) {
 			ret = -EBUSY;
 			goto out;
@@ -1697,6 +1721,11 @@
 		ret = iwl_mvm_add_sta(mvm, vif, sta);
 	} else if (old_state == IEEE80211_STA_NONE &&
 		   new_state == IEEE80211_STA_AUTH) {
+		/*
+		 * EBS may be disabled due to previous failures reported by FW.
+		 * Reset EBS status here assuming environment has been changed.
+		 */
+		mvm->last_ebs_successful = true;
 		ret = 0;
 	} else if (old_state == IEEE80211_STA_AUTH &&
 		   new_state == IEEE80211_STA_ASSOC) {
@@ -1708,14 +1737,12 @@
 	} else if (old_state == IEEE80211_STA_ASSOC &&
 		   new_state == IEEE80211_STA_AUTHORIZED) {
 		/* enable beacon filtering */
-		if (vif->bss_conf.dtim_period)
-			WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif,
-							     CMD_SYNC));
+		WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
 		ret = 0;
 	} else if (old_state == IEEE80211_STA_AUTHORIZED &&
 		   new_state == IEEE80211_STA_ASSOC) {
 		/* disable beacon filtering */
-		WARN_ON(iwl_mvm_disable_beacon_filter(mvm, vif, CMD_SYNC));
+		WARN_ON(iwl_mvm_disable_beacon_filter(mvm, vif, 0));
 		ret = 0;
 	} else if (old_state == IEEE80211_STA_ASSOC &&
 		   new_state == IEEE80211_STA_AUTH) {
@@ -1772,7 +1799,7 @@
 		int ret;
 
 		mutex_lock(&mvm->mutex);
-		ret = iwl_mvm_mac_ctxt_changed(mvm, vif);
+		ret = iwl_mvm_mac_ctxt_changed(mvm, vif, false);
 		mutex_unlock(&mvm->mutex);
 		return ret;
 	}
@@ -1865,7 +1892,7 @@
 	int ret;
 
 	mutex_lock(&mvm->mutex);
-	ret = iwl_mvm_sched_scan_stop(mvm);
+	ret = iwl_mvm_sched_scan_stop(mvm, false);
 	mutex_unlock(&mvm->mutex);
 	iwl_mvm_wait_for_async_handlers(mvm);
 
@@ -2161,10 +2188,10 @@
 		return;
 
 	mutex_lock(&mvm->mutex);
+	iwl_mvm_bt_coex_vif_change(mvm);
 	iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &ctx->min_def,
 				 ctx->rx_chains_static,
 				 ctx->rx_chains_dynamic);
-	iwl_mvm_bt_coex_vif_change(mvm);
 	mutex_unlock(&mvm->mutex);
 }
 
@@ -2184,6 +2211,11 @@
 
 	switch (vif->type) {
 	case NL80211_IFTYPE_AP:
+		/* Unless it's a CSA flow we have nothing to do here */
+		if (vif->csa_active) {
+			mvmvif->ap_ibss_active = true;
+			break;
+		}
 	case NL80211_IFTYPE_ADHOC:
 		/*
 		 * The AP binding flow is handled as part of the start_ap flow
@@ -2207,7 +2239,7 @@
 	 * Power state must be updated before quotas,
 	 * otherwise fw will complain.
 	 */
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
 
 	/* Setting the quota at this stage is only required for monitor
 	 * interfaces. For the other types, the bss_info changed flow
@@ -2220,11 +2252,17 @@
 			goto out_remove_binding;
 	}
 
+	/* Handle binding during CSA */
+	if (vif->type == NL80211_IFTYPE_AP) {
+		iwl_mvm_update_quotas(mvm, vif);
+		iwl_mvm_mac_ctxt_changed(mvm, vif, false);
+	}
+
 	goto out_unlock;
 
  out_remove_binding:
 	iwl_mvm_binding_remove_vif(mvm, vif);
-	iwl_mvm_power_update_mac(mvm, vif);
+	iwl_mvm_power_update_mac(mvm);
  out_unlock:
 	mutex_unlock(&mvm->mutex);
 	if (ret)
@@ -2244,22 +2282,29 @@
 	iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data);
 
 	switch (vif->type) {
-	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_ADHOC:
 		goto out_unlock;
 	case NL80211_IFTYPE_MONITOR:
 		mvmvif->monitor_active = false;
 		iwl_mvm_update_quotas(mvm, NULL);
 		break;
+	case NL80211_IFTYPE_AP:
+		/* This part is triggered only during CSA */
+		if (!vif->csa_active || !mvmvif->ap_ibss_active)
+			goto out_unlock;
+
+		mvmvif->ap_ibss_active = false;
+		iwl_mvm_update_quotas(mvm, NULL);
+		/*TODO: bt_coex notification here? */
 	default:
 		break;
 	}
 
 	iwl_mvm_binding_remove_vif(mvm, vif);
-	iwl_mvm_power_update_mac(mvm, vif);
 
 out_unlock:
 	mvmvif->phy_ctxt = NULL;
+	iwl_mvm_power_update_mac(mvm);
 	mutex_unlock(&mvm->mutex);
 }
 
@@ -2323,9 +2368,8 @@
 			return -EINVAL;
 
 		if (nla_get_u32(tb[IWL_MVM_TM_ATTR_BEACON_FILTER_STATE]))
-			return iwl_mvm_enable_beacon_filter(mvm, vif,
-							    CMD_SYNC);
-		return iwl_mvm_disable_beacon_filter(mvm, vif, CMD_SYNC);
+			return iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+		return iwl_mvm_disable_beacon_filter(mvm, vif, 0);
 	}
 
 	return -EOPNOTSUPP;
@@ -2346,6 +2390,53 @@
 }
 #endif
 
+/*
+ * channel_switch_beacon callback of iwl_mvm_hw_ops.
+ *
+ * Records @vif in mvm->csa_vif, under mvm->mutex, as the interface for which
+ * a channel switch (CSA) has started. If mvm->csa_vif already points to an
+ * interface whose csa_active is set, a warning is emitted and mvm->csa_vif is
+ * left unchanged. @chandef is only used for the debug print.
+ */
+static void iwl_mvm_channel_switch_beacon(struct ieee80211_hw *hw,
+					  struct ieee80211_vif *vif,
+					  struct cfg80211_chan_def *chandef)
+{
+	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+
+	mutex_lock(&mvm->mutex);
+	/* a single csa_vif is tracked; refuse to overwrite an active one */
+	if (WARN(mvm->csa_vif && mvm->csa_vif->csa_active,
+		 "Another CSA is already in progress"))
+		goto out_unlock;
+
+	IWL_DEBUG_MAC80211(mvm, "CSA started to freq %d\n",
+			   chandef->center_freq1);
+	mvm->csa_vif = vif;
+
+out_unlock:
+	mutex_unlock(&mvm->mutex);
+}
+
+/*
+ * flush callback of iwl_mvm_hw_ops.
+ *
+ * Does nothing unless @vif is non-NULL and of type NL80211_IFTYPE_STATION.
+ * Otherwise, under mvm->mutex, looks up the station stored at
+ * mvmvif->ap_sta_id and, depending on @drop, either calls
+ * iwl_mvm_flush_tx_path() or iwl_trans_wait_tx_queue_empty() on that
+ * station's tfd_queue_msk. The @queues argument is not used here.
+ */
+static void iwl_mvm_mac_flush(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif, u32 queues, bool drop)
+{
+	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+	struct iwl_mvm_vif *mvmvif;
+	struct iwl_mvm_sta *mvmsta;
+
+	if (!vif || vif->type != NL80211_IFTYPE_STATION)
+		return;
+
+	mutex_lock(&mvm->mutex);
+	mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	mvmsta = iwl_mvm_sta_from_staid_protected(mvm, mvmvif->ap_sta_id);
+
+	/* no station found for ap_sta_id: warn once and bail out */
+	if (WARN_ON_ONCE(!mvmsta))
+		goto done;
+
+	if (drop) {
+		/* a non-zero return is only logged, not propagated */
+		if (iwl_mvm_flush_tx_path(mvm, mvmsta->tfd_queue_msk, true))
+			IWL_ERR(mvm, "flush request fail\n");
+	} else {
+		iwl_trans_wait_tx_queue_empty(mvm->trans,
+					      mvmsta->tfd_queue_msk);
+	}
+done:
+	mutex_unlock(&mvm->mutex);
+}
+
 const struct ieee80211_ops iwl_mvm_hw_ops = {
 	.tx = iwl_mvm_mac_tx,
 	.ampdu_action = iwl_mvm_mac_ampdu_action,
@@ -2369,6 +2460,7 @@
 	.sta_rc_update = iwl_mvm_sta_rc_update,
 	.conf_tx = iwl_mvm_mac_conf_tx,
 	.mgd_prepare_tx = iwl_mvm_mac_mgd_prepare_tx,
+	.flush = iwl_mvm_mac_flush,
 	.sched_scan_start = iwl_mvm_mac_sched_scan_start,
 	.sched_scan_stop = iwl_mvm_mac_sched_scan_stop,
 	.set_key = iwl_mvm_mac_set_key,
@@ -2388,6 +2480,8 @@
 
 	.set_tim = iwl_mvm_set_tim,
 
+	.channel_switch_beacon = iwl_mvm_channel_switch_beacon,
+
 	CFG80211_TESTMODE_CMD(iwl_mvm_mac_testmode_cmd)
 
 #ifdef CONFIG_PM_SLEEP

diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index f1ec098..fcc6c29 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h

@@ -164,7 +164,6 @@
 	MVM_DEBUGFS_PM_SKIP_DTIM_PERIODS = BIT(2),
 	MVM_DEBUGFS_PM_RX_DATA_TIMEOUT = BIT(3),
 	MVM_DEBUGFS_PM_TX_DATA_TIMEOUT = BIT(4),
-	MVM_DEBUGFS_PM_DISABLE_POWER_OFF = BIT(5),
 	MVM_DEBUGFS_PM_LPRX_ENA = BIT(6),
 	MVM_DEBUGFS_PM_LPRX_RSSI_THRESHOLD = BIT(7),
 	MVM_DEBUGFS_PM_SNOOZE_ENABLE = BIT(8),
@@ -177,7 +176,6 @@
 	u32 tx_data_timeout;
 	bool skip_over_dtim;
 	u8 skip_dtim_periods;
-	bool disable_power_off;
 	bool lprx_ena;
 	u32 lprx_rssi_threshold;
 	bool snooze_ena;
@@ -232,6 +230,7 @@
 	IWL_MVM_REF_USER,
 	IWL_MVM_REF_TX,
 	IWL_MVM_REF_TX_AGG,
+	IWL_MVM_REF_EXIT_WORK,
 
 	IWL_MVM_REF_COUNT,
 };
@@ -265,6 +264,7 @@
  * @uploaded: indicates the MAC context has been added to the device
  * @ap_ibss_active: indicates that AP/IBSS is configured and that the interface
  *	should get quota etc.
+ * @pm_enabled: indicates if MAC power management is allowed
  * @monitor_active: indicates that monitor context is configured, and that the
  *	interface should get quota etc.
  * @low_latency: indicates that this interface is in low-latency mode
@@ -283,6 +283,7 @@
 
 	bool uploaded;
 	bool ap_ibss_active;
+	bool pm_enabled;
 	bool monitor_active;
 	bool low_latency;
 	struct iwl_mvm_vif_bf_data bf_data;
@@ -451,6 +452,11 @@
 	int last_frame_idx;
 };
 
+/*
+ * NOTE(review): presumably bit numbers used with iwl_mvm.d0i3_suspend_flags
+ * (an unsigned long protected by d0i3_suspend_mutex) - TODO confirm against
+ * the users of these values.
+ */
+enum {
+	D0I3_DEFER_WAKEUP,
+	D0I3_PENDING_WAKEUP,
+};
+
 struct iwl_mvm {
 	/* for logger access */
 	struct device *dev;
@@ -484,6 +490,7 @@
 	u32 log_event_table;
 	u32 umac_error_event_table;
 	bool support_umac_log;
+	struct iwl_sf_region sf_space;
 
 	u32 ampdu_ref;
 
@@ -495,6 +502,7 @@
 	u8 queue_to_mac80211[IWL_MAX_HW_QUEUES];
 	atomic_t queue_stop_count[IWL_MAX_HW_QUEUES];
 
+	const char *nvm_file_name;
 	struct iwl_nvm_data *nvm_data;
 	/* NVM sections */
 	struct iwl_nvm_section nvm_sections[NVM_MAX_NUM_SECTIONS];
@@ -535,6 +543,8 @@
 	/* Internal station */
 	struct iwl_mvm_int_sta aux_sta;
 
+	bool last_ebs_successful;
+
 	u8 scan_last_antenna_idx; /* to toggle TX between antennas */
 	u8 mgmt_last_antenna_idx;
 
@@ -578,8 +588,12 @@
 	void *fw_error_dump;
 	void *fw_error_sram;
 	u32 fw_error_sram_len;
+	u32 *fw_error_rxf;
+	u32 fw_error_rxf_len;
 
+#ifdef CONFIG_IWLWIFI_LEDS
 	struct led_classdev led;
+#endif
 
 	struct ieee80211_vif *p2p_device_vif;
 
@@ -601,6 +615,9 @@
 	bool d0i3_offloading;
 	struct work_struct d0i3_exit_work;
 	struct sk_buff_head d0i3_tx;
+	/* protect d0i3_suspend_flags */
+	struct mutex d0i3_suspend_mutex;
+	unsigned long d0i3_suspend_flags;
 	/* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */
 	spinlock_t d0i3_tx_lock;
 	wait_queue_head_t d0i3_exit_waitq;
@@ -629,8 +646,8 @@
 
 	/* Indicate if device power save is allowed */
 	bool ps_disabled;
-	/* Indicate if device power management is allowed */
-	bool pm_disabled;
+
+	struct ieee80211_vif *csa_vif;
 };
 
 /* Extract MVM priv from op_mode and _hw */
@@ -705,6 +722,7 @@
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm);
 void iwl_mvm_fw_error_sram_dump(struct iwl_mvm *mvm);
+void iwl_mvm_fw_error_rxf_dump(struct iwl_mvm *mvm);
 #endif
 u8 first_antenna(u8 mask);
 u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
@@ -745,7 +763,7 @@
 			  struct iwl_device_cmd *cmd);
 
 /* NVM */
-int iwl_nvm_init(struct iwl_mvm *mvm);
+int iwl_nvm_init(struct iwl_mvm *mvm, bool read_nvm_from_nic);
 int iwl_mvm_load_nvm_to_nic(struct iwl_mvm *mvm);
 
 int iwl_mvm_up(struct iwl_mvm *mvm);
@@ -796,7 +814,8 @@
 int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
 void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
 int iwl_mvm_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
-int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+			     bool force_assoc_off);
 int iwl_mvm_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
 u32 iwl_mvm_mac_get_queues_mask(struct iwl_mvm *mvm,
 				struct ieee80211_vif *vif);
@@ -840,7 +859,7 @@
 				       struct cfg80211_sched_scan_request *req);
 int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm,
 			     struct cfg80211_sched_scan_request *req);
-int iwl_mvm_sched_scan_stop(struct iwl_mvm *mvm);
+int iwl_mvm_sched_scan_stop(struct iwl_mvm *mvm, bool notify);
 int iwl_mvm_rx_sched_scan_results(struct iwl_mvm *mvm,
 				  struct iwl_rx_cmd_buffer *rxb,
 				  struct iwl_device_cmd *cmd);
@@ -874,10 +893,8 @@
 int rs_pretty_print_rate(char *buf, const u32 rate);
 
 /* power management */
-int iwl_power_legacy_set_cam_mode(struct iwl_mvm *mvm);
-
 int iwl_mvm_power_update_device(struct iwl_mvm *mvm);
-int iwl_mvm_power_update_mac(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_power_update_mac(struct iwl_mvm *mvm);
 int iwl_mvm_power_mac_dbgfs_read(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				 char *buf, int bufsz);
 
@@ -886,8 +903,18 @@
 					     struct iwl_rx_cmd_buffer *rxb,
 					     struct iwl_device_cmd *cmd);
 
+#ifdef CONFIG_IWLWIFI_LEDS
 int iwl_mvm_leds_init(struct iwl_mvm *mvm);
 void iwl_mvm_leds_exit(struct iwl_mvm *mvm);
+#else
+static inline int iwl_mvm_leds_init(struct iwl_mvm *mvm)
+{
+	return 0;
+}
+static inline void iwl_mvm_leds_exit(struct iwl_mvm *mvm)
+{
+}
+#endif
 
 /* D3 (WoWLAN, NetDetect) */
 int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan);
@@ -922,9 +949,9 @@
 void iwl_mvm_ref(struct iwl_mvm *mvm, enum iwl_mvm_ref_type ref_type);
 void iwl_mvm_unref(struct iwl_mvm *mvm, enum iwl_mvm_ref_type ref_type);
 void iwl_mvm_d0i3_enable_tx(struct iwl_mvm *mvm, __le16 *qos_seq);
+int _iwl_mvm_exit_d0i3(struct iwl_mvm *mvm);
 
 /* BT Coex */
-int iwl_send_bt_prio_tbl(struct iwl_mvm *mvm);
 int iwl_send_bt_init_conf(struct iwl_mvm *mvm);
 int iwl_mvm_rx_bt_coex_notif(struct iwl_mvm *mvm,
 			     struct iwl_rx_cmd_buffer *rxb,
@@ -936,9 +963,10 @@
 				struct ieee80211_sta *sta);
 bool iwl_mvm_bt_coex_is_mimo_allowed(struct iwl_mvm *mvm,
 				     struct ieee80211_sta *sta);
+bool iwl_mvm_bt_coex_is_tpc_allowed(struct iwl_mvm *mvm,
+				    enum ieee80211_band band);
 u8 iwl_mvm_bt_coex_tx_prio(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
 			   struct ieee80211_tx_info *info, u8 ac);
-int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id, bool enable);
 
 enum iwl_bt_kill_msk {
 	BT_KILL_MSK_DEFAULT,
@@ -969,17 +997,11 @@
 int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
 				  struct ieee80211_vif *vif,
 				  u32 flags);
-int iwl_mvm_update_beacon_abort(struct iwl_mvm *mvm,
-				struct ieee80211_vif *vif, bool enable);
-int iwl_mvm_update_beacon_filter(struct iwl_mvm *mvm,
-				 struct ieee80211_vif *vif,
-				 bool force,
-				 u32 flags);
-
 /* SMPS */
 void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				enum iwl_mvm_smps_type_request req_type,
 				enum ieee80211_smps_mode smps_request);
+bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm);
 
 /* Low latency */
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,

diff --git a/drivers/net/wireless/iwlwifi/mvm/nvm.c b/drivers/net/wireless/iwlwifi/mvm/nvm.c
index cf2d09f..808f78f 100644
--- a/drivers/net/wireless/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/iwlwifi/mvm/nvm.c

@@ -74,6 +74,12 @@
 #define NVM_WRITE_OPCODE 1
 #define NVM_READ_OPCODE 0
 
+/* load nvm chunk response */
+enum {
+	READ_NVM_CHUNK_SUCCEED = 0,
+	READ_NVM_CHUNK_NOT_VALID_ADDRESS = 1
+};
+
 /*
  * prepare the NVM host command w/ the pointers to the nvm buffer
  * and send it to fw
@@ -90,7 +96,7 @@
 	struct iwl_host_cmd cmd = {
 		.id = NVM_ACCESS_CMD,
 		.len = { sizeof(struct iwl_nvm_access_cmd), length },
-		.flags = CMD_SYNC | CMD_SEND_IN_RFKILL,
+		.flags = CMD_SEND_IN_RFKILL,
 		.data = { &nvm_access_cmd, data },
 		/* data may come from vmalloc, so use _DUP */
 		.dataflags = { 0, IWL_HCMD_DFL_DUP },
@@ -112,7 +118,7 @@
 	struct iwl_rx_packet *pkt;
 	struct iwl_host_cmd cmd = {
 		.id = NVM_ACCESS_CMD,
-		.flags = CMD_SYNC | CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
+		.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
 		.data = { &nvm_access_cmd, },
 	};
 	int ret, bytes_read, offset_read;
@@ -139,10 +145,26 @@
 	offset_read = le16_to_cpu(nvm_resp->offset);
 	resp_data = nvm_resp->data;
 	if (ret) {
-		IWL_ERR(mvm,
-			"NVM access command failed with status %d (device: %s)\n",
-			ret, mvm->cfg->name);
-		ret = -EINVAL;
+		if ((offset != 0) &&
+		    (ret == READ_NVM_CHUNK_NOT_VALID_ADDRESS)) {
+			/*
+			 * meaning of NOT_VALID_ADDRESS:
+			 * driver tried to read chunk from address that is
+			 * multiple of 2K and got an error since addr is empty.
+			 * meaning of (offset != 0): driver already
+			 * read valid data from another chunk so this case
+			 * is not an error.
+			 */
+			IWL_DEBUG_EEPROM(mvm->trans->dev,
+					 "NVM access command failed on offset 0x%x since that section size is multiple 2K\n",
+					 offset);
+			ret = 0;
+		} else {
+			IWL_DEBUG_EEPROM(mvm->trans->dev,
+					 "NVM access command failed with status %d (device: %s)\n",
+					 ret, mvm->cfg->name);
+			ret = -EIO;
+		}
 		goto exit;
 	}
 
@@ -211,9 +233,9 @@
 	while (ret == length) {
 		ret = iwl_nvm_read_chunk(mvm, section, offset, length, data);
 		if (ret < 0) {
-			IWL_ERR(mvm,
-				"Cannot read NVM from section %d offset %d, length %d\n",
-				section, offset, length);
+			IWL_DEBUG_EEPROM(mvm->trans->dev,
+					 "Cannot read NVM from section %d offset %d, length %d\n",
+					 section, offset, length);
 			return ret;
 		}
 		offset += ret;
@@ -238,13 +260,20 @@
 			return NULL;
 		}
 	} else {
+		/* SW and REGULATORY sections are mandatory */
 		if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data ||
-		    !mvm->nvm_sections[NVM_SECTION_TYPE_MAC_OVERRIDE].data ||
 		    !mvm->nvm_sections[NVM_SECTION_TYPE_REGULATORY].data) {
 			IWL_ERR(mvm,
 				"Can't parse empty family 8000 NVM sections\n");
 			return NULL;
 		}
+		/* MAC_OVERRIDE or at least HW section must exist */
+		if (!mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data &&
+		    !mvm->nvm_sections[NVM_SECTION_TYPE_MAC_OVERRIDE].data) {
+			IWL_ERR(mvm,
+				"Can't parse mac_address, empty sections\n");
+			return NULL;
+		}
 	}
 
 	if (WARN_ON(!mvm->cfg))
@@ -311,16 +340,16 @@
 	 * get here after that we assume the NVM request can be satisfied
 	 * synchronously.
 	 */
-	ret = request_firmware(&fw_entry, iwlwifi_mod_params.nvm_file,
+	ret = request_firmware(&fw_entry, mvm->nvm_file_name,
 			       mvm->trans->dev);
 	if (ret) {
 		IWL_ERR(mvm, "ERROR: %s isn't available %d\n",
-			iwlwifi_mod_params.nvm_file, ret);
+			mvm->nvm_file_name, ret);
 		return ret;
 	}
 
 	IWL_INFO(mvm, "Loaded NVM file %s (%zu bytes)\n",
-		 iwlwifi_mod_params.nvm_file, fw_entry->size);
+		 mvm->nvm_file_name, fw_entry->size);
 
 	if (fw_entry->size < sizeof(*file_sec)) {
 		IWL_ERR(mvm, "NVM file too small\n");
@@ -427,53 +456,28 @@
 	return ret;
 }
 
-int iwl_nvm_init(struct iwl_mvm *mvm)
+int iwl_nvm_init(struct iwl_mvm *mvm, bool read_nvm_from_nic)
 {
-	int ret, i, section;
+	int ret, section;
 	u8 *nvm_buffer, *temp;
-	int nvm_to_read[NVM_MAX_NUM_SECTIONS];
-	int num_of_sections_to_read;
 
 	if (WARN_ON_ONCE(mvm->cfg->nvm_hw_section_num >= NVM_MAX_NUM_SECTIONS))
 		return -EINVAL;
 
-	/* load external NVM if configured */
-	if (iwlwifi_mod_params.nvm_file) {
-		/* move to External NVM flow */
-		ret = iwl_mvm_read_external_nvm(mvm);
-		if (ret)
-			return ret;
-	} else {
-		/* list of NVM sections we are allowed/need to read */
-		if (mvm->trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) {
-			nvm_to_read[0] = mvm->cfg->nvm_hw_section_num;
-			nvm_to_read[1] = NVM_SECTION_TYPE_SW;
-			nvm_to_read[2] = NVM_SECTION_TYPE_CALIBRATION;
-			nvm_to_read[3] = NVM_SECTION_TYPE_PRODUCTION;
-			num_of_sections_to_read = 4;
-		} else {
-			nvm_to_read[0] = NVM_SECTION_TYPE_SW;
-			nvm_to_read[1] = NVM_SECTION_TYPE_CALIBRATION;
-			nvm_to_read[2] = NVM_SECTION_TYPE_PRODUCTION;
-			nvm_to_read[3] = NVM_SECTION_TYPE_REGULATORY;
-			nvm_to_read[4] = NVM_SECTION_TYPE_MAC_OVERRIDE;
-			num_of_sections_to_read = 5;
-		}
-
+	/* load NVM values from nic */
+	if (read_nvm_from_nic) {
 		/* Read From FW NVM */
 		IWL_DEBUG_EEPROM(mvm->trans->dev, "Read from NVM\n");
 
-		/* TODO: find correct NVM max size for a section */
 		nvm_buffer = kmalloc(mvm->cfg->base_params->eeprom_size,
 				     GFP_KERNEL);
 		if (!nvm_buffer)
 			return -ENOMEM;
-		for (i = 0; i < num_of_sections_to_read; i++) {
-			section = nvm_to_read[i];
+		for (section = 0; section < NVM_MAX_NUM_SECTIONS; section++) {
 			/* we override the constness for initial read */
 			ret = iwl_nvm_read_section(mvm, section, nvm_buffer);
 			if (ret < 0)
-				break;
+				continue;
 			temp = kmemdup(nvm_buffer, ret, GFP_KERNEL);
 			if (!temp) {
 				ret = -ENOMEM;
@@ -502,15 +506,21 @@
 					mvm->nvm_hw_blob.size = ret;
 					break;
 				}
-				WARN(1, "section: %d", section);
 			}
 #endif
 		}
 		kfree(nvm_buffer);
-		if (ret < 0)
+	}
+
+	/* load external NVM if configured */
+	if (mvm->nvm_file_name) {
+		/* move to External NVM flow */
+		ret = iwl_mvm_read_external_nvm(mvm);
+		if (ret)
 			return ret;
 	}
 
+	/* parse the relevant nvm sections */
 	mvm->nvm_data = iwl_parse_nvm_sections(mvm);
 	if (!mvm->nvm_data)
 		return -ENODATA;

diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index 9545d7f..cc2f7de 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c

@@ -79,8 +79,8 @@
 #include "iwl-prph.h"
 #include "rs.h"
 #include "fw-api-scan.h"
-#include "fw-error-dump.h"
 #include "time-event.h"
+#include "iwl-fw-error-dump.h"
 
 /*
  * module name, copyright, version, etc.
@@ -220,7 +220,7 @@
 	RX_HANDLER(BA_NOTIF, iwl_mvm_rx_ba_notif, false),
 
 	RX_HANDLER(BT_PROFILE_NOTIFICATION, iwl_mvm_rx_bt_coex_notif, true),
-	RX_HANDLER(BEACON_NOTIFICATION, iwl_mvm_rx_beacon_notif, false),
+	RX_HANDLER(BEACON_NOTIFICATION, iwl_mvm_rx_beacon_notif, true),
 	RX_HANDLER(STATISTICS_NOTIFICATION, iwl_mvm_rx_statistics, true),
 	RX_HANDLER(ANTENNA_COUPLING_NOTIFICATION,
 		   iwl_mvm_rx_ant_coupling_notif, true),
@@ -402,6 +402,7 @@
 	mvm->sf_state = SF_UNINIT;
 
 	mutex_init(&mvm->mutex);
+	mutex_init(&mvm->d0i3_suspend_mutex);
 	spin_lock_init(&mvm->async_handlers_lock);
 	INIT_LIST_HEAD(&mvm->time_event_list);
 	INIT_LIST_HEAD(&mvm->async_handlers_list);
@@ -465,13 +466,24 @@
 
 	min_backoff = calc_min_backoff(trans, cfg);
 	iwl_mvm_tt_initialize(mvm, min_backoff);
+	/* set the nvm_file_name according to priority */
+	if (iwlwifi_mod_params.nvm_file)
+		mvm->nvm_file_name = iwlwifi_mod_params.nvm_file;
+	else
+		mvm->nvm_file_name = mvm->cfg->default_nvm_file;
+
+	if (WARN(cfg->no_power_up_nic_in_init && !mvm->nvm_file_name,
+		 "not allowing power-up and not having nvm_file\n"))
+		goto out_free;
 
 	/*
-	 * If the NVM exists in an external file,
-	 * there is no need to unnecessarily power up the NIC at driver load
+	 * Even if nvm exists in the nvm_file driver should read again the nvm
+	 * from the nic because there might be entries that exist in the OTP
+	 * and not in the file.
+	 * for nics with no_power_up_nic_in_init: rely completely on nvm_file
 	 */
-	if (iwlwifi_mod_params.nvm_file) {
-		err = iwl_nvm_init(mvm);
+	if (cfg->no_power_up_nic_in_init && mvm->nvm_file_name) {
+		err = iwl_nvm_init(mvm, false);
 		if (err)
 			goto out_free;
 	} else {
@@ -518,7 +530,7 @@
  out_free:
 	iwl_phy_db_free(mvm->phy_db);
 	kfree(mvm->scan_cmd);
-	if (!iwlwifi_mod_params.nvm_file)
+	if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name)
 		iwl_trans_op_mode_leave(trans);
 	ieee80211_free_hw(mvm->hw);
 	return NULL;
@@ -538,6 +550,7 @@
 	kfree(mvm->scan_cmd);
 	vfree(mvm->fw_error_dump);
 	kfree(mvm->fw_error_sram);
+	kfree(mvm->fw_error_rxf);
 	kfree(mvm->mcast_filter_cmd);
 	mvm->mcast_filter_cmd = NULL;
 
@@ -814,6 +827,7 @@
 	struct iwl_fw_error_dump_file *dump_file;
 	struct iwl_fw_error_dump_data *dump_data;
 	u32 file_len;
+	u32 trans_len;
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -821,8 +835,13 @@
 		return;
 
 	file_len = mvm->fw_error_sram_len +
+		   mvm->fw_error_rxf_len +
 		   sizeof(*dump_file) +
-		   sizeof(*dump_data);
+		   sizeof(*dump_data) * 2;
+
+	trans_len = iwl_trans_dump_data(mvm->trans, NULL, 0);
+	if (trans_len)
+		file_len += trans_len;
 
 	dump_file = vmalloc(file_len);
 	if (!dump_file)
@@ -833,7 +852,12 @@
 	dump_file->barker = cpu_to_le32(IWL_FW_ERROR_DUMP_BARKER);
 	dump_file->file_len = cpu_to_le32(file_len);
 	dump_data = (void *)dump_file->data;
-	dump_data->type = IWL_FW_ERROR_DUMP_SRAM;
+	dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_RXF);
+	dump_data->len = cpu_to_le32(mvm->fw_error_rxf_len);
+	memcpy(dump_data->data, mvm->fw_error_rxf, mvm->fw_error_rxf_len);
+
+	dump_data = iwl_mvm_fw_error_next_data(dump_data);
+	dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_SRAM);
 	dump_data->len = cpu_to_le32(mvm->fw_error_sram_len);
 
 	/*
@@ -842,6 +866,23 @@
 	 * mvm->fw_error_sram right now.
 	 */
 	memcpy(dump_data->data, mvm->fw_error_sram, mvm->fw_error_sram_len);
+
+	kfree(mvm->fw_error_rxf);
+	mvm->fw_error_rxf = NULL;
+	mvm->fw_error_rxf_len = 0;
+
+	kfree(mvm->fw_error_sram);
+	mvm->fw_error_sram = NULL;
+	mvm->fw_error_sram_len = 0;
+
+	if (trans_len) {
+		void *buf = iwl_mvm_fw_error_next_data(dump_data);
+		u32 real_trans_len = iwl_trans_dump_data(mvm->trans, buf,
+							 trans_len);
+		dump_data = (void *)((u8 *)buf + real_trans_len);
+		dump_file->file_len =
+			cpu_to_le32(file_len - trans_len + real_trans_len);
+	}
 }
 #endif
 
@@ -853,6 +894,7 @@
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	iwl_mvm_fw_error_sram_dump(mvm);
+	iwl_mvm_fw_error_rxf_dump(mvm);
 #endif
 
 	iwl_mvm_nic_restart(mvm);
@@ -1126,9 +1168,9 @@
 	struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, d0i3_exit_work);
 	struct iwl_host_cmd get_status_cmd = {
 		.id = WOWLAN_GET_STATUSES,
-		.flags = CMD_SYNC | CMD_HIGH_PRIO | CMD_WANT_SKB,
+		.flags = CMD_HIGH_PRIO | CMD_WANT_SKB,
 	};
-	struct iwl_wowlan_status_v6 *status;
+	struct iwl_wowlan_status *status;
 	int ret;
 	u32 disconnection_reasons, wakeup_reasons;
 	__le16 *qos_seq = NULL;
@@ -1158,18 +1200,27 @@
 	iwl_free_resp(&get_status_cmd);
 out:
 	iwl_mvm_d0i3_enable_tx(mvm, qos_seq);
+	iwl_mvm_unref(mvm, IWL_MVM_REF_EXIT_WORK);
 	mutex_unlock(&mvm->mutex);
 }
 
-static int iwl_mvm_exit_d0i3(struct iwl_op_mode *op_mode)
+int _iwl_mvm_exit_d0i3(struct iwl_mvm *mvm)
 {
-	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
 	u32 flags = CMD_ASYNC | CMD_HIGH_PRIO | CMD_SEND_IN_IDLE |
 		    CMD_WAKE_UP_TRANS;
 	int ret;
 
 	IWL_DEBUG_RPM(mvm, "MVM exiting D0i3\n");
 
+	mutex_lock(&mvm->d0i3_suspend_mutex);
+	if (test_bit(D0I3_DEFER_WAKEUP, &mvm->d0i3_suspend_flags)) {
+		IWL_DEBUG_RPM(mvm, "Deferring d0i3 exit until resume\n");
+		__set_bit(D0I3_PENDING_WAKEUP, &mvm->d0i3_suspend_flags);
+		mutex_unlock(&mvm->d0i3_suspend_mutex);
+		return 0;
+	}
+	mutex_unlock(&mvm->d0i3_suspend_mutex);
+
 	ret = iwl_mvm_send_cmd_pdu(mvm, D0I3_END_CMD, flags, 0, NULL);
 	if (ret)
 		goto out;
@@ -1183,6 +1234,25 @@
 	return ret;
 }
 
+static int iwl_mvm_exit_d0i3(struct iwl_op_mode *op_mode)
+{
+	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+
+	iwl_mvm_ref(mvm, IWL_MVM_REF_EXIT_WORK);
+	return _iwl_mvm_exit_d0i3(mvm);
+}
+
+static void iwl_mvm_napi_add(struct iwl_op_mode *op_mode,
+			     struct napi_struct *napi,
+			     struct net_device *napi_dev,
+			     int (*poll)(struct napi_struct *, int),
+			     int weight)
+{
+	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+
+	ieee80211_napi_add(mvm->hw, napi, napi_dev, poll, weight);
+}
+
 static const struct iwl_op_mode_ops iwl_mvm_ops = {
 	.start = iwl_op_mode_mvm_start,
 	.stop = iwl_op_mode_mvm_stop,
@@ -1196,4 +1266,5 @@
 	.nic_config = iwl_mvm_nic_config,
 	.enter_d0i3 = iwl_mvm_enter_d0i3,
 	.exit_d0i3 = iwl_mvm_exit_d0i3,
+	.napi_add = iwl_mvm_napi_add,
 };

diff --git a/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c
index 237efe0..539f3a9 100644
--- a/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c

@@ -156,6 +156,18 @@
 	idle_cnt = chains_static;
 	active_cnt = chains_dynamic;
 
+	/* In scenarios where we only ever use single-stream rates,
+	 * i.e. legacy 11b/g/a associations, single-stream APs or even
+	 * static SMPS, enable both chains to get diversity, improving
+	 * the case where we're far enough from the AP that attenuation
+	 * between the two antennas is sufficiently different to impact
+	 * performance.
+	 */
+	if (active_cnt == 1 && iwl_mvm_rx_diversity_allowed(mvm)) {
+		idle_cnt = 2;
+		active_cnt = 2;
+	}
+
 	cmd->rxchain_info = cpu_to_le32(mvm->fw->valid_rx_ant <<
 					PHY_RX_CHAIN_VALID_POS);
 	cmd->rxchain_info |= cpu_to_le32(idle_cnt << PHY_RX_CHAIN_CNT_POS);
@@ -187,7 +199,7 @@
 	iwl_mvm_phy_ctxt_cmd_data(mvm, &cmd, chandef,
 				  chains_static, chains_dynamic);
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD, CMD_SYNC,
+	ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD, 0,
 				   sizeof(struct iwl_phy_context_cmd),
 				   &cmd);
 	if (ret)
@@ -202,18 +214,15 @@
 			 struct cfg80211_chan_def *chandef,
 			 u8 chains_static, u8 chains_dynamic)
 {
-	int ret;
-
 	WARN_ON(!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
 		ctxt->ref);
 	lockdep_assert_held(&mvm->mutex);
 
 	ctxt->channel = chandef->chan;
-	ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
-				     chains_static, chains_dynamic,
-				     FW_CTXT_ACTION_ADD, 0);
 
-	return ret;
+	return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
+				      chains_static, chains_dynamic,
+				      FW_CTXT_ACTION_ADD, 0);
 }
 
 /*

diff --git a/drivers/net/wireless/iwlwifi/mvm/power.c b/drivers/net/wireless/iwlwifi/mvm/power.c
index 6b636ea..c182a8b 100644
--- a/drivers/net/wireless/iwlwifi/mvm/power.c
+++ b/drivers/net/wireless/iwlwifi/mvm/power.c

@@ -123,28 +123,6 @@
 	cmd->ba_enable_beacon_abort = cpu_to_le32(mvmvif->bf_data.ba_enabled);
 }
 
-int iwl_mvm_update_beacon_abort(struct iwl_mvm *mvm,
-				struct ieee80211_vif *vif, bool enable)
-{
-	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-	struct iwl_beacon_filter_cmd cmd = {
-		IWL_BF_CMD_CONFIG_DEFAULTS,
-		.bf_enable_beacon_filter = cpu_to_le32(1),
-		.ba_enable_beacon_abort = cpu_to_le32(enable),
-	};
-
-	if (!mvmvif->bf_data.bf_enabled)
-		return 0;
-
-	if (mvm->cur_ucode == IWL_UCODE_WOWLAN)
-		cmd.ba_escape_timer = cpu_to_le32(IWL_BA_ESCAPE_TIMER_D3);
-
-	mvmvif->bf_data.ba_enabled = enable;
-	iwl_mvm_beacon_filter_set_cqm_params(mvm, vif, &cmd);
-	iwl_mvm_beacon_filter_debugfs_parameters(vif, &cmd);
-	return iwl_mvm_beacon_filter_send_cmd(mvm, &cmd, CMD_SYNC);
-}
-
 static void iwl_mvm_power_log(struct iwl_mvm *mvm,
 			      struct iwl_mac_power_cmd *cmd)
 {
@@ -268,6 +246,57 @@
 		IWL_MVM_PS_HEAVY_RX_THLD_PERCENT;
 }
 
+static void iwl_mvm_binding_iterator(void *_data, u8 *mac,
+				      struct ieee80211_vif *vif)
+{
+	unsigned long *data = _data;
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+	if (!mvmvif->phy_ctxt)
+		return;
+
+	if (vif->type == NL80211_IFTYPE_STATION ||
+	    vif->type == NL80211_IFTYPE_AP)
+		__set_bit(mvmvif->phy_ctxt->id, data);
+}
+
+static bool iwl_mvm_power_allow_uapsd(struct iwl_mvm *mvm,
+				       struct ieee80211_vif *vif)
+{
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	unsigned long phy_ctxt_counter = 0;
+
+	ieee80211_iterate_active_interfaces_atomic(mvm->hw,
+						   IEEE80211_IFACE_ITER_NORMAL,
+						   iwl_mvm_binding_iterator,
+						   &phy_ctxt_counter);
+
+	if (!memcmp(mvmvif->uapsd_misbehaving_bssid, vif->bss_conf.bssid,
+		    ETH_ALEN))
+		return false;
+
+	if (vif->p2p &&
+	    !(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD))
+		return false;
+	/*
+	 * Avoid using uAPSD if P2P client is associated to GO that uses
+	 * opportunistic power save. This is due to current FW limitation.
+	 */
+	if (vif->p2p &&
+	    (vif->bss_conf.p2p_noa_attr.oppps_ctwindow &
+	    IEEE80211_P2P_OPPPS_ENABLE_BIT))
+		return false;
+
+	/*
+	 * Avoid using uAPSD if client is in DCM -
+	 * low latency issue in Miracast
+	 */
+	if (hweight8(phy_ctxt_counter) >= 2)
+		return false;
+
+	return true;
+}
+
 static void iwl_mvm_power_build_cmd(struct iwl_mvm *mvm,
 				    struct ieee80211_vif *vif,
 				    struct iwl_mac_power_cmd *cmd)
@@ -280,7 +309,6 @@
 	bool radar_detect = false;
 	struct iwl_mvm_vif *mvmvif __maybe_unused =
 		iwl_mvm_vif_from_mac80211(vif);
-	bool allow_uapsd = true;
 
 	cmd->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
 							    mvmvif->color));
@@ -303,13 +331,8 @@
 
 	cmd->flags |= cpu_to_le16(POWER_FLAGS_POWER_SAVE_ENA_MSK);
 
-#ifdef CONFIG_IWLWIFI_DEBUGFS
-	if (mvmvif->dbgfs_pm.mask & MVM_DEBUGFS_PM_DISABLE_POWER_OFF &&
-	    mvmvif->dbgfs_pm.disable_power_off)
-		cmd->flags &= cpu_to_le16(~POWER_FLAGS_POWER_SAVE_ENA_MSK);
-#endif
 	if (!vif->bss_conf.ps || iwl_mvm_vif_low_latency(mvmvif) ||
-	    mvm->pm_disabled)
+	    !mvmvif->pm_enabled)
 		return;
 
 	cmd->flags |= cpu_to_le16(POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK);
@@ -351,23 +374,7 @@
 			cpu_to_le32(IWL_MVM_WOWLAN_PS_TX_DATA_TIMEOUT);
 	}
 
-	if (!memcmp(mvmvif->uapsd_misbehaving_bssid, vif->bss_conf.bssid,
-		    ETH_ALEN))
-		allow_uapsd = false;
-
-	if (vif->p2p &&
-	    !(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD))
-		allow_uapsd = false;
-	/*
-	 * Avoid using uAPSD if P2P client is associated to GO that uses
-	 * opportunistic power save. This is due to current FW limitation.
-	 */
-	if (vif->p2p &&
-	    vif->bss_conf.p2p_noa_attr.oppps_ctwindow &
-	    IEEE80211_P2P_OPPPS_ENABLE_BIT)
-		allow_uapsd = false;
-
-	if (allow_uapsd)
+	if (iwl_mvm_power_allow_uapsd(mvm, vif))
 		iwl_mvm_power_configure_uapsd(mvm, vif, cmd);
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
@@ -421,20 +428,13 @@
 {
 	struct iwl_mac_power_cmd cmd = {};
 
-	if (vif->type != NL80211_IFTYPE_STATION)
-		return 0;
-
-	if (vif->p2p &&
-	    !(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM))
-		return 0;
-
 	iwl_mvm_power_build_cmd(mvm, vif, &cmd);
 	iwl_mvm_power_log(mvm, &cmd);
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	memcpy(&iwl_mvm_vif_from_mac80211(vif)->mac_pwr_cmd, &cmd, sizeof(cmd));
 #endif
 
-	return iwl_mvm_send_cmd_pdu(mvm, MAC_PM_POWER_TABLE, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, MAC_PM_POWER_TABLE, 0,
 				    sizeof(cmd), &cmd);
 }
 
@@ -444,12 +444,6 @@
 		.flags = cpu_to_le16(DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK),
 	};
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT))
-		return 0;
-
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD))
-		return 0;
-
 	if (iwlmvm_mod_params.power_scheme == IWL_POWER_SCHEME_CAM)
 		mvm->ps_disabled = true;
 
@@ -466,7 +460,7 @@
 			"Sending device power command with flags = 0x%X\n",
 			cmd.flags);
 
-	return iwl_mvm_send_cmd_pdu(mvm, POWER_TABLE_CMD, CMD_SYNC, sizeof(cmd),
+	return iwl_mvm_send_cmd_pdu(mvm, POWER_TABLE_CMD, 0, sizeof(cmd),
 				    &cmd);
 }
 
@@ -508,86 +502,69 @@
 	return 0;
 }
 
-struct iwl_power_constraint {
+struct iwl_power_vifs {
 	struct ieee80211_vif *bf_vif;
 	struct ieee80211_vif *bss_vif;
 	struct ieee80211_vif *p2p_vif;
-	u16 bss_phyctx_id;
-	u16 p2p_phyctx_id;
-	bool pm_disabled;
-	bool ps_disabled;
-	struct iwl_mvm *mvm;
+	struct ieee80211_vif *ap_vif;
+	struct ieee80211_vif *monitor_vif;
+	bool p2p_active;
+	bool bss_active;
+	bool ap_active;
+	bool monitor_active;
 };
 
 static void iwl_mvm_power_iterator(void *_data, u8 *mac,
 				   struct ieee80211_vif *vif)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-	struct iwl_power_constraint *power_iterator = _data;
-	struct iwl_mvm *mvm = power_iterator->mvm;
+	struct iwl_power_vifs *power_iterator = _data;
 
+	mvmvif->pm_enabled = false;
 	switch (ieee80211_vif_type_p2p(vif)) {
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_AP:
-		/* no BSS power mgmt if we have an active AP */
-		if (mvmvif->ap_ibss_active)
-			power_iterator->pm_disabled = true;
+		/* only a single MAC of the same type */
+		WARN_ON(power_iterator->ap_vif);
+		power_iterator->ap_vif = vif;
+		if (mvmvif->phy_ctxt)
+			if (mvmvif->phy_ctxt->id < MAX_PHYS)
+				power_iterator->ap_active = true;
 		break;
 
 	case NL80211_IFTYPE_MONITOR:
-		/* no BSS power mgmt and no device power save */
-		power_iterator->pm_disabled = true;
-		power_iterator->ps_disabled = true;
+		/* only a single MAC of the same type */
+		WARN_ON(power_iterator->monitor_vif);
+		power_iterator->monitor_vif = vif;
+		if (mvmvif->phy_ctxt)
+			if (mvmvif->phy_ctxt->id < MAX_PHYS)
+				power_iterator->monitor_active = true;
 		break;
 
 	case NL80211_IFTYPE_P2P_CLIENT:
-		if (mvmvif->phy_ctxt)
-			power_iterator->p2p_phyctx_id = mvmvif->phy_ctxt->id;
-
-		/* we should have only one P2P vif */
+		/* only a single MAC of the same type */
 		WARN_ON(power_iterator->p2p_vif);
 		power_iterator->p2p_vif = vif;
-
-		IWL_DEBUG_POWER(mvm, "p2p: p2p_id=%d, bss_id=%d\n",
-				power_iterator->p2p_phyctx_id,
-				power_iterator->bss_phyctx_id);
-		if (!(mvm->fw->ucode_capa.flags &
-		      IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) {
-			/* no BSS power mgmt if we have a P2P client*/
-			power_iterator->pm_disabled = true;
-		} else if (power_iterator->p2p_phyctx_id < MAX_PHYS &&
-			   power_iterator->bss_phyctx_id < MAX_PHYS &&
-			   power_iterator->p2p_phyctx_id ==
-			   power_iterator->bss_phyctx_id) {
-			power_iterator->pm_disabled = true;
-		}
+		if (mvmvif->phy_ctxt)
+			if (mvmvif->phy_ctxt->id < MAX_PHYS)
+				power_iterator->p2p_active = true;
 		break;
 
 	case NL80211_IFTYPE_STATION:
-		if (mvmvif->phy_ctxt)
-			power_iterator->bss_phyctx_id = mvmvif->phy_ctxt->id;
-
-		/* we should have only one BSS vif */
+		/* only a single MAC of the same type */
 		WARN_ON(power_iterator->bss_vif);
 		power_iterator->bss_vif = vif;
+		if (mvmvif->phy_ctxt)
+			if (mvmvif->phy_ctxt->id < MAX_PHYS)
+				power_iterator->bss_active = true;
 
 		if (mvmvif->bf_data.bf_enabled &&
 		    !WARN_ON(power_iterator->bf_vif))
 			power_iterator->bf_vif = vif;
 
-		IWL_DEBUG_POWER(mvm, "bss: p2p_id=%d, bss_id=%d\n",
-				power_iterator->p2p_phyctx_id,
-				power_iterator->bss_phyctx_id);
-		if (mvm->fw->ucode_capa.flags &
-		    IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM &&
-			(power_iterator->p2p_phyctx_id < MAX_PHYS &&
-			 power_iterator->bss_phyctx_id < MAX_PHYS &&
-			 power_iterator->p2p_phyctx_id ==
-			 power_iterator->bss_phyctx_id))
-			power_iterator->pm_disabled = true;
 		break;
 
 	default:
@@ -596,70 +573,73 @@
 }
 
 static void
-iwl_mvm_power_get_global_constraint(struct iwl_mvm *mvm,
-				    struct iwl_power_constraint *constraint)
+iwl_mvm_power_set_pm(struct iwl_mvm *mvm,
+				    struct iwl_power_vifs *vifs)
 {
+	struct iwl_mvm_vif *bss_mvmvif = NULL;
+	struct iwl_mvm_vif *p2p_mvmvif = NULL;
+	struct iwl_mvm_vif *ap_mvmvif = NULL;
+	bool client_same_channel = false;
+	bool ap_same_channel = false;
+
 	lockdep_assert_held(&mvm->mutex);
 
-	if (iwlmvm_mod_params.power_scheme == IWL_POWER_SCHEME_CAM) {
-		constraint->pm_disabled = true;
-		constraint->ps_disabled = true;
-	}
-
+	/* get vifs info + set pm_enabled to false */
 	ieee80211_iterate_active_interfaces_atomic(mvm->hw,
 					    IEEE80211_IFACE_ITER_NORMAL,
-					    iwl_mvm_power_iterator, constraint);
-}
+					    iwl_mvm_power_iterator, vifs);
 
-int iwl_mvm_power_update_mac(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
-{
-	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-	struct iwl_power_constraint constraint = {
-		    .p2p_phyctx_id = MAX_PHYS,
-		    .bss_phyctx_id = MAX_PHYS,
-		    .mvm = mvm,
-	};
-	bool ba_enable;
-	int ret;
+	if (vifs->bss_vif)
+		bss_mvmvif = iwl_mvm_vif_from_mac80211(vifs->bss_vif);
 
-	lockdep_assert_held(&mvm->mutex);
+	if (vifs->p2p_vif)
+		p2p_mvmvif = iwl_mvm_vif_from_mac80211(vifs->p2p_vif);
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT))
-		return 0;
+	if (vifs->ap_vif)
+		ap_mvmvif = iwl_mvm_vif_from_mac80211(vifs->ap_vif);
 
-	iwl_mvm_power_get_global_constraint(mvm, &constraint);
-	mvm->ps_disabled = constraint.ps_disabled;
-	mvm->pm_disabled = constraint.pm_disabled;
-
-	/* don't update device power state unless we add / remove monitor */
-	if (vif->type == NL80211_IFTYPE_MONITOR) {
-		ret = iwl_mvm_power_update_device(mvm);
-		if (ret)
-			return ret;
+	/* enable PM on bss if bss stand alone */
+	if (vifs->bss_active && !vifs->p2p_active && !vifs->ap_active) {
+		bss_mvmvif->pm_enabled = true;
+		return;
 	}
 
-	if (constraint.bss_vif) {
-		ret = iwl_mvm_power_send_cmd(mvm, constraint.bss_vif);
-		if (ret)
-			return ret;
+	/* enable PM on p2p if p2p stand alone */
+	if (vifs->p2p_active && !vifs->bss_active && !vifs->ap_active) {
+		if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)
+			p2p_mvmvif->pm_enabled = true;
+		return;
 	}
 
-	if (constraint.p2p_vif) {
-		ret = iwl_mvm_power_send_cmd(mvm, constraint.p2p_vif);
-		if (ret)
-			return ret;
+	if (vifs->bss_active && vifs->p2p_active)
+		client_same_channel = (bss_mvmvif->phy_ctxt->id ==
+				       p2p_mvmvif->phy_ctxt->id);
+	if (vifs->bss_active && vifs->ap_active)
+		ap_same_channel = (bss_mvmvif->phy_ctxt->id ==
+				   ap_mvmvif->phy_ctxt->id);
+
+	/* clients are not stand alone: enable PM if DCM */
+	if (!(client_same_channel || ap_same_channel) &&
+	    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) {
+		if (vifs->bss_active)
+			bss_mvmvif->pm_enabled = true;
+		if (vifs->p2p_active &&
+		    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM))
+			p2p_mvmvif->pm_enabled = true;
+		return;
 	}
 
-	if (!constraint.bf_vif)
-		return 0;
-
-	vif = constraint.bf_vif;
-	mvmvif = iwl_mvm_vif_from_mac80211(vif);
-
-	ba_enable = !(constraint.pm_disabled || constraint.ps_disabled ||
-		      !vif->bss_conf.ps || iwl_mvm_vif_low_latency(mvmvif));
-
-	return iwl_mvm_update_beacon_abort(mvm, constraint.bf_vif, ba_enable);
+	/*
+	 * There is only one channel in the system and there are only
+	 * bss and p2p clients that share it
+	 */
+	if (client_same_channel && !vifs->ap_active &&
+	    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM)) {
+		/* share same channel */
+		bss_mvmvif->pm_enabled = true;
+		if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)
+			p2p_mvmvif->pm_enabled = true;
+	}
 }
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
@@ -671,19 +651,10 @@
 	struct iwl_mac_power_cmd cmd = {};
 	int pos = 0;
 
-	if (WARN_ON(!(mvm->fw->ucode_capa.flags &
-		      IWL_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)))
-		return 0;
-
 	mutex_lock(&mvm->mutex);
 	memcpy(&cmd, &mvmvif->mac_pwr_cmd, sizeof(cmd));
 	mutex_unlock(&mvm->mutex);
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DEVICE_PS_CMD))
-		pos += scnprintf(buf+pos, bufsz-pos, "disable_power_off = %d\n",
-				 (cmd.flags &
-				 cpu_to_le16(POWER_FLAGS_POWER_SAVE_ENA_MSK)) ?
-				 0 : 1);
 	pos += scnprintf(buf+pos, bufsz-pos, "power_scheme = %d\n",
 			 iwlmvm_mod_params.power_scheme);
 	pos += scnprintf(buf+pos, bufsz-pos, "flags = 0x%x\n",
@@ -790,7 +761,7 @@
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	int ret;
 
-	if (mvmvif != mvm->bf_allowed_vif ||
+	if (mvmvif != mvm->bf_allowed_vif || !vif->bss_conf.dtim_period ||
 	    vif->type != NL80211_IFTYPE_STATION || vif->p2p)
 		return 0;
 
@@ -818,6 +789,26 @@
 	return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, flags, false);
 }
 
+static int iwl_mvm_update_beacon_abort(struct iwl_mvm *mvm,
+				       struct ieee80211_vif *vif,
+				       bool enable)
+{
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	struct iwl_beacon_filter_cmd cmd = {
+		IWL_BF_CMD_CONFIG_DEFAULTS,
+		.bf_enable_beacon_filter = cpu_to_le32(1),
+	};
+
+	if (!mvmvif->bf_data.bf_enabled)
+		return 0;
+
+	if (mvm->cur_ucode == IWL_UCODE_WOWLAN)
+		cmd.ba_escape_timer = cpu_to_le32(IWL_BA_ESCAPE_TIMER_D3);
+
+	mvmvif->bf_data.ba_enabled = enable;
+	return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, 0, false);
+}
+
 int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
 				  struct ieee80211_vif *vif,
 				  u32 flags)
@@ -826,8 +817,7 @@
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	int ret;
 
-	if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BF_UPDATED) ||
-	    vif->type != NL80211_IFTYPE_STATION || vif->p2p)
+	if (vif->type != NL80211_IFTYPE_STATION || vif->p2p)
 		return 0;
 
 	ret = iwl_mvm_beacon_filter_send_cmd(mvm, &cmd, flags);
@@ -838,6 +828,55 @@
 	return ret;
 }
 
+int iwl_mvm_power_update_mac(struct iwl_mvm *mvm)
+{
+	struct iwl_mvm_vif *mvmvif;
+	struct iwl_power_vifs vifs = {};
+	bool ba_enable;
+	int ret;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	iwl_mvm_power_set_pm(mvm, &vifs);
+
+	/* disable PS if CAM */
+	if (iwlmvm_mod_params.power_scheme == IWL_POWER_SCHEME_CAM) {
+		mvm->ps_disabled = true;
+	} else {
+		/* only update device power state on monitor add / remove */
+		if (vifs.monitor_vif) {
+			if (vifs.monitor_active)
+				mvm->ps_disabled = true;
+			ret = iwl_mvm_power_update_device(mvm);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (vifs.bss_vif) {
+		ret = iwl_mvm_power_send_cmd(mvm, vifs.bss_vif);
+		if (ret)
+			return ret;
+	}
+
+	if (vifs.p2p_vif) {
+		ret = iwl_mvm_power_send_cmd(mvm, vifs.p2p_vif);
+		if (ret)
+			return ret;
+	}
+
+	if (!vifs.bf_vif)
+		return 0;
+
+	mvmvif = iwl_mvm_vif_from_mac80211(vifs.bf_vif);
+
+	ba_enable = !(!mvmvif->pm_enabled || mvm->ps_disabled ||
+		      !vifs.bf_vif->bss_conf.ps ||
+		      iwl_mvm_vif_low_latency(mvmvif));
+
+	return iwl_mvm_update_beacon_abort(mvm, vifs.bf_vif, ba_enable);
+}
+
 int iwl_mvm_update_d0i3_power_mode(struct iwl_mvm *mvm,
 				   struct ieee80211_vif *vif,
 				   bool enable, u32 flags)
@@ -861,9 +900,10 @@
 		if (WARN_ON(!dtimper_msec))
 			return 0;
 
-		cmd.flags |=
-			cpu_to_le16(POWER_FLAGS_SKIP_OVER_DTIM_MSK);
 		cmd.skip_dtim_periods = 300 / dtimper_msec;
+		if (cmd.skip_dtim_periods)
+			cmd.flags |=
+				cpu_to_le16(POWER_FLAGS_SKIP_OVER_DTIM_MSK);
 	}
 	iwl_mvm_power_log(mvm, &cmd);
 #ifdef CONFIG_IWLWIFI_DEBUGFS
@@ -894,33 +934,3 @@
 
 	return ret;
 }
-
-int iwl_mvm_update_beacon_filter(struct iwl_mvm *mvm,
-				 struct ieee80211_vif *vif,
-				 bool force,
-				 u32 flags)
-{
-	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-
-	if (mvmvif != mvm->bf_allowed_vif)
-		return 0;
-
-	if (!mvmvif->bf_data.bf_enabled) {
-		/* disable beacon filtering explicitly if force is true */
-		if (force)
-			return iwl_mvm_disable_beacon_filter(mvm, vif, flags);
-		return 0;
-	}
-
-	return iwl_mvm_enable_beacon_filter(mvm, vif, flags);
-}
-
-int iwl_power_legacy_set_cam_mode(struct iwl_mvm *mvm)
-{
-	struct iwl_powertable_cmd cmd = {
-		.keep_alive_seconds = POWER_KEEP_ALIVE_PERIOD_SEC,
-	};
-
-	return iwl_mvm_send_cmd_pdu(mvm, POWER_TABLE_CMD, CMD_SYNC,
-				    sizeof(cmd), &cmd);
-}

diff --git a/drivers/net/wireless/iwlwifi/mvm/quota.c b/drivers/net/wireless/iwlwifi/mvm/quota.c
index 35e86e0..ba68d7b 100644
--- a/drivers/net/wireless/iwlwifi/mvm/quota.c
+++ b/drivers/net/wireless/iwlwifi/mvm/quota.c

@@ -285,7 +285,7 @@
 
 	iwl_mvm_adjust_quota_for_noa(mvm, &cmd);
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, CMD_SYNC,
+	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0,
 				   sizeof(cmd), &cmd);
 	if (ret)
 		IWL_ERR(mvm, "Failed to send quota: %d\n", ret);

diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c
index e1c8388..306a6ca 100644
--- a/drivers/net/wireless/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/iwlwifi/mvm/rs.c

@@ -211,7 +211,7 @@
 		.next_columns = {
 			RS_COLUMN_LEGACY_ANT_B,
 			RS_COLUMN_SISO_ANT_A,
-			RS_COLUMN_SISO_ANT_B,
+			RS_COLUMN_MIMO2,
 			RS_COLUMN_INVALID,
 			RS_COLUMN_INVALID,
 			RS_COLUMN_INVALID,
@@ -223,8 +223,8 @@
 		.ant = ANT_B,
 		.next_columns = {
 			RS_COLUMN_LEGACY_ANT_A,
-			RS_COLUMN_SISO_ANT_A,
 			RS_COLUMN_SISO_ANT_B,
+			RS_COLUMN_MIMO2,
 			RS_COLUMN_INVALID,
 			RS_COLUMN_INVALID,
 			RS_COLUMN_INVALID,
@@ -238,10 +238,10 @@
 			RS_COLUMN_SISO_ANT_B,
 			RS_COLUMN_MIMO2,
 			RS_COLUMN_SISO_ANT_A_SGI,
-			RS_COLUMN_SISO_ANT_B_SGI,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
 			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_siso_allow,
@@ -254,10 +254,10 @@
 			RS_COLUMN_SISO_ANT_A,
 			RS_COLUMN_MIMO2,
 			RS_COLUMN_SISO_ANT_B_SGI,
-			RS_COLUMN_SISO_ANT_A_SGI,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
 			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_siso_allow,
@@ -271,10 +271,10 @@
 			RS_COLUMN_SISO_ANT_B_SGI,
 			RS_COLUMN_MIMO2_SGI,
 			RS_COLUMN_SISO_ANT_A,
-			RS_COLUMN_SISO_ANT_B,
-			RS_COLUMN_MIMO2,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_siso_allow,
@@ -289,10 +289,10 @@
 			RS_COLUMN_SISO_ANT_A_SGI,
 			RS_COLUMN_MIMO2_SGI,
 			RS_COLUMN_SISO_ANT_B,
-			RS_COLUMN_SISO_ANT_A,
-			RS_COLUMN_MIMO2,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_siso_allow,
@@ -304,12 +304,12 @@
 		.ant = ANT_AB,
 		.next_columns = {
 			RS_COLUMN_SISO_ANT_A,
-			RS_COLUMN_SISO_ANT_B,
-			RS_COLUMN_SISO_ANT_A_SGI,
-			RS_COLUMN_SISO_ANT_B_SGI,
 			RS_COLUMN_MIMO2_SGI,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_mimo_allow,
@@ -321,12 +321,12 @@
 		.sgi = true,
 		.next_columns = {
 			RS_COLUMN_SISO_ANT_A_SGI,
-			RS_COLUMN_SISO_ANT_B_SGI,
-			RS_COLUMN_SISO_ANT_A,
-			RS_COLUMN_SISO_ANT_B,
 			RS_COLUMN_MIMO2,
 			RS_COLUMN_LEGACY_ANT_A,
 			RS_COLUMN_LEGACY_ANT_B,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
+			RS_COLUMN_INVALID,
 		},
 		.checks = {
 			rs_mimo_allow,
@@ -527,6 +527,9 @@
 	IWL_DEBUG_RATE(mvm, "Clearing up window stats\n");
 	for (i = 0; i < IWL_RATE_COUNT; i++)
 		rs_rate_scale_clear_window(&tbl->win[i]);
+
+	for (i = 0; i < ARRAY_SIZE(tbl->tpc_win); i++)
+		rs_rate_scale_clear_window(&tbl->tpc_win[i]);
 }
 
 static inline u8 rs_is_valid_ant(u8 valid_antenna, u8 ant_type)
@@ -656,17 +659,34 @@
 	return 0;
 }
 
-static int rs_collect_tx_data(struct iwl_scale_tbl_info *tbl,
-			      int scale_index, int attempts, int successes)
+static int rs_collect_tx_data(struct iwl_lq_sta *lq_sta,
+			      struct iwl_scale_tbl_info *tbl,
+			      int scale_index, int attempts, int successes,
+			      u8 reduced_txp)
 {
 	struct iwl_rate_scale_data *window = NULL;
+	int ret;
 
 	if (scale_index < 0 || scale_index >= IWL_RATE_COUNT)
 		return -EINVAL;
 
+	if (tbl->column != RS_COLUMN_INVALID) {
+		lq_sta->tx_stats[tbl->column][scale_index].total += attempts;
+		lq_sta->tx_stats[tbl->column][scale_index].success += successes;
+	}
+
 	/* Select window for current tx bit rate */
 	window = &(tbl->win[scale_index]);
 
+	ret = _rs_collect_tx_data(tbl, scale_index, attempts, successes,
+				  window);
+	if (ret)
+		return ret;
+
+	if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
+		return -EINVAL;
+
+	window = &tbl->tpc_win[reduced_txp];
 	return _rs_collect_tx_data(tbl, scale_index, attempts, successes,
 				   window);
 }
@@ -1000,6 +1020,7 @@
 	u32 ucode_rate;
 	struct rs_rate rate;
 	struct iwl_scale_tbl_info *curr_tbl, *other_tbl, *tmp_tbl;
+	u8 reduced_txp = (uintptr_t)info->status.status_driver_data[0];
 
 	/* Treat uninitialized rate scaling data same as non-existing. */
 	if (!lq_sta) {
@@ -1141,9 +1162,10 @@
 	if (info->flags & IEEE80211_TX_STAT_AMPDU) {
 		ucode_rate = le32_to_cpu(table->rs_table[0]);
 		rs_rate_from_ucode_rate(ucode_rate, info->band, &rate);
-		rs_collect_tx_data(curr_tbl, rate.index,
+		rs_collect_tx_data(lq_sta, curr_tbl, rate.index,
 				   info->status.ampdu_len,
-				   info->status.ampdu_ack_len);
+				   info->status.ampdu_ack_len,
+				   reduced_txp);
 
 		/* Update success/fail counts if not searching for new mode */
 		if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN) {
@@ -1176,8 +1198,9 @@
 			else
 				continue;
 
-			rs_collect_tx_data(tmp_tbl, rate.index, 1,
-					   i < retries ? 0 : legacy_success);
+			rs_collect_tx_data(lq_sta, tmp_tbl, rate.index, 1,
+					   i < retries ? 0 : legacy_success,
+					   reduced_txp);
 		}
 
 		/* Update success/fail counts if not searching for new mode */
@@ -1188,6 +1211,7 @@
 	}
 	/* The last TX rate is cached in lq_sta; it's set in if/else above */
 	lq_sta->last_rate_n_flags = ucode_rate;
+	IWL_DEBUG_RATE(mvm, "reduced txpower: %d\n", reduced_txp);
 done:
 	/* See if there's a better rate or modulation mode to try. */
 	if (sta && sta->supp_rates[sband->band])
@@ -1311,105 +1335,50 @@
 	tbl->expected_tpt = rs_get_expected_tpt_table(lq_sta, column, rate->bw);
 }
 
-/*
- * Find starting rate for new "search" high-throughput mode of modulation.
- * Goal is to find lowest expected rate (under perfect conditions) that is
- * above the current measured throughput of "active" mode, to give new mode
- * a fair chance to prove itself without too many challenges.
- *
- * This gets called when transitioning to more aggressive modulation
- * (i.e. legacy to SISO or MIMO, or SISO to MIMO), as well as less aggressive
- * (i.e. MIMO to SISO).  When moving to MIMO, bit rate will typically need
- * to decrease to match "active" throughput.  When moving from MIMO to SISO,
- * bit rate will typically need to increase, but not if performance was bad.
- */
 static s32 rs_get_best_rate(struct iwl_mvm *mvm,
 			    struct iwl_lq_sta *lq_sta,
 			    struct iwl_scale_tbl_info *tbl,	/* "search" */
-			    u16 rate_mask, s8 index)
+			    unsigned long rate_mask, s8 index)
 {
-	/* "active" values */
 	struct iwl_scale_tbl_info *active_tbl =
 	    &(lq_sta->lq_info[lq_sta->active_tbl]);
-	s32 active_sr = active_tbl->win[index].success_ratio;
-	s32 active_tpt = active_tbl->expected_tpt[index];
-	/* expected "search" throughput */
+	s32 success_ratio = active_tbl->win[index].success_ratio;
+	u16 expected_current_tpt = active_tbl->expected_tpt[index];
 	const u16 *tpt_tbl = tbl->expected_tpt;
-
-	s32 new_rate, high, low, start_hi;
 	u16 high_low;
-	s8 rate = index;
+	u32 target_tpt;
+	int rate_idx;
 
-	new_rate = high = low = start_hi = IWL_RATE_INVALID;
-
-	while (1) {
-		high_low = rs_get_adjacent_rate(mvm, rate, rate_mask,
-						tbl->rate.type);
-
-		low = high_low & 0xff;
-		high = (high_low >> 8) & 0xff;
-
-		/*
-		 * Lower the "search" bit rate, to give new "search" mode
-		 * approximately the same throughput as "active" if:
-		 *
-		 * 1) "Active" mode has been working modestly well (but not
-		 *    great), and expected "search" throughput (under perfect
-		 *    conditions) at candidate rate is above the actual
-		 *    measured "active" throughput (but less than expected
-		 *    "active" throughput under perfect conditions).
-		 * OR
-		 * 2) "Active" mode has been working perfectly or very well
-		 *    and expected "search" throughput (under perfect
-		 *    conditions) at candidate rate is above expected
-		 *    "active" throughput (under perfect conditions).
-		 */
-		if ((((100 * tpt_tbl[rate]) > lq_sta->last_tpt) &&
-		     ((active_sr > RS_SR_FORCE_DECREASE) &&
-		      (active_sr <= IWL_RATE_HIGH_TH) &&
-		      (tpt_tbl[rate] <= active_tpt))) ||
-		    ((active_sr >= IWL_RATE_SCALE_SWITCH) &&
-		     (tpt_tbl[rate] > active_tpt))) {
-			/* (2nd or later pass)
-			 * If we've already tried to raise the rate, and are
-			 * now trying to lower it, use the higher rate. */
-			if (start_hi != IWL_RATE_INVALID) {
-				new_rate = start_hi;
-				break;
-			}
-
-			new_rate = rate;
-
-			/* Loop again with lower rate */
-			if (low != IWL_RATE_INVALID)
-				rate = low;
-
-			/* Lower rate not available, use the original */
-			else
-				break;
-
-		/* Else try to raise the "search" rate to match "active" */
-		} else {
-			/* (2nd or later pass)
-			 * If we've already tried to lower the rate, and are
-			 * now trying to raise it, use the lower rate. */
-			if (new_rate != IWL_RATE_INVALID)
-				break;
-
-			/* Loop again with higher rate */
-			else if (high != IWL_RATE_INVALID) {
-				start_hi = high;
-				rate = high;
-
-			/* Higher rate not available, use the original */
-			} else {
-				new_rate = rate;
-				break;
-			}
-		}
+	if (success_ratio > RS_SR_NO_DECREASE) {
+		target_tpt = 100 * expected_current_tpt;
+		IWL_DEBUG_RATE(mvm,
+			       "SR %d high. Find rate exceeding EXPECTED_CURRENT %d\n",
+			       success_ratio, target_tpt);
+	} else {
+		target_tpt = lq_sta->last_tpt;
+		IWL_DEBUG_RATE(mvm,
+			       "SR %d not thag good. Find rate exceeding ACTUAL_TPT %d\n",
+			       success_ratio, target_tpt);
 	}
 
-	return new_rate;
+	rate_idx = find_first_bit(&rate_mask, BITS_PER_LONG);
+
+	while (rate_idx != IWL_RATE_INVALID) {
+		if (target_tpt < (100 * tpt_tbl[rate_idx]))
+			break;
+
+		high_low = rs_get_adjacent_rate(mvm, rate_idx, rate_mask,
+						tbl->rate.type);
+
+		rate_idx = (high_low >> 8) & 0xff;
+	}
+
+	IWL_DEBUG_RATE(mvm, "Best rate found %d target_tp %d expected_new %d\n",
+		       rate_idx, target_tpt,
+		       rate_idx != IWL_RATE_INVALID ?
+		       100 * tpt_tbl[rate_idx] : IWL_INVALID_VALUE);
+
+	return rate_idx;
 }
 
 static u32 rs_bw_from_sta_bw(struct ieee80211_sta *sta)
@@ -1584,7 +1553,7 @@
 
 		tpt = lq_sta->last_tpt / 100;
 		expected_tpt_tbl = rs_get_expected_tpt_table(lq_sta, next_col,
-							     tbl->rate.bw);
+						     rs_bw_from_sta_bw(sta));
 		if (WARN_ON_ONCE(!expected_tpt_tbl))
 			continue;
 
@@ -1625,7 +1594,7 @@
 	const struct rs_tx_column *curr_column = &rs_tx_columns[tbl->column];
 	u32 sz = (sizeof(struct iwl_scale_tbl_info) -
 		  (sizeof(struct iwl_rate_scale_data) * IWL_RATE_COUNT));
-	u16 rate_mask = 0;
+	unsigned long rate_mask = 0;
 	u32 rate_idx = 0;
 
 	memcpy(search_tbl, tbl, sz);
@@ -1667,7 +1636,7 @@
 		    !(BIT(rate_idx) & rate_mask)) {
 			IWL_DEBUG_RATE(mvm,
 				       "can not switch with index %d"
-				       " rate mask %x\n",
+				       " rate mask %lx\n",
 				       rate_idx, rate_mask);
 
 			goto err;
@@ -1769,6 +1738,203 @@
 	return action;
 }
 
+static void rs_get_adjacent_txp(struct iwl_mvm *mvm, int index,
+				int *weaker, int *stronger)
+{
+	*weaker = index + TPC_TX_POWER_STEP;
+	if (*weaker > TPC_MAX_REDUCTION)
+		*weaker = TPC_INVALID;
+
+	*stronger = index - TPC_TX_POWER_STEP;
+	if (*stronger < 0)
+		*stronger = TPC_INVALID;
+}
+
+static bool rs_tpc_allowed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+			   struct rs_rate *rate, enum ieee80211_band band)
+{
+	int index = rate->index;
+	bool cam = (iwlmvm_mod_params.power_scheme == IWL_POWER_SCHEME_CAM);
+	bool sta_ps_disabled = (vif->type == NL80211_IFTYPE_STATION &&
+				!vif->bss_conf.ps);
+
+	IWL_DEBUG_RATE(mvm, "cam: %d sta_ps_disabled %d\n",
+		       cam, sta_ps_disabled);
+	/*
+	 * allow tpc only if power management is enabled, or bt coex
+	 * activity grade allows it and we are on 2.4GHz.
+	 */
+	if ((cam || sta_ps_disabled) &&
+	    !iwl_mvm_bt_coex_is_tpc_allowed(mvm, band))
+		return false;
+
+	IWL_DEBUG_RATE(mvm, "check rate, table type: %d\n", rate->type);
+	if (is_legacy(rate))
+		return index == IWL_RATE_54M_INDEX;
+	if (is_ht(rate))
+		return index == IWL_RATE_MCS_7_INDEX;
+	if (is_vht(rate))
+		return index == IWL_RATE_MCS_7_INDEX ||
+		       index == IWL_RATE_MCS_8_INDEX ||
+		       index == IWL_RATE_MCS_9_INDEX;
+
+	WARN_ON_ONCE(1);
+	return false;
+}
+
+enum tpc_action {
+	TPC_ACTION_STAY,
+	TPC_ACTION_DECREASE,
+	TPC_ACTION_INCREASE,
+	TPC_ACTION_NO_RESTIRCTION,
+};
+
+static enum tpc_action rs_get_tpc_action(struct iwl_mvm *mvm,
+					 s32 sr, int weak, int strong,
+					 int current_tpt,
+					 int weak_tpt, int strong_tpt)
+{
+	/* stay until we have valid tpt */
+	if (current_tpt == IWL_INVALID_VALUE) {
+		IWL_DEBUG_RATE(mvm, "no current tpt. stay.\n");
+		return TPC_ACTION_STAY;
+	}
+
+	/* Too many failures: request removal of all txp reduction */
+	if (sr <= TPC_SR_FORCE_INCREASE || current_tpt == 0) {
+		IWL_DEBUG_RATE(mvm, "increase txp because of weak SR\n");
+		return TPC_ACTION_NO_RESTIRCTION;
+	}
+
+	/* try decreasing first if applicable */
+	if (weak != TPC_INVALID) {
+		if (weak_tpt == IWL_INVALID_VALUE &&
+		    (strong_tpt == IWL_INVALID_VALUE ||
+		     current_tpt >= strong_tpt)) {
+			IWL_DEBUG_RATE(mvm,
+				       "no weak txp measurement. decrease txp\n");
+			return TPC_ACTION_DECREASE;
+		}
+
+		if (weak_tpt > current_tpt) {
+			IWL_DEBUG_RATE(mvm,
+				       "lower txp has better tpt. decrease txp\n");
+			return TPC_ACTION_DECREASE;
+		}
+	}
+
+	/* next, increase if needed */
+	if (sr < TPC_SR_NO_INCREASE && strong != TPC_INVALID) {
+		if (weak_tpt == IWL_INVALID_VALUE &&
+		    strong_tpt != IWL_INVALID_VALUE &&
+		    current_tpt < strong_tpt) {
+			IWL_DEBUG_RATE(mvm,
+				       "higher txp has better tpt. increase txp\n");
+			return TPC_ACTION_INCREASE;
+		}
+
+		if (weak_tpt < current_tpt &&
+		    (strong_tpt == IWL_INVALID_VALUE ||
+		     strong_tpt > current_tpt)) {
+			IWL_DEBUG_RATE(mvm,
+				       "lower txp has worse tpt. increase txp\n");
+			return TPC_ACTION_INCREASE;
+		}
+	}
+
+	IWL_DEBUG_RATE(mvm, "no need to increase or decrease txp - stay\n");
+	return TPC_ACTION_STAY;
+}
+
+static bool rs_tpc_perform(struct iwl_mvm *mvm,
+			   struct ieee80211_sta *sta,
+			   struct iwl_lq_sta *lq_sta,
+			   struct iwl_scale_tbl_info *tbl)
+{
+	struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;
+	struct ieee80211_vif *vif = mvm_sta->vif;
+	struct ieee80211_chanctx_conf *chanctx_conf;
+	enum ieee80211_band band;
+	struct iwl_rate_scale_data *window;
+	struct rs_rate *rate = &tbl->rate;
+	enum tpc_action action;
+	s32 sr;
+	u8 cur = lq_sta->lq.reduced_tpc;
+	int current_tpt;
+	int weak, strong;
+	int weak_tpt = IWL_INVALID_VALUE, strong_tpt = IWL_INVALID_VALUE;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	if (lq_sta->dbg_fixed_txp_reduction <= TPC_MAX_REDUCTION) {
+		IWL_DEBUG_RATE(mvm, "fixed tpc: %d\n",
+			       lq_sta->dbg_fixed_txp_reduction);
+		lq_sta->lq.reduced_tpc = lq_sta->dbg_fixed_txp_reduction;
+		return cur != lq_sta->dbg_fixed_txp_reduction;
+	}
+#endif
+
+	rcu_read_lock();
+	chanctx_conf = rcu_dereference(vif->chanctx_conf);
+	if (WARN_ON(!chanctx_conf))
+		band = IEEE80211_NUM_BANDS;
+	else
+		band = chanctx_conf->def.chan->band;
+	rcu_read_unlock();
+
+	if (!rs_tpc_allowed(mvm, vif, rate, band)) {
+		IWL_DEBUG_RATE(mvm,
+			       "tpc is not allowed. remove txp restrictions\n");
+		lq_sta->lq.reduced_tpc = TPC_NO_REDUCTION;
+		return cur != TPC_NO_REDUCTION;
+	}
+
+	rs_get_adjacent_txp(mvm, cur, &weak, &strong);
+
+	/* Collect measured throughputs for current and adjacent txp levels */
+	window = tbl->tpc_win;
+	sr = window[cur].success_ratio;
+	current_tpt = window[cur].average_tpt;
+	if (weak != TPC_INVALID)
+		weak_tpt = window[weak].average_tpt;
+	if (strong != TPC_INVALID)
+		strong_tpt = window[strong].average_tpt;
+
+	IWL_DEBUG_RATE(mvm,
+		       "(TPC: %d): cur_tpt %d SR %d weak %d strong %d weak_tpt %d strong_tpt %d\n",
+		       cur, current_tpt, sr, weak, strong,
+		       weak_tpt, strong_tpt);
+
+	action = rs_get_tpc_action(mvm, sr, weak, strong,
+				   current_tpt, weak_tpt, strong_tpt);
+
+	/* override actions if we are on the edge */
+	if (weak == TPC_INVALID && action == TPC_ACTION_DECREASE) {
+		IWL_DEBUG_RATE(mvm, "already in lowest txp, stay\n");
+		action = TPC_ACTION_STAY;
+	} else if (strong == TPC_INVALID &&
+		   (action == TPC_ACTION_INCREASE ||
+		    action == TPC_ACTION_NO_RESTIRCTION)) {
+		IWL_DEBUG_RATE(mvm, "already in highest txp, stay\n");
+		action = TPC_ACTION_STAY;
+	}
+
+	switch (action) {
+	case TPC_ACTION_DECREASE:
+		lq_sta->lq.reduced_tpc = weak;
+		return true;
+	case TPC_ACTION_INCREASE:
+		lq_sta->lq.reduced_tpc = strong;
+		return true;
+	case TPC_ACTION_NO_RESTIRCTION:
+		lq_sta->lq.reduced_tpc = TPC_NO_REDUCTION;
+		return true;
+	case TPC_ACTION_STAY:
+		/* do nothing */
+		break;
+	}
+	return false;
+}
+
 /*
  * Do rate scaling and search for new modulation mode.
  */
@@ -2019,6 +2185,9 @@
 		break;
 	case RS_ACTION_STAY:
 		/* No change */
+		if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN)
+			update_lq = rs_tpc_perform(mvm, sta, lq_sta, tbl);
+		break;
 	default:
 		break;
 	}
@@ -2271,10 +2440,6 @@
 			if (i == IWL_RATE_9M_INDEX)
 				continue;
 
-			/* Disable MCS9 as a workaround */
-			if (i == IWL_RATE_MCS_9_INDEX)
-				continue;
-
 			/* VHT MCS9 isn't valid for 20Mhz for NSS=1,2 */
 			if (i == IWL_RATE_MCS_9_INDEX &&
 			    sta->bandwidth == IEEE80211_STA_RX_BW_20)
@@ -2293,10 +2458,6 @@
 			if (i == IWL_RATE_9M_INDEX)
 				continue;
 
-			/* Disable MCS9 as a workaround */
-			if (i == IWL_RATE_MCS_9_INDEX)
-				continue;
-
 			/* VHT MCS9 isn't valid for 20Mhz for NSS=1,2 */
 			if (i == IWL_RATE_MCS_9_INDEX &&
 			    sta->bandwidth == IEEE80211_STA_RX_BW_20)
@@ -2478,6 +2639,7 @@
 	lq_sta->is_agg = 0;
 #ifdef CONFIG_MAC80211_DEBUGFS
 	lq_sta->dbg_fixed_rate = 0;
+	lq_sta->dbg_fixed_txp_reduction = TPC_INVALID;
 #endif
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	iwl_mvm_reset_frame_stats(mvm, &mvm->drv_rx_stats);
@@ -2653,6 +2815,7 @@
 		rs_build_rates_table_from_fixed(mvm, lq_cmd,
 						lq_sta->band,
 						lq_sta->dbg_fixed_rate);
+		lq_cmd->reduced_tpc = 0;
 		ant = (lq_sta->dbg_fixed_rate & RATE_MCS_ANT_ABC_MSK) >>
 			RATE_MCS_ANT_POS;
 	} else
@@ -2783,7 +2946,6 @@
 	size_t buf_size;
 	u32 parsed_rate;
 
-
 	mvm = lq_sta->drv;
 	memset(buf, 0, sizeof(buf));
 	buf_size = min(count, sizeof(buf) -  1);
@@ -2856,6 +3018,7 @@
 			lq_sta->lq.agg_disable_start_th,
 			lq_sta->lq.agg_frame_cnt_limit);
 
+	desc += sprintf(buff+desc, "reduced tpc=%d\n", lq_sta->lq.reduced_tpc);
 	desc += sprintf(buff+desc,
 			"Start idx [0]=0x%x [1]=0x%x [2]=0x%x [3]=0x%x\n",
 			lq_sta->lq.initial_rate_index[0],
@@ -2928,6 +3091,94 @@
 	.llseek = default_llseek,
 };
 
+static ssize_t rs_sta_dbgfs_drv_tx_stats_read(struct file *file,
+					      char __user *user_buf,
+					      size_t count, loff_t *ppos)
+{
+	static const char * const column_name[] = {
+		[RS_COLUMN_LEGACY_ANT_A] = "LEGACY_ANT_A",
+		[RS_COLUMN_LEGACY_ANT_B] = "LEGACY_ANT_B",
+		[RS_COLUMN_SISO_ANT_A] = "SISO_ANT_A",
+		[RS_COLUMN_SISO_ANT_B] = "SISO_ANT_B",
+		[RS_COLUMN_SISO_ANT_A_SGI] = "SISO_ANT_A_SGI",
+		[RS_COLUMN_SISO_ANT_B_SGI] = "SISO_ANT_B_SGI",
+		[RS_COLUMN_MIMO2] = "MIMO2",
+		[RS_COLUMN_MIMO2_SGI] = "MIMO2_SGI",
+	};
+
+	static const char * const rate_name[] = {
+		[IWL_RATE_1M_INDEX] = "1M",
+		[IWL_RATE_2M_INDEX] = "2M",
+		[IWL_RATE_5M_INDEX] = "5.5M",
+		[IWL_RATE_11M_INDEX] = "11M",
+		[IWL_RATE_6M_INDEX] = "6M|MCS0",
+		[IWL_RATE_9M_INDEX] = "9M",
+		[IWL_RATE_12M_INDEX] = "12M|MCS1",
+		[IWL_RATE_18M_INDEX] = "18M|MCS2",
+		[IWL_RATE_24M_INDEX] = "24M|MCS3",
+		[IWL_RATE_36M_INDEX] = "36M|MCS4",
+		[IWL_RATE_48M_INDEX] = "48M|MCS5",
+		[IWL_RATE_54M_INDEX] = "54M|MCS6",
+		[IWL_RATE_MCS_7_INDEX] = "MCS7",
+		[IWL_RATE_MCS_8_INDEX] = "MCS8",
+		[IWL_RATE_MCS_9_INDEX] = "MCS9",
+	};
+
+	char *buff, *pos, *endpos;
+	int col, rate;
+	ssize_t ret;
+	struct iwl_lq_sta *lq_sta = file->private_data;
+	struct rs_rate_stats *stats;
+	static const size_t bufsz = 1024;
+
+	buff = kmalloc(bufsz, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+	pos = buff;
+	endpos = pos + bufsz;
+
+	pos += scnprintf(pos, endpos - pos, "COLUMN,");
+	for (rate = 0; rate < IWL_RATE_COUNT; rate++)
+		pos += scnprintf(pos, endpos - pos, "%s,", rate_name[rate]);
+	pos += scnprintf(pos, endpos - pos, "\n");
+
+	for (col = 0; col < RS_COLUMN_COUNT; col++) {
+		pos += scnprintf(pos, endpos - pos,
+				 "%s,", column_name[col]);
+
+		for (rate = 0; rate < IWL_RATE_COUNT; rate++) {
+			stats = &(lq_sta->tx_stats[col][rate]);
+			pos += scnprintf(pos, endpos - pos,
+					 "%llu/%llu,",
+					 stats->success,
+					 stats->total);
+		}
+		pos += scnprintf(pos, endpos - pos, "\n");
+	}
+
+	ret = simple_read_from_buffer(user_buf, count, ppos, buff, pos - buff);
+	kfree(buff);
+	return ret;
+}
+
+static ssize_t rs_sta_dbgfs_drv_tx_stats_write(struct file *file,
+					       const char __user *user_buf,
+					       size_t count, loff_t *ppos)
+{
+	struct iwl_lq_sta *lq_sta = file->private_data;
+	memset(lq_sta->tx_stats, 0, sizeof(lq_sta->tx_stats));
+
+	return count;
+}
+
+static const struct file_operations rs_sta_dbgfs_drv_tx_stats_ops = {
+	.read = rs_sta_dbgfs_drv_tx_stats_read,
+	.write = rs_sta_dbgfs_drv_tx_stats_write,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 static void rs_add_debugfs(void *mvm, void *mvm_sta, struct dentry *dir)
 {
 	struct iwl_lq_sta *lq_sta = mvm_sta;
@@ -2937,9 +3188,15 @@
 	lq_sta->rs_sta_dbgfs_stats_table_file =
 		debugfs_create_file("rate_stats_table", S_IRUSR, dir,
 				    lq_sta, &rs_sta_dbgfs_stats_table_ops);
+	lq_sta->rs_sta_dbgfs_drv_tx_stats_file =
+		debugfs_create_file("drv_tx_stats", S_IRUSR | S_IWUSR, dir,
+				    lq_sta, &rs_sta_dbgfs_drv_tx_stats_ops);
 	lq_sta->rs_sta_dbgfs_tx_agg_tid_en_file =
 		debugfs_create_u8("tx_agg_tid_enable", S_IRUSR | S_IWUSR, dir,
 				  &lq_sta->tx_agg_tid_en);
+	lq_sta->rs_sta_dbgfs_reduced_txp_file =
+		debugfs_create_u8("reduced_tpc", S_IRUSR | S_IWUSR, dir,
+				  &lq_sta->dbg_fixed_txp_reduction);
 }
 
 static void rs_remove_debugfs(void *mvm, void *mvm_sta)
@@ -2947,7 +3204,9 @@
 	struct iwl_lq_sta *lq_sta = mvm_sta;
 	debugfs_remove(lq_sta->rs_sta_dbgfs_scale_table_file);
 	debugfs_remove(lq_sta->rs_sta_dbgfs_stats_table_file);
+	debugfs_remove(lq_sta->rs_sta_dbgfs_drv_tx_stats_file);
 	debugfs_remove(lq_sta->rs_sta_dbgfs_tx_agg_tid_en_file);
+	debugfs_remove(lq_sta->rs_sta_dbgfs_reduced_txp_file);
 }
 #endif
 

diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.h b/drivers/net/wireless/iwlwifi/mvm/rs.h
index 0acfac9..374a83d 100644
--- a/drivers/net/wireless/iwlwifi/mvm/rs.h
+++ b/drivers/net/wireless/iwlwifi/mvm/rs.h

@@ -158,6 +158,13 @@
 #define RS_SR_FORCE_DECREASE		1920	/*  15% */
 #define RS_SR_NO_DECREASE		10880	/*  85% */
 
+#define TPC_SR_FORCE_INCREASE		9600	/* 75% */
+#define TPC_SR_NO_INCREASE		10880	/* 85% */
+#define TPC_TX_POWER_STEP		3
+#define TPC_MAX_REDUCTION		15
+#define TPC_NO_REDUCTION		0
+#define TPC_INVALID			0xff
+
 #define LINK_QUAL_AGG_TIME_LIMIT_DEF	(4000) /* 4 milliseconds */
 #define LINK_QUAL_AGG_TIME_LIMIT_MAX	(8000)
 #define LINK_QUAL_AGG_TIME_LIMIT_MIN	(100)
@@ -266,9 +273,16 @@
 	RS_COLUMN_MIMO2_SGI,
 
 	RS_COLUMN_LAST = RS_COLUMN_MIMO2_SGI,
+	RS_COLUMN_COUNT = RS_COLUMN_LAST + 1,
 	RS_COLUMN_INVALID,
 };
 
+/* Packet stats per rate */
+struct rs_rate_stats {
+	u64 success;
+	u64 total;
+};
+
 /**
  * struct iwl_scale_tbl_info -- tx params and success history for all rates
  *
@@ -280,6 +294,8 @@
 	enum rs_column column;
 	const u16 *expected_tpt;	/* throughput metrics; expected_tpt_G, etc. */
 	struct iwl_rate_scale_data win[IWL_RATE_COUNT]; /* rate histories */
+	/* per txpower-reduction history */
+	struct iwl_rate_scale_data tpc_win[TPC_MAX_REDUCTION + 1];
 };
 
 enum {
@@ -315,6 +331,8 @@
 	bool is_vht;
 	enum ieee80211_band band;
 
+	struct rs_rate_stats tx_stats[RS_COLUMN_COUNT][IWL_RATE_COUNT];
+
 	/* The following are bitmaps of rates; IWL_RATE_6M_MASK, etc. */
 	unsigned long active_legacy_rate;
 	unsigned long active_siso_rate;
@@ -334,8 +352,11 @@
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct dentry *rs_sta_dbgfs_scale_table_file;
 	struct dentry *rs_sta_dbgfs_stats_table_file;
+	struct dentry *rs_sta_dbgfs_drv_tx_stats_file;
 	struct dentry *rs_sta_dbgfs_tx_agg_tid_en_file;
+	struct dentry *rs_sta_dbgfs_reduced_txp_file;
 	u32 dbg_fixed_rate;
+	u8 dbg_fixed_txp_reduction;
 #endif
 	struct iwl_mvm *drv;
 
@@ -345,6 +366,9 @@
 	u32 last_rate_n_flags;
 	/* packets destined for this STA are aggregated */
 	u8 is_agg;
+
+	/* tx power reduction for this sta */
+	int tpc_reduce;
 };
 
 /* Initialize station's rate scaling information after adding station */

diff --git a/drivers/net/wireless/iwlwifi/mvm/rx.c b/drivers/net/wireless/iwlwifi/mvm/rx.c
index 6061553..cf72769 100644
--- a/drivers/net/wireless/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/iwlwifi/mvm/rx.c

@@ -60,7 +60,6 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *****************************************************************************/
 #include "iwl-trans.h"
-
 #include "mvm.h"
 #include "fw-api.h"
 
@@ -130,42 +129,7 @@
 
 	memcpy(IEEE80211_SKB_RXCB(skb), stats, sizeof(*stats));
 
-	ieee80211_rx_ni(mvm->hw, skb);
-}
-
-static void iwl_mvm_calc_rssi(struct iwl_mvm *mvm,
-			      struct iwl_rx_phy_info *phy_info,
-			      struct ieee80211_rx_status *rx_status)
-{
-	int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm;
-	u32 agc_a, agc_b;
-	u32 val;
-
-	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_AGC_IDX]);
-	agc_a = (val & IWL_OFDM_AGC_A_MSK) >> IWL_OFDM_AGC_A_POS;
-	agc_b = (val & IWL_OFDM_AGC_B_MSK) >> IWL_OFDM_AGC_B_POS;
-
-	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_RSSI_AB_IDX]);
-	rssi_a = (val & IWL_OFDM_RSSI_INBAND_A_MSK) >> IWL_OFDM_RSSI_A_POS;
-	rssi_b = (val & IWL_OFDM_RSSI_INBAND_B_MSK) >> IWL_OFDM_RSSI_B_POS;
-
-	/*
-	 * dBm = rssi dB - agc dB - constant.
-	 * Higher AGC (higher radio gain) means lower signal.
-	 */
-	rssi_a_dbm = rssi_a - IWL_RSSI_OFFSET - agc_a;
-	rssi_b_dbm = rssi_b - IWL_RSSI_OFFSET - agc_b;
-	max_rssi_dbm = max_t(int, rssi_a_dbm, rssi_b_dbm);
-
-	IWL_DEBUG_STATS(mvm, "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n",
-			rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b);
-
-	rx_status->signal = max_rssi_dbm;
-	rx_status->chains = (le16_to_cpu(phy_info->phy_flags) &
-				RX_RES_PHY_FLAGS_ANTENNA)
-					>> RX_RES_PHY_FLAGS_ANTENNA_POS;
-	rx_status->chain_signal[0] = rssi_a_dbm;
-	rx_status->chain_signal[1] = rssi_b_dbm;
+	ieee80211_rx(mvm->hw, skb);
 }
 
 /*
@@ -337,10 +301,7 @@
 	 */
 	/*rx_status.flag |= RX_FLAG_MACTIME_MPDU;*/
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_RX_ENERGY_API)
-		iwl_mvm_get_signal_strength(mvm, phy_info, &rx_status);
-	else
-		iwl_mvm_calc_rssi(mvm, phy_info, &rx_status);
+	iwl_mvm_get_signal_strength(mvm, phy_info, &rx_status);
 
 	IWL_DEBUG_STATS_LIMIT(mvm, "Rssi %d, TSF %llu\n", rx_status.signal,
 			      (unsigned long long)rx_status.mactime);
@@ -394,6 +355,8 @@
 		rx_status.rate_idx = rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK;
 		rx_status.flag |= RX_FLAG_VHT;
 		rx_status.flag |= stbc << RX_FLAG_STBC_SHIFT;
+		if (rate_n_flags & RATE_MCS_BF_MSK)
+			rx_status.vht_flag |= RX_VHT_FLAG_BF;
 	} else {
 		rx_status.rate_idx =
 			iwl_mvm_legacy_rate_to_mac80211_idx(rate_n_flags,

diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c
index c28de54..4b6c7d4 100644
--- a/drivers/net/wireless/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/iwlwifi/mvm/scan.c

@@ -306,7 +306,6 @@
 		.id = SCAN_REQUEST_CMD,
 		.len = { 0, },
 		.data = { mvm->scan_cmd, },
-		.flags = CMD_SYNC,
 		.dataflags = { IWL_HCMD_DFL_NOCOPY, },
 	};
 	struct iwl_scan_cmd *cmd = mvm->scan_cmd;
@@ -319,7 +318,10 @@
 	struct iwl_mvm_scan_params params = {};
 
 	lockdep_assert_held(&mvm->mutex);
-	BUG_ON(mvm->scan_cmd == NULL);
+
+	/* we should have failed registration if scan_cmd was NULL */
+	if (WARN_ON(mvm->scan_cmd == NULL))
+		return -ENOMEM;
 
 	IWL_DEBUG_SCAN(mvm, "Handling mac80211 scan request\n");
 	mvm->scan_status = IWL_MVM_SCAN_OS;
@@ -514,7 +516,7 @@
 				   ARRAY_SIZE(scan_abort_notif),
 				   iwl_mvm_scan_abort_notif, NULL);
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, SCAN_ABORT_CMD, CMD_SYNC, 0, NULL);
+	ret = iwl_mvm_send_cmd_pdu(mvm, SCAN_ABORT_CMD, 0, 0, NULL);
 	if (ret) {
 		IWL_ERR(mvm, "Couldn't send SCAN_ABORT_CMD: %d\n", ret);
 		/* mac80211's state will be cleaned in the nic_restart flow */
@@ -538,9 +540,13 @@
 	/* scan status must be locked for proper checking */
 	lockdep_assert_held(&mvm->mutex);
 
-	IWL_DEBUG_SCAN(mvm, "Scheduled scan completed, status %s\n",
+	IWL_DEBUG_SCAN(mvm,
+		       "Scheduled scan completed, status %s EBS status %s:%d\n",
 		       scan_notif->status == IWL_SCAN_OFFLOAD_COMPLETED ?
-		       "completed" : "aborted");
+		       "completed" : "aborted", scan_notif->ebs_status ==
+		       IWL_SCAN_EBS_SUCCESS ? "success" : "failed",
+		       scan_notif->ebs_status);
+
 
 	/* only call mac80211 completion if the stop was initiated by FW */
 	if (mvm->scan_status == IWL_MVM_SCAN_SCHED) {
@@ -548,6 +554,8 @@
 		ieee80211_sched_scan_stopped(mvm->hw);
 	}
 
+	mvm->last_ebs_successful = !scan_notif->ebs_status;
+
 	return 0;
 }
 
@@ -740,7 +748,6 @@
 	struct iwl_scan_offload_cfg *scan_cfg;
 	struct iwl_host_cmd cmd = {
 		.id = SCAN_OFFLOAD_CONFIG_CMD,
-		.flags = CMD_SYNC,
 	};
 	struct iwl_mvm_scan_params params = {};
 
@@ -798,7 +805,6 @@
 	struct iwl_scan_offload_blacklist *blacklist;
 	struct iwl_host_cmd cmd = {
 		.id = SCAN_OFFLOAD_UPDATE_PROFILES_CMD,
-		.flags = CMD_SYNC,
 		.len[1] = sizeof(*profile_cfg),
 		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
 		.dataflags[1] = IWL_HCMD_DFL_NOCOPY,
@@ -884,7 +890,12 @@
 		scan_req.flags |= cpu_to_le16(IWL_SCAN_OFFLOAD_FLAG_PASS_ALL);
 	}
 
-	return iwl_mvm_send_cmd_pdu(mvm, SCAN_OFFLOAD_REQUEST_CMD, CMD_SYNC,
+	if (mvm->last_ebs_successful &&
+	    mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_EBS_SUPPORT)
+		scan_req.flags |=
+			cpu_to_le16(IWL_SCAN_OFFLOAD_FLAG_EBS_ACCURATE_MODE);
+
+	return iwl_mvm_send_cmd_pdu(mvm, SCAN_OFFLOAD_REQUEST_CMD, 0,
 				    sizeof(scan_req), &scan_req);
 }
 
@@ -893,7 +904,6 @@
 	int ret;
 	struct iwl_host_cmd cmd = {
 		.id = SCAN_OFFLOAD_ABORT_CMD,
-		.flags = CMD_SYNC,
 	};
 	u32 status;
 
@@ -922,7 +932,7 @@
 	return ret;
 }
 
-int iwl_mvm_sched_scan_stop(struct iwl_mvm *mvm)
+int iwl_mvm_sched_scan_stop(struct iwl_mvm *mvm, bool notify)
 {
 	int ret;
 	struct iwl_notification_wait wait_scan_done;
@@ -960,5 +970,8 @@
 	 */
 	mvm->scan_status = IWL_MVM_SCAN_NONE;
 
+	if (notify)
+		ieee80211_sched_scan_stopped(mvm->hw);
+
 	return 0;
 }

diff --git a/drivers/net/wireless/iwlwifi/mvm/sf.c b/drivers/net/wireless/iwlwifi/mvm/sf.c
index 88809b2..7edfd15 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sf.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sf.c

@@ -237,9 +237,6 @@
 		.sta_vif_ap_sta_id = IWL_MVM_STATION_COUNT,
 	};
 
-	if (IWL_UCODE_API(mvm->fw->ucode_ver) < 8)
-		return 0;
-
 	/*
 	 * Ignore the call if we are in HW Restart flow, or if the handled
 	 * vif is a p2p device.

diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c
index f339ef8..1fb01ea 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.c

@@ -66,115 +66,6 @@
 #include "sta.h"
 #include "rs.h"
 
-static void iwl_mvm_add_sta_cmd_v7_to_v5(struct iwl_mvm_add_sta_cmd_v7 *cmd_v7,
-					 struct iwl_mvm_add_sta_cmd_v5 *cmd_v5)
-{
-	memset(cmd_v5, 0, sizeof(*cmd_v5));
-
-	cmd_v5->add_modify = cmd_v7->add_modify;
-	cmd_v5->tid_disable_tx = cmd_v7->tid_disable_tx;
-	cmd_v5->mac_id_n_color = cmd_v7->mac_id_n_color;
-	memcpy(cmd_v5->addr, cmd_v7->addr, ETH_ALEN);
-	cmd_v5->sta_id = cmd_v7->sta_id;
-	cmd_v5->modify_mask = cmd_v7->modify_mask;
-	cmd_v5->station_flags = cmd_v7->station_flags;
-	cmd_v5->station_flags_msk = cmd_v7->station_flags_msk;
-	cmd_v5->add_immediate_ba_tid = cmd_v7->add_immediate_ba_tid;
-	cmd_v5->remove_immediate_ba_tid = cmd_v7->remove_immediate_ba_tid;
-	cmd_v5->add_immediate_ba_ssn = cmd_v7->add_immediate_ba_ssn;
-	cmd_v5->sleep_tx_count = cmd_v7->sleep_tx_count;
-	cmd_v5->sleep_state_flags = cmd_v7->sleep_state_flags;
-	cmd_v5->assoc_id = cmd_v7->assoc_id;
-	cmd_v5->beamform_flags = cmd_v7->beamform_flags;
-	cmd_v5->tfd_queue_msk = cmd_v7->tfd_queue_msk;
-}
-
-static void
-iwl_mvm_add_sta_key_to_add_sta_cmd_v5(struct iwl_mvm_add_sta_key_cmd *key_cmd,
-				      struct iwl_mvm_add_sta_cmd_v5 *sta_cmd,
-				      u32 mac_id_n_color)
-{
-	memset(sta_cmd, 0, sizeof(*sta_cmd));
-
-	sta_cmd->sta_id = key_cmd->sta_id;
-	sta_cmd->add_modify = STA_MODE_MODIFY;
-	sta_cmd->modify_mask = STA_MODIFY_KEY;
-	sta_cmd->mac_id_n_color = cpu_to_le32(mac_id_n_color);
-
-	sta_cmd->key.key_offset = key_cmd->key_offset;
-	sta_cmd->key.key_flags = key_cmd->key_flags;
-	memcpy(sta_cmd->key.key, key_cmd->key, sizeof(sta_cmd->key.key));
-	sta_cmd->key.tkip_rx_tsc_byte2 = key_cmd->tkip_rx_tsc_byte2;
-	memcpy(sta_cmd->key.tkip_rx_ttak, key_cmd->tkip_rx_ttak,
-	       sizeof(sta_cmd->key.tkip_rx_ttak));
-}
-
-static int iwl_mvm_send_add_sta_cmd_status(struct iwl_mvm *mvm,
-					   struct iwl_mvm_add_sta_cmd_v7 *cmd,
-					   int *status)
-{
-	struct iwl_mvm_add_sta_cmd_v5 cmd_v5;
-
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_STA_KEY_CMD)
-		return iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(*cmd),
-						   cmd, status);
-
-	iwl_mvm_add_sta_cmd_v7_to_v5(cmd, &cmd_v5);
-
-	return iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd_v5),
-					   &cmd_v5, status);
-}
-
-static int iwl_mvm_send_add_sta_cmd(struct iwl_mvm *mvm, u32 flags,
-				    struct iwl_mvm_add_sta_cmd_v7 *cmd)
-{
-	struct iwl_mvm_add_sta_cmd_v5 cmd_v5;
-
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_STA_KEY_CMD)
-		return iwl_mvm_send_cmd_pdu(mvm, ADD_STA, flags,
-					    sizeof(*cmd), cmd);
-
-	iwl_mvm_add_sta_cmd_v7_to_v5(cmd, &cmd_v5);
-
-	return iwl_mvm_send_cmd_pdu(mvm, ADD_STA, flags, sizeof(cmd_v5),
-				    &cmd_v5);
-}
-
-static int
-iwl_mvm_send_add_sta_key_cmd_status(struct iwl_mvm *mvm,
-				    struct iwl_mvm_add_sta_key_cmd *cmd,
-				    u32 mac_id_n_color,
-				    int *status)
-{
-	struct iwl_mvm_add_sta_cmd_v5 sta_cmd;
-
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_STA_KEY_CMD)
-		return iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA_KEY,
-						   sizeof(*cmd), cmd, status);
-
-	iwl_mvm_add_sta_key_to_add_sta_cmd_v5(cmd, &sta_cmd, mac_id_n_color);
-
-	return iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(sta_cmd),
-					   &sta_cmd, status);
-}
-
-static int iwl_mvm_send_add_sta_key_cmd(struct iwl_mvm *mvm,
-					u32 flags,
-					struct iwl_mvm_add_sta_key_cmd *cmd,
-					u32 mac_id_n_color)
-{
-	struct iwl_mvm_add_sta_cmd_v5 sta_cmd;
-
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_STA_KEY_CMD)
-		return iwl_mvm_send_cmd_pdu(mvm, ADD_STA_KEY, flags,
-					    sizeof(*cmd), cmd);
-
-	iwl_mvm_add_sta_key_to_add_sta_cmd_v5(cmd, &sta_cmd, mac_id_n_color);
-
-	return iwl_mvm_send_cmd_pdu(mvm, ADD_STA, flags, sizeof(sta_cmd),
-				    &sta_cmd);
-}
-
 static int iwl_mvm_find_free_sta_id(struct iwl_mvm *mvm,
 				    enum nl80211_iftype iftype)
 {
@@ -207,7 +98,7 @@
 			   bool update)
 {
 	struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;
-	struct iwl_mvm_add_sta_cmd_v7 add_sta_cmd;
+	struct iwl_mvm_add_sta_cmd add_sta_cmd;
 	int ret;
 	u32 status;
 	u32 agg_size = 0, mpdu_dens = 0;
@@ -295,7 +186,8 @@
 		cpu_to_le32(mpdu_dens << STA_FLG_AGG_MPDU_DENS_SHIFT);
 
 	status = ADD_STA_SUCCESS;
-	ret = iwl_mvm_send_add_sta_cmd_status(mvm, &add_sta_cmd, &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(add_sta_cmd),
+					  &add_sta_cmd, &status);
 	if (ret)
 		return ret;
 
@@ -380,7 +272,7 @@
 int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
 		      bool drain)
 {
-	struct iwl_mvm_add_sta_cmd_v7 cmd = {};
+	struct iwl_mvm_add_sta_cmd cmd = {};
 	int ret;
 	u32 status;
 
@@ -393,7 +285,8 @@
 	cmd.station_flags_msk = cpu_to_le32(STA_FLG_DRAIN_FLOW);
 
 	status = ADD_STA_SUCCESS;
-	ret = iwl_mvm_send_add_sta_cmd_status(mvm, &cmd, &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd),
+					  &cmd, &status);
 	if (ret)
 		return ret;
 
@@ -434,7 +327,7 @@
 		return -EINVAL;
 	}
 
-	ret = iwl_mvm_send_cmd_pdu(mvm, REMOVE_STA, CMD_SYNC,
+	ret = iwl_mvm_send_cmd_pdu(mvm, REMOVE_STA, 0,
 				   sizeof(rm_sta_cmd), &rm_sta_cmd);
 	if (ret) {
 		IWL_ERR(mvm, "Failed to remove station. Id=%d\n", sta_id);
@@ -498,7 +391,7 @@
 				sta_id);
 			continue;
 		}
-		rcu_assign_pointer(mvm->fw_id_to_mac_id[sta_id], NULL);
+		RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
 		clear_bit(sta_id, mvm->sta_drained);
 	}
 
@@ -520,14 +413,6 @@
 		/* flush its queues here since we are freeing mvm_sta */
 		ret = iwl_mvm_flush_tx_path(mvm, mvm_sta->tfd_queue_msk, true);
 
-		/*
-		 * Put a non-NULL since the fw station isn't removed.
-		 * It will be removed after the MAC will be set as
-		 * unassoc.
-		 */
-		rcu_assign_pointer(mvm->fw_id_to_mac_id[mvm_sta->sta_id],
-				   ERR_PTR(-EINVAL));
-
 		/* if we are associated - we can't remove the AP STA now */
 		if (vif->bss_conf.assoc)
 			return ret;
@@ -557,7 +442,7 @@
 	} else {
 		spin_unlock_bh(&mvm_sta->lock);
 		ret = iwl_mvm_rm_sta_common(mvm, mvm_sta->sta_id);
-		rcu_assign_pointer(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL);
+		RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL);
 	}
 
 	return ret;
@@ -571,7 +456,7 @@
 
 	lockdep_assert_held(&mvm->mutex);
 
-	rcu_assign_pointer(mvm->fw_id_to_mac_id[sta_id], NULL);
+	RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
 	return ret;
 }
 
@@ -593,7 +478,7 @@
 
 void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
 {
-	rcu_assign_pointer(mvm->fw_id_to_mac_id[sta->sta_id], NULL);
+	RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta->sta_id], NULL);
 	memset(sta, 0, sizeof(struct iwl_mvm_int_sta));
 	sta->sta_id = IWL_MVM_STATION_COUNT;
 }
@@ -603,13 +488,13 @@
 				      const u8 *addr,
 				      u16 mac_id, u16 color)
 {
-	struct iwl_mvm_add_sta_cmd_v7 cmd;
+	struct iwl_mvm_add_sta_cmd cmd;
 	int ret;
 	u32 status;
 
 	lockdep_assert_held(&mvm->mutex);
 
-	memset(&cmd, 0, sizeof(struct iwl_mvm_add_sta_cmd_v7));
+	memset(&cmd, 0, sizeof(cmd));
 	cmd.sta_id = sta->sta_id;
 	cmd.mac_id_n_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mac_id,
 							     color));
@@ -619,7 +504,8 @@
 	if (addr)
 		memcpy(cmd.addr, addr, ETH_ALEN);
 
-	ret = iwl_mvm_send_add_sta_cmd_status(mvm, &cmd, &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd),
+					  &cmd, &status);
 	if (ret)
 		return ret;
 
@@ -753,7 +639,7 @@
 		       int tid, u16 ssn, bool start)
 {
 	struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;
-	struct iwl_mvm_add_sta_cmd_v7 cmd = {};
+	struct iwl_mvm_add_sta_cmd cmd = {};
 	int ret;
 	u32 status;
 
@@ -777,7 +663,8 @@
 				  STA_MODIFY_REMOVE_BA_TID;
 
 	status = ADD_STA_SUCCESS;
-	ret = iwl_mvm_send_add_sta_cmd_status(mvm, &cmd, &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd),
+					  &cmd, &status);
 	if (ret)
 		return ret;
 
@@ -812,7 +699,7 @@
 			      int tid, u8 queue, bool start)
 {
 	struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;
-	struct iwl_mvm_add_sta_cmd_v7 cmd = {};
+	struct iwl_mvm_add_sta_cmd cmd = {};
 	int ret;
 	u32 status;
 
@@ -834,7 +721,8 @@
 	cmd.tid_disable_tx = cpu_to_le16(mvm_sta->tid_disable_agg);
 
 	status = ADD_STA_SUCCESS;
-	ret = iwl_mvm_send_add_sta_cmd_status(mvm, &cmd, &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd),
+					  &cmd, &status);
 	if (ret)
 		return ret;
 
@@ -1129,12 +1017,11 @@
 				u8 sta_id, u32 tkip_iv32, u16 *tkip_p1k,
 				u32 cmd_flags)
 {
-	__le16 key_flags;
 	struct iwl_mvm_add_sta_key_cmd cmd = {};
+	__le16 key_flags;
 	int ret, status;
 	u16 keyidx;
 	int i;
-	u32 mac_id_n_color = mvm_sta->mac_id_n_color;
 
 	keyidx = (keyconf->keyidx << STA_KEY_FLG_KEYID_POS) &
 		 STA_KEY_FLG_KEYID_MSK;
@@ -1166,13 +1053,12 @@
 	cmd.sta_id = sta_id;
 
 	status = ADD_STA_SUCCESS;
-	if (cmd_flags == CMD_SYNC)
-		ret = iwl_mvm_send_add_sta_key_cmd_status(mvm, &cmd,
-							  mac_id_n_color,
-							  &status);
+	if (cmd_flags & CMD_ASYNC)
+		ret =  iwl_mvm_send_cmd_pdu(mvm, ADD_STA_KEY, CMD_ASYNC,
+					    sizeof(cmd), &cmd);
 	else
-		ret = iwl_mvm_send_add_sta_key_cmd(mvm, CMD_ASYNC, &cmd,
-						   mac_id_n_color);
+		ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA_KEY, sizeof(cmd),
+						  &cmd, &status);
 
 	switch (status) {
 	case ADD_STA_SUCCESS:
@@ -1225,7 +1111,7 @@
 		       remove_key ? "removing" : "installing",
 		       igtk_cmd.sta_id);
 
-	return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, CMD_SYNC,
+	return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0,
 				    sizeof(igtk_cmd), &igtk_cmd);
 }
 
@@ -1312,15 +1198,15 @@
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, sta_id,
-					   seq.tkip.iv32, p1k, CMD_SYNC);
+					   seq.tkip.iv32, p1k, 0);
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, sta_id,
-					   0, NULL, CMD_SYNC);
+					   0, NULL, 0);
 		break;
 	default:
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf,
-					   sta_id, 0, NULL, CMD_SYNC);
+					   sta_id, 0, NULL, 0);
 	}
 
 	if (ret)
@@ -1399,9 +1285,8 @@
 	cmd.sta_id = sta_id;
 
 	status = ADD_STA_SUCCESS;
-	ret = iwl_mvm_send_add_sta_key_cmd_status(mvm, &cmd,
-						  mvm_sta->mac_id_n_color,
-						  &status);
+	ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA_KEY, sizeof(cmd),
+					  &cmd, &status);
 
 	switch (status) {
 	case ADD_STA_SUCCESS:
@@ -1448,7 +1333,7 @@
 				struct ieee80211_sta *sta)
 {
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
-	struct iwl_mvm_add_sta_cmd_v7 cmd = {
+	struct iwl_mvm_add_sta_cmd cmd = {
 		.add_modify = STA_MODE_MODIFY,
 		.sta_id = mvmsta->sta_id,
 		.station_flags_msk = cpu_to_le32(STA_FLG_PS),
@@ -1456,7 +1341,7 @@
 	};
 	int ret;
 
-	ret = iwl_mvm_send_add_sta_cmd(mvm, CMD_ASYNC, &cmd);
+	ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, sizeof(cmd), &cmd);
 	if (ret)
 		IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret);
 }
@@ -1468,7 +1353,7 @@
 				       bool agg)
 {
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
-	struct iwl_mvm_add_sta_cmd_v7 cmd = {
+	struct iwl_mvm_add_sta_cmd cmd = {
 		.add_modify = STA_MODE_MODIFY,
 		.sta_id = mvmsta->sta_id,
 		.modify_mask = STA_MODIFY_SLEEPING_STA_TX_COUNT,
@@ -1538,7 +1423,7 @@
 		cmd.sleep_state_flags |= cpu_to_le16(STA_SLEEP_STATE_UAPSD);
 	}
 
-	ret = iwl_mvm_send_add_sta_cmd(mvm, CMD_ASYNC, &cmd);
+	ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, sizeof(cmd), &cmd);
 	if (ret)
 		IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret);
 }

diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.h b/drivers/net/wireless/iwlwifi/mvm/sta.h
index 2ed84c4..d98e8a2 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.h

@@ -253,6 +253,8 @@
  *	This is basically (last acked packet++).
  * @rate_n_flags: Rate at which Tx was attempted. Holds the data between the
  *	Tx response (TX_CMD), and the block ack notification (COMPRESSED_BA).
+ * @reduced_tpc: Reduced tx power. Holds the data between the
+ *	Tx response (TX_CMD), and the block ack notification (COMPRESSED_BA).
  * @state: state of the BA agreement establishment / tear down.
  * @txq_id: Tx queue used by the BA session
  * @ssn: the first packet to be sent in AGG HW queue in Tx AGG start flow, or
@@ -265,6 +267,7 @@
 	u16 next_reclaimed;
 	/* The rest is Tx AGG related */
 	u32 rate_n_flags;
+	u8 reduced_tpc;
 	enum iwl_mvm_agg_state state;
 	u16 txq_id;
 	u16 ssn;
@@ -284,8 +287,6 @@
  * @tid_disable_agg: bitmap: if bit(tid) is set, the fw won't send ampdus for
  *	tid.
  * @max_agg_bufsize: the maximal size of the AGG buffer for this station
- * @bt_reduced_txpower_dbg: debug mode in which %bt_reduced_txpower is forced
- *	by debugfs.
  * @bt_reduced_txpower: is reduced tx power enabled for this station
  * @next_status_eosp: the next reclaimed packet is a PS-Poll response and
  *	we need to signal the EOSP
@@ -306,7 +307,6 @@
 	u32 mac_id_n_color;
 	u16 tid_disable_agg;
 	u8 max_agg_bufsize;
-	bool bt_reduced_txpower_dbg;
 	bool bt_reduced_txpower;
 	bool next_status_eosp;
 	spinlock_t lock;

diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c
index 6133124..80100f6 100644
--- a/drivers/net/wireless/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c

@@ -273,67 +273,10 @@
 	return true;
 }
 
-/* used to convert from time event API v2 to v1 */
-#define TE_V2_DEP_POLICY_MSK (TE_V2_DEP_OTHER | TE_V2_DEP_TSF |\
-			     TE_V2_EVENT_SOCIOPATHIC)
-static inline u16 te_v2_get_notify(__le16 policy)
-{
-	return le16_to_cpu(policy) & TE_V2_NOTIF_MSK;
-}
-
-static inline u16 te_v2_get_dep_policy(__le16 policy)
-{
-	return (le16_to_cpu(policy) & TE_V2_DEP_POLICY_MSK) >>
-		TE_V2_PLACEMENT_POS;
-}
-
-static inline u16 te_v2_get_absence(__le16 policy)
-{
-	return (le16_to_cpu(policy) & TE_V2_ABSENCE) >> TE_V2_ABSENCE_POS;
-}
-
-static void iwl_mvm_te_v2_to_v1(const struct iwl_time_event_cmd_v2 *cmd_v2,
-				struct iwl_time_event_cmd_v1 *cmd_v1)
-{
-	cmd_v1->id_and_color = cmd_v2->id_and_color;
-	cmd_v1->action = cmd_v2->action;
-	cmd_v1->id = cmd_v2->id;
-	cmd_v1->apply_time = cmd_v2->apply_time;
-	cmd_v1->max_delay = cmd_v2->max_delay;
-	cmd_v1->depends_on = cmd_v2->depends_on;
-	cmd_v1->interval = cmd_v2->interval;
-	cmd_v1->duration = cmd_v2->duration;
-	if (cmd_v2->repeat == TE_V2_REPEAT_ENDLESS)
-		cmd_v1->repeat = cpu_to_le32(TE_V1_REPEAT_ENDLESS);
-	else
-		cmd_v1->repeat = cpu_to_le32(cmd_v2->repeat);
-	cmd_v1->max_frags = cpu_to_le32(cmd_v2->max_frags);
-	cmd_v1->interval_reciprocal = 0; /* unused */
-
-	cmd_v1->dep_policy = cpu_to_le32(te_v2_get_dep_policy(cmd_v2->policy));
-	cmd_v1->is_present = cpu_to_le32(!te_v2_get_absence(cmd_v2->policy));
-	cmd_v1->notify = cpu_to_le32(te_v2_get_notify(cmd_v2->policy));
-}
-
-static int iwl_mvm_send_time_event_cmd(struct iwl_mvm *mvm,
-				       const struct iwl_time_event_cmd_v2 *cmd)
-{
-	struct iwl_time_event_cmd_v1 cmd_v1;
-
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_TIME_EVENT_API_V2)
-		return iwl_mvm_send_cmd_pdu(mvm, TIME_EVENT_CMD, CMD_SYNC,
-					    sizeof(*cmd), cmd);
-
-	iwl_mvm_te_v2_to_v1(cmd, &cmd_v1);
-	return iwl_mvm_send_cmd_pdu(mvm, TIME_EVENT_CMD, CMD_SYNC,
-				    sizeof(cmd_v1), &cmd_v1);
-}
-
-
 static int iwl_mvm_time_event_send_add(struct iwl_mvm *mvm,
 				       struct ieee80211_vif *vif,
 				       struct iwl_mvm_time_event_data *te_data,
-				       struct iwl_time_event_cmd_v2 *te_cmd)
+				       struct iwl_time_event_cmd *te_cmd)
 {
 	static const u8 time_event_response[] = { TIME_EVENT_CMD };
 	struct iwl_notification_wait wait_time_event;
@@ -369,7 +312,8 @@
 				   ARRAY_SIZE(time_event_response),
 				   iwl_mvm_time_event_response, te_data);
 
-	ret = iwl_mvm_send_time_event_cmd(mvm, te_cmd);
+	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_EVENT_CMD, 0,
+					    sizeof(*te_cmd), te_cmd);
 	if (ret) {
 		IWL_ERR(mvm, "Couldn't send TIME_EVENT_CMD: %d\n", ret);
 		iwl_remove_notification(&mvm->notif_wait, &wait_time_event);
@@ -397,7 +341,7 @@
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data;
-	struct iwl_time_event_cmd_v2 time_cmd = {};
+	struct iwl_time_event_cmd time_cmd = {};
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -453,7 +397,7 @@
 			       struct iwl_mvm_vif *mvmvif,
 			       struct iwl_mvm_time_event_data *te_data)
 {
-	struct iwl_time_event_cmd_v2 time_cmd = {};
+	struct iwl_time_event_cmd time_cmd = {};
 	u32 id, uid;
 	int ret;
 
@@ -490,7 +434,8 @@
 		cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color));
 
 	IWL_DEBUG_TE(mvm, "Removing TE 0x%x\n", le32_to_cpu(time_cmd.id));
-	ret = iwl_mvm_send_time_event_cmd(mvm, &time_cmd);
+	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_EVENT_CMD, 0,
+				   sizeof(time_cmd), &time_cmd);
 	if (WARN_ON(ret))
 		return;
 }
@@ -510,7 +455,7 @@
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data;
-	struct iwl_time_event_cmd_v2 time_cmd = {};
+	struct iwl_time_event_cmd time_cmd = {};
 
 	lockdep_assert_held(&mvm->mutex);
 	if (te_data->running) {

diff --git a/drivers/net/wireless/iwlwifi/mvm/tt.c b/drivers/net/wireless/iwlwifi/mvm/tt.c
index 7a99fa3..8685615 100644
--- a/drivers/net/wireless/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/iwlwifi/mvm/tt.c

@@ -409,7 +409,6 @@
 		.id = REPLY_THERMAL_MNG_BACKOFF,
 		.len = { sizeof(u32), },
 		.data = { &backoff, },
-		.flags = CMD_SYNC,
 	};
 
 	backoff = max(backoff, mvm->thermal_throttle.min_backoff);
@@ -468,13 +467,14 @@
 	}
 
 	if (params->support_tx_backoff) {
-		tx_backoff = 0;
+		tx_backoff = tt->min_backoff;
 		for (i = 0; i < TT_TX_BACKOFF_SIZE; i++) {
 			if (temperature < params->tx_backoff[i].temperature)
 				break;
-			tx_backoff = params->tx_backoff[i].backoff;
+			tx_backoff = max(tt->min_backoff,
+					 params->tx_backoff[i].backoff);
 		}
-		if (tx_backoff != 0)
+		if (tx_backoff != tt->min_backoff)
 			throttle_enable = true;
 		if (tt->tx_backoff != tx_backoff)
 			iwl_mvm_tt_tx_backoff(mvm, tx_backoff);
@@ -484,7 +484,8 @@
 		IWL_WARN(mvm,
 			 "Due to high temperature thermal throttling initiated\n");
 		tt->throttle = true;
-	} else if (tt->throttle && !tt->dynamic_smps && tt->tx_backoff == 0 &&
+	} else if (tt->throttle && !tt->dynamic_smps &&
+		   tt->tx_backoff == tt->min_backoff &&
 		   temperature <= params->tx_protection_exit) {
 		IWL_WARN(mvm,
 			 "Temperature is back to normal thermal throttling stopped\n");

diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c
index 879aeac..3846a6c 100644
--- a/drivers/net/wireless/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/iwlwifi/mvm/tx.c

@@ -636,7 +636,11 @@
 			seq_ctl = le16_to_cpu(hdr->seq_ctrl);
 		}
 
-		ieee80211_tx_status_ni(mvm->hw, skb);
+		BUILD_BUG_ON(ARRAY_SIZE(info->status.status_driver_data) < 1);
+		info->status.status_driver_data[0] =
+				(void *)(uintptr_t)tx_resp->reduced_tpc;
+
+		ieee80211_tx_status(mvm->hw, skb);
 	}
 
 	if (txq_id >= mvm->first_agg_queue) {
@@ -815,6 +819,7 @@
 		struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 		mvmsta->tid_data[tid].rate_n_flags =
 			le32_to_cpu(tx_resp->initial_rate);
+		mvmsta->tid_data[tid].reduced_tpc = tx_resp->reduced_tpc;
 	}
 
 	rcu_read_unlock();
@@ -928,6 +933,8 @@
 			info->status.ampdu_len = ba_notif->txed;
 			iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags,
 						    info);
+			info->status.status_driver_data[0] =
+				(void *)(uintptr_t)tid_data->reduced_tpc;
 		}
 	}
 
@@ -937,7 +944,7 @@
 
 	while (!skb_queue_empty(&reclaimed_skbs)) {
 		skb = __skb_dequeue(&reclaimed_skbs);
-		ieee80211_tx_status_ni(mvm->hw, skb);
+		ieee80211_tx_status(mvm->hw, skb);
 	}
 
 	return 0;
@@ -951,7 +958,7 @@
 		.flush_ctl = cpu_to_le16(DUMP_TX_FIFO_FLUSH),
 	};
 
-	u32 flags = sync ? CMD_SYNC : CMD_ASYNC;
+	u32 flags = sync ? 0 : CMD_ASYNC;
 
 	ret = iwl_mvm_send_cmd_pdu(mvm, TXPATH_FLUSH, flags,
 				   sizeof(flush_cmd), &flush_cmd);

diff --git a/drivers/net/wireless/iwlwifi/mvm/utils.c b/drivers/net/wireless/iwlwifi/mvm/utils.c
index 2180902..aa9fc77 100644
--- a/drivers/net/wireless/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/iwlwifi/mvm/utils.c

@@ -64,6 +64,7 @@
 
 #include "iwl-debug.h"
 #include "iwl-io.h"
+#include "iwl-prph.h"
 
 #include "mvm.h"
 #include "fw-api-rs.h"
@@ -143,7 +144,7 @@
 		      "cmd flags %x", cmd->flags))
 		return -EINVAL;
 
-	cmd->flags |= CMD_SYNC | CMD_WANT_SKB;
+	cmd->flags |= CMD_WANT_SKB;
 
 	ret = iwl_trans_send_cmd(mvm->trans, cmd);
 	if (ret == -ERFKILL) {
@@ -469,6 +470,8 @@
 			mvm->status, table.valid);
 	}
 
+	/* Do not change this output - scripts rely on it */
+
 	IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
 
 	trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
@@ -516,13 +519,14 @@
 		iwl_mvm_dump_umac_error_log(mvm);
 }
 
+#ifdef CONFIG_IWLWIFI_DEBUGFS
 void iwl_mvm_fw_error_sram_dump(struct iwl_mvm *mvm)
 {
 	const struct fw_img *img;
 	u32 ofs, sram_len;
 	void *sram;
 
-	if (!mvm->ucode_loaded || mvm->fw_error_sram)
+	if (!mvm->ucode_loaded || mvm->fw_error_sram || mvm->fw_error_dump)
 		return;
 
 	img = &mvm->fw->img[mvm->cur_ucode];
@@ -538,6 +542,48 @@
 	mvm->fw_error_sram_len = sram_len;
 }
 
+void iwl_mvm_fw_error_rxf_dump(struct iwl_mvm *mvm)
+{
+	int i, reg_val;
+	unsigned long flags;	/* filled by grab_nic_access, handed back on release */
+	/*
+	 * Snapshot the RXF (presumably the RX FIFO - confirm) into a newly
+	 * allocated buffer: mvm->fw_error_rxf, mvm->fw_error_rxf_len bytes.
+	 * Nothing is done when no ucode is loaded, or when
+	 * mvm->fw_error_rxf or mvm->fw_error_dump is already set.
+	 */
+	if (!mvm->ucode_loaded || mvm->fw_error_rxf || mvm->fw_error_dump)
+		return;
+
+	/*
+	 * Read the buffer size: extract the byte-count field of the
+	 * RXF_SIZE_ADDR register.
+	 * NOTE(review): this periphery read is issued before
+	 * iwl_trans_grab_nic_access() below - confirm that
+	 * iwl_trans_read_prph() is valid without NIC access held.
+	 */
+	reg_val = iwl_trans_read_prph(mvm->trans, RXF_SIZE_ADDR);
+	mvm->fw_error_rxf_len =
+		(reg_val & RXF_SIZE_BYTE_CNT_MSK) >> RXF_SIZE_BYTE_CND_POS;
+
+	/* the register holds the value divided by 128, so scale it back (<< 7) */
+	mvm->fw_error_rxf_len = mvm->fw_error_rxf_len << 7;
+
+	if (!mvm->fw_error_rxf_len)
+		return;
+
+	mvm->fw_error_rxf =  kzalloc(mvm->fw_error_rxf_len, GFP_ATOMIC);
+	if (!mvm->fw_error_rxf) {
+		mvm->fw_error_rxf_len = 0;	/* no buffer, so report no length */
+		return;
+	}
+
+	if (!iwl_trans_grab_nic_access(mvm->trans, false, &flags)) {
+		/* undo the allocation; a NULL fw_error_rxf lets a later call retry */
+		kfree(mvm->fw_error_rxf);
+		mvm->fw_error_rxf = NULL;
+		mvm->fw_error_rxf_len = 0;
+		return;
+	}
+	/*
+	 * Copy one 32-bit word per iteration: write the word's byte offset
+	 * to RXF_LD_FENCE_OFFSET_ADDR, then read the word back from
+	 * RXF_FIFO_RD_FENCE_ADDR. The write must precede the read.
+	 */
+	for (i = 0; i < (mvm->fw_error_rxf_len / sizeof(u32)); i++) {
+		iwl_trans_write_prph(mvm->trans, RXF_LD_FENCE_OFFSET_ADDR,
+				     i * sizeof(u32));
+		mvm->fw_error_rxf[i] =
+			iwl_trans_read_prph(mvm->trans, RXF_FIFO_RD_FENCE_ADDR);
+	}
+	iwl_trans_release_nic_access(mvm->trans, &flags);
+}
+#endif
+
 /**
  * iwl_mvm_send_lq_cmd() - Send link quality command
  * @init: This command is sent as part of station initialization right
@@ -553,7 +599,7 @@
 	struct iwl_host_cmd cmd = {
 		.id = LQ_CMD,
 		.len = { sizeof(struct iwl_lq_cmd), },
-		.flags = init ? CMD_SYNC : CMD_ASYNC,
+		.flags = init ? 0 : CMD_ASYNC,
 		.data = { lq, },
 	};
 
@@ -604,6 +650,39 @@
 	ieee80211_request_smps(vif, smps_mode);
 }
 
+/*
+ * Interface iterator callback used by iwl_mvm_rx_diversity_allowed().
+ * @_data: points to the caller's bool; it is cleared to false when this
+ *	interface holds a static or dynamic SMPS request in any slot, and
+ *	is left untouched otherwise
+ * @mac: unused
+ * @vif: the interface being visited
+ */
+static void iwl_mvm_diversity_iter(void *_data, u8 *mac,
+				   struct ieee80211_vif *vif)
+{
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	bool *allowed = _data;
+	int req;
+
+	/* every request slot is inspected; there is no early exit */
+	for (req = 0; req < NUM_IWL_MVM_SMPS_REQ; req++) {
+		switch (mvmvif->smps_requests[req]) {
+		case IEEE80211_SMPS_STATIC:
+		case IEEE80211_SMPS_DYNAMIC:
+			*allowed = false;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Tell whether RX diversity may currently be used.
+ *
+ * The caller must hold mvm->mutex. Returns false when num_of_ant()
+ * reports a single valid RX antenna, when the device configuration does
+ * not set rx_with_siso_diversity, or when iwl_mvm_diversity_iter() clears
+ * the flag for any active interface; returns true otherwise.
+ */
+bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm)
+{
+	bool allowed = true;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	/* antenna count is checked first, then the configuration flag */
+	if (num_of_ant(mvm->fw->valid_rx_ant) == 1 ||
+	    !mvm->cfg->rx_with_siso_diversity)
+		return false;
+
+	/* let each active interface veto through the iterator callback */
+	ieee80211_iterate_active_interfaces_atomic(mvm->hw,
+						   IEEE80211_IFACE_ITER_NORMAL,
+						   iwl_mvm_diversity_iter,
+						   &allowed);
+
+	return allowed;
+}
+
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			       bool value)
 {
@@ -623,7 +702,7 @@
 
 	iwl_mvm_bt_coex_vif_change(mvm);
 
-	return iwl_mvm_power_update_mac(mvm, vif);
+	return iwl_mvm_power_update_mac(mvm);
 }
 
 static void iwl_mvm_ll_iter(void *_data, u8 *mac, struct ieee80211_vif *vif)

diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index 3d1d57f..7091a18 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c

@@ -417,7 +417,7 @@
 	    splx->package.count != 2 ||
 	    splx->package.elements[0].type != ACPI_TYPE_INTEGER ||
 	    splx->package.elements[0].integer.value != 0) {
-		IWL_ERR(trans, "Unsupported splx structure");
+		IWL_ERR(trans, "Unsupported splx structure\n");
 		return 0;
 	}
 
@@ -426,14 +426,14 @@
 	    limits->package.count < 2 ||
 	    limits->package.elements[0].type != ACPI_TYPE_INTEGER ||
 	    limits->package.elements[1].type != ACPI_TYPE_INTEGER) {
-		IWL_ERR(trans, "Invalid limits element");
+		IWL_ERR(trans, "Invalid limits element\n");
 		return 0;
 	}
 
 	domain_type = &limits->package.elements[0];
 	power_limit = &limits->package.elements[1];
 	if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) {
-		IWL_DEBUG_INFO(trans, "WiFi power is not limited");
+		IWL_DEBUG_INFO(trans, "WiFi power is not limited\n");
 		return 0;
 	}
 
@@ -450,26 +450,26 @@
 	pxsx_handle = ACPI_HANDLE(&pdev->dev);
 	if (!pxsx_handle) {
 		IWL_DEBUG_INFO(trans,
-			       "Could not retrieve root port ACPI handle");
+			       "Could not retrieve root port ACPI handle\n");
 		return;
 	}
 
 	/* Get the method's handle */
 	status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle);
 	if (ACPI_FAILURE(status)) {
-		IWL_DEBUG_INFO(trans, "SPL method not found");
+		IWL_DEBUG_INFO(trans, "SPL method not found\n");
 		return;
 	}
 
 	/* Call SPLC with no arguments */
 	status = acpi_evaluate_object(handle, NULL, NULL, &splx);
 	if (ACPI_FAILURE(status)) {
-		IWL_ERR(trans, "SPLC invocation failed (0x%x)", status);
+		IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status);
 		return;
 	}
 
 	trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer);
-	IWL_DEBUG_INFO(trans, "Default power limit set to %lld",
+	IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n",
 		       trans->dflt_pwr_limit);
 	kfree(splx.pointer);
 }

diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h
index 9091513..6c22b23 100644
--- a/drivers/net/wireless/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/iwlwifi/pcie/internal.h

@@ -102,7 +102,7 @@
 	u32 write_actual;
 	struct list_head rx_free;
 	struct list_head rx_used;
-	int need_update;
+	bool need_update;
 	struct iwl_rb_status *rb_stts;
 	dma_addr_t rb_stts_dma;
 	spinlock_t lock;
@@ -117,21 +117,19 @@
 /**
  * iwl_queue_inc_wrap - increment queue index, wrap back to beginning
  * @index -- current index
- * @n_bd -- total number of entries in queue (must be power of 2)
  */
-static inline int iwl_queue_inc_wrap(int index, int n_bd)
+static inline int iwl_queue_inc_wrap(int index)
 {
-	return ++index & (n_bd - 1);
+	return ++index & (TFD_QUEUE_SIZE_MAX - 1);
 }
 
 /**
  * iwl_queue_dec_wrap - decrement queue index, wrap back to end
  * @index -- current index
- * @n_bd -- total number of entries in queue (must be power of 2)
  */
-static inline int iwl_queue_dec_wrap(int index, int n_bd)
+static inline int iwl_queue_dec_wrap(int index)
 {
-	return --index & (n_bd - 1);
+	return --index & (TFD_QUEUE_SIZE_MAX - 1);
 }
 
 struct iwl_cmd_meta {
@@ -145,13 +143,13 @@
  *
  * Contains common data for Rx and Tx queues.
  *
- * Note the difference between n_bd and n_window: the hardware
- * always assumes 256 descriptors, so n_bd is always 256 (unless
+ * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
+ * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
  * there might be HW changes in the future). For the normal TX
  * queues, n_window, which is the size of the software queue data
  * is also 256; however, for the command queue, n_window is only
  * 32 since we don't need so many commands pending. Since the HW
- * still uses 256 BDs for DMA though, n_bd stays 256. As a result,
+ * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result,
  * the software buffers (in the variables @meta, @txb in struct
  * iwl_txq) only have 32 entries, while the HW buffers (@tfds in
  * the same struct) have 256.
@@ -162,7 +160,6 @@
  * data is a window overlayed over the HW queue.
  */
 struct iwl_queue {
-	int n_bd;              /* number of BDs in this queue */
 	int write_ptr;       /* 1-st empty entry (index) host_w*/
 	int read_ptr;         /* last used entry (index) host_r*/
 	/* use for monitoring and recovering the stuck queue */
@@ -231,7 +228,7 @@
 	spinlock_t lock;
 	struct timer_list stuck_timer;
 	struct iwl_trans_pcie *trans_pcie;
-	u8 need_update;
+	bool need_update;
 	u8 active;
 	bool ampdu;
 };
@@ -270,6 +267,9 @@
 	struct iwl_trans *trans;
 	struct iwl_drv *drv;
 
+	struct net_device napi_dev;
+	struct napi_struct napi;
+
 	/* INT ICT Table */
 	__le32 *ict_tbl;
 	dma_addr_t ict_tbl_dma;
@@ -362,7 +362,7 @@
 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue);
 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 		      struct iwl_device_cmd *dev_cmd, int txq_id);
-void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq);
+void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans);
 int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd);
 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
 			    struct iwl_rx_cmd_buffer *rxb, int handler_status);
@@ -370,6 +370,13 @@
 			    struct sk_buff_head *skbs);
 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans);
 
+static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
+{
+	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+
+	return le16_to_cpu(tb->hi_n_len) >> 4;
+}
+
 /*****************************************************
 * Error handling
 ******************************************************/

diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index fdfa396..a2698e5 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c

@@ -145,15 +145,13 @@
 /*
  * iwl_pcie_rxq_inc_wr_ptr - Update the write pointer for the RX queue
  */
-static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans,
-				    struct iwl_rxq *rxq)
+static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans)
 {
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	struct iwl_rxq *rxq = &trans_pcie->rxq;
 	u32 reg;
 
-	spin_lock(&rxq->lock);
-
-	if (rxq->need_update == 0)
-		goto exit_unlock;
+	lockdep_assert_held(&rxq->lock);
 
 	/*
 	 * explicitly wake up the NIC if:
@@ -169,13 +167,27 @@
 				       reg);
 			iwl_set_bit(trans, CSR_GP_CNTRL,
 				    CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-			goto exit_unlock;
+			rxq->need_update = true;
+			return;
 		}
 	}
 
 	rxq->write_actual = round_down(rxq->write, 8);
 	iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual);
-	rxq->need_update = 0;
+}
+
+static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	struct iwl_rxq *rxq = &trans_pcie->rxq;
+
+	spin_lock(&rxq->lock);
+
+	if (!rxq->need_update)
+		goto exit_unlock;
+
+	iwl_pcie_rxq_inc_wr_ptr(trans);
+	rxq->need_update = false;
 
  exit_unlock:
 	spin_unlock(&rxq->lock);
@@ -236,9 +248,8 @@
 	 * Increment device's write pointer in multiples of 8. */
 	if (rxq->write_actual != (rxq->write & ~0x7)) {
 		spin_lock(&rxq->lock);
-		rxq->need_update = 1;
+		iwl_pcie_rxq_inc_wr_ptr(trans);
 		spin_unlock(&rxq->lock);
-		iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
 	}
 }
 
@@ -362,20 +373,9 @@
  * Also restock the Rx queue via iwl_pcie_rxq_restock.
  * This is called as a scheduled work item (except for during initialization)
  */
-static void iwl_pcie_rx_replenish(struct iwl_trans *trans)
+static void iwl_pcie_rx_replenish(struct iwl_trans *trans, gfp_t gfp)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
-	iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL);
-
-	spin_lock(&trans_pcie->irq_lock);
-	iwl_pcie_rxq_restock(trans);
-	spin_unlock(&trans_pcie->irq_lock);
-}
-
-static void iwl_pcie_rx_replenish_now(struct iwl_trans *trans)
-{
-	iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+	iwl_pcie_rxq_alloc_rbs(trans, gfp);
 
 	iwl_pcie_rxq_restock(trans);
 }
@@ -385,7 +385,7 @@
 	struct iwl_trans_pcie *trans_pcie =
 	    container_of(data, struct iwl_trans_pcie, rx_replenish);
 
-	iwl_pcie_rx_replenish(trans_pcie->trans);
+	iwl_pcie_rx_replenish(trans_pcie->trans, GFP_KERNEL);
 }
 
 static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
@@ -521,14 +521,13 @@
 	memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
 	spin_unlock(&rxq->lock);
 
-	iwl_pcie_rx_replenish(trans);
+	iwl_pcie_rx_replenish(trans, GFP_KERNEL);
 
 	iwl_pcie_rx_hw_init(trans, rxq);
 
-	spin_lock(&trans_pcie->irq_lock);
-	rxq->need_update = 1;
-	iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
-	spin_unlock(&trans_pcie->irq_lock);
+	spin_lock(&rxq->lock);
+	iwl_pcie_rxq_inc_wr_ptr(trans);
+	spin_unlock(&rxq->lock);
 
 	return 0;
 }
@@ -673,7 +672,6 @@
 	/* Reuse the page if possible. For notification packets and
 	 * SKBs that fail to Rx correctly, add them back into the
 	 * rx_free list for reuse later. */
-	spin_lock(&rxq->lock);
 	if (rxb->page != NULL) {
 		rxb->page_dma =
 			dma_map_page(trans->dev, rxb->page, 0,
@@ -694,7 +692,6 @@
 		}
 	} else
 		list_add_tail(&rxb->list, &rxq->rx_used);
-	spin_unlock(&rxq->lock);
 }
 
 /*
@@ -709,6 +706,8 @@
 	u32 count = 8;
 	int total_empty;
 
+restart:
+	spin_lock(&rxq->lock);
 	/* uCode's read index (stored in shared DRAM) indicates the last Rx
 	 * buffer that the driver may process (last buffer filled by ucode). */
 	r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
@@ -743,18 +742,25 @@
 			count++;
 			if (count >= 8) {
 				rxq->read = i;
-				iwl_pcie_rx_replenish_now(trans);
+				spin_unlock(&rxq->lock);
+				iwl_pcie_rx_replenish(trans, GFP_ATOMIC);
 				count = 0;
+				goto restart;
 			}
 		}
 	}
 
 	/* Backtrack one entry */
 	rxq->read = i;
+	spin_unlock(&rxq->lock);
+
 	if (fill_rx)
-		iwl_pcie_rx_replenish_now(trans);
+		iwl_pcie_rx_replenish(trans, GFP_ATOMIC);
 	else
 		iwl_pcie_rxq_restock(trans);
+
+	if (trans_pcie->napi.poll)
+		napi_gro_flush(&trans_pcie->napi, false);
 }
 
 /*
@@ -844,7 +850,7 @@
 				trans_pcie->ict_index, read);
 		trans_pcie->ict_tbl[trans_pcie->ict_index] = 0;
 		trans_pcie->ict_index =
-			iwl_queue_inc_wrap(trans_pcie->ict_index, ICT_COUNT);
+			((trans_pcie->ict_index + 1) & (ICT_COUNT - 1));
 
 		read = le32_to_cpu(trans_pcie->ict_tbl[trans_pcie->ict_index]);
 		trace_iwlwifi_dev_ict_read(trans->dev, trans_pcie->ict_index,
@@ -876,7 +882,6 @@
 	struct isr_statistics *isr_stats = &trans_pcie->isr_stats;
 	u32 inta = 0;
 	u32 handled = 0;
-	u32 i;
 
 	lock_map_acquire(&trans->sync_cmd_lockdep_map);
 
@@ -1028,9 +1033,8 @@
 	/* uCode wakes up after power-down sleep */
 	if (inta & CSR_INT_BIT_WAKEUP) {
 		IWL_DEBUG_ISR(trans, "Wakeup interrupt\n");
-		iwl_pcie_rxq_inc_wr_ptr(trans, &trans_pcie->rxq);
-		for (i = 0; i < trans->cfg->base_params->num_of_queues; i++)
-			iwl_pcie_txq_inc_wr_ptr(trans, &trans_pcie->txq[i]);
+		iwl_pcie_rxq_check_wrptr(trans);
+		iwl_pcie_txq_check_wrptrs(trans);
 
 		isr_stats->wakeup++;
 
@@ -1068,8 +1072,6 @@
 		iwl_write8(trans, CSR_INT_PERIODIC_REG,
 			    CSR_INT_PERIODIC_DIS);
 
-		iwl_pcie_rx_handle(trans);
-
 		/*
 		 * Enable periodic interrupt in 8 msec only if we received
 		 * real RX interrupt (instead of just periodic int), to catch
@@ -1082,6 +1084,10 @@
 				   CSR_INT_PERIODIC_ENA);
 
 		isr_stats->rx++;
+
+		local_bh_disable();
+		iwl_pcie_rx_handle(trans);
+		local_bh_enable();
 	}
 
 	/* This "Tx" DMA channel is used only for loading uCode */

diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 2365553..788085b 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c

@@ -73,6 +73,7 @@
 #include "iwl-csr.h"
 #include "iwl-prph.h"
 #include "iwl-agn-hw.h"
+#include "iwl-fw-error-dump.h"
 #include "internal.h"
 
 static u32 iwl_trans_pcie_read_shr(struct iwl_trans *trans, u32 reg)
@@ -103,7 +104,6 @@
 
 /* PCI registers */
 #define PCI_CFG_RETRY_TIMEOUT	0x041
-#define CPU1_CPU2_SEPARATOR_SECTION	0xFFFFCCCC
 
 static void iwl_pcie_apm_config(struct iwl_trans *trans)
 {
@@ -454,6 +454,7 @@
 {
 	int ret;
 	int t = 0;
+	int iter;
 
 	IWL_DEBUG_INFO(trans, "iwl_trans_prepare_card_hw enter\n");
 
@@ -462,18 +463,23 @@
 	if (ret >= 0)
 		return 0;
 
-	/* If HW is not ready, prepare the conditions to check again */
-	iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG,
-		    CSR_HW_IF_CONFIG_REG_PREPARE);
+	for (iter = 0; iter < 10; iter++) {
+		/* If HW is not ready, prepare the conditions to check again */
+		iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG,
+			    CSR_HW_IF_CONFIG_REG_PREPARE);
 
-	do {
-		ret = iwl_pcie_set_hw_ready(trans);
-		if (ret >= 0)
-			return 0;
+		do {
+			ret = iwl_pcie_set_hw_ready(trans);
+			if (ret >= 0)
+				return 0;
 
-		usleep_range(200, 1000);
-		t += 200;
-	} while (t < 150000);
+			usleep_range(200, 1000);
+			t += 200;
+		} while (t < 150000);
+		msleep(25);
+	}
+
+	IWL_DEBUG_INFO(trans, "got NIC after %d iterations\n", iter);
 
 	return ret;
 }
@@ -1053,6 +1059,12 @@
 	iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val);
 }
 
+static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget)
+{
+	WARN_ON(1);
+	return 0;
+}
+
 static void iwl_trans_pcie_configure(struct iwl_trans *trans,
 				     const struct iwl_trans_config *trans_cfg)
 {
@@ -1079,6 +1091,18 @@
 
 	trans_pcie->command_names = trans_cfg->command_names;
 	trans_pcie->bc_table_dword = trans_cfg->bc_table_dword;
+
+	/* Initialize NAPI here - it should be before registering to mac80211
+	 * in the opmode but after the HW struct is allocated.
+	 * As this function may be called again in some corner cases don't
+	 * do anything if NAPI was already initialized.
+	 */
+	if (!trans_pcie->napi.poll && trans->op_mode->ops->napi_add) {
+		init_dummy_netdev(&trans_pcie->napi_dev);
+		iwl_op_mode_napi_add(trans->op_mode, &trans_pcie->napi,
+				     &trans_pcie->napi_dev,
+				     iwl_pcie_dummy_napi_poll, 64);
+	}
 }
 
 void iwl_trans_pcie_free(struct iwl_trans *trans)
@@ -1099,6 +1123,9 @@
 	pci_disable_device(trans_pcie->pci_dev);
 	kmem_cache_destroy(trans->dev_cmd_pool);
 
+	if (trans_pcie->napi.poll)
+		netif_napi_del(&trans_pcie->napi);
+
 	kfree(trans);
 }
 
@@ -1237,7 +1264,7 @@
 
 #define IWL_FLUSH_WAIT_MS	2000
 
-static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans)
+static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
@@ -1250,13 +1277,31 @@
 
 	/* waiting for all the tx frames complete might take a while */
 	for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) {
+		u8 wr_ptr;
+
 		if (cnt == trans_pcie->cmd_queue)
 			continue;
+		if (!test_bit(cnt, trans_pcie->queue_used))
+			continue;
+		if (!(BIT(cnt) & txq_bm))
+			continue;
+
+		IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt);
 		txq = &trans_pcie->txq[cnt];
 		q = &txq->q;
-		while (q->read_ptr != q->write_ptr && !time_after(jiffies,
-		       now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS)))
+		wr_ptr = ACCESS_ONCE(q->write_ptr);
+
+		while (q->read_ptr != ACCESS_ONCE(q->write_ptr) &&
+		       !time_after(jiffies,
+				   now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
+			u8 write_ptr = ACCESS_ONCE(q->write_ptr);
+
+			if (WARN_ONCE(wr_ptr != write_ptr,
+				      "WR pointer moved while flushing %d -> %d\n",
+				      wr_ptr, write_ptr))
+				return -ETIMEDOUT;
 			msleep(1);
+		}
 
 		if (q->read_ptr != q->write_ptr) {
 			IWL_ERR(trans,
@@ -1264,6 +1309,7 @@
 			ret = -ETIMEDOUT;
 			break;
 		}
+		IWL_DEBUG_TX_QUEUES(trans, "Queue %d is now empty.\n", cnt);
 	}
 
 	if (!ret)
@@ -1298,8 +1344,8 @@
 		IWL_ERR(trans,
 			"Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n",
 			cnt, active ? "" : "in", fifo, tbl_dw,
-			iwl_read_prph(trans,
-				      SCD_QUEUE_RDPTR(cnt)) & (txq->q.n_bd - 1),
+			iwl_read_prph(trans, SCD_QUEUE_RDPTR(cnt)) &
+				(TFD_QUEUE_SIZE_MAX - 1),
 			iwl_read_prph(trans, SCD_QUEUE_WRPTR(cnt)));
 	}
 
@@ -1630,6 +1676,61 @@
 	IWL_ERR(trans, "failed to create the trans debugfs entry\n");
 	return -ENOMEM;
 }
+
+static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd)
+{
+	u32 cmdlen = 0;
+	int i;
+
+	for (i = 0; i < IWL_NUM_OF_TBS; i++)
+		cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i);
+
+	return cmdlen;
+}
+
+static u32 iwl_trans_pcie_dump_data(struct iwl_trans *trans,
+				    void *buf, u32 buflen)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	struct iwl_fw_error_dump_data *data;
+	struct iwl_txq *cmdq = &trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_fw_error_dump_txcmd *txcmd;
+	u32 len;
+	int i, ptr;
+
+	if (!buf)
+		return sizeof(*data) +
+		       cmdq->q.n_window * (sizeof(*txcmd) +
+					   TFD_MAX_PAYLOAD_SIZE);
+
+	len = 0;
+	data = buf;
+	data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD);
+	txcmd = (void *)data->data;
+	spin_lock_bh(&cmdq->lock);
+	ptr = cmdq->q.write_ptr;
+	for (i = 0; i < cmdq->q.n_window; i++) {
+		u8 idx = get_cmd_index(&cmdq->q, ptr);
+		u32 caplen, cmdlen;
+
+		cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]);
+		caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
+
+		if (cmdlen) {
+			len += sizeof(*txcmd) + caplen;
+			txcmd->cmdlen = cpu_to_le32(cmdlen);
+			txcmd->caplen = cpu_to_le32(caplen);
+			memcpy(txcmd->data, cmdq->entries[idx].cmd, caplen);
+			txcmd = (void *)((u8 *)txcmd->data + caplen);
+		}
+
+		ptr = iwl_queue_dec_wrap(ptr);
+	}
+	spin_unlock_bh(&cmdq->lock);
+
+	data->len = cpu_to_le32(len);
+	return sizeof(*data) + len;
+}
 #else
 static int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans,
 					 struct dentry *dir)
@@ -1672,6 +1773,10 @@
 	.grab_nic_access = iwl_trans_pcie_grab_nic_access,
 	.release_nic_access = iwl_trans_pcie_release_nic_access,
 	.set_bits_mask = iwl_trans_pcie_set_bits_mask,
+
+#ifdef CONFIG_IWLWIFI_DEBUGFS
+	.dump_data = iwl_trans_pcie_dump_data,
+#endif
 };
 
 struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,

diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index 3b0c72c..038940a 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c

@@ -70,20 +70,20 @@
 
 	/*
 	 * To avoid ambiguity between empty and completely full queues, there
-	 * should always be less than q->n_bd elements in the queue.
-	 * If q->n_window is smaller than q->n_bd, there is no need to reserve
-	 * any queue entries for this purpose.
+	 * should always be less than TFD_QUEUE_SIZE_MAX elements in the queue.
+	 * If q->n_window is smaller than TFD_QUEUE_SIZE_MAX, there is no need
+	 * to reserve any queue entries for this purpose.
 	 */
-	if (q->n_window < q->n_bd)
+	if (q->n_window < TFD_QUEUE_SIZE_MAX)
 		max = q->n_window;
 	else
-		max = q->n_bd - 1;
+		max = TFD_QUEUE_SIZE_MAX - 1;
 
 	/*
-	 * q->n_bd is a power of 2, so the following is equivalent to modulo by
-	 * q->n_bd and is well defined for negative dividends.
+	 * TFD_QUEUE_SIZE_MAX is a power of 2, so the following is equivalent to
+	 * modulo by TFD_QUEUE_SIZE_MAX and is well defined.
 	 */
-	used = (q->write_ptr - q->read_ptr) & (q->n_bd - 1);
+	used = (q->write_ptr - q->read_ptr) & (TFD_QUEUE_SIZE_MAX - 1);
 
 	if (WARN_ON(used > max))
 		return 0;
@@ -94,17 +94,11 @@
 /*
  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
  */
-static int iwl_queue_init(struct iwl_queue *q, int count, int slots_num, u32 id)
+static int iwl_queue_init(struct iwl_queue *q, int slots_num, u32 id)
 {
-	q->n_bd = count;
 	q->n_window = slots_num;
 	q->id = id;
 
-	/* count must be power-of-two size, otherwise iwl_queue_inc_wrap
-	 * and iwl_queue_dec_wrap are broken. */
-	if (WARN_ON(!is_power_of_2(count)))
-		return -EINVAL;
-
 	/* slots_num must be power-of-two size, otherwise
 	 * get_cmd_index is broken. */
 	if (WARN_ON(!is_power_of_2(slots_num)))
@@ -197,17 +191,17 @@
 		IWL_ERR(trans,
 			"Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n",
 			i, active ? "" : "in", fifo, tbl_dw,
-			iwl_read_prph(trans,
-				      SCD_QUEUE_RDPTR(i)) & (txq->q.n_bd - 1),
+			iwl_read_prph(trans, SCD_QUEUE_RDPTR(i)) &
+				(TFD_QUEUE_SIZE_MAX - 1),
 			iwl_read_prph(trans, SCD_QUEUE_WRPTR(i)));
 	}
 
 	for (i = q->read_ptr; i != q->write_ptr;
-	     i = iwl_queue_inc_wrap(i, q->n_bd))
+	     i = iwl_queue_inc_wrap(i))
 		IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
 			le32_to_cpu(txq->scratchbufs[i].scratch));
 
-	iwl_write_prph(trans, DEVICE_SET_NMI_REG, 1);
+	iwl_force_nmi(trans);
 }
 
 /*
@@ -287,14 +281,14 @@
 /*
  * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
  */
-void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
+static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
+				    struct iwl_txq *txq)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 reg = 0;
 	int txq_id = txq->q.id;
 
-	if (txq->need_update == 0)
-		return;
+	lockdep_assert_held(&txq->lock);
 
 	/*
 	 * explicitly wake up the NIC if:
@@ -317,6 +311,7 @@
 				       txq_id, reg);
 			iwl_set_bit(trans, CSR_GP_CNTRL,
 				    CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+			txq->need_update = true;
 			return;
 		}
 	}
@@ -327,8 +322,23 @@
 	 */
 	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr);
 	iwl_write32(trans, HBUS_TARG_WRPTR, txq->q.write_ptr | (txq_id << 8));
+}
 
-	txq->need_update = 0;
+void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	int i;
+
+	for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
+		struct iwl_txq *txq = &trans_pcie->txq[i];
+
+		spin_lock_bh(&txq->lock);
+		if (trans_pcie->txq[i].need_update) {
+			iwl_pcie_txq_inc_wr_ptr(trans, txq);
+			trans_pcie->txq[i].need_update = false;
+		}
+		spin_unlock_bh(&txq->lock);
+	}
 }
 
 static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
@@ -343,13 +353,6 @@
 	return addr;
 }
 
-static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
-{
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
-
-	return le16_to_cpu(tb->hi_n_len) >> 4;
-}
-
 static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
 				       dma_addr_t addr, u16 len)
 {
@@ -409,13 +412,17 @@
 {
 	struct iwl_tfd *tfd_tmp = txq->tfds;
 
-	/* rd_ptr is bounded by n_bd and idx is bounded by n_window */
+	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
+	 * idx is bounded by n_window
+	 */
 	int rd_ptr = txq->q.read_ptr;
 	int idx = get_cmd_index(&txq->q, rd_ptr);
 
 	lockdep_assert_held(&txq->lock);
 
-	/* We have only q->n_window txq->entries, but we use q->n_bd tfds */
+	/* We have only q->n_window txq->entries, but we use
+	 * TFD_QUEUE_SIZE_MAX tfds
+	 */
 	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]);
 
 	/* free SKB */
@@ -436,7 +443,7 @@
 }
 
 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-				  dma_addr_t addr, u16 len, u8 reset)
+				  dma_addr_t addr, u16 len, bool reset)
 {
 	struct iwl_queue *q;
 	struct iwl_tfd *tfd, *tfd_tmp;
@@ -542,15 +549,14 @@
 {
 	int ret;
 
-	txq->need_update = 0;
+	txq->need_update = false;
 
 	/* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
 	 * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
 	BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
 
 	/* Initialize queue's high/low-water marks, and head/tail indexes */
-	ret = iwl_queue_init(&txq->q, TFD_QUEUE_SIZE_MAX, slots_num,
-			txq_id);
+	ret = iwl_queue_init(&txq->q, slots_num, txq_id);
 	if (ret)
 		return ret;
 
@@ -575,15 +581,12 @@
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
 	struct iwl_queue *q = &txq->q;
 
-	if (!q->n_bd)
-		return;
-
 	spin_lock_bh(&txq->lock);
 	while (q->write_ptr != q->read_ptr) {
 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
 				   txq_id, q->read_ptr);
 		iwl_pcie_txq_free_tfd(trans, txq);
-		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd);
+		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr);
 	}
 	txq->active = false;
 	spin_unlock_bh(&txq->lock);
@@ -620,10 +623,12 @@
 		}
 
 	/* De-alloc circular buffer of TFDs */
-	if (txq->q.n_bd) {
-		dma_free_coherent(dev, sizeof(struct iwl_tfd) *
-				  txq->q.n_bd, txq->tfds, txq->q.dma_addr);
+	if (txq->tfds) {
+		dma_free_coherent(dev,
+				  sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX,
+				  txq->tfds, txq->q.dma_addr);
 		txq->q.dma_addr = 0;
+		txq->tfds = NULL;
 
 		dma_free_coherent(dev,
 				  sizeof(*txq->scratchbufs) * txq->q.n_window,
@@ -680,7 +685,8 @@
 	/* The chain extension of the SCD doesn't work well. This feature is
 	 * enabled by default by the HW, so we need to disable it manually.
 	 */
-	iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
+	if (trans->cfg->base_params->scd_chain_ext_wa)
+		iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
 
 	iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
 				trans_pcie->cmd_fifo);
@@ -931,8 +937,7 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-	/* n_bd is usually 256 => n_bd - 1 = 0xff */
-	int tfd_num = ssn & (txq->q.n_bd - 1);
+	int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1);
 	struct iwl_queue *q = &txq->q;
 	int last_to_free;
 
@@ -956,12 +961,12 @@
 
 	/*Since we free until index _not_ inclusive, the one before index is
 	 * the last we will free. This one must be used */
-	last_to_free = iwl_queue_dec_wrap(tfd_num, q->n_bd);
+	last_to_free = iwl_queue_dec_wrap(tfd_num);
 
 	if (!iwl_queue_used(q, last_to_free)) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
-			__func__, txq_id, last_to_free, q->n_bd,
+			__func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX,
 			q->write_ptr, q->read_ptr);
 		goto out;
 	}
@@ -971,7 +976,7 @@
 
 	for (;
 	     q->read_ptr != tfd_num;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
 
 		if (WARN_ON_ONCE(txq->entries[txq->q.read_ptr].skb == NULL))
 			continue;
@@ -1010,25 +1015,26 @@
 
 	lockdep_assert_held(&txq->lock);
 
-	if ((idx >= q->n_bd) || (!iwl_queue_used(q, idx))) {
+	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
-			__func__, txq_id, idx, q->n_bd,
+			__func__, txq_id, idx, TFD_QUEUE_SIZE_MAX,
 			q->write_ptr, q->read_ptr);
 		return;
 	}
 
-	for (idx = iwl_queue_inc_wrap(idx, q->n_bd); q->read_ptr != idx;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+	for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx;
+	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
 
 		if (nfreed++ > 0) {
 			IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
 				idx, q->write_ptr, q->read_ptr);
-			iwl_write_prph(trans, DEVICE_SET_NMI_REG, 1);
+			iwl_force_nmi(trans);
 		}
 	}
 
-	if (q->read_ptr == q->write_ptr) {
+	if (trans->cfg->base_params->apmg_wake_up_wa &&
+	    q->read_ptr == q->write_ptr) {
 		spin_lock_irqsave(&trans_pcie->reg_lock, flags);
 		WARN_ON(!trans_pcie->cmd_in_flight);
 		trans_pcie->cmd_in_flight = false;
@@ -1309,27 +1315,38 @@
 	cmd_pos = offsetof(struct iwl_device_cmd, payload);
 	copy_size = sizeof(out_cmd->hdr);
 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
-		int copy = 0;
+		int copy;
 
 		if (!cmd->len[i])
 			continue;
 
-		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		/* copy everything if not nocopy/dup */
+		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
+					   IWL_HCMD_DFL_DUP))) {
+			copy = cmd->len[i];
+
+			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
+			cmd_pos += copy;
+			copy_size += copy;
+			continue;
+		}
+
+		/*
+		 * Otherwise we need at least IWL_HCMD_SCRATCHBUF_SIZE copied
+		 * in total (for the scratchbuf handling), but copy up to what
+		 * we can fit into the payload for debug dump purposes.
+		 */
+		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
+
+		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
+		cmd_pos += copy;
+
+		/* However, treat copy_size the proper way, we need it below */
 		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
 			copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmd->len[i])
 				copy = cmd->len[i];
-		}
-
-		/* copy everything if not nocopy/dup */
-		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
-					   IWL_HCMD_DFL_DUP)))
-			copy = cmd->len[i];
-
-		if (copy) {
-			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
-			cmd_pos += copy;
 			copy_size += copy;
 		}
 	}
@@ -1345,7 +1362,7 @@
 	memcpy(&txq->scratchbufs[q->write_ptr], &out_cmd->hdr, scratch_size);
 	iwl_pcie_txq_build_tfd(trans, txq,
 			       iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr),
-			       scratch_size, 1);
+			       scratch_size, true);
 
 	/* map first command fragment, if any remains */
 	if (copy_size > scratch_size) {
@@ -1361,7 +1378,7 @@
 		}
 
 		iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
-				       copy_size - scratch_size, 0);
+				       copy_size - scratch_size, false);
 	}
 
 	/* map the remaining (adjusted) nocopy/dup fragments */
@@ -1384,7 +1401,7 @@
 			goto out;
 		}
 
-		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], 0);
+		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
 	}
 
 	out_meta->flags = cmd->flags;
@@ -1392,8 +1409,6 @@
 		kfree(txq->entries[idx].free_buf);
 	txq->entries[idx].free_buf = dup_buf;
 
-	txq->need_update = 1;
-
 	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr);
 
 	/* start timer if queue currently empty */
@@ -1405,9 +1420,11 @@
 	/*
 	 * wake up the NIC to make sure that the firmware will see the host
 	 * command - we will let the NIC sleep once all the host commands
-	 * returned.
+	 * returned. This needs to be done only on NICs that have
+	 * apmg_wake_up_wa set.
 	 */
-	if (!trans_pcie->cmd_in_flight) {
+	if (trans->cfg->base_params->apmg_wake_up_wa &&
+	    !trans_pcie->cmd_in_flight) {
 		trans_pcie->cmd_in_flight = true;
 		__iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
 					 CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
@@ -1427,7 +1444,7 @@
 	}
 
 	/* Increment and update queue's write index */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr, q->n_bd);
+	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
 	iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
 	spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
@@ -1583,7 +1600,7 @@
 			       get_cmd_string(trans_pcie, cmd->id));
 		ret = -ETIMEDOUT;
 
-		iwl_write_prph(trans, DEVICE_SET_NMI_REG, 1);
+		iwl_force_nmi(trans);
 		iwl_trans_fw_error(trans);
 
 		goto cancel;
@@ -1661,7 +1678,7 @@
 	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
 	void *tb1_addr;
 	u16 len, tb1_len, tb2_len;
-	u8 wait_write_ptr = 0;
+	bool wait_write_ptr;
 	__le16 fc = hdr->frame_control;
 	u8 hdr_len = ieee80211_hdrlen(fc);
 	u16 wifi_seq;
@@ -1722,7 +1739,7 @@
 	memcpy(&txq->scratchbufs[q->write_ptr], &dev_cmd->hdr,
 	       IWL_HCMD_SCRATCHBUF_SIZE);
 	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
-			       IWL_HCMD_SCRATCHBUF_SIZE, 1);
+			       IWL_HCMD_SCRATCHBUF_SIZE, true);
 
 	/* there must be data left over for TB1 or this code must be changed */
 	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_HCMD_SCRATCHBUF_SIZE);
@@ -1732,7 +1749,7 @@
 	tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
 		goto out_err;
-	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, 0);
+	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
 
 	/*
 	 * Set up TFD's third entry to point directly to remainder
@@ -1748,7 +1765,7 @@
 					   &txq->tfds[q->write_ptr]);
 			goto out_err;
 		}
-		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, 0);
+		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false);
 	}
 
 	/* Set up entry for this TFD in Tx byte-count array */
@@ -1762,12 +1779,7 @@
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
 				  skb->data + hdr_len, tb2_len);
 
-	if (!ieee80211_has_morefrags(fc)) {
-		txq->need_update = 1;
-	} else {
-		wait_write_ptr = 1;
-		txq->need_update = 0;
-	}
+	wait_write_ptr = ieee80211_has_morefrags(fc);
 
 	/* start timer if queue currently empty */
 	if (txq->need_update && q->read_ptr == q->write_ptr &&
@@ -1775,22 +1787,19 @@
 		mod_timer(&txq->stuck_timer, jiffies + trans_pcie->wd_timeout);
 
 	/* Tell device the write index *just past* this latest filled TFD */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr, q->n_bd);
-	iwl_pcie_txq_inc_wr_ptr(trans, txq);
+	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
+	if (!wait_write_ptr)
+		iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
 	/*
 	 * At this point the frame is "transmitted" successfully
-	 * and we will get a TX status notification eventually,
-	 * regardless of the value of ret. "ret" only indicates
-	 * whether or not we should update the write pointer.
+	 * and we will get a TX status notification eventually.
 	 */
 	if (iwl_queue_space(q) < q->high_mark) {
-		if (wait_write_ptr) {
-			txq->need_update = 1;
+		if (wait_write_ptr)
 			iwl_pcie_txq_inc_wr_ptr(trans, txq);
-		} else {
+		else
 			iwl_stop_queue(trans, txq);
-		}
 	}
 	spin_unlock(&txq->lock);
 	return 0;

diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 54e344a..47a998d 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c

@@ -1006,9 +1006,8 @@
 } __packed;
 
 static int lbs_set_key_material(struct lbs_private *priv,
-				int key_type,
-				int key_info,
-				u8 *key, u16 key_len)
+				int key_type, int key_info,
+				const u8 *key, u16 key_len)
 {
 	struct cmd_key_material cmd;
 	int ret;
@@ -1610,7 +1609,7 @@
  */
 
 static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			       const u8 *mac, struct station_info *sinfo)
 {
 	struct lbs_private *priv = wiphy_priv(wiphy);
 	s8 signal, noise;

diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h
index ab966f0..407784a 100644
--- a/drivers/net/wireless/libertas/defs.h
+++ b/drivers/net/wireless/libertas/defs.h

@@ -90,7 +90,8 @@
 #define lbs_deb_cfg80211(fmt, args...)  LBS_DEB_LL(LBS_DEB_CFG80211, " cfg80211", fmt, ##args)
 
 #ifdef DEBUG
-static inline void lbs_deb_hex(unsigned int grp, const char *prompt, u8 *buf, int len)
+static inline void lbs_deb_hex(unsigned int grp, const char *prompt,
+			       const u8 *buf, int len)
 {
 	int i = 0;
 

diff --git a/drivers/net/wireless/libertas/rx.c b/drivers/net/wireless/libertas/rx.c
index c7366b0..e446fed 100644
--- a/drivers/net/wireless/libertas/rx.c
+++ b/drivers/net/wireless/libertas/rx.c

@@ -71,8 +71,10 @@
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	if (priv->wdev->iftype == NL80211_IFTYPE_MONITOR)
-		return process_rxed_802_11_packet(priv, skb);
+	if (priv->wdev->iftype == NL80211_IFTYPE_MONITOR) {
+		ret = process_rxed_802_11_packet(priv, skb);
+		goto done;
+	}
 
 	p_rx_pd = (struct rxpd *) skb->data;
 	p_rx_pkt = (struct rxpackethdr *) ((u8 *)p_rx_pd +
@@ -86,7 +88,7 @@
 	if (skb->len < (ETH_HLEN + 8 + sizeof(struct rxpd))) {
 		lbs_deb_rx("rx err: frame received with bad length\n");
 		dev->stats.rx_length_errors++;
-		ret = 0;
+		ret = -EINVAL;
 		dev_kfree_skb(skb);
 		goto done;
 	}

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9d7a52f..a312c65 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c

@@ -1676,7 +1676,9 @@
 	return 0;
 }
 
-static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void mac80211_hwsim_flush(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 u32 queues, bool drop)
 {
 	/* Not implemented, queues only on kernel side */
 }
@@ -2056,6 +2058,7 @@
 			    WIPHY_FLAG_AP_UAPSD |
 			    WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 	hw->wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
+	hw->wiphy->features |= NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE;
 
 	/* ask mac80211 to reserve space for magic */
 	hw->vif_data_size = sizeof(struct hwsim_vif_priv);

diff --git a/drivers/net/wireless/mwifiex/11ac.c b/drivers/net/wireless/mwifiex/11ac.c
index c92f27a..706831d 100644
--- a/drivers/net/wireless/mwifiex/11ac.c
+++ b/drivers/net/wireless/mwifiex/11ac.c

@@ -212,8 +212,7 @@
 				      sizeof(struct mwifiex_ie_types_header));
 			memcpy((u8 *)vht_op +
 				sizeof(struct mwifiex_ie_types_header),
-			       (u8 *)bss_desc->bcn_vht_oper +
-			       sizeof(struct ieee_types_header),
+			       (u8 *)bss_desc->bcn_vht_oper,
 			       le16_to_cpu(vht_op->header.len));
 
 			/* negotiate the channel width and central freq

diff --git a/drivers/net/wireless/mwifiex/11n.c b/drivers/net/wireless/mwifiex/11n.c
index d14ead8..e1c2f67 100644
--- a/drivers/net/wireless/mwifiex/11n.c
+++ b/drivers/net/wireless/mwifiex/11n.c

@@ -345,8 +345,7 @@
 
 			memcpy((u8 *) ht_info +
 			       sizeof(struct mwifiex_ie_types_header),
-			       (u8 *) bss_desc->bcn_ht_oper +
-			       sizeof(struct ieee_types_header),
+			       (u8 *)bss_desc->bcn_ht_oper,
 			       le16_to_cpu(ht_info->header.len));
 
 			if (!(sband->ht_cap.cap &
@@ -750,3 +749,45 @@
 
 	return;
 }
+
+/*
+ * Map a channel number to its IEEE80211_HT_PARAM_CHA_SEC_* secondary
+ * channel offset.
+ *
+ * Channel 165 and any channel that is not listed below yield
+ * IEEE80211_HT_PARAM_CHA_SEC_NONE.
+ */
+u8 mwifiex_get_sec_chan_offset(int chan)
+{
+	switch (chan) {
+	case 36:
+	case 44:
+	case 52:
+	case 60:
+	case 100:
+	case 108:
+	case 116:
+	case 124:
+	case 132:
+	case 140:
+	case 149:
+	case 157:
+		return IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+	case 40:
+	case 48:
+	case 56:
+	case 64:
+	case 104:
+	case 112:
+	case 120:
+	case 128:
+	case 136:
+	case 144:
+	case 153:
+	case 161:
+		return IEEE80211_HT_PARAM_CHA_SEC_BELOW;
+	case 165:
+	default:
+		return IEEE80211_HT_PARAM_CHA_SEC_NONE;
+	}
+}

diff --git a/drivers/net/wireless/mwifiex/11n.h b/drivers/net/wireless/mwifiex/11n.h
index 40b007a..0b73fa0 100644
--- a/drivers/net/wireless/mwifiex/11n.h
+++ b/drivers/net/wireless/mwifiex/11n.h

@@ -63,6 +63,7 @@
 				int cmd_action,
 				struct mwifiex_ds_11n_amsdu_aggr_ctrl *aa_ctrl);
 void mwifiex_del_tx_ba_stream_tbl_by_ra(struct mwifiex_private *priv, u8 *ra);
+u8 mwifiex_get_sec_chan_offset(int chan);
 
 static inline u8
 mwifiex_is_station_ampdu_allowed(struct mwifiex_private *priv,
@@ -199,7 +200,7 @@
 }
 
 static inline u8
-mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, u8 *ra)
+mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, const u8 *ra)
 {
 	struct mwifiex_sta_node *node = mwifiex_get_sta_entry(priv, ra);
 	if (node)

diff --git a/drivers/net/wireless/mwifiex/11n_aggr.c b/drivers/net/wireless/mwifiex/11n_aggr.c
index 63211707..5b32106 100644
--- a/drivers/net/wireless/mwifiex/11n_aggr.c
+++ b/drivers/net/wireless/mwifiex/11n_aggr.c

@@ -100,6 +100,7 @@
 			    struct sk_buff *skb)
 {
 	struct txpd *local_tx_pd;
+	struct mwifiex_txinfo *tx_info = MWIFIEX_SKB_TXCB(skb);
 
 	skb_push(skb, sizeof(*local_tx_pd));
 
@@ -118,6 +119,9 @@
 	local_tx_pd->tx_pkt_length = cpu_to_le16(skb->len -
 						 sizeof(*local_tx_pd));
 
+	if (tx_info->flags & MWIFIEX_BUF_FLAG_TDLS_PKT)
+		local_tx_pd->flags |= MWIFIEX_TXPD_FLAGS_TDLS_PACKET;
+
 	if (local_tx_pd->tx_control == 0)
 		/* TxCtrl set by user or default */
 		local_tx_pd->tx_control = cpu_to_le32(priv->pkt_tx_ctrl);
@@ -160,6 +164,7 @@
 	int pad = 0, ret;
 	struct mwifiex_tx_param tx_param;
 	struct txpd *ptx_pd = NULL;
+	struct timeval tv;
 	int headroom = adapter->iface_type == MWIFIEX_USB ? 0 : INTF_HEADER_LEN;
 
 	skb_src = skb_peek(&pra_list->skb_head);
@@ -182,8 +187,14 @@
 
 	tx_info_aggr->bss_type = tx_info_src->bss_type;
 	tx_info_aggr->bss_num = tx_info_src->bss_num;
+
+	if (tx_info_src->flags & MWIFIEX_BUF_FLAG_TDLS_PKT)
+		tx_info_aggr->flags |= MWIFIEX_BUF_FLAG_TDLS_PKT;
 	skb_aggr->priority = skb_src->priority;
 
+	do_gettimeofday(&tv);
+	skb_aggr->tstamp = timeval_to_ktime(tv);
+
 	do {
 		/* Check if AMSDU can accommodate this MSDU */
 		if (skb_tailroom(skb_aggr) < (skb_src->len + LLC_SNAP_LEN))
@@ -236,18 +247,11 @@
 		ret = adapter->if_ops.host_to_card(adapter, MWIFIEX_USB_EP_DATA,
 						   skb_aggr, NULL);
 	} else {
-		/*
-		 * Padding per MSDU will affect the length of next
-		 * packet and hence the exact length of next packet
-		 * is uncertain here.
-		 *
-		 * Also, aggregation of transmission buffer, while
-		 * downloading the data to the card, wont gain much
-		 * on the AMSDU packets as the AMSDU packets utilizes
-		 * the transmission buffer space to the maximum
-		 * (adapter->tx_buf_size).
-		 */
-		tx_param.next_pkt_len = 0;
+		if (skb_src)
+			tx_param.next_pkt_len =
+					skb_src->len + sizeof(struct txpd);
+		else
+			tx_param.next_pkt_len = 0;
 
 		ret = adapter->if_ops.host_to_card(adapter, MWIFIEX_TYPE_DATA,
 						   skb_aggr, &tx_param);

diff --git a/drivers/net/wireless/mwifiex/README b/drivers/net/wireless/mwifiex/README
index b9242c3..3b55ce5 100644
--- a/drivers/net/wireless/mwifiex/README
+++ b/drivers/net/wireless/mwifiex/README

@@ -200,4 +200,11 @@
 
 	cat getlog
 
+fw_dump
+	This command is used to dump firmware memory into files.
+	A separate file is created for each memory segment.
+	Usage:
+
+	cat fw_dump
+
 ===============================================================================

diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 21ee27a..e95dec9 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c

@@ -994,7 +994,7 @@
  */
 static int
 mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *mac, struct station_info *sinfo)
+			     const u8 *mac, struct station_info *sinfo)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -1270,7 +1270,7 @@
  */
 static int
 mwifiex_cfg80211_del_station(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *mac)
+			     const u8 *mac)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 	struct mwifiex_sta_node *sta_node;
@@ -2629,7 +2629,7 @@
  */
 static int
 mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, u8 action_code, u8 dialog_token,
+			   const u8 *peer, u8 action_code, u8 dialog_token,
 			   u16 status_code, u32 peer_capability,
 			   const u8 *extra_ies, size_t extra_ies_len)
 {
@@ -2701,7 +2701,7 @@
 
 static int
 mwifiex_cfg80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, enum nl80211_tdls_operation action)
+			   const u8 *peer, enum nl80211_tdls_operation action)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -2748,9 +2748,8 @@
 }
 
 static int
-mwifiex_cfg80211_add_station(struct wiphy *wiphy,
-			     struct net_device *dev,
-			     u8 *mac, struct station_parameters *params)
+mwifiex_cfg80211_add_station(struct wiphy *wiphy, struct net_device *dev,
+			     const u8 *mac, struct station_parameters *params)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -2765,9 +2764,9 @@
 }
 
 static int
-mwifiex_cfg80211_change_station(struct wiphy *wiphy,
-				struct net_device *dev,
-				u8 *mac, struct station_parameters *params)
+mwifiex_cfg80211_change_station(struct wiphy *wiphy, struct net_device *dev,
+				const u8 *mac,
+				struct station_parameters *params)
 {
 	int ret;
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);

diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c
index 1062c91..8dee6c8 100644
--- a/drivers/net/wireless/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/mwifiex/cmdevt.c

@@ -955,8 +955,6 @@
 			adapter->cmd_wait_q.status = -ETIMEDOUT;
 			wake_up_interruptible(&adapter->cmd_wait_q.wait);
 			mwifiex_cancel_pending_ioctl(adapter);
-			/* reset cmd_sent flag to unblock new commands */
-			adapter->cmd_sent = false;
 		}
 	}
 	if (adapter->hw_status == MWIFIEX_HW_STATUS_INITIALIZING)

diff --git a/drivers/net/wireless/mwifiex/debugfs.c b/drivers/net/wireless/mwifiex/debugfs.c
index b8a49aa..7b419bb 100644
--- a/drivers/net/wireless/mwifiex/debugfs.c
+++ b/drivers/net/wireless/mwifiex/debugfs.c

@@ -257,6 +257,29 @@
 }
 
 /*
+ * Proc firmware dump read handler.
+ *
+ * This function is called when the 'fw_dump' file is opened for
+ * reading.
+ * This function dumps firmware memory in different files
+ * (ex. DTCM, ITCM, SQRAM etc.) based on the segments for
+ * debugging.
+ */
+static ssize_t
+mwifiex_fw_dump_read(struct file *file, char __user *ubuf,
+		     size_t count, loff_t *ppos)
+{
+	struct mwifiex_private *priv = file->private_data;
+	struct mwifiex_adapter *adapter = priv->adapter;
+
+	/* the fw_dump hook is optional; fail the read without it */
+	if (!adapter->if_ops.fw_dump)
+		return -EIO;
+	adapter->if_ops.fw_dump(adapter);
+	return 0;	/* nothing is copied to ubuf */
+}
+
+/*
  * Proc getlog file read handler.
  *
  * This function is called when the 'getlog' file is opened for reading
@@ -699,6 +722,7 @@
 MWIFIEX_DFS_FILE_READ_OPS(info);
 MWIFIEX_DFS_FILE_READ_OPS(debug);
 MWIFIEX_DFS_FILE_READ_OPS(getlog);
+MWIFIEX_DFS_FILE_READ_OPS(fw_dump);
 MWIFIEX_DFS_FILE_OPS(regrdwr);
 MWIFIEX_DFS_FILE_OPS(rdeeprom);
 
@@ -722,6 +746,7 @@
 	MWIFIEX_DFS_ADD_FILE(getlog);
 	MWIFIEX_DFS_ADD_FILE(regrdwr);
 	MWIFIEX_DFS_ADD_FILE(rdeeprom);
+	MWIFIEX_DFS_ADD_FILE(fw_dump);
 }
 
 /*

diff --git a/drivers/net/wireless/mwifiex/decl.h b/drivers/net/wireless/mwifiex/decl.h
index e7b3e16..38da6ff 100644
--- a/drivers/net/wireless/mwifiex/decl.h
+++ b/drivers/net/wireless/mwifiex/decl.h

@@ -42,12 +42,12 @@
 #define MWIFIEX_MAX_TX_BASTREAM_SUPPORTED	2
 #define MWIFIEX_MAX_RX_BASTREAM_SUPPORTED	16
 
-#define MWIFIEX_STA_AMPDU_DEF_TXWINSIZE        16
-#define MWIFIEX_STA_AMPDU_DEF_RXWINSIZE        32
+#define MWIFIEX_STA_AMPDU_DEF_TXWINSIZE        64
+#define MWIFIEX_STA_AMPDU_DEF_RXWINSIZE        64
 #define MWIFIEX_UAP_AMPDU_DEF_TXWINSIZE        32
 #define MWIFIEX_UAP_AMPDU_DEF_RXWINSIZE        16
-#define MWIFIEX_11AC_STA_AMPDU_DEF_TXWINSIZE   32
-#define MWIFIEX_11AC_STA_AMPDU_DEF_RXWINSIZE   48
+#define MWIFIEX_11AC_STA_AMPDU_DEF_TXWINSIZE   64
+#define MWIFIEX_11AC_STA_AMPDU_DEF_RXWINSIZE   64
 #define MWIFIEX_11AC_UAP_AMPDU_DEF_TXWINSIZE   48
 #define MWIFIEX_11AC_UAP_AMPDU_DEF_RXWINSIZE   32
 

diff --git a/drivers/net/wireless/mwifiex/fw.h b/drivers/net/wireless/mwifiex/fw.h
index b485dc1..3175dd0 100644
--- a/drivers/net/wireless/mwifiex/fw.h
+++ b/drivers/net/wireless/mwifiex/fw.h

@@ -169,6 +169,7 @@
 #define TLV_TYPE_GWK_CIPHER         (PROPRIETARY_TLV_BASE_ID + 146)
 #define TLV_TYPE_COALESCE_RULE      (PROPRIETARY_TLV_BASE_ID + 154)
 #define TLV_TYPE_KEY_PARAM_V2       (PROPRIETARY_TLV_BASE_ID + 156)
+#define TLV_TYPE_TDLS_IDLE_TIMEOUT  (PROPRIETARY_TLV_BASE_ID + 194)
 #define TLV_TYPE_FW_API_REV         (PROPRIETARY_TLV_BASE_ID + 199)
 
 #define MWIFIEX_TX_DATA_BUF_SIZE_2K        2048
@@ -229,6 +230,7 @@
 #define ISENABLED_40MHZ_INTOLERANT(Dot11nDevCap) (Dot11nDevCap & BIT(8))
 #define ISSUPP_RXLDPC(Dot11nDevCap) (Dot11nDevCap & BIT(22))
 #define ISSUPP_BEAMFORMING(Dot11nDevCap) (Dot11nDevCap & BIT(30))
+#define ISALLOWED_CHANWIDTH40(ht_param) (ht_param & BIT(2))
 
 /* httxcfg bitmap
  * 0		reserved
@@ -403,7 +405,7 @@
 #define HS_CFG_CANCEL			0xffffffff
 #define HS_CFG_COND_DEF			0x00000000
 #define HS_CFG_GPIO_DEF			0xff
-#define HS_CFG_GAP_DEF			0
+#define HS_CFG_GAP_DEF			0xff
 #define HS_CFG_COND_BROADCAST_DATA	0x00000001
 #define HS_CFG_COND_UNICAST_DATA	0x00000002
 #define HS_CFG_COND_MAC_EVENT		0x00000004
@@ -487,6 +489,7 @@
 #define EVENT_UAP_MIC_COUNTERMEASURES   0x0000004c
 #define EVENT_HOSTWAKE_STAIE		0x0000004d
 #define EVENT_CHANNEL_SWITCH_ANN        0x00000050
+#define EVENT_TDLS_GENERIC_EVENT        0x00000052
 #define EVENT_EXT_SCAN_REPORT           0x00000058
 #define EVENT_REMAIN_ON_CHAN_EXPIRED    0x0000005f
 
@@ -519,6 +522,7 @@
 #define ACT_TDLS_DELETE            0x00
 #define ACT_TDLS_CREATE            0x01
 #define ACT_TDLS_CONFIG            0x02
+#define TDLS_EVENT_LINK_TEAR_DOWN  3
 
 #define MWIFIEX_FW_V15		   15
 
@@ -535,6 +539,7 @@
 #define MWIFIEX_TxPD_POWER_MGMT_NULL_PACKET 0x01
 #define MWIFIEX_TxPD_POWER_MGMT_LAST_PACKET 0x08
 #define MWIFIEX_TXPD_FLAGS_TDLS_PACKET      0x10
+#define MWIFIEX_RXPD_FLAGS_TDLS_PACKET      0x01
 
 struct txpd {
 	u8 bss_type;
@@ -577,7 +582,7 @@
 	 * [Bit 7] Reserved
 	 */
 	u8 ht_info;
-	u8 reserved;
+	u8 flags;
 } __packed;
 
 struct uap_txpd {
@@ -708,6 +713,13 @@
 	u8 ie[MWIFIEX_MAX_VSIE_LEN];
 };
 
+#define MWIFIEX_TDLS_IDLE_TIMEOUT	60
+
+struct mwifiex_ie_types_tdls_idle_timeout {	/* TLV_TYPE_TDLS_IDLE_TIMEOUT */
+	struct mwifiex_ie_types_header header;	/* len covers 'value' only */
+	__le16 value;	/* idle timeout; unit not stated here - TODO confirm */
+} __packed;
+
 struct mwifiex_ie_types_rsn_param_set {
 	struct mwifiex_ie_types_header header;
 	u8 rsn_ie[1];
@@ -1745,6 +1757,15 @@
 	__le16 events;
 } __packed;
 
+struct mwifiex_tdls_generic_event {	/* follows event_cause in the skb */
+	__le16 type;		/* e.g. TDLS_EVENT_LINK_TEAR_DOWN */
+	u8 peer_mac[ETH_ALEN];	/* used to look up the sta entry */
+	union {
+		__le16 reason_code;	/* read on link tear down */
+		__le16 reserved;
+	} u;	/* these 2 bytes are not mandatory in the event */
+} __packed;
+
 struct mwifiex_ie {
 	__le16 ie_index;
 	__le16 mgmt_subtype_mask;

diff --git a/drivers/net/wireless/mwifiex/ioctl.h b/drivers/net/wireless/mwifiex/ioctl.h
index ee494db..1b57672 100644
--- a/drivers/net/wireless/mwifiex/ioctl.h
+++ b/drivers/net/wireless/mwifiex/ioctl.h

@@ -303,7 +303,7 @@
 	u32 rx_ant;
 };
 
-#define MWIFIEX_NUM_OF_CMD_BUFFER	20
+#define MWIFIEX_NUM_OF_CMD_BUFFER	50
 #define MWIFIEX_SIZE_OF_CMD_BUFFER	2048
 
 enum {

diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index 9c771b3..cbabc12 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c

@@ -521,7 +521,6 @@
 		release_firmware(adapter->firmware);
 		adapter->firmware = NULL;
 	}
-	complete(&adapter->fw_load);
 	if (init_failed)
 		mwifiex_free_adapter(adapter);
 	up(sem);
@@ -535,7 +534,6 @@
 {
 	int ret;
 
-	init_completion(&adapter->fw_load);
 	ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name,
 				      adapter->dev, GFP_KERNEL, adapter,
 				      mwifiex_fw_dpc);

diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index d53e1e8..1398afa 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h

@@ -672,6 +672,7 @@
 	int (*init_fw_port) (struct mwifiex_adapter *);
 	int (*dnld_fw) (struct mwifiex_adapter *, struct mwifiex_fw_image *);
 	void (*card_reset) (struct mwifiex_adapter *);
+	void (*fw_dump)(struct mwifiex_adapter *);
 	int (*clean_pcie_ring) (struct mwifiex_adapter *adapter);
 };
 
@@ -787,7 +788,6 @@
 	struct mwifiex_wait_queue cmd_wait_q;
 	u8 scan_wait_q_woken;
 	spinlock_t queue_lock;		/* lock for tx queues */
-	struct completion fw_load;
 	u8 country_code[IEEE80211_COUNTRY_STRING_LEN];
 	u16 max_mgmt_ie_index;
 	u8 scan_delay_cnt;
@@ -910,8 +910,6 @@
 				  struct sk_buff *skb);
 int mwifiex_process_sta_event(struct mwifiex_private *);
 int mwifiex_process_uap_event(struct mwifiex_private *);
-struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac);
 void mwifiex_delete_all_station_list(struct mwifiex_private *priv);
 void *mwifiex_process_sta_txpd(struct mwifiex_private *, struct sk_buff *skb);
 void *mwifiex_process_uap_txpd(struct mwifiex_private *, struct sk_buff *skb);
@@ -1101,7 +1099,7 @@
 		return 0;
 
 	/* Clear csa channel, if DFS channel move time has passed */
-	if (jiffies > priv->csa_expire_time) {
+	if (time_after(jiffies, priv->csa_expire_time)) {
 		priv->csa_chan = 0;
 		priv->csa_expire_time = 0;
 	}
@@ -1220,26 +1218,26 @@
 extern const struct ethtool_ops mwifiex_ethtool_ops;
 
 void mwifiex_del_all_sta_list(struct mwifiex_private *priv);
-void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac);
+void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac);
 void
 mwifiex_set_sta_ht_cap(struct mwifiex_private *priv, const u8 *ies,
 		       int ies_len, struct mwifiex_sta_node *node);
 struct mwifiex_sta_node *
-mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac);
+mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac);
 struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac);
-int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, u8 *peer,
+mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac);
+int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
 				 u8 action_code, u8 dialog_token,
 				 u16 status_code, const u8 *extra_ies,
 				 size_t extra_ies_len);
-int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
-				 u16 status_code, const u8 *extra_ies,
-				 size_t extra_ies_len);
+int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer,
+				   u8 action_code, u8 dialog_token,
+				   u16 status_code, const u8 *extra_ies,
+				   size_t extra_ies_len);
 void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv,
 				       u8 *buf, int len);
-int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action);
-int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac);
+int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action);
+int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac);
 void mwifiex_disable_all_tdls_links(struct mwifiex_private *priv);
 bool mwifiex_is_bss_in_11ac_mode(struct mwifiex_private *priv);
 u8 mwifiex_get_center_freq_index(struct mwifiex_private *priv, u8 band,

diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c
index a7e8b96..574d4b5 100644
--- a/drivers/net/wireless/mwifiex/pcie.c
+++ b/drivers/net/wireless/mwifiex/pcie.c

@@ -221,9 +221,6 @@
 	if (!adapter || !adapter->priv_num)
 		return;
 
-	/* In case driver is removed when asynchronous FW load is in progress */
-	wait_for_completion(&adapter->fw_load);
-
 	if (user_rmmod) {
 #ifdef CONFIG_PM_SLEEP
 		if (adapter->is_suspended)
@@ -1074,6 +1071,7 @@
  * is mapped to PCI device memory. Tx ring pointers are advanced accordingly.
  * Download ready interrupt to FW is deffered if Tx ring is not full and
  * additional payload can be accomodated.
+ * Caller must ensure tx_param parameter to this function is not NULL.
  */
 static int
 mwifiex_pcie_send_data(struct mwifiex_adapter *adapter, struct sk_buff *skb,

diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c
index 7b3af3d..45c5b34 100644
--- a/drivers/net/wireless/mwifiex/scan.c
+++ b/drivers/net/wireless/mwifiex/scan.c

@@ -29,9 +29,6 @@
 #define MWIFIEX_MAX_CHANNELS_PER_SPECIFIC_SCAN   14
 
 #define MWIFIEX_DEF_CHANNELS_PER_SCAN_CMD	4
-#define MWIFIEX_LIMIT_1_CHANNEL_PER_SCAN_CMD	15
-#define MWIFIEX_LIMIT_2_CHANNELS_PER_SCAN_CMD	27
-#define MWIFIEX_LIMIT_3_CHANNELS_PER_SCAN_CMD	35
 
 /* Memory needed to store a max sized Channel List TLV for a firmware scan */
 #define CHAN_TLV_MAX_SIZE  (sizeof(struct mwifiex_ie_types_header)         \
@@ -1055,20 +1052,10 @@
 
 	/*
 	 * In associated state we will reduce the number of channels scanned per
-	 * scan command to avoid any traffic delay/loss. This number is decided
-	 * based on total number of channels to be scanned due to constraints
-	 * of command buffers.
+	 * scan command to 1 to avoid any traffic delay/loss.
 	 */
-	if (priv->media_connected) {
-		if (chan_num < MWIFIEX_LIMIT_1_CHANNEL_PER_SCAN_CMD)
+	if (priv->media_connected)
 			*max_chan_per_scan = 1;
-		else if (chan_num < MWIFIEX_LIMIT_2_CHANNELS_PER_SCAN_CMD)
-			*max_chan_per_scan = 2;
-		else if (chan_num < MWIFIEX_LIMIT_3_CHANNELS_PER_SCAN_CMD)
-			*max_chan_per_scan = 3;
-		else
-			*max_chan_per_scan = 4;
-	}
 }
 
 /*
@@ -1353,23 +1340,17 @@
 					      bss_entry->beacon_buf);
 			break;
 		case WLAN_EID_BSS_COEX_2040:
-			bss_entry->bcn_bss_co_2040 = current_ptr +
-				sizeof(struct ieee_types_header);
-			bss_entry->bss_co_2040_offset = (u16) (current_ptr +
-					sizeof(struct ieee_types_header) -
-						bss_entry->beacon_buf);
+			bss_entry->bcn_bss_co_2040 = current_ptr;
+			bss_entry->bss_co_2040_offset =
+				(u16) (current_ptr - bss_entry->beacon_buf);
 			break;
 		case WLAN_EID_EXT_CAPABILITY:
-			bss_entry->bcn_ext_cap = current_ptr +
-				sizeof(struct ieee_types_header);
-			bss_entry->ext_cap_offset = (u16) (current_ptr +
-					sizeof(struct ieee_types_header) -
-					bss_entry->beacon_buf);
+			bss_entry->bcn_ext_cap = current_ptr;
+			bss_entry->ext_cap_offset =
+				(u16) (current_ptr - bss_entry->beacon_buf);
 			break;
 		case WLAN_EID_OPMODE_NOTIF:
-			bss_entry->oper_mode =
-				(void *)(current_ptr +
-					 sizeof(struct ieee_types_header));
+			bss_entry->oper_mode = (void *)current_ptr;
 			bss_entry->oper_mode_offset =
 					(u16)((u8 *)bss_entry->oper_mode -
 					      bss_entry->beacon_buf);
@@ -1757,6 +1738,19 @@
 	return 0;
 }
 
+static void mwifiex_complete_scan(struct mwifiex_private *priv)
+{
+	struct mwifiex_adapter *adapter = priv->adapter;
+
+	if (!adapter->curr_cmd->wait_q_enabled)
+		return;
+	adapter->cmd_wait_q.status = 0;
+	if (priv->scan_request)	/* only an internal scan is completed here */
+		return;
+	dev_dbg(adapter->dev, "complete internal scan\n");
+	mwifiex_complete_cmd(adapter, adapter->curr_cmd);
+}
+
 static void mwifiex_check_next_scan_command(struct mwifiex_private *priv)
 {
 	struct mwifiex_adapter *adapter = priv->adapter;
@@ -1770,16 +1764,9 @@
 		adapter->scan_processing = false;
 		spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, flags);
 
-		/* Need to indicate IOCTL complete */
-		if (adapter->curr_cmd->wait_q_enabled) {
-			adapter->cmd_wait_q.status = 0;
-			if (!priv->scan_request) {
-				dev_dbg(adapter->dev,
-					"complete internal scan\n");
-				mwifiex_complete_cmd(adapter,
-						     adapter->curr_cmd);
-			}
-		}
+		if (!adapter->ext_scan)
+			mwifiex_complete_scan(priv);
+
 		if (priv->report_scan_result)
 			priv->report_scan_result = false;
 
@@ -1984,6 +1971,9 @@
 int mwifiex_ret_802_11_scan_ext(struct mwifiex_private *priv)
 {
 	dev_dbg(priv->adapter->dev, "info: EXT scan returns successfully\n");
+
+	mwifiex_complete_scan(priv);
+
 	return 0;
 }
 

diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c
index d206f04..4ce3d7b 100644
--- a/drivers/net/wireless/mwifiex/sdio.c
+++ b/drivers/net/wireless/mwifiex/sdio.c

@@ -85,6 +85,8 @@
 		card->supports_sdio_new_mode = data->supports_sdio_new_mode;
 		card->has_control_mask = data->has_control_mask;
 		card->tx_buf_size = data->tx_buf_size;
+		card->mp_tx_agg_buf_size = data->mp_tx_agg_buf_size;
+		card->mp_rx_agg_buf_size = data->mp_rx_agg_buf_size;
 	}
 
 	sdio_claim_host(func);
@@ -177,9 +179,6 @@
 	if (!adapter || !adapter->priv_num)
 		return;
 
-	/* In case driver is removed when asynchronous FW load is in progress */
-	wait_for_completion(&adapter->fw_load);
-
 	if (user_rmmod) {
 		if (adapter->is_suspended)
 			mwifiex_sdio_resume(adapter->dev);
@@ -1679,8 +1678,12 @@
 	if (ret) {
 		if (type == MWIFIEX_TYPE_CMD)
 			adapter->cmd_sent = false;
-		if (type == MWIFIEX_TYPE_DATA)
+		if (type == MWIFIEX_TYPE_DATA) {
 			adapter->data_sent = false;
+			/* restore curr_wr_port in error cases */
+			card->curr_wr_port = port;
+			card->mp_wr_bitmap |= (u32)(1 << card->curr_wr_port);
+		}
 	} else {
 		if (type == MWIFIEX_TYPE_DATA) {
 			if (!(card->mp_wr_bitmap & (1 << card->curr_wr_port)))
@@ -1842,8 +1845,8 @@
 	card->mpa_rx.len_arr = kzalloc(sizeof(*card->mpa_rx.len_arr) *
 				       card->mp_agg_pkt_limit, GFP_KERNEL);
 	ret = mwifiex_alloc_sdio_mpa_buffers(adapter,
-					     SDIO_MP_TX_AGGR_DEF_BUF_SIZE,
-					     SDIO_MP_RX_AGGR_DEF_BUF_SIZE);
+					     card->mp_tx_agg_buf_size,
+					     card->mp_rx_agg_buf_size);
 	if (ret) {
 		dev_err(adapter->dev, "failed to alloc sdio mp-a buffers\n");
 		kfree(card->mp_regs);

diff --git a/drivers/net/wireless/mwifiex/sdio.h b/drivers/net/wireless/mwifiex/sdio.h
index c71201b..6eea30b 100644
--- a/drivers/net/wireless/mwifiex/sdio.h
+++ b/drivers/net/wireless/mwifiex/sdio.h

@@ -64,10 +64,8 @@
 #define UP_LD_CMD_PORT_HOST_INT_STATUS	(0x40U)
 #define DN_LD_CMD_PORT_HOST_INT_STATUS	(0x80U)
 
-#define SDIO_MP_TX_AGGR_DEF_BUF_SIZE        (8192)	/* 8K */
-
-/* Multi port RX aggregation buffer size */
-#define SDIO_MP_RX_AGGR_DEF_BUF_SIZE        (16384)	/* 16K */
+#define MWIFIEX_MP_AGGR_BUF_SIZE_16K	(16384)
+#define MWIFIEX_MP_AGGR_BUF_SIZE_32K	(32768)
 
 /* Misc. Config Register : Auto Re-enable interrupts */
 #define AUTO_RE_ENABLE_INT              BIT(4)
@@ -234,6 +232,8 @@
 	bool supports_sdio_new_mode;
 	bool has_control_mask;
 	u16 tx_buf_size;
+	u32 mp_tx_agg_buf_size;
+	u32 mp_rx_agg_buf_size;
 
 	u32 mp_rd_bitmap;
 	u32 mp_wr_bitmap;
@@ -258,6 +258,8 @@
 	bool supports_sdio_new_mode;
 	bool has_control_mask;
 	u16 tx_buf_size;
+	u32 mp_tx_agg_buf_size;
+	u32 mp_rx_agg_buf_size;
 };
 
 static const struct mwifiex_sdio_card_reg mwifiex_reg_sd87xx = {
@@ -315,6 +317,8 @@
 	.supports_sdio_new_mode = false,
 	.has_control_mask = true,
 	.tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K,
+	.mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
+	.mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
 };
 
 static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = {
@@ -325,6 +329,8 @@
 	.supports_sdio_new_mode = false,
 	.has_control_mask = true,
 	.tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K,
+	.mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
+	.mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
 };
 
 static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = {
@@ -335,6 +341,8 @@
 	.supports_sdio_new_mode = false,
 	.has_control_mask = true,
 	.tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K,
+	.mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
+	.mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_16K,
 };
 
 static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = {
@@ -345,6 +353,8 @@
 	.supports_sdio_new_mode = true,
 	.has_control_mask = false,
 	.tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K,
+	.mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_32K,
+	.mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_32K,
 };
 
 /*

diff --git a/drivers/net/wireless/mwifiex/sta_cmd.c b/drivers/net/wireless/mwifiex/sta_cmd.c
index e3cac14..88202ce 100644
--- a/drivers/net/wireless/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/mwifiex/sta_cmd.c

@@ -1546,6 +1546,7 @@
 	struct mwifiex_ie_types_extcap *extcap;
 	struct mwifiex_ie_types_vhtcap *vht_capab;
 	struct mwifiex_ie_types_aid *aid;
+	struct mwifiex_ie_types_tdls_idle_timeout *timeout;
 	u8 *pos, qos_info;
 	u16 config_len = 0;
 	struct station_parameters *params = priv->sta_params;
@@ -1643,6 +1644,12 @@
 			config_len += sizeof(struct mwifiex_ie_types_aid);
 		}
 
+		timeout = (void *)(pos + config_len);
+		timeout->header.type = cpu_to_le16(TLV_TYPE_TDLS_IDLE_TIMEOUT);
+		timeout->header.len = cpu_to_le16(sizeof(timeout->value));
+		timeout->value = cpu_to_le16(MWIFIEX_TDLS_IDLE_TIMEOUT);
+		config_len += sizeof(struct mwifiex_ie_types_tdls_idle_timeout);
+
 		break;
 	default:
 		dev_err(priv->adapter->dev, "Unknown TDLS operation\n");

diff --git a/drivers/net/wireless/mwifiex/sta_cmdresp.c b/drivers/net/wireless/mwifiex/sta_cmdresp.c
index bfebb01..577f297 100644
--- a/drivers/net/wireless/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/mwifiex/sta_cmdresp.c

@@ -865,14 +865,20 @@
 
 	switch (action) {
 	case ACT_TDLS_DELETE:
-		if (reason)
-			dev_err(priv->adapter->dev,
-				"TDLS link delete for %pM failed: reason %d\n",
-				cmd_tdls_oper->peer_mac, reason);
-		else
+		if (reason) {
+			if (!node || reason == TDLS_ERR_LINK_NONEXISTENT)
+				dev_dbg(priv->adapter->dev,
+					"TDLS link delete for %pM failed: reason %d\n",
+					cmd_tdls_oper->peer_mac, reason);
+			else
+				dev_err(priv->adapter->dev,
+					"TDLS link delete for %pM failed: reason %d\n",
+					cmd_tdls_oper->peer_mac, reason);
+		} else {
 			dev_dbg(priv->adapter->dev,
-				"TDLS link config for %pM successful\n",
+				"TDLS link delete for %pM successful\n",
 				cmd_tdls_oper->peer_mac);
+		}
 		break;
 	case ACT_TDLS_CREATE:
 		if (reason) {

diff --git a/drivers/net/wireless/mwifiex/sta_event.c b/drivers/net/wireless/mwifiex/sta_event.c
index 368450c..f6395ef 100644
--- a/drivers/net/wireless/mwifiex/sta_event.c
+++ b/drivers/net/wireless/mwifiex/sta_event.c

@@ -134,6 +134,46 @@
 		netif_carrier_off(priv->netdev);
 }
 
+/* Parse a TDLS generic event; -1 on short event or unknown peer. */
+static int mwifiex_parse_tdls_event(struct mwifiex_private *priv,
+				    struct sk_buff *event_skb)
+{
+	int ret = 0;
+	struct mwifiex_adapter *adapter = priv->adapter;
+	struct mwifiex_sta_node *sta_ptr;
+	struct mwifiex_tdls_generic_event *tdls_evt =
+			(void *)event_skb->data + sizeof(adapter->event_cause);
+
+	/* event_cause precedes the event in the skb, so count it too;
+	 * reserved 2 bytes are not mandatory in tdls event */
+	if (event_skb->len < (sizeof(adapter->event_cause) +
+			      sizeof(*tdls_evt) - sizeof(u16))) {
+		dev_err(adapter->dev, "Invalid event length!\n");
+		return -1;
+	}
+
+	sta_ptr = mwifiex_get_sta_entry(priv, tdls_evt->peer_mac);
+	if (!sta_ptr) {
+		dev_err(adapter->dev, "cannot get sta entry!\n");
+		return -1;
+	}
+
+	switch (le16_to_cpu(tdls_evt->type)) {
+	case TDLS_EVENT_LINK_TEAR_DOWN:
+		cfg80211_tdls_oper_request(priv->netdev, tdls_evt->peer_mac,
+					   NL80211_TDLS_TEARDOWN,
+					   le16_to_cpu(tdls_evt->u.reason_code),
+					   GFP_KERNEL);
+		ret = mwifiex_tdls_oper(priv, tdls_evt->peer_mac,
+					MWIFIEX_TDLS_DISABLE_LINK);
+		queue_work(adapter->workqueue, &adapter->main_work);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
 /*
  * This function handles events generated by firmware.
  *
@@ -459,6 +499,10 @@
 			false);
 		break;
 
+	case EVENT_TDLS_GENERIC_EVENT:
+		ret = mwifiex_parse_tdls_event(priv, adapter->event_skb);
+		break;
+
 	default:
 		dev_dbg(adapter->dev, "event: unknown event id: %#x\n",
 			eventcause);

diff --git a/drivers/net/wireless/mwifiex/sta_rx.c b/drivers/net/wireless/mwifiex/sta_rx.c
index ed26387..8b639d7 100644
--- a/drivers/net/wireless/mwifiex/sta_rx.c
+++ b/drivers/net/wireless/mwifiex/sta_rx.c

@@ -183,6 +183,7 @@
 	struct rx_packet_hdr *rx_pkt_hdr;
 	u8 ta[ETH_ALEN];
 	u16 rx_pkt_type, rx_pkt_offset, rx_pkt_length, seq_num;
+	struct mwifiex_sta_node *sta_ptr;
 
 	local_rx_pd = (struct rxpd *) (skb->data);
 	rx_pkt_type = le16_to_cpu(local_rx_pd->rx_pkt_type);
@@ -213,14 +214,25 @@
 	 * If the packet is not an unicast packet then send the packet
 	 * directly to os. Don't pass thru rx reordering
 	 */
-	if (!IS_11N_ENABLED(priv) ||
+	if ((!IS_11N_ENABLED(priv) &&
+	     !(ISSUPP_TDLS_ENABLED(priv->adapter->fw_cap_info) &&
+	       !(local_rx_pd->flags & MWIFIEX_RXPD_FLAGS_TDLS_PACKET))) ||
 	    !ether_addr_equal_unaligned(priv->curr_addr, rx_pkt_hdr->eth803_hdr.h_dest)) {
 		mwifiex_process_rx_packet(priv, skb);
 		return ret;
 	}
 
-	if (mwifiex_queuing_ra_based(priv)) {
+	if (mwifiex_queuing_ra_based(priv) ||
+	    (ISSUPP_TDLS_ENABLED(priv->adapter->fw_cap_info) &&
+	     local_rx_pd->flags & MWIFIEX_RXPD_FLAGS_TDLS_PACKET)) {
 		memcpy(ta, rx_pkt_hdr->eth803_hdr.h_source, ETH_ALEN);
+		if (local_rx_pd->flags & MWIFIEX_RXPD_FLAGS_TDLS_PACKET &&
+		    local_rx_pd->priority < MAX_NUM_TID) {
+			sta_ptr = mwifiex_get_sta_entry(priv, ta);
+			if (sta_ptr)
+				sta_ptr->rx_seq[local_rx_pd->priority] =
+					      le16_to_cpu(local_rx_pd->seq_num);
+		}
 	} else {
 		if (rx_pkt_type != PKT_TYPE_BAR)
 			priv->rx_seq[local_rx_pd->priority] = seq_num;

diff --git a/drivers/net/wireless/mwifiex/sta_tx.c b/drivers/net/wireless/mwifiex/sta_tx.c
index 1236a5d..5fce7e7 100644
--- a/drivers/net/wireless/mwifiex/sta_tx.c
+++ b/drivers/net/wireless/mwifiex/sta_tx.c

@@ -128,6 +128,7 @@
 {
 	struct mwifiex_adapter *adapter = priv->adapter;
 	struct txpd *local_tx_pd;
+	struct mwifiex_tx_param tx_param;
 /* sizeof(struct txpd) + Interface specific header */
 #define NULL_PACKET_HDR 64
 	u32 data_len = NULL_PACKET_HDR;
@@ -168,8 +169,9 @@
 						   skb, NULL);
 	} else {
 		skb_push(skb, INTF_HEADER_LEN);
+		tx_param.next_pkt_len = 0;
 		ret = adapter->if_ops.host_to_card(adapter, MWIFIEX_TYPE_DATA,
-						   skb, NULL);
+						   skb, &tx_param);
 	}
 	switch (ret) {
 	case -EBUSY:

diff --git a/drivers/net/wireless/mwifiex/tdls.c b/drivers/net/wireless/mwifiex/tdls.c
index 97662a1..e73034f 100644
--- a/drivers/net/wireless/mwifiex/tdls.c
+++ b/drivers/net/wireless/mwifiex/tdls.c

@@ -25,8 +25,8 @@
 #define TDLS_RESP_FIX_LEN     8
 #define TDLS_CONFIRM_FIX_LEN  6
 
-static void
-mwifiex_restore_tdls_packets(struct mwifiex_private *priv, u8 *mac, u8 status)
+static void mwifiex_restore_tdls_packets(struct mwifiex_private *priv,
+					 const u8 *mac, u8 status)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 	struct list_head *tid_list;
@@ -84,7 +84,8 @@
 	return;
 }
 
-static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv, u8 *mac)
+static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv,
+				      const u8 *mac)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 	struct list_head *ra_list_head;
@@ -185,8 +186,66 @@
 	return 0;
 }
 
+/*
+ * This function appends an HT operation IE to the TDLS frame in skb.
+ *
+ * The primary channel is taken from the current BSS descriptor. ht_param
+ * follows the AP's HT operation element when the 40MHz channel width checks
+ * pass; when vht_enabled is set it is instead built from
+ * mwifiex_get_sec_chan_offset() with BIT(2) set. The element is also saved
+ * in the TDLS capabilities of the peer's station entry.
+ *
+ * Returns 0 on success, or -1 if the peer has no station entry.
+ */
+static int
+mwifiex_tdls_add_ht_oper(struct mwifiex_private *priv, const u8 *mac,
+			 u8 vht_enabled, struct sk_buff *skb)
+{
+	struct ieee80211_ht_operation *ht_oper;
+	struct mwifiex_sta_node *sta_ptr;
+	struct mwifiex_bssdescriptor *bss_desc =
+					&priv->curr_bss_params.bss_descriptor;
+	u8 *pos;
+
+	sta_ptr = mwifiex_get_sta_entry(priv, mac);
+	if (unlikely(!sta_ptr)) {
+		dev_warn(priv->adapter->dev,
+			 "TDLS peer station not found in list\n");
+		return -1;
+	}
+
+	pos = (void *)skb_put(skb, sizeof(struct ieee80211_ht_operation) + 2);
+	*pos++ = WLAN_EID_HT_OPERATION;
+	*pos++ = sizeof(struct ieee80211_ht_operation);
+	ht_oper = (void *)pos;
+
+	/* skb_put() does not clear the area it hands out; zero the element so
+	 * that fields not assigned below never carry stale buffer contents.
+	 */
+	memset(ht_oper, 0, sizeof(*ht_oper));
+
+	ht_oper->primary_chan = bss_desc->channel;
+
+	/* follow AP's channel bandwidth */
+	if (ISSUPP_CHANWIDTH40(priv->adapter->hw_dot_11n_dev_cap) &&
+	    bss_desc->bcn_ht_cap && bss_desc->bcn_ht_oper &&
+	    ISALLOWED_CHANWIDTH40(bss_desc->bcn_ht_oper->ht_param))
+		ht_oper->ht_param = bss_desc->bcn_ht_oper->ht_param;
+
+	if (vht_enabled) {
+		ht_oper->ht_param =
+			  mwifiex_get_sec_chan_offset(bss_desc->channel);
+		ht_oper->ht_param |= BIT(2);
+	}
+
+	memcpy(&sta_ptr->tdls_cap.ht_oper, ht_oper,
+	       sizeof(struct ieee80211_ht_operation));
+
+	return 0;
+}
+
 static int mwifiex_tdls_add_vht_oper(struct mwifiex_private *priv,
-				     u8 *mac, struct sk_buff *skb)
+				     const u8 *mac, struct sk_buff *skb)
 {
 	struct mwifiex_bssdescriptor *bss_desc;
 	struct ieee80211_vht_operation *vht_oper;
@@ -325,8 +368,9 @@
 }
 
 static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
-			     u8 *peer, u8 action_code, u8 dialog_token,
-			     u16 status_code, struct sk_buff *skb)
+					const u8 *peer, u8 action_code,
+					u8 dialog_token,
+					u16 status_code, struct sk_buff *skb)
 {
 	struct ieee80211_tdls_data *tf;
 	int ret;
@@ -428,6 +472,17 @@
 				dev_kfree_skb_any(skb);
 				return ret;
 			}
+			ret = mwifiex_tdls_add_ht_oper(priv, peer, 1, skb);
+			if (ret) {
+				dev_kfree_skb_any(skb);
+				return ret;
+			}
+		} else {
+			ret = mwifiex_tdls_add_ht_oper(priv, peer, 0, skb);
+			if (ret) {
+				dev_kfree_skb_any(skb);
+				return ret;
+			}
 		}
 		break;
 
@@ -453,7 +508,8 @@
 }
 
 static void
-mwifiex_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, u8 *peer, u8 *bssid)
+mwifiex_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+			 const u8 *peer, const u8 *bssid)
 {
 	struct ieee80211_tdls_lnkie *lnkid;
 
@@ -467,8 +523,8 @@
 	memcpy(lnkid->resp_sta, peer, ETH_ALEN);
 }
 
-int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
+int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
+				 u8 action_code, u8 dialog_token,
 				 u16 status_code, const u8 *extra_ies,
 				 size_t extra_ies_len)
 {
@@ -560,7 +616,8 @@
 }
 
 static int
-mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, u8 *peer,
+mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv,
+				    const u8 *peer,
 				    u8 action_code, u8 dialog_token,
 				    u16 status_code, struct sk_buff *skb)
 {
@@ -638,10 +695,10 @@
 	return 0;
 }
 
-int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
-				 u16 status_code, const u8 *extra_ies,
-				 size_t extra_ies_len)
+int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer,
+				   u8 action_code, u8 dialog_token,
+				   u16 status_code, const u8 *extra_ies,
+				   size_t extra_ies_len)
 {
 	struct sk_buff *skb;
 	struct mwifiex_txinfo *tx_info;
@@ -848,7 +905,7 @@
 }
 
 static int
-mwifiex_tdls_process_config_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_config_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -869,7 +926,7 @@
 }
 
 static int
-mwifiex_tdls_process_create_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_create_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -896,7 +953,7 @@
 }
 
 static int
-mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -925,7 +982,7 @@
 }
 
 static int
-mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct ieee80211_mcs_info mcs;
@@ -982,7 +1039,7 @@
 	return 0;
 }
 
-int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action)
+int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action)
 {
 	switch (action) {
 	case MWIFIEX_TDLS_ENABLE_LINK:
@@ -997,7 +1054,7 @@
 	return 0;
 }
 
-int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac)
+int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *sta_ptr;
 

diff --git a/drivers/net/wireless/mwifiex/uap_cmd.c b/drivers/net/wireless/mwifiex/uap_cmd.c
index 9be6544..3264355 100644
--- a/drivers/net/wireless/mwifiex/uap_cmd.c
+++ b/drivers/net/wireless/mwifiex/uap_cmd.c

@@ -175,17 +175,19 @@
 		switch (GET_RXSTBC(cap_info)) {
 		case MWIFIEX_RX_STBC1:
 			/* HT_CAP 1X1 mode */
-			memset(&bss_cfg->ht_cap.mcs, 0xff, 1);
+			bss_cfg->ht_cap.mcs.rx_mask[0] = 0xff;
 			break;
 		case MWIFIEX_RX_STBC12:	/* fall through */
 		case MWIFIEX_RX_STBC123:
 			/* HT_CAP 2X2 mode */
-			memset(&bss_cfg->ht_cap.mcs, 0xff, 2);
+			bss_cfg->ht_cap.mcs.rx_mask[0] = 0xff;
+			bss_cfg->ht_cap.mcs.rx_mask[1] = 0xff;
 			break;
 		default:
 			dev_warn(priv->adapter->dev,
 				 "Unsupported RX-STBC, default to 2x2\n");
-			memset(&bss_cfg->ht_cap.mcs, 0xff, 2);
+			bss_cfg->ht_cap.mcs.rx_mask[0] = 0xff;
+			bss_cfg->ht_cap.mcs.rx_mask[1] = 0xff;
 			break;
 		}
 		priv->ap_11n_enabled = 1;

diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c
index edbe4af..a8ce813 100644
--- a/drivers/net/wireless/mwifiex/usb.c
+++ b/drivers/net/wireless/mwifiex/usb.c

@@ -22,9 +22,9 @@
 
 #define USB_VERSION	"1.0"
 
+static u8 user_rmmod;
 static struct mwifiex_if_ops usb_ops;
 static struct semaphore add_remove_card_sem;
-static struct usb_card_rec *usb_card;
 
 static struct usb_device_id mwifiex_usb_table[] = {
 	/* 8797 */
@@ -532,28 +532,38 @@
 static void mwifiex_usb_disconnect(struct usb_interface *intf)
 {
 	struct usb_card_rec *card = usb_get_intfdata(intf);
+	struct mwifiex_adapter *adapter;
 
-	if (!card) {
-		pr_err("%s: card is NULL\n", __func__);
+	if (!card || !card->adapter) {
+		pr_err("%s: card or card->adapter is NULL\n", __func__);
 		return;
 	}
 
+	adapter = card->adapter;
+	if (!adapter->priv_num)
+		return;
+
+	if (user_rmmod) {
+#ifdef CONFIG_PM
+		if (adapter->is_suspended)
+			mwifiex_usb_resume(intf);
+#endif
+
+		mwifiex_deauthenticate_all(adapter);
+
+		mwifiex_init_shutdown_fw(mwifiex_get_priv(adapter,
+							  MWIFIEX_BSS_ROLE_ANY),
+					 MWIFIEX_FUNC_SHUTDOWN);
+	}
+
 	mwifiex_usb_free(card);
 
-	if (card->adapter) {
-		struct mwifiex_adapter *adapter = card->adapter;
-
-		if (!adapter->priv_num)
-			return;
-
-		dev_dbg(adapter->dev, "%s: removing card\n", __func__);
-		mwifiex_remove_card(adapter, &add_remove_card_sem);
-	}
+	dev_dbg(adapter->dev, "%s: removing card\n", __func__);
+	mwifiex_remove_card(adapter, &add_remove_card_sem);
 
 	usb_set_intfdata(intf, NULL);
 	usb_put_dev(interface_to_usbdev(intf));
 	kfree(card);
-	usb_card = NULL;
 
 	return;
 }
@@ -565,6 +575,7 @@
 	.id_table = mwifiex_usb_table,
 	.suspend = mwifiex_usb_suspend,
 	.resume = mwifiex_usb_resume,
+	.soft_unbind = 1,
 };
 
 static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter)
@@ -762,7 +773,6 @@
 
 	card->adapter = adapter;
 	adapter->dev = &card->udev->dev;
-	usb_card = card;
 
 	switch (le16_to_cpu(card->udev->descriptor.idProduct)) {
 	case USB8897_PID_1:
@@ -1025,25 +1035,8 @@
 	if (!down_interruptible(&add_remove_card_sem))
 		up(&add_remove_card_sem);
 
-	if (usb_card && usb_card->adapter) {
-		struct mwifiex_adapter *adapter = usb_card->adapter;
-
-		/* In case driver is removed when asynchronous FW downloading is
-		 * in progress
-		 */
-		wait_for_completion(&adapter->fw_load);
-
-#ifdef CONFIG_PM
-		if (adapter->is_suspended)
-			mwifiex_usb_resume(usb_card->intf);
-#endif
-
-		mwifiex_deauthenticate_all(adapter);
-
-		mwifiex_init_shutdown_fw(mwifiex_get_priv(adapter,
-							  MWIFIEX_BSS_ROLE_ANY),
-					 MWIFIEX_FUNC_SHUTDOWN);
-	}
+	/* set the flag as user is removing this module */
+	user_rmmod = 1;
 
 	usb_deregister(&mwifiex_usb_driver);
 }

diff --git a/drivers/net/wireless/mwifiex/util.c b/drivers/net/wireless/mwifiex/util.c
index c3824e3..6da5abf 100644
--- a/drivers/net/wireless/mwifiex/util.c
+++ b/drivers/net/wireless/mwifiex/util.c

@@ -259,7 +259,7 @@
  * NULL is returned if station entry is not found in associated STA list.
  */
 struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac)
+mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 
@@ -280,7 +280,7 @@
  * If received mac address is NULL, NULL is returned.
  */
 struct mwifiex_sta_node *
-mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac)
+mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 	unsigned long flags;
@@ -332,7 +332,7 @@
 }
 
 /* This function will delete a station entry from station list */
-void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac)
+void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 	unsigned long flags;

diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c
index 0a7cc74..d3671d0 100644
--- a/drivers/net/wireless/mwifiex/wmm.c
+++ b/drivers/net/wireless/mwifiex/wmm.c

@@ -92,7 +92,7 @@
  * The function also initializes the list with the provided RA.
  */
 static struct mwifiex_ra_list_tbl *
-mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, u8 *ra)
+mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, const u8 *ra)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
@@ -139,8 +139,7 @@
  * This function allocates and adds a RA list for all TIDs
  * with the given RA.
  */
-void
-mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra)
+void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra)
 {
 	int i;
 	struct mwifiex_ra_list_tbl *ra_list;
@@ -164,6 +163,7 @@
 		if (!mwifiex_queuing_ra_based(priv)) {
 			if (mwifiex_get_tdls_link_status(priv, ra) ==
 			    TDLS_SETUP_COMPLETE) {
+				ra_list->tdls_link = true;
 				ra_list->is_11n_enabled =
 					mwifiex_tdls_peer_11n_enabled(priv, ra);
 			} else {
@@ -426,15 +426,6 @@
 							priv->tos_to_tid_inv[i];
 		}
 
-		priv->aggr_prio_tbl[6].amsdu
-					= priv->aggr_prio_tbl[6].ampdu_ap
-					= priv->aggr_prio_tbl[6].ampdu_user
-					= BA_STREAM_NOT_ALLOWED;
-
-		priv->aggr_prio_tbl[7].amsdu = priv->aggr_prio_tbl[7].ampdu_ap
-					= priv->aggr_prio_tbl[7].ampdu_user
-					= BA_STREAM_NOT_ALLOWED;
-
 		mwifiex_set_ba_params(priv);
 		mwifiex_reset_11n_rx_seq_num(priv);
 
@@ -575,7 +566,7 @@
  */
 static struct mwifiex_ra_list_tbl *
 mwifiex_wmm_get_ralist_node(struct mwifiex_private *priv, u8 tid,
-			    u8 *ra_addr)
+			    const u8 *ra_addr)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
@@ -596,7 +587,8 @@
  * retrieved.
  */
 struct mwifiex_ra_list_tbl *
-mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr)
+mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid,
+			    const u8 *ra_addr)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
@@ -657,7 +649,7 @@
 		if (ntohs(eth_hdr->h_proto) == ETH_P_TDLS)
 			dev_dbg(adapter->dev,
 				"TDLS setup packet for %pM. Don't block\n", ra);
-		else
+		else if (memcmp(priv->cfg_bssid, ra, ETH_ALEN))
 			tdls_status = mwifiex_get_tdls_link_status(priv, ra);
 	}
 

diff --git a/drivers/net/wireless/mwifiex/wmm.h b/drivers/net/wireless/mwifiex/wmm.h
index 83e4208..eca56e3 100644
--- a/drivers/net/wireless/mwifiex/wmm.h
+++ b/drivers/net/wireless/mwifiex/wmm.h

@@ -99,7 +99,7 @@
 
 void mwifiex_wmm_add_buf_txqueue(struct mwifiex_private *priv,
 				 struct sk_buff *skb);
-void mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra);
+void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra);
 void mwifiex_rotate_priolists(struct mwifiex_private *priv,
 			      struct mwifiex_ra_list_tbl *ra, int tid);
 
@@ -123,7 +123,8 @@
 int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv,
 			       const struct host_cmd_ds_command *resp);
 struct mwifiex_ra_list_tbl *
-mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr);
+mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid,
+			    const u8 *ra_addr);
 u8 mwifiex_wmm_downgrade_tid(struct mwifiex_private *priv, u32 tid);
 
 #endif /* !_MWIFIEX_WMM_H_ */

diff --git a/drivers/net/wireless/orinoco/hw.c b/drivers/net/wireless/orinoco/hw.c
index 49300d0..e27e328 100644
--- a/drivers/net/wireless/orinoco/hw.c
+++ b/drivers/net/wireless/orinoco/hw.c

@@ -988,8 +988,8 @@
  * tsc must be NULL or up to 8 bytes
  */
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, u8 *key, u8 *rsc, size_t rsc_len,
-			      u8 *tsc, size_t tsc_len)
+			      int set_tx, const u8 *key, const u8 *rsc,
+			      size_t rsc_len, const u8 *tsc, size_t tsc_len)
 {
 	struct {
 		__le16 idx;

diff --git a/drivers/net/wireless/orinoco/hw.h b/drivers/net/wireless/orinoco/hw.h
index 8f6831f..466d1ed 100644
--- a/drivers/net/wireless/orinoco/hw.h
+++ b/drivers/net/wireless/orinoco/hw.h

@@ -38,8 +38,8 @@
 int __orinoco_hw_setup_wepkeys(struct orinoco_private *priv);
 int __orinoco_hw_setup_enc(struct orinoco_private *priv);
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, u8 *key, u8 *rsc, size_t rsc_len,
-			      u8 *tsc, size_t tsc_len);
+			      int set_tx, const u8 *key, const u8 *rsc,
+			      size_t rsc_len, const u8 *tsc, size_t tsc_len);
 int orinoco_clear_tkip_key(struct orinoco_private *priv, int key_idx);
 int __orinoco_hw_set_multicast_list(struct orinoco_private *priv,
 				    struct net_device *dev,

diff --git a/drivers/net/wireless/orinoco/orinoco_usb.c b/drivers/net/wireless/orinoco/orinoco_usb.c
index 3ac7133..c90939c 100644
--- a/drivers/net/wireless/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/orinoco/orinoco_usb.c

@@ -1673,7 +1673,7 @@
 		firmware.code = fw_entry->data;
 	}
 	if (firmware.size && firmware.code) {
-		if (ezusb_firmware_download(upriv, &firmware))
+		if (ezusb_firmware_download(upriv, &firmware) < 0)
 			goto error;
 	} else {
 		err("No firmware to download");

diff --git a/drivers/net/wireless/orinoco/wext.c b/drivers/net/wireless/orinoco/wext.c
index b7a867b..6abdaf0 100644
--- a/drivers/net/wireless/orinoco/wext.c
+++ b/drivers/net/wireless/orinoco/wext.c

@@ -52,9 +52,9 @@
 	priv->keys[index].seq_len = seq_len;
 
 	if (key_len)
-		memcpy(priv->keys[index].key, key, key_len);
+		memcpy((void *)priv->keys[index].key, key, key_len);
 	if (seq_len)
-		memcpy(priv->keys[index].seq, seq, seq_len);
+		memcpy((void *)priv->keys[index].seq, seq, seq_len);
 
 	switch (alg) {
 	case ORINOCO_ALG_TKIP:

diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index eede90b..7be3a48 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c

@@ -669,7 +669,8 @@
 	return total;
 }
 
-static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop)
+static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
+		      u32 queues, bool drop)
 {
 	struct p54_common *priv = dev->priv;
 	unsigned int total, i;

diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index cbf0a58..8330fa3 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c

@@ -343,7 +343,7 @@
 	ray_release(link);
 
 	local = netdev_priv(dev);
-	del_timer(&local->timer);
+	del_timer_sync(&local->timer);
 
 	if (link->priv) {
 		unregister_netdev(dev);

diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 39d22a1..d2a9a08 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c

@@ -517,7 +517,7 @@
 				 u8 key_index, bool unicast, bool multicast);
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
-					u8 *mac, struct station_info *sinfo);
+			     const u8 *mac, struct station_info *sinfo);
 
 static int rndis_dump_station(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *mac, struct station_info *sinfo);
@@ -2490,7 +2490,7 @@
 }
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
-					u8 *mac, struct station_info *sinfo)
+			     const u8 *mac, struct station_info *sinfo)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;

diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 8416474..54aaeb0 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c

@@ -656,6 +656,7 @@
 	case IEEE80211_AMPDU_TX_START:
 		common->vif_info[ii].seq_start = seq_no;
 		ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+		status = 0;
 		break;
 
 	case IEEE80211_AMPDU_TX_STOP_CONT:

diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index 1b28cda..2eefbf1 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c

@@ -1083,7 +1083,7 @@
 {
 	if (status) {
 		rsi_hal_send_sta_notify_frame(common,
-					      NL80211_IFTYPE_STATION,
+					      RSI_IFTYPE_STATION,
 					      STA_CONNECTED,
 					      bssid,
 					      qos_enable,
@@ -1092,7 +1092,7 @@
 			rsi_send_auto_rate_request(common);
 	} else {
 		rsi_hal_send_sta_notify_frame(common,
-					      NL80211_IFTYPE_STATION,
+					      RSI_IFTYPE_STATION,
 					      STA_DISCONNECTED,
 					      bssid,
 					      qos_enable,

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 2e39d38..46e7af4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c

@@ -285,7 +285,6 @@
 		if (err) {
 			rsi_dbg(ERR_ZONE, "%s: CCCR speed reg read failed: %d\n",
 				__func__, err);
-			card->state &= ~MMC_STATE_HIGHSPEED;
 		} else {
 			err = rsi_cmd52writebyte(card,
 						 SDIO_CCCR_SPEED,
@@ -296,14 +295,13 @@
 					__func__, err);
 				return;
 			}
-			mmc_card_set_highspeed(card);
 			host->ios.timing = MMC_TIMING_SD_HS;
 			host->ops->set_ios(host, &host->ios);
 		}
 	}
 
 	/* Set clock */
-	if (mmc_card_highspeed(card))
+	if (mmc_card_hs(card))
 		clock = 50000000;
 	else
 		clock = card->cis.max_dtr;

diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index f2f7078..d3fbe33 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h

@@ -63,7 +63,7 @@
 				     u8 *name)
 {
 	init_completion(&thread->completion);
-	thread->task = kthread_run(func_ptr, common, name);
+	thread->task = kthread_run(func_ptr, common, "%s", name);
 	if (IS_ERR(thread->task))
 		return (int)PTR_ERR(thread->task);
 

diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index ac67c4a..225215a 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h

@@ -73,6 +73,7 @@
 #define RX_BA_INDICATION                1
 #define RSI_TBL_SZ                      40
 #define MAX_RETRIES                     8
+#define RSI_IFTYPE_STATION		 0
 
 #define STD_RATE_MCS7                   0x07
 #define STD_RATE_MCS6                   0x06

diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 41d4a81..c17fcf2 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c

@@ -1005,10 +1005,9 @@
 				   entry->skb->len + padding_len);
 
 	/*
-	 * Enable beaconing again.
+	 * Restore beaconing state.
 	 */
-	rt2x00_set_field32(&reg, BCN_TIME_CFG_BEACON_GEN, 1);
-	rt2800_register_write(rt2x00dev, BCN_TIME_CFG, reg);
+	rt2800_register_write(rt2x00dev, BCN_TIME_CFG, orig_reg);
 
 	/*
 	 * Clean up beacon skb.
@@ -1039,13 +1038,14 @@
 void rt2800_clear_beacon(struct queue_entry *entry)
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
-	u32 reg;
+	u32 orig_reg, reg;
 
 	/*
 	 * Disable beaconing while we are reloading the beacon data,
 	 * otherwise we might be sending out invalid data.
 	 */
-	rt2800_register_read(rt2x00dev, BCN_TIME_CFG, &reg);
+	rt2800_register_read(rt2x00dev, BCN_TIME_CFG, &orig_reg);
+	reg = orig_reg;
 	rt2x00_set_field32(&reg, BCN_TIME_CFG_BEACON_GEN, 0);
 	rt2800_register_write(rt2x00dev, BCN_TIME_CFG, reg);
 
@@ -1055,10 +1055,9 @@
 	rt2800_clear_beacon_register(rt2x00dev, entry->entry_idx);
 
 	/*
-	 * Enabled beaconing again.
+	 * Restore beaconing state.
 	 */
-	rt2x00_set_field32(&reg, BCN_TIME_CFG_BEACON_GEN, 1);
-	rt2800_register_write(rt2x00dev, BCN_TIME_CFG, reg);
+	rt2800_register_write(rt2x00dev, BCN_TIME_CFG, orig_reg);
 }
 EXPORT_SYMBOL_GPL(rt2800_clear_beacon);
 

diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index e3b885d..010b765 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h

@@ -1448,7 +1448,8 @@
 		      struct ieee80211_vif *vif, u16 queue,
 		      const struct ieee80211_tx_queue_params *params);
 void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw);
-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		     u32 queues, bool drop);
 int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
 int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant);
 void rt2x00mac_get_ringparam(struct ieee80211_hw *hw,

diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index a87ee9b..212ac48 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c

@@ -749,7 +749,8 @@
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll);
 
-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		     u32 queues, bool drop)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct data_queue *queue;

diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index 1057245..86c43d1 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c

@@ -68,6 +68,12 @@
 		}
 	}
 
+	/* If the port is powered down, we get a -EPROTO error, and this
+	 * leads to an endless loop. So just say that the device is gone.
+	 */
+	if (status == -EPROTO)
+		clear_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags);
+
 	rt2x00_err(rt2x00dev,
 		   "Vendor Request 0x%02x failed for offset 0x%04x with error %d\n",
 		   request, offset, status);

diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 2440298..9048a9c 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c

@@ -2031,13 +2031,14 @@
 static void rt61pci_clear_beacon(struct queue_entry *entry)
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
-	u32 reg;
+	u32 orig_reg, reg;
 
 	/*
 	 * Disable beaconing while we are reloading the beacon data,
 	 * otherwise we might be sending out invalid data.
 	 */
-	rt2x00mmio_register_read(rt2x00dev, TXRX_CSR9, &reg);
+	rt2x00mmio_register_read(rt2x00dev, TXRX_CSR9, &orig_reg);
+	reg = orig_reg;
 	rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0);
 	rt2x00mmio_register_write(rt2x00dev, TXRX_CSR9, reg);
 
@@ -2048,10 +2049,9 @@
 				  HW_BEACON_OFFSET(entry->entry_idx), 0);
 
 	/*
-	 * Enable beaconing again.
+	 * Restore global beaconing state.
 	 */
-	rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 1);
-	rt2x00mmio_register_write(rt2x00dev, TXRX_CSR9, reg);
+	rt2x00mmio_register_write(rt2x00dev, TXRX_CSR9, orig_reg);
 }
 
 /*

diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index a140170..95724ff 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c

@@ -1597,13 +1597,14 @@
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 	unsigned int beacon_base;
-	u32 reg;
+	u32 orig_reg, reg;
 
 	/*
 	 * Disable beaconing while we are reloading the beacon data,
 	 * otherwise we might be sending out invalid data.
 	 */
-	rt2x00usb_register_read(rt2x00dev, TXRX_CSR9, &reg);
+	rt2x00usb_register_read(rt2x00dev, TXRX_CSR9, &orig_reg);
+	reg = orig_reg;
 	rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0);
 	rt2x00usb_register_write(rt2x00dev, TXRX_CSR9, reg);
 
@@ -1614,10 +1615,9 @@
 	rt2x00usb_register_write(rt2x00dev, beacon_base, 0);
 
 	/*
-	 * Enable beaconing again.
+	 * Restore beaconing state.
 	 */
-	rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 1);
-	rt2x00usb_register_write(rt2x00dev, TXRX_CSR9, reg);
+	rt2x00usb_register_write(rt2x00dev, TXRX_CSR9, orig_reg);
 }
 
 static int rt73usb_get_tx_data_len(struct queue_entry *entry)

diff --git a/drivers/net/wireless/rtl818x/rtl8180/Makefile b/drivers/net/wireless/rtl818x/rtl8180/Makefile
index 08b056d..21005bd 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/Makefile
+++ b/drivers/net/wireless/rtl818x/rtl8180/Makefile

@@ -1,5 +1,5 @@
-rtl8180-objs		:= dev.o rtl8225.o sa2400.o max2820.o grf5101.o rtl8225se.o
+rtl818x_pci-objs	:= dev.o rtl8225.o sa2400.o max2820.o grf5101.o rtl8225se.o
 
-obj-$(CONFIG_RTL8180)	+= rtl8180.o
+obj-$(CONFIG_RTL8180)	+= rtl818x_pci.o
 
 ccflags-y += -Idrivers/net/wireless/rtl818x

diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c
index 98d8256f..2c1c02b 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c

@@ -284,6 +284,8 @@
 			rx_status.band = dev->conf.chandef.chan->band;
 			rx_status.mactime = tsft;
 			rx_status.flag |= RX_FLAG_MACTIME_START;
+			if (flags & RTL818X_RX_DESC_FLAG_SPLCP)
+				rx_status.flag |= RX_FLAG_SHORTPRE;
 			if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR)
 				rx_status.flag |= RX_FLAG_FAILED_FCS_CRC;
 
@@ -461,18 +463,23 @@
 			    RTL818X_TX_DESC_FLAG_NO_ENC;
 
 	rc_flags = info->control.rates[0].flags;
+
+	/* HW will perform RTS-CTS when only the RTS flag is set.
+	 * HW will perform CTS-to-self when both RTS and CTS flags are set.
+	 * RTS rate and RTS duration will be used also for CTS-to-self.
+	 */
 	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		tx_flags |= RTL818X_TX_DESC_FLAG_RTS;
 		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
+		rts_duration = ieee80211_rts_duration(dev, priv->vif,
+						skb->len, info);
 	} else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
-		tx_flags |= RTL818X_TX_DESC_FLAG_CTS;
+		tx_flags |= RTL818X_TX_DESC_FLAG_RTS | RTL818X_TX_DESC_FLAG_CTS;
 		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
+		rts_duration = ieee80211_ctstoself_duration(dev, priv->vif,
+						skb->len, info);
 	}
 
-	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS)
-		rts_duration = ieee80211_rts_duration(dev, priv->vif, skb->len,
-						      info);
-
 	if (priv->chip_family == RTL818X_CHIP_FAMILY_RTL8180) {
 		unsigned int remainder;
 
@@ -683,9 +690,8 @@
 	struct rtl8180_priv *priv = dev->priv;
 
 	if (priv->chip_family == RTL818X_CHIP_FAMILY_RTL8187SE) {
-		rtl818x_iowrite32(priv, &priv->map->IMR, IMR_TMGDOK |
-			  IMR_TBDER | IMR_THPDER |
-			  IMR_THPDER | IMR_THPDOK |
+		rtl818x_iowrite32(priv, &priv->map->IMR,
+			  IMR_TBDER | IMR_TBDOK |
 			  IMR_TVODER | IMR_TVODOK |
 			  IMR_TVIDER | IMR_TVIDOK |
 			  IMR_TBEDER | IMR_TBEDOK |
@@ -911,7 +917,10 @@
 		reg32 &= 0x00ffff00;
 		reg32 |= 0xb8000054;
 		rtl818x_iowrite32(priv, &priv->map->RF_PARA, reg32);
-	}
+	} else
+		/* stop unused queues (no dma alloc) */
+		rtl818x_iowrite8(priv, &priv->map->TX_DMA_POLLING,
+			    (1<<1) | (1<<2));
 
 	priv->rf->init(dev);
 

diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c
index 0ca17cd..629ad8c 100644
--- a/drivers/net/wireless/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c

@@ -253,14 +253,21 @@
 	flags |= ieee80211_get_tx_rate(dev, info)->hw_value << 24;
 	if (ieee80211_has_morefrags(tx_hdr->frame_control))
 		flags |= RTL818X_TX_DESC_FLAG_MOREFRAG;
+
+	/* HW will perform RTS-CTS when only the RTS flag is set.
+	 * HW will perform CTS-to-self when both RTS and CTS flags are set.
+	 * RTS rate and RTS duration will be used also for CTS-to-self.
+	 */
 	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		flags |= RTL818X_TX_DESC_FLAG_RTS;
 		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 		rts_dur = ieee80211_rts_duration(dev, priv->vif,
 						 skb->len, info);
 	} else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
-		flags |= RTL818X_TX_DESC_FLAG_CTS;
+		flags |= RTL818X_TX_DESC_FLAG_RTS | RTL818X_TX_DESC_FLAG_CTS;
 		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
+		rts_dur = ieee80211_ctstoself_duration(dev, priv->vif,
+						 skb->len, info);
 	}
 
 	if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
@@ -381,6 +388,8 @@
 	rx_status.freq = dev->conf.chandef.chan->center_freq;
 	rx_status.band = dev->conf.chandef.chan->band;
 	rx_status.flag |= RX_FLAG_MACTIME_START;
+	if (flags & RTL818X_RX_DESC_FLAG_SPLCP)
+		rx_status.flag |= RX_FLAG_SHORTPRE;
 	if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR)
 		rx_status.flag |= RX_FLAG_FAILED_FCS_CRC;
 	memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status));

diff --git a/drivers/net/wireless/rtl818x/rtl818x.h b/drivers/net/wireless/rtl818x/rtl818x.h
index 45ea4e1..7abef95 100644
--- a/drivers/net/wireless/rtl818x/rtl818x.h
+++ b/drivers/net/wireless/rtl818x/rtl818x.h

@@ -334,9 +334,9 @@
  * I don't like to introduce a ton of "reserved"..
  * They are for RTL8187SE
  */
-#define REG_ADDR1(addr)	((u8 __iomem *)priv->map + addr)
-#define REG_ADDR2(addr)	((__le16 __iomem *)priv->map + (addr >> 1))
-#define REG_ADDR4(addr)	((__le32 __iomem *)priv->map + (addr >> 2))
+#define REG_ADDR1(addr)	((u8 __iomem *)priv->map + (addr))
+#define REG_ADDR2(addr)	((__le16 __iomem *)priv->map + ((addr) >> 1))
+#define REG_ADDR4(addr)	((__le32 __iomem *)priv->map + ((addr) >> 2))
 
 #define FEMR_SE		REG_ADDR2(0x1D4)
 #define ARFR		REG_ADDR2(0x1E0)

diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index 4ec424f..b1ed6d0 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c

@@ -1387,7 +1387,8 @@
  * before switch channel or power save, or tx buffer packet
  * maybe send after offchannel or rf sleep, this may cause
  * dis-association by AP */
-static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void rtl_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 

diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c
index 94cd9df..b14cf5a 100644
--- a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c

@@ -2515,23 +2515,3 @@
 void rtl88ee_resume(struct ieee80211_hw *hw)
 {
 }
-
-/* Turn on AAP (RCR:bit 0) for promicuous mode. */
-void rtl88ee_allow_all_destaddr(struct ieee80211_hw *hw,
-				bool allow_all_da, bool write_into_reg)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-
-	if (allow_all_da) /* Set BIT0 */
-		rtlpci->receive_config |= RCR_AAP;
-	 else /* Clear BIT0 */
-		rtlpci->receive_config &= ~RCR_AAP;
-
-	if (write_into_reg)
-		rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
-
-	RT_TRACE(rtlpriv, COMP_TURBO | COMP_INIT, DBG_LOUD,
-		 "receive_config = 0x%08X, write_into_reg =%d\n",
-		 rtlpci->receive_config, write_into_reg);
-}

diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.h b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.h
index b4460a4..1850fde 100644
--- a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.h
+++ b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.h

@@ -61,8 +61,6 @@
 void rtl8188ee_bt_hw_init(struct ieee80211_hw *hw);
 void rtl88ee_suspend(struct ieee80211_hw *hw);
 void rtl88ee_resume(struct ieee80211_hw *hw);
-void rtl88ee_allow_all_destaddr(struct ieee80211_hw *hw,
-				bool allow_all_da, bool write_into_reg);
 void rtl88ee_fw_clk_off_timer_callback(unsigned long data);
 
 #endif

diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c
index 1b4101b..842d693 100644
--- a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c

@@ -93,7 +93,7 @@
 	u8 tid;
 
 	rtl8188ee_bt_reg_init(hw);
-	rtlpci->msi_support = true;
+	rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support;
 
 	rtlpriv->dm.dm_initialgain_enable = 1;
 	rtlpriv->dm.dm_flag = 0;
@@ -255,7 +255,6 @@
 	.enable_hw_sec = rtl88ee_enable_hw_security_config,
 	.set_key = rtl88ee_set_key,
 	.init_sw_leds = rtl88ee_init_sw_leds,
-	.allow_all_destaddr = rtl88ee_allow_all_destaddr,
 	.get_bbreg = rtl88e_phy_query_bb_reg,
 	.set_bbreg = rtl88e_phy_set_bb_reg,
 	.get_rfreg = rtl88e_phy_query_rf_reg,
@@ -267,6 +266,7 @@
 	.inactiveps = true,
 	.swctrl_lps = false,
 	.fwctrl_lps = true,
+	.msi_support = false,
 	.debug = DBG_EMERG,
 };
 
@@ -383,10 +383,12 @@
 module_param_named(ips, rtl88ee_mod_params.inactiveps, bool, 0444);
 module_param_named(swlps, rtl88ee_mod_params.swctrl_lps, bool, 0444);
 module_param_named(fwlps, rtl88ee_mod_params.fwctrl_lps, bool, 0444);
+module_param_named(msi, rtl88ee_mod_params.msi_support, bool, 0444);
 MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
 MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
 MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
 MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
+MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n");
 MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)");
 
 static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume);

diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
index 55adf04..cdecb0f 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c

@@ -2423,24 +2423,3 @@
 void rtl92ce_resume(struct ieee80211_hw *hw)
 {
 }
-
-/* Turn on AAP (RCR:bit 0) for promicuous mode. */
-void rtl92ce_allow_all_destaddr(struct ieee80211_hw *hw,
-	bool allow_all_da, bool write_into_reg)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-
-	if (allow_all_da) {/* Set BIT0 */
-		rtlpci->receive_config |= RCR_AAP;
-	} else {/* Clear BIT0 */
-		rtlpci->receive_config &= ~RCR_AAP;
-	}
-
-	if (write_into_reg)
-		rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
-
-	RT_TRACE(rtlpriv, COMP_TURBO | COMP_INIT, DBG_LOUD,
-		 "receive_config=0x%08X, write_into_reg=%d\n",
-		 rtlpci->receive_config, write_into_reg);
-}

diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.h b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.h
index 2d063b0..5533070 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.h

@@ -76,7 +76,5 @@
 void rtl8192ce_bt_hw_init(struct ieee80211_hw *hw);
 void rtl92ce_suspend(struct ieee80211_hw *hw);
 void rtl92ce_resume(struct ieee80211_hw *hw);
-void rtl92ce_allow_all_destaddr(struct ieee80211_hw *hw,
-				bool allow_all_da, bool write_into_reg);
 
 #endif

diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c
index b790320..12f21f4 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c

@@ -229,7 +229,6 @@
 	.enable_hw_sec = rtl92ce_enable_hw_security_config,
 	.set_key = rtl92ce_set_key,
 	.init_sw_leds = rtl92ce_init_sw_leds,
-	.allow_all_destaddr = rtl92ce_allow_all_destaddr,
 	.get_bbreg = rtl92c_phy_query_bb_reg,
 	.set_bbreg = rtl92c_phy_set_bb_reg,
 	.set_rfreg = rtl92ce_phy_set_rf_reg,

diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
index 07cb06d..a903c26 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c

@@ -511,7 +511,7 @@
 			pr_info("MAC auto ON okay!\n");
 			break;
 		}
-		if (pollingCount++ > 100) {
+		if (pollingCount++ > 1000) {
 			RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG,
 				 "Failed to polling REG_APS_FSMCO[APFM_ONMAC] done!\n");
 			return -ENODEV;

diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
index c613110..361435f 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c

@@ -395,9 +395,6 @@
 	/* .resume = rtl_usb_resume, */
 	/* .reset_resume = rtl8192c_resume, */
 #endif /* CONFIG_PM */
-#ifdef CONFIG_AUTOSUSPEND
-	.supports_autosuspend = 1,
-#endif
 	.disable_hub_initiated_lpm = 1,
 };
 

diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
index 9098558..1c7101b 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c

@@ -2544,23 +2544,3 @@
 		pci_write_config_dword(rtlpci->pdev, 0x40,
 			val & 0xffff00ff);
 }
-
-/* Turn on AAP (RCR:bit 0) for promicuous mode. */
-void rtl92se_allow_all_destaddr(struct ieee80211_hw *hw,
-				bool allow_all_da, bool write_into_reg)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-
-	if (allow_all_da) /* Set BIT0 */
-		rtlpci->receive_config |= RCR_AAP;
-	else /* Clear BIT0 */
-		rtlpci->receive_config &= ~RCR_AAP;
-
-	if (write_into_reg)
-		rtl_write_dword(rtlpriv, RCR, rtlpci->receive_config);
-
-	RT_TRACE(rtlpriv, COMP_TURBO | COMP_INIT, DBG_LOUD,
-		 "receive_config=0x%08X, write_into_reg=%d\n",
-		 rtlpci->receive_config, write_into_reg);
-}

diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.h b/drivers/net/wireless/rtlwifi/rtl8192se/hw.h
index da48aa8..4cacee1 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.h

@@ -74,7 +74,5 @@
 		     u8 enc_algo, bool is_wepkey, bool clear_all);
 void rtl92se_suspend(struct ieee80211_hw *hw);
 void rtl92se_resume(struct ieee80211_hw *hw);
-void rtl92se_allow_all_destaddr(struct ieee80211_hw *hw,
-				bool allow_all_da, bool write_into_reg);
 
 #endif

diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c
index 2e8e6f8..1bff2a0 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c

@@ -290,7 +290,6 @@
 	.enable_hw_sec = rtl92se_enable_hw_security_config,
 	.set_key = rtl92se_set_key,
 	.init_sw_leds = rtl92se_init_sw_leds,
-	.allow_all_destaddr = rtl92se_allow_all_destaddr,
 	.get_bbreg = rtl92s_phy_query_bb_reg,
 	.set_bbreg = rtl92s_phy_set_bb_reg,
 	.get_rfreg = rtl92s_phy_query_rf_reg,

diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hal_bt_coexist.c b/drivers/net/wireless/rtlwifi/rtl8723ae/hal_bt_coexist.c
index 48fee1b..5b4a714 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723ae/hal_bt_coexist.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hal_bt_coexist.c

@@ -32,7 +32,6 @@
 #include "dm.h"
 #include "fw.h"
 #include "../rtl8723com/fw_common.h"
-#include "../rtl8723com/fw_common.h"
 #include "phy.h"
 #include "reg.h"
 #include "hal_btc.h"

diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c
index 65c9e80..87f6916 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c

@@ -2383,24 +2383,3 @@
 void rtl8723ae_resume(struct ieee80211_hw *hw)
 {
 }
-
-/* Turn on AAP (RCR:bit 0) for promicuous mode. */
-void rtl8723ae_allow_all_destaddr(struct ieee80211_hw *hw,
-	bool allow_all_da, bool write_into_reg)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-
-	if (allow_all_da) /* Set BIT0 */
-		rtlpci->receive_config |= RCR_AAP;
-	else /* Clear BIT0 */
-		rtlpci->receive_config &= ~RCR_AAP;
-
-	if (write_into_reg)
-		rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
-
-
-	RT_TRACE(rtlpriv, COMP_TURBO | COMP_INIT, DBG_LOUD,
-		 "receive_config=0x%08X, write_into_reg=%d\n",
-		 rtlpci->receive_config, write_into_reg);
-}

diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.h b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.h
index 6fa24f7..d3bc39f 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.h
+++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.h

@@ -67,7 +67,5 @@
 void rtl8723ae_bt_hw_init(struct ieee80211_hw *hw);
 void rtl8723ae_suspend(struct ieee80211_hw *hw);
 void rtl8723ae_resume(struct ieee80211_hw *hw);
-void rtl8723ae_allow_all_destaddr(struct ieee80211_hw *hw,
-				  bool allow_all_da, bool write_into_reg);
 
 #endif

diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c
index 1087a3b..73cba1e 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c

@@ -238,7 +238,6 @@
 	.enable_hw_sec = rtl8723ae_enable_hw_security_config,
 	.set_key = rtl8723ae_set_key,
 	.init_sw_leds = rtl8723ae_init_sw_leds,
-	.allow_all_destaddr = rtl8723ae_allow_all_destaddr,
 	.get_bbreg = rtl8723_phy_query_bb_reg,
 	.set_bbreg = rtl8723_phy_set_bb_reg,
 	.get_rfreg = rtl8723ae_phy_query_rf_reg,

diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c
index 0fdf090..3d55549 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c

@@ -2501,23 +2501,3 @@
 void rtl8723be_resume(struct ieee80211_hw *hw)
 {
 }
-
-/* Turn on AAP (RCR:bit 0) for promicuous mode. */
-void rtl8723be_allow_all_destaddr(struct ieee80211_hw *hw, bool allow_all_da,
-				  bool write_into_reg)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-
-	if (allow_all_da) /* Set BIT0 */
-		rtlpci->receive_config |= RCR_AAP;
-	else /* Clear BIT0 */
-		rtlpci->receive_config &= ~RCR_AAP;
-
-	if (write_into_reg)
-		rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
-
-	RT_TRACE(rtlpriv, COMP_TURBO | COMP_INIT, DBG_LOUD,
-		 "receive_config = 0x%08X, write_into_reg =%d\n",
-		 rtlpci->receive_config, write_into_reg);
-}

diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/hw.h b/drivers/net/wireless/rtlwifi/rtl8723be/hw.h
index b7449a9..64c7551 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723be/hw.h
+++ b/drivers/net/wireless/rtlwifi/rtl8723be/hw.h

@@ -59,6 +59,4 @@
 void rtl8723be_bt_hw_init(struct ieee80211_hw *hw);
 void rtl8723be_suspend(struct ieee80211_hw *hw);
 void rtl8723be_resume(struct ieee80211_hw *hw);
-void rtl8723be_allow_all_destaddr(struct ieee80211_hw *hw, bool allow_all_da,
-				  bool write_into_reg);
 #endif

diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c
index b4577eb..ff12bf4 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c

@@ -92,7 +92,7 @@
 	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
 
 	rtl8723be_bt_reg_init(hw);
-	rtlpci->msi_support = true;
+	rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support;
 	rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
 
 	rtlpriv->dm.dm_initialgain_enable = 1;
@@ -253,6 +253,7 @@
 	.inactiveps = true,
 	.swctrl_lps = false,
 	.fwctrl_lps = true,
+	.msi_support = false,
 	.debug = DBG_EMERG,
 };
 
@@ -365,9 +366,11 @@
 module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444);
 module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444);
 module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444);
+module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444);
 MODULE_PARM_DESC(swenc, "using hardware crypto (default 0 [hardware])\n");
 MODULE_PARM_DESC(ips, "using no link power save (default 1 is open)\n");
 MODULE_PARM_DESC(fwlps, "using linked fw control power save (default 1 is open)\n");
+MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n");
 MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)");
 
 static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume);

diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/trx.c b/drivers/net/wireless/rtlwifi/rtl8723be/trx.c
index e0a0d8c..969eaea 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723be/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723be/trx.c

@@ -33,7 +33,6 @@
 #include "trx.h"
 #include "led.h"
 #include "dm.h"
-#include "phy.h"
 
 static u8 _rtl8723be_map_hwqueue_to_fwqueue(struct sk_buff *skb, u8 hw_queue)
 {

diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h
index 6965afd..407a793 100644
--- a/drivers/net/wireless/rtlwifi/wifi.h
+++ b/drivers/net/wireless/rtlwifi/wifi.h

@@ -1960,8 +1960,6 @@
 			  u32 regaddr, u32 bitmask);
 	void (*set_rfreg) (struct ieee80211_hw *hw, enum radio_path rfpath,
 			   u32 regaddr, u32 bitmask, u32 data);
-	void (*allow_all_destaddr)(struct ieee80211_hw *hw,
-		bool allow_all_da, bool write_into_reg);
 	void (*linked_set_reg) (struct ieee80211_hw *hw);
 	void (*chk_switch_dmdp) (struct ieee80211_hw *hw);
 	void (*dualmac_easy_concurrent) (struct ieee80211_hw *hw);
@@ -2030,6 +2028,10 @@
 
 	/* default: 1 = using linked fw power save */
 	bool fwctrl_lps;
+
+	/* default: 0 = not using MSI interrupts mode */
+	/* submodules should set their own default value */
+	bool msi_support;
 };
 
 struct rtl_hal_usbint_cfg {

diff --git a/drivers/net/wireless/ti/wl1251/acx.c b/drivers/net/wireless/ti/wl1251/acx.c
index 5a4ec56..5695628 100644
--- a/drivers/net/wireless/ti/wl1251/acx.c
+++ b/drivers/net/wireless/ti/wl1251/acx.c

@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 
 #include "wl1251.h"
 #include "reg.h"

diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c
index bf1fa18..ede31f0 100644
--- a/drivers/net/wireless/ti/wl1251/cmd.c
+++ b/drivers/net/wireless/ti/wl1251/cmd.c

@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 #include <linux/etherdevice.h>
 
 #include "wl1251.h"

diff --git a/drivers/net/wireless/ti/wl1251/event.c b/drivers/net/wireless/ti/wl1251/event.c
index db01053..c986303 100644
--- a/drivers/net/wireless/ti/wl1251/event.c
+++ b/drivers/net/wireless/ti/wl1251/event.c

@@ -124,11 +124,12 @@
 			return ret;
 	}
 
-	if (wl->vif && vector & SYNCHRONIZATION_TIMEOUT_EVENT_ID) {
+	if (vector & SYNCHRONIZATION_TIMEOUT_EVENT_ID) {
 		wl1251_debug(DEBUG_EVENT, "SYNCHRONIZATION_TIMEOUT_EVENT");
 
 		/* indicate to the stack, that beacons have been lost */
-		ieee80211_beacon_loss(wl->vif);
+		if (wl->vif && wl->vif->type == NL80211_IFTYPE_STATION)
+			ieee80211_beacon_loss(wl->vif);
 	}
 
 	if (vector & REGAINED_BSS_EVENT_ID) {

diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 757e257..4e782f1 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c

@@ -550,6 +550,34 @@
 	mutex_unlock(&wl->mutex);
 }
 
+/* Upload the CMD_NULL_DATA template: no payload (fixed template size) for
+ * IBSS, else the frame from ieee80211_nullfunc_get(); -ENOMEM if that fails.
+ */
+static int wl1251_build_null_data(struct wl1251 *wl)
+{
+	struct sk_buff *skb = NULL;
+	int size = sizeof(struct wl12xx_null_data_template);
+	void *ptr = NULL;
+	int ret = -ENOMEM;
+
+	if (wl->bss_type != BSS_TYPE_IBSS) {
+		skb = ieee80211_nullfunc_get(wl->hw, wl->vif);
+		if (!skb)
+			goto out;
+		size = skb->len;
+		ptr = skb->data;
+	}
+
+	ret = wl1251_cmd_template_set(wl, CMD_NULL_DATA, ptr, size);
+
+out:
+	dev_kfree_skb(skb);
+	if (ret)
+		wl1251_warning("cmd build null data failed: %d", ret);
+
+	return ret;
+}
+
 static int wl1251_build_qos_null_data(struct wl1251 *wl)
 {
 	struct ieee80211_qos_hdr template;
@@ -687,16 +715,6 @@
 		wl->power_level = conf->power_level;
 	}
 
-	/*
-	 * Tell stack that connection is lost because hw encryption isn't
-	 * supported in monitor mode.
-	 * This requires temporary enabling of the hw connection monitor flag
-	 */
-	if ((changed & IEEE80211_CONF_CHANGE_MONITOR) && wl->vif) {
-		wl->hw->flags |= IEEE80211_HW_CONNECTION_MONITOR;
-		ieee80211_connection_loss(wl->vif);
-	}
-
 out_sleep:
 	wl1251_ps_elp_sleep(wl);
 
@@ -1103,24 +1121,19 @@
 		wl->rssi_thold = bss_conf->cqm_rssi_thold;
 	}
 
-	if (changed & BSS_CHANGED_BSSID) {
+	if ((changed & BSS_CHANGED_BSSID) &&
+	    memcmp(wl->bssid, bss_conf->bssid, ETH_ALEN)) {
 		memcpy(wl->bssid, bss_conf->bssid, ETH_ALEN);
 
-		skb = ieee80211_nullfunc_get(wl->hw, wl->vif);
-		if (!skb)
-			goto out_sleep;
+		if (!is_zero_ether_addr(wl->bssid)) {
+			ret = wl1251_build_null_data(wl);
+			if (ret < 0)
+				goto out_sleep;
 
-		ret = wl1251_cmd_template_set(wl, CMD_NULL_DATA,
-					      skb->data, skb->len);
-		dev_kfree_skb(skb);
-		if (ret < 0)
-			goto out_sleep;
+			ret = wl1251_build_qos_null_data(wl);
+			if (ret < 0)
+				goto out_sleep;
 
-		ret = wl1251_build_qos_null_data(wl);
-		if (ret < 0)
-			goto out;
-
-		if (wl->bss_type != BSS_TYPE_IBSS) {
 			ret = wl1251_join(wl, wl->bss_type, wl->channel,
 					  wl->beacon_int, wl->dtim_period);
 			if (ret < 0)
@@ -1129,9 +1142,6 @@
 	}
 
 	if (changed & BSS_CHANGED_ASSOC) {
-		/* Disable temporary enabled hw connection monitor flag */
-		wl->hw->flags &= ~IEEE80211_HW_CONNECTION_MONITOR;
-
 		if (bss_conf->assoc) {
 			wl->beacon_int = bss_conf->beacon_int;
 
@@ -1216,8 +1226,8 @@
 		if (ret < 0)
 			goto out_sleep;
 
-		ret = wl1251_join(wl, wl->bss_type, wl->beacon_int,
-				  wl->channel, wl->dtim_period);
+		ret = wl1251_join(wl, wl->bss_type, wl->channel,
+				  wl->beacon_int, wl->dtim_period);
 
 		if (ret < 0)
 			goto out_sleep;

diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index b06d36d..a0aa8fa 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c

@@ -23,6 +23,7 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/swab.h>
 #include <linux/crc7.h>
 #include <linux/spi/spi.h>
 #include <linux/wl12xx.h>
@@ -83,47 +84,44 @@
 
 static void wl1251_spi_wake(struct wl1251 *wl)
 {
-	u8 crc[WSPI_INIT_CMD_CRC_LEN], *cmd;
 	struct spi_transfer t;
 	struct spi_message m;
+	u8 *cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
 
-	cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
 	if (!cmd) {
 		wl1251_error("could not allocate cmd for spi init");
 		return;
 	}
 
-	memset(crc, 0, sizeof(crc));
 	memset(&t, 0, sizeof(t));
 	spi_message_init(&m);
 
 	/* Set WSPI_INIT_COMMAND
 	 * the data is being send from the MSB to LSB
 	 */
-	cmd[2] = 0xff;
-	cmd[3] = 0xff;
-	cmd[1] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX;
-	cmd[0] = 0;
-	cmd[7] = 0;
-	cmd[6] |= HW_ACCESS_WSPI_INIT_CMD_MASK << 3;
-	cmd[6] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN;
+	cmd[0] = 0xff;
+	cmd[1] = 0xff;
+	cmd[2] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX;
+	cmd[3] = 0;
+	cmd[4] = 0;
+	cmd[5] = HW_ACCESS_WSPI_INIT_CMD_MASK << 3;
+	cmd[5] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN;
 
-	if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0)
-		cmd[5] |=  WSPI_INIT_CMD_DIS_FIXEDBUSY;
-	else
-		cmd[5] |= WSPI_INIT_CMD_EN_FIXEDBUSY;
-
-	cmd[5] |= WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS
+	cmd[6] = WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS
 		| WSPI_INIT_CMD_WSPI | WSPI_INIT_CMD_WS;
 
-	crc[0] = cmd[1];
-	crc[1] = cmd[0];
-	crc[2] = cmd[7];
-	crc[3] = cmd[6];
-	crc[4] = cmd[5];
+	if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0)
+		cmd[6] |= WSPI_INIT_CMD_DIS_FIXEDBUSY;
+	else
+		cmd[6] |= WSPI_INIT_CMD_EN_FIXEDBUSY;
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[7] = crc7_be(0, cmd+2, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
+	/*
+	 * The above is the logical order; it must actually be stored
+	 * in the buffer byte-swapped.
+	 */
+	__swab32s((u32 *)cmd);
+	__swab32s((u32 *)cmd+1);
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;

diff --git a/drivers/net/wireless/ti/wlcore/debugfs.h b/drivers/net/wireless/ti/wlcore/debugfs.h
index f7381dd..0f2cfb0 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.h
+++ b/drivers/net/wireless/ti/wlcore/debugfs.h

@@ -57,7 +57,7 @@
 					    wl, &name## _ops);		\
 		if (!entry || IS_ERR(entry))				\
 			goto err;					\
-	} while (0);
+	} while (0)
 
 
 #define DEBUGFS_ADD_PREFIX(prefix, name, parent)			\
@@ -66,7 +66,7 @@
 				    wl, &prefix## _## name## _ops);	\
 		if (!entry || IS_ERR(entry))				\
 			goto err;					\
-	} while (0);
+	} while (0)
 
 #define DEBUGFS_FWSTATS_FILE(sub, name, fmt, struct_type)		\
 static ssize_t sub## _ ##name## _read(struct file *file,		\

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index e71eae3..3d6028e 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c

@@ -1416,7 +1416,7 @@
 
 int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter,
 				 u16 offset, u8 flags,
-				 u8 *pattern, u8 len)
+				 const u8 *pattern, u8 len)
 {
 	struct wl12xx_rx_filter_field *field;
 
@@ -5184,7 +5184,8 @@
 	mutex_unlock(&wl->mutex);
 }
 
-static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void wlcore_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    u32 queues, bool drop)
 {
 	struct wl1271 *wl = hw->priv;
 

diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 29ef249..d3dd7bf 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c

@@ -217,7 +217,7 @@
 static int wl1271_probe(struct sdio_func *func,
 				  const struct sdio_device_id *id)
 {
-	struct wlcore_platdev_data *pdev_data;
+	struct wlcore_platdev_data pdev_data;
 	struct wl12xx_sdio_glue *glue;
 	struct resource res[1];
 	mmc_pm_flag_t mmcflags;
@@ -228,16 +228,13 @@
 	if (func->num != 0x02)
 		return -ENODEV;
 
-	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
-	if (!pdev_data)
-		goto out;
-
-	pdev_data->if_ops = &sdio_ops;
+	memset(&pdev_data, 0x00, sizeof(pdev_data));
+	pdev_data.if_ops = &sdio_ops;
 
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&func->dev, "can't allocate glue\n");
-		goto out_free_pdev_data;
+		goto out;
 	}
 
 	glue->dev = &func->dev;
@@ -248,9 +245,9 @@
 	/* Use block mode for transferring over one block size of data */
 	func->card->quirks |= MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
 
-	pdev_data->pdata = wl12xx_get_platform_data();
-	if (IS_ERR(pdev_data->pdata)) {
-		ret = PTR_ERR(pdev_data->pdata);
+	pdev_data.pdata = wl12xx_get_platform_data();
+	if (IS_ERR(pdev_data.pdata)) {
+		ret = PTR_ERR(pdev_data.pdata);
 		dev_err(glue->dev, "missing wlan platform data: %d\n", ret);
 		goto out_free_glue;
 	}
@@ -260,7 +257,7 @@
 	dev_dbg(glue->dev, "sdio PM caps = 0x%x\n", mmcflags);
 
 	if (mmcflags & MMC_PM_KEEP_POWER)
-		pdev_data->pdata->pwr_in_suspend = true;
+		pdev_data.pdata->pwr_in_suspend = true;
 
 	sdio_set_drvdata(func, glue);
 
@@ -289,7 +286,7 @@
 
 	memset(res, 0x00, sizeof(res));
 
-	res[0].start = pdev_data->pdata->irq;
+	res[0].start = pdev_data.pdata->irq;
 	res[0].flags = IORESOURCE_IRQ;
 	res[0].name = "irq";
 
@@ -299,8 +296,8 @@
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, pdev_data,
-				       sizeof(*pdev_data));
+	ret = platform_device_add_data(glue->core, &pdev_data,
+				       sizeof(pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -319,9 +316,6 @@
 out_free_glue:
 	kfree(glue);
 
-out_free_pdev_data:
-	kfree(pdev_data);
-
 out:
 	return ret;
 }

diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index dbe826d..392c882 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c

@@ -24,11 +24,12 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/swab.h>
 #include <linux/crc7.h>
 #include <linux/spi/spi.h>
 #include <linux/wl12xx.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
 
 #include "wlcore.h"
 #include "wl12xx_80211.h"
@@ -110,18 +111,16 @@
 static void wl12xx_spi_init(struct device *child)
 {
 	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
-	u8 crc[WSPI_INIT_CMD_CRC_LEN], *cmd;
 	struct spi_transfer t;
 	struct spi_message m;
+	u8 *cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
 
-	cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
 	if (!cmd) {
 		dev_err(child->parent,
 			"could not allocate cmd for spi init\n");
 		return;
 	}
 
-	memset(crc, 0, sizeof(crc));
 	memset(&t, 0, sizeof(t));
 	spi_message_init(&m);
 
@@ -129,30 +128,29 @@
 	 * Set WSPI_INIT_COMMAND
 	 * the data is being send from the MSB to LSB
 	 */
-	cmd[2] = 0xff;
-	cmd[3] = 0xff;
-	cmd[1] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX;
-	cmd[0] = 0;
-	cmd[7] = 0;
-	cmd[6] |= HW_ACCESS_WSPI_INIT_CMD_MASK << 3;
-	cmd[6] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN;
+	cmd[0] = 0xff;
+	cmd[1] = 0xff;
+	cmd[2] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX;
+	cmd[3] = 0;
+	cmd[4] = 0;
+	cmd[5] = HW_ACCESS_WSPI_INIT_CMD_MASK << 3;
+	cmd[5] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN;
 
-	if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0)
-		cmd[5] |=  WSPI_INIT_CMD_DIS_FIXEDBUSY;
-	else
-		cmd[5] |= WSPI_INIT_CMD_EN_FIXEDBUSY;
-
-	cmd[5] |= WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS
+	cmd[6] = WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS
 		| WSPI_INIT_CMD_WSPI | WSPI_INIT_CMD_WS;
 
-	crc[0] = cmd[1];
-	crc[1] = cmd[0];
-	crc[2] = cmd[7];
-	crc[3] = cmd[6];
-	crc[4] = cmd[5];
+	if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0)
+		cmd[6] |= WSPI_INIT_CMD_DIS_FIXEDBUSY;
+	else
+		cmd[6] |= WSPI_INIT_CMD_EN_FIXEDBUSY;
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[7] = crc7_be(0, cmd+2, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
+	/*
+	 * The above is the logical order; it must actually be stored
+	 * in the buffer byte-swapped.
+	 */
+	__swab32s((u32 *)cmd);
+	__swab32s((u32 *)cmd+1);
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;
@@ -327,27 +325,25 @@
 static int wl1271_probe(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue;
-	struct wlcore_platdev_data *pdev_data;
+	struct wlcore_platdev_data pdev_data;
 	struct resource res[1];
 	int ret = -ENOMEM;
 
-	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
-	if (!pdev_data)
-		goto out;
+	memset(&pdev_data, 0x00, sizeof(pdev_data));
 
-	pdev_data->pdata = dev_get_platdata(&spi->dev);
-	if (!pdev_data->pdata) {
+	pdev_data.pdata = dev_get_platdata(&spi->dev);
+	if (!pdev_data.pdata) {
 		dev_err(&spi->dev, "no platform data\n");
 		ret = -ENODEV;
-		goto out_free_pdev_data;
+		goto out;
 	}
 
-	pdev_data->if_ops = &spi_ops;
+	pdev_data.if_ops = &spi_ops;
 
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&spi->dev, "can't allocate glue\n");
-		goto out_free_pdev_data;
+		goto out;
 	}
 
 	glue->dev = &spi->dev;
@@ -385,8 +381,8 @@
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, pdev_data,
-				       sizeof(*pdev_data));
+	ret = platform_device_add_data(glue->core, &pdev_data,
+				       sizeof(pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -406,9 +402,6 @@
 out_free_glue:
 	kfree(glue);
 
-out_free_pdev_data:
-	kfree(pdev_data);
-
 out:
 	return ret;
 }

diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 756e890..c2c34a8 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h

@@ -512,8 +512,8 @@
 void wl12xx_queue_recovery_work(struct wl1271 *wl);
 size_t wl12xx_copy_fwlog(struct wl1271 *wl, u8 *memblock, size_t maxlen);
 int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter,
-					u16 offset, u8 flags,
-					u8 *pattern, u8 len);
+				 u16 offset, u8 flags,
+				 const u8 *pattern, u8 len);
 void wl1271_rx_filter_free(struct wl12xx_rx_filter *filter);
 struct wl12xx_rx_filter *wl1271_rx_filter_alloc(void);
 int wl1271_rx_filter_get_fields_size(struct wl12xx_rx_filter *filter);

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 0d4a285..4dd7c4a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h

@@ -99,22 +99,43 @@
  */
 #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
 
-struct xenvif {
-	/* Unique identifier for this interface. */
-	domid_t          domid;
-	unsigned int     handle;
+/* Queue name is interface name with "-qNNN" appended */
+#define QUEUE_NAME_SIZE (IFNAMSIZ + 5)
 
-	/* Is this interface disabled? True when backend discovers
-	 * frontend is rogue.
+/* IRQ name is queue name with "-tx" or "-rx" appended */
+#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+
+struct xenvif;
+
+struct xenvif_stats {
+	/* Per-queue counters for the net_device_stats fields that are
+	 * updated in netback.c. They are unsigned long, like the totals
+	 * they are summed into, so none wraps before its total would.
 	 */
-	bool disabled;
+	unsigned long rx_bytes;
+	unsigned long rx_packets;
+	unsigned long tx_bytes;
+	unsigned long tx_packets;
+
+	/* Extra xenvif stats; ethtool reads these as unsigned long */
+	unsigned long rx_gso_checksum_fixup;
+	unsigned long tx_zerocopy_sent;
+	unsigned long tx_zerocopy_success;
+	unsigned long tx_zerocopy_fail;
+	unsigned long tx_frag_overflow;
+};
+
+struct xenvif_queue { /* Per-queue data for xenvif */
+	unsigned int id; /* Queue ID, 0-based */
+	char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
+	struct xenvif *vif; /* Parent VIF */
 
 	/* Use NAPI for guest TX */
 	struct napi_struct napi;
 	/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
 	unsigned int tx_irq;
 	/* Only used when feature-split-event-channels = 1 */
-	char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
+	char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 	struct xen_netif_tx_back_ring tx;
 	struct sk_buff_head tx_queue;
 	struct page *mmap_pages[MAX_PENDING_REQS];
@@ -150,7 +171,7 @@
 	/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
 	unsigned int rx_irq;
 	/* Only used when feature-split-event-channels = 1 */
-	char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
+	char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
 	RING_IDX rx_last_skb_slots;
@@ -158,14 +179,29 @@
 
 	struct timer_list wake_queue;
 
-	/* This array is allocated seperately as it is large */
-	struct gnttab_copy *grant_copy_op;
+	struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
 
 	/* We create one meta structure per ring request we consume, so
 	 * the maximum number is the same as the ring size.
 	 */
 	struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE];
 
+	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+	unsigned long   credit_bytes;
+	unsigned long   credit_usec;
+	unsigned long   remaining_credit;
+	struct timer_list credit_timeout;
+	u64 credit_window_start;
+
+	/* Statistics */
+	struct xenvif_stats stats;
+};
+
+struct xenvif {
+	/* Unique identifier for this interface. */
+	domid_t          domid;
+	unsigned int     handle;
+
 	u8               fe_dev_addr[6];
 
 	/* Frontend feature information. */
@@ -179,19 +215,13 @@
 	/* Internal feature information. */
 	u8 can_queue:1;	    /* can queue packets for receiver? */
 
-	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-	unsigned long   credit_bytes;
-	unsigned long   credit_usec;
-	unsigned long   remaining_credit;
-	struct timer_list credit_timeout;
-	u64 credit_window_start;
+	/* Is this interface disabled? True when backend discovers
+	 * frontend is rogue.
+	 */
+	bool disabled;
 
-	/* Statistics */
-	unsigned long rx_gso_checksum_fixup;
-	unsigned long tx_zerocopy_sent;
-	unsigned long tx_zerocopy_success;
-	unsigned long tx_zerocopy_fail;
-	unsigned long tx_frag_overflow;
+	/* Queues */
+	struct xenvif_queue *queues;
 
 	/* Miscellaneous private stuff. */
 	struct net_device *dev;
@@ -206,7 +236,10 @@
 			    domid_t domid,
 			    unsigned int handle);
 
-int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+int xenvif_init_queue(struct xenvif_queue *queue);
+void xenvif_deinit_queue(struct xenvif_queue *queue);
+
+int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
 		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
 		   unsigned int rx_evtchn);
 void xenvif_disconnect(struct xenvif *vif);
@@ -217,44 +250,47 @@
 
 int xenvif_schedulable(struct xenvif *vif);
 
-int xenvif_must_stop_queue(struct xenvif *vif);
+int xenvif_must_stop_queue(struct xenvif_queue *queue);
+
+int xenvif_queue_stopped(struct xenvif_queue *queue);
+void xenvif_wake_queue(struct xenvif_queue *queue);
 
 /* (Un)Map communication rings. */
-void xenvif_unmap_frontend_rings(struct xenvif *vif);
-int xenvif_map_frontend_rings(struct xenvif *vif,
+void xenvif_unmap_frontend_rings(struct xenvif_queue *queue);
+int xenvif_map_frontend_rings(struct xenvif_queue *queue,
 			      grant_ref_t tx_ring_ref,
 			      grant_ref_t rx_ring_ref);
 
 /* Check for SKBs from frontend and schedule backend processing */
-void xenvif_napi_schedule_or_enable_events(struct xenvif *vif);
+void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue);
 
 /* Prevent the device from generating any further traffic. */
 void xenvif_carrier_off(struct xenvif *vif);
 
-int xenvif_tx_action(struct xenvif *vif, int budget);
+int xenvif_tx_action(struct xenvif_queue *queue, int budget);
 
 int xenvif_kthread_guest_rx(void *data);
-void xenvif_kick_thread(struct xenvif *vif);
+void xenvif_kick_thread(struct xenvif_queue *queue);
 
 int xenvif_dealloc_kthread(void *data);
 
 /* Determine whether the needed number of slots (req) are available,
  * and set req_event if not.
  */
-bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
+bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed);
 
-void xenvif_stop_queue(struct xenvif *vif);
+void xenvif_carrier_on(struct xenvif *vif);
 
 /* Callback from stack when TX packet can be released */
 void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
 
 /* Unmap a pending page and release it back to the guest */
-void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
+void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
 
-static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
+static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
 {
 	return MAX_PENDING_REQS -
-		vif->pending_prod + vif->pending_cons;
+		queue->pending_prod + queue->pending_cons;
 }
 
 /* Callback from stack when TX packet can be released */
@@ -264,5 +300,6 @@
 
 extern unsigned int rx_drain_timeout_msecs;
 extern unsigned int rx_drain_timeout_jiffies;
+extern unsigned int xenvif_max_queues;
 
 #endif /* __XEN_NETBACK__COMMON_H__ */

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 20e9def..852da34 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c

@@ -43,6 +43,16 @@
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+/* Stop the netdev TX queue backing @queue, unless the vif cannot queue. */
+static inline void xenvif_stop_queue(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+
+	if (vif->can_queue)
+		netif_tx_stop_queue(netdev_get_tx_queue(vif->dev,
+							queue->id));
+}
+
 int xenvif_schedulable(struct xenvif *vif)
 {
 	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
@@ -50,33 +60,34 @@
 
 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 {
-	struct xenvif *vif = dev_id;
+	struct xenvif_queue *queue = dev_id;
 
-	if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
-		napi_schedule(&vif->napi);
+	if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
+		napi_schedule(&queue->napi);
 
 	return IRQ_HANDLED;
 }
 
-static int xenvif_poll(struct napi_struct *napi, int budget)
+int xenvif_poll(struct napi_struct *napi, int budget)
 {
-	struct xenvif *vif = container_of(napi, struct xenvif, napi);
+	struct xenvif_queue *queue =
+		container_of(napi, struct xenvif_queue, napi);
 	int work_done;
 
 	/* This vif is rogue, we pretend we've there is nothing to do
 	 * for this vif to deschedule it from NAPI. But this interface
 	 * will be turned off in thread context later.
 	 */
-	if (unlikely(vif->disabled)) {
+	if (unlikely(queue->vif->disabled)) {
 		napi_complete(napi);
 		return 0;
 	}
 
-	work_done = xenvif_tx_action(vif, budget);
+	work_done = xenvif_tx_action(queue, budget);
 
 	if (work_done < budget) {
 		napi_complete(napi);
-		xenvif_napi_schedule_or_enable_events(vif);
+		xenvif_napi_schedule_or_enable_events(queue);
 	}
 
 	return work_done;
@@ -84,9 +95,9 @@
 
 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
-	struct xenvif *vif = dev_id;
+	struct xenvif_queue *queue = dev_id;
 
-	xenvif_kick_thread(vif);
+	xenvif_kick_thread(queue);
 
 	return IRQ_HANDLED;
 }
@@ -99,28 +110,80 @@
 	return IRQ_HANDLED;
 }
 
-static void xenvif_wake_queue(unsigned long data)
+int xenvif_queue_stopped(struct xenvif_queue *queue)
 {
-	struct xenvif *vif = (struct xenvif *)data;
+	struct net_device *dev = queue->vif->dev;
+	unsigned int id = queue->id;
+	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
+}
 
-	if (netif_queue_stopped(vif->dev)) {
-		netdev_err(vif->dev, "draining TX queue\n");
-		vif->rx_queue_purge = true;
-		xenvif_kick_thread(vif);
-		netif_wake_queue(vif->dev);
+void xenvif_wake_queue(struct xenvif_queue *queue)
+{
+	struct net_device *dev = queue->vif->dev;
+	unsigned int id = queue->id;
+	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
+}
+
+/* Callback to wake the queue and drain it on timeout */
+static void xenvif_wake_queue_callback(unsigned long data)
+{
+	struct xenvif_queue *queue = (struct xenvif_queue *)data;
+
+	if (xenvif_queue_stopped(queue)) {
+		netdev_err(queue->vif->dev, "draining TX queue\n");
+		queue->rx_queue_purge = true;
+		xenvif_kick_thread(queue);
+		xenvif_wake_queue(queue);
 	}
 }
 
+/* ndo_select_queue hook: pick the xenvif queue for an outgoing skb.
+ * The flow hash is reduced modulo the number of active TX queues;
+ * accel_priv and fallback are unused.
+ */
+static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+			       void *accel_priv, select_queue_fallback_t fallback)
+{
+	unsigned int num_queues = dev->real_num_tx_queues;
+
+	/* With one queue (single-queue or old frontend) there is nothing
+	 * to choose. Zero queues is treated the same way so the modulo
+	 * below can never divide by zero; xenvif_start_xmit() drops the
+	 * packet in that case.
+	 */
+	if (num_queues <= 1)
+		return 0;
+
+	/* Use skb_get_hash to obtain an L4 hash if available */
+	return skb_get_hash(skb) % num_queues;
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = dev->real_num_tx_queues;
+	u16 index;
 	int min_slots_needed;
 
 	BUG_ON(skb->dev != dev);
 
-	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL ||
-	    vif->dealloc_task == NULL ||
+	/* Drop the packet if queues are not set up */
+	if (num_queues < 1)
+		goto drop;
+
+	/* Obtain the queue to be used to transmit this packet */
+	index = skb_get_queue_mapping(skb);
+	if (index >= num_queues) {
+		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n",
+				    index, vif->dev->name);
+		index %= num_queues;
+	}
+	queue = &vif->queues[index];
+
+	/* Drop the packet if queue is not ready */
+	if (queue->task == NULL ||
+	    queue->dealloc_task == NULL ||
 	    !xenvif_schedulable(vif))
 		goto drop;
 
@@ -139,16 +202,16 @@
 	 * then turn off the queue to give the ring a chance to
 	 * drain.
 	 */
-	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
-		vif->wake_queue.function = xenvif_wake_queue;
-		vif->wake_queue.data = (unsigned long)vif;
-		xenvif_stop_queue(vif);
-		mod_timer(&vif->wake_queue,
+	if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
+		queue->wake_queue.function = xenvif_wake_queue_callback;
+		queue->wake_queue.data = (unsigned long)queue;
+		xenvif_stop_queue(queue);
+		mod_timer(&queue->wake_queue,
 			jiffies + rx_drain_timeout_jiffies);
 	}
 
-	skb_queue_tail(&vif->rx_queue, skb);
-	xenvif_kick_thread(vif);
+	skb_queue_tail(&queue->rx_queue, skb);
+	xenvif_kick_thread(queue);
 
 	return NETDEV_TX_OK;
 
@@ -161,25 +224,65 @@
 static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	struct net_device_stats *total = &vif->dev->stats;
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned long rx_bytes = 0, rx_packets = 0;
+	unsigned long tx_bytes = 0, tx_packets = 0;
+	unsigned int i;
+
+	/* No queue array yet: report all-zero totals. */
+	if (!vif->queues)
+		num_queues = 0;
+
+	/* Aggregate tx and rx stats from each queue */
+	for (i = 0; i < num_queues; ++i) {
+		const struct xenvif_stats *qs = &vif->queues[i].stats;
+
+		rx_bytes += qs->rx_bytes;
+		rx_packets += qs->rx_packets;
+		tx_bytes += qs->tx_bytes;
+		tx_packets += qs->tx_packets;
+	}
+
+	/* Publish the sums in the netdev's own stats structure. */
+	total->rx_bytes = rx_bytes;
+	total->rx_packets = rx_packets;
+	total->tx_bytes = tx_bytes;
+	total->tx_packets = tx_packets;
+
 	return &vif->dev->stats;
 }
 
 static void xenvif_up(struct xenvif *vif)
 {
-	napi_enable(&vif->napi);
-	enable_irq(vif->tx_irq);
-	if (vif->tx_irq != vif->rx_irq)
-		enable_irq(vif->rx_irq);
-	xenvif_napi_schedule_or_enable_events(vif);
+	unsigned int i, num_queues = vif->dev->real_num_tx_queues;
+
+	/* Per queue: enable NAPI and the IRQ(s), then check for work. */
+	for (i = 0; i < num_queues; ++i) {
+		struct xenvif_queue *q = &vif->queues[i];
+
+		napi_enable(&q->napi);
+		enable_irq(q->tx_irq);
+		if (q->rx_irq != q->tx_irq)
+			enable_irq(q->rx_irq);
+		xenvif_napi_schedule_or_enable_events(q);
+	}
 }
 
 static void xenvif_down(struct xenvif *vif)
 {
-	napi_disable(&vif->napi);
-	disable_irq(vif->tx_irq);
-	if (vif->tx_irq != vif->rx_irq)
-		disable_irq(vif->rx_irq);
-	del_timer_sync(&vif->credit_timeout);
+	unsigned int i, num_queues = vif->dev->real_num_tx_queues;
+
+	/* Per queue: disable NAPI and the IRQ(s), stop the credit timer. */
+	for (i = 0; i < num_queues; ++i) {
+		struct xenvif_queue *q = &vif->queues[i];
+
+		napi_disable(&q->napi);
+		disable_irq(q->tx_irq);
+		if (q->rx_irq != q->tx_irq)
+			disable_irq(q->rx_irq);
+		del_timer_sync(&q->credit_timeout);
+	}
 }
 
 static int xenvif_open(struct net_device *dev)
@@ -187,7 +290,7 @@
 	struct xenvif *vif = netdev_priv(dev);
 	if (netif_carrier_ok(dev))
 		xenvif_up(vif);
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 	return 0;
 }
 
@@ -196,7 +299,7 @@
 	struct xenvif *vif = netdev_priv(dev);
 	if (netif_carrier_ok(dev))
 		xenvif_down(vif);
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 	return 0;
 }
 
@@ -236,29 +339,29 @@
 } xenvif_stats[] = {
 	{
 		"rx_gso_checksum_fixup",
-		offsetof(struct xenvif, rx_gso_checksum_fixup)
+		offsetof(struct xenvif_stats, rx_gso_checksum_fixup)
 	},
 	/* If (sent != success + fail), there are probably packets never
 	 * freed up properly!
 	 */
 	{
 		"tx_zerocopy_sent",
-		offsetof(struct xenvif, tx_zerocopy_sent),
+		offsetof(struct xenvif_stats, tx_zerocopy_sent),
 	},
 	{
 		"tx_zerocopy_success",
-		offsetof(struct xenvif, tx_zerocopy_success),
+		offsetof(struct xenvif_stats, tx_zerocopy_success),
 	},
 	{
 		"tx_zerocopy_fail",
-		offsetof(struct xenvif, tx_zerocopy_fail)
+		offsetof(struct xenvif_stats, tx_zerocopy_fail)
 	},
 	/* Number of packets exceeding MAX_SKB_FRAG slots. You should use
 	 * a guest with the same MAX_SKB_FRAG
 	 */
 	{
 		"tx_frag_overflow",
-		offsetof(struct xenvif, tx_frag_overflow)
+		offsetof(struct xenvif_stats, tx_frag_overflow)
 	},
 };
 
@@ -275,11 +378,20 @@
 static void xenvif_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 * data)
 {
-	void *vif = netdev_priv(dev);
+	struct xenvif *vif = netdev_priv(dev);
+	unsigned int queue_index, num_queues = dev->real_num_tx_queues;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
-		data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset);
+	/* Report each counter as its sum over all active queues. */
+	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
+		unsigned long accum = 0;
+		for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+			/* void *, so the byte offset is not scaled */
+			void *vif_stats = &vif->queues[queue_index].stats;
+			accum += *(unsigned long *)(vif_stats + xenvif_stats[i].offset);
+		}
+		data[i] = accum;
+	}
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
@@ -312,6 +424,7 @@
 	.ndo_fix_features = xenvif_fix_features,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr   = eth_validate_addr,
+	.ndo_select_queue = xenvif_select_queue,
 };
 
 struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
@@ -321,10 +434,14 @@
 	struct net_device *dev;
 	struct xenvif *vif;
 	char name[IFNAMSIZ] = {};
-	int i;
 
 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
+	/* Allocate a netdev with the max. supported number of queues.
+	 * When the guest selects the desired number, it will be updated
+	 * via netif_set_real_num_tx_queues().
+	 */
+	dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup,
+			      xenvif_max_queues);
 	if (dev == NULL) {
 		pr_warn("Could not allocate netdev for %s\n", name);
 		return ERR_PTR(-ENOMEM);
@@ -334,66 +451,28 @@
 
 	vif = netdev_priv(dev);
 
-	vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) *
-				     MAX_GRANT_COPY_OPS);
-	if (vif->grant_copy_op == NULL) {
-		pr_warn("Could not allocate grant copy space for %s\n", name);
-		free_netdev(dev);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	vif->domid  = domid;
 	vif->handle = handle;
 	vif->can_sg = 1;
 	vif->ip_csum = 1;
 	vif->dev = dev;
-
 	vif->disabled = false;
 
-	vif->credit_bytes = vif->remaining_credit = ~0UL;
-	vif->credit_usec  = 0UL;
-	init_timer(&vif->credit_timeout);
-	vif->credit_window_start = get_jiffies_64();
-
-	init_timer(&vif->wake_queue);
+	/* Start out with no queues. The call below does not require
+	 * rtnl_lock() as it happens before register_netdev().
+	 */
+	vif->queues = NULL;
+	netif_set_real_num_tx_queues(dev, 0);
 
 	dev->netdev_ops	= &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG |
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6;
 	dev->features = dev->hw_features | NETIF_F_RXCSUM;
-	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
+	dev->ethtool_ops = &xenvif_ethtool_ops;
 
 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
 
-	skb_queue_head_init(&vif->rx_queue);
-	skb_queue_head_init(&vif->tx_queue);
-
-	vif->pending_cons = 0;
-	vif->pending_prod = MAX_PENDING_REQS;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->pending_ring[i] = i;
-	spin_lock_init(&vif->callback_lock);
-	spin_lock_init(&vif->response_lock);
-	/* If ballooning is disabled, this will consume real memory, so you
-	 * better enable it. The long term solution would be to use just a
-	 * bunch of valid page descriptors, without dependency on ballooning
-	 */
-	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
-				       vif->mmap_pages,
-				       false);
-	if (err) {
-		netdev_err(dev, "Could not reserve mmap_pages\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	for (i = 0; i < MAX_PENDING_REQS; i++) {
-		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
-			{ .callback = xenvif_zerocopy_callback,
-			  .ctx = NULL,
-			  .desc = i };
-		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
-	}
-
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
 	 * largest non-broadcast address to prevent the address getting
@@ -403,8 +482,6 @@
 	memset(dev->dev_addr, 0xFF, ETH_ALEN);
 	dev->dev_addr[0] &= ~0x01;
 
-	netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
-
 	netif_carrier_off(dev);
 
 	err = register_netdev(dev);
@@ -421,76 +498,56 @@
 	return vif;
 }
 
-int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
-		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
-		   unsigned int rx_evtchn)
+int xenvif_init_queue(struct xenvif_queue *queue)
 {
-	struct task_struct *task;
-	int err = -ENOMEM;
+	int err, i;
 
-	BUG_ON(vif->tx_irq);
-	BUG_ON(vif->task);
-	BUG_ON(vif->dealloc_task);
+	queue->credit_bytes = queue->remaining_credit = ~0UL;
+	queue->credit_usec  = 0UL;
+	init_timer(&queue->credit_timeout);
+	queue->credit_window_start = get_jiffies_64();
 
-	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
-	if (err < 0)
-		goto err;
+	skb_queue_head_init(&queue->rx_queue);
+	skb_queue_head_init(&queue->tx_queue);
 
-	init_waitqueue_head(&vif->wq);
-	init_waitqueue_head(&vif->dealloc_wq);
+	queue->pending_cons = 0;
+	queue->pending_prod = MAX_PENDING_REQS;
+	for (i = 0; i < MAX_PENDING_REQS; ++i)
+		queue->pending_ring[i] = i;
 
-	if (tx_evtchn == rx_evtchn) {
-		/* feature-split-event-channels == 0 */
-		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, tx_evtchn, xenvif_interrupt, 0,
-			vif->dev->name, vif);
-		if (err < 0)
-			goto err_unmap;
-		vif->tx_irq = vif->rx_irq = err;
-		disable_irq(vif->tx_irq);
-	} else {
-		/* feature-split-event-channels == 1 */
-		snprintf(vif->tx_irq_name, sizeof(vif->tx_irq_name),
-			 "%s-tx", vif->dev->name);
-		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
-			vif->tx_irq_name, vif);
-		if (err < 0)
-			goto err_unmap;
-		vif->tx_irq = err;
-		disable_irq(vif->tx_irq);
+	spin_lock_init(&queue->callback_lock);
+	spin_lock_init(&queue->response_lock);
 
-		snprintf(vif->rx_irq_name, sizeof(vif->rx_irq_name),
-			 "%s-rx", vif->dev->name);
-		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
-			vif->rx_irq_name, vif);
-		if (err < 0)
-			goto err_tx_unbind;
-		vif->rx_irq = err;
-		disable_irq(vif->rx_irq);
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * better enable it. The long term solution would be to use just a
+	 * bunch of valid page descriptors, without dependency on ballooning
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+				       queue->mmap_pages,
+				       false);
+	if (err) {
+		netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n");
+		return -ENOMEM;
 	}
 
-	task = kthread_create(xenvif_kthread_guest_rx,
-			      (void *)vif, "%s-guest-rx", vif->dev->name);
-	if (IS_ERR(task)) {
-		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
-		err = PTR_ERR(task);
-		goto err_rx_unbind;
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
 	}
 
-	vif->task = task;
+	init_timer(&queue->wake_queue);
 
-	task = kthread_create(xenvif_dealloc_kthread,
-			      (void *)vif, "%s-dealloc", vif->dev->name);
-	if (IS_ERR(task)) {
-		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
-		err = PTR_ERR(task);
-		goto err_rx_unbind;
-	}
+	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+			XENVIF_NAPI_WEIGHT);
 
-	vif->dealloc_task = task;
+	return 0;
+}
 
+void xenvif_carrier_on(struct xenvif *vif)
+{
 	rtnl_lock();
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -499,20 +556,89 @@
 	if (netif_running(vif->dev))
 		xenvif_up(vif);
 	rtnl_unlock();
+}
 
-	wake_up_process(vif->task);
-	wake_up_process(vif->dealloc_task);
+int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
+		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
+		   unsigned int rx_evtchn)
+{
+	struct task_struct *task;
+	int err = -ENOMEM;
+
+	BUG_ON(queue->tx_irq);
+	BUG_ON(queue->task);
+	BUG_ON(queue->dealloc_task);
+
+	err = xenvif_map_frontend_rings(queue, tx_ring_ref, rx_ring_ref);
+	if (err < 0)
+		goto err;
+
+	init_waitqueue_head(&queue->wq);
+	init_waitqueue_head(&queue->dealloc_wq);
+
+	if (tx_evtchn == rx_evtchn) {
+		/* feature-split-event-channels == 0 */
+		err = bind_interdomain_evtchn_to_irqhandler(
+			queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
+			queue->name, queue);
+		if (err < 0)
+			goto err_unmap;
+		queue->tx_irq = queue->rx_irq = err;
+		disable_irq(queue->tx_irq);
+	} else {
+		/* feature-split-event-channels == 1 */
+		snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
+			 "%s-tx", queue->name);
+		err = bind_interdomain_evtchn_to_irqhandler(
+			queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
+			queue->tx_irq_name, queue);
+		if (err < 0)
+			goto err_unmap;
+		queue->tx_irq = err;
+		disable_irq(queue->tx_irq);
+
+		snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
+			 "%s-rx", queue->name);
+		err = bind_interdomain_evtchn_to_irqhandler(
+			queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
+			queue->rx_irq_name, queue);
+		if (err < 0)
+			goto err_tx_unbind;
+		queue->rx_irq = err;
+		disable_irq(queue->rx_irq);
+	}
+
+	task = kthread_create(xenvif_kthread_guest_rx,
+			      (void *)queue, "%s-guest-rx", queue->name);
+	if (IS_ERR(task)) {
+		pr_warn("Could not allocate kthread for %s\n", queue->name);
+		err = PTR_ERR(task);
+		goto err_rx_unbind;
+	}
+	queue->task = task;
+
+	task = kthread_create(xenvif_dealloc_kthread,
+			      (void *)queue, "%s-dealloc", queue->name);
+	if (IS_ERR(task)) {
+		pr_warn("Could not allocate kthread for %s\n", queue->name);
+		err = PTR_ERR(task);
+		goto err_rx_unbind;
+	}
+	queue->dealloc_task = task;
+
+	wake_up_process(queue->task);
+	wake_up_process(queue->dealloc_task);
 
 	return 0;
 
 err_rx_unbind:
-	unbind_from_irqhandler(vif->rx_irq, vif);
-	vif->rx_irq = 0;
+	unbind_from_irqhandler(queue->rx_irq, queue);
+	queue->rx_irq = 0;
 err_tx_unbind:
-	unbind_from_irqhandler(vif->tx_irq, vif);
-	vif->tx_irq = 0;
+	unbind_from_irqhandler(queue->tx_irq, queue);
+	queue->tx_irq = 0;
 err_unmap:
-	xenvif_unmap_frontend_rings(vif);
+	xenvif_unmap_frontend_rings(queue);
 err:
 	module_put(THIS_MODULE);
 	return err;
@@ -529,38 +655,77 @@
 	rtnl_unlock();
 }
 
+/* Wait until no pending slot of @queue holds a grant handle any more. */
+static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
+				      unsigned int worst_case_skb_lifetime)
+{
+	unsigned int waited = 0;
+	int i;
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (queue->grant_tx_handle[i] == NETBACK_INVALID_HANDLE)
+			continue;
+		/* Sleep for real; bare schedule_timeout() would not. */
+		schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
+		if (++waited > worst_case_skb_lifetime && net_ratelimit())
+			netdev_err(queue->vif->dev,
+				   "Page still granted! Index: %x\n", i);
+		i = -1;	/* still mapped: rescan from the first slot */
+	}
+}
+
 void xenvif_disconnect(struct xenvif *vif)
 {
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
+
 	if (netif_carrier_ok(vif->dev))
 		xenvif_carrier_off(vif);
 
-	if (vif->task) {
-		del_timer_sync(&vif->wake_queue);
-		kthread_stop(vif->task);
-		vif->task = NULL;
-	}
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
 
-	if (vif->dealloc_task) {
-		kthread_stop(vif->dealloc_task);
-		vif->dealloc_task = NULL;
-	}
-
-	if (vif->tx_irq) {
-		if (vif->tx_irq == vif->rx_irq)
-			unbind_from_irqhandler(vif->tx_irq, vif);
-		else {
-			unbind_from_irqhandler(vif->tx_irq, vif);
-			unbind_from_irqhandler(vif->rx_irq, vif);
+		if (queue->task) {
+			del_timer_sync(&queue->wake_queue);
+			kthread_stop(queue->task);
+			queue->task = NULL;
 		}
-		vif->tx_irq = 0;
-	}
 
-	xenvif_unmap_frontend_rings(vif);
+		if (queue->dealloc_task) {
+			kthread_stop(queue->dealloc_task);
+			queue->dealloc_task = NULL;
+		}
+
+		if (queue->tx_irq) {
+			if (queue->tx_irq == queue->rx_irq)
+				unbind_from_irqhandler(queue->tx_irq, queue);
+			else {
+				unbind_from_irqhandler(queue->tx_irq, queue);
+				unbind_from_irqhandler(queue->rx_irq, queue);
+			}
+			queue->tx_irq = 0;
+		}
+
+		xenvif_unmap_frontend_rings(queue);
+	}
+}
+
+/* Reverse the relevant parts of xenvif_init_queue().
+ * Used for queue teardown from xenvif_free(), and on the
+ * error handling paths in xenbus.c:connect().
+ */
+void xenvif_deinit_queue(struct xenvif_queue *queue)
+{
+	free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
+	netif_napi_del(&queue->napi);
 }
 
 void xenvif_free(struct xenvif *vif)
 {
-	int i, unmap_timeout = 0;
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
 	/* Here we want to avoid timeout messages if an skb can be legitimately
 	 * stuck somewhere else. Realistically this could be an another vif's
 	 * internal or QDisc queue. That another vif also has this
@@ -575,33 +740,21 @@
 	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
 		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
 
-	for (i = 0; i < MAX_PENDING_REQS; ++i) {
-		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
-			unmap_timeout++;
-			schedule_timeout(msecs_to_jiffies(1000));
-			if (unmap_timeout > worst_case_skb_lifetime &&
-			    net_ratelimit())
-				netdev_err(vif->dev,
-					   "Page still granted! Index: %x\n",
-					   i);
-			/* If there are still unmapped pages, reset the loop to
-			 * start checking again. We shouldn't exit here until
-			 * dealloc thread and NAPI instance release all the
-			 * pages. If a kernel bug causes the skbs to stall
-			 * somewhere, the interface cannot be brought down
-			 * properly.
-			 */
-			i = -1;
-		}
-	}
-
-	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
-
-	netif_napi_del(&vif->napi);
-
 	unregister_netdev(vif->dev);
 
-	vfree(vif->grant_copy_op);
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
+		xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
+		xenvif_deinit_queue(queue);
+	}
+
+	/* Free the array of queues. The call below does not require
+	 * rtnl_lock() because it happens after unregister_netdev().
+	 */
+	netif_set_real_num_tx_queues(vif->dev, 0);
+	vfree(vif->queues);
+	vif->queues = NULL;
+
 	free_netdev(vif->dev);
 
 	module_put(THIS_MODULE);

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 7367208..1844a47 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c

@@ -62,6 +62,11 @@
 module_param(rx_drain_timeout_msecs, uint, 0444);
 unsigned int rx_drain_timeout_jiffies;
 
+unsigned int xenvif_max_queues;
+module_param_named(max_queues, xenvif_max_queues, uint, 0644);
+MODULE_PARM_DESC(max_queues,
+		 "Maximum number of queues per virtual interface");
+
 /*
  * This is the maximum slots a skb can have. If a guest sends a skb
  * which exceeds this limit it is considered malicious.
@@ -70,33 +75,33 @@
 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
 module_param(fatal_skb_slots, uint, 0444);
 
-static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
 			       u8 status);
 
-static void make_tx_response(struct xenvif *vif,
+static void make_tx_response(struct xenvif_queue *queue,
 			     struct xen_netif_tx_request *txp,
 			     s8       st);
 
-static inline int tx_work_todo(struct xenvif *vif);
-static inline int rx_work_todo(struct xenvif *vif);
+static inline int tx_work_todo(struct xenvif_queue *queue);
+static inline int rx_work_todo(struct xenvif_queue *queue);
 
-static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
 					     u16      id,
 					     s8       st,
 					     u16      offset,
 					     u16      size,
 					     u16      flags);
 
-static inline unsigned long idx_to_pfn(struct xenvif *vif,
+static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
 				       u16 idx)
 {
-	return page_to_pfn(vif->mmap_pages[idx]);
+	return page_to_pfn(queue->mmap_pages[idx]);
 }
 
-static inline unsigned long idx_to_kaddr(struct xenvif *vif,
+static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
 					 u16 idx)
 {
-	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
 }
 
 #define callback_param(vif, pending_idx) \
@@ -104,13 +109,13 @@
 
 /* Find the containing VIF's structure from a pointer in pending_tx_info array
  */
-static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf)
+static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
 {
 	u16 pending_idx = ubuf->desc;
 	struct pending_tx_info *temp =
 		container_of(ubuf, struct pending_tx_info, callback_struct);
 	return container_of(temp - pending_idx,
-			    struct xenvif,
+			    struct xenvif_queue,
 			    pending_tx_info[0]);
 }
 
@@ -136,24 +141,24 @@
 	return i & (MAX_PENDING_REQS-1);
 }
 
-bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
+bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
 {
 	RING_IDX prod, cons;
 
 	do {
-		prod = vif->rx.sring->req_prod;
-		cons = vif->rx.req_cons;
+		prod = queue->rx.sring->req_prod;
+		cons = queue->rx.req_cons;
 
 		if (prod - cons >= needed)
 			return true;
 
-		vif->rx.sring->req_event = prod + 1;
+		queue->rx.sring->req_event = prod + 1;
 
 		/* Make sure event is visible before we check prod
 		 * again.
 		 */
 		mb();
-	} while (vif->rx.sring->req_prod != prod);
+	} while (queue->rx.sring->req_prod != prod);
 
 	return false;
 }
@@ -163,7 +168,8 @@
  * adding 'size' bytes to a buffer which currently contains 'offset'
  * bytes.
  */
-static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+static bool start_new_rx_buffer(int offset, unsigned long size, int head,
+				bool full_coalesce)
 {
 	/* simple case: we have completely filled the current buffer. */
 	if (offset == MAX_BUFFER_OFFSET)
@@ -175,6 +181,7 @@
 	 *     (i)   this frag would fit completely in the next buffer
 	 * and (ii)  there is already some data in the current buffer
 	 * and (iii) this is not the head buffer.
+	 * and (iv)  there is no need to fully utilize the buffers
 	 *
 	 * Where:
 	 * - (i) stops us splitting a frag into two copies
@@ -185,6 +192,8 @@
 	 *   by (ii) but is explicitly checked because
 	 *   netfront relies on the first buffer being
 	 *   non-empty and can crash otherwise.
+	 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
+	 *   slots
 	 *
 	 * This means we will effectively linearise small
 	 * frags but do not needlessly split large buffers
@@ -192,7 +201,8 @@
 	 * own buffers as before.
 	 */
 	BUG_ON(size > MAX_BUFFER_OFFSET);
-	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
+	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
+	    !full_coalesce)
 		return true;
 
 	return false;
@@ -207,13 +217,13 @@
 	grant_ref_t copy_gref;
 };
 
-static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
+static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
 						 struct netrx_pending_operations *npo)
 {
 	struct xenvif_rx_meta *meta;
 	struct xen_netif_rx_request *req;
 
-	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
 
 	meta = npo->meta + npo->meta_prod++;
 	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
@@ -227,15 +237,22 @@
 	return meta;
 }
 
+struct xenvif_rx_cb {
+	int meta_slots_used;
+	bool full_coalesce;
+};
+
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
 /*
  * Set up the grant operations for this fragment. If it's a flipping
  * interface, we also set up the unmap request from here.
  */
-static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
+static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
 				 unsigned long offset, int *head,
-				 struct xenvif *foreign_vif,
+				 struct xenvif_queue *foreign_queue,
 				 grant_ref_t foreign_gref)
 {
 	struct gnttab_copy *copy_gop;
@@ -261,14 +278,17 @@
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
+		if (start_new_rx_buffer(npo->copy_off,
+					bytes,
+					*head,
+					XENVIF_RX_CB(skb)->full_coalesce)) {
 			/*
 			 * Netfront requires there to be some data in the head
 			 * buffer.
 			 */
 			BUG_ON(*head);
 
-			meta = get_next_rx_buffer(vif, npo);
+			meta = get_next_rx_buffer(queue, npo);
 		}
 
 		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
@@ -278,8 +298,8 @@
 		copy_gop->flags = GNTCOPY_dest_gref;
 		copy_gop->len = bytes;
 
-		if (foreign_vif) {
-			copy_gop->source.domid = foreign_vif->domid;
+		if (foreign_queue) {
+			copy_gop->source.domid = foreign_queue->vif->domid;
 			copy_gop->source.u.ref = foreign_gref;
 			copy_gop->flags |= GNTCOPY_source_gref;
 		} else {
@@ -289,7 +309,7 @@
 		}
 		copy_gop->source.offset = offset;
 
-		copy_gop->dest.domid = vif->domid;
+		copy_gop->dest.domid = queue->vif->domid;
 		copy_gop->dest.offset = npo->copy_off;
 		copy_gop->dest.u.ref = npo->copy_gref;
 
@@ -314,8 +334,8 @@
 				gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
 		}
 
-		if (*head && ((1 << gso_type) & vif->gso_mask))
-			vif->rx.req_cons++;
+		if (*head && ((1 << gso_type) & queue->vif->gso_mask))
+			queue->rx.req_cons++;
 
 		*head = 0; /* There must be something in this buffer now. */
 
@@ -337,13 +357,13 @@
 						const int i,
 						const struct ubuf_info *ubuf)
 {
-	struct xenvif *foreign_vif = ubuf_to_vif(ubuf);
+	struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
 
 	do {
 		u16 pending_idx = ubuf->desc;
 
 		if (skb_shinfo(skb)->frags[i].page.p ==
-		    foreign_vif->mmap_pages[pending_idx])
+		    foreign_queue->mmap_pages[pending_idx])
 			break;
 		ubuf = (struct ubuf_info *) ubuf->ctx;
 	} while (ubuf);
@@ -364,7 +384,8 @@
  * frontend-side LRO).
  */
 static int xenvif_gop_skb(struct sk_buff *skb,
-			  struct netrx_pending_operations *npo)
+			  struct netrx_pending_operations *npo,
+			  struct xenvif_queue *queue)
 {
 	struct xenvif *vif = netdev_priv(skb->dev);
 	int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -390,7 +411,7 @@
 
 	/* Set up a GSO prefix descriptor, if necessary */
 	if ((1 << gso_type) & vif->gso_prefix_mask) {
-		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
 		meta = npo->meta + npo->meta_prod++;
 		meta->gso_type = gso_type;
 		meta->gso_size = skb_shinfo(skb)->gso_size;
@@ -398,7 +419,7 @@
 		meta->id = req->id;
 	}
 
-	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
 	meta = npo->meta + npo->meta_prod++;
 
 	if ((1 << gso_type) & vif->gso_mask) {
@@ -422,7 +443,7 @@
 		if (data + len > skb_tail_pointer(skb))
 			len = skb_tail_pointer(skb) - data;
 
-		xenvif_gop_frag_copy(vif, skb, npo,
+		xenvif_gop_frag_copy(queue, skb, npo,
 				     virt_to_page(data), len, offset, &head,
 				     NULL,
 				     0);
@@ -433,7 +454,7 @@
 		/* This variable also signals whether foreign_gref has a real
 		 * value or not.
 		 */
-		struct xenvif *foreign_vif = NULL;
+		struct xenvif_queue *foreign_queue = NULL;
 		grant_ref_t foreign_gref;
 
 		if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
@@ -458,8 +479,9 @@
 			if (likely(ubuf)) {
 				u16 pending_idx = ubuf->desc;
 
-				foreign_vif = ubuf_to_vif(ubuf);
-				foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref;
+				foreign_queue = ubuf_to_queue(ubuf);
+				foreign_gref =
+					foreign_queue->pending_tx_info[pending_idx].req.gref;
 				/* Just a safety measure. If this was the last
 				 * element on the list, the for loop will
 				 * iterate again if a local page were added to
@@ -477,13 +499,13 @@
 				 */
 				ubuf = head_ubuf;
 		}
-		xenvif_gop_frag_copy(vif, skb, npo,
+		xenvif_gop_frag_copy(queue, skb, npo,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
 				     &head,
-				     foreign_vif,
-				     foreign_vif ? foreign_gref : UINT_MAX);
+				     foreign_queue,
+				     foreign_queue ? foreign_gref : UINT_MAX);
 	}
 
 	return npo->meta_prod - old_meta_prod;
@@ -515,7 +537,7 @@
 	return status;
 }
 
-static void xenvif_add_frag_responses(struct xenvif *vif, int status,
+static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
 				      struct xenvif_rx_meta *meta,
 				      int nr_meta_slots)
 {
@@ -536,23 +558,17 @@
 			flags = XEN_NETRXF_more_data;
 
 		offset = 0;
-		make_rx_response(vif, meta[i].id, status, offset,
+		make_rx_response(queue, meta[i].id, status, offset,
 				 meta[i].size, flags);
 	}
 }
 
-struct xenvif_rx_cb {
-	int meta_slots_used;
-};
-
-#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
-void xenvif_kick_thread(struct xenvif *vif)
+void xenvif_kick_thread(struct xenvif_queue *queue)
 {
-	wake_up(&vif->wq);
+	wake_up(&queue->wq);
 }
 
-static void xenvif_rx_action(struct xenvif *vif)
+static void xenvif_rx_action(struct xenvif_queue *queue)
 {
 	s8 status;
 	u16 flags;
@@ -565,13 +581,13 @@
 	bool need_to_notify = false;
 
 	struct netrx_pending_operations npo = {
-		.copy  = vif->grant_copy_op,
-		.meta  = vif->meta,
+		.copy  = queue->grant_copy_op,
+		.meta  = queue->meta,
 	};
 
 	skb_queue_head_init(&rxq);
 
-	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
+	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
 		RING_IDX max_slots_needed;
 		RING_IDX old_req_cons;
 		RING_IDX ring_slots_used;
@@ -602,10 +618,15 @@
 
 		/* To avoid the estimate becoming too pessimal for some
 		 * frontends that limit posted rx requests, cap the estimate
-		 * at MAX_SKB_FRAGS.
+		 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
+		 * the skb into the provided slots.
 		 */
-		if (max_slots_needed > MAX_SKB_FRAGS)
+		if (max_slots_needed > MAX_SKB_FRAGS) {
 			max_slots_needed = MAX_SKB_FRAGS;
+			XENVIF_RX_CB(skb)->full_coalesce = true;
+		} else {
+			XENVIF_RX_CB(skb)->full_coalesce = false;
+		}
 
 		/* We may need one more slot for GSO metadata */
 		if (skb_is_gso(skb) &&
@@ -614,42 +635,42 @@
 			max_slots_needed++;
 
 		/* If the skb may not fit then bail out now */
-		if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
-			skb_queue_head(&vif->rx_queue, skb);
+		if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
+			skb_queue_head(&queue->rx_queue, skb);
 			need_to_notify = true;
-			vif->rx_last_skb_slots = max_slots_needed;
+			queue->rx_last_skb_slots = max_slots_needed;
 			break;
 		} else
-			vif->rx_last_skb_slots = 0;
+			queue->rx_last_skb_slots = 0;
 
-		old_req_cons = vif->rx.req_cons;
-		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
-		ring_slots_used = vif->rx.req_cons - old_req_cons;
+		old_req_cons = queue->rx.req_cons;
+		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
+		ring_slots_used = queue->rx.req_cons - old_req_cons;
 
 		BUG_ON(ring_slots_used > max_slots_needed);
 
 		__skb_queue_tail(&rxq, skb);
 	}
 
-	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
+	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
 
 	if (!npo.copy_prod)
 		goto done;
 
 	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
-	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
+	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
 
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
 
-		if ((1 << vif->meta[npo.meta_cons].gso_type) &
-		    vif->gso_prefix_mask) {
-			resp = RING_GET_RESPONSE(&vif->rx,
-						 vif->rx.rsp_prod_pvt++);
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    queue->vif->gso_prefix_mask) {
+			resp = RING_GET_RESPONSE(&queue->rx,
+						 queue->rx.rsp_prod_pvt++);
 
 			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
 
-			resp->offset = vif->meta[npo.meta_cons].gso_size;
-			resp->id = vif->meta[npo.meta_cons].id;
+			resp->offset = queue->meta[npo.meta_cons].gso_size;
+			resp->id = queue->meta[npo.meta_cons].id;
 			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
 
 			npo.meta_cons++;
@@ -657,10 +678,10 @@
 		}
 
 
-		vif->dev->stats.tx_bytes += skb->len;
-		vif->dev->stats.tx_packets++;
+		queue->stats.tx_bytes += skb->len;
+		queue->stats.tx_packets++;
 
-		status = xenvif_check_gop(vif,
+		status = xenvif_check_gop(queue->vif,
 					  XENVIF_RX_CB(skb)->meta_slots_used,
 					  &npo);
 
@@ -676,22 +697,22 @@
 			flags |= XEN_NETRXF_data_validated;
 
 		offset = 0;
-		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
+		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
 					status, offset,
-					vif->meta[npo.meta_cons].size,
+					queue->meta[npo.meta_cons].size,
 					flags);
 
-		if ((1 << vif->meta[npo.meta_cons].gso_type) &
-		    vif->gso_mask) {
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    queue->vif->gso_mask) {
 			struct xen_netif_extra_info *gso =
 				(struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&vif->rx,
-						  vif->rx.rsp_prod_pvt++);
+				RING_GET_RESPONSE(&queue->rx,
+						  queue->rx.rsp_prod_pvt++);
 
 			resp->flags |= XEN_NETRXF_extra_info;
 
-			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
-			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
+			gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
+			gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
 			gso->u.gso.pad = 0;
 			gso->u.gso.features = 0;
 
@@ -699,11 +720,11 @@
 			gso->flags = 0;
 		}
 
-		xenvif_add_frag_responses(vif, status,
-					  vif->meta + npo.meta_cons + 1,
+		xenvif_add_frag_responses(queue, status,
+					  queue->meta + npo.meta_cons + 1,
 					  XENVIF_RX_CB(skb)->meta_slots_used);
 
-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
 
 		need_to_notify |= !!ret;
 
@@ -713,20 +734,20 @@
 
 done:
 	if (need_to_notify)
-		notify_remote_via_irq(vif->rx_irq);
+		notify_remote_via_irq(queue->rx_irq);
 }
 
-void xenvif_napi_schedule_or_enable_events(struct xenvif *vif)
+void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
 {
 	int more_to_do;
 
-	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
+	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
 
 	if (more_to_do)
-		napi_schedule(&vif->napi);
+		napi_schedule(&queue->napi);
 }
 
-static void tx_add_credit(struct xenvif *vif)
+static void tx_add_credit(struct xenvif_queue *queue)
 {
 	unsigned long max_burst, max_credit;
 
@@ -734,55 +755,57 @@
 	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
 	 * Otherwise the interface can seize up due to insufficient credit.
 	 */
-	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
+	max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
 	max_burst = min(max_burst, 131072UL);
-	max_burst = max(max_burst, vif->credit_bytes);
+	max_burst = max(max_burst, queue->credit_bytes);
 
 	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
-	max_credit = vif->remaining_credit + vif->credit_bytes;
-	if (max_credit < vif->remaining_credit)
+	max_credit = queue->remaining_credit + queue->credit_bytes;
+	if (max_credit < queue->remaining_credit)
 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
-	vif->remaining_credit = min(max_credit, max_burst);
+	queue->remaining_credit = min(max_credit, max_burst);
 }
 
 static void tx_credit_callback(unsigned long data)
 {
-	struct xenvif *vif = (struct xenvif *)data;
-	tx_add_credit(vif);
-	xenvif_napi_schedule_or_enable_events(vif);
+	struct xenvif_queue *queue = (struct xenvif_queue *)data;
+	tx_add_credit(queue);
+	xenvif_napi_schedule_or_enable_events(queue);
 }
 
-static void xenvif_tx_err(struct xenvif *vif,
+static void xenvif_tx_err(struct xenvif_queue *queue,
 			  struct xen_netif_tx_request *txp, RING_IDX end)
 {
-	RING_IDX cons = vif->tx.req_cons;
+	RING_IDX cons = queue->tx.req_cons;
 	unsigned long flags;
 
 	do {
-		spin_lock_irqsave(&vif->response_lock, flags);
-		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
-		spin_unlock_irqrestore(&vif->response_lock, flags);
+		spin_lock_irqsave(&queue->response_lock, flags);
+		make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
+		spin_unlock_irqrestore(&queue->response_lock, flags);
 		if (cons == end)
 			break;
-		txp = RING_GET_REQUEST(&vif->tx, cons++);
+		txp = RING_GET_REQUEST(&queue->tx, cons++);
 	} while (1);
-	vif->tx.req_cons = cons;
+	queue->tx.req_cons = cons;
 }
 
 static void xenvif_fatal_tx_err(struct xenvif *vif)
 {
 	netdev_err(vif->dev, "fatal error; disabling device\n");
 	vif->disabled = true;
-	xenvif_kick_thread(vif);
+	/* Disable the vif from queue 0's kthread */
+	if (vif->queues)
+		xenvif_kick_thread(&vif->queues[0]);
 }
 
-static int xenvif_count_requests(struct xenvif *vif,
+static int xenvif_count_requests(struct xenvif_queue *queue,
 				 struct xen_netif_tx_request *first,
 				 struct xen_netif_tx_request *txp,
 				 int work_to_do)
 {
-	RING_IDX cons = vif->tx.req_cons;
+	RING_IDX cons = queue->tx.req_cons;
 	int slots = 0;
 	int drop_err = 0;
 	int more_data;
@@ -794,10 +817,10 @@
 		struct xen_netif_tx_request dropped_tx = { 0 };
 
 		if (slots >= work_to_do) {
-			netdev_err(vif->dev,
+			netdev_err(queue->vif->dev,
 				   "Asked for %d slots but exceeds this limit\n",
 				   work_to_do);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			return -ENODATA;
 		}
 
@@ -805,10 +828,10 @@
 		 * considered malicious.
 		 */
 		if (unlikely(slots >= fatal_skb_slots)) {
-			netdev_err(vif->dev,
+			netdev_err(queue->vif->dev,
 				   "Malicious frontend using %d slots, threshold %u\n",
 				   slots, fatal_skb_slots);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			return -E2BIG;
 		}
 
@@ -821,7 +844,7 @@
 		 */
 		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
 			if (net_ratelimit())
-				netdev_dbg(vif->dev,
+				netdev_dbg(queue->vif->dev,
 					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
 					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
 			drop_err = -E2BIG;
@@ -830,7 +853,7 @@
 		if (drop_err)
 			txp = &dropped_tx;
 
-		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
+		memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
 		       sizeof(*txp));
 
 		/* If the guest submitted a frame >= 64 KiB then
@@ -844,7 +867,7 @@
 		 */
 		if (!drop_err && txp->size > first->size) {
 			if (net_ratelimit())
-				netdev_dbg(vif->dev,
+				netdev_dbg(queue->vif->dev,
 					   "Invalid tx request, slot size %u > remaining size %u\n",
 					   txp->size, first->size);
 			drop_err = -EIO;
@@ -854,9 +877,9 @@
 		slots++;
 
 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
+			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
 				 txp->offset, txp->size);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			return -EINVAL;
 		}
 
@@ -868,7 +891,7 @@
 	} while (more_data);
 
 	if (drop_err) {
-		xenvif_tx_err(vif, first, cons + slots);
+		xenvif_tx_err(queue, first, cons + slots);
 		return drop_err;
 	}
 
@@ -882,17 +905,17 @@
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
 
-static inline void xenvif_tx_create_map_op(struct xenvif *vif,
+static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
 					  u16 pending_idx,
 					  struct xen_netif_tx_request *txp,
 					  struct gnttab_map_grant_ref *mop)
 {
-	vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
-	gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx),
+	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
+	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
 			  GNTMAP_host_map | GNTMAP_readonly,
-			  txp->gref, vif->domid);
+			  txp->gref, queue->vif->domid);
 
-	memcpy(&vif->pending_tx_info[pending_idx].req, txp,
+	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
 	       sizeof(*txp));
 }
 
@@ -913,7 +936,7 @@
 	return skb;
 }
 
-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
 							struct sk_buff *skb,
 							struct xen_netif_tx_request *txp,
 							struct gnttab_map_grant_ref *gop)
@@ -940,9 +963,9 @@
 
 	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
 	     shinfo->nr_frags++, txp++, gop++) {
-		index = pending_index(vif->pending_cons++);
-		pending_idx = vif->pending_ring[index];
-		xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
+		index = pending_index(queue->pending_cons++);
+		pending_idx = queue->pending_ring[index];
+		xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
 		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
 	}
 
@@ -950,7 +973,7 @@
 		struct sk_buff *nskb = xenvif_alloc_skb(0);
 		if (unlikely(nskb == NULL)) {
 			if (net_ratelimit())
-				netdev_err(vif->dev,
+				netdev_err(queue->vif->dev,
 					   "Can't allocate the frag_list skb.\n");
 			return NULL;
 		}
@@ -960,9 +983,9 @@
 
 		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
 		     shinfo->nr_frags++, txp++, gop++) {
-			index = pending_index(vif->pending_cons++);
-			pending_idx = vif->pending_ring[index];
-			xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
+			index = pending_index(queue->pending_cons++);
+			pending_idx = queue->pending_ring[index];
+			xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
 			frag_set_pending_idx(&frags[shinfo->nr_frags],
 					     pending_idx);
 		}
@@ -973,34 +996,34 @@
 	return gop;
 }
 
-static inline void xenvif_grant_handle_set(struct xenvif *vif,
+static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
 					   u16 pending_idx,
 					   grant_handle_t handle)
 {
-	if (unlikely(vif->grant_tx_handle[pending_idx] !=
+	if (unlikely(queue->grant_tx_handle[pending_idx] !=
 		     NETBACK_INVALID_HANDLE)) {
-		netdev_err(vif->dev,
+		netdev_err(queue->vif->dev,
 			   "Trying to overwrite active handle! pending_idx: %x\n",
 			   pending_idx);
 		BUG();
 	}
-	vif->grant_tx_handle[pending_idx] = handle;
+	queue->grant_tx_handle[pending_idx] = handle;
 }
 
-static inline void xenvif_grant_handle_reset(struct xenvif *vif,
+static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
 					     u16 pending_idx)
 {
-	if (unlikely(vif->grant_tx_handle[pending_idx] ==
+	if (unlikely(queue->grant_tx_handle[pending_idx] ==
 		     NETBACK_INVALID_HANDLE)) {
-		netdev_err(vif->dev,
+		netdev_err(queue->vif->dev,
 			   "Trying to unmap invalid handle! pending_idx: %x\n",
 			   pending_idx);
 		BUG();
 	}
-	vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
+	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
 }
 
-static int xenvif_tx_check_gop(struct xenvif *vif,
+static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 			       struct sk_buff *skb,
 			       struct gnttab_map_grant_ref **gopp_map,
 			       struct gnttab_copy **gopp_copy)
@@ -1017,12 +1040,12 @@
 	(*gopp_copy)++;
 	if (unlikely(err)) {
 		if (net_ratelimit())
-			netdev_dbg(vif->dev,
+			netdev_dbg(queue->vif->dev,
 				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
 				   (*gopp_copy)->status,
 				   pending_idx,
 				   (*gopp_copy)->source.u.ref);
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
 	}
 
 check_frags:
@@ -1035,24 +1058,24 @@
 		newerr = gop_map->status;
 
 		if (likely(!newerr)) {
-			xenvif_grant_handle_set(vif,
+			xenvif_grant_handle_set(queue,
 						pending_idx,
 						gop_map->handle);
 			/* Had a previous error? Invalidate this fragment. */
 			if (unlikely(err))
-				xenvif_idx_unmap(vif, pending_idx);
+				xenvif_idx_unmap(queue, pending_idx);
 			continue;
 		}
 
 		/* Error on this fragment: respond to client with an error. */
 		if (net_ratelimit())
-			netdev_dbg(vif->dev,
+			netdev_dbg(queue->vif->dev,
 				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
 				   i,
 				   gop_map->status,
 				   pending_idx,
 				   gop_map->ref);
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
 
 		/* Not the first error? Preceding frags already invalidated. */
 		if (err)
@@ -1060,7 +1083,7 @@
 		/* First error: invalidate preceding fragments. */
 		for (j = 0; j < i; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
-			xenvif_idx_unmap(vif, pending_idx);
+			xenvif_idx_unmap(queue, pending_idx);
 		}
 
 		/* Remember the error: invalidate all subsequent fragments. */
@@ -1084,7 +1107,7 @@
 		shinfo = skb_shinfo(first_skb);
 		for (j = 0; j < shinfo->nr_frags; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
-			xenvif_idx_unmap(vif, pending_idx);
+			xenvif_idx_unmap(queue, pending_idx);
 		}
 	}
 
@@ -1092,7 +1115,7 @@
 	return err;
 }
 
-static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
+static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	int nr_frags = shinfo->nr_frags;
@@ -1110,23 +1133,23 @@
 		/* If this is not the first frag, chain it to the previous*/
 		if (prev_pending_idx == INVALID_PENDING_IDX)
 			skb_shinfo(skb)->destructor_arg =
-				&callback_param(vif, pending_idx);
+				&callback_param(queue, pending_idx);
 		else
-			callback_param(vif, prev_pending_idx).ctx =
-				&callback_param(vif, pending_idx);
+			callback_param(queue, prev_pending_idx).ctx =
+				&callback_param(queue, pending_idx);
 
-		callback_param(vif, pending_idx).ctx = NULL;
+		callback_param(queue, pending_idx).ctx = NULL;
 		prev_pending_idx = pending_idx;
 
-		txp = &vif->pending_tx_info[pending_idx].req;
-		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
+		txp = &queue->pending_tx_info[pending_idx].req;
+		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
 		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
 		skb->len += txp->size;
 		skb->data_len += txp->size;
 		skb->truesize += txp->size;
 
 		/* Take an extra reference to offset network stack's put_page */
-		get_page(vif->mmap_pages[pending_idx]);
+		get_page(queue->mmap_pages[pending_idx]);
 	}
 	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
 	 * overlaps with "index", and "mapping" is not set. I think mapping
@@ -1136,33 +1159,33 @@
 	skb->pfmemalloc	= false;
 }
 
-static int xenvif_get_extras(struct xenvif *vif,
+static int xenvif_get_extras(struct xenvif_queue *queue,
 				struct xen_netif_extra_info *extras,
 				int work_to_do)
 {
 	struct xen_netif_extra_info extra;
-	RING_IDX cons = vif->tx.req_cons;
+	RING_IDX cons = queue->tx.req_cons;
 
 	do {
 		if (unlikely(work_to_do-- <= 0)) {
-			netdev_err(vif->dev, "Missing extra info\n");
-			xenvif_fatal_tx_err(vif);
+			netdev_err(queue->vif->dev, "Missing extra info\n");
+			xenvif_fatal_tx_err(queue->vif);
 			return -EBADR;
 		}
 
-		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
+		memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
 		       sizeof(extra));
 		if (unlikely(!extra.type ||
 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
-			vif->tx.req_cons = ++cons;
-			netdev_err(vif->dev,
+			queue->tx.req_cons = ++cons;
+			netdev_err(queue->vif->dev,
 				   "Invalid extra type: %d\n", extra.type);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			return -EINVAL;
 		}
 
 		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
-		vif->tx.req_cons = ++cons;
+		queue->tx.req_cons = ++cons;
 	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
 
 	return work_to_do;
@@ -1197,7 +1220,7 @@
 	return 0;
 }
 
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
 {
 	bool recalculate_partial_csum = false;
 
@@ -1207,7 +1230,7 @@
 	 * recalculate the partial checksum.
 	 */
 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		vif->rx_gso_checksum_fixup++;
+		queue->stats.rx_gso_checksum_fixup++;
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		recalculate_partial_csum = true;
 	}
@@ -1219,31 +1242,31 @@
 	return skb_checksum_setup(skb, recalculate_partial_csum);
 }
 
-static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
+static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 {
 	u64 now = get_jiffies_64();
-	u64 next_credit = vif->credit_window_start +
-		msecs_to_jiffies(vif->credit_usec / 1000);
+	u64 next_credit = queue->credit_window_start +
+		msecs_to_jiffies(queue->credit_usec / 1000);
 
 	/* Timer could already be pending in rare cases. */
-	if (timer_pending(&vif->credit_timeout))
+	if (timer_pending(&queue->credit_timeout))
 		return true;
 
 	/* Passed the point where we can replenish credit? */
 	if (time_after_eq64(now, next_credit)) {
-		vif->credit_window_start = now;
-		tx_add_credit(vif);
+		queue->credit_window_start = now;
+		tx_add_credit(queue);
 	}
 
 	/* Still too big to send right now? Set a callback. */
-	if (size > vif->remaining_credit) {
-		vif->credit_timeout.data     =
-			(unsigned long)vif;
-		vif->credit_timeout.function =
+	if (size > queue->remaining_credit) {
+		queue->credit_timeout.data     =
+			(unsigned long)queue;
+		queue->credit_timeout.function =
 			tx_credit_callback;
-		mod_timer(&vif->credit_timeout,
+		mod_timer(&queue->credit_timeout,
 			  next_credit);
-		vif->credit_window_start = next_credit;
+		queue->credit_window_start = next_credit;
 
 		return true;
 	}
@@ -1251,16 +1274,16 @@
 	return false;
 }
 
-static void xenvif_tx_build_gops(struct xenvif *vif,
+static void xenvif_tx_build_gops(struct xenvif_queue *queue,
 				     int budget,
 				     unsigned *copy_ops,
 				     unsigned *map_ops)
 {
-	struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
+	struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop;
 	struct sk_buff *skb;
 	int ret;
 
-	while (skb_queue_len(&vif->tx_queue) < budget) {
+	while (skb_queue_len(&queue->tx_queue) < budget) {
 		struct xen_netif_tx_request txreq;
 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
@@ -1270,69 +1293,69 @@
 		unsigned int data_len;
 		pending_ring_idx_t index;
 
-		if (vif->tx.sring->req_prod - vif->tx.req_cons >
+		if (queue->tx.sring->req_prod - queue->tx.req_cons >
 		    XEN_NETIF_TX_RING_SIZE) {
-			netdev_err(vif->dev,
+			netdev_err(queue->vif->dev,
 				   "Impossible number of requests. "
 				   "req_prod %d, req_cons %d, size %ld\n",
-				   vif->tx.sring->req_prod, vif->tx.req_cons,
+				   queue->tx.sring->req_prod, queue->tx.req_cons,
 				   XEN_NETIF_TX_RING_SIZE);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			break;
 		}
 
-		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
+		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
 		if (!work_to_do)
 			break;
 
-		idx = vif->tx.req_cons;
+		idx = queue->tx.req_cons;
 		rmb(); /* Ensure that we see the request before we copy it. */
-		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
+		memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
 
 		/* Credit-based scheduling. */
-		if (txreq.size > vif->remaining_credit &&
-		    tx_credit_exceeded(vif, txreq.size))
+		if (txreq.size > queue->remaining_credit &&
+		    tx_credit_exceeded(queue, txreq.size))
 			break;
 
-		vif->remaining_credit -= txreq.size;
+		queue->remaining_credit -= txreq.size;
 
 		work_to_do--;
-		vif->tx.req_cons = ++idx;
+		queue->tx.req_cons = ++idx;
 
 		memset(extras, 0, sizeof(extras));
 		if (txreq.flags & XEN_NETTXF_extra_info) {
-			work_to_do = xenvif_get_extras(vif, extras,
+			work_to_do = xenvif_get_extras(queue, extras,
 						       work_to_do);
-			idx = vif->tx.req_cons;
+			idx = queue->tx.req_cons;
 			if (unlikely(work_to_do < 0))
 				break;
 		}
 
-		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
+		ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
 		if (unlikely(ret < 0))
 			break;
 
 		idx += ret;
 
 		if (unlikely(txreq.size < ETH_HLEN)) {
-			netdev_dbg(vif->dev,
+			netdev_dbg(queue->vif->dev,
 				   "Bad packet size: %d\n", txreq.size);
-			xenvif_tx_err(vif, &txreq, idx);
+			xenvif_tx_err(queue, &txreq, idx);
 			break;
 		}
 
 		/* No crossing a page as the payload mustn't fragment. */
 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
-			netdev_err(vif->dev,
+			netdev_err(queue->vif->dev,
 				   "txreq.offset: %x, size: %u, end: %lu\n",
 				   txreq.offset, txreq.size,
 				   (txreq.offset&~PAGE_MASK) + txreq.size);
-			xenvif_fatal_tx_err(vif);
+			xenvif_fatal_tx_err(queue->vif);
 			break;
 		}
 
-		index = pending_index(vif->pending_cons);
-		pending_idx = vif->pending_ring[index];
+		index = pending_index(queue->pending_cons);
+		pending_idx = queue->pending_ring[index];
 
 		data_len = (txreq.size > PKT_PROT_LEN &&
 			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
@@ -1340,9 +1363,9 @@
 
 		skb = xenvif_alloc_skb(data_len);
 		if (unlikely(skb == NULL)) {
-			netdev_dbg(vif->dev,
+			netdev_dbg(queue->vif->dev,
 				   "Can't allocate a skb in start_xmit.\n");
-			xenvif_tx_err(vif, &txreq, idx);
+			xenvif_tx_err(queue, &txreq, idx);
 			break;
 		}
 
@@ -1350,7 +1373,7 @@
 			struct xen_netif_extra_info *gso;
 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 
-			if (xenvif_set_skb_gso(vif, skb, gso)) {
+			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
 				/* Failure in xenvif_set_skb_gso is fatal. */
 				kfree_skb(skb);
 				break;
@@ -1360,18 +1383,18 @@
 		XENVIF_TX_CB(skb)->pending_idx = pending_idx;
 
 		__skb_put(skb, data_len);
-		vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
-		vif->tx_copy_ops[*copy_ops].source.domid = vif->domid;
-		vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
+		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
+		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
 
-		vif->tx_copy_ops[*copy_ops].dest.u.gmfn =
+		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
 			virt_to_mfn(skb->data);
-		vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
-		vif->tx_copy_ops[*copy_ops].dest.offset =
+		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+		queue->tx_copy_ops[*copy_ops].dest.offset =
 			offset_in_page(skb->data);
 
-		vif->tx_copy_ops[*copy_ops].len = data_len;
-		vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+		queue->tx_copy_ops[*copy_ops].len = data_len;
+		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
 
 		(*copy_ops)++;
 
@@ -1380,42 +1403,42 @@
 			skb_shinfo(skb)->nr_frags++;
 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
 					     pending_idx);
-			xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop);
+			xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
 			gop++;
 		} else {
 			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
 					     INVALID_PENDING_IDX);
-			memcpy(&vif->pending_tx_info[pending_idx].req, &txreq,
+			memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
 			       sizeof(txreq));
 		}
 
-		vif->pending_cons++;
+		queue->pending_cons++;
 
-		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
+		request_gop = xenvif_get_requests(queue, skb, txfrags, gop);
 		if (request_gop == NULL) {
 			kfree_skb(skb);
-			xenvif_tx_err(vif, &txreq, idx);
+			xenvif_tx_err(queue, &txreq, idx);
 			break;
 		}
 		gop = request_gop;
 
-		__skb_queue_tail(&vif->tx_queue, skb);
+		__skb_queue_tail(&queue->tx_queue, skb);
 
-		vif->tx.req_cons = idx;
+		queue->tx.req_cons = idx;
 
-		if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) ||
-		    (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops)))
+		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
 			break;
 	}
 
-	(*map_ops) = gop - vif->tx_map_ops;
+	(*map_ops) = gop - queue->tx_map_ops;
 	return;
 }
 
 /* Consolidate skb with a frag_list into a brand new one with local pages on
  * frags. Returns 0 or -ENOMEM if can't allocate new pages.
  */
-static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
+static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
 {
 	unsigned int offset = skb_headlen(skb);
 	skb_frag_t frags[MAX_SKB_FRAGS];
@@ -1423,10 +1446,10 @@
 	struct ubuf_info *uarg;
 	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
 
-	vif->tx_zerocopy_sent += 2;
-	vif->tx_frag_overflow++;
+	queue->stats.tx_zerocopy_sent += 2;
+	queue->stats.tx_frag_overflow++;
 
-	xenvif_fill_frags(vif, nskb);
+	xenvif_fill_frags(queue, nskb);
 	/* Subtract frags size, we will correct it later */
 	skb->truesize -= skb->data_len;
 	skb->len += nskb->len;
@@ -1478,37 +1501,37 @@
 	return 0;
 }
 
-static int xenvif_tx_submit(struct xenvif *vif)
+static int xenvif_tx_submit(struct xenvif_queue *queue)
 {
-	struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops;
-	struct gnttab_copy *gop_copy = vif->tx_copy_ops;
+	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
+	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
 	struct sk_buff *skb;
 	int work_done = 0;
 
-	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
+	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
 		struct xen_netif_tx_request *txp;
 		u16 pending_idx;
 		unsigned data_len;
 
 		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
-		txp = &vif->pending_tx_info[pending_idx].req;
+		txp = &queue->pending_tx_info[pending_idx].req;
 
 		/* Check the remap error code. */
-		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) {
+		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
 			skb_shinfo(skb)->nr_frags = 0;
 			kfree_skb(skb);
 			continue;
 		}
 
 		data_len = skb->len;
-		callback_param(vif, pending_idx).ctx = NULL;
+		callback_param(queue, pending_idx).ctx = NULL;
 		if (data_len < txp->size) {
 			/* Append the packet payload as a fragment. */
 			txp->offset += data_len;
 			txp->size -= data_len;
 		} else {
 			/* Schedule a response immediately. */
-			xenvif_idx_release(vif, pending_idx,
+			xenvif_idx_release(queue, pending_idx,
 					   XEN_NETIF_RSP_OKAY);
 		}
 
@@ -1517,12 +1540,12 @@
 		else if (txp->flags & XEN_NETTXF_data_validated)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		xenvif_fill_frags(vif, skb);
+		xenvif_fill_frags(queue, skb);
 
 		if (unlikely(skb_has_frag_list(skb))) {
-			if (xenvif_handle_frag_list(vif, skb)) {
+			if (xenvif_handle_frag_list(queue, skb)) {
 				if (net_ratelimit())
-					netdev_err(vif->dev,
+					netdev_err(queue->vif->dev,
 						   "Not enough memory to consolidate frag_list!\n");
 				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 				kfree_skb(skb);
@@ -1535,12 +1558,12 @@
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}
 
-		skb->dev      = vif->dev;
+		skb->dev      = queue->vif->dev;
 		skb->protocol = eth_type_trans(skb, skb->dev);
 		skb_reset_network_header(skb);
 
-		if (checksum_setup(vif, skb)) {
-			netdev_dbg(vif->dev,
+		if (checksum_setup(queue, skb)) {
+			netdev_dbg(queue->vif->dev,
 				   "Can't setup checksum in net_tx_action\n");
 			/* We have to set this flag to trigger the callback */
 			if (skb_shinfo(skb)->destructor_arg)
@@ -1565,8 +1588,8 @@
 				DIV_ROUND_UP(skb->len - hdrlen, mss);
 		}
 
-		vif->dev->stats.rx_bytes += skb->len;
-		vif->dev->stats.rx_packets++;
+		queue->stats.rx_bytes += skb->len;
+		queue->stats.rx_packets++;
 
 		work_done++;
 
@@ -1577,7 +1600,7 @@
 		 */
 		if (skb_shinfo(skb)->destructor_arg) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
-			vif->tx_zerocopy_sent++;
+			queue->stats.tx_zerocopy_sent++;
 		}
 
 		netif_receive_skb(skb);
@@ -1590,47 +1613,47 @@
 {
 	unsigned long flags;
 	pending_ring_idx_t index;
-	struct xenvif *vif = ubuf_to_vif(ubuf);
+	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
 
 	/* This is the only place where we grab this lock, to protect callbacks
 	 * from each other.
 	 */
-	spin_lock_irqsave(&vif->callback_lock, flags);
+	spin_lock_irqsave(&queue->callback_lock, flags);
 	do {
 		u16 pending_idx = ubuf->desc;
 		ubuf = (struct ubuf_info *) ubuf->ctx;
-		BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
+		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
 			MAX_PENDING_REQS);
-		index = pending_index(vif->dealloc_prod);
-		vif->dealloc_ring[index] = pending_idx;
+		index = pending_index(queue->dealloc_prod);
+		queue->dealloc_ring[index] = pending_idx;
 		/* Sync with xenvif_tx_dealloc_action:
 		 * insert idx then incr producer.
 		 */
 		smp_wmb();
-		vif->dealloc_prod++;
+		queue->dealloc_prod++;
 	} while (ubuf);
-	wake_up(&vif->dealloc_wq);
-	spin_unlock_irqrestore(&vif->callback_lock, flags);
+	wake_up(&queue->dealloc_wq);
+	spin_unlock_irqrestore(&queue->callback_lock, flags);
 
 	if (likely(zerocopy_success))
-		vif->tx_zerocopy_success++;
+		queue->stats.tx_zerocopy_success++;
 	else
-		vif->tx_zerocopy_fail++;
+		queue->stats.tx_zerocopy_fail++;
 }
 
-static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
+static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
 {
 	struct gnttab_unmap_grant_ref *gop;
 	pending_ring_idx_t dc, dp;
 	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
 	unsigned int i = 0;
 
-	dc = vif->dealloc_cons;
-	gop = vif->tx_unmap_ops;
+	dc = queue->dealloc_cons;
+	gop = queue->tx_unmap_ops;
 
 	/* Free up any grants we have finished using */
 	do {
-		dp = vif->dealloc_prod;
+		dp = queue->dealloc_prod;
 
 		/* Ensure we see all indices enqueued by all
 		 * xenvif_zerocopy_callback().
@@ -1638,38 +1661,38 @@
 		smp_rmb();
 
 		while (dc != dp) {
-			BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
+			BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
 			pending_idx =
-				vif->dealloc_ring[pending_index(dc++)];
+				queue->dealloc_ring[pending_index(dc++)];
 
-			pending_idx_release[gop-vif->tx_unmap_ops] =
+			pending_idx_release[gop-queue->tx_unmap_ops] =
 				pending_idx;
-			vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
-				vif->mmap_pages[pending_idx];
+			queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
+				queue->mmap_pages[pending_idx];
 			gnttab_set_unmap_op(gop,
-					    idx_to_kaddr(vif, pending_idx),
+					    idx_to_kaddr(queue, pending_idx),
 					    GNTMAP_host_map,
-					    vif->grant_tx_handle[pending_idx]);
-			xenvif_grant_handle_reset(vif, pending_idx);
+					    queue->grant_tx_handle[pending_idx]);
+			xenvif_grant_handle_reset(queue, pending_idx);
 			++gop;
 		}
 
-	} while (dp != vif->dealloc_prod);
+	} while (dp != queue->dealloc_prod);
 
-	vif->dealloc_cons = dc;
+	queue->dealloc_cons = dc;
 
-	if (gop - vif->tx_unmap_ops > 0) {
+	if (gop - queue->tx_unmap_ops > 0) {
 		int ret;
-		ret = gnttab_unmap_refs(vif->tx_unmap_ops,
+		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
 					NULL,
-					vif->pages_to_unmap,
-					gop - vif->tx_unmap_ops);
+					queue->pages_to_unmap,
+					gop - queue->tx_unmap_ops);
 		if (ret) {
-			netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
-				   gop - vif->tx_unmap_ops, ret);
-			for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
+			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
+				   gop - queue->tx_unmap_ops, ret);
+			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
 				if (gop[i].status != GNTST_okay)
-					netdev_err(vif->dev,
+					netdev_err(queue->vif->dev,
 						   " host_addr: %llx handle: %x status: %d\n",
 						   gop[i].host_addr,
 						   gop[i].handle,
@@ -1679,91 +1702,91 @@
 		}
 	}
 
-	for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
-		xenvif_idx_release(vif, pending_idx_release[i],
+	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
+		xenvif_idx_release(queue, pending_idx_release[i],
 				   XEN_NETIF_RSP_OKAY);
 }
 
 
 /* Called after netfront has transmitted */
-int xenvif_tx_action(struct xenvif *vif, int budget)
+int xenvif_tx_action(struct xenvif_queue *queue, int budget)
 {
 	unsigned nr_mops, nr_cops = 0;
 	int work_done, ret;
 
-	if (unlikely(!tx_work_todo(vif)))
+	if (unlikely(!tx_work_todo(queue)))
 		return 0;
 
-	xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops);
+	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
 
 	if (nr_cops == 0)
 		return 0;
 
-	gnttab_batch_copy(vif->tx_copy_ops, nr_cops);
+	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
 	if (nr_mops != 0) {
-		ret = gnttab_map_refs(vif->tx_map_ops,
+		ret = gnttab_map_refs(queue->tx_map_ops,
 				      NULL,
-				      vif->pages_to_map,
+				      queue->pages_to_map,
 				      nr_mops);
 		BUG_ON(ret);
 	}
 
-	work_done = xenvif_tx_submit(vif);
+	work_done = xenvif_tx_submit(queue);
 
 	return work_done;
 }
 
-static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
 			       u8 status)
 {
 	struct pending_tx_info *pending_tx_info;
 	pending_ring_idx_t index;
 	unsigned long flags;
 
-	pending_tx_info = &vif->pending_tx_info[pending_idx];
-	spin_lock_irqsave(&vif->response_lock, flags);
-	make_tx_response(vif, &pending_tx_info->req, status);
-	index = pending_index(vif->pending_prod);
-	vif->pending_ring[index] = pending_idx;
+	pending_tx_info = &queue->pending_tx_info[pending_idx];
+	spin_lock_irqsave(&queue->response_lock, flags);
+	make_tx_response(queue, &pending_tx_info->req, status);
+	index = pending_index(queue->pending_prod);
+	queue->pending_ring[index] = pending_idx;
 	/* TX shouldn't use the index before we give it back here */
 	mb();
-	vif->pending_prod++;
-	spin_unlock_irqrestore(&vif->response_lock, flags);
+	queue->pending_prod++;
+	spin_unlock_irqrestore(&queue->response_lock, flags);
 }
 
 
-static void make_tx_response(struct xenvif *vif,
+static void make_tx_response(struct xenvif_queue *queue,
 			     struct xen_netif_tx_request *txp,
 			     s8       st)
 {
-	RING_IDX i = vif->tx.rsp_prod_pvt;
+	RING_IDX i = queue->tx.rsp_prod_pvt;
 	struct xen_netif_tx_response *resp;
 	int notify;
 
-	resp = RING_GET_RESPONSE(&vif->tx, i);
+	resp = RING_GET_RESPONSE(&queue->tx, i);
 	resp->id     = txp->id;
 	resp->status = st;
 
 	if (txp->flags & XEN_NETTXF_extra_info)
-		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
 
-	vif->tx.rsp_prod_pvt = ++i;
-	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
+	queue->tx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
 	if (notify)
-		notify_remote_via_irq(vif->tx_irq);
+		notify_remote_via_irq(queue->tx_irq);
 }
 
-static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
 					     u16      id,
 					     s8       st,
 					     u16      offset,
 					     u16      size,
 					     u16      flags)
 {
-	RING_IDX i = vif->rx.rsp_prod_pvt;
+	RING_IDX i = queue->rx.rsp_prod_pvt;
 	struct xen_netif_rx_response *resp;
 
-	resp = RING_GET_RESPONSE(&vif->rx, i);
+	resp = RING_GET_RESPONSE(&queue->rx, i);
 	resp->offset     = offset;
 	resp->flags      = flags;
 	resp->id         = id;
@@ -1771,26 +1794,26 @@
 	if (st < 0)
 		resp->status = (s16)st;
 
-	vif->rx.rsp_prod_pvt = ++i;
+	queue->rx.rsp_prod_pvt = ++i;
 
 	return resp;
 }
 
-void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
+void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 {
 	int ret;
 	struct gnttab_unmap_grant_ref tx_unmap_op;
 
 	gnttab_set_unmap_op(&tx_unmap_op,
-			    idx_to_kaddr(vif, pending_idx),
+			    idx_to_kaddr(queue, pending_idx),
 			    GNTMAP_host_map,
-			    vif->grant_tx_handle[pending_idx]);
-	xenvif_grant_handle_reset(vif, pending_idx);
+			    queue->grant_tx_handle[pending_idx]);
+	xenvif_grant_handle_reset(queue, pending_idx);
 
 	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
-				&vif->mmap_pages[pending_idx], 1);
+				&queue->mmap_pages[pending_idx], 1);
 	if (ret) {
-		netdev_err(vif->dev,
+		netdev_err(queue->vif->dev,
 			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
 			   ret,
 			   pending_idx,
@@ -1800,41 +1823,40 @@
 		BUG();
 	}
 
-	xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+	xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY);
 }
 
-static inline int rx_work_todo(struct xenvif *vif)
+static inline int rx_work_todo(struct xenvif_queue *queue)
 {
-	return (!skb_queue_empty(&vif->rx_queue) &&
-	       xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
-	       vif->rx_queue_purge;
+	return (!skb_queue_empty(&queue->rx_queue) &&
+	       xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
+	       queue->rx_queue_purge;
 }
 
-static inline int tx_work_todo(struct xenvif *vif)
+static inline int tx_work_todo(struct xenvif_queue *queue)
 {
-
-	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
+	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
 		return 1;
 
 	return 0;
 }
 
-static inline bool tx_dealloc_work_todo(struct xenvif *vif)
+static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
 {
-	return vif->dealloc_cons != vif->dealloc_prod;
+	return queue->dealloc_cons != queue->dealloc_prod;
 }
 
-void xenvif_unmap_frontend_rings(struct xenvif *vif)
+void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
 {
-	if (vif->tx.sring)
-		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
-					vif->tx.sring);
-	if (vif->rx.sring)
-		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
-					vif->rx.sring);
+	if (queue->tx.sring)
+		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
+					queue->tx.sring);
+	if (queue->rx.sring)
+		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
+					queue->rx.sring);
 }
 
-int xenvif_map_frontend_rings(struct xenvif *vif,
+int xenvif_map_frontend_rings(struct xenvif_queue *queue,
 			      grant_ref_t tx_ring_ref,
 			      grant_ref_t rx_ring_ref)
 {
@@ -1844,85 +1866,78 @@
 
 	int err = -ENOMEM;
 
-	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
 				     tx_ring_ref, &addr);
 	if (err)
 		goto err;
 
 	txs = (struct xen_netif_tx_sring *)addr;
-	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
+	BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
 
-	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
 				     rx_ring_ref, &addr);
 	if (err)
 		goto err;
 
 	rxs = (struct xen_netif_rx_sring *)addr;
-	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
+	BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
 
 	return 0;
 
 err:
-	xenvif_unmap_frontend_rings(vif);
+	xenvif_unmap_frontend_rings(queue);
 	return err;
 }
 
-void xenvif_stop_queue(struct xenvif *vif)
+static void xenvif_start_queue(struct xenvif_queue *queue)
 {
-	if (!vif->can_queue)
-		return;
-
-	netif_stop_queue(vif->dev);
-}
-
-static void xenvif_start_queue(struct xenvif *vif)
-{
-	if (xenvif_schedulable(vif))
-		netif_wake_queue(vif->dev);
+	if (xenvif_schedulable(queue->vif))
+		xenvif_wake_queue(queue);
 }
 
 int xenvif_kthread_guest_rx(void *data)
 {
-	struct xenvif *vif = data;
+	struct xenvif_queue *queue = data;
 	struct sk_buff *skb;
 
 	while (!kthread_should_stop()) {
-		wait_event_interruptible(vif->wq,
-					 rx_work_todo(vif) ||
-					 vif->disabled ||
+		wait_event_interruptible(queue->wq,
+					 rx_work_todo(queue) ||
+					 queue->vif->disabled ||
 					 kthread_should_stop());
 
 		/* This frontend is found to be rogue, disable it in
 		 * kthread context. Currently this is only set when
 		 * netback finds out frontend sends malformed packet,
 		 * but we cannot disable the interface in softirq
-		 * context so we defer it here.
+		 * context so we defer it here, if this thread is
+		 * associated with queue 0.
 		 */
-		if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
-			xenvif_carrier_off(vif);
+		if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
+			xenvif_carrier_off(queue->vif);
 
 		if (kthread_should_stop())
 			break;
 
-		if (vif->rx_queue_purge) {
-			skb_queue_purge(&vif->rx_queue);
-			vif->rx_queue_purge = false;
+		if (queue->rx_queue_purge) {
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_queue_purge = false;
 		}
 
-		if (!skb_queue_empty(&vif->rx_queue))
-			xenvif_rx_action(vif);
+		if (!skb_queue_empty(&queue->rx_queue))
+			xenvif_rx_action(queue);
 
-		if (skb_queue_empty(&vif->rx_queue) &&
-		    netif_queue_stopped(vif->dev)) {
-			del_timer_sync(&vif->wake_queue);
-			xenvif_start_queue(vif);
+		if (skb_queue_empty(&queue->rx_queue) &&
+		    xenvif_queue_stopped(queue)) {
+			del_timer_sync(&queue->wake_queue);
+			xenvif_start_queue(queue);
 		}
 
 		cond_resched();
 	}
 
 	/* Bin any remaining skbs */
-	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
+	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
 		dev_kfree_skb(skb);
 
 	return 0;
@@ -1930,22 +1945,22 @@
 
 int xenvif_dealloc_kthread(void *data)
 {
-	struct xenvif *vif = data;
+	struct xenvif_queue *queue = data;
 
 	while (!kthread_should_stop()) {
-		wait_event_interruptible(vif->dealloc_wq,
-					 tx_dealloc_work_todo(vif) ||
+		wait_event_interruptible(queue->dealloc_wq,
+					 tx_dealloc_work_todo(queue) ||
 					 kthread_should_stop());
 		if (kthread_should_stop())
 			break;
 
-		xenvif_tx_dealloc_action(vif);
+		xenvif_tx_dealloc_action(queue);
 		cond_resched();
 	}
 
 	/* Unmap anything remaining*/
-	if (tx_dealloc_work_todo(vif))
-		xenvif_tx_dealloc_action(vif);
+	if (tx_dealloc_work_todo(queue))
+		xenvif_tx_dealloc_action(queue);
 
 	return 0;
 }
@@ -1957,6 +1972,9 @@
 	if (!xen_domain())
 		return -ENODEV;
 
+	/* Allow as many queues as there are CPUs, by default */
+	xenvif_max_queues = num_online_cpus();
+
 	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
 		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
 			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);

diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 7a206cf..96c63dc2 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c

@@ -19,6 +19,8 @@
 */
 
 #include "common.h"
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
 
 struct backend_info {
 	struct xenbus_device *dev;
@@ -34,8 +36,9 @@
 	u8 have_hotplug_status_watch:1;
 };
 
-static int connect_rings(struct backend_info *);
-static void connect(struct backend_info *);
+static int connect_rings(struct backend_info *be, struct xenvif_queue *queue);
+static void connect(struct backend_info *be);
+static int read_xenbus_vif_flags(struct backend_info *be);
 static void backend_create_xenvif(struct backend_info *be);
 static void unregister_hotplug_status_watch(struct backend_info *be);
 static void set_backend_state(struct backend_info *be,
@@ -157,6 +160,12 @@
 	if (err)
 		pr_debug("Error writing feature-split-event-channels\n");
 
+	/* Multi-queue support: This is an optional feature. */
+	err = xenbus_printf(XBT_NIL, dev->nodename,
+			    "multi-queue-max-queues", "%u", xenvif_max_queues);
+	if (err)
+		pr_debug("Error writing multi-queue-max-queues\n");
+
 	err = xenbus_switch_state(dev, XenbusStateInitWait);
 	if (err)
 		goto fail;
@@ -485,10 +494,26 @@
 {
 	int err;
 	struct xenbus_device *dev = be->dev;
+	unsigned long credit_bytes, credit_usec;
+	unsigned int queue_index;
+	unsigned int requested_num_queues;
+	struct xenvif_queue *queue;
 
-	err = connect_rings(be);
-	if (err)
+	/* Check whether the frontend requested multiple queues
+	 * and read the number requested.
+	 */
+	err = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "multi-queue-num-queues",
+			   "%u", &requested_num_queues);
+	if (err < 0) {
+		requested_num_queues = 1; /* Fall back to single queue */
+	} else if (requested_num_queues > xenvif_max_queues) {
+		/* buggy or malicious guest */
+		xenbus_dev_fatal(dev, err,
+				 "guest requested %u queues, exceeding the maximum of %u.",
+				 requested_num_queues, xenvif_max_queues);
 		return;
+	}
 
 	err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
 	if (err) {
@@ -496,9 +521,54 @@
 		return;
 	}
 
-	xen_net_read_rate(dev, &be->vif->credit_bytes,
-			  &be->vif->credit_usec);
-	be->vif->remaining_credit = be->vif->credit_bytes;
+	xen_net_read_rate(dev, &credit_bytes, &credit_usec);
+	read_xenbus_vif_flags(be);
+
+	/* Use the number of queues requested by the frontend */
+	be->vif->queues = vzalloc(requested_num_queues *
+				  sizeof(struct xenvif_queue));
+	rtnl_lock();
+	netif_set_real_num_tx_queues(be->vif->dev, requested_num_queues);
+	rtnl_unlock();
+
+	for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
+		queue = &be->vif->queues[queue_index];
+		queue->vif = be->vif;
+		queue->id = queue_index;
+		snprintf(queue->name, sizeof(queue->name), "%s-q%u",
+				be->vif->dev->name, queue->id);
+
+		err = xenvif_init_queue(queue);
+		if (err) {
+			/* xenvif_init_queue() cleans up after itself on
+			 * failure, but we need to clean up any previously
+			 * initialised queues. Set the number of queues to
+			 * queue_index so that the earlier queues can be
+			 * destroyed using the regular disconnect logic.
+			 */
+			rtnl_lock();
+			netif_set_real_num_tx_queues(be->vif->dev, queue_index);
+			rtnl_unlock();
+			goto err;
+		}
+
+		queue->remaining_credit = credit_bytes;
+
+		err = connect_rings(be, queue);
+		if (err) {
+			/* connect_rings() cleans up after itself on failure,
+			 * but we need to clean up after xenvif_init_queue() here,
+			 * and also clean up any previously initialised queues.
+			 */
+			xenvif_deinit_queue(queue);
+			rtnl_lock();
+			netif_set_real_num_tx_queues(be->vif->dev, queue_index);
+			rtnl_unlock();
+			goto err;
+		}
+	}
+
+	xenvif_carrier_on(be->vif);
 
 	unregister_hotplug_status_watch(be);
 	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
@@ -507,45 +577,109 @@
 	if (!err)
 		be->have_hotplug_status_watch = 1;
 
-	netif_wake_queue(be->vif->dev);
+	netif_tx_wake_all_queues(be->vif->dev);
+
+	return;
+
+err:
+	if (be->vif->dev->real_num_tx_queues > 0)
+		xenvif_disconnect(be->vif); /* Clean up existing queues */
+	vfree(be->vif->queues);
+	be->vif->queues = NULL;
+	rtnl_lock();
+	netif_set_real_num_tx_queues(be->vif->dev, 0);
+	rtnl_unlock();
+	return;
 }
 
 
-static int connect_rings(struct backend_info *be)
+static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
 {
-	struct xenvif *vif = be->vif;
 	struct xenbus_device *dev = be->dev;
+	unsigned int num_queues = queue->vif->dev->real_num_tx_queues;
 	unsigned long tx_ring_ref, rx_ring_ref;
-	unsigned int tx_evtchn, rx_evtchn, rx_copy;
+	unsigned int tx_evtchn, rx_evtchn;
 	int err;
-	int val;
+	char *xspath;
+	size_t xspathsize;
+	const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
 
-	err = xenbus_gather(XBT_NIL, dev->otherend,
+	/* If the frontend requested 1 queue, or we have fallen back
+	 * to single queue due to lack of frontend support for multi-
+	 * queue, expect the remaining XenStore keys in the toplevel
+	 * directory. Otherwise, expect them in a subdirectory called
+	 * queue-N.
+	 */
+	if (num_queues == 1) {
+		xspath = kzalloc(strlen(dev->otherend) + 1, GFP_KERNEL);
+		if (!xspath) {
+			xenbus_dev_fatal(dev, -ENOMEM,
+					 "reading ring references");
+			return -ENOMEM;
+		}
+		strcpy(xspath, dev->otherend);
+	} else {
+		xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
+		xspath = kzalloc(xspathsize, GFP_KERNEL);
+		if (!xspath) {
+			xenbus_dev_fatal(dev, -ENOMEM,
+					 "reading ring references");
+			return -ENOMEM;
+		}
+		snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend,
+			 queue->id);
+	}
+
+	err = xenbus_gather(XBT_NIL, xspath,
 			    "tx-ring-ref", "%lu", &tx_ring_ref,
 			    "rx-ring-ref", "%lu", &rx_ring_ref, NULL);
 	if (err) {
 		xenbus_dev_fatal(dev, err,
 				 "reading %s/ring-ref",
-				 dev->otherend);
-		return err;
+				 xspath);
+		goto err;
 	}
 
 	/* Try split event channels first, then single event channel. */
-	err = xenbus_gather(XBT_NIL, dev->otherend,
+	err = xenbus_gather(XBT_NIL, xspath,
 			    "event-channel-tx", "%u", &tx_evtchn,
 			    "event-channel-rx", "%u", &rx_evtchn, NULL);
 	if (err < 0) {
-		err = xenbus_scanf(XBT_NIL, dev->otherend,
+		err = xenbus_scanf(XBT_NIL, xspath,
 				   "event-channel", "%u", &tx_evtchn);
 		if (err < 0) {
 			xenbus_dev_fatal(dev, err,
 					 "reading %s/event-channel(-tx/rx)",
-					 dev->otherend);
-			return err;
+					 xspath);
+			goto err;
 		}
 		rx_evtchn = tx_evtchn;
 	}
 
+	/* Map the shared frame, irq etc. */
+	err = xenvif_connect(queue, tx_ring_ref, rx_ring_ref,
+			     tx_evtchn, rx_evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "mapping shared-frames %lu/%lu port tx %u rx %u",
+				 tx_ring_ref, rx_ring_ref,
+				 tx_evtchn, rx_evtchn);
+		goto err;
+	}
+
+	err = 0;
+err: /* Regular return falls through with err == 0 */
+	kfree(xspath);
+	return err;
+}
+
+static int read_xenbus_vif_flags(struct backend_info *be)
+{
+	struct xenvif *vif = be->vif;
+	struct xenbus_device *dev = be->dev;
+	unsigned int rx_copy;
+	int err, val;
+
 	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
 			   &rx_copy);
 	if (err == -ENOENT) {
@@ -621,16 +755,6 @@
 		val = 0;
 	vif->ipv6_csum = !!val;
 
-	/* Map the shared frame, irq etc. */
-	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
-			     tx_evtchn, rx_evtchn);
-	if (err) {
-		xenbus_dev_fatal(dev, err,
-				 "mapping shared-frames %lu/%lu port tx %u rx %u",
-				 tx_ring_ref, rx_ring_ref,
-				 tx_evtchn, rx_evtchn);
-		return err;
-	}
 	return 0;
 }
 

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 158b5e6..5a7872a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c

@@ -57,6 +57,12 @@
 #include <xen/interface/memory.h>
 #include <xen/interface/grant_table.h>
 
+/* Module parameters */
+static unsigned int xennet_max_queues;
+module_param_named(max_queues, xennet_max_queues, uint, 0644);
+MODULE_PARM_DESC(max_queues,
+		 "Maximum number of queues per virtual interface");
+
 static const struct ethtool_ops xennet_ethtool_ops;
 
 struct netfront_cb {
@@ -73,6 +79,12 @@
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
 #define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
 
+/* Queue name is interface name with "-qNNN" appended */
+#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
+
+/* IRQ name is queue name with "-tx" or "-rx" appended */
+#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+
 struct netfront_stats {
 	u64			rx_packets;
 	u64			tx_packets;
@@ -81,9 +93,12 @@
 	struct u64_stats_sync	syncp;
 };
 
-struct netfront_info {
-	struct list_head list;
-	struct net_device *netdev;
+struct netfront_info;
+
+struct netfront_queue {
+	unsigned int id; /* Queue ID, 0-based */
+	char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
+	struct netfront_info *info;
 
 	struct napi_struct napi;
 
@@ -93,10 +108,8 @@
 	unsigned int tx_evtchn, rx_evtchn;
 	unsigned int tx_irq, rx_irq;
 	/* Only used when split event channels support is enabled */
-	char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
-	char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
-
-	struct xenbus_device *xbdev;
+	char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
+	char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 
 	spinlock_t   tx_lock;
 	struct xen_netif_tx_front_ring tx;
@@ -140,11 +153,21 @@
 	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+};
+
+struct netfront_info {
+	struct list_head list;
+	struct net_device *netdev;
+
+	struct xenbus_device *xbdev;
+
+	/* Multi-queue support */
+	struct netfront_queue *queues;
 
 	/* Statistics */
 	struct netfront_stats __percpu *stats;
 
-	unsigned long rx_gso_checksum_fixup;
+	atomic_t rx_gso_checksum_fixup;
 };
 
 struct netfront_rx_info {
@@ -187,21 +210,21 @@
 	return idx & (NET_RX_RING_SIZE - 1);
 }
 
-static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
+static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 					 RING_IDX ri)
 {
 	int i = xennet_rxidx(ri);
-	struct sk_buff *skb = np->rx_skbs[i];
-	np->rx_skbs[i] = NULL;
+	struct sk_buff *skb = queue->rx_skbs[i];
+	queue->rx_skbs[i] = NULL;
 	return skb;
 }
 
-static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
+static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 					    RING_IDX ri)
 {
 	int i = xennet_rxidx(ri);
-	grant_ref_t ref = np->grant_rx_ref[i];
-	np->grant_rx_ref[i] = GRANT_INVALID_REF;
+	grant_ref_t ref = queue->grant_rx_ref[i];
+	queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 	return ref;
 }
 
@@ -221,41 +244,40 @@
 
 static void rx_refill_timeout(unsigned long data)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct netfront_info *np = netdev_priv(dev);
-	napi_schedule(&np->napi);
+	struct netfront_queue *queue = (struct netfront_queue *)data;
+	napi_schedule(&queue->napi);
 }
 
-static int netfront_tx_slot_available(struct netfront_info *np)
+static int netfront_tx_slot_available(struct netfront_queue *queue)
 {
-	return (np->tx.req_prod_pvt - np->tx.rsp_cons) <
+	return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 		(TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
 }
 
-static void xennet_maybe_wake_tx(struct net_device *dev)
+static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 {
-	struct netfront_info *np = netdev_priv(dev);
+	struct net_device *dev = queue->info->netdev;
+	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 
-	if (unlikely(netif_queue_stopped(dev)) &&
-	    netfront_tx_slot_available(np) &&
+	if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
+	    netfront_tx_slot_available(queue) &&
 	    likely(netif_running(dev)))
-		netif_wake_queue(dev);
+		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 }
 
-static void xennet_alloc_rx_buffers(struct net_device *dev)
+static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 {
 	unsigned short id;
-	struct netfront_info *np = netdev_priv(dev);
 	struct sk_buff *skb;
 	struct page *page;
 	int i, batch_target, notify;
-	RING_IDX req_prod = np->rx.req_prod_pvt;
+	RING_IDX req_prod = queue->rx.req_prod_pvt;
 	grant_ref_t ref;
 	unsigned long pfn;
 	void *vaddr;
 	struct xen_netif_rx_request *req;
 
-	if (unlikely(!netif_carrier_ok(dev)))
+	if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 		return;
 
 	/*
@@ -264,9 +286,10 @@
 	 * allocator, so should reduce the chance of failed allocation requests
 	 * both for ourself and for other kernel subsystems.
 	 */
-	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
-	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
-		skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD + NET_IP_ALIGN,
+	batch_target = queue->rx_target - (req_prod - queue->rx.rsp_cons);
+	for (i = skb_queue_len(&queue->rx_batch); i < batch_target; i++) {
+		skb = __netdev_alloc_skb(queue->info->netdev,
+					 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 					 GFP_ATOMIC | __GFP_NOWARN);
 		if (unlikely(!skb))
 			goto no_skb;
@@ -279,7 +302,7 @@
 			kfree_skb(skb);
 no_skb:
 			/* Could not allocate any skbuffs. Try again later. */
-			mod_timer(&np->rx_refill_timer,
+			mod_timer(&queue->rx_refill_timer,
 				  jiffies + (HZ/10));
 
 			/* Any skbuffs queued for refill? Force them out. */
@@ -289,44 +312,44 @@
 		}
 
 		skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
-		__skb_queue_tail(&np->rx_batch, skb);
+		__skb_queue_tail(&queue->rx_batch, skb);
 	}
 
 	/* Is the batch large enough to be worthwhile? */
-	if (i < (np->rx_target/2)) {
-		if (req_prod > np->rx.sring->req_prod)
+	if (i < (queue->rx_target/2)) {
+		if (req_prod > queue->rx.sring->req_prod)
 			goto push;
 		return;
 	}
 
 	/* Adjust our fill target if we risked running out of buffers. */
-	if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
-	    ((np->rx_target *= 2) > np->rx_max_target))
-		np->rx_target = np->rx_max_target;
+	if (((req_prod - queue->rx.sring->rsp_prod) < (queue->rx_target / 4)) &&
+	    ((queue->rx_target *= 2) > queue->rx_max_target))
+		queue->rx_target = queue->rx_max_target;
 
  refill:
 	for (i = 0; ; i++) {
-		skb = __skb_dequeue(&np->rx_batch);
+		skb = __skb_dequeue(&queue->rx_batch);
 		if (skb == NULL)
 			break;
 
-		skb->dev = dev;
+		skb->dev = queue->info->netdev;
 
 		id = xennet_rxidx(req_prod + i);
 
-		BUG_ON(np->rx_skbs[id]);
-		np->rx_skbs[id] = skb;
+		BUG_ON(queue->rx_skbs[id]);
+		queue->rx_skbs[id] = skb;
 
-		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+		ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 		BUG_ON((signed short)ref < 0);
-		np->grant_rx_ref[id] = ref;
+		queue->grant_rx_ref[id] = ref;
 
 		pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 		vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 
-		req = RING_GET_REQUEST(&np->rx, req_prod + i);
+		req = RING_GET_REQUEST(&queue->rx, req_prod + i);
 		gnttab_grant_foreign_access_ref(ref,
-						np->xbdev->otherend_id,
+						queue->info->xbdev->otherend_id,
 						pfn_to_mfn(pfn),
 						0);
 
@@ -337,72 +360,77 @@
 	wmb();		/* barrier so backend seens requests */
 
 	/* Above is a suitable barrier to ensure backend will see requests. */
-	np->rx.req_prod_pvt = req_prod + i;
+	queue->rx.req_prod_pvt = req_prod + i;
  push:
-	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 	if (notify)
-		notify_remote_via_irq(np->rx_irq);
+		notify_remote_via_irq(queue->rx_irq);
 }
 
 static int xennet_open(struct net_device *dev)
 {
 	struct netfront_info *np = netdev_priv(dev);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned int i = 0;
+	struct netfront_queue *queue = NULL;
 
-	napi_enable(&np->napi);
+	for (i = 0; i < num_queues; ++i) {
+		queue = &np->queues[i];
+		napi_enable(&queue->napi);
 
-	spin_lock_bh(&np->rx_lock);
-	if (netif_carrier_ok(dev)) {
-		xennet_alloc_rx_buffers(dev);
-		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
-		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			napi_schedule(&np->napi);
+		spin_lock_bh(&queue->rx_lock);
+		if (netif_carrier_ok(dev)) {
+			xennet_alloc_rx_buffers(queue);
+			queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
+			if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
+				napi_schedule(&queue->napi);
+		}
+		spin_unlock_bh(&queue->rx_lock);
 	}
-	spin_unlock_bh(&np->rx_lock);
 
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 
 	return 0;
 }
 
-static void xennet_tx_buf_gc(struct net_device *dev)
+static void xennet_tx_buf_gc(struct netfront_queue *queue)
 {
 	RING_IDX cons, prod;
 	unsigned short id;
-	struct netfront_info *np = netdev_priv(dev);
 	struct sk_buff *skb;
 
-	BUG_ON(!netif_carrier_ok(dev));
+	BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
 	do {
-		prod = np->tx.sring->rsp_prod;
+		prod = queue->tx.sring->rsp_prod;
 		rmb(); /* Ensure we see responses up to 'rp'. */
 
-		for (cons = np->tx.rsp_cons; cons != prod; cons++) {
+		for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 			struct xen_netif_tx_response *txrsp;
 
-			txrsp = RING_GET_RESPONSE(&np->tx, cons);
+			txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 			if (txrsp->status == XEN_NETIF_RSP_NULL)
 				continue;
 
 			id  = txrsp->id;
-			skb = np->tx_skbs[id].skb;
+			skb = queue->tx_skbs[id].skb;
 			if (unlikely(gnttab_query_foreign_access(
-				np->grant_tx_ref[id]) != 0)) {
+				queue->grant_tx_ref[id]) != 0)) {
 				pr_alert("%s: warning -- grant still in use by backend domain\n",
 					 __func__);
 				BUG();
 			}
 			gnttab_end_foreign_access_ref(
-				np->grant_tx_ref[id], GNTMAP_readonly);
+				queue->grant_tx_ref[id], GNTMAP_readonly);
 			gnttab_release_grant_reference(
-				&np->gref_tx_head, np->grant_tx_ref[id]);
-			np->grant_tx_ref[id] = GRANT_INVALID_REF;
-			np->grant_tx_page[id] = NULL;
-			add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
+				&queue->gref_tx_head, queue->grant_tx_ref[id]);
+			queue->grant_tx_ref[id] = GRANT_INVALID_REF;
+			queue->grant_tx_page[id] = NULL;
+			add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 			dev_kfree_skb_irq(skb);
 		}
 
-		np->tx.rsp_cons = prod;
+		queue->tx.rsp_cons = prod;
 
 		/*
 		 * Set a new event, then check for race with update of tx_cons.
@@ -412,21 +440,20 @@
 		 * data is outstanding: in such cases notification from Xen is
 		 * likely to be the only kick that we'll get.
 		 */
-		np->tx.sring->rsp_event =
-			prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+		queue->tx.sring->rsp_event =
+			prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
 		mb();		/* update shared area */
-	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+	} while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
 
-	xennet_maybe_wake_tx(dev);
+	xennet_maybe_wake_tx(queue);
 }
 
-static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+static void xennet_make_frags(struct sk_buff *skb, struct netfront_queue *queue,
 			      struct xen_netif_tx_request *tx)
 {
-	struct netfront_info *np = netdev_priv(dev);
 	char *data = skb->data;
 	unsigned long mfn;
-	RING_IDX prod = np->tx.req_prod_pvt;
+	RING_IDX prod = queue->tx.req_prod_pvt;
 	int frags = skb_shinfo(skb)->nr_frags;
 	unsigned int offset = offset_in_page(data);
 	unsigned int len = skb_headlen(skb);
@@ -443,19 +470,19 @@
 		data += tx->size;
 		offset = 0;
 
-		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
-		np->tx_skbs[id].skb = skb_get(skb);
-		tx = RING_GET_REQUEST(&np->tx, prod++);
+		id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
+		queue->tx_skbs[id].skb = skb_get(skb);
+		tx = RING_GET_REQUEST(&queue->tx, prod++);
 		tx->id = id;
-		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+		ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 		BUG_ON((signed short)ref < 0);
 
 		mfn = virt_to_mfn(data);
-		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+		gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 						mfn, GNTMAP_readonly);
 
-		np->grant_tx_page[id] = virt_to_page(data);
-		tx->gref = np->grant_tx_ref[id] = ref;
+		queue->grant_tx_page[id] = virt_to_page(data);
+		tx->gref = queue->grant_tx_ref[id] = ref;
 		tx->offset = offset;
 		tx->size = len;
 		tx->flags = 0;
@@ -487,21 +514,21 @@
 
 			tx->flags |= XEN_NETTXF_more_data;
 
-			id = get_id_from_freelist(&np->tx_skb_freelist,
-						  np->tx_skbs);
-			np->tx_skbs[id].skb = skb_get(skb);
-			tx = RING_GET_REQUEST(&np->tx, prod++);
+			id = get_id_from_freelist(&queue->tx_skb_freelist,
+						  queue->tx_skbs);
+			queue->tx_skbs[id].skb = skb_get(skb);
+			tx = RING_GET_REQUEST(&queue->tx, prod++);
 			tx->id = id;
-			ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+			ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 			BUG_ON((signed short)ref < 0);
 
 			mfn = pfn_to_mfn(page_to_pfn(page));
 			gnttab_grant_foreign_access_ref(ref,
-							np->xbdev->otherend_id,
+							queue->info->xbdev->otherend_id,
 							mfn, GNTMAP_readonly);
 
-			np->grant_tx_page[id] = page;
-			tx->gref = np->grant_tx_ref[id] = ref;
+			queue->grant_tx_page[id] = page;
+			tx->gref = queue->grant_tx_ref[id] = ref;
 			tx->offset = offset;
 			tx->size = bytes;
 			tx->flags = 0;
@@ -518,7 +545,7 @@
 		}
 	}
 
-	np->tx.req_prod_pvt = prod;
+	queue->tx.req_prod_pvt = prod;
 }
 
 /*
@@ -544,6 +571,24 @@
 	return pages;
 }
 
+static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
+			       void *accel_priv, select_queue_fallback_t fallback)
+{
+	unsigned int num_queues = dev->real_num_tx_queues;
+	u32 hash;
+	u16 queue_idx;
+
+	/* First, check if there is only one queue */
+	if (num_queues == 1) {
+		queue_idx = 0;
+	} else {
+		hash = skb_get_hash(skb);
+		queue_idx = hash % num_queues;
+	}
+
+	return queue_idx;
+}
+
 static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	unsigned short id;
@@ -559,6 +604,16 @@
 	unsigned int offset = offset_in_page(data);
 	unsigned int len = skb_headlen(skb);
 	unsigned long flags;
+	struct netfront_queue *queue = NULL;
+	unsigned int num_queues = dev->real_num_tx_queues;
+	u16 queue_index;
+
+	/* Drop the packet if no queues are set up */
+	if (num_queues < 1)
+		goto drop;
+	/* Determine which queue to transmit this SKB on */
+	queue_index = skb_get_queue_mapping(skb);
+	queue = &np->queues[queue_index];
 
 	/* If skb->len is too big for wire format, drop skb and alert
 	 * user about misconfiguration.
@@ -578,30 +633,30 @@
 		goto drop;
 	}
 
-	spin_lock_irqsave(&np->tx_lock, flags);
+	spin_lock_irqsave(&queue->tx_lock, flags);
 
 	if (unlikely(!netif_carrier_ok(dev) ||
 		     (slots > 1 && !xennet_can_sg(dev)) ||
 		     netif_needs_gso(skb, netif_skb_features(skb)))) {
-		spin_unlock_irqrestore(&np->tx_lock, flags);
+		spin_unlock_irqrestore(&queue->tx_lock, flags);
 		goto drop;
 	}
 
-	i = np->tx.req_prod_pvt;
+	i = queue->tx.req_prod_pvt;
 
-	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
-	np->tx_skbs[id].skb = skb;
+	id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
+	queue->tx_skbs[id].skb = skb;
 
-	tx = RING_GET_REQUEST(&np->tx, i);
+	tx = RING_GET_REQUEST(&queue->tx, i);
 
 	tx->id   = id;
-	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+	ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 	BUG_ON((signed short)ref < 0);
 	mfn = virt_to_mfn(data);
 	gnttab_grant_foreign_access_ref(
-		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
-	np->grant_tx_page[id] = virt_to_page(data);
-	tx->gref = np->grant_tx_ref[id] = ref;
+		ref, queue->info->xbdev->otherend_id, mfn, GNTMAP_readonly);
+	queue->grant_tx_page[id] = virt_to_page(data);
+	tx->gref = queue->grant_tx_ref[id] = ref;
 	tx->offset = offset;
 	tx->size = len;
 
@@ -617,7 +672,7 @@
 		struct xen_netif_extra_info *gso;
 
 		gso = (struct xen_netif_extra_info *)
-			RING_GET_REQUEST(&np->tx, ++i);
+			RING_GET_REQUEST(&queue->tx, ++i);
 
 		tx->flags |= XEN_NETTXF_extra_info;
 
@@ -632,14 +687,14 @@
 		gso->flags = 0;
 	}
 
-	np->tx.req_prod_pvt = i + 1;
+	queue->tx.req_prod_pvt = i + 1;
 
-	xennet_make_frags(skb, dev, tx);
+	xennet_make_frags(skb, queue, tx);
 	tx->size = skb->len;
 
-	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 	if (notify)
-		notify_remote_via_irq(np->tx_irq);
+		notify_remote_via_irq(queue->tx_irq);
 
 	u64_stats_update_begin(&stats->syncp);
 	stats->tx_bytes += skb->len;
@@ -647,12 +702,12 @@
 	u64_stats_update_end(&stats->syncp);
 
 	/* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
-	xennet_tx_buf_gc(dev);
+	xennet_tx_buf_gc(queue);
 
-	if (!netfront_tx_slot_available(np))
-		netif_stop_queue(dev);
+	if (!netfront_tx_slot_available(queue))
+		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 
-	spin_unlock_irqrestore(&np->tx_lock, flags);
+	spin_unlock_irqrestore(&queue->tx_lock, flags);
 
 	return NETDEV_TX_OK;
 
@@ -665,32 +720,38 @@
 static int xennet_close(struct net_device *dev)
 {
 	struct netfront_info *np = netdev_priv(dev);
-	netif_stop_queue(np->netdev);
-	napi_disable(&np->napi);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned int i;
+	struct netfront_queue *queue;
+	netif_tx_stop_all_queues(np->netdev);
+	for (i = 0; i < num_queues; ++i) {
+		queue = &np->queues[i];
+		napi_disable(&queue->napi);
+	}
 	return 0;
 }
 
-static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
+static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 				grant_ref_t ref)
 {
-	int new = xennet_rxidx(np->rx.req_prod_pvt);
+	int new = xennet_rxidx(queue->rx.req_prod_pvt);
 
-	BUG_ON(np->rx_skbs[new]);
-	np->rx_skbs[new] = skb;
-	np->grant_rx_ref[new] = ref;
-	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
-	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
-	np->rx.req_prod_pvt++;
+	BUG_ON(queue->rx_skbs[new]);
+	queue->rx_skbs[new] = skb;
+	queue->grant_rx_ref[new] = ref;
+	RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
+	RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
+	queue->rx.req_prod_pvt++;
 }
 
-static int xennet_get_extras(struct netfront_info *np,
+static int xennet_get_extras(struct netfront_queue *queue,
 			     struct xen_netif_extra_info *extras,
 			     RING_IDX rp)
 
 {
 	struct xen_netif_extra_info *extra;
-	struct device *dev = &np->netdev->dev;
-	RING_IDX cons = np->rx.rsp_cons;
+	struct device *dev = &queue->info->netdev->dev;
+	RING_IDX cons = queue->rx.rsp_cons;
 	int err = 0;
 
 	do {
@@ -705,7 +766,7 @@
 		}
 
 		extra = (struct xen_netif_extra_info *)
-			RING_GET_RESPONSE(&np->rx, ++cons);
+			RING_GET_RESPONSE(&queue->rx, ++cons);
 
 		if (unlikely(!extra->type ||
 			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
@@ -718,33 +779,33 @@
 			       sizeof(*extra));
 		}
 
-		skb = xennet_get_rx_skb(np, cons);
-		ref = xennet_get_rx_ref(np, cons);
-		xennet_move_rx_slot(np, skb, ref);
+		skb = xennet_get_rx_skb(queue, cons);
+		ref = xennet_get_rx_ref(queue, cons);
+		xennet_move_rx_slot(queue, skb, ref);
 	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 
-	np->rx.rsp_cons = cons;
+	queue->rx.rsp_cons = cons;
 	return err;
 }
 
-static int xennet_get_responses(struct netfront_info *np,
+static int xennet_get_responses(struct netfront_queue *queue,
 				struct netfront_rx_info *rinfo, RING_IDX rp,
 				struct sk_buff_head *list)
 {
 	struct xen_netif_rx_response *rx = &rinfo->rx;
 	struct xen_netif_extra_info *extras = rinfo->extras;
-	struct device *dev = &np->netdev->dev;
-	RING_IDX cons = np->rx.rsp_cons;
-	struct sk_buff *skb = xennet_get_rx_skb(np, cons);
-	grant_ref_t ref = xennet_get_rx_ref(np, cons);
+	struct device *dev = &queue->info->netdev->dev;
+	RING_IDX cons = queue->rx.rsp_cons;
+	struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
+	grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 	int slots = 1;
 	int err = 0;
 	unsigned long ret;
 
 	if (rx->flags & XEN_NETRXF_extra_info) {
-		err = xennet_get_extras(np, extras, rp);
-		cons = np->rx.rsp_cons;
+		err = xennet_get_extras(queue, extras, rp);
+		cons = queue->rx.rsp_cons;
 	}
 
 	for (;;) {
@@ -753,7 +814,7 @@
 			if (net_ratelimit())
 				dev_warn(dev, "rx->offset: %x, size: %u\n",
 					 rx->offset, rx->status);
-			xennet_move_rx_slot(np, skb, ref);
+			xennet_move_rx_slot(queue, skb, ref);
 			err = -EINVAL;
 			goto next;
 		}
@@ -774,7 +835,7 @@
 		ret = gnttab_end_foreign_access_ref(ref, 0);
 		BUG_ON(!ret);
 
-		gnttab_release_grant_reference(&np->gref_rx_head, ref);
+		gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 
 		__skb_queue_tail(list, skb);
 
@@ -789,9 +850,9 @@
 			break;
 		}
 
-		rx = RING_GET_RESPONSE(&np->rx, cons + slots);
-		skb = xennet_get_rx_skb(np, cons + slots);
-		ref = xennet_get_rx_ref(np, cons + slots);
+		rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
+		skb = xennet_get_rx_skb(queue, cons + slots);
+		ref = xennet_get_rx_ref(queue, cons + slots);
 		slots++;
 	}
 
@@ -802,7 +863,7 @@
 	}
 
 	if (unlikely(err))
-		np->rx.rsp_cons = cons + slots;
+		queue->rx.rsp_cons = cons + slots;
 
 	return err;
 }
@@ -836,17 +897,17 @@
 	return 0;
 }
 
-static RING_IDX xennet_fill_frags(struct netfront_info *np,
+static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 				  struct sk_buff *skb,
 				  struct sk_buff_head *list)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
-	RING_IDX cons = np->rx.rsp_cons;
+	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *nskb;
 
 	while ((nskb = __skb_dequeue(list))) {
 		struct xen_netif_rx_response *rx =
-			RING_GET_RESPONSE(&np->rx, ++cons);
+			RING_GET_RESPONSE(&queue->rx, ++cons);
 		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 
 		if (shinfo->nr_frags == MAX_SKB_FRAGS) {
@@ -879,7 +940,7 @@
 	 */
 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		struct netfront_info *np = netdev_priv(dev);
-		np->rx_gso_checksum_fixup++;
+		atomic_inc(&np->rx_gso_checksum_fixup);
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		recalculate_partial_csum = true;
 	}
@@ -891,11 +952,10 @@
 	return skb_checksum_setup(skb, recalculate_partial_csum);
 }
 
-static int handle_incoming_queue(struct net_device *dev,
+static int handle_incoming_queue(struct netfront_queue *queue,
 				 struct sk_buff_head *rxq)
 {
-	struct netfront_info *np = netdev_priv(dev);
-	struct netfront_stats *stats = this_cpu_ptr(np->stats);
+	struct netfront_stats *stats = this_cpu_ptr(queue->info->stats);
 	int packets_dropped = 0;
 	struct sk_buff *skb;
 
@@ -906,13 +966,13 @@
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 
 		/* Ethernet work: Delayed to here as it peeks the header. */
-		skb->protocol = eth_type_trans(skb, dev);
+		skb->protocol = eth_type_trans(skb, queue->info->netdev);
 		skb_reset_network_header(skb);
 
-		if (checksum_setup(dev, skb)) {
+		if (checksum_setup(queue->info->netdev, skb)) {
 			kfree_skb(skb);
 			packets_dropped++;
-			dev->stats.rx_errors++;
+			queue->info->netdev->stats.rx_errors++;
 			continue;
 		}
 
@@ -922,7 +982,7 @@
 		u64_stats_update_end(&stats->syncp);
 
 		/* Pass it up. */
-		napi_gro_receive(&np->napi, skb);
+		napi_gro_receive(&queue->napi, skb);
 	}
 
 	return packets_dropped;
@@ -930,8 +990,8 @@
 
 static int xennet_poll(struct napi_struct *napi, int budget)
 {
-	struct netfront_info *np = container_of(napi, struct netfront_info, napi);
-	struct net_device *dev = np->netdev;
+	struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
+	struct net_device *dev = queue->info->netdev;
 	struct sk_buff *skb;
 	struct netfront_rx_info rinfo;
 	struct xen_netif_rx_response *rx = &rinfo.rx;
@@ -944,29 +1004,29 @@
 	unsigned long flags;
 	int err;
 
-	spin_lock(&np->rx_lock);
+	spin_lock(&queue->rx_lock);
 
 	skb_queue_head_init(&rxq);
 	skb_queue_head_init(&errq);
 	skb_queue_head_init(&tmpq);
 
-	rp = np->rx.sring->rsp_prod;
+	rp = queue->rx.sring->rsp_prod;
 	rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-	i = np->rx.rsp_cons;
+	i = queue->rx.rsp_cons;
 	work_done = 0;
 	while ((i != rp) && (work_done < budget)) {
-		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+		memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
 		memset(extras, 0, sizeof(rinfo.extras));
 
-		err = xennet_get_responses(np, &rinfo, rp, &tmpq);
+		err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
 
 		if (unlikely(err)) {
 err:
 			while ((skb = __skb_dequeue(&tmpq)))
 				__skb_queue_tail(&errq, skb);
 			dev->stats.rx_errors++;
-			i = np->rx.rsp_cons;
+			i = queue->rx.rsp_cons;
 			continue;
 		}
 
@@ -978,7 +1038,7 @@
 
 			if (unlikely(xennet_set_skb_gso(skb, gso))) {
 				__skb_queue_head(&tmpq, skb);
-				np->rx.rsp_cons += skb_queue_len(&tmpq);
+				queue->rx.rsp_cons += skb_queue_len(&tmpq);
 				goto err;
 			}
 		}
@@ -992,7 +1052,7 @@
 		skb->data_len = rx->status;
 		skb->len += rx->status;
 
-		i = xennet_fill_frags(np, skb, &tmpq);
+		i = xennet_fill_frags(queue, skb, &tmpq);
 
 		if (rx->flags & XEN_NETRXF_csum_blank)
 			skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1001,22 +1061,22 @@
 
 		__skb_queue_tail(&rxq, skb);
 
-		np->rx.rsp_cons = ++i;
+		queue->rx.rsp_cons = ++i;
 		work_done++;
 	}
 
 	__skb_queue_purge(&errq);
 
-	work_done -= handle_incoming_queue(dev, &rxq);
+	work_done -= handle_incoming_queue(queue, &rxq);
 
 	/* If we get a callback with very few responses, reduce fill target. */
 	/* NB. Note exponential increase, linear decrease. */
-	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
-	     ((3*np->rx_target) / 4)) &&
-	    (--np->rx_target < np->rx_min_target))
-		np->rx_target = np->rx_min_target;
+	if (((queue->rx.req_prod_pvt - queue->rx.sring->rsp_prod) >
+	     ((3*queue->rx_target) / 4)) &&
+	    (--queue->rx_target < queue->rx_min_target))
+		queue->rx_target = queue->rx_min_target;
 
-	xennet_alloc_rx_buffers(dev);
+	xennet_alloc_rx_buffers(queue);
 
 	if (work_done < budget) {
 		int more_to_do = 0;
@@ -1025,14 +1085,14 @@
 
 		local_irq_save(flags);
 
-		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
+		RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
 		if (!more_to_do)
 			__napi_complete(napi);
 
 		local_irq_restore(flags);
 	}
 
-	spin_unlock(&np->rx_lock);
+	spin_unlock(&queue->rx_lock);
 
 	return work_done;
 }
@@ -1080,43 +1140,43 @@
 	return tot;
 }
 
-static void xennet_release_tx_bufs(struct netfront_info *np)
+static void xennet_release_tx_bufs(struct netfront_queue *queue)
 {
 	struct sk_buff *skb;
 	int i;
 
 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
 		/* Skip over entries which are actually freelist references */
-		if (skb_entry_is_link(&np->tx_skbs[i]))
+		if (skb_entry_is_link(&queue->tx_skbs[i]))
 			continue;
 
-		skb = np->tx_skbs[i].skb;
-		get_page(np->grant_tx_page[i]);
-		gnttab_end_foreign_access(np->grant_tx_ref[i],
+		skb = queue->tx_skbs[i].skb;
+		get_page(queue->grant_tx_page[i]);
+		gnttab_end_foreign_access(queue->grant_tx_ref[i],
 					  GNTMAP_readonly,
-					  (unsigned long)page_address(np->grant_tx_page[i]));
-		np->grant_tx_page[i] = NULL;
-		np->grant_tx_ref[i] = GRANT_INVALID_REF;
-		add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
+					  (unsigned long)page_address(queue->grant_tx_page[i]));
+		queue->grant_tx_page[i] = NULL;
+		queue->grant_tx_ref[i] = GRANT_INVALID_REF;
+		add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
 		dev_kfree_skb_irq(skb);
 	}
 }
 
-static void xennet_release_rx_bufs(struct netfront_info *np)
+static void xennet_release_rx_bufs(struct netfront_queue *queue)
 {
 	int id, ref;
 
-	spin_lock_bh(&np->rx_lock);
+	spin_lock_bh(&queue->rx_lock);
 
 	for (id = 0; id < NET_RX_RING_SIZE; id++) {
 		struct sk_buff *skb;
 		struct page *page;
 
-		skb = np->rx_skbs[id];
+		skb = queue->rx_skbs[id];
 		if (!skb)
 			continue;
 
-		ref = np->grant_rx_ref[id];
+		ref = queue->grant_rx_ref[id];
 		if (ref == GRANT_INVALID_REF)
 			continue;
 
@@ -1128,21 +1188,28 @@
 		get_page(page);
 		gnttab_end_foreign_access(ref, 0,
 					  (unsigned long)page_address(page));
-		np->grant_rx_ref[id] = GRANT_INVALID_REF;
+		queue->grant_rx_ref[id] = GRANT_INVALID_REF;
 
 		kfree_skb(skb);
 	}
 
-	spin_unlock_bh(&np->rx_lock);
+	spin_unlock_bh(&queue->rx_lock);
 }
 
 static void xennet_uninit(struct net_device *dev)
 {
 	struct netfront_info *np = netdev_priv(dev);
-	xennet_release_tx_bufs(np);
-	xennet_release_rx_bufs(np);
-	gnttab_free_grant_references(np->gref_tx_head);
-	gnttab_free_grant_references(np->gref_rx_head);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	struct netfront_queue *queue;
+	unsigned int i;
+
+	for (i = 0; i < num_queues; ++i) {
+		queue = &np->queues[i];
+		xennet_release_tx_bufs(queue);
+		xennet_release_rx_bufs(queue);
+		gnttab_free_grant_references(queue->gref_tx_head);
+		gnttab_free_grant_references(queue->gref_rx_head);
+	}
 }
 
 static netdev_features_t xennet_fix_features(struct net_device *dev,
@@ -1203,25 +1270,24 @@
 
 static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
 {
-	struct netfront_info *np = dev_id;
-	struct net_device *dev = np->netdev;
+	struct netfront_queue *queue = dev_id;
 	unsigned long flags;
 
-	spin_lock_irqsave(&np->tx_lock, flags);
-	xennet_tx_buf_gc(dev);
-	spin_unlock_irqrestore(&np->tx_lock, flags);
+	spin_lock_irqsave(&queue->tx_lock, flags);
+	xennet_tx_buf_gc(queue);
+	spin_unlock_irqrestore(&queue->tx_lock, flags);
 
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
 {
-	struct netfront_info *np = dev_id;
-	struct net_device *dev = np->netdev;
+	struct netfront_queue *queue = dev_id;
+	struct net_device *dev = queue->info->netdev;
 
 	if (likely(netif_carrier_ok(dev) &&
-		   RING_HAS_UNCONSUMED_RESPONSES(&np->rx)))
-			napi_schedule(&np->napi);
+		   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
+			napi_schedule(&queue->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1236,7 +1302,12 @@
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void xennet_poll_controller(struct net_device *dev)
 {
-	xennet_interrupt(0, dev);
+	/* Poll each queue */
+	struct netfront_info *info = netdev_priv(dev);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned int i;
+	for (i = 0; i < num_queues; ++i)
+		xennet_interrupt(0, &info->queues[i]);
 }
 #endif
 
@@ -1251,6 +1322,7 @@
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_fix_features    = xennet_fix_features,
 	.ndo_set_features    = xennet_set_features,
+	.ndo_select_queue    = xennet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = xennet_poll_controller,
 #endif
@@ -1258,66 +1330,30 @@
 
 static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 {
-	int i, err;
+	int err;
 	struct net_device *netdev;
 	struct netfront_info *np;
 
-	netdev = alloc_etherdev(sizeof(struct netfront_info));
+	netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
 	if (!netdev)
 		return ERR_PTR(-ENOMEM);
 
 	np                   = netdev_priv(netdev);
 	np->xbdev            = dev;
 
-	spin_lock_init(&np->tx_lock);
-	spin_lock_init(&np->rx_lock);
-
-	skb_queue_head_init(&np->rx_batch);
-	np->rx_target     = RX_DFL_MIN_TARGET;
-	np->rx_min_target = RX_DFL_MIN_TARGET;
-	np->rx_max_target = RX_MAX_TARGET;
-
-	init_timer(&np->rx_refill_timer);
-	np->rx_refill_timer.data = (unsigned long)netdev;
-	np->rx_refill_timer.function = rx_refill_timeout;
+	/* No need to use rtnl_lock() before the call below as it
+	 * happens before register_netdev().
+	 */
+	netif_set_real_num_tx_queues(netdev, 0);
+	np->queues = NULL;
 
 	err = -ENOMEM;
 	np->stats = netdev_alloc_pcpu_stats(struct netfront_stats);
 	if (np->stats == NULL)
 		goto exit;
 
-	/* Initialise tx_skbs as a free chain containing every entry. */
-	np->tx_skb_freelist = 0;
-	for (i = 0; i < NET_TX_RING_SIZE; i++) {
-		skb_entry_set_link(&np->tx_skbs[i], i+1);
-		np->grant_tx_ref[i] = GRANT_INVALID_REF;
-		np->grant_tx_page[i] = NULL;
-	}
-
-	/* Clear out rx_skbs */
-	for (i = 0; i < NET_RX_RING_SIZE; i++) {
-		np->rx_skbs[i] = NULL;
-		np->grant_rx_ref[i] = GRANT_INVALID_REF;
-	}
-
-	/* A grant for every tx ring slot */
-	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
-					  &np->gref_tx_head) < 0) {
-		pr_alert("can't alloc tx grant refs\n");
-		err = -ENOMEM;
-		goto exit_free_stats;
-	}
-	/* A grant for every rx ring slot */
-	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
-					  &np->gref_rx_head) < 0) {
-		pr_alert("can't alloc rx grant refs\n");
-		err = -ENOMEM;
-		goto exit_free_tx;
-	}
-
 	netdev->netdev_ops	= &xennet_netdev_ops;
 
-	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				  NETIF_F_GSO_ROBUST;
 	netdev->hw_features	= NETIF_F_SG |
@@ -1332,7 +1368,7 @@
          */
 	netdev->features |= netdev->hw_features;
 
-	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
+	netdev->ethtool_ops = &xennet_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
 	netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
@@ -1343,10 +1379,6 @@
 
 	return netdev;
 
- exit_free_tx:
-	gnttab_free_grant_references(np->gref_tx_head);
- exit_free_stats:
-	free_percpu(np->stats);
  exit:
 	free_netdev(netdev);
 	return ERR_PTR(err);
@@ -1404,30 +1436,36 @@
 
 static void xennet_disconnect_backend(struct netfront_info *info)
 {
-	/* Stop old i/f to prevent errors whilst we rebuild the state. */
-	spin_lock_bh(&info->rx_lock);
-	spin_lock_irq(&info->tx_lock);
-	netif_carrier_off(info->netdev);
-	spin_unlock_irq(&info->tx_lock);
-	spin_unlock_bh(&info->rx_lock);
+	unsigned int i = 0;
+	unsigned int num_queues = info->netdev->real_num_tx_queues;
 
-	if (info->tx_irq && (info->tx_irq == info->rx_irq))
-		unbind_from_irqhandler(info->tx_irq, info);
-	if (info->tx_irq && (info->tx_irq != info->rx_irq)) {
-		unbind_from_irqhandler(info->tx_irq, info);
-		unbind_from_irqhandler(info->rx_irq, info);
+	for (i = 0; i < num_queues; ++i) {
+		struct netfront_queue *queue = &info->queues[i];
+		/* Stop old i/f to prevent errors whilst we rebuild the state. */
+		spin_lock_bh(&queue->rx_lock);
+		spin_lock_irq(&queue->tx_lock);
+		netif_carrier_off(queue->info->netdev);
+		spin_unlock_irq(&queue->tx_lock);
+		spin_unlock_bh(&queue->rx_lock);
+
+		if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
+			unbind_from_irqhandler(queue->tx_irq, queue);
+		if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
+			unbind_from_irqhandler(queue->tx_irq, queue);
+			unbind_from_irqhandler(queue->rx_irq, queue);
+		}
+		queue->tx_evtchn = queue->rx_evtchn = 0;
+		queue->tx_irq = queue->rx_irq = 0;
+
+		/* End access and free the pages */
+		xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
+		xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
+
+		queue->tx_ring_ref = GRANT_INVALID_REF;
+		queue->rx_ring_ref = GRANT_INVALID_REF;
+		queue->tx.sring = NULL;
+		queue->rx.sring = NULL;
 	}
-	info->tx_evtchn = info->rx_evtchn = 0;
-	info->tx_irq = info->rx_irq = 0;
-
-	/* End access and free the pages */
-	xennet_end_access(info->tx_ring_ref, info->tx.sring);
-	xennet_end_access(info->rx_ring_ref, info->rx.sring);
-
-	info->tx_ring_ref = GRANT_INVALID_REF;
-	info->rx_ring_ref = GRANT_INVALID_REF;
-	info->tx.sring = NULL;
-	info->rx.sring = NULL;
 }
 
 /**
@@ -1468,100 +1506,86 @@
 	return 0;
 }
 
-static int setup_netfront_single(struct netfront_info *info)
+static int setup_netfront_single(struct netfront_queue *queue)
 {
 	int err;
 
-	err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn);
+	err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
 	if (err < 0)
 		goto fail;
 
-	err = bind_evtchn_to_irqhandler(info->tx_evtchn,
+	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
 					xennet_interrupt,
-					0, info->netdev->name, info);
+					0, queue->info->netdev->name, queue);
 	if (err < 0)
 		goto bind_fail;
-	info->rx_evtchn = info->tx_evtchn;
-	info->rx_irq = info->tx_irq = err;
+	queue->rx_evtchn = queue->tx_evtchn;
+	queue->rx_irq = queue->tx_irq = err;
 
 	return 0;
 
 bind_fail:
-	xenbus_free_evtchn(info->xbdev, info->tx_evtchn);
-	info->tx_evtchn = 0;
+	xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
+	queue->tx_evtchn = 0;
 fail:
 	return err;
 }
 
-static int setup_netfront_split(struct netfront_info *info)
+static int setup_netfront_split(struct netfront_queue *queue)
 {
 	int err;
 
-	err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn);
+	err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
 	if (err < 0)
 		goto fail;
-	err = xenbus_alloc_evtchn(info->xbdev, &info->rx_evtchn);
+	err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
 	if (err < 0)
 		goto alloc_rx_evtchn_fail;
 
-	snprintf(info->tx_irq_name, sizeof(info->tx_irq_name),
-		 "%s-tx", info->netdev->name);
-	err = bind_evtchn_to_irqhandler(info->tx_evtchn,
+	snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
+		 "%s-tx", queue->name);
+	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
 					xennet_tx_interrupt,
-					0, info->tx_irq_name, info);
+					0, queue->tx_irq_name, queue);
 	if (err < 0)
 		goto bind_tx_fail;
-	info->tx_irq = err;
+	queue->tx_irq = err;
 
-	snprintf(info->rx_irq_name, sizeof(info->rx_irq_name),
-		 "%s-rx", info->netdev->name);
-	err = bind_evtchn_to_irqhandler(info->rx_evtchn,
+	snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
+		 "%s-rx", queue->name);
+	err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
 					xennet_rx_interrupt,
-					0, info->rx_irq_name, info);
+					0, queue->rx_irq_name, queue);
 	if (err < 0)
 		goto bind_rx_fail;
-	info->rx_irq = err;
+	queue->rx_irq = err;
 
 	return 0;
 
 bind_rx_fail:
-	unbind_from_irqhandler(info->tx_irq, info);
-	info->tx_irq = 0;
+	unbind_from_irqhandler(queue->tx_irq, queue);
+	queue->tx_irq = 0;
 bind_tx_fail:
-	xenbus_free_evtchn(info->xbdev, info->rx_evtchn);
-	info->rx_evtchn = 0;
+	xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
+	queue->rx_evtchn = 0;
 alloc_rx_evtchn_fail:
-	xenbus_free_evtchn(info->xbdev, info->tx_evtchn);
-	info->tx_evtchn = 0;
+	xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
+	queue->tx_evtchn = 0;
 fail:
 	return err;
 }
 
-static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
+static int setup_netfront(struct xenbus_device *dev,
+			struct netfront_queue *queue, unsigned int feature_split_evtchn)
 {
 	struct xen_netif_tx_sring *txs;
 	struct xen_netif_rx_sring *rxs;
 	int err;
-	struct net_device *netdev = info->netdev;
-	unsigned int feature_split_evtchn;
 
-	info->tx_ring_ref = GRANT_INVALID_REF;
-	info->rx_ring_ref = GRANT_INVALID_REF;
-	info->rx.sring = NULL;
-	info->tx.sring = NULL;
-	netdev->irq = 0;
-
-	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-			   "feature-split-event-channels", "%u",
-			   &feature_split_evtchn);
-	if (err < 0)
-		feature_split_evtchn = 0;
-
-	err = xen_net_read_mac(dev, netdev->dev_addr);
-	if (err) {
-		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
-		goto fail;
-	}
+	queue->tx_ring_ref = GRANT_INVALID_REF;
+	queue->rx_ring_ref = GRANT_INVALID_REF;
+	queue->rx.sring = NULL;
+	queue->tx.sring = NULL;
 
 	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
 	if (!txs) {
@@ -1570,13 +1594,13 @@
 		goto fail;
 	}
 	SHARED_RING_INIT(txs);
-	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+	FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
 
 	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
 	if (err < 0)
 		goto grant_tx_ring_fail;
+	queue->tx_ring_ref = err;
 
-	info->tx_ring_ref = err;
 	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
 	if (!rxs) {
 		err = -ENOMEM;
@@ -1584,21 +1608,21 @@
 		goto alloc_rx_ring_fail;
 	}
 	SHARED_RING_INIT(rxs);
-	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+	FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
 
 	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
 	if (err < 0)
 		goto grant_rx_ring_fail;
-	info->rx_ring_ref = err;
+	queue->rx_ring_ref = err;
 
 	if (feature_split_evtchn)
-		err = setup_netfront_split(info);
+		err = setup_netfront_split(queue);
 	/* setup single event channel if
 	 *  a) feature-split-event-channels == 0
 	 *  b) feature-split-event-channels == 1 but failed to setup
 	 */
 	if (!feature_split_evtchn || (feature_split_evtchn && err))
-		err = setup_netfront_single(info);
+		err = setup_netfront_single(queue);
 
 	if (err)
 		goto alloc_evtchn_fail;
@@ -1609,17 +1633,163 @@
 	 * granted pages because backend is not accessing it at this point.
 	 */
 alloc_evtchn_fail:
-	gnttab_end_foreign_access_ref(info->rx_ring_ref, 0);
+	gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
 grant_rx_ring_fail:
 	free_page((unsigned long)rxs);
 alloc_rx_ring_fail:
-	gnttab_end_foreign_access_ref(info->tx_ring_ref, 0);
+	gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
 grant_tx_ring_fail:
 	free_page((unsigned long)txs);
 fail:
 	return err;
 }
 
+/* Queue-specific initialisation
+ * This used to be done in xennet_create_dev() but must now
+ * be run per-queue.
+ */
+static int xennet_init_queue(struct netfront_queue *queue)
+{
+	unsigned short i;
+	int err = 0;
+
+	spin_lock_init(&queue->tx_lock);
+	spin_lock_init(&queue->rx_lock);
+
+	skb_queue_head_init(&queue->rx_batch);
+	queue->rx_target     = RX_DFL_MIN_TARGET;
+	queue->rx_min_target = RX_DFL_MIN_TARGET;
+	queue->rx_max_target = RX_MAX_TARGET;
+
+	init_timer(&queue->rx_refill_timer);
+	queue->rx_refill_timer.data = (unsigned long)queue;
+	queue->rx_refill_timer.function = rx_refill_timeout;
+
+	snprintf(queue->name, sizeof(queue->name), "%s-q%u",
+		 queue->info->netdev->name, queue->id);
+
+	/* Initialise tx_skbs as a free chain containing every entry. */
+	queue->tx_skb_freelist = 0;
+	for (i = 0; i < NET_TX_RING_SIZE; i++) {
+		skb_entry_set_link(&queue->tx_skbs[i], i+1);
+		queue->grant_tx_ref[i] = GRANT_INVALID_REF;
+		queue->grant_tx_page[i] = NULL;
+	}
+
+	/* Clear out rx_skbs */
+	for (i = 0; i < NET_RX_RING_SIZE; i++) {
+		queue->rx_skbs[i] = NULL;
+		queue->grant_rx_ref[i] = GRANT_INVALID_REF;
+	}
+
+	/* A grant for every tx ring slot */
+	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+					  &queue->gref_tx_head) < 0) {
+		pr_alert("can't alloc tx grant refs\n");
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	/* A grant for every rx ring slot */
+	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+					  &queue->gref_rx_head) < 0) {
+		pr_alert("can't alloc rx grant refs\n");
+		err = -ENOMEM;
+		goto exit_free_tx;
+	}
+
+	netif_napi_add(queue->info->netdev, &queue->napi, xennet_poll, 64);
+
+	return 0;
+
+ exit_free_tx:
+	gnttab_free_grant_references(queue->gref_tx_head);
+ exit:
+	return err;
+}
+
+static int write_queue_xenstore_keys(struct netfront_queue *queue,
+			   struct xenbus_transaction *xbt, int write_hierarchical)
+{
+	/* Write the queue-specific keys into XenStore: directly under the
+	 * device node for a single queue, or under a per-queue
+	 * "queue-N" subkey for multiple queues.
+	 */
+	struct xenbus_device *dev = queue->info->xbdev;
+	int err;
+	const char *message;
+	char *path;
+	size_t pathsize;
+
+	/* Choose the correct place to write the keys */
+	if (write_hierarchical) {
+		pathsize = strlen(dev->nodename) + sizeof("/queue-4294967295");
+		path = kzalloc(pathsize, GFP_KERNEL);
+		if (!path) {
+			err = -ENOMEM;
+			message = "out of memory while writing ring references";
+			goto error;
+		}
+		snprintf(path, pathsize, "%s/queue-%u",
+				dev->nodename, queue->id);
+	} else {
+		path = (char *)dev->nodename;
+	}
+
+	/* Write ring references */
+	err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
+			queue->tx_ring_ref);
+	if (err) {
+		message = "writing tx-ring-ref";
+		goto error;
+	}
+
+	err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
+			queue->rx_ring_ref);
+	if (err) {
+		message = "writing rx-ring-ref";
+		goto error;
+	}
+
+	/* Write event channels; taking into account both shared
+	 * and split event channel scenarios.
+	 */
+	if (queue->tx_evtchn == queue->rx_evtchn) {
+		/* Shared event channel */
+		err = xenbus_printf(*xbt, path,
+				"event-channel", "%u", queue->tx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto error;
+		}
+	} else {
+		/* Split event channels */
+		err = xenbus_printf(*xbt, path,
+				"event-channel-tx", "%u", queue->tx_evtchn);
+		if (err) {
+			message = "writing event-channel-tx";
+			goto error;
+		}
+
+		err = xenbus_printf(*xbt, path,
+				"event-channel-rx", "%u", queue->rx_evtchn);
+		if (err) {
+			message = "writing event-channel-rx";
+			goto error;
+		}
+	}
+
+	if (write_hierarchical)
+		kfree(path);
+	return 0;
+
+error:
+	if (write_hierarchical)
+		kfree(path);
+	xenbus_dev_fatal(dev, err, "%s", message);
+	return err;
+}
+
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_netback(struct xenbus_device *dev,
 			   struct netfront_info *info)
@@ -1627,11 +1797,83 @@
 	const char *message;
 	struct xenbus_transaction xbt;
 	int err;
+	unsigned int feature_split_evtchn;
+	unsigned int i = 0;
+	unsigned int max_queues = 0;
+	struct netfront_queue *queue = NULL;
+	unsigned int num_queues = 1;
 
-	/* Create shared ring, alloc event channel. */
-	err = setup_netfront(dev, info);
-	if (err)
+	info->netdev->irq = 0;
+
+	/* Check if backend supports multiple queues */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &max_queues);
+	if (err < 0)
+		max_queues = 1;
+	num_queues = min(max_queues, xennet_max_queues);
+
+	/* Check feature-split-event-channels */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "feature-split-event-channels", "%u",
+			   &feature_split_evtchn);
+	if (err < 0)
+		feature_split_evtchn = 0;
+
+	/* Read mac addr. */
+	err = xen_net_read_mac(dev, info->netdev->dev_addr);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
 		goto out;
+	}
+
+	/* Allocate array of queues */
+	info->queues = kcalloc(num_queues, sizeof(struct netfront_queue), GFP_KERNEL);
+	if (!info->queues) {
+		err = -ENOMEM;
+		goto out;
+	}
+	rtnl_lock();
+	netif_set_real_num_tx_queues(info->netdev, num_queues);
+	rtnl_unlock();
+
+	/* Create shared ring, alloc event channel -- for each queue */
+	for (i = 0; i < num_queues; ++i) {
+		queue = &info->queues[i];
+		queue->id = i;
+		queue->info = info;
+		err = xennet_init_queue(queue);
+		if (err) {
+			/* xennet_init_queue() cleans up after itself on failure,
+			 * but we still have to clean up any previously initialised
+			 * queues. If i > 0, set num_queues to i, then goto
+			 * destroy_ring, which calls xennet_disconnect_backend()
+			 * to tidy up.
+			 */
+			if (i > 0) {
+				rtnl_lock();
+				netif_set_real_num_tx_queues(info->netdev, i);
+				rtnl_unlock();
+				goto destroy_ring;
+			} else {
+				goto out;
+			}
+		}
+		err = setup_netfront(dev, queue, feature_split_evtchn);
+		if (err) {
+			/* As for xennet_init_queue(), setup_netfront() will tidy
+			 * up the current queue on error, but we need to clean up
+			 * those already allocated.
+			 */
+			if (i > 0) {
+				rtnl_lock();
+				netif_set_real_num_tx_queues(info->netdev, i);
+				rtnl_unlock();
+				goto destroy_ring;
+			} else {
+				goto out;
+			}
+		}
+	}
 
 again:
 	err = xenbus_transaction_start(&xbt);
@@ -1640,41 +1882,29 @@
 		goto destroy_ring;
 	}
 
-	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
-			    info->tx_ring_ref);
-	if (err) {
-		message = "writing tx ring-ref";
-		goto abort_transaction;
-	}
-	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
-			    info->rx_ring_ref);
-	if (err) {
-		message = "writing rx ring-ref";
-		goto abort_transaction;
-	}
-
-	if (info->tx_evtchn == info->rx_evtchn) {
-		err = xenbus_printf(xbt, dev->nodename,
-				    "event-channel", "%u", info->tx_evtchn);
-		if (err) {
-			message = "writing event-channel";
-			goto abort_transaction;
-		}
+	if (num_queues == 1) {
+		err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
+		if (err)
+			goto abort_transaction_no_dev_fatal;
 	} else {
-		err = xenbus_printf(xbt, dev->nodename,
-				    "event-channel-tx", "%u", info->tx_evtchn);
+		/* Write the number of queues */
+		err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
+				    "%u", num_queues);
 		if (err) {
-			message = "writing event-channel-tx";
-			goto abort_transaction;
+			message = "writing multi-queue-num-queues";
+			goto abort_transaction_no_dev_fatal;
 		}
-		err = xenbus_printf(xbt, dev->nodename,
-				    "event-channel-rx", "%u", info->rx_evtchn);
-		if (err) {
-			message = "writing event-channel-rx";
-			goto abort_transaction;
+
+		/* Write the keys for each queue */
+		for (i = 0; i < num_queues; ++i) {
+			queue = &info->queues[i];
+			err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
+			if (err)
+				goto abort_transaction_no_dev_fatal;
 		}
 	}
 
+	/* The remaining keys are not queue-specific */
 	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
 			    1);
 	if (err) {
@@ -1724,10 +1954,16 @@
 	return 0;
 
  abort_transaction:
-	xenbus_transaction_end(xbt, 1);
 	xenbus_dev_fatal(dev, err, "%s", message);
+abort_transaction_no_dev_fatal:
+	xenbus_transaction_end(xbt, 1);
  destroy_ring:
 	xennet_disconnect_backend(info);
+	kfree(info->queues);
+	info->queues = NULL;
+	rtnl_lock();
+	netif_set_real_num_tx_queues(info->netdev, 0);
+	rtnl_unlock();
  out:
 	return err;
 }
@@ -1735,11 +1971,14 @@
 static int xennet_connect(struct net_device *dev)
 {
 	struct netfront_info *np = netdev_priv(dev);
+	unsigned int num_queues = 0;
 	int i, requeue_idx, err;
 	struct sk_buff *skb;
 	grant_ref_t ref;
 	struct xen_netif_rx_request *req;
 	unsigned int feature_rx_copy;
+	unsigned int j = 0;
+	struct netfront_queue *queue = NULL;
 
 	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
 			   "feature-rx-copy", "%u", &feature_rx_copy);
@@ -1756,41 +1995,48 @@
 	if (err)
 		return err;
 
+	/* talk_to_netback() sets the correct number of queues */
+	num_queues = dev->real_num_tx_queues;
+
 	rtnl_lock();
 	netdev_update_features(dev);
 	rtnl_unlock();
 
-	spin_lock_bh(&np->rx_lock);
-	spin_lock_irq(&np->tx_lock);
+	/* By now, the queue structures have been set up */
+	for (j = 0; j < num_queues; ++j) {
+		queue = &np->queues[j];
+		spin_lock_bh(&queue->rx_lock);
+		spin_lock_irq(&queue->tx_lock);
 
-	/* Step 1: Discard all pending TX packet fragments. */
-	xennet_release_tx_bufs(np);
+		/* Step 1: Discard all pending TX packet fragments. */
+		xennet_release_tx_bufs(queue);
 
-	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
-	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
-		skb_frag_t *frag;
-		const struct page *page;
-		if (!np->rx_skbs[i])
-			continue;
+		/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+		for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+			skb_frag_t *frag;
+			const struct page *page;
+			if (!queue->rx_skbs[i])
+				continue;
 
-		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
-		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
-		req = RING_GET_REQUEST(&np->rx, requeue_idx);
+			skb = queue->rx_skbs[requeue_idx] = xennet_get_rx_skb(queue, i);
+			ref = queue->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(queue, i);
+			req = RING_GET_REQUEST(&queue->rx, requeue_idx);
 
-		frag = &skb_shinfo(skb)->frags[0];
-		page = skb_frag_page(frag);
-		gnttab_grant_foreign_access_ref(
-			ref, np->xbdev->otherend_id,
-			pfn_to_mfn(page_to_pfn(page)),
-			0);
-		req->gref = ref;
-		req->id   = requeue_idx;
+			frag = &skb_shinfo(skb)->frags[0];
+			page = skb_frag_page(frag);
+			gnttab_grant_foreign_access_ref(
+				ref, queue->info->xbdev->otherend_id,
+				pfn_to_mfn(page_to_pfn(page)),
+				0);
+			req->gref = ref;
+			req->id   = requeue_idx;
 
-		requeue_idx++;
+			requeue_idx++;
+		}
+
+		queue->rx.req_prod_pvt = requeue_idx;
 	}
 
-	np->rx.req_prod_pvt = requeue_idx;
-
 	/*
 	 * Step 3: All public and private state should now be sane.  Get
 	 * ready to start sending and receiving packets and give the driver
@@ -1798,14 +2044,17 @@
 	 * packets.
 	 */
 	netif_carrier_on(np->netdev);
-	notify_remote_via_irq(np->tx_irq);
-	if (np->tx_irq != np->rx_irq)
-		notify_remote_via_irq(np->rx_irq);
-	xennet_tx_buf_gc(dev);
-	xennet_alloc_rx_buffers(dev);
+	for (j = 0; j < num_queues; ++j) {
+		queue = &np->queues[j];
+		notify_remote_via_irq(queue->tx_irq);
+		if (queue->tx_irq != queue->rx_irq)
+			notify_remote_via_irq(queue->rx_irq);
+		xennet_tx_buf_gc(queue);
+		xennet_alloc_rx_buffers(queue);
 
-	spin_unlock_irq(&np->tx_lock);
-	spin_unlock_bh(&np->rx_lock);
+		spin_unlock_irq(&queue->tx_lock);
+		spin_unlock_bh(&queue->rx_lock);
+	}
 
 	return 0;
 }
@@ -1878,7 +2127,7 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
-		data[i] = *(unsigned long *)(np + xennet_stats[i].offset);
+		data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
 }
 
 static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
@@ -1909,8 +2158,12 @@
 {
 	struct net_device *netdev = to_net_dev(dev);
 	struct netfront_info *info = netdev_priv(netdev);
+	unsigned int num_queues = netdev->real_num_tx_queues;
 
-	return sprintf(buf, "%u\n", info->rx_min_target);
+	if (num_queues)
+		return sprintf(buf, "%u\n", info->queues[0].rx_min_target);
+	else
+		return sprintf(buf, "%u\n", RX_MIN_TARGET);
 }
 
 static ssize_t store_rxbuf_min(struct device *dev,
@@ -1919,8 +2172,11 @@
 {
 	struct net_device *netdev = to_net_dev(dev);
 	struct netfront_info *np = netdev_priv(netdev);
+	unsigned int num_queues = netdev->real_num_tx_queues;
 	char *endp;
 	unsigned long target;
+	unsigned int i;
+	struct netfront_queue *queue;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1934,16 +2190,19 @@
 	if (target > RX_MAX_TARGET)
 		target = RX_MAX_TARGET;
 
-	spin_lock_bh(&np->rx_lock);
-	if (target > np->rx_max_target)
-		np->rx_max_target = target;
-	np->rx_min_target = target;
-	if (target > np->rx_target)
-		np->rx_target = target;
+	for (i = 0; i < num_queues; ++i) {
+		queue = &np->queues[i];
+		spin_lock_bh(&queue->rx_lock);
+		if (target > queue->rx_max_target)
+			queue->rx_max_target = target;
+		queue->rx_min_target = target;
+		if (target > queue->rx_target)
+			queue->rx_target = target;
 
-	xennet_alloc_rx_buffers(netdev);
+		xennet_alloc_rx_buffers(queue);
 
-	spin_unlock_bh(&np->rx_lock);
+		spin_unlock_bh(&queue->rx_lock);
+	}
 	return len;
 }
 
@@ -1952,8 +2211,12 @@
 {
 	struct net_device *netdev = to_net_dev(dev);
 	struct netfront_info *info = netdev_priv(netdev);
+	unsigned int num_queues = netdev->real_num_tx_queues;
 
-	return sprintf(buf, "%u\n", info->rx_max_target);
+	if (num_queues)
+		return sprintf(buf, "%u\n", info->queues[0].rx_max_target);
+	else
+		return sprintf(buf, "%u\n", RX_MAX_TARGET);
 }
 
 static ssize_t store_rxbuf_max(struct device *dev,
@@ -1962,8 +2225,11 @@
 {
 	struct net_device *netdev = to_net_dev(dev);
 	struct netfront_info *np = netdev_priv(netdev);
+	unsigned int num_queues = netdev->real_num_tx_queues;
 	char *endp;
 	unsigned long target;
+	unsigned int i = 0;
+	struct netfront_queue *queue = NULL;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1977,16 +2243,19 @@
 	if (target > RX_MAX_TARGET)
 		target = RX_MAX_TARGET;
 
-	spin_lock_bh(&np->rx_lock);
-	if (target < np->rx_min_target)
-		np->rx_min_target = target;
-	np->rx_max_target = target;
-	if (target < np->rx_target)
-		np->rx_target = target;
+	for (i = 0; i < num_queues; ++i) {
+		queue = &np->queues[i];
+		spin_lock_bh(&queue->rx_lock);
+		if (target < queue->rx_min_target)
+			queue->rx_min_target = target;
+		queue->rx_max_target = target;
+		if (target < queue->rx_target)
+			queue->rx_target = target;
 
-	xennet_alloc_rx_buffers(netdev);
+		xennet_alloc_rx_buffers(queue);
 
-	spin_unlock_bh(&np->rx_lock);
+		spin_unlock_bh(&queue->rx_lock);
+	}
 	return len;
 }
 
@@ -1995,8 +2264,12 @@
 {
 	struct net_device *netdev = to_net_dev(dev);
 	struct netfront_info *info = netdev_priv(netdev);
+	unsigned int num_queues = netdev->real_num_tx_queues;
 
-	return sprintf(buf, "%u\n", info->rx_target);
+	if (num_queues)
+		return sprintf(buf, "%u\n", info->queues[0].rx_target);
+	else
+		return sprintf(buf, "0\n");
 }
 
 static struct device_attribute xennet_attrs[] = {
@@ -2043,6 +2316,9 @@
 static int xennet_remove(struct xenbus_device *dev)
 {
 	struct netfront_info *info = dev_get_drvdata(&dev->dev);
+	unsigned int num_queues = info->netdev->real_num_tx_queues;
+	struct netfront_queue *queue = NULL;
+	unsigned int i = 0;
 
 	dev_dbg(&dev->dev, "%s\n", dev->nodename);
 
@@ -2052,7 +2328,15 @@
 
 	unregister_netdev(info->netdev);
 
-	del_timer_sync(&info->rx_refill_timer);
+	for (i = 0; i < num_queues; ++i) {
+		queue = &info->queues[i];
+		del_timer_sync(&queue->rx_refill_timer);
+	}
+
+	if (num_queues) {
+		kfree(info->queues);
+		info->queues = NULL;
+	}
 
 	free_percpu(info->stats);
 
@@ -2078,6 +2362,9 @@
 
 	pr_info("Initialising Xen virtual ethernet driver\n");
 
+	/* Allow as many queues as there are CPUs, by default */
+	xennet_max_queues = num_online_cpus();
+
 	return xenbus_register_frontend(&netfront_driver);
 }
 module_init(netif_init);

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 65d4ca1..26c66a1 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig

@@ -71,5 +71,6 @@
 source "drivers/nfc/pn544/Kconfig"
 source "drivers/nfc/microread/Kconfig"
 source "drivers/nfc/nfcmrvl/Kconfig"
+source "drivers/nfc/st21nfca/Kconfig"
 
 endmenu

diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile
index ae42a3f..23225b0 100644
--- a/drivers/nfc/Makefile
+++ b/drivers/nfc/Makefile

@@ -11,5 +11,6 @@
 obj-$(CONFIG_NFC_PORT100)	+= port100.o
 obj-$(CONFIG_NFC_MRVL)		+= nfcmrvl/
 obj-$(CONFIG_NFC_TRF7970A)	+= trf7970a.o
+obj-$(CONFIG_NFC_ST21NFCA)  += st21nfca/
 
 ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG

diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index f2acd85..440291a 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c

@@ -22,6 +22,8 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/of_irq.h>
 #include <linux/miscdevice.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -857,6 +859,92 @@
 	}
 }
 
+#ifdef CONFIG_OF
+
+static int pn544_hci_i2c_of_request_resources(struct i2c_client *client)
+{
+	struct pn544_i2c_phy *phy = i2c_get_clientdata(client);
+	struct device_node *pp;
+	int ret;
+
+	pp = client->dev.of_node;
+	if (!pp) {
+		ret = -ENODEV;
+		goto err_dt;
+	}
+
+	/* Look up the EN GPIO in the device tree */
+	ret = of_get_named_gpio(pp, "enable-gpios", 0);
+	if (ret < 0) {
+		if (ret != -EPROBE_DEFER)
+			nfc_err(&client->dev,
+				"Failed to get EN gpio, error: %d\n", ret);
+		goto err_dt;
+	}
+	phy->gpio_en = ret;
+
+	/* Configuration of EN GPIO */
+	ret = gpio_request(phy->gpio_en, "pn544_en");
+	if (ret) {
+		nfc_err(&client->dev, "Fail EN pin\n");
+		goto err_dt;
+	}
+	ret = gpio_direction_output(phy->gpio_en, 0);
+	if (ret) {
+		nfc_err(&client->dev, "Fail EN pin direction\n");
+		goto err_gpio_en;
+	}
+
+	/* Look up the FW GPIO in the device tree */
+	ret = of_get_named_gpio(pp, "firmware-gpios", 0);
+	if (ret < 0) {
+		if (ret != -EPROBE_DEFER)
+			nfc_err(&client->dev,
+				"Failed to get FW gpio, error: %d\n", ret);
+		goto err_gpio_en;
+	}
+	phy->gpio_fw = ret;
+
+	/* Configuration of FW GPIO */
+	ret = gpio_request(phy->gpio_fw, "pn544_fw");
+	if (ret) {
+		nfc_err(&client->dev, "Fail FW pin\n");
+		goto err_gpio_en;
+	}
+	ret = gpio_direction_output(phy->gpio_fw, 0);
+	if (ret) {
+		nfc_err(&client->dev, "Fail FW pin direction\n");
+		goto err_gpio_fw;
+	}
+
+	/* IRQ: irq_of_parse_and_map() returns 0, never -errno, on failure */
+	ret = irq_of_parse_and_map(pp, 0);
+	if (ret <= 0) {
+		ret = -EINVAL;
+		nfc_err(&client->dev, "Unable to get irq, error: %d\n", ret);
+		goto err_gpio_fw;
+	}
+	client->irq = ret;
+
+	return 0;
+
+err_gpio_fw:
+	gpio_free(phy->gpio_fw);
+err_gpio_en:
+	gpio_free(phy->gpio_en);
+err_dt:
+	return ret;
+}
+
+#else
+
+static int pn544_hci_i2c_of_request_resources(struct i2c_client *client)
+{
+	return -ENODEV;
+}
+
+#endif
+
 static int pn544_hci_i2c_probe(struct i2c_client *client,
 			       const struct i2c_device_id *id)
 {
@@ -887,26 +975,37 @@
 	i2c_set_clientdata(client, phy);
 
 	pdata = client->dev.platform_data;
-	if (pdata == NULL) {
+
+	/* No platform data, using device tree. */
+	if (!pdata && client->dev.of_node) {
+		r = pn544_hci_i2c_of_request_resources(client);
+		if (r) {
+			nfc_err(&client->dev, "No DT data\n");
+			return r;
+		}
+	/* Using platform data. */
+	} else if (pdata) {
+
+		if (pdata->request_resources == NULL) {
+			nfc_err(&client->dev, "request_resources() missing\n");
+			return -EINVAL;
+		}
+
+		r = pdata->request_resources(client);
+		if (r) {
+			nfc_err(&client->dev,
+				"Cannot get platform resources\n");
+			return r;
+		}
+
+		phy->gpio_en = pdata->get_gpio(NFC_GPIO_ENABLE);
+		phy->gpio_fw = pdata->get_gpio(NFC_GPIO_FW_RESET);
+		phy->gpio_irq = pdata->get_gpio(NFC_GPIO_IRQ);
+	} else {
 		nfc_err(&client->dev, "No platform data\n");
 		return -EINVAL;
 	}
 
-	if (pdata->request_resources == NULL) {
-		nfc_err(&client->dev, "request_resources() missing\n");
-		return -EINVAL;
-	}
-
-	r = pdata->request_resources(client);
-	if (r) {
-		nfc_err(&client->dev, "Cannot get platform resources\n");
-		return r;
-	}
-
-	phy->gpio_en = pdata->get_gpio(NFC_GPIO_ENABLE);
-	phy->gpio_fw = pdata->get_gpio(NFC_GPIO_FW_RESET);
-	phy->gpio_irq = pdata->get_gpio(NFC_GPIO_IRQ);
-
 	pn544_hci_i2c_platform_init(phy);
 
 	r = request_threaded_irq(client->irq, NULL, pn544_hci_i2c_irq_thread_fn,
@@ -930,8 +1029,12 @@
 	free_irq(client->irq, phy);
 
 err_rti:
-	if (pdata->free_resources != NULL)
+	if (!pdata) {
+		gpio_free(phy->gpio_en);
+		gpio_free(phy->gpio_fw);
+	} else if (pdata->free_resources) {
 		pdata->free_resources();
+	}
 
 	return r;
 }
@@ -953,15 +1056,30 @@
 		pn544_hci_i2c_disable(phy);
 
 	free_irq(client->irq, phy);
-	if (pdata->free_resources)
+
+	/* No platform data, GPIOs have been requested by this driver */
+	if (!pdata) {
+		gpio_free(phy->gpio_en);
+		gpio_free(phy->gpio_fw);
+	/* Using platform data */
+	} else if (pdata->free_resources) {
 		pdata->free_resources();
+	}
 
 	return 0;
 }
 
+static const struct of_device_id of_pn544_i2c_match[] = {
+	{ .compatible = "nxp,pn544-i2c", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_pn544_i2c_match);
+
 static struct i2c_driver pn544_hci_i2c_driver = {
 	.driver = {
 		   .name = PN544_HCI_I2C_DRIVER_NAME,
+		   .owner  = THIS_MODULE,
+		   .of_match_table = of_match_ptr(of_pn544_i2c_match),
 		  },
 	.probe = pn544_hci_i2c_probe,
 	.id_table = pn544_hci_i2c_id_table,

diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index b7a372a..4ac4d31 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c

@@ -28,7 +28,8 @@
 			   NFC_PROTO_MIFARE_MASK   | \
 			   NFC_PROTO_FELICA_MASK   | \
 			   NFC_PROTO_NFC_DEP_MASK  | \
-			   NFC_PROTO_ISO14443_MASK)
+			   NFC_PROTO_ISO14443_MASK | \
+			   NFC_PROTO_ISO14443_B_MASK)
 
 #define PORT100_CAPABILITIES (NFC_DIGITAL_DRV_CAPS_IN_CRC | \
 			      NFC_DIGITAL_DRV_CAPS_TG_CRC)
@@ -120,6 +121,7 @@
 #define PORT100_COMM_TYPE_IN_212F 0x01
 #define PORT100_COMM_TYPE_IN_424F 0x02
 #define PORT100_COMM_TYPE_IN_106A 0x03
+#define PORT100_COMM_TYPE_IN_106B 0x07
 
 static const struct port100_in_rf_setting in_rf_settings[] = {
 	[NFC_DIGITAL_RF_TECH_212F] = {
@@ -140,6 +142,12 @@
 		.in_recv_set_number = 15,
 		.in_recv_comm_type  = PORT100_COMM_TYPE_IN_106A,
 	},
+	[NFC_DIGITAL_RF_TECH_106B] = {
+		.in_send_set_number = 3,
+		.in_send_comm_type  = PORT100_COMM_TYPE_IN_106B,
+		.in_recv_set_number = 15,
+		.in_recv_comm_type  = PORT100_COMM_TYPE_IN_106B,
+	},
 	/* Ensures the array has NFC_DIGITAL_RF_TECH_LAST elements */
 	[NFC_DIGITAL_RF_TECH_LAST] = { 0 },
 };
@@ -340,6 +348,32 @@
 	[NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED] = {
 		{ PORT100_IN_PROT_END, 0 },
 	},
+	[NFC_DIGITAL_FRAMING_NFCB] = {
+		{ PORT100_IN_PROT_INITIAL_GUARD_TIME,     20 },
+		{ PORT100_IN_PROT_ADD_CRC,                 1 },
+		{ PORT100_IN_PROT_CHECK_CRC,               1 },
+		{ PORT100_IN_PROT_MULTI_CARD,              0 },
+		{ PORT100_IN_PROT_ADD_PARITY,              0 },
+		{ PORT100_IN_PROT_CHECK_PARITY,            0 },
+		{ PORT100_IN_PROT_BITWISE_AC_RECV_MODE,    0 },
+		{ PORT100_IN_PROT_VALID_BIT_NUMBER,        8 },
+		{ PORT100_IN_PROT_CRYPTO1,                 0 },
+		{ PORT100_IN_PROT_ADD_SOF,                 1 },
+		{ PORT100_IN_PROT_CHECK_SOF,               1 },
+		{ PORT100_IN_PROT_ADD_EOF,                 1 },
+		{ PORT100_IN_PROT_CHECK_EOF,               1 },
+		{ PORT100_IN_PROT_DEAF_TIME,               4 },
+		{ PORT100_IN_PROT_CRM,                     0 },
+		{ PORT100_IN_PROT_CRM_MIN_LEN,             0 },
+		{ PORT100_IN_PROT_T1_TAG_FRAME,            0 },
+		{ PORT100_IN_PROT_RFCA,                    0 },
+		{ PORT100_IN_PROT_GUARD_TIME_AT_INITIATOR, 6 },
+		{ PORT100_IN_PROT_END,                     0 },
+	},
+	[NFC_DIGITAL_FRAMING_NFCB_T4T] = {
+		/* nfc_digital_framing_nfcb */
+		{ PORT100_IN_PROT_END,                     0 },
+	},
 	/* Ensures the array has NFC_DIGITAL_FRAMING_LAST elements */
 	[NFC_DIGITAL_FRAMING_LAST] = {
 		{ PORT100_IN_PROT_END, 0 },

diff --git a/drivers/nfc/st21nfca/Kconfig b/drivers/nfc/st21nfca/Kconfig
new file mode 100644
index 0000000..ee459f0
--- /dev/null
+++ b/drivers/nfc/st21nfca/Kconfig

@@ -0,0 +1,23 @@
+config NFC_ST21NFCA
+	tristate "STMicroelectronics ST21NFCA NFC driver"
+	depends on NFC_HCI
+	select CRC_CCITT
+	default n
+	---help---
+	  STMicroelectronics ST21NFCA core driver. It implements the chipset
+	  HCI logic and hooks into the NFC kernel APIs. Physical layers will
+	  register against it.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called st21nfca.
+	  Say N if unsure.
+
+config NFC_ST21NFCA_I2C
+	tristate "NFC ST21NFCA i2c support"
+	depends on NFC_ST21NFCA && I2C && NFC_SHDLC
+	---help---
+	  This module adds support for the STMicroelectronics st21nfca i2c interface.
+	  Select this if your platform is using the i2c bus.
+
+	  If you choose to build a module, it'll be called st21nfca_i2c.
+	  Say N if unsure.

diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile
new file mode 100644
index 0000000..038ed09
--- /dev/null
+++ b/drivers/nfc/st21nfca/Makefile

@@ -0,0 +1,8 @@
+#
+# Makefile for ST21NFCA HCI based NFC driver
+#
+
+st21nfca_i2c-objs  = i2c.o
+
+obj-$(CONFIG_NFC_ST21NFCA)     += st21nfca.o
+obj-$(CONFIG_NFC_ST21NFCA_I2C) += st21nfca_i2c.o

diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
new file mode 100644
index 0000000..3f954ed
--- /dev/null
+++ b/drivers/nfc/st21nfca/i2c.c

@@ -0,0 +1,724 @@
+/*
+ * I2C Link Layer for ST21NFCA HCI based Driver
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crc-ccitt.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nfc.h>
+#include <linux/firmware.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/platform_data/st21nfca.h>
+
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+#include <net/nfc/nfc.h>
+
+#include "st21nfca.h"
+
+/*
+ * Every frame starts with ST21NFCA_SOF_EOF and ends with ST21NFCA_SOF_EOF.
+ * Because ST21NFCA_SOF_EOF is a possible data value, a mechanism called
+ * byte stuffing has been introduced.
+ *
+ * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+ * - insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+ * - xor byte with ST21NFCA_BYTE_STUFFING_MASK
+ */
+#define ST21NFCA_SOF_EOF		0x7e
+#define ST21NFCA_BYTE_STUFFING_MASK	0x20
+#define ST21NFCA_ESCAPE_BYTE_STUFFING	0x7d
+
+/* SOF + 00 */
+#define ST21NFCA_FRAME_HEADROOM			2
+
+/* 2 bytes crc + EOF */
+#define ST21NFCA_FRAME_TAILROOM 3
+/* True when buf starts with ST21NFCA_SOF_EOF immediately followed by 0x00 */
+#define IS_START_OF_FRAME(buf) ((buf)[0] == ST21NFCA_SOF_EOF && \
+				(buf)[1] == 0)
+
+#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
+
+/* I2C device ID table referenced by st21nfca_hci_i2c_driver.id_table */
+static const struct i2c_device_id st21nfca_hci_i2c_id_table[] = {
+	{ST21NFCA_HCI_DRIVER_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table);
+
+struct st21nfca_i2c_phy {
+	struct i2c_client *i2c_dev;	/* I2C client this phy is bound to */
+	struct nfc_hci_dev *hdev;	/* filled in by st21nfca_hci_probe() */
+
+	unsigned int gpio_ena;		/* GPIO driven by enable()/disable() */
+	unsigned int gpio_irq;		/* interrupt GPIO (platform data path) */
+	unsigned int irq_polarity;	/* trigger flags used when requesting the IRQ */
+
+	struct sk_buff *pending_skb;	/* frame being assembled by the IRQ thread */
+	int current_read_len;		/* index into len_seq[] for the next read */
+	/*
+	 * The CRC check might have failed because the i2c macrocell
+	 * was disabled due to other interface activity.
+	 * Counts CRC failures since the last valid frame and indexes
+	 * wait_tab[] for the back-off delay.
+	 */
+	int crc_trials;
+
+	int powered;			/* 1 after enable(), 0 after disable() */
+	int run_mode;			/* set to ST21NFCA_HCI_MODE by enable() */
+
+	/*
+	 * < 0 if a hardware error occurred (e.g. i2c err)
+	 * and prevents normal operation.
+	 */
+	int hard_fault;
+	struct mutex phy_lock;		/* held around the i2c send/recv retry loops */
+};
+/* Successive i2c read lengths used while assembling one frame */
+static const u8 len_seq[] = { 13, 24, 15, 29 };
+/* Retry delays in milliseconds, passed to msleep() after a failed attempt */
+static const u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+
+#define I2C_DUMP_SKB(info, skb)	/* debug hex dump of (skb), titled info */ \
+do {								\
+	pr_debug("%s:\n", info);				\
+	print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET,	\
+		       16, 1, (skb)->data, (skb)->len, 0);	\
+} while (0)
+
+/*
+ * In order to get the CLF in a known state we generate an internal reboot
+ * using a proprietary command.
+ * Once the reboot is completed, we expect to receive a buffer filled with
+ * ST21NFCA_SOF_EOF.
+ *
+ * Both the send of the reboot command and the following read are retried,
+ * sleeping wait_reboot[i] ms after each failed attempt.
+ *
+ * Returns 0 on success, the negative i2c error when every attempt failed,
+ * or -ENODEV when the read returned a length other than
+ * ST21NFCA_HCI_LLC_MAX_SIZE.
+ *
+ * NOTE(review): the loop that scans tmp[] for ST21NFCA_SOF_EOF computes i,
+ * but i is never tested afterwards; only the read length r is checked.
+ * Confirm whether the buffer content was meant to be verified.
+ */
+static int st21nfca_hci_platform_init(struct st21nfca_i2c_phy *phy)
+{
+	u16 wait_reboot[] = { 50, 300, 1000 };
+	char reboot_cmd[] = { 0x7E, 0x66, 0x48, 0xF6, 0x7E };
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE];
+	int i, r = -1;
+
+	for (i = 0; i < ARRAY_SIZE(wait_reboot) && r < 0; i++) {
+		r = i2c_master_send(phy->i2c_dev, reboot_cmd,
+				    sizeof(reboot_cmd));
+		if (r < 0)
+			msleep(wait_reboot[i]);
+	}
+	if (r < 0)
+		return r;
+
+	/* CLF is spending about 20ms to do an internal reboot */
+	msleep(20);
+	r = -1;
+	for (i = 0; i < ARRAY_SIZE(wait_reboot) && r < 0; i++) {
+		r = i2c_master_recv(phy->i2c_dev, tmp,
+				    ST21NFCA_HCI_LLC_MAX_SIZE);
+		if (r < 0)
+			msleep(wait_reboot[i]);
+	}
+	if (r < 0)
+		return r;
+
+	for (i = 0; i < ST21NFCA_HCI_LLC_MAX_SIZE &&
+		tmp[i] == ST21NFCA_SOF_EOF; i++)
+		;
+
+	if (r != ST21NFCA_HCI_LLC_MAX_SIZE)
+		return -ENODEV;
+
+	usleep_range(1000, 1500);
+	return 0;
+}
+
+/*
+ * Power the chip up: raise the enable GPIO, record the powered state and the
+ * HCI run mode, then sleep 10-15 ms before returning. Always returns 0.
+ */
+static int st21nfca_hci_i2c_enable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *i2c_phy = phy_id;
+
+	gpio_set_value(i2c_phy->gpio_ena, 1);
+
+	i2c_phy->run_mode = ST21NFCA_HCI_MODE;
+	i2c_phy->powered = 1;
+
+	usleep_range(10000, 15000);
+	return 0;
+}
+
+/* Power the chip down: lower the enable GPIO and clear the powered flag. */
+static void st21nfca_hci_i2c_disable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *i2c_phy = phy_id;
+
+	pr_info("\n");
+
+	gpio_set_value(i2c_phy->gpio_ena, 0);
+	i2c_phy->powered = 0;
+}
+
+/*
+ * Prepend a zero byte to the frame, then append the inverted CRC-CCITT
+ * (seed 0xffff) of the resulting data, least significant byte first.
+ */
+static void st21nfca_hci_add_len_crc(struct sk_buff *skb)
+{
+	u16 crc;
+	u8 *p;
+
+	p = skb_push(skb, 1);
+	*p = 0;
+
+	crc = ~crc_ccitt(0xffff, skb->data, skb->len);
+
+	p = skb_put(skb, 2);
+	p[0] = crc & 0x00ff;
+	p[1] = (crc >> 8) & 0x00ff;
+}
+
+/*
+ * Strip ST21NFCA_FRAME_HEADROOM bytes from the head of the skb and
+ * ST21NFCA_FRAME_TAILROOM bytes from its tail.
+ */
+static void st21nfca_hci_remove_len_crc(struct sk_buff *skb)
+{
+	unsigned int payload_len;
+
+	skb_pull(skb, ST21NFCA_FRAME_HEADROOM);
+
+	payload_len = skb->len - ST21NFCA_FRAME_TAILROOM;
+	skb_trim(skb, payload_len);
+}
+
+/*
+ * Writing a frame must not return the number of written bytes.
+ * It must return either zero for success, or <0 for error.
+ * In addition, it must not alter the skb: the framing added here (leading
+ * zero byte, CRC and SOF/EOF markers) is stripped again before returning.
+ *
+ * Returns phy->hard_fault when it is set, -EREMOTEIO on a short write, or
+ * the negative error of the last failed i2c_master_send().
+ */
+static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
+{
+	int r = -1, i, j;
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client = phy->i2c_dev;
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE * 2];
+
+	I2C_DUMP_SKB("st21nfca_hci_i2c_write", skb);
+
+
+	if (phy->hard_fault != 0)
+		return phy->hard_fault;
+
+	/*
+	 * Compute CRC before byte stuffing computation on frame.
+	 * The CRC bytes appended by st21nfca_hci_add_len_crc are byte-stuffed
+	 * below together with the rest of the frame.
+	 */
+	st21nfca_hci_add_len_crc(skb);
+
+	/* add ST21NFCA_SOF_EOF on tail */
+	*skb_put(skb, 1) = ST21NFCA_SOF_EOF;
+	/* add ST21NFCA_SOF_EOF on head */
+	*skb_push(skb, 1) = ST21NFCA_SOF_EOF;
+
+	/*
+	 * Compute byte stuffing
+	 * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+	 * insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+	 * xor byte with ST21NFCA_BYTE_STUFFING_MASK
+	 * The first and last bytes (SOF/EOF markers) are copied unchanged.
+	 */
+	tmp[0] = skb->data[0];
+	for (i = 1, j = 1; i < skb->len - 1; i++, j++) {
+		if (skb->data[i] == ST21NFCA_SOF_EOF
+		    || skb->data[i] == ST21NFCA_ESCAPE_BYTE_STUFFING) {
+			tmp[j] = ST21NFCA_ESCAPE_BYTE_STUFFING;
+			j++;
+			tmp[j] = skb->data[i] ^ ST21NFCA_BYTE_STUFFING_MASK;
+		} else {
+			tmp[j] = skb->data[i];
+		}
+	}
+	tmp[j] = skb->data[i];
+	j++;
+
+	/*
+	 * Manage sleep mode
+	 * Retry up to ARRAY_SIZE(wait_tab) times, sleeping wait_tab[i] ms
+	 * after each failed attempt
+	 */
+	mutex_lock(&phy->phy_lock);
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) {
+		r = i2c_master_send(client, tmp, j);
+		if (r < 0)
+			msleep(wait_tab[i]);
+	}
+	mutex_unlock(&phy->phy_lock);
+
+	if (r >= 0) {
+		if (r != j)
+			r = -EREMOTEIO;
+		else
+			r = 0;
+	}
+
+	st21nfca_hci_remove_len_crc(skb);
+
+	return r;
+}
+
+/*
+ * Returns 0 when buf[1] is ST21NFCA_SOF_EOF. Otherwise scans from index 1
+ * and returns the index of the first ST21NFCA_SOF_EOF, or the index at
+ * which the scan stopped because buflen was reached.
+ */
+static int get_frame_size(u8 *buf, int buflen)
+{
+	int pos = 1;
+
+	if (buf[1] == ST21NFCA_SOF_EOF)
+		return 0;
+
+	while (pos < buflen && buf[pos] != ST21NFCA_SOF_EOF)
+		pos++;
+
+	return pos;
+}
+
+/*
+ * Verify the two trailing CRC bytes of buf (low byte first) against the
+ * inverted CRC-CCITT of the preceding buflen - 2 bytes.
+ * Returns 0 on match; logs the frame and returns -EPERM on mismatch.
+ */
+static int check_crc(u8 *buf, int buflen)
+{
+	u16 crc = ~crc_ccitt(0xffff, buf, buflen - 2);
+	u8 *rx_crc = &buf[buflen - 2];
+
+	if (rx_crc[0] == (crc & 0xff) && rx_crc[1] == (crc >> 8))
+		return 0;
+
+	pr_err(ST21NFCA_HCI_DRIVER_NAME
+	       ": CRC error 0x%x != 0x%x 0x%x\n", crc, rx_crc[1],
+	       rx_crc[0]);
+
+	pr_info(DRIVER_DESC ": %s : BAD CRC\n", __func__);
+	print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE,
+		       16, 2, buf, buflen, false);
+
+	return -EPERM;
+}
+
+/*
+ * Prepare received data for upper layer.
+ * Received data include byte stuffing, crc and sof/eof
+ * which is not usable by hci part.
+ * returns:
+ * frame size without sof/eof, header and byte stuffing
+ * 0 : get_frame_size() returned 0, nothing was repacked
+ * -EBADMSG : frame was incorrect and discarded
+ *
+ * NOTE(review): after an escape byte is handled, the byte following the
+ * escaped one is copied without being tested for
+ * ST21NFCA_ESCAPE_BYTE_STUFFING, so two consecutive stuffed bytes do not
+ * look handled. The loop also reads data[i + j] beyond the trimmed length
+ * once j > 0. Confirm against the chip framing before changing.
+ */
+static int st21nfca_hci_i2c_repack(struct sk_buff *skb)
+{
+	int i, j, r, size;
+	if (skb->len < 1 || (skb->len > 1 && skb->data[1] != 0))
+		return -EBADMSG;
+
+	size = get_frame_size(skb->data, skb->len);
+	if (size > 0) {
+		skb_trim(skb, size);
+		/*
+		 * remove ST21NFCA byte stuffing for upper layer
+		 * i is the write index, i + j the read index; j counts the
+		 * escape bytes dropped so far
+		 */
+		for (i = 1, j = 0; i < skb->len; i++) {
+			if (skb->data[i + j] ==
+					(u8) ST21NFCA_ESCAPE_BYTE_STUFFING) {
+				skb->data[i] = skb->data[i + j + 1]
+						| ST21NFCA_BYTE_STUFFING_MASK;
+				i++;
+				j++;
+			}
+			skb->data[i] = skb->data[i + j];
+		}
+		/* remove byte stuffing useless byte */
+		skb_trim(skb, i - j);
+		/* remove ST21NFCA_SOF_EOF from head */
+		skb_pull(skb, 1);
+
+		r = check_crc(skb->data, skb->len);
+		if (r != 0) {
+			i = 0;	/* no effect: i is not read after this point */
+			return -EBADMSG;
+		}
+
+		/* remove headbyte */
+		skb_pull(skb, 1);
+		/* remove crc. Byte Stuffing is already removed here */
+		skb_trim(skb, skb->len - 2);
+		return skb->len;
+	}
+	return 0;
+}
+
+/*
+ * Reads the next chunk of an shdlc frame from the chip and appends it to skb.
+ * The chunk length is len_seq[phy->current_read_len]; the frame is complete
+ * once the last byte read is ST21NFCA_SOF_EOF.
+ * When complete, skb contains only LLC header and payload.
+ * returns:
+ * frame size : if received frame is complete (find ST21NFCA_SOF_EOF at
+ * end of read)
+ * -EAGAIN : if received frame is incomplete (not find ST21NFCA_SOF_EOF
+ * at end of read)
+ * -EREMOTEIO : i2c read error (fatal)
+ * -EBADMSG : frame was incorrect and discarded
+ * (value returned from st21nfca_hci_i2c_repack)
+ * -EIO : if the first chunk does not start with ST21NFCA_SOF_EOF, or if
+ * no ST21NFCA_SOF_EOF is found after reaching the read length end sequence
+ *
+ * NOTE(review): once current_read_len reaches ARRAY_SIZE(len_seq) this
+ * function returns -EIO without resetting it, and no reset on -EIO is
+ * visible in the IRQ thread either. Confirm how reception recovers.
+ */
+static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy,
+				 struct sk_buff *skb)
+{
+	int r, i;
+	u8 len;
+	u8 buf[ST21NFCA_HCI_LLC_MAX_PAYLOAD];
+	struct i2c_client *client = phy->i2c_dev;
+
+	if (phy->current_read_len < ARRAY_SIZE(len_seq)) {
+		len = len_seq[phy->current_read_len];
+
+		/*
+		 * Add retry mechanism
+		 * Operation on I2C interface may fail in case of operation on
+		 * RF or SWP interface
+		 */
+		r = 0;
+		mutex_lock(&phy->phy_lock);
+		for (i = 0; i < ARRAY_SIZE(wait_tab) && r <= 0; i++) {
+			r = i2c_master_recv(client, buf, len);
+			if (r < 0)
+				msleep(wait_tab[i]);
+		}
+		mutex_unlock(&phy->phy_lock);
+
+		if (r != len) {
+			phy->current_read_len = 0;
+			return -EREMOTEIO;
+		}
+
+		/*
+		 * The first read sequence does not start with SOF.
+		 * Data is corrupted so we drop it.
+		 */
+		if (!phy->current_read_len && buf[0] != ST21NFCA_SOF_EOF) {
+			skb_trim(skb, 0);
+			phy->current_read_len = 0;
+			return -EIO;
+		} else if (phy->current_read_len &&
+			IS_START_OF_FRAME(buf)) {
+			/*
+			 * Previous frame transmission was interrupted and
+			 * the frame got repeated.
+			 * Received frame start with ST21NFCA_SOF_EOF + 00.
+			 */
+			skb_trim(skb, 0);
+			phy->current_read_len = 0;
+		}
+
+		memcpy(skb_put(skb, len), buf, len);
+
+		if (skb->data[skb->len - 1] == ST21NFCA_SOF_EOF) {
+			phy->current_read_len = 0;
+			return st21nfca_hci_i2c_repack(skb);
+		}
+		phy->current_read_len++;
+		return -EAGAIN;
+	}
+	return -EIO;
+}
+
+/*
+ * Reads an shdlc frame from the chip. This is not as straightforward as it
+ * seems. The frame format is data-crc, and corruption can occur anywhere
+ * while transiting on i2c bus, such that we could read an invalid data.
+ * The tricky case is when we read a corrupted data or crc. We must detect
+ * this here in order to determine that data can be transmitted to the hci
+ * core. This is the reason why we check the crc here.
+ * The CLF will repeat a frame until we send a RR on that frame.
+ *
+ * On ST21NFCA, IRQ goes in idle when read starts. As no size information are
+ * available in the incoming data, other IRQ might come. Every IRQ will trigger
+ * a read sequence with different length and will fill the current frame.
+ * The reception is complete once we reach a ST21NFCA_SOF_EOF.
+ *
+ * Ownership of phy->pending_skb: a valid frame is handed to
+ * nfc_hci_recv_frame(); in every other case where a new skb is allocated
+ * below, the old one is freed here first so it is not leaked.
+ *
+ * Returns IRQ_NONE if the interrupt is not ours, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id)
+{
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client;
+
+	int r;
+
+	if (!phy || irq != phy->i2c_dev->irq) {
+		WARN_ON_ONCE(1);
+		return IRQ_NONE;
+	}
+
+	client = phy->i2c_dev;
+	dev_dbg(&client->dev, "IRQ\n");
+
+	if (phy->hard_fault != 0)
+		return IRQ_HANDLED;
+
+	r = st21nfca_hci_i2c_read(phy, phy->pending_skb);
+	if (r == -EREMOTEIO) {
+		phy->hard_fault = r;
+
+		/* a NULL frame reports the failure to the HCI core */
+		nfc_hci_recv_frame(phy->hdev, NULL);
+
+		return IRQ_HANDLED;
+	} else if (r == -EAGAIN || r == -EIO) {
+		/* keep the current skb: the frame is continued or restarted */
+		return IRQ_HANDLED;
+	} else if (r == -EBADMSG && phy->crc_trials < ARRAY_SIZE(wait_tab)) {
+		/*
+		 * With ST21NFCA, only one interface (I2C, RF or SWP)
+		 * may be active at a time.
+		 * Having incorrect crc is usually due to i2c macrocell
+		 * deactivation in the middle of a transmission.
+		 * It may generate corrupted data on i2c.
+		 * We give sometime to get i2c back.
+		 * The complete frame will be repeated.
+		 */
+		msleep(wait_tab[phy->crc_trials]);
+		phy->crc_trials++;
+		phy->current_read_len = 0;
+		kfree_skb(phy->pending_skb);
+	} else if (r > 0) {
+		/*
+		 * We succeeded to read data from the CLF and
+		 * data is valid.
+		 * Reset counter.
+		 */
+		nfc_hci_recv_frame(phy->hdev, phy->pending_skb);
+		phy->crc_trials = 0;
+	} else {
+		/*
+		 * Empty frame (r == 0) or CRC retry budget exhausted: the
+		 * skb is not handed to anyone, so drop it before it gets
+		 * replaced below.
+		 */
+		kfree_skb(phy->pending_skb);
+	}
+
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL) {
+		phy->hard_fault = -ENOMEM;
+		nfc_hci_recv_frame(phy->hdev, NULL);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct nfc_phy_ops i2c_phy_ops = {	/* passed to st21nfca_hci_probe() */
+	.write = st21nfca_hci_i2c_write,
+	.enable = st21nfca_hci_i2c_enable,
+	.disable = st21nfca_hci_i2c_disable,
+};
+
+#ifdef CONFIG_OF
+/*
+ * Fetch the enable GPIO and the interrupt from the device tree node of
+ * client, request the GPIO as an output driven high, and store the results
+ * in the phy (gpio_ena, irq_polarity) and in client->irq.
+ *
+ * Returns 0 on success or a negative errno. irq_of_parse_and_map() reports
+ * failure by returning 0, which is turned into -EINVAL here.
+ */
+static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client)
+{
+	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
+	struct device_node *pp;
+	int gpio;
+	int r;
+
+	pp = client->dev.of_node;
+	if (!pp)
+		return -ENODEV;
+
+	/* Get GPIO from device tree */
+	gpio = of_get_named_gpio(pp, "enable-gpios", 0);
+	if (gpio < 0) {
+		nfc_err(&client->dev, "Failed to retrieve enable-gpios from device tree\n");
+		return gpio;
+	}
+
+	/* GPIO request and configuration */
+	r = devm_gpio_request(&client->dev, gpio, "clf_enable");
+	if (r) {
+		nfc_err(&client->dev, "Failed to request enable pin\n");
+		return -ENODEV;
+	}
+
+	r = gpio_direction_output(gpio, 1);
+	if (r) {
+		nfc_err(&client->dev, "Failed to set enable pin direction as output\n");
+		return -ENODEV;
+	}
+	phy->gpio_ena = gpio;
+
+	/* IRQ */
+	r = irq_of_parse_and_map(pp, 0);
+	if (r <= 0) {
+		/* 0 means "no mapping"; never treat it as a valid irq */
+		if (r == 0)
+			r = -EINVAL;
+		nfc_err(&client->dev,
+				"Unable to get irq, error: %d\n", r);
+		return r;
+	}
+
+	phy->irq_polarity = irq_get_trigger_type(r);
+	client->irq = r;
+
+	return 0;
+}
+#else
+/* Without CONFIG_OF there is no device tree to read resources from. */
+static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client)
+{
+	return -ENODEV;
+}
+#endif
+
+/*
+ * Take the GPIO numbers and IRQ polarity from the platform data of client,
+ * request the interrupt GPIO as input and (when gpio_ena > 0) the enable
+ * GPIO as an output driven high, then map the interrupt GPIO to an IRQ
+ * number stored in client->irq.
+ *
+ * Returns 0 on success, -EINVAL without platform data, -ENODEV on any
+ * GPIO/IRQ failure.
+ */
+static int st21nfca_hci_i2c_request_resources(struct i2c_client *client)
+{
+	struct st21nfca_nfc_platform_data *pdata;
+	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
+	int r;
+	int irq;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL) {
+		nfc_err(&client->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	/* store for later use */
+	phy->gpio_irq = pdata->gpio_irq;
+	phy->gpio_ena = pdata->gpio_ena;
+	phy->irq_polarity = pdata->irq_polarity;
+
+	r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up");
+	if (r) {
+		pr_err("%s : gpio_request failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	r = gpio_direction_input(phy->gpio_irq);
+	if (r) {
+		pr_err("%s : gpio_direction_input failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	if (phy->gpio_ena > 0) {
+		r = devm_gpio_request(&client->dev,
+					phy->gpio_ena, "clf_enable");
+		if (r) {
+			pr_err("%s : ena gpio_request failed\n", __FILE__);
+			return -ENODEV;
+		}
+		r = gpio_direction_output(phy->gpio_ena, 1);
+
+		if (r) {
+			pr_err("%s : ena gpio_direction_output failed\n",
+			       __FILE__);
+			return -ENODEV;
+		}
+	}
+
+	/* IRQ */
+	irq = gpio_to_irq(phy->gpio_irq);
+	if (irq < 0) {
+		/* report the gpio_to_irq() error, not the stale r */
+		nfc_err(&client->dev,
+				"Unable to get irq number for GPIO %d error %d\n",
+				phy->gpio_irq, irq);
+		return -ENODEV;
+	}
+	client->irq = irq;
+
+	return 0;
+}
+
+/*
+ * Probe an ST21NFCA on the I2C bus: allocate the phy and its receive skb,
+ * acquire GPIO/IRQ resources from platform data or the device tree, reboot
+ * the chip, install the threaded IRQ handler and register with the
+ * st21nfca HCI core.
+ *
+ * The phy and the IRQ are device-managed, but pending_skb is not: it is
+ * freed explicitly on every failure path after its allocation. On the last
+ * failure path the IRQ is released first so the handler can no longer touch
+ * the skb being freed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct st21nfca_i2c_phy *phy;
+	struct st21nfca_nfc_platform_data *pdata;
+	int r;
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
+		return -ENODEV;
+	}
+
+	phy = devm_kzalloc(&client->dev, sizeof(struct st21nfca_i2c_phy),
+			   GFP_KERNEL);
+	if (!phy) {
+		nfc_err(&client->dev,
+			"Cannot allocate memory for st21nfca i2c phy.\n");
+		return -ENOMEM;
+	}
+
+	phy->i2c_dev = client;
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL)
+		return -ENOMEM;
+
+	phy->current_read_len = 0;
+	phy->crc_trials = 0;
+	mutex_init(&phy->phy_lock);
+	i2c_set_clientdata(client, phy);
+
+	pdata = client->dev.platform_data;
+	if (!pdata && client->dev.of_node) {
+		r = st21nfca_hci_i2c_of_request_resources(client);
+		if (r) {
+			nfc_err(&client->dev, "No platform data\n");
+			goto err_free_skb;
+		}
+	} else if (pdata) {
+		r = st21nfca_hci_i2c_request_resources(client);
+		if (r) {
+			nfc_err(&client->dev, "Cannot get platform resources\n");
+			goto err_free_skb;
+		}
+	} else {
+		nfc_err(&client->dev, "st21nfca platform resources not available\n");
+		r = -ENODEV;
+		goto err_free_skb;
+	}
+
+	r = st21nfca_hci_platform_init(phy);
+	if (r < 0) {
+		nfc_err(&client->dev, "Unable to reboot st21nfca\n");
+		r = -ENODEV;
+		goto err_free_skb;
+	}
+
+	r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+				st21nfca_hci_irq_thread_fn,
+				phy->irq_polarity | IRQF_ONESHOT,
+				ST21NFCA_HCI_DRIVER_NAME, phy);
+	if (r < 0) {
+		nfc_err(&client->dev, "Unable to register IRQ handler\n");
+		goto err_free_skb;
+	}
+
+	r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+			       ST21NFCA_FRAME_HEADROOM, ST21NFCA_FRAME_TAILROOM,
+			       ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev);
+	if (r < 0)
+		goto err_free_irq;
+
+	return r;
+
+err_free_irq:
+	/* stop the handler before releasing the skb it works on */
+	devm_free_irq(&client->dev, client->irq, phy);
+err_free_skb:
+	kfree_skb(phy->pending_skb);
+	phy->pending_skb = NULL;
+	return r;
+}
+
+/*
+ * Unregister from the st21nfca HCI core and power the chip down if it is
+ * still marked as powered. Always returns 0.
+ */
+static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+{
+	struct st21nfca_i2c_phy *i2c_phy = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	st21nfca_hci_remove(i2c_phy->hdev);
+
+	if (!i2c_phy->powered)
+		return 0;
+
+	st21nfca_hci_i2c_disable(i2c_phy);
+	return 0;
+}
+
+/* Device-tree match table; exported so the module can be auto-loaded */
+static const struct of_device_id of_st21nfca_i2c_match[] = {
+	{ .compatible = "st,st21nfca_i2c", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, of_st21nfca_i2c_match);
+
+/* I2C driver definition: probe/remove callbacks plus both match tables */
+static struct i2c_driver st21nfca_hci_i2c_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = ST21NFCA_HCI_I2C_DRIVER_NAME,
+		.of_match_table = of_match_ptr(of_st21nfca_i2c_match),
+	},
+	.probe = st21nfca_hci_i2c_probe,
+	.id_table = st21nfca_hci_i2c_id_table,
+	.remove = st21nfca_hci_i2c_remove,
+};
+
+module_i2c_driver(st21nfca_hci_i2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);

diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c
new file mode 100644
index 0000000..51e0f00b
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.c

@@ -0,0 +1,698 @@
+/*
+ * HCI based Driver for STMicroelectronics NFC Chip
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/nfc.h>
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+
+#include "st21nfca.h"
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define FULL_VERSION_LEN 3
+
+/* Proprietary gates, events, commands and registers */
+
+/* Commands that apply to all RF readers */
+#define ST21NFCA_RF_READER_CMD_PRESENCE_CHECK	0x30
+
+#define ST21NFCA_RF_READER_ISO15693_GATE	0x12
+#define ST21NFCA_RF_READER_ISO15693_INVENTORY 0x01
+
+/*
+ * Reader gate for communication with contact-less cards using Type A
+ * protocol ISO14443-3 but not compliant with ISO14443-4
+ */
+#define ST21NFCA_RF_READER_14443_3_A_GATE	0x15
+#define ST21NFCA_RF_READER_14443_3_A_UID	0x02
+#define ST21NFCA_RF_READER_14443_3_A_ATQA	0x03
+#define ST21NFCA_RF_READER_14443_3_A_SAK	0x04
+
+#define ST21NFCA_DEVICE_MGNT_GATE		0x01
+#define ST21NFCA_DEVICE_MGNT_PIPE		0x02
+
+#define ST21NFCA_DM_GETINFO         0x13
+#define ST21NFCA_DM_GETINFO_PIPE_LIST       0x02
+#define ST21NFCA_DM_GETINFO_PIPE_INFO       0x01
+#define ST21NFCA_DM_PIPE_CREATED        0x02
+#define ST21NFCA_DM_PIPE_OPEN           0x04
+#define ST21NFCA_DM_RF_ACTIVE           0x80
+
+/* True when the low nibble of pipe state p is exactly CREATED | OPEN */
+#define ST21NFCA_DM_IS_PIPE_OPEN(p) \
+	(((p) & 0x0f) == (ST21NFCA_DM_PIPE_CREATED | ST21NFCA_DM_PIPE_OPEN))
+
+#define ST21NFCA_NFC_MODE	0x03	/* NFC_MODE parameter*/
+
+static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); /* presumably tracks device indexes in use - TODO confirm */
+
+static struct nfc_hci_gate st21nfca_gates[] = { /* {gate, pipe}; pipes are updated by st21nfca_hci_load_session() */
+	{NFC_HCI_ADMIN_GATE, NFC_HCI_ADMIN_PIPE},
+	{NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_LINK_MGMT_GATE, NFC_HCI_LINK_MGMT_PIPE},
+	{NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE},
+	{ST21NFCA_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_RF_READER_ISO15693_GATE, NFC_HCI_INVALID_PIPE},
+};
+
+struct st21nfca_pipe_info {	/* layout of one ST21NFCA_DM_GETINFO pipe-info answer */
+	u8 pipe_state;		/* pipe state, tested with ST21NFCA_DM_IS_PIPE_OPEN() */
+	u8 src_host_id;		/* source host id */
+	u8 src_gate_id;		/* source gate id */
+	u8 dst_host_id;		/* destination host id */
+	u8 dst_gate_id;		/* destination gate id, matched against st21nfca_gates[] */
+} __packed;
+
+/* Largest headroom needed for outgoing custom commands */
+#define ST21NFCA_CMDS_HEADROOM  7
+
+/*
+ * Rebuild the gate/pipe table from the pipes that already exist on the chip,
+ * then connect the gates that are still missing and publish the table in
+ * hdev->init_data.gates.
+ *
+ * The response skbs are allocated by nfc_hci_send_cmd(); nothing is
+ * pre-allocated here, and each pipe-info response is freed as soon as it has
+ * been examined.
+ *
+ * Returns the status of the last HCI call (0 on success, negative errno on
+ * failure).
+ * NOTE(review): a failed per-pipe GETINFO is skipped with "continue" but its
+ * error is still returned if no later call overwrites r - confirm intent.
+ */
+static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev)
+{
+	int i, j, r;
+	struct sk_buff *skb_pipe_list = NULL;
+	struct sk_buff *skb_pipe_info;
+	struct st21nfca_pipe_info *info;
+
+	u8 pipe_list[] = { ST21NFCA_DM_GETINFO_PIPE_LIST,
+		NFC_HCI_TERMINAL_HOST_ID
+	};
+	u8 pipe_info[] = { ST21NFCA_DM_GETINFO_PIPE_INFO,
+		NFC_HCI_TERMINAL_HOST_ID, 0
+	};
+
+	/* On ST21NFCA device pipes number are dynamics
+	 * A maximum of 16 pipes can be created at the same time
+	 * If pipes are already created, hci_dev_up will fail.
+	 * Doing a clear all pipe is a bad idea because:
+	 * - It does useless EEPROM cycling
+	 * - It might cause issue for secure elements support
+	 * (such as removing connectivity or APDU reader pipe)
+	 * A better approach on ST21NFCA is to:
+	 * - get a pipe list for each host.
+	 * (eg: NFC_HCI_HOST_CONTROLLER_ID for now).
+	 * (TODO Later on UICC HOST and eSE HOST)
+	 * - get pipe information
+	 * - match retrieved pipe list in st21nfca_gates
+	 * ST21NFCA_DEVICE_MGNT_GATE is a proprietary gate
+	 * with ST21NFCA_DEVICE_MGNT_PIPE.
+	 * Pipe can be closed and need to be open.
+	 */
+	r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID,
+		ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE);
+	if (r < 0)
+		goto free_list;
+
+	/* Get pipe list */
+	r = nfc_hci_send_cmd(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+			ST21NFCA_DM_GETINFO, pipe_list, sizeof(pipe_list),
+			&skb_pipe_list);
+	if (r < 0)
+		goto free_list;
+
+	/* Complete the existing gate_pipe table */
+	for (i = 0; i < skb_pipe_list->len; i++) {
+		pipe_info[2] = skb_pipe_list->data[i];
+		skb_pipe_info = NULL;
+		r = nfc_hci_send_cmd(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+					ST21NFCA_DM_GETINFO, pipe_info,
+					sizeof(pipe_info), &skb_pipe_info);
+
+		if (r)
+			continue;
+
+		/* ignore answers too short to hold a full pipe info record */
+		if (!skb_pipe_info || skb_pipe_info->len < sizeof(*info)) {
+			kfree_skb(skb_pipe_info);
+			continue;
+		}
+
+		/*
+		 * Match pipe ID and gate ID
+		 * Output format from ST21NFC_DM_GETINFO is:
+		 * - pipe state (1byte)
+		 * - source hid (1byte)
+		 * - source gid (1byte)
+		 * - destination hid (1byte)
+		 * - destination gid (1byte)
+		 */
+		info = (struct st21nfca_pipe_info *) skb_pipe_info->data;
+		for (j = 0; (j < ARRAY_SIZE(st21nfca_gates)) &&
+			(st21nfca_gates[j].gate != info->dst_gate_id);
+			j++)
+			;
+
+		if (j < ARRAY_SIZE(st21nfca_gates) &&
+			st21nfca_gates[j].gate == info->dst_gate_id &&
+			ST21NFCA_DM_IS_PIPE_OPEN(info->pipe_state)) {
+			st21nfca_gates[j].pipe = pipe_info[2];
+			hdev->gate2pipe[st21nfca_gates[j].gate] =
+				st21nfca_gates[j].pipe;
+		}
+
+		/* done with this answer; the next iteration gets a new skb */
+		kfree_skb(skb_pipe_info);
+	}
+
+	/*
+	 * 3 gates have a well known pipe ID.
+	 * They will never appear in the pipe list
+	 */
+	if (skb_pipe_list->len + 3 < ARRAY_SIZE(st21nfca_gates)) {
+		for (i = skb_pipe_list->len + 3;
+				i < ARRAY_SIZE(st21nfca_gates); i++) {
+			r = nfc_hci_connect_gate(hdev,
+					NFC_HCI_HOST_CONTROLLER_ID,
+					st21nfca_gates[i].gate,
+					st21nfca_gates[i].pipe);
+			if (r < 0)
+				goto free_list;
+		}
+	}
+
+	memcpy(hdev->init_data.gates, st21nfca_gates, sizeof(st21nfca_gates));
+free_list:
+	kfree_skb(skb_pipe_list);
+	return r;
+}
+
+/*
+ * Enable the physical layer if the device is in the COLD state and move it
+ * to READY on success. Returns -EBUSY when not COLD, otherwise the result
+ * of the phy enable() callback. Serialised by info->info_lock.
+ */
+static int st21nfca_hci_open(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+	int r = -EBUSY;
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state == ST21NFCA_ST_COLD) {
+		r = info->phy_ops->enable(info->phy_id);
+		if (!r)
+			info->state = ST21NFCA_ST_READY;
+	}
+
+	mutex_unlock(&info->info_lock);
+
+	return r;
+}
+
+/*
+ * Disable the physical layer and return to the COLD state, unless the
+ * device is already COLD. Serialised by info->info_lock.
+ */
+static void st21nfca_hci_close(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state != ST21NFCA_ST_COLD) {
+		info->phy_ops->disable(info->phy_id);
+		info->state = ST21NFCA_ST_COLD;
+	}
+
+	mutex_unlock(&info->info_lock);
+}
+
+/*
+ * Finish bringing the chip up once the HCI session exists: whitelist the
+ * UICC host, make sure NFC_MODE is enabled in the device management gate,
+ * end any pending reader operation and dump the software version.
+ *
+ * Every skb returned by nfc_hci_get_param() is freed here on all paths.
+ *
+ * Returns 0 on success, -EINVAL if the version has an unexpected length, or
+ * the negative errno of the failing HCI call.
+ */
+static int st21nfca_hci_ready(struct nfc_hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	u8 param;
+	u8 nfc_mode;
+	int r;
+
+	param = NFC_HCI_UICC_HOST_ID;
+	r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+			      NFC_HCI_ADMIN_WHITELIST, &param, 1);
+	if (r < 0)
+		return r;
+
+	/* Set NFC_MODE in device management gate to enable */
+	r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+			      ST21NFCA_NFC_MODE, &skb);
+	if (r < 0)
+		return r;
+
+	/* release the answer whether or not the mode has to be changed */
+	nfc_mode = skb->data[0];
+	kfree_skb(skb);
+
+	if (nfc_mode == 0) {
+		param = 1;
+
+		r = nfc_hci_set_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+					ST21NFCA_NFC_MODE, &param, 1);
+		if (r < 0)
+			return r;
+	}
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (r < 0)
+		return r;
+
+	r = nfc_hci_get_param(hdev, NFC_HCI_ID_MGMT_GATE,
+			      NFC_HCI_ID_MGMT_VERSION_SW, &skb);
+	if (r < 0)
+		return r;
+
+	if (skb->len != FULL_VERSION_LEN) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	print_hex_dump(KERN_DEBUG, "FULL VERSION SOFTWARE INFO: ",
+		       DUMP_PREFIX_NONE, 16, 1,
+		       skb->data, FULL_VERSION_LEN, false);
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/* Hand an outgoing frame to the phy write() callback and return its result. */
+static int st21nfca_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct st21nfca_hci_info *hci_info = nfc_hci_get_clientdata(hdev);
+	int r;
+
+	r = hci_info->phy_ops->write(hci_info->phy_id, skb);
+
+	return r;
+}
+
+/*
+ * Start polling: end any pending reader operation, then, when im_protocols
+ * is non-zero, disconnect each reader gate whose test against im_protocols
+ * is zero and finally send EVT_READER_REQUESTED (followed by
+ * EVT_END_OPERATION if that request fails).
+ *
+ * Returns the status of the last HCI call that decided the outcome.
+ *
+ * NOTE(review): the gate IDs themselves are ANDed with the im_protocols
+ * bitmask; confirm this is the intended protocol test.
+ */
+static int st21nfca_hci_start_poll(struct nfc_hci_dev *hdev,
+				   u32 im_protocols, u32 tm_protocols)
+{
+	/* reader gates, in the order they are examined */
+	static const u8 reader_gates[] = {
+		NFC_HCI_RF_READER_B_GATE,
+		NFC_HCI_RF_READER_A_GATE,
+		ST21NFCA_RF_READER_F_GATE,
+		ST21NFCA_RF_READER_14443_3_A_GATE,
+		ST21NFCA_RF_READER_ISO15693_GATE,
+	};
+	int idx;
+	int r;
+
+	pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n",
+		__func__, im_protocols, tm_protocols);
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (r < 0)
+		return r;
+
+	if (!im_protocols)
+		return r;
+
+	/*
+	 * enable polling according to im_protocols & tm_protocols
+	 * - CLOSE pipe according to im_protocols & tm_protocols
+	 */
+	for (idx = 0; idx < ARRAY_SIZE(reader_gates); idx++) {
+		if ((reader_gates[idx] & im_protocols) != 0)
+			continue;
+
+		r = nfc_hci_disconnect_gate(hdev, reader_gates[idx]);
+		if (r < 0)
+			return r;
+	}
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_READER_REQUESTED, NULL, 0);
+	if (r < 0)
+		nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+				   NFC_HCI_EVT_END_OPERATION, NULL, 0);
+
+	return r;
+}
+
+/*
+ * Read the ATQA parameter from the ISO14443-3 type A reader gate and store
+ * it, converted from big endian, in *atqa.
+ * Returns the nfc_hci_get_param() result, or -EPROTO if the answer is not
+ * exactly 2 bytes long.
+ */
+static int st21nfca_get_iso14443_3_atqa(struct nfc_hci_dev *hdev, u16 *atqa)
+{
+	struct sk_buff *resp = NULL;
+	int r;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_ATQA, &resp);
+	if (r >= 0) {
+		if (resp->len == 2)
+			*atqa = be16_to_cpu(*(__be16 *) resp->data);
+		else
+			r = -EPROTO;
+	}
+
+	kfree_skb(resp);
+	return r;
+}
+
+static int st21nfca_get_iso14443_3_sak(struct nfc_hci_dev *hdev, u8 *sak)
+{
+	int r;
+	struct sk_buff *sak_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_SAK, &sak_skb);
+	if (r < 0)
+		goto exit;
+
+	if (sak_skb->len != 1) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	*sak = sak_skb->data[0];
+
+exit:
+	kfree_skb(sak_skb);
+	return r;
+}
+
+/*
+ * Read the ISO14443-3 type A UID parameter from the reader gate.
+ *
+ * @hdev: HCI device to query
+ * @gate: caller-provided buffer that receives the UID bytes; at most
+ *        NFC_NFCID1_MAXSIZE bytes are written (despite its name this is
+ *        the output buffer, not a gate id)
+ * @len:  set to the number of UID bytes copied into @gate
+ *
+ * Returns 0 on success, the negative error from nfc_hci_get_param(), or
+ * -EPROTO when the reported UID is empty or longer than NFC_NFCID1_MAXSIZE.
+ * On error neither @gate nor @len is written.
+ */
+static int st21nfca_get_iso14443_3_uid(struct nfc_hci_dev *hdev, u8 *gate,
+				       int *len)
+{
+	int r;
+	struct sk_buff *uid_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_UID, &uid_skb);
+	if (r < 0)
+		goto exit;
+
+	if (uid_skb->len == 0 || uid_skb->len > NFC_NFCID1_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	/*
+	 * Copy the bytes out.  Assigning uid_skb->data to the local pointer
+	 * would leave the caller's buffer untouched, and the skb is freed
+	 * just below anyway.
+	 */
+	memcpy(gate, uid_skb->data, uid_skb->len);
+	*len = uid_skb->len;
+exit:
+	kfree_skb(uid_skb);
+	return r;
+}
+
+static int st21nfca_get_iso15693_inventory(struct nfc_hci_dev *hdev,
+					   struct nfc_target *target)
+{
+	int r;
+	struct sk_buff *inventory_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_ISO15693_GATE,
+			      ST21NFCA_RF_READER_ISO15693_INVENTORY,
+			      &inventory_skb);
+	if (r < 0)
+		goto exit;
+
+	skb_pull(inventory_skb, 2);
+
+	if (inventory_skb->len == 0 ||
+	    inventory_skb->len > NFC_ISO15693_UID_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	memcpy(target->iso15693_uid, inventory_skb->data, inventory_skb->len);
+	target->iso15693_dsfid	= inventory_skb->data[1];
+	target->is_iso15693 = 1;
+exit:
+	kfree_skb(inventory_skb);
+	return r;
+}
+
+static int st21nfca_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
+					 struct nfc_target *target)
+{
+	int r, len;
+	u16 atqa;
+	u8 sak;
+	u8 uid[NFC_NFCID1_MAXSIZE];
+
+	switch (gate) {
+	case ST21NFCA_RF_READER_F_GATE:
+		target->supported_protocols = NFC_PROTO_FELICA_MASK;
+		break;
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		/* ISO14443-3 type 1 or 2 tags */
+		r = st21nfca_get_iso14443_3_atqa(hdev, &atqa);
+		if (r < 0)
+			return r;
+		if (atqa == 0x000c) {
+			target->supported_protocols = NFC_PROTO_JEWEL_MASK;
+			target->sens_res = 0x0c00;
+		} else {
+			r = st21nfca_get_iso14443_3_sak(hdev, &sak);
+			if (r < 0)
+				return r;
+
+			r = st21nfca_get_iso14443_3_uid(hdev, uid, &len);
+			if (r < 0)
+				return r;
+
+			target->supported_protocols =
+			    nfc_hci_sak_to_protocol(sak);
+			if (target->supported_protocols == 0xffffffff)
+				return -EPROTO;
+
+			target->sens_res = atqa;
+			target->sel_res = sak;
+			memcpy(target->nfcid1, uid, len);
+			target->nfcid1_len = len;
+		}
+
+		break;
+	case ST21NFCA_RF_READER_ISO15693_GATE:
+		target->supported_protocols = NFC_PROTO_ISO15693_MASK;
+		r = st21nfca_get_iso15693_inventory(hdev, target);
+		if (r < 0)
+			return r;
+		break;
+	default:
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+#define ST21NFCA_CB_TYPE_READER_ISO15693 1
+static void st21nfca_hci_data_exchange_cb(void *context, struct sk_buff *skb,
+					  int err)
+{
+	struct st21nfca_hci_info *info = context;
+
+	switch (info->async_cb_type) {
+	case ST21NFCA_CB_TYPE_READER_ISO15693:
+		if (err == 0)
+			skb_trim(skb, skb->len - 1);
+		info->async_cb(info->async_cb_context, skb, err);
+		break;
+	default:
+		if (err == 0)
+			kfree_skb(skb);
+		break;
+	}
+}
+
+/*
+ * Returns:
+ * <= 0: driver handled the data exchange
+ *    1: driver doesn't especially handle, please do standard processing
+ */
+static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev,
+				      struct nfc_target *target,
+				      struct sk_buff *skb,
+				      data_exchange_cb_t cb, void *cb_context)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	pr_info(DRIVER_DESC ": %s for gate=%d len=%d\n", __func__,
+		target->hci_reader_gate, skb->len);
+
+	switch (target->hci_reader_gate) {
+	case ST21NFCA_RF_READER_F_GATE:
+		*skb_push(skb, 1) = 0x1a;
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      ST21NFCA_WR_XCHG_DATA, skb->data,
+					      skb->len, cb, cb_context);
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		*skb_push(skb, 1) = 0x1a;	/* CTR, see spec:10.2.2.1 */
+
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      ST21NFCA_WR_XCHG_DATA, skb->data,
+					      skb->len, cb, cb_context);
+	case ST21NFCA_RF_READER_ISO15693_GATE:
+		info->async_cb_type = ST21NFCA_CB_TYPE_READER_ISO15693;
+		info->async_cb = cb;
+		info->async_cb_context = cb_context;
+
+		*skb_push(skb, 1) = 0x17;
+
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      ST21NFCA_WR_XCHG_DATA, skb->data,
+					      skb->len,
+					      st21nfca_hci_data_exchange_cb,
+					      info);
+		break;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Check whether @target is still in the field, using a command chosen by
+ * the reader gate the target was discovered on.
+ *
+ * Returns the result of nfc_hci_send_cmd() for the supported gates, or
+ * -EOPNOTSUPP for any other gate.
+ */
+static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev,
+				       struct nfc_target *target)
+{
+	u8 fwi = 0x11;
+	switch (target->hci_reader_gate) {
+	case NFC_HCI_RF_READER_A_GATE:
+	case NFC_HCI_RF_READER_B_GATE:
+		/*
+		 * PRESENCE_CHECK on those gates is available.
+		 * However, the answer to this command takes 3 * fwi
+		 * if the card is not present.
+		 * Instead, we send an empty I-Frame with a very short
+		 * configurable fwi ~604µs.
+		 */
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_WR_XCHG_DATA, &fwi, 1, NULL);
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_RF_READER_CMD_PRESENCE_CHECK,
+					NULL, 0, NULL);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct nfc_hci_ops st21nfca_hci_ops = {
+	.open = st21nfca_hci_open,
+	.close = st21nfca_hci_close,
+	.load_session = st21nfca_hci_load_session,
+	.hci_ready = st21nfca_hci_ready,
+	.xmit = st21nfca_hci_xmit,
+	.start_poll = st21nfca_hci_start_poll,
+	.target_from_gate = st21nfca_hci_target_from_gate,
+	.im_transceive = st21nfca_hci_im_transceive,
+	.check_presence = st21nfca_hci_check_presence,
+};
+
+/*
+ * Allocate and register an NFC HCI device for an ST21NFCA chip.
+ *
+ * @phy_id:       opaque handle passed back to @phy_ops callbacks
+ * @phy_ops:      physical-layer operations used to talk to the chip
+ * @llc_name:     name of the LLC layer handed to nfc_hci_allocate_device()
+ * @phy_headroom: headroom required by the phy; ST21NFCA_CMDS_HEADROOM is
+ *                added on top of it
+ * @phy_tailroom: tailroom required by the phy
+ * @phy_payload:  maximum payload handed to nfc_hci_allocate_device()
+ * @hdev:         on success, receives the registered HCI device
+ *
+ * Returns 0 on success.  On failure returns a negative errno (-ENOMEM,
+ * -ENODEV when no device number is free, or the error from
+ * nfc_hci_register_device()), frees everything allocated here and leaves
+ * *@hdev untouched.
+ */
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev)
+{
+	struct st21nfca_hci_info *info;
+	int r = 0;
+	int dev_num;
+	u32 protocols;
+	struct nfc_hci_init_data init_data;
+	unsigned long quirks = 0;
+
+	info = kzalloc(sizeof(struct st21nfca_hci_info), GFP_KERNEL);
+	if (!info) {
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	info->phy_ops = phy_ops;
+	info->phy_id = phy_id;
+	info->state = ST21NFCA_ST_COLD;
+	mutex_init(&info->info_lock);
+
+	init_data.gate_count = ARRAY_SIZE(st21nfca_gates);
+
+	memcpy(init_data.gates, st21nfca_gates, sizeof(st21nfca_gates));
+
+	/*
+	 * Session id must include the driver name + i2c bus addr
+	 * persistent info to discriminate 2 identical chips
+	 */
+	dev_num = find_first_zero_bit(dev_mask, ST21NFCA_NUM_DEVICES);
+	if (dev_num >= ST21NFCA_NUM_DEVICES) {
+		/*
+		 * Report the failure: leaving r at 0 here would make the
+		 * caller believe *hdev is valid although it was never set.
+		 */
+		r = -ENODEV;
+		goto err_alloc_hdev;
+	}
+	/*
+	 * NOTE(review): nothing in this function marks dev_num as used in
+	 * dev_mask - confirm whether the bit is claimed elsewhere.
+	 */
+
+	scnprintf(init_data.session_id, sizeof(init_data.session_id), "%s%2x",
+		  "ST21AH", dev_num);
+
+	protocols = NFC_PROTO_JEWEL_MASK |
+	    NFC_PROTO_MIFARE_MASK |
+	    NFC_PROTO_FELICA_MASK |
+	    NFC_PROTO_ISO14443_MASK |
+	    NFC_PROTO_ISO14443_B_MASK |
+	    NFC_PROTO_ISO15693_MASK;
+
+	set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks);
+
+	info->hdev =
+	    nfc_hci_allocate_device(&st21nfca_hci_ops, &init_data, quirks,
+				    protocols, llc_name,
+				    phy_headroom + ST21NFCA_CMDS_HEADROOM,
+				    phy_tailroom, phy_payload);
+
+	if (!info->hdev) {
+		pr_err("Cannot allocate nfc hdev.\n");
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
+
+	*hdev = info->hdev;
+
+	return 0;
+
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
+	/* kfree(NULL) is a no-op, so this label also serves the kzalloc failure */
+	kfree(info);
+
+	return r;
+}
+EXPORT_SYMBOL(st21nfca_hci_probe);
+
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	nfc_hci_unregister_device(hdev);
+	nfc_hci_free_device(hdev);
+	kfree(info);
+}
+EXPORT_SYMBOL(st21nfca_hci_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);

diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
new file mode 100644
index 0000000..334cd90
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.h

@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LOCAL_ST21NFCA_H_
+#define __LOCAL_ST21NFCA_H_
+
+#include <net/nfc/hci.h>
+
+#define HCI_MODE 0
+
+/* framing in HCI mode */
+#define ST21NFCA_SOF_EOF_LEN    2
+
+/* Almost every time value is 0 */
+#define ST21NFCA_HCI_LLC_LEN    1
+
+/* Size in worst case :
+ * In normal case CRC len = 2 but byte stuffing
+ * may appear in case one CRC byte = ST21NFCA_SOF_EOF
+ */
+#define ST21NFCA_HCI_LLC_CRC    4
+
+#define ST21NFCA_HCI_LLC_LEN_CRC        (ST21NFCA_SOF_EOF_LEN + \
+						ST21NFCA_HCI_LLC_LEN + \
+						ST21NFCA_HCI_LLC_CRC)
+#define ST21NFCA_HCI_LLC_MIN_SIZE       (1 + ST21NFCA_HCI_LLC_LEN_CRC)
+
+/* Worst case when adding byte stuffing between each byte */
+#define ST21NFCA_HCI_LLC_MAX_PAYLOAD    29
+#define ST21NFCA_HCI_LLC_MAX_SIZE       (ST21NFCA_HCI_LLC_LEN_CRC + 1 + \
+					ST21NFCA_HCI_LLC_MAX_PAYLOAD)
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define ST21NFCA_HCI_MODE 0
+
+#define ST21NFCA_NUM_DEVICES 256
+
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev);
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev);
+
+enum st21nfca_state {
+	ST21NFCA_ST_COLD,
+	ST21NFCA_ST_READY,
+};
+
+struct st21nfca_hci_info {
+	struct nfc_phy_ops *phy_ops;
+	void *phy_id;
+
+	struct nfc_hci_dev *hdev;
+
+	enum st21nfca_state state;
+
+	struct mutex info_lock;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
+
+} __packed;
+
+/* Reader RF commands */
+#define ST21NFCA_WR_XCHG_DATA            0x10
+
+#define ST21NFCA_RF_READER_F_GATE               0x14
+#define ST21NFCA_RF_READER_F_DATARATE 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_106 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_212 0x02
+#define ST21NFCA_RF_READER_F_DATARATE_424 0x04
+
+#endif /* __LOCAL_ST21NFCA_H_ */

diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index d9babe9..3b78b03 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c

@@ -16,6 +16,7 @@
 #include <linux/device.h>
 #include <linux/netdevice.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 #include <linux/nfc.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
@@ -67,14 +68,14 @@
  * only the SRX bit set, it means that all of the data has been received
  * (once what's in the fifo has been read).  However, depending on timing
  * an interrupt status with only the SRX bit set may not be recived.  In
- * those cases, the timeout mechanism is used to wait 5 ms in case more
- * data arrives.  After 5 ms, it is assumed that all of the data has been
+ * those cases, the timeout mechanism is used to wait 20 ms in case more
+ * data arrives.  After 20 ms, it is assumed that all of the data has been
  * received and the accumulated rx data is sent upstream.  The
  * 'TRF7970A_ST_WAIT_FOR_RX_DATA_CONT' state is used for this purpose
  * (i.e., it indicates that some data has been received but we're not sure
  * if there is more coming so a timeout in this state means all data has
- * been received and there isn't an error).  The delay is 5 ms since delays
- * over 2 ms have been observed during testing (a little extra just in case).
+ * been received and there isn't an error).  The delay is 20 ms since delays
+ * of ~16 ms have been observed during testing.
  *
  * Type 2 write and sector select commands respond with a 4-bit ACK or NACK.
  * Having only 4 bits in the FIFO won't normally generate an interrupt so
@@ -104,8 +105,11 @@
 
 #define TRF7970A_SUPPORTED_PROTOCOLS \
 		(NFC_PROTO_MIFARE_MASK | NFC_PROTO_ISO14443_MASK |	\
+		 NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_FELICA_MASK | \
 		 NFC_PROTO_ISO15693_MASK)
 
+#define TRF7970A_AUTOSUSPEND_DELAY		30000 /* 30 seconds */
+
 /* TX data must be prefixed with a FIFO reset cmd, a cmd that depends
  * on what the current framing is, the address of the TX length byte 1
  * register (0x1d), and the 2 byte length of the data to be transmitted.
@@ -120,7 +124,7 @@
 /* TX length is 3 nibbles long ==> 4KB - 1 bytes max */
 #define TRF7970A_TX_MAX				(4096 - 1)
 
-#define TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT	5
+#define TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT	20
 #define TRF7970A_WAIT_FOR_FIFO_DRAIN_TIMEOUT	3
 #define TRF7970A_WAIT_TO_ISSUE_ISO15693_EOF	20
 
@@ -330,13 +334,15 @@
 	struct regulator		*regulator;
 	struct nfc_digital_dev		*ddev;
 	u32				quirks;
-	bool				powering_up;
 	bool				aborting;
 	struct sk_buff			*tx_skb;
 	struct sk_buff			*rx_skb;
 	nfc_digital_cmd_complete_t	cb;
 	void				*cb_arg;
+	u8				chip_status_ctrl;
 	u8				iso_ctrl;
+	u8				iso_ctrl_tech;
+	u8				modulator_sys_clk_ctrl;
 	u8				special_fcn_reg1;
 	int				technology;
 	int				framing;
@@ -681,7 +687,9 @@
 			trf->ignore_timeout =
 				!cancel_delayed_work(&trf->timeout_work);
 			trf7970a_drain_fifo(trf, status);
-		} else if (!(status & TRF7970A_IRQ_STATUS_TX)) {
+		} else if (status == TRF7970A_IRQ_STATUS_TX) {
+			trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
+		} else {
 			trf7970a_send_err_upstream(trf, -EIO);
 		}
 		break;
@@ -757,8 +765,8 @@
 	if (ret)
 		goto err_out;
 
-	ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-			TRF7970A_MODULATOR_DEPTH_OOK);
+	/* Must clear NFC Target Detection Level reg due to erratum */
+	ret = trf7970a_write(trf, TRF7970A_NFC_TARGET_LEVEL, 0);
 	if (ret)
 		goto err_out;
 
@@ -774,12 +782,7 @@
 
 	trf->special_fcn_reg1 = 0;
 
-	ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-			TRF7970A_CHIP_STATUS_RF_ON |
-				TRF7970A_CHIP_STATUS_VRS5_3);
-	if (ret)
-		goto err_out;
-
+	trf->iso_ctrl = 0xff;
 	return 0;
 
 err_out:
@@ -791,53 +794,29 @@
 {
 	dev_dbg(trf->dev, "Switching rf off\n");
 
-	gpio_set_value(trf->en_gpio, 0);
-	gpio_set_value(trf->en2_gpio, 0);
+	trf->chip_status_ctrl &= ~TRF7970A_CHIP_STATUS_RF_ON;
+
+	trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL, trf->chip_status_ctrl);
 
 	trf->aborting = false;
 	trf->state = TRF7970A_ST_OFF;
+
+	pm_runtime_mark_last_busy(trf->dev);
+	pm_runtime_put_autosuspend(trf->dev);
 }
 
-static int trf7970a_switch_rf_on(struct trf7970a *trf)
+static void trf7970a_switch_rf_on(struct trf7970a *trf)
 {
-	unsigned long delay;
-	int ret;
-
 	dev_dbg(trf->dev, "Switching rf on\n");
 
-	if (trf->powering_up)
-		usleep_range(5000, 6000);
+	pm_runtime_get_sync(trf->dev);
 
-	gpio_set_value(trf->en2_gpio, 1);
-	usleep_range(1000, 2000);
-	gpio_set_value(trf->en_gpio, 1);
-
-	/* The delay between enabling the trf7970a and issuing the first
-	 * command is significantly longer the very first time after powering
-	 * up.  Make sure the longer delay is only done the first time.
-	 */
-	if (trf->powering_up) {
-		delay = 20000;
-		trf->powering_up = false;
-	} else {
-		delay = 5000;
-	}
-
-	usleep_range(delay, delay + 1000);
-
-	ret = trf7970a_init(trf);
-	if (ret)
-		trf7970a_switch_rf_off(trf);
-	else
-		trf->state = TRF7970A_ST_IDLE;
-
-	return ret;
+	trf->state = TRF7970A_ST_IDLE;
 }
 
 static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
-	int ret = 0;
 
 	dev_dbg(trf->dev, "Switching RF - state: %d, on: %d\n", trf->state, on);
 
@@ -846,7 +825,7 @@
 	if (on) {
 		switch (trf->state) {
 		case TRF7970A_ST_OFF:
-			ret = trf7970a_switch_rf_on(trf);
+			trf7970a_switch_rf_on(trf);
 			break;
 		case TRF7970A_ST_IDLE:
 		case TRF7970A_ST_IDLE_RX_BLOCKED:
@@ -871,7 +850,7 @@
 	}
 
 	mutex_unlock(&trf->lock);
-	return ret;
+	return 0;
 }
 
 static int trf7970a_config_rf_tech(struct trf7970a *trf, int tech)
@@ -882,10 +861,24 @@
 
 	switch (tech) {
 	case NFC_DIGITAL_RF_TECH_106A:
-		trf->iso_ctrl = TRF7970A_ISO_CTRL_14443A_106;
+		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443A_106;
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_DEPTH_OOK;
+		break;
+	case NFC_DIGITAL_RF_TECH_106B:
+		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443B_106;
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_DEPTH_ASK10;
+		break;
+	case NFC_DIGITAL_RF_TECH_212F:
+		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_212;
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_DEPTH_ASK10;
+		break;
+	case NFC_DIGITAL_RF_TECH_424F:
+		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_424;
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_DEPTH_ASK10;
 		break;
 	case NFC_DIGITAL_RF_TECH_ISO15693:
-		trf->iso_ctrl = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
+		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_DEPTH_OOK;
 		break;
 	default:
 		dev_dbg(trf->dev, "Unsupported rf technology: %d\n", tech);
@@ -899,24 +892,31 @@
 
 static int trf7970a_config_framing(struct trf7970a *trf, int framing)
 {
+	u8 iso_ctrl = trf->iso_ctrl_tech;
+	int ret;
+
 	dev_dbg(trf->dev, "framing: %d\n", framing);
 
 	switch (framing) {
 	case NFC_DIGITAL_FRAMING_NFCA_SHORT:
 	case NFC_DIGITAL_FRAMING_NFCA_STANDARD:
 		trf->tx_cmd = TRF7970A_CMD_TRANSMIT_NO_CRC;
-		trf->iso_ctrl |= TRF7970A_ISO_CTRL_RX_CRC_N;
+		iso_ctrl |= TRF7970A_ISO_CTRL_RX_CRC_N;
 		break;
 	case NFC_DIGITAL_FRAMING_NFCA_STANDARD_WITH_CRC_A:
 	case NFC_DIGITAL_FRAMING_NFCA_T4T:
+	case NFC_DIGITAL_FRAMING_NFCB:
+	case NFC_DIGITAL_FRAMING_NFCB_T4T:
+	case NFC_DIGITAL_FRAMING_NFCF:
+	case NFC_DIGITAL_FRAMING_NFCF_T3T:
 	case NFC_DIGITAL_FRAMING_ISO15693_INVENTORY:
 	case NFC_DIGITAL_FRAMING_ISO15693_T5T:
 		trf->tx_cmd = TRF7970A_CMD_TRANSMIT;
-		trf->iso_ctrl &= ~TRF7970A_ISO_CTRL_RX_CRC_N;
+		iso_ctrl &= ~TRF7970A_ISO_CTRL_RX_CRC_N;
 		break;
 	case NFC_DIGITAL_FRAMING_NFCA_T2T:
 		trf->tx_cmd = TRF7970A_CMD_TRANSMIT;
-		trf->iso_ctrl |= TRF7970A_ISO_CTRL_RX_CRC_N;
+		iso_ctrl |= TRF7970A_ISO_CTRL_RX_CRC_N;
 		break;
 	default:
 		dev_dbg(trf->dev, "Unsupported Framing: %d\n", framing);
@@ -925,24 +925,46 @@
 
 	trf->framing = framing;
 
-	return trf7970a_write(trf, TRF7970A_ISO_CTRL, trf->iso_ctrl);
+	if (iso_ctrl != trf->iso_ctrl) {
+		ret = trf7970a_write(trf, TRF7970A_ISO_CTRL, iso_ctrl);
+		if (ret)
+			return ret;
+
+		trf->iso_ctrl = iso_ctrl;
+
+		ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
+				trf->modulator_sys_clk_ctrl);
+		if (ret)
+			return ret;
+	}
+
+	if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
+		ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
+				trf->chip_status_ctrl |
+					TRF7970A_CHIP_STATUS_RF_ON);
+		if (ret)
+			return ret;
+
+		trf->chip_status_ctrl |= TRF7970A_CHIP_STATUS_RF_ON;
+
+		usleep_range(5000, 6000);
+	}
+
+	return 0;
 }
 
 static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
 		int param)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
-	int ret = 0;
+	int ret;
 
 	dev_dbg(trf->dev, "Configure hw - type: %d, param: %d\n", type, param);
 
 	mutex_lock(&trf->lock);
 
-	if (trf->state == TRF7970A_ST_OFF) {
-		ret = trf7970a_switch_rf_on(trf);
-		if (ret)
-			goto err_out;
-	}
+	if (trf->state == TRF7970A_ST_OFF)
+		trf7970a_switch_rf_on(trf);
 
 	switch (type) {
 	case NFC_DIGITAL_CONFIG_RF_TECH:
@@ -956,7 +978,6 @@
 		ret = -EINVAL;
 	}
 
-err_out:
 	mutex_unlock(&trf->lock);
 	return ret;
 }
@@ -1191,7 +1212,18 @@
 	dev_dbg(trf->dev, "Abort process initiated\n");
 
 	mutex_lock(&trf->lock);
-	trf->aborting = true;
+
+	switch (trf->state) {
+	case TRF7970A_ST_WAIT_FOR_TX_FIFO:
+	case TRF7970A_ST_WAIT_FOR_RX_DATA:
+	case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT:
+	case TRF7970A_ST_WAIT_TO_ISSUE_EOF:
+		trf->aborting = true;
+		break;
+	default:
+		break;
+	}
+
 	mutex_unlock(&trf->lock);
 }
 
@@ -1206,12 +1238,25 @@
 	.abort_cmd		= trf7970a_abort_cmd,
 };
 
+/*
+ * Return the autosuspend delay to use for the device: the value of the
+ * "autosuspend-delay" device tree property of @np when it can be read,
+ * otherwise TRF7970A_AUTOSUSPEND_DELAY.
+ */
+static int trf7970a_get_autosuspend_delay(struct device_node *np)
+{
+	/* of_property_read_u32() writes through a u32 pointer, so use a u32 */
+	u32 autosuspend_delay;
+
+	if (of_property_read_u32(np, "autosuspend-delay", &autosuspend_delay))
+		autosuspend_delay = TRF7970A_AUTOSUSPEND_DELAY;
+
+	/*
+	 * No of_node_put() here: this function takes no reference on @np,
+	 * so dropping one would unbalance the node's refcount.
+	 */
+	return autosuspend_delay;
+}
+
 static int trf7970a_probe(struct spi_device *spi)
 {
 	struct device_node *np = spi->dev.of_node;
 	const struct spi_device_id *id = spi_get_device_id(spi);
 	struct trf7970a *trf;
-	int ret;
+	int uvolts, autosuspend_delay, ret;
 
 	if (!np) {
 		dev_err(&spi->dev, "No Device Tree entry\n");
@@ -1281,7 +1326,10 @@
 		goto err_destroy_lock;
 	}
 
-	trf->powering_up = true;
+	uvolts = regulator_get_voltage(trf->regulator);
+
+	if (uvolts > 4000000)
+		trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
 
 	trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops,
 			TRF7970A_SUPPORTED_PROTOCOLS,
@@ -1297,6 +1345,12 @@
 	nfc_digital_set_drvdata(trf->ddev, trf);
 	spi_set_drvdata(spi, trf);
 
+	autosuspend_delay = trf7970a_get_autosuspend_delay(np);
+
+	pm_runtime_set_autosuspend_delay(trf->dev, autosuspend_delay);
+	pm_runtime_use_autosuspend(trf->dev);
+	pm_runtime_enable(trf->dev);
+
 	ret = nfc_digital_register_device(trf->ddev);
 	if (ret) {
 		dev_err(trf->dev, "Can't register NFC digital device: %d\n",
@@ -1307,6 +1361,7 @@
 	return 0;
 
 err_free_ddev:
+	pm_runtime_disable(trf->dev);
 	nfc_digital_free_device(trf->ddev);
 err_disable_regulator:
 	regulator_disable(trf->regulator);
@@ -1321,15 +1376,16 @@
 
 	mutex_lock(&trf->lock);
 
-	trf7970a_switch_rf_off(trf);
-	trf7970a_init(trf);
-
 	switch (trf->state) {
 	case TRF7970A_ST_WAIT_FOR_TX_FIFO:
 	case TRF7970A_ST_WAIT_FOR_RX_DATA:
 	case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT:
 	case TRF7970A_ST_WAIT_TO_ISSUE_EOF:
 		trf7970a_send_err_upstream(trf, -ECANCELED);
+		/* FALLTHROUGH */
+	case TRF7970A_ST_IDLE:
+	case TRF7970A_ST_IDLE_RX_BLOCKED:
+		pm_runtime_put_sync(trf->dev);
 		break;
 	default:
 		break;
@@ -1337,6 +1393,8 @@
 
 	mutex_unlock(&trf->lock);
 
+	pm_runtime_disable(trf->dev);
+
 	nfc_digital_unregister_device(trf->ddev);
 	nfc_digital_free_device(trf->ddev);
 
@@ -1347,6 +1405,70 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_RUNTIME
+static int trf7970a_pm_runtime_suspend(struct device *dev)
+{
+	struct spi_device *spi = container_of(dev, struct spi_device, dev);
+	struct trf7970a *trf = spi_get_drvdata(spi);
+	int ret;
+
+	dev_dbg(dev, "Runtime suspend\n");
+
+	if (trf->state != TRF7970A_ST_OFF) {
+		dev_dbg(dev, "Can't suspend - not in OFF state (%d)\n",
+				trf->state);
+		return -EBUSY;
+	}
+
+	gpio_set_value(trf->en_gpio, 0);
+	gpio_set_value(trf->en2_gpio, 0);
+
+	ret = regulator_disable(trf->regulator);
+	if (ret)
+		dev_err(dev, "%s - Can't disable VIN: %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int trf7970a_pm_runtime_resume(struct device *dev)
+{
+	struct spi_device *spi = container_of(dev, struct spi_device, dev);
+	struct trf7970a *trf = spi_get_drvdata(spi);
+	int ret;
+
+	dev_dbg(dev, "Runtime resume\n");
+
+	ret = regulator_enable(trf->regulator);
+	if (ret) {
+		dev_err(dev, "%s - Can't enable VIN: %d\n", __func__, ret);
+		return ret;
+	}
+
+	usleep_range(5000, 6000);
+
+	gpio_set_value(trf->en2_gpio, 1);
+	usleep_range(1000, 2000);
+	gpio_set_value(trf->en_gpio, 1);
+
+	usleep_range(20000, 21000);
+
+	ret = trf7970a_init(trf);
+	if (ret) {
+		dev_err(dev, "%s - Can't initialize: %d\n", __func__, ret);
+		return ret;
+	}
+
+	pm_runtime_mark_last_busy(dev);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops trf7970a_pm_ops = {
+	SET_RUNTIME_PM_OPS(trf7970a_pm_runtime_suspend,
+			trf7970a_pm_runtime_resume, NULL)
+};
+
 static const struct spi_device_id trf7970a_id_table[] = {
 	{ "trf7970a", TRF7970A_QUIRK_IRQ_STATUS_READ_ERRATA },
 	{ }
@@ -1360,6 +1482,7 @@
 	.driver		= {
 		.name	= "trf7970a",
 		.owner	= THIS_MODULE,
+		.pm	= &trf7970a_pm_ops,
 	},
 };
 

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 9a95831..fb4a598 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c

@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/err.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -22,27 +23,6 @@
 MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
 MODULE_LICENSE("GPL");
 
-static void of_set_phy_supported(struct phy_device *phydev, u32 max_speed)
-{
-	/* The default values for phydev->supported are provided by the PHY
-	 * driver "features" member, we want to reset to sane defaults fist
-	 * before supporting higher speeds.
-	 */
-	phydev->supported &= PHY_DEFAULT_FEATURES;
-
-	switch (max_speed) {
-	default:
-		return;
-
-	case SPEED_1000:
-		phydev->supported |= PHY_1000BT_FEATURES;
-	case SPEED_100:
-		phydev->supported |= PHY_100BT_FEATURES;
-	case SPEED_10:
-		phydev->supported |= PHY_10BT_FEATURES;
-	}
-}
-
 /* Extract the clause 22 phy ID from the compatible string of the form
  * ethernet-phy-idAAAA.BBBB */
 static int of_get_phy_id(struct device_node *device, u32 *phy_id)
@@ -66,7 +46,6 @@
 	struct phy_device *phy;
 	bool is_c45;
 	int rc;
-	u32 max_speed = 0;
 	u32 phy_id;
 
 	is_c45 = of_device_is_compatible(child,
@@ -103,17 +82,33 @@
 		return 1;
 	}
 
-	/* Set phydev->supported based on the "max-speed" property
-	 * if present */
-	if (!of_property_read_u32(child, "max-speed", &max_speed))
-		of_set_phy_supported(phy, max_speed);
-
 	dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
 		child->name, addr);
 
 	return 0;
 }
 
+static int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
+{
+	u32 addr;
+	int ret;
+
+	ret = of_property_read_u32(np, "reg", &addr);
+	if (ret < 0) {
+		dev_err(dev, "%s has invalid PHY address\n", np->full_name);
+		return ret;
+	}
+
+	/* A PHY must have a reg property in the range [0-31] */
+	if (addr >= PHY_MAX_ADDR) {
+		dev_err(dev, "%s PHY address %i is too large\n",
+			np->full_name, addr);
+		return -EINVAL;
+	}
+
+	return addr;
+}
+
 /**
  * of_mdiobus_register - Register mii_bus and create PHYs from the device tree
  * @mdio: pointer to mii_bus structure
@@ -126,9 +121,8 @@
 {
 	struct device_node *child;
 	const __be32 *paddr;
-	u32 addr;
 	bool scanphys = false;
-	int rc, i, len;
+	int addr, rc, i;
 
 	/* Mask out all PHYs from auto probing.  Instead the PHYs listed in
 	 * the device tree are populated after the bus has been registered */
@@ -148,19 +142,9 @@
 
 	/* Loop over the child nodes and register a phy_device for each one */
 	for_each_available_child_of_node(np, child) {
-		/* A PHY must have a reg property in the range [0-31] */
-		paddr = of_get_property(child, "reg", &len);
-		if (!paddr || len < sizeof(*paddr)) {
+		addr = of_mdio_parse_addr(&mdio->dev, child);
+		if (addr < 0) {
 			scanphys = true;
-			dev_err(&mdio->dev, "%s has invalid PHY address\n",
-				child->full_name);
-			continue;
-		}
-
-		addr = be32_to_cpup(paddr);
-		if (addr >= PHY_MAX_ADDR) {
-			dev_err(&mdio->dev, "%s PHY address %i is too large\n",
-				child->full_name, addr);
 			continue;
 		}
 
@@ -175,7 +159,7 @@
 	/* auto scan for PHYs with empty reg property */
 	for_each_available_child_of_node(np, child) {
 		/* Skip PHYs with reg property set */
-		paddr = of_get_property(child, "reg", &len);
+		paddr = of_get_property(child, "reg", NULL);
 		if (paddr)
 			continue;
 
@@ -198,6 +182,40 @@
 }
 EXPORT_SYMBOL(of_mdiobus_register);
 
+/**
+ * of_mdiobus_link_phydev - Find a device node for a phy
+ * @mdio: pointer to mii_bus structure
+ * @phydev: phydev for which the of_node pointer should be set
+ *
+ * Walk the list of subnodes of a mdio bus and look for a node that matches the
+ * phy's address with its 'reg' property. If found, set the of_node pointer for
+ * the phy. This allows auto-probed phy devices to be supplied with information
+ * passed in via DT.
+ */
+void of_mdiobus_link_phydev(struct mii_bus *mdio,
+			    struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct device_node *child;
+
+	if (dev->of_node || !mdio->dev.of_node)
+		return;
+
+	for_each_available_child_of_node(mdio->dev.of_node, child) {
+		int addr;
+
+		addr = of_mdio_parse_addr(&mdio->dev, child);
+		if (addr < 0)
+			continue;
+
+		if (addr == phydev->addr) {
+			dev->of_node = child;
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL(of_mdiobus_link_phydev);
+
 /* Helper function for of_phy_find_device */
 static int of_phy_match(struct device *dev, void *phy_np)
 {
@@ -245,44 +263,6 @@
 EXPORT_SYMBOL(of_phy_connect);
 
 /**
- * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy
- * @dev: pointer to net_device claiming the phy
- * @hndlr: Link state callback for the network device
- * @iface: PHY data interface type
- *
- * This function is a temporary stop-gap and will be removed soon.  It is
- * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers.  Do
- * not call this function from new drivers.
- */
-struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					     void (*hndlr)(struct net_device *),
-					     phy_interface_t iface)
-{
-	struct device_node *net_np;
-	char bus_id[MII_BUS_ID_SIZE + 3];
-	struct phy_device *phy;
-	const __be32 *phy_id;
-	int sz;
-
-	if (!dev->dev.parent)
-		return NULL;
-
-	net_np = dev->dev.parent->of_node;
-	if (!net_np)
-		return NULL;
-
-	phy_id = of_get_property(net_np, "fixed-link", &sz);
-	if (!phy_id || sz < sizeof(*phy_id))
-		return NULL;
-
-	sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
-
-	phy = phy_connect(dev, bus_id, hndlr, iface);
-	return IS_ERR(phy) ? NULL : phy;
-}
-EXPORT_SYMBOL(of_phy_connect_fixed_link);
-
-/**
  * of_phy_attach - Attach to a PHY without starting the state machine
  * @dev: pointer to net_device claiming the phy
  * @phy_np: Node pointer for the PHY
@@ -301,3 +281,69 @@
 	return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_attach);
+
+#if defined(CONFIG_FIXED_PHY)
+/*
+ * of_phy_is_fixed_link() and of_phy_register_fixed_link() must
+ * support two DT bindings:
+ * - the old DT binding, where 'fixed-link' was a property with 5
+ *   cells encoding various information about the fixed PHY
+ * - the new DT binding, where 'fixed-link' is a sub-node of the
+ *   Ethernet device.
+ */
+bool of_phy_is_fixed_link(struct device_node *np)
+{
+	struct device_node *dn;
+	int len;
+
+	/* New binding */
+	dn = of_get_child_by_name(np, "fixed-link");
+	if (dn) {
+		of_node_put(dn);
+		return true;
+	}
+
+	/* Old binding */
+	if (of_get_property(np, "fixed-link", &len) &&
+	    len == (5 * sizeof(__be32)))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(of_phy_is_fixed_link);
+
+/**
+ * of_phy_register_fixed_link - Register a fixed PHY described in the DT
+ * @np: device node of the Ethernet device carrying the 'fixed-link' binding
+ *
+ * Supports both bindings: a 'fixed-link' sub-node of @np (new binding) and
+ * a 5-cell 'fixed-link' property on @np (old binding).
+ *
+ * Returns the result of fixed_phy_register() when a binding is found,
+ * -EINVAL when the new-binding sub-node has no readable 'speed' property,
+ * or -ENODEV when @np has neither form of 'fixed-link'.
+ */
+int of_phy_register_fixed_link(struct device_node *np)
+{
+	struct fixed_phy_status status = {};
+	struct device_node *fixed_link_node;
+	const __be32 *fixed_link_prop;
+	int len;
+
+	/* New binding */
+	fixed_link_node = of_get_child_by_name(np, "fixed-link");
+	if (fixed_link_node) {
+		int ret;
+
+		status.link = 1;
+		/* All link parameters live in the sub-node, not in @np itself */
+		status.duplex = of_property_read_bool(fixed_link_node,
+						      "full-duplex");
+		ret = of_property_read_u32(fixed_link_node, "speed",
+					   &status.speed);
+		status.pause = of_property_read_bool(fixed_link_node, "pause");
+		status.asym_pause = of_property_read_bool(fixed_link_node,
+							  "asym-pause");
+		/* Drop the child reference on every path, errors included */
+		of_node_put(fixed_link_node);
+		if (ret)
+			return -EINVAL;
+		return fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	/* Old binding */
+	fixed_link_prop = of_get_property(np, "fixed-link", &len);
+	if (fixed_link_prop && len == (5 * sizeof(__be32))) {
+		status.link = 1;
+		status.duplex = be32_to_cpu(fixed_link_prop[1]);
+		status.speed = be32_to_cpu(fixed_link_prop[2]);
+		status.pause = be32_to_cpu(fixed_link_prop[3]);
+		status.asym_pause = be32_to_cpu(fixed_link_prop[4]);
+		return fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(of_phy_register_fixed_link);
+#endif

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 8c148f3..d292d7c 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c

@@ -231,10 +231,7 @@
 		}
 
 		if (time_after(jiffies, timeout)) {
-			dev_printk(KERN_DEBUG, &dev->dev,
-				   "vpd r/w failed.  This is likely a firmware "
-				   "bug on this device.  Contact the card "
-				   "vendor for a firmware update.");
+			dev_printk(KERN_DEBUG, &dev->dev, "vpd r/w failed.  This is likely a firmware bug on this device.  Contact the card vendor for a firmware update\n");
 			return -ETIMEDOUT;
 		}
 		if (fatal_signal_pending(current))

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 447d393..73aef51 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c

@@ -226,6 +226,7 @@
 					 type_mask, alignf, alignf_data,
 					 &pci_32_bit);
 }
+EXPORT_SYMBOL(pci_bus_alloc_resource);
 
 void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
 
@@ -253,6 +254,7 @@
 
 	dev->is_added = 1;
 }
+EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
 /**
  * pci_bus_add_devices - start driver for PCI devices
@@ -279,6 +281,7 @@
 			pci_bus_add_devices(child);
 	}
 }
+EXPORT_SYMBOL(pci_bus_add_devices);
 
 /** pci_walk_bus - walk devices on/under bus, calling callback.
  *  @top      bus whose devices should be walked
@@ -344,6 +347,3 @@
 }
 EXPORT_SYMBOL(pci_bus_put);
 
-EXPORT_SYMBOL(pci_bus_alloc_resource);
-EXPORT_SYMBOL_GPL(pci_bus_add_device);
-EXPORT_SYMBOL(pci_bus_add_devices);

diff --git a/drivers/pci/host/pci-exynos.c b/drivers/pci/host/pci-exynos.c
index 1632661..c5d0ca3 100644
--- a/drivers/pci/host/pci-exynos.c
+++ b/drivers/pci/host/pci-exynos.c

@@ -545,7 +545,6 @@
 	pp->root_bus_nr = -1;
 	pp->ops = &exynos_pcie_host_ops;
 
-	spin_lock_init(&pp->conf_lock);
 	ret = dw_pcie_host_init(pp);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to initialize host\n");

diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index a5645ae..a568efa 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c

@@ -507,7 +507,6 @@
 	pp->root_bus_nr = -1;
 	pp->ops = &imx6_pcie_host_ops;
 
-	spin_lock_init(&pp->conf_lock);
 	ret = dw_pcie_host_init(pp);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to initialize host\n");

diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index e384e25..ce23e0f 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c

@@ -113,7 +113,6 @@
 struct mvebu_pcie_port {
 	char *name;
 	void __iomem *base;
-	spinlock_t conf_lock;
 	u32 port;
 	u32 lane;
 	int devfn;
@@ -329,9 +328,11 @@
 		ret = mvebu_mbus_add_window_remap_by_id(target, attribute, base,
 							sz, remap);
 		if (ret) {
+			phys_addr_t end = base + sz - 1;
+
 			dev_err(&port->pcie->pdev->dev,
-				"Could not create MBus window at 0x%x, size 0x%x: %d\n",
-				base, sz, ret);
+				"Could not create MBus window at [mem %pa-%pa]: %d\n",
+				&base, &end, ret);
 			mvebu_pcie_del_windows(port, base - size_mapped,
 					       size_mapped);
 			return;
@@ -613,9 +614,9 @@
 	return sys->private_data;
 }
 
-static struct mvebu_pcie_port *
-mvebu_pcie_find_port(struct mvebu_pcie *pcie, struct pci_bus *bus,
-		     int devfn)
+static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
+						    struct pci_bus *bus,
+						    int devfn)
 {
 	int i;
 
@@ -638,7 +639,6 @@
 {
 	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
 	struct mvebu_pcie_port *port;
-	unsigned long flags;
 	int ret;
 
 	port = mvebu_pcie_find_port(pcie, bus, devfn);
@@ -664,10 +664,8 @@
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	/* Access the real PCIe interface */
-	spin_lock_irqsave(&port->conf_lock, flags);
 	ret = mvebu_pcie_hw_wr_conf(port, bus, devfn,
 				    where, size, val);
-	spin_unlock_irqrestore(&port->conf_lock, flags);
 
 	return ret;
 }
@@ -678,7 +676,6 @@
 {
 	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
 	struct mvebu_pcie_port *port;
-	unsigned long flags;
 	int ret;
 
 	port = mvebu_pcie_find_port(pcie, bus, devfn);
@@ -710,10 +707,8 @@
 	}
 
 	/* Access the real PCIe interface */
-	spin_lock_irqsave(&port->conf_lock, flags);
 	ret = mvebu_pcie_hw_rd_conf(port, bus, devfn,
 				    where, size, val);
-	spin_unlock_irqrestore(&port->conf_lock, flags);
 
 	return ret;
 }
@@ -786,10 +781,10 @@
 }
 
 static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
-						const struct resource *res,
-						resource_size_t start,
-						resource_size_t size,
-						resource_size_t align)
+						 const struct resource *res,
+						 resource_size_t start,
+						 resource_size_t size,
+						 resource_size_t align)
 {
 	if (dev->bus->number != 0)
 		return start;
@@ -839,7 +834,8 @@
  * found, maps it.
  */
 static void __iomem *mvebu_pcie_map_registers(struct platform_device *pdev,
-		      struct device_node *np, struct mvebu_pcie_port *port)
+					      struct device_node *np,
+					      struct mvebu_pcie_port *port)
 {
 	struct resource regs;
 	int ret = 0;
@@ -1060,7 +1056,6 @@
 		mvebu_pcie_set_local_dev_nr(port, 1);
 
 		port->dn = child;
-		spin_lock_init(&port->conf_lock);
 		mvebu_sw_pci_bridge_init(port);
 		i++;
 	}

diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index e3bf9e6..1eaf4df 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c

@@ -643,7 +643,6 @@
 			int size, u32 *val)
 {
 	struct pcie_port *pp = sys_to_pcie(bus->sysdata);
-	unsigned long flags;
 	int ret;
 
 	if (!pp) {
@@ -656,13 +655,11 @@
 		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 
-	spin_lock_irqsave(&pp->conf_lock, flags);
 	if (bus->number != pp->root_bus_nr)
 		ret = dw_pcie_rd_other_conf(pp, bus, devfn,
 						where, size, val);
 	else
 		ret = dw_pcie_rd_own_conf(pp, where, size, val);
-	spin_unlock_irqrestore(&pp->conf_lock, flags);
 
 	return ret;
 }
@@ -671,7 +668,6 @@
 			int where, int size, u32 val)
 {
 	struct pcie_port *pp = sys_to_pcie(bus->sysdata);
-	unsigned long flags;
 	int ret;
 
 	if (!pp) {
@@ -682,13 +678,11 @@
 	if (dw_pcie_valid_config(pp, bus, PCI_SLOT(devfn)) == 0)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	spin_lock_irqsave(&pp->conf_lock, flags);
 	if (bus->number != pp->root_bus_nr)
 		ret = dw_pcie_wr_other_conf(pp, bus, devfn,
 						where, size, val);
 	else
 		ret = dw_pcie_wr_own_conf(pp, where, size, val);
-	spin_unlock_irqrestore(&pp->conf_lock, flags);
 
 	return ret;
 }

diff --git a/drivers/pci/host/pcie-designware.h b/drivers/pci/host/pcie-designware.h
index a169d22..77f592f 100644
--- a/drivers/pci/host/pcie-designware.h
+++ b/drivers/pci/host/pcie-designware.h

@@ -41,7 +41,6 @@
 	void __iomem		*va_cfg1_base;
 	u64			io_base;
 	u64			mem_base;
-	spinlock_t		conf_lock;
 	struct resource		cfg;
 	struct resource		io;
 	struct resource		mem;

diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 8e06124..f7d3de3 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c

@@ -277,9 +277,8 @@
 	else if (size == 2)
 		*val = (*val >> (8 * (where & 2))) & 0xffff;
 
-	dev_dbg(&bus->dev, "pcie-config-read: bus=%3d devfn=0x%04x "
-		"where=0x%04x size=%d val=0x%08lx\n", bus->number,
-		devfn, where, size, (unsigned long)*val);
+	dev_dbg(&bus->dev, "pcie-config-read: bus=%3d devfn=0x%04x where=0x%04x size=%d val=0x%08lx\n",
+		bus->number, devfn, where, size, (unsigned long)*val);
 
 	return ret;
 }
@@ -302,9 +301,8 @@
 	if (ret != PCIBIOS_SUCCESSFUL)
 		return ret;
 
-	dev_dbg(&bus->dev, "pcie-config-write: bus=%3d devfn=0x%04x "
-		"where=0x%04x size=%d val=0x%08lx\n", bus->number,
-		devfn, where, size, (unsigned long)val);
+	dev_dbg(&bus->dev, "pcie-config-write: bus=%3d devfn=0x%04x where=0x%04x size=%d val=0x%08lx\n",
+		bus->number, devfn, where, size, (unsigned long)val);
 
 	if (size == 1) {
 		shift = 8 * (where & 3);

diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 2b85924..b0e61bf 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h

@@ -142,6 +142,16 @@
 	return func_to_acpi_device(func)->handle;
 }
 
+/*
+ * Hotplug context used for PCI root bridges.  The generic
+ * acpi_hotplug_context is embedded so that the containing structure can
+ * be recovered from a pointer to 'hp' with container_of().
+ */
+struct acpiphp_root_context {
+	struct acpi_hotplug_context hp;
+	struct acpiphp_bridge *root_bridge;
+};
+
+/* Map an embedded acpi_hotplug_context back to its acpiphp_root_context. */
+static inline struct acpiphp_root_context *to_acpiphp_root_context(struct acpi_hotplug_context *hp)
+{
+	struct acpiphp_root_context *root_context;
+
+	root_context = container_of(hp, struct acpiphp_root_context, hp);
+	return root_context;
+}
+
 /*
  * struct acpiphp_attention_info - device specific attention registration
  *

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 728c31f..e291efc 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c

@@ -63,10 +63,6 @@
 MODULE_PARM_DESC(disable, "disable acpiphp driver");
 module_param_named(disable, acpiphp_disabled, bool, 0444);
 
-/* export the attention callback registration methods */
-EXPORT_SYMBOL_GPL(acpiphp_register_attention);
-EXPORT_SYMBOL_GPL(acpiphp_unregister_attention);
-
 static int enable_slot		(struct hotplug_slot *slot);
 static int disable_slot		(struct hotplug_slot *slot);
 static int set_attention_status (struct hotplug_slot *slot, u8 value);
@@ -104,6 +100,7 @@
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(acpiphp_register_attention);
 
 
 /**
@@ -124,6 +121,7 @@
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(acpiphp_unregister_attention);
 
 
 /**

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 75e1783..602d153 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c

@@ -351,11 +351,9 @@
 			slot->slot = NULL;
 			bridge->nr_slots--;
 			if (retval == -EBUSY)
-				pr_warn("Slot %llu already registered by another "
-					"hotplug driver\n", sun);
+				pr_warn("Slot %llu already registered by another hotplug driver\n", sun);
 			else
-				pr_warn("acpiphp_register_hotplug_slot failed "
-					"(err code = 0x%x)\n", retval);
+				pr_warn("acpiphp_register_hotplug_slot failed (err code = 0x%x)\n", retval);
 		}
 		/* Even if the slot registration fails, we can still use it. */
 	}
@@ -373,17 +371,13 @@
 
 static struct acpiphp_bridge *acpiphp_dev_to_bridge(struct acpi_device *adev)
 {
-	struct acpiphp_context *context;
 	struct acpiphp_bridge *bridge = NULL;
 
 	acpi_lock_hp_context();
-	context = acpiphp_get_context(adev);
-	if (context) {
-		bridge = context->bridge;
+	if (adev->hp) {
+		bridge = to_acpiphp_root_context(adev->hp)->root_bridge;
 		if (bridge)
 			get_bridge(bridge);
-
-		acpiphp_put_context(context);
 	}
 	acpi_unlock_hp_context();
 	return bridge;
@@ -881,7 +875,17 @@
 	 */
 	get_device(&bus->dev);
 
-	if (!pci_is_root_bus(bridge->pci_bus)) {
+	acpi_lock_hp_context();
+	if (pci_is_root_bus(bridge->pci_bus)) {
+		struct acpiphp_root_context *root_context;
+
+		root_context = kzalloc(sizeof(*root_context), GFP_KERNEL);
+		if (!root_context)
+			goto err;
+
+		root_context->root_bridge = bridge;
+		acpi_set_hp_context(adev, &root_context->hp, NULL, NULL, NULL);
+	} else {
 		struct acpiphp_context *context;
 
 		/*
@@ -890,21 +894,16 @@
 		 * parent is going to be handled by pciehp, in which case this
 		 * bridge is not interesting to us either.
 		 */
-		acpi_lock_hp_context();
 		context = acpiphp_get_context(adev);
-		if (!context) {
-			acpi_unlock_hp_context();
-			put_device(&bus->dev);
-			pci_dev_put(bridge->pci_dev);
-			kfree(bridge);
-			return;
-		}
+		if (!context)
+			goto err;
+
 		bridge->context = context;
 		context->bridge = bridge;
 		/* Get a reference to the parent bridge. */
 		get_bridge(context->func.parent);
-		acpi_unlock_hp_context();
 	}
+	acpi_unlock_hp_context();
 
 	/* Must be added to the list prior to calling acpiphp_add_context(). */
 	mutex_lock(&bridge_mutex);
@@ -919,6 +918,30 @@
 		cleanup_bridge(bridge);
 		put_bridge(bridge);
 	}
+	return;
+
+ err:
+	acpi_unlock_hp_context();
+	put_device(&bus->dev);
+	pci_dev_put(bridge->pci_dev);
+	kfree(bridge);
+}
+
+/*
+ * Tear down @bridge.  For a bridge on a PCI root bus, the
+ * acpiphp_root_context reachable through the companion ACPI device's
+ * 'hp' pointer is detached and freed first; then the bridge is cleaned
+ * up and the reference on it is dropped.
+ */
+void acpiphp_drop_bridge(struct acpiphp_bridge *bridge)
+{
+	if (pci_is_root_bus(bridge->pci_bus)) {
+		struct acpiphp_root_context *root_context;
+		struct acpi_device *adev;
+
+		/* Clear adev->hp while holding the hotplug context lock. */
+		acpi_lock_hp_context();
+		adev = ACPI_COMPANION(bridge->pci_bus->bridge);
+		/* NOTE(review): assumes adev and adev->hp are valid here - confirm */
+		root_context = to_acpiphp_root_context(adev->hp);
+		adev->hp = NULL;
+		acpi_unlock_hp_context();
+		/* Freed only after the pointer is cleared and the lock dropped. */
+		kfree(root_context);
+	}
+	cleanup_bridge(bridge);
+	put_bridge(bridge);
 }
 
 /**
@@ -936,8 +959,7 @@
 	list_for_each_entry(bridge, &bridge_list, list)
 		if (bridge->pci_bus == bus) {
 			mutex_unlock(&bridge_mutex);
-			cleanup_bridge(bridge);
-			put_bridge(bridge);
+			acpiphp_drop_bridge(bridge);
 			return;
 		}
 

diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h
index 1356211..6a0ddf7 100644
--- a/drivers/pci/hotplug/cpci_hotplug.h
+++ b/drivers/pci/hotplug/cpci_hotplug.h

@@ -56,9 +56,9 @@
 	int (*enable_irq) (void);
 	int (*disable_irq) (void);
 	int (*check_irq) (void *dev_id);
-	int (*hardware_test) (struct slot* slot, u32 value);
-	u8  (*get_power) (struct slot* slot);
-	int (*set_power) (struct slot* slot, int value);
+	int (*hardware_test) (struct slot *slot, u32 value);
+	u8  (*get_power) (struct slot *slot);
+	int (*set_power) (struct slot *slot, int value);
 };
 
 struct cpci_hp_controller {
@@ -89,13 +89,13 @@
 u8 cpci_get_attention_status(struct slot *slot);
 u8 cpci_get_latch_status(struct slot *slot);
 u8 cpci_get_adapter_status(struct slot *slot);
-u16 cpci_get_hs_csr(struct slot * slot);
+u16 cpci_get_hs_csr(struct slot *slot);
 int cpci_set_attention_status(struct slot *slot, int status);
-int cpci_check_and_clear_ins(struct slot * slot);
-int cpci_check_ext(struct slot * slot);
-int cpci_clear_ext(struct slot * slot);
-int cpci_led_on(struct slot * slot);
-int cpci_led_off(struct slot * slot);
+int cpci_check_and_clear_ins(struct slot *slot);
+int cpci_check_ext(struct slot *slot);
+int cpci_clear_ext(struct slot *slot);
+int cpci_led_on(struct slot *slot);
+int cpci_led_off(struct slot *slot);
 int cpci_configure_slot(struct slot *slot);
 int cpci_unconfigure_slot(struct slot *slot);
 

diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 00c81a3..e09cf78 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c

@@ -65,10 +65,10 @@
 static int enable_slot(struct hotplug_slot *slot);
 static int disable_slot(struct hotplug_slot *slot);
 static int set_attention_status(struct hotplug_slot *slot, u8 value);
-static int get_power_status(struct hotplug_slot *slot, u8 * value);
-static int get_attention_status(struct hotplug_slot *slot, u8 * value);
-static int get_adapter_status(struct hotplug_slot *slot, u8 * value);
-static int get_latch_status(struct hotplug_slot *slot, u8 * value);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+static int get_attention_status(struct hotplug_slot *slot, u8 *value);
+static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
+static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 
 static struct hotplug_slot_ops cpci_hotplug_slot_ops = {
 	.enable_slot = enable_slot,
@@ -168,7 +168,7 @@
 }
 
 static int
-get_power_status(struct hotplug_slot *hotplug_slot, u8 * value)
+get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct slot *slot = hotplug_slot->private;
 
@@ -177,7 +177,7 @@
 }
 
 static int
-get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value)
+get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct slot *slot = hotplug_slot->private;
 
@@ -192,14 +192,14 @@
 }
 
 static int
-get_adapter_status(struct hotplug_slot *hotplug_slot, u8 * value)
+get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	*value = hotplug_slot->info->adapter_status;
 	return 0;
 }
 
 static int
-get_latch_status(struct hotplug_slot *hotplug_slot, u8 * value)
+get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	*value = hotplug_slot->info->latch_status;
 	return 0;
@@ -299,6 +299,7 @@
 error:
 	return status;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_register_bus);
 
 int
 cpci_hp_unregister_bus(struct pci_bus *bus)
@@ -329,6 +330,7 @@
 	up_write(&list_rwsem);
 	return status;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_unregister_bus);
 
 /* This is the interrupt mode interrupt handler */
 static irqreturn_t
@@ -360,7 +362,7 @@
 init_slots(int clear_ins)
 {
 	struct slot *slot;
-	struct pci_dev* dev;
+	struct pci_dev *dev;
 
 	dbg("%s - enter", __func__);
 	down_read(&list_rwsem);
@@ -614,6 +616,7 @@
 		controller = new_controller;
 	return status;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_register_controller);
 
 static void
 cleanup_slots(void)
@@ -653,6 +656,7 @@
 		status = -ENODEV;
 	return status;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_unregister_controller);
 
 int
 cpci_hp_start(void)
@@ -690,6 +694,7 @@
 	dbg("%s - exit", __func__);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_start);
 
 int
 cpci_hp_stop(void)
@@ -704,6 +709,7 @@
 	cpci_stop_thread();
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cpci_hp_stop);
 
 int __init
 cpci_hotplug_init(int debug)
@@ -721,10 +727,3 @@
 	cpci_hp_stop();
 	cpci_hp_unregister_controller(controller);
 }
-
-EXPORT_SYMBOL_GPL(cpci_hp_register_controller);
-EXPORT_SYMBOL_GPL(cpci_hp_unregister_controller);
-EXPORT_SYMBOL_GPL(cpci_hp_register_bus);
-EXPORT_SYMBOL_GPL(cpci_hp_unregister_bus);
-EXPORT_SYMBOL_GPL(cpci_hp_start);
-EXPORT_SYMBOL_GPL(cpci_hp_stop);

diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
index f6ef64c..7d48eca 100644
--- a/drivers/pci/hotplug/cpci_hotplug_pci.c
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c

@@ -46,7 +46,7 @@
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 
-u8 cpci_get_attention_status(struct slot* slot)
+u8 cpci_get_attention_status(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -66,7 +66,7 @@
 	return hs_csr & 0x0008 ? 1 : 0;
 }
 
-int cpci_set_attention_status(struct slot* slot, int status)
+int cpci_set_attention_status(struct slot *slot, int status)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -93,7 +93,7 @@
 	return 1;
 }
 
-u16 cpci_get_hs_csr(struct slot* slot)
+u16 cpci_get_hs_csr(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -111,7 +111,7 @@
 	return hs_csr;
 }
 
-int cpci_check_and_clear_ins(struct slot* slot)
+int cpci_check_and_clear_ins(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -140,7 +140,7 @@
 	return ins;
 }
 
-int cpci_check_ext(struct slot* slot)
+int cpci_check_ext(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -161,7 +161,7 @@
 	return ext;
 }
 
-int cpci_clear_ext(struct slot* slot)
+int cpci_clear_ext(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -187,7 +187,7 @@
 	return 0;
 }
 
-int cpci_led_on(struct slot* slot)
+int cpci_led_on(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -216,7 +216,7 @@
 	return 0;
 }
 
-int cpci_led_off(struct slot* slot)
+int cpci_led_off(struct slot *slot)
 {
 	int hs_cap;
 	u16 hs_csr;
@@ -303,7 +303,7 @@
 	return ret;
 }
 
-int cpci_unconfigure_slot(struct slot* slot)
+int cpci_unconfigure_slot(struct slot *slot)
 {
 	struct pci_dev *dev, *temp;
 

diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c
index 7536eef..04fcd78 100644
--- a/drivers/pci/hotplug/cpcihp_generic.c
+++ b/drivers/pci/hotplug/cpcihp_generic.c

@@ -78,8 +78,8 @@
 
 static int __init validate_parameters(void)
 {
-	char* str;
-	char* p;
+	char *str;
+	char *p;
 	unsigned long tmp;
 
 	if(!bridge) {
@@ -142,8 +142,8 @@
 static int __init cpcihp_generic_init(void)
 {
 	int status;
-	struct resource* r;
-	struct pci_dev* dev;
+	struct resource *r;
+	struct pci_dev *dev;
 
 	info(DRIVER_DESC " version: " DRIVER_VERSION);
 	status = validate_parameters();

diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index e8c4a7c..6757b3e 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c

@@ -295,7 +295,7 @@
 
 static int __init zt5550_init(void)
 {
-	struct resource* r;
+	struct resource *r;
 	int rc;
 
 	info(DRIVER_DESC " version: " DRIVER_VERSION);

diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h
index 516b877..0450f40 100644
--- a/drivers/pci/hotplug/cpqphp.h
+++ b/drivers/pci/hotplug/cpqphp.h

@@ -255,7 +255,7 @@
 	struct pci_resource *io_head;
 	struct pci_resource *bus_head;
 	struct timer_list *p_task_event;
-	struct pci_dev* pci_dev;
+	struct pci_dev *pci_dev;
 };
 
 struct slot {
@@ -278,7 +278,7 @@
 };
 
 struct pci_resource {
-	struct pci_resource * next;
+	struct pci_resource *next;
 	u32 base;
 	u32 length;
 };

diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 037e261..4aaee74 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c

@@ -94,7 +94,7 @@
  *
  * Returns pointer to the head of the SMBIOS tables (or %NULL).
  */
-static void __iomem * detect_SMBIOS_pointer(void __iomem *begin, void __iomem *end)
+static void __iomem *detect_SMBIOS_pointer(void __iomem *begin, void __iomem *end)
 {
 	void __iomem *fp;
 	void __iomem *endp;
@@ -131,7 +131,7 @@
  *
  * For unexpected switch opens
  */
-static int init_SERR(struct controller * ctrl)
+static int init_SERR(struct controller *ctrl)
 {
 	u32 tempdword;
 	u32 number_of_slots;
@@ -291,7 +291,7 @@
 	kfree(slot);
 }
 
-static int ctrl_slot_cleanup (struct controller * ctrl)
+static int ctrl_slot_cleanup (struct controller *ctrl)
 {
 	struct slot *old_slot, *next_slot;
 
@@ -706,8 +706,7 @@
 		hotplug_slot_info->adapter_status =
 			get_presence_status(ctrl, slot);
 
-		dbg("registering bus %d, dev %d, number %d, "
-				"ctrl->slot_device_offset %d, slot %d\n",
+		dbg("registering bus %d, dev %d, number %d, ctrl->slot_device_offset %d, slot %d\n",
 				slot->bus, slot->device,
 				slot->number, ctrl->slot_device_offset,
 				slot_number);
@@ -837,8 +836,7 @@
 
 	bus = pdev->subordinate;
 	if (!bus) {
-		dev_notice(&pdev->dev, "the device is not a bridge, "
-				"skipping\n");
+		dev_notice(&pdev->dev, "the device is not a bridge, skipping\n");
 		rc = -ENODEV;
 		goto err_disable_device;
 	}

diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index f593585..bde47fc 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c

@@ -39,9 +39,9 @@
 #include <linux/kthread.h>
 #include "cpqphp.h"
 
-static u32 configure_new_device(struct controller* ctrl, struct pci_func *func,
+static u32 configure_new_device(struct controller *ctrl, struct pci_func *func,
 			u8 behind_bridge, struct resource_lists *resources);
-static int configure_new_function(struct controller* ctrl, struct pci_func *func,
+static int configure_new_function(struct controller *ctrl, struct pci_func *func,
 			u8 behind_bridge, struct resource_lists *resources);
 static void interrupt_event_handler(struct controller *ctrl);
 
@@ -64,7 +64,7 @@
 
 /* FIXME: The following line needs to be somewhere else... */
 #define WRONG_BUS_FREQUENCY 0x07
-static u8 handle_switch_change(u8 change, struct controller * ctrl)
+static u8 handle_switch_change(u8 change, struct controller *ctrl)
 {
 	int hp_slot;
 	u8 rc = 0;
@@ -138,7 +138,7 @@
 }
 
 
-static u8 handle_presence_change(u16 change, struct controller * ctrl)
+static u8 handle_presence_change(u16 change, struct controller *ctrl)
 {
 	int hp_slot;
 	u8 rc = 0;
@@ -232,7 +232,7 @@
 }
 
 
-static u8 handle_power_fault(u8 change, struct controller * ctrl)
+static u8 handle_power_fault(u8 change, struct controller *ctrl)
 {
 	int hp_slot;
 	u8 rc = 0;
@@ -997,7 +997,7 @@
  *
  * Returns %0 if successful, !0 otherwise.
  */
-static int slot_remove(struct pci_func * old_slot)
+static int slot_remove(struct pci_func *old_slot)
 {
 	struct pci_func *next;
 
@@ -1109,7 +1109,7 @@
 
 /* DJZ: I don't think is_bridge will work as is.
  * FIXME */
-static int is_bridge(struct pci_func * func)
+static int is_bridge(struct pci_func *func)
 {
 	/* Check the header type */
 	if (((func->config_space[0x03] >> 16) & 0xFF) == 0x01)
@@ -1625,7 +1625,7 @@
  * @replace_flag: whether replacing or adding a new device
  * @ctrl: target controller
  */
-static u32 remove_board(struct pci_func * func, u32 replace_flag, struct controller * ctrl)
+static u32 remove_board(struct pci_func *func, u32 replace_flag, struct controller *ctrl)
 {
 	int index;
 	u8 skip = 0;
@@ -1742,7 +1742,7 @@
 
 
 /* this is the main worker thread */
-static int event_thread(void* data)
+static int event_thread(void *data)
 {
 	struct controller *ctrl;
 
@@ -1992,7 +1992,7 @@
 	u16 temp_word;
 	u32 tempdword;
 	int rc;
-	struct slot* p_slot;
+	struct slot *p_slot;
 	int physical_slot = 0;
 
 	tempdword = 0;
@@ -2088,7 +2088,7 @@
 	u8 replace_flag;
 	u32 rc = 0;
 	unsigned int devfn;
-	struct slot* p_slot;
+	struct slot *p_slot;
 	struct pci_bus *pci_bus = ctrl->pci_bus;
 	int physical_slot=0;
 
@@ -2270,8 +2270,8 @@
  *
  * Returns 0 if success.
  */
-static u32 configure_new_device(struct controller * ctrl, struct pci_func * func,
-				 u8 behind_bridge, struct resource_lists * resources)
+static u32 configure_new_device(struct controller  *ctrl, struct pci_func  *func,
+				 u8 behind_bridge, struct resource_lists  *resources)
 {
 	u8 temp_byte, function, max_functions, stop_it;
 	int rc;

diff --git a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c
index 9600a39..0968a9b 100644
--- a/drivers/pci/hotplug/cpqphp_nvram.c
+++ b/drivers/pci/hotplug/cpqphp_nvram.c

@@ -107,7 +107,7 @@
  */
 
 
-static u32 add_byte( u32 **p_buffer, u8 value, u32 *used, u32 *avail)
+static u32 add_byte(u32 **p_buffer, u8 value, u32 *used, u32 *avail)
 {
 	u8 **tByte;
 
@@ -122,7 +122,7 @@
 }
 
 
-static u32 add_dword( u32 **p_buffer, u32 value, u32 *used, u32 *avail)
+static u32 add_dword(u32 **p_buffer, u32 value, u32 *used, u32 *avail)
 {
 	if ((*used + 4) > *avail)
 		return(1);
@@ -267,12 +267,12 @@
 	ctrl = cpqhp_ctrl_list;
 
 	/* The revision of this structure */
-	rc = add_byte( &pFill, 1 + ctrl->push_flag, &usedbytes, &available);
+	rc = add_byte(&pFill, 1 + ctrl->push_flag, &usedbytes, &available);
 	if (rc)
 		return(rc);
 
 	/* The number of controllers */
-	rc = add_byte( &pFill, 1, &usedbytes, &available);
+	rc = add_byte(&pFill, 1, &usedbytes, &available);
 	if (rc)
 		return(rc);
 
@@ -282,22 +282,22 @@
 		numCtrl++;
 
 		/* The bus number */
-		rc = add_byte( &pFill, ctrl->bus, &usedbytes, &available);
+		rc = add_byte(&pFill, ctrl->bus, &usedbytes, &available);
 		if (rc)
 			return(rc);
 
 		/* The device Number */
-		rc = add_byte( &pFill, PCI_SLOT(ctrl->pci_dev->devfn), &usedbytes, &available);
+		rc = add_byte(&pFill, PCI_SLOT(ctrl->pci_dev->devfn), &usedbytes, &available);
 		if (rc)
 			return(rc);
 
 		/* The function Number */
-		rc = add_byte( &pFill, PCI_FUNC(ctrl->pci_dev->devfn), &usedbytes, &available);
+		rc = add_byte(&pFill, PCI_FUNC(ctrl->pci_dev->devfn), &usedbytes, &available);
 		if (rc)
 			return(rc);
 
 		/* Skip the number of available entries */
-		rc = add_dword( &pFill, 0, &usedbytes, &available);
+		rc = add_dword(&pFill, 0, &usedbytes, &available);
 		if (rc)
 			return(rc);
 
@@ -311,12 +311,12 @@
 			loop ++;
 
 			/* base */
-			rc = add_dword( &pFill, resNode->base, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
 			/* length */
-			rc = add_dword( &pFill, resNode->length, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
@@ -336,12 +336,12 @@
 			loop ++;
 
 			/* base */
-			rc = add_dword( &pFill, resNode->base, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
 			/* length */
-			rc = add_dword( &pFill, resNode->length, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
@@ -361,12 +361,12 @@
 			loop ++;
 
 			/* base */
-			rc = add_dword( &pFill, resNode->base, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
 			/* length */
-			rc = add_dword( &pFill, resNode->length, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
@@ -386,12 +386,12 @@
 			loop ++;
 
 			/* base */
-			rc = add_dword( &pFill, resNode->base, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
 			if (rc)
 				return(rc);
 
 			/* length */
-			rc = add_dword( &pFill, resNode->length, &usedbytes, &available);
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
 			if (rc)
 				return(rc);
 

diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
index a3e3c20..1c8c2f1 100644
--- a/drivers/pci/hotplug/cpqphp_pci.c
+++ b/drivers/pci/hotplug/cpqphp_pci.c

@@ -81,7 +81,7 @@
 }
 
 
-int cpqhp_configure_device (struct controller* ctrl, struct pci_func* func)
+int cpqhp_configure_device (struct controller *ctrl, struct pci_func *func)
 {
 	struct pci_bus *child;
 	int num;
@@ -121,7 +121,7 @@
 }
 
 
-int cpqhp_unconfigure_device(struct pci_func* func)
+int cpqhp_unconfigure_device(struct pci_func *func)
 {
 	int j;
 
@@ -129,7 +129,7 @@
 
 	pci_lock_rescan_remove();
 	for (j=0; j<8 ; j++) {
-		struct pci_dev* temp = pci_get_bus_and_slot(func->bus, PCI_DEVFN(func->device, j));
+		struct pci_dev *temp = pci_get_bus_and_slot(func->bus, PCI_DEVFN(func->device, j));
 		if (temp) {
 			pci_dev_put(temp);
 			pci_stop_and_remove_bus_device(temp);
@@ -203,7 +203,7 @@
 }
 
 
-static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 * dev_num)
+static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_num)
 {
 	u16 tdevice;
 	u32 work;
@@ -280,7 +280,7 @@
 }
 
 
-int cpqhp_get_bus_dev (struct controller *ctrl, u8 * bus_num, u8 * dev_num, u8 slot)
+int cpqhp_get_bus_dev (struct controller *ctrl, u8 *bus_num, u8 *dev_num, u8 slot)
 {
 	/* plain (bridges allowed) */
 	return PCI_GetBusDevHelper(ctrl, bus_num, dev_num, slot, 0);
@@ -465,7 +465,7 @@
  *
  * returns 0 if success
  */
-int cpqhp_save_slot_config (struct controller *ctrl, struct pci_func * new_slot)
+int cpqhp_save_slot_config (struct controller *ctrl, struct pci_func *new_slot)
 {
 	long rc;
 	u8 class_code;
@@ -549,7 +549,7 @@
  *
  * returns 0 if success
  */
-int cpqhp_save_base_addr_length(struct controller *ctrl, struct pci_func * func)
+int cpqhp_save_base_addr_length(struct controller *ctrl, struct pci_func *func)
 {
 	u8 cloop;
 	u8 header_type;
@@ -686,7 +686,7 @@
  *
  * returns 0 if success
  */
-int cpqhp_save_used_resources (struct controller *ctrl, struct pci_func * func)
+int cpqhp_save_used_resources (struct controller *ctrl, struct pci_func *func)
 {
 	u8 cloop;
 	u8 header_type;
@@ -949,7 +949,7 @@
  *
  * returns 0 if success
  */
-int cpqhp_configure_board(struct controller *ctrl, struct pci_func * func)
+int cpqhp_configure_board(struct controller *ctrl, struct pci_func *func)
 {
 	int cloop;
 	u8 header_type;
@@ -1027,7 +1027,7 @@
  *
  * returns 0 if the board is the same nonzero otherwise
  */
-int cpqhp_valid_replace(struct controller *ctrl, struct pci_func * func)
+int cpqhp_valid_replace(struct controller *ctrl, struct pci_func *func)
 {
 	u8 cloop;
 	u8 header_type;
@@ -1419,7 +1419,7 @@
  *
  * returns 0 if success
  */
-int cpqhp_return_board_resources(struct pci_func * func, struct resource_lists * resources)
+int cpqhp_return_board_resources(struct pci_func *func, struct resource_lists *resources)
 {
 	int rc = 0;
 	struct pci_resource *node;
@@ -1475,7 +1475,7 @@
  *
  * Puts node back in the resource list pointed to by head
  */
-void cpqhp_destroy_resource_list (struct resource_lists * resources)
+void cpqhp_destroy_resource_list (struct resource_lists *resources)
 {
 	struct pci_resource *res, *tres;
 
@@ -1522,7 +1522,7 @@
  *
  * Puts node back in the resource list pointed to by head
  */
-void cpqhp_destroy_board_resources (struct pci_func * func)
+void cpqhp_destroy_board_resources (struct pci_func *func)
 {
 	struct pci_resource *res, *tres;
 

diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c
index 17c1f36..4a392c4 100644
--- a/drivers/pci/hotplug/cpqphp_sysfs.c
+++ b/drivers/pci/hotplug/cpqphp_sysfs.c

@@ -79,7 +79,7 @@
 
 static int show_dev (struct controller *ctrl, char *buf)
 {
-	char * out = buf;
+	char *out = buf;
 	int index;
 	struct pci_resource *res;
 	struct pci_func *new_slot;

diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index cf3ac1e..f7b8684 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c

@@ -74,7 +74,7 @@
 static inline int get_cur_bus_info(struct slot **sl)
 {
 	int rc = 1;
-	struct slot * slot_cur = *sl;
+	struct slot *slot_cur = *sl;
 
 	debug("options = %x\n", slot_cur->ctrl->options);
 	debug("revision = %x\n", slot_cur->ctrl->revision);
@@ -114,8 +114,8 @@
 
 static int __init get_max_slots (void)
 {
-	struct slot * slot_cur;
-	struct list_head * tmp;
+	struct slot *slot_cur;
+	struct list_head *tmp;
 	u8 slot_count = 0;
 
 	list_for_each(tmp, &ibmphp_slot_head) {
@@ -280,7 +280,7 @@
 	return rc;
 }
 
-static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int rc = -ENODEV;
 	struct slot *pslot;
@@ -311,7 +311,7 @@
 	return rc;
 }
 
-static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int rc = -ENODEV;
 	struct slot *pslot;
@@ -338,7 +338,7 @@
 }
 
 
-static int get_power_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int rc = -ENODEV;
 	struct slot *pslot;
@@ -364,7 +364,7 @@
 	return rc;
 }
 
-static int get_adapter_present(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_adapter_present(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int rc = -ENODEV;
 	struct slot *pslot;
@@ -433,7 +433,7 @@
 }
 
 /*
-static int get_max_adapter_speed_1(struct hotplug_slot *hotplug_slot, u8 * value, u8 flag)
+static int get_max_adapter_speed_1(struct hotplug_slot *hotplug_slot, u8 *value, u8 flag)
 {
 	int rc = -ENODEV;
 	struct slot *pslot;
@@ -471,7 +471,7 @@
 	return rc;
 }
 
-static int get_bus_name(struct hotplug_slot *hotplug_slot, char * value)
+static int get_bus_name(struct hotplug_slot *hotplug_slot, char *value)
 {
 	int rc = -ENODEV;
 	struct slot *pslot = NULL;
@@ -671,7 +671,7 @@
 {
 	struct pci_func *func_cur;
 	struct slot *slot_cur;
-	struct list_head * tmp;
+	struct list_head *tmp;
 	list_for_each(tmp, &ibmphp_slot_head) {
 		slot_cur = list_entry(tmp, struct slot, ibm_slot_list);
 		if (slot_cur->func) {
@@ -696,8 +696,8 @@
 static void free_slots(void)
 {
 	struct slot *slot_cur;
-	struct list_head * tmp;
-	struct list_head * next;
+	struct list_head *tmp;
+	struct list_head *next;
 
 	debug("%s -- enter\n", __func__);
 
@@ -825,10 +825,10 @@
 /*******************************************************
  * Returns whether the bus is empty or not
  *******************************************************/
-static int is_bus_empty(struct slot * slot_cur)
+static int is_bus_empty(struct slot *slot_cur)
 {
 	int rc;
-	struct slot * tmp_slot;
+	struct slot *tmp_slot;
 	u8 i = slot_cur->bus_on->slot_min;
 
 	while (i <= slot_cur->bus_on->slot_max) {
@@ -856,7 +856,7 @@
  * Parameters: slot
  * Returns: bus is set (0) or error code
  ***********************************************************/
-static int set_bus(struct slot * slot_cur)
+static int set_bus(struct slot *slot_cur)
 {
 	int rc;
 	u8 speed;
@@ -956,7 +956,7 @@
 static int check_limitations(struct slot *slot_cur)
 {
 	u8 i;
-	struct slot * tmp_slot;
+	struct slot *tmp_slot;
 	u8 count = 0;
 	u8 limitation = 0;
 
@@ -1045,8 +1045,7 @@
 	rc = check_limitations(slot_cur);
 	if (rc) {
 		err("Adding this card exceeds the limitations of this bus.\n");
-		err("(i.e., >1 133MHz cards running on same bus, or "
-		     ">2 66 PCI cards running on same bus.\n");
+		err("(i.e., >1 133MHz cards running on same bus, or >2 66 PCI cards running on same bus.\n");
 		err("Try hot-adding into another bus\n");
 		rc = -EINVAL;
 		goto error_nopower;
@@ -1070,12 +1069,10 @@
 					!(SLOT_PWRGD(slot_cur->status)))
 			err("power fault occurred trying to power up\n");
 		else if (SLOT_BUS_SPEED(slot_cur->status)) {
-			err("bus speed mismatch occurred.  please check "
-				"current bus speed and card capability\n");
+			err("bus speed mismatch occurred.  please check current bus speed and card capability\n");
 			print_card_capability(slot_cur);
 		} else if (SLOT_BUS_MODE(slot_cur->ext_status)) {
-			err("bus mode mismatch occurred.  please check "
-				"current bus mode and card capability\n");
+			err("bus mode mismatch occurred.  please check current bus mode and card capability\n");
 			print_card_capability(slot_cur);
 		}
 		ibmphp_update_slot_info(slot_cur);
@@ -1098,8 +1095,7 @@
 		goto error_power;
 	}
 	if (SLOT_POWER(slot_cur->status) && (SLOT_BUS_SPEED(slot_cur->status))) {
-		err("bus speed mismatch occurred.  please check current bus "
-					"speed and card capability\n");
+		err("bus speed mismatch occurred.  please check current bus speed and card capability\n");
 		print_card_capability(slot_cur);
 		goto error_power;
 	}

diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index bd04415..0f65ac5 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c

@@ -563,7 +563,7 @@
 	return rc;
 }
 
-static struct opt_rio_lo * find_rxe_num (u8 slot_num)
+static struct opt_rio_lo *find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
 
@@ -575,7 +575,7 @@
 	return NULL;
 }
 
-static struct opt_rio * find_chassis_num (u8 slot_num)
+static struct opt_rio *find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
 
@@ -593,7 +593,7 @@
 static u8 calculate_first_slot (u8 slot_num)
 {
 	u8 first_slot = 1;
-	struct slot * slot_cur;
+	struct slot *slot_cur;
 
 	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
@@ -607,7 +607,7 @@
 
 #define SLOT_NAME_SIZE 30
 
-static char *create_file_name (struct slot * slot_cur)
+static char *create_file_name (struct slot *slot_cur)
 {
 	struct opt_rio *opt_vg_ptr = NULL;
 	struct opt_rio_lo *opt_lo_ptr = NULL;
@@ -1192,7 +1192,7 @@
 	}
 	return rc;
 }
-static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
+static int ibmphp_probe (struct pci_dev *dev, const struct pci_device_id *ids)
 {
 	struct controller *ctrl;
 

diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c
index 5fc7a08..a936022 100644
--- a/drivers/pci/hotplug/ibmphp_hpc.c
+++ b/drivers/pci/hotplug/ibmphp_hpc.c

@@ -533,7 +533,7 @@
 *
 * Return   0 or error codes
 *---------------------------------------------------------------------*/
-int ibmphp_hpc_readslot (struct slot * pslot, u8 cmd, u8 * pstatus)
+int ibmphp_hpc_readslot (struct slot *pslot, u8 cmd, u8 *pstatus)
 {
 	void __iomem *wpg_bbar = NULL;
 	struct controller *ctlr_ptr;
@@ -672,7 +672,7 @@
 *
 * Action: issue a WRITE command to HPC
 *---------------------------------------------------------------------*/
-int ibmphp_hpc_writeslot (struct slot * pslot, u8 cmd)
+int ibmphp_hpc_writeslot (struct slot *pslot, u8 cmd)
 {
 	void __iomem *wpg_bbar = NULL;
 	struct controller *ctlr_ptr;
@@ -1102,7 +1102,7 @@
 * Value:
 *---------------------------------------------------------------------*/
 static int hpc_wait_ctlr_notworking (int timeout, struct controller *ctlr_ptr, void __iomem *wpg_bbar,
-				    u8 * pstatus)
+				    u8 *pstatus)
 {
 	int rc = 0;
 	u8 done = 0;

diff --git a/drivers/pci/hotplug/ibmphp_pci.c b/drivers/pci/hotplug/ibmphp_pci.c
index 639ea3a..2fd2967 100644
--- a/drivers/pci/hotplug/ibmphp_pci.c
+++ b/drivers/pci/hotplug/ibmphp_pci.c

@@ -47,7 +47,7 @@
  * We also assign the same irq numbers for multi function devices.
  * These are PIC mode, so shouldn't matter n.e.ways (hopefully)
  */
-static void assign_alt_irq (struct pci_func * cur_func, u8 class_code)
+static void assign_alt_irq (struct pci_func *cur_func, u8 class_code)
 {
 	int j;
 	for (j = 0; j < 4; j++) {
@@ -137,8 +137,8 @@
 				     "Please choose another device.\n", cur_func->device);
 				return -ENODEV;
 			} else if (class == PCI_CLASS_DISPLAY_VGA) {
-				err ("The device %x is not supported for hot plugging. "
-				     "Please choose another device.\n", cur_func->device);
+				err ("The device %x is not supported for hot plugging. Please choose another device.\n",
+				     cur_func->device);
 				return -ENODEV;
 			}
 			switch (hdr_type) {
@@ -179,8 +179,8 @@
 				case PCI_HEADER_TYPE_MULTIBRIDGE:
 					class >>= 8;
 					if (class != PCI_CLASS_BRIDGE_PCI) {
-						err ("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging. "
-						     "Please insert another card.\n", cur_func->device);
+						err ("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging.  Please insert another card.\n",
+						     cur_func->device);
 						return -ENODEV;
 					}
 					assign_alt_irq (cur_func, class_code);
@@ -247,8 +247,8 @@
 					class >>= 8;
 					debug ("class now is %x\n", class);
 					if (class != PCI_CLASS_BRIDGE_PCI) {
-						err ("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging. "
-						     "Please insert another card.\n", cur_func->device);
+						err ("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging.  Please insert another card.\n",
+						     cur_func->device);
 						return -ENODEV;
 					}
 
@@ -1073,7 +1073,7 @@
  * Input: bridge function
  * Output: amount of resources needed
  *****************************************************************************/
-static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno)
+static struct res_needed *scan_behind_bridge (struct pci_func *func, u8 busno)
 {
 	int count, len[6];
 	u16 vendor_id;
@@ -1125,13 +1125,11 @@
 
 				class >>= 8;	/* to take revision out, class = class.subclass.prog i/f */
 				if (class == PCI_CLASS_NOT_DEFINED_VGA) {
-					err ("The device %x is VGA compatible and as is not supported for hot plugging. "
-					     "Please choose another device.\n", device);
+					err ("The device %x is VGA compatible and as is not supported for hot plugging.  Please choose another device.\n", device);
 					amount->not_correct = 1;
 					return amount;
 				} else if (class == PCI_CLASS_DISPLAY_VGA) {
-					err ("The device %x is not supported for hot plugging. "
-					     "Please choose another device.\n", device);
+					err ("The device %x is not supported for hot plugging.  Please choose another device.\n", device);
 					amount->not_correct = 1;
 					return amount;
 				}
@@ -1483,12 +1481,10 @@
 			debug ("hdr_type %x, class %x\n", hdr_type, class);
 			class >>= 8;	/* to take revision out, class = class.subclass.prog i/f */
 			if (class == PCI_CLASS_NOT_DEFINED_VGA) {
-				err ("The device %x function %x is VGA compatible and is not supported for hot removing. "
-				     "Please choose another device.\n", device, function);
+				err ("The device %x function %x is VGA compatible and is not supported for hot removing.  Please choose another device.\n", device, function);
 				return -ENODEV;
 			} else if (class == PCI_CLASS_DISPLAY_VGA) {
-				err ("The device %x function %x is not supported for hot removing. "
-				     "Please choose another device.\n", device, function);
+				err ("The device %x function %x is not supported for hot removing.  Please choose another device.\n", device, function);
 				return -ENODEV;
 			}
 
@@ -1513,9 +1509,7 @@
 				case PCI_HEADER_TYPE_BRIDGE:
 					class >>= 8;
 					if (class != PCI_CLASS_BRIDGE_PCI) {
-						err ("This device %x function %x is not PCI-to-PCI bridge, "
-						     "and is not supported for hot-removing. "
-						     "Please try another card.\n", device, function);
+						err ("This device %x function %x is not PCI-to-PCI bridge, and is not supported for hot-removing.  Please try another card.\n", device, function);
 						return -ENODEV;
 					}
 					rc = unconfigure_boot_bridge (busno, device, function);
@@ -1529,9 +1523,7 @@
 				case PCI_HEADER_TYPE_MULTIBRIDGE:
 					class >>= 8;
 					if (class != PCI_CLASS_BRIDGE_PCI) {
-						err ("This device %x function %x is not PCI-to-PCI bridge, "
-						     "and is not supported for hot-removing. "
-						     "Please try another card.\n", device, function);
+						err ("This device %x function %x is not PCI-to-PCI bridge,  and is not supported for hot-removing.  Please try another card.\n", device, function);
 						return -ENODEV;
 					}
 					rc = unconfigure_boot_bridge (busno, device, function);

diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c
index a265acb..f34745a 100644
--- a/drivers/pci/hotplug/ibmphp_res.c
+++ b/drivers/pci/hotplug/ibmphp_res.c

@@ -46,9 +46,9 @@
 
 static LIST_HEAD(gbuses);
 
-static struct bus_node * __init alloc_error_bus (struct ebda_pci_rsrc * curr, u8 busno, int flag)
+static struct bus_node * __init alloc_error_bus (struct ebda_pci_rsrc *curr, u8 busno, int flag)
 {
-	struct bus_node * newbus;
+	struct bus_node *newbus;
 
 	if (!(curr) && !(flag)) {
 		err ("NULL pointer passed\n");
@@ -69,7 +69,7 @@
 	return newbus;
 }
 
-static struct resource_node * __init alloc_resources (struct ebda_pci_rsrc * curr)
+static struct resource_node * __init alloc_resources (struct ebda_pci_rsrc *curr)
 {
 	struct resource_node *rs;
 
@@ -93,7 +93,7 @@
 
 static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node **new_range, struct ebda_pci_rsrc *curr, int flag, u8 first_bus)
 {
-	struct bus_node * newbus;
+	struct bus_node *newbus;
 	struct range_node *newrange;
 	u8 num_ranges = 0;
 
@@ -789,8 +789,7 @@
 	bus_cur = find_bus_wprev (res->busno, NULL, 0);
 
 	if (!bus_cur) {
-		err ("cannot find corresponding bus of the io resource to remove  "
-			"bailing out...\n");
+		err ("cannot find corresponding bus of the io resource to remove  bailing out...\n");
 		return -ENODEV;
 	}
 
@@ -934,9 +933,9 @@
 	return 0;
 }
 
-static struct range_node * find_range (struct bus_node *bus_cur, struct resource_node * res)
+static struct range_node *find_range (struct bus_node *bus_cur, struct resource_node *res)
 {
-	struct range_node * range = NULL;
+	struct range_node *range = NULL;
 
 	switch (res->type) {
 		case IO:

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index cfa92a9..56d8486 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c

@@ -59,14 +59,12 @@
 #define DRIVER_DESC	"PCI Hot Plug PCI Core"
 
 
-//////////////////////////////////////////////////////////////////
-
 static LIST_HEAD(pci_hotplug_slot_list);
 static DEFINE_MUTEX(pci_hp_mutex);
 
 /* Weee, fun with macros... */
-#define GET_STATUS(name,type)	\
-static int get_##name (struct hotplug_slot *slot, type *value)		\
+#define GET_STATUS(name, type)	\
+static int get_##name(struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
@@ -92,42 +90,41 @@
 
 	retval = get_power_status(slot->hotplug, &value);
 	if (retval)
-		goto exit;
-	retval = sprintf (buf, "%d\n", value);
-exit:
-	return retval;
+		return retval;
+
+	return sprintf(buf, "%d\n", value);
 }
 
 static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
-		size_t count)
+				size_t count)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
 	unsigned long lpower;
 	u8 power;
 	int retval = 0;
 
-	lpower = simple_strtoul (buf, NULL, 10);
+	lpower = simple_strtoul(buf, NULL, 10);
 	power = (u8)(lpower & 0xff);
-	dbg ("power = %d\n", power);
+	dbg("power = %d\n", power);
 
 	if (!try_module_get(slot->ops->owner)) {
 		retval = -ENODEV;
 		goto exit;
 	}
 	switch (power) {
-		case 0:
-			if (slot->ops->disable_slot)
-				retval = slot->ops->disable_slot(slot);
-			break;
+	case 0:
+		if (slot->ops->disable_slot)
+			retval = slot->ops->disable_slot(slot);
+		break;
 
-		case 1:
-			if (slot->ops->enable_slot)
-				retval = slot->ops->enable_slot(slot);
-			break;
+	case 1:
+		if (slot->ops->enable_slot)
+			retval = slot->ops->enable_slot(slot);
+		break;
 
-		default:
-			err ("Illegal value specified for power\n");
-			retval = -EINVAL;
+	default:
+		err("Illegal value specified for power\n");
+		retval = -EINVAL;
 	}
 	module_put(slot->ops->owner);
 
@@ -150,24 +147,22 @@
 
 	retval = get_attention_status(slot->hotplug, &value);
 	if (retval)
-		goto exit;
-	retval = sprintf(buf, "%d\n", value);
+		return retval;
 
-exit:
-	return retval;
+	return sprintf(buf, "%d\n", value);
 }
 
 static ssize_t attention_write_file(struct pci_slot *slot, const char *buf,
-		size_t count)
+				    size_t count)
 {
 	struct hotplug_slot_ops *ops = slot->hotplug->ops;
 	unsigned long lattention;
 	u8 attention;
 	int retval = 0;
 
-	lattention = simple_strtoul (buf, NULL, 10);
+	lattention = simple_strtoul(buf, NULL, 10);
 	attention = (u8)(lattention & 0xff);
-	dbg (" - attention = %d\n", attention);
+	dbg(" - attention = %d\n", attention);
 
 	if (!try_module_get(ops->owner)) {
 		retval = -ENODEV;
@@ -196,11 +191,9 @@
 
 	retval = get_latch_status(slot->hotplug, &value);
 	if (retval)
-		goto exit;
-	retval = sprintf (buf, "%d\n", value);
+		return retval;
 
-exit:
-	return retval;
+	return sprintf(buf, "%d\n", value);
 }
 
 static struct pci_slot_attribute hotplug_slot_attr_latch = {
@@ -215,11 +208,9 @@
 
 	retval = get_adapter_status(slot->hotplug, &value);
 	if (retval)
-		goto exit;
-	retval = sprintf (buf, "%d\n", value);
+		return retval;
 
-exit:
-	return retval;
+	return sprintf(buf, "%d\n", value);
 }
 
 static struct pci_slot_attribute hotplug_slot_attr_presence = {
@@ -228,7 +219,7 @@
 };
 
 static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
-		size_t count)
+			       size_t count)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
 	unsigned long ltest;
@@ -237,7 +228,7 @@
 
 	ltest = simple_strtoul (buf, NULL, 10);
 	test = (u32)(ltest & 0xffffffff);
-	dbg ("test = %d\n", test);
+	dbg("test = %d\n", test);
 
 	if (!try_module_get(slot->ops->owner)) {
 		retval = -ENODEV;
@@ -261,6 +252,7 @@
 static bool has_power_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
+
 	if ((!slot) || (!slot->ops))
 		return false;
 	if ((slot->ops->enable_slot) ||
@@ -273,6 +265,7 @@
 static bool has_attention_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
+
 	if ((!slot) || (!slot->ops))
 		return false;
 	if ((slot->ops->set_attention_status) ||
@@ -284,6 +277,7 @@
 static bool has_latch_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
+
 	if ((!slot) || (!slot->ops))
 		return false;
 	if (slot->ops->get_latch_status)
@@ -294,6 +288,7 @@
 static bool has_adapter_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
+
 	if ((!slot) || (!slot->ops))
 		return false;
 	if (slot->ops->get_adapter_status)
@@ -304,6 +299,7 @@
 static bool has_test_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
+
 	if ((!slot) || (!slot->ops))
 		return false;
 	if (slot->ops->hardware_test)
@@ -397,13 +393,13 @@
 	pci_hp_remove_module_link(slot);
 }
 
-static struct hotplug_slot *get_slot_from_name (const char *name)
+static struct hotplug_slot *get_slot_from_name(const char *name)
 {
 	struct hotplug_slot *slot;
 	struct list_head *tmp;
 
-	list_for_each (tmp, &pci_hotplug_slot_list) {
-		slot = list_entry (tmp, struct hotplug_slot, slot_list);
+	list_for_each(tmp, &pci_hotplug_slot_list) {
+		slot = list_entry(tmp, struct hotplug_slot, slot_list);
 		if (strcmp(hotplug_slot_name(slot), name) == 0)
 			return slot;
 	}
@@ -436,8 +432,7 @@
 	if ((slot->info == NULL) || (slot->ops == NULL))
 		return -EINVAL;
 	if (slot->release == NULL) {
-		dbg("Why are you trying to register a hotplug slot "
-		    "without a proper release function?\n");
+		dbg("Why are you trying to register a hotplug slot without a proper release function?\n");
 		return -EINVAL;
 	}
 
@@ -468,6 +463,7 @@
 	mutex_unlock(&pci_hp_mutex);
 	return result;
 }
+EXPORT_SYMBOL_GPL(__pci_hp_register);
 
 /**
  * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem
@@ -506,6 +502,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pci_hp_deregister);
 
 /**
  * pci_hp_change_slot_info - changes the slot's information structure in the core
@@ -527,24 +524,23 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);
 
-static int __init pci_hotplug_init (void)
+static int __init pci_hotplug_init(void)
 {
 	int result;
 
 	result = cpci_hotplug_init(debug);
 	if (result) {
-		err ("cpci_hotplug_init with error %d\n", result);
-		goto err_cpci;
+		err("cpci_hotplug_init with error %d\n", result);
+		return result;
 	}
 
-	info (DRIVER_DESC " version: " DRIVER_VERSION "\n");
-
-err_cpci:
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 	return result;
 }
 
-static void __exit pci_hotplug_exit (void)
+static void __exit pci_hotplug_exit(void)
 {
 	cpci_hotplug_exit();
 }
@@ -557,7 +553,3 @@
 MODULE_LICENSE("GPL");
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
-
-EXPORT_SYMBOL_GPL(__pci_hp_register);
-EXPORT_SYMBOL_GPL(pci_hp_deregister);
-EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);

diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 20fea57..93cc926 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c

@@ -103,10 +103,10 @@
 }
 
 static struct pcie_port_service_driver __initdata dummy_driver = {
-        .name           = "pciehp_dummy",
+	.name		= "pciehp_dummy",
 	.port_type	= PCIE_ANY_PORT,
 	.service	= PCIE_PORT_SERVICE_HP,
-        .probe          = dummy_probe,
+	.probe		= dummy_probe,
 };
 
 static int __init select_detection_mode(void)

diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 0e0a2ff..a2297db 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c

@@ -266,8 +266,7 @@
 	rc = init_slot(ctrl);
 	if (rc) {
 		if (rc == -EBUSY)
-			ctrl_warn(ctrl, "Slot already registered by another "
-				  "hotplug driver\n");
+			ctrl_warn(ctrl, "Slot already registered by another hotplug driver\n");
 		else
 			ctrl_err(ctrl, "Slot initialization failed\n");
 		goto err_out_release_ctlr;
@@ -312,12 +311,12 @@
 }
 
 #ifdef CONFIG_PM
-static int pciehp_suspend (struct pcie_device *dev)
+static int pciehp_suspend(struct pcie_device *dev)
 {
 	return 0;
 }
 
-static int pciehp_resume (struct pcie_device *dev)
+static int pciehp_resume(struct pcie_device *dev)
 {
 	struct controller *ctrl;
 	struct slot *slot;

diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index c75e6a6..ff32e85 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c

@@ -175,7 +175,7 @@
    hotplug controller logic
  */
 
-static void set_slot_off(struct controller *ctrl, struct slot * pslot)
+static void set_slot_off(struct controller *ctrl, struct slot *pslot)
 {
 	/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
 	if (POWER_CTRL(ctrl)) {
@@ -376,14 +376,12 @@
 		pciehp_get_power_status(p_slot, &getstatus);
 		if (getstatus) {
 			p_slot->state = BLINKINGOFF_STATE;
-			ctrl_info(ctrl,
-				  "PCI slot #%s - powering off due to button "
-				  "press.\n", slot_name(p_slot));
+			ctrl_info(ctrl, "PCI slot #%s - powering off due to button press\n",
+				  slot_name(p_slot));
 		} else {
 			p_slot->state = BLINKINGON_STATE;
-			ctrl_info(ctrl,
-				  "PCI slot #%s - powering on due to button "
-				  "press.\n", slot_name(p_slot));
+			ctrl_info(ctrl, "PCI slot #%s - powering on due to button press\n",
+				  slot_name(p_slot));
 		}
 		/* blink green LED and turn off amber */
 		pciehp_green_led_blink(p_slot);
@@ -404,8 +402,8 @@
 		else
 			pciehp_green_led_off(p_slot);
 		pciehp_set_attention_status(p_slot, 0);
-		ctrl_info(ctrl, "PCI slot #%s - action canceled "
-			  "due to button press\n", slot_name(p_slot));
+		ctrl_info(ctrl, "PCI slot #%s - action canceled due to button press\n",
+			  slot_name(p_slot));
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 1463412..42914e0 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c

@@ -174,12 +174,10 @@
 			 * event even though it supports none of power
 			 * controller, attention led, power led and EMI.
 			 */
-			ctrl_dbg(ctrl, "Unexpected CMD_COMPLETED. Need to "
-				 "wait for command completed event.\n");
+			ctrl_dbg(ctrl, "Unexpected CMD_COMPLETED. Need to wait for command completed event\n");
 			ctrl->no_cmd_complete = 0;
 		} else {
-			ctrl_dbg(ctrl, "Unexpected CMD_COMPLETED. Maybe "
-				 "the controller is broken.\n");
+			ctrl_dbg(ctrl, "Unexpected CMD_COMPLETED. Maybe the controller is broken\n");
 		}
 	}
 
@@ -203,7 +201,7 @@
 		if (!(slot_ctrl & PCI_EXP_SLTCTL_HPIE) ||
 		    !(slot_ctrl & PCI_EXP_SLTCTL_CCIE))
 			poll = 1;
-                pcie_wait_cmd(ctrl, poll);
+		pcie_wait_cmd(ctrl, poll);
 	}
 	mutex_unlock(&ctrl->ctrl_lock);
 }
@@ -276,15 +274,15 @@
 	bool found;
 	u16 lnk_status;
 
-        /*
-         * Data Link Layer Link Active Reporting must be capable for
-         * hot-plug capable downstream port. But old controller might
-         * not implement it. In this case, we wait for 1000 ms.
-         */
-        if (ctrl->link_active_reporting)
-                pcie_wait_link_active(ctrl);
-        else
-                msleep(1000);
+	/*
+	 * Data Link Layer Link Active Reporting must be capable for
+	 * hot-plug capable downstream port. But old controller might
+	 * not implement it. In this case, we wait for 1000 ms.
+	*/
+	if (ctrl->link_active_reporting)
+		pcie_wait_link_active(ctrl);
+	else
+		msleep(1000);
 
 	/* wait 100ms before read pci conf, and try in 1s */
 	msleep(100);
@@ -295,7 +293,7 @@
 	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
 	if ((lnk_status & PCI_EXP_LNKSTA_LT) ||
 	    !(lnk_status & PCI_EXP_LNKSTA_NLW)) {
-		ctrl_err(ctrl, "Link Training Error occurs \n");
+		ctrl_err(ctrl, "Link Training Error occurs\n");
 		return -1;
 	}
 
@@ -414,7 +412,7 @@
 		return;
 
 	switch (value) {
-	case 0 :	/* turn off */
+	case 0:		/* turn off */
 		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_OFF;
 		break;
 	case 1:		/* turn on */
@@ -470,7 +468,7 @@
 		 PCI_EXP_SLTCTL_PWR_IND_BLINK);
 }
 
-int pciehp_power_on_slot(struct slot * slot)
+int pciehp_power_on_slot(struct slot *slot)
 {
 	struct controller *ctrl = slot->ctrl;
 	struct pci_dev *pdev = ctrl_dev(ctrl);
@@ -496,7 +494,7 @@
 	return retval;
 }
 
-void pciehp_power_off_slot(struct slot * slot)
+void pciehp_power_off_slot(struct slot *slot)
 {
 	struct controller *ctrl = slot->ctrl;
 
@@ -756,7 +754,7 @@
 	ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
-#define FLAG(x,y)	(((x) & (y)) ? '+' : '-')
+#define FLAG(x, y)	(((x) & (y)) ? '+' : '-')
 
 struct controller *pcie_init(struct pcie_device *dev)
 {
@@ -783,14 +781,14 @@
 	 */
 	if (NO_CMD_CMPL(ctrl) ||
 	    !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl)))
-	    ctrl->no_cmd_complete = 1;
+		ctrl->no_cmd_complete = 1;
 
-        /* Check if Data Link Layer Link Active Reporting is implemented */
-        pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
-        if (link_cap & PCI_EXP_LNKCAP_DLLLARC) {
-                ctrl_dbg(ctrl, "Link Active Reporting supported\n");
-                ctrl->link_active_reporting = 1;
-        }
+	/* Check if Data Link Layer Link Active Reporting is implemented */
+	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
+	if (link_cap & PCI_EXP_LNKCAP_DLLLARC) {
+		ctrl_dbg(ctrl, "Link Active Reporting supported\n");
+		ctrl->link_active_reporting = 1;
+	}
 
 	/* Clear all remaining event bits in Slot Status register */
 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,

diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index b6cb1df..5f871f4 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c

@@ -46,9 +46,8 @@
 
 	dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
 	if (dev) {
-		ctrl_err(ctrl, "Device %s already exists "
-			 "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
-			 pci_domain_nr(parent), parent->number);
+		ctrl_err(ctrl, "Device %s already exists at %04x:%02x:00, cannot hot-add\n",
+			 pci_name(dev), pci_domain_nr(parent), parent->number);
 		pci_dev_put(dev);
 		ret = -EEXIST;
 		goto out;

diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c
index ac69094..d062c00 100644
--- a/drivers/pci/hotplug/pcihp_skeleton.c
+++ b/drivers/pci/hotplug/pcihp_skeleton.c

@@ -51,7 +51,7 @@
 #define dbg(format, arg...)					\
 	do {							\
 		if (debug)					\
-			printk (KERN_DEBUG "%s: " format "\n",	\
+			printk(KERN_DEBUG "%s: " format "\n",	\
 				MY_NAME , ## arg);		\
 	} while (0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
@@ -128,18 +128,18 @@
 	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
 
 	switch (status) {
-		case 0:
-			/*
-			 * Fill in code here to turn light off
-			 */
-			break;
+	case 0:
+		/*
+		 * Fill in code here to turn light off
+		 */
+		break;
 
-		case 1:
-		default:
-			/*
-			 * Fill in code here to turn light on
-			 */
-			break;
+	case 1:
+	default:
+		/*
+		 * Fill in code here to turn light on
+		 */
+		break;
 	}
 
 	return retval;
@@ -153,12 +153,12 @@
 	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
 
 	switch (value) {
-		case 0:
-			/* Specify a test here */
-			break;
-		case 1:
-			/* Specify another test here */
-			break;
+	case 0:
+		/* Specify a test here */
+		break;
+	case 1:
+		/* Specify another test here */
+		break;
 	}
 
 	return retval;

diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 984d708..93aa29f 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c

@@ -39,6 +39,7 @@
 
 bool rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
+EXPORT_SYMBOL_GPL(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
 #define DRIVER_AUTHOR	"Linda Xie <lxie@us.ibm.com>"
@@ -88,7 +89,7 @@
  * @hotplug_slot: slot to get status
  * @value: pointer to store status
  */
-static int get_power_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int retval, level;
 	struct slot *slot = (struct slot *)hotplug_slot->private;
@@ -104,14 +105,14 @@
  * @hotplug_slot: slot to get status
  * @value: pointer to store status
  */
-static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct slot *slot = (struct slot *)hotplug_slot->private;
 	*value = slot->hotplug_slot->info->attention_status;
 	return 0;
 }
 
-static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 * value)
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct slot *slot = (struct slot *)hotplug_slot->private;
 	int rc, state;
@@ -241,6 +242,7 @@
 
 	return -EINVAL;
 }
+EXPORT_SYMBOL_GPL(rpaphp_get_drc_props);
 
 static int is_php_type(char *drc_type)
 {
@@ -350,6 +352,7 @@
 	/* XXX FIXME: reports a failure only if last entry in loop failed */
 	return retval;
 }
+EXPORT_SYMBOL_GPL(rpaphp_add_slot);
 
 static void __exit cleanup_slots(void)
 {
@@ -443,7 +446,3 @@
 
 module_init(rpaphp_init);
 module_exit(rpaphp_exit);
-
-EXPORT_SYMBOL_GPL(rpaphp_add_slot);
-EXPORT_SYMBOL_GPL(rpaphp_slot_head);
-EXPORT_SYMBOL_GPL(rpaphp_get_drc_props);

diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index 613043f..bada2099 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c

@@ -188,7 +188,7 @@
 	return 0;
 }
 
-static struct hotplug_slot * sn_hp_destroy(void)
+static struct hotplug_slot *sn_hp_destroy(void)
 {
 	struct slot *slot;
 	struct pci_slot *pci_slot;
@@ -250,15 +250,13 @@
 	}
 
 	if (rc == PCI_L1_ERR) {
-		dev_dbg(&slot->pci_bus->self->dev,
-			"L1 failure %d with message: %s",
+		dev_dbg(&slot->pci_bus->self->dev, "L1 failure %d with message: %s",
 			resp.resp_sub_errno, resp.resp_l1_msg);
 		return -EPERM;
 	}
 
 	if (rc) {
-		dev_dbg(&slot->pci_bus->self->dev,
-			"insert failed with error %d sub-error %d\n",
+		dev_dbg(&slot->pci_bus->self->dev, "insert failed with error %d sub-error %d\n",
 			rc, resp.resp_sub_errno);
 		return -EIO;
 	}
@@ -288,21 +286,18 @@
 	}
 
 	if ((action == PCI_REQ_SLOT_ELIGIBLE) && (rc == PCI_EMPTY_33MHZ)) {
-		dev_dbg(&slot->pci_bus->self->dev,
-			"Cannot remove last 33MHz card\n");
+		dev_dbg(&slot->pci_bus->self->dev, "Cannot remove last 33MHz card\n");
 		return -EPERM;
 	}
 
 	if ((action == PCI_REQ_SLOT_ELIGIBLE) && (rc == PCI_L1_ERR)) {
-		dev_dbg(&slot->pci_bus->self->dev,
-			"L1 failure %d with message \n%s\n",
+		dev_dbg(&slot->pci_bus->self->dev, "L1 failure %d with message \n%s\n",
 			resp.resp_sub_errno, resp.resp_l1_msg);
 		return -EPERM;
 	}
 
 	if ((action == PCI_REQ_SLOT_ELIGIBLE) && rc) {
-		dev_dbg(&slot->pci_bus->self->dev,
-			"remove failed with error %d sub-error %d\n",
+		dev_dbg(&slot->pci_bus->self->dev, "remove failed with error %d sub-error %d\n",
 			rc, resp.resp_sub_errno);
 		return -EIO;
 	}
@@ -417,8 +412,7 @@
 		phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
 		if (acpi_bus_get_device(phandle, &pdevice)) {
-			dev_dbg(&slot->pci_bus->self->dev,
-				"no parent device, assuming NULL\n");
+			dev_dbg(&slot->pci_bus->self->dev, "no parent device, assuming NULL\n");
 			pdevice = NULL;
 		}
 
@@ -447,10 +441,8 @@
 
 				ret = acpi_bus_scan(chandle);
 				if (ACPI_FAILURE(ret)) {
-					printk(KERN_ERR "%s: acpi_bus_scan "
-					       "failed (0x%x) for slot %d "
-					       "func %d\n", __func__,
-					       ret, (int)(adr>>16),
+					printk(KERN_ERR "%s: acpi_bus_scan failed (0x%x) for slot %d func %d\n",
+					       __func__, ret, (int)(adr>>16),
 					       (int)(adr&0xffff));
 					/* try to continue on */
 				}
@@ -471,11 +463,9 @@
 	mutex_unlock(&sn_hotplug_mutex);
 
 	if (rc == 0)
-		dev_dbg(&slot->pci_bus->self->dev,
-			"insert operation successful\n");
+		dev_dbg(&slot->pci_bus->self->dev, "insert operation successful\n");
 	else
-		dev_dbg(&slot->pci_bus->self->dev,
-			"insert operation failed rc = %d\n", rc);
+		dev_dbg(&slot->pci_bus->self->dev, "insert operation failed rc = %d\n", rc);
 
 	return rc;
 }
@@ -561,8 +551,7 @@
 		acpi_status ret;
 		ret = acpi_unload_table_id(ssdt_id);
 		if (ACPI_FAILURE(ret)) {
-			printk(KERN_ERR "%s: acpi_unload_table_id "
-			       "failed (0x%x) for id %d\n",
+			printk(KERN_ERR "%s: acpi_unload_table_id failed (0x%x) for id %d\n",
 			       __func__, ret, ssdt_id);
 			/* try to continue on */
 		}

diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 6152909..5897d51 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h

@@ -180,7 +180,7 @@
 int shpchp_unconfigure_device(struct slot *p_slot);
 void cleanup_slots(struct controller *ctrl);
 void shpchp_queue_pushbutton_work(struct work_struct *work);
-int shpc_init( struct controller *ctrl, struct pci_dev *pdev);
+int shpc_init(struct controller *ctrl, struct pci_dev *pdev);
 
 static inline const char *slot_name(struct slot *slot)
 {
@@ -295,7 +295,7 @@
 		pci_write_config_dword(p_slot->ctrl->pci_dev, PCIX_MEM_BASE_LIMIT_OFFSET, rse_set);
 	}
 	/* restore MiscII register */
-	pci_read_config_dword( p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, &pcix_misc2_temp );
+	pci_read_config_dword(p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, &pcix_misc2_temp );
 
 	if (p_slot->ctrl->pcix_misc2_reg & SERRFATALENABLE_MASK)
 		pcix_misc2_temp |= SERRFATALENABLE_MASK;

diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index faf13ab..294ef4b 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c

@@ -143,8 +143,7 @@
 		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
 		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
 
-		ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
-			 "hp_slot=%x sun=%x slot_device_offset=%x\n",
+		ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x hp_slot=%x sun=%x slot_device_offset=%x\n",
 			 pci_domain_nr(ctrl->pci_dev->subordinate),
 			 slot->bus, slot->device, slot->hp_slot, slot->number,
 			 ctrl->slot_device_offset);

diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
index 6efc2ec..a81fb67 100644
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c

@@ -162,7 +162,7 @@
 
 	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
 
-	if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
+	if (!(p_slot->hpc_ops->query_power_fault(p_slot))) {
 		/*
 		 * Power fault Cleared
 		 */
@@ -196,8 +196,8 @@
 
 	ctrl_dbg(ctrl, "Change speed to %d\n", speed);
 	if ((rc = p_slot->hpc_ops->set_bus_speed_mode(p_slot, speed))) {
-		ctrl_err(ctrl, "%s: Issue of set bus speed mode command "
-			 "failed\n", __func__);
+		ctrl_err(ctrl, "%s: Issue of set bus speed mode command failed\n",
+			 __func__);
 		return WRONG_BUS_FREQUENCY;
 	}
 	return rc;
@@ -215,8 +215,8 @@
 	 */
 	if (flag) {
 		if (asp < bsp) {
-			ctrl_err(ctrl, "Speed of bus %x and adapter %x "
-				 "mismatch\n", bsp, asp);
+			ctrl_err(ctrl, "Speed of bus %x and adapter %x mismatch\n",
+				 bsp, asp);
 			rc = WRONG_BUS_FREQUENCY;
 		}
 		return rc;
@@ -250,8 +250,7 @@
 
 	hp_slot = p_slot->device - ctrl->slot_device_offset;
 
-	ctrl_dbg(ctrl,
-		 "%s: p_slot->device, slot_offset, hp_slot = %d, %d ,%d\n",
+	ctrl_dbg(ctrl, "%s: p_slot->device, slot_offset, hp_slot = %d, %d ,%d\n",
 		 __func__, p_slot->device, ctrl->slot_device_offset, hp_slot);
 
 	/* Power on slot without connecting to bus */
@@ -263,8 +262,8 @@
 
 	if ((ctrl->pci_dev->vendor == 0x8086) && (ctrl->pci_dev->device == 0x0332)) {
 		if ((rc = p_slot->hpc_ops->set_bus_speed_mode(p_slot, PCI_SPEED_33MHz))) {
-			ctrl_err(ctrl, "%s: Issue of set bus speed mode command"
-				 " failed\n", __func__);
+			ctrl_err(ctrl, "%s: Issue of set bus speed mode command failed\n",
+				 __func__);
 			return WRONG_BUS_FREQUENCY;
 		}
 
@@ -277,8 +276,7 @@
 
 	rc = p_slot->hpc_ops->get_adapter_speed(p_slot, &asp);
 	if (rc) {
-		ctrl_err(ctrl, "Can't get adapter speed or "
-			 "bus mode mismatch\n");
+		ctrl_err(ctrl, "Can't get adapter speed or bus mode mismatch\n");
 		return WRONG_BUS_FREQUENCY;
 	}
 
@@ -289,8 +287,8 @@
 	if (!list_empty(&ctrl->pci_dev->subordinate->devices))
 		slots_not_empty = 1;
 
-	ctrl_dbg(ctrl, "%s: slots_not_empty %d, adapter_speed %d, bus_speed %d,"
-		 " max_bus_speed %d\n", __func__, slots_not_empty, asp,
+	ctrl_dbg(ctrl, "%s: slots_not_empty %d, adapter_speed %d, bus_speed %d, max_bus_speed %d\n",
+		 __func__, slots_not_empty, asp,
 		 bsp, msp);
 
 	rc = fix_bus_speed(ctrl, p_slot, slots_not_empty, asp, bsp, msp);
@@ -490,12 +488,12 @@
 		p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (getstatus) {
 			p_slot->state = BLINKINGOFF_STATE;
-			ctrl_info(ctrl, "PCI slot #%s - powering off due to "
-				  "button press.\n", slot_name(p_slot));
+			ctrl_info(ctrl, "PCI slot #%s - powering off due to button press\n",
+				  slot_name(p_slot));
 		} else {
 			p_slot->state = BLINKINGON_STATE;
-			ctrl_info(ctrl, "PCI slot #%s - powering on due to "
-				  "button press.\n", slot_name(p_slot));
+			ctrl_info(ctrl, "PCI slot #%s - powering on due to button press\n",
+				  slot_name(p_slot));
 		}
 		/* blink green LED and turn off amber */
 		p_slot->hpc_ops->green_led_blink(p_slot);
@@ -518,8 +516,8 @@
 		else
 			p_slot->hpc_ops->green_led_off(p_slot);
 		p_slot->hpc_ops->set_attention_status(p_slot, 0);
-		ctrl_info(ctrl, "PCI slot #%s - action canceled due to "
-			  "button press\n", slot_name(p_slot));
+		ctrl_info(ctrl, "PCI slot #%s - action canceled due to button press\n",
+			  slot_name(p_slot));
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:

diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index 2d7f474..29e2235 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c

@@ -341,8 +341,7 @@
 
 	cmd_status = hpc_check_cmd_status(slot->ctrl);
 	if (cmd_status) {
-		ctrl_err(ctrl,
-			 "Failed to issued command 0x%x (error code = %d)\n",
+		ctrl_err(ctrl, "Failed to issued command 0x%x (error code = %d)\n",
 			 cmd, cmd_status);
 		retval = -EIO;
 	}
@@ -404,7 +403,7 @@
 	return 0;
 }
 
-static int hpc_get_power_status(struct slot * slot, u8 *status)
+static int hpc_get_power_status(struct slot *slot, u8 *status)
 {
 	struct controller *ctrl = slot->ctrl;
 	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
@@ -528,7 +527,7 @@
 	return retval;
 }
 
-static int hpc_query_power_fault(struct slot * slot)
+static int hpc_query_power_fault(struct slot *slot)
 {
 	struct controller *ctrl = slot->ctrl;
 	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
@@ -614,7 +613,7 @@
 	release_mem_region(ctrl->mmio_base, ctrl->mmio_size);
 }
 
-static int hpc_power_on_slot(struct slot * slot)
+static int hpc_power_on_slot(struct slot *slot)
 {
 	int retval;
 
@@ -625,7 +624,7 @@
 	return retval;
 }
 
-static int hpc_slot_enable(struct slot * slot)
+static int hpc_slot_enable(struct slot *slot)
 {
 	int retval;
 
@@ -638,7 +637,7 @@
 	return retval;
 }
 
-static int hpc_slot_disable(struct slot * slot)
+static int hpc_slot_disable(struct slot *slot)
 {
 	int retval;
 
@@ -720,7 +719,7 @@
 }
 
 
-static int hpc_set_bus_speed_mode(struct slot * slot, enum pci_bus_speed value)
+static int hpc_set_bus_speed_mode(struct slot *slot, enum pci_bus_speed value)
 {
 	int retval;
 	struct controller *ctrl = slot->ctrl;
@@ -974,8 +973,8 @@
 		for (i = 0; i < 9 + num_slots; i++) {
 			rc = shpc_indirect_read(ctrl, i, &tempdword);
 			if (rc) {
-				ctrl_err(ctrl,
-					 "Cannot read creg (index = %d)\n", i);
+				ctrl_err(ctrl, "Cannot read creg (index = %d)\n",
+					 i);
 				goto abort;
 			}
 			ctrl_dbg(ctrl, " offset %d: value %x\n", i, tempdword);
@@ -1060,10 +1059,8 @@
 		/* Installs the interrupt handler */
 		rc = pci_enable_msi(pdev);
 		if (rc) {
-			ctrl_info(ctrl,
-				  "Can't get msi for the hotplug controller\n");
-			ctrl_info(ctrl,
-				  "Use INTx for the hotplug controller\n");
+			ctrl_info(ctrl, "Can't get msi for the hotplug controller\n");
+			ctrl_info(ctrl, "Use INTx for the hotplug controller\n");
 		}
 
 		rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED,
@@ -1071,8 +1068,8 @@
 		ctrl_dbg(ctrl, "request_irq %d (returns %d)\n",
 			 ctrl->pci_dev->irq, rc);
 		if (rc) {
-			ctrl_err(ctrl, "Can't get irq %d for the hotplug "
-				 "controller\n", ctrl->pci_dev->irq);
+			ctrl_err(ctrl, "Can't get irq %d for the hotplug controller\n",
+				 ctrl->pci_dev->irq);
 			goto abort_iounmap;
 		}
 	}

diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c
index 9202d13..469454e 100644
--- a/drivers/pci/hotplug/shpchp_pci.c
+++ b/drivers/pci/hotplug/shpchp_pci.c

@@ -46,9 +46,9 @@
 
 	dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (dev) {
-		ctrl_err(ctrl, "Device %s already exists "
-			 "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev),
-			 pci_domain_nr(parent), p_slot->bus, p_slot->device);
+		ctrl_err(ctrl, "Device %s already exists at %04x:%02x:%02x, cannot hot-add\n",
+			 pci_name(dev), pci_domain_nr(parent),
+			 p_slot->bus, p_slot->device);
 		pci_dev_put(dev);
 		ret = -EINVAL;
 		goto out;

diff --git a/drivers/pci/hotplug/shpchp_sysfs.c b/drivers/pci/hotplug/shpchp_sysfs.c
index e8c31fe..52875b3 100644
--- a/drivers/pci/hotplug/shpchp_sysfs.c
+++ b/drivers/pci/hotplug/shpchp_sysfs.c

@@ -38,7 +38,7 @@
 static ssize_t show_ctrl (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev;
-	char * out = buf;
+	char *out = buf;
 	int index, busnr;
 	struct resource *res;
 	struct pci_bus *bus;

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index d68b030..a94dd2c 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c

@@ -102,7 +102,7 @@
 	spin_unlock_irqrestore(&ht_irq_lock, flags);
 
 	max_irq = (data >> 16) & 0xff;
-	if ( idx > max_irq)
+	if (idx > max_irq)
 		return -EINVAL;
 
 	cfg = kmalloc(sizeof(*cfg), GFP_KERNEL);
@@ -131,6 +131,7 @@
 
 	return irq;
 }
+EXPORT_SYMBOL(__ht_create_irq);
 
 /**
  * ht_create_irq - create an irq and attach it to a device.
@@ -146,6 +147,7 @@
 {
 	return __ht_create_irq(dev, idx, NULL);
 }
+EXPORT_SYMBOL(ht_create_irq);
 
 /**
  * ht_destroy_irq - destroy an irq created with ht_create_irq
@@ -165,7 +167,4 @@
 
 	kfree(cfg);
 }
-
-EXPORT_SYMBOL(__ht_create_irq);
-EXPORT_SYMBOL(ht_create_irq);
 EXPORT_SYMBOL(ht_destroy_irq);

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 27a7e67..13f3d30 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c

@@ -413,7 +413,7 @@
 	if (dev->msi_irq_groups) {
 		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
 		msi_attrs = dev->msi_irq_groups[0]->attrs;
-		list_for_each_entry(entry, &dev->msi_list, list) {
+		while (msi_attrs[count]) {
 			dev_attr = container_of(msi_attrs[count],
 						struct device_attribute, attr);
 			kfree(dev_attr->attr.name);
@@ -980,8 +980,7 @@
 
 	/* Check whether driver already requested for MSI irq */
 	if (dev->msi_enabled) {
-		dev_info(&dev->dev, "can't enable MSI-X "
-		       "(MSI IRQ already assigned)\n");
+		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
 		return -EINVAL;
 	}
 	status = msix_capability_init(dev, entries, nvec);

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 837d71f..3f8e3db 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c

@@ -77,6 +77,7 @@
 
 	return retval;
 }
+EXPORT_SYMBOL_GPL(pci_add_dynid);
 
 static void pci_free_dynids(struct pci_driver *drv)
 {
@@ -98,15 +99,15 @@
  *
  * Allow PCI IDs to be added to an existing driver via sysfs.
  */
-static ssize_t
-store_new_id(struct device_driver *driver, const char *buf, size_t count)
+static ssize_t store_new_id(struct device_driver *driver, const char *buf,
+			    size_t count)
 {
 	struct pci_driver *pdrv = to_pci_driver(driver);
 	const struct pci_device_id *ids = pdrv->id_table;
-	__u32 vendor, device, subvendor=PCI_ANY_ID,
-		subdevice=PCI_ANY_ID, class=0, class_mask=0;
-	unsigned long driver_data=0;
-	int fields=0;
+	__u32 vendor, device, subvendor = PCI_ANY_ID,
+		subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+	unsigned long driver_data = 0;
+	int fields = 0;
 	int retval = 0;
 
 	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
@@ -166,8 +167,8 @@
  *
  * Removes a dynamic pci device ID to this driver.
  */
-static ssize_t
-store_remove_id(struct device_driver *driver, const char *buf, size_t count)
+static ssize_t store_remove_id(struct device_driver *driver, const char *buf,
+			       size_t count)
 {
 	struct pci_dynid *dynid, *n;
 	struct pci_driver *pdrv = to_pci_driver(driver);
@@ -235,6 +236,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(pci_match_id);
 
 static const struct pci_device_id pci_device_id_any = {
 	.vendor = PCI_ANY_ID,
@@ -372,8 +374,7 @@
  * returns 0 on success, else error.
  * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
  */
-static int
-__pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
+static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
 {
 	const struct pci_device_id *id;
 	int error = 0;
@@ -390,7 +391,7 @@
 	return error;
 }
 
-static int pci_device_probe(struct device * dev)
+static int pci_device_probe(struct device *dev)
 {
 	int error = 0;
 	struct pci_driver *drv;
@@ -406,10 +407,10 @@
 	return error;
 }
 
-static int pci_device_remove(struct device * dev)
+static int pci_device_remove(struct device *dev)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
 
 	if (drv) {
 		if (drv->remove) {
@@ -537,8 +538,8 @@
 
 static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
 
 	if (drv && drv->suspend) {
 		pci_power_t prev = pci_dev->current_state;
@@ -564,8 +565,8 @@
 
 static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
 
 	if (drv && drv->suspend_late) {
 		pci_power_t prev = pci_dev->current_state;
@@ -595,8 +596,8 @@
 
 static int pci_legacy_resume_early(struct device *dev)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
 
 	return drv && drv->resume_early ?
 			drv->resume_early(pci_dev) : 0;
@@ -604,8 +605,8 @@
 
 static int pci_legacy_resume(struct device *dev)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
 
 	pci_fixup_device(pci_fixup_resume, pci_dev);
 
@@ -1255,6 +1256,7 @@
 	/* register with core */
 	return driver_register(&drv->driver);
 }
+EXPORT_SYMBOL(__pci_register_driver);
 
 /**
  * pci_unregister_driver - unregister a pci driver
@@ -1266,12 +1268,12 @@
  * driverless.
  */
 
-void
-pci_unregister_driver(struct pci_driver *drv)
+void pci_unregister_driver(struct pci_driver *drv)
 {
 	driver_unregister(&drv->driver);
 	pci_free_dynids(drv);
 }
+EXPORT_SYMBOL(pci_unregister_driver);
 
 static struct pci_driver pci_compat_driver = {
 	.name = "compat"
@@ -1284,19 +1286,19 @@
  * Returns the appropriate pci_driver structure or %NULL if there is no
  * registered driver for the device.
  */
-struct pci_driver *
-pci_dev_driver(const struct pci_dev *dev)
+struct pci_driver *pci_dev_driver(const struct pci_dev *dev)
 {
 	if (dev->driver)
 		return dev->driver;
 	else {
 		int i;
-		for(i=0; i<=PCI_ROM_RESOURCE; i++)
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++)
 			if (dev->resource[i].flags & IORESOURCE_BUSY)
 				return &pci_compat_driver;
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(pci_dev_driver);
 
 /**
  * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure
@@ -1342,6 +1344,7 @@
 		get_device(&dev->dev);
 	return dev;
 }
+EXPORT_SYMBOL(pci_dev_get);
 
 /**
  * pci_dev_put - release a use of the pci device structure
@@ -1355,6 +1358,7 @@
 	if (dev)
 		put_device(&dev->dev);
 }
+EXPORT_SYMBOL(pci_dev_put);
 
 static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
@@ -1400,19 +1404,10 @@
 	.drv_groups	= pci_drv_groups,
 	.pm		= PCI_PM_OPS_PTR,
 };
+EXPORT_SYMBOL(pci_bus_type);
 
 static int __init pci_driver_init(void)
 {
 	return bus_register(&pci_bus_type);
 }
-
 postcore_initcall(pci_driver_init);
-
-EXPORT_SYMBOL_GPL(pci_add_dynid);
-EXPORT_SYMBOL(pci_match_id);
-EXPORT_SYMBOL(__pci_register_driver);
-EXPORT_SYMBOL(pci_unregister_driver);
-EXPORT_SYMBOL(pci_dev_driver);
-EXPORT_SYMBOL(pci_bus_type);
-EXPORT_SYMBOL(pci_dev_get);
-EXPORT_SYMBOL(pci_dev_put);

diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 45113da..a3fbe20 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c

@@ -40,9 +40,8 @@
 	SMBIOS_ATTR_INSTANCE_SHOW,
 };
 
-static size_t
-find_smbios_instance_string(struct pci_dev *pdev, char *buf,
-			    enum smbios_attr_enum attribute)
+static size_t find_smbios_instance_string(struct pci_dev *pdev, char *buf,
+					  enum smbios_attr_enum attribute)
 {
 	const struct dmi_device *dmi;
 	struct dmi_dev_onboard *donboard;
@@ -74,9 +73,8 @@
 	return 0;
 }
 
-static umode_t
-smbios_instance_string_exist(struct kobject *kobj, struct attribute *attr,
-			     int n)
+static umode_t smbios_instance_string_exist(struct kobject *kobj,
+					    struct attribute *attr, int n)
 {
 	struct device *dev;
 	struct pci_dev *pdev;
@@ -88,8 +86,8 @@
 					   S_IRUGO : 0;
 }
 
-static ssize_t
-smbioslabel_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t smbioslabel_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev;
 	pdev = to_pci_dev(dev);
@@ -98,9 +96,8 @@
 					   SMBIOS_ATTR_LABEL_SHOW);
 }
 
-static ssize_t
-smbiosinstance_show(struct device *dev,
-		    struct device_attribute *attr, char *buf)
+static ssize_t smbiosinstance_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev;
 	pdev = to_pci_dev(dev);
@@ -130,26 +127,22 @@
 	.is_visible = smbios_instance_string_exist,
 };
 
-static int
-pci_create_smbiosname_file(struct pci_dev *pdev)
+static int pci_create_smbiosname_file(struct pci_dev *pdev)
 {
 	return sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group);
 }
 
-static void
-pci_remove_smbiosname_file(struct pci_dev *pdev)
+static void pci_remove_smbiosname_file(struct pci_dev *pdev)
 {
 	sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group);
 }
 #else
-static inline int
-pci_create_smbiosname_file(struct pci_dev *pdev)
+static inline int pci_create_smbiosname_file(struct pci_dev *pdev)
 {
 	return -1;
 }
 
-static inline void
-pci_remove_smbiosname_file(struct pci_dev *pdev)
+static inline void pci_remove_smbiosname_file(struct pci_dev *pdev)
 {
 }
 #endif
@@ -175,8 +168,8 @@
 	buf[len] = '\n';
 }
 
-static int
-dsm_get_label(struct device *dev, char *buf, enum acpi_attr_enum attr)
+static int dsm_get_label(struct device *dev, char *buf,
+			 enum acpi_attr_enum attr)
 {
 	acpi_handle handle;
 	union acpi_object *obj, *tmp;
@@ -212,8 +205,7 @@
 	return len;
 }
 
-static bool
-device_has_dsm(struct device *dev)
+static bool device_has_dsm(struct device *dev)
 {
 	acpi_handle handle;
 
@@ -225,8 +217,8 @@
 				1 << DEVICE_LABEL_DSM);
 }
 
-static umode_t
-acpi_index_string_exist(struct kobject *kobj, struct attribute *attr, int n)
+static umode_t acpi_index_string_exist(struct kobject *kobj,
+				       struct attribute *attr, int n)
 {
 	struct device *dev;
 
@@ -238,14 +230,14 @@
 	return 0;
 }
 
-static ssize_t
-acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t acpilabel_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	return dsm_get_label(dev, buf, ACPI_ATTR_LABEL_SHOW);
 }
 
-static ssize_t
-acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t acpiindex_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	return dsm_get_label(dev, buf, ACPI_ATTR_INDEX_SHOW);
 }
@@ -271,33 +263,28 @@
 	.is_visible = acpi_index_string_exist,
 };
 
-static int
-pci_create_acpi_index_label_files(struct pci_dev *pdev)
+static int pci_create_acpi_index_label_files(struct pci_dev *pdev)
 {
 	return sysfs_create_group(&pdev->dev.kobj, &acpi_attr_group);
 }
 
-static int
-pci_remove_acpi_index_label_files(struct pci_dev *pdev)
+static int pci_remove_acpi_index_label_files(struct pci_dev *pdev)
 {
 	sysfs_remove_group(&pdev->dev.kobj, &acpi_attr_group);
 	return 0;
 }
 #else
-static inline int
-pci_create_acpi_index_label_files(struct pci_dev *pdev)
+static inline int pci_create_acpi_index_label_files(struct pci_dev *pdev)
 {
 	return -1;
 }
 
-static inline int
-pci_remove_acpi_index_label_files(struct pci_dev *pdev)
+static inline int pci_remove_acpi_index_label_files(struct pci_dev *pdev)
 {
 	return -1;
 }
 
-static inline bool
-device_has_dsm(struct device *dev)
+static inline bool device_has_dsm(struct device *dev)
 {
 	return false;
 }

diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index 2ff7750..886fb35 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c

@@ -55,7 +55,7 @@
 	p = ids;
 	while ((id = strsep(&p, ","))) {
 		unsigned int vendor, device, subvendor = PCI_ANY_ID,
-			subdevice = PCI_ANY_ID, class=0, class_mask=0;
+			subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
 		int fields;
 
 		if (!strlen(id))

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 84c3509..9ff0a90 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c

@@ -41,8 +41,8 @@
 {									\
 	struct pci_dev *pdev;						\
 									\
-	pdev = to_pci_dev (dev);					\
-	return sprintf (buf, format_string, pdev->field);		\
+	pdev = to_pci_dev(dev);						\
+	return sprintf(buf, format_string, pdev->field);		\
 }									\
 static DEVICE_ATTR_RO(field)
 
@@ -58,7 +58,7 @@
 					 char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	return sprintf (buf, "%u\n", pdev->broken_parity_status);
+	return sprintf(buf, "%u\n", pdev->broken_parity_status);
 }
 
 static ssize_t broken_parity_status_store(struct device *dev,
@@ -77,10 +77,8 @@
 }
 static DEVICE_ATTR_RW(broken_parity_status);
 
-static ssize_t pci_dev_show_local_cpu(struct device *dev,
-		int type,
-		struct device_attribute *attr,
-		char *buf)
+static ssize_t pci_dev_show_local_cpu(struct device *dev, int type,
+				      struct device_attribute *attr, char *buf)
 {
 	const struct cpumask *mask;
 	int len;
@@ -101,14 +99,14 @@
 }
 
 static ssize_t local_cpus_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	return pci_dev_show_local_cpu(dev, 1, attr, buf);
 }
 static DEVICE_ATTR_RO(local_cpus);
 
 static ssize_t local_cpulist_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
+				  struct device_attribute *attr, char *buf)
 {
 	return pci_dev_show_local_cpu(dev, 0, attr, buf);
 }
@@ -117,8 +115,7 @@
 /*
  * PCI Bus Class Devices
  */
-static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
-					int type,
+static ssize_t pci_bus_show_cpuaffinity(struct device *dev, int type,
 					struct device_attribute *attr,
 					char *buf)
 {
@@ -149,11 +146,11 @@
 static DEVICE_ATTR_RO(cpulistaffinity);
 
 /* show resources */
-static ssize_t
-resource_show(struct device * dev, struct device_attribute *attr, char * buf)
+static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	char * str = buf;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	char *str = buf;
 	int i;
 	int max;
 	resource_size_t start, end;
@@ -166,7 +163,7 @@
 	for (i = 0; i < max; i++) {
 		struct resource *res =  &pci_dev->resource[i];
 		pci_resource_to_user(pci_dev, i, res, &start, &end);
-		str += sprintf(str,"0x%016llx 0x%016llx 0x%016llx\n",
+		str += sprintf(str, "0x%016llx 0x%016llx 0x%016llx\n",
 			       (unsigned long long)start,
 			       (unsigned long long)end,
 			       (unsigned long long)res->flags);
@@ -175,7 +172,8 @@
 }
 static DEVICE_ATTR_RO(resource);
 
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 
@@ -187,9 +185,8 @@
 }
 static DEVICE_ATTR_RO(modalias);
 
-static ssize_t enabled_store(struct device *dev,
-			     struct device_attribute *attr, const char *buf,
-			     size_t count)
+static ssize_t enabled_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long val;
@@ -213,57 +210,56 @@
 	return result < 0 ? result : count;
 }
 
-static ssize_t enabled_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+static ssize_t enabled_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
 {
 	struct pci_dev *pdev;
 
-	pdev = to_pci_dev (dev);
-	return sprintf (buf, "%u\n", atomic_read(&pdev->enable_cnt));
+	pdev = to_pci_dev(dev);
+	return sprintf(buf, "%u\n", atomic_read(&pdev->enable_cnt));
 }
 static DEVICE_ATTR_RW(enabled);
 
 #ifdef CONFIG_NUMA
-static ssize_t
-numa_node_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
 {
-	return sprintf (buf, "%d\n", dev->numa_node);
+	return sprintf(buf, "%d\n", dev->numa_node);
 }
 static DEVICE_ATTR_RO(numa_node);
 #endif
 
-static ssize_t
-dma_mask_bits_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t dma_mask_bits_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	return sprintf (buf, "%d\n", fls64(pdev->dma_mask));
+	return sprintf(buf, "%d\n", fls64(pdev->dma_mask));
 }
 static DEVICE_ATTR_RO(dma_mask_bits);
 
-static ssize_t
-consistent_dma_mask_bits_show(struct device *dev, struct device_attribute *attr,
-				 char *buf)
+static ssize_t consistent_dma_mask_bits_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
 {
-	return sprintf (buf, "%d\n", fls64(dev->coherent_dma_mask));
+	return sprintf(buf, "%d\n", fls64(dev->coherent_dma_mask));
 }
 static DEVICE_ATTR_RO(consistent_dma_mask_bits);
 
-static ssize_t
-msi_bus_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t msi_bus_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 
 	if (!pdev->subordinate)
 		return 0;
 
-	return sprintf (buf, "%u\n",
-			!(pdev->subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI));
+	return sprintf(buf, "%u\n",
+		       !(pdev->subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI));
 }
 
-static ssize_t
-msi_bus_store(struct device *dev, struct device_attribute *attr,
-	      const char *buf, size_t count)
+static ssize_t msi_bus_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long val;
@@ -290,8 +286,8 @@
 	    !!val) {
 		pdev->subordinate->bus_flags ^= PCI_BUS_FLAGS_NO_MSI;
 
-		dev_warn(&pdev->dev, "forced subordinate bus to%s support MSI,"
-			 " bad things could happen\n", val ? "" : " not");
+		dev_warn(&pdev->dev, "forced subordinate bus to%s support MSI, bad things could happen\n",
+			 val ? "" : " not");
 	}
 
 	return count;
@@ -331,9 +327,9 @@
 	NULL,
 };
 
-static ssize_t
-dev_rescan_store(struct device *dev, struct device_attribute *attr,
-		 const char *buf, size_t count)
+static ssize_t dev_rescan_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
 {
 	unsigned long val;
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -352,9 +348,8 @@
 							(S_IWUSR|S_IWGRP),
 							NULL, dev_rescan_store);
 
-static ssize_t
-remove_store(struct device *dev, struct device_attribute *attr,
-	     const char *buf, size_t count)
+static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
 	unsigned long val;
 
@@ -369,9 +364,9 @@
 							(S_IWUSR|S_IWGRP),
 							NULL, remove_store);
 
-static ssize_t
-dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
-		 const char *buf, size_t count)
+static ssize_t dev_bus_rescan_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
 {
 	unsigned long val;
 	struct pci_bus *bus = to_pci_bus(dev);
@@ -412,7 +407,7 @@
 				   struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	return sprintf (buf, "%u\n", pdev->d3cold_allowed);
+	return sprintf(buf, "%u\n", pdev->d3cold_allowed);
 }
 static DEVICE_ATTR_RW(d3cold_allowed);
 #endif
@@ -607,8 +602,8 @@
 	NULL,
 };
 
-static ssize_t
-boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t boot_vga_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct pci_dev *vga_dev = vga_default_device();
@@ -622,22 +617,21 @@
 }
 static struct device_attribute vga_attr = __ATTR_RO(boot_vga);
 
-static ssize_t
-pci_read_config(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t off, size_t count)
+static ssize_t pci_read_config(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t off, size_t count)
 {
-	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+	struct pci_dev *dev = to_pci_dev(container_of(kobj, struct device,
+						      kobj));
 	unsigned int size = 64;
 	loff_t init_off = off;
-	u8 *data = (u8*) buf;
+	u8 *data = (u8 *) buf;
 
 	/* Several chips lock up trying to read undefined config space */
-	if (security_capable(filp->f_cred, &init_user_ns, CAP_SYS_ADMIN) == 0) {
+	if (security_capable(filp->f_cred, &init_user_ns, CAP_SYS_ADMIN) == 0)
 		size = dev->cfg_size;
-	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
+	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
-	}
 
 	if (off > size)
 		return 0;
@@ -700,15 +694,15 @@
 	return count;
 }
 
-static ssize_t
-pci_write_config(struct file* filp, struct kobject *kobj,
-		 struct bin_attribute *bin_attr,
-		 char *buf, loff_t off, size_t count)
+static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *buf,
+				loff_t off, size_t count)
 {
-	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+	struct pci_dev *dev = to_pci_dev(container_of(kobj, struct device,
+						      kobj));
 	unsigned int size = count;
 	loff_t init_off = off;
-	u8 *data = (u8*) buf;
+	u8 *data = (u8 *) buf;
 
 	if (off > dev->cfg_size)
 		return 0;
@@ -728,10 +722,10 @@
 	if ((off & 3) && size > 2) {
 		u16 val = data[off - init_off];
 		val |= (u16) data[off - init_off + 1] << 8;
-                pci_user_write_config_word(dev, off, val);
-                off += 2;
-                size -= 2;
-        }
+		pci_user_write_config_word(dev, off, val);
+		off += 2;
+		size -= 2;
+	}
 
 	while (size > 3) {
 		u32 val = data[off - init_off];
@@ -762,10 +756,9 @@
 	return count;
 }
 
-static ssize_t
-read_vpd_attr(struct file *filp, struct kobject *kobj,
-	      struct bin_attribute *bin_attr,
-	      char *buf, loff_t off, size_t count)
+static ssize_t read_vpd_attr(struct file *filp, struct kobject *kobj,
+			     struct bin_attribute *bin_attr, char *buf,
+			     loff_t off, size_t count)
 {
 	struct pci_dev *dev =
 		to_pci_dev(container_of(kobj, struct device, kobj));
@@ -778,10 +771,9 @@
 	return pci_read_vpd(dev, off, count, buf);
 }
 
-static ssize_t
-write_vpd_attr(struct file *filp, struct kobject *kobj,
-	       struct bin_attribute *bin_attr,
-	       char *buf, loff_t off, size_t count)
+static ssize_t write_vpd_attr(struct file *filp, struct kobject *kobj,
+			      struct bin_attribute *bin_attr, char *buf,
+			      loff_t off, size_t count)
 {
 	struct pci_dev *dev =
 		to_pci_dev(container_of(kobj, struct device, kobj));
@@ -807,20 +799,18 @@
  * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_read).
  */
-static ssize_t
-pci_read_legacy_io(struct file *filp, struct kobject *kobj,
-		   struct bin_attribute *bin_attr,
-		   char *buf, loff_t off, size_t count)
+static ssize_t pci_read_legacy_io(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *bin_attr, char *buf,
+				  loff_t off, size_t count)
 {
-        struct pci_bus *bus = to_pci_bus(container_of(kobj,
-                                                      struct device,
+	struct pci_bus *bus = to_pci_bus(container_of(kobj, struct device,
 						      kobj));
 
-        /* Only support 1, 2 or 4 byte accesses */
-        if (count != 1 && count != 2 && count != 4)
-                return -EINVAL;
+	/* Only support 1, 2 or 4 byte accesses */
+	if (count != 1 && count != 2 && count != 4)
+		return -EINVAL;
 
-        return pci_legacy_read(bus, off, (u32 *)buf, count);
+	return pci_legacy_read(bus, off, (u32 *)buf, count);
 }
 
 /**
@@ -835,19 +825,18 @@
  * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_write).
  */
-static ssize_t
-pci_write_legacy_io(struct file *filp, struct kobject *kobj,
-		    struct bin_attribute *bin_attr,
-		    char *buf, loff_t off, size_t count)
+static ssize_t pci_write_legacy_io(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *bin_attr, char *buf,
+				   loff_t off, size_t count)
 {
-        struct pci_bus *bus = to_pci_bus(container_of(kobj,
-						      struct device,
+	struct pci_bus *bus = to_pci_bus(container_of(kobj, struct device,
 						      kobj));
-        /* Only support 1, 2 or 4 byte accesses */
-        if (count != 1 && count != 2 && count != 4)
-                return -EINVAL;
 
-        return pci_legacy_write(bus, off, *(u32 *)buf, count);
+	/* Only support 1, 2 or 4 byte accesses */
+	if (count != 1 && count != 2 && count != 4)
+		return -EINVAL;
+
+	return pci_legacy_write(bus, off, *(u32 *)buf, count);
 }
 
 /**
@@ -861,16 +850,14 @@
  * legacy memory space (first meg of bus space) into application virtual
  * memory space.
  */
-static int
-pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj,
-		    struct bin_attribute *attr,
-                    struct vm_area_struct *vma)
+static int pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *attr,
+			       struct vm_area_struct *vma)
 {
-        struct pci_bus *bus = to_pci_bus(container_of(kobj,
-                                                      struct device,
+	struct pci_bus *bus = to_pci_bus(container_of(kobj, struct device,
 						      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+	return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
 }
 
 /**
@@ -884,16 +871,14 @@
  * legacy IO space (first meg of bus space) into application virtual
  * memory space. Returns -ENOSYS if the operation isn't supported
  */
-static int
-pci_mmap_legacy_io(struct file *filp, struct kobject *kobj,
-		   struct bin_attribute *attr,
-		   struct vm_area_struct *vma)
+static int pci_mmap_legacy_io(struct file *filp, struct kobject *kobj,
+			      struct bin_attribute *attr,
+			      struct vm_area_struct *vma)
 {
-        struct pci_bus *bus = to_pci_bus(container_of(kobj,
-                                                      struct device,
+	struct pci_bus *bus = to_pci_bus(container_of(kobj, struct device,
 						      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+	return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
 }
 
 /**
@@ -903,10 +888,9 @@
  *
  * Stub implementation. Can be overridden by arch if necessary.
  */
-void __weak
-pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
+void __weak pci_adjust_legacy_attr(struct pci_bus *b,
+				   enum pci_mmap_state mmap_type)
 {
-	return;
 }
 
 /**
@@ -961,8 +945,7 @@
 	kfree(b->legacy_io);
 	b->legacy_io = NULL;
 kzalloc_err:
-	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
-	       "and ISA memory resources to sysfs\n");
+	printk(KERN_WARNING "pci: warning: could not create legacy I/O port and ISA memory resources to sysfs\n");
 	return;
 }
 
@@ -1005,9 +988,8 @@
  *
  * Use the regular PCI mapping routines to map a PCI resource into userspace.
  */
-static int
-pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
-		  struct vm_area_struct *vma, int write_combine)
+static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+			     struct vm_area_struct *vma, int write_combine)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
 						       struct device, kobj));
@@ -1023,8 +1005,7 @@
 		return -ENODEV;
 
 	if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) {
-		WARN(1, "process \"%s\" tried to map 0x%08lx bytes "
-			"at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
+		WARN(1, "process \"%s\" tried to map 0x%08lx bytes at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
 			current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
 			pci_name(pdev), i,
 			(u64)pci_resource_start(pdev, i),
@@ -1046,26 +1027,23 @@
 	return pci_mmap_page_range(pdev, vma, mmap_type, write_combine);
 }
 
-static int
-pci_mmap_resource_uc(struct file *filp, struct kobject *kobj,
-		     struct bin_attribute *attr,
-		     struct vm_area_struct *vma)
+static int pci_mmap_resource_uc(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 0);
 }
 
-static int
-pci_mmap_resource_wc(struct file *filp, struct kobject *kobj,
-		     struct bin_attribute *attr,
-		     struct vm_area_struct *vma)
+static int pci_mmap_resource_wc(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 1);
 }
 
-static ssize_t
-pci_resource_io(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr, char *buf,
-		loff_t off, size_t count, bool write)
+static ssize_t pci_resource_io(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *attr, char *buf,
+			       loff_t off, size_t count, bool write)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
 						       struct device, kobj));
@@ -1110,18 +1088,16 @@
 	return -EINVAL;
 }
 
-static ssize_t
-pci_read_resource_io(struct file *filp, struct kobject *kobj,
-		     struct bin_attribute *attr, char *buf,
-		     loff_t off, size_t count)
+static ssize_t pci_read_resource_io(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
 {
 	return pci_resource_io(filp, kobj, attr, buf, off, count, false);
 }
 
-static ssize_t
-pci_write_resource_io(struct file *filp, struct kobject *kobj,
-		      struct bin_attribute *attr, char *buf,
-		      loff_t off, size_t count)
+static ssize_t pci_write_resource_io(struct file *filp, struct kobject *kobj,
+				     struct bin_attribute *attr, char *buf,
+				     loff_t off, size_t count)
 {
 	return pci_resource_io(filp, kobj, attr, buf, off, count, true);
 }
@@ -1133,8 +1109,7 @@
  * If we created resource files for @pdev, remove them from sysfs and
  * free their resources.
  */
-static void
-pci_remove_resource_files(struct pci_dev *pdev)
+static void pci_remove_resource_files(struct pci_dev *pdev)
 {
 	int i;
 
@@ -1237,10 +1212,9 @@
  *
  * writing anything except 0 enables it
  */
-static ssize_t
-pci_write_rom(struct file *filp, struct kobject *kobj,
-	      struct bin_attribute *bin_attr,
-	      char *buf, loff_t off, size_t count)
+static ssize_t pci_write_rom(struct file *filp, struct kobject *kobj,
+			     struct bin_attribute *bin_attr, char *buf,
+			     loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
 
@@ -1264,10 +1238,9 @@
  * Put @count bytes starting at @off into @buf from the ROM in the PCI
  * device corresponding to @kobj.
  */
-static ssize_t
-pci_read_rom(struct file *filp, struct kobject *kobj,
-	     struct bin_attribute *bin_attr,
-	     char *buf, loff_t off, size_t count)
+static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj,
+			    struct bin_attribute *bin_attr, char *buf,
+			    loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
 	void __iomem *rom;
@@ -1313,9 +1286,8 @@
 	.write = pci_write_config,
 };
 
-static ssize_t reset_store(struct device *dev,
-			   struct device_attribute *attr, const char *buf,
-			   size_t count)
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long val;
@@ -1382,7 +1354,7 @@
 	return retval;
 }
 
-int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
+int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
 {
 	int retval;
 	int rom_size = 0;
@@ -1520,7 +1492,6 @@
 
 	return 0;
 }
-
 late_initcall(pci_sysfs_init);
 
 static struct attribute *pci_dev_dev_attrs[] = {
@@ -1529,7 +1500,7 @@
 };
 
 static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
-						struct attribute *a, int n)
+					 struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -1548,7 +1519,7 @@
 };
 
 static umode_t pci_dev_hp_attrs_are_visible(struct kobject *kobj,
-						struct attribute *a, int n)
+					    struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -1572,7 +1543,7 @@
 };
 
 static umode_t sriov_attrs_are_visible(struct kobject *kobj,
-					 struct attribute *a, int n)
+				       struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 436a76a..63a54a3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c

@@ -114,7 +114,7 @@
 	max = bus->busn_res.end;
 	list_for_each_entry(tmp, &bus->children, node) {
 		n = pci_bus_max_busnr(tmp);
-		if(n > max)
+		if (n > max)
 			max = n;
 	}
 	return max;
@@ -226,6 +226,7 @@
 
 	return pos;
 }
+EXPORT_SYMBOL(pci_find_capability);
 
 /**
  * pci_bus_find_capability - query for devices' capabilities
@@ -253,6 +254,7 @@
 
 	return pos;
 }
+EXPORT_SYMBOL(pci_bus_find_capability);
 
 /**
  * pci_find_next_ext_capability - Find an extended capability
@@ -403,8 +405,8 @@
  *  For given resource region of given device, return the resource
  *  region of parent bus the given region is contained in.
  */
-struct resource *
-pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
+struct resource *pci_find_parent_resource(const struct pci_dev *dev,
+					  struct resource *res)
 {
 	const struct pci_bus *bus = dev->bus;
 	struct resource *r;
@@ -436,6 +438,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(pci_find_parent_resource);
 
 /**
  * pci_wait_for_pending - wait for @mask bit(s) to clear in status word @pos
@@ -470,8 +473,7 @@
  * Restore the BAR values for a given device, so as to make it
  * accessible by its driver.
  */
-static void
-pci_restore_bars(struct pci_dev *dev)
+static void pci_restore_bars(struct pci_dev *dev)
 {
 	int i;
 
@@ -496,7 +498,7 @@
 }
 
 static inline int platform_pci_set_power_state(struct pci_dev *dev,
-                                                pci_power_t t)
+					       pci_power_t t)
 {
 	return pci_platform_pm ? pci_platform_pm->set_state(dev, t) : -ENOSYS;
 }
@@ -553,8 +555,8 @@
 	 */
 	if (state != PCI_D0 && dev->current_state <= PCI_D3cold
 	    && dev->current_state > state) {
-		dev_err(&dev->dev, "invalid power transition "
-			"(from state %d to %d)\n", dev->current_state, state);
+		dev_err(&dev->dev, "invalid power transition (from state %d to %d)\n",
+			dev->current_state, state);
 		return -EINVAL;
 	}
 
@@ -601,8 +603,8 @@
 	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
 	dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
 	if (dev->current_state != state && printk_ratelimit())
-		dev_info(&dev->dev, "Refused to change power state, "
-			"currently in D%d\n", dev->current_state);
+		dev_info(&dev->dev, "Refused to change power state, currently in D%d\n",
+			 dev->current_state);
 
 	/*
 	 * According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
@@ -846,6 +848,7 @@
 
 	return error;
 }
+EXPORT_SYMBOL(pci_set_power_state);
 
 /**
  * pci_choose_state - Choose the power state of a PCI device
@@ -884,12 +887,10 @@
 	}
 	return PCI_D0;
 }
-
 EXPORT_SYMBOL(pci_choose_state);
 
 #define PCI_EXP_SAVE_REGS	7
 
-
 static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
 						       u16 cap, bool extended)
 {
@@ -1001,8 +1002,7 @@
  * pci_save_state - save the PCI configuration space of a device before suspending
  * @dev: - PCI device that we're dealing with
  */
-int
-pci_save_state(struct pci_dev *dev)
+int pci_save_state(struct pci_dev *dev)
 {
 	int i;
 	/* XXX: 100% dword access ok here? */
@@ -1017,6 +1017,7 @@
 		return i;
 	return 0;
 }
+EXPORT_SYMBOL(pci_save_state);
 
 static void pci_restore_config_dword(struct pci_dev *pdev, int offset,
 				     u32 saved_val, int retry)
@@ -1028,8 +1029,8 @@
 		return;
 
 	for (;;) {
-		dev_dbg(&pdev->dev, "restoring config space at offset "
-			"%#x (was %#x, writing %#x)\n", offset, val, saved_val);
+		dev_dbg(&pdev->dev, "restoring config space at offset %#x (was %#x, writing %#x)\n",
+			offset, val, saved_val);
 		pci_write_config_dword(pdev, offset, saved_val);
 		if (retry-- <= 0)
 			return;
@@ -1087,6 +1088,7 @@
 
 	dev->state_saved = false;
 }
+EXPORT_SYMBOL(pci_restore_state);
 
 struct pci_saved_state {
 	u32 config_space[16];
@@ -1231,6 +1233,7 @@
 		return do_pci_enable_device(dev, (1 << PCI_NUM_RESOURCES) - 1);
 	return 0;
 }
+EXPORT_SYMBOL(pci_reenable_device);
 
 static void pci_enable_bridge(struct pci_dev *dev)
 {
@@ -1305,6 +1308,7 @@
 {
 	return pci_enable_device_flags(dev, IORESOURCE_IO);
 }
+EXPORT_SYMBOL(pci_enable_device_io);
 
 /**
  * pci_enable_device_mem - Initialize a device for use with Memory space
@@ -1318,6 +1322,7 @@
 {
 	return pci_enable_device_flags(dev, IORESOURCE_MEM);
 }
+EXPORT_SYMBOL(pci_enable_device_mem);
 
 /**
  * pci_enable_device - Initialize device before it's used by a driver.
@@ -1334,6 +1339,7 @@
 {
 	return pci_enable_device_flags(dev, IORESOURCE_MEM | IORESOURCE_IO);
 }
+EXPORT_SYMBOL(pci_enable_device);
 
 /*
  * Managed PCI resources.  This manages device on/off, intx/msi/msix
@@ -1416,6 +1422,7 @@
 	}
 	return rc;
 }
+EXPORT_SYMBOL(pcim_enable_device);
 
 /**
  * pcim_pin_device - Pin managed PCI device
@@ -1434,6 +1441,7 @@
 	if (dr)
 		dr->pinned = 1;
 }
+EXPORT_SYMBOL(pcim_pin_device);
 
 /*
  * pcibios_add_device - provide arch specific hooks when adding device dev
@@ -1443,7 +1451,7 @@
  * devices are added. This is the default implementation. Architecture
  * implementations can override this.
  */
-int __weak pcibios_add_device (struct pci_dev *dev)
+int __weak pcibios_add_device(struct pci_dev *dev)
 {
 	return 0;
 }
@@ -1515,8 +1523,7 @@
  * Note we don't actually disable the device until all callers of
  * pci_enable_device() have called pci_disable_device().
  */
-void
-pci_disable_device(struct pci_dev *dev)
+void pci_disable_device(struct pci_dev *dev)
 {
 	struct pci_devres *dr;
 
@@ -1534,6 +1541,7 @@
 
 	dev->is_busmaster = 0;
 }
+EXPORT_SYMBOL(pci_disable_device);
 
 /**
  * pcibios_set_pcie_reset_state - set reset state for device dev
@@ -1562,6 +1570,7 @@
 {
 	return pcibios_set_pcie_reset_state(dev, state);
 }
+EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
 
 /**
  * pci_check_pme_status - Check if given device has generated PME.
@@ -1641,6 +1650,7 @@
 
 	return !!(dev->pme_support & (1 << state));
 }
+EXPORT_SYMBOL(pci_pme_capable);
 
 static void pci_pme_list_scan(struct work_struct *work)
 {
@@ -1745,6 +1755,7 @@
 
 	dev_dbg(&dev->dev, "PME# %s\n", enable ? "enabled" : "disabled");
 }
+EXPORT_SYMBOL(pci_pme_active);
 
 /**
  * __pci_enable_wake - enable PCI device as wakeup event source
@@ -1830,6 +1841,7 @@
 			pci_enable_wake(dev, PCI_D3cold, enable) :
 			pci_enable_wake(dev, PCI_D3hot, enable);
 }
+EXPORT_SYMBOL(pci_wake_from_d3);
 
 /**
  * pci_target_state - find an appropriate low power state for a given PCI dev
@@ -1908,6 +1920,7 @@
 
 	return error;
 }
+EXPORT_SYMBOL(pci_prepare_to_sleep);
 
 /**
  * pci_back_from_sleep - turn PCI device on during system-wide transition into working state
@@ -1920,6 +1933,7 @@
 	pci_enable_wake(dev, PCI_D0, false);
 	return pci_set_power_state(dev, PCI_D0);
 }
+EXPORT_SYMBOL(pci_back_from_sleep);
 
 /**
  * pci_finish_runtime_suspend - Carry out PCI-specific part of runtime suspend.
@@ -2415,8 +2429,7 @@
 	return (((pin - 1) + slot) % 4) + 1;
 }
 
-int
-pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
 	u8 pin;
 
@@ -2478,6 +2491,7 @@
 	if (dr)
 		dr->region_mask &= ~(1 << bar);
 }
+EXPORT_SYMBOL(pci_release_region);
 
 /**
  *	__pci_request_region - Reserved PCI I/O and memory resource
@@ -2498,8 +2512,8 @@
  *	Returns 0 on success, or %EBUSY on error.  A warning
  *	message is also printed on failure.
  */
-static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_name,
-									int exclusive)
+static int __pci_request_region(struct pci_dev *pdev, int bar,
+				const char *res_name, int exclusive)
 {
 	struct pci_devres *dr;
 
@@ -2510,8 +2524,7 @@
 		if (!request_region(pci_resource_start(pdev, bar),
 			    pci_resource_len(pdev, bar), res_name))
 			goto err_out;
-	}
-	else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+	} else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
 		if (!__request_mem_region(pci_resource_start(pdev, bar),
 					pci_resource_len(pdev, bar), res_name,
 					exclusive))
@@ -2548,6 +2561,7 @@
 {
 	return __pci_request_region(pdev, bar, res_name, 0);
 }
+EXPORT_SYMBOL(pci_request_region);
 
 /**
  *	pci_request_region_exclusive - Reserved PCI I/O and memory resource
@@ -2567,10 +2581,13 @@
  *	explicitly not allowed to map the resource via /dev/mem or
  *	sysfs.
  */
-int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name)
+int pci_request_region_exclusive(struct pci_dev *pdev, int bar,
+				 const char *res_name)
 {
 	return __pci_request_region(pdev, bar, res_name, IORESOURCE_EXCLUSIVE);
 }
+EXPORT_SYMBOL(pci_request_region_exclusive);
+
 /**
  * pci_release_selected_regions - Release selected PCI I/O and memory resources
  * @pdev: PCI device whose resources were previously reserved
@@ -2587,9 +2604,10 @@
 		if (bars & (1 << i))
 			pci_release_region(pdev, i);
 }
+EXPORT_SYMBOL(pci_release_selected_regions);
 
 static int __pci_request_selected_regions(struct pci_dev *pdev, int bars,
-				 const char *res_name, int excl)
+					  const char *res_name, int excl)
 {
 	int i;
 
@@ -2600,7 +2618,7 @@
 	return 0;
 
 err_out:
-	while(--i >= 0)
+	while (--i >= 0)
 		if (bars & (1 << i))
 			pci_release_region(pdev, i);
 
@@ -2619,13 +2637,15 @@
 {
 	return __pci_request_selected_regions(pdev, bars, res_name, 0);
 }
+EXPORT_SYMBOL(pci_request_selected_regions);
 
-int pci_request_selected_regions_exclusive(struct pci_dev *pdev,
-				 int bars, const char *res_name)
+int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars,
+					   const char *res_name)
 {
 	return __pci_request_selected_regions(pdev, bars, res_name,
 			IORESOURCE_EXCLUSIVE);
 }
+EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
 
 /**
  *	pci_release_regions - Release reserved PCI I/O and memory resources
@@ -2640,6 +2660,7 @@
 {
 	pci_release_selected_regions(pdev, (1 << 6) - 1);
 }
+EXPORT_SYMBOL(pci_release_regions);
 
 /**
  *	pci_request_regions - Reserved PCI I/O and memory resources
@@ -2658,6 +2679,7 @@
 {
 	return pci_request_selected_regions(pdev, ((1 << 6) - 1), res_name);
 }
+EXPORT_SYMBOL(pci_request_regions);
 
 /**
  *	pci_request_regions_exclusive - Reserved PCI I/O and memory resources
@@ -2680,6 +2702,7 @@
 	return pci_request_selected_regions_exclusive(pdev,
 					((1 << 6) - 1), res_name);
 }
+EXPORT_SYMBOL(pci_request_regions_exclusive);
 
 static void __pci_set_master(struct pci_dev *dev, bool enable)
 {
@@ -2749,6 +2772,7 @@
 	__pci_set_master(dev, true);
 	pcibios_set_master(dev);
 }
+EXPORT_SYMBOL(pci_set_master);
 
 /**
  * pci_clear_master - disables bus-mastering for device dev
@@ -2758,6 +2782,7 @@
 {
 	__pci_set_master(dev, false);
 }
+EXPORT_SYMBOL(pci_clear_master);
 
 /**
  * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
@@ -2790,30 +2815,13 @@
 	if (cacheline_size == pci_cache_line_size)
 		return 0;
 
-	dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not "
-		   "supported\n", pci_cache_line_size << 2);
+	dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not supported\n",
+		   pci_cache_line_size << 2);
 
 	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(pci_set_cacheline_size);
 
-#ifdef PCI_DISABLE_MWI
-int pci_set_mwi(struct pci_dev *dev)
-{
-	return 0;
-}
-
-int pci_try_set_mwi(struct pci_dev *dev)
-{
-	return 0;
-}
-
-void pci_clear_mwi(struct pci_dev *dev)
-{
-}
-
-#else
-
 /**
  * pci_set_mwi - enables memory-write-invalidate PCI transaction
  * @dev: the PCI device for which MWI is enabled
@@ -2822,9 +2830,11 @@
  *
  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
  */
-int
-pci_set_mwi(struct pci_dev *dev)
+int pci_set_mwi(struct pci_dev *dev)
 {
+#ifdef PCI_DISABLE_MWI
+	return 0;
+#else
 	int rc;
 	u16 cmd;
 
@@ -2833,14 +2843,15 @@
 		return rc;
 
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	if (! (cmd & PCI_COMMAND_INVALIDATE)) {
+	if (!(cmd & PCI_COMMAND_INVALIDATE)) {
 		dev_dbg(&dev->dev, "enabling Mem-Wr-Inval\n");
 		cmd |= PCI_COMMAND_INVALIDATE;
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
-
 	return 0;
+#endif
 }
+EXPORT_SYMBOL(pci_set_mwi);
 
 /**
  * pci_try_set_mwi - enables memory-write-invalidate PCI transaction
@@ -2853,9 +2864,13 @@
  */
 int pci_try_set_mwi(struct pci_dev *dev)
 {
-	int rc = pci_set_mwi(dev);
-	return rc;
+#ifdef PCI_DISABLE_MWI
+	return 0;
+#else
+	return pci_set_mwi(dev);
+#endif
 }
+EXPORT_SYMBOL(pci_try_set_mwi);
 
 /**
  * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
@@ -2863,9 +2878,9 @@
  *
  * Disables PCI Memory-Write-Invalidate transaction on the device
  */
-void
-pci_clear_mwi(struct pci_dev *dev)
+void pci_clear_mwi(struct pci_dev *dev)
 {
+#ifndef PCI_DISABLE_MWI
 	u16 cmd;
 
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
@@ -2873,8 +2888,9 @@
 		cmd &= ~PCI_COMMAND_INVALIDATE;
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
+#endif
 }
-#endif /* ! PCI_DISABLE_MWI */
+EXPORT_SYMBOL(pci_clear_mwi);
 
 /**
  * pci_intx - enables/disables PCI INTx for device dev
@@ -2883,18 +2899,16 @@
  *
  * Enables/disables PCI INTx for device dev
  */
-void
-pci_intx(struct pci_dev *pdev, int enable)
+void pci_intx(struct pci_dev *pdev, int enable)
 {
 	u16 pci_command, new;
 
 	pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
 
-	if (enable) {
+	if (enable)
 		new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
-	} else {
+	else
 		new = pci_command | PCI_COMMAND_INTX_DISABLE;
-	}
 
 	if (new != pci_command) {
 		struct pci_devres *dr;
@@ -2908,6 +2922,7 @@
 		}
 	}
 }
+EXPORT_SYMBOL_GPL(pci_intx);
 
 /**
  * pci_intx_mask_supported - probe for INTx masking support
@@ -2937,8 +2952,8 @@
 	 * go ahead and check it.
 	 */
 	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
-		dev_err(&dev->dev, "Command register changed from "
-			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+		dev_err(&dev->dev, "Command register changed from 0x%x to 0x%x: driver or hardware bug?\n",
+			orig, new);
 	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
 		mask_supported = true;
 		pci_write_config_word(dev, PCI_COMMAND, orig);
@@ -3124,8 +3139,7 @@
 	if (pci_wait_for_pending(dev, pos + PCI_AF_STATUS, PCI_AF_STATUS_TP))
 		goto clear;
 
-	dev_err(&dev->dev, "transaction is not cleared; "
-			"proceeding with reset anyway\n");
+	dev_err(&dev->dev, "transaction is not cleared; proceeding with reset anyway\n");
 
 clear:
 	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
@@ -3179,14 +3193,7 @@
 	return 0;
 }
 
-/**
- * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
- * @dev: Bridge device
- *
- * Use the bridge control register to assert reset on the secondary bus.
- * Devices on the secondary bus are left in power-on state.
- */
-void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
 {
 	u16 ctrl;
 
@@ -3211,6 +3218,18 @@
 	 */
 	ssleep(1);
 }
+
+/**
+ * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
+ * @dev: Bridge device
+ *
+ * Use the bridge control register to assert reset on the secondary bus.
+ * Devices on the secondary bus are left in power-on state.
+ */
+void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+{
+	pcibios_reset_secondary_bus(dev);
+}
 EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
 
 static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
@@ -4095,6 +4114,7 @@
 			bars |= (1 << i);
 	return bars;
 }
+EXPORT_SYMBOL(pci_select_bars);
 
 /**
  * pci_resource_bar - get position of the BAR associated with a resource
@@ -4134,7 +4154,7 @@
 }
 
 static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
-		      unsigned int command_bits, u32 flags)
+				  unsigned int command_bits, u32 flags)
 {
 	if (arch_set_vga_state)
 		return arch_set_vga_state(dev, decode, command_bits,
@@ -4246,11 +4266,10 @@
 			bus == dev->bus->number &&
 			slot == PCI_SLOT(dev->devfn) &&
 			func == PCI_FUNC(dev->devfn)) {
-			if (align_order == -1) {
+			if (align_order == -1)
 				align = PAGE_SIZE;
-			} else {
+			else
 				align = 1 << align_order;
-			}
 			/* Found */
 			break;
 		}
@@ -4368,7 +4387,6 @@
 	return bus_create_file(&pci_bus_type,
 					&bus_attr_resource_alignment);
 }
-
 late_initcall(pci_resource_alignment_sysfs_init);
 
 static void pci_no_domains(void)
@@ -4447,41 +4465,3 @@
 	return 0;
 }
 early_param("pci", pci_setup);
-
-EXPORT_SYMBOL(pci_reenable_device);
-EXPORT_SYMBOL(pci_enable_device_io);
-EXPORT_SYMBOL(pci_enable_device_mem);
-EXPORT_SYMBOL(pci_enable_device);
-EXPORT_SYMBOL(pcim_enable_device);
-EXPORT_SYMBOL(pcim_pin_device);
-EXPORT_SYMBOL(pci_disable_device);
-EXPORT_SYMBOL(pci_find_capability);
-EXPORT_SYMBOL(pci_bus_find_capability);
-EXPORT_SYMBOL(pci_release_regions);
-EXPORT_SYMBOL(pci_request_regions);
-EXPORT_SYMBOL(pci_request_regions_exclusive);
-EXPORT_SYMBOL(pci_release_region);
-EXPORT_SYMBOL(pci_request_region);
-EXPORT_SYMBOL(pci_request_region_exclusive);
-EXPORT_SYMBOL(pci_release_selected_regions);
-EXPORT_SYMBOL(pci_request_selected_regions);
-EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
-EXPORT_SYMBOL(pci_set_master);
-EXPORT_SYMBOL(pci_clear_master);
-EXPORT_SYMBOL(pci_set_mwi);
-EXPORT_SYMBOL(pci_try_set_mwi);
-EXPORT_SYMBOL(pci_clear_mwi);
-EXPORT_SYMBOL_GPL(pci_intx);
-EXPORT_SYMBOL(pci_assign_resource);
-EXPORT_SYMBOL(pci_find_parent_resource);
-EXPORT_SYMBOL(pci_select_bars);
-
-EXPORT_SYMBOL(pci_set_power_state);
-EXPORT_SYMBOL(pci_save_state);
-EXPORT_SYMBOL(pci_restore_state);
-EXPORT_SYMBOL(pci_pme_capable);
-EXPORT_SYMBOL(pci_pme_active);
-EXPORT_SYMBOL(pci_wake_from_d3);
-EXPORT_SYMBOL(pci_prepare_to_sleep);
-EXPORT_SYMBOL(pci_back_from_sleep);
-EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);

diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index 587e7e8..182224a 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c

@@ -397,16 +397,14 @@
 	if (!aer_mask_override && einj->cor_status &&
 	    !(einj->cor_status & ~cor_mask)) {
 		ret = -EINVAL;
-		printk(KERN_WARNING "The correctable error(s) is masked "
-				"by device\n");
+		printk(KERN_WARNING "The correctable error(s) is masked by device\n");
 		spin_unlock_irqrestore(&inject_lock, flags);
 		goto out_put;
 	}
 	if (!aer_mask_override && einj->uncor_status &&
 	    !(einj->uncor_status & ~uncor_mask)) {
 		ret = -EINVAL;
-		printk(KERN_WARNING "The uncorrectable error(s) is masked "
-				"by device\n");
+		printk(KERN_WARNING "The uncorrectable error(s) is masked by device\n");
 		spin_unlock_irqrestore(&inject_lock, flags);
 		goto out_put;
 	}
@@ -464,8 +462,7 @@
 			goto out_put;
 		}
 		aer_irq(-1, edev);
-	}
-	else
+	} else
 		ret = -EINVAL;
 out_put:
 	kfree(err_alloc);

diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index b2c8881..5653ea9 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c

@@ -542,8 +542,7 @@
 #define AER_RECOVER_RING_ORDER		4
 #define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)
 
-struct aer_recover_entry
-{
+struct aer_recover_entry {
 	u8	bus;
 	u8	devfn;
 	u16	domain;

diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 34ff702..36ed31b5 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c

@@ -172,9 +172,7 @@
 	int id = ((dev->bus->number << 8) | dev->devfn);
 
 	if (!info->status) {
-		dev_err(&dev->dev,
-			"PCIe Bus Error: severity=%s, type=Unaccessible, "
-			"id=%04x(Unregistered Agent ID)\n",
+		dev_err(&dev->dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n",
 			aer_error_severity_string[info->severity], id);
 		goto out;
 	}
@@ -182,13 +180,11 @@
 	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
 	agent = AER_GET_AGENT(info->severity, info->status);
 
-	dev_err(&dev->dev,
-		"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
+	dev_err(&dev->dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
 		aer_error_severity_string[info->severity],
 		aer_error_layer[layer], id, aer_agent_string[agent]);
 
-	dev_err(&dev->dev,
-		"  device [%04x:%04x] error status/mask=%08x/%08x\n",
+	dev_err(&dev->dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
 		dev->vendor, dev->device,
 		info->status, info->mask);
 

diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index bbc3bdd..82e06a8 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c

@@ -199,8 +199,7 @@
 		 * assuming that the PME was reported by a PCIe-PCI bridge that
 		 * used devfn different from zero.
 		 */
-		dev_dbg(&port->dev, "PME interrupt generated for "
-			"non-existent device %02x:%02x.%d\n",
+		dev_dbg(&port->dev, "PME interrupt generated for non-existent device %02x:%02x.%d\n",
 			busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
 		found = pcie_pme_from_pci_bridge(bus, 0);
 	}

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 0d8fdc4..80887ea 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c

@@ -204,8 +204,8 @@
 		return -ENODEV;
 
 	if (!dev->irq && dev->pin) {
-		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
-			 "check vendor BIOS\n", dev->vendor, dev->device);
+		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; check vendor BIOS\n",
+			 dev->vendor, dev->device);
 	}
 	status = pcie_port_device_register(dev);
 	if (status)
@@ -397,7 +397,7 @@
 static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d)
 {
 	pr_notice("%s detected: will not use MSI for PCIe PME signaling\n",
-			d->ident);
+		  d->ident);
 	pcie_pme_disable_msi();
 	return 0;
 }

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2bbf522..e3cf8a2 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c

@@ -168,7 +168,7 @@
  * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
  */
 int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
-			struct resource *res, unsigned int pos)
+		    struct resource *res, unsigned int pos)
 {
 	u32 l, sz, mask;
 	u64 l64, sz64, mask64;
@@ -433,8 +433,7 @@
 			limit |= ((unsigned long) mem_limit_hi) << 32;
 #else
 			if (mem_base_hi || mem_limit_hi) {
-				dev_err(&dev->dev, "can't handle 64-bit "
-					"address space for bridge\n");
+				dev_err(&dev->dev, "can't handle 64-bit address space for bridge\n");
 				return;
 			}
 #endif
@@ -604,7 +603,6 @@
 	return agp_speeds[index];
 }
 
-
 static void pci_set_bus_speed(struct pci_bus *bus)
 {
 	struct pci_dev *bridge = bus->self;
@@ -636,11 +634,10 @@
 		} else if (status & PCI_X_SSTATUS_266MHZ) {
 			max = PCI_SPEED_133MHz_PCIX_266;
 		} else if (status & PCI_X_SSTATUS_133MHZ) {
-			if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2) {
+			if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2)
 				max = PCI_SPEED_133MHz_PCIX_ECC;
-			} else {
+			else
 				max = PCI_SPEED_133MHz_PCIX;
-			}
 		} else {
 			max = PCI_SPEED_66MHz_PCIX;
 		}
@@ -664,7 +661,6 @@
 	}
 }
 
-
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
 {
@@ -729,7 +725,8 @@
 	return child;
 }
 
-struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
+struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
+				int busnr)
 {
 	struct pci_bus *child;
 
@@ -741,6 +738,7 @@
 	}
 	return child;
 }
+EXPORT_SYMBOL(pci_add_new_bus);
 
 /*
  * If it's a bridge, configure it and scan the bus behind it.
@@ -887,7 +885,7 @@
 			 * as cards with a PCI-to-PCI bridge can be
 			 * inserted later.
 			 */
-			for (i=0; i<CARDBUS_RESERVE_BUSNR; i++) {
+			for (i = 0; i < CARDBUS_RESERVE_BUSNR; i++) {
 				struct pci_bus *parent = bus;
 				if (pci_find_bus(pci_domain_nr(bus),
 							max+i+1))
@@ -934,8 +932,7 @@
 		    (child->number > bus->busn_res.end) ||
 		    (child->number < bus->number) ||
 		    (child->busn_res.end < bus->number)) {
-			dev_info(&child->dev, "%pR %s "
-				"hidden behind%s bridge %s %pR\n",
+			dev_info(&child->dev, "%pR %s hidden behind%s bridge %s %pR\n",
 				&child->busn_res,
 				(bus->number > child->busn_res.end &&
 				 bus->busn_res.end < child->number) ?
@@ -952,6 +949,7 @@
 
 	return max;
 }
+EXPORT_SYMBOL(pci_scan_bridge);
 
 /*
  * Read interrupt line and base address registers.
@@ -992,7 +990,6 @@
 		pdev->is_hotplug_bridge = 1;
 }
 
-
 /**
  * pci_ext_cfg_is_aliased - is ext config space just an alias of std config?
  * @dev: PCI device
@@ -1225,13 +1222,13 @@
 		break;
 
 	default:				    /* unknown header */
-		dev_err(&dev->dev, "unknown header type %02x, "
-			"ignoring device\n", dev->hdr_type);
+		dev_err(&dev->dev, "unknown header type %02x, ignoring device\n",
+			dev->hdr_type);
 		return -EIO;
 
 	bad:
-		dev_err(&dev->dev, "ignoring class %#08x (doesn't match header "
-			"type %02x)\n", dev->class, dev->hdr_type);
+		dev_err(&dev->dev, "ignoring class %#08x (doesn't match header type %02x)\n",
+			dev->class, dev->hdr_type);
 		dev->class = PCI_CLASS_NOT_DEFINED;
 	}
 
@@ -1283,7 +1280,7 @@
 EXPORT_SYMBOL(pci_alloc_dev);
 
 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
-				 int crs_timeout)
+				int crs_timeout)
 {
 	int delay = 1;
 
@@ -1306,10 +1303,9 @@
 			return false;
 		/* Card hasn't responded in 60 seconds?  Must be stuck. */
 		if (delay > crs_timeout) {
-			printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not "
-					"responding\n", pci_domain_nr(bus),
-					bus->number, PCI_SLOT(devfn),
-					PCI_FUNC(devfn));
+			printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n",
+			       pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+			       PCI_FUNC(devfn));
 			return false;
 		}
 	}
@@ -1519,6 +1515,7 @@
 
 	return nr;
 }
+EXPORT_SYMBOL(pci_scan_slot);
 
 static int pcie_find_smpss(struct pci_dev *dev, void *data)
 {
@@ -1613,9 +1610,7 @@
 	}
 
 	if (mrrs < 128)
-		dev_err(&dev->dev, "MRRS was unable to be configured with a "
-			"safe value.  If problems are experienced, try running "
-			"with pci=pcie_bus_safe.\n");
+		dev_err(&dev->dev, "MRRS was unable to be configured with a safe value.  If problems are experienced, try running with pci=pcie_bus_safe\n");
 }
 
 static void pcie_bus_detect_mps(struct pci_dev *dev)
@@ -1652,8 +1647,8 @@
 	pcie_write_mps(dev, mps);
 	pcie_write_mrrs(dev);
 
-	dev_info(&dev->dev, "Max Payload Size set to %4d/%4d (was %4d), "
-		 "Max Read Rq %4d\n", pcie_get_mps(dev), 128 << dev->pcie_mpss,
+	dev_info(&dev->dev, "Max Payload Size set to %4d/%4d (was %4d), Max Read Rq %4d\n",
+		 pcie_get_mps(dev), 128 << dev->pcie_mpss,
 		 orig_mps, pcie_get_readrq(dev));
 
 	return 0;
@@ -1716,7 +1711,7 @@
 		bus->is_added = 1;
 	}
 
-	for (pass=0; pass < 2; pass++)
+	for (pass = 0; pass < 2; pass++)
 		list_for_each_entry(dev, &bus->devices, bus_list) {
 			if (pci_is_bridge(dev))
 				max = pci_scan_bridge(bus, dev, max, pass);
@@ -1732,6 +1727,7 @@
 	dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
 	return max;
 }
+EXPORT_SYMBOL_GPL(pci_scan_child_bus);
 
 /**
  * pcibios_root_bridge_prepare - Platform-specific host bridge setup.
@@ -2040,11 +2036,6 @@
 }
 EXPORT_SYMBOL_GPL(pci_rescan_bus);
 
-EXPORT_SYMBOL(pci_add_new_bus);
-EXPORT_SYMBOL(pci_scan_slot);
-EXPORT_SYMBOL(pci_scan_bridge);
-EXPORT_SYMBOL_GPL(pci_scan_child_bus);
-
 /*
  * pci_rescan_bus(), pci_rescan_bus_bridge_resize() and PCI device removal
  * routines should always be executed under this mutex.
@@ -2063,7 +2054,8 @@
 }
 EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
 
-static int __init pci_sort_bf_cmp(const struct device *d_a, const struct device *d_b)
+static int __init pci_sort_bf_cmp(const struct device *d_a,
+				  const struct device *d_b)
 {
 	const struct pci_dev *a = to_pci_dev(d_a);
 	const struct pci_dev *b = to_pci_dev(d_b);

diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 46d1378..3f155e7 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c

@@ -17,15 +17,14 @@
 
 static int proc_initialized;	/* = 0 */
 
-static loff_t
-proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
+static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	struct pci_dev *dev = PDE_DATA(file_inode(file));
 	return fixed_size_llseek(file, off, whence, dev->cfg_size);
 }
 
-static ssize_t
-proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+static ssize_t proc_bus_pci_read(struct file *file, char __user *buf,
+				 size_t nbytes, loff_t *ppos)
 {
 	struct pci_dev *dev = PDE_DATA(file_inode(file));
 	unsigned int pos = *ppos;
@@ -108,8 +107,8 @@
 	return nbytes;
 }
 
-static ssize_t
-proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
+static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
+				  size_t nbytes, loff_t *ppos)
 {
 	struct inode *ino = file_inode(file);
 	struct pci_dev *dev = PDE_DATA(ino);
@@ -413,7 +412,7 @@
 	return 0;
 }
 
-int pci_proc_detach_bus(struct pci_bus* bus)
+int pci_proc_detach_bus(struct pci_bus *bus)
 {
 	proc_remove(bus->procdir);
 	return 0;
@@ -423,6 +422,7 @@
 {
 	return seq_open(file, &proc_bus_pci_devices_op);
 }
+
 static const struct file_operations proc_bus_pci_dev_operations = {
 	.owner		= THIS_MODULE,
 	.open		= proc_bus_pci_dev_open,
@@ -443,6 +443,4 @@
 
 	return 0;
 }
-
 device_initcall(pci_proc_init);
-

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 92e68c77..d0f6926 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c

@@ -48,8 +48,8 @@
 {
 	dev->broken_parity_status = 1;	/* This device gives false positives */
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR, quirk_mellanox_tavor);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE, quirk_mellanox_tavor);
 
 /* Deal with broken BIOSes that neglect to enable passive release,
    which can cause problems in combination with the 82441FX/PPro MTRRs */
@@ -82,7 +82,7 @@
 static void quirk_isa_dma_hangs(struct pci_dev *dev)
 {
 	if (!isa_dma_bridge_buggy) {
-		isa_dma_bridge_buggy=1;
+		isa_dma_bridge_buggy = 1;
 		dev_info(&dev->dev, "Activating ISA DMA hang workarounds\n");
 	}
 }
@@ -123,7 +123,7 @@
  */
 static void quirk_nopcipci(struct pci_dev *dev)
 {
-	if ((pci_pci_problems & PCIPCI_FAIL)==0) {
+	if ((pci_pci_problems & PCIPCI_FAIL) == 0) {
 		dev_info(&dev->dev, "Disabling direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_FAIL;
 	}
@@ -148,7 +148,7 @@
  */
 static void quirk_triton(struct pci_dev *dev)
 {
-	if ((pci_pci_problems&PCIPCI_TRITON)==0) {
+	if ((pci_pci_problems&PCIPCI_TRITON) == 0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_TRITON;
 	}
@@ -163,8 +163,8 @@
  *	Made according to a windows driver based patch by George E. Breese
  *	see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm
  *	and http://www.georgebreese.com/net/software/#PCI
- *      Also see http://www.au-ja.org/review-kt133a-1-en.phtml for
- *      the info on which Mr Breese based his work.
+ *	Also see http://www.au-ja.org/review-kt133a-1-en.phtml for
+ *	the info on which Mr Breese based his work.
  *
  *	Updated based on further information from the site and also on
  *	information provided by VIA
@@ -177,14 +177,14 @@
 	   a buggy southbridge */
 
 	p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
-	if (p!=NULL) {
+	if (p != NULL) {
 		/* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
 		/* Check for buggy part revisions */
 		if (p->revision < 0x40 || p->revision > 0x42)
 			goto exit;
 	} else {
 		p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
-		if (p==NULL)	/* No problem parts */
+		if (p == NULL)	/* No problem parts */
 			goto exit;
 		/* Check for buggy part revisions */
 		if (p->revision < 0x10 || p->revision > 0x12)
@@ -227,7 +227,7 @@
  */
 static void quirk_viaetbf(struct pci_dev *dev)
 {
-	if ((pci_pci_problems&PCIPCI_VIAETBF)==0) {
+	if ((pci_pci_problems&PCIPCI_VIAETBF) == 0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_VIAETBF;
 	}
@@ -236,7 +236,7 @@
 
 static void quirk_vsfx(struct pci_dev *dev)
 {
-	if ((pci_pci_problems&PCIPCI_VSFX)==0) {
+	if ((pci_pci_problems&PCIPCI_VSFX) == 0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_VSFX;
 	}
@@ -251,7 +251,7 @@
  */
 static void quirk_alimagik(struct pci_dev *dev)
 {
-	if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) {
+	if ((pci_pci_problems&PCIPCI_ALIMAGIK) == 0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_ALIMAGIK|PCIPCI_TRITON;
 	}
@@ -265,7 +265,7 @@
  */
 static void quirk_natoma(struct pci_dev *dev)
 {
-	if ((pci_pci_problems&PCIPCI_NATOMA)==0) {
+	if ((pci_pci_problems&PCIPCI_NATOMA) == 0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
 		pci_pci_problems |= PCIPCI_NATOMA;
 	}
@@ -315,8 +315,7 @@
 	if (pci_resource_len(dev, 0) != 8) {
 		struct resource *res = &dev->resource[0];
 		res->end = res->start + 8 - 1;
-		dev_info(&dev->dev, "CS5536 ISA bridge bug detected "
-				"(incorrect header); workaround applied.\n");
+		dev_info(&dev->dev, "CS5536 ISA bridge bug detected (incorrect header); workaround applied\n");
 	}
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, quirk_cs5536_vsa);
@@ -400,7 +399,8 @@
 	 * let's get enough confirmation reports first.
 	 */
 	base &= -size;
-	dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base + size - 1);
+	dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base,
+		 base + size - 1);
 }
 
 static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int port, unsigned int enable)
@@ -425,7 +425,8 @@
 	 * reserve it, but let's get enough confirmation reports first.
 	 */
 	base &= -size;
-	dev_info(&dev->dev, "%s MMIO at %04x-%04x\n", name, base, base + size - 1);
+	dev_info(&dev->dev, "%s MMIO at %04x-%04x\n", name, base,
+		 base + size - 1);
 }
 
 /*
@@ -668,8 +669,7 @@
 	struct pci_dev *pdev;
 	u16 command;
 
-	dev_warn(&dev->dev, "TI XIO2000a quirk detected; "
-		"secondary bus fast back-to-back transfers disabled\n");
+	dev_warn(&dev->dev, "TI XIO2000a quirk detected; secondary bus fast back-to-back transfers disabled\n");
 	list_for_each_entry(pdev, &dev->subordinate->devices, bus_list) {
 		pci_read_config_word(pdev, PCI_COMMAND, &command);
 		if (command & PCI_COMMAND_FAST_BACK)
@@ -703,7 +703,7 @@
 	       tmp == 0 ? "Disa" : "Ena");
 
 	/* Offset 0x58: External APIC IRQ output control */
-	pci_write_config_byte (dev, 0x58, tmp);
+	pci_write_config_byte(dev, 0x58, tmp);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_ioapic);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_ioapic);
@@ -761,8 +761,8 @@
 static void quirk_amd_8131_mmrbc(struct pci_dev *dev)
 {
 	if (dev->subordinate && dev->revision <= 0x12) {
-		dev_info(&dev->dev, "AMD8131 rev %x detected; "
-			"disabling PCI-X MMRBC\n", dev->revision);
+		dev_info(&dev->dev, "AMD8131 rev %x detected; disabling PCI-X MMRBC\n",
+			 dev->revision);
 		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MMRBC;
 	}
 }
@@ -916,12 +916,12 @@
 {
 	u32 pcic;
 	pci_read_config_dword(dev, 0x4C, &pcic);
-	if ((pcic&6)!=6) {
+	if ((pcic & 6) != 6) {
 		pcic |= 6;
 		dev_warn(&dev->dev, "BIOS failed to enable PCI standards compliance; fixing this error\n");
 		pci_write_config_dword(dev, 0x4C, pcic);
 		pci_read_config_dword(dev, 0x84, &pcic);
-		pcic |= (1<<23);	/* Required in this mode */
+		pcic |= (1 << 23);	/* Required in this mode */
 		pci_write_config_dword(dev, 0x84, pcic);
 	}
 }
@@ -937,7 +937,7 @@
  */
 static void quirk_dunord(struct pci_dev *dev)
 {
-	struct resource *r = &dev->resource [1];
+	struct resource *r = &dev->resource[1];
 
 	r->flags |= IORESOURCE_UNSET;
 	r->start = 0;
@@ -967,11 +967,13 @@
 static void quirk_mediagx_master(struct pci_dev *dev)
 {
 	u8 reg;
+
 	pci_read_config_byte(dev, 0x41, &reg);
 	if (reg & 2) {
 		reg &= ~2;
-		dev_info(&dev->dev, "Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n", reg);
-                pci_write_config_byte(dev, 0x41, reg);
+		dev_info(&dev->dev, "Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n",
+			 reg);
+		pci_write_config_byte(dev, 0x41, reg);
 	}
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CYRIX,	PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master);
@@ -1120,7 +1122,7 @@
 {
 	if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK)) {
 		if (dev->device == PCI_DEVICE_ID_INTEL_82845_HB)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x8025: /* P4B-LX */
 			case 0x8070: /* P4B */
 			case 0x8088: /* P4B533 */
@@ -1128,14 +1130,14 @@
 				asus_hides_smbus = 1;
 			}
 		else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_HB)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x80b1: /* P4GE-V */
 			case 0x80b2: /* P4PE */
 			case 0x8093: /* P4B533-V */
 				asus_hides_smbus = 1;
 			}
 		else if (dev->device == PCI_DEVICE_ID_INTEL_82850_HB)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x8030: /* P4T533 */
 				asus_hides_smbus = 1;
 			}
@@ -1175,7 +1177,7 @@
 			}
 	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
 		if (dev->device ==  PCI_DEVICE_ID_INTEL_82855PM_HB)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x088C: /* HP Compaq nc8000 */
 			case 0x0890: /* HP Compaq nc6000 */
 				asus_hides_smbus = 1;
@@ -1192,20 +1194,20 @@
 			case 0x12bf: /* HP xw4100 */
 				asus_hides_smbus = 1;
 			}
-       } else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG)) {
-               if (dev->device ==  PCI_DEVICE_ID_INTEL_82855PM_HB)
-                       switch(dev->subsystem_device) {
-                       case 0xC00C: /* Samsung P35 notebook */
-                               asus_hides_smbus = 1;
-                       }
+	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG)) {
+		if (dev->device ==  PCI_DEVICE_ID_INTEL_82855PM_HB)
+			switch (dev->subsystem_device) {
+			case 0xC00C: /* Samsung P35 notebook */
+				asus_hides_smbus = 1;
+		}
 	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ)) {
 		if (dev->device == PCI_DEVICE_ID_INTEL_82855PM_HB)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x0058: /* Compaq Evo N620c */
 				asus_hides_smbus = 1;
 			}
 		else if (dev->device == PCI_DEVICE_ID_INTEL_82810_IG3)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0xB16C: /* Compaq Deskpro EP 401963-001 (PCA# 010174) */
 				/* Motherboard doesn't have Host bridge
 				 * subvendor/subdevice IDs, therefore checking
@@ -1213,7 +1215,7 @@
 				asus_hides_smbus = 1;
 			}
 		else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
-			switch(dev->subsystem_device) {
+			switch (dev->subsystem_device) {
 			case 0x00b8: /* Compaq Evo D510 CMT */
 			case 0x00b9: /* Compaq Evo D510 SFF */
 			case 0x00ba: /* Compaq Evo D510 USDT */
@@ -1261,7 +1263,8 @@
 		pci_write_config_word(dev, 0xF2, val & (~0x8));
 		pci_read_config_word(dev, 0xF2, &val);
 		if (val & 0x8)
-			dev_info(&dev->dev, "i801 SMBus device continues to play 'hide and seek'! 0x%x\n", val);
+			dev_info(&dev->dev, "i801 SMBus device continues to play 'hide and seek'! 0x%x\n",
+				 val);
 		else
 			dev_info(&dev->dev, "Enabled i801 SMBus device\n");
 	}
@@ -1409,7 +1412,8 @@
 		pci_write_config_byte(dev, 0x50, val & (~0xc0));
 		pci_read_config_byte(dev, 0x50, &val);
 		if (val & 0xc0)
-			dev_info(&dev->dev, "Onboard AC97/MC97 devices continue to play 'hide and seek'! 0x%x\n", val);
+			dev_info(&dev->dev, "Onboard AC97/MC97 devices continue to play 'hide and seek'! 0x%x\n",
+				 val);
 		else
 			dev_info(&dev->dev, "Enabled onboard AC97/MC97 devices\n");
 	}
@@ -1514,10 +1518,8 @@
 
 	/* The next five BARs all seem to be rubbish, so just clean
 	 * them out */
-	for (i=1; i < 6; i++) {
+	for (i = 1; i < 6; i++)
 		memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
-	}
-
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
 #endif
@@ -1552,7 +1554,7 @@
  * Some Intel PCI Express chipsets have trouble with downstream
  * device power management.
  */
-static void quirk_intel_pcie_pm(struct pci_dev * dev)
+static void quirk_intel_pcie_pm(struct pci_dev *dev)
 {
 	pci_pm_d3_delay = 120;
 	dev->no_d1d2 = 1;
@@ -1721,8 +1723,8 @@
 
 	pci_read_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, &pci_config_word);
 	if (!pci_config_word) {
-		dev_info(&dev->dev, "boot interrupts on device [%04x:%04x] "
-			 "already disabled\n", dev->vendor, dev->device);
+		dev_info(&dev->dev, "boot interrupts on device [%04x:%04x] already disabled\n",
+			 dev->vendor, dev->device);
 		return;
 	}
 	pci_write_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, 0);
@@ -1770,8 +1772,7 @@
 		if (pci_resource_len(dev, bar) == 0x80 &&
 		    (pci_resource_start(dev, bar) & 0x80)) {
 			struct resource *r = &dev->resource[bar];
-			dev_info(&dev->dev,
-				 "Re-allocating PLX PCI 9050 BAR %u to length 256 to avoid bit 7 bug\n",
+			dev_info(&dev->dev, "Re-allocating PLX PCI 9050 BAR %u to length 256 to avoid bit 7 bug\n",
 				 bar);
 			r->flags |= IORESOURCE_UNSET;
 			r->start = 0;
@@ -1818,9 +1819,7 @@
 	case PCI_DEVICE_ID_NETMOS_9845:
 	case PCI_DEVICE_ID_NETMOS_9855:
 		if (num_parallel) {
-			dev_info(&dev->dev, "Netmos %04x (%u parallel, "
-				"%u serial); changing class SERIAL to OTHER "
-				"(use parport_serial)\n",
+			dev_info(&dev->dev, "Netmos %04x (%u parallel, %u serial); changing class SERIAL to OTHER (use parport_serial)\n",
 				dev->device, num_parallel, num_serial);
 			dev->class = (PCI_CLASS_COMMUNICATION_OTHER << 8) |
 			    (dev->class & 0xff);
@@ -1887,8 +1886,7 @@
 
 	cmd_hi = readb(csr + 3);
 	if (cmd_hi == 0) {
-		dev_warn(&dev->dev, "Firmware left e100 interrupts enabled; "
-			"disabling\n");
+		dev_warn(&dev->dev, "Firmware left e100 interrupts enabled; disabling\n");
 		writeb(1, csr + 3);
 	}
 
@@ -1958,8 +1956,7 @@
 	if (pci_read_config_byte(dev, 0xf41, &b) == 0) {
 		if (!(b & 0x20)) {
 			pci_write_config_byte(dev, 0xf41, b | 0x20);
-			dev_info(&dev->dev,
-			       "Linking AER extended capability\n");
+			dev_info(&dev->dev, "Linking AER extended capability\n");
 		}
 	}
 }
@@ -1997,8 +1994,7 @@
 			/* Turn off PCI Bus Parking */
 			pci_write_config_byte(dev, 0x76, b ^ 0x40);
 
-			dev_info(&dev->dev,
-				"Disabling VIA CX700 PCI parking\n");
+			dev_info(&dev->dev, "Disabling VIA CX700 PCI parking\n");
 		}
 	}
 
@@ -2013,8 +2009,7 @@
 			/* Disable "Read FIFO Timer" */
 			pci_write_config_byte(dev, 0x77, 0x0);
 
-			dev_info(&dev->dev,
-				"Disabling VIA CX700 PCI caching\n");
+			dev_info(&dev->dev, "Disabling VIA CX700 PCI caching\n");
 		}
 	}
 }
@@ -2149,8 +2144,7 @@
 static void quirk_disable_msi(struct pci_dev *dev)
 {
 	if (dev->subordinate) {
-		dev_warn(&dev->dev, "MSI quirk detected; "
-			"subordinate MSI disabled\n");
+		dev_warn(&dev->dev, "MSI quirk detected; subordinate MSI disabled\n");
 		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 	}
 }
@@ -2189,8 +2183,7 @@
 		u8 flags;
 
 		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
-					 &flags) == 0)
-		{
+					 &flags) == 0) {
 			dev_info(&dev->dev, "Found %s HT MSI Mapping\n",
 				flags & HT_MSI_FLAGS_ENABLE ?
 				"enabled" : "disabled");
@@ -2207,8 +2200,7 @@
 static void quirk_msi_ht_cap(struct pci_dev *dev)
 {
 	if (dev->subordinate && !msi_ht_cap_enabled(dev)) {
-		dev_warn(&dev->dev, "MSI quirk detected; "
-			"subordinate MSI disabled\n");
+		dev_warn(&dev->dev, "MSI quirk detected; subordinate MSI disabled\n");
 		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 	}
 }
@@ -2232,8 +2224,7 @@
 	if (!pdev)
 		return;
 	if (!msi_ht_cap_enabled(dev) && !msi_ht_cap_enabled(pdev)) {
-		dev_warn(&dev->dev, "MSI quirk detected; "
-			"subordinate MSI disabled\n");
+		dev_warn(&dev->dev, "MSI quirk detected; subordinate MSI disabled\n");
 		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 	}
 	pci_dev_put(pdev);
@@ -2279,8 +2270,7 @@
 	if (board_name &&
 	    (strstr(board_name, "P5N32-SLI PREMIUM") ||
 	     strstr(board_name, "P5N32-E SLI"))) {
-		dev_info(&dev->dev,
-			 "Disabling msi for MCP55 NIC on P5N32-SLI\n");
+		dev_info(&dev->dev, "Disabling msi for MCP55 NIC on P5N32-SLI\n");
 		dev->no_msi = 1;
 	}
 }
@@ -2489,8 +2479,7 @@
 	 */
 	host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
 	if (host_bridge == NULL) {
-		dev_warn(&dev->dev,
-			 "nv_msi_ht_cap_quirk didn't locate host bridge\n");
+		dev_warn(&dev->dev, "nv_msi_ht_cap_quirk didn't locate host bridge\n");
 		return;
 	}
 
@@ -2817,8 +2806,7 @@
 	 */
 	err = pci_read_config_word(dev, 0x48, &rcc);
 	if (err) {
-		dev_err(&dev->dev, "Error attempting to read the read "
-			"completion coalescing register.\n");
+		dev_err(&dev->dev, "Error attempting to read the read completion coalescing register\n");
 		return;
 	}
 
@@ -2829,13 +2817,11 @@
 
 	err = pci_write_config_word(dev, 0x48, rcc);
 	if (err) {
-		dev_err(&dev->dev, "Error attempting to write the read "
-			"completion coalescing register.\n");
+		dev_err(&dev->dev, "Error attempting to write the read completion coalescing register\n");
 		return;
 	}
 
-	pr_info_once("Read completion coalescing disabled due to hardware "
-		     "errata relating to 256B MPS.\n");
+	pr_info_once("Read completion coalescing disabled due to hardware errata relating to 256B MPS\n");
 }
 /* Intel 5000 series memory controllers and ports 2-7 */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25c0, quirk_intel_mc_errata);
@@ -2944,8 +2930,7 @@
 
 	/* Check if any interrupt line is still enabled */
 	if (readl(regs + I915_DEIER_REG) != 0) {
-		dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; "
-			"disabling\n");
+		dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; disabling\n");
 
 		writel(0, regs + I915_DEIER_REG);
 	}
@@ -3040,7 +3025,7 @@
 {
 	struct pci_fixup *start, *end;
 
-	switch(pass) {
+	switch (pass) {
 	case pci_fixup_early:
 		start = __start_pci_fixups_early;
 		end = __end_pci_fixups_early;
@@ -3112,8 +3097,8 @@
 			if (!tmp || cls == tmp)
 				continue;
 
-			printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), "
-			       "using %u bytes\n", cls << 2, tmp << 2,
+			printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), using %u bytes\n",
+			       cls << 2, tmp << 2,
 			       pci_dfl_cache_line_size << 2);
 			pci_cache_line_size = pci_dfl_cache_line_size;
 		}
@@ -3342,6 +3327,85 @@
 	return -ENOTTY;
 }
 
+static void quirk_dma_func0_alias(struct pci_dev *dev)
+{
+	if (PCI_FUNC(dev->devfn) != 0) {
+		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+	}
+}
+
+/*
+ * https://bugzilla.redhat.com/show_bug.cgi?id=605888
+ *
+ * Some Ricoh devices use function 0 as the PCIe requester ID for DMA.
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, quirk_dma_func0_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias);
+
+static void quirk_dma_func1_alias(struct pci_dev *dev)
+{
+	if (PCI_FUNC(dev->devfn) != 1) {
+		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 1);
+		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+	}
+}
+
+/*
+ * Marvell 88SE9123 uses function 1 as the requester ID for DMA.  In some
+ * SKUs function 1 is present and is a legacy IDE controller, in other
+ * SKUs this function is not present, making this a ghost requester.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=42679
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c59 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x917a,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
+			 quirk_dma_func1_alias);
+/* https://bugs.gentoo.org/show_bug.cgi?id=497630 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON,
+			 PCI_DEVICE_ID_JMICRON_JMB388_ESD,
+			 quirk_dma_func1_alias);
+
+/*
+ * A few PCIe-to-PCI bridges fail to expose a PCIe capability, resulting in
+ * using the wrong DMA alias for the device.  Some of these devices can be
+ * used as either forward or reverse bridges, so we need to test whether the
+ * device is operating in the correct mode.  We could probably apply this
+ * quirk to PCI_ANY_ID, but for now we'll just use known offenders.  The test
+ * is for a non-root, non-PCIe bridge where the upstream device is PCIe and
+ * is not a PCIe-to-PCI bridge, then @pdev is actually a PCIe-to-PCI bridge.
+ */
+static void quirk_use_pcie_bridge_dma_alias(struct pci_dev *pdev)
+{
+	if (!pci_is_root_bus(pdev->bus) &&
+	    pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    !pci_is_pcie(pdev) && pci_is_pcie(pdev->bus->self) &&
+	    pci_pcie_type(pdev->bus->self) != PCI_EXP_TYPE_PCI_BRIDGE)
+		pdev->dev_flags |= PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS;
+}
+/* ASM1083/1085, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c46 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080,
+			 quirk_use_pcie_bridge_dma_alias);
+/* Tundra 8113, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c43 */
+DECLARE_PCI_FIXUP_HEADER(0x10e3, 0x8113, quirk_use_pcie_bridge_dma_alias);
+/* ITE 8892, https://bugzilla.kernel.org/show_bug.cgi?id=73551 */
+DECLARE_PCI_FIXUP_HEADER(0x1283, 0x8892, quirk_use_pcie_bridge_dma_alias);
+
 static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev)
 {
 	if (!PCI_FUNC(dev->devfn))

diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index c183945..f955edb 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c

@@ -38,6 +38,7 @@
 	pci_write_config_dword(pdev, pdev->rom_base_reg, rom_addr);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pci_enable_rom);
 
 /**
  * pci_disable_rom - disable ROM decoding for a PCI device
@@ -53,6 +54,7 @@
 	rom_addr &= ~PCI_ROM_ADDRESS_ENABLE;
 	pci_write_config_dword(pdev, pdev->rom_base_reg, rom_addr);
 }
+EXPORT_SYMBOL_GPL(pci_disable_rom);
 
 /**
  * pci_get_rom_size - obtain the actual size of the ROM image
@@ -135,7 +137,7 @@
 		} else {
 			/* assign the ROM an address if it doesn't have one */
 			if (res->parent == NULL &&
-			    pci_assign_resource(pdev,PCI_ROM_RESOURCE))
+			    pci_assign_resource(pdev, PCI_ROM_RESOURCE))
 				return NULL;
 			start = pci_resource_start(pdev, PCI_ROM_RESOURCE);
 			*size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
@@ -166,6 +168,7 @@
 	*size = pci_get_rom_size(pdev, rom, *size);
 	return rom;
 }
+EXPORT_SYMBOL(pci_map_rom);
 
 /**
  * pci_unmap_rom - unmap the ROM from kernel space
@@ -187,6 +190,7 @@
 	if (!(res->flags & (IORESOURCE_ROM_ENABLE | IORESOURCE_ROM_SHADOW)))
 		pci_disable_rom(pdev);
 }
+EXPORT_SYMBOL(pci_unmap_rom);
 
 /**
  * pci_cleanup_rom - free the ROM copy created by pci_map_rom_copy
@@ -199,7 +203,7 @@
 	struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
 
 	if (res->flags & IORESOURCE_ROM_COPY) {
-		kfree((void*)(unsigned long)res->start);
+		kfree((void *)(unsigned long)res->start);
 		res->flags |= IORESOURCE_UNSET;
 		res->flags &= ~IORESOURCE_ROM_COPY;
 		res->start = 0;
@@ -222,9 +226,4 @@
 
 	return NULL;
 }
-
-EXPORT_SYMBOL(pci_map_rom);
-EXPORT_SYMBOL(pci_unmap_rom);
-EXPORT_SYMBOL_GPL(pci_enable_rom);
-EXPORT_SYMBOL_GPL(pci_disable_rom);
 EXPORT_SYMBOL(pci_platform_rom);

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 8e495bd..827ad83 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c

@@ -17,14 +17,100 @@
 EXPORT_SYMBOL_GPL(pci_bus_sem);
 
 /*
+ * pci_for_each_dma_alias - Iterate over DMA aliases for a device
+ * @pdev: starting downstream device
+ * @fn: function to call for each alias
+ * @data: opaque data to pass to @fn
+ *
+ * Starting at @pdev, walk up the bus calling @fn for each possible alias
+ * of @pdev at the root bus.
+ */
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data)
+{
+	struct pci_bus *bus;
+	int ret;
+
+	/* Report the device's own bus number and devfn first. */
+	ret = fn(pdev, PCI_DEVID(pdev->bus->number, pdev->devfn), data);
+	if (ret)
+		return ret;
+
+	/*
+	 * If the device is broken and uses an alias requester ID for
+	 * DMA, iterate over that too.
+	 */
+	if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) {
+		ret = fn(pdev, PCI_DEVID(pdev->bus->number,
+					 pdev->dma_alias_devfn), data);
+		if (ret)
+			return ret;
+	}
+
+	/* Climb one bus at a time, looking at the bridge above each bus. */
+	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
+		struct pci_dev *tmp;
+
+		/* Skip virtual buses */
+		if (!bus->self)
+			continue;
+
+		tmp = bus->self;
+
+		/*
+		 * PCIe-to-PCI/X bridges alias transactions from downstream
+		 * devices using the subordinate bus number (PCI Express to
+		 * PCI/PCI-X Bridge Spec, rev 1.0, sec 2.3).  For all cases
+		 * where the upstream bus is PCI/X we alias to the bridge
+		 * (there are various conditions in the previous reference
+		 * where the bridge may take ownership of transactions, even
+		 * when the secondary interface is PCI-X).
+		 */
+		if (pci_is_pcie(tmp)) {
+			switch (pci_pcie_type(tmp)) {
+			case PCI_EXP_TYPE_ROOT_PORT:
+			case PCI_EXP_TYPE_UPSTREAM:
+			case PCI_EXP_TYPE_DOWNSTREAM:
+				/* No callback is made for these port types. */
+				continue;
+			case PCI_EXP_TYPE_PCI_BRIDGE:
+				/* Alias is devfn 00.0 on tmp->subordinate. */
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+				if (ret)
+					return ret;
+				continue;
+			case PCI_EXP_TYPE_PCIE_BRIDGE:
+				/* Alias is the bridge's own bus/devfn. */
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
+				if (ret)
+					return ret;
+				continue;
+			}
+		} else {
+			/*
+			 * Non-PCIe bridge: when flagged with
+			 * PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS it is reported the
+			 * same way as a PCIe-to-PCI bridge above, otherwise
+			 * the bridge's own bus/devfn is reported.
+			 */
+			if (tmp->dev_flags & PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS)
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+			else
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Every non-zero result returned early above, so ret is 0 here. */
+	return ret;
+}
+
+/*
  * find the upstream PCIe-to-PCI bridge of a PCI device
  * if the device is PCIE, return NULL
  * if the device isn't connected to a PCIe bridge (that is its parent is a
  * legacy PCI bridge and the bridge is directly connected to bus 0), return its
  * parent
  */
-struct pci_dev *
-pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
+struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
 {
 	struct pci_dev *tmp = NULL;
 
@@ -56,12 +142,12 @@
 	struct pci_bus *child;
 	struct pci_bus *tmp;
 
-	if(bus->number == busnr)
+	if (bus->number == busnr)
 		return bus;
 
 	list_for_each_entry(tmp, &bus->children, node) {
 		child = pci_do_find_bus(tmp, busnr);
-		if(child)
+		if (child)
 			return child;
 	}
 	return NULL;
@@ -76,7 +162,7 @@
  * in the global list of PCI buses.  If the bus is found, a pointer to its
  * data structure is returned.  If no bus is found, %NULL is returned.
  */
-struct pci_bus * pci_find_bus(int domain, int busnr)
+struct pci_bus *pci_find_bus(int domain, int busnr)
 {
 	struct pci_bus *bus = NULL;
 	struct pci_bus *tmp_bus;
@@ -90,6 +176,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(pci_find_bus);
 
 /**
  * pci_find_next_bus - begin or continue searching for a PCI bus
@@ -100,8 +187,7 @@
  * @from is not %NULL, searches continue from next device on the
  * global list.
  */
-struct pci_bus *
-pci_find_next_bus(const struct pci_bus *from)
+struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
 {
 	struct list_head *n;
 	struct pci_bus *b = NULL;
@@ -114,6 +200,7 @@
 	up_read(&pci_bus_sem);
 	return b;
 }
+EXPORT_SYMBOL(pci_find_next_bus);
 
 /**
  * pci_get_slot - locate PCI device for a given PCI slot
@@ -147,6 +234,7 @@
 	up_read(&pci_bus_sem);
 	return dev;
 }
+EXPORT_SYMBOL(pci_get_slot);
 
 /**
  * pci_get_domain_bus_and_slot - locate PCI device for a given PCI domain (segment), bus, and slot
@@ -251,6 +339,7 @@
 
 	return pci_get_dev_by_id(&id, from);
 }
+EXPORT_SYMBOL(pci_get_subsys);
 
 /**
  * pci_get_device - begin or continue searching for a PCI device by vendor/device id
@@ -266,11 +355,12 @@
  * from next device on the global list.  The reference count for @from is
  * always decremented if it is not %NULL.
  */
-struct pci_dev *
-pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from)
+struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device,
+			       struct pci_dev *from)
 {
 	return pci_get_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
 }
+EXPORT_SYMBOL(pci_get_device);
 
 /**
  * pci_get_class - begin or continue searching for a PCI device by class
@@ -299,6 +389,7 @@
 
 	return pci_get_dev_by_id(&id, from);
 }
+EXPORT_SYMBOL(pci_get_class);
 
 /**
  * pci_dev_present - Returns 1 if device matching the device list is present, 0 if not.
@@ -328,12 +419,3 @@
 	return 0;
 }
 EXPORT_SYMBOL(pci_dev_present);
-
-/* For boot time work */
-EXPORT_SYMBOL(pci_find_bus);
-EXPORT_SYMBOL(pci_find_next_bus);
-/* For everyone */
-EXPORT_SYMBOL(pci_get_device);
-EXPORT_SYMBOL(pci_get_subsys);
-EXPORT_SYMBOL(pci_get_slot);
-EXPORT_SYMBOL(pci_get_class);

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index fd9b545..a5a63ec 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c

@@ -68,7 +68,7 @@
 
 	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 	if (!tmp) {
-		pr_warning("add_to_list: kmalloc() failed!\n");
+		pr_warn("add_to_list: kmalloc() failed!\n");
 		return -ENOMEM;
 	}
 
@@ -148,8 +148,7 @@
 
 		tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 		if (!tmp)
-			panic("pdev_sort_resources(): "
-			      "kmalloc() failed!\n");
+			panic("pdev_sort_resources(): kmalloc() failed!\n");
 		tmp->res = r;
 		tmp->dev = dev;
 
@@ -736,7 +735,7 @@
 {
 	if (size < min_size)
 		size = min_size;
-	if (old_size == 1 )
+	if (old_size == 1)
 		old_size = 0;
 	/* To be fixed in 2.5: we should have sort of HAVE_ISA
 	   flag in the struct pci_bus. */
@@ -757,7 +756,7 @@
 {
 	if (size < min_size)
 		size = min_size;
-	if (old_size == 1 )
+	if (old_size == 1)
 		old_size = 0;
 	if (size < old_size)
 		size = old_size;
@@ -859,9 +858,8 @@
 			resource_size(b_res), min_align);
 	if (!size0 && !size1) {
 		if (b_res->start || b_res->end)
-			dev_info(&bus->self->dev, "disabling bridge window "
-				 "%pR to %pR (unused)\n", b_res,
-				 &bus->busn_res);
+			dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n",
+				 b_res, &bus->busn_res);
 		b_res->flags = 0;
 		return;
 	}
@@ -872,10 +870,9 @@
 	if (size1 > size0 && realloc_head) {
 		add_to_list(realloc_head, bus->self, b_res, size1-size0,
 			    min_align);
-		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
-				 "%pR to %pR add_size %llx\n", b_res,
-				 &bus->busn_res,
-				 (unsigned long long)size1-size0);
+		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n",
+			   b_res, &bus->busn_res,
+			   (unsigned long long)size1-size0);
 	}
 }
 
@@ -974,9 +971,8 @@
 			if (order < 0)
 				order = 0;
 			if (order >= ARRAY_SIZE(aligns)) {
-				dev_warn(&dev->dev, "disabling BAR %d: %pR "
-					 "(bad alignment %#llx)\n", i, r,
-					 (unsigned long long) align);
+				dev_warn(&dev->dev, "disabling BAR %d: %pR (bad alignment %#llx)\n",
+					 i, r, (unsigned long long) align);
 				r->flags = 0;
 				continue;
 			}
@@ -1003,9 +999,8 @@
 				resource_size(b_res), min_align);
 	if (!size0 && !size1) {
 		if (b_res->start || b_res->end)
-			dev_info(&bus->self->dev, "disabling bridge window "
-				 "%pR to %pR (unused)\n", b_res,
-				 &bus->busn_res);
+			dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n",
+				 b_res, &bus->busn_res);
 		b_res->flags = 0;
 		return 0;
 	}
@@ -1014,9 +1009,9 @@
 	b_res->flags |= IORESOURCE_STARTALIGN;
 	if (size1 > size0 && realloc_head) {
 		add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
-		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
-				 "%pR to %pR add_size %llx\n", b_res,
-				 &bus->busn_res, (unsigned long long)size1-size0);
+		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n",
+			   b_res, &bus->busn_res,
+			   (unsigned long long)size1-size0);
 	}
 	return 0;
 }
@@ -1274,8 +1269,8 @@
 			break;
 
 		default:
-			dev_info(&dev->dev, "not setting up bridge for bus "
-				 "%04x:%02x\n", pci_domain_nr(b), b->number);
+			dev_info(&dev->dev, "not setting up bridge for bus %04x:%02x\n",
+				 pci_domain_nr(b), b->number);
 			break;
 		}
 	}
@@ -1312,8 +1307,8 @@
 		break;
 
 	default:
-		dev_info(&bridge->dev, "not setting up bridge for bus "
-			 "%04x:%02x\n", pci_domain_nr(b), b->number);
+		dev_info(&bridge->dev, "not setting up bridge for bus %04x:%02x\n",
+			 pci_domain_nr(b), b->number);
 		break;
 	}
 }
@@ -1430,10 +1425,10 @@
 
 	pci_bus_for_each_resource(bus, res, i) {
 		if (!res || !res->end || !res->flags)
-                        continue;
+			continue;
 
 		dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res);
-        }
+	}
 }
 
 static void pci_bus_dump_resources(struct pci_bus *bus)
@@ -1458,7 +1453,7 @@
 	int depth = 0;
 	struct pci_bus *child_bus;
 
-	list_for_each_entry(child_bus, &bus->children, node){
+	list_for_each_entry(child_bus, &bus->children, node) {
 		int ret;
 
 		ret = pci_bus_get_depth(child_bus);

diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index dbc4ffc..4e2d595 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c

@@ -22,10 +22,9 @@
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-static void
-pdev_fixup_irq(struct pci_dev *dev,
-	       u8 (*swizzle)(struct pci_dev *, u8 *),
-	       int (*map_irq)(const struct pci_dev *, u8, u8))
+static void pdev_fixup_irq(struct pci_dev *dev,
+			   u8 (*swizzle)(struct pci_dev *, u8 *),
+			   int (*map_irq)(const struct pci_dev *, u8, u8))
 {
 	u8 pin, slot;
 	int irq = 0;
@@ -58,11 +57,11 @@
 	pcibios_update_irq(dev, irq);
 }
 
-void
-pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
-	       int (*map_irq)(const struct pci_dev *, u8, u8))
+void pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
+		    int (*map_irq)(const struct pci_dev *, u8, u8))
 {
 	struct pci_dev *dev = NULL;
+
 	for_each_pci_dev(dev)
 		pdev_fixup_irq(dev, swizzle, map_irq);
 }

diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 33f9e32..caed1ce 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c

@@ -96,8 +96,8 @@
 		pci_write_config_dword(dev, reg + 4, new);
 		pci_read_config_dword(dev, reg + 4, &check);
 		if (check != new) {
-			dev_err(&dev->dev, "BAR %d: error updating "
-			       "(high %#08x != %#08x)\n", resno, new, check);
+			dev_err(&dev->dev, "BAR %d: error updating (high %#08x != %#08x)\n",
+				resno, new, check);
 		}
 	}
 
@@ -289,8 +289,8 @@
 	res->flags |= IORESOURCE_UNSET;
 	align = pci_resource_alignment(dev, res);
 	if (!align) {
-		dev_info(&dev->dev, "BAR %d: can't assign %pR "
-			 "(bogus alignment)\n", resno, res);
+		dev_info(&dev->dev, "BAR %d: can't assign %pR (bogus alignment)\n",
+			 resno, res);
 		return -EINVAL;
 	}
 
@@ -314,6 +314,7 @@
 	}
 	return ret;
 }
+EXPORT_SYMBOL(pci_assign_resource);
 
 int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize,
 			resource_size_t min_align)
@@ -324,8 +325,8 @@
 
 	res->flags |= IORESOURCE_UNSET;
 	if (!res->parent) {
-		dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR "
-			 "\n", resno, res);
+		dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR\n",
+			 resno, res);
 		return -EINVAL;
 	}
 

diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
index 24750a1..b91c4da 100644
--- a/drivers/pci/syscall.c
+++ b/drivers/pci/syscall.c

@@ -99,7 +99,7 @@
 	if (!dev)
 		return -ENODEV;
 
-	switch(len) {
+	switch (len) {
 	case 1:
 		err = get_user(byte, (u8 __user *)buf);
 		if (err)

diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 7f3aad0..7f1a2e2 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c

@@ -84,21 +84,19 @@
 	I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
 };
 
+/*
+ * Keycode table handed to atmel_224s_tp_platform_data through
+ * .t19_keymap/.t19_num_keys: five KEY_RESERVED slots followed by BTN_LEFT.
+ */
+static int mxt_t19_keys[] = {
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	BTN_LEFT
+};
+
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.x_line			= 18,
-	.y_line			= 12,
-	.x_size			= 102*20,
-	.y_size			= 68*20,
-	.blen			= 0x80,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x32,
-	.voltage		= 0,	/* 3.3V */
-	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= true,
-	.key_map		= { KEY_RESERVED,
-				    KEY_RESERVED,
-				    KEY_RESERVED,
-				    BTN_LEFT },
+	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
+	.t19_keymap		= mxt_t19_keys,
 	.config			= NULL,
 	.config_length		= 0,
 };
@@ -110,16 +108,7 @@
 };
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
-	.x_line			= 32,
-	.y_line			= 50,
-	.x_size			= 1700,
-	.y_size			= 2560,
-	.blen			= 0x89,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x28,
-	.voltage		= 0,	/* 3.3V */
-	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= false,
 	.config			= NULL,
 	.config_length		= 0,
 };

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 27df2c5..172f26c 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig

@@ -102,7 +102,7 @@
 	default n
 	---help---
 	This driver adds support for rfkill and backlight control to Dell
-	laptops.
+	laptops (except for some models covered by the Compal driver).
 
 config DELL_WMI
 	tristate "Dell WMI extras"
@@ -127,6 +127,16 @@
 	  To compile this driver as a module, choose M here: the module will
 	  be called dell-wmi-aio.
 
+config DELL_SMO8800
+	tristate "Dell Latitude freefall driver (ACPI SMO8800/SMO8810)"
+	depends on ACPI
+	---help---
+	  Say Y here if you want to support the SMO8800/SMO8810 freefall
+	  device on Dell Latitude laptops.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called dell-smo8800.
+
 
 config FUJITSU_LAPTOP
 	tristate "Fujitsu Laptop Extras"
@@ -265,23 +275,21 @@
 	  R2, R3, R5, T2, W2 and Y2 series), say Y.
 
 config COMPAL_LAPTOP
-	tristate "Compal Laptop Extras"
+	tristate "Compal (and others) Laptop Extras"
 	depends on ACPI
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on RFKILL
 	depends on HWMON
 	depends on POWER_SUPPLY
 	---help---
-	  This is a driver for laptops built by Compal:
+	  This is a driver for laptops built by Compal, and some models by
+	  other brands (e.g. Dell, Toshiba).
 
-	  Compal FL90/IFL90
-	  Compal FL91/IFL91
-	  Compal FL92/JFL92
-	  Compal FT00/IFT00
+	  It adds support for rfkill, Bluetooth, WLAN and LCD brightness
+	  control.
 
-	  It adds support for Bluetooth, WLAN and LCD brightness control.
-
-	  If you have an Compal FL9x/IFL9x/FT00 laptop, say Y or M here.
+	  For a (possibly incomplete) list of supported laptops, please refer
+	  to: Documentation/platform/x86-laptop-drivers.txt
 
 config SONY_LAPTOP
 	tristate "Sony Laptop Extras"
@@ -724,7 +732,7 @@
 
 config XO1_RFKILL
 	tristate "OLPC XO-1 software RF kill switch"
-	depends on OLPC
+	depends on OLPC || COMPILE_TEST
 	depends on RFKILL
 	---help---
 	  Support for enabling/disabling the WLAN interface on the OLPC XO-1
@@ -732,6 +740,7 @@
 
 config XO15_EBOOK
 	tristate "OLPC XO-1.5 ebook switch"
+	depends on OLPC || COMPILE_TEST
 	depends on ACPI && INPUT
 	---help---
 	  Support for the ebook switch on the OLPC XO-1.5 laptop.

diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 1a2eafc..c4ca428 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile

@@ -13,6 +13,7 @@
 obj-$(CONFIG_DELL_LAPTOP)	+= dell-laptop.o
 obj-$(CONFIG_DELL_WMI)		+= dell-wmi.o
 obj-$(CONFIG_DELL_WMI_AIO)	+= dell-wmi-aio.o
+obj-$(CONFIG_DELL_SMO8800)	+= dell-smo8800.o
 obj-$(CONFIG_ACER_WMI)		+= acer-wmi.o
 obj-$(CONFIG_ACERHDF)		+= acerhdf.o
 obj-$(CONFIG_HP_ACCEL)		+= hp_accel.o

diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 541f951..297b664 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c

@@ -32,6 +32,7 @@
 #define WMAX_METHOD_HDMI_STATUS		0x2
 #define WMAX_METHOD_BRIGHTNESS		0x3
 #define WMAX_METHOD_ZONE_CONTROL	0x4
+#define WMAX_METHOD_HDMI_CABLE		0x5
 
 MODULE_AUTHOR("Mario Limonciello <mario_limonciello@dell.com>");
 MODULE_DESCRIPTION("Alienware special feature control");
@@ -350,12 +351,11 @@
 	char *name;
 
 	if (interface == WMAX) {
-		global_led.max_brightness = 100;
 		lighting_control_state = WMAX_RUNNING;
 	} else if (interface == LEGACY) {
-		global_led.max_brightness = 0x0F;
 		lighting_control_state = LEGACY_RUNNING;
 	}
+	global_led.max_brightness = 0x0F;
 	global_brightness = global_led.max_brightness;
 
 	/*
@@ -423,41 +423,85 @@
 	The HDMI mux sysfs node indicates the status of the HDMI input mux.
 	It can toggle between standard system GPU output and HDMI input.
 */
-static ssize_t show_hdmi(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+/*
+ * Run WMAX method @command with @in_args as its input buffer.
+ *
+ * When @out_data is NULL no result buffer is requested.  Otherwise a result
+ * buffer is requested and, if the call succeeded and returned an integer,
+ * that integer is stored in *@out_data; in every other case *@out_data is
+ * left untouched.  Returns the status of the WMI call.
+ */
+static acpi_status alienware_hdmi_command(struct hdmi_args *in_args,
+					  u32 command, int *out_data)
 {
 	acpi_status status;
-	struct acpi_buffer input;
 	union acpi_object *obj;
-	u32 tmp = 0;
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer input;
+	struct acpi_buffer output;
+
+	input.length = (acpi_size) sizeof(*in_args);
+	input.pointer = in_args;
+	if (out_data == NULL)
+		return wmi_evaluate_method(WMAX_CONTROL_GUID, 1,
+					   command, &input, NULL);
+
+	output.length = ACPI_ALLOCATE_BUFFER;
+	output.pointer = NULL;
+	status = wmi_evaluate_method(WMAX_CONTROL_GUID, 1,
+				     command, &input, &output);
+	if (ACPI_SUCCESS(status)) {
+		obj = (union acpi_object *)output.pointer;
+		if (obj && obj->type == ACPI_TYPE_INTEGER)
+			*out_data = (u32) obj->integer.value;
+	}
+
+	/*
+	 * With ACPI_ALLOCATE_BUFFER the result buffer is allocated for us and
+	 * must be released here; kfree(NULL) is a no-op if none was returned.
+	 */
+	kfree(output.pointer);
+	return status;
+}
+
+/*
+ * Read handler for the "cable" sysfs attribute: query WMAX_METHOD_HDMI_CABLE
+ * and print the three possible states with the current one in brackets.
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t show_hdmi_cable(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	/*
+	 * alienware_hdmi_command() only stores a result when the firmware
+	 * returns an integer, so start from a value that is neither 0 nor 1
+	 * and therefore falls through to "unknown".
+	 */
+	u32 out_data = ~0U;
 	struct hdmi_args in_args = {
 		.arg = 0,
 	};
-	input.length = (acpi_size) sizeof(in_args);
-	input.pointer = &in_args;
-	status = wmi_evaluate_method(WMAX_CONTROL_GUID, 1,
-				     WMAX_METHOD_HDMI_STATUS, &input, &output);
+	status =
+	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_CABLE,
+				   (u32 *) &out_data);
+	if (ACPI_SUCCESS(status)) {
+		if (out_data == 0)
+			return scnprintf(buf, PAGE_SIZE,
+					 "[unconnected] connected unknown\n");
+		else if (out_data == 1)
+			return scnprintf(buf, PAGE_SIZE,
+					 "unconnected [connected] unknown\n");
+	}
+	pr_err("alienware-wmi: unknown HDMI cable status: %d\n", status);
+	return scnprintf(buf, PAGE_SIZE, "unconnected connected [unknown]\n");
+}
+
+/*
+ * Read handler for the "source" sysfs attribute: query
+ * WMAX_METHOD_HDMI_STATUS and print the possible mux states with the current
+ * one in brackets.  Returns the number of bytes written to @buf.
+ */
+static ssize_t show_hdmi_source(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	/*
+	 * Start at 0, which matches neither state tested below: the helper
+	 * leaves out_data untouched on failure or on a non-integer result,
+	 * and the error message prints it.
+	 */
+	u32 out_data = 0;
+	struct hdmi_args in_args = {
+		.arg = 0,
+	};
+	status =
+	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_STATUS,
+				   (u32 *) &out_data);
 
 	if (ACPI_SUCCESS(status)) {
-		obj = (union acpi_object *)output.pointer;
-		if (obj && obj->type == ACPI_TYPE_INTEGER)
-			tmp = (u32) obj->integer.value;
-		if (tmp == 1)
+		if (out_data == 1)
 			return scnprintf(buf, PAGE_SIZE,
 					 "[input] gpu unknown\n");
-		else if (tmp == 2)
+		else if (out_data == 2)
 			return scnprintf(buf, PAGE_SIZE,
 					 "input [gpu] unknown\n");
 	}
-	pr_err("alienware-wmi: unknown HDMI status: %d\n", status);
+	pr_err("alienware-wmi: unknown HDMI source status: %d\n", out_data);
 	return scnprintf(buf, PAGE_SIZE, "input gpu [unknown]\n");
 }
 
-static ssize_t toggle_hdmi(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t count)
+static ssize_t toggle_hdmi_source(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
-	struct acpi_buffer input;
 	acpi_status status;
 	struct hdmi_args args;
 	if (strcmp(buf, "gpu\n") == 0)
@@ -467,33 +511,46 @@
 	else
 		args.arg = 3;
 	pr_debug("alienware-wmi: setting hdmi to %d : %s", args.arg, buf);
-	input.length = (acpi_size) sizeof(args);
-	input.pointer = &args;
-	status = wmi_evaluate_method(WMAX_CONTROL_GUID, 1,
-				     WMAX_METHOD_HDMI_SOURCE, &input, NULL);
+
+	status = alienware_hdmi_command(&args, WMAX_METHOD_HDMI_SOURCE, NULL);
+
 	if (ACPI_FAILURE(status))
 		pr_err("alienware-wmi: HDMI toggle failed: results: %u\n",
 		       status);
 	return count;
 }
 
-static DEVICE_ATTR(hdmi, S_IRUGO | S_IWUSR, show_hdmi, toggle_hdmi);
+static DEVICE_ATTR(cable, S_IRUGO, show_hdmi_cable, NULL);
+static DEVICE_ATTR(source, S_IRUGO | S_IWUSR, show_hdmi_source,
+		   toggle_hdmi_source);
 
-static void remove_hdmi(struct platform_device *device)
+static struct attribute *hdmi_attrs[] = {
+	&dev_attr_cable.attr,
+	&dev_attr_source.attr,
+	NULL,
+};
+
+static struct attribute_group hdmi_attribute_group = {
+	.name = "hdmi",
+	.attrs = hdmi_attrs,
+};
+
+static void remove_hdmi(struct platform_device *dev)
 {
-	device_remove_file(&device->dev, &dev_attr_hdmi);
+	sysfs_remove_group(&dev->dev.kobj, &hdmi_attribute_group);
 }
 
-static int create_hdmi(void)
+/*
+ * Create the "hdmi" sysfs attribute group on @dev and return the result of
+ * sysfs_create_group().  Nothing is unwound on failure: the group was not
+ * created, and removing a group that does not exist makes
+ * sysfs_remove_group() emit a warning.
+ */
+static int create_hdmi(struct platform_device *dev)
 {
-	int ret = -ENOMEM;
-	ret = device_create_file(&platform_device->dev, &dev_attr_hdmi);
-	if (ret)
-		goto error_create_hdmi;
-	return 0;
-
-error_create_hdmi:
-	remove_hdmi(platform_device);
-	return ret;
+	return sysfs_create_group(&dev->dev.kobj, &hdmi_attribute_group);
 }
 
@@ -527,7 +584,7 @@
 		goto fail_platform_device2;
 
 	if (interface == WMAX) {
-		ret = create_hdmi();
+		ret = create_hdmi(platform_device);
 		if (ret)
 			goto fail_prep_hdmi;
 	}

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 563f59e..ddf0eef 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c

@@ -137,6 +137,15 @@
 	},
 	{
 		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X550CA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X550CA"),
+		},
+		.driver_data = &quirk_asus_x401u,
+	},
+	{
+		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. X55A",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 91ef69a..3c6cced 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c

@@ -266,7 +266,7 @@
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 	acpi_status status;
 	union acpi_object *obj;
-	u32 tmp;
+	u32 tmp = 0;
 
 	status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, 1, method_id,
 				     &input, &output);
@@ -277,8 +277,6 @@
 	obj = (union acpi_object *)output.pointer;
 	if (obj && obj->type == ACPI_TYPE_INTEGER)
 		tmp = (u32) obj->integer.value;
-	else
-		tmp = 0;
 
 	if (retval)
 		*retval = tmp;

diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c
new file mode 100644
index 0000000..a653716
--- /dev/null
+++ b/drivers/platform/x86/dell-smo8800.c

@@ -0,0 +1,233 @@
+/*
+ *  dell-smo8800.c - Dell Latitude ACPI SMO8800/SMO8810 freefall sensor driver
+ *
+ *  Copyright (C) 2012 Sonal Santan <sonal.santan@gmail.com>
+ *  Copyright (C) 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ *  This is loosely based on lis3lv02d driver.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#define DRIVER_NAME "smo8800"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+
+/*
+ * Per-device driver state, allocated in smo8800_add() and passed to the
+ * interrupt handlers as their cookie.
+ */
+struct smo8800_device {
+	u32 irq;                     /* acpi device irq */
+	atomic_t counter;            /* count after last read */
+	struct miscdevice miscdev;   /* for /dev/freefall */
+	unsigned long misc_opened;   /* whether the device is open */
+	wait_queue_head_t misc_wait; /* Wait queue for the misc dev */
+	struct device *dev;          /* acpi device */
+};
+
+/*
+ * Primary interrupt handler: bump the event counter, wake anyone sleeping
+ * on misc_wait and ask for the threaded handler to run.
+ */
+static irqreturn_t smo8800_interrupt_quick(int irq, void *data)
+{
+	struct smo8800_device *sensor = data;
+
+	atomic_inc(&sensor->counter);
+	wake_up_interruptible(&sensor->misc_wait);
+
+	return IRQ_WAKE_THREAD;
+}
+
+/* Threaded interrupt handler: log the event against the ACPI device. */
+static irqreturn_t smo8800_interrupt_thread(int irq, void *data)
+{
+	struct smo8800_device *sensor = data;
+	struct device *log_dev = sensor->dev;
+
+	dev_info(log_dev, "detected free fall\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * acpi_walk_resources() callback used by smo8800_get_irq().
+ *
+ * @resource: resource entry currently being visited
+ * @context:  points at the u32 that receives the interrupt number
+ *
+ * Stores the first interrupt of the first extended-IRQ resource that lists
+ * at least one interrupt and returns AE_CTRL_TERMINATE; returns AE_OK for
+ * every other resource.
+ */
+static acpi_status smo8800_get_resource(struct acpi_resource *resource,
+					void *context)
+{
+	struct acpi_resource_extended_irq *irq;
+	u32 *irq_out = context;
+
+	if (resource->type != ACPI_RESOURCE_TYPE_EXTENDED_IRQ)
+		return AE_OK;
+
+	/*
+	 * irq is the address of a member embedded in *resource, so it can
+	 * never be NULL; only the interrupt count needs checking.
+	 */
+	irq = &resource->data.extended_irq;
+	if (!irq->interrupt_count)
+		return AE_OK;
+
+	*irq_out = irq->interrupts[0];
+	return AE_CTRL_TERMINATE;
+}
+
+/*
+ * Walk the _CRS resources of @device looking for an interrupt number.
+ * Returns that number, or 0 when the walk fails or finds none.
+ */
+static u32 smo8800_get_irq(struct acpi_device *device)
+{
+	acpi_status walk_status;
+	u32 found = 0;
+
+	walk_status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+					  smo8800_get_resource, &found);
+	if (!ACPI_FAILURE(walk_status))
+		return found;
+
+	dev_err(&device->dev, "acpi_walk_resources failed\n");
+	return 0;
+}
+
+/*
+ * read() handler for /dev/freefall.
+ *
+ * Clears the event counter, sleeps until it becomes non-zero, then copies
+ * one byte to @buf holding the number of events seen, capped at 255.
+ * Returns 1 on success, -EINVAL when @count is 0, -EFAULT when the copy to
+ * user space fails, or the non-zero result of the interrupted wait.
+ */
+static ssize_t smo8800_misc_read(struct file *file, char __user *buf,
+				 size_t count, loff_t *pos)
+{
+	struct smo8800_device *sensor = container_of(file->private_data,
+					struct smo8800_device, miscdev);
+	unsigned char events;
+	u32 data = 0;
+	ssize_t wait_ret;
+
+	if (count < 1)
+		return -EINVAL;
+
+	atomic_set(&sensor->counter, 0);
+	wait_ret = wait_event_interruptible(sensor->misc_wait,
+				(data = atomic_xchg(&sensor->counter, 0)));
+	if (wait_ret)
+		return wait_ret;
+
+	/* Only one byte is handed back, so saturate the count at 255. */
+	events = data < 255 ? data : 255;
+
+	if (put_user(events, buf))
+		return -EFAULT;
+
+	return 1;
+}
+
+/*
+ * open() handler for /dev/freefall.  Bit 0 of misc_opened allows a single
+ * opener at a time: a second open gets -EBUSY.  A successful open resets
+ * the event counter and returns 0.
+ */
+static int smo8800_misc_open(struct inode *inode, struct file *file)
+{
+	struct smo8800_device *smo8800 = container_of(file->private_data,
+					 struct smo8800_device, miscdev);
+
+	if (test_and_set_bit(0, &smo8800->misc_opened))
+		return -EBUSY; /* already open */
+
+	atomic_set(&smo8800->counter, 0);
+	return 0;
+}
+
+/*
+ * release() handler for /dev/freefall: clears bit 0 of misc_opened so the
+ * device can be opened again.  Always returns 0.
+ */
+static int smo8800_misc_release(struct inode *inode, struct file *file)
+{
+	struct smo8800_device *smo8800 = container_of(file->private_data,
+					 struct smo8800_device, miscdev);
+
+	clear_bit(0, &smo8800->misc_opened); /* release the device */
+	return 0;
+}
+
+static const struct file_operations smo8800_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = smo8800_misc_read,
+	.open = smo8800_misc_open,
+	.release = smo8800_misc_release,
+};
+
+/*
+ * ACPI .add callback.
+ *
+ * Allocates the per-device state, registers the "freefall" misc device,
+ * looks up the interrupt via smo8800_get_irq() and installs the threaded
+ * interrupt handler.  Returns 0 on success or a negative errno; once the
+ * misc device has been registered, every failure path deregisters it.
+ */
+static int smo8800_add(struct acpi_device *device)
+{
+	int err;
+	struct smo8800_device *smo8800;
+
+	/* Device-managed allocation: no explicit free on the error paths. */
+	smo8800 = devm_kzalloc(&device->dev, sizeof(*smo8800), GFP_KERNEL);
+	if (!smo8800) {
+		dev_err(&device->dev, "failed to allocate device data\n");
+		return -ENOMEM;
+	}
+
+	smo8800->dev = &device->dev;
+	smo8800->miscdev.minor = MISC_DYNAMIC_MINOR;
+	smo8800->miscdev.name = "freefall";
+	smo8800->miscdev.fops = &smo8800_misc_fops;
+
+	/* The wait queue must be ready before the device can be opened. */
+	init_waitqueue_head(&smo8800->misc_wait);
+
+	err = misc_register(&smo8800->miscdev);
+	if (err) {
+		dev_err(&device->dev, "failed to register misc dev: %d\n", err);
+		return err;
+	}
+
+	/* smo8800_remove() retrieves the state through driver_data. */
+	device->driver_data = smo8800;
+
+	/* smo8800_get_irq() returns 0 when no interrupt was found. */
+	smo8800->irq = smo8800_get_irq(device);
+	if (!smo8800->irq) {
+		dev_err(&device->dev, "failed to obtain IRQ\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	err = request_threaded_irq(smo8800->irq, smo8800_interrupt_quick,
+				   smo8800_interrupt_thread,
+				   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+				   DRIVER_NAME, smo8800);
+	if (err) {
+		dev_err(&device->dev,
+			"failed to request thread for IRQ %d: %d\n",
+			smo8800->irq, err);
+		goto error;
+	}
+
+	dev_dbg(&device->dev, "device /dev/freefall registered with IRQ %d\n",
+		 smo8800->irq);
+	return 0;
+
+error:
+	/* Undo misc_register(); the IRQ was not acquired on these paths. */
+	misc_deregister(&smo8800->miscdev);
+	return err;
+}
+
+/*
+ * ACPI .remove callback: releases the interrupt requested in smo8800_add()
+ * and then deregisters the misc device.  Always returns 0.
+ */
+static int smo8800_remove(struct acpi_device *device)
+{
+	struct smo8800_device *smo8800 = device->driver_data;
+
+	free_irq(smo8800->irq, smo8800);
+	misc_deregister(&smo8800->miscdev);
+	dev_dbg(&device->dev, "device /dev/freefall unregistered\n");
+	return 0;
+}
+
+static const struct acpi_device_id smo8800_ids[] = {
+	{ "SMO8800", 0 },
+	{ "SMO8810", 0 },
+	{ "", 0 },
+};
+
+MODULE_DEVICE_TABLE(acpi, smo8800_ids);
+
+static struct acpi_driver smo8800_driver = {
+	.name = DRIVER_NAME,
+	.class = "Latitude",
+	.ids = smo8800_ids,
+	.ops = {
+		.add = smo8800_add,
+		.remove = smo8800_remove,
+	},
+	.owner = THIS_MODULE,
+};
+
+module_acpi_driver(smo8800_driver);
+
+MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO8800/SMO8810)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sonal Santan, Pali Rohár");

diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 8ba8956..484a867 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c

@@ -53,6 +53,7 @@
 #define HPWMI_ALS_QUERY 0x3
 #define HPWMI_HARDWARE_QUERY 0x4
 #define HPWMI_WIRELESS_QUERY 0x5
+#define HPWMI_BIOS_QUERY 0x9
 #define HPWMI_HOTKEY_QUERY 0xc
 #define HPWMI_FEATURE_QUERY 0xd
 #define HPWMI_WIRELESS2_QUERY 0x1b
@@ -144,6 +145,7 @@
 	{ KE_KEY, 0x2142, { KEY_MEDIA } },
 	{ KE_KEY, 0x213b, { KEY_INFO } },
 	{ KE_KEY, 0x2169, { KEY_DIRECTION } },
+	{ KE_KEY, 0x216a, { KEY_SETUP } },
 	{ KE_KEY, 0x231b, { KEY_HELP } },
 	{ KE_END, 0 }
 };
@@ -304,6 +306,19 @@
 	return (state & 0x10) ? 1 : 0;
 }
 
+/*
+ * Send the value 0x6e through HPWMI_BIOS_QUERY.
+ * NOTE(review): going by the function name this asks the BIOS to report
+ * hotkey events - confirm against the BIOS interface description.
+ *
+ * Returns 0 on success, -EINVAL if the query reports any non-zero result.
+ */
+static int hp_wmi_enable_hotkeys(void)
+{
+	int value = 0x6e;
+	int err = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value,
+				       sizeof(value), 0);
+
+	return err ? -EINVAL : 0;
+}
+
 static int hp_wmi_set_block(void *data, bool blocked)
 {
 	enum hp_wmi_radio r = (enum hp_wmi_radio) data;
@@ -648,6 +663,9 @@
 			    hp_wmi_tablet_state());
 	input_sync(hp_wmi_input_dev);
 
+	if (hp_wmi_bios_2009_later() == 4)
+		hp_wmi_enable_hotkeys();
+
 	status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL);
 	if (ACPI_FAILURE(status)) {
 		err = -EIO;

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 6dd060a..b4c495a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c

@@ -36,6 +36,8 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/i8042.h>
+#include <linux/dmi.h>
+#include <linux/device.h>
 
 #define IDEAPAD_RFKILL_DEV_NUM	(3)
 
@@ -819,6 +821,19 @@
 	}
 }
 
+/*
+ * Blacklist for devices where the ideapad rfkill interface does not work.
+ * The table is only ever read (via dmi_check_system()), so keep it const.
+ */
+static const struct dmi_system_id rfkill_blacklist[] = {
+	/* The Lenovo Yoga 2 11 always reports everything as blocked */
+	{
+		.ident = "Lenovo Yoga 2 11",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Yoga 2 11"),
+		},
+	},
+	{}
+};
+
 static int ideapad_acpi_add(struct platform_device *pdev)
 {
 	int ret, i;
@@ -833,7 +848,7 @@
 	if (read_method_int(adev->handle, "_CFG", &cfg))
 		return -ENODEV;
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
@@ -844,7 +859,7 @@
 
 	ret = ideapad_sysfs_init(priv);
 	if (ret)
-		goto sysfs_failed;
+		return ret;
 
 	ret = ideapad_debugfs_init(priv);
 	if (ret)
@@ -854,11 +869,10 @@
 	if (ret)
 		goto input_failed;
 
-	for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) {
-		if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg))
-			ideapad_register_rfkill(priv, i);
-		else
-			priv->rfk[i] = NULL;
+	if (!dmi_check_system(rfkill_blacklist)) {
+		for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++)
+			if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg))
+				ideapad_register_rfkill(priv, i);
 	}
 	ideapad_sync_rfk_state(priv);
 	ideapad_sync_touchpad_state(priv);
@@ -884,8 +898,6 @@
 	ideapad_debugfs_exit(priv);
 debugfs_failed:
 	ideapad_sysfs_exit(priv);
-sysfs_failed:
-	kfree(priv);
 	return ret;
 }
 
@@ -903,7 +915,6 @@
 	ideapad_debugfs_exit(priv);
 	ideapad_sysfs_exit(priv);
 	dev_set_drvdata(&pdev->dev, NULL);
-	kfree(priv);
 
 	return 0;
 }

diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index 93fab8b..ab7860a 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c

@@ -481,7 +481,8 @@
 	int i;
 	struct platform_info *pinfo;
 
-	pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL);
+	pinfo = devm_kzalloc(&pdev->dev, sizeof(struct platform_info),
+			     GFP_KERNEL);
 	if (!pinfo)
 		return -ENOMEM;
 
@@ -489,7 +490,6 @@
 	ret = mid_initialize_adc(&pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "ADC init failed");
-		kfree(pinfo);
 		return ret;
 	}
 
@@ -520,7 +520,6 @@
 		thermal_zone_device_unregister(pinfo->tzd[i]);
 	}
 	configure_adc(0);
-	kfree(pinfo);
 	return ret;
 }
 
@@ -541,8 +540,6 @@
 		thermal_zone_device_unregister(pinfo->tzd[i]);
 	}
 
-	kfree(pinfo);
-
 	/* Stop the ADC */
 	return configure_adc(0);
 }

diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index 2805988..40929e4 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c

@@ -91,7 +91,7 @@
 
 static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
-	if (offset > 8) {
+	if (offset >= 8) {
 		pr_err("only pin 0-7 support input\n");
 		return -1;/* we only have 8 GPIO can use as input */
 	}
@@ -130,7 +130,7 @@
 	int ret;
 
 	/* we only have 8 GPIO pins we can use as input */
-	if (offset > 8)
+	if (offset >= 8)
 		return -EOPNOTSUPP;
 	ret = intel_scu_ipc_ioread8(GPIO0 + offset, &r);
 	if (ret < 0)

diff --git a/drivers/platform/x86/pvpanic.c b/drivers/platform/x86/pvpanic.c
index c9f6e51..073a90a 100644
--- a/drivers/platform/x86/pvpanic.c
+++ b/drivers/platform/x86/pvpanic.c

@@ -70,6 +70,7 @@
 
 static struct notifier_block pvpanic_panic_nb = {
 	.notifier_call = pvpanic_panic_notify,
+	.priority = 1, /* let this be called before broken drm_fb_helper */
 };
 
 

diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index d1f03005..5a59665 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c

@@ -27,6 +27,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/efi.h>
+#include <linux/suspend.h>
 #include <acpi/video.h>
 
 /*
@@ -340,6 +341,8 @@
 	struct samsung_laptop_debug debug;
 	struct samsung_quirks *quirks;
 
+	struct notifier_block pm_nb;
+
 	bool handle_backlight;
 	bool has_stepping_quirk;
 
@@ -348,6 +351,8 @@
 
 struct samsung_quirks {
 	bool broken_acpi_video;
+	bool four_kbd_backlight_levels;
+	bool enable_kbd_backlight;
 };
 
 static struct samsung_quirks samsung_unknown = {};
@@ -356,6 +361,11 @@
 	.broken_acpi_video = true,
 };
 
+/*
+ * Quirks used for the "730U3E/740U3E" DMI entry: the keyboard backlight
+ * has four levels, and it is re-enabled after hibernation.
+ */
+static struct samsung_quirks samsung_np740u3e = {
+	.four_kbd_backlight_levels = true,
+	.enable_kbd_backlight = true,
+};
+
 static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force,
@@ -1051,6 +1061,8 @@
 		samsung->kbd_led.brightness_set = kbd_led_set;
 		samsung->kbd_led.brightness_get = kbd_led_get;
 		samsung->kbd_led.max_brightness = 8;
+		if (samsung->quirks->four_kbd_backlight_levels)
+			samsung->kbd_led.max_brightness = 4;
 
 		ret = led_classdev_register(&samsung->platform_device->dev,
 					   &samsung->kbd_led);
@@ -1414,6 +1426,19 @@
 	}
 }
 
+/*
+ * PM notifier callback. On PM_POST_HIBERNATION, re-enable the keyboard
+ * backlight for machines whose quirks set enable_kbd_backlight. Every other
+ * event is ignored. Always returns 0.
+ */
+static int samsung_pm_notification(struct notifier_block *nb,
+				   unsigned long val, void *ptr)
+{
+	struct samsung_laptop *laptop =
+		container_of(nb, struct samsung_laptop, pm_nb);
+
+	if (val != PM_POST_HIBERNATION)
+		return 0;
+
+	if (laptop->quirks->enable_kbd_backlight)
+		kbd_backlight_enable(laptop);
+
+	return 0;
+}
+
 static int __init samsung_platform_init(struct samsung_laptop *samsung)
 {
 	struct platform_device *pdev;
@@ -1534,6 +1559,15 @@
 		},
 	 .driver_data = &samsung_broken_acpi_video,
 	},
+	{
+	 .callback = samsung_dmi_matched,
+	 .ident = "730U3E/740U3E",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "730U3E/740U3E"),
+		},
+	 .driver_data = &samsung_np740u3e,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(dmi, samsung_dmi_table);
@@ -1608,6 +1642,9 @@
 	if (ret)
 		goto error_debugfs;
 
+	samsung->pm_nb.notifier_call = samsung_pm_notification;
+	register_pm_notifier(&samsung->pm_nb);
+
 	samsung_platform_device = samsung->platform_device;
 	return ret;
 
@@ -1633,6 +1670,7 @@
 	struct samsung_laptop *samsung;
 
 	samsung = platform_get_drvdata(samsung_platform_device);
+	unregister_pm_notifier(&samsung->pm_nb);
 
 	samsung_debugfs_exit(samsung);
 	samsung_leds_exit(samsung);

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 15e61c1..d82f196 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c

@@ -3171,8 +3171,10 @@
 		KEY_MICMUTE,	/* 0x1a: Mic mute (since ?400 or so) */
 
 		/* (assignments unknown, please report if found) */
-		KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
 		KEY_UNKNOWN,
+
+		/* Extra keys in use since the X240 / T440 / T540 */
+		KEY_CONFIG, KEY_SEARCH, KEY_SCALE, KEY_COMPUTER,
 		},
 	};
 

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 46473ca..76441dc 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c

@@ -56,6 +56,7 @@
 #include <linux/workqueue.h>
 #include <linux/i8042.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/uaccess.h>
 
 MODULE_AUTHOR("John Belmonte");
@@ -213,6 +214,30 @@
 	{ KE_END, 0 },
 };
 
+/*
+ * Systems, matched by DMI vendor and product name, that use the
+ * alternative keymap (toshiba_acpi_alt_keymap) instead of the default one.
+ */
+static const struct dmi_system_id toshiba_alt_keymap_dmi[] = {
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Satellite M840"),
+		},
+	},
+	{}
+};
+
+/*
+ * Hotkey code to input key mapping for the systems listed in
+ * toshiba_alt_keymap_dmi; terminated by the KE_END entry.
+ */
+static const struct key_entry toshiba_acpi_alt_keymap[] = {
+	{ KE_KEY, 0x157, { KEY_MUTE } },
+	{ KE_KEY, 0x102, { KEY_ZOOMOUT } },
+	{ KE_KEY, 0x103, { KEY_ZOOMIN } },
+	{ KE_KEY, 0x139, { KEY_ZOOMRESET } },
+	{ KE_KEY, 0x13e, { KEY_SWITCHVIDEOMODE } },
+	{ KE_KEY, 0x13c, { KEY_BRIGHTNESSDOWN } },
+	{ KE_KEY, 0x13d, { KEY_BRIGHTNESSUP } },
+	{ KE_KEY, 0x158, { KEY_WLAN } },
+	{ KE_KEY, 0x13f, { KEY_TOUCHPAD_TOGGLE } },
+	{ KE_END, 0 },
+};
+
 /* utility
  */
 
@@ -1440,6 +1465,7 @@
 	acpi_handle ec_handle;
 	int error;
 	u32 hci_result;
+	const struct key_entry *keymap = toshiba_acpi_keymap;
 
 	dev->hotkey_dev = input_allocate_device();
 	if (!dev->hotkey_dev)
@@ -1449,7 +1475,9 @@
 	dev->hotkey_dev->phys = "toshiba_acpi/input0";
 	dev->hotkey_dev->id.bustype = BUS_HOST;
 
-	error = sparse_keymap_setup(dev->hotkey_dev, toshiba_acpi_keymap, NULL);
+	if (dmi_check_system(toshiba_alt_keymap_dmi))
+		keymap = toshiba_acpi_alt_keymap;
+	error = sparse_keymap_setup(dev->hotkey_dev, keymap, NULL);
 	if (error)
 		goto err_free_dev;
 

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index e25d2bc..296b0ec 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c

@@ -142,7 +142,10 @@
 		delta = ktime_to_ns(kt);
 		err = ops->adjtime(ops, delta);
 	} else if (tx->modes & ADJ_FREQUENCY) {
-		err = ops->adjfreq(ops, scaled_ppm_to_ppb(tx->freq));
+		s32 ppb = scaled_ppm_to_ppb(tx->freq);
+		if (ppb > ops->max_adj || ppb < -ops->max_adj)
+			return -ERANGE;
+		err = ops->adjfreq(ops, ppb);
 		ptp->dialed_frequency = tx->freq;
 	} else if (tx->modes == 0) {
 		tx->freq = ptp->dialed_frequency;

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 5b34ff2..4ad7b89 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig

@@ -62,6 +62,15 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-atmel-tcb.
 
+config PWM_BCM_KONA
+	tristate "Kona PWM support"
+	depends on ARCH_BCM_MOBILE
+	help
+	  Generic PWM framework driver for Broadcom Kona PWM block.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-bcm-kona.
+
 config PWM_BFIN
 	tristate "Blackfin PWM support"
 	depends on BFIN_GPTIMERS

diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index e57d2c3..5c86a19 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile

@@ -3,6 +3,7 @@
 obj-$(CONFIG_PWM_AB8500)	+= pwm-ab8500.o
 obj-$(CONFIG_PWM_ATMEL)		+= pwm-atmel.o
 obj-$(CONFIG_PWM_ATMEL_TCB)	+= pwm-atmel-tcb.o
+obj-$(CONFIG_PWM_BCM_KONA)	+= pwm-bcm-kona.o
 obj-$(CONFIG_PWM_BFIN)		+= pwm-bfin.o
 obj-$(CONFIG_PWM_CLPS711X)	+= pwm-clps711x.o
 obj-$(CONFIG_PWM_EP93XX)	+= pwm-ep93xx.o

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index a804713..4b66bf0 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c

@@ -661,10 +661,16 @@
 		}
 	}
 
+	mutex_unlock(&pwm_lookup_lock);
+
 	if (chip)
 		pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id);
+	if (IS_ERR(pwm))
+		return pwm;
 
-	mutex_unlock(&pwm_lookup_lock);
+	pwm_set_period(pwm, p->period);
+	pwm_set_polarity(pwm, p->polarity);
+
 
 	return pwm;
 }

diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c
index 1d07a6f..4c07a84 100644
--- a/drivers/pwm/pwm-ab8500.c
+++ b/drivers/pwm/pwm-ab8500.c

@@ -20,10 +20,6 @@
 #define AB8500_PWM_OUT_CTRL2_REG	0x61
 #define AB8500_PWM_OUT_CTRL7_REG	0x66
 
-/* backlight driver constants */
-#define ENABLE_PWM			1
-#define DISABLE_PWM			0
-
 struct ab8500_pwm_chip {
 	struct pwm_chip chip;
 };
@@ -64,7 +60,7 @@
 
 	ret = abx500_mask_and_set_register_interruptible(chip->dev,
 				AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
-				1 << (chip->base - 1), ENABLE_PWM);
+				1 << (chip->base - 1), 1 << (chip->base - 1));
 	if (ret < 0)
 		dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
 							pwm->label, ret);
@@ -77,11 +73,10 @@
 
 	ret = abx500_mask_and_set_register_interruptible(chip->dev,
 				AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
-				1 << (chip->base - 1), DISABLE_PWM);
+				1 << (chip->base - 1), 0);
 	if (ret < 0)
 		dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
 							pwm->label, ret);
-	return;
 }
 
 static const struct pwm_ops ab8500_pwm_ops = {
@@ -101,10 +96,8 @@
 	 * device which is required for ab8500 read and write
 	 */
 	ab8500 = devm_kzalloc(&pdev->dev, sizeof(*ab8500), GFP_KERNEL);
-	if (ab8500 == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (ab8500 == NULL)
 		return -ENOMEM;
-	}
 
 	ab8500->chip.dev = &pdev->dev;
 	ab8500->chip.ops = &ab8500_pwm_ops;

diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index 0adc952..6e700a5 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c

@@ -357,6 +357,7 @@
 
 	atmel_pwm->chip.base = -1;
 	atmel_pwm->chip.npwm = 4;
+	atmel_pwm->chip.can_sleep = true;
 	atmel_pwm->config = data->config;
 
 	ret = pwmchip_add(&atmel_pwm->chip);

diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c
new file mode 100644
index 0000000..02bc048
--- /dev/null
+++ b/drivers/pwm/pwm-bcm-kona.c

@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+/*
+ * The Kona PWM has some unusual characteristics.  Here are the main points.
+ *
+ * 1) There is no disable bit and the hardware docs advise programming a zero
+ *    duty to achieve output equivalent to that of a normal disable operation.
+ *
+ * 2) Changes to prescale, duty, period, and polarity do not take effect until
+ *    a subsequent rising edge of the trigger bit.
+ *
+ * 3) If the smooth bit and trigger bit are both low, the output is a constant
+ *    high signal.  Otherwise, the earlier waveform continues to be output.
+ *
+ * 4) If the smooth bit is set on the rising edge of the trigger bit, output
+ *    will transition to the new settings on a period boundary (which could be
+ *    seconds away).  If the smooth bit is clear, new settings will be applied
+ *    as soon as possible (the hardware always has a 400ns delay).
+ *
+ * 5) When the external clock that feeds the PWM is disabled, output is pegged
+ *    high or low depending on its state at that exact instant.
+ */
+
+#define PWM_CONTROL_OFFSET			(0x00000000)
+#define PWM_CONTROL_SMOOTH_SHIFT(chan)		(24 + (chan))
+#define PWM_CONTROL_TYPE_SHIFT(chan)		(16 + (chan))
+#define PWM_CONTROL_POLARITY_SHIFT(chan)	(8 + (chan))
+#define PWM_CONTROL_TRIGGER_SHIFT(chan)		(chan)
+
+#define PRESCALE_OFFSET				(0x00000004)
+#define PRESCALE_SHIFT(chan)			((chan) << 2)
+#define PRESCALE_MASK(chan)			(0x7 << PRESCALE_SHIFT(chan))
+#define PRESCALE_MIN				(0x00000000)
+#define PRESCALE_MAX				(0x00000007)
+
+#define PERIOD_COUNT_OFFSET(chan)		(0x00000008 + ((chan) << 3))
+#define PERIOD_COUNT_MIN			(0x00000002)
+#define PERIOD_COUNT_MAX			(0x00ffffff)
+
+#define DUTY_CYCLE_HIGH_OFFSET(chan)		(0x0000000c + ((chan) << 3))
+#define DUTY_CYCLE_HIGH_MIN			(0x00000000)
+#define DUTY_CYCLE_HIGH_MAX			(0x00ffffff)
+
+/* Driver state for one Kona PWM controller instance. */
+struct kona_pwmc {
+	struct pwm_chip chip;	/* embedded PWM framework chip */
+	void __iomem *base;	/* mapped controller registers */
+	struct clk *clk;	/* clock obtained with devm_clk_get() in probe */
+};
+
+/* Get the kona_pwmc that embeds the given pwm_chip. */
+static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *_chip)
+{
+	return container_of(_chip, struct kona_pwmc, chip);
+}
+
+/*
+ * Make previously written settings for channel @chan take effect by
+ * producing a rising edge on the channel's trigger bit. The trigger bit is
+ * first cleared with the smooth bit set (so the old waveform keeps being
+ * output), then set again with the smooth bit cleared.
+ */
+static void kona_pwmc_apply_settings(struct kona_pwmc *kp, unsigned int chan)
+{
+	unsigned int value = readl(kp->base + PWM_CONTROL_OFFSET);
+
+	/* Clear trigger bit but set smooth bit to maintain old output */
+	value |= 1 << PWM_CONTROL_SMOOTH_SHIFT(chan);
+	value &= ~(1 << PWM_CONTROL_TRIGGER_SHIFT(chan));
+	writel(value, kp->base + PWM_CONTROL_OFFSET);
+
+	/* Set trigger bit and clear smooth bit to apply new settings */
+	value &= ~(1 << PWM_CONTROL_SMOOTH_SHIFT(chan));
+	value |= 1 << PWM_CONTROL_TRIGGER_SHIFT(chan);
+	writel(value, kp->base + PWM_CONTROL_OFFSET);
+}
+
+/*
+ * Compute prescale, period count and duty count for the requested duty_ns
+ * and period_ns, trying prescale values from PRESCALE_MIN upwards until
+ * both counts fit within their maximum values.
+ *
+ * Returns -EINVAL when the period count is below PERIOD_COUNT_MIN or when
+ * no prescale up to PRESCALE_MAX makes the counts fit. The registers are
+ * only written when the channel is flagged PWMF_ENABLED; otherwise the call
+ * just validates the request. Returns 0 on success.
+ */
+static int kona_pwmc_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			    int duty_ns, int period_ns)
+{
+	struct kona_pwmc *kp = to_kona_pwmc(chip);
+	u64 val, div, rate;
+	unsigned long prescale = PRESCALE_MIN, pc, dc;
+	unsigned int value, chan = pwm->hwpwm;
+
+	/*
+	 * Find period count, duty count and prescale to suit duty_ns and
+	 * period_ns. This is done according to formulas described below:
+	 *
+	 * period_ns = 10^9 * (PRESCALE + 1) * PC / PWM_CLK_RATE
+	 * duty_ns = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE
+	 *
+	 * PC = (PWM_CLK_RATE * period_ns) / (10^9 * (PRESCALE + 1))
+	 * DC = (PWM_CLK_RATE * duty_ns) / (10^9 * (PRESCALE + 1))
+	 */
+
+	rate = clk_get_rate(kp->clk);
+
+	while (1) {
+		div = 1000000000;
+		div *= 1 + prescale;
+		val = rate * period_ns;
+		pc = div64_u64(val, div);
+		val = rate * duty_ns;
+		dc = div64_u64(val, div);
+
+		/* If duty_ns or period_ns are not achievable then return */
+		/*
+		 * NOTE(review): dc is unsigned and DUTY_CYCLE_HIGH_MIN is 0,
+		 * so only the period count check can trigger here.
+		 */
+		if (pc < PERIOD_COUNT_MIN || dc < DUTY_CYCLE_HIGH_MIN)
+			return -EINVAL;
+
+		/* If pc and dc are in bounds, the calculation is done */
+		if (pc <= PERIOD_COUNT_MAX && dc <= DUTY_CYCLE_HIGH_MAX)
+			break;
+
+		/* Otherwise, increase prescale and recalculate pc and dc */
+		if (++prescale > PRESCALE_MAX)
+			return -EINVAL;
+	}
+
+	/* If the PWM channel is enabled, write the settings to the HW */
+	if (test_bit(PWMF_ENABLED, &pwm->flags)) {
+		value = readl(kp->base + PRESCALE_OFFSET);
+		value &= ~PRESCALE_MASK(chan);
+		value |= prescale << PRESCALE_SHIFT(chan);
+		writel(value, kp->base + PRESCALE_OFFSET);
+
+		writel(pc, kp->base + PERIOD_COUNT_OFFSET(chan));
+
+		writel(dc, kp->base + DUTY_CYCLE_HIGH_OFFSET(chan));
+
+		kona_pwmc_apply_settings(kp, chan);
+	}
+
+	return 0;
+}
+
+/*
+ * Program the polarity bit of the channel in the control register and
+ * trigger the hardware to apply it. PWM_POLARITY_NORMAL sets the bit, any
+ * other polarity clears it. The clock is enabled only for the duration of
+ * this call.
+ *
+ * Returns 0 on success or the error from clk_prepare_enable().
+ */
+static int kona_pwmc_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
+				  enum pwm_polarity polarity)
+{
+	struct kona_pwmc *kp = to_kona_pwmc(chip);
+	unsigned int chan = pwm->hwpwm;
+	unsigned int value;
+	int ret;
+
+	ret = clk_prepare_enable(kp->clk);
+	if (ret < 0) {
+		dev_err(chip->dev, "failed to enable clock: %d\n", ret);
+		return ret;
+	}
+
+	value = readl(kp->base + PWM_CONTROL_OFFSET);
+
+	if (polarity == PWM_POLARITY_NORMAL)
+		value |= 1 << PWM_CONTROL_POLARITY_SHIFT(chan);
+	else
+		value &= ~(1 << PWM_CONTROL_POLARITY_SHIFT(chan));
+
+	writel(value, kp->base + PWM_CONTROL_OFFSET);
+
+	kona_pwmc_apply_settings(kp, chan);
+
+	/* Wait for waveform to settle before gating off the clock */
+	ndelay(400);
+
+	clk_disable_unprepare(kp->clk);
+
+	return 0;
+}
+
+/*
+ * Enable the clock and configure the channel with the duty cycle and
+ * period stored in @pwm. If configuring fails, the clock is disabled again
+ * and the error is returned; on success the clock is left enabled.
+ */
+static int kona_pwmc_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct kona_pwmc *kp = to_kona_pwmc(chip);
+	int ret;
+
+	ret = clk_prepare_enable(kp->clk);
+	if (ret < 0) {
+		dev_err(chip->dev, "failed to enable clock: %d\n", ret);
+		return ret;
+	}
+
+	ret = kona_pwmc_config(chip, pwm, pwm->duty_cycle, pwm->period);
+	if (ret < 0) {
+		clk_disable_unprepare(kp->clk);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable a channel. The hardware has no disable bit, so a zero duty cycle
+ * is written and applied, then the clock is disabled after a short delay.
+ */
+static void kona_pwmc_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct kona_pwmc *kp = to_kona_pwmc(chip);
+	unsigned int chan = pwm->hwpwm;
+
+	/* Simulate a disable by configuring for zero duty */
+	writel(0, kp->base + DUTY_CYCLE_HIGH_OFFSET(chan));
+	kona_pwmc_apply_settings(kp, chan);
+
+	/* Wait for waveform to settle before gating off the clock */
+	ndelay(400);
+
+	clk_disable_unprepare(kp->clk);
+}
+
+/* PWM operations installed as kp->chip.ops in kona_pwmc_probe(). */
+static const struct pwm_ops kona_pwm_ops = {
+	.config = kona_pwmc_config,
+	.set_polarity = kona_pwmc_set_polarity,
+	.enable = kona_pwmc_enable,
+	.disable = kona_pwmc_disable,
+	.owner = THIS_MODULE,
+};
+
+/*
+ * Platform probe: allocate the driver state, map the registers, get the
+ * clock, initialise the control register for all channels and register the
+ * chip with the PWM framework. The clock is only kept enabled while the
+ * control register is written.
+ *
+ * Returns 0 on success or a negative error code from the failing step.
+ */
+static int kona_pwmc_probe(struct platform_device *pdev)
+{
+	struct kona_pwmc *kp;
+	struct resource *res;
+	unsigned int chan;
+	unsigned int value = 0;
+	int ret = 0;
+
+	kp = devm_kzalloc(&pdev->dev, sizeof(*kp), GFP_KERNEL);
+	if (kp == NULL)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, kp);
+
+	kp->chip.dev = &pdev->dev;
+	kp->chip.ops = &kona_pwm_ops;
+	kp->chip.base = -1;
+	kp->chip.npwm = 6;
+	kp->chip.of_xlate = of_pwm_xlate_with_flags;
+	kp->chip.of_pwm_n_cells = 3;
+	kp->chip.can_sleep = true;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	kp->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(kp->base))
+		return PTR_ERR(kp->base);
+
+	kp->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(kp->clk)) {
+		dev_err(&pdev->dev, "failed to get clock: %ld\n",
+			PTR_ERR(kp->clk));
+		return PTR_ERR(kp->clk);
+	}
+
+	ret = clk_prepare_enable(kp->clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to enable clock: %d\n", ret);
+		return ret;
+	}
+
+	/* Set smooth mode, push/pull, and normal polarity for all channels */
+	for (chan = 0; chan < kp->chip.npwm; chan++) {
+		value |= (1 << PWM_CONTROL_SMOOTH_SHIFT(chan));
+		value |= (1 << PWM_CONTROL_TYPE_SHIFT(chan));
+		value |= (1 << PWM_CONTROL_POLARITY_SHIFT(chan));
+	}
+
+	writel(value, kp->base + PWM_CONTROL_OFFSET);
+
+	clk_disable_unprepare(kp->clk);
+
+	ret = pwmchip_add(&kp->chip);
+	if (ret < 0)
+		dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Platform remove: drop one clock enable for every channel still flagged
+ * PWMF_ENABLED, then unregister the chip and return pwmchip_remove()'s
+ * result.
+ */
+static int kona_pwmc_remove(struct platform_device *pdev)
+{
+	struct kona_pwmc *kp = platform_get_drvdata(pdev);
+	unsigned int chan;
+
+	for (chan = 0; chan < kp->chip.npwm; chan++)
+		if (test_bit(PWMF_ENABLED, &kp->chip.pwms[chan].flags))
+			clk_disable_unprepare(kp->clk);
+
+	return pwmchip_remove(&kp->chip);
+}
+
+/* Device tree compatible strings matched by this driver. */
+static const struct of_device_id bcm_kona_pwmc_dt[] = {
+	{ .compatible = "brcm,kona-pwm" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, bcm_kona_pwmc_dt);
+
+/* Platform driver glue: probe/remove callbacks and the DT match table. */
+static struct platform_driver kona_pwmc_driver = {
+	.driver = {
+		.name = "bcm-kona-pwm",
+		.of_match_table = bcm_kona_pwmc_dt,
+	},
+	.probe = kona_pwmc_probe,
+	.remove = kona_pwmc_remove,
+};
+module_platform_driver(kona_pwmc_driver);
+
+MODULE_AUTHOR("Broadcom Corporation <bcm-kernel-feedback-list@broadcom.com>");
+MODULE_AUTHOR("Tim Kryger <tkryger@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom Kona PWM driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 420169e..a18bc8f 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c

@@ -454,6 +454,7 @@
 	fpc->chip.of_pwm_n_cells = 3;
 	fpc->chip.base = -1;
 	fpc->chip.npwm = 8;
+	fpc->chip.can_sleep = true;
 
 	ret = pwmchip_add(&fpc->chip);
 	if (ret < 0) {

diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index cc47733..d797c7b 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c

@@ -241,10 +241,8 @@
 		return -ENODEV;
 
 	imx = devm_kzalloc(&pdev->dev, sizeof(*imx), GFP_KERNEL);
-	if (imx == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (imx == NULL)
 		return -ENOMEM;
-	}
 
 	imx->clk_per = devm_clk_get(&pdev->dev, "per");
 	if (IS_ERR(imx->clk_per)) {

diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c
index a40b9c3..2c39b0e 100644
--- a/drivers/pwm/pwm-lp3943.c
+++ b/drivers/pwm/pwm-lp3943.c

@@ -278,6 +278,7 @@
 	lp3943_pwm->chip.dev = &pdev->dev;
 	lp3943_pwm->chip.ops = &lp3943_pwm_ops;
 	lp3943_pwm->chip.npwm = LP3943_NUM_PWMS;
+	lp3943_pwm->chip.can_sleep = true;
 
 	platform_set_drvdata(pdev, lp3943_pwm);
 

diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 449e372..44ce6c6 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c

@@ -6,6 +6,7 @@
  * Author: Chew Kean Ho <kean.ho.chew@intel.com>
  * Author: Chang Rebecca Swee Fun <rebecca.swee.fun.chang@intel.com>
  * Author: Chew Chiau Ee <chiau.ee.chew@intel.com>
+ * Author: Alan Cox <alan@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +20,9 @@
 #include <linux/module.h>
 #include <linux/pwm.h>
 #include <linux/platform_device.h>
+#include <linux/pci.h>
+
+static int pci_drv, plat_drv;	/* So we know which drivers registered */
 
 #define PWM				0x00000000
 #define PWM_ENABLE			BIT(31)
@@ -34,6 +38,16 @@
 	struct pwm_chip chip;
 	void __iomem *regs;
 	struct clk *clk;
+	unsigned long clk_rate;
+};
+
+/*
+ * Per-board data attached to a PCI ID. When present, clk_rate is used as
+ * the PWM clock rate instead of looking up a clock.
+ */
+struct pwm_lpss_boardinfo {
+	unsigned long clk_rate;
+};
+
+/* BayTrail: fixed clock rate used for the PCI-enumerated devices */
+static const struct pwm_lpss_boardinfo byt_info = {
+	.clk_rate = 25000000,
 };
 
 static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
@@ -55,7 +69,7 @@
 	/* The equation is: base_unit = ((freq / c) * 65536) + correction */
 	base_unit = freq * 65536;
 
-	c = clk_get_rate(lpwm->clk);
+	c = lpwm->clk_rate;
 	if (!c)
 		return -EINVAL;
 
@@ -113,52 +127,48 @@
 	.owner = THIS_MODULE,
 };
 
-static const struct acpi_device_id pwm_lpss_acpi_match[] = {
-	{ "80860F09", 0 },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
-
-static int pwm_lpss_probe(struct platform_device *pdev)
+static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev,
+					    struct resource *r,
+					    const struct pwm_lpss_boardinfo *info)
 {
 	struct pwm_lpss_chip *lpwm;
-	struct resource *r;
 	int ret;
 
-	lpwm = devm_kzalloc(&pdev->dev, sizeof(*lpwm), GFP_KERNEL);
+	lpwm = devm_kzalloc(dev, sizeof(*lpwm), GFP_KERNEL);
 	if (!lpwm)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	lpwm->regs = devm_ioremap_resource(&pdev->dev, r);
+	lpwm->regs = devm_ioremap_resource(dev, r);
 	if (IS_ERR(lpwm->regs))
-		return PTR_ERR(lpwm->regs);
+		return ERR_CAST(lpwm->regs);
 
-	lpwm->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(lpwm->clk)) {
-		dev_err(&pdev->dev, "failed to get PWM clock\n");
-		return PTR_ERR(lpwm->clk);
+	if (info) {
+		lpwm->clk_rate = info->clk_rate;
+	} else {
+		lpwm->clk = devm_clk_get(dev, NULL);
+		if (IS_ERR(lpwm->clk)) {
+			dev_err(dev, "failed to get PWM clock\n");
+			return ERR_CAST(lpwm->clk);
+		}
+		lpwm->clk_rate = clk_get_rate(lpwm->clk);
 	}
 
-	lpwm->chip.dev = &pdev->dev;
+	lpwm->chip.dev = dev;
 	lpwm->chip.ops = &pwm_lpss_ops;
 	lpwm->chip.base = -1;
 	lpwm->chip.npwm = 1;
 
 	ret = pwmchip_add(&lpwm->chip);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
-		return ret;
+		dev_err(dev, "failed to add PWM chip: %d\n", ret);
+		return ERR_PTR(ret);
 	}
 
-	platform_set_drvdata(pdev, lpwm);
-	return 0;
+	return lpwm;
 }
 
-static int pwm_lpss_remove(struct platform_device *pdev)
+static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 {
-	struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
 	u32 ctrl;
 
 	ctrl = readl(lpwm->regs + PWM);
@@ -167,15 +177,104 @@
 	return pwmchip_remove(&lpwm->chip);
 }
 
-static struct platform_driver pwm_lpss_driver = {
+/*
+ * PCI probe: enable the device, set up the PWM chip on PCI resource 0 using
+ * the board info attached to the matched ID, and store the chip as driver
+ * data. If chip setup fails the device is disabled again so that the
+ * pci_enable_device() call is balanced on the error path.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int pwm_lpss_probe_pci(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	const struct pwm_lpss_boardinfo *info;
+	struct pwm_lpss_chip *lpwm;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err < 0)
+		return err;
+
+	info = (struct pwm_lpss_boardinfo *)id->driver_data;
+	lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info);
+	if (IS_ERR(lpwm)) {
+		/* Balance pci_enable_device() above */
+		pci_disable_device(pdev);
+		return PTR_ERR(lpwm);
+	}
+
+	pci_set_drvdata(pdev, lpwm);
+	return 0;
+}
+
+/* PCI remove: tear down the PWM chip, then disable the PCI device. */
+static void pwm_lpss_remove_pci(struct pci_dev *pdev)
+{
+	struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev);
+
+	pwm_lpss_remove(lpwm);
+	pci_disable_device(pdev);
+}
+
+/*
+ * PCI IDs handled by the PCI driver; driver_data carries a pointer to the
+ * board info. The table is read-only, so it is declared const.
+ */
+static const struct pci_device_id pwm_lpss_pci_ids[] = {
+	{ PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&byt_info},
+	{ PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&byt_info},
+	{ },
+};
+MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids);
+
+/* PCI driver: ID table plus the PCI probe/remove callbacks. */
+static struct pci_driver pwm_lpss_driver_pci = {
+	.name = "pwm-lpss",
+	.id_table = pwm_lpss_pci_ids,
+	.probe = pwm_lpss_probe_pci,
+	.remove = pwm_lpss_remove_pci,
+};
+
+/*
+ * Platform probe: set up the PWM chip on the first memory resource. No
+ * board info is passed, so pwm_lpss_probe() takes the rate from the
+ * device's clock. The chip is stored as driver data on success.
+ */
+static int pwm_lpss_probe_platform(struct platform_device *pdev)
+{
+	struct pwm_lpss_chip *lpwm;
+	struct resource *r;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	lpwm = pwm_lpss_probe(&pdev->dev, r, NULL);
+	if (IS_ERR(lpwm))
+		return PTR_ERR(lpwm);
+
+	platform_set_drvdata(pdev, lpwm);
+	return 0;
+}
+
+/* Platform remove: tear down the chip stored as driver data. */
+static int pwm_lpss_remove_platform(struct platform_device *pdev)
+{
+	struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
+
+	return pwm_lpss_remove(lpwm);
+}
+
+/* ACPI IDs matched by the platform driver. */
+static const struct acpi_device_id pwm_lpss_acpi_match[] = {
+	{ "80860F09", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
+
+static struct platform_driver pwm_lpss_driver_platform = {
 	.driver = {
 		.name = "pwm-lpss",
 		.acpi_match_table = pwm_lpss_acpi_match,
 	},
-	.probe = pwm_lpss_probe,
-	.remove = pwm_lpss_remove,
+	.probe = pwm_lpss_probe_platform,
+	.remove = pwm_lpss_remove_platform,
 };
-module_platform_driver(pwm_lpss_driver);
+
+/*
+ * Register both the PCI and the platform driver, remembering each result
+ * in pci_drv/plat_drv. Module load fails (with the PCI registration error)
+ * only when both registrations failed; otherwise 0 is returned.
+ */
+static int __init pwm_init(void)
+{
+	pci_drv = pci_register_driver(&pwm_lpss_driver_pci);
+	plat_drv = platform_driver_register(&pwm_lpss_driver_platform);
+	if (pci_drv && plat_drv)
+		return pci_drv;
+
+	return 0;
+}
+module_init(pwm_init);
+
+/*
+ * Unregister only the drivers whose registration succeeded, i.e. whose
+ * saved result in pci_drv/plat_drv is 0.
+ */
+static void __exit pwm_exit(void)
+{
+	if (!pci_drv)
+		pci_unregister_driver(&pwm_lpss_driver_pci);
+	if (!plat_drv)
+		platform_driver_unregister(&pwm_lpss_driver_platform);
+}
+module_exit(pwm_exit);
 
 MODULE_DESCRIPTION("PWM driver for Intel LPSS");
 MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");

diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c
index 9475bc7..4f1bb4e 100644
--- a/drivers/pwm/pwm-mxs.c
+++ b/drivers/pwm/pwm-mxs.c

@@ -147,6 +147,7 @@
 	mxs->chip.dev = &pdev->dev;
 	mxs->chip.ops = &mxs_pwm_ops;
 	mxs->chip.base = -1;
+	mxs->chip.can_sleep = true;
 	ret = of_property_read_u32(np, "fsl,pwm-number", &mxs->chip.npwm);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to get pwm number: %d\n", ret);

diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index cd356d8..0b312ec 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c

@@ -179,10 +179,8 @@
 		return -EINVAL;
 
 	pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
-	if (pwm == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (pwm == NULL)
 		return -ENOMEM;
-	}
 
 	pwm->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pwm->clk))

diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index aff6ba9..3b71b42 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c

@@ -21,13 +21,14 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/platform_data/pwm-renesas-tpu.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#define TPU_CHANNEL_MAX		4
+
 #define TPU_TSTR		0x00	/* Timer start register (shared) */
 
 #define TPU_TCRn		0x00	/* Timer control register */
@@ -87,7 +88,6 @@
 
 struct tpu_device {
 	struct platform_device *pdev;
-	enum pwm_polarity polarities[TPU_CHANNEL_MAX];
 	struct pwm_chip chip;
 	spinlock_t lock;
 
@@ -229,7 +229,7 @@
 
 	pwm->tpu = tpu;
 	pwm->channel = _pwm->hwpwm;
-	pwm->polarity = tpu->polarities[pwm->channel];
+	pwm->polarity = PWM_POLARITY_NORMAL;
 	pwm->prescaler = 0;
 	pwm->period = 0;
 	pwm->duty = 0;
@@ -388,16 +388,6 @@
  * Probe and remove
  */
 
-static void tpu_parse_pdata(struct tpu_device *tpu)
-{
-	struct tpu_pwm_platform_data *pdata = tpu->pdev->dev.platform_data;
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(tpu->polarities); ++i)
-		tpu->polarities[i] = pdata ? pdata->channels[i].polarity
-				   : PWM_POLARITY_NORMAL;
-}
-
 static int tpu_probe(struct platform_device *pdev)
 {
 	struct tpu_device *tpu;
@@ -405,17 +395,12 @@
 	int ret;
 
 	tpu = devm_kzalloc(&pdev->dev, sizeof(*tpu), GFP_KERNEL);
-	if (tpu == NULL) {
-		dev_err(&pdev->dev, "failed to allocate driver data\n");
+	if (tpu == NULL)
 		return -ENOMEM;
-	}
 
 	spin_lock_init(&tpu->lock);
 	tpu->pdev = pdev;
 
-	/* Initialize device configuration from platform data. */
-	tpu_parse_pdata(tpu);
-
 	/* Map memory, get clock and pin control. */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tpu->base = devm_ioremap_resource(&pdev->dev, res);

diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index d66529a..ba6b650 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c

@@ -335,9 +335,6 @@
 	writel(tcnt, our_chip->base + REG_TCNTB(pwm->hwpwm));
 	writel(tcmp, our_chip->base + REG_TCMPB(pwm->hwpwm));
 
-	if (test_bit(PWMF_ENABLED, &pwm->flags))
-		pwm_samsung_enable(chip, pwm);
-
 	chan->period_ns = period_ns;
 	chan->tin_ns = tin_ns;
 	chan->duty_ns = duty_ns;

diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
index cb2d4f0..6fd93e6 100644
--- a/drivers/pwm/pwm-spear.c
+++ b/drivers/pwm/pwm-spear.c

@@ -179,10 +179,8 @@
 	u32 val;
 
 	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
-	if (!pc) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pc)
 		return -ENOMEM;
-	}
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	pc->mmio_base = devm_ioremap_resource(&pdev->dev, r);
@@ -222,7 +220,7 @@
 	}
 
 	ret = pwmchip_add(&pc->chip);
-	if (!ret) {
+	if (ret < 0) {
 		clk_unprepare(pc->clk);
 		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
 	}

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 74298c5..61d86b9 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c

@@ -173,10 +173,8 @@
 	int ret;
 
 	pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
-	if (!pwm) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pwm)
 		return -ENOMEM;
-	}
 
 	pwm->dev = &pdev->dev;
 

diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
index 032092c..74efbe7 100644
--- a/drivers/pwm/pwm-tiecap.c
+++ b/drivers/pwm/pwm-tiecap.c

@@ -209,10 +209,8 @@
 	u16 status;
 
 	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
-	if (!pc) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pc)
 		return -ENOMEM;
-	}
 
 	clk = devm_clk_get(&pdev->dev, "fck");
 	if (IS_ERR(clk)) {

diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index aee4471..cb75133 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c

@@ -138,12 +138,12 @@
 	return container_of(chip, struct ehrpwm_pwm_chip, chip);
 }
 
-static u16 ehrpwm_read(void __iomem *base, int offset)
+static inline u16 ehrpwm_read(void __iomem *base, int offset)
 {
 	return readw(base + offset);
 }
 
-static void ehrpwm_write(void __iomem *base, int offset, unsigned int val)
+static inline void ehrpwm_write(void __iomem *base, int offset, unsigned int val)
 {
 	writew(val & 0xFFFF, base + offset);
 }
@@ -440,10 +440,8 @@
 	u16 status;
 
 	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
-	if (!pc) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pc)
 		return -ENOMEM;
-	}
 
 	clk = devm_clk_get(&pdev->dev, "fck");
 	if (IS_ERR(clk)) {
@@ -531,6 +529,7 @@
 	return pwmchip_remove(&pc->chip);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc)
 {
 	pm_runtime_get_sync(pc->chip.dev);
@@ -557,7 +556,6 @@
 	ehrpwm_write(pc->mmio_base, TBCTL, pc->ctx.tbctl);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int ehrpwm_pwm_suspend(struct device *dev)
 {
 	struct ehrpwm_pwm_chip *pc = dev_get_drvdata(dev);

diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index b99a50e..04f7672 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c

@@ -265,14 +265,6 @@
 
 	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG);
 	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to read TOGGLE3\n", pwm->label);
-		goto out;
-	}
-
-	val |= TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXS | TWL6030_PWMXEN);
-
-	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG);
-	if (ret < 0) {
 		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
 		goto out;
 	}

diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index 323125a..652e6b5 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c

@@ -211,10 +211,8 @@
 	}
 
 	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
-	if (chip == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (chip == NULL)
 		return -ENOMEM;
-	}
 
 	chip->chip.dev = &pdev->dev;
 	chip->chip.ops = &vt8500_pwm_ops;

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index f53e78b..6ff95b0 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c

@@ -266,11 +266,11 @@
 	return count;
 }
 
-static DEVICE_ATTR(min_microvolts, 0666, show_min_uV, set_min_uV);
-static DEVICE_ATTR(max_microvolts, 0666, show_max_uV, set_max_uV);
-static DEVICE_ATTR(min_microamps, 0666, show_min_uA, set_min_uA);
-static DEVICE_ATTR(max_microamps, 0666, show_max_uA, set_max_uA);
-static DEVICE_ATTR(mode, 0666, show_mode, set_mode);
+static DEVICE_ATTR(min_microvolts, 0664, show_min_uV, set_min_uV);
+static DEVICE_ATTR(max_microvolts, 0664, show_max_uV, set_max_uV);
+static DEVICE_ATTR(min_microamps, 0664, show_min_uA, set_min_uA);
+static DEVICE_ATTR(max_microamps, 0664, show_max_uA, set_max_uA);
+static DEVICE_ATTR(mode, 0664, show_mode, set_mode);
 
 static struct attribute *regulator_virtual_attributes[] = {
 	&dev_attr_min_microvolts.attr,

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 71988b6..0754f5c 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig

@@ -530,11 +530,11 @@
 	  will be called rtc-rv3029c2.
 
 config RTC_DRV_S5M
-	tristate "Samsung S5M series"
+	tristate "Samsung S2M/S5M series"
 	depends on MFD_SEC_CORE
 	help
 	  If you say yes here you will get support for the
-	  RTC of Samsung S5M PMIC series.
+	  RTC of Samsung S2MPS14 and S5M PMIC series.
 
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-s5m.

diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 8ec2d6a..8f06250 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd
+ * Copyright (c) 2013-2014 Samsung Electronics Co., Ltd
  *	http://www.samsung.com
  *
  *  Copyright (C) 2013 Google, Inc
@@ -17,27 +17,76 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/slab.h>
 #include <linux/bcd.h>
-#include <linux/bitops.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
-#include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/samsung/core.h>
 #include <linux/mfd/samsung/irq.h>
 #include <linux/mfd/samsung/rtc.h>
+#include <linux/mfd/samsung/s2mps14.h>
 
 /*
  * Maximum number of retries for checking changes in UDR field
- * of SEC_RTC_UDR_CON register (to limit possible endless loop).
+ * of S5M_RTC_UDR_CON register (to limit possible endless loop).
  *
  * After writing to RTC registers (setting time or alarm) read the UDR field
- * in SEC_RTC_UDR_CON register. UDR is auto-cleared when data have
+ * in S5M_RTC_UDR_CON register. UDR is auto-cleared when data have
  * been transferred.
  */
 #define UDR_READ_RETRY_CNT	5
 
+/* Registers used by the driver which are different between chipsets. */
+struct s5m_rtc_reg_config {
+	/* Number of registers used for setting time/alarm0/alarm1 */
+	unsigned int regs_count;
+	/* First register for time, seconds */
+	unsigned int time;
+	/* RTC control register */
+	unsigned int ctrl;
+	/* First register for alarm 0, seconds */
+	unsigned int alarm0;
+	/* First register for alarm 1, seconds */
+	unsigned int alarm1;
+	/* SMPL/WTSR register */
+	unsigned int smpl_wtsr;
+	/*
+	 * Register for update flag (UDR). Typically setting UDR field to 1
+	 * will enable update of time or alarm register. Then it will be
+	 * auto-cleared after successful update.
+	 */
+	unsigned int rtc_udr_update;
+	/* Mask for UDR field in 'rtc_udr_update' register */
+	unsigned int rtc_udr_mask;
+};
+
+/* Register map for S5M8763 and S5M8767 */
+static const struct s5m_rtc_reg_config s5m_rtc_regs = {
+	.regs_count		= 8,
+	.time			= S5M_RTC_SEC,
+	.ctrl			= S5M_ALARM1_CONF,
+	.alarm0			= S5M_ALARM0_SEC,
+	.alarm1			= S5M_ALARM1_SEC,
+	.smpl_wtsr		= S5M_WTSR_SMPL_CNTL,
+	.rtc_udr_update		= S5M_RTC_UDR_CON,
+	.rtc_udr_mask		= S5M_RTC_UDR_MASK,
+};
+
+/*
+ * Register map for S2MPS14.
+ * It may be also suitable for S2MPS11 but this was not tested.
+ */
+static const struct s5m_rtc_reg_config s2mps_rtc_regs = {
+	.regs_count		= 7,
+	.time			= S2MPS_RTC_SEC,
+	.ctrl			= S2MPS_RTC_CTRL,
+	.alarm0			= S2MPS_ALARM0_SEC,
+	.alarm1			= S2MPS_ALARM1_SEC,
+	.smpl_wtsr		= S2MPS_WTSR_SMPL_CNTL,
+	.rtc_udr_update		= S2MPS_RTC_UDR_CON,
+	.rtc_udr_mask		= S2MPS_RTC_WUDR_MASK,
+};
+
 struct s5m_rtc_info {
 	struct device *dev;
 	struct i2c_client *i2c;
@@ -48,13 +97,14 @@
 	int device_type;
 	int rtc_24hr_mode;
 	bool wtsr_smpl;
+	const struct s5m_rtc_reg_config	*regs;
 };
 
 static const struct regmap_config s5m_rtc_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 
-	.max_register = SEC_RTC_REG_MAX,
+	.max_register = S5M_RTC_REG_MAX,
 };
 
 static const struct regmap_config s2mps14_rtc_regmap_config = {
@@ -119,8 +169,9 @@
 	unsigned int data;
 
 	do {
-		ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
-	} while (--retry && (data & RTC_UDR_MASK) && !ret);
+		ret = regmap_read(info->regmap, info->regs->rtc_udr_update,
+				&data);
+	} while (--retry && (data & info->regs->rtc_udr_mask) && !ret);
 
 	if (!retry)
 		dev_err(info->dev, "waiting for UDR update, reached max number of retries\n");
@@ -128,21 +179,53 @@
 	return ret;
 }
 
+static inline int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
+		struct rtc_wkalrm *alarm)
+{
+	int ret;
+	unsigned int val;
+
+	switch (info->device_type) {
+	case S5M8767X:
+	case S5M8763X:
+		ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
+		val &= S5M_ALARM0_STATUS;
+		break;
+	case S2MPS14X:
+		ret = regmap_read(info->s5m87xx->regmap_pmic, S2MPS14_REG_ST2,
+				&val);
+		val &= S2MPS_ALARM0_STATUS;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret < 0)
+		return ret;
+
+	if (val)
+		alarm->pending = 1;
+	else
+		alarm->pending = 0;
+
+	return 0;
+}
+
 static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
 {
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, info->regs->rtc_udr_update, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to read update reg(%d)\n", ret);
 		return ret;
 	}
 
-	data |= RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data |= info->regs->rtc_udr_mask;
+	if (info->device_type == S5M8763X || info->device_type == S5M8767X)
+		data |= S5M_RTC_TIME_EN_MASK;
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, info->regs->rtc_udr_update, data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to write update reg(%d)\n", ret);
 		return ret;
@@ -158,17 +241,27 @@
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, info->regs->rtc_udr_update, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to read update reg(%d)\n",
 			__func__, ret);
 		return ret;
 	}
 
-	data &= ~RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data |= info->regs->rtc_udr_mask;
+	switch (info->device_type) {
+	case S5M8763X:
+	case S5M8767X:
+		data &= ~S5M_RTC_TIME_EN_MASK;
+		break;
+	case S2MPS14X:
+		data |= S2MPS_RTC_RUDR_MASK;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, info->regs->rtc_udr_update, data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write update reg(%d)\n",
 			__func__, ret);
@@ -215,10 +308,22 @@
 static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	int ret;
 
-	ret = regmap_bulk_read(info->regmap, SEC_RTC_SEC, data, 8);
+	if (info->device_type == S2MPS14X) {
+		ret = regmap_update_bits(info->regmap,
+				info->regs->rtc_udr_update,
+				S2MPS_RTC_RUDR_MASK, S2MPS_RTC_RUDR_MASK);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare registers for time reading: %d\n",
+				ret);
+			return ret;
+		}
+	}
+	ret = regmap_bulk_read(info->regmap, info->regs->time, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
@@ -228,6 +333,7 @@
 		break;
 
 	case S5M8767X:
+	case S2MPS14X:
 		s5m8767_data_to_tm(data, tm, info->rtc_24hr_mode);
 		break;
 
@@ -245,7 +351,7 @@
 static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	int ret = 0;
 
 	switch (info->device_type) {
@@ -253,6 +359,7 @@
 		s5m8763_tm_to_data(tm, data);
 		break;
 	case S5M8767X:
+	case S2MPS14X:
 		ret = s5m8767_tm_to_data(tm, data);
 		break;
 	default:
@@ -266,7 +373,8 @@
 		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-	ret = regmap_raw_write(info->regmap, SEC_RTC_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, info->regs->time, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
@@ -278,70 +386,60 @@
 static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	unsigned int val;
 	int ret, i;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
 	switch (info->device_type) {
 	case S5M8763X:
 		s5m8763_data_to_tm(data, &alrm->time);
-		ret = regmap_read(info->regmap, SEC_ALARM0_CONF, &val);
+		ret = regmap_read(info->regmap, S5M_ALARM0_CONF, &val);
 		if (ret < 0)
 			return ret;
 
 		alrm->enabled = !!val;
-
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
-		if (ret < 0)
-			return ret;
-
 		break;
 
 	case S5M8767X:
+	case S2MPS14X:
 		s5m8767_data_to_tm(data, &alrm->time, info->rtc_24hr_mode);
-		dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
-			1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
-			alrm->time.tm_mday, alrm->time.tm_hour,
-			alrm->time.tm_min, alrm->time.tm_sec,
-			alrm->time.tm_wday);
-
 		alrm->enabled = 0;
-		for (i = 0; i < 7; i++) {
+		for (i = 0; i < info->regs->regs_count; i++) {
 			if (data[i] & ALARM_ENABLE_MASK) {
 				alrm->enabled = 1;
 				break;
 			}
 		}
-
-		alrm->pending = 0;
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
-		if (ret < 0)
-			return ret;
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
-	if (val & ALARM0_STATUS)
-		alrm->pending = 1;
-	else
-		alrm->pending = 0;
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+		alrm->time.tm_mday, alrm->time.tm_hour,
+		alrm->time.tm_min, alrm->time.tm_sec,
+		alrm->time.tm_wday);
+
+	ret = s5m_check_peding_alarm_interrupt(info, alrm);
 
 	return 0;
 }
 
 static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 {
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	int ret, i;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
@@ -352,14 +450,16 @@
 
 	switch (info->device_type) {
 	case S5M8763X:
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, 0);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, 0);
 		break;
 
 	case S5M8767X:
-		for (i = 0; i < 7; i++)
+	case S2MPS14X:
+		for (i = 0; i < info->regs->regs_count; i++)
 			data[i] &= ~ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+				info->regs->regs_count);
 		if (ret < 0)
 			return ret;
 
@@ -377,11 +477,12 @@
 static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 {
 	int ret;
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	u8 alarm0_conf;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
@@ -393,10 +494,11 @@
 	switch (info->device_type) {
 	case S5M8763X:
 		alarm0_conf = 0x77;
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, alarm0_conf);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, alarm0_conf);
 		break;
 
 	case S5M8767X:
+	case S2MPS14X:
 		data[RTC_SEC] |= ALARM_ENABLE_MASK;
 		data[RTC_MIN] |= ALARM_ENABLE_MASK;
 		data[RTC_HOUR] |= ALARM_ENABLE_MASK;
@@ -408,7 +510,8 @@
 		if (data[RTC_YEAR1] & 0x7f)
 			data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+				info->regs->regs_count);
 		if (ret < 0)
 			return ret;
 		ret = s5m8767_rtc_set_alarm_reg(info);
@@ -425,7 +528,7 @@
 static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[8];
+	u8 data[info->regs->regs_count];
 	int ret;
 
 	switch (info->device_type) {
@@ -434,6 +537,7 @@
 		break;
 
 	case S5M8767X:
+	case S2MPS14X:
 		s5m8767_tm_to_data(&alrm->time, data);
 		break;
 
@@ -450,7 +554,8 @@
 	if (ret < 0)
 		return ret;
 
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+			info->regs->regs_count);
 	if (ret < 0)
 		return ret;
 
@@ -495,7 +600,7 @@
 static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, info->regs->smpl_wtsr,
 				 WTSR_ENABLE_MASK,
 				 enable ? WTSR_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -506,7 +611,7 @@
 static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, info->regs->smpl_wtsr,
 				 SMPL_ENABLE_MASK,
 				 enable ? SMPL_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -517,50 +622,41 @@
 static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 {
 	u8 data[2];
-	unsigned int tp_read;
 	int ret;
-	struct rtc_time tm;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &tp_read);
-	if (ret < 0) {
-		dev_err(info->dev, "%s: fail to read control reg(%d)\n",
-			__func__, ret);
-		return ret;
+	switch (info->device_type) {
+	case S5M8763X:
+	case S5M8767X:
+		/* UDR update time. Default of 7.32 ms is too long. */
+		ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
+				S5M_RTC_UDR_T_MASK, S5M_RTC_UDR_T_450_US);
+		if (ret < 0)
+			dev_err(info->dev, "%s: fail to change UDR time: %d\n",
+					__func__, ret);
+
+		/* Set RTC control register : Binary mode, 24hour mode */
+		data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+		data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+
+		ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
+		break;
+
+	case S2MPS14X:
+		data[0] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+		ret = regmap_write(info->regmap, info->regs->ctrl, data[0]);
+		break;
+
+	default:
+		return -EINVAL;
 	}
 
-	/* Set RTC control register : Binary mode, 24hour mode */
-	data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
-	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
-
 	info->rtc_24hr_mode = 1;
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_CONF, data, 2);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
 			__func__, ret);
 		return ret;
 	}
 
-	/* In first boot time, Set rtc time to 1/1/2012 00:00:00(SUN) */
-	if ((tp_read & RTC_TCON_MASK) == 0) {
-		dev_dbg(info->dev, "rtc init\n");
-		tm.tm_sec = 0;
-		tm.tm_min = 0;
-		tm.tm_hour = 0;
-		tm.tm_wday = 0;
-		tm.tm_mday = 1;
-		tm.tm_mon = 0;
-		tm.tm_year = 112;
-		tm.tm_yday = 0;
-		tm.tm_isdst = 0;
-		ret = s5m_rtc_set_time(info->dev, &tm);
-	}
-
-	ret = regmap_update_bits(info->regmap, SEC_RTC_UDR_CON,
-				 RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
-	if (ret < 0)
-		dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
-			__func__, ret);
-
 	return ret;
 }
 
@@ -570,7 +666,7 @@
 	struct sec_platform_data *pdata = s5m87xx->pdata;
 	struct s5m_rtc_info *info;
 	const struct regmap_config *regmap_cfg;
-	int ret;
+	int ret, alarm_irq;
 
 	if (!pdata) {
 		dev_err(pdev->dev.parent, "Platform data not supplied\n");
@@ -584,12 +680,18 @@
 	switch (pdata->device_type) {
 	case S2MPS14X:
 		regmap_cfg = &s2mps14_rtc_regmap_config;
+		info->regs = &s2mps_rtc_regs;
+		alarm_irq = S2MPS14_IRQ_RTCA0;
 		break;
 	case S5M8763X:
 		regmap_cfg = &s5m_rtc_regmap_config;
+		info->regs = &s5m_rtc_regs;
+		alarm_irq = S5M8763_IRQ_ALARM0;
 		break;
 	case S5M8767X:
 		regmap_cfg = &s5m_rtc_regmap_config;
+		info->regs = &s5m_rtc_regs;
+		alarm_irq = S5M8767_IRQ_RTCA1;
 		break;
 	default:
 		dev_err(&pdev->dev, "Device type is not supported by RTC driver\n");
@@ -615,20 +717,11 @@
 	info->device_type = s5m87xx->device_type;
 	info->wtsr_smpl = s5m87xx->wtsr_smpl;
 
-	switch (pdata->device_type) {
-	case S5M8763X:
-		info->irq = regmap_irq_get_virq(s5m87xx->irq_data,
-				S5M8763_IRQ_ALARM0);
-		break;
-
-	case S5M8767X:
-		info->irq = regmap_irq_get_virq(s5m87xx->irq_data,
-				S5M8767_IRQ_RTCA1);
-		break;
-
-	default:
+	info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq);
+	if (info->irq <= 0) {
 		ret = -EINVAL;
-		dev_err(&pdev->dev, "Unsupported device type: %d\n", ret);
+		dev_err(&pdev->dev, "Failed to get virtual IRQ %d\n",
+				alarm_irq);
 		goto err;
 	}
 
@@ -676,7 +769,7 @@
 	if (info->wtsr_smpl) {
 		for (i = 0; i < 3; i++) {
 			s5m_rtc_enable_wtsr(info, false);
-			regmap_read(info->regmap, SEC_WTSR_SMPL_CNTL, &val);
+			regmap_read(info->regmap, info->regs->smpl_wtsr, &val);
 			pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
 			if (val & WTSR_ENABLE_MASK)
 				pr_emerg("%s: fail to disable WTSR\n",
@@ -730,7 +823,8 @@
 static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume);
 
 static const struct platform_device_id s5m_rtc_id[] = {
-	{ "s5m-rtc", 0 },
+	{ "s5m-rtc",		S5M8767X },
+	{ "s2mps14-rtc",	S2MPS14X },
 };
 
 static struct platform_driver s5m_rtc_driver = {
@@ -749,6 +843,6 @@
 
 /* Module information */
 MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
-MODULE_DESCRIPTION("Samsung S5M RTC driver");
+MODULE_DESCRIPTION("Samsung S5M/S2MPS14 RTC driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:s5m-rtc");

diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c
index 1e1fc67..d2c0b44 100644
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c

@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/kvm_para.h>
+#include <linux/notifier.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/cio.h>
@@ -62,6 +63,7 @@
 	struct vq_config_block *config_block;
 	bool is_thinint;
 	bool going_away;
+	bool device_lost;
 	void *airq_info;
 };
 
@@ -1010,11 +1012,14 @@
 	unsigned long flags;
 	struct virtio_ccw_device *vcdev = virtio_grab_drvdata(cdev);
 
-	if (vcdev && cdev->online)
+	if (vcdev && cdev->online) {
+		if (vcdev->device_lost)
+			virtio_break_device(&vcdev->vdev);
 		unregister_virtio_device(&vcdev->vdev);
-	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
-	dev_set_drvdata(&cdev->dev, NULL);
-	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+		spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+		dev_set_drvdata(&cdev->dev, NULL);
+		spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+	}
 	cdev->handler = NULL;
 }
 
@@ -1023,12 +1028,14 @@
 	unsigned long flags;
 	struct virtio_ccw_device *vcdev = virtio_grab_drvdata(cdev);
 
-	if (vcdev) {
-		unregister_virtio_device(&vcdev->vdev);
-		spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
-		dev_set_drvdata(&cdev->dev, NULL);
-		spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
-	}
+	if (!vcdev)
+		return 0;
+	if (vcdev->device_lost)
+		virtio_break_device(&vcdev->vdev);
+	unregister_virtio_device(&vcdev->vdev);
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	dev_set_drvdata(&cdev->dev, NULL);
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 	return 0;
 }
 
@@ -1096,8 +1103,26 @@
 
 static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
 {
-	/* TODO: Check whether we need special handling here. */
-	return 0;
+	int rc;
+	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+
+	/*
+	 * Make sure vcdev is set
+	 * i.e. set_offline/remove callback not already running
+	 */
+	if (!vcdev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case CIO_GONE:
+		vcdev->device_lost = true;
+		rc = NOTIFY_DONE;
+		break;
+	default:
+		rc = NOTIFY_DONE;
+		break;
+	}
+	return rc;
 }
 
 static struct ccw_device_id virtio_ids[] = {

diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index fd7b3bd..d837c3c 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c

@@ -3348,7 +3348,7 @@
 	}
 	CLAW_DBF_TEXT(2, setup, "init_mod");
 	claw_root_dev = root_device_register("claw");
-	ret = PTR_RET(claw_root_dev);
+	ret = PTR_ERR_OR_ZERO(claw_root_dev);
 	if (ret)
 		goto register_err;
 	ret = ccw_driver_register(&claw_ccw_driver);

diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 70b3a02..03b6ad0 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c

@@ -1837,7 +1837,7 @@
 	if (ret)
 		goto out_err;
 	ctcm_root_dev = root_device_register("ctcm");
-	ret = PTR_RET(ctcm_root_dev);
+	ret = PTR_ERR_OR_ZERO(ctcm_root_dev);
 	if (ret)
 		goto register_err;
 	ret = ccw_driver_register(&ctcm_ccw_driver);

diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c
index 985b5dc..6bcfbbb 100644
--- a/drivers/s390/net/ctcm_sysfs.c
+++ b/drivers/s390/net/ctcm_sysfs.c

@@ -34,8 +34,9 @@
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct net_device *ndev;
-	int bs1;
+	unsigned int bs1;
 	struct ctcm_priv *priv = dev_get_drvdata(dev);
+	int rc;
 
 	ndev = priv->channel[CTCM_READ]->netdev;
 	if (!(priv && priv->channel[CTCM_READ] && ndev)) {
@@ -43,7 +44,9 @@
 		return -ENODEV;
 	}
 
-	sscanf(buf, "%u", &bs1);
+	rc = sscanf(buf, "%u", &bs1);
+	if (rc != 1)
+		goto einval;
 	if (bs1 > CTCM_BUFSIZE_LIMIT)
 					goto einval;
 	if (bs1 < (576 + LL_HEADER_LENGTH + 2))
@@ -143,13 +146,14 @@
 static ssize_t ctcm_proto_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	int value;
+	int value, rc;
 	struct ctcm_priv *priv = dev_get_drvdata(dev);
 
 	if (!priv)
 		return -ENODEV;
-	sscanf(buf, "%u", &value);
-	if (!((value == CTCM_PROTO_S390)  ||
+	rc = sscanf(buf, "%d", &value);
+	if ((rc != 1) ||
+	    !((value == CTCM_PROTO_S390)  ||
 	      (value == CTCM_PROTO_LINUX) ||
 	      (value == CTCM_PROTO_MPC) ||
 	      (value == CTCM_PROTO_OS390)))

diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index c461f2a..0a7d87c 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c

@@ -1943,14 +1943,16 @@
 lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
         struct lcs_card *card;
-        int value;
+	int value, rc;
 
 	card = dev_get_drvdata(dev);
 
         if (!card)
                 return 0;
 
-        sscanf(buf, "%u", &value);
+	rc = sscanf(buf, "%d", &value);
+	if (rc != 1)
+		return -EINVAL;
         /* TODO: sanity checks */
         card->portno = value;
 
@@ -1997,14 +1999,17 @@
 lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
         struct lcs_card *card;
-        int value;
+	unsigned int value;
+	int rc;
 
 	card = dev_get_drvdata(dev);
 
         if (!card)
                 return 0;
 
-        sscanf(buf, "%u", &value);
+	rc = sscanf(buf, "%u", &value);
+	if (rc != 1)
+		return -EINVAL;
         /* TODO: sanity checks */
         card->lancmd_timeout = value;
 
@@ -2442,7 +2447,7 @@
 	if (rc)
 		goto out_err;
 	lcs_root_dev = root_device_register("lcs");
-	rc = PTR_RET(lcs_root_dev);
+	rc = PTR_ERR_OR_ZERO(lcs_root_dev);
 	if (rc)
 		goto register_err;
 	rc = ccw_driver_register(&lcs_ccw_driver);

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5333b2c..a2088af 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h

@@ -268,10 +268,8 @@
 #define QETH_NO_PRIO_QUEUEING 0
 #define QETH_PRIO_Q_ING_PREC  1
 #define QETH_PRIO_Q_ING_TOS   2
-#define IP_TOS_LOWDELAY 0x10
-#define IP_TOS_HIGHTHROUGHPUT 0x08
-#define IP_TOS_HIGHRELIABILITY 0x04
-#define IP_TOS_NOTIMPORTANT 0x02
+#define QETH_PRIO_Q_ING_SKB   3
+#define QETH_PRIO_Q_ING_VLAN  4
 
 /* Packing */
 #define QETH_LOW_WATERMARK_PACK  2

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index e89f38c..f54bec5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c

@@ -20,6 +20,7 @@
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <net/iucv/af_iucv.h>
+#include <net/dsfield.h>
 
 #include <asm/ebcdic.h>
 #include <asm/chpid.h>
@@ -1013,7 +1014,7 @@
 
 	card = CARD_FROM_CDEV(cdev);
 
-	if (!IS_ERR(irb))
+	if (!card || !IS_ERR(irb))
 		return 0;
 
 	switch (PTR_ERR(irb)) {
@@ -1029,7 +1030,7 @@
 		QETH_CARD_TEXT(card, 2, "ckirberr");
 		QETH_CARD_TEXT_(card, 2, "  rc%d", -ETIMEDOUT);
 		if (intparm == QETH_RCD_PARM) {
-			if (card && (card->data.ccwdev == cdev)) {
+			if (card->data.ccwdev == cdev) {
 				card->data.state = CH_STATE_DOWN;
 				wake_up(&card->wait_q);
 			}
@@ -3662,42 +3663,68 @@
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_output_handler);
 
+/**
+ * qeth_get_priority_queue() - select the outbound queue for an skb
+ * @card: card whose qdio settings (do_prio_queueing, default_out_queue) apply
+ * @skb: packet about to be transmitted
+ * @ipv: IP version of @skb (4 or 6); any other value yields the default
+ *	 queue in the TOS/precedence modes
+ * @cast_type: cast type of @skb; when non-zero and
+ *	       card->info.is_multicast_different is set, that value picks the queue
+ *
+ * Note: Function assumes that we have 4 outbound queues.
+ *
+ * Return: index into card->qdio.out_qs.
+ */
 int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
 			int ipv, int cast_type)
 {
-	if (!ipv && (card->info.type == QETH_CARD_TYPE_OSD ||
-		     card->info.type == QETH_CARD_TYPE_OSX))
-		return card->qdio.default_out_queue;
-	switch (card->qdio.no_out_queues) {
-	case 4:
-		if (cast_type && card->info.is_multicast_different)
-			return card->info.is_multicast_different &
-				(card->qdio.no_out_queues - 1);
-		if (card->qdio.do_prio_queueing && (ipv == 4)) {
-			const u8 tos = ip_hdr(skb)->tos;
+	__be16 *tci;
+	u8 tos;
 
-			if (card->qdio.do_prio_queueing ==
-				QETH_PRIO_Q_ING_TOS) {
-				if (tos & IP_TOS_NOTIMPORTANT)
-					return 3;
-				if (tos & IP_TOS_HIGHRELIABILITY)
-					return 2;
-				if (tos & IP_TOS_HIGHTHROUGHPUT)
-					return 1;
-				if (tos & IP_TOS_LOWDELAY)
-					return 0;
-			}
-			if (card->qdio.do_prio_queueing ==
-				QETH_PRIO_Q_ING_PREC)
-				return 3 - (tos >> 6);
-		} else if (card->qdio.do_prio_queueing && (ipv == 6)) {
-			/* TODO: IPv6!!! */
+	if (cast_type && card->info.is_multicast_different)
+		return card->info.is_multicast_different &
+			(card->qdio.no_out_queues - 1);
+
+	switch (card->qdio.do_prio_queueing) {
+	case QETH_PRIO_Q_ING_TOS:
+	case QETH_PRIO_Q_ING_PREC:
+		switch (ipv) {
+		case 4:
+			tos = ipv4_get_dsfield(ip_hdr(skb));
+			break;
+		case 6:
+			tos = ipv6_get_dsfield(ipv6_hdr(skb));
+			break;
+		default:
+			return card->qdio.default_out_queue;
 		}
-		return card->qdio.default_out_queue;
-	case 1: /* fallthrough for single-out-queue 1920-device */
+		/* top two DS-field bits, inverted: larger value -> lower queue */
+		if (card->qdio.do_prio_queueing == QETH_PRIO_Q_ING_PREC)
+			return ~tos >> 6 & 3;
+		if (tos & IPTOS_MINCOST)
+			return 3;
+		if (tos & IPTOS_RELIABILITY)
+			return 2;
+		if (tos & IPTOS_THROUGHPUT)
+			return 1;
+		if (tos & IPTOS_LOWDELAY)
+			return 0;
+		break;
+	case QETH_PRIO_Q_ING_SKB:
+		/* priority 0-1 -> 3, 2-3 -> 2, 4-5 -> 1, above 5 -> 0 */
+		if (skb->priority > 5)
+			return 0;
+		return ~skb->priority >> 1 & 3;
+	case QETH_PRIO_Q_ING_VLAN:
+		/* h_proto and the 16-bit word after it are __be16; convert */
+		tci = &((struct ethhdr *)skb->data)->h_proto;
+		if (*tci == htons(ETH_P_8021Q))
+			return ~ntohs(*(tci + 1)) >> (VLAN_PRIO_SHIFT + 1) & 3;
+		break;
 	default:
-		return card->qdio.default_out_queue;
+		break;
 	}
+	return card->qdio.default_out_queue;
 }
 EXPORT_SYMBOL_GPL(qeth_get_priority_queue);
 
@@ -5703,6 +5718,7 @@
 	struct qeth_card *card = netdev->ml_priv;
 	enum qeth_link_types link_type;
 	struct carrier_info carrier_info;
+	u32 speed;
 
 	if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan))
 		link_type = QETH_LINK_TYPE_10GBIT_ETH;
@@ -5717,28 +5733,29 @@
 	case QETH_LINK_TYPE_FAST_ETH:
 	case QETH_LINK_TYPE_LANE_ETH100:
 		qeth_set_ecmd_adv_sup(ecmd, SPEED_100, PORT_TP);
-		ecmd->speed = SPEED_100;
+		speed = SPEED_100;
 		ecmd->port = PORT_TP;
 		break;
 
 	case QETH_LINK_TYPE_GBIT_ETH:
 	case QETH_LINK_TYPE_LANE_ETH1000:
 		qeth_set_ecmd_adv_sup(ecmd, SPEED_1000, PORT_FIBRE);
-		ecmd->speed = SPEED_1000;
+		speed = SPEED_1000;
 		ecmd->port = PORT_FIBRE;
 		break;
 
 	case QETH_LINK_TYPE_10GBIT_ETH:
 		qeth_set_ecmd_adv_sup(ecmd, SPEED_10000, PORT_FIBRE);
-		ecmd->speed = SPEED_10000;
+		speed = SPEED_10000;
 		ecmd->port = PORT_FIBRE;
 		break;
 
 	default:
 		qeth_set_ecmd_adv_sup(ecmd, SPEED_10, PORT_TP);
-		ecmd->speed = SPEED_10;
+		speed = SPEED_10;
 		ecmd->port = PORT_TP;
 	}
+	ethtool_cmd_speed_set(ecmd, speed);
 
 	/* Check if we can obtain more accurate information.	 */
 	/* If QUERY_CARD_INFO command is not supported or fails, */
@@ -5783,18 +5800,19 @@
 
 	switch (carrier_info.port_speed) {
 	case CARD_INFO_PORTS_10M:
-		ecmd->speed = SPEED_10;
+		speed = SPEED_10;
 		break;
 	case CARD_INFO_PORTS_100M:
-		ecmd->speed = SPEED_100;
+		speed = SPEED_100;
 		break;
 	case CARD_INFO_PORTS_1G:
-		ecmd->speed = SPEED_1000;
+		speed = SPEED_1000;
 		break;
 	case CARD_INFO_PORTS_10G:
-		ecmd->speed = SPEED_10000;
+		speed = SPEED_10000;
 		break;
 	}
+	ethtool_cmd_speed_set(ecmd, speed);
 
 	return 0;
 }
@@ -5816,7 +5834,7 @@
 	if (rc)
 		goto out_err;
 	qeth_core_root_dev = root_device_register("qeth");
-	rc = PTR_RET(qeth_core_root_dev);
+	rc = PTR_ERR_OR_ZERO(qeth_core_root_dev);
 	if (rc)
 		goto register_err;
 	qeth_core_header_cache = kmem_cache_create("qeth_hdr",

diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 425c0ec..8a25a2b 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c

@@ -217,6 +217,10 @@
 		return sprintf(buf, "%s\n", "by precedence");
 	case QETH_PRIO_Q_ING_TOS:
 		return sprintf(buf, "%s\n", "by type of service");
+	case QETH_PRIO_Q_ING_SKB:
+		return sprintf(buf, "%s\n", "by skb-priority");
+	case QETH_PRIO_Q_ING_VLAN:
+		return sprintf(buf, "%s\n", "by VLAN headers");
 	default:
 		return sprintf(buf, "always queue %i\n",
 			       card->qdio.default_out_queue);
@@ -250,11 +254,23 @@
 	}
 
 	tmp = strsep((char **) &buf, "\n");
-	if (!strcmp(tmp, "prio_queueing_prec"))
+	if (!strcmp(tmp, "prio_queueing_prec")) {
 		card->qdio.do_prio_queueing = QETH_PRIO_Q_ING_PREC;
-	else if (!strcmp(tmp, "prio_queueing_tos"))
+		card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
+	} else if (!strcmp(tmp, "prio_queueing_skb")) {
+		card->qdio.do_prio_queueing = QETH_PRIO_Q_ING_SKB;
+		card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
+	} else if (!strcmp(tmp, "prio_queueing_tos")) {
 		card->qdio.do_prio_queueing = QETH_PRIO_Q_ING_TOS;
-	else if (!strcmp(tmp, "no_prio_queueing:0")) {
+		card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
+	} else if (!strcmp(tmp, "prio_queueing_vlan")) {
+		if (!card->options.layer2) {
+			rc = -ENOTSUPP;
+			goto out;
+		}
+		card->qdio.do_prio_queueing = QETH_PRIO_Q_ING_VLAN;
+		card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
+	} else if (!strcmp(tmp, "no_prio_queueing:0")) {
 		card->qdio.do_prio_queueing = QETH_NO_PRIO_QUEUEING;
 		card->qdio.default_out_queue = 0;
 	} else if (!strcmp(tmp, "no_prio_queueing:1")) {

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8dea3f1..5ef5b4f 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c

@@ -725,15 +725,20 @@
 	int elements = 0;
 	struct qeth_card *card = dev->ml_priv;
 	struct sk_buff *new_skb = skb;
-	int ipv = qeth_get_ip_version(skb);
 	int cast_type = qeth_l2_get_cast_type(card, skb);
-	struct qeth_qdio_out_q *queue = card->qdio.out_qs
-		[qeth_get_priority_queue(card, skb, ipv, cast_type)];
+	struct qeth_qdio_out_q *queue;
 	int tx_bytes = skb->len;
 	int data_offset = -1;
 	int elements_needed = 0;
 	int hd_len = 0;
 
+	if (card->qdio.do_prio_queueing || (cast_type &&
+					card->info.is_multicast_different))
+		queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb,
+					qeth_get_ip_version(skb), cast_type)];
+	else
+		queue = card->qdio.out_qs[card->qdio.default_out_queue];
+
 	if ((card->state != CARD_STATE_UP) || !card->lan_online) {
 		card->stats.tx_carrier_errors++;
 		goto tx_drop;
@@ -964,10 +969,9 @@
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	if (card->info.type != QETH_CARD_TYPE_OSN)
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops);
+	card->dev->ethtool_ops =
+		(card->info.type != QETH_CARD_TYPE_OSN) ?
+		&qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
 	card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3524d34..14e0b58 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c

@@ -63,7 +63,7 @@
 int qeth_l3_string_to_ipaddr4(const char *buf, __u8 *addr)
 {
 	int count = 0, rc = 0;
-	int in[4];
+	unsigned int in[4];
 	char c;
 
 	rc = sscanf(buf, "%u.%u.%u.%u%c",
@@ -1659,7 +1659,7 @@
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1721,7 +1721,7 @@
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1766,7 +1766,7 @@
 
 	QETH_CARD_TEXT(card, 4, "frvaddr4");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in_dev = in_dev_get(netdev);
@@ -1796,7 +1796,7 @@
 
 	QETH_CARD_TEXT(card, 4, "frvaddr6");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in6_dev = in6_dev_get(netdev);
@@ -2089,7 +2089,7 @@
 		struct net_device *netdev;
 
 		rcu_read_lock();
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		rcu_read_unlock();
 		if (netdev == dev) {
@@ -2926,8 +2926,11 @@
 	struct sk_buff *new_skb = NULL;
 	int ipv = qeth_get_ip_version(skb);
 	int cast_type = qeth_l3_get_cast_type(card, skb);
-	struct qeth_qdio_out_q *queue = card->qdio.out_qs
-		[qeth_get_priority_queue(card, skb, ipv, cast_type)];
+	struct qeth_qdio_out_q *queue =
+		card->qdio.out_qs[card->qdio.do_prio_queueing
+			|| (cast_type && card->info.is_multicast_different) ?
+			qeth_get_priority_queue(card, skb, ipv, cast_type) :
+			card->qdio.default_out_queue];
 	int tx_bytes = skb->len;
 	bool large_send;
 	int data_offset = -1;
@@ -3298,7 +3301,7 @@
 	card->dev->ml_priv = card;
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
-	SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops);
+	card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 02832d6..baca589 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig

@@ -1773,6 +1773,7 @@
 config SCSI_VIRTIO
 	tristate "virtio-scsi support"
 	depends on VIRTIO
+	select BLK_DEV_INTEGRITY
 	help
           This is the virtual HBA driver for virtio.  If the kernel will
           be used in a virtual machine, say Y or M.

diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index bcd2238..93d13fc 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c

@@ -27,8 +27,6 @@
  */
 
 /*
- * $Log: NCR5380.c,v $
-
  * Revision 1.10 1998/9/2	Alan Cox
  *				(alan@lxorguk.ukuu.org.uk)
  * Fixed up the timer lockups reported so far. Things still suck. Looking 
@@ -89,13 +87,6 @@
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_transport_spi.h>
 
-#ifndef NDEBUG
-#define NDEBUG 0
-#endif
-#ifndef NDEBUG_ABORT
-#define NDEBUG_ABORT 0
-#endif
-
 #if (NDEBUG & NDEBUG_LISTS)
 #define LIST(x,y) {printk("LINE:%d   Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
 #define REMOVE(w,x,y,z) {printk("LINE:%d   Removing: %p->%p  %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
@@ -1005,7 +996,7 @@
 		LIST(cmd, tmp);
 		tmp->host_scribble = (unsigned char *) cmd;
 	}
-	dprintk(NDEBUG_QUEUES, ("scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail"));
+	dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
 	/* Run the coroutine if it isn't already running. */
 	/* Kick off command processing */
@@ -1040,7 +1031,7 @@
 		/* Lock held here */
 		done = 1;
 		if (!hostdata->connected && !hostdata->selecting) {
-			dprintk(NDEBUG_MAIN, ("scsi%d : not connected\n", instance->host_no));
+			dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
 			/*
 			 * Search through the issue_queue for a command destined
 			 * for a target that's not busy.
@@ -1048,7 +1039,7 @@
 			for (tmp = (Scsi_Cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (Scsi_Cmnd *) tmp->host_scribble) 
 			{
 				if (prev != tmp)
-					dprintk(NDEBUG_LISTS, ("MAIN tmp=%p   target=%d   busy=%d lun=%d\n", tmp, tmp->target, hostdata->busy[tmp->target], tmp->lun));
+					dprintk(NDEBUG_LISTS, "MAIN tmp=%p   target=%d   busy=%d lun=%d\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
 				/*  When we find one, remove it from the issue queue. */
 				if (!(hostdata->busy[tmp->device->id] & (1 << tmp->device->lun))) {
 					if (prev) {
@@ -1066,7 +1057,7 @@
 					 * On failure, we must add the command back to the
 					 *   issue queue so we can keep trying. 
 					 */
-					dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, ("scsi%d : main() : command for target %d lun %d removed from issue_queue\n", instance->host_no, tmp->target, tmp->lun));
+					dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %d removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
 	
 					/*
 					 * A successful selection is defined as one that 
@@ -1095,7 +1086,7 @@
 						tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
 						hostdata->issue_queue = tmp;
 						done = 0;
-						dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, ("scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no));
+						dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
 					}
 					/* lock held here still */
 				}	/* if target/lun is not busy */
@@ -1125,9 +1116,9 @@
 #endif
 		    && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
 		    ) {
-			dprintk(NDEBUG_MAIN, ("scsi%d : main() : performing information transfer\n", instance->host_no));
+			dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
 			NCR5380_information_transfer(instance);
-			dprintk(NDEBUG_MAIN, ("scsi%d : main() : done set false\n", instance->host_no));
+			dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
 			done = 0;
 		} else
 			break;
@@ -1159,8 +1150,8 @@
 	unsigned char basr;
 	unsigned long flags;
 
-	dprintk(NDEBUG_INTR, ("scsi : NCR5380 irq %d triggered\n",
-		instance->irq));
+	dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
+		instance->irq);
 
 	do {
 		done = 1;
@@ -1173,14 +1164,14 @@
 			NCR5380_dprint(NDEBUG_INTR, instance);
 			if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
 				done = 0;
-				dprintk(NDEBUG_INTR, ("scsi%d : SEL interrupt\n", instance->host_no));
+				dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
 				NCR5380_reselect(instance);
 				(void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 			} else if (basr & BASR_PARITY_ERROR) {
-				dprintk(NDEBUG_INTR, ("scsi%d : PARITY interrupt\n", instance->host_no));
+				dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
 				(void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 			} else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-				dprintk(NDEBUG_INTR, ("scsi%d : RESET interrupt\n", instance->host_no));
+				dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
 				(void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 			} else {
 #if defined(REAL_DMA)
@@ -1210,7 +1201,7 @@
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 				}
 #else
-				dprintk(NDEBUG_INTR, ("scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG)));
+				dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
 				(void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 #endif
 			}
@@ -1304,7 +1295,7 @@
 	hostdata->restart_select = 0;
 
 	NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-	dprintk(NDEBUG_ARBITRATION, ("scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id));
+	dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
 
 	/* 
 	 * Set the phase bits to 0, otherwise the NCR5380 won't drive the 
@@ -1333,7 +1324,7 @@
 		goto failed;
 	}
 
-	dprintk(NDEBUG_ARBITRATION, ("scsi%d : arbitration complete\n", instance->host_no));
+	dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
 
 	/* 
 	 * The arbitration delay is 2.2us, but this is a minimum and there is 
@@ -1347,7 +1338,7 @@
 	/* Check for lost arbitration */
 	if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
 		NCR5380_write(MODE_REG, MR_BASE);
-		dprintk(NDEBUG_ARBITRATION, ("scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no));
+		dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
 		goto failed;
 	}
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
@@ -1360,7 +1351,7 @@
 	    (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
 		NCR5380_write(MODE_REG, MR_BASE);
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-		dprintk(NDEBUG_ARBITRATION, ("scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no));
+		dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
 		goto failed;
 	}
 	/* 
@@ -1370,7 +1361,7 @@
 
 	udelay(2);
 
-	dprintk(NDEBUG_ARBITRATION, ("scsi%d : won arbitration\n", instance->host_no));
+	dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
 
 	/* 
 	 * Now that we have won arbitration, start Selection process, asserting 
@@ -1422,7 +1413,7 @@
 
 	udelay(1);
 
-	dprintk(NDEBUG_SELECTION, ("scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd)));
+	dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
 
 	/* 
 	 * The SCSI specification calls for a 250 ms timeout for the actual 
@@ -1487,7 +1478,7 @@
 		collect_stats(hostdata, cmd);
 		cmd->scsi_done(cmd);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-		dprintk(NDEBUG_SELECTION, ("scsi%d : target did not respond within 250ms\n", instance->host_no));
+		dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 		return 0;
 	}
@@ -1520,7 +1511,7 @@
 		goto failed;
 	}
 
-	dprintk(NDEBUG_SELECTION, ("scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id));
+	dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
 	tmp[0] = IDENTIFY(((instance->irq == SCSI_IRQ_NONE) ? 0 : 1), cmd->device->lun);
 
 	len = 1;
@@ -1530,7 +1521,7 @@
 	data = tmp;
 	phase = PHASE_MSGOUT;
 	NCR5380_transfer_pio(instance, &phase, &len, &data);
-	dprintk(NDEBUG_SELECTION, ("scsi%d : nexus established.\n", instance->host_no));
+	dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
 	/* XXX need to handle errors here */
 	hostdata->connected = cmd;
 	hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
@@ -1583,9 +1574,9 @@
 	NCR5380_setup(instance);
 
 	if (!(p & SR_IO))
-		dprintk(NDEBUG_PIO, ("scsi%d : pio write %d bytes\n", instance->host_no, c));
+		dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
 	else
-		dprintk(NDEBUG_PIO, ("scsi%d : pio read %d bytes\n", instance->host_no, c));
+		dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
 
 	/* 
 	 * The NCR5380 chip will only drive the SCSI bus when the 
@@ -1620,11 +1611,11 @@
 			break;
 		}
 
-		dprintk(NDEBUG_HANDSHAKE, ("scsi%d : REQ detected\n", instance->host_no));
+		dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
 
 		/* Check for phase mismatch */
 		if ((tmp & PHASE_MASK) != p) {
-			dprintk(NDEBUG_HANDSHAKE, ("scsi%d : phase mismatch\n", instance->host_no));
+			dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
 			NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
 			break;
 		}
@@ -1660,7 +1651,7 @@
 
 		/* FIXME - if this fails bus reset ?? */
 		NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
-		dprintk(NDEBUG_HANDSHAKE, ("scsi%d : req false, handshake complete\n", instance->host_no));
+		dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
 
 /*
  * We have several special cases to consider during REQ/ACK handshaking : 
@@ -1681,7 +1672,7 @@
 		}
 	} while (--c);
 
-	dprintk(NDEBUG_PIO, ("scsi%d : residual %d\n", instance->host_no, c));
+	dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
 
 	*count = c;
 	*data = d;
@@ -1828,7 +1819,7 @@
 		c -= 2;
 	}
 #endif
-	dprintk(NDEBUG_DMA, ("scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d));
+	dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
 	hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
 #endif
 
@@ -1857,7 +1848,7 @@
 		NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
 #endif				/* def REAL_DMA */
 
-	dprintk(NDEBUG_DMA, ("scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG)));
+	dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
 
 	/* 
 	 *	On the PAS16 at least I/O recovery delays are not needed here.
@@ -1934,7 +1925,7 @@
 		}
 	}
 
-	dprintk(NDEBUG_DMA, ("scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG)));
+	dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
 
 	NCR5380_write(MODE_REG, MR_BASE);
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -1948,7 +1939,7 @@
 #ifdef READ_OVERRUNS
 	if (*phase == p && (p & SR_IO) && residue == 0) {
 		if (overrun) {
-			dprintk(NDEBUG_DMA, ("Got an input overrun, using saved byte\n"));
+			dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
 			**data = saved_data;
 			*data += 1;
 			*count -= 1;
@@ -1957,13 +1948,13 @@
 			printk("No overrun??\n");
 			cnt = toPIO = 2;
 		}
-		dprintk(NDEBUG_DMA, ("Doing %d-byte PIO to 0x%X\n", cnt, *data));
+		dprintk(NDEBUG_DMA, "Doing %d-byte PIO to 0x%X\n", cnt, *data);
 		NCR5380_transfer_pio(instance, phase, &cnt, data);
 		*count -= toPIO - cnt;
 	}
 #endif
 
-	dprintk(NDEBUG_DMA, ("Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count)));
+	dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
 	return 0;
 
 #elif defined(REAL_DMA)
@@ -2013,7 +2004,7 @@
 		foo = NCR5380_pwrite(instance, d, c);
 #else
 		int timeout;
-		dprintk(NDEBUG_C400_PWRITE, ("About to pwrite %d bytes\n", c));
+		dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
 		if (!(foo = NCR5380_pwrite(instance, d, c))) {
 			/*
 			 * Wait for the last byte to be sent.  If REQ is being asserted for 
@@ -2024,19 +2015,19 @@
 				while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
 
 				if (!timeout)
-					dprintk(NDEBUG_LAST_BYTE_SENT, ("scsi%d : timed out on last byte\n", instance->host_no));
+					dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
 
 				if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
 					hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
 					if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
 						hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
-						dprintk(NDEBUG_LAST_WRITE_SENT, ("scsi%d : last bit sent works\n", instance->host_no));
+						dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
 					}
 				}
 			} else {
-				dprintk(NDEBUG_C400_PWRITE, ("Waiting for LASTBYTE\n"));
+				dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
 				while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
-				dprintk(NDEBUG_C400_PWRITE, ("Got LASTBYTE\n"));
+				dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
 			}
 		}
 #endif
@@ -2045,9 +2036,9 @@
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
 	if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
-		dprintk(NDEBUG_C400_PWRITE, ("53C400w: Checking for IRQ\n"));
+		dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
 		if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
-			dprintk(NDEBUG_C400_PWRITE, ("53C400w:    got it, reading reset interrupt reg\n"));
+			dprintk(NDEBUG_C400_PWRITE, "53C400w:    got it, reading reset interrupt reg\n");
 			NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 		} else {
 			printk("53C400w:    IRQ NOT THERE!\n");
@@ -2139,7 +2130,7 @@
 					--cmd->SCp.buffers_residual;
 					cmd->SCp.this_residual = cmd->SCp.buffer->length;
 					cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-					dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual));
+					dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
 				}
 				/*
 				 * The preferred transfer method is going to be 
@@ -2219,7 +2210,7 @@
 				case LINKED_FLG_CMD_COMPLETE:
 					/* Accept message by clearing ACK */
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-					dprintk(NDEBUG_LINKED, ("scsi%d : target %d lun %d linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun));
+					dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %d linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
 					/* 
 					 * Sanity check : A linked command should only terminate with
 					 * one of these messages if there are more linked commands
@@ -2235,7 +2226,7 @@
 					/* The next command is still part of this process */
 					cmd->next_link->tag = cmd->tag;
 					cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-					dprintk(NDEBUG_LINKED, ("scsi%d : target %d lun %d linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun));
+					dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %d linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
 					collect_stats(hostdata, cmd);
 					cmd->scsi_done(cmd);
 					cmd = hostdata->connected;
@@ -2247,7 +2238,7 @@
 					sink = 1;
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 					hostdata->connected = NULL;
-					dprintk(NDEBUG_QUEUES, ("scsi%d : command for target %d, lun %d completed\n", instance->host_no, cmd->device->id, cmd->device->lun));
+					dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %d completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
 					hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 
 					/* 
@@ -2281,13 +2272,13 @@
 					if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
 						scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
 
-						dprintk(NDEBUG_AUTOSENSE, ("scsi%d : performing request sense\n", instance->host_no));
+						dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
 
 						LIST(cmd, hostdata->issue_queue);
 						cmd->host_scribble = (unsigned char *)
 						    hostdata->issue_queue;
 						hostdata->issue_queue = (Scsi_Cmnd *) cmd;
-						dprintk(NDEBUG_QUEUES, ("scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no));
+						dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
 					} else
 #endif				/* def AUTOSENSE */
 					{
@@ -2327,7 +2318,7 @@
 						    hostdata->disconnected_queue;
 						hostdata->connected = NULL;
 						hostdata->disconnected_queue = cmd;
-						dprintk(NDEBUG_QUEUES, ("scsi%d : command for target %d lun %d was moved from connected to" "  the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun));
+						dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %d was moved from connected to" "  the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
 						/* 
 						 * Restore phase bits to 0 so an interrupted selection, 
 						 * arbitration can resume.
@@ -2373,14 +2364,14 @@
 					extended_msg[0] = EXTENDED_MESSAGE;
 					/* Accept first byte by clearing ACK */
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-					dprintk(NDEBUG_EXTENDED, ("scsi%d : receiving extended message\n", instance->host_no));
+					dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
 
 					len = 2;
 					data = extended_msg + 1;
 					phase = PHASE_MSGIN;
 					NCR5380_transfer_pio(instance, &phase, &len, &data);
 
-					dprintk(NDEBUG_EXTENDED, ("scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]));
+					dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
 
 					if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
 						/* Accept third byte by clearing ACK */
@@ -2390,7 +2381,7 @@
 						phase = PHASE_MSGIN;
 
 						NCR5380_transfer_pio(instance, &phase, &len, &data);
-						dprintk(NDEBUG_EXTENDED, ("scsi%d : message received, residual %d\n", instance->host_no, len));
+						dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
 
 						switch (extended_msg[2]) {
 						case EXTENDED_SDTR:
@@ -2456,7 +2447,7 @@
 				NCR5380_transfer_pio(instance, &phase, &len, &data);
 				if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
 					NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-					dprintk(NDEBUG_USLEEP, ("scsi%d : issued command, sleeping until %ul\n", instance->host_no, hostdata->time_expires));
+					dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
 					return;
 				}
 				break;
@@ -2468,7 +2459,7 @@
 				break;
 			default:
 				printk("scsi%d : unknown phase\n", instance->host_no);
-				NCR5380_dprint(NDEBUG_ALL, instance);
+				NCR5380_dprint(NDEBUG_ANY, instance);
 			}	/* switch(phase) */
 		}		/* if (tmp * SR_REQ) */
 		else {
@@ -2476,7 +2467,7 @@
 			 */
 			if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
 				NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-				dprintk(NDEBUG_USLEEP, ("scsi%d : poll timed out, sleeping until %ul\n", instance->host_no, hostdata->time_expires));
+				dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
 				return;
 			}
 		}
@@ -2517,7 +2508,7 @@
 	hostdata->restart_select = 1;
 
 	target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
-	dprintk(NDEBUG_SELECTION, ("scsi%d : reselect\n", instance->host_no));
+	dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
 
 	/* 
 	 * At this point, we have detected that our SCSI ID is on the bus,
@@ -2597,7 +2588,7 @@
 		do_abort(instance);
 	} else {
 		hostdata->connected = tmp;
-		dprintk(NDEBUG_RESELECTION, ("scsi%d : nexus established, target = %d, lun = %d, tag = %d\n", instance->host_no, tmp->target, tmp->lun, tmp->tag));
+		dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %d, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
 	}
 }
 
@@ -2682,8 +2673,8 @@
 
 	NCR5380_setup(instance);
 
-	dprintk(NDEBUG_ABORT, ("scsi%d : abort called\n", instance->host_no));
-	dprintk(NDEBUG_ABORT, ("        basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG)));
+	dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
+	dprintk(NDEBUG_ABORT, "        basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
 
 #if 0
 /*
@@ -2693,7 +2684,7 @@
  */
 
 	if (hostdata->connected == cmd) {
-		dprintk(NDEBUG_ABORT, ("scsi%d : aborting connected command\n", instance->host_no));
+		dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
 		hostdata->aborted = 1;
 /*
  * We should perform BSY checking, and make sure we haven't slipped
@@ -2721,14 +2712,14 @@
  *          from the issue queue.
  */
  
-	dprintk(NDEBUG_ABORT, ("scsi%d : abort going into loop.\n", instance->host_no));
+	dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
 	for (prev = (Scsi_Cmnd **) & (hostdata->issue_queue), tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp; prev = (Scsi_Cmnd **) & (tmp->host_scribble), tmp = (Scsi_Cmnd *) tmp->host_scribble)
 		if (cmd == tmp) {
 			REMOVE(5, *prev, tmp, tmp->host_scribble);
 			(*prev) = (Scsi_Cmnd *) tmp->host_scribble;
 			tmp->host_scribble = NULL;
 			tmp->result = DID_ABORT << 16;
-			dprintk(NDEBUG_ABORT, ("scsi%d : abort removed command from issue queue.\n", instance->host_no));
+			dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
 			tmp->scsi_done(tmp);
 			return SUCCESS;
 		}
@@ -2750,7 +2741,7 @@
  */
 
 	if (hostdata->connected) {
-		dprintk(NDEBUG_ABORT, ("scsi%d : abort failed, command connected.\n", instance->host_no));
+		dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
 		return FAILED;
 	}
 /*
@@ -2780,11 +2771,11 @@
 
 	for (tmp = (Scsi_Cmnd *) hostdata->disconnected_queue; tmp; tmp = (Scsi_Cmnd *) tmp->host_scribble)
 		if (cmd == tmp) {
-			dprintk(NDEBUG_ABORT, ("scsi%d : aborting disconnected command.\n", instance->host_no));
+			dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
 
 			if (NCR5380_select(instance, cmd, (int) cmd->tag))
 				return FAILED;
-			dprintk(NDEBUG_ABORT, ("scsi%d : nexus reestablished.\n", instance->host_no));
+			dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
 
 			do_abort(instance);
 

diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index 14964d0..c79ddfa 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h

@@ -21,10 +21,6 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: NCR5380.h,v $
- */
-
 #ifndef NCR5380_H
 #define NCR5380_H
 
@@ -60,6 +56,9 @@
 #define NDEBUG_C400_PREAD	0x100000
 #define NDEBUG_C400_PWRITE	0x200000
 #define NDEBUG_LISTS		0x400000
+#define NDEBUG_ABORT		0x800000
+#define NDEBUG_TAGS		0x1000000
+#define NDEBUG_MERGING		0x2000000
 
 #define NDEBUG_ANY		0xFFFFFFFFUL
 
@@ -292,9 +291,24 @@
 
 #ifdef __KERNEL__
 
-#define dprintk(a,b)			do {} while(0)
-#define NCR5380_dprint(a,b)		do {} while(0)
-#define NCR5380_dprint_phase(a,b)	do {} while(0)
+#ifndef NDEBUG
+#define NDEBUG (0)
+#endif
+
+#define dprintk(flg, fmt, ...) \
+	do { if ((NDEBUG) & (flg)) pr_debug(fmt, ## __VA_ARGS__); } while (0)
+
+#if NDEBUG
+#define NCR5380_dprint(flg, arg) \
+	do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
+#define NCR5380_dprint_phase(flg, arg) \
+	do { if ((NDEBUG) & (flg)) NCR5380_print_phase(arg); } while (0)
+static void NCR5380_print_phase(struct Scsi_Host *instance);
+static void NCR5380_print(struct Scsi_Host *instance);
+#else
+#define NCR5380_dprint(flg, arg)       do {} while (0)
+#define NCR5380_dprint_phase(flg, arg) do {} while (0)
+#endif
 
 #if defined(AUTOPROBE_IRQ)
 static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
@@ -307,10 +321,6 @@
 #endif
 static void NCR5380_main(struct work_struct *work);
 static void __maybe_unused NCR5380_print_options(struct Scsi_Host *instance);
-#ifdef NDEBUG
-static void NCR5380_print_phase(struct Scsi_Host *instance);
-static void NCR5380_print(struct Scsi_Host *instance);
-#endif
 static int NCR5380_abort(Scsi_Cmnd * cmd);
 static int NCR5380_bus_reset(Scsi_Cmnd * cmd);
 static int NCR5380_queue_command(struct Scsi_Host *, struct scsi_cmnd *);

diff --git a/drivers/scsi/aic7xxx/aic79xx_pci.c b/drivers/scsi/aic7xxx/aic79xx_pci.c
index 14b5f8d..cc9bd26 100644
--- a/drivers/scsi/aic7xxx/aic79xx_pci.c
+++ b/drivers/scsi/aic7xxx/aic79xx_pci.c

@@ -827,7 +827,7 @@
 		for (bit = 0; bit < 8; bit++) {
 
 			if ((pci_status[i] & (0x1 << bit)) != 0) {
-				static const char *s;
+				const char *s;
 
 				s = pci_status_strings[bit];
 				if (i == 7/*TARG*/ && bit == 3)
@@ -887,23 +887,15 @@
 
 		for (bit = 0; bit < 8; bit++) {
 
-			if ((split_status[i] & (0x1 << bit)) != 0) {
-				static const char *s;
-
-				s = split_status_strings[bit];
-				printk(s, ahd_name(ahd),
+			if ((split_status[i] & (0x1 << bit)) != 0)
+				printk(split_status_strings[bit], ahd_name(ahd),
 				       split_status_source[i]);
-			}
 
 			if (i > 1)
 				continue;
 
-			if ((sg_split_status[i] & (0x1 << bit)) != 0) {
-				static const char *s;
-
-				s = split_status_strings[bit];
-				printk(s, ahd_name(ahd), "SG");
-			}
+			if ((sg_split_status[i] & (0x1 << bit)) != 0)
+				printk(split_status_strings[bit], ahd_name(ahd), "SG");
 		}
 	}
 	/*

diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 059ff47..2e797a3 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c

@@ -62,13 +62,6 @@
  */
 #undef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE
 /*
- * SCSI-II Linked command support.
- *
- * The higher level code doesn't support linked commands yet, and so the option
- * is undef'd here.
- */
-#undef CONFIG_SCSI_ACORNSCSI_LINK
-/*
  * SCSI-II Synchronous transfer support.
  *
  * Tried and tested...
@@ -160,10 +153,6 @@
 #error "Yippee!  ABORT TAG is now defined!  Remove this error!"
 #endif
 
-#ifdef CONFIG_SCSI_ACORNSCSI_LINK
-#error SCSI2 LINKed commands not supported (yet)!
-#endif
-
 #ifdef USE_DMAC
 /*
  * DMAC setup parameters
@@ -1668,42 +1657,6 @@
 	}
 	break;
 
-#ifdef CONFIG_SCSI_ACORNSCSI_LINK
-    case LINKED_CMD_COMPLETE:
-    case LINKED_FLG_CMD_COMPLETE:
-	/*
-	 * We don't support linked commands yet
-	 */
-	if (0) {
-#if (DEBUG & DEBUG_LINK)
-	    printk("scsi%d.%c: lun %d tag %d linked command complete\n",
-		    host->host->host_no, acornscsi_target(host), host->SCpnt->tag);
-#endif
-	    /*
-	     * A linked command should only terminate with one of these messages
-	     * if there are more linked commands available.
-	     */
-	    if (!host->SCpnt->next_link) {
-		printk(KERN_WARNING "scsi%d.%c: lun %d tag %d linked command complete, but no next_link\n",
-			instance->host_no, acornscsi_target(host), host->SCpnt->tag);
-		acornscsi_sbic_issuecmd(host, CMND_ASSERTATN);
-		msgqueue_addmsg(&host->scsi.msgs, 1, ABORT);
-	    } else {
-		struct scsi_cmnd *SCpnt = host->SCpnt;
-
-		acornscsi_dma_cleanup(host);
-
-		host->SCpnt = host->SCpnt->next_link;
-		host->SCpnt->tag = SCpnt->tag;
-		SCpnt->result = DID_OK | host->scsi.SCp.Message << 8 | host->Scsi.SCp.Status;
-		SCpnt->done(SCpnt);
-
-		/* initialise host->SCpnt->SCp */
-	    }
-	    break;
-	}
-#endif
-
     default: /* reject message */
 	printk(KERN_ERR "scsi%d.%c: unrecognised message %02X, rejecting\n",
 		host->host->host_no, acornscsi_target(host),
@@ -2825,9 +2778,6 @@
 #ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE
     " TAG"
 #endif
-#ifdef CONFIG_SCSI_ACORNSCSI_LINK
-    " LINK"
-#endif
 #if (DEBUG & DEBUG_NO_WRITE)
     " NOWRITE (" __stringify(NO_WRITE) ")"
 #endif
@@ -2851,9 +2801,6 @@
 #ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE
     " TAG"
 #endif
-#ifdef CONFIG_SCSI_ACORNSCSI_LINK
-    " LINK"
-#endif
 #if (DEBUG & DEBUG_NO_WRITE)
     " NOWRITE (" __stringify(NO_WRITE) ")"
 #endif

diff --git a/drivers/scsi/arm/cumana_1.c b/drivers/scsi/arm/cumana_1.c
index f8e0609..8ef810a 100644
--- a/drivers/scsi/arm/cumana_1.c
+++ b/drivers/scsi/arm/cumana_1.c

@@ -36,9 +36,6 @@
 	void __iomem *base;		\
 	void __iomem *dma
 
-#define BOARD_NORMAL	0
-#define BOARD_NCR53C400	1
-
 #include "../NCR5380.h"
 
 void cumanascsi_setup(char *str, int *ints)

diff --git a/drivers/scsi/arm/oak.c b/drivers/scsi/arm/oak.c
index 4266eef..188e734 100644
--- a/drivers/scsi/arm/oak.c
+++ b/drivers/scsi/arm/oak.c

@@ -37,9 +37,6 @@
 #define NCR5380_implementation_fields	\
 	void __iomem *base
 
-#define BOARD_NORMAL	0
-#define BOARD_NCR53C400	1
-
 #include "../NCR5380.h"
 
 #undef START_DMA_INITIATOR_RECEIVE_REG

diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
index 0f3cdbc..1814aa2 100644
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c

@@ -370,7 +370,7 @@
 		return 0;
 	if (TagAlloc[cmd->device->id][cmd->device->lun].nr_allocated >=
 	    TagAlloc[cmd->device->id][cmd->device->lun].queue_size) {
-		TAG_PRINTK("scsi%d: target %d lun %d: no free tags\n",
+		dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
 			   H_NO(cmd), cmd->device->id, cmd->device->lun);
 		return 1;
 	}
@@ -394,7 +394,7 @@
 	    !setup_use_tagged_queuing || !cmd->device->tagged_supported) {
 		cmd->tag = TAG_NONE;
 		hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
-		TAG_PRINTK("scsi%d: target %d lun %d now allocated by untagged "
+		dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
 			   "command\n", H_NO(cmd), cmd->device->id, cmd->device->lun);
 	} else {
 		TAG_ALLOC *ta = &TagAlloc[cmd->device->id][cmd->device->lun];
@@ -402,7 +402,7 @@
 		cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
 		set_bit(cmd->tag, ta->allocated);
 		ta->nr_allocated++;
-		TAG_PRINTK("scsi%d: using tag %d for target %d lun %d "
+		dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
 			   "(now %d tags in use)\n",
 			   H_NO(cmd), cmd->tag, cmd->device->id,
 			   cmd->device->lun, ta->nr_allocated);
@@ -420,7 +420,7 @@
 
 	if (cmd->tag == TAG_NONE) {
 		hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-		TAG_PRINTK("scsi%d: target %d lun %d untagged cmd finished\n",
+		dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
 			   H_NO(cmd), cmd->device->id, cmd->device->lun);
 	} else if (cmd->tag >= MAX_TAGS) {
 		printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
@@ -429,7 +429,7 @@
 		TAG_ALLOC *ta = &TagAlloc[cmd->device->id][cmd->device->lun];
 		clear_bit(cmd->tag, ta->allocated);
 		ta->nr_allocated--;
-		TAG_PRINTK("scsi%d: freed tag %d for target %d lun %d\n",
+		dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
 			   H_NO(cmd), cmd->tag, cmd->device->id, cmd->device->lun);
 	}
 }
@@ -478,7 +478,7 @@
 	for (endaddr = virt_to_phys(cmd->SCp.ptr + cmd->SCp.this_residual - 1) + 1;
 	     cmd->SCp.buffers_residual &&
 	     virt_to_phys(sg_virt(&cmd->SCp.buffer[1])) == endaddr;) {
-		MER_PRINTK("VTOP(%p) == %08lx -> merging\n",
+		dprintk(NDEBUG_MERGING, "VTOP(%p) == %08lx -> merging\n",
 			   page_address(sg_page(&cmd->SCp.buffer[1])), endaddr);
 #if (NDEBUG & NDEBUG_MERGING)
 		++cnt;
@@ -490,7 +490,7 @@
 	}
 #if (NDEBUG & NDEBUG_MERGING)
 	if (oldlen != cmd->SCp.this_residual)
-		MER_PRINTK("merged %d buffers from %p, new length %08x\n",
+		dprintk(NDEBUG_MERGING, "merged %d buffers from %p, new length %08x\n",
 			   cnt, cmd->SCp.ptr, cmd->SCp.this_residual);
 #endif
 }
@@ -626,16 +626,6 @@
 	}
 }
 
-#else /* !NDEBUG */
-
-/* dummies... */
-static inline void NCR5380_print(struct Scsi_Host *instance)
-{
-};
-static inline void NCR5380_print_phase(struct Scsi_Host *instance)
-{
-};
-
 #endif
 
 /*
@@ -676,7 +666,7 @@
 {
 	static int done = 0;
 	if (!done) {
-		INI_PRINTK("scsi : NCR5380_all_init()\n");
+		dprintk(NDEBUG_INIT, "scsi : NCR5380_all_init()\n");
 		done = 1;
 	}
 }
@@ -739,8 +729,8 @@
 	Scsi_Cmnd *ptr;
 	unsigned long flags;
 
-	NCR_PRINT(NDEBUG_ANY);
-	NCR_PRINT_PHASE(NDEBUG_ANY);
+	NCR5380_dprint(NDEBUG_ANY, instance);
+	NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
 	hostdata = (struct NCR5380_hostdata *)instance->hostdata;
 
@@ -984,7 +974,7 @@
 	}
 	local_irq_restore(flags);
 
-	QU_PRINTK("scsi%d: command added to %s of queue\n", H_NO(cmd),
+	dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
 		  (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
 	/* If queue_command() is called from an interrupt (real one or bottom
@@ -1054,7 +1044,7 @@
 		done = 1;
 
 		if (!hostdata->connected) {
-			MAIN_PRINTK("scsi%d: not connected\n", HOSTNO);
+			dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
 			/*
 			 * Search through the issue_queue for a command destined
 			 * for a target that's not busy.
@@ -1107,7 +1097,7 @@
 					 * On failure, we must add the command back to the
 					 *   issue queue so we can keep trying.
 					 */
-					MAIN_PRINTK("scsi%d: main(): command for target %d "
+					dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
 						    "lun %d removed from issue_queue\n",
 						    HOSTNO, tmp->device->id, tmp->device->lun);
 					/*
@@ -1140,7 +1130,7 @@
 #endif
 						falcon_dont_release--;
 						local_irq_restore(flags);
-						MAIN_PRINTK("scsi%d: main(): select() failed, "
+						dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
 							    "returned to issue_queue\n", HOSTNO);
 						if (hostdata->connected)
 							break;
@@ -1155,10 +1145,10 @@
 #endif
 		    ) {
 			local_irq_restore(flags);
-			MAIN_PRINTK("scsi%d: main: performing information transfer\n",
+			dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
 				    HOSTNO);
 			NCR5380_information_transfer(instance);
-			MAIN_PRINTK("scsi%d: main: done set false\n", HOSTNO);
+			dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
 			done = 0;
 		}
 	} while (!done);
@@ -1204,12 +1194,12 @@
 			    (BASR_PHASE_MATCH|BASR_ACK)) {
 				saved_data = NCR5380_read(INPUT_DATA_REG);
 				overrun = 1;
-				DMA_PRINTK("scsi%d: read overrun handled\n", HOSTNO);
+				dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
 			}
 		}
 	}
 
-	DMA_PRINTK("scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
+	dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
 		   HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
 		   NCR5380_read(STATUS_REG));
 
@@ -1229,13 +1219,13 @@
 		if ((NCR5380_read(STATUS_REG) & PHASE_MASK) == p && (p & SR_IO)) {
 			cnt = toPIO = atari_read_overruns;
 			if (overrun) {
-				DMA_PRINTK("Got an input overrun, using saved byte\n");
+				dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
 				*(*data)++ = saved_data;
 				(*count)--;
 				cnt--;
 				toPIO--;
 			}
-			DMA_PRINTK("Doing %d-byte PIO to 0x%08lx\n", cnt, (long)*data);
+			dprintk(NDEBUG_DMA, "Doing %d-byte PIO to 0x%08lx\n", cnt, (long)*data);
 			NCR5380_transfer_pio(instance, &p, &cnt, data);
 			*count -= toPIO - cnt;
 		}
@@ -1261,25 +1251,25 @@
 	int done = 1, handled = 0;
 	unsigned char basr;
 
-	INT_PRINTK("scsi%d: NCR5380 irq triggered\n", HOSTNO);
+	dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
 
 	/* Look for pending interrupts */
 	basr = NCR5380_read(BUS_AND_STATUS_REG);
-	INT_PRINTK("scsi%d: BASR=%02x\n", HOSTNO, basr);
+	dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
 	/* dispatch to appropriate routine if found and done=0 */
 	if (basr & BASR_IRQ) {
-		NCR_PRINT(NDEBUG_INTR);
+		NCR5380_dprint(NDEBUG_INTR, instance);
 		if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
 			done = 0;
 			ENABLE_IRQ();
-			INT_PRINTK("scsi%d: SEL interrupt\n", HOSTNO);
+			dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
 			NCR5380_reselect(instance);
 			(void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 		} else if (basr & BASR_PARITY_ERROR) {
-			INT_PRINTK("scsi%d: PARITY interrupt\n", HOSTNO);
+			dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
 			(void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 		} else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-			INT_PRINTK("scsi%d: RESET interrupt\n", HOSTNO);
+			dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
 			(void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 		} else {
 			/*
@@ -1298,7 +1288,7 @@
 			    ((basr & BASR_END_DMA_TRANSFER) ||
 			     !(basr & BASR_PHASE_MATCH))) {
 
-				INT_PRINTK("scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
+				dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
 				NCR5380_dma_complete( instance );
 				done = 0;
 				ENABLE_IRQ();
@@ -1323,7 +1313,7 @@
 	}
 
 	if (!done) {
-		INT_PRINTK("scsi%d: in int routine, calling main\n", HOSTNO);
+		dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
 		/* Put a call to NCR5380_main() on the queue... */
 		queue_main();
 	}
@@ -1396,8 +1386,8 @@
 	unsigned long flags;
 
 	hostdata->restart_select = 0;
-	NCR_PRINT(NDEBUG_ARBITRATION);
-	ARB_PRINTK("scsi%d: starting arbitration, id = %d\n", HOSTNO,
+	NCR5380_dprint(NDEBUG_ARBITRATION, instance);
+	dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
 		   instance->this_id);
 
 	/*
@@ -1442,7 +1432,7 @@
 		;
 #endif
 
-	ARB_PRINTK("scsi%d: arbitration complete\n", HOSTNO);
+	dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
 
 	if (hostdata->connected) {
 		NCR5380_write(MODE_REG, MR_BASE);
@@ -1463,7 +1453,7 @@
 	    (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
 	    hostdata->connected) {
 		NCR5380_write(MODE_REG, MR_BASE);
-		ARB_PRINTK("scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
+		dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
 			   HOSTNO);
 		return -1;
 	}
@@ -1478,7 +1468,7 @@
 	    hostdata->connected) {
 		NCR5380_write(MODE_REG, MR_BASE);
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-		ARB_PRINTK("scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
+		dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
 			   HOSTNO);
 		return -1;
 	}
@@ -1501,7 +1491,7 @@
 		return -1;
 	}
 
-	ARB_PRINTK("scsi%d: won arbitration\n", HOSTNO);
+	dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
 
 	/*
 	 * Now that we have won arbitration, start Selection process, asserting
@@ -1561,7 +1551,7 @@
 
 	udelay(1);
 
-	SEL_PRINTK("scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+	dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
 
 	/*
 	 * The SCSI specification calls for a 250 ms timeout for the actual
@@ -1617,7 +1607,7 @@
 			printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
 			if (hostdata->restart_select)
 				printk(KERN_NOTICE "\trestart select\n");
-			NCR_PRINT(NDEBUG_ANY);
+			NCR5380_dprint(NDEBUG_ANY, instance);
 			NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 			return -1;
 		}
@@ -1630,7 +1620,7 @@
 #endif
 		cmd->scsi_done(cmd);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-		SEL_PRINTK("scsi%d: target did not respond within 250ms\n", HOSTNO);
+		dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 		return 0;
 	}
@@ -1656,7 +1646,7 @@
 	while (!(NCR5380_read(STATUS_REG) & SR_REQ))
 		;
 
-	SEL_PRINTK("scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
+	dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
 		   HOSTNO, cmd->device->id);
 	tmp[0] = IDENTIFY(1, cmd->device->lun);
 
@@ -1676,7 +1666,7 @@
 	data = tmp;
 	phase = PHASE_MSGOUT;
 	NCR5380_transfer_pio(instance, &phase, &len, &data);
-	SEL_PRINTK("scsi%d: nexus established.\n", HOSTNO);
+	dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
 	/* XXX need to handle errors here */
 	hostdata->connected = cmd;
 #ifndef SUPPORT_TAGS
@@ -1737,12 +1727,12 @@
 		while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
 			;
 
-		HSH_PRINTK("scsi%d: REQ detected\n", HOSTNO);
+		dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
 
 		/* Check for phase mismatch */
 		if ((tmp & PHASE_MASK) != p) {
-			PIO_PRINTK("scsi%d: phase mismatch\n", HOSTNO);
-			NCR_PRINT_PHASE(NDEBUG_PIO);
+			dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+			NCR5380_dprint_phase(NDEBUG_PIO, instance);
 			break;
 		}
 
@@ -1764,25 +1754,25 @@
 		if (!(p & SR_IO)) {
 			if (!((p & SR_MSG) && c > 1)) {
 				NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
-				NCR_PRINT(NDEBUG_PIO);
+				NCR5380_dprint(NDEBUG_PIO, instance);
 				NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
 					      ICR_ASSERT_DATA | ICR_ASSERT_ACK);
 			} else {
 				NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
 					      ICR_ASSERT_DATA | ICR_ASSERT_ATN);
-				NCR_PRINT(NDEBUG_PIO);
+				NCR5380_dprint(NDEBUG_PIO, instance);
 				NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
 					      ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
 			}
 		} else {
-			NCR_PRINT(NDEBUG_PIO);
+			NCR5380_dprint(NDEBUG_PIO, instance);
 			NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
 		}
 
 		while (NCR5380_read(STATUS_REG) & SR_REQ)
 			;
 
-		HSH_PRINTK("scsi%d: req false, handshake complete\n", HOSTNO);
+		dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
 
 		/*
 		 * We have several special cases to consider during REQ/ACK handshaking :
@@ -1803,7 +1793,7 @@
 		}
 	} while (--c);
 
-	PIO_PRINTK("scsi%d: residual %d\n", HOSTNO, c);
+	dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
 
 	*count = c;
 	*data = d;
@@ -1917,7 +1907,7 @@
 	if (atari_read_overruns && (p & SR_IO))
 		c -= atari_read_overruns;
 
-	DMA_PRINTK("scsi%d: initializing DMA for %s, %d bytes %s %p\n",
+	dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
 		   HOSTNO, (p & SR_IO) ? "reading" : "writing",
 		   c, (p & SR_IO) ? "to" : "from", d);
 
@@ -1997,7 +1987,7 @@
 			phase = (tmp & PHASE_MASK);
 			if (phase != old_phase) {
 				old_phase = phase;
-				NCR_PRINT_PHASE(NDEBUG_INFORMATION);
+				NCR5380_dprint_phase(NDEBUG_INFORMATION, instance);
 			}
 
 			if (sink && (phase != PHASE_MSGOUT)) {
@@ -2039,7 +2029,7 @@
 					 * they are at contiguous physical addresses.
 					 */
 					merge_contiguous_buffers(cmd);
-					INF_PRINTK("scsi%d: %d bytes and %d buffers left\n",
+					dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
 						   HOSTNO, cmd->SCp.this_residual,
 						   cmd->SCp.buffers_residual);
 				}
@@ -2123,7 +2113,7 @@
 					/* Accept message by clearing ACK */
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-					LNK_PRINTK("scsi%d: target %d lun %d linked command "
+					dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %d linked command "
 						   "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
 
 					/* Enable reselect interrupts */
@@ -2148,7 +2138,7 @@
 					 * and don't free it! */
 					cmd->next_link->tag = cmd->tag;
 					cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-					LNK_PRINTK("scsi%d: target %d lun %d linked request "
+					dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %d linked request "
 						   "done, calling scsi_done().\n",
 						   HOSTNO, cmd->device->id, cmd->device->lun);
 #ifdef NCR5380_STATS
@@ -2165,7 +2155,7 @@
 					/* ++guenther: possible race with Falcon locking */
 					falcon_dont_release++;
 					hostdata->connected = NULL;
-					QU_PRINTK("scsi%d: command for target %d, lun %d "
+					dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %d "
 						  "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
 #ifdef SUPPORT_TAGS
 					cmd_free_tag(cmd);
@@ -2179,7 +2169,7 @@
 						/* ++Andreas: the mid level code knows about
 						   QUEUE_FULL now. */
 						TAG_ALLOC *ta = &TagAlloc[cmd->device->id][cmd->device->lun];
-						TAG_PRINTK("scsi%d: target %d lun %d returned "
+						dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d returned "
 							   "QUEUE_FULL after %d commands\n",
 							   HOSTNO, cmd->device->id, cmd->device->lun,
 							   ta->nr_allocated);
@@ -2224,14 +2214,14 @@
 					    (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
 						scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
 
-						ASEN_PRINTK("scsi%d: performing request sense\n", HOSTNO);
+						dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
 
 						local_irq_save(flags);
 						LIST(cmd,hostdata->issue_queue);
 						SET_NEXT(cmd, hostdata->issue_queue);
 						hostdata->issue_queue = (Scsi_Cmnd *) cmd;
 						local_irq_restore(flags);
-						QU_PRINTK("scsi%d: REQUEST SENSE added to head of "
+						dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
 							  "issue queue\n", H_NO(cmd));
 					} else
 #endif /* def AUTOSENSE */
@@ -2277,7 +2267,7 @@
 						cmd->device->tagged_supported = 0;
 						hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
 						cmd->tag = TAG_NONE;
-						TAG_PRINTK("scsi%d: target %d lun %d rejected "
+						dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d rejected "
 							   "QUEUE_TAG message; tagged queuing "
 							   "disabled\n",
 							   HOSTNO, cmd->device->id, cmd->device->lun);
@@ -2294,7 +2284,7 @@
 					hostdata->connected = NULL;
 					hostdata->disconnected_queue = cmd;
 					local_irq_restore(flags);
-					QU_PRINTK("scsi%d: command for target %d lun %d was "
+					dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %d was "
 						  "moved from connected to the "
 						  "disconnected_queue\n", HOSTNO,
 						  cmd->device->id, cmd->device->lun);
@@ -2344,13 +2334,13 @@
 					/* Accept first byte by clearing ACK */
 					NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-					EXT_PRINTK("scsi%d: receiving extended message\n", HOSTNO);
+					dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
 
 					len = 2;
 					data = extended_msg + 1;
 					phase = PHASE_MSGIN;
 					NCR5380_transfer_pio(instance, &phase, &len, &data);
-					EXT_PRINTK("scsi%d: length=%d, code=0x%02x\n", HOSTNO,
+					dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
 						   (int)extended_msg[1], (int)extended_msg[2]);
 
 					if (!len && extended_msg[1] <=
@@ -2362,7 +2352,7 @@
 						phase = PHASE_MSGIN;
 
 						NCR5380_transfer_pio(instance, &phase, &len, &data);
-						EXT_PRINTK("scsi%d: message received, residual %d\n",
+						dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
 							   HOSTNO, len);
 
 						switch (extended_msg[2]) {
@@ -2451,7 +2441,7 @@
 				break;
 			default:
 				printk("scsi%d: unknown phase\n", HOSTNO);
-				NCR_PRINT(NDEBUG_ANY);
+				NCR5380_dprint(NDEBUG_ANY, instance);
 			} /* switch(phase) */
 		} /* if (tmp * SR_REQ) */
 	} /* while (1) */
@@ -2493,7 +2483,7 @@
 
 	target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
 
-	RSL_PRINTK("scsi%d: reselect\n", HOSTNO);
+	dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
 
 	/*
 	 * At this point, we have detected that our SCSI ID is on the bus,
@@ -2544,7 +2534,7 @@
 		if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
 		    msg[1] == SIMPLE_QUEUE_TAG)
 			tag = msg[2];
-		TAG_PRINTK("scsi%d: target mask %02x, lun %d sent tag %d at "
+		dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
 			   "reselection\n", HOSTNO, target_mask, lun, tag);
 	}
 #endif
@@ -2598,7 +2588,7 @@
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
 	hostdata->connected = tmp;
-	RSL_PRINTK("scsi%d: nexus established, target = %d, lun = %d, tag = %d\n",
+	dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %d, tag = %d\n",
 		   HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
 	falcon_dont_release--;
 }
@@ -2640,7 +2630,7 @@
 		printk(KERN_ERR "scsi%d: !!BINGO!! Falcon has no lock in NCR5380_abort\n",
 		       HOSTNO);
 
-	ABRT_PRINTK("scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
+	dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
 		    NCR5380_read(BUS_AND_STATUS_REG),
 		    NCR5380_read(STATUS_REG));
 
@@ -2653,7 +2643,7 @@
 
 	if (hostdata->connected == cmd) {
 
-		ABRT_PRINTK("scsi%d: aborting connected command\n", HOSTNO);
+		dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
 		/*
 		 * We should perform BSY checking, and make sure we haven't slipped
 		 * into BUS FREE.
@@ -2683,11 +2673,11 @@
 			local_irq_restore(flags);
 			cmd->scsi_done(cmd);
 			falcon_release_lock_if_possible(hostdata);
-			return SCSI_ABORT_SUCCESS;
+			return SUCCESS;
 		} else {
 /*			local_irq_restore(flags); */
 			printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-			return SCSI_ABORT_ERROR;
+			return FAILED;
 		}
 	}
 #endif
@@ -2705,13 +2695,13 @@
 			SET_NEXT(tmp, NULL);
 			tmp->result = DID_ABORT << 16;
 			local_irq_restore(flags);
-			ABRT_PRINTK("scsi%d: abort removed command from issue queue.\n",
+			dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
 				    HOSTNO);
 			/* Tagged queuing note: no tag to free here, hasn't been assigned
 			 * yet... */
 			tmp->scsi_done(tmp);
 			falcon_release_lock_if_possible(hostdata);
-			return SCSI_ABORT_SUCCESS;
+			return SUCCESS;
 		}
 	}
 
@@ -2728,8 +2718,8 @@
 
 	if (hostdata->connected) {
 		local_irq_restore(flags);
-		ABRT_PRINTK("scsi%d: abort failed, command connected.\n", HOSTNO);
-		return SCSI_ABORT_SNOOZE;
+		dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
+		return FAILED;
 	}
 
 	/*
@@ -2761,12 +2751,12 @@
 	     tmp = NEXT(tmp)) {
 		if (cmd == tmp) {
 			local_irq_restore(flags);
-			ABRT_PRINTK("scsi%d: aborting disconnected command.\n", HOSTNO);
+			dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
 
 			if (NCR5380_select(instance, cmd, (int)cmd->tag))
-				return SCSI_ABORT_BUSY;
+				return FAILED;
 
-			ABRT_PRINTK("scsi%d: nexus reestablished.\n", HOSTNO);
+			dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
 
 			do_abort(instance);
 
@@ -2791,7 +2781,7 @@
 					local_irq_restore(flags);
 					tmp->scsi_done(tmp);
 					falcon_release_lock_if_possible(hostdata);
-					return SCSI_ABORT_SUCCESS;
+					return SUCCESS;
 				}
 			}
 		}
@@ -2816,7 +2806,7 @@
 	 */
 	falcon_release_lock_if_possible(hostdata);
 
-	return SCSI_ABORT_NOT_RUNNING;
+	return FAILED;
 }
 
 
@@ -2825,7 +2815,7 @@
  *
  * Purpose : reset the SCSI bus.
  *
- * Returns : SCSI_RESET_WAKEUP
+ * Returns : SUCCESS or FAILED
  *
  */
 
@@ -2834,7 +2824,7 @@
 	SETUP_HOSTDATA(cmd->device->host);
 	int i;
 	unsigned long flags;
-#if 1
+#if defined(RESET_RUN_DONE)
 	Scsi_Cmnd *connected, *disconnected_queue;
 #endif
 
@@ -2859,7 +2849,14 @@
 	 * through anymore ... */
 	(void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
-#if 1	/* XXX Should now be done by midlevel code, but it's broken XXX */
+	/* MSch 20140115 - looking at the generic NCR5380 driver, all of this
+	 * should go.
+	 * Catch-22: if we don't clear all queues, the SCSI driver lock will
+	 * not be reset by atari_scsi_reset()!
+	 */
+
+#if defined(RESET_RUN_DONE)
+	/* XXX Should now be done by midlevel code, but it's broken XXX */
 	/* XXX see below                                            XXX */
 
 	/* MSch: old-style reset: actually abort all command processing here */
@@ -2890,7 +2887,7 @@
 	 */
 
 	if ((cmd = connected)) {
-		ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
+		dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
 		cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
 		cmd->scsi_done(cmd);
 	}
@@ -2902,7 +2899,7 @@
 		cmd->scsi_done(cmd);
 	}
 	if (i > 0)
-		ABRT_PRINTK("scsi: reset aborted %d disconnected command(s)\n", i);
+		dprintk(NDEBUG_ABORT, "scsi: reset aborted %d disconnected command(s)\n", i);
 
 	/* The Falcon lock should be released after a reset...
 	 */
@@ -2915,7 +2912,7 @@
 	 * the midlevel code that the reset was SUCCESSFUL, and there is no
 	 * need to 'wake up' the commands by a request_sense
 	 */
-	return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+	return SUCCESS;
 #else /* 1 */
 
 	/* MSch: new-style reset handling: let the mid-level do what it can */
@@ -2942,11 +2939,11 @@
 	 */
 
 	if (hostdata->issue_queue)
-		ABRT_PRINTK("scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
+		dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
 	if (hostdata->connected)
-		ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
+		dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
 	if (hostdata->disconnected_queue)
-		ABRT_PRINTK("scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+		dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
 
 	local_irq_save(flags);
 	hostdata->issue_queue = NULL;
@@ -2963,6 +2960,6 @@
 	local_irq_restore(flags);
 
 	/* we did no complete reset of all commands, so a wakeup is required */
-	return SCSI_RESET_WAKEUP | SCSI_RESET_BUS_RESET;
+	return SUCCESS;
 #endif /* 1 */
 }

diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
index a8d721f..b522134 100644
--- a/drivers/scsi/atari_scsi.c
+++ b/drivers/scsi/atari_scsi.c

@@ -67,12 +67,6 @@
 
 #include <linux/module.h>
 
-#define NDEBUG (0)
-
-#define NDEBUG_ABORT		0x00100000
-#define NDEBUG_TAGS		0x00200000
-#define NDEBUG_MERGING		0x00400000
-
 #define AUTOSENSE
 /* For the Atari version, use only polled IO or REAL_DMA */
 #define	REAL_DMA
@@ -314,7 +308,7 @@
 
 	dma_stat = tt_scsi_dma.dma_ctrl;
 
-	INT_PRINTK("scsi%d: NCR5380 interrupt, DMA status = %02x\n",
+	dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
 		   atari_scsi_host->host_no, dma_stat & 0xff);
 
 	/* Look if it was the DMA that has interrupted: First possibility
@@ -340,7 +334,7 @@
 	if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
 		atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
 
-		DMA_PRINTK("SCSI DMA: There are %ld residual bytes.\n",
+		dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
 			   atari_dma_residual);
 
 		if ((signed int)atari_dma_residual < 0)
@@ -371,7 +365,7 @@
 			 * other command.  These shouldn't disconnect anyway.
 			 */
 			if (atari_dma_residual & 0x1ff) {
-				DMA_PRINTK("SCSI DMA: DMA bug corrected, "
+				dprintk(NDEBUG_DMA, "SCSI DMA: DMA bug corrected, "
 					   "difference %ld bytes\n",
 					   512 - (atari_dma_residual & 0x1ff));
 				atari_dma_residual = (atari_dma_residual + 511) & ~0x1ff;
@@ -438,7 +432,7 @@
 			       "ST-DMA fifo\n", transferred & 15);
 
 		atari_dma_residual = HOSTDATA_DMALEN - transferred;
-		DMA_PRINTK("SCSI DMA: There are %ld residual bytes.\n",
+		dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
 			   atari_dma_residual);
 	} else
 		atari_dma_residual = 0;
@@ -474,11 +468,11 @@
 		/* there are 'nr' bytes left for the last long address
 		   before the DMA pointer */
 		phys_dst ^= nr;
-		DMA_PRINTK("SCSI DMA: there are %d rest bytes for phys addr 0x%08lx",
+		dprintk(NDEBUG_DMA, "SCSI DMA: there are %d rest bytes for phys addr 0x%08lx",
 			   nr, phys_dst);
 		/* The content of the DMA pointer is a physical address!  */
 		dst = phys_to_virt(phys_dst);
-		DMA_PRINTK(" = virt addr %p\n", dst);
+		dprintk(NDEBUG_DMA, " = virt addr %p\n", dst);
 		for (src = (char *)&tt_scsi_dma.dma_restdata; nr != 0; --nr)
 			*dst++ = *src++;
 	}
@@ -827,7 +821,7 @@
 	} else {
 		atari_turnon_irq(IRQ_MFP_FSCSI);
 	}
-	if ((rv & SCSI_RESET_ACTION) == SCSI_RESET_SUCCESS)
+	if (rv == SUCCESS)
 		falcon_release_lock_if_possible(hostdata);
 
 	return rv;
@@ -883,7 +877,7 @@
 {
 	unsigned long addr = virt_to_phys(data);
 
-	DMA_PRINTK("scsi%d: setting up dma, data = %p, phys = %lx, count = %ld, "
+	dprintk(NDEBUG_DMA, "scsi%d: setting up dma, data = %p, phys = %lx, count = %ld, "
 		   "dir = %d\n", instance->host_no, data, addr, count, dir);
 
 	if (!IS_A_TT() && !STRAM_ADDR(addr)) {
@@ -1063,7 +1057,7 @@
 		possible_len = limit;
 
 	if (possible_len != wanted_len)
-		DMA_PRINTK("Sorry, must cut DMA transfer size to %ld bytes "
+		dprintk(NDEBUG_DMA, "Sorry, must cut DMA transfer size to %ld bytes "
 			   "instead of %ld\n", possible_len, wanted_len);
 
 	return possible_len;

diff --git a/drivers/scsi/atari_scsi.h b/drivers/scsi/atari_scsi.h
index 11c624b..3299d91 100644
--- a/drivers/scsi/atari_scsi.h
+++ b/drivers/scsi/atari_scsi.h

@@ -54,125 +54,6 @@
 #define	NCR5380_dma_xfer_len(i,cmd,phase) \
 	atari_dma_xfer_len(cmd->SCp.this_residual,cmd,((phase) & SR_IO) ? 0 : 1)
 
-/* former generic SCSI error handling stuff */
-
-#define SCSI_ABORT_SNOOZE 0
-#define SCSI_ABORT_SUCCESS 1
-#define SCSI_ABORT_PENDING 2
-#define SCSI_ABORT_BUSY 3
-#define SCSI_ABORT_NOT_RUNNING 4
-#define SCSI_ABORT_ERROR 5
-
-#define SCSI_RESET_SNOOZE 0
-#define SCSI_RESET_PUNT 1
-#define SCSI_RESET_SUCCESS 2
-#define SCSI_RESET_PENDING 3
-#define SCSI_RESET_WAKEUP 4
-#define SCSI_RESET_NOT_RUNNING 5
-#define SCSI_RESET_ERROR 6
-
-#define SCSI_RESET_SYNCHRONOUS		0x01
-#define SCSI_RESET_ASYNCHRONOUS		0x02
-#define SCSI_RESET_SUGGEST_BUS_RESET	0x04
-#define SCSI_RESET_SUGGEST_HOST_RESET	0x08
-
-#define SCSI_RESET_BUS_RESET 0x100
-#define SCSI_RESET_HOST_RESET 0x200
-#define SCSI_RESET_ACTION   0xff
-
-/* Debugging printk definitions:
- *
- *  ARB  -> arbitration
- *  ASEN -> auto-sense
- *  DMA  -> DMA
- *  HSH  -> PIO handshake
- *  INF  -> information transfer
- *  INI  -> initialization
- *  INT  -> interrupt
- *  LNK  -> linked commands
- *  MAIN -> NCR5380_main() control flow
- *  NDAT -> no data-out phase
- *  NWR  -> no write commands
- *  PIO  -> PIO transfers
- *  PDMA -> pseudo DMA (unused on Atari)
- *  QU   -> queues
- *  RSL  -> reselections
- *  SEL  -> selections
- *  USL  -> usleep cpde (unused on Atari)
- *  LBS  -> last byte sent (unused on Atari)
- *  RSS  -> restarting of selections
- *  EXT  -> extended messages
- *  ABRT -> aborting and resetting
- *  TAG  -> queue tag handling
- *  MER  -> merging of consec. buffers
- *
- */
-
-#define dprint(flg, format...)			\
-({						\
-	if (NDEBUG & (flg))			\
-		printk(KERN_DEBUG format);	\
-})
-
-#define ARB_PRINTK(format, args...) \
-	dprint(NDEBUG_ARBITRATION, format , ## args)
-#define ASEN_PRINTK(format, args...) \
-	dprint(NDEBUG_AUTOSENSE, format , ## args)
-#define DMA_PRINTK(format, args...) \
-	dprint(NDEBUG_DMA, format , ## args)
-#define HSH_PRINTK(format, args...) \
-	dprint(NDEBUG_HANDSHAKE, format , ## args)
-#define INF_PRINTK(format, args...) \
-	dprint(NDEBUG_INFORMATION, format , ## args)
-#define INI_PRINTK(format, args...) \
-	dprint(NDEBUG_INIT, format , ## args)
-#define INT_PRINTK(format, args...) \
-	dprint(NDEBUG_INTR, format , ## args)
-#define LNK_PRINTK(format, args...) \
-	dprint(NDEBUG_LINKED, format , ## args)
-#define MAIN_PRINTK(format, args...) \
-	dprint(NDEBUG_MAIN, format , ## args)
-#define NDAT_PRINTK(format, args...) \
-	dprint(NDEBUG_NO_DATAOUT, format , ## args)
-#define NWR_PRINTK(format, args...) \
-	dprint(NDEBUG_NO_WRITE, format , ## args)
-#define PIO_PRINTK(format, args...) \
-	dprint(NDEBUG_PIO, format , ## args)
-#define PDMA_PRINTK(format, args...) \
-	dprint(NDEBUG_PSEUDO_DMA, format , ## args)
-#define QU_PRINTK(format, args...) \
-	dprint(NDEBUG_QUEUES, format , ## args)
-#define RSL_PRINTK(format, args...) \
-	dprint(NDEBUG_RESELECTION, format , ## args)
-#define SEL_PRINTK(format, args...) \
-	dprint(NDEBUG_SELECTION, format , ## args)
-#define USL_PRINTK(format, args...) \
-	dprint(NDEBUG_USLEEP, format , ## args)
-#define LBS_PRINTK(format, args...) \
-	dprint(NDEBUG_LAST_BYTE_SENT, format , ## args)
-#define RSS_PRINTK(format, args...) \
-	dprint(NDEBUG_RESTART_SELECT, format , ## args)
-#define EXT_PRINTK(format, args...) \
-	dprint(NDEBUG_EXTENDED, format , ## args)
-#define ABRT_PRINTK(format, args...) \
-	dprint(NDEBUG_ABORT, format , ## args)
-#define TAG_PRINTK(format, args...) \
-	dprint(NDEBUG_TAGS, format , ## args)
-#define MER_PRINTK(format, args...) \
-	dprint(NDEBUG_MERGING, format , ## args)
-
-/* conditional macros for NCR5380_print_{,phase,status} */
-
-#define NCR_PRINT(mask)	\
-	((NDEBUG & (mask)) ? NCR5380_print(instance) : (void)0)
-
-#define NCR_PRINT_PHASE(mask) \
-	((NDEBUG & (mask)) ? NCR5380_print_phase(instance) : (void)0)
-
-#define NCR_PRINT_STATUS(mask) \
-	((NDEBUG & (mask)) ? NCR5380_print_status(instance) : (void)0)
-
-
 #endif /* ndef ASM */
 #endif /* ATARI_SCSI_H */
 

diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 1bfb0bd..860f527 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h

@@ -83,9 +83,20 @@
 
 /*ISCSI */
 
+struct be_aic_obj {		/* Adaptive interrupt coalescing (AIC) info */
+	bool enable;		/* NOTE(review): not referenced in the hunks shown here - confirm use */
+	u32 min_eqd;		/* in usecs */
+	u32 max_eqd;		/* in usecs */
+	u32 prev_eqd;		/* in usecs */
+	u32 et_eqd;		/* configured val when aic is off */
+	ulong jiffs;		/* jiffies recorded by be_eqd_update() at its previous sample */
+	u64 eq_prev;		/* be_eq_obj.cq_count at the previous sample; used to calculate eqe */
+};
+
 struct be_eq_obj {
 	bool todo_mcc_cq;
 	bool todo_cq;
+	u32 cq_count;
 	struct be_queue_info q;
 	struct beiscsi_hba *phba;
 	struct be_queue_info *cq;

diff --git a/drivers/scsi/be2iscsi/be_cmds.h b/drivers/scsi/be2iscsi/be_cmds.h
index 7cf7f99..cc7405c 100644
--- a/drivers/scsi/be2iscsi/be_cmds.h
+++ b/drivers/scsi/be2iscsi/be_cmds.h

@@ -71,6 +71,7 @@
 #define BEISCSI_FW_MBX_TIMEOUT	100
 
 /* MBOX Command VER */
+#define MBX_CMD_VER1	0x01
 #define MBX_CMD_VER2	0x02
 
 struct be_mcc_compl {
@@ -271,6 +272,12 @@
 	u16 rsvd0;		/* sword */
 } __packed;
 
+struct be_set_eqd {		/* one per-EQ entry passed to be_cmd_modify_eq_delay() */
+	u32 eq_id;		/* be_eqd_update() fills this from be_eq_obj.q.id */
+	u32 phase;		/* not read by be_cmd_modify_eq_delay(); it always sends 0 */
+	u32 delay_multiplier;	/* be_eqd_update() stores (eqd * 65) / 100 here */
+} __packed;
+
 struct mgmt_chap_format {
 	u32 flags;
 	u8  intr_chap_name[256];
@@ -622,7 +629,7 @@
 		u32 eq_id;
 		u32 phase;
 		u32 delay_multiplier;
-	} delay[8];
+	} delay[MAX_CPUS];
 } __packed;
 
 /******************** Get MAC ADDR *******************/
@@ -708,6 +715,8 @@
 
 void free_mcc_tag(struct be_ctrl_info *ctrl, unsigned int tag);
 
+int be_cmd_modify_eq_delay(struct beiscsi_hba *phba, struct be_set_eqd *,
+			    int num);
 int beiscsi_mccq_compl(struct beiscsi_hba *phba,
 			uint32_t tag, struct be_mcc_wrb **wrb,
 			struct be_dma_mem *mbx_cmd_mem);
@@ -1005,6 +1014,26 @@
 	u8 rsvd0[3];
 } __packed;
 
+struct tcp_connect_and_offload_in_v1 {	/* request layout sized for !is_chip_be2_be3r() chips */
+	struct be_cmd_req_hdr hdr;	/* hdr.version is set to MBX_CMD_VER1 for those chips */
+	struct ip_addr_format ip_address;
+	u16 tcp_port;
+	u16 cid;
+	u16 cq_id;
+	u16 defq_id;
+	struct phys_addr dataout_template_pa;
+	u16 hdr_ring_id;
+	u16 data_ring_id;
+	u8 do_offload;
+	u8 ifd_state;
+	u8 rsvd0[2];
+	u16 tcp_window_size;		/* written as 0 together with hdr.version */
+	u8 tcp_window_scale_count;	/* written as 2 together with hdr.version */
+	u8 rsvd1;
+	u32 tcp_mss:24;
+	u8 rsvd2;
+} __packed;
+
 struct tcp_connect_and_offload_out {
 	struct be_cmd_resp_hdr hdr;
 	u32 connection_handle;

diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index a3df433..fd284ff 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c

@@ -1106,7 +1106,7 @@
 	struct beiscsi_hba *phba = beiscsi_ep->phba;
 	struct tcp_connect_and_offload_out *ptcpcnct_out;
 	struct be_dma_mem nonemb_cmd;
-	unsigned int tag;
+	unsigned int tag, req_memsize;
 	int ret = -ENOMEM;
 
 	beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG,
@@ -1127,8 +1127,14 @@
 		       (beiscsi_ep->ep_cid)] = ep;
 
 	beiscsi_ep->cid_vld = 0;
+
+	if (is_chip_be2_be3r(phba))
+		req_memsize = sizeof(struct tcp_connect_and_offload_in);
+	else
+		req_memsize = sizeof(struct tcp_connect_and_offload_in_v1);
+
 	nonemb_cmd.va = pci_alloc_consistent(phba->ctrl.pdev,
-				sizeof(struct tcp_connect_and_offload_in),
+				req_memsize,
 				&nonemb_cmd.dma);
 	if (nonemb_cmd.va == NULL) {
 
@@ -1139,7 +1145,7 @@
 		beiscsi_free_ep(beiscsi_ep);
 		return -ENOMEM;
 	}
-	nonemb_cmd.size = sizeof(struct tcp_connect_and_offload_in);
+	nonemb_cmd.size = req_memsize;
 	memset(nonemb_cmd.va, 0, nonemb_cmd.size);
 	tag = mgmt_open_connection(phba, dst_addr, beiscsi_ep, &nonemb_cmd);
 	if (tag <= 0) {

diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 0d82229..5543490 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c

@@ -599,15 +599,7 @@
 	pci_set_drvdata(pcidev, phba);
 	phba->interface_handle = 0xFFFFFFFF;
 
-	if (iscsi_host_add(shost, &phba->pcidev->dev))
-		goto free_devices;
-
 	return phba;
-
-free_devices:
-	pci_dev_put(phba->pcidev);
-	iscsi_host_free(phba->shost);
-	return NULL;
 }
 
 static void beiscsi_unmap_pci_function(struct beiscsi_hba *phba)
@@ -2279,6 +2271,7 @@
 
 	pbe_eq = container_of(iop, struct be_eq_obj, iopoll);
 	ret = beiscsi_process_cq(pbe_eq);
+	pbe_eq->cq_count += ret;
 	if (ret < budget) {
 		phba = pbe_eq->phba;
 		blk_iopoll_complete(iop);
@@ -3692,7 +3685,7 @@
 	struct hwi_controller *phwi_ctrlr;
 	struct hwi_context_memory *phwi_context;
 	struct hwi_async_pdu_context *pasync_ctx;
-	int i, eq_num, ulp_num;
+	int i, eq_for_mcc, ulp_num;
 
 	phwi_ctrlr = phba->phwi_ctrlr;
 	phwi_context = phwi_ctrlr->phwi_ctxt;
@@ -3729,16 +3722,17 @@
 		if (q->created)
 			beiscsi_cmd_q_destroy(ctrl, q, QTYPE_CQ);
 	}
+
+	be_mcc_queues_destroy(phba);
 	if (phba->msix_enabled)
-		eq_num = 1;
+		eq_for_mcc = 1;
 	else
-		eq_num = 0;
-	for (i = 0; i < (phba->num_cpus + eq_num); i++) {
+		eq_for_mcc = 0;
+	for (i = 0; i < (phba->num_cpus + eq_for_mcc); i++) {
 		q = &phwi_context->be_eq[i].q;
 		if (q->created)
 			beiscsi_cmd_q_destroy(ctrl, q, QTYPE_EQ);
 	}
-	be_mcc_queues_destroy(phba);
 	be_cmd_fw_uninit(ctrl);
 }
 
@@ -3833,9 +3827,9 @@
 
 	phwi_ctrlr = phba->phwi_ctrlr;
 	phwi_context = phwi_ctrlr->phwi_ctxt;
-	phwi_context->max_eqd = 0;
+	phwi_context->max_eqd = 128;
 	phwi_context->min_eqd = 0;
-	phwi_context->cur_eqd = 64;
+	phwi_context->cur_eqd = 0;
 	be_cmd_fw_initialize(&phba->ctrl);
 
 	status = beiscsi_create_eqs(phba, phwi_context);
@@ -5290,6 +5284,57 @@
 	return;
 }
 
+/**
+ * be_eqd_update()- Recompute the delay of each event queue from its load
+ * @phba: Device priv structure instance
+ *
+ * For EQ index 0..phba->num_cpus, derive a completions-per-second figure
+ * from the growth of be_eq_obj.cq_count since the previous call, map it
+ * to a delay clamped to the min_eqd/max_eqd of the HWI context, and
+ * collect every EQ whose delay differs from the one recorded last time.
+ * The collected entries are handed to be_cmd_modify_eq_delay() in a single
+ * call and the returned tag is passed to beiscsi_mccq_compl().
+ */
+static void be_eqd_update(struct beiscsi_hba *phba)
+{
+	struct be_set_eqd set_eqd[MAX_CPUS];
+	struct be_aic_obj *aic;
+	struct be_eq_obj *pbe_eq;
+	struct hwi_controller *phwi_ctrlr;
+	struct hwi_context_memory *phwi_context;
+	int eqd, i, num = 0;
+	ulong now;
+	u32 pps, delta;
+	unsigned int tag;
+
+	phwi_ctrlr = phba->phwi_ctrlr;
+	phwi_context = phwi_ctrlr->phwi_ctxt;
+
+	/*
+	 * set_eqd[] and phba->aic_obj[] both hold MAX_CPUS entries while the
+	 * loop visits num_cpus + 1 slots, so bound the index explicitly.
+	 */
+	for (i = 0; i <= phba->num_cpus && i < MAX_CPUS; i++) {
+		aic = &phba->aic_obj[i];
+		pbe_eq = &phwi_context->be_eq[i];
+		now = jiffies;
+
+		/* First sample, or a wrapped clock/counter: restart baseline */
+		if (!aic->jiffs || time_before(now, aic->jiffs) ||
+		    pbe_eq->cq_count < aic->eq_prev) {
+			aic->jiffs = now;
+			aic->eq_prev = pbe_eq->cq_count;
+			continue;
+		}
+
+		delta = jiffies_to_msecs(now - aic->jiffs);
+		/*
+		 * No measurable time has passed since the last sample; keep
+		 * the baseline and avoid dividing by zero below.
+		 */
+		if (!delta)
+			continue;
+
+		pps = (((u32)(pbe_eq->cq_count - aic->eq_prev) * 1000) / delta);
+		eqd = (pps / 1500) << 2;
+
+		if (eqd < 8)
+			eqd = 0;
+		eqd = min_t(u32, eqd, phwi_context->max_eqd);
+		eqd = max_t(u32, eqd, phwi_context->min_eqd);
+
+		aic->jiffs = now;
+		aic->eq_prev = pbe_eq->cq_count;
+
+		/* Only queue EQs whose delay actually changed */
+		if (eqd != aic->prev_eqd) {
+			set_eqd[num].delay_multiplier = (eqd * 65)/100;
+			set_eqd[num].eq_id = pbe_eq->q.id;
+			aic->prev_eqd = eqd;
+			num++;
+		}
+	}
+	if (num) {
+		tag = be_cmd_modify_eq_delay(phba, set_eqd, num);
+		if (tag)
+			beiscsi_mccq_compl(phba, tag, NULL, NULL);
+	}
+}
+
 /*
  * beiscsi_hw_health_check()- Check adapter health
  * @work: work item to check HW health
@@ -5303,6 +5348,8 @@
 		container_of(work, struct beiscsi_hba,
 			     beiscsi_hw_check_task.work);
 
+	be_eqd_update(phba);
+
 	beiscsi_ue_detect(phba);
 
 	schedule_delayed_work(&phba->beiscsi_hw_check_task,
@@ -5579,7 +5626,7 @@
 		phba->ctrl.mcc_numtag[i + 1] = 0;
 		phba->ctrl.mcc_tag_available++;
 		memset(&phba->ctrl.ptag_state[i].tag_mem_state, 0,
-		       sizeof(struct beiscsi_mcc_tag_state));
+		       sizeof(struct be_dma_mem));
 	}
 
 	phba->ctrl.mcc_alloc_index = phba->ctrl.mcc_free_index = 0;
@@ -5621,6 +5668,9 @@
 	}
 	hwi_enable_intr(phba);
 
+	if (iscsi_host_add(phba->shost, &phba->pcidev->dev))
+		goto free_blkenbld;
+
 	if (beiscsi_setup_boot_info(phba))
 		/*
 		 * log error but continue, because we may not be using

diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 9380b55..9ceab42 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h

@@ -36,7 +36,7 @@
 #include <scsi/scsi_transport_iscsi.h>
 
 #define DRV_NAME		"be2iscsi"
-#define BUILD_STR		"10.2.125.0"
+#define BUILD_STR		"10.2.273.0"
 #define BE_NAME			"Emulex OneConnect" \
 				"Open-iSCSI Driver version" BUILD_STR
 #define DRV_DESC		BE_NAME " " "Driver"
@@ -71,8 +71,8 @@
 
 #define BEISCSI_SGLIST_ELEMENTS	30
 
-#define BEISCSI_CMD_PER_LUN	128	/* scsi_host->cmd_per_lun */
-#define BEISCSI_MAX_SECTORS	2048	/* scsi_host->max_sectors */
+#define BEISCSI_CMD_PER_LUN	128 /* scsi_host->cmd_per_lun */
+#define BEISCSI_MAX_SECTORS	1024 /* scsi_host->max_sectors */
 #define BEISCSI_TEMPLATE_HDR_PER_CXN_SIZE 128 /* Template size per cxn */
 
 #define BEISCSI_MAX_CMD_LEN	16	/* scsi_host->max_cmd_len */
@@ -427,6 +427,7 @@
 	struct mgmt_session_info boot_sess;
 	struct invalidate_command_table inv_tbl[128];
 
+	struct be_aic_obj aic_obj[MAX_CPUS];
 	unsigned int attr_log_enable;
 	int (*iotask_fn)(struct iscsi_task *,
 			struct scatterlist *sg,

diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index 088bdf7..6045aa7 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c

@@ -155,6 +155,43 @@
 	}
 }
 
+/**
+ * be_cmd_modify_eq_delay()- Build and notify a MODIFY_EQ_DELAY request
+ * @phba: Device priv structure instance
+ * @set_eqd: Array of per-EQ settings to copy into the request
+ * @num: Number of @set_eqd entries to copy
+ *
+ * The request is prepared and notified while holding ctrl->mbox_lock.
+ *
+ * return
+ *	the tag from alloc_mcc_tag(), or 0 when no tag was available
+ **/
+int be_cmd_modify_eq_delay(struct beiscsi_hba *phba,
+		 struct be_set_eqd *set_eqd, int num)
+{
+	struct be_ctrl_info *ctrl = &phba->ctrl;
+	struct be_cmd_req_modify_eq_delay *req;
+	struct be_mcc_wrb *wrb;
+	unsigned int tag;
+	int idx;
+
+	spin_lock(&ctrl->mbox_lock);
+
+	tag = alloc_mcc_tag(phba);
+	if (!tag)
+		goto unlock;
+
+	wrb = wrb_from_mccq(phba);
+	req = embedded_payload(wrb);
+	wrb->tag0 |= tag;
+
+	be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0);
+	be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
+		OPCODE_COMMON_MODIFY_EQ_DELAY, sizeof(*req));
+
+	req->num_eq = cpu_to_le32(num);
+	for (idx = 0; idx < num; idx++) {
+		const struct be_set_eqd *src = &set_eqd[idx];
+
+		req->delay[idx].eq_id = cpu_to_le32(src->eq_id);
+		/* the caller's phase value is not used; 0 is always sent */
+		req->delay[idx].phase = 0;
+		req->delay[idx].delay_multiplier =
+				cpu_to_le32(src->delay_multiplier);
+	}
+
+	be_mcc_notify(phba);
+
+unlock:
+	spin_unlock(&ctrl->mbox_lock);
+	return tag;
+}
+
 /**
  * mgmt_reopen_session()- Reopen a session based on reopen_type
  * @phba: Device priv structure instance
@@ -447,8 +484,8 @@
 					 struct be_dma_mem *nonemb_cmd)
 {
 	struct be_cmd_resp_hdr *resp;
-	struct be_mcc_wrb *wrb = wrb_from_mccq(phba);
-	struct be_sge *mcc_sge = nonembedded_sgl(wrb);
+	struct be_mcc_wrb *wrb;
+	struct be_sge *mcc_sge;
 	unsigned int tag = 0;
 	struct iscsi_bsg_request *bsg_req = job->request;
 	struct be_bsg_vendor_cmd *req = nonemb_cmd->va;
@@ -465,7 +502,6 @@
 	req->sector = sector;
 	req->offset = offset;
 	spin_lock(&ctrl->mbox_lock);
-	memset(wrb, 0, sizeof(*wrb));
 
 	switch (bsg_req->rqst_data.h_vendor.vendor_cmd[0]) {
 	case BEISCSI_WRITE_FLASH:
@@ -495,6 +531,8 @@
 		return tag;
 	}
 
+	wrb = wrb_from_mccq(phba);
+	mcc_sge = nonembedded_sgl(wrb);
 	be_wrb_hdr_prepare(wrb, nonemb_cmd->size, false,
 			   job->request_payload.sg_cnt);
 	mcc_sge->pa_hi = cpu_to_le32(upper_32_bits(nonemb_cmd->dma));
@@ -525,7 +563,6 @@
 	int status = 0;
 
 	spin_lock(&ctrl->mbox_lock);
-	memset(wrb, 0, sizeof(*wrb));
 
 	be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0);
 	be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ISCSI,
@@ -675,7 +712,7 @@
 	struct sockaddr_in6 *daddr_in6 = (struct sockaddr_in6 *)dst_addr;
 	struct be_ctrl_info *ctrl = &phba->ctrl;
 	struct be_mcc_wrb *wrb;
-	struct tcp_connect_and_offload_in *req;
+	struct tcp_connect_and_offload_in_v1 *req;
 	unsigned short def_hdr_id;
 	unsigned short def_data_id;
 	struct phys_addr template_address = { 0, 0 };
@@ -702,17 +739,16 @@
 		return tag;
 	}
 	wrb = wrb_from_mccq(phba);
-	memset(wrb, 0, sizeof(*wrb));
 	sge = nonembedded_sgl(wrb);
 
 	req = nonemb_cmd->va;
 	memset(req, 0, sizeof(*req));
 	wrb->tag0 |= tag;
 
-	be_wrb_hdr_prepare(wrb, sizeof(*req), false, 1);
+	be_wrb_hdr_prepare(wrb, nonemb_cmd->size, false, 1);
 	be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ISCSI,
 			   OPCODE_COMMON_ISCSI_TCP_CONNECT_AND_OFFLOAD,
-			   sizeof(*req));
+			   nonemb_cmd->size);
 	if (dst_addr->sa_family == PF_INET) {
 		__be32 s_addr = daddr_in->sin_addr.s_addr;
 		req->ip_address.ip_type = BE2_IPV4;
@@ -758,6 +794,13 @@
 	sge->pa_hi = cpu_to_le32(upper_32_bits(nonemb_cmd->dma));
 	sge->pa_lo = cpu_to_le32(nonemb_cmd->dma & 0xFFFFFFFF);
 	sge->len = cpu_to_le32(nonemb_cmd->size);
+
+	if (!is_chip_be2_be3r(phba)) {
+		req->hdr.version = MBX_CMD_VER1;
+		req->tcp_window_size = 0;
+		req->tcp_window_scale_count = 2;
+	}
+
 	be_mcc_notify(phba);
 	spin_unlock(&ctrl->mbox_lock);
 	return tag;
@@ -804,7 +847,7 @@
 				int resp_buf_len)
 {
 	struct be_ctrl_info *ctrl = &phba->ctrl;
-	struct be_mcc_wrb *wrb = wrb_from_mccq(phba);
+	struct be_mcc_wrb *wrb;
 	struct be_sge *sge;
 	unsigned int tag;
 	int rc = 0;
@@ -816,7 +859,8 @@
 		rc = -ENOMEM;
 		goto free_cmd;
 	}
-	memset(wrb, 0, sizeof(*wrb));
+
+	wrb = wrb_from_mccq(phba);
 	wrb->tag0 |= tag;
 	sge = nonembedded_sgl(wrb);
 

diff --git a/drivers/scsi/be2iscsi/be_mgmt.h b/drivers/scsi/be2iscsi/be_mgmt.h
index 01b8c97..24a8fc5 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.h
+++ b/drivers/scsi/be2iscsi/be_mgmt.h

@@ -335,5 +335,7 @@
 void beiscsi_offload_cxn_v2(struct beiscsi_offload_params *params,
 			     struct wrb_handle *pwrb_handle);
 void beiscsi_ue_detect(struct beiscsi_hba *phba);
+int be_cmd_modify_eq_delay(struct beiscsi_hba *phba,
+			 struct be_set_eqd *, int num);
 
 #endif

diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index cc0fbcd..7593b7c 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c

@@ -507,7 +507,7 @@
 	struct bfad_vport_s   *vport;
 	int rc;
 
-	vport = kzalloc(sizeof(struct bfad_vport_s), GFP_KERNEL);
+	vport = kzalloc(sizeof(struct bfad_vport_s), GFP_ATOMIC);
 	if (!vport) {
 		bfa_trc(bfad, 0);
 		return;

diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index 46a3765..512aed3 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c

@@ -1966,26 +1966,29 @@
 {
 	int i;
 	int segment_count;
-	int hash_table_size;
 	u32 *pbl;
 
-	segment_count = hba->hash_tbl_segment_count;
-	hash_table_size = BNX2FC_NUM_MAX_SESS * BNX2FC_MAX_ROWS_IN_HASH_TBL *
-		sizeof(struct fcoe_hash_table_entry);
+	if (hba->hash_tbl_segments) {
 
-	pbl = hba->hash_tbl_pbl;
-	for (i = 0; i < segment_count; ++i) {
-		dma_addr_t dma_address;
+		pbl = hba->hash_tbl_pbl;
+		if (pbl) {
+			segment_count = hba->hash_tbl_segment_count;
+			for (i = 0; i < segment_count; ++i) {
+				dma_addr_t dma_address;
 
-		dma_address = le32_to_cpu(*pbl);
-		++pbl;
-		dma_address += ((u64)le32_to_cpu(*pbl)) << 32;
-		++pbl;
-		dma_free_coherent(&hba->pcidev->dev,
-				  BNX2FC_HASH_TBL_CHUNK_SIZE,
-				  hba->hash_tbl_segments[i],
-				  dma_address);
+				dma_address = le32_to_cpu(*pbl);
+				++pbl;
+				dma_address += ((u64)le32_to_cpu(*pbl)) << 32;
+				++pbl;
+				dma_free_coherent(&hba->pcidev->dev,
+						  BNX2FC_HASH_TBL_CHUNK_SIZE,
+						  hba->hash_tbl_segments[i],
+						  dma_address);
+			}
+		}
 
+		kfree(hba->hash_tbl_segments);
+		hba->hash_tbl_segments = NULL;
 	}
 
 	if (hba->hash_tbl_pbl) {
@@ -2023,7 +2026,7 @@
 	dma_segment_array = kzalloc(dma_segment_array_size, GFP_KERNEL);
 	if (!dma_segment_array) {
 		printk(KERN_ERR PFX "hash table pointers (dma) alloc failed\n");
-		return -ENOMEM;
+		goto cleanup_ht;
 	}
 
 	for (i = 0; i < segment_count; ++i) {
@@ -2034,15 +2037,7 @@
 					   GFP_KERNEL);
 		if (!hba->hash_tbl_segments[i]) {
 			printk(KERN_ERR PFX "hash segment alloc failed\n");
-			while (--i >= 0) {
-				dma_free_coherent(&hba->pcidev->dev,
-						    BNX2FC_HASH_TBL_CHUNK_SIZE,
-						    hba->hash_tbl_segments[i],
-						    dma_segment_array[i]);
-				hba->hash_tbl_segments[i] = NULL;
-			}
-			kfree(dma_segment_array);
-			return -ENOMEM;
+			goto cleanup_dma;
 		}
 		memset(hba->hash_tbl_segments[i], 0,
 		       BNX2FC_HASH_TBL_CHUNK_SIZE);
@@ -2054,8 +2049,7 @@
 					       GFP_KERNEL);
 	if (!hba->hash_tbl_pbl) {
 		printk(KERN_ERR PFX "hash table pbl alloc failed\n");
-		kfree(dma_segment_array);
-		return -ENOMEM;
+		goto cleanup_dma;
 	}
 	memset(hba->hash_tbl_pbl, 0, PAGE_SIZE);
 
@@ -2080,6 +2074,22 @@
 	}
 	kfree(dma_segment_array);
 	return 0;
+
+cleanup_dma:
+	for (i = 0; i < segment_count; ++i) {
+		if (hba->hash_tbl_segments[i])
+			dma_free_coherent(&hba->pcidev->dev,
+					    BNX2FC_HASH_TBL_CHUNK_SIZE,
+					    hba->hash_tbl_segments[i],
+					    dma_segment_array[i]);
+	}
+
+	kfree(dma_segment_array);
+
+cleanup_ht:
+	kfree(hba->hash_tbl_segments);
+	hba->hash_tbl_segments = NULL;
+	return -ENOMEM;
 }
 
 /**

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5248c88..7bcf67e 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c

@@ -120,6 +120,7 @@
 			    "%s: blk_get_request failed\n", __func__);
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -128,7 +129,6 @@
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = ALUA_FAILOVER_RETRIES;

diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index e1c8be0..6f07f7f 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c

@@ -280,6 +280,7 @@
 		return NULL;
 	}
 
+	blk_rq_set_block_pc(rq);
 	rq->cmd_len = COMMAND_SIZE(cmd);
 	rq->cmd[0] = cmd;
 
@@ -304,7 +305,6 @@
 		break;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->timeout = CLARIION_TIMEOUT;

diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 084062b..e9d9fea 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c

@@ -120,7 +120,7 @@
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
@@ -250,7 +250,7 @@
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(START_STOP);

diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 4b9cf93..826069d 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c

@@ -279,6 +279,7 @@
 				"get_rdac_req: blk_get_request failed.\n");
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -287,7 +288,6 @@
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = RDAC_RETRIES;

diff --git a/drivers/scsi/dtc.c b/drivers/scsi/dtc.c
index eb29fe7..0a667fe 100644
--- a/drivers/scsi/dtc.c
+++ b/drivers/scsi/dtc.c

@@ -3,8 +3,6 @@
 #define PSEUDO_DMA
 #define DONT_USE_INTR
 #define UNSAFE			/* Leave interrupts enabled during pseudo-dma I/O */
-#define xNDEBUG (NDEBUG_INTR+NDEBUG_RESELECTION+\
-		 NDEBUG_SELECTION+NDEBUG_ARBITRATION)
 #define DMA_WORKS_RIGHT
 
 

diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index f37f3e3..6504a19 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c

@@ -390,7 +390,7 @@
 	esas2r_log_dev(ESAS2R_LOG_INFO, &(pcid->dev),
 		       "pci_enable_device() OK");
 	esas2r_log_dev(ESAS2R_LOG_INFO, &(pcid->dev),
-		       "after pci_device_enable() enable_cnt: %d",
+		       "after pci_enable_device() enable_cnt: %d",
 		       pcid->enable_cnt.counter);
 
 	host = scsi_host_alloc(&driver_template, host_alloc_size);

diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 528d43b..1d3521e 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h

@@ -39,14 +39,15 @@
 
 #define DRV_NAME		"fnic"
 #define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
-#define DRV_VERSION		"1.5.0.45"
+#define DRV_VERSION		"1.6.0.10"
 #define PFX			DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
 #define DESC_CLEAN_LOW_WATERMARK 8
 #define FNIC_UCSM_DFLT_THROTTLE_CNT_BLD	16 /* UCSM default throttle count */
 #define FNIC_MIN_IO_REQ			256 /* Min IO throttle count */
-#define FNIC_MAX_IO_REQ		2048 /* scsi_cmnd tag map entries */
+#define FNIC_MAX_IO_REQ		1024 /* scsi_cmnd tag map entries */
+#define FNIC_DFLT_IO_REQ        256 /* Default scsi_cmnd tag map entries */
 #define	FNIC_IO_LOCKS		64 /* IO locks: power of 2 */
 #define FNIC_DFLT_QUEUE_DEPTH	32
 #define	FNIC_STATS_RATE_LIMIT	4 /* limit rate at which stats are pulled up */

diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c
index b6073f8..2c613bd 100644
--- a/drivers/scsi/fnic/fnic_debugfs.c
+++ b/drivers/scsi/fnic/fnic_debugfs.c

@@ -25,6 +25,21 @@
 static struct dentry *fnic_trace_enable;
 static struct dentry *fnic_stats_debugfs_root;
 
+static struct dentry *fnic_fc_trace_debugfs_file;
+static struct dentry *fnic_fc_rdata_trace_debugfs_file;
+static struct dentry *fnic_fc_trace_enable;
+static struct dentry *fnic_fc_trace_clear;
+
+struct fc_trace_flag_type {	/* selector codes held in the vmalloc'ed fc_trc_flag */
+	u8 fc_row_file;		/* initialised to 0 */
+	u8 fc_normal_file;	/* initialised to 1 */
+	u8 fnic_trace;		/* initialised to 2; read handler reports fnic_tracing_enabled */
+	u8 fc_trace;		/* initialised to 3; read handler reports fnic_fc_tracing_enabled */
+	u8 fc_clear;		/* initialised to 4; read handler reports fnic_fc_trace_cleared */
+};
+
+static struct fc_trace_flag_type *fc_trc_flag;
+
 /*
  * fnic_debugfs_init - Initialize debugfs for fnic debug logging
  *
@@ -56,6 +71,18 @@
 		return rc;
 	}
 
+	/* Allocate memory to structure */
+	fc_trc_flag = (struct fc_trace_flag_type *)
+		vmalloc(sizeof(struct fc_trace_flag_type));
+
+	if (fc_trc_flag) {
+		fc_trc_flag->fc_row_file = 0;
+		fc_trc_flag->fc_normal_file = 1;
+		fc_trc_flag->fnic_trace = 2;
+		fc_trc_flag->fc_trace = 3;
+		fc_trc_flag->fc_clear = 4;
+	}
+
 	rc = 0;
 	return rc;
 }
@@ -74,15 +101,19 @@
 
 	debugfs_remove(fnic_trace_debugfs_root);
 	fnic_trace_debugfs_root = NULL;
+
+	if (fc_trc_flag)
+		vfree(fc_trc_flag);
 }
 
 /*
- * fnic_trace_ctrl_open - Open the trace_enable file
+ * fnic_trace_ctrl_open - Open the trace_enable file for fnic_trace
+ *               Or Open fc_trace_enable file for fc_trace
  * @inode: The inode pointer.
  * @file: The file pointer to attach the trace enable/disable flag.
  *
  * Description:
- * This routine opens a debugsfs file trace_enable.
+ * This routine opens a debugfs file trace_enable or fc_trace_enable.
  *
  * Returns:
  * This function returns zero if successful.
@@ -94,15 +125,19 @@
 }
 
 /*
- * fnic_trace_ctrl_read - Read a trace_enable debugfs file
+ * fnic_trace_ctrl_read -
+ *          Read trace_enable, fc_trace_enable
+ *              or fc_trace_clear debugfs file
  * @filp: The file pointer to read from.
  * @ubuf: The buffer to copy the data to.
  * @cnt: The number of bytes to read.
  * @ppos: The position in the file to start reading from.
  *
  * Description:
- * This routine reads value of variable fnic_tracing_enabled
- * and stores into local @buf. It will start reading file at @ppos and
+ * This routine reads value of variable fnic_tracing_enabled or
+ * fnic_fc_tracing_enabled or fnic_fc_trace_cleared
+ * and stores into local @buf.
+ * It will start reading file at @ppos and
  * copy up to @cnt of data to @ubuf from @buf.
  *
  * Returns:
@@ -114,13 +149,25 @@
 {
 	char buf[64];
 	int len;
-	len = sprintf(buf, "%u\n", fnic_tracing_enabled);
+	u8 *trace_type;
+	len = 0;
+	trace_type = (u8 *)filp->private_data;
+	if (*trace_type == fc_trc_flag->fnic_trace)
+		len = sprintf(buf, "%u\n", fnic_tracing_enabled);
+	else if (*trace_type == fc_trc_flag->fc_trace)
+		len = sprintf(buf, "%u\n", fnic_fc_tracing_enabled);
+	else if (*trace_type == fc_trc_flag->fc_clear)
+		len = sprintf(buf, "%u\n", fnic_fc_trace_cleared);
+	else
+		pr_err("fnic: Cannot read to any debugfs file\n");
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
 
 /*
- * fnic_trace_ctrl_write - Write to trace_enable debugfs file
+ * fnic_trace_ctrl_write -
+ * Write to trace_enable, fc_trace_enable or
+ *         fc_trace_clear debugfs file
  * @filp: The file pointer to write from.
  * @ubuf: The buffer to copy the data from.
  * @cnt: The number of bytes to write.
@@ -128,7 +175,8 @@
  *
  * Description:
  * This routine writes data from user buffer @ubuf to buffer @buf and
- * sets fnic_tracing_enabled value as per user input.
+ * sets fnic_tracing_enabled, fnic_fc_tracing_enabled or
+ * fnic_fc_trace_cleared value as per user input.
  *
  * Returns:
  * This function returns the amount of data that was written.
@@ -140,6 +188,8 @@
 	char buf[64];
 	unsigned long val;
 	int ret;
+	u8 *trace_type;
+	trace_type = (u8 *)filp->private_data;
 
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
@@ -153,12 +203,27 @@
 	if (ret < 0)
 		return ret;
 
-	fnic_tracing_enabled = val;
+	if (*trace_type == fc_trc_flag->fnic_trace)
+		fnic_tracing_enabled = val;
+	else if (*trace_type == fc_trc_flag->fc_trace)
+		fnic_fc_tracing_enabled = val;
+	else if (*trace_type == fc_trc_flag->fc_clear)
+		fnic_fc_trace_cleared = val;
+	else
+		pr_err("fnic: cannot write to any debufs file\n");
+
 	(*ppos)++;
 
 	return cnt;
 }
 
+static const struct file_operations fnic_trace_ctrl_fops = {
+	.owner = THIS_MODULE,
+	.open = fnic_trace_ctrl_open,
+	.read = fnic_trace_ctrl_read,
+	.write = fnic_trace_ctrl_write,
+};
+
 /*
  * fnic_trace_debugfs_open - Open the fnic trace log
  * @inode: The inode pointer
@@ -178,19 +243,36 @@
 				  struct file *file)
 {
 	fnic_dbgfs_t *fnic_dbg_prt;
+	u8 *rdata_ptr;
+	rdata_ptr = (u8 *)inode->i_private;
 	fnic_dbg_prt = kzalloc(sizeof(fnic_dbgfs_t), GFP_KERNEL);
 	if (!fnic_dbg_prt)
 		return -ENOMEM;
 
-	fnic_dbg_prt->buffer = vmalloc((3*(trace_max_pages * PAGE_SIZE)));
-	if (!fnic_dbg_prt->buffer) {
-		kfree(fnic_dbg_prt);
-		return -ENOMEM;
+	if (*rdata_ptr == fc_trc_flag->fnic_trace) {
+		fnic_dbg_prt->buffer = vmalloc(3 *
+					(trace_max_pages * PAGE_SIZE));
+		if (!fnic_dbg_prt->buffer) {
+			kfree(fnic_dbg_prt);
+			return -ENOMEM;
+		}
+		memset((void *)fnic_dbg_prt->buffer, 0,
+		3 * (trace_max_pages * PAGE_SIZE));
+		fnic_dbg_prt->buffer_len = fnic_get_trace_data(fnic_dbg_prt);
+	} else {
+		fnic_dbg_prt->buffer =
+			vmalloc(3 * (fnic_fc_trace_max_pages * PAGE_SIZE));
+		if (!fnic_dbg_prt->buffer) {
+			kfree(fnic_dbg_prt);
+			return -ENOMEM;
+		}
+		memset((void *)fnic_dbg_prt->buffer, 0,
+			3 * (fnic_fc_trace_max_pages * PAGE_SIZE));
+		fnic_dbg_prt->buffer_len =
+			fnic_fc_trace_get_data(fnic_dbg_prt, *rdata_ptr);
 	}
-	memset((void *)fnic_dbg_prt->buffer, 0,
-			  (3*(trace_max_pages * PAGE_SIZE)));
-	fnic_dbg_prt->buffer_len = fnic_get_trace_data(fnic_dbg_prt);
 	file->private_data = fnic_dbg_prt;
+
 	return 0;
 }
 
@@ -272,13 +354,6 @@
 	return 0;
 }
 
-static const struct file_operations fnic_trace_ctrl_fops = {
-	.owner = THIS_MODULE,
-	.open = fnic_trace_ctrl_open,
-	.read = fnic_trace_ctrl_read,
-	.write = fnic_trace_ctrl_write,
-};
-
 static const struct file_operations fnic_trace_debugfs_fops = {
 	.owner = THIS_MODULE,
 	.open = fnic_trace_debugfs_open,
@@ -306,9 +381,10 @@
 		return rc;
 	}
 	fnic_trace_enable = debugfs_create_file("tracing_enable",
-					  S_IFREG|S_IRUGO|S_IWUSR,
-					  fnic_trace_debugfs_root,
-					  NULL, &fnic_trace_ctrl_fops);
+					S_IFREG|S_IRUGO|S_IWUSR,
+					fnic_trace_debugfs_root,
+					&(fc_trc_flag->fnic_trace),
+					&fnic_trace_ctrl_fops);
 
 	if (!fnic_trace_enable) {
 		printk(KERN_DEBUG
@@ -317,10 +393,10 @@
 	}
 
 	fnic_trace_debugfs_file = debugfs_create_file("trace",
-						  S_IFREG|S_IRUGO|S_IWUSR,
-						  fnic_trace_debugfs_root,
-						  NULL,
-						  &fnic_trace_debugfs_fops);
+					S_IFREG|S_IRUGO|S_IWUSR,
+					fnic_trace_debugfs_root,
+					&(fc_trc_flag->fnic_trace),
+					&fnic_trace_debugfs_fops);
 
 	if (!fnic_trace_debugfs_file) {
 		printk(KERN_DEBUG
@@ -340,14 +416,104 @@
  */
 void fnic_trace_debugfs_terminate(void)
 {
-	if (fnic_trace_debugfs_file) {
-		debugfs_remove(fnic_trace_debugfs_file);
-		fnic_trace_debugfs_file = NULL;
+	debugfs_remove(fnic_trace_debugfs_file);
+	fnic_trace_debugfs_file = NULL;
+
+	debugfs_remove(fnic_trace_enable);
+	fnic_trace_enable = NULL;
+}
+
+/*
+ * fnic_fc_trace_debugfs_init -
+ * Initialize debugfs for fnic control frame trace logging
+ *
+ * Description:
+ * When Debugfs is configured this routine sets up the fnic_fc debugfs
+ * file system. It creates the files fc_trace and fc_trace_rdata to
+ * expose the fnic fc trace buffer output through debugfs, and the files
+ * fc_trace_enable and fc_trace_clear to control enabling/disabling and
+ * clearing of trace logging into the trace buffer.
+ */
+
+int fnic_fc_trace_debugfs_init(void)
+{
+	int rc = -1;
+
+	if (!fnic_trace_debugfs_root) {
+		pr_err("fnic:Debugfs root directory doesn't exist\n");
+		return rc;
 	}
-	if (fnic_trace_enable) {
-		debugfs_remove(fnic_trace_enable);
-		fnic_trace_enable = NULL;
+
+	fnic_fc_trace_enable = debugfs_create_file("fc_trace_enable",
+					S_IFREG|S_IRUGO|S_IWUSR,
+					fnic_trace_debugfs_root,
+					&(fc_trc_flag->fc_trace),
+					&fnic_trace_ctrl_fops);
+
+	if (!fnic_fc_trace_enable) {
+		pr_err("fnic: Failed create fc_trace_enable file\n");
+		return rc;
 	}
+
+	fnic_fc_trace_clear = debugfs_create_file("fc_trace_clear",
+					S_IFREG|S_IRUGO|S_IWUSR,
+					fnic_trace_debugfs_root,
+					&(fc_trc_flag->fc_clear),
+					&fnic_trace_ctrl_fops);
+
+	if (!fnic_fc_trace_clear) {
+		pr_err("fnic: Failed to create fc_trace_enable file\n");
+		return rc;
+	}
+
+	fnic_fc_rdata_trace_debugfs_file =
+		debugfs_create_file("fc_trace_rdata",
+				    S_IFREG|S_IRUGO|S_IWUSR,
+				    fnic_trace_debugfs_root,
+				    &(fc_trc_flag->fc_normal_file),
+				    &fnic_trace_debugfs_fops);
+
+	if (!fnic_fc_rdata_trace_debugfs_file) {
+		pr_err("fnic: Failed create fc_rdata_trace file\n");
+		return rc;
+	}
+
+	fnic_fc_trace_debugfs_file =
+		debugfs_create_file("fc_trace",
+				    S_IFREG|S_IRUGO|S_IWUSR,
+				    fnic_trace_debugfs_root,
+				    &(fc_trc_flag->fc_row_file),
+				    &fnic_trace_debugfs_fops);
+
+	if (!fnic_fc_trace_debugfs_file) {
+		pr_err("fnic: Failed to create fc_trace file\n");
+		return rc;
+	}
+	rc = 0;
+	return rc;
+}
+
+/*
+ * fnic_fc_trace_debugfs_terminate - Tear down debugfs infrastructure
+ *
+ * Description:
+ * When Debugfs is configured this routine removes debugfs file system
+ * elements that are specific to fnic_fc trace logging.
+ */
+
+void fnic_fc_trace_debugfs_terminate(void)
+{
+	debugfs_remove(fnic_fc_trace_debugfs_file);
+	fnic_fc_trace_debugfs_file = NULL;
+
+	debugfs_remove(fnic_fc_rdata_trace_debugfs_file);
+	fnic_fc_rdata_trace_debugfs_file = NULL;
+
+	debugfs_remove(fnic_fc_trace_enable);
+	fnic_fc_trace_enable = NULL;
+
+	debugfs_remove(fnic_fc_trace_clear);
+	fnic_fc_trace_clear = NULL;
 }
 
 /*

diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index 1671325..1b948f6 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c

@@ -66,19 +66,35 @@
 	fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev);
 
 	if (old_link_status == fnic->link_status) {
-		if (!fnic->link_status)
+		if (!fnic->link_status) {
 			/* DOWN -> DOWN */
 			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
-		else {
+			fnic_fc_trace_set_data(fnic->lport->host->host_no,
+				FNIC_FC_LE, "Link Status: DOWN->DOWN",
+				strlen("Link Status: DOWN->DOWN"));
+		} else {
 			if (old_link_down_cnt != fnic->link_down_cnt) {
 				/* UP -> DOWN -> UP */
 				fnic->lport->host_stats.link_failure_count++;
 				spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+				fnic_fc_trace_set_data(
+					fnic->lport->host->host_no,
+					FNIC_FC_LE,
+					"Link Status:UP_DOWN_UP",
+					strlen("Link_Status:UP_DOWN_UP")
+					);
 				FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
 					     "link down\n");
 				fcoe_ctlr_link_down(&fnic->ctlr);
 				if (fnic->config.flags & VFCF_FIP_CAPABLE) {
 					/* start FCoE VLAN discovery */
+					fnic_fc_trace_set_data(
+						fnic->lport->host->host_no,
+						FNIC_FC_LE,
+						"Link Status: UP_DOWN_UP_VLAN",
+						strlen(
+						"Link Status: UP_DOWN_UP_VLAN")
+						);
 					fnic_fcoe_send_vlan_req(fnic);
 					return;
 				}
@@ -88,22 +104,36 @@
 			} else
 				/* UP -> UP */
 				spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+				fnic_fc_trace_set_data(
+					fnic->lport->host->host_no, FNIC_FC_LE,
+					"Link Status: UP_UP",
+					strlen("Link Status: UP_UP"));
 		}
 	} else if (fnic->link_status) {
 		/* DOWN -> UP */
 		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 		if (fnic->config.flags & VFCF_FIP_CAPABLE) {
 			/* start FCoE VLAN discovery */
+				fnic_fc_trace_set_data(
+				fnic->lport->host->host_no,
+				FNIC_FC_LE, "Link Status: DOWN_UP_VLAN",
+				strlen("Link Status: DOWN_UP_VLAN"));
 			fnic_fcoe_send_vlan_req(fnic);
 			return;
 		}
 		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link up\n");
+		fnic_fc_trace_set_data(fnic->lport->host->host_no, FNIC_FC_LE,
+			"Link Status: DOWN_UP", strlen("Link Status: DOWN_UP"));
 		fcoe_ctlr_link_up(&fnic->ctlr);
 	} else {
 		/* UP -> DOWN */
 		fnic->lport->host_stats.link_failure_count++;
 		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link down\n");
+		fnic_fc_trace_set_data(
+			fnic->lport->host->host_no, FNIC_FC_LE,
+			"Link Status: UP_DOWN",
+			strlen("Link Status: UP_DOWN"));
 		fcoe_ctlr_link_down(&fnic->ctlr);
 	}
 
@@ -267,11 +297,6 @@
 
 	if (desc->fip_dtype == FIP_DT_FLOGI) {
 
-		shost_printk(KERN_DEBUG, lport->host,
-			  " FIP TYPE FLOGI: fab name:%llx "
-			  "vfid:%d map:%x\n",
-			  fip->sel_fcf->fabric_name, fip->sel_fcf->vfid,
-			  fip->sel_fcf->fc_map);
 		if (dlen < sizeof(*els) + sizeof(*fh) + 1)
 			return 0;
 
@@ -616,6 +641,10 @@
 					"using UCSM\n");
 			goto drop;
 		}
+		if ((fnic_fc_trace_set_data(fnic->lport->host->host_no,
+			FNIC_FC_RECV|0x80, (char *)skb->data, skb->len)) != 0) {
+			printk(KERN_ERR "fnic ctlr frame trace error!!!");
+		}
 		skb_queue_tail(&fnic->fip_frame_queue, skb);
 		queue_work(fnic_fip_queue, &fnic->fip_frame_work);
 		return 1;		/* let caller know packet was used */
@@ -844,6 +873,10 @@
 	}
 	fr_dev(fp) = fnic->lport;
 	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+	if ((fnic_fc_trace_set_data(fnic->lport->host->host_no, FNIC_FC_RECV,
+					(char *)skb->data, skb->len)) != 0) {
+		printk(KERN_ERR "fnic ctlr frame trace error!!!");
+	}
 
 	skb_queue_tail(&fnic->frame_queue, skb);
 	queue_work(fnic_event_queue, &fnic->frame_work);
@@ -951,6 +984,15 @@
 		vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
 		vlan_hdr->h_vlan_encapsulated_proto = eth_hdr->h_proto;
 		vlan_hdr->h_vlan_TCI = htons(fnic->vlan_id);
+		if ((fnic_fc_trace_set_data(fnic->lport->host->host_no,
+			FNIC_FC_SEND|0x80, (char *)eth_hdr, skb->len)) != 0) {
+			printk(KERN_ERR "fnic ctlr frame trace error!!!");
+		}
+	} else {
+		if ((fnic_fc_trace_set_data(fnic->lport->host->host_no,
+			FNIC_FC_SEND|0x80, (char *)skb->data, skb->len)) != 0) {
+			printk(KERN_ERR "fnic ctlr frame trace error!!!");
+		}
 	}
 
 	pa = pci_map_single(fnic->pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
@@ -1023,6 +1065,11 @@
 
 	pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE);
 
+	if ((fnic_fc_trace_set_data(fnic->lport->host->host_no, FNIC_FC_SEND,
+				(char *)eth_hdr, tot_len)) != 0) {
+		printk(KERN_ERR "fnic ctlr frame trace error!!!");
+	}
+
 	spin_lock_irqsave(&fnic->wq_lock[0], flags);
 
 	if (!vnic_wq_desc_avail(wq)) {

diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 33e4ec2..8c56fdc 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c

@@ -74,6 +74,11 @@
 MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages "
 					"for fnic trace buffer");
 
+unsigned int fnic_fc_trace_max_pages = 64;
+module_param(fnic_fc_trace_max_pages, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(fnic_fc_trace_max_pages,
+		 "Total allocated memory pages for fc trace buffer");
+
 static unsigned int fnic_max_qdepth = FNIC_DFLT_QUEUE_DEPTH;
 module_param(fnic_max_qdepth, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(fnic_max_qdepth, "Queue depth to report for each LUN");
@@ -111,7 +116,7 @@
 	.change_queue_type = fc_change_queue_type,
 	.this_id = -1,
 	.cmd_per_lun = 3,
-	.can_queue = FNIC_MAX_IO_REQ,
+	.can_queue = FNIC_DFLT_IO_REQ,
 	.use_clustering = ENABLE_CLUSTERING,
 	.sg_tablesize = FNIC_MAX_SG_DESC_CNT,
 	.max_sectors = 0xffff,
@@ -773,6 +778,7 @@
 		shost_printk(KERN_INFO, fnic->lport->host,
 			     "firmware uses non-FIP mode\n");
 		fcoe_ctlr_init(&fnic->ctlr, FIP_MODE_NON_FIP);
+		fnic->ctlr.state = FIP_ST_NON_FIP;
 	}
 	fnic->state = FNIC_IN_FC_MODE;
 
@@ -1033,11 +1039,20 @@
 	/* Allocate memory for trace buffer */
 	err = fnic_trace_buf_init();
 	if (err < 0) {
-		printk(KERN_ERR PFX "Trace buffer initialization Failed "
-				  "Fnic Tracing utility is disabled\n");
+		printk(KERN_ERR PFX
+		       "Trace buffer initialization Failed. "
+		       "Fnic Tracing utility is disabled\n");
 		fnic_trace_free();
 	}
 
+	/* Allocate memory for fc trace buffer */
+	err = fnic_fc_trace_init();
+	if (err < 0) {
+		printk(KERN_ERR PFX "FC trace buffer initialization Failed "
+		       "FC frame tracing utility is disabled\n");
+		fnic_fc_trace_free();
+	}
+
 	/* Create a cache for allocation of default size sgls */
 	len = sizeof(struct fnic_dflt_sgl_list);
 	fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create
@@ -1118,6 +1133,7 @@
 	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
 err_create_fnic_sgl_slab_dflt:
 	fnic_trace_free();
+	fnic_fc_trace_free();
 	fnic_debugfs_terminate();
 	return err;
 }
@@ -1135,6 +1151,7 @@
 	kmem_cache_destroy(fnic_io_req_cache);
 	fc_release_transport(fnic_fc_transport);
 	fnic_trace_free();
+	fnic_fc_trace_free();
 	fnic_debugfs_terminate();
 }
 

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 0521436..ea28b5c 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c

@@ -1312,8 +1312,9 @@
 
 cleanup_scsi_cmd:
 		sc->result = DID_TRANSPORT_DISRUPTED << 16;
-		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_cleanup_io:"
-			      " DID_TRANSPORT_DISRUPTED\n");
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "%s: sc duration = %lu DID_TRANSPORT_DISRUPTED\n",
+			      __func__, (jiffies - start_time));
 
 		if (atomic64_read(&fnic->io_cmpl_skip))
 			atomic64_dec(&fnic->io_cmpl_skip);
@@ -1733,6 +1734,7 @@
 	struct fnic_stats *fnic_stats;
 	struct abort_stats *abts_stats;
 	struct terminate_stats *term_stats;
+	enum fnic_ioreq_state old_ioreq_state;
 	int tag;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 
@@ -1793,6 +1795,7 @@
 	 * the completion wont be done till mid-layer, since abort
 	 * has already started.
 	 */
+	old_ioreq_state = CMD_STATE(sc);
 	CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
 	CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
 
@@ -1816,6 +1819,8 @@
 	if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req,
 				    fc_lun.scsi_lun, io_req)) {
 		spin_lock_irqsave(io_lock, flags);
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+			CMD_STATE(sc) = old_ioreq_state;
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
 		if (io_req)
 			io_req->abts_done = NULL;
@@ -1859,12 +1864,8 @@
 	if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
 		spin_unlock_irqrestore(io_lock, flags);
 		if (task_req == FCPIO_ITMF_ABT_TASK) {
-			FNIC_SCSI_DBG(KERN_INFO,
-				fnic->lport->host, "Abort Driver Timeout\n");
 			atomic64_inc(&abts_stats->abort_drv_timeouts);
 		} else {
-			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
-				"Terminate Driver Timeout\n");
 			atomic64_inc(&term_stats->terminate_drv_timeouts);
 		}
 		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT;

diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c
index e002e71..c772859 100644
--- a/drivers/scsi/fnic/fnic_trace.c
+++ b/drivers/scsi/fnic/fnic_trace.c

@@ -20,6 +20,7 @@
 #include <linux/errno.h>
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
 #include "fnic_io.h"
 #include "fnic.h"
 
@@ -32,6 +33,16 @@
 static fnic_trace_dbg_t fnic_trace_entries;
 int fnic_tracing_enabled = 1;
 
+/* static char *fnic_fc_ctlr_trace_buf_p; */
+
+static int fc_trace_max_entries;
+static unsigned long fnic_fc_ctlr_trace_buf_p;
+static fnic_trace_dbg_t fc_trace_entries;
+int fnic_fc_tracing_enabled = 1;
+int fnic_fc_trace_cleared = 1;
+static DEFINE_SPINLOCK(fnic_fc_trace_lock);
+
+
 /*
  * fnic_trace_get_buf - Give buffer pointer to user to fill up trace information
  *
@@ -428,10 +439,10 @@
 	}
 	err = fnic_trace_debugfs_init();
 	if (err < 0) {
-		printk(KERN_ERR PFX "Failed to initialize debugfs for tracing\n");
+		pr_err("fnic: Failed to initialize debugfs for tracing\n");
 		goto err_fnic_trace_debugfs_init;
 	}
-	printk(KERN_INFO PFX "Successfully Initialized Trace Buffer\n");
+	pr_info("fnic: Successfully Initialized Trace Buffer\n");
 	return err;
 err_fnic_trace_debugfs_init:
 	fnic_trace_free();
@@ -456,3 +467,314 @@
 	}
 	printk(KERN_INFO PFX "Successfully Freed Trace Buffer\n");
 }
+
+/*
+ * fnic_fc_trace_init -
+ * Initialize trace buffer to log fnic control frames
+ * Description:
+ * Initialize trace buffer data structure by allocating
+ * required memory for trace data as well as for Indexes.
+ * Each trace entry is FC_TRC_SIZE_BYTES bytes and memory is allocated
+ * for (fnic_fc_trace_max_pages * PAGE_SIZE) / FC_TRC_SIZE_BYTES entries.
+ * Page_offset(Index) is set to the address of trace entry
+ * and page_offset is initialized by adding frame size
+ * to the previous page_offset entry.
+ */
+
+int fnic_fc_trace_init(void)
+{
+	unsigned long fc_trace_buf_head;
+	int err = 0;
+	int i;
+
+	fc_trace_max_entries = (fnic_fc_trace_max_pages * PAGE_SIZE)/
+				FC_TRC_SIZE_BYTES;
+	fnic_fc_ctlr_trace_buf_p = (unsigned long)vmalloc(
+					fnic_fc_trace_max_pages * PAGE_SIZE);
+	if (!fnic_fc_ctlr_trace_buf_p) {
+		pr_err("fnic: Failed to allocate memory for "
+		       "FC Control Trace Buf\n");
+		err = -ENOMEM;
+		goto err_fnic_fc_ctlr_trace_buf_init;
+	}
+
+	memset((void *)fnic_fc_ctlr_trace_buf_p, 0,
+			fnic_fc_trace_max_pages * PAGE_SIZE);
+
+	/* Allocate memory for page offset */
+	fc_trace_entries.page_offset = vmalloc(fc_trace_max_entries *
+						sizeof(unsigned long));
+	if (!fc_trace_entries.page_offset) {
+		pr_err("fnic:Failed to allocate memory for page_offset\n");
+		if (fnic_fc_ctlr_trace_buf_p) {
+			pr_err("fnic: Freeing FC Control Trace Buf\n");
+			vfree((void *)fnic_fc_ctlr_trace_buf_p);
+			fnic_fc_ctlr_trace_buf_p = 0;
+		}
+		err = -ENOMEM;
+		goto err_fnic_fc_ctlr_trace_buf_init;
+	}
+	memset((void *)fc_trace_entries.page_offset, 0,
+	       (fc_trace_max_entries * sizeof(unsigned long)));
+
+	fc_trace_entries.rd_idx = fc_trace_entries.wr_idx = 0;
+	fc_trace_buf_head = fnic_fc_ctlr_trace_buf_p;
+
+	/*
+	* Set up fc_trace_entries.page_offset field with memory location
+	* for every trace entry
+	*/
+	for (i = 0; i < fc_trace_max_entries; i++) {
+		fc_trace_entries.page_offset[i] = fc_trace_buf_head;
+		fc_trace_buf_head += FC_TRC_SIZE_BYTES;
+	}
+	err = fnic_fc_trace_debugfs_init();
+	if (err < 0) {
+		pr_err("fnic: Failed to initialize FC_CTLR tracing.\n");
+		goto err_fnic_fc_ctlr_trace_debugfs_init;
+	}
+	pr_info("fnic: Successfully Initialized FC_CTLR Trace Buffer\n");
+	return err;
+
+err_fnic_fc_ctlr_trace_debugfs_init:
+	fnic_fc_trace_free();
+err_fnic_fc_ctlr_trace_buf_init:
+	return err;
+}
+
+/*
+ * fnic_fc_trace_free - Free memory of fnic_fc_ctlr trace data structures.
+ */
+void fnic_fc_trace_free(void)
+{
+	fnic_fc_tracing_enabled = 0;
+	fnic_fc_trace_debugfs_terminate();
+	if (fc_trace_entries.page_offset) {
+		vfree((void *)fc_trace_entries.page_offset);
+		fc_trace_entries.page_offset = NULL;
+	}
+	if (fnic_fc_ctlr_trace_buf_p) {
+		vfree((void *)fnic_fc_ctlr_trace_buf_p);
+		fnic_fc_ctlr_trace_buf_p = 0;
+	}
+	pr_info("fnic:Successfully FC_CTLR Freed Trace Buffer\n");
+}
+
+/*
+ * fnic_fc_trace_set_data:
+ *       Maintain rd & wr idx accordingly and set data
+ * Passed parameters:
+ *       host_no: host number associated with fnic
+ *       frame_type: send frame, receive frame or link event
+ *       frame: pointer to the frame (or link event string) to log
+ *       fc_trc_frame_len: Length of the data pointed to by frame
+ * Description:
+ *   This routine will get next available wr_idx and
+ *   copy all passed trace data to the buffer pointed by wr_idx
+ *   and increment wr_idx. It will also make sure that we don't
+ *   overwrite the entry which we are reading and also
+ *   wrap around if we reach the maximum entries.
+ * Returned Value:
+ *   It always returns 0, including when FC tracing is disabled
+ */
+int fnic_fc_trace_set_data(u32 host_no, u8 frame_type,
+				char *frame, u32 fc_trc_frame_len)
+{
+	unsigned long flags;
+	struct fc_trace_hdr *fc_buf;
+	unsigned long eth_fcoe_hdr_len;
+	char *fc_trace;
+
+	if (fnic_fc_tracing_enabled == 0)
+		return 0;
+
+	spin_lock_irqsave(&fnic_fc_trace_lock, flags);
+
+	if (fnic_fc_trace_cleared == 1) {
+		fc_trace_entries.rd_idx = fc_trace_entries.wr_idx = 0;
+		pr_info("fnic: Reseting the read idx\n");
+		memset((void *)fnic_fc_ctlr_trace_buf_p, 0,
+				fnic_fc_trace_max_pages * PAGE_SIZE);
+		fnic_fc_trace_cleared = 0;
+	}
+
+	fc_buf = (struct fc_trace_hdr *)
+		fc_trace_entries.page_offset[fc_trace_entries.wr_idx];
+
+	fc_trace_entries.wr_idx++;
+
+	if (fc_trace_entries.wr_idx >= fc_trace_max_entries)
+		fc_trace_entries.wr_idx = 0;
+
+	if (fc_trace_entries.wr_idx == fc_trace_entries.rd_idx) {
+		fc_trace_entries.rd_idx++;
+		if (fc_trace_entries.rd_idx >= fc_trace_max_entries)
+			fc_trace_entries.rd_idx = 0;
+	}
+
+	fc_buf->time_stamp = CURRENT_TIME;
+	fc_buf->host_no = host_no;
+	fc_buf->frame_type = frame_type;
+
+	fc_trace = (char *)FC_TRACE_ADDRESS(fc_buf);
+
+	/* During the receive path, we do not have eth hdr as well as fcoe hdr
+	 * at trace entry point so we will stuff 0xff just to make it generic.
+	 */
+	if (frame_type == FNIC_FC_RECV) {
+		eth_fcoe_hdr_len = sizeof(struct ethhdr) +
+					sizeof(struct fcoe_hdr);
+		fc_trc_frame_len = fc_trc_frame_len + eth_fcoe_hdr_len;
+		memset((char *)fc_trace, 0xff, eth_fcoe_hdr_len);
+		/* Copy the rest of data frame */
+		memcpy((char *)(fc_trace + eth_fcoe_hdr_len), (void *)frame,
+		min_t(u8, fc_trc_frame_len,
+			(u8)(FC_TRC_SIZE_BYTES - FC_TRC_HEADER_SIZE)));
+	} else {
+		memcpy((char *)fc_trace, (void *)frame,
+		min_t(u8, fc_trc_frame_len,
+			(u8)(FC_TRC_SIZE_BYTES - FC_TRC_HEADER_SIZE)));
+	}
+
+	/* Store the actual received length */
+	fc_buf->frame_len = fc_trc_frame_len;
+
+	spin_unlock_irqrestore(&fnic_fc_trace_lock, flags);
+	return 0;
+}
+
+/*
+ * fnic_fc_trace_get_data: Copy trace buffer to a memory file
+ * Passed parameter:
+ *       @fnic_dbgfs_prt: pointer to debugfs trace buffer
+ *       rdata_flag: 1 => Unformatted file
+ *                   0 => formatted file
+ * Description:
+ *       This routine will copy the trace data to the memory file,
+ *       either with proper formatting (rdata_flag 0) or as an
+ *       unformatted hex dump for further processing (otherwise).
+ * Return Value:
+ *       Number of bytes that were dumped into fnic_dbgfs_prt
+ */
+
+int fnic_fc_trace_get_data(fnic_dbgfs_t *fnic_dbgfs_prt, u8 rdata_flag)
+{
+	int rd_idx, wr_idx;
+	unsigned long flags;
+	int len = 0, j;
+	struct fc_trace_hdr *tdata;
+	char *fc_trace;
+
+	spin_lock_irqsave(&fnic_fc_trace_lock, flags);
+	if (fc_trace_entries.wr_idx == fc_trace_entries.rd_idx) {
+		spin_unlock_irqrestore(&fnic_fc_trace_lock, flags);
+		pr_info("fnic: Buffer is empty\n");
+		return 0;
+	}
+	rd_idx = fc_trace_entries.rd_idx;
+	wr_idx = fc_trace_entries.wr_idx;
+	if (rdata_flag == 0) {
+		len += snprintf(fnic_dbgfs_prt->buffer + len,
+			(fnic_fc_trace_max_pages * PAGE_SIZE * 3) - len,
+			"Time Stamp (UTC)\t\t"
+			"Host No:   F Type:  len:     FCoE_FRAME:\n");
+	}
+
+	while (rd_idx != wr_idx) {
+		tdata = (struct fc_trace_hdr *)
+			fc_trace_entries.page_offset[rd_idx];
+		if (!tdata) {
+			pr_info("fnic: Rd data is NULL\n");
+			spin_unlock_irqrestore(&fnic_fc_trace_lock, flags);
+			return 0;
+		}
+		if (rdata_flag == 0) {
+			copy_and_format_trace_data(tdata,
+				fnic_dbgfs_prt, &len, rdata_flag);
+		} else {
+			fc_trace = (char *)tdata;
+			for (j = 0; j < FC_TRC_SIZE_BYTES; j++) {
+				len += snprintf(fnic_dbgfs_prt->buffer + len,
+				(fnic_fc_trace_max_pages * PAGE_SIZE * 3)
+				- len, "%02x", fc_trace[j] & 0xff);
+			} /* for loop */
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				(fnic_fc_trace_max_pages * PAGE_SIZE * 3) - len,
+				"\n");
+		}
+		rd_idx++;
+		if (rd_idx > (fc_trace_max_entries - 1))
+			rd_idx = 0;
+	}
+
+	spin_unlock_irqrestore(&fnic_fc_trace_lock, flags);
+	return len;
+}
+
+/*
+ * copy_and_format_trace_data: Copy formatted data to char * buffer
+ * Passed Parameter:
+ *      @tdata: pointer to trace data
+ *      @fnic_dbgfs_prt: pointer to debugfs trace buffer
+ *      @orig_len: pointer to len
+ *      rdata_flag: 0 => Formatted file, 1 => Unformatted file
+ * Description:
+ *      This routine will format the passed trace data, append it to
+ *      the debugfs buffer at offset *orig_len and update *orig_len.
+ */
+
+void copy_and_format_trace_data(struct fc_trace_hdr *tdata,
+				fnic_dbgfs_t *fnic_dbgfs_prt, int *orig_len,
+				u8 rdata_flag)
+{
+	struct tm tm;
+	int j, i = 1, len;
+	char *fc_trace, *fmt;
+	int ethhdr_len = sizeof(struct ethhdr) - 1;
+	int fcoehdr_len = sizeof(struct fcoe_hdr);
+	int fchdr_len = sizeof(struct fc_frame_header);
+	int max_size = fnic_fc_trace_max_pages * PAGE_SIZE * 3;
+
+	tdata->frame_type = tdata->frame_type & 0x7F;
+
+	len = *orig_len;
+
+	time_to_tm(tdata->time_stamp.tv_sec, 0, &tm);
+
+	fmt = "%02d:%02d:%04ld %02d:%02d:%02d.%09lu ns%8x       %c%8x\t";
+	len += snprintf(fnic_dbgfs_prt->buffer + len,
+		(fnic_fc_trace_max_pages * PAGE_SIZE * 3) - len,
+		fmt,
+		tm.tm_mon + 1, tm.tm_mday, tm.tm_year + 1900,
+		tm.tm_hour, tm.tm_min, tm.tm_sec,
+		tdata->time_stamp.tv_nsec, tdata->host_no,
+		tdata->frame_type, tdata->frame_len);
+
+	fc_trace = (char *)FC_TRACE_ADDRESS(tdata);
+
+	for (j = 0; j < min_t(u8, tdata->frame_len,
+		(u8)(FC_TRC_SIZE_BYTES - FC_TRC_HEADER_SIZE)); j++) {
+		if (tdata->frame_type == FNIC_FC_LE) {
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				max_size - len, "%c", fc_trace[j]);
+		} else {
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				max_size - len, "%02x", fc_trace[j] & 0xff);
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				max_size - len, " ");
+			if (j == ethhdr_len ||
+				j == ethhdr_len + fcoehdr_len ||
+				j == ethhdr_len + fcoehdr_len + fchdr_len ||
+				(i > 3 && j%fchdr_len == 0)) {
+				len += snprintf(fnic_dbgfs_prt->buffer
+					+ len, (fnic_fc_trace_max_pages
+					* PAGE_SIZE * 3) - len,
+					"\n\t\t\t\t\t\t\t\t");
+				i++;
+			}
+		} /* end of else*/
+	} /* End of for loop*/
+	len += snprintf(fnic_dbgfs_prt->buffer + len,
+		max_size - len, "\n");
+	*orig_len = len;
+}

diff --git a/drivers/scsi/fnic/fnic_trace.h b/drivers/scsi/fnic/fnic_trace.h
index d412f2e..a8aa057 100644
--- a/drivers/scsi/fnic/fnic_trace.h
+++ b/drivers/scsi/fnic/fnic_trace.h

@@ -19,6 +19,17 @@
 #define __FNIC_TRACE_H__
 
 #define FNIC_ENTRY_SIZE_BYTES 64
+#define FC_TRC_SIZE_BYTES 256
+#define FC_TRC_HEADER_SIZE sizeof(struct fc_trace_hdr)
+
+/*
+ * The top bit (0x80) OR'ed into FNIC_FC_RECV / FNIC_FC_SEND is used to
+ * represent the type of frame: 1 => Eth frame, 0 => FC frame
+ */
+
+#define FNIC_FC_RECV 0x52 /* Character R */
+#define FNIC_FC_SEND 0x54 /* Character T */
+#define FNIC_FC_LE 0x4C /* Character L */
 
 extern ssize_t simple_read_from_buffer(void __user *to,
 					  size_t count,
@@ -30,6 +41,10 @@
 extern int fnic_tracing_enabled;
 extern unsigned int trace_max_pages;
 
+extern unsigned int fnic_fc_trace_max_pages;
+extern int fnic_fc_tracing_enabled;
+extern int fnic_fc_trace_cleared;
+
 typedef struct fnic_trace_dbg {
 	int wr_idx;
 	int rd_idx;
@@ -56,6 +71,16 @@
 
 typedef struct fnic_trace_data fnic_trace_data_t;
 
+struct fc_trace_hdr {
+	struct timespec time_stamp;
+	u32 host_no;
+	u8 frame_type;
+	u8 frame_len;
+} __attribute__((__packed__));
+
+#define FC_TRACE_ADDRESS(a) \
+	((unsigned long)(a) + sizeof(struct fc_trace_hdr))
+
 #define FNIC_TRACE_ENTRY_SIZE \
 		  (FNIC_ENTRY_SIZE_BYTES - sizeof(fnic_trace_data_t))
 
@@ -88,4 +113,17 @@
 void fnic_debugfs_terminate(void);
 int fnic_trace_debugfs_init(void);
 void fnic_trace_debugfs_terminate(void);
+
+/* Fnic FC CTLR Trace related functions */
+int fnic_fc_trace_init(void);
+void fnic_fc_trace_free(void);
+int fnic_fc_trace_set_data(u32 host_no, u8 frame_type,
+				char *frame, u32 fc_frame_len);
+int fnic_fc_trace_get_data(fnic_dbgfs_t *fnic_dbgfs_prt, u8 rdata_flag);
+void copy_and_format_trace_data(struct fc_trace_hdr *tdata,
+				fnic_dbgfs_t *fnic_dbgfs_prt,
+				int *len, u8 rdata_flag);
+int fnic_fc_trace_debugfs_init(void);
+void fnic_fc_trace_debugfs_terminate(void);
+
 #endif

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 7176365..a1bc8ca 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c

@@ -78,10 +78,6 @@
  *     
  */
 
-/*
- * $Log: generic_NCR5380.c,v $
- */
-
 /* settings for DTC3181E card with only Mustek scanner attached */
 #define USLEEP
 #define USLEEP_POLL	1

diff --git a/drivers/scsi/g_NCR5380.h b/drivers/scsi/g_NCR5380.h
index 1bcdb7b..703adf7 100644
--- a/drivers/scsi/g_NCR5380.h
+++ b/drivers/scsi/g_NCR5380.h

@@ -25,10 +25,6 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: generic_NCR5380.h,v $
- */
-
 #ifndef GENERIC_NCR5380_H
 #define GENERIC_NCR5380_H
 
@@ -58,8 +54,6 @@
 #define CAN_QUEUE 16
 #endif
 
-#ifndef HOSTS_C
-
 #define __STRVAL(x) #x
 #define STRVAL(x) __STRVAL(x)
 
@@ -131,7 +125,6 @@
 #define BOARD_NCR53C400A 2
 #define BOARD_DTC3181E	3
 
-#endif /* else def HOSTS_C */
 #endif /* ndef ASM */
 #endif /* GENERIC_NCR5380_H */
 

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 9a6e4a2..31184b3 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c

@@ -48,6 +48,7 @@
 #include <linux/bitmap.h>
 #include <linux/atomic.h>
 #include <linux/jiffies.h>
+#include <linux/percpu.h>
 #include <asm/div64.h>
 #include "hpsa_cmd.h"
 #include "hpsa.h"
@@ -115,9 +116,15 @@
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C3},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C4},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C5},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C6},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C7},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C8},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C9},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21CA},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21CB},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21CC},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21CD},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21CE},
 	{PCI_VENDOR_ID_HP_3PAR, 0x0075, 0x1590, 0x0076},
 	{PCI_VENDOR_ID_HP_3PAR, 0x0075, 0x1590, 0x0087},
 	{PCI_VENDOR_ID_HP_3PAR, 0x0075, 0x1590, 0x007D},
@@ -165,9 +172,15 @@
 	{0x21C3103C, "Smart Array", &SA5_access},
 	{0x21C4103C, "Smart Array", &SA5_access},
 	{0x21C5103C, "Smart Array", &SA5_access},
+	{0x21C6103C, "Smart Array", &SA5_access},
 	{0x21C7103C, "Smart Array", &SA5_access},
 	{0x21C8103C, "Smart Array", &SA5_access},
 	{0x21C9103C, "Smart Array", &SA5_access},
+	{0x21CA103C, "Smart Array", &SA5_access},
+	{0x21CB103C, "Smart Array", &SA5_access},
+	{0x21CC103C, "Smart Array", &SA5_access},
+	{0x21CD103C, "Smart Array", &SA5_access},
+	{0x21CE103C, "Smart Array", &SA5_access},
 	{0x00761590, "HP Storage P1224 Array Controller", &SA5_access},
 	{0x00871590, "HP Storage P1224e Array Controller", &SA5_access},
 	{0x007D1590, "HP Storage P1228 Array Controller", &SA5_access},
@@ -181,7 +194,8 @@
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
 static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg);
-static void start_io(struct ctlr_info *h);
+static void lock_and_start_io(struct ctlr_info *h);
+static void start_io(struct ctlr_info *h, unsigned long *flags);
 
 #ifdef CONFIG_COMPAT
 static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd, void *arg);
@@ -683,7 +697,7 @@
 static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
 	u32 a;
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags;
 
 	if (h->transMethod & CFGTBL_Trans_io_accel1)
@@ -832,8 +846,8 @@
 	spin_lock_irqsave(&h->lock, flags);
 	addQ(&h->reqQ, c);
 	h->Qdepth++;
+	start_io(h, &flags);
 	spin_unlock_irqrestore(&h->lock, flags);
-	start_io(h);
 }
 
 static inline void removeQ(struct CommandList *c)
@@ -1542,9 +1556,13 @@
 			dev_warn(&h->pdev->dev,
 				"%s: task complete with check condition.\n",
 				"HP SSD Smart Path");
+			cmd->result |= SAM_STAT_CHECK_CONDITION;
 			if (c2->error_data.data_present !=
-					IOACCEL2_SENSE_DATA_PRESENT)
+					IOACCEL2_SENSE_DATA_PRESENT) {
+				memset(cmd->sense_buffer, 0,
+					SCSI_SENSE_BUFFERSIZE);
 				break;
+			}
 			/* copy the sense data */
 			data_len = c2->error_data.sense_data_len;
 			if (data_len > SCSI_SENSE_BUFFERSIZE)
@@ -1554,7 +1572,6 @@
 					sizeof(c2->error_data.sense_data_buff);
 			memcpy(cmd->sense_buffer,
 				c2->error_data.sense_data_buff, data_len);
-			cmd->result |= SAM_STAT_CHECK_CONDITION;
 			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_BUSY:
@@ -1639,16 +1656,6 @@
 	if (is_logical_dev_addr_mode(dev->scsi3addr) &&
 		c2->error_data.serv_response ==
 			IOACCEL2_SERV_RESPONSE_FAILURE) {
-		if (c2->error_data.status ==
-			IOACCEL2_STATUS_SR_IOACCEL_DISABLED)
-			dev_warn(&h->pdev->dev,
-				"%s: Path is unavailable, retrying on standard path.\n",
-				"HP SSD Smart Path");
-		else
-			dev_warn(&h->pdev->dev,
-				"%s: Error 0x%02x, retrying on standard path.\n",
-				"HP SSD Smart Path", c2->error_data.status);
-
 		dev->offload_enabled = 0;
 		h->drv_req_rescan = 1;	/* schedule controller for a rescan */
 		cmd->result = DID_SOFT_ERROR << 16;
@@ -1979,20 +1986,26 @@
 	wait_for_completion(&wait);
 }
 
+/*
+ * Return the calling CPU's copy of the controller lockup status.
+ * The value is read between get_cpu() and put_cpu() so that the per-cpu
+ * slot that is read belongs to the CPU number get_cpu() returned.
+ */
+static u32 lockup_detected(struct ctlr_info *h)
+{
+	u32 status;
+
+	status = *per_cpu_ptr(h->lockup_detected, get_cpu());
+	put_cpu();
+	return status;
+}
+
 static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	unsigned long flags;
-
 	/* If controller lockup detected, fake a hardware error. */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h)))
 		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-	} else {
-		spin_unlock_irqrestore(&h->lock, flags);
+	else
 		hpsa_scsi_do_simple_cmd_core(h, c);
-	}
 }
 
 #define MAX_DRIVER_CMD_RETRIES 25
@@ -2417,7 +2430,7 @@
 		buflen = 16;
 	buf = kzalloc(64, GFP_KERNEL);
 	if (!buf)
-		return -1;
+		return -ENOMEM;
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | 0x83, buf, 64);
 	if (rc == 0)
 		memcpy(device_id, &buf[8], buflen);
@@ -2503,27 +2516,21 @@
 		return HPSA_VPD_LV_STATUS_UNSUPPORTED;
 
 	/* Does controller have VPD for logical volume status? */
-	if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS)) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD page is unsupported.\n");
+	if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS))
 		goto exit_failed;
-	}
 
 	/* Get the size of the VPD return buffer */
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS,
 					buf, HPSA_VPD_HEADER_SZ);
-	if (rc != 0) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n");
+	if (rc != 0)
 		goto exit_failed;
-	}
 	size = buf[3];
 
 	/* Now get the whole VPD buffer */
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS,
 					buf, size + HPSA_VPD_HEADER_SZ);
-	if (rc != 0) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n");
+	if (rc != 0)
 		goto exit_failed;
-	}
 	status = buf[4]; /* status byte */
 
 	kfree(buf);
@@ -2536,11 +2543,11 @@
 /* Determine offline status of a volume.
  * Return either:
  *  0 (not offline)
- * -1 (offline for unknown reasons)
+ *  0xff (offline for unknown reasons)
  *  # (integer code indicating one of several NOT READY states
  *     describing why a volume is to be kept offline)
  */
-static unsigned char hpsa_volume_offline(struct ctlr_info *h,
+static int hpsa_volume_offline(struct ctlr_info *h,
 					unsigned char scsi3addr[])
 {
 	struct CommandList *c;
@@ -2639,11 +2646,15 @@
 
 	if (this_device->devtype == TYPE_DISK &&
 		is_logical_dev_addr_mode(scsi3addr)) {
+		int volume_offline;
+
 		hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
 		if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
 			hpsa_get_ioaccel_status(h, scsi3addr, this_device);
-		this_device->volume_offline =
-			hpsa_volume_offline(h, scsi3addr);
+		volume_offline = hpsa_volume_offline(h, scsi3addr);
+		if (volume_offline < 0 || volume_offline > 0xff)
+			volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
+		this_device->volume_offline = volume_offline & 0xff;
 	} else {
 		this_device->raid_level = RAID_UNKNOWN;
 		this_device->offload_config = 0;
@@ -2836,6 +2847,8 @@
 
 	/* Get the list of physical devices */
 	physicals = kzalloc(reportsize, GFP_KERNEL);
+	if (physicals == NULL)
+		return 0;
 	if (hpsa_scsi_do_report_phys_luns(h, (struct ReportLUNdata *) physicals,
 		reportsize, extended)) {
 		dev_err(&h->pdev->dev,
@@ -2847,26 +2860,20 @@
 	nphysicals = be32_to_cpu(*((__be32 *)physicals->LUNListLength)) /
 							responsesize;
 
-
 	/* find ioaccel2 handle in list of physicals: */
 	for (i = 0; i < nphysicals; i++) {
+		struct ext_report_lun_entry *entry = &physicals->LUN[i];
+
 		/* handle is in bytes 28-31 of each lun */
-		if (memcmp(&((struct ReportExtendedLUNdata *)
-				physicals)->LUN[i][20], &find, 4) != 0) {
+		if (entry->ioaccel_handle != find)
 			continue; /* didn't match */
-		}
 		found = 1;
-		memcpy(scsi3addr, &((struct ReportExtendedLUNdata *)
-					physicals)->LUN[i][0], 8);
+		memcpy(scsi3addr, entry->lunid, 8);
 		if (h->raid_offload_debug > 0)
 			dev_info(&h->pdev->dev,
-				"%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				"%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%8phN\n",
 				__func__, find,
-				((struct ReportExtendedLUNdata *)
-					physicals)->LUN[i][20],
-				scsi3addr[0], scsi3addr[1], scsi3addr[2],
-				scsi3addr[3], scsi3addr[4], scsi3addr[5],
-				scsi3addr[6], scsi3addr[7]);
+				entry->ioaccel_handle, scsi3addr);
 		break; /* found it */
 	}
 
@@ -2951,7 +2958,8 @@
 		return RAID_CTLR_LUNID;
 
 	if (i < logicals_start)
-		return &physdev_list->LUN[i - (raid_ctlr_position == 0)][0];
+		return &physdev_list->LUN[i -
+				(raid_ctlr_position == 0)].lunid[0];
 
 	if (i < last_device)
 		return &logdev_list->LUN[i - nphysicals -
@@ -2963,19 +2971,24 @@
 static int hpsa_hba_mode_enabled(struct ctlr_info *h)
 {
 	int rc;
+	int hba_mode_enabled;
 	struct bmic_controller_parameters *ctlr_params;
 	ctlr_params = kzalloc(sizeof(struct bmic_controller_parameters),
 		GFP_KERNEL);
 
 	if (!ctlr_params)
-		return 0;
+		return -ENOMEM;
 	rc = hpsa_bmic_ctrl_mode_sense(h, RAID_CTLR_LUNID, 0, ctlr_params,
 		sizeof(struct bmic_controller_parameters));
-	if (rc != 0) {
+	if (rc) {
 		kfree(ctlr_params);
-		return 0;
+		return rc;
 	}
-	return ctlr_params->nvram_flags & (1 << 3) ? 1 : 0;
+
+	hba_mode_enabled =
+		((ctlr_params->nvram_flags & HBA_MODE_ENABLED_FLAG) != 0);
+	kfree(ctlr_params);
+	return hba_mode_enabled;
 }
 
 static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
@@ -3001,7 +3014,7 @@
 	int reportlunsize = sizeof(*physdev_list) + HPSA_MAX_PHYS_LUN * 24;
 	int i, n_ext_target_devs, ndevs_to_allocate;
 	int raid_ctlr_position;
-	u8 rescan_hba_mode;
+	int rescan_hba_mode;
 	DECLARE_BITMAP(lunzerobits, MAX_EXT_TARGETS);
 
 	currentsd = kzalloc(sizeof(*currentsd) * HPSA_MAX_DEVICES, GFP_KERNEL);
@@ -3016,6 +3029,8 @@
 	memset(lunzerobits, 0, sizeof(lunzerobits));
 
 	rescan_hba_mode = hpsa_hba_mode_enabled(h);
+	if (rescan_hba_mode < 0)
+		goto out;
 
 	if (!h->hba_mode_enabled && rescan_hba_mode)
 		dev_warn(&h->pdev->dev, "HBA mode enabled\n");
@@ -3053,7 +3068,7 @@
 		ndev_allocated++;
 	}
 
-	if (unlikely(is_scsi_rev_5(h)))
+	if (is_scsi_rev_5(h))
 		raid_ctlr_position = 0;
 	else
 		raid_ctlr_position = nphysicals + nlogicals;
@@ -3950,7 +3965,6 @@
 	struct hpsa_scsi_dev_t *dev;
 	unsigned char scsi3addr[8];
 	struct CommandList *c;
-	unsigned long flags;
 	int rc = 0;
 
 	/* Get the ptr to our adapter structure out of cmd->host. */
@@ -3963,14 +3977,11 @@
 	}
 	memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
 
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h))) {
 		cmd->result = DID_ERROR << 16;
 		done(cmd);
 		return 0;
 	}
-	spin_unlock_irqrestore(&h->lock, flags);
 	c = cmd_alloc(h);
 	if (c == NULL) {			/* trouble... */
 		dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
@@ -4082,16 +4093,13 @@
 	 * we can prevent new rescan threads from piling up on a
 	 * locked up controller.
 	 */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h))) {
 		spin_lock_irqsave(&h->scan_lock, flags);
 		h->scan_finished = 1;
 		wake_up_all(&h->scan_wait_queue);
 		spin_unlock_irqrestore(&h->scan_lock, flags);
 		return 1;
 	}
-	spin_unlock_irqrestore(&h->lock, flags);
 	return 0;
 }
 
@@ -4942,7 +4950,7 @@
 		buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
 		if (buff == NULL)
 			return -EFAULT;
-		if (iocommand.Request.Type.Direction == XFER_WRITE) {
+		if (iocommand.Request.Type.Direction & XFER_WRITE) {
 			/* Copy the data into the buffer we created */
 			if (copy_from_user(buff, iocommand.buf,
 				iocommand.buf_size)) {
@@ -5005,7 +5013,7 @@
 		rc = -EFAULT;
 		goto out;
 	}
-	if (iocommand.Request.Type.Direction == XFER_READ &&
+	if ((iocommand.Request.Type.Direction & XFER_READ) &&
 		iocommand.buf_size > 0) {
 		/* Copy the data out of the buffer we created */
 		if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
@@ -5082,7 +5090,7 @@
 			status = -ENOMEM;
 			goto cleanup1;
 		}
-		if (ioc->Request.Type.Direction == XFER_WRITE) {
+		if (ioc->Request.Type.Direction & XFER_WRITE) {
 			if (copy_from_user(buff[sg_used], data_ptr, sz)) {
 				status = -ENOMEM;
 				goto cleanup1;
@@ -5134,7 +5142,7 @@
 		status = -EFAULT;
 		goto cleanup0;
 	}
-	if (ioc->Request.Type.Direction == XFER_READ && ioc->buf_size > 0) {
+	if ((ioc->Request.Type.Direction & XFER_READ) && ioc->buf_size > 0) {
 		/* Copy the data out of the buffer we created */
 		BYTE __user *ptr = ioc->buf;
 		for (i = 0; i < sg_used; i++) {
@@ -5438,13 +5446,12 @@
 
 /* Takes cmds off the submission queue and sends them to the hardware,
  * then puts them on the queue of cmds waiting for completion.
+ * Assumes h->lock is held
  */
-static void start_io(struct ctlr_info *h)
+static void start_io(struct ctlr_info *h, unsigned long *flags)
 {
 	struct CommandList *c;
-	unsigned long flags;
 
-	spin_lock_irqsave(&h->lock, flags);
 	while (!list_empty(&h->reqQ)) {
 		c = list_entry(h->reqQ.next, struct CommandList, list);
 		/* can't do anything if fifo is full */
@@ -5467,14 +5474,20 @@
 		 * condition.
 		 */
 		h->commands_outstanding++;
-		if (h->commands_outstanding > h->max_outstanding)
-			h->max_outstanding = h->commands_outstanding;
 
 		/* Tell the controller execute command */
-		spin_unlock_irqrestore(&h->lock, flags);
+		spin_unlock_irqrestore(&h->lock, *flags);
 		h->access.submit_command(h, c);
-		spin_lock_irqsave(&h->lock, flags);
+		spin_lock_irqsave(&h->lock, *flags);
 	}
+}
+
+/*
+ * Convenience wrapper for callers that do not already hold h->lock:
+ * take the lock, run start_io(), and release the lock again.  The saved
+ * irq flags are passed by pointer because start_io() drops and re-takes
+ * h->lock around each submit_command() call.
+ */
+static void lock_and_start_io(struct ctlr_info *h)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+	start_io(h, &flags);
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
@@ -5542,7 +5555,7 @@
 	else if (c->cmd_type == CMD_IOCTL_PEND)
 		complete(c->waiting);
 	if (unlikely(io_may_be_stalled))
-		start_io(h);
+		lock_and_start_io(h);
 }
 
 static inline u32 hpsa_tag_contains_index(u32 tag)
@@ -5819,12 +5832,12 @@
 		dev_info(&pdev->dev, "using doorbell to reset controller\n");
 		writel(use_doorbell, vaddr + SA5_DOORBELL);
 
-		/* PMC hardware guys tell us we need a 5 second delay after
+		/* PMC hardware guys tell us we need a 10 second delay after
 		 * doorbell reset and before any attempt to talk to the board
 		 * at all to ensure that this actually works and doesn't fall
 		 * over in some weird corner cases.
 		 */
-		msleep(5000);
+		msleep(10000);
 	} else { /* Try to do it the PCI power state way */
 
 		/* Quoting from the Open CISS Specification: "The Power
@@ -6145,6 +6158,8 @@
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
 		dev_info(&h->pdev->dev, "MSIX\n");
 		h->msix_vector = MAX_REPLY_QUEUES;
+		if (h->msix_vector > num_online_cpus())
+			h->msix_vector = num_online_cpus();
 		err = pci_enable_msix(h->pdev, hpsa_msix_entries,
 				      h->msix_vector);
 		if (err > 0) {
@@ -6594,6 +6609,17 @@
 			h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
 }
 
+/*
+ * Give each MSI-X vector an affinity hint for a different online CPU,
+ * walking the online mask in order starting from its first CPU.
+ *
+ * The hint is advisory, so the return value of irq_set_affinity_hint()
+ * is intentionally not checked.
+ */
+static void hpsa_irq_affinity_hints(struct ctlr_info *h)
+{
+	int i, cpu;
+
+	cpu = cpumask_first(cpu_online_mask);
+	for (i = 0; i < h->msix_vector; i++) {
+		irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
+		cpu = cpumask_next(cpu, cpu_online_mask);
+		/*
+		 * cpumask_next() returns >= nr_cpu_ids once the mask is
+		 * exhausted; wrap around instead of handing an invalid
+		 * CPU number to get_cpu_mask() if there are more vectors
+		 * than CPUs currently online.
+		 */
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_first(cpu_online_mask);
+	}
+}
+
 static int hpsa_request_irq(struct ctlr_info *h,
 	irqreturn_t (*msixhandler)(int, void *),
 	irqreturn_t (*intxhandler)(int, void *))
@@ -6613,6 +6639,7 @@
 			rc = request_irq(h->intr[i], msixhandler,
 					0, h->devname,
 					&h->q[i]);
+		hpsa_irq_affinity_hints(h);
 	} else {
 		/* Use single reply pool */
 		if (h->msix_vector > 0 || h->msi_vector) {
@@ -6664,12 +6691,15 @@
 	if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
 		/* Single reply queue, only one irq to free */
 		i = h->intr_mode;
+		irq_set_affinity_hint(h->intr[i], NULL);
 		free_irq(h->intr[i], &h->q[i]);
 		return;
 	}
 
-	for (i = 0; i < h->msix_vector; i++)
+	for (i = 0; i < h->msix_vector; i++) {
+		irq_set_affinity_hint(h->intr[i], NULL);
 		free_irq(h->intr[i], &h->q[i]);
+	}
 }
 
 static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h)
@@ -6686,6 +6716,20 @@
 #endif /* CONFIG_PCI_MSI */
 }
 
+/*
+ * Free the DMA-coherent buffer of every reply queue that has one.
+ * Queues whose head pointer is NULL are skipped, and freed queues are
+ * reset to head == NULL / busaddr == 0.
+ */
+static void hpsa_free_reply_queues(struct ctlr_info *h)
+{
+	struct reply_queue_buffer *rq;
+	int q;
+
+	for (q = 0; q < h->nreply_queues; q++) {
+		rq = &h->reply_queue[q];
+		if (rq->head) {
+			pci_free_consistent(h->pdev, h->reply_queue_size,
+				rq->head, rq->busaddr);
+			rq->head = NULL;
+			rq->busaddr = 0;
+		}
+	}
+}
+
 static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 {
 	hpsa_free_irqs_and_disable_msix(h);
@@ -6693,8 +6737,7 @@
 	hpsa_free_cmd_pool(h);
 	kfree(h->ioaccel1_blockFetchTable);
 	kfree(h->blockFetchTable);
-	pci_free_consistent(h->pdev, h->reply_pool_size,
-		h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	if (h->vaddr)
 		iounmap(h->vaddr);
 	if (h->transtable)
@@ -6719,16 +6762,38 @@
 	}
 }
 
+/*
+ * Store @value in every CPU's copy of h->lockup_detected.
+ *
+ * All possible CPUs are written, not just the ones online right now:
+ * lockup_detected() reads the copy of whichever CPU it runs on, so a CPU
+ * brought online after a lockup was recorded must see the same value.
+ * Iterating with for_each_possible_cpu() also avoids stepping past the
+ * end of the mask if the set of online CPUs changes during the walk.
+ */
+static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		u32 *slot = per_cpu_ptr(h->lockup_detected, cpu);
+
+		*slot = value;
+	}
+	wmb(); /* be sure the per-cpu variables are out to memory */
+}
+
 static void controller_lockup_detected(struct ctlr_info *h)
 {
 	unsigned long flags;
+	u32 lockup_detected;
 
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
 	spin_lock_irqsave(&h->lock, flags);
-	h->lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+	lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+	if (!lockup_detected) {
+		/* no heartbeat, but controller gave us a zero. */
+		dev_warn(&h->pdev->dev,
+			"lockup detected but scratchpad register is zero\n");
+		lockup_detected = 0xffffffff;
+	}
+	set_lockup_detected_for_all_cpus(h, lockup_detected);
 	spin_unlock_irqrestore(&h->lock, flags);
 	dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
-			h->lockup_detected);
+			lockup_detected);
 	pci_disable_device(h->pdev);
 	spin_lock_irqsave(&h->lock, flags);
 	fail_all_cmds_on_list(h, &h->cmpQ);
@@ -6863,7 +6928,7 @@
 	struct ctlr_info *h = container_of(to_delayed_work(work),
 					struct ctlr_info, monitor_ctlr_work);
 	detect_controller_lockup(h);
-	if (h->lockup_detected)
+	if (lockup_detected(h))
 		return;
 
 	if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) {
@@ -6913,7 +6978,6 @@
 	 * the 5 lower bits of the address are used by the hardware. and by
 	 * the driver.  See comments in hpsa.h for more info.
 	 */
-#define COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT);
 	h = kzalloc(sizeof(*h), GFP_KERNEL);
 	if (!h)
@@ -6928,6 +6992,13 @@
 	spin_lock_init(&h->offline_device_lock);
 	spin_lock_init(&h->scan_lock);
 	spin_lock_init(&h->passthru_count_lock);
+
+	/* Allocate and clear per-cpu variable lockup_detected */
+	h->lockup_detected = alloc_percpu(u32);
+	if (!h->lockup_detected)
+		goto clean1;
+	set_lockup_detected_for_all_cpus(h, 0);
+
 	rc = hpsa_pci_init(h);
 	if (rc != 0)
 		goto clean1;
@@ -7051,6 +7122,8 @@
 	free_irqs(h);
 clean2:
 clean1:
+	if (h->lockup_detected)
+		free_percpu(h->lockup_detected);
 	kfree(h);
 	return rc;
 }
@@ -7059,16 +7132,10 @@
 {
 	char *flush_buf;
 	struct CommandList *c;
-	unsigned long flags;
 
 	/* Don't bother trying to flush the cache if locked up */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h)))
 		return;
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-
 	flush_buf = kzalloc(4, GFP_KERNEL);
 	if (!flush_buf)
 		return;
@@ -7144,8 +7211,7 @@
 	pci_free_consistent(h->pdev,
 		h->nr_cmds * sizeof(struct ErrorInfo),
 		h->errinfo_pool, h->errinfo_pool_dhandle);
-	pci_free_consistent(h->pdev, h->reply_pool_size,
-		h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	kfree(h->cmd_pool_bits);
 	kfree(h->blockFetchTable);
 	kfree(h->ioaccel1_blockFetchTable);
@@ -7153,6 +7219,7 @@
 	kfree(h->hba_inquiry_data);
 	pci_disable_device(pdev);
 	pci_release_regions(pdev);
+	free_percpu(h->lockup_detected);
 	kfree(h);
 }
 
@@ -7257,8 +7324,16 @@
 	 * 10 = 6 s/g entry or 24k
 	 */
 
+	/* If the controller supports either ioaccel method then
+	 * we can also use the RAID stack submit path that does not
+	 * perform the superfluous readl() after each command submission.
+	 */
+	if (trans_support & (CFGTBL_Trans_io_accel1 | CFGTBL_Trans_io_accel2))
+		access = SA5_performant_access_no_read;
+
 	/* Controller spec: zero out this buffer. */
-	memset(h->reply_pool, 0, h->reply_pool_size);
+	for (i = 0; i < h->nreply_queues; i++)
+		memset(h->reply_queue[i].head, 0, h->reply_queue_size);
 
 	bft[7] = SG_ENTRIES_IN_CMD + 4;
 	calc_bucket_map(bft, ARRAY_SIZE(bft),
@@ -7274,8 +7349,7 @@
 
 	for (i = 0; i < h->nreply_queues; i++) {
 		writel(0, &h->transtable->RepQAddr[i].upper);
-		writel(h->reply_pool_dhandle +
-			(h->max_commands * sizeof(u64) * i),
+		writel(h->reply_queue[i].busaddr,
 			&h->transtable->RepQAddr[i].lower);
 	}
 
@@ -7323,8 +7397,10 @@
 				h->ioaccel1_blockFetchTable);
 
 		/* initialize all reply queue entries to unused */
-		memset(h->reply_pool, (u8) IOACCEL_MODE1_REPLY_UNUSED,
-				h->reply_pool_size);
+		for (i = 0; i < h->nreply_queues; i++)
+			memset(h->reply_queue[i].head,
+				(u8) IOACCEL_MODE1_REPLY_UNUSED,
+				h->reply_queue_size);
 
 		/* set all the constant fields in the accelerator command
 		 * frames once at init time to save CPU cycles later.
@@ -7386,7 +7462,6 @@
 	 * because the 7 lower bits of the address are used by the
 	 * hardware.
 	 */
-#define IOACCEL1_COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct io_accel1_cmd) %
 			IOACCEL1_COMMANDLIST_ALIGNMENT);
 	h->ioaccel_cmd_pool =
@@ -7424,7 +7499,6 @@
 	if (h->ioaccel_maxsg > IOACCEL2_MAXSGENTRIES)
 		h->ioaccel_maxsg = IOACCEL2_MAXSGENTRIES;
 
-#define IOACCEL2_COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct io_accel2_cmd) %
 			IOACCEL2_COMMANDLIST_ALIGNMENT);
 	h->ioaccel2_cmd_pool =
@@ -7482,16 +7556,17 @@
 		}
 	}
 
-	/* TODO, check that this next line h->nreply_queues is correct */
 	h->nreply_queues = h->msix_vector > 0 ? h->msix_vector : 1;
 	hpsa_get_max_perf_mode_cmds(h);
 	/* Performant mode ring buffer and supporting data structures */
-	h->reply_pool_size = h->max_commands * sizeof(u64) * h->nreply_queues;
-	h->reply_pool = pci_alloc_consistent(h->pdev, h->reply_pool_size,
-				&(h->reply_pool_dhandle));
+	h->reply_queue_size = h->max_commands * sizeof(u64);
 
 	for (i = 0; i < h->nreply_queues; i++) {
-		h->reply_queue[i].head = &h->reply_pool[h->max_commands * i];
+		h->reply_queue[i].head = pci_alloc_consistent(h->pdev,
+						h->reply_queue_size,
+						&(h->reply_queue[i].busaddr));
+		if (!h->reply_queue[i].head)
+			goto clean_up;
 		h->reply_queue[i].size = h->max_commands;
 		h->reply_queue[i].wraparound = 1;  /* spec: init to 1 */
 		h->reply_queue[i].current_entry = 0;
@@ -7500,18 +7575,14 @@
 	/* Need a block fetch table for performant mode */
 	h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) *
 				sizeof(u32)), GFP_KERNEL);
-
-	if ((h->reply_pool == NULL)
-		|| (h->blockFetchTable == NULL))
+	if (!h->blockFetchTable)
 		goto clean_up;
 
 	hpsa_enter_performant_mode(h, trans_support);
 	return;
 
 clean_up:
-	if (h->reply_pool)
-		pci_free_consistent(h->pdev, h->reply_pool_size,
-			h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	kfree(h->blockFetchTable);
 }
 

diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 44235a2..24472ce 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h

@@ -57,11 +57,12 @@
 
 };
 
-struct reply_pool {
+struct reply_queue_buffer {
 	u64 *head;
 	size_t size;
 	u8 wraparound;
 	u32 current_entry;
+	dma_addr_t busaddr;
 };
 
 #pragma pack(1)
@@ -90,6 +91,7 @@
 	u8   automatic_drive_slamming;
 	u8   reserved1;
 	u8   nvram_flags;
+#define HBA_MODE_ENABLED_FLAG (1 << 3)
 	u8   cache_nvram_flags;
 	u8   drive_config_flags;
 	u16  reserved2;
@@ -115,11 +117,8 @@
 	int 	nr_cmds; /* Number of commands allowed on this controller */
 	struct CfgTable __iomem *cfgtable;
 	int	interrupts_enabled;
-	int	major;
 	int 	max_commands;
 	int	commands_outstanding;
-	int 	max_outstanding; /* Debug */
-	int	usage_count;  /* number of opens all all minor devices */
 #	define PERF_MODE_INT	0
 #	define DOORBELL_INT	1
 #	define SIMPLE_MODE_INT	2
@@ -176,11 +175,9 @@
 	/*
 	 * Performant mode completion buffers
 	 */
-	u64 *reply_pool;
-	size_t reply_pool_size;
-	struct reply_pool reply_queue[MAX_REPLY_QUEUES];
+	size_t reply_queue_size;
+	struct reply_queue_buffer reply_queue[MAX_REPLY_QUEUES];
 	u8 nreply_queues;
-	dma_addr_t reply_pool_dhandle;
 	u32 *blockFetchTable;
 	u32 *ioaccel1_blockFetchTable;
 	u32 *ioaccel2_blockFetchTable;
@@ -195,7 +192,7 @@
 	u64 last_heartbeat_timestamp;
 	u32 heartbeat_sample_interval;
 	atomic_t firmware_flash_in_progress;
-	u32 lockup_detected;
+	u32 *lockup_detected;
 	struct delayed_work monitor_ctlr_work;
 	int remove_in_progress;
 	u32 fifo_recently_full;
@@ -232,11 +229,9 @@
 #define CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE	(1 << 31)
 
 #define RESCAN_REQUIRED_EVENT_BITS \
-		(CTLR_STATE_CHANGE_EVENT | \
-		CTLR_ENCLOSURE_HOT_PLUG_EVENT | \
+		(CTLR_ENCLOSURE_HOT_PLUG_EVENT | \
 		CTLR_STATE_CHANGE_EVENT_PHYSICAL_DRV | \
 		CTLR_STATE_CHANGE_EVENT_LOGICAL_DRV | \
-		CTLR_STATE_CHANGE_EVENT_REDUNDANT_CNTRL | \
 		CTLR_STATE_CHANGE_EVENT_AIO_ENABLED_DISABLED | \
 		CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE)
 	spinlock_t offline_device_lock;
@@ -345,22 +340,23 @@
 static void SA5_submit_command(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr,
-		c->Header.Tag.lower);
 	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	(void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
 }
 
+/*
+ * Submit a command by writing its bus address to the request port.
+ * Same as SA5_submit_command() except that the write is not followed by
+ * a read of the scratchpad register.
+ */
+static void SA5_submit_command_no_read(struct ctlr_info *h,
+	struct CommandList *c)
+{
+	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+}
+
 static void SA5_submit_command_ioaccel2(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr,
-		c->Header.Tag.lower);
 	if (c->cmd_type == CMD_IOACCEL2)
 		writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
 	else
 		writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
-	(void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
 }
 
 /*
@@ -398,7 +394,7 @@
 
 static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 {
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags, register_value = FIFO_EMPTY;
 
 	/* msi auto clears the interrupt pending bit. */
@@ -477,7 +473,6 @@
 {
 	unsigned long register_value  =
 		readl(h->vaddr + SA5_INTR_STATUS);
-	dev_dbg(&h->pdev->dev, "intr_pending %lx\n", register_value);
 	return register_value & SA5_INTR_PENDING;
 }
 
@@ -514,7 +509,7 @@
 static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q)
 {
 	u64 register_value;
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags;
 
 	BUG_ON(q >= h->nreply_queues);
@@ -572,6 +567,14 @@
 	SA5_performant_completed,
 };
 
+/*
+ * Performant-mode access method whose submit routine is
+ * SA5_submit_command_no_read(); the remaining entries are the regular
+ * performant-mode handlers.
+ * NOTE(review): entries are positional, so their order must match the
+ * member order of struct access_method -- confirm against its definition.
+ */
+static struct access_method SA5_performant_access_no_read = {
+	SA5_submit_command_no_read,
+	SA5_performant_intr_mask,
+	SA5_fifo_full,
+	SA5_performant_intr_pending,
+	SA5_performant_completed,
+};
+
 struct board_type {
 	u32	board_id;
 	char	*product_name;

diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index b5cc705..b5125dc 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h

@@ -151,7 +151,7 @@
 #define HPSA_VPD_HEADER_SZ              4
 
 /* Logical volume states */
-#define HPSA_VPD_LV_STATUS_UNSUPPORTED			-1
+#define HPSA_VPD_LV_STATUS_UNSUPPORTED			0xff
 #define HPSA_LV_OK                                      0x0
 #define HPSA_LV_UNDERGOING_ERASE			0x0F
 #define HPSA_LV_UNDERGOING_RPI				0x12
@@ -238,11 +238,21 @@
 	u8 LUN[HPSA_MAX_LUN][8];
 };
 
+/*
+ * One entry of an extended REPORT LUNS response.  The members add up to
+ * 24 bytes, the size of the raw u8[24] element this struct replaces in
+ * struct ReportExtendedLUNdata, with ioaccel_handle at byte offset 20.
+ * NOTE(review): the layout relies on no padding being inserted and on
+ * ioaccel_handle being compared in the byte order the controller
+ * returns -- confirm the packing pragma in effect and the endianness.
+ */
+struct ext_report_lun_entry {
+	u8 lunid[8];
+	u8 wwid[8];
+	u8 device_type;
+	u8 device_flags;
+	u8 lun_count; /* multi-lun device, how many luns */
+	u8 redundant_paths;
+	u32 ioaccel_handle; /* ioaccel1 only uses lower 16 bits */
+};
+
 struct ReportExtendedLUNdata {
 	u8 LUNListLength[4];
 	u8 extended_response_flag;
 	u8 reserved[3];
-	u8 LUN[HPSA_MAX_LUN][24];
+	struct ext_report_lun_entry LUN[HPSA_MAX_LUN];
 };
 
 struct SenseSubsystem_info {
@@ -375,6 +385,7 @@
  *        or a bus address.
  */
 
+#define COMMANDLIST_ALIGNMENT 128
 struct CommandList {
 	struct CommandListHeader Header;
 	struct RequestBlock      Request;
@@ -389,21 +400,7 @@
 	struct list_head list;
 	struct completion *waiting;
 	void   *scsi_cmd;
-
-/* on 64 bit architectures, to get this to be 32-byte-aligned
- * it so happens we need PAD_64 bytes of padding, on 32 bit systems,
- * we need PAD_32 bytes of padding (see below).   This does that.
- * If it happens that 64 bit and 32 bit systems need different
- * padding, PAD_32 and PAD_64 can be set independently, and.
- * the code below will do the right thing.
- */
-#define IS_32_BIT ((8 - sizeof(long))/4)
-#define IS_64_BIT (!IS_32_BIT)
-#define PAD_32 (40)
-#define PAD_64 (12)
-#define COMMANDLIST_PAD (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
-	u8 pad[COMMANDLIST_PAD];
-};
+} __aligned(COMMANDLIST_ALIGNMENT);
 
 /* Max S/G elements in I/O accelerator command */
 #define IOACCEL1_MAXSGENTRIES           24
@@ -413,6 +410,7 @@
  * Structure for I/O accelerator (mode 1) commands.
  * Note that this structure must be 128-byte aligned in size.
  */
+#define IOACCEL1_COMMANDLIST_ALIGNMENT 128
 struct io_accel1_cmd {
 	u16 dev_handle;			/* 0x00 - 0x01 */
 	u8  reserved1;			/* 0x02 */
@@ -440,12 +438,7 @@
 	struct vals32 host_addr;	/* 0x70 - 0x77 */
 	u8  CISS_LUN[8];		/* 0x78 - 0x7F */
 	struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES];
-#define IOACCEL1_PAD_64 0
-#define IOACCEL1_PAD_32 0
-#define IOACCEL1_PAD (IS_32_BIT * IOACCEL1_PAD_32 + \
-			IS_64_BIT * IOACCEL1_PAD_64)
-	u8 pad[IOACCEL1_PAD];
-};
+} __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT);
 
 #define IOACCEL1_FUNCTION_SCSIIO        0x00
 #define IOACCEL1_SGLOFFSET              32
@@ -510,14 +503,11 @@
 	u8 sense_data_buff[32];		/* sense/response data buffer */
 };
 
-#define IOACCEL2_64_PAD 76
-#define IOACCEL2_32_PAD 76
-#define IOACCEL2_PAD (IS_32_BIT * IOACCEL2_32_PAD + \
-			IS_64_BIT * IOACCEL2_64_PAD)
 /*
  * Structure for I/O accelerator (mode 2 or m2) commands.
  * Note that this structure must be 128-byte aligned in size.
  */
+#define IOACCEL2_COMMANDLIST_ALIGNMENT 128
 struct io_accel2_cmd {
 	u8  IU_type;			/* IU Type */
 	u8  direction;			/* direction, memtype, and encryption */
@@ -544,8 +534,7 @@
 	u32 tweak_upper;		/* Encryption tweak, upper 4 bytes */
 	struct ioaccel2_sg_element sg[IOACCEL2_MAXSGENTRIES];
 	struct io_accel2_scsi_response error_data;
-	u8 pad[IOACCEL2_PAD];
-};
+} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT);
 
 /*
  * defines for Mode 2 command struct
@@ -636,7 +625,7 @@
 	u32            RepQCount;
 	u32            RepQCtrAddrLow32;
 	u32            RepQCtrAddrHigh32;
-#define MAX_REPLY_QUEUES 8
+#define MAX_REPLY_QUEUES 64
 	struct vals32  RepQAddr[MAX_REPLY_QUEUES];
 };
 

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 1185484..a669f2d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c

@@ -244,7 +244,7 @@
 	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
 	sk->sk_state_change = tcp_sw_conn->old_state_change;
 	sk->sk_write_space  = tcp_sw_conn->old_write_space;
-	sk->sk_no_check	 = 0;
+	sk->sk_no_check_tx = 0;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 26dc005b..3d1bc67 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c

@@ -338,7 +338,7 @@
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 	struct iscsi_scsi_req *hdr;
-	unsigned hdrlength, cmd_len;
+	unsigned hdrlength, cmd_len, transfer_length;
 	itt_t itt;
 	int rc;
 
@@ -391,11 +391,11 @@
 	if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
 		task->protected = true;
 
+	transfer_length = scsi_transfer_length(sc);
+	hdr->data_length = cpu_to_be32(transfer_length);
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
-		unsigned out_len = scsi_out(sc)->length;
 		struct iscsi_r2t_info *r2t = &task->unsol_r2t;
 
-		hdr->data_length = cpu_to_be32(out_len);
 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
 		/*
 		 * Write counters:
@@ -414,18 +414,19 @@
 		memset(r2t, 0, sizeof(*r2t));
 
 		if (session->imm_data_en) {
-			if (out_len >= session->first_burst)
+			if (transfer_length >= session->first_burst)
 				task->imm_count = min(session->first_burst,
 							conn->max_xmit_dlength);
 			else
-				task->imm_count = min(out_len,
-							conn->max_xmit_dlength);
+				task->imm_count = min(transfer_length,
+						      conn->max_xmit_dlength);
 			hton24(hdr->dlength, task->imm_count);
 		} else
 			zero_data(hdr->dlength);
 
 		if (!session->initial_r2t_en) {
-			r2t->data_length = min(session->first_burst, out_len) -
+			r2t->data_length = min(session->first_burst,
+					       transfer_length) -
 					       task->imm_count;
 			r2t->data_offset = task->imm_count;
 			r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
@@ -438,7 +439,6 @@
 	} else {
 		hdr->flags |= ISCSI_FLAG_CMD_FINAL;
 		zero_data(hdr->dlength);
-		hdr->data_length = cpu_to_be32(scsi_in(sc)->length);
 
 		if (sc->sc_data_direction == DMA_FROM_DEVICE)
 			hdr->flags |= ISCSI_FLAG_CMD_READ;
@@ -466,7 +466,7 @@
 			  scsi_bidi_cmnd(sc) ? "bidirectional" :
 			  sc->sc_data_direction == DMA_TO_DEVICE ?
 			  "write" : "read", conn->id, sc, sc->cmnd[0],
-			  task->itt, scsi_bufflen(sc),
+			  task->itt, transfer_length,
 			  scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0,
 			  session->cmdsn,
 			  session->max_cmdsn - session->exp_cmdsn + 1);
@@ -1442,9 +1442,9 @@
 		conn->task = NULL;
 	}
 	/* regular RX path uses back_lock */
-	spin_lock_bh(&conn->session->back_lock);
+	spin_lock(&conn->session->back_lock);
 	__iscsi_put_task(task);
-	spin_unlock_bh(&conn->session->back_lock);
+	spin_unlock(&conn->session->back_lock);
 	return rc;
 }
 

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 94a3caf..434e903 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -640,6 +640,7 @@
 #define HBA_DEVLOSS_TMO         0x2000 /* HBA in devloss timeout */
 #define HBA_RRQ_ACTIVE		0x4000 /* process the rrq active list */
 #define HBA_FCP_IOQ_FLUSH	0x8000 /* FCP I/O queues being flushed */
+#define HBA_FW_DUMP_OP		0x10000 /* Skips fn reset before FW dump */
 	uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
 	struct lpfc_dmabuf slim2p;
 

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 8d5b6ce..1d7a5c3 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -919,10 +919,15 @@
 		phba->cfg_sriov_nr_virtfn = 0;
 	}
 
+	if (opcode == LPFC_FW_DUMP)
+		phba->hba_flag |= HBA_FW_DUMP_OP;
+
 	status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
 
-	if (status != 0)
+	if (status != 0) {
+		phba->hba_flag &= ~HBA_FW_DUMP_OP;
 		return status;
+	}
 
 	/* wait for the device to be quiesced before firmware reset */
 	msleep(100);
@@ -2364,7 +2369,7 @@
 	uint8_t wwpn[WWN_SZ];
 	int rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	/* count may include a LF at end of string */
@@ -2432,7 +2437,7 @@
 	uint8_t wwpn[WWN_SZ];
 	int rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	/* count may include a LF at end of string */
@@ -2499,7 +2504,7 @@
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
 	int val = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (!isdigit(buf[0]))
@@ -2565,7 +2570,7 @@
 
 	int rc = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (oas_state) {
@@ -2670,7 +2675,7 @@
 	uint64_t oas_lun;
 	int len = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0)
@@ -2716,7 +2721,7 @@
 	uint64_t scsi_lun;
 	ssize_t rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0)
@@ -4655,7 +4660,7 @@
 #       0x0 - 0x7f  = CS_CTL field in FC header (high 7 bits)
 # Value range is [0x0,0x7f]. Default value is 0
 */
-LPFC_ATTR_R(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
+LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
 
 /*
 # lpfc_enable_bg: Enable BlockGuard (Emulex's Implementation of T10-DIF)

diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index ca2f4ea..5b5c825 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h
index a94d4c9..928ef60 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.h
+++ b/drivers/scsi/lpfc/lpfc_bsg.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2010-2012 Emulex.  All rights reserved.                *
+ * Copyright (C) 2010-2014 Emulex.  All rights reserved.                *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index adda0bf..db5604f 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -289,6 +289,7 @@
 void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_bemem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
+void lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba);
 void lpfc_sli_hba_iocb_abort(struct lpfc_hba *);
 void lpfc_sli_flush_fcp_rings(struct lpfc_hba *);
 int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
@@ -310,6 +311,9 @@
 int lpfc_sli_sum_iocb(struct lpfc_vport *, uint16_t, uint64_t, lpfc_ctx_cmd);
 int lpfc_sli_abort_iocb(struct lpfc_vport *, struct lpfc_sli_ring *, uint16_t,
 			uint64_t, lpfc_ctx_cmd);
+int
+lpfc_sli_abort_taskmgmt(struct lpfc_vport *, struct lpfc_sli_ring *,
+			uint16_t, uint64_t, lpfc_ctx_cmd);
 
 void lpfc_mbox_timeout(unsigned long);
 void lpfc_mbox_timeout_handler(struct lpfc_hba *);

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 828c08e..b0aedce 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2007-2012 Emulex.  All rights reserved.           *
+ * Copyright (C) 2007-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -2314,7 +2314,7 @@
 			goto too_big;
 	}
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/* OAS CQ */
 		qp = phba->sli4_hba.oas_cq;

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 624fe0b..7a5d81a 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 294c072..2a17e31 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -5634,6 +5634,9 @@
 		ndlp->active_rrqs_xri_bitmap =
 				mempool_alloc(vport->phba->active_rrq_pool,
 					      GFP_KERNEL);
+		if (ndlp->active_rrqs_xri_bitmap)
+			memset(ndlp->active_rrqs_xri_bitmap, 0,
+			       ndlp->phba->cfg_rrq_xri_bitmap_sz);
 	}
 
 

diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 3d9438c..2362592 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index fd79f7d..f432ec1 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.                *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.                *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 635eeb3..06f9a5b 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -820,7 +820,139 @@
 }
 
 /**
+ * lpfc_sli4_free_sp_events - Clean up sp_queue_event to free the deferred
+ * rspiocbs and receive buffers queued on it
+ *
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine cleans up completed slow path events after the HBA is reset
+ * when bringing down the SLI layer: it clears HBA_SP_QUEUE_EVT, then pops
+ * each event under hbalock and frees its iocbq or hbq_dmabuf by CQE code.
+ *
+ * Return codes
+ *   void.
+ **/
+static void
+lpfc_sli4_free_sp_events(struct lpfc_hba *phba)
+{
+	struct lpfc_iocbq *rspiocbq;
+	struct hbq_dmabuf *dmabuf;
+	struct lpfc_cq_event *cq_event;
+
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~HBA_SP_QUEUE_EVT;
+	spin_unlock_irq(&phba->hbalock);
+
+	while (!list_empty(&phba->sli4_hba.sp_queue_event)) {
+		/* Pop the next event from the head of the queue under hbalock */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_queue_event,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+
+		switch (bf_get(lpfc_wcqe_c_code, &cq_event->cqe.wcqe_cmpl)) {
+		case CQE_CODE_COMPL_WQE:
+			rspiocbq = container_of(cq_event, struct lpfc_iocbq,
+						 cq_event);
+			lpfc_sli_release_iocbq(phba, rspiocbq);
+			break;
+		case CQE_CODE_RECEIVE:
+		case CQE_CODE_RECEIVE_V1:
+			dmabuf = container_of(cq_event, struct hbq_dmabuf,
+					      cq_event);
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+		}
+	}
+}
+
+/**
+ * lpfc_hba_free_post_buf - Free posted receive buffers after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine cleans up posted buffers after the HBA is reset when bringing
+ * down the SLI layer: it calls lpfc_sli_hbqbuf_free_all() if HBQs are
+ * enabled, otherwise it frees the buffers on the ELS ring's postbufq.
+ *
+ * Return codes
+ *   void.
+ **/
+static void
+lpfc_hba_free_post_buf(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring *pring;
+	struct lpfc_dmabuf *mp, *next_mp;
+	LIST_HEAD(buflist);
+	int count;
+
+	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
+		lpfc_sli_hbqbuf_free_all(phba);
+	else {
+		/* Detach the ELS ring's preposted buffers under hbalock */
+		pring = &psli->ring[LPFC_ELS_RING];
+		spin_lock_irq(&phba->hbalock);
+		list_splice_init(&pring->postbufq, &buflist);
+		spin_unlock_irq(&phba->hbalock);
+
+		count = 0;
+		list_for_each_entry_safe(mp, next_mp, &buflist, list) {
+			list_del(&mp->list);
+			count++;
+			lpfc_mbuf_free(phba, mp->virt, mp->phys);
+			kfree(mp);
+		}
+
+		spin_lock_irq(&phba->hbalock);
+		pring->postbufq_cnt -= count;	/* account for the freed buffers */
+		spin_unlock_irq(&phba->hbalock);
+	}
+}
+
+/**
+ * lpfc_hba_clean_txcmplq - Cancel IOCBs left on txcmplq after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * After the HBA is reset when bringing down the SLI layer, move each ring's
+ * txcmplq to a local list, cancel it and call lpfc_sli_abort_iocb_ring().
+ *
+ * Return codes
+ *   void
+ **/
+static void
+lpfc_hba_clean_txcmplq(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring *pring;
+	LIST_HEAD(completions);
+	int i;
+
+	for (i = 0; i < psli->num_rings; i++) {
+		pring = &psli->ring[i];
+		if (phba->sli_rev >= LPFC_SLI_REV4)
+			spin_lock_irq(&pring->ring_lock);
+		else
+			spin_lock_irq(&phba->hbalock);
+		/* At this point in time the HBA is either reset or DOA. Either
+		 * way, nothing should be on txcmplq as it will NEVER complete.
+		 */
+		list_splice_init(&pring->txcmplq, &completions);
+		pring->txcmplq_cnt = 0;	/* txcmplq was emptied by the splice */
+
+		if (phba->sli_rev >= LPFC_SLI_REV4)
+			spin_unlock_irq(&pring->ring_lock);
+		else
+			spin_unlock_irq(&phba->hbalock);
+
+		/* Cancel all the IOCBs from the completions list */
+		lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_ABORTED);
+		lpfc_sli_abort_iocb_ring(phba, pring);
+	}
+}
+
+/**
  * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset
+	int i;
  * @phba: pointer to lpfc HBA data structure.
  *
  * This routine will do uninitialization after the HBA is reset when bring
@@ -833,44 +965,8 @@
 static int
 lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_dmabuf *mp, *next_mp;
-	LIST_HEAD(completions);
-	int i;
-
-	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
-		lpfc_sli_hbqbuf_free_all(phba);
-	else {
-		/* Cleanup preposted buffers on the ELS ring */
-		pring = &psli->ring[LPFC_ELS_RING];
-		list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
-			list_del(&mp->list);
-			pring->postbufq_cnt--;
-			lpfc_mbuf_free(phba, mp->virt, mp->phys);
-			kfree(mp);
-		}
-	}
-
-	spin_lock_irq(&phba->hbalock);
-	for (i = 0; i < psli->num_rings; i++) {
-		pring = &psli->ring[i];
-
-		/* At this point in time the HBA is either reset or DOA. Either
-		 * way, nothing should be on txcmplq as it will NEVER complete.
-		 */
-		list_splice_init(&pring->txcmplq, &completions);
-		spin_unlock_irq(&phba->hbalock);
-
-		/* Cancel all the IOCBs from the completions list */
-		lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
-				      IOERR_SLI_ABORTED);
-
-		lpfc_sli_abort_iocb_ring(phba, pring);
-		spin_lock_irq(&phba->hbalock);
-	}
-	spin_unlock_irq(&phba->hbalock);
-
+	lpfc_hba_free_post_buf(phba);
+	lpfc_hba_clean_txcmplq(phba);
 	return 0;
 }
 
@@ -890,13 +986,12 @@
 {
 	struct lpfc_scsi_buf *psb, *psb_next;
 	LIST_HEAD(aborts);
-	int ret;
 	unsigned long iflag = 0;
 	struct lpfc_sglq *sglq_entry = NULL;
 
-	ret = lpfc_hba_down_post_s3(phba);
-	if (ret)
-		return ret;
+	lpfc_hba_free_post_buf(phba);
+	lpfc_hba_clean_txcmplq(phba);
+
 	/* At this point in time the HBA is either reset or DOA. Either
 	 * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be
 	 * on the lpfc_sgl_list so that it can either be freed if the
@@ -932,6 +1027,8 @@
 	spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
 	list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
 	spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+
+	lpfc_sli4_free_sp_events(phba);
 	return 0;
 }
 
@@ -1250,7 +1347,6 @@
 lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
 {
 	uint32_t old_host_status = phba->work_hs;
-	struct lpfc_sli_ring  *pring;
 	struct lpfc_sli *psli = &phba->sli;
 
 	/* If the pci channel is offline, ignore possible errors,
@@ -1279,8 +1375,7 @@
 	 * dropped by the firmware. Error iocb (I/O) on txcmplq and let the
 	 * SCSI layer retry it after re-establishing link.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 
 	/*
 	 * There was a firmware error. Take the hba offline and then
@@ -1348,7 +1443,6 @@
 {
 	struct lpfc_vport *vport = phba->pport;
 	struct lpfc_sli   *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
 	uint32_t event_data;
 	unsigned long temperature;
 	struct temp_event temp_event_data;
@@ -1400,8 +1494,7 @@
 		* Error iocb (I/O) on txcmplq and let the SCSI layer
 		* retry it after re-establishing link.
 		*/
-		pring = &psli->ring[psli->fcp_ring];
-		lpfc_sli_abort_iocb_ring(phba, pring);
+		lpfc_sli_abort_fcp_rings(phba);
 
 		/*
 		 * There was a firmware error.  Take the hba offline and then
@@ -1940,78 +2033,81 @@
 
 	switch (dev_id) {
 	case PCI_DEVICE_ID_FIREFLY:
-		m = (typeof(m)){"LP6000", "PCI", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP6000", "PCI",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_SUPERFLY:
 		if (vp->rev.biuRev >= 1 && vp->rev.biuRev <= 3)
-			m = (typeof(m)){"LP7000", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP7000", "PCI", ""};
 		else
-			m = (typeof(m)){"LP7000E", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP7000E", "PCI", ""};
+		m.function = "Obsolete, Unsupported Fibre Channel Adapter";
 		break;
 	case PCI_DEVICE_ID_DRAGONFLY:
 		m = (typeof(m)){"LP8000", "PCI",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_CENTAUR:
 		if (FC_JEDEC_ID(vp->rev.biuRev) == CENTAUR_2G_JEDEC_ID)
-			m = (typeof(m)){"LP9002", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP9002", "PCI", ""};
 		else
-			m = (typeof(m)){"LP9000", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP9000", "PCI", ""};
+		m.function = "Obsolete, Unsupported Fibre Channel Adapter";
 		break;
 	case PCI_DEVICE_ID_RFLY:
 		m = (typeof(m)){"LP952", "PCI",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PEGASUS:
 		m = (typeof(m)){"LP9802", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_THOR:
 		m = (typeof(m)){"LP10000", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_VIPER:
 		m = (typeof(m)){"LPX1000",  "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PFLY:
 		m = (typeof(m)){"LP982", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_TFLY:
 		m = (typeof(m)){"LP1050", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS:
 		m = (typeof(m)){"LP11000", "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS_SCSP:
 		m = (typeof(m)){"LP11000-SP", "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS_DCSP:
 		m = (typeof(m)){"LP11002-SP",  "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE:
-		m = (typeof(m)){"LPe1000", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1000", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE_SCSP:
-		m = (typeof(m)){"LPe1000-SP", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1000-SP", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE_DCSP:
-		m = (typeof(m)){"LPe1002-SP", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1002-SP", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_BMID:
 		m = (typeof(m)){"LP1150", "PCI-X2", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_BSMB:
-		m = (typeof(m)){"LP111", "PCI-X2", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP111", "PCI-X2",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_ZEPHYR:
 		m = (typeof(m)){"LPe11000", "PCIe", "Fibre Channel Adapter"};
@@ -2030,16 +2126,20 @@
 		m = (typeof(m)){"LPe111", "PCIe", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP101:
-		m = (typeof(m)){"LP101", "PCI-X", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP101", "PCI-X",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP10000S:
-		m = (typeof(m)){"LP10000-S", "PCI", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP10000-S", "PCI",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP11000S:
-		m = (typeof(m)){"LP11000-S", "PCI-X2", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP11000-S", "PCI-X2",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LPE11000S:
-		m = (typeof(m)){"LPe11000-S", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe11000-S", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_SAT:
 		m = (typeof(m)){"LPe12000", "PCIe", "Fibre Channel Adapter"};
@@ -2060,20 +2160,21 @@
 		m = (typeof(m)){"LPe12000-S", "PCIe", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HORNET:
-		m = (typeof(m)){"LP21000", "PCIe", "FCoE Adapter"};
+		m = (typeof(m)){"LP21000", "PCIe",
+				"Obsolete, Unsupported FCoE Adapter"};
 		GE = 1;
 		break;
 	case PCI_DEVICE_ID_PROTEUS_VF:
 		m = (typeof(m)){"LPev12000", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PROTEUS_PF:
 		m = (typeof(m)){"LPev12000", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PROTEUS_S:
 		m = (typeof(m)){"LPemv12002-S", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_TIGERSHARK:
 		oneConnect = 1;
@@ -2089,17 +2190,24 @@
 		break;
 	case PCI_DEVICE_ID_BALIUS:
 		m = (typeof(m)){"LPVe12002", "PCIe Shared I/O",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LANCER_FC:
-	case PCI_DEVICE_ID_LANCER_FC_VF:
 		m = (typeof(m)){"LPe16000", "PCIe", "Fibre Channel Adapter"};
 		break;
+	case PCI_DEVICE_ID_LANCER_FC_VF:
+		m = (typeof(m)){"LPe16000", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
+		break;
 	case PCI_DEVICE_ID_LANCER_FCOE:
-	case PCI_DEVICE_ID_LANCER_FCOE_VF:
 		oneConnect = 1;
 		m = (typeof(m)){"OCe15100", "PCIe", "FCoE"};
 		break;
+	case PCI_DEVICE_ID_LANCER_FCOE_VF:
+		oneConnect = 1;
+		m = (typeof(m)){"OCe15100", "PCIe",
+				"Obsolete, Unsupported FCoE"};
+		break;
 	case PCI_DEVICE_ID_SKYHAWK:
 	case PCI_DEVICE_ID_SKYHAWK_VF:
 		oneConnect = 1;
@@ -4614,7 +4722,10 @@
 		phba->link_state = LPFC_HBA_ERROR;
 		return;
 	}
-	lpfc_offline_prep(phba, LPFC_MBX_WAIT);
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+		lpfc_offline_prep(phba, LPFC_MBX_WAIT);
+	else
+		lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
 	lpfc_offline(phba);
 	lpfc_sli_brdrestart(phba);
 	lpfc_online(phba);
@@ -9663,9 +9774,6 @@
 static void
 lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
-
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"2723 PCI channel I/O abort preparing for recovery\n");
 
@@ -9673,8 +9781,7 @@
 	 * There may be errored I/Os through HBA, abort all I/Os on txcmplq
 	 * and let the SCSI mid-layer to retry them to recover.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 }
 
 /**
@@ -10417,17 +10524,13 @@
 static void
 lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
-
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"2828 PCI channel I/O abort preparing for recovery\n");
 	/*
 	 * There may be errored I/Os through HBA, abort all I/Os on txcmplq
 	 * and let the SCSI mid-layer to retry them to recover.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 }
 
 /**
@@ -10898,7 +11001,7 @@
 	if (phba->sli4_hba.pc_sli4_params.oas_supported) {
 		phba->cfg_fof = 1;
 	} else {
-		phba->cfg_EnableXLane = 0;
+		phba->cfg_fof = 0;
 		if (phba->device_data_mem_pool)
 			mempool_destroy(phba->device_data_mem_pool);
 		phba->device_data_mem_pool = NULL;
@@ -10928,7 +11031,7 @@
 	if (rc)
 		return -ENOMEM;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		rc = lpfc_cq_create(phba, phba->sli4_hba.oas_cq,
 				    phba->sli4_hba.fof_eq, LPFC_WCQ, LPFC_FCP);
@@ -10947,8 +11050,7 @@
 	return 0;
 
 out_oas_wq:
-	if (phba->cfg_EnableXLane)
-		lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq);
+	lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq);
 out_oas_cq:
 	lpfc_eq_destroy(phba, phba->sli4_hba.fof_eq);
 	return rc;
@@ -10982,7 +11084,7 @@
 
 	phba->sli4_hba.fof_eq = qdesc;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/* Create OAS CQ */
 		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,

diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index ed419aa..3fa6533 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2012 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 462453e..2df11da 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -73,7 +73,7 @@
 {
 	struct lpfc_vport *vport = (struct lpfc_vport *)sdev->host->hostdata;
 
-	if (vport->phba->cfg_EnableXLane)
+	if (vport->phba->cfg_fof)
 		return ((struct lpfc_device_data *)sdev->hostdata)->rport_data;
 	else
 		return (struct lpfc_rport_data *)sdev->hostdata;
@@ -3462,7 +3462,7 @@
 	 * If the OAS driver feature is enabled and the lun is enabled for
 	 * OAS, set the oas iocb related flags.
 	 */
-	if ((phba->cfg_EnableXLane) && ((struct lpfc_device_data *)
+	if ((phba->cfg_fof) && ((struct lpfc_device_data *)
 		scsi_cmnd->device->hostdata)->oas_enabled)
 		lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_OAS;
 	return 0;
@@ -4314,6 +4314,7 @@
 		fcp_cmnd->fcpCntl1 = SIMPLE_Q;
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
+	piocbq->iocb.un.fcpi.fcpi_XRdy = 0;
 
 	/*
 	 * There are three possibilities here - use scatter-gather segment, use
@@ -4782,7 +4783,9 @@
 	struct lpfc_scsi_buf *lpfc_cmd;
 	IOCB_t *cmd, *icmd;
 	int ret = SUCCESS, status = 0;
-	unsigned long flags;
+	struct lpfc_sli_ring *pring_s4;
+	int ring_number, ret_val;
+	unsigned long flags, iflags;
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
 
 	status = fc_block_scsi_eh(cmnd);
@@ -4833,6 +4836,14 @@
 
 	BUG_ON(iocb->context1 != lpfc_cmd);
 
+	/* abort issued in recovery is still in progress */
+	if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
+		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+			 "3389 SCSI Layer I/O Abort Request is pending\n");
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto wait_for_cmpl;
+	}
+
 	abtsiocb = __lpfc_sli_get_iocbq(phba);
 	if (abtsiocb == NULL) {
 		ret = FAILED;
@@ -4871,11 +4882,23 @@
 
 	abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
 	abtsiocb->vport = vport;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx;
+		pring_s4 = &phba->sli.ring[ring_number];
+		/* Note: both hbalock and ring_lock must be held here */
+		spin_lock_irqsave(&pring_s4->ring_lock, iflags);
+		ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
+						abtsiocb, 0);
+		spin_unlock_irqrestore(&pring_s4->ring_lock, iflags);
+	} else {
+		ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
+						abtsiocb, 0);
+	}
 	/* no longer need the lock after this point */
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 
-	if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) ==
-	    IOCB_ERROR) {
+
+	if (ret_val == IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, abtsiocb);
 		ret = FAILED;
 		goto out;
@@ -4885,12 +4908,16 @@
 		lpfc_sli_handle_fast_ring_event(phba,
 			&phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ);
 
+wait_for_cmpl:
 	lpfc_cmd->waitq = &waitq;
 	/* Wait for abort to complete */
 	wait_event_timeout(waitq,
 			  (lpfc_cmd->pCmd != cmnd),
 			   msecs_to_jiffies(2*vport->cfg_devloss_tmo*1000));
+
+	spin_lock_irqsave(shost->host_lock, flags);
 	lpfc_cmd->waitq = NULL;
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	if (lpfc_cmd->pCmd == cmnd) {
 		ret = FAILED;
@@ -5172,8 +5199,9 @@
 
 	cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context);
 	if (cnt)
-		lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring],
-				    tgt_id, lun_id, context);
+		lpfc_sli_abort_taskmgmt(vport,
+					&phba->sli.ring[phba->sli.fcp_ring],
+					tgt_id, lun_id, context);
 	later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies;
 	while (time_after(later, jiffies) && cnt) {
 		schedule_timeout_uninterruptible(msecs_to_jiffies(20));
@@ -5491,7 +5519,7 @@
 	if (!rport || fc_remote_port_chkready(rport))
 		return -ENXIO;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/*
 		 * Check to see if the device data structure for the lun
@@ -5616,7 +5644,7 @@
 	struct lpfc_device_data *device_data = sdev->hostdata;
 
 	atomic_dec(&phba->sdev_cnt);
-	if ((phba->cfg_EnableXLane) && (device_data)) {
+	if ((phba->cfg_fof) && (device_data)) {
 		spin_lock_irqsave(&phba->devicelock, flags);
 		device_data->available = false;
 		if (!device_data->oas_enabled)
@@ -5655,7 +5683,7 @@
 	int memory_flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn  ||
-	    !(phba->cfg_EnableXLane))
+	    !(phba->cfg_fof))
 		return NULL;
 
 	/* Attempt to create the device data to contain lun info */
@@ -5693,7 +5721,7 @@
 {
 
 	if (unlikely(!phba) || !lun_info  ||
-	    !(phba->cfg_EnableXLane))
+	    !(phba->cfg_fof))
 		return;
 
 	if (!list_empty(&lun_info->listentry))
@@ -5727,7 +5755,7 @@
 	struct lpfc_device_data *lun_info;
 
 	if (unlikely(!phba) || !list || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return NULL;
 
 	/* Check to see if the lun is already enabled for OAS. */
@@ -5789,7 +5817,7 @@
 	    !starting_lun || !found_vport_wwpn ||
 	    !found_target_wwpn || !found_lun || !found_lun_status ||
 	    (*starting_lun == NO_MORE_OAS_LUN) ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	lun = *starting_lun;
@@ -5873,7 +5901,7 @@
 	unsigned long flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	spin_lock_irqsave(&phba->devicelock, flags);
@@ -5930,7 +5958,7 @@
 	unsigned long flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	spin_lock_irqsave(&phba->devicelock, flags);

diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 0120bfc..0389ac1 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 6bb51f8..32ada05 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -265,6 +265,16 @@
 		return NULL;
 
 	q->hba_index = idx;
+
+	/*
+	 * insert barrier for instruction interlock : data from the hardware
+	 * must have the valid bit checked before it can be copied and acted
+	 * upon. Given what was seen in lpfc_sli4_cq_get() of speculative
+	 * instructions allowing action on content before valid bit checked,
+	 * add barrier here as well. May not be needed as "content" is a
+	 * single 32-bit entity here (vs multi word structure for cq's).
+	 */
+	mb();
 	return eqe;
 }
 
@@ -370,6 +380,17 @@
 
 	cqe = q->qe[q->hba_index].cqe;
 	q->hba_index = idx;
+
+	/*
+	 * insert barrier for instruction interlock : data from the hardware
+	 * must have the valid bit checked before it can be copied and acted
+	 * upon. Speculative instructions were allowing a bcopy at the start
+	 * of lpfc_sli4_fp_handle_wcqe(), which is called immediately
+	 * after our return, to copy data before the valid bit check above
+	 * was done. As such, some of the copied data was stale. The barrier
+	 * ensures the check is before any data is copied.
+	 */
+	mb();
 	return cqe;
 }
 
@@ -3511,14 +3532,27 @@
 	/* Error everything on txq and txcmplq
 	 * First do the txq.
 	 */
-	spin_lock_irq(&phba->hbalock);
-	list_splice_init(&pring->txq, &completions);
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		spin_lock_irq(&pring->ring_lock);
+		list_splice_init(&pring->txq, &completions);
+		pring->txq_cnt = 0;
+		spin_unlock_irq(&pring->ring_lock);
 
-	/* Next issue ABTS for everything on the txcmplq */
-	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-		lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_lock_irq(&phba->hbalock);
+		/* Next issue ABTS for everything on the txcmplq */
+		list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_unlock_irq(&phba->hbalock);
+	} else {
+		spin_lock_irq(&phba->hbalock);
+		list_splice_init(&pring->txq, &completions);
+		pring->txq_cnt = 0;
 
-	spin_unlock_irq(&phba->hbalock);
+		/* Next issue ABTS for everything on the txcmplq */
+		list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_unlock_irq(&phba->hbalock);
+	}
 
 	/* Cancel all the IOCBs from the completions list */
 	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
@@ -3526,6 +3560,36 @@
 }
 
 /**
+ * lpfc_sli_abort_fcp_rings - Abort all iocbs in all FCP rings
+ * @phba: Pointer to HBA context object.
+ *
+ * This function aborts all iocbs in every FCP ring and frees all the iocb
+ * objects in each txq. It issues an abort iocb for every iocb command in
+ * each txcmplq. The iocbs in the txcmplq are not guaranteed to complete
+ * before the return of this function. The caller is not required to hold
+ * any locks.
+ **/
+void
+lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring  *pring;
+	uint32_t i;
+
+	/* Abort the iocbs on every FCP ring */
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
+			pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS];
+			lpfc_sli_abort_iocb_ring(phba, pring);
+		}
+	} else {
+		pring = &psli->ring[psli->fcp_ring];
+		lpfc_sli_abort_iocb_ring(phba, pring);
+	}
+}
+
+
+/**
  * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring
  * @phba: Pointer to HBA context object.
  *
@@ -3542,28 +3606,55 @@
 	LIST_HEAD(txcmplq);
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring  *pring;
-
-	/* Currently, only one fcp ring */
-	pring = &psli->ring[psli->fcp_ring];
+	uint32_t i;
 
 	spin_lock_irq(&phba->hbalock);
-	/* Retrieve everything on txq */
-	list_splice_init(&pring->txq, &txq);
-
-	/* Retrieve everything on the txcmplq */
-	list_splice_init(&pring->txcmplq, &txcmplq);
-
 	/* Indicate the I/O queues are flushed */
 	phba->hba_flag |= HBA_FCP_IOQ_FLUSH;
 	spin_unlock_irq(&phba->hbalock);
 
-	/* Flush the txq */
-	lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_DOWN);
+	/* Flush the iocbs on every FCP ring */
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
+			pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS];
 
-	/* Flush the txcmpq */
-	lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_DOWN);
+			spin_lock_irq(&pring->ring_lock);
+			/* Retrieve everything on txq */
+			list_splice_init(&pring->txq, &txq);
+			/* Retrieve everything on the txcmplq */
+			list_splice_init(&pring->txcmplq, &txcmplq);
+			pring->txq_cnt = 0;
+			pring->txcmplq_cnt = 0;
+			spin_unlock_irq(&pring->ring_lock);
+
+			/* Flush the txq */
+			lpfc_sli_cancel_iocbs(phba, &txq,
+					      IOSTAT_LOCAL_REJECT,
+					      IOERR_SLI_DOWN);
+			/* Flush the txcmplq */
+			lpfc_sli_cancel_iocbs(phba, &txcmplq,
+					      IOSTAT_LOCAL_REJECT,
+					      IOERR_SLI_DOWN);
+		}
+	} else {
+		pring = &psli->ring[psli->fcp_ring];
+
+		spin_lock_irq(&phba->hbalock);
+		/* Retrieve everything on txq */
+		list_splice_init(&pring->txq, &txq);
+		/* Retrieve everything on the txcmplq */
+		list_splice_init(&pring->txcmplq, &txcmplq);
+		pring->txq_cnt = 0;
+		pring->txcmplq_cnt = 0;
+		spin_unlock_irq(&phba->hbalock);
+
+		/* Flush the txq */
+		lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_DOWN);
+		/* Flush the txcmplq */
+		lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_DOWN);
+	}
 }
 
 /**
@@ -3966,12 +4057,13 @@
 {
 	struct lpfc_sli *psli = &phba->sli;
 	uint16_t cfg_value;
-	int rc;
+	int rc = 0;
 
 	/* Reset HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"0295 Reset HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			"0295 Reset HBA Data: x%x x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag,
+			phba->hba_flag);
 
 	/* perform board reset */
 	phba->fc_eventTag = 0;
@@ -3984,6 +4076,12 @@
 	phba->fcf.fcf_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */
+	if (phba->hba_flag & HBA_FW_DUMP_OP) {
+		phba->hba_flag &= ~HBA_FW_DUMP_OP;
+		return rc;
+	}
+
 	/* Now physically reset the device */
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 			"0389 Performing PCI function reset!\n");
@@ -4981,7 +5079,7 @@
 		} while (++fcp_eqidx < phba->cfg_fcp_io_channel);
 	}
 
-	if (phba->cfg_EnableXLane)
+	if (phba->cfg_fof)
 		lpfc_sli4_cq_release(phba->sli4_hba.oas_cq, LPFC_QUEUE_REARM);
 
 	if (phba->sli4_hba.hba_eq) {
@@ -6701,7 +6799,6 @@
 	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
 	MAILBOX_t *mb = &pmbox->u.mb;
 	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring;
 
 	/* If the mailbox completed, process the completion and return */
 	if (lpfc_sli4_process_missed_mbox_completions(phba))
@@ -6743,8 +6840,7 @@
 	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 			"0345 Resetting board due to mailbox timeout\n");
@@ -8112,6 +8208,7 @@
 	abort_tag = (uint32_t) iocbq->iotag;
 	xritag = iocbq->sli4_xritag;
 	wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
+	wqe->generic.wqe_com.word10 = 0;
 	/* words0-2 bpl convert bde */
 	if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
 		numBdes = iocbq->iocb.un.genreq64.bdl.bdeSize /
@@ -8618,8 +8715,7 @@
 
 	if ((piocb->iocb_flag & LPFC_IO_FCP) ||
 	    (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-		if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag &
-			LPFC_IO_OAS))) {
+		if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) {
 			wq = phba->sli4_hba.fcp_wq[piocb->fcp_wqidx];
 		} else {
 			wq = phba->sli4_hba.oas_wq;
@@ -8714,7 +8810,7 @@
 
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		if (piocb->iocb_flag &  LPFC_IO_FCP) {
-			if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag &
+			if (!phba->cfg_fof || (!(piocb->iocb_flag &
 				LPFC_IO_OAS))) {
 				if (unlikely(!phba->sli4_hba.fcp_wq))
 					return IOCB_ERROR;
@@ -9149,6 +9245,7 @@
 		pring->sli.sli3.next_cmdidx  = 0;
 		pring->sli.sli3.local_getidx = 0;
 		pring->sli.sli3.cmdidx = 0;
+		pring->flag = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -9784,43 +9881,6 @@
 }
 
 /**
- * lpfc_sli_iocb_ring_abort - Unconditionally abort all iocbs on an iocb ring
- * @phba: Pointer to HBA context object.
- * @pring: Pointer to driver SLI ring object.
- *
- * This function aborts all iocbs in the given ring and frees all the iocb
- * objects in txq. This function issues abort iocbs unconditionally for all
- * the iocb commands in txcmplq. The iocbs in the txcmplq is not guaranteed
- * to complete before the return of this function. The caller is not required
- * to hold any locks.
- **/
-static void
-lpfc_sli_iocb_ring_abort(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
-{
-	LIST_HEAD(completions);
-	struct lpfc_iocbq *iocb, *next_iocb;
-
-	if (pring->ringno == LPFC_ELS_RING)
-		lpfc_fabric_abort_hba(phba);
-
-	spin_lock_irq(&phba->hbalock);
-
-	/* Take off all the iocbs on txq for cancelling */
-	list_splice_init(&pring->txq, &completions);
-	pring->txq_cnt = 0;
-
-	/* Next issue ABTS for everything on the txcmplq */
-	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-		lpfc_sli_abort_iotag_issue(phba, pring, iocb);
-
-	spin_unlock_irq(&phba->hbalock);
-
-	/* Cancel all the IOCBs from the completions list */
-	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_ABORTED);
-}
-
-/**
  * lpfc_sli_hba_iocb_abort - Abort all iocbs to an hba.
  * @phba: pointer to lpfc HBA data structure.
  *
@@ -9835,7 +9895,7 @@
 
 	for (i = 0; i < psli->num_rings; i++) {
 		pring = &psli->ring[i];
-		lpfc_sli_iocb_ring_abort(phba, pring);
+		lpfc_sli_abort_iocb_ring(phba, pring);
 	}
 }
 
@@ -10060,6 +10120,124 @@
 }
 
 /**
+ * lpfc_sli_abort_taskmgmt - issue abort for all commands on a host/target/LUN
+ * @vport: Pointer to virtual port.
+ * @pring: Pointer to driver SLI ring object.
+ * @tgt_id: SCSI ID of the target.
+ * @lun_id: LUN ID of the scsi device.
+ * @cmd: LPFC_CTX_LUN/LPFC_CTX_TGT/LPFC_CTX_HOST.
+ *
+ * This function sends an abort command for every SCSI command
+ * associated with the given virtual port pending on the ring
+ * filtered by lpfc_sli_validate_fcp_iocb function.
+ * When cmd == LPFC_CTX_LUN, the function sends abort only to the
+ * FCP iocbs associated with the lun specified by the tgt_id and lun_id
+ * parameters.
+ * When cmd == LPFC_CTX_TGT, the function sends abort only to the
+ * FCP iocbs associated with the SCSI target specified by tgt_id.
+ * When cmd == LPFC_CTX_HOST, the function sends abort to all
+ * FCP iocbs associated with the virtual port.
+ * This function returns the number of abort iocbs it issued.
+ * This function is called with no locks held right after a taskmgmt
+ * command is sent.
+ **/
+int
+lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
+			uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_iocbq *abtsiocbq;
+	struct lpfc_iocbq *iocbq;
+	IOCB_t *icmd;
+	int sum, i, ret_val;
+	unsigned long iflags;
+	struct lpfc_sli_ring *pring_s4;
+	uint32_t ring_number;
+
+	spin_lock_irq(&phba->hbalock);
+
+	/* all I/Os are in process of being flushed */
+	if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
+		spin_unlock_irq(&phba->hbalock);
+		return 0;
+	}
+	sum = 0;
+
+	for (i = 1; i <= phba->sli.last_iotag; i++) {
+		iocbq = phba->sli.iocbq_lookup[i];
+
+		if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id,
+					       cmd) != 0)
+			continue;
+
+		/*
+		 * If the iocbq is already being aborted, don't take a second
+		 * action; it is skipped and not added to the returned count.
+		 */
+		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+			continue;
+
+		/* issue ABTS for this IOCB based on iotag */
+		abtsiocbq = __lpfc_sli_get_iocbq(phba);
+		if (abtsiocbq == NULL)
+			continue;
+
+		icmd = &iocbq->iocb;
+		abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
+		abtsiocbq->iocb.un.acxri.abortContextTag = icmd->ulpContext;
+		if (phba->sli_rev == LPFC_SLI_REV4)
+			abtsiocbq->iocb.un.acxri.abortIoTag =
+							 iocbq->sli4_xritag;
+		else
+			abtsiocbq->iocb.un.acxri.abortIoTag = icmd->ulpIoTag;
+		abtsiocbq->iocb.ulpLe = 1;
+		abtsiocbq->iocb.ulpClass = icmd->ulpClass;
+		abtsiocbq->vport = vport;
+
+		/* ABTS WQE must go to the same WQ as the WQE to be aborted */
+		abtsiocbq->fcp_wqidx = iocbq->fcp_wqidx;
+		if (iocbq->iocb_flag & LPFC_IO_FCP)
+			abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX;
+
+		if (lpfc_is_link_up(phba))
+			abtsiocbq->iocb.ulpCommand = CMD_ABORT_XRI_CN;
+		else
+			abtsiocbq->iocb.ulpCommand = CMD_CLOSE_XRI_CN;
+
+		/* Setup callback routine and issue the command. */
+		abtsiocbq->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
+
+		/*
+		 * Indicate the IO is being aborted by the driver so a later
+		 * abort request for the same IO does not issue a second ABTS.
+		 */
+		iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
+
+		if (phba->sli_rev == LPFC_SLI_REV4) {
+			ring_number = MAX_SLI3_CONFIGURED_RINGS +
+					 iocbq->fcp_wqidx;
+			pring_s4 = &phba->sli.ring[ring_number];
+			/* Note: both hbalock and ring_lock must be held here */
+			spin_lock_irqsave(&pring_s4->ring_lock, iflags);
+			ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
+							abtsiocbq, 0);
+			spin_unlock_irqrestore(&pring_s4->ring_lock, iflags);
+		} else {
+			ret_val = __lpfc_sli_issue_iocb(phba, pring->ringno,
+							abtsiocbq, 0);
+		}
+
+
+		if (ret_val == IOCB_ERROR)
+			__lpfc_sli_release_iocbq(phba, abtsiocbq);
+		else
+			sum++;
+	}
+	spin_unlock_irq(&phba->hbalock);
+	return sum;
+}
+
+/**
  * lpfc_sli_wake_iocb_wait - lpfc_sli_issue_iocb_wait's completion handler
  * @phba: Pointer to HBA context object.
  * @cmdiocbq: Pointer to command iocb.

diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 6f04080..edb4883 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 9b8cda8..7f50aa0 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index e32cbec..41675c1 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.45"
+#define LPFC_DRIVER_VERSION "10.2.8001.0."
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
@@ -30,4 +30,4 @@
 
 #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \
 		LPFC_DRIVER_VERSION
-#define LPFC_COPYRIGHT "Copyright(c) 2004-2013 Emulex.  All rights reserved."
+#define LPFC_COPYRIGHT "Copyright(c) 2004-2014 Emulex.  All rights reserved."

diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index f5cdc68..6a039eb 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c

@@ -25,10 +25,6 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: mac_NCR5380.c,v $
- */
-
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/ctype.h>
@@ -58,12 +54,6 @@
 
 #include "NCR5380.h"
 
-#if 0
-#define NDEBUG (NDEBUG_INTR | NDEBUG_PSEUDO_DMA | NDEBUG_ARBITRATION | NDEBUG_SELECTION | NDEBUG_RESELECTION)
-#else
-#define NDEBUG (NDEBUG_ABORT)
-#endif
-
 #define RESET_BOOT
 #define DRIVER_SETUP
 

diff --git a/drivers/scsi/mac_scsi.h b/drivers/scsi/mac_scsi.h
index 7dc62fc..06969b0 100644
--- a/drivers/scsi/mac_scsi.h
+++ b/drivers/scsi/mac_scsi.h

@@ -22,10 +22,6 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: cumana_NCR5380.h,v $
- */
-
 #ifndef MAC_NCR5380_H
 #define MAC_NCR5380_H
 
@@ -51,8 +47,6 @@
 
 #include <scsi/scsicam.h>
 
-#ifndef HOSTS_C
-
 #define NCR5380_implementation_fields \
     int port, ctrl
 
@@ -75,10 +69,6 @@
 #define NCR5380_show_info macscsi_show_info
 #define NCR5380_write_info macscsi_write_info
 
-#define BOARD_NORMAL	0
-#define BOARD_NCR53C400	1
-
-#endif /* ndef HOSTS_C */
 #endif /* ndef ASM */
 #endif /* MAC_NCR5380_H */
 

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index d84d02c..112799b 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c

@@ -3061,7 +3061,8 @@
 	u32 cur_state;
 	u32 abs_state, curr_abs_state;
 
-	fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK;
+	abs_state = instance->instancet->read_fw_status_reg(instance->reg_set);
+	fw_state = abs_state & MFI_STATE_MASK;
 
 	if (fw_state != MFI_STATE_READY)
 		printk(KERN_INFO "megasas: Waiting for FW to come to ready"
@@ -3069,9 +3070,6 @@
 
 	while (fw_state != MFI_STATE_READY) {
 
-		abs_state =
-		instance->instancet->read_fw_status_reg(instance->reg_set);
-
 		switch (fw_state) {
 
 		case MFI_STATE_FAULT:
@@ -3223,10 +3221,8 @@
 		 * The cur_state should not last for more than max_wait secs
 		 */
 		for (i = 0; i < (max_wait * 1000); i++) {
-			fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) &
-					MFI_STATE_MASK ;
-		curr_abs_state =
-		instance->instancet->read_fw_status_reg(instance->reg_set);
+			curr_abs_state = instance->instancet->
+				read_fw_status_reg(instance->reg_set);
 
 			if (abs_state == curr_abs_state) {
 				msleep(1);
@@ -3242,6 +3238,9 @@
 			       "in %d secs\n", fw_state, max_wait);
 			return -ENODEV;
 		}
+
+		abs_state = curr_abs_state;
+		fw_state = curr_abs_state & MFI_STATE_MASK;
 	}
 	printk(KERN_INFO "megasas: FW now in Ready state\n");
 

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index bde63f7..8b88118 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c

@@ -1739,14 +1739,14 @@
 			list_for_each_entry_safe(chain_req, next,
 			    &ioc->scsi_lookup[i].chain_list, tracker_list) {
 				list_del_init(&chain_req->tracker_list);
-				list_add_tail(&chain_req->tracker_list,
+				list_add(&chain_req->tracker_list,
 				    &ioc->free_chain_list);
 			}
 		}
 		ioc->scsi_lookup[i].cb_idx = 0xFF;
 		ioc->scsi_lookup[i].scmd = NULL;
 		ioc->scsi_lookup[i].direct_io = 0;
-		list_add_tail(&ioc->scsi_lookup[i].tracker_list,
+		list_add(&ioc->scsi_lookup[i].tracker_list,
 		    &ioc->free_list);
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 
@@ -1764,13 +1764,13 @@
 		/* hi-priority */
 		i = smid - ioc->hi_priority_smid;
 		ioc->hpr_lookup[i].cb_idx = 0xFF;
-		list_add_tail(&ioc->hpr_lookup[i].tracker_list,
+		list_add(&ioc->hpr_lookup[i].tracker_list,
 		    &ioc->hpr_free_list);
 	} else if (smid <= ioc->hba_queue_depth) {
 		/* internal queue */
 		i = smid - ioc->internal_smid;
 		ioc->internal_lookup[i].cb_idx = 0xFF;
-		list_add_tail(&ioc->internal_lookup[i].tracker_list,
+		list_add(&ioc->internal_lookup[i].tracker_list,
 		    &ioc->internal_free_list);
 	}
 	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 1f2ac3a..fd3b998 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h

@@ -1065,7 +1065,7 @@
     u32 reply);
 int mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle,
 	uint channel, uint id, uint lun, u8 type, u16 smid_task,
-	ulong timeout, unsigned long serial_number, enum mutex_type m_type);
+	ulong timeout, enum mutex_type m_type);
 void mpt2sas_scsih_set_tm_flag(struct MPT2SAS_ADAPTER *ioc, u16 handle);
 void mpt2sas_scsih_clear_tm_flag(struct MPT2SAS_ADAPTER *ioc, u16 handle);
 void mpt2sas_expander_remove(struct MPT2SAS_ADAPTER *ioc, u64 sas_address);

diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index b7f887c..62df8f9 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c

@@ -987,7 +987,7 @@
 			mpt2sas_scsih_issue_tm(ioc,
 			    le16_to_cpu(mpi_request->FunctionDependent1), 0, 0,
 			    0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, 10,
-			    0, TM_MUTEX_ON);
+			    TM_MUTEX_ON);
 			ioc->tm_cmds.status = MPT2_CMD_NOT_USED;
 		} else
 			mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP,

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 6fd7d40..5055f92 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c

@@ -2368,7 +2368,6 @@
  * @type: MPI2_SCSITASKMGMT_TASKTYPE__XXX (defined in mpi2_init.h)
  * @smid_task: smid assigned to the task
  * @timeout: timeout in seconds
- * @serial_number: the serial_number from scmd
  * @m_type: TM_MUTEX_ON or TM_MUTEX_OFF
  * Context: user
  *
@@ -2381,7 +2380,7 @@
 int
 mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel,
     uint id, uint lun, u8 type, u16 smid_task, ulong timeout,
-	unsigned long serial_number, enum mutex_type m_type)
+	enum mutex_type m_type)
 {
 	Mpi2SCSITaskManagementRequest_t *mpi_request;
 	Mpi2SCSITaskManagementReply_t *mpi_reply;
@@ -2634,8 +2633,7 @@
 	handle = sas_device_priv_data->sas_target->handle;
 	r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, scmd->device->lun,
-	    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30,
-	    scmd->serial_number, TM_MUTEX_ON);
+	    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30, TM_MUTEX_ON);
 
  out:
 	sdev_printk(KERN_INFO, scmd->device, "task abort: %s scmd(%p)\n",
@@ -2696,8 +2694,7 @@
 
 	r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, scmd->device->lun,
-	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, 0,
-	    TM_MUTEX_ON);
+	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, TM_MUTEX_ON);
 
  out:
 	sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n",
@@ -2757,7 +2754,7 @@
 
 	r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0,
-	    30, 0, TM_MUTEX_ON);
+	    30, TM_MUTEX_ON);
 
  out:
 	starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n",
@@ -3953,9 +3950,9 @@
  * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
  */
 static int
-_scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
+_scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 {
-	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
 	struct MPT2SAS_TARGET *sas_target_priv_data;
 	struct _raid_device *raid_device;
@@ -3963,7 +3960,6 @@
 	u32 mpi_control;
 	u16 smid;
 
-	scmd->scsi_done = done;
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
 		scmd->result = DID_NO_CONNECT << 16;
@@ -4039,7 +4035,7 @@
 	    MPT_TARGET_FLAGS_RAID_COMPONENT)
 		mpi_request->Function = MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH;
 	else
-		mpi_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
+	mpi_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
 	mpi_request->DevHandle =
 	    cpu_to_le16(sas_device_priv_data->sas_target->handle);
 	mpi_request->DataLength = cpu_to_le32(scsi_bufflen(scmd));
@@ -4083,8 +4079,6 @@
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
-static DEF_SCSI_QCMD(_scsih_qcmd)
-
 /**
  * _scsih_normalize_sense - normalize descriptor and fixed format sense data
  * @sense_buffer: sense data returned by target
@@ -5880,7 +5874,7 @@
 
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 		r = mpt2sas_scsih_issue_tm(ioc, handle, 0, 0, lun,
-		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30, 0,
+		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30,
 		    TM_MUTEX_OFF);
 		if (r == FAILED) {
 			sdev_printk(KERN_WARNING, sdev,
@@ -5922,7 +5916,7 @@
 
 		r = mpt2sas_scsih_issue_tm(ioc, handle, sdev->channel, sdev->id,
 		    sdev->lun, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30,
-		    scmd->serial_number, TM_MUTEX_OFF);
+		    TM_MUTEX_OFF);
 		if (r == FAILED) {
 			sdev_printk(KERN_WARNING, sdev,
 			    "mpt2sas_scsih_issue_tm: ABORT_TASK: FAILED : "

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 0ebf5d9..9b90a6f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h

@@ -993,7 +993,7 @@
 
 int mpt3sas_scsih_issue_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 	uint channel, uint id, uint lun, u8 type, u16 smid_task,
-	ulong timeout, unsigned long serial_number,  enum mutex_type m_type);
+	ulong timeout, enum mutex_type m_type);
 void mpt3sas_scsih_set_tm_flag(struct MPT3SAS_ADAPTER *ioc, u16 handle);
 void mpt3sas_scsih_clear_tm_flag(struct MPT3SAS_ADAPTER *ioc, u16 handle);
 void mpt3sas_expander_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address);

diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 9b89de1..ba9cbe5 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c

@@ -980,7 +980,7 @@
 			mpt3sas_scsih_issue_tm(ioc,
 			    le16_to_cpu(mpi_request->FunctionDependent1), 0, 0,
 			    0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, 30,
-			    0, TM_MUTEX_ON);
+			    TM_MUTEX_ON);
 		} else
 			mpt3sas_base_hard_reset_handler(ioc, CAN_SLEEP,
 			    FORCE_BIG_HAMMER);

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index a961fe1..18e713d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c

@@ -2029,7 +2029,6 @@
  * @type: MPI2_SCSITASKMGMT_TASKTYPE__XXX (defined in mpi2_init.h)
  * @smid_task: smid assigned to the task
  * @timeout: timeout in seconds
- * @serial_number: the serial_number from scmd
  * @m_type: TM_MUTEX_ON or TM_MUTEX_OFF
  * Context: user
  *
@@ -2042,7 +2041,7 @@
 int
 mpt3sas_scsih_issue_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, uint channel,
 	uint id, uint lun, u8 type, u16 smid_task, ulong timeout,
-	unsigned long serial_number, enum mutex_type m_type)
+	enum mutex_type m_type)
 {
 	Mpi2SCSITaskManagementRequest_t *mpi_request;
 	Mpi2SCSITaskManagementReply_t *mpi_reply;
@@ -2293,8 +2292,7 @@
 	handle = sas_device_priv_data->sas_target->handle;
 	r = mpt3sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, scmd->device->lun,
-	    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30,
-	    scmd->serial_number, TM_MUTEX_ON);
+	    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30, TM_MUTEX_ON);
 
  out:
 	sdev_printk(KERN_INFO, scmd->device, "task abort: %s scmd(%p)\n",
@@ -2353,8 +2351,7 @@
 
 	r = mpt3sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, scmd->device->lun,
-	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, 0,
-	    TM_MUTEX_ON);
+	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, TM_MUTEX_ON);
 
  out:
 	sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n",
@@ -2414,7 +2411,7 @@
 
 	r = mpt3sas_scsih_issue_tm(ioc, handle, scmd->device->channel,
 	    scmd->device->id, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0,
-	    30, 0, TM_MUTEX_ON);
+	    30, TM_MUTEX_ON);
 
  out:
 	starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n",
@@ -3518,7 +3515,7 @@
 
 
 /**
- * _scsih_qcmd_lck - main scsi request entry point
+ * _scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
  * @done: function pointer to be invoked on completion
  *
@@ -3529,9 +3526,9 @@
  * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
  */
 static int
-_scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
+_scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 {
-	struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
+	struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
 	struct MPT3SAS_DEVICE *sas_device_priv_data;
 	struct MPT3SAS_TARGET *sas_target_priv_data;
 	Mpi2SCSIIORequest_t *mpi_request;
@@ -3544,7 +3541,6 @@
 		scsi_print_command(scmd);
 #endif
 
-	scmd->scsi_done = done;
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
 		scmd->result = DID_NO_CONNECT << 16;
@@ -3659,8 +3655,6 @@
  out:
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
-static DEF_SCSI_QCMD(_scsih_qcmd)
-
 
 /**
  * _scsih_normalize_sense - normalize descriptor and fixed format sense data
@@ -5425,7 +5419,7 @@
 
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 		r = mpt3sas_scsih_issue_tm(ioc, handle, 0, 0, lun,
-		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30, 0,
+		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30,
 		    TM_MUTEX_OFF);
 		if (r == FAILED) {
 			sdev_printk(KERN_WARNING, sdev,
@@ -5467,7 +5461,7 @@
 
 		r = mpt3sas_scsih_issue_tm(ioc, handle, sdev->channel, sdev->id,
 		    sdev->lun, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30,
-		    scmd->serial_number, TM_MUTEX_OFF);
+		    TM_MUTEX_OFF);
 		if (r == FAILED) {
 			sdev_printk(KERN_WARNING, sdev,
 			    "mpt3sas_scsih_issue_tm: ABORT_TASK: FAILED : "

diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 5ff978b..eacee48 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c

@@ -728,6 +728,15 @@
 		.class_mask	= 0,
 		.driver_data	= chip_9485,
 	},
+	{
+		.vendor		= PCI_VENDOR_ID_MARVELL_EXT,
+		.device		= 0x9485,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x9485,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_9485,
+	},
 	{ PCI_VDEVICE(OCZ, 0x1021), chip_9485}, /* OCZ RevoDrive3 */
 	{ PCI_VDEVICE(OCZ, 0x1022), chip_9485}, /* OCZ RevoDrive3/zDriveR4 (exact model unknown) */
 	{ PCI_VDEVICE(OCZ, 0x1040), chip_9485}, /* OCZ RevoDrive3/zDriveR4 (exact model unknown) */

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index bac04c2..5f4cbf0 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c

@@ -1570,6 +1570,7 @@
 		if (unlikely(!req))
 			return ERR_PTR(-ENOMEM);
 
+		blk_rq_set_block_pc(req);
 		return req;
 	}
 }
@@ -1590,7 +1591,6 @@
 	}
 
 	or->request = req;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 
 	req->timeout = or->timeout;
@@ -1608,7 +1608,7 @@
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			req->cmd_type = REQ_TYPE_BLOCK_PC;
+			blk_rq_set_block_pc(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)

diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 21883a2..0727ea7 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c

@@ -365,7 +365,7 @@
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	SRpnt->bio = NULL;

diff --git a/drivers/scsi/pas16.h b/drivers/scsi/pas16.h
index 3721342..aa528f5 100644
--- a/drivers/scsi/pas16.h
+++ b/drivers/scsi/pas16.h

@@ -129,8 +129,6 @@
 #define CAN_QUEUE 32 
 #endif
 
-#ifndef HOSTS_C
-
 #define NCR5380_implementation_fields \
     volatile unsigned short io_port
 
@@ -171,6 +169,5 @@
    
 #define PAS16_IRQS 0xd4a8 
 
-#endif /* else def HOSTS_C */
 #endif /* ndef ASM */
 #endif /* PAS16_H */

diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c
index 28b4e81..a368d77 100644
--- a/drivers/scsi/pm8001/pm8001_ctl.c
+++ b/drivers/scsi/pm8001/pm8001_ctl.c

@@ -395,6 +395,8 @@
 	payload.offset = 0;
 	payload.length = 4096;
 	payload.func_specific = kzalloc(4096, GFP_KERNEL);
+	if (!payload.func_specific)
+		return -ENOMEM;
 	PM8001_CHIP_DISP->get_nvmd_req(pm8001_ha, &payload);
 	wait_for_completion(&completion);
 	virt_addr = pm8001_ha->memoryMap.region[NVMD].virt_ptr;
@@ -402,6 +404,7 @@
 		bios_index++)
 		str += sprintf(str, "%c",
 			*((u8 *)((u8 *)virt_addr+bios_index)));
+	kfree(payload.func_specific);
 	return str - buf;
 }
 static DEVICE_ATTR(bios_version, S_IRUGO, pm8001_ctl_bios_version_show, NULL);
@@ -729,7 +732,7 @@
 			flash_error_table[i].reason);
 }
 
-static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUGO,
+static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUSR|S_IWGRP,
 	pm8001_show_update_fw, pm8001_store_update_fw);
 struct device_attribute *pm8001_host_attrs[] = {
 	&dev_attr_interface_rev,

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 07befcf..16fe519 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -664,7 +664,7 @@
 		}
 
 		rval = qla2x00_read_sfp(vha, ha->sfp_data_dma, ha->sfp_data,
-		    addr, offset, SFP_BLOCK_SIZE, 0);
+		    addr, offset, SFP_BLOCK_SIZE, BIT_1);
 		if (rval != QLA_SUCCESS) {
 			ql_log(ql_log_warn, vha, 0x706d,
 			    "Unable to read SFP data (%x/%x/%x).\n", rval,
@@ -1495,7 +1495,7 @@
 
 	if (!ha->fw_dumped)
 		size = 0;
-	else if (IS_QLA82XX(ha))
+	else if (IS_P3P_TYPE(ha))
 		size = ha->md_template_size + ha->md_dump_size;
 	else
 		size = ha->fw_dump_len;

diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 71ff340..524f9eb 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -2054,9 +2054,49 @@
 		bsg_job->reply->reply_payload_rcv_len = sizeof(sr);
 		break;
 	default:
-		ql_log(ql_log_warn, vha, 0x708c,
+		ql_dbg(ql_dbg_user, vha, 0x708c,
 		    "Unknown serdes cmd %x.\n", sr.cmd);
-		rval = -EDOM;
+		rval = -EINVAL;
+		break;
+	}
+
+	bsg_job->reply->reply_data.vendor_reply.vendor_rsp[0] =
+	    rval ? EXT_STATUS_MAILBOX : 0;
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	bsg_job->reply->result = DID_OK << 16;
+	bsg_job->job_done(bsg_job);
+	return 0;
+}
+
+static int
+qla8044_serdes_op(struct fc_bsg_job *bsg_job)
+{
+	struct Scsi_Host *host = bsg_job->shost;
+	scsi_qla_host_t *vha = shost_priv(host);
+	int rval = 0;
+	struct qla_serdes_reg_ex sr;
+
+	memset(&sr, 0, sizeof(sr));
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &sr, sizeof(sr));
+
+	switch (sr.cmd) {
+	case INT_SC_SERDES_WRITE_REG:
+		rval = qla8044_write_serdes_word(vha, sr.addr, sr.val);
+		bsg_job->reply->reply_payload_rcv_len = 0;
+		break;
+	case INT_SC_SERDES_READ_REG:
+		rval = qla8044_read_serdes_word(vha, sr.addr, &sr.val);
+		sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+		    bsg_job->reply_payload.sg_cnt, &sr, sizeof(sr));
+		bsg_job->reply->reply_payload_rcv_len = sizeof(sr);
+		break;
+	default:
+		ql_dbg(ql_dbg_user, vha, 0x70cf,
+		    "Unknown serdes cmd %x.\n", sr.cmd);
+		rval = -EINVAL;
 		break;
 	}
 
@@ -2121,6 +2161,9 @@
 	case QL_VND_SERDES_OP:
 		return qla26xx_serdes_op(bsg_job);
 
+	case QL_VND_SERDES_OP_EX:
+		return qla8044_serdes_op(bsg_job);
+
 	default:
 		return -ENOSYS;
 	}

diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h
index e5c2126..d38f9ef 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.h
+++ b/drivers/scsi/qla2xxx/qla_bsg.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -24,6 +24,7 @@
 #define QL_VND_READ_I2C		0x11
 #define QL_VND_FX00_MGMT_CMD	0x12
 #define QL_VND_SERDES_OP	0x13
+#define	QL_VND_SERDES_OP_EX	0x14
 
 /* BSG Vendor specific subcode returns */
 #define EXT_STATUS_OK			0
@@ -225,4 +226,10 @@
 	uint16_t val;
 } __packed;
 
+struct qla_serdes_reg_ex {
+	uint16_t cmd;
+	uint32_t addr;
+	uint32_t val;
+} __packed;
+
 #endif

diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 97255f7..c72ee97b 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -15,7 +15,7 @@
  * |                              |                    | 0x0144,0x0146	|
  * |                              |                    | 0x015b-0x0160	|
  * |                              |                    | 0x016e-0x0170	|
- * | Mailbox commands             |       0x1187       | 0x1018-0x1019	|
+ * | Mailbox commands             |       0x118d       | 0x1018-0x1019	|
  * |                              |                    | 0x10ca         |
  * |                              |                    | 0x1115-0x1116  |
  * |                              |                    | 0x111a-0x111b	|
@@ -45,12 +45,16 @@
  * |                              |                    | 0x70ad-0x70ae  |
  * |                              |                    | 0x70d7-0x70db  |
  * |                              |                    | 0x70de-0x70df  |
- * | Task Management              |       0x803d       | 0x8025-0x8026  |
- * |                              |                    | 0x800b,0x8039  |
+ * | Task Management              |       0x803d       | 0x8000,0x800b  |
+ * |                              |                    | 0x8019         |
+ * |                              |                    | 0x8025,0x8026  |
+ * |                              |                    | 0x8031,0x8032  |
+ * |                              |                    | 0x8039,0x803c  |
  * | AER/EEH                      |       0x9011       |		|
  * | Virtual Port                 |       0xa007       |		|
- * | ISP82XX Specific             |       0xb14c       | 0xb002,0xb024  |
+ * | ISP82XX Specific             |       0xb157       | 0xb002,0xb024  |
  * |                              |                    | 0xb09e,0xb0ae  |
+ * |				  |		       | 0xb0c3,0xb0c6  |
  * |                              |                    | 0xb0e0-0xb0ef  |
  * |                              |                    | 0xb085,0xb0dc  |
  * |                              |                    | 0xb107,0xb108  |
@@ -60,12 +64,12 @@
  * |                              |                    | 0xb13c-0xb140  |
  * |                              |                    | 0xb149		|
  * | MultiQ                       |       0xc00c       |		|
- * | Misc                         |       0xd2ff       | 0xd017-0xd019	|
+ * | Misc                         |       0xd212       | 0xd017-0xd019	|
  * |                              |                    | 0xd020		|
- * |                              |                    | 0xd02e-0xd0ff	|
+ * |                              |                    | 0xd030-0xd0ff	|
  * |                              |                    | 0xd101-0xd1fe	|
- * |                              |                    | 0xd212-0xd2fe	|
- * | Target Mode		  |	  0xe070       | 0xe021		|
+ * |                              |                    | 0xd213-0xd2fe	|
+ * | Target Mode		  |	  0xe078       |		|
  * | Target Mode Management	  |	  0xf072       | 0xf002-0xf003	|
  * |                              |                    | 0xf046-0xf049  |
  * | Target Mode Task Management  |	  0x1000b      |		|
@@ -277,9 +281,15 @@
 	if (rval != QLA_SUCCESS)
 		return rval;
 
+	set_bit(RISC_SRAM_DUMP_CMPL, &ha->fw_dump_cap_flags);
+
 	/* External Memory. */
-	return qla24xx_dump_ram(ha, 0x100000, *nxt,
+	rval = qla24xx_dump_ram(ha, 0x100000, *nxt,
 	    ha->fw_memory_size - 0x100000 + 1, nxt);
+	if (rval == QLA_SUCCESS)
+		set_bit(RISC_EXT_MEM_DUMP_CMPL, &ha->fw_dump_cap_flags);
+
+	return rval;
 }
 
 static uint32_t *
@@ -296,23 +306,15 @@
 	return buf;
 }
 
-int
-qla24xx_pause_risc(struct device_reg_24xx __iomem *reg)
+void
+qla24xx_pause_risc(struct device_reg_24xx __iomem *reg, struct qla_hw_data *ha)
 {
-	int rval = QLA_SUCCESS;
-	uint32_t cnt;
-
 	WRT_REG_DWORD(&reg->hccr, HCCRX_SET_RISC_PAUSE);
-	for (cnt = 30000;
-	    ((RD_REG_DWORD(&reg->host_status) & HSRX_RISC_PAUSED) == 0) &&
-	    rval == QLA_SUCCESS; cnt--) {
-		if (cnt)
-			udelay(100);
-		else
-			rval = QLA_FUNCTION_TIMEOUT;
-	}
 
-	return rval;
+	/* 100 usec delay is sufficient for hardware to pause RISC */
+	udelay(100);
+	if (RD_REG_DWORD(&reg->host_status) & HSRX_RISC_PAUSED)
+		set_bit(RISC_PAUSE_CMPL, &ha->fw_dump_cap_flags);
 }
 
 int
@@ -320,10 +322,14 @@
 {
 	int rval = QLA_SUCCESS;
 	uint32_t cnt;
-	uint16_t mb0, wd;
+	uint16_t wd;
 	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
 
-	/* Reset RISC. */
+	/*
+	 * Reset RISC. The delay is dependent on system architecture.
+	 * Driver can proceed with the reset sequence after waiting
+	 * for a timeout period.
+	 */
 	WRT_REG_DWORD(&reg->ctrl_status, CSRX_DMA_SHUTDOWN|MWB_4096_BYTES);
 	for (cnt = 0; cnt < 30000; cnt++) {
 		if ((RD_REG_DWORD(&reg->ctrl_status) & CSRX_DMA_ACTIVE) == 0)
@@ -331,19 +337,14 @@
 
 		udelay(10);
 	}
+	if (!(RD_REG_DWORD(&reg->ctrl_status) & CSRX_DMA_ACTIVE))
+		set_bit(DMA_SHUTDOWN_CMPL, &ha->fw_dump_cap_flags);
 
 	WRT_REG_DWORD(&reg->ctrl_status,
 	    CSRX_ISP_SOFT_RESET|CSRX_DMA_SHUTDOWN|MWB_4096_BYTES);
 	pci_read_config_word(ha->pdev, PCI_COMMAND, &wd);
 
 	udelay(100);
-	/* Wait for firmware to complete NVRAM accesses. */
-	mb0 = (uint32_t) RD_REG_WORD(&reg->mailbox0);
-	for (cnt = 10000 ; cnt && mb0; cnt--) {
-		udelay(5);
-		mb0 = (uint32_t) RD_REG_WORD(&reg->mailbox0);
-		barrier();
-	}
 
 	/* Wait for soft-reset to complete. */
 	for (cnt = 0; cnt < 30000; cnt++) {
@@ -353,16 +354,21 @@
 
 		udelay(10);
 	}
+	if (!(RD_REG_DWORD(&reg->ctrl_status) & CSRX_ISP_SOFT_RESET))
+		set_bit(ISP_RESET_CMPL, &ha->fw_dump_cap_flags);
+
 	WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_RESET);
 	RD_REG_DWORD(&reg->hccr);             /* PCI Posting. */
 
-	for (cnt = 30000; RD_REG_WORD(&reg->mailbox0) != 0 &&
+	for (cnt = 10000; RD_REG_WORD(&reg->mailbox0) != 0 &&
 	    rval == QLA_SUCCESS; cnt--) {
 		if (cnt)
-			udelay(100);
+			udelay(10);
 		else
 			rval = QLA_FUNCTION_TIMEOUT;
 	}
+	if (rval == QLA_SUCCESS)
+		set_bit(RISC_RDY_AFT_RESET, &ha->fw_dump_cap_flags);
 
 	return rval;
 }
@@ -659,12 +665,13 @@
 
 	if (rval != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0xd000,
-		    "Failed to dump firmware (%x).\n", rval);
+		    "Failed to dump firmware (%x), dump status flags (0x%lx).\n",
+		    rval, ha->fw_dump_cap_flags);
 		ha->fw_dumped = 0;
 	} else {
 		ql_log(ql_log_info, vha, 0xd001,
-		    "Firmware dump saved to temp buffer (%ld/%p).\n",
-		    vha->host_no, ha->fw_dump);
+		    "Firmware dump saved to temp buffer (%ld/%p), dump status flags (0x%lx).\n",
+		    vha->host_no, ha->fw_dump, ha->fw_dump_cap_flags);
 		ha->fw_dumped = 1;
 		qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP);
 	}
@@ -1053,6 +1060,7 @@
 
 	risc_address = ext_mem_cnt = 0;
 	flags = 0;
+	ha->fw_dump_cap_flags = 0;
 
 	if (!hardware_locked)
 		spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -1075,10 +1083,11 @@
 
 	fw->host_status = htonl(RD_REG_DWORD(&reg->host_status));
 
-	/* Pause RISC. */
-	rval = qla24xx_pause_risc(reg);
-	if (rval != QLA_SUCCESS)
-		goto qla24xx_fw_dump_failed_0;
+	/*
+	 * Pause RISC. No need to track timeout, as resetting the chip
+	 * is the right approach in case of pause timeout
+	 */
+	qla24xx_pause_risc(reg, ha);
 
 	/* Host interface registers. */
 	dmp_reg = &reg->flash_addr;
@@ -1302,6 +1311,7 @@
 
 	risc_address = ext_mem_cnt = 0;
 	flags = 0;
+	ha->fw_dump_cap_flags = 0;
 
 	if (!hardware_locked)
 		spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -1325,10 +1335,11 @@
 
 	fw->host_status = htonl(RD_REG_DWORD(&reg->host_status));
 
-	/* Pause RISC. */
-	rval = qla24xx_pause_risc(reg);
-	if (rval != QLA_SUCCESS)
-		goto qla25xx_fw_dump_failed_0;
+	/*
+	 * Pause RISC. No need to track timeout, as resetting the chip
+	 * is the right approach in case of pause timeout
+	 */
+	qla24xx_pause_risc(reg, ha);
 
 	/* Host/Risc registers. */
 	iter_reg = fw->host_risc_reg;
@@ -1619,6 +1630,7 @@
 
 	risc_address = ext_mem_cnt = 0;
 	flags = 0;
+	ha->fw_dump_cap_flags = 0;
 
 	if (!hardware_locked)
 		spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -1641,10 +1653,11 @@
 
 	fw->host_status = htonl(RD_REG_DWORD(&reg->host_status));
 
-	/* Pause RISC. */
-	rval = qla24xx_pause_risc(reg);
-	if (rval != QLA_SUCCESS)
-		goto qla81xx_fw_dump_failed_0;
+	/*
+	 * Pause RISC. No need to track timeout, as resetting the chip
+	 * is the right approach in case of pause timeout
+	 */
+	qla24xx_pause_risc(reg, ha);
 
 	/* Host/Risc registers. */
 	iter_reg = fw->host_risc_reg;
@@ -1938,6 +1951,7 @@
 
 	risc_address = ext_mem_cnt = 0;
 	flags = 0;
+	ha->fw_dump_cap_flags = 0;
 
 	if (!hardware_locked)
 		spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -1959,10 +1973,11 @@
 
 	fw->host_status = htonl(RD_REG_DWORD(&reg->host_status));
 
-	/* Pause RISC. */
-	rval = qla24xx_pause_risc(reg);
-	if (rval != QLA_SUCCESS)
-		goto qla83xx_fw_dump_failed_0;
+	/*
+	 * Pause RISC. No need to track timeout, as resetting the chip
+	 * is the right approach in case of pause timeout
+	 */
+	qla24xx_pause_risc(reg, ha);
 
 	WRT_REG_DWORD(&reg->iobase_addr, 0x6000);
 	dmp_reg = &reg->iobase_window;
@@ -2385,9 +2400,11 @@
 			nxt += sizeof(fw->code_ram);
 			nxt += (ha->fw_memory_size - 0x100000 + 1);
 			goto copy_queue;
-		} else
+		} else {
+			set_bit(RISC_RDY_AFT_RESET, &ha->fw_dump_cap_flags);
 			ql_log(ql_log_warn, vha, 0xd010,
 			    "bigger hammer success?\n");
+		}
 	}
 
 	rval = qla24xx_dump_memory(ha, fw->code_ram, sizeof(fw->code_ram),

diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index cc96104..e1fc4e6 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -353,5 +353,6 @@
 	uint32_t, void **);
 extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *,
 	uint32_t, void **);
-extern int qla24xx_pause_risc(struct device_reg_24xx __iomem *);
+extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *,
+	struct qla_hw_data *);
 extern int qla24xx_soft_reset(struct qla_hw_data *);

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 6a10613..de5d0ae 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -965,6 +965,13 @@
  */
 #define MBC_WRITE_MPI_REGISTER		0x01    /* Write MPI Register. */
 
+/*
+ * ISP8044 mailbox commands
+ */
+#define MBC_SET_GET_ETH_SERDES_REG	0x150
+#define HCS_WRITE_SERDES		0x3
+#define HCS_READ_SERDES			0x4
+
 /* Firmware return data sizes */
 #define FCAL_MAP_SIZE	128
 
@@ -1622,25 +1629,35 @@
 #define PO_MODE_DIF_PASS	2
 #define PO_MODE_DIF_REPLACE	3
 #define PO_MODE_DIF_TCP_CKSUM	6
-#define PO_ENABLE_DIF_BUNDLING	BIT_8
 #define PO_ENABLE_INCR_GUARD_SEED	BIT_3
-#define PO_DISABLE_INCR_REF_TAG	BIT_5
 #define PO_DISABLE_GUARD_CHECK	BIT_4
+#define PO_DISABLE_INCR_REF_TAG	BIT_5
+#define PO_DIS_HEADER_MODE	BIT_7
+#define PO_ENABLE_DIF_BUNDLING	BIT_8
+#define PO_DIS_FRAME_MODE	BIT_9
+#define PO_DIS_VALD_APP_ESC	BIT_10 /* Dis validation for escape tag/ffffh */
+#define PO_DIS_VALD_APP_REF_ESC BIT_11
+
+#define PO_DIS_APP_TAG_REPL	BIT_12 /* disable APP Tag replacement */
+#define PO_DIS_REF_TAG_REPL	BIT_13
+#define PO_DIS_APP_TAG_VALD	BIT_14 /* disable APP Tag validation */
+#define PO_DIS_REF_TAG_VALD	BIT_15
+
 /*
  * ISP queue - 64-Bit addressing, continuation crc entry structure definition.
  */
 struct crc_context {
 	uint32_t handle;		/* System handle. */
-	uint32_t ref_tag;
-	uint16_t app_tag;
+	__le32 ref_tag;
+	__le16 app_tag;
 	uint8_t ref_tag_mask[4];	/* Validation/Replacement Mask*/
 	uint8_t app_tag_mask[2];	/* Validation/Replacement Mask*/
-	uint16_t guard_seed;		/* Initial Guard Seed */
-	uint16_t prot_opts;		/* Requested Data Protection Mode */
-	uint16_t blk_size;		/* Data size in bytes */
+	__le16 guard_seed;		/* Initial Guard Seed */
+	__le16 prot_opts;		/* Requested Data Protection Mode */
+	__le16 blk_size;		/* Data size in bytes */
 	uint16_t runt_blk_guard;	/* Guard value for runt block (tape
 					 * only) */
-	uint32_t byte_count;		/* Total byte count/ total data
+	__le32 byte_count;		/* Total byte count/ total data
 					 * transfer count */
 	union {
 		struct {
@@ -1654,10 +1671,10 @@
 			uint32_t	reserved_6;
 		} nobundling;
 		struct {
-			uint32_t	dif_byte_count;	/* Total DIF byte
+			__le32	dif_byte_count;	/* Total DIF byte
 							 * count */
 			uint16_t	reserved_1;
-			uint16_t	dseg_count;	/* Data segment count */
+			__le16	dseg_count;	/* Data segment count */
 			uint32_t	reserved_2;
 			uint32_t	data_address[2];
 			uint32_t	data_length;
@@ -1748,6 +1765,8 @@
 #define CS_PORT_CONFIG_CHG	0x2A	/* Port Configuration Changed */
 #define CS_PORT_BUSY		0x2B	/* Port Busy */
 #define CS_COMPLETE_CHKCOND	0x30	/* Error? */
+#define CS_IOCB_ERROR		0x31	/* Generic error for IOCB request
+					   failure */
 #define CS_BAD_PAYLOAD		0x80	/* Driver defined */
 #define CS_UNKNOWN		0x81	/* Driver defined */
 #define CS_RETRY		0x82	/* Driver defined */
@@ -2676,6 +2695,7 @@
 	uint32_t __iomem *rsp_q_out;
 	uint16_t  ring_index;
 	uint16_t  out_ptr;
+	uint16_t  *in_ptr;		/* queue shadow in index */
 	uint16_t  length;
 	uint16_t  options;
 	uint16_t  rid;
@@ -2702,6 +2722,7 @@
 	uint32_t __iomem *req_q_out;
 	uint16_t  ring_index;
 	uint16_t  in_ptr;
+	uint16_t  *out_ptr;		/* queue shadow out index */
 	uint16_t  cnt;
 	uint16_t  length;
 	uint16_t  options;
@@ -2907,6 +2928,8 @@
 #define PCI_DEVICE_ID_QLOGIC_ISP8031	0x8031
 #define PCI_DEVICE_ID_QLOGIC_ISP2031	0x2031
 #define PCI_DEVICE_ID_QLOGIC_ISP2071	0x2071
+#define PCI_DEVICE_ID_QLOGIC_ISP2271	0x2271
+
 	uint32_t	device_type;
 #define DT_ISP2100                      BIT_0
 #define DT_ISP2200                      BIT_1
@@ -2928,7 +2951,8 @@
 #define DT_ISPFX00			BIT_17
 #define DT_ISP8044			BIT_18
 #define DT_ISP2071			BIT_19
-#define DT_ISP_LAST			(DT_ISP2071 << 1)
+#define DT_ISP2271			BIT_20
+#define DT_ISP_LAST			(DT_ISP2271 << 1)
 
 #define DT_T10_PI                       BIT_25
 #define DT_IIDMA                        BIT_26
@@ -2959,6 +2983,7 @@
 #define IS_QLA8031(ha)	(DT_MASK(ha) & DT_ISP8031)
 #define IS_QLAFX00(ha)	(DT_MASK(ha) & DT_ISPFX00)
 #define IS_QLA2071(ha)	(DT_MASK(ha) & DT_ISP2071)
+#define IS_QLA2271(ha)	(DT_MASK(ha) & DT_ISP2271)
 
 #define IS_QLA23XX(ha)  (IS_QLA2300(ha) || IS_QLA2312(ha) || IS_QLA2322(ha) || \
 			IS_QLA6312(ha) || IS_QLA6322(ha))
@@ -2967,7 +2992,7 @@
 #define IS_QLA25XX(ha)  (IS_QLA2532(ha))
 #define IS_QLA83XX(ha)	(IS_QLA2031(ha) || IS_QLA8031(ha))
 #define IS_QLA84XX(ha)  (IS_QLA8432(ha))
-#define IS_QLA27XX(ha)  (IS_QLA2071(ha))
+#define IS_QLA27XX(ha)  (IS_QLA2071(ha) || IS_QLA2271(ha))
 #define IS_QLA24XX_TYPE(ha)     (IS_QLA24XX(ha) || IS_QLA54XX(ha) || \
 				IS_QLA84XX(ha))
 #define IS_CNA_CAPABLE(ha)	(IS_QLA81XX(ha) || IS_QLA82XX(ha) || \
@@ -3006,6 +3031,7 @@
     (((ha)->fw_attributes_h << 16 | (ha)->fw_attributes) & BIT_22))
 #define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha))
 #define IS_TGT_MODE_CAPABLE(ha)	(ha->tgt.atio_q_length)
+#define IS_SHADOW_REG_CAPABLE(ha)  (IS_QLA27XX(ha))
 
 	/* HBA serial number */
 	uint8_t		serial0;
@@ -3136,7 +3162,15 @@
 	struct qla2xxx_fw_dump *fw_dump;
 	uint32_t	fw_dump_len;
 	int		fw_dumped;
+	unsigned long	fw_dump_cap_flags;
+#define RISC_PAUSE_CMPL		0
+#define DMA_SHUTDOWN_CMPL	1
+#define ISP_RESET_CMPL		2
+#define RISC_RDY_AFT_RESET	3
+#define RISC_SRAM_DUMP_CMPL	4
+#define RISC_EXT_MEM_DUMP_CMPL	5
 	int		fw_dump_reading;
+	int		prev_minidump_failed;
 	dma_addr_t	eft_dma;
 	void		*eft;
 /* Current size of mctp dump is 0x086064 bytes */

diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index 32ab809..2ca39b8 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */

diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 3a7353e..eb8f572 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -371,7 +371,10 @@
 	 * BIT 14 = Data Rate bit 1
 	 * BIT 15 = Data Rate bit 2
 	 * BIT 16 = Enable 75 ohm Termination Select
-	 * BIT 17-31 = Reserved
+	 * BIT 17-28 = Reserved
+	 * BIT 29 = Enable response queue 0 in index shadowing
+	 * BIT 30 = Enable request queue 0 out index shadowing
+	 * BIT 31 = Reserved
 	 */
 	uint32_t firmware_options_3;
 	uint16_t qos;
@@ -1134,13 +1137,6 @@
 #define MIN_MULTI_ID_FABRIC	64	/* Must be power-of-2. */
 #define MAX_MULTI_ID_FABRIC	256	/* ... */
 
-#define for_each_mapped_vp_idx(_ha, _idx)		\
-	for (_idx = find_next_bit((_ha)->vp_idx_map,	\
-		(_ha)->max_npiv_vports + 1, 1);		\
-	    _idx <= (_ha)->max_npiv_vports;		\
-	    _idx = find_next_bit((_ha)->vp_idx_map,	\
-		(_ha)->max_npiv_vports + 1, _idx + 1))	\
-
 struct mid_conf_entry_24xx {
 	uint16_t reserved_1;
 

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index e665e81..d48dea8 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -220,6 +220,13 @@
 
 extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *);
 extern int qla2x00_issue_marker(scsi_qla_host_t *, int);
+extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
+	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
+	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
+	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+
 
 /*
  * Global Function Prototypes in qla_mbx.c source file.
@@ -347,6 +354,11 @@
 qla2x00_read_serdes_word(scsi_qla_host_t *, uint16_t, uint16_t *);
 
 extern int
+qla8044_write_serdes_word(scsi_qla_host_t *, uint32_t, uint32_t);
+extern int
+qla8044_read_serdes_word(scsi_qla_host_t *, uint32_t, uint32_t *);
+
+extern int
 qla2x00_set_serdes_params(scsi_qla_host_t *, uint16_t, uint16_t, uint16_t);
 
 extern int

diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index e377f9d2..a0df3b1 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 38aeb54..e218441 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1476,6 +1476,7 @@
 	}
 
 	ha->fw_dumped = 0;
+	ha->fw_dump_cap_flags = 0;
 	dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0;
 	req_q_size = rsp_q_size = 0;
 
@@ -2061,6 +2062,10 @@
 	icb->atio_q_address[0] = cpu_to_le32(LSD(ha->tgt.atio_dma));
 	icb->atio_q_address[1] = cpu_to_le32(MSD(ha->tgt.atio_dma));
 
+	if (IS_SHADOW_REG_CAPABLE(ha))
+		icb->firmware_options_2 |=
+		    __constant_cpu_to_le32(BIT_30|BIT_29);
+
 	if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
 		icb->qos = __constant_cpu_to_le16(QLA_DEFAULT_QUE_QOS);
 		icb->rid = __constant_cpu_to_le16(rid);
@@ -2138,6 +2143,8 @@
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
+		req->out_ptr = (void *)(req->ring + req->length);
+		*req->out_ptr = 0;
 		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++)
 			req->outstanding_cmds[cnt] = NULL;
 
@@ -2153,6 +2160,8 @@
 		rsp = ha->rsp_q_map[que];
 		if (!rsp)
 			continue;
+		rsp->in_ptr = (void *)(rsp->ring + rsp->length);
+		*rsp->in_ptr = 0;
 		/* Initialize response queue entries */
 		if (IS_QLAFX00(ha))
 			qlafx00_init_response_q_entries(rsp);
@@ -3406,7 +3415,7 @@
 					    fcport->d_id.b.domain,
 					    fcport->d_id.b.area,
 					    fcport->d_id.b.al_pa);
-					fcport->loop_id = FC_NO_LOOP_ID;
+					qla2x00_clear_loop_id(fcport);
 				}
 			}
 		}
@@ -4727,7 +4736,6 @@
 qla2x00_restart_isp(scsi_qla_host_t *vha)
 {
 	int status = 0;
-	uint32_t wait_time;
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = ha->req_q_map[0];
 	struct rsp_que *rsp = ha->rsp_q_map[0];
@@ -4744,14 +4752,12 @@
 	if (!status && !(status = qla2x00_init_rings(vha))) {
 		clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
 		ha->flags.chip_reset_done = 1;
+
 		/* Initialize the queues in use */
 		qla25xx_init_queues(ha);
 
 		status = qla2x00_fw_ready(vha);
 		if (!status) {
-			ql_dbg(ql_dbg_taskm, vha, 0x8031,
-			    "Start configure loop status = %d.\n", status);
-
 			/* Issue a marker after FW becomes ready. */
 			qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL);
 
@@ -4766,24 +4772,12 @@
 				qlt_24xx_process_atio_queue(vha);
 			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-			/* Wait at most MAX_TARGET RSCNs for a stable link. */
-			wait_time = 256;
-			do {
-				clear_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-				qla2x00_configure_loop(vha);
-				wait_time--;
-			} while (!atomic_read(&vha->loop_down_timer) &&
-				!(test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags))
-				&& wait_time && (test_bit(LOOP_RESYNC_NEEDED,
-				&vha->dpc_flags)));
+			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 		}
 
 		/* if no cable then assume it's good */
 		if ((vha->device_flags & DFLG_NO_CABLE))
 			status = 0;
-
-		ql_dbg(ql_dbg_taskm, vha, 0x8032,
-		    "Configure loop done, status = 0x%x.\n", status);
 	}
 	return (status);
 }
@@ -6130,7 +6124,6 @@
 qla82xx_restart_isp(scsi_qla_host_t *vha)
 {
 	int status, rval;
-	uint32_t wait_time;
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = ha->req_q_map[0];
 	struct rsp_que *rsp = ha->rsp_q_map[0];
@@ -6144,31 +6137,15 @@
 
 		status = qla2x00_fw_ready(vha);
 		if (!status) {
-			ql_log(ql_log_info, vha, 0x803c,
-			    "Start configure loop, status =%d.\n", status);
-
 			/* Issue a marker after FW becomes ready. */
 			qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL);
-
 			vha->flags.online = 1;
-			/* Wait at most MAX_TARGET RSCNs for a stable link. */
-			wait_time = 256;
-			do {
-				clear_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-				qla2x00_configure_loop(vha);
-				wait_time--;
-			} while (!atomic_read(&vha->loop_down_timer) &&
-			    !(test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags)) &&
-			    wait_time &&
-			    (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)));
+			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 		}
 
 		/* if no cable then assume it's good */
 		if ((vha->device_flags & DFLG_NO_CABLE))
 			status = 0;
-
-		ql_log(ql_log_info, vha, 0x8000,
-		    "Configure loop done, status = 0x%x.\n", status);
 	}
 
 	if (!status) {
@@ -6182,8 +6159,6 @@
 			vha->marker_needed = 1;
 		}
 
-		vha->flags.online = 1;
-
 		ha->isp_ops->enable_intrs(ha);
 
 		ha->isp_abort_cnt = 0;

diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index ce8b5fb..b3b1d6f 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h

@@ -1,10 +1,11 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
+#include "qla_target.h"
 /**
  * qla24xx_calc_iocbs() - Determine number of Command Type 3 and
  * Continuation Type 1 IOCBs to allocate.
@@ -128,12 +129,20 @@
 }
 
 static inline void
-qla2x00_clean_dsd_pool(struct qla_hw_data *ha, srb_t *sp)
+qla2x00_clean_dsd_pool(struct qla_hw_data *ha, srb_t *sp,
+	struct qla_tgt_cmd *tc)
 {
 	struct dsd_dma *dsd_ptr, *tdsd_ptr;
 	struct crc_context *ctx;
 
-	ctx = (struct crc_context *)GET_CMD_CTX_SP(sp);
+	if (sp)
+		ctx = (struct crc_context *)GET_CMD_CTX_SP(sp);
+	else if (tc)
+		ctx = (struct crc_context *)tc->ctx;
+	else {
+		BUG();
+		return;
+	}
 
 	/* clean up allocated prev pool */
 	list_for_each_entry_safe(dsd_ptr, tdsd_ptr,

diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index e607568..7609315 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -936,9 +936,9 @@
 	return 1;
 }
 
-static int
+int
 qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
-	uint32_t *dsd, uint16_t tot_dsds)
+	uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
@@ -948,21 +948,35 @@
 	uint32_t *cur_dsd = dsd;
 	uint16_t	used_dsds = tot_dsds;
 
-	uint32_t	prot_int;
+	uint32_t	prot_int; /* protection interval */
 	uint32_t	partial;
 	struct qla2_sgx sgx;
 	dma_addr_t	sle_dma;
 	uint32_t	sle_dma_len, tot_prot_dma_len = 0;
-	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-
-	prot_int = cmd->device->sector_size;
+	struct scsi_cmnd *cmd;
+	struct scsi_qla_host *vha;
 
 	memset(&sgx, 0, sizeof(struct qla2_sgx));
-	sgx.tot_bytes = scsi_bufflen(cmd);
-	sgx.cur_sg = scsi_sglist(cmd);
-	sgx.sp = sp;
+	if (sp) {
+		vha = sp->fcport->vha;
+		cmd = GET_CMD_SP(sp);
+		prot_int = cmd->device->sector_size;
 
-	sg_prot = scsi_prot_sglist(cmd);
+		sgx.tot_bytes = scsi_bufflen(cmd);
+		sgx.cur_sg = scsi_sglist(cmd);
+		sgx.sp = sp;
+
+		sg_prot = scsi_prot_sglist(cmd);
+	} else if (tc) {
+		vha = tc->vha;
+		prot_int      = tc->blk_sz;
+		sgx.tot_bytes = tc->bufflen;
+		sgx.cur_sg    = tc->sg;
+		sg_prot	      = tc->prot_sg;
+	} else {
+		BUG();
+		return 1;
+	}
 
 	while (qla24xx_get_one_block_sg(prot_int, &sgx, &partial)) {
 
@@ -995,10 +1009,18 @@
 				return 1;
 			}
 
-			list_add_tail(&dsd_ptr->list,
-			    &((struct crc_context *)sp->u.scmd.ctx)->dsd_list);
+			if (sp) {
+				list_add_tail(&dsd_ptr->list,
+				    &((struct crc_context *)
+					    sp->u.scmd.ctx)->dsd_list);
 
-			sp->flags |= SRB_CRC_CTX_DSD_VALID;
+				sp->flags |= SRB_CRC_CTX_DSD_VALID;
+			} else {
+				list_add_tail(&dsd_ptr->list,
+				    &(tc->ctx->dsd_list));
+				tc->ctx_dsd_alloced = 1;
+			}
+
 
 			/* add new list to cmd iocb or last list */
 			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
@@ -1033,21 +1055,35 @@
 	return 0;
 }
 
-static int
+int
 qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
-	uint16_t tot_dsds)
+	uint16_t tot_dsds, struct qla_tgt_cmd *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
 	uint32_t dsd_list_len;
 	struct dsd_dma *dsd_ptr;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *sgl;
 	uint32_t *cur_dsd = dsd;
 	int	i;
 	uint16_t	used_dsds = tot_dsds;
-	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+	struct scsi_cmnd *cmd;
+	struct scsi_qla_host *vha;
 
-	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+	if (sp) {
+		cmd = GET_CMD_SP(sp);
+		sgl = scsi_sglist(cmd);
+		vha = sp->fcport->vha;
+	} else if (tc) {
+		sgl = tc->sg;
+		vha = tc->vha;
+	} else {
+		BUG();
+		return 1;
+	}
+
+
+	for_each_sg(sgl, sg, tot_dsds, i) {
 		dma_addr_t	sle_dma;
 
 		/* Allocate additional continuation packets? */
@@ -1076,10 +1112,17 @@
 				return 1;
 			}
 
-			list_add_tail(&dsd_ptr->list,
-			    &((struct crc_context *)sp->u.scmd.ctx)->dsd_list);
+			if (sp) {
+				list_add_tail(&dsd_ptr->list,
+				    &((struct crc_context *)
+					    sp->u.scmd.ctx)->dsd_list);
 
-			sp->flags |= SRB_CRC_CTX_DSD_VALID;
+				sp->flags |= SRB_CRC_CTX_DSD_VALID;
+			} else {
+				list_add_tail(&dsd_ptr->list,
+				    &(tc->ctx->dsd_list));
+				tc->ctx_dsd_alloced = 1;
+			}
 
 			/* add new list to cmd iocb or last list */
 			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
@@ -1102,23 +1145,37 @@
 	return 0;
 }
 
-static int
+int
 qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
-							uint32_t *dsd,
-	uint16_t tot_dsds)
+	uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
 	uint32_t dsd_list_len;
 	struct dsd_dma *dsd_ptr;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *sgl;
 	int	i;
 	struct scsi_cmnd *cmd;
 	uint32_t *cur_dsd = dsd;
-	uint16_t	used_dsds = tot_dsds;
+	uint16_t used_dsds = tot_dsds;
+	struct scsi_qla_host *vha;
 
-	cmd = GET_CMD_SP(sp);
-	scsi_for_each_prot_sg(cmd, sg, tot_dsds, i) {
+	if (sp) {
+		cmd = GET_CMD_SP(sp);
+		sgl = scsi_prot_sglist(cmd);
+		vha = sp->fcport->vha;
+	} else if (tc) {
+		vha = tc->vha;
+		sgl = tc->prot_sg;
+	} else {
+		BUG();
+		return 1;
+	}
+
+	ql_dbg(ql_dbg_tgt, vha, 0xe021,
+		"%s: enter\n", __func__);
+
+	for_each_sg(sgl, sg, tot_dsds, i) {
 		dma_addr_t	sle_dma;
 
 		/* Allocate additional continuation packets? */
@@ -1147,10 +1204,17 @@
 				return 1;
 			}
 
-			list_add_tail(&dsd_ptr->list,
-			    &((struct crc_context *)sp->u.scmd.ctx)->dsd_list);
+			if (sp) {
+				list_add_tail(&dsd_ptr->list,
+				    &((struct crc_context *)
+					    sp->u.scmd.ctx)->dsd_list);
 
-			sp->flags |= SRB_CRC_CTX_DSD_VALID;
+				sp->flags |= SRB_CRC_CTX_DSD_VALID;
+			} else {
+				list_add_tail(&dsd_ptr->list,
+				    &(tc->ctx->dsd_list));
+				tc->ctx_dsd_alloced = 1;
+			}
 
 			/* add new list to cmd iocb or last list */
 			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
@@ -1386,10 +1450,10 @@
 
 	if (!bundling && tot_prot_dsds) {
 		if (qla24xx_walk_and_build_sglist_no_difb(ha, sp,
-		    cur_dsd, tot_dsds))
+			cur_dsd, tot_dsds, NULL))
 			goto crc_queuing_error;
 	} else if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd,
-	    (tot_dsds - tot_prot_dsds)))
+			(tot_dsds - tot_prot_dsds), NULL))
 		goto crc_queuing_error;
 
 	if (bundling && tot_prot_dsds) {
@@ -1398,7 +1462,7 @@
 			__constant_cpu_to_le16(CF_DIF_SEG_DESCR_ENABLE);
 		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
 		if (qla24xx_walk_and_build_prot_sglist(ha, sp, cur_dsd,
-		    tot_prot_dsds))
+				tot_prot_dsds, NULL))
 			goto crc_queuing_error;
 	}
 	return QLA_SUCCESS;
@@ -1478,8 +1542,8 @@
 	tot_dsds = nseg;
 	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = RD_REG_DWORD_RELAXED(req->req_q_out);
-
+		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+		    RD_REG_DWORD_RELAXED(req->req_q_out);
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -1697,8 +1761,8 @@
 	tot_prot_dsds = nseg;
 	tot_dsds += nseg;
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = RD_REG_DWORD_RELAXED(req->req_q_out);
-
+		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+		    RD_REG_DWORD_RELAXED(req->req_q_out);
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -2825,8 +2889,8 @@
 
 	/* Check for room on request queue. */
 	if (req->cnt < req_cnt + 2) {
-		cnt = RD_REG_DWORD_RELAXED(req->req_q_out);
-
+		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+		    RD_REG_DWORD_RELAXED(req->req_q_out);
 		if  (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 95314ef..a56825c 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -2009,11 +2009,13 @@
 		ql_dbg(ql_dbg_io, vha, 0x3017,
 		    "Invalid status handle (0x%x).\n", sts->handle);
 
-		if (IS_P3P_TYPE(ha))
-			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
-		else
-			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
-		qla2xxx_wake_dpc(vha);
+		if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) {
+			if (IS_P3P_TYPE(ha))
+				set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+			else
+				set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+			qla2xxx_wake_dpc(vha);
+		}
 		return;
 	}
 
@@ -2472,12 +2474,14 @@
 		if (pkt->entry_status != 0) {
 			qla2x00_error_entry(vha, rsp, (sts_entry_t *) pkt);
 
-			(void)qlt_24xx_process_response_error(vha, pkt);
+			if (qlt_24xx_process_response_error(vha, pkt))
+				goto process_err;
 
 			((response_t *)pkt)->signature = RESPONSE_PROCESSED;
 			wmb();
 			continue;
 		}
+process_err:
 
 		switch (pkt->entry_type) {
 		case STATUS_TYPE:
@@ -2494,10 +2498,10 @@
 			qla24xx_logio_entry(vha, rsp->req,
 			    (struct logio_entry_24xx *)pkt);
 			break;
-                case CT_IOCB_TYPE:
+		case CT_IOCB_TYPE:
 			qla24xx_els_ct_entry(vha, rsp->req, pkt, CT_IOCB_TYPE);
 			break;
-                case ELS_IOCB_TYPE:
+		case ELS_IOCB_TYPE:
 			qla24xx_els_ct_entry(vha, rsp->req, pkt, ELS_IOCB_TYPE);
 			break;
 		case ABTS_RECV_24XX:
@@ -2506,6 +2510,7 @@
 		case ABTS_RESP_24XX:
 		case CTIO_TYPE7:
 		case NOTIFY_ACK_TYPE:
+		case CTIO_CRC2:
 			qlt_response_pkt_all_vps(vha, (response_t *)pkt);
 			break;
 		case MARKER_TYPE:

diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 2528709..1c33a77 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1319,7 +1319,7 @@
 
 		left = 0;
 
-		list = kzalloc(dma_size, GFP_KERNEL);
+		list = kmemdup(pmap, dma_size, GFP_KERNEL);
 		if (!list) {
 			ql_log(ql_log_warn, vha, 0x1140,
 			    "%s(%ld): failed to allocate node names list "
@@ -1328,7 +1328,6 @@
 			goto out_free;
 		}
 
-		memcpy(list, pmap, dma_size);
 restart:
 		dma_free_coherent(&ha->pdev->dev, dma_size, pmap, pmap_dma);
 	}
@@ -2644,7 +2643,10 @@
 		ql_dbg(ql_dbg_mbx, vha, 0x1090,
 		    "Failed to complete IOCB -- completion status (%x).\n",
 		    le16_to_cpu(abt->nport_handle));
-		rval = QLA_FUNCTION_FAILED;
+		if (abt->nport_handle == CS_IOCB_ERROR)
+			rval = QLA_FUNCTION_PARAMETER_ERROR;
+		else
+			rval = QLA_FUNCTION_FAILED;
 	} else {
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1091,
 		    "Done %s.\n", __func__);
@@ -2879,6 +2881,78 @@
 	return rval;
 }
 
+/**
+ * qla8044_write_serdes_word() - issue MBC_SET_GET_ETH_SERDES_REG with the
+ *	HCS_WRITE_SERDES sub-command.
+ * @vha: host on which the mailbox command is issued
+ * @addr: LSW(addr) is placed in mb[3], MSW(addr) in mb[4]
+ * @data: LSW(data) is placed in mb[5], MSW(data) in mb[6]
+ *
+ * Returns QLA_FUNCTION_FAILED without issuing anything when the adapter is
+ * not a QLA8044; otherwise the status of qla2x00_mailbox_command().
+ */
+int
+qla8044_write_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t data)
+{
+	mbx_cmd_t cmd;
+	int status;
+
+	if (!IS_QLA8044(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1186,
+	    "Entered %s.\n", __func__);
+
+	/* Which mailbox registers go out and which one comes back. */
+	cmd.out_mb = MBX_6|MBX_5|MBX_4|MBX_3|MBX_1|MBX_0;
+	cmd.in_mb = MBX_0;
+	cmd.tov = MBX_TOV_SECONDS;
+	cmd.flags = 0;
+
+	/* Command, sub-command, then address and data as 16-bit halves. */
+	cmd.mb[0] = MBC_SET_GET_ETH_SERDES_REG;
+	cmd.mb[1] = HCS_WRITE_SERDES;
+	cmd.mb[3] = LSW(addr);
+	cmd.mb[4] = MSW(addr);
+	cmd.mb[5] = LSW(data);
+	cmd.mb[6] = MSW(data);
+
+	status = qla2x00_mailbox_command(vha, &cmd);
+	if (status == QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1188,
+		    "Done %s.\n", __func__);
+	} else {
+		ql_dbg(ql_dbg_mbx, vha, 0x1187,
+		    "Failed=%x mb[0]=%x.\n", status, cmd.mb[0]);
+	}
+
+	return status;
+}
+
+/**
+ * qla8044_read_serdes_word() - issue MBC_SET_GET_ETH_SERDES_REG with the
+ *	HCS_READ_SERDES sub-command and hand back the 32-bit result.
+ * @vha: host on which the mailbox command is issued
+ * @addr: LSW(addr) is placed in mb[3], MSW(addr) in mb[4]
+ * @data: on success receives (mb[2] << 16) | mb[1]; not written on failure
+ *
+ * Returns QLA_FUNCTION_FAILED without issuing anything when the adapter is
+ * not a QLA8044; otherwise the status of qla2x00_mailbox_command().
+ */
+int
+qla8044_read_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t *data)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA8044(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1189,
+	    "Entered %s.\n", __func__);
+
+	mcp->mb[0] = MBC_SET_GET_ETH_SERDES_REG;
+	mcp->mb[1] = HCS_READ_SERDES;
+	mcp->mb[3] = LSW(addr);
+	mcp->mb[4] = MSW(addr);
+	mcp->out_mb = MBX_4|MBX_3|MBX_1|MBX_0;
+	mcp->in_mb = MBX_2|MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		/*
+		 * mc is an uninitialised local and mb[2] is never set by this
+		 * function, so after a failed command it may still hold stale
+		 * stack contents.  Do not pass that on through *data.
+		 */
+		ql_dbg(ql_dbg_mbx, vha, 0x118a,
+		    "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]);
+	} else {
+		/*
+		 * Widen before shifting: presumably mb[] is a 16-bit type
+		 * (TODO confirm), in which case mb[2] would be promoted to a
+		 * signed int and a set top bit would overflow it.
+		 */
+		*data = (uint32_t)mcp->mb[2] << 16 | mcp->mb[1];
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x118b,
+		    "Done %s.\n", __func__);
+	}
+
+	return rval;
+}
+
 /**
  * qla2x00_set_serdes_params() -
  * @ha: HA context
@@ -3660,6 +3734,9 @@
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d3,
 	    "Entered %s.\n", __func__);
 
+	if (IS_SHADOW_REG_CAPABLE(ha))
+		req->options |= BIT_13;
+
 	mcp->mb[0] = MBC_INITIALIZE_MULTIQ;
 	mcp->mb[1] = req->options;
 	mcp->mb[2] = MSW(LSD(req->dma));
@@ -3679,7 +3756,7 @@
 	/* que in ptr index */
 	mcp->mb[8] = 0;
 	/* que out ptr index */
-	mcp->mb[9] = 0;
+	mcp->mb[9] = *req->out_ptr = 0;
 	mcp->out_mb = MBX_14|MBX_13|MBX_12|MBX_11|MBX_10|MBX_9|MBX_8|MBX_7|
 			MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->in_mb = MBX_0;
@@ -3688,7 +3765,7 @@
 
 	if (IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha))
 		mcp->in_mb |= MBX_1;
-	if (IS_QLA83XX(ha) || !IS_QLA27XX(ha)) {
+	if (IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
 		mcp->out_mb |= MBX_15;
 		/* debug q create issue in SR-IOV */
 		mcp->in_mb |= MBX_9 | MBX_8 | MBX_7;
@@ -3697,7 +3774,7 @@
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	if (!(req->options & BIT_0)) {
 		WRT_REG_DWORD(req->req_q_in, 0);
-		if (!IS_QLA83XX(ha) || !IS_QLA27XX(ha))
+		if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha))
 			WRT_REG_DWORD(req->req_q_out, 0);
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
@@ -3726,6 +3803,9 @@
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d6,
 	    "Entered %s.\n", __func__);
 
+	if (IS_SHADOW_REG_CAPABLE(ha))
+		rsp->options |= BIT_13;
+
 	mcp->mb[0] = MBC_INITIALIZE_MULTIQ;
 	mcp->mb[1] = rsp->options;
 	mcp->mb[2] = MSW(LSD(rsp->dma));
@@ -3740,7 +3820,7 @@
 
 	mcp->mb[4] = rsp->id;
 	/* que in ptr index */
-	mcp->mb[8] = 0;
+	mcp->mb[8] = *rsp->in_ptr = 0;
 	/* que out ptr index */
 	mcp->mb[9] = 0;
 	mcp->out_mb = MBX_14|MBX_13|MBX_9|MBX_8|MBX_7

diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index f0a8522..8999824 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */

diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 0aaf6a9..abeb390 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -527,21 +527,63 @@
 	struct qla_hw_data *ha = vha->hw;
 	int i, core;
 	uint32_t cnt;
+	uint32_t reg_val;
+
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x80004, 0);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x82004, 0);
+
+	/* stop the XOR DMA engines */
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x60920, 0x02);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x60924, 0x02);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0xf0920, 0x02);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0xf0924, 0x02);
+
+	/* stop the IDMA engines */
+	reg_val = QLAFX00_GET_HBA_SOC_REG(ha, 0x60840);
+	reg_val &= ~(1<<12);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x60840, reg_val);
+
+	reg_val = QLAFX00_GET_HBA_SOC_REG(ha, 0x60844);
+	reg_val &= ~(1<<12);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x60844, reg_val);
+
+	reg_val = QLAFX00_GET_HBA_SOC_REG(ha, 0x60848);
+	reg_val &= ~(1<<12);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x60848, reg_val);
+
+	reg_val = QLAFX00_GET_HBA_SOC_REG(ha, 0x6084C);
+	reg_val &= ~(1<<12);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x6084C, reg_val);
+
+	for (i = 0; i < 100000; i++) {
+		if ((QLAFX00_GET_HBA_SOC_REG(ha, 0xd0000) & 0x10000000) == 0 &&
+		    (QLAFX00_GET_HBA_SOC_REG(ha, 0x10600) & 0x1) == 0)
+			break;
+		udelay(100);
+	}
 
 	/* Set all 4 cores in reset */
 	for (i = 0; i < 4; i++) {
 		QLAFX00_SET_HBA_SOC_REG(ha,
 		    (SOC_SW_RST_CONTROL_REG_CORE0 + 8*i), (0xF01));
-	}
-
-	/* Set all 4 core Clock gating control */
-	for (i = 0; i < 4; i++) {
 		QLAFX00_SET_HBA_SOC_REG(ha,
 		    (SOC_SW_RST_CONTROL_REG_CORE0 + 4 + 8*i), (0x01010101));
 	}
 
 	/* Reset all units in Fabric */
-	QLAFX00_SET_HBA_SOC_REG(ha, SOC_FABRIC_RST_CONTROL_REG, (0x11F0101));
+	QLAFX00_SET_HBA_SOC_REG(ha, SOC_FABRIC_RST_CONTROL_REG, (0x011f0101));
+
+	/* */
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x10610, 1);
+	QLAFX00_SET_HBA_SOC_REG(ha, 0x10600, 0);
+
+	/* Set all 4 core Memory Power Down Registers */
+	for (i = 0; i < 5; i++) {
+		QLAFX00_SET_HBA_SOC_REG(ha,
+		    (SOC_PWR_MANAGEMENT_PWR_DOWN_REG + 4*i), (0x0));
+	}
 
 	/* Reset all interrupt control registers */
 	for (i = 0; i < 115; i++) {
@@ -564,20 +606,19 @@
 	QLAFX00_SET_HBA_SOC_REG(ha, SOC_FABRIC_CONTROL_REG, (0x2));
 	QLAFX00_SET_HBA_SOC_REG(ha, SOC_FABRIC_CONFIG_REG, (0x3));
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-
 	/* Kick in Fabric units */
 	QLAFX00_SET_HBA_SOC_REG(ha, SOC_FABRIC_RST_CONTROL_REG, (0x0));
 
 	/* Kick in Core0 to start boot process */
 	QLAFX00_SET_HBA_SOC_REG(ha, SOC_SW_RST_CONTROL_REG_CORE0, (0xF00));
 
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	/* Wait 10secs for soft-reset to complete. */
 	for (cnt = 10; cnt; cnt--) {
 		msleep(1000);
 		barrier();
 	}
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
 /**
@@ -597,7 +638,6 @@
 
 	ha->isp_ops->disable_intrs(ha);
 	qlafx00_soc_cpu_reset(vha);
-	ha->isp_ops->enable_intrs(ha);
 }
 
 /**
@@ -2675,7 +2715,7 @@
 	uint16_t lreq_q_out = 0;
 
 	lreq_q_in = RD_REG_DWORD(rsp->rsp_q_in);
-	lreq_q_out = RD_REG_DWORD(rsp->rsp_q_out);
+	lreq_q_out = rsp->ring_index;
 
 	while (lreq_q_in != lreq_q_out) {
 		lptr = rsp->ring_ptr;
@@ -3426,7 +3466,7 @@
 	    sp->fcport->vha, 0x3047,
 	    (uint8_t *)&fx_iocb, sizeof(struct fxdisc_entry_fx00));
 
-	memcpy((void *)pfxiocb, &fx_iocb,
+	memcpy_toio((void __iomem *)pfxiocb, &fx_iocb,
 	    sizeof(struct fxdisc_entry_fx00));
 	wmb();
 }

diff --git a/drivers/scsi/qla2xxx/qla_mr.h b/drivers/scsi/qla2xxx/qla_mr.h
index e529dfa..aeaa1b4 100644
--- a/drivers/scsi/qla2xxx/qla_mr.h
+++ b/drivers/scsi/qla2xxx/qla_mr.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -351,6 +351,7 @@
 #define SOC_FABRIC_RST_CONTROL_REG       0x0020840
 #define SOC_FABRIC_CONTROL_REG           0x0020200
 #define SOC_FABRIC_CONFIG_REG            0x0020204
+#define SOC_PWR_MANAGEMENT_PWR_DOWN_REG  0x001820C
 
 #define SOC_INTERRUPT_SOURCE_I_CONTROL_REG     0x0020B00
 #define SOC_CORE_TIMER_REG                     0x0021850

diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 5511e24..58f3c91 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -848,6 +848,7 @@
 {
 	int done = 0, timeout = 0;
 	uint32_t lock_owner = 0;
+	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	while (!done) {
 		/* acquire semaphore2 from PCI HW block */
@@ -856,17 +857,21 @@
 			break;
 		if (timeout >= qla82xx_rom_lock_timeout) {
 			lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID);
+			ql_log(ql_log_warn, vha, 0xb157,
+			    "%s: Simultaneous flash access by following ports, active port = %d: accessing port = %d",
+			    __func__, ha->portnum, lock_owner);
 			return -1;
 		}
 		timeout++;
 	}
-	qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, ROM_LOCK_DRIVER);
+	qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, ha->portnum);
 	return 0;
 }
 
 static void
 qla82xx_rom_unlock(struct qla_hw_data *ha)
 {
+	qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, 0xffffffff);
 	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
 }
 
@@ -950,6 +955,7 @@
 qla82xx_rom_fast_read(struct qla_hw_data *ha, int addr, int *valp)
 {
 	int ret, loops = 0;
+	uint32_t lock_owner = 0;
 	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	while ((qla82xx_rom_lock(ha) != 0) && (loops < 50000)) {
@@ -958,8 +964,10 @@
 		loops++;
 	}
 	if (loops >= 50000) {
+		lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID);
 		ql_log(ql_log_fatal, vha, 0x00b9,
-		    "Failed to acquire SEM2 lock.\n");
+		    "Failed to acquire SEM2 lock, Lock Owner %u.\n",
+		    lock_owner);
 		return -1;
 	}
 	ret = qla82xx_do_rom_fast_read(ha, addr, valp);
@@ -1057,6 +1065,7 @@
 ql82xx_rom_lock_d(struct qla_hw_data *ha)
 {
 	int loops = 0;
+	uint32_t lock_owner = 0;
 	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	while ((qla82xx_rom_lock(ha) != 0) && (loops < 50000)) {
@@ -1065,8 +1074,9 @@
 		loops++;
 	}
 	if (loops >= 50000) {
+		lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID);
 		ql_log(ql_log_warn, vha, 0xb010,
-		    "ROM lock failed.\n");
+		    "ROM lock failed, Lock Owner %u.\n", lock_owner);
 		return -1;
 	}
 	return 0;
@@ -2811,12 +2821,14 @@
 qla82xx_rom_lock_recovery(struct qla_hw_data *ha)
 {
 	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
+	uint32_t lock_owner = 0;
 
-	if (qla82xx_rom_lock(ha))
+	if (qla82xx_rom_lock(ha)) {
+		lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID);
 		/* Someone else is holding the lock. */
 		ql_log(ql_log_info, vha, 0xb022,
-		    "Resetting rom_lock.\n");
-
+		    "Resetting rom_lock, Lock Owner %u.\n", lock_owner);
+	}
 	/*
 	 * Either we got the lock, or someone
 	 * else died while holding it.
@@ -2840,47 +2852,30 @@
 qla82xx_device_bootstrap(scsi_qla_host_t *vha)
 {
 	int rval = QLA_SUCCESS;
-	int i, timeout;
+	int i;
 	uint32_t old_count, count;
 	struct qla_hw_data *ha = vha->hw;
-	int need_reset = 0, peg_stuck = 1;
+	int need_reset = 0;
 
 	need_reset = qla82xx_need_reset(ha);
 
-	old_count = qla82xx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
-
-	for (i = 0; i < 10; i++) {
-		timeout = msleep_interruptible(200);
-		if (timeout) {
-			qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
-				QLA8XXX_DEV_FAILED);
-			return QLA_FUNCTION_FAILED;
-		}
-
-		count = qla82xx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
-		if (count != old_count)
-			peg_stuck = 0;
-	}
-
 	if (need_reset) {
 		/* We are trying to perform a recovery here. */
-		if (peg_stuck)
+		if (ha->flags.isp82xx_fw_hung)
 			qla82xx_rom_lock_recovery(ha);
-		goto dev_initialize;
 	} else  {
-		/* Start of day for this ha context. */
-		if (peg_stuck) {
-			/* Either we are the first or recovery in progress. */
-			qla82xx_rom_lock_recovery(ha);
-			goto dev_initialize;
-		} else
-			/* Firmware already running. */
-			goto dev_ready;
+		old_count = qla82xx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
+		for (i = 0; i < 10; i++) {
+			msleep(200);
+			count = qla82xx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
+			if (count != old_count) {
+				rval = QLA_SUCCESS;
+				goto dev_ready;
+			}
+		}
+		qla82xx_rom_lock_recovery(ha);
 	}
 
-	return rval;
-
-dev_initialize:
 	/* set to DEV_INITIALIZING */
 	ql_log(ql_log_info, vha, 0x009e,
 	    "HW State: INITIALIZING.\n");
@@ -3142,18 +3137,18 @@
 
 	if (ql2xmdenable) {
 		if (!ha->fw_dumped) {
-			if (fw_major_version != ha->fw_major_version ||
+			if ((fw_major_version != ha->fw_major_version ||
 			    fw_minor_version != ha->fw_minor_version ||
-			    fw_subminor_version != ha->fw_subminor_version) {
+			    fw_subminor_version != ha->fw_subminor_version) ||
+			    (ha->prev_minidump_failed)) {
 				ql_dbg(ql_dbg_p3p, vha, 0xb02d,
-				    "Firmware version differs "
-				    "Previous version: %d:%d:%d - "
-				    "New version: %d:%d:%d\n",
+				    "Firmware version differs Previous version: %d:%d:%d - New version: %d:%d:%d, prev_minidump_failed: %d.\n",
 				    fw_major_version, fw_minor_version,
 				    fw_subminor_version,
 				    ha->fw_major_version,
 				    ha->fw_minor_version,
-				    ha->fw_subminor_version);
+				    ha->fw_subminor_version,
+				    ha->prev_minidump_failed);
 				/* Release MiniDump resources */
 				qla82xx_md_free(vha);
 				/* ALlocate MiniDump resources */
@@ -3682,8 +3677,10 @@
 			for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 				sp = req->outstanding_cmds[cnt];
 				if (sp) {
-					if (!sp->u.scmd.ctx ||
-					    (sp->flags & SRB_FCP_CMND_DMA_VALID)) {
+					if ((!sp->u.scmd.ctx ||
+					    (sp->flags &
+						SRB_FCP_CMND_DMA_VALID)) &&
+						!ha->flags.isp82xx_fw_hung) {
 						spin_unlock_irqrestore(
 						    &ha->hardware_lock, flags);
 						if (ha->isp_ops->abort_command(sp)) {

diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index 1bb93db..59c4778 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -333,9 +333,6 @@
 #define QLA82XX_ROMUSB_ROM_INSTR_OPCODE		(ROMUSB_ROM + 0x0004)
 #define QLA82XX_ROMUSB_GLB_CAS_RST		(ROMUSB_GLB + 0x0038)
 
-/* Lock IDs for ROM lock */
-#define ROM_LOCK_DRIVER       0x0d417340
-
 #define QLA82XX_PCI_CRB_WINDOWSIZE 0x00100000	 /* all are 1MB windows */
 #define QLA82XX_PCI_CRB_WINDOW(A) \
 	(QLA82XX_PCI_CRBSPACE + (A)*QLA82XX_PCI_CRB_WINDOWSIZE)
@@ -1186,6 +1183,7 @@
 #define CRB_NIU_XG_PAUSE_CTL_P1        0x8
 
 #define qla82xx_get_temp_val(x)          ((x) >> 16)
+/* Low 16 bits of x.  This must be a bitwise AND; a logical && only yields 0 or 1. */
+#define qla82xx_get_temp_val1(x)          ((x) & 0x0000FFFF)
 #define qla82xx_get_temp_state(x)        ((x) & 0xffff)
 #define qla82xx_encode_temp(val, state)  (((val) << 16) | (state))
 

diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c
index 86cf108..da9e390 100644
--- a/drivers/scsi/qla2xxx/qla_nx2.c
+++ b/drivers/scsi/qla2xxx/qla_nx2.c

@@ -1,17 +1,20 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #include <linux/vmalloc.h>
+#include <linux/delay.h>
 
 #include "qla_def.h"
 #include "qla_gbl.h"
 
 #include <linux/delay.h>
 
+/* Polling budget in milliseconds; converted with msecs_to_jiffies() by the poll helpers. */
+#define TIMEOUT_100_MS 100
+
 /* 8044 Flash Read/Write functions */
 uint32_t
 qla8044_rd_reg(struct qla_hw_data *ha, ulong addr)
@@ -117,6 +120,95 @@
 	qla8044_wr_reg_indirect(vha, waddr, value);
 }
 
+static int
+qla8044_poll_wait_for_ready(struct scsi_qla_host *vha, uint32_t addr1,
+	uint32_t mask)
+{
+	unsigned long timeout;
+	uint32_t temp;
+
+	/* Poll addr1 until a bit in mask reads set; give up after 100 ms. */
+	timeout = jiffies + msecs_to_jiffies(TIMEOUT_100_MS);
+	do {
+		qla8044_rd_reg_indirect(vha, addr1, &temp);
+		if ((temp & mask) != 0)
+			break;
+		if (time_after_eq(jiffies, timeout)) {
+			ql_log(ql_log_warn, vha, 0xb151,
+				"Error in processing rdmdio entry\n");
+			return -1;
+		}
+	} while (1);
+	/* Only the break reaches here: 0 = ready, -1 above = timed out. */
+	return 0;
+}
+
+static uint32_t
+qla8044_ipmdio_rd_reg(struct scsi_qla_host *vha,
+	uint32_t addr1, uint32_t addr3, uint32_t mask, uint32_t addr)
+{
+	uint32_t temp;
+	int ret = 0;
+
+	ret = qla8044_poll_wait_for_ready(vha, addr1, mask);
+	if (ret == -1)
+		return -1;
+	/* Request the read: addr with bit 30 set is written to addr1. */
+	temp = (0x40000000 | addr);
+	qla8044_wr_reg_indirect(vha, addr1, temp);
+
+	ret = qla8044_poll_wait_for_ready(vha, addr1, mask);
+	if (ret == -1)
+		return 0; /* NOTE(review): -1 above, 0 here - confirm intent */
+	/* Result arrives via addr3; use uint32_t storage for the accessor. */
+	qla8044_rd_reg_indirect(vha, addr3, &temp);
+
+	return temp;
+}
+
+
+static int
+qla8044_poll_wait_ipmdio_bus_idle(struct scsi_qla_host *vha,
+	uint32_t addr1, uint32_t addr2, uint32_t addr3, uint32_t mask)
+{
+	unsigned long timeout;
+	uint32_t temp;
+
+	/* Re-read addr2 via ipmdio until bit 0 reads 0; fail after 100 ms. */
+	timeout = jiffies + msecs_to_jiffies(TIMEOUT_100_MS);
+	do {
+		temp = qla8044_ipmdio_rd_reg(vha, addr1, addr3, mask, addr2);
+		if ((temp & 0x1) != 1)	/* presumably the busy flag - confirm */
+			break;
+		if (time_after_eq(jiffies, timeout)) {
+			ql_log(ql_log_warn, vha, 0xb152,
+			    "Error in processing mdiobus idle\n");
+			return -1;
+		}
+	} while (1);
+	/* Only the break reaches here: 0 = bit 0 clear, -1 above = timeout. */
+	return 0;
+}
+
+static int
+qla8044_ipmdio_wr_reg(struct scsi_qla_host *vha, uint32_t addr1,
+	uint32_t addr3, uint32_t mask, uint32_t addr, uint32_t value)
+{
+	int ret = 0;
+
+	ret = qla8044_poll_wait_for_ready(vha, addr1, mask);
+	if (ret == -1)
+		return -1;
+	/* Write value to addr3 first, then addr (bit 30 not set) to addr1. */
+	qla8044_wr_reg_indirect(vha, addr3, value);
+	qla8044_wr_reg_indirect(vha, addr1, addr);
+	/* Wait for a mask bit in addr1 again before reporting success (0). */
+	ret = qla8044_poll_wait_for_ready(vha, addr1, mask);
+	if (ret == -1)
+		return -1;
+
+	return 0;
+}
 /*
  * qla8044_rmw_crb_reg - Read value from raddr, AND with test_mask,
  * Shift Left,Right/OR/XOR with values RMW header and write value to waddr.
@@ -356,8 +448,8 @@
 			lock_owner = qla8044_rd_reg(ha,
 			    QLA8044_FLASH_LOCK_ID);
 			ql_log(ql_log_warn, vha, 0xb113,
-			    "%s: flash lock by %d failed, held by %d\n",
-				__func__, ha->portnum, lock_owner);
+			    "%s: Simultaneous flash access by following ports, active port = %d: accessing port = %d\n",
+			    __func__, ha->portnum, lock_owner);
 			ret_val = QLA_FUNCTION_FAILED;
 			break;
 		}
@@ -1541,7 +1633,7 @@
 qla8044_need_reset_handler(struct scsi_qla_host *vha)
 {
 	uint32_t dev_state = 0, drv_state, drv_active;
-	unsigned long reset_timeout, dev_init_timeout;
+	unsigned long reset_timeout;
 	struct qla_hw_data *ha = vha->hw;
 
 	ql_log(ql_log_fatal, vha, 0xb0c2,
@@ -1555,84 +1647,78 @@
 		qla8044_idc_lock(ha);
 	}
 
+	dev_state = qla8044_rd_direct(vha,
+	    QLA8044_CRB_DEV_STATE_INDEX);
 	drv_state = qla8044_rd_direct(vha,
 	    QLA8044_CRB_DRV_STATE_INDEX);
 	drv_active = qla8044_rd_direct(vha,
 	    QLA8044_CRB_DRV_ACTIVE_INDEX);
 
 	ql_log(ql_log_info, vha, 0xb0c5,
-	    "%s(%ld): drv_state = 0x%x, drv_active = 0x%x\n",
-	    __func__, vha->host_no, drv_state, drv_active);
+	    "%s(%ld): drv_state = 0x%x, drv_active = 0x%x dev_state = 0x%x\n",
+	    __func__, vha->host_no, drv_state, drv_active, dev_state);
 
-	if (!ha->flags.nic_core_reset_owner) {
-		ql_dbg(ql_dbg_p3p, vha, 0xb0c3,
-		    "%s(%ld): reset acknowledged\n",
-		    __func__, vha->host_no);
-		qla8044_set_rst_ready(vha);
+	qla8044_set_rst_ready(vha);
 
-		/* Non-reset owners ACK Reset and wait for device INIT state
-		 * as part of Reset Recovery by Reset Owner
-		 */
-		dev_init_timeout = jiffies + (ha->fcoe_reset_timeout * HZ);
+	/* wait for 10 seconds for reset ack from all functions */
+	reset_timeout = jiffies + (ha->fcoe_reset_timeout * HZ);
 
-		do {
-			if (time_after_eq(jiffies, dev_init_timeout)) {
-				ql_log(ql_log_info, vha, 0xb0c4,
-				    "%s: Non Reset owner: Reset Ack Timeout!\n",
-				    __func__);
-				break;
-			}
+	do {
+		if (time_after_eq(jiffies, reset_timeout)) {
+			ql_log(ql_log_info, vha, 0xb0c4,
+			    "%s: Function %d: Reset Ack Timeout!, drv_state: 0x%08x, drv_active: 0x%08x\n",
+			    __func__, ha->portnum, drv_state, drv_active);
+			break;
+		}
 
-			qla8044_idc_unlock(ha);
-			msleep(1000);
-			qla8044_idc_lock(ha);
+		qla8044_idc_unlock(ha);
+		msleep(1000);
+		qla8044_idc_lock(ha);
 
-			dev_state = qla8044_rd_direct(vha,
-					QLA8044_CRB_DEV_STATE_INDEX);
-		} while (((drv_state & drv_active) != drv_active) &&
-		    (dev_state == QLA8XXX_DEV_NEED_RESET));
+		dev_state = qla8044_rd_direct(vha,
+		    QLA8044_CRB_DEV_STATE_INDEX);
+		drv_state = qla8044_rd_direct(vha,
+		    QLA8044_CRB_DRV_STATE_INDEX);
+		drv_active = qla8044_rd_direct(vha,
+		    QLA8044_CRB_DRV_ACTIVE_INDEX);
+	} while (((drv_state & drv_active) != drv_active) &&
+	    (dev_state == QLA8XXX_DEV_NEED_RESET));
+
+	/* Remove IDC participation of functions not acknowledging */
+	if (drv_state != drv_active) {
+		ql_log(ql_log_info, vha, 0xb0c7,
+		    "%s(%ld): Function %d turning off drv_active of non-acking function 0x%x\n",
+		    __func__, vha->host_no, ha->portnum,
+		    (drv_active ^ drv_state));
+		drv_active = drv_active & drv_state;
+		qla8044_wr_direct(vha, QLA8044_CRB_DRV_ACTIVE_INDEX,
+		    drv_active);
 	} else {
-		qla8044_set_rst_ready(vha);
-
-		/* wait for 10 seconds for reset ack from all functions */
-		reset_timeout = jiffies + (ha->fcoe_reset_timeout * HZ);
-
-		while ((drv_state & drv_active) != drv_active) {
-			if (time_after_eq(jiffies, reset_timeout)) {
-				ql_log(ql_log_info, vha, 0xb0c6,
-				    "%s: RESET TIMEOUT!"
-				    "drv_state: 0x%08x, drv_active: 0x%08x\n",
-				    QLA2XXX_DRIVER_NAME, drv_state, drv_active);
-				break;
-			}
-
-			qla8044_idc_unlock(ha);
-			msleep(1000);
-			qla8044_idc_lock(ha);
-
-			drv_state = qla8044_rd_direct(vha,
-			    QLA8044_CRB_DRV_STATE_INDEX);
-			drv_active = qla8044_rd_direct(vha,
-			    QLA8044_CRB_DRV_ACTIVE_INDEX);
-		}
-
-		if (drv_state != drv_active) {
-			ql_log(ql_log_info, vha, 0xb0c7,
-			    "%s(%ld): Reset_owner turning off drv_active "
-			    "of non-acking function 0x%x\n", __func__,
-			    vha->host_no, (drv_active ^ drv_state));
-			drv_active = drv_active & drv_state;
-			qla8044_wr_direct(vha, QLA8044_CRB_DRV_ACTIVE_INDEX,
-			    drv_active);
-		}
-
 		/*
-		* Clear RESET OWNER, will be set at next reset
-		* by next RST_OWNER
-		*/
-		ha->flags.nic_core_reset_owner = 0;
+		 * Reset owner should execute reset recovery,
+		 * if all functions acknowledged
+		 */
+		if ((ha->flags.nic_core_reset_owner) &&
+		    (dev_state == QLA8XXX_DEV_NEED_RESET)) {
+			ha->flags.nic_core_reset_owner = 0;
+			qla8044_device_bootstrap(vha);
+			return;
+		}
+	}
 
-		/* Start Reset Recovery */
+	/* Exit if non active function */
+	if (!(drv_active & (1 << ha->portnum))) {
+		ha->flags.nic_core_reset_owner = 0;
+		return;
+	}
+
+	/*
+	 * Execute Reset Recovery if Reset Owner or Function 7
+	 * is the only active function
+	 */
+	if (ha->flags.nic_core_reset_owner ||
+	    ((drv_state & drv_active) == QLA8044_FUN7_ACTIVE_INDEX)) {
+		ha->flags.nic_core_reset_owner = 0;
 		qla8044_device_bootstrap(vha);
 	}
 }
@@ -1655,6 +1741,19 @@
 	qla8044_wr_direct(vha, QLA8044_CRB_DRV_ACTIVE_INDEX, drv_active);
 }
 
+static int
+qla8044_check_drv_active(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	uint32_t active_fns;
+
+	/* QLA_SUCCESS only when this port's bit is set in DRV_ACTIVE. */
+	active_fns = qla8044_rd_direct(vha, QLA8044_CRB_DRV_ACTIVE_INDEX);
+	if (!(active_fns & (1 << ha->portnum)))
+		return QLA_TEST_FAILED;
+	return QLA_SUCCESS;
+}
+
 static void
 qla8044_clear_idc_dontreset(struct scsi_qla_host *vha)
 {
@@ -1837,14 +1936,16 @@
 
 	while (1) {
 		if (time_after_eq(jiffies, dev_init_timeout)) {
-			ql_log(ql_log_warn, vha, 0xb0cf,
-			    "%s: Device Init Failed 0x%x = %s\n",
-			    QLA2XXX_DRIVER_NAME, dev_state,
-			    dev_state < MAX_STATES ?
-			    qdev_state(dev_state) : "Unknown");
-
-			qla8044_wr_direct(vha, QLA8044_CRB_DEV_STATE_INDEX,
-			    QLA8XXX_DEV_FAILED);
+			if (qla8044_check_drv_active(vha) == QLA_SUCCESS) {
+				ql_log(ql_log_warn, vha, 0xb0cf,
+				    "%s: Device Init Failed 0x%x = %s\n",
+				    QLA2XXX_DRIVER_NAME, dev_state,
+				    dev_state < MAX_STATES ?
+				    qdev_state(dev_state) : "Unknown");
+				qla8044_wr_direct(vha,
+				    QLA8044_CRB_DEV_STATE_INDEX,
+				    QLA8XXX_DEV_FAILED);
+			}
 		}
 
 		dev_state = qla8044_rd_direct(vha, QLA8044_CRB_DEV_STATE_INDEX);
@@ -2017,6 +2118,13 @@
 	    test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags))) {
 		dev_state = qla8044_rd_direct(vha, QLA8044_CRB_DEV_STATE_INDEX);
 
+		if (qla8044_check_fw_alive(vha)) {
+			ha->flags.isp82xx_fw_hung = 1;
+			ql_log(ql_log_warn, vha, 0xb10a,
+			    "Firmware hung.\n");
+			qla82xx_clear_pending_mbx(vha);
+		}
+
 		if (qla8044_check_temp(vha)) {
 			set_bit(ISP_UNRECOVERABLE, &vha->dpc_flags);
 			ha->flags.isp82xx_fw_hung = 1;
@@ -2037,7 +2145,7 @@
 			qla2xxx_wake_dpc(vha);
 		} else  {
 			/* Check firmware health */
-			if (qla8044_check_fw_alive(vha)) {
+			if (ha->flags.isp82xx_fw_hung) {
 				halt_status = qla8044_rd_direct(vha,
 					QLA8044_PEG_HALT_STATUS1_INDEX);
 				if (halt_status &
@@ -2073,12 +2181,8 @@
 						    __func__);
 						set_bit(ISP_ABORT_NEEDED,
 						    &vha->dpc_flags);
-						qla82xx_clear_pending_mbx(vha);
 					}
 				}
-				ha->flags.isp82xx_fw_hung = 1;
-				ql_log(ql_log_warn, vha, 0xb10a,
-				    "Firmware hung.\n");
 				qla2xxx_wake_dpc(vha);
 			}
 		}
@@ -2286,8 +2390,6 @@
 		}
 
 		if (j >= MAX_CTL_CHECK) {
-			printk_ratelimited(KERN_ERR
-			    "%s: failed to read through agent\n", __func__);
 			write_unlock_irqrestore(&ha->hw_lock, flags);
 			return QLA_SUCCESS;
 		}
@@ -2882,6 +2984,231 @@
 	return rval;
 }
 
+static uint32_t
+qla8044_minidump_process_rddfe(struct scsi_qla_host *vha,
+	struct qla8044_minidump_entry_hdr *entry_hdr, uint32_t **d_ptr)
+{
+	int loop_cnt;
+	uint32_t addr1, addr2, value, data, temp, wrVal;
+	uint8_t stride, stride2;
+	uint16_t count;
+	uint32_t poll, mask, data_size, modify_mask;
+	uint32_t wait_count = 0;
+
+	uint32_t *data_ptr = *d_ptr;
+
+	struct qla8044_minidump_entry_rddfe *rddfe;
+	rddfe = (struct qla8044_minidump_entry_rddfe *) entry_hdr;
+
+	addr1 = rddfe->addr_1;
+	value = rddfe->value;
+	stride = rddfe->stride;
+	stride2 = rddfe->stride2;
+	count = rddfe->count;
+
+	poll = rddfe->poll;
+	mask = rddfe->mask;
+	modify_mask = rddfe->modify_mask;
+	data_size = rddfe->data_size;	/* loaded but not used below */
+
+	addr2 = addr1 + stride;
+	/* Each pass writes addr1 three times, polling addr1 after each write. */
+	for (loop_cnt = 0x0; loop_cnt < count; loop_cnt++) {
+		qla8044_wr_reg_indirect(vha, addr1, (0x40000000 | value));
+		/* Wait (at most 'poll' reads) for a mask bit in addr1. */
+		wait_count = 0;
+		while (wait_count < poll) {
+			qla8044_rd_reg_indirect(vha, addr1, &temp);
+			if ((temp & mask) != 0)
+				break;
+			wait_count++;
+		}
+
+		if (wait_count == poll) {
+			ql_log(ql_log_warn, vha, 0xb153,
+			    "%s: TIMEOUT\n", __func__);
+			goto error;
+		} else {
+			qla8044_rd_reg_indirect(vha, addr2, &temp);
+			temp = temp & modify_mask;
+			temp = (temp | ((loop_cnt << 16) | loop_cnt));
+			wrVal = ((temp << 16) | temp);
+			/* Write wrVal to addr2, then plain value to addr1. */
+			qla8044_wr_reg_indirect(vha, addr2, wrVal);
+			qla8044_wr_reg_indirect(vha, addr1, value);
+
+			wait_count = 0;
+			while (wait_count < poll) {
+				qla8044_rd_reg_indirect(vha, addr1, &temp);
+				if ((temp & mask) != 0)
+					break;
+				wait_count++;
+			}
+			if (wait_count == poll) {
+				ql_log(ql_log_warn, vha, 0xb154,
+				    "%s: TIMEOUT\n", __func__);
+				goto error;
+			}
+
+			qla8044_wr_reg_indirect(vha, addr1,
+			    ((0x40000000 | value) + stride2));
+			wait_count = 0;
+			while (wait_count < poll) {
+				qla8044_rd_reg_indirect(vha, addr1, &temp);
+				if ((temp & mask) != 0)
+					break;
+				wait_count++;
+			}
+
+			if (wait_count == poll) {
+				ql_log(ql_log_warn, vha, 0xb155,
+				    "%s: TIMEOUT\n", __func__);
+				goto error;
+			}
+			/* Store the word written and the word read back. */
+			qla8044_rd_reg_indirect(vha, addr2, &data);
+
+			*data_ptr++ = wrVal;
+			*data_ptr++ = data;
+		}
+
+	}
+
+	*d_ptr = data_ptr;
+	return QLA_SUCCESS;
+
+error:
+	return -1;	/* *d_ptr is left unchanged on this path */
+
+}
+
+static uint32_t
+qla8044_minidump_process_rdmdio(struct scsi_qla_host *vha,
+	struct qla8044_minidump_entry_hdr *entry_hdr, uint32_t **d_ptr)
+{
+	int ret = 0;
+	uint32_t addr1, addr2, value1, value2, data, selVal;
+	uint8_t stride1, stride2;
+	uint32_t addr3, addr4, addr5, addr6, addr7;
+	uint16_t count, loop_cnt;
+	uint32_t poll, mask;
+	uint32_t *data_ptr = *d_ptr;
+
+	struct qla8044_minidump_entry_rdmdio *rdmdio;
+
+	rdmdio = (struct qla8044_minidump_entry_rdmdio *) entry_hdr;
+
+	addr1 = rdmdio->addr_1;
+	addr2 = rdmdio->addr_2;
+	value1 = rdmdio->value_1;
+	stride1 = rdmdio->stride_1;
+	stride2 = rdmdio->stride_2;
+	count = rdmdio->count;
+
+	poll = rdmdio->poll;
+	mask = rdmdio->mask;
+	value2 = rdmdio->value_2;
+
+	addr3 = addr1 + stride1;
+	/* One (selVal, data) pair is captured per iteration. */
+	for (loop_cnt = 0; loop_cnt < count; loop_cnt++) {
+		ret = qla8044_poll_wait_ipmdio_bus_idle(vha, addr1, addr2,
+		    addr3, mask);
+		if (ret == -1)
+			goto error;
+
+		addr4 = addr2 - stride1;
+		ret = qla8044_ipmdio_wr_reg(vha, addr1, addr3, mask, addr4,
+		    value2);
+		if (ret == -1)
+			goto error;
+
+		addr5 = addr2 - (2 * stride1);
+		ret = qla8044_ipmdio_wr_reg(vha, addr1, addr3, mask, addr5,
+		    value1);
+		if (ret == -1)
+			goto error;
+
+		addr6 = addr2 - (3 * stride1);
+		ret = qla8044_ipmdio_wr_reg(vha, addr1, addr3, mask,
+		    addr6, 0x2);
+		if (ret == -1)
+			goto error;
+
+		ret = qla8044_poll_wait_ipmdio_bus_idle(vha, addr1, addr2,
+		    addr3, mask);
+		if (ret == -1)
+			goto error;
+
+		addr7 = addr2 - (4 * stride1);
+		data = qla8044_ipmdio_rd_reg(vha, addr1, addr3,
+		    mask, addr7);
+		if (data == -1)
+			goto error;
+
+		selVal = (value2 << 18) | (value1 << 2) | 2;
+
+		/* Record the selector word followed by the value read. */
+		*data_ptr++ = selVal;
+		*data_ptr++ = data;
+
+		value1 = value1 + stride2;
+		*d_ptr = data_ptr;
+	}
+
+	return 0;
+
+error:
+	return -1;
+}
+
+static uint32_t qla8044_minidump_process_pollwr(struct scsi_qla_host *vha,
+		struct qla8044_minidump_entry_hdr *entry_hdr, uint32_t **d_ptr)
+{
+	uint32_t addr1, addr2, value1, value2, poll, mask, r_value;
+	uint32_t wait_count = 0;
+	struct qla8044_minidump_entry_pollwr *pollwr_hdr;
+
+	pollwr_hdr = (struct qla8044_minidump_entry_pollwr *)entry_hdr;
+	addr1 = pollwr_hdr->addr_1;
+	addr2 = pollwr_hdr->addr_2;
+	value1 = pollwr_hdr->value_1;
+	value2 = pollwr_hdr->value_2;
+
+	poll = pollwr_hdr->poll;
+	mask = pollwr_hdr->mask;
+	/* Poll addr1 up to 'poll' times for any bit selected by mask. */
+	while (wait_count < poll) {
+		qla8044_rd_reg_indirect(vha, addr1, &r_value);
+		/* Test against mask; poll is only the retry budget. */
+		if ((r_value & mask) != 0)
+			break;
+		wait_count++;
+	}
+
+	if (wait_count == poll) {
+		ql_log(ql_log_warn, vha, 0xb156, "%s: TIMEOUT\n", __func__);
+		goto error;
+	}
+
+	qla8044_wr_reg_indirect(vha, addr2, value2);
+	qla8044_wr_reg_indirect(vha, addr1, value1);
+	/* Second wait: running out of retries here is not reported. */
+	wait_count = 0;
+	while (wait_count < poll) {
+		qla8044_rd_reg_indirect(vha, addr1, &r_value);
+
+		if ((r_value & mask) != 0)
+			break;
+		wait_count++;
+	}
+
+	return QLA_SUCCESS;
+
+error:
+	return -1;
+}
+
 /*
  *
  * qla8044_collect_md_data - Retrieve firmware minidump data.
@@ -3089,6 +3416,24 @@
 			if (rval != QLA_SUCCESS)
 				qla8044_mark_entry_skipped(vha, entry_hdr, i);
 			break;
+		case QLA8044_RDDFE:
+			rval = qla8044_minidump_process_rddfe(vha, entry_hdr,
+			    &data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla8044_mark_entry_skipped(vha, entry_hdr, i);
+			break;
+		case QLA8044_RDMDIO:
+			rval = qla8044_minidump_process_rdmdio(vha, entry_hdr,
+			    &data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla8044_mark_entry_skipped(vha, entry_hdr, i);
+			break;
+		case QLA8044_POLLWR:
+			rval = qla8044_minidump_process_pollwr(vha, entry_hdr,
+			    &data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla8044_mark_entry_skipped(vha, entry_hdr, i);
+			break;
 		case QLA82XX_RDNOP:
 		default:
 			qla8044_mark_entry_skipped(vha, entry_hdr, i);
@@ -3110,6 +3455,7 @@
 		    "Dump data mismatch: Data collected: "
 		    "[0x%x], total_data_size:[0x%x]\n",
 		    data_collected, ha->md_dump_size);
+		rval = QLA_FUNCTION_FAILED;
 		goto md_failed;
 	}
 
@@ -3134,10 +3480,12 @@
 
 	if (!qla8044_collect_md_data(vha)) {
 		ha->fw_dumped = 1;
+		ha->prev_minidump_failed = 0;
 	} else {
 		ql_log(ql_log_fatal, vha, 0xb0db,
 		    "%s: Unable to collect minidump\n",
 		    __func__);
+		ha->prev_minidump_failed = 1;
 	}
 }
 

diff --git a/drivers/scsi/qla2xxx/qla_nx2.h b/drivers/scsi/qla2xxx/qla_nx2.h
index 2ab2eab..ada3605 100644
--- a/drivers/scsi/qla2xxx/qla_nx2.h
+++ b/drivers/scsi/qla2xxx/qla_nx2.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -133,6 +133,7 @@
 #define QLA8044_LINK_SPEED(f)		(0x36E0+(((f) >> 2) * 4))
 #define QLA8044_MAX_LINK_SPEED(f)       (0x36F0+(((f) / 4) * 4))
 #define QLA8044_LINK_SPEED_FACTOR	10
+#define QLA8044_FUN7_ACTIVE_INDEX	0x80
 
 /* FLASH API Defines */
 #define QLA8044_FLASH_MAX_WAIT_USEC	100
@@ -431,6 +432,50 @@
 	uint32_t rsvd_1;
 } __packed;
 
+struct qla8044_minidump_entry_rddfe {
+	struct qla8044_minidump_entry_hdr h;
+	uint32_t addr_1;	/* register that is written and then polled */
+	uint32_t value;		/* written to addr_1, with and without bit 30 */
+	uint8_t stride;		/* addr_1 + stride is the second register */
+	uint8_t stride2;	/* added to the value of the third write */
+	uint16_t count;		/* number of iterations */
+	uint32_t poll;		/* max reads of addr_1 per wait */
+	uint32_t mask;		/* bits of addr_1 that end a wait */
+	uint32_t modify_mask;	/* ANDed with the word read back */
+	uint32_t data_size;	/* loaded but unused by the rddfe handler */
+	uint32_t rsvd;
+
+} __packed;
+
+struct qla8044_minidump_entry_rdmdio {
+	struct qla8044_minidump_entry_hdr h;
+
+	uint32_t addr_1;	/* register polled for the mask bits */
+	uint32_t addr_2;	/* base; handler uses addr_2 - k * stride_1 */
+	uint32_t value_1;	/* written each pass, stepped by stride_2 */
+	uint8_t stride_1;	/* addr_1 + stride_1 is the data register */
+	uint8_t stride_2;	/* increment applied to value_1 per pass */
+	uint16_t count;		/* number of iterations */
+	uint32_t poll;		/* loaded but unused by the rdmdio handler */
+	uint32_t mask;		/* bits of addr_1 that end a wait */
+	uint32_t value_2;	/* written each pass via addr_2 - stride_1 */
+	uint32_t data_size;
+
+} __packed;
+
+struct qla8044_minidump_entry_pollwr {
+	struct qla8044_minidump_entry_hdr h;
+	uint32_t addr_1;	/* register polled, then written with value_1 */
+	uint32_t addr_2;	/* register written with value_2 */
+	uint32_t value_1;
+	uint32_t value_2;
+	uint32_t poll;		/* max reads of addr_1 per wait */
+	uint32_t mask;		/* presumably the bits polled in addr_1 - confirm */
+	uint32_t data_size;
+	uint32_t rsvd;
+
+}  __packed;
+
 /* RDMUX2 Entry */
 struct qla8044_minidump_entry_rdmux2 {
 	struct qla8044_minidump_entry_hdr h;
@@ -516,6 +561,9 @@
 #define QLA8044_DBG_RSVD_ARRAY_LEN              8
 #define QLA8044_DBG_OCM_WNDREG_ARRAY_LEN        16
 #define QLA8044_SS_PCI_INDEX                    0
+#define QLA8044_RDDFE          38
+#define QLA8044_RDMDIO         39
+#define QLA8044_POLLWR         40
 
 struct qla8044_minidump_template_hdr {
 	uint32_t entry_type;

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index afc8481..d96bfb5 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -616,7 +616,7 @@
 
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
-		qla2x00_clean_dsd_pool(ha, sp);
+		qla2x00_clean_dsd_pool(ha, sp, NULL);
 		sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
 	}
 
@@ -781,7 +781,7 @@
 qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd)
 {
 #define ABORT_POLLING_PERIOD	1000
-#define ABORT_WAIT_ITER		((10 * 1000) / (ABORT_POLLING_PERIOD))
+#define ABORT_WAIT_ITER		((2 * 1000) / (ABORT_POLLING_PERIOD))
 	unsigned long wait_iter = ABORT_WAIT_ITER;
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	struct qla_hw_data *ha = vha->hw;
@@ -844,11 +844,8 @@
 }
 
 /*
- * qla2x00_wait_for_reset_ready
- *    Wait till the HBA is online after going through
- *    <= MAX_RETRIES_OF_ISP_ABORT  or
- *    finally HBA is disabled ie marked offline or flash
- *    operations are in progress.
+ * qla2x00_wait_for_hba_ready
+ * Wait till the HBA is ready before doing driver unload
  *
  * Input:
  *     ha - pointer to host adapter structure
@@ -857,35 +854,15 @@
  *    Does context switching-Release SPIN_LOCK
  *    (if any) before calling this routine.
  *
- * Return:
- *    Success (Adapter is online/no flash ops) : 0
- *    Failed  (Adapter is offline/disabled/flash ops in progress) : 1
  */
-static int
-qla2x00_wait_for_reset_ready(scsi_qla_host_t *vha)
+static void
+qla2x00_wait_for_hba_ready(scsi_qla_host_t *vha)
 {
-	int		return_status;
-	unsigned long	wait_online;
 	struct qla_hw_data *ha = vha->hw;
-	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
 
-	wait_online = jiffies + (MAX_LOOP_TIMEOUT * HZ);
-	while (((test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) ||
-	    test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) ||
-	    test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) ||
-	    ha->optrom_state != QLA_SWAITING ||
-	    ha->dpc_active) && time_before(jiffies, wait_online))
+	while ((!(vha->flags.online) || ha->dpc_active ||
+	    ha->flags.mbox_busy))
 		msleep(1000);
-
-	if (base_vha->flags.online &&  ha->optrom_state == QLA_SWAITING)
-		return_status = QLA_SUCCESS;
-	else
-		return_status = QLA_FUNCTION_FAILED;
-
-	ql_dbg(ql_dbg_taskm, vha, 0x8019,
-	    "%s return status=%d.\n", __func__, return_status);
-
-	return return_status;
 }
 
 int
@@ -945,7 +922,7 @@
 	int ret;
 	unsigned int id, lun;
 	unsigned long flags;
-	int wait = 0;
+	int rval, wait = 0;
 	struct qla_hw_data *ha = vha->hw;
 
 	if (!CMD_SP(cmd))
@@ -974,10 +951,20 @@
 	sp_get(sp);
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-	if (ha->isp_ops->abort_command(sp)) {
-		ret = FAILED;
+	rval = ha->isp_ops->abort_command(sp);
+	if (rval) {
+		if (rval == QLA_FUNCTION_PARAMETER_ERROR) {
+			/*
+			 * Decrement the ref_count since we can't find the
+			 * command
+			 */
+			atomic_dec(&sp->ref_count);
+			ret = SUCCESS;
+		} else
+			ret = FAILED;
+
 		ql_dbg(ql_dbg_taskm, vha, 0x8003,
-		    "Abort command mbx failed cmd=%p.\n", cmd);
+		    "Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
 	} else {
 		ql_dbg(ql_dbg_taskm, vha, 0x8004,
 		    "Abort command mbx success cmd=%p.\n", cmd);
@@ -985,6 +972,12 @@
 	}
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
+	/*
+	 * Clear the slot in the outstanding_cmds array if we can't find the
+	 * command to reclaim the resources.
+	 */
+	if (rval == QLA_FUNCTION_PARAMETER_ERROR)
+		vha->req->outstanding_cmds[sp->handle] = NULL;
 	sp->done(ha, sp, 0);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
@@ -1236,7 +1229,11 @@
 	ql_log(ql_log_info, vha, 0x8018,
 	    "ADAPTER RESET ISSUED nexus=%ld:%d:%d.\n", vha->host_no, id, lun);
 
-	if (qla2x00_wait_for_reset_ready(vha) != QLA_SUCCESS)
+	/*
+	 * No point in issuing another reset if one is active.  Also do not
+	 * attempt a reset if we are updating flash.
+	 */
+	if (qla2x00_reset_active(vha) || ha->optrom_state != QLA_SWAITING)
 		goto eh_host_reset_lock;
 
 	if (vha != base_vha) {
@@ -2270,6 +2267,13 @@
 		ha->device_type |= DT_IIDMA;
 		ha->fw_srisc_address = RISC_START_ADDRESS_2400;
 		break;
+	case PCI_DEVICE_ID_QLOGIC_ISP2271:
+		ha->device_type |= DT_ISP2271;
+		ha->device_type |= DT_ZIO_SUPPORTED;
+		ha->device_type |= DT_FWI2;
+		ha->device_type |= DT_IIDMA;
+		ha->fw_srisc_address = RISC_START_ADDRESS_2400;
+		break;
 	}
 
 	if (IS_QLA82XX(ha))
@@ -2346,7 +2350,8 @@
 	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISP8031 ||
 	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISPF001 ||
 	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISP8044 ||
-	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISP2071) {
+	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISP2071 ||
+	    pdev->device == PCI_DEVICE_ID_QLOGIC_ISP2271) {
 		bars = pci_select_bars(pdev, IORESOURCE_MEM);
 		mem_only = 1;
 		ql_dbg_pci(ql_dbg_init, pdev, 0x0007,
@@ -2877,6 +2882,7 @@
 
 	base_vha->flags.init_done = 1;
 	base_vha->flags.online = 1;
+	ha->prev_minidump_failed = 0;
 
 	ql_dbg(ql_dbg_init, base_vha, 0x00f2,
 	    "Init done and hba is online.\n");
@@ -3136,6 +3142,8 @@
 	base_vha = pci_get_drvdata(pdev);
 	ha = base_vha->hw;
 
+	qla2x00_wait_for_hba_ready(base_vha);
+
 	set_bit(UNLOADING, &base_vha->dpc_flags);
 
 	if (IS_QLAFX00(ha))
@@ -3645,6 +3653,7 @@
 	ha->eft = NULL;
 	ha->eft_dma = 0;
 	ha->fw_dumped = 0;
+	ha->fw_dump_cap_flags = 0;
 	ha->fw_dump_reading = 0;
 	ha->fw_dump = NULL;
 	ha->fw_dump_len = 0;
@@ -4913,12 +4922,13 @@
 				if (qlafx00_reset_initialize(base_vha)) {
 					/* Failed. Abort isp later. */
 					if (!test_bit(UNLOADING,
-					    &base_vha->dpc_flags))
+					    &base_vha->dpc_flags)) {
 						set_bit(ISP_UNRECOVERABLE,
 						    &base_vha->dpc_flags);
 						ql_dbg(ql_dbg_dpc, base_vha,
 						    0x4021,
 						    "Reset Recovery Failed\n");
+					}
 				}
 			}
 
@@ -5077,8 +5087,10 @@
 			ha->isp_ops->enable_intrs(ha);
 
 		if (test_and_clear_bit(BEACON_BLINK_NEEDED,
-					&base_vha->dpc_flags))
-			ha->isp_ops->beacon_blink(base_vha);
+					&base_vha->dpc_flags)) {
+			if (ha->beacon_blink_led == 1)
+				ha->isp_ops->beacon_blink(base_vha);
+		}
 
 		if (!IS_QLAFX00(ha))
 			qla2x00_do_dpc_all_vps(base_vha);
@@ -5325,7 +5337,7 @@
 #define FW_ISP82XX	7
 #define FW_ISP2031	8
 #define FW_ISP8031	9
-#define FW_ISP2071	10
+#define FW_ISP27XX	10
 
 #define FW_FILE_ISP21XX	"ql2100_fw.bin"
 #define FW_FILE_ISP22XX	"ql2200_fw.bin"
@@ -5337,7 +5349,7 @@
 #define FW_FILE_ISP82XX	"ql8200_fw.bin"
 #define FW_FILE_ISP2031	"ql2600_fw.bin"
 #define FW_FILE_ISP8031	"ql8300_fw.bin"
-#define FW_FILE_ISP2071	"ql2700_fw.bin"
+#define FW_FILE_ISP27XX	"ql2700_fw.bin"
 
 
 static DEFINE_MUTEX(qla_fw_lock);
@@ -5353,7 +5365,7 @@
 	{ .name = FW_FILE_ISP82XX, },
 	{ .name = FW_FILE_ISP2031, },
 	{ .name = FW_FILE_ISP8031, },
-	{ .name = FW_FILE_ISP2071, },
+	{ .name = FW_FILE_ISP27XX, },
 };
 
 struct fw_blob *
@@ -5382,8 +5394,8 @@
 		blob = &qla_fw_blobs[FW_ISP2031];
 	} else if (IS_QLA8031(ha)) {
 		blob = &qla_fw_blobs[FW_ISP8031];
-	} else if (IS_QLA2071(ha)) {
-		blob = &qla_fw_blobs[FW_ISP2071];
+	} else if (IS_QLA27XX(ha)) {
+		blob = &qla_fw_blobs[FW_ISP27XX];
 	} else {
 		return NULL;
 	}
@@ -5714,6 +5726,7 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISPF001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8044) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2071) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2271) },
 	{ 0 },
 };
 MODULE_DEVICE_TABLE(pci, qla2xxx_pci_tbl);

diff --git a/drivers/scsi/qla2xxx/qla_settings.h b/drivers/scsi/qla2xxx/qla_settings.h
index 46ef0ac..2fb7ebf 100644
--- a/drivers/scsi/qla2xxx/qla_settings.h
+++ b/drivers/scsi/qla2xxx/qla_settings.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */

diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index f28123e..bca173e 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1727,11 +1727,8 @@
 	if (IS_QLA2031(ha)) {
 		led_select_value = qla83xx_select_led_port(ha);
 
-		qla83xx_wr_reg(vha, led_select_value, 0x40002000);
-		qla83xx_wr_reg(vha, led_select_value + 4, 0x40002000);
-		msleep(1000);
-		qla83xx_wr_reg(vha, led_select_value, 0x40004000);
-		qla83xx_wr_reg(vha, led_select_value + 4, 0x40004000);
+		qla83xx_wr_reg(vha, led_select_value, 0x40000230);
+		qla83xx_wr_reg(vha, led_select_value + 4, 0x40000230);
 	} else if (IS_QLA8031(ha)) {
 		led_select_value = qla83xx_select_led_port(ha);
 

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0cb7307..4b188b0 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c

@@ -104,7 +104,6 @@
 /*
  * Global Variables
  */
-static struct kmem_cache *qla_tgt_cmd_cachep;
 static struct kmem_cache *qla_tgt_mgmt_cmd_cachep;
 static mempool_t *qla_tgt_mgmt_cmd_mempool;
 static struct workqueue_struct *qla_tgt_wq;
@@ -182,6 +181,11 @@
 void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio)
 {
+	ql_dbg(ql_dbg_tgt, vha, 0xe072,
+		"%s: qla_target(%d): type %x ox_id %04x\n",
+		__func__, vha->vp_idx, atio->u.raw.entry_type,
+		be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id));
+
 	switch (atio->u.raw.entry_type) {
 	case ATIO_TYPE7:
 	{
@@ -236,6 +240,10 @@
 void qlt_response_pkt_all_vps(struct scsi_qla_host *vha, response_t *pkt)
 {
 	switch (pkt->entry_type) {
+	case CTIO_CRC2:
+		ql_dbg(ql_dbg_tgt, vha, 0xe073,
+			"qla_target(%d):%s: CRC2 Response pkt\n",
+			vha->vp_idx, __func__);
 	case CTIO_TYPE7:
 	{
 		struct ctio7_from_24xx *entry = (struct ctio7_from_24xx *)pkt;
@@ -1350,13 +1358,42 @@
 
 	prm->cmd->sg_mapped = 1;
 
-	/*
-	 * If greater than four sg entries then we need to allocate
-	 * the continuation entries
-	 */
-	if (prm->seg_cnt > prm->tgt->datasegs_per_cmd)
-		prm->req_cnt += DIV_ROUND_UP(prm->seg_cnt -
-		    prm->tgt->datasegs_per_cmd, prm->tgt->datasegs_per_cont);
+	if (cmd->se_cmd.prot_op == TARGET_PROT_NORMAL) {
+		/*
+		 * If greater than four sg entries then we need to allocate
+		 * the continuation entries
+		 */
+		if (prm->seg_cnt > prm->tgt->datasegs_per_cmd)
+			prm->req_cnt += DIV_ROUND_UP(prm->seg_cnt -
+			prm->tgt->datasegs_per_cmd,
+			prm->tgt->datasegs_per_cont);
+	} else {
+		/* DIF */
+		if ((cmd->se_cmd.prot_op == TARGET_PROT_DIN_INSERT) ||
+		    (cmd->se_cmd.prot_op == TARGET_PROT_DOUT_STRIP)) {
+			prm->seg_cnt = DIV_ROUND_UP(cmd->bufflen, cmd->blk_sz);
+			prm->tot_dsds = prm->seg_cnt;
+		} else
+			prm->tot_dsds = prm->seg_cnt;
+
+		if (cmd->prot_sg_cnt) {
+			prm->prot_sg      = cmd->prot_sg;
+			prm->prot_seg_cnt = pci_map_sg(prm->tgt->ha->pdev,
+				cmd->prot_sg, cmd->prot_sg_cnt,
+				cmd->dma_data_direction);
+			if (unlikely(prm->prot_seg_cnt == 0))
+				goto out_err;
+
+			if ((cmd->se_cmd.prot_op == TARGET_PROT_DIN_INSERT) ||
+			    (cmd->se_cmd.prot_op == TARGET_PROT_DOUT_STRIP)) {
+				/* Dif Bundling not support here */
+				prm->prot_seg_cnt = DIV_ROUND_UP(cmd->bufflen,
+								cmd->blk_sz);
+				prm->tot_dsds += prm->prot_seg_cnt;
+			} else
+				prm->tot_dsds += prm->prot_seg_cnt;
+		}
+	}
 
 	ql_dbg(ql_dbg_tgt, prm->cmd->vha, 0xe009, "seg_cnt=%d, req_cnt=%d\n",
 	    prm->seg_cnt, prm->req_cnt);
@@ -1377,6 +1414,16 @@
 	BUG_ON(!cmd->sg_mapped);
 	pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction);
 	cmd->sg_mapped = 0;
+
+	if (cmd->prot_sg_cnt)
+		pci_unmap_sg(ha->pdev, cmd->prot_sg, cmd->prot_sg_cnt,
+			cmd->dma_data_direction);
+
+	if (cmd->ctx_dsd_alloced)
+		qla2x00_clean_dsd_pool(ha, NULL, cmd);
+
+	if (cmd->ctx)
+		dma_pool_free(ha->dl_dma_pool, cmd->ctx, cmd->ctx->crc_ctx_dma);
 }
 
 static int qlt_check_reserve_free_req(struct scsi_qla_host *vha,
@@ -1665,8 +1712,9 @@
 		return QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED;
 	}
 
-	ql_dbg(ql_dbg_tgt, vha, 0xe011, "qla_target(%d): tag=%u\n",
-	    vha->vp_idx, cmd->tag);
+	ql_dbg(ql_dbg_tgt, vha, 0xe011, "qla_target(%d): tag=%u ox_id %04x\n",
+		vha->vp_idx, cmd->tag,
+		be16_to_cpu(cmd->atio.u.isp24.fcp_hdr.ox_id));
 
 	prm->cmd = cmd;
 	prm->tgt = tgt;
@@ -1902,6 +1950,328 @@
 	/* Sense with len > 24, is it possible ??? */
 }
 
+
+
+/* Return 1 if the HBA should check DIF for this command's prot_op, else 0. */
+static inline int
+qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
+{
+	/*
+	 * Uncomment when corresponding SCSI changes are done:
+	 *	if (!sp->cmd->prot_chk)
+	 *		return 0;
+	 * Until then prot_op decides: DOUT_INSERT/DIN_STRIP need
+	 * ql2xenablehba_err_chk >= 1, the PASS ops need >= 2.
+	 */
+	switch (se_cmd->prot_op) {
+	case TARGET_PROT_DOUT_INSERT:
+	case TARGET_PROT_DIN_STRIP:
+		if (ql2xenablehba_err_chk >= 1)
+			return 1;
+		break;
+	case TARGET_PROT_DOUT_PASS:
+	case TARGET_PROT_DIN_PASS:
+		if (ql2xenablehba_err_chk >= 2)
+			return 1;
+		break;
+	case TARGET_PROT_DIN_INSERT:
+	case TARGET_PROT_DOUT_STRIP:
+		return 1;	/* regardless of ql2xenablehba_err_chk */
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * qlt_set_t10dif_tags - Set the CRC context ref tag, app tag and tag masks
+ * from the command's LBA and protection type.
+ */
+static inline void
+qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
+{
+	uint32_t lba = 0xffffffff & se_cmd->t_task_lba; /* low 32 bits only */
+
+	/* Wait until Mode Sense/Select cmd, modepage Ah, subpage 2
+	 * have been implemented by TCM, before AppTag is available.
+	 * Look for modesense_handlers[]
+	 */
+	ctx->app_tag = 0;
+	ctx->app_tag_mask[0] = 0x0;
+	ctx->app_tag_mask[1] = 0x0;
+
+	switch (se_cmd->prot_type) {
+	case TARGET_DIF_TYPE0_PROT:
+		/*
+		 * No check for ql2xenablehba_err_chk, as it would be an
+		 * I/O error if hba tag generation is not done.
+		 */
+		ctx->ref_tag = cpu_to_le32(lba);
+
+		if (!qlt_hba_err_chk_enabled(se_cmd))
+			break;
+
+		/* enable ALL bytes of the ref tag */
+		ctx->ref_tag_mask[0] = 0xff;
+		ctx->ref_tag_mask[1] = 0xff;
+		ctx->ref_tag_mask[2] = 0xff;
+		ctx->ref_tag_mask[3] = 0xff;
+		break;
+	/*
+	 * For Type 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
+	 * 16 bit app tag.
+	 */
+	case TARGET_DIF_TYPE1_PROT:
+		ctx->ref_tag = cpu_to_le32(lba);
+
+		if (!qlt_hba_err_chk_enabled(se_cmd))
+			break;
+
+		/* enable ALL bytes of the ref tag */
+		ctx->ref_tag_mask[0] = 0xff;
+		ctx->ref_tag_mask[1] = 0xff;
+		ctx->ref_tag_mask[2] = 0xff;
+		ctx->ref_tag_mask[3] = 0xff;
+		break;
+	/*
+	 * For Type 2 protection: 16 bit GUARD + 32 bit REF tag has to
+	 * match LBA in CDB + N
+	 */
+	case TARGET_DIF_TYPE2_PROT:
+		ctx->ref_tag = cpu_to_le32(lba);
+
+		if (!qlt_hba_err_chk_enabled(se_cmd))
+			break;
+
+		/* enable ALL bytes of the ref tag */
+		ctx->ref_tag_mask[0] = 0xff;
+		ctx->ref_tag_mask[1] = 0xff;
+		ctx->ref_tag_mask[2] = 0xff;
+		ctx->ref_tag_mask[3] = 0xff;
+		break;
+
+	/* For Type 3 protection: 16 bit GUARD only */
+	case TARGET_DIF_TYPE3_PROT:
+		ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
+			ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
+		break;
+	}
+}
+
+/*
+ * qlt_build_ctio_crc2_pkt - Build a CTIO CRC_2 (T10-DIF) IOCB and its CRC
+ * context for prm->cmd in the current request ring entry.
+ * Returns QLA_SUCCESS, -EAGAIN if no command handle is free, or
+ * QLA_FUNCTION_FAILED (the caller then cleans up the mapped/allocated state).
+ */
+static inline int
+qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
+{
+	uint32_t		*cur_dsd;
+	int			sgc;
+	uint32_t		transfer_length = 0;
+	uint32_t		data_bytes;
+	uint32_t		dif_bytes;
+	uint8_t			bundling = 1;
+	uint8_t			*clr_ptr;
+	struct crc_context	*crc_ctx_pkt = NULL;
+	struct qla_hw_data	*ha;
+	struct ctio_crc2_to_fw	*pkt;
+	dma_addr_t		crc_ctx_dma;
+	uint16_t		fw_prot_opts = 0;
+	struct qla_tgt_cmd	*cmd = prm->cmd;
+	struct se_cmd		*se_cmd = &cmd->se_cmd;
+	uint32_t h;
+	struct atio_from_isp *atio = &prm->cmd->atio;
+	uint16_t t16;
+
+	sgc = 0;
+	ha = vha->hw;
+
+	pkt = (struct ctio_crc2_to_fw *)vha->req->ring_ptr;
+	prm->pkt = pkt;
+	memset(pkt, 0, sizeof(*pkt));
+
+	ql_dbg(ql_dbg_tgt, vha, 0xe071,
+		"qla_target(%d):%s: se_cmd[%p] CRC2 prot_op[0x%x] cmd prot sg:cnt[%p:%x] lba[%llu]\n",
+		vha->vp_idx, __func__, se_cmd, se_cmd->prot_op,
+		prm->prot_sg, prm->prot_seg_cnt, se_cmd->t_task_lba);
+
+	if ((se_cmd->prot_op == TARGET_PROT_DIN_INSERT) ||
+	    (se_cmd->prot_op == TARGET_PROT_DOUT_STRIP))
+		bundling = 0;
+
+	/* Compute dif len and adjust data len to include protection */
+	data_bytes = cmd->bufflen;
+	dif_bytes  = (data_bytes / cmd->blk_sz) * 8;
+
+	switch (se_cmd->prot_op) {
+	case TARGET_PROT_DIN_INSERT:
+	case TARGET_PROT_DOUT_STRIP:
+		transfer_length = data_bytes;
+		data_bytes += dif_bytes;
+		break;
+
+	case TARGET_PROT_DIN_STRIP:
+	case TARGET_PROT_DOUT_INSERT:
+	case TARGET_PROT_DIN_PASS:
+	case TARGET_PROT_DOUT_PASS:
+		transfer_length = data_bytes + dif_bytes;
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+
+	if (!qlt_hba_err_chk_enabled(se_cmd))
+		fw_prot_opts |= 0x10; /* Disable Guard tag checking */
+	/* HBA error checking enabled */
+	else if (IS_PI_UNINIT_CAPABLE(ha)) {
+		if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) ||
+		    (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT))
+			fw_prot_opts |= PO_DIS_VALD_APP_ESC;
+		else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT)
+			fw_prot_opts |= PO_DIS_VALD_APP_REF_ESC;
+	}
+
+	switch (se_cmd->prot_op) {
+	case TARGET_PROT_DIN_INSERT:
+	case TARGET_PROT_DOUT_INSERT:
+		fw_prot_opts |= PO_MODE_DIF_INSERT;
+		break;
+	case TARGET_PROT_DIN_STRIP:
+	case TARGET_PROT_DOUT_STRIP:
+		fw_prot_opts |= PO_MODE_DIF_REMOVE;
+		break;
+	case TARGET_PROT_DIN_PASS:
+	case TARGET_PROT_DOUT_PASS:
+		fw_prot_opts |= PO_MODE_DIF_PASS;
+		/* FUTURE: does tcm require T10CRC<->IPCKSUM conversion? */
+		break;
+	default:/* Normal Request */
+		fw_prot_opts |= PO_MODE_DIF_PASS;
+		break;
+	}
+
+	/* ---- PKT ---- */
+	/* Update entry type to indicate Command Type CRC_2 IOCB */
+	pkt->entry_type  = CTIO_CRC2;
+	pkt->entry_count = 1;
+	pkt->vp_index = vha->vp_idx;
+
+	h = qlt_make_handle(vha);
+	if (unlikely(h == QLA_TGT_NULL_HANDLE)) {
+		/*
+		 * CTIO type 7 from the firmware doesn't provide a way to
+		 * know the initiator's LOOP ID, hence we can't find
+		 * the session and, so, the command.
+		 */
+		return -EAGAIN;
+	} else
+		ha->tgt.cmds[h-1] = prm->cmd;
+
+	pkt->handle  = h | CTIO_COMPLETION_HANDLE_MARK;
+	pkt->nport_handle = prm->cmd->loop_id;
+	pkt->timeout = __constant_cpu_to_le16(QLA_TGT_TIMEOUT);
+	pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
+	pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
+	pkt->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+	pkt->exchange_addr   = atio->u.isp24.exchange_addr;
+
+	/* silence compile warning */
+	t16 = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+	pkt->ox_id  = cpu_to_le16(t16);
+
+	t16 = (atio->u.isp24.attr << 9);
+	pkt->flags |= cpu_to_le16(t16);
+	pkt->relative_offset = cpu_to_le32(prm->cmd->offset);
+
+	/* Set direction. NOTE(review): '=' drops the attr bits OR-ed in above. */
+	if (cmd->dma_data_direction == DMA_TO_DEVICE)
+		pkt->flags = __constant_cpu_to_le16(CTIO7_FLAGS_DATA_IN);
+	else if (cmd->dma_data_direction == DMA_FROM_DEVICE)
+		pkt->flags = __constant_cpu_to_le16(CTIO7_FLAGS_DATA_OUT);
+
+	pkt->dseg_count = prm->tot_dsds;
+	/* Fibre channel byte count */
+	pkt->transfer_length = cpu_to_le32(transfer_length);
+
+	/* ----- CRC context -------- */
+
+	/* Allocate CRC context from global pool */
+	crc_ctx_pkt = cmd->ctx =
+	    dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC, &crc_ctx_dma);
+
+	if (!crc_ctx_pkt)
+		goto crc_queuing_error;
+
+	/* Zero out CTX area. */
+	clr_ptr = (uint8_t *)crc_ctx_pkt;
+	memset(clr_ptr, 0, sizeof(*crc_ctx_pkt));
+
+	crc_ctx_pkt->crc_ctx_dma = crc_ctx_dma;
+	INIT_LIST_HEAD(&crc_ctx_pkt->dsd_list);
+
+	/* Set handle */
+	crc_ctx_pkt->handle = pkt->handle;
+
+	qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt);
+
+	pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
+	pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma));
+	pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
+
+	if (!bundling) {
+		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address;
+	} else {
+		/*
+		 * Configure Bundling if we need to fetch interleaving
+		 * protection PCI accesses
+		 */
+		fw_prot_opts |= PO_ENABLE_DIF_BUNDLING;
+		crc_ctx_pkt->u.bundling.dif_byte_count = cpu_to_le32(dif_bytes);
+		crc_ctx_pkt->u.bundling.dseg_count =
+			cpu_to_le16(prm->tot_dsds - prm->prot_seg_cnt);
+		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.data_address;
+	}
+
+	/* Finish the common fields of CRC pkt */
+	crc_ctx_pkt->blk_size   = cpu_to_le16(cmd->blk_sz);
+	crc_ctx_pkt->prot_opts  = cpu_to_le16(fw_prot_opts);
+	crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes);
+	crc_ctx_pkt->guard_seed = __constant_cpu_to_le16(0);
+
+	/* Walks data segments */
+	pkt->flags |= __constant_cpu_to_le16(CTIO7_FLAGS_DSD_PTR);
+
+	if (!bundling && prm->prot_seg_cnt) {
+		if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd,
+			prm->tot_dsds, cmd))
+			goto crc_queuing_error;
+	} else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd,
+		(prm->tot_dsds - prm->prot_seg_cnt), cmd))
+		goto crc_queuing_error;
+
+	if (bundling && prm->prot_seg_cnt) {
+		/* Walks dif segments */
+		pkt->add_flags |= CTIO_CRC2_AF_DIF_DSD_ENA;
+
+		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
+		if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
+			prm->prot_seg_cnt, cmd))
+			goto crc_queuing_error;
+	}
+	return QLA_SUCCESS;
+
+crc_queuing_error:
+	/* Release the handle slot; the rest is cleaned up by the caller */
+	ha->tgt.cmds[h - 1] = NULL;
+
+	return QLA_FUNCTION_FAILED;
+}
+
+
 /*
  * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and *
  * QLA_TGT_XMIT_STATUS for >= 24xx silicon
@@ -1921,9 +2291,10 @@
 	qlt_check_srr_debug(cmd, &xmit_type);
 
 	ql_dbg(ql_dbg_tgt, cmd->vha, 0xe018,
-	    "is_send_status=%d, cmd->bufflen=%d, cmd->sg_cnt=%d, "
-	    "cmd->dma_data_direction=%d\n", (xmit_type & QLA_TGT_XMIT_STATUS) ?
-	    1 : 0, cmd->bufflen, cmd->sg_cnt, cmd->dma_data_direction);
+	    "is_send_status=%d, cmd->bufflen=%d, cmd->sg_cnt=%d, cmd->dma_data_direction=%d se_cmd[%p]\n",
+	    (xmit_type & QLA_TGT_XMIT_STATUS) ?
+	    1 : 0, cmd->bufflen, cmd->sg_cnt, cmd->dma_data_direction,
+	    &cmd->se_cmd);
 
 	res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status,
 	    &full_req_cnt);
@@ -1941,7 +2312,10 @@
 	if (unlikely(res))
 		goto out_unmap_unlock;
 
-	res = qlt_24xx_build_ctio_pkt(&prm, vha);
+	if (cmd->se_cmd.prot_op && (xmit_type & QLA_TGT_XMIT_DATA))
+		res = qlt_build_ctio_crc2_pkt(&prm, vha);
+	else
+		res = qlt_24xx_build_ctio_pkt(&prm, vha);
 	if (unlikely(res != 0))
 		goto out_unmap_unlock;
 
@@ -1953,7 +2327,8 @@
 		    __constant_cpu_to_le16(CTIO7_FLAGS_DATA_IN |
 			CTIO7_FLAGS_STATUS_MODE_0);
 
-		qlt_load_data_segments(&prm, vha);
+		if (cmd->se_cmd.prot_op == TARGET_PROT_NORMAL)
+			qlt_load_data_segments(&prm, vha);
 
 		if (prm.add_status_pkt == 0) {
 			if (xmit_type & QLA_TGT_XMIT_STATUS) {
@@ -1983,8 +2358,14 @@
 			ql_dbg(ql_dbg_tgt, vha, 0xe019,
 			    "Building additional status packet\n");
 
+			/*
+			 * T10Dif: ctio_crc2_to_fw overlay ontop of
+			 * ctio7_to_24xx
+			 */
 			memcpy(ctio, pkt, sizeof(*ctio));
+			/* reset back to CTIO7 */
 			ctio->entry_count = 1;
+			ctio->entry_type = CTIO_TYPE7;
 			ctio->dseg_count = 0;
 			ctio->u.status1.flags &= ~__constant_cpu_to_le16(
 			    CTIO7_FLAGS_DATA_IN);
@@ -1993,6 +2374,11 @@
 			pkt->handle |= CTIO_INTERMEDIATE_HANDLE_MARK;
 			pkt->u.status0.flags |= __constant_cpu_to_le16(
 			    CTIO7_FLAGS_DONT_RET_CTIO);
+
+			/* qlt_24xx_init_ctio_to_isp will correct
+			 * all necessary fields that are part of CTIO7.
+			 * There should be no residual of CTIO-CRC2 data.
+			 */
 			qlt_24xx_init_ctio_to_isp((struct ctio7_to_24xx *)ctio,
 			    &prm);
 			pr_debug("Status CTIO7: %p\n", ctio);
@@ -2041,8 +2427,10 @@
 	if (qlt_issue_marker(vha, 0) != QLA_SUCCESS)
 		return -EIO;
 
-	ql_dbg(ql_dbg_tgt, vha, 0xe01b, "CTIO_start: vha(%d)",
-	    (int)vha->vp_idx);
+	ql_dbg(ql_dbg_tgt, vha, 0xe01b,
+		"%s: CTIO_start: vha(%d) se_cmd %p ox_id %04x\n",
+		__func__, (int)vha->vp_idx, &cmd->se_cmd,
+		be16_to_cpu(cmd->atio.u.isp24.fcp_hdr.ox_id));
 
 	/* Calculate number of entries and segments required */
 	if (qlt_pci_map_calc_cnt(&prm) != 0)
@@ -2054,14 +2442,19 @@
 	res = qlt_check_reserve_free_req(vha, prm.req_cnt);
 	if (res != 0)
 		goto out_unlock_free_unmap;
+	if (cmd->se_cmd.prot_op)
+		res = qlt_build_ctio_crc2_pkt(&prm, vha);
+	else
+		res = qlt_24xx_build_ctio_pkt(&prm, vha);
 
-	res = qlt_24xx_build_ctio_pkt(&prm, vha);
 	if (unlikely(res != 0))
 		goto out_unlock_free_unmap;
 	pkt = (struct ctio7_to_24xx *)prm.pkt;
 	pkt->u.status0.flags |= __constant_cpu_to_le16(CTIO7_FLAGS_DATA_OUT |
 	    CTIO7_FLAGS_STATUS_MODE_0);
-	qlt_load_data_segments(&prm, vha);
+
+	if (cmd->se_cmd.prot_op == TARGET_PROT_NORMAL)
+		qlt_load_data_segments(&prm, vha);
 
 	cmd->state = QLA_TGT_STATE_NEED_DATA;
 
@@ -2079,6 +2472,143 @@
 }
 EXPORT_SYMBOL(qlt_rdy_to_xfer);
 
+
+/*
+ * Checks the guard or meta-data for the type of error detected by the HBA
+ * and records it in cmd->se_cmd (pi_err, bad_sector).
+ * Returns 0 if the mismatch is to be ignored (SCSI good), 1 otherwise.
+ */
+static inline int
+qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
+		struct ctio_crc_from_fw *sts)
+{
+	uint8_t		*ap = &sts->actual_dif[0];
+	uint8_t		*ep = &sts->expected_dif[0];
+	uint32_t	e_ref_tag, a_ref_tag;
+	uint16_t	e_app_tag, a_app_tag;
+	uint16_t	e_guard, a_guard;
+	uint64_t	lba = cmd->se_cmd.t_task_lba;
+
+	a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
+	a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
+	a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
+
+	e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
+	e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
+	e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
+
+	ql_dbg(ql_dbg_tgt, vha, 0xe075,
+	    "iocb(s) %p Returned STATUS.\n", sts);
+
+	ql_dbg(ql_dbg_tgt, vha, 0xf075,
+	    "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n",
+	    cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
+	    a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard);
+
+	/*
+	 * Ignore sector if:
+	 * For type     3: ref & app tag is all 'f's
+	 * For type 0,1,2: app tag is all 'f's
+	 */
+	if ((a_app_tag == 0xffff) &&
+	    ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) ||
+	     (a_ref_tag == 0xffffffff))) {
+		uint32_t blocks_done;
+
+		/* 2TB boundary case covered automatically with this */
+		blocks_done = e_ref_tag - (uint32_t)lba + 1;
+		cmd->se_cmd.bad_sector = e_ref_tag;
+		cmd->se_cmd.pi_err = 0;
+		ql_dbg(ql_dbg_tgt, vha, 0xf074,
+			"need to return scsi good\n");
+
+		/* Update protection tag */
+		if (cmd->prot_sg_cnt) {
+			uint32_t i, j = 0, k = 0, num_ent;
+			struct scatterlist *sg, *sgl;
+
+			sgl = cmd->prot_sg;
+
+			/* Patch the corresponding protection tags */
+			for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) {
+				num_ent = sg_dma_len(sg) / 8;
+				if (k + num_ent < blocks_done) {
+					k += num_ent;
+					continue;
+				}
+				j = blocks_done - k - 1;
+				k = blocks_done;
+				break;
+			}
+
+			if (k != blocks_done) {
+				ql_log(ql_log_warn, vha, 0xf076,
+				    "unexpected tag values tag:lba=%u:%llu\n",
+				    e_ref_tag, (unsigned long long)lba);
+				goto out;
+			}
+
+#if 0
+			struct sd_dif_tuple *spt;
+			/* TODO:
+			 * This section came from initiator. Is it valid here?
+			 * should ulp be override with actual val???
+			 */
+			spt = page_address(sg_page(sg)) + sg->offset;
+			spt += j;
+
+			spt->app_tag = 0xffff;
+			if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3)
+				spt->ref_tag = 0xffffffff;
+#endif
+		}
+
+		return 0;
+	}
+
+	/* check guard */
+	if (e_guard != a_guard) {
+		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+		cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
+
+		ql_log(ql_log_warn, vha, 0xe076,
+		    "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
+		    cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
+		    a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
+		    a_guard, e_guard, cmd);
+		goto out;
+	}
+
+	/* check ref tag */
+	if (e_ref_tag != a_ref_tag) {
+		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+		cmd->se_cmd.bad_sector = e_ref_tag;
+
+		ql_log(ql_log_warn, vha, 0xe077,
+			"Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
+			cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
+			a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
+			a_guard, e_guard, cmd);
+		goto out;
+	}
+
+	/* check appl tag */
+	if (e_app_tag != a_app_tag) {
+		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+		cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
+
+		ql_log(ql_log_warn, vha, 0xe078,
+			"App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
+			cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
+			a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
+			a_guard, e_guard, cmd);
+		goto out;
+	}
+out:
+	return 1;
+}
+
+
 /* If hardware_lock held on entry, might drop it, then reaquire */
 /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
 static int __qlt_send_term_exchange(struct scsi_qla_host *vha,
@@ -2155,21 +2685,46 @@
 	rc = __qlt_send_term_exchange(vha, cmd, atio);
 	spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
 done:
-	if (rc == 1) {
+	/*
+	 * Terminate exchange will tell fw to release any active CTIO
+	 * that's in FW possession and clean up the exchange.
+	 *
+	 * "cmd->state == QLA_TGT_STATE_ABORTED" means CTIO is still
+	 * down at FW.  Free the cmd later when CTIO comes back later
+	 * w/aborted(0x2) status.
+	 *
+	 * "cmd->state != QLA_TGT_STATE_ABORTED" means CTIO is already
+	 * back w/some err.  Free the cmd now.
+	 */
+	if ((rc == 1) && (cmd->state != QLA_TGT_STATE_ABORTED)) {
 		if (!ha_locked && !in_interrupt())
 			msleep(250); /* just in case */
 
+		if (cmd->sg_mapped)
+			qlt_unmap_sg(vha, cmd);
 		vha->hw->tgt.tgt_ops->free_cmd(cmd);
 	}
+	return;
 }
 
 void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 {
-	BUG_ON(cmd->sg_mapped);
+	struct qla_tgt_sess *sess = cmd->sess;
 
+	ql_dbg(ql_dbg_tgt, cmd->vha, 0xe074,
+	    "%s: se_cmd[%p] ox_id %04x\n",
+	    __func__, &cmd->se_cmd,
+	    be16_to_cpu(cmd->atio.u.isp24.fcp_hdr.ox_id));
+
+	BUG_ON(cmd->sg_mapped);
 	if (unlikely(cmd->free_sg))
 		kfree(cmd->sg);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
+
+	if (!sess || !sess->se_sess) {
+		WARN_ON(1);
+		return;
+	}
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
 }
 EXPORT_SYMBOL(qlt_free_cmd);
 
@@ -2374,6 +2929,7 @@
 		case CTIO_LIP_RESET:
 		case CTIO_TARGET_RESET:
 		case CTIO_ABORTED:
+			/* driver request abort via Terminate exchange */
 		case CTIO_TIMEOUT:
 		case CTIO_INVALID_RX_ID:
 			/* They are OK */
@@ -2404,18 +2960,58 @@
 			else
 				return;
 
+		case CTIO_DIF_ERROR: {
+			struct ctio_crc_from_fw *crc =
+				(struct ctio_crc_from_fw *)ctio;
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073,
+			    "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n",
+			    vha->vp_idx, status, cmd->state, se_cmd,
+			    *((u64 *)&crc->actual_dif[0]),
+			    *((u64 *)&crc->expected_dif[0]));
+
+			if (qlt_handle_dif_error(vha, cmd, ctio)) {
+				if (cmd->state == QLA_TGT_STATE_NEED_DATA) {
+					/* scsi Write/xfer rdy complete */
+					goto skip_term;
+				} else {
+					/* scsi read/xmit respond complete
+					 * call handle dif to send scsi status
+					 * rather than terminate exchange.
+					 */
+					cmd->state = QLA_TGT_STATE_PROCESSED;
+					ha->tgt.tgt_ops->handle_dif_err(cmd);
+					return;
+				}
+			} else {
+				/* Need to generate a SCSI good completion.
+				 * because FW did not send scsi status.
+				 */
+				status = 0;
+				goto skip_term;
+			}
+			break;
+		}
 		default:
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
-			    "qla_target(%d): CTIO with error status "
-			    "0x%x received (state %x, se_cmd %p\n",
+			    "qla_target(%d): CTIO with error status 0x%x received (state %x, se_cmd %p\n",
 			    vha->vp_idx, status, cmd->state, se_cmd);
 			break;
 		}
 
-		if (cmd->state != QLA_TGT_STATE_NEED_DATA)
+
+		/* "cmd->state == QLA_TGT_STATE_ABORTED" means
+		 * cmd is already aborted/terminated, we don't
+		 * need to terminate again.  The exchange is already
+		 * cleaned up/freed at FW level.  Just cleanup at driver
+		 * level.
+		 */
+		if ((cmd->state != QLA_TGT_STATE_NEED_DATA) &&
+			(cmd->state != QLA_TGT_STATE_ABORTED)) {
 			if (qlt_term_ctio_exchange(vha, ctio, cmd, status))
 				return;
+		}
 	}
+skip_term:
 
 	if (cmd->state == QLA_TGT_STATE_PROCESSED) {
 		ql_dbg(ql_dbg_tgt, vha, 0xe01f, "Command %p finished\n", cmd);
@@ -2444,7 +3040,8 @@
 		    "not return a CTIO complete\n", vha->vp_idx, cmd->state);
 	}
 
-	if (unlikely(status != CTIO_SUCCESS)) {
+	if (unlikely(status != CTIO_SUCCESS) &&
+		(cmd->state != QLA_TGT_STATE_ABORTED)) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01f, "Finishing failed CTIO\n");
 		dump_stack();
 	}
@@ -2489,13 +3086,12 @@
 /*
  * Process context for I/O path into tcm_qla2xxx code
  */
-static void qlt_do_work(struct work_struct *work)
+static void __qlt_do_work(struct qla_tgt_cmd *cmd)
 {
-	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
 	scsi_qla_host_t *vha = cmd->vha;
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
-	struct qla_tgt_sess *sess = NULL;
+	struct qla_tgt_sess *sess = cmd->sess;
 	struct atio_from_isp *atio = &cmd->atio;
 	unsigned char *cdb;
 	unsigned long flags;
@@ -2505,41 +3101,6 @@
 	if (tgt->tgt_stop)
 		goto out_term;
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha,
-	    atio->u.isp24.fcp_hdr.s_id);
-	/* Do kref_get() before dropping qla_hw_data->hardware_lock. */
-	if (sess)
-		kref_get(&sess->se_sess->sess_kref);
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-	if (unlikely(!sess)) {
-		uint8_t *s_id =	atio->u.isp24.fcp_hdr.s_id;
-
-		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
-			"qla_target(%d): Unable to find wwn login"
-			" (s_id %x:%x:%x), trying to create it manually\n",
-			vha->vp_idx, s_id[0], s_id[1], s_id[2]);
-
-		if (atio->u.raw.entry_count > 1) {
-			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
-				"Dropping multy entry cmd %p\n", cmd);
-			goto out_term;
-		}
-
-		mutex_lock(&vha->vha_tgt.tgt_mutex);
-		sess = qlt_make_local_sess(vha, s_id);
-		/* sess has an extra creation ref. */
-		mutex_unlock(&vha->vha_tgt.tgt_mutex);
-
-		if (!sess)
-			goto out_term;
-	}
-
-	cmd->sess = sess;
-	cmd->loop_id = sess->loop_id;
-	cmd->conf_compl_supported = sess->conf_compl_supported;
-
 	cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
 	cmd->tag = atio->u.isp24.exchange_addr;
 	cmd->unpacked_lun = scsilun_to_int(
@@ -2563,11 +3124,12 @@
 	    atio->u.isp24.fcp_cmnd.add_cdb_len]));
 
 	ql_dbg(ql_dbg_tgt, vha, 0xe022,
-	    "qla_target: START qla command: %p lun: 0x%04x (tag %d)\n",
-	    cmd, cmd->unpacked_lun, cmd->tag);
+		"qla_target: START qla cmd: %p se_cmd %p lun: 0x%04x (tag %d) len(%d) ox_id %x\n",
+		cmd, &cmd->se_cmd, cmd->unpacked_lun, cmd->tag, data_length,
+		cmd->atio.u.isp24.fcp_hdr.ox_id);
 
-	ret = vha->hw->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
-	    fcp_task_attr, data_dir, bidi);
+	ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
+				          fcp_task_attr, data_dir, bidi);
 	if (ret != 0)
 		goto out_term;
 	/*
@@ -2586,17 +3148,114 @@
 	 */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qlt_send_term_exchange(vha, NULL, &cmd->atio, 1);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
-	if (sess)
-		ha->tgt.tgt_ops->put_sess(sess);
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	ha->tgt.tgt_ops->put_sess(sess);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
+static void qlt_do_work(struct work_struct *work)
+{
+	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+
+	__qlt_do_work(cmd);	/* workqueue entry: run the cmd's I/O path */
+}
+
+static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
+				       struct qla_tgt_sess *sess,
+				       struct atio_from_isp *atio)
+{
+	struct se_session *se_sess = sess->se_sess;
+	struct qla_tgt_cmd *cmd;
+	int tag;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	if (tag < 0)
+		return NULL;	/* no tag obtained from the session pool */
+
+	cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag];
+	memset(cmd, 0, sizeof(struct qla_tgt_cmd));
+	/* Copy the ATIO and bind the zeroed slot to its host and session. */
+	memcpy(&cmd->atio, atio, sizeof(*atio));
+	cmd->state = QLA_TGT_STATE_NEW;
+	cmd->tgt = vha->vha_tgt.qla_tgt;
+	cmd->vha = vha;
+	cmd->se_cmd.map_tag = tag;	/* handed back to percpu_ida_free() */
+	cmd->sess = sess;
+	cmd->loop_id = sess->loop_id;
+	cmd->conf_compl_supported = sess->conf_compl_supported;
+
+	return cmd;
+}
+
+static void qlt_send_busy(struct scsi_qla_host *, struct atio_from_isp *,
+			  uint16_t);
+
+/* Work handler: create the missing session for op->atio, then run the cmd. */
+static void qlt_create_sess_from_atio(struct work_struct *work)
+{
+	struct qla_tgt_sess_op *op = container_of(work,
+					struct qla_tgt_sess_op, work);
+	scsi_qla_host_t *vha = op->vha;	/* must be set by the queuer */
+	struct qla_hw_data *ha = vha->hw;
+	struct qla_tgt_sess *sess;
+	struct qla_tgt_cmd *cmd;
+	unsigned long flags;
+	uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id;
+
+	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
+		"qla_target(%d): Unable to find wwn login"
+		" (s_id %x:%x:%x), trying to create it manually\n",
+		vha->vp_idx, s_id[0], s_id[1], s_id[2]);
+
+	if (op->atio.u.raw.entry_count > 1) {
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
+		        "Dropping multy entry atio %p\n", &op->atio);
+		goto out_term;
+	}
+
+	mutex_lock(&vha->vha_tgt.tgt_mutex);
+	sess = qlt_make_local_sess(vha, s_id);
+	/* sess has an extra creation ref. */
+	mutex_unlock(&vha->vha_tgt.tgt_mutex);
+
+	if (!sess)
+		goto out_term;
+	/*
+	 * Now obtain a pre-allocated session tag using the original op->atio
+	 * packet header, and dispatch into __qlt_do_work() using the existing
+	 * process context.
+	 */
+	cmd = qlt_get_tag(vha, sess, &op->atio);
+	if (!cmd) {
+		spin_lock_irqsave(&ha->hardware_lock, flags);
+		qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY);
+		ha->tgt.tgt_ops->put_sess(sess);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		kfree(op);
+		return;
+	}
+	/*
+	 * __qlt_do_work() will call ha->tgt.tgt_ops->put_sess() to release
+	 * the extra reference taken above by qlt_make_local_sess()
+	 */
+	__qlt_do_work(cmd);
+	kfree(op);
+	return;
+
+out_term:
+	/* No session or multi-entry ATIO: terminate the exchange instead. */
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+	qlt_send_term_exchange(vha, NULL, &op->atio, 1);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+	kfree(op);
+}
+
 /* ha->hardware_lock supposed to be held on entry */
 static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio)
 {
+	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+	struct qla_tgt_sess *sess;
 	struct qla_tgt_cmd *cmd;
 
 	if (unlikely(tgt->tgt_stop)) {
@@ -2605,18 +3264,31 @@
 		return -EFAULT;
 	}
 
-	cmd = kmem_cache_zalloc(qla_tgt_cmd_cachep, GFP_ATOMIC);
+	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, atio->u.isp24.fcp_hdr.s_id);
+	if (unlikely(!sess)) {
+		struct qla_tgt_sess_op *op = kzalloc(sizeof(*op), GFP_ATOMIC);
+		if (!op)
+			return -ENOMEM;
+
+		op->vha = vha;	/* qlt_create_sess_from_atio() reads op->vha */
+		memcpy(&op->atio, atio, sizeof(*atio));
+		INIT_WORK(&op->work, qlt_create_sess_from_atio);
+		queue_work(qla_tgt_wq, &op->work);
+		return 0;
+	}
+	/*
+	 * Do kref_get() before returning + dropping qla_hw_data->hardware_lock.
+	 */
+	kref_get(&sess->se_sess->sess_kref);
+
+	cmd = qlt_get_tag(vha, sess, atio);
 	if (!cmd) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05e,
 		    "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx);
+		ha->tgt.tgt_ops->put_sess(sess);
 		return -ENOMEM;
 	}
 
-	memcpy(&cmd->atio, atio, sizeof(*atio));
-	cmd->state = QLA_TGT_STATE_NEW;
-	cmd->tgt = vha->vha_tgt.qla_tgt;
-	cmd->vha = vha;
-
 	INIT_WORK(&cmd->work, qlt_do_work);
 	queue_work(qla_tgt_wq, &cmd->work);
 	return 0;
@@ -3527,11 +4199,11 @@
 	switch (atio->u.raw.entry_type) {
 	case ATIO_TYPE7:
 		ql_dbg(ql_dbg_tgt, vha, 0xe02d,
-		    "ATIO_TYPE7 instance %d, lun %Lx, read/write %d/%d, "
-		    "add_cdb_len %d, data_length %04x, s_id %x:%x:%x\n",
+		    "ATIO_TYPE7 instance %d, lun %Lx, read/write %d/%d, cdb %x, add_cdb_len %x, data_length %04x, s_id %02x%02x%02x\n",
 		    vha->vp_idx, atio->u.isp24.fcp_cmnd.lun,
 		    atio->u.isp24.fcp_cmnd.rddata,
 		    atio->u.isp24.fcp_cmnd.wrdata,
+		    atio->u.isp24.fcp_cmnd.cdb[0],
 		    atio->u.isp24.fcp_cmnd.add_cdb_len,
 		    be32_to_cpu(get_unaligned((uint32_t *)
 			&atio->u.isp24.fcp_cmnd.add_cdb[
@@ -3629,11 +4301,13 @@
 	tgt->irq_cmd_count++;
 
 	switch (pkt->entry_type) {
+	case CTIO_CRC2:
 	case CTIO_TYPE7:
 	{
 		struct ctio7_from_24xx *entry = (struct ctio7_from_24xx *)pkt;
-		ql_dbg(ql_dbg_tgt, vha, 0xe030, "CTIO_TYPE7: instance %d\n",
-		    vha->vp_idx);
+		ql_dbg(ql_dbg_tgt, vha, 0xe030,
+			"CTIO[0x%x] 12/CTIO7 7A/CRC2: instance %d\n",
+			entry->entry_type, vha->vp_idx);
 		qlt_do_ctio_completion(vha, entry->handle,
 		    le16_to_cpu(entry->status)|(pkt->entry_status << 16),
 		    entry);
@@ -4768,6 +5442,7 @@
 	case ABTS_RESP_24XX:
 	case CTIO_TYPE7:
 	case NOTIFY_ACK_TYPE:
+	case CTIO_CRC2:
 		return 1;
 	default:
 		return 0;
@@ -4911,23 +5586,13 @@
 	if (!QLA_TGT_MODE_ENABLED())
 		return 0;
 
-	qla_tgt_cmd_cachep = kmem_cache_create("qla_tgt_cmd_cachep",
-	    sizeof(struct qla_tgt_cmd), __alignof__(struct qla_tgt_cmd), 0,
-	    NULL);
-	if (!qla_tgt_cmd_cachep) {
-		ql_log(ql_log_fatal, NULL, 0xe06c,
-		    "kmem_cache_create for qla_tgt_cmd_cachep failed\n");
-		return -ENOMEM;
-	}
-
 	qla_tgt_mgmt_cmd_cachep = kmem_cache_create("qla_tgt_mgmt_cmd_cachep",
 	    sizeof(struct qla_tgt_mgmt_cmd), __alignof__(struct
 	    qla_tgt_mgmt_cmd), 0, NULL);
 	if (!qla_tgt_mgmt_cmd_cachep) {
 		ql_log(ql_log_fatal, NULL, 0xe06d,
 		    "kmem_cache_create for qla_tgt_mgmt_cmd_cachep failed\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	qla_tgt_mgmt_cmd_mempool = mempool_create(25, mempool_alloc_slab,
@@ -4955,8 +5620,6 @@
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 out_mgmt_cmd_cachep:
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-out:
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 	return ret;
 }
 
@@ -4968,5 +5631,4 @@
 	destroy_workqueue(qla_tgt_wq);
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 }

diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index ce33d8c..e0a58fd 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h

@@ -293,6 +293,7 @@
 #define CTIO_ABORTED			0x02
 #define CTIO_INVALID_RX_ID		0x08
 #define CTIO_TIMEOUT			0x0B
+#define CTIO_DIF_ERROR			0x0C     /* DIF error detected  */
 #define CTIO_LIP_RESET			0x0E
 #define CTIO_TARGET_RESET		0x17
 #define CTIO_PORT_UNAVAILABLE		0x28
@@ -315,7 +316,7 @@
 	uint8_t  seq_id;
 	uint8_t  df_ctl;
 	uint16_t seq_cnt;
-	uint16_t ox_id;
+	__be16   ox_id;
 	uint16_t rx_id;
 	uint32_t parameter;
 } __packed;
@@ -440,7 +441,7 @@
 	union {
 		struct {
 			uint16_t reserved1;
-			uint16_t flags;
+			__le16 flags;
 			uint32_t residual;
 			uint16_t ox_id;
 			uint16_t scsi_status;
@@ -498,11 +499,12 @@
 #define CTIO7_FLAGS_DONT_RET_CTIO	BIT_8
 #define CTIO7_FLAGS_STATUS_MODE_0	0
 #define CTIO7_FLAGS_STATUS_MODE_1	BIT_6
+#define CTIO7_FLAGS_STATUS_MODE_2	BIT_7
 #define CTIO7_FLAGS_EXPLICIT_CONFORM	BIT_5
 #define CTIO7_FLAGS_CONFIRM_SATISF	BIT_4
 #define CTIO7_FLAGS_DSD_PTR		BIT_2
-#define CTIO7_FLAGS_DATA_IN		BIT_1
-#define CTIO7_FLAGS_DATA_OUT		BIT_0
+#define CTIO7_FLAGS_DATA_IN		BIT_1 /* data to initiator */
+#define CTIO7_FLAGS_DATA_OUT		BIT_0 /* data from initiator */
 
 #define ELS_PLOGI			0x3
 #define ELS_FLOGI			0x4
@@ -514,6 +516,68 @@
 #define ELS_ADISC			0x52
 
 /*
+ *CTIO Type CRC_2 IOCB
+ */
+struct ctio_crc2_to_fw {
+	uint8_t entry_type;		/* Entry type. */
+#define CTIO_CRC2 0x7A
+	uint8_t entry_count;		/* Entry count. */
+	uint8_t sys_define;		/* System defined. */
+	uint8_t entry_status;		/* Entry Status. */
+
+	uint32_t handle;		/* System handle. */
+	uint16_t nport_handle;		/* N_PORT handle. */
+	__le16 timeout;		/* Command timeout. */
+
+	uint16_t dseg_count;		/* Data segment count. */
+	uint8_t  vp_index;
+	uint8_t  add_flags;		/* additional flags */
+#define CTIO_CRC2_AF_DIF_DSD_ENA BIT_3
+
+	uint8_t  initiator_id[3];	/* initiator ID */
+	uint8_t  reserved1;
+	uint32_t exchange_addr;		/* rcv exchange address */
+	uint16_t reserved2;
+	__le16 flags;			/* refer to CTIO7 flags values */
+	uint32_t residual;
+	__le16 ox_id;
+	uint16_t scsi_status;
+	__le32 relative_offset;
+	uint32_t reserved5;
+	__le32 transfer_length;		/* total fc transfer length */
+	uint32_t reserved6;
+	__le32 crc_context_address[2];/* Data segment address. */
+	uint16_t crc_context_len;	/* Data segment length. */
+	uint16_t reserved_1;		/* MUST be set to 0. */
+} __packed;
+
+/* CTIO Type CRC_x Status IOCB */
+struct ctio_crc_from_fw {
+	uint8_t entry_type;		/* Entry type. */
+	uint8_t entry_count;		/* Entry count. */
+	uint8_t sys_define;		/* System defined. */
+	uint8_t entry_status;		/* Entry Status. */
+
+	uint32_t handle;		/* System handle. */
+	uint16_t status;
+	uint16_t timeout;		/* Command timeout. */
+	uint16_t dseg_count;		/* Data segment count. */
+	uint32_t reserved1;
+	uint16_t state_flags;
+#define CTIO_CRC_SF_DIF_CHOPPED BIT_4
+
+	uint32_t exchange_address;	/* rcv exchange address */
+	uint16_t reserved2;
+	uint16_t flags;
+	uint32_t resid_xfer_length;
+	uint16_t ox_id;
+	uint8_t  reserved3[12];
+	uint16_t runt_guard;		/* reported runt blk guard */
+	uint8_t  actual_dif[8];
+	uint8_t  expected_dif[8];
+} __packed;
+
+/*
  * ISP queue - ABTS received/response entries structure definition for 24xx.
  */
 #define ABTS_RECV_24XX		0x54 /* ABTS received (for 24xx) */
@@ -641,6 +705,7 @@
 	int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *,
 			unsigned char *, uint32_t, int, int, int);
 	void (*handle_data)(struct qla_tgt_cmd *);
+	void (*handle_dif_err)(struct qla_tgt_cmd *);
 	int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint8_t,
 			uint32_t);
 	void (*free_cmd)(struct qla_tgt_cmd *);
@@ -805,6 +870,12 @@
 	struct list_head tgt_list_entry;
 };
 
+struct qla_tgt_sess_op {
+	struct scsi_qla_host *vha;
+	struct atio_from_isp atio;
+	struct work_struct work;
+};
+
 /*
  * Equivilant to IT Nexus (Initiator-Target)
  */
@@ -829,9 +900,9 @@
 };
 
 struct qla_tgt_cmd {
+	struct se_cmd se_cmd;
 	struct qla_tgt_sess *sess;
 	int state;
-	struct se_cmd se_cmd;
 	struct work_struct free_work;
 	struct work_struct work;
 	/* Sense buffer that will be mapped into outgoing status */
@@ -843,6 +914,7 @@
 	unsigned int free_sg:1;
 	unsigned int aborted:1; /* Needed in case of SRR */
 	unsigned int write_data_transferred:1;
+	unsigned int ctx_dsd_alloced:1;
 
 	struct scatterlist *sg;	/* cmd data buffer SG vector */
 	int sg_cnt;		/* SG segments count */
@@ -857,6 +929,12 @@
 	struct scsi_qla_host *vha;
 
 	struct atio_from_isp atio;
+	/* t10dif */
+	struct scatterlist *prot_sg;
+	uint32_t prot_sg_cnt;
+	uint32_t blk_sz;
+	struct crc_context *ctx;
+
 };
 
 struct qla_tgt_sess_work_param {
@@ -901,6 +979,10 @@
 	int sense_buffer_len;
 	int residual;
 	int add_status_pkt;
+	/* dif */
+	struct scatterlist *prot_sg;
+	uint16_t prot_seg_cnt;
+	uint16_t tot_dsds;
 };
 
 struct qla_tgt_srr_imm {
@@ -976,6 +1058,8 @@
 extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *);
 extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
 extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
+extern int qlt_rdy_to_xfer_dif(struct qla_tgt_cmd *);
+extern int qlt_xmit_response_dif(struct qla_tgt_cmd *, int, uint8_t);
 extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_cmd(struct qla_tgt_cmd *cmd);

diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
index a804e9b..cb9a0c4 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.c
+++ b/drivers/scsi/qla2xxx/qla_tmpl.c

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -201,7 +201,6 @@
 		ql_dbg(ql_dbg_misc, NULL, 0xd014,
 		    "%s: @%x\n", __func__, offset);
 	}
-	qla27xx_insert32(offset, buf, len);
 	qla27xx_read32(window, buf, len);
 }
 
@@ -220,7 +219,7 @@
 
 static inline void
 qla27xx_read_window(__iomem struct device_reg_24xx *reg,
-	uint32_t base, uint offset, uint count, uint width, void *buf,
+	uint32_t addr, uint offset, uint count, uint width, void *buf,
 	ulong *len)
 {
 	void *window = (void *)reg + offset;
@@ -229,14 +228,14 @@
 	if (buf) {
 		ql_dbg(ql_dbg_misc, NULL, 0xd016,
 		    "%s: base=%x offset=%x count=%x width=%x\n",
-		    __func__, base, offset, count, width);
+		    __func__, addr, offset, count, width);
 	}
-	qla27xx_write_reg(reg, IOBASE_ADDR, base, buf);
+	qla27xx_write_reg(reg, IOBASE_ADDR, addr, buf);
 	while (count--) {
-		qla27xx_insert32(base, buf, len);
+		qla27xx_insert32(addr, buf, len);
 		readn(window, buf, len);
 		window += width;
-		base += width;
+		addr++;
 	}
 }
 
@@ -336,7 +335,8 @@
 
 	ql_dbg(ql_dbg_misc, vha, 0xd204,
 	    "%s: rdpci [%lx]\n", __func__, *len);
-	qla27xx_read_reg(reg, ent->t260.pci_addr, buf, len);
+	qla27xx_insert32(ent->t260.pci_offset, buf, len);
+	qla27xx_read_reg(reg, ent->t260.pci_offset, buf, len);
 
 	return false;
 }
@@ -349,7 +349,7 @@
 
 	ql_dbg(ql_dbg_misc, vha, 0xd205,
 	    "%s: wrpci [%lx]\n", __func__, *len);
-	qla27xx_write_reg(reg, ent->t261.pci_addr, ent->t261.write_data, buf);
+	qla27xx_write_reg(reg, ent->t261.pci_offset, ent->t261.write_data, buf);
 
 	return false;
 }
@@ -392,9 +392,9 @@
 		goto done;
 	}
 
-	if (end < start) {
+	if (end < start || end == 0) {
 		ql_dbg(ql_dbg_misc, vha, 0xd023,
-		    "%s: bad range (start=%x end=%x)\n", __func__,
+		    "%s: unusable range (start=%x end=%x)\n", __func__,
 		    ent->t262.end_addr, ent->t262.start_addr);
 		qla27xx_skip_entry(ent, buf);
 		goto done;
@@ -452,17 +452,15 @@
 		ql_dbg(ql_dbg_misc, vha, 0xd025,
 		    "%s: unsupported atio queue\n", __func__);
 		qla27xx_skip_entry(ent, buf);
-		goto done;
 	} else {
 		ql_dbg(ql_dbg_misc, vha, 0xd026,
 		    "%s: unknown queue %u\n", __func__, ent->t263.queue_type);
 		qla27xx_skip_entry(ent, buf);
-		goto done;
 	}
 
 	if (buf)
 		ent->t263.num_queues = count;
-done:
+
 	return false;
 }
 
@@ -503,7 +501,7 @@
 	ql_dbg(ql_dbg_misc, vha, 0xd209,
 	    "%s: pause risc [%lx]\n", __func__, *len);
 	if (buf)
-		qla24xx_pause_risc(reg);
+		qla24xx_pause_risc(reg, vha->hw);
 
 	return false;
 }
@@ -590,7 +588,6 @@
 	struct qla27xx_fwdt_entry *ent, void *buf, ulong *len)
 {
 	struct device_reg_24xx __iomem *reg = qla27xx_isp_reg(vha);
-	void *window = (void *)reg + 0xc4;
 	ulong dwords = ent->t270.count;
 	ulong addr = ent->t270.addr;
 
@@ -599,10 +596,9 @@
 	qla27xx_write_reg(reg, IOBASE_ADDR, 0x40, buf);
 	while (dwords--) {
 		qla27xx_write_reg(reg, 0xc0, addr|0x80000000, buf);
-		qla27xx_read_reg(reg, 0xc4, buf, len);
 		qla27xx_insert32(addr, buf, len);
-		qla27xx_read32(window, buf, len);
-		addr++;
+		qla27xx_read_reg(reg, 0xc4, buf, len);
+		addr += sizeof(uint32_t);
 	}
 
 	return false;
@@ -614,12 +610,12 @@
 {
 	struct device_reg_24xx __iomem *reg = qla27xx_isp_reg(vha);
 	ulong addr = ent->t271.addr;
+	ulong data = ent->t271.data;
 
 	ql_dbg(ql_dbg_misc, vha, 0xd20f,
 	    "%s: wrremreg [%lx]\n", __func__, *len);
 	qla27xx_write_reg(reg, IOBASE_ADDR, 0x40, buf);
-	qla27xx_read_reg(reg, 0xc4, buf, len);
-	qla27xx_insert32(addr, buf, len);
+	qla27xx_write_reg(reg, 0xc4, data, buf);
 	qla27xx_write_reg(reg, 0xc0, addr, buf);
 
 	return false;
@@ -662,13 +658,63 @@
 			    "%s: failed pcicfg read at %lx\n", __func__, addr);
 		qla27xx_insert32(addr, buf, len);
 		qla27xx_insert32(value, buf, len);
-		addr += 4;
+		addr += sizeof(uint32_t);
 	}
 
 	return false;
 }
 
 static int
+qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha,
+	struct qla27xx_fwdt_entry *ent, void *buf, ulong *len)
+{
+	uint count = 0;
+	uint i;
+
+	ql_dbg(ql_dbg_misc, vha, 0xd212,
+	    "%s: getqsh(%x) [%lx]\n", __func__, ent->t274.queue_type, *len);
+	if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) {
+		for (i = 0; i < vha->hw->max_req_queues; i++) {
+			struct req_que *req = vha->hw->req_q_map[i];
+			if (req || !buf) {
+				qla27xx_insert16(i, buf, len);
+				qla27xx_insert16(1, buf, len);
+				qla27xx_insert32(req && req->out_ptr ?
+				    *req->out_ptr : 0, buf, len);
+				count++;
+			}
+		}
+	} else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) {
+		for (i = 0; i < vha->hw->max_rsp_queues; i++) {
+			struct rsp_que *rsp = vha->hw->rsp_q_map[i];
+			if (rsp || !buf) {
+				qla27xx_insert16(i, buf, len);
+				qla27xx_insert16(1, buf, len);
+				qla27xx_insert32(rsp && rsp->in_ptr ?
+				    *rsp->in_ptr : 0, buf, len);
+				count++;
+			}
+		}
+	} else if (ent->t274.queue_type == T274_QUEUE_TYPE_ATIO_SHAD) {
+		ql_dbg(ql_dbg_misc, vha, 0xd02e,
+		    "%s: unsupported atio queue\n", __func__);
+		qla27xx_skip_entry(ent, buf);
+	} else {
+		ql_dbg(ql_dbg_misc, vha, 0xd02f,
+		    "%s: unknown queue %u\n", __func__, ent->t274.queue_type);
+		qla27xx_skip_entry(ent, buf);
+	}
+
+	if (buf)
+		ent->t274.num_queues = count;
+
+	if (!count)
+		qla27xx_skip_entry(ent, buf);
+
+	return false;
+}
+
+static int
 qla27xx_fwdt_entry_other(struct scsi_qla_host *vha,
 	struct qla27xx_fwdt_entry *ent, void *buf, ulong *len)
 {
@@ -709,6 +755,7 @@
 	{ ENTRY_TYPE_WRREMREG		, qla27xx_fwdt_entry_t271  } ,
 	{ ENTRY_TYPE_RDREMRAM		, qla27xx_fwdt_entry_t272  } ,
 	{ ENTRY_TYPE_PCICFG		, qla27xx_fwdt_entry_t273  } ,
+	{ ENTRY_TYPE_GET_SHADOW		, qla27xx_fwdt_entry_t274  } ,
 	{ -1				, qla27xx_fwdt_entry_other }
 };
 

diff --git a/drivers/scsi/qla2xxx/qla_tmpl.h b/drivers/scsi/qla2xxx/qla_tmpl.h
index c9d2fff..1967424 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.h
+++ b/drivers/scsi/qla2xxx/qla_tmpl.h

@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -52,6 +52,7 @@
 #define ENTRY_TYPE_WRREMREG		271
 #define ENTRY_TYPE_RDREMRAM		272
 #define ENTRY_TYPE_PCICFG		273
+#define ENTRY_TYPE_GET_SHADOW		274
 
 #define CAPTURE_FLAG_PHYS_ONLY		BIT_0
 #define CAPTURE_FLAG_PHYS_VIRT		BIT_1
@@ -109,12 +110,12 @@
 		} t259;
 
 		struct __packed {
-			uint8_t pci_addr;
+			uint8_t pci_offset;
 			uint8_t reserved[3];
 		} t260;
 
 		struct __packed {
-			uint8_t pci_addr;
+			uint8_t pci_offset;
 			uint8_t reserved[3];
 			uint32_t write_data;
 		} t261;
@@ -186,6 +187,12 @@
 			uint32_t addr;
 			uint32_t count;
 		} t273;
+
+		struct __packed {
+			uint32_t num_queues;
+			uint8_t  queue_type;
+			uint8_t  reserved[3];
+		} t274;
 	};
 };
 
@@ -202,4 +209,8 @@
 #define T268_BUF_TYPE_EXCH_BUFOFF	2
 #define T268_BUF_TYPE_EXTD_LOGIN	3
 
+#define T274_QUEUE_TYPE_REQ_SHAD	1
+#define T274_QUEUE_TYPE_RSP_SHAD	2
+#define T274_QUEUE_TYPE_ATIO_SHAD	3
+
 #endif

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index e36b947..4d2c98c 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h

@@ -1,13 +1,13 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2013 QLogic Corporation
+ * Copyright (c)  2003-2014 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.07.00.02-k"
+#define QLA2XXX_VERSION      "8.07.00.08-k"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	7

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 68fb66f..e2beab9 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c

@@ -472,6 +472,11 @@
 	cmd->sg_cnt = se_cmd->t_data_nents;
 	cmd->sg = se_cmd->t_data_sg;
 
+	cmd->prot_sg_cnt = se_cmd->t_prot_nents;
+	cmd->prot_sg = se_cmd->t_prot_sg;
+	cmd->blk_sz  = se_cmd->se_dev->dev_attrib.block_size;
+	se_cmd->pi_err = 0;
+
 	/*
 	 * qla_target.c:qlt_rdy_to_xfer() will call pci_map_sg() to setup
 	 * the SGL mappings into PCIe memory for incoming FCP WRITE data.
@@ -567,8 +572,13 @@
 			return;
 		}
 
-		transport_generic_request_failure(&cmd->se_cmd,
-						  TCM_CHECK_CONDITION_ABORT_CMD);
+		if (cmd->se_cmd.pi_err)
+			transport_generic_request_failure(&cmd->se_cmd,
+				cmd->se_cmd.pi_err);
+		else
+			transport_generic_request_failure(&cmd->se_cmd,
+				TCM_CHECK_CONDITION_ABORT_CMD);
+
 		return;
 	}
 
@@ -584,6 +594,27 @@
 	queue_work(tcm_qla2xxx_free_wq, &cmd->work);
 }
 
+static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
+{
+	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+
+	/* take an extra kref to prevent cmd free too early.
+	 * need to wait for SCSI status/check condition to
+	 * finish responding generate by transport_generic_request_failure.
+	 */
+	kref_get(&cmd->se_cmd.cmd_kref);
+	transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err);
+}
+
+/*
+ * Called from qla_target.c:qlt_do_ctio_completion()
+ */
+static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd)
+{
+	INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work);
+	queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+}
+
 /*
  * Called from qla_target.c:qlt_issue_task_mgmt()
  */
@@ -610,6 +641,11 @@
 	cmd->sg = se_cmd->t_data_sg;
 	cmd->offset = 0;
 
+	cmd->prot_sg_cnt = se_cmd->t_prot_nents;
+	cmd->prot_sg = se_cmd->t_prot_sg;
+	cmd->blk_sz  = se_cmd->se_dev->dev_attrib.block_size;
+	se_cmd->pi_err = 0;
+
 	/*
 	 * Now queue completed DATA_IN the qla2xxx LLD and response ring
 	 */
@@ -1465,6 +1501,8 @@
 	struct qla_tgt_sess *sess = qla_tgt_sess;
 	unsigned char port_name[36];
 	unsigned long flags;
+	int num_tags = (ha->fw_xcb_count) ? ha->fw_xcb_count :
+		       TCM_QLA2XXX_DEFAULT_TAGS;
 
 	lport = vha->vha_tgt.target_lport_ptr;
 	if (!lport) {
@@ -1482,7 +1520,9 @@
 	}
 	se_tpg = &tpg->se_tpg;
 
-	se_sess = transport_init_session(TARGET_PROT_NORMAL);
+	se_sess = transport_init_session_tags(num_tags,
+					      sizeof(struct qla_tgt_cmd),
+					      TARGET_PROT_NORMAL);
 	if (IS_ERR(se_sess)) {
 		pr_err("Unable to initialize struct se_session\n");
 		return PTR_ERR(se_sess);
@@ -1600,6 +1640,7 @@
 static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
 	.handle_cmd		= tcm_qla2xxx_handle_cmd,
 	.handle_data		= tcm_qla2xxx_handle_data,
+	.handle_dif_err		= tcm_qla2xxx_handle_dif_err,
 	.handle_tmr		= tcm_qla2xxx_handle_tmr,
 	.free_cmd		= tcm_qla2xxx_free_cmd,
 	.free_mcmd		= tcm_qla2xxx_free_mcmd,

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index 33aaac8..10c0021 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h

@@ -4,6 +4,11 @@
 #define TCM_QLA2XXX_VERSION	"v0.1"
 /* length of ASCII WWPNs including pad */
 #define TCM_QLA2XXX_NAMELEN	32
+/*
+ * Number of pre-allocated per-session tags, based upon the worst-case
+ * per port number of iocbs
+ */
+#define TCM_QLA2XXX_DEFAULT_TAGS 2088
 
 #include "qla_target.h"
 

diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c
index 2eba353..556c152 100644
--- a/drivers/scsi/qla4xxx/ql4_83xx.c
+++ b/drivers/scsi/qla4xxx/ql4_83xx.c

@@ -249,110 +249,6 @@
 	qla4_83xx_flash_unlock(ha);
 }
 
-/**
- * qla4_83xx_ms_mem_write_128b - Writes data to MS/off-chip memory
- * @ha: Pointer to adapter structure
- * @addr: Flash address to write to
- * @data: Data to be written
- * @count: word_count to be written
- *
- * Return: On success return QLA_SUCCESS
- *	   On error return QLA_ERROR
- **/
-int qla4_83xx_ms_mem_write_128b(struct scsi_qla_host *ha, uint64_t addr,
-				uint32_t *data, uint32_t count)
-{
-	int i, j;
-	uint32_t agt_ctrl;
-	unsigned long flags;
-	int ret_val = QLA_SUCCESS;
-
-	/* Only 128-bit aligned access */
-	if (addr & 0xF) {
-		ret_val = QLA_ERROR;
-		goto exit_ms_mem_write;
-	}
-
-	write_lock_irqsave(&ha->hw_lock, flags);
-
-	/* Write address */
-	ret_val = qla4_83xx_wr_reg_indirect(ha, MD_MIU_TEST_AGT_ADDR_HI, 0);
-	if (ret_val == QLA_ERROR) {
-		ql4_printk(KERN_ERR, ha, "%s: write to AGT_ADDR_HI failed\n",
-			   __func__);
-		goto exit_ms_mem_write_unlock;
-	}
-
-	for (i = 0; i < count; i++, addr += 16) {
-		if (!((QLA8XXX_ADDR_IN_RANGE(addr, QLA8XXX_ADDR_QDR_NET,
-					     QLA8XXX_ADDR_QDR_NET_MAX)) ||
-		      (QLA8XXX_ADDR_IN_RANGE(addr, QLA8XXX_ADDR_DDR_NET,
-					     QLA8XXX_ADDR_DDR_NET_MAX)))) {
-			ret_val = QLA_ERROR;
-			goto exit_ms_mem_write_unlock;
-		}
-
-		ret_val = qla4_83xx_wr_reg_indirect(ha, MD_MIU_TEST_AGT_ADDR_LO,
-						    addr);
-		/* Write data */
-		ret_val |= qla4_83xx_wr_reg_indirect(ha,
-						     MD_MIU_TEST_AGT_WRDATA_LO,
-						     *data++);
-		ret_val |= qla4_83xx_wr_reg_indirect(ha,
-						     MD_MIU_TEST_AGT_WRDATA_HI,
-						     *data++);
-		ret_val |= qla4_83xx_wr_reg_indirect(ha,
-						     MD_MIU_TEST_AGT_WRDATA_ULO,
-						     *data++);
-		ret_val |= qla4_83xx_wr_reg_indirect(ha,
-						     MD_MIU_TEST_AGT_WRDATA_UHI,
-						     *data++);
-		if (ret_val == QLA_ERROR) {
-			ql4_printk(KERN_ERR, ha, "%s: write to AGT_WRDATA failed\n",
-				   __func__);
-			goto exit_ms_mem_write_unlock;
-		}
-
-		/* Check write status */
-		ret_val = qla4_83xx_wr_reg_indirect(ha, MD_MIU_TEST_AGT_CTRL,
-						    MIU_TA_CTL_WRITE_ENABLE);
-		ret_val |= qla4_83xx_wr_reg_indirect(ha, MD_MIU_TEST_AGT_CTRL,
-						     MIU_TA_CTL_WRITE_START);
-		if (ret_val == QLA_ERROR) {
-			ql4_printk(KERN_ERR, ha, "%s: write to AGT_CTRL failed\n",
-				   __func__);
-			goto exit_ms_mem_write_unlock;
-		}
-
-		for (j = 0; j < MAX_CTL_CHECK; j++) {
-			ret_val = qla4_83xx_rd_reg_indirect(ha,
-							MD_MIU_TEST_AGT_CTRL,
-							&agt_ctrl);
-			if (ret_val == QLA_ERROR) {
-				ql4_printk(KERN_ERR, ha, "%s: failed to read MD_MIU_TEST_AGT_CTRL\n",
-					   __func__);
-				goto exit_ms_mem_write_unlock;
-			}
-			if ((agt_ctrl & MIU_TA_CTL_BUSY) == 0)
-				break;
-		}
-
-		/* Status check failed */
-		if (j >= MAX_CTL_CHECK) {
-			printk_ratelimited(KERN_ERR "%s: MS memory write failed!\n",
-					   __func__);
-			ret_val = QLA_ERROR;
-			goto exit_ms_mem_write_unlock;
-		}
-	}
-
-exit_ms_mem_write_unlock:
-	write_unlock_irqrestore(&ha->hw_lock, flags);
-
-exit_ms_mem_write:
-	return ret_val;
-}
-
 #define INTENT_TO_RECOVER	0x01
 #define PROCEED_TO_RECOVER	0x02
 
@@ -760,7 +656,7 @@
 			  __func__));
 
 	/* 128 bit/16 byte write to MS memory */
-	ret_val = qla4_83xx_ms_mem_write_128b(ha, dest, (uint32_t *)p_cache,
+	ret_val = qla4_8xxx_ms_mem_write_128b(ha, dest, (uint32_t *)p_cache,
 					      count);
 	if (ret_val == QLA_ERROR) {
 		ql4_printk(KERN_ERR, ha, "%s: Error writing firmware to MS\n",

diff --git a/drivers/scsi/qla4xxx/ql4_83xx.h b/drivers/scsi/qla4xxx/ql4_83xx.h
index a0de6e2..775fdf9 100644
--- a/drivers/scsi/qla4xxx/ql4_83xx.h
+++ b/drivers/scsi/qla4xxx/ql4_83xx.h

@@ -254,6 +254,50 @@
 	uint32_t rsvd_1;
 };
 
+struct qla8044_minidump_entry_rddfe {
+	struct qla8xxx_minidump_entry_hdr h;
+	uint32_t addr_1;
+	uint32_t value;
+	uint8_t stride;
+	uint8_t stride2;
+	uint16_t count;
+	uint32_t poll;
+	uint32_t mask;
+	uint32_t modify_mask;
+	uint32_t data_size;
+	uint32_t rsvd;
+
+} __packed;
+
+struct qla8044_minidump_entry_rdmdio {
+	struct qla8xxx_minidump_entry_hdr h;
+
+	uint32_t addr_1;
+	uint32_t addr_2;
+	uint32_t value_1;
+	uint8_t stride_1;
+	uint8_t stride_2;
+	uint16_t count;
+	uint32_t poll;
+	uint32_t mask;
+	uint32_t value_2;
+	uint32_t data_size;
+
+} __packed;
+
+struct qla8044_minidump_entry_pollwr {
+	struct qla8xxx_minidump_entry_hdr h;
+	uint32_t addr_1;
+	uint32_t addr_2;
+	uint32_t value_1;
+	uint32_t value_2;
+	uint32_t poll;
+	uint32_t mask;
+	uint32_t data_size;
+	uint32_t rsvd;
+
+} __packed;
+
 /* RDMUX2 Entry */
 struct qla83xx_minidump_entry_rdmux2 {
 	struct qla8xxx_minidump_entry_hdr h;

diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index 73a5022..8f6d0fb 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h

@@ -601,6 +601,7 @@
 #define DPC_HA_NEED_QUIESCENT		22 /* 0x00400000 ISP-82xx only*/
 #define DPC_POST_IDC_ACK		23 /* 0x00800000 */
 #define DPC_RESTORE_ACB			24 /* 0x01000000 */
+#define DPC_SYSFS_DDB_EXPORT		25 /* 0x02000000 */
 
 	struct Scsi_Host *host; /* pointer to host data */
 	uint32_t tot_ddbs;

diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index 209853c..699575e 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h

@@ -1415,6 +1415,9 @@
 #define QLA83XX_DBG_OCM_WNDREG_ARRAY_LEN	16
 #define QLA83XX_SS_OCM_WNDREG_INDEX		3
 #define QLA83XX_SS_PCI_INDEX			0
+#define QLA8022_TEMPLATE_CAP_OFFSET		172
+#define QLA83XX_TEMPLATE_CAP_OFFSET		268
+#define QLA80XX_TEMPLATE_RESERVED_BITS		16
 
 struct qla4_8xxx_minidump_template_hdr {
 	uint32_t entry_type;
@@ -1434,6 +1437,7 @@
 	uint32_t saved_state_array[QLA8XXX_DBG_STATE_ARRAY_LEN];
 	uint32_t capture_size_array[QLA8XXX_DBG_CAP_SIZE_ARRAY_LEN];
 	uint32_t ocm_window_reg[QLA83XX_DBG_OCM_WNDREG_ARRAY_LEN];
+	uint32_t capabilities[QLA80XX_TEMPLATE_RESERVED_BITS];
 };
 
 #endif /*  _QLA4X_FW_H */

diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h
index b1a19cd..5f58b45 100644
--- a/drivers/scsi/qla4xxx/ql4_glbl.h
+++ b/drivers/scsi/qla4xxx/ql4_glbl.h

@@ -274,13 +274,14 @@
 int qla4xxx_get_acb(struct scsi_qla_host *ha, dma_addr_t acb_dma,
 		    uint32_t acb_type, uint32_t len);
 int qla4_84xx_config_acb(struct scsi_qla_host *ha, int acb_config);
-int qla4_83xx_ms_mem_write_128b(struct scsi_qla_host *ha,
+int qla4_8xxx_ms_mem_write_128b(struct scsi_qla_host *ha,
 				uint64_t addr, uint32_t *data, uint32_t count);
 uint8_t qla4xxx_set_ipaddr_state(uint8_t fw_ipaddr_state);
 int qla4_83xx_get_port_config(struct scsi_qla_host *ha, uint32_t *config);
 int qla4_83xx_set_port_config(struct scsi_qla_host *ha, uint32_t *config);
 int qla4_8xxx_check_init_adapter_retry(struct scsi_qla_host *ha);
 int qla4_83xx_is_detached(struct scsi_qla_host *ha);
+int qla4xxx_sysfs_ddb_export(struct scsi_qla_host *ha);
 
 extern int ql4xextended_error_logging;
 extern int ql4xdontresethba;

diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index 28fbece..6f12f85 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c

@@ -282,6 +282,25 @@
 	return ipv4_wait|ipv6_wait;
 }
 
+static int qla4_80xx_is_minidump_dma_capable(struct scsi_qla_host *ha,
+		struct qla4_8xxx_minidump_template_hdr *md_hdr)
+{
+	int offset = (is_qla8022(ha)) ? QLA8022_TEMPLATE_CAP_OFFSET :
+					QLA83XX_TEMPLATE_CAP_OFFSET;
+	int rval = 1;
+	uint32_t *cap_offset;
+
+	cap_offset = (uint32_t *)((char *)md_hdr + offset);
+
+	if (!(le32_to_cpu(*cap_offset) & BIT_0)) {
+		ql4_printk(KERN_INFO, ha, "PEX DMA Not supported %d\n",
+			   *cap_offset);
+		rval = 0;
+	}
+
+	return rval;
+}
+
 /**
  * qla4xxx_alloc_fw_dump - Allocate memory for minidump data.
  * @ha: pointer to host adapter structure.
@@ -294,6 +313,7 @@
 	void *md_tmp;
 	dma_addr_t md_tmp_dma;
 	struct qla4_8xxx_minidump_template_hdr *md_hdr;
+	int dma_capable;
 
 	if (ha->fw_dump) {
 		ql4_printk(KERN_WARNING, ha,
@@ -326,13 +346,19 @@
 
 	md_hdr = (struct qla4_8xxx_minidump_template_hdr *)md_tmp;
 
+	dma_capable = qla4_80xx_is_minidump_dma_capable(ha, md_hdr);
+
 	capture_debug_level = md_hdr->capture_debug_level;
 
 	/* Get capture mask based on module loadtime setting. */
-	if (ql4xmdcapmask >= 0x3 && ql4xmdcapmask <= 0x7F)
+	if ((ql4xmdcapmask >= 0x3 && ql4xmdcapmask <= 0x7F) ||
+	    (ql4xmdcapmask == 0xFF && dma_capable))  {
 		ha->fw_dump_capture_mask = ql4xmdcapmask;
-	else
+	} else {
+		if (ql4xmdcapmask == 0xFF)
+			ql4_printk(KERN_INFO, ha, "Falling back to default capture mask, as PEX DMA is not supported\n");
 		ha->fw_dump_capture_mask = capture_debug_level;
+	}
 
 	md_hdr->driver_capture_mask = ha->fw_dump_capture_mask;
 
@@ -864,6 +890,8 @@
 	if (status == QLA_SUCCESS) {
 		if (test_and_clear_bit(AF_GET_CRASH_RECORD, &ha->flags))
 			qla4xxx_get_crash_record(ha);
+
+		qla4xxx_init_rings(ha);
 	} else {
 		DEBUG(printk("scsi%ld: %s: Firmware has NOT started\n",
 			     ha->host_no, __func__));

diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c
index b1925d1..081b6b7 100644
--- a/drivers/scsi/qla4xxx/ql4_isr.c
+++ b/drivers/scsi/qla4xxx/ql4_isr.c

@@ -1526,7 +1526,7 @@
 
 int qla4xxx_request_irqs(struct scsi_qla_host *ha)
 {
-	int ret;
+	int ret = 0;
 	int rval = QLA_ERROR;
 
 	if (is_qla40XX(ha))
@@ -1580,15 +1580,13 @@
 		}
 	}
 
-	/*
-	 * Prevent interrupts from falling back to INTx mode in cases where
-	 * interrupts cannot get acquired through MSI-X or MSI mode.
-	 */
+try_intx:
 	if (is_qla8022(ha)) {
-		ql4_printk(KERN_WARNING, ha, "IRQ not attached -- %d.\n", ret);
+		ql4_printk(KERN_WARNING, ha, "%s: ISP82xx Legacy interrupt not supported\n",
+			   __func__);
 		goto irq_not_attached;
 	}
-try_intx:
+
 	/* Trying INTx */
 	ret = request_irq(ha->pdev->irq, ha->isp_ops->intr_handler,
 	    IRQF_SHARED, DRIVER_NAME, ha);

diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c
index 0a6b782..0a3312c 100644
--- a/drivers/scsi/qla4xxx/ql4_mbx.c
+++ b/drivers/scsi/qla4xxx/ql4_mbx.c

@@ -2381,7 +2381,7 @@
 			ql4_printk(KERN_ERR, ha, "%s: Unable to alloc acb\n",
 				   __func__);
 			rval = QLA_ERROR;
-			goto exit_config_acb;
+			goto exit_free_acb;
 		}
 		memcpy(ha->saved_acb, acb, acb_len);
 		break;
@@ -2395,8 +2395,6 @@
 		}
 
 		memcpy(acb, ha->saved_acb, acb_len);
-		kfree(ha->saved_acb);
-		ha->saved_acb = NULL;
 
 		rval = qla4xxx_set_acb(ha, &mbox_cmd[0], &mbox_sts[0], acb_dma);
 		if (rval != QLA_SUCCESS)
@@ -2412,6 +2410,10 @@
 	dma_free_coherent(&ha->pdev->dev, sizeof(struct addr_ctrl_blk), acb,
 			  acb_dma);
 exit_config_acb:
+	if ((acb_config == ACB_CONFIG_SET) && ha->saved_acb) {
+		kfree(ha->saved_acb);
+		ha->saved_acb = NULL;
+	}
 	DEBUG2(ql4_printk(KERN_INFO, ha,
 			  "%s %s\n", __func__,
 			  rval == QLA_SUCCESS ? "SUCCEEDED" : "FAILED"));

diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 63328c8..9dbdb4b 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c

@@ -14,6 +14,7 @@
 
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
 
+#define TIMEOUT_100_MS	100
 #define MASK(n)		DMA_BIT_MASK(n)
 #define MN_WIN(addr)	(((addr & 0x1fc0000) >> 1) | ((addr >> 25) & 0x3ff))
 #define OCM_WIN(addr)	(((addr & 0x1ff0000) >> 1) | ((addr >> 25) & 0x3ff))
@@ -1176,6 +1177,112 @@
 	return 0;
 }
 
+/**
+ * qla4_8xxx_ms_mem_write_128b - Writes data to MS/off-chip memory
+ * @ha: Pointer to adapter structure
+ * @addr: Flash address to write to
+ * @data: Data to be written
+ * @count: word_count to be written
+ *
+ * Return: On success return QLA_SUCCESS
+ *         On error return QLA_ERROR
+ **/
+int qla4_8xxx_ms_mem_write_128b(struct scsi_qla_host *ha, uint64_t addr,
+				uint32_t *data, uint32_t count)
+{
+	int i, j;
+	uint32_t agt_ctrl;
+	unsigned long flags;
+	int ret_val = QLA_SUCCESS;
+
+	/* Only 128-bit aligned access */
+	if (addr & 0xF) {
+		ret_val = QLA_ERROR;
+		goto exit_ms_mem_write;
+	}
+
+	write_lock_irqsave(&ha->hw_lock, flags);
+
+	/* Write address */
+	ret_val = ha->isp_ops->wr_reg_indirect(ha, MD_MIU_TEST_AGT_ADDR_HI, 0);
+	if (ret_val == QLA_ERROR) {
+		ql4_printk(KERN_ERR, ha, "%s: write to AGT_ADDR_HI failed\n",
+			   __func__);
+		goto exit_ms_mem_write_unlock;
+	}
+
+	for (i = 0; i < count; i++, addr += 16) {
+		if (!((QLA8XXX_ADDR_IN_RANGE(addr, QLA8XXX_ADDR_QDR_NET,
+					     QLA8XXX_ADDR_QDR_NET_MAX)) ||
+		      (QLA8XXX_ADDR_IN_RANGE(addr, QLA8XXX_ADDR_DDR_NET,
+					     QLA8XXX_ADDR_DDR_NET_MAX)))) {
+			ret_val = QLA_ERROR;
+			goto exit_ms_mem_write_unlock;
+		}
+
+		ret_val = ha->isp_ops->wr_reg_indirect(ha,
+						       MD_MIU_TEST_AGT_ADDR_LO,
+						       addr);
+		/* Write data */
+		ret_val |= ha->isp_ops->wr_reg_indirect(ha,
+						MD_MIU_TEST_AGT_WRDATA_LO,
+						*data++);
+		ret_val |= ha->isp_ops->wr_reg_indirect(ha,
+						MD_MIU_TEST_AGT_WRDATA_HI,
+						*data++);
+		ret_val |= ha->isp_ops->wr_reg_indirect(ha,
+						MD_MIU_TEST_AGT_WRDATA_ULO,
+						*data++);
+		ret_val |= ha->isp_ops->wr_reg_indirect(ha,
+						MD_MIU_TEST_AGT_WRDATA_UHI,
+						*data++);
+		if (ret_val == QLA_ERROR) {
+			ql4_printk(KERN_ERR, ha, "%s: write to AGT_WRDATA failed\n",
+				   __func__);
+			goto exit_ms_mem_write_unlock;
+		}
+
+		/* Check write status */
+		ret_val = ha->isp_ops->wr_reg_indirect(ha, MD_MIU_TEST_AGT_CTRL,
+						       MIU_TA_CTL_WRITE_ENABLE);
+		ret_val |= ha->isp_ops->wr_reg_indirect(ha,
+							MD_MIU_TEST_AGT_CTRL,
+							MIU_TA_CTL_WRITE_START);
+		if (ret_val == QLA_ERROR) {
+			ql4_printk(KERN_ERR, ha, "%s: write to AGT_CTRL failed\n",
+				   __func__);
+			goto exit_ms_mem_write_unlock;
+		}
+
+		for (j = 0; j < MAX_CTL_CHECK; j++) {
+			ret_val = ha->isp_ops->rd_reg_indirect(ha,
+							MD_MIU_TEST_AGT_CTRL,
+							&agt_ctrl);
+			if (ret_val == QLA_ERROR) {
+				ql4_printk(KERN_ERR, ha, "%s: failed to read MD_MIU_TEST_AGT_CTRL\n",
+					   __func__);
+				goto exit_ms_mem_write_unlock;
+			}
+			if ((agt_ctrl & MIU_TA_CTL_BUSY) == 0)
+				break;
+		}
+
+		/* Status check failed */
+		if (j >= MAX_CTL_CHECK) {
+			printk_ratelimited(KERN_ERR "%s: MS memory write failed!\n",
+					   __func__);
+			ret_val = QLA_ERROR;
+			goto exit_ms_mem_write_unlock;
+		}
+	}
+
+exit_ms_mem_write_unlock:
+	write_unlock_irqrestore(&ha->hw_lock, flags);
+
+exit_ms_mem_write:
+	return ret_val;
+}
+
 static int
 qla4_82xx_load_from_flash(struct scsi_qla_host *ha, uint32_t image_start)
 {
@@ -1714,6 +1821,101 @@
 	qla4_82xx_rom_unlock(ha);
 }
 
+static uint32_t ql4_84xx_poll_wait_for_ready(struct scsi_qla_host *ha,
+					     uint32_t addr1, uint32_t mask)
+{
+	unsigned long timeout;
+	uint32_t rval = QLA_SUCCESS;
+	uint32_t temp;
+
+	timeout = jiffies + msecs_to_jiffies(TIMEOUT_100_MS);
+	do {
+		ha->isp_ops->rd_reg_indirect(ha, addr1, &temp);
+		if ((temp & mask) != 0)
+			break;
+
+		if (time_after_eq(jiffies, timeout)) {
+			ql4_printk(KERN_INFO, ha, "Error in processing rdmdio entry\n");
+			return QLA_ERROR;
+		}
+	} while (1);
+
+	return rval;
+}
+
+uint32_t ql4_84xx_ipmdio_rd_reg(struct scsi_qla_host *ha, uint32_t addr1,
+				uint32_t addr3, uint32_t mask, uint32_t addr,
+				uint32_t *data_ptr)
+{
+	int rval = QLA_SUCCESS;
+	uint32_t temp;
+	uint32_t data;
+
+	rval = ql4_84xx_poll_wait_for_ready(ha, addr1, mask);
+	if (rval)
+		goto exit_ipmdio_rd_reg;
+
+	temp = (0x40000000 | addr);
+	ha->isp_ops->wr_reg_indirect(ha, addr1, temp);
+
+	rval = ql4_84xx_poll_wait_for_ready(ha, addr1, mask);
+	if (rval)
+		goto exit_ipmdio_rd_reg;
+
+	ha->isp_ops->rd_reg_indirect(ha, addr3, &data);
+	*data_ptr = data;
+
+exit_ipmdio_rd_reg:
+	return rval;
+}
+
+
+static uint32_t ql4_84xx_poll_wait_ipmdio_bus_idle(struct scsi_qla_host *ha,
+						    uint32_t addr1,
+						    uint32_t addr2,
+						    uint32_t addr3,
+						    uint32_t mask)
+{
+	unsigned long timeout;
+	uint32_t temp;
+	uint32_t rval = QLA_SUCCESS;
+
+	timeout = jiffies + msecs_to_jiffies(TIMEOUT_100_MS);
+	do {
+		ql4_84xx_ipmdio_rd_reg(ha, addr1, addr3, mask, addr2, &temp);
+		if ((temp & 0x1) != 1)
+			break;
+		if (time_after_eq(jiffies, timeout)) {
+			ql4_printk(KERN_INFO, ha, "Error in processing mdiobus idle\n");
+			return QLA_ERROR;
+		}
+	} while (1);
+
+	return rval;
+}
+
+static int ql4_84xx_ipmdio_wr_reg(struct scsi_qla_host *ha,
+				  uint32_t addr1, uint32_t addr3,
+				  uint32_t mask, uint32_t addr,
+				  uint32_t value)
+{
+	int rval = QLA_SUCCESS;
+
+	rval = ql4_84xx_poll_wait_for_ready(ha, addr1, mask);
+	if (rval)
+		goto exit_ipmdio_wr_reg;
+
+	ha->isp_ops->wr_reg_indirect(ha, addr3, value);
+	ha->isp_ops->wr_reg_indirect(ha, addr1, addr);
+
+	rval = ql4_84xx_poll_wait_for_ready(ha, addr1, mask);
+	if (rval)
+		goto exit_ipmdio_wr_reg;
+
+exit_ipmdio_wr_reg:
+	return rval;
+}
+
 static void qla4_8xxx_minidump_process_rdcrb(struct scsi_qla_host *ha,
 				struct qla8xxx_minidump_entry_hdr *entry_hdr,
 				uint32_t **d_ptr)
@@ -1822,7 +2024,7 @@
 	return rval;
 }
 
-static int qla4_83xx_minidump_pex_dma_read(struct scsi_qla_host *ha,
+static int qla4_8xxx_minidump_pex_dma_read(struct scsi_qla_host *ha,
 				struct qla8xxx_minidump_entry_hdr *entry_hdr,
 				uint32_t **d_ptr)
 {
@@ -1899,11 +2101,11 @@
 		dma_desc.cmd.read_data_size = size;
 
 		/* Prepare: Write pex-dma descriptor to MS memory. */
-		rval = qla4_83xx_ms_mem_write_128b(ha,
+		rval = qla4_8xxx_ms_mem_write_128b(ha,
 			      (uint64_t)m_hdr->desc_card_addr,
 			      (uint32_t *)&dma_desc,
 			      (sizeof(struct qla4_83xx_pex_dma_descriptor)/16));
-		if (rval == -1) {
+		if (rval != QLA_SUCCESS) {
 			ql4_printk(KERN_INFO, ha,
 				   "%s: Error writing rdmem-dma-init to MS !!!\n",
 				   __func__);
@@ -2359,17 +2561,10 @@
 	uint32_t *data_ptr = *d_ptr;
 	int rval = QLA_SUCCESS;
 
-	if (is_qla8032(ha) || is_qla8042(ha)) {
-		rval = qla4_83xx_minidump_pex_dma_read(ha, entry_hdr,
-						       &data_ptr);
-		if (rval != QLA_SUCCESS) {
-			rval = __qla4_8xxx_minidump_process_rdmem(ha, entry_hdr,
-								  &data_ptr);
-		}
-	} else {
+	rval = qla4_8xxx_minidump_pex_dma_read(ha, entry_hdr, &data_ptr);
+	if (rval != QLA_SUCCESS)
 		rval = __qla4_8xxx_minidump_process_rdmem(ha, entry_hdr,
 							  &data_ptr);
-	}
 	*d_ptr = data_ptr;
 	return rval;
 }
@@ -2440,6 +2635,227 @@
 	return rval;
 }
 
+/*
+ * qla4_84xx_minidump_process_rddfe - capture data for an RDDFE minidump entry
+ * @ha: adapter whose isp_ops indirect register accessors are used
+ * @entry_hdr: entry header, cast to struct qla8044_minidump_entry_rddfe
+ * @d_ptr: in/out cursor into the dump buffer
+ *
+ * For each of the entry's "count" iterations the function runs a
+ * write-then-poll handshake on addr_1, does a read-modify-write on
+ * addr_1 + stride, and stores two little-endian words at the cursor: the
+ * value that was written and the value read back afterwards.
+ *
+ * Returns QLA_SUCCESS after all iterations, with *d_ptr advanced past the
+ * stored words.  Returns QLA_ERROR as soon as any poll loop uses up all
+ * "poll" attempts; in that case *d_ptr is left unchanged.
+ */
+static uint32_t qla4_84xx_minidump_process_rddfe(struct scsi_qla_host *ha,
+				struct qla8xxx_minidump_entry_hdr *entry_hdr,
+				uint32_t **d_ptr)
+{
+	int loop_cnt;
+	uint32_t addr1, addr2, value, data, temp, wrval;
+	uint8_t stride, stride2;
+	uint16_t count;
+	uint32_t poll, mask, data_size, modify_mask;
+	uint32_t wait_count = 0;
+	uint32_t *data_ptr = *d_ptr;
+	struct qla8044_minidump_entry_rddfe *rddfe;
+	uint32_t rval = QLA_SUCCESS;
+
+	rddfe = (struct qla8044_minidump_entry_rddfe *)entry_hdr;
+	addr1 = le32_to_cpu(rddfe->addr_1);
+	value = le32_to_cpu(rddfe->value);
+	stride = le32_to_cpu(rddfe->stride);
+	stride2 = le32_to_cpu(rddfe->stride2);
+	count = le32_to_cpu(rddfe->count);
+
+	poll = le32_to_cpu(rddfe->poll);
+	mask = le32_to_cpu(rddfe->mask);
+	modify_mask = le32_to_cpu(rddfe->modify_mask);
+	/* NOTE(review): data_size is loaded here but not used below. */
+	data_size = le32_to_cpu(rddfe->data_size);
+
+	/* addr2 is addr1 offset by stride. */
+	addr2 = addr1 + stride;
+
+	for (loop_cnt = 0x0; loop_cnt < count; loop_cnt++) {
+		/* Step 1: write value with bit 30 set, then poll addr1. */
+		ha->isp_ops->wr_reg_indirect(ha, addr1, (0x40000000 | value));
+
+		wait_count = 0;
+		while (wait_count < poll) {
+			ha->isp_ops->rd_reg_indirect(ha, addr1, &temp);
+			if ((temp & mask) != 0)
+				break;
+			wait_count++;
+		}
+
+		/*
+		 * wait_count only reaches poll when no read matched the mask
+		 * (this is also the case when poll is 0).
+		 */
+		if (wait_count == poll) {
+			ql4_printk(KERN_ERR, ha, "%s: TIMEOUT\n", __func__);
+			rval = QLA_ERROR;
+			goto exit_process_rddfe;
+		} else {
+			/*
+			 * Step 2: read addr2, keep only the modify_mask bits,
+			 * OR in loop_cnt at bit 0 and at bit 16, and build
+			 * wrval by OR-ing that with itself shifted left 16.
+			 */
+			ha->isp_ops->rd_reg_indirect(ha, addr2, &temp);
+			temp = temp & modify_mask;
+			temp = (temp | ((loop_cnt << 16) | loop_cnt));
+			wrval = ((temp << 16) | temp);
+
+			/* Write the new data, then value without bit 30. */
+			ha->isp_ops->wr_reg_indirect(ha, addr2, wrval);
+			ha->isp_ops->wr_reg_indirect(ha, addr1, value);
+
+			wait_count = 0;
+			while (wait_count < poll) {
+				ha->isp_ops->rd_reg_indirect(ha, addr1, &temp);
+				if ((temp & mask) != 0)
+					break;
+				wait_count++;
+			}
+			if (wait_count == poll) {
+				ql4_printk(KERN_ERR, ha, "%s: TIMEOUT\n",
+					   __func__);
+				rval = QLA_ERROR;
+				goto exit_process_rddfe;
+			}
+
+			/*
+			 * Step 3: write (value with bit 30 set) + stride2 to
+			 * addr1, poll once more, then read addr2 back.
+			 */
+			ha->isp_ops->wr_reg_indirect(ha, addr1,
+						     ((0x40000000 | value) +
+						     stride2));
+			wait_count = 0;
+			while (wait_count < poll) {
+				ha->isp_ops->rd_reg_indirect(ha, addr1, &temp);
+				if ((temp & mask) != 0)
+					break;
+				wait_count++;
+			}
+
+			if (wait_count == poll) {
+				ql4_printk(KERN_ERR, ha, "%s: TIMEOUT\n",
+					   __func__);
+				rval = QLA_ERROR;
+				goto exit_process_rddfe;
+			}
+
+			ha->isp_ops->rd_reg_indirect(ha, addr2, &data);
+
+			/* Two words per iteration: value written, value read. */
+			*data_ptr++ = cpu_to_le32(wrval);
+			*data_ptr++ = cpu_to_le32(data);
+		}
+	}
+
+	/* Only publish the advanced cursor when every iteration succeeded. */
+	*d_ptr = data_ptr;
+exit_process_rddfe:
+	return rval;
+}
+
+/*
+ * qla4_84xx_minidump_process_rdmdio - capture data for an RDMDIO minidump entry
+ * @ha: adapter whose isp_ops indirect register accessors are used
+ * @entry_hdr: entry header, cast to struct qla8044_minidump_entry_rdmdio
+ * @d_ptr: in/out cursor into the dump buffer
+ *
+ * For each of the entry's "count" iterations: wait for the bus-idle check to
+ * pass, issue three ql4_84xx_ipmdio_wr_reg() calls (carrying value_2, value_1
+ * and the constant 0x2), wait for idle again, read one word with
+ * ql4_84xx_ipmdio_rd_reg(), and store two little-endian words at the cursor:
+ * a selector built from value_2/value_1 and the word that was read.
+ * value_1 is advanced by stride_2 after every iteration.
+ *
+ * Returns QLA_SUCCESS after all iterations, or the non-zero status of the
+ * first helper call that fails.  *d_ptr is updated at the end of every
+ * completed iteration, so after a failure it still covers the iterations
+ * that finished.
+ */
+static uint32_t qla4_84xx_minidump_process_rdmdio(struct scsi_qla_host *ha,
+				struct qla8xxx_minidump_entry_hdr *entry_hdr,
+				uint32_t **d_ptr)
+{
+	int rval = QLA_SUCCESS;
+	uint32_t addr1, addr2, value1, value2, data, selval;
+	uint8_t stride1, stride2;
+	uint32_t addr3, addr4, addr5, addr6, addr7;
+	uint16_t count, loop_cnt;
+	uint32_t poll, mask;
+	uint32_t *data_ptr = *d_ptr;
+	struct qla8044_minidump_entry_rdmdio *rdmdio;
+
+	rdmdio = (struct qla8044_minidump_entry_rdmdio *)entry_hdr;
+	addr1 = le32_to_cpu(rdmdio->addr_1);
+	addr2 = le32_to_cpu(rdmdio->addr_2);
+	value1 = le32_to_cpu(rdmdio->value_1);
+	stride1 = le32_to_cpu(rdmdio->stride_1);
+	stride2 = le32_to_cpu(rdmdio->stride_2);
+	count = le32_to_cpu(rdmdio->count);
+
+	/* NOTE(review): poll is loaded here but not used below. */
+	poll = le32_to_cpu(rdmdio->poll);
+	mask = le32_to_cpu(rdmdio->mask);
+	value2 = le32_to_cpu(rdmdio->value_2);
+
+	/* addr3 is addr1 offset by stride1. */
+	addr3 = addr1 + stride1;
+
+	for (loop_cnt = 0; loop_cnt < count; loop_cnt++) {
+		rval = ql4_84xx_poll_wait_ipmdio_bus_idle(ha, addr1, addr2,
+							 addr3, mask);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		/*
+		 * addr4..addr7 are addr2 minus 1..4 multiples of stride1;
+		 * each is passed as the "addr" argument of the helper.
+		 */
+		addr4 = addr2 - stride1;
+		rval = ql4_84xx_ipmdio_wr_reg(ha, addr1, addr3, mask, addr4,
+					     value2);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		addr5 = addr2 - (2 * stride1);
+		rval = ql4_84xx_ipmdio_wr_reg(ha, addr1, addr3, mask, addr5,
+					     value1);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		addr6 = addr2 - (3 * stride1);
+		rval = ql4_84xx_ipmdio_wr_reg(ha, addr1, addr3, mask,
+					     addr6, 0x2);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		rval = ql4_84xx_poll_wait_ipmdio_bus_idle(ha, addr1, addr2,
+							 addr3, mask);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		addr7 = addr2 - (4 * stride1);
+		rval = ql4_84xx_ipmdio_rd_reg(ha, addr1, addr3,
+						      mask, addr7, &data);
+		if (rval)
+			goto exit_process_rdmdio;
+
+		/* Selector word: value2 at bit 18, value1 at bit 2, low bits 2. */
+		selval = (value2 << 18) | (value1 << 2) | 2;
+
+		/*
+		 * NOTE(review): stride2 was already loaded from the same
+		 * field before the loop; this reload looks redundant.
+		 */
+		stride2 = le32_to_cpu(rdmdio->stride_2);
+		*data_ptr++ = cpu_to_le32(selval);
+		*data_ptr++ = cpu_to_le32(data);
+
+		value1 = value1 + stride2;
+		/* Publish progress after every completed iteration. */
+		*d_ptr = data_ptr;
+	}
+
+exit_process_rdmdio:
+	return rval;
+}
+
+/*
+ * qla4_84xx_minidump_process_pollwr - handle a POLLWR minidump entry
+ * @ha: adapter whose isp_ops indirect register accessors are used
+ * @entry_hdr: entry header, cast to struct qla8044_minidump_entry_pollwr
+ * @d_ptr: dump buffer cursor; this entry type stores nothing, so it is
+ *	   neither read nor advanced
+ *
+ * Polls addr_1 up to "poll" times until a bit selected by "mask" is set,
+ * then writes value_2 to addr_2 and value_1 to addr_1 and polls addr_1
+ * again in the same way.
+ *
+ * Returns QLA_ERROR when the first poll uses up all attempts (nothing is
+ * written in that case), QLA_SUCCESS otherwise.
+ */
+static uint32_t qla4_84xx_minidump_process_pollwr(struct scsi_qla_host *ha,
+				struct qla8xxx_minidump_entry_hdr *entry_hdr,
+				uint32_t **d_ptr)
+{
+	uint32_t addr1, addr2, value1, value2, poll, mask, r_value;
+	struct qla8044_minidump_entry_pollwr *pollwr_hdr;
+	uint32_t wait_count = 0;
+	uint32_t rval = QLA_SUCCESS;
+
+	pollwr_hdr = (struct qla8044_minidump_entry_pollwr *)entry_hdr;
+	addr1 = le32_to_cpu(pollwr_hdr->addr_1);
+	addr2 = le32_to_cpu(pollwr_hdr->addr_2);
+	value1 = le32_to_cpu(pollwr_hdr->value_1);
+	value2 = le32_to_cpu(pollwr_hdr->value_2);
+
+	poll = le32_to_cpu(pollwr_hdr->poll);
+	mask = le32_to_cpu(pollwr_hdr->mask);
+
+	/*
+	 * "poll" is the attempt budget and "mask" selects the bits to wait
+	 * for.  The register value must be tested against mask, not against
+	 * the attempt count.
+	 */
+	while (wait_count < poll) {
+		ha->isp_ops->rd_reg_indirect(ha, addr1, &r_value);
+
+		if ((r_value & mask) != 0)
+			break;
+
+		wait_count++;
+	}
+
+	/* All attempts used without a match (also true when poll is 0). */
+	if (wait_count == poll) {
+		ql4_printk(KERN_ERR, ha, "%s: TIMEOUT\n", __func__);
+		rval = QLA_ERROR;
+		goto exit_process_pollwr;
+	}
+
+	ha->isp_ops->wr_reg_indirect(ha, addr2, value2);
+	ha->isp_ops->wr_reg_indirect(ha, addr1, value1);
+
+	/*
+	 * NOTE(review): the outcome of this second poll is not checked, so
+	 * running out of attempts here is not reported.  Kept as is; confirm
+	 * whether that is intended.
+	 */
+	wait_count = 0;
+	while (wait_count < poll) {
+		ha->isp_ops->rd_reg_indirect(ha, addr1, &r_value);
+
+		if ((r_value & mask) != 0)
+			break;
+		wait_count++;
+	}
+
+exit_process_pollwr:
+	return rval;
+}
+
 static void qla83xx_minidump_process_rdmux2(struct scsi_qla_host *ha,
 				struct qla8xxx_minidump_entry_hdr *entry_hdr,
 				uint32_t **d_ptr)
@@ -2753,6 +3169,24 @@
 			if (rval != QLA_SUCCESS)
 				qla4_8xxx_mark_entry_skipped(ha, entry_hdr, i);
 			break;
+		case QLA8044_RDDFE:
+			rval = qla4_84xx_minidump_process_rddfe(ha, entry_hdr,
+								&data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla4_8xxx_mark_entry_skipped(ha, entry_hdr, i);
+			break;
+		case QLA8044_RDMDIO:
+			rval = qla4_84xx_minidump_process_rdmdio(ha, entry_hdr,
+								 &data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla4_8xxx_mark_entry_skipped(ha, entry_hdr, i);
+			break;
+		case QLA8044_POLLWR:
+			rval = qla4_84xx_minidump_process_pollwr(ha, entry_hdr,
+								 &data_ptr);
+			if (rval != QLA_SUCCESS)
+				qla4_8xxx_mark_entry_skipped(ha, entry_hdr, i);
+			break;
 		case QLA8XXX_RDNOP:
 		default:
 			qla4_8xxx_mark_entry_skipped(ha, entry_hdr, i);

diff --git a/drivers/scsi/qla4xxx/ql4_nx.h b/drivers/scsi/qla4xxx/ql4_nx.h
index 14500a0..337d9fc 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.h
+++ b/drivers/scsi/qla4xxx/ql4_nx.h

@@ -858,6 +858,9 @@
 #define QLA83XX_POLLRD	35
 #define QLA83XX_RDMUX2	36
 #define QLA83XX_POLLRDMWR  37
+#define QLA8044_RDDFE	38
+#define QLA8044_RDMDIO	39
+#define QLA8044_POLLWR	40
 #define QLA8XXX_RDROM	71
 #define QLA8XXX_RDMEM	72
 #define QLA8XXX_CNTRL	98

diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 459b9f7..3202063 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c

@@ -83,12 +83,12 @@
 		" Target Session Recovery Timeout.\n"
 		"\t\t  Default: 120 sec.");
 
-int ql4xmdcapmask = 0x1F;
+int ql4xmdcapmask = 0;
 module_param(ql4xmdcapmask, int, S_IRUGO);
 MODULE_PARM_DESC(ql4xmdcapmask,
 		 " Set the Minidump driver capture mask level.\n"
-		 "\t\t  Default is 0x1F.\n"
-		 "\t\t  Can be set to 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F");
+		 "\t\t  Default is 0 (firmware default capture mask)\n"
+		 "\t\t  Can be set to 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF");
 
 int ql4xenablemd = 1;
 module_param(ql4xenablemd, int, S_IRUGO | S_IWUSR);
@@ -1742,6 +1742,9 @@
 	struct sockaddr *dst_addr;
 	struct scsi_qla_host *ha;
 
+	if (!qla_ep)
+		return -ENOTCONN;
+
 	ha = to_qla_host(qla_ep->host);
 	DEBUG2(ql4_printk(KERN_INFO, ha, "%s: host: %ld\n", __func__,
 			  ha->host_no));
@@ -1749,9 +1752,6 @@
 	switch (param) {
 	case ISCSI_PARAM_CONN_PORT:
 	case ISCSI_PARAM_CONN_ADDRESS:
-		if (!qla_ep)
-			return -ENOTCONN;
-
 		dst_addr = (struct sockaddr *)&qla_ep->dst_addr;
 		if (!dst_addr)
 			return -ENOTCONN;
@@ -2879,7 +2879,6 @@
 	struct iscsi_conn *conn;
 	struct qla_conn *qla_conn;
 	struct sockaddr *dst_addr;
-	int len = 0;
 
 	conn = cls_conn->dd_data;
 	qla_conn = conn->dd_data;
@@ -2893,9 +2892,6 @@
 	default:
 		return iscsi_conn_get_param(cls_conn, param, buf);
 	}
-
-	return len;
-
 }
 
 int qla4xxx_get_ddb_index(struct scsi_qla_host *ha, uint16_t *ddb_index)
@@ -3569,14 +3565,13 @@
 	if (test_bit(OPT_IPV6_DEVICE, &options)) {
 		conn->ipv6_traffic_class = fw_ddb_entry->ipv4_tos;
 
-		conn->link_local_ipv6_addr = kzalloc(IPv6_ADDR_LEN, GFP_KERNEL);
+		conn->link_local_ipv6_addr = kmemdup(
+					fw_ddb_entry->link_local_ipv6_addr,
+					IPv6_ADDR_LEN, GFP_KERNEL);
 		if (!conn->link_local_ipv6_addr) {
 			rc = -ENOMEM;
 			goto exit_copy;
 		}
-
-		memcpy(conn->link_local_ipv6_addr,
-		       fw_ddb_entry->link_local_ipv6_addr, IPv6_ADDR_LEN);
 	} else {
 		conn->ipv4_tos = fw_ddb_entry->ipv4_tos;
 	}
@@ -4565,6 +4560,7 @@
 	     test_bit(DPC_LINK_CHANGED, &ha->dpc_flags) ||
 	     test_bit(DPC_HA_UNRECOVERABLE, &ha->dpc_flags) ||
 	     test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) ||
+	     test_bit(DPC_SYSFS_DDB_EXPORT, &ha->dpc_flags) ||
 	     test_bit(DPC_AEN, &ha->dpc_flags)) {
 		DEBUG2(printk("scsi%ld: %s: scheduling dpc routine"
 			      " - dpc flags = 0x%lx\n",
@@ -4862,9 +4858,6 @@
 		    ha->host_no, __func__));
 		status = ha->isp_ops->reset_firmware(ha);
 		if (status == QLA_SUCCESS) {
-			if (!test_bit(AF_FW_RECOVERY, &ha->flags))
-				qla4xxx_cmd_wait(ha);
-
 			ha->isp_ops->disable_intrs(ha);
 			qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
 			qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
@@ -5432,6 +5425,11 @@
 				qla4xxx_relogin_all_devices(ha);
 		}
 	}
+	if (test_and_clear_bit(DPC_SYSFS_DDB_EXPORT, &ha->dpc_flags)) {
+		if (qla4xxx_sysfs_ddb_export(ha))
+			ql4_printk(KERN_ERR, ha, "%s: Error exporting ddb to sysfs\n",
+				   __func__);
+	}
 }
 
 /**
@@ -8409,7 +8407,7 @@
  *
  * Export the firmware DDB for all send targets and normal targets to sysfs.
  **/
-static int qla4xxx_sysfs_ddb_export(struct scsi_qla_host *ha)
+int qla4xxx_sysfs_ddb_export(struct scsi_qla_host *ha)
 {
 	struct dev_db_entry *fw_ddb_entry = NULL;
 	dma_addr_t fw_ddb_entry_dma;
@@ -8847,11 +8845,8 @@
 		ql4_printk(KERN_ERR, ha,
 			   "%s: No iSCSI boot target configured\n", __func__);
 
-	if (qla4xxx_sysfs_ddb_export(ha))
-		ql4_printk(KERN_ERR, ha,
-			   "%s: Error exporting ddb to sysfs\n", __func__);
-
-		/* Perform the build ddb list and login to each */
+	set_bit(DPC_SYSFS_DDB_EXPORT, &ha->dpc_flags);
+	/* Perform the build ddb list and login to each */
 	qla4xxx_build_ddb_list(ha, INIT_ADAPTER);
 	iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb);
 	qla4xxx_wait_login_resp_boot_tgt(ha);

diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h
index c6ba0a6..f11eaa7 100644
--- a/drivers/scsi/qla4xxx/ql4_version.h
+++ b/drivers/scsi/qla4xxx/ql4_version.h

@@ -5,4 +5,4 @@
  * See LICENSE.qla4xxx for copyright and licensing details.
  */
 
-#define QLA4XXX_DRIVER_VERSION	"5.04.00-k4"
+#define QLA4XXX_DRIVER_VERSION	"5.04.00-k6"

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index f3e9cc0..1328a26 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c

@@ -130,6 +130,7 @@
 #define SCSI_DEBUG_OPT_DIF_ERR   32
 #define SCSI_DEBUG_OPT_DIX_ERR   64
 #define SCSI_DEBUG_OPT_MAC_TIMEOUT  128
+#define SCSI_DEBUG_OPT_SHORT_TRANSFER	256
 /* When "every_nth" > 0 then modulo "every_nth" commands:
  *   - a no response is simulated if SCSI_DEBUG_OPT_TIMEOUT is set
  *   - a RECOVERED_ERROR is simulated on successful read and write
@@ -3583,6 +3584,7 @@
 	int inj_transport = 0;
 	int inj_dif = 0;
 	int inj_dix = 0;
+	int inj_short = 0;
 	int delay_override = 0;
 	int unmap = 0;
 
@@ -3628,6 +3630,8 @@
 			inj_dif = 1; /* to reads and writes below */
 		else if (SCSI_DEBUG_OPT_DIX_ERR & scsi_debug_opts)
 			inj_dix = 1; /* to reads and writes below */
+		else if (SCSI_DEBUG_OPT_SHORT_TRANSFER & scsi_debug_opts)
+			inj_short = 1;
 	}
 
 	if (devip->wlun) {
@@ -3744,6 +3748,10 @@
 		if (scsi_debug_fake_rw)
 			break;
 		get_data_transfer_info(cmd, &lba, &num, &ei_lba);
+
+		if (inj_short)
+			num /= 2;
+
 		errsts = resp_read(SCpnt, lba, num, devip, ei_lba);
 		if (inj_recovered && (0 == errsts)) {
 			mk_sense_buffer(devip, RECOVERED_ERROR,

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f17aa7a..cbe38e5 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c

@@ -1029,6 +1029,7 @@
 		rtn = NEEDS_RETRY;
 	} else {
 		timeleft = wait_for_completion_timeout(&done, timeout);
+		rtn = SUCCESS;
 	}
 
 	shost->eh_action = NULL;
@@ -1951,6 +1952,8 @@
 	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
 
+	blk_rq_set_block_pc(req);
+
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
 	req->cmd[2] = 0;
@@ -1960,7 +1963,6 @@
 
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 	req->timeout = 10 * HZ;
 	req->retries = 5;
@@ -2306,6 +2308,12 @@
 	}
 
 	scmd = scsi_get_command(dev, GFP_KERNEL);
+	if (!scmd) {
+		rtn = FAILED;
+		put_device(&dev->sdev_gendev);
+		goto out_put_autopm_host;
+	}
+
 	blk_rq_init(NULL, &req);
 	scmd->request = &req;
 

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a0c95ca..f7e3163 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c

@@ -195,6 +195,7 @@
 	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
 	if (!req)
 		return ret;
+	blk_rq_set_block_pc(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_WAIT))
@@ -206,7 +207,6 @@
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
@@ -512,68 +512,6 @@
 		scsi_run_queue(sdev->request_queue);
 }
 
-static void __scsi_release_buffers(struct scsi_cmnd *, int);
-
-/*
- * Function:    scsi_end_request()
- *
- * Purpose:     Post-processing of completed commands (usually invoked at end
- *		of upper level post-processing and scsi_io_completion).
- *
- * Arguments:   cmd	 - command that is complete.
- *              error    - 0 if I/O indicates success, < 0 for I/O error.
- *              bytes    - number of bytes of completed I/O
- *		requeue  - indicates whether we should requeue leftovers.
- *
- * Lock status: Assumed that lock is not held upon entry.
- *
- * Returns:     cmd if requeue required, NULL otherwise.
- *
- * Notes:       This is called for block device requests in order to
- *              mark some number of sectors as complete.
- * 
- *		We are guaranteeing that the request queue will be goosed
- *		at some point during this call.
- * Notes:	If cmd was requeued, upon return it will be a stale pointer.
- */
-static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
-					  int bytes, int requeue)
-{
-	struct request_queue *q = cmd->device->request_queue;
-	struct request *req = cmd->request;
-
-	/*
-	 * If there are blocks left over at the end, set up the command
-	 * to queue the remainder of them.
-	 */
-	if (blk_end_request(req, error, bytes)) {
-		/* kill remainder if no retrys */
-		if (error && scsi_noretry_cmd(cmd))
-			blk_end_request_all(req, error);
-		else {
-			if (requeue) {
-				/*
-				 * Bleah.  Leftovers again.  Stick the
-				 * leftovers in the front of the
-				 * queue, and goose the queue again.
-				 */
-				scsi_release_buffers(cmd);
-				scsi_requeue_command(q, cmd);
-				cmd = NULL;
-			}
-			return cmd;
-		}
-	}
-
-	/*
-	 * This will goose the queue request function at the end, so we don't
-	 * need to worry about launching another command.
-	 */
-	__scsi_release_buffers(cmd, 0);
-	scsi_next_command(cmd);
-	return NULL;
-}
-
 static inline unsigned int scsi_sgtable_index(unsigned short nents)
 {
 	unsigned int index;
@@ -625,30 +563,10 @@
 	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free);
 }
 
-static void __scsi_release_buffers(struct scsi_cmnd *cmd, int do_bidi_check)
-{
-
-	if (cmd->sdb.table.nents)
-		scsi_free_sgtable(&cmd->sdb);
-
-	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
-
-	if (do_bidi_check && scsi_bidi_cmnd(cmd)) {
-		struct scsi_data_buffer *bidi_sdb =
-			cmd->request->next_rq->special;
-		scsi_free_sgtable(bidi_sdb);
-		kmem_cache_free(scsi_sdb_cache, bidi_sdb);
-		cmd->request->next_rq->special = NULL;
-	}
-
-	if (scsi_prot_sg_count(cmd))
-		scsi_free_sgtable(cmd->prot_sdb);
-}
-
 /*
  * Function:    scsi_release_buffers()
  *
- * Purpose:     Completion processing for block device I/O requests.
+ * Purpose:     Free resources allocated for a scsi_command.
  *
  * Arguments:   cmd	- command that we are bailing.
  *
@@ -659,15 +577,29 @@
  * Notes:       In the event that an upper level driver rejects a
  *		command, we must release resources allocated during
  *		the __init_io() function.  Primarily this would involve
- *		the scatter-gather table, and potentially any bounce
- *		buffers.
+ *		the scatter-gather table.
  */
 void scsi_release_buffers(struct scsi_cmnd *cmd)
 {
-	__scsi_release_buffers(cmd, 1);
+	if (cmd->sdb.table.nents)
+		scsi_free_sgtable(&cmd->sdb);
+
+	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
+
+	if (scsi_prot_sg_count(cmd))
+		scsi_free_sgtable(cmd->prot_sdb);
 }
 EXPORT_SYMBOL(scsi_release_buffers);
 
+/*
+ * Release the data buffer attached to the second request of a command:
+ * free its scatter-gather table, return the buffer to scsi_sdb_cache and
+ * clear the ->special pointer of cmd->request->next_rq.  The caller must
+ * only use this when cmd->request->next_rq and its ->special are set.
+ */
+static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
+{
+	struct request *bidi_rq = cmd->request->next_rq;
+	struct scsi_data_buffer *bidi_sdb = bidi_rq->special;
+
+	scsi_free_sgtable(bidi_sdb);
+	kmem_cache_free(scsi_sdb_cache, bidi_sdb);
+	bidi_rq->special = NULL;
+}
+
 /**
  * __scsi_error_from_host_byte - translate SCSI error code into errno
  * @cmd:	SCSI command (unused)
@@ -725,16 +657,9 @@
  *
  * Returns:     Nothing
  *
- * Notes:       This function is matched in terms of capabilities to
- *              the function that created the scatter-gather list.
- *              In other words, if there are no bounce buffers
- *              (the normal case for most drivers), we don't need
- *              the logic to deal with cleaning up afterwards.
- *
- *		We must call scsi_end_request().  This will finish off
- *		the specified number of sectors.  If we are done, the
- *		command block will be released and the queue function
- *		will be goosed.  If we are not done then we have to
+ * Notes:       We will finish off the specified number of sectors.  If we
+ *		are done, the command block will be released and the queue
+ *		function will be goosed.  If we are not done then we have to
  *		figure out what to do next:
  *
  *		a) We can call scsi_requeue_command().  The request
@@ -743,7 +668,7 @@
  *		   be used if we made forward progress, or if we want
  *		   to switch from READ(10) to READ(6) for example.
  *
- *		b) We can call scsi_queue_insert().  The request will
+ *		b) We can call __scsi_queue_insert().  The request will
  *		   be put back on the queue and retried using the same
  *		   command as before, possibly after a delay.
  *
@@ -801,6 +726,8 @@
 			req->next_rq->resid_len = scsi_in(cmd)->resid;
 
 			scsi_release_buffers(cmd);
+			scsi_release_bidi_buffers(cmd);
+
 			blk_end_request_all(req, 0);
 
 			scsi_next_command(cmd);
@@ -840,12 +767,25 @@
 	}
 
 	/*
-	 * A number of bytes were successfully read.  If there
-	 * are leftovers and there is some kind of error
-	 * (result != 0), retry the rest.
+	 * If we finished all bytes in the request we are done now.
 	 */
-	if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
-		return;
+	if (!blk_end_request(req, error, good_bytes))
+		goto next_command;
+
+	/*
+	 * Kill remainder if no retries.
+	 */
+	if (error && scsi_noretry_cmd(cmd)) {
+		blk_end_request_all(req, error);
+		goto next_command;
+	}
+
+	/*
+	 * If there had been no error, but we have leftover bytes in the
+	 * request, just queue the command up again.
+	 */
+	if (result == 0)
+		goto requeue;
 
 	error = __scsi_error_from_host_byte(cmd, result);
 
@@ -973,7 +913,6 @@
 	switch (action) {
 	case ACTION_FAIL:
 		/* Give up and fail the remainder of the request */
-		scsi_release_buffers(cmd);
 		if (!(req->cmd_flags & REQ_QUIET)) {
 			if (description)
 				scmd_printk(KERN_INFO, cmd, "%s\n",
@@ -983,12 +922,11 @@
 				scsi_print_sense("", cmd);
 			scsi_print_command(cmd);
 		}
-		if (blk_end_request_err(req, error))
-			scsi_requeue_command(q, cmd);
-		else
-			scsi_next_command(cmd);
-		break;
+		if (!blk_end_request_err(req, error))
+			goto next_command;
+		/*FALLTHRU*/
 	case ACTION_REPREP:
+	requeue:
 		/* Unprep the request and put it back at the head of the queue.
 		 * A new command will be prepared and issued.
 		 */
@@ -1004,6 +942,11 @@
 		__scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0);
 		break;
 	}
+	return;
+
+next_command:
+	scsi_release_buffers(cmd);
+	scsi_next_command(cmd);
 }
 
 static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
@@ -1128,15 +1071,7 @@
 
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 {
-	struct scsi_cmnd *cmd;
-	int ret = scsi_prep_state_check(sdev, req);
-
-	if (ret != BLKPREP_OK)
-		return ret;
-
-	cmd = scsi_get_cmd_from_req(sdev, req);
-	if (unlikely(!cmd))
-		return BLKPREP_DEFER;
+	struct scsi_cmnd *cmd = req->special;
 
 	/*
 	 * BLOCK_PC requests may transfer data, in which case they must
@@ -1179,15 +1114,11 @@
  */
 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 {
-	struct scsi_cmnd *cmd;
-	int ret = scsi_prep_state_check(sdev, req);
-
-	if (ret != BLKPREP_OK)
-		return ret;
+	struct scsi_cmnd *cmd = req->special;
 
 	if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
 			 && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
-		ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
+		int ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
 		if (ret != BLKPREP_OK)
 			return ret;
 	}
@@ -1197,16 +1128,13 @@
 	 */
 	BUG_ON(!req->nr_phys_segments);
 
-	cmd = scsi_get_cmd_from_req(sdev, req);
-	if (unlikely(!cmd))
-		return BLKPREP_DEFER;
-
 	memset(cmd->cmnd, 0, BLK_MAX_CDB);
 	return scsi_init_io(cmd, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(scsi_setup_fs_cmnd);
 
-int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
+static int
+scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 {
 	int ret = BLKPREP_OK;
 
@@ -1258,9 +1186,9 @@
 	}
 	return ret;
 }
-EXPORT_SYMBOL(scsi_prep_state_check);
 
-int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
+static int
+scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 {
 	struct scsi_device *sdev = q->queuedata;
 
@@ -1291,18 +1219,44 @@
 
 	return ret;
 }
-EXPORT_SYMBOL(scsi_prep_return);
 
-int scsi_prep_fn(struct request_queue *q, struct request *req)
+static int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
 	struct scsi_device *sdev = q->queuedata;
-	int ret = BLKPREP_KILL;
+	struct scsi_cmnd *cmd;
+	int ret;
 
-	if (req->cmd_type == REQ_TYPE_BLOCK_PC)
+	ret = scsi_prep_state_check(sdev, req);
+	if (ret != BLKPREP_OK)
+		goto out;
+
+	cmd = scsi_get_cmd_from_req(sdev, req);
+	if (unlikely(!cmd)) {
+		ret = BLKPREP_DEFER;
+		goto out;
+	}
+
+	if (req->cmd_type == REQ_TYPE_FS)
+		ret = scsi_cmd_to_driver(cmd)->init_command(cmd);
+	else if (req->cmd_type == REQ_TYPE_BLOCK_PC)
 		ret = scsi_setup_blk_pc_cmnd(sdev, req);
+	else
+		ret = BLKPREP_KILL;
+
+out:
 	return scsi_prep_return(q, req, ret);
 }
-EXPORT_SYMBOL(scsi_prep_fn);
+
+/*
+ * Unprep hook for the request queue: for REQ_TYPE_FS requests, call the
+ * owning driver's optional uninit_command() on the command stored in
+ * req->special.  Requests of any other type are left alone.
+ */
+static void scsi_unprep_fn(struct request_queue *q, struct request *req)
+{
+	struct scsi_cmnd *cmd;
+	struct scsi_driver *drv;
+
+	if (req->cmd_type != REQ_TYPE_FS)
+		return;
+
+	cmd = req->special;
+	drv = scsi_cmd_to_driver(cmd);
+	if (drv->uninit_command)
+		drv->uninit_command(cmd);
+}
 
 /*
  * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
@@ -1723,6 +1677,7 @@
 		return NULL;
 
 	blk_queue_prep_rq(q, scsi_prep_fn);
+	blk_queue_unprep_rq(q, scsi_unprep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
 	blk_queue_rq_timed_out(q, scsi_times_out);
 	blk_queue_lld_busy(q, scsi_lld_busy);

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 96af195..e9689d5 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c

@@ -109,6 +109,8 @@
 static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
 static void sd_rescan(struct device *);
+static int sd_init_command(struct scsi_cmnd *SCpnt);
+static void sd_uninit_command(struct scsi_cmnd *SCpnt);
 static int sd_done(struct scsi_cmnd *);
 static int sd_eh_action(struct scsi_cmnd *, int);
 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
@@ -503,6 +505,8 @@
 		.pm		= &sd_pm_ops,
 	},
 	.rescan			= sd_rescan,
+	.init_command		= sd_init_command,
+	.uninit_command		= sd_uninit_command,
 	.done			= sd_done,
 	.eh_action		= sd_eh_action,
 };
@@ -836,9 +840,9 @@
 	return scsi_setup_blk_pc_cmnd(sdp, rq);
 }
 
-static void sd_unprep_fn(struct request_queue *q, struct request *rq)
+static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
-	struct scsi_cmnd *SCpnt = rq->special;
+	struct request *rq = SCpnt->request;
 
 	if (rq->cmd_flags & REQ_DISCARD)
 		__free_page(rq->completion_data);
@@ -850,18 +854,10 @@
 	}
 }
 
-/**
- *	sd_prep_fn - build a scsi (read or write) command from
- *	information in the request structure.
- *	@SCpnt: pointer to mid-level's per scsi command structure that
- *	contains request and into which the scsi command is written
- *
- *	Returns 1 if successful and 0 if error (or cannot be done now).
- **/
-static int sd_prep_fn(struct request_queue *q, struct request *rq)
+static int sd_init_command(struct scsi_cmnd *SCpnt)
 {
-	struct scsi_cmnd *SCpnt;
-	struct scsi_device *sdp = q->queuedata;
+	struct request *rq = SCpnt->request;
+	struct scsi_device *sdp = SCpnt->device;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp;
 	sector_t block = blk_rq_pos(rq);
@@ -883,12 +879,6 @@
 	} else if (rq->cmd_flags & REQ_FLUSH) {
 		ret = scsi_setup_flush_cmnd(sdp, rq);
 		goto out;
-	} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		ret = scsi_setup_blk_pc_cmnd(sdp, rq);
-		goto out;
-	} else if (rq->cmd_type != REQ_TYPE_FS) {
-		ret = BLKPREP_KILL;
-		goto out;
 	}
 	ret = scsi_setup_fs_cmnd(sdp, rq);
 	if (ret != BLKPREP_OK)
@@ -900,11 +890,10 @@
 	 * is used for a killable error condition */
 	ret = BLKPREP_KILL;
 
-	SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt,
-					"sd_prep_fn: block=%llu, "
-					"count=%d\n",
-					(unsigned long long)block,
-					this_count));
+	SCSI_LOG_HLQUEUE(1,
+		scmd_printk(KERN_INFO, SCpnt,
+			"%s: block=%llu, count=%d\n",
+			__func__, (unsigned long long)block, this_count));
 
 	if (!sdp || !scsi_device_online(sdp) ||
 	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
@@ -1124,7 +1113,7 @@
 	 */
 	ret = BLKPREP_OK;
  out:
-	return scsi_prep_return(q, rq, ret);
+	return ret;
 }
 
 /**
@@ -1686,12 +1675,12 @@
 						   sshdr.ascq));
 	}
 #endif
+	sdkp->medium_access_timed_out = 0;
+
 	if (driver_byte(result) != DRIVER_SENSE &&
 	    (!sense_valid || sense_deferred))
 		goto out;
 
-	sdkp->medium_access_timed_out = 0;
-
 	switch (sshdr.sense_key) {
 	case HARDWARE_ERROR:
 	case MEDIUM_ERROR:
@@ -2875,9 +2864,6 @@
 
 	sd_revalidate_disk(gd);
 
-	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
-	blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
-
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_EXT_DEVT;
 	if (sdp->removable) {
@@ -3025,8 +3011,6 @@
 
 	async_synchronize_full_domain(&scsi_sd_pm_domain);
 	async_synchronize_full_domain(&scsi_sd_probe_domain);
-	blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
-	blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index df5e961..53268aa 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c

@@ -1653,10 +1653,9 @@
 	if (!rq)
 		return -ENOMEM;
 
+	blk_rq_set_block_pc(rq);
 	memcpy(rq->cmd, cmd, hp->cmd_len);
-
 	rq->cmd_len = hp->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	srp->rq = rq;
 	rq->end_io_data = srp;

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 40d8592..93cbd36 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c

@@ -79,6 +79,7 @@
 static DEFINE_MUTEX(sr_mutex);
 static int sr_probe(struct device *);
 static int sr_remove(struct device *);
+static int sr_init_command(struct scsi_cmnd *SCpnt);
 static int sr_done(struct scsi_cmnd *);
 static int sr_runtime_suspend(struct device *dev);
 
@@ -94,6 +95,7 @@
 		.remove		= sr_remove,
 		.pm		= &sr_pm_ops,
 	},
+	.init_command		= sr_init_command,
 	.done			= sr_done,
 };
 
@@ -378,21 +380,14 @@
 	return good_bytes;
 }
 
-static int sr_prep_fn(struct request_queue *q, struct request *rq)
+static int sr_init_command(struct scsi_cmnd *SCpnt)
 {
 	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
-	struct scsi_cmnd *SCpnt;
-	struct scsi_device *sdp = q->queuedata;
+	struct request *rq = SCpnt->request;
+	struct scsi_device *sdp = SCpnt->device;
 	int ret;
 
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		ret = scsi_setup_blk_pc_cmnd(sdp, rq);
-		goto out;
-	} else if (rq->cmd_type != REQ_TYPE_FS) {
-		ret = BLKPREP_KILL;
-		goto out;
-	}
 	ret = scsi_setup_fs_cmnd(sdp, rq);
 	if (ret != BLKPREP_OK)
 		goto out;
@@ -517,7 +512,7 @@
 	 */
 	ret = BLKPREP_OK;
  out:
-	return scsi_prep_return(q, rq, ret);
+	return ret;
 }
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
@@ -718,7 +713,6 @@
 
 	/* FIXME: need to handle a get_capabilities failure properly ?? */
 	get_capabilities(cd);
-	blk_queue_prep_rq(sdev->request_queue, sr_prep_fn);
 	sr_vendor_init(cd);
 
 	disk->driverfs_dev = &sdev->sdev_gendev;
@@ -993,7 +987,6 @@
 
 	scsi_autopm_get_device(cd->device);
 
-	blk_queue_prep_rq(cd->device->request_queue, scsi_prep_fn);
 	del_gendisk(cd->disk);
 
 	mutex_lock(&sr_ref_mutex);

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index afc834e..14eb4b2 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c

@@ -484,7 +484,7 @@
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	mdata->null_mapped = 1;

diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 636bbe0..8822079 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c

@@ -364,7 +364,7 @@
 	return( 0 );
     if (TagAlloc[cmd->device->id][cmd->device->lun].nr_allocated >=
 	TagAlloc[cmd->device->id][cmd->device->lun].queue_size ) {
-	TAG_PRINTK( "scsi%d: target %d lun %d: no free tags\n",
+	dprintk(NDEBUG_TAGS,  "scsi%d: target %d lun %d: no free tags\n",
 		    H_NO(cmd), cmd->device->id, cmd->device->lun );
 	return( 1 );
     }
@@ -388,7 +388,7 @@
 	!setup_use_tagged_queuing || !cmd->device->tagged_supported) {
 	cmd->tag = TAG_NONE;
 	hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
-	TAG_PRINTK( "scsi%d: target %d lun %d now allocated by untagged "
+	dprintk(NDEBUG_TAGS,  "scsi%d: target %d lun %d now allocated by untagged "
 		    "command\n", H_NO(cmd), cmd->device->id, cmd->device->lun );
     }
     else {
@@ -397,7 +397,7 @@
 	cmd->tag = find_first_zero_bit( &ta->allocated, MAX_TAGS );
 	set_bit( cmd->tag, &ta->allocated );
 	ta->nr_allocated++;
-	TAG_PRINTK( "scsi%d: using tag %d for target %d lun %d "
+	dprintk(NDEBUG_TAGS,  "scsi%d: using tag %d for target %d lun %d "
 		    "(now %d tags in use)\n",
 		    H_NO(cmd), cmd->tag, cmd->device->id, cmd->device->lun,
 		    ta->nr_allocated );
@@ -415,7 +415,7 @@
 
     if (cmd->tag == TAG_NONE) {
 	hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-	TAG_PRINTK( "scsi%d: target %d lun %d untagged cmd finished\n",
+	dprintk(NDEBUG_TAGS,  "scsi%d: target %d lun %d untagged cmd finished\n",
 		    H_NO(cmd), cmd->device->id, cmd->device->lun );
     }
     else if (cmd->tag >= MAX_TAGS) {
@@ -426,7 +426,7 @@
 	TAG_ALLOC *ta = &TagAlloc[cmd->device->id][cmd->device->lun];
 	clear_bit( cmd->tag, &ta->allocated );
 	ta->nr_allocated--;
-	TAG_PRINTK( "scsi%d: freed tag %d for target %d lun %d\n",
+	dprintk(NDEBUG_TAGS,  "scsi%d: freed tag %d for target %d lun %d\n",
 		    H_NO(cmd), cmd->tag, cmd->device->id, cmd->device->lun );
     }
 }
@@ -484,7 +484,7 @@
 
 #include <linux/delay.h>
 
-#if 1
+#if NDEBUG
 static struct {
     unsigned char mask;
     const char * name;} 
@@ -572,12 +572,6 @@
     }
 }
 
-#else /* !NDEBUG */
-
-/* dummies... */
-__inline__ void NCR5380_print(struct Scsi_Host *instance) { };
-__inline__ void NCR5380_print_phase(struct Scsi_Host *instance) { };
-
 #endif
 
 /*
@@ -618,7 +612,7 @@
 {
     static int done = 0;
     if (!done) {
-	INI_PRINTK("scsi : NCR5380_all_init()\n");
+	dprintk(NDEBUG_INIT, "scsi : NCR5380_all_init()\n");
 	done = 1;
     }
 }
@@ -681,8 +675,8 @@
 	Scsi_Cmnd *ptr;
 	unsigned long flags;
 
-	NCR_PRINT(NDEBUG_ANY);
-	NCR_PRINT_PHASE(NDEBUG_ANY);
+	NCR5380_dprint(NDEBUG_ANY, instance);
+	NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
 	hostdata = (struct NCR5380_hostdata *)instance->hostdata;
 
@@ -928,7 +922,7 @@
 
     local_irq_restore(flags);
 
-    QU_PRINTK("scsi%d: command added to %s of queue\n", H_NO(cmd),
+    dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
 	      (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
     /* If queue_command() is called from an interrupt (real one or bottom
@@ -998,7 +992,7 @@
 	done = 1;
 	
 	if (!hostdata->connected) {
-	    MAIN_PRINTK( "scsi%d: not connected\n", HOSTNO );
+	    dprintk(NDEBUG_MAIN,  "scsi%d: not connected\n", HOSTNO );
 	    /*
 	     * Search through the issue_queue for a command destined
 	     * for a target that's not busy.
@@ -1012,12 +1006,8 @@
 	    for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
 		 prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp) ) {
 
-#if (NDEBUG & NDEBUG_LISTS)
 		if (prev != tmp)
-		    printk("MAIN tmp=%p   target=%d   busy=%d lun=%d\n",
-			   tmp, tmp->target, hostdata->busy[tmp->target],
-			   tmp->lun);
-#endif
+			dprintk(NDEBUG_LISTS, "MAIN tmp=%p   target=%d   busy=%d lun=%d\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
 		/*  When we find one, remove it from the issue queue. */
 		/* ++guenther: possible race with Falcon locking */
 		if (
@@ -1047,9 +1037,9 @@
 		     * On failure, we must add the command back to the
 		     *   issue queue so we can keep trying.	
 		     */
-		    MAIN_PRINTK("scsi%d: main(): command for target %d "
+		    dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
 				"lun %d removed from issue_queue\n",
-				HOSTNO, tmp->target, tmp->lun);
+				HOSTNO, tmp->device->id, tmp->device->lun);
 		    /* 
 		     * REQUEST SENSE commands are issued without tagged
 		     * queueing, even on SCSI-II devices because the 
@@ -1076,7 +1066,7 @@
 			cmd_free_tag( tmp );
 #endif
 			local_irq_restore(flags);
-			MAIN_PRINTK("scsi%d: main(): select() failed, "
+			dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
 				    "returned to issue_queue\n", HOSTNO);
 			if (hostdata->connected)
 			    break;
@@ -1090,10 +1080,10 @@
 #endif
 	    ) {
 	    local_irq_restore(flags);
-	    MAIN_PRINTK("scsi%d: main: performing information transfer\n",
+	    dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
 			HOSTNO);
 	    NCR5380_information_transfer(instance);
-	    MAIN_PRINTK("scsi%d: main: done set false\n", HOSTNO);
+	    dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
 	    done = 0;
 	}
     } while (!done);
@@ -1130,7 +1120,7 @@
 	return;
     }
 
-    DMA_PRINTK("scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
+    dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
 	       HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
 	       NCR5380_read(STATUS_REG));
 
@@ -1189,27 +1179,27 @@
     int done = 1, handled = 0;
     unsigned char basr;
 
-    INT_PRINTK("scsi%d: NCR5380 irq triggered\n", HOSTNO);
+    dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
 
     /* Look for pending interrupts */
     basr = NCR5380_read(BUS_AND_STATUS_REG);
-    INT_PRINTK("scsi%d: BASR=%02x\n", HOSTNO, basr);
+    dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
     /* dispatch to appropriate routine if found and done=0 */
     if (basr & BASR_IRQ) {
-	NCR_PRINT(NDEBUG_INTR);
+	NCR5380_dprint(NDEBUG_INTR, instance);
 	if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
 	    done = 0;
 //	    ENABLE_IRQ();
-	    INT_PRINTK("scsi%d: SEL interrupt\n", HOSTNO);
+	    dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
 	    NCR5380_reselect(instance);
 	    (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 	}
 	else if (basr & BASR_PARITY_ERROR) {
-	    INT_PRINTK("scsi%d: PARITY interrupt\n", HOSTNO);
+	    dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
 	    (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 	}
 	else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-	    INT_PRINTK("scsi%d: RESET interrupt\n", HOSTNO);
+	    dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
 	    (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 	}
 	else {
@@ -1229,7 +1219,7 @@
 		((basr & BASR_END_DMA_TRANSFER) || 
 		 !(basr & BASR_PHASE_MATCH))) {
 		    
-		INT_PRINTK("scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
+		dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
 		NCR5380_dma_complete( instance );
 		done = 0;
 //		ENABLE_IRQ();
@@ -1238,7 +1228,7 @@
 	    {
 /* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
 		if (basr & BASR_PHASE_MATCH)
-		   INT_PRINTK("scsi%d: unknown interrupt, "
+		   dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
 			   "BASR 0x%x, MR 0x%x, SR 0x%x\n",
 			   HOSTNO, basr, NCR5380_read(MODE_REG),
 			   NCR5380_read(STATUS_REG));
@@ -1262,7 +1252,7 @@
     }
     
     if (!done) {
-	INT_PRINTK("scsi%d: in int routine, calling main\n", HOSTNO);
+	dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
 	/* Put a call to NCR5380_main() on the queue... */
 	queue_main();
     }
@@ -1338,8 +1328,8 @@
     unsigned long flags;
 
     hostdata->restart_select = 0;
-    NCR_PRINT(NDEBUG_ARBITRATION);
-    ARB_PRINTK("scsi%d: starting arbitration, id = %d\n", HOSTNO,
+    NCR5380_dprint(NDEBUG_ARBITRATION, instance);
+    dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
 	       instance->this_id);
 
     /* 
@@ -1385,7 +1375,7 @@
 	 && !hostdata->connected);
 #endif
 
-    ARB_PRINTK("scsi%d: arbitration complete\n", HOSTNO);
+    dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
 
     if (hostdata->connected) {
 	NCR5380_write(MODE_REG, MR_BASE); 
@@ -1406,7 +1396,7 @@
 	(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
 	hostdata->connected) {
 	NCR5380_write(MODE_REG, MR_BASE); 
-	ARB_PRINTK("scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
+	dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
 		   HOSTNO);
 	return -1;
     }
@@ -1421,7 +1411,7 @@
 	hostdata->connected) {
 	NCR5380_write(MODE_REG, MR_BASE);
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-	ARB_PRINTK("scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
+	dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
 		   HOSTNO);
 	return -1;
     }
@@ -1444,7 +1434,7 @@
 	return -1;
     }
 
-    ARB_PRINTK("scsi%d: won arbitration\n", HOSTNO);
+    dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
 
     /* 
      * Now that we have won arbitration, start Selection process, asserting 
@@ -1504,7 +1494,7 @@
 
     udelay(1);
 
-    SEL_PRINTK("scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+    dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
 
     /* 
      * The SCSI specification calls for a 250 ms timeout for the actual 
@@ -1559,7 +1549,7 @@
 	    printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
 	    if (hostdata->restart_select)
 		printk(KERN_NOTICE "\trestart select\n");
-	    NCR_PRINT(NDEBUG_ANY);
+	    NCR5380_dprint(NDEBUG_ANY, instance);
 	    NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 	    return -1;
 	}
@@ -1572,7 +1562,7 @@
 #endif
 	cmd->scsi_done(cmd);
 	NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-	SEL_PRINTK("scsi%d: target did not respond within 250ms\n", HOSTNO);
+	dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
 	NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 	return 0;
     } 
@@ -1597,7 +1587,7 @@
     /* Wait for start of REQ/ACK handshake */
     while (!(NCR5380_read(STATUS_REG) & SR_REQ));
 
-    SEL_PRINTK("scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
+    dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
 	       HOSTNO, cmd->device->id);
     tmp[0] = IDENTIFY(1, cmd->device->lun);
 
@@ -1617,7 +1607,7 @@
     data = tmp;
     phase = PHASE_MSGOUT;
     NCR5380_transfer_pio(instance, &phase, &len, &data);
-    SEL_PRINTK("scsi%d: nexus established.\n", HOSTNO);
+    dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
     /* XXX need to handle errors here */
     hostdata->connected = cmd;
 #ifndef SUPPORT_TAGS
@@ -1680,12 +1670,12 @@
 	 */
 	while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ));
 
-	HSH_PRINTK("scsi%d: REQ detected\n", HOSTNO);
+	dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
 
 	/* Check for phase mismatch */	
 	if ((tmp & PHASE_MASK) != p) {
-	    PIO_PRINTK("scsi%d: phase mismatch\n", HOSTNO);
-	    NCR_PRINT_PHASE(NDEBUG_PIO);
+	    dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+	    NCR5380_dprint_phase(NDEBUG_PIO, instance);
 	    break;
 	}
 
@@ -1708,24 +1698,24 @@
 	    if (!((p & SR_MSG) && c > 1)) {
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | 
 		    ICR_ASSERT_DATA);
-		NCR_PRINT(NDEBUG_PIO);
+		NCR5380_dprint(NDEBUG_PIO, instance);
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | 
 			ICR_ASSERT_DATA | ICR_ASSERT_ACK);
 	    } else {
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
 		    ICR_ASSERT_DATA | ICR_ASSERT_ATN);
-		NCR_PRINT(NDEBUG_PIO);
+		NCR5380_dprint(NDEBUG_PIO, instance);
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | 
 		    ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
 	    }
 	} else {
-	    NCR_PRINT(NDEBUG_PIO);
+	    NCR5380_dprint(NDEBUG_PIO, instance);
 	    NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
 	}
 
 	while (NCR5380_read(STATUS_REG) & SR_REQ);
 
-	HSH_PRINTK("scsi%d: req false, handshake complete\n", HOSTNO);
+	dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
 
 /*
  * We have several special cases to consider during REQ/ACK handshaking : 
@@ -1746,7 +1736,7 @@
 	} 
     } while (--c);
 
-    PIO_PRINTK("scsi%d: residual %d\n", HOSTNO, c);
+    dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
 
     *count = c;
     *data = d;
@@ -1854,7 +1844,7 @@
     }
     hostdata->dma_len = c;
 
-    DMA_PRINTK("scsi%d: initializing DMA for %s, %d bytes %s %p\n",
+    dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
 	       HOSTNO, (p & SR_IO) ? "reading" : "writing",
 	       c, (p & SR_IO) ? "to" : "from", *data);
 
@@ -1931,7 +1921,7 @@
 	    phase = (tmp & PHASE_MASK); 
  	    if (phase != old_phase) {
 		old_phase = phase;
-		NCR_PRINT_PHASE(NDEBUG_INFORMATION);
+		NCR5380_dprint_phase(NDEBUG_INFORMATION, instance);
 	    }
 
 	    if(phase == PHASE_CMDOUT) {
@@ -1996,7 +1986,7 @@
 		    --cmd->SCp.buffers_residual;
 		    cmd->SCp.this_residual = cmd->SCp.buffer->length;
 		    cmd->SCp.ptr = SGADDR(cmd->SCp.buffer);
-		    INF_PRINTK("scsi%d: %d bytes and %d buffers left\n",
+		    dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
 			       HOSTNO, cmd->SCp.this_residual,
 			       cmd->SCp.buffers_residual);
 		}
@@ -2088,7 +2078,7 @@
 		    /* Accept message by clearing ACK */
 		    NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		    
-		    LNK_PRINTK("scsi%d: target %d lun %d linked command "
+		    dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %d linked command "
 			       "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
 
 		    /* Enable reselect interrupts */
@@ -2113,7 +2103,7 @@
 		     * and don't free it! */
 		    cmd->next_link->tag = cmd->tag;
 		    cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8); 
-		    LNK_PRINTK("scsi%d: target %d lun %d linked request "
+		    dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %d linked request "
 			       "done, calling scsi_done().\n",
 			       HOSTNO, cmd->device->id, cmd->device->lun);
 #ifdef NCR5380_STATS
@@ -2128,7 +2118,7 @@
 		    /* Accept message by clearing ACK */
 		    NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		    hostdata->connected = NULL;
-		    QU_PRINTK("scsi%d: command for target %d, lun %d "
+		    dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %d "
 			      "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
 #ifdef SUPPORT_TAGS
 		    cmd_free_tag( cmd );
@@ -2142,7 +2132,7 @@
 			/* ++Andreas: the mid level code knows about
 			   QUEUE_FULL now. */
 			TAG_ALLOC *ta = &TagAlloc[cmd->device->id][cmd->device->lun];
-			TAG_PRINTK("scsi%d: target %d lun %d returned "
+			dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d returned "
 				   "QUEUE_FULL after %d commands\n",
 				   HOSTNO, cmd->device->id, cmd->device->lun,
 				   ta->nr_allocated);
@@ -2186,7 +2176,7 @@
 		    if ((cmd->cmnd[0] != REQUEST_SENSE) && 
 			(status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
 			scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-			ASEN_PRINTK("scsi%d: performing request sense\n",
+			dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n",
 				    HOSTNO);
 			/* this is initialized from initialize_SCp 
 			cmd->SCp.buffer = NULL;
@@ -2198,7 +2188,7 @@
 			SET_NEXT(cmd, hostdata->issue_queue);
 		        hostdata->issue_queue = (struct scsi_cmnd *) cmd;
 		        local_irq_restore(flags);
-			QU_PRINTK("scsi%d: REQUEST SENSE added to head of "
+			dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
 				  "issue queue\n", H_NO(cmd));
 		   } else
 #endif /* def AUTOSENSE */
@@ -2238,7 +2228,7 @@
 			cmd->device->tagged_supported = 0;
 			hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
 			cmd->tag = TAG_NONE;
-			TAG_PRINTK("scsi%d: target %d lun %d rejected "
+			dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d rejected "
 				   "QUEUE_TAG message; tagged queuing "
 				   "disabled\n",
 				   HOSTNO, cmd->device->id, cmd->device->lun);
@@ -2255,7 +2245,7 @@
 		    hostdata->connected = NULL;
 		    hostdata->disconnected_queue = cmd;
 		    local_irq_restore(flags);
-		    QU_PRINTK("scsi%d: command for target %d lun %d was "
+		    dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %d was "
 			      "moved from connected to the "
 			      "disconnected_queue\n", HOSTNO, 
 			      cmd->device->id, cmd->device->lun);
@@ -2308,13 +2298,13 @@
 		    /* Accept first byte by clearing ACK */
 		    NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-		    EXT_PRINTK("scsi%d: receiving extended message\n", HOSTNO);
+		    dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
 
 		    len = 2;
 		    data = extended_msg + 1;
 		    phase = PHASE_MSGIN;
 		    NCR5380_transfer_pio(instance, &phase, &len, &data);
-		    EXT_PRINTK("scsi%d: length=%d, code=0x%02x\n", HOSTNO,
+		    dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
 			       (int)extended_msg[1], (int)extended_msg[2]);
 
 		    if (!len && extended_msg[1] <= 
@@ -2326,7 +2316,7 @@
 			phase = PHASE_MSGIN;
 
 			NCR5380_transfer_pio(instance, &phase, &len, &data);
-			EXT_PRINTK("scsi%d: message received, residual %d\n",
+			dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
 				   HOSTNO, len);
 
 			switch (extended_msg[2]) {
@@ -2416,7 +2406,7 @@
 		break;
 	    default:
 		printk("scsi%d: unknown phase\n", HOSTNO);
-		NCR_PRINT(NDEBUG_ANY);
+		NCR5380_dprint(NDEBUG_ANY, instance);
 	    } /* switch(phase) */
 	} /* if (tmp * SR_REQ) */ 
     } /* while (1) */
@@ -2458,7 +2448,7 @@
 
     target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
 
-    RSL_PRINTK("scsi%d: reselect\n", HOSTNO);
+    dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
 
     /* 
      * At this point, we have detected that our SCSI ID is on the bus,
@@ -2580,14 +2570,14 @@
 	if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
 	    msg[1] == SIMPLE_QUEUE_TAG)
 	    tag = msg[2];
-	TAG_PRINTK("scsi%d: target mask %02x, lun %d sent tag %d at "
+	dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
 		   "reselection\n", HOSTNO, target_mask, lun, tag);
     }
 #endif
     
     hostdata->connected = tmp;
-    RSL_PRINTK("scsi%d: nexus established, target = %d, lun = %d, tag = %d\n",
-	       HOSTNO, tmp->target, tmp->lun, tmp->tag);
+    dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %d, tag = %d\n",
+	       HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
 }
 
 
@@ -2622,7 +2612,7 @@
 
     local_irq_save(flags);
     
-    ABRT_PRINTK("scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
+    dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
 		NCR5380_read(BUS_AND_STATUS_REG),
 		NCR5380_read(STATUS_REG));
 
@@ -2635,7 +2625,7 @@
 
     if (hostdata->connected == cmd) {
 
-	ABRT_PRINTK("scsi%d: aborting connected command\n", HOSTNO);
+	dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
 /*
  * We should perform BSY checking, and make sure we haven't slipped
  * into BUS FREE.
@@ -2664,11 +2654,11 @@
 #endif
 	  local_irq_restore(flags);
 	  cmd->scsi_done(cmd);
-	  return SCSI_ABORT_SUCCESS;
+	  return SUCCESS;
 	} else {
 /*	  local_irq_restore(flags); */
 	  printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-	  return SCSI_ABORT_ERROR;
+	  return FAILED;
 	} 
    }
 #endif
@@ -2686,12 +2676,12 @@
 	    SET_NEXT(tmp, NULL);
 	    tmp->result = DID_ABORT << 16;
 	    local_irq_restore(flags);
-	    ABRT_PRINTK("scsi%d: abort removed command from issue queue.\n",
+	    dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
 			HOSTNO);
 	    /* Tagged queuing note: no tag to free here, hasn't been assigned
 	     * yet... */
 	    tmp->scsi_done(tmp);
-	    return SCSI_ABORT_SUCCESS;
+	    return SUCCESS;
 	}
 
 /* 
@@ -2707,8 +2697,8 @@
 
     if (hostdata->connected) {
 	local_irq_restore(flags);
-	ABRT_PRINTK("scsi%d: abort failed, command connected.\n", HOSTNO);
-        return SCSI_ABORT_SNOOZE;
+	dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
+        return FAILED;
     }
 
 /*
@@ -2740,12 +2730,12 @@
 	 tmp = NEXT(tmp)) 
         if (cmd == tmp) {
             local_irq_restore(flags);
-	    ABRT_PRINTK("scsi%d: aborting disconnected command.\n", HOSTNO);
+	    dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
   
             if (NCR5380_select (instance, cmd, (int) cmd->tag)) 
-		return SCSI_ABORT_BUSY;
+		return FAILED;
 
-	    ABRT_PRINTK("scsi%d: nexus reestablished.\n", HOSTNO);
+	    dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
 
 	    do_abort (instance);
 
@@ -2769,7 +2759,7 @@
 #endif
 		    local_irq_restore(flags);
 		    tmp->scsi_done(tmp);
-		    return SCSI_ABORT_SUCCESS;
+		    return SUCCESS;
 		}
 	}
 
@@ -2786,7 +2776,7 @@
     local_irq_restore(flags);
     printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO); 
 
-    return SCSI_ABORT_NOT_RUNNING;
+    return FAILED;
 }
 
 
@@ -2795,7 +2785,7 @@
  * 
  * Purpose : reset the SCSI bus.
  *
- * Returns : SCSI_RESET_WAKEUP
+ * Returns : SUCCESS or FAILED
  *
  */ 
 
@@ -2804,7 +2794,7 @@
     SETUP_HOSTDATA(cmd->device->host);
     int           i;
     unsigned long flags;
-#if 1
+#if defined(RESET_RUN_DONE)
     struct scsi_cmnd *connected, *disconnected_queue;
 #endif
 
@@ -2826,8 +2816,15 @@
      * through anymore ... */
     (void)NCR5380_read( RESET_PARITY_INTERRUPT_REG );
 
-#if 1 /* XXX Should now be done by midlevel code, but it's broken XXX */
-      /* XXX see below                                            XXX */
+	/* MSch 20140115 - looking at the generic NCR5380 driver, all of this
+	 * should go.
+	 * Catch-22: if we don't clear all queues, the SCSI driver lock will
+	 * not be released by atari_scsi_reset()!
+	 */
+
+#if defined(RESET_RUN_DONE)
+	/* XXX Should now be done by midlevel code, but it's broken XXX */
+	/* XXX see below                                            XXX */
 
     /* MSch: old-style reset: actually abort all command processing here */
 
@@ -2857,7 +2854,7 @@
      */
 
     if ((cmd = connected)) {
-	ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
+	dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
 	cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
 	cmd->scsi_done( cmd );
     }
@@ -2869,14 +2866,14 @@
 	cmd->scsi_done( cmd );
     }
     if (i > 0)
-	ABRT_PRINTK("scsi: reset aborted %d disconnected command(s)\n", i);
+	dprintk(NDEBUG_ABORT, "scsi: reset aborted %d disconnected command(s)\n", i);
 
 
     /* since all commands have been explicitly terminated, we need to tell
      * the midlevel code that the reset was SUCCESSFUL, and there is no 
      * need to 'wake up' the commands by a request_sense
      */
-    return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+    return SUCCESS;
 #else /* 1 */
 
     /* MSch: new-style reset handling: let the mid-level do what it can */
@@ -2903,11 +2900,11 @@
      */
 
     if (hostdata->issue_queue)
-	ABRT_PRINTK("scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
+	dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
     if (hostdata->connected) 
-	ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
+	dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
     if (hostdata->disconnected_queue)
-	ABRT_PRINTK("scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+	dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
 
     local_irq_save(flags);
     hostdata->issue_queue = NULL;
@@ -2924,7 +2921,7 @@
     local_irq_restore(flags);
 
     /* we did no complete reset of all commands, so a wakeup is required */
-    return SCSI_RESET_WAKEUP | SCSI_RESET_BUS_RESET;
+    return SUCCESS;
 #endif /* 1 */
 }
 

diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index e2c009b..9707b74 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c

@@ -3,6 +3,10 @@
  *
  * Sun3 DMA routines added by Sam Creasey (sammy@sammy.net)
  *
+ * VME support added by Sam Creasey
+ *
+ * TODO: modify this driver to support multiple Sun3 SCSI VME boards
+ *
  * Adapted from mac_scsinew.c:
  */
 /*
@@ -45,10 +49,6 @@
  * USLEEP - enable support for devices that don't disconnect.  Untested.
  */
 
-/*
- * $Log: sun3_NCR5380.c,v $
- */
-
 #define AUTOSENSE
 
 #include <linux/types.h>
@@ -69,23 +69,15 @@
 #include <asm/idprom.h>
 #include <asm/machines.h>
 
-#define NDEBUG 0
-
-#define NDEBUG_ABORT		0x00100000
-#define NDEBUG_TAGS		0x00200000
-#define NDEBUG_MERGING		0x00400000
-
 /* dma on! */
 #define REAL_DMA
 
 #include "scsi.h"
-#include "initio.h"
 #include <scsi/scsi_host.h>
 #include "sun3_scsi.h"
+#include "NCR5380.h"
 
-static void NCR5380_print(struct Scsi_Host *instance);
-
-/* #define OLDDMA */
+extern int sun3_map_test(unsigned long, char *);
 
 #define USE_WRAPPER
 /*#define RESET_BOOT */
@@ -101,7 +93,11 @@
 
 /* #define SUPPORT_TAGS */
 
+#ifdef SUN3_SCSI_VME
+#define ENABLE_IRQ()
+#else
 #define	ENABLE_IRQ()	enable_irq( IRQ_SUN3_SCSI ); 
+#endif
 
 
 static irqreturn_t scsi_sun3_intr(int irq, void *dummy);
@@ -123,6 +119,8 @@
 
 static struct scsi_cmnd *sun3_dma_setup_done = NULL;
 
+#define	RESET_RUN_DONE
+
 #define	AFTER_RESET_DELAY	(HZ/2)
 
 /* ms to wait after hitting dma regs */
@@ -136,10 +134,9 @@
 
 static volatile unsigned char *sun3_scsi_regp;
 static volatile struct sun3_dma_regs *dregs;
-#ifdef OLDDMA
-static unsigned char *dmabuf = NULL; /* dma memory buffer */
-#endif
+#ifndef SUN3_SCSI_VME
 static struct sun3_udc_regs *udc_regs = NULL;
+#endif
 static unsigned char *sun3_dma_orig_addr = NULL;
 static unsigned long sun3_dma_orig_count = 0;
 static int sun3_dma_active = 0;
@@ -159,6 +156,7 @@
 	sun3_scsi_regp[reg] = value;
 }
 
+#ifndef SUN3_SCSI_VME
 /* dma controller register access functions */
 
 static inline unsigned short sun3_udc_read(unsigned char reg)
@@ -180,6 +178,7 @@
 	dregs->udc_data = val;
 	udelay(SUN3_DMA_DELAY);
 }
+#endif
 
 /*
  * XXX: status debug
@@ -198,17 +197,32 @@
  *
  */
  
-int __init sun3scsi_detect(struct scsi_host_template * tpnt)
+static int __init sun3scsi_detect(struct scsi_host_template *tpnt)
 {
-	unsigned long ioaddr;
+	unsigned long ioaddr, irq;
 	static int called = 0;
 	struct Scsi_Host *instance;
+#ifdef SUN3_SCSI_VME
+	int i;
+	unsigned long addrs[3] = { IOBASE_SUN3_VMESCSI,
+				   IOBASE_SUN3_VMESCSI + 0x4000,
+				   0 };
+	unsigned long vecs[3] = { SUN3_VEC_VMESCSI0,
+				  SUN3_VEC_VMESCSI1,
+				  0 };
+#endif
 
 	/* check that this machine has an onboard 5380 */
 	switch(idprom->id_machtype) {
+#ifdef SUN3_SCSI_VME
+	case SM_SUN3|SM_3_160:
+	case SM_SUN3|SM_3_260:
+		break;
+#else
 	case SM_SUN3|SM_3_50:
 	case SM_SUN3|SM_3_60:
 		break;
+#endif
 
 	default:
 		return 0;
@@ -217,7 +231,11 @@
 	if(called)
 		return 0;
 
+#ifdef SUN3_SCSI_VME
+	tpnt->proc_name = "Sun3 5380 VME SCSI";
+#else
 	tpnt->proc_name = "Sun3 5380 SCSI";
+#endif
 
 	/* setup variables */
 	tpnt->can_queue =
@@ -234,6 +252,38 @@
 		tpnt->this_id = 7;
 	}
 
+#ifdef SUN3_SCSI_VME
+	ioaddr = 0;
+	for (i = 0; addrs[i] != 0; i++) {
+		unsigned char x;
+
+		ioaddr = (unsigned long)sun3_ioremap(addrs[i], PAGE_SIZE,
+						     SUN3_PAGE_TYPE_VME16);
+		irq = vecs[i];
+		sun3_scsi_regp = (unsigned char *)ioaddr;
+
+		dregs = (struct sun3_dma_regs *)(((unsigned char *)ioaddr) + 8);
+
+		if (sun3_map_test((unsigned long)dregs, &x)) {
+			unsigned short oldcsr;
+
+			oldcsr = dregs->csr;
+			dregs->csr = 0;
+			udelay(SUN3_DMA_DELAY);
+			if (dregs->csr == 0x1400)
+				break;
+
+			dregs->csr = oldcsr;
+		}
+
+		iounmap((void *)ioaddr);
+		ioaddr = 0;
+	}
+
+	if (!ioaddr)
+		return 0;
+#else
+	irq = IRQ_SUN3_SCSI;
 	ioaddr = (unsigned long)ioremap(IOBASE_SUN3_SCSI, PAGE_SIZE);
 	sun3_scsi_regp = (unsigned char *)ioaddr;
 
@@ -244,11 +294,6 @@
 	     printk("SUN3 Scsi couldn't allocate DVMA memory!\n");
 	     return 0;
 	}
-#ifdef OLDDMA
-	if((dmabuf = dvma_malloc_align(SUN3_DVMA_BUFSIZE, 0x10000)) == NULL) {
-	     printk("SUN3 Scsi couldn't allocate DVMA memory!\n");
-	     return 0;
-	}
 #endif
 #ifdef SUPPORT_TAGS
 	if (setup_use_tagged_queuing < 0)
@@ -262,7 +307,7 @@
 	default_instance = instance;
 
         instance->io_port = (unsigned long) ioaddr;
-	instance->irq = IRQ_SUN3_SCSI;
+	instance->irq = irq;
 
 	NCR5380_init(instance, 0);
 
@@ -283,7 +328,8 @@
 #endif
 	}
 	
-	printk("scsi%d: Sun3 5380 at port %lX irq", instance->host_no, instance->io_port);
+	pr_info("scsi%d: %s at port %lX irq", instance->host_no,
+		tpnt->proc_name, instance->io_port);
 	if (instance->irq == SCSI_IRQ_NONE)
 		printk ("s disabled");
 	else
@@ -300,6 +346,15 @@
 	dregs->csr = CSR_SCSI | CSR_FIFO | CSR_INTR;
 	udelay(SUN3_DMA_DELAY);
 	dregs->fifo_count = 0;
+#ifdef SUN3_SCSI_VME
+	dregs->fifo_count_hi = 0;
+	dregs->dma_addr_hi = 0;
+	dregs->dma_addr_lo = 0;
+	dregs->dma_count_hi = 0;
+	dregs->dma_count_lo = 0;
+
+	dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
+#endif
 
 	called = 1;
 
@@ -367,7 +422,8 @@
 }
 #endif
 
-const char * sun3scsi_info (struct Scsi_Host *spnt) {
+static const char *sun3scsi_info(struct Scsi_Host *spnt)
+{
     return "";
 }
 
@@ -379,6 +435,10 @@
 	unsigned short csr = dregs->csr;
 	int handled = 0;
 
+#ifdef SUN3_SCSI_VME
+	dregs->csr &= ~CSR_DMA_ENABLE;
+#endif
+
 	if(csr & ~CSR_GOOD) {
 		if(csr & CSR_DMA_BUSERR) {
 			printk("scsi%d: bus error in dma\n", default_instance->host_no);
@@ -422,31 +482,28 @@
 /* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
 static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
 {
-#ifdef OLDDMA
-	if(write_flag) 
-		memcpy(dmabuf, data, count);
-	else {
-		sun3_dma_orig_addr = data;
-		sun3_dma_orig_count = count;
-	}
-#else
 	void *addr;
 
 	if(sun3_dma_orig_addr != NULL)
 		dvma_unmap(sun3_dma_orig_addr);
 
-//	addr = sun3_dvma_page((unsigned long)data, (unsigned long)dmabuf);
+#ifdef SUN3_SCSI_VME
+	addr = (void *)dvma_map_vme((unsigned long) data, count);
+#else
 	addr = (void *)dvma_map((unsigned long) data, count);
+#endif
 		
 	sun3_dma_orig_addr = addr;
 	sun3_dma_orig_count = count;
-#endif
+
+#ifndef SUN3_SCSI_VME
 	dregs->fifo_count = 0;
 	sun3_udc_write(UDC_RESET, UDC_CSR);
 	
 	/* reset fifo */
 	dregs->csr &= ~CSR_FIFO;
 	dregs->csr |= CSR_FIFO;
+#endif
 	
 	/* set direction */
 	if(write_flag)
@@ -454,6 +511,17 @@
 	else
 		dregs->csr &= ~CSR_SEND;
 	
+#ifdef SUN3_SCSI_VME
+	dregs->csr |= CSR_PACK_ENABLE;
+
+	dregs->dma_addr_hi = ((unsigned long)addr >> 16);
+	dregs->dma_addr_lo = ((unsigned long)addr & 0xffff);
+
+	dregs->dma_count_hi = 0;
+	dregs->dma_count_lo = 0;
+	dregs->fifo_count_hi = 0;
+	dregs->fifo_count = 0;
+#else
 	/* byte count for fifo */
 	dregs->fifo_count = count;
 
@@ -467,17 +535,12 @@
 		printk("scsi%d: fifo_mismatch %04x not %04x\n",
 		       default_instance->host_no, dregs->fifo_count,
 		       (unsigned int) count);
-		NCR5380_print(default_instance);
+		NCR5380_dprint(NDEBUG_DMA, default_instance);
 	}
 
 	/* setup udc */
-#ifdef OLDDMA
-	udc_regs->addr_hi = ((dvma_vtob(dmabuf) & 0xff0000) >> 8);
-	udc_regs->addr_lo = (dvma_vtob(dmabuf) & 0xffff);
-#else
 	udc_regs->addr_hi = (((unsigned long)(addr) & 0xff0000) >> 8);
 	udc_regs->addr_lo = ((unsigned long)(addr) & 0xffff);
-#endif
 	udc_regs->count = count/2; /* count in words */
 	udc_regs->mode_hi = UDC_MODE_HIWORD;
 	if(write_flag) {
@@ -501,11 +564,13 @@
 
 	/* interrupt enable */
 	sun3_udc_write(UDC_INT_ENABLE, UDC_CSR);
+#endif
 	
        	return count;
 
 }
 
+#ifndef SUN3_SCSI_VME
 static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
 {
 	unsigned short resid;
@@ -518,6 +583,7 @@
 
 	return (unsigned long) resid;
 }
+#endif
 
 static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 {
@@ -536,8 +602,23 @@
 
 static inline int sun3scsi_dma_start(unsigned long count, unsigned char *data)
 {
+#ifdef SUN3_SCSI_VME
+	unsigned short csr;
 
+	csr = dregs->csr;
+
+	dregs->dma_count_hi = (sun3_dma_orig_count >> 16);
+	dregs->dma_count_lo = (sun3_dma_orig_count & 0xffff);
+
+	dregs->fifo_count_hi = (sun3_dma_orig_count >> 16);
+	dregs->fifo_count = (sun3_dma_orig_count & 0xffff);
+
+/*	if(!(csr & CSR_DMA_ENABLE))
+ *		dregs->csr |= CSR_DMA_ENABLE;
+ */
+#else
     sun3_udc_write(UDC_CHN_START, UDC_CSR);
+#endif
     
     return 0;
 }
@@ -545,12 +626,46 @@
 /* clean up after our dma is done */
 static int sun3scsi_dma_finish(int write_flag)
 {
-	unsigned short count;
+	unsigned short __maybe_unused count;
 	unsigned short fifo;
 	int ret = 0;
 	
 	sun3_dma_active = 0;
-#if 1
+
+#ifdef SUN3_SCSI_VME
+	dregs->csr &= ~CSR_DMA_ENABLE;
+
+	fifo = dregs->fifo_count;
+	if (write_flag) {
+		if ((fifo > 0) && (fifo < sun3_dma_orig_count))
+			fifo++;
+	}
+
+	last_residual = fifo;
+	/* empty bytes from the fifo which didn't make it */
+	if ((!write_flag) && (dregs->csr & CSR_LEFT)) {
+		unsigned char *vaddr;
+
+		vaddr = (unsigned char *)dvma_vmetov(sun3_dma_orig_addr);
+
+		vaddr += (sun3_dma_orig_count - fifo);
+		vaddr--;
+
+		switch (dregs->csr & CSR_LEFT) {
+		case CSR_LEFT_3:
+			*vaddr = (dregs->bpack_lo & 0xff00) >> 8;
+			vaddr--;
+
+		case CSR_LEFT_2:
+			*vaddr = (dregs->bpack_hi & 0x00ff);
+			vaddr--;
+
+		case CSR_LEFT_1:
+			*vaddr = (dregs->bpack_hi & 0xff00) >> 8;
+			break;
+		}
+	}
+#else
 	// check to empty the fifo on a read
 	if(!write_flag) {
 		int tmo = 20000; /* .2 sec */
@@ -566,28 +681,8 @@
 			udelay(10);
 		}
 	}
-		
-#endif
 
 	count = sun3scsi_dma_count(default_instance);
-#ifdef OLDDMA
-
-	/* if we've finished a read, copy out the data we read */
- 	if(sun3_dma_orig_addr) {
-		/* check for residual bytes after dma end */
-		if(count && (NCR5380_read(BUS_AND_STATUS_REG) &
-			     (BASR_PHASE_MATCH | BASR_ACK))) {
-			printk("scsi%d: sun3_scsi_finish: read overrun baby... ", default_instance->host_no);
-			printk("basr now %02x\n", NCR5380_read(BUS_AND_STATUS_REG));
-			ret = count;
-		}
-		
-		/* copy in what we dma'd no matter what */
-		memcpy(sun3_dma_orig_addr, dmabuf, sun3_dma_orig_count);
-		sun3_dma_orig_addr = NULL;
-
-	}
-#else
 
 	fifo = dregs->fifo_count;
 	last_residual = fifo;
@@ -605,10 +700,23 @@
 		vaddr[-2] = (data & 0xff00) >> 8;
 		vaddr[-1] = (data & 0xff);
 	}
+#endif
 
 	dvma_unmap(sun3_dma_orig_addr);
 	sun3_dma_orig_addr = NULL;
-#endif
+
+#ifdef SUN3_SCSI_VME
+	dregs->dma_addr_hi = 0;
+	dregs->dma_addr_lo = 0;
+	dregs->dma_count_hi = 0;
+	dregs->dma_count_lo = 0;
+
+	dregs->fifo_count = 0;
+	dregs->fifo_count_hi = 0;
+
+	dregs->csr &= ~CSR_SEND;
+/*	dregs->csr |= CSR_DMA_ENABLE; */
+#else
 	sun3_udc_write(UDC_RESET, UDC_CSR);
 	dregs->fifo_count = 0;
 	dregs->csr &= ~CSR_SEND;
@@ -616,6 +724,7 @@
 	/* reset fifo */
 	dregs->csr &= ~CSR_FIFO;
 	dregs->csr |= CSR_FIFO;
+#endif
 	
 	sun3_dma_setup_done = NULL;
 

diff --git a/drivers/scsi/sun3_scsi.h b/drivers/scsi/sun3_scsi.h
index a8da9c7..e96a37c 100644
--- a/drivers/scsi/sun3_scsi.h
+++ b/drivers/scsi/sun3_scsi.h

@@ -29,12 +29,8 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: cumana_NCR5380.h,v $
- */
-
-#ifndef SUN3_NCR5380_H
-#define SUN3_NCR5380_H
+#ifndef SUN3_SCSI_H
+#define SUN3_SCSI_H
 
 #define SUN3SCSI_PUBLIC_RELEASE 1
 
@@ -82,8 +78,6 @@
 #define SUN3_SCSI_NAME "Sun3 NCR5380 SCSI"
 #endif
 
-#ifndef HOSTS_C
-
 #define NCR5380_implementation_fields \
     int port, ctrl
 
@@ -108,9 +102,6 @@
 #define NCR5380_dma_read_setup(instance, data, count) sun3scsi_dma_setup(data, count, 0)
 #define NCR5380_dma_residual sun3scsi_dma_residual
 
-#define BOARD_NORMAL	0
-#define BOARD_NCR53C400	1
-
 /* additional registers - mainly DMA control regs */
 /* these start at regbase + 8 -- directly after the NCR regs */
 struct sun3_dma_regs {
@@ -191,189 +182,5 @@
 
 #define VME_DATA24 0x3d00
 
-// debugging printk's, taken from atari_scsi.h 
-/* Debugging printk definitions:
- *
- *  ARB  -> arbitration
- *  ASEN -> auto-sense
- *  DMA  -> DMA
- *  HSH  -> PIO handshake
- *  INF  -> information transfer
- *  INI  -> initialization
- *  INT  -> interrupt
- *  LNK  -> linked commands
- *  MAIN -> NCR5380_main() control flow
- *  NDAT -> no data-out phase
- *  NWR  -> no write commands
- *  PIO  -> PIO transfers
- *  PDMA -> pseudo DMA (unused on Atari)
- *  QU   -> queues
- *  RSL  -> reselections
- *  SEL  -> selections
- *  USL  -> usleep cpde (unused on Atari)
- *  LBS  -> last byte sent (unused on Atari)
- *  RSS  -> restarting of selections
- *  EXT  -> extended messages
- *  ABRT -> aborting and resetting
- *  TAG  -> queue tag handling
- *  MER  -> merging of consec. buffers
- *
- */
-
-#include "NCR5380.h"
-
-#if NDEBUG & NDEBUG_ARBITRATION
-#define ARB_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define ARB_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_AUTOSENSE
-#define ASEN_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define ASEN_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_DMA
-#define DMA_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define DMA_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_HANDSHAKE
-#define HSH_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define HSH_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_INFORMATION
-#define INF_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define INF_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_INIT
-#define INI_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define INI_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_INTR
-#define INT_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define INT_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_LINKED
-#define LNK_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define LNK_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_MAIN
-#define MAIN_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define MAIN_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_NO_DATAOUT
-#define NDAT_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define NDAT_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_NO_WRITE
-#define NWR_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define NWR_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_PIO
-#define PIO_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define PIO_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_PSEUDO_DMA
-#define PDMA_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define PDMA_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_QUEUES
-#define QU_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define QU_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_RESELECTION
-#define RSL_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define RSL_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_SELECTION
-#define SEL_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define SEL_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_USLEEP
-#define USL_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define USL_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_LAST_BYTE_SENT
-#define LBS_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define LBS_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_RESTART_SELECT
-#define RSS_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define RSS_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_EXTENDED
-#define EXT_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define EXT_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_ABORT
-#define ABRT_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define ABRT_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_TAGS
-#define TAG_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define TAG_PRINTK(format, args...)
-#endif
-#if NDEBUG & NDEBUG_MERGING
-#define MER_PRINTK(format, args...) \
-	printk(KERN_DEBUG format , ## args)
-#else
-#define MER_PRINTK(format, args...)
-#endif
-
-/* conditional macros for NCR5380_print_{,phase,status} */
-
-#define NCR_PRINT(mask)	\
-	((NDEBUG & (mask)) ? NCR5380_print(instance) : (void)0)
-
-#define NCR_PRINT_PHASE(mask) \
-	((NDEBUG & (mask)) ? NCR5380_print_phase(instance) : (void)0)
-
-#define NCR_PRINT_STATUS(mask) \
-	((NDEBUG & (mask)) ? NCR5380_print_status(instance) : (void)0)
-
-
-
-#endif /* ndef HOSTS_C */
-#endif /* SUN3_NCR5380_H */
+#endif /* SUN3_SCSI_H */
 

diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c
index a3dd55d..1eeece6 100644
--- a/drivers/scsi/sun3_scsi_vme.c
+++ b/drivers/scsi/sun3_scsi_vme.c

@@ -1,589 +1,3 @@
- /*
- * Sun3 SCSI stuff by Erik Verbruggen (erik@bigmama.xtdnet.nl)
- *
- * Sun3 DMA routines added by Sam Creasey (sammy@sammy.net)
- *
- * VME support added by Sam Creasey
- *
- * Adapted from sun3_scsi.c -- see there for other headers
- *
- * TODO: modify this driver to support multiple Sun3 SCSI VME boards
- *
- */
-
-#define AUTOSENSE
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/ctype.h>
-#include <linux/delay.h>
-
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-
-#include <asm/io.h>
-
-#include <asm/sun3ints.h>
-#include <asm/dvma.h>
-#include <asm/idprom.h>
-#include <asm/machines.h>
-
 #define SUN3_SCSI_VME
 
-#undef SUN3_SCSI_DEBUG
-
-/* dma on! */
-#define REAL_DMA
-
-#define NDEBUG 0
-
-#define NDEBUG_ABORT		0x00100000
-#define NDEBUG_TAGS		0x00200000
-#define NDEBUG_MERGING		0x00400000
-
-#include "scsi.h"
-#include "initio.h"
-#include <scsi/scsi_host.h>
-#include "sun3_scsi.h"
-
-extern int sun3_map_test(unsigned long, char *);
-
-#define USE_WRAPPER
-/*#define RESET_BOOT */
-#define DRIVER_SETUP
-
-/*
- * BUG can be used to trigger a strange code-size related hang on 2.1 kernels
- */
-#ifdef BUG
-#undef RESET_BOOT
-#undef DRIVER_SETUP
-#endif
-
-/* #define SUPPORT_TAGS */
-
-//#define	ENABLE_IRQ()	enable_irq( SUN3_VEC_VMESCSI0 ); 
-#define ENABLE_IRQ()
-
-
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy);
-static inline unsigned char sun3scsi_read(int reg);
-static inline void sun3scsi_write(int reg, int value);
-
-static int setup_can_queue = -1;
-module_param(setup_can_queue, int, 0);
-static int setup_cmd_per_lun = -1;
-module_param(setup_cmd_per_lun, int, 0);
-static int setup_sg_tablesize = -1;
-module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
-static int setup_use_tagged_queuing = -1;
-module_param(setup_use_tagged_queuing, int, 0);
-#endif
-static int setup_hostid = -1;
-module_param(setup_hostid, int, 0);
-
-static struct scsi_cmnd *sun3_dma_setup_done = NULL;
-
-#define	AFTER_RESET_DELAY	(HZ/2)
-
-/* ms to wait after hitting dma regs */
-#define SUN3_DMA_DELAY 10
-
-/* dvma buffer to allocate -- 32k should hopefully be more than sufficient */
-#define SUN3_DVMA_BUFSIZE 0xe000
-
-/* minimum number of bytes to do dma on */
-#define SUN3_DMA_MINSIZE 128
-
-static volatile unsigned char *sun3_scsi_regp;
-static volatile struct sun3_dma_regs *dregs;
-#ifdef OLDDMA
-static unsigned char *dmabuf = NULL; /* dma memory buffer */
-#endif
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-
-/*
- * NCR 5380 register access functions
- */
-
-static inline unsigned char sun3scsi_read(int reg)
-{
-	return( sun3_scsi_regp[reg] );
-}
-
-static inline void sun3scsi_write(int reg, int value)
-{
-	sun3_scsi_regp[reg] = value;
-}
-
-/*
- * XXX: status debug
- */
-static struct Scsi_Host *default_instance;
-
-/*
- * Function : int sun3scsi_detect(struct scsi_host_template * tpnt)
- *
- * Purpose : initializes mac NCR5380 driver based on the
- *	command line / compile time port and irq definitions.
- *
- * Inputs : tpnt - template for this SCSI adapter.
- *
- * Returns : 1 if a host adapter was found, 0 if not.
- *
- */
- 
-static int __init sun3scsi_detect(struct scsi_host_template * tpnt)
-{
-	unsigned long ioaddr, irq = 0;
-	static int called = 0;
-	struct Scsi_Host *instance;
-	int i;
-	unsigned long addrs[3] = { IOBASE_SUN3_VMESCSI, 
-				   IOBASE_SUN3_VMESCSI + 0x4000,
-				   0 };
-	unsigned long vecs[3] = { SUN3_VEC_VMESCSI0,
-				  SUN3_VEC_VMESCSI1,
-				  0 };
-	/* check that this machine has an onboard 5380 */
-	switch(idprom->id_machtype) {
-	case SM_SUN3|SM_3_160:
-	case SM_SUN3|SM_3_260:
-		break;
-
-	default:
-		return 0;
-	}
-
-	if(called)
-		return 0;
-
-	tpnt->proc_name = "Sun3 5380 VME SCSI";
-
-	/* setup variables */
-	tpnt->can_queue =
-		(setup_can_queue > 0) ? setup_can_queue : CAN_QUEUE;
-	tpnt->cmd_per_lun =
-		(setup_cmd_per_lun > 0) ? setup_cmd_per_lun : CMD_PER_LUN;
-	tpnt->sg_tablesize = 
-		(setup_sg_tablesize >= 0) ? setup_sg_tablesize : SG_TABLESIZE;
-	
-	if (setup_hostid >= 0)
-		tpnt->this_id = setup_hostid;
-	else {
-		/* use 7 as default */
-		tpnt->this_id = 7;
-	}
-	
-	ioaddr = 0;
-	for(i = 0; addrs[i] != 0; i++) {
-		unsigned char x;
-		
-		ioaddr = (unsigned long)sun3_ioremap(addrs[i], PAGE_SIZE,
-						     SUN3_PAGE_TYPE_VME16);
-		irq = vecs[i];
-		sun3_scsi_regp = (unsigned char *)ioaddr;
-		
-		dregs = (struct sun3_dma_regs *)(((unsigned char *)ioaddr) + 8);
-		
-		if(sun3_map_test((unsigned long)dregs, &x)) {
-			unsigned short oldcsr;
-
-			oldcsr = dregs->csr;
-			dregs->csr = 0;
-			udelay(SUN3_DMA_DELAY);
-			if(dregs->csr == 0x1400)
-				break;
-			
-			dregs->csr = oldcsr;
-		}
-
-		iounmap((void *)ioaddr);
-		ioaddr = 0;
-	}
-
-	if(!ioaddr)
-		return 0;
-	
-#ifdef SUPPORT_TAGS
-	if (setup_use_tagged_queuing < 0)
-		setup_use_tagged_queuing = USE_TAGGED_QUEUING;
-#endif
-
-	instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
-	if(instance == NULL)
-		return 0;
-		
-	default_instance = instance;
-
-        instance->io_port = (unsigned long) ioaddr;
-	instance->irq = irq;
-
-	NCR5380_init(instance, 0);
-
-	instance->n_io_port = 32;
-
-        ((struct NCR5380_hostdata *)instance->hostdata)->ctrl = 0;
-
-	if (request_irq(instance->irq, scsi_sun3_intr,
-			0, "Sun3SCSI-5380VME", instance)) {
-#ifndef REAL_DMA
-		printk("scsi%d: IRQ%d not free, interrupts disabled\n",
-		       instance->host_no, instance->irq);
-		instance->irq = SCSI_IRQ_NONE;
-#else
-		printk("scsi%d: IRQ%d not free, bailing out\n",
-		       instance->host_no, instance->irq);
-		return 0;
-#endif
-	}
-
-	printk("scsi%d: Sun3 5380 VME at port %lX irq", instance->host_no, instance->io_port);
-	if (instance->irq == SCSI_IRQ_NONE)
-		printk ("s disabled");
-	else
-		printk (" %d", instance->irq);
-	printk(" options CAN_QUEUE=%d CMD_PER_LUN=%d release=%d",
-	       instance->can_queue, instance->cmd_per_lun,
-	       SUN3SCSI_PUBLIC_RELEASE);
-	printk("\nscsi%d:", instance->host_no);
-	NCR5380_print_options(instance);
-	printk("\n");
-
-	dregs->csr = 0;
-	udelay(SUN3_DMA_DELAY);
-	dregs->csr = CSR_SCSI | CSR_FIFO | CSR_INTR;
-	udelay(SUN3_DMA_DELAY);
-	dregs->fifo_count = 0;
-	dregs->fifo_count_hi = 0;
-	dregs->dma_addr_hi = 0;
-	dregs->dma_addr_lo = 0;
-	dregs->dma_count_hi = 0;
-	dregs->dma_count_lo = 0;
-
-	dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
-
-	called = 1;
-
-#ifdef RESET_BOOT
-	sun3_scsi_reset_boot(instance);
-#endif
-
-	return 1;
-}
-
-int sun3scsi_release (struct Scsi_Host *shpnt)
-{
-	if (shpnt->irq != SCSI_IRQ_NONE)
-		free_irq(shpnt->irq, shpnt);
-
-	iounmap((void *)sun3_scsi_regp);
-
-	NCR5380_exit(shpnt);
-	return 0;
-}
-
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
-	unsigned long end;
-
-	NCR5380_local_declare();
-	NCR5380_setup(instance);
-	
-	/*
-	 * Do a SCSI reset to clean up the bus during initialization. No
-	 * messing with the queues, interrupts, or locks necessary here.
-	 */
-
-	printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
-	/* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-//       	sun3_disable_irq( IRQ_SUN3_SCSI );
-
-	/* get in phase */
-	NCR5380_write( TARGET_COMMAND_REG,
-		      PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-	/* assert RST */
-	NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
-	/* The min. reset hold time is 25us, so 40us should be enough */
-	udelay( 50 );
-
-	/* reset RST and interrupt */
-	NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-	NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-	for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-		barrier();
-
-	/* switch on SCSI IRQ again */
-//       	sun3_enable_irq( IRQ_SUN3_SCSI );
-
-	printk( " done\n" );
-}
-#endif
-
-static const char * sun3scsi_info (struct Scsi_Host *spnt) {
-    return "";
-}
-
-// safe bits for the CSR
-#define CSR_GOOD 0x060f
-
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
-{
-	unsigned short csr = dregs->csr;
-	int handled = 0;
-
-	dregs->csr &= ~CSR_DMA_ENABLE;
-
-
-#ifdef SUN3_SCSI_DEBUG
-	printk("scsi_intr csr %x\n", csr);
-#endif
-
-	if(csr & ~CSR_GOOD) {
-		if(csr & CSR_DMA_BUSERR) {
-			printk("scsi%d: bus error in dma\n", default_instance->host_no);
-#ifdef SUN3_SCSI_DEBUG
-			printk("scsi: residual %x count %x addr %p dmaaddr %x\n", 
-			       dregs->fifo_count,
-			       dregs->dma_count_lo | (dregs->dma_count_hi << 16),
-			       sun3_dma_orig_addr,
-			       dregs->dma_addr_lo | (dregs->dma_addr_hi << 16));
-#endif
-		}
-
-		if(csr & CSR_DMA_CONFLICT) {
-			printk("scsi%d: dma conflict\n", default_instance->host_no);
-		}
-		handled = 1;
-	}
-
-	if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
-		NCR5380_intr(irq, dummy);
-		handled = 1;
-	}
-
-	return IRQ_RETVAL(handled);
-}
-
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk; 
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
-	unsigned long flags;
-	NCR5380_local_declare();
-
-	if (default_instance) {
-			local_irq_save(flags);
-			NCR5380_print_status(default_instance);
-			local_irq_restore(flags);
-	}
-}
-#endif
-
-
-/* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
-{
-	void *addr;
-
-	if(sun3_dma_orig_addr != NULL)
-		dvma_unmap(sun3_dma_orig_addr);
-
-//	addr = sun3_dvma_page((unsigned long)data, (unsigned long)dmabuf);
-	addr = (void *)dvma_map_vme((unsigned long) data, count);
-		
-	sun3_dma_orig_addr = addr;
-	sun3_dma_orig_count = count;
-	
-#ifdef SUN3_SCSI_DEBUG
-	printk("scsi: dma_setup addr %p count %x\n", addr, count);
-#endif
-
-//	dregs->fifo_count = 0;
-#if 0	
-	/* reset fifo */
-	dregs->csr &= ~CSR_FIFO;
-	dregs->csr |= CSR_FIFO;
-#endif	
-	/* set direction */
-	if(write_flag)
-		dregs->csr |= CSR_SEND;
-	else
-		dregs->csr &= ~CSR_SEND;
-	
-	/* reset fifo */
-//	dregs->csr &= ~CSR_FIFO;
-//	dregs->csr |= CSR_FIFO;
-
-	dregs->csr |= CSR_PACK_ENABLE;
-
-	dregs->dma_addr_hi = ((unsigned long)addr >> 16);
-	dregs->dma_addr_lo = ((unsigned long)addr & 0xffff);
-	
-	dregs->dma_count_hi = 0;
-	dregs->dma_count_lo = 0;
-	dregs->fifo_count_hi = 0;
-	dregs->fifo_count = 0;
-		
-#ifdef SUN3_SCSI_DEBUG
-	printk("scsi: dma_setup done csr %x\n", dregs->csr);
-#endif
-       	return count;
-
-}
-
-static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
-{
-	return last_residual;
-}
-
-static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted,
-						  struct scsi_cmnd *cmd,
-						  int write_flag)
-{
-	if (cmd->request->cmd_type == REQ_TYPE_FS)
- 		return wanted;
-	else
-		return 0;
-}
-
-static int sun3scsi_dma_start(unsigned long count, char *data)
-{
-	
-	unsigned short csr;
-
-	csr = dregs->csr;
-#ifdef SUN3_SCSI_DEBUG
-	printk("scsi: dma_start data %p count %x csr %x fifo %x\n", data, count, csr, dregs->fifo_count);
-#endif
-	
-	dregs->dma_count_hi = (sun3_dma_orig_count >> 16);
-	dregs->dma_count_lo = (sun3_dma_orig_count & 0xffff);
-
-	dregs->fifo_count_hi = (sun3_dma_orig_count >> 16);
-	dregs->fifo_count = (sun3_dma_orig_count & 0xffff);
-
-//	if(!(csr & CSR_DMA_ENABLE))
-//		dregs->csr |= CSR_DMA_ENABLE;
-
-	return 0;
-}
-
-/* clean up after our dma is done */
-static int sun3scsi_dma_finish(int write_flag)
-{
-	unsigned short fifo;
-	int ret = 0;
-	
-	sun3_dma_active = 0;
-
-	dregs->csr &= ~CSR_DMA_ENABLE;
-	
-	fifo = dregs->fifo_count;
-	if(write_flag) {
-		if((fifo > 0) && (fifo < sun3_dma_orig_count))
-			fifo++;
-	}
-
-	last_residual = fifo;
-#ifdef SUN3_SCSI_DEBUG
-	printk("scsi: residual %x total %x\n", fifo, sun3_dma_orig_count);
-#endif
-	/* empty bytes from the fifo which didn't make it */
-	if((!write_flag) && (dregs->csr & CSR_LEFT)) {
-		unsigned char *vaddr;
-
-#ifdef SUN3_SCSI_DEBUG
-		printk("scsi: got left over bytes\n");
-#endif
-
-		vaddr = (unsigned char *)dvma_vmetov(sun3_dma_orig_addr);
-		
-		vaddr += (sun3_dma_orig_count - fifo);
-		vaddr--;
-		
-		switch(dregs->csr & CSR_LEFT) {
-		case CSR_LEFT_3:
-			*vaddr = (dregs->bpack_lo & 0xff00) >> 8;
-			vaddr--;
-			
-		case CSR_LEFT_2:
-			*vaddr = (dregs->bpack_hi & 0x00ff);
-			vaddr--;
-			
-		case CSR_LEFT_1:
-			*vaddr = (dregs->bpack_hi & 0xff00) >> 8;
-			break;
-		}
-		
-		
-	}
-
-	dvma_unmap(sun3_dma_orig_addr);
-	sun3_dma_orig_addr = NULL;
-
-	dregs->dma_addr_hi = 0;
-	dregs->dma_addr_lo = 0;
-	dregs->dma_count_hi = 0;
-	dregs->dma_count_lo = 0;
-
-	dregs->fifo_count = 0;
-	dregs->fifo_count_hi = 0;
-
-	dregs->csr &= ~CSR_SEND;
-	
-//	dregs->csr |= CSR_DMA_ENABLE;
-	
-#if 0
-	/* reset fifo */
-	dregs->csr &= ~CSR_FIFO;
-	dregs->csr |= CSR_FIFO;
-#endif	
-	sun3_dma_setup_done = NULL;
-
-	return ret;
-
-}
-
-#include "sun3_NCR5380.c"
-
-static struct scsi_host_template driver_template = {
-	.name			= SUN3_SCSI_NAME,
-	.detect			= sun3scsi_detect,
-	.release		= sun3scsi_release,
-	.info			= sun3scsi_info,
-	.queuecommand		= sun3scsi_queue_command,
-	.eh_abort_handler      	= sun3scsi_abort,
-	.eh_bus_reset_handler  	= sun3scsi_bus_reset,
-	.can_queue		= CAN_QUEUE,
-	.this_id		= 7,
-	.sg_tablesize		= SG_TABLESIZE,
-	.cmd_per_lun		= CMD_PER_LUN,
-	.use_clustering		= DISABLE_CLUSTERING
-};
-
-
-#include "scsi_module.c"
-
-MODULE_LICENSE("GPL");
+#include "sun3_scsi.c"

diff --git a/drivers/scsi/t128.c b/drivers/scsi/t128.c
index a4abce9..8cc8093 100644
--- a/drivers/scsi/t128.c
+++ b/drivers/scsi/t128.c

@@ -102,10 +102,6 @@
  * 15 9-11
  */
  
-/*
- * $Log: t128.c,v $
- */
-
 #include <linux/signal.h>
 #include <linux/io.h>
 #include <linux/blkdev.h>

diff --git a/drivers/scsi/t128.h b/drivers/scsi/t128.h
index 1df82c28..fd68cec 100644
--- a/drivers/scsi/t128.h
+++ b/drivers/scsi/t128.h

@@ -34,10 +34,6 @@
  * 1+ (800) 334-5454
  */
 
-/*
- * $Log: t128.h,v $
- */
-
 #ifndef T128_H
 #define T128_H
 
@@ -107,8 +103,6 @@
 #define CAN_QUEUE 32
 #endif
 
-#ifndef HOSTS_C
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
@@ -148,6 +142,5 @@
 
 #define T128_IRQS 0xc4a8
 
-#endif /* else def HOSTS_C */
 #endif /* ndef ASM */
 #endif /* T128_H */

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 7210500..f42d1ce 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h

@@ -196,9 +196,9 @@
  * @dword_2: UPIU header DW-2
  */
 struct utp_upiu_header {
-	u32 dword_0;
-	u32 dword_1;
-	u32 dword_2;
+	__be32 dword_0;
+	__be32 dword_1;
+	__be32 dword_2;
 };
 
 /**
@@ -207,7 +207,7 @@
  * @cdb: Command Descriptor Block CDB DW-4 to DW-7
  */
 struct utp_upiu_cmd {
-	u32 exp_data_transfer_len;
+	__be32 exp_data_transfer_len;
 	u8 cdb[MAX_CDB_SIZE];
 };
 
@@ -228,10 +228,10 @@
 	u8 idn;
 	u8 index;
 	u8 selector;
-	u16 reserved_osf;
-	u16 length;
-	u32 value;
-	u32 reserved[2];
+	__be16 reserved_osf;
+	__be16 length;
+	__be32 value;
+	__be32 reserved[2];
 };
 
 /**
@@ -256,9 +256,9 @@
  * @sense_data: Sense data field DW-8 to DW-12
  */
 struct utp_cmd_rsp {
-	u32 residual_transfer_count;
-	u32 reserved[4];
-	u16 sense_data_len;
+	__be32 residual_transfer_count;
+	__be32 reserved[4];
+	__be16 sense_data_len;
 	u8 sense_data[18];
 };
 
@@ -286,10 +286,10 @@
  */
 struct utp_upiu_task_req {
 	struct utp_upiu_header header;
-	u32 input_param1;
-	u32 input_param2;
-	u32 input_param3;
-	u32 reserved[2];
+	__be32 input_param1;
+	__be32 input_param2;
+	__be32 input_param3;
+	__be32 reserved[2];
 };
 
 /**
@@ -301,9 +301,9 @@
  */
 struct utp_upiu_task_rsp {
 	struct utp_upiu_header header;
-	u32 output_param1;
-	u32 output_param2;
-	u32 reserved[3];
+	__be32 output_param1;
+	__be32 output_param2;
+	__be32 reserved[3];
 };
 
 /**

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 04884d6..0c28772 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c

@@ -55,6 +55,9 @@
 /* Query request timeout */
 #define QUERY_REQ_TIMEOUT 30 /* msec */
 
+/* Task management command timeout */
+#define TM_CMD_TIMEOUT	100 /* msecs */
+
 /* Expose the flag value from utp_upiu_query.value */
 #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
 
@@ -71,9 +74,22 @@
 
 /* UFSHCD states */
 enum {
-	UFSHCD_STATE_OPERATIONAL,
 	UFSHCD_STATE_RESET,
 	UFSHCD_STATE_ERROR,
+	UFSHCD_STATE_OPERATIONAL,
+};
+
+/* UFSHCD error handling flags */
+enum {
+	UFSHCD_EH_IN_PROGRESS = (1 << 0),
+};
+
+/* UFSHCD UIC layer error flags */
+enum {
+	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
+	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
+	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
+	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
 };
 
 /* Interrupt configuration options */
@@ -83,6 +99,18 @@
 	UFSHCD_INT_CLEAR,
 };
 
+#define ufshcd_set_eh_in_progress(h) \
+	(h->eh_flags |= UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_eh_in_progress(h) \
+	(h->eh_flags & UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_clear_eh_in_progress(h) \
+	(h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
+
+static void ufshcd_tmc_handler(struct ufs_hba *hba);
+static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+static int ufshcd_reset_and_restore(struct ufs_hba *hba);
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
+
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -163,7 +191,7 @@
  */
 static inline int ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
 {
-	return lrbp->utr_descriptor_ptr->header.dword_2 & MASK_OCS;
+	return le32_to_cpu(lrbp->utr_descriptor_ptr->header.dword_2) & MASK_OCS;
 }
 
 /**
@@ -176,19 +204,41 @@
 static inline int
 ufshcd_get_tmr_ocs(struct utp_task_req_desc *task_req_descp)
 {
-	return task_req_descp->header.dword_2 & MASK_OCS;
+	return le32_to_cpu(task_req_descp->header.dword_2) & MASK_OCS;
 }
 
 /**
  * ufshcd_get_tm_free_slot - get a free slot for task management request
  * @hba: per adapter instance
+ * @free_slot: pointer to variable with available slot value
  *
- * Returns maximum number of task management request slots in case of
- * task management queue full or returns the free slot number
+ * Get a free tag and lock it until ufshcd_put_tm_slot() is called.
+ * Returns 0 if free slot is not available, else return 1 with tag value
+ * in @free_slot.
  */
-static inline int ufshcd_get_tm_free_slot(struct ufs_hba *hba)
+static bool ufshcd_get_tm_free_slot(struct ufs_hba *hba, int *free_slot)
 {
-	return find_first_zero_bit(&hba->outstanding_tasks, hba->nutmrs);
+	int tag;
+	bool ret = false;
+
+	if (!free_slot)
+		goto out;
+
+	do {
+		tag = find_first_zero_bit(&hba->tm_slots_in_use, hba->nutmrs);
+		if (tag >= hba->nutmrs)
+			goto out;
+	} while (test_and_set_bit_lock(tag, &hba->tm_slots_in_use));
+
+	*free_slot = tag;
+	ret = true;
+out:
+	return ret;
+}
+
+static inline void ufshcd_put_tm_slot(struct ufs_hba *hba, int slot)
+{
+	clear_bit_unlock(slot, &hba->tm_slots_in_use);
 }
 
 /**
@@ -390,26 +440,6 @@
 }
 
 /**
- * ufshcd_query_to_cpu() - formats the buffer to native cpu endian
- * @response: upiu query response to convert
- */
-static inline void ufshcd_query_to_cpu(struct utp_upiu_query *response)
-{
-	response->length = be16_to_cpu(response->length);
-	response->value = be32_to_cpu(response->value);
-}
-
-/**
- * ufshcd_query_to_be() - formats the buffer to big endian
- * @request: upiu query request to convert
- */
-static inline void ufshcd_query_to_be(struct utp_upiu_query *request)
-{
-	request->length = cpu_to_be16(request->length);
-	request->value = cpu_to_be32(request->value);
-}
-
-/**
  * ufshcd_copy_query_response() - Copy the Query Response and the data
  * descriptor
  * @hba: per adapter instance
@@ -425,7 +455,6 @@
 			UPIU_RSP_CODE_OFFSET;
 
 	memcpy(&query_res->upiu_res, &lrbp->ucd_rsp_ptr->qr, QUERY_OSF_SIZE);
-	ufshcd_query_to_cpu(&query_res->upiu_res);
 
 
 	/* Get the descriptor */
@@ -749,7 +778,7 @@
 {
 	struct utp_upiu_req *ucd_req_ptr = lrbp->ucd_req_ptr;
 	struct ufs_query *query = &hba->dev_cmd.query;
-	u16 len = query->request.upiu_req.length;
+	u16 len = be16_to_cpu(query->request.upiu_req.length);
 	u8 *descp = (u8 *)lrbp->ucd_req_ptr + GENERAL_UPIU_REQUEST_SIZE;
 
 	/* Query request header */
@@ -766,7 +795,6 @@
 	/* Copy the Query Request buffer as is */
 	memcpy(&ucd_req_ptr->qr, &query->request.upiu_req,
 			QUERY_OSF_SIZE);
-	ufshcd_query_to_be(&ucd_req_ptr->qr);
 
 	/* Copy the Descriptor */
 	if ((len > 0) && (query->request.upiu_req.opcode ==
@@ -853,10 +881,25 @@
 
 	tag = cmd->request->tag;
 
-	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	switch (hba->ufshcd_state) {
+	case UFSHCD_STATE_OPERATIONAL:
+		break;
+	case UFSHCD_STATE_RESET:
 		err = SCSI_MLQUEUE_HOST_BUSY;
-		goto out;
+		goto out_unlock;
+	case UFSHCD_STATE_ERROR:
+		set_host_byte(cmd, DID_ERROR);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
+	default:
+		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
+				__func__, hba->ufshcd_state);
+		set_host_byte(cmd, DID_BAD_TARGET);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
 	}
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	/* acquire the tag to make sure device cmds don't use it */
 	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
@@ -893,6 +936,7 @@
 	/* issue command to the controller */
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	ufshcd_send_command(hba, tag);
+out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
 	return err;
@@ -1151,7 +1195,7 @@
 	}
 
 	if (flag_res)
-		*flag_res = (response->upiu_res.value &
+		*flag_res = (be32_to_cpu(response->upiu_res.value) &
 				MASK_QUERY_UPIU_FLAG_LOC) & 0x1;
 
 out_unlock:
@@ -1170,7 +1214,7 @@
  *
  * Returns 0 for success, non-zero in case of failure
 */
-int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
+static int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
 			enum attr_idn idn, u8 index, u8 selector, u32 *attr_val)
 {
 	struct ufs_query_req *request;
@@ -1195,7 +1239,7 @@
 	switch (opcode) {
 	case UPIU_QUERY_OPCODE_WRITE_ATTR:
 		request->query_func = UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST;
-		request->upiu_req.value = *attr_val;
+		request->upiu_req.value = cpu_to_be32(*attr_val);
 		break;
 	case UPIU_QUERY_OPCODE_READ_ATTR:
 		request->query_func = UPIU_QUERY_FUNC_STANDARD_READ_REQUEST;
@@ -1222,7 +1266,7 @@
 		goto out_unlock;
 	}
 
-	*attr_val = response->upiu_res.value;
+	*attr_val = be32_to_cpu(response->upiu_res.value);
 
 out_unlock:
 	mutex_unlock(&hba->dev_cmd.lock);
@@ -1481,7 +1525,7 @@
  *
  * Returns 0 on success, non-zero value on failure
  */
-int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
+static int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
 {
 	struct uic_command uic_cmd = {0};
 	struct completion pwr_done;
@@ -1701,11 +1745,6 @@
 		goto out;
 	}
 
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		scsi_unblock_requests(hba->host);
-
-	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
-
 out:
 	return err;
 }
@@ -1831,66 +1870,6 @@
 }
 
 /**
- * ufshcd_do_reset - reset the host controller
- * @hba: per adapter instance
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_do_reset(struct ufs_hba *hba)
-{
-	struct ufshcd_lrb *lrbp;
-	unsigned long flags;
-	int tag;
-
-	/* block commands from midlayer */
-	scsi_block_requests(hba->host);
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
-
-	/* send controller to reset state */
-	ufshcd_hba_stop(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-	/* abort outstanding commands */
-	for (tag = 0; tag < hba->nutrs; tag++) {
-		if (test_bit(tag, &hba->outstanding_reqs)) {
-			lrbp = &hba->lrb[tag];
-			if (lrbp->cmd) {
-				scsi_dma_unmap(lrbp->cmd);
-				lrbp->cmd->result = DID_RESET << 16;
-				lrbp->cmd->scsi_done(lrbp->cmd);
-				lrbp->cmd = NULL;
-				clear_bit_unlock(tag, &hba->lrb_in_use);
-			}
-		}
-	}
-
-	/* complete device management command */
-	if (hba->dev_cmd.complete)
-		complete(hba->dev_cmd.complete);
-
-	/* clear outstanding request/task bit maps */
-	hba->outstanding_reqs = 0;
-	hba->outstanding_tasks = 0;
-
-	/* Host controller enable */
-	if (ufshcd_hba_enable(hba)) {
-		dev_err(hba->dev,
-			"Reset: Controller initialization failed\n");
-		return FAILED;
-	}
-
-	if (ufshcd_link_startup(hba)) {
-		dev_err(hba->dev,
-			"Reset: Link start-up failed\n");
-		return FAILED;
-	}
-
-	return SUCCESS;
-}
-
-/**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
  *
@@ -1907,6 +1886,9 @@
 	sdev->use_10_for_ms = 1;
 	scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
 
+	/* allow SCSI layer to restart the device in case of errors */
+	sdev->allow_restart = 1;
+
 	/*
 	 * Inform SCSI Midlayer that the LUN queue depth is same as the
 	 * controller queue depth. If a LUN queue depth is less than the
@@ -1934,10 +1916,11 @@
  * ufshcd_task_req_compl - handle task management request completion
  * @hba: per adapter instance
  * @index: index of the completed request
+ * @resp: task management service response
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success
  */
-static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
+static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_rsp *task_rsp_upiup;
@@ -1958,19 +1941,15 @@
 				task_req_descp[index].task_rsp_upiu;
 		task_result = be32_to_cpu(task_rsp_upiup->header.dword_1);
 		task_result = ((task_result & MASK_TASK_RESPONSE) >> 8);
-
-		if (task_result != UPIU_TASK_MANAGEMENT_FUNC_COMPL &&
-		    task_result != UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED)
-			task_result = FAILED;
-		else
-			task_result = SUCCESS;
+		if (resp)
+			*resp = (u8)task_result;
 	} else {
-		task_result = FAILED;
-		dev_err(hba->dev,
-			"trc: Invalid ocs = %x\n", ocs_value);
+		dev_err(hba->dev, "%s: failed, ocs = 0x%x\n",
+				__func__, ocs_value);
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	return task_result;
+
+	return ocs_value;
 }
 
 /**
@@ -2105,6 +2084,9 @@
 	case OCS_ABORTED:
 		result |= DID_ABORT << 16;
 		break;
+	case OCS_INVALID_COMMAND_STATUS:
+		result |= DID_REQUEUE << 16;
+		break;
 	case OCS_INVALID_CMD_TABLE_ATTR:
 	case OCS_INVALID_PRDT_ATTR:
 	case OCS_MISMATCH_DATA_BUF_SIZE:
@@ -2422,41 +2404,145 @@
 }
 
 /**
- * ufshcd_fatal_err_handler - handle fatal errors
- * @hba: per adapter instance
+ * ufshcd_err_handler - handle UFS errors that require s/w attention
+ * @work: pointer to work structure
  */
-static void ufshcd_fatal_err_handler(struct work_struct *work)
+static void ufshcd_err_handler(struct work_struct *work)
 {
 	struct ufs_hba *hba;
-	hba = container_of(work, struct ufs_hba, feh_workq);
+	unsigned long flags;
+	u32 err_xfer = 0;
+	u32 err_tm = 0;
+	int err = 0;
+	int tag;
+
+	hba = container_of(work, struct ufs_hba, eh_work);
 
 	pm_runtime_get_sync(hba->dev);
-	/* check if reset is already in progress */
-	if (hba->ufshcd_state != UFSHCD_STATE_RESET)
-		ufshcd_do_reset(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		goto out;
+	}
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+
+	/* Complete requests that have door-bell cleared by h/w */
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Clear pending transfer requests */
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
+		if (ufshcd_clear_cmd(hba, tag))
+			err_xfer |= 1 << tag;
+
+	/* Clear pending task management requests */
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
+		if (ufshcd_clear_tm_cmd(hba, tag))
+			err_tm |= 1 << tag;
+
+	/* Complete the requests that are cleared by s/w */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Fatal errors need reset */
+	if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
+			((hba->saved_err & UIC_ERROR) &&
+			 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
+		err = ufshcd_reset_and_restore(hba);
+		if (err) {
+			dev_err(hba->dev, "%s: reset and restore failed\n",
+					__func__);
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		}
+		/*
+		 * Inform the SCSI mid-layer that a reset was performed so that
+		 * it can handle Unit Attention properly.
+		 */
+		scsi_report_bus_reset(hba->host, 0);
+		hba->saved_err = 0;
+		hba->saved_uic_err = 0;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+
+out:
+	scsi_unblock_requests(hba->host);
 	pm_runtime_put_sync(hba->dev);
 }
 
 /**
- * ufshcd_err_handler - Check for fatal errors
- * @work: pointer to a work queue structure
+ * ufshcd_update_uic_error - check and set fatal UIC error flags.
+ * @hba: per-adapter instance
  */
-static void ufshcd_err_handler(struct ufs_hba *hba)
+static void ufshcd_update_uic_error(struct ufs_hba *hba)
 {
 	u32 reg;
 
+	/* PA_INIT_ERROR is fatal and needs UIC reset */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
+	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
+		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
+
+	/* UIC NL/TL/DME errors need software retry */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_NL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_TL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_DME_ERROR;
+
+	dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
+			__func__, hba->uic_error);
+}
+
+/**
+ * ufshcd_check_errors - Check for errors that need s/w attention
+ * @hba: per-adapter instance
+ */
+static void ufshcd_check_errors(struct ufs_hba *hba)
+{
+	bool queue_eh_work = false;
+
 	if (hba->errors & INT_FATAL_ERRORS)
-		goto fatal_eh;
+		queue_eh_work = true;
 
 	if (hba->errors & UIC_ERROR) {
-		reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
-		if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
-			goto fatal_eh;
+		hba->uic_error = 0;
+		ufshcd_update_uic_error(hba);
+		if (hba->uic_error)
+			queue_eh_work = true;
 	}
-	return;
-fatal_eh:
-	hba->ufshcd_state = UFSHCD_STATE_ERROR;
-	schedule_work(&hba->feh_workq);
+
+	if (queue_eh_work) {
+		/* handle fatal errors only when link is functional */
+		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
+			/* block commands from scsi mid-layer */
+			scsi_block_requests(hba->host);
+
+			/* transfer error masks to sticky bits */
+			hba->saved_err |= hba->errors;
+			hba->saved_uic_err |= hba->uic_error;
+
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+			schedule_work(&hba->eh_work);
+		}
+	}
+	/*
+	 * if (!queue_eh_work) -
+	 * Other errors are either non-fatal where host recovers
+	 * itself without s/w intervention or errors that will be
+	 * handled by the SCSI core layer.
+	 */
 }
 
 /**
@@ -2469,7 +2555,7 @@
 
 	tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
 	hba->tm_condition = tm_doorbell ^ hba->outstanding_tasks;
-	wake_up_interruptible(&hba->ufshcd_tm_wait_queue);
+	wake_up(&hba->tm_wq);
 }
 
 /**
@@ -2481,7 +2567,7 @@
 {
 	hba->errors = UFSHCD_ERROR_MASK & intr_status;
 	if (hba->errors)
-		ufshcd_err_handler(hba);
+		ufshcd_check_errors(hba);
 
 	if (intr_status & UFSHCD_UIC_MASK)
 		ufshcd_uic_cmd_compl(hba, intr_status);
@@ -2519,38 +2605,58 @@
 	return retval;
 }
 
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
+{
+	int err = 0;
+	u32 mask = 1 << tag;
+	unsigned long flags;
+
+	if (!test_bit(tag, &hba->outstanding_tasks))
+		goto out;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_writel(hba, ~(1 << tag), REG_UTP_TASK_REQ_LIST_CLEAR);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* poll for max. 1 sec to clear door bell register by h/w */
+	err = ufshcd_wait_for_register(hba,
+			REG_UTP_TASK_REQ_DOOR_BELL,
+			mask, 0, 1000, 1000);
+out:
+	return err;
+}
+
 /**
  * ufshcd_issue_tm_cmd - issues task management commands to controller
  * @hba: per adapter instance
- * @lrbp: pointer to local reference block
+ * @lun_id: LUN ID to which TM command is sent
+ * @task_id: task ID to which the TM command is applicable
+ * @tm_function: task management function opcode
+ * @tm_response: task management service response return value
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success.
  */
-static int
-ufshcd_issue_tm_cmd(struct ufs_hba *hba,
-		    struct ufshcd_lrb *lrbp,
-		    u8 tm_function)
+static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
+		u8 tm_function, u8 *tm_response)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_req *task_req_upiup;
 	struct Scsi_Host *host;
 	unsigned long flags;
-	int free_slot = 0;
+	int free_slot;
 	int err;
+	int task_tag;
 
 	host = hba->host;
 
+	/*
+	 * Get free slot, sleep if slots are unavailable.
+	 * Even though we use wait_event() which sleeps indefinitely,
+	 * the maximum wait time is bounded by %TM_CMD_TIMEOUT.
+	 */
+	wait_event(hba->tm_tag_wq, ufshcd_get_tm_free_slot(hba, &free_slot));
+
 	spin_lock_irqsave(host->host_lock, flags);
-
-	/* If task management queue is full */
-	free_slot = ufshcd_get_tm_free_slot(hba);
-	if (free_slot >= hba->nutmrs) {
-		spin_unlock_irqrestore(host->host_lock, flags);
-		dev_err(hba->dev, "Task management queue full\n");
-		err = FAILED;
-		goto out;
-	}
-
 	task_req_descp = hba->utmrdl_base_addr;
 	task_req_descp += free_slot;
 
@@ -2562,18 +2668,15 @@
 	/* Configure task request UPIU */
 	task_req_upiup =
 		(struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
+	task_tag = hba->nutrs + free_slot;
 	task_req_upiup->header.dword_0 =
 		UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
-					      lrbp->lun, lrbp->task_tag);
+					      lun_id, task_tag);
 	task_req_upiup->header.dword_1 =
 		UPIU_HEADER_DWORD(0, tm_function, 0, 0);
 
-	task_req_upiup->input_param1 = lrbp->lun;
-	task_req_upiup->input_param1 =
-		cpu_to_be32(task_req_upiup->input_param1);
-	task_req_upiup->input_param2 = lrbp->task_tag;
-	task_req_upiup->input_param2 =
-		cpu_to_be32(task_req_upiup->input_param2);
+	task_req_upiup->input_param1 = cpu_to_be32(lun_id);
+	task_req_upiup->input_param2 = cpu_to_be32(task_id);
 
 	/* send command to the controller */
 	__set_bit(free_slot, &hba->outstanding_tasks);
@@ -2582,91 +2685,88 @@
 	spin_unlock_irqrestore(host->host_lock, flags);
 
 	/* wait until the task management command is completed */
-	err =
-	wait_event_interruptible_timeout(hba->ufshcd_tm_wait_queue,
-					 (test_bit(free_slot,
-					 &hba->tm_condition) != 0),
-					 60 * HZ);
+	err = wait_event_timeout(hba->tm_wq,
+			test_bit(free_slot, &hba->tm_condition),
+			msecs_to_jiffies(TM_CMD_TIMEOUT));
 	if (!err) {
-		dev_err(hba->dev,
-			"Task management command timed-out\n");
-		err = FAILED;
-		goto out;
+		dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
+				__func__, tm_function);
+		if (ufshcd_clear_tm_cmd(hba, free_slot))
+			dev_WARN(hba->dev, "%s: unable clear tm cmd (slot %d) after timeout\n",
+					__func__, free_slot);
+		err = -ETIMEDOUT;
+	} else {
+		err = ufshcd_task_req_compl(hba, free_slot, tm_response);
 	}
+
 	clear_bit(free_slot, &hba->tm_condition);
-	err = ufshcd_task_req_compl(hba, free_slot);
-out:
+	ufshcd_put_tm_slot(hba, free_slot);
+	wake_up(&hba->tm_tag_wq);
+
 	return err;
 }
 
 /**
- * ufshcd_device_reset - reset device and abort all the pending commands
+ * ufshcd_eh_device_reset_handler - device reset handler registered to
+ *                                    scsi layer.
  * @cmd: SCSI command pointer
  *
  * Returns SUCCESS/FAILED
  */
-static int ufshcd_device_reset(struct scsi_cmnd *cmd)
+static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
 	unsigned int tag;
 	u32 pos;
 	int err;
+	u8 resp = 0xF;
+	struct ufshcd_lrb *lrbp;
+	unsigned long flags;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_LOGICAL_RESET);
-	if (err == FAILED)
+	lrbp = &hba->lrb[tag];
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		if (!err)
+			err = resp;
 		goto out;
+	}
 
-	for (pos = 0; pos < hba->nutrs; pos++) {
-		if (test_bit(pos, &hba->outstanding_reqs) &&
-		    (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
-
-			/* clear the respective UTRLCLR register bit */
-			ufshcd_utrl_clear(hba, pos);
-
-			clear_bit(pos, &hba->outstanding_reqs);
-
-			if (hba->lrb[pos].cmd) {
-				scsi_dma_unmap(hba->lrb[pos].cmd);
-				hba->lrb[pos].cmd->result =
-					DID_ABORT << 16;
-				hba->lrb[pos].cmd->scsi_done(cmd);
-				hba->lrb[pos].cmd = NULL;
-				clear_bit_unlock(pos, &hba->lrb_in_use);
-				wake_up(&hba->dev_cmd.tag_wq);
-			}
+	/* clear the commands that were pending for corresponding LUN */
+	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
+		if (hba->lrb[pos].lun == lrbp->lun) {
+			err = ufshcd_clear_cmd(hba, pos);
+			if (err)
+				break;
 		}
-	} /* end of for */
+	}
+	spin_lock_irqsave(host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	spin_unlock_irqrestore(host->host_lock, flags);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
 	return err;
 }
 
 /**
- * ufshcd_host_reset - Main reset function registered with scsi layer
- * @cmd: SCSI command pointer
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_host_reset(struct scsi_cmnd *cmd)
-{
-	struct ufs_hba *hba;
-
-	hba = shost_priv(cmd->device->host);
-
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		return SUCCESS;
-
-	return ufshcd_do_reset(hba);
-}
-
-/**
  * ufshcd_abort - abort a specific command
  * @cmd: SCSI command pointer
  *
+ * Abort the pending command in the device by sending a UFS_ABORT_TASK task
+ * management command, and in the host controller by clearing the door-bell
+ * register. The controller may still be sending the command to the device
+ * while the abort is issued; to avoid that race, first issue UFS_QUERY_TASK
+ * to check that the command has really been issued, then try to abort it.
+ *
  * Returns SUCCESS/FAILED
  */
 static int ufshcd_abort(struct scsi_cmnd *cmd)
@@ -2675,33 +2775,68 @@
 	struct ufs_hba *hba;
 	unsigned long flags;
 	unsigned int tag;
-	int err;
+	int err = 0;
+	int poll_cnt;
+	u8 resp = 0xF;
+	struct ufshcd_lrb *lrbp;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	spin_lock_irqsave(host->host_lock, flags);
+	/* If command is already aborted/completed, return SUCCESS */
+	if (!(test_bit(tag, &hba->outstanding_reqs)))
+		goto out;
 
-	/* check if command is still pending */
-	if (!(test_bit(tag, &hba->outstanding_reqs))) {
-		err = FAILED;
-		spin_unlock_irqrestore(host->host_lock, flags);
+	lrbp = &hba->lrb[tag];
+	for (poll_cnt = 100; poll_cnt; poll_cnt--) {
+		err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+				UFS_QUERY_TASK, &resp);
+		if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
+			/* cmd pending in the device */
+			break;
+		} else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+			u32 reg;
+
+			/*
+			 * cmd not pending in the device, check if it is
+			 * in transition.
+			 */
+			reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+			if (reg & (1 << tag)) {
+				/* sleep for max. 200us to stabilize */
+				usleep_range(100, 200);
+				continue;
+			}
+			/* command completed already */
+			goto out;
+		} else {
+			if (!err)
+				err = resp; /* service response error */
+			goto out;
+		}
+	}
+
+	if (!poll_cnt) {
+		err = -EBUSY;
 		goto out;
 	}
-	spin_unlock_irqrestore(host->host_lock, flags);
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_ABORT_TASK);
-	if (err == FAILED)
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+			UFS_ABORT_TASK, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		if (!err)
+			err = resp; /* service response error */
+		goto out;
+	}
+
+	err = ufshcd_clear_cmd(hba, tag);
+	if (err)
 		goto out;
 
 	scsi_dma_unmap(cmd);
 
 	spin_lock_irqsave(host->host_lock, flags);
-
-	/* clear the respective UTRLCLR register bit */
-	ufshcd_utrl_clear(hba, tag);
-
 	__clear_bit(tag, &hba->outstanding_reqs);
 	hba->lrb[tag].cmd = NULL;
 	spin_unlock_irqrestore(host->host_lock, flags);
@@ -2709,6 +2844,129 @@
 	clear_bit_unlock(tag, &hba->lrb_in_use);
 	wake_up(&hba->dev_cmd.tag_wq);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
+
+	return err;
+}
+
+/**
+ * ufshcd_host_reset_and_restore - reset and restore host controller
+ * @hba: per-adapter instance
+ *
+ * Note that host controller reset may issue DME_RESET to
+ * local and remote (device) Uni-Pro stack and the attributes
+ * are reset to default state.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
+{
+	int err;
+	async_cookie_t cookie;
+	unsigned long flags;
+
+	/* Reset the host controller */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_hba_stop(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_hba_enable(hba);
+	if (err)
+		goto out;
+
+	/* Establish the link again and restore the device */
+	cookie = async_schedule(ufshcd_async_scan, hba);
+	/* wait for async scan to be completed */
+	async_synchronize_cookie(++cookie);
+	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
+		err = -EIO;
+out:
+	if (err)
+		dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
+
+	return err;
+}
+
+/**
+ * ufshcd_reset_and_restore - reset and re-initialize host/device
+ * @hba: per-adapter instance
+ *
+ * Reset and recover device, host and re-establish link. This
+ * is helpful to recover the communication in fatal error conditions.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_reset_and_restore(struct ufs_hba *hba)
+{
+	int err = 0;
+	unsigned long flags;
+
+	err = ufshcd_host_reset_and_restore(hba);
+
+	/*
+	 * After reset the door-bell might be cleared, complete
+	 * outstanding requests in s/w here.
+	 */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return err;
+}
+
+/**
+ * ufshcd_eh_host_reset_handler - host reset handler registered to scsi layer
+ * @cmd: SCSI command pointer
+ *
+ * Returns SUCCESS/FAILED
+ */
+static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
+{
+	int err;
+	unsigned long flags;
+	struct ufs_hba *hba;
+
+	hba = shost_priv(cmd->device->host);
+
+	/*
+	 * Check if there is any race with fatal error handling.
+	 * If so, wait for it to complete. Even though fatal error
+	 * handling does reset and restore in some cases, don't assume
+	 * anything out of it. We are just avoiding race here.
+	 */
+	do {
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (!(work_pending(&hba->eh_work) ||
+				hba->ufshcd_state == UFSHCD_STATE_RESET))
+			break;
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+		flush_work(&hba->eh_work);
+	} while (1);
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_reset_and_restore(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (!err) {
+		err = SUCCESS;
+		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+	} else {
+		err = FAILED;
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
 	return err;
 }
 
@@ -2737,8 +2995,13 @@
 		goto out;
 
 	ufshcd_force_reset_auto_bkops(hba);
-	scsi_scan_host(hba->host);
-	pm_runtime_put_sync(hba->dev);
+	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
+	/* If we are in error handling context no need to scan the host */
+	if (!ufshcd_eh_in_progress(hba)) {
+		scsi_scan_host(hba->host);
+		pm_runtime_put_sync(hba->dev);
+	}
 out:
 	return;
 }
@@ -2751,8 +3014,8 @@
 	.slave_alloc		= ufshcd_slave_alloc,
 	.slave_destroy		= ufshcd_slave_destroy,
 	.eh_abort_handler	= ufshcd_abort,
-	.eh_device_reset_handler = ufshcd_device_reset,
-	.eh_host_reset_handler	= ufshcd_host_reset,
+	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
+	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
@@ -2916,10 +3179,11 @@
 	host->max_cmd_len = MAX_CDB_SIZE;
 
 	/* Initailize wait queue for task management */
-	init_waitqueue_head(&hba->ufshcd_tm_wait_queue);
+	init_waitqueue_head(&hba->tm_wq);
+	init_waitqueue_head(&hba->tm_tag_wq);
 
 	/* Initialize work queues */
-	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
+	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
 
 	/* Initialize UIC command mutex */

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 577679a..acf318e 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h

@@ -174,15 +174,21 @@
  * @irq: Irq number of the controller
  * @active_uic_cmd: handle of active UIC command
  * @uic_cmd_mutex: mutex for uic command
- * @ufshcd_tm_wait_queue: wait queue for task management
+ * @tm_wq: wait queue for task management
+ * @tm_tag_wq: wait queue for free task management slots
+ * @tm_slots_in_use: bit map of task management request slots in use
  * @pwr_done: completion for power mode change
  * @tm_condition: condition variable for task management
  * @ufshcd_state: UFSHCD states
+ * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
- * @feh_workq: Work queue for fatal controller error handling
+ * @eh_work: Worker to handle UFS errors that require s/w attention
  * @eeh_work: Worker to handle exception events
  * @errors: HBA errors
+ * @uic_error: UFS interconnect layer error status
+ * @saved_err: sticky error mask
+ * @saved_uic_err: sticky UIC error mask
  * @dev_cmd: ufs device management command information
  * @auto_bkops_enabled: to track whether bkops is enabled in device
  */
@@ -217,21 +223,27 @@
 	struct uic_command *active_uic_cmd;
 	struct mutex uic_cmd_mutex;
 
-	wait_queue_head_t ufshcd_tm_wait_queue;
+	wait_queue_head_t tm_wq;
+	wait_queue_head_t tm_tag_wq;
 	unsigned long tm_condition;
+	unsigned long tm_slots_in_use;
 
 	struct completion *pwr_done;
 
 	u32 ufshcd_state;
+	u32 eh_flags;
 	u32 intr_mask;
 	u16 ee_ctrl_mask;
 
 	/* Work Queues */
-	struct work_struct feh_workq;
+	struct work_struct eh_work;
 	struct work_struct eeh_work;
 
 	/* HBA Errors */
 	u32 errors;
+	u32 uic_error;
+	u32 saved_err;
+	u32 saved_uic_err;
 
 	/* Device management request data */
 	struct ufs_dev_cmd dev_cmd;
@@ -263,6 +275,8 @@
 		GENERAL_UPIU_REQUEST_SIZE + QUERY_DESC_MAX_SIZE);
 }
 
+extern int ufshcd_suspend(struct ufs_hba *hba, pm_message_t state);
+extern int ufshcd_resume(struct ufs_hba *hba);
 extern int ufshcd_runtime_suspend(struct ufs_hba *hba);
 extern int ufshcd_runtime_resume(struct ufs_hba *hba);
 extern int ufshcd_runtime_idle(struct ufs_hba *hba);

diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 0475c66..9abc7e3 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h

@@ -304,10 +304,10 @@
  * @size: size of physical segment DW-3
  */
 struct ufshcd_sg_entry {
-	u32    base_addr;
-	u32    upper_addr;
-	u32    reserved;
-	u32    size;
+	__le32    base_addr;
+	__le32    upper_addr;
+	__le32    reserved;
+	__le32    size;
 };
 
 /**
@@ -330,10 +330,10 @@
  * @dword3: Descriptor Header DW3
  */
 struct request_desc_header {
-	u32 dword_0;
-	u32 dword_1;
-	u32 dword_2;
-	u32 dword_3;
+	__le32 dword_0;
+	__le32 dword_1;
+	__le32 dword_2;
+	__le32 dword_3;
 };
 
 /**
@@ -352,16 +352,16 @@
 	struct request_desc_header header;
 
 	/* DW 4-5*/
-	u32  command_desc_base_addr_lo;
-	u32  command_desc_base_addr_hi;
+	__le32  command_desc_base_addr_lo;
+	__le32  command_desc_base_addr_hi;
 
 	/* DW 6 */
-	u16  response_upiu_length;
-	u16  response_upiu_offset;
+	__le16  response_upiu_length;
+	__le16  response_upiu_offset;
 
 	/* DW 7 */
-	u16  prd_table_length;
-	u16  prd_table_offset;
+	__le16  prd_table_length;
+	__le16  prd_table_offset;
 };
 
 /**
@@ -376,10 +376,10 @@
 	struct request_desc_header header;
 
 	/* DW 4-11 */
-	u32 task_req_upiu[TASK_REQ_UPIU_SIZE_DWORDS];
+	__le32 task_req_upiu[TASK_REQ_UPIU_SIZE_DWORDS];
 
 	/* DW 12-19 */
-	u32 task_rsp_upiu[TASK_RSP_UPIU_SIZE_DWORDS];
+	__le32 task_rsp_upiu[TASK_RSP_UPIU_SIZE_DWORDS];
 };
 
 #endif /* End of Header */

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index db3b494..89ee592 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c

@@ -23,6 +23,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_scsi.h>
 #include <linux/cpu.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
@@ -37,6 +38,7 @@
 	struct completion *comp;
 	union {
 		struct virtio_scsi_cmd_req       cmd;
+		struct virtio_scsi_cmd_req_pi    cmd_pi;
 		struct virtio_scsi_ctrl_tmf_req  tmf;
 		struct virtio_scsi_ctrl_an_req   an;
 	} req;
@@ -73,17 +75,12 @@
  * queue, and also lets the driver optimize the IRQ affinity for the virtqueues
  * (each virtqueue's affinity is set to the CPU that "owns" the queue).
  *
- * An interesting effect of this policy is that only writes to req_vq need to
- * take the tgt_lock.  Read can be done outside the lock because:
+ * tgt_lock is held to serialize reading and writing req_vq. Reading req_vq
+ * could be done locklessly, but we do not do it yet.
  *
- * - writes of req_vq only occur when atomic_inc_return(&tgt->reqs) returns 1.
- *   In that case, no other CPU is reading req_vq: even if they were in
- *   virtscsi_queuecommand_multi, they would be spinning on tgt_lock.
- *
- * - reads of req_vq only occur when the target is not idle (reqs != 0).
- *   A CPU that enters virtscsi_queuecommand_multi will not modify req_vq.
- *
- * Similarly, decrements of reqs are never concurrent with writes of req_vq.
+ * Decrements of reqs are never concurrent with writes of req_vq: before the
+ * decrement reqs will be != 0; after the decrement the virtqueue completion
+ * routine will not use the req_vq so it can be changed by a new request.
  * Thus they can happen outside the tgt_lock, provided of course we make reqs
  * an atomic_t.
  */
@@ -204,7 +201,6 @@
 			set_driver_byte(sc, DRIVER_SENSE);
 	}
 
-	mempool_free(cmd, virtscsi_cmd_pool);
 	sc->scsi_done(sc);
 
 	atomic_dec(&tgt->reqs);
@@ -238,38 +234,6 @@
 	int index = vq->index - VIRTIO_SCSI_VQ_BASE;
 	struct virtio_scsi_vq *req_vq = &vscsi->req_vqs[index];
 
-	/*
-	 * Read req_vq before decrementing the reqs field in
-	 * virtscsi_complete_cmd.
-	 *
-	 * With barriers:
-	 *
-	 * 	CPU #0			virtscsi_queuecommand_multi (CPU #1)
-	 * 	------------------------------------------------------------
-	 * 	lock vq_lock
-	 * 	read req_vq
-	 * 	read reqs (reqs = 1)
-	 * 	write reqs (reqs = 0)
-	 * 				increment reqs (reqs = 1)
-	 * 				write req_vq
-	 *
-	 * Possible reordering without barriers:
-	 *
-	 * 	CPU #0			virtscsi_queuecommand_multi (CPU #1)
-	 * 	------------------------------------------------------------
-	 * 	lock vq_lock
-	 * 	read reqs (reqs = 1)
-	 * 	write reqs (reqs = 0)
-	 * 				increment reqs (reqs = 1)
-	 * 				write req_vq
-	 * 	read (wrong) req_vq
-	 *
-	 * We do not need a full smp_rmb, because req_vq is required to get
-	 * to tgt->reqs: tgt is &vscsi->tgt[sc->device->id], where sc is stored
-	 * in the virtqueue as the user token.
-	 */
-	smp_read_barrier_depends();
-
 	virtscsi_vq_done(vscsi, req_vq, virtscsi_complete_cmd);
 };
 
@@ -279,8 +243,6 @@
 
 	if (cmd->comp)
 		complete_all(cmd->comp);
-	else
-		mempool_free(cmd, virtscsi_cmd_pool);
 }
 
 static void virtscsi_ctrl_done(struct virtqueue *vq)
@@ -433,14 +395,13 @@
  * @cmd		: command structure
  * @req_size	: size of the request buffer
  * @resp_size	: size of the response buffer
- * @gfp	: flags to use for memory allocations
  */
 static int virtscsi_add_cmd(struct virtqueue *vq,
 			    struct virtio_scsi_cmd *cmd,
-			    size_t req_size, size_t resp_size, gfp_t gfp)
+			    size_t req_size, size_t resp_size)
 {
 	struct scsi_cmnd *sc = cmd->sc;
-	struct scatterlist *sgs[4], req, resp;
+	struct scatterlist *sgs[6], req, resp;
 	struct sg_table *out, *in;
 	unsigned out_num = 0, in_num = 0;
 
@@ -458,30 +419,38 @@
 	sgs[out_num++] = &req;
 
 	/* Data-out buffer.  */
-	if (out)
+	if (out) {
+		/* Place WRITE protection SGLs before Data OUT payload */
+		if (scsi_prot_sg_count(sc))
+			sgs[out_num++] = scsi_prot_sglist(sc);
 		sgs[out_num++] = out->sgl;
+	}
 
 	/* Response header.  */
 	sg_init_one(&resp, &cmd->resp, resp_size);
 	sgs[out_num + in_num++] = &resp;
 
 	/* Data-in buffer */
-	if (in)
+	if (in) {
+		/* Place READ protection SGLs before Data IN payload */
+		if (scsi_prot_sg_count(sc))
+			sgs[out_num + in_num++] = scsi_prot_sglist(sc);
 		sgs[out_num + in_num++] = in->sgl;
+	}
 
-	return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, gfp);
+	return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_ATOMIC);
 }
 
 static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
 			     struct virtio_scsi_cmd *cmd,
-			     size_t req_size, size_t resp_size, gfp_t gfp)
+			     size_t req_size, size_t resp_size)
 {
 	unsigned long flags;
 	int err;
 	bool needs_kick = false;
 
 	spin_lock_irqsave(&vq->vq_lock, flags);
-	err = virtscsi_add_cmd(vq->vq, cmd, req_size, resp_size, gfp);
+	err = virtscsi_add_cmd(vq->vq, cmd, req_size, resp_size);
 	if (!err)
 		needs_kick = virtqueue_kick_prepare(vq->vq);
 
@@ -492,14 +461,46 @@
 	return err;
 }
 
+static void virtio_scsi_init_hdr(struct virtio_scsi_cmd_req *cmd,
+				 struct scsi_cmnd *sc)
+{
+	cmd->lun[0] = 1;
+	cmd->lun[1] = sc->device->id;
+	cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
+	cmd->lun[3] = sc->device->lun & 0xff;
+	cmd->tag = (unsigned long)sc;
+	cmd->task_attr = VIRTIO_SCSI_S_SIMPLE;
+	cmd->prio = 0;
+	cmd->crn = 0;
+}
+
+static void virtio_scsi_init_hdr_pi(struct virtio_scsi_cmd_req_pi *cmd_pi,
+				    struct scsi_cmnd *sc)
+{
+	struct request *rq = sc->request;
+	struct blk_integrity *bi;
+
+	virtio_scsi_init_hdr((struct virtio_scsi_cmd_req *)cmd_pi, sc);
+
+	if (!rq || !scsi_prot_sg_count(sc))
+		return;
+
+	bi = blk_get_integrity(rq->rq_disk);
+
+	if (sc->sc_data_direction == DMA_TO_DEVICE)
+		cmd_pi->pi_bytesout = blk_rq_sectors(rq) * bi->tuple_size;
+	else if (sc->sc_data_direction == DMA_FROM_DEVICE)
+		cmd_pi->pi_bytesin = blk_rq_sectors(rq) * bi->tuple_size;
+}
+
 static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 				 struct virtio_scsi_vq *req_vq,
 				 struct scsi_cmnd *sc)
 {
-	struct virtio_scsi_cmd *cmd;
-	int ret;
-
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
+	struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
+	int req_size;
+
 	BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
 
 	/* TODO: check feature bit and fail if unsupported?  */
@@ -508,36 +509,24 @@
 	dev_dbg(&sc->device->sdev_gendev,
 		"cmd %p CDB: %#02x\n", sc, sc->cmnd[0]);
 
-	ret = SCSI_MLQUEUE_HOST_BUSY;
-	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC);
-	if (!cmd)
-		goto out;
-
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->sc = sc;
-	cmd->req.cmd = (struct virtio_scsi_cmd_req){
-		.lun[0] = 1,
-		.lun[1] = sc->device->id,
-		.lun[2] = (sc->device->lun >> 8) | 0x40,
-		.lun[3] = sc->device->lun & 0xff,
-		.tag = (unsigned long)sc,
-		.task_attr = VIRTIO_SCSI_S_SIMPLE,
-		.prio = 0,
-		.crn = 0,
-	};
 
 	BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
-	memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
 
-	if (virtscsi_kick_cmd(req_vq, cmd,
-			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
-			      GFP_ATOMIC) == 0)
-		ret = 0;
-	else
-		mempool_free(cmd, virtscsi_cmd_pool);
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_T10_PI)) {
+		virtio_scsi_init_hdr_pi(&cmd->req.cmd_pi, sc);
+		memcpy(cmd->req.cmd_pi.cdb, sc->cmnd, sc->cmd_len);
+		req_size = sizeof(cmd->req.cmd_pi);
+	} else {
+		virtio_scsi_init_hdr(&cmd->req.cmd, sc);
+		memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
+		req_size = sizeof(cmd->req.cmd);
+	}
 
-out:
-	return ret;
+	if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0)
+		return SCSI_MLQUEUE_HOST_BUSY;
+	return 0;
 }
 
 static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
@@ -560,12 +549,8 @@
 
 	spin_lock_irqsave(&tgt->tgt_lock, flags);
 
-	/*
-	 * The memory barrier after atomic_inc_return matches
-	 * the smp_read_barrier_depends() in virtscsi_req_done.
-	 */
 	if (atomic_inc_return(&tgt->reqs) > 1)
-		vq = ACCESS_ONCE(tgt->req_vq);
+		vq = tgt->req_vq;
 	else {
 		queue_num = smp_processor_id();
 		while (unlikely(queue_num >= vscsi->num_queues))
@@ -596,8 +581,7 @@
 
 	cmd->comp = &comp;
 	if (virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd,
-			      sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
-			      GFP_NOIO) < 0)
+			      sizeof cmd->req.tmf, sizeof cmd->resp.tmf) < 0)
 		goto out;
 
 	wait_for_completion(&comp);
@@ -683,6 +667,7 @@
 	.name = "Virtio SCSI HBA",
 	.proc_name = "virtio_scsi",
 	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
 	.queuecommand = virtscsi_queuecommand_single,
 	.eh_abort_handler = virtscsi_abort,
 	.eh_device_reset_handler = virtscsi_device_reset,
@@ -699,6 +684,7 @@
 	.name = "Virtio SCSI HBA",
 	.proc_name = "virtio_scsi",
 	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
 	.queuecommand = virtscsi_queuecommand_multi,
 	.eh_abort_handler = virtscsi_abort,
 	.eh_device_reset_handler = virtscsi_device_reset,
@@ -875,7 +861,7 @@
 {
 	struct Scsi_Host *shost;
 	struct virtio_scsi *vscsi;
-	int err;
+	int err, host_prot;
 	u32 sg_elems, num_targets;
 	u32 cmd_per_lun;
 	u32 num_queues;
@@ -925,6 +911,16 @@
 	shost->max_id = num_targets;
 	shost->max_channel = 0;
 	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
+
+	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_T10_PI)) {
+		host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION |
+			    SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION |
+			    SHOST_DIX_TYPE2_PROTECTION | SHOST_DIX_TYPE3_PROTECTION;
+
+		scsi_host_set_prot(shost, host_prot);
+		scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+	}
+
 	err = scsi_add_host(shost, &vdev->dev);
 	if (err)
 		goto scsi_add_host_failed;
@@ -994,6 +990,7 @@
 static unsigned int features[] = {
 	VIRTIO_SCSI_F_HOTPLUG,
 	VIRTIO_SCSI_F_CHANGE,
+	VIRTIO_SCSI_F_T10_PI,
 };
 
 static struct virtio_driver virtio_scsi_driver = {

diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c
index 0901ef5..08356b6 100644
--- a/drivers/staging/et131x/et131x.c
+++ b/drivers/staging/et131x/et131x.c

@@ -4605,7 +4605,7 @@
 	netdev->netdev_ops     = &et131x_netdev_ops;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &et131x_ethtool_ops);
+	netdev->ethtool_ops = &et131x_ethtool_ops;
 
 	adapter = et131x_adapter_init(netdev, pdev);
 

diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
index d6421b9..a6158be 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c

@@ -2249,7 +2249,7 @@
 
 	ft1000InitProc(dev);
 	ft1000_card_present = 1;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	printk(KERN_INFO "ft1000: %s: addr 0x%04lx irq %d, MAC addr %pM\n",
 			dev->name, dev->base_addr, dev->irq, dev->dev_addr);
 	return dev;

diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig
index c6e8ba7..82fb758 100644
--- a/drivers/staging/imx-drm/Kconfig
+++ b/drivers/staging/imx-drm/Kconfig

@@ -39,19 +39,10 @@
 	  Choose this to enable the internal LVDS Display Bridge (LDB)
 	  found on i.MX53 and i.MX6 processors.
 
-config DRM_IMX_IPUV3_CORE
-	tristate "IPUv3 core support"
-	depends on DRM_IMX
-	depends on RESET_CONTROLLER
-	help
-	  Choose this if you have a i.MX5/6 system and want
-	  to use the IPU. This option only enables IPU base
-	  support.
-
 config DRM_IMX_IPUV3
 	tristate "DRM Support for i.MX IPUv3"
 	depends on DRM_IMX
-	depends on DRM_IMX_IPUV3_CORE
+	depends on IMX_IPUV3_CORE
 	help
 	  Choose this if you have a i.MX5 or i.MX6 processor.
 

diff --git a/drivers/staging/imx-drm/Makefile b/drivers/staging/imx-drm/Makefile
index 129e3a3..582c438 100644
--- a/drivers/staging/imx-drm/Makefile
+++ b/drivers/staging/imx-drm/Makefile

@@ -6,7 +6,6 @@
 obj-$(CONFIG_DRM_IMX_PARALLEL_DISPLAY) += parallel-display.o
 obj-$(CONFIG_DRM_IMX_TVE) += imx-tve.o
 obj-$(CONFIG_DRM_IMX_LDB) += imx-ldb.o
-obj-$(CONFIG_DRM_IMX_IPUV3_CORE) += ipu-v3/
 
 imx-ipuv3-crtc-objs  := ipuv3-crtc.o ipuv3-plane.o
 obj-$(CONFIG_DRM_IMX_IPUV3)	+= imx-ipuv3-crtc.o

diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c
index c270c9a..def8280 100644
--- a/drivers/staging/imx-drm/imx-drm-core.c
+++ b/drivers/staging/imx-drm/imx-drm-core.c

@@ -200,13 +200,6 @@
 	.llseek = noop_llseek,
 };
 
-int imx_drm_connector_mode_valid(struct drm_connector *connector,
-	struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-EXPORT_SYMBOL(imx_drm_connector_mode_valid);
-
 void imx_drm_connector_destroy(struct drm_connector *connector)
 {
 	drm_sysfs_connector_remove(connector);
@@ -305,7 +298,7 @@
 			dev_err(drm->dev,
 				"[CONNECTOR:%d:%s] drm_sysfs_connector_add failed: %d\n",
 				connector->base.id,
-				drm_get_connector_name(connector), ret);
+				connector->name, ret);
 			goto err_unbind;
 		}
 	}

diff --git a/drivers/staging/imx-drm/imx-drm.h b/drivers/staging/imx-drm/imx-drm.h
index a322bac..7453ae0 100644
--- a/drivers/staging/imx-drm/imx-drm.h
+++ b/drivers/staging/imx-drm/imx-drm.h

@@ -50,8 +50,6 @@
 int imx_drm_encoder_parse_of(struct drm_device *drm,
 	struct drm_encoder *encoder, struct device_node *np);
 
-int imx_drm_connector_mode_valid(struct drm_connector *connector,
-	struct drm_display_mode *mode);
 void imx_drm_connector_destroy(struct drm_connector *connector);
 void imx_drm_encoder_destroy(struct drm_encoder *encoder);
 

diff --git a/drivers/staging/imx-drm/imx-hdmi.c b/drivers/staging/imx-drm/imx-hdmi.c
index 1b44048..18c9ccd 100644
--- a/drivers/staging/imx-drm/imx-hdmi.c
+++ b/drivers/staging/imx-drm/imx-hdmi.c

@@ -27,8 +27,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder_slave.h>
+#include <video/imx-ipu-v3.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
 #include "imx-hdmi.h"
 #include "imx-drm.h"
 
@@ -1490,7 +1490,6 @@
 
 static struct drm_connector_helper_funcs imx_hdmi_connector_helper_funcs = {
 	.get_modes = imx_hdmi_connector_get_modes,
-	.mode_valid = imx_drm_connector_mode_valid,
 	.best_encoder = imx_hdmi_connector_best_encoder,
 };
 

diff --git a/drivers/staging/imx-drm/imx-ldb.c b/drivers/staging/imx-drm/imx-ldb.c
index fe4c1ef..7e3f019 100644
--- a/drivers/staging/imx-drm/imx-ldb.c
+++ b/drivers/staging/imx-drm/imx-ldb.c

@@ -317,7 +317,6 @@
 static struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = {
 	.get_modes = imx_ldb_connector_get_modes,
 	.best_encoder = imx_ldb_connector_best_encoder,
-	.mode_valid = imx_drm_connector_mode_valid,
 };
 
 static struct drm_encoder_funcs imx_ldb_encoder_funcs = {

diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c
index a23f4f7..c628fcd 100644
--- a/drivers/staging/imx-drm/imx-tve.c
+++ b/drivers/staging/imx-drm/imx-tve.c

@@ -30,8 +30,8 @@
 #include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <video/imx-ipu-v3.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
 #include "imx-drm.h"
 
 #define TVE_COM_CONF_REG	0x00
@@ -249,11 +249,6 @@
 {
 	struct imx_tve *tve = con_to_tve(connector);
 	unsigned long rate;
-	int ret;
-
-	ret = imx_drm_connector_mode_valid(connector, mode);
-	if (ret != MODE_OK)
-		return ret;
 
 	/* pixel clock with 2x oversampling */
 	rate = clk_round_rate(tve->clk, 2000UL * mode->clock) / 2000;

diff --git a/drivers/staging/imx-drm/ipu-v3/Makefile b/drivers/staging/imx-drm/ipu-v3/Makefile
deleted file mode 100644
index 28ed72e..0000000
--- a/drivers/staging/imx-drm/ipu-v3/Makefile
+++ /dev/null

@@ -1,3 +0,0 @@
-obj-$(CONFIG_DRM_IMX_IPUV3_CORE) += imx-ipu-v3.o
-
-imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o

diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
index 47bec5e..720868b 100644
--- a/drivers/staging/imx-drm/ipuv3-crtc.c
+++ b/drivers/staging/imx-drm/ipuv3-crtc.c

@@ -30,7 +30,7 @@
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "imx-drm.h"
 #include "ipuv3-plane.h"
 

diff --git a/drivers/staging/imx-drm/ipuv3-plane.c b/drivers/staging/imx-drm/ipuv3-plane.c
index 5697e59..6f393a1 100644
--- a/drivers/staging/imx-drm/ipuv3-plane.c
+++ b/drivers/staging/imx-drm/ipuv3-plane.c

@@ -17,7 +17,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
+#include "video/imx-ipu-v3.h"
 #include "ipuv3-plane.h"
 
 #define to_ipu_plane(x)	container_of(x, struct ipu_plane, base)

diff --git a/drivers/staging/imx-drm/parallel-display.c b/drivers/staging/imx-drm/parallel-display.c
index eaf4dda..b567832 100644
--- a/drivers/staging/imx-drm/parallel-display.c
+++ b/drivers/staging/imx-drm/parallel-display.c

@@ -148,7 +148,6 @@
 static struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = {
 	.get_modes = imx_pd_connector_get_modes,
 	.best_encoder = imx_pd_connector_best_encoder,
-	.mode_valid = imx_drm_connector_mode_valid,
 };
 
 static struct drm_encoder_funcs imx_pd_encoder_funcs = {

diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
index 827209e..386a36c 100644
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ b/drivers/staging/lustre/lustre/include/lclient.h

@@ -82,16 +82,7 @@
 	/**
 	 * I/O vector information to or from which read/write is going.
 	 */
-	struct iovec *cui_iov;
-	unsigned long cui_nrsegs;
-	/**
-	 * Total iov count for left IO.
-	 */
-	unsigned long cui_tot_nrsegs;
-	/**
-	 * Old length for iov that was truncated partially.
-	 */
-	size_t cui_iov_olen;
+	struct iov_iter *cui_iter;
 	/**
 	 * Total size for the left IO.
 	 */

diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
index dc24cfa..1b0c216 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c

@@ -720,31 +720,12 @@
 void ccc_io_update_iov(const struct lu_env *env,
 		       struct ccc_io *cio, struct cl_io *io)
 {
-	int i;
 	size_t size = io->u.ci_rw.crw_count;
 
-	cio->cui_iov_olen = 0;
-	if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
+	if (!cl_is_normalio(env, io) || cio->cui_iter == NULL)
 		return;
 
-	for (i = 0; i < cio->cui_tot_nrsegs; i++) {
-		struct iovec *iv = &cio->cui_iov[i];
-
-		if (iv->iov_len < size)
-			size -= iv->iov_len;
-		else {
-			if (iv->iov_len > size) {
-				cio->cui_iov_olen = iv->iov_len;
-				iv->iov_len = size;
-			}
-			break;
-		}
-	}
-
-	cio->cui_nrsegs = i + 1;
-	LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
-		 "tot_nrsegs: %lu, nrsegs: %lu\n",
-		 cio->cui_tot_nrsegs, cio->cui_nrsegs);
+	iov_iter_truncate(cio->cui_iter, size);
 }
 
 int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
@@ -775,30 +756,7 @@
 	if (!cl_is_normalio(env, io))
 		return;
 
-	LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
-	LASSERT(cio->cui_tot_count  >= nob);
-
-	cio->cui_iov	+= cio->cui_nrsegs;
-	cio->cui_tot_nrsegs -= cio->cui_nrsegs;
-	cio->cui_tot_count  -= nob;
-
-	/* update the iov */
-	if (cio->cui_iov_olen > 0) {
-		struct iovec *iv;
-
-		cio->cui_iov--;
-		cio->cui_tot_nrsegs++;
-		iv = &cio->cui_iov[0];
-		if (io->ci_continue) {
-			iv->iov_base += iv->iov_len;
-			LASSERT(cio->cui_iov_olen > iv->iov_len);
-			iv->iov_len = cio->cui_iov_olen - iv->iov_len;
-		} else {
-			/* restore the iov_len, in case of restart io. */
-			iv->iov_len = cio->cui_iov_olen;
-		}
-		cio->cui_iov_olen = 0;
-	}
+	iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
 }
 
 /**

diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index c4ddec2..716e1ee 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c

@@ -1114,9 +1114,7 @@
 
 		switch (vio->cui_io_subtype) {
 		case IO_NORMAL:
-			cio->cui_iov = args->u.normal.via_iov;
-			cio->cui_nrsegs = args->u.normal.via_nrsegs;
-			cio->cui_tot_nrsegs = cio->cui_nrsegs;
+			cio->cui_iter = args->u.normal.via_iter;
 			cio->cui_iocb = args->u.normal.via_iocb;
 			if ((iot == CIT_WRITE) &&
 			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1180,58 +1178,23 @@
 	return result;
 }
 
-static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (result)
-		return result;
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
-				    &iocb->ki_pos, count);
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-			    loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
+				    &iocb->ki_pos, iov_iter_count(to));
 	cl_env_put(env, &refcheck);
 	return result;
 }
@@ -1239,64 +1202,27 @@
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (result)
-		return result;
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
-				  &iocb->ki_pos, count);
+				  &iocb->ki_pos, iov_iter_count(from));
 	cl_env_put(env, &refcheck);
 	return result;
 }
 
-static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-			     loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-
-
 /*
  * Send file content (through pagecache) somewhere with helper
  */
@@ -3143,10 +3069,10 @@
 
 /* -o localflock - only provides locally consistent flock locks */
 struct file_operations ll_file_operations = {
-	.read	   = ll_file_read,
-	.aio_read = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3158,10 +3084,10 @@
 };
 
 struct file_operations ll_file_operations_flock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3176,10 +3102,10 @@
 
 /* These are for -o noflock - to return ENOSYS on flock calls */
 struct file_operations ll_file_operations_noflock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,

diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index dde7632..140ee94 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h

@@ -917,8 +917,7 @@
 	union {
 		struct {
 			struct kiocb      *via_iocb;
-			struct iovec      *via_iov;
-			unsigned long      via_nrsegs;
+			struct iov_iter   *via_iter;
 		} normal;
 		struct {
 			struct pipe_inode_info  *via_pipe;

diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index f0122c5..5616210 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c

@@ -151,8 +151,7 @@
 		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
 		if (result == 0) {
 			cio->cui_fd = LUSTRE_FPRIVATE(file);
-			cio->cui_iov = NULL;
-			cio->cui_nrsegs = 0;
+			cio->cui_iter = NULL;
 			result = cl_io_iter_init(env, io);
 			if (result == 0) {
 				result = cl_io_lock(env, io);

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 55ca8d3..af84c1a 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c

@@ -218,14 +218,11 @@
 	int i;
 
 	for (i = 0; i < npages; i++) {
-		if (pages[i] == NULL)
-			break;
 		if (do_dirty)
 			set_page_dirty_lock(pages[i]);
 		page_cache_release(pages[i]);
 	}
-
-	OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+	kvfree(pages);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -363,18 +360,16 @@
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
 		      ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-			       const struct iovec *iov, loff_t file_offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter, loff_t file_offset)
 {
 	struct lu_env *env;
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_length(iov, nr_segs);
-	long tot_bytes = 0, result = 0;
+	ssize_t count = iov_iter_count(iter);
+	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
-	unsigned long seg = 0;
 	long size = MAX_DIO_SIZE;
 	int refcheck;
 
@@ -392,11 +387,8 @@
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	for (seg = 0; seg < nr_segs; seg++) {
-		if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-		    (iov[seg].iov_len & ~CFS_PAGE_MASK))
-			return -EINVAL;
-	}
+	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+		return -EINVAL;
 
 	env = cl_env_get(&refcheck);
 	LASSERT(!IS_ERR(env));
@@ -411,63 +403,49 @@
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < nr_segs; seg++) {
-		long iov_left = iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iov[seg].iov_base;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		size_t offs;
 
+		count = min_t(size_t, iov_iter_count(iter), size);
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
 				break;
-			if (file_offset + iov_left > i_size_read(inode))
-				iov_left = i_size_read(inode) - file_offset;
+			if (file_offset + count > i_size_read(inode))
+				count = i_size_read(inode) - file_offset;
 		}
 
-		while (iov_left > 0) {
-			struct page **pages;
-			int page_count, max_pages = 0;
-			long bytes;
-
-			bytes = min(size, iov_left);
-			page_count = ll_get_user_pages(rw, user_addr, bytes,
-						       &pages, &max_pages);
-			if (likely(page_count > 0)) {
-				if (unlikely(page_count <  max_pages))
-					bytes = page_count << PAGE_CACHE_SHIFT;
-				result = ll_direct_IO_26_seg(env, io, rw, inode,
-							     file->f_mapping,
-							     bytes, file_offset,
-							     pages, page_count);
-				ll_free_user_pages(pages, max_pages, rw==READ);
-			} else if (page_count == 0) {
-				GOTO(out, result = -EFAULT);
-			} else {
-				result = page_count;
-			}
-			if (unlikely(result <= 0)) {
-				/* If we can't allocate a large enough buffer
-				 * for the request, shrink it to a smaller
-				 * PAGE_SIZE multiple and try again.
-				 * We should always be able to kmalloc for a
-				 * page worth of page pointers = 4MB on i386. */
-				if (result == -ENOMEM &&
-				    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-					   PAGE_CACHE_SIZE) {
-					size = ((((size / 2) - 1) |
-						 ~CFS_PAGE_MASK) + 1) &
-						CFS_PAGE_MASK;
-					CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
-					       size);
-					continue;
-				}
-
-				GOTO(out, result);
-			}
-
-			tot_bytes += result;
-			file_offset += result;
-			iov_left -= result;
-			user_addr += result;
+		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+		if (likely(result > 0)) {
+			int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
+			result = ll_direct_IO_26_seg(env, io, rw, inode,
+						     file->f_mapping,
+						     result, file_offset,
+						     pages, n);
+			ll_free_user_pages(pages, n, rw==READ);
 		}
+		if (unlikely(result <= 0)) {
+			/* If we can't allocate a large enough buffer
+			 * for the request, shrink it to a smaller
+			 * PAGE_SIZE multiple and try again.
+			 * We should always be able to kmalloc for a
+			 * page worth of page pointers = 4MB on i386. */
+			if (result == -ENOMEM &&
+			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+				   PAGE_CACHE_SIZE) {
+				size = ((((size / 2) - 1) |
+					 ~CFS_PAGE_MASK) + 1) &
+					CFS_PAGE_MASK;
+				CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+				       size);
+				continue;
+			}
+
+			GOTO(out, result);
+		}
+		iov_iter_advance(iter, result);
+		tot_bytes += result;
+		file_offset += result;
 	}
 out:
 	LASSERT(obj->cob_transient_pages == 0);

diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 7dd2b47..0e0b404 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c

@@ -211,27 +211,26 @@
 	struct cl_lock_descr   *descr = &cti->cti_descr;
 	ldlm_policy_data_t      policy;
 	unsigned long	   addr;
-	unsigned long	   seg;
 	ssize_t		 count;
 	int		     result;
+	struct iov_iter i;
+	struct iovec iov;
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
 	if (!cl_is_normalio(env, io))
 		return 0;
 
-	if (vio->cui_iov == NULL) /* nfs or loop back device write */
+	if (vio->cui_iter == NULL) /* nfs or loop back device write */
 		return 0;
 
 	/* No MM (e.g. NFS)? No vmas too. */
 	if (mm == NULL)
 		return 0;
 
-	for (seg = 0; seg < vio->cui_nrsegs; seg++) {
-		const struct iovec *iv = &vio->cui_iov[seg];
-
-		addr = (unsigned long)iv->iov_base;
-		count = iv->iov_len;
+	iov_for_each(iov, i, *(vio->cui_iter)) {
+		addr = (unsigned long)iov.iov_base;
+		count = iov.iov_len;
 		if (count == 0)
 			continue;
 
@@ -527,9 +526,7 @@
 	switch (vio->cui_io_subtype) {
 	case IO_NORMAL:
 		LASSERT(cio->cui_iocb->ki_pos == pos);
-		result = generic_file_aio_read(cio->cui_iocb,
-					       cio->cui_iov, cio->cui_nrsegs,
-					       cio->cui_iocb->ki_pos);
+		result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
 		break;
 	case IO_SPLICE:
 		result = generic_file_splice_read(file, &pos,
@@ -595,12 +592,11 @@
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */
+	if (cio->cui_iter == NULL) /* from a temp io in ll_cl_init(). */
 		result = 0;
 	else
-		result = generic_file_aio_write(cio->cui_iocb,
-						cio->cui_iov, cio->cui_nrsegs,
-						cio->cui_iocb->ki_pos);
+		result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+
 	if (result > 0) {
 		if (result < cnt)
 			io->ci_continue = 0;
@@ -1162,10 +1158,9 @@
 		 *  results."  -- Single Unix Spec */
 		if (count == 0)
 			result = 1;
-		else {
+		else
 			cio->cui_tot_count = count;
-			cio->cui_tot_nrsegs = 0;
-		}
+
 		/* for read/write, we store the jobid in the inode, and
 		 * it'll be fetched by osc when building RPC.
 		 *

diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
index ded31ea..cbf455d 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c

@@ -396,7 +396,7 @@
 	}
 }
 
-static struct vb2_ops iss_video_vb2ops = {
+static const struct vb2_ops iss_video_vb2ops = {
 	.queue_setup	= iss_video_queue_setup,
 	.buf_prepare	= iss_video_buf_prepare,
 	.buf_queue	= iss_video_buf_queue,

diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index 75d7c63..e320d6b 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c

@@ -1067,7 +1067,7 @@
 	xlr_set_rx_mode(ndev);
 
 	priv->num_rx_desc += MAX_NUM_DESC_SPILL;
-	SET_ETHTOOL_OPS(ndev, &xlr_ethtool_ops);
+	ndev->ethtool_ops = &xlr_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	/* Common registers, do one time initialization */

diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index ff7214a..da9dd6b 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c

@@ -469,7 +469,7 @@
 
 	/* We do our own locking, Linux doesn't need to */
 	dev->features |= NETIF_F_LLTX;
-	SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
+	dev->ethtool_ops = &cvm_oct_ethtool_ops;
 
 	cvm_oct_phy_setup_device(dev);
 	cvm_oct_set_mac_filter(dev);

diff --git a/drivers/staging/rtl8192ee/core.c b/drivers/staging/rtl8192ee/core.c
index 76ea356..7f6accd 100644
--- a/drivers/staging/rtl8192ee/core.c
+++ b/drivers/staging/rtl8192ee/core.c

@@ -322,7 +322,7 @@
 	struct rtl_mac *mac = &(rtlpriv->mac80211);
 	struct cfg80211_pkt_pattern *patterns = wow->patterns;
 	struct rtl_wow_pattern rtl_pattern;
-	u8 *pattern_os, *mask_os;
+	const u8 *pattern_os, *mask_os;
 	u8 mask[MAX_WOL_BIT_MASK_SIZE] = {0};
 	u8 content[MAX_WOL_PATTERN_SIZE] = {0};
 	u8 broadcast_addr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -1561,7 +1561,7 @@
  * before switch channle or power save, or tx buffer packet
  * maybe send after offchannel or rf sleep, this may cause
  * dis-association by AP */
-static void rtl_op_flush(struct ieee80211_hw *hw,
+static void rtl_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			 u32 queues, bool drop)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);

diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
index 0c9f5ce..f0839f6 100644
--- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c

@@ -1227,7 +1227,7 @@
 
 static int cfg80211_rtw_get_station(struct wiphy *wiphy,
 				    struct net_device *ndev,
-				    u8 *mac, struct station_info *sinfo)
+				    const u8 *mac, struct station_info *sinfo)
 {
 	int ret = 0;
 	struct rtw_adapter *padapter = wiphy_to_adapter(wiphy);
@@ -2903,7 +2903,7 @@
 }
 
 static int cfg80211_rtw_add_station(struct wiphy *wiphy,
-				    struct net_device *ndev, u8 *mac,
+				    struct net_device *ndev, const u8 *mac,
 				    struct station_parameters *params)
 {
 	DBG_8723A("%s(%s)\n", __func__, ndev->name);
@@ -2912,7 +2912,7 @@
 }
 
 static int cfg80211_rtw_del_station(struct wiphy *wiphy,
-				    struct net_device *ndev, u8 *mac)
+				    struct net_device *ndev, const u8 *mac)
 {
 	int ret = 0;
 	struct list_head *phead, *plist, *ptmp;
@@ -2988,7 +2988,7 @@
 }
 
 static int cfg80211_rtw_change_station(struct wiphy *wiphy,
-				       struct net_device *ndev, u8 *mac,
+				       struct net_device *ndev, const u8 *mac,
 				       struct station_parameters *params)
 {
 	DBG_8723A("%s(%s)\n", __func__, ndev->name);

diff --git a/drivers/staging/rtl8821ae/core.c b/drivers/staging/rtl8821ae/core.c
index 9a37408..046be2c 100644
--- a/drivers/staging/rtl8821ae/core.c
+++ b/drivers/staging/rtl8821ae/core.c

@@ -1278,7 +1278,9 @@
  * before switch channel or power save, or tx buffer packet
  * maybe send after offchannel or rf sleep, this may cause
  * dis-association by AP */
-static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void rtl_op_flush(struct ieee80211_hw *hw,
+			 struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 

diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index f76f95c..723319e 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c

@@ -84,7 +84,7 @@
 }
 
 static int prism2_domibset_pstr32(wlandevice_t *wlandev,
-				  u32 did, u8 len, u8 *data)
+				  u32 did, u8 len, const u8 *data)
 {
 	struct p80211msg_dot11req_mibset msg;
 	p80211item_pstr32_t *mibitem =
@@ -298,7 +298,7 @@
 
 
 static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			      const u8 *mac, struct station_info *sinfo)
 {
 	wlandevice_t *wlandev = dev->ml_priv;
 	struct p80211msg_lnxreq_commsquality quality;

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 9189bc0..5663f4d 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c

@@ -300,7 +300,7 @@
 		port = ntohs(sock_in->sin_port);
 	}
 
-	if ((ip_match == true) && (np->np_port == port) &&
+	if (ip_match && (np->np_port == port) &&
 	    (np->np_network_transport == network_transport))
 		return true;
 
@@ -325,7 +325,7 @@
 		}
 
 		match = iscsit_check_np_match(sockaddr, np, network_transport);
-		if (match == true) {
+		if (match) {
 			/*
 			 * Increment the np_exports reference count now to
 			 * prevent iscsit_del_np() below from being called
@@ -1121,7 +1121,7 @@
 	/*
 	 * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes.
 	 */
-	if (dump_payload == true)
+	if (dump_payload)
 		goto after_immediate_data;
 
 	immed_ret = iscsit_handle_immediate_data(cmd, hdr,
@@ -3390,7 +3390,9 @@
 
 #define SENDTARGETS_BUF_LIMIT 32768U
 
-static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+static int
+iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
+				  enum iscsit_transport_type network_transport)
 {
 	char *payload = NULL;
 	struct iscsi_conn *conn = cmd->conn;
@@ -3467,6 +3469,9 @@
 				struct iscsi_np *np = tpg_np->tpg_np;
 				bool inaddr_any = iscsit_check_inaddr_any(np);
 
+				if (np->np_network_transport != network_transport)
+					continue;
+
 				if (!target_name_printed) {
 					len = sprintf(buf, "TargetName=%s",
 						      tiqn->tiqn);
@@ -3485,10 +3490,8 @@
 
 				len = sprintf(buf, "TargetAddress="
 					"%s:%hu,%hu",
-					(inaddr_any == false) ?
-						np->np_ip : conn->local_ip,
-					(inaddr_any == false) ?
-						np->np_port : conn->local_port,
+					inaddr_any ? conn->local_ip : np->np_ip,
+					inaddr_any ? conn->local_port : np->np_port,
 					tpg->tpgt);
 				len += 1;
 
@@ -3520,11 +3523,12 @@
 
 int
 iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
-		      struct iscsi_text_rsp *hdr)
+		      struct iscsi_text_rsp *hdr,
+		      enum iscsit_transport_type network_transport)
 {
 	int text_length, padding;
 
-	text_length = iscsit_build_sendtargets_response(cmd);
+	text_length = iscsit_build_sendtargets_response(cmd, network_transport);
 	if (text_length < 0)
 		return text_length;
 
@@ -3562,7 +3566,7 @@
 	u32 tx_size = 0;
 	int text_length, iov_count = 0, rc;
 
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP);
 	if (rc < 0)
 		return rc;
 
@@ -4234,8 +4238,6 @@
 	if (conn->conn_transport->iscsit_wait_conn)
 		conn->conn_transport->iscsit_wait_conn(conn);
 
-	iscsit_free_queue_reqs_for_conn(conn);
-
 	/*
 	 * During Connection recovery drop unacknowledged out of order
 	 * commands for this connection, and prepare the other commands
@@ -4252,6 +4254,7 @@
 		iscsit_clear_ooo_cmdsns_for_conn(conn);
 		iscsit_release_commands_from_conn(conn);
 	}
+	iscsit_free_queue_reqs_for_conn(conn);
 
 	/*
 	 * Handle decrementing session or connection usage count if

diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index de77d9a..19b842c 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c

@@ -71,6 +71,40 @@
 			challenge_asciihex);
 }
 
+static int chap_check_algorithm(const char *a_str)
+{
+	char *tmp, *orig, *token;
+
+	tmp = kstrdup(a_str, GFP_KERNEL);
+	if (!tmp) {
+		pr_err("Memory allocation failed for CHAP_A temporary buffer\n");
+		return CHAP_DIGEST_UNKNOWN;
+	}
+	orig = tmp;
+
+	token = strsep(&tmp, "=");
+	if (!token)
+		goto out;
+
+	if (strcmp(token, "CHAP_A")) {
+		pr_err("Unable to locate CHAP_A key\n");
+		goto out;
+	}
+	while (token) {
+		token = strsep(&tmp, ",");
+		if (!token)
+			goto out;
+
+		if (!strncmp(token, "5", 1)) {
+			pr_debug("Selected MD5 Algorithm\n");
+			kfree(orig);
+			return CHAP_DIGEST_MD5;
+		}
+	}
+out:
+	kfree(orig);
+	return CHAP_DIGEST_UNKNOWN;
+}
 
 static struct iscsi_chap *chap_server_open(
 	struct iscsi_conn *conn,
@@ -79,6 +113,7 @@
 	char *aic_str,
 	unsigned int *aic_len)
 {
+	int ret;
 	struct iscsi_chap *chap;
 
 	if (!(auth->naf_flags & NAF_USERID_SET) ||
@@ -93,21 +128,24 @@
 		return NULL;
 
 	chap = conn->auth_protocol;
-	/*
-	 * We only support MD5 MDA presently.
-	 */
-	if (strncmp(a_str, "CHAP_A=5", 8)) {
-		pr_err("CHAP_A is not MD5.\n");
+	ret = chap_check_algorithm(a_str);
+	switch (ret) {
+	case CHAP_DIGEST_MD5:
+		pr_debug("[server] Got CHAP_A=5\n");
+		/*
+		 * Send back CHAP_A set to MD5.
+		*/
+		*aic_len = sprintf(aic_str, "CHAP_A=5");
+		*aic_len += 1;
+		chap->digest_type = CHAP_DIGEST_MD5;
+		pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+		break;
+	case CHAP_DIGEST_UNKNOWN:
+	default:
+		pr_err("Unsupported CHAP_A value\n");
 		return NULL;
 	}
-	pr_debug("[server] Got CHAP_A=5\n");
-	/*
-	 * Send back CHAP_A set to MD5.
-	 */
-	*aic_len = sprintf(aic_str, "CHAP_A=5");
-	*aic_len += 1;
-	chap->digest_type = CHAP_DIGEST_MD5;
-	pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+
 	/*
 	 * Set Identifier.
 	 */
@@ -314,6 +352,16 @@
 		goto out;
 	}
 	/*
+	 * During mutual authentication, the CHAP_C generated by the
+	 * initiator must not match the original CHAP_C generated by
+	 * the target.
+	 */
+	if (!memcmp(challenge_binhex, chap->challenge, CHAP_CHALLENGE_LENGTH)) {
+		pr_err("initiator CHAP_C matches target CHAP_C, failing"
+		       " login attempt\n");
+		goto out;
+	}
+	/*
 	 * Generate CHAP_N and CHAP_R for mutual authentication.
 	 */
 	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);

diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h
index 2f463c0..d22f7b96 100644
--- a/drivers/target/iscsi/iscsi_target_auth.h
+++ b/drivers/target/iscsi/iscsi_target_auth.h

@@ -1,6 +1,7 @@
 #ifndef _ISCSI_CHAP_H_
 #define _ISCSI_CHAP_H_
 
+#define CHAP_DIGEST_UNKNOWN	0
 #define CHAP_DIGEST_MD5		5
 #define CHAP_DIGEST_SHA		6
 

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index d9b1d88..fecb695 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c

@@ -1145,7 +1145,7 @@
 void iscsi_target_login_sess_out(struct iscsi_conn *conn,
 		struct iscsi_np *np, bool zero_tsih, bool new_sess)
 {
-	if (new_sess == false)
+	if (!new_sess)
 		goto old_sess_out;
 
 	pr_err("iSCSI Login negotiation failed.\n");

diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 75b6859..62a095f 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c

@@ -404,7 +404,7 @@
 	}
 
 	rc = schedule_delayed_work(&conn->login_work, 0);
-	if (rc == false) {
+	if (!rc) {
 		pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work"
 			 " got false\n");
 	}
@@ -513,7 +513,7 @@
 	state = (tpg->tpg_state == TPG_STATE_ACTIVE);
 	spin_unlock(&tpg->tpg_state_lock);
 
-	if (state == false) {
+	if (!state) {
 		pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n");
 		iscsi_target_restore_sock_callbacks(conn);
 		iscsi_target_login_drop(conn, login);
@@ -528,7 +528,7 @@
 		state = iscsi_target_sk_state_check(sk);
 		read_unlock_bh(&sk->sk_callback_lock);
 
-		if (state == false) {
+		if (!state) {
 			pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
 			iscsi_target_restore_sock_callbacks(conn);
 			iscsi_target_login_drop(conn, login);
@@ -773,6 +773,12 @@
 		}
 
 		goto do_auth;
+	} else if (!payload_length) {
+		pr_err("Initiator sent zero length security payload,"
+		       " login failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				    ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
 	}
 
 	if (login->first_request)

diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 4d2e23f..02f9de2 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c

@@ -474,10 +474,10 @@
 		if (!strcmp(param->name, AUTHMETHOD)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, HEADERDIGEST)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, DATADIGEST)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXCONNECTIONS)) {
 			SET_PSTATE_NEGOTIATE(param);
@@ -497,7 +497,7 @@
 		} else if (!strcmp(param->name, IMMEDIATEDATA)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXXMITDATASEGMENTLENGTH)) {
 			continue;
@@ -528,13 +528,13 @@
 		} else if (!strcmp(param->name, OFMARKINT)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, RDMAEXTENSIONS)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, INITIATORRECVDATASEGMENTLENGTH)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, TARGETRECVDATASEGMENTLENGTH)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		}
 	}
@@ -1605,7 +1605,7 @@
 
 	tmpbuf = kzalloc(length + 1, GFP_KERNEL);
 	if (!tmpbuf) {
-		pr_err("Unable to allocate memory for tmpbuf.\n");
+		pr_err("Unable to allocate %u + 1 bytes for tmpbuf.\n", length);
 		return -1;
 	}
 

diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 1431e84..c3cb5c1 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c

@@ -189,7 +189,7 @@
 	iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown);
 }
 
-void iscsit_clear_tpg_np_login_threads(
+static void iscsit_clear_tpg_np_login_threads(
 	struct iscsi_portal_group *tpg,
 	bool shutdown)
 {
@@ -276,8 +276,6 @@
 	tpg->tpg_state = TPG_STATE_INACTIVE;
 	spin_unlock(&tpg->tpg_state_lock);
 
-	iscsit_clear_tpg_np_login_threads(tpg, true);
-
 	if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
 		pr_err("Unable to delete iSCSI Target Portal Group:"
 			" %hu while active sessions exist, and force=0\n",
@@ -453,7 +451,7 @@
 
 			match = iscsit_check_np_match(sockaddr, np,
 						network_transport);
-			if (match == true)
+			if (match)
 				break;
 		}
 		spin_unlock(&tpg->tpg_np_lock);
@@ -475,7 +473,7 @@
 
 	if (!tpg_np_parent) {
 		if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr,
-				network_transport) == true) {
+				network_transport)) {
 			pr_err("Network Portal: %s already exists on a"
 				" different TPG on %s\n", ip_str,
 				tpg->tpg_tiqn->tiqn);

diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
index 0a182f2..e726533 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h

@@ -8,7 +8,6 @@
 			struct iscsi_np *, struct iscsi_tpg_np **);
 extern int iscsit_get_tpg(struct iscsi_portal_group *);
 extern void iscsit_put_tpg(struct iscsi_portal_group *);
-extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool);
 extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
 extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
 extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 73ab75d..6d2f375 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c

@@ -179,7 +179,7 @@
 	struct tcm_loop_hba *tl_hba;
 	struct tcm_loop_tpg *tl_tpg;
 	struct scatterlist *sgl_bidi = NULL;
-	u32 sgl_bidi_count = 0;
+	u32 sgl_bidi_count = 0, transfer_length;
 	int rc;
 
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
@@ -213,12 +213,21 @@
 
 	}
 
-	if (!scsi_prot_sg_count(sc) && scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
+	transfer_length = scsi_transfer_length(sc);
+	if (!scsi_prot_sg_count(sc) &&
+	    scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) {
 		se_cmd->prot_pto = true;
+		/*
+		 * loopback transport doesn't support
+		 * WRITE_GENERATE, READ_STRIP protection
+		 * information operations, go ahead unprotected.
+		 */
+		transfer_length = scsi_bufflen(sc);
+	}
 
 	rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
 			&tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
-			scsi_bufflen(sc), tcm_loop_sam_attr(sc),
+			transfer_length, tcm_loop_sam_attr(sc),
 			sc->sc_data_direction, 0,
 			scsi_sglist(sc), scsi_sg_count(sc),
 			sgl_bidi, sgl_bidi_count,

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 0f199f6..94d00df 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c

@@ -1055,6 +1055,8 @@
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 			goto fail;
 		}
+
+		blk_rq_set_block_pc(req);
 	} else {
 		BUG_ON(!cmd->data_length);
 
@@ -1071,7 +1073,6 @@
 		}
 	}
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->end_io = pscsi_req_done;
 	req->end_io_data = cmd;
 	req->cmd_len = scsi_command_size(pt->pscsi_cdb);

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index e022959..bd78d92 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c

@@ -81,7 +81,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8);
 	return 0;
 }
 
@@ -137,7 +137,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 32);
 	return 0;
 }
 
@@ -176,24 +176,6 @@
 	return cmd->se_dev->dev_attrib.block_size * sectors;
 }
 
-static int sbc_check_valid_sectors(struct se_cmd *cmd)
-{
-	struct se_device *dev = cmd->se_dev;
-	unsigned long long end_lba;
-	u32 sectors;
-
-	sectors = cmd->data_length / dev->dev_attrib.block_size;
-	end_lba = dev->transport->get_blocks(dev) + 1;
-
-	if (cmd->t_task_lba + sectors > end_lba) {
-		pr_err("target: lba %llu, sectors %u exceeds end lba %llu\n",
-			cmd->t_task_lba, sectors, end_lba);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static inline u32 transport_get_sectors_6(unsigned char *cdb)
 {
 	/*
@@ -665,8 +647,19 @@
 
 	cmd->prot_type = dev->dev_attrib.pi_prot_type;
 	cmd->prot_length = dev->prot_length * sectors;
-	pr_debug("%s: prot_type=%d, prot_length=%d prot_op=%d prot_checks=%d\n",
-		 __func__, cmd->prot_type, cmd->prot_length,
+
+	/**
+	 * In case protection information exists over the wire
+	 * we modify command data length to describe pure data.
+	 * The actual transfer length is data length + protection
+	 * length
+	 **/
+	if (protect)
+		cmd->data_length = sectors * dev->dev_attrib.block_size;
+
+	pr_debug("%s: prot_type=%d, data_length=%d, prot_length=%d "
+		 "prot_op=%d prot_checks=%d\n",
+		 __func__, cmd->prot_type, cmd->data_length, cmd->prot_length,
 		 cmd->prot_op, cmd->prot_checks);
 
 	return true;
@@ -877,15 +870,6 @@
 		break;
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
-		if (!ops->execute_sync_cache) {
-			size = 0;
-			cmd->execute_cmd = sbc_emulate_noop;
-			break;
-		}
-
-		/*
-		 * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE
-		 */
 		if (cdb[0] == SYNCHRONIZE_CACHE) {
 			sectors = transport_get_sectors_10(cdb);
 			cmd->t_task_lba = transport_lba_32(cdb);
@@ -893,18 +877,12 @@
 			sectors = transport_get_sectors_16(cdb);
 			cmd->t_task_lba = transport_lba_64(cdb);
 		}
-
-		size = sbc_get_size(cmd, sectors);
-
-		/*
-		 * Check to ensure that LBA + Range does not exceed past end of
-		 * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
-		 */
-		if (cmd->t_task_lba || sectors) {
-			if (sbc_check_valid_sectors(cmd) < 0)
-				return TCM_ADDRESS_OUT_OF_RANGE;
+		if (ops->execute_sync_cache) {
+			cmd->execute_cmd = ops->execute_sync_cache;
+			goto check_lba;
 		}
-		cmd->execute_cmd = ops->execute_sync_cache;
+		size = 0;
+		cmd->execute_cmd = sbc_emulate_noop;
 		break;
 	case UNMAP:
 		if (!ops->execute_unmap)
@@ -947,8 +925,10 @@
 		break;
 	case VERIFY:
 		size = 0;
+		sectors = transport_get_sectors_10(cdb);
+		cmd->t_task_lba = transport_lba_32(cdb);
 		cmd->execute_cmd = sbc_emulate_noop;
-		break;
+		goto check_lba;
 	case REZERO_UNIT:
 	case SEEK_6:
 	case SEEK_10:
@@ -988,7 +968,7 @@
 				dev->dev_attrib.hw_max_sectors);
 			return TCM_INVALID_CDB_FIELD;
 		}
-
+check_lba:
 		end_lba = dev->transport->get_blocks(dev) + 1;
 		if (cmd->t_task_lba + sectors > end_lba) {
 			pr_err("cmd exceeds last lba %llu "

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 8653666..6cd7222 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c

@@ -129,15 +129,10 @@
 spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf)
 {
 	struct se_device *dev = cmd->se_dev;
-	u16 len = 0;
+	u16 len;
 
 	if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) {
-		u32 unit_serial_len;
-
-		unit_serial_len = strlen(dev->t10_wwn.unit_serial);
-		unit_serial_len++; /* For NULL Terminator */
-
-		len += sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial);
+		len = sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial);
 		len++; /* Extra Byte for NULL Terminator */
 		buf[3] = len;
 	}
@@ -721,6 +716,7 @@
 	unsigned char *buf;
 	sense_reason_t ret;
 	int p;
+	int len = 0;
 
 	buf = kzalloc(SE_INQUIRY_BUF, GFP_KERNEL);
 	if (!buf) {
@@ -742,6 +738,7 @@
 		}
 
 		ret = spc_emulate_inquiry_std(cmd, buf);
+		len = buf[4] + 5;
 		goto out;
 	}
 
@@ -749,6 +746,7 @@
 		if (cdb[2] == evpd_handlers[p].page) {
 			buf[1] = cdb[2];
 			ret = evpd_handlers[p].emulate(cmd, buf);
+			len = get_unaligned_be16(&buf[2]) + 4;
 			goto out;
 		}
 	}
@@ -765,7 +763,7 @@
 	kfree(buf);
 
 	if (!ret)
-		target_complete_cmd(cmd, GOOD);
+		target_complete_cmd_with_length(cmd, GOOD, len);
 	return ret;
 }
 
@@ -1103,7 +1101,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, length);
 	return 0;
 }
 
@@ -1279,7 +1277,7 @@
 	buf[3] = (lun_count & 0xff);
 	transport_kunmap_data_sg(cmd);
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8 + lun_count * 8);
 	return 0;
 }
 EXPORT_SYMBOL(spc_emulate_report_luns);

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 2179fee..7fa62fc 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c

@@ -504,7 +504,7 @@
 	 * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group
 	 * removal context.
 	 */
-	if (se_nacl && comp_nacl == true)
+	if (se_nacl && comp_nacl)
 		target_put_nacl(se_nacl);
 
 	transport_free_session(se_sess);
@@ -562,7 +562,7 @@
 
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return 1;
 	}
 
@@ -687,7 +687,7 @@
 	if (cmd->transport_state & CMD_T_ABORTED &&
 	    cmd->transport_state & CMD_T_STOP) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	} else if (!success) {
 		INIT_WORK(&cmd->work, target_complete_failure_work);
@@ -703,6 +703,23 @@
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+	if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) {
+		if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+			cmd->residual_count += cmd->data_length - length;
+		} else {
+			cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT;
+			cmd->residual_count = cmd->data_length - length;
+		}
+
+		cmd->data_length = length;
+	}
+
+	target_complete_cmd(cmd, scsi_status);
+}
+EXPORT_SYMBOL(target_complete_cmd_with_length);
+
 static void target_add_to_state_list(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
@@ -1761,7 +1778,7 @@
 			cmd->se_tfo->get_task_tag(cmd));
 
 		spin_unlock_irq(&cmd->t_state_lock);
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	}
 
@@ -2363,7 +2380,7 @@
 	 * fabric acknowledgement that requires two target_put_sess_cmd()
 	 * invocations before se_cmd descriptor release.
 	 */
-	if (ack_kref == true) {
+	if (ack_kref) {
 		kref_get(&se_cmd->cmd_kref);
 		se_cmd->se_cmd_flags |= SCF_ACK_KREF;
 	}
@@ -2407,6 +2424,10 @@
  */
 int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd)
 {
+	if (!se_sess) {
+		se_cmd->se_tfo->release_cmd(se_cmd);
+		return 1;
+	}
 	return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref,
 			&se_sess->sess_cmd_lock);
 }
@@ -2934,6 +2955,12 @@
 int transport_generic_handle_tmr(
 	struct se_cmd *cmd)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	cmd->transport_state |= CMD_T_ACTIVE;
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
 	INIT_WORK(&cmd->work, target_tmr_work);
 	queue_work(cmd->se_dev->tmr_wq, &cmd->work);
 	return 0;

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 669c536..e9186cd 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c

@@ -70,7 +70,7 @@
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
 	int rc;
 
-	if (src == true)
+	if (src)
 		dev_wwn = &xop->dst_tid_wwn[0];
 	else
 		dev_wwn = &xop->src_tid_wwn[0];
@@ -88,7 +88,7 @@
 		if (rc != 0)
 			continue;
 
-		if (src == true) {
+		if (src) {
 			xop->dst_dev = se_dev;
 			pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located"
 				" se_dev\n", xop->dst_dev);
@@ -166,7 +166,7 @@
 		return -EINVAL;
 	}
 
-	if (src == true) {
+	if (src) {
 		memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
 		 * Determine if the source designator matches the local device
@@ -236,7 +236,7 @@
 			/*
 			 * Assume target descriptors are in source -> destination order..
 			 */
-			if (src == true)
+			if (src)
 				src = false;
 			else
 				src = true;
@@ -560,7 +560,7 @@
 	 * reservations.  The pt_cmd->se_lun pointer will be setup from within
 	 * target_xcopy_setup_pt_port()
 	 */
-	if (remote_port == false) {
+	if (!remote_port) {
 		pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
 		return 0;
 	}

diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index f5fd515..be0c0d0 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c

@@ -128,6 +128,7 @@
 	struct fc_lport *lport;
 	struct fc_exch *ep;
 	size_t len;
+	int rc;
 
 	if (cmd->aborted)
 		return 0;
@@ -137,9 +138,10 @@
 	len = sizeof(*fcp) + se_cmd->scsi_sense_length;
 	fp = fc_frame_alloc(lport, len);
 	if (!fp) {
-		/* XXX shouldn't just drop it - requeue and retry? */
-		return 0;
+		se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+		return -ENOMEM;
 	}
+
 	fcp = fc_frame_payload_get(fp, len);
 	memset(fcp, 0, len);
 	fcp->resp.fr_status = se_cmd->scsi_status;
@@ -170,7 +172,18 @@
 	fc_fill_fc_hdr(fp, FC_RCTL_DD_CMD_STATUS, ep->did, ep->sid, FC_TYPE_FCP,
 		       FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ, 0);
 
-	lport->tt.seq_send(lport, cmd->seq, fp);
+	rc = lport->tt.seq_send(lport, cmd->seq, fp);
+	if (rc) {
+		pr_info_ratelimited("%s: Failed to send response frame %p, "
+				    "xid <0x%x>\n", __func__, fp, ep->xid);
+		/*
+		 * Generate a TASK_SET_FULL status to notify the initiator
+		 * to reduce its queue_depth after the se_cmd response has
+		 * been re-queued by target-core.
+		 */
+		se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+		return -ENOMEM;
+	}
 	lport->tt.exch_done(cmd->seq);
 	return 0;
 }

diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index e415af3..97b486c 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c

@@ -82,6 +82,10 @@
 
 	if (cmd->aborted)
 		return 0;
+
+	if (se_cmd->scsi_status == SAM_STAT_TASK_SET_FULL)
+		goto queue_status;
+
 	ep = fc_seq_exch(cmd->seq);
 	lport = ep->lp;
 	cmd->seq = lport->tt.seq_start_next(cmd->seq);
@@ -178,14 +182,23 @@
 			       FC_TYPE_FCP, f_ctl, fh_off);
 		error = lport->tt.seq_send(lport, seq, fp);
 		if (error) {
-			/* XXX For now, initiator will retry */
-			pr_err_ratelimited("%s: Failed to send frame %p, "
+			pr_info_ratelimited("%s: Failed to send frame %p, "
 						"xid <0x%x>, remaining %zu, "
 						"lso_max <0x%x>\n",
 						__func__, fp, ep->xid,
 						remaining, lport->lso_max);
+			/*
+			 * Go ahead and set TASK_SET_FULL status ignoring the
+			 * rest of the DataIN, and immediately attempt to
+			 * send the response via ft_queue_status() in order
+			 * to notify the initiator that it should reduce its
+			 * per LUN queue_depth.
+			 */
+			se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+			break;
 		}
 	}
+queue_status:
 	return ft_queue_status(se_cmd);
 }
 

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 2d51912..f9a1386 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig

@@ -222,12 +222,24 @@
 	  the Intel Thermal Daemon can use this information to allow the user
 	  to select his laptop to run without turning on the fans.
 
+config INTEL_SOC_DTS_THERMAL
+	tristate "Intel SoCs DTS thermal driver"
+	depends on X86 && IOSF_MBI
+	help
+	  Enable this to register Intel SoCs (e.g. Bay Trail) platform digital
+	  temperature sensor (DTS). These SoCs have two additional DTSs in
+	  addition to DTSs on CPU cores. Each DTS will be registered as a
+	  thermal zone. There are two trip points. One of the trip points can
+	  be set by user mode programs to get notifications via Linux thermal
+	  notification methods. The other trip is a critical trip point, which
+	  is set by the driver based on the TJ MAX temperature.
+
 menu "Texas Instruments thermal drivers"
 source "drivers/thermal/ti-soc-thermal/Kconfig"
 endmenu
 
 menu "Samsung thermal drivers"
-depends on PLAT_SAMSUNG
+depends on ARCH_EXYNOS
 source "drivers/thermal/samsung/Kconfig"
 endmenu
 

diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 54e4ec9..de0636a 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile

@@ -29,5 +29,6 @@
 obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
+obj-$(CONFIG_INTEL_SOC_DTS_THERMAL)	+= intel_soc_dts_thermal.o
 obj-$(CONFIG_TI_SOC_THERMAL)	+= ti-soc-thermal/
 obj-$(CONFIG_ACPI_INT3403_THERMAL)	+= int3403_thermal.o

diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index 5e53212..9d1420a 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c

@@ -24,10 +24,7 @@
 #include <linux/of_device.h>
 #include <linux/thermal.h>
 
-#define THERMAL_VALID_OFFSET		9
 #define THERMAL_VALID_MASK		0x1
-#define THERMAL_TEMP_OFFSET		10
-#define THERMAL_TEMP_MASK		0x1ff
 
 /* Thermal Manager Control and Status Register */
 #define PMU_TDC0_SW_RST_MASK		(0x1 << 1)
@@ -38,24 +35,47 @@
 #define PMU_TDC0_OTF_CAL_MASK		(0x1 << 30)
 #define PMU_TDC0_START_CAL_MASK		(0x1 << 25)
 
-struct armada_thermal_ops;
+#define A375_Z1_CAL_RESET_LSB		0x8011e214
+#define A375_Z1_CAL_RESET_MSB		0x30a88019
+#define A375_Z1_WORKAROUND_BIT		BIT(9)
+
+#define A375_UNIT_CONTROL_SHIFT		27
+#define A375_UNIT_CONTROL_MASK		0x7
+#define A375_READOUT_INVERT		BIT(15)
+#define A375_HW_RESETn			BIT(8)
+#define A380_HW_RESET			BIT(8)
+
+struct armada_thermal_data;
 
 /* Marvell EBU Thermal Sensor Dev Structure */
 struct armada_thermal_priv {
 	void __iomem *sensor;
 	void __iomem *control;
-	struct armada_thermal_ops *ops;
+	struct armada_thermal_data *data;
 };
 
-struct armada_thermal_ops {
+struct armada_thermal_data {
 	/* Initialize the sensor */
-	void (*init_sensor)(struct armada_thermal_priv *);
+	void (*init_sensor)(struct platform_device *pdev,
+			    struct armada_thermal_priv *);
 
 	/* Test for a valid sensor value (optional) */
 	bool (*is_valid)(struct armada_thermal_priv *);
+
+	/* Coefficients: temp = (b - m*reg) / div, negated if inverted */
+	unsigned long coef_b;
+	unsigned long coef_m;
+	unsigned long coef_div;
+	bool inverted;
+
+	/* Register shift and mask to access the sensor temperature */
+	unsigned int temp_shift;
+	unsigned int temp_mask;
+	unsigned int is_valid_shift;
 };
 
-static void armadaxp_init_sensor(struct armada_thermal_priv *priv)
+static void armadaxp_init_sensor(struct platform_device *pdev,
+				 struct armada_thermal_priv *priv)
 {
 	unsigned long reg;
 
@@ -80,7 +100,8 @@
 	writel(reg, priv->sensor);
 }
 
-static void armada370_init_sensor(struct armada_thermal_priv *priv)
+static void armada370_init_sensor(struct platform_device *pdev,
+				  struct armada_thermal_priv *priv)
 {
 	unsigned long reg;
 
@@ -99,11 +120,54 @@
 	mdelay(10);
 }
 
+static void armada375_init_sensor(struct platform_device *pdev,
+				  struct armada_thermal_priv *priv)
+{
+	unsigned long reg;
+	bool quirk_needed =
+		!!of_device_is_compatible(pdev->dev.of_node,
+					  "marvell,armada375-z1-thermal");
+
+	if (quirk_needed) {
+		/* Ensure these registers have the default (reset) values */
+		writel(A375_Z1_CAL_RESET_LSB, priv->control);
+		writel(A375_Z1_CAL_RESET_MSB, priv->control + 0x4);
+	}
+
+	reg = readl(priv->control + 4);
+	reg &= ~(A375_UNIT_CONTROL_MASK << A375_UNIT_CONTROL_SHIFT);
+	reg &= ~A375_READOUT_INVERT;
+	reg &= ~A375_HW_RESETn;
+
+	if (quirk_needed)
+		reg |= A375_Z1_WORKAROUND_BIT;
+
+	writel(reg, priv->control + 4);
+	mdelay(20);
+
+	reg |= A375_HW_RESETn;
+	writel(reg, priv->control + 4);
+	mdelay(50);
+}
+
+static void armada380_init_sensor(struct platform_device *pdev,
+				  struct armada_thermal_priv *priv)
+{
+	unsigned long reg = readl_relaxed(priv->control);
+
+	/* Reset hardware once */
+	if (!(reg & A380_HW_RESET)) {
+		reg |= A380_HW_RESET;
+		writel(reg, priv->control);
+		mdelay(10);
+	}
+}
+
 static bool armada_is_valid(struct armada_thermal_priv *priv)
 {
 	unsigned long reg = readl_relaxed(priv->sensor);
 
-	return (reg >> THERMAL_VALID_OFFSET) & THERMAL_VALID_MASK;
+	return (reg >> priv->data->is_valid_shift) & THERMAL_VALID_MASK;
 }
 
 static int armada_get_temp(struct thermal_zone_device *thermal,
@@ -111,17 +175,27 @@
 {
 	struct armada_thermal_priv *priv = thermal->devdata;
 	unsigned long reg;
+	unsigned long m, b, div;
 
 	/* Valid check */
-	if (priv->ops->is_valid && !priv->ops->is_valid(priv)) {
+	if (priv->data->is_valid && !priv->data->is_valid(priv)) {
 		dev_err(&thermal->device,
 			"Temperature sensor reading not valid\n");
 		return -EIO;
 	}
 
 	reg = readl_relaxed(priv->sensor);
-	reg = (reg >> THERMAL_TEMP_OFFSET) & THERMAL_TEMP_MASK;
-	*temp = (3153000000UL - (10000000UL*reg)) / 13825;
+	reg = (reg >> priv->data->temp_shift) & priv->data->temp_mask;
+
+	/* Get formula coefficients */
+	b = priv->data->coef_b;
+	m = priv->data->coef_m;
+	div = priv->data->coef_div;
+
+	if (priv->data->inverted)
+		*temp = ((m * reg) - b) / div;
+	else
+		*temp = (b - (m * reg)) / div;
 	return 0;
 }
 
@@ -129,23 +203,69 @@
 	.get_temp = armada_get_temp,
 };
 
-static const struct armada_thermal_ops armadaxp_ops = {
+static const struct armada_thermal_data armadaxp_data = {
 	.init_sensor = armadaxp_init_sensor,
+	.temp_shift = 10,
+	.temp_mask = 0x1ff,
+	.coef_b = 3153000000UL,
+	.coef_m = 10000000UL,
+	.coef_div = 13825,
 };
 
-static const struct armada_thermal_ops armada370_ops = {
+static const struct armada_thermal_data armada370_data = {
 	.is_valid = armada_is_valid,
 	.init_sensor = armada370_init_sensor,
+	.is_valid_shift = 9,
+	.temp_shift = 10,
+	.temp_mask = 0x1ff,
+	.coef_b = 3153000000UL,
+	.coef_m = 10000000UL,
+	.coef_div = 13825,
+};
+
+static const struct armada_thermal_data armada375_data = {
+	.is_valid = armada_is_valid,
+	.init_sensor = armada375_init_sensor,
+	.is_valid_shift = 10,
+	.temp_shift = 0,
+	.temp_mask = 0x1ff,
+	.coef_b = 3171900000UL,
+	.coef_m = 10000000UL,
+	.coef_div = 13616,
+};
+
+static const struct armada_thermal_data armada380_data = {
+	.is_valid = armada_is_valid,
+	.init_sensor = armada380_init_sensor,
+	.is_valid_shift = 10,
+	.temp_shift = 0,
+	.temp_mask = 0x3ff,
+	.coef_b = 1169498786UL,
+	.coef_m = 2000000UL,
+	.coef_div = 4289,
+	.inverted = true,
 };
 
 static const struct of_device_id armada_thermal_id_table[] = {
 	{
 		.compatible = "marvell,armadaxp-thermal",
-		.data       = &armadaxp_ops,
+		.data       = &armadaxp_data,
 	},
 	{
 		.compatible = "marvell,armada370-thermal",
-		.data       = &armada370_ops,
+		.data       = &armada370_data,
+	},
+	{
+		.compatible = "marvell,armada375-thermal",
+		.data       = &armada375_data,
+	},
+	{
+		.compatible = "marvell,armada375-z1-thermal",
+		.data       = &armada375_data,
+	},
+	{
+		.compatible = "marvell,armada380-thermal",
+		.data       = &armada380_data,
 	},
 	{
 		/* sentinel */
@@ -178,8 +298,8 @@
 	if (IS_ERR(priv->control))
 		return PTR_ERR(priv->control);
 
-	priv->ops = (struct armada_thermal_ops *)match->data;
-	priv->ops->init_sensor(priv);
+	priv->data = (struct armada_thermal_data *)match->data;
+	priv->data->init_sensor(pdev, priv);
 
 	thermal = thermal_zone_device_register("armada_thermal", 0, 0,
 					       priv, &ops, NULL, 0, 0);

diff --git a/drivers/thermal/int3403_thermal.c b/drivers/thermal/int3403_thermal.c
index 1301681..e93f025 100644
--- a/drivers/thermal/int3403_thermal.c
+++ b/drivers/thermal/int3403_thermal.c

@@ -62,7 +62,13 @@
 	if (ACPI_FAILURE(status))
 		return -EIO;
 
-	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(hyst, KELVIN_OFFSET);
+	/*
+	 * Thermal hysteresis represents a temperature difference.
+	 * Kelvin and Celsius have same degree size. So the
+	 * conversion here between tenths of degree Kelvin unit
+	 * and Milli-Celsius unit is just to multiply 100.
+	 */
+	*temp = hyst * 100;
 
 	return 0;
 }

diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index a084325..95cb7fc 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c

@@ -681,8 +681,10 @@
 	{ X86_VENDOR_INTEL, 6, 0x2d},
 	{ X86_VENDOR_INTEL, 6, 0x2e},
 	{ X86_VENDOR_INTEL, 6, 0x2f},
+	{ X86_VENDOR_INTEL, 6, 0x37},
 	{ X86_VENDOR_INTEL, 6, 0x3a},
 	{ X86_VENDOR_INTEL, 6, 0x3c},
+	{ X86_VENDOR_INTEL, 6, 0x3d},
 	{ X86_VENDOR_INTEL, 6, 0x3e},
 	{ X86_VENDOR_INTEL, 6, 0x3f},
 	{ X86_VENDOR_INTEL, 6, 0x45},

diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
new file mode 100644
index 0000000..a6a0a18
--- /dev/null
+++ b/drivers/thermal/intel_soc_dts_thermal.c

@@ -0,0 +1,479 @@
+/*
+ * intel_soc_dts_thermal.c
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/thermal.h>
+#include <asm/cpu_device_id.h>
+#include <asm/iosf_mbi.h>
+
+#define SOC_DTS_OFFSET_ENABLE	0xB0
+#define SOC_DTS_OFFSET_TEMP	0xB1
+
+#define SOC_DTS_OFFSET_PTPS	0xB2
+#define SOC_DTS_OFFSET_PTTS	0xB3
+#define SOC_DTS_OFFSET_PTTSS	0xB4
+#define SOC_DTS_OFFSET_PTMC	0x80
+#define SOC_DTS_TE_AUX0		0xB5
+#define SOC_DTS_TE_AUX1		0xB6
+
+#define SOC_DTS_AUX0_ENABLE_BIT		BIT(0)
+#define SOC_DTS_AUX1_ENABLE_BIT		BIT(1)
+#define SOC_DTS_CPU_MODULE0_ENABLE_BIT	BIT(16)
+#define SOC_DTS_CPU_MODULE1_ENABLE_BIT	BIT(17)
+#define SOC_DTS_TE_SCI_ENABLE		BIT(9)
+#define SOC_DTS_TE_SMI_ENABLE		BIT(10)
+#define SOC_DTS_TE_MSI_ENABLE		BIT(11)
+#define SOC_DTS_TE_APICA_ENABLE		BIT(14)
+#define SOC_DTS_PTMC_APIC_DEASSERT_BIT	BIT(4)
+
+/* DTS encoding for TJ MAX temperature */
+#define SOC_DTS_TJMAX_ENCODING	0x7F
+
+/* IRQ 86 is a fixed APIC interrupt for BYT DTS Aux threshold notifications */
+#define BYT_SOC_DTS_APIC_IRQ	86
+
+/* Only 2 out of 4 is allowed for OSPM */
+#define SOC_MAX_DTS_TRIPS	2
+
+/* Mask for two trips in status bits */
+#define SOC_DTS_TRIP_MASK	0x03
+
+/* DTS0 and DTS 1 */
+#define SOC_MAX_DTS_SENSORS	2
+
+#define CRITICAL_OFFSET_FROM_TJ_MAX	5000
+
+/**
+ * struct soc_sensor_entry - state kept for one SoC DTS sensor
+ * @id:           sensor index; also the bit number used in the enable register
+ * @tj_max:       TJ max value handed to alloc_soc_dts()
+ * @temp_mask:    mask selecting this sensor's byte in SOC_DTS_OFFSET_TEMP
+ * @temp_shift:   shift moving the masked byte down to bit 0
+ * @store_status: SOC_DTS_OFFSET_ENABLE contents read at allocation time and
+ *                written back by free_soc_dts()
+ * @tzone:        thermal zone registered for this sensor
+ */
+struct soc_sensor_entry {
+	int id;
+	u32 tj_max;
+	u32 temp_mask;
+	u32 temp_shift;
+	u32 store_status;
+	struct thermal_zone_device *tzone;
+};
+
+static struct soc_sensor_entry *soc_dts[SOC_MAX_DTS_SENSORS];
+
+static int crit_offset = CRITICAL_OFFSET_FROM_TJ_MAX;
+module_param(crit_offset, int, 0644);
+MODULE_PARM_DESC(crit_offset,
+	"Critical Temperature offset from tj max in millidegree Celsius.");
+
+static DEFINE_MUTEX(aux_update_mutex);
+static spinlock_t intr_notify_lock;
+static int soc_dts_thres_irq;
+
+/*
+ * Read TJ max from MSR_IA32_TEMPERATURE_TARGET (bits 23:16 of the low word)
+ * and report it multiplied by 1000.
+ *
+ * Returns 0 on success.  If the MSR read fails its error is returned; if the
+ * field reads as zero -EINVAL is returned.  *tj_max is 0 on every failure.
+ */
+static int get_tj_max(u32 *tj_max)
+{
+	u32 lo, hi;
+	u32 target;
+	int ret;
+
+	ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &lo, &hi);
+	if (!ret) {
+		target = (lo >> 16) & 0xff;
+		if (target) {
+			*tj_max = target * 1000;
+			return 0;
+		}
+		ret = -EINVAL;
+	}
+
+	*tj_max = 0;
+
+	return ret;
+}
+
+/*
+ * .get_trip_temp callback.  Trip 0 is the critical trip and is computed from
+ * tj_max and the crit_offset module parameter.  Any other trip is decoded
+ * from its byte of the PTPS register; a zero field is reported as 0.
+ *
+ * Returns 0 on success or the iosf_mbi_read() error.
+ */
+static int sys_get_trip_temp(struct thermal_zone_device *tzd,
+					int trip, unsigned long *temp)
+{
+	struct soc_sensor_entry *entry = tzd->devdata;
+	u32 ptps;
+	u32 field;
+	int ret;
+
+	if (trip == 0) {
+		/* Just return the critical temp */
+		*temp = entry->tj_max - crit_offset;
+		return 0;
+	}
+
+	mutex_lock(&aux_update_mutex);
+	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_PTPS, &ptps);
+	mutex_unlock(&aux_update_mutex);
+	if (ret)
+		return ret;
+
+	field = (ptps >> (trip * 8)) & SOC_DTS_TJMAX_ENCODING;
+	*temp = field ? entry->tj_max - field * 1000 : 0;
+
+	return 0;
+}
+
+/*
+ * Program aux threshold @thres_index (its byte in PTPS) and switch its
+ * notification on or off: a non-zero @temp enables the aux trip in PTMC and
+ * the APIC/MSI bits in the matching TE_AUX register, @temp == 0 clears them.
+ *
+ * sys_set_trip_temp() calls this with aux_update_mutex held; the module
+ * init and exit paths call it without the mutex.
+ *
+ * Returns 0 on success or the iosf_mbi error.  On failure every register
+ * modified so far is written back with the value read at entry; errors from
+ * those restore writes are ignored.
+ */
+static int update_trip_temp(struct soc_sensor_entry *aux_entry,
+				int thres_index, unsigned long temp)
+{
+	int status;
+	u32 temp_out;
+	u32 out;
+	u32 store_ptps;
+	u32 store_ptmc;
+	u32 store_te_out;
+	u32 te_out;
+
+	u32 int_enable_bit = SOC_DTS_TE_APICA_ENABLE |
+						SOC_DTS_TE_MSI_ENABLE;
+
+	/* The threshold is encoded as the distance below tj_max, over 1000 */
+	temp_out = (aux_entry->tj_max - temp) / 1000;
+
+	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+				SOC_DTS_OFFSET_PTPS, &store_ptps);
+	if (status)
+		return status;
+
+	/* Replace only this threshold's byte */
+	out = (store_ptps & ~(0xFF << (thres_index * 8)));
+	out |= (temp_out & 0xFF) << (thres_index * 8);
+	status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+				SOC_DTS_OFFSET_PTPS, out);
+	if (status)
+		return status;
+	pr_debug("update_trip_temp PTPS = %x\n", out);
+	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_PTMC, &out);
+	if (status)
+		goto err_restore_ptps;
+
+	store_ptmc = out;
+
+	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_TE_AUX0 + thres_index,
+					&te_out);
+	if (status)
+		goto err_restore_ptmc;
+
+	store_te_out = te_out;
+
+	/* Enable for CPU module 0 and module 1 */
+	out |= (SOC_DTS_CPU_MODULE0_ENABLE_BIT |
+					SOC_DTS_CPU_MODULE1_ENABLE_BIT);
+	if (temp) {
+		if (thres_index)
+			out |= SOC_DTS_AUX1_ENABLE_BIT;
+		else
+			out |= SOC_DTS_AUX0_ENABLE_BIT;
+		te_out |= int_enable_bit;
+	} else {
+		if (thres_index)
+			out &= ~SOC_DTS_AUX1_ENABLE_BIT;
+		else
+			out &= ~SOC_DTS_AUX0_ENABLE_BIT;
+		te_out &= ~int_enable_bit;
+	}
+	status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+					SOC_DTS_OFFSET_PTMC, out);
+	if (status)
+		goto err_restore_ptmc;	/* TE_AUX has not been touched yet */
+
+	status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+					SOC_DTS_TE_AUX0 + thres_index,
+					te_out);
+	if (status)
+		goto err_restore_te_out;
+
+	return 0;
+
+err_restore_te_out:
+	/* The saved TE_AUX value belongs in TE_AUX, not in PTMC */
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+				SOC_DTS_TE_AUX0 + thres_index, store_te_out);
+err_restore_ptmc:
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+				SOC_DTS_OFFSET_PTMC, store_ptmc);
+err_restore_ptps:
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+				SOC_DTS_OFFSET_PTPS, store_ptps);
+	/* Nothing we can do if restore fails */
+
+	return status;
+}
+
+/*
+ * .set_trip_temp callback: reject temperatures above the critical limit
+ * (tj_max - crit_offset) with -EINVAL, otherwise program the threshold
+ * under aux_update_mutex and return update_trip_temp()'s result.
+ */
+static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
+							unsigned long temp)
+{
+	struct soc_sensor_entry *entry = tzd->devdata;
+	int ret;
+
+	if (temp > (entry->tj_max - crit_offset))
+		return -EINVAL;
+
+	mutex_lock(&aux_update_mutex);
+	ret = update_trip_temp(entry, trip, temp);
+	mutex_unlock(&aux_update_mutex);
+
+	return ret;
+}
+
+/*
+ * .get_trip_type callback: trip 0 is the critical trip, every other index is
+ * reported as passive.  Always returns 0.
+ */
+static int sys_get_trip_type(struct thermal_zone_device *thermal,
+		int trip, enum thermal_trip_type *type)
+{
+	*type = trip ? THERMAL_TRIP_PASSIVE : THERMAL_TRIP_CRITICAL;
+
+	return 0;
+}
+
+/*
+ * .get_temp callback: extract this sensor's byte from SOC_DTS_OFFSET_TEMP,
+ * rebase it against SOC_DTS_TJMAX_ENCODING and convert it to a temperature
+ * relative to tj_max.  All arithmetic is done in u32, as the register is.
+ *
+ * Returns 0 on success or the iosf_mbi_read() error.
+ */
+static int sys_get_curr_temp(struct thermal_zone_device *tzd,
+						unsigned long *temp)
+{
+	struct soc_sensor_entry *entry = tzd->devdata;
+	u32 raw;
+	int ret;
+
+	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_TEMP, &raw);
+	if (ret)
+		return ret;
+
+	raw &= entry->temp_mask;
+	raw >>= entry->temp_shift;
+	raw -= SOC_DTS_TJMAX_ENCODING;
+	*temp = entry->tj_max - raw * 1000;
+
+	return 0;
+}
+
+/* Callbacks handed to thermal_zone_device_register() for every DTS zone */
+static struct thermal_zone_device_ops tzone_ops = {
+	.get_temp	= sys_get_curr_temp,
+	.get_trip_type	= sys_get_trip_type,
+	.get_trip_temp	= sys_get_trip_temp,
+	.set_trip_temp	= sys_set_trip_temp,
+};
+
+/*
+ * Undo alloc_soc_dts(): write the saved enable-register value back,
+ * unregister the thermal zone and free the entry.  A NULL entry is ignored.
+ */
+static void free_soc_dts(struct soc_sensor_entry *aux_entry)
+{
+	if (!aux_entry)
+		return;
+
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+		SOC_DTS_OFFSET_ENABLE, aux_entry->store_status);
+	thermal_zone_device_unregister(aux_entry->tzone);
+	kfree(aux_entry);
+}
+
+/*
+ * Set bit @id in SOC_DTS_OFFSET_ENABLE unless it is already set.
+ * Returns 0 on success or the iosf_mbi read/write error.
+ */
+static int soc_dts_enable(int id)
+{
+	u32 enable;
+	int ret;
+
+	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_ENABLE, &enable);
+	if (ret)
+		return ret;
+
+	/* Already enabled: no write needed */
+	if (enable & BIT(id))
+		return 0;
+
+	enable |= BIT(id);
+
+	return iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+					SOC_DTS_OFFSET_ENABLE, enable);
+}
+
+/*
+ * Allocate the state for DTS sensor @id, register its thermal zone and make
+ * sure the sensor is enabled.
+ *
+ * Returns the new entry, or an ERR_PTR() carrying -ENOMEM, the iosf_mbi
+ * error or the zone registration error.
+ */
+static struct soc_sensor_entry *alloc_soc_dts(int id, u32 tj_max)
+{
+	struct soc_sensor_entry *entry;
+	char zone_name[10];
+	int ret;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store status to restore on exit */
+	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_ENABLE,
+					&entry->store_status);
+	if (ret)
+		goto err_free;
+
+	entry->id = id;
+	entry->tj_max = tj_max;
+	/* Each sensor owns one byte of the temperature register */
+	entry->temp_shift = id * 8;
+	entry->temp_mask = 0x00FF << (id * 8);
+
+	snprintf(zone_name, sizeof(zone_name), "soc_dts%d", id);
+	entry->tzone = thermal_zone_device_register(zone_name,
+			SOC_MAX_DTS_TRIPS,
+			0x02,
+			entry, &tzone_ops, NULL, 0, 0);
+	if (IS_ERR(entry->tzone)) {
+		ret = PTR_ERR(entry->tzone);
+		goto err_free;
+	}
+
+	ret = soc_dts_enable(id);
+	if (ret)
+		goto err_unregister;
+
+	return entry;
+
+err_unregister:
+	thermal_zone_device_unregister(entry->tzone);
+err_free:
+	kfree(entry);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Acknowledge a DTS aux threshold interrupt and, when a trip status bit is
+ * set, ask the thermal core to re-evaluate every zone.
+ *
+ * intr_notify_lock covers only the register sequence.  It is dropped before
+ * thermal_zone_device_update() is called: that call can end up in this
+ * driver's trip callbacks, and sys_get_trip_temp() takes aux_update_mutex,
+ * which must not happen with a spinlock held and interrupts disabled.
+ */
+static void proc_thermal_interrupt(void)
+{
+	unsigned long flags;
+	u32 sticky_out = 0;
+	u32 ptmc_out;
+	int status;
+	int i;
+
+	spin_lock_irqsave(&intr_notify_lock, flags);
+
+	/* Clear APIC interrupt; never write back a value that was not read */
+	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+				SOC_DTS_OFFSET_PTMC, &ptmc_out);
+	if (!status) {
+		ptmc_out |= SOC_DTS_PTMC_APIC_DEASSERT_BIT;
+		iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+					SOC_DTS_OFFSET_PTMC, ptmc_out);
+	}
+
+	/* Read status here; a failed read counts as "no trip pending" */
+	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
+					SOC_DTS_OFFSET_PTTSS, &sticky_out);
+	if (status)
+		sticky_out = 0;
+	pr_debug("status %d PTTSS %x\n", status, sticky_out);
+
+	if (sticky_out & SOC_DTS_TRIP_MASK) {
+		/* reset sticky bit */
+		iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
+					SOC_DTS_OFFSET_PTTSS, sticky_out);
+	}
+
+	spin_unlock_irqrestore(&intr_notify_lock, flags);
+
+	if (!(sticky_out & SOC_DTS_TRIP_MASK))
+		return;
+
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		pr_debug("TZD update for zone %d\n", i);
+		thermal_zone_device_update(soc_dts[i]->tzone);
+	}
+}
+
+/*
+ * Threaded handler for the aux threshold IRQ (registered with no primary
+ * handler).  Locking is done inside proc_thermal_interrupt().
+ */
+static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data)
+{
+	proc_thermal_interrupt();
+	pr_debug("proc_thermal_interrupt\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * CPUs this driver binds to.  The last initialiser is the driver_data slot,
+ * which intel_soc_thermal_init() reads back as the threshold IRQ number.
+ * NOTE(review): model 0x37 is presumably the BYT part that the
+ * BYT_SOC_DTS_APIC_IRQ comment refers to; confirm against the CPU model list.
+ */
+static const struct x86_cpu_id soc_thermal_ids[] = {
+	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, 0x37, 0, BYT_SOC_DTS_APIC_IRQ},
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, soc_thermal_ids);
+
+/*
+ * Module init: match the CPU, read TJ max, create one zone per DTS sensor,
+ * install the threaded threshold IRQ and program trip 0 of every sensor to
+ * the critical temperature (tj_max - crit_offset).
+ *
+ * Returns 0 on success, -ENODEV for an unsupported CPU, -EINVAL when TJ max
+ * cannot be read, or the error of the step that failed.
+ */
+static int __init intel_soc_thermal_init(void)
+{
+	u32 tj_max;
+	int err = 0;
+	int i;
+	const struct x86_cpu_id *match_cpu;
+
+	match_cpu = x86_match_cpu(soc_thermal_ids);
+	if (!match_cpu)
+		return -ENODEV;
+
+	if (get_tj_max(&tj_max))
+		return -EINVAL;
+
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		soc_dts[i] = alloc_soc_dts(i, tj_max);
+		if (IS_ERR(soc_dts[i])) {
+			err = PTR_ERR(soc_dts[i]);
+			/* i entries exist so far; err_free releases them */
+			goto err_free;
+		}
+	}
+
+	/* The lock must be ready before the handler can run */
+	spin_lock_init(&intr_notify_lock);
+
+	/* The IRQ number travels in the matched table entry's driver_data */
+	soc_dts_thres_irq = (int)match_cpu->driver_data;
+
+	err = request_threaded_irq(soc_dts_thres_irq, NULL,
+					soc_irq_thread_fn,
+					IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					"soc_dts", soc_dts);
+	if (err) {
+		pr_err("request_threaded_irq ret %d\n", err);
+		/* i == SOC_MAX_DTS_SENSORS here, so every entry is freed */
+		goto err_free;
+	}
+
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		err = update_trip_temp(soc_dts[i], 0, tj_max - crit_offset);
+		if (err)
+			goto err_trip_temp;
+	}
+
+	return 0;
+
+err_trip_temp:
+	/* All sensors were allocated: reset i so the loop frees each one */
+	i = SOC_MAX_DTS_SENSORS;
+	free_irq(soc_dts_thres_irq, soc_dts);
+err_free:
+	/* Release entries [0, i) in reverse order of allocation */
+	while (--i >= 0)
+		free_soc_dts(soc_dts[i]);
+
+	return err;
+}
+
+/*
+ * Module exit: disable threshold 0 on every sensor (temp 0 turns the aux
+ * trip and its interrupt bits off), release the IRQ, then free the zones.
+ */
+static void __exit intel_soc_thermal_exit(void)
+{
+	int i;
+
+	/* Return values are ignored; nothing more can be done on unload */
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i)
+		update_trip_temp(soc_dts[i], 0, 0);
+
+	/* Free the IRQ first: its handler dereferences soc_dts[] */
+	free_irq(soc_dts_thres_irq, soc_dts);
+
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i)
+		free_soc_dts(soc_dts[i]);
+
+}
+
+module_init(intel_soc_thermal_init)
+module_exit(intel_soc_thermal_exit)
+
+MODULE_DESCRIPTION("Intel SoC DTS Thermal Driver");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 5a37940..8803e69 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c

@@ -374,10 +374,8 @@
 	int idle = IDLE_INTERVAL;
 
 	common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL);
-	if (!common) {
-		dev_err(dev, "Could not allocate common\n");
+	if (!common)
 		return -ENOMEM;
-	}
 
 	INIT_LIST_HEAD(&common->head);
 	spin_lock_init(&common->lock);
@@ -423,7 +421,6 @@
 
 		priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 		if (!priv) {
-			dev_err(dev, "Could not allocate priv\n");
 			ret = -ENOMEM;
 			goto error_unregister;
 		}
@@ -470,7 +467,7 @@
 			rcar_thermal_irq_disable(priv);
 	}
 
-	pm_runtime_put_sync(dev);
+	pm_runtime_put(dev);
 	pm_runtime_disable(dev);
 
 	return ret;
@@ -488,7 +485,7 @@
 			rcar_thermal_irq_disable(priv);
 	}
 
-	pm_runtime_put_sync(dev);
+	pm_runtime_put(dev);
 	pm_runtime_disable(dev);
 
 	return 0;

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 0d96a51..d7ca9f4 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c

@@ -41,12 +41,13 @@
  * @id: identifier of the one instance of the TMU controller.
  * @pdata: pointer to the tmu platform/configuration data
  * @base: base address of the single instance of the TMU controller.
- * @base_common: base address of the common registers of the TMU controller.
+ * @base_second: base address of the common registers of the TMU controller.
  * @irq: irq number of the TMU controller.
  * @soc: id of the SOC type.
  * @irq_work: pointer to the irq work structure.
  * @lock: lock to implement synchronization.
  * @clk: pointer to the clock structure.
+ * @clk_sec: pointer to the clock structure for accessing the base_second.
  * @temp_error1: fused value of the first point trim.
  * @temp_error2: fused value of the second point trim.
  * @regulator: pointer to the TMU regulator structure.
@@ -56,12 +57,12 @@
 	int id;
 	struct exynos_tmu_platform_data *pdata;
 	void __iomem *base;
-	void __iomem *base_common;
+	void __iomem *base_second;
 	int irq;
 	enum soc_type soc;
 	struct work_struct irq_work;
 	struct mutex lock;
-	struct clk *clk;
+	struct clk *clk, *clk_sec;
 	u8 temp_error1, temp_error2;
 	struct regulator *regulator;
 	struct thermal_sensor_conf *reg_conf;
@@ -152,6 +153,8 @@
 
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
+	if (!IS_ERR(data->clk_sec))
+		clk_enable(data->clk_sec);
 
 	if (TMU_SUPPORTS(pdata, READY_STATUS)) {
 		status = readb(data->base + reg->tmu_status);
@@ -186,7 +189,12 @@
 			EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
 		}
 	} else {
-		trim_info = readl(data->base + reg->triminfo_data);
+		/* On exynos5420 the triminfo register is in the shared space */
+		if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO)
+			trim_info = readl(data->base_second +
+							reg->triminfo_data);
+		else
+			trim_info = readl(data->base + reg->triminfo_data);
 	}
 	data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK;
 	data->temp_error2 = ((trim_info >> reg->triminfo_85_shift) &
@@ -225,6 +233,8 @@
 			trigger_levs++;
 	}
 
+	rising_threshold = readl(data->base + reg->threshold_th0);
+
 	if (data->soc == SOC_ARCH_EXYNOS4210) {
 		/* Write temperature code for threshold */
 		threshold_code = temp_to_code(data, pdata->threshold);
@@ -238,7 +248,7 @@
 			writeb(pdata->trigger_levels[i], data->base +
 			reg->threshold_th0 + i * sizeof(reg->threshold_th0));
 
-		writel(reg->inten_rise_mask, data->base + reg->tmu_intclear);
+		writel(reg->intclr_rise_mask, data->base + reg->tmu_intclear);
 	} else {
 		/* Write temperature code for rising and falling threshold */
 		for (i = 0;
@@ -249,6 +259,7 @@
 				ret = threshold_code;
 				goto out;
 			}
+			rising_threshold &= ~(0xff << 8 * i);
 			rising_threshold |= threshold_code << 8 * i;
 			if (pdata->threshold_falling) {
 				threshold_code = temp_to_code(data,
@@ -265,8 +276,8 @@
 		writel(falling_threshold,
 				data->base + reg->threshold_th1);
 
-		writel((reg->inten_rise_mask << reg->inten_rise_shift) |
-			(reg->inten_fall_mask << reg->inten_fall_shift),
+		writel((reg->intclr_rise_mask << reg->intclr_rise_shift) |
+			(reg->intclr_fall_mask << reg->intclr_fall_shift),
 				data->base + reg->tmu_intclear);
 
 		/* if last threshold limit is also present */
@@ -281,6 +292,7 @@
 			}
 			if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) {
 				/* 1-4 level to be assigned in th0 reg */
+				rising_threshold &= ~(0xff << 8 * i);
 				rising_threshold |= threshold_code << 8 * i;
 				writel(rising_threshold,
 					data->base + reg->threshold_th0);
@@ -298,10 +310,12 @@
 	}
 	/*Clear the PMIN in the common TMU register*/
 	if (reg->tmu_pmin && !data->id)
-		writel(0, data->base_common + reg->tmu_pmin);
+		writel(0, data->base_second + reg->tmu_pmin);
 out:
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
+	if (!IS_ERR(data->clk_sec))
+		clk_disable(data->clk_sec);
 
 	return ret;
 }
@@ -453,12 +467,16 @@
 	const struct exynos_tmu_registers *reg = pdata->registers;
 	unsigned int val_irq, val_type;
 
+	if (!IS_ERR(data->clk_sec))
+		clk_enable(data->clk_sec);
 	/* Find which sensor generated this interrupt */
 	if (reg->tmu_irqstatus) {
-		val_type = readl(data->base_common + reg->tmu_irqstatus);
+		val_type = readl(data->base_second + reg->tmu_irqstatus);
 		if (!((val_type >> data->id) & 0x1))
 			goto out;
 	}
+	if (!IS_ERR(data->clk_sec))
+		clk_disable(data->clk_sec);
 
 	exynos_report_trigger(data->reg_conf);
 	mutex_lock(&data->lock);
@@ -499,6 +517,18 @@
 		.data = (void *)EXYNOS5250_TMU_DRV_DATA,
 	},
 	{
+		.compatible = "samsung,exynos5260-tmu",
+		.data = (void *)EXYNOS5260_TMU_DRV_DATA,
+	},
+	{
+		.compatible = "samsung,exynos5420-tmu",
+		.data = (void *)EXYNOS5420_TMU_DRV_DATA,
+	},
+	{
+		.compatible = "samsung,exynos5420-tmu-ext-triminfo",
+		.data = (void *)EXYNOS5420_TMU_DRV_DATA,
+	},
+	{
 		.compatible = "samsung,exynos5440-tmu",
 		.data = (void *)EXYNOS5440_TMU_DRV_DATA,
 	},
@@ -580,7 +610,7 @@
 	 * Check if the TMU shares some registers and then try to map the
 	 * memory of common registers.
 	 */
-	if (!TMU_SUPPORTS(pdata, SHARED_MEMORY))
+	if (!TMU_SUPPORTS(pdata, ADDRESS_MULTIPLE))
 		return 0;
 
 	if (of_address_to_resource(pdev->dev.of_node, 1, &res)) {
@@ -588,9 +618,9 @@
 		return -ENODEV;
 	}
 
-	data->base_common = devm_ioremap(&pdev->dev, res.start,
+	data->base_second = devm_ioremap(&pdev->dev, res.start,
 					resource_size(&res));
-	if (!data->base_common) {
+	if (!data->base_second) {
 		dev_err(&pdev->dev, "Failed to ioremap memory\n");
 		return -ENOMEM;
 	}
@@ -607,10 +637,8 @@
 
 	data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data),
 					GFP_KERNEL);
-	if (!data) {
-		dev_err(&pdev->dev, "Failed to allocate driver structure\n");
+	if (!data)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(pdev, data);
 	mutex_init(&data->lock);
@@ -629,13 +657,31 @@
 		return  PTR_ERR(data->clk);
 	}
 
+	data->clk_sec = devm_clk_get(&pdev->dev, "tmu_triminfo_apbif");
+	if (IS_ERR(data->clk_sec)) {
+		if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO) {
+			dev_err(&pdev->dev, "Failed to get triminfo clock\n");
+			return PTR_ERR(data->clk_sec);
+		}
+	} else {
+		ret = clk_prepare(data->clk_sec);
+		if (ret) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			return ret;
+		}
+	}
+
 	ret = clk_prepare(data->clk);
-	if (ret)
-		return ret;
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to get clock\n");
+		goto err_clk_sec;
+	}
 
 	if (pdata->type == SOC_ARCH_EXYNOS4210 ||
 	    pdata->type == SOC_ARCH_EXYNOS4412 ||
 	    pdata->type == SOC_ARCH_EXYNOS5250 ||
+	    pdata->type == SOC_ARCH_EXYNOS5260 ||
+	    pdata->type == SOC_ARCH_EXYNOS5420_TRIMINFO ||
 	    pdata->type == SOC_ARCH_EXYNOS5440)
 		data->soc = pdata->type;
 	else {
@@ -656,7 +702,6 @@
 	sensor_conf = devm_kzalloc(&pdev->dev,
 				sizeof(struct thermal_sensor_conf), GFP_KERNEL);
 	if (!sensor_conf) {
-		dev_err(&pdev->dev, "Failed to allocate registration struct\n");
 		ret = -ENOMEM;
 		goto err_clk;
 	}
@@ -704,6 +749,9 @@
 	return 0;
 err_clk:
 	clk_unprepare(data->clk);
+err_clk_sec:
+	if (!IS_ERR(data->clk_sec))
+		clk_unprepare(data->clk_sec);
 	return ret;
 }
 
@@ -716,6 +764,8 @@
 	exynos_unregister_thermal(data->reg_conf);
 
 	clk_unprepare(data->clk);
+	if (!IS_ERR(data->clk_sec))
+		clk_unprepare(data->clk_sec);
 
 	if (!IS_ERR(data->regulator))
 		regulator_disable(data->regulator);

diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h
index 3fb6554..edd08cf 100644
--- a/drivers/thermal/samsung/exynos_tmu.h
+++ b/drivers/thermal/samsung/exynos_tmu.h

@@ -43,6 +43,8 @@
 	SOC_ARCH_EXYNOS4210 = 1,
 	SOC_ARCH_EXYNOS4412,
 	SOC_ARCH_EXYNOS5250,
+	SOC_ARCH_EXYNOS5260,
+	SOC_ARCH_EXYNOS5420_TRIMINFO,
 	SOC_ARCH_EXYNOS5440,
 };
 
@@ -60,7 +62,7 @@
  *			state(active/idle) can be checked.
  * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation
  *			sample time.
- * TMU_SUPPORT_SHARED_MEMORY - This feature tells that the different TMU
+ * TMU_SUPPORT_ADDRESS_MULTIPLE - This feature tells that the different TMU
  *			sensors shares some common registers.
  * TMU_SUPPORT - macro to compare the above features with the supplied.
  */
@@ -70,7 +72,7 @@
 #define TMU_SUPPORT_FALLING_TRIP		BIT(3)
 #define TMU_SUPPORT_READY_STATUS		BIT(4)
 #define TMU_SUPPORT_EMUL_TIME			BIT(5)
-#define TMU_SUPPORT_SHARED_MEMORY		BIT(6)
+#define TMU_SUPPORT_ADDRESS_MULTIPLE		BIT(6)
 
 #define TMU_SUPPORTS(a, b)	(a->features & TMU_SUPPORT_ ## b)
 
@@ -122,10 +124,6 @@
  * @threshold_th3_l0_shift: shift bits of level0 threshold temperature.
  * @tmu_inten: register containing the different threshold interrupt
 	enable bits.
- * @inten_rise_shift: shift bits of all rising interrupt bits.
- * @inten_rise_mask: mask bits of all rising interrupt bits.
- * @inten_fall_shift: shift bits of all rising interrupt bits.
- * @inten_fall_mask: mask bits of all rising interrupt bits.
  * @inten_rise0_shift: shift bits of rising 0 interrupt bits.
  * @inten_rise1_shift: shift bits of rising 1 interrupt bits.
  * @inten_rise2_shift: shift bits of rising 2 interrupt bits.
@@ -136,6 +134,10 @@
  * @inten_fall3_shift: shift bits of falling 3 interrupt bits.
  * @tmu_intstat: Register containing the interrupt status values.
  * @tmu_intclear: Register for clearing the raised interrupt status.
+ * @intclr_fall_shift: shift bits for interrupt clear fall 0
+ * @intclr_rise_shift: shift bits of all rising interrupt bits.
+ * @intclr_rise_mask: mask bits of all rising interrupt bits.
+ * @intclr_fall_mask: mask bits of all falling interrupt bits.
  * @emul_con: TMU emulation controller register.
  * @emul_temp_shift: shift bits of emulation temperature.
  * @emul_time_shift: shift bits of emulation time.
@@ -149,6 +151,7 @@
 	u32	triminfo_85_shift;
 
 	u32	triminfo_ctrl;
+	u32	triminfo_ctrl1;
 	u32	triminfo_reload_shift;
 
 	u32	tmu_ctrl;
@@ -191,10 +194,6 @@
 	u32	threshold_th3_l0_shift;
 
 	u32	tmu_inten;
-	u32	inten_rise_shift;
-	u32	inten_rise_mask;
-	u32	inten_fall_shift;
-	u32	inten_fall_mask;
 	u32	inten_rise0_shift;
 	u32	inten_rise1_shift;
 	u32	inten_rise2_shift;
@@ -207,6 +206,10 @@
 	u32	tmu_intstat;
 
 	u32	tmu_intclear;
+	u32	intclr_fall_shift;
+	u32	intclr_rise_shift;
+	u32	intclr_fall_mask;
+	u32	intclr_rise_mask;
 
 	u32	emul_con;
 	u32	emul_temp_shift;

diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c
index 476b768..c1d81dc 100644
--- a/drivers/thermal/samsung/exynos_tmu_data.c
+++ b/drivers/thermal/samsung/exynos_tmu_data.c

@@ -40,13 +40,13 @@
 	.threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP,
 	.threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0,
 	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise_mask = EXYNOS4210_TMU_TRIG_LEVEL_MASK,
 	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
 	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
 	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
 	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
 	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
 	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+	.intclr_rise_mask = EXYNOS4210_TMU_TRIG_LEVEL_MASK,
 };
 
 struct exynos_tmu_init_data const exynos4210_default_tmu_data = {
@@ -112,10 +112,6 @@
 	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
 	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
 	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise_mask = EXYNOS_TMU_RISE_INT_MASK,
-	.inten_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
-	.inten_fall_mask = EXYNOS_TMU_FALL_INT_MASK,
-	.inten_fall_shift = EXYNOS_TMU_FALL_INT_SHIFT,
 	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
 	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
 	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
@@ -123,6 +119,10 @@
 	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
 	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
 	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+	.intclr_fall_shift = EXYNOS_TMU_CLEAR_FALL_INT_SHIFT,
+	.intclr_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
+	.intclr_rise_mask = EXYNOS_TMU_RISE_INT_MASK,
+	.intclr_fall_mask = EXYNOS_TMU_FALL_INT_MASK,
 	.emul_con = EXYNOS_EMUL_CON,
 	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
 	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
@@ -194,6 +194,197 @@
 };
 #endif
 
+#if defined(CONFIG_SOC_EXYNOS5260)
+static const struct exynos_tmu_registers exynos5260_tmu_registers = {
+	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
+	.triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+	.triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
+	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL1,
+	.buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+	.buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
+	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
+	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
+	.buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+	.buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+	.core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+	.tmu_status = EXYNOS_TMU_REG_STATUS,
+	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
+	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
+	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
+	.tmu_inten = EXYNOS5260_TMU_REG_INTEN,
+	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
+	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
+	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
+	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
+	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
+	.tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT,
+	.tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR,
+	.intclr_fall_shift = EXYNOS5420_TMU_CLEAR_FALL_INT_SHIFT,
+	.intclr_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
+	.intclr_rise_mask = EXYNOS5260_TMU_RISE_INT_MASK,
+	.intclr_fall_mask = EXYNOS5260_TMU_FALL_INT_MASK,
+	.emul_con = EXYNOS5260_EMUL_CON,
+	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
+	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
+	.emul_time_mask = EXYNOS_EMUL_TIME_MASK,
+};
+
+#define __EXYNOS5260_TMU_DATA	\
+	.threshold_falling = 10, \
+	.trigger_levels[0] = 85, \
+	.trigger_levels[1] = 103, \
+	.trigger_levels[2] = 110, \
+	.trigger_levels[3] = 120, \
+	.trigger_enable[0] = true, \
+	.trigger_enable[1] = true, \
+	.trigger_enable[2] = true, \
+	.trigger_enable[3] = false, \
+	.trigger_type[0] = THROTTLE_ACTIVE, \
+	.trigger_type[1] = THROTTLE_ACTIVE, \
+	.trigger_type[2] = SW_TRIP, \
+	.trigger_type[3] = HW_TRIP, \
+	.max_trigger_level = 4, \
+	.gain = 8, \
+	.reference_voltage = 16, \
+	.noise_cancel_mode = 4, \
+	.cal_type = TYPE_ONE_POINT_TRIMMING, \
+	.efuse_value = 55, \
+	.min_efuse_value = 40, \
+	.max_efuse_value = 100, \
+	.first_point_trim = 25, \
+	.second_point_trim = 85, \
+	.default_temp_offset = 50, \
+	.freq_tab[0] = { \
+		.freq_clip_max = 800 * 1000, \
+		.temp_level = 85, \
+	}, \
+	.freq_tab[1] = { \
+		.freq_clip_max = 200 * 1000, \
+		.temp_level = 103, \
+	}, \
+	.freq_tab_count = 2, \
+	.registers = &exynos5260_tmu_registers, \
+
+#define EXYNOS5260_TMU_DATA \
+	__EXYNOS5260_TMU_DATA \
+	.type = SOC_ARCH_EXYNOS5260, \
+	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
+			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
+			TMU_SUPPORT_EMUL_TIME)
+
+struct exynos_tmu_init_data const exynos5260_default_tmu_data = {
+	.tmu_data = {
+		{ EXYNOS5260_TMU_DATA },
+		{ EXYNOS5260_TMU_DATA },
+		{ EXYNOS5260_TMU_DATA },
+		{ EXYNOS5260_TMU_DATA },
+		{ EXYNOS5260_TMU_DATA },
+	},
+	.tmu_count = 5,
+};
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS5420)
+static const struct exynos_tmu_registers exynos5420_tmu_registers = {
+	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
+	.triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+	.triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
+	.buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+	.buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
+	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
+	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
+	.buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+	.buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+	.core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+	.tmu_status = EXYNOS_TMU_REG_STATUS,
+	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
+	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
+	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
+	.tmu_inten = EXYNOS_TMU_REG_INTEN,
+	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
+	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
+	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
+	/* INTEN_RISE3 Not available in exynos5420 */
+	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
+	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
+	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
+	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+	.intclr_fall_shift = EXYNOS5420_TMU_CLEAR_FALL_INT_SHIFT,
+	.intclr_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
+	.intclr_rise_mask = EXYNOS_TMU_RISE_INT_MASK,
+	.intclr_fall_mask = EXYNOS_TMU_FALL_INT_MASK,
+	.emul_con = EXYNOS_EMUL_CON,
+	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
+	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
+	.emul_time_mask = EXYNOS_EMUL_TIME_MASK,
+};
+
+#define __EXYNOS5420_TMU_DATA	\
+	.threshold_falling = 10, \
+	.trigger_levels[0] = 85, \
+	.trigger_levels[1] = 103, \
+	.trigger_levels[2] = 110, \
+	.trigger_levels[3] = 120, \
+	.trigger_enable[0] = true, \
+	.trigger_enable[1] = true, \
+	.trigger_enable[2] = true, \
+	.trigger_enable[3] = false, \
+	.trigger_type[0] = THROTTLE_ACTIVE, \
+	.trigger_type[1] = THROTTLE_ACTIVE, \
+	.trigger_type[2] = SW_TRIP, \
+	.trigger_type[3] = HW_TRIP, \
+	.max_trigger_level = 4, \
+	.gain = 8, \
+	.reference_voltage = 16, \
+	.noise_cancel_mode = 4, \
+	.cal_type = TYPE_ONE_POINT_TRIMMING, \
+	.efuse_value = 55, \
+	.min_efuse_value = 40, \
+	.max_efuse_value = 100, \
+	.first_point_trim = 25, \
+	.second_point_trim = 85, \
+	.default_temp_offset = 50, \
+	.freq_tab[0] = { \
+		.freq_clip_max = 800 * 1000, \
+		.temp_level = 85, \
+	}, \
+	.freq_tab[1] = { \
+		.freq_clip_max = 200 * 1000, \
+		.temp_level = 103, \
+	}, \
+	.freq_tab_count = 2, \
+	.registers = &exynos5420_tmu_registers, \
+
+#define EXYNOS5420_TMU_DATA \
+	__EXYNOS5420_TMU_DATA \
+	.type = SOC_ARCH_EXYNOS5250, \
+	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
+			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
+			TMU_SUPPORT_EMUL_TIME)
+
+#define EXYNOS5420_TMU_DATA_SHARED \
+	__EXYNOS5420_TMU_DATA \
+	.type = SOC_ARCH_EXYNOS5420_TRIMINFO, \
+	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
+			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
+			TMU_SUPPORT_EMUL_TIME | TMU_SUPPORT_ADDRESS_MULTIPLE)
+
+struct exynos_tmu_init_data const exynos5420_default_tmu_data = {
+	.tmu_data = {
+		{ EXYNOS5420_TMU_DATA },
+		{ EXYNOS5420_TMU_DATA },
+		{ EXYNOS5420_TMU_DATA_SHARED },
+		{ EXYNOS5420_TMU_DATA_SHARED },
+		{ EXYNOS5420_TMU_DATA_SHARED },
+	},
+	.tmu_count = 5,
+};
+#endif
+
 #if defined(CONFIG_SOC_EXYNOS5440)
 static const struct exynos_tmu_registers exynos5440_tmu_registers = {
 	.triminfo_data = EXYNOS5440_TMU_S0_7_TRIM,
@@ -217,10 +408,6 @@
 	.threshold_th2 = EXYNOS5440_TMU_S0_7_TH2,
 	.threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT,
 	.tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN,
-	.inten_rise_mask = EXYNOS5440_TMU_RISE_INT_MASK,
-	.inten_rise_shift = EXYNOS5440_TMU_RISE_INT_SHIFT,
-	.inten_fall_mask = EXYNOS5440_TMU_FALL_INT_MASK,
-	.inten_fall_shift = EXYNOS5440_TMU_FALL_INT_SHIFT,
 	.inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT,
 	.inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT,
 	.inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT,
@@ -228,6 +415,10 @@
 	.inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT,
 	.tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ,
 	.tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ,
+	.intclr_fall_shift = EXYNOS5440_TMU_CLEAR_FALL_INT_SHIFT,
+	.intclr_rise_shift = EXYNOS5440_TMU_RISE_INT_SHIFT,
+	.intclr_rise_mask = EXYNOS5440_TMU_RISE_INT_MASK,
+	.intclr_fall_mask = EXYNOS5440_TMU_FALL_INT_MASK,
 	.tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS,
 	.emul_con = EXYNOS5440_TMU_S0_7_DEBUG,
 	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
@@ -255,7 +446,7 @@
 	.type = SOC_ARCH_EXYNOS5440, \
 	.registers = &exynos5440_tmu_registers, \
 	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
-			TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_SHARED_MEMORY),
+			TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_ADDRESS_MULTIPLE),
 
 struct exynos_tmu_init_data const exynos5440_default_tmu_data = {
 	.tmu_data = {

diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h
index a1ea19d..d268981 100644
--- a/drivers/thermal/samsung/exynos_tmu_data.h
+++ b/drivers/thermal/samsung/exynos_tmu_data.h

@@ -69,9 +69,11 @@
 #define EXYNOS_TMU_RISE_INT_MASK	0x111
 #define EXYNOS_TMU_RISE_INT_SHIFT	0
 #define EXYNOS_TMU_FALL_INT_MASK	0x111
-#define EXYNOS_TMU_FALL_INT_SHIFT	12
 #define EXYNOS_TMU_CLEAR_RISE_INT	0x111
 #define EXYNOS_TMU_CLEAR_FALL_INT	(0x111 << 12)
+#define EXYNOS_TMU_CLEAR_FALL_INT_SHIFT	12
+#define EXYNOS5420_TMU_CLEAR_FALL_INT_SHIFT	16
+#define EXYNOS5440_TMU_CLEAR_FALL_INT_SHIFT	4
 #define EXYNOS_TMU_TRIP_MODE_SHIFT	13
 #define EXYNOS_TMU_TRIP_MODE_MASK	0x7
 #define EXYNOS_TMU_THERM_TRIP_EN_SHIFT	12
@@ -85,6 +87,7 @@
 #define EXYNOS_TMU_INTEN_FALL0_SHIFT	16
 #define EXYNOS_TMU_INTEN_FALL1_SHIFT	20
 #define EXYNOS_TMU_INTEN_FALL2_SHIFT	24
+#define EXYNOS_TMU_INTEN_FALL3_SHIFT	28
 
 #define EXYNOS_EMUL_TIME	0x57F0
 #define EXYNOS_EMUL_TIME_MASK	0xffff
@@ -95,6 +98,17 @@
 
 #define EXYNOS_MAX_TRIGGER_PER_REG	4
 
+/* Exynos5260 specific */
+#define EXYNOS_TMU_REG_CONTROL1			0x24
+#define EXYNOS5260_TMU_REG_INTEN		0xC0
+#define EXYNOS5260_TMU_REG_INTSTAT		0xC4
+#define EXYNOS5260_TMU_REG_INTCLEAR		0xC8
+#define EXYNOS5260_TMU_CLEAR_RISE_INT		0x1111
+#define EXYNOS5260_TMU_CLEAR_FALL_INT		(0x1111 << 16)
+#define EXYNOS5260_TMU_RISE_INT_MASK		0x1111
+#define EXYNOS5260_TMU_FALL_INT_MASK		0x1111
+#define EXYNOS5260_EMUL_CON			0x100
+
 /* Exynos4412 specific */
 #define EXYNOS4412_MUX_ADDR_VALUE          6
 #define EXYNOS4412_MUX_ADDR_SHIFT          20
@@ -119,7 +133,6 @@
 #define EXYNOS5440_TMU_RISE_INT_MASK		0xf
 #define EXYNOS5440_TMU_RISE_INT_SHIFT		0
 #define EXYNOS5440_TMU_FALL_INT_MASK		0xf
-#define EXYNOS5440_TMU_FALL_INT_SHIFT		4
 #define EXYNOS5440_TMU_INTEN_RISE0_SHIFT	0
 #define EXYNOS5440_TMU_INTEN_RISE1_SHIFT	1
 #define EXYNOS5440_TMU_INTEN_RISE2_SHIFT	2
@@ -156,6 +169,20 @@
 #define EXYNOS5250_TMU_DRV_DATA (NULL)
 #endif
 
+#if defined(CONFIG_SOC_EXYNOS5260)
+extern struct exynos_tmu_init_data const exynos5260_default_tmu_data;
+#define EXYNOS5260_TMU_DRV_DATA (&exynos5260_default_tmu_data)
+#else
+#define EXYNOS5260_TMU_DRV_DATA (NULL)
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS5420)
+extern struct exynos_tmu_init_data const exynos5420_default_tmu_data;
+#define EXYNOS5420_TMU_DRV_DATA (&exynos5420_default_tmu_data)
+#else
+#define EXYNOS5420_TMU_DRV_DATA (NULL)
+#endif
+
 #if defined(CONFIG_SOC_EXYNOS5440)
 extern struct exynos_tmu_init_data const exynos5440_default_tmu_data;
 #define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data)

diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c
index ab79ea4..1e2193f 100644
--- a/drivers/thermal/spear_thermal.c
+++ b/drivers/thermal/spear_thermal.c

@@ -113,10 +113,8 @@
 	}
 
 	stdev = devm_kzalloc(&pdev->dev, sizeof(*stdev), GFP_KERNEL);
-	if (!stdev) {
-		dev_err(&pdev->dev, "kzalloc fail\n");
+	if (!stdev)
 		return -ENOMEM;
-	}
 
 	/* Enable thermal sensor */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index 3ab12ee..a1271b5 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c

@@ -1248,7 +1248,7 @@
 	clk_rate = clk_round_rate(bgp->div_clk,
 				  bgp->conf->sensors[0].ts_data->max_freq);
 	if (clk_rate < bgp->conf->sensors[0].ts_data->min_freq ||
-	    clk_rate == 0xffffffff) {
+	    clk_rate <= 0) {
 		ret = -ENODEV;
 		dev_err(&pdev->dev, "wrong clock rate (%d)\n", clk_rate);
 		goto put_clks;

diff --git a/drivers/tty/hvc/hvc_tile.c b/drivers/tty/hvc/hvc_tile.c
index 147d49e..df37486 100644
--- a/drivers/tty/hvc/hvc_tile.c
+++ b/drivers/tty/hvc/hvc_tile.c

@@ -196,7 +196,7 @@
 #ifndef __tilegx__
 	struct hvc_struct *hp;
 	hp = hvc_alloc(0, 0, &hvc_tile_get_put_ops, 128);
-	return IS_ERR(hp) ? PTR_ERR(hp) : 0;
+	return PTR_ERR_OR_ZERO(hp);
 #else
 	platform_device_register(&hvc_tile_pdev);
 	return platform_driver_register(&hvc_tile_driver);

diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index ff205a7..648f9e4 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c

@@ -220,11 +220,11 @@
 	 * If we can't read the file, it's no good.
 	 * If we can't write the file, use it read-only.
 	 */
-	if (!(filp->f_op->read || filp->f_op->aio_read)) {
+	if (!(filp->f_mode & FMODE_CAN_READ)) {
 		LINFO(curlun, "file not readable: %s\n", filename);
 		goto out;
 	}
-	if (!(filp->f_op->write || filp->f_op->aio_write))
+	if (!(filp->f_mode & FMODE_CAN_WRITE))
 		ro = 1;
 
 	size = i_size_read(inode->i_mapping->host);

diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index fe0880d..3d78a88 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c

@@ -793,7 +793,7 @@
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
@@ -850,7 +850,7 @@
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
 	return net;

diff --git a/drivers/usb/host/ehci-pmcmsp.c b/drivers/usb/host/ehci-pmcmsp.c
index af3974a..7d75465 100644
--- a/drivers/usb/host/ehci-pmcmsp.c
+++ b/drivers/usb/host/ehci-pmcmsp.c

@@ -68,9 +68,6 @@
 
 	/* set TWI GPIO USB_HOST_DEV pin high */
 	gpio_direction_output(MSP_PIN_USB0_HOST_DEV, 1);
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-	gpio_direction_output(MSP_PIN_USB1_HOST_DEV, 1);
-#endif
 }
 
 /* called during probe() after chip reset completes */
@@ -248,33 +245,6 @@
 	usb_put_hcd(hcd);
 }
 
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-/*
- * Wrapper around the main ehci_irq.  Since both USB host controllers are
- * sharing the same IRQ, need to first determine whether we're the intended
- * recipient of this interrupt.
- */
-static irqreturn_t ehci_msp_irq(struct usb_hcd *hcd)
-{
-	u32 int_src;
-	struct device *dev = hcd->self.controller;
-	struct platform_device *pdev;
-	struct mspusb_device *mdev;
-	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
-	/* need to reverse-map a couple of containers to get our device */
-	pdev = to_platform_device(dev);
-	mdev = to_mspusb_device(pdev);
-
-	/* Check to see if this interrupt is for this host controller */
-	int_src = ehci_readl(ehci, &mdev->mab_regs->int_stat);
-	if (int_src & (1 << pdev->id))
-		return ehci_irq(hcd);
-
-	/* Not for this device */
-	return IRQ_NONE;
-}
-#endif /* DUAL_USB */
-
 static const struct hc_driver ehci_msp_hc_driver = {
 	.description =		hcd_name,
 	.product_desc =		"PMC MSP EHCI",
@@ -283,11 +253,7 @@
 	/*
 	 * generic hardware linkage
 	 */
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-	.irq =			ehci_msp_irq,
-#else
 	.irq =			ehci_irq,
-#endif
 	.flags =		HCD_MEMORY | HCD_USB2 | HCD_BH,
 
 	/*
@@ -334,9 +300,6 @@
 		return -ENODEV;
 
 	gpio_request(MSP_PIN_USB0_HOST_DEV, "USB0_HOST_DEV_GPIO");
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-	gpio_request(MSP_PIN_USB1_HOST_DEV, "USB1_HOST_DEV_GPIO");
-#endif
 
 	ret = usb_hcd_msp_probe(&ehci_msp_hc_driver, pdev);
 
@@ -351,9 +314,6 @@
 
 	/* free TWI GPIO USB_HOST_DEV pin */
 	gpio_free(MSP_PIN_USB0_HOST_DEV);
-#ifdef CONFIG_MSP_HAS_DUAL_USB
-	gpio_free(MSP_PIN_USB1_HOST_DEV);
-#endif
 
 	return 0;
 }

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index be414d2..971a760 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c

@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -373,7 +374,7 @@
 			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 NULL, NULL);
@@ -505,7 +506,7 @@
 			r = -ENOBUFS;
 			goto err;
 		}
-		r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg,
+		r = vhost_get_vq_desc(vq, vq->iov + seg,
 				      ARRAY_SIZE(vq->iov) - seg, &out,
 				      &in, log, log_num);
 		if (unlikely(r < 0))
@@ -584,9 +585,9 @@
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
 
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+	vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
-	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
+	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -699,18 +700,30 @@
 	handle_rx(net);
 }
 
+static void vhost_net_free(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
-	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct vhost_net *n;
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
 	int i;
 
-	if (!n)
-		return -ENOMEM;
+	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!n) {
+		n = vmalloc(sizeof *n);
+		if (!n)
+			return -ENOMEM;
+	}
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
 	if (!vqs) {
-		kfree(n);
+		vhost_net_free(n);
 		return -ENOMEM;
 	}
 
@@ -827,7 +840,7 @@
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
 	kfree(n->dev.vqs);
-	kfree(n);
+	vhost_net_free(n);
 	return 0;
 }
 
@@ -1038,15 +1051,13 @@
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
 		mutex_lock(&n->vqs[i].vq.mutex);
+		n->vqs[i].vq.acked_features = features;
 		n->vqs[i].vhost_hlen = vhost_hlen;
 		n->vqs[i].sock_hlen = sock_hlen;
 		mutex_unlock(&n->vqs[i].vq.mutex);
 	}
-	vhost_net_flush(n);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index aeb5131..4f4ffa4 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c

@@ -57,7 +57,8 @@
 #define TCM_VHOST_MAX_CDB_SIZE 32
 #define TCM_VHOST_DEFAULT_TAGS 256
 #define TCM_VHOST_PREALLOC_SGLS 2048
-#define TCM_VHOST_PREALLOC_PAGES 2048
+#define TCM_VHOST_PREALLOC_UPAGES 2048
+#define TCM_VHOST_PREALLOC_PROT_SGLS 512
 
 struct vhost_scsi_inflight {
 	/* Wait for the flush operation to finish */
@@ -79,10 +80,12 @@
 	u64 tvc_tag;
 	/* The number of scatterlists associated with this cmd */
 	u32 tvc_sgl_count;
+	u32 tvc_prot_sgl_count;
 	/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
 	u32 tvc_lun;
 	/* Pointer to the SGL formatted memory from virtio-scsi */
 	struct scatterlist *tvc_sgl;
+	struct scatterlist *tvc_prot_sgl;
 	struct page **tvc_upages;
 	/* Pointer to response */
 	struct virtio_scsi_cmd_resp __user *tvc_resp;
@@ -166,7 +169,8 @@
 };
 
 enum {
-	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG)
+	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
+					       (1ULL << VIRTIO_SCSI_F_T10_PI)
 };
 
 #define VHOST_SCSI_MAX_TARGET	256
@@ -456,12 +460,16 @@
 	struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
 				struct tcm_vhost_cmd, tvc_se_cmd);
 	struct se_session *se_sess = se_cmd->se_sess;
+	int i;
 
 	if (tv_cmd->tvc_sgl_count) {
-		u32 i;
 		for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
 			put_page(sg_page(&tv_cmd->tvc_sgl[i]));
 	}
+	if (tv_cmd->tvc_prot_sgl_count) {
+		for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
+			put_page(sg_page(&tv_cmd->tvc_prot_sgl[i]));
+	}
 
 	tcm_vhost_put_inflight(tv_cmd->inflight);
 	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
@@ -606,7 +614,7 @@
 
 again:
 	vhost_disable_notify(&vs->dev, vq);
-	head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+	head = vhost_get_vq_desc(vq, vq->iov,
 			ARRAY_SIZE(vq->iov), &out, &in,
 			NULL, NULL);
 	if (head < 0) {
@@ -713,16 +721,14 @@
 }
 
 static struct tcm_vhost_cmd *
-vhost_scsi_get_tag(struct vhost_virtqueue *vq,
-			struct tcm_vhost_tpg *tpg,
-			struct virtio_scsi_cmd_req *v_req,
-			u32 exp_data_len,
-			int data_direction)
+vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg,
+		   unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr,
+		   u32 exp_data_len, int data_direction)
 {
 	struct tcm_vhost_cmd *cmd;
 	struct tcm_vhost_nexus *tv_nexus;
 	struct se_session *se_sess;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *prot_sg;
 	struct page **pages;
 	int tag;
 
@@ -741,19 +747,24 @@
 
 	cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
 	sg = cmd->tvc_sgl;
+	prot_sg = cmd->tvc_prot_sgl;
 	pages = cmd->tvc_upages;
 	memset(cmd, 0, sizeof(struct tcm_vhost_cmd));
 
 	cmd->tvc_sgl = sg;
+	cmd->tvc_prot_sgl = prot_sg;
 	cmd->tvc_upages = pages;
 	cmd->tvc_se_cmd.map_tag = tag;
-	cmd->tvc_tag = v_req->tag;
-	cmd->tvc_task_attr = v_req->task_attr;
+	cmd->tvc_tag = scsi_tag;
+	cmd->tvc_lun = lun;
+	cmd->tvc_task_attr = task_attr;
 	cmd->tvc_exp_data_len = exp_data_len;
 	cmd->tvc_data_direction = data_direction;
 	cmd->tvc_nexus = tv_nexus;
 	cmd->inflight = tcm_vhost_get_inflight(vq);
 
+	memcpy(cmd->tvc_cdb, cdb, TCM_VHOST_MAX_CDB_SIZE);
+
 	return cmd;
 }
 
@@ -767,34 +778,27 @@
 		      struct scatterlist *sgl,
 		      unsigned int sgl_count,
 		      struct iovec *iov,
-		      int write)
+		      struct page **pages,
+		      bool write)
 {
 	unsigned int npages = 0, pages_nr, offset, nbytes;
 	struct scatterlist *sg = sgl;
 	void __user *ptr = iov->iov_base;
 	size_t len = iov->iov_len;
-	struct page **pages;
 	int ret, i;
 
-	if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
-		pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than"
-		       " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
-			sgl_count, TCM_VHOST_PREALLOC_SGLS);
-		return -ENOBUFS;
-	}
-
 	pages_nr = iov_num_pages(iov);
-	if (pages_nr > sgl_count)
-		return -ENOBUFS;
-
-	if (pages_nr > TCM_VHOST_PREALLOC_PAGES) {
+	if (pages_nr > sgl_count) {
 		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
-		       " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n",
-			pages_nr, TCM_VHOST_PREALLOC_PAGES);
+		       " sgl_count: %u\n", pages_nr, sgl_count);
 		return -ENOBUFS;
 	}
-
-	pages = tv_cmd->tvc_upages;
+	if (pages_nr > TCM_VHOST_PREALLOC_UPAGES) {
+		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
+		       " preallocated TCM_VHOST_PREALLOC_UPAGES: %u\n",
+			pages_nr, TCM_VHOST_PREALLOC_UPAGES);
+		return -ENOBUFS;
+	}
 
 	ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
 	/* No pages were pinned */
@@ -825,33 +829,32 @@
 static int
 vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
 			  struct iovec *iov,
-			  unsigned int niov,
-			  int write)
+			  int niov,
+			  bool write)
 {
-	int ret;
-	unsigned int i;
-	u32 sgl_count;
-	struct scatterlist *sg;
+	struct scatterlist *sg = cmd->tvc_sgl;
+	unsigned int sgl_count = 0;
+	int ret, i;
 
-	/*
-	 * Find out how long sglist needs to be
-	 */
-	sgl_count = 0;
 	for (i = 0; i < niov; i++)
 		sgl_count += iov_num_pages(&iov[i]);
 
-	/* TODO overflow checking */
+	if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
+		pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than"
+			" preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
+			sgl_count, TCM_VHOST_PREALLOC_SGLS);
+		return -ENOBUFS;
+	}
 
-	sg = cmd->tvc_sgl;
 	pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count);
 	sg_init_table(sg, sgl_count);
-
 	cmd->tvc_sgl_count = sgl_count;
 
-	pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count);
+	pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count);
+
 	for (i = 0; i < niov; i++) {
 		ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
-					    write);
+					    cmd->tvc_upages, write);
 		if (ret < 0) {
 			for (i = 0; i < cmd->tvc_sgl_count; i++)
 				put_page(sg_page(&cmd->tvc_sgl[i]));
@@ -859,31 +862,70 @@
 			cmd->tvc_sgl_count = 0;
 			return ret;
 		}
-
 		sg += ret;
 		sgl_count -= ret;
 	}
 	return 0;
 }
 
+static int
+vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
+			   struct iovec *iov,
+			   int niov,
+			   bool write)
+{
+	struct scatterlist *prot_sg = cmd->tvc_prot_sgl;
+	unsigned int prot_sgl_count = 0;
+	int ret, i;
+
+	for (i = 0; i < niov; i++)
+		prot_sgl_count += iov_num_pages(&iov[i]);
+
+	if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) {
+		pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than"
+			" preallocated TCM_VHOST_PREALLOC_PROT_SGLS: %u\n",
+			prot_sgl_count, TCM_VHOST_PREALLOC_PROT_SGLS);
+		return -ENOBUFS;
+	}
+
+	pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
+		 prot_sg, prot_sgl_count);
+	sg_init_table(prot_sg, prot_sgl_count);
+	cmd->tvc_prot_sgl_count = prot_sgl_count;
+
+	for (i = 0; i < niov; i++) {
+		ret = vhost_scsi_map_to_sgl(cmd, prot_sg, prot_sgl_count, &iov[i],
+					    cmd->tvc_upages, write);
+		if (ret < 0) {
+			for (i = 0; i < cmd->tvc_prot_sgl_count; i++)
+				put_page(sg_page(&cmd->tvc_prot_sgl[i]));
+
+			cmd->tvc_prot_sgl_count = 0;
+			return ret;
+		}
+		prot_sg += ret;
+		prot_sgl_count -= ret;
+	}
+	return 0;
+}
+
 static void tcm_vhost_submission_work(struct work_struct *work)
 {
 	struct tcm_vhost_cmd *cmd =
 		container_of(work, struct tcm_vhost_cmd, work);
 	struct tcm_vhost_nexus *tv_nexus;
 	struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
-	struct scatterlist *sg_ptr, *sg_bidi_ptr = NULL;
-	int rc, sg_no_bidi = 0;
+	struct scatterlist *sg_ptr, *sg_prot_ptr = NULL;
+	int rc;
 
+	/* FIXME: BIDI operation */
 	if (cmd->tvc_sgl_count) {
 		sg_ptr = cmd->tvc_sgl;
-/* FIXME: Fix BIDI operation in tcm_vhost_submission_work() */
-#if 0
-		if (se_cmd->se_cmd_flags & SCF_BIDI) {
-			sg_bidi_ptr = NULL;
-			sg_no_bidi = 0;
-		}
-#endif
+
+		if (cmd->tvc_prot_sgl_count)
+			sg_prot_ptr = cmd->tvc_prot_sgl;
+		else
+			se_cmd->prot_pto = true;
 	} else {
 		sg_ptr = NULL;
 	}
@@ -894,7 +936,7 @@
 			cmd->tvc_lun, cmd->tvc_exp_data_len,
 			cmd->tvc_task_attr, cmd->tvc_data_direction,
 			TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count,
-			sg_bidi_ptr, sg_no_bidi, NULL, 0);
+			NULL, 0, sg_prot_ptr, cmd->tvc_prot_sgl_count);
 	if (rc < 0) {
 		transport_send_check_condition_and_sense(se_cmd,
 				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
@@ -926,12 +968,18 @@
 {
 	struct tcm_vhost_tpg **vs_tpg;
 	struct virtio_scsi_cmd_req v_req;
+	struct virtio_scsi_cmd_req_pi v_req_pi;
 	struct tcm_vhost_tpg *tpg;
 	struct tcm_vhost_cmd *cmd;
-	u32 exp_data_len, data_first, data_num, data_direction;
+	u64 tag;
+	u32 exp_data_len, data_first, data_num, data_direction, prot_first;
 	unsigned out, in, i;
-	int head, ret;
-	u8 target;
+	int head, ret, data_niov, prot_niov, prot_bytes;
+	size_t req_size;
+	u16 lun;
+	u8 *target, *lunp, task_attr;
+	bool hdr_pi;
+	void *req, *cdb;
 
 	mutex_lock(&vq->mutex);
 	/*
@@ -945,7 +993,7 @@
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					ARRAY_SIZE(vq->iov), &out, &in,
 					NULL, NULL);
 		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
@@ -962,7 +1010,7 @@
 			break;
 		}
 
-/* FIXME: BIDI operation */
+		/* FIXME: BIDI operation */
 		if (out == 1 && in == 1) {
 			data_direction = DMA_NONE;
 			data_first = 0;
@@ -992,29 +1040,38 @@
 			break;
 		}
 
-		if (unlikely(vq->iov[0].iov_len != sizeof(v_req))) {
-			vq_err(vq, "Expecting virtio_scsi_cmd_req, got %zu"
-				" bytes\n", vq->iov[0].iov_len);
+		if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) {
+			req = &v_req_pi;
+			lunp = &v_req_pi.lun[0];
+			target = &v_req_pi.lun[1];
+			req_size = sizeof(v_req_pi);
+			hdr_pi = true;
+		} else {
+			req = &v_req;
+			lunp = &v_req.lun[0];
+			target = &v_req.lun[1];
+			req_size = sizeof(v_req);
+			hdr_pi = false;
+		}
+
+		if (unlikely(vq->iov[0].iov_len < req_size)) {
+			pr_err("Expecting virtio-scsi header: %zu, got %zu\n",
+			       req_size, vq->iov[0].iov_len);
 			break;
 		}
-		pr_debug("Calling __copy_from_user: vq->iov[0].iov_base: %p,"
-			" len: %zu\n", vq->iov[0].iov_base, sizeof(v_req));
-		ret = __copy_from_user(&v_req, vq->iov[0].iov_base,
-				sizeof(v_req));
+		ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size);
 		if (unlikely(ret)) {
 			vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
 			break;
 		}
 
 		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
-		if (unlikely(v_req.lun[0] != 1)) {
+		if (unlikely(*lunp != 1)) {
 			vhost_scsi_send_bad_target(vs, vq, head, out);
 			continue;
 		}
 
-		/* Extract the tpgt */
-		target = v_req.lun[1];
-		tpg = ACCESS_ONCE(vs_tpg[target]);
+		tpg = ACCESS_ONCE(vs_tpg[*target]);
 
 		/* Target does not exist, fail the request */
 		if (unlikely(!tpg)) {
@@ -1022,17 +1079,79 @@
 			continue;
 		}
 
-		exp_data_len = 0;
-		for (i = 0; i < data_num; i++)
-			exp_data_len += vq->iov[data_first + i].iov_len;
+		data_niov = data_num;
+		prot_niov = prot_first = prot_bytes = 0;
+		/*
+		 * Determine if any protection information iovecs are preceding
+		 * the actual data payload, and adjust data_first + data_niov
+		 * values accordingly for vhost_scsi_map_iov_to_sgl() below.
+		 *
+		 * Also extract virtio_scsi header bits for vhost_scsi_get_tag()
+		 */
+		if (hdr_pi) {
+			if (v_req_pi.pi_bytesout) {
+				if (data_direction != DMA_TO_DEVICE) {
+					vq_err(vq, "Received non zero do_pi_niov"
+						", but wrong data_direction\n");
+					goto err_cmd;
+				}
+				prot_bytes = v_req_pi.pi_bytesout;
+			} else if (v_req_pi.pi_bytesin) {
+				if (data_direction != DMA_FROM_DEVICE) {
+					vq_err(vq, "Received non zero di_pi_niov"
+						", but wrong data_direction\n");
+					goto err_cmd;
+				}
+				prot_bytes = v_req_pi.pi_bytesin;
+			}
+			if (prot_bytes) {
+				int tmp = 0;
 
-		cmd = vhost_scsi_get_tag(vq, tpg, &v_req,
-					 exp_data_len, data_direction);
+				for (i = 0; i < data_num; i++) {
+					tmp += vq->iov[data_first + i].iov_len;
+					prot_niov++;
+					if (tmp >= prot_bytes)
+						break;
+				}
+				prot_first = data_first;
+				data_first += prot_niov;
+				data_niov = data_num - prot_niov;
+			}
+			tag = v_req_pi.tag;
+			task_attr = v_req_pi.task_attr;
+			cdb = &v_req_pi.cdb[0];
+			lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF;
+		} else {
+			tag = v_req.tag;
+			task_attr = v_req.task_attr;
+			cdb = &v_req.cdb[0];
+			lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
+		}
+		exp_data_len = 0;
+		for (i = 0; i < data_niov; i++)
+			exp_data_len += vq->iov[data_first + i].iov_len;
+		/*
+		 * Check that the received CDB size does not exceed our
+		 * hardcoded max for vhost-scsi
+		 *
+		 * TODO what if cdb was too small for varlen cdb header?
+		 */
+		if (unlikely(scsi_command_size(cdb) > TCM_VHOST_MAX_CDB_SIZE)) {
+			vq_err(vq, "Received SCSI CDB with command_size: %d that"
+				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
+				scsi_command_size(cdb), TCM_VHOST_MAX_CDB_SIZE);
+			goto err_cmd;
+		}
+
+		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
+					 exp_data_len + prot_bytes,
+					 data_direction);
 		if (IS_ERR(cmd)) {
 			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
 					PTR_ERR(cmd));
 			goto err_cmd;
 		}
+
 		pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction"
 			": %d\n", cmd, exp_data_len, data_direction);
 
@@ -1040,40 +1159,28 @@
 		cmd->tvc_vq = vq;
 		cmd->tvc_resp = vq->iov[out].iov_base;
 
-		/*
-		 * Copy in the recieved CDB descriptor into cmd->tvc_cdb
-		 * that will be used by tcm_vhost_new_cmd_map() and down into
-		 * target_setup_cmd_from_cdb()
-		 */
-		memcpy(cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE);
-		/*
-		 * Check that the recieved CDB size does not exceeded our
-		 * hardcoded max for tcm_vhost
-		 */
-		/* TODO what if cdb was too small for varlen cdb header? */
-		if (unlikely(scsi_command_size(cmd->tvc_cdb) >
-					TCM_VHOST_MAX_CDB_SIZE)) {
-			vq_err(vq, "Received SCSI CDB with command_size: %d that"
-				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
-				scsi_command_size(cmd->tvc_cdb),
-				TCM_VHOST_MAX_CDB_SIZE);
-			goto err_free;
-		}
-		cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
-
 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
 			cmd->tvc_cdb[0], cmd->tvc_lun);
 
+		if (prot_niov) {
+			ret = vhost_scsi_map_iov_to_prot(cmd,
+					&vq->iov[prot_first], prot_niov,
+					data_direction == DMA_FROM_DEVICE);
+			if (unlikely(ret)) {
+				vq_err(vq, "Failed to map iov to"
+					" prot_sgl\n");
+				goto err_free;
+			}
+		}
 		if (data_direction != DMA_NONE) {
 			ret = vhost_scsi_map_iov_to_sgl(cmd,
-					&vq->iov[data_first], data_num,
+					&vq->iov[data_first], data_niov,
 					data_direction == DMA_FROM_DEVICE);
 			if (unlikely(ret)) {
 				vq_err(vq, "Failed to map iov to sgl\n");
 				goto err_free;
 			}
 		}
-
 		/*
 		 * Save the descriptor from vhost_get_vq_desc() to be used to
 		 * complete the virtio-scsi request in TCM callback context via
@@ -1373,6 +1480,9 @@
 
 static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 {
+	struct vhost_virtqueue *vq;
+	int i;
+
 	if (features & ~VHOST_SCSI_FEATURES)
 		return -EOPNOTSUPP;
 
@@ -1382,9 +1492,13 @@
 		mutex_unlock(&vs->dev.mutex);
 		return -EFAULT;
 	}
-	vs->dev.acked_features = features;
-	smp_wmb();
-	vhost_scsi_flush(vs);
+
+	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+		vq = &vs->vqs[i].vq;
+		mutex_lock(&vq->mutex);
+		vq->acked_features = features;
+		mutex_unlock(&vq->mutex);
+	}
 	mutex_unlock(&vs->dev.mutex);
 	return 0;
 }
@@ -1591,10 +1705,6 @@
 		return;
 
 	mutex_lock(&vs->dev.mutex);
-	if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
-		mutex_unlock(&vs->dev.mutex);
-		return;
-	}
 
 	if (plug)
 		reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
@@ -1603,8 +1713,9 @@
 
 	vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
 	mutex_lock(&vq->mutex);
-	tcm_vhost_send_evt(vs, tpg, lun,
-			VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
+	if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
+		tcm_vhost_send_evt(vs, tpg, lun,
+				   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
 	mutex_unlock(&vq->mutex);
 	mutex_unlock(&vs->dev.mutex);
 }
@@ -1712,6 +1823,7 @@
 		tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
 
 		kfree(tv_cmd->tvc_sgl);
+		kfree(tv_cmd->tvc_prot_sgl);
 		kfree(tv_cmd->tvc_upages);
 	}
 }
@@ -1746,7 +1858,7 @@
 	tv_nexus->tvn_se_sess = transport_init_session_tags(
 					TCM_VHOST_DEFAULT_TAGS,
 					sizeof(struct tcm_vhost_cmd),
-					TARGET_PROT_NORMAL);
+					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
 	if (IS_ERR(tv_nexus->tvn_se_sess)) {
 		mutex_unlock(&tpg->tv_tpg_mutex);
 		kfree(tv_nexus);
@@ -1765,12 +1877,20 @@
 		}
 
 		tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
-					TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL);
+					TCM_VHOST_PREALLOC_UPAGES, GFP_KERNEL);
 		if (!tv_cmd->tvc_upages) {
 			mutex_unlock(&tpg->tv_tpg_mutex);
 			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
 			goto out;
 		}
+
+		tv_cmd->tvc_prot_sgl = kzalloc(sizeof(struct scatterlist) *
+					TCM_VHOST_PREALLOC_PROT_SGLS, GFP_KERNEL);
+		if (!tv_cmd->tvc_prot_sgl) {
+			mutex_unlock(&tpg->tv_tpg_mutex);
+			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
+			goto out;
+		}
 	}
 	/*
 	 * Since we are running in 'demo mode' this call with generate a

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index c2a54fb..d9c501e 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c

@@ -53,7 +53,7 @@
 	vhost_disable_notify(&n->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 NULL, NULL);
@@ -241,15 +241,18 @@
 
 static int vhost_test_set_features(struct vhost_test *n, u64 features)
 {
+	struct vhost_virtqueue *vq;
+
 	mutex_lock(&n->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&n->dev)) {
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
-	vhost_test_flush(n);
+	vq = &n->vqs[VHOST_TEST_VQ];
+	mutex_lock(&vq->mutex);
+	vq->acked_features = features;
+	mutex_unlock(&vq->mutex);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 78987e4..c90f437 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c

@@ -18,7 +18,6 @@
 #include <linux/mmu_context.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
@@ -191,6 +190,7 @@
 	vq->log_used = false;
 	vq->log_addr = -1ull;
 	vq->private_data = NULL;
+	vq->acked_features = 0;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
 	vq->error = NULL;
@@ -198,6 +198,7 @@
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	vq->memory = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -415,11 +416,18 @@
 /* Caller should have device mutex */
 void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
 {
+	int i;
+
 	vhost_dev_cleanup(dev, true);
 
 	/* Restore memory to default empty mapping. */
 	memory->nregions = 0;
-	RCU_INIT_POINTER(dev->memory, memory);
+	dev->memory = memory;
+	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
+	 * VQs aren't running.
+	 */
+	for (i = 0; i < dev->nvqs; ++i)
+		dev->vqs[i]->memory = memory;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
 
@@ -462,10 +470,8 @@
 		fput(dev->log_file);
 	dev->log_file = NULL;
 	/* No one will access memory at this point */
-	kfree(rcu_dereference_protected(dev->memory,
-					locked ==
-						lockdep_is_held(&dev->mutex)));
-	RCU_INIT_POINTER(dev->memory, NULL);
+	kfree(dev->memory);
+	dev->memory = NULL;
 	WARN_ON(!list_empty(&dev->work_list));
 	if (dev->worker) {
 		kthread_stop(dev->worker);
@@ -524,11 +530,13 @@
 
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
+		bool log;
+
 		mutex_lock(&d->vqs[i]->mutex);
+		log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
 		/* If ring is inactive, will check when it's enabled. */
 		if (d->vqs[i]->private_data)
-			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
-						 log_all);
+			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
 		else
 			ok = 1;
 		mutex_unlock(&d->vqs[i]->mutex);
@@ -538,12 +546,12 @@
 	return 1;
 }
 
-static int vq_access_ok(struct vhost_dev *d, unsigned int num,
+static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			struct vring_desc __user *desc,
 			struct vring_avail __user *avail,
 			struct vring_used __user *used)
 {
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
 	       access_ok(VERIFY_READ, avail,
 			 sizeof *avail + num * sizeof *avail->ring + s) &&
@@ -555,26 +563,19 @@
 /* Caller should have device mutex but not vq mutex */
 int vhost_log_access_ok(struct vhost_dev *dev)
 {
-	struct vhost_memory *mp;
-
-	mp = rcu_dereference_protected(dev->memory,
-				       lockdep_is_held(&dev->mutex));
-	return memory_access_ok(dev, mp, 1);
+	return memory_access_ok(dev, dev->memory, 1);
 }
 EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+static int vq_log_access_ok(struct vhost_virtqueue *vq,
 			    void __user *log_base)
 {
-	struct vhost_memory *mp;
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
-	mp = rcu_dereference_protected(vq->dev->memory,
-				       lockdep_is_held(&vq->mutex));
-	return vq_memory_access_ok(log_base, mp,
-			    vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
+	return vq_memory_access_ok(log_base, vq->memory,
+				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
 		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
 					sizeof *vq->used +
 					vq->num * sizeof *vq->used->ring + s));
@@ -584,8 +585,8 @@
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
-		vq_log_access_ok(vq->dev, vq, vq->log_base);
+	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
+		vq_log_access_ok(vq, vq->log_base);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
 
@@ -593,6 +594,7 @@
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+	int i;
 
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
@@ -611,15 +613,19 @@
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem,
-			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem, 0)) {
 		kfree(newmem);
 		return -EFAULT;
 	}
-	oldmem = rcu_dereference_protected(d->memory,
-					   lockdep_is_held(&d->mutex));
-	rcu_assign_pointer(d->memory, newmem);
-	synchronize_rcu();
+	oldmem = d->memory;
+	d->memory = newmem;
+
+	/* All memory accesses are done under some VQ mutex. */
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
+		d->vqs[i]->memory = newmem;
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 	kfree(oldmem);
 	return 0;
 }
@@ -718,7 +724,7 @@
 		 * If it is not, we don't as size might not have been setup.
 		 * We will verify when backend is configured. */
 		if (vq->private_data) {
-			if (!vq_access_ok(d, vq->num,
+			if (!vq_access_ok(vq, vq->num,
 				(void __user *)(unsigned long)a.desc_user_addr,
 				(void __user *)(unsigned long)a.avail_user_addr,
 				(void __user *)(unsigned long)a.used_user_addr)) {
@@ -858,7 +864,7 @@
 			vq = d->vqs[i];
 			mutex_lock(&vq->mutex);
 			/* If ring is inactive, will check when it's enabled. */
-			if (vq->private_data && !vq_log_access_ok(d, vq, base))
+			if (vq->private_data && !vq_log_access_ok(vq, base))
 				r = -EFAULT;
 			else
 				vq->log_base = base;
@@ -1044,7 +1050,7 @@
 }
 EXPORT_SYMBOL_GPL(vhost_init_used);
 
-static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
 			  struct iovec iov[], int iov_size)
 {
 	const struct vhost_memory_region *reg;
@@ -1053,9 +1059,7 @@
 	u64 s = 0;
 	int ret = 0;
 
-	rcu_read_lock();
-
-	mem = rcu_dereference(dev->memory);
+	mem = vq->memory;
 	while ((u64)len > s) {
 		u64 size;
 		if (unlikely(ret >= iov_size)) {
@@ -1077,7 +1081,6 @@
 		++ret;
 	}
 
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -1102,7 +1105,7 @@
 	return next;
 }
 
-static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+static int get_indirect(struct vhost_virtqueue *vq,
 			struct iovec iov[], unsigned int iov_size,
 			unsigned int *out_num, unsigned int *in_num,
 			struct vhost_log *log, unsigned int *log_num,
@@ -1121,7 +1124,7 @@
 		return -EINVAL;
 	}
 
-	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+	ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect,
 			     UIO_MAXIOV);
 	if (unlikely(ret < 0)) {
 		vq_err(vq, "Translation failure %d in indirect.\n", ret);
@@ -1161,7 +1164,7 @@
 			return -EINVAL;
 		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
 		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d indirect idx %d\n",
@@ -1198,7 +1201,7 @@
  * This function returns the descriptor number found, or vq->num (which is
  * never a valid descriptor number) if none was found.  A negative code is
  * returned on error. */
-int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 		      struct iovec iov[], unsigned int iov_size,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num)
@@ -1272,7 +1275,7 @@
 			return -EFAULT;
 		}
 		if (desc.flags & VRING_DESC_F_INDIRECT) {
-			ret = get_indirect(dev, vq, iov, iov_size,
+			ret = get_indirect(vq, iov, iov_size,
 					   out_num, in_num,
 					   log, log_num, &desc);
 			if (unlikely(ret < 0)) {
@@ -1283,7 +1286,7 @@
 			continue;
 		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
 		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d descriptor idx %d\n",
@@ -1426,11 +1429,11 @@
 	 * interrupts. */
 	smp_mb();
 
-	if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+	if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
 	    unlikely(vq->avail_idx == vq->last_avail_idx))
 		return true;
 
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		__u16 flags;
 		if (__get_user(flags, &vq->avail->flags)) {
 			vq_err(vq, "Failed to get flags");
@@ -1491,7 +1494,7 @@
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		r = vhost_update_used_flags(vq);
 		if (r) {
 			vq_err(vq, "Failed to enable notification at %p: %d\n",
@@ -1528,7 +1531,7 @@
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		r = vhost_update_used_flags(vq);
 		if (r)
 			vq_err(vq, "Failed to enable notification at %p: %d\n",

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 35eeb2a..3eda654 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h

@@ -104,20 +104,18 @@
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
 	/* Protected by virtqueue mutex. */
+	struct vhost_memory *memory;
 	void *private_data;
+	unsigned acked_features;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
 };
 
 struct vhost_dev {
-	/* Readers use RCU to access memory table pointer
-	 * log base pointer and features.
-	 * Writers use mutex below.*/
-	struct vhost_memory __rcu *memory;
+	struct vhost_memory *memory;
 	struct mm_struct *mm;
 	struct mutex mutex;
-	unsigned acked_features;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct file *log_file;
@@ -140,7 +138,7 @@
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num);
@@ -174,13 +172,8 @@
 			 (1ULL << VHOST_F_LOG_ALL),
 };
 
-static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit)
 {
-	unsigned acked_features;
-
-	/* TODO: check that we are running from vhost_worker or dev mutex is
-	 * held? */
-	acked_features = rcu_dereference_index_check(dev->acked_features, 1);
-	return acked_features & (1 << bit);
+	return vq->acked_features & (1 << bit);
 }
 #endif

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index c7b4f0f..8bf495f 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig

@@ -20,6 +20,7 @@
 source "drivers/gpu/vga/Kconfig"
 
 source "drivers/gpu/host1x/Kconfig"
+source "drivers/gpu/ipu-v3/Kconfig"
 
 menu "Direct Rendering Manager"
 source "drivers/gpu/drm/Kconfig"

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 5a3eb2e..5d44905 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig

@@ -29,7 +29,7 @@
 
 config LCD_CORGI
 	tristate "LCD Panel support for SHARP corgi/spitz model"
-	depends on SPI_MASTER && PXA_SHARPSL
+	depends on SPI_MASTER && PXA_SHARPSL && BACKLIGHT_CLASS_DEVICE
 	help
 	  Say y here to support the LCD panels usually found on SHARP
 	  corgi (C7x0) and spitz (Cxx00) models.
@@ -370,7 +370,7 @@
 
 config BACKLIGHT_LM3630A
 	tristate "Backlight Driver for LM3630A"
-	depends on BACKLIGHT_CLASS_DEVICE && I2C
+	depends on BACKLIGHT_CLASS_DEVICE && I2C && PWM
 	select REGMAP_I2C
 	help
 	  This supports TI LM3630A Backlight Driver
@@ -386,14 +386,14 @@
 
 config BACKLIGHT_LP855X
 	tristate "Backlight driver for TI LP855X"
-	depends on BACKLIGHT_CLASS_DEVICE && I2C
+	depends on BACKLIGHT_CLASS_DEVICE && I2C && PWM
 	help
 	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
 	  LP8557 backlight driver.
 
 config BACKLIGHT_LP8788
 	tristate "Backlight driver for TI LP8788 MFD"
-	depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788
+	depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788 && PWM
 	help
 	  This supports TI LP8788 backlight driver.
 

diff --git a/drivers/video/backlight/gpio_backlight.c b/drivers/video/backlight/gpio_backlight.c
index a2eba12..1cea688 100644
--- a/drivers/video/backlight/gpio_backlight.c
+++ b/drivers/video/backlight/gpio_backlight.c

@@ -38,7 +38,8 @@
 	    bl->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK))
 		brightness = 0;
 
-	gpio_set_value(gbl->gpio, brightness ? gbl->active : !gbl->active);
+	gpio_set_value_cansleep(gbl->gpio,
+				brightness ? gbl->active : !gbl->active);
 
 	return 0;
 }

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index b75201f..38ca88b 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c

@@ -10,8 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/gpio/consumer.h>
 #include <linux/gpio.h>
-#include <linux/of_gpio.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -32,8 +32,7 @@
 	unsigned int		*levels;
 	bool			enabled;
 	struct regulator	*power_supply;
-	int			enable_gpio;
-	unsigned long		enable_gpio_flags;
+	struct gpio_desc	*enable_gpio;
 	unsigned int		scale;
 	int			(*notify)(struct device *,
 					  int brightness);
@@ -54,12 +53,8 @@
 	if (err < 0)
 		dev_err(pb->dev, "failed to enable power supply\n");
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 0);
-		else
-			gpio_set_value(pb->enable_gpio, 1);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 1);
 
 	pwm_enable(pb->pwm);
 	pb->enabled = true;
@@ -73,12 +68,8 @@
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 1);
-		else
-			gpio_set_value(pb->enable_gpio, 0);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 0);
 
 	regulator_disable(pb->power_supply);
 	pb->enabled = false;
@@ -148,7 +139,6 @@
 				  struct platform_pwm_backlight_data *data)
 {
 	struct device_node *node = dev->of_node;
-	enum of_gpio_flags flags;
 	struct property *prop;
 	int length;
 	u32 value;
@@ -189,14 +179,6 @@
 		data->max_brightness--;
 	}
 
-	data->enable_gpio = of_get_named_gpio_flags(node, "enable-gpios", 0,
-						    &flags);
-	if (data->enable_gpio == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
-
-	if (gpio_is_valid(data->enable_gpio) && (flags & OF_GPIO_ACTIVE_LOW))
-		data->enable_gpio_flags |= PWM_BACKLIGHT_GPIO_ACTIVE_LOW;
-
 	return 0;
 }
 
@@ -256,8 +238,6 @@
 	} else
 		pb->scale = data->max_brightness;
 
-	pb->enable_gpio = data->enable_gpio;
-	pb->enable_gpio_flags = data->enable_gpio_flags;
 	pb->notify = data->notify;
 	pb->notify_after = data->notify_after;
 	pb->check_fb = data->check_fb;
@@ -265,26 +245,38 @@
 	pb->dev = &pdev->dev;
 	pb->enabled = false;
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		unsigned long flags;
-
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			flags = GPIOF_OUT_INIT_HIGH;
+	pb->enable_gpio = devm_gpiod_get(&pdev->dev, "enable");
+	if (IS_ERR(pb->enable_gpio)) {
+		ret = PTR_ERR(pb->enable_gpio);
+		if (ret == -ENOENT)
+			pb->enable_gpio = NULL;
 		else
-			flags = GPIOF_OUT_INIT_LOW;
+			goto err_alloc;
+	}
 
-		ret = gpio_request_one(pb->enable_gpio, flags, "enable");
+	/*
+	 * Compatibility fallback for drivers still using the integer GPIO
+	 * platform data. Must go away soon.
+	 */
+	if (!pb->enable_gpio && gpio_is_valid(data->enable_gpio)) {
+		ret = devm_gpio_request_one(&pdev->dev, data->enable_gpio,
+					    GPIOF_OUT_INIT_HIGH, "enable");
 		if (ret < 0) {
 			dev_err(&pdev->dev, "failed to request GPIO#%d: %d\n",
-				pb->enable_gpio, ret);
+				data->enable_gpio, ret);
 			goto err_alloc;
 		}
+
+		pb->enable_gpio = gpio_to_desc(data->enable_gpio);
 	}
 
+	if (pb->enable_gpio)
+		gpiod_direction_output(pb->enable_gpio, 1);
+
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");
 	if (IS_ERR(pb->power_supply)) {
 		ret = PTR_ERR(pb->power_supply);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	pb->pwm = devm_pwm_get(&pdev->dev, NULL);
@@ -295,7 +287,7 @@
 		if (IS_ERR(pb->pwm)) {
 			dev_err(&pdev->dev, "unable to request legacy PWM\n");
 			ret = PTR_ERR(pb->pwm);
-			goto err_gpio;
+			goto err_alloc;
 		}
 	}
 
@@ -304,12 +296,15 @@
 	/*
 	 * The DT case will set the pwm_period_ns field to 0 and store the
 	 * period, parsed from the DT, in the PWM device. For the non-DT case,
-	 * set the period from platform data.
+	 * set the period from platform data if it has not already been set
+	 * via the PWM lookup table.
 	 */
-	if (data->pwm_period_ns > 0)
-		pwm_set_period(pb->pwm, data->pwm_period_ns);
-
 	pb->period = pwm_get_period(pb->pwm);
+	if (!pb->period && (data->pwm_period_ns > 0)) {
+		pb->period = data->pwm_period_ns;
+		pwm_set_period(pb->pwm, data->pwm_period_ns);
+	}
+
 	pb->lth_brightness = data->lth_brightness * (pb->period / pb->scale);
 
 	memset(&props, 0, sizeof(struct backlight_properties));
@@ -320,7 +315,7 @@
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		ret = PTR_ERR(bl);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	if (data->dft_brightness > data->max_brightness) {
@@ -336,9 +331,6 @@
 	platform_set_drvdata(pdev, bl);
 	return 0;
 
-err_gpio:
-	if (gpio_is_valid(pb->enable_gpio))
-		gpio_free(pb->enable_gpio);
 err_alloc:
 	if (data->exit)
 		data->exit(&pdev->dev);
@@ -359,6 +351,14 @@
 	return 0;
 }
 
+/* Platform-driver shutdown hook: run the backlight power-off path. */
+static void pwm_backlight_shutdown(struct platform_device *pdev)
+{
+	struct backlight_device *bdev = platform_get_drvdata(pdev);
+
+	pwm_backlight_power_off(bl_get_data(bdev));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int pwm_backlight_suspend(struct device *dev)
 {
@@ -404,6 +404,7 @@
 	},
 	.probe		= pwm_backlight_probe,
 	.remove		= pwm_backlight_remove,
+	.shutdown	= pwm_backlight_shutdown,
 };
 
 module_platform_driver(pwm_backlight_driver);

diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c
index 510a1bc..2d6d481 100644
--- a/drivers/video/backlight/s6e63m0.c
+++ b/drivers/video/backlight/s6e63m0.c

@@ -703,7 +703,7 @@
 	struct s6e63m0 *lcd = dev_get_drvdata(dev);
 	char temp[3];
 
-	sprintf(temp, "%d\n", lcd->gamma_table_count);
+	sprintf(temp, "%u\n", lcd->gamma_table_count);
 	strcpy(buf, temp);
 
 	return strlen(buf);

diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c
index 1501979..c2c8eb6 100644
--- a/drivers/video/fbdev/sm501fb.c
+++ b/drivers/video/fbdev/sm501fb.c

@@ -1215,7 +1215,7 @@
 }
 
 /* Prepare the device_attr for registration with sysfs later */
-static DEVICE_ATTR(crt_src, 0666, sm501fb_crtsrc_show, sm501fb_crtsrc_store);
+static DEVICE_ATTR(crt_src, 0664, sm501fb_crtsrc_show, sm501fb_crtsrc_store);
 
 /* sm501fb_show_regs
  *

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1e44362..4d08f45a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c

@@ -865,4 +865,19 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
 
+/*
+ * Flag every virtqueue on @dev's vqs list as broken.  This should prevent
+ * the device from being used, allowing drivers to recover.  Callers may
+ * need to grab appropriate locks to flush.
+ */
+void virtio_break_device(struct virtio_device *dev)
+{
+	struct virtqueue *cur;
+
+	list_for_each_entry(cur, &dev->vqs, list) {
+		to_vvq(cur)->broken = true;
+	}
+}
+EXPORT_SYMBOL_GPL(virtio_break_device);
+
 MODULE_LICENSE("GPL");

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 74ec8fc..c845527 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig

@@ -272,7 +272,7 @@
 
 config IOP_WATCHDOG
 	tristate "IOP Watchdog"
-	depends on PLAT_IOP
+	depends on ARCH_IOP13XX
 	select WATCHDOG_NOWAYOUT if (ARCH_IOP32X || ARCH_IOP33X)
 	help
 	  Say Y here if to include support for the watchdog timer
@@ -378,6 +378,8 @@
 config IMX2_WDT
 	tristate "IMX2+ Watchdog"
 	depends on ARCH_MXC
+	select REGMAP_MMIO
+	select WATCHDOG_CORE
 	help
 	  This is the driver for the hardware watchdog
 	  on the Freescale IMX2 and later processors.
@@ -663,6 +665,19 @@
 
 	  To compile this driver as a module, choose M here.
 
+config INTEL_MID_WATCHDOG
+	tristate "Intel MID Watchdog Timer"
+	depends on X86_INTEL_MID
+	select WATCHDOG_CORE
+	---help---
+	  Watchdog timer driver built into the Intel SCU for Intel MID
+	  Platforms.
+
+	  This driver currently supports only the watchdog evolution
+	  implementation in SCU, available for Merrifield generation.
+
+	  To compile this driver as a module, choose M here.
+
 config ITCO_WDT
 	tristate "Intel TCO Timer/Watchdog"
 	depends on (X86 || IA64) && PCI
@@ -835,7 +850,7 @@
 
 config SBC8360_WDT
 	tristate "SBC8360 Watchdog Timer"
-	depends on X86
+	depends on X86_32
 	---help---
 
 	  This is the driver for the hardware watchdog on the SBC8360 Single
@@ -938,36 +953,6 @@
 
 	  Most people will say N.
 
-config W83697HF_WDT
-	tristate "W83697HF/W83697HG Watchdog Timer"
-	depends on X86
-	---help---
-	  This is the driver for the hardware watchdog on the W83697HF/HG
-	  chipset as used in Dedibox/VIA motherboards (and likely others).
-	  This watchdog simply watches your kernel to make sure it doesn't
-	  freeze, and if it does, it reboots your computer after a certain
-	  amount of time.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called w83697hf_wdt.
-
-	  Most people will say N.
-
-config W83697UG_WDT
-	tristate "W83697UG/W83697UF Watchdog Timer"
-	depends on X86
-	---help---
-	  This is the driver for the hardware watchdog on the W83697UG/UF
-	  chipset as used in MSI Fuzzy CX700 VIA motherboards (and likely others).
-	  This watchdog simply watches your kernel to make sure it doesn't
-	  freeze, and if it does, it reboots your computer after a certain
-	  amount of time.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called w83697ug_wdt.
-
-	  Most people will say N.
-
 config W83877F_WDT
 	tristate "W83877F (EMACS) Watchdog Timer"
 	depends on X86

diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 1b5f3d5..7b8a91e 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile

@@ -107,13 +107,12 @@
 obj-$(CONFIG_SMSC37B787_WDT) += smsc37b787_wdt.o
 obj-$(CONFIG_VIA_WDT) += via_wdt.o
 obj-$(CONFIG_W83627HF_WDT) += w83627hf_wdt.o
-obj-$(CONFIG_W83697HF_WDT) += w83697hf_wdt.o
-obj-$(CONFIG_W83697UG_WDT) += w83697ug_wdt.o
 obj-$(CONFIG_W83877F_WDT) += w83877f_wdt.o
 obj-$(CONFIG_W83977F_WDT) += w83977f_wdt.o
 obj-$(CONFIG_MACHZ_WDT) += machzwd.o
 obj-$(CONFIG_SBC_EPX_C3_WATCHDOG) += sbc_epx_c3.o
 obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
+obj-$(CONFIG_INTEL_MID_WATCHDOG) += intel-mid_wdt.o
 
 # M32R Architecture
 

diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c
index 399c3fd..41ac466 100644
--- a/drivers/watchdog/ath79_wdt.c
+++ b/drivers/watchdog/ath79_wdt.c

@@ -20,6 +20,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitops.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/io.h>
@@ -90,6 +91,15 @@
 static inline void ath79_wdt_enable(void)
 {
 	ath79_wdt_keepalive();
+
+	/*
+	 * Updating the TIMER register requires a few microseconds
+	 * on the AR934x SoCs at least. Use a small delay to ensure
+	 * that the TIMER register is updated within the hardware
+	 * before enabling the watchdog.
+	 */
+	udelay(2);
+
 	ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR);
 	/* flush write */
 	ath79_wdt_rr(WDOG_REG_CTRL);
@@ -255,7 +265,7 @@
 	if (IS_ERR(wdt_clk))
 		return PTR_ERR(wdt_clk);
 
-	err = clk_enable(wdt_clk);
+	err = clk_prepare_enable(wdt_clk);
 	if (err)
 		return err;
 
@@ -286,14 +296,14 @@
 	return 0;
 
 err_clk_disable:
-	clk_disable(wdt_clk);
+	clk_disable_unprepare(wdt_clk);
 	return err;
 }
 
 static int ath79_wdt_remove(struct platform_device *pdev)
 {
 	misc_deregister(&ath79_wdt_miscdev);
-	clk_disable(wdt_clk);
+	clk_disable_unprepare(wdt_clk);
 	return 0;
 }
 

diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index a8dbceb3..08a7853 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c

@@ -41,6 +41,28 @@
 #define WDTP_MASK	(TCR_WP_MASK)
 #endif
 
+/* Early handlers for the wdt=x and wdt_period=xx command-line options */
+notrace int __init early_parse_wdt(char *p)
+{
+	/* Any argument whose first character is not '0' enables the watchdog. */
+	if (p && p[0] != '0')
+		booke_wdt_enabled = 1;
+	return 0;
+}
+early_param("wdt", early_parse_wdt);
+
+/* Parse "wdt_period=N"; malformed input leaves booke_wdt_period unchanged. */
+int __init early_parse_wdt_period(char *p)
+{
+	unsigned long period;
+
+	/* kstrtoul(), not kstrtol(): the destination is an unsigned long. */
+	if (p && !kstrtoul(p, 0, &period))
+		booke_wdt_period = period;
+	return 0;
+}
+early_param("wdt_period", early_parse_wdt_period);
+
 #ifdef CONFIG_PPC_FSL_BOOK3E
 
 /* For the specified period, determine the number of seconds
@@ -103,17 +125,18 @@
 static void __booke_wdt_set(void *data)
 {
 	u32 val;
+	struct watchdog_device *wdog = data;
 
 	val = mfspr(SPRN_TCR);
 	val &= ~WDTP_MASK;
-	val |= WDTP(booke_wdt_period);
+	val |= WDTP(sec_to_period(wdog->timeout));
 
 	mtspr(SPRN_TCR, val);
 }
 
-static void booke_wdt_set(void)
+static void booke_wdt_set(void *data)
 {
-	on_each_cpu(__booke_wdt_set, NULL, 0);
+	on_each_cpu(__booke_wdt_set, data, 0);
 }
 
 static void __booke_wdt_ping(void *data)
@@ -131,12 +154,13 @@
 static void __booke_wdt_enable(void *data)
 {
 	u32 val;
+	struct watchdog_device *wdog = data;
 
 	/* clear status before enabling watchdog */
 	__booke_wdt_ping(NULL);
 	val = mfspr(SPRN_TCR);
 	val &= ~WDTP_MASK;
-	val |= (TCR_WIE|TCR_WRC(WRC_CHIP)|WDTP(booke_wdt_period));
+	val |= (TCR_WIE|TCR_WRC(WRC_CHIP)|WDTP(sec_to_period(wdog->timeout)));
 
 	mtspr(SPRN_TCR, val);
 }
@@ -162,25 +186,17 @@
 
 }
 
-static void __booke_wdt_start(struct watchdog_device *wdog)
-{
-	on_each_cpu(__booke_wdt_enable, NULL, 0);
-	pr_debug("watchdog enabled (timeout = %u sec)\n", wdog->timeout);
-}
-
 static int booke_wdt_start(struct watchdog_device *wdog)
 {
-	if (booke_wdt_enabled == 0) {
-		booke_wdt_enabled = 1;
-		__booke_wdt_start(wdog);
-	}
+	on_each_cpu(__booke_wdt_enable, wdog, 0);
+	pr_debug("watchdog enabled (timeout = %u sec)\n", wdog->timeout);
+
 	return 0;
 }
 
 static int booke_wdt_stop(struct watchdog_device *wdog)
 {
 	on_each_cpu(__booke_wdt_disable, NULL, 0);
-	booke_wdt_enabled = 0;
 	pr_debug("watchdog disabled\n");
 
 	return 0;
@@ -191,9 +207,8 @@
 {
 	if (timeout > MAX_WDT_TIMEOUT)
 		return -EINVAL;
-	booke_wdt_period = sec_to_period(timeout);
 	wdt_dev->timeout = timeout;
-	booke_wdt_set();
+	booke_wdt_set(wdt_dev);
 
 	return 0;
 }
@@ -231,10 +246,10 @@
 	pr_info("powerpc book-e watchdog driver loaded\n");
 	booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value;
 	booke_wdt_set_timeout(&booke_wdt_dev,
-			      period_to_sec(CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT));
+			      period_to_sec(booke_wdt_period));
 	watchdog_set_nowayout(&booke_wdt_dev, nowayout);
 	if (booke_wdt_enabled)
-		__booke_wdt_start(&booke_wdt_dev);
+		booke_wdt_start(&booke_wdt_dev);
 
 	ret = watchdog_register_device(&booke_wdt_dev);
 

diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index dd51d95..9d4874f 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c

@@ -21,19 +21,17 @@
  * Halt on suspend:	Manual		Can be automatic
  */
 
+#include <linux/clk.h>
 #include <linux/init.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
-#include <linux/watchdog.h>
-#include <linux/clk.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
+#include <linux/regmap.h>
 #include <linux/timer.h>
-#include <linux/jiffies.h>
+#include <linux/watchdog.h>
 
 #define DRIVER_NAME "imx2-wdt"
 
@@ -55,19 +53,12 @@
 
 #define WDOG_SEC_TO_COUNT(s)	((s * 2 - 1) << 8)
 
-#define IMX2_WDT_STATUS_OPEN	0
-#define IMX2_WDT_STATUS_STARTED	1
-#define IMX2_WDT_EXPECT_CLOSE	2
-
-static struct {
+struct imx2_wdt_device {
 	struct clk *clk;
-	void __iomem *base;
-	unsigned timeout;
-	unsigned long status;
+	struct regmap *regmap;
 	struct timer_list timer;	/* Pings the watchdog when closed */
-} imx2_wdt;
-
-static struct miscdevice imx2_wdt_miscdev;
+	struct watchdog_device wdog;
+};
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
@@ -85,9 +76,12 @@
 	.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE,
 };
 
-static inline void imx2_wdt_setup(void)
+static inline void imx2_wdt_setup(struct watchdog_device *wdog)
 {
-	u16 val = __raw_readw(imx2_wdt.base + IMX2_WDT_WCR);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+	u32 val;
+
+	regmap_read(wdev->regmap, IMX2_WDT_WCR, &val);
 
 	/* Suspend timer in low power mode, write once-only */
 	val |= IMX2_WDT_WCR_WDZST;
@@ -98,227 +92,199 @@
 	/* Keep Watchdog Disabled */
 	val &= ~IMX2_WDT_WCR_WDE;
 	/* Set the watchdog's Time-Out value */
-	val |= WDOG_SEC_TO_COUNT(imx2_wdt.timeout);
+	val |= WDOG_SEC_TO_COUNT(wdog->timeout);
 
-	__raw_writew(val, imx2_wdt.base + IMX2_WDT_WCR);
+	regmap_write(wdev->regmap, IMX2_WDT_WCR, val);
 
 	/* enable the watchdog */
 	val |= IMX2_WDT_WCR_WDE;
-	__raw_writew(val, imx2_wdt.base + IMX2_WDT_WCR);
+	regmap_write(wdev->regmap, IMX2_WDT_WCR, val);
 }
 
-static inline void imx2_wdt_ping(void)
+static inline bool imx2_wdt_is_running(struct imx2_wdt_device *wdev)
 {
-	__raw_writew(IMX2_WDT_SEQ1, imx2_wdt.base + IMX2_WDT_WSR);
-	__raw_writew(IMX2_WDT_SEQ2, imx2_wdt.base + IMX2_WDT_WSR);
+	u32 val;
+
+	regmap_read(wdev->regmap, IMX2_WDT_WCR, &val);
+
+	return val & IMX2_WDT_WCR_WDE;
+}
+
+static int imx2_wdt_ping(struct watchdog_device *wdog)
+{
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+
+	regmap_write(wdev->regmap, IMX2_WDT_WSR, IMX2_WDT_SEQ1);
+	regmap_write(wdev->regmap, IMX2_WDT_WSR, IMX2_WDT_SEQ2);
+	return 0;
 }
 
 static void imx2_wdt_timer_ping(unsigned long arg)
 {
-	/* ping it every imx2_wdt.timeout / 2 seconds to prevent reboot */
-	imx2_wdt_ping();
-	mod_timer(&imx2_wdt.timer, jiffies + imx2_wdt.timeout * HZ / 2);
+	struct watchdog_device *wdog = (struct watchdog_device *)arg;
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+
+	/* ping it every wdog->timeout / 2 seconds to prevent reboot */
+	imx2_wdt_ping(wdog);
+	mod_timer(&wdev->timer, jiffies + wdog->timeout * HZ / 2);
 }
 
-static void imx2_wdt_start(void)
+static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
+				unsigned int new_timeout)
 {
-	if (!test_and_set_bit(IMX2_WDT_STATUS_STARTED, &imx2_wdt.status)) {
-		/* at our first start we enable clock and do initialisations */
-		clk_prepare_enable(imx2_wdt.clk);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-		imx2_wdt_setup();
-	} else	/* delete the timer that pings the watchdog after close */
-		del_timer_sync(&imx2_wdt.timer);
-
-	/* Watchdog is enabled - time to reload the timeout value */
-	imx2_wdt_ping();
-}
-
-static void imx2_wdt_stop(void)
-{
-	/* we don't need a clk_disable, it cannot be disabled once started.
-	 * We use a timer to ping the watchdog while /dev/watchdog is closed */
-	imx2_wdt_timer_ping(0);
-}
-
-static void imx2_wdt_set_timeout(int new_timeout)
-{
-	u16 val = __raw_readw(imx2_wdt.base + IMX2_WDT_WCR);
-
-	/* set the new timeout value in the WSR */
-	val &= ~IMX2_WDT_WCR_WT;
-	val |= WDOG_SEC_TO_COUNT(new_timeout);
-	__raw_writew(val, imx2_wdt.base + IMX2_WDT_WCR);
-}
-
-static int imx2_wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(IMX2_WDT_STATUS_OPEN, &imx2_wdt.status))
-		return -EBUSY;
-
-	imx2_wdt_start();
-	return nonseekable_open(inode, file);
-}
-
-static int imx2_wdt_close(struct inode *inode, struct file *file)
-{
-	if (test_bit(IMX2_WDT_EXPECT_CLOSE, &imx2_wdt.status) && !nowayout)
-		imx2_wdt_stop();
-	else {
-		dev_crit(imx2_wdt_miscdev.parent,
-			"Unexpected close: Expect reboot!\n");
-		imx2_wdt_ping();
-	}
-
-	clear_bit(IMX2_WDT_EXPECT_CLOSE, &imx2_wdt.status);
-	clear_bit(IMX2_WDT_STATUS_OPEN, &imx2_wdt.status);
+	regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT,
+			   WDOG_SEC_TO_COUNT(new_timeout));
 	return 0;
 }
 
-static long imx2_wdt_ioctl(struct file *file, unsigned int cmd,
-							unsigned long arg)
+static int imx2_wdt_start(struct watchdog_device *wdog)
 {
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_value;
-	u16 val;
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(argp, &imx2_wdt_info,
-			sizeof(struct watchdog_info)) ? -EFAULT : 0;
+	if (imx2_wdt_is_running(wdev)) {
+		/* delete the timer that pings the watchdog after close */
+		del_timer_sync(&wdev->timer);
+		imx2_wdt_set_timeout(wdog, wdog->timeout);
+	} else
+		imx2_wdt_setup(wdog);
 
-	case WDIOC_GETSTATUS:
-		return put_user(0, p);
+	return imx2_wdt_ping(wdog);
+}
 
-	case WDIOC_GETBOOTSTATUS:
-		val = __raw_readw(imx2_wdt.base + IMX2_WDT_WRSR);
-		new_value = val & IMX2_WDT_WRSR_TOUT ? WDIOF_CARDRESET : 0;
-		return put_user(new_value, p);
+/*
+ * watchdog_ops.stop callback.
+ *
+ * Does not write any register to disable the watchdog; it calls the
+ * timer-ping routine directly with @wdog as its argument and returns 0.
+ */
+static int imx2_wdt_stop(struct watchdog_device *wdog)
+{
+	/*
+	 * We don't need a clk_disable, it cannot be disabled once started.
+	 * We use a timer to ping the watchdog while /dev/watchdog is closed
+	 */
+	imx2_wdt_timer_ping((unsigned long)wdog);
+	return 0;
+}
 
-	case WDIOC_KEEPALIVE:
-		imx2_wdt_ping();
-		return 0;
+static inline void imx2_wdt_ping_if_active(struct watchdog_device *wdog)
+{
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_value, p))
-			return -EFAULT;
-		if ((new_value < 1) || (new_value > IMX2_WDT_MAX_TIME))
-			return -EINVAL;
-		imx2_wdt_set_timeout(new_value);
-		imx2_wdt.timeout = new_value;
-		imx2_wdt_ping();
-
-		/* Fallthrough to return current value */
-	case WDIOC_GETTIMEOUT:
-		return put_user(imx2_wdt.timeout, p);
-
-	default:
-		return -ENOTTY;
+	if (imx2_wdt_is_running(wdev)) {
+		imx2_wdt_set_timeout(wdog, wdog->timeout);
+		imx2_wdt_timer_ping((unsigned long)wdog);
 	}
 }
 
-static ssize_t imx2_wdt_write(struct file *file, const char __user *data,
-						size_t len, loff_t *ppos)
-{
-	size_t i;
-	char c;
-
-	if (len == 0)	/* Can we see this even ? */
-		return 0;
-
-	clear_bit(IMX2_WDT_EXPECT_CLOSE, &imx2_wdt.status);
-	/* scan to see whether or not we got the magic character */
-	for (i = 0; i != len; i++) {
-		if (get_user(c, data + i))
-			return -EFAULT;
-		if (c == 'V')
-			set_bit(IMX2_WDT_EXPECT_CLOSE, &imx2_wdt.status);
-	}
-
-	imx2_wdt_ping();
-	return len;
-}
-
-static const struct file_operations imx2_wdt_fops = {
+static struct watchdog_ops imx2_wdt_ops = {
 	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.unlocked_ioctl = imx2_wdt_ioctl,
-	.open = imx2_wdt_open,
-	.release = imx2_wdt_close,
-	.write = imx2_wdt_write,
+	.start = imx2_wdt_start,
+	.stop = imx2_wdt_stop,
+	.ping = imx2_wdt_ping,
+	.set_timeout = imx2_wdt_set_timeout,
 };
 
-static struct miscdevice imx2_wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &imx2_wdt_fops,
+static struct regmap_config imx2_wdt_regmap_config = {
+	.reg_bits = 16,
+	.reg_stride = 2,
+	.val_bits = 16,
+	.max_register = 0x8,
 };
 
 static int __init imx2_wdt_probe(struct platform_device *pdev)
 {
-	int ret;
+	struct imx2_wdt_device *wdev;
+	struct watchdog_device *wdog;
 	struct resource *res;
+	void __iomem *base;
+	int ret;
+	u32 val;
+
+	wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL);
+	if (!wdev)
+		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	imx2_wdt.base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(imx2_wdt.base))
-		return PTR_ERR(imx2_wdt.base);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
-	imx2_wdt.clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(imx2_wdt.clk)) {
-		dev_err(&pdev->dev, "can't get Watchdog clock\n");
-		return PTR_ERR(imx2_wdt.clk);
+	wdev->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base,
+						 &imx2_wdt_regmap_config);
+	if (IS_ERR(wdev->regmap)) {
+		dev_err(&pdev->dev, "regmap init failed\n");
+		return PTR_ERR(wdev->regmap);
 	}
 
-	imx2_wdt.timeout = clamp_t(unsigned, timeout, 1, IMX2_WDT_MAX_TIME);
-	if (imx2_wdt.timeout != timeout)
-		dev_warn(&pdev->dev, "Initial timeout out of range! "
-			"Clamped from %u to %u\n", timeout, imx2_wdt.timeout);
+	wdev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(wdev->clk)) {
+		dev_err(&pdev->dev, "can't get Watchdog clock\n");
+		return PTR_ERR(wdev->clk);
+	}
 
-	setup_timer(&imx2_wdt.timer, imx2_wdt_timer_ping, 0);
+	wdog			= &wdev->wdog;
+	wdog->info		= &imx2_wdt_info;
+	wdog->ops		= &imx2_wdt_ops;
+	wdog->min_timeout	= 1;
+	wdog->max_timeout	= IMX2_WDT_MAX_TIME;
 
-	imx2_wdt_miscdev.parent = &pdev->dev;
-	ret = misc_register(&imx2_wdt_miscdev);
-	if (ret)
-		goto fail;
+	clk_prepare_enable(wdev->clk);
 
-	dev_info(&pdev->dev,
-		"IMX2+ Watchdog Timer enabled. timeout=%ds (nowayout=%d)\n",
-						imx2_wdt.timeout, nowayout);
+	regmap_read(wdev->regmap, IMX2_WDT_WRSR, &val);
+	wdog->bootstatus = val & IMX2_WDT_WRSR_TOUT ? WDIOF_CARDRESET : 0;
+
+	wdog->timeout = clamp_t(unsigned, timeout, 1, IMX2_WDT_MAX_TIME);
+	if (wdog->timeout != timeout)
+		dev_warn(&pdev->dev, "Initial timeout out of range! Clamped from %u to %u\n",
+			 timeout, wdog->timeout);
+
+	platform_set_drvdata(pdev, wdog);
+	watchdog_set_drvdata(wdog, wdev);
+	watchdog_set_nowayout(wdog, nowayout);
+	watchdog_init_timeout(wdog, timeout, &pdev->dev);
+
+	setup_timer(&wdev->timer, imx2_wdt_timer_ping, (unsigned long)wdog);
+
+	imx2_wdt_ping_if_active(wdog);
+
+	ret = watchdog_register_device(wdog);
+	if (ret) {
+		dev_err(&pdev->dev, "cannot register watchdog device\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "timeout %d sec (nowayout=%d)\n",
+		 wdog->timeout, nowayout);
+
 	return 0;
-
-fail:
-	imx2_wdt_miscdev.parent = NULL;
-	return ret;
 }
 
 static int __exit imx2_wdt_remove(struct platform_device *pdev)
 {
-	misc_deregister(&imx2_wdt_miscdev);
+	struct watchdog_device *wdog = platform_get_drvdata(pdev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-	if (test_bit(IMX2_WDT_STATUS_STARTED, &imx2_wdt.status)) {
-		del_timer_sync(&imx2_wdt.timer);
+	watchdog_unregister_device(wdog);
 
-		dev_crit(imx2_wdt_miscdev.parent,
-			"Device removed: Expect reboot!\n");
+	if (imx2_wdt_is_running(wdev)) {
+		del_timer_sync(&wdev->timer);
+		imx2_wdt_ping(wdog);
+		dev_crit(&pdev->dev, "Device removed: Expect reboot!\n");
 	}
-
-	imx2_wdt_miscdev.parent = NULL;
 	return 0;
 }
 
 static void imx2_wdt_shutdown(struct platform_device *pdev)
 {
-	if (test_bit(IMX2_WDT_STATUS_STARTED, &imx2_wdt.status)) {
-		/* we are running, we need to delete the timer but will give
-		 * max timeout before reboot will take place */
-		del_timer_sync(&imx2_wdt.timer);
-		imx2_wdt_set_timeout(IMX2_WDT_MAX_TIME);
-		imx2_wdt_ping();
+	struct watchdog_device *wdog = platform_get_drvdata(pdev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-		dev_crit(imx2_wdt_miscdev.parent,
-			"Device shutdown: Expect reboot!\n");
+	if (imx2_wdt_is_running(wdev)) {
+		/*
+		 * We are running, we need to delete the timer but will
+		 * give max timeout before reboot will take place
+		 */
+		del_timer_sync(&wdev->timer);
+		imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
+		imx2_wdt_ping(wdog);
+		dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n");
 	}
 }
 

diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
new file mode 100644
index 0000000..ca66e8e
--- /dev/null
+++ b/drivers/watchdog/intel-mid_wdt.c

@@ -0,0 +1,184 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Platforms supported so far:
+ *      - Merrifield only
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+
+#include <asm/intel_scu_ipc.h>
+#include <asm/intel-mid.h>
+
+#define IPC_WATCHDOG 0xf8
+
+#define MID_WDT_PRETIMEOUT		15
+#define MID_WDT_TIMEOUT_MIN		(1 + MID_WDT_PRETIMEOUT)
+#define MID_WDT_TIMEOUT_MAX		170
+#define MID_WDT_DEFAULT_TIMEOUT		90
+
+/*
+ * SCU watchdog messages: sub-command values passed to wdt_command()
+ * together with the IPC_WATCHDOG command.
+ */
+enum {
+	SCU_WATCHDOG_START = 0,
+	SCU_WATCHDOG_STOP,
+	SCU_WATCHDOG_KEEPALIVE,
+};
+
+/*
+ * Issue watchdog sub-command @sub through intel_scu_ipc_command().
+ *
+ * @in and @inlen are passed through as the input buffer and its length;
+ * no output buffer is requested (NULL, 0).  Returns whatever
+ * intel_scu_ipc_command() returns.
+ */
+static inline int wdt_command(int sub, u32 *in, int inlen)
+{
+	return intel_scu_ipc_command(IPC_WATCHDOG, sub, in, inlen, NULL, 0);
+}
+
+/*
+ * watchdog_ops.start callback.
+ *
+ * Sends SCU_WATCHDOG_START carrying two u32 values taken from @wd: the
+ * pretimeout (timeout - MID_WDT_PRETIMEOUT) followed by the timeout.
+ * Returns 0 on success, otherwise the error from wdt_command() after
+ * logging it.
+ */
+static int wdt_start(struct watchdog_device *wd)
+{
+	int timeout = wd->timeout;
+	struct ipc_wd_start {
+		u32 pretimeout;
+		u32 timeout;
+	} msg = {
+		.pretimeout = timeout - MID_WDT_PRETIMEOUT,
+		.timeout = timeout,
+	};
+	/*
+	 * SCU expects the input size for watchdog IPC to
+	 * be based on 4 bytes
+	 */
+	int words = DIV_ROUND_UP(sizeof(msg), 4);
+	struct device *dev;
+	int err;
+
+	err = wdt_command(SCU_WATCHDOG_START, (u32 *)&msg, words);
+	if (!err)
+		return 0;
+
+	dev = watchdog_get_drvdata(wd);
+	dev_crit(dev, "error starting watchdog: %d\n", err);
+
+	return err;
+}
+
+/*
+ * watchdog_ops.ping callback: send SCU_WATCHDOG_KEEPALIVE with no payload.
+ * Returns 0 on success, otherwise the error from wdt_command() after
+ * logging it.
+ */
+static int wdt_ping(struct watchdog_device *wd)
+{
+	struct device *dev;
+	int err = wdt_command(SCU_WATCHDOG_KEEPALIVE, NULL, 0);
+
+	if (!err)
+		return 0;
+
+	dev = watchdog_get_drvdata(wd);
+	dev_crit(dev, "Error executing keepalive: 0x%x\n", err);
+
+	return err;
+}
+
+/*
+ * watchdog_ops.stop callback: send SCU_WATCHDOG_STOP with no payload.
+ * Returns 0 on success, otherwise the error from wdt_command() after
+ * logging it.
+ */
+static int wdt_stop(struct watchdog_device *wd)
+{
+	struct device *dev;
+	int err = wdt_command(SCU_WATCHDOG_STOP, NULL, 0);
+
+	if (!err)
+		return 0;
+
+	dev = watchdog_get_drvdata(wd);
+	dev_crit(dev, "Error stopping watchdog: 0x%x\n", err);
+
+	return err;
+}
+
+/*
+ * Interrupt handler installed by mid_wdt_probe() for pdata->irq.
+ * It unconditionally panics with "Kernel Watchdog"; @irq and @dev_id
+ * are not used.
+ */
+static irqreturn_t mid_wdt_irq(int irq, void *dev_id)
+{
+	panic("Kernel Watchdog");
+
+	/* This code should not be reached */
+	return IRQ_HANDLED;
+}
+
+/*
+ * Identity and capability flags reported for this watchdog.
+ * NOTE(review): WDIOF_SETTIMEOUT is advertised while mid_wdt_ops provides
+ * no .set_timeout hook - confirm the watchdog core handles that case.
+ */
+static const struct watchdog_info mid_wdt_info = {
+	.identity = "Intel MID SCU watchdog",
+	.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+};
+
+/* Watchdog operations: each hook issues one SCU IPC watchdog command. */
+static const struct watchdog_ops mid_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.ping = wdt_ping,
+};
+
+/*
+ * Platform probe.
+ *
+ * Requires platform data; runs the optional pdata->probe() hook, allocates
+ * and fills in a device-managed watchdog_device, requests pdata->irq with
+ * mid_wdt_irq() as handler and finally registers the watchdog.
+ * Returns 0 on success or a negative errno.
+ */
+static int mid_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct intel_mid_wdt_pdata *pdata = dev->platform_data;
+	struct watchdog_device *wdd;
+	int err;
+
+	if (!pdata) {
+		dev_err(dev, "missing platform data\n");
+		return -EINVAL;
+	}
+
+	/* Optional platform-specific setup hook. */
+	if (pdata->probe) {
+		err = pdata->probe(pdev);
+		if (err)
+			return err;
+	}
+
+	wdd = devm_kzalloc(dev, sizeof(*wdd), GFP_KERNEL);
+	if (!wdd)
+		return -ENOMEM;
+
+	wdd->info = &mid_wdt_info;
+	wdd->ops = &mid_wdt_ops;
+	wdd->min_timeout = MID_WDT_TIMEOUT_MIN;
+	wdd->max_timeout = MID_WDT_TIMEOUT_MAX;
+	wdd->timeout = MID_WDT_DEFAULT_TIMEOUT;
+
+	/* The ops callbacks fetch the struct device for their log messages. */
+	watchdog_set_drvdata(wdd, dev);
+	platform_set_drvdata(pdev, wdd);
+
+	err = devm_request_irq(dev, pdata->irq, mid_wdt_irq,
+			       IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog",
+			       wdd);
+	if (err) {
+		dev_err(dev, "error requesting warning irq %d\n",
+			pdata->irq);
+		return err;
+	}
+
+	err = watchdog_register_device(wdd);
+	if (err) {
+		dev_err(dev, "error registering watchdog device\n");
+		return err;
+	}
+
+	dev_info(dev, "Intel MID watchdog device probed\n");
+
+	return 0;
+}
+
+/*
+ * Platform remove: unregister the watchdog_device that mid_wdt_probe()
+ * stored as the platform driver data.  Always returns 0.
+ */
+static int mid_wdt_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wd = platform_get_drvdata(pdev);
+	watchdog_unregister_device(wd);
+	return 0;
+}
+
+/* Platform driver glue; binds to devices named "intel_mid_wdt". */
+static struct platform_driver mid_wdt_driver = {
+	.probe		= mid_wdt_probe,
+	.remove		= mid_wdt_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "intel_mid_wdt",
+	},
+};
+
+module_platform_driver(mid_wdt_driver);
+
+MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
+MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
+MODULE_LICENSE("GPL");

diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c
index 20dc738..d9c1a16 100644
--- a/drivers/watchdog/kempld_wdt.c
+++ b/drivers/watchdog/kempld_wdt.c

@@ -162,7 +162,7 @@
 	kempld_get_mutex(pld);
 	stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id));
 	stage_cfg &= ~STAGE_CFG_PRESCALER_MASK;
-	stage_cfg |= STAGE_CFG_SET_PRESCALER(prescaler);
+	stage_cfg |= STAGE_CFG_SET_PRESCALER(PRESCALER_21);
 	kempld_write8(pld, KEMPLD_WDT_STAGE_CFG(stage->id), stage_cfg);
 	kempld_write32(pld, KEMPLD_WDT_STAGE_TIMEOUT(stage->id),
 			stage_timeout);

diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 57ccae8..1e6e28d 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c

@@ -225,7 +225,7 @@
 }
 
 /* Match table for of_platform binding */
-static struct of_device_id xwdt_of_match[] = {
+static const struct of_device_id xwdt_of_match[] = {
 	{ .compatible = "xlnx,xps-timebase-wdt-1.00.a", },
 	{ .compatible = "xlnx,xps-timebase-wdt-1.01.a", },
 	{},

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 9b3c41d..00d0741 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c

@@ -55,15 +55,19 @@
 	int wdt_counter_offset;
 	int wdt_enable_bit;
 	int rstout_enable_bit;
+	int rstout_mask_bit;
 	int (*clock_init)(struct platform_device *,
 			  struct orion_watchdog *);
+	int (*enabled)(struct orion_watchdog *);
 	int (*start)(struct watchdog_device *);
+	int (*stop)(struct watchdog_device *);
 };
 
 struct orion_watchdog {
 	struct watchdog_device wdt;
 	void __iomem *reg;
 	void __iomem *rstout;
+	void __iomem *rstout_mask;
 	unsigned long clk_rate;
 	struct clk *clk;
 	const struct orion_watchdog_data *data;
@@ -142,9 +146,10 @@
 	return 0;
 }
 
-static int armada370_start(struct watchdog_device *wdt_dev)
+static int armada375_start(struct watchdog_device *wdt_dev)
 {
 	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
+	u32 reg;
 
 	/* Set watchdog duration */
 	writel(dev->clk_rate * wdt_dev->timeout,
@@ -157,8 +162,35 @@
 	atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit,
 						dev->data->wdt_enable_bit);
 
-	atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
-				      dev->data->rstout_enable_bit);
+	/* Enable reset on watchdog */
+	reg = readl(dev->rstout);
+	reg |= dev->data->rstout_enable_bit;
+	writel(reg, dev->rstout);
+
+	atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, 0);
+	return 0;
+}
+
+static int armada370_start(struct watchdog_device *wdt_dev)
+{
+	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
+	u32 reg;
+
+	/* Set watchdog duration */
+	writel(dev->clk_rate * wdt_dev->timeout,
+	       dev->reg + dev->data->wdt_counter_offset);
+
+	/* Clear the watchdog expiration bit */
+	atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0);
+
+	/* Enable watchdog timer */
+	atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit,
+						dev->data->wdt_enable_bit);
+
+	/* Enable reset on watchdog */
+	reg = readl(dev->rstout);
+	reg |= dev->data->rstout_enable_bit;
+	writel(reg, dev->rstout);
 	return 0;
 }
 
@@ -189,7 +221,7 @@
 	return dev->data->start(wdt_dev);
 }
 
-static int orion_wdt_stop(struct watchdog_device *wdt_dev)
+static int orion_stop(struct watchdog_device *wdt_dev)
 {
 	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
 
@@ -202,7 +234,48 @@
 	return 0;
 }
 
-static int orion_wdt_enabled(struct orion_watchdog *dev)
+/*
+ * Stop hook used by armada375_data and armada380_data.
+ *
+ * Sequence, in order: set rstout_mask_bit in the RSTOUT mask register,
+ * clear rstout_enable_bit in the RSTOUT register, then clear
+ * wdt_enable_bit in TIMER_CTRL.  Always returns 0.
+ */
+static int armada375_stop(struct watchdog_device *wdt_dev)
+{
+	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
+	u32 reg;
+
+	/* Disable reset on watchdog */
+	atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit,
+					   dev->data->rstout_mask_bit);
+	/* Plain read-modify-write on RSTOUT (not atomic_io_modify). */
+	reg = readl(dev->rstout);
+	reg &= ~dev->data->rstout_enable_bit;
+	writel(reg, dev->rstout);
+
+	/* Disable watchdog timer */
+	atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0);
+
+	return 0;
+}
+
+/*
+ * Stop hook used by armada370_data and armadaxp_data.
+ *
+ * Clears rstout_enable_bit in the RSTOUT register first, then clears
+ * wdt_enable_bit in TIMER_CTRL.  Always returns 0.
+ */
+static int armada370_stop(struct watchdog_device *wdt_dev)
+{
+	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
+	u32 reg;
+
+	/* Disable reset on watchdog */
+	reg = readl(dev->rstout);
+	reg &= ~dev->data->rstout_enable_bit;
+	writel(reg, dev->rstout);
+
+	/* Disable watchdog timer */
+	atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0);
+
+	return 0;
+}
+
+/* Generic stop entry point: dispatch to the SoC-specific ->stop hook. */
+static int orion_wdt_stop(struct watchdog_device *wdt_dev)
+{
+	struct orion_watchdog *wdt = watchdog_get_drvdata(wdt_dev);
+	const struct orion_watchdog_data *soc = wdt->data;
+
+	return soc->stop(wdt_dev);
+}
+
+static int orion_enabled(struct orion_watchdog *dev)
 {
 	bool enabled, running;
 
@@ -212,6 +285,24 @@
 	return enabled && running;
 }
 
+/*
+ * Enabled check used by armada375_data and armada380_data.
+ *
+ * Returns 1 only when rstout_mask_bit is clear in the RSTOUT mask register,
+ * rstout_enable_bit is set in RSTOUT and wdt_enable_bit is set in
+ * TIMER_CTRL; otherwise 0.
+ */
+static int armada375_enabled(struct orion_watchdog *dev)
+{
+	const struct orion_watchdog_data *soc = dev->data;
+	/* All three registers are always read, in this order. */
+	u32 mask_val = readl(dev->rstout_mask);
+	u32 rstout_val = readl(dev->rstout);
+	u32 ctrl_val = readl(dev->reg + TIMER_CTRL);
+
+	if (mask_val & soc->rstout_mask_bit)
+		return 0;
+	if (!(rstout_val & soc->rstout_enable_bit))
+		return 0;
+
+	return (ctrl_val & soc->wdt_enable_bit) != 0;
+}
+
+/* Generic enabled check: dispatch to the SoC-specific ->enabled hook. */
+static int orion_wdt_enabled(struct watchdog_device *wdt_dev)
+{
+	struct orion_watchdog *wdt = watchdog_get_drvdata(wdt_dev);
+	const struct orion_watchdog_data *soc = wdt->data;
+
+	return soc->enabled(wdt);
+}
+
 static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev)
 {
 	struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
@@ -262,10 +353,6 @@
 		return devm_ioremap(&pdev->dev, res->start,
 				    resource_size(res));
 
-	/* This workaround works only for "orion-wdt", DT-enabled */
-	if (!of_device_is_compatible(pdev->dev.of_node, "marvell,orion-wdt"))
-		return NULL;
-
 	rstout = internal_regs + ORION_RSTOUT_MASK_OFFSET;
 
 	WARN(1, FW_BUG "falling back to harcoded RSTOUT reg %pa\n", &rstout);
@@ -277,7 +364,9 @@
 	.wdt_enable_bit = BIT(4),
 	.wdt_counter_offset = 0x24,
 	.clock_init = orion_wdt_clock_init,
+	.enabled = orion_enabled,
 	.start = orion_start,
+	.stop = orion_stop,
 };
 
 static const struct orion_watchdog_data armada370_data = {
@@ -285,7 +374,9 @@
 	.wdt_enable_bit = BIT(8),
 	.wdt_counter_offset = 0x34,
 	.clock_init = armada370_wdt_clock_init,
+	.enabled = orion_enabled,
 	.start = armada370_start,
+	.stop = armada370_stop,
 };
 
 static const struct orion_watchdog_data armadaxp_data = {
@@ -293,7 +384,31 @@
 	.wdt_enable_bit = BIT(8),
 	.wdt_counter_offset = 0x34,
 	.clock_init = armadaxp_wdt_clock_init,
+	.enabled = orion_enabled,
 	.start = armada370_start,
+	.stop = armada370_stop,
+};
+
+/*
+ * Match data for "marvell,armada-375-wdt": armada370 clock setup combined
+ * with the armada375_* hooks, which also handle the RSTOUT mask bit.
+ */
+static const struct orion_watchdog_data armada375_data = {
+	.rstout_enable_bit = BIT(8),
+	.rstout_mask_bit = BIT(10),
+	.wdt_enable_bit = BIT(8),
+	.wdt_counter_offset = 0x34,
+	.clock_init = armada370_wdt_clock_init,
+	.enabled = armada375_enabled,
+	.start = armada375_start,
+	.stop = armada375_stop,
+};
+
+/*
+ * Match data for "marvell,armada-380-wdt": same bits and hooks as
+ * armada375_data, but with the armadaxp clock setup.
+ */
+static const struct orion_watchdog_data armada380_data = {
+	.rstout_enable_bit = BIT(8),
+	.rstout_mask_bit = BIT(10),
+	.wdt_enable_bit = BIT(8),
+	.wdt_counter_offset = 0x34,
+	.clock_init = armadaxp_wdt_clock_init,
+	.enabled = armada375_enabled,
+	.start = armada375_start,
+	.stop = armada375_stop,
+};
 
 static const struct of_device_id orion_wdt_of_match_table[] = {
@@ -309,16 +424,78 @@
 		.compatible = "marvell,armada-xp-wdt",
 		.data = &armadaxp_data,
 	},
+	{
+		.compatible = "marvell,armada-375-wdt",
+		.data = &armada375_data,
+	},
+	{
+		.compatible = "marvell,armada-380-wdt",
+		.data = &armada380_data,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, orion_wdt_of_match_table);
 
+/*
+ * Map the register windows the watchdog needs.
+ *
+ * MEM resource 0 is always mapped into dev->reg.  How RSTOUT is obtained
+ * depends on the compatible string:
+ *  - orion:          via orion_wdt_ioremap_rstout(), given the internal
+ *                    registers base derived from resource 0
+ *  - armada-370/xp:  MEM resource 1
+ *  - armada-375/380: MEM resource 1, plus MEM resource 2 mapped into
+ *                    dev->rstout_mask
+ *
+ * Returns 0 on success or a negative errno (-ENODEV for an unknown
+ * compatible or a missing resource, -ENOMEM for a failed ioremap).
+ */
+static int orion_wdt_get_regs(struct platform_device *pdev,
+			      struct orion_watchdog *dev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct resource *res;
+	bool has_rstout_mask;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	dev->reg = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!dev->reg)
+		return -ENOMEM;
+
+	/* Each supported compatible has some RSTOUT register quirk */
+	if (of_device_is_compatible(np, "marvell,orion-wdt")) {
+		dev->rstout = orion_wdt_ioremap_rstout(pdev, res->start &
+						       INTERNAL_REGS_MASK);
+		return dev->rstout ? 0 : -ENODEV;
+	}
+
+	if (of_device_is_compatible(np, "marvell,armada-370-wdt") ||
+	    of_device_is_compatible(np, "marvell,armada-xp-wdt"))
+		has_rstout_mask = false;
+	else if (of_device_is_compatible(np, "marvell,armada-375-wdt") ||
+		 of_device_is_compatible(np, "marvell,armada-380-wdt"))
+		has_rstout_mask = true;
+	else
+		return -ENODEV;
+
+	/* Dedicated RSTOUT register, can be requested. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	dev->rstout = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dev->rstout))
+		return PTR_ERR(dev->rstout);
+
+	if (!has_rstout_mask)
+		return 0;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	if (!res)
+		return -ENODEV;
+
+	dev->rstout_mask = devm_ioremap(&pdev->dev, res->start,
+					resource_size(res));
+	if (!dev->rstout_mask)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int orion_wdt_probe(struct platform_device *pdev)
 {
 	struct orion_watchdog *dev;
 	const struct of_device_id *match;
 	unsigned int wdt_max_duration;	/* (seconds) */
-	struct resource *res;
 	int ret, irq;
 
 	dev = devm_kzalloc(&pdev->dev, sizeof(struct orion_watchdog),
@@ -336,19 +513,9 @@
 	dev->wdt.min_timeout = 1;
 	dev->data = match->data;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	dev->reg = devm_ioremap(&pdev->dev, res->start,
-			       resource_size(res));
-	if (!dev->reg)
-		return -ENOMEM;
-
-	dev->rstout = orion_wdt_ioremap_rstout(pdev, res->start &
-						     INTERNAL_REGS_MASK);
-	if (!dev->rstout)
-		return -ENODEV;
+	ret = orion_wdt_get_regs(pdev, dev);
+	if (ret)
+		return ret;
 
 	ret = dev->data->clock_init(pdev, dev);
 	if (ret) {
@@ -371,7 +538,7 @@
 	 * removed and re-insterted, or if the bootloader explicitly
 	 * set a running watchdog before booting the kernel.
 	 */
-	if (!orion_wdt_enabled(dev))
+	if (!orion_wdt_enabled(&dev->wdt))
 		orion_wdt_stop(&dev->wdt);
 
 	/* Request the IRQ only after the watchdog is disabled */

diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c
index d04d02b..061756e 100644
--- a/drivers/watchdog/shwdt.c
+++ b/drivers/watchdog/shwdt.c

@@ -282,8 +282,6 @@
 	wdt->timer.data		= (unsigned long)wdt;
 	wdt->timer.expires	= next_ping_period(clock_division_ratio);
 
-	platform_set_drvdata(pdev, wdt);
-
 	dev_info(&pdev->dev, "initialized.\n");
 
 	pm_runtime_enable(&pdev->dev);

diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 47629d2..c1b03f4 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c

@@ -59,7 +59,6 @@
  * @adev: amba device structure of wdt
  * @status: current status of wdt
  * @load_val: load value to be set for current timeout
- * @timeout: current programmed timeout
  */
 struct sp805_wdt {
 	struct watchdog_device		wdd;
@@ -68,7 +67,6 @@
 	struct clk			*clk;
 	struct amba_device		*adev;
 	unsigned int			load_val;
-	unsigned int			timeout;
 };
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
@@ -98,7 +96,7 @@
 	spin_lock(&wdt->lock);
 	wdt->load_val = load;
 	/* roundup timeout to closest positive integer value */
-	wdt->timeout = div_u64((load + 1) * 2 + (rate / 2), rate);
+	wdd->timeout = div_u64((load + 1) * 2 + (rate / 2), rate);
 	spin_unlock(&wdt->lock);
 
 	return 0;

diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
index cd00a78..693b9d2 100644
--- a/drivers/watchdog/sunxi_wdt.c
+++ b/drivers/watchdog/sunxi_wdt.c

@@ -57,17 +57,17 @@
  */
 
 static const int wdt_timeout_map[] = {
-	[1] = 0b0001,  /* 1s  */
-	[2] = 0b0010,  /* 2s  */
-	[3] = 0b0011,  /* 3s  */
-	[4] = 0b0100,  /* 4s  */
-	[5] = 0b0101,  /* 5s  */
-	[6] = 0b0110,  /* 6s  */
-	[8] = 0b0111,  /* 8s  */
-	[10] = 0b1000, /* 10s */
-	[12] = 0b1001, /* 12s */
-	[14] = 0b1010, /* 14s */
-	[16] = 0b1011, /* 16s */
+	[1] = 0x1,  /* 1s  */
+	[2] = 0x2,  /* 2s  */
+	[3] = 0x3,  /* 3s  */
+	[4] = 0x4,  /* 4s  */
+	[5] = 0x5,  /* 5s  */
+	[6] = 0x6,  /* 6s  */
+	[8] = 0x7,  /* 8s  */
+	[10] = 0x8, /* 10s */
+	[12] = 0x9, /* 12s */
+	[14] = 0xA, /* 14s */
+	[16] = 0xB, /* 16s */
 };
 
 static int sunxi_wdt_ping(struct watchdog_device *wdt_dev)

diff --git a/drivers/watchdog/via_wdt.c b/drivers/watchdog/via_wdt.c
index d2cd9f0..56369c4 100644
--- a/drivers/watchdog/via_wdt.c
+++ b/drivers/watchdog/via_wdt.c

@@ -232,7 +232,7 @@
 static void wdt_remove(struct pci_dev *pdev)
 {
 	watchdog_unregister_device(&wdt_dev);
-	del_timer(&timer);
+	del_timer_sync(&timer);
 	iounmap(wdt_mem);
 	release_mem_region(mmio, VIA_WDT_MMIO_LEN);
 	release_resource(&wdt_res);

diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index b1da0c1..7165704 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c

@@ -64,6 +64,10 @@
 		"Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+static int early_disable;
+module_param(early_disable, int, 0);
+MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)");
+
 /*
  *	Kernel methods.
  */
@@ -208,9 +212,14 @@
 
 	t = superio_inb(cr_wdt_timeout);
 	if (t != 0) {
-		pr_info("Watchdog already running. Resetting timeout to %d sec\n",
-			wdog->timeout);
-		superio_outb(cr_wdt_timeout, wdog->timeout);
+		if (early_disable) {
+			pr_warn("Stopping previously enabled watchdog until userland kicks in\n");
+			superio_outb(cr_wdt_timeout, 0);
+		} else {
+			pr_info("Watchdog already running. Resetting timeout to %d sec\n",
+				wdog->timeout);
+			superio_outb(cr_wdt_timeout, wdog->timeout);
+		}
 	}
 
 	/* set second mode & disable keyboard turning off watchdog */

diff --git a/drivers/watchdog/w83697hf_wdt.c b/drivers/watchdog/w83697hf_wdt.c
deleted file mode 100644
index e9ea856..0000000
--- a/drivers/watchdog/w83697hf_wdt.c
+++ /dev/null

@@ -1,460 +0,0 @@
-/*
- *	w83697hf/hg WDT driver
- *
- *	(c) Copyright 2006 Samuel Tardieu <sam@rfc1149.net>
- *	(c) Copyright 2006 Marcus Junker <junker@anduras.de>
- *
- *	Based on w83627hf_wdt.c which is based on advantechwdt.c
- *	which is based on wdt.c.
- *	Original copyright messages:
- *
- *	(c) Copyright 2003 Pádraig Brady <P@draigBrady.com>
- *
- *	(c) Copyright 2000-2001 Marek Michalkiewicz <marekm@linux.org.pl>
- *
- *	(c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
- *						All Rights Reserved.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	Neither Marcus Junker nor ANDURAS AG admit liability nor provide
- *	warranty for any of this software. This material is provided
- *	"AS-IS" and at no charge.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/fs.h>
-#include <linux/ioport.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-
-
-#define WATCHDOG_NAME "w83697hf/hg WDT"
-#define WATCHDOG_TIMEOUT 60		/* 60 sec default timeout */
-#define WATCHDOG_EARLY_DISABLE 1	/* Disable until userland kicks in */
-
-static unsigned long wdt_is_open;
-static char expect_close;
-static DEFINE_SPINLOCK(io_lock);
-
-/* You must set this - there is no sane way to probe for this board. */
-static int wdt_io = 0x2e;
-module_param(wdt_io, int, 0);
-MODULE_PARM_DESC(wdt_io,
-		"w83697hf/hg WDT io port (default 0x2e, 0 = autodetect)");
-
-static int timeout = WATCHDOG_TIMEOUT;	/* in seconds */
-module_param(timeout, int, 0);
-MODULE_PARM_DESC(timeout,
-	"Watchdog timeout in seconds. 1<= timeout <=255 (default="
-				__MODULE_STRING(WATCHDOG_TIMEOUT) ")");
-
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout,
-	"Watchdog cannot be stopped once started (default="
-				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-static int early_disable = WATCHDOG_EARLY_DISABLE;
-module_param(early_disable, int, 0);
-MODULE_PARM_DESC(early_disable,
-	"Watchdog gets disabled at boot time (default="
-				__MODULE_STRING(WATCHDOG_EARLY_DISABLE) ")");
-
-/*
- *	Kernel methods.
- */
-
-#define W83697HF_EFER (wdt_io + 0)  /* Extended Function Enable Register */
-#define W83697HF_EFIR (wdt_io + 0)  /* Extended Function Index Register
-							(same as EFER) */
-#define W83697HF_EFDR (wdt_io + 1)  /* Extended Function Data Register */
-
-static inline void w83697hf_unlock(void)
-{
-	outb_p(0x87, W83697HF_EFER);	/* Enter extended function mode */
-	outb_p(0x87, W83697HF_EFER);	/* Again according to manual */
-}
-
-static inline void w83697hf_lock(void)
-{
-	outb_p(0xAA, W83697HF_EFER);	/* Leave extended function mode */
-}
-
-/*
- *	The three functions w83697hf_get_reg(), w83697hf_set_reg() and
- *	w83697hf_write_timeout() must be called with the device unlocked.
- */
-
-static unsigned char w83697hf_get_reg(unsigned char reg)
-{
-	outb_p(reg, W83697HF_EFIR);
-	return inb_p(W83697HF_EFDR);
-}
-
-static void w83697hf_set_reg(unsigned char reg, unsigned char data)
-{
-	outb_p(reg, W83697HF_EFIR);
-	outb_p(data, W83697HF_EFDR);
-}
-
-static void w83697hf_write_timeout(int timeout)
-{
-	/* Write Timeout counter to CRF4 */
-	w83697hf_set_reg(0xF4, timeout);
-}
-
-static void w83697hf_select_wdt(void)
-{
-	w83697hf_unlock();
-	w83697hf_set_reg(0x07, 0x08);	/* Switch to logic device 8 (GPIO2) */
-}
-
-static inline void w83697hf_deselect_wdt(void)
-{
-	w83697hf_lock();
-}
-
-static void w83697hf_init(void)
-{
-	unsigned char bbuf;
-
-	w83697hf_select_wdt();
-
-	bbuf = w83697hf_get_reg(0x29);
-	bbuf &= ~0x60;
-	bbuf |= 0x20;
-
-	/* Set pin 119 to WDTO# mode (= CR29, WDT0) */
-	w83697hf_set_reg(0x29, bbuf);
-
-	bbuf = w83697hf_get_reg(0xF3);
-	bbuf &= ~0x04;
-	w83697hf_set_reg(0xF3, bbuf);	/* Count mode is seconds */
-
-	w83697hf_deselect_wdt();
-}
-
-static void wdt_ping(void)
-{
-	spin_lock(&io_lock);
-	w83697hf_select_wdt();
-
-	w83697hf_write_timeout(timeout);
-
-	w83697hf_deselect_wdt();
-	spin_unlock(&io_lock);
-}
-
-static void wdt_enable(void)
-{
-	spin_lock(&io_lock);
-	w83697hf_select_wdt();
-
-	w83697hf_write_timeout(timeout);
-	w83697hf_set_reg(0x30, 1);	/* Enable timer */
-
-	w83697hf_deselect_wdt();
-	spin_unlock(&io_lock);
-}
-
-static void wdt_disable(void)
-{
-	spin_lock(&io_lock);
-	w83697hf_select_wdt();
-
-	w83697hf_set_reg(0x30, 0);	/* Disable timer */
-	w83697hf_write_timeout(0);
-
-	w83697hf_deselect_wdt();
-	spin_unlock(&io_lock);
-}
-
-static unsigned char wdt_running(void)
-{
-	unsigned char t;
-
-	spin_lock(&io_lock);
-	w83697hf_select_wdt();
-
-	t = w83697hf_get_reg(0xF4);	/* Read timer */
-
-	w83697hf_deselect_wdt();
-	spin_unlock(&io_lock);
-
-	return t;
-}
-
-static int wdt_set_heartbeat(int t)
-{
-	if (t < 1 || t > 255)
-		return -EINVAL;
-
-	timeout = t;
-	return 0;
-}
-
-static ssize_t wdt_write(struct file *file, const char __user *buf,
-						size_t count, loff_t *ppos)
-{
-	if (count) {
-		if (!nowayout) {
-			size_t i;
-
-			expect_close = 0;
-
-			for (i = 0; i != count; i++) {
-				char c;
-				if (get_user(c, buf + i))
-					return -EFAULT;
-				if (c == 'V')
-					expect_close = 42;
-			}
-		}
-		wdt_ping();
-	}
-	return count;
-}
-
-static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_timeout;
-	static const struct watchdog_info ident = {
-		.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT
-							| WDIOF_MAGICCLOSE,
-		.firmware_version = 1,
-		.identity = "W83697HF WDT",
-	};
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		if (copy_to_user(argp, &ident, sizeof(ident)))
-			return -EFAULT;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, p);
-
-	case WDIOC_SETOPTIONS:
-	{
-		int options, retval = -EINVAL;
-
-		if (get_user(options, p))
-			return -EFAULT;
-
-		if (options & WDIOS_DISABLECARD) {
-			wdt_disable();
-			retval = 0;
-		}
-
-		if (options & WDIOS_ENABLECARD) {
-			wdt_enable();
-			retval = 0;
-		}
-
-		return retval;
-	}
-
-	case WDIOC_KEEPALIVE:
-		wdt_ping();
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_timeout, p))
-			return -EFAULT;
-		if (wdt_set_heartbeat(new_timeout))
-			return -EINVAL;
-		wdt_ping();
-		/* Fall */
-
-	case WDIOC_GETTIMEOUT:
-		return put_user(timeout, p);
-
-	default:
-		return -ENOTTY;
-	}
-	return 0;
-}
-
-static int wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(0, &wdt_is_open))
-		return -EBUSY;
-	/*
-	 *	Activate
-	 */
-
-	wdt_enable();
-	return nonseekable_open(inode, file);
-}
-
-static int wdt_close(struct inode *inode, struct file *file)
-{
-	if (expect_close == 42)
-		wdt_disable();
-	else {
-		pr_crit("Unexpected close, not stopping watchdog!\n");
-		wdt_ping();
-	}
-	expect_close = 0;
-	clear_bit(0, &wdt_is_open);
-	return 0;
-}
-
-/*
- *	Notifier for system down
- */
-
-static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
-	void *unused)
-{
-	if (code == SYS_DOWN || code == SYS_HALT)
-		wdt_disable();	/* Turn the WDT off */
-
-	return NOTIFY_DONE;
-}
-
-/*
- *	Kernel Interfaces
- */
-
-static const struct file_operations wdt_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.write		= wdt_write,
-	.unlocked_ioctl	= wdt_ioctl,
-	.open		= wdt_open,
-	.release	= wdt_close,
-};
-
-static struct miscdevice wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &wdt_fops,
-};
-
-/*
- *	The WDT needs to learn about soft shutdowns in order to
- *	turn the timebomb registers off.
- */
-
-static struct notifier_block wdt_notifier = {
-	.notifier_call = wdt_notify_sys,
-};
-
-static int w83697hf_check_wdt(void)
-{
-	if (!request_region(wdt_io, 2, WATCHDOG_NAME)) {
-		pr_err("I/O address 0x%x already in use\n", wdt_io);
-		return -EIO;
-	}
-
-	pr_debug("Looking for watchdog at address 0x%x\n", wdt_io);
-	w83697hf_unlock();
-	if (w83697hf_get_reg(0x20) == 0x60) {
-		pr_info("watchdog found at address 0x%x\n", wdt_io);
-		w83697hf_lock();
-		return 0;
-	}
-	/* Reprotect in case it was a compatible device */
-	w83697hf_lock();
-
-	pr_info("watchdog not found at address 0x%x\n", wdt_io);
-	release_region(wdt_io, 2);
-	return -EIO;
-}
-
-static int w83697hf_ioports[] = { 0x2e, 0x4e, 0x00 };
-
-static int __init wdt_init(void)
-{
-	int ret, i, found = 0;
-
-	pr_info("WDT driver for W83697HF/HG initializing\n");
-
-	if (wdt_io == 0) {
-		/* we will autodetect the W83697HF/HG watchdog */
-		for (i = 0; ((!found) && (w83697hf_ioports[i] != 0)); i++) {
-			wdt_io = w83697hf_ioports[i];
-			if (!w83697hf_check_wdt())
-				found++;
-		}
-	} else {
-		if (!w83697hf_check_wdt())
-			found++;
-	}
-
-	if (!found) {
-		pr_err("No W83697HF/HG could be found\n");
-		ret = -ENODEV;
-		goto out;
-	}
-
-	w83697hf_init();
-	if (early_disable) {
-		if (wdt_running())
-			pr_warn("Stopping previously enabled watchdog until userland kicks in\n");
-		wdt_disable();
-	}
-
-	if (wdt_set_heartbeat(timeout)) {
-		wdt_set_heartbeat(WATCHDOG_TIMEOUT);
-		pr_info("timeout value must be 1 <= timeout <= 255, using %d\n",
-			WATCHDOG_TIMEOUT);
-	}
-
-	ret = register_reboot_notifier(&wdt_notifier);
-	if (ret != 0) {
-		pr_err("cannot register reboot notifier (err=%d)\n", ret);
-		goto unreg_regions;
-	}
-
-	ret = misc_register(&wdt_miscdev);
-	if (ret != 0) {
-		pr_err("cannot register miscdev on minor=%d (err=%d)\n",
-		       WATCHDOG_MINOR, ret);
-		goto unreg_reboot;
-	}
-
-	pr_info("initialized. timeout=%d sec (nowayout=%d)\n",
-		timeout, nowayout);
-
-out:
-	return ret;
-unreg_reboot:
-	unregister_reboot_notifier(&wdt_notifier);
-unreg_regions:
-	release_region(wdt_io, 2);
-	goto out;
-}
-
-static void __exit wdt_exit(void)
-{
-	misc_deregister(&wdt_miscdev);
-	unregister_reboot_notifier(&wdt_notifier);
-	release_region(wdt_io, 2);
-}
-
-module_init(wdt_init);
-module_exit(wdt_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marcus Junker <junker@anduras.de>");
-MODULE_AUTHOR("Samuel Tardieu <sam@rfc1149.net>");
-MODULE_DESCRIPTION("w83697hf/hg WDT driver");

diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
deleted file mode 100644
index ff58cb7..0000000
--- a/drivers/watchdog/w83697ug_wdt.c
+++ /dev/null

@@ -1,397 +0,0 @@
-/*
- *	w83697ug/uf WDT driver
- *
- *	(c) Copyright 2008 Flemming Fransen <ff@nrvissing.net>
- *		reused original code to support w83697ug/uf.
- *
- *	Based on w83627hf_wdt.c which is based on advantechwdt.c
- *	which is based on wdt.c.
- *	Original copyright messages:
- *
- *	(c) Copyright 2007 Vlad Drukker <vlad@storewiz.com>
- *		added support for W83627THF.
- *
- *	(c) Copyright 2003 Pádraig Brady <P@draigBrady.com>
- *
- *	(c) Copyright 2000-2001 Marek Michalkiewicz <marekm@linux.org.pl>
- *
- *	(c) Copyright 1996 Alan Cox <alan@redhat.com>, All Rights Reserved.
- *				http://www.redhat.com
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
- *	warranty for any of this software. This material is provided
- *	"AS-IS" and at no charge.
- *
- *	(c) Copyright 1995    Alan Cox <alan@redhat.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/fs.h>
-#include <linux/ioport.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-
-
-#define WATCHDOG_NAME "w83697ug/uf WDT"
-#define WATCHDOG_TIMEOUT 60		/* 60 sec default timeout */
-
-static unsigned long wdt_is_open;
-static char expect_close;
-static DEFINE_SPINLOCK(io_lock);
-
-static int wdt_io = 0x2e;
-module_param(wdt_io, int, 0);
-MODULE_PARM_DESC(wdt_io, "w83697ug/uf WDT io port (default 0x2e)");
-
-static int timeout = WATCHDOG_TIMEOUT;	/* in seconds */
-module_param(timeout, int, 0);
-MODULE_PARM_DESC(timeout,
-	"Watchdog timeout in seconds. 1<= timeout <=255 (default="
-				__MODULE_STRING(WATCHDOG_TIMEOUT) ")");
-
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout,
-	"Watchdog cannot be stopped once started (default="
-				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-/*
- *	Kernel methods.
- */
-
-#define WDT_EFER (wdt_io+0)   /* Extended Function Enable Registers */
-#define WDT_EFIR (wdt_io+0)   /* Extended Function Index Register
-							(same as EFER) */
-#define WDT_EFDR (WDT_EFIR+1) /* Extended Function Data Register */
-
-static int w83697ug_select_wd_register(void)
-{
-	unsigned char c;
-	unsigned char version;
-
-	outb_p(0x87, WDT_EFER); /* Enter extended function mode */
-	outb_p(0x87, WDT_EFER); /* Again according to manual */
-
-	outb(0x20, WDT_EFER);	/* check chip version	*/
-	version = inb(WDT_EFDR);
-
-	if (version == 0x68) {	/* W83697UG		*/
-		pr_info("Watchdog chip version 0x%02x = W83697UG/UF found at 0x%04x\n",
-			version, wdt_io);
-
-		outb_p(0x2b, WDT_EFER);
-		c = inb_p(WDT_EFDR);    /* select WDT0 */
-		c &= ~0x04;
-		outb_p(0x2b, WDT_EFER);
-		outb_p(c, WDT_EFDR);	/* set pin118 to WDT0 */
-
-	} else {
-		pr_err("No W83697UG/UF could be found\n");
-		return -ENODEV;
-	}
-
-	outb_p(0x07, WDT_EFER); /* point to logical device number reg */
-	outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
-	outb_p(0x30, WDT_EFER); /* select CR30 */
-	c = inb_p(WDT_EFDR);
-	outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
-
-	return 0;
-}
-
-static void w83697ug_unselect_wd_register(void)
-{
-	outb_p(0xAA, WDT_EFER); /* Leave extended function mode */
-}
-
-static int w83697ug_init(void)
-{
-	int ret;
-	unsigned char t;
-
-	ret = w83697ug_select_wd_register();
-	if (ret != 0)
-		return ret;
-
-	outb_p(0xF6, WDT_EFER); /* Select CRF6 */
-	t = inb_p(WDT_EFDR);    /* read CRF6 */
-	if (t != 0) {
-		pr_info("Watchdog already running. Resetting timeout to %d sec\n",
-			timeout);
-		outb_p(timeout, WDT_EFDR);    /* Write back to CRF6 */
-	}
-	outb_p(0xF5, WDT_EFER); /* Select CRF5 */
-	t = inb_p(WDT_EFDR);    /* read CRF5 */
-	t &= ~0x0C;             /* set second mode &
-					disable keyboard turning off watchdog */
-	outb_p(t, WDT_EFDR);    /* Write back to CRF5 */
-
-	w83697ug_unselect_wd_register();
-	return 0;
-}
-
-static void wdt_ctrl(int timeout)
-{
-	spin_lock(&io_lock);
-
-	if (w83697ug_select_wd_register() < 0) {
-		spin_unlock(&io_lock);
-		return;
-	}
-
-	outb_p(0xF4, WDT_EFER);    /* Select CRF4 */
-	outb_p(timeout, WDT_EFDR); /* Write Timeout counter to CRF4 */
-
-	w83697ug_unselect_wd_register();
-
-	spin_unlock(&io_lock);
-}
-
-static int wdt_ping(void)
-{
-	wdt_ctrl(timeout);
-	return 0;
-}
-
-static int wdt_disable(void)
-{
-	wdt_ctrl(0);
-	return 0;
-}
-
-static int wdt_set_heartbeat(int t)
-{
-	if (t < 1 || t > 255)
-		return -EINVAL;
-
-	timeout = t;
-	return 0;
-}
-
-static ssize_t wdt_write(struct file *file, const char __user *buf,
-						size_t count, loff_t *ppos)
-{
-	if (count) {
-		if (!nowayout) {
-			size_t i;
-
-			expect_close = 0;
-
-			for (i = 0; i != count; i++) {
-				char c;
-				if (get_user(c, buf + i))
-					return -EFAULT;
-				if (c == 'V')
-					expect_close = 42;
-			}
-		}
-		wdt_ping();
-	}
-	return count;
-}
-
-static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_timeout;
-	static const struct watchdog_info ident = {
-		.options =		WDIOF_KEEPALIVEPING |
-					WDIOF_SETTIMEOUT |
-					WDIOF_MAGICCLOSE,
-		.firmware_version =	1,
-		.identity =		"W83697UG WDT",
-	};
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		if (copy_to_user(argp, &ident, sizeof(ident)))
-			return -EFAULT;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, p);
-
-	case WDIOC_SETOPTIONS:
-	{
-		int options, retval = -EINVAL;
-
-		if (get_user(options, p))
-			return -EFAULT;
-
-		if (options & WDIOS_DISABLECARD) {
-			wdt_disable();
-			retval = 0;
-		}
-
-		if (options & WDIOS_ENABLECARD) {
-			wdt_ping();
-			retval = 0;
-		}
-
-		return retval;
-	}
-
-	case WDIOC_KEEPALIVE:
-		wdt_ping();
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_timeout, p))
-			return -EFAULT;
-		if (wdt_set_heartbeat(new_timeout))
-			return -EINVAL;
-		wdt_ping();
-		/* Fall */
-
-	case WDIOC_GETTIMEOUT:
-		return put_user(timeout, p);
-
-	default:
-		return -ENOTTY;
-	}
-	return 0;
-}
-
-static int wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(0, &wdt_is_open))
-		return -EBUSY;
-	/*
-	 *	Activate
-	 */
-
-	wdt_ping();
-	return nonseekable_open(inode, file);
-}
-
-static int wdt_close(struct inode *inode, struct file *file)
-{
-	if (expect_close == 42)
-		wdt_disable();
-	else {
-		pr_crit("Unexpected close, not stopping watchdog!\n");
-		wdt_ping();
-	}
-	expect_close = 0;
-	clear_bit(0, &wdt_is_open);
-	return 0;
-}
-
-/*
- *	Notifier for system down
- */
-
-static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
-	void *unused)
-{
-	if (code == SYS_DOWN || code == SYS_HALT)
-		wdt_disable();	/* Turn the WDT off */
-
-	return NOTIFY_DONE;
-}
-
-/*
- *	Kernel Interfaces
- */
-
-static const struct file_operations wdt_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.write		= wdt_write,
-	.unlocked_ioctl	= wdt_ioctl,
-	.open		= wdt_open,
-	.release	= wdt_close,
-};
-
-static struct miscdevice wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &wdt_fops,
-};
-
-/*
- *	The WDT needs to learn about soft shutdowns in order to
- *	turn the timebomb registers off.
- */
-
-static struct notifier_block wdt_notifier = {
-	.notifier_call = wdt_notify_sys,
-};
-
-static int __init wdt_init(void)
-{
-	int ret;
-
-	pr_info("WDT driver for the Winbond(TM) W83697UG/UF Super I/O chip initialising\n");
-
-	if (wdt_set_heartbeat(timeout)) {
-		wdt_set_heartbeat(WATCHDOG_TIMEOUT);
-		pr_info("timeout value must be 1<=timeout<=255, using %d\n",
-			WATCHDOG_TIMEOUT);
-	}
-
-	if (!request_region(wdt_io, 1, WATCHDOG_NAME)) {
-		pr_err("I/O address 0x%04x already in use\n", wdt_io);
-		ret = -EIO;
-		goto out;
-	}
-
-	ret = w83697ug_init();
-	if (ret != 0)
-		goto unreg_regions;
-
-	ret = register_reboot_notifier(&wdt_notifier);
-	if (ret != 0) {
-		pr_err("cannot register reboot notifier (err=%d)\n", ret);
-		goto unreg_regions;
-	}
-
-	ret = misc_register(&wdt_miscdev);
-	if (ret != 0) {
-		pr_err("cannot register miscdev on minor=%d (err=%d)\n",
-		       WATCHDOG_MINOR, ret);
-		goto unreg_reboot;
-	}
-
-	pr_info("initialized. timeout=%d sec (nowayout=%d)\n",
-		timeout, nowayout);
-
-out:
-	return ret;
-unreg_reboot:
-	unregister_reboot_notifier(&wdt_notifier);
-unreg_regions:
-	release_region(wdt_io, 1);
-	goto out;
-}
-
-static void __exit wdt_exit(void)
-{
-	misc_deregister(&wdt_miscdev);
-	unregister_reboot_notifier(&wdt_notifier);
-	release_region(wdt_io, 1);
-}
-
-module_init(wdt_init);
-module_exit(wdt_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Flemming Frandsen <ff@nrvissing.net>");
-MODULE_DESCRIPTION("w83697ug/uf WDT driver");

diff --git a/firmware/Makefile b/firmware/Makefile
index cbb09ce..5747417 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile

@@ -4,10 +4,10 @@
 
 # Create $(fwabs) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a
 # leading /, it's relative to $(srctree).
-fwdir := $(subst ",,$(CONFIG_EXTRA_FIRMWARE_DIR))
+fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR))
 fwabs := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir))
 
-fw-external-y := $(subst ",,$(CONFIG_EXTRA_FIRMWARE))
+fw-external-y := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE))
 
 # There are three cases to care about:
 # 1. Building kernel with CONFIG_FIRMWARE_IN_KERNEL=y -- $(fw-shipped-y) should
@@ -138,12 +138,6 @@
 
 fw-shipped-all := $(fw-shipped-y) $(fw-shipped-m) $(fw-shipped-)
 
-# Directories which we _might_ need to create, so we have a rule for them.
-firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all))))
-
-quiet_cmd_mkdir = MKDIR   $(patsubst $(objtree)/%,%,$@)
-      cmd_mkdir = mkdir -p $@
-
 quiet_cmd_ihex  = IHEX    $@
       cmd_ihex  = $(OBJCOPY) -Iihex -Obinary $< $@
 
@@ -184,21 +178,10 @@
 		include/config/superh32.h include/config/superh64.h \
 		include/config/x86_32.h include/config/x86_64.h)
 
-# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
-# It'll end up depending on these targets, so make them a PHONY rule which
-# depends on _all_ the directories in $(firmware-dirs), and it'll work out OK.
-PHONY += $(objtree)/$$(%) $(objtree)/$(obj)/$$(%)
-$(objtree)/$$(%) $(objtree)/$(obj)/$$(%): $(firmware-dirs)
-	@true
-
-# For the $$(dir %) trick, where we need % to be expanded first.
-.SECONDEXPANSION:
-
-$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps) \
-		| $(objtree)/$$(dir %)
+$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps)
 	$(call cmd,fwbin,$(patsubst %.gen.S,%,$@))
 $(patsubst %,$(obj)/%.gen.S, $(fw-external-y)): %: $(wordsize_deps) \
-		include/config/extra/firmware/dir.h | $(objtree)/$$(dir %)
+		include/config/extra/firmware/dir.h
 	$(call cmd,fwbin,$(fwabs)/$(patsubst $(obj)/%.gen.S,%,$@))
 
 # The .o files depend on the binaries directly; the .S files don't.
@@ -207,7 +190,7 @@
 
 # .ihex is used just as a simple way to hold binary files in a source tree
 # where binaries are frowned upon. They are directly converted with objcopy.
-$(obj)/%: $(obj)/%.ihex | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%: $(obj)/%.ihex
 	$(call cmd,ihex)
 
 # Don't depend on ihex2fw if we're installing and it already exists.
@@ -226,16 +209,13 @@
 # is actually meaningful, because the firmware has to be loaded in a certain
 # order rather than as a single binary blob. Thus, we convert them into our
 # more compact binary representation of ihex records (<linux/ihex.h>)
-$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep)
 	$(call cmd,ihex2fw)
 
 # .H16 is our own modified form of Intel HEX, with 16-bit length for records.
-$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep)
 	$(call cmd,h16tofw)
 
-$(firmware-dirs):
-	$(call cmd,mkdir)
-
 obj-y				 += $(patsubst %,%.gen.o, $(fw-external-y))
 obj-$(CONFIG_FIRMWARE_IN_KERNEL) += $(patsubst %,%.gen.o, $(fw-shipped-y))
 

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c71e886..cc1cfae 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c

@@ -259,8 +259,7 @@
  *
  */
 static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	       loff_t pos, unsigned long nr_segs)
+v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	/*
 	 * FIXME
@@ -269,7 +268,7 @@
 	 */
 	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
 		 iocb->ki_filp->f_path.dentry->d_name.name,
-		 (long long)pos, nr_segs);
+		 (long long)pos, iter->nr_segs);
 
 	return -EINVAL;
 }

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 96e5507..520c11c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c

@@ -692,7 +692,7 @@
 {
 	if (filp->f_flags & O_DIRECT)
 		return v9fs_direct_read(filp, data, count, offset);
-	return do_sync_read(filp, data, count, offset);
+	return new_sync_read(filp, data, count, offset);
 }
 
 /**
@@ -760,7 +760,7 @@
 
 buff_write:
 	mutex_unlock(&inode->i_mutex);
-	return do_sync_write(filp, data, count, offsetp);
+	return new_sync_write(filp, data, count, offsetp);
 }
 
 /**
@@ -778,7 +778,7 @@
 
 	if (filp->f_flags & O_DIRECT)
 		return v9fs_direct_write(filp, data, count, offset);
-	return do_sync_write(filp, data, count, offset);
+	return new_sync_write(filp, data, count, offset);
 }
 
 
@@ -847,8 +847,8 @@
 	.llseek = generic_file_llseek,
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
-	.aio_read = generic_file_aio_read,
-	.aio_write = generic_file_aio_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
@@ -860,8 +860,8 @@
 	.llseek = generic_file_llseek,
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
-	.aio_read = generic_file_aio_read,
-	.aio_write = generic_file_aio_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock_dotl,

diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index a36da53..07c9edc 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c

@@ -23,12 +23,12 @@
 
 const struct file_operations adfs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 };
 

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0270303..a7fe57d 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c

@@ -27,10 +27,10 @@
 
 const struct file_operations affs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 66d50fe..932ce07 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c

@@ -31,10 +31,10 @@
 	.open		= afs_open,
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= afs_file_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= afs_file_write,
 	.mmap		= generic_file_readonly_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= afs_fsync,

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 590b55f..71d5982 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h

@@ -747,8 +747,7 @@
 extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
-extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
-			      unsigned long, loff_t);
+extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_writeback_all(struct afs_vnode *);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 

diff --git a/fs/afs/write.c b/fs/afs/write.c
index a890db4..ab6adfd 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c

@@ -625,15 +625,14 @@
 /*
  * write to an AFS file
  */
-ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
+ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
 	ssize_t result;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(from);
 
-	_enter("{%x.%u},{%zu},%lu,",
-	       vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
+	_enter("{%x.%u},{%zu},",
+	       vnode->fid.vid, vnode->fid.vnode, count);
 
 	if (IS_SWAPFILE(&vnode->vfs_inode)) {
 		printk(KERN_INFO
@@ -644,7 +643,7 @@
 	if (!count)
 		return 0;
 
-	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	result = generic_file_write_iter(iocb, from);
 	if (IS_ERR_VALUE(result)) {
 		_leave(" = %zd", result);
 		return result;

diff --git a/fs/aio.c b/fs/aio.c
index a0ed6c7..4f078c0 100644
--- a/fs/aio.c
+++ b/fs/aio.c

@@ -477,7 +477,7 @@
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -538,7 +538,7 @@
 				       struct kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
-		kiocb_cancel(ctx, req);
+		kiocb_cancel(req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -727,42 +727,42 @@
  *	when the processes owning a context have all exited to encourage
  *	the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 		struct completion *requests_done)
 {
-	if (!atomic_xchg(&ctx->dead, 1)) {
-		struct kioctx_table *table;
+	struct kioctx_table *table;
 
-		spin_lock(&mm->ioctx_lock);
-		rcu_read_lock();
-		table = rcu_dereference(mm->ioctx_table);
+	if (atomic_xchg(&ctx->dead, 1))
+		return -EINVAL;
 
-		WARN_ON(ctx != table->table[ctx->id]);
-		table->table[ctx->id] = NULL;
-		rcu_read_unlock();
-		spin_unlock(&mm->ioctx_lock);
 
-		/* percpu_ref_kill() will do the necessary call_rcu() */
-		wake_up_all(&ctx->wait);
+	spin_lock(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
 
-		/*
-		 * It'd be more correct to do this in free_ioctx(), after all
-		 * the outstanding kiocbs have finished - but by then io_destroy
-		 * has already returned, so io_setup() could potentially return
-		 * -EAGAIN with no ioctxs actually in use (as far as userspace
-		 *  could tell).
-		 */
-		aio_nr_sub(ctx->max_reqs);
+	WARN_ON(ctx != table->table[ctx->id]);
+	table->table[ctx->id] = NULL;
+	rcu_read_unlock();
+	spin_unlock(&mm->ioctx_lock);
 
-		if (ctx->mmap_size)
-			vm_munmap(ctx->mmap_base, ctx->mmap_size);
+	/* percpu_ref_kill() will do the necessary call_rcu() */
+	wake_up_all(&ctx->wait);
 
-		ctx->requests_done = requests_done;
-		percpu_ref_kill(&ctx->users);
-	} else {
-		if (requests_done)
-			complete(requests_done);
-	}
+	/*
+	 * It'd be more correct to do this in free_ioctx(), after all
+	 * the outstanding kiocbs have finished - but by then io_destroy
+	 * has already returned, so io_setup() could potentially return
+	 * -EAGAIN with no ioctxs actually in use (as far as userspace
+	 *  could tell).
+	 */
+	aio_nr_sub(ctx->max_reqs);
+
+	if (ctx->mmap_size)
+		vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+	ctx->requests_done = requests_done;
+	percpu_ref_kill(&ctx->users);
+	return 0;
 }
 
 /* wait_on_sync_kiocb:
@@ -1219,21 +1219,23 @@
 	if (likely(NULL != ioctx)) {
 		struct completion requests_done =
 			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		int ret;
 
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &requests_done);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
 		 * keep using user-space buffers even if user thinks the context
 		 * is destroyed.
 		 */
-		wait_for_completion(&requests_done);
+		if (!ret)
+			wait_for_completion(&requests_done);
 
-		return 0;
+		return ret;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
 	return -EINVAL;
@@ -1241,6 +1243,7 @@
 
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
 			    unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 				     int rw, char __user *buf,
@@ -1298,7 +1301,9 @@
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
+	rw_iter_op *iter_op;
 	struct iovec inline_vec, *iovec = &inline_vec;
+	struct iov_iter iter;
 
 	switch (opcode) {
 	case IOCB_CMD_PREAD:
@@ -1306,6 +1311,7 @@
 		mode	= FMODE_READ;
 		rw	= READ;
 		rw_op	= file->f_op->aio_read;
+		iter_op	= file->f_op->read_iter;
 		goto rw_common;
 
 	case IOCB_CMD_PWRITE:
@@ -1313,12 +1319,13 @@
 		mode	= FMODE_WRITE;
 		rw	= WRITE;
 		rw_op	= file->f_op->aio_write;
+		iter_op	= file->f_op->write_iter;
 		goto rw_common;
 rw_common:
 		if (unlikely(!(file->f_mode & mode)))
 			return -EBADF;
 
-		if (!rw_op)
+		if (!rw_op && !iter_op)
 			return -EINVAL;
 
 		ret = (opcode == IOCB_CMD_PREADV ||
@@ -1347,7 +1354,12 @@
 		if (rw == WRITE)
 			file_start_write(file);
 
-		ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		if (iter_op) {
+			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+			ret = iter_op(req, &iter);
+		} else {
+			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		}
 
 		if (rw == WRITE)
 			file_end_write(file);
@@ -1585,7 +1597,7 @@
 
 	kiocb = lookup_kiocb(ctx, iocb, key);
 	if (kiocb)
-		ret = kiocb_cancel(ctx, kiocb);
+		ret = kiocb_cancel(kiocb);
 	else
 		ret = -EINVAL;
 

diff --git a/fs/attr.c b/fs/attr.c
index 5d4e59d..6530ced 100644
--- a/fs/attr.c
+++ b/fs/attr.c

@@ -50,14 +50,14 @@
 	if ((ia_valid & ATTR_UID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
 				inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			attr->ia_mode &= ~S_ISGID;
 	}
 
@@ -160,7 +160,7 @@
 		umode_t mode = attr->ia_mode;
 
 		if (!in_group_p(inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			mode &= ~S_ISGID;
 		inode->i_mode = mode;
 	}

diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ae28922..e7f88ac 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c

@@ -23,10 +23,10 @@
 
 const struct file_operations bfs_file_operations = {
 	.llseek 	= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 };

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 83fba15..6d72746 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -165,14 +165,15 @@
 }
 
 static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-				    nr_segs, blkdev_get_block, NULL, NULL, 0);
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+				    offset, blkdev_get_block,
+				    NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1571,43 +1572,38 @@
  * Does not take i_mutex for the write and thus is not for general purpose
  * use.
  */
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			 unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct blk_plug plug;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	blk_start_plug(&plug);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	if (ret > 0) {
 		ssize_t err;
-
-		err = generic_write_sync(file, pos, ret);
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
 	blk_finish_plug(&plug);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
-static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			 unsigned long nr_segs, loff_t pos)
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *bd_inode = file->f_mapping->host;
 	loff_t size = i_size_read(bd_inode);
+	loff_t pos = iocb->ki_pos;
 
 	if (pos >= size)
 		return 0;
 
 	size -= pos;
-	if (size < iocb->ki_nbytes)
-		nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
-	return generic_file_aio_read(iocb, iov, nr_segs, pos);
+	iov_iter_truncate(to, size);
+	return generic_file_read_iter(iocb, to);
 }
 
 /*
@@ -1639,10 +1635,10 @@
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= blkdev_aio_read,
-	.aio_write	= blkdev_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= blkdev_read_iter,
+	.write_iter	= blkdev_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
@@ -1650,7 +1646,7 @@
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)

diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f341a98..6d1d0b9 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile

@@ -16,4 +16,4 @@
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
-	tests/extent-io-tests.o tests/inode-tests.o
+	tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index ff9b399..9a0124a 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c

@@ -79,13 +79,6 @@
 	const char *name;
 	char *value = NULL;
 
-	if (acl) {
-		ret = posix_acl_valid(acl);
-		if (ret < 0)
-			return ret;
-		ret = 0;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 10db21f..e25564b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c

@@ -900,7 +900,11 @@
 		goto out;
 	BUG_ON(ret == 0);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (trans && likely(trans->type != __TRANS_DUMMY)) {
+#else
 	if (trans) {
+#endif
 		/*
 		 * look if there are updates for this ref queued and lock the
 		 * head
@@ -984,11 +988,12 @@
 				goto out;
 		}
 		if (ref->count && ref->parent) {
-			if (extent_item_pos && !ref->inode_list) {
+			if (extent_item_pos && !ref->inode_list &&
+			    ref->level == 0) {
 				u32 bsz;
 				struct extent_buffer *eb;
 				bsz = btrfs_level_size(fs_info->extent_root,
-							info_level);
+							ref->level);
 				eb = read_tree_block(fs_info->extent_root,
 							   ref->parent, bsz, 0);
 				if (!eb || !extent_buffer_uptodate(eb)) {
@@ -1404,9 +1409,10 @@
  * returns <0 on error
  */
 static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
-				struct btrfs_extent_item *ei, u32 item_size,
-				struct btrfs_extent_inline_ref **out_eiref,
-				int *out_type)
+				   struct btrfs_key *key,
+				   struct btrfs_extent_item *ei, u32 item_size,
+				   struct btrfs_extent_inline_ref **out_eiref,
+				   int *out_type)
 {
 	unsigned long end;
 	u64 flags;
@@ -1416,19 +1422,26 @@
 		/* first call */
 		flags = btrfs_extent_flags(eb, ei);
 		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-			info = (struct btrfs_tree_block_info *)(ei + 1);
-			*out_eiref =
-				(struct btrfs_extent_inline_ref *)(info + 1);
+			if (key->type == BTRFS_METADATA_ITEM_KEY) {
+				/* a skinny metadata extent */
+				*out_eiref =
+				     (struct btrfs_extent_inline_ref *)(ei + 1);
+			} else {
+				WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+				info = (struct btrfs_tree_block_info *)(ei + 1);
+				*out_eiref =
+				   (struct btrfs_extent_inline_ref *)(info + 1);
+			}
 		} else {
 			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
 		}
 		*ptr = (unsigned long)*out_eiref;
-		if ((void *)*ptr >= (void *)ei + item_size)
+		if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
 			return -ENOENT;
 	}
 
 	end = (unsigned long)ei + item_size;
-	*out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+	*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
 	*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
 
 	*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1447,8 +1460,8 @@
  * <0 on error.
  */
 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
-				struct btrfs_extent_item *ei, u32 item_size,
-				u64 *out_root, u8 *out_level)
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level)
 {
 	int ret;
 	int type;
@@ -1459,8 +1472,8 @@
 		return 1;
 
 	while (1) {
-		ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
-						&eiref, &type);
+		ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+					      &eiref, &type);
 		if (ret < 0)
 			return ret;
 

diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27..86fc20f 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h

@@ -40,8 +40,8 @@
 			u64 *flags);
 
 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
-				struct btrfs_extent_item *ei, u32 item_size,
-				u64 *out_root, u8 *out_level);
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level);
 
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				u64 extent_item_objectid,
@@ -55,8 +55,8 @@
 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info, u64 bytenr,
-				u64 time_seq, struct ulist **roots);
+			 struct btrfs_fs_info *fs_info, u64 bytenr,
+			 u64 time_seq, struct ulist **roots);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 2256e9c..4794923 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h

@@ -284,4 +284,6 @@
 		  &BTRFS_I(inode)->runtime_flags);
 }
 
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
+
 #endif

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 0e8388e..ce92ae3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c

@@ -1093,6 +1093,7 @@
 					next_stack =
 					    btrfsic_stack_frame_alloc();
 					if (NULL == next_stack) {
+						sf->error = -1;
 						btrfsic_release_block_ctx(
 								&sf->
 								next_block_ctx);
@@ -1190,8 +1191,10 @@
 				    sf->next_block_ctx.datav[0];
 
 				next_stack = btrfsic_stack_frame_alloc();
-				if (NULL == next_stack)
+				if (NULL == next_stack) {
+					sf->error = -1;
 					goto one_stack_frame_backwards;
+				}
 
 				next_stack->i = -1;
 				next_stack->block = sf->next_block;

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d43c544..92371c4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c

@@ -887,7 +887,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -1;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, len, pages,
@@ -923,7 +923,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
 							 disk_start,
@@ -945,7 +945,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
 						  dest_page, start_byte,

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1bcfcdb..aeab453 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c

@@ -224,7 +224,8 @@
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
 	spin_lock(&root->fs_info->trans_lock);
-	if (root->track_dirty && list_empty(&root->dirty_list)) {
+	if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+	    list_empty(&root->dirty_list)) {
 		list_add(&root->dirty_list,
 			 &root->fs_info->dirty_cowonly_roots);
 	}
@@ -246,9 +247,10 @@
 	int level;
 	struct btrfs_disk_key disk_key;
 
-	WARN_ON(root->ref_cows && trans->transid !=
-		root->fs_info->running_transaction->transid);
-	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->fs_info->running_transaction->transid);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
 	if (level == 0)
@@ -354,44 +356,14 @@
 }
 
 /*
- * Increment the upper half of tree_mod_seq, set lower half zero.
- *
- * Must be called with fs_info->tree_mod_seq_lock held.
+ * Pull a new tree mod seq number for our operation.
  */
-static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
-{
-	u64 seq = atomic64_read(&fs_info->tree_mod_seq);
-	seq &= 0xffffffff00000000ull;
-	seq += 1ull << 32;
-	atomic64_set(&fs_info->tree_mod_seq, seq);
-	return seq;
-}
-
-/*
- * Increment the lower half of tree_mod_seq.
- *
- * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
- * are generated should not technically require a spin lock here. (Rationale:
- * incrementing the minor while incrementing the major seq number is between its
- * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
- * just returns a unique sequence number as usual.) We have decided to leave
- * that requirement in here and rethink it once we notice it really imposes a
- * problem on some workload.
- */
-static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
 {
 	return atomic64_inc_return(&fs_info->tree_mod_seq);
 }
 
 /*
- * return the last minor in the previous major tree_mod_seq number
- */
-u64 btrfs_tree_mod_seq_prev(u64 seq)
-{
-	return (seq & 0xffffffff00000000ull) - 1ull;
-}
-
-/*
  * This adds a new blocker to the tree mod log's blocker list if the @elem
  * passed does not already have a sequence number set. So when a caller expects
  * to record tree modifications, it should ensure to set elem->seq to zero
@@ -402,19 +374,16 @@
 u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem)
 {
-	u64 seq;
-
 	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
-		elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
+		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
-	seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 	tree_mod_log_write_unlock(fs_info);
 
-	return seq;
+	return elem->seq;
 }
 
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -487,9 +456,7 @@
 
 	BUG_ON(!tm);
 
-	spin_lock(&fs_info->tree_mod_seq_lock);
-	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
 
 	tm_root = &fs_info->tree_mod_log;
 	new = &tm_root->rb_node;
@@ -997,14 +964,14 @@
 	 * snapshot and the block was not allocated by tree relocation,
 	 * we know the block is not shared.
 	 */
-	if (root->ref_cows &&
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    buf != root->node && buf != root->commit_root &&
 	    (btrfs_header_generation(buf) <=
 	     btrfs_root_last_snapshot(&root->root_item) ||
 	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
 		return 1;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (root->ref_cows &&
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
 		return 1;
 #endif
@@ -1146,9 +1113,10 @@
 
 	btrfs_assert_tree_locked(buf);
 
-	WARN_ON(root->ref_cows && trans->transid !=
-		root->fs_info->running_transaction->transid);
-	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->fs_info->running_transaction->transid);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
 
@@ -1193,7 +1161,7 @@
 		return ret;
 	}
 
-	if (root->ref_cows) {
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
 		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
 		if (ret)
 			return ret;
@@ -1538,6 +1506,10 @@
 				   struct btrfs_root *root,
 				   struct extent_buffer *buf)
 {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return 0;
+#endif
 	/* ensure we can see the force_cow */
 	smp_rmb();
 
@@ -1556,7 +1528,7 @@
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
 	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
-	    !root->force_cow)
+	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
 		return 0;
 	return 1;
 }
@@ -5125,7 +5097,17 @@
 		return ret;
 	btrfs_item_key(path->nodes[0], &found_key, 0);
 	ret = comp_keys(&found_key, &key);
-	if (ret < 0)
+	/*
+	 * We might have had an item with the previous key in the tree right
+	 * before we released our path. And after we released our path, that
+	 * item might have been pushed to the first slot (0) of the leaf we
+	 * were holding due to a tree balance. Alternatively, an item with the
+	 * previous key can exist as the only element of a leaf (big fat item).
+	 * Therefore account for these 2 cases, so that our callers (like
+	 * btrfs_previous_item) don't miss an existing item with a key matching
+	 * the previous key we computed above.
+	 */
+	if (ret <= 0)
 		return 0;
 	return 1;
 }
@@ -5736,6 +5718,24 @@
 		ret = 0;
 		goto done;
 	}
+	/*
+	 * So the above check misses one case:
+	 * - after releasing the path above, someone has removed the item that
+	 *   used to be at the very end of the block, and balance between leafs
+	 *   gets another one with bigger key.offset to replace it.
+	 *
+	 * This one should be returned as well, or we can get leaf corruption
+	 * later(esp. in __btrfs_drop_extents()).
+	 *
+	 * And a bit more explanation about this check,
+	 * with ret > 0, the key isn't found, the path points to the slot
+	 * where it should be inserted, so the path->slots[0] item must be the
+	 * bigger one.
+	 */
+	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+		ret = 0;
+		goto done;
+	}
 
 	while (level < BTRFS_MAX_LEVEL) {
 		if (!path->nodes[level]) {

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ba6b885..b7e2c1c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h

@@ -33,6 +33,7 @@
 #include <asm/kmap_types.h>
 #include <linux/pagemap.h>
 #include <linux/btrfs.h>
+#include <linux/workqueue.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -756,6 +757,12 @@
 
 #define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)
 
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
+
 struct btrfs_root_item {
 	struct btrfs_inode_item inode;
 	__le64 generation;
@@ -840,7 +847,10 @@
 	/* BTRFS_BALANCE_ARGS_* */
 	__le64 flags;
 
-	__le64 unused[8];
+	/* BTRFS_BALANCE_ARGS_LIMIT value */
+	__le64 limit;
+
+	__le64 unused[7];
 } __attribute__ ((__packed__));
 
 /*
@@ -1113,6 +1123,12 @@
 	__le64 rsv_excl;
 } __attribute__ ((__packed__));
 
+/* For raid type sysfs entries */
+struct raid_kobject {
+	int raid_type;
+	struct kobject kobj;
+};
+
 struct btrfs_space_info {
 	spinlock_t lock;
 
@@ -1163,7 +1179,7 @@
 	wait_queue_head_t wait;
 
 	struct kobject kobj;
-	struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
+	struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
 };
 
 #define	BTRFS_BLOCK_RSV_GLOBAL		1
@@ -1313,6 +1329,8 @@
 
 #define BTRFS_STRIPE_HASH_TABLE_BITS 11
 
+void btrfs_init_async_reclaim_work(struct work_struct *work);
+
 /* fs_info */
 struct reloc_control;
 struct btrfs_device;
@@ -1534,6 +1552,9 @@
 	 */
 	struct btrfs_workqueue *fixup_workers;
 	struct btrfs_workqueue *delayed_workers;
+
+	/* the extent workers do delayed refs on the extent allocation tree */
+	struct btrfs_workqueue *extent_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -1636,7 +1657,10 @@
 
 	/* holds configuration and tracking. Protected by qgroup_lock */
 	struct rb_root qgroup_tree;
+	struct rb_root qgroup_op_tree;
 	spinlock_t qgroup_lock;
+	spinlock_t qgroup_op_lock;
+	atomic_t qgroup_op_seq;
 
 	/*
 	 * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1688,6 +1712,9 @@
 
 	struct semaphore uuid_tree_rescan_sem;
 	unsigned int update_uuid_tree_gen:1;
+
+	/* Used to reclaim the metadata space in the background. */
+	struct work_struct async_reclaim_work;
 };
 
 struct btrfs_subvolume_writers {
@@ -1696,6 +1723,26 @@
 };
 
 /*
+ * The state of btrfs root
+ */
+/*
+ * btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code.   But the
+ * race is very small, and only the first time the root
+ * is added to each transaction.  So IN_TRANS_SETUP
+ * is used to tell us when more checks are required
+ */
+#define BTRFS_ROOT_IN_TRANS_SETUP	0
+#define BTRFS_ROOT_REF_COWS		1
+#define BTRFS_ROOT_TRACK_DIRTY		2
+#define BTRFS_ROOT_IN_RADIX		3
+#define BTRFS_ROOT_DUMMY_ROOT		4
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED	5
+#define BTRFS_ROOT_DEFRAG_RUNNING	6
+#define BTRFS_ROOT_FORCE_COW		7
+#define BTRFS_ROOT_MULTI_LOG_TASKS	8
+
+/*
  * in ram representation of the tree.  extent_root is used for all allocations
  * and for the extent tree extent_root root.
  */
@@ -1706,6 +1753,7 @@
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
 
+	unsigned long state;
 	struct btrfs_root_item root_item;
 	struct btrfs_key root_key;
 	struct btrfs_fs_info *fs_info;
@@ -1740,7 +1788,6 @@
 	/* Just be updated when the commit succeeds. */
 	int last_log_commit;
 	pid_t log_start_pid;
-	bool log_multiple_pids;
 
 	u64 objectid;
 	u64 last_trans;
@@ -1760,23 +1807,13 @@
 
 	u64 highest_objectid;
 
-	/* btrfs_record_root_in_trans is a multi-step process,
-	 * and it can race with the balancing code.   But the
-	 * race is very small, and only the first time the root
-	 * is added to each transaction.  So in_trans_setup
-	 * is used to tell us when more checks are required
-	 */
-	unsigned long in_trans_setup;
-	int ref_cows;
-	int track_dirty;
-	int in_radix;
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-	int dummy_root;
+	u64 alloc_bytenr;
 #endif
+
 	u64 defrag_trans_start;
 	struct btrfs_key defrag_progress;
 	struct btrfs_key defrag_max;
-	int defrag_running;
 	char *name;
 
 	/* the dirty list is only used by non-reference counted roots */
@@ -1790,7 +1827,6 @@
 	spinlock_t orphan_lock;
 	atomic_t orphan_inodes;
 	struct btrfs_block_rsv *orphan_block_rsv;
-	int orphan_item_inserted;
 	int orphan_cleanup_state;
 
 	spinlock_t inode_lock;
@@ -1808,8 +1844,6 @@
 	 */
 	dev_t anon_dev;
 
-	int force_cow;
-
 	spinlock_t root_item_lock;
 	atomic_t refs;
 
@@ -2788,6 +2822,11 @@
 	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
 }
 
+static inline bool btrfs_root_dead(struct btrfs_root *root)
+{
+	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+}
+
 /* struct btrfs_root_backup */
 BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
 		   tree_root, 64);
@@ -2897,6 +2936,7 @@
 	cpu->vend = le64_to_cpu(disk->vend);
 	cpu->target = le64_to_cpu(disk->target);
 	cpu->flags = le64_to_cpu(disk->flags);
+	cpu->limit = le64_to_cpu(disk->limit);
 }
 
 static inline void
@@ -2914,6 +2954,7 @@
 	disk->vend = cpu_to_le64(cpu->vend);
 	disk->target = cpu_to_le64(cpu->target);
 	disk->flags = cpu_to_le64(cpu->flags);
+	disk->limit = cpu_to_le64(cpu->limit);
 }
 
 /* struct btrfs_super_block */
@@ -3236,6 +3277,8 @@
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+				 unsigned long count, int wait);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
@@ -3275,9 +3318,9 @@
 			 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow);
+		  struct extent_buffer *buf, int full_backref, int no_quota);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow);
+		  struct extent_buffer *buf, int full_backref, int no_quota);
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
 				u64 bytenr, u64 num_bytes, u64 flags,
@@ -3285,7 +3328,7 @@
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-		      u64 owner, u64 offset, int for_cow);
+		      u64 owner, u64 offset, int no_quota);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -3297,7 +3340,7 @@
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 owner, u64 offset, int for_cow);
+			 u64 root_objectid, u64 owner, u64 offset, int no_quota);
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				    struct btrfs_root *root);
@@ -3385,7 +3428,6 @@
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
 int __get_raid_index(u64 flags);
-
 int btrfs_start_nocow_write(struct btrfs_root *root);
 void btrfs_end_nocow_write(struct btrfs_root *root);
 /* ctree.c */
@@ -3561,7 +3603,6 @@
 			   struct seq_list *elem);
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			    struct seq_list *elem);
-u64 btrfs_tree_mod_seq_prev(u64 seq);
 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 
 /* root-item.c */
@@ -3708,6 +3749,12 @@
 		       struct bio *bio, u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+				     const struct btrfs_path *path,
+				     struct btrfs_file_extent_item *fi,
+				     const bool new_inline,
+				     struct extent_map *em);
+
 /* inode.c */
 struct btrfs_delalloc_work {
 	struct inode *inode;
@@ -4069,52 +4116,6 @@
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			 u64 start, int err);
 
-/* qgroup.c */
-struct qgroup_update {
-	struct list_head list;
-	struct btrfs_delayed_ref_node *node;
-	struct btrfs_delayed_extent_op *extent_op;
-};
-
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
-void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid,
-			char *name);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 qgroupid,
-		       struct btrfs_qgroup_limit *limit);
-int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
-void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_delayed_ref_node *node,
-			    struct btrfs_delayed_extent_op *extent_op);
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
-			     struct btrfs_delayed_ref_node *node,
-			     struct btrfs_delayed_extent_op *extent_op);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
-			 struct btrfs_qgroup_inherit *inherit);
-int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
-
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
-
 static inline int is_fstree(u64 rootid)
 {
 	if (rootid == BTRFS_FS_TREE_OBJECTID ||
@@ -4131,6 +4132,8 @@
 /* Sanity test specific functions */
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 void btrfs_test_destroy_inode(struct inode *inode);
+int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+			       u64 rfer, u64 excl);
 #endif
 
 #endif

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 33e561a..da775bf 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c

@@ -149,8 +149,8 @@
 	spin_lock(&root->inode_lock);
 	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 	if (ret == -EEXIST) {
-		kmem_cache_free(delayed_node_cache, node);
 		spin_unlock(&root->inode_lock);
+		kmem_cache_free(delayed_node_cache, node);
 		radix_tree_preload_end();
 		goto again;
 	}
@@ -267,14 +267,17 @@
 	mutex_unlock(&delayed_node->mutex);
 
 	if (atomic_dec_and_test(&delayed_node->refs)) {
+		bool free = false;
 		struct btrfs_root *root = delayed_node->root;
 		spin_lock(&root->inode_lock);
 		if (atomic_read(&delayed_node->refs) == 0) {
 			radix_tree_delete(&root->delayed_nodes_tree,
 					  delayed_node->inode_id);
-			kmem_cache_free(delayed_node_cache, delayed_node);
+			free = true;
 		}
 		spin_unlock(&root->inode_lock);
+		if (free)
+			kmem_cache_free(delayed_node_cache, delayed_node);
 	}
 }
 

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 3129964..6d16bea 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c

@@ -106,6 +106,10 @@
 		return -1;
 	if (ref1->type > ref2->type)
 		return 1;
+	if (ref1->no_quota > ref2->no_quota)
+		return 1;
+	if (ref1->no_quota < ref2->no_quota)
+		return -1;
 	/* merging of sequenced refs is not allowed */
 	if (compare_seq) {
 		if (ref1->seq < ref2->seq)
@@ -635,7 +639,7 @@
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
 		     u64 num_bytes, u64 parent, u64 ref_root, int level,
-		     int action, int for_cow)
+		     int action, int no_quota)
 {
 	struct btrfs_delayed_ref_node *existing;
 	struct btrfs_delayed_tree_ref *full_ref;
@@ -645,6 +649,8 @@
 	if (action == BTRFS_ADD_DELAYED_EXTENT)
 		action = BTRFS_ADD_DELAYED_REF;
 
+	if (is_fstree(ref_root))
+		seq = atomic64_read(&fs_info->tree_mod_seq);
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
@@ -655,9 +661,7 @@
 	ref->action = action;
 	ref->is_head = 0;
 	ref->in_tree = 1;
-
-	if (need_ref_seq(for_cow, ref_root))
-		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+	ref->no_quota = no_quota;
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -697,7 +701,7 @@
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
 		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
-		     u64 offset, int action, int for_cow)
+		     u64 offset, int action, int no_quota)
 {
 	struct btrfs_delayed_ref_node *existing;
 	struct btrfs_delayed_data_ref *full_ref;
@@ -709,6 +713,9 @@
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
+	if (is_fstree(ref_root))
+		seq = atomic64_read(&fs_info->tree_mod_seq);
+
 	/* first set the basic ref node struct up */
 	atomic_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
@@ -717,9 +724,7 @@
 	ref->action = action;
 	ref->is_head = 0;
 	ref->in_tree = 1;
-
-	if (need_ref_seq(for_cow, ref_root))
-		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+	ref->no_quota = no_quota;
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -762,12 +767,15 @@
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root,  int level, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
-			       int for_cow)
+			       int no_quota)
 {
 	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 
+	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+		no_quota = 0;
+
 	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -793,10 +801,8 @@
 
 	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, level, action,
-				   for_cow);
+				   no_quota);
 	spin_unlock(&delayed_refs->lock);
-	if (need_ref_seq(for_cow, ref_root))
-		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }
@@ -810,12 +816,15 @@
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
-			       int for_cow)
+			       int no_quota)
 {
 	struct btrfs_delayed_data_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 
+	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+		no_quota = 0;
+
 	BUG_ON(extent_op && !extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -841,10 +850,8 @@
 
 	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
-				   action, for_cow);
+				   action, no_quota);
 	spin_unlock(&delayed_refs->lock);
-	if (need_ref_seq(for_cow, ref_root))
-		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }

diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 4ba9b93..a764e23 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h

@@ -52,6 +52,7 @@
 
 	unsigned int action:8;
 	unsigned int type:8;
+	unsigned int no_quota:1;
 	/* is this node still in the rbtree? */
 	unsigned int is_head:1;
 	unsigned int in_tree:1;
@@ -196,14 +197,14 @@
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root, int level, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
-			       int for_cow);
+			       int no_quota);
 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 			       struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes,
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
-			       int for_cow);
+			       int no_quota);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
@@ -231,25 +232,6 @@
 			    u64 seq);
 
 /*
- * delayed refs with a ref_seq > 0 must be held back during backref walking.
- * this only applies to items in one of the fs-trees. for_cow items never need
- * to be held back, so they won't get a ref_seq number.
- */
-static inline int need_ref_seq(int for_cow, u64 rootid)
-{
-	if (for_cow)
-		return 0;
-
-	if (rootid == BTRFS_FS_TREE_OBJECTID)
-		return 1;
-
-	if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
-		return 1;
-
-	return 0;
-}
-
-/*
  * a node might live in a head or a regular ref, this lets you
  * test for the proper type to use.
  */

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 9f22905..2af6e66 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c

@@ -313,7 +313,7 @@
 
 	if (btrfs_fs_incompat(fs_info, RAID56)) {
 		btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
 	switch (args->start.cont_reading_from_srcdev_mode) {

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9833149..8bb4aa1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -49,6 +49,7 @@
 #include "dev-replace.h"
 #include "raid56.h"
 #include "sysfs.h"
+#include "qgroup.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -1109,6 +1110,11 @@
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
 						 u64 bytenr, u32 blocksize)
 {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return alloc_test_extent_buffer(root->fs_info, bytenr,
+						blocksize);
+#endif
 	return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
 }
 
@@ -1201,10 +1207,7 @@
 	root->nodesize = nodesize;
 	root->leafsize = leafsize;
 	root->stripesize = stripesize;
-	root->ref_cows = 0;
-	root->track_dirty = 0;
-	root->in_radix = 0;
-	root->orphan_item_inserted = 0;
+	root->state = 0;
 	root->orphan_cleanup_state = 0;
 
 	root->objectid = objectid;
@@ -1265,7 +1268,6 @@
 	else
 		root->defrag_trans_start = 0;
 	init_completion(&root->kobj_unregister);
-	root->defrag_running = 0;
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
 
@@ -1290,7 +1292,8 @@
 	if (!root)
 		return ERR_PTR(-ENOMEM);
 	__setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
-	root->dummy_root = 1;
+	set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
+	root->alloc_bytenr = 0;
 
 	return root;
 }
@@ -1341,8 +1344,7 @@
 	btrfs_mark_buffer_dirty(leaf);
 
 	root->commit_root = btrfs_root_node(root);
-	root->track_dirty = 1;
-
+	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 
 	root->root_item.flags = 0;
 	root->root_item.byte_limit = 0;
@@ -1371,6 +1373,7 @@
 fail:
 	if (leaf) {
 		btrfs_tree_unlock(leaf);
+		free_extent_buffer(root->commit_root);
 		free_extent_buffer(leaf);
 	}
 	kfree(root);
@@ -1396,13 +1399,15 @@
 	root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
 	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
 	root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+
 	/*
+	 * DON'T set REF_COWS for log trees
+	 *
 	 * log trees do not get reference counted because they go away
 	 * before a real commit is actually done.  They do store pointers
 	 * to file data extents, and those reference counts still get
 	 * updated (along with back refs to the log tree).
 	 */
-	root->ref_cows = 0;
 
 	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
 				      BTRFS_TREE_LOG_OBJECTID, NULL,
@@ -1536,7 +1541,7 @@
 		return root;
 
 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
-		root->ref_cows = 1;
+		set_bit(BTRFS_ROOT_REF_COWS, &root->state);
 		btrfs_check_and_init_root_item(&root->root_item);
 	}
 
@@ -1606,7 +1611,7 @@
 				(unsigned long)root->root_key.objectid,
 				root);
 	if (ret == 0)
-		root->in_radix = 1;
+		set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
 	spin_unlock(&fs_info->fs_roots_radix_lock);
 	radix_tree_preload_end();
 
@@ -1662,7 +1667,7 @@
 	if (ret < 0)
 		goto fail;
 	if (ret == 0)
-		root->orphan_item_inserted = 1;
+		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 	ret = btrfs_insert_fs_root(fs_info, root);
 	if (ret) {
@@ -2064,6 +2069,7 @@
 	btrfs_destroy_workqueue(fs_info->readahead_workers);
 	btrfs_destroy_workqueue(fs_info->flush_workers);
 	btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+	btrfs_destroy_workqueue(fs_info->extent_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2090,7 +2096,7 @@
 		free_root_extent_buffers(info->chunk_root);
 }
 
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
+void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
 {
 	int ret;
 	struct btrfs_root *gang[8];
@@ -2101,7 +2107,7 @@
 				     struct btrfs_root, root_list);
 		list_del(&gang[0]->root_list);
 
-		if (gang[0]->in_radix) {
+		if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) {
 			btrfs_drop_and_free_fs_root(fs_info, gang[0]);
 		} else {
 			free_extent_buffer(gang[0]->node);
@@ -2221,6 +2227,7 @@
 	spin_lock_init(&fs_info->free_chunk_lock);
 	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	spin_lock_init(&fs_info->super_lock);
+	spin_lock_init(&fs_info->qgroup_op_lock);
 	spin_lock_init(&fs_info->buffer_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
@@ -2246,6 +2253,7 @@
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
 	atomic_set(&fs_info->defrag_running, 0);
+	atomic_set(&fs_info->qgroup_op_seq, 0);
 	atomic64_set(&fs_info->tree_mod_seq, 0);
 	fs_info->sb = sb;
 	fs_info->max_inline = 8192 * 1024;
@@ -2291,6 +2299,7 @@
 	atomic_set(&fs_info->balance_cancel_req, 0);
 	fs_info->balance_ctl = NULL;
 	init_waitqueue_head(&fs_info->balance_wait_q);
+	btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
 
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
@@ -2354,6 +2363,7 @@
 	spin_lock_init(&fs_info->qgroup_lock);
 	mutex_init(&fs_info->qgroup_ioctl_lock);
 	fs_info->qgroup_tree = RB_ROOT;
+	fs_info->qgroup_op_tree = RB_ROOT;
 	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
 	fs_info->qgroup_seq = 1;
 	fs_info->quota_enabled = 0;
@@ -2577,6 +2587,10 @@
 		btrfs_alloc_workqueue("readahead", flags, max_active, 2);
 	fs_info->qgroup_rescan_workers =
 		btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+	fs_info->extent_workers =
+		btrfs_alloc_workqueue("extent-refs", flags,
+				      min_t(u64, fs_devices->num_devices,
+					    max_active), 8);
 
 	if (!(fs_info->workers && fs_info->delalloc_workers &&
 	      fs_info->submit_workers && fs_info->flush_workers &&
@@ -2586,6 +2600,7 @@
 	      fs_info->endio_freespace_worker && fs_info->rmw_workers &&
 	      fs_info->caching_workers && fs_info->readahead_workers &&
 	      fs_info->fixup_workers && fs_info->delayed_workers &&
+	      fs_info->fixup_workers && fs_info->extent_workers &&
 	      fs_info->qgroup_rescan_workers)) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
@@ -2693,7 +2708,7 @@
 		ret = PTR_ERR(extent_root);
 		goto recovery_tree_root;
 	}
-	extent_root->track_dirty = 1;
+	set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
 	fs_info->extent_root = extent_root;
 
 	location.objectid = BTRFS_DEV_TREE_OBJECTID;
@@ -2702,7 +2717,7 @@
 		ret = PTR_ERR(dev_root);
 		goto recovery_tree_root;
 	}
-	dev_root->track_dirty = 1;
+	set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
 	fs_info->dev_root = dev_root;
 	btrfs_init_devices_late(fs_info);
 
@@ -2712,13 +2727,13 @@
 		ret = PTR_ERR(csum_root);
 		goto recovery_tree_root;
 	}
-	csum_root->track_dirty = 1;
+	set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
 	fs_info->csum_root = csum_root;
 
 	location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
 	quota_root = btrfs_read_tree_root(tree_root, &location);
 	if (!IS_ERR(quota_root)) {
-		quota_root->track_dirty = 1;
+		set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
 		fs_info->quota_enabled = 1;
 		fs_info->pending_quota_state = 1;
 		fs_info->quota_root = quota_root;
@@ -2733,7 +2748,7 @@
 		create_uuid_tree = true;
 		check_uuid_tree = false;
 	} else {
-		uuid_root->track_dirty = 1;
+		set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
 		fs_info->uuid_root = uuid_root;
 		create_uuid_tree = false;
 		check_uuid_tree =
@@ -2966,7 +2981,7 @@
 fail_trans_kthread:
 	kthread_stop(fs_info->transaction_kthread);
 	btrfs_cleanup_transaction(fs_info->tree_root);
-	del_fs_roots(fs_info);
+	btrfs_free_fs_roots(fs_info);
 fail_cleaner:
 	kthread_stop(fs_info->cleaner_kthread);
 
@@ -3501,8 +3516,10 @@
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		btrfs_free_log(NULL, root);
 
-	__btrfs_remove_free_space_cache(root->free_ino_pinned);
-	__btrfs_remove_free_space_cache(root->free_ino_ctl);
+	if (root->free_ino_pinned)
+		__btrfs_remove_free_space_cache(root->free_ino_pinned);
+	if (root->free_ino_ctl)
+		__btrfs_remove_free_space_cache(root->free_ino_ctl);
 	free_fs_root(root);
 }
 
@@ -3533,28 +3550,51 @@
 {
 	u64 root_objectid = 0;
 	struct btrfs_root *gang[8];
-	int i;
-	int ret;
+	int i = 0;
+	int err = 0;
+	unsigned int ret = 0;
+	int index;
 
 	while (1) {
+		index = srcu_read_lock(&fs_info->subvol_srcu);
 		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
 					     (void **)gang, root_objectid,
 					     ARRAY_SIZE(gang));
-		if (!ret)
+		if (!ret) {
+			srcu_read_unlock(&fs_info->subvol_srcu, index);
 			break;
-
+		}
 		root_objectid = gang[ret - 1]->root_key.objectid + 1;
-		for (i = 0; i < ret; i++) {
-			int err;
 
+		for (i = 0; i < ret; i++) {
+			/* Avoid grabbing roots in dead_roots */
+			if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+				gang[i] = NULL;
+				continue;
+			}
+			/* grab all the search results for later use */
+			gang[i] = btrfs_grab_fs_root(gang[i]);
+		}
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
+
+		for (i = 0; i < ret; i++) {
+			if (!gang[i])
+				continue;
 			root_objectid = gang[i]->root_key.objectid;
 			err = btrfs_orphan_cleanup(gang[i]);
 			if (err)
-				return err;
+				break;
+			btrfs_put_fs_root(gang[i]);
 		}
 		root_objectid++;
 	}
-	return 0;
+
+	/* release the uncleaned roots due to error */
+	for (; i < ret; i++) {
+		if (gang[i])
+			btrfs_put_fs_root(gang[i]);
+	}
+	return err;
 }
 
 int btrfs_commit_super(struct btrfs_root *root)
@@ -3603,6 +3643,8 @@
 	/* clear out the rbtree of defraggable inodes */
 	btrfs_cleanup_defrag_inodes(fs_info);
 
+	cancel_work_sync(&fs_info->async_reclaim_work);
+
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_commit_super(root);
 		if (ret)
@@ -3627,12 +3669,17 @@
 
 	btrfs_sysfs_remove_one(fs_info);
 
-	del_fs_roots(fs_info);
+	btrfs_free_fs_roots(fs_info);
 
 	btrfs_put_block_group_cache(fs_info);
 
 	btrfs_free_block_groups(fs_info);
 
+	/*
+	 * we must make sure no read request is submitted
+	 * after we stop all the workers.
+	 */
+	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 	btrfs_stop_all_workers(fs_info);
 
 	free_root_pointers(fs_info, 1);
@@ -3709,6 +3756,12 @@
 		__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
 				     buf->len,
 				     root->fs_info->dirty_metadata_batch);
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+		btrfs_print_leaf(root, buf);
+		ASSERT(0);
+	}
+#endif
 }
 
 static void __btrfs_btree_balance_dirty(struct btrfs_root *root,

diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 53059df..23ce3ce 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h

@@ -68,6 +68,7 @@
 int btrfs_init_fs_root(struct btrfs_root *root);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root);
+void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
 
 struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 				     struct btrfs_key *key,

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5590af9..fafb3e5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c

@@ -26,16 +26,16 @@
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
 #include "hash.h"
-#include "ctree.h"
+#include "tree-log.h"
 #include "disk-io.h"
 #include "print-tree.h"
-#include "transaction.h"
 #include "volumes.h"
 #include "raid56.h"
 #include "locking.h"
 #include "free-space-cache.h"
 #include "math.h"
 #include "sysfs.h"
+#include "qgroup.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -81,7 +81,8 @@
 				u64 bytenr, u64 num_bytes, u64 parent,
 				u64 root_objectid, u64 owner_objectid,
 				u64 owner_offset, int refs_to_drop,
-				struct btrfs_delayed_extent_op *extra_op);
+				struct btrfs_delayed_extent_op *extra_op,
+				int no_quota);
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 				    struct extent_buffer *leaf,
 				    struct btrfs_extent_item *ei);
@@ -94,7 +95,8 @@
 				     struct btrfs_root *root,
 				     u64 parent, u64 root_objectid,
 				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins);
+				     int level, struct btrfs_key *ins,
+				     int no_quota);
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 flags,
 			  int force);
@@ -1271,7 +1273,7 @@
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 					   struct btrfs_root *root,
 					   struct btrfs_path *path,
-					   int refs_to_drop)
+					   int refs_to_drop, int *last_ref)
 {
 	struct btrfs_key key;
 	struct btrfs_extent_data_ref *ref1 = NULL;
@@ -1307,6 +1309,7 @@
 
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
+		*last_ref = 1;
 	} else {
 		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
 			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1764,7 +1767,8 @@
 				  struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
-				  struct btrfs_delayed_extent_op *extent_op)
+				  struct btrfs_delayed_extent_op *extent_op,
+				  int *last_ref)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_extent_item *ei;
@@ -1808,6 +1812,7 @@
 		else
 			btrfs_set_shared_data_ref_count(leaf, sref, refs);
 	} else {
+		*last_ref = 1;
 		size =  btrfs_extent_inline_ref_size(type);
 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 		ptr = (unsigned long)iref;
@@ -1839,7 +1844,7 @@
 	if (ret == 0) {
 		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
 		update_inline_extent_backref(root, path, iref,
-					     refs_to_add, extent_op);
+					     refs_to_add, extent_op, NULL);
 	} else if (ret == -ENOENT) {
 		setup_inline_extent_backref(root, path, iref, parent,
 					    root_objectid, owner, offset,
@@ -1872,17 +1877,19 @@
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref *iref,
-				 int refs_to_drop, int is_data)
+				 int refs_to_drop, int is_data, int *last_ref)
 {
 	int ret = 0;
 
 	BUG_ON(!is_data && refs_to_drop != 1);
 	if (iref) {
 		update_inline_extent_backref(root, path, iref,
-					     -refs_to_drop, NULL);
+					     -refs_to_drop, NULL, last_ref);
 	} else if (is_data) {
-		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+					     last_ref);
 	} else {
+		*last_ref = 1;
 		ret = btrfs_del_item(trans, root, path);
 	}
 	return ret;
@@ -1946,7 +1953,8 @@
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 owner, u64 offset, int for_cow)
+			 u64 root_objectid, u64 owner, u64 offset,
+			 int no_quota)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1958,12 +1966,12 @@
 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, (int)owner,
-					BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
 	} else {
 		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, owner, offset,
-					BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
 	}
 	return ret;
 }
@@ -1973,31 +1981,64 @@
 				  u64 bytenr, u64 num_bytes,
 				  u64 parent, u64 root_objectid,
 				  u64 owner, u64 offset, int refs_to_add,
+				  int no_quota,
 				  struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_extent_item *item;
+	struct btrfs_key key;
 	u64 refs;
 	int ret;
+	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
+	if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
+		no_quota = 1;
+
 	path->reada = 1;
 	path->leave_spinning = 1;
 	/* this will setup the path even if it fails to insert the back ref */
-	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
-					   path, bytenr, num_bytes, parent,
+	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
+					   bytenr, num_bytes, parent,
 					   root_objectid, owner, offset,
 					   refs_to_add, extent_op);
-	if (ret != -EAGAIN)
+	if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
 		goto out;
+	/*
+	 * Ok we were able to insert an inline extent and it appears to be a new
+	 * reference, deal with the qgroup accounting.
+	 */
+	if (!ret && !no_quota) {
+		ASSERT(root->fs_info->quota_enabled);
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		item = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_item);
+		if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
+			type = BTRFS_QGROUP_OPER_ADD_SHARED;
+		btrfs_release_path(path);
 
+		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+					      bytenr, num_bytes, type, 0);
+		goto out;
+	}
+
+	/*
+	 * Ok we had -EAGAIN which means we didn't have space to insert an
+	 * inline extent ref, so just update the reference count and add a
+	 * normal backref.
+	 */
 	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, item);
+	if (refs)
+		type = BTRFS_QGROUP_OPER_ADD_SHARED;
 	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
 	if (extent_op)
 		__run_delayed_extent_op(extent_op, leaf, item);
@@ -2005,9 +2046,15 @@
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 
+	if (!no_quota) {
+		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+					      bytenr, num_bytes, type, 0);
+		if (ret)
+			goto out;
+	}
+
 	path->reada = 1;
 	path->leave_spinning = 1;
-
 	/* now insert the actual backref */
 	ret = insert_extent_backref(trans, root->fs_info->extent_root,
 				    path, bytenr, parent, root_objectid,
@@ -2041,8 +2088,7 @@
 
 	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
 		parent = ref->parent;
-	else
-		ref_root = ref->root;
+	ref_root = ref->root;
 
 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
 		if (extent_op)
@@ -2056,13 +2102,13 @@
 					     node->num_bytes, parent,
 					     ref_root, ref->objectid,
 					     ref->offset, node->ref_mod,
-					     extent_op);
+					     node->no_quota, extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
 		ret = __btrfs_free_extent(trans, root, node->bytenr,
 					  node->num_bytes, parent,
 					  ref_root, ref->objectid,
 					  ref->offset, node->ref_mod,
-					  extent_op);
+					  extent_op, node->no_quota);
 	} else {
 		BUG();
 	}
@@ -2199,8 +2245,7 @@
 
 	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
 		parent = ref->parent;
-	else
-		ref_root = ref->root;
+	ref_root = ref->root;
 
 	ins.objectid = node->bytenr;
 	if (skinny_metadata) {
@@ -2218,15 +2263,18 @@
 						parent, ref_root,
 						extent_op->flags_to_set,
 						&extent_op->key,
-						ref->level, &ins);
+						ref->level, &ins,
+						node->no_quota);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
 					     node->num_bytes, parent, ref_root,
-					     ref->level, 0, 1, extent_op);
+					     ref->level, 0, 1, node->no_quota,
+					     extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
 		ret = __btrfs_free_extent(trans, root, node->bytenr,
 					  node->num_bytes, parent, ref_root,
-					  ref->level, 0, 1, extent_op);
+					  ref->level, 0, 1, extent_op,
+					  node->no_quota);
 	} else {
 		BUG();
 	}
@@ -2574,42 +2622,6 @@
 }
 #endif
 
-int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
-					 struct btrfs_fs_info *fs_info)
-{
-	struct qgroup_update *qgroup_update;
-	int ret = 0;
-
-	if (list_empty(&trans->qgroup_ref_list) !=
-	    !trans->delayed_ref_elem.seq) {
-		/* list without seq or seq without list */
-		btrfs_err(fs_info,
-			"qgroup accounting update error, list is%s empty, seq is %#x.%x",
-			list_empty(&trans->qgroup_ref_list) ? "" : " not",
-			(u32)(trans->delayed_ref_elem.seq >> 32),
-			(u32)trans->delayed_ref_elem.seq);
-		BUG();
-	}
-
-	if (!trans->delayed_ref_elem.seq)
-		return 0;
-
-	while (!list_empty(&trans->qgroup_ref_list)) {
-		qgroup_update = list_first_entry(&trans->qgroup_ref_list,
-						 struct qgroup_update, list);
-		list_del(&qgroup_update->list);
-		if (!ret)
-			ret = btrfs_qgroup_account_ref(
-					trans, fs_info, qgroup_update->node,
-					qgroup_update->extent_op);
-		kfree(qgroup_update);
-	}
-
-	btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
-
-	return ret;
-}
-
 static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
 {
 	u64 num_bytes;
@@ -2662,15 +2674,94 @@
 	u64 num_entries =
 		atomic_read(&trans->transaction->delayed_refs.num_entries);
 	u64 avg_runtime;
+	u64 val;
 
 	smp_mb();
 	avg_runtime = fs_info->avg_delayed_ref_runtime;
+	val = num_entries * avg_runtime;
 	if (num_entries * avg_runtime >= NSEC_PER_SEC)
 		return 1;
+	if (val >= NSEC_PER_SEC / 2)
+		return 2;
 
 	return btrfs_check_space_for_delayed_refs(trans, root);
 }
 
+struct async_delayed_refs {
+	struct btrfs_root *root;
+	int count;
+	int error;
+	int sync;
+	struct completion wait;
+	struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+	struct async_delayed_refs *async;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	async = container_of(work, struct async_delayed_refs, work);
+
+	trans = btrfs_join_transaction(async->root);
+	if (IS_ERR(trans)) {
+		async->error = PTR_ERR(trans);
+		goto done;
+	}
+
+	/*
+	 * trans->sync means that when we call end_transaction, we won't
+	 * wait on delayed refs
+	 */
+	trans->sync = true;
+	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+	if (ret)
+		async->error = ret;
+
+	ret = btrfs_end_transaction(trans, async->root);
+	if (ret && !async->error)
+		async->error = ret;
+done:
+	if (async->sync)
+		complete(&async->wait);
+	else
+		kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+				 unsigned long count, int wait)
+{
+	struct async_delayed_refs *async;
+	int ret;
+
+	async = kmalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		return -ENOMEM;
+
+	async->root = root->fs_info->tree_root;
+	async->count = count;
+	async->error = 0;
+	if (wait)
+		async->sync = 1;
+	else
+		async->sync = 0;
+	init_completion(&async->wait);
+
+	btrfs_init_work(&async->work, delayed_ref_async_start,
+			NULL, NULL);
+
+	btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+	if (wait) {
+		wait_for_completion(&async->wait);
+		ret = async->error;
+		kfree(async);
+		return ret;
+	}
+	return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2698,8 +2789,6 @@
 	if (root == root->fs_info->extent_root)
 		root = root->fs_info->tree_root;
 
-	btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
-
 	delayed_refs = &trans->transaction->delayed_refs;
 	if (count == 0) {
 		count = atomic_read(&delayed_refs->num_entries) * 2;
@@ -2758,6 +2847,9 @@
 		goto again;
 	}
 out:
+	ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
+	if (ret)
+		return ret;
 	assert_qgroups_uptodate(trans);
 	return 0;
 }
@@ -2964,7 +3056,7 @@
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct extent_buffer *buf,
-			   int full_backref, int inc, int for_cow)
+			   int full_backref, int inc, int no_quota)
 {
 	u64 bytenr;
 	u64 num_bytes;
@@ -2979,11 +3071,15 @@
 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
 			    u64, u64, u64, u64, u64, u64, int);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return 0;
+#endif
 	ref_root = btrfs_header_owner(buf);
 	nritems = btrfs_header_nritems(buf);
 	level = btrfs_header_level(buf);
 
-	if (!root->ref_cows && level == 0)
+	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
 		return 0;
 
 	if (inc)
@@ -3014,7 +3110,7 @@
 			key.offset -= btrfs_file_extent_offset(buf, fi);
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, key.objectid,
-					   key.offset, for_cow);
+					   key.offset, no_quota);
 			if (ret)
 				goto fail;
 		} else {
@@ -3022,7 +3118,7 @@
 			num_bytes = btrfs_level_size(root, level - 1);
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, level - 1, 0,
-					   for_cow);
+					   no_quota);
 			if (ret)
 				goto fail;
 		}
@@ -3033,15 +3129,15 @@
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow)
+		  struct extent_buffer *buf, int full_backref, int no_quota)
 {
-	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
 }
 
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow)
+		  struct extent_buffer *buf, int full_backref, int no_quota)
 {
-	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -3401,10 +3497,8 @@
 		return ret;
 	}
 
-	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
 		INIT_LIST_HEAD(&found->block_groups[i]);
-		kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
-	}
 	init_rwsem(&found->groups_sem);
 	spin_lock_init(&found->lock);
 	found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -4204,6 +4298,104 @@
 
 	return ret;
 }
+
+/*
+ * Work out how many bytes of metadata space the background flusher should
+ * try to reclaim from @space_info.
+ *
+ * Returns 0 when a reservation of min(online CPUs * 1MiB, 16MiB) can still
+ * be overcommitted.  Otherwise returns the amount by which the summed usage
+ * counters exceed div_factor_fine(total_bytes, 95) (when 1MiB can still be
+ * overcommitted) or div_factor_fine(total_bytes, 90) (when it cannot),
+ * capped at bytes_may_use + bytes_reserved.  space_info->lock is held for
+ * the whole computation.
+ */
+static inline u64
+btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
+				 struct btrfs_space_info *space_info)
+{
+	u64 in_use;
+	u64 target;
+	u64 reclaim;
+	int factor;
+
+	reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
+			16 * 1024 * 1024);
+
+	spin_lock(&space_info->lock);
+	if (can_overcommit(root, space_info, reclaim,
+			   BTRFS_RESERVE_FLUSH_ALL)) {
+		spin_unlock(&space_info->lock);
+		return 0;
+	}
+
+	in_use = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
+
+	/* Use the higher target while a small overcommit is still possible. */
+	factor = can_overcommit(root, space_info, 1024 * 1024,
+				BTRFS_RESERVE_FLUSH_ALL) ? 95 : 90;
+	target = div_factor_fine(space_info->total_bytes, factor);
+
+	reclaim = in_use > target ? in_use - target : 0;
+	reclaim = min(reclaim, space_info->bytes_may_use +
+			       space_info->bytes_reserved);
+	spin_unlock(&space_info->lock);
+
+	return reclaim;
+}
+
+/*
+ * Decide whether background metadata reclaim is warranted for @used bytes
+ * of usage.  Returns 1 only when @used has reached
+ * div_factor_fine(total_bytes, 98), btrfs_fs_closing() is false and the
+ * BTRFS_FS_STATE_REMOUNTING bit is clear; returns 0 otherwise.
+ */
+static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
+					struct btrfs_fs_info *fs_info, u64 used)
+{
+	if (used < div_factor_fine(space_info->total_bytes, 98))
+		return 0;
+	if (btrfs_fs_closing(fs_info))
+		return 0;
+
+	return !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+
+/*
+ * Locked wrapper around need_do_async_reclaim(): sums the space_info usage
+ * counters under space_info->lock and reports (1 or 0) whether background
+ * reclaim is needed for that total.
+ */
+static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
+				       struct btrfs_fs_info *fs_info)
+{
+	u64 in_use;
+	int needed;
+
+	spin_lock(&space_info->lock);
+	in_use = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
+	needed = need_do_async_reclaim(space_info, fs_info, in_use) ? 1 : 0;
+	spin_unlock(&space_info->lock);
+
+	return needed;
+}
+
+/*
+ * Work item handler that flushes metadata space in the background.
+ *
+ * Recovers the fs_info from the embedded async_reclaim_work, computes a
+ * reclaim target for the metadata space_info and then calls flush_space()
+ * with successive flush states, starting at FLUSH_DELAYED_ITEMS_NR and
+ * continuing while the state is <= COMMIT_TRANS.  It stops as soon as
+ * btrfs_need_do_async_reclaim() reports that reclaim is no longer needed,
+ * and re-queues itself on system_unbound_wq if reclaim is still needed
+ * after the last state.
+ */
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_space_info *space_info;
+	u64 to_reclaim;
+	int flush_state;
+
+	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+						      space_info);
+	/* Nothing to reclaim: return before running any flush state. */
+	if (!to_reclaim)
+		return;
+
+	flush_state = FLUSH_DELAYED_ITEMS_NR;
+	do {
+		flush_space(fs_info->fs_root, space_info, to_reclaim,
+			    to_reclaim, flush_state);
+		flush_state++;
+		/* Re-check after every state so we stop as early as possible. */
+		if (!btrfs_need_do_async_reclaim(space_info, fs_info))
+			return;
+	} while (flush_state <= COMMIT_TRANS);
+
+	/* Every state was tried and reclaim is still needed: go around again. */
+	if (btrfs_need_do_async_reclaim(space_info, fs_info))
+		queue_work(system_unbound_wq, work);
+}
+
+/*
+ * Initialise @work so that, when it runs, it executes
+ * btrfs_async_reclaim_metadata_space().
+ */
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
@@ -4311,8 +4503,13 @@
 	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
 		flushing = true;
 		space_info->flush = 1;
+	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+		used += orig_bytes;
+		if (need_do_async_reclaim(space_info, root->fs_info, used) &&
+		    !work_busy(&root->fs_info->async_reclaim_work))
+			queue_work(system_unbound_wq,
+				   &root->fs_info->async_reclaim_work);
 	}
-
 	spin_unlock(&space_info->lock);
 
 	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
@@ -4369,7 +4566,7 @@
 {
 	struct btrfs_block_rsv *block_rsv = NULL;
 
-	if (root->ref_cows)
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		block_rsv = trans->block_rsv;
 
 	if (root == root->fs_info->csum_root && trans->adding_csums)
@@ -5621,7 +5818,8 @@
 				u64 bytenr, u64 num_bytes, u64 parent,
 				u64 root_objectid, u64 owner_objectid,
 				u64 owner_offset, int refs_to_drop,
-				struct btrfs_delayed_extent_op *extent_op)
+				struct btrfs_delayed_extent_op *extent_op,
+				int no_quota)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -5637,9 +5835,14 @@
 	int num_to_del = 1;
 	u32 item_size;
 	u64 refs;
+	int last_ref = 0;
+	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
 	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
 						 SKINNY_METADATA);
 
+	if (!info->quota_enabled || !is_fstree(root_objectid))
+		no_quota = 1;
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -5687,7 +5890,7 @@
 			BUG_ON(iref);
 			ret = remove_extent_backref(trans, extent_root, path,
 						    NULL, refs_to_drop,
-						    is_data);
+						    is_data, &last_ref);
 			if (ret) {
 				btrfs_abort_transaction(trans, extent_root, ret);
 				goto out;
@@ -5806,7 +6009,7 @@
 	refs = btrfs_extent_refs(leaf, ei);
 	if (refs < refs_to_drop) {
 		btrfs_err(info, "trying to drop %d refs but we only have %Lu "
-			  "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+			  "for bytenr %Lu", refs_to_drop, refs, bytenr);
 		ret = -EINVAL;
 		btrfs_abort_transaction(trans, extent_root, ret);
 		goto out;
@@ -5814,6 +6017,7 @@
 	refs -= refs_to_drop;
 
 	if (refs > 0) {
+		type = BTRFS_QGROUP_OPER_SUB_SHARED;
 		if (extent_op)
 			__run_delayed_extent_op(extent_op, leaf, ei);
 		/*
@@ -5829,7 +6033,7 @@
 		if (found_extent) {
 			ret = remove_extent_backref(trans, extent_root, path,
 						    iref, refs_to_drop,
-						    is_data);
+						    is_data, &last_ref);
 			if (ret) {
 				btrfs_abort_transaction(trans, extent_root, ret);
 				goto out;
@@ -5850,6 +6054,7 @@
 			}
 		}
 
+		last_ref = 1;
 		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
 				      num_to_del);
 		if (ret) {
@@ -5872,6 +6077,20 @@
 			goto out;
 		}
 	}
+	btrfs_release_path(path);
+
+	/* Deal with the quota accounting */
+	if (!ret && last_ref && !no_quota) {
+		int mod_seq = 0;
+
+		if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		    type == BTRFS_QGROUP_OPER_SUB_SHARED)
+			mod_seq = 1;
+
+		ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
+					      bytenr, num_bytes, type,
+					      mod_seq);
+	}
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -6008,11 +6227,15 @@
 /* Can return -ENOMEM */
 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-		      u64 owner, u64 offset, int for_cow)
+		      u64 owner, u64 offset, int no_quota)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return 0;
+#endif
 	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
 
 	/*
@@ -6028,13 +6251,13 @@
 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, (int)owner,
-					BTRFS_DROP_DELAYED_REF, NULL, for_cow);
+					BTRFS_DROP_DELAYED_REF, NULL, no_quota);
 	} else {
 		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
 						num_bytes,
 						parent, root_objectid, owner,
 						offset, BTRFS_DROP_DELAYED_REF,
-						NULL, for_cow);
+						NULL, no_quota);
 	}
 	return ret;
 }
@@ -6514,8 +6737,14 @@
 		loop++;
 		if (loop == LOOP_ALLOC_CHUNK) {
 			struct btrfs_trans_handle *trans;
+			int exist = 0;
 
-			trans = btrfs_join_transaction(root);
+			trans = current->journal_info;
+			if (trans)
+				exist = 1;
+			else
+				trans = btrfs_join_transaction(root);
+
 			if (IS_ERR(trans)) {
 				ret = PTR_ERR(trans);
 				goto out;
@@ -6532,7 +6761,8 @@
 							root, ret);
 			else
 				ret = 0;
-			btrfs_end_transaction(trans, root);
+			if (!exist)
+				btrfs_end_transaction(trans, root);
 			if (ret)
 				goto out;
 		}
@@ -6733,6 +6963,13 @@
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 
+	/* Always set parent to 0 here since it's exclusive anyway. */
+	ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+				      ins->objectid, ins->offset,
+				      BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+	if (ret)
+		return ret;
+
 	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6747,7 +6984,8 @@
 				     struct btrfs_root *root,
 				     u64 parent, u64 root_objectid,
 				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins)
+				     int level, struct btrfs_key *ins,
+				     int no_quota)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6757,6 +6995,7 @@
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	u32 size = sizeof(*extent_item) + sizeof(*iref);
+	u64 num_bytes = ins->offset;
 	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
 						 SKINNY_METADATA);
 
@@ -6790,6 +7029,7 @@
 
 	if (skinny_metadata) {
 		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+		num_bytes = root->leafsize;
 	} else {
 		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
 		btrfs_set_tree_block_key(leaf, block_info, key);
@@ -6811,6 +7051,14 @@
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
+	if (!no_quota) {
+		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+					      ins->objectid, num_bytes,
+					      BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+		if (ret)
+			return ret;
+	}
+
 	ret = update_block_group(root, ins->objectid, root->leafsize, 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6994,6 +7242,15 @@
 	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
 						 SKINNY_METADATA);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) {
+		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
+					    blocksize, level);
+		if (!IS_ERR(buf))
+			root->alloc_bytenr += blocksize;
+		return buf;
+	}
+#endif
 	block_rsv = use_block_rsv(trans, root, blocksize);
 	if (IS_ERR(block_rsv))
 		return ERR_CAST(block_rsv);
@@ -7735,7 +7992,7 @@
 		}
 	}
 
-	if (root->in_radix) {
+	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
 		btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
 	} else {
 		free_extent_buffer(root->node);
@@ -8327,8 +8584,9 @@
 		list_del(&space_info->list);
 		for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
 			struct kobject *kobj;
-			kobj = &space_info->block_group_kobjs[i];
-			if (kobj->parent) {
+			kobj = space_info->block_group_kobjs[i];
+			space_info->block_group_kobjs[i] = NULL;
+			if (kobj) {
 				kobject_del(kobj);
 				kobject_put(kobj);
 			}
@@ -8352,17 +8610,26 @@
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct kobject *kobj = &space_info->block_group_kobjs[index];
+		struct raid_kobject *rkobj;
 		int ret;
 
-		kobject_get(&space_info->kobj); /* put in release */
-		ret = kobject_add(kobj, &space_info->kobj, "%s",
-				  get_raid_name(index));
+		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		if (!rkobj)
+			goto out_err;
+		rkobj->raid_type = index;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
 		if (ret) {
-			pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
-			kobject_put(&space_info->kobj);
+			kobject_put(&rkobj->kobj);
+			goto out_err;
 		}
+		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
+
+	return;
+out_err:
+	pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
 }
 
 static struct btrfs_block_group_cache *
@@ -8611,7 +8878,7 @@
 
 	extent_root = root->fs_info->extent_root;
 
-	root->fs_info->last_trans_log_full_commit = trans->transid;
+	btrfs_set_log_full_commit(root->fs_info, trans);
 
 	cache = btrfs_create_block_group_cache(root, chunk_offset, size);
 	if (!cache)
@@ -8697,6 +8964,7 @@
 	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_key key;
 	struct inode *inode;
+	struct kobject *kobj = NULL;
 	int ret;
 	int index;
 	int factor;
@@ -8796,11 +9064,15 @@
 	 */
 	list_del_init(&block_group->list);
 	if (list_empty(&block_group->space_info->block_groups[index])) {
-		kobject_del(&block_group->space_info->block_group_kobjs[index]);
-		kobject_put(&block_group->space_info->block_group_kobjs[index]);
+		kobj = block_group->space_info->block_group_kobjs[index];
+		block_group->space_info->block_group_kobjs[index] = NULL;
 		clear_avail_alloc_bits(root->fs_info, block_group->flags);
 	}
 	up_write(&block_group->space_info->groups_sem);
+	if (kobj) {
+		kobject_del(kobj);
+		kobject_put(kobj);
+	}
 
 	if (block_group->cached == BTRFS_CACHE_STARTED)
 		wait_block_group_cache_done(block_group);

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4cd0ac9..a389820 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c

@@ -1693,6 +1693,7 @@
 		 * shortening the size of the delalloc range we're searching
 		 */
 		free_extent_state(cached_state);
+		cached_state = NULL;
 		if (!loops) {
 			max_bytes = PAGE_CACHE_SIZE;
 			loops = 1;
@@ -2353,7 +2354,7 @@
 {
 	int uptodate = (err == 0);
 	struct extent_io_tree *tree;
-	int ret;
+	int ret = 0;
 
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2367,6 +2368,8 @@
 	if (!uptodate) {
 		ClearPageUptodate(page);
 		SetPageError(page);
+		ret = ret < 0 ? ret : -EIO;
+		mapping_set_error(page->mapping, ret);
 	}
 	return 0;
 }
@@ -3098,143 +3101,130 @@
 }
 
 /*
- * the writepage semantics are similar to regular writepage.  extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback.  Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent).  In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
  */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+			      struct page *page, struct writeback_control *wbc,
+			      struct extent_page_data *epd,
+			      u64 delalloc_start,
+			      unsigned long *nr_written)
 {
-	struct inode *inode = page->mapping->host;
-	struct extent_page_data *epd = data;
+	struct extent_io_tree *tree = epd->tree;
+	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 nr_delalloc;
+	u64 delalloc_to_write = 0;
+	u64 delalloc_end = 0;
+	int ret;
+	int page_started = 0;
+
+	/*
+	 * Skip delalloc setup when epd->extent_locked is set or the tree has
+	 * no fill_delalloc hook.
+	 */
+	if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+		return 0;
+
+	/* Keep searching until the search has moved past the end of the page */
+	while (delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+					       page,
+					       &delalloc_start,
+					       &delalloc_end,
+					       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			/* nothing found; resume just past delalloc_end */
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       nr_written);
+		/* File system has been set read-only */
+		if (ret) {
+			SetPageError(page);
+			/* fill_delalloc should return < 0 for error
+			 * but just in case, we use > 0 here meaning the
+			 * IO is started, so we don't want to return > 0
+			 * unless things are going well.
+			 */
+			ret = ret < 0 ? ret : -EIO;
+			goto done;
+		}
+		/*
+		 * delalloc_end is already one less than the total
+		 * length, so we don't subtract one from
+		 * PAGE_CACHE_SIZE
+		 */
+		delalloc_to_write += (delalloc_end - delalloc_start +
+				      PAGE_CACHE_SIZE) >>
+				      PAGE_CACHE_SHIFT;
+		delalloc_start = delalloc_end + 1;
+	}
+	/*
+	 * Raise nr_to_write to cover the pages just set up: the full count
+	 * when it is below 2 * 8192, otherwise 8192.
+	 */
+	if (wbc->nr_to_write < delalloc_to_write) {
+		int thresh = 8192;
+
+		if (delalloc_to_write < thresh * 2)
+			thresh = delalloc_to_write;
+		wbc->nr_to_write = min_t(u64, delalloc_to_write,
+					 thresh);
+	}
+
+	/* did the fill delalloc function already unlock and start
+	 * the IO?
+	 */
+	if (page_started) {
+		/*
+		 * we've unlocked the page, so we can't update
+		 * the mapping's writeback index, just update
+		 * nr_to_write.
+		 */
+		wbc->nr_to_write -= *nr_written;
+		return 1;
+	}
+
+	ret = 0;
+
+done:
+	return ret;
+}
+
+/*
+ * helper for __extent_writepage.  This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+				 struct page *page,
+				 struct writeback_control *wbc,
+				 struct extent_page_data *epd,
+				 loff_t i_size,
+				 unsigned long nr_written,
+				 int write_flags, int *nr_ret)
+{
 	struct extent_io_tree *tree = epd->tree;
 	u64 start = page_offset(page);
-	u64 delalloc_start;
 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
-	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct extent_map *em;
 	struct block_device *bdev;
-	int ret;
-	int nr = 0;
 	size_t pg_offset = 0;
 	size_t blocksize;
-	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-	u64 nr_delalloc;
-	u64 delalloc_end;
-	int page_started;
-	int compressed;
-	int write_flags;
-	unsigned long nr_written = 0;
-	bool fill_delalloc = true;
+	int ret = 0;
+	int nr = 0;
+	bool compressed;
 
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
-	else
-		write_flags = WRITE;
-
-	trace___extent_writepage(page, inode, wbc);
-
-	WARN_ON(!PageLocked(page));
-
-	ClearPageError(page);
-
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if (page->index > end_index ||
-	   (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (page->index == end_index) {
-		char *userpage;
-
-		userpage = kmap_atomic(page);
-		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage);
-		flush_dcache_page(page);
-	}
-	pg_offset = 0;
-
-	set_page_extent_mapped(page);
-
-	if (!tree->ops || !tree->ops->fill_delalloc)
-		fill_delalloc = false;
-
-	delalloc_start = start;
-	delalloc_end = 0;
-	page_started = 0;
-	if (!epd->extent_locked && fill_delalloc) {
-		u64 delalloc_to_write = 0;
-		/*
-		 * make sure the wbc mapping index is at least updated
-		 * to this page.
-		 */
-		update_nr_written(page, wbc, 0);
-
-		while (delalloc_end < page_end) {
-			nr_delalloc = find_lock_delalloc_range(inode, tree,
-						       page,
-						       &delalloc_start,
-						       &delalloc_end,
-						       128 * 1024 * 1024);
-			if (nr_delalloc == 0) {
-				delalloc_start = delalloc_end + 1;
-				continue;
-			}
-			ret = tree->ops->fill_delalloc(inode, page,
-						       delalloc_start,
-						       delalloc_end,
-						       &page_started,
-						       &nr_written);
-			/* File system has been set read-only */
-			if (ret) {
-				SetPageError(page);
-				goto done;
-			}
-			/*
-			 * delalloc_end is already one less than the total
-			 * length, so we don't subtract one from
-			 * PAGE_CACHE_SIZE
-			 */
-			delalloc_to_write += (delalloc_end - delalloc_start +
-					      PAGE_CACHE_SIZE) >>
-					      PAGE_CACHE_SHIFT;
-			delalloc_start = delalloc_end + 1;
-		}
-		if (wbc->nr_to_write < delalloc_to_write) {
-			int thresh = 8192;
-
-			if (delalloc_to_write < thresh * 2)
-				thresh = delalloc_to_write;
-			wbc->nr_to_write = min_t(u64, delalloc_to_write,
-						 thresh);
-		}
-
-		/* did the fill delalloc function already unlock and start
-		 * the IO?
-		 */
-		if (page_started) {
-			ret = 0;
-			/*
-			 * we've unlocked the page, so we can't update
-			 * the mapping's writeback index, just update
-			 * nr_to_write.
-			 */
-			wbc->nr_to_write -= nr_written;
-			goto done_unlocked;
-		}
-	}
 	if (tree->ops && tree->ops->writepage_start_hook) {
 		ret = tree->ops->writepage_start_hook(page, start,
 						      page_end);
@@ -3244,9 +3234,10 @@
 				wbc->pages_skipped++;
 			else
 				redirty_page_for_writepage(wbc, page);
+
 			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
-			ret = 0;
+			ret = 1;
 			goto done_unlocked;
 		}
 	}
@@ -3258,7 +3249,7 @@
 	update_nr_written(page, wbc, nr_written + 1);
 
 	end = page_end;
-	if (last_byte <= start) {
+	if (i_size <= start) {
 		if (tree->ops && tree->ops->writepage_end_io_hook)
 			tree->ops->writepage_end_io_hook(page, start,
 							 page_end, NULL, 1);
@@ -3268,7 +3259,8 @@
 	blocksize = inode->i_sb->s_blocksize;
 
 	while (cur <= end) {
-		if (cur >= last_byte) {
+		u64 em_end;
+		if (cur >= i_size) {
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 page_end, NULL, 1);
@@ -3278,13 +3270,15 @@
 				     end - cur + 1, 1);
 		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
+			ret = PTR_ERR_OR_ZERO(em);
 			break;
 		}
 
 		extent_offset = cur - em->start;
-		BUG_ON(extent_map_end(em) <= cur);
+		em_end = extent_map_end(em);
+		BUG_ON(em_end <= cur);
 		BUG_ON(end < cur);
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = min(em_end - cur, end - cur + 1);
 		iosize = ALIGN(iosize, blocksize);
 		sector = (em->block_start + extent_offset) >> 9;
 		bdev = em->bdev;
@@ -3320,13 +3314,6 @@
 			pg_offset += iosize;
 			continue;
 		}
-		/* leave this out until we have a page_mkwrite call */
-		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-				   EXTENT_DIRTY, 0, NULL)) {
-			cur = cur + iosize;
-			pg_offset += iosize;
-			continue;
-		}
 
 		if (tree->ops && tree->ops->writepage_io_hook) {
 			ret = tree->ops->writepage_io_hook(page, cur,
@@ -3337,7 +3324,7 @@
 		if (ret) {
 			SetPageError(page);
 		} else {
-			unsigned long max_nr = end_index + 1;
+			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
 
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
@@ -3359,17 +3346,94 @@
 		nr++;
 	}
 done:
-	if (nr == 0) {
-		/* make sure the mapping tag for page dirty gets cleared */
-		set_page_writeback(page);
-		end_page_writeback(page);
-	}
-	unlock_page(page);
+	*nr_ret = nr;
 
 done_unlocked:
 
 	/* drop our reference on any cached states */
 	free_extent_state(cached_state);
+	return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage.  extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.  Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	u64 start = page_offset(page);
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	int write_flags;
+	unsigned long nr_written = 0;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC;
+	else
+		write_flags = WRITE;
+
+	trace___extent_writepage(page, inode, wbc);
+
+	WARN_ON(!PageLocked(page));
+
+	ClearPageError(page);
+
+	/*
+	 * The page starts at or beyond i_size: invalidate it, unlock it and
+	 * return without writing anything.
+	 */
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	   (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		unlock_page(page);
+		return 0;
+	}
+
+	/* i_size falls inside this page: zero everything after it */
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage);
+		flush_dcache_page(page);
+	}
+
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	/*
+	 * Both helpers return 1 when the page has already been unlocked,
+	 * 0 on success with the page still locked, and < 0 on error with
+	 * the page still locked.
+	 */
+	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+	if (ret == 1)
+		goto done_unlocked;
+	if (ret)
+		goto done;
+
+	ret = __extent_writepage_io(inode, page, wbc, epd,
+				    i_size, nr_written, write_flags, &nr);
+	if (ret == 1)
+		goto done_unlocked;
+
+done:
+	if (nr == 0) {
+		/* make sure the mapping tag for page dirty gets cleared */
+		set_page_writeback(page);
+		end_page_writeback(page);
+	}
+	/* a page flagged with an error always yields a negative return */
+	if (PageError(page)) {
+		ret = ret < 0 ? ret : -EIO;
+		end_extent_writepage(page, ret, start, page_end);
+	}
+	unlock_page(page);
+	return ret;
+
+done_unlocked:
 	return 0;
 }
 
@@ -3385,9 +3449,10 @@
 		    TASK_UNINTERRUPTIBLE);
 }
 
-static int lock_extent_buffer_for_io(struct extent_buffer *eb,
-				     struct btrfs_fs_info *fs_info,
-				     struct extent_page_data *epd)
+static noinline_for_stack int
+lock_extent_buffer_for_io(struct extent_buffer *eb,
+			  struct btrfs_fs_info *fs_info,
+			  struct extent_page_data *epd)
 {
 	unsigned long i, num_pages;
 	int flush = 0;
@@ -3492,7 +3557,7 @@
 	bio_put(bio);
 }
 
-static int write_one_eb(struct extent_buffer *eb,
+static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 			struct btrfs_fs_info *fs_info,
 			struct writeback_control *wbc,
 			struct extent_page_data *epd)
@@ -3690,6 +3755,7 @@
 	struct inode *inode = mapping->host;
 	int ret = 0;
 	int done = 0;
+	int err = 0;
 	int nr_to_write_done = 0;
 	struct pagevec pvec;
 	int nr_pages;
@@ -3776,8 +3842,8 @@
 				unlock_page(page);
 				ret = 0;
 			}
-			if (ret)
-				done = 1;
+			if (!err && ret < 0)
+				err = ret;
 
 			/*
 			 * the filesystem may choose to bump up nr_to_write.
@@ -3789,7 +3855,7 @@
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	if (!scanned && !done) {
+	if (!scanned && !done && !err) {
 		/*
 		 * We hit the last page and there is more work to be done: wrap
 		 * back to the start of the file
@@ -3799,7 +3865,7 @@
 		goto retry;
 	}
 	btrfs_add_delayed_iput(inode);
-	return ret;
+	return err;
 }
 
 static void flush_epd_write_bio(struct extent_page_data *epd)
@@ -4543,6 +4609,53 @@
 	return NULL;
 }
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+/*
+ * Return the extent buffer for @start, creating a dummy one and inserting
+ * it into fs_info->buffer_radix if none is found.
+ *
+ * Returns NULL if the dummy buffer cannot be allocated or the radix tree
+ * preload fails.  If the insert collides with an existing entry that can
+ * still be found, the new buffer is released and the existing one is
+ * returned instead.
+ */
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+					       u64 start, unsigned long len)
+{
+	struct extent_buffer *eb, *exists = NULL;
+	int ret;
+
+	eb = find_extent_buffer(fs_info, start);
+	if (eb)
+		return eb;
+	eb = alloc_dummy_extent_buffer(start, len);
+	if (!eb)
+		return NULL;
+	eb->fs_info = fs_info;
+again:
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+	spin_lock(&fs_info->buffer_lock);
+	ret = radix_tree_insert(&fs_info->buffer_radix,
+				start >> PAGE_CACHE_SHIFT, eb);
+	spin_unlock(&fs_info->buffer_lock);
+	radix_tree_preload_end();
+	/*
+	 * The slot was already occupied: hand back that buffer if it can
+	 * still be found, otherwise retry the insert.
+	 */
+	if (ret == -EEXIST) {
+		exists = find_extent_buffer(fs_info, start);
+		if (exists)
+			goto free_eb;
+		else
+			goto again;
+	}
+	check_buffer_tree_ref(eb);
+	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
+
+	/*
+	 * We will free dummy extent buffers if they come into
+	 * free_extent_buffer with a ref count of 2, but if we are using this we
+	 * want the buffers to stay in memory until we're done with them, so
+	 * bump the ref count again.
+	 */
+	atomic_inc(&eb->refs);
+	return eb;
+free_eb:
+	/* exists is still NULL here unless the -EEXIST path found a buffer */
+	btrfs_release_extent_buffer(eb);
+	return exists;
+}
+#endif
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, unsigned long len)
 {
@@ -4955,6 +5068,43 @@
 	}
 }
 
+/*
+ * Copy @len bytes, beginning @start bytes into the extent buffer @eb, to
+ * the user-space buffer @dstv.
+ *
+ * The copy proceeds one extent buffer page at a time.  Returns 0 on
+ * success, or -EFAULT as soon as copy_to_user() reports a failure; chunks
+ * copied before the failure are not undone.
+ */
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+			unsigned long start,
+			unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char __user *dst = (char __user *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	/*
+	 * @start and @len are relative to the beginning of the buffer, so
+	 * the requested range has to be checked against eb->len alone.
+	 * Adding eb->start to the limit would let out-of-range requests
+	 * through without a warning.
+	 */
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->len);
+
+	/* offset of the first byte within its page */
+	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		/* never copy across a page boundary in one go */
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = page_address(page);
+		if (copy_to_user(dst, kaddr + offset, cur)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		dst += cur;
+		len -= cur;
+		/* every page after the first is read from its beginning */
+		offset = 0;
+		i++;
+	}
+
+	return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long min_len, char **map,
 			       unsigned long *map_start,

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c488b45..15ce5f2 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h

@@ -304,6 +304,9 @@
 void read_extent_buffer(struct extent_buffer *eb, void *dst,
 			unsigned long start,
 			unsigned long len);
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+			       unsigned long start,
+			       unsigned long len);
 void write_extent_buffer(struct extent_buffer *eb, const void *src,
 			 unsigned long start, unsigned long len);
 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
@@ -350,5 +353,7 @@
 				      struct extent_io_tree *tree,
 				      struct page *locked_page, u64 *start,
 				      u64 *end, u64 max_bytes);
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+					       u64 start, unsigned long len);
 #endif
 #endif

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 127555b..f46cfe4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c

@@ -281,10 +281,10 @@
 found:
 		csum += count * csum_size;
 		nblocks -= count;
+		bio_index += count;
 		while (count--) {
 			disk_bytenr += bvec->bv_len;
 			offset += bvec->bv_len;
-			bio_index++;
 			bvec++;
 		}
 	}
@@ -750,7 +750,7 @@
 		int slot = path->slots[0] + 1;
 		/* we didn't find a csum item, insert one */
 		nritems = btrfs_header_nritems(path->nodes[0]);
-		if (path->slots[0] >= nritems - 1) {
+		if (!nritems || (path->slots[0] >= nritems - 1)) {
 			ret = btrfs_next_leaf(root, path);
 			if (ret == 1)
 				found_next = 1;
@@ -885,3 +885,79 @@
 fail_unlock:
 	goto out;
 }
+
+/*
+ * Fill in @em from the file extent item @fi located at path->nodes[0] /
+ * path->slots[0].
+ *
+ * Regular and preallocated extents get their start, length, original start
+ * and block location from the item; a disk bytenr of 0 is mapped to
+ * EXTENT_MAP_HOLE.  Inline extents are mapped to EXTENT_MAP_INLINE with the
+ * end rounded up to the root's sector size.  When @new_inline is true the
+ * compressed flag is not set for inline extents.  An unknown item type is
+ * only reported through btrfs_err().
+ */
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+				     const struct btrfs_path *path,
+				     struct btrfs_file_extent_item *fi,
+				     const bool new_inline,
+				     struct extent_map *em)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_buffer *leaf = path->nodes[0];
+	const int slot = path->slots[0];
+	struct btrfs_key key;
+	u64 extent_start, extent_end;
+	u64 bytenr;
+	u8 type = btrfs_file_extent_type(leaf, fi);
+	int compress_type = btrfs_file_extent_compression(leaf, fi);
+
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	extent_start = key.offset;
+
+	/*
+	 * extent_end is only assigned for the three known types; it is read
+	 * below only inside the branches for those same types.
+	 */
+	if (type == BTRFS_FILE_EXTENT_REG ||
+	    type == BTRFS_FILE_EXTENT_PREALLOC) {
+		extent_end = extent_start +
+			btrfs_file_extent_num_bytes(leaf, fi);
+	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+		size_t size;
+		size = btrfs_file_extent_inline_len(leaf, slot, fi);
+		extent_end = ALIGN(extent_start + size, root->sectorsize);
+	}
+
+	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+	if (type == BTRFS_FILE_EXTENT_REG ||
+	    type == BTRFS_FILE_EXTENT_PREALLOC) {
+		em->start = extent_start;
+		em->len = extent_end - extent_start;
+		em->orig_start = extent_start -
+			btrfs_file_extent_offset(leaf, fi);
+		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		/* bytenr 0: hole, the block_len and flags are left untouched */
+		if (bytenr == 0) {
+			em->block_start = EXTENT_MAP_HOLE;
+			return;
+		}
+		if (compress_type != BTRFS_COMPRESS_NONE) {
+			/* compressed: map the whole on-disk extent */
+			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+			em->compress_type = compress_type;
+			em->block_start = bytenr;
+			em->block_len = em->orig_block_len;
+		} else {
+			/* uncompressed: point at the referenced part only */
+			bytenr += btrfs_file_extent_offset(leaf, fi);
+			em->block_start = bytenr;
+			em->block_len = em->len;
+			if (type == BTRFS_FILE_EXTENT_PREALLOC)
+				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+		}
+	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+		em->block_start = EXTENT_MAP_INLINE;
+		em->start = extent_start;
+		em->len = extent_end - extent_start;
+		/*
+		 * Initialize orig_start and block_len with the same values
+		 * as in inode.c:btrfs_get_extent().
+		 */
+		em->orig_start = EXTENT_MAP_HOLE;
+		em->block_len = (u64)-1;
+		if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
+			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+			em->compress_type = compress_type;
+		}
+	} else {
+		btrfs_err(root->fs_info,
+			  "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
+			  type, btrfs_ino(inode), extent_start,
+			  root->root_key.objectid);
+	}
+}

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 74272a3..1f2b99c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c

@@ -40,6 +40,7 @@
 #include "tree-log.h"
 #include "locking.h"
 #include "volumes.h"
+#include "qgroup.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -447,7 +448,7 @@
 		write_bytes -= copied;
 		total_copied += copied;
 
-		/* Return to btrfs_file_aio_write to fault page */
+		/* Return to btrfs_file_write_iter to fault page */
 		if (unlikely(copied == 0))
 			break;
 
@@ -715,7 +716,7 @@
 	int recow;
 	int ret;
 	int modify_tree = -1;
-	int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
+	int update_refs;
 	int found = 0;
 	int leafs_visited = 0;
 
@@ -725,6 +726,8 @@
 	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
 		modify_tree = 0;
 
+	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+		       root == root->fs_info->tree_root);
 	while (1) {
 		recow = 0;
 		ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -781,6 +784,18 @@
 			extent_end = search_start;
 		}
 
+		/*
+		 * Don't skip extent items representing 0 byte lengths. They
+		 * used to be created (bug) if while punching holes we hit
+		 * -ENOSPC condition. So if we find one here, just ensure we
+		 * delete it, otherwise we would insert a new file extent item
+		 * with the same key (offset) as that 0 bytes length file
+		 * extent item in the call to setup_items_for_insert() later
+		 * in this function.
+		 */
+		if (extent_end == key.offset && extent_end >= search_start)
+			goto delete_extent_item;
+
 		if (extent_end <= search_start) {
 			path->slots[0]++;
 			goto next_slot;
@@ -836,7 +851,7 @@
 						disk_bytenr, num_bytes, 0,
 						root->root_key.objectid,
 						new_key.objectid,
-						start - extent_offset, 0);
+						start - extent_offset, 1);
 				BUG_ON(ret); /* -ENOMEM */
 			}
 			key.offset = start;
@@ -894,6 +909,7 @@
 		 *    | ------ extent ------ |
 		 */
 		if (start <= key.offset && end >= extent_end) {
+delete_extent_item:
 			if (del_nr == 0) {
 				del_slot = path->slots[0];
 				del_nr = 1;
@@ -1192,7 +1208,7 @@
 
 		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
 					   root->root_key.objectid,
-					   ino, orig_offset, 0);
+					   ino, orig_offset, 1);
 		BUG_ON(ret); /* -ENOMEM */
 
 		if (split == start) {
@@ -1659,27 +1675,22 @@
 }
 
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
-				    const struct iovec *iov,
-				    unsigned long nr_segs, loff_t pos,
-				    size_t count, size_t ocount)
+				    struct iov_iter *from,
+				    loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct iov_iter i;
 	ssize_t written;
 	ssize_t written_buffered;
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-					    count, ocount);
+	written = generic_file_direct_write(iocb, from, pos);
 
-	if (written < 0 || written == count)
+	if (written < 0 || !iov_iter_count(from))
 		return written;
 
 	pos += written;
-	count -= written;
-	iov_iter_init(&i, iov, nr_segs, count, written);
-	written_buffered = __btrfs_buffered_write(file, &i, pos);
+	written_buffered = __btrfs_buffered_write(file, from, pos);
 	if (written_buffered < 0) {
 		err = written_buffered;
 		goto out;
@@ -1714,9 +1725,8 @@
 		inode_inc_iversion(inode);
 }
 
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
-				    const struct iovec *iov,
-				    unsigned long nr_segs, loff_t pos)
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
+				    struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -1725,18 +1735,12 @@
 	u64 end_pos;
 	ssize_t num_written = 0;
 	ssize_t err = 0;
-	size_t count, ocount;
+	size_t count = iov_iter_count(from);
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
+	loff_t pos = iocb->ki_pos;
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
-	count = ocount;
-
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err) {
@@ -1749,6 +1753,8 @@
 		goto out;
 	}
 
+	iov_iter_truncate(from, count);
+
 	err = file_remove_suid(file);
 	if (err) {
 		mutex_unlock(&inode->i_mutex);
@@ -1790,14 +1796,9 @@
 		atomic_inc(&BTRFS_I(inode)->sync_writers);
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
-		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-						   pos, count, ocount);
+		num_written = __btrfs_direct_write(iocb, from, pos);
 	} else {
-		struct iov_iter i;
-
-		iov_iter_init(&i, iov, nr_segs, count, num_written);
-
-		num_written = __btrfs_buffered_write(file, &i, pos);
+		num_written = __btrfs_buffered_write(file, from, pos);
 		if (num_written > 0)
 			iocb->ki_pos = pos + num_written;
 	}
@@ -2010,8 +2011,10 @@
 		if (!full_sync) {
 			ret = btrfs_wait_ordered_range(inode, start,
 						       end - start + 1);
-			if (ret)
+			if (ret) {
+				btrfs_end_transaction(trans, root);
 				goto out;
+			}
 		}
 		ret = btrfs_commit_transaction(trans, root);
 	} else {
@@ -2169,6 +2172,37 @@
 	return 0;
 }
 
+/*
+ * Fetch the extent map for the given range with btrfs_get_extent(). If it is
+ * a hole (block_start == EXTENT_MAP_HOLE), return 1, move *start to the end
+ * of that extent map and set *len to what is left of the range past it (0 if
+ * nothing is left). Otherwise return 0 with *start and *len unchanged; < 0 on error.
+ */
+static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
+{
+	struct extent_map *em;
+	int ret = 0;
+
+	em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
+	if (IS_ERR_OR_NULL(em)) {
+		if (!em)
+			ret = -ENOMEM;
+		else
+			ret = PTR_ERR(em);
+		return ret;
+	}
+
+	/* Hole or vacuum extent(only exists in no-hole mode) */
+	if (em->block_start == EXTENT_MAP_HOLE) {
+		ret = 1;
+		*len = em->start + em->len > *start + *len ?
+		       0 : *start + *len - em->start - em->len;
+		*start = em->start + em->len;
+	}
+	free_extent_map(em);
+	return ret;
+}
+
 static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2176,25 +2210,42 @@
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
-	u64 lockend = round_down(offset + len,
-				 BTRFS_I(inode)->root->sectorsize) - 1;
-	u64 cur_offset = lockstart;
+	u64 lockstart;
+	u64 lockend;
+	u64 tail_start;
+	u64 tail_len;
+	u64 orig_start = offset;
+	u64 cur_offset;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
 	int ret = 0;
 	int err = 0;
 	int rsv_count;
-	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+	bool same_page;
 	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
-	u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+	u64 ino_size;
 
 	ret = btrfs_wait_ordered_range(inode, offset, len);
 	if (ret)
 		return ret;
 
 	mutex_lock(&inode->i_mutex);
+	ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+	ret = find_first_non_hole(inode, &offset, &len);
+	if (ret < 0)
+		goto out_only_mutex;
+	if (ret && !len) {
+		/* Already in a large hole */
+		ret = 0;
+		goto out_only_mutex;
+	}
+
+	lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
+	lockend = round_down(offset + len,
+			     BTRFS_I(inode)->root->sectorsize) - 1;
+	same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+		    ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+
 	/*
 	 * We needn't truncate any page which is beyond the end of the file
 	 * because we are sure there is no data there.
@@ -2206,8 +2257,7 @@
 	if (same_page && len < PAGE_CACHE_SIZE) {
 		if (offset < ino_size)
 			ret = btrfs_truncate_page(inode, offset, len, 0);
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+		goto out_only_mutex;
 	}
 
 	/* zero back part of the first page */
@@ -2219,12 +2269,39 @@
 		}
 	}
 
-	/* zero the front end of the last page */
-	if (offset + len < ino_size) {
-		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-		if (ret) {
-			mutex_unlock(&inode->i_mutex);
-			return ret;
+	/* Check the aligned pages after the first unaligned page,
+	 * if offset != orig_start, which means the first unaligned page
+	 * including several following pages are already in holes,
+	 * the extra check can be skipped */
+	if (offset == orig_start) {
+		/* after truncate page, check hole again */
+		len = offset + len - lockstart;
+		offset = lockstart;
+		ret = find_first_non_hole(inode, &offset, &len);
+		if (ret < 0)
+			goto out_only_mutex;
+		if (ret && !len) {
+			ret = 0;
+			goto out_only_mutex;
+		}
+		lockstart = offset;
+	}
+
+	/* Check the tail unaligned part is in a hole */
+	tail_start = lockend + 1;
+	tail_len = offset + len - tail_start;
+	if (tail_len) {
+		ret = find_first_non_hole(inode, &tail_start, &tail_len);
+		if (unlikely(ret < 0))
+			goto out_only_mutex;
+		if (!ret) {
+			/* zero the front end of the last page */
+			if (tail_start + tail_len < ino_size) {
+				ret = btrfs_truncate_page(inode,
+						tail_start + tail_len, 0, 1);
+				if (ret)
+					goto out_only_mutex;
+				}
 		}
 	}
 
@@ -2250,9 +2327,7 @@
 		if ((!ordered ||
 		    (ordered->file_offset + ordered->len <= lockstart ||
 		     ordered->file_offset > lockend)) &&
-		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
-				     lockend, EXTENT_UPTODATE, 0,
-				     cached_state)) {
+		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
 			if (ordered)
 				btrfs_put_ordered_extent(ordered);
 			break;
@@ -2300,6 +2375,8 @@
 	BUG_ON(ret);
 	trans->block_rsv = rsv;
 
+	cur_offset = lockstart;
+	len = lockend - cur_offset;
 	while (cur_offset < lockend) {
 		ret = __btrfs_drop_extents(trans, root, inode, path,
 					   cur_offset, lockend + 1,
@@ -2340,6 +2417,14 @@
 					      rsv, min_size);
 		BUG_ON(ret);	/* shouldn't happen */
 		trans->block_rsv = rsv;
+
+		ret = find_first_non_hole(inode, &cur_offset, &len);
+		if (unlikely(ret < 0))
+			break;
+		if (ret && !len) {
+			ret = 0;
+			break;
+		}
 	}
 
 	if (ret) {
@@ -2348,7 +2433,12 @@
 	}
 
 	trans->block_rsv = &root->fs_info->trans_block_rsv;
-	if (cur_offset < ino_size) {
+	/*
+	 * Don't insert file hole extent item if it's for a range beyond eof
+	 * (because it's useless) or if it represents a 0 bytes range (when
+	 * cur_offset == drop_end).
+	 */
+	if (cur_offset < ino_size && cur_offset < drop_end) {
 		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
 		if (ret) {
 			err = ret;
@@ -2373,6 +2463,7 @@
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 			     &cached_state, GFP_NOFS);
+out_only_mutex:
 	mutex_unlock(&inode->i_mutex);
 	if (ret && !err)
 		err = ret;
@@ -2634,11 +2725,11 @@
 
 const struct file_operations btrfs_file_operations = {
 	.llseek		= btrfs_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read       = generic_file_aio_read,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter      = generic_file_read_iter,
 	.splice_read	= generic_file_splice_read,
-	.aio_write	= btrfs_file_aio_write,
+	.write_iter	= btrfs_file_write_iter,
 	.mmap		= btrfs_file_mmap,
 	.open		= generic_file_open,
 	.release	= btrfs_release_file,

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 73f3de7..372b05f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c

@@ -831,7 +831,7 @@
 
 	if (!matched) {
 		__btrfs_remove_free_space_cache(ctl);
-		btrfs_err(fs_info, "block group %llu has wrong amount of free space",
+		btrfs_warn(fs_info, "block group %llu has wrong amount of free space",
 			block_group->key.objectid);
 		ret = -1;
 	}
@@ -843,7 +843,7 @@
 		spin_unlock(&block_group->lock);
 		ret = 0;
 
-		btrfs_err(fs_info, "failed to load free space cache for block group %llu",
+		btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now",
 			block_group->key.objectid);
 	}
 
@@ -851,90 +851,44 @@
 	return ret;
 }
 
-/**
- * __btrfs_write_out_cache - write out cached info to an inode
- * @root - the root the inode belongs to
- * @ctl - the free space cache we are going to write out
- * @block_group - the block_group for this cache if it belongs to a block_group
- * @trans - the trans handle
- * @path - the path to use
- * @offset - the offset for the key we'll insert
- *
- * This function writes out a free space cache struct to disk for quick recovery
- * on mount.  This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
- */
-static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
-				   struct btrfs_free_space_ctl *ctl,
-				   struct btrfs_block_group_cache *block_group,
-				   struct btrfs_trans_handle *trans,
-				   struct btrfs_path *path, u64 offset)
+static noinline_for_stack
+int write_cache_extent_entries(struct io_ctl *io_ctl,
+			      struct btrfs_free_space_ctl *ctl,
+			      struct btrfs_block_group_cache *block_group,
+			      int *entries, int *bitmaps,
+			      struct list_head *bitmap_list)
 {
-	struct btrfs_free_space_header *header;
-	struct extent_buffer *leaf;
-	struct rb_node *node;
-	struct list_head *pos, *n;
-	struct extent_state *cached_state = NULL;
-	struct btrfs_free_cluster *cluster = NULL;
-	struct extent_io_tree *unpin = NULL;
-	struct io_ctl io_ctl;
-	struct list_head bitmap_list;
-	struct btrfs_key key;
-	u64 start, extent_start, extent_end, len;
-	int entries = 0;
-	int bitmaps = 0;
 	int ret;
-	int err = -1;
-
-	INIT_LIST_HEAD(&bitmap_list);
-
-	if (!i_size_read(inode))
-		return -1;
-
-	ret = io_ctl_init(&io_ctl, inode, root);
-	if (ret)
-		return -1;
+	struct btrfs_free_cluster *cluster = NULL;
+	struct rb_node *node = rb_first(&ctl->free_space_offset);
 
 	/* Get the cluster for this block_group if it exists */
-	if (block_group && !list_empty(&block_group->cluster_list))
+	if (block_group && !list_empty(&block_group->cluster_list)) {
 		cluster = list_entry(block_group->cluster_list.next,
 				     struct btrfs_free_cluster,
 				     block_group_list);
+	}
 
-	/* Lock all pages first so we can lock the extent safely. */
-	io_ctl_prepare_pages(&io_ctl, inode, 0);
-
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
-			 0, &cached_state);
-
-	node = rb_first(&ctl->free_space_offset);
 	if (!node && cluster) {
 		node = rb_first(&cluster->root);
 		cluster = NULL;
 	}
 
-	/* Make sure we can fit our crcs into the first page */
-	if (io_ctl.check_crcs &&
-	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
-		goto out_nospc;
-
-	io_ctl_set_generation(&io_ctl, trans->transid);
-
 	/* Write out the extent entries */
 	while (node) {
 		struct btrfs_free_space *e;
 
 		e = rb_entry(node, struct btrfs_free_space, offset_index);
-		entries++;
+		*entries += 1;
 
-		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
+		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
 				       e->bitmap);
 		if (ret)
-			goto out_nospc;
+			goto fail;
 
 		if (e->bitmap) {
-			list_add_tail(&e->list, &bitmap_list);
-			bitmaps++;
+			list_add_tail(&e->list, bitmap_list);
+			*bitmaps += 1;
 		}
 		node = rb_next(node);
 		if (!node && cluster) {
@@ -942,13 +896,84 @@
 			cluster = NULL;
 		}
 	}
+	return 0;
+fail:
+	return -ENOSPC;
+}
+
+static noinline_for_stack int
+update_cache_item(struct btrfs_trans_handle *trans,
+		  struct btrfs_root *root,
+		  struct inode *inode,
+		  struct btrfs_path *path, u64 offset,
+		  int entries, int bitmaps)
+{
+	struct btrfs_key key;
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	int ret;
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = offset;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0) {
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
+				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
+				 GFP_NOFS);
+		goto fail;
+	}
+	leaf = path->nodes[0];
+	if (ret > 0) {
+		struct btrfs_key found_key;
+		ASSERT(path->slots[0]);
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+		    found_key.offset != offset) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
+					 inode->i_size - 1,
+					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+					 NULL, GFP_NOFS);
+			btrfs_release_path(path);
+			goto fail;
+		}
+	}
+
+	BTRFS_I(inode)->generation = trans->transid;
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	btrfs_set_free_space_entries(leaf, header, entries);
+	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+	btrfs_set_free_space_generation(leaf, header, trans->transid);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	return 0;
+
+fail:
+	return -1;
+}
+
+static noinline_for_stack int
+add_ioctl_entries(struct btrfs_root *root,
+		  struct inode *inode,
+		  struct btrfs_block_group_cache *block_group,
+		  struct io_ctl *io_ctl,
+		  struct extent_state **cached_state,
+		  struct list_head *bitmap_list,
+		  int *entries)
+{
+	u64 start, extent_start, extent_end, len;
+	struct list_head *pos, *n;
+	struct extent_io_tree *unpin = NULL;
+	int ret;
 
 	/*
 	 * We want to add any pinned extents to our free space cache
 	 * so we don't leak the space
-	 */
-
-	/*
+	 *
 	 * We shouldn't have switched the pinned extents yet so this is the
 	 * right one
 	 */
@@ -977,8 +1002,8 @@
 				 block_group->key.offset, extent_end + 1);
 		len = extent_end - extent_start;
 
-		entries++;
-		ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
+		*entries += 1;
+		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
 		if (ret)
 			goto out_nospc;
 
@@ -986,74 +1011,129 @@
 	}
 
 	/* Write out the bitmaps */
-	list_for_each_safe(pos, n, &bitmap_list) {
+	list_for_each_safe(pos, n, bitmap_list) {
 		struct btrfs_free_space *entry =
 			list_entry(pos, struct btrfs_free_space, list);
 
-		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
+		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
 		if (ret)
 			goto out_nospc;
 		list_del_init(&entry->list);
 	}
 
 	/* Zero out the rest of the pages just to make sure */
-	io_ctl_zero_remaining_pages(&io_ctl);
+	io_ctl_zero_remaining_pages(io_ctl);
 
-	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
-				0, i_size_read(inode), &cached_state);
-	io_ctl_drop_pages(&io_ctl);
+	ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
+				0, i_size_read(inode), cached_state);
+	io_ctl_drop_pages(io_ctl);
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
-			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+			     i_size_read(inode) - 1, cached_state, GFP_NOFS);
 
 	if (ret)
-		goto out;
+		goto fail;
 
 	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
 	if (ret) {
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
 				 GFP_NOFS);
+		goto fail;
+	}
+	return 0;
+
+fail:
+	return -1;
+
+out_nospc:
+	return -ENOSPC;
+}
+
+static void noinline_for_stack
+cleanup_write_cache_enospc(struct inode *inode,
+			   struct io_ctl *io_ctl,
+			   struct extent_state **cached_state,
+			   struct list_head *bitmap_list)
+{
+	struct list_head *pos, *n;
+	list_for_each_safe(pos, n, bitmap_list) {
+		struct btrfs_free_space *entry =
+			list_entry(pos, struct btrfs_free_space, list);
+		list_del_init(&entry->list);
+	}
+	io_ctl_drop_pages(io_ctl);
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+			     i_size_read(inode) - 1, cached_state,
+			     GFP_NOFS);
+}
+
+/**
+ * __btrfs_write_out_cache - write out cached info to an inode
+ * @root - the root the inode belongs to
+ * @ctl - the free space cache we are going to write out
+ * @block_group - the block_group for this cache if it belongs to a block_group
+ * @trans - the trans handle
+ * @path - the path to use
+ * @offset - the offset for the key we'll insert
+ *
+ * This function writes out a free space cache struct to disk for quick recovery
+ * on mount.  This will return 0 if it was successful in writing the cache out,
+ * and -1 if it was not.
+ */
+static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
+				   struct btrfs_free_space_ctl *ctl,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_trans_handle *trans,
+				   struct btrfs_path *path, u64 offset)
+{
+	struct extent_state *cached_state = NULL;
+	struct io_ctl io_ctl;
+	struct list_head bitmap_list;
+	int entries = 0;
+	int bitmaps = 0;
+	int ret;
+	int err = -1;
+
+	INIT_LIST_HEAD(&bitmap_list);
+
+	if (!i_size_read(inode))
+		return -1;
+
+	ret = io_ctl_init(&io_ctl, inode, root);
+	if (ret)
+		return -1;
+
+	/* Lock all pages first so we can lock the extent safely. */
+	io_ctl_prepare_pages(&io_ctl, inode, 0);
+
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+			 0, &cached_state);
+
+
+	/* Make sure we can fit our crcs into the first page */
+	if (io_ctl.check_crcs &&
+	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
+		goto out_nospc;
+
+	io_ctl_set_generation(&io_ctl, trans->transid);
+
+	ret = write_cache_extent_entries(&io_ctl, ctl,
+					 block_group, &entries, &bitmaps,
+					 &bitmap_list);
+	if (ret)
+		goto out_nospc;
+
+	ret = add_ioctl_entries(root, inode, block_group, &io_ctl,
+				&cached_state, &bitmap_list, &entries);
+
+	if (ret == -ENOSPC)
+		goto out_nospc;
+	else if (ret)
 		goto out;
-	}
 
-	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
-	key.offset = offset;
-	key.type = 0;
+	err = update_cache_item(trans, root, inode, path, offset,
+				entries, bitmaps);
 
-	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
-	if (ret < 0) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
-				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
-				 GFP_NOFS);
-		goto out;
-	}
-	leaf = path->nodes[0];
-	if (ret > 0) {
-		struct btrfs_key found_key;
-		ASSERT(path->slots[0]);
-		path->slots[0]--;
-		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
-		    found_key.offset != offset) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
-					 inode->i_size - 1,
-					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
-					 NULL, GFP_NOFS);
-			btrfs_release_path(path);
-			goto out;
-		}
-	}
-
-	BTRFS_I(inode)->generation = trans->transid;
-	header = btrfs_item_ptr(leaf, path->slots[0],
-				struct btrfs_free_space_header);
-	btrfs_set_free_space_entries(leaf, header, entries);
-	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
-	btrfs_set_free_space_generation(leaf, header, trans->transid);
-	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(path);
-
-	err = 0;
 out:
 	io_ctl_free(&io_ctl);
 	if (err) {
@@ -1064,14 +1144,8 @@
 	return err;
 
 out_nospc:
-	list_for_each_safe(pos, n, &bitmap_list) {
-		struct btrfs_free_space *entry =
-			list_entry(pos, struct btrfs_free_space, list);
-		list_del_init(&entry->list);
-	}
-	io_ctl_drop_pages(&io_ctl);
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
-			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+	cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
 	goto out;
 }
 

diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 86935f5..888fbe1 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c

@@ -174,7 +174,7 @@
 				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
 	}
 
-	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
+	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
 			  root->root_key.objectid);
 	if (IS_ERR(tsk)) {
 		btrfs_warn(root->fs_info, "failed to start inode caching task");

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5a3b837..8925f66 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c

@@ -125,7 +125,7 @@
  * the btree.  The caller should have done a btrfs_drop_extents so that
  * no overlapping inline items exist in the btree
  */
-static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+static int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_path *path, int extent_inserted,
 				struct btrfs_root *root, struct inode *inode,
 				u64 start, size_t size, size_t compressed_size,
@@ -2678,6 +2678,7 @@
 		trans = NULL;
 		goto out_unlock;
 	}
+
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -2947,14 +2948,15 @@
 	root->orphan_block_rsv = NULL;
 	spin_unlock(&root->orphan_lock);
 
-	if (root->orphan_item_inserted &&
+	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
 	    btrfs_root_refs(&root->root_item) > 0) {
 		ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
 					    root->root_key.objectid);
 		if (ret)
 			btrfs_abort_transaction(trans, root, ret);
 		else
-			root->orphan_item_inserted = 0;
+			clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+				  &root->state);
 	}
 
 	if (block_rsv) {
@@ -3271,7 +3273,8 @@
 		btrfs_block_rsv_release(root, root->orphan_block_rsv,
 					(u64)-1);
 
-	if (root->orphan_block_rsv || root->orphan_item_inserted) {
+	if (root->orphan_block_rsv ||
+	    test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
 		trans = btrfs_join_transaction(root);
 		if (!IS_ERR(trans))
 			btrfs_end_transaction(trans, root);
@@ -3473,7 +3476,7 @@
 		ret = btrfs_load_inode_props(inode, path);
 		if (ret)
 			btrfs_err(root->fs_info,
-				  "error loading props for ino %llu (root %llu): %d\n",
+				  "error loading props for ino %llu (root %llu): %d",
 				  btrfs_ino(inode),
 				  root->root_key.objectid, ret);
 	}
@@ -3998,7 +4001,8 @@
 	 * not block aligned since we will be keeping the last block of the
 	 * extent just the way it is.
 	 */
-	if (root->ref_cows || root == root->fs_info->tree_root)
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+	    root == root->fs_info->tree_root)
 		btrfs_drop_extent_cache(inode, ALIGN(new_size,
 					root->sectorsize), (u64)-1, 0);
 
@@ -4091,7 +4095,9 @@
 							 extent_num_bytes);
 				num_dec = (orig_num_bytes -
 					   extent_num_bytes);
-				if (root->ref_cows && extent_start != 0)
+				if (test_bit(BTRFS_ROOT_REF_COWS,
+					     &root->state) &&
+				    extent_start != 0)
 					inode_sub_bytes(inode, num_dec);
 				btrfs_mark_buffer_dirty(leaf);
 			} else {
@@ -4105,7 +4111,8 @@
 				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
 				if (extent_start != 0) {
 					found_extent = 1;
-					if (root->ref_cows)
+					if (test_bit(BTRFS_ROOT_REF_COWS,
+						     &root->state))
 						inode_sub_bytes(inode, num_dec);
 				}
 			}
@@ -4120,10 +4127,9 @@
 			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
 				u32 size = new_size - found_key.offset;
 
-				if (root->ref_cows) {
+				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 					inode_sub_bytes(inode, item_end + 1 -
 							new_size);
-				}
 
 				/*
 				 * update the ram bytes to properly reflect
@@ -4133,7 +4139,8 @@
 				size =
 				    btrfs_file_extent_calc_inline_size(size);
 				btrfs_truncate_item(root, path, size, 1);
-			} else if (root->ref_cows) {
+			} else if (test_bit(BTRFS_ROOT_REF_COWS,
+					    &root->state)) {
 				inode_sub_bytes(inode, item_end + 1 -
 						found_key.offset);
 			}
@@ -4155,8 +4162,9 @@
 		} else {
 			break;
 		}
-		if (found_extent && (root->ref_cows ||
-				     root == root->fs_info->tree_root)) {
+		if (found_extent &&
+		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+		     root == root->fs_info->tree_root)) {
 			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes, 0,
@@ -5168,8 +5176,7 @@
 
 static void btrfs_dentry_release(struct dentry *dentry)
 {
-	if (dentry->d_fsdata)
-		kfree(dentry->d_fsdata);
+	kfree(dentry->d_fsdata);
 }
 
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -5553,6 +5560,7 @@
 	struct btrfs_inode_ref *ref;
 	struct btrfs_key key[2];
 	u32 sizes[2];
+	int nitems = name ? 2 : 1;
 	unsigned long ptr;
 	int ret;
 
@@ -5572,7 +5580,7 @@
 	 */
 	inode->i_ino = objectid;
 
-	if (dir) {
+	if (dir && name) {
 		trace_btrfs_inode_request(dir);
 
 		ret = btrfs_set_inode_index(dir, index);
@@ -5581,6 +5589,8 @@
 			iput(inode);
 			return ERR_PTR(ret);
 		}
+	} else if (dir) {
+		*index = 0;
 	}
 	/*
 	 * index_cnt is ignored for everything but a dir,
@@ -5605,21 +5615,24 @@
 	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
 	key[0].offset = 0;
 
-	/*
-	 * Start new inodes with an inode_ref. This is slightly more
-	 * efficient for small numbers of hard links since they will
-	 * be packed into one item. Extended refs will kick in if we
-	 * add more hard links than can fit in the ref item.
-	 */
-	key[1].objectid = objectid;
-	btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
-	key[1].offset = ref_objectid;
-
 	sizes[0] = sizeof(struct btrfs_inode_item);
-	sizes[1] = name_len + sizeof(*ref);
+
+	if (name) {
+		/*
+		 * Start new inodes with an inode_ref. This is slightly more
+		 * efficient for small numbers of hard links since they will
+		 * be packed into one item. Extended refs will kick in if we
+		 * add more hard links than can fit in the ref item.
+		 */
+		key[1].objectid = objectid;
+		btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
+		key[1].offset = ref_objectid;
+
+		sizes[1] = name_len + sizeof(*ref);
+	}
 
 	path->leave_spinning = 1;
-	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
+	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
 	if (ret != 0)
 		goto fail;
 
@@ -5632,12 +5645,14 @@
 			     sizeof(*inode_item));
 	fill_inode_item(trans, path->nodes[0], inode_item, inode);
 
-	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
-			     struct btrfs_inode_ref);
-	btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
-	btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
-	ptr = (unsigned long)(ref + 1);
-	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	if (name) {
+		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
+				     struct btrfs_inode_ref);
+		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+		btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
+		ptr = (unsigned long)(ref + 1);
+		write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	}
 
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
@@ -5673,7 +5688,7 @@
 
 	return inode;
 fail:
-	if (dir)
+	if (dir && name)
 		BTRFS_I(dir)->index_cnt--;
 	btrfs_free_path(path);
 	iput(inode);
@@ -5958,6 +5973,15 @@
 		err = btrfs_update_inode(trans, root, inode);
 		if (err)
 			goto fail;
+		if (inode->i_nlink == 1) {
+			/*
+			 * If new hard link count is 1, it's a file created
+			 * with open(2) O_TMPFILE flag.
+			 */
+			err = btrfs_orphan_del(trans, inode);
+			if (err)
+				goto fail;
+		}
 		d_instantiate(dentry, inode);
 		btrfs_log_new_name(trans, inode, NULL, parent);
 	}
@@ -6086,16 +6110,8 @@
 	max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
-	if (ret) {
-		char *kaddr = kmap_atomic(page);
-		unsigned long copy_size = min_t(u64,
-				  PAGE_CACHE_SIZE - pg_offset,
-				  max_size - extent_offset);
-		memset(kaddr + pg_offset, 0, copy_size);
-		kunmap_atomic(kaddr);
-	}
 	kfree(tmp);
-	return 0;
+	return ret;
 }
 
 /*
@@ -6113,7 +6129,6 @@
 {
 	int ret;
 	int err = 0;
-	u64 bytenr;
 	u64 extent_start = 0;
 	u64 extent_end = 0;
 	u64 objectid = btrfs_ino(inode);
@@ -6127,7 +6142,7 @@
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_trans_handle *trans = NULL;
-	int compress_type;
+	const bool new_inline = !page || create;
 
 again:
 	read_lock(&em_tree->lock);
@@ -6201,7 +6216,6 @@
 
 	found_type = btrfs_file_extent_type(leaf, item);
 	extent_start = found_key.offset;
-	compress_type = btrfs_file_extent_compression(leaf, item);
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
 		extent_end = extent_start +
@@ -6236,32 +6250,10 @@
 		goto not_found_em;
 	}
 
-	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
+	btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);
+
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
-		em->start = extent_start;
-		em->len = extent_end - extent_start;
-		em->orig_start = extent_start -
-				 btrfs_file_extent_offset(leaf, item);
-		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
-								      item);
-		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
-		if (bytenr == 0) {
-			em->block_start = EXTENT_MAP_HOLE;
-			goto insert;
-		}
-		if (compress_type != BTRFS_COMPRESS_NONE) {
-			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
-			em->compress_type = compress_type;
-			em->block_start = bytenr;
-			em->block_len = em->orig_block_len;
-		} else {
-			bytenr += btrfs_file_extent_offset(leaf, item);
-			em->block_start = bytenr;
-			em->block_len = em->len;
-			if (found_type == BTRFS_FILE_EXTENT_PREALLOC)
-				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
-		}
 		goto insert;
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		unsigned long ptr;
@@ -6270,12 +6262,8 @@
 		size_t extent_offset;
 		size_t copy_size;
 
-		em->block_start = EXTENT_MAP_INLINE;
-		if (!page || create) {
-			em->start = extent_start;
-			em->len = extent_end - extent_start;
+		if (new_inline)
 			goto out;
-		}
 
 		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
 		extent_offset = page_offset(page) + pg_offset - extent_start;
@@ -6285,10 +6273,6 @@
 		em->len = ALIGN(copy_size, root->sectorsize);
 		em->orig_block_len = em->len;
 		em->orig_start = em->start;
-		if (compress_type) {
-			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
-			em->compress_type = compress_type;
-		}
 		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
 		if (create == 0 && !PageUptodate(page)) {
 			if (btrfs_file_extent_compression(leaf, item) !=
@@ -6296,7 +6280,10 @@
 				ret = uncompress_inline(path, inode, page,
 							pg_offset,
 							extent_offset, item);
-				BUG_ON(ret); /* -ENOMEM */
+				if (ret) {
+					err = ret;
+					goto out;
+				}
 			} else {
 				map = kmap(page);
 				read_extent_buffer(leaf, map + pg_offset, ptr,
@@ -6332,8 +6319,6 @@
 		set_extent_uptodate(io_tree, em->start,
 				    extent_map_end(em) - 1, NULL, GFP_NOFS);
 		goto insert;
-	} else {
-		WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
 	}
 not_found:
 	em->start = start;
@@ -6717,6 +6702,76 @@
 	return ret;
 }
 
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
+{
+	struct radix_tree_root *root = &inode->i_mapping->page_tree;
+	int found = false;
+	void **pagep = NULL;
+	struct page *page = NULL;
+	int start_idx;
+	int end_idx;
+
+	start_idx = start >> PAGE_CACHE_SHIFT;
+
+	/*
+	 * end is the last byte in the last page.  end == start is legal
+	 */
+	end_idx = end >> PAGE_CACHE_SHIFT;
+
+	rcu_read_lock();
+
+	/* Most of the code in this while loop is lifted from
+	 * find_get_page.  It's been modified to begin searching from a
+	 * page and return just the first page found in that range.  If the
+	 * found idx is less than or equal to the end idx then we know that
+	 * a page exists.  If no pages are found or if those pages are
+	 * outside of the range then we're fine (yay!) */
+	while (page == NULL &&
+	       radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
+		page = radix_tree_deref_slot(pagep);
+		if (unlikely(!page))
+			break;
+
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				page = NULL;
+				continue;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so return it without
+			 * attempting to raise page count.
+			 */
+			page = NULL;
+			break; /* TODO: Is this relevant for this use case? */
+		}
+
+		if (!page_cache_get_speculative(page)) {
+			page = NULL;
+			continue;
+		}
+
+		/*
+		 * Has the page moved?
+		 * This is part of the lockless pagecache protocol. See
+		 * include/linux/pagemap.h for details.
+		 */
+		if (unlikely(page != *pagep)) {
+			page_cache_release(page);
+			page = NULL;
+		}
+	}
+
+	if (page) {
+		if (page->index <= end_idx)
+			found = true;
+		page_cache_release(page);
+	}
+
+	rcu_read_unlock();
+	return found;
+}
+
 static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			      struct extent_state **cached_state, int writing)
 {
@@ -6741,10 +6796,9 @@
 		 * invalidate needs to happen so that reads after a write do not
 		 * get stale data.
 		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    *cached_state)))
+		if (!ordered &&
+		    (!writing ||
+		     !btrfs_page_exists_in_range(inode, lockstart, lockend)))
 			break;
 
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
@@ -7391,39 +7445,30 @@
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			const struct iov_iter *iter, loff_t offset)
 {
 	int seg;
 	int i;
-	size_t size;
-	unsigned long addr;
 	unsigned blocksize_mask = root->sectorsize - 1;
 	ssize_t retval = -EINVAL;
-	loff_t end = offset;
 
 	if (offset & blocksize_mask)
 		goto out;
 
-	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < nr_segs; seg++) {
-		addr = (unsigned long)iov[seg].iov_base;
-		size = iov[seg].iov_len;
-		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask))
-			goto out;
+	if (iov_iter_alignment(iter) & blocksize_mask)
+		goto out;
 
-		/* If this is a write we don't need to check anymore */
-		if (rw & WRITE)
-			continue;
-
-		/*
-		 * Check to make sure we don't have duplicate iov_base's in this
-		 * iovec, if so return EINVAL, otherwise we'll get csum errors
-		 * when reading back.
-		 */
-		for (i = seg + 1; i < nr_segs; i++) {
-			if (iov[seg].iov_base == iov[i].iov_base)
+	/* If this is a write we don't need to check anymore */
+	if (rw & WRITE)
+		return 0;
+	/*
+	 * Check to make sure we don't have duplicate iov_base's in this
+	 * iovec, if so return EINVAL, otherwise we'll get csum errors
+	 * when reading back.
+	 */
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		for (i = seg + 1; i < iter->nr_segs; i++) {
+			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
 				goto out;
 		}
 	}
@@ -7433,8 +7478,7 @@
 }
 
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -7444,8 +7488,7 @@
 	bool relock = false;
 	ssize_t ret;
 
-	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs))
+	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
 		return 0;
 
 	atomic_inc(&inode->i_dio_count);
@@ -7457,7 +7500,7 @@
 	 * we need to flush the dirty pages again to make absolutely sure
 	 * that any outstanding dirty pages are on disk.
 	 */
-	count = iov_length(iov, nr_segs);
+	count = iov_iter_count(iter);
 	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 		     &BTRFS_I(inode)->runtime_flags))
 		filemap_fdatawrite_range(inode->i_mapping, offset, count);
@@ -7484,7 +7527,7 @@
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+			iter, offset, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
 		if (ret < 0 && ret != -EIOCBQUEUED)
@@ -7992,7 +8035,7 @@
 	err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
 	if (err)
 		btrfs_err(new_root->fs_info,
-			  "error inheriting subvolume %llu properties: %d\n",
+			  "error inheriting subvolume %llu properties: %d",
 			  new_root->root_key.objectid, err);
 
 	err = btrfs_update_inode(trans, new_root, inode);
@@ -8311,7 +8354,7 @@
 	BTRFS_I(old_inode)->dir_index = 0ULL;
 	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		/* force full log commit if subvolume involved. */
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 	} else {
 		ret = btrfs_insert_inode_ref(trans, dest,
 					     new_dentry->d_name.name,
@@ -8889,6 +8932,66 @@
 	return generic_permission(inode, mask);
 }
 
+static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode = NULL;
+	u64 objectid;
+	u64 index;
+	int ret = 0;
+
+	/*
+	 * 5 units required for adding orphan entry
+	 */
+	trans = btrfs_start_transaction(root, 5);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	ret = btrfs_find_free_ino(root, &objectid);
+	if (ret)
+		goto out;
+
+	inode = btrfs_new_inode(trans, root, dir, NULL, 0,
+				btrfs_ino(dir), objectid, mode, &index);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		inode = NULL;
+		goto out;
+	}
+
+	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
+	if (ret)
+		goto out;
+
+	ret = btrfs_update_inode(trans, root, inode);
+	if (ret)
+		goto out;
+
+	inode->i_fop = &btrfs_file_operations;
+	inode->i_op = &btrfs_file_inode_operations;
+
+	inode->i_mapping->a_ops = &btrfs_aops;
+	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+	ret = btrfs_orphan_add(trans, inode);
+	if (ret)
+		goto out;
+
+	d_tmpfile(dentry, inode);
+	mark_inode_dirty(inode);
+
+out:
+	btrfs_end_transaction(trans, root);
+	if (ret)
+		iput(inode);
+	btrfs_balance_delayed_items(root);
+	btrfs_btree_balance_dirty(root);
+
+	return ret;
+}
+
 static const struct inode_operations btrfs_dir_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.lookup		= btrfs_lookup,
@@ -8909,6 +9012,7 @@
 	.get_acl	= btrfs_get_acl,
 	.set_acl	= btrfs_set_acl,
 	.update_time	= btrfs_update_time,
+	.tmpfile        = btrfs_tmpfile,
 };
 static const struct inode_operations btrfs_dir_ro_inode_operations = {
 	.lookup		= btrfs_lookup,

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3f52bb7..0d321c2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c

@@ -58,6 +58,7 @@
 #include "dev-replace.h"
 #include "props.h"
 #include "sysfs.h"
+#include "qgroup.h"
 
 #ifdef CONFIG_64BIT
 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -638,7 +639,7 @@
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	if (!root->ref_cows)
+	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return -EINVAL;
 
 	atomic_inc(&root->will_be_snapshoted);
@@ -711,6 +712,35 @@
 	if (ret)
 		goto fail;
 
+	/*
+	 * If orphan cleanup did remove any orphans, it means the tree was
+	 * modified and therefore the commit root is not the same as the
+	 * current root anymore. This is a problem, because send uses the
+	 * commit root and therefore can see inode items that don't exist
+	 * in the current root anymore, and for example make calls to
+	 * btrfs_iget, which will do tree lookups based on the current root
+	 * and not on the commit root. Those lookups will fail, returning a
+	 * -ESTALE error, and making send fail with that error. So make sure
+	 * a send does not see any orphans we have just removed, and that it
+	 * will see the same inodes regardless of whether a transaction
+	 * commit happened before it started (meaning that the commit root
+	 * will be the same as the current root) or not.
+	 */
+	if (readonly && pending_snapshot->snap->node !=
+	    pending_snapshot->snap->commit_root) {
+		trans = btrfs_join_transaction(pending_snapshot->snap);
+		if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
+			ret = PTR_ERR(trans);
+			goto fail;
+		}
+		if (!IS_ERR(trans)) {
+			ret = btrfs_commit_transaction(trans,
+						       pending_snapshot->snap);
+			if (ret)
+				goto fail;
+		}
+	}
+
 	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
@@ -1502,11 +1532,12 @@
 	sizestr = vol_args->name;
 	devstr = strchr(sizestr, ':');
 	if (devstr) {
-		char *end;
 		sizestr = devstr + 1;
 		*devstr = '\0';
 		devstr = vol_args->name;
-		devid = simple_strtoull(devstr, &end, 10);
+		ret = kstrtoull(devstr, 10, &devid);
+		if (ret)
+			goto out_free;
 		if (!devid) {
 			ret = -EINVAL;
 			goto out_free;
@@ -1562,7 +1593,7 @@
 		new_size = old_size - new_size;
 	} else if (mod > 0) {
 		if (new_size > ULLONG_MAX - old_size) {
-			ret = -EINVAL;
+			ret = -ERANGE;
 			goto out_free;
 		}
 		new_size = old_size + new_size;
@@ -1926,7 +1957,8 @@
 			       struct btrfs_path *path,
 			       struct btrfs_key *key,
 			       struct btrfs_ioctl_search_key *sk,
-			       char *buf,
+			       size_t *buf_size,
+			       char __user *ubuf,
 			       unsigned long *sk_offset,
 			       int *num_found)
 {
@@ -1958,13 +1990,25 @@
 		if (!key_in_sk(key, sk))
 			continue;
 
-		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
-			item_len = 0;
+		if (sizeof(sh) + item_len > *buf_size) {
+			if (*num_found) {
+				ret = 1;
+				goto out;
+			}
 
-		if (sizeof(sh) + item_len + *sk_offset >
-		    BTRFS_SEARCH_ARGS_BUFSIZE) {
+			/*
+			 * return one empty item back for v1, which does not
+			 * handle -EOVERFLOW
+			 */
+
+			*buf_size = sizeof(sh) + item_len;
+			item_len = 0;
+			ret = -EOVERFLOW;
+		}
+
+		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
 			ret = 1;
-			goto overflow;
+			goto out;
 		}
 
 		sh.objectid = key->objectid;
@@ -1974,20 +2018,33 @@
 		sh.transid = found_transid;
 
 		/* copy search result header */
-		memcpy(buf + *sk_offset, &sh, sizeof(sh));
+		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		*sk_offset += sizeof(sh);
 
 		if (item_len) {
-			char *p = buf + *sk_offset;
+			char __user *up = ubuf + *sk_offset;
 			/* copy the item */
-			read_extent_buffer(leaf, p,
-					   item_off, item_len);
+			if (read_extent_buffer_to_user(leaf, up,
+						       item_off, item_len)) {
+				ret = -EFAULT;
+				goto out;
+			}
+
 			*sk_offset += item_len;
 		}
 		(*num_found)++;
 
-		if (*num_found >= sk->nr_items)
-			break;
+		if (ret) /* -EOVERFLOW from above */
+			goto out;
+
+		if (*num_found >= sk->nr_items) {
+			ret = 1;
+			goto out;
+		}
 	}
 advance_key:
 	ret = 0;
@@ -2002,22 +2059,37 @@
 		key->objectid++;
 	} else
 		ret = 1;
-overflow:
+out:
+	/*
+	 *  0: all items from this leaf copied, continue with next
+	 *  1: * more items can be copied, but unused buffer is too small
+	 *     * all items were found
+	 *     Either way, it stops the loop which iterates to the next
+	 *     leaf
+	 *  -EOVERFLOW: item was too large for buffer
+	 *  -EFAULT: could not copy extent buffer back to userspace
+	 */
 	return ret;
 }
 
 static noinline int search_ioctl(struct inode *inode,
-				 struct btrfs_ioctl_search_args *args)
+				 struct btrfs_ioctl_search_key *sk,
+				 size_t *buf_size,
+				 char __user *ubuf)
 {
 	struct btrfs_root *root;
 	struct btrfs_key key;
 	struct btrfs_path *path;
-	struct btrfs_ioctl_search_key *sk = &args->key;
 	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
 	int ret;
 	int num_found = 0;
 	unsigned long sk_offset = 0;
 
+	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
+		*buf_size = sizeof(struct btrfs_ioctl_search_header);
+		return -EOVERFLOW;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2051,14 +2123,15 @@
 				ret = 0;
 			goto err;
 		}
-		ret = copy_to_sk(root, path, &key, sk, args->buf,
+		ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
 				 &sk_offset, &num_found);
 		btrfs_release_path(path);
-		if (ret || num_found >= sk->nr_items)
+		if (ret)
 			break;
 
 	}
-	ret = 0;
+	if (ret > 0)
+		ret = 0;
 err:
 	sk->nr_items = num_found;
 	btrfs_free_path(path);
@@ -2068,22 +2141,73 @@
 static noinline int btrfs_ioctl_tree_search(struct file *file,
 					   void __user *argp)
 {
-	 struct btrfs_ioctl_search_args *args;
-	 struct inode *inode;
-	 int ret;
+	struct btrfs_ioctl_search_args __user *uargs;
+	struct btrfs_ioctl_search_key sk;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	args = memdup_user(argp, sizeof(*args));
-	if (IS_ERR(args))
-		return PTR_ERR(args);
+	uargs = (struct btrfs_ioctl_search_args __user *)argp;
+
+	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+		return -EFAULT;
+
+	buf_size = sizeof(uargs->buf);
 
 	inode = file_inode(file);
-	ret = search_ioctl(inode, args);
-	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+
+	/*
+	 * In the original implementation an overflow is handled by returning a
+	 * search header with a len of zero, so reset ret.
+	 */
+	if (ret == -EOVERFLOW)
+		ret = 0;
+
+	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
 		ret = -EFAULT;
-	kfree(args);
+	return ret;
+}
+
+static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+					       void __user *argp)
+{
+	struct btrfs_ioctl_search_args_v2 __user *uarg;
+	struct btrfs_ioctl_search_args_v2 args;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
+	const size_t buf_limit = 16 * 1024 * 1024;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* copy search header and buffer size */
+	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	buf_size = args.buf_size;
+
+	if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (buf_size > buf_limit)
+		buf_size = buf_limit;
+
+	inode = file_inode(file);
+	ret = search_ioctl(inode, &args.key, &buf_size,
+			   (char *)(&uarg->buf[0]));
+	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+		ret = -EFAULT;
+	else if (ret == -EOVERFLOW &&
+		copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+		ret = -EFAULT;
+
 	return ret;
 }
 
@@ -2219,6 +2343,7 @@
 	struct btrfs_ioctl_vol_args *vol_args;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_block_rsv block_rsv;
+	u64 root_flags;
 	u64 qgroup_reserved;
 	int namelen;
 	int ret;
@@ -2240,6 +2365,7 @@
 	if (err)
 		goto out;
 
+
 	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
 	if (err == -EINTR)
 		goto out_drop_write;
@@ -2301,6 +2427,27 @@
 	}
 
 	mutex_lock(&inode->i_mutex);
+
+	/*
+	 * Don't allow deleting a subvolume with send in progress. This is
+	 * inside the i_mutex so the error handling that has to drop the bit
+	 * again is not run concurrently.
+	 */
+	spin_lock(&dest->root_item_lock);
+	root_flags = btrfs_root_flags(&dest->root_item);
+	if (dest->send_in_progress == 0) {
+		btrfs_set_root_flags(&dest->root_item,
+				root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+		spin_unlock(&dest->root_item_lock);
+	} else {
+		spin_unlock(&dest->root_item_lock);
+		btrfs_warn(root->fs_info,
+			"Attempt to delete subvolume %llu during send",
+			dest->root_key.objectid);
+		err = -EPERM;
+		goto out_dput;
+	}
+
 	err = d_invalidate(dentry);
 	if (err)
 		goto out_unlock;
@@ -2346,7 +2493,7 @@
 	dest->root_item.drop_level = 0;
 	btrfs_set_root_refs(&dest->root_item, 0);
 
-	if (!xchg(&dest->orphan_item_inserted, 1)) {
+	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
 		ret = btrfs_insert_orphan_item(trans,
 					root->fs_info->tree_root,
 					dest->root_key.objectid);
@@ -2389,11 +2536,19 @@
 out_up_write:
 	up_write(&root->fs_info->subvol_sem);
 out_unlock:
+	if (err) {
+		spin_lock(&dest->root_item_lock);
+		root_flags = btrfs_root_flags(&dest->root_item);
+		btrfs_set_root_flags(&dest->root_item,
+				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+		spin_unlock(&dest->root_item_lock);
+	}
 	mutex_unlock(&inode->i_mutex);
 	if (!err) {
 		shrink_dcache_sb(root->fs_info->sb);
 		btrfs_invalidate_inodes(dest);
 		d_delete(dentry);
+		ASSERT(dest->send_in_progress == 0);
 
 		/* the last ref */
 		if (dest->cache_inode) {
@@ -2557,9 +2712,6 @@
 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 	int ret = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
 	if (!fi_args)
 		return -ENOMEM;
@@ -2574,6 +2726,10 @@
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
+	fi_args->nodesize = root->fs_info->super_copy->nodesize;
+	fi_args->sectorsize = root->fs_info->super_copy->sectorsize;
+	fi_args->clone_alignment = root->fs_info->super_copy->sectorsize;
+
 	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
 		ret = -EFAULT;
 
@@ -2589,9 +2745,6 @@
 	int ret = 0;
 	char *s_uuid = NULL;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	di_args = memdup_user(arg, sizeof(*di_args));
 	if (IS_ERR(di_args))
 		return PTR_ERR(di_args);
@@ -2669,10 +2822,15 @@
 		lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
 		ordered = btrfs_lookup_first_ordered_extent(inode,
 							    off + len - 1);
-		if (!ordered &&
+		if ((!ordered ||
+		     ordered->file_offset + ordered->len <= off ||
+		     ordered->file_offset >= off + len) &&
 		    !test_range_bit(&BTRFS_I(inode)->io_tree, off,
-				    off + len - 1, EXTENT_DELALLOC, 0, NULL))
+				    off + len - 1, EXTENT_DELALLOC, 0, NULL)) {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
 			break;
+		}
 		unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
@@ -2912,6 +3070,126 @@
 	return ret;
 }
 
+/* Helper to check and see if this root currently has a ref on the given disk
+ * bytenr.  If it does then we need to update the quota for this root.  This
+ * doesn't do anything if quotas aren't enabled.
+ */
+static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		     u64 disko)
+{
+	struct seq_list tree_mod_seq_elem = {};
+	struct ulist *roots;
+	struct ulist_iterator uiter;
+	struct ulist_node *root_node = NULL;
+	int ret;
+
+	if (!root->fs_info->quota_enabled)
+		return 1;
+
+	btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
+	ret = btrfs_find_all_roots(trans, root->fs_info, disko,
+				   tree_mod_seq_elem.seq, &roots);
+	if (ret < 0)
+		goto out;
+	ret = 0;
+	ULIST_ITER_INIT(&uiter);
+	while ((root_node = ulist_next(roots, &uiter))) {
+		if (root_node->val == root->objectid) {
+			ret = 1;
+			break;
+		}
+	}
+	ulist_free(roots);
+out:
+	btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
+	return ret;
+}
+
+static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
+				     struct inode *inode,
+				     u64 endoff,
+				     const u64 destoff,
+				     const u64 olen)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	/*
+	 * We round up to the block size at eof when determining which
+	 * extents to clone above, but shouldn't round up the file size.
+	 */
+	if (endoff > destoff + olen)
+		endoff = destoff + olen;
+	if (endoff > inode->i_size)
+		btrfs_i_size_write(inode, endoff);
+
+	ret = btrfs_update_inode(trans, root, inode);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		btrfs_end_transaction(trans, root);
+		goto out;
+	}
+	ret = btrfs_end_transaction(trans, root);
+out:
+	return ret;
+}
+
+static void clone_update_extent_map(struct inode *inode,
+				    const struct btrfs_trans_handle *trans,
+				    const struct btrfs_path *path,
+				    struct btrfs_file_extent_item *fi,
+				    const u64 hole_offset,
+				    const u64 hole_len)
+{
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	int ret;
+
+	em = alloc_extent_map();
+	if (!em) {
+		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+			&BTRFS_I(inode)->runtime_flags);
+		return;
+	}
+
+	if (fi) {
+		btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
+		em->generation = -1;
+		if (btrfs_file_extent_type(path->nodes[0], fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+				&BTRFS_I(inode)->runtime_flags);
+	} else {
+		em->start = hole_offset;
+		em->len = hole_len;
+		em->ram_bytes = em->len;
+		em->orig_start = hole_offset;
+		em->block_start = EXTENT_MAP_HOLE;
+		em->block_len = 0;
+		em->orig_block_len = 0;
+		em->compress_type = BTRFS_COMPRESS_NONE;
+		em->generation = trans->transid;
+	}
+
+	while (1) {
+		write_lock(&em_tree->lock);
+		ret = add_extent_mapping(em_tree, em, 1);
+		write_unlock(&em_tree->lock);
+		if (ret != -EEXIST) {
+			free_extent_map(em);
+			break;
+		}
+		btrfs_drop_extent_cache(inode, em->start,
+					em->start + em->len - 1, 0);
+	}
+
+	if (unlikely(ret))
+		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+			&BTRFS_I(inode)->runtime_flags);
+}
+
 /**
  * btrfs_clone() - clone a range from inode file to another
  *
@@ -2924,7 +3202,8 @@
  * @destoff: Offset within @inode to start clone
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
-		       u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+		       const u64 off, const u64 olen, const u64 olen_aligned,
+		       const u64 destoff)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path = NULL;
@@ -2935,7 +3214,10 @@
 	u32 nritems;
 	int slot;
 	int ret;
-	u64 len = olen_aligned;
+	int no_quota;
+	const u64 len = olen_aligned;
+	u64 last_disko = 0;
+	u64 last_dest_end = destoff;
 
 	ret = -ENOMEM;
 	buf = vmalloc(btrfs_level_size(root, 0));
@@ -2952,7 +3234,7 @@
 	/* clone data */
 	key.objectid = btrfs_ino(src);
 	key.type = BTRFS_EXTENT_DATA_KEY;
-	key.offset = 0;
+	key.offset = off;
 
 	while (1) {
 		/*
@@ -2964,9 +3246,21 @@
 				0, 0);
 		if (ret < 0)
 			goto out;
+		/*
+		 * First search, if no extent item that starts at offset off was
+		 * found but the previous item is an extent item, it's possible
+		 * it might overlap our target range, therefore process it.
+		 */
+		if (key.offset == off && ret > 0 && path->slots[0] > 0) {
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0] - 1);
+			if (key.type == BTRFS_EXTENT_DATA_KEY)
+				path->slots[0]--;
+		}
 
 		nritems = btrfs_header_nritems(path->nodes[0]);
 process_slot:
+		no_quota = 1;
 		if (path->slots[0] >= nritems) {
 			ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
 			if (ret < 0)
@@ -2991,7 +3285,7 @@
 			u64 disko = 0, diskl = 0;
 			u64 datao = 0, datal = 0;
 			u8 comp;
-			u64 endoff;
+			u64 drop_start;
 
 			extent = btrfs_item_ptr(leaf, slot,
 						struct btrfs_file_extent_item);
@@ -3012,10 +3306,16 @@
 								    extent);
 			}
 
-			if (key.offset + datal <= off ||
-			    key.offset >= off + len - 1) {
+			/*
+			 * The first search might have left us at an extent
+			 * item that ends before our target range's start, can
+			 * happen if we have holes and NO_HOLES feature enabled.
+			 */
+			if (key.offset + datal <= off) {
 				path->slots[0]++;
 				goto process_slot;
+			} else if (key.offset >= off + len) {
+				break;
 			}
 
 			size = btrfs_item_size_nr(leaf, slot);
@@ -3034,6 +3334,18 @@
 				new_key.offset = destoff;
 
 			/*
+			 * Deal with a hole that doesn't have an extent item
+			 * that represents it (NO_HOLES feature enabled).
+			 * This hole is either in the middle of the cloning
+			 * range or at the beginning (fully overlaps it or
+			 * partially overlaps it).
+			 */
+			if (new_key.offset != last_dest_end)
+				drop_start = last_dest_end;
+			else
+				drop_start = new_key.offset;
+
+			/*
 			 * 1 - adjusting old extent (we may have to split it)
 			 * 1 - add new extent
 			 * 1 - inode update
@@ -3051,18 +3363,18 @@
 				 * | ------------- extent ------------- |
 				 */
 
-				/* substract range b */
+				/* subtract range b */
 				if (key.offset + datal > off + len)
 					datal = off + len - key.offset;
 
-				/* substract range a */
+				/* subtract range a */
 				if (off > key.offset) {
 					datao += off - key.offset;
 					datal -= off - key.offset;
 				}
 
 				ret = btrfs_drop_extents(trans, root, inode,
-							 new_key.offset,
+							 drop_start,
 							 new_key.offset + datal,
 							 1);
 				if (ret) {
@@ -3099,6 +3411,28 @@
 							     datao);
 				btrfs_set_file_extent_num_bytes(leaf, extent,
 								datal);
+
+				/*
+				 * We need to look up the roots that point at
+				 * this bytenr and see if the new root does.  If
+				 * it does not we need to make sure we update
+				 * quotas appropriately.
+				 */
+				if (disko && root != BTRFS_I(src)->root &&
+				    disko != last_disko) {
+					no_quota = check_ref(trans, root,
+							     disko);
+					if (no_quota < 0) {
+						btrfs_abort_transaction(trans,
+									root,
+									ret);
+						btrfs_end_transaction(trans,
+								      root);
+						ret = no_quota;
+						goto out;
+					}
+				}
+
 				if (disko) {
 					inode_add_bytes(inode, datal);
 					ret = btrfs_inc_extent_ref(trans, root,
@@ -3106,7 +3440,7 @@
 							root->root_key.objectid,
 							btrfs_ino(inode),
 							new_key.offset - datao,
-							0);
+							no_quota);
 					if (ret) {
 						btrfs_abort_transaction(trans,
 									root,
@@ -3141,7 +3475,7 @@
 				aligned_end = ALIGN(new_key.offset + datal,
 						    root->sectorsize);
 				ret = btrfs_drop_extents(trans, root, inode,
-							 new_key.offset,
+							 drop_start,
 							 aligned_end,
 							 1);
 				if (ret) {
@@ -3174,40 +3508,69 @@
 					    btrfs_item_ptr_offset(leaf, slot),
 					    size);
 				inode_add_bytes(inode, datal);
+				extent = btrfs_item_ptr(leaf, slot,
+						struct btrfs_file_extent_item);
 			}
 
+			/* If we have an implicit hole (NO_HOLES feature). */
+			if (drop_start < new_key.offset)
+				clone_update_extent_map(inode, trans,
+						path, NULL, drop_start,
+						new_key.offset - drop_start);
+
+			clone_update_extent_map(inode, trans, path,
+						extent, 0, 0);
+
 			btrfs_mark_buffer_dirty(leaf);
 			btrfs_release_path(path);
 
-			inode_inc_iversion(inode);
-			inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-
-			/*
-			 * we round up to the block size at eof when
-			 * determining which extents to clone above,
-			 * but shouldn't round up the file size
-			 */
-			endoff = new_key.offset + datal;
-			if (endoff > destoff+olen)
-				endoff = destoff+olen;
-			if (endoff > inode->i_size)
-				btrfs_i_size_write(inode, endoff);
-
-			ret = btrfs_update_inode(trans, root, inode);
-			if (ret) {
-				btrfs_abort_transaction(trans, root, ret);
-				btrfs_end_transaction(trans, root);
+			last_dest_end = new_key.offset + datal;
+			ret = clone_finish_inode_update(trans, inode,
+							last_dest_end,
+							destoff, olen);
+			if (ret)
 				goto out;
-			}
-			ret = btrfs_end_transaction(trans, root);
+			if (new_key.offset + datal >= destoff + len)
+				break;
 		}
 		btrfs_release_path(path);
 		key.offset++;
 	}
 	ret = 0;
 
+	if (last_dest_end < destoff + len) {
+		/*
+		 * We have an implicit hole (NO_HOLES feature is enabled) that
+		 * fully or partially overlaps our cloning range at its end.
+		 */
+		btrfs_release_path(path);
+
+		/*
+		 * 1 - remove extent(s)
+		 * 1 - inode update
+		 */
+		trans = btrfs_start_transaction(root, 2);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			goto out;
+		}
+		ret = btrfs_drop_extents(trans, root, inode,
+					 last_dest_end, destoff + len, 1);
+		if (ret) {
+			if (ret != -EOPNOTSUPP)
+				btrfs_abort_transaction(trans, root, ret);
+			btrfs_end_transaction(trans, root);
+			goto out;
+		}
+		ret = clone_finish_inode_update(trans, inode, destoff + len,
+						destoff, olen);
+		if (ret)
+			goto out;
+		clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
+					destoff + len - last_dest_end);
+	}
+
 out:
-	btrfs_release_path(path);
 	btrfs_free_path(path);
 	vfree(buf);
 	return ret;
@@ -3319,15 +3682,41 @@
 			goto out_unlock;
 	}
 
-	/* truncate page cache pages from target inode range */
-	truncate_inode_pages_range(&inode->i_data, destoff,
-				   PAGE_CACHE_ALIGN(destoff + len) - 1);
+	/*
+	 * Lock the target range too. Right after we replace the file extent
+	 * items in the fs tree (which now point to the cloned data), we might
+	 * have a worker replace them with extent items relative to a write
+	 * operation that was issued before this clone operation (i.e. confront
+	 * with inode.c:btrfs_finish_ordered_io).
+	 */
+	if (same_inode) {
+		u64 lock_start = min_t(u64, off, destoff);
+		u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-	lock_extent_range(src, off, len);
+		lock_extent_range(src, lock_start, lock_len);
+	} else {
+		lock_extent_range(src, off, len);
+		lock_extent_range(inode, destoff, len);
+	}
 
 	ret = btrfs_clone(src, inode, off, olen, len, destoff);
 
-	unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
+	if (same_inode) {
+		u64 lock_start = min_t(u64, off, destoff);
+		u64 lock_end = max_t(u64, off, destoff) + len - 1;
+
+		unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end);
+	} else {
+		unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
+		unlock_extent(&BTRFS_I(inode)->io_tree, destoff,
+			      destoff + len - 1);
+	}
+	/*
+	 * Truncate page cache pages so that future reads will see the cloned
+	 * data immediately and not the previous data.
+	 */
+	truncate_inode_pages_range(&inode->i_data, destoff,
+				   PAGE_CACHE_ALIGN(destoff + len) - 1);
 out_unlock:
 	if (!same_inode) {
 		if (inode < src) {
@@ -4902,6 +5291,8 @@
 		return btrfs_ioctl_trans_end(file);
 	case BTRFS_IOC_TREE_SEARCH:
 		return btrfs_ioctl_tree_search(file, argp);
+	case BTRFS_IOC_TREE_SEARCH_V2:
+		return btrfs_ioctl_tree_search_v2(file, argp);
 	case BTRFS_IOC_INO_LOOKUP:
 		return btrfs_ioctl_ino_lookup(file, argp);
 	case BTRFS_IOC_INO_PATHS:

diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index b47f669..dfad851 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c

@@ -143,7 +143,7 @@
 		if (ret != LZO_E_OK) {
 			printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
 			       ret);
-			ret = -1;
+			ret = -EIO;
 			goto out;
 		}
 
@@ -189,7 +189,7 @@
 				kunmap(out_page);
 				if (nr_pages == nr_dest_pages) {
 					out_page = NULL;
-					ret = -1;
+					ret = -E2BIG;
 					goto out;
 				}
 
@@ -208,7 +208,7 @@
 
 		/* we're making it bigger, give up */
 		if (tot_in > 8192 && tot_in < tot_out) {
-			ret = -1;
+			ret = -E2BIG;
 			goto out;
 		}
 
@@ -335,7 +335,7 @@
 					break;
 
 				if (page_in_index + 1 >= total_pages_in) {
-					ret = -1;
+					ret = -EIO;
 					goto done;
 				}
 
@@ -358,7 +358,7 @@
 			kunmap(pages_in[page_in_index - 1]);
 		if (ret != LZO_E_OK) {
 			printk(KERN_WARNING "BTRFS: decompress failed\n");
-			ret = -1;
+			ret = -EIO;
 			break;
 		}
 
@@ -402,12 +402,12 @@
 	ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
 	if (ret != LZO_E_OK) {
 		printk(KERN_WARNING "BTRFS: decompress failed!\n");
-		ret = -1;
+		ret = -EIO;
 		goto out;
 	}
 
 	if (out_len < start_byte) {
-		ret = -1;
+		ret = -EIO;
 		goto out;
 	}
 

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a94b05f..e12441c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c

@@ -67,7 +67,7 @@
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
-		    "%llu\n", offset);
+		    "%llu", offset);
 }
 
 /*

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2cf9058..98cb6b2 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c

@@ -32,6 +32,7 @@
 #include "ulist.h"
 #include "backref.h"
 #include "extent_io.h"
+#include "qgroup.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -84,8 +85,8 @@
 	/*
 	 * temp variables for accounting operations
 	 */
-	u64 tag;
-	u64 refcnt;
+	u64 old_refcnt;
+	u64 new_refcnt;
 };
 
 /*
@@ -98,6 +99,9 @@
 	struct btrfs_qgroup *member;
 };
 
+/*
+ * Convert a struct btrfs_qgroup pointer to/from the u64 value stored in a
+ * ulist node.  The argument is parenthesized so the cast applies to the whole
+ * expression passed in, not just its first operand.
+ */
+#define ptr_to_u64(x) ((u64)(uintptr_t)(x))
+#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)(x))
+
 static int
 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 		   int init_flags);
@@ -242,6 +246,21 @@
 	return -ENOENT;
 }
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+/*
+ * Sanity-test helper: check that qgroup @qgroupid exists and that its
+ * referenced and exclusive counters equal @rfer and @excl.
+ *
+ * Returns 0 on a match, -EINVAL if the qgroup is not found or either counter
+ * differs.
+ */
+int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+			       u64 rfer, u64 excl)
+{
+	struct btrfs_qgroup *qg = find_qgroup_rb(fs_info, qgroupid);
+
+	if (!qg || qg->rfer != rfer || qg->excl != excl)
+		return -EINVAL;
+	return 0;
+}
+#endif
+
 /*
  * The full config is read in one go, only called from open_ctree()
  * It doesn't use any locking, as at this point we're still single-threaded
@@ -520,6 +539,10 @@
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state)))
+		return 0;
+#endif
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -669,6 +692,10 @@
 	int ret;
 	int slot;
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return 0;
+#endif
 	key.objectid = 0;
 	key.type = BTRFS_QGROUP_INFO_KEY;
 	key.offset = qgroup->qgroupid;
@@ -1174,33 +1201,198 @@
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
+/*
+ * Three-way comparison used to order the qgroup operation rbtree.
+ *
+ * Operations are compared by bytenr, then seq, then ref_root, then type.
+ * Returns -1 if @oper1 sorts before @oper2, 1 if it sorts after, and 0 when
+ * all four fields are equal.
+ */
+static int comp_oper(struct btrfs_qgroup_operation *oper1,
+		     struct btrfs_qgroup_operation *oper2)
+{
+	if (oper1->bytenr < oper2->bytenr)
+		return -1;
+	if (oper1->bytenr > oper2->bytenr)
+		return 1;
+	if (oper1->seq < oper2->seq)
+		return -1;
+	/* A larger seq must sort after, otherwise the order is inconsistent. */
+	if (oper1->seq > oper2->seq)
+		return 1;
+	if (oper1->ref_root < oper2->ref_root)
+		return -1;
+	if (oper1->ref_root > oper2->ref_root)
+		return 1;
+	if (oper1->type < oper2->type)
+		return -1;
+	if (oper1->type > oper2->type)
+		return 1;
+	return 0;
+}
+
+/*
+ * Link @oper into fs_info->qgroup_op_tree, ordered by comp_oper().
+ *
+ * The tree walk and the insertion are done under fs_info->qgroup_op_lock.
+ * Returns 0 once the node is linked, or -EEXIST when comp_oper() reports an
+ * equal entry already in the tree (nothing is inserted in that case).
+ */
+static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
+			      struct btrfs_qgroup_operation *oper)
+{
+	struct rb_node **link;
+	struct rb_node *parent = NULL;
+	int ret = 0;
+
+	spin_lock(&fs_info->qgroup_op_lock);
+	link = &fs_info->qgroup_op_tree.rb_node;
+	while (*link) {
+		struct btrfs_qgroup_operation *entry;
+		int cmp;
+
+		parent = *link;
+		entry = rb_entry(parent, struct btrfs_qgroup_operation, n);
+		cmp = comp_oper(entry, oper);
+		if (cmp == 0) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+		/* Existing entry sorts first: descend right, else left. */
+		link = cmp < 0 ? &parent->rb_right : &parent->rb_left;
+	}
+	rb_link_node(&oper->n, parent, link);
+	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
+unlock:
+	spin_unlock(&fs_info->qgroup_op_lock);
+	return ret;
+}
 
 /*
- * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
- * the modification into a list that's later used by btrfs_end_transaction to
- * pass the recorded modifications on to btrfs_qgroup_account_ref.
+ * Record a quota operation for processing later on.
+ * @trans: the transaction we are adding the delayed op to.
+ * @fs_info: the fs_info for this fs.
+ * @ref_root: the root of the reference we are acting on,
+ * @bytenr: the bytenr we are acting on.
+ * @num_bytes: the number of bytes in the reference.
+ * @type: the type of operation this is.
+ * @mod_seq: do we need to get a sequence number for looking up roots.
+ *
+ * We just add it to our trans qgroup_ref_list and carry on and process these
+ * operations in order at some later point.  If the reference root isn't a fs
+ * root then we don't bother with doing anything.
+ *
+ * MUST BE HOLDING THE REF LOCK.
  */
 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_delayed_ref_node *node,
-			    struct btrfs_delayed_extent_op *extent_op)
+			    struct btrfs_fs_info *fs_info, u64 ref_root,
+			    u64 bytenr, u64 num_bytes,
+			    enum btrfs_qgroup_operation_type type, int mod_seq)
 {
-	struct qgroup_update *u;
+	struct btrfs_qgroup_operation *oper;
+	int ret;
 
-	BUG_ON(!trans->delayed_ref_elem.seq);
-	u = kmalloc(sizeof(*u), GFP_NOFS);
-	if (!u)
+	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+		return 0;
+
+	oper = kmalloc(sizeof(*oper), GFP_NOFS);
+	if (!oper)
 		return -ENOMEM;
 
-	u->node = node;
-	u->extent_op = extent_op;
-	list_add_tail(&u->list, &trans->qgroup_ref_list);
+	oper->ref_root = ref_root;
+	oper->bytenr = bytenr;
+	oper->num_bytes = num_bytes;
+	oper->type = type;
+	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
+	INIT_LIST_HEAD(&oper->elem.list);
+	oper->elem.seq = 0;
+	ret = insert_qgroup_oper(fs_info, oper);
+	if (ret) {
+		/* Shouldn't happen so have an assert for developers */
+		ASSERT(0);
+		kfree(oper);
+		return ret;
+	}
+	list_add_tail(&oper->list, &trans->qgroup_ref_list);
+
+	if (mod_seq)
+		btrfs_get_tree_mod_seq(fs_info, &oper->elem);
 
 	return 0;
 }
 
-static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq)
+/*
+ * The easy accounting, if we are adding/removing the only ref for an extent
+ * then this qgroup and all of the parent qgroups get their reference and
+ * exclusive counts adjusted.
+ *
+ * @fs_info: the fs_info holding the qgroup tree and qgroup_lock.
+ * @oper: the operation to apply; ->ref_root selects the qgroup, ->num_bytes
+ *	  is the amount and ->type (ADD_EXCL/SUB_EXCL) picks the sign.
+ *
+ * Returns 0 on success (including when there is no quota root or the qgroup
+ * does not exist), -ENOMEM if the scratch ulist cannot be allocated, or a
+ * negative error from ulist_add().
+ */
+static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_qgroup_operation *oper)
+{
+	struct btrfs_qgroup *qgroup;
+	struct ulist *tmp;
+	struct btrfs_qgroup_list *glist;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	int sign = 0;
+	int ret = 0;
+
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
+
+	spin_lock(&fs_info->qgroup_lock);
+	if (!fs_info->quota_root)
+		goto out;
+	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
+	if (!qgroup)
+		goto out;
+	switch (oper->type) {
+	case BTRFS_QGROUP_OPER_ADD_EXCL:
+		sign = 1;
+		break;
+	case BTRFS_QGROUP_OPER_SUB_EXCL:
+		sign = -1;
+		break;
+	default:
+		ASSERT(0);
+	}
+	qgroup->rfer += sign * oper->num_bytes;
+	qgroup->rfer_cmpr += sign * oper->num_bytes;
+
+	/* Check for underflow before the exclusive count is touched. */
+	WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
+	qgroup->excl += sign * oper->num_bytes;
+	qgroup->excl_cmpr += sign * oper->num_bytes;
+
+	qgroup_dirty(fs_info, qgroup);
+
+	/* Get all of the parent groups that contain this qgroup */
+	list_for_each_entry(glist, &qgroup->groups, next_group) {
+		ret = ulist_add(tmp, glist->group->qgroupid,
+				ptr_to_u64(glist->group), GFP_ATOMIC);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* Iterate all of the parents and adjust their reference counts */
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+		qgroup = u64_to_ptr(unode->aux);
+		qgroup->rfer += sign * oper->num_bytes;
+		qgroup->rfer_cmpr += sign * oper->num_bytes;
+		/*
+		 * Same underflow check as above, done before the subtraction
+		 * so it compares against the value we are about to reduce.
+		 */
+		WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
+		qgroup->excl += sign * oper->num_bytes;
+		qgroup->excl_cmpr += sign * oper->num_bytes;
+		qgroup_dirty(fs_info, qgroup);
+
+		/* Add any parents of the parents */
+		list_for_each_entry(glist, &qgroup->groups, next_group) {
+			ret = ulist_add(tmp, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
+		}
+	}
+	/* ulist_add() results that did not jump to out are not errors. */
+	ret = 0;
+out:
+	spin_unlock(&fs_info->qgroup_lock);
+	ulist_free(tmp);
+	return ret;
+}
+
+/*
+ * Walk all of the roots that pointed to our bytenr and adjust their refcnts
+ * accordingly.
+ */
+static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
+				  u64 root_to_skip, struct ulist *tmp,
+				  struct ulist *roots, struct ulist *qgroups,
+				  u64 seq, int *old_roots, int rescan)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
@@ -1211,129 +1403,484 @@
 
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(roots, &uiter))) {
+		/* We don't count our current root here */
+		if (unode->val == root_to_skip)
+			continue;
 		qg = find_qgroup_rb(fs_info, unode->val);
 		if (!qg)
 			continue;
+		/*
+		 * We could have a pending removal of this same ref so we may
+		 * not have actually found our ref root when doing
+		 * btrfs_find_all_roots, so we need to keep track of how many
+		 * old roots we find in case we removed ours and added a
+		 * different one at the same time.  I don't think this could
+		 * happen in practice but that sort of thinking leads to pain
+		 * and suffering and to the dark side.
+		 */
+		(*old_roots)++;
 
 		ulist_reinit(tmp);
-						/* XXX id not needed */
-		ret = ulist_add(tmp, qg->qgroupid,
-				(u64)(uintptr_t)qg, GFP_ATOMIC);
+		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
+				GFP_ATOMIC);
+		if (ret < 0)
+			return ret;
+		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
 		if (ret < 0)
 			return ret;
 		ULIST_ITER_INIT(&tmp_uiter);
 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
 			struct btrfs_qgroup_list *glist;
 
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->refcnt < seq)
-				qg->refcnt = seq + 1;
+			qg = u64_to_ptr(tmp_unode->aux);
+			/*
+			 * We use this sequence number to keep from having to
+			 * run the whole list and 0 out the refcnt every time.
+			 * We basically use sequence as the known 0 count and
+			 * then add 1 everytime we see a qgroup.  This is how we
+			 * get how many of the roots actually point up to the
+			 * upper level qgroups in order to determine exclusive
+			 * counts.
+			 *
+			 * For rescan we want to set old_refcnt to seq so our
+			 * exclusive calculations end up correct.
+			 */
+			if (rescan)
+				qg->old_refcnt = seq;
+			else if (qg->old_refcnt < seq)
+				qg->old_refcnt = seq + 1;
 			else
-				++qg->refcnt;
+				qg->old_refcnt++;
 
+			if (qg->new_refcnt < seq)
+				qg->new_refcnt = seq + 1;
+			else
+				qg->new_refcnt++;
 			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(qgroups, glist->group->qgroupid,
+						ptr_to_u64(glist->group),
+						GFP_ATOMIC);
+				if (ret < 0)
+					return ret;
 				ret = ulist_add(tmp, glist->group->qgroupid,
-						(u64)(uintptr_t)glist->group,
+						ptr_to_u64(glist->group),
 						GFP_ATOMIC);
 				if (ret < 0)
 					return ret;
 			}
 		}
 	}
-
 	return 0;
 }
 
-static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq, int sgn, u64 num_bytes,
-				    struct btrfs_qgroup *qgroup)
+/*
+ * We need to walk forward in our operation tree and account for any roots that
+ * were deleted after we made this operation.
+ */
+static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
+				       struct btrfs_qgroup_operation *oper,
+				       struct ulist *tmp,
+				       struct ulist *qgroups, u64 seq,
+				       int *old_roots)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	struct btrfs_qgroup *qg;
-	struct btrfs_qgroup_list *glist;
+	struct btrfs_qgroup_operation *tmp_oper;
+	struct rb_node *n;
 	int ret;
 
 	ulist_reinit(tmp);
-	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
-	if (ret < 0)
-		return ret;
 
+	/*
+	 * We only walk forward in the tree since we're only interested in
+	 * removals that happened _after_  our operation.
+	 */
+	spin_lock(&fs_info->qgroup_op_lock);
+	n = rb_next(&oper->n);
+	spin_unlock(&fs_info->qgroup_op_lock);
+	if (!n)
+		return 0;
+	tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
+	while (tmp_oper->bytenr == oper->bytenr) {
+		/*
+		 * If it's not a removal we don't care, additions work out
+		 * properly with our refcnt tracking.
+		 */
+		if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
+		    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
+			goto next;
+		qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
+		if (!qg)
+			goto next;
+		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
+				GFP_ATOMIC);
+		if (ret) {
+			if (ret < 0)
+				return ret;
+			/*
+			 * We only want to increase old_roots if this qgroup is
+			 * not already in the list of qgroups.  If it is already
+			 * there then that means it must have been re-added or
+			 * the delete will be discarded because we had an
+			 * existing ref that we haven't looked up yet.  In this
+			 * case we don't want to increase old_roots.  So if ret
+			 * == 1 then we know that this is the first time we've
+			 * seen this qgroup and we can bump the old_roots.
+			 */
+			(*old_roots)++;
+			ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
+					GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+		}
+next:
+		spin_lock(&fs_info->qgroup_op_lock);
+		n = rb_next(&tmp_oper->n);
+		spin_unlock(&fs_info->qgroup_op_lock);
+		if (!n)
+			break;
+		tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
+	}
+
+	/* Ok now process the qgroups we found */
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(tmp, &uiter))) {
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
-		if (qg->refcnt < seq) {
-			/* not visited by step 1 */
-			qg->rfer += sgn * num_bytes;
-			qg->rfer_cmpr += sgn * num_bytes;
-			if (roots->nnodes == 0) {
-				qg->excl += sgn * num_bytes;
-				qg->excl_cmpr += sgn * num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-		}
-		WARN_ON(qg->tag >= seq);
-		qg->tag = seq;
+		struct btrfs_qgroup_list *glist;
 
+		qg = u64_to_ptr(unode->aux);
+		if (qg->old_refcnt < seq)
+			qg->old_refcnt = seq + 1;
+		else
+			qg->old_refcnt++;
+		if (qg->new_refcnt < seq)
+			qg->new_refcnt = seq + 1;
+		else
+			qg->new_refcnt++;
 		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(qgroups, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
 			ret = ulist_add(tmp, glist->group->qgroupid,
-					(uintptr_t)glist->group, GFP_ATOMIC);
+					ptr_to_u64(glist->group), GFP_ATOMIC);
 			if (ret < 0)
 				return ret;
 		}
 	}
-
 	return 0;
 }
 
-static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq, int sgn, u64 num_bytes)
+/* Add refcnt for the newly added reference. */
+static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
+				  struct btrfs_qgroup_operation *oper,
+				  struct btrfs_qgroup *qgroup,
+				  struct ulist *tmp, struct ulist *qgroups,
+				  u64 seq)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	struct btrfs_qgroup *qg;
-	struct ulist_node *tmp_unode;
-	struct ulist_iterator tmp_uiter;
 	int ret;
 
+	ulist_reinit(tmp);
+	ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
+			GFP_ATOMIC);
+	if (ret < 0)
+		return ret;
+	ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
+			GFP_ATOMIC);
+	if (ret < 0)
+		return ret;
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+		struct btrfs_qgroup_list *glist;
+
+		qg = u64_to_ptr(unode->aux);
+		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
+			if (qg->new_refcnt < seq)
+				qg->new_refcnt = seq + 1;
+			else
+				qg->new_refcnt++;
+		} else {
+			if (qg->old_refcnt < seq)
+				qg->old_refcnt = seq + 1;
+			else
+				qg->old_refcnt++;
+		}
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(tmp, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+			ret = ulist_add(qgroups, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/*
+ * This adjusts the counters for all referenced qgroups if need be.
+ *
+ * @num_bytes: size added to / subtracted from the rfer and excl counters.
+ * @qgroups: ulist whose nodes carry a struct btrfs_qgroup pointer in ->aux.
+ * @seq: base value; a refcnt at or below it counts as zero references.
+ * @old_roots: number of roots before the operation.
+ * @new_roots: number of roots after the operation.
+ *
+ * @root_to_skip and @rescan are not read by this function.  Always returns 0.
+ */
+static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
+				  u64 root_to_skip, u64 num_bytes,
+				  struct ulist *qgroups, u64 seq,
+				  int old_roots, int new_roots, int rescan)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	u64 cur_new_count, cur_old_count;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(qgroups, &uiter))) {
+		/* Only mark the qgroup dirty if a counter actually changed. */
+		bool dirty = false;
+
+		qg = u64_to_ptr(unode->aux);
+		/*
+		 * Wasn't referenced before but is now, add to the reference
+		 * counters.
+		 */
+		if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+			qg->rfer += num_bytes;
+			qg->rfer_cmpr += num_bytes;
+			dirty = true;
+		}
+
+		/*
+		 * Was referenced before but isn't now, subtract from the
+		 * reference counters.
+		 */
+		if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+			qg->rfer -= num_bytes;
+			qg->rfer_cmpr -= num_bytes;
+			dirty = true;
+		}
+
+		/*
+		 * Turn the seq-based refcnts into plain counts, clamping
+		 * values below the base to zero.
+		 */
+		if (qg->old_refcnt < seq)
+			cur_old_count = 0;
+		else
+			cur_old_count = qg->old_refcnt - seq;
+		if (qg->new_refcnt < seq)
+			cur_new_count = 0;
+		else
+			cur_new_count = qg->new_refcnt - seq;
+
+		/*
+		 * If our refcount was the same as the roots previously but our
+		 * new count isn't the same as the number of roots now then we
+		 * went from having an exclusive reference on this range to not.
+		 */
+		if (old_roots && cur_old_count == old_roots &&
+		    (cur_new_count != new_roots || new_roots == 0)) {
+			WARN_ON(cur_new_count != new_roots && new_roots == 0);
+			qg->excl -= num_bytes;
+			qg->excl_cmpr -= num_bytes;
+			dirty = true;
+		}
+
+		/*
+		 * If we didn't reference all the roots before but now we do we
+		 * have an exclusive reference to this range.
+		 */
+		if ((!old_roots || (old_roots && cur_old_count != old_roots))
+		    && cur_new_count == new_roots) {
+			qg->excl += num_bytes;
+			qg->excl_cmpr += num_bytes;
+			dirty = true;
+		}
+
+		if (dirty)
+			qgroup_dirty(fs_info, qg);
+	}
+	return 0;
+}
+
+/*
+ * If we removed a data extent and there were other references for that bytenr
+ * then we need to lookup all referenced roots to make sure we still don't
+ * reference this bytenr.  If we do then we can just discard this operation.
+ */
+static int check_existing_refs(struct btrfs_trans_handle *trans,
+			       struct btrfs_fs_info *fs_info,
+			       struct btrfs_qgroup_operation *oper)
+{
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	int ret = 0;
+
+	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+				   oper->elem.seq, &roots);
+	if (ret < 0)
+		return ret;
+	ret = 0;
+
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(roots, &uiter))) {
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-
-		ulist_reinit(tmp);
-		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
-		if (ret < 0)
-			return ret;
-
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->tag == seq)
-				continue;
-
-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl -= sgn * num_bytes;
-				qg->excl_cmpr -= sgn * num_bytes;
-				qgroup_dirty(fs_info, qg);
-			}
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ret = ulist_add(tmp, glist->group->qgroupid,
-						(uintptr_t)glist->group,
-						GFP_ATOMIC);
-				if (ret < 0)
-					return ret;
-			}
+		if (unode->val == oper->ref_root) {
+			ret = 1;
+			break;
 		}
 	}
+	ulist_free(roots);
+	btrfs_put_tree_mod_seq(fs_info, &oper->elem);
 
-	return 0;
+	return ret;
+}
+
+/*
+ * If we share a reference across multiple roots then we may need to adjust
+ * various qgroups referenced and exclusive counters.  The basic premise is this
+ *
+ * 1) We have seq to represent a 0 count.  Instead of looping through all of the
+ * qgroups and resetting their refcount to 0 we just constantly bump this
+ * sequence number to act as the base reference count.  This means that if
+ * anybody is equal to or below this sequence they were never referenced.  We
+ * jack this sequence up by the number of roots we found each time in order to
+ * make sure we don't have any overlap.
+ *
+ * 2) We first search all the roots that reference the area _except_ the root
+ * we're acting on currently.  This makes up the old_refcnt of all the qgroups
+ * before.
+ *
+ * 3) We walk all of the qgroups referenced by the root we are currently acting
+ * on, and will either adjust old_refcnt in the case of a removal or the
+ * new_refcnt in the case of an addition.
+ *
+ * 4) Finally we walk all the qgroups that are referenced by this range
+ * including the root we are acting on currently.  We will adjust the counters
+ * based on the number of roots we had and will have after this operation.
+ *
+ * Take this example as an illustration
+ *
+ *			[qgroup 1/0]
+ *		     /         |          \
+ *		[qg 0/0]   [qg 0/1]	[qg 0/2]
+ *		   \          |            /
+ *		  [	   extent	    ]
+ *
+ * Say we are adding a reference that is covered by qg 0/0.  The first step
+ * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
+ * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
+ * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
+ * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
+ * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
+ * reference and thus must add the size to the referenced bytes.  Everything
+ * else is the same so nothing else changes.
+ */
+static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info,
+				    struct btrfs_qgroup_operation *oper)
+{
+	struct ulist *roots = NULL;
+	struct ulist *qgroups, *tmp;
+	struct btrfs_qgroup *qgroup;
+	struct seq_list elem = {};
+	u64 seq;
+	int old_roots = 0;
+	int new_roots = 0;
+	int ret = 0;
+
+	if (oper->elem.seq) {
+		ret = check_existing_refs(trans, fs_info, oper);
+		if (ret < 0)
+			return ret;
+		if (ret)
+			return 0;
+	}
+
+	qgroups = ulist_alloc(GFP_NOFS);
+	if (!qgroups)
+		return -ENOMEM;
+
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp) {
+		ulist_free(qgroups);
+		return -ENOMEM;
+	}
+
+	btrfs_get_tree_mod_seq(fs_info, &elem);
+	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
+				   &roots);
+	btrfs_put_tree_mod_seq(fs_info, &elem);
+	if (ret < 0) {
+		ulist_free(qgroups);
+		ulist_free(tmp);
+		return ret;
+	}
+	spin_lock(&fs_info->qgroup_lock);
+	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
+	if (!qgroup)
+		goto out;
+	seq = fs_info->qgroup_seq;
+
+	/*
+	 * So roots is the list of all the roots currently pointing at the
+	 * bytenr, including the ref we are adding if we are adding, or not if
+	 * we are removing a ref.  So we pass in the ref_root to skip that root
+	 * in our calculations.  We set old_refcnt and new_refcnt cause who the
+	 * hell knows what everything looked like before, and it doesn't matter
+	 * except...
+	 */
+	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
+				     seq, &old_roots, 0);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Now adjust the refcounts of the qgroups that care about this
+	 * reference, either the old_count in the case of removal or new_count
+	 * in the case of an addition.
+	 */
+	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
+				     seq);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * ...in the case of removals.  If we had a removal before we got around
+	 * to processing this operation then we need to find that guy and count
+	 * his references as if they really existed so we don't end up screwing
+	 * up the exclusive counts.  Then whenever we go to process the delete
+	 * everything will be grand and we can account for whatever exclusive
+	 * changes need to be made there.  We also have to pass in old_roots so
+	 * we have an accurate count of the roots as it pertains to this
+	 * operation's view of the world.
+	 */
+	ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
+					  &old_roots);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * We are adding our root, need to adjust up the number of roots,
+	 * otherwise old_roots is the number of roots we want.
+	 */
+	if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
+		new_roots = old_roots + 1;
+	} else {
+		new_roots = old_roots;
+		old_roots++;
+	}
+	fs_info->qgroup_seq += old_roots + 1;
+
+
+	/*
+	 * And now the magic happens, bless Arne for having a pretty elegant
+	 * solution for this.
+	 */
+	qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
+			       qgroups, seq, old_roots, new_roots, 0);
+out:
+	spin_unlock(&fs_info->qgroup_lock);
+	ulist_free(qgroups);
+	ulist_free(roots);
+	ulist_free(tmp);
+	return ret;
 }
 
 /*
@@ -1342,125 +1889,65 @@
  * then the space is accounted accordingly to the different roots. The
  * accounting algorithm works in 3 steps documented inline.
  */
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
-			     struct btrfs_delayed_ref_node *node,
-			     struct btrfs_delayed_extent_op *extent_op)
+static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info,
+				struct btrfs_qgroup_operation *oper)
 {
-	struct btrfs_root *quota_root;
-	u64 ref_root;
-	struct btrfs_qgroup *qgroup;
-	struct ulist *roots = NULL;
-	u64 seq;
 	int ret = 0;
-	int sgn;
 
 	if (!fs_info->quota_enabled)
 		return 0;
 
 	BUG_ON(!fs_info->quota_root);
 
-	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
-	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
-		struct btrfs_delayed_tree_ref *ref;
-		ref = btrfs_delayed_node_to_tree_ref(node);
-		ref_root = ref->root;
-	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
-		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
-		struct btrfs_delayed_data_ref *ref;
-		ref = btrfs_delayed_node_to_data_ref(node);
-		ref_root = ref->root;
-	} else {
-		BUG();
-	}
-
-	if (!is_fstree(ref_root)) {
-		/*
-		 * non-fs-trees are not being accounted
-		 */
-		return 0;
-	}
-
-	switch (node->action) {
-	case BTRFS_ADD_DELAYED_REF:
-	case BTRFS_ADD_DELAYED_EXTENT:
-		sgn = 1;
-		seq = btrfs_tree_mod_seq_prev(node->seq);
-		break;
-	case BTRFS_DROP_DELAYED_REF:
-		sgn = -1;
-		seq = node->seq;
-		break;
-	case BTRFS_UPDATE_DELAYED_HEAD:
-		return 0;
-	default:
-		BUG();
-	}
-
 	mutex_lock(&fs_info->qgroup_rescan_lock);
 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+		if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
 			mutex_unlock(&fs_info->qgroup_rescan_lock);
 			return 0;
 		}
 	}
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
 
-	/*
-	 * the delayed ref sequence number we pass depends on the direction of
-	 * the operation. for add operations, we pass
-	 * tree_mod_log_prev_seq(node->seq) to skip
-	 * the delayed ref's current sequence number, because we need the state
-	 * of the tree before the add operation. for delete operations, we pass
-	 * (node->seq) to include the delayed ref's current sequence number,
-	 * because we need the state of the tree after the delete operation.
-	 */
-	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
-	if (ret < 0)
-		return ret;
+	ASSERT(is_fstree(oper->ref_root));
 
-	spin_lock(&fs_info->qgroup_lock);
+	switch (oper->type) {
+	case BTRFS_QGROUP_OPER_ADD_EXCL:
+	case BTRFS_QGROUP_OPER_SUB_EXCL:
+		ret = qgroup_excl_accounting(fs_info, oper);
+		break;
+	case BTRFS_QGROUP_OPER_ADD_SHARED:
+	case BTRFS_QGROUP_OPER_SUB_SHARED:
+		ret = qgroup_shared_accounting(trans, fs_info, oper);
+		break;
+	default:
+		ASSERT(0);
+	}
+	return ret;
+}
 
-	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		goto unlock;
+/*
+ * Needs to be called every time we run delayed refs, even if there is an error
+ * in order to cleanup outstanding operations.
+ */
+int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_qgroup_operation *oper;
+	int ret = 0;
 
-	qgroup = find_qgroup_rb(fs_info, ref_root);
-	if (!qgroup)
-		goto unlock;
-
-	/*
-	 * step 1: for each old ref, visit all nodes once and inc refcnt
-	 */
-	ulist_reinit(fs_info->qgroup_ulist);
-	seq = fs_info->qgroup_seq;
-	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
-	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
-				       seq);
-	if (ret)
-		goto unlock;
-
-	/*
-	 * step 2: walk from the new root
-	 */
-	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
-				       seq, sgn, node->num_bytes, qgroup);
-	if (ret)
-		goto unlock;
-
-	/*
-	 * step 3: walk again from old refs
-	 */
-	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
-				       seq, sgn, node->num_bytes);
-	if (ret)
-		goto unlock;
-
-unlock:
-	spin_unlock(&fs_info->qgroup_lock);
-	ulist_free(roots);
-
+	while (!list_empty(&trans->qgroup_ref_list)) {
+		oper = list_first_entry(&trans->qgroup_ref_list,
+					struct btrfs_qgroup_operation, list);
+		list_del_init(&oper->list);
+		if (!ret || !trans->aborted)
+			ret = btrfs_qgroup_account(trans, fs_info, oper);
+		spin_lock(&fs_info->qgroup_op_lock);
+		rb_erase(&oper->n, &fs_info->qgroup_op_tree);
+		spin_unlock(&fs_info->qgroup_op_lock);
+		btrfs_put_tree_mod_seq(fs_info, &oper->elem);
+		kfree(oper);
+	}
 	return ret;
 }
 
@@ -1629,8 +2116,16 @@
 		srcgroup = find_qgroup_rb(fs_info, srcid);
 		if (!srcgroup)
 			goto unlock;
-		dstgroup->rfer = srcgroup->rfer - level_size;
-		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
+
+		/*
+		 * We call inherit after we clone the root in order to make sure
+		 * our counts don't go crazy, so at this point the only
+		 * difference between the two roots should be the root node.
+		 */
+		dstgroup->rfer = srcgroup->rfer;
+		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
+		dstgroup->excl = level_size;
+		dstgroup->excl_cmpr = level_size;
 		srcgroup->excl = level_size;
 		srcgroup->excl_cmpr = level_size;
 		qgroup_dirty(fs_info, dstgroup);
@@ -1734,7 +2229,7 @@
 		struct btrfs_qgroup *qg;
 		struct btrfs_qgroup_list *glist;
 
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
 		    qg->reserved + (s64)qg->rfer + num_bytes >
@@ -1766,7 +2261,7 @@
 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
 		struct btrfs_qgroup *qg;
 
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);
 
 		qg->reserved += num_bytes;
 	}
@@ -1812,7 +2307,7 @@
 		struct btrfs_qgroup *qg;
 		struct btrfs_qgroup_list *glist;
 
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);
 
 		qg->reserved -= num_bytes;
 
@@ -1848,15 +2343,15 @@
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-		   struct btrfs_trans_handle *trans, struct ulist *tmp,
-		   struct extent_buffer *scratch_leaf)
+		   struct btrfs_trans_handle *trans, struct ulist *qgroups,
+		   struct ulist *tmp, struct extent_buffer *scratch_leaf)
 {
 	struct btrfs_key found;
 	struct ulist *roots = NULL;
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
 	struct seq_list tree_mod_seq_elem = {};
+	u64 num_bytes;
 	u64 seq;
+	int new_roots;
 	int slot;
 	int ret;
 
@@ -1897,8 +2392,6 @@
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
 
 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
-		u64 num_bytes;
-
 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
 		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
 		    found.type != BTRFS_METADATA_ITEM_KEY)
@@ -1908,76 +2401,34 @@
 		else
 			num_bytes = found.offset;
 
-		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
-					   tree_mod_seq_elem.seq, &roots);
+		ulist_reinit(qgroups);
+		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
+					   &roots);
 		if (ret < 0)
 			goto out;
 		spin_lock(&fs_info->qgroup_lock);
 		seq = fs_info->qgroup_seq;
 		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
 
-		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
-		if (ret) {
+		new_roots = 0;
+		ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
+					     seq, &new_roots, 1);
+		if (ret < 0) {
 			spin_unlock(&fs_info->qgroup_lock);
 			ulist_free(roots);
 			goto out;
 		}
 
-		/*
-		 * step2 of btrfs_qgroup_account_ref works from a single root,
-		 * we're doing all at once here.
-		 */
-		ulist_reinit(tmp);
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(roots, &uiter))) {
-			struct btrfs_qgroup *qg;
-
-			qg = find_qgroup_rb(fs_info, unode->val);
-			if (!qg)
-				continue;
-
-			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
-					GFP_ATOMIC);
-			if (ret < 0) {
-				spin_unlock(&fs_info->qgroup_lock);
-				ulist_free(roots);
-				goto out;
-			}
+		ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
+					     seq, 0, new_roots, 1);
+		if (ret < 0) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
 		}
-
-		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(tmp, &uiter))) {
-			struct btrfs_qgroup *qg;
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
-			qg->rfer += num_bytes;
-			qg->rfer_cmpr += num_bytes;
-			WARN_ON(qg->tag >= seq);
-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl += num_bytes;
-				qg->excl_cmpr += num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ret = ulist_add(tmp, glist->group->qgroupid,
-						(uintptr_t)glist->group,
-						GFP_ATOMIC);
-				if (ret < 0) {
-					spin_unlock(&fs_info->qgroup_lock);
-					ulist_free(roots);
-					goto out;
-				}
-			}
-		}
-
 		spin_unlock(&fs_info->qgroup_lock);
 		ulist_free(roots);
-		ret = 0;
 	}
-
 out:
 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 
@@ -1990,13 +2441,16 @@
 						     qgroup_rescan_work);
 	struct btrfs_path *path;
 	struct btrfs_trans_handle *trans = NULL;
-	struct ulist *tmp = NULL;
+	struct ulist *tmp = NULL, *qgroups = NULL;
 	struct extent_buffer *scratch_leaf = NULL;
 	int err = -ENOMEM;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		goto out;
+	qgroups = ulist_alloc(GFP_NOFS);
+	if (!qgroups)
+		goto out;
 	tmp = ulist_alloc(GFP_NOFS);
 	if (!tmp)
 		goto out;
@@ -2015,7 +2469,7 @@
 			err = -EINTR;
 		} else {
 			err = qgroup_rescan_leaf(fs_info, path, trans,
-						 tmp, scratch_leaf);
+						 qgroups, tmp, scratch_leaf);
 		}
 		if (err > 0)
 			btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2025,6 +2479,7 @@
 
 out:
 	kfree(scratch_leaf);
+	ulist_free(qgroups);
 	ulist_free(tmp);
 	btrfs_free_path(path);
 

diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
new file mode 100644
index 0000000..5952ff1
--- /dev/null
+++ b/fs/btrfs/qgroup.h

@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2014 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_QGROUP__
+#define __BTRFS_QGROUP__
+
+/*
+ * A description of the operations, all of these operations only happen when we
+ * are adding the 1st reference for that subvolume in the case of adding space
+ * or on the last reference delete in the case of subtraction.  The only
+ * exception is the last one, which is added for confusion.
+ *
+ * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
+ * one pointing at the bytes we are adding.  This is called on the first
+ * allocation.
+ *
+ * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
+ * shared between subvols.  This is called on the creation of a ref that already
+ * has refs from a different subvolume, so basically reflink.
+ *
+ * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
+ * one referencing the range.
+ *
+ * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares
+ * refs with other subvolumes.
+ */
+enum btrfs_qgroup_operation_type {
+	BTRFS_QGROUP_OPER_ADD_EXCL,
+	BTRFS_QGROUP_OPER_ADD_SHARED,
+	BTRFS_QGROUP_OPER_SUB_EXCL,
+	BTRFS_QGROUP_OPER_SUB_SHARED,
+};
+
+struct btrfs_qgroup_operation {
+	u64 ref_root;
+	u64 bytenr;
+	u64 num_bytes;
+	u64 seq;
+	enum btrfs_qgroup_operation_type type;
+	struct seq_list elem;
+	struct rb_node n;
+	struct list_head list;
+};
+
+int btrfs_quota_enable(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+			struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
+			struct btrfs_fs_info *fs_info, u64 qgroupid,
+			char *name);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info, u64 qgroupid);
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info, u64 qgroupid,
+		       struct btrfs_qgroup_limit *limit);
+int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
+void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
+struct btrfs_delayed_extent_op;
+int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_fs_info *fs_info, u64 ref_root,
+			    u64 bytenr, u64 num_bytes,
+			    enum btrfs_qgroup_operation_type type,
+			    int mod_seq);
+int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info);
+void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
+				   struct btrfs_fs_info *fs_info,
+				   struct btrfs_qgroup_operation *oper);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
+		      struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
+			 struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
+void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
+
+void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+			       u64 rfer, u64 excl);
+#endif
+
+#endif /* __BTRFS_QGROUP__ */

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 30947f9..09230cf 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c

@@ -428,8 +428,13 @@
 			continue;
 		}
 		if (!dev->bdev) {
-			/* cannot read ahead on missing device */
-			continue;
+			/*
+			 * Cannot read ahead on a missing device, but for RAID5/6
+			 * REQ_GET_READ_MIRRORS returns 1, so don't skip the
+			 * missing device in that case.
+			 */
+			if (nzones > 1)
+				continue;
 		}
 		if (dev_replace_is_ongoing &&
 		    dev == fs_info->dev_replace.tgtdev) {

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 7f92ab1..65245a0 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c

@@ -337,7 +337,7 @@
 	if (bnode->root)
 		fs_info = bnode->root->fs_info;
 	btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
-		    "found at offset %llu\n", bytenr);
+		    "found at offset %llu", bytenr);
 }
 
 /*
@@ -528,7 +528,7 @@
 {
 	struct btrfs_root *reloc_root;
 
-	if (!root->ref_cows)
+	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return 0;
 
 	reloc_root = root->reloc_root;
@@ -610,7 +610,7 @@
 	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
 	BUG_ON(IS_ERR(root));
 
-	if (root->ref_cows &&
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    generation != btrfs_root_generation(&root->root_item))
 		return NULL;
 
@@ -887,7 +887,7 @@
 			goto out;
 		}
 
-		if (!root->ref_cows)
+		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 			cur->cowonly = 1;
 
 		if (btrfs_root_level(&root->root_item) == cur->level) {
@@ -954,7 +954,8 @@
 				upper->bytenr = eb->start;
 				upper->owner = btrfs_header_owner(eb);
 				upper->level = lower->level + 1;
-				if (!root->ref_cows)
+				if (!test_bit(BTRFS_ROOT_REF_COWS,
+					      &root->state))
 					upper->cowonly = 1;
 
 				/*
@@ -1258,7 +1259,7 @@
 	if (rb_node) {
 		btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
 			    "for start=%llu while inserting into relocation "
-			    "tree\n", node->bytenr);
+			    "tree", node->bytenr);
 		kfree(node);
 		return -EEXIST;
 	}
@@ -2441,7 +2442,7 @@
 		next = walk_up_backref(next, edges, &index);
 		root = next->root;
 		BUG_ON(!root);
-		BUG_ON(!root->ref_cows);
+		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));
 
 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
 			record_reloc_root_in_trans(trans, root);
@@ -2506,7 +2507,7 @@
 		BUG_ON(!root);
 
 		/* no other choice for non-references counted tree */
-		if (!root->ref_cows)
+		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 			return root;
 
 		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
@@ -2893,14 +2894,14 @@
 		goto out;
 	}
 
-	if (!root || root->ref_cows) {
+	if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
 		ret = reserve_metadata_space(trans, rc, node);
 		if (ret)
 			goto out;
 	}
 
 	if (root) {
-		if (root->ref_cows) {
+		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
 			BUG_ON(node->new_bytenr);
 			BUG_ON(!list_empty(&node->list));
 			btrfs_record_root_in_trans(trans, root);

diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 38bb47e..360a728 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c

@@ -306,7 +306,7 @@
 			break;
 		}
 
-		root->orphan_item_inserted = 1;
+		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 		err = btrfs_insert_fs_root(root->fs_info, root);
 		if (err) {

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0be7799..ac80188 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c

@@ -588,8 +588,9 @@
 
 	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 		do {
-			ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
-							&ref_root, &ref_level);
+			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+						      item_size, &ref_root,
+						      &ref_level);
 			printk_in_rcu(KERN_WARNING
 				"BTRFS: %s at logical %llu on dev %s, "
 				"sector %llu: metadata %s (level %d) in tree "
@@ -717,8 +718,8 @@
 out:
 	if (page)
 		put_page(page);
-	if (inode)
-		iput(inode);
+
+	iput(inode);
 
 	if (ret < 0)
 		return ret;

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 484aaca..6528aa6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c

@@ -975,7 +975,7 @@
 	struct btrfs_dir_item *di;
 	struct btrfs_key di_key;
 	char *buf = NULL;
-	const int buf_len = PATH_MAX;
+	int buf_len;
 	u32 name_len;
 	u32 data_len;
 	u32 cur;
@@ -985,6 +985,11 @@
 	int num;
 	u8 type;
 
+	if (found_key->type == BTRFS_XATTR_ITEM_KEY)
+		buf_len = BTRFS_MAX_XATTR_SIZE(root);
+	else
+		buf_len = PATH_MAX;
+
 	buf = kmalloc(buf_len, GFP_NOFS);
 	if (!buf) {
 		ret = -ENOMEM;
@@ -1006,12 +1011,23 @@
 		type = btrfs_dir_type(eb, di);
 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
 
-		/*
-		 * Path too long
-		 */
-		if (name_len + data_len > buf_len) {
-			ret = -ENAMETOOLONG;
-			goto out;
+		if (type == BTRFS_FT_XATTR) {
+			if (name_len > XATTR_NAME_MAX) {
+				ret = -ENAMETOOLONG;
+				goto out;
+			}
+			if (name_len + data_len > buf_len) {
+				ret = -E2BIG;
+				goto out;
+			}
+		} else {
+			/*
+			 * Path too long
+			 */
+			if (name_len + data_len > buf_len) {
+				ret = -ENAMETOOLONG;
+				goto out;
+			}
 		}
 
 		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
@@ -1349,7 +1365,7 @@
 		ret = -EIO;
 		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
 				"send_root. inode=%llu, offset=%llu, "
-				"disk_byte=%llu found extent=%llu\n",
+				"disk_byte=%llu found extent=%llu",
 				ino, data_offset, disk_byte, found_key.objectid);
 		goto out;
 	}
@@ -1628,6 +1644,10 @@
 		goto out;
 	}
 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+	if (key.type == BTRFS_ROOT_ITEM_KEY) {
+		ret = -ENOENT;
+		goto out;
+	}
 	*found_inode = key.objectid;
 	*found_type = btrfs_dir_type(path->nodes[0], di);
 
@@ -1693,10 +1713,12 @@
 		goto out;
 	btrfs_release_path(path);
 
-	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
-			NULL, NULL);
-	if (ret < 0)
-		goto out;
+	if (dir_gen) {
+		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
+				     NULL, NULL, NULL);
+		if (ret < 0)
+			goto out;
+	}
 
 	*dir = parent_dir;
 
@@ -1712,13 +1734,12 @@
 	int ret;
 	struct fs_path *tmp_name;
 	u64 tmp_dir;
-	u64 tmp_dir_gen;
 
 	tmp_name = fs_path_alloc();
 	if (!tmp_name)
 		return -ENOMEM;
 
-	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
+	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
 	if (ret < 0)
 		goto out;
 
@@ -2029,7 +2050,6 @@
 {
 	int ret;
 	int nce_ret;
-	struct btrfs_path *path = NULL;
 	struct name_cache_entry *nce = NULL;
 
 	/*
@@ -2055,10 +2075,6 @@
 		}
 	}
 
-	path = alloc_path_for_send();
-	if (!path)
-		return -ENOMEM;
-
 	/*
 	 * If the inode is not existent yet, add the orphan name and return 1.
 	 * This should only happen for the parent dir that we determine in
@@ -2134,7 +2150,6 @@
 	name_cache_clean_unused(sctx);
 
 out:
-	btrfs_free_path(path);
 	return ret;
 }
 
@@ -2945,7 +2960,9 @@
 static int add_pending_dir_move(struct send_ctx *sctx,
 				u64 ino,
 				u64 ino_gen,
-				u64 parent_ino)
+				u64 parent_ino,
+				struct list_head *new_refs,
+				struct list_head *deleted_refs)
 {
 	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
 	struct rb_node *parent = NULL;
@@ -2977,12 +2994,12 @@
 		}
 	}
 
-	list_for_each_entry(cur, &sctx->deleted_refs, list) {
+	list_for_each_entry(cur, deleted_refs, list) {
 		ret = dup_ref(cur, &pm->update_refs);
 		if (ret < 0)
 			goto out;
 	}
-	list_for_each_entry(cur, &sctx->new_refs, list) {
+	list_for_each_entry(cur, new_refs, list) {
 		ret = dup_ref(cur, &pm->update_refs);
 		if (ret < 0)
 			goto out;
@@ -3025,6 +3042,48 @@
 	return NULL;
 }
 
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+		     u64 ino, u64 gen, u64 *ancestor_ino)
+{
+	int ret = 0;
+	u64 parent_inode = 0;
+	u64 parent_gen = 0;
+	u64 start_ino = ino;
+
+	*ancestor_ino = 0;
+	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+		fs_path_reset(name);
+
+		if (is_waiting_for_rm(sctx, ino))
+			break;
+		if (is_waiting_for_move(sctx, ino)) {
+			if (*ancestor_ino == 0)
+				*ancestor_ino = ino;
+			ret = get_first_ref(sctx->parent_root, ino,
+					    &parent_inode, &parent_gen, name);
+		} else {
+			ret = __get_cur_name_and_parent(sctx, ino, gen,
+							&parent_inode,
+							&parent_gen, name);
+			if (ret > 0) {
+				ret = 0;
+				break;
+			}
+		}
+		if (ret < 0)
+			break;
+		if (parent_inode == start_ino) {
+			ret = 1;
+			if (*ancestor_ino == 0)
+				*ancestor_ino = ino;
+			break;
+		}
+		ino = parent_inode;
+		gen = parent_gen;
+	}
+	return ret;
+}
+
 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 {
 	struct fs_path *from_path = NULL;
@@ -3036,6 +3095,7 @@
 	struct waiting_dir_move *dm = NULL;
 	u64 rmdir_ino = 0;
 	int ret;
+	u64 ancestor = 0;
 
 	name = fs_path_alloc();
 	from_path = fs_path_alloc();
@@ -3054,34 +3114,33 @@
 	if (ret < 0)
 		goto out;
 
-	if (parent_ino == sctx->cur_ino) {
-		/* child only renamed, not moved */
-		ASSERT(parent_gen == sctx->cur_inode_gen);
-		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
-				   from_path);
-		if (ret < 0)
-			goto out;
-		ret = fs_path_add_path(from_path, name);
-		if (ret < 0)
-			goto out;
-	} else {
-		/* child moved and maybe renamed too */
-		sctx->send_progress = pm->ino;
-		ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
-		if (ret < 0)
-			goto out;
-	}
-
-	fs_path_free(name);
-	name = NULL;
-
-	to_path = fs_path_alloc();
-	if (!to_path) {
-		ret = -ENOMEM;
+	ret = get_cur_path(sctx, parent_ino, parent_gen,
+			   from_path);
+	if (ret < 0)
 		goto out;
-	}
+	ret = fs_path_add_path(from_path, name);
+	if (ret < 0)
+		goto out;
 
 	sctx->send_progress = sctx->cur_ino + 1;
+	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+	if (ret) {
+		LIST_HEAD(deleted_refs);
+		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+					   &pm->update_refs, &deleted_refs);
+		if (ret < 0)
+			goto out;
+		if (rmdir_ino) {
+			dm = get_waiting_dir_move(sctx, pm->ino);
+			ASSERT(dm);
+			dm->rmdir_ino = rmdir_ino;
+		}
+		goto out;
+	}
+	fs_path_reset(name);
+	to_path = name;
+	name = NULL;
 	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
 	if (ret < 0)
 		goto out;
@@ -3205,127 +3264,74 @@
 static int wait_for_parent_move(struct send_ctx *sctx,
 				struct recorded_ref *parent_ref)
 {
-	int ret;
+	int ret = 0;
 	u64 ino = parent_ref->dir;
 	u64 parent_ino_before, parent_ino_after;
-	u64 old_gen;
 	struct fs_path *path_before = NULL;
 	struct fs_path *path_after = NULL;
 	int len1, len2;
-	int register_upper_dirs;
-	u64 gen;
-
-	if (is_waiting_for_move(sctx, ino))
-		return 1;
-
-	if (parent_ref->dir <= sctx->cur_ino)
-		return 0;
-
-	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
-			     NULL, NULL, NULL, NULL);
-	if (ret == -ENOENT)
-		return 0;
-	else if (ret < 0)
-		return ret;
-
-	if (parent_ref->dir_gen != old_gen)
-		return 0;
-
-	path_before = fs_path_alloc();
-	if (!path_before)
-		return -ENOMEM;
-
-	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
-			    NULL, path_before);
-	if (ret == -ENOENT) {
-		ret = 0;
-		goto out;
-	} else if (ret < 0) {
-		goto out;
-	}
 
 	path_after = fs_path_alloc();
-	if (!path_after) {
+	path_before = fs_path_alloc();
+	if (!path_after || !path_before) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
-			    &gen, path_after);
-	if (ret == -ENOENT) {
-		ret = 0;
-		goto out;
-	} else if (ret < 0) {
-		goto out;
-	}
-
-	len1 = fs_path_len(path_before);
-	len2 = fs_path_len(path_after);
-	if (parent_ino_before != parent_ino_after || len1 != len2 ||
-	     memcmp(path_before->start, path_after->start, len1)) {
-		ret = 1;
-		goto out;
-	}
-	ret = 0;
-
 	/*
-	 * Ok, our new most direct ancestor has a higher inode number but
-	 * wasn't moved/renamed. So maybe some of the new ancestors higher in
-	 * the hierarchy have an higher inode number too *and* were renamed
-	 * or moved - in this case we need to wait for the ancestor's rename
-	 * or move operation before we can do the move/rename for the current
-	 * inode.
+	 * Our current directory inode may not yet be renamed/moved because some
+	 * ancestor (immediate or not) has to be renamed/moved first. So find if
+	 * such ancestor exists and make sure our own rename/move happens after
+	 * that ancestor is processed.
 	 */
-	register_upper_dirs = 0;
-	ino = parent_ino_after;
-again:
-	while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) {
-		u64 parent_gen;
+	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+		if (is_waiting_for_move(sctx, ino)) {
+			ret = 1;
+			break;
+		}
 
 		fs_path_reset(path_before);
 		fs_path_reset(path_after);
 
 		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
-				    &parent_gen, path_after);
+				    NULL, path_after);
 		if (ret < 0)
 			goto out;
 		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
 				    NULL, path_before);
-		if (ret == -ENOENT) {
-			ret = 0;
-			break;
-		} else if (ret < 0) {
+		if (ret < 0 && ret != -ENOENT) {
 			goto out;
+		} else if (ret == -ENOENT) {
+			ret = 1;
+			break;
 		}
 
 		len1 = fs_path_len(path_before);
 		len2 = fs_path_len(path_after);
-		if (parent_ino_before != parent_ino_after || len1 != len2 ||
-		    memcmp(path_before->start, path_after->start, len1)) {
+		if (ino > sctx->cur_ino &&
+		    (parent_ino_before != parent_ino_after || len1 != len2 ||
+		     memcmp(path_before->start, path_after->start, len1))) {
 			ret = 1;
-			if (register_upper_dirs) {
-				break;
-			} else {
-				register_upper_dirs = 1;
-				ino = parent_ref->dir;
-				gen = parent_ref->dir_gen;
-				goto again;
-			}
-		} else if (register_upper_dirs) {
-			ret = add_pending_dir_move(sctx, ino, gen,
-						   parent_ino_after);
-			if (ret < 0 && ret != -EEXIST)
-				goto out;
+			break;
 		}
-
 		ino = parent_ino_after;
-		gen = parent_gen;
 	}
 
 out:
 	fs_path_free(path_before);
 	fs_path_free(path_after);
 
+	if (ret == 1) {
+		ret = add_pending_dir_move(sctx,
+					   sctx->cur_ino,
+					   sctx->cur_inode_gen,
+					   ino,
+					   &sctx->new_refs,
+					   &sctx->deleted_refs);
+		if (!ret)
+			ret = 1;
+	}
+
 	return ret;
 }
 
@@ -3486,10 +3492,6 @@
 				if (ret < 0)
 					goto out;
 				if (ret) {
-					ret = add_pending_dir_move(sctx,
-							   sctx->cur_ino,
-							   sctx->cur_inode_gen,
-							   cur->dir);
 					*pending_move = 1;
 				} else {
 					ret = send_rename(sctx, valid_path,
@@ -5490,7 +5492,7 @@
 	 */
 	if (root->send_in_progress < 0)
 		btrfs_err(root->fs_info,
-			"send_in_progres unbalanced %d root %llu\n",
+			"send_in_progres unbalanced %d root %llu",
 			root->send_in_progress, root->root_key.objectid);
 	spin_unlock(&root->root_item_lock);
 }
@@ -5518,7 +5520,7 @@
 
 	/*
 	 * The subvolume must remain read-only during send, protect against
-	 * making it RW.
+	 * making it RW. This also protects against deletion.
 	 */
 	spin_lock(&send_root->root_item_lock);
 	send_root->send_in_progress++;
@@ -5578,6 +5580,15 @@
 	}
 
 	sctx->send_root = send_root;
+	/*
+	 * Unlikely but possible, if the subvolume is marked for deletion but
+	 * is slow to remove the directory entry, send can still be started
+	 */
+	if (btrfs_root_dead(sctx->send_root)) {
+		ret = -EPERM;
+		goto out;
+	}
+
 	sctx->clone_roots_cnt = arg->clone_sources_count;
 
 	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
@@ -5667,7 +5678,8 @@
 
 		spin_lock(&sctx->parent_root->root_item_lock);
 		sctx->parent_root->send_in_progress++;
-		if (!btrfs_root_readonly(sctx->parent_root)) {
+		if (!btrfs_root_readonly(sctx->parent_root) ||
+				btrfs_root_dead(sctx->parent_root)) {
 			spin_unlock(&sctx->parent_root->root_item_lock);
 			srcu_read_unlock(&fs_info->subvol_srcu, index);
 			ret = -EPERM;

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9601d25..4662d92 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c

@@ -511,7 +511,7 @@
 			} else if (compress) {
 				if (!btrfs_test_opt(root, COMPRESS))
 					btrfs_info(root->fs_info,
-						   "btrfs: use %s compression\n",
+						   "btrfs: use %s compression",
 						   compress_type);
 			}
 			break;
@@ -580,8 +580,15 @@
 			}
 			break;
 		case Opt_acl:
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
 			root->fs_info->sb->s_flags |= MS_POSIXACL;
 			break;
+#else
+			btrfs_err(root->fs_info,
+				"support for ACL not compiled in!");
+			ret = -EINVAL;
+			goto out;
+#endif
 		case Opt_noacl:
 			root->fs_info->sb->s_flags &= ~MS_POSIXACL;
 			break;
@@ -1413,6 +1420,7 @@
 		 * this also happens on 'umount -rf' or on shutdown, when
 		 * the filesystem is busy.
 		 */
+		cancel_work_sync(&fs_info->async_reclaim_work);
 
 		/* wait for the uuid_scan task to finish */
 		down(&fs_info->uuid_tree_rescan_sem);
@@ -1894,6 +1902,9 @@
 	if (ret)
 		goto out;
 	ret = btrfs_test_inodes();
+	if (ret)
+		goto out;
+	ret = btrfs_test_qgroups();
 out:
 	btrfs_destroy_test_fs();
 	return ret;

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c5eb214..df39458 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c

@@ -254,6 +254,7 @@
 BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show);
 
 #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
+#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
 
 static ssize_t raid_bytes_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf);
@@ -266,7 +267,7 @@
 {
 	struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
 	struct btrfs_block_group_cache *block_group;
-	int index = kobj - sinfo->block_group_kobjs;
+	int index = to_raid_kobj(kobj)->raid_type;
 	u64 val = 0;
 
 	down_read(&sinfo->groups_sem);
@@ -288,7 +289,7 @@
 
 static void release_raid_kobj(struct kobject *kobj)
 {
-	kobject_put(kobj->parent);
+	kfree(to_raid_kobj(kobj));
 }
 
 struct kobj_type btrfs_raid_ktype = {
@@ -374,11 +375,8 @@
 	struct btrfs_root *root = fs_info->fs_root;
 	int ret;
 
-	if (len >= BTRFS_LABEL_SIZE) {
-		pr_err("BTRFS: unable to set label with more than %d bytes\n",
-		       BTRFS_LABEL_SIZE - 1);
+	if (len >= BTRFS_LABEL_SIZE)
 		return -EINVAL;
-	}
 
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans))
@@ -396,8 +394,48 @@
 }
 BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store);
 
+static ssize_t btrfs_no_store(struct kobject *kobj,
+				 struct kobj_attribute *a,
+				 const char *buf, size_t len)
+{
+	return -EPERM;
+}
+
+static ssize_t btrfs_nodesize_show(struct kobject *kobj,
+				struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
+}
+
+BTRFS_ATTR_RW(nodesize, 0444, btrfs_nodesize_show, btrfs_no_store);
+
+static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
+				struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
+}
+
+BTRFS_ATTR_RW(sectorsize, 0444, btrfs_sectorsize_show, btrfs_no_store);
+
+static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
+				struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
+}
+
+BTRFS_ATTR_RW(clone_alignment, 0444, btrfs_clone_alignment_show, btrfs_no_store);
+
 static struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(label),
+	BTRFS_ATTR_PTR(nodesize),
+	BTRFS_ATTR_PTR(sectorsize),
+	BTRFS_ATTR_PTR(clone_alignment),
 	NULL,
 };
 

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 757ef00..9626252 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c

@@ -21,6 +21,9 @@
 #include <linux/magic.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../volumes.h"
+#include "../disk-io.h"
+#include "../qgroup.h"
 
 static struct vfsmount *test_mnt = NULL;
 
@@ -72,3 +75,97 @@
 	kern_unmount(test_mnt);
 	unregister_filesystem(&test_type);
 }
+
+struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
+{
+	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
+						GFP_NOFS);
+
+	if (!fs_info)
+		return fs_info;
+	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
+				      GFP_NOFS);
+	if (!fs_info->fs_devices) {
+		kfree(fs_info);
+		return NULL;
+	}
+	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
+				      GFP_NOFS);
+	if (!fs_info->super_copy) {
+		kfree(fs_info->fs_devices);
+		kfree(fs_info);
+		return NULL;
+	}
+
+	if (init_srcu_struct(&fs_info->subvol_srcu)) {
+		kfree(fs_info->fs_devices);
+		kfree(fs_info->super_copy);
+		kfree(fs_info);
+		return NULL;
+	}
+
+	spin_lock_init(&fs_info->buffer_lock);
+	spin_lock_init(&fs_info->qgroup_lock);
+	spin_lock_init(&fs_info->qgroup_op_lock);
+	spin_lock_init(&fs_info->super_lock);
+	spin_lock_init(&fs_info->fs_roots_radix_lock);
+	spin_lock_init(&fs_info->tree_mod_seq_lock);
+	mutex_init(&fs_info->qgroup_ioctl_lock);
+	mutex_init(&fs_info->qgroup_rescan_lock);
+	rwlock_init(&fs_info->tree_mod_log_lock);
+	fs_info->running_transaction = NULL;
+	fs_info->qgroup_tree = RB_ROOT;
+	fs_info->qgroup_ulist = NULL;
+	atomic64_set(&fs_info->tree_mod_seq, 0);
+	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
+	INIT_LIST_HEAD(&fs_info->dead_roots);
+	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
+	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+	return fs_info;
+}
+
+static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	spin_lock(&fs_info->buffer_lock);
+restart:
+	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
+		struct extent_buffer *eb;
+
+		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
+		if (!eb)
+			continue;
+		/* Shouldn't happen but that kind of thinking creates CVEs */
+		if (radix_tree_exception(eb)) {
+			if (radix_tree_deref_retry(eb))
+				goto restart;
+			continue;
+		}
+		spin_unlock(&fs_info->buffer_lock);
+		free_extent_buffer_stale(eb);
+		spin_lock(&fs_info->buffer_lock);
+	}
+	spin_unlock(&fs_info->buffer_lock);
+
+	btrfs_free_qgroup_config(fs_info);
+	btrfs_free_fs_roots(fs_info);
+	cleanup_srcu_struct(&fs_info->subvol_srcu);
+	kfree(fs_info->super_copy);
+	kfree(fs_info->fs_devices);
+	kfree(fs_info);
+}
+
+void btrfs_free_dummy_root(struct btrfs_root *root)
+{
+	if (!root)
+		return;
+	if (root->node)
+		free_extent_buffer(root->node);
+	if (root->fs_info)
+		btrfs_free_dummy_fs_info(root->fs_info);
+	kfree(root);
+}
+

diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 312560a..fd39542 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h

@@ -23,13 +23,18 @@
 
 #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
 
+struct btrfs_root;
+
 int btrfs_test_free_space_cache(void);
 int btrfs_test_extent_buffer_operations(void);
 int btrfs_test_extent_io(void);
 int btrfs_test_inodes(void);
+int btrfs_test_qgroups(void);
 int btrfs_init_test_fs(void);
 void btrfs_destroy_test_fs(void);
 struct inode *btrfs_new_test_inode(void);
+struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
+void btrfs_free_dummy_root(struct btrfs_root *root);
 #else
 static inline int btrfs_test_free_space_cache(void)
 {
@@ -54,6 +59,10 @@
 {
 	return 0;
 }
+static inline int btrfs_test_qgroups(void)
+{
+	return 0;
+}
 #endif
 
 #endif

diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 397d1f9..3ae0f5b 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c

@@ -23,33 +23,6 @@
 #include "../extent_io.h"
 #include "../volumes.h"
 
-static struct btrfs_fs_info *alloc_dummy_fs_info(void)
-{
-	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
-	if (!fs_info)
-		return fs_info;
-	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
-	if (!fs_info->fs_devices) {
-		kfree(fs_info);
-		return NULL;
-	}
-	return fs_info;
-}
-static void free_dummy_root(struct btrfs_root *root)
-{
-	if (!root)
-		return;
-	if (root->fs_info) {
-		kfree(root->fs_info->fs_devices);
-		kfree(root->fs_info);
-	}
-	if (root->node)
-		free_extent_buffer(root->node);
-	kfree(root);
-}
-
 static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
 			  u64 ram_bytes, u64 offset, u64 disk_bytenr,
 			  u64 disk_len, u32 type, u8 compression, int slot)
@@ -276,7 +249,7 @@
 	 * We do this since btrfs_get_extent wants to assign em->bdev to
 	 * root->fs_info->fs_devices->latest_bdev.
 	 */
-	root->fs_info = alloc_dummy_fs_info();
+	root->fs_info = btrfs_alloc_dummy_fs_info();
 	if (!root->fs_info) {
 		test_msg("Couldn't allocate dummy fs info\n");
 		goto out;
@@ -837,7 +810,7 @@
 	if (!IS_ERR(em))
 		free_extent_map(em);
 	iput(inode);
-	free_dummy_root(root);
+	btrfs_free_dummy_root(root);
 	return ret;
 }
 
@@ -864,7 +837,7 @@
 		goto out;
 	}
 
-	root->fs_info = alloc_dummy_fs_info();
+	root->fs_info = btrfs_alloc_dummy_fs_info();
 	if (!root->fs_info) {
 		test_msg("Couldn't allocate dummy fs info\n");
 		goto out;
@@ -934,7 +907,7 @@
 	if (!IS_ERR(em))
 		free_extent_map(em);
 	iput(inode);
-	free_dummy_root(root);
+	btrfs_free_dummy_root(root);
 	return ret;
 }
 

diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
new file mode 100644
index 0000000..ec3dcb2
--- /dev/null
+++ b/fs/btrfs/tests/qgroup-tests.c

@@ -0,0 +1,470 @@
+/*
+ * Copyright (C) 2013 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../transaction.h"
+#include "../disk-io.h"
+#include "../qgroup.h"
+
+static void init_dummy_trans(struct btrfs_trans_handle *trans)
+{
+	memset(trans, 0, sizeof(*trans));
+	trans->transid = 1;
+	INIT_LIST_HEAD(&trans->qgroup_ref_list);
+	trans->type = __TRANS_DUMMY;
+}
+
+static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
+				  u64 num_bytes, u64 parent, u64 root_objectid)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_extent_item *item;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_tree_block_info *block_info;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key ins;
+	u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	ins.objectid = bytenr;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
+	ins.offset = num_bytes;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		test_msg("Couldn't allocate path\n");
+		return -ENOMEM;
+	}
+
+	path->leave_spinning = 1;
+	ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
+	if (ret) {
+		test_msg("Couldn't insert ref %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, item, 1);
+	btrfs_set_extent_generation(leaf, item, 1);
+	btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK);
+	block_info = (struct btrfs_tree_block_info *)(item + 1);
+	btrfs_set_tree_block_level(leaf, block_info, 1);
+	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+	if (parent > 0) {
+		btrfs_set_extent_inline_ref_type(leaf, iref,
+						 BTRFS_SHARED_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else {
+		btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	}
+	btrfs_free_path(path);
+	return 0;
+}
+
+static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
+			u64 parent, u64 root_objectid)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_extent_item *item;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 refs;
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		test_msg("Couldn't allocate path\n");
+		return -ENOMEM;
+	}
+
+	path->leave_spinning = 1;
+	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
+	if (ret) {
+		test_msg("Couldn't find extent ref\n");
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_extent_item);
+	refs = btrfs_extent_refs(path->nodes[0], item);
+	btrfs_set_extent_refs(path->nodes[0], item, refs + 1);
+	btrfs_release_path(path);
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
+	}
+
+	ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
+	if (ret)
+		test_msg("Failed to insert backref\n");
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
+			      u64 num_bytes)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		test_msg("Couldn't allocate path\n");
+		return -ENOMEM;
+	}
+	path->leave_spinning = 1;
+
+	ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
+	if (ret) {
+		test_msg("Didn't find our key %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+	btrfs_del_item(&trans, root, path);
+	btrfs_free_path(path);
+	return 0;
+}
+
+static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 parent, u64 root_objectid)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_extent_item *item;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 refs;
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		test_msg("Couldn't allocate path\n");
+		return -ENOMEM;
+	}
+
+	path->leave_spinning = 1;
+	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
+	if (ret) {
+		test_msg("Couldn't find extent ref\n");
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_extent_item);
+	refs = btrfs_extent_refs(path->nodes[0], item);
+	btrfs_set_extent_refs(path->nodes[0], item, refs - 1);
+	btrfs_release_path(path);
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
+	}
+
+	ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
+	if (ret) {
+		test_msg("Couldn't find backref %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+	btrfs_del_item(&trans, root, path);
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int test_no_shared_qgroup(struct btrfs_root *root)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	test_msg("Qgroup basic add\n");
+	ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL);
+	if (ret) {
+		test_msg("Couldn't create a qgroup %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
+				      BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+	if (ret) {
+		test_msg("Couldn't add space to a qgroup %d\n", ret);
+		return ret;
+	}
+
+	ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+	if (ret)
+		return ret;
+
+	ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+	if (ret) {
+		test_msg("Delayed qgroup accounting failed %d\n", ret);
+		return ret;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	ret = remove_extent_item(root, 4096, 4096);
+	if (ret)
+		return -EINVAL;
+
+	ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
+				      BTRFS_QGROUP_OPER_SUB_EXCL, 0);
+	if (ret) {
+		test_msg("Couldn't remove space from the qgroup %d\n", ret);
+		return -EINVAL;
+	}
+
+	ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+	if (ret) {
+		test_msg("Qgroup accounting failed %d\n", ret);
+		return -EINVAL;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Add a ref for two different roots to make sure the shared value comes out
+ * right, also remove one of the roots and make sure the exclusive count is
+ * adjusted properly.
+ */
+static int test_multiple_refs(struct btrfs_root *root)
+{
+	struct btrfs_trans_handle trans;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	int ret;
+
+	init_dummy_trans(&trans);
+
+	test_msg("Qgroup multiple refs test\n");
+
+	/* We have 5 created already from the previous test */
+	ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL);
+	if (ret) {
+		test_msg("Couldn't create a qgroup %d\n", ret);
+		return ret;
+	}
+
+	ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+	if (ret)
+		return ret;
+
+	ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
+				      BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+	if (ret) {
+		test_msg("Couldn't add space to a qgroup %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+	if (ret) {
+		test_msg("Delayed qgroup accounting failed %d\n", ret);
+		return ret;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	ret = add_tree_ref(root, 4096, 4096, 0, 256);
+	if (ret)
+		return ret;
+
+	ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
+				      BTRFS_QGROUP_OPER_ADD_SHARED, 0);
+	if (ret) {
+		test_msg("Qgroup record ref failed %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+	if (ret) {
+		test_msg("Qgroup accounting failed %d\n", ret);
+		return ret;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	ret = remove_extent_ref(root, 4096, 4096, 0, 256);
+	if (ret)
+		return ret;
+
+	ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
+				      BTRFS_QGROUP_OPER_SUB_SHARED, 0);
+	if (ret) {
+		test_msg("Qgroup record ref failed %d\n", ret);
+		return ret;
+	}
+
+	ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+	if (ret) {
+		test_msg("Qgroup accounting failed %d\n", ret);
+		return ret;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+		test_msg("Qgroup counts didn't match expected values\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int btrfs_test_qgroups(void)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *tmp_root;
+	int ret = 0;
+
+	root = btrfs_alloc_dummy_root();
+	if (IS_ERR(root)) {
+		test_msg("Couldn't allocate root\n");
+		return PTR_ERR(root);
+	}
+
+	root->fs_info = btrfs_alloc_dummy_fs_info();
+	if (!root->fs_info) {
+		test_msg("Couldn't allocate dummy fs info\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Can't use bytenr 0, some things freak out
+	 * *cough*backref walking code*cough*
+	 */
+	root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096);
+	if (!root->node) {
+		test_msg("Couldn't allocate dummy buffer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+	btrfs_set_header_level(root->node, 0);
+	btrfs_set_header_nritems(root->node, 0);
+	root->alloc_bytenr += 8192;
+
+	tmp_root = btrfs_alloc_dummy_root();
+	if (IS_ERR(tmp_root)) {
+		test_msg("Couldn't allocate a fs root\n");
+		ret = PTR_ERR(tmp_root);
+		goto out;
+	}
+
+	tmp_root->root_key.objectid = 5;
+	root->fs_info->fs_root = tmp_root;
+	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
+	if (ret) {
+		test_msg("Couldn't insert fs root %d\n", ret);
+		goto out;
+	}
+
+	tmp_root = btrfs_alloc_dummy_root();
+	if (IS_ERR(tmp_root)) {
+		test_msg("Couldn't allocate a fs root\n");
+		ret = PTR_ERR(tmp_root);
+		goto out;
+	}
+
+	tmp_root->root_key.objectid = 256;
+	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
+	if (ret) {
+		test_msg("Couldn't insert fs root %d\n", ret);
+		goto out;
+	}
+
+	/* We are using this root as our extent root */
+	root->fs_info->extent_root = root;
+
+	/*
+	 * Some of the paths we test assume we have a filled out fs_info, so we
+	 * just need to add the root in there so we don't panic.
+	 */
+	root->fs_info->tree_root = root;
+	root->fs_info->quota_root = root;
+	root->fs_info->quota_enabled = 1;
+
+	test_msg("Running qgroup tests\n");
+	ret = test_no_shared_qgroup(root);
+	if (ret)
+		goto out;
+	ret = test_multiple_refs(root);
+out:
+	btrfs_free_dummy_root(root);
+	return ret;
+}

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7579f6d..511839c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c

@@ -31,6 +31,7 @@
 #include "inode-map.h"
 #include "volumes.h"
 #include "dev-replace.h"
+#include "qgroup.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -241,18 +242,19 @@
 static int record_root_in_trans(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
-	if (root->ref_cows && root->last_trans < trans->transid) {
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+	    root->last_trans < trans->transid) {
 		WARN_ON(root == root->fs_info->extent_root);
 		WARN_ON(root->commit_root != root->node);
 
 		/*
-		 * see below for in_trans_setup usage rules
+		 * see below for IN_TRANS_SETUP usage rules
 		 * we have the reloc mutex held now, so there
 		 * is only one writer in this function
 		 */
-		root->in_trans_setup = 1;
+		set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
 
-		/* make sure readers find in_trans_setup before
+		/* make sure readers find IN_TRANS_SETUP before
 		 * they find our root->last_trans update
 		 */
 		smp_wmb();
@@ -279,7 +281,7 @@
 		 * But, we have to set root->last_trans before we
 		 * init the relocation root, otherwise, we trip over warnings
 		 * in ctree.c.  The solution used here is to flag ourselves
-		 * with root->in_trans_setup.  When this is 1, we're still
+		 * with root IN_TRANS_SETUP.  When this is 1, we're still
 		 * fixing up the reloc trees and everyone must wait.
 		 *
 		 * When this is zero, they can trust root->last_trans and fly
@@ -288,8 +290,8 @@
 		 * done before we pop in the zero below
 		 */
 		btrfs_init_reloc_root(trans, root);
-		smp_wmb();
-		root->in_trans_setup = 0;
+		smp_mb__before_atomic();
+		clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
 	}
 	return 0;
 }
@@ -298,16 +300,16 @@
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
-	if (!root->ref_cows)
+	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return 0;
 
 	/*
-	 * see record_root_in_trans for comments about in_trans_setup usage
+	 * see record_root_in_trans for comments about IN_TRANS_SETUP usage
 	 * and barriers
 	 */
 	smp_rmb();
 	if (root->last_trans == trans->transid &&
-	    !root->in_trans_setup)
+	    !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
 		return 0;
 
 	mutex_lock(&root->fs_info->reloc_mutex);
@@ -365,7 +367,7 @@
 static inline bool need_reserve_reloc_root(struct btrfs_root *root)
 {
 	if (!root->fs_info->reloc_ctl ||
-	    !root->ref_cows ||
+	    !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
 	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
 	    root->reloc_root)
 		return false;
@@ -695,6 +697,7 @@
 	unsigned long cur = trans->delayed_ref_updates;
 	int lock = (trans->type != TRANS_JOIN_NOLOCK);
 	int err = 0;
+	int must_run_delayed_refs = 0;
 
 	if (trans->use_count > 1) {
 		trans->use_count--;
@@ -702,14 +705,27 @@
 		return 0;
 	}
 
-	/*
-	 * do the qgroup accounting as early as possible
-	 */
-	err = btrfs_delayed_refs_qgroup_accounting(trans, info);
-
 	btrfs_trans_release_metadata(trans, root);
 	trans->block_rsv = NULL;
 
+	if (!list_empty(&trans->new_bgs))
+		btrfs_create_pending_block_groups(trans, root);
+
+	trans->delayed_ref_updates = 0;
+	if (!trans->sync) {
+		must_run_delayed_refs =
+			btrfs_should_throttle_delayed_refs(trans, root);
+		cur = max_t(unsigned long, cur, 32);
+
+		/*
+		 * don't make the caller wait if they are from a NOLOCK
+		 * or ATTACH transaction, it will deadlock with commit
+		 */
+		if (must_run_delayed_refs == 1 &&
+		    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
+			must_run_delayed_refs = 2;
+	}
+
 	if (trans->qgroup_reserved) {
 		/*
 		 * the same root has to be passed here between start_transaction
@@ -719,16 +735,6 @@
 		trans->qgroup_reserved = 0;
 	}
 
-	if (!list_empty(&trans->new_bgs))
-		btrfs_create_pending_block_groups(trans, root);
-
-	trans->delayed_ref_updates = 0;
-	if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
-		cur = max_t(unsigned long, cur, 32);
-		trans->delayed_ref_updates = 0;
-		btrfs_run_delayed_refs(trans, root, cur);
-	}
-
 	btrfs_trans_release_metadata(trans, root);
 	trans->block_rsv = NULL;
 
@@ -778,6 +784,10 @@
 	assert_qgroups_uptodate(trans);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+	if (must_run_delayed_refs) {
+		btrfs_async_run_delayed_refs(root, cur,
+					     must_run_delayed_refs == 1);
+	}
 	return err;
 }
 
@@ -1049,8 +1059,8 @@
 			btrfs_save_ino_cache(root, trans);
 
 			/* see comments in should_cow_block() */
-			root->force_cow = 0;
-			smp_wmb();
+			clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+			smp_mb__after_atomic();
 
 			if (root->commit_root != root->node) {
 				list_add_tail(&root->dirty_list,
@@ -1081,7 +1091,7 @@
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	if (xchg(&root->defrag_running, 1))
+	if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
 		return 0;
 
 	while (1) {
@@ -1104,7 +1114,7 @@
 			break;
 		}
 	}
-	root->defrag_running = 0;
+	clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
 	return ret;
 }
 
@@ -1168,12 +1178,6 @@
 			goto no_free_objectid;
 	}
 
-	pending->error = btrfs_qgroup_inherit(trans, fs_info,
-					      root->root_key.objectid,
-					      objectid, pending->inherit);
-	if (pending->error)
-		goto no_free_objectid;
-
 	key.objectid = objectid;
 	key.offset = (u64)-1;
 	key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1270,8 +1274,26 @@
 		goto fail;
 	}
 
+	/*
+	 * We need to flush delayed refs in order to make sure all of our quota
+	 * operations have been done before we call btrfs_qgroup_inherit.
+	 */
+	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto fail;
+	}
+
+	ret = btrfs_qgroup_inherit(trans, fs_info,
+				   root->root_key.objectid,
+				   objectid, pending->inherit);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto fail;
+	}
+
 	/* see comments in should_cow_block() */
-	root->force_cow = 1;
+	set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
 	smp_wmb();
 
 	btrfs_set_root_node(new_root_item, tmp);
@@ -1598,12 +1620,6 @@
 	 * them now so that they hinder processing of more delayed refs
 	 * as little as possible.
 	 */
-	if (ret) {
-		btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
-		return ret;
-	}
-
-	ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
 	if (ret)
 		return ret;
 
@@ -1984,19 +2000,6 @@
 	}
 	root = list_first_entry(&fs_info->dead_roots,
 			struct btrfs_root, root_list);
-	/*
-	 * Make sure root is not involved in send,
-	 * if we fail with first root, we return
-	 * directly rather than continue.
-	 */
-	spin_lock(&root->root_item_lock);
-	if (root->send_in_progress) {
-		spin_unlock(&fs_info->trans_lock);
-		spin_unlock(&root->root_item_lock);
-		return 0;
-	}
-	spin_unlock(&root->root_item_lock);
-
 	list_del_init(&root->root_list);
 	spin_unlock(&fs_info->trans_lock);
 

diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b57b924..7dd558e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h

@@ -69,6 +69,7 @@
 #define __TRANS_ATTACH		(1U << 10)
 #define __TRANS_JOIN		(1U << 11)
 #define __TRANS_JOIN_NOLOCK	(1U << 12)
+#define __TRANS_DUMMY		(1U << 13)
 
 #define TRANS_USERSPACE		(__TRANS_USERSPACE | __TRANS_FREEZABLE)
 #define TRANS_START		(__TRANS_START | __TRANS_FREEZABLE)

diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 76928ca..a63719c 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c

@@ -49,7 +49,7 @@
 		goto out;
 	}
 
-	if (root->ref_cows == 0)
+	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		goto out;
 
 	if (btrfs_test_opt(root, SSD))

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e2f45fc..9e1f2cd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c

@@ -20,13 +20,11 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/list_sort.h>
-#include "ctree.h"
-#include "transaction.h"
+#include "tree-log.h"
 #include "disk-io.h"
 #include "locking.h"
 #include "print-tree.h"
 #include "backref.h"
-#include "tree-log.h"
 #include "hash.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
@@ -144,17 +142,15 @@
 
 	mutex_lock(&root->log_mutex);
 	if (root->log_root) {
-		if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
-		    trans->transid) {
+		if (btrfs_need_log_full_commit(root->fs_info, trans)) {
 			ret = -EAGAIN;
 			goto out;
 		}
-
 		if (!root->log_start_pid) {
 			root->log_start_pid = current->pid;
-			root->log_multiple_pids = false;
+			clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
 		} else if (root->log_start_pid != current->pid) {
-			root->log_multiple_pids = true;
+			set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
 		}
 
 		atomic_inc(&root->log_batch);
@@ -181,7 +177,7 @@
 		if (ret)
 			goto out;
 	}
-	root->log_multiple_pids = false;
+	clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
 	root->log_start_pid = current->pid;
 	atomic_inc(&root->log_batch);
 	atomic_inc(&root->log_writers);
@@ -2500,7 +2496,8 @@
 	while (1) {
 		int batch = atomic_read(&root->log_batch);
 		/* when we're on an ssd, just kick the log commit out */
-		if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) {
+		if (!btrfs_test_opt(root, SSD) &&
+		    test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
 			mutex_unlock(&root->log_mutex);
 			schedule_timeout_uninterruptible(1);
 			mutex_lock(&root->log_mutex);
@@ -2511,8 +2508,7 @@
 	}
 
 	/* bail out if we need to do a full commit */
-	if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
-	    trans->transid) {
+	if (btrfs_need_log_full_commit(root->fs_info, trans)) {
 		ret = -EAGAIN;
 		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&root->log_mutex);
@@ -2533,8 +2529,7 @@
 		blk_finish_plug(&plug);
 		btrfs_abort_transaction(trans, root, ret);
 		btrfs_free_logged_extents(log, log_transid);
-		ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) =
-								trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		mutex_unlock(&root->log_mutex);
 		goto out;
 	}
@@ -2577,8 +2572,8 @@
 			list_del_init(&root_log_ctx.list);
 
 		blk_finish_plug(&plug);
-		ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) =
-								trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
+
 		if (ret != -ENOSPC) {
 			btrfs_abort_transaction(trans, root, ret);
 			mutex_unlock(&log_root_tree->log_mutex);
@@ -2622,8 +2617,7 @@
 	 * now that we've moved on to the tree of log tree roots,
 	 * check the full commit flag again
 	 */
-	if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
-	    trans->transid) {
+	if (btrfs_need_log_full_commit(root->fs_info, trans)) {
 		blk_finish_plug(&plug);
 		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 		btrfs_free_logged_extents(log, log_transid);
@@ -2637,8 +2631,7 @@
 					 EXTENT_DIRTY | EXTENT_NEW);
 	blk_finish_plug(&plug);
 	if (ret) {
-		ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) =
-								trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		btrfs_abort_transaction(trans, root, ret);
 		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
@@ -2667,8 +2660,7 @@
 	 */
 	ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
 	if (ret) {
-		ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) =
-								trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		btrfs_abort_transaction(trans, root, ret);
 		goto out_wake_log_root;
 	}
@@ -2886,7 +2878,7 @@
 out_unlock:
 	mutex_unlock(&BTRFS_I(dir)->log_mutex);
 	if (ret == -ENOSPC) {
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		ret = 0;
 	} else if (ret < 0)
 		btrfs_abort_transaction(trans, root, ret);
@@ -2919,7 +2911,7 @@
 				  dirid, &index);
 	mutex_unlock(&BTRFS_I(inode)->log_mutex);
 	if (ret == -ENOSPC) {
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		ret = 0;
 	} else if (ret < 0 && ret != -ENOENT)
 		btrfs_abort_transaction(trans, root, ret);
@@ -4130,8 +4122,7 @@
 			 * make sure any commits to the log are forced
 			 * to be full commits
 			 */
-			root->fs_info->last_trans_log_full_commit =
-				trans->transid;
+			btrfs_set_log_full_commit(root->fs_info, trans);
 			ret = 1;
 			break;
 		}
@@ -4177,6 +4168,10 @@
 		goto end_no_trans;
 	}
 
+	/*
+	 * The previous transaction commit has not completed yet, so we
+	 * need to do a full commit ourselves.
+	 */
 	if (root->fs_info->last_trans_log_full_commit >
 	    root->fs_info->last_trans_committed) {
 		ret = 1;
@@ -4246,7 +4241,7 @@
 end_trans:
 	dput(old_parent);
 	if (ret < 0) {
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		btrfs_set_log_full_commit(root->fs_info, trans);
 		ret = 1;
 	}
 

diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 91b145f..7f5b41b 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h

@@ -19,6 +19,9 @@
 #ifndef __TREE_LOG_
 #define __TREE_LOG_
 
+#include "ctree.h"
+#include "transaction.h"
+
 /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
 #define BTRFS_NO_LOG_SYNC 256
 
@@ -35,6 +38,19 @@
 	INIT_LIST_HEAD(&ctx->list);
 }
 
+static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
+					     struct btrfs_trans_handle *trans)
+{
+	ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
+}
+
+static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
+					     struct btrfs_trans_handle *trans)
+{
+	return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
+		trans->transid;
+}
+
 int btrfs_sync_log(struct btrfs_trans_handle *trans,
 		   struct btrfs_root *root, struct btrfs_log_ctx *ctx);
 int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49d7fab..ffeed6d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c

@@ -1452,6 +1452,22 @@
 	return ret;
 }
 
+/*
+ * Function to update ctime/mtime for a given device path.
+ * Mainly used for ctime/mtime based probe like libblkid.
+ */
+static void update_dev_time(char *path_name)
+{
+	struct file *filp;
+
+	filp = filp_open(path_name, O_RDWR, 0);
+	/* filp_open() reports failure via ERR_PTR(), it never returns NULL */
+	if (IS_ERR(filp))
+		return;
+	file_update_time(filp);
+	filp_close(filp, NULL);
+}
+
 static int btrfs_rm_dev_item(struct btrfs_root *root,
 			     struct btrfs_device *device)
 {
@@ -1674,11 +1690,12 @@
 		struct btrfs_fs_devices *fs_devices;
 		fs_devices = root->fs_info->fs_devices;
 		while (fs_devices) {
-			if (fs_devices->seed == cur_devices)
+			if (fs_devices->seed == cur_devices) {
+				fs_devices->seed = cur_devices->seed;
 				break;
+			}
 			fs_devices = fs_devices->seed;
 		}
-		fs_devices->seed = cur_devices->seed;
 		cur_devices->seed = NULL;
 		lock_chunks(root);
 		__btrfs_close_devices(cur_devices);
@@ -1694,20 +1711,55 @@
 	 * remove it from the devices list and zero out the old super
 	 */
 	if (clear_super && disk_super) {
+		u64 bytenr;
+		int i;
+
 		/* make sure this device isn't detected as part of
 		 * the FS anymore
 		 */
 		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
 		set_buffer_dirty(bh);
 		sync_dirty_buffer(bh);
+
+		/* clear the mirror copies of the super block on the disk
+		 * being removed; the 0th copy has been taken care of above
+		 * and the loop below takes care of the rest
+		 */
+		for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+			bytenr = btrfs_sb_offset(i);
+			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+					i_size_read(bdev->bd_inode))
+				break;
+
+			brelse(bh);
+			bh = __bread(bdev, bytenr / 4096,
+					BTRFS_SUPER_INFO_SIZE);
+			if (!bh)
+				continue;
+
+			disk_super = (struct btrfs_super_block *)bh->b_data;
+
+			if (btrfs_super_bytenr(disk_super) != bytenr ||
+				btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
+				continue;
+			}
+			memset(&disk_super->magic, 0,
+						sizeof(disk_super->magic));
+			set_buffer_dirty(bh);
+			sync_dirty_buffer(bh);
+		}
 	}
 
 	ret = 0;
 
-	/* Notify udev that device has changed */
-	if (bdev)
+	if (bdev) {
+		/* Notify udev that device has changed */
 		btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
+		/* Update ctime/mtime for device path for libblkid */
+		update_dev_time(device_path);
+	}
+
 error_brelse:
 	brelse(bh);
 	if (bdev)
@@ -1883,7 +1935,6 @@
 	fs_devices->seeding = 0;
 	fs_devices->num_devices = 0;
 	fs_devices->open_devices = 0;
-	fs_devices->total_devices = 0;
 	fs_devices->seed = seed_devices;
 
 	generate_random_uuid(fs_devices->fsid);
@@ -2146,6 +2197,8 @@
 		ret = btrfs_commit_transaction(trans, root);
 	}
 
+	/* Update ctime/mtime for libblkid */
+	update_dev_time(device_path);
 	return ret;
 
 error_trans:
@@ -2922,6 +2975,16 @@
 		return 0;
 	}
 
+	/*
+	 * limited by count, must be the last filter
+	 */
+	if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
+		if (bargs->limit == 0)
+			return 0;
+		else
+			bargs->limit--;
+	}
+
 	return 1;
 }
 
@@ -2944,6 +3007,9 @@
 	int ret;
 	int enospc_errors = 0;
 	bool counting = true;
+	u64 limit_data = bctl->data.limit;
+	u64 limit_meta = bctl->meta.limit;
+	u64 limit_sys = bctl->sys.limit;
 
 	/* step one make some room on all the devices */
 	devices = &fs_info->fs_devices->devices;
@@ -2982,6 +3048,11 @@
 	memset(&bctl->stat, 0, sizeof(bctl->stat));
 	spin_unlock(&fs_info->balance_lock);
 again:
+	if (!counting) {
+		bctl->data.limit = limit_data;
+		bctl->meta.limit = limit_meta;
+		bctl->sys.limit = limit_sys;
+	}
 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	key.offset = (u64)-1;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -3881,7 +3952,8 @@
 	u8 *ptr;
 
 	array_size = btrfs_super_sys_array_size(super_copy);
-	if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
+	if (array_size + item_size + sizeof(disk_key)
+			> BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
 		return -EFBIG;
 
 	ptr = super_copy->sys_chunk_array + array_size;
@@ -3986,6 +4058,16 @@
 	btrfs_set_fs_incompat(info, RAID56);
 }
 
+#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)		\
+			- sizeof(struct btrfs_item)		\
+			- sizeof(struct btrfs_chunk))		\
+			/ sizeof(struct btrfs_stripe) + 1)
+
+#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	\
+				- 2 * sizeof(struct btrfs_disk_key)	\
+				- 2 * sizeof(struct btrfs_chunk))	\
+				/ sizeof(struct btrfs_stripe) + 1)
+
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *extent_root, u64 start,
 			       u64 type)
@@ -4035,6 +4117,8 @@
 	if (type & BTRFS_BLOCK_GROUP_DATA) {
 		max_stripe_size = 1024 * 1024 * 1024;
 		max_chunk_size = 10 * max_stripe_size;
+		if (!devs_max)
+			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
 	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
 		/* for larger filesystems, use larger metadata chunks */
 		if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
@@ -4042,11 +4126,15 @@
 		else
 			max_stripe_size = 256 * 1024 * 1024;
 		max_chunk_size = max_stripe_size;
+		if (!devs_max)
+			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
 	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
 		max_stripe_size = 32 * 1024 * 1024;
 		max_chunk_size = 2 * max_stripe_size;
+		if (!devs_max)
+			devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
 	} else {
-		btrfs_err(info, "invalid chunk type 0x%llx requested\n",
+		btrfs_err(info, "invalid chunk type 0x%llx requested",
 		       type);
 		BUG_ON(1);
 	}
@@ -4294,7 +4382,7 @@
 
 	if (em->start != chunk_offset || em->len != chunk_size) {
 		btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted"
-			  " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset,
+			  " %Lu-%Lu, found %Lu-%Lu", chunk_offset,
 			  chunk_size, em->start, em->len);
 		free_extent_map(em);
 		return -EINVAL;
@@ -4496,14 +4584,14 @@
 	 * and exit, so return 1 so the callers don't try to use other copies.
 	 */
 	if (!em) {
-		btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
+		btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
 			    logical+len);
 		return 1;
 	}
 
 	if (em->start > logical || em->start + em->len < logical) {
 		btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
-			    "%Lu-%Lu\n", logical, logical+len, em->start,
+			    "%Lu-%Lu", logical, logical+len, em->start,
 			    em->start + em->len);
 		free_extent_map(em);
 		return 1;
@@ -4684,7 +4772,7 @@
 
 	if (em->start > logical || em->start + em->len < logical) {
 		btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
-			   "found %Lu-%Lu\n", logical, em->start,
+			   "found %Lu-%Lu", logical, em->start,
 			   em->start + em->len);
 		free_extent_map(em);
 		return -EINVAL;
@@ -6058,10 +6146,14 @@
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *device;
 
-	mutex_lock(&fs_devices->device_list_mutex);
-	list_for_each_entry(device, &fs_devices->devices, dev_list)
-		device->dev_root = fs_info->dev_root;
-	mutex_unlock(&fs_devices->device_list_mutex);
+	while (fs_devices) {
+		mutex_lock(&fs_devices->device_list_mutex);
+		list_for_each_entry(device, &fs_devices->devices, dev_list)
+			device->dev_root = fs_info->dev_root;
+		mutex_unlock(&fs_devices->device_list_mutex);
+
+		fs_devices = fs_devices->seed;
+	}
 }
 
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev)

diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 80754f9..1a15bbe 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h

@@ -255,6 +255,7 @@
 #define BTRFS_BALANCE_ARGS_DEVID	(1ULL << 2)
 #define BTRFS_BALANCE_ARGS_DRANGE	(1ULL << 3)
 #define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)
+#define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)
 
 /*
  * Profile changing flags.  When SOFT is set we won't relocate chunk if

diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 8e57191..4f19631 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c

@@ -98,7 +98,7 @@
 
 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
 		printk(KERN_WARNING "BTRFS: deflateInit failed\n");
-		ret = -1;
+		ret = -EIO;
 		goto out;
 	}
 
@@ -110,7 +110,7 @@
 
 	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 	if (out_page == NULL) {
-		ret = -1;
+		ret = -ENOMEM;
 		goto out;
 	}
 	cpage_out = kmap(out_page);
@@ -128,7 +128,7 @@
 			printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
 			       ret);
 			zlib_deflateEnd(&workspace->def_strm);
-			ret = -1;
+			ret = -EIO;
 			goto out;
 		}
 
@@ -136,7 +136,7 @@
 		if (workspace->def_strm.total_in > 8192 &&
 		    workspace->def_strm.total_in <
 		    workspace->def_strm.total_out) {
-			ret = -1;
+			ret = -EIO;
 			goto out;
 		}
 		/* we need another page for writing out.  Test this
@@ -147,12 +147,12 @@
 			kunmap(out_page);
 			if (nr_pages == nr_dest_pages) {
 				out_page = NULL;
-				ret = -1;
+				ret = -E2BIG;
 				goto out;
 			}
 			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 			if (out_page == NULL) {
-				ret = -1;
+				ret = -ENOMEM;
 				goto out;
 			}
 			cpage_out = kmap(out_page);
@@ -188,12 +188,12 @@
 	zlib_deflateEnd(&workspace->def_strm);
 
 	if (ret != Z_STREAM_END) {
-		ret = -1;
+		ret = -EIO;
 		goto out;
 	}
 
 	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
-		ret = -1;
+		ret = -E2BIG;
 		goto out;
 	}
 
@@ -253,7 +253,7 @@
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
-		return -1;
+		return -EIO;
 	}
 	while (workspace->inf_strm.total_in < srclen) {
 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
@@ -295,7 +295,7 @@
 		}
 	}
 	if (ret != Z_STREAM_END)
-		ret = -1;
+		ret = -EIO;
 	else
 		ret = 0;
 done:
@@ -337,7 +337,7 @@
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
-		return -1;
+		return -EIO;
 	}
 
 	while (bytes_left > 0) {
@@ -354,7 +354,7 @@
 		total_out = workspace->inf_strm.total_out;
 
 		if (total_out == buf_start) {
-			ret = -1;
+			ret = -EIO;
 			break;
 		}
 
@@ -382,7 +382,7 @@
 	}
 
 	if (ret != Z_STREAM_END && bytes_left != 0)
-		ret = -1;
+		ret = -EIO;
 	else
 		ret = 0;
 

diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d6..469f2e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c

@@ -104,12 +104,6 @@
 	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 	struct dentry *dentry;
 
-	if (acl) {
-		ret = posix_acl_valid(acl);
-		if (ret < 0)
-			goto out;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 65a30e8..90b3954 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c

@@ -211,18 +211,15 @@
 		SetPageError(page);
 		ceph_fscache_readpage_cancel(inode, page);
 		goto out;
-	} else {
-		if (err < PAGE_CACHE_SIZE) {
-		/* zero fill remainder of page */
-			zero_user_segment(page, err, PAGE_CACHE_SIZE);
-		} else {
-			flush_dcache_page(page);
-		}
 	}
-	SetPageUptodate(page);
+	if (err < PAGE_CACHE_SIZE)
+		/* zero fill remainder of page */
+		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+	else
+		flush_dcache_page(page);
 
-	if (err >= 0)
-		ceph_readpage_to_fscache(inode, page);
+	SetPageUptodate(page);
+	ceph_readpage_to_fscache(inode, page);
 
 out:
 	return err < 0 ? err : 0;
@@ -1187,8 +1184,8 @@
  * never get called.
  */
 static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-			      const struct iovec *iov,
-			      loff_t pos, unsigned long nr_segs)
+			      struct iov_iter *iter,
+			      loff_t pos)
 {
 	WARN_ON(1);
 	return -EINVAL;

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b62..1fde164 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c

@@ -221,8 +221,8 @@
 	return 0;
 }
 
-static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
-				struct ceph_cap_reservation *ctx)
+struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+			      struct ceph_cap_reservation *ctx)
 {
 	struct ceph_cap *cap = NULL;
 
@@ -508,15 +508,14 @@
  * it is < 0.  (This is so we can atomically add the cap and add an
  * open file reference to it.)
  */
-int ceph_add_cap(struct inode *inode,
-		 struct ceph_mds_session *session, u64 cap_id,
-		 int fmode, unsigned issued, unsigned wanted,
-		 unsigned seq, unsigned mseq, u64 realmino, int flags,
-		 struct ceph_cap_reservation *caps_reservation)
+void ceph_add_cap(struct inode *inode,
+		  struct ceph_mds_session *session, u64 cap_id,
+		  int fmode, unsigned issued, unsigned wanted,
+		  unsigned seq, unsigned mseq, u64 realmino, int flags,
+		  struct ceph_cap **new_cap)
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *new_cap = NULL;
 	struct ceph_cap *cap;
 	int mds = session->s_mds;
 	int actual_wanted;
@@ -531,20 +530,10 @@
 	if (fmode >= 0)
 		wanted |= ceph_caps_for_mode(fmode);
 
-retry:
-	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
 	if (!cap) {
-		if (new_cap) {
-			cap = new_cap;
-			new_cap = NULL;
-		} else {
-			spin_unlock(&ci->i_ceph_lock);
-			new_cap = get_cap(mdsc, caps_reservation);
-			if (new_cap == NULL)
-				return -ENOMEM;
-			goto retry;
-		}
+		cap = *new_cap;
+		*new_cap = NULL;
 
 		cap->issued = 0;
 		cap->implemented = 0;
@@ -562,9 +551,6 @@
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
 	} else {
-		if (new_cap)
-			ceph_put_cap(mdsc, new_cap);
-
 		/*
 		 * auth mds of the inode changed. we received the cap export
 		 * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@
 			ci->i_auth_cap = cap;
 			cap->mds_wanted = wanted;
 		}
-		ci->i_cap_exporting_issued = 0;
 	} else {
 		WARN_ON(ci->i_auth_cap == cap);
 	}
@@ -648,9 +633,6 @@
 
 	if (fmode >= 0)
 		__ceph_get_fmode(ci, fmode);
-	spin_unlock(&ci->i_ceph_lock);
-	wake_up_all(&ci->i_cap_wq);
-	return 0;
 }
 
 /*
@@ -685,7 +667,7 @@
  */
 int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
 {
-	int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+	int have = ci->i_snap_caps;
 	struct ceph_cap *cap;
 	struct rb_node *p;
 
@@ -900,7 +882,7 @@
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
-	return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+	return !RB_EMPTY_ROOT(&ci->i_caps);
 }
 
 int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@
  * actually be a revocation if it specifies a smaller cap set.)
  *
  * caller holds s_mutex and i_ceph_lock, we drop both.
- *
- * return value:
- *  0 - ok
- *  1 - check_caps on auth cap only (writeback)
- *  2 - check_caps (ack revoke)
  */
-static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+static void handle_cap_grant(struct ceph_mds_client *mdsc,
+			     struct inode *inode, struct ceph_mds_caps *grant,
+			     void *snaptrace, int snaptrace_len,
+			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap,
-			     struct ceph_buffer *xattr_buf)
-		__releases(ci->i_ceph_lock)
+			     struct ceph_cap *cap, int issued)
+	__releases(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	int seq = le32_to_cpu(grant->seq);
 	int newcaps = le32_to_cpu(grant->caps);
-	int issued, implemented, used, wanted, dirty;
+	int used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
 	u64 max_size = le64_to_cpu(grant->max_size);
 	struct timespec mtime, atime, ctime;
 	int check_caps = 0;
-	int wake = 0;
-	int writeback = 0;
-	int queue_invalidate = 0;
-	int deleted_inode = 0;
-	int queue_revalidate = 0;
+	bool wake = 0;
+	bool writeback = 0;
+	bool queue_trunc = 0;
+	bool queue_invalidate = 0;
+	bool queue_revalidate = 0;
+	bool deleted_inode = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@
 	}
 
 	/* side effects now are allowed */
-
-	issued = __ceph_caps_issued(ci, &implemented);
-	issued |= implemented | __ceph_caps_dirty(ci);
-
 	cap->cap_gen = session->s_cap_gen;
 	cap->seq = seq;
 
 	__check_cap_issue(ci, cap, newcaps);
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(grant->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
 		if (inode->i_nlink == 0 &&
 		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@
 	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
 		queue_revalidate = 1;
 
-	/* size/ctime/mtime/atime? */
-	ceph_fill_file_size(inode, issued,
-			    le32_to_cpu(grant->truncate_seq),
-			    le64_to_cpu(grant->truncate_size), size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
+	if (newcaps & CEPH_CAP_ANY_RD) {
+		/* ctime/mtime/atime? */
+		ceph_decode_timespec(&mtime, &grant->mtime);
+		ceph_decode_timespec(&atime, &grant->atime);
+		ceph_decode_timespec(&ctime, &grant->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(grant->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
 
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* max size increase? */
-	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;  /* reset */
-			ci->i_requested_max_size = 0;
+	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+		/* file layout may have changed */
+		ci->i_layout = grant->layout;
+		/* size/truncate_seq? */
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(grant->truncate_seq),
+					le64_to_cpu(grant->truncate_size),
+					size);
+		/* max size increase? */
+		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+			dout("max_size %lld -> %llu\n",
+			     ci->i_max_size, max_size);
+			ci->i_max_size = max_size;
+			if (max_size >= ci->i_wanted_max_size) {
+				ci->i_wanted_max_size = 0;  /* reset */
+				ci->i_requested_max_size = 0;
+			}
+			wake = 1;
 		}
-		wake = 1;
 	}
 
 	/* check cap bits */
@@ -2595,6 +2578,23 @@
 
 	spin_unlock(&ci->i_ceph_lock);
 
+	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+		down_write(&mdsc->snap_rwsem);
+		ceph_update_snap_trace(mdsc, snaptrace,
+				       snaptrace + snaptrace_len, false);
+		downgrade_write(&mdsc->snap_rwsem);
+		kick_flushing_inode_caps(mdsc, session, inode);
+		up_read(&mdsc->snap_rwsem);
+		if (newcaps & ~issued)
+			wake = 1;
+	}
+
+	if (queue_trunc) {
+		ceph_queue_vmtruncate(inode);
+		ceph_queue_revalidate(inode);
+	} else if (queue_revalidate)
+		ceph_queue_revalidate(inode);
+
 	if (writeback)
 		/*
 		 * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@
 		ceph_queue_invalidate(inode);
 	if (deleted_inode)
 		invalidate_aliases(inode);
-	if (queue_revalidate)
-		ceph_queue_revalidate(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
@@ -2784,7 +2782,7 @@
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *tsession = NULL;
-	struct ceph_cap *cap, *tcap;
+	struct ceph_cap *cap, *tcap, *new_cap = NULL;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 t_cap_id;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
-	if (!cap)
+	if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
 		goto out_unlock;
 
 	if (target < 0) {
@@ -2846,15 +2844,14 @@
 		}
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
-	}
-
-	if (tsession) {
-		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
-		spin_unlock(&ci->i_ceph_lock);
+	} else if (tsession) {
 		/* add placeholder for the export tagert */
+		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
 		ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
-			     t_seq - 1, t_mseq, (u64)-1, flag, NULL);
-		goto retry;
+			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+
+		__ceph_remove_cap(cap, false);
+		goto out_unlock;
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@
 					  SINGLE_DEPTH_NESTING);
 		}
 		ceph_add_cap_releases(mdsc, tsession);
+		new_cap = ceph_get_cap(mdsc, NULL);
 	} else {
 		WARN_ON(1);
 		tsession = NULL;
@@ -2887,24 +2885,27 @@
 		mutex_unlock(&tsession->s_mutex);
 		ceph_put_mds_session(tsession);
 	}
+	if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 }
 
 /*
- * Handle cap IMPORT.  If there are temp bits from an older EXPORT,
- * clean them up.
+ * Handle cap IMPORT.
  *
- * caller holds s_mutex.
+ * caller holds s_mutex. acquires i_ceph_lock and returns with it held.
  */
 static void handle_cap_import(struct ceph_mds_client *mdsc,
 			      struct inode *inode, struct ceph_mds_caps *im,
 			      struct ceph_mds_cap_peer *ph,
 			      struct ceph_mds_session *session,
-			      void *snaptrace, int snaptrace_len)
+			      struct ceph_cap **target_cap, int *old_issued)
+	__acquires(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *cap;
+	struct ceph_cap *cap, *ocap, *new_cap = NULL;
 	int mds = session->s_mds;
-	unsigned issued = le32_to_cpu(im->caps);
+	int issued;
+	unsigned caps = le32_to_cpu(im->caps);
 	unsigned wanted = le32_to_cpu(im->wanted);
 	unsigned seq = le32_to_cpu(im->seq);
 	unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@
 	dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
 	     inode, ci, mds, mseq, peer);
 
+retry:
 	spin_lock(&ci->i_ceph_lock);
-	cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
-	if (cap && cap->cap_id == p_cap_id) {
+	cap = __get_cap_for_mds(ci, mds);
+	if (!cap) {
+		if (!new_cap) {
+			spin_unlock(&ci->i_ceph_lock);
+			new_cap = ceph_get_cap(mdsc, NULL);
+			goto retry;
+		}
+		cap = new_cap;
+	} else {
+		if (new_cap) {
+			ceph_put_cap(mdsc, new_cap);
+			new_cap = NULL;
+		}
+	}
+
+	__ceph_caps_issued(ci, &issued);
+	issued |= __ceph_caps_dirty(ci);
+
+	ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
+
+	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+	if (ocap && ocap->cap_id == p_cap_id) {
 		dout(" remove export cap %p mds%d flags %d\n",
-		     cap, peer, ph->flags);
+		     ocap, peer, ph->flags);
 		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
-		    (cap->seq != le32_to_cpu(ph->seq) ||
-		     cap->mseq != le32_to_cpu(ph->mseq))) {
+		    (ocap->seq != le32_to_cpu(ph->seq) ||
+		     ocap->mseq != le32_to_cpu(ph->mseq))) {
 			pr_err("handle_cap_import: mismatched seq/mseq: "
 			       "ino (%llx.%llx) mds%d seq %d mseq %d "
 			       "importer mds%d has peer seq %d mseq %d\n",
-			       ceph_vinop(inode), peer, cap->seq,
-			       cap->mseq, mds, le32_to_cpu(ph->seq),
+			       ceph_vinop(inode), peer, ocap->seq,
+			       ocap->mseq, mds, le32_to_cpu(ph->seq),
 			       le32_to_cpu(ph->mseq));
 		}
-		ci->i_cap_exporting_issued = cap->issued;
-		__ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	/* make sure we re-request max_size, if necessary */
 	ci->i_wanted_max_size = 0;
 	ci->i_requested_max_size = 0;
-	spin_unlock(&ci->i_ceph_lock);
 
-	down_write(&mdsc->snap_rwsem);
-	ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
-			       false);
-	downgrade_write(&mdsc->snap_rwsem);
-	ceph_add_cap(inode, session, cap_id, -1,
-		     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
-		     NULL /* no caps context */);
-	kick_flushing_inode_caps(mdsc, session, inode);
-	up_read(&mdsc->snap_rwsem);
-
+	*old_issued = issued;
+	*target_cap = cap;
 }
 
 /*
@@ -2977,7 +2990,7 @@
 	struct ceph_mds_caps *h;
 	struct ceph_mds_cap_peer *peer = NULL;
 	int mds = session->s_mds;
-	int op;
+	int op, issued;
 	u32 seq, mseq;
 	struct ceph_vino vino;
 	u64 cap_id;
@@ -3069,7 +3082,10 @@
 
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, peer, session,
-				  snaptrace, snaptrace_len);
+				  &cap, &issued);
+		handle_cap_grant(mdsc, inode, h,  snaptrace, snaptrace_len,
+				 msg->middle, session, cap, issued);
+		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -3086,8 +3102,10 @@
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
-	case CEPH_CAP_OP_IMPORT:
-		handle_cap_grant(inode, h, session, cap, msg->middle);
+		__ceph_caps_issued(ci, &issued);
+		issued |= __ceph_caps_dirty(ci);
+		handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
+				 session, cap, issued);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:

diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6..8d7d782 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c

@@ -169,7 +169,7 @@
 	return dentry;
 }
 
-struct dentry *ceph_get_parent(struct dentry *child)
+static struct dentry *ceph_get_parent(struct dentry *child)
 {
 	/* don't re-export snaps */
 	if (ceph_snap(child->d_inode) != CEPH_NOSNAP)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88a6df4..3020851 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c

@@ -418,7 +418,7 @@
 	struct page **pages;
 	u64 off = iocb->ki_pos;
 	int num_pages, ret;
-	size_t len = i->count;
+	size_t len = iov_iter_count(i);
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off,
 	     (unsigned)len,
@@ -436,25 +436,26 @@
 
 	if (file->f_flags & O_DIRECT) {
 		while (iov_iter_count(i)) {
-			void __user *data = i->iov[0].iov_base + i->iov_offset;
-			size_t len = i->iov[0].iov_len - i->iov_offset;
+			size_t start;
+			ssize_t n;
 
-			num_pages = calc_pages_for((unsigned long)data, len);
-			pages = ceph_get_direct_page_vector(data,
-							    num_pages, true);
-			if (IS_ERR(pages))
-				return PTR_ERR(pages);
+			n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
+			if (n < 0)
+				return n;
 
-			ret = striped_read(inode, off, len,
+			num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+			ret = striped_read(inode, off, n,
 					   pages, num_pages, checkeof,
-					   1, (unsigned long)data & ~PAGE_MASK);
+					   1, start);
+
 			ceph_put_page_vector(pages, num_pages, true);
 
 			if (ret <= 0)
 				break;
 			off += ret;
 			iov_iter_advance(i, ret);
-			if (ret < len)
+			if (ret < n)
 				break;
 		}
 	} else {
@@ -466,25 +467,14 @@
 					num_pages, checkeof, 0, 0);
 		if (ret > 0) {
 			int l, k = 0;
-			size_t left = len = ret;
+			size_t left = ret;
 
 			while (left) {
-				void __user *data = i->iov[0].iov_base
-							+ i->iov_offset;
-				l = min(i->iov[0].iov_len - i->iov_offset,
-					left);
-
-				ret = ceph_copy_page_vector_to_user(&pages[k],
-								    data, off,
-								    l);
-				if (ret > 0) {
-					iov_iter_advance(i, ret);
-					left -= ret;
-					off += ret;
-					k = calc_pages_for(iocb->ki_pos,
-							   len - left + 1) - 1;
-					BUG_ON(k >= num_pages && left);
-				} else
+				int copy = min_t(size_t, PAGE_SIZE, left);
+				l = copy_page_to_iter(pages[k++], 0, copy, i);
+				off += l;
+				left -= l;
+				if (l < copy)
 					break;
 			}
 		}
@@ -541,8 +531,7 @@
  * objects, rollback on failure, etc.)
  */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, size_t count)
+ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -556,11 +545,10 @@
 	int written = 0;
 	int flags;
 	int check_caps = 0;
-	int page_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 	loff_t pos = iocb->ki_pos;
-	struct iov_iter i;
+	size_t count = iov_iter_count(from);
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
@@ -582,13 +570,10 @@
 		CEPH_OSD_FLAG_ONDISK |
 		CEPH_OSD_FLAG_WRITE;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
-
-	while (iov_iter_count(&i) > 0) {
-		void __user *data = i.iov->iov_base + i.iov_offset;
-		u64 len = i.iov->iov_len - i.iov_offset;
-
-		page_align = (unsigned long)data & ~PAGE_MASK;
+	while (iov_iter_count(from) > 0) {
+		u64 len = iov_iter_single_seg_count(from);
+		size_t start;
+		ssize_t n;
 
 		snapc = ci->i_snap_realm->cached_context;
 		vino = ceph_vino(inode);
@@ -604,20 +589,21 @@
 			break;
 		}
 
-		num_pages = calc_pages_for(page_align, len);
-		pages = ceph_get_direct_page_vector(data, num_pages, false);
-		if (IS_ERR(pages)) {
-			ret = PTR_ERR(pages);
-			goto out;
+		n = iov_iter_get_pages_alloc(from, &pages, len, &start);
+		if (unlikely(n < 0)) {
+			ret = n;
+			ceph_osdc_put_request(req);
+			break;
 		}
 
+		num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
 		/*
 		 * throw out any page cache pages in this range. this
 		 * may block.
 		 */
 		truncate_inode_pages_range(inode->i_mapping, pos,
-				   (pos+len) | (PAGE_CACHE_SIZE-1));
-		osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+				   (pos+n) | (PAGE_CACHE_SIZE-1));
+		osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
 						false, false);
 
 		/* BUG_ON(vino.snap != CEPH_NOSNAP); */
@@ -629,22 +615,20 @@
 
 		ceph_put_page_vector(pages, num_pages, false);
 
-out:
 		ceph_osdc_put_request(req);
-		if (ret == 0) {
-			pos += len;
-			written += len;
-			iov_iter_advance(&i, (size_t)len);
-
-			if (pos > i_size_read(inode)) {
-				check_caps = ceph_inode_set_size(inode, pos);
-				if (check_caps)
-					ceph_check_caps(ceph_inode(inode),
-							CHECK_CAPS_AUTHONLY,
-							NULL);
-			}
-		} else
+		if (ret)
 			break;
+		pos += n;
+		written += n;
+		iov_iter_advance(from, n);
+
+		if (pos > i_size_read(inode)) {
+			check_caps = ceph_inode_set_size(inode, pos);
+			if (check_caps)
+				ceph_check_caps(ceph_inode(inode),
+						CHECK_CAPS_AUTHONLY,
+						NULL);
+		}
 	}
 
 	if (ret != -EOLDSNAPC && written > 0) {
@@ -662,8 +646,7 @@
  * correct atomic write, we should e.g. take write locks on all
  * objects, rollback on failure, etc.)
  */
-static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, size_t count)
+static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -681,7 +664,7 @@
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 	loff_t pos = iocb->ki_pos;
-	struct iov_iter i;
+	size_t count = iov_iter_count(from);
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
@@ -703,9 +686,7 @@
 		CEPH_OSD_FLAG_WRITE |
 		CEPH_OSD_FLAG_ACK;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
-
-	while ((len = iov_iter_count(&i)) > 0) {
+	while ((len = iov_iter_count(from)) > 0) {
 		size_t left;
 		int n;
 
@@ -737,13 +718,12 @@
 		left = len;
 		for (n = 0; n < num_pages; n++) {
 			size_t plen = min_t(size_t, left, PAGE_SIZE);
-			ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+			ret = copy_page_from_iter(pages[n], 0, plen, from);
 			if (ret != plen) {
 				ret = -EFAULT;
 				break;
 			}
 			left -= ret;
-			iov_iter_advance(&i, ret);
 		}
 
 		if (ret < 0) {
@@ -796,8 +776,7 @@
  *
  * Hmm, the sync read case isn't actually async... should it be?
  */
-static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			     unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
@@ -823,40 +802,20 @@
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
 	    (fi->flags & CEPH_F_SYNC)) {
-		struct iov_iter i;
 
 		dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		if (!read) {
-			ret = generic_segment_checks(iov, &nr_segs,
-							&len, VERIFY_WRITE);
-			if (ret)
-				goto out;
-		}
-
-		iov_iter_init(&i, iov, nr_segs, len, read);
-
 		/* hmm, this isn't really async... */
-		ret = ceph_sync_read(iocb, &i, &checkeof);
+		ret = ceph_sync_read(iocb, to, &checkeof);
 	} else {
-		/*
-		 * We can't modify the content of iov,
-		 * so we only read from beginning.
-		 */
-		if (read) {
-			iocb->ki_pos = pos;
-			len = iocb->ki_nbytes;
-			read = 0;
-		}
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
-		     inode, ceph_vinop(inode), pos, (unsigned)len,
+		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		ret = generic_file_read_iter(iocb, to);
 	}
-out:
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	ceph_put_cap_refs(ci, got);
@@ -872,6 +831,7 @@
 			     ", reading more\n", iocb->ki_pos,
 			     inode->i_size);
 
+			iov_iter_advance(to, ret);
 			read += ret;
 			len -= ret;
 			checkeof = 0;
@@ -895,8 +855,7 @@
  *
  * If we are near ENOSPC, write synchronously.
  */
-static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct ceph_file_info *fi = file->private_data;
@@ -904,18 +863,15 @@
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
-	ssize_t count, written = 0;
+	ssize_t count = iov_iter_count(from), written = 0;
 	int err, want, got;
+	loff_t pos = iocb->ki_pos;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		goto out;
-
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = file->f_mapping->backing_dev_info;
 
@@ -925,6 +881,7 @@
 
 	if (count == 0)
 		goto out;
+	iov_iter_truncate(from, count);
 
 	err = file_remove_suid(file);
 	if (err)
@@ -956,23 +913,26 @@
 
 	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+		struct iov_iter data;
 		mutex_unlock(&inode->i_mutex);
+		/* we might need to revert back to that point */
+		data = *from;
 		if (file->f_flags & O_DIRECT)
-			written = ceph_sync_direct_write(iocb, iov,
-							 nr_segs, count);
+			written = ceph_sync_direct_write(iocb, &data);
 		else
-			written = ceph_sync_write(iocb, iov, nr_segs, count);
+			written = ceph_sync_write(iocb, &data);
 		if (written == -EOLDSNAPC) {
 			dout("aio_write %p %llx.%llx %llu~%u"
 				"got EOLDSNAPC, retrying\n",
 				inode, ceph_vinop(inode),
-				pos, (unsigned)iov->iov_len);
+				pos, (unsigned)count);
 			mutex_lock(&inode->i_mutex);
 			goto retry_snap;
 		}
+		if (written > 0)
+			iov_iter_advance(from, written);
 	} else {
 		loff_t old_size = inode->i_size;
-		struct iov_iter from;
 		/*
 		 * No need to acquire the i_truncate_mutex. Because
 		 * the MDS revokes Fwb caps before sending truncate
@@ -980,8 +940,7 @@
 		 * are pending vmtruncate. So write and vmtruncate
 		 * can not run at the same time
 		 */
-		iov_iter_init(&from, iov, nr_segs, count, 0);
-		written = generic_perform_write(file, &from, pos);
+		written = generic_perform_write(file, from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
 		if (inode->i_size > old_size)
@@ -999,7 +958,7 @@
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+	     inode, ceph_vinop(inode), pos, (unsigned)count,
 	     ceph_cap_string(got));
 	ceph_put_cap_refs(ci, got);
 
@@ -1276,16 +1235,16 @@
 	.open = ceph_open,
 	.release = ceph_release,
 	.llseek = ceph_llseek,
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = ceph_aio_read,
-	.aio_write = ceph_aio_write,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = ceph_read_iter,
+	.write_iter = ceph_write_iter,
 	.mmap = ceph_mmap,
 	.fsync = ceph_fsync,
 	.lock = ceph_lock,
 	.flock = ceph_flock,
 	.splice_read = generic_file_splice_read,
-	.splice_write = generic_file_splice_write,
+	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl = ceph_ioctl,
 	.compat_ioctl	= ceph_ioctl,
 	.fallocate	= ceph_fallocate,

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9f..04c89c2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -10,6 +10,7 @@
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
+#include <linux/random.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -179,9 +180,8 @@
  * specified, copy the frag delegation info to the caller if
  * it is present.
  */
-u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-		     struct ceph_inode_frag *pfrag,
-		     int *found)
+static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+			      struct ceph_inode_frag *pfrag, int *found)
 {
 	u32 t = ceph_frag_make(0, 0);
 	struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@
 	if (found)
 		*found = 0;
 
-	mutex_lock(&ci->i_fragtree_mutex);
 	while (1) {
 		WARN_ON(!ceph_frag_contains_value(t, v));
 		frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@
 	}
 	dout("choose_frag(%x) = %x\n", v, t);
 
-	mutex_unlock(&ci->i_fragtree_mutex);
 	return t;
 }
 
+u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+		     struct ceph_inode_frag *pfrag, int *found)
+{
+	u32 ret;
+	mutex_lock(&ci->i_fragtree_mutex);
+	ret = __ceph_choose_frag(ci, v, pfrag, found);
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return ret;
+}
+
 /*
  * Process dirfrag (delegation) info from the mds.  Include leaf
  * fragment in tree ONLY if ndist > 0.  Otherwise, only
@@ -237,11 +245,17 @@
 	u32 id = le32_to_cpu(dirinfo->frag);
 	int mds = le32_to_cpu(dirinfo->auth);
 	int ndist = le32_to_cpu(dirinfo->ndist);
+	int diri_auth = -1;
 	int i;
 	int err = 0;
 
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_auth_cap)
+		diri_auth = ci->i_auth_cap->mds;
+	spin_unlock(&ci->i_ceph_lock);
+
 	mutex_lock(&ci->i_fragtree_mutex);
-	if (ndist == 0) {
+	if (ndist == 0 && mds == diri_auth) {
 		/* no delegation info needed. */
 		frag = __ceph_find_frag(ci, id);
 		if (!frag)
@@ -286,6 +300,75 @@
 	return err;
 }
 
+static int ceph_fill_fragtree(struct inode *inode,
+			      struct ceph_frag_tree_head *fragtree,
+			      struct ceph_mds_reply_dirfrag *dirinfo)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
+	int i;
+	u32 id, nsplits;
+	bool update = false;
+
+	mutex_lock(&ci->i_fragtree_mutex);
+	nsplits = le32_to_cpu(fragtree->nsplits);
+	if (nsplits) {
+		i = prandom_u32() % nsplits;
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		if (!__ceph_find_frag(ci, id))
+			update = true;
+	} else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
+		rb_node = rb_first(&ci->i_fragtree);
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
+			update = true;
+	}
+	if (!update && dirinfo) {
+		id = le32_to_cpu(dirinfo->frag);
+		if (id != __ceph_choose_frag(ci, id, NULL, NULL))
+			update = true;
+	}
+	if (!update)
+		goto out_unlock;
+
+	dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode));
+	rb_node = rb_first(&ci->i_fragtree);
+	for (i = 0; i < nsplits; i++) {
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
+		frag->split_by = le32_to_cpu(fragtree->splits[i].by);
+		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
+	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
+out_unlock:
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return 0;
+}
 
 /*
  * initialize a newly allocated inode.
@@ -341,7 +424,6 @@
 	INIT_LIST_HEAD(&ci->i_cap_snaps);
 	ci->i_head_snapc = NULL;
 	ci->i_snap_caps = 0;
-	ci->i_cap_exporting_issued = 0;
 
 	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
 		ci->i_nr_by_mode[i] = 0;
@@ -407,7 +489,7 @@
 
 	/*
 	 * we may still have a snap_realm reference if there are stray
-	 * caps in i_cap_exporting_issued or i_snap_caps.
+	 * caps in i_snap_caps.
 	 */
 	if (ci->i_snap_realm) {
 		struct ceph_mds_client *mdsc =
@@ -582,22 +664,26 @@
 		      unsigned long ttl_from, int cap_fmode,
 		      struct ceph_cap_reservation *caps_reservation)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int i;
-	int issued = 0, implemented;
+	int issued = 0, implemented, new_issued;
 	struct timespec mtime, atime, ctime;
-	u32 nsplits;
-	struct ceph_inode_frag *frag;
-	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
+	struct ceph_cap *new_cap = NULL;
 	int err = 0;
-	int queue_trunc = 0;
+	bool wake = false;
+	bool queue_trunc = false;
+	bool new_version = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
 	     ci->i_version);
 
+	/* prealloc new cap struct */
+	if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
+		new_cap = ceph_get_cap(mdsc, caps_reservation);
+
 	/*
 	 * prealloc xattr data, if it looks like we'll need it.  only
 	 * if len > 4 (meaning there are actually xattrs; the first 4
@@ -623,19 +709,23 @@
 	 *   3    2     skip
 	 *   3    3     update
 	 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-	
+	if (ci->i_version == 0 ||
+	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
+		new_version = true;
+
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
+	new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
 	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
+	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +734,35 @@
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0)
 		set_nlink(inode, le32_to_cpu(info->nlink));
 
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
+	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+		/* be careful with mtime, atime, size */
+		ceph_decode_timespec(&atime, &info->atime);
+		ceph_decode_timespec(&mtime, &info->mtime);
+		ceph_decode_timespec(&ctime, &info->ctime);
+		ceph_fill_file_time(inode, issued,
+				le32_to_cpu(info->time_warp_seq),
+				&ctime, &mtime, &atime);
+	}
 
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+	if (new_version ||
+	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		ci->i_layout = info->layout;
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(info->truncate_seq),
+					le64_to_cpu(info->truncate_size),
+					le64_to_cpu(info->size));
+		/* only update max_size on auth cap */
+		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+		    ci->i_max_size != le64_to_cpu(info->max_size)) {
+			dout("max_size %lld -> %llu\n", ci->i_max_size,
+					le64_to_cpu(info->max_size));
+			ci->i_max_size = le64_to_cpu(info->max_size);
+		}
+	}
 
 	/* xattrs */
 	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,58 +847,6 @@
 		dout(" marking %p complete (empty)\n", inode);
 		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
 	}
-no_change:
-	/* only update max_size on auth cap */
-	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-	    ci->i_max_size != le64_to_cpu(info->max_size)) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size,
-		     le64_to_cpu(info->max_size));
-		ci->i_max_size = le64_to_cpu(info->max_size);
-	}
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	/* queue truncate if we saw i_size decrease */
-	if (queue_trunc)
-		ceph_queue_vmtruncate(inode);
-
-	/* populate frag tree */
-	/* FIXME: move me up, if/when version reflects fragtree changes */
-	nsplits = le32_to_cpu(info->fragtree.nsplits);
-	mutex_lock(&ci->i_fragtree_mutex);
-	rb_node = rb_first(&ci->i_fragtree);
-	for (i = 0; i < nsplits; i++) {
-		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		frag = NULL;
-		while (rb_node) {
-			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
-			if (ceph_frag_compare(frag->frag, id) >= 0) {
-				if (frag->frag != id)
-					frag = NULL;
-				else
-					rb_node = rb_next(rb_node);
-				break;
-			}
-			rb_node = rb_next(rb_node);
-			rb_erase(&frag->node, &ci->i_fragtree);
-			kfree(frag);
-			frag = NULL;
-		}
-		if (!frag) {
-			frag = __get_or_create_frag(ci, id);
-			if (IS_ERR(frag))
-				continue;
-		}
-		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
-		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
-	}
-	while (rb_node) {
-		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
-		rb_node = rb_next(rb_node);
-		rb_erase(&frag->node, &ci->i_fragtree);
-		kfree(frag);
-	}
-	mutex_unlock(&ci->i_fragtree_mutex);
 
 	/* were we issued a capability? */
 	if (info->cap.caps) {
@@ -809,30 +859,41 @@
 				     le32_to_cpu(info->cap.seq),
 				     le32_to_cpu(info->cap.mseq),
 				     le64_to_cpu(info->cap.realm),
-				     info->cap.flags,
-				     caps_reservation);
+				     info->cap.flags, &new_cap);
+			wake = true;
 		} else {
-			spin_lock(&ci->i_ceph_lock);
 			dout(" %p got snap_caps %s\n", inode,
 			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
 			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
 			if (cap_fmode >= 0)
 				__ceph_get_fmode(ci, cap_fmode);
-			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (cap_fmode >= 0) {
 		pr_warn("mds issued no caps on %llx.%llx\n",
 			   ceph_vinop(inode));
 		__ceph_get_fmode(ci, cap_fmode);
 	}
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (wake)
+		wake_up_all(&ci->i_cap_wq);
+
+	/* queue truncate if we saw i_size decrease */
+	if (queue_trunc)
+		ceph_queue_vmtruncate(inode);
+
+	/* populate frag tree */
+	if (S_ISDIR(inode->i_mode))
+		ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
 
 	/* update delegation info? */
 	if (dirinfo)
 		ceph_fill_dirfrag(inode, dirinfo);
 
 	err = 0;
-
 out:
+	if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 	if (xattr_blob)
 		ceph_buffer_put(xattr_blob);
 	return err;
@@ -1485,7 +1546,7 @@
 	orig_gen = ci->i_rdcache_gen;
 	spin_unlock(&ci->i_ceph_lock);
 
-	truncate_inode_pages(inode->i_mapping, 0);
+	truncate_pagecache(inode, 0);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (orig_gen == ci->i_rdcache_gen &&
@@ -1588,7 +1649,7 @@
 	     ci->i_truncate_pending, to);
 	spin_unlock(&ci->i_ceph_lock);
 
-	truncate_inode_pages(inode->i_mapping, to);
+	truncate_pagecache(inode, to);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (to == ci->i_truncate_size) {

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98..92a2548 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c

@@ -1558,6 +1558,8 @@
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
+	req->r_stamp = CURRENT_TIME;
+
 	req->r_op = op;
 	req->r_direct_mode = mode;
 	return req;
@@ -1783,7 +1785,8 @@
 	}
 
 	len = sizeof(*head) +
-		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64));
+		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+		sizeof(struct timespec);
 
 	/* calculate (max) length for cap releases */
 	len += sizeof(struct ceph_mds_request_release) *
@@ -1800,6 +1803,7 @@
 		goto out_free2;
 	}
 
+	msg->hdr.version = 2;
 	msg->hdr.tid = cpu_to_le64(req->r_tid);
 
 	head = msg->front.iov_base;
@@ -1836,6 +1840,9 @@
 		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
 	head->num_releases = cpu_to_le16(releases);
 
+	/* time stamp */
+	ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+
 	BUG_ON(p > end);
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);

diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfcc..e00737c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h

@@ -194,6 +194,7 @@
 	int r_fmode;        /* file mode, if expecting cap */
 	kuid_t r_uid;
 	kgid_t r_gid;
+	struct timespec r_stamp;
 
 	/* for choosing which mds to send this request to */
 	int r_direct_mode;

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc..12b2074 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h

@@ -292,7 +292,6 @@
 	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
 						    dirty|flushing caps */
 	unsigned i_snap_caps;           /* cap bits for snapped files */
-	unsigned i_cap_exporting_issued;
 
 	int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
 
@@ -775,11 +774,13 @@
 extern const char *ceph_cap_string(int c);
 extern void ceph_handle_caps(struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
-extern int ceph_add_cap(struct inode *inode,
-			struct ceph_mds_session *session, u64 cap_id,
-			int fmode, unsigned issued, unsigned wanted,
-			unsigned cap, unsigned seq, u64 realmino, int flags,
-			struct ceph_cap_reservation *caps_reservation);
+extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+				     struct ceph_cap_reservation *ctx);
+extern void ceph_add_cap(struct inode *inode,
+			 struct ceph_mds_session *session, u64 cap_id,
+			 int fmode, unsigned issued, unsigned wanted,
+			 unsigned cap, unsigned seq, u64 realmino, int flags,
+			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5be1f99..2c90d07 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c

@@ -87,10 +87,6 @@
 
 struct workqueue_struct	*cifsiod_wq;
 
-#ifdef CONFIG_CIFS_SMB2
-__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
-#endif
-
 /*
  * Bumps refcount for cifs super block.
  * Note that it should be only called if a referece to VFS super block is
@@ -251,11 +247,7 @@
 	 * server, can not assume caching of file data or metadata.
 	 */
 	cifs_set_oplock_level(cifs_inode, 0);
-	cifs_inode->delete_pending = false;
-	cifs_inode->invalid_mapping = false;
-	clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
-	clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
-	clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
+	cifs_inode->flags = 0;
 	spin_lock_init(&cifs_inode->writers_lock);
 	cifs_inode->writers = 0;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -302,7 +294,7 @@
 	struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
 	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
 
-	seq_printf(s, ",addr=");
+	seq_puts(s, ",addr=");
 
 	switch (server->dstaddr.ss_family) {
 	case AF_INET:
@@ -314,7 +306,7 @@
 			seq_printf(s, "%%%u", sa6->sin6_scope_id);
 		break;
 	default:
-		seq_printf(s, "(unknown)");
+		seq_puts(s, "(unknown)");
 	}
 }
 
@@ -324,45 +316,45 @@
 	if (ses->sectype == Unspecified)
 		return;
 
-	seq_printf(s, ",sec=");
+	seq_puts(s, ",sec=");
 
 	switch (ses->sectype) {
 	case LANMAN:
-		seq_printf(s, "lanman");
+		seq_puts(s, "lanman");
 		break;
 	case NTLMv2:
-		seq_printf(s, "ntlmv2");
+		seq_puts(s, "ntlmv2");
 		break;
 	case NTLM:
-		seq_printf(s, "ntlm");
+		seq_puts(s, "ntlm");
 		break;
 	case Kerberos:
-		seq_printf(s, "krb5");
+		seq_puts(s, "krb5");
 		break;
 	case RawNTLMSSP:
-		seq_printf(s, "ntlmssp");
+		seq_puts(s, "ntlmssp");
 		break;
 	default:
 		/* shouldn't ever happen */
-		seq_printf(s, "unknown");
+		seq_puts(s, "unknown");
 		break;
 	}
 
 	if (ses->sign)
-		seq_printf(s, "i");
+		seq_puts(s, "i");
 }
 
 static void
 cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
 {
-	seq_printf(s, ",cache=");
+	seq_puts(s, ",cache=");
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
-		seq_printf(s, "strict");
+		seq_puts(s, "strict");
 	else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
-		seq_printf(s, "none");
+		seq_puts(s, "none");
 	else
-		seq_printf(s, "loose");
+		seq_puts(s, "loose");
 }
 
 static void
@@ -395,7 +387,7 @@
 	cifs_show_cache_flavor(s, cifs_sb);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
-		seq_printf(s, ",multiuser");
+		seq_puts(s, ",multiuser");
 	else if (tcon->ses->user_name)
 		seq_printf(s, ",username=%s", tcon->ses->user_name);
 
@@ -421,16 +413,16 @@
 	seq_printf(s, ",uid=%u",
 		   from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
-		seq_printf(s, ",forceuid");
+		seq_puts(s, ",forceuid");
 	else
-		seq_printf(s, ",noforceuid");
+		seq_puts(s, ",noforceuid");
 
 	seq_printf(s, ",gid=%u",
 		   from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
-		seq_printf(s, ",forcegid");
+		seq_puts(s, ",forcegid");
 	else
-		seq_printf(s, ",noforcegid");
+		seq_puts(s, ",noforcegid");
 
 	cifs_show_address(s, tcon->ses->server);
 
@@ -442,47 +434,47 @@
 	cifs_show_nls(s, cifs_sb->local_nls);
 
 	if (tcon->seal)
-		seq_printf(s, ",seal");
+		seq_puts(s, ",seal");
 	if (tcon->nocase)
-		seq_printf(s, ",nocase");
+		seq_puts(s, ",nocase");
 	if (tcon->retry)
-		seq_printf(s, ",hard");
+		seq_puts(s, ",hard");
 	if (tcon->unix_ext)
-		seq_printf(s, ",unix");
+		seq_puts(s, ",unix");
 	else
-		seq_printf(s, ",nounix");
+		seq_puts(s, ",nounix");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
-		seq_printf(s, ",posixpaths");
+		seq_puts(s, ",posixpaths");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
-		seq_printf(s, ",setuids");
+		seq_puts(s, ",setuids");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
-		seq_printf(s, ",serverino");
+		seq_puts(s, ",serverino");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
-		seq_printf(s, ",rwpidforward");
+		seq_puts(s, ",rwpidforward");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL)
-		seq_printf(s, ",forcemand");
+		seq_puts(s, ",forcemand");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
-		seq_printf(s, ",nouser_xattr");
+		seq_puts(s, ",nouser_xattr");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
-		seq_printf(s, ",mapchars");
+		seq_puts(s, ",mapchars");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
-		seq_printf(s, ",sfu");
+		seq_puts(s, ",sfu");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-		seq_printf(s, ",nobrl");
+		seq_puts(s, ",nobrl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-		seq_printf(s, ",cifsacl");
+		seq_puts(s, ",cifsacl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
-		seq_printf(s, ",dynperm");
+		seq_puts(s, ",dynperm");
 	if (root->d_sb->s_flags & MS_POSIXACL)
-		seq_printf(s, ",acl");
+		seq_puts(s, ",acl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
-		seq_printf(s, ",mfsymlinks");
+		seq_puts(s, ",mfsymlinks");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
-		seq_printf(s, ",fsc");
+		seq_puts(s, ",fsc");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)
-		seq_printf(s, ",nostrictsync");
+		seq_puts(s, ",nostrictsync");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
-		seq_printf(s, ",noperm");
+		seq_puts(s, ",noperm");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
 		seq_printf(s, ",backupuid=%u",
 			   from_kuid_munged(&init_user_ns,
@@ -733,8 +725,7 @@
 	goto out;
 }
 
-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				   unsigned long nr_segs, loff_t pos)
+static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -745,14 +736,14 @@
 	if (written)
 		return written;
 
-	written = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	written = generic_file_write_iter(iocb, from);
 
 	if (CIFS_CACHE_WRITE(CIFS_I(inode)))
 		goto out;
 
 	rc = filemap_fdatawrite(inode->i_mapping);
 	if (rc)
-		cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
+		cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n",
 			 rc, inode);
 
 out:
@@ -888,10 +879,10 @@
 };
 
 const struct file_operations cifs_file_ops = {
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = generic_file_aio_read,
-	.aio_write = cifs_file_aio_write,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
 	.release = cifs_close,
 	.lock = cifs_lock,
@@ -907,10 +898,10 @@
 };
 
 const struct file_operations cifs_file_strict_ops = {
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = cifs_strict_readv,
-	.aio_write = cifs_strict_writev,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = cifs_strict_readv,
+	.write_iter = cifs_strict_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.lock = cifs_lock,
@@ -927,10 +918,10 @@
 
 const struct file_operations cifs_file_direct_ops = {
 	/* BB reevaluate whether they can be done with directio, no cache */
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = cifs_user_readv,
-	.aio_write = cifs_user_writev,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = cifs_user_readv,
+	.write_iter = cifs_user_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.lock = cifs_lock,
@@ -946,10 +937,10 @@
 };
 
 const struct file_operations cifs_file_nobrl_ops = {
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = generic_file_aio_read,
-	.aio_write = cifs_file_aio_write,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
 	.release = cifs_close,
 	.fsync = cifs_fsync,
@@ -964,10 +955,10 @@
 };
 
 const struct file_operations cifs_file_strict_nobrl_ops = {
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = cifs_strict_readv,
-	.aio_write = cifs_strict_writev,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = cifs_strict_readv,
+	.write_iter = cifs_strict_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.fsync = cifs_strict_fsync,
@@ -983,10 +974,10 @@
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
 	/* BB reevaluate whether they can be done with directio, no cache */
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = cifs_user_readv,
-	.aio_write = cifs_user_writev,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = cifs_user_readv,
+	.write_iter = cifs_user_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.fsync = cifs_fsync,
@@ -1192,10 +1183,6 @@
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
-#ifdef CONFIG_CIFS_SMB2
-	get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
-#endif
-
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
 		cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26a754f..70f178a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h

@@ -22,20 +22,28 @@
 #ifndef _CIFSFS_H
 #define _CIFSFS_H
 
+#include <linux/hash.h>
+
 #define ROOT_I 2
 
 /*
  * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
- * so that it will fit.
+ * so that it will fit. We use hash_64 to convert the value to 31 bits, and
+ * then add 1, to ensure that we don't end up with a 0 as the value.
  */
+#if BITS_PER_LONG == 64
 static inline ino_t
 cifs_uniqueid_to_ino_t(u64 fileid)
 {
-	ino_t ino = (ino_t) fileid;
-	if (sizeof(ino_t) < sizeof(u64))
-		ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
-	return ino;
+	return (ino_t)fileid;
 }
+#else
+static inline ino_t
+cifs_uniqueid_to_ino_t(u64 fileid)
+{
+	return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
+}
+#endif
 
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
@@ -67,6 +75,8 @@
 extern int cifs_revalidate_file(struct file *filp);
 extern int cifs_revalidate_dentry(struct dentry *);
 extern int cifs_invalidate_mapping(struct inode *inode);
+extern int cifs_revalidate_mapping(struct inode *inode);
+extern int cifs_zap_mapping(struct inode *inode);
 extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int cifs_setattr(struct dentry *, struct iattr *);
 
@@ -85,14 +95,10 @@
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
-				  unsigned long nr_segs, loff_t pos);
+extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
 extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
@@ -130,5 +136,5 @@
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.02"
+#define CIFS_VERSION   "2.03"
 #endif				/* _CIFSFS_H */

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 30f6e92..de6aed8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h

@@ -559,6 +559,7 @@
 	int echo_credits;  /* echo reserved slots */
 	int oplock_credits;  /* oplock break reserved slots */
 	bool echoes:1; /* enable echoes */
+	__u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */
 #endif
 	u16 dialect; /* dialect index that server chose */
 	bool oplocks:1; /* enable oplocks */
@@ -1113,12 +1114,13 @@
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
 	unsigned int oplock;		/* oplock/lease level we have */
 	unsigned int epoch;		/* used to track lease state changes */
-	bool delete_pending;		/* DELETE_ON_CLOSE is set */
-	bool invalid_mapping;		/* pagecache is invalid */
-	unsigned long flags;
 #define CIFS_INODE_PENDING_OPLOCK_BREAK   (0) /* oplock break in progress */
 #define CIFS_INODE_PENDING_WRITERS	  (1) /* Writes in progress */
 #define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
+#define CIFS_INO_DELETE_PENDING		  (3) /* delete pending on server */
+#define CIFS_INO_INVALID_MAPPING	  (4) /* pagecache is invalid */
+#define CIFS_INO_LOCK			  (5) /* lock bit for synchronization */
+	unsigned long flags;
 	spinlock_t writers_lock;
 	unsigned int writers;		/* Number of writers on this inode */
 	unsigned long time;		/* jiffies of last update of inode */

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8813ff7..20d75b8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c

@@ -2144,6 +2144,9 @@
 	       sizeof(tcp_ses->srcaddr));
 	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
 		sizeof(tcp_ses->dstaddr));
+#ifdef CONFIG_CIFS_SMB2
+	get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE);
+#endif
 	/*
 	 * at this point we are the only ones with the pointer
 	 * to the struct since the kernel thread not created yet
@@ -2225,7 +2228,7 @@
 			    vol->username ? vol->username : "",
 			    CIFS_MAX_USERNAME_LEN))
 			return 0;
-		if (strlen(vol->username) != 0 &&
+		if ((vol->username && strlen(vol->username) != 0) &&
 		    ses->password != NULL &&
 		    strncmp(ses->password,
 			    vol->password ? vol->password : "",

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5ed03e0..e90a1e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c

@@ -335,7 +335,7 @@
 	spin_unlock(&cifs_file_list_lock);
 
 	if (fid->purge_cache)
-		cifs_invalidate_mapping(inode);
+		cifs_zap_mapping(inode);
 
 	file->private_data = cfile;
 	return cfile;
@@ -392,7 +392,7 @@
 		 * again and get at least level II oplock.
 		 */
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
-			CIFS_I(inode)->invalid_mapping = true;
+			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 		cifs_set_oplock_level(cifsi, 0);
 	}
 	spin_unlock(&cifs_file_list_lock);
@@ -1529,7 +1529,7 @@
 		 */
 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
 					CIFS_CACHE_READ(CIFS_I(inode))) {
-			cifs_invalidate_mapping(inode);
+			cifs_zap_mapping(inode);
 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
 				 inode);
 			CIFS_I(inode)->oplock = 0;
@@ -2218,7 +2218,7 @@
 		 file->f_path.dentry->d_name.name, datasync);
 
 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
-		rc = cifs_invalidate_mapping(inode);
+		rc = cifs_zap_mapping(inode);
 		if (rc) {
 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
 			rc = 0; /* don't care about it in fsync */
@@ -2385,14 +2385,12 @@
 }
 
 static ssize_t
-cifs_iovec_write(struct file *file, const struct iovec *iov,
-		 unsigned long nr_segs, loff_t *poffset)
+cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
 {
 	unsigned long nr_pages, i;
 	size_t bytes, copied, len, cur_len;
 	ssize_t total_written = 0;
 	loff_t offset;
-	struct iov_iter it;
 	struct cifsFileInfo *open_file;
 	struct cifs_tcon *tcon;
 	struct cifs_sb_info *cifs_sb;
@@ -2401,14 +2399,16 @@
 	int rc;
 	pid_t pid;
 
-	len = iov_length(iov, nr_segs);
-	if (!len)
-		return 0;
-
+	len = iov_iter_count(from);
 	rc = generic_write_checks(file, poffset, &len, 0);
 	if (rc)
 		return rc;
 
+	if (!len)
+		return 0;
+
+	iov_iter_truncate(from, len);
+
 	INIT_LIST_HEAD(&wdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	open_file = file->private_data;
@@ -2424,7 +2424,6 @@
 	else
 		pid = current->tgid;
 
-	iov_iter_init(&it, iov, nr_segs, len, 0);
 	do {
 		size_t save_len;
 
@@ -2444,11 +2443,10 @@
 
 		save_len = cur_len;
 		for (i = 0; i < nr_pages; i++) {
-			bytes = min_t(const size_t, cur_len, PAGE_SIZE);
-			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
-							 0, bytes);
+			bytes = min_t(size_t, cur_len, PAGE_SIZE);
+			copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
+						     from);
 			cur_len -= copied;
-			iov_iter_advance(&it, copied);
 			/*
 			 * If we didn't copy as much as we expected, then that
 			 * may mean we trod into an unmapped area. Stop copying
@@ -2546,11 +2544,11 @@
 	return total_written ? total_written : (ssize_t)rc;
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos)
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 {
 	ssize_t written;
 	struct inode *inode;
+	loff_t pos = iocb->ki_pos;
 
 	inode = file_inode(iocb->ki_filp);
 
@@ -2560,9 +2558,9 @@
 	 * write request.
 	 */
 
-	written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
+	written = cifs_iovec_write(iocb->ki_filp, from, &pos);
 	if (written > 0) {
-		CIFS_I(inode)->invalid_mapping = true;
+		set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
 		iocb->ki_pos = pos;
 	}
 
@@ -2570,8 +2568,7 @@
 }
 
 static ssize_t
-cifs_writev(struct kiocb *iocb, const struct iovec *iov,
-	    unsigned long nr_segs, loff_t pos)
+cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2589,10 +2586,10 @@
 	mutex_lock(&inode->i_mutex);
 	if (file->f_flags & O_APPEND)
 		lock_pos = i_size_read(inode);
-	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
+	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
 				     server->vals->exclusive_lock_type, NULL,
 				     CIFS_WRITE_OP)) {
-		rc = __generic_file_aio_write(iocb, iov, nr_segs);
+		rc = __generic_file_write_iter(iocb, from);
 		mutex_unlock(&inode->i_mutex);
 
 		if (rc > 0) {
@@ -2610,8 +2607,7 @@
 }
 
 ssize_t
-cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
-		   unsigned long nr_segs, loff_t pos)
+cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2629,11 +2625,10 @@
 		if (cap_unix(tcon->ses) &&
 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
-			written = generic_file_aio_write(
-					iocb, iov, nr_segs, pos);
+			written = generic_file_write_iter(iocb, from);
 			goto out;
 		}
-		written = cifs_writev(iocb, iov, nr_segs, pos);
+		written = cifs_writev(iocb, from);
 		goto out;
 	}
 	/*
@@ -2642,14 +2637,14 @@
 	 * affected pages because it may cause a error with mandatory locks on
 	 * these pages but not on the region from pos to ppos+len-1.
 	 */
-	written = cifs_user_writev(iocb, iov, nr_segs, pos);
+	written = cifs_user_writev(iocb, from);
 	if (written > 0 && CIFS_CACHE_READ(cinode)) {
 		/*
 		 * Windows 7 server can delay breaking level2 oplock if a write
 		 * request comes - break it on the client to prevent reading
 		 * an old data.
 		 */
-		cifs_invalidate_mapping(inode);
+		cifs_zap_mapping(inode);
 		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
 			 inode);
 		cinode->oplock = 0;
@@ -2831,32 +2826,25 @@
 	return total_read > 0 ? total_read : result;
 }
 
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos)
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	ssize_t rc;
 	size_t len, cur_len;
 	ssize_t total_read = 0;
-	loff_t offset = pos;
+	loff_t offset = iocb->ki_pos;
 	unsigned int npages;
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_tcon *tcon;
 	struct cifsFileInfo *open_file;
 	struct cifs_readdata *rdata, *tmp;
 	struct list_head rdata_list;
-	struct iov_iter to;
 	pid_t pid;
 
-	if (!nr_segs)
-		return 0;
-
-	len = iov_length(iov, nr_segs);
+	len = iov_iter_count(to);
 	if (!len)
 		return 0;
 
-	iov_iter_init(&to, iov, nr_segs, len, 0);
-
 	INIT_LIST_HEAD(&rdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	open_file = file->private_data;
@@ -2914,7 +2902,7 @@
 	if (!list_empty(&rdata_list))
 		rc = 0;
 
-	len = iov_iter_count(&to);
+	len = iov_iter_count(to);
 	/* the loop below should proceed in the order of increasing offsets */
 	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
 	again:
@@ -2931,7 +2919,7 @@
 					goto again;
 				}
 			} else {
-				rc = cifs_readdata_to_iov(rdata, &to);
+				rc = cifs_readdata_to_iov(rdata, to);
 			}
 
 		}
@@ -2939,7 +2927,7 @@
 		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 	}
 
-	total_read = len - iov_iter_count(&to);
+	total_read = len - iov_iter_count(to);
 
 	cifs_stats_bytes_read(tcon, total_read);
 
@@ -2948,15 +2936,14 @@
 		rc = 0;
 
 	if (total_read) {
-		iocb->ki_pos = pos + total_read;
+		iocb->ki_pos += total_read;
 		return total_read;
 	}
 	return rc;
 }
 
 ssize_t
-cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
-		  unsigned long nr_segs, loff_t pos)
+cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2975,22 +2962,22 @@
 	 * pos+len-1.
 	 */
 	if (!CIFS_CACHE_READ(cinode))
-		return cifs_user_readv(iocb, iov, nr_segs, pos);
+		return cifs_user_readv(iocb, to);
 
 	if (cap_unix(tcon->ses) &&
 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
-		return generic_file_aio_read(iocb, iov, nr_segs, pos);
+		return generic_file_read_iter(iocb, to);
 
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents reading.
 	 */
 	down_read(&cinode->lock_sem);
-	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
+	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
 				     tcon->ses->server->vals->shared_lock_type,
 				     NULL, CIFS_READ_OP))
-		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		rc = generic_file_read_iter(iocb, to);
 	up_read(&cinode->lock_sem);
 	return rc;
 }
@@ -3112,7 +3099,7 @@
 	xid = get_xid();
 
 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
-		rc = cifs_invalidate_mapping(inode);
+		rc = cifs_zap_mapping(inode);
 		if (rc)
 			return rc;
 	}
@@ -3670,7 +3657,7 @@
 		if (!CIFS_CACHE_READ(cinode)) {
 			rc = filemap_fdatawait(inode->i_mapping);
 			mapping_set_error(inode->i_mapping, rc);
-			cifs_invalidate_mapping(inode);
+			cifs_zap_mapping(inode);
 		}
 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
 	}
@@ -3703,8 +3690,8 @@
  * Direct IO is not yet supported in the cached mode. 
  */
 static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t pos, unsigned long nr_segs)
+cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t pos)
 {
         /*
          * FIXME

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a22d667..a174605 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c

@@ -22,6 +22,7 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/freezer.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -117,7 +118,7 @@
 
 	cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n",
 		 __func__, cifs_i->uniqueid);
-	cifs_i->invalid_mapping = true;
+	set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags);
 }
 
 /*
@@ -177,7 +178,10 @@
 	else
 		cifs_i->time = jiffies;
 
-	cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
+	if (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+		set_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags);
+	else
+		clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags);
 
 	cifs_i->server_eof = fattr->cf_eof;
 	/*
@@ -1121,7 +1125,7 @@
 	}
 
 	/* try to set DELETE_ON_CLOSE */
-	if (!cifsInode->delete_pending) {
+	if (!test_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags)) {
 		rc = CIFSSMBSetFileDisposition(xid, tcon, true, fid.netfid,
 					       current->tgid);
 		/*
@@ -1138,7 +1142,7 @@
 			rc = -EBUSY;
 			goto undo_rename;
 		}
-		cifsInode->delete_pending = true;
+		set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags);
 	}
 
 out_close:
@@ -1759,23 +1763,62 @@
 cifs_invalidate_mapping(struct inode *inode)
 {
 	int rc = 0;
-	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
-
-	cifs_i->invalid_mapping = false;
 
 	if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
 		rc = invalidate_inode_pages2(inode->i_mapping);
-		if (rc) {
+		if (rc)
 			cifs_dbg(VFS, "%s: could not invalidate inode %p\n",
 				 __func__, inode);
-			cifs_i->invalid_mapping = true;
-		}
 	}
 
 	cifs_fscache_reset_inode_cookie(inode);
 	return rc;
 }
 
+/**
+ * cifs_wait_bit_killable - wait_on_bit_lock() action for killable bit-lock sleeps
+ * @word: long word containing the bit lock (not read by this helper)
+ */
+static int
+cifs_wait_bit_killable(void *word)
+{
+	if (fatal_signal_pending(current))
+		return -ERESTARTSYS;	/* fatal signal pending: do not sleep */
+	freezable_schedule_unsafe();	/* sleep via the freezer-aware schedule helper */
+	return 0;			/* 0 once the sleep has returned */
+}
+
+int
+cifs_revalidate_mapping(struct inode *inode)
+{
+	int rc;
+	unsigned long *flags = &CIFS_I(inode)->flags;	/* word holding CIFS_INO_* bits */
+
+	rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
+				TASK_KILLABLE);	/* take the CIFS_INO_LOCK bit lock */
+	if (rc)
+		return rc;	/* early exit; the unlock below is skipped */
+
+	if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) {
+		rc = cifs_invalidate_mapping(inode);
+		if (rc)
+			set_bit(CIFS_INO_INVALID_MAPPING, flags); /* failed: re-mark invalid */
+	}
+
+	clear_bit_unlock(CIFS_INO_LOCK, flags);
+	smp_mb__after_atomic();
+	wake_up_bit(flags, CIFS_INO_LOCK);	/* wake anyone waiting on the lock bit */
+
+	return rc;	/* 0, or the error from cifs_invalidate_mapping() */
+}
+
+int
+cifs_zap_mapping(struct inode *inode)
+{
+	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); /* mark pagecache invalid */
+	return cifs_revalidate_mapping(inode);	/* then act on the flag just set */
+}
+
 int cifs_revalidate_file_attr(struct file *filp)
 {
 	int rc = 0;
@@ -1842,9 +1885,7 @@
 	if (rc)
 		return rc;
 
-	if (CIFS_I(inode)->invalid_mapping)
-		rc = cifs_invalidate_mapping(inode);
-	return rc;
+	return cifs_revalidate_mapping(inode);
 }
 
 /* revalidate a dentry's inode attributes */
@@ -1857,9 +1898,7 @@
 	if (rc)
 		return rc;
 
-	if (CIFS_I(inode)->invalid_mapping)
-		rc = cifs_invalidate_mapping(inode);
-	return rc;
+	return cifs_revalidate_mapping(inode);
 }
 
 int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,

diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 7749230..45cb59b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c

@@ -85,7 +85,7 @@
 		goto out_fput;
 	}
 
-	src_inode = src_file.file->f_dentry->d_inode;
+	src_inode = file_inode(src_file.file);
 
 	/*
 	 * Note: cifs case is easier than btrfs since server responsible for

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 0498845..6834b9c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c

@@ -795,8 +795,8 @@
 	while (nt_errs[idx].nt_errstr != NULL) {
 		if (((nt_errs[idx].nt_errcode) & 0xFFFFFF) ==
 		    (status_code & 0xFFFFFF)) {
-			printk(KERN_NOTICE "Status code returned 0x%08x %s\n",
-				   status_code, nt_errs[idx].nt_errstr);
+			pr_notice("Status code returned 0x%08x %s\n",
+				  status_code, nt_errs[idx].nt_errstr);
 		}
 		idx++;
 	}
@@ -941,8 +941,9 @@
 	return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET;
 }
 
-static int total_days_of_prev_months[] =
-{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
+static const int total_days_of_prev_months[] = {
+	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+};
 
 struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
 {

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 35ddc3e..787844b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c

@@ -1047,6 +1047,7 @@
 	buf->ccontext.NameOffset = cpu_to_le16(offsetof
 				(struct create_lease, Name));
 	buf->ccontext.NameLength = cpu_to_le16(4);
+	/* SMB2_CREATE_REQUEST_LEASE is "RqLs" */
 	buf->Name[0] = 'R';
 	buf->Name[1] = 'q';
 	buf->Name[2] = 'L';
@@ -1073,6 +1074,7 @@
 	buf->ccontext.NameOffset = cpu_to_le16(offsetof
 				(struct create_lease_v2, Name));
 	buf->ccontext.NameLength = cpu_to_le16(4);
+	/* SMB2_CREATE_REQUEST_LEASE is "RqLs" */
 	buf->Name[0] = 'R';
 	buf->Name[1] = 'q';
 	buf->Name[2] = 'L';

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3802f8c..b0b260d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c

@@ -375,7 +375,12 @@
 
 	req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities);
 
-	memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+	/* ClientGUID must be zero for SMB2.02 dialect */
+	if (ses->server->vals->protocol_id == SMB20_PROT_ID)
+		memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE);
+	else
+		memcpy(req->ClientGUID, server->client_guid,
+			SMB2_CLIENT_GUID_SIZE);
 
 	iov[0].iov_base = (char *)req;
 	/* 4 for rfc1002 length field */
@@ -478,7 +483,8 @@
 
 	vneg_inbuf.Capabilities =
 			cpu_to_le32(tcon->ses->server->vals->req_capabilities);
-	memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+	memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
+					SMB2_CLIENT_GUID_SIZE);
 
 	if (tcon->ses->sign)
 		vneg_inbuf.SecurityMode =
@@ -966,6 +972,7 @@
 	buf->ccontext.NameOffset = cpu_to_le16(offsetof
 				(struct create_durable, Name));
 	buf->ccontext.NameLength = cpu_to_le16(4);
+	/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DHnQ" */
 	buf->Name[0] = 'D';
 	buf->Name[1] = 'H';
 	buf->Name[2] = 'n';
@@ -990,6 +997,7 @@
 	buf->ccontext.NameLength = cpu_to_le16(4);
 	buf->Data.Fid.PersistentFileId = fid->persistent_fid;
 	buf->Data.Fid.VolatileFileId = fid->volatile_fid;
+	/* SMB2_CREATE_DURABLE_HANDLE_RECONNECT is "DHnC" */
 	buf->Name[0] = 'D';
 	buf->Name[1] = 'H';
 	buf->Name[2] = 'n';
@@ -1089,6 +1097,7 @@
 	int rc = 0;
 	unsigned int num_iovecs = 2;
 	__u32 file_attributes = 0;
+	char *dhc_buf = NULL, *lc_buf = NULL;
 
 	cifs_dbg(FYI, "create/open\n");
 
@@ -1155,6 +1164,7 @@
 			kfree(copy_path);
 			return rc;
 		}
+		lc_buf = iov[num_iovecs-1].iov_base;
 	}
 
 	if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -1169,9 +1179,10 @@
 		if (rc) {
 			cifs_small_buf_release(req);
 			kfree(copy_path);
-			kfree(iov[num_iovecs-1].iov_base);
+			kfree(lc_buf);
 			return rc;
 		}
+		dhc_buf = iov[num_iovecs-1].iov_base;
 	}
 
 	rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
@@ -1203,6 +1214,8 @@
 		*oplock = rsp->OplockLevel;
 creat_exit:
 	kfree(copy_path);
+	kfree(lc_buf);
+	kfree(dhc_buf);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }

diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 2022c54..69f3595 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h

@@ -183,8 +183,6 @@
 
 #define SMB2_CLIENT_GUID_SIZE 16
 
-extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
-
 struct smb2_negotiate_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize; /* Must be 36 */
@@ -437,11 +435,15 @@
 #define SMB2_CREATE_SD_BUFFER			"SecD" /* security descriptor */
 #define SMB2_CREATE_DURABLE_HANDLE_REQUEST	"DHnQ"
 #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT	"DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE		"AlSi"
+#define SMB2_CREATE_ALLOCATION_SIZE		"AlSi" /* 0x416c5369: 2nd char is lowercase 'l' */
 #define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
 #define SMB2_CREATE_TIMEWARP_REQUEST		"TWrp"
 #define SMB2_CREATE_QUERY_ON_DISK_ID		"QFid"
 #define SMB2_CREATE_REQUEST_LEASE		"RqLs"
+#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2	"DH2Q"
+#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2	"DH2C"
+#define SMB2_CREATE_APP_INSTANCE_ID	0x45BCA66AEFA7F74A9008FA462E144D74 /* NOTE(review): 32 hex digits = 128 bits; confirm intended use as a 16-byte name */
+#define SVHDX_OPEN_DEVICE_CONTEXT	0x83CE6F1AD851E0986E34401CC9BCFCE9 /* NOTE(review): 32 hex digits = 128 bits; confirm intended use as a 16-byte name */
 
 struct smb2_create_req {
 	struct smb2_hdr hdr;

diff --git a/fs/dcache.c b/fs/dcache.c
index 1792d60..06f6585 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c

@@ -532,10 +532,12 @@
 	struct dentry *parent = dentry->d_parent;
 	if (IS_ROOT(dentry))
 		return NULL;
+	if (unlikely((int)dentry->d_lockref.count < 0))
+		return NULL;
 	if (likely(spin_trylock(&parent->d_lock)))
 		return parent;
-	spin_unlock(&dentry->d_lock);
 	rcu_read_lock();
+	spin_unlock(&dentry->d_lock);
 again:
 	parent = ACCESS_ONCE(dentry->d_parent);
 	spin_lock(&parent->d_lock);

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 31ba093..98040ba 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c

@@ -77,7 +77,6 @@
 	unsigned blocks_available;	/* At block_in_file.  changes */
 	int reap_counter;		/* rate limit reaping */
 	sector_t final_block_in_request;/* doesn't change */
-	unsigned first_block_in_page;	/* doesn't change, Used only once */
 	int boundary;			/* prev block is at a boundary */
 	get_block_t *get_block;		/* block mapping function */
 	dio_submit_t *submit_io;	/* IO submition function */
@@ -98,19 +97,14 @@
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
 
-	/*
-	 * Page fetching state. These variables belong to dio_refill_pages().
-	 */
-	int curr_page;			/* changes */
-	int total_pages;		/* doesn't change */
-	unsigned long curr_user_address;/* changes */
-
+	struct iov_iter *iter;
 	/*
 	 * Page queue.  These variables belong to dio_refill_pages() and
 	 * dio_get_page().
 	 */
 	unsigned head;			/* next page to process */
 	unsigned tail;			/* last valid page + 1 */
+	size_t from, to;
 };
 
 /* dio_state communicated between submission path and end_io */
@@ -163,15 +157,10 @@
  */
 static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
-	int ret;
-	int nr_pages;
+	ssize_t ret;
 
-	nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
-	ret = get_user_pages_fast(
-		sdio->curr_user_address,		/* Where from? */
-		nr_pages,			/* How many pages? */
-		dio->rw == READ,		/* Write to memory? */
-		&dio->pages[0]);		/* Put results here */
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+				&sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(0);
@@ -186,18 +175,19 @@
 		dio->pages[0] = page;
 		sdio->head = 0;
 		sdio->tail = 1;
-		ret = 0;
-		goto out;
+		sdio->from = 0;
+		sdio->to = PAGE_SIZE;
+		return 0;
 	}
 
 	if (ret >= 0) {
-		sdio->curr_user_address += ret * PAGE_SIZE;
-		sdio->curr_page += ret;
+		iov_iter_advance(sdio->iter, ret);
+		ret += sdio->from;
 		sdio->head = 0;
-		sdio->tail = ret;
-		ret = 0;
+		sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+		sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
+		return 0;
 	}
-out:
 	return ret;	
 }
 
@@ -208,8 +198,9 @@
  * L1 cache.
  */
 static inline struct page *dio_get_page(struct dio *dio,
-		struct dio_submit *sdio)
+		struct dio_submit *sdio, size_t *from, size_t *to)
 {
+	int n;
 	if (dio_pages_present(sdio) == 0) {
 		int ret;
 
@@ -218,7 +209,10 @@
 			return ERR_PTR(ret);
 		BUG_ON(dio_pages_present(sdio) == 0);
 	}
-	return dio->pages[sdio->head++];
+	n = sdio->head++;
+	*from = n ? 0 : sdio->from;
+	*to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+	return dio->pages[n];
 }
 
 /**
@@ -422,8 +416,8 @@
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (dio_pages_present(sdio))
-		page_cache_release(dio_get_page(dio, sdio));
+	while (sdio->head < sdio->tail)	/* queue entries head..tail-1 remain */
+		page_cache_release(dio->pages[sdio->head++]); /* release, then step head */
 }
 
 /*
@@ -912,23 +906,18 @@
 			struct buffer_head *map_bh)
 {
 	const unsigned blkbits = sdio->blkbits;
-	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
-	struct page *page;
-	unsigned block_in_page;
 	int ret = 0;
 
-	/* The I/O can start at any block offset within the first page */
-	block_in_page = sdio->first_block_in_page;
-
 	while (sdio->block_in_file < sdio->final_block_in_request) {
-		page = dio_get_page(dio, sdio);
+		struct page *page;
+		size_t from, to;
+		page = dio_get_page(dio, sdio, &from, &to);
 		if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			goto out;
 		}
 
-		while (block_in_page < blocks_per_page) {
-			unsigned offset_in_page = block_in_page << blkbits;
+		while (from < to) {
 			unsigned this_chunk_bytes;	/* # of bytes mapped */
 			unsigned this_chunk_blocks;	/* # of blocks */
 			unsigned u;
@@ -999,10 +988,10 @@
 					page_cache_release(page);
 					goto out;
 				}
-				zero_user(page, block_in_page << blkbits,
-						1 << blkbits);
+				zero_user(page, from, 1 << blkbits);
 				sdio->block_in_file++;
-				block_in_page++;
+				from += 1 << blkbits;
+				dio->result += 1 << blkbits;
 				goto next_block;
 			}
 
@@ -1019,7 +1008,7 @@
 			 * can add to this page
 			 */
 			this_chunk_blocks = sdio->blocks_available;
-			u = (PAGE_SIZE - offset_in_page) >> blkbits;
+			u = (to - from) >> blkbits;
 			if (this_chunk_blocks > u)
 				this_chunk_blocks = u;
 			u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1031,7 +1020,7 @@
 			if (this_chunk_blocks == sdio->blocks_available)
 				sdio->boundary = buffer_boundary(map_bh);
 			ret = submit_page_section(dio, sdio, page,
-						  offset_in_page,
+						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
 						  map_bh);
@@ -1042,7 +1031,8 @@
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
-			block_in_page += this_chunk_blocks;
+			from += this_chunk_bytes;
+			dio->result += this_chunk_bytes;
 			sdio->blocks_available -= this_chunk_blocks;
 next_block:
 			BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
@@ -1052,7 +1042,6 @@
 
 		/* Drop the ref which was taken in get_user_pages() */
 		page_cache_release(page);
-		block_in_page = 0;
 	}
 out:
 	return ret;
@@ -1107,24 +1096,20 @@
  */
 static inline ssize_t
 do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset, 
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
-	int seg;
-	size_t size;
-	unsigned long addr;
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
-	loff_t end = offset;
+	loff_t end = offset + iov_iter_count(iter);
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
-	unsigned long user_addr;
-	size_t bytes;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
+	unsigned long align = offset | iov_iter_alignment(iter);
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
@@ -1134,32 +1119,16 @@
 	 * the early prefetch in the caller enough time.
 	 */
 
-	if (offset & blocksize_mask) {
+	if (align & blocksize_mask) {
 		if (bdev)
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
-		if (offset & blocksize_mask)
+		if (align & blocksize_mask)
 			goto out;
 	}
 
-	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < nr_segs; seg++) {
-		addr = (unsigned long)iov[seg].iov_base;
-		size = iov[seg].iov_len;
-		end += size;
-		if (unlikely((addr & blocksize_mask) ||
-			     (size & blocksize_mask))) {
-			if (bdev)
-				blkbits = blksize_bits(
-					 bdev_logical_block_size(bdev));
-			blocksize_mask = (1 << blkbits) - 1;
-			if ((addr & blocksize_mask) || (size & blocksize_mask))
-				goto out;
-		}
-	}
-
 	/* watch out for a 0 len io from a tricksy fs */
-	if (rw == READ && end == offset)
+	if (rw == READ && !iov_iter_count(iter))
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1249,6 +1218,10 @@
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
+	sdio.iter = iter;
+	sdio.final_block_in_request =
+		(offset + iov_iter_count(iter)) >> blkbits;
+
 	/*
 	 * In case of non-aligned buffers, we may need 2 more
 	 * pages since we need to zero out first and last block.
@@ -1256,47 +1229,13 @@
 	if (unlikely(sdio.blkfactor))
 		sdio.pages_in_io = 2;
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		sdio.pages_in_io +=
-			((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
-				PAGE_SIZE - user_addr / PAGE_SIZE);
-	}
+	sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
 
 	blk_start_plug(&plug);
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		sdio.size += bytes = iov[seg].iov_len;
-
-		/* Index into the first page of the first block */
-		sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
-		sdio.final_block_in_request = sdio.block_in_file +
-						(bytes >> blkbits);
-		/* Page fetching state */
-		sdio.head = 0;
-		sdio.tail = 0;
-		sdio.curr_page = 0;
-
-		sdio.total_pages = 0;
-		if (user_addr & (PAGE_SIZE-1)) {
-			sdio.total_pages++;
-			bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
-		}
-		sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		sdio.curr_user_address = user_addr;
-
-		retval = do_direct_IO(dio, &sdio, &map_bh);
-
-		dio->result += iov[seg].iov_len -
-			((sdio.final_block_in_request - sdio.block_in_file) <<
-					blkbits);
-
-		if (retval) {
-			dio_cleanup(dio, &sdio);
-			break;
-		}
-	} /* end iovec loop */
+	retval = do_direct_IO(dio, &sdio, &map_bh);
+	if (retval)
+		dio_cleanup(dio, &sdio);
 
 	if (retval == -ENOTBLK) {
 		/*
@@ -1365,8 +1304,8 @@
 
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
 	/*
@@ -1381,9 +1320,8 @@
 	prefetch(bdev->bd_queue);
 	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
-	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				     nr_segs, get_block, end_io,
-				     submit_io, flags);
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
+				     get_block, end_io, submit_io, flags);
 }
 
 EXPORT_SYMBOL(__blockdev_direct_IO);

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e5b453..d08e079 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c

@@ -617,6 +617,11 @@
 	int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
 
 	log_print("Retry sending %d bytes to node id %d", len, nodeid);
+	
+	if (!nodeid) {
+		log_print("Shouldn't resend data via listening connection.");
+		return;
+	}
 
 	con = nodeid2con(nodeid, 0);
 	if (!con) {

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b1eaa7a..db0fad3 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c

@@ -45,14 +45,13 @@
  * The function to be used for directory reads is ecryptfs_read.
  */
 static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
-				const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos)
+				struct iov_iter *to)
 {
 	ssize_t rc;
 	struct path *path;
 	struct file *file = iocb->ki_filp;
 
-	rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+	rc = generic_file_read_iter(iocb, to);
 	/*
 	 * Even though this is a async interface, we need to wait
 	 * for IO to finish to update atime
@@ -352,10 +351,10 @@
 
 const struct file_operations ecryptfs_main_fops = {
 	.llseek = generic_file_llseek,
-	.read = do_sync_read,
-	.aio_read = ecryptfs_read_update_atime,
-	.write = do_sync_write,
-	.aio_write = generic_file_aio_write,
+	.read = new_sync_read,
+	.read_iter = ecryptfs_read_update_atime,
+	.write = new_sync_write,
+	.write_iter = generic_file_write_iter,
 	.iterate = ecryptfs_readdir,
 	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT

diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa..a3d33fe 100644
--- a/fs/exec.c
+++ b/fs/exec.c

@@ -1046,13 +1046,13 @@
  * so that a new one can be started
  */
 
-void set_task_comm(struct task_struct *tsk, const char *buf)
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
 	task_lock(tsk);
 	trace_task_rename(tsk, buf);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
-	perf_event_comm(tsk);
+	perf_event_comm(tsk, exec);	/* after task_unlock(); exec forwarded unchanged */
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -1110,7 +1110,8 @@
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
-	set_task_comm(current, kbasename(bprm->filename));
+	perf_event_exec();
+	__set_task_comm(current, kbasename(bprm->filename), true);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on

diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 491c6c0..71bf8e4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c

@@ -67,17 +67,17 @@
 
 const struct file_operations exofs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= exofs_release_file,
 	.fsync		= exofs_file_fsync,
 	.flush		= exofs_flush,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations exofs_file_inode_operations = {

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index d1c244d..3f9cafd 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c

@@ -964,7 +964,7 @@
 
  /* TODO: Should be easy enough to do proprly */
 static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	return 0;
 }

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 44c36e5..7c87b22 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c

@@ -62,10 +62,10 @@
  */
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
@@ -75,7 +75,7 @@
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b1d2a46..36d35c3 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c

@@ -850,18 +850,18 @@
 }
 
 static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
+	size_t count = iov_iter_count(iter);	/* sampled before iter is handed to blockdev_direct_IO(); presumably the call may consume iter -- confirm */
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext2_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
-		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		ext2_write_failed(mapping, offset + count);	/* failed write: pass the end offset computed from the count captured up front */
 	return ret;
 }
 

diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index aad0531..a062fa1 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c

@@ -50,10 +50,10 @@
 
 const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= ext3_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext3_compat_ioctl,
@@ -63,7 +63,7 @@
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations ext3_file_inode_operations = {

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5157d0..2c6ccc4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c

@@ -1716,17 +1716,17 @@
 	WARN_ON_ONCE(IS_RDONLY(inode) &&
 		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
 
-	if (ext3_journal_current_handle())
-		goto no_write;
-
 	trace_ext3_journalled_writepage(page);
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto no_write;
-	}
-
 	if (!page_has_buffers(page) || PageChecked(page)) {
+		if (ext3_journal_current_handle())
+			goto no_write;
+
+		handle = ext3_journal_start(inode,
+					    ext3_writepage_trans_blocks(inode));
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto no_write;
+		}
 		/*
 		 * It's mmapped pagecache.  Add buffers and journal it.  There
 		 * doesn't seem much point in redirtying the page here.
@@ -1749,17 +1749,18 @@
 		atomic_set(&EXT3_I(inode)->i_datasync_tid,
 			   handle->h_transaction->t_tid);
 		unlock_page(page);
+		err = ext3_journal_stop(handle);
+		if (!ret)
+			ret = err;
 	} else {
 		/*
-		 * It may be a page full of checkpoint-mode buffers.  We don't
-		 * really know unless we go poke around in the buffer_heads.
-		 * But block_write_full_page will do the right thing.
+		 * It is a page full of checkpoint-mode buffers. Go and write
+		 * them. They should have been already mapped when they went
+		 * to the journal so provide NULL get_block function to catch
+		 * errors.
 		 */
-		ret = block_write_full_page(page, ext3_get_block, wbc);
+		ret = block_write_full_page(page, NULL, wbc);
 	}
-	err = ext3_journal_stop(handle);
-	if (!ret)
-		ret = err;
 out:
 	return ret;
 
@@ -1820,8 +1821,7 @@
  * VFS code falls back into buffered path in that case so we are safe.
  */
 static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1829,10 +1829,10 @@
 	handle_t *handle;
 	ssize_t ret;
 	int orphan = 0;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	int retries = 0;
 
-	trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext3_direct_IO_enter(inode, offset, count, rw);
 
 	if (rw == WRITE) {
 		loff_t final_size = offset + count;
@@ -1856,15 +1856,14 @@
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext3_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			ext3_truncate_failed_direct_write(inode);
@@ -1909,8 +1908,7 @@
 			ret = err;
 	}
 out:
-	trace_ext3_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+	trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
 	return ret;
 }
 

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1479e2a..7cc5a0e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h

@@ -2140,8 +2140,7 @@
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 				struct ext4_map_blocks *map, int flags);
 extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
-				const struct iovec *iov, loff_t offset,
-				unsigned long nr_segs);
+				struct iov_iter *iter, loff_t offset);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
 extern void ext4_ind_truncate(handle_t *, struct inode *inode);

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4e8bc284..8695f70 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c

@@ -74,26 +74,22 @@
  * or one thread will zero the other's data, causing corruption.
  */
 static int
-ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
-		   unsigned long nr_segs, loff_t pos)
+ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
 	struct super_block *sb = inode->i_sb;
 	int blockmask = sb->s_blocksize - 1;
-	size_t count = iov_length(iov, nr_segs);
-	loff_t final_size = pos + count;
 
 	if (pos >= i_size_read(inode))
 		return 0;
 
-	if ((pos & blockmask) || (final_size & blockmask))
+	if ((pos | iov_iter_alignment(from)) & blockmask)	/* NOTE(review): the removed test masked pos and pos + count; this one masks pos OR'ed with iov_iter_alignment(from) -- confirm the change in what is tested is intended */
 		return 1;
 
 	return 0;
 }
 
 static ssize_t
-ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -101,10 +97,9 @@
 	struct blk_plug plug;
 	int o_direct = file->f_flags & O_DIRECT;
 	int overwrite = 0;
-	size_t length = iov_length(iov, nr_segs);
+	size_t length = iov_iter_count(from);
 	ssize_t ret;
-
-	BUG_ON(iocb->ki_pos != pos);
+	loff_t pos = iocb->ki_pos;
 
 	/*
 	 * Unaligned direct AIO must be serialized; see comment above
@@ -114,7 +109,7 @@
 	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
 	    !is_sync_kiocb(iocb) &&
 	    (file->f_flags & O_APPEND ||
-	     ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
+	     ext4_unaligned_aio(inode, from, pos))) {
 		aio_mutex = ext4_aio_mutex(inode);
 		mutex_lock(aio_mutex);
 		ext4_unwritten_wait(inode);
@@ -138,10 +133,8 @@
 			goto errout;
 		}
 
-		if (pos + length > sbi->s_bitmap_maxbytes) {
-			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
-					      sbi->s_bitmap_maxbytes - pos);
-		}
+		if (pos + length > sbi->s_bitmap_maxbytes)
+			iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
 	}
 
 	if (o_direct) {
@@ -179,7 +172,7 @@
 		}
 	}
 
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {
@@ -594,10 +587,10 @@
 
 const struct file_operations ext4_file_operations = {
 	.llseek		= ext4_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= ext4_file_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= ext4_file_write_iter,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
@@ -607,7 +600,7 @@
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= ext4_fallocate,
 };
 

diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 594009f..8a57e9f 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c

@@ -639,8 +639,7 @@
  * VFS code falls back into buffered path in that case so we are safe.
  */
 ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
-			   const struct iovec *iov, loff_t offset,
-			   unsigned long nr_segs)
+			   struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -648,7 +647,7 @@
 	handle_t *handle;
 	ssize_t ret;
 	int orphan = 0;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	int retries = 0;
 
 	if (rw == WRITE) {
@@ -687,18 +686,17 @@
 			goto locked;
 		}
 		ret = __blockdev_direct_IO(rw, iocb, inode,
-				 inode->i_sb->s_bdev, iov,
-				 offset, nr_segs,
+				 inode->i_sb->s_bdev, iter, offset,
 				 ext4_get_block, NULL, NULL, 0);
 		inode_dio_done(inode);
 	} else {
 locked:
-		ret = blockdev_direct_IO(rw, iocb, inode, iov,
-				 offset, nr_segs, ext4_get_block);
+		ret = blockdev_direct_IO(rw, iocb, inode, iter,
+				 offset, ext4_get_block);
 
 		if (unlikely((rw & WRITE) && ret < 0)) {
 			loff_t isize = i_size_read(inode);
-			loff_t end = offset + iov_length(iov, nr_segs);
+			loff_t end = offset + count;
 
 			if (end > isize)
 				ext4_truncate_failed_write(inode);

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7fcd68e..8a06473 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -3093,13 +3093,12 @@
  *
  */
 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	int overwrite = 0;
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
@@ -3108,7 +3107,7 @@
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (rw != WRITE || final_size > inode->i_size)
-		return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		return ext4_ind_direct_IO(rw, iocb, iter, offset);
 
 	BUG_ON(iocb->private == NULL);
 
@@ -3175,8 +3174,8 @@
 		dio_flags = DIO_LOCKING;
 	}
 	ret = __blockdev_direct_IO(rw, iocb, inode,
-				   inode->i_sb->s_bdev, iov,
-				   offset, nr_segs,
+				   inode->i_sb->s_bdev, iter,
+				   offset,
 				   get_block_func,
 				   ext4_end_io_dio,
 				   NULL,
@@ -3230,11 +3229,11 @@
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
 	/*
@@ -3247,13 +3246,12 @@
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext4_direct_IO_enter(inode, offset, count, rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
 	else
-		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
-	trace_ext4_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+		ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
+	trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
 	return ret;
 }
 

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e93e4ec..dbe2141 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c

@@ -240,7 +240,7 @@
 		}
 	}
 
-	error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
+	error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
 
 	kfree(value);
 	if (!error)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c405b8f..0b4710c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c

@@ -33,12 +33,12 @@
 	struct address_space *mapping = META_MAPPING(sbi);
 	struct page *page = NULL;
 repeat:
-	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+	page = grab_cache_page(mapping, index);
 	if (!page) {
 		cond_resched();
 		goto repeat;
 	}
-
+	f2fs_wait_on_page_writeback(page, META);
 	SetPageUptodate(page);
 	return page;
 }
@@ -72,7 +72,7 @@
 	return page;
 }
 
-inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
 {
 	switch (type) {
 	case META_NAT:
@@ -154,6 +154,8 @@
 	struct inode *inode = page->mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 
+	trace_f2fs_writepage(page, META);
+
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
 	if (wbc->for_reclaim)
@@ -171,10 +173,7 @@
 	return 0;
 
 redirty_out:
-	dec_page_count(sbi, F2FS_DIRTY_META);
-	wbc->pages_skipped++;
-	account_page_redirty(page);
-	set_page_dirty(page);
+	redirty_page_for_writepage(wbc, page);
 	return AOP_WRITEPAGE_ACTIVATE;
 }
 
@@ -184,6 +183,8 @@
 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
 	long diff, written;
 
+	trace_f2fs_writepages(mapping->host, wbc, META);
+
 	/* collect a number of dirty meta pages and write together */
 	if (wbc->for_kupdate ||
 		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
@@ -367,7 +368,9 @@
 		return;
 
 	sbi->por_doing = true;
-	start_blk = __start_cp_addr(sbi) + 1;
+
+	start_blk = __start_cp_addr(sbi) + 1 +
+		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 	orphan_blkaddr = __start_sum_addr(sbi) - 1;
 
 	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -508,8 +511,11 @@
 	unsigned long blk_size = sbi->blocksize;
 	unsigned long long cp1_version = 0, cp2_version = 0;
 	unsigned long long cp_start_blk_no;
+	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	block_t cp_blk_no;
+	int i;
 
-	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
 	if (!sbi->ckpt)
 		return -ENOMEM;
 	/*
@@ -540,6 +546,23 @@
 	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
 	memcpy(sbi->ckpt, cp_block, blk_size);
 
+	if (cp_blks <= 1)
+		goto done;
+
+	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+	if (cur_page == cp2)
+		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+	for (i = 1; i < cp_blks; i++) {
+		void *sit_bitmap_ptr;
+		unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+		cur_page = get_meta_page(sbi, cp_blk_no + i);
+		sit_bitmap_ptr = page_address(cur_page);
+		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+		f2fs_put_page(cur_page, 1);
+	}
+done:
 	f2fs_put_page(cp1, 1);
 	f2fs_put_page(cp2, 1);
 	return 0;
@@ -552,14 +575,13 @@
 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	struct list_head *head = &sbi->dir_inode_list;
-	struct dir_inode_entry *entry;
 
-	list_for_each_entry(entry, head, list)
-		if (unlikely(entry->inode == inode))
-			return -EEXIST;
+	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))	/* per-inode flag test takes the place of the list walk removed above */
+		return -EEXIST;	/* inode already carries FI_DIRTY_DIR */
 
-	list_add_tail(&new->list, head);
+	set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+	F2FS_I(inode)->dirty_dir = new;	/* keep the entry on the inode; remove_dirty_dir_inode() reads it back from here */
+	list_add_tail(&new->list, &sbi->dir_inode_list);
 	stat_inc_dirty_dir(sbi);
 	return 0;
 }
@@ -608,31 +630,26 @@
 void remove_dirty_dir_inode(struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	struct list_head *head;
 	struct dir_inode_entry *entry;
 
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
 	spin_lock(&sbi->dir_inode_lock);
-	if (get_dirty_dents(inode)) {
+	if (get_dirty_dents(inode) ||
+			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
 		spin_unlock(&sbi->dir_inode_lock);
 		return;
 	}
 
-	head = &sbi->dir_inode_list;
-	list_for_each_entry(entry, head, list) {
-		if (entry->inode == inode) {
-			list_del(&entry->list);
-			stat_dec_dirty_dir(sbi);
-			spin_unlock(&sbi->dir_inode_lock);
-			kmem_cache_free(inode_entry_slab, entry);
-			goto done;
-		}
-	}
+	entry = F2FS_I(inode)->dirty_dir;
+	list_del(&entry->list);
+	F2FS_I(inode)->dirty_dir = NULL;
+	clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+	stat_dec_dirty_dir(sbi);
 	spin_unlock(&sbi->dir_inode_lock);
+	kmem_cache_free(inode_entry_slab, entry);
 
-done:
 	/* Only from the recovery routine */
 	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
 		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -640,26 +657,6 @@
 	}
 }
 
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
-{
-
-	struct list_head *head;
-	struct inode *inode = NULL;
-	struct dir_inode_entry *entry;
-
-	spin_lock(&sbi->dir_inode_lock);
-
-	head = &sbi->dir_inode_list;
-	list_for_each_entry(entry, head, list) {
-		if (entry->inode->i_ino == ino) {
-			inode = entry->inode;
-			break;
-		}
-	}
-	spin_unlock(&sbi->dir_inode_lock);
-	return inode;
-}
-
 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
 {
 	struct list_head *head;
@@ -758,6 +755,13 @@
 	__u32 crc32 = 0;
 	void *kaddr;
 	int i;
+	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
+	/*
+	 * This prevents wrong roll-forward operations from being conducted. It
+	 * uses metapages, so it should be called prior to sync_meta_pages below.
+	 */
+	discard_next_dnode(sbi);
 
 	/* Flush all the NAT/SIT pages */
 	while (get_pages(sbi, F2FS_DIRTY_META))
@@ -802,16 +806,19 @@
 
 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
 					/ F2FS_ORPHANS_PER_BLOCK;
-	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+			orphan_blocks);
 
 	if (is_umount) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
 		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks);
 	}
 
 	if (sbi->n_orphans)
@@ -837,6 +844,15 @@
 	set_page_dirty(cp_page);
 	f2fs_put_page(cp_page, 1);
 
+	for (i = 1; i < 1 + cp_payload_blks; i++) {
+		cp_page = grab_meta_page(sbi, start_blk++);
+		kaddr = page_address(cp_page);
+		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+				(1 << sbi->log_blocksize));
+		set_page_dirty(cp_page);
+		f2fs_put_page(cp_page, 1);
+	}
+
 	if (sbi->n_orphans) {
 		write_orphan_inodes(sbi, start_blk);
 		start_blk += orphan_blocks;

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45abd60..0924521 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c

@@ -417,7 +417,7 @@
 	if (unlikely(dn.data_blkaddr == NEW_ADDR))
 		return ERR_PTR(-EINVAL);
 
-	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+	page = grab_cache_page(mapping, index);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
@@ -455,7 +455,7 @@
 	int err;
 
 repeat:
-	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+	page = grab_cache_page(mapping, index);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
@@ -652,8 +652,7 @@
 		goto put_out;
 	}
 
-	end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	bh_result->b_size = (((size_t)1) << blkbits);
 	dn.ofs_in_node++;
 	pgofs++;
@@ -675,8 +674,7 @@
 		if (dn.data_blkaddr == NEW_ADDR)
 			goto put_out;
 
-		end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	}
 
 	if (maxblocks > (bh_result->b_size >> blkbits)) {
@@ -710,11 +708,19 @@
 	return err;
 }
 
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		u64 start, u64 len)
+{	/* thin wrapper: every argument is forwarded unchanged */
+	return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);	/* get_data_block is supplied as the final argument, as in f2fs_bmap() */
+}
+
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
 	int ret;
 
+	trace_f2fs_readpage(page, DATA);
+
 	/* If the file has inline data, try to read it directlly */
 	if (f2fs_has_inline_data(inode))
 		ret = f2fs_read_inline_data(inode, page);
@@ -790,6 +796,8 @@
 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
 	};
 
+	trace_f2fs_writepage(page, DATA);
+
 	if (page->index < end_index)
 		goto write;
 
@@ -798,10 +806,8 @@
 	 * this page does not have to be written to disk.
 	 */
 	offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if ((page->index >= end_index + 1) || !offset) {
-		inode_dec_dirty_dents(inode);
+	if ((page->index >= end_index + 1) || !offset)
 		goto out;
-	}
 
 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 write:
@@ -810,7 +816,6 @@
 
 	/* Dentry blocks are controlled by checkpoint */
 	if (S_ISDIR(inode->i_mode)) {
-		inode_dec_dirty_dents(inode);
 		err = do_write_data_page(page, &fio);
 		goto done;
 	}
@@ -832,15 +837,16 @@
 
 	clear_cold_data(page);
 out:
+	inode_dec_dirty_dents(inode);
 	unlock_page(page);
 	if (need_balance_fs)
 		f2fs_balance_fs(sbi);
+	if (wbc->for_reclaim)
+		f2fs_submit_merged_bio(sbi, DATA, WRITE);
 	return 0;
 
 redirty_out:
-	wbc->pages_skipped++;
-	account_page_redirty(page);
-	set_page_dirty(page);
+	redirty_page_for_writepage(wbc, page);
 	return AOP_WRITEPAGE_ACTIVATE;
 }
 
@@ -862,12 +868,15 @@
 	int ret;
 	long diff;
 
+	trace_f2fs_writepages(mapping->host, wbc, DATA);
+
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
 		return 0;
 
 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
-			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
+			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
+			available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
 
 	diff = nr_pages_to_write(sbi, DATA, wbc);
@@ -903,6 +912,8 @@
 	struct dnode_of_data dn;
 	int err = 0;
 
+	trace_f2fs_write_begin(inode, pos, len, flags);
+
 	f2fs_balance_fs(sbi);
 repeat:
 	err = f2fs_convert_inline_data(inode, pos + len);
@@ -912,6 +923,10 @@
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
+
+	/* to avoid latency during memory pressure */
+	unlock_page(page);
+
 	*pagep = page;
 
 	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
@@ -923,10 +938,18 @@
 	f2fs_unlock_op(sbi);
 
 	if (err) {
-		f2fs_put_page(page, 1);
+		f2fs_put_page(page, 0);
 		return err;
 	}
 inline_data:
+	lock_page(page);
+	if (unlikely(page->mapping != mapping)) {
+		f2fs_put_page(page, 1);
+		goto repeat;
+	}
+
+	f2fs_wait_on_page_writeback(page, DATA);
+
 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
 		return 0;
 
@@ -978,6 +1001,8 @@
 {
 	struct inode *inode = page->mapping->host;
 
+	trace_f2fs_write_end(inode, pos, len, copied);
+
 	SetPageUptodate(page);
 	set_page_dirty(page);
 
@@ -992,10 +1017,9 @@
 }
 
 static int check_direct_IO(struct inode *inode, int rw,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
-	int i;
 
 	if (rw == READ)
 		return 0;
@@ -1003,14 +1027,14 @@
 	if (offset & blocksize_mask)
 		return -EINVAL;
 
-	for (i = 0; i < nr_segs; i++)
-		if (iov[i].iov_len & blocksize_mask)
-			return -EINVAL;
+	if (iov_iter_alignment(iter) & blocksize_mask)
+		return -EINVAL;
+
 	return 0;
 }
 
 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1019,11 +1043,14 @@
 	if (f2fs_has_inline_data(inode))
 		return 0;
 
-	if (check_direct_IO(inode, rw, iov, offset, nr_segs))
+	if (check_direct_IO(inode, rw, iter, offset))
 		return 0;
 
-	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-							get_data_block);
+	/* clear fsync mark to recover these blocks */
+	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
+
+	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
@@ -1061,6 +1088,11 @@
 
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
+	struct inode *inode = mapping->host;
+
+	if (f2fs_has_inline_data(inode))
+		return 0;	/* inline-data inode: answer 0 without calling generic_block_bmap() */
+
 	return generic_block_bmap(mapping, block, get_data_block);
 }
 

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 972fd0e..966acb0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c

@@ -23,10 +23,10 @@
 
 static unsigned int dir_buckets(unsigned int level, int dir_level)
 {
-	if (level < MAX_DIR_HASH_DEPTH / 2)
+	if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)	/* bound the same sum that is used as the shift count on the next line */
 		return 1 << (level + dir_level);
 	else
-		return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
+		return MAX_DIR_BUCKETS;	/* fixed value once the sum reaches MAX_DIR_HASH_DEPTH / 2 */
 }
 
 static unsigned int bucket_blocks(unsigned int level)
@@ -268,6 +268,8 @@
 {
 	struct f2fs_inode *ri;
 
+	f2fs_wait_on_page_writeback(ipage, NODE);
+
 	/* copy name info. to this inode page */
 	ri = F2FS_INODE(ipage);
 	ri->i_namelen = cpu_to_le32(name->len);
@@ -637,11 +639,17 @@
 	struct f2fs_dentry_block *dentry_blk = NULL;
 	struct f2fs_dir_entry *de = NULL;
 	struct page *dentry_page = NULL;
+	struct file_ra_state *ra = &file->f_ra;
 	unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
 	unsigned char d_type = DT_UNKNOWN;
 
 	bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
 
+	/* readahead for multi pages of dir */
+	if (npages - n > 1 && !ra_has_index(ra, n))
+		page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+				min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
+
 	for (; n < npages; n++) {
 		dentry_page = get_lock_data_page(inode, n);
 		if (IS_ERR(dentry_page))

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2ecac83..e51c732 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h

@@ -182,6 +182,8 @@
 
 #define F2FS_LINK_MAX		32000	/* maximum link count per file */
 
+#define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
+
 /* for in-memory extent cache entry */
 #define F2FS_MIN_EXTENT_LEN	16	/* minimum extent length */
 
@@ -218,6 +220,7 @@
 	nid_t i_xattr_nid;		/* node id that contains xattrs */
 	unsigned long long xattr_ver;	/* cp version of xattr modification */
 	struct extent_info ext;		/* in-memory extent cache entry */
+	struct dir_inode_entry *dirty_dir;	/* the pointer of dirty dir */
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -243,6 +246,7 @@
 struct f2fs_nm_info {
 	block_t nat_blkaddr;		/* base disk address of NAT */
 	nid_t max_nid;			/* maximum possible node ids */
+	nid_t available_nids;		/* maximum available node ids */
 	nid_t next_scan_nid;		/* the next nid to be scanned */
 	unsigned int ram_thresh;	/* control the memory footprint */
 
@@ -323,6 +327,15 @@
 	int ret;
 };
 
+struct flush_cmd_control {
+	struct task_struct *f2fs_issue_flush;	/* flush thread */
+	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */
+	struct flush_cmd *issue_list;		/* list for command issue */
+	struct flush_cmd *dispatch_list;	/* list for command dispatch */
+	spinlock_t issue_lock;			/* for issue list lock */
+	struct flush_cmd *issue_tail;		/* list tail of issue list */
+};
+
 struct f2fs_sm_info {
 	struct sit_info *sit_info;		/* whole segment information */
 	struct free_segmap_info *free_info;	/* free segment information */
@@ -353,12 +366,8 @@
 	unsigned int min_ipu_util;	/* in-place-update threshold */
 
 	/* for flush command control */
-	struct task_struct *f2fs_issue_flush;	/* flush thread */
-	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */
-	struct flush_cmd *issue_list;		/* list for command issue */
-	struct flush_cmd *dispatch_list;	/* list for command dispatch */
-	spinlock_t issue_lock;			/* for issue list lock */
-	struct flush_cmd *issue_tail;		/* list tail of issue list */
+	struct flush_cmd_control *cmd_control_info;
+
 };
 
 /*
@@ -755,9 +764,18 @@
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	int offset = (flag == NAT_BITMAP) ?
+	int offset;
+
+	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {	/* raw superblock reports a non-zero cp_payload */
+		if (flag == NAT_BITMAP)
+			return &ckpt->sit_nat_version_bitmap;	/* NAT_BITMAP: start of the bitmap field itself */
+		else
+			return ((unsigned char *)ckpt + F2FS_BLKSIZE);	/* any other flag: F2FS_BLKSIZE bytes past the start of ckpt */
+	} else {	/* cp_payload is zero: same offset computation as the removed lines */
+		offset = (flag == NAT_BITMAP) ?
 			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
-	return &ckpt->sit_nat_version_bitmap + offset;
+		return &ckpt->sit_nat_version_bitmap + offset;	/* NAT_BITMAP is offset by sit_ver_bitmap_bytesize, any other flag by 0 */
+	}
 }
 
 static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
@@ -958,6 +976,7 @@
 enum {
 	FI_NEW_INODE,		/* indicate newly allocated inode */
 	FI_DIRTY_INODE,		/* indicate inode is dirty or not */
+	FI_DIRTY_DIR,		/* indicate directory has dirty pages */
 	FI_INC_LINK,		/* need to increment i_nlink */
 	FI_ACL_MODE,		/* indicate acl mode */
 	FI_NO_ALLOC,		/* should not allocate any blocks */
@@ -1071,6 +1090,12 @@
 	((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
 	 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
 
+/* get offset of first page in next direct node; NOTE(review): pgofs and fi are expanded unparenthesized and more than once, so pass simple side-effect-free expressions */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi)				\
+	((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) :	\
+	(pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) /	\
+	ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
 /*
  * file.c
  */
@@ -1140,8 +1165,10 @@
 struct dnode_of_data;
 struct node_info;
 
+bool available_free_memory(struct f2fs_sb_info *, int);
 int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
 bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
+void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
 void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
 int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
 int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1176,9 +1203,12 @@
 void f2fs_balance_fs(struct f2fs_sb_info *);
 void f2fs_balance_fs_bg(struct f2fs_sb_info *);
 int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
 void clear_prefree_segments(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *);
 int npages_for_summary_flush(struct f2fs_sb_info *);
 void allocate_new_segments(struct f2fs_sb_info *);
 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1221,7 +1251,6 @@
 void set_dirty_dir_page(struct inode *, struct page *);
 void add_dirty_dir_inode(struct inode *);
 void remove_dirty_dir_inode(struct inode *);
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t);
 void sync_dirty_dir_inodes(struct f2fs_sb_info *);
 void write_checkpoint(struct f2fs_sb_info *, bool);
 void init_orphan_info(struct f2fs_sb_info *);
@@ -1242,6 +1271,7 @@
 struct page *get_lock_data_page(struct inode *, pgoff_t);
 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
 int do_write_data_page(struct page *, struct f2fs_io_info *);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
 
 /*
  * gc.c
@@ -1391,5 +1421,6 @@
 int f2fs_read_inline_data(struct inode *, struct page *);
 int f2fs_convert_inline_data(struct inode *, pgoff_t);
 int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
+void truncate_inline_data(struct inode *, u64);
 int recover_inline_data(struct inode *, struct page *);
 #endif

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60e7d54..c58e330 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c

@@ -19,6 +19,7 @@
 #include <linux/compat.h>
 #include <linux/uaccess.h>
 #include <linux/mount.h>
+#include <linux/pagevec.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -194,6 +195,132 @@
 	return ret;
 }
 
+/*
+ * Look up the first page tagged dirty in @mapping, starting the tagged
+ * lookup at @pgofs, and return its index.
+ *
+ * Only SEEK_DATA uses the result: for any other @whence 0 is returned
+ * without touching the page cache. LONG_MAX is returned when the lookup
+ * finds no dirty page.
+ */
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+						pgoff_t pgofs, int whence)
+{
+	struct pagevec pvec;
+	pgoff_t first_dirty = LONG_MAX;
+
+	if (whence != SEEK_DATA)
+		return 0;
+
+	/* ask for at most one page carrying the dirty tag */
+	pagevec_init(&pvec, 0);
+	if (pagevec_lookup_tag(&pvec, mapping, &pgofs,
+					PAGECACHE_TAG_DIRTY, 1))
+		first_dirty = pvec.pages[0]->index;
+	pagevec_release(&pvec);
+
+	return first_dirty;
+}
+
+/*
+ * Decide whether the block at page offset @pgofs satisfies the seek.
+ *
+ * SEEK_HOLE: true when @blkaddr is NULL_ADDR.
+ * SEEK_DATA: true when @blkaddr is a real address (neither NEW_ADDR nor
+ *            NULL_ADDR), or when it is NEW_ADDR and @pgofs equals @dirty,
+ *            the first dirty page index computed by the caller.
+ * Any other @whence: false.
+ */
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+							int whence)
+{
+	if (whence == SEEK_HOLE)
+		return blkaddr == NULL_ADDR;
+
+	if (whence != SEEK_DATA)
+		return false;
+
+	/* NEW_ADDR only counts as data at the first dirty page index */
+	if (blkaddr == NEW_ADDR)
+		return dirty == pgofs;
+
+	return blkaddr != NULL_ADDR;
+}
+
+/*
+ * Implement SEEK_DATA / SEEK_HOLE: starting at @offset, walk the inode's
+ * dnodes under i_mutex until __found_offset() accepts a block, then set
+ * the file position there through vfs_setpos().
+ *
+ * Returns the value of vfs_setpos() on success. Returns -ENXIO when
+ * @offset is at or beyond i_size, when get_dnode_of_data() fails with an
+ * error other than -ENOENT, or when SEEK_DATA reaches i_size without
+ * finding data.
+ *
+ * For an inode with inline data, SEEK_DATA resolves to @offset itself and
+ * SEEK_HOLE resolves to i_size.
+ */
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+	loff_t maxbytes = inode->i_sb->s_maxbytes;
+	struct dnode_of_data dn;
+	pgoff_t pgofs, end_offset, dirty;
+	loff_t data_ofs = offset;
+	loff_t isize;
+	int err = 0;
+
+	mutex_lock(&inode->i_mutex);
+
+	isize = i_size_read(inode);
+	if (offset >= isize)
+		goto fail;
+
+	/* handle inline data case */
+	if (f2fs_has_inline_data(inode)) {
+		if (whence == SEEK_HOLE)
+			data_ofs = isize;
+		goto found;
+	}
+
+	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+	/*
+	 * pgofs is a pgoff_t; widen it to loff_t before shifting so the byte
+	 * offset is not truncated when pgoff_t is narrower than loff_t.
+	 */
+	for (; data_ofs < isize;
+			data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+		if (err && err != -ENOENT) {
+			goto fail;
+		} else if (err == -ENOENT) {
+			/* direct node does not exist */
+			if (whence == SEEK_DATA) {
+				/* nothing here: jump to the next dnode */
+				pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+							F2FS_I(inode));
+				continue;
+			} else {
+				/* a missing dnode is a hole */
+				goto found;
+			}
+		}
+
+		end_offset = IS_INODE(dn.node_page) ?
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+
+		/* find data/hole in dnode block */
+		for (; dn.ofs_in_node < end_offset;
+				dn.ofs_in_node++, pgofs++,
+				data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+			block_t blkaddr;
+			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+				f2fs_put_dnode(&dn);
+				goto found;
+			}
+		}
+		f2fs_put_dnode(&dn);
+	}
+
+	/* ran off the end of the file: only SEEK_HOLE may succeed here */
+	if (whence == SEEK_DATA)
+		goto fail;
+found:
+	/* a hole is never reported past i_size */
+	if (whence == SEEK_HOLE && data_ofs > isize)
+		data_ofs = isize;
+	mutex_unlock(&inode->i_mutex);
+	return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+	mutex_unlock(&inode->i_mutex);
+	return -ENXIO;
+}
+
+/*
+ * llseek entry point for f2fs regular files.
+ *
+ * SEEK_SET, SEEK_CUR and SEEK_END are forwarded to
+ * generic_file_llseek_size() with the superblock's s_maxbytes and the
+ * current i_size. SEEK_DATA and SEEK_HOLE go to f2fs_seek_block().
+ * Any other @whence yields -EINVAL.
+ */
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	if (whence == SEEK_DATA || whence == SEEK_HOLE)
+		return f2fs_seek_block(file, offset, whence);
+
+	if (whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END)
+		return generic_file_llseek_size(file, offset, whence,
+						inode->i_sb->s_maxbytes,
+						i_size_read(inode));
+
+	return -EINVAL;
+}
+
 static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
@@ -242,6 +369,9 @@
 	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
 	struct page *page;
 
+	if (f2fs_has_inline_data(inode))
+		return truncate_inline_data(inode, from);
+
 	if (!offset)
 		return;
 
@@ -288,10 +418,7 @@
 		return err;
 	}
 
-	if (IS_INODE(dn.node_page))
-		count = ADDRS_PER_INODE(F2FS_I(inode));
-	else
-		count = ADDRS_PER_BLOCK;
+	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 
 	count -= dn.ofs_in_node;
 	f2fs_bug_on(count < 0);
@@ -413,6 +540,7 @@
 	.listxattr	= f2fs_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
+	.fiemap		= f2fs_fiemap,
 };
 
 static void fill_zero(struct inode *inode, pgoff_t index,
@@ -555,6 +683,7 @@
 		i_size_read(inode) < new_size) {
 		i_size_write(inode, new_size);
 		mark_inode_dirty(inode);
+		f2fs_write_inode(inode, NULL);
 	}
 
 	return ret;
@@ -678,11 +807,11 @@
 #endif
 
 const struct file_operations f2fs_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.llseek		= f2fs_llseek,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.open		= generic_file_open,
 	.mmap		= f2fs_file_mmap,
 	.fsync		= f2fs_sync_file,
@@ -692,5 +821,5 @@
 	.compat_ioctl	= f2fs_compat_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };

diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 383db1f..1bba522 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c

@@ -81,8 +81,10 @@
 
 	f2fs_lock_op(sbi);
 	ipage = get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(ipage))
-		return PTR_ERR(ipage);
+	if (IS_ERR(ipage)) {
+		err = PTR_ERR(ipage);
+		goto out;
+	}
 
 	/*
 	 * i_addr[0] is not used for inline data,
@@ -90,11 +92,10 @@
 	 */
 	set_new_dnode(&dn, inode, ipage, NULL, 0);
 	err = f2fs_reserve_block(&dn, 0);
-	if (err) {
-		f2fs_unlock_op(sbi);
-		return err;
-	}
+	if (err)
+		goto out;
 
+	f2fs_wait_on_page_writeback(page, DATA);
 	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
 
 	/* Copy the whole inline data block */
@@ -118,6 +119,7 @@
 
 	sync_inode_page(&dn);
 	f2fs_put_dnode(&dn);
+out:
 	f2fs_unlock_op(sbi);
 	return err;
 }
@@ -132,7 +134,7 @@
 	else if (to_size <= MAX_INLINE_DATA)
 		return 0;
 
-	page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS);
+	page = grab_cache_page(inode->i_mapping, 0);
 	if (!page)
 		return -ENOMEM;
 
@@ -155,6 +157,7 @@
 		return err;
 	ipage = dn.inode_page;
 
+	f2fs_wait_on_page_writeback(ipage, NODE);
 	zero_user_segment(ipage, INLINE_DATA_OFFSET,
 				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
 	src_addr = kmap(page);
@@ -175,6 +178,26 @@
 	return 0;
 }
 
+/*
+ * Zero the inline data area of @inode's node page from byte @from up to
+ * the end of the inline area (MAX_INLINE_DATA), then mark the page dirty.
+ *
+ * Does nothing when @from is at or beyond MAX_INLINE_DATA. A failure to
+ * get the node page is silently ignored, since the function returns void.
+ */
+void truncate_inline_data(struct inode *inode, u64 from)
+{
+	struct page *node_page;
+	unsigned int zero_from, zero_to;
+
+	if (from >= MAX_INLINE_DATA)
+		return;
+
+	node_page = get_node_page(F2FS_SB(inode->i_sb), inode->i_ino);
+	if (IS_ERR(node_page))
+		return;
+
+	zero_from = INLINE_DATA_OFFSET + from;
+	zero_to = INLINE_DATA_OFFSET + MAX_INLINE_DATA;
+
+	/* let any in-flight writeback finish before modifying the page */
+	f2fs_wait_on_page_writeback(node_page, NODE);
+
+	zero_user_segment(node_page, zero_from, zero_to);
+	set_page_dirty(node_page);
+	f2fs_put_page(node_page, 1);
+}
+
 int recover_inline_data(struct inode *inode, struct page *npage)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -199,6 +222,8 @@
 		ipage = get_node_page(sbi, inode->i_ino);
 		f2fs_bug_on(IS_ERR(ipage));
 
+		f2fs_wait_on_page_writeback(ipage, NODE);
+
 		src_addr = inline_data_addr(npage);
 		dst_addr = inline_data_addr(ipage);
 		memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -210,6 +235,7 @@
 	if (f2fs_has_inline_data(inode)) {
 		ipage = get_node_page(sbi, inode->i_ino);
 		f2fs_bug_on(IS_ERR(ipage));
+		f2fs_wait_on_page_writeback(ipage, NODE);
 		zero_user_segment(ipage, INLINE_DATA_OFFSET,
 				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
 		clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ee829d3..adc622c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c

@@ -12,6 +12,7 @@
 #include <linux/f2fs_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/bitops.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -21,20 +22,20 @@
 void f2fs_set_inode_flags(struct inode *inode)
 {
 	unsigned int flags = F2FS_I(inode)->i_flags;
-
-	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
-			S_NOATIME | S_DIRSYNC);
+	unsigned int new_fl = 0;
 
 	if (flags & FS_SYNC_FL)
-		inode->i_flags |= S_SYNC;
+		new_fl |= S_SYNC;
 	if (flags & FS_APPEND_FL)
-		inode->i_flags |= S_APPEND;
+		new_fl |= S_APPEND;
 	if (flags & FS_IMMUTABLE_FL)
-		inode->i_flags |= S_IMMUTABLE;
+		new_fl |= S_IMMUTABLE;
 	if (flags & FS_NOATIME_FL)
-		inode->i_flags |= S_NOATIME;
+		new_fl |= S_NOATIME;
 	if (flags & FS_DIRSYNC_FL)
-		inode->i_flags |= S_DIRSYNC;
+		new_fl |= S_DIRSYNC;
+	set_mask_bits(&inode->i_flags,
+			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
 }
 
 static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -294,4 +295,5 @@
 	sb_end_intwrite(inode->i_sb);
 no_delete:
 	clear_inode(inode);
+	invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
 }

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a9409d1..9138c32 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c

@@ -41,18 +41,9 @@
 	}
 	f2fs_unlock_op(sbi);
 
-	inode->i_uid = current_fsuid();
-
-	if (dir->i_mode & S_ISGID) {
-		inode->i_gid = dir->i_gid;
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else {
-		inode->i_gid = current_fsgid();
-	}
+	inode_init_owner(inode, dir, mode);
 
 	inode->i_ino = ino;
-	inode->i_mode = mode;
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_generation = sbi->s_next_generation++;

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 57caa6e..9dfb9a0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c

@@ -26,20 +26,26 @@
 static struct kmem_cache *nat_entry_slab;
 static struct kmem_cache *free_nid_slab;
 
-static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 {
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct sysinfo val;
 	unsigned long mem_size = 0;
+	bool res = false;
 
 	si_meminfo(&val);
-	if (type == FREE_NIDS)
-		mem_size = nm_i->fcnt * sizeof(struct free_nid);
-	else if (type == NAT_ENTRIES)
-		mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
-	mem_size >>= 12;
-
-	/* give 50:50 memory for free nids and nat caches respectively */
-	return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
+	/* give 25%, 25%, 50% memory for each components respectively */
+	if (type == FREE_NIDS) {
+		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
+		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+	} else if (type == NAT_ENTRIES) {
+		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
+		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+	} else if (type == DIRTY_DENTS) {
+		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+	}
+	return res;
 }
 
 static void clear_node_page_dirty(struct page *page)
@@ -147,6 +153,18 @@
 	return fsync_done;
 }
 
+/*
+ * Clear the fsync_done flag of the cached NAT entry for @nid, if the NAT
+ * cache holds one. Lookup and update both happen with nat_tree_lock held
+ * for writing.
+ */
+void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	struct f2fs_nm_info *nm = NM_I(sbi);
+	struct nat_entry *cached;
+
+	write_lock(&nm->nat_tree_lock);
+	cached = __lookup_nat_cache(nm, nid);
+	if (cached != NULL)
+		cached->fsync_done = false;
+	write_unlock(&nm->nat_tree_lock);
+}
+
 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
 {
 	struct nat_entry *new;
@@ -179,9 +197,7 @@
 			write_unlock(&nm_i->nat_tree_lock);
 			goto retry;
 		}
-		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
-		nat_set_ino(e, le32_to_cpu(ne->ino));
-		nat_set_version(e, ne->version);
+		node_info_from_raw_nat(&e->ni, ne);
 	}
 	write_unlock(&nm_i->nat_tree_lock);
 }
@@ -243,7 +259,7 @@
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
-	if (available_free_memory(nm_i, NAT_ENTRIES))
+	if (available_free_memory(sbi, NAT_ENTRIES))
 		return 0;
 
 	write_lock(&nm_i->nat_tree_lock);
@@ -849,8 +865,7 @@
 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
 		return ERR_PTR(-EPERM);
 
-	page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
-					dn->nid, AOP_FLAG_NOFS);
+	page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
@@ -867,6 +882,7 @@
 	new_ni.ino = dn->inode->i_ino;
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
+	f2fs_wait_on_page_writeback(page, NODE);
 	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
 	set_cold_node(dn->inode, page);
 	SetPageUptodate(page);
@@ -946,8 +962,7 @@
 	struct page *page;
 	int err;
 repeat:
-	page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
-					nid, AOP_FLAG_NOFS);
+	page = grab_cache_page(NODE_MAPPING(sbi), nid);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
@@ -1194,6 +1209,8 @@
 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
 	};
 
+	trace_f2fs_writepage(page, NODE);
+
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
 
@@ -1225,10 +1242,7 @@
 	return 0;
 
 redirty_out:
-	dec_page_count(sbi, F2FS_DIRTY_NODES);
-	wbc->pages_skipped++;
-	account_page_redirty(page);
-	set_page_dirty(page);
+	redirty_page_for_writepage(wbc, page);
 	return AOP_WRITEPAGE_ACTIVATE;
 }
 
@@ -1238,6 +1252,8 @@
 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
 	long diff;
 
+	trace_f2fs_writepages(mapping->host, wbc, NODE);
+
 	/* balancing f2fs's metadata in background */
 	f2fs_balance_fs_bg(sbi);
 
@@ -1313,13 +1329,14 @@
 	radix_tree_delete(&nm_i->free_nid_root, i->nid);
 }
 
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 {
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct free_nid *i;
 	struct nat_entry *ne;
 	bool allocated = false;
 
-	if (!available_free_memory(nm_i, FREE_NIDS))
+	if (!available_free_memory(sbi, FREE_NIDS))
 		return -1;
 
 	/* 0 nid should not be used */
@@ -1372,9 +1389,10 @@
 		kmem_cache_free(free_nid_slab, i);
 }
 
-static void scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_sb_info *sbi,
 			struct page *nat_page, nid_t start_nid)
 {
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct f2fs_nat_block *nat_blk = page_address(nat_page);
 	block_t blk_addr;
 	int i;
@@ -1389,7 +1407,7 @@
 		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
 		f2fs_bug_on(blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR) {
-			if (add_free_nid(nm_i, start_nid, true) < 0)
+			if (add_free_nid(sbi, start_nid, true) < 0)
 				break;
 		}
 	}
@@ -1413,7 +1431,7 @@
 	while (1) {
 		struct page *page = get_current_nat_page(sbi, nid);
 
-		scan_nat_page(nm_i, page, nid);
+		scan_nat_page(sbi, page, nid);
 		f2fs_put_page(page, 1);
 
 		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -1433,7 +1451,7 @@
 		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
 		nid = le32_to_cpu(nid_in_journal(sum, i));
 		if (addr == NULL_ADDR)
-			add_free_nid(nm_i, nid, true);
+			add_free_nid(sbi, nid, true);
 		else
 			remove_free_nid(nm_i, nid);
 	}
@@ -1450,7 +1468,7 @@
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct free_nid *i = NULL;
 retry:
-	if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
+	if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
 		return false;
 
 	spin_lock(&nm_i->free_nid_list_lock);
@@ -1510,7 +1528,7 @@
 	spin_lock(&nm_i->free_nid_list_lock);
 	i = __lookup_free_nid_list(nm_i, nid);
 	f2fs_bug_on(!i || i->state != NID_ALLOC);
-	if (!available_free_memory(nm_i, FREE_NIDS)) {
+	if (!available_free_memory(sbi, FREE_NIDS)) {
 		__del_from_free_nid_list(nm_i, i);
 		need_free = true;
 	} else {
@@ -1532,7 +1550,7 @@
 	clear_node_page_dirty(page);
 }
 
-void recover_inline_xattr(struct inode *inode, struct page *page)
+static void recover_inline_xattr(struct inode *inode, struct page *page)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	void *src_addr, *dst_addr;
@@ -1557,6 +1575,7 @@
 	src_addr = inline_xattr_addr(page);
 	inline_size = inline_xattr_size(inode);
 
+	f2fs_wait_on_page_writeback(ipage, NODE);
 	memcpy(dst_addr, src_addr, inline_size);
 
 	update_inode(inode, ipage);
@@ -1612,6 +1631,11 @@
 	struct node_info old_ni, new_ni;
 	struct page *ipage;
 
+	get_node_info(sbi, ino, &old_ni);
+
+	if (unlikely(old_ni.blk_addr != NULL_ADDR))
+		return -EINVAL;
+
 	ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
 	if (!ipage)
 		return -ENOMEM;
@@ -1619,7 +1643,6 @@
 	/* Should not use this inode  from free nid list */
 	remove_free_nid(NM_I(sbi), ino);
 
-	get_node_info(sbi, ino, &old_ni);
 	SetPageUptodate(ipage);
 	fill_node_footer(ipage, ino, ino, 0, true);
 
@@ -1645,35 +1668,29 @@
 
 /*
  * ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are linked in pages list.
+ * these pre-read pages are allocated in bd_inode's mapping tree.
  */
-static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
 				int start, int nrpages)
 {
-	struct page *page;
-	int page_idx = start;
+	struct inode *inode = sbi->sb->s_bdev->bd_inode;
+	struct address_space *mapping = inode->i_mapping;
+	int i, page_idx = start;
 	struct f2fs_io_info fio = {
 		.type = META,
 		.rw = READ_SYNC | REQ_META | REQ_PRIO
 	};
 
-	for (; page_idx < start + nrpages; page_idx++) {
-		/* alloc temporal page for read node summary info*/
-		page = alloc_page(GFP_F2FS_ZERO);
-		if (!page)
+	for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+		/* alloc page in bd_inode for reading node summary info */
+		pages[i] = grab_cache_page(mapping, page_idx);
+		if (!pages[i])
 			break;
-
-		lock_page(page);
-		page->index = page_idx;
-		list_add_tail(&page->lru, pages);
+		f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
 	}
 
-	list_for_each_entry(page, pages, lru)
-		f2fs_submit_page_mbio(sbi, page, page->index, &fio);
-
 	f2fs_submit_merged_bio(sbi, META, READ);
-
-	return page_idx - start;
+	return i;
 }
 
 int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1681,11 +1698,11 @@
 {
 	struct f2fs_node *rn;
 	struct f2fs_summary *sum_entry;
-	struct page *page, *tmp;
+	struct inode *inode = sbi->sb->s_bdev->bd_inode;
 	block_t addr;
 	int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
-	int i, last_offset, nrpages, err = 0;
-	LIST_HEAD(page_list);
+	struct page *pages[bio_blocks];
+	int i, idx, last_offset, nrpages, err = 0;
 
 	/* scan the node segment */
 	last_offset = sbi->blocks_per_seg;
@@ -1696,29 +1713,31 @@
 		nrpages = min(last_offset - i, bio_blocks);
 
 		/* read ahead node pages */
-		nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
+		nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
 		if (!nrpages)
 			return -ENOMEM;
 
-		list_for_each_entry_safe(page, tmp, &page_list, lru) {
+		for (idx = 0; idx < nrpages; idx++) {
 			if (err)
 				goto skip;
 
-			lock_page(page);
-			if (unlikely(!PageUptodate(page))) {
+			lock_page(pages[idx]);
+			if (unlikely(!PageUptodate(pages[idx]))) {
 				err = -EIO;
 			} else {
-				rn = F2FS_NODE(page);
+				rn = F2FS_NODE(pages[idx]);
 				sum_entry->nid = rn->footer.nid;
 				sum_entry->version = 0;
 				sum_entry->ofs_in_node = 0;
 				sum_entry++;
 			}
-			unlock_page(page);
+			unlock_page(pages[idx]);
 skip:
-			list_del(&page->lru);
-			__free_pages(page, 0);
+			page_cache_release(pages[idx]);
 		}
+
+		invalidate_mapping_pages(inode->i_mapping, addr,
+							addr + nrpages);
 	}
 	return err;
 }
@@ -1756,9 +1775,7 @@
 			write_unlock(&nm_i->nat_tree_lock);
 			goto retry;
 		}
-		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
-		nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
-		nat_set_version(ne, raw_ne.version);
+		node_info_from_raw_nat(&ne->ni, &raw_ne);
 		__set_nat_cache_dirty(nm_i, ne);
 		write_unlock(&nm_i->nat_tree_lock);
 	}
@@ -1791,7 +1808,6 @@
 		nid_t nid;
 		struct f2fs_nat_entry raw_ne;
 		int offset = -1;
-		block_t new_blkaddr;
 
 		if (nat_get_blkaddr(ne) == NEW_ADDR)
 			continue;
@@ -1827,11 +1843,7 @@
 		f2fs_bug_on(!nat_blk);
 		raw_ne = nat_blk->entries[nid - start_nid];
 flush_now:
-		new_blkaddr = nat_get_blkaddr(ne);
-
-		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
-		raw_ne.block_addr = cpu_to_le32(new_blkaddr);
-		raw_ne.version = nat_get_version(ne);
+		raw_nat_from_node_info(&raw_ne, &ne->ni);
 
 		if (offset < 0) {
 			nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1841,7 +1853,7 @@
 		}
 
 		if (nat_get_blkaddr(ne) == NULL_ADDR &&
-				add_free_nid(NM_I(sbi), nid, false) <= 0) {
+				add_free_nid(sbi, nid, false) <= 0) {
 			write_lock(&nm_i->nat_tree_lock);
 			__del_from_nat_cache(nm_i, ne);
 			write_unlock(&nm_i->nat_tree_lock);
@@ -1869,8 +1881,10 @@
 	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
 	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
 
+	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
 	/* not used nids: 0, node, meta, (and root counted as valid node) */
-	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
+	nm_i->available_nids = nm_i->max_nid - 3;
 	nm_i->fcnt = 0;
 	nm_i->nat_cnt = 0;
 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;

diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5decc1a..7281112 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h

@@ -59,12 +59,12 @@
 	do {								\
 		ne->checkpointed = false;				\
 		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
-	} while (0);
+	} while (0)
 #define __clear_nat_cache_dirty(nm_i, ne)				\
 	do {								\
 		ne->checkpointed = true;				\
 		list_move_tail(&ne->list, &nm_i->nat_entries);		\
-	} while (0);
+	} while (0)
 #define inc_node_version(version)	(++version)
 
 static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -75,9 +75,18 @@
 	ni->version = raw_ne->version;
 }
 
-enum nid_type {
+/*
+ * Fill the raw NAT entry @raw_ne from @ni: ino and blk_addr are stored
+ * through cpu_to_le32(), version is copied unchanged.
+ */
+static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
+						struct node_info *ni)
+{
+	raw_ne->version = ni->version;
+	raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
+	raw_ne->ino = cpu_to_le32(ni->ino);
+}
+
+enum mem_type {
 	FREE_NIDS,	/* indicates the free nid list */
-	NAT_ENTRIES	/* indicates the cached nat entry */
+	NAT_ENTRIES,	/* indicates the cached nat entry */
+	DIRTY_DENTS	/* indicates dirty dentry pages */
 };
 
 /*
@@ -263,7 +272,7 @@
 {
 	struct f2fs_node *rn = F2FS_NODE(p);
 
-	wait_on_page_writeback(p);
+	f2fs_wait_on_page_writeback(p, NODE);
 
 	if (i)
 		rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b1ae89f..a112368 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c

@@ -46,15 +46,10 @@
 	struct inode *dir, *einode;
 	int err = 0;
 
-	dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino);
-	if (!dir) {
-		dir = f2fs_iget(inode->i_sb, pino);
-		if (IS_ERR(dir)) {
-			err = PTR_ERR(dir);
-			goto out;
-		}
-		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
-		add_dirty_dir_inode(dir);
+	dir = f2fs_iget(inode->i_sb, pino);
+	if (IS_ERR(dir)) {
+		err = PTR_ERR(dir);
+		goto out;
 	}
 
 	name.len = le32_to_cpu(raw_inode->i_namelen);
@@ -63,7 +58,7 @@
 	if (unlikely(name.len > F2FS_NAME_LEN)) {
 		WARN_ON(1);
 		err = -ENAMETOOLONG;
-		goto out;
+		goto out_err;
 	}
 retry:
 	de = f2fs_find_entry(dir, &name, &page);
@@ -73,7 +68,8 @@
 		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
 		if (IS_ERR(einode)) {
 			WARN_ON(1);
-			if (PTR_ERR(einode) == -ENOENT)
+			err = PTR_ERR(einode);
+			if (err == -ENOENT)
 				err = -EEXIST;
 			goto out_unmap_put;
 		}
@@ -87,11 +83,23 @@
 		goto retry;
 	}
 	err = __f2fs_add_link(dir, &name, inode);
+	if (err)
+		goto out_err;
+
+	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
+		iput(dir);
+	} else {
+		add_dirty_dir_inode(dir);
+		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
+	}
+
 	goto out;
 
 out_unmap_put:
 	kunmap(page);
 	f2fs_put_page(page, 0);
+out_err:
+	iput(dir);
 out:
 	f2fs_msg(inode->i_sb, KERN_NOTICE,
 			"%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -299,10 +307,7 @@
 		goto out;
 
 	start = start_bidx_of_node(ofs_of_node(page), fi);
-	if (IS_INODE(page))
-		end = start + ADDRS_PER_INODE(fi);
-	else
-		end = start + ADDRS_PER_BLOCK;
+	end = start + ADDRS_PER_PAGE(page, fi);
 
 	f2fs_lock_op(sbi);
 

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 085f548..f25f0e0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c

@@ -25,7 +25,6 @@
 #define __reverse_ffz(x) __reverse_ffs(~(x))
 
 static struct kmem_cache *discard_entry_slab;
-static struct kmem_cache *flush_cmd_slab;
 
 /*
  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -200,20 +199,20 @@
 static int issue_flush_thread(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
-	struct f2fs_sm_info *sm_i = SM_I(sbi);
-	wait_queue_head_t *q = &sm_i->flush_wait_queue;
+	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+	wait_queue_head_t *q = &fcc->flush_wait_queue;
 repeat:
 	if (kthread_should_stop())
 		return 0;
 
-	spin_lock(&sm_i->issue_lock);
-	if (sm_i->issue_list) {
-		sm_i->dispatch_list = sm_i->issue_list;
-		sm_i->issue_list = sm_i->issue_tail = NULL;
+	spin_lock(&fcc->issue_lock);
+	if (fcc->issue_list) {
+		fcc->dispatch_list = fcc->issue_list;
+		fcc->issue_list = fcc->issue_tail = NULL;
 	}
-	spin_unlock(&sm_i->issue_lock);
+	spin_unlock(&fcc->issue_lock);
 
-	if (sm_i->dispatch_list) {
+	if (fcc->dispatch_list) {
 		struct bio *bio = bio_alloc(GFP_NOIO, 0);
 		struct flush_cmd *cmd, *next;
 		int ret;
@@ -221,47 +220,79 @@
 		bio->bi_bdev = sbi->sb->s_bdev;
 		ret = submit_bio_wait(WRITE_FLUSH, bio);
 
-		for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
+		for (cmd = fcc->dispatch_list; cmd; cmd = next) {
 			cmd->ret = ret;
 			next = cmd->next;
 			complete(&cmd->wait);
 		}
-		sm_i->dispatch_list = NULL;
+		bio_put(bio);
+		fcc->dispatch_list = NULL;
 	}
 
-	wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
+	wait_event_interruptible(*q,
+			kthread_should_stop() || fcc->issue_list);
 	goto repeat;
 }
 
 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_sm_info *sm_i = SM_I(sbi);
-	struct flush_cmd *cmd;
-	int ret;
+	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+	struct flush_cmd cmd;
 
 	if (!test_opt(sbi, FLUSH_MERGE))
 		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
 
-	cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
-	cmd->next = NULL;
-	cmd->ret = 0;
-	init_completion(&cmd->wait);
+	init_completion(&cmd.wait);
+	cmd.next = NULL;
 
-	spin_lock(&sm_i->issue_lock);
-	if (sm_i->issue_list)
-		sm_i->issue_tail->next = cmd;
+	spin_lock(&fcc->issue_lock);
+	if (fcc->issue_list)
+		fcc->issue_tail->next = &cmd;
 	else
-		sm_i->issue_list = cmd;
-	sm_i->issue_tail = cmd;
-	spin_unlock(&sm_i->issue_lock);
+		fcc->issue_list = &cmd;
+	fcc->issue_tail = &cmd;
+	spin_unlock(&fcc->issue_lock);
 
-	if (!sm_i->dispatch_list)
-		wake_up(&sm_i->flush_wait_queue);
+	if (!fcc->dispatch_list)
+		wake_up(&fcc->flush_wait_queue);
 
-	wait_for_completion(&cmd->wait);
-	ret = cmd->ret;
-	kmem_cache_free(flush_cmd_slab, cmd);
-	return ret;
+	wait_for_completion(&cmd.wait);
+
+	return cmd.ret;
+}
+
+/*
+ * Allocate the flush command control structure for @sbi and start the
+ * issue_flush_thread() kthread that services it.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * from kthread_run(). On failure sm_info->cmd_control_info is left NULL.
+ */
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+	dev_t dev = sbi->sb->s_bdev->bd_dev;
+	struct flush_cmd_control *fcc;
+	int err;
+
+	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+	if (!fcc)
+		return -ENOMEM;
+	spin_lock_init(&fcc->issue_lock);
+	init_waitqueue_head(&fcc->flush_wait_queue);
+
+	/*
+	 * Publish fcc before starting the thread: issue_flush_thread()
+	 * reads SM_I(sbi)->cmd_control_info as soon as it runs, and would
+	 * dereference NULL if it ran before this assignment.
+	 */
+	sbi->sm_info->cmd_control_info = fcc;
+
+	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+	if (IS_ERR(fcc->f2fs_issue_flush)) {
+		err = PTR_ERR(fcc->f2fs_issue_flush);
+		/* unpublish before freeing so no stale pointer remains */
+		sbi->sm_info->cmd_control_info = NULL;
+		kfree(fcc);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Stop the flush kthread recorded in @sbi's flush command control (when
+ * both the control structure and its thread pointer are set), free the
+ * structure, and reset sm_info->cmd_control_info to NULL.
+ */
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+	struct flush_cmd_control *ctl = sbi->sm_info->cmd_control_info;
+
+	if (ctl) {
+		if (ctl->f2fs_issue_flush)
+			kthread_stop(ctl->f2fs_issue_flush);
+	}
+
+	/* kfree() is still called when ctl is NULL, as before */
+	kfree(ctl);
+	sbi->sm_info->cmd_control_info = NULL;
+}
 
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -336,13 +367,26 @@
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
-static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
+static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 				block_t blkstart, block_t blklen)
 {
 	sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
 	sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
-	blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+}
+
+/*
+ * Issue a one-block discard for the next free block address of the warm
+ * node current segment. If the discard returns non-zero, fall back to
+ * grabbing the meta page at that address and marking it dirty.
+ */
+void discard_next_dnode(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *warm_node = CURSEG_I(sbi, CURSEG_WARM_NODE);
+	block_t next_blk = NEXT_FREE_BLKADDR(sbi, warm_node);
+	struct page *meta_page;
+
+	if (!f2fs_issue_discard(sbi, next_blk, 1))
+		return;
+
+	/* zero-filled page */
+	meta_page = grab_meta_page(sbi, next_blk);
+	set_page_dirty(meta_page);
+	f2fs_put_page(meta_page, 1);
+}
 
 static void add_discard_addrs(struct f2fs_sb_info *sbi,
@@ -1832,7 +1876,6 @@
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	dev_t dev = sbi->sb->s_bdev->bd_dev;
 	struct f2fs_sm_info *sm_info;
 	int err;
 
@@ -1860,14 +1903,10 @@
 	sm_info->nr_discards = 0;
 	sm_info->max_discards = 0;
 
-	if (test_opt(sbi, FLUSH_MERGE)) {
-		spin_lock_init(&sm_info->issue_lock);
-		init_waitqueue_head(&sm_info->flush_wait_queue);
-
-		sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
-				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
-		if (IS_ERR(sm_info->f2fs_issue_flush))
-			return PTR_ERR(sm_info->f2fs_issue_flush);
+	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+		err = create_flush_cmd_control(sbi);
+		if (err)
+			return err;
 	}
 
 	err = build_sit_info(sbi);
@@ -1976,10 +2015,10 @@
 void destroy_segment_manager(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_sm_info *sm_info = SM_I(sbi);
+
 	if (!sm_info)
 		return;
-	if (sm_info->f2fs_issue_flush)
-		kthread_stop(sm_info->f2fs_issue_flush);
+	destroy_flush_cmd_control(sbi);
 	destroy_dirty_segmap(sbi);
 	destroy_curseg(sbi);
 	destroy_free_segmap(sbi);
@@ -1994,17 +2033,10 @@
 			sizeof(struct discard_entry));
 	if (!discard_entry_slab)
 		return -ENOMEM;
-	flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
-			sizeof(struct flush_cmd));
-	if (!flush_cmd_slab) {
-		kmem_cache_destroy(discard_entry_slab);
-		return -ENOMEM;
-	}
 	return 0;
 }
 
 void destroy_segment_manager_caches(void)
 {
 	kmem_cache_destroy(discard_entry_slab);
-	kmem_cache_destroy(flush_cmd_slab);
 }

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c756923..b2b1863 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c

@@ -514,7 +514,7 @@
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
 
-	if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC))
+	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
 		seq_printf(seq, ",background_gc=%s", "on");
 	else
 		seq_printf(seq, ",background_gc=%s", "off");
@@ -542,7 +542,7 @@
 		seq_puts(seq, ",disable_ext_identify");
 	if (test_opt(sbi, INLINE_DATA))
 		seq_puts(seq, ",inline_data");
-	if (test_opt(sbi, FLUSH_MERGE))
+	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
 		seq_puts(seq, ",flush_merge");
 	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
 
@@ -594,6 +594,8 @@
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct f2fs_mount_info org_mount_opt;
 	int err, active_logs;
+	bool need_restart_gc = false;
+	bool need_stop_gc = false;
 
 	sync_filesystem(sb);
 
@@ -611,7 +613,7 @@
 
 	/*
 	 * Previous and new state of filesystem is RO,
-	 * so no point in checking GC conditions.
+	 * so skip checking GC and FLUSH_MERGE conditions.
 	 */
 	if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
 		goto skip;
@@ -625,18 +627,40 @@
 		if (sbi->gc_thread) {
 			stop_gc_thread(sbi);
 			f2fs_sync_fs(sb, 1);
+			need_restart_gc = true;
 		}
 	} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
 		err = start_gc_thread(sbi);
 		if (err)
 			goto restore_opts;
+		need_stop_gc = true;
+	}
+
+	/*
+	 * We stop the issue flush thread if the FS is mounted as RO
+	 * or if flush_merge is not passed as a mount option.
+	 */
+	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+		destroy_flush_cmd_control(sbi);
+	} else if (test_opt(sbi, FLUSH_MERGE) &&
+					!sbi->sm_info->cmd_control_info) {
+		err = create_flush_cmd_control(sbi);
+		if (err)
+			goto restore_gc;
 	}
 skip:
 	/* Update the POSIXACL Flag */
 	 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
 	return 0;
-
+restore_gc:
+	if (need_restart_gc) {
+		if (start_gc_thread(sbi))
+			f2fs_msg(sbi->sb, KERN_WARNING,
+				"background gc thread is stop");
+	} else if (need_stop_gc) {
+		stop_gc_thread(sbi);
+	}
 restore_opts:
 	sbi->mount_opt = org_mount_opt;
 	sbi->active_logs = active_logs;

diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 503c245..8bea941 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c

@@ -26,7 +26,7 @@
 #include "xattr.h"
 
 static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
-		size_t list_size, const char *name, size_t name_len, int type)
+		size_t list_size, const char *name, size_t len, int type)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
 	int total_len, prefix_len = 0;
@@ -53,11 +53,11 @@
 		return -EINVAL;
 	}
 
-	total_len = prefix_len + name_len + 1;
+	total_len = prefix_len + len + 1;
 	if (list && total_len <= list_size) {
 		memcpy(list, prefix, prefix_len);
-		memcpy(list + prefix_len, name, name_len);
-		list[prefix_len + name_len] = '\0';
+		memcpy(list + prefix_len, name, len);
+		list[prefix_len + len] = '\0';
 	}
 	return total_len;
 }
@@ -108,11 +108,12 @@
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 
-	return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL);
+	return f2fs_setxattr(dentry->d_inode, type, name,
+					value, size, NULL, flags);
 }
 
 static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
-		size_t list_size, const char *name, size_t name_len, int type)
+		size_t list_size, const char *name, size_t len, int type)
 {
 	const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
 	size_t size;
@@ -155,9 +156,6 @@
 }
 
 #ifdef CONFIG_F2FS_FS_SECURITY
-static int __f2fs_setxattr(struct inode *inode, int name_index,
-			const char *name, const void *value, size_t value_len,
-			struct page *ipage);
 static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
 		void *page)
 {
@@ -165,9 +163,9 @@
 	int err = 0;
 
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-		err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+		err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
 				xattr->name, xattr->value,
-				xattr->value_len, (struct page *)page);
+				xattr->value_len, (struct page *)page, 0);
 		if (err < 0)
 			break;
 	}
@@ -241,26 +239,26 @@
 	NULL,
 };
 
-static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
+static inline const struct xattr_handler *f2fs_xattr_handler(int index)
 {
 	const struct xattr_handler *handler = NULL;
 
-	if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
-		handler = f2fs_xattr_handler_map[name_index];
+	if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
+		handler = f2fs_xattr_handler_map[index];
 	return handler;
 }
 
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index,
-					size_t name_len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
+					size_t len, const char *name)
 {
 	struct f2fs_xattr_entry *entry;
 
 	list_for_each_xattr(entry, base_addr) {
-		if (entry->e_name_index != name_index)
+		if (entry->e_name_index != index)
 			continue;
-		if (entry->e_name_len != name_len)
+		if (entry->e_name_len != len)
 			continue;
-		if (!memcmp(entry->e_name, name, name_len))
+		if (!memcmp(entry->e_name, name, len))
 			break;
 	}
 	return entry;
@@ -347,6 +345,7 @@
 
 		if (ipage) {
 			inline_addr = inline_xattr_addr(ipage);
+			f2fs_wait_on_page_writeback(ipage, NODE);
 		} else {
 			page = get_node_page(sbi, inode->i_ino);
 			if (IS_ERR(page)) {
@@ -354,6 +353,7 @@
 				return PTR_ERR(page);
 			}
 			inline_addr = inline_xattr_addr(page);
+			f2fs_wait_on_page_writeback(page, NODE);
 		}
 		memcpy(inline_addr, txattr_addr, inline_size);
 		f2fs_put_page(page, 1);
@@ -374,6 +374,7 @@
 			return PTR_ERR(xpage);
 		}
 		f2fs_bug_on(new_nid);
+		f2fs_wait_on_page_writeback(xpage, NODE);
 	} else {
 		struct dnode_of_data dn;
 		set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -396,42 +397,43 @@
 	return 0;
 }
 
-int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
+int f2fs_getxattr(struct inode *inode, int index, const char *name,
 		void *buffer, size_t buffer_size)
 {
 	struct f2fs_xattr_entry *entry;
 	void *base_addr;
 	int error = 0;
-	size_t value_len, name_len;
+	size_t size, len;
 
 	if (name == NULL)
 		return -EINVAL;
-	name_len = strlen(name);
-	if (name_len > F2FS_NAME_LEN)
+
+	len = strlen(name);
+	if (len > F2FS_NAME_LEN)
 		return -ERANGE;
 
 	base_addr = read_all_xattrs(inode, NULL);
 	if (!base_addr)
 		return -ENOMEM;
 
-	entry = __find_xattr(base_addr, name_index, name_len, name);
+	entry = __find_xattr(base_addr, index, len, name);
 	if (IS_XATTR_LAST_ENTRY(entry)) {
 		error = -ENODATA;
 		goto cleanup;
 	}
 
-	value_len = le16_to_cpu(entry->e_value_size);
+	size = le16_to_cpu(entry->e_value_size);
 
-	if (buffer && value_len > buffer_size) {
+	if (buffer && size > buffer_size) {
 		error = -ERANGE;
 		goto cleanup;
 	}
 
 	if (buffer) {
 		char *pval = entry->e_name + entry->e_name_len;
-		memcpy(buffer, pval, value_len);
+		memcpy(buffer, pval, size);
 	}
-	error = value_len;
+	error = size;
 
 cleanup:
 	kzfree(base_addr);
@@ -475,15 +477,15 @@
 	return error;
 }
 
-static int __f2fs_setxattr(struct inode *inode, int name_index,
-			const char *name, const void *value, size_t value_len,
-			struct page *ipage)
+static int __f2fs_setxattr(struct inode *inode, int index,
+			const char *name, const void *value, size_t size,
+			struct page *ipage, int flags)
 {
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_xattr_entry *here, *last;
 	void *base_addr;
 	int found, newsize;
-	size_t name_len;
+	size_t len;
 	__u32 new_hsize;
 	int error = -ENOMEM;
 
@@ -491,11 +493,11 @@
 		return -EINVAL;
 
 	if (value == NULL)
-		value_len = 0;
+		size = 0;
 
-	name_len = strlen(name);
+	len = strlen(name);
 
-	if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
+	if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
 		return -ERANGE;
 
 	base_addr = read_all_xattrs(inode, ipage);
@@ -503,16 +505,23 @@
 		goto exit;
 
 	/* find entry with wanted name. */
-	here = __find_xattr(base_addr, name_index, name_len, name);
+	here = __find_xattr(base_addr, index, len, name);
 
 	found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
-	last = here;
 
+	if ((flags & XATTR_REPLACE) && !found) {
+		error = -ENODATA;
+		goto exit;
+	} else if ((flags & XATTR_CREATE) && found) {
+		error = -EEXIST;
+		goto exit;
+	}
+
+	last = here;
 	while (!IS_XATTR_LAST_ENTRY(last))
 		last = XATTR_NEXT_ENTRY(last);
 
-	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
-			name_len + value_len);
+	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
 
 	/* 1. Check space */
 	if (value) {
@@ -555,12 +564,12 @@
 		 * We just write new entry.
 		 */
 		memset(last, 0, newsize);
-		last->e_name_index = name_index;
-		last->e_name_len = name_len;
-		memcpy(last->e_name, name, name_len);
-		pval = last->e_name + name_len;
-		memcpy(pval, value, value_len);
-		last->e_value_size = cpu_to_le16(value_len);
+		last->e_name_index = index;
+		last->e_name_len = len;
+		memcpy(last->e_name, name, len);
+		pval = last->e_name + len;
+		memcpy(pval, value, size);
+		last->e_value_size = cpu_to_le16(size);
 		new_hsize += newsize;
 	}
 
@@ -583,18 +592,23 @@
 	return error;
 }
 
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
-			const void *value, size_t value_len, struct page *ipage)
+int f2fs_setxattr(struct inode *inode, int index, const char *name,
+				const void *value, size_t size,
+				struct page *ipage, int flags)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	int err;
 
+	/* a non-NULL ipage is passed only from init_inode_metadata */
+	if (ipage)
+		return __f2fs_setxattr(inode, index, name, value,
+						size, ipage, flags);
 	f2fs_balance_fs(sbi);
 
 	f2fs_lock_op(sbi);
 	/* protect xattr_ver */
 	down_write(&F2FS_I(inode)->i_sem);
-	err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
+	err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
 	up_write(&F2FS_I(inode)->i_sem);
 	f2fs_unlock_op(sbi);
 

diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index b21d9eb..34ab7db 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h

@@ -114,18 +114,18 @@
 extern const struct xattr_handler *f2fs_xattr_handlers[];
 
 extern int f2fs_setxattr(struct inode *, int, const char *,
-				const void *, size_t, struct page *);
+				const void *, size_t, struct page *, int);
 extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
 extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
 #else
 
 #define f2fs_xattr_handlers	NULL
-static inline int f2fs_setxattr(struct inode *inode, int name_index,
-		const char *name, const void *value, size_t value_len)
+static inline int f2fs_setxattr(struct inode *inode, int index,
+		const char *name, const void *value, size_t size, int flags)
 {
 	return -EOPNOTSUPP;
 }
-static inline int f2fs_getxattr(struct inode *inode, int name_index,
+static inline int f2fs_getxattr(struct inode *inode, int index,
 		const char *name, void *buffer, size_t buffer_size)
 {
 	return -EOPNOTSUPP;

diff --git a/fs/fat/file.c b/fs/fat/file.c
index 9b104f5..85f79a8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c

@@ -170,10 +170,10 @@
 
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.unlocked_ioctl	= fat_generic_ioctl,

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 9c83594..756aead 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c

@@ -247,12 +247,13 @@
 }
 
 static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
 	if (rw == WRITE) {
@@ -265,7 +266,7 @@
 		 *
 		 * Return 0, and fallback to normal buffered write.
 		 */
-		loff_t size = offset + iov_length(iov, nr_segs);
+		loff_t size = offset + count;
 		if (MSDOS_I(inode)->mmu_private < size)
 			return 0;
 	}
@@ -274,10 +275,9 @@
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 fat_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
 	if (ret < 0 && (rw & WRITE))
-		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		fat_write_failed(mapping, offset + count);
 
 	return ret;
 }

diff --git a/fs/file.c b/fs/file.c
index 8f294cf..66923fe 100644
--- a/fs/file.c
+++ b/fs/file.c

@@ -44,15 +44,10 @@
 	return vmalloc(size);
 }
 
-static void free_fdmem(void *ptr)
-{
-	is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
-}
-
 static void __free_fdtable(struct fdtable *fdt)
 {
-	free_fdmem(fdt->fd);
-	free_fdmem(fdt->open_fds);
+	kvfree(fdt->fd);
+	kvfree(fdt->open_fds);
 	kfree(fdt);
 }
 
@@ -130,7 +125,7 @@
 	return fdt;
 
 out_arr:
-	free_fdmem(fdt->fd);
+	kvfree(fdt->fd);
 out_fdt:
 	kfree(fdt);
 out:

diff --git a/fs/file_table.c b/fs/file_table.c
index 40bf466..385bfd3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c

@@ -175,6 +175,12 @@
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
+	if ((mode & FMODE_READ) &&
+	     likely(fop->read || fop->aio_read || fop->read_iter))
+		mode |= FMODE_CAN_READ;
+	if ((mode & FMODE_WRITE) &&
+	     likely(fop->write || fop->aio_write || fop->write_iter))
+		mode |= FMODE_CAN_WRITE;
 	file->f_mode = mode;
 	file->f_op = fop;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 13b691a..966ace8 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c

@@ -94,8 +94,10 @@
 	loff_t pos = 0;
 	struct iovec iov = { .iov_base = buf, .iov_len = count };
 	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct iov_iter ii;
+	iov_iter_init(&ii, READ, &iov, 1, count);
 
-	return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
+	return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
 }
 
 static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -104,12 +106,14 @@
 	loff_t pos = 0;
 	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
 	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct iov_iter ii;
+	iov_iter_init(&ii, WRITE, &iov, 1, count);
 
 	/*
 	 * No locking or generic_write_checks(), the server is
 	 * responsible for locking and sanity checks.
 	 */
-	return fuse_direct_io(&io, &iov, 1, count, &pos,
+	return fuse_direct_io(&io, &ii, &pos,
 			      FUSE_DIO_WRITE | FUSE_DIO_CUSE);
 }
 

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 903cbc9..6e16dad 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c

@@ -933,8 +933,7 @@
 	return err;
 }
 
-static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-				  unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -945,14 +944,14 @@
 	 * i_size is up to date).
 	 */
 	if (fc->auto_inval_data ||
-	    (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
+	    (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
 		int err;
 		err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
 		if (err)
 			return err;
 	}
 
-	return generic_file_aio_read(iocb, iov, nr_segs, pos);
+	return generic_file_read_iter(iocb, to);
 }
 
 static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1181,19 +1180,17 @@
 	return res > 0 ? res : err;
 }
 
-static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				   unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
-	size_t count = 0;
-	size_t ocount = 0;
+	size_t count = iov_iter_count(from);
 	ssize_t written = 0;
 	ssize_t written_buffered = 0;
 	struct inode *inode = mapping->host;
 	ssize_t err;
-	struct iov_iter i;
 	loff_t endbyte = 0;
+	loff_t pos = iocb->ki_pos;
 
 	if (get_fuse_conn(inode)->writeback_cache) {
 		/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1201,17 +1198,9 @@
 		if (err)
 			return err;
 
-		return generic_file_aio_write(iocb, iov, nr_segs, pos);
+		return generic_file_write_iter(iocb, from);
 	}
 
-	WARN_ON(iocb->ki_pos != pos);
-
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
@@ -1224,6 +1213,7 @@
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(from, count);
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -1233,16 +1223,13 @@
 		goto out;
 
 	if (file->f_flags & O_DIRECT) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
-						    count, ocount);
-		if (written < 0 || written == count)
+		written = generic_file_direct_write(iocb, from, pos);
+		if (written < 0 || !iov_iter_count(from))
 			goto out;
 
 		pos += written;
-		count -= written;
 
-		iov_iter_init(&i, iov, nr_segs, count, written);
-		written_buffered = fuse_perform_write(file, mapping, &i, pos);
+		written_buffered = fuse_perform_write(file, mapping, from, pos);
 		if (written_buffered < 0) {
 			err = written_buffered;
 			goto out;
@@ -1261,8 +1248,7 @@
 		written += written_buffered;
 		iocb->ki_pos = pos + written_buffered;
 	} else {
-		iov_iter_init(&i, iov, nr_segs, count, 0);
-		written = fuse_perform_write(file, mapping, &i, pos);
+		written = fuse_perform_write(file, mapping, from, pos);
 		if (written >= 0)
 			iocb->ki_pos = pos + written;
 	}
@@ -1300,7 +1286,7 @@
 	size_t nbytes = 0;  /* # bytes already packed in req */
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
-	if (segment_eq(get_fs(), KERNEL_DS)) {
+	if (ii->type & ITER_KVEC) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
@@ -1316,35 +1302,26 @@
 
 	while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
 		unsigned npages;
-		unsigned long user_addr = fuse_get_user_addr(ii);
-		unsigned offset = user_addr & ~PAGE_MASK;
-		size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
-		int ret;
-
+		size_t start;
 		unsigned n = req->max_pages - req->num_pages;
-		frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
-
-		npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		npages = clamp(npages, 1U, n);
-
-		ret = get_user_pages_fast(user_addr, npages, !write,
-					  &req->pages[req->num_pages]);
+		ssize_t ret = iov_iter_get_pages(ii,
+					&req->pages[req->num_pages],
+					n * PAGE_SIZE, &start);
 		if (ret < 0)
 			return ret;
 
-		npages = ret;
-		frag_size = min_t(size_t, frag_size,
-				  (npages << PAGE_SHIFT) - offset);
-		iov_iter_advance(ii, frag_size);
+		iov_iter_advance(ii, ret);
+		nbytes += ret;
 
-		req->page_descs[req->num_pages].offset = offset;
+		ret += start;
+		npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+
+		req->page_descs[req->num_pages].offset = start;
 		fuse_page_descs_length_init(req, req->num_pages, npages);
 
 		req->num_pages += npages;
 		req->page_descs[req->num_pages - 1].length -=
-			(npages << PAGE_SHIFT) - offset - frag_size;
-
-		nbytes += frag_size;
+			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
 	}
 
 	if (write)
@@ -1359,24 +1336,11 @@
 
 static inline int fuse_iter_npages(const struct iov_iter *ii_p)
 {
-	struct iov_iter ii = *ii_p;
-	int npages = 0;
-
-	while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
-		unsigned long user_addr = fuse_get_user_addr(&ii);
-		unsigned offset = user_addr & ~PAGE_MASK;
-		size_t frag_size = iov_iter_single_seg_count(&ii);
-
-		npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		iov_iter_advance(&ii, frag_size);
-	}
-
-	return min(npages, FUSE_MAX_PAGES_PER_REQ);
+	return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
 }
 
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
-		       unsigned long nr_segs, size_t count, loff_t *ppos,
-		       int flags)
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+		       loff_t *ppos, int flags)
 {
 	int write = flags & FUSE_DIO_WRITE;
 	int cuse = flags & FUSE_DIO_CUSE;
@@ -1386,18 +1350,16 @@
 	struct fuse_conn *fc = ff->fc;
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
+	size_t count = iov_iter_count(iter);
 	pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
 	pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 	ssize_t res = 0;
 	struct fuse_req *req;
-	struct iov_iter ii;
-
-	iov_iter_init(&ii, iov, nr_segs, count, 0);
 
 	if (io->async)
-		req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+		req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
 	else
-		req = fuse_get_req(fc, fuse_iter_npages(&ii));
+		req = fuse_get_req(fc, fuse_iter_npages(iter));
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1413,7 +1375,7 @@
 		size_t nres;
 		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
-		int err = fuse_get_user_pages(req, &ii, &nbytes, write);
+		int err = fuse_get_user_pages(req, iter, &nbytes, write);
 		if (err) {
 			res = err;
 			break;
@@ -1443,9 +1405,9 @@
 			fuse_put_request(fc, req);
 			if (io->async)
 				req = fuse_get_req_for_background(fc,
-					fuse_iter_npages(&ii));
+					fuse_iter_npages(iter));
 			else
-				req = fuse_get_req(fc, fuse_iter_npages(&ii));
+				req = fuse_get_req(fc, fuse_iter_npages(iter));
 			if (IS_ERR(req))
 				break;
 		}
@@ -1460,9 +1422,8 @@
 EXPORT_SYMBOL_GPL(fuse_direct_io);
 
 static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
-				  const struct iovec *iov,
-				  unsigned long nr_segs, loff_t *ppos,
-				  size_t count)
+				  struct iov_iter *iter,
+				  loff_t *ppos)
 {
 	ssize_t res;
 	struct file *file = io->file;
@@ -1471,7 +1432,7 @@
 	if (is_bad_inode(inode))
 		return -EIO;
 
-	res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0);
+	res = fuse_direct_io(io, iter, ppos, 0);
 
 	fuse_invalidate_attr(inode);
 
@@ -1483,22 +1444,26 @@
 {
 	struct fuse_io_priv io = { .async = 0, .file = file };
 	struct iovec iov = { .iov_base = buf, .iov_len = count };
-	return __fuse_direct_read(&io, &iov, 1, ppos, count);
+	struct iov_iter ii;
+	iov_iter_init(&ii, READ, &iov, 1, count);
+	return __fuse_direct_read(&io, &ii, ppos);
 }
 
 static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
-				   const struct iovec *iov,
-				   unsigned long nr_segs, loff_t *ppos)
+				   struct iov_iter *iter,
+				   loff_t *ppos)
 {
 	struct file *file = io->file;
 	struct inode *inode = file_inode(file);
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	ssize_t res;
 
+
 	res = generic_write_checks(file, ppos, &count, 0);
-	if (!res)
-		res = fuse_direct_io(io, iov, nr_segs, count, ppos,
-				     FUSE_DIO_WRITE);
+	if (!res) {
+		iov_iter_truncate(iter, count);
+		res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
+	}
 
 	fuse_invalidate_attr(inode);
 
@@ -1512,13 +1477,15 @@
 	struct inode *inode = file_inode(file);
 	ssize_t res;
 	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct iov_iter ii;
+	iov_iter_init(&ii, WRITE, &iov, 1, count);
 
 	if (is_bad_inode(inode))
 		return -EIO;
 
 	/* Don't allow parallel writes to the same file */
 	mutex_lock(&inode->i_mutex);
-	res = __fuse_direct_write(&io, &iov, 1, ppos);
+	res = __fuse_direct_write(&io, &ii, ppos);
 	if (res > 0)
 		fuse_write_update_size(inode, *ppos);
 	mutex_unlock(&inode->i_mutex);
@@ -2372,7 +2339,7 @@
 	if (!bytes)
 		return 0;
 
-	iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+	iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
 
 	while (iov_iter_count(&ii)) {
 		struct page *page = pages[page_idx++];
@@ -2894,8 +2861,8 @@
 }
 
 static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
@@ -2904,7 +2871,7 @@
 	loff_t pos = 0;
 	struct inode *inode;
 	loff_t i_size;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	struct fuse_io_priv *io;
 
 	pos = offset;
@@ -2919,6 +2886,7 @@
 		if (offset >= i_size)
 			return 0;
 		count = min_t(loff_t, count, fuse_round_up(i_size - offset));
+		iov_iter_truncate(iter, count);
 	}
 
 	io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2948,9 +2916,9 @@
 		io->async = false;
 
 	if (rw == WRITE)
-		ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+		ret = __fuse_direct_write(io, iter, &pos);
 	else
-		ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+		ret = __fuse_direct_read(io, iter, &pos);
 
 	if (io->async) {
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -3061,10 +3029,10 @@
 
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= fuse_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= fuse_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= fuse_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= fuse_file_write_iter,
 	.mmap		= fuse_file_mmap,
 	.open		= fuse_open,
 	.flush		= fuse_flush,

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7aa5c75..e8e47a6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h

@@ -880,9 +880,8 @@
 /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */
 #define FUSE_DIO_CUSE  (1 << 1)
 
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
-		       unsigned long nr_segs, size_t count, loff_t *ppos,
-		       int flags);
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+		       loff_t *ppos, int flags);
 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		   unsigned int flags);
 long fuse_ioctl_common(struct file *file, unsigned int cmd,

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 492123c..805b37f 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c

@@ -1040,8 +1040,7 @@
 
 
 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1081,7 +1080,7 @@
 	 */
 	if (mapping->nrpages) {
 		loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
-		loff_t len = iov_length(iov, nr_segs);
+		loff_t len = iov_iter_count(iter);
 		loff_t end = PAGE_ALIGN(offset + len) - 1;
 
 		rv = 0;
@@ -1096,9 +1095,9 @@
 			truncate_inode_pages_range(mapping, lstart, end);
 	}
 
-	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, gfs2_get_block_direct,
-				  NULL, NULL, 0);
+	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				  iter, offset,
+				  gfs2_get_block_direct, NULL, NULL, 0);
 out:
 	gfs2_glock_dq(&gh);
 	gfs2_holder_uninit(&gh);

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6ab0cfb..4fc3a30 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c

@@ -684,7 +684,7 @@
 }
 
 /**
- * gfs2_file_aio_write - Perform a write to a file
+ * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
  * @iov: The data to write
  * @nr_segs: Number of @iov segments
@@ -697,11 +697,9 @@
  *
  */
 
-static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				   unsigned long nr_segs, loff_t pos)
+static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	size_t writesize = iov_length(iov, nr_segs);
 	struct gfs2_inode *ip = GFS2_I(file_inode(file));
 	int ret;
 
@@ -709,7 +707,7 @@
 	if (ret)
 		return ret;
 
-	gfs2_size_hint(file, pos, writesize);
+	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
 
 	if (file->f_flags & O_APPEND) {
 		struct gfs2_holder gh;
@@ -720,7 +718,7 @@
 		gfs2_glock_dq_uninit(&gh);
 	}
 
-	return generic_file_aio_write(iocb, iov, nr_segs, pos);
+	return generic_file_write_iter(iocb, from);
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1058,10 +1056,10 @@
 
 const struct file_operations gfs2_file_fops = {
 	.llseek		= gfs2_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= gfs2_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= gfs2_file_write_iter,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
@@ -1070,7 +1068,7 @@
 	.lock		= gfs2_lock,
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= gfs2_setlease,
 	.fallocate	= gfs2_fallocate,
 };
@@ -1090,17 +1088,17 @@
 
 const struct file_operations gfs2_file_fops_nolock = {
 	.llseek		= gfs2_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= gfs2_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= gfs2_file_write_iter,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
 	.fsync		= gfs2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= generic_setlease,
 	.fallocate	= gfs2_fallocate,
 };

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9e2fecd..d0929bc 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c

@@ -125,15 +125,15 @@
 }
 
 static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 hfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -141,7 +141,7 @@
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			hfs_write_failed(mapping, end);
@@ -674,10 +674,10 @@
 
 static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfs_file_fsync,

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a4f45bd..0cf786f 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c

@@ -123,14 +123,15 @@
 }
 
 static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 
 				 hfsplus_get_block);
 
 	/*
@@ -139,7 +140,7 @@
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			hfsplus_write_failed(mapping, end);
@@ -340,10 +341,10 @@
 
 static const struct file_operations hfsplus_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfsplus_file_fsync,

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 9c470fd..bb529f3 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c

@@ -378,11 +378,11 @@
 
 static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
+	.read		= new_sync_read,
 	.splice_read	= generic_file_splice_read,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
-	.write		= do_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
+	.write		= new_sync_write,
 	.mmap		= generic_file_mmap,
 	.open		= hostfs_file_open,
 	.release	= hostfs_file_release,

diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 67c1a61..7f54e5f 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c

@@ -197,10 +197,10 @@
 const struct file_operations hpfs_file_ops =
 {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,

diff --git a/fs/inode.c b/fs/inode.c
index 2feb9b6..6eecb7f 100644
--- a/fs/inode.c
+++ b/fs/inode.c

@@ -1839,14 +1839,18 @@
  * inode_owner_or_capable - check current task permissions to inode
  * @inode: inode being checked
  *
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
  */
 bool inode_owner_or_capable(const struct inode *inode)
 {
+	struct user_namespace *ns;
+
 	if (uid_eq(current_fsuid(), inode->i_uid))
 		return true;
-	if (inode_capable(inode, CAP_FOWNER))
+
+	ns = current_user_ns();
+	if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
 		return true;
 	return false;
 }

diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 25c713e..8898bbd 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c

@@ -231,19 +231,15 @@
 
 static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 {
-	int shift = 0;
-	int tmp = hash_size;
+	int i;
 	struct jbd_revoke_table_s *table;
 
 	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
 	if (!table)
 		goto out;
 
-	while((tmp >>= 1UL) != 0UL)
-		shift++;
-
 	table->hash_size = hash_size;
-	table->hash_shift = shift;
+	table->hash_shift = ilog2(hash_size);
 	table->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
 	if (!table->hash_table) {
@@ -252,8 +248,8 @@
 		goto out;
 	}
 
-	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&table->hash_table[tmp]);
+	for (i = 0; i < hash_size; i++)
+		INIT_LIST_HEAD(&table->hash_table[i]);
 
 out:
 	return table;

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 256cd19..64989ca 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c

@@ -51,10 +51,10 @@
 {
 	.llseek =	generic_file_llseek,
 	.open =		generic_file_open,
- 	.read =		do_sync_read,
- 	.aio_read =	generic_file_aio_read,
- 	.write =	do_sync_write,
- 	.aio_write =	generic_file_aio_write,
+ 	.read =		new_sync_read,
+ 	.read_iter =	generic_file_read_iter,
+ 	.write =	new_sync_write,
+ 	.write_iter =	generic_file_write_iter,
 	.unlocked_ioctl=jffs2_ioctl,
 	.mmap =		generic_file_readonly_mmap,
 	.fsync =	jffs2_fsync,

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 794da94..33aa0cc 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c

@@ -151,13 +151,13 @@
 const struct file_operations jfs_file_operations = {
 	.open		= jfs_open,
 	.llseek		= generic_file_llseek,
-	.write		= do_sync_write,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
 	.unlocked_ioctl = jfs_ioctl,

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f8fe72..bd3df1c 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c

@@ -331,15 +331,15 @@
 }
 
 static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
-	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+	struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 jfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -347,7 +347,7 @@
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			jfs_write_failed(mapping, end);

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 00ec0b9..d3e40db 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c

@@ -14,6 +14,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs3.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)

diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 9a55797..3e9f787 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c

@@ -15,6 +15,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index de051cb1..8f27c93 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c

@@ -622,8 +622,8 @@
 err_pernet:
 #ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(nlm_sysctl_table);
-#endif
 err_sysctl:
+#endif
 	return err;
 }
 

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dc5c759..b6f3b84 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c

@@ -14,12 +14,11 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/nfsd/nfsfh.h>
-#include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <uapi/linux/nfs2.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
 

diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 964666c..9340e7e 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c

@@ -16,6 +16,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 

diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 57914fc..8538752 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c

@@ -264,15 +264,15 @@
 };
 
 const struct file_operations logfs_reg_fops = {
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= logfs_fsync,
 	.unlocked_ioctl	= logfs_ioctl,
 	.llseek		= generic_file_llseek,
 	.mmap		= generic_file_readonly_mmap,
 	.open		= generic_file_open,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
 };
 
 const struct address_space_operations logfs_reg_aops = {

diff --git a/fs/minix/file.c b/fs/minix/file.c
index adc6f54..a967de0 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c

@@ -14,10 +14,10 @@
  */
 const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,

diff --git a/fs/namei.c b/fs/namei.c
index 8016827..985c6f3 100644
--- a/fs/namei.c
+++ b/fs/namei.c

@@ -332,10 +332,11 @@
 
 	if (S_ISDIR(inode->i_mode)) {
 		/* DACs are overridable for directories */
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 		if (!(mask & MAY_WRITE))
-			if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+			if (capable_wrt_inode_uidgid(inode,
+						     CAP_DAC_READ_SEARCH))
 				return 0;
 		return -EACCES;
 	}
@@ -345,7 +346,7 @@
 	 * at least one exec bit set.
 	 */
 	if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 
 	/*
@@ -353,7 +354,7 @@
 	 */
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 	if (mask == MAY_READ)
-		if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
 			return 0;
 
 	return -EACCES;
@@ -2379,7 +2380,7 @@
 		return 0;
 	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
-	return !inode_capable(inode, CAP_FOWNER);
+	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
 }
 
 /*

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a6..4782e08 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile

@@ -29,8 +29,6 @@
 nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
 nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
 
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
-
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
 obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849b..9b431f4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c

@@ -210,7 +210,7 @@
 			SetPageUptodate(bvec->bv_page);
 
 	if (err) {
-		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_data *rdata = par->data;
 		struct nfs_pgio_header *header = rdata->header;
 
 		if (!header->pnfs_error)
@@ -224,17 +224,17 @@
 static void bl_read_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 	pnfs_ld_read_done(rdata);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
@@ -390,7 +390,7 @@
 	}
 
 	if (unlikely(err)) {
-		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_data *data = par->data;
 		struct nfs_pgio_header *header = data->header;
 
 		if (!header->pnfs_error)
@@ -405,7 +405,7 @@
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_data *data = par->data;
 	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
@@ -423,10 +423,10 @@
 static void bl_write_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(wdata->header->pnfs_error)) {
 		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 {
 	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@
 		pnfs_generic_pg_init_read(pgio, req);
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, SECTOR_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -1241,13 +1245,17 @@
 	}
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, PAGE_CACHE_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae..8f98138 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c

@@ -108,6 +108,97 @@
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+		       struct nfs_client *ds_clp,
+		       int ds_idx)
+{
+	struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+	if (ds_clp) {
+		/* pNFS is in use, use the DS verf */
+		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+		else
+			WARN_ON_ONCE(1);
+	}
+#endif
+	return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+				    struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+				      hdr->data->ds_idx);
+	WARN_ON_ONCE(verfp->committed >= 0);
+	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+					  struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+					 hdr->data->ds_idx);
+	if (verfp->committed < 0) {
+		nfs_direct_set_hdr_verf(dreq, hdr);
+		return 0;
+	}
+	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+					   struct nfs_commit_data *data)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+					 data->ds_commit_index);
+	WARN_ON_ONCE(verfp->committed < 0);
+	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -121,20 +212,20 @@
  * shunt off direct read and write requests before the VFS gets them,
  * so this method is only ever called for swap.
  */
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 #ifndef CONFIG_NFS_SWAP
 	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp, (long long) pos, nr_segs);
+			iocb->ki_filp, (long long) pos, iter->nr_segs);
 
 	return -EINVAL;
 #else
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
@@ -168,6 +259,7 @@
 	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
 	spin_lock_init(&dreq->lock);
 
@@ -322,66 +414,42 @@
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
+
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
 {
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-	do {
+	nfs_pageio_init_read(&desc, dreq->inode, false,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
+
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
 		size_t bytes;
-		int i;
+		size_t pgbase;
+		unsigned npages, i;
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  rsize, &pgbase);
+		if (result < 0)
 			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
+	
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -389,56 +457,21 @@
 			}
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
 				nfs_release_request(req);
 				break;
 			}
 			pgbase = 0;
 			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
+			requested_bytes += req_len;
 			pos += req_len;
-			count -= req_len;
 			dreq->bytes_left -= req_len;
 		}
-		/* The nfs_page now hold references to these pages */
 		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      const struct iovec *iov,
-					      unsigned long nr_segs,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -461,8 +494,7 @@
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers into which to read data
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
@@ -479,8 +511,8 @@
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -488,9 +520,7 @@
 	struct nfs_direct_req *dreq;
 	struct nfs_lock_context *l_ctx;
 	ssize_t result = -EINVAL;
-	size_t count;
-
-	count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
 	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -513,7 +543,7 @@
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = iov_length(iov, nr_segs);
+	dreq->bytes_left = count;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
 	if (IS_ERR(l_ctx)) {
@@ -524,8 +554,8 @@
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	NFS_I(inode)->read_io += iov_length(iov, nr_segs);
-	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	NFS_I(inode)->read_io += count;
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -564,7 +594,7 @@
 	dreq->count = 0;
 	get_dreq(dreq);
 
-	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
@@ -603,7 +633,7 @@
 		dprintk("NFS: %5u commit failed with error %d.\n",
 			data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
 		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
@@ -681,109 +711,6 @@
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -813,13 +740,13 @@
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, hdr->verf,
-				       sizeof(dreq->verf));
+				nfs_direct_set_hdr_verf(dreq, hdr);
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
-					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+					dreq->flags =
+						NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
 					bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +756,8 @@
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
+		bool do_destroy = true;
+
 		req = nfs_list_entry(hdr->pages.next);
 		nfs_list_remove_request(req);
 		switch (bit) {
@@ -836,6 +765,7 @@
 		case NFS_IOHDR_NEED_COMMIT:
 			kref_get(&req->wb_kref);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+			do_destroy = false;
 		}
 		nfs_unlock_and_release_request(req);
 	}
@@ -863,33 +793,77 @@
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or iov_iter_get_pages_alloc() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
-					       const struct iovec *iov,
-					       unsigned long nr_segs,
-					       loff_t pos, bool uio)
+					       struct iov_iter *iter,
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
-	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
 
@@ -911,8 +885,7 @@
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
@@ -930,8 +903,8 @@
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	ssize_t result = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -940,9 +913,7 @@
 	struct nfs_direct_req *dreq;
 	struct nfs_lock_context *l_ctx;
 	loff_t end;
-	size_t count;
-
-	count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -993,7 +964,7 @@
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c1edf73..4042ff5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c

@@ -165,22 +165,21 @@
 EXPORT_SYMBOL_GPL(nfs_file_flush);
 
 ssize_t
-nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t result;
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
 
-	dprintk("NFS: read(%pD2, %lu@%lu)\n",
+	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
-		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
-		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
@@ -635,24 +634,24 @@
 	return 0;
 }
 
-ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	unsigned long written = 0;
 	ssize_t result;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(from);
+	loff_t pos = iocb->ki_pos;
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
 
 	if (file->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_write(iocb, from, pos, true);
 
-	dprintk("NFS: write(%pD2, %lu@%Ld)\n",
-		file, (unsigned long) count, (long long) pos);
+	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+		file, count, (long long) pos);
 
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
@@ -670,7 +669,7 @@
 	if (!count)
 		goto out;
 
-	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	result = generic_file_write_iter(iocb, from);
 	if (result > 0)
 		written = result;
 
@@ -691,36 +690,6 @@
 }
 EXPORT_SYMBOL_GPL(nfs_file_write);
 
-ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
-			      struct file *filp, loff_t *ppos,
-			      size_t count, unsigned int flags)
-{
-	struct inode *inode = file_inode(filp);
-	unsigned long written = 0;
-	ssize_t ret;
-
-	dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
-		filp, (unsigned long) count, (unsigned long long) *ppos);
-
-	/*
-	 * The combination of splice and an O_APPEND destination is disallowed.
-	 */
-
-	ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
-	if (ret > 0)
-		written = ret;
-
-	if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
-		int err = vfs_fsync(filp, 0);
-		if (err < 0)
-			ret = err;
-	}
-	if (ret > 0)
-		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_write);
-
 static int
 do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
@@ -935,10 +904,10 @@
 
 const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= nfs_file_read,
-	.aio_write	= nfs_file_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= nfs_file_read,
+	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
 	.open		= nfs_file_open,
 	.flush		= nfs_file_flush,
@@ -947,7 +916,7 @@
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
-	.splice_write	= nfs_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
 	.setlease	= nfs_setlease,
 };

diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 0000000..8516cdf
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile

@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
similarity index 90%
rename from fs/nfs/nfs4filelayout.c
rename to fs/nfs/filelayout/filelayout.c
index b9a35c0..d2eba1c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c

@@ -35,11 +35,11 @@
 
 #include <linux/sunrpc/metrics.h>
 
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
-#include "nfs4trace.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
@@ -84,7 +84,7 @@
 	BUG();
 }
 
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@
 	}
 }
 
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-				struct nfs_read_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -270,7 +270,7 @@
  * rfc5661 is not clear about which credential should be used.
  */
 static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 
@@ -279,7 +279,7 @@
 		return;
 
 	pnfs_set_layoutcommit(wdata);
-	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
@@ -305,7 +305,7 @@
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@
 		rpc_exit(task, 0);
 		return;
 	}
-	rdata->read_done_cb = filelayout_read_done_cb;
+	rdata->pgio_done_cb = filelayout_read_done_cb;
 
 	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args,
@@ -331,7 +331,7 @@
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -347,14 +347,14 @@
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-				struct nfs_write_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -419,7 +419,7 @@
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
 	    task->tk_status == 0) {
@@ -457,14 +457,14 @@
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@
 	/* No multipath support. Use first DS */
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -568,14 +569,14 @@
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_read(ds_clnt, data,
-				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+	nfs_initiate_pgio(ds_clnt, data,
+			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@
 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-	data->write_done_cb = filelayout_write_done_cb;
+	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
-	/*
-	 * Get the file offset on the dserver. Set the write offset to
-	 * this offset and save the original offset.
-	 */
+
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_write(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@
 	struct nfs4_deviceid_node *d;
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
-	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
 
 	dprintk("--> %s\n", __func__);
 
@@ -655,7 +653,7 @@
 		goto out;
 	}
 
-	if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+	if (!fl->stripe_unit) {
 		dprintk("%s Invalid stripe unit (%u)\n",
 			__func__, fl->stripe_unit);
 		goto out;
@@ -692,12 +690,6 @@
 		goto out_put;
 	}
 
-	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
-		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
-			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
-			nfss->wsize);
-	}
-
 	status = 0;
 out:
 	dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@
 {
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	struct pnfs_commit_bucket *buckets;
-	int size;
+	int size, i;
 
 	if (fl->commit_through_mds)
 		return 0;
-	if (cinfo->ds->nbuckets != 0) {
+
+	size = (fl->stripe_type == STRIPE_SPARSE) ?
+		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+	if (cinfo->ds->nbuckets >= size) {
 		/* This assumes there is only one IOMODE_RW lseg.  What
 		 * we really want to do is have a layout_hdr level
 		 * dictionary of <multipath_list4, fh> keys, each
@@ -864,30 +860,36 @@
 		return 0;
 	}
 
-	size = (fl->stripe_type == STRIPE_SPARSE) ?
-		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
 	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
 			  gfp_flags);
 	if (!buckets)
 		return -ENOMEM;
-	else {
-		int i;
-
-		spin_lock(cinfo->lock);
-		if (cinfo->ds->nbuckets != 0)
-			kfree(buckets);
-		else {
-			cinfo->ds->buckets = buckets;
-			cinfo->ds->nbuckets = size;
-			for (i = 0; i < size; i++) {
-				INIT_LIST_HEAD(&buckets[i].written);
-				INIT_LIST_HEAD(&buckets[i].committing);
-			}
-		}
-		spin_unlock(cinfo->lock);
-		return 0;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&buckets[i].written);
+		INIT_LIST_HEAD(&buckets[i].committing);
+		/* mark direct verifier as unset */
+		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
 	}
+
+	spin_lock(cinfo->lock);
+	if (cinfo->ds->nbuckets >= size)
+		goto out;
+	for (i = 0; i < cinfo->ds->nbuckets; i++) {
+		list_splice(&cinfo->ds->buckets[i].written,
+			    &buckets[i].written);
+		list_splice(&cinfo->ds->buckets[i].committing,
+			    &buckets[i].committing);
+		buckets[i].direct_verf.committed =
+			cinfo->ds->buckets[i].direct_verf.committed;
+		buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+		buckets[i].clseg = cinfo->ds->buckets[i].clseg;
+	}
+	swap(cinfo->ds->buckets, buckets);
+	cinfo->ds->nbuckets = size;
+out:
+	spin_unlock(cinfo->lock);
+	kfree(buckets);
+	return 0;
 }
 
 static struct pnfs_layout_segment *
@@ -915,47 +917,51 @@
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
- * return true  : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
  */
-static bool
+static size_t
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
+	unsigned int size;
 	u64 p_stripe, r_stripe;
-	u32 stripe_unit;
+	u32 stripe_offset;
+	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+	u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req) ||
-	    !nfs_generic_pg_test(pgio, prev, req))
-		return false;
+	/* calls nfs_generic_pg_test */
+	size = pnfs_generic_pg_test(pgio, prev, req);
+	if (!size)
+		return 0;
 
-	p_stripe = (u64)req_offset(prev);
-	r_stripe = (u64)req_offset(req);
-	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+	/* see if req and prev are in the same stripe */
+	if (prev) {
+		p_stripe = (u64)req_offset(prev) - segment_offset;
+		r_stripe = (u64)req_offset(req) - segment_offset;
+		do_div(p_stripe, stripe_unit);
+		do_div(r_stripe, stripe_unit);
 
-	do_div(p_stripe, stripe_unit);
-	do_div(r_stripe, stripe_unit);
+		if (p_stripe != r_stripe)
+			return 0;
+	}
 
-	return (p_stripe == r_stripe);
+	/* calculate remaining bytes in the current stripe */
+	div_u64_rem((u64)req_offset(req) - segment_offset,
+			stripe_unit,
+			&stripe_offset);
+	WARN_ON_ONCE(stripe_offset > stripe_unit);
+	if (stripe_offset >= stripe_unit)
+		return 0;
+	return min(stripe_unit - (unsigned int)stripe_offset, size);
 }
 
 static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	if (req->wb_offset != req->wb_pgbase) {
-		/*
-		 * Handling unaligned pages is difficult, because have to
-		 * somehow split a req in two in certain cases in the
-		 * pg.test code.  Avoid this by just not using pnfs
-		 * in this case.
-		 */
-		nfs_pageio_reset_read_mds(pgio);
-		return;
-	}
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+	if (!pgio->pg_lseg)
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
 					   NFS4_MAX_UINT64,
@@ -973,11 +979,8 @@
 	struct nfs_commit_info cinfo;
 	int status;
 
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	if (req->wb_offset != req->wb_pgbase)
-		goto out_mds;
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+	if (!pgio->pg_lseg)
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
 					   NFS4_MAX_UINT64,
@@ -1067,6 +1070,7 @@
 	 */
 	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 	i = select_bucket_index(fl, j);
+	spin_lock(cinfo->lock);
 	buckets = cinfo->ds->buckets;
 	list = &buckets[i].written;
 	if (list_empty(list)) {
@@ -1080,6 +1084,7 @@
 	}
 	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 	cinfo->ds->nwritten++;
+	spin_unlock(cinfo->lock);
 	return list;
 }
 
@@ -1176,6 +1181,7 @@
 	return ret;
 }
 
+/* Note: called with cinfo->lock held. */
 static int
 filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 			       struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@
 					   struct nfs_commit_info *cinfo)
 {
 	struct pnfs_commit_bucket *b;
+	struct pnfs_layout_segment *freeme;
 	int i;
 
+restart:
 	spin_lock(cinfo->lock);
 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
 		if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
-			spin_unlock(cinfo->lock);
-			pnfs_put_lseg(b->wlseg);
+			freeme = b->wlseg;
 			b->wlseg = NULL;
-			spin_lock(cinfo->lock);
+			spin_unlock(cinfo->lock);
+			pnfs_put_lseg(freeme);
+			goto restart;
 		}
 	}
 	cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@
 	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
+	struct pnfs_layout_segment *freeme;
 
 	fl_cinfo = cinfo->ds;
 	bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@
 		if (!data)
 			break;
 		data->ds_commit_index = i;
+		spin_lock(cinfo->lock);
 		data->lseg = bucket->clseg;
 		bucket->clseg = NULL;
+		spin_unlock(cinfo->lock);
 		list_add(&data->pages, list);
 		nreq++;
 	}
@@ -1264,8 +1276,11 @@
 		if (list_empty(&bucket->committing))
 			continue;
 		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
-		pnfs_put_lseg(bucket->clseg);
+		spin_lock(cinfo->lock);
+		freeme = bucket->clseg;
 		bucket->clseg = NULL;
+		spin_unlock(cinfo->lock);
+		pnfs_put_lseg(freeme);
 	}
 	/* Caller will clean up entries put on list */
 	return nreq;
@@ -1330,7 +1345,7 @@
 	struct nfs4_filelayout *flo;
 
 	flo = kzalloc(sizeof(*flo), gfp_flags);
-	return &flo->generic_hdr;
+	return flo != NULL ? &flo->generic_hdr : NULL;
 }
 
 static void

diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
similarity index 99%
rename from fs/nfs/nfs4filelayout.h
rename to fs/nfs/filelayout/filelayout.h
index cebd20e..ffbddf2 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h

@@ -30,7 +30,7 @@
 #ifndef FS_NFS_NFS4FILELAYOUT_H
 #define FS_NFS_NFS4FILELAYOUT_H
 
-#include "pnfs.h"
+#include "../pnfs.h"
 
 /*
  * Default data server connection timeout and retrans vaules.

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
similarity index 99%
rename from fs/nfs/nfs4filelayoutdev.c
rename to fs/nfs/filelayout/filelayoutdev.c
index b9c61ef..44bf014 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c

@@ -33,9 +33,9 @@
 #include <linux/module.h>
 #include <linux/sunrpc/addr.h>
 
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
 

diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9..b94f804 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c

@@ -120,7 +120,8 @@
 
 	security_d_instantiate(ret, inode);
 	spin_lock(&ret->d_lock);
-	if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+	if (IS_ROOT(ret) && !ret->d_fsdata &&
+	    !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
 		ret->d_fsdata = name;
 		name = NULL;
 	}

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e6f7398..c496f8a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c

@@ -1575,18 +1575,20 @@
 			inode->i_version = fattr->change_attr;
 		}
 	} else if (server->caps & NFS_CAP_CHANGE_ATTR)
-		invalid |= save_cache_validity;
+		nfsi->cache_validity |= save_cache_validity;
 
 	if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 	} else if (server->caps & NFS_CAP_MTIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
 		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
 	} else if (server->caps & NFS_CAP_CTIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	/* Check if our cached file size is stale */
@@ -1608,7 +1610,8 @@
 					(long long)new_isize);
 		}
 	} else
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_PAGECACHE
 				| NFS_INO_REVAL_FORCED);
 
@@ -1616,7 +1619,8 @@
 	if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 		memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 	else if (server->caps & NFS_CAP_ATIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATIME
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1627,7 +1631,8 @@
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 		}
 	} else if (server->caps & NFS_CAP_MODE)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1638,7 +1643,8 @@
 			inode->i_uid = fattr->uid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1649,7 +1655,8 @@
 			inode->i_gid = fattr->gid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER_GROUP)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1662,7 +1669,8 @@
 			set_nlink(inode, fattr->nlink);
 		}
 	} else if (server->caps & NFS_CAP_NLINK)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2..82ddbf4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -231,13 +231,20 @@
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
 extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr,
 			      void (*release)(struct nfs_pgio_header *hdr));
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+		      const struct rpc_call_ops *, int, int);
+
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
 	c->flags = 0;
@@ -320,16 +327,14 @@
 int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 int nfs_file_flush(struct file *, fl_owner_t);
-ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
 ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
 			     size_t, unsigned int);
 int nfs_file_mmap(struct file *, struct vm_area_struct *);
-ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
 int nfs_file_release(struct inode *, struct file *);
 int nfs_lock(struct file *, int, struct file_lock *);
 int nfs_flock(struct file *, int, struct file_lock *);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
-			      size_t, unsigned int);
 int nfs_check_flags(int);
 int nfs_setlease(struct file *, long, struct file_lock **);
 
@@ -395,19 +400,11 @@
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
+			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_read_data *data,
-			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* super.c */
 void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -422,19 +419,10 @@
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode, int ioflags,
+			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
-			      struct nfs_write_data *data,
-			      const struct rpc_call_ops *call_ops,
-			      int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -447,6 +435,7 @@
 			    struct nfs_commit_info *cinfo);
 int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 			 struct nfs_commit_info *cinfo, int max);
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
 int nfs_scan_commit(struct inode *inode, struct list_head *dst,
 		    struct nfs_commit_info *cinfo);
 void nfs_mark_request_commit(struct nfs_page *req,
@@ -492,7 +481,7 @@
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr);

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136..5f61b83 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c

@@ -103,7 +103,7 @@
 /*
  *	typedef opaque	nfsdata<>;
  */
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
 {
 	u32 recvd, count;
 	__be32 *p;
@@ -613,7 +613,7 @@
  *	};
  */
 static void encode_readargs(struct xdr_stream *xdr,
-			    const struct nfs_readargs *args)
+			    const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -629,7 +629,7 @@
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_readargs *args)
+				  const struct nfs_pgio_args *args)
 {
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@
  *	};
  */
 static void encode_writeargs(struct xdr_stream *xdr,
-			     const struct nfs_writeargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -669,7 +669,7 @@
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_writeargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
@@ -857,7 +857,7 @@
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_readres *result)
+				struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -878,7 +878,7 @@
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_writeres *result)
+				 struct nfs_pgio_res *result)
 {
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149..e7daa42 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c

@@ -795,7 +795,7 @@
 	return status;
 }
 
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -807,18 +807,18 @@
 	return 0;
 }
 
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -829,17 +829,11 @@
 	return 0;
 }
 
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	rpc_call_start(task);
@@ -946,13 +940,10 @@
 	.fsinfo		= nfs3_proc_fsinfo,
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
+	.pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
 	.read_setup	= nfs3_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
-	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
-	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
 	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d721..8f4cbe7 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c

@@ -953,7 +953,7 @@
  *	};
  */
 static void encode_read3args(struct xdr_stream *xdr,
-			     const struct nfs_readargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -966,7 +966,7 @@
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_readargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@
  *	};
  */
 static void encode_write3args(struct xdr_stream *xdr,
-			      const struct nfs_writeargs *args)
+			      const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -1008,7 +1008,7 @@
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_writeargs *args)
+				    const struct nfs_pgio_args *args)
 {
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
@@ -1589,7 +1589,7 @@
  *	};
  */
 static int decode_read3resok(struct xdr_stream *xdr,
-			     struct nfs_readres *result)
+			     struct nfs_pgio_res *result)
 {
 	u32 eof, count, ocount, recvd;
 	__be32 *p;
@@ -1625,7 +1625,7 @@
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_readres *result)
+				 struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1673,7 +1673,7 @@
  *	};
  */
 static int decode_write3resok(struct xdr_stream *xdr,
-			      struct nfs_writeres *result)
+			      struct nfs_pgio_res *result)
 {
 	__be32 *p;
 
@@ -1697,7 +1697,7 @@
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_writeres *result)
+				  struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1bad..f63cb87 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h

@@ -337,7 +337,7 @@
  */
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 	if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
 	    !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@
 
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 }
 #endif /* CONFIG_NFS_V4_1 */

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407..a816f06 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c

@@ -100,8 +100,7 @@
 			break;
 		mutex_lock(&inode->i_mutex);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
-		if (!ret && !datasync)
-			/* application has asked for meta-data sync */
+		if (!ret)
 			ret = pnfs_layoutcommit_inode(inode, true);
 		mutex_unlock(&inode->i_mutex);
 		/*
@@ -118,10 +117,10 @@
 
 const struct file_operations nfs4_file_operations = {
 	.llseek		= nfs_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= nfs_file_read,
-	.aio_write	= nfs_file_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= nfs_file_read,
+	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
 	.open		= nfs4_file_open,
 	.flush		= nfs_file_flush,
@@ -130,7 +129,7 @@
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
-	.splice_write	= nfs_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
 	.setlease	= nfs_setlease,
 };

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39..285ad53 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -2027,7 +2027,7 @@
 			return status;
 	}
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		_nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
 	return 0;
 }
 
@@ -2750,7 +2750,7 @@
 
 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
 #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
@@ -4033,12 +4033,12 @@
 	return false;
 }
 
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
 {
 	nfs_invalidate_atime(data->header->inode);
 }
 
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4055,7 +4055,7 @@
 }
 
 static bool nfs4_read_stateid_changed(struct rpc_task *task,
-		struct nfs_readargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4068,7 +4068,7 @@
 	return true;
 }
 
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 
 	dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@
 		return -EAGAIN;
 	if (nfs4_read_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->read_done_cb ? data->read_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 				    nfs4_read_done_cb(task, data);
 }
 
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	data->timestamp   = jiffies;
-	data->read_done_cb = nfs4_read_done_cb;
+	data->pgio_done_cb = nfs4_read_done_cb;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
@@ -4097,14 +4097,14 @@
 			task))
 		return 0;
 	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_READ) == -EIO)
+				data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
 		return -EIO;
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
 		return -EIO;
 	return 0;
 }
 
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 	
@@ -4121,7 +4121,7 @@
 }
 
 static bool nfs4_write_stateid_changed(struct rpc_task *task,
-		struct nfs_writeargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4134,18 +4134,18 @@
 	return true;
 }
 
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
 	if (nfs4_write_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->write_done_cb ? data->write_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 		nfs4_write_done_cb(task, data);
 }
 
 static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
 {
 	const struct nfs_pgio_header *hdr = data->header;
 
@@ -4158,7 +4158,7 @@
 	return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
 
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4168,8 +4168,8 @@
 	} else
 		data->args.bitmask = server->cache_consistency_bitmask;
 
-	if (!data->write_done_cb)
-		data->write_done_cb = nfs4_write_done_cb;
+	if (!data->pgio_done_cb)
+		data->pgio_done_cb = nfs4_write_done_cb;
 	data->res.server = server;
 	data->timestamp   = jiffies;
 
@@ -4177,21 +4177,6 @@
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-			&data->args.seq_args,
-			&data->res.seq_res,
-			task))
-		return 0;
-	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_WRITE) == -EIO)
-		return -EIO;
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
-		return -EIO;
-	return 0;
-}
-
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,13 +8417,10 @@
 	.pathconf	= nfs4_proc_pathconf,
 	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
+	.pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
 	.read_setup	= nfs4_proc_read_setup,
-	.read_pageio_init = pnfs_pageio_init_read,
-	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
-	.write_pageio_init = pnfs_pageio_init_write,
-	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
 	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c0583b9..848f685 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c

@@ -1456,7 +1456,7 @@
 	 * server that doesn't support a grace period.
 	 */
 	spin_lock(&sp->so_lock);
-	write_seqcount_begin(&sp->so_reclaim_seqcount);
+	raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
 restart:
 	list_for_each_entry(state, &sp->so_states, open_states) {
 		if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1519,13 +1519,13 @@
 		spin_lock(&sp->so_lock);
 		goto restart;
 	}
-	write_seqcount_end(&sp->so_reclaim_seqcount);
+	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
 	spin_unlock(&sp->so_lock);
 	return 0;
 out_err:
 	nfs4_put_open_state(state);
 	spin_lock(&sp->so_lock);
-	write_seqcount_end(&sp->so_reclaim_seqcount);
+	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
 	spin_unlock(&sp->so_lock);
 	return status;
 }

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf14..0a744f3 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h

@@ -932,7 +932,7 @@
 
 DECLARE_EVENT_CLASS(nfs4_read_event,
 		TP_PROTO(
-			const struct nfs_read_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -972,7 +972,7 @@
 #define DEFINE_NFS4_READ_EVENT(name) \
 	DEFINE_EVENT(nfs4_read_event, name, \
 			TP_PROTO( \
-				const struct nfs_read_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))
@@ -983,7 +983,7 @@
 
 DECLARE_EVENT_CLASS(nfs4_write_event,
 		TP_PROTO(
-			const struct nfs_write_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -1024,7 +1024,7 @@
 #define DEFINE_NFS4_WRITE_EVENT(name) \
 	DEFINE_EVENT(nfs4_write_event, name, \
 			TP_PROTO( \
-				const struct nfs_write_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4..939ae60 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -1556,7 +1556,8 @@
 	encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
 }
 
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1701,7 +1702,8 @@
 	encode_nfs4_verifier(xdr, &arg->confirm);
 }
 
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			 struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -2451,7 +2453,7 @@
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_readargs *args)
+			      struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_writeargs *args)
+			       struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -5085,7 +5087,8 @@
 	return decode_op_hdr(xdr, OP_PUTROOTFH);
 }
 
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+		       struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	uint32_t count, eof, recvd;
@@ -5339,7 +5342,7 @@
 	return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
 }
 
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	int status;
@@ -6636,7 +6639,7 @@
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_readres *res)
+			     struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6661,7 +6664,7 @@
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_writeres *res)
+			      struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745..6113207 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c

@@ -439,7 +439,7 @@
 	objlayout_read_done(&objios->oir, status, objios->sync);
 }
 
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
@@ -487,7 +487,7 @@
 static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
-	struct nfs_write_data *wdata = objios->oir.rpcdata;
+	struct nfs_pgio_data *wdata = objios->oir.rpcdata;
 	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
 	struct page *page;
@@ -531,7 +531,7 @@
 	.put_page = &__r4w_put_page,
 };
 
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
@@ -564,14 +564,22 @@
 	return 0;
 }
 
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
 			  struct nfs_page *prev, struct nfs_page *req)
 {
-	if (!pnfs_generic_pg_test(pgio, prev, req))
-		return false;
+	unsigned int size;
 
-	return pgio->pg_count + req->wb_bytes <=
-			(unsigned long)pgio->pg_layout_private;
+	size = pnfs_generic_pg_test(pgio, prev, req);
+
+	if (!size || pgio->pg_count + req->wb_bytes >
+	    (unsigned long)pgio->pg_layout_private)
+		return 0;
+
+	return min(size, req->wb_bytes);
 }
 
 static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)

diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbf..765d3f5 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c

@@ -53,10 +53,10 @@
 	struct objlayout *objlay;
 
 	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
-	if (objlay) {
-		spin_lock_init(&objlay->lock);
-		INIT_LIST_HEAD(&objlay->err_list);
-	}
+	if (!objlay)
+		return NULL;
+	spin_lock_init(&objlay->lock);
+	INIT_LIST_HEAD(&objlay->err_list);
 	dprintk("%s: Return %p\n", __func__, objlay);
 	return &objlay->pnfs_layout;
 }
@@ -229,11 +229,11 @@
 static void _rpc_read_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_read_done(rdata);
 }
@@ -241,7 +241,7 @@
 void
 objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_read_data *rdata = oir->rpcdata;
+	struct nfs_pgio_data *rdata = oir->rpcdata;
 
 	oir->status = rdata->task.tk_status = status;
 	if (status >= 0)
@@ -266,7 +266,7 @@
  * Perform sync or async reads.
  */
 enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@
 static void _rpc_write_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_write_done(wdata);
 }
@@ -324,7 +324,7 @@
 void
 objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_write_data *wdata = oir->rpcdata;
+	struct nfs_pgio_data *wdata = oir->rpcdata;
 
 	oir->status = wdata->task.tk_status = status;
 	if (status >= 0) {
@@ -351,7 +351,7 @@
  * Perform sync or async writes.
  */
 enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
 			 int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;

diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1de..01e0410 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h

@@ -119,8 +119,8 @@
  */
 extern void objio_free_result(struct objlayout_io_res *oir);
 
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
 
 /*
  * callback API
@@ -168,10 +168,10 @@
 extern void objlayout_free_lseg(struct pnfs_layout_segment *);
 
 extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_read_data *);
+	struct nfs_pgio_data *);
 
 extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_write_data *,
+	struct nfs_pgio_data *,
 	int how);
 
 extern void objlayout_encode_layoutcommit(

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 03ed984..b6ee3a6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c

@@ -24,9 +24,14 @@
 #include "internal.h"
 #include "pnfs.h"
 
-static struct kmem_cache *nfs_page_cachep;
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
-bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static struct kmem_cache *nfs_page_cachep;
+static const struct rpc_call_ops nfs_pgio_common_ops;
+
+static void nfs_free_request(struct nfs_page *);
+
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 {
 	p->npages = pagecount;
 	if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -133,11 +138,156 @@
 	return __nfs_iocounter_wait(c);
 }
 
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+			nfs_wait_bit_uninterruptible,
+			TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	smp_mb__before_atomic();
+	clear_bit(PG_HEADLOCK, &head->wb_flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+	struct nfs_page *head = req->wb_head;
+	struct nfs_page *tmp;
+
+	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+	WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+	tmp = req->wb_this_page;
+	while (tmp != req) {
+		if (!test_bit(bit, &tmp->wb_flags))
+			return false;
+		tmp = tmp->wb_this_page;
+	}
+
+	/* true! reset all bits */
+	tmp = req;
+	do {
+		clear_bit(bit, &tmp->wb_flags);
+		tmp = tmp->wb_this_page;
+	} while (tmp != req);
+
+	return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ *   return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+	bool ret;
+
+	nfs_page_group_lock(req);
+	ret = nfs_page_group_sync_on_bit_locked(req, bit);
+	nfs_page_group_unlock(req);
+
+	return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ *         or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+	WARN_ON_ONCE(prev == req);
+
+	if (!prev) {
+		req->wb_head = req;
+		req->wb_this_page = req;
+	} else {
+		WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+		WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+		req->wb_head = prev->wb_head;
+		req->wb_this_page = prev->wb_this_page;
+		prev->wb_this_page = req;
+
+		/* grab extra ref if head request has extra ref from
+		 * the write/commit path to handle handoff between write
+		 * and commit lists */
+		if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+			kref_get(&req->wb_kref);
+	}
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @kref - kref embedded in the request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	struct nfs_page *tmp, *next;
+
+	if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+		return;
+
+	tmp = req;
+	do {
+		next = tmp->wb_this_page;
+		/* unlink and free */
+		tmp->wb_this_page = tmp;
+		tmp->wb_head = tmp;
+		nfs_free_request(tmp);
+		tmp = next;
+	} while (tmp != req);
+}
+
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
- * @inode: inode to which the request is attached
  * @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
  *
@@ -146,9 +296,9 @@
  * User should ensure it is safe to sleep in this function.
  */
 struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
-		   struct page *page,
-		   unsigned int offset, unsigned int count)
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+		   struct nfs_page *last, unsigned int offset,
+		   unsigned int count)
 {
 	struct nfs_page		*req;
 	struct nfs_lock_context *l_ctx;
@@ -180,6 +330,7 @@
 	req->wb_bytes   = count;
 	req->wb_context = get_nfs_open_context(ctx);
 	kref_init(&req->wb_kref);
+	nfs_page_group_init(req, last);
 	return req;
 }
 
@@ -237,16 +388,22 @@
 	}
 }
 
-
 /**
  * nfs_release_request - Release the count on an NFS read/write request
  * @req: request to release
  *
  * Note: Should never be called with the spinlock held!
  */
-static void nfs_free_request(struct kref *kref)
+static void nfs_free_request(struct nfs_page *req)
 {
-	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	WARN_ON_ONCE(req->wb_this_page != req);
+
+	/* extra debug: make sure no sync bits are still set */
+	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
@@ -255,13 +412,7 @@
 
 void nfs_release_request(struct nfs_page *req)
 {
-	kref_put(&req->wb_kref, nfs_free_request);
-}
-
-static int nfs_wait_bit_uninterruptible(void *word)
-{
-	io_schedule();
-	return 0;
+	kref_put(&req->wb_kref, nfs_page_group_destroy);
 }
 
 /**
@@ -279,22 +430,249 @@
 			TASK_UNINTERRUPTIBLE);
 }
 
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
+ * the number of bytes of @req (at most @req->wb_bytes) that can be coalesced.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+			   struct nfs_page *prev, struct nfs_page *req)
 {
-	/*
-	 * FIXME: ideally we should be able to coalesce all requests
-	 * that are not block boundary aligned, but currently this
-	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
-	 * since nfs_flush_multi and nfs_pagein_multi assume you
-	 * can have only one struct nfs_page.
-	 */
-	if (desc->pg_bsize < PAGE_SIZE)
+	if (desc->pg_count > desc->pg_bsize) {
+		/* should never happen */
+		WARN_ON_ONCE(1);
 		return 0;
+	}
 
-	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+	return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
 }
 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
+static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+{
+	return container_of(hdr, struct nfs_rw_header, header);
+}
+
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+	struct nfs_rw_header *header = ops->rw_alloc_header();
+
+	if (header) {
+		struct nfs_pgio_header *hdr = &header->header;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+		hdr->rw_ops = ops;
+	}
+	return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+	hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
+/**
+ * nfs_pgio_data_alloc - Allocate pageio data
+ * @hdr: The header making a request
+ * @pagecount: Number of pages to create
+ */
+static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
+						 unsigned int pagecount)
+{
+	struct nfs_pgio_data *data, *prealloc;
+
+	prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
+	if (prealloc->header == NULL)
+		data = prealloc;
+	else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+
+	if (nfs_pgarray_set(&data->pages, pagecount)) {
+		data->header = hdr;
+		atomic_inc(&hdr->refcnt);
+	} else {
+		if (data != prealloc)
+			kfree(data);
+		data = NULL;
+	}
+out:
+	return data;
+}
+
+/**
+ * nfs_pgio_data_release - Properly free pageio data
+ * @data: The data to release
+ */
+void nfs_pgio_data_release(struct nfs_pgio_data *data)
+{
+	struct nfs_pgio_header *hdr = data->header;
+	struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
+
+	put_nfs_open_context(data->args.context);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
+	if (data == &pageio_header->rpc_data) {
+		data->header = NULL;
+		data = NULL;
+	}
+	if (atomic_dec_and_test(&hdr->refcnt))
+		hdr->completion_ops->completion(hdr);
+	/* Note: we only free the rpc_task after callbacks are done.
+	 * See the comment in rpc_free_task() for why
+	 */
+	kfree(data);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+
+/**
+ * nfs_pgio_rpcsetup - Set up arguments for a pageio call
+ * @data: The pageio data
+ * @count: Number of bytes to read or write
+ * @offset: Initial offset
+ * @how: How to commit data (writes only)
+ * @cinfo: Commit information for the call (writes only)
+ */
+static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+			      unsigned int count, unsigned int offset,
+			      int how, struct nfs_commit_info *cinfo)
+{
+	struct nfs_page *req = data->header->req;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	data->args.fh     = NFS_FH(data->header->inode);
+	data->args.offset = req_offset(req) + offset;
+	/* pnfs_set_layoutcommit needs this */
+	data->mds_offset = data->args.offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages  = data->pages.pagevec;
+	data->args.count  = count;
+	data->args.context = get_nfs_open_context(req->wb_context);
+	data->args.lock_context = req->wb_lock_context;
+	data->args.stable  = NFS_UNSTABLE;
+	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+	case 0:
+		break;
+	case FLUSH_COND_STABLE:
+		if (nfs_reqs_to_commit(cinfo))
+			break;
+	default:
+		data->args.stable = NFS_FILE_SYNC;
+	}
+
+	data->res.fattr   = &data->fattr;
+	data->res.count   = count;
+	data->res.eof     = 0;
+	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
+}
+
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	int err;
+	err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
+}
+
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+		      const struct rpc_call_ops *call_ops, int how, int flags)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->header->cred,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.task = &data->task,
+		.rpc_message = &msg,
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
+		.flags = RPC_TASK_ASYNC | flags,
+	};
+	int ret = 0;
+
+	data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+	dprintk("NFS: %5u initiated pgio call "
+		"(req %s/%llu, %u bytes @ offset %llu)\n",
+		data->task.tk_pid,
+		data->header->inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(data->header->inode),
+		data->args.count,
+		(unsigned long long)data->args.offset);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
+	rpc_put_task(task);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
+/**
+ * nfs_pgio_error - Clean up from a pageio error
+ * @desc: IO descriptor
+ * @hdr: pageio header
+ */
+static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
+			  struct nfs_pgio_header *hdr)
+{
+	set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	nfs_pgio_data_release(hdr->data);
+	hdr->data = NULL;
+	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+	return -ENOMEM;
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+static void nfs_pgio_release(void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	if (data->header->rw_ops->rw_release)
+		data->header->rw_ops->rw_release(data);
+	nfs_pgio_data_release(data);
+}
+
 /**
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
@@ -307,6 +685,7 @@
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
 		     const struct nfs_pgio_completion_ops *compl_ops,
+		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -320,6 +699,7 @@
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
 	desc->pg_completion_ops = compl_ops;
+	desc->pg_rw_ops = rw_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
@@ -328,6 +708,94 @@
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+static void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	struct inode *inode = data->header->inode;
+
+	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+		task->tk_pid, task->tk_status);
+
+	if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+		return;
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+	else
+		data->header->rw_ops->rw_result(task, data);
+}
+
+/*
+ * Create an RPC task for the given read or write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+		     struct nfs_pgio_header *hdr)
+{
+	struct nfs_page		*req;
+	struct page		**pages;
+	struct nfs_pgio_data	*data;
+	struct list_head *head = &desc->pg_list;
+	struct nfs_commit_info cinfo;
+
+	data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							   desc->pg_count));
+	if (!data)
+		return nfs_pgio_error(desc, hdr);
+
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+	pages = data->pages.pagevec;
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &hdr->pages);
+		*pages++ = req->wb_page;
+	}
+
+	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
+		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+	/* Set up the argument struct */
+	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+	hdr->data = data;
+	desc->pg_rpc_callops = &nfs_pgio_common_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pgio);
+
+static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
+{
+	struct nfs_rw_header *rw_hdr;
+	struct nfs_pgio_header *hdr;
+	int ret;
+
+	rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
+	if (!rw_hdr) {
+		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &rw_hdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pgio(desc, hdr);
+	if (ret == 0)
+		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+					hdr->data, desc->pg_rpc_callops,
+					desc->pg_ioflags, 0);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		hdr->completion_ops->completion(hdr);
+	return ret;
+}
+
 static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 		const struct nfs_open_context *ctx2)
 {
@@ -356,18 +824,23 @@
 				      struct nfs_page *req,
 				      struct nfs_pageio_descriptor *pgio)
 {
-	if (!nfs_match_open_context(req->wb_context, prev->wb_context))
-		return false;
-	if (req->wb_context->dentry->d_inode->i_flock != NULL &&
-	    !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context))
-		return false;
-	if (req->wb_pgbase != 0)
-		return false;
-	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
-		return false;
-	if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
-		return false;
-	return pgio->pg_ops->pg_test(pgio, prev, req);
+	size_t size;
+
+	if (prev) {
+		if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+			return false;
+		if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+		    !nfs_match_lock_context(req->wb_lock_context,
+					    prev->wb_lock_context))
+			return false;
+		if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+			return false;
+	}
+	size = pgio->pg_ops->pg_test(pgio, prev, req);
+	WARN_ON_ONCE(size > req->wb_bytes);
+	if (size && size < req->wb_bytes)
+		req->wb_bytes = size;
+	return size > 0;
 }
 
 /**
@@ -381,17 +854,16 @@
 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 				     struct nfs_page *req)
 {
+	struct nfs_page *prev = NULL;
 	if (desc->pg_count != 0) {
-		struct nfs_page *prev;
-
 		prev = nfs_list_entry(desc->pg_list.prev);
-		if (!nfs_can_coalesce_requests(prev, req, desc))
-			return 0;
 	} else {
 		if (desc->pg_ops->pg_init)
 			desc->pg_ops->pg_init(desc, req);
 		desc->pg_base = req->wb_pgbase;
 	}
+	if (!nfs_can_coalesce_requests(prev, req, desc))
+		return 0;
 	nfs_list_remove_request(req);
 	nfs_list_add_request(req, &desc->pg_list);
 	desc->pg_count += req->wb_bytes;
@@ -421,22 +893,73 @@
  * @desc: destination io descriptor
  * @req: request
  *
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			   struct nfs_page *req)
 {
-	while (!nfs_pageio_do_add_request(desc, req)) {
-		desc->pg_moreio = 1;
-		nfs_pageio_doio(desc);
-		if (desc->pg_error < 0)
-			return 0;
-		desc->pg_moreio = 0;
-		if (desc->pg_recoalesce)
-			return 0;
-	}
+	struct nfs_page *subreq;
+	unsigned int bytes_left = 0;
+	unsigned int offset, pgbase;
+
+	nfs_page_group_lock(req);
+
+	subreq = req;
+	bytes_left = subreq->wb_bytes;
+	offset = subreq->wb_offset;
+	pgbase = subreq->wb_pgbase;
+
+	do {
+		if (!nfs_pageio_do_add_request(desc, subreq)) {
+			/* make sure pg_test call(s) did nothing */
+			WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+			WARN_ON_ONCE(subreq->wb_offset != offset);
+			WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+			nfs_page_group_unlock(req);
+			desc->pg_moreio = 1;
+			nfs_pageio_doio(desc);
+			if (desc->pg_error < 0)
+				return 0;
+			desc->pg_moreio = 0;
+			if (desc->pg_recoalesce)
+				return 0;
+			/* retry add_request for this subreq */
+			nfs_page_group_lock(req);
+			continue;
+		}
+
+		/* check for buggy pg_test call(s) */
+		WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+		WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+		WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+		bytes_left -= subreq->wb_bytes;
+		offset += subreq->wb_bytes;
+		pgbase += subreq->wb_bytes;
+
+		if (bytes_left) {
+			subreq = nfs_create_request(req->wb_context,
+					req->wb_page,
+					subreq, pgbase, bytes_left);
+			if (IS_ERR(subreq))
+				goto err_ptr;
+			nfs_lock_request(subreq);
+			subreq->wb_offset  = offset;
+			subreq->wb_index = req->wb_index;
+		}
+	} while (bytes_left > 0);
+
+	nfs_page_group_unlock(req);
 	return 1;
+err_ptr:
+	desc->pg_error = PTR_ERR(subreq);
+	nfs_page_group_unlock(req);
+	return 0;
 }
 
 static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -535,3 +1058,13 @@
 	kmem_cache_destroy(nfs_page_cachep);
 }
 
+static const struct rpc_call_ops nfs_pgio_common_ops = {
+	.rpc_call_prepare = nfs_pgio_prepare,
+	.rpc_call_done = nfs_pgio_result,
+	.rpc_release = nfs_pgio_release,
+};
+
+const struct nfs_pageio_ops nfs_pgio_rw_ops = {
+	.pg_test = nfs_generic_pg_test,
+	.pg_doio = nfs_generic_pg_pgios,
+};

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fd9536e..6fdcd23 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c

@@ -1388,11 +1388,6 @@
 
 	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
-	if (req->wb_offset != req->wb_pgbase) {
-		nfs_pageio_reset_read_mds(pgio);
-		return;
-	}
-
 	if (pgio->pg_dreq == NULL)
 		rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
 	else
@@ -1417,11 +1412,6 @@
 {
 	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
-	if (req->wb_offset != req->wb_pgbase) {
-		nfs_pageio_reset_write_mds(pgio);
-		return;
-	}
-
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   req_offset(req),
@@ -1434,56 +1424,49 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		      const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_read(pgio, inode, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		       int ioflags,
-		       const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
 {
-	if (pgio->pg_lseg == NULL)
-		return nfs_generic_pg_test(pgio, prev, req);
+	unsigned int size;
+	u64 seg_end, req_start, seg_left;
+
+	size = nfs_generic_pg_test(pgio, prev, req);
+	if (!size)
+		return 0;
 
 	/*
-	 * Test if a nfs_page is fully contained in the pnfs_layout_range.
-	 * Note that this test makes several assumptions:
-	 * - that the previous nfs_page in the struct nfs_pageio_descriptor
-	 *   is known to lie within the range.
-	 *   - that the nfs_page being tested is known to be contiguous with the
-	 *   previous nfs_page.
-	 *   - Layout ranges are page aligned, so we only have to test the
-	 *   start offset of the request.
+	 * 'size' contains the number of bytes left in the current page (up
+	 * to the original size asked for in @req->wb_bytes).
+	 *
+	 * Calculate how many bytes are left in the layout segment
+	 * and if there are less bytes than 'size', return that instead.
 	 *
 	 * Please also note that 'end_offset' is actually the offset of the
 	 * first byte that lies outside the pnfs_layout_range. FIXME?
 	 *
 	 */
-	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
-					 pgio->pg_lseg->pls_range.length);
+	if (pgio->pg_lseg) {
+		seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
+				     pgio->pg_lseg->pls_range.length);
+		req_start = req_offset(req);
+		WARN_ON_ONCE(req_start > seg_end);
+		/* start of request is past the last byte of this segment */
+		if (req_start >= seg_end)
+			return 0;
+
+		/* adjust 'size' iff there are fewer bytes left in the
+		 * segment than what nfs_generic_pg_test returned */
+		seg_left = seg_end - req_start;
+		if (seg_left < size)
+			size = (unsigned int)seg_left;
+	}
+
+	return size;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
@@ -1496,7 +1479,7 @@
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
@@ -1519,7 +1502,7 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
 
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1538,7 +1521,7 @@
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1554,7 +1537,7 @@
 
 static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_write_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1563,11 +1546,11 @@
 		nfs_pageio_reset_write_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_writedata_release(data);
+	nfs_pgio_data_release(data);
 }
 
 static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 			const struct rpc_call_ops *call_ops,
 			struct pnfs_layout_segment *lseg,
 			int how)
@@ -1589,41 +1572,36 @@
 }
 
 static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+	      struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_write_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_write_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_write_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_writehdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_write_header *whdr;
+	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_writehdr_alloc();
+	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!whdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		pnfs_put_lseg(desc->pg_lseg);
@@ -1634,12 +1612,12 @@
 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	atomic_inc(&hdr->refcnt);
-	ret = nfs_generic_flush(desc, hdr);
+	ret = nfs_generic_pgio(desc, hdr);
 	if (ret != 0) {
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+		pnfs_do_write(desc, hdr, desc->pg_ioflags);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
@@ -1655,7 +1633,7 @@
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read(&pgio, inode, compl_ops);
+	nfs_pageio_init_read(&pgio, inode, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
@@ -1674,7 +1652,7 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
 
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1693,7 +1671,7 @@
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1709,7 +1687,7 @@
 
 static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_read_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1718,14 +1696,14 @@
 		nfs_pageio_reset_read_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_readdata_release(data);
+	nfs_pgio_data_release(data);
 }
 
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
 static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
@@ -1747,41 +1725,35 @@
 }
 
 static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_read_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_read_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_read_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_readhdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_readhdr_alloc();
+	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!rhdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
@@ -1793,12 +1765,12 @@
 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	atomic_inc(&hdr->refcnt);
-	ret = nfs_generic_pagein(desc, hdr);
+	ret = nfs_generic_pgio(desc, hdr);
 	if (ret != 0) {
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+		pnfs_do_read(desc, hdr);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
@@ -1848,7 +1820,7 @@
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct inode *inode = hdr->inode;

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c3058a0..4fb309a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h

@@ -113,8 +113,8 @@
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
 	 */
-	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
-	enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+	enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+	enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
 
 	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
 
@@ -180,11 +180,6 @@
 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
-			   const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
-			    int, const struct nfs_pgio_completion_ops *);
-
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -192,7 +187,8 @@
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			        struct nfs_page *req, u64 wb_size);
 int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+			    struct nfs_page *prev, struct nfs_page *req);
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -217,13 +213,13 @@
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
 					       loff_t pos,
@@ -461,18 +457,6 @@
 {
 }
 
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-					 const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
-					  const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
 		 struct nfs_commit_info *cinfo)

diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e..c171ce1 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c

@@ -578,7 +578,7 @@
 	return 0;
 }
 
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -594,18 +594,18 @@
 	return 0;
 }
 
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -614,19 +614,13 @@
 	return 0;
 }
 
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	BUG();
@@ -734,13 +728,10 @@
 	.fsinfo		= nfs_proc_fsinfo,
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs2_decode_dirent,
+	.pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
 	.read_setup	= nfs_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
-	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
-	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedd..e818a47 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c

@@ -24,85 +24,24 @@
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
-static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_common_ops;
 static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	struct nfs_read_header *rhdr;
-
-	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-	if (rhdr) {
-		struct nfs_pgio_header *hdr = &rhdr->header;
-
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-	}
-	return rhdr;
-}
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
-
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
-						unsigned int pagecount)
-{
-	struct nfs_read_data *data, *prealloc;
-
-	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
-	if (prealloc->header == NULL)
-		data = prealloc;
-	else
-		data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		goto out;
-
-	if (nfs_pgarray_set(&data->pages, pagecount)) {
-		data->header = hdr;
-		atomic_inc(&hdr->refcnt);
-	} else {
-		if (data != prealloc)
-			kfree(data);
-		data = NULL;
-	}
-out:
-	return data;
+	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 }
 
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
 {
-	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
-
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
-
-void nfs_readdata_release(struct nfs_read_data *rdata)
-{
-	struct nfs_pgio_header *hdr = rdata->header;
-	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
-
-	put_nfs_open_context(rdata->args.context);
-	if (rdata->pages.pagevec != rdata->pages.page_array)
-		kfree(rdata->pages.pagevec);
-	if (rdata == &read_header->rpc_data) {
-		rdata->header = NULL;
-		rdata = NULL;
-	}
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
-	/* Note: we only free the rpc_task after callbacks are done.
-	 * See the comment in rpc_free_task() for why
-	 */
-	kfree(rdata);
-}
-EXPORT_SYMBOL_GPL(nfs_readdata_release);
 
 static
 int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@
 }
 
 void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			      struct inode *inode,
+			      struct inode *inode, bool force_mds,
 			      const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
-			NFS_SERVER(inode)->rsize, 0);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+			server->rsize, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 {
-	pgio->pg_ops = &nfs_pageio_read_ops;
+	pgio->pg_ops = &nfs_pgio_rw_ops;
 	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
@@ -139,7 +85,7 @@
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, inode, page, 0, len);
+	new = nfs_create_request(ctx, page, NULL, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -147,7 +93,8 @@
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
 	NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,10 +105,16 @@
 {
 	struct inode *d_inode = req->wb_context->dentry->d_inode;
 
-	if (PageUptodate(req->wb_page))
-		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+		(long long)req_offset(req));
 
-	unlock_page(req->wb_page);
+	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+		if (PageUptodate(req->wb_page))
+			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+
+		unlock_page(req->wb_page);
+	}
 
 	dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -171,7 +124,12 @@
 	nfs_release_request(req);
 }
 
-/* Note io was page aligned */
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{
+	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+		SetPageUptodate(req->wb_page);
+}
+
 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
@@ -181,21 +139,32 @@
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
+		unsigned long start = req->wb_pgbase;
+		unsigned long end = req->wb_pgbase + req->wb_bytes;
 
 		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
-			if (bytes > hdr->good_bytes)
-				zero_user(page, 0, PAGE_SIZE);
-			else if (hdr->good_bytes - bytes < PAGE_SIZE)
-				zero_user_segment(page,
-					hdr->good_bytes & ~PAGE_MASK,
-					PAGE_SIZE);
+			/* note: regions of the page not covered by a
+			 * request are zeroed in nfs_readpage_async /
+			 * readpage_async_filler */
+			if (bytes > hdr->good_bytes) {
+				/* nothing in this request was good, so zero
+				 * the full extent of the request */
+				zero_user_segment(page, start, end);
+
+			} else if (hdr->good_bytes - bytes < req->wb_bytes) {
+				/* part of this request has good bytes, but
+				 * not all. zero the bad bytes */
+				start += hdr->good_bytes - bytes;
+				WARN_ON(start < req->wb_pgbase);
+				zero_user_segment(page, start, end);
+			}
 		}
 		bytes += req->wb_bytes;
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
 			if (bytes <= hdr->good_bytes)
-				SetPageUptodate(page);
+				nfs_page_group_set_uptodate(req);
 		} else
-			SetPageUptodate(page);
+			nfs_page_group_set_uptodate(req);
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
 	}
@@ -203,95 +172,14 @@
 	hdr->release(hdr);
 }
 
-int nfs_initiate_read(struct rpc_clnt *clnt,
-		      struct nfs_read_data *data,
-		      const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+			      struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.task = &data->task,
-		.rpc_client = clnt,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | swap_flags | flags,
-	};
 
-	/* Set up the initial task struct. */
-	NFS_PROTO(inode)->read_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
-			"offset %llu)\n",
-			data->task.tk_pid,
-			inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(inode),
-			data->args.count,
-			(unsigned long long)data->args.offset);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-	rpc_put_task(task);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
-
-/*
- * Set up the NFS read request struct
- */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
-		unsigned int count, unsigned int offset)
-{
-	struct nfs_page *req = data->header->req;
-
-	data->args.fh     = NFS_FH(data->header->inode);
-	data->args.offset = req_offset(req) + offset;
-	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pages.pagevec;
-	data->args.count  = count;
-	data->args.context = get_nfs_open_context(req->wb_context);
-	data->args.lock_context = req->wb_lock_context;
-
-	data->res.fattr   = &data->fattr;
-	data->res.count   = count;
-	data->res.eof     = 0;
-	nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_read(struct nfs_read_data *data,
-		const struct rpc_call_ops *call_ops)
-{
-	struct inode *inode = data->header->inode;
-
-	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
-}
-
-static int
-nfs_do_multiple_reads(struct list_head *head,
-		const struct rpc_call_ops *call_ops)
-{
-	struct nfs_read_data *data;
-	int ret = 0;
-
-	while (!list_empty(head)) {
-		int ret2;
-
-		data = list_first_entry(head, struct nfs_read_data, list);
-		list_del_init(&data->list);
-
-		ret2 = nfs_do_read(data, call_ops);
-		if (ret == 0)
-			ret = ret2;
-	}
-	return ret;
+	task_setup_data->flags |= swap_flags;
+	NFS_PROTO(inode)->read_setup(data, msg);
 }
 
 static void
@@ -311,143 +199,14 @@
 	.completion = nfs_read_completion,
 };
 
-static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
-		struct nfs_pgio_header *hdr)
-{
-	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_read_data, list);
-		list_del(&data->list);
-		nfs_readdata_release(data);
-	}
-	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple requests to fill a single page.
- *
- * We optimize to reduce the number of read operations on the wire.  If we
- * detect that we're reading a page, or an area of a page, that is past the
- * end of file, we do not generate NFS read operations but just clear the
- * parts of the page that would have come back zero from the server anyway.
- *
- * We rely on the cached value of i_size to make this determination; another
- * client can fill pages on the server past our cached end-of-file, but we
- * won't see the new data until our attribute cache is updated.  This is more
- * or less conventional NFS client behavior.
- */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
-			    struct nfs_pgio_header *hdr)
-{
-	struct nfs_page *req = hdr->req;
-	struct page *page = req->wb_page;
-	struct nfs_read_data *data;
-	size_t rsize = desc->pg_bsize, nbytes;
-	unsigned int offset;
-
-	offset = 0;
-	nbytes = desc->pg_count;
-	do {
-		size_t len = min(nbytes,rsize);
-
-		data = nfs_readdata_alloc(hdr, 1);
-		if (!data) {
-			nfs_pagein_error(desc, hdr);
-			return -ENOMEM;
-		}
-		data->pages.pagevec[0] = page;
-		nfs_read_rpcsetup(data, len, offset);
-		list_add(&data->list, &hdr->rpc_list);
-		nbytes -= len;
-		offset += len;
-	} while (nbytes != 0);
-
-	nfs_list_remove_request(req);
-	nfs_list_add_request(req, &hdr->pages);
-	desc->pg_rpc_callops = &nfs_read_common_ops;
-	return 0;
-}
-
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
-			  struct nfs_pgio_header *hdr)
-{
-	struct nfs_page		*req;
-	struct page		**pages;
-	struct nfs_read_data    *data;
-	struct list_head *head = &desc->pg_list;
-
-	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
-							  desc->pg_count));
-	if (!data) {
-		nfs_pagein_error(desc, hdr);
-		return -ENOMEM;
-	}
-
-	pages = data->pages.pagevec;
-	while (!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &hdr->pages);
-		*pages++ = req->wb_page;
-	}
-
-	nfs_read_rpcsetup(data, desc->pg_count, 0);
-	list_add(&data->list, &hdr->rpc_list);
-	desc->pg_rpc_callops = &nfs_read_common_ops;
-	return 0;
-}
-
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-		       struct nfs_pgio_header *hdr)
-{
-	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_pagein_multi(desc, hdr);
-	return nfs_pagein_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_pagein);
-
-static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
-{
-	struct nfs_read_header *rhdr;
-	struct nfs_pgio_header *hdr;
-	int ret;
-
-	rhdr = nfs_readhdr_alloc();
-	if (!rhdr) {
-		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-		return -ENOMEM;
-	}
-	hdr = &rhdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
-	atomic_inc(&hdr->refcnt);
-	ret = nfs_generic_pagein(desc, hdr);
-	if (ret == 0)
-		ret = nfs_do_multiple_reads(&hdr->rpc_list,
-					    desc->pg_rpc_callops);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
-	return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_read_ops = {
-	.pg_test = nfs_generic_pg_test,
-	.pg_doio = nfs_generic_pg_readpages,
-};
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			     struct inode *inode)
 {
-	struct inode *inode = data->header->inode;
-	int status;
-
-	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
-			task->tk_status);
-
-	status = NFS_PROTO(inode)->read_done(task, data);
+	int status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
 
@@ -460,10 +219,10 @@
 	return 0;
 }
 
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
+	struct nfs_pgio_args *argp = &data->args;
+	struct nfs_pgio_res  *resp = &data->res;
 
 	/* This is a short read! */
 	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@
 	rpc_restart_call_prepare(task);
 }
 
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_read_data *data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 
-	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
-	else if (data->res.eof) {
+	if (data->res.eof) {
 		loff_t bound;
 
 		bound = data->args.offset + data->res.count;
@@ -505,26 +258,6 @@
 		nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_common(void *calldata)
-{
-	nfs_readdata_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
-static const struct rpc_call_ops nfs_read_common_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_readpage_result_common,
-	.rpc_release = nfs_readpage_release_common,
-};
-
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -592,7 +325,6 @@
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *new;
 	unsigned int len;
 	int error;
@@ -601,7 +333,7 @@
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, NULL, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
 
@@ -654,7 +386,8 @@
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
@@ -671,7 +404,7 @@
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
@@ -684,3 +417,12 @@
 {
 	kmem_cache_destroy(nfs_rdata_cachep);
 }
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_mode		= FMODE_READ,
+	.rw_alloc_header	= nfs_readhdr_alloc,
+	.rw_free_header		= nfs_readhdr_free,
+	.rw_done		= nfs_readpage_done,
+	.rw_result		= nfs_readpage_result,
+	.rw_initiate		= nfs_initiate_read,
+};

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2cb5694..084af10 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c

@@ -2180,11 +2180,23 @@
 	return -EINVAL;
 }
 
+#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+		| NFS_MOUNT_SECURE \
+		| NFS_MOUNT_TCP \
+		| NFS_MOUNT_VER3 \
+		| NFS_MOUNT_KERBEROS \
+		| NFS_MOUNT_NONLM \
+		| NFS_MOUNT_BROKEN_SUID \
+		| NFS_MOUNT_STRICTLOCK \
+		| NFS_MOUNT_UNSHARED \
+		| NFS_MOUNT_NORESVPORT \
+		| NFS_MOUNT_LEGACY_INTERFACE)
+
 static int
 nfs_compare_remount_data(struct nfs_server *nfss,
 			 struct nfs_parsed_mount_data *data)
 {
-	if (data->flags != nfss->flags ||
+	if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
 	    data->rsize != nfss->rsize ||
 	    data->wsize != nfss->wsize ||
 	    data->version != nfss->nfs_client->rpc_ops->version ||
@@ -2248,6 +2260,7 @@
 	data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
 	data->version = nfsvers;
 	data->minorversion = nfss->nfs_client->cl_minorversion;
+	data->net = current->nsproxy->net_ns;
 	memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
 		data->nfs_server.addrlen);
 
@@ -2347,18 +2360,6 @@
  	nfs_initialise_sb(sb);
 }
 
-#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
-		| NFS_MOUNT_SECURE \
-		| NFS_MOUNT_TCP \
-		| NFS_MOUNT_VER3 \
-		| NFS_MOUNT_KERBEROS \
-		| NFS_MOUNT_NONLM \
-		| NFS_MOUNT_BROKEN_SUID \
-		| NFS_MOUNT_STRICTLOCK \
-		| NFS_MOUNT_UNSHARED \
-		| NFS_MOUNT_NORESVPORT \
-		| NFS_MOUNT_LEGACY_INTERFACE)
-
 static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
 {
 	const struct nfs_server *a = s->s_fs_info;

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ffb9459..3ee5af4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -42,10 +42,10 @@
  * Local function declarations
  */
 static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -70,76 +70,19 @@
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
 {
-	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p) {
-		struct nfs_pgio_header *hdr = &p->header;
-
+	if (p)
 		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-		hdr->verf = &p->verf;
-	}
 	return p;
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
 
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
-						  unsigned int pagecount)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
 {
-	struct nfs_write_data *data, *prealloc;
-
-	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
-	if (prealloc->header == NULL)
-		data = prealloc;
-	else
-		data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		goto out;
-
-	if (nfs_pgarray_set(&data->pages, pagecount)) {
-		data->header = hdr;
-		atomic_inc(&hdr->refcnt);
-	} else {
-		if (data != prealloc)
-			kfree(data);
-		data = NULL;
-	}
-out:
-	return data;
-}
-
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
-{
-	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
 	mempool_free(whdr, nfs_wdata_mempool);
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
-
-void nfs_writedata_release(struct nfs_write_data *wdata)
-{
-	struct nfs_pgio_header *hdr = wdata->header;
-	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
-
-	put_nfs_open_context(wdata->args.context);
-	if (wdata->pages.pagevec != wdata->pages.page_array)
-		kfree(wdata->pages.pagevec);
-	if (wdata == &write_header->rpc_data) {
-		wdata->header = NULL;
-		wdata = NULL;
-	}
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
-	/* Note: we only free the rpc_task after callbacks are done.
-	 * See the comment in rpc_free_task() for why
-	 */
-	kfree(wdata);
-}
-EXPORT_SYMBOL_GPL(nfs_writedata_release);
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
@@ -211,18 +154,78 @@
 	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
 }
 
+/*
+ * nfs_page_group_search_locked
+ * @head - head request of page group
+ * @page_offset - offset into page
+ *
+ * Search page group with head @head to find a request that contains the
+ * page offset @page_offset.
+ *
+ * Returns a pointer to the first matching nfs request, or NULL if no
+ * match is found.
+ *
+ * Must be called with the page group lock held
+ */
+static struct nfs_page *
+nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
+{
+	struct nfs_page *req;
+
+	WARN_ON_ONCE(head != head->wb_head);
+	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
+
+	req = head;
+	do {
+		if (page_offset >= req->wb_pgbase &&
+		    page_offset < (req->wb_pgbase + req->wb_bytes))
+			return req;
+
+		req = req->wb_this_page;
+	} while (req != head);
+
+	return NULL;
+}
+
+/*
+ * nfs_page_group_covers_page
+ * @req - a request belonging to the page group
+ *
+ * Return true if the page group that @req belongs to covers the whole page,
+ * returns false otherwise
+ */
+static bool nfs_page_group_covers_page(struct nfs_page *req)
+{
+	struct nfs_page *tmp;
+	unsigned int pos = 0;
+	unsigned int len = nfs_page_length(req->wb_page);
+
+	nfs_page_group_lock(req);
+
+	do {
+		tmp = nfs_page_group_search_locked(req->wb_head, pos);
+		if (tmp) {
+			/* no way this should happen */
+			WARN_ON_ONCE(tmp->wb_pgbase != pos);
+			pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
+		}
+	} while (tmp && pos < len);
+
+	nfs_page_group_unlock(req);
+	WARN_ON_ONCE(pos > len);
+	return pos == len;
+}
+
 /* We can set the PG_uptodate flag if we see that a write request
  * covers the full page.
  */
-static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+static void nfs_mark_uptodate(struct nfs_page *req)
 {
-	if (PageUptodate(page))
+	if (PageUptodate(req->wb_page))
 		return;
-	if (base != 0)
+	if (!nfs_page_group_covers_page(req))
 		return;
-	if (count != nfs_page_length(page))
-		return;
-	SetPageUptodate(page);
+	SetPageUptodate(req->wb_page);
 }
 
 static int wb_priority(struct writeback_control *wbc)
@@ -258,12 +261,15 @@
 	}
 }
 
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
 {
-	struct inode *inode = page_file_mapping(page)->host;
+	struct inode *inode = page_file_mapping(req->wb_page)->host;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
-	end_page_writeback(page);
+	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+		return;
+
+	end_page_writeback(req->wb_page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -354,10 +360,8 @@
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
-							  page->mapping->host,
-							  wb_priority(wbc),
-							  &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+				false, &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
@@ -400,7 +404,8 @@
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+				&nfs_async_write_completion_ops);
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
@@ -425,6 +430,8 @@
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
+	WARN_ON_ONCE(req->wb_this_page != req);
+
 	/* Lock the request! */
 	nfs_lock_request(req);
 
@@ -441,6 +448,7 @@
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
 	nfsi->npages++;
+	set_bit(PG_INODE_REF, &req->wb_flags);
 	kref_get(&req->wb_kref);
 	spin_unlock(&inode->i_lock);
 }
@@ -452,15 +460,20 @@
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *head;
 
-	spin_lock(&inode->i_lock);
-	if (likely(!PageSwapCache(req->wb_page))) {
-		set_page_private(req->wb_page, 0);
-		ClearPagePrivate(req->wb_page);
-		clear_bit(PG_MAPPED, &req->wb_flags);
+	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+		head = req->wb_head;
+
+		spin_lock(&inode->i_lock);
+		if (likely(!PageSwapCache(head->wb_page))) {
+			set_page_private(head->wb_page, 0);
+			ClearPagePrivate(head->wb_page);
+			clear_bit(PG_MAPPED, &head->wb_flags);
+		}
+		nfsi->npages--;
+		spin_unlock(&inode->i_lock);
 	}
-	nfsi->npages--;
-	spin_unlock(&inode->i_lock);
 	nfs_release_request(req);
 }
 
@@ -583,7 +596,7 @@
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	if (data->verf.committed == NFS_DATA_SYNC)
 		return data->header->lseg == NULL;
@@ -614,7 +627,7 @@
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	return 0;
 }
@@ -625,6 +638,7 @@
 {
 	struct nfs_commit_info cinfo;
 	unsigned long bytes = 0;
+	bool do_destroy;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
@@ -645,7 +659,7 @@
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
-			memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
@@ -653,7 +667,8 @@
 		nfs_inode_remove_request(req);
 next:
 		nfs_unlock_request(req);
-		nfs_end_page_writeback(req->wb_page);
+		nfs_end_page_writeback(req);
+		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
 out:
@@ -661,7 +676,7 @@
 }
 
 #if  IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-static unsigned long
+unsigned long
 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
 	return cinfo->mds->ncommit;
@@ -718,7 +733,7 @@
 }
 
 #else
-static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
@@ -758,6 +773,10 @@
 		if (req == NULL)
 			goto out_unlock;
 
+		/* should be handled by nfs_flush_incompatible */
+		WARN_ON_ONCE(req->wb_head != req);
+		WARN_ON_ONCE(req->wb_this_page != req);
+
 		rqend = req->wb_offset + req->wb_bytes;
 		/*
 		 * Tell the caller to flush out the request if
@@ -819,7 +838,7 @@
 	req = nfs_try_to_update_request(inode, page, offset, bytes);
 	if (req != NULL)
 		goto out;
-	req = nfs_create_request(ctx, inode, page, offset, bytes);
+	req = nfs_create_request(ctx, page, NULL, offset, bytes);
 	if (IS_ERR(req))
 		goto out;
 	nfs_inode_add_request(inode, req);
@@ -837,7 +856,7 @@
 		return PTR_ERR(req);
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
-	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+	nfs_mark_uptodate(req);
 	nfs_mark_request_dirty(req);
 	nfs_unlock_and_release_request(req);
 	return 0;
@@ -863,6 +882,8 @@
 			return 0;
 		l_ctx = req->wb_lock_context;
 		do_flush = req->wb_page != page || req->wb_context != ctx;
+		/* for now, flush if more than 1 request in page_group */
+		do_flush |= req->wb_this_page != req;
 		if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
 			do_flush |= l_ctx->lockowner.l_owner != current->files
 				|| l_ctx->lockowner.l_pid != current->tgid;
@@ -990,126 +1011,17 @@
 	return RPC_PRIORITY_NORMAL;
 }
 
-int nfs_initiate_write(struct rpc_clnt *clnt,
-		       struct nfs_write_data *data,
-		       const struct rpc_call_ops *call_ops,
-		       int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+			       struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int priority = flush_task_priority(how);
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = clnt,
-		.task = &data->task,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | flags,
-		.priority = priority,
-	};
-	int ret = 0;
 
-	/* Set up the initial task struct.  */
-	NFS_PROTO(inode)->write_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated write call "
-		"(req %s/%llu, %u bytes @ offset %llu)\n",
-		data->task.tk_pid,
-		inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(inode),
-		data->args.count,
-		(unsigned long long)data->args.offset);
+	task_setup_data->priority = priority;
+	NFS_PROTO(inode)->write_setup(data, msg);
 
 	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
-				 &task_setup_data.rpc_client, &msg, data);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
-		goto out;
-	}
-	if (how & FLUSH_SYNC) {
-		ret = rpc_wait_for_completion_task(task);
-		if (ret == 0)
-			ret = task->tk_status;
-	}
-	rpc_put_task(task);
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
-
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
-		unsigned int count, unsigned int offset,
-		int how, struct nfs_commit_info *cinfo)
-{
-	struct nfs_page *req = data->header->req;
-
-	/* Set up the RPC argument and reply structs
-	 * NB: take care not to mess about with data->commit et al. */
-
-	data->args.fh     = NFS_FH(data->header->inode);
-	data->args.offset = req_offset(req) + offset;
-	/* pnfs_set_layoutcommit needs this */
-	data->mds_offset = data->args.offset;
-	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pages.pagevec;
-	data->args.count  = count;
-	data->args.context = get_nfs_open_context(req->wb_context);
-	data->args.lock_context = req->wb_lock_context;
-	data->args.stable  = NFS_UNSTABLE;
-	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
-	case 0:
-		break;
-	case FLUSH_COND_STABLE:
-		if (nfs_reqs_to_commit(cinfo))
-			break;
-	default:
-		data->args.stable = NFS_FILE_SYNC;
-	}
-
-	data->res.fattr   = &data->fattr;
-	data->res.count   = count;
-	data->res.verf    = &data->verf;
-	nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_write(struct nfs_write_data *data,
-		const struct rpc_call_ops *call_ops,
-		int how)
-{
-	struct inode *inode = data->header->inode;
-
-	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
-}
-
-static int nfs_do_multiple_writes(struct list_head *head,
-		const struct rpc_call_ops *call_ops,
-		int how)
-{
-	struct nfs_write_data *data;
-	int ret = 0;
-
-	while (!list_empty(head)) {
-		int ret2;
-
-		data = list_first_entry(head, struct nfs_write_data, list);
-		list_del_init(&data->list);
-		
-		ret2 = nfs_do_write(data, call_ops, how);
-		 if (ret == 0)
-			 ret = ret2;
-	}
-	return ret;
+				 &task_setup_data->rpc_client, msg, data);
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1120,7 +1032,7 @@
 {
 	nfs_mark_request_dirty(req);
 	nfs_unlock_request(req);
-	nfs_end_page_writeback(req->wb_page);
+	nfs_end_page_writeback(req);
 	nfs_release_request(req);
 }
 
@@ -1140,173 +1052,30 @@
 	.completion = nfs_write_completion,
 };
 
-static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
-		struct nfs_pgio_header *hdr)
-{
-	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_write_data, list);
-		list_del(&data->list);
-		nfs_writedata_release(data);
-	}
-	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple small requests to write out a single
- * contiguous dirty area on one page.
- */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
-			   struct nfs_pgio_header *hdr)
-{
-	struct nfs_page *req = hdr->req;
-	struct page *page = req->wb_page;
-	struct nfs_write_data *data;
-	size_t wsize = desc->pg_bsize, nbytes;
-	unsigned int offset;
-	int requests = 0;
-	struct nfs_commit_info cinfo;
-
-	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-
-	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
-	     desc->pg_count > wsize))
-		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
-
-	offset = 0;
-	nbytes = desc->pg_count;
-	do {
-		size_t len = min(nbytes, wsize);
-
-		data = nfs_writedata_alloc(hdr, 1);
-		if (!data) {
-			nfs_flush_error(desc, hdr);
-			return -ENOMEM;
-		}
-		data->pages.pagevec[0] = page;
-		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
-		list_add(&data->list, &hdr->rpc_list);
-		requests++;
-		nbytes -= len;
-		offset += len;
-	} while (nbytes != 0);
-	nfs_list_remove_request(req);
-	nfs_list_add_request(req, &hdr->pages);
-	desc->pg_rpc_callops = &nfs_write_common_ops;
-	return 0;
-}
-
-/*
- * Create an RPC task for the given write request and kick it.
- * The page must have been locked by the caller.
- *
- * It may happen that the page we're passed is not marked dirty.
- * This is the case if nfs_updatepage detects a conflicting request
- * that has been written but not committed.
- */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
-			 struct nfs_pgio_header *hdr)
-{
-	struct nfs_page		*req;
-	struct page		**pages;
-	struct nfs_write_data	*data;
-	struct list_head *head = &desc->pg_list;
-	struct nfs_commit_info cinfo;
-
-	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
-							   desc->pg_count));
-	if (!data) {
-		nfs_flush_error(desc, hdr);
-		return -ENOMEM;
-	}
-
-	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-	pages = data->pages.pagevec;
-	while (!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &hdr->pages);
-		*pages++ = req->wb_page;
-	}
-
-	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
-		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
-	/* Set up the argument struct */
-	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
-	list_add(&data->list, &hdr->rpc_list);
-	desc->pg_rpc_callops = &nfs_write_common_ops;
-	return 0;
-}
-
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-		      struct nfs_pgio_header *hdr)
-{
-	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_flush_multi(desc, hdr);
-	return nfs_flush_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_flush);
-
-static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
-{
-	struct nfs_write_header *whdr;
-	struct nfs_pgio_header *hdr;
-	int ret;
-
-	whdr = nfs_writehdr_alloc();
-	if (!whdr) {
-		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-		return -ENOMEM;
-	}
-	hdr = &whdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
-	atomic_inc(&hdr->refcnt);
-	ret = nfs_generic_flush(desc, hdr);
-	if (ret == 0)
-		ret = nfs_do_multiple_writes(&hdr->rpc_list,
-					     desc->pg_rpc_callops,
-					     desc->pg_ioflags);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
-	return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_write_ops = {
-	.pg_test = nfs_generic_pg_test,
-	.pg_doio = nfs_generic_pg_writepages,
-};
-
 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			       struct inode *inode, int ioflags,
+			       struct inode *inode, int ioflags, bool force_mds,
 			       const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
-				NFS_SERVER(inode)->wsize, ioflags);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+			server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
 void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 {
-	pgio->pg_ops = &nfs_pageio_write_ops;
+	pgio->pg_ops = &nfs_pgio_rw_ops;
 	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_write_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
@@ -1314,23 +1083,8 @@
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- *	  writebacks since the page->count is kept > 1 for as long
- *	  as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 {
-	struct nfs_write_data	*data = calldata;
-
-	nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_common(void *calldata)
-{
-	struct nfs_write_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
@@ -1339,34 +1093,46 @@
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
-		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+			memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+		else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
-	nfs_writedata_release(data);
 }
 
-static const struct rpc_call_ops nfs_write_common_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_writeback_done_common,
-	.rpc_release = nfs_writeback_release_common,
-};
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static int nfs_should_remove_suid(const struct inode *inode)
+{
+	umode_t mode = inode->i_mode;
+	int kill = 0;
 
+	/* suid always must be killed */
+	if (unlikely(mode & S_ISUID))
+		kill = ATTR_KILL_SUID;
+
+	/*
+	 * sgid without any exec bits is just a mandatory locking mark; leave
+	 * it alone.  If some exec bits are set, it's a real sgid; kill it.
+	 */
+	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+		kill |= ATTR_KILL_SGID;
+
+	if (unlikely(kill && S_ISREG(mode)))
+		return kill;
+
+	return 0;
+}
 
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			      struct inode *inode)
 {
-	struct nfs_writeargs	*argp = &data->args;
-	struct nfs_writeres	*resp = &data->res;
-	struct inode		*inode = data->header->inode;
 	int status;
 
-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
-		task->tk_pid, task->tk_status);
-
 	/*
 	 * ->write_done will attempt to use post-op attributes to detect
 	 * conflicting writes by other clients.  A strict interpretation
@@ -1376,11 +1142,11 @@
 	 */
 	status = NFS_PROTO(inode)->write_done(task, data);
 	if (status != 0)
-		return;
-	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+		return status;
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
 
 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+	if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
 		 * commit data to stable storage even though we
 		 * requested it.
@@ -1396,18 +1162,31 @@
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
 				NFS_SERVER(inode)->nfs_client->cl_hostname,
-				resp->verf->committed, argp->stable);
+				data->res.verf->committed, data->args.stable);
 			complain = jiffies + 300 * HZ;
 		}
 	}
 #endif
-	if (task->tk_status < 0)
-		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
-	else if (resp->count < argp->count) {
+
+	/* Deal with the suid/sgid bit corner case */
+	if (nfs_should_remove_suid(inode))
+		nfs_mark_for_revalidate(inode);
+	return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+	struct nfs_pgio_args	*argp = &data->args;
+	struct nfs_pgio_res	*resp = &data->res;
+
+	if (resp->count < argp->count) {
 		static unsigned long    complain;
 
 		/* This a short write! */
-		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count == 0) {
@@ -1874,7 +1653,7 @@
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_write_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
@@ -1936,3 +1715,12 @@
 	kmem_cache_destroy(nfs_wdata_cachep);
 }
 
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+	.rw_mode		= FMODE_WRITE,
+	.rw_alloc_header	= nfs_writehdr_alloc,
+	.rw_free_header		= nfs_writehdr_free,
+	.rw_release		= nfs_writeback_release_common,
+	.rw_done		= nfs_writeback_done,
+	.rw_result		= nfs_writeback_result,
+	.rw_initiate		= nfs_initiate_write,
+};

diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index b481e1f..a986ceb 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h

@@ -49,7 +49,7 @@
 
 struct nfs4_acl *nfs4_acl_new(int);
 int nfs4_acl_get_whotype(char *, u32);
-__be32 nfs4_acl_write_who(int who, __be32 **p, int *len);
+__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
 
 int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
 		struct nfs4_acl **acl);

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 2645be4..72f4482 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c

@@ -1,7 +1,6 @@
 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
 
 #include <linux/sched.h>
-#include <linux/user_namespace.h>
 #include "nfsd.h"
 #include "auth.h"
 
@@ -25,7 +24,6 @@
 	struct cred *new;
 	int i;
 	int flags = nfsexp_flags(rqstp, exp);
-	int ret;
 
 	validate_process_creds();
 
@@ -86,8 +84,7 @@
 	return 0;
 
 oom:
-	ret = -ENOMEM;
 	abort_creds(new);
-	return ret;
+	return -ENOMEM;
 }
 

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8513c59..13b85f9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c

@@ -17,17 +17,12 @@
 #include <linux/exportfs.h>
 #include <linux/sunrpc/svc_xprt.h>
 
-#include <net/ipv6.h>
-
 #include "nfsd.h"
 #include "nfsfh.h"
 #include "netns.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
-typedef struct auth_domain	svc_client;
-typedef struct svc_export	svc_export;
-
 /*
  * We have two caches.
  * One maps client+vfsmnt+dentry to export options - the export map
@@ -73,7 +68,7 @@
 
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
-	/* client fsidtype fsid [path] */
+	/* client fsidtype fsid expiry [path] */
 	char *buf;
 	int len;
 	struct auth_domain *dom = NULL;
@@ -295,13 +290,19 @@
 
 static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 {
+	struct nfsd4_fs_location *locations = fsloc->locations;
 	int i;
 
+	if (!locations)
+		return;
+
 	for (i = 0; i < fsloc->locations_count; i++) {
-		kfree(fsloc->locations[i].path);
-		kfree(fsloc->locations[i].hosts);
+		kfree(locations[i].path);
+		kfree(locations[i].hosts);
 	}
-	kfree(fsloc->locations);
+
+	kfree(locations);
+	fsloc->locations = NULL;
 }
 
 static void svc_export_put(struct kref *ref)
@@ -388,6 +389,10 @@
 	int len;
 	int migrated, i, err;
 
+	/* more than one fsloc */
+	if (fsloc->locations)
+		return -EINVAL;
+
 	/* listsize */
 	err = get_uint(mesg, &fsloc->locations_count);
 	if (err)
@@ -437,13 +442,18 @@
 
 static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
 {
-	int listsize, err;
 	struct exp_flavor_info *f;
+	u32 listsize;
+	int err;
 
-	err = get_int(mesg, &listsize);
+	/* more than one secinfo */
+	if (exp->ex_nflavors)
+		return -EINVAL;
+
+	err = get_uint(mesg, &listsize);
 	if (err)
 		return err;
-	if (listsize < 0 || listsize > MAX_SECINFO_LIST)
+	if (listsize > MAX_SECINFO_LIST)
 		return -EINVAL;
 
 	for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
@@ -474,6 +484,27 @@
 secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
 #endif
 
+static inline int
+uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+{
+	int len;
+
+	/* more than one uuid */
+	if (*puuid)
+		return -EINVAL;
+
+	/* expect a 16 byte uuid encoded as \xXXXX... */
+	len = qword_get(mesg, buf, PAGE_SIZE);
+	if (len != EX_UUID_LEN)
+		return -EINVAL;
+
+	*puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL);
+	if (*puuid == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client path expiry [flags anonuid anongid fsid] */
@@ -552,18 +583,9 @@
 		while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
 			if (strcmp(buf, "fsloc") == 0)
 				err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
-			else if (strcmp(buf, "uuid") == 0) {
-				/* expect a 16 byte uuid encoded as \xXXXX... */
-				len = qword_get(&mesg, buf, PAGE_SIZE);
-				if (len != 16)
-					err  = -EINVAL;
-				else {
-					exp.ex_uuid =
-						kmemdup(buf, 16, GFP_KERNEL);
-					if (exp.ex_uuid == NULL)
-						err = -ENOMEM;
-				}
-			} else if (strcmp(buf, "secinfo") == 0)
+			else if (strcmp(buf, "uuid") == 0)
+				err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+			else if (strcmp(buf, "secinfo") == 0)
 				err = secinfo_parse(&mesg, buf, &exp);
 			else
 				/* quietly ignore unknown words and anything
@@ -649,7 +671,7 @@
 		if (exp->ex_uuid) {
 			int i;
 			seq_puts(m, ",uuid=");
-			for (i=0; i<16; i++) {
+			for (i = 0; i < EX_UUID_LEN; i++) {
 				if ((i&3) == 0 && i)
 					seq_putc(m, ':');
 				seq_printf(m, "%02x", exp->ex_uuid[i]);
@@ -771,7 +793,7 @@
 
 
 static struct svc_expkey *
-exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
+exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
 	     u32 *fsidv, struct cache_req *reqp)
 {
 	struct svc_expkey key, *ek;
@@ -793,9 +815,9 @@
 	return ek;
 }
 
-
-static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
-				   const struct path *path, struct cache_req *reqp)
+static struct svc_export *
+exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
+		const struct path *path, struct cache_req *reqp)
 {
 	struct svc_export *exp, key;
 	int err;
@@ -819,11 +841,11 @@
 /*
  * Find the export entry for a given dentry.
  */
-static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
-				     struct path *path)
+static struct svc_export *
+exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path)
 {
 	struct dentry *saved = dget(path->dentry);
-	svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
+	struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
 
 	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
 		struct dentry *parent = dget_parent(path->dentry);
@@ -844,7 +866,7 @@
  * since its harder to fool a kernel module than a user space program.
  */
 int
-exp_rootfh(struct net *net, svc_client *clp, char *name,
+exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
 	   struct knfsd_fh *f, int maxsize)
 {
 	struct svc_export	*exp;

diff --git a/include/linux/nfsd/export.h b/fs/nfsd/export.h
similarity index 93%
rename from include/linux/nfsd/export.h
rename to fs/nfsd/export.h
index 7898c99..cfeea85 100644
--- a/include/linux/nfsd/export.h
+++ b/fs/nfsd/export.h

@@ -1,17 +1,16 @@
 /*
- * include/linux/nfsd/export.h
- * 
- * Public declarations for NFS exports. The definitions for the
- * syscall interface are in nfsctl.h
- *
  * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
  */
 #ifndef NFSD_EXPORT_H
 #define NFSD_EXPORT_H
 
-# include <linux/nfsd/nfsfh.h>
+#include <linux/sunrpc/cache.h>
 #include <uapi/linux/nfsd/export.h>
 
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
 /*
  * FS Locations
  */
@@ -38,6 +37,7 @@
  * spkm3i, and spkm3p (and using all 8 at once should be rare).
  */
 #define MAX_SECINFO_LIST	8
+#define EX_UUID_LEN		16
 
 struct exp_flavor_info {
 	u32	pseudoflavor;
@@ -54,7 +54,7 @@
 	int			ex_fsid;
 	unsigned char *		ex_uuid; /* 16 byte fsid */
 	struct nfsd4_fs_locations ex_fslocs;
-	int			ex_nflavors;
+	uint32_t		ex_nflavors;
 	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
 	struct cache_detail	*cd;
 };

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index d620e7f..2ed05c3 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c

@@ -97,25 +97,14 @@
 {
 	static u64 val;
 	char read_buf[25];
-	size_t size, ret;
+	size_t size;
 	loff_t pos = *ppos;
 
 	if (!pos)
 		nfsd_inject_get(file_inode(file)->i_private, &val);
 	size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
 
-	if (pos < 0)
-		return -EINVAL;
-	if (pos >= size || !len)
-		return 0;
-	if (len > size - pos)
-		len = size - pos;
-	ret = copy_to_user(buf, read_buf + pos, len);
-	if (ret == len)
-		return -EFAULT;
-	len -= ret;
-	*ppos = pos + len;
-	return len;
+	return simple_read_from_buffer(buf, len, ppos, read_buf, size);
 }
 
 static ssize_t fault_inject_write(struct file *file, const char __user *buf,

diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index 66e58db..a3f3490 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h

@@ -56,7 +56,7 @@
 
 __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
 __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
-__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *);
-__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *);
+__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t);
+__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t);
 
 #endif /* LINUX_NFSD_IDMAP_H */

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 11c1fba..12b023a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c

@@ -182,7 +182,8 @@
 static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclargs *argp)
 {
-	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
 		return 0;
 	argp->mask = ntohl(*p); p++;
 
@@ -197,7 +198,8 @@
 	unsigned int base;
 	int n;
 
-	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
 		return 0;
 	argp->mask = ntohl(*p++);
 	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
@@ -218,7 +220,8 @@
 static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd_fhandle *argp)
 {
-	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
@@ -226,7 +229,8 @@
 static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_accessargs *argp)
 {
-	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
 		return 0;
 	argp->access = ntohl(*p++);
 

diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index adc5f1b..2a514e2 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c

@@ -128,7 +128,8 @@
 static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclargs *args)
 {
-	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+	p = nfs3svc_decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->mask = ntohl(*p); p++;
 
@@ -143,7 +144,8 @@
 	unsigned int base;
 	int n;
 
-	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+	p = nfs3svc_decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->mask = ntohl(*p++);
 	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index de6e39e..e6c01e8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c

@@ -278,7 +278,8 @@
 int
 nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
@@ -287,7 +288,8 @@
 nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_sattrargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = decode_sattr3(p, &args->attrs);
 
@@ -315,7 +317,8 @@
 nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_accessargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->access = ntohl(*p++);
 
@@ -330,7 +333,8 @@
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
 
@@ -360,7 +364,8 @@
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
 
@@ -535,7 +540,8 @@
 nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readlinkargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
@@ -558,7 +564,8 @@
 nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->cookie);
 	args->verf   = p; p += 2;
@@ -580,7 +587,8 @@
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->cookie);
 	args->verf     = p; p += 2;
@@ -605,7 +613,8 @@
 nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_commitargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
 	args->count = ntohl(*p++);

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index f66c66b..d714156 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c

@@ -36,7 +36,6 @@
 
 #include <linux/slab.h>
 #include <linux/nfs_fs.h>
-#include <linux/export.h>
 #include "nfsfh.h"
 #include "nfsd.h"
 #include "acl.h"
@@ -920,20 +919,19 @@
 	return NFS4_ACL_WHO_NAMED;
 }
 
-__be32 nfs4_acl_write_who(int who, __be32 **p, int *len)
+__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
 {
+	__be32 *p;
 	int i;
-	int bytes;
 
 	for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
 		if (s2t_map[i].type != who)
 			continue;
-		bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2);
-		if (bytes > *len)
+		p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4);
+		if (!p)
 			return nfserr_resource;
-		*p = xdr_encode_opaque(*p, s2t_map[i].string,
+		p = xdr_encode_opaque(p, s2t_map[i].string,
 					s2t_map[i].stringlen);
-		*len -= bytes;
 		return 0;
 	}
 	WARN_ON_ONCE(1);

diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c0dfde6..a0ab0a8 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c

@@ -551,44 +551,43 @@
 	return 0;
 }
 
-static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen)
+static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id)
 {
 	char buf[11];
 	int len;
-	int bytes;
+	__be32 *p;
 
 	len = sprintf(buf, "%u", id);
-	bytes = 4 + (XDR_QUADLEN(len) << 2);
-	if (bytes > *buflen)
+	p = xdr_reserve_space(xdr, len + 4);
+	if (!p)
 		return nfserr_resource;
-	*p = xdr_encode_opaque(*p, buf, len);
-	*buflen -= bytes;
+	p = xdr_encode_opaque(p, buf, len);
 	return 0;
 }
 
-static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
+static __be32 idmap_id_to_name(struct xdr_stream *xdr,
+			       struct svc_rqst *rqstp, int type, u32 id)
 {
 	struct ent *item, key = {
 		.id = id,
 		.type = type,
 	};
+	__be32 *p;
 	int ret;
-	int bytes;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
 	ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
 	if (ret == -ENOENT)
-		return encode_ascii_id(id, p, buflen);
+		return encode_ascii_id(xdr, id);
 	if (ret)
 		return nfserrno(ret);
 	ret = strlen(item->name);
 	WARN_ON_ONCE(ret > IDMAP_NAMESZ);
-	bytes = 4 + (XDR_QUADLEN(ret) << 2);
-	if (bytes > *buflen)
+	p = xdr_reserve_space(xdr, ret + 4);
+	if (!p)
 		return nfserr_resource;
-	*p = xdr_encode_opaque(*p, item->name, ret);
-	*buflen -= bytes;
+	p = xdr_encode_opaque(p, item->name, ret);
 	cache_put(&item->h, nn->idtoname_cache);
 	return 0;
 }
@@ -622,11 +621,12 @@
 	return idmap_name_to_id(rqstp, type, name, namelen, id);
 }
 
-static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
+static __be32 encode_name_from_id(struct xdr_stream *xdr,
+				  struct svc_rqst *rqstp, int type, u32 id)
 {
 	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
-		return encode_ascii_id(id, p, buflen);
-	return idmap_id_to_name(rqstp, type, id, p, buflen);
+		return encode_ascii_id(xdr, id);
+	return idmap_id_to_name(xdr, rqstp, type, id);
 }
 
 __be32
@@ -655,14 +655,16 @@
 	return status;
 }
 
-__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid,  __be32 **p, int *buflen)
+__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+			 kuid_t uid)
 {
 	u32 id = from_kuid(&init_user_ns, uid);
-	return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen);
+	return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
 }
 
-__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen)
+__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+			  kgid_t gid)
 {
 	u32 id = from_kgid(&init_user_ns, gid);
-	return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen);
+	return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
 }

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d543222..6851b00 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c

@@ -430,12 +430,12 @@
 				goto out;
 			break;
 		case NFS4_OPEN_CLAIM_PREVIOUS:
-			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 			status = nfs4_check_open_reclaim(&open->op_clientid,
 							 cstate->minorversion,
 							 nn);
 			if (status)
 				goto out;
+			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 		case NFS4_OPEN_CLAIM_FH:
 		case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
 			status = do_open_fhandle(rqstp, cstate, open);
@@ -445,7 +445,6 @@
 			break;
 		case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
              	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
-			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 			dprintk("NFSD: unsupported OPEN claim type %d\n",
 				open->op_claim_type);
 			status = nfserr_notsupp;
@@ -786,7 +785,6 @@
 	if (!nfsd4_last_compound_op(rqstp))
 		rqstp->rq_splice_ok = false;
 
-	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
 						 cstate, &read->rd_stateid,
@@ -794,11 +792,8 @@
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
 	}
-	if (read->rd_filp)
-		get_file(read->rd_filp);
 	status = nfs_ok;
 out:
-	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = &cstate->current_fh;
 	return status;
@@ -937,10 +932,8 @@
 	int err;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
-		nfs4_lock_state();
 		status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
 			&setattr->sa_stateid, WR_STATE, NULL);
-		nfs4_unlock_state();
 		if (status) {
 			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			return status;
@@ -1006,17 +999,12 @@
 	if (write->wr_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
-	nfs4_lock_state();
 	status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
 					cstate, stateid, WR_STATE, &filp);
 	if (status) {
-		nfs4_unlock_state();
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		return status;
 	}
-	if (filp)
-		get_file(filp);
-	nfs4_unlock_state();
 
 	cnt = write->wr_buflen;
 	write->wr_how_written = write->wr_stable_how;
@@ -1072,10 +1060,10 @@
 		return nfserr_jukebox;
 
 	p = buf;
-	status = nfsd4_encode_fattr(&cstate->current_fh,
+	status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh,
 				    cstate->current_fh.fh_export,
-				    cstate->current_fh.fh_dentry, &p,
-				    count, verify->ve_bmval,
+				    cstate->current_fh.fh_dentry,
+				    verify->ve_bmval,
 				    rqstp, 0);
 	/*
 	 * If nfsd4_encode_fattr() ran out of space, assume that's because
@@ -1182,9 +1170,7 @@
 
 static struct nfsd4_operation nfsd4_ops[];
 
-#ifdef NFSD_DEBUG
 static const char *nfsd4_op_name(unsigned opnum);
-#endif
 
 /*
  * Enforce NFSv4.1 COMPOUND ordering rules:
@@ -1226,6 +1212,8 @@
 
 bool nfsd4_cache_this_op(struct nfsd4_op *op)
 {
+	if (op->opnum == OP_ILLEGAL)
+		return false;
 	return OPDESC(op)->op_flags & OP_CACHEME;
 }
 
@@ -1262,6 +1250,25 @@
 	return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
 }
 
+static void svcxdr_init_encode(struct svc_rqst *rqstp,
+			       struct nfsd4_compoundres *resp)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct xdr_buf *buf = &rqstp->rq_res;
+	struct kvec *head = buf->head;
+
+	xdr->buf = buf;
+	xdr->iov = head;
+	xdr->p   = head->iov_base + head->iov_len;
+	xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
+	/* Tail and page_len should be zero at this point: */
+	buf->len = buf->head[0].iov_len;
+	xdr->scratch.iov_len = 0;
+	xdr->page_ptr = buf->pages - 1;
+	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+		- rqstp->rq_auth_slack;
+}
+
 /*
  * COMPOUND call.
  */
@@ -1275,24 +1282,16 @@
 	struct nfsd4_compound_state *cstate = &resp->cstate;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct svc_fh *save_fh = &cstate->save_fh;
-	int		slack_bytes;
-	u32		plen = 0;
 	__be32		status;
 
-	resp->xbuf = &rqstp->rq_res;
-	resp->p = rqstp->rq_res.head[0].iov_base +
-						rqstp->rq_res.head[0].iov_len;
-	resp->tagp = resp->p;
+	svcxdr_init_encode(rqstp, resp);
+	resp->tagp = resp->xdr.p;
 	/* reserve space for: taglen, tag, and opcnt */
-	resp->p += 2 + XDR_QUADLEN(args->taglen);
-	resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
+	xdr_reserve_space(&resp->xdr, 8 + args->taglen);
 	resp->taglen = args->taglen;
 	resp->tag = args->tag;
-	resp->opcnt = 0;
 	resp->rqstp = rqstp;
 	cstate->minorversion = args->minorversion;
-	cstate->replay_owner = NULL;
-	cstate->session = NULL;
 	fh_init(current_fh, NFS4_FHSIZE);
 	fh_init(save_fh, NFS4_FHSIZE);
 	/*
@@ -1332,19 +1331,6 @@
 			goto encode_op;
 		}
 
-		/* We must be able to encode a successful response to
-		 * this operation, with enough room left over to encode a
-		 * failed response to the next operation.  If we don't
-		 * have enough room, fail with ERR_RESOURCE.
-		 */
-		slack_bytes = (char *)resp->end - (char *)resp->p;
-		if (slack_bytes < COMPOUND_SLACK_SPACE
-				+ COMPOUND_ERR_SLACK_SPACE) {
-			BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE);
-			op->status = nfserr_resource;
-			goto encode_op;
-		}
-
 		opdesc = OPDESC(op);
 
 		if (!current_fh->fh_dentry) {
@@ -1362,9 +1348,13 @@
 
 		/* If op is non-idempotent */
 		if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
-			plen = opdesc->op_rsize_bop(rqstp, op);
 			/*
-			 * If there's still another operation, make sure
+			 * Don't execute this op if we couldn't encode a
+			 * successful reply:
+			 */
+			u32 plen = opdesc->op_rsize_bop(rqstp, op);
+			/*
+			 * Plus if there's another operation, make sure
 			 * we'll have space to at least encode an error:
 			 */
 			if (resp->opcnt < args->opcnt)
@@ -1399,7 +1389,7 @@
 		}
 		if (op->status == nfserr_replay_me) {
 			op->replay = &cstate->replay_owner->so_replay;
-			nfsd4_encode_replay(resp, op);
+			nfsd4_encode_replay(&resp->xdr, op);
 			status = op->status = op->replay->rp_status;
 		} else {
 			nfsd4_encode_operation(resp, op);
@@ -1438,7 +1428,8 @@
 #define op_encode_change_info_maxsz	(5)
 #define nfs4_fattr_bitmap_maxsz		(4)
 
-#define op_encode_lockowner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We'll fall back on returning no lockowner if we run out of space: */
+#define op_encode_lockowner_maxsz	(0)
 #define op_encode_lock_denied_maxsz	(8 + op_encode_lockowner_maxsz)
 
 #define nfs4_owner_maxsz		(1 + XDR_QUADLEN(IDMAP_NAMESZ))
@@ -1470,6 +1461,49 @@
 		+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
 }
 
+/*
+ * Note since this is an idempotent operation we won't insist on failing
+ * the op prematurely if the estimate is too large.  We may turn off splice
+ * reads unnecessarily.
+ */
+static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
+				      struct nfsd4_op *op)
+{
+	u32 *bmap = op->u.getattr.ga_bmval;
+	u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
+	u32 ret = 0;
+
+	if (bmap0 & FATTR4_WORD0_ACL)
+		return svc_max_payload(rqstp);
+	if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
+		return svc_max_payload(rqstp);
+
+	if (bmap1 & FATTR4_WORD1_OWNER) {
+		ret += IDMAP_NAMESZ + 4;
+		bmap1 &= ~FATTR4_WORD1_OWNER;
+	}
+	if (bmap1 & FATTR4_WORD1_OWNER_GROUP) {
+		ret += IDMAP_NAMESZ + 4;
+		bmap1 &= ~FATTR4_WORD1_OWNER_GROUP;
+	}
+	if (bmap0 & FATTR4_WORD0_FILEHANDLE) {
+		ret += NFS4_FHSIZE + 4;
+		bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
+	}
+	if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
+		ret += NFSD4_MAX_SEC_LABEL_LEN + 12;
+		bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
+	}
+	/*
+	 * The largest of the remaining attributes is 16 bytes (e.g.,
+	 * supported_attributes)
+	 */
+	ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2));
+	/* bitmask, length */
+	ret += 20;
+	return ret;
+}
+
 static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1500,18 +1534,19 @@
 	if (rlen > maxcount)
 		rlen = maxcount;
 
-	return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
+	return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
+	u32 maxcount = svc_max_payload(rqstp);
 	u32 rlen = op->u.readdir.rd_maxcount;
 
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
+	if (rlen > maxcount)
+		rlen = maxcount;
 
-	return (op_encode_hdr_size + op_encode_verifier_maxsz)
-		 * sizeof(__be32) + rlen;
+	return (op_encode_hdr_size + op_encode_verifier_maxsz +
+		XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1526,6 +1561,12 @@
 		+ op_encode_change_info_maxsz) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+				       struct nfsd4_op *op)
+{
+	return NFS4_MAX_SESSIONID_LEN + 20;
+}
+
 static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@@ -1539,7 +1580,7 @@
 
 static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+	return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1607,6 +1648,7 @@
 	[OP_GETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_getattr,
 		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_rsize_bop = nfsd4_getattr_rsize,
 		.op_name = "OP_GETATTR",
 	},
 	[OP_GETFH] = {
@@ -1676,37 +1718,32 @@
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
-				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
-				| OP_CLEAR_STATEID,
+				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
-				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
-				| OP_CLEAR_STATEID,
+				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
-				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
-				| OP_CLEAR_STATEID,
+				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
-		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_READ",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
-		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_READDIR",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
 	},
@@ -1864,14 +1901,33 @@
 	},
 };
 
-#ifdef NFSD_DEBUG
+int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+	struct nfsd4_operation *opdesc;
+	nfsd4op_rsize estimator;
+
+	if (op->opnum == OP_ILLEGAL)
+		return op_encode_hdr_size * sizeof(__be32);
+	opdesc = OPDESC(op);
+	estimator = opdesc->op_rsize_bop;
+	return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+}
+
+void warn_on_nonidempotent_op(struct nfsd4_op *op)
+{
+	if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
+		pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
+			op->opnum, nfsd4_op_name(op->opnum));
+		WARN_ON_ONCE(1);
+	}
+}
+
 static const char *nfsd4_op_name(unsigned opnum)
 {
 	if (opnum < ARRAY_SIZE(nfsd4_ops))
 		return nfsd4_ops[opnum].op_name;
 	return "unknown_operation";
 }
-#endif
 
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9a77a5a..c0d45ce 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c

@@ -81,13 +81,13 @@
  * effort to decrease the scope of the client_mutex, this spinlock may
  * eventually cover more:
  */
-static DEFINE_SPINLOCK(recall_lock);
+static DEFINE_SPINLOCK(state_lock);
 
-static struct kmem_cache *openowner_slab = NULL;
-static struct kmem_cache *lockowner_slab = NULL;
-static struct kmem_cache *file_slab = NULL;
-static struct kmem_cache *stateid_slab = NULL;
-static struct kmem_cache *deleg_slab = NULL;
+static struct kmem_cache *openowner_slab;
+static struct kmem_cache *lockowner_slab;
+static struct kmem_cache *file_slab;
+static struct kmem_cache *stateid_slab;
+static struct kmem_cache *deleg_slab;
 
 void
 nfs4_lock_state(void)
@@ -235,9 +235,9 @@
 static inline void
 put_nfs4_file(struct nfs4_file *fi)
 {
-	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
 		hlist_del(&fi->fi_hash);
-		spin_unlock(&recall_lock);
+		spin_unlock(&state_lock);
 		iput(fi->fi_inode);
 		nfsd4_free_file(fi);
 	}
@@ -375,7 +375,6 @@
 	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
 	if (dp == NULL)
 		return dp;
-	dp->dl_stid.sc_type = NFS4_DELEG_STID;
 	/*
 	 * delegation seqid's are never incremented.  The 4.1 special
 	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -418,6 +417,8 @@
 
 static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 {
+	if (!fp->fi_lease)
+		return;
 	if (atomic_dec_and_test(&fp->fi_delegees)) {
 		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
 		fp->fi_lease = NULL;
@@ -431,18 +432,30 @@
 	s->sc_type = 0;
 }
 
+static void
+hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
+{
+	lockdep_assert_held(&state_lock);
+
+	dp->dl_stid.sc_type = NFS4_DELEG_STID;
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+}
+
 /* Called under the state lock. */
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
+	spin_lock(&state_lock);
 	list_del_init(&dp->dl_perclnt);
-	spin_lock(&recall_lock);
 	list_del_init(&dp->dl_perfile);
 	list_del_init(&dp->dl_recall_lru);
-	spin_unlock(&recall_lock);
-	nfs4_put_deleg_lease(dp->dl_file);
-	put_nfs4_file(dp->dl_file);
-	dp->dl_file = NULL;
+	spin_unlock(&state_lock);
+	if (dp->dl_file) {
+		nfs4_put_deleg_lease(dp->dl_file);
+		put_nfs4_file(dp->dl_file);
+		dp->dl_file = NULL;
+	}
 }
 
 
@@ -645,6 +658,12 @@
 	}
 }
 
+static void nfs4_free_lockowner(struct nfs4_lockowner *lo)
+{
+	kfree(lo->lo_owner.so_owner.data);
+	kmem_cache_free(lockowner_slab, lo);
+}
+
 static void release_lockowner(struct nfs4_lockowner *lo)
 {
 	unhash_lockowner(lo);
@@ -699,6 +718,12 @@
 	}
 }
 
+static void nfs4_free_openowner(struct nfs4_openowner *oo)
+{
+	kfree(oo->oo_owner.so_owner.data);
+	kmem_cache_free(openowner_slab, oo);
+}
+
 static void release_openowner(struct nfs4_openowner *oo)
 {
 	unhash_openowner(oo);
@@ -1093,7 +1118,7 @@
 	return clp;
 }
 
-static inline void
+static void
 free_client(struct nfs4_client *clp)
 {
 	struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
@@ -1136,13 +1161,13 @@
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	INIT_LIST_HEAD(&reaplist);
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
 		list_del_init(&dp->dl_perclnt);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 	while (!list_empty(&reaplist)) {
 		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
 		destroy_delegation(dp);
@@ -1544,6 +1569,7 @@
 void
 nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 {
+	struct xdr_buf *buf = resp->xdr.buf;
 	struct nfsd4_slot *slot = resp->cstate.slot;
 	unsigned int base;
 
@@ -1557,11 +1583,9 @@
 		slot->sl_datalen = 0;
 		return;
 	}
-	slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
-	base = (char *)resp->cstate.datap -
-					(char *)resp->xbuf->head[0].iov_base;
-	if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
-				    slot->sl_datalen))
+	base = resp->cstate.data_offset;
+	slot->sl_datalen = buf->len - base;
+	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
 		WARN("%s: sessions DRC could not cache compound\n", __func__);
 	return;
 }
@@ -1602,6 +1626,8 @@
 			 struct nfsd4_sequence *seq)
 {
 	struct nfsd4_slot *slot = resp->cstate.slot;
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
 	__be32 status;
 
 	dprintk("--> %s slot %p\n", __func__, slot);
@@ -1610,14 +1636,16 @@
 	if (status)
 		return status;
 
-	/* The sequence operation has been encoded, cstate->datap set. */
-	memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
+	p = xdr_reserve_space(xdr, slot->sl_datalen);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		return nfserr_serverfault;
+	}
+	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
+	xdr_commit_encode(xdr);
 
 	resp->opcnt = slot->sl_opcnt;
-	resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
-	status = slot->sl_status;
-
-	return status;
+	return slot->sl_status;
 }
 
 /*
@@ -2189,11 +2217,13 @@
 	       struct nfsd4_sequence *seq)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_session *session;
 	struct nfs4_client *clp;
 	struct nfsd4_slot *slot;
 	struct nfsd4_conn *conn;
 	__be32 status;
+	int buflen;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (resp->opcnt != 1)
@@ -2262,6 +2292,16 @@
 	if (status)
 		goto out_put_session;
 
+	buflen = (seq->cachethis) ?
+			session->se_fchannel.maxresp_cached :
+			session->se_fchannel.maxresp_sz;
+	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
+				    nfserr_rep_too_big;
+	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
+		goto out_put_session;
+	svc_reserve(rqstp, buflen);
+
+	status = nfs_ok;
 	/* Success! bump slot seqid */
 	slot->sl_seqid = seq->seqid;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
@@ -2499,28 +2539,19 @@
 	fp->fi_lease = NULL;
 	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
 	memset(fp->fi_access, 0, sizeof(fp->fi_access));
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
-	spin_unlock(&recall_lock);
-}
-
-static void
-nfsd4_free_slab(struct kmem_cache **slab)
-{
-	if (*slab == NULL)
-		return;
-	kmem_cache_destroy(*slab);
-	*slab = NULL;
+	spin_unlock(&state_lock);
 }
 
 void
 nfsd4_free_slabs(void)
 {
-	nfsd4_free_slab(&openowner_slab);
-	nfsd4_free_slab(&lockowner_slab);
-	nfsd4_free_slab(&file_slab);
-	nfsd4_free_slab(&stateid_slab);
-	nfsd4_free_slab(&deleg_slab);
+	kmem_cache_destroy(openowner_slab);
+	kmem_cache_destroy(lockowner_slab);
+	kmem_cache_destroy(file_slab);
+	kmem_cache_destroy(stateid_slab);
+	kmem_cache_destroy(deleg_slab);
 }
 
 int
@@ -2529,42 +2560,38 @@
 	openowner_slab = kmem_cache_create("nfsd4_openowners",
 			sizeof(struct nfs4_openowner), 0, 0, NULL);
 	if (openowner_slab == NULL)
-		goto out_nomem;
+		goto out;
 	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
 			sizeof(struct nfs4_lockowner), 0, 0, NULL);
 	if (lockowner_slab == NULL)
-		goto out_nomem;
+		goto out_free_openowner_slab;
 	file_slab = kmem_cache_create("nfsd4_files",
 			sizeof(struct nfs4_file), 0, 0, NULL);
 	if (file_slab == NULL)
-		goto out_nomem;
+		goto out_free_lockowner_slab;
 	stateid_slab = kmem_cache_create("nfsd4_stateids",
 			sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
 	if (stateid_slab == NULL)
-		goto out_nomem;
+		goto out_free_file_slab;
 	deleg_slab = kmem_cache_create("nfsd4_delegations",
 			sizeof(struct nfs4_delegation), 0, 0, NULL);
 	if (deleg_slab == NULL)
-		goto out_nomem;
+		goto out_free_stateid_slab;
 	return 0;
-out_nomem:
-	nfsd4_free_slabs();
+
+out_free_stateid_slab:
+	kmem_cache_destroy(stateid_slab);
+out_free_file_slab:
+	kmem_cache_destroy(file_slab);
+out_free_lockowner_slab:
+	kmem_cache_destroy(lockowner_slab);
+out_free_openowner_slab:
+	kmem_cache_destroy(openowner_slab);
+out:
 	dprintk("nfsd4: out of memory while initializing nfsv4\n");
 	return -ENOMEM;
 }
 
-void nfs4_free_openowner(struct nfs4_openowner *oo)
-{
-	kfree(oo->oo_owner.so_owner.data);
-	kmem_cache_free(openowner_slab, oo);
-}
-
-void nfs4_free_lockowner(struct nfs4_lockowner *lo)
-{
-	kfree(lo->lo_owner.so_owner.data);
-	kmem_cache_free(lockowner_slab, lo);
-}
-
 static void init_nfs4_replay(struct nfs4_replay *rp)
 {
 	rp->rp_status = nfserr_serverfault;
@@ -2685,15 +2712,15 @@
 	unsigned int hashval = file_hashval(ino);
 	struct nfs4_file *fp;
 
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
 		if (fp->fi_inode == ino) {
 			get_nfs4_file(fp);
-			spin_unlock(&recall_lock);
+			spin_unlock(&state_lock);
 			return fp;
 		}
 	}
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 	return NULL;
 }
 
@@ -2730,6 +2757,7 @@
 	struct nfs4_client *clp = dp->dl_stid.sc_client;
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
+	lockdep_assert_held(&state_lock);
 	/* We're assuming the state code never drops its reference
 	 * without first removing the lease.  Since we're in this lease
 	 * callback (and since the lease code is serialized by the kernel
@@ -2766,11 +2794,11 @@
 	 */
 	fl->fl_break_time = 0;
 
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	fp->fi_had_conflict = true;
 	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
 		nfsd_break_one_deleg(dp);
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 }
 
 static
@@ -3047,11 +3075,12 @@
 	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
 	if (status)
 		goto out_free;
-	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
 	fp->fi_lease = fl;
 	fp->fi_deleg_file = get_file(fl->fl_file);
 	atomic_set(&fp->fi_delegees, 1);
-	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	spin_lock(&state_lock);
+	hash_delegation_locked(dp, fp);
+	spin_unlock(&state_lock);
 	return 0;
 out_free:
 	locks_free_lock(fl);
@@ -3060,33 +3089,21 @@
 
 static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
 {
-	int status;
-
 	if (fp->fi_had_conflict)
 		return -EAGAIN;
 	get_nfs4_file(fp);
 	dp->dl_file = fp;
-	if (!fp->fi_lease) {
-		status = nfs4_setlease(dp);
-		if (status)
-			goto out_free;
-		return 0;
-	}
-	spin_lock(&recall_lock);
-	if (fp->fi_had_conflict) {
-		spin_unlock(&recall_lock);
-		status = -EAGAIN;
-		goto out_free;
-	}
+	if (!fp->fi_lease)
+		return nfs4_setlease(dp);
+	spin_lock(&state_lock);
 	atomic_inc(&fp->fi_delegees);
-	list_add(&dp->dl_perfile, &fp->fi_delegations);
-	spin_unlock(&recall_lock);
-	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+	if (fp->fi_had_conflict) {
+		spin_unlock(&state_lock);
+		return -EAGAIN;
+	}
+	hash_delegation_locked(dp, fp);
+	spin_unlock(&state_lock);
 	return 0;
-out_free:
-	put_nfs4_file(fp);
-	dp->dl_file = fp;
-	return status;
 }
 
 static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3173,8 +3190,7 @@
 	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
 	return;
 out_free:
-	remove_stid(&dp->dl_stid);
-	nfs4_put_delegation(dp);
+	destroy_delegation(dp);
 out_no_deleg:
 	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@@ -3391,8 +3407,7 @@
 	struct nfs4_delegation *dp;
 	struct list_head *pos, *next, reaplist;
 	time_t cutoff = get_seconds() - nn->nfsd4_lease;
-	time_t t, clientid_val = nn->nfsd4_lease;
-	time_t u, test_val = nn->nfsd4_lease;
+	time_t t, new_timeo = nn->nfsd4_lease;
 
 	nfs4_lock_state();
 
@@ -3404,8 +3419,7 @@
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
 			t = clp->cl_time - cutoff;
-			if (clientid_val > t)
-				clientid_val = t;
+			new_timeo = min(new_timeo, t);
 			break;
 		}
 		if (mark_client_expired_locked(clp)) {
@@ -3422,39 +3436,35 @@
 			clp->cl_clientid.cl_id);
 		expire_client(clp);
 	}
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
 			continue;
 		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
-			u = dp->dl_time - cutoff;
-			if (test_val > u)
-				test_val = u;
+			t = dp->dl_time - cutoff;
+			new_timeo = min(new_timeo, t);
 			break;
 		}
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 	list_for_each_safe(pos, next, &reaplist) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		revoke_delegation(dp);
 	}
-	test_val = nn->nfsd4_lease;
 	list_for_each_safe(pos, next, &nn->close_lru) {
 		oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
 		if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
-			u = oo->oo_time - cutoff;
-			if (test_val > u)
-				test_val = u;
+			t = oo->oo_time - cutoff;
+			new_timeo = min(new_timeo, t);
 			break;
 		}
 		release_openowner(oo);
 	}
-	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
-		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
 	nfs4_unlock_state();
-	return clientid_val;
+	return new_timeo;
 }
 
 static struct workqueue_struct *laundry_wq;
@@ -3654,6 +3664,7 @@
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct file *file = NULL;
 	__be32 status;
 
 	if (filpp)
@@ -3665,10 +3676,12 @@
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return check_special_stateids(net, current_fh, stateid, flags);
 
+	nfs4_lock_state();
+
 	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
 				      &s, cstate->minorversion, nn);
 	if (status)
-		return status;
+		goto out;
 	status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
 	if (status)
 		goto out;
@@ -3679,8 +3692,8 @@
 		if (status)
 			goto out;
 		if (filpp) {
-			*filpp = dp->dl_file->fi_deleg_file;
-			if (!*filpp) {
+			file = dp->dl_file->fi_deleg_file;
+			if (!file) {
 				WARN_ON_ONCE(1);
 				status = nfserr_serverfault;
 				goto out;
@@ -3701,16 +3714,20 @@
 			goto out;
 		if (filpp) {
 			if (flags & RD_STATE)
-				*filpp = find_readable_file(stp->st_file);
+				file = find_readable_file(stp->st_file);
 			else
-				*filpp = find_writeable_file(stp->st_file);
+				file = find_writeable_file(stp->st_file);
 		}
 		break;
 	default:
-		return nfserr_bad_stateid;
+		status = nfserr_bad_stateid;
+		goto out;
 	}
 	status = nfs_ok;
+	if (file)
+		*filpp = get_file(file);
 out:
+	nfs4_unlock_state();
 	return status;
 }
 
@@ -3726,7 +3743,7 @@
 	 * correspondance, and we have to delete the lockowner when we
 	 * delete the lock stateid:
 	 */
-	unhash_lockowner(lo);
+	release_lockowner(lo);
 	return nfs_ok;
 }
 
@@ -4896,6 +4913,7 @@
 	struct nfs4_delegation *dp, *next;
 	u64 count = 0;
 
+	lockdep_assert_held(&state_lock);
 	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
 		if (victims)
 			list_move(&dp->dl_recall_lru, victims);
@@ -4911,9 +4929,9 @@
 	LIST_HEAD(victims);
 	u64 count;
 
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	count = nfsd_find_all_delegations(clp, max, &victims);
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 
 	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
 		revoke_delegation(dp);
@@ -4927,11 +4945,11 @@
 	LIST_HEAD(victims);
 	u64 count;
 
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	count = nfsd_find_all_delegations(clp, max, &victims);
 	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
 		nfsd_break_one_deleg(dp);
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 
 	return count;
 }
@@ -4940,9 +4958,9 @@
 {
 	u64 count = 0;
 
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	count = nfsd_find_all_delegations(clp, max, NULL);
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 
 	nfsd_print_count(clp, count, "delegations");
 	return count;
@@ -4983,13 +5001,6 @@
 
 #endif /* CONFIG_NFSD_FAULT_INJECTION */
 
-/* initialization to perform at module load time: */
-
-void
-nfs4_state_init(void)
-{
-}
-
 /*
  * Since the lifetime of a delegation isn't limited to that of an open, a
  * client may quite reasonably hang on to a delegation as long as it has
@@ -5160,12 +5171,12 @@
 
 	nfs4_lock_state();
 	INIT_LIST_HEAD(&reaplist);
-	spin_lock(&recall_lock);
+	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
-	spin_unlock(&recall_lock);
+	spin_unlock(&state_lock);
 	list_for_each_safe(pos, next, &reaplist) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		destroy_delegation(dp);

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 18881f3..2d305a1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c

@@ -98,11 +98,6 @@
 	status = nfserr_bad_xdr;		\
 	goto out
 
-#define READ32(x)         (x) = ntohl(*p++)
-#define READ64(x)         do {			\
-	(x) = (u64)ntohl(*p++) << 32;		\
-	(x) |= ntohl(*p++);			\
-} while (0)
 #define READMEM(x,nbytes) do {			\
 	x = (char *)p;				\
 	p += XDR_QUADLEN(nbytes);		\
@@ -248,17 +243,17 @@
 	bmval[2] = 0;
 
 	READ_BUF(4);
-	READ32(bmlen);
+	bmlen = be32_to_cpup(p++);
 	if (bmlen > 1000)
 		goto xdr_error;
 
 	READ_BUF(bmlen << 2);
 	if (bmlen > 0)
-		READ32(bmval[0]);
+		bmval[0] = be32_to_cpup(p++);
 	if (bmlen > 1)
-		READ32(bmval[1]);
+		bmval[1] = be32_to_cpup(p++);
 	if (bmlen > 2)
-		READ32(bmval[2]);
+		bmval[2] = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -270,6 +265,7 @@
 {
 	int expected_len, len = 0;
 	u32 dummy32;
+	u64 sec;
 	char *buf;
 
 	DECODE_HEAD;
@@ -278,12 +274,12 @@
 		return status;
 
 	READ_BUF(4);
-	READ32(expected_len);
+	expected_len = be32_to_cpup(p++);
 
 	if (bmval[0] & FATTR4_WORD0_SIZE) {
 		READ_BUF(8);
 		len += 8;
-		READ64(iattr->ia_size);
+		p = xdr_decode_hyper(p, &iattr->ia_size);
 		iattr->ia_valid |= ATTR_SIZE;
 	}
 	if (bmval[0] & FATTR4_WORD0_ACL) {
@@ -291,7 +287,7 @@
 		struct nfs4_ace *ace;
 
 		READ_BUF(4); len += 4;
-		READ32(nace);
+		nace = be32_to_cpup(p++);
 
 		if (nace > NFS4_ACL_MAX)
 			return nfserr_fbig;
@@ -305,10 +301,10 @@
 		(*acl)->naces = nace;
 		for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
 			READ_BUF(16); len += 16;
-			READ32(ace->type);
-			READ32(ace->flag);
-			READ32(ace->access_mask);
-			READ32(dummy32);
+			ace->type = be32_to_cpup(p++);
+			ace->flag = be32_to_cpup(p++);
+			ace->access_mask = be32_to_cpup(p++);
+			dummy32 = be32_to_cpup(p++);
 			READ_BUF(dummy32);
 			len += XDR_QUADLEN(dummy32) << 2;
 			READMEM(buf, dummy32);
@@ -330,14 +326,14 @@
 	if (bmval[1] & FATTR4_WORD1_MODE) {
 		READ_BUF(4);
 		len += 4;
-		READ32(iattr->ia_mode);
+		iattr->ia_mode = be32_to_cpup(p++);
 		iattr->ia_mode &= (S_IFMT | S_IALLUGO);
 		iattr->ia_valid |= ATTR_MODE;
 	}
 	if (bmval[1] & FATTR4_WORD1_OWNER) {
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32);
+		dummy32 = be32_to_cpup(p++);
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
@@ -348,7 +344,7 @@
 	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32);
+		dummy32 = be32_to_cpup(p++);
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
@@ -359,15 +355,16 @@
 	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32);
+		dummy32 = be32_to_cpup(p++);
 		switch (dummy32) {
 		case NFS4_SET_TO_CLIENT_TIME:
 			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
 			   all 32 bits of 'nseconds'. */
 			READ_BUF(12);
 			len += 12;
-			READ64(iattr->ia_atime.tv_sec);
-			READ32(iattr->ia_atime.tv_nsec);
+			p = xdr_decode_hyper(p, &sec);
+			iattr->ia_atime.tv_sec = (time_t)sec;
+			iattr->ia_atime.tv_nsec = be32_to_cpup(p++);
 			if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
 				return nfserr_inval;
 			iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -382,15 +379,16 @@
 	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32);
+		dummy32 = be32_to_cpup(p++);
 		switch (dummy32) {
 		case NFS4_SET_TO_CLIENT_TIME:
 			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
 			   all 32 bits of 'nseconds'. */
 			READ_BUF(12);
 			len += 12;
-			READ64(iattr->ia_mtime.tv_sec);
-			READ32(iattr->ia_mtime.tv_nsec);
+			p = xdr_decode_hyper(p, &sec);
+			iattr->ia_mtime.tv_sec = sec;
+			iattr->ia_mtime.tv_nsec = be32_to_cpup(p++);
 			if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
 				return nfserr_inval;
 			iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -408,13 +406,13 @@
 	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32); /* lfs: we don't use it */
+		dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32); /* pi: we don't use it either */
+		dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */
 		READ_BUF(4);
 		len += 4;
-		READ32(dummy32);
+		dummy32 = be32_to_cpup(p++);
 		READ_BUF(dummy32);
 		if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)
 			return nfserr_badlabel;
@@ -445,7 +443,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(sizeof(stateid_t));
-	READ32(sid->si_generation);
+	sid->si_generation = be32_to_cpup(p++);
 	COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
 
 	DECODE_TAIL;
@@ -457,7 +455,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(access->ac_req_access);
+	access->ac_req_access = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -472,7 +470,7 @@
 
 	/* callback_sec_params4 */
 	READ_BUF(4);
-	READ32(nr_secflavs);
+	nr_secflavs = be32_to_cpup(p++);
 	if (nr_secflavs)
 		cbs->flavor = (u32)(-1);
 	else
@@ -480,7 +478,7 @@
 		cbs->flavor = 0;
 	for (i = 0; i < nr_secflavs; ++i) {
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		switch (dummy) {
 		case RPC_AUTH_NULL:
 			/* Nothing to read */
@@ -490,21 +488,21 @@
 		case RPC_AUTH_UNIX:
 			READ_BUF(8);
 			/* stamp */
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 
 			/* machine name */
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy);
 			SAVEMEM(machine_name, dummy);
 
 			/* uid, gid */
 			READ_BUF(8);
-			READ32(uid);
-			READ32(gid);
+			uid = be32_to_cpup(p++);
+			gid = be32_to_cpup(p++);
 
 			/* more gids */
 			READ_BUF(4);
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy * 4);
 			if (cbs->flavor == (u32)(-1)) {
 				kuid_t kuid = make_kuid(&init_user_ns, uid);
@@ -524,14 +522,14 @@
 				"not supported!\n");
 			READ_BUF(8);
 			/* gcbp_service */
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			/* gcbp_handle_from_server */
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy);
 			p += XDR_QUADLEN(dummy);
 			/* gcbp_handle_from_client */
 			READ_BUF(4);
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy);
 			break;
 		default:
@@ -547,7 +545,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(bc->bc_cb_program);
+	bc->bc_cb_program = be32_to_cpup(p++);
 	nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
 
 	DECODE_TAIL;
@@ -559,7 +557,7 @@
 
 	READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
 	COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
-	READ32(bcts->dir);
+	bcts->dir = be32_to_cpup(p++);
 	/* XXX: skipping ctsa_use_conn_in_rdma_mode.  Perhaps Tom Tucker
 	 * could help us figure out we should be using it. */
 	DECODE_TAIL;
@@ -571,7 +569,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(close->cl_seqid);
+	close->cl_seqid = be32_to_cpup(p++);
 	return nfsd4_decode_stateid(argp, &close->cl_stateid);
 
 	DECODE_TAIL;
@@ -584,8 +582,8 @@
 	DECODE_HEAD;
 
 	READ_BUF(12);
-	READ64(commit->co_offset);
-	READ32(commit->co_count);
+	p = xdr_decode_hyper(p, &commit->co_offset);
+	commit->co_count = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -596,19 +594,19 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(create->cr_type);
+	create->cr_type = be32_to_cpup(p++);
 	switch (create->cr_type) {
 	case NF4LNK:
 		READ_BUF(4);
-		READ32(create->cr_linklen);
+		create->cr_linklen = be32_to_cpup(p++);
 		READ_BUF(create->cr_linklen);
 		SAVEMEM(create->cr_linkname, create->cr_linklen);
 		break;
 	case NF4BLK:
 	case NF4CHR:
 		READ_BUF(8);
-		READ32(create->cr_specdata1);
-		READ32(create->cr_specdata2);
+		create->cr_specdata1 = be32_to_cpup(p++);
+		create->cr_specdata2 = be32_to_cpup(p++);
 		break;
 	case NF4SOCK:
 	case NF4FIFO:
@@ -618,7 +616,7 @@
 	}
 
 	READ_BUF(4);
-	READ32(create->cr_namelen);
+	create->cr_namelen = be32_to_cpup(p++);
 	READ_BUF(create->cr_namelen);
 	SAVEMEM(create->cr_name, create->cr_namelen);
 	if ((status = check_filename(create->cr_name, create->cr_namelen)))
@@ -650,7 +648,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(link->li_namelen);
+	link->li_namelen = be32_to_cpup(p++);
 	READ_BUF(link->li_namelen);
 	SAVEMEM(link->li_name, link->li_namelen);
 	if ((status = check_filename(link->li_name, link->li_namelen)))
@@ -668,24 +666,24 @@
 	* type, reclaim(boolean), offset, length, new_lock_owner(boolean)
 	*/
 	READ_BUF(28);
-	READ32(lock->lk_type);
+	lock->lk_type = be32_to_cpup(p++);
 	if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
 		goto xdr_error;
-	READ32(lock->lk_reclaim);
-	READ64(lock->lk_offset);
-	READ64(lock->lk_length);
-	READ32(lock->lk_is_new);
+	lock->lk_reclaim = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &lock->lk_offset);
+	p = xdr_decode_hyper(p, &lock->lk_length);
+	lock->lk_is_new = be32_to_cpup(p++);
 
 	if (lock->lk_is_new) {
 		READ_BUF(4);
-		READ32(lock->lk_new_open_seqid);
+		lock->lk_new_open_seqid = be32_to_cpup(p++);
 		status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
 		if (status)
 			return status;
 		READ_BUF(8 + sizeof(clientid_t));
-		READ32(lock->lk_new_lock_seqid);
+		lock->lk_new_lock_seqid = be32_to_cpup(p++);
 		COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
-		READ32(lock->lk_new_owner.len);
+		lock->lk_new_owner.len = be32_to_cpup(p++);
 		READ_BUF(lock->lk_new_owner.len);
 		READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
 	} else {
@@ -693,7 +691,7 @@
 		if (status)
 			return status;
 		READ_BUF(4);
-		READ32(lock->lk_old_lock_seqid);
+		lock->lk_old_lock_seqid = be32_to_cpup(p++);
 	}
 
 	DECODE_TAIL;
@@ -705,13 +703,13 @@
 	DECODE_HEAD;
 		        
 	READ_BUF(32);
-	READ32(lockt->lt_type);
+	lockt->lt_type = be32_to_cpup(p++);
 	if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
 		goto xdr_error;
-	READ64(lockt->lt_offset);
-	READ64(lockt->lt_length);
+	p = xdr_decode_hyper(p, &lockt->lt_offset);
+	p = xdr_decode_hyper(p, &lockt->lt_length);
 	COPYMEM(&lockt->lt_clientid, 8);
-	READ32(lockt->lt_owner.len);
+	lockt->lt_owner.len = be32_to_cpup(p++);
 	READ_BUF(lockt->lt_owner.len);
 	READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
 
@@ -724,16 +722,16 @@
 	DECODE_HEAD;
 
 	READ_BUF(8);
-	READ32(locku->lu_type);
+	locku->lu_type = be32_to_cpup(p++);
 	if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
 		goto xdr_error;
-	READ32(locku->lu_seqid);
+	locku->lu_seqid = be32_to_cpup(p++);
 	status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
 	if (status)
 		return status;
 	READ_BUF(16);
-	READ64(locku->lu_offset);
-	READ64(locku->lu_length);
+	p = xdr_decode_hyper(p, &locku->lu_offset);
+	p = xdr_decode_hyper(p, &locku->lu_length);
 
 	DECODE_TAIL;
 }
@@ -744,7 +742,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(lookup->lo_len);
+	lookup->lo_len = be32_to_cpup(p++);
 	READ_BUF(lookup->lo_len);
 	SAVEMEM(lookup->lo_name, lookup->lo_len);
 	if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
@@ -759,7 +757,7 @@
 	u32 w;
 
 	READ_BUF(4);
-	READ32(w);
+	w = be32_to_cpup(p++);
 	*share_access = w & NFS4_SHARE_ACCESS_MASK;
 	*deleg_want = w & NFS4_SHARE_WANT_MASK;
 	if (deleg_when)
@@ -811,7 +809,7 @@
 	__be32 *p;
 
 	READ_BUF(4);
-	READ32(*x);
+	*x = be32_to_cpup(p++);
 	/* Note: unlinke access bits, deny bits may be zero. */
 	if (*x & ~NFS4_SHARE_DENY_BOTH)
 		return nfserr_bad_xdr;
@@ -825,7 +823,7 @@
 	__be32 *p;
 
 	READ_BUF(4);
-	READ32(o->len);
+	o->len = be32_to_cpup(p++);
 
 	if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
 		return nfserr_bad_xdr;
@@ -850,7 +848,7 @@
 	open->op_xdr_error = 0;
 	/* seqid, share_access, share_deny, clientid, ownerlen */
 	READ_BUF(4);
-	READ32(open->op_seqid);
+	open->op_seqid = be32_to_cpup(p++);
 	/* decode, yet ignore deleg_when until supported */
 	status = nfsd4_decode_share_access(argp, &open->op_share_access,
 					   &open->op_deleg_want, &dummy);
@@ -865,13 +863,13 @@
 	if (status)
 		goto xdr_error;
 	READ_BUF(4);
-	READ32(open->op_create);
+	open->op_create = be32_to_cpup(p++);
 	switch (open->op_create) {
 	case NFS4_OPEN_NOCREATE:
 		break;
 	case NFS4_OPEN_CREATE:
 		READ_BUF(4);
-		READ32(open->op_createmode);
+		open->op_createmode = be32_to_cpup(p++);
 		switch (open->op_createmode) {
 		case NFS4_CREATE_UNCHECKED:
 		case NFS4_CREATE_GUARDED:
@@ -904,12 +902,12 @@
 
 	/* open_claim */
 	READ_BUF(4);
-	READ32(open->op_claim_type);
+	open->op_claim_type = be32_to_cpup(p++);
 	switch (open->op_claim_type) {
 	case NFS4_OPEN_CLAIM_NULL:
 	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
 		READ_BUF(4);
-		READ32(open->op_fname.len);
+		open->op_fname.len = be32_to_cpup(p++);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
 		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
@@ -917,14 +915,14 @@
 		break;
 	case NFS4_OPEN_CLAIM_PREVIOUS:
 		READ_BUF(4);
-		READ32(open->op_delegate_type);
+		open->op_delegate_type = be32_to_cpup(p++);
 		break;
 	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
 		status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
 		if (status)
 			return status;
 		READ_BUF(4);
-		READ32(open->op_fname.len);
+		open->op_fname.len = be32_to_cpup(p++);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
 		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
@@ -962,7 +960,7 @@
 	if (status)
 		return status;
 	READ_BUF(4);
-	READ32(open_conf->oc_seqid);
+	open_conf->oc_seqid = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -976,7 +974,7 @@
 	if (status)
 		return status;
 	READ_BUF(4);
-	READ32(open_down->od_seqid);
+	open_down->od_seqid = be32_to_cpup(p++);
 	status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
 					   &open_down->od_deleg_want, NULL);
 	if (status)
@@ -993,7 +991,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(putfh->pf_fhlen);
+	putfh->pf_fhlen = be32_to_cpup(p++);
 	if (putfh->pf_fhlen > NFS4_FHSIZE)
 		goto xdr_error;
 	READ_BUF(putfh->pf_fhlen);
@@ -1019,8 +1017,8 @@
 	if (status)
 		return status;
 	READ_BUF(12);
-	READ64(read->rd_offset);
-	READ32(read->rd_length);
+	p = xdr_decode_hyper(p, &read->rd_offset);
+	read->rd_length = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -1031,10 +1029,10 @@
 	DECODE_HEAD;
 
 	READ_BUF(24);
-	READ64(readdir->rd_cookie);
+	p = xdr_decode_hyper(p, &readdir->rd_cookie);
 	COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
-	READ32(readdir->rd_dircount);    /* just in case you needed a useless field... */
-	READ32(readdir->rd_maxcount);
+	readdir->rd_dircount = be32_to_cpup(p++);
+	readdir->rd_maxcount = be32_to_cpup(p++);
 	if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
 		goto out;
 
@@ -1047,7 +1045,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(remove->rm_namelen);
+	remove->rm_namelen = be32_to_cpup(p++);
 	READ_BUF(remove->rm_namelen);
 	SAVEMEM(remove->rm_name, remove->rm_namelen);
 	if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
@@ -1062,10 +1060,10 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(rename->rn_snamelen);
+	rename->rn_snamelen = be32_to_cpup(p++);
 	READ_BUF(rename->rn_snamelen + 4);
 	SAVEMEM(rename->rn_sname, rename->rn_snamelen);
-	READ32(rename->rn_tnamelen);
+	rename->rn_tnamelen = be32_to_cpup(p++);
 	READ_BUF(rename->rn_tnamelen);
 	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
 	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
@@ -1097,7 +1095,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(secinfo->si_namelen);
+	secinfo->si_namelen = be32_to_cpup(p++);
 	READ_BUF(secinfo->si_namelen);
 	SAVEMEM(secinfo->si_name, secinfo->si_namelen);
 	status = check_filename(secinfo->si_name, secinfo->si_namelen);
@@ -1113,7 +1111,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(sin->sin_style);
+	sin->sin_style = be32_to_cpup(p++);
 	DECODE_TAIL;
 }
 
@@ -1144,16 +1142,16 @@
 	if (status)
 		return nfserr_bad_xdr;
 	READ_BUF(8);
-	READ32(setclientid->se_callback_prog);
-	READ32(setclientid->se_callback_netid_len);
+	setclientid->se_callback_prog = be32_to_cpup(p++);
+	setclientid->se_callback_netid_len = be32_to_cpup(p++);
 
 	READ_BUF(setclientid->se_callback_netid_len + 4);
 	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
-	READ32(setclientid->se_callback_addr_len);
+	setclientid->se_callback_addr_len = be32_to_cpup(p++);
 
 	READ_BUF(setclientid->se_callback_addr_len + 4);
 	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
-	READ32(setclientid->se_callback_ident);
+	setclientid->se_callback_ident = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -1186,7 +1184,7 @@
 	 * nfsd4_proc_verify */
 
 	READ_BUF(4);
-	READ32(verify->ve_attrlen);
+	verify->ve_attrlen = be32_to_cpup(p++);
 	READ_BUF(verify->ve_attrlen);
 	SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
 
@@ -1204,11 +1202,11 @@
 	if (status)
 		return status;
 	READ_BUF(16);
-	READ64(write->wr_offset);
-	READ32(write->wr_stable_how);
+	p = xdr_decode_hyper(p, &write->wr_offset);
+	write->wr_stable_how = be32_to_cpup(p++);
 	if (write->wr_stable_how > 2)
 		goto xdr_error;
-	READ32(write->wr_buflen);
+	write->wr_buflen = be32_to_cpup(p++);
 
 	/* Sorry .. no magic macros for this.. *
 	 * READ_BUF(write->wr_buflen);
@@ -1254,7 +1252,7 @@
 
 	READ_BUF(12);
 	COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
-	READ32(rlockowner->rl_owner.len);
+	rlockowner->rl_owner.len = be32_to_cpup(p++);
 	READ_BUF(rlockowner->rl_owner.len);
 	READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
 
@@ -1278,63 +1276,63 @@
 		return nfserr_bad_xdr;
 
 	READ_BUF(4);
-	READ32(exid->flags);
+	exid->flags = be32_to_cpup(p++);
 
 	/* Ignore state_protect4_a */
 	READ_BUF(4);
-	READ32(exid->spa_how);
+	exid->spa_how = be32_to_cpup(p++);
 	switch (exid->spa_how) {
 	case SP4_NONE:
 		break;
 	case SP4_MACH_CRED:
 		/* spo_must_enforce */
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy * 4);
 		p += dummy;
 
 		/* spo_must_allow */
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy * 4);
 		p += dummy;
 		break;
 	case SP4_SSV:
 		/* ssp_ops */
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy * 4);
 		p += dummy;
 
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy * 4);
 		p += dummy;
 
 		/* ssp_hash_algs<> */
 		READ_BUF(4);
-		READ32(tmp);
+		tmp = be32_to_cpup(p++);
 		while (tmp--) {
 			READ_BUF(4);
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy);
 			p += XDR_QUADLEN(dummy);
 		}
 
 		/* ssp_encr_algs<> */
 		READ_BUF(4);
-		READ32(tmp);
+		tmp = be32_to_cpup(p++);
 		while (tmp--) {
 			READ_BUF(4);
-			READ32(dummy);
+			dummy = be32_to_cpup(p++);
 			READ_BUF(dummy);
 			p += XDR_QUADLEN(dummy);
 		}
 
 		/* ssp_window and ssp_num_gss_handles */
 		READ_BUF(8);
-		READ32(dummy);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
+		dummy = be32_to_cpup(p++);
 		break;
 	default:
 		goto xdr_error;
@@ -1342,7 +1340,7 @@
 
 	/* Ignore Implementation ID */
 	READ_BUF(4);    /* nfs_impl_id4 array length */
-	READ32(dummy);
+	dummy = be32_to_cpup(p++);
 
 	if (dummy > 1)
 		goto xdr_error;
@@ -1350,13 +1348,13 @@
 	if (dummy == 1) {
 		/* nii_domain */
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy);
 		p += XDR_QUADLEN(dummy);
 
 		/* nii_name */
 		READ_BUF(4);
-		READ32(dummy);
+		dummy = be32_to_cpup(p++);
 		READ_BUF(dummy);
 		p += XDR_QUADLEN(dummy);
 
@@ -1376,21 +1374,21 @@
 
 	READ_BUF(16);
 	COPYMEM(&sess->clientid, 8);
-	READ32(sess->seqid);
-	READ32(sess->flags);
+	sess->seqid = be32_to_cpup(p++);
+	sess->flags = be32_to_cpup(p++);
 
 	/* Fore channel attrs */
 	READ_BUF(28);
-	READ32(dummy); /* headerpadsz is always 0 */
-	READ32(sess->fore_channel.maxreq_sz);
-	READ32(sess->fore_channel.maxresp_sz);
-	READ32(sess->fore_channel.maxresp_cached);
-	READ32(sess->fore_channel.maxops);
-	READ32(sess->fore_channel.maxreqs);
-	READ32(sess->fore_channel.nr_rdma_attrs);
+	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+	sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
+	sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
+	sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
+	sess->fore_channel.maxops = be32_to_cpup(p++);
+	sess->fore_channel.maxreqs = be32_to_cpup(p++);
+	sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);
 	if (sess->fore_channel.nr_rdma_attrs == 1) {
 		READ_BUF(4);
-		READ32(sess->fore_channel.rdma_attrs);
+		sess->fore_channel.rdma_attrs = be32_to_cpup(p++);
 	} else if (sess->fore_channel.nr_rdma_attrs > 1) {
 		dprintk("Too many fore channel attr bitmaps!\n");
 		goto xdr_error;
@@ -1398,23 +1396,23 @@
 
 	/* Back channel attrs */
 	READ_BUF(28);
-	READ32(dummy); /* headerpadsz is always 0 */
-	READ32(sess->back_channel.maxreq_sz);
-	READ32(sess->back_channel.maxresp_sz);
-	READ32(sess->back_channel.maxresp_cached);
-	READ32(sess->back_channel.maxops);
-	READ32(sess->back_channel.maxreqs);
-	READ32(sess->back_channel.nr_rdma_attrs);
+	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+	sess->back_channel.maxreq_sz = be32_to_cpup(p++);
+	sess->back_channel.maxresp_sz = be32_to_cpup(p++);
+	sess->back_channel.maxresp_cached = be32_to_cpup(p++);
+	sess->back_channel.maxops = be32_to_cpup(p++);
+	sess->back_channel.maxreqs = be32_to_cpup(p++);
+	sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);
 	if (sess->back_channel.nr_rdma_attrs == 1) {
 		READ_BUF(4);
-		READ32(sess->back_channel.rdma_attrs);
+		sess->back_channel.rdma_attrs = be32_to_cpup(p++);
 	} else if (sess->back_channel.nr_rdma_attrs > 1) {
 		dprintk("Too many back channel attr bitmaps!\n");
 		goto xdr_error;
 	}
 
 	READ_BUF(4);
-	READ32(sess->callback_prog);
+	sess->callback_prog = be32_to_cpup(p++);
 	nfsd4_decode_cb_sec(argp, &sess->cb_sec);
 	DECODE_TAIL;
 }
@@ -1437,7 +1435,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(sizeof(stateid_t));
-	READ32(free_stateid->fr_stateid.si_generation);
+	free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);
 	COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
 
 	DECODE_TAIL;
@@ -1451,10 +1449,10 @@
 
 	READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
 	COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
-	READ32(seq->seqid);
-	READ32(seq->slotid);
-	READ32(seq->maxslots);
-	READ32(seq->cachethis);
+	seq->seqid = be32_to_cpup(p++);
+	seq->slotid = be32_to_cpup(p++);
+	seq->maxslots = be32_to_cpup(p++);
+	seq->cachethis = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -1511,7 +1509,7 @@
 	DECODE_HEAD;
 
 	READ_BUF(4);
-	READ32(rc->rca_one_fs);
+	rc->rca_one_fs = be32_to_cpup(p++);
 
 	DECODE_TAIL;
 }
@@ -1605,47 +1603,25 @@
 	return true;
 }
 
-/*
- * Return a rough estimate of the maximum possible reply size.  Note the
- * estimate includes rpc headers so is meant to be passed to
- * svc_reserve, not svc_reserve_auth.
- *
- * Also note the current compound encoding permits only one operation to
- * use pages beyond the first one, so the maximum possible length is the
- * maximum over these values, not the sum.
- */
-static int nfsd4_max_reply(u32 opnum)
-{
-	switch (opnum) {
-	case OP_READLINK:
-	case OP_READDIR:
-		/*
-		 * Both of these ops take a single page for data and put
-		 * the head and tail in another page:
-		 */
-		return 2 * PAGE_SIZE;
-	case OP_READ:
-		return INT_MAX;
-	default:
-		return PAGE_SIZE;
-	}
-}
-
 static __be32
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
 	DECODE_HEAD;
 	struct nfsd4_op *op;
 	bool cachethis = false;
-	int max_reply = PAGE_SIZE;
+	int auth_slack= argp->rqstp->rq_auth_slack;
+	int max_reply = auth_slack + 8; /* opcnt, status */
+	int readcount = 0;
+	int readbytes = 0;
 	int i;
 
 	READ_BUF(4);
-	READ32(argp->taglen);
+	argp->taglen = be32_to_cpup(p++);
 	READ_BUF(argp->taglen + 8);
 	SAVEMEM(argp->tag, argp->taglen);
-	READ32(argp->minorversion);
-	READ32(argp->opcnt);
+	argp->minorversion = be32_to_cpup(p++);
+	argp->opcnt = be32_to_cpup(p++);
+	max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
 
 	if (argp->taglen > NFSD4_MAX_TAGLEN)
 		goto xdr_error;
@@ -1669,7 +1645,7 @@
 		op->replay = NULL;
 
 		READ_BUF(4);
-		READ32(op->opnum);
+		op->opnum = be32_to_cpup(p++);
 
 		if (nfsd4_opnum_in_range(argp, op))
 			op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
@@ -1677,97 +1653,82 @@
 			op->opnum = OP_ILLEGAL;
 			op->status = nfserr_op_illegal;
 		}
-
-		if (op->status) {
-			argp->opcnt = i+1;
-			break;
-		}
 		/*
 		 * We'll try to cache the result in the DRC if any one
 		 * op in the compound wants to be cached:
 		 */
 		cachethis |= nfsd4_cache_this_op(op);
 
-		max_reply = max(max_reply, nfsd4_max_reply(op->opnum));
+		if (op->opnum == OP_READ) {
+			readcount++;
+			readbytes += nfsd4_max_reply(argp->rqstp, op);
+		} else
+			max_reply += nfsd4_max_reply(argp->rqstp, op);
+
+		if (op->status) {
+			argp->opcnt = i+1;
+			break;
+		}
 	}
 	/* Sessions make the DRC unnecessary: */
 	if (argp->minorversion)
 		cachethis = false;
-	if (max_reply != INT_MAX)
-		svc_reserve(argp->rqstp, max_reply);
+	svc_reserve(argp->rqstp, max_reply + readbytes);
 	argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
 
+	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
+		argp->rqstp->rq_splice_ok = false;
+
 	DECODE_TAIL;
 }
 
-#define WRITE32(n)               *p++ = htonl(n)
-#define WRITE64(n)               do {				\
-	*p++ = htonl((u32)((n) >> 32));				\
-	*p++ = htonl((u32)(n));					\
-} while (0)
-#define WRITEMEM(ptr,nbytes)     do { if (nbytes > 0) {		\
-	*(p + XDR_QUADLEN(nbytes) -1) = 0;                      \
-	memcpy(p, ptr, nbytes);					\
-	p += XDR_QUADLEN(nbytes);				\
-}} while (0)
-
-static void write32(__be32 **p, u32 n)
-{
-	*(*p)++ = htonl(n);
-}
-
-static void write64(__be32 **p, u64 n)
-{
-	write32(p, (n >> 32));
-	write32(p, (u32)n);
-}
-
-static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
 {
 	if (IS_I_VERSION(inode)) {
-		write64(p, inode->i_version);
+		p = xdr_encode_hyper(p, inode->i_version);
 	} else {
-		write32(p, stat->ctime.tv_sec);
-		write32(p, stat->ctime.tv_nsec);
+		*p++ = cpu_to_be32(stat->ctime.tv_sec);
+		*p++ = cpu_to_be32(stat->ctime.tv_nsec);
 	}
+	return p;
 }
 
-static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
 {
-	write32(p, c->atomic);
+	*p++ = cpu_to_be32(c->atomic);
 	if (c->change_supported) {
-		write64(p, c->before_change);
-		write64(p, c->after_change);
+		p = xdr_encode_hyper(p, c->before_change);
+		p = xdr_encode_hyper(p, c->after_change);
 	} else {
-		write32(p, c->before_ctime_sec);
-		write32(p, c->before_ctime_nsec);
-		write32(p, c->after_ctime_sec);
-		write32(p, c->after_ctime_nsec);
+		*p++ = cpu_to_be32(c->before_ctime_sec);
+		*p++ = cpu_to_be32(c->before_ctime_nsec);
+		*p++ = cpu_to_be32(c->after_ctime_sec);
+		*p++ = cpu_to_be32(c->after_ctime_nsec);
 	}
+	return p;
 }
 
-#define RESERVE_SPACE(nbytes)	do {				\
-	p = resp->p;						\
-	BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);		\
-} while (0)
-#define ADJUST_ARGS()		resp->p = p
-
 /* Encode as an array of strings the string given with components
  * separated @sep, escaped with esc_enter and esc_exit.
  */
-static __be32 nfsd4_encode_components_esc(char sep, char *components,
-				   __be32 **pp, int *buflen,
-				   char esc_enter, char esc_exit)
+static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
+					  char *components, char esc_enter,
+					  char esc_exit)
 {
-	__be32 *p = *pp;
-	__be32 *countp = p;
+	__be32 *p;
+	__be32 pathlen;
+	int pathlen_offset;
 	int strlen, count=0;
 	char *str, *end, *next;
 
 	dprintk("nfsd4_encode_components(%s)\n", components);
-	if ((*buflen -= 4) < 0)
+
+	pathlen_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		return nfserr_resource;
-	WRITE32(0); /* We will fill this in with @count later */
+	p++; /* We will fill this in with @count later */
+
 	end = str = components;
 	while (*end) {
 		bool found_esc = false;
@@ -1789,59 +1750,57 @@
 
 		strlen = end - str;
 		if (strlen) {
-			if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+			p = xdr_reserve_space(xdr, strlen + 4);
+			if (!p)
 				return nfserr_resource;
-			WRITE32(strlen);
-			WRITEMEM(str, strlen);
+			p = xdr_encode_opaque(p, str, strlen);
 			count++;
 		}
 		else
 			end++;
 		str = end;
 	}
-	*pp = p;
-	p = countp;
-	WRITE32(count);
+	pathlen = htonl(xdr->buf->len - pathlen_offset);
+	write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4);
 	return 0;
 }
 
 /* Encode as an array of strings the string given with components
  * separated @sep.
  */
-static __be32 nfsd4_encode_components(char sep, char *components,
-				   __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep,
+				      char *components)
 {
-	return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0);
+	return nfsd4_encode_components_esc(xdr, sep, components, 0, 0);
 }
 
 /*
  * encode a location element of a fs_locations structure
  */
-static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
-				    __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr,
+					struct nfsd4_fs_location *location)
 {
 	__be32 status;
-	__be32 *p = *pp;
 
-	status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen,
+	status = nfsd4_encode_components_esc(xdr, ':', location->hosts,
 						'[', ']');
 	if (status)
 		return status;
-	status = nfsd4_encode_components('/', location->path, &p, buflen);
+	status = nfsd4_encode_components(xdr, '/', location->path);
 	if (status)
 		return status;
-	*pp = p;
 	return 0;
 }
 
 /*
  * Encode a path in RFC3530 'pathname4' format
  */
-static __be32 nfsd4_encode_path(const struct path *root,
-		const struct path *path, __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
+				const struct path *root,
+				const struct path *path)
 {
 	struct path cur = *path;
-	__be32 *p = *pp;
+	__be32 *p;
 	struct dentry **components = NULL;
 	unsigned int ncomponents = 0;
 	__be32 err = nfserr_jukebox;
@@ -1872,11 +1831,11 @@
 		components[ncomponents++] = cur.dentry;
 		cur.dentry = dget_parent(cur.dentry);
 	}
-
-	*buflen -= 4;
-	if (*buflen < 0)
+	err = nfserr_resource;
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto out_free;
-	WRITE32(ncomponents);
+	*p++ = cpu_to_be32(ncomponents);
 
 	while (ncomponents) {
 		struct dentry *dentry = components[ncomponents - 1];
@@ -1884,20 +1843,18 @@
 
 		spin_lock(&dentry->d_lock);
 		len = dentry->d_name.len;
-		*buflen -= 4 + (XDR_QUADLEN(len) << 2);
-		if (*buflen < 0) {
+		p = xdr_reserve_space(xdr, len + 4);
+		if (!p) {
 			spin_unlock(&dentry->d_lock);
 			goto out_free;
 		}
-		WRITE32(len);
-		WRITEMEM(dentry->d_name.name, len);
+		p = xdr_encode_opaque(p, dentry->d_name.name, len);
 		dprintk("/%s", dentry->d_name.name);
 		spin_unlock(&dentry->d_lock);
 		dput(dentry);
 		ncomponents--;
 	}
 
-	*pp = p;
 	err = 0;
 out_free:
 	dprintk(")\n");
@@ -1908,8 +1865,8 @@
 	return err;
 }
 
-static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
-		const struct path *path, __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr,
+			struct svc_rqst *rqstp, const struct path *path)
 {
 	struct svc_export *exp_ps;
 	__be32 res;
@@ -1917,7 +1874,7 @@
 	exp_ps = rqst_find_fsidzero_export(rqstp);
 	if (IS_ERR(exp_ps))
 		return nfserrno(PTR_ERR(exp_ps));
-	res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen);
+	res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path);
 	exp_put(exp_ps);
 	return res;
 }
@@ -1925,28 +1882,26 @@
 /*
  *  encode a fs_locations structure
  */
-static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
-				     struct svc_export *exp,
-				     __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr,
+			struct svc_rqst *rqstp, struct svc_export *exp)
 {
 	__be32 status;
 	int i;
-	__be32 *p = *pp;
+	__be32 *p;
 	struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
 
-	status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen);
+	status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path);
 	if (status)
 		return status;
-	if ((*buflen -= 4) < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		return nfserr_resource;
-	WRITE32(fslocs->locations_count);
+	*p++ = cpu_to_be32(fslocs->locations_count);
 	for (i=0; i<fslocs->locations_count; i++) {
-		status = nfsd4_encode_fs_location4(&fslocs->locations[i],
-						   &p, buflen);
+		status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]);
 		if (status)
 			return status;
 	}
-	*pp = p;
 	return 0;
 }
 
@@ -1965,15 +1920,15 @@
 }
 
 static inline __be32
-nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
-		__be32 **p, int *buflen)
+nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+		     struct nfs4_ace *ace)
 {
 	if (ace->whotype != NFS4_ACL_WHO_NAMED)
-		return nfs4_acl_write_who(ace->whotype, p, buflen);
+		return nfs4_acl_write_who(xdr, ace->whotype);
 	else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
-		return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen);
+		return nfsd4_encode_group(xdr, rqstp, ace->who_gid);
 	else
-		return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen);
+		return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
 }
 
 #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -1982,31 +1937,28 @@
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 static inline __be32
-nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
+nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+			    void *context, int len)
 {
-	__be32 *p = *pp;
+	__be32 *p;
 
-	if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4))
+	p = xdr_reserve_space(xdr, len + 4 + 4 + 4);
+	if (!p)
 		return nfserr_resource;
 
 	/*
 	 * For now we use a 0 here to indicate the null translation; in
 	 * the future we may place a call to translation code here.
 	 */
-	if ((*buflen -= 8) < 0)
-		return nfserr_resource;
-
-	WRITE32(0); /* lfs */
-	WRITE32(0); /* pi */
+	*p++ = cpu_to_be32(0); /* lfs */
+	*p++ = cpu_to_be32(0); /* pi */
 	p = xdr_encode_opaque(p, context, len);
-	*buflen -= (XDR_QUADLEN(len) << 2) + 4;
-
-	*pp = p;
 	return 0;
 }
 #else
 static inline __be32
-nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
+nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+			    void *context, int len)
 { return 0; }
 #endif
 
@@ -2045,12 +1997,11 @@
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
- *
- * countp is the buffer size in _words_
  */
-__be32
-nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-		struct dentry *dentry, __be32 **buffer, int count, u32 *bmval,
+static __be32
+nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+		struct svc_export *exp,
+		struct dentry *dentry, u32 *bmval,
 		struct svc_rqst *rqstp, int ignore_crossmnt)
 {
 	u32 bmval0 = bmval[0];
@@ -2059,12 +2010,13 @@
 	struct kstat stat;
 	struct svc_fh *tempfh = NULL;
 	struct kstatfs statfs;
-	int buflen = count << 2;
-	__be32 *attrlenp;
+	__be32 *p;
+	int starting_len = xdr->buf->len;
+	int attrlen_offset;
+	__be32 attrlen;
 	u32 dummy;
 	u64 dummy64;
 	u32 rdattr_err = 0;
-	__be32 *p = *buffer;
 	__be32 status;
 	int err;
 	int aclsupport = 0;
@@ -2095,8 +2047,8 @@
 	err = vfs_getattr(&path, &stat);
 	if (err)
 		goto out_nfserr;
-	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
-			FATTR4_WORD0_MAXNAME)) ||
+	if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+			FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
 		err = vfs_statfs(&path, &statfs);
@@ -2145,25 +2097,33 @@
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 
 	if (bmval2) {
-		if ((buflen -= 16) < 0)
+		p = xdr_reserve_space(xdr, 16);
+		if (!p)
 			goto out_resource;
-		WRITE32(3);
-		WRITE32(bmval0);
-		WRITE32(bmval1);
-		WRITE32(bmval2);
+		*p++ = cpu_to_be32(3);
+		*p++ = cpu_to_be32(bmval0);
+		*p++ = cpu_to_be32(bmval1);
+		*p++ = cpu_to_be32(bmval2);
 	} else if (bmval1) {
-		if ((buflen -= 12) < 0)
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
 			goto out_resource;
-		WRITE32(2);
-		WRITE32(bmval0);
-		WRITE32(bmval1);
+		*p++ = cpu_to_be32(2);
+		*p++ = cpu_to_be32(bmval0);
+		*p++ = cpu_to_be32(bmval1);
 	} else {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
-		WRITE32(bmval0);
+		*p++ = cpu_to_be32(1);
+		*p++ = cpu_to_be32(bmval0);
 	}
-	attrlenp = p++;                /* to be backfilled later */
+
+	attrlen_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		goto out_resource;
+	p++;                /* to be backfilled later */
 
 	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
 		u32 word0 = nfsd_suppattrs0(minorversion);
@@ -2175,296 +2135,343 @@
 		if (!contextsupport)
 			word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
 		if (!word2) {
-			if ((buflen -= 12) < 0)
+			p = xdr_reserve_space(xdr, 12);
+			if (!p)
 				goto out_resource;
-			WRITE32(2);
-			WRITE32(word0);
-			WRITE32(word1);
+			*p++ = cpu_to_be32(2);
+			*p++ = cpu_to_be32(word0);
+			*p++ = cpu_to_be32(word1);
 		} else {
-			if ((buflen -= 16) < 0)
+			p = xdr_reserve_space(xdr, 16);
+			if (!p)
 				goto out_resource;
-			WRITE32(3);
-			WRITE32(word0);
-			WRITE32(word1);
-			WRITE32(word2);
+			*p++ = cpu_to_be32(3);
+			*p++ = cpu_to_be32(word0);
+			*p++ = cpu_to_be32(word1);
+			*p++ = cpu_to_be32(word2);
 		}
 	}
 	if (bmval0 & FATTR4_WORD0_TYPE) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
 		dummy = nfs4_file_type(stat.mode);
 		if (dummy == NF4BAD) {
 			status = nfserr_serverfault;
 			goto out;
 		}
-		WRITE32(dummy);
+		*p++ = cpu_to_be32(dummy);
 	}
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
 		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
-			WRITE32(NFS4_FH_PERSISTENT);
+			*p++ = cpu_to_be32(NFS4_FH_PERSISTENT);
 		else
-			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
+			*p++ = cpu_to_be32(NFS4_FH_PERSISTENT|
+						NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		write_change(&p, &stat, dentry->d_inode);
+		p = encode_change(p, &stat, dentry->d_inode);
 	}
 	if (bmval0 & FATTR4_WORD0_SIZE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64(stat.size);
+		p = xdr_encode_hyper(p, stat.size);
 	}
 	if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(0);
+		*p++ = cpu_to_be32(0);
 	}
 	if (bmval0 & FATTR4_WORD0_FSID) {
-		if ((buflen -= 16) < 0)
+		p = xdr_reserve_space(xdr, 16);
+		if (!p)
 			goto out_resource;
 		if (exp->ex_fslocs.migrated) {
-			WRITE64(NFS4_REFERRAL_FSID_MAJOR);
-			WRITE64(NFS4_REFERRAL_FSID_MINOR);
+			p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR);
+			p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR);
 		} else switch(fsid_source(fhp)) {
 		case FSIDSOURCE_FSID:
-			WRITE64((u64)exp->ex_fsid);
-			WRITE64((u64)0);
+			p = xdr_encode_hyper(p, (u64)exp->ex_fsid);
+			p = xdr_encode_hyper(p, (u64)0);
 			break;
 		case FSIDSOURCE_DEV:
-			WRITE32(0);
-			WRITE32(MAJOR(stat.dev));
-			WRITE32(0);
-			WRITE32(MINOR(stat.dev));
+			*p++ = cpu_to_be32(0);
+			*p++ = cpu_to_be32(MAJOR(stat.dev));
+			*p++ = cpu_to_be32(0);
+			*p++ = cpu_to_be32(MINOR(stat.dev));
 			break;
 		case FSIDSOURCE_UUID:
-			WRITEMEM(exp->ex_uuid, 16);
+			p = xdr_encode_opaque_fixed(p, exp->ex_uuid,
+								EX_UUID_LEN);
 			break;
 		}
 	}
 	if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(0);
+		*p++ = cpu_to_be32(0);
 	}
 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(nn->nfsd4_lease);
+		*p++ = cpu_to_be32(nn->nfsd4_lease);
 	}
 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(rdattr_err);
+		*p++ = cpu_to_be32(rdattr_err);
 	}
 	if (bmval0 & FATTR4_WORD0_ACL) {
 		struct nfs4_ace *ace;
 
 		if (acl == NULL) {
-			if ((buflen -= 4) < 0)
+			p = xdr_reserve_space(xdr, 4);
+			if (!p)
 				goto out_resource;
 
-			WRITE32(0);
+			*p++ = cpu_to_be32(0);
 			goto out_acl;
 		}
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(acl->naces);
+		*p++ = cpu_to_be32(acl->naces);
 
 		for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
-			if ((buflen -= 4*3) < 0)
+			p = xdr_reserve_space(xdr, 4*3);
+			if (!p)
 				goto out_resource;
-			WRITE32(ace->type);
-			WRITE32(ace->flag);
-			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
-			status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
+			*p++ = cpu_to_be32(ace->type);
+			*p++ = cpu_to_be32(ace->flag);
+			*p++ = cpu_to_be32(ace->access_mask &
+							NFS4_ACE_MASK_ALL);
+			status = nfsd4_encode_aclname(xdr, rqstp, ace);
 			if (status)
 				goto out;
 		}
 	}
 out_acl:
 	if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(aclsupport ?
+		*p++ = cpu_to_be32(aclsupport ?
 			ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
 	}
 	if (bmval0 & FATTR4_WORD0_CANSETTIME) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(0);
+		*p++ = cpu_to_be32(0);
 	}
 	if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
-		buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4;
-		if (buflen < 0)
+		p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(fhp->fh_handle.fh_size);
-		WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size);
+		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base,
+					fhp->fh_handle.fh_size);
 	}
 	if (bmval0 & FATTR4_WORD0_FILEID) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64(stat.ino);
+		p = xdr_encode_hyper(p, stat.ino);
 	}
 	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64((u64) statfs.f_ffree);
+		p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
 	}
 	if (bmval0 & FATTR4_WORD0_FILES_FREE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64((u64) statfs.f_ffree);
+		p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
 	}
 	if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64((u64) statfs.f_files);
+		p = xdr_encode_hyper(p, (u64) statfs.f_files);
 	}
 	if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
-		status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+		status = nfsd4_encode_fs_locations(xdr, rqstp, exp);
 		if (status)
 			goto out;
 	}
 	if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes);
+		p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes);
 	}
 	if (bmval0 & FATTR4_WORD0_MAXLINK) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(255);
+		*p++ = cpu_to_be32(255);
 	}
 	if (bmval0 & FATTR4_WORD0_MAXNAME) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(statfs.f_namelen);
+		*p++ = cpu_to_be32(statfs.f_namelen);
 	}
 	if (bmval0 & FATTR4_WORD0_MAXREAD) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64((u64) svc_max_payload(rqstp));
+		p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
 	}
 	if (bmval0 & FATTR4_WORD0_MAXWRITE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE64((u64) svc_max_payload(rqstp));
+		p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
 	}
 	if (bmval1 & FATTR4_WORD1_MODE) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(stat.mode & S_IALLUGO);
+		*p++ = cpu_to_be32(stat.mode & S_IALLUGO);
 	}
 	if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(1);
+		*p++ = cpu_to_be32(1);
 	}
 	if (bmval1 & FATTR4_WORD1_NUMLINKS) {
-		if ((buflen -= 4) < 0)
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
 			goto out_resource;
-		WRITE32(stat.nlink);
+		*p++ = cpu_to_be32(stat.nlink);
 	}
 	if (bmval1 & FATTR4_WORD1_OWNER) {
-		status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
+		status = nfsd4_encode_user(xdr, rqstp, stat.uid);
 		if (status)
 			goto out;
 	}
 	if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
-		status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
+		status = nfsd4_encode_group(xdr, rqstp, stat.gid);
 		if (status)
 			goto out;
 	}
 	if (bmval1 & FATTR4_WORD1_RAWDEV) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
-		WRITE32((u32) MAJOR(stat.rdev));
-		WRITE32((u32) MINOR(stat.rdev));
+		*p++ = cpu_to_be32((u32) MAJOR(stat.rdev));
+		*p++ = cpu_to_be32((u32) MINOR(stat.rdev));
 	}
 	if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
 		dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
-		WRITE64(dummy64);
+		p = xdr_encode_hyper(p, dummy64);
 	}
 	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
 		dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
-		WRITE64(dummy64);
+		p = xdr_encode_hyper(p, dummy64);
 	}
 	if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
 		dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
-		WRITE64(dummy64);
+		p = xdr_encode_hyper(p, dummy64);
 	}
 	if (bmval1 & FATTR4_WORD1_SPACE_USED) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
 			goto out_resource;
 		dummy64 = (u64)stat.blocks << 9;
-		WRITE64(dummy64);
+		p = xdr_encode_hyper(p, dummy64);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
-		if ((buflen -= 12) < 0)
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
 			goto out_resource;
-		WRITE64((s64)stat.atime.tv_sec);
-		WRITE32(stat.atime.tv_nsec);
+		p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+		*p++ = cpu_to_be32(stat.atime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
-		if ((buflen -= 12) < 0)
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
 			goto out_resource;
-		WRITE32(0);
-		WRITE32(1);
-		WRITE32(0);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(1);
+		*p++ = cpu_to_be32(0);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
-		if ((buflen -= 12) < 0)
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
 			goto out_resource;
-		WRITE64((s64)stat.ctime.tv_sec);
-		WRITE32(stat.ctime.tv_nsec);
+		p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+		*p++ = cpu_to_be32(stat.ctime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
-		if ((buflen -= 12) < 0)
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
 			goto out_resource;
-		WRITE64((s64)stat.mtime.tv_sec);
-		WRITE32(stat.mtime.tv_nsec);
+		p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+		*p++ = cpu_to_be32(stat.mtime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
-		if ((buflen -= 8) < 0)
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
                 	goto out_resource;
 		/*
 		 * Get parent's attributes if not ignoring crossmount
@@ -2473,25 +2480,26 @@
 		if (ignore_crossmnt == 0 &&
 		    dentry == exp->ex_path.mnt->mnt_root)
 			get_parent_attributes(exp, &stat);
-		WRITE64(stat.ino);
+		p = xdr_encode_hyper(p, stat.ino);
 	}
 	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
-		status = nfsd4_encode_security_label(rqstp, context,
-				contextlen, &p, &buflen);
+		status = nfsd4_encode_security_label(xdr, rqstp, context,
+								contextlen);
 		if (status)
 			goto out;
 	}
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
-		if ((buflen -= 16) < 0)
+		p = xdr_reserve_space(xdr, 16);
+		if (!p)
 			goto out_resource;
-		WRITE32(3);
-		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
-		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
-		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
+		*p++ = cpu_to_be32(3);
+		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
+		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
 	}
 
-	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
-	*buffer = p;
+	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
+	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
 	status = nfs_ok;
 
 out:
@@ -2504,6 +2512,8 @@
 		fh_put(tempfh);
 		kfree(tempfh);
 	}
+	if (status)
+		xdr_truncate_encode(xdr, starting_len);
 	return status;
 out_nfserr:
 	status = nfserrno(err);
@@ -2513,6 +2523,37 @@
 	goto out;
 }
 
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
+				struct xdr_buf *buf, __be32 *p, int bytes)
+{
+	xdr->scratch.iov_len = 0;
+	memset(buf, 0, sizeof(struct xdr_buf));
+	buf->head[0].iov_base = p;
+	buf->head[0].iov_len = 0;
+	buf->len = 0;
+	xdr->buf = buf;
+	xdr->iov = buf->head;
+	xdr->p = p;
+	xdr->end = (void *)p + bytes;
+	buf->buflen = bytes;
+}
+
+__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
+			struct svc_fh *fhp, struct svc_export *exp,
+			struct dentry *dentry, u32 *bmval,
+			struct svc_rqst *rqstp, int ignore_crossmnt)
+{
+	struct xdr_buf dummy;
+	struct xdr_stream xdr;
+	__be32 ret;
+
+	svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
+	ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
+							ignore_crossmnt);
+	*p = xdr.p;
+	return ret;
+}
+
 static inline int attributes_need_mount(u32 *bmval)
 {
 	if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
@@ -2523,8 +2564,8 @@
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+			const char *name, int namlen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -2576,7 +2617,7 @@
 
 	}
 out_encode:
-	nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
 					cd->rd_rqstp, ignore_crossmnt);
 out_put:
 	dput(dentry);
@@ -2585,9 +2626,12 @@
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-	if (buflen < 6)
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6);
+	if (!p)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2604,10 +2648,13 @@
 {
 	struct readdir_cd *ccd = ccdv;
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-	int buflen;
-	__be32 *p = cd->buffer;
-	__be32 *cookiep;
+	struct xdr_stream *xdr = cd->xdr;
+	int start_offset = xdr->buf->len;
+	int cookie_offset;
+	int entry_bytes;
 	__be32 nfserr = nfserr_toosmall;
+	__be64 wire_offset;
+	__be32 *p;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -2615,19 +2662,24 @@
 		return 0;
 	}
 
-	if (cd->offset)
-		xdr_encode_hyper(cd->offset, (u64) offset);
+	if (cd->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-	if (buflen < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto fail;
-
 	*p++ = xdr_one;                             /* mark entry present */
-	cookiep = p;
+	cookie_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 3*4 + namlen);
+	if (!p)
+		goto fail;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
 	switch (nfserr) {
 	case nfs_ok:
 		break;
@@ -2646,59 +2698,74 @@
 		 */
 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
 			goto fail;
-		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		p = nfsd4_encode_rdattr_error(xdr, nfserr);
 		if (p == NULL) {
 			nfserr = nfserr_toosmall;
 			goto fail;
 		}
 	}
-	cd->buflen -= (p - cd->buffer);
-	cd->buffer = p;
-	cd->offset = cookiep;
+	nfserr = nfserr_toosmall;
+	entry_bytes = xdr->buf->len - start_offset;
+	if (entry_bytes > cd->rd_maxcount)
+		goto fail;
+	cd->rd_maxcount -= entry_bytes;
+	if (!cd->rd_dircount)
+		goto fail;
+	cd->rd_dircount--;
+	cd->cookie_offset = cookie_offset;
 skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
+	xdr_truncate_encode(xdr, start_offset);
 	cd->common.err = nfserr;
 	return -EINVAL;
 }
 
-static void
-nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
+static __be32
+nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
 {
 	__be32 *p;
 
-	RESERVE_SPACE(sizeof(stateid_t));
-	WRITE32(sid->si_generation);
-	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
-	ADJUST_ARGS();
+	p = xdr_reserve_space(xdr, sizeof(stateid_t));
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(sid->si_generation);
+	p = xdr_encode_opaque_fixed(p, &sid->si_opaque,
+					sizeof(stateid_opaque_t));
+	return 0;
 }
 
 static __be32
 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(8);
-		WRITE32(access->ac_supported);
-		WRITE32(access->ac_resp_access);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(access->ac_supported);
+		*p++ = cpu_to_be32(access->ac_resp_access);
 	}
 	return nfserr;
 }
 
 static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8);
-		WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
-		WRITE32(bcts->dir);
+		p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
+		if (!p)
+			return nfserr_resource;
+		p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
+						NFS4_MAX_SESSIONID_LEN);
+		*p++ = cpu_to_be32(bcts->dir);
 		/* Sorry, we do not yet support RDMA over 4.1: */
-		WRITE32(0);
-		ADJUST_ARGS();
+		*p++ = cpu_to_be32(0);
 	}
 	return nfserr;
 }
@@ -2706,8 +2773,10 @@
 static __be32
 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
 {
+	struct xdr_stream *xdr = &resp->xdr;
+
 	if (!nfserr)
-		nfsd4_encode_stateid(resp, &close->cl_stateid);
+		nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid);
 
 	return nfserr;
 }
@@ -2716,12 +2785,15 @@
 static __be32
 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(NFS4_VERIFIER_SIZE);
-		WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+		if (!p)
+			return nfserr_resource;
+		p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
+						NFS4_VERIFIER_SIZE);
 	}
 	return nfserr;
 }
@@ -2729,15 +2801,17 @@
 static __be32
 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(32);
-		write_cinfo(&p, &create->cr_cinfo);
-		WRITE32(2);
-		WRITE32(create->cr_bmval[0]);
-		WRITE32(create->cr_bmval[1]);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 32);
+		if (!p)
+			return nfserr_resource;
+		p = encode_cinfo(p, &create->cr_cinfo);
+		*p++ = cpu_to_be32(2);
+		*p++ = cpu_to_be32(create->cr_bmval[0]);
+		*p++ = cpu_to_be32(create->cr_bmval[1]);
 	}
 	return nfserr;
 }
@@ -2746,14 +2820,13 @@
 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
 {
 	struct svc_fh *fhp = getattr->ga_fhp;
-	int buflen;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	if (nfserr)
 		return nfserr;
 
-	buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
-	nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
-				    &resp->p, buflen, getattr->ga_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
+				    getattr->ga_bmval,
 				    resp->rqstp, 0);
 	return nfserr;
 }
@@ -2761,16 +2834,17 @@
 static __be32
 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	struct svc_fh *fhp = *fhpp;
 	unsigned int len;
 	__be32 *p;
 
 	if (!nfserr) {
 		len = fhp->fh_handle.fh_size;
-		RESERVE_SPACE(len + 4);
-		WRITE32(len);
-		WRITEMEM(&fhp->fh_handle.fh_base, len);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, len + 4);
+		if (!p)
+			return nfserr_resource;
+		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
 	}
 	return nfserr;
 }
@@ -2779,52 +2853,69 @@
 * Including all fields other than the name, a LOCK4denied structure requires
 *   8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.
 */
-static void
-nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
+static __be32
+nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
 {
 	struct xdr_netobj *conf = &ld->ld_owner;
 	__be32 *p;
 
-	RESERVE_SPACE(32 + XDR_LEN(conf->len));
-	WRITE64(ld->ld_start);
-	WRITE64(ld->ld_length);
-	WRITE32(ld->ld_type);
-	if (conf->len) {
-		WRITEMEM(&ld->ld_clientid, 8);
-		WRITE32(conf->len);
-		WRITEMEM(conf->data, conf->len);
-		kfree(conf->data);
-	}  else {  /* non - nfsv4 lock in conflict, no clientid nor owner */
-		WRITE64((u64)0); /* clientid */
-		WRITE32(0); /* length of owner name */
+again:
+	p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len));
+	if (!p) {
+		/*
+		 * Don't fail to return the result just because we can't
+		 * return the conflicting lock owner:
+		 */
+		if (conf->len) {
+			conf->len = 0;
+			conf->data = NULL;
+			goto again;
+		}
+		return nfserr_resource;
 	}
-	ADJUST_ARGS();
+	p = xdr_encode_hyper(p, ld->ld_start);
+	p = xdr_encode_hyper(p, ld->ld_length);
+	*p++ = cpu_to_be32(ld->ld_type);
+	if (conf->len) {
+		p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
+		p = xdr_encode_opaque(p, conf->data, conf->len);
+	}  else {  /* non - nfsv4 lock in conflict, no clientid nor owner */
+		p = xdr_encode_hyper(p, (u64)0); /* clientid */
+		*p++ = cpu_to_be32(0); /* length of owner name */
+	}
+	return nfserr_denied;
 }
 
 static __be32
 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
 {
-	if (!nfserr)
-		nfsd4_encode_stateid(resp, &lock->lk_resp_stateid);
-	else if (nfserr == nfserr_denied)
-		nfsd4_encode_lock_denied(resp, &lock->lk_denied);
+	struct xdr_stream *xdr = &resp->xdr;
 
+	if (!nfserr)
+		nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
+	else if (nfserr == nfserr_denied)
+		nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
+	kfree(lock->lk_denied.ld_owner.data);
 	return nfserr;
 }
 
 static __be32
 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
 {
+	struct xdr_stream *xdr = &resp->xdr;
+
 	if (nfserr == nfserr_denied)
-		nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+		nfsd4_encode_lock_denied(xdr, &lockt->lt_denied);
 	return nfserr;
 }
 
 static __be32
 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
 {
+	struct xdr_stream *xdr = &resp->xdr;
+
 	if (!nfserr)
-		nfsd4_encode_stateid(resp, &locku->lu_stateid);
+		nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid);
 
 	return nfserr;
 }
@@ -2833,12 +2924,14 @@
 static __be32
 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(20);
-		write_cinfo(&p, &link->li_cinfo);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 20);
+		if (!p)
+			return nfserr_resource;
+		p = encode_cinfo(p, &link->li_cinfo);
 	}
 	return nfserr;
 }
@@ -2847,72 +2940,86 @@
 static __be32
 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (nfserr)
 		goto out;
 
-	nfsd4_encode_stateid(resp, &open->op_stateid);
-	RESERVE_SPACE(40);
-	write_cinfo(&p, &open->op_cinfo);
-	WRITE32(open->op_rflags);
-	WRITE32(2);
-	WRITE32(open->op_bmval[0]);
-	WRITE32(open->op_bmval[1]);
-	WRITE32(open->op_delegate_type);
-	ADJUST_ARGS();
+	nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
+	if (nfserr)
+		goto out;
+	p = xdr_reserve_space(xdr, 40);
+	if (!p)
+		return nfserr_resource;
+	p = encode_cinfo(p, &open->op_cinfo);
+	*p++ = cpu_to_be32(open->op_rflags);
+	*p++ = cpu_to_be32(2);
+	*p++ = cpu_to_be32(open->op_bmval[0]);
+	*p++ = cpu_to_be32(open->op_bmval[1]);
+	*p++ = cpu_to_be32(open->op_delegate_type);
 
 	switch (open->op_delegate_type) {
 	case NFS4_OPEN_DELEGATE_NONE:
 		break;
 	case NFS4_OPEN_DELEGATE_READ:
-		nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
-		RESERVE_SPACE(20);
-		WRITE32(open->op_recall);
+		nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
+		if (nfserr)
+			return nfserr;
+		p = xdr_reserve_space(xdr, 20);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(open->op_recall);
 
 		/*
 		 * TODO: ACE's in delegations
 		 */
-		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
-		WRITE32(0);
-		WRITE32(0);
-		WRITE32(0);   /* XXX: is NULL principal ok? */
-		ADJUST_ARGS();
+		*p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);   /* XXX: is NULL principal ok? */
 		break;
 	case NFS4_OPEN_DELEGATE_WRITE:
-		nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
-		RESERVE_SPACE(32);
-		WRITE32(0);
+		nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
+		if (nfserr)
+			return nfserr;
+		p = xdr_reserve_space(xdr, 32);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(0);
 
 		/*
 		 * TODO: space_limit's in delegations
 		 */
-		WRITE32(NFS4_LIMIT_SIZE);
-		WRITE32(~(u32)0);
-		WRITE32(~(u32)0);
+		*p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
+		*p++ = cpu_to_be32(~(u32)0);
+		*p++ = cpu_to_be32(~(u32)0);
 
 		/*
 		 * TODO: ACE's in delegations
 		 */
-		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
-		WRITE32(0);
-		WRITE32(0);
-		WRITE32(0);   /* XXX: is NULL principal ok? */
-		ADJUST_ARGS();
+		*p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);   /* XXX: is NULL principal ok? */
 		break;
 	case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */
 		switch (open->op_why_no_deleg) {
 		case WND4_CONTENTION:
 		case WND4_RESOURCE:
-			RESERVE_SPACE(8);
-			WRITE32(open->op_why_no_deleg);
-			WRITE32(0);	/* deleg signaling not supported yet */
+			p = xdr_reserve_space(xdr, 8);
+			if (!p)
+				return nfserr_resource;
+			*p++ = cpu_to_be32(open->op_why_no_deleg);
+			/* deleg signaling not supported yet: */
+			*p++ = cpu_to_be32(0);
 			break;
 		default:
-			RESERVE_SPACE(4);
-			WRITE32(open->op_why_no_deleg);
+			p = xdr_reserve_space(xdr, 4);
+			if (!p)
+				return nfserr_resource;
+			*p++ = cpu_to_be32(open->op_why_no_deleg);
 		}
-		ADJUST_ARGS();
 		break;
 	default:
 		BUG();
@@ -2925,8 +3032,10 @@
 static __be32
 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
 {
+	struct xdr_stream *xdr = &resp->xdr;
+
 	if (!nfserr)
-		nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
+		nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
 
 	return nfserr;
 }
@@ -2934,127 +3043,233 @@
 static __be32
 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
 {
+	struct xdr_stream *xdr = &resp->xdr;
+
 	if (!nfserr)
-		nfsd4_encode_stateid(resp, &od->od_stateid);
+		nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid);
 
 	return nfserr;
 }
 
+static __be32 nfsd4_encode_splice_read(
+				struct nfsd4_compoundres *resp,
+				struct nfsd4_read *read,
+				struct file *file, unsigned long maxcount)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct xdr_buf *buf = xdr->buf;
+	u32 eof;
+	int space_left;
+	__be32 nfserr;
+	__be32 *p = xdr->p - 2;
+
+	/*
+	 * Don't inline pages unless we know there's room for eof,
+	 * count, and possible padding:
+	 */
+	if (xdr->end - xdr->p < 3)
+		return nfserr_resource;
+
+	nfserr = nfsd_splice_read(read->rd_rqstp, file,
+				  read->rd_offset, &maxcount);
+	if (nfserr) {
+		/*
+		 * nfsd_splice_actor may have already messed with the
+		 * page length; reset it so as not to confuse
+		 * xdr_truncate_encode:
+		 */
+		buf->page_len = 0;
+		return nfserr;
+	}
+
+	eof = (read->rd_offset + maxcount >=
+	       read->rd_fhp->fh_dentry->d_inode->i_size);
+
+	*(p++) = htonl(eof);
+	*(p++) = htonl(maxcount);
+
+	buf->page_len = maxcount;
+	buf->len += maxcount;
+	xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/* Use rest of head for padding and remaining ops: */
+	buf->tail[0].iov_base = xdr->p;
+	buf->tail[0].iov_len = 0;
+	xdr->iov = buf->tail;
+	if (maxcount&3) {
+		int pad = 4 - (maxcount&3);
+
+		*(xdr->p++) = 0;
+
+		buf->tail[0].iov_base += maxcount&3;
+		buf->tail[0].iov_len = pad;
+		buf->len += pad;
+	}
+
+	space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
+				buf->buflen - buf->len);
+	buf->buflen = buf->len + space_left;
+	xdr->end = (__be32 *)((void *)xdr->end + space_left);
+
+	return 0;
+}
+
+static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+				 struct nfsd4_read *read,
+				 struct file *file, unsigned long maxcount)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	u32 eof;
+	int v;
+	int starting_len = xdr->buf->len - 8;
+	long len;
+	int thislen;
+	__be32 nfserr;
+	__be32 tmp;
+	__be32 *p;
+	u32 zzz = 0;
+	int pad;
+
+	len = maxcount;
+	v = 0;
+
+	thislen = (void *)xdr->end - (void *)xdr->p;
+	if (len < thislen)
+		thislen = len;
+	p = xdr_reserve_space(xdr, (thislen+3)&~3);
+	WARN_ON_ONCE(!p);
+	resp->rqstp->rq_vec[v].iov_base = p;
+	resp->rqstp->rq_vec[v].iov_len = thislen;
+	v++;
+	len -= thislen;
+
+	while (len) {
+		thislen = min_t(long, len, PAGE_SIZE);
+		p = xdr_reserve_space(xdr, (thislen+3)&~3);
+		WARN_ON_ONCE(!p);
+		resp->rqstp->rq_vec[v].iov_base = p;
+		resp->rqstp->rq_vec[v].iov_len = thislen;
+		v++;
+		len -= thislen;
+	}
+	read->rd_vlen = v;
+
+	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
+			read->rd_vlen, &maxcount);
+	if (nfserr)
+		return nfserr;
+	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
+
+	eof = (read->rd_offset + maxcount >=
+	       read->rd_fhp->fh_dentry->d_inode->i_size);
+
+	tmp = htonl(eof);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
+	tmp = htonl(maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+
+	pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
+								&zzz, pad);
+	return 0;
+
+}
+
 static __be32
 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_read *read)
 {
-	u32 eof;
-	int v;
-	struct page *page;
-	unsigned long maxcount; 
-	long len;
+	unsigned long maxcount;
+	struct xdr_stream *xdr = &resp->xdr;
+	struct file *file = read->rd_filp;
+	int starting_len = xdr->buf->len;
+	struct raparms *ra;
 	__be32 *p;
+	__be32 err;
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xbuf->page_len)
-		return nfserr_resource;
 
-	RESERVE_SPACE(8); /* eof flag and byte count */
+	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
+	if (!p) {
+		WARN_ON_ONCE(resp->rqstp->rq_splice_ok);
+		return nfserr_resource;
+	}
+	if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) {
+		WARN_ON_ONCE(1);
+		return nfserr_resource;
+	}
+	xdr_commit_encode(xdr);
 
 	maxcount = svc_max_payload(resp->rqstp);
+	if (maxcount > xdr->buf->buflen - xdr->buf->len)
+		maxcount = xdr->buf->buflen - xdr->buf->len;
 	if (maxcount > read->rd_length)
 		maxcount = read->rd_length;
 
-	len = maxcount;
-	v = 0;
-	while (len > 0) {
-		page = *(resp->rqstp->rq_next_page);
-		if (!page) { /* ran out of pages */
-			maxcount -= len;
-			break;
-		}
-		resp->rqstp->rq_vec[v].iov_base = page_address(page);
-		resp->rqstp->rq_vec[v].iov_len =
-			len < PAGE_SIZE ? len : PAGE_SIZE;
-		resp->rqstp->rq_next_page++;
-		v++;
-		len -= PAGE_SIZE;
+	if (!read->rd_filp) {
+		err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
+						&file, &ra);
+		if (err)
+			goto err_truncate;
 	}
-	read->rd_vlen = v;
 
-	nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp,
-			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
-			&maxcount);
+	if (file->f_op->splice_read && resp->rqstp->rq_splice_ok)
+		err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+	else
+		err = nfsd4_encode_readv(resp, read, file, maxcount);
 
-	if (nfserr)
-		return nfserr;
-	eof = (read->rd_offset + maxcount >=
-	       read->rd_fhp->fh_dentry->d_inode->i_size);
+	if (!read->rd_filp)
+		nfsd_put_tmp_read_open(file, ra);
 
-	WRITE32(eof);
-	WRITE32(maxcount);
-	ADJUST_ARGS();
-	resp->xbuf->head[0].iov_len = (char*)p
-					- (char*)resp->xbuf->head[0].iov_base;
-	resp->xbuf->page_len = maxcount;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xbuf->tail[0].iov_base = p;
-	resp->xbuf->tail[0].iov_len = 0;
-	if (maxcount&3) {
-		RESERVE_SPACE(4);
-		WRITE32(0);
-		resp->xbuf->tail[0].iov_base += maxcount&3;
-		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
-		ADJUST_ARGS();
-	}
-	return 0;
+err_truncate:
+	if (err)
+		xdr_truncate_encode(xdr, starting_len);
+	return err;
 }
 
 static __be32
 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
 {
 	int maxcount;
-	char *page;
+	__be32 wire_count;
+	int zero = 0;
+	struct xdr_stream *xdr = &resp->xdr;
+	int length_offset = xdr->buf->len;
 	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xbuf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
-
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
 	maxcount = PAGE_SIZE;
-	RESERVE_SPACE(4);
 
+	p = xdr_reserve_space(xdr, maxcount);
+	if (!p)
+		return nfserr_resource;
 	/*
 	 * XXX: By default, the ->readlink() VFS op will truncate symlinks
 	 * if they would overflow the buffer.  Is this kosher in NFSv4?  If
 	 * not, one easy fix is: if ->readlink() precisely fills the buffer,
 	 * assume that truncation occurred, and return NFS4ERR_RESOURCE.
 	 */
-	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
+	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
+						(char *)p, &maxcount);
 	if (nfserr == nfserr_isdir)
-		return nfserr_inval;
-	if (nfserr)
+		nfserr = nfserr_inval;
+	if (nfserr) {
+		xdr_truncate_encode(xdr, length_offset);
 		return nfserr;
-
-	WRITE32(maxcount);
-	ADJUST_ARGS();
-	resp->xbuf->head[0].iov_len = (char*)p
-				- (char*)resp->xbuf->head[0].iov_base;
-	resp->xbuf->page_len = maxcount;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xbuf->tail[0].iov_base = p;
-	resp->xbuf->tail[0].iov_len = 0;
-	if (maxcount&3) {
-		RESERVE_SPACE(4);
-		WRITE32(0);
-		resp->xbuf->tail[0].iov_base += maxcount&3;
-		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
-		ADJUST_ARGS();
 	}
+
+	wire_count = htonl(maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
+	xdr_truncate_encode(xdr, length_offset + 4 + maxcount);
+	if (maxcount & 3)
+		write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
+						&zero, 4 - (maxcount&3));
 	return 0;
 }
 
@@ -3062,47 +3277,52 @@
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
+	int bytes_left;
 	loff_t offset;
-	__be32 *page, *savep, *tailbase;
+	__be64 wire_offset;
+	struct xdr_stream *xdr = &resp->xdr;
+	int starting_len = xdr->buf->len;
 	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xbuf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
 
-	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
-	savep = p;
+	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+	if (!p)
+		return nfserr_resource;
 
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
-	WRITE32(0);
-	WRITE32(0);
-	ADJUST_ARGS();
-	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
-	tailbase = p;
-
-	maxcount = PAGE_SIZE;
-	if (maxcount > readdir->rd_maxcount)
-		maxcount = readdir->rd_maxcount;
+	*p++ = cpu_to_be32(0);
+	*p++ = cpu_to_be32(0);
+	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
+				- (char *)resp->xdr.buf->head[0].iov_base;
 
 	/*
-	 * Convert from bytes to words, account for the two words already
-	 * written, make sure to leave two words at the end for the next
-	 * pointer and eof field.
+	 * Number of bytes left for directory entries allowing for the
+	 * final 8 bytes of the readdir and a following failed op:
 	 */
-	maxcount = (maxcount >> 2) - 4;
-	if (maxcount < 0) {
-		nfserr =  nfserr_toosmall;
+	bytes_left = xdr->buf->buflen - xdr->buf->len
+			- COMPOUND_ERR_SLACK_SPACE - 8;
+	if (bytes_left < 0) {
+		nfserr = nfserr_resource;
 		goto err_no_verf;
 	}
+	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	/*
+	 * Note the rfc defines rd_maxcount as the size of the
+	 * READDIR4resok structure, which includes the verifier above
+	 * and the 8 bytes encoded at the end of this function:
+	 */
+	if (maxcount < 16) {
+		nfserr = nfserr_toosmall;
+		goto err_no_verf;
+	}
+	maxcount = min_t(int, maxcount-16, bytes_left);
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
+	readdir->xdr = xdr;
+	readdir->rd_maxcount = maxcount;
 	readdir->common.err = 0;
-	readdir->buflen = maxcount;
-	readdir->buffer = page;
-	readdir->offset = NULL;
+	readdir->cookie_offset = 0;
 
 	offset = readdir->rd_cookie;
 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3110,42 +3330,49 @@
 			      &readdir->common, nfsd4_encode_dirent);
 	if (nfserr == nfs_ok &&
 	    readdir->common.err == nfserr_toosmall &&
-	    readdir->buffer == page) 
-		nfserr = nfserr_toosmall;
+	    xdr->buf->len == starting_len + 8) {
+		/* nothing encoded; which limit did we hit?: */
+		if (maxcount - 16 < bytes_left)
+			/* It was the fault of rd_maxcount: */
+			nfserr = nfserr_toosmall;
+		else
+			/* We ran out of buffer space: */
+			nfserr = nfserr_resource;
+	}
 	if (nfserr)
 		goto err_no_verf;
 
-	if (readdir->offset)
-		xdr_encode_hyper(readdir->offset, offset);
+	if (readdir->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	p = readdir->buffer;
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		goto err_no_verf;
+	}
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xbuf->page_len = ((char*)p) -
-		(char*)page_address(*(resp->rqstp->rq_next_page-1));
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xbuf->tail[0].iov_base = tailbase;
-	resp->xbuf->tail[0].iov_len = 0;
-	resp->p = resp->xbuf->tail[0].iov_base;
-	resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
 
 	return 0;
 err_no_verf:
-	p = savep;
-	ADJUST_ARGS();
+	xdr_truncate_encode(xdr, starting_len);
 	return nfserr;
 }
 
 static __be32
 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(20);
-		write_cinfo(&p, &remove->rm_cinfo);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 20);
+		if (!p)
+			return nfserr_resource;
+		p = encode_cinfo(p, &remove->rm_cinfo);
 	}
 	return nfserr;
 }
@@ -3153,19 +3380,21 @@
 static __be32
 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(40);
-		write_cinfo(&p, &rename->rn_sinfo);
-		write_cinfo(&p, &rename->rn_tinfo);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 40);
+		if (!p)
+			return nfserr_resource;
+		p = encode_cinfo(p, &rename->rn_sinfo);
+		p = encode_cinfo(p, &rename->rn_tinfo);
 	}
 	return nfserr;
 }
 
 static __be32
-nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
+nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
 			 __be32 nfserr, struct svc_export *exp)
 {
 	u32 i, nflavs, supported;
@@ -3176,6 +3405,7 @@
 
 	if (nfserr)
 		goto out;
+	nfserr = nfserr_resource;
 	if (exp->ex_nflavors) {
 		flavs = exp->ex_flavors;
 		nflavs = exp->ex_nflavors;
@@ -3197,9 +3427,10 @@
 	}
 
 	supported = 0;
-	RESERVE_SPACE(4);
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		goto out;
 	flavorsp = p++;		/* to be backfilled later */
-	ADJUST_ARGS();
 
 	for (i = 0; i < nflavs; i++) {
 		rpc_authflavor_t pf = flavs[i].pseudoflavor;
@@ -3207,18 +3438,20 @@
 
 		if (rpcauth_get_gssinfo(pf, &info) == 0) {
 			supported++;
-			RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4);
-			WRITE32(RPC_AUTH_GSS);
-			WRITE32(info.oid.len);
-			WRITEMEM(info.oid.data, info.oid.len);
-			WRITE32(info.qop);
-			WRITE32(info.service);
-			ADJUST_ARGS();
+			p = xdr_reserve_space(xdr, 4 + 4 +
+					      XDR_LEN(info.oid.len) + 4 + 4);
+			if (!p)
+				goto out;
+			*p++ = cpu_to_be32(RPC_AUTH_GSS);
+			p = xdr_encode_opaque(p,  info.oid.data, info.oid.len);
+			*p++ = cpu_to_be32(info.qop);
+			*p++ = cpu_to_be32(info.service);
 		} else if (pf < RPC_AUTH_MAXFLAVOR) {
 			supported++;
-			RESERVE_SPACE(4);
-			WRITE32(pf);
-			ADJUST_ARGS();
+			p = xdr_reserve_space(xdr, 4);
+			if (!p)
+				goto out;
+			*p++ = cpu_to_be32(pf);
 		} else {
 			if (report)
 				pr_warn("NFS: SECINFO: security flavor %u "
@@ -3229,7 +3462,7 @@
 	if (nflavs != supported)
 		report = false;
 	*flavorsp = htonl(supported);
-
+	nfserr = 0;
 out:
 	if (exp)
 		exp_put(exp);
@@ -3240,14 +3473,18 @@
 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 		     struct nfsd4_secinfo *secinfo)
 {
-	return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp);
+	struct xdr_stream *xdr = &resp->xdr;
+
+	return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp);
 }
 
 static __be32
 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
 		     struct nfsd4_secinfo_no_name *secinfo)
 {
-	return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp);
+	struct xdr_stream *xdr = &resp->xdr;
+
+	return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp);
 }
 
 /*
@@ -3257,41 +3494,47 @@
 static __be32
 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
-	RESERVE_SPACE(16);
+	p = xdr_reserve_space(xdr, 16);
+	if (!p)
+		return nfserr_resource;
 	if (nfserr) {
-		WRITE32(3);
-		WRITE32(0);
-		WRITE32(0);
-		WRITE32(0);
+		*p++ = cpu_to_be32(3);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);
 	}
 	else {
-		WRITE32(3);
-		WRITE32(setattr->sa_bmval[0]);
-		WRITE32(setattr->sa_bmval[1]);
-		WRITE32(setattr->sa_bmval[2]);
+		*p++ = cpu_to_be32(3);
+		*p++ = cpu_to_be32(setattr->sa_bmval[0]);
+		*p++ = cpu_to_be32(setattr->sa_bmval[1]);
+		*p++ = cpu_to_be32(setattr->sa_bmval[2]);
 	}
-	ADJUST_ARGS();
 	return nfserr;
 }
 
 static __be32
 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE);
-		WRITEMEM(&scd->se_clientid, 8);
-		WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
+		if (!p)
+			return nfserr_resource;
+		p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
+		p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
+						NFS4_VERIFIER_SIZE);
 	}
 	else if (nfserr == nfserr_clid_inuse) {
-		RESERVE_SPACE(8);
-		WRITE32(0);
-		WRITE32(0);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(0);
+		*p++ = cpu_to_be32(0);
 	}
 	return nfserr;
 }
@@ -3299,14 +3542,17 @@
 static __be32
 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(16);
-		WRITE32(write->wr_bytes_written);
-		WRITE32(write->wr_how_written);
-		WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 16);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(write->wr_bytes_written);
+		*p++ = cpu_to_be32(write->wr_how_written);
+		p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
+							NFS4_VERIFIER_SIZE);
 	}
 	return nfserr;
 }
@@ -3323,6 +3569,7 @@
 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
 			 struct nfsd4_exchange_id *exid)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 	char *major_id;
 	char *server_scope;
@@ -3338,60 +3585,61 @@
 	server_scope = utsname()->nodename;
 	server_scope_sz = strlen(server_scope);
 
-	RESERVE_SPACE(
+	p = xdr_reserve_space(xdr,
 		8 /* eir_clientid */ +
 		4 /* eir_sequenceid */ +
 		4 /* eir_flags */ +
 		4 /* spr_how */);
+	if (!p)
+		return nfserr_resource;
 
-	WRITEMEM(&exid->clientid, 8);
-	WRITE32(exid->seqid);
-	WRITE32(exid->flags);
+	p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
+	*p++ = cpu_to_be32(exid->seqid);
+	*p++ = cpu_to_be32(exid->flags);
 
-	WRITE32(exid->spa_how);
-	ADJUST_ARGS();
+	*p++ = cpu_to_be32(exid->spa_how);
 
 	switch (exid->spa_how) {
 	case SP4_NONE:
 		break;
 	case SP4_MACH_CRED:
 		/* spo_must_enforce, spo_must_allow */
-		RESERVE_SPACE(16);
+		p = xdr_reserve_space(xdr, 16);
+		if (!p)
+			return nfserr_resource;
 
 		/* spo_must_enforce bitmap: */
-		WRITE32(2);
-		WRITE32(nfs4_minimal_spo_must_enforce[0]);
-		WRITE32(nfs4_minimal_spo_must_enforce[1]);
+		*p++ = cpu_to_be32(2);
+		*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]);
+		*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]);
 		/* empty spo_must_allow bitmap: */
-		WRITE32(0);
+		*p++ = cpu_to_be32(0);
 
-		ADJUST_ARGS();
 		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
 
-	RESERVE_SPACE(
+	p = xdr_reserve_space(xdr,
 		8 /* so_minor_id */ +
 		4 /* so_major_id.len */ +
 		(XDR_QUADLEN(major_id_sz) * 4) +
 		4 /* eir_server_scope.len */ +
 		(XDR_QUADLEN(server_scope_sz) * 4) +
 		4 /* eir_server_impl_id.count (0) */);
+	if (!p)
+		return nfserr_resource;
 
 	/* The server_owner struct */
-	WRITE64(minor_id);      /* Minor id */
+	p = xdr_encode_hyper(p, minor_id);      /* Minor id */
 	/* major id */
-	WRITE32(major_id_sz);
-	WRITEMEM(major_id, major_id_sz);
+	p = xdr_encode_opaque(p, major_id, major_id_sz);
 
 	/* Server scope */
-	WRITE32(server_scope_sz);
-	WRITEMEM(server_scope, server_scope_sz);
+	p = xdr_encode_opaque(p, server_scope, server_scope_sz);
 
 	/* Implementation id */
-	WRITE32(0);	/* zero length nfs_impl_id4 array */
-	ADJUST_ARGS();
+	*p++ = cpu_to_be32(0);	/* zero length nfs_impl_id4 array */
 	return 0;
 }
 
@@ -3399,47 +3647,54 @@
 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
 			    struct nfsd4_create_session *sess)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
 
-	RESERVE_SPACE(24);
-	WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN);
-	WRITE32(sess->seqid);
-	WRITE32(sess->flags);
-	ADJUST_ARGS();
+	p = xdr_reserve_space(xdr, 24);
+	if (!p)
+		return nfserr_resource;
+	p = xdr_encode_opaque_fixed(p, sess->sessionid.data,
+					NFS4_MAX_SESSIONID_LEN);
+	*p++ = cpu_to_be32(sess->seqid);
+	*p++ = cpu_to_be32(sess->flags);
 
-	RESERVE_SPACE(28);
-	WRITE32(0); /* headerpadsz */
-	WRITE32(sess->fore_channel.maxreq_sz);
-	WRITE32(sess->fore_channel.maxresp_sz);
-	WRITE32(sess->fore_channel.maxresp_cached);
-	WRITE32(sess->fore_channel.maxops);
-	WRITE32(sess->fore_channel.maxreqs);
-	WRITE32(sess->fore_channel.nr_rdma_attrs);
-	ADJUST_ARGS();
+	p = xdr_reserve_space(xdr, 28);
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(0); /* headerpadsz */
+	*p++ = cpu_to_be32(sess->fore_channel.maxreq_sz);
+	*p++ = cpu_to_be32(sess->fore_channel.maxresp_sz);
+	*p++ = cpu_to_be32(sess->fore_channel.maxresp_cached);
+	*p++ = cpu_to_be32(sess->fore_channel.maxops);
+	*p++ = cpu_to_be32(sess->fore_channel.maxreqs);
+	*p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs);
 
 	if (sess->fore_channel.nr_rdma_attrs) {
-		RESERVE_SPACE(4);
-		WRITE32(sess->fore_channel.rdma_attrs);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(sess->fore_channel.rdma_attrs);
 	}
 
-	RESERVE_SPACE(28);
-	WRITE32(0); /* headerpadsz */
-	WRITE32(sess->back_channel.maxreq_sz);
-	WRITE32(sess->back_channel.maxresp_sz);
-	WRITE32(sess->back_channel.maxresp_cached);
-	WRITE32(sess->back_channel.maxops);
-	WRITE32(sess->back_channel.maxreqs);
-	WRITE32(sess->back_channel.nr_rdma_attrs);
-	ADJUST_ARGS();
+	p = xdr_reserve_space(xdr, 28);
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(0); /* headerpadsz */
+	*p++ = cpu_to_be32(sess->back_channel.maxreq_sz);
+	*p++ = cpu_to_be32(sess->back_channel.maxresp_sz);
+	*p++ = cpu_to_be32(sess->back_channel.maxresp_cached);
+	*p++ = cpu_to_be32(sess->back_channel.maxops);
+	*p++ = cpu_to_be32(sess->back_channel.maxreqs);
+	*p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs);
 
 	if (sess->back_channel.nr_rdma_attrs) {
-		RESERVE_SPACE(4);
-		WRITE32(sess->back_channel.rdma_attrs);
-		ADJUST_ARGS();
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			return nfserr_resource;
+		*p++ = cpu_to_be32(sess->back_channel.rdma_attrs);
 	}
 	return 0;
 }
@@ -3448,22 +3703,25 @@
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 		      struct nfsd4_sequence *seq)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
 
-	RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20);
-	WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
-	WRITE32(seq->seqid);
-	WRITE32(seq->slotid);
+	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20);
+	if (!p)
+		return nfserr_resource;
+	p = xdr_encode_opaque_fixed(p, seq->sessionid.data,
+					NFS4_MAX_SESSIONID_LEN);
+	*p++ = cpu_to_be32(seq->seqid);
+	*p++ = cpu_to_be32(seq->slotid);
 	/* Note slotid's are numbered from zero: */
-	WRITE32(seq->maxslots - 1); /* sr_highest_slotid */
-	WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */
-	WRITE32(seq->status_flags);
+	*p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */
+	*p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */
+	*p++ = cpu_to_be32(seq->status_flags);
 
-	ADJUST_ARGS();
-	resp->cstate.datap = p; /* DRC cache data pointer */
+	resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */
 	return 0;
 }
 
@@ -3471,20 +3729,22 @@
 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
 			  struct nfsd4_test_stateid *test_stateid)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_test_stateid_id *stateid, *next;
 	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
 
-	RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
+	p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids));
+	if (!p)
+		return nfserr_resource;
 	*p++ = htonl(test_stateid->ts_num_ids);
 
 	list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
 		*p++ = stateid->ts_id_status;
 	}
 
-	ADJUST_ARGS();
 	return nfserr;
 }
 
@@ -3563,81 +3823,99 @@
 };
 
 /*
- * Calculate the total amount of memory that the compound response has taken
- * after encoding the current operation with pad.
+ * Calculate whether we still have space to encode respsize bytes.
+ * There are two considerations:
+ *     - For NFS versions >=4.1, the size of the reply must stay within
+ *       session limits
+ *     - For all NFS versions, we must stay within limited preallocated
+ *       buffer space.
  *
- * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop()
- *      which was specified at nfsd4_operation, else pad is zero.
- *
- * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
- *
- * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
- * will be at least a page and will therefore hold the xdr_buf head.
+ * This is called before the operation is processed, so can only provide
+ * an upper estimate.  For some idempotent operations (such as
+ * getattr), it's not necessarily a problem if that estimate is wrong,
+ * as we can fail it after processing without significant side effects.
  */
-__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
+__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)
 {
-	struct xdr_buf *xb = &resp->rqstp->rq_res;
-	struct nfsd4_session *session = NULL;
+	struct xdr_buf *buf = &resp->rqstp->rq_res;
 	struct nfsd4_slot *slot = resp->cstate.slot;
-	u32 length, tlen = 0;
 
+	if (buf->len + respsize <= buf->buflen)
+		return nfs_ok;
 	if (!nfsd4_has_session(&resp->cstate))
-		return 0;
-
-	session = resp->cstate.session;
-
-	if (xb->page_len == 0) {
-		length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
-	} else {
-		if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
-			tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;
-
-		length = xb->head[0].iov_len + xb->page_len + tlen + pad;
-	}
-	dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
-		length, xb->page_len, tlen, pad);
-
-	if (length > session->se_fchannel.maxresp_sz)
-		return nfserr_rep_too_big;
-
-	if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) &&
-	    length > session->se_fchannel.maxresp_cached)
+		return nfserr_resource;
+	if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) {
+		WARN_ON_ONCE(1);
 		return nfserr_rep_too_big_to_cache;
-
-	return 0;
+	}
+	return nfserr_rep_too_big;
 }
 
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfs4_stateowner *so = resp->cstate.replay_owner;
-	__be32 *statp;
+	struct svc_rqst *rqstp = resp->rqstp;
+	int post_err_offset;
+	nfsd4_enc encoder;
 	__be32 *p;
 
-	RESERVE_SPACE(8);
-	WRITE32(op->opnum);
-	statp = p++;	/* to be backfilled at the end */
-	ADJUST_ARGS();
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	*p++ = cpu_to_be32(op->opnum);
+	post_err_offset = xdr->buf->len;
 
 	if (op->opnum == OP_ILLEGAL)
 		goto status;
 	BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
 	       !nfsd4_enc_ops[op->opnum]);
-	op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
+	encoder = nfsd4_enc_ops[op->opnum];
+	op->status = encoder(resp, op->status, &op->u);
+	xdr_commit_encode(xdr);
+
 	/* nfsd4_check_resp_size guarantees enough room for error status */
-	if (!op->status)
-		op->status = nfsd4_check_resp_size(resp, 0);
+	if (!op->status) {
+		int space_needed = 0;
+		if (!nfsd4_last_compound_op(rqstp))
+			space_needed = COMPOUND_ERR_SLACK_SPACE;
+		op->status = nfsd4_check_resp_size(resp, space_needed);
+	}
+	if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
+		struct nfsd4_slot *slot = resp->cstate.slot;
+
+		if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+			op->status = nfserr_rep_too_big_to_cache;
+		else
+			op->status = nfserr_rep_too_big;
+	}
+	if (op->status == nfserr_resource ||
+	    op->status == nfserr_rep_too_big ||
+	    op->status == nfserr_rep_too_big_to_cache) {
+		/*
+		 * The operation may have already been encoded or
+		 * partially encoded.  No op returns anything additional
+		 * in the case of one of these three errors, so we can
+		 * just truncate back to after the status.  But it's a
+		 * bug if we had to do this on a non-idempotent op:
+		 */
+		warn_on_nonidempotent_op(op);
+		xdr_truncate_encode(xdr, post_err_offset);
+	}
 	if (so) {
+		int len = xdr->buf->len - post_err_offset;
+
 		so->so_replay.rp_status = op->status;
-		so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
-		memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen);
+		so->so_replay.rp_buflen = len;
+		read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+						so->so_replay.rp_buf, len);
 	}
 status:
-	/*
-	 * Note: We write the status directly, instead of using WRITE32(),
-	 * since it is already in network byte order.
-	 */
-	*statp = op->status;
+	/* Note that op->status is already in network byte order: */
+	write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);
 }
 
 /* 
@@ -3649,21 +3927,22 @@
  * called with nfs4_lock_state() held
  */
 void
-nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 {
 	__be32 *p;
 	struct nfs4_replay *rp = op->replay;
 
 	BUG_ON(!rp);
 
-	RESERVE_SPACE(8);
-	WRITE32(op->opnum);
+	p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	*p++ = cpu_to_be32(op->opnum);
 	*p++ = rp->rp_status;  /* already xdr'ed */
-	ADJUST_ARGS();
 
-	RESERVE_SPACE(rp->rp_buflen);
-	WRITEMEM(rp->rp_buf, rp->rp_buflen);
-	ADJUST_ARGS();
+	p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
 }
 
 int
@@ -3720,19 +3999,19 @@
 	 * All that remains is to write the tag and operation count...
 	 */
 	struct nfsd4_compound_state *cs = &resp->cstate;
-	struct kvec *iov;
+	struct xdr_buf *buf = resp->xdr.buf;
+
+	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+				 buf->tail[0].iov_len);
+
+	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
 	p = resp->tagp;
 	*p++ = htonl(resp->taglen);
 	memcpy(p, resp->tag, resp->taglen);
 	p += XDR_QUADLEN(resp->taglen);
 	*p++ = htonl(resp->opcnt);
 
-	if (rqstp->rq_res.page_len) 
-		iov = &rqstp->rq_res.tail[0];
-	else
-		iov = &rqstp->rq_res.head[0];
-	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
-	BUG_ON(iov->iov_len > PAGE_SIZE);
 	if (nfsd4_has_session(cs)) {
 		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 		struct nfs4_client *clp = cs->session->se_client;

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index f8f060f..6040da8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c

@@ -224,13 +224,6 @@
 	hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
 }
 
-static inline bool
-nfsd_cache_entry_expired(struct svc_cacherep *rp)
-{
-	return rp->c_state != RC_INPROG &&
-	       time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
-}
-
 /*
  * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
  * Also prune the oldest ones when the total exceeds the max number of entries.
@@ -242,8 +235,14 @@
 	long freed = 0;
 
 	list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
-		if (!nfsd_cache_entry_expired(rp) &&
-		    num_drc_entries <= max_drc_entries)
+		/*
+		 * Don't free entries attached to calls that are still
+		 * in-progress, but do keep scanning the list.
+		 */
+		if (rp->c_state == RC_INPROG)
+			continue;
+		if (num_drc_entries <= max_drc_entries &&
+		    time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
 			break;
 		nfsd_reply_cache_free_locked(rp);
 		freed++;

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f34d9de..5184404 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c

@@ -1179,7 +1179,6 @@
 	retval = nfsd4_init_slabs();
 	if (retval)
 		goto out_unregister_pernet;
-	nfs4_state_init();
 	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
 	if (retval)
 		goto out_free_slabs;

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 479eb68..847daf3 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h

@@ -15,11 +15,20 @@
 #include <linux/nfs2.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
+#include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/msg_prot.h>
 
-#include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
-#include <linux/nfsd/stats.h>
+#include <uapi/linux/nfsd/debug.h>
+
+#include "stats.h"
+#include "export.h"
+
+#undef ifdebug
+#ifdef NFSD_DEBUG
+# define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
+#else
+# define ifdebug(flag)		if (0)
+#endif
 
 /*
  * nfsd version
@@ -106,7 +115,6 @@
  */
 #ifdef CONFIG_NFSD_V4
 extern unsigned long max_delegations;
-void nfs4_state_init(void);
 int nfsd4_init_slabs(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
@@ -117,7 +125,6 @@
 int nfs4_reset_recoverydir(char *recdir);
 char * nfs4_recoverydir(void);
 #else
-static inline void nfs4_state_init(void) { }
 static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3c37b16..ec83934 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c

@@ -88,9 +88,8 @@
 	/* Check if the request originated from a secure port. */
 	if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
 		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-		dprintk(KERN_WARNING
-		       "nfsd: request from insecure port %s!\n",
-		       svc_print_addr(rqstp, buf, sizeof(buf)));
+		dprintk("nfsd: request from insecure port %s!\n",
+		        svc_print_addr(rqstp, buf, sizeof(buf)));
 		return nfserr_perm;
 	}
 
@@ -169,8 +168,8 @@
 		data_left -= len;
 		if (data_left < 0)
 			return error;
-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
-		fid = (struct fid *)(fh->fh_auth + len);
+		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+		fid = (struct fid *)(fh->fh_fsid + len);
 	} else {
 		__u32 tfh[2];
 		dev_t xdev;
@@ -385,7 +384,7 @@
 {
 	if (dentry != exp->ex_path.dentry) {
 		struct fid *fid = (struct fid *)
-			(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
+			(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
 		int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
 		int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
 
@@ -513,7 +512,6 @@
 	 */
 
 	struct inode * inode = dentry->d_inode;
-	__u32 *datap;
 	dev_t ex_dev = exp_sb(exp)->s_dev;
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
@@ -557,17 +555,16 @@
 		if (inode)
 			_fh_update_old(dentry, exp, &fhp->fh_handle);
 	} else {
-		int len;
+		fhp->fh_handle.fh_size =
+			key_len(fhp->fh_handle.fh_fsid_type) + 4;
 		fhp->fh_handle.fh_auth_type = 0;
-		datap = fhp->fh_handle.fh_auth+0;
-		mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
+
+		mk_fsid(fhp->fh_handle.fh_fsid_type,
+			fhp->fh_handle.fh_fsid,
+			ex_dev,
 			exp->ex_path.dentry->d_inode->i_ino,
 			exp->ex_fsid, exp->ex_uuid);
 
-		len = key_len(fhp->fh_handle.fh_fsid_type);
-		datap += len/4;
-		fhp->fh_handle.fh_size = 4 + len;
-
 		if (inode)
 			_fh_update(fhp, exp, dentry);
 		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {

diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index ad67964..2e89e70 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h

@@ -1,9 +1,58 @@
-/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+/*
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * This file describes the layout of the file handles as passed
+ * over the wire.
+ */
+#ifndef _LINUX_NFSD_NFSFH_H
+#define _LINUX_NFSD_NFSFH_H
 
-#ifndef _LINUX_NFSD_FH_INT_H
-#define _LINUX_NFSD_FH_INT_H
+#include <linux/sunrpc/svc.h>
+#include <uapi/linux/nfsd/nfsfh.h>
 
-#include <linux/nfsd/nfsfh.h>
+static inline __u32 ino_t_to_u32(ino_t ino)
+{
+	return (__u32) ino;
+}
+
+static inline ino_t u32_to_ino_t(__u32 uino)
+{
+	return (ino_t) uino;
+}
+
+/*
+ * This is the internal representation of an NFS handle used in knfsd.
+ * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
+ */
+typedef struct svc_fh {
+	struct knfsd_fh		fh_handle;	/* FH data */
+	struct dentry *		fh_dentry;	/* validated dentry */
+	struct svc_export *	fh_export;	/* export pointer */
+	int			fh_maxsize;	/* max size for fh_handle */
+
+	unsigned char		fh_locked;	/* inode locked by us */
+	unsigned char		fh_want_write;	/* remount protection taken */
+
+#ifdef CONFIG_NFSD_V3
+	unsigned char		fh_post_saved;	/* post-op attrs saved */
+	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
+
+	/* Pre-op attributes saved during fh_lock */
+	__u64			fh_pre_size;	/* size before operation */
+	struct timespec		fh_pre_mtime;	/* mtime before oper */
+	struct timespec		fh_pre_ctime;	/* ctime before oper */
+	/*
+	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
+	 *  to find out if it is valid.
+	 */
+	u64			fh_pre_change;
+
+	/* Post-op attributes saved in fh_unlock */
+	struct kstat		fh_post_attr;	/* full attrs after operation */
+	u64			fh_post_change; /* nfsv4 change; see above */
+#endif /* CONFIG_NFSD_V3 */
+
+} svc_fh;
 
 enum nfsd_fsid {
 	FSID_DEV = 0,
@@ -215,4 +264,4 @@
 	}
 }
 
-#endif /* _LINUX_NFSD_FH_INT_H */
+#endif /* _LINUX_NFSD_NFSFH_H */

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9a4a5f9..1879e43 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c

@@ -591,12 +591,6 @@
 	nfsdstats.th_cnt++;
 	mutex_unlock(&nfsd_mutex);
 
-	/*
-	 * We want less throttling in balance_dirty_pages() so that nfs to
-	 * localhost doesn't cause nfsd to lock up due to all the client's
-	 * dirty pages.
-	 */
-	current->flags |= PF_LESS_THROTTLE;
 	set_freezable();
 
 	/*

diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 9c769a4..1ac306b 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c

@@ -214,7 +214,8 @@
 int
 nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
@@ -248,7 +249,8 @@
 {
 	unsigned int len;
 	int v;
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 
 	args->offset    = ntohl(*p++);
@@ -281,7 +283,8 @@
 	unsigned int len, hdr, dlen;
 	int v;
 
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 
 	p++;				/* beginoffset */
@@ -355,7 +358,8 @@
 int
 nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
@@ -391,7 +395,8 @@
 nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readdirargs *args)
 {
-	if (!(p = decode_fh(p, &args->fh)))
+	p = decode_fh(p, &args->fh);
+	if (!p)
 		return 0;
 	args->cookie = ntohl(*p++);
 	args->count  = ntohl(*p++);

diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 424d8f5..374c662 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h

@@ -37,7 +37,6 @@
 
 #include <linux/idr.h>
 #include <linux/sunrpc/svc_xprt.h>
-#include <linux/nfsd/nfsfh.h>
 #include "nfsfh.h"
 
 typedef struct {
@@ -123,7 +122,7 @@
 /* Maximum number of operations per session compound */
 #define NFSD_MAX_OPS_PER_COMPOUND	16
 /* Maximum  session per slot cache size */
-#define NFSD_SLOT_CACHE_SIZE		1024
+#define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
 #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
 #define NFSD_MAX_MEM_PER_SESSION  \
@@ -464,8 +463,6 @@
 extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
 							struct nfsd_net *nn);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
-extern void nfs4_free_openowner(struct nfs4_openowner *);
-extern void nfs4_free_lockowner(struct nfs4_lockowner *);
 extern int set_callback_cred(void);
 extern void nfsd4_init_callback(struct nfsd4_callback *);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);

diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 6d4521f..cd90878 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c

@@ -24,7 +24,6 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/nfsd/stats.h>
 #include <net/net_namespace.h>
 
 #include "nfsd.h"

diff --git a/include/linux/nfsd/stats.h b/fs/nfsd/stats.h
similarity index 91%
rename from include/linux/nfsd/stats.h
rename to fs/nfsd/stats.h
index e75b254..a5c944b 100644
--- a/include/linux/nfsd/stats.h
+++ b/fs/nfsd/stats.h

@@ -1,12 +1,10 @@
 /*
- * linux/include/linux/nfsd/stats.h
- *
  * Statistics for NFS server.
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
-#ifndef LINUX_NFSD_STATS_H
-#define LINUX_NFSD_STATS_H
+#ifndef _NFSD_STATS_H
+#define _NFSD_STATS_H
 
 #include <uapi/linux/nfsd/stats.h>
 
@@ -42,4 +40,4 @@
 void	nfsd_stat_init(void);
 void	nfsd_stat_shutdown(void);
 
-#endif /* LINUX_NFSD_STATS_H */
+#endif /* _NFSD_STATS_H */

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 16f0673..140c496 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c

@@ -820,55 +820,54 @@
 	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
 }
 
-static __be32
-nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
 {
-	mm_segment_t	oldfs;
-	__be32		err;
-	int		host_err;
-
-	err = nfserr_perm;
-
-	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
-		struct splice_desc sd = {
-			.len		= 0,
-			.total_len	= *count,
-			.pos		= offset,
-			.u.data		= rqstp,
-		};
-
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-	} else {
-		oldfs = get_fs();
-		set_fs(KERNEL_DS);
-		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
-		set_fs(oldfs);
-	}
-
 	if (host_err >= 0) {
 		nfsdstats.io_read += host_err;
 		*count = host_err;
-		err = 0;
 		fsnotify_access(file);
+		return 0;
 	} else 
-		err = nfserrno(host_err);
-	return err;
+		return nfserrno(host_err);
 }
 
-static void kill_suid(struct dentry *dentry)
+int nfsd_splice_read(struct svc_rqst *rqstp,
+		     struct file *file, loff_t offset, unsigned long *count)
 {
-	struct iattr	ia;
-	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+	struct splice_desc sd = {
+		.len		= 0,
+		.total_len	= *count,
+		.pos		= offset,
+		.u.data		= rqstp,
+	};
+	int host_err;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	/*
-	 * Note we call this on write, so notify_change will not
-	 * encounter any conflicting delegations:
-	 */
-	notify_change(dentry, &ia, NULL);
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
+	return nfsd_finish_read(file, count, host_err);
+}
+
+int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
+		unsigned long *count)
+{
+	mm_segment_t oldfs;
+	int host_err;
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+	set_fs(oldfs);
+	return nfsd_finish_read(file, count, host_err);
+}
+
+static __be32
+nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
+	      loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+	if (file->f_op->splice_read && rqstp->rq_splice_ok)
+		return nfsd_splice_read(rqstp, file, offset, count);
+	else
+		return nfsd_readv(file, offset, vec, vlen, count);
 }
 
 /*
@@ -922,6 +921,16 @@
 	int			stable = *stablep;
 	int			use_wgather;
 	loff_t			pos = offset;
+	unsigned int		pflags = current->flags;
+
+	if (rqstp->rq_local)
+		/*
+		 * We want less throttling in balance_dirty_pages()
+		 * and shrink_inactive_list() so that nfs to
+		 * localhost doesn't cause nfsd to lock up due to all
+		 * the client's dirty pages or its congested queue.
+		 */
+		current->flags |= PF_LESS_THROTTLE;
 
 	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
@@ -942,10 +951,6 @@
 	nfsdstats.io_write += host_err;
 	fsnotify_modify(file);
 
-	/* clear setuid/setgid flag after write */
-	if (inode->i_mode & (S_ISUID | S_ISGID))
-		kill_suid(dentry);
-
 	if (stable) {
 		if (use_wgather)
 			host_err = wait_for_concurrent_writes(file);
@@ -959,9 +964,45 @@
 		err = 0;
 	else
 		err = nfserrno(host_err);
+	if (rqstp->rq_local)
+		tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
 	return err;
 }
 
+__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct file **file, struct raparms **ra)
+{
+	struct inode *inode;
+	__be32 err;
+
+	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
+	if (err)
+		return err;
+
+	inode = file_inode(*file);
+
+	/* Get readahead parameters */
+	*ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+
+	if (*ra && (*ra)->p_set)
+		(*file)->f_ra = (*ra)->p_ra;
+	return nfs_ok;
+}
+
+void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
+{
+	/* Write back readahead params */
+	if (ra) {
+		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+		spin_lock(&rab->pb_lock);
+		ra->p_ra = file->f_ra;
+		ra->p_set = 1;
+		ra->p_count--;
+		spin_unlock(&rab->pb_lock);
+	}
+	nfsd_close(file);
+}
+
 /*
  * Read data from a file. count must contain the requested read count
  * on entry. On return, *count contains the number of bytes actually read.
@@ -971,55 +1012,17 @@
 	loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
 {
 	struct file *file;
-	struct inode *inode;
 	struct raparms	*ra;
 	__be32 err;
 
-	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+	err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
 	if (err)
 		return err;
 
-	inode = file_inode(file);
+	err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
 
-	/* Get readahead parameters */
-	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+	nfsd_put_tmp_read_open(file, ra);
 
-	if (ra && ra->p_set)
-		file->f_ra = ra->p_ra;
-
-	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
-
-	/* Write back readahead params */
-	if (ra) {
-		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-		spin_lock(&rab->pb_lock);
-		ra->p_ra = file->f_ra;
-		ra->p_set = 1;
-		ra->p_count--;
-		spin_unlock(&rab->pb_lock);
-	}
-
-	nfsd_close(file);
-	return err;
-}
-
-/* As above, but use the provided file descriptor. */
-__be32
-nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-		loff_t offset, struct kvec *vec, int vlen,
-		unsigned long *count)
-{
-	__be32		err;
-
-	if (file) {
-		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-				NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
-		if (err)
-			goto out;
-		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
-	} else /* Note file may still be NULL in NFSv4 special stateid case: */
-		err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
-out:
 	return err;
 }
 

diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fbe90bd..91b6ae3 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h

@@ -70,10 +70,16 @@
 __be32		nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
 				int, struct file **);
 void		nfsd_close(struct file *);
+struct raparms;
+__be32		nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
+				struct file **, struct raparms **);
+void		nfsd_put_tmp_read_open(struct file *, struct raparms *);
+int		nfsd_splice_read(struct svc_rqst *,
+				struct file *, loff_t, unsigned long *);
+int		nfsd_readv(struct file *, loff_t, struct kvec *, int,
+				unsigned long *);
 __be32 		nfsd_read(struct svc_rqst *, struct svc_fh *,
 				loff_t, struct kvec *, int, unsigned long *);
-__be32 		nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
-				loff_t, struct kvec *, int, unsigned long *);
 __be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
 				loff_t, struct kvec *,int, unsigned long *, int *);
 __be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,

diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 5ea7df3..18cbb6d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h

@@ -58,7 +58,7 @@
 	/* For sessions DRC */
 	struct nfsd4_session	*session;
 	struct nfsd4_slot	*slot;
-	__be32			*datap;
+	int			data_offset;
 	size_t			iovlen;
 	u32			minorversion;
 	__be32			status;
@@ -287,9 +287,8 @@
 	struct svc_fh * rd_fhp;             /* response */
 
 	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
+	struct xdr_stream	*xdr;
+	int			cookie_offset;
 };
 
 struct nfsd4_release_lockowner {
@@ -506,9 +505,7 @@
 
 struct nfsd4_compoundres {
 	/* scratch variables for XDR encode */
-	__be32 *			p;
-	__be32 *			end;
-	struct xdr_buf *		xbuf;
+	struct xdr_stream		xdr;
 	struct svc_rqst *		rqstp;
 
 	u32				taglen;
@@ -538,6 +535,9 @@
 	return argp->opcnt == resp->opcnt;
 }
 
+int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
+void warn_on_nonidempotent_op(struct nfsd4_op *op);
+
 #define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
 
 static inline void
@@ -563,10 +563,11 @@
 		struct nfsd4_compoundres *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
-void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
-__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-		       struct dentry *dentry, __be32 **buffer, int countp,
-		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
+__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
+		struct svc_fh *fhp, struct svc_export *exp,
+		struct dentry *dentry,
+		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *,
 		struct nfsd4_setclientid *setclid);

diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index f3a82fb..2497815 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c

@@ -152,10 +152,10 @@
  */
 const struct file_operations nilfs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= nilfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= nilfs_compat_ioctl,

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9c5726..6252b17 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c

@@ -298,19 +298,20 @@
 }
 
 static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t offset, unsigned long nr_segs)
+nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t size;
 
 	if (rw == WRITE)
 		return 0;
 
 	/* Needs synchronization with the cleaner */
-	size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
 				  nilfs_get_block);
 
 	/*
@@ -319,7 +320,7 @@
 	 */
 	if (unlikely((rw & WRITE) && size < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			nilfs_write_failed(mapping, end);

diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 86ddab9..5c9e2c8 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c

@@ -2090,10 +2090,7 @@
 	size_t count;		/* after file limit checks */
 	ssize_t written, err;
 
-	count = 0;
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		return err;
+	count = iov_length(iov, nr_segs);
 	pos = *ppos;
 	/* We can write back this queue in page reclaim. */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2202,8 +2199,8 @@
 
 const struct file_operations ntfs_file_ops = {
 	.llseek		= generic_file_llseek,	 /* Seek inside file. */
-	.read		= do_sync_read,		 /* Read from file. */
-	.aio_read	= generic_file_aio_read, /* Async read from file. */
+	.read		= new_sync_read,	 /* Read from file. */
+	.read_iter	= generic_file_read_iter, /* Async read from file. */
 #ifdef NTFS_RW
 	.write		= do_sync_write,	 /* Write to file. */
 	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d310d12..4a231a1 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c

@@ -599,9 +599,8 @@
 
 static ssize_t ocfs2_direct_IO(int rw,
 			       struct kiocb *iocb,
-			       const struct iovec *iov,
-			       loff_t offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter,
+			       loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file)->i_mapping->host;
@@ -618,7 +617,7 @@
 		return 0;
 
 	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				    iov, offset, nr_segs,
+				    iter, offset,
 				    ocfs2_direct_IO_get_blocks,
 				    ocfs2_dio_end_io, NULL, 0);
 }

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a68e07a..681691b 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c

@@ -1799,7 +1799,7 @@
 
 /* ------------------------------------------------------------ */
 
-static int o2net_accept_one(struct socket *sock)
+static int o2net_accept_one(struct socket *sock, int *more)
 {
 	int ret, slen;
 	struct sockaddr_in sin;
@@ -1810,6 +1810,7 @@
 	struct o2net_node *nn;
 
 	BUG_ON(sock == NULL);
+	*more = 0;
 	ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
 			       sock->sk->sk_protocol, &new_sock);
 	if (ret)
@@ -1821,6 +1822,7 @@
 	if (ret < 0)
 		goto out;
 
+	*more = 1;
 	new_sock->sk->sk_allocation = GFP_ATOMIC;
 
 	ret = o2net_set_nodelay(new_sock);
@@ -1919,11 +1921,37 @@
 	return ret;
 }
 
+/*
+ * This function is invoked in response to one or more
+ * pending accepts at softIRQ level. We must drain the
+ * entire queue before returning.
+ */
+
 static void o2net_accept_many(struct work_struct *work)
 {
 	struct socket *sock = o2net_listen_sock;
-	while (o2net_accept_one(sock) == 0)
+	int	more;
+
+	/*
+	 * It is critical to note that due to interrupt moderation
+	 * at the network driver level, we can't assume to get a
+	 * softIRQ for every single conn since tcp SYN packets
+	 * can arrive back-to-back, and therefore many pending
+	 * accepts may result in just 1 softIRQ. If we terminate
+	 * the o2net_accept_one() loop upon seeing an err, what happens
+	 * to the rest of the conns in the queue? If no new SYN
+	 * arrives for hours, no softIRQ will be delivered,
+	 * and the connections will just sit in the queue.
+	 * So the return value of o2net_accept_one() is ignored;
+	 * only 'more' decides whether we keep draining.
+	 */
+
+	for (;;) {
+		o2net_accept_one(sock, &more);
+		if (!more)
+			break;
 		cond_resched();
+	}
 }
 
 static void o2net_listen_data_ready(struct sock *sk)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8eb6e57..2930e23 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c

@@ -2233,16 +2233,13 @@
 	return ret;
 }
 
-static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
-				    const struct iovec *iov,
-				    unsigned long nr_segs,
-				    loff_t pos)
+static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
+				    struct iov_iter *from)
 {
 	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
 	int can_do_direct, has_refcount = 0;
 	ssize_t written = 0;
-	size_t ocount;		/* original count */
-	size_t count;		/* after file limit checks */
+	size_t count = iov_iter_count(from);
 	loff_t old_size, *ppos = &iocb->ki_pos;
 	u32 old_clusters;
 	struct file *file = iocb->ki_filp;
@@ -2256,7 +2253,7 @@
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
 		file->f_path.dentry->d_name.len,
 		file->f_path.dentry->d_name.name,
-		(unsigned int)nr_segs);
+		(unsigned int)from->nr_segs);	/* GRRRRR */
 
 	if (iocb->ki_nbytes == 0)
 		return 0;
@@ -2354,29 +2351,21 @@
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	ret = generic_segment_checks(iov, &nr_segs, &ocount,
-				     VERIFY_READ);
-	if (ret)
-		goto out_dio;
-
-	count = ocount;
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
 		goto out_dio;
 
+	iov_iter_truncate(from, count);
 	if (direct_io) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
-						    count, ocount);
+		written = generic_file_direct_write(iocb, from, *ppos);
 		if (written < 0) {
 			ret = written;
 			goto out_dio;
 		}
 	} else {
-		struct iov_iter from;
-		iov_iter_init(&from, iov, nr_segs, count, 0);
 		current->backing_dev_info = file->f_mapping->backing_dev_info;
-		written = generic_perform_write(file, &from, *ppos);
+		written = generic_perform_write(file, from, *ppos);
 		if (likely(written >= 0))
 			iocb->ki_pos = *ppos + written;
 		current->backing_dev_info = NULL;
@@ -2441,84 +2430,6 @@
 	return ret;
 }
 
-static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
-				struct file *out,
-				struct splice_desc *sd)
-{
-	int ret;
-
-	ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
-					    sd->total_len, 0, NULL, NULL);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	return splice_from_pipe_feed(pipe, sd, pipe_to_file);
-}
-
-static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
-				       struct file *out,
-				       loff_t *ppos,
-				       size_t len,
-				       unsigned int flags)
-{
-	int ret;
-	struct address_space *mapping = out->f_mapping;
-	struct inode *inode = mapping->host;
-	struct splice_desc sd = {
-		.total_len = len,
-		.flags = flags,
-		.pos = *ppos,
-		.u.file = out,
-	};
-
-
-	trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
-			(unsigned long long)OCFS2_I(inode)->ip_blkno,
-			out->f_path.dentry->d_name.len,
-			out->f_path.dentry->d_name.name, len);
-
-	pipe_lock(pipe);
-
-	splice_from_pipe_begin(&sd);
-	do {
-		ret = splice_from_pipe_next(pipe, &sd);
-		if (ret <= 0)
-			break;
-
-		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-		ret = ocfs2_rw_lock(inode, 1);
-		if (ret < 0)
-			mlog_errno(ret);
-		else {
-			ret = ocfs2_splice_to_file(pipe, out, &sd);
-			ocfs2_rw_unlock(inode, 1);
-		}
-		mutex_unlock(&inode->i_mutex);
-	} while (ret > 0);
-	splice_from_pipe_end(pipe, &sd);
-
-	pipe_unlock(pipe);
-
-	if (sd.num_spliced)
-		ret = sd.num_spliced;
-
-	if (ret > 0) {
-		int err;
-
-		err = generic_write_sync(out, *ppos, ret);
-		if (err)
-			ret = err;
-		else
-			*ppos += ret;
-
-		balance_dirty_pages_ratelimited(mapping);
-	}
-
-	return ret;
-}
-
 static ssize_t ocfs2_file_splice_read(struct file *in,
 				      loff_t *ppos,
 				      struct pipe_inode_info *pipe,
@@ -2534,7 +2445,7 @@
 			in->f_path.dentry->d_name.name, len);
 
 	/*
-	 * See the comment in ocfs2_file_aio_read()
+	 * See the comment in ocfs2_file_read_iter()
 	 */
 	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
 	if (ret < 0) {
@@ -2549,10 +2460,8 @@
 	return ret;
 }
 
-static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
-				   const struct iovec *iov,
-				   unsigned long nr_segs,
-				   loff_t pos)
+static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
+				   struct iov_iter *to)
 {
 	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
 	struct file *filp = iocb->ki_filp;
@@ -2561,7 +2470,8 @@
 	trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			filp->f_path.dentry->d_name.len,
-			filp->f_path.dentry->d_name.name, nr_segs);
+			filp->f_path.dentry->d_name.name,
+			to->nr_segs);	/* GRRRRR */
 
 
 	if (!inode) {
@@ -2606,13 +2516,13 @@
 	}
 	ocfs2_inode_unlock(inode, lock_level);
 
-	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
+	ret = generic_file_read_iter(iocb, to);
 	trace_generic_file_aio_read_ret(ret);
 
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
 
-	/* see ocfs2_file_aio_write */
+	/* see ocfs2_file_write_iter */
 	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
 		rw_level = -1;
 		have_alloc_sem = 0;
@@ -2705,14 +2615,14 @@
  */
 const struct file_operations ocfs2_fops = {
 	.llseek		= ocfs2_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
 	.mmap		= ocfs2_mmap,
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_file_release,
 	.open		= ocfs2_file_open,
-	.aio_read	= ocfs2_file_aio_read,
-	.aio_write	= ocfs2_file_aio_write,
+	.read_iter	= ocfs2_file_read_iter,
+	.write_iter	= ocfs2_file_write_iter,
 	.unlocked_ioctl	= ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ocfs2_compat_ioctl,
@@ -2720,7 +2630,7 @@
 	.lock		= ocfs2_lock,
 	.flock		= ocfs2_flock,
 	.splice_read	= ocfs2_file_splice_read,
-	.splice_write	= ocfs2_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
 };
 
@@ -2753,21 +2663,21 @@
  */
 const struct file_operations ocfs2_fops_no_plocks = {
 	.llseek		= ocfs2_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
 	.mmap		= ocfs2_mmap,
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_file_release,
 	.open		= ocfs2_file_open,
-	.aio_read	= ocfs2_file_aio_read,
-	.aio_write	= ocfs2_file_aio_write,
+	.read_iter	= ocfs2_file_read_iter,
+	.write_iter	= ocfs2_file_write_iter,
 	.unlocked_ioctl	= ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ocfs2_compat_ioctl,
 #endif
 	.flock		= ocfs2_flock,
 	.splice_read	= ocfs2_file_splice_read,
-	.splice_write	= ocfs2_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
 };
 

diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 54d57d6..902e885 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c

@@ -337,10 +337,10 @@
 
 const struct file_operations omfs_file_operations = {
 	.llseek = generic_file_llseek,
-	.read = do_sync_read,
-	.write = do_sync_write,
-	.aio_read = generic_file_aio_read,
-	.aio_write = generic_file_aio_write,
+	.read = new_sync_read,
+	.write = new_sync_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = generic_file_write_iter,
 	.mmap = generic_file_mmap,
 	.fsync = generic_file_fsync,
 	.splice_read = generic_file_splice_read,

diff --git a/fs/open.c b/fs/open.c
index 9d64679..36662d0 100644
--- a/fs/open.c
+++ b/fs/open.c

@@ -725,6 +725,12 @@
 	}
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
+	if ((f->f_mode & FMODE_READ) &&
+	     likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
+		f->f_mode |= FMODE_CAN_READ;
+	if ((f->f_mode & FMODE_WRITE) &&
+	     likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
+		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 

diff --git a/fs/pipe.c b/fs/pipe.c
index 034bffa..21981e5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c

@@ -116,50 +116,6 @@
 	pipe_lock(pipe);
 }
 
-static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
-			int atomic)
-{
-	unsigned long copy;
-
-	while (len > 0) {
-		while (!iov->iov_len)
-			iov++;
-		copy = min_t(unsigned long, len, iov->iov_len);
-
-		if (atomic) {
-			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
-				return -EFAULT;
-		} else {
-			if (copy_from_user(to, iov->iov_base, copy))
-				return -EFAULT;
-		}
-		to += copy;
-		len -= copy;
-		iov->iov_base += copy;
-		iov->iov_len -= copy;
-	}
-	return 0;
-}
-
-/*
- * Pre-fault in the user memory, so we can use atomic copies.
- */
-static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
-{
-	while (!iov->iov_len)
-		iov++;
-
-	while (len > 0) {
-		unsigned long this_len;
-
-		this_len = min_t(unsigned long, len, iov->iov_len);
-		fault_in_pages_readable(iov->iov_base, this_len);
-		len -= this_len;
-		iov++;
-	}
-}
-
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -271,24 +227,18 @@
 };
 
 static ssize_t
-pipe_read(struct kiocb *iocb, const struct iovec *_iov,
-	   unsigned long nr_segs, loff_t pos)
+pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
+	size_t total_len = iov_iter_count(to);
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
 	int do_wakeup;
 	ssize_t ret;
-	struct iovec *iov = (struct iovec *)_iov;
-	size_t total_len;
-	struct iov_iter iter;
 
-	total_len = iov_length(iov, nr_segs);
 	/* Null read succeeds. */
 	if (unlikely(total_len == 0))
 		return 0;
 
-	iov_iter_init(&iter, iov, nr_segs, total_len, 0);
-
 	do_wakeup = 0;
 	ret = 0;
 	__pipe_lock(pipe);
@@ -312,7 +262,7 @@
 				break;
 			}
 
-			written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
+			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
 			if (unlikely(written < chars)) {
 				if (!ret)
 					ret = -EFAULT;
@@ -386,24 +336,19 @@
 }
 
 static ssize_t
-pipe_write(struct kiocb *iocb, const struct iovec *_iov,
-	    unsigned long nr_segs, loff_t ppos)
+pipe_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	ssize_t ret;
-	int do_wakeup;
-	struct iovec *iov = (struct iovec *)_iov;
-	size_t total_len;
+	ssize_t ret = 0;
+	int do_wakeup = 0;
+	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
 
-	total_len = iov_length(iov, nr_segs);
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
 		return 0;
 
-	do_wakeup = 0;
-	ret = 0;
 	__pipe_lock(pipe);
 
 	if (!pipe->readers) {
@@ -422,38 +367,19 @@
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			int error, atomic = 1;
-			void *addr;
-
-			error = ops->confirm(pipe, buf);
+			int error = ops->confirm(pipe, buf);
 			if (error)
 				goto out;
 
-			iov_fault_in_pages_read(iov, chars);
-redo1:
-			if (atomic)
-				addr = kmap_atomic(buf->page);
-			else
-				addr = kmap(buf->page);
-			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars, atomic);
-			if (atomic)
-				kunmap_atomic(addr);
-			else
-				kunmap(buf->page);
-			ret = error;
-			do_wakeup = 1;
-			if (error) {
-				if (atomic) {
-					atomic = 0;
-					goto redo1;
-				}
+			ret = copy_page_from_iter(buf->page, offset, chars, from);
+			if (unlikely(ret < chars)) {
+				ret = -EFAULT;	/* short copy: fail */
 				goto out;
 			}
+			do_wakeup = 1;
 			buf->len += chars;
-			total_len -= chars;
 			ret = chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				goto out;
 		}
 	}
@@ -472,8 +398,7 @@
 			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			char *src;
-			int error, atomic = 1;
+			int copied;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -489,40 +414,19 @@
 			 * FIXME! Is this really true?
 			 */
 			do_wakeup = 1;
-			chars = PAGE_SIZE;
-			if (chars > total_len)
-				chars = total_len;
-
-			iov_fault_in_pages_read(iov, chars);
-redo2:
-			if (atomic)
-				src = kmap_atomic(page);
-			else
-				src = kmap(page);
-
-			error = pipe_iov_copy_from_user(src, iov, chars,
-							atomic);
-			if (atomic)
-				kunmap_atomic(src);
-			else
-				kunmap(page);
-
-			if (unlikely(error)) {
-				if (atomic) {
-					atomic = 0;
-					goto redo2;
-				}
+			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
+			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
 				if (!ret)
-					ret = error;
+					ret = -EFAULT;
 				break;
 			}
-			ret += chars;
+			ret += copied;
 
 			/* Insert it into the buffer array */
 			buf->page = page;
 			buf->ops = &anon_pipe_buf_ops;
 			buf->offset = 0;
-			buf->len = chars;
+			buf->len = copied;
 			buf->flags = 0;
 			if (is_packetized(filp)) {
 				buf->ops = &packet_pipe_buf_ops;
@@ -531,8 +435,7 @@
 			pipe->nrbufs = ++bufs;
 			pipe->tmp_page = NULL;
 
-			total_len -= chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				break;
 		}
 		if (bufs < pipe->buffers)
@@ -1044,10 +947,10 @@
 const struct file_operations pipefifo_fops = {
 	.open		= fifo_open,
 	.llseek		= no_llseek,
-	.read		= do_sync_read,
-	.aio_read	= pipe_read,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
+	.read		= new_sync_read,
+	.read_iter	= pipe_read,
+	.write		= new_sync_write,
+	.write_iter	= pipe_write,
 	.poll		= pipe_poll,
 	.unlocked_ioctl	= pipe_ioctl,
 	.release	= pipe_release,

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2b363e2..ff3f0b3 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c

@@ -278,6 +278,17 @@
 	return ret;
 }
 
+/* Q_XQUOTARM helper: copy the flags word in and pass it to ->rm_xquota(). */
+static int quota_rmxquota(struct super_block *sb, void __user *addr)
+{
+	__u32 rm_flags;
+
+	if (copy_from_user(&rm_flags, addr, sizeof(rm_flags)))
+		return -EFAULT;
+	return sb->s_qcop->rm_xquota ?
+		sb->s_qcop->rm_xquota(sb, rm_flags) : -ENOSYS;
+}
+
 /* Copy parameters and call proper function */
 static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		       void __user *addr, struct path *path)
@@ -316,8 +327,9 @@
 		return sb->s_qcop->quota_sync(sb, type);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
-	case Q_XQUOTARM:
 		return quota_setxstate(sb, cmd, addr);
+	case Q_XQUOTARM:
+		return quota_rmxquota(sb, addr);
 	case Q_XGETQSTAT:
 		return quota_getxstate(sb, addr);
 	case Q_XGETQSTATV:

diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 1e56a4e..4f56de8 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c

@@ -31,14 +31,14 @@
 #include "internal.h"
 
 const struct file_operations ramfs_file_operations = {
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= noop_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.llseek		= generic_file_llseek,
 };
 

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0b3d8e4..dda012a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c

@@ -37,13 +37,13 @@
 const struct file_operations ramfs_file_operations = {
 	.mmap			= ramfs_nommu_mmap,
 	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
-	.read			= do_sync_read,
-	.aio_read		= generic_file_aio_read,
-	.write			= do_sync_write,
-	.aio_write		= generic_file_aio_write,
+	.read			= new_sync_read,
+	.read_iter		= generic_file_read_iter,
+	.write			= new_sync_write,
+	.write_iter		= generic_file_write_iter,
 	.fsync			= noop_fsync,
 	.splice_read		= generic_file_splice_read,
-	.splice_write		= generic_file_splice_write,
+	.splice_write		= iter_file_splice_write,
 	.llseek			= generic_file_llseek,
 };
 

diff --git a/fs/read_write.c b/fs/read_write.c
index 31c6efa..009d854 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c

@@ -25,11 +25,12 @@
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t);
+typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
 
 const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
 	.mmap		= generic_file_readonly_mmap,
 	.splice_read	= generic_file_splice_read,
 };
@@ -390,13 +391,34 @@
 
 EXPORT_SYMBOL(do_sync_read);
 
+/* Synchronous read through ->read_iter(); waits if the request is queued. */
+ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec seg = { .iov_base = buf, .iov_len = len };
+	struct iov_iter to;
+	struct kiocb req;
+	ssize_t nr;
+
+	iov_iter_init(&to, READ, &seg, 1, len);
+	init_sync_kiocb(&req, filp);
+	req.ki_pos = *ppos;
+	req.ki_nbytes = len;
+	nr = filp->f_op->read_iter(&req, &to);
+	if (nr == -EIOCBQUEUED)
+		nr = wait_on_sync_kiocb(&req);
+	*ppos = req.ki_pos;
+	return nr;
+}
+
+EXPORT_SYMBOL(new_sync_read);
+
 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 {
 	ssize_t ret;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->read && !file->f_op->aio_read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 		return -EFAULT;
@@ -406,8 +428,10 @@
 		count = ret;
 		if (file->f_op->read)
 			ret = file->f_op->read(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_read)
 			ret = do_sync_read(file, buf, count, pos);
+		else
+			ret = new_sync_read(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_access(file);
 			add_rchar(current, ret);
@@ -439,13 +463,34 @@
 
 EXPORT_SYMBOL(do_sync_write);
 
+/* Synchronous write through ->write_iter(); waits if the request is queued. */
+ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec seg = { .iov_base = (void __user *)buf, .iov_len = len };
+	struct iov_iter from;
+	struct kiocb req;
+	ssize_t nr;
+
+	iov_iter_init(&from, WRITE, &seg, 1, len);
+	init_sync_kiocb(&req, filp);
+	req.ki_pos = *ppos;
+	req.ki_nbytes = len;
+	nr = filp->f_op->write_iter(&req, &from);
+	if (nr == -EIOCBQUEUED)
+		nr = wait_on_sync_kiocb(&req);
+	*ppos = req.ki_pos;
+	return nr;
+}
+
+EXPORT_SYMBOL(new_sync_write);
+
 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 {
 	mm_segment_t old_fs;
 	const char __user *p;
 	ssize_t ret;
 
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	old_fs = get_fs();
@@ -455,8 +500,10 @@
 		count =  MAX_RW_COUNT;
 	if (file->f_op->write)
 		ret = file->f_op->write(file, p, count, pos);
-	else
+	else if (file->f_op->aio_write)
 		ret = do_sync_write(file, p, count, pos);
+	else
+		ret = new_sync_write(file, p, count, pos);
 	set_fs(old_fs);
 	if (ret > 0) {
 		fsnotify_modify(file);
@@ -472,7 +519,7 @@
 
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
@@ -483,8 +530,10 @@
 		file_start_write(file);
 		if (file->f_op->write)
 			ret = file->f_op->write(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_write)
 			ret = do_sync_write(file, buf, count, pos);
+		else
+			ret = new_sync_write(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_modify(file);
 			add_wchar(current, ret);
@@ -601,6 +650,25 @@
 }
 EXPORT_SYMBOL(iov_shorten);
 
+/* Call @fn synchronously over @iov, waiting if the request gets queued. */
+static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
+		unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+{
+	struct iov_iter it;
+	struct kiocb req;
+	ssize_t res;
+
+	iov_iter_init(&it, rw, iov, nr_segs, len);
+	init_sync_kiocb(&req, filp);
+	req.ki_pos = *ppos;
+	req.ki_nbytes = len;
+	res = fn(&req, &it);
+	if (res == -EIOCBQUEUED)
+		res = wait_on_sync_kiocb(&req);
+	*ppos = req.ki_pos;
+	return res;
+}
+
 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
@@ -738,6 +806,7 @@
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = rw_copy_check_uvector(type, uvector, nr_segs,
 				    ARRAY_SIZE(iovstack), iovstack, &iov);
@@ -753,13 +822,18 @@
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
@@ -785,7 +859,7 @@
 {
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 
 	return do_readv_writev(READ, file, vec, vlen, pos);
@@ -798,7 +872,7 @@
 {
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -912,6 +986,7 @@
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 					       UIO_FASTIOV, iovstack, &iov);
@@ -927,13 +1002,18 @@
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
@@ -964,7 +1044,7 @@
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		goto out;
 
 	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1041,7 +1121,7 @@
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
 
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 1bcffea..dc198bc 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c

@@ -50,8 +50,10 @@
 				   unsigned int *bmap_nr,
 				   unsigned int *offset)
 {
-	/* It is in the bitmap block number equal to the block
-	 * number divided by the number of bits in a block. */
+	/*
+	 * It is in the bitmap block number equal to the block
+	 * number divided by the number of bits in a block.
+	 */
 	*bmap_nr = block >> (s->s_blocksize_bits + 3);
 	/* Within that bitmap block it is located at bit offset *offset. */
 	*offset = block & ((s->s_blocksize << 3) - 1);
@@ -71,10 +73,12 @@
 
 	get_bit_address(s, block, &bmap, &offset);
 
-	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
-	 * we need to account for it. */
+	/*
+	 * Old format filesystem? Unlikely, but the bitmaps are all
+	 * up front so we need to account for it.
+	 */
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
-			      &(REISERFS_SB(s)->s_properties)))) {
+			      &REISERFS_SB(s)->s_properties))) {
 		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
 		if (block >= bmap1 &&
 		    block <= bmap1 + bmap_count) {
@@ -108,8 +112,11 @@
 	return 1;
 }
 
-/* searches in journal structures for a given block number (bmap, off). If block
-   is found in reiserfs journal it suggests next free block candidate to test. */
+/*
+ * Searches in journal structures for a given block number (bmap, off).
+ * If block is found in reiserfs journal it suggests next free block
+ * candidate to test.
+ */
 static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 				      int off, int *next)
 {
@@ -120,7 +127,7 @@
 			*next = tmp;
 			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 		} else {
-			(*next) = off + 1;	/* inc offset to avoid looping. */
+			(*next) = off + 1;  /* inc offset to avoid looping. */
 			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 		}
 		PROC_INFO_INC(s, scan_bitmap.retry);
@@ -129,8 +136,10 @@
 	return 0;
 }
 
-/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
- * block; */
+/*
+ * Searches for a window of zero bits with given minimum and maximum
+ * lengths in one bitmap block
+ */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 			     unsigned int bmap_n, int *beg, int boundary,
 			     int min, int max, int unfm)
@@ -145,10 +154,6 @@
 	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 	PROC_INFO_INC(s, scan_bitmap.bmap);
-/* this is unclear and lacks comments, explain how journal bitmaps
-   work here for the reader.  Convey a sense of the design here. What
-   is a window? */
-/* - I mean `a window of zero bits' as in description of this function - Zam. */
 
 	if (!bi) {
 		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
@@ -161,18 +166,21 @@
 		return 0;
 
 	while (1) {
-	      cont:
+cont:
 		if (bi->free_count < min) {
 			brelse(bh);
-			return 0;	// No free blocks in this bitmap
+			return 0;	/* No free blocks in this bitmap */
 		}
 
 		/* search for a first zero bit -- beginning of a window */
 		*beg = reiserfs_find_next_zero_le_bit
 		    ((unsigned long *)(bh->b_data), boundary, *beg);
 
-		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
-						 * cannot contain a zero window of minimum size */
+		/*
+		 * search for a zero bit fails or the rest of bitmap block
+		 * cannot contain a zero window of minimum size
+		 */
+		if (*beg + min > boundary) {
 			brelse(bh);
 			return 0;
 		}
@@ -186,49 +194,75 @@
 				next = end;
 				break;
 			}
-			/* finding the other end of zero bit window requires looking into journal structures (in
-			 * case of searching for free blocks for unformatted nodes) */
+
+			/*
+			 * finding the other end of zero bit window requires
+			 * looking into journal structures (in case of
+			 * searching for free blocks for unformatted nodes)
+			 */
 			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 				break;
 		}
 
-		/* now (*beg) points to beginning of zero bits window,
-		 * (end) points to one bit after the window end */
-		if (end - *beg >= min) {	/* it seems we have found window of proper size */
+		/*
+		 * now (*beg) points to beginning of zero bits window,
+		 * (end) points to one bit after the window end
+		 */
+
+		/* found window of proper size */
+		if (end - *beg >= min) {
 			int i;
 			reiserfs_prepare_for_journal(s, bh, 1);
-			/* try to set all blocks used checking are they still free */
+			/*
+			 * try to mark all blocks as used, checking that
+			 * they are still free
+			 */
 			for (i = *beg; i < end; i++) {
-				/* It seems that we should not check in journal again. */
+				/* Don't check in journal again. */
 				if (reiserfs_test_and_set_le_bit
 				    (i, bh->b_data)) {
-					/* bit was set by another process
-					 * while we slept in prepare_for_journal() */
+					/*
+					 * bit was set by another process while
+					 * we slept in prepare_for_journal()
+					 */
 					PROC_INFO_INC(s, scan_bitmap.stolen);
-					if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
-								 * if length of this set is more or equal to `min' */
+
+					/*
+					 * we can continue with smaller set
+					 * of allocated blocks, if length of
+					 * this set is more or equal to `min'
+					 */
+					if (i >= *beg + min) {
 						end = i;
 						break;
 					}
-					/* otherwise we clear all bit were set ... */
+
+					/*
+					 * otherwise we clear all the bits
+					 * that were set ...
+					 */
 					while (--i >= *beg)
 						reiserfs_clear_le_bit
 						    (i, bh->b_data);
 					reiserfs_restore_prepared_buffer(s, bh);
 					*beg = org;
-					/* ... and search again in current block from beginning */
+
+					/*
+					 * Search again in current block
+					 * from beginning
+					 */
 					goto cont;
 				}
 			}
 			bi->free_count -= (end - *beg);
-			journal_mark_dirty(th, s, bh);
+			journal_mark_dirty(th, bh);
 			brelse(bh);
 
 			/* free block count calculation */
 			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 						     1);
 			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
-			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 
 			return end - (*beg);
 		} else {
@@ -267,11 +301,13 @@
 	int bm = bmap_hash_id(s, id);
 	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 
-	/* If we don't have cached information on this bitmap block, we're
+	/*
+	 * If we don't have cached information on this bitmap block, we're
 	 * going to have to load it later anyway. Loading it here allows us
 	 * to make a better decision. This favors long-term performance gain
 	 * with a better on-disk layout vs. a short term gain of skipping the
-	 * read and potentially having a bad placement. */
+	 * read and potentially having a bad placement.
+	 */
 	if (info->free_count == UINT_MAX) {
 		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 		brelse(bh);
@@ -304,25 +340,26 @@
 	return packing;
 }
 
-/* Tries to find contiguous zero bit window (given size) in given region of
- * bitmap and place new blocks there. Returns number of allocated blocks. */
+/*
+ * Tries to find contiguous zero bit window (given size) in given region of
+ * bitmap and place new blocks there. Returns number of allocated blocks.
+ */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
 		       b_blocknr_t * start, b_blocknr_t finish,
 		       int min, int max, int unfm, sector_t file_block)
 {
 	int nr_allocated = 0;
 	struct super_block *s = th->t_super;
-	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
-	 * - Hans, it is not a block number - Zam. */
-
 	unsigned int bm, off;
 	unsigned int end_bm, end_off;
 	unsigned int off_max = s->s_blocksize << 3;
 
 	BUG_ON(!th->t_trans_id);
 	PROC_INFO_INC(s, scan_bitmap.call);
+
+	/* No point in looking for more free blocks */
 	if (SB_FREE_BLOCKS(s) <= 0)
-		return 0;	// No point in looking for more free blocks
+		return 0;
 
 	get_bit_address(s, *start, &bm, &off);
 	get_bit_address(s, finish, &end_bm, &end_off);
@@ -331,7 +368,8 @@
 	if (end_bm > reiserfs_bmap_count(s))
 		end_bm = reiserfs_bmap_count(s);
 
-	/* When the bitmap is more than 10% free, anyone can allocate.
+	/*
+	 * When the bitmap is more than 10% free, anyone can allocate.
 	 * When it's less than 10% free, only files that already use the
 	 * bitmap are allowed. Once we pass 80% full, this restriction
 	 * is lifted.
@@ -369,7 +407,7 @@
 	nr_allocated =
 	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 
-      ret:
+ret:
 	*start = bm * off_max + off;
 	return nr_allocated;
 
@@ -411,14 +449,14 @@
 			       "block %lu: bit already cleared", block);
 	}
 	apbi[nr].free_count++;
-	journal_mark_dirty(th, s, bmbh);
+	journal_mark_dirty(th, bmbh);
 	brelse(bmbh);
 
 	reiserfs_prepare_for_journal(s, sbh, 1);
 	/* update super block */
 	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 
-	journal_mark_dirty(th, s, sbh);
+	journal_mark_dirty(th, sbh);
 	if (for_unformatted) {
 		int depth = reiserfs_write_unlock_nested(s);
 		dquot_free_block_nodirty(inode, 1);
@@ -483,7 +521,7 @@
 	if (dirty)
 		reiserfs_update_sd(th, inode);
 	ei->i_prealloc_block = save;
-	list_del_init(&(ei->i_prealloc_list));
+	list_del_init(&ei->i_prealloc_list);
 }
 
 /* FIXME: It should be inline function */
@@ -529,7 +567,8 @@
 {
 	char *this_char, *value;
 
-	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
+	/* clear default settings */
+	REISERFS_SB(s)->s_alloc_options.bits = 0;
 
 	while ((this_char = strsep(&options, ":")) != NULL) {
 		if ((value = strchr(this_char, '=')) != NULL)
@@ -731,7 +770,7 @@
 		hash_in = (char *)&hint->key.k_dir_id;
 	} else {
 		if (!hint->inode) {
-			//hint->search_start = hint->beg;
+			/*hint->search_start = hint->beg;*/
 			hash_in = (char *)&hint->key.k_dir_id;
 		} else
 		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
@@ -785,7 +824,8 @@
 
 		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 
-		/* keep the root dir and it's first set of subdirs close to
+		/*
+		 * keep the root dir and its first set of subdirs close to
 		 * the start of the disk
 		 */
 		if (dirid <= 2)
@@ -799,7 +839,8 @@
 	}
 }
 
-/* returns 1 if it finds an indirect item and gets valid hint info
+/*
+ * returns 1 if it finds an indirect item and gets valid hint info
  * from it, otherwise 0
  */
 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
@@ -811,25 +852,29 @@
 	__le32 *item;
 	int ret = 0;
 
-	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
-				 * structure supplied; then we rely on supplied search_start */
+	/*
+	 * reiserfs code can call this function w/o pointer to path
+	 * structure supplied; then we rely on supplied search_start
+	 */
+	if (!hint->path)
 		return 0;
 
 	path = hint->path;
 	bh = get_last_bh(path);
 	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
-	ih = get_ih(path);
+	ih = tp_item_head(path);
 	pos_in_item = path->pos_in_item;
-	item = get_item(path);
+	item = tp_item_body(path);
 
 	hint->search_start = bh->b_blocknr;
 
+	/*
+	 * for indirect item: go to left and look for the first non-hole entry
+	 * in the indirect item
+	 */
 	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
-		/* for indirect item: go to left and look for the first non-hole entry
-		   in the indirect item */
 		if (pos_in_item == I_UNFM_NUM(ih))
 			pos_in_item--;
-//          pos_in_item = I_UNFM_NUM (ih) - 1;
 		while (pos_in_item >= 0) {
 			int t = get_block_num(item, pos_in_item);
 			if (t) {
@@ -845,10 +890,12 @@
 	return ret;
 }
 
-/* should be, if formatted node, then try to put on first part of the device
-   specified as number of percent with mount option device, else try to put
-   on last of device.  This is not to say it is good code to do so,
-   but the effect should be measured.  */
+/*
+ * should be, if formatted node, then try to put on first part of the device
+ * specified as number of percent with mount option device, else try to put
+ * on last of device.  This is not to say it is good code to do so,
+ * but the effect should be measured.
+ */
 static inline void set_border_in_hint(struct super_block *s,
 				      reiserfs_blocknr_hint_t * hint)
 {
@@ -974,21 +1021,27 @@
 		set_border_in_hint(s, hint);
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-	/* whenever we create a new directory, we displace it.  At first we will
-	   hash for location, later we might look for a moderately empty place for
-	   it */
+	/*
+	 * whenever we create a new directory, we displace it.  At first
+	 * we will hash for location, later we might look for a moderately
+	 * empty place for it
+	 */
 	if (displacing_new_packing_localities(s)
 	    && hint->th->displace_new_blocks) {
 		displace_new_packing_locality(hint);
 
-		/* we do not continue determine_search_start,
-		 * if new packing locality is being displaced */
+		/*
+		 * we do not continue determine_search_start,
+		 * if new packing locality is being displaced
+		 */
 		return;
 	}
 #endif
 
-	/* all persons should feel encouraged to add more special cases here and
-	 * test them */
+	/*
+	 * all persons should feel encouraged to add more special cases
+	 * here and test them
+	 */
 
 	if (displacing_large_files(s) && !hint->formatted_node
 	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
@@ -996,8 +1049,10 @@
 		return;
 	}
 
-	/* if none of our special cases is relevant, use the left neighbor in the
-	   tree order of the new node we are allocating for */
+	/*
+	 * if none of our special cases is relevant, use the left
+	 * neighbor in the tree order of the new node we are allocating for
+	 */
 	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
 		hash_formatted_node(hint);
 		return;
@@ -1005,10 +1060,13 @@
 
 	unfm_hint = get_left_neighbor(hint);
 
-	/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
-	   new blocks are displaced based on directory ID. Also, if suggested search_start
-	   is less than last preallocated block, we start searching from it, assuming that
-	   HDD dataflow is faster in forward direction */
+	/*
+	 * Mimic old block allocator behaviour, that is if VFS allowed for
+	 * preallocation, new blocks are displaced based on directory ID.
+	 * Also, if suggested search_start is less than last preallocated
+	 * block, we start searching from it, assuming that HDD dataflow
+	 * is faster in forward direction
+	 */
 	if (TEST_OPTION(old_way, s)) {
 		if (!hint->formatted_node) {
 			if (!reiserfs_hashed_relocation(s))
@@ -1037,11 +1095,13 @@
 	    TEST_OPTION(old_hashed_relocation, s)) {
 		old_hashed_relocation(hint);
 	}
+
 	/* new_hashed_relocation works with both formatted/unformatted nodes */
 	if ((!unfm_hint || hint->formatted_node) &&
 	    TEST_OPTION(new_hashed_relocation, s)) {
 		new_hashed_relocation(hint);
 	}
+
 	/* dirid grouping works only on unformatted nodes */
 	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
 		dirid_groups(hint);
@@ -1079,8 +1139,6 @@
 	return CARRY_ON;
 }
 
-/* XXX I know it could be merged with upper-level function;
-   but may be result function would be too complex. */
 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
 						 b_blocknr_t * new_blocknrs,
 						 b_blocknr_t start,
@@ -1108,7 +1166,10 @@
 
 		/* do we have something to fill prealloc. array also ? */
 		if (nr_allocated > 0) {
-			/* it means prealloc_size was greater that 0 and we do preallocation */
+			/*
+			 * it means prealloc_size was greater than 0 and
+			 * we do preallocation
+			 */
 			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
 				 &SB_JOURNAL(hint->th->t_super)->
 				 j_prealloc_list);
@@ -1176,7 +1237,8 @@
 			start = 0;
 			finish = hint->beg;
 			break;
-		default:	/* We've tried searching everywhere, not enough space */
+		default:
+			/* We've tried searching everywhere, not enough space */
 			/* Free the blocks */
 			if (!hint->formatted_node) {
 #ifdef REISERQUOTA_DEBUG
@@ -1261,8 +1323,11 @@
 	return amount_needed;
 }
 
-int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
-																	   already reserved */ )
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
+			       b_blocknr_t *new_blocknrs,
+			       int amount_needed,
+			       /* Amount of blocks we have already reserved */
+			       int reserved_by_us)
 {
 	int initial_amount_needed = amount_needed;
 	int ret;
@@ -1274,15 +1339,21 @@
 		return NO_DISK_SPACE;
 	/* should this be if !hint->inode &&  hint->preallocate? */
 	/* do you mean hint->formatted_node can be removed ? - Zam */
-	/* hint->formatted_node cannot be removed because we try to access
-	   inode information here, and there is often no inode assotiated with
-	   metadata allocations - green */
+	/*
+	 * hint->formatted_node cannot be removed because we try to access
+	 * inode information here, and there is often no inode associated with
+	 * metadata allocations - green
+	 */
 
 	if (!hint->formatted_node && hint->preallocate) {
 		amount_needed = use_preallocated_list_if_available
 		    (hint, new_blocknrs, amount_needed);
-		if (amount_needed == 0)	/* all blocknrs we need we got from
-					   prealloc. list */
+
+		/*
+		 * We have all the block numbers we need from the
+		 * prealloc list
+		 */
+		if (amount_needed == 0)
 			return CARRY_ON;
 		new_blocknrs += (initial_amount_needed - amount_needed);
 	}
@@ -1296,10 +1367,12 @@
 	ret = blocknrs_and_prealloc_arrays_from_search_start
 	    (hint, new_blocknrs, amount_needed);
 
-	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
-	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
-	 * variant) */
-
+	/*
+	 * We used prealloc. list to fill (partially) new_blocknrs array.
+	 * If final allocation fails we need to return blocks back to
+	 * prealloc. list or just free them. -- Zam (I chose second
+	 * variant)
+	 */
 	if (ret != CARRY_ON) {
 		while (amount_needed++ < initial_amount_needed) {
 			reiserfs_free_block(hint->th, hint->inode,
@@ -1338,10 +1411,12 @@
 	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
 	struct buffer_head *bh;
 
-	/* Way old format filesystems had the bitmaps packed up front.
-	 * I doubt there are any of these left, but just in case... */
+	/*
+	 * Way old format filesystems had the bitmaps packed up front.
+	 * I doubt there are any of these left, but just in case...
+	 */
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
-	                      &(REISERFS_SB(sb)->s_properties))))
+			      &REISERFS_SB(sb)->s_properties)))
 		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
 	else if (bitmap == 0)
 		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index af67735..d9f5a60 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c

@@ -59,7 +59,10 @@
 
 int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 {
-	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
+
+	/* key of current position in the directory (key of directory entry) */
+	struct cpu_key pos_key;
+
 	INITIALIZE_PATH(path_to_entry);
 	struct buffer_head *bh;
 	int item_num, entry_num;
@@ -77,21 +80,28 @@
 
 	reiserfs_check_lock_depth(inode->i_sb, "readdir");
 
-	/* form key for search the next directory entry using f_pos field of
-	   file structure */
+	/*
+	 * form key for search the next directory entry using
+	 * f_pos field of file structure
+	 */
 	make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
 	next_pos = cpu_key_k_offset(&pos_key);
 
 	path_to_entry.reada = PATH_READA;
 	while (1) {
-	      research:
-		/* search the directory item, containing entry with specified key */
+research:
+		/*
+		 * search the directory item, containing entry with
+		 * specified key
+		 */
 		search_res =
 		    search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
 					&de);
 		if (search_res == IO_ERROR) {
-			// FIXME: we could just skip part of directory which could
-			// not be read
+			/*
+			 * FIXME: we could just skip part of directory
+			 * which could not be read
+			 */
 			ret = -EIO;
 			goto out;
 		}
@@ -102,41 +112,49 @@
 		store_ih(&tmp_ih, ih);
 
 		/* we must have found item, that is item of this directory, */
-		RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
+		RFALSE(COMP_SHORT_KEYS(&ih->ih_key, &pos_key),
 		       "vs-9000: found item %h does not match to dir we readdir %K",
 		       ih, &pos_key);
 		RFALSE(item_num > B_NR_ITEMS(bh) - 1,
 		       "vs-9005 item_num == %d, item amount == %d",
 		       item_num, B_NR_ITEMS(bh));
 
-		/* and entry must be not more than number of entries in the item */
-		RFALSE(I_ENTRY_COUNT(ih) < entry_num,
+		/*
+		 * and entry must be not more than number of entries
+		 * in the item
+		 */
+		RFALSE(ih_entry_count(ih) < entry_num,
 		       "vs-9010: entry number is too big %d (%d)",
-		       entry_num, I_ENTRY_COUNT(ih));
+		       entry_num, ih_entry_count(ih));
 
+		/*
+		 * go through all entries in the directory item beginning
+		 * from the entry, that has been found
+		 */
 		if (search_res == POSITION_FOUND
-		    || entry_num < I_ENTRY_COUNT(ih)) {
-			/* go through all entries in the directory item beginning from the entry, that has been found */
+		    || entry_num < ih_entry_count(ih)) {
 			struct reiserfs_de_head *deh =
 			    B_I_DEH(bh, ih) + entry_num;
 
-			for (; entry_num < I_ENTRY_COUNT(ih);
+			for (; entry_num < ih_entry_count(ih);
 			     entry_num++, deh++) {
 				int d_reclen;
 				char *d_name;
 				ino_t d_ino;
 				loff_t cur_pos = deh_offset(deh);
 
+				/* it is hidden entry */
 				if (!de_visible(deh))
-					/* it is hidden entry */
 					continue;
 				d_reclen = entry_length(bh, ih, entry_num);
 				d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
 
 				if (d_reclen <= 0 ||
 				    d_name + d_reclen > bh->b_data + bh->b_size) {
-					/* There is corrupted data in entry,
-					 * We'd better stop here */
+					/*
+					 * There is corrupted data in entry,
+					 * We'd better stop here
+					 */
 					pathrelse(&path_to_entry);
 					ret = -EIO;
 					goto out;
@@ -145,10 +163,10 @@
 				if (!d_name[d_reclen - 1])
 					d_reclen = strlen(d_name);
 
+				/* too big to send back to VFS */
 				if (d_reclen >
 				    REISERFS_MAX_NAME(inode->i_sb->
 						      s_blocksize)) {
-					/* too big to send back to VFS */
 					continue;
 				}
 
@@ -173,10 +191,14 @@
 						goto research;
 					}
 				}
-				// Note, that we copy name to user space via temporary
-				// buffer (local_buf) because filldir will block if
-				// user space buffer is swapped out. At that time
-				// entry can move to somewhere else
+
+				/*
+				 * Note, that we copy name to user space via
+				 * temporary buffer (local_buf) because
+				 * filldir will block if user space buffer is
+				 * swapped out. At that time entry can move to
+				 * somewhere else
+				 */
 				memcpy(local_buf, d_name, d_reclen);
 
 				/*
@@ -209,22 +231,26 @@
 			}	/* for */
 		}
 
+		/* end of directory has been reached */
 		if (item_num != B_NR_ITEMS(bh) - 1)
-			// end of directory has been reached
 			goto end;
 
-		/* item we went through is last item of node. Using right
-		   delimiting key check is it directory end */
+		/*
+		 * item we went through is last item of node. Using right
+		 * delimiting key check is it directory end
+		 */
 		rkey = get_rkey(&path_to_entry, inode->i_sb);
 		if (!comp_le_keys(rkey, &MIN_KEY)) {
-			/* set pos_key to key, that is the smallest and greater
-			   that key of the last entry in the item */
+			/*
+			 * set pos_key to the smallest key that is greater
+			 * than the key of the last entry in the item
+			 */
 			set_cpu_key_k_offset(&pos_key, next_pos);
 			continue;
 		}
 
+		/* end of directory has been reached */
 		if (COMP_SHORT_KEYS(rkey, &pos_key)) {
-			// end of directory has been reached
 			goto end;
 		}
 
@@ -248,71 +274,73 @@
 	return reiserfs_readdir_inode(file_inode(file), ctx);
 }
 
-/* compose directory item containing "." and ".." entries (entries are
-   not aligned to 4 byte boundary) */
-/* the last four params are LE */
+/*
+ * compose directory item containing "." and ".." entries (entries are
+ * not aligned to 4 byte boundary)
+ */
 void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
 			    __le32 par_dirid, __le32 par_objid)
 {
-	struct reiserfs_de_head *deh;
+	struct reiserfs_de_head *dot, *dotdot;
 
 	memset(body, 0, EMPTY_DIR_SIZE_V1);
-	deh = (struct reiserfs_de_head *)body;
+	dot = (struct reiserfs_de_head *)body;
+	dotdot = dot + 1;
 
 	/* direntry header of "." */
-	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	put_deh_offset(dot, DOT_OFFSET);
 	/* these two are from make_le_item_head, and are are LE */
-	deh[0].deh_dir_id = dirid;
-	deh[0].deh_objectid = objid;
-	deh[0].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
-	mark_de_visible(&(deh[0]));
+	dot->deh_dir_id = dirid;
+	dot->deh_objectid = objid;
+	dot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dot, EMPTY_DIR_SIZE_V1 - strlen("."));
+	mark_de_visible(dot);
 
 	/* direntry header of ".." */
-	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	put_deh_offset(dotdot, DOT_DOT_OFFSET);
 	/* key of ".." for the root directory */
 	/* these two are from the inode, and are are LE */
-	deh[1].deh_dir_id = par_dirid;
-	deh[1].deh_objectid = par_objid;
-	deh[1].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
-	mark_de_visible(&(deh[1]));
+	dotdot->deh_dir_id = par_dirid;
+	dotdot->deh_objectid = par_objid;
+	dotdot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dotdot, deh_location(dot) - strlen(".."));
+	mark_de_visible(dotdot);
 
 	/* copy ".." and "." */
-	memcpy(body + deh_location(&(deh[0])), ".", 1);
-	memcpy(body + deh_location(&(deh[1])), "..", 2);
+	memcpy(body + deh_location(dot), ".", 1);
+	memcpy(body + deh_location(dotdot), "..", 2);
 }
 
 /* compose directory item containing "." and ".." entries */
 void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
 			 __le32 par_dirid, __le32 par_objid)
 {
-	struct reiserfs_de_head *deh;
+	struct reiserfs_de_head *dot, *dotdot;
 
 	memset(body, 0, EMPTY_DIR_SIZE);
-	deh = (struct reiserfs_de_head *)body;
+	dot = (struct reiserfs_de_head *)body;
+	dotdot = dot + 1;
 
 	/* direntry header of "." */
-	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	put_deh_offset(dot, DOT_OFFSET);
 	/* these two are from make_le_item_head, and are are LE */
-	deh[0].deh_dir_id = dirid;
-	deh[0].deh_objectid = objid;
-	deh[0].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
-	mark_de_visible(&(deh[0]));
+	dot->deh_dir_id = dirid;
+	dot->deh_objectid = objid;
+	dot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dot, EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
+	mark_de_visible(dot);
 
 	/* direntry header of ".." */
-	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	put_deh_offset(dotdot, DOT_DOT_OFFSET);
 	/* key of ".." for the root directory */
 	/* these two are from the inode, and are are LE */
-	deh[1].deh_dir_id = par_dirid;
-	deh[1].deh_objectid = par_objid;
-	deh[1].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[1]),
-			 deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
-	mark_de_visible(&(deh[1]));
+	dotdot->deh_dir_id = par_dirid;
+	dotdot->deh_objectid = par_objid;
+	dotdot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dotdot, deh_location(dot) - ROUND_UP(strlen("..")));
+	mark_de_visible(dotdot);
 
 	/* copy ".." and "." */
-	memcpy(body + deh_location(&(deh[0])), ".", 1);
-	memcpy(body + deh_location(&(deh[1])), "..", 2);
+	memcpy(body + deh_location(dot), ".", 1);
+	memcpy(body + deh_location(dotdot), "..", 2);
 }

diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 9a3c68c..54fdf19 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c

@@ -2,18 +2,13 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
-/* Now we have all buffers that must be used in balancing of the tree 	*/
-/* Further calculations can not cause schedule(), and thus the buffer 	*/
-/* tree will be stable until the balancing will be finished 		*/
-/* balance the tree according to the analysis made before,		*/
-/* and using buffers obtained after all above.				*/
-
-/**
- ** balance_leaf_when_delete
- ** balance_leaf
- ** do_balance
- **
- **/
+/*
+ * Now we have all buffers that must be used in balancing of the tree
+ * Further calculations can not cause schedule(), and thus the buffer
+ * tree will be stable until the balancing will be finished
+ * balance the tree according to the analysis made before,
+ * and using buffers obtained after all above.
+ */
 
 #include <asm/uaccess.h>
 #include <linux/time.h>
@@ -61,48 +56,190 @@
 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
 				       struct buffer_head *bh, int flag)
 {
-	journal_mark_dirty(tb->transaction_handle,
-			   tb->transaction_handle->t_super, bh);
+	journal_mark_dirty(tb->transaction_handle, bh);
 }
 
 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
 
-/* summary:
- if deleting something ( tb->insert_size[0] < 0 )
-   return(balance_leaf_when_delete()); (flag d handled here)
- else
-   if lnum is larger than 0 we put items into the left node
-   if rnum is larger than 0 we put items into the right node
-   if snum1 is larger than 0 we put items into the new node s1
-   if snum2 is larger than 0 we put items into the new node s2
-Note that all *num* count new items being created.
+/*
+ * summary:
+ *  if deleting something ( tb->insert_size[0] < 0 )
+ *    return(balance_leaf_when_delete()); (flag d handled here)
+ *  else
+ *    if lnum is larger than 0 we put items into the left node
+ *    if rnum is larger than 0 we put items into the right node
+ *    if snum1 is larger than 0 we put items into the new node s1
+ *    if snum2 is larger than 0 we put items into the new node s2
+ * Note that all *num* count new items being created.
+ */
 
-It would be easier to read balance_leaf() if each of these summary
-lines was a separate procedure rather than being inlined.  I think
-that there are many passages here and in balance_leaf_when_delete() in
-which two calls to one procedure can replace two passages, and it
-might save cache space and improve software maintenance costs to do so.
+static void balance_leaf_when_delete_del(struct tree_balance *tb)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int item_pos = PATH_LAST_POSITION(tb->tb_path);
+	struct buffer_info bi;
+#ifdef CONFIG_REISERFS_CHECK
+	struct item_head *ih = item_head(tbS0, item_pos);
+#endif
 
-Vladimir made the perceptive comment that we should offload most of
-the decision making in this function into fix_nodes/check_balance, and
-then create some sort of structure in tb that says what actions should
-be performed by do_balance.
+	RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
+	       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
+	       -tb->insert_size[0], ih);
 
--Hans */
+	buffer_info_init_tbS0(tb, &bi);
+	leaf_delete_items(&bi, 0, item_pos, 1, -1);
 
-/* Balance leaf node in case of delete or cut: insert_size[0] < 0
+	if (!item_pos && tb->CFL[0]) {
+		if (B_NR_ITEMS(tbS0)) {
+			replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
+		} else {
+			if (!PATH_H_POSITION(tb->tb_path, 1))
+				replace_key(tb, tb->CFL[0], tb->lkey[0],
+					    PATH_H_PPARENT(tb->tb_path, 0), 0);
+		}
+	}
+
+	RFALSE(!item_pos && !tb->CFL[0],
+	       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
+	       tb->L[0]);
+}
+
+/* cut item in S[0] */
+static void balance_leaf_when_delete_cut(struct tree_balance *tb)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int item_pos = PATH_LAST_POSITION(tb->tb_path);
+	struct item_head *ih = item_head(tbS0, item_pos);
+	int pos_in_item = tb->tb_path->pos_in_item;
+	struct buffer_info bi;
+	buffer_info_init_tbS0(tb, &bi);
+
+	if (is_direntry_le_ih(ih)) {
+		/*
+		 * UFS unlink semantics are such that you can only
+		 * delete one directory entry at a time.
+		 *
+		 * when we cut a directory tb->insert_size[0] means
+		 * number of entries to be cut (always 1)
+		 */
+		tb->insert_size[0] = -1;
+		leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
+				     -tb->insert_size[0]);
+
+		RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
+		       "PAP-12030: can not change delimiting key. CFL[0]=%p",
+		       tb->CFL[0]);
+
+		if (!item_pos && !pos_in_item && tb->CFL[0])
+			replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
+	} else {
+		leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
+				     -tb->insert_size[0]);
+
+		RFALSE(!ih_item_len(ih),
+		       "PAP-12035: cut must leave non-zero dynamic "
+		       "length of item");
+	}
+}
+
+static int balance_leaf_when_delete_left(struct tree_balance *tb)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+
+	/* L[0] must be joined with S[0] */
+	if (tb->lnum[0] == -1) {
+		/* R[0] must be also joined with S[0] */
+		if (tb->rnum[0] == -1) {
+			if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
+				/*
+				 * all contents of all the
+				 * 3 buffers will be in L[0]
+				 */
+				if (PATH_H_POSITION(tb->tb_path, 1) == 0 &&
+				    1 < B_NR_ITEMS(tb->FR[0]))
+					replace_key(tb, tb->CFL[0],
+						    tb->lkey[0], tb->FR[0], 1);
+
+				leaf_move_items(LEAF_FROM_S_TO_L, tb, n, -1,
+						NULL);
+				leaf_move_items(LEAF_FROM_R_TO_L, tb,
+						B_NR_ITEMS(tb->R[0]), -1,
+						NULL);
+
+				reiserfs_invalidate_buffer(tb, tbS0);
+				reiserfs_invalidate_buffer(tb, tb->R[0]);
+
+				return 0;
+			}
+
+			/* all contents of all the 3 buffers will be in R[0] */
+			leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, NULL);
+			leaf_move_items(LEAF_FROM_L_TO_R, tb,
+					B_NR_ITEMS(tb->L[0]), -1, NULL);
+
+			/* right_delimiting_key is correct in R[0] */
+			replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+
+			reiserfs_invalidate_buffer(tb, tbS0);
+			reiserfs_invalidate_buffer(tb, tb->L[0]);
+
+			return -1;
+		}
+
+		RFALSE(tb->rnum[0] != 0,
+		       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
+		/* all contents of L[0] and S[0] will be in L[0] */
+		leaf_shift_left(tb, n, -1);
+
+		reiserfs_invalidate_buffer(tb, tbS0);
+
+		return 0;
+	}
+
+	/*
+	 * a part of contents of S[0] will be in L[0] and
+	 * the rest part of S[0] will be in R[0]
+	 */
+
+	RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
+	       (tb->lnum[0] + tb->rnum[0] > n + 1),
+	       "PAP-12050: rnum(%d) and lnum(%d) and item "
+	       "number(%d) in S[0] are not consistent",
+	       tb->rnum[0], tb->lnum[0], n);
+	RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
+	       (tb->lbytes != -1 || tb->rbytes != -1),
+	       "PAP-12055: bad rbytes (%d)/lbytes (%d) "
+	       "parameters when items are not split",
+	       tb->rbytes, tb->lbytes);
+	RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
+	       (tb->lbytes < 1 || tb->rbytes != -1),
+	       "PAP-12060: bad rbytes (%d)/lbytes (%d) "
+	       "parameters when items are split",
+	       tb->rbytes, tb->lbytes);
+
+	leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+	leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+
+	reiserfs_invalidate_buffer(tb, tbS0);
+
+	return 0;
+}
+
+/*
+ * Balance leaf node in case of delete or cut: insert_size[0] < 0
  *
  * lnum, rnum can have values >= -1
  *	-1 means that the neighbor must be joined with S
  *	 0 means that nothing should be done with the neighbor
- *	>0 means to shift entirely or partly the specified number of items to the neighbor
+ *	>0 means to shift entirely or partly the specified number of items
+ *         to the neighbor
  */
 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 {
 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 	int item_pos = PATH_LAST_POSITION(tb->tb_path);
-	int pos_in_item = tb->tb_path->pos_in_item;
 	struct buffer_info bi;
 	int n;
 	struct item_head *ih;
@@ -114,147 +251,28 @@
 	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
 	       "PAP-12010: tree can not be empty");
 
-	ih = B_N_PITEM_HEAD(tbS0, item_pos);
+	ih = item_head(tbS0, item_pos);
 	buffer_info_init_tbS0(tb, &bi);
 
 	/* Delete or truncate the item */
 
-	switch (flag) {
-	case M_DELETE:		/* delete item in S[0] */
+	BUG_ON(flag != M_DELETE && flag != M_CUT);
+	if (flag == M_DELETE)
+		balance_leaf_when_delete_del(tb);
+	else /* M_CUT */
+		balance_leaf_when_delete_cut(tb);
 
-		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
-		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
-		       -tb->insert_size[0], ih);
 
-		leaf_delete_items(&bi, 0, item_pos, 1, -1);
-
-		if (!item_pos && tb->CFL[0]) {
-			if (B_NR_ITEMS(tbS0)) {
-				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
-					    0);
-			} else {
-				if (!PATH_H_POSITION(tb->tb_path, 1))
-					replace_key(tb, tb->CFL[0], tb->lkey[0],
-						    PATH_H_PPARENT(tb->tb_path,
-								   0), 0);
-			}
-		}
-
-		RFALSE(!item_pos && !tb->CFL[0],
-		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
-		       tb->L[0]);
-
-		break;
-
-	case M_CUT:{		/* cut item in S[0] */
-			if (is_direntry_le_ih(ih)) {
-
-				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
-				/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
-				tb->insert_size[0] = -1;
-				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
-						     -tb->insert_size[0]);
-
-				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
-				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
-				       tb->CFL[0]);
-
-				if (!item_pos && !pos_in_item && tb->CFL[0]) {
-					replace_key(tb, tb->CFL[0], tb->lkey[0],
-						    tbS0, 0);
-				}
-			} else {
-				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
-						     -tb->insert_size[0]);
-
-				RFALSE(!ih_item_len(ih),
-				       "PAP-12035: cut must leave non-zero dynamic length of item");
-			}
-			break;
-		}
-
-	default:
-		print_cur_tb("12040");
-		reiserfs_panic(tb->tb_sb, "PAP-12040",
-			       "unexpected mode: %s(%d)",
-			       (flag ==
-				M_PASTE) ? "PASTE" : ((flag ==
-						       M_INSERT) ? "INSERT" :
-						      "UNKNOWN"), flag);
-	}
-
-	/* the rule is that no shifting occurs unless by shifting a node can be freed */
+	/*
+	 * the rule is that no shifting occurs unless by shifting
+	 * a node can be freed
+	 */
 	n = B_NR_ITEMS(tbS0);
-	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
-		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
-			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
-				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
-					/* all contents of all the 3 buffers will be in L[0] */
-					if (PATH_H_POSITION(tb->tb_path, 1) == 0
-					    && 1 < B_NR_ITEMS(tb->FR[0]))
-						replace_key(tb, tb->CFL[0],
-							    tb->lkey[0],
-							    tb->FR[0], 1);
 
-					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
-							-1, NULL);
-					leaf_move_items(LEAF_FROM_R_TO_L, tb,
-							B_NR_ITEMS(tb->R[0]),
-							-1, NULL);
 
-					reiserfs_invalidate_buffer(tb, tbS0);
-					reiserfs_invalidate_buffer(tb,
-								   tb->R[0]);
-
-					return 0;
-				}
-				/* all contents of all the 3 buffers will be in R[0] */
-				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
-						NULL);
-				leaf_move_items(LEAF_FROM_L_TO_R, tb,
-						B_NR_ITEMS(tb->L[0]), -1, NULL);
-
-				/* right_delimiting_key is correct in R[0] */
-				replace_key(tb, tb->CFR[0], tb->rkey[0],
-					    tb->R[0], 0);
-
-				reiserfs_invalidate_buffer(tb, tbS0);
-				reiserfs_invalidate_buffer(tb, tb->L[0]);
-
-				return -1;
-			}
-
-			RFALSE(tb->rnum[0] != 0,
-			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
-			/* all contents of L[0] and S[0] will be in L[0] */
-			leaf_shift_left(tb, n, -1);
-
-			reiserfs_invalidate_buffer(tb, tbS0);
-
-			return 0;
-		}
-		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
-
-		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
-		       (tb->lnum[0] + tb->rnum[0] > n + 1),
-		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
-		       tb->rnum[0], tb->lnum[0], n);
-		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
-		       (tb->lbytes != -1 || tb->rbytes != -1),
-		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
-		       tb->rbytes, tb->lbytes);
-		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
-		       (tb->lbytes < 1 || tb->rbytes != -1),
-		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
-		       tb->rbytes, tb->lbytes);
-
-		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
-		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
-
-		reiserfs_invalidate_buffer(tb, tbS0);
-
-		return 0;
-	}
+	/* L[0] takes part in balancing */
+	if (tb->lnum[0])
+		return balance_leaf_when_delete_left(tb);
 
 	if (tb->rnum[0] == -1) {
 		/* all contents of R[0] and S[0] will be in R[0] */
@@ -268,35 +286,1101 @@
 	return 0;
 }
 
-static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (this is on little endian) */
-			const char *body,	/* body  of inserted item or bytes to paste */
-			int flag,	/* i - insert, d - delete, c - cut, p - paste
-					   (see comment to do_balance) */
-			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
-							   must be inserted into the next higher level.  This insertion
-							   consists of a key or two keys and their corresponding
-							   pointers */
-			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
-    )
+static void balance_leaf_insert_left(struct tree_balance *tb,
+				     struct item_head *ih, const char *body)
+{
+	int ret;
+	struct buffer_info bi;
+	int n = B_NR_ITEMS(tb->L[0]);
+
+	if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
+		/* part of new item falls into L[0] */
+		int new_item_len, shift;
+		int version;
+
+		ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
+
+		/* Calculate item length to insert to S[0] */
+		new_item_len = ih_item_len(ih) - tb->lbytes;
+
+		/* Calculate and check item length to insert to L[0] */
+		put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
+
+		RFALSE(ih_item_len(ih) <= 0,
+		       "PAP-12080: there is nothing to insert into L[0]: "
+		       "ih_item_len=%d", ih_item_len(ih));
+
+		/* Insert new item into L[0] */
+		buffer_info_init_left(tb, &bi);
+		leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
+			     min_t(int, tb->zeroes_num, ih_item_len(ih)));
+
+		version = ih_version(ih);
+
+		/*
+		 * Calculate key component, item length and body to
+		 * insert into S[0]
+		 */
+		shift = 0;
+		if (is_indirect_le_ih(ih))
+			shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+
+		add_le_ih_k_offset(ih, tb->lbytes << shift);
+
+		put_ih_item_len(ih, new_item_len);
+		if (tb->lbytes > tb->zeroes_num) {
+			body += (tb->lbytes - tb->zeroes_num);
+			tb->zeroes_num = 0;
+		} else
+			tb->zeroes_num -= tb->lbytes;
+
+		RFALSE(ih_item_len(ih) <= 0,
+		       "PAP-12085: there is nothing to insert into S[0]: "
+		       "ih_item_len=%d", ih_item_len(ih));
+	} else {
+		/* new item in whole falls into L[0] */
+		/* Shift lnum[0]-1 items to L[0] */
+		ret = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
+
+		/* Insert new item into L[0] */
+		buffer_info_init_left(tb, &bi);
+		leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
+				     tb->zeroes_num);
+		tb->insert_size[0] = 0;
+		tb->zeroes_num = 0;
+	}
+}
+
+static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
+						 struct item_head *ih,
+						 const char *body)
+{
+	int n = B_NR_ITEMS(tb->L[0]);
+	struct buffer_info bi;
+
+	RFALSE(tb->zeroes_num,
+	       "PAP-12090: invalid parameter in case of a directory");
+
+	/* directory item */
+	if (tb->lbytes > tb->pos_in_item) {
+		/* new directory entry falls into L[0] */
+		struct item_head *pasted;
+		int ret, l_pos_in_item = tb->pos_in_item;
+
+		/*
+		 * Shift lnum[0] - 1 items in whole.
+		 * Shift lbytes - 1 entries from given directory item
+		 */
+		ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
+		if (ret && !tb->item_pos) {
+			pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
+			l_pos_in_item += ih_entry_count(pasted) -
+					 (tb->lbytes - 1);
+		}
+
+		/* Append given directory entry to directory item */
+		buffer_info_init_left(tb, &bi);
+		leaf_paste_in_buffer(&bi, n + tb->item_pos - ret,
+				     l_pos_in_item, tb->insert_size[0],
+				     body, tb->zeroes_num);
+
+		/*
+		 * leaf_paste_in_buffer() above prepared space for the new
+		 * entry, leaf_paste_entries() below pastes this entry
+		 */
+
+		/*
+		 * when we have merged the directory item, pos_in_item
+		 * has been changed too
+		 */
+
+		/* paste new directory entry. 1 is entry number */
+		leaf_paste_entries(&bi, n + tb->item_pos - ret,
+				   l_pos_in_item, 1,
+				   (struct reiserfs_de_head *) body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+		tb->insert_size[0] = 0;
+	} else {
+		/* new directory item doesn't fall into L[0] */
+		/*
+		 * Shift lnum[0]-1 items in whole. Shift lbytes
+		 * directory entries from directory item number lnum[0]
+		 */
+		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+	}
+
+	/* Calculate new position to append in item body */
+	tb->pos_in_item -= tb->lbytes;
+}
+
+static void balance_leaf_paste_left_shift(struct tree_balance *tb,
+					  struct item_head *ih,
+					  const char *body)
 {
 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
-	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/*  index into the array of item headers in S[0]
-							   of the affected item */
+	int n = B_NR_ITEMS(tb->L[0]);
 	struct buffer_info bi;
-	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
-	int snum[2];		/* number of items that will be placed
-				   into S_new (includes partially shifted
-				   items) */
-	int sbytes[2];		/* if an item is partially shifted into S_new then
-				   if it is a directory item
-				   it is the number of entries from the item that are shifted into S_new
-				   else
-				   it is the number of bytes from the item that are shifted into S_new
-				 */
-	int n, i;
-	int ret_val;
-	int pos_in_item;
-	int zeros_num;
+
+	if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
+		balance_leaf_paste_left_shift_dirent(tb, ih, body);
+		return;
+	}
+
+	RFALSE(tb->lbytes <= 0,
+	       "PAP-12095: there is nothing to shift to L[0]. "
+	       "lbytes=%d", tb->lbytes);
+	RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)),
+	       "PAP-12100: incorrect position to paste: "
+	       "item_len=%d, pos_in_item=%d",
+	       ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item);
+
+	/* appended item will be in L[0] in whole */
+	if (tb->lbytes >= tb->pos_in_item) {
+		struct item_head *tbS0_pos_ih, *tbL0_ih;
+		struct item_head *tbS0_0_ih;
+		struct reiserfs_key *left_delim_key;
+		int ret, l_n, version, temp_l;
+
+		tbS0_pos_ih = item_head(tbS0, tb->item_pos);
+		tbS0_0_ih = item_head(tbS0, 0);
+
+		/*
+		 * this number of bytes must be appended
+		 * to the last item of L[h]
+		 */
+		l_n = tb->lbytes - tb->pos_in_item;
+
+		/* Calculate new insert_size[0] */
+		tb->insert_size[0] -= l_n;
+
+		RFALSE(tb->insert_size[0] <= 0,
+		       "PAP-12105: there is nothing to paste into "
+		       "L[0]. insert_size=%d", tb->insert_size[0]);
+
+		ret = leaf_shift_left(tb, tb->lnum[0],
+				      ih_item_len(tbS0_pos_ih));
+
+		tbL0_ih = item_head(tb->L[0], n + tb->item_pos - ret);
+
+		/* Append to body of item in L[0] */
+		buffer_info_init_left(tb, &bi);
+		leaf_paste_in_buffer(&bi, n + tb->item_pos - ret,
+				     ih_item_len(tbL0_ih), l_n, body,
+				     min_t(int, l_n, tb->zeroes_num));
+
+		/*
+		 * 0-th item in S0 can be only of DIRECT type
+		 * when l_n != 0
+		 */
+		temp_l = l_n;
+
+		RFALSE(ih_item_len(tbS0_0_ih),
+		       "PAP-12106: item length must be 0");
+		RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key,
+		       leaf_key(tb->L[0], n + tb->item_pos - ret)),
+		       "PAP-12107: items must be of the same file");
+
+		if (is_indirect_le_ih(tbL0_ih)) {
+			int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+			temp_l = l_n << shift;
+		}
+		/* update key of first item in S0 */
+		version = ih_version(tbS0_0_ih);
+		add_le_key_k_offset(version, &tbS0_0_ih->ih_key, temp_l);
+
+		/* update left delimiting key */
+		left_delim_key = internal_key(tb->CFL[0], tb->lkey[0]);
+		add_le_key_k_offset(version, left_delim_key, temp_l);
+
+		/*
+		 * Calculate new body, position in item and
+		 * insert_size[0]
+		 */
+		if (l_n > tb->zeroes_num) {
+			body += (l_n - tb->zeroes_num);
+			tb->zeroes_num = 0;
+		} else
+			tb->zeroes_num -= l_n;
+		tb->pos_in_item = 0;
+
+		RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key,
+					  leaf_key(tb->L[0],
+						 B_NR_ITEMS(tb->L[0]) - 1)) ||
+		       !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) ||
+		       !op_is_left_mergeable(left_delim_key, tbS0->b_size),
+		       "PAP-12120: item must be merge-able with left "
+		       "neighboring item");
+	} else {
+		/* only part of the appended item will be in L[0] */
+
+		/* Calculate position in item for append in S[0] */
+		tb->pos_in_item -= tb->lbytes;
+
+		RFALSE(tb->pos_in_item <= 0,
+		       "PAP-12125: no place for paste. pos_in_item=%d",
+		       tb->pos_in_item);
+
+		/*
+		 * Shift lnum[0] - 1 items in whole.
+		 * Shift lbytes - 1 byte from item number lnum[0]
+		 */
+		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+	}
+}
+
+
+/* appended item will be in L[0] in whole */
+static void balance_leaf_paste_left_whole(struct tree_balance *tb,
+					  struct item_head *ih,
+					  const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tb->L[0]);
+	struct buffer_info bi;
+	struct item_head *pasted;
+	int ret;
+
+	/* if we paste into first item of S[0] and it is left mergeable */
+	if (!tb->item_pos &&
+	    op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) {
+		/*
+		 * then increment pos_in_item by the size of the
+		 * last item in L[0]
+		 */
+		pasted = item_head(tb->L[0], n - 1);
+		if (is_direntry_le_ih(pasted))
+			tb->pos_in_item += ih_entry_count(pasted);
+		else
+			tb->pos_in_item += ih_item_len(pasted);
+	}
+
+	/*
+	 * Shift lnum[0] - 1 items in whole.
+	 * Shift lbytes - 1 byte from item number lnum[0]
+	 */
+	ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+
+	/* Append to body of item in L[0] */
+	buffer_info_init_left(tb, &bi);
+	leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, tb->pos_in_item,
+			     tb->insert_size[0], body, tb->zeroes_num);
+
+	/* if appended item is directory, paste entry */
+	pasted = item_head(tb->L[0], n + tb->item_pos - ret);
+	if (is_direntry_le_ih(pasted))
+		leaf_paste_entries(&bi, n + tb->item_pos - ret,
+				   tb->pos_in_item, 1,
+				   (struct reiserfs_de_head *)body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+	/*
+	 * if appended item is indirect item, put unformatted node
+	 * into un list
+	 */
+	if (is_indirect_le_ih(pasted))
+		set_ih_free_space(pasted, 0);
+
+	tb->insert_size[0] = 0;
+	tb->zeroes_num = 0;
+}
+
+static void balance_leaf_paste_left(struct tree_balance *tb,
+				    struct item_head *ih, const char *body)
+{
+	/* we must shift the part of the appended item */
+	if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1)
+		balance_leaf_paste_left_shift(tb, ih, body);
+	else
+		balance_leaf_paste_left_whole(tb, ih, body);
+}
+
+/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
+static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih,
+			      const char *body, int flag)
+{
+	if (tb->lnum[0] <= 0)
+		return;
+
+	/* new item or part of it falls into L[0], shift it too */
+	if (tb->item_pos < tb->lnum[0]) {
+		BUG_ON(flag != M_INSERT && flag != M_PASTE);
+
+		if (flag == M_INSERT)
+			balance_leaf_insert_left(tb, ih, body);
+		else /* M_PASTE */
+			balance_leaf_paste_left(tb, ih, body);
+	} else
+		/* new item doesn't fall into L[0] */
+		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+}
+
+
+static void balance_leaf_insert_right(struct tree_balance *tb,
+				      struct item_head *ih, const char *body)
+{
+
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+	struct buffer_info bi;
+	int ret;
+
+	/* new item or part of it doesn't fall into R[0] */
+	if (n - tb->rnum[0] >= tb->item_pos) {
+		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+		return;
+	}
+
+	/* new item or its part falls to R[0] */
+
+	/* part of new item falls into R[0] */
+	if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {
+		loff_t old_key_comp, old_len, r_zeroes_number;
+		const char *r_body;
+		int version, shift;
+		loff_t offset;
+
+		leaf_shift_right(tb, tb->rnum[0] - 1, -1);
+
+		version = ih_version(ih);
+
+		/* Remember key component and item length */
+		old_key_comp = le_ih_k_offset(ih);
+		old_len = ih_item_len(ih);
+
+		/*
+		 * Calculate key component and item length to insert
+		 * into R[0]
+		 */
+		shift = 0;
+		if (is_indirect_le_ih(ih))
+			shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+		offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << shift);
+		set_le_ih_k_offset(ih, offset);
+		put_ih_item_len(ih, tb->rbytes);
+
+		/* Insert part of the item into R[0] */
+		buffer_info_init_right(tb, &bi);
+		if ((old_len - tb->rbytes) > tb->zeroes_num) {
+			r_zeroes_number = 0;
+			r_body = body + (old_len - tb->rbytes) - tb->zeroes_num;
+		} else {
+			r_body = body;
+			r_zeroes_number = tb->zeroes_num -
+					  (old_len - tb->rbytes);
+			tb->zeroes_num -= r_zeroes_number;
+		}
+
+		leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number);
+
+		/* Replace right delimiting key by first key in R[0] */
+		replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+
+		/*
+		 * Calculate key component and item length to
+		 * insert into S[0]
+		 */
+		set_le_ih_k_offset(ih, old_key_comp);
+		put_ih_item_len(ih, old_len - tb->rbytes);
+
+		tb->insert_size[0] -= tb->rbytes;
+
+	} else {
+		/* whole new item falls into R[0] */
+
+		/* Shift rnum[0]-1 items to R[0] */
+		ret = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
+
+		/* Insert new item into R[0] */
+		buffer_info_init_right(tb, &bi);
+		leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1,
+				     ih, body, tb->zeroes_num);
+
+		if (tb->item_pos - n + tb->rnum[0] - 1 == 0)
+			replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+
+		tb->zeroes_num = tb->insert_size[0] = 0;
+	}
+}
+
+
+static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
+				     struct item_head *ih, const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct buffer_info bi;
+	int entry_count;
+
+	RFALSE(tb->zeroes_num,
+	       "PAP-12145: invalid parameter in case of a directory");
+	entry_count = ih_entry_count(item_head(tbS0, tb->item_pos));
+
+	/* new directory entry falls into R[0] */
+	if (entry_count - tb->rbytes < tb->pos_in_item) {
+		int paste_entry_position;
+
+		RFALSE(tb->rbytes - 1 >= entry_count || !tb->insert_size[0],
+		       "PAP-12150: no enough of entries to shift to R[0]: "
+		       "rbytes=%d, entry_count=%d", tb->rbytes, entry_count);
+
+		/*
+		 * Shift rnum[0]-1 items in whole.
+		 * Shift rbytes-1 directory entries from directory
+		 * item number rnum[0]
+		 */
+		leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
+
+		/* Paste given directory entry to directory item */
+		paste_entry_position = tb->pos_in_item - entry_count +
+				       tb->rbytes - 1;
+		buffer_info_init_right(tb, &bi);
+		leaf_paste_in_buffer(&bi, 0, paste_entry_position,
+				     tb->insert_size[0], body, tb->zeroes_num);
+
+		/* paste entry */
+		leaf_paste_entries(&bi, 0, paste_entry_position, 1,
+				   (struct reiserfs_de_head *) body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+		/* change delimiting keys */
+		if (paste_entry_position == 0)
+			replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+
+		tb->insert_size[0] = 0;
+		tb->pos_in_item++;
+	} else {
+		/* new directory entry doesn't fall into R[0] */
+		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+	}
+}
+
+/*
+ * Paste when part of the appended item must be shifted to R[0]: directory
+ * items go to the _dirent helper; otherwise append the part to R[0] and
+ * leave the n_rem bytes not placed there in tb->insert_size[0].
+ */
+static void balance_leaf_paste_right_shift(struct tree_balance *tb,
+				     struct item_head *ih, const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n_shift, n_rem, r_zeroes_number, version;
+	unsigned long temp_rem;
+	const char *r_body;
+	struct buffer_info bi;
+
+	/* we append to directory item */
+	if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
+		balance_leaf_paste_right_shift_dirent(tb, ih, body);
+		return;
+	}
+
+	/*
+	 * Regular object: calculate number of bytes which must be
+	 * shifted from appended item
+	 */
+	n_shift = tb->rbytes - tb->insert_size[0];
+	if (n_shift < 0)
+		n_shift = 0;
+
+	RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)),
+	       "PAP-12155: invalid position to paste. ih_item_len=%d, "
+	       "pos_in_item=%d", ih_item_len(item_head(tbS0, tb->item_pos)),
+	       tb->pos_in_item);
+
+	leaf_shift_right(tb, tb->rnum[0], n_shift);
+
+	/*
+	 * Calculate number of bytes which must remain in body
+	 * after appending to R[0]
+	 */
+	n_rem = tb->insert_size[0] - tb->rbytes;
+	if (n_rem < 0)
+		n_rem = 0;
+
+	temp_rem = n_rem;
+	version = ih_version(item_head(tb->R[0], 0));
+	if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
+		int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+		temp_rem = n_rem << shift;
+	}
+
+	add_le_key_k_offset(version, leaf_key(tb->R[0], 0), temp_rem);
+	add_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
+			    temp_rem);
+	do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
+
+	/* Append part of body into R[0] */
+	buffer_info_init_right(tb, &bi);
+	if (n_rem > tb->zeroes_num) {
+		r_zeroes_number = 0;
+		r_body = body + n_rem - tb->zeroes_num;
+	} else {
+		r_body = body;
+		r_zeroes_number = tb->zeroes_num - n_rem;
+		tb->zeroes_num -= r_zeroes_number;
+	}
+
+	leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem,
+			     r_body, r_zeroes_number);
+
+	if (is_indirect_le_ih(item_head(tb->R[0], 0)))
+		set_ih_free_space(item_head(tb->R[0], 0), 0);
+
+	tb->insert_size[0] = n_rem;
+	if (!n_rem)
+		tb->pos_in_item++;
+}
+
+/* Paste case where the pasted item falls into R[0] in whole. */
+static void balance_leaf_paste_right_whole(struct tree_balance *tb,
+				     struct item_head *ih, const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+	struct item_head *pasted;
+	struct buffer_info bi;
+
+	buffer_info_init_right(tb, &bi);
+	leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+
+	/* append item in R[0] */
+	if (tb->pos_in_item >= 0) {
+		buffer_info_init_right(tb, &bi);
+		leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0],
+				     tb->pos_in_item, tb->insert_size[0], body,
+				     tb->zeroes_num);
+	}
+
+	/* paste new entry, if item is directory item */
+	pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]);
+	if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) {
+		leaf_paste_entries(&bi, tb->item_pos - n + tb->rnum[0],
+				   tb->pos_in_item, 1,
+				   (struct reiserfs_de_head *)body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+		if (!tb->pos_in_item) {
+			RFALSE(tb->item_pos - n + tb->rnum[0],
+			       "PAP-12165: directory item must be first "
+			       "item of node when pasting is in 0th position");
+
+			/* update delimiting keys */
+			replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+		}
+	}
+
+	if (is_indirect_le_ih(pasted))
+		set_ih_free_space(pasted, 0);
+	tb->zeroes_num = tb->insert_size[0] = 0;
+}
+
+static void balance_leaf_paste_right(struct tree_balance *tb,
+				     struct item_head *ih, const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+
+	/* new item doesn't fall into R[0] */
+	if (n - tb->rnum[0] > tb->item_pos) {
+		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+		return;
+	}
+
+	/* pasted item or part of it falls to R[0] */
+
+	if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1)
+		/* we must shift the part of the appended item */
+		balance_leaf_paste_right_shift(tb, ih, body);
+	else
+		/* pasted item in whole falls into R[0] */
+		balance_leaf_paste_right_whole(tb, ih, body);
+}
+
+/* shift rnum[0] items from S[0] to the right neighbor R[0] */
+static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih,
+			       const char *body, int flag)
+{
+	if (tb->rnum[0] <= 0)
+		return;
+
+	BUG_ON(flag != M_INSERT && flag != M_PASTE);
+
+	if (flag == M_INSERT)
+		balance_leaf_insert_right(tb, ih, body);
+	else /* M_PASTE */
+		balance_leaf_paste_right(tb, ih, body);
+}
+
+static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
+					  struct item_head *ih,
+					  const char *body,
+					  struct item_head *insert_key,
+					  struct buffer_head **insert_ptr,
+					  int i)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+	struct buffer_info bi;
+	int shift;
+
+	/* new item or its part doesn't fall into S_new[i] */
+	if (n - tb->snum[i] >= tb->item_pos) {
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+				tb->snum[i], tb->sbytes[i], tb->S_new[i]);
+		return;
+	}
+
+	/* new item or its part falls to first new node S_new[i] */
+
+	/* part of new item falls into S_new[i] */
+	if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) {
+		int old_key_comp, old_len, r_zeroes_number;
+		const char *r_body;
+		int version;
+
+		/* Move snum[i]-1 items from S[0] to S_new[i] */
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1,
+				tb->S_new[i]);
+
+		/* Remember key component and item length */
+		version = ih_version(ih);
+		old_key_comp = le_ih_k_offset(ih);
+		old_len = ih_item_len(ih);
+
+		/*
+		 * Calculate key component and item length to insert
+		 * into S_new[i]
+		 */
+		shift = 0;
+		if (is_indirect_le_ih(ih))
+			shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+		set_le_ih_k_offset(ih,
+				   le_ih_k_offset(ih) +
+				   ((old_len - tb->sbytes[i]) << shift));
+
+		put_ih_item_len(ih, tb->sbytes[i]);
+
+		/* Insert part of the item into S_new[i] before 0-th item */
+		buffer_info_init_bh(tb, &bi, tb->S_new[i]);
+
+		if ((old_len - tb->sbytes[i]) > tb->zeroes_num) {
+			r_zeroes_number = 0;
+			r_body = body + (old_len - tb->sbytes[i]) -
+					 tb->zeroes_num;
+		} else {
+			r_body = body;
+			r_zeroes_number = tb->zeroes_num - (old_len -
+					  tb->sbytes[i]);
+			tb->zeroes_num -= r_zeroes_number;
+		}
+
+		leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number);
+
+		/*
+		 * Calculate key component and item length to
+		 * insert into S[0]
+		 */
+		set_le_ih_k_offset(ih, old_key_comp);
+		put_ih_item_len(ih, old_len - tb->sbytes[i]);
+		tb->insert_size[0] -= tb->sbytes[i];
+	} else {
+		/* whole new item falls into S_new[i] */
+
+		/*
+		 * Shift snum[i] - 1 items to S_new[i]
+		 * (sbytes[i] of split item)
+		 */
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+				tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]);
+
+		/* Insert new item into S_new[i] */
+		buffer_info_init_bh(tb, &bi, tb->S_new[i]);
+		leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1,
+				     ih, body, tb->zeroes_num);
+
+		tb->zeroes_num = tb->insert_size[0] = 0;
+	}
+}
+
+/* we append to directory item */
+static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
+					 struct item_head *ih,
+					 const char *body,
+					 struct item_head *insert_key,
+					 struct buffer_head **insert_ptr,
+					 int i)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct item_head *aux_ih = item_head(tbS0, tb->item_pos);
+	int entry_count = ih_entry_count(aux_ih);
+	struct buffer_info bi;
+
+	if (entry_count - tb->sbytes[i] < tb->pos_in_item &&
+	    tb->pos_in_item <= entry_count) {
+		/* new directory entry falls into S_new[i] */
+
+		RFALSE(!tb->insert_size[0],
+		       "PAP-12215: insert_size is already 0");
+		RFALSE(tb->sbytes[i] - 1 >= entry_count,
+		       "PAP-12220: there are no so much entries (%d), only %d",
+		       tb->sbytes[i] - 1, entry_count);
+
+		/*
+		 * Shift snum[i]-1 items in whole.
+		 * Shift sbytes[i]-1 directory entries
+		 * from directory item number snum[i]
+		 */
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
+				tb->sbytes[i] - 1, tb->S_new[i]);
+
+		/*
+		 * Paste given directory entry to
+		 * directory item
+		 */
+		buffer_info_init_bh(tb, &bi, tb->S_new[i]);
+		leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count +
+				     tb->sbytes[i] - 1, tb->insert_size[0],
+				     body, tb->zeroes_num);
+
+		/* paste new directory entry */
+		leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count +
+				   tb->sbytes[i] - 1, 1,
+				   (struct reiserfs_de_head *) body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+		tb->insert_size[0] = 0;
+		tb->pos_in_item++;
+	} else {
+		/* new directory entry doesn't fall into S_new[i] */
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
+				tb->sbytes[i], tb->S_new[i]);
+	}
+
+}
+
+static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
+					 struct item_head *ih,
+					 const char *body,
+					 struct item_head *insert_key,
+					 struct buffer_head **insert_ptr,
+					 int i)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct item_head *aux_ih = item_head(tbS0, tb->item_pos);
+	int n_shift, n_rem, r_zeroes_number, shift;
+	const char *r_body;
+	struct item_head *tmp;
+	struct buffer_info bi;
+
+	RFALSE(ih, "PAP-12210: ih must be 0");
+
+	if (is_direntry_le_ih(aux_ih)) {
+		balance_leaf_new_nodes_paste_dirent(tb, ih, body, insert_key,
+						    insert_ptr, i);
+		return;
+	}
+
+	/* regular object */
+
+
+	RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) ||
+	       tb->insert_size[0] <= 0,
+	       "PAP-12225: item too short or insert_size <= 0");
+
+	/*
+	 * Calculate number of bytes which must be shifted from appended item
+	 */
+	n_shift = tb->sbytes[i] - tb->insert_size[0];
+	if (n_shift < 0)
+		n_shift = 0;
+	leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift,
+			tb->S_new[i]);
+
+	/*
+	 * Calculate number of bytes which must remain in body after
+	 * append to S_new[i]
+	 */
+	n_rem = tb->insert_size[0] - tb->sbytes[i];
+	if (n_rem < 0)
+		n_rem = 0;
+
+	/* Append part of body into S_new[i] */
+	buffer_info_init_bh(tb, &bi, tb->S_new[i]);
+	if (n_rem > tb->zeroes_num) {
+		r_zeroes_number = 0;
+		r_body = body + n_rem - tb->zeroes_num;
+	} else {
+		r_body = body;
+		r_zeroes_number = tb->zeroes_num - n_rem;
+		tb->zeroes_num -= r_zeroes_number;
+	}
+
+	leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem,
+			     r_body, r_zeroes_number);
+
+	tmp = item_head(tb->S_new[i], 0);
+	shift = 0;
+	if (is_indirect_le_ih(tmp)) {
+		set_ih_free_space(tmp, 0);
+		shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
+	}
+	add_le_ih_k_offset(tmp, n_rem << shift);
+
+	tb->insert_size[0] = n_rem;
+	if (!n_rem)
+		tb->pos_in_item++;
+}
+
+static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
+					       struct item_head *ih,
+					       const char *body,
+					       struct item_head *insert_key,
+					       struct buffer_head **insert_ptr,
+					       int i)
+
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+	int leaf_mi;
+	struct item_head *pasted;
+	struct buffer_info bi;
+
+#ifdef CONFIG_REISERFS_CHECK
+	struct item_head *ih_check = item_head(tbS0, tb->item_pos);
+
+	if (!is_direntry_le_ih(ih_check) &&
+	    (tb->pos_in_item != ih_item_len(ih_check) ||
+	    tb->insert_size[0] <= 0))
+		reiserfs_panic(tb->tb_sb,
+			     "PAP-12235",
+			     "pos_in_item must be equal to ih_item_len");
+#endif
+
+	leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
+				  tb->sbytes[i], tb->S_new[i]);
+
+	RFALSE(leaf_mi,
+	       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
+	       leaf_mi);
+
+	/* paste into item */
+	buffer_info_init_bh(tb, &bi, tb->S_new[i]);
+	leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->snum[i],
+			     tb->pos_in_item, tb->insert_size[0],
+			     body, tb->zeroes_num);
+
+	pasted = item_head(tb->S_new[i], tb->item_pos - n +
+			   tb->snum[i]);
+	if (is_direntry_le_ih(pasted))
+		leaf_paste_entries(&bi, tb->item_pos - n + tb->snum[i],
+				   tb->pos_in_item, 1,
+				   (struct reiserfs_de_head *)body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+	/* if we paste to indirect item update ih_free_space */
+	if (is_indirect_le_ih(pasted))
+		set_ih_free_space(pasted, 0);
+
+	tb->zeroes_num = tb->insert_size[0] = 0;
+
+}
+static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
+					 struct item_head *ih,
+					 const char *body,
+					 struct item_head *insert_key,
+					 struct buffer_head **insert_ptr,
+					 int i)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int n = B_NR_ITEMS(tbS0);
+
+	/* pasted item doesn't fall into S_new[i] */
+	if (n - tb->snum[i] > tb->item_pos) {
+		leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+				tb->snum[i], tb->sbytes[i], tb->S_new[i]);
+		return;
+	}
+
+	/* pasted item or part of it falls to S_new[i] */
+
+	if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1)
+		/* we must shift part of the appended item */
+		balance_leaf_new_nodes_paste_shift(tb, ih, body, insert_key,
+						   insert_ptr, i);
+	else
+		/* item falls wholly into S_new[i] */
+		balance_leaf_new_nodes_paste_whole(tb, ih, body, insert_key,
+						   insert_ptr, i);
+}
+
+/* Fill new nodes that appear in place of S[0] */
+static void balance_leaf_new_nodes(struct tree_balance *tb,
+				   struct item_head *ih,
+				   const char *body,
+				   struct item_head *insert_key,
+				   struct buffer_head **insert_ptr,
+				   int flag)
+{
+	int i;
+	for (i = tb->blknum[0] - 2; i >= 0; i--) {
+		BUG_ON(flag != M_INSERT && flag != M_PASTE);
+
+		RFALSE(!tb->snum[i],
+		       "PAP-12200: snum[%d] == %d. Must be > 0", i,
+		       tb->snum[i]);
+
+		/* here we shift from S to S_new nodes */
+
+		tb->S_new[i] = get_FEB(tb);
+
+		/* initialize block type and tree level */
+		set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL);
+
+		if (flag == M_INSERT)
+			balance_leaf_new_nodes_insert(tb, ih, body, insert_key,
+						      insert_ptr, i);
+		else /* M_PASTE */
+			balance_leaf_new_nodes_paste(tb, ih, body, insert_key,
+						     insert_ptr, i);
+
+		memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE);
+		insert_ptr[i] = tb->S_new[i];
+
+		RFALSE(!buffer_journaled(tb->S_new[i])
+		       || buffer_journal_dirty(tb->S_new[i])
+		       || buffer_dirty(tb->S_new[i]),
+		       "PAP-12247: S_new[%d] : (%b)",
+		       i, tb->S_new[i]);
+	}
+}
+
+static void balance_leaf_finish_node_insert(struct tree_balance *tb,
+					    struct item_head *ih,
+					    const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct buffer_info bi;
+	buffer_info_init_tbS0(tb, &bi);
+	leaf_insert_into_buf(&bi, tb->item_pos, ih, body, tb->zeroes_num);
+
+	/* If we insert the first key change the delimiting key */
+	if (tb->item_pos == 0) {
+		if (tb->CFL[0])	/* can be 0 in reiserfsck */
+			replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
+
+	}
+}
+
+static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
+						  struct item_head *ih,
+						  const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct item_head *pasted = item_head(tbS0, tb->item_pos);
+	struct buffer_info bi;
+
+	if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) {
+		RFALSE(!tb->insert_size[0],
+		       "PAP-12260: insert_size is 0 already");
+
+		/* prepare space */
+		buffer_info_init_tbS0(tb, &bi);
+		leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item,
+				     tb->insert_size[0], body, tb->zeroes_num);
+
+		/* paste entry */
+		leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1,
+				   (struct reiserfs_de_head *)body,
+				   body + DEH_SIZE, tb->insert_size[0]);
+
+		if (!tb->item_pos && !tb->pos_in_item) {
+			RFALSE(!tb->CFL[0] || !tb->L[0],
+			       "PAP-12270: CFL[0]/L[0] must  be specified");
+			if (tb->CFL[0])
+				replace_key(tb, tb->CFL[0], tb->lkey[0],
+					    tbS0, 0);
+		}
+
+		tb->insert_size[0] = 0;
+	}
+}
+
+static void balance_leaf_finish_node_paste(struct tree_balance *tb,
+					   struct item_head *ih,
+					   const char *body)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	struct buffer_info bi;
+	struct item_head *pasted = item_head(tbS0, tb->item_pos);
+
+	/* for a directory item, the new entry may already have been pasted */
+	if (is_direntry_le_ih(pasted)) {
+		balance_leaf_finish_node_paste_dirent(tb, ih, body);
+		return;
+	}
+
+	/* regular object */
+
+	if (tb->pos_in_item == ih_item_len(pasted)) {
+		RFALSE(tb->insert_size[0] <= 0,
+		       "PAP-12275: insert size must not be %d",
+		       tb->insert_size[0]);
+		buffer_info_init_tbS0(tb, &bi);
+		leaf_paste_in_buffer(&bi, tb->item_pos,
+				     tb->pos_in_item, tb->insert_size[0], body,
+				     tb->zeroes_num);
+
+		if (is_indirect_le_ih(pasted))
+			set_ih_free_space(pasted, 0);
+
+		tb->insert_size[0] = 0;
+	}
+#ifdef CONFIG_REISERFS_CHECK
+	else if (tb->insert_size[0]) {
+		print_cur_tb("12285");
+		reiserfs_panic(tb->tb_sb, "PAP-12285",
+		    "insert_size must be 0 (%d)", tb->insert_size[0]);
+	}
+#endif
+}
+
+/*
+ * if the affected item was not wholly shifted then we
+ * perform all necessary operations on that part or whole
+ * of the affected item which remains in S
+ */
+static void balance_leaf_finish_node(struct tree_balance *tb,
+				      struct item_head *ih,
+				      const char *body, int flag)
+{
+	/* if we must insert or append into buffer S[0] */
+	if (0 <= tb->item_pos && tb->item_pos < tb->s0num) {
+		if (flag == M_INSERT)
+			balance_leaf_finish_node_insert(tb, ih, body);
+		else /* M_PASTE */
+			balance_leaf_finish_node_paste(tb, ih, body);
+	}
+}
+
+/**
+ * balance_leaf - reiserfs tree balancing algorithm
+ * @tb: tree balance state
+ * @ih: item header of inserted item (little endian)
+ * @body: body of inserted item or bytes to paste
+ * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance)
+ * passed back:
+ * @insert_key: key to insert new nodes
+ * @insert_ptr: array of nodes to insert at the next level
+ *
+ * In our processing of one level we sometimes determine what must be
+ * inserted into the next higher level.  This insertion consists of a
+ * key or two keys and their corresponding pointers.
+ */
+static int balance_leaf(struct tree_balance *tb, struct item_head *ih,
+			const char *body, int flag,
+			struct item_head *insert_key,
+			struct buffer_head **insert_ptr)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 
 	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
 
@@ -304,465 +1388,27 @@
 	if (tb->insert_size[0] < 0)
 		return balance_leaf_when_delete(tb, flag);
 
-	zeros_num = 0;
+	tb->item_pos = PATH_LAST_POSITION(tb->tb_path),
+	tb->pos_in_item = tb->tb_path->pos_in_item,
+	tb->zeroes_num = 0;
 	if (flag == M_INSERT && !body)
-		zeros_num = ih_item_len(ih);
+		tb->zeroes_num = ih_item_len(ih);
 
-	pos_in_item = tb->tb_path->pos_in_item;
-	/* for indirect item pos_in_item is measured in unformatted node
-	   pointers. Recalculate to bytes */
+	/*
+	 * for indirect item pos_in_item is measured in unformatted node
+	 * pointers. Recalculate to bytes
+	 */
 	if (flag != M_INSERT
-	    && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
-		pos_in_item *= UNFM_P_SIZE;
+	    && is_indirect_le_ih(item_head(tbS0, tb->item_pos)))
+		tb->pos_in_item *= UNFM_P_SIZE;
 
-	if (tb->lnum[0] > 0) {
-		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
-		if (item_pos < tb->lnum[0]) {
-			/* new item or it part falls to L[0], shift it too */
-			n = B_NR_ITEMS(tb->L[0]);
-
-			switch (flag) {
-			case M_INSERT:	/* insert item into L[0] */
-
-				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
-					/* part of new item falls into L[0] */
-					int new_item_len;
-					int version;
-
-					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
-
-					/* Calculate item length to insert to S[0] */
-					new_item_len = ih_item_len(ih) - tb->lbytes;
-					/* Calculate and check item length to insert to L[0] */
-					put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
-
-					RFALSE(ih_item_len(ih) <= 0,
-					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
-					       ih_item_len(ih));
-
-					/* Insert new item into L[0] */
-					buffer_info_init_left(tb, &bi);
-					leaf_insert_into_buf(&bi,
-							n + item_pos - ret_val, ih, body,
-							zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
-
-					version = ih_version(ih);
-
-					/* Calculate key component, item length and body to insert into S[0] */
-					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
-							(tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
-
-					put_ih_item_len(ih, new_item_len);
-					if (tb->lbytes > zeros_num) {
-						body += (tb->lbytes - zeros_num);
-						zeros_num = 0;
-					} else
-						zeros_num -= tb->lbytes;
-
-					RFALSE(ih_item_len(ih) <= 0,
-					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
-					       ih_item_len(ih));
-				} else {
-					/* new item in whole falls into L[0] */
-					/* Shift lnum[0]-1 items to L[0] */
-					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
-					/* Insert new item into L[0] */
-					buffer_info_init_left(tb, &bi);
-					leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
-					tb->insert_size[0] = 0;
-					zeros_num = 0;
-				}
-				break;
-
-			case M_PASTE:	/* append item in L[0] */
-
-				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
-					/* we must shift the part of the appended item */
-					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
-
-						RFALSE(zeros_num,
-						       "PAP-12090: invalid parameter in case of a directory");
-						/* directory item */
-						if (tb->lbytes > pos_in_item) {
-							/* new directory entry falls into L[0] */
-							struct item_head *pasted;
-							int l_pos_in_item = pos_in_item;
-
-							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
-							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
-							if (ret_val && !item_pos) {
-								pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
-								l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
-							}
-
-							/* Append given directory entry to directory item */
-							buffer_info_init_left(tb, &bi);
-							leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
-
-							/* previous string prepared space for pasting new entry, following string pastes this entry */
-
-							/* when we have merge directory item, pos_in_item has been changed too */
-
-							/* paste new directory entry. 1 is entry number */
-							leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
-									   1, (struct reiserfs_de_head *) body,
-									   body + DEH_SIZE, tb->insert_size[0]);
-							tb->insert_size[0] = 0;
-						} else {
-							/* new directory item doesn't fall into L[0] */
-							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
-							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
-						}
-						/* Calculate new position to append in item body */
-						pos_in_item -= tb->lbytes;
-					} else {
-						/* regular object */
-						RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
-						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
-						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
-						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
-
-						if (tb->lbytes >= pos_in_item) {
-							/* appended item will be in L[0] in whole */
-							int l_n;
-
-							/* this bytes number must be appended to the last item of L[h] */
-							l_n = tb->lbytes - pos_in_item;
-
-							/* Calculate new insert_size[0] */
-							tb->insert_size[0] -= l_n;
-
-							RFALSE(tb->insert_size[0] <= 0,
-							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
-							       tb->insert_size[0]);
-							ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
-									    (B_N_PITEM_HEAD(tbS0, item_pos)));
-							/* Append to body of item in L[0] */
-							buffer_info_init_left(tb, &bi);
-							leaf_paste_in_buffer
-							    (&bi, n + item_pos - ret_val, ih_item_len
-							     (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
-							     l_n, body,
-							     zeros_num > l_n ? l_n : zeros_num);
-							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
-							{
-								int version;
-								int temp_l = l_n;
-
-								RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
-								     "PAP-12106: item length must be 0");
-								RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
-								      (tb->L[0], n + item_pos - ret_val)),
-								     "PAP-12107: items must be of the same file");
-								if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
-									temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
-								}
-								/* update key of first item in S0 */
-								version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
-								set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
-								     le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
-								/* update left delimiting key */
-								set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
-								     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
-							}
-
-							/* Calculate new body, position in item and insert_size[0] */
-							if (l_n > zeros_num) {
-								body += (l_n - zeros_num);
-								zeros_num = 0;
-							} else
-								zeros_num -= l_n;
-							pos_in_item = 0;
-
-							RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
-							     || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
-							     || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
-							     "PAP-12120: item must be merge-able with left neighboring item");
-						} else {	/* only part of the appended item will be in L[0] */
-
-							/* Calculate position in item for append in S[0] */
-							pos_in_item -= tb->lbytes;
-
-							RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
-
-							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
-							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
-						}
-					}
-				} else {	/* appended item will be in L[0] in whole */
-
-					struct item_head *pasted;
-
-					if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {	/* if we paste into first item of S[0] and it is left mergable */
-						/* then increment pos_in_item by the size of the last item in L[0] */
-						pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
-						if (is_direntry_le_ih(pasted))
-							pos_in_item += ih_entry_count(pasted);
-						else
-							pos_in_item += ih_item_len(pasted);
-					}
-
-					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
-					ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
-					/* Append to body of item in L[0] */
-					buffer_info_init_left(tb, &bi);
-					leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
-							     pos_in_item,
-							     tb->insert_size[0],
-							     body, zeros_num);
-
-					/* if appended item is directory, paste entry */
-					pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
-					if (is_direntry_le_ih(pasted))
-						leaf_paste_entries(&bi, n + item_pos - ret_val,
-								   pos_in_item, 1,
-								   (struct reiserfs_de_head *) body,
-								   body + DEH_SIZE,
-								   tb->insert_size[0]);
-					/* if appended item is indirect item, put unformatted node into un list */
-					if (is_indirect_le_ih(pasted))
-						set_ih_free_space(pasted, 0);
-					tb->insert_size[0] = 0;
-					zeros_num = 0;
-				}
-				break;
-			default:	/* cases d and t */
-				reiserfs_panic(tb->tb_sb, "PAP-12130",
-					       "lnum > 0: unexpected mode: "
-					       " %s(%d)",
-					       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
-			}
-		} else {
-			/* new item doesn't fall into L[0] */
-			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
-		}
-	}
+	balance_leaf_left(tb, ih, body, flag);
 
 	/* tb->lnum[0] > 0 */
 	/* Calculate new item position */
-	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
+	tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
 
-	if (tb->rnum[0] > 0) {
-		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
-		n = B_NR_ITEMS(tbS0);
-		switch (flag) {
-
-		case M_INSERT:	/* insert item */
-			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
-				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
-					loff_t old_key_comp, old_len, r_zeros_number;
-					const char *r_body;
-					int version;
-					loff_t offset;
-
-					leaf_shift_right(tb, tb->rnum[0] - 1, -1);
-
-					version = ih_version(ih);
-					/* Remember key component and item length */
-					old_key_comp = le_ih_k_offset(ih);
-					old_len = ih_item_len(ih);
-
-					/* Calculate key component and item length to insert into R[0] */
-					offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
-					set_le_ih_k_offset(ih, offset);
-					put_ih_item_len(ih, tb->rbytes);
-					/* Insert part of the item into R[0] */
-					buffer_info_init_right(tb, &bi);
-					if ((old_len - tb->rbytes) > zeros_num) {
-						r_zeros_number = 0;
-						r_body = body + (old_len - tb->rbytes) - zeros_num;
-					} else {
-						r_body = body;
-						r_zeros_number = zeros_num - (old_len - tb->rbytes);
-						zeros_num -= r_zeros_number;
-					}
-
-					leaf_insert_into_buf(&bi, 0, ih, r_body,
-							     r_zeros_number);
-
-					/* Replace right delimiting key by first key in R[0] */
-					replace_key(tb, tb->CFR[0], tb->rkey[0],
-						    tb->R[0], 0);
-
-					/* Calculate key component and item length to insert into S[0] */
-					set_le_ih_k_offset(ih, old_key_comp);
-					put_ih_item_len(ih, old_len - tb->rbytes);
-
-					tb->insert_size[0] -= tb->rbytes;
-
-				} else {	/* whole new item falls into R[0] */
-
-					/* Shift rnum[0]-1 items to R[0] */
-					ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
-					/* Insert new item into R[0] */
-					buffer_info_init_right(tb, &bi);
-					leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
-							     ih, body, zeros_num);
-
-					if (item_pos - n + tb->rnum[0] - 1 == 0) {
-						replace_key(tb, tb->CFR[0],
-							    tb->rkey[0],
-							    tb->R[0], 0);
-
-					}
-					zeros_num = tb->insert_size[0] = 0;
-				}
-			} else {	/* new item or part of it doesn't fall into R[0] */
-
-				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
-			}
-			break;
-
-		case M_PASTE:	/* append item */
-
-			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
-				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
-					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {	/* we append to directory item */
-						int entry_count;
-
-						RFALSE(zeros_num,
-						       "PAP-12145: invalid parameter in case of a directory");
-						entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
-								  (tbS0, item_pos));
-						if (entry_count - tb->rbytes <
-						    pos_in_item)
-							/* new directory entry falls into R[0] */
-						{
-							int paste_entry_position;
-
-							RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
-							       "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
-							       tb->rbytes, entry_count);
-							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
-							leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
-							/* Paste given directory entry to directory item */
-							paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
-							buffer_info_init_right(tb, &bi);
-							leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
-							/* paste entry */
-							leaf_paste_entries(&bi, 0, paste_entry_position, 1,
-									   (struct reiserfs_de_head *) body,
-									   body + DEH_SIZE, tb->insert_size[0]);
-
-							if (paste_entry_position == 0) {
-								/* change delimiting keys */
-								replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
-							}
-
-							tb->insert_size[0] = 0;
-							pos_in_item++;
-						} else {	/* new directory entry doesn't fall into R[0] */
-
-							leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
-						}
-					} else {	/* regular object */
-
-						int n_shift, n_rem, r_zeros_number;
-						const char *r_body;
-
-						/* Calculate number of bytes which must be shifted from appended item */
-						if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
-							n_shift = 0;
-
-						RFALSE(pos_in_item != ih_item_len
-						       (B_N_PITEM_HEAD(tbS0, item_pos)),
-						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
-						       pos_in_item, ih_item_len
-						       (B_N_PITEM_HEAD(tbS0, item_pos)));
-
-						leaf_shift_right(tb, tb->rnum[0], n_shift);
-						/* Calculate number of bytes which must remain in body after appending to R[0] */
-						if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
-							n_rem = 0;
-
-						{
-							int version;
-							unsigned long temp_rem = n_rem;
-
-							version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
-							if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
-								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
-							}
-							set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
-							     le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
-							set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
-							     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
-						}
-/*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
-		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
-						do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
-
-						/* Append part of body into R[0] */
-						buffer_info_init_right(tb, &bi);
-						if (n_rem > zeros_num) {
-							r_zeros_number = 0;
-							r_body = body + n_rem - zeros_num;
-						} else {
-							r_body = body;
-							r_zeros_number = zeros_num - n_rem;
-							zeros_num -= r_zeros_number;
-						}
-
-						leaf_paste_in_buffer(&bi, 0, n_shift,
-								     tb->insert_size[0] - n_rem,
-								     r_body, r_zeros_number);
-
-						if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
-#if 0
-							RFALSE(n_rem,
-							       "PAP-12160: paste more than one unformatted node pointer");
-#endif
-							set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
-						}
-						tb->insert_size[0] = n_rem;
-						if (!n_rem)
-							pos_in_item++;
-					}
-				} else {	/* pasted item in whole falls into R[0] */
-
-					struct item_head *pasted;
-
-					ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
-					/* append item in R[0] */
-					if (pos_in_item >= 0) {
-						buffer_info_init_right(tb, &bi);
-						leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
-								     tb->insert_size[0], body, zeros_num);
-					}
-
-					/* paste new entry, if item is directory item */
-					pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
-					if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
-						leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
-								   pos_in_item, 1,
-								   (struct reiserfs_de_head *) body,
-								   body + DEH_SIZE, tb->insert_size[0]);
-						if (!pos_in_item) {
-
-							RFALSE(item_pos - n + tb->rnum[0],
-							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
-
-							/* update delimiting keys */
-							replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
-						}
-					}
-
-					if (is_indirect_le_ih(pasted))
-						set_ih_free_space(pasted, 0);
-					zeros_num = tb->insert_size[0] = 0;
-				}
-			} else {	/* new item doesn't fall into R[0] */
-
-				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
-			}
-			break;
-		default:	/* cases d and t */
-			reiserfs_panic(tb->tb_sb, "PAP-12175",
-				       "rnum > 0: unexpected mode: %s(%d)",
-				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
-		}
-
-	}
+	balance_leaf_right(tb, ih, body, flag);
 
 	/* tb->rnum[0] > 0 */
 	RFALSE(tb->blknum[0] > 3,
@@ -770,22 +1416,26 @@
 	RFALSE(tb->blknum[0] < 0,
 	       "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
 
-	/* if while adding to a node we discover that it is possible to split
-	   it in two, and merge the left part into the left neighbor and the
-	   right part into the right neighbor, eliminating the node */
+	/*
+	 * if while adding to a node we discover that it is possible to split
+	 * it in two, and merge the left part into the left neighbor and the
+	 * right part into the right neighbor, eliminating the node
+	 */
 	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
 
 		RFALSE(!tb->lnum[0] || !tb->rnum[0],
 		       "PAP-12190: lnum and rnum must not be zero");
-		/* if insertion was done before 0-th position in R[0], right
-		   delimiting key of the tb->L[0]'s and left delimiting key are
-		   not set correctly */
+		/*
+		 * if insertion was done before 0-th position in R[0], right
+		 * delimiting key of the tb->L[0]'s and left delimiting key are
+		 * not set correctly
+		 */
 		if (tb->CFL[0]) {
 			if (!tb->CFR[0])
 				reiserfs_panic(tb->tb_sb, "vs-12195",
 					       "CFR not initialized");
-			copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
-				 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
+			copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
+				 internal_key(tb->CFR[0], tb->rkey[0]));
 			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
 		}
 
@@ -793,343 +1443,10 @@
 		return 0;
 	}
 
-	/* Fill new nodes that appear in place of S[0] */
+	balance_leaf_new_nodes(tb, ih, body, insert_key, insert_ptr, flag);
 
-	/* I am told that this copying is because we need an array to enable
-	   the looping code. -Hans */
-	snum[0] = tb->s1num, snum[1] = tb->s2num;
-	sbytes[0] = tb->s1bytes;
-	sbytes[1] = tb->s2bytes;
-	for (i = tb->blknum[0] - 2; i >= 0; i--) {
+	balance_leaf_finish_node(tb, ih, body, flag);
 
-		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
-		       snum[i]);
-
-		/* here we shift from S to S_new nodes */
-
-		S_new[i] = get_FEB(tb);
-
-		/* initialized block type and tree level */
-		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
-
-		n = B_NR_ITEMS(tbS0);
-
-		switch (flag) {
-		case M_INSERT:	/* insert item */
-
-			if (n - snum[i] < item_pos) {	/* new item or it's part falls to first new node S_new[i] */
-				if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
-					int old_key_comp, old_len, r_zeros_number;
-					const char *r_body;
-					int version;
-
-					/* Move snum[i]-1 items from S[0] to S_new[i] */
-					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
-							snum[i] - 1, -1,
-							S_new[i]);
-					/* Remember key component and item length */
-					version = ih_version(ih);
-					old_key_comp = le_ih_k_offset(ih);
-					old_len = ih_item_len(ih);
-
-					/* Calculate key component and item length to insert into S_new[i] */
-					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
-							   ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
-
-					put_ih_item_len(ih, sbytes[i]);
-
-					/* Insert part of the item into S_new[i] before 0-th item */
-					buffer_info_init_bh(tb, &bi, S_new[i]);
-
-					if ((old_len - sbytes[i]) > zeros_num) {
-						r_zeros_number = 0;
-						r_body = body + (old_len - sbytes[i]) - zeros_num;
-					} else {
-						r_body = body;
-						r_zeros_number = zeros_num - (old_len - sbytes[i]);
-						zeros_num -= r_zeros_number;
-					}
-
-					leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
-
-					/* Calculate key component and item length to insert into S[i] */
-					set_le_ih_k_offset(ih, old_key_comp);
-					put_ih_item_len(ih, old_len - sbytes[i]);
-					tb->insert_size[0] -= sbytes[i];
-				} else {	/* whole new item falls into S_new[i] */
-
-					/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
-					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
-							snum[i] - 1, sbytes[i], S_new[i]);
-
-					/* Insert new item into S_new[i] */
-					buffer_info_init_bh(tb, &bi, S_new[i]);
-					leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
-							     ih, body, zeros_num);
-
-					zeros_num = tb->insert_size[0] = 0;
-				}
-			}
-
-			else {	/* new item or it part don't falls into S_new[i] */
-
-				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
-						snum[i], sbytes[i], S_new[i]);
-			}
-			break;
-
-		case M_PASTE:	/* append item */
-
-			if (n - snum[i] <= item_pos) {	/* pasted item or part if it falls to S_new[i] */
-				if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
-					struct item_head *aux_ih;
-
-					RFALSE(ih, "PAP-12210: ih must be 0");
-
-					aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
-					if (is_direntry_le_ih(aux_ih)) {
-						/* we append to directory item */
-
-						int entry_count;
-
-						entry_count = ih_entry_count(aux_ih);
-
-						if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
-							/* new directory entry falls into S_new[i] */
-
-							RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
-							RFALSE(sbytes[i] - 1 >= entry_count,
-							       "PAP-12220: there are no so much entries (%d), only %d",
-							       sbytes[i] - 1, entry_count);
-
-							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
-							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
-							/* Paste given directory entry to directory item */
-							buffer_info_init_bh(tb, &bi, S_new[i]);
-							leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
-							     tb->insert_size[0], body, zeros_num);
-							/* paste new directory entry */
-							leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
-									   (struct reiserfs_de_head *) body,
-									   body + DEH_SIZE, tb->insert_size[0]);
-							tb->insert_size[0] = 0;
-							pos_in_item++;
-						} else {	/* new directory entry doesn't fall into S_new[i] */
-							leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
-						}
-					} else {	/* regular object */
-
-						int n_shift, n_rem, r_zeros_number;
-						const char *r_body;
-
-						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
-						       "PAP-12225: item too short or insert_size <= 0");
-
-						/* Calculate number of bytes which must be shifted from appended item */
-						n_shift = sbytes[i] - tb->insert_size[0];
-						if (n_shift < 0)
-							n_shift = 0;
-						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
-
-						/* Calculate number of bytes which must remain in body after append to S_new[i] */
-						n_rem = tb->insert_size[0] - sbytes[i];
-						if (n_rem < 0)
-							n_rem = 0;
-						/* Append part of body into S_new[0] */
-						buffer_info_init_bh(tb, &bi, S_new[i]);
-						if (n_rem > zeros_num) {
-							r_zeros_number = 0;
-							r_body = body + n_rem - zeros_num;
-						} else {
-							r_body = body;
-							r_zeros_number = zeros_num - n_rem;
-							zeros_num -= r_zeros_number;
-						}
-
-						leaf_paste_in_buffer(&bi, 0, n_shift,
-								     tb->insert_size[0] - n_rem,
-								     r_body, r_zeros_number);
-						{
-							struct item_head *tmp;
-
-							tmp = B_N_PITEM_HEAD(S_new[i], 0);
-							if (is_indirect_le_ih
-							    (tmp)) {
-								set_ih_free_space(tmp, 0);
-								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
-							} else {
-								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
-							}
-						}
-
-						tb->insert_size[0] = n_rem;
-						if (!n_rem)
-							pos_in_item++;
-					}
-				} else
-					/* item falls wholly into S_new[i] */
-				{
-					int leaf_mi;
-					struct item_head *pasted;
-
-#ifdef CONFIG_REISERFS_CHECK
-					struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
-
-					if (!is_direntry_le_ih(ih_check)
-					    && (pos_in_item != ih_item_len(ih_check)
-						|| tb->insert_size[0] <= 0))
-						reiserfs_panic(tb->tb_sb,
-							     "PAP-12235",
-							     "pos_in_item "
-							     "must be equal "
-							     "to ih_item_len");
-#endif				/* CONFIG_REISERFS_CHECK */
-
-					leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
-							    tb, snum[i],
-							    sbytes[i],
-							    S_new[i]);
-
-					RFALSE(leaf_mi,
-					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
-					       leaf_mi);
-
-					/* paste into item */
-					buffer_info_init_bh(tb, &bi, S_new[i]);
-					leaf_paste_in_buffer(&bi,
-							     item_pos - n + snum[i],
-							     pos_in_item,
-							     tb->insert_size[0],
-							     body, zeros_num);
-
-					pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
-					if (is_direntry_le_ih(pasted)) {
-						leaf_paste_entries(&bi,
-								   item_pos - n + snum[i],
-								   pos_in_item, 1,
-								   (struct reiserfs_de_head *)body,
-								   body + DEH_SIZE,
-								   tb->insert_size[0]
-						    );
-					}
-
-					/* if we paste to indirect item update ih_free_space */
-					if (is_indirect_le_ih(pasted))
-						set_ih_free_space(pasted, 0);
-					zeros_num = tb->insert_size[0] = 0;
-				}
-			}
-
-			else {	/* pasted item doesn't fall into S_new[i] */
-
-				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
-						snum[i], sbytes[i], S_new[i]);
-			}
-			break;
-		default:	/* cases d and t */
-			reiserfs_panic(tb->tb_sb, "PAP-12245",
-				       "blknum > 2: unexpected mode: %s(%d)",
-				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
-		}
-
-		memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
-		insert_ptr[i] = S_new[i];
-
-		RFALSE(!buffer_journaled(S_new[i])
-		       || buffer_journal_dirty(S_new[i])
-		       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
-		       i, S_new[i]);
-	}
-
-	/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
-	   affected item which remains in S */
-	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
-
-		switch (flag) {
-		case M_INSERT:	/* insert item into S[0] */
-			buffer_info_init_tbS0(tb, &bi);
-			leaf_insert_into_buf(&bi, item_pos, ih, body,
-					     zeros_num);
-
-			/* If we insert the first key change the delimiting key */
-			if (item_pos == 0) {
-				if (tb->CFL[0])	/* can be 0 in reiserfsck */
-					replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
-			}
-			break;
-
-		case M_PASTE:{	/* append item in S[0] */
-				struct item_head *pasted;
-
-				pasted = B_N_PITEM_HEAD(tbS0, item_pos);
-				/* when directory, may be new entry already pasted */
-				if (is_direntry_le_ih(pasted)) {
-					if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
-
-						RFALSE(!tb->insert_size[0],
-						       "PAP-12260: insert_size is 0 already");
-
-						/* prepare space */
-						buffer_info_init_tbS0(tb, &bi);
-						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
-								     tb->insert_size[0], body,
-								     zeros_num);
-
-						/* paste entry */
-						leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
-								   (struct reiserfs_de_head *)body,
-								   body + DEH_SIZE,
-								   tb->insert_size[0]);
-						if (!item_pos && !pos_in_item) {
-							RFALSE(!tb->CFL[0] || !tb->L[0],
-							       "PAP-12270: CFL[0]/L[0] must be specified");
-							if (tb->CFL[0])
-								replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
-						}
-						tb->insert_size[0] = 0;
-					}
-				} else {	/* regular object */
-					if (pos_in_item == ih_item_len(pasted)) {
-
-						RFALSE(tb->insert_size[0] <= 0,
-						       "PAP-12275: insert size must not be %d",
-						       tb->insert_size[0]);
-						buffer_info_init_tbS0(tb, &bi);
-						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
-								     tb->insert_size[0], body, zeros_num);
-
-						if (is_indirect_le_ih(pasted)) {
-#if 0
-							RFALSE(tb->
-							       insert_size[0] !=
-							       UNFM_P_SIZE,
-							       "PAP-12280: insert_size for indirect item must be %d, not %d",
-							       UNFM_P_SIZE,
-							       tb->
-							       insert_size[0]);
-#endif
-							set_ih_free_space(pasted, 0);
-						}
-						tb->insert_size[0] = 0;
-					}
-#ifdef CONFIG_REISERFS_CHECK
-					else {
-						if (tb->insert_size[0]) {
-							print_cur_tb("12285");
-							reiserfs_panic(tb->tb_sb,
-							    "PAP-12285",
-							    "insert_size "
-							    "must be 0 "
-							    "(%d)",
-							    tb->insert_size[0]);
-						}
-					}
-#endif				/* CONFIG_REISERFS_CHECK */
-
-				}
-			}	/* case M_PASTE: */
-		}
-	}
 #ifdef CONFIG_REISERFS_CHECK
 	if (flag == M_PASTE && tb->insert_size[0]) {
 		print_cur_tb("12290");
@@ -1137,9 +1454,11 @@
 			       "PAP-12290", "insert_size is still not 0 (%d)",
 			       tb->insert_size[0]);
 	}
-#endif				/* CONFIG_REISERFS_CHECK */
+#endif
+
+	/* Leaf level of the tree is balanced (end of balance_leaf) */
 	return 0;
-}				/* Leaf level of the tree is balanced (end of balance_leaf) */
+}
 
 /* Make empty node */
 void make_empty_node(struct buffer_info *bi)
@@ -1178,9 +1497,7 @@
 	return tb->used[i];
 }
 
-/* This is now used because reiserfs_free_block has to be able to
-** schedule.
-*/
+/* This is now used because reiserfs_free_block has to be able to schedule. */
 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
 {
 	int i;
@@ -1246,10 +1563,10 @@
 
 	if (B_IS_ITEMS_LEVEL(src))
 		/* source buffer contains leaf node */
-		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
+		memcpy(internal_key(dest, n_dest), item_head(src, n_src),
 		       KEY_SIZE);
 	else
-		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
+		memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
 		       KEY_SIZE);
 
 	do_balance_mark_internal_dirty(tb, dest, 0);
@@ -1335,8 +1652,10 @@
 			       "mount point.");
 	}
 
-	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
-	   prepped all of these for us). */
+	/*
+	 * double check that buffers that we will modify are unlocked.
+	 * (fix_nodes should already have prepped all of these for us).
+	 */
 	if (tb->lnum[0]) {
 		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
 		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
@@ -1429,49 +1748,51 @@
 
 #endif
 
-/* Now we have all of the buffers that must be used in balancing of
-   the tree.  We rely on the assumption that schedule() will not occur
-   while do_balance works. ( Only interrupt handlers are acceptable.)
-   We balance the tree according to the analysis made before this,
-   using buffers already obtained.  For SMP support it will someday be
-   necessary to add ordered locking of tb. */
+/*
+ * Now we have all of the buffers that must be used in balancing of
+ * the tree.  We rely on the assumption that schedule() will not occur
+ * while do_balance works. ( Only interrupt handlers are acceptable.)
+ * We balance the tree according to the analysis made before this,
+ * using buffers already obtained.  For SMP support it will someday be
+ * necessary to add ordered locking of tb.
+ */
 
-/* Some interesting rules of balancing:
-
-   we delete a maximum of two nodes per level per balancing: we never
-   delete R, when we delete two of three nodes L, S, R then we move
-   them into R.
-
-   we only delete L if we are deleting two nodes, if we delete only
-   one node we delete S
-
-   if we shift leaves then we shift as much as we can: this is a
-   deliberate policy of extremism in node packing which results in
-   higher average utilization after repeated random balance operations
-   at the cost of more memory copies and more balancing as a result of
-   small insertions to full nodes.
-
-   if we shift internal nodes we try to evenly balance the node
-   utilization, with consequent less balancing at the cost of lower
-   utilization.
-
-   one could argue that the policy for directories in leaves should be
-   that of internal nodes, but we will wait until another day to
-   evaluate this....  It would be nice to someday measure and prove
-   these assumptions as to what is optimal....
-
-*/
+/*
+ * Some interesting rules of balancing:
+ * we delete a maximum of two nodes per level per balancing: we never
+ * delete R, when we delete two of three nodes L, S, R then we move
+ * them into R.
+ *
+ * we only delete L if we are deleting two nodes, if we delete only
+ * one node we delete S
+ *
+ * if we shift leaves then we shift as much as we can: this is a
+ * deliberate policy of extremism in node packing which results in
+ * higher average utilization after repeated random balance operations
+ * at the cost of more memory copies and more balancing as a result of
+ * small insertions to full nodes.
+ *
+ * if we shift internal nodes we try to evenly balance the node
+ * utilization, with consequent less balancing at the cost of lower
+ * utilization.
+ *
+ * one could argue that the policy for directories in leaves should be
+ * that of internal nodes, but we will wait until another day to
+ * evaluate this....  It would be nice to someday measure and prove
+ * these assumptions as to what is optimal....
+ */
 
 static inline void do_balance_starts(struct tree_balance *tb)
 {
-	/* use print_cur_tb() to see initial state of struct
-	   tree_balance */
+	/* use print_cur_tb() to see initial state of struct tree_balance */
 
 	/* store_print_tb (tb); */
 
 	/* do not delete, just comment it out */
-/*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
-	     "check");*/
+	/*
+	print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
+		 tb->tb_path->pos_in_item, tb, "check");
+	*/
 	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
 #ifdef CONFIG_REISERFS_CHECK
 	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
@@ -1487,9 +1808,10 @@
 	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
 #endif
 
-	/* reiserfs_free_block is no longer schedule safe.  So, we need to
-	 ** put the buffers we want freed on the thrown list during do_balance,
-	 ** and then free them now
+	/*
+	 * reiserfs_free_block is no longer schedule safe.  So, we need to
+	 * put the buffers we want freed on the thrown list during do_balance,
+	 * and then free them now
 	 */
 
 	REISERFS_SB(tb->tb_sb)->s_do_balance++;
@@ -1500,36 +1822,40 @@
 	free_thrown(tb);
 }
 
-void do_balance(struct tree_balance *tb,	/* tree_balance structure */
-		struct item_head *ih,	/* item header of inserted item */
-		const char *body,	/* body  of inserted item or bytes to paste */
-		int flag)
-{				/* i - insert, d - delete
-				   c - cut, p - paste
+/*
+ * do_balance - balance the tree
+ *
+ * @tb: tree_balance structure
+ * @ih: item header of inserted item
+ * @body: body of inserted item or bytes to paste
+ * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' - paste
+ *
+ * Cut means delete part of an item (includes removing an entry from a
+ * directory).
+ *
+ * Delete means delete whole item.
+ *
+ * Insert means add a new item into the tree.
+ *
+ * Paste means to append to the end of an existing file or to
+ * insert a directory entry.
+ */
+void do_balance(struct tree_balance *tb, struct item_head *ih,
+		const char *body, int flag)
+{
+	int child_pos;		/* position of a child node in its parent */
+	int h;			/* level of the tree being processed */
 
-				   Cut means delete part of an item
-				   (includes removing an entry from a
-				   directory).
+	/*
+	 * in our processing of one level we sometimes determine what
+	 * must be inserted into the next higher level.  This insertion
+	 * consists of a key or two keys and their corresponding
+	 * pointers
+	 */
+	struct item_head insert_key[2];
 
-				   Delete means delete whole item.
-
-				   Insert means add a new item into the
-				   tree.
-
-				   Paste means to append to the end of an
-				   existing file or to insert a directory
-				   entry.  */
-	int child_pos,		/* position of a child node in its parent */
-	 h;			/* level of the tree being processed */
-	struct item_head insert_key[2];	/* in our processing of one level
-					   we sometimes determine what
-					   must be inserted into the next
-					   higher level.  This insertion
-					   consists of a key or two keys
-					   and their corresponding
-					   pointers */
-	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
-						   level */
+	/* inserted node-ptrs for the next level */
+	struct buffer_head *insert_ptr[2];
 
 	tb->tb_mode = flag;
 	tb->need_balance_dirty = 0;
@@ -1546,12 +1872,14 @@
 		return;
 	}
 
-	atomic_inc(&(fs_generation(tb->tb_sb)));
+	atomic_inc(&fs_generation(tb->tb_sb));
 	do_balance_starts(tb);
 
-	/* balance leaf returns 0 except if combining L R and S into
-	   one node.  see balance_internal() for explanation of this
-	   line of code. */
+	/*
+	 * balance_leaf returns 0 except if combining L R and S into
+	 * one node.  see balance_internal() for explanation of this
+	 * line of code.
+	 */
 	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
 	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
 
@@ -1561,9 +1889,8 @@
 
 	/* Balance internal level of the tree. */
 	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
-		child_pos =
-		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);
+		child_pos = balance_internal(tb, h, child_pos, insert_key,
+					     insert_ptr);
 
 	do_balance_completed(tb);
-
 }

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ed58d84..db9e80b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c

@@ -15,20 +15,20 @@
 #include <linux/quotaops.h>
 
 /*
-** We pack the tails of files on file close, not at the time they are written.
-** This implies an unnecessary copy of the tail and an unnecessary indirect item
-** insertion/balancing, for files that are written in one write.
-** It avoids unnecessary tail packings (balances) for files that are written in
-** multiple writes and are small enough to have tails.
-**
-** file_release is called by the VFS layer when the file is closed.  If
-** this is the last open file descriptor, and the file
-** small enough to have a tail, and the tail is currently in an
-** unformatted node, the tail is converted back into a direct item.
-**
-** We use reiserfs_truncate_file to pack the tail, since it already has
-** all the conditions coded.
-*/
+ * We pack the tails of files on file close, not at the time they are written.
+ * This implies an unnecessary copy of the tail and an unnecessary indirect item
+ * insertion/balancing, for files that are written in one write.
+ * It avoids unnecessary tail packings (balances) for files that are written in
+ * multiple writes and are small enough to have tails.
+ *
+ * file_release is called by the VFS layer when the file is closed.  If
+ * this is the last open file descriptor, and the file
+ * small enough to have a tail, and the tail is currently in an
+ * unformatted node, the tail is converted back into a direct item.
+ *
+ * We use reiserfs_truncate_file to pack the tail, since it already has
+ * all the conditions coded.
+ */
 static int reiserfs_file_release(struct inode *inode, struct file *filp)
 {
 
@@ -41,10 +41,10 @@
         if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
 		return 0;
 
-	mutex_lock(&(REISERFS_I(inode)->tailpack));
+	mutex_lock(&REISERFS_I(inode)->tailpack);
 
         if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
-		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		mutex_unlock(&REISERFS_I(inode)->tailpack);
 		return 0;
 	}
 
@@ -52,31 +52,35 @@
 	if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
 	     !tail_has_to_be_packed(inode)) &&
 	    REISERFS_I(inode)->i_prealloc_count <= 0) {
-		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		mutex_unlock(&REISERFS_I(inode)->tailpack);
 		return 0;
 	}
 
 	reiserfs_write_lock(inode->i_sb);
-	/* freeing preallocation only involves relogging blocks that
+	/*
+	 * freeing preallocation only involves relogging blocks that
 	 * are already in the current transaction.  preallocation gets
 	 * freed at the end of each transaction, so it is impossible for
 	 * us to log any additional blocks (including quota blocks)
 	 */
 	err = journal_begin(&th, inode->i_sb, 1);
 	if (err) {
-		/* uh oh, we can't allow the inode to go away while there
+		/*
+		 * uh oh, we can't allow the inode to go away while there
 		 * is still preallocation blocks pending.  Try to join the
 		 * aborted transaction
 		 */
 		jbegin_failure = err;
-		err = journal_join_abort(&th, inode->i_sb, 1);
+		err = journal_join_abort(&th, inode->i_sb);
 
 		if (err) {
-			/* hmpf, our choices here aren't good.  We can pin the inode
-			 * which will disallow unmount from every happening, we can
-			 * do nothing, which will corrupt random memory on unmount,
-			 * or we can forcibly remove the file from the preallocation
-			 * list, which will leak blocks on disk.  Lets pin the inode
+			/*
+			 * hmpf, our choices here aren't good.  We can pin
+			 * the inode which will disallow unmount from ever
+			 * happening, we can do nothing, which will corrupt
+			 * random memory on unmount, or we can forcibly
+			 * remove the file from the preallocation list, which
+			 * will leak blocks on disk.  Let's pin the inode
 			 * and let the admin know what is going on.
 			 */
 			igrab(inode);
@@ -92,7 +96,7 @@
 #ifdef REISERFS_PREALLOCATE
 	reiserfs_discard_prealloc(&th, inode);
 #endif
-	err = journal_end(&th, inode->i_sb, 1);
+	err = journal_end(&th);
 
 	/* copy back the error code from journal_begin */
 	if (!err)
@@ -102,35 +106,38 @@
 	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
 	    tail_has_to_be_packed(inode)) {
 
-		/* if regular file is released by last holder and it has been
-		   appended (we append by unformatted node only) or its direct
-		   item(s) had to be converted, then it may have to be
-		   indirect2direct converted */
+		/*
+		 * if regular file is released by last holder and it has been
+		 * appended (we append by unformatted node only) or its direct
+		 * item(s) had to be converted, then it may have to be
+		 * indirect2direct converted
+		 */
 		err = reiserfs_truncate_file(inode, 0);
 	}
-      out:
+out:
 	reiserfs_write_unlock(inode->i_sb);
-	mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	mutex_unlock(&REISERFS_I(inode)->tailpack);
 	return err;
 }
 
 static int reiserfs_file_open(struct inode *inode, struct file *file)
 {
 	int err = dquot_file_open(inode, file);
+
+	/* somebody might be tailpacking on final close; wait for it */
         if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
-		/* somebody might be tailpacking on final close; wait for it */
-		mutex_lock(&(REISERFS_I(inode)->tailpack));
+		mutex_lock(&REISERFS_I(inode)->tailpack);
 		atomic_inc(&REISERFS_I(inode)->openers);
-		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		mutex_unlock(&REISERFS_I(inode)->tailpack);
 	}
 	return err;
 }
 
 void reiserfs_vfs_truncate_file(struct inode *inode)
 {
-	mutex_lock(&(REISERFS_I(inode)->tailpack));
+	mutex_lock(&REISERFS_I(inode)->tailpack);
 	reiserfs_truncate_file(inode, 1);
-	mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	mutex_unlock(&REISERFS_I(inode)->tailpack);
 }
 
 /* Sync a reiserfs file. */
@@ -205,10 +212,11 @@
 			set_buffer_uptodate(bh);
 			if (logit) {
 				reiserfs_prepare_for_journal(s, bh, 1);
-				journal_mark_dirty(&th, s, bh);
+				journal_mark_dirty(&th, bh);
 			} else if (!buffer_dirty(bh)) {
 				mark_buffer_dirty(bh);
-				/* do data=ordered on any page past the end
+				/*
+				 * do data=ordered on any page past the end
 				 * of file and any buffer marked BH_New.
 				 */
 				if (reiserfs_data_ordered(inode->i_sb) &&
@@ -219,8 +227,8 @@
 		}
 	}
 	if (logit) {
-		ret = journal_end(&th, s, bh_per_page + 1);
-	      drop_write_lock:
+		ret = journal_end(&th);
+drop_write_lock:
 		reiserfs_write_unlock(s);
 	}
 	/*
@@ -235,8 +243,8 @@
 }
 
 const struct file_operations reiserfs_file_operations = {
-	.read = do_sync_read,
-	.write = do_sync_write,
+	.read = new_sync_read,
+	.write = new_sync_write,
 	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
@@ -245,10 +253,10 @@
 	.open = reiserfs_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
-	.aio_read = generic_file_aio_read,
-	.aio_write = generic_file_aio_write,
+	.read_iter = generic_file_read_iter,
+	.write_iter = generic_file_write_iter,
 	.splice_read = generic_file_splice_read,
-	.splice_write = generic_file_splice_write,
+	.splice_write = iter_file_splice_write,
 	.llseek = generic_file_llseek,
 };
 

diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index dc4d415..6b0ddb2 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c

@@ -2,59 +2,32 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
-/**
- ** old_item_num
- ** old_entry_num
- ** set_entry_sizes
- ** create_virtual_node
- ** check_left
- ** check_right
- ** directory_part_size
- ** get_num_ver
- ** set_parameters
- ** is_leaf_removable
- ** are_leaves_removable
- ** get_empty_nodes
- ** get_lfree
- ** get_rfree
- ** is_left_neighbor_in_cache
- ** decrement_key
- ** get_far_parent
- ** get_parents
- ** can_node_be_removed
- ** ip_check_balance
- ** dc_check_balance_internal
- ** dc_check_balance_leaf
- ** dc_check_balance
- ** check_balance
- ** get_direct_parent
- ** get_neighbors
- ** fix_nodes
- **
- **
- **/
-
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "reiserfs.h"
 #include <linux/buffer_head.h>
 
-/* To make any changes in the tree we find a node, that contains item
-   to be changed/deleted or position in the node we insert a new item
-   to. We call this node S. To do balancing we need to decide what we
-   will shift to left/right neighbor, or to a new node, where new item
-   will be etc. To make this analysis simpler we build virtual
-   node. Virtual node is an array of items, that will replace items of
-   node S. (For instance if we are going to delete an item, virtual
-   node does not contain it). Virtual node keeps information about
-   item sizes and types, mergeability of first and last items, sizes
-   of all entries in directory item. We use this array of items when
-   calculating what we can shift to neighbors and how many nodes we
-   have to have if we do not any shiftings, if we shift to left/right
-   neighbor or to both. */
+/*
+ * To make any changes in the tree we find a node that contains item
+ * to be changed/deleted or position in the node we insert a new item
+ * to. We call this node S. To do balancing we need to decide what we
+ * will shift to left/right neighbor, or to a new node, where new item
+ * will be etc. To make this analysis simpler we build virtual
+ * node. Virtual node is an array of items, that will replace items of
+ * node S. (For instance if we are going to delete an item, virtual
+ * node does not contain it). Virtual node keeps information about
+ * item sizes and types, mergeability of first and last items, sizes
+ * of all entries in directory item. We use this array of items when
+ * calculating what we can shift to neighbors and how many nodes we
+ * have to have if we do not do any shifting, if we shift to left/right
+ * neighbor or to both.
+ */
 
-/* taking item number in virtual node, returns number of item, that it has in source buffer */
+/*
+ * Takes item number in virtual node, returns number of item
+ * that it has in source buffer
+ */
 static inline int old_item_num(int new_num, int affected_item_num, int mode)
 {
 	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
@@ -105,14 +78,17 @@
 	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
 
 	/* first item in the node */
-	ih = B_N_PITEM_HEAD(Sh, 0);
+	ih = item_head(Sh, 0);
 
 	/* define the mergeability for 0-th item (if it is not being deleted) */
-	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
+	if (op_is_left_mergeable(&ih->ih_key, Sh->b_size)
 	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
 		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
 
-	/* go through all items those remain in the virtual node (except for the new (inserted) one) */
+	/*
+	 * go through all items that remain in the virtual
+	 * node (except for the new (inserted) one)
+	 */
 	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
 		int j;
 		struct virtual_item *vi = vn->vn_vi + new_num;
@@ -128,11 +104,13 @@
 
 		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
 		vi->vi_ih = ih + j;
-		vi->vi_item = B_I_PITEM(Sh, ih + j);
+		vi->vi_item = ih_item_body(Sh, ih + j);
 		vi->vi_uarea = vn->vn_free_ptr;
 
-		// FIXME: there is no check, that item operation did not
-		// consume too much memory
+		/*
+		 * FIXME: there is no check that item operation did not
+		 * consume too much memory
+		 */
 		vn->vn_free_ptr +=
 		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
 		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
@@ -145,7 +123,8 @@
 
 		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
 			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
-			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
+			/* pointer to data which is going to be pasted */
+			vi->vi_new_data = vn->vn_data;
 		}
 	}
 
@@ -164,11 +143,14 @@
 			     tb->insert_size[0]);
 	}
 
-	/* set right merge flag we take right delimiting key and check whether it is a mergeable item */
+	/*
+	 * set right merge flag: we take right delimiting key and
+	 * check whether it is a mergeable item
+	 */
 	if (tb->CFR[0]) {
 		struct reiserfs_key *key;
 
-		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
+		key = internal_key(tb->CFR[0], tb->rkey[0]);
 		if (op_is_left_mergeable(key, Sh->b_size)
 		    && (vn->vn_mode != M_DELETE
 			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
@@ -179,12 +161,19 @@
 		if (op_is_left_mergeable(key, Sh->b_size) &&
 		    !(vn->vn_mode != M_DELETE
 		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
-			/* we delete last item and it could be merged with right neighbor's first item */
+			/*
+			 * we delete last item and it could be merged
+			 * with right neighbor's first item
+			 */
 			if (!
 			    (B_NR_ITEMS(Sh) == 1
-			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
-			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
-				/* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
+			     && is_direntry_le_ih(item_head(Sh, 0))
+			     && ih_entry_count(item_head(Sh, 0)) == 1)) {
+				/*
+				 * node contains more than 1 item, or item
+				 * is not directory item, or this item
+				 * contains more than 1 entry
+				 */
 				print_block(Sh, 0, -1, -1);
 				reiserfs_panic(tb->tb_sb, "vs-8045",
 					       "rdkey %k, affected item==%d "
@@ -198,8 +187,10 @@
 	}
 }
 
-/* using virtual node check, how many items can be shifted to left
-   neighbor */
+/*
+ * Using virtual node check, how many items can be
+ * shifted to left neighbor
+ */
 static void check_left(struct tree_balance *tb, int h, int cur_free)
 {
 	int i;
@@ -259,9 +250,13 @@
 		}
 
 		/* the item cannot be shifted entirely, try to split it */
-		/* check whether L[0] can hold ih and at least one byte of the item body */
+		/*
+		 * check whether L[0] can hold ih and at least one byte
+		 * of the item body
+		 */
+
+		/* cannot shift even a part of the current item */
 		if (cur_free <= ih_size) {
-			/* cannot shift even a part of the current item */
 			tb->lbytes = -1;
 			return;
 		}
@@ -278,8 +273,10 @@
 	return;
 }
 
-/* using virtual node check, how many items can be shifted to right
-   neighbor */
+/*
+ * Using virtual node check, how many items can be
+ * shifted to right neighbor
+ */
 static void check_right(struct tree_balance *tb, int h, int cur_free)
 {
 	int i;
@@ -338,13 +335,21 @@
 			continue;
 		}
 
-		/* check whether R[0] can hold ih and at least one byte of the item body */
-		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
+		/*
+		 * check whether R[0] can hold ih and at least one
+		 * byte of the item body
+		 */
+
+		/* cannot shift even a part of the current item */
+		if (cur_free <= ih_size) {
 			tb->rbytes = -1;
 			return;
 		}
 
-		/* R[0] can hold the header of the item and at least one byte of its body */
+		/*
+		 * R[0] can hold the header of the item and at least
+		 * one byte of its body
+		 */
 		cur_free -= ih_size;	/* cur_free is still > 0 */
 
 		tb->rbytes = op_check_right(vi, cur_free);
@@ -361,45 +366,64 @@
 /*
  * from - number of items, which are shifted to left neighbor entirely
  * to - number of item, which are shifted to right neighbor entirely
- * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
- * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
+ * from_bytes - number of bytes of boundary item (or directory entries)
+ *              which are shifted to left neighbor
+ * to_bytes - number of bytes of boundary item (or directory entries)
+ *            which are shifted to right neighbor
+ */
 static int get_num_ver(int mode, struct tree_balance *tb, int h,
 		       int from, int from_bytes,
 		       int to, int to_bytes, short *snum012, int flow)
 {
 	int i;
 	int cur_free;
-	//    int bytes;
 	int units;
 	struct virtual_node *vn = tb->tb_vn;
-	//    struct virtual_item * vi;
-
 	int total_node_size, max_node_size, current_item_size;
 	int needed_nodes;
-	int start_item,		/* position of item we start filling node from */
-	 end_item,		/* position of item we finish filling node by */
-	 start_bytes,		/* number of first bytes (entries for directory) of start_item-th item
-				   we do not include into node that is being filled */
-	 end_bytes;		/* number of last bytes (entries for directory) of end_item-th item
-				   we do node include into node that is being filled */
-	int split_item_positions[2];	/* these are positions in virtual item of
-					   items, that are split between S[0] and
-					   S1new and S1new and S2new */
+
+	/* position of item we start filling node from */
+	int start_item;
+
+	/* position of item we finish filling node by */
+	int end_item;
+
+	/*
+	 * number of first bytes (entries for directory) of start_item-th item
+	 * we do not include into node that is being filled
+	 */
+	int start_bytes;
+
+	/*
+	 * number of last bytes (entries for directory) of end_item-th item
+	 * we do not include into node that is being filled
+	 */
+	int end_bytes;
+
+	/*
+	 * these are positions in virtual item of items, that are split
+	 * between S[0] and S1new and S1new and S2new
+	 */
+	int split_item_positions[2];
 
 	split_item_positions[0] = -1;
 	split_item_positions[1] = -1;
 
-	/* We only create additional nodes if we are in insert or paste mode
-	   or we are in replace mode at the internal level. If h is 0 and
-	   the mode is M_REPLACE then in fix_nodes we change the mode to
-	   paste or insert before we get here in the code.  */
+	/*
+	 * We only create additional nodes if we are in insert or paste mode
+	 * or we are in replace mode at the internal level. If h is 0 and
+	 * the mode is M_REPLACE then in fix_nodes we change the mode to
+	 * paste or insert before we get here in the code.
+	 */
 	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
 	       "vs-8100: insert_size < 0 in overflow");
 
 	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
 
-	/* snum012 [0-2] - number of items, that lay
-	   to S[0], first new node and second new node */
+	/*
+	 * snum012 [0-2] - number of items, that lay
+	 * to S[0], first new node and second new node
+	 */
 	snum012[3] = -1;	/* s1bytes */
 	snum012[4] = -1;	/* s2bytes */
 
@@ -416,20 +440,22 @@
 	total_node_size = 0;
 	cur_free = max_node_size;
 
-	// start from 'from'-th item
+	/* start from 'from'-th item */
 	start_item = from;
-	// skip its first 'start_bytes' units
+	/* skip its first 'start_bytes' units */
 	start_bytes = ((from_bytes != -1) ? from_bytes : 0);
 
-	// last included item is the 'end_item'-th one
+	/* last included item is the 'end_item'-th one */
 	end_item = vn->vn_nr_item - to - 1;
-	// do not count last 'end_bytes' units of 'end_item'-th item
+	/* do not count last 'end_bytes' units of 'end_item'-th item */
 	end_bytes = (to_bytes != -1) ? to_bytes : 0;
 
-	/* go through all item beginning from the start_item-th item and ending by
-	   the end_item-th item. Do not count first 'start_bytes' units of
-	   'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
-
+	/*
+	 * go through all items beginning from the start_item-th item
+	 * and ending by the end_item-th item. Do not count first
+	 * 'start_bytes' units of 'start_item'-th item and last
+	 * 'end_bytes' of 'end_item'-th item
+	 */
 	for (i = start_item; i <= end_item; i++) {
 		struct virtual_item *vi = vn->vn_vi + i;
 		int skip_from_end = ((i == end_item) ? end_bytes : 0);
@@ -439,7 +465,10 @@
 		/* get size of current item */
 		current_item_size = vi->vi_item_len;
 
-		/* do not take in calculation head part (from_bytes) of from-th item */
+		/*
+		 * do not take in calculation head part (from_bytes)
+		 * of from-th item
+		 */
 		current_item_size -=
 		    op_part_size(vi, 0 /*from start */ , start_bytes);
 
@@ -455,9 +484,11 @@
 			continue;
 		}
 
+		/*
+		 * virtual item length is longer, than max size of item in
+		 * a node. It is impossible for direct item
+		 */
 		if (current_item_size > max_node_size) {
-			/* virtual item length is longer, than max size of item in
-			   a node. It is impossible for direct item */
 			RFALSE(is_direct_le_ih(vi->vi_ih),
 			       "vs-8110: "
 			       "direct item length is %d. It can not be longer than %d",
@@ -466,15 +497,18 @@
 			flow = 1;
 		}
 
+		/* as we do not split items, take new node and continue */
 		if (!flow) {
-			/* as we do not split items, take new node and continue */
 			needed_nodes++;
 			i--;
 			total_node_size = 0;
 			continue;
 		}
-		// calculate number of item units which fit into node being
-		// filled
+
+		/*
+		 * calculate number of item units which fit into node being
+		 * filled
+		 */
 		{
 			int free_space;
 
@@ -482,17 +516,17 @@
 			units =
 			    op_check_left(vi, free_space, start_bytes,
 					  skip_from_end);
+			/*
+			 * nothing fits into current node, take new
+			 * node and continue
+			 */
 			if (units == -1) {
-				/* nothing fits into current node, take new node and continue */
 				needed_nodes++, i--, total_node_size = 0;
 				continue;
 			}
 		}
 
 		/* something fits into the current node */
-		//if (snum012[3] != -1 || needed_nodes != 1)
-		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
-		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
 		start_bytes += units;
 		snum012[needed_nodes - 1 + 3] = units;
 
@@ -508,9 +542,11 @@
 		total_node_size = 0;
 	}
 
-	// sum012[4] (if it is not -1) contains number of units of which
-	// are to be in S1new, snum012[3] - to be in S0. They are supposed
-	// to be S1bytes and S2bytes correspondingly, so recalculate
+	/*
+	 * snum012[4] (if it is not -1) contains number of units which
+	 * are to be in S1new, snum012[3] - to be in S0. They are supposed
+	 * to be S1bytes and S2bytes correspondingly, so recalculate
+	 */
 	if (snum012[4] > 0) {
 		int split_item_num;
 		int bytes_to_r, bytes_to_l;
@@ -527,7 +563,7 @@
 		    ((split_item_positions[0] ==
 		      split_item_positions[1]) ? snum012[3] : 0);
 
-		// s2bytes
+		/* s2bytes */
 		snum012[4] =
 		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
 		    bytes_to_r - bytes_to_l - bytes_to_S1new;
@@ -555,7 +591,7 @@
 		    ((split_item_positions[0] == split_item_positions[1]
 		      && snum012[4] != -1) ? snum012[4] : 0);
 
-		// s1bytes
+		/* s1bytes */
 		snum012[3] =
 		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
 		    bytes_to_r - bytes_to_l - bytes_to_S2new;
@@ -565,7 +601,8 @@
 }
 
 
-/* Set parameters for balancing.
+/*
+ * Set parameters for balancing.
  * Performs write of results of analysis of balancing into structure tb,
  * where it will later be used by the functions that actually do the balancing.
  * Parameters:
@@ -575,11 +612,12 @@
  *	rnum	number of items from S[h] that must be shifted to R[h];
  *	blk_num	number of blocks that S[h] will be splitted into;
  *	s012	number of items that fall into splitted nodes.
- *	lbytes	number of bytes which flow to the left neighbor from the item that is not
- *		not shifted entirely
- *	rbytes	number of bytes which flow to the right neighbor from the item that is not
- *		not shifted entirely
- *	s1bytes	number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
+ *	lbytes	number of bytes which flow to the left neighbor from the
+ *              item that is not shifted entirely
+ *	rbytes	number of bytes which flow to the right neighbor from the
+ *              item that is not shifted entirely
+ *	s1bytes	number of bytes which flow to the first  new node when
+ *              S[0] splits (this number is contained in s012 array)
  */
 
 static void set_parameters(struct tree_balance *tb, int h, int lnum,
@@ -590,12 +628,14 @@
 	tb->rnum[h] = rnum;
 	tb->blknum[h] = blk_num;
 
-	if (h == 0) {		/* only for leaf level */
+	/* only for leaf level */
+	if (h == 0) {
 		if (s012 != NULL) {
-			tb->s0num = *s012++,
-			    tb->s1num = *s012++, tb->s2num = *s012++;
-			tb->s1bytes = *s012++;
-			tb->s2bytes = *s012;
+			tb->s0num = *s012++;
+			tb->snum[0] = *s012++;
+			tb->snum[1] = *s012++;
+			tb->sbytes[0] = *s012++;
+			tb->sbytes[1] = *s012;
 		}
 		tb->lbytes = lb;
 		tb->rbytes = rb;
@@ -607,8 +647,10 @@
 	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
 }
 
-/* check, does node disappear if we shift tb->lnum[0] items to left
-   neighbor and tb->rnum[0] to the right one. */
+/*
+ * check if node disappears if we shift tb->lnum[0] items to left
+ * neighbor and tb->rnum[0] to the right one.
+ */
 static int is_leaf_removable(struct tree_balance *tb)
 {
 	struct virtual_node *vn = tb->tb_vn;
@@ -616,8 +658,10 @@
 	int size;
 	int remain_items;
 
-	/* number of items, that will be shifted to left (right) neighbor
-	   entirely */
+	/*
+	 * number of items that will be shifted to left (right) neighbor
+	 * entirely
+	 */
 	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
 	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
 	remain_items = vn->vn_nr_item;
@@ -625,21 +669,21 @@
 	/* how many items remain in S[0] after shiftings to neighbors */
 	remain_items -= (to_left + to_right);
 
+	/* all content of node can be shifted to neighbors */
 	if (remain_items < 1) {
-		/* all content of node can be shifted to neighbors */
 		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
 			       NULL, -1, -1);
 		return 1;
 	}
 
+	/* S[0] is not removable */
 	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
-		/* S[0] is not removable */
 		return 0;
 
-	/* check, whether we can divide 1 remaining item between neighbors */
+	/* check whether we can divide 1 remaining item between neighbors */
 
 	/* get size of remaining item (in item units) */
-	size = op_unit_num(&(vn->vn_vi[to_left]));
+	size = op_unit_num(&vn->vn_vi[to_left]);
 
 	if (tb->lbytes + tb->rbytes >= size) {
 		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
@@ -675,23 +719,28 @@
 		       "vs-8125: item number must be 1: it is %d",
 		       B_NR_ITEMS(S0));
 
-		ih = B_N_PITEM_HEAD(S0, 0);
+		ih = item_head(S0, 0);
 		if (tb->CFR[0]
-		    && !comp_short_le_keys(&(ih->ih_key),
-					   B_N_PDELIM_KEY(tb->CFR[0],
+		    && !comp_short_le_keys(&ih->ih_key,
+					   internal_key(tb->CFR[0],
 							  tb->rkey[0])))
+			/*
+			 * Directory must be in correct state here: that is
+			 * somewhere at the left side should exist first
+			 * directory item. But the item being deleted can
+			 * not be that first one because its right neighbor
+			 * is item of the same directory. (But first item
+			 * always gets deleted in last turn). So, neighbors
+			 * of deleted item can be merged, so we can save
+			 * ih_size
+			 */
 			if (is_direntry_le_ih(ih)) {
-				/* Directory must be in correct state here: that is
-				   somewhere at the left side should exist first directory
-				   item. But the item being deleted can not be that first
-				   one because its right neighbor is item of the same
-				   directory. (But first item always gets deleted in last
-				   turn). So, neighbors of deleted item can be merged, so
-				   we can save ih_size */
 				ih_size = IH_SIZE;
 
-				/* we might check that left neighbor exists and is of the
-				   same directory */
+				/*
+				 * we might check that left neighbor exists
+				 * and is of the same directory
+				 */
 				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
 				       "vs-8130: first directory item can not be removed until directory is not empty");
 			}
@@ -770,7 +819,8 @@
 	}
 }
 
-/* Get new buffers for storing new nodes that are created while balancing.
+/*
+ * Get new buffers for storing new nodes that are created while balancing.
  * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
  *	        CARRY_ON - schedule didn't occur while the function worked;
  *	        NO_DISK_SPACE - no disk space.
@@ -778,28 +828,33 @@
 /* The function is NOT SCHEDULE-SAFE! */
 static int get_empty_nodes(struct tree_balance *tb, int h)
 {
-	struct buffer_head *new_bh,
-	    *Sh = PATH_H_PBUFFER(tb->tb_path, h);
+	struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h);
 	b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
-	int counter, number_of_freeblk, amount_needed,	/* number of needed empty blocks */
-	 retval = CARRY_ON;
+	int counter, number_of_freeblk;
+	int  amount_needed;	/* number of needed empty blocks */
+	int  retval = CARRY_ON;
 	struct super_block *sb = tb->tb_sb;
 
-	/* number_of_freeblk is the number of empty blocks which have been
-	   acquired for use by the balancing algorithm minus the number of
-	   empty blocks used in the previous levels of the analysis,
-	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
-	   after empty blocks are acquired, and the balancing analysis is
-	   then restarted, amount_needed is the number needed by this level
-	   (h) of the balancing analysis.
+	/*
+	 * number_of_freeblk is the number of empty blocks which have been
+	 * acquired for use by the balancing algorithm minus the number of
+	 * empty blocks used in the previous levels of the analysis,
+	 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
+	 * occurs after empty blocks are acquired, and the balancing analysis
+	 * is then restarted, amount_needed is the number needed by this
+	 * level (h) of the balancing analysis.
+	 *
+	 * Note that for systems with many processes writing, it would be
+	 * more layout optimal to calculate the total number needed by all
+	 * levels and then to run reiserfs_new_blocks to get all of them at
+	 * once.
+	 */
 
-	   Note that for systems with many processes writing, it would be
-	   more layout optimal to calculate the total number needed by all
-	   levels and then to run reiserfs_new_blocks to get all of them at once.  */
-
-	/* Initiate number_of_freeblk to the amount acquired prior to the restart of
-	   the analysis or 0 if not restarted, then subtract the amount needed
-	   by all of the levels of the tree below h. */
+	/*
+	 * Initiate number_of_freeblk to the amount acquired prior to the
+	 * restart of the analysis or 0 if not restarted, then subtract the
+	 * amount needed by all of the levels of the tree below h.
+	 */
 	/* blknum includes S[h], so we subtract 1 in this calculation */
 	for (counter = 0, number_of_freeblk = tb->cur_blknum;
 	     counter < h; counter++)
@@ -810,13 +865,19 @@
 	/* Allocate missing empty blocks. */
 	/* if Sh == 0  then we are getting a new root */
 	amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
-	/*  Amount_needed = the amount that we need more than the amount that we have. */
+	/*
+	 * Amount_needed = the amount that we need more than the
+	 * amount that we have.
+	 */
 	if (amount_needed > number_of_freeblk)
 		amount_needed -= number_of_freeblk;
-	else			/* If we have enough already then there is nothing to do. */
+	else	/* If we have enough already then there is nothing to do. */
 		return CARRY_ON;
 
-	/* No need to check quota - is not allocated for blocks used for formatted nodes */
+	/*
+	 * No need to check quota - is not allocated for blocks used
+	 * for formatted nodes
+	 */
 	if (reiserfs_new_form_blocknrs(tb, blocknrs,
 				       amount_needed) == NO_DISK_SPACE)
 		return NO_DISK_SPACE;
@@ -849,8 +910,10 @@
 	return retval;
 }
 
-/* Get free space of the left neighbor, which is stored in the parent
- * node of the left neighbor.  */
+/*
+ * Get free space of the left neighbor, which is stored in the parent
+ * node of the left neighbor.
+ */
 static int get_lfree(struct tree_balance *tb, int h)
 {
 	struct buffer_head *l, *f;
@@ -870,7 +933,8 @@
 	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
 }
 
-/* Get free space of the right neighbor,
+/*
+ * Get free space of the right neighbor,
  * which is stored in the parent node of the right neighbor.
  */
 static int get_rfree(struct tree_balance *tb, int h)
@@ -916,7 +980,10 @@
 	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
 	       father, tb->FL[h]);
 
-	/* Get position of the pointer to the left neighbor into the left father. */
+	/*
+	 * Get position of the pointer to the left neighbor
+	 * into the left father.
+	 */
 	left_neighbor_position = (father == tb->FL[h]) ?
 	    tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
 	/* Get left neighbor block number. */
@@ -940,17 +1007,20 @@
 
 static void decrement_key(struct cpu_key *key)
 {
-	// call item specific function for this key
+	/* call item specific function for this key */
 	item_ops[cpu_key_k_type(key)]->decrement_key(key);
 }
 
-/* Calculate far left/right parent of the left/right neighbor of the current node, that
- * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
+/*
+ * Calculate far left/right parent of the left/right neighbor of the
+ * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor
+ * of the parent F[h].
  * Calculate left/right common parent of the current node and L[h]/R[h].
  * Calculate left/right delimiting key position.
- * Returns:	PATH_INCORRECT   - path in the tree is not correct;
- 		SCHEDULE_OCCURRED - schedule occurred while the function worked;
- *	        CARRY_ON         - schedule didn't occur while the function worked;
+ * Returns:	PATH_INCORRECT    - path in the tree is not correct
+ *		SCHEDULE_OCCURRED - schedule occurred while the function worked
+ *	        CARRY_ON          - schedule didn't occur while the function
+ *				    worked
  */
 static int get_far_parent(struct tree_balance *tb,
 			  int h,
@@ -966,8 +1036,10 @@
 	    first_last_position = 0,
 	    path_offset = PATH_H_PATH_OFFSET(path, h);
 
-	/* Starting from F[h] go upwards in the tree, and look for the common
-	   ancestor of F[h], and its neighbor l/r, that should be obtained. */
+	/*
+	 * Starting from F[h] go upwards in the tree, and look for the common
+	 * ancestor of F[h], and its neighbor l/r, that should be obtained.
+	 */
 
 	counter = path_offset;
 
@@ -975,21 +1047,33 @@
 	       "PAP-8180: invalid path length");
 
 	for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
-		/* Check whether parent of the current buffer in the path is really parent in the tree. */
+		/*
+		 * Check whether parent of the current buffer in the path
+		 * is really parent in the tree.
+		 */
 		if (!B_IS_IN_TREE
 		    (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
 			return REPEAT_SEARCH;
+
 		/* Check whether position in the parent is correct. */
 		if ((position =
 		     PATH_OFFSET_POSITION(path,
 					  counter - 1)) >
 		    B_NR_ITEMS(parent))
 			return REPEAT_SEARCH;
-		/* Check whether parent at the path really points to the child. */
+
+		/*
+		 * Check whether parent at the path really points
+		 * to the child.
+		 */
 		if (B_N_CHILD_NUM(parent, position) !=
 		    PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
 			return REPEAT_SEARCH;
-		/* Return delimiting key if position in the parent is not equal to first/last one. */
+
+		/*
+		 * Return delimiting key if position in the parent is not
+		 * equal to first/last one.
+		 */
 		if (c_lr_par == RIGHT_PARENTS)
 			first_last_position = B_NR_ITEMS(parent);
 		if (position != first_last_position) {
@@ -1002,7 +1086,10 @@
 
 	/* if we are in the root of the tree, then there is no common father */
 	if (counter == FIRST_PATH_ELEMENT_OFFSET) {
-		/* Check whether first buffer in the path is the root of the tree. */
+		/*
+		 * Check whether first buffer in the path is the
+		 * root of the tree.
+		 */
 		if (PATH_OFFSET_PBUFFER
 		    (tb->tb_path,
 		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
@@ -1031,12 +1118,15 @@
 		}
 	}
 
-	/* So, we got common parent of the current node and its left/right neighbor.
-	   Now we are geting the parent of the left/right neighbor. */
+	/*
+	 * So, we got common parent of the current node and its
+	 * left/right neighbor.  Now we are getting the parent of the
+	 * left/right neighbor.
+	 */
 
 	/* Form key to get parent of the left/right neighbor. */
 	le_key2cpu_key(&s_lr_father_key,
-		       B_N_PDELIM_KEY(*pcom_father,
+		       internal_key(*pcom_father,
 				      (c_lr_par ==
 				       LEFT_PARENTS) ? (tb->lkey[h - 1] =
 							position -
@@ -1050,7 +1140,7 @@
 	if (search_by_key
 	    (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
 	     h + 1) == IO_ERROR)
-		// path is released
+		/* path is released */
 		return IO_ERROR;
 
 	if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -1071,12 +1161,15 @@
 	return CARRY_ON;
 }
 
-/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
- * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
- * FR[path_offset], CFL[path_offset], CFR[path_offset].
- * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
- * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
- *	        CARRY_ON - schedule didn't occur while the function worked;
+/*
+ * Get parents of neighbors of node in the path(S[path_offset]) and
+ * common parents of S[path_offset] and L[path_offset]/R[path_offset]:
+ * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset],
+ * CFR[path_offset].
+ * Calculate numbers of left and right delimiting keys position:
+ * lkey[path_offset], rkey[path_offset].
+ * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked
+ *	        CARRY_ON - schedule didn't occur while the function worked
  */
 static int get_parents(struct tree_balance *tb, int h)
 {
@@ -1088,8 +1181,11 @@
 
 	/* Current node is the root of the tree or will be root of the tree */
 	if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
-		/* The root can not have parents.
-		   Release nodes which previously were obtained as parents of the current node neighbors. */
+		/*
+		 * The root can not have parents.
+		 * Release nodes which previously were obtained as
+		 * parents of the current node neighbors.
+		 */
 		brelse(tb->FL[h]);
 		brelse(tb->CFL[h]);
 		brelse(tb->FR[h]);
@@ -1111,10 +1207,14 @@
 		get_bh(curf);
 		tb->lkey[h] = position - 1;
 	} else {
-		/* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
-		   Calculate current common parent of L[path_offset] and the current node. Note that
-		   CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
-		   Calculate lkey[path_offset]. */
+		/*
+		 * Calculate current parent of L[path_offset], which is the
+		 * left neighbor of the current node.  Calculate current
+		 * common parent of L[path_offset] and the current node.
+		 * Note that CFL[path_offset] not equal FL[path_offset] and
+		 * CFL[path_offset] not equal F[path_offset].
+		 * Calculate lkey[path_offset].
+		 */
 		if ((ret = get_far_parent(tb, h + 1, &curf,
 						  &curcf,
 						  LEFT_PARENTS)) != CARRY_ON)
@@ -1130,19 +1230,22 @@
 	       (curcf && !B_IS_IN_TREE(curcf)),
 	       "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
 
-/* Get parent FR[h] of R[h]. */
+	/* Get parent FR[h] of R[h]. */
 
-/* Current node is the last child of F[h]. FR[h] != F[h]. */
+	/* Current node is the last child of F[h]. FR[h] != F[h]. */
 	if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
-/* Calculate current parent of R[h], which is the right neighbor of F[h].
-   Calculate current common parent of R[h] and current node. Note that CFR[h]
-   not equal FR[path_offset] and CFR[h] not equal F[h]. */
+		/*
+		 * Calculate current parent of R[h], which is the right
+		 * neighbor of F[h].  Calculate current common parent of
+		 * R[h] and current node. Note that CFR[h] not equal
+		 * FR[path_offset] and CFR[h] not equal F[h].
+		 */
 		if ((ret =
 		     get_far_parent(tb, h + 1, &curf, &curcf,
 				    RIGHT_PARENTS)) != CARRY_ON)
 			return ret;
 	} else {
-/* Current node is not the last child of its parent F[h]. */
+		/* Current node is not the last child of its parent F[h]. */
 		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
 		curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
 		get_bh(curf);
@@ -1165,8 +1268,10 @@
 	return CARRY_ON;
 }
 
-/* it is possible to remove node as result of shiftings to
-   neighbors even when we insert or paste item. */
+/*
+ * it is possible to remove node as result of shiftings to
+ * neighbors even when we insert or paste item.
+ */
 static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
 				      struct tree_balance *tb, int h)
 {
@@ -1175,21 +1280,22 @@
 	struct item_head *ih;
 	struct reiserfs_key *r_key = NULL;
 
-	ih = B_N_PITEM_HEAD(Sh, 0);
+	ih = item_head(Sh, 0);
 	if (tb->CFR[h])
-		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
+		r_key = internal_key(tb->CFR[h], tb->rkey[h]);
 
 	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
 	    /* shifting may merge items which might save space */
 	    -
 	    ((!h
-	      && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
+	      && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0)
 	    -
 	    ((!h && r_key
 	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
 	    + ((h) ? KEY_SIZE : 0)) {
 		/* node can not be removed */
-		if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
+		if (sfree >= levbytes) {
+			/* new item fits into node S[h] without any shifting */
 			if (!h)
 				tb->s0num =
 				    B_NR_ITEMS(Sh) +
@@ -1202,7 +1308,8 @@
 	return !NO_BALANCING_NEEDED;
 }
 
-/* Check whether current node S[h] is balanced when increasing its size by
+/*
+ * Check whether current node S[h] is balanced when increasing its size by
  * Inserting or Pasting.
  * Calculate parameters for balancing for current level h.
  * Parameters:
@@ -1219,39 +1326,48 @@
 static int ip_check_balance(struct tree_balance *tb, int h)
 {
 	struct virtual_node *vn = tb->tb_vn;
-	int levbytes,		/* Number of bytes that must be inserted into (value
-				   is negative if bytes are deleted) buffer which
-				   contains node being balanced.  The mnemonic is
-				   that the attempted change in node space used level
-				   is levbytes bytes. */
-	 ret;
+	/*
+	 * Number of bytes that must be inserted into (value is negative
+	 * if bytes are deleted) buffer which contains node being balanced.
+	 * The mnemonic is that the attempted change in node space used
+	 * level is levbytes bytes.
+	 */
+	int levbytes;
+	int ret;
 
 	int lfree, sfree, rfree /* free space in L, S and R */ ;
 
-	/* nver is short for number of vertixes, and lnver is the number if
-	   we shift to the left, rnver is the number if we shift to the
-	   right, and lrnver is the number if we shift in both directions.
-	   The goal is to minimize first the number of vertixes, and second,
-	   the number of vertixes whose contents are changed by shifting,
-	   and third the number of uncached vertixes whose contents are
-	   changed by shifting and must be read from disk.  */
+	/*
+	 * nver is short for number of vertices, and lnver is the number if
+	 * we shift to the left, rnver is the number if we shift to the
+	 * right, and lrnver is the number if we shift in both directions.
+	 * The goal is to minimize first the number of vertices, and second,
+	 * the number of vertices whose contents are changed by shifting,
+	 * and third the number of uncached vertices whose contents are
+	 * changed by shifting and must be read from disk.
+	 */
 	int nver, lnver, rnver, lrnver;
 
-	/* used at leaf level only, S0 = S[0] is the node being balanced,
-	   sInum [ I = 0,1,2 ] is the number of items that will
-	   remain in node SI after balancing.  S1 and S2 are new
-	   nodes that might be created. */
-
-	/* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
-	   where 4th parameter is s1bytes and 5th - s2bytes
+	/*
+	 * used at leaf level only, S0 = S[0] is the node being balanced,
+	 * sInum [ I = 0,1,2 ] is the number of items that will
+	 * remain in node SI after balancing.  S1 and S2 are new
+	 * nodes that might be created.
 	 */
-	short snum012[40] = { 0, };	/* s0num, s1num, s2num for 8 cases
-					   0,1 - do not shift and do not shift but bottle
-					   2 - shift only whole item to left
-					   3 - shift to left and bottle as much as possible
-					   4,5 - shift to right (whole items and as much as possible
-					   6,7 - shift to both directions (whole items and as much as possible)
-					 */
+
+	/*
+	 * we perform 8 calls to get_num_ver().  For each call we
+	 * calculate five parameters, where the 4th parameter is s1bytes
+	 * and the 5th is s2bytes
+	 *
+	 * s0num, s1num, s2num for 8 cases
+	 * 0,1 - do not shift and do not shift but bottle
+	 * 2   - shift only whole item to left
+	 * 3   - shift to left and bottle as much as possible
+	 * 4,5 - shift to right (whole items and as much as possible)
+	 * 6,7 - shift to both directions (whole items and as much as possible)
+	 */
+	short snum012[40] = { 0, };
 
 	/* Sh is the node whose balance is currently being checked */
 	struct buffer_head *Sh;
@@ -1265,9 +1381,10 @@
 			reiserfs_panic(tb->tb_sb, "vs-8210",
 				       "S[0] can not be 0");
 		switch (ret = get_empty_nodes(tb, h)) {
+		/* no balancing for higher levels needed */
 		case CARRY_ON:
 			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
-			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
+			return NO_BALANCING_NEEDED;
 
 		case NO_DISK_SPACE:
 		case REPEAT_SEARCH:
@@ -1278,7 +1395,9 @@
 		}
 	}
 
-	if ((ret = get_parents(tb, h)) != CARRY_ON)	/* get parents of S[h] neighbors. */
+	/* get parents of S[h] neighbors. */
+	ret = get_parents(tb, h);
+	if (ret != CARRY_ON)
 		return ret;
 
 	sfree = B_FREE_SPACE(Sh);
@@ -1287,38 +1406,44 @@
 	rfree = get_rfree(tb, h);
 	lfree = get_lfree(tb, h);
 
+	/* and new item fits into node S[h] without any shifting */
 	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
 	    NO_BALANCING_NEEDED)
-		/* and new item fits into node S[h] without any shifting */
 		return NO_BALANCING_NEEDED;
 
 	create_virtual_node(tb, h);
 
 	/*
-	   determine maximal number of items we can shift to the left neighbor (in tb structure)
-	   and the maximal number of bytes that can flow to the left neighbor
-	   from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
+	 * determine maximal number of items we can shift to the left
+	 * neighbor (in tb structure) and the maximal number of bytes
+	 * that can flow to the left neighbor from the left most liquid
+	 * item that cannot be shifted from S[0] entirely (returned value)
 	 */
 	check_left(tb, h, lfree);
 
 	/*
-	   determine maximal number of items we can shift to the right neighbor (in tb structure)
-	   and the maximal number of bytes that can flow to the right neighbor
-	   from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
+	 * determine maximal number of items we can shift to the right
+	 * neighbor (in tb structure) and the maximal number of bytes
+	 * that can flow to the right neighbor from the right most liquid
+	 * item that cannot be shifted from S[0] entirely (returned value)
 	 */
 	check_right(tb, h, rfree);
 
-	/* all contents of internal node S[h] can be moved into its
-	   neighbors, S[h] will be removed after balancing */
+	/*
+	 * all contents of internal node S[h] can be moved into its
+	 * neighbors, S[h] will be removed after balancing
+	 */
 	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
 		int to_r;
 
-		/* Since we are working on internal nodes, and our internal
-		   nodes have fixed size entries, then we can balance by the
-		   number of items rather than the space they consume.  In this
-		   routine we set the left node equal to the right node,
-		   allowing a difference of less than or equal to 1 child
-		   pointer. */
+		/*
+		 * Since we are working on internal nodes, and our internal
+		 * nodes have fixed size entries, then we can balance by the
+		 * number of items rather than the space they consume.  In this
+		 * routine we set the left node equal to the right node,
+		 * allowing a difference of less than or equal to 1 child
+		 * pointer.
+		 */
 		to_r =
 		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
 		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
@@ -1328,7 +1453,10 @@
 		return CARRY_ON;
 	}
 
-	/* this checks balance condition, that any two neighboring nodes can not fit in one node */
+	/*
+	 * this checks balance condition, that any two neighboring nodes
+	 * can not fit in one node
+	 */
 	RFALSE(h &&
 	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
 		tb->rnum[h] >= vn->vn_nr_item + 1),
@@ -1337,16 +1465,22 @@
 		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
 	       "vs-8225: tree is not balanced on leaf level");
 
-	/* all contents of S[0] can be moved into its neighbors
-	   S[0] will be removed after balancing. */
+	/*
+	 * all contents of S[0] can be moved into its neighbors
+	 * S[0] will be removed after balancing.
+	 */
 	if (!h && is_leaf_removable(tb))
 		return CARRY_ON;
 
-	/* why do we perform this check here rather than earlier??
-	   Answer: we can win 1 node in some cases above. Moreover we
-	   checked it above, when we checked, that S[0] is not removable
-	   in principle */
-	if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
+	/*
+	 * why do we perform this check here rather than earlier??
+	 * Answer: we can win 1 node in some cases above. Moreover we
+	 * checked it above, when we checked, that S[0] is not removable
+	 * in principle
+	 */
+
+	/* new item fits into node S[h] without any shifting */
+	if (sfree >= levbytes) {
 		if (!h)
 			tb->s0num = vn->vn_nr_item;
 		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
@@ -1355,18 +1489,19 @@
 
 	{
 		int lpar, rpar, nset, lset, rset, lrset;
-		/*
-		 * regular overflowing of the node
-		 */
+		/* regular overflowing of the node */
 
-		/* get_num_ver works in 2 modes (FLOW & NO_FLOW)
-		   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
-		   nset, lset, rset, lrset - shows, whether flowing items give better packing
+		/*
+		 * get_num_ver works in 2 modes (FLOW & NO_FLOW)
+		 * lpar, rpar - number of items we can shift to left/right
+		 *              neighbor (including splitting item)
+		 * nset, lset, rset, lrset - shows, whether flowing items
+		 *                           give better packing
 		 */
 #define FLOW 1
 #define NO_FLOW 0		/* do not any splitting */
 
-		/* we choose one the following */
+		/* we choose one of the following */
 #define NOTHING_SHIFT_NO_FLOW	0
 #define NOTHING_SHIFT_FLOW	5
 #define LEFT_SHIFT_NO_FLOW	10
@@ -1379,10 +1514,13 @@
 		lpar = tb->lnum[h];
 		rpar = tb->rnum[h];
 
-		/* calculate number of blocks S[h] must be split into when
-		   nothing is shifted to the neighbors,
-		   as well as number of items in each part of the split node (s012 numbers),
-		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
+		/*
+		 * calculate number of blocks S[h] must be split into when
+		 * nothing is shifted to the neighbors, as well as number of
+		 * items in each part of the split node (s012 numbers),
+		 * and number of bytes (s1bytes) of the shared drop which
+		 * flow to S1 if any
+		 */
 		nset = NOTHING_SHIFT_NO_FLOW;
 		nver = get_num_ver(vn->vn_mode, tb, h,
 				   0, -1, h ? vn->vn_nr_item : 0, -1,
@@ -1391,7 +1529,10 @@
 		if (!h) {
 			int nver1;
 
-			/* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
+			/*
+			 * note, that in this case we try to bottle
+			 * between S[0] and S1 (S1 - the first new node)
+			 */
 			nver1 = get_num_ver(vn->vn_mode, tb, h,
 					    0, -1, 0, -1,
 					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
@@ -1399,11 +1540,13 @@
 				nset = NOTHING_SHIFT_FLOW, nver = nver1;
 		}
 
-		/* calculate number of blocks S[h] must be split into when
-		   l_shift_num first items and l_shift_bytes of the right most
-		   liquid item to be shifted are shifted to the left neighbor,
-		   as well as number of items in each part of the splitted node (s012 numbers),
-		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
+		/*
+		 * calculate number of blocks S[h] must be split into when
+		 * l_shift_num first items and l_shift_bytes of the right
+		 * most liquid item to be shifted are shifted to the left
+		 * neighbor, as well as number of items in each part of the
+		 * split node (s012 numbers), and number of bytes
+		 * (s1bytes) of the shared drop which flow to S1 if any
 		 */
 		lset = LEFT_SHIFT_NO_FLOW;
 		lnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1422,11 +1565,13 @@
 				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
 		}
 
-		/* calculate number of blocks S[h] must be split into when
-		   r_shift_num first items and r_shift_bytes of the left most
-		   liquid item to be shifted are shifted to the right neighbor,
-		   as well as number of items in each part of the splitted node (s012 numbers),
-		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
+		/*
+		 * calculate number of blocks S[h] must be split into when
+		 * r_shift_num first items and r_shift_bytes of the left most
+		 * liquid item to be shifted are shifted to the right neighbor,
+		 * as well as number of items in each part of the split
+		 * node (s012 numbers), and number of bytes (s1bytes) of the
+		 * shared drop which flow to S1 if any
 		 */
 		rset = RIGHT_SHIFT_NO_FLOW;
 		rnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1451,10 +1596,12 @@
 				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
 		}
 
-		/* calculate number of blocks S[h] must be split into when
-		   items are shifted in both directions,
-		   as well as number of items in each part of the splitted node (s012 numbers),
-		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
+		/*
+		 * calculate number of blocks S[h] must be split into when
+		 * items are shifted in both directions, as well as number
+		 * of items in each part of the split node (s012 numbers),
+		 * and number of bytes (s1bytes) of the shared drop which
+		 * flow to S1 if any
 		 */
 		lrset = LR_SHIFT_NO_FLOW;
 		lrnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1481,10 +1628,12 @@
 				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
 		}
 
-		/* Our general shifting strategy is:
-		   1) to minimized number of new nodes;
-		   2) to minimized number of neighbors involved in shifting;
-		   3) to minimized number of disk reads; */
+		/*
+		 * Our general shifting strategy is:
+		 * 1) to minimize the number of new nodes;
+		 * 2) to minimize the number of neighbors involved in shifting;
+		 * 3) to minimize the number of disk reads;
+		 */
 
 		/* we can win TWO or ONE nodes by shifting in both directions */
 		if (lrnver < lnver && lrnver < rnver) {
@@ -1508,42 +1657,59 @@
 			return CARRY_ON;
 		}
 
-		/* if shifting doesn't lead to better packing then don't shift */
+		/*
+		 * if shifting doesn't lead to better packing
+		 * then don't shift
+		 */
 		if (nver == lrnver) {
 			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
 				       -1);
 			return CARRY_ON;
 		}
 
-		/* now we know that for better packing shifting in only one
-		   direction either to the left or to the right is required */
+		/*
+		 * now we know that for better packing shifting in only one
+		 * direction either to the left or to the right is required
+		 */
 
-		/*  if shifting to the left is better than shifting to the right */
+		/*
+		 * if shifting to the left is better than
+		 * shifting to the right
+		 */
 		if (lnver < rnver) {
 			SET_PAR_SHIFT_LEFT;
 			return CARRY_ON;
 		}
 
-		/* if shifting to the right is better than shifting to the left */
+		/*
+		 * if shifting to the right is better than
+		 * shifting to the left
+		 */
 		if (lnver > rnver) {
 			SET_PAR_SHIFT_RIGHT;
 			return CARRY_ON;
 		}
 
-		/* now shifting in either direction gives the same number
-		   of nodes and we can make use of the cached neighbors */
+		/*
+		 * now shifting in either direction gives the same number
+		 * of nodes and we can make use of the cached neighbors
+		 */
 		if (is_left_neighbor_in_cache(tb, h)) {
 			SET_PAR_SHIFT_LEFT;
 			return CARRY_ON;
 		}
 
-		/* shift to the right independently on whether the right neighbor in cache or not */
+		/*
+		 * shift to the right regardless of whether the
+		 * right neighbor is in cache or not
+		 */
 		SET_PAR_SHIFT_RIGHT;
 		return CARRY_ON;
 	}
 }
 
-/* Check whether current node S[h] is balanced when Decreasing its size by
+/*
+ * Check whether current node S[h] is balanced when Decreasing its size by
  * Deleting or Cutting for INTERNAL node of S+tree.
  * Calculate parameters for balancing for current level h.
  * Parameters:
@@ -1563,8 +1729,10 @@
 {
 	struct virtual_node *vn = tb->tb_vn;
 
-	/* Sh is the node whose balance is currently being checked,
-	   and Fh is its father.  */
+	/*
+	 * Sh is the node whose balance is currently being checked,
+	 * and Fh is its father.
+	 */
 	struct buffer_head *Sh, *Fh;
 	int maxsize, ret;
 	int lfree, rfree /* free space in L and R */ ;
@@ -1574,19 +1742,25 @@
 
 	maxsize = MAX_CHILD_SIZE(Sh);
 
-/*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
-/*   new_nr_item = number of items node would have if operation is */
-/* 	performed without balancing (new_nr_item); */
+	/*
+	 * using tb->insert_size[h], which is negative in this case,
+	 * create_virtual_node calculates:
+	 * new_nr_item = number of items node would have if operation is
+	 * performed without balancing (new_nr_item);
+	 */
 	create_virtual_node(tb, h);
 
 	if (!Fh) {		/* S[h] is the root. */
+		/* no balancing for higher levels needed */
 		if (vn->vn_nr_item > 0) {
 			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
-			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
+			return NO_BALANCING_NEEDED;
 		}
-		/* new_nr_item == 0.
+		/*
+		 * new_nr_item == 0.
 		 * Current root will be deleted resulting in
-		 * decrementing the tree height. */
+		 * decrementing the tree height.
+		 */
 		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
 		return CARRY_ON;
 	}
@@ -1602,12 +1776,18 @@
 	check_left(tb, h, lfree);
 	check_right(tb, h, rfree);
 
-	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {	/* Balance condition for the internal node is valid.
-						 * In this case we balance only if it leads to better packing. */
-		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {	/* Here we join S[h] with one of its neighbors,
-							 * which is impossible with greater values of new_nr_item. */
+	/*
+	 * Balance condition for the internal node is valid.
+	 * In this case we balance only if it leads to better packing.
+	 */
+	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
+		/*
+		 * Here we join S[h] with one of its neighbors,
+		 * which is impossible with greater values of new_nr_item.
+		 */
+		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
+			/* All contents of S[h] can be moved to L[h]. */
 			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
-				/* All contents of S[h] can be moved to L[h]. */
 				int n;
 				int order_L;
 
@@ -1623,8 +1803,8 @@
 				return CARRY_ON;
 			}
 
+			/* All contents of S[h] can be moved to R[h]. */
 			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
-				/* All contents of S[h] can be moved to R[h]. */
 				int n;
 				int order_R;
 
@@ -1641,8 +1821,11 @@
 			}
 		}
 
+		/*
+		 * All contents of S[h] can be moved to the neighbors
+		 * (L[h] & R[h]).
+		 */
 		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
-			/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
 			int to_r;
 
 			to_r =
@@ -1659,7 +1842,10 @@
 		return NO_BALANCING_NEEDED;
 	}
 
-	/* Current node contain insufficient number of items. Balancing is required. */
+	/*
+	 * Current node contains an insufficient number of items.
+	 * Balancing is required.
+	 */
 	/* Check whether we can merge S[h] with left neighbor. */
 	if (tb->lnum[h] >= vn->vn_nr_item + 1)
 		if (is_left_neighbor_in_cache(tb, h)
@@ -1726,7 +1912,8 @@
 	return CARRY_ON;
 }
 
-/* Check whether current node S[h] is balanced when Decreasing its size by
+/*
+ * Check whether current node S[h] is balanced when Decreasing its size by
  * Deleting or Truncating for LEAF node of S+tree.
  * Calculate parameters for balancing for current level h.
  * Parameters:
@@ -1743,15 +1930,21 @@
 {
 	struct virtual_node *vn = tb->tb_vn;
 
-	/* Number of bytes that must be deleted from
-	   (value is negative if bytes are deleted) buffer which
-	   contains node being balanced.  The mnemonic is that the
-	   attempted change in node space used level is levbytes bytes. */
+	/*
+	 * Number of bytes that must be deleted from
+	 * (value is negative if bytes are deleted) buffer which
+	 * contains node being balanced.  The mnemonic is that the
+	 * attempted change in node space used level is levbytes bytes.
+	 */
 	int levbytes;
+
 	/* the maximal item size */
 	int maxsize, ret;
-	/* S0 is the node whose balance is currently being checked,
-	   and F0 is its father.  */
+
+	/*
+	 * S0 is the node whose balance is currently being checked,
+	 * and F0 is its father.
+	 */
 	struct buffer_head *S0, *F0;
 	int lfree, rfree /* free space in L and R */ ;
 
@@ -1784,9 +1977,11 @@
 	if (are_leaves_removable(tb, lfree, rfree))
 		return CARRY_ON;
 
-	/* determine maximal number of items we can shift to the left/right  neighbor
-	   and the maximal number of bytes that can flow to the left/right neighbor
-	   from the left/right most liquid item that cannot be shifted from S[0] entirely
+	/*
+	 * determine maximal number of items we can shift to the left/right
+	 * neighbor and the maximal number of bytes that can flow to the
+	 * left/right neighbor from the left/right most liquid item that
+	 * cannot be shifted from S[0] entirely
 	 */
 	check_left(tb, h, lfree);
 	check_right(tb, h, rfree);
@@ -1810,7 +2005,10 @@
 		return CARRY_ON;
 	}
 
-	/* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
+	/*
+	 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]).
+	 * Set parameters and return
+	 */
 	if (is_leaf_removable(tb))
 		return CARRY_ON;
 
@@ -1820,7 +2018,8 @@
 	return NO_BALANCING_NEEDED;
 }
 
-/* Check whether current node S[h] is balanced when Decreasing its size by
+/*
+ * Check whether current node S[h] is balanced when Decreasing its size by
  * Deleting or Cutting.
  * Calculate parameters for balancing for current level h.
  * Parameters:
@@ -1844,15 +2043,16 @@
 		return dc_check_balance_leaf(tb, h);
 }
 
-/* Check whether current node S[h] is balanced.
+/*
+ * Check whether current node S[h] is balanced.
  * Calculate parameters for balancing for current level h.
  * Parameters:
  *
  *	tb	tree_balance structure:
  *
- *              tb is a large structure that must be read about in the header file
- *              at the same time as this procedure if the reader is to successfully
- *              understand this procedure
+ *              tb is a large structure that must be read about in the header
+ *		file at the same time as this procedure if the reader is
+ *		to successfully understand this procedure
  *
  *	h	current level of the node;
  *	inum	item number in S[h];
@@ -1882,8 +2082,8 @@
 	RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
 	       "vs-8255: ins_ih can not be 0 in insert mode");
 
+	/* Calculate balance parameters when size of node is increasing. */
 	if (tb->insert_size[h] > 0)
-		/* Calculate balance parameters when size of node is increasing. */
 		return ip_check_balance(tb, h);
 
 	/* Calculate balance parameters when  size of node is decreasing. */
@@ -1911,21 +2111,23 @@
 			PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
 			return CARRY_ON;
 		}
-		return REPEAT_SEARCH;	/* Root is changed and we must recalculate the path. */
+		/* Root is changed and we must recalculate the path. */
+		return REPEAT_SEARCH;
 	}
 
+	/* Parent in the path is not in the tree. */
 	if (!B_IS_IN_TREE
 	    (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
-		return REPEAT_SEARCH;	/* Parent in the path is not in the tree. */
+		return REPEAT_SEARCH;
 
 	if ((position =
 	     PATH_OFFSET_POSITION(path,
 				  path_offset - 1)) > B_NR_ITEMS(bh))
 		return REPEAT_SEARCH;
 
+	/* Parent in the path is not parent of the current node in the tree. */
 	if (B_N_CHILD_NUM(bh, position) !=
 	    PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
-		/* Parent in the path is not parent of the current node in the tree. */
 		return REPEAT_SEARCH;
 
 	if (buffer_locked(bh)) {
@@ -1936,10 +2138,15 @@
 			return REPEAT_SEARCH;
 	}
 
-	return CARRY_ON;	/* Parent in the path is unlocked and really parent of the current node.  */
+	/*
+	 * Parent in the path is unlocked and really parent
+	 * of the current node.
+	 */
+	return CARRY_ON;
 }
 
-/* Using lnum[h] and rnum[h] we should determine what neighbors
+/*
+ * Using lnum[h] and rnum[h] we should determine what neighbors
  * of S[h] we
  * need in order to balance S[h], and get them if necessary.
  * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
@@ -1997,7 +2204,7 @@
 	}
 
 	/* We need right neighbor to balance S[path_offset]. */
-	if (tb->rnum[h]) {	/* We need right neighbor to balance S[path_offset]. */
+	if (tb->rnum[h]) {
 		PROC_INFO_INC(sb, need_r_neighbor[h]);
 		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
 
@@ -2053,9 +2260,11 @@
 		(max_num_of_entries - 1) * sizeof(__u16));
 }
 
-/* maybe we should fail balancing we are going to perform when kmalloc
-   fails several times. But now it will loop until kmalloc gets
-   required memory */
+/*
+ * maybe we should fail balancing we are going to perform when kmalloc
+ * fails several times. But now it will loop until kmalloc gets
+ * required memory
+ */
 static int get_mem_for_virtual_node(struct tree_balance *tb)
 {
 	int check_fs = 0;
@@ -2064,8 +2273,8 @@
 
 	size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
 
+	/* we have to allocate more memory for virtual node */
 	if (size > tb->vn_buf_size) {
-		/* we have to allocate more memory for virtual node */
 		if (tb->vn_buf) {
 			/* free memory allocated before */
 			kfree(tb->vn_buf);
@@ -2079,10 +2288,12 @@
 		/* get memory for virtual item */
 		buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
 		if (!buf) {
-			/* getting memory with GFP_KERNEL priority may involve
-			   balancing now (due to indirect_to_direct conversion on
-			   dcache shrinking). So, release path and collected
-			   resources here */
+			/*
+			 * getting memory with GFP_KERNEL priority may involve
+			 * balancing now (due to indirect_to_direct conversion
+			 * on dcache shrinking). So, release path and collected
+			 * resources here
+			 */
 			free_buffers_in_tb(tb);
 			buf = kmalloc(size, GFP_NOFS);
 			if (!buf) {
@@ -2168,8 +2379,10 @@
 		for (i = tb->tb_path->path_length;
 		     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
 			if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
-				/* if I understand correctly, we can only be sure the last buffer
-				 ** in the path is in the tree --clm
+				/*
+				 * if I understand correctly, we can only
+				 * be sure the last buffer in the path is
+				 * in the tree --clm
 				 */
 #ifdef CONFIG_REISERFS_CHECK
 				if (PATH_PLAST_BUFFER(tb->tb_path) ==
@@ -2256,13 +2469,15 @@
 				}
 			}
 		}
-		/* as far as I can tell, this is not required.  The FEB list seems
-		 ** to be full of newly allocated nodes, which will never be locked,
-		 ** dirty, or anything else.
-		 ** To be safe, I'm putting in the checks and waits in.  For the moment,
-		 ** they are needed to keep the code in journal.c from complaining
-		 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
-		 ** --clm
+
+		/*
+		 * as far as I can tell, this is not required.  The FEB list
+		 * seems to be full of newly allocated nodes, which will
+		 * never be locked, dirty, or anything else.
+		 * To be safe, I'm putting in the checks and waits in.
+		 * For the moment, they are needed to keep the code in
+		 * journal.c from complaining about the buffer.
+		 * That code is inside CONFIG_REISERFS_CHECK as well.  --clm
 		 */
 		for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
 			if (tb->FEB[i]) {
@@ -2300,7 +2515,8 @@
 	return CARRY_ON;
 }
 
-/* Prepare for balancing, that is
+/*
+ * Prepare for balancing, that is
  *	get all necessary parents, and neighbors;
  *	analyze what and where should be moved;
  *	get sufficient number of new nodes;
@@ -2309,13 +2525,14 @@
  * When ported to SMP kernels, only at the last moment after all needed nodes
  * are collected in cache, will the resources be locked using the usual
  * textbook ordered lock acquisition algorithms.  Note that ensuring that
- * this code neither write locks what it does not need to write lock nor locks out of order
- * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
+ * this code neither write locks what it does not need to write lock nor locks
+ * out of order will be a pain in the butt that could have been avoided.
+ * Grumble grumble. -Hans
  *
  * fix is meant in the sense of render unchanging
  *
- * Latency might be improved by first gathering a list of what buffers are needed
- * and then getting as many of them in parallel as possible? -Hans
+ * Latency might be improved by first gathering a list of what buffers
+ * are needed and then getting as many of them in parallel as possible? -Hans
  *
  * Parameters:
  *	op_mode	i - insert, d - delete, c - cut (truncate), p - paste (append)
@@ -2335,8 +2552,9 @@
 	int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
 	int pos_in_item;
 
-	/* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
-	 ** during wait_tb_buffers_run
+	/*
+	 * we set wait_tb_buffers_run when we have to restore any dirty
+	 * bits cleared during wait_tb_buffers_run
 	 */
 	int wait_tb_buffers_run = 0;
 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
@@ -2347,14 +2565,15 @@
 
 	tb->fs_gen = get_generation(tb->tb_sb);
 
-	/* we prepare and log the super here so it will already be in the
-	 ** transaction when do_balance needs to change it.
-	 ** This way do_balance won't have to schedule when trying to prepare
-	 ** the super for logging
+	/*
+	 * we prepare and log the super here so it will already be in the
+	 * transaction when do_balance needs to change it.
+	 * This way do_balance won't have to schedule when trying to prepare
+	 * the super for logging
 	 */
 	reiserfs_prepare_for_journal(tb->tb_sb,
 				     SB_BUFFER_WITH_SB(tb->tb_sb), 1);
-	journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
+	journal_mark_dirty(tb->transaction_handle,
 			   SB_BUFFER_WITH_SB(tb->tb_sb));
 	if (FILESYSTEM_CHANGED_TB(tb))
 		return REPEAT_SEARCH;
@@ -2408,7 +2627,7 @@
 #endif
 
 	if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
-		// FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
+		/* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */
 		return REPEAT_SEARCH;
 
 	/* Starting from the leaf level; for all levels h of the tree. */
@@ -2427,7 +2646,10 @@
 					goto repeat;
 				if (h != MAX_HEIGHT - 1)
 					tb->insert_size[h + 1] = 0;
-				/* ok, analysis and resource gathering are complete */
+				/*
+				 * ok, analysis and resource gathering
+				 * are complete
+				 */
 				break;
 			}
 			goto repeat;
@@ -2437,15 +2659,19 @@
 		if (ret != CARRY_ON)
 			goto repeat;
 
-		/* No disk space, or schedule occurred and analysis may be
-		 * invalid and needs to be redone. */
+		/*
+		 * No disk space, or schedule occurred and analysis may be
+		 * invalid and needs to be redone.
+		 */
 		ret = get_empty_nodes(tb, h);
 		if (ret != CARRY_ON)
 			goto repeat;
 
+		/*
+		 * We have a positive insert size but no nodes exist on this
+		 * level, this means that we are creating a new root.
+		 */
 		if (!PATH_H_PBUFFER(tb->tb_path, h)) {
-			/* We have a positive insert size but no nodes exist on this
-			   level, this means that we are creating a new root. */
 
 			RFALSE(tb->blknum[h] != 1,
 			       "PAP-8350: creating new empty root");
@@ -2453,11 +2679,13 @@
 			if (h < MAX_HEIGHT - 1)
 				tb->insert_size[h + 1] = 0;
 		} else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
+			/*
+			 * The tree needs to be grown, so this node S[h]
+			 * which is the root node is split into two nodes,
+			 * and a new node (S[h+1]) will be created to
+			 * become the root node.
+			 */
 			if (tb->blknum[h] > 1) {
-				/* The tree needs to be grown, so this node S[h]
-				   which is the root node is split into two nodes,
-				   and a new node (S[h+1]) will be created to
-				   become the root node.  */
 
 				RFALSE(h == MAX_HEIGHT - 1,
 				       "PAP-8355: attempt to create too high of a tree");
@@ -2487,12 +2715,14 @@
 		goto repeat;
 	}
 
-      repeat:
-	// fix_nodes was unable to perform its calculation due to
-	// filesystem got changed under us, lack of free disk space or i/o
-	// failure. If the first is the case - the search will be
-	// repeated. For now - free all resources acquired so far except
-	// for the new allocated nodes
+repeat:
+	/*
+	 * fix_nodes was unable to perform its calculation due to the
+	 * filesystem changing under us, lack of free disk space, or an
+	 * i/o failure. If the first is the case - the search will be
+	 * repeated. For now - free all resources acquired so far except
+	 * for the newly allocated nodes
+	 */
 	{
 		int i;
 
@@ -2548,8 +2778,6 @@
 
 }
 
-/* Anatoly will probably forgive me renaming tb to tb. I just
-   wanted to make lines shorter */
 void unfix_nodes(struct tree_balance *tb)
 {
 	int i;
@@ -2578,8 +2806,10 @@
 	for (i = 0; i < MAX_FEB_SIZE; i++) {
 		if (tb->FEB[i]) {
 			b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
-			/* de-allocated block which was not used by balancing and
-			   bforget about buffer for it */
+			/*
+			 * de-allocated block which was not used by
+			 * balancing and bforget about buffer for it
+			 */
 			brelse(tb->FEB[i]);
 			reiserfs_free_block(tb->transaction_handle, NULL,
 					    blocknr, 0);

diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 91b0cc1..7a26c4f 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c

@@ -12,12 +12,6 @@
  * Yura's function is added (04/07/2000)
  */
 
-//
-// keyed_hash
-// yura_hash
-// r5_hash
-//
-
 #include <linux/kernel.h>
 #include "reiserfs.h"
 #include <asm/types.h>
@@ -56,7 +50,7 @@
 	u32 pad;
 	int i;
 
-	//      assert(len >= 0 && len < 256);
+	/*      assert(len >= 0 && len < 256); */
 
 	pad = (u32) len | ((u32) len << 8);
 	pad |= pad << 16;
@@ -127,9 +121,10 @@
 	return h0 ^ h1;
 }
 
-/* What follows in this file is copyright 2000 by Hans Reiser, and the
- * licensing of what follows is governed by reiserfs/README */
-
+/*
+ * What follows in this file is copyright 2000 by Hans Reiser, and the
+ * licensing of what follows is governed by reiserfs/README
+ */
 u32 yura_hash(const signed char *msg, int len)
 {
 	int j, pow;

diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index e1978fd..73231b1 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c

@@ -12,7 +12,10 @@
 int balance_internal(struct tree_balance *,
 		     int, int, struct item_head *, struct buffer_head **);
 
-/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
+/*
+ * modes of internal_shift_left, internal_shift_right and
+ * internal_insert_childs
+ */
 #define INTERNAL_SHIFT_FROM_S_TO_L 0
 #define INTERNAL_SHIFT_FROM_R_TO_S 1
 #define INTERNAL_SHIFT_FROM_L_TO_S 2
@@ -32,7 +35,9 @@
 	memset(src_bi, 0, sizeof(struct buffer_info));
 	/* define dest, src, dest parent, dest position */
 	switch (shift_mode) {
-	case INTERNAL_SHIFT_FROM_S_TO_L:	/* used in internal_shift_left */
+
+	/* used in internal_shift_left */
+	case INTERNAL_SHIFT_FROM_S_TO_L:
 		src_bi->tb = tb;
 		src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
 		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
@@ -52,12 +57,14 @@
 		dest_bi->tb = tb;
 		dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
 		dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
-		dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);	/* dest position is analog of dest->b_item_order */
+		/* dest position is analog of dest->b_item_order */
+		dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
 		*d_key = tb->lkey[h];
 		*cf = tb->CFL[h];
 		break;
 
-	case INTERNAL_SHIFT_FROM_R_TO_S:	/* used in internal_shift_left */
+	/* used in internal_shift_left */
+	case INTERNAL_SHIFT_FROM_R_TO_S:
 		src_bi->tb = tb;
 		src_bi->bi_bh = tb->R[h];
 		src_bi->bi_parent = tb->FR[h];
@@ -111,7 +118,8 @@
 	}
 }
 
-/* Insert count node pointers into buffer cur before position to + 1.
+/*
+ * Insert count node pointers into buffer cur before position to + 1.
  * Insert count items into buffer cur before position to.
  * Items and node pointers are specified by inserted and bh respectively.
  */
@@ -146,14 +154,14 @@
 
 	/* copy to_be_insert disk children */
 	for (i = 0; i < count; i++) {
-		put_dc_size(&(new_dc[i]),
+		put_dc_size(&new_dc[i],
 			    MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
-		put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
+		put_dc_block_number(&new_dc[i], bh[i]->b_blocknr);
 	}
 	memcpy(dc, new_dc, DC_SIZE * count);
 
 	/* prepare space for count items  */
-	ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
+	ih = internal_key(cur, ((to == -1) ? 0 : to));
 
 	memmove(ih + count, ih,
 		(nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
@@ -190,8 +198,10 @@
 
 }
 
-/* Delete del_num items and node pointers from buffer cur starting from *
- * the first_i'th item and first_p'th pointers respectively.		*/
+/*
+ * Delete del_num items and node pointers from buffer cur starting from
+ * the first_i'th item and first_p'th pointers respectively.
+ */
 static void internal_delete_pointers_items(struct buffer_info *cur_bi,
 					   int first_p,
 					   int first_i, int del_num)
@@ -233,7 +243,7 @@
 	dc = B_N_CHILD(cur, first_p);
 
 	memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
-	key = B_N_PDELIM_KEY(cur, first_i);
+	key = internal_key(cur, first_i);
 	memmove(key, key + del_num,
 		(nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
 						       del_num) * DC_SIZE);
@@ -270,22 +280,30 @@
 
 	i_from = (from == 0) ? from : from - 1;
 
-	/* delete n pointers starting from `from' position in CUR;
-	   delete n keys starting from 'i_from' position in CUR;
+	/*
+	 * delete n pointers starting from `from' position in CUR;
+	 * delete n keys starting from 'i_from' position in CUR;
 	 */
 	internal_delete_pointers_items(cur_bi, from, i_from, n);
 }
 
-/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
-* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest
- * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest
+/*
+ * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer
+ * dest
+ * last_first == FIRST_TO_LAST means that we copy first items
+ *                             from src to tail of dest
+ * last_first == LAST_TO_FIRST means that we copy last items
+ *                             from src to head of dest
  */
 static void internal_copy_pointers_items(struct buffer_info *dest_bi,
 					 struct buffer_head *src,
 					 int last_first, int cpy_num)
 {
-	/* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST *
-	 * as delimiting key have already inserted to buffer dest.*/
+	/*
+	 * ATTENTION! Number of node pointers in DEST is equal to number
+	 * of items in DEST, as the delimiting key has already been
+	 * inserted into buffer dest.
+	 */
 	struct buffer_head *dest = dest_bi->bi_bh;
 	int nr_dest, nr_src;
 	int dest_order, src_order;
@@ -330,13 +348,13 @@
 	memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
 
 	/* prepare space for cpy_num - 1 item headers */
-	key = B_N_PDELIM_KEY(dest, dest_order);
+	key = internal_key(dest, dest_order);
 	memmove(key + cpy_num - 1, key,
 		KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
 							       cpy_num));
 
 	/* insert headers */
-	memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
+	memcpy(key, internal_key(src, src_order), KEY_SIZE * (cpy_num - 1));
 
 	/* sizes, item number */
 	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
@@ -366,7 +384,9 @@
 
 }
 
-/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
+/*
+ * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to
+ * buffer dest.
  * Delete cpy_num - del_par items and node pointers from buffer src.
  * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
  * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
@@ -385,8 +405,10 @@
 	if (last_first == FIRST_TO_LAST) {	/* shift_left occurs */
 		first_pointer = 0;
 		first_item = 0;
-		/* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
-		   for key - with first_item */
+		/*
+		 * delete cpy_num - del_par pointers and keys starting for
+		 * pointers with first_pointer, for key - with first_item
+		 */
 		internal_delete_pointers_items(src_bi, first_pointer,
 					       first_item, cpy_num - del_par);
 	} else {		/* shift_right occurs */
@@ -404,7 +426,9 @@
 }
 
 /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
-static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before,	/* insert key before key with n_dest number */
+static void internal_insert_key(struct buffer_info *dest_bi,
+				/* insert key before key with n_dest number */
+				int dest_position_before,
 				struct buffer_head *src, int src_position)
 {
 	struct buffer_head *dest = dest_bi->bi_bh;
@@ -429,12 +453,12 @@
 	nr = blkh_nr_item(blkh);
 
 	/* prepare space for inserting key */
-	key = B_N_PDELIM_KEY(dest, dest_position_before);
+	key = internal_key(dest, dest_position_before);
 	memmove(key + 1, key,
 		(nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
 
 	/* insert key */
-	memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
+	memcpy(key, internal_key(src, src_position), KEY_SIZE);
 
 	/* Change dirt, free space, item number fields. */
 
@@ -453,13 +477,19 @@
 	}
 }
 
-/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
- * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
+/*
+ * Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
+ * Copy pointer_amount node pointers and pointer_amount - 1 items from
+ * buffer src to buffer dest.
  * Replace  d_key'th key in buffer cfl.
  * Delete pointer_amount items and node pointers from buffer src.
  */
 /* this can be invoked both to shift from S to L and from R to S */
-static void internal_shift_left(int mode,	/* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */
+static void internal_shift_left(
+				/*
+				 * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S
+				 */
+				int mode,
 				struct tree_balance *tb,
 				int h, int pointer_amount)
 {
@@ -473,7 +503,10 @@
 	/*printk("pointer_amount = %d\n",pointer_amount); */
 
 	if (pointer_amount) {
-		/* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */
+		/*
+		 * insert delimiting key from common father of dest and
+		 * src to node dest into position B_NR_ITEM(dest)
+		 */
 		internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
 				    d_key_position);
 
@@ -492,7 +525,8 @@
 
 }
 
-/* Insert delimiting key to L[h].
+/*
+ * Insert delimiting key to L[h].
  * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
  * Delete n - 1 items and node pointers from buffer S[h].
  */
@@ -507,23 +541,27 @@
 	internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
 				       &dest_bi, &src_bi, &d_key_position, &cf);
 
-	if (pointer_amount > 0)	/* insert lkey[h]-th key  from CFL[h] to left neighbor L[h] */
+	/* insert lkey[h]-th key  from CFL[h] to left neighbor L[h] */
+	if (pointer_amount > 0)
 		internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
 				    d_key_position);
-	/*            internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
 
 	/* last parameter is del_parameter */
 	internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
 				     pointer_amount, 1);
-	/*    internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
 }
 
-/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
+/*
+ * Insert d_key'th (delimiting) key from buffer cfr to head of dest.
  * Copy n node pointers and n - 1 items from buffer src to buffer dest.
  * Replace  d_key'th key in buffer cfr.
  * Delete n items and node pointers from buffer src.
  */
-static void internal_shift_right(int mode,	/* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */
+static void internal_shift_right(
+				 /*
+				  * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S
+				  */
+				 int mode,
 				 struct tree_balance *tb,
 				 int h, int pointer_amount)
 {
@@ -538,7 +576,10 @@
 	nr = B_NR_ITEMS(src_bi.bi_bh);
 
 	if (pointer_amount > 0) {
-		/* insert delimiting key from common father of dest and src to dest node into position 0 */
+		/*
+		 * insert delimiting key from common father of dest
+		 * and src to dest node into position 0
+		 */
 		internal_insert_key(&dest_bi, 0, cf, d_key_position);
 		if (nr == pointer_amount - 1) {
 			RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
@@ -559,7 +600,8 @@
 				     pointer_amount, 0);
 }
 
-/* Insert delimiting key to R[h].
+/*
+ * Insert delimiting key to R[h].
  * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
  * Delete n - 1 items and node pointers from buffer S[h].
  */
@@ -574,18 +616,19 @@
 	internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
 				       &dest_bi, &src_bi, &d_key_position, &cf);
 
-	if (pointer_amount > 0)	/* insert rkey from CFR[h] to right neighbor R[h] */
+	/* insert rkey from CFR[h] to right neighbor R[h] */
+	if (pointer_amount > 0)
 		internal_insert_key(&dest_bi, 0, cf, d_key_position);
-	/*            internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
 
 	/* last parameter is del_parameter */
 	internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
 				     pointer_amount, 1);
-	/*    internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
 }
 
-/* Delete insert_num node pointers together with their left items
- * and balance current node.*/
+/*
+ * Delete insert_num node pointers together with their left items
+ * and balance current node.
+ */
 static void balance_internal_when_delete(struct tree_balance *tb,
 					 int h, int child_pos)
 {
@@ -626,9 +669,11 @@
 				new_root = tb->R[h - 1];
 			else
 				new_root = tb->L[h - 1];
-			/* switch super block's tree root block number to the new value */
+			/*
+			 * switch super block's tree root block
+			 * number to the new value */
 			PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
-			//REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
+			/*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */
 			PUT_SB_TREE_HEIGHT(tb->tb_sb,
 					   SB_TREE_HEIGHT(tb->tb_sb) - 1);
 
@@ -636,8 +681,8 @@
 						 REISERFS_SB(tb->tb_sb)->s_sbh,
 						 1);
 			/*&&&&&&&&&&&&&&&&&&&&&& */
+			/* use check_internal if new root is an internal node */
 			if (h > 1)
-				/* use check_internal if new root is an internal node */
 				check_internal(new_root);
 			/*&&&&&&&&&&&&&&&&&&&&&& */
 
@@ -648,7 +693,8 @@
 		return;
 	}
 
-	if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {	/* join S[h] with L[h] */
+	/* join S[h] with L[h] */
+	if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {
 
 		RFALSE(tb->rnum[h] != 0,
 		       "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
@@ -660,7 +706,8 @@
 		return;
 	}
 
-	if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {	/* join S[h] with R[h] */
+	/* join S[h] with R[h] */
+	if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {
 		RFALSE(tb->lnum[h] != 0,
 		       "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
 		       h, tb->lnum[h]);
@@ -671,17 +718,18 @@
 		return;
 	}
 
-	if (tb->lnum[h] < 0) {	/* borrow from left neighbor L[h] */
+	/* borrow from left neighbor L[h] */
+	if (tb->lnum[h] < 0) {
 		RFALSE(tb->rnum[h] != 0,
 		       "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
 		       tb->rnum[h]);
-		/*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
 		internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
 				     -tb->lnum[h]);
 		return;
 	}
 
-	if (tb->rnum[h] < 0) {	/* borrow from right neighbor R[h] */
+	/* borrow from right neighbor R[h] */
+	if (tb->rnum[h] < 0) {
 		RFALSE(tb->lnum[h] != 0,
 		       "invalid tb->lnum[%d]==%d when borrow from R[h]",
 		       h, tb->lnum[h]);
@@ -689,7 +737,8 @@
 		return;
 	}
 
-	if (tb->lnum[h] > 0) {	/* split S[h] into two parts and put them into neighbors */
+	/* split S[h] into two parts and put them into neighbors */
+	if (tb->lnum[h] > 0) {
 		RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
 		       "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
 		       h, tb->lnum[h], h, tb->rnum[h], n);
@@ -717,7 +766,7 @@
 	if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
 		return;
 
-	memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
+	memcpy(internal_key(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
 
 	do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
 }
@@ -732,34 +781,41 @@
 	       "R[h] can not be empty if it exists (item number=%d)",
 	       B_NR_ITEMS(tb->R[h]));
 
-	memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
+	memcpy(internal_key(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
 
 	do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
 }
 
-int balance_internal(struct tree_balance *tb,	/* tree_balance structure               */
-		     int h,	/* level of the tree                    */
-		     int child_pos, struct item_head *insert_key,	/* key for insertion on higher level    */
-		     struct buffer_head **insert_ptr	/* node for insertion on higher level */
-    )
-    /* if inserting/pasting
-       {
-       child_pos is the position of the node-pointer in S[h] that        *
-       pointed to S[h-1] before balancing of the h-1 level;              *
-       this means that new pointers and items must be inserted AFTER *
-       child_pos
-       }
-       else
-       {
-       it is the position of the leftmost pointer that must be deleted (together with
-       its corresponding key to the left of the pointer)
-       as a result of the previous level's balancing.
-       }
-     */
+
+/*
+ * if inserting/pasting {
+ *   child_pos is the position of the node-pointer in S[h] that
+ *   pointed to S[h-1] before balancing of the h-1 level;
+ *   this means that new pointers and items must be inserted AFTER
+ *   child_pos
+ * } else {
+ *   it is the position of the leftmost pointer that must be deleted
+ *   (together with its corresponding key to the left of the pointer)
+ *   as a result of the previous level's balancing.
+ * }
+ */
+
+int balance_internal(struct tree_balance *tb,
+		     int h,	/* level of the tree */
+		     int child_pos,
+		     /* key for insertion on higher level    */
+		     struct item_head *insert_key,
+		     /* node for insertion on higher level */
+		     struct buffer_head **insert_ptr)
 {
 	struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
 	struct buffer_info bi;
-	int order;		/* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
+
+	/*
+	 * we return this: it is 0 if there is no S[h],
+	 * else it is tb->S[h]->b_item_order
+	 */
+	int order;
 	int insert_num, n, k;
 	struct buffer_head *S_new;
 	struct item_head new_insert_key;
@@ -774,8 +830,10 @@
 	    (tbSh) ? PATH_H_POSITION(tb->tb_path,
 				     h + 1) /*tb->S[h]->b_item_order */ : 0;
 
-	/* Using insert_size[h] calculate the number insert_num of items
-	   that must be inserted to or deleted from S[h]. */
+	/*
+	 * Using insert_size[h] calculate the number insert_num of items
+	 * that must be inserted to or deleted from S[h].
+	 */
 	insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
 
 	/* Check whether insert_num is proper * */
@@ -794,23 +852,21 @@
 
 	k = 0;
 	if (tb->lnum[h] > 0) {
-		/* shift lnum[h] items from S[h] to the left neighbor L[h].
-		   check how many of new items fall into L[h] or CFL[h] after
-		   shifting */
+		/*
+		 * shift lnum[h] items from S[h] to the left neighbor L[h].
+		 * check how many of new items fall into L[h] or CFL[h] after
+		 * shifting
+		 */
 		n = B_NR_ITEMS(tb->L[h]);	/* number of items in L[h] */
 		if (tb->lnum[h] <= child_pos) {
 			/* new items don't fall into L[h] or CFL[h] */
 			internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
 					    tb->lnum[h]);
-			/*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
 			child_pos -= tb->lnum[h];
 		} else if (tb->lnum[h] > child_pos + insert_num) {
 			/* all new items fall into L[h] */
 			internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
 					    tb->lnum[h] - insert_num);
-			/*                  internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
-			   tb->lnum[h]-insert_num);
-			 */
 			/* insert insert_num keys and node-pointers into L[h] */
 			bi.tb = tb;
 			bi.bi_bh = tb->L[h];
@@ -826,7 +882,10 @@
 		} else {
 			struct disk_child *dc;
 
-			/* some items fall into L[h] or CFL[h], but some don't fall */
+			/*
+			 * some items fall into L[h] or CFL[h],
+			 * but some don't fall
+			 */
 			internal_shift1_left(tb, h, child_pos + 1);
 			/* calculate number of new items that fall into L[h] */
 			k = tb->lnum[h] - child_pos - 1;
@@ -841,7 +900,10 @@
 
 			replace_lkey(tb, h, insert_key + k);
 
-			/* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
+			/*
+			 * replace the first node-ptr in S[h] by
+			 * node-ptr to insert_ptr[k]
+			 */
 			dc = B_N_CHILD(tbSh, 0);
 			put_dc_size(dc,
 				    MAX_CHILD_SIZE(insert_ptr[k]) -
@@ -860,17 +922,17 @@
 	/* tb->lnum[h] > 0 */
 	if (tb->rnum[h] > 0) {
 		/*shift rnum[h] items from S[h] to the right neighbor R[h] */
-		/* check how many of new items fall into R or CFR after shifting */
+		/*
+		 * check how many of new items fall into R or CFR
+		 * after shifting
+		 */
 		n = B_NR_ITEMS(tbSh);	/* number of items in S[h] */
 		if (n - tb->rnum[h] >= child_pos)
 			/* new items fall into S[h] */
-			/*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
 			internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
 					     tb->rnum[h]);
 		else if (n + insert_num - tb->rnum[h] < child_pos) {
 			/* all new items fall into R[h] */
-			/*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
-			   tb->rnum[h] - insert_num); */
 			internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
 					     tb->rnum[h] - insert_num);
 
@@ -904,7 +966,10 @@
 
 			replace_rkey(tb, h, insert_key + insert_num - k - 1);
 
-			/* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
+			/*
+			 * replace the first node-ptr in R[h] by
+			 * node-ptr insert_ptr[insert_num-k-1]
+			 */
 			dc = B_N_CHILD(tb->R[h], 0);
 			put_dc_size(dc,
 				    MAX_CHILD_SIZE(insert_ptr
@@ -921,7 +986,7 @@
 		}
 	}
 
-    /** Fill new node that appears instead of S[h] **/
+	/** Fill new node that appears instead of S[h] **/
 	RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
 	RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
 
@@ -997,26 +1062,30 @@
 			/* new items don't fall into S_new */
 			/*  store the delimiting key for the next level */
 			/* new_insert_key = (n - snum)'th key in S[h] */
-			memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
+			memcpy(&new_insert_key, internal_key(tbSh, n - snum),
 			       KEY_SIZE);
 			/* last parameter is del_par */
 			internal_move_pointers_items(&dest_bi, &src_bi,
 						     LAST_TO_FIRST, snum, 0);
-			/*            internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
 		} else if (n + insert_num - snum < child_pos) {
 			/* all new items fall into S_new */
 			/*  store the delimiting key for the next level */
-			/* new_insert_key = (n + insert_item - snum)'th key in S[h] */
+			/*
+			 * new_insert_key = (n + insert_item - snum)'th
+			 * key in S[h]
+			 */
 			memcpy(&new_insert_key,
-			       B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
+			       internal_key(tbSh, n + insert_num - snum),
 			       KEY_SIZE);
 			/* last parameter is del_par */
 			internal_move_pointers_items(&dest_bi, &src_bi,
 						     LAST_TO_FIRST,
 						     snum - insert_num, 0);
-			/*                  internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
 
-			/* insert insert_num keys and node-pointers into S_new */
+			/*
+			 * insert insert_num keys and node-pointers
+			 * into S_new
+			 */
 			internal_insert_childs(&dest_bi,
 					       /*S_new,tb->S[h-1]->b_next, */
 					       child_pos - n - insert_num +
@@ -1033,7 +1102,6 @@
 			internal_move_pointers_items(&dest_bi, &src_bi,
 						     LAST_TO_FIRST,
 						     n - child_pos + 1, 1);
-			/*                  internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
 			/* calculate number of new items that fall into S_new */
 			k = snum - n + child_pos - 1;
 
@@ -1043,7 +1111,10 @@
 			/* new_insert_key = insert_key[insert_num - k - 1] */
 			memcpy(&new_insert_key, insert_key + insert_num - k - 1,
 			       KEY_SIZE);
-			/* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
+			/*
+			 * replace first node-ptr in S_new by node-ptr
+			 * to insert_ptr[insert_num-k-1]
+			 */
 
 			dc = B_N_CHILD(S_new, 0);
 			put_dc_size(dc,
@@ -1066,7 +1137,7 @@
 		       || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
 		       S_new);
 
-		// S_new is released in unfix_nodes
+		/* S_new is released in unfix_nodes */
 	}
 
 	n = B_NR_ITEMS(tbSh);	/*number of items in S[h] */

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index bc8b800..63b2b0e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c

@@ -25,7 +25,10 @@
 
 void reiserfs_evict_inode(struct inode *inode)
 {
-	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
+	/*
+	 * We need blocks for transaction + (user+group) quota
+	 * update (possibly delete)
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 2 +
 	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
@@ -39,8 +42,12 @@
 	if (inode->i_nlink)
 		goto no_delete;
 
-	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
-	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
+	/*
+	 * The = 0 happens when we abort creating a new inode
+	 * for some reason like lack of space..
+	 * also handles bad_inode case
+	 */
+	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {
 
 		reiserfs_delete_xattrs(inode);
 
@@ -54,34 +61,43 @@
 
 		err = reiserfs_delete_object(&th, inode);
 
-		/* Do quota update inside a transaction for journaled quotas. We must do that
-		 * after delete_object so that quota updates go into the same transaction as
-		 * stat data deletion */
+		/*
+		 * Do quota update inside a transaction for journaled quotas.
+		 * We must do that after delete_object so that quota updates
+		 * go into the same transaction as stat data deletion
+		 */
 		if (!err) {
 			int depth = reiserfs_write_unlock_nested(inode->i_sb);
 			dquot_free_inode(inode);
 			reiserfs_write_lock_nested(inode->i_sb, depth);
 		}
 
-		if (journal_end(&th, inode->i_sb, jbegin_count))
+		if (journal_end(&th))
 			goto out;
 
-		/* check return value from reiserfs_delete_object after
+		/*
+		 * check return value from reiserfs_delete_object after
 		 * ending the transaction
 		 */
 		if (err)
 		    goto out;
 
-		/* all items of file are deleted, so we can remove "save" link */
-		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
-								 * about an error here */
+		/*
+		 * all items of file are deleted, so we can remove
+		 * "save" link
+		 * we can't do anything about an error here
+		 */
+		remove_save_link(inode, 0 /* not truncate */);
 out:
 		reiserfs_write_unlock(inode->i_sb);
 	} else {
 		/* no object items are in the tree */
 		;
 	}
-	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
+
+	/* note this must go after the journal_end to prevent deadlock */
+	clear_inode(inode);
+
 	dquot_drop(inode);
 	inode->i_blocks = 0;
 	return;
@@ -103,8 +119,10 @@
 	key->key_length = length;
 }
 
-/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
-   offset and type of key */
+/*
+ * take base of inode_key (it comes from inode always) (dirid, objectid)
+ * and version from an inode, set offset and type of key
+ */
 void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
 		  int type, int length)
 {
@@ -114,9 +132,7 @@
 		      length);
 }
 
-//
-// when key is 0, do not set version and short key
-//
+/* when key is 0, do not set version and short key */
 inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
 			      int version,
 			      loff_t offset, int type, int length,
@@ -132,43 +148,47 @@
 	set_le_ih_k_type(ih, type);
 	put_ih_item_len(ih, length);
 	/*    set_ih_free_space (ih, 0); */
-	// for directory items it is entry count, for directs and stat
-	// datas - 0xffff, for indirects - 0
+	/*
+	 * for directory items it is entry count, for directs and stat
+	 * datas - 0xffff, for indirects - 0
+	 */
 	put_ih_entry_count(ih, entry_count);
 }
 
-//
-// FIXME: we might cache recently accessed indirect item
+/*
+ * FIXME: we might cache recently accessed indirect item
+ * Ugh.  Not too eager for that....
+ * I cut the code until such time as I see a convincing argument (benchmark).
+ * I don't want a bloated inode struct..., and I don't like code complexity....
+ */
 
-// Ugh.  Not too eager for that....
-//  I cut the code until such time as I see a convincing argument (benchmark).
-// I don't want a bloated inode struct..., and I don't like code complexity....
+/*
+ * cutting the code is fine, since it really isn't in use yet and is easy
+ * to add back in.  But, Vladimir has a really good idea here.  Think
+ * about what happens for reading a file.  For each page,
+ * The VFS layer calls reiserfs_readpage, who searches the tree to find
+ * an indirect item.  This indirect item has X number of pointers, where
+ * X is a big number if we've done the block allocation right.  But,
+ * we only use one or two of these pointers during each call to readpage,
+ * needlessly researching again later on.
+ *
+ * The size of the cache could be dynamic based on the size of the file.
+ *
+ * I'd also like to see us cache the location of the stat data item, since
+ * we are needlessly researching for that frequently.
+ *
+ * --chris
+ */
 
-/* cutting the code is fine, since it really isn't in use yet and is easy
-** to add back in.  But, Vladimir has a really good idea here.  Think
-** about what happens for reading a file.  For each page,
-** The VFS layer calls reiserfs_readpage, who searches the tree to find
-** an indirect item.  This indirect item has X number of pointers, where
-** X is a big number if we've done the block allocation right.  But,
-** we only use one or two of these pointers during each call to readpage,
-** needlessly researching again later on.
-**
-** The size of the cache could be dynamic based on the size of the file.
-**
-** I'd also like to see us cache the location the stat data item, since
-** we are needlessly researching for that frequently.
-**
-** --chris
-*/
-
-/* If this page has a file tail in it, and
-** it was read in by get_block_create_0, the page data is valid,
-** but tail is still sitting in a direct item, and we can't write to
-** it.  So, look through this page, and check all the mapped buffers
-** to make sure they have valid block numbers.  Any that don't need
-** to be unmapped, so that __block_write_begin will correctly call
-** reiserfs_get_block to convert the tail into an unformatted node
-*/
+/*
+ * If this page has a file tail in it, and
+ * it was read in by get_block_create_0, the page data is valid,
+ * but tail is still sitting in a direct item, and we can't write to
+ * it.  So, look through this page, and check all the mapped buffers
+ * to make sure they have valid block numbers.  Any that don't need
+ * to be unmapped, so that __block_write_begin will correctly call
+ * reiserfs_get_block to convert the tail into an unformatted node
+ */
 static inline void fix_tail_page_for_writing(struct page *page)
 {
 	struct buffer_head *head, *next, *bh;
@@ -186,8 +206,10 @@
 	}
 }
 
-/* reiserfs_get_block does not need to allocate a block only if it has been
-   done already or non-hole position has been found in the indirect item */
+/*
+ * reiserfs_get_block does not need to allocate a block only if it has been
+ * done already or non-hole position has been found in the indirect item
+ */
 static inline int allocation_needed(int retval, b_blocknr_t allocated,
 				    struct item_head *ih,
 				    __le32 * item, int pos_in_item)
@@ -211,14 +233,16 @@
 	map_bh(bh, inode->i_sb, block);
 }
 
-//
-// files which were created in the earlier version can not be longer,
-// than 2 gb
-//
+/*
+ * files which were created in the earlier version can not be longer
+ * than 2 gb
+ */
 static int file_capable(struct inode *inode, sector_t block)
 {
-	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is new file.
-	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside of 2gb
+	/* it is new file. */
+	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||
+	    /* old file, but 'block' is inside of 2gb */
+	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))
 		return 1;
 
 	return 0;
@@ -228,7 +252,6 @@
 			       struct inode *inode, struct treepath *path)
 {
 	struct super_block *s = th->t_super;
-	int len = th->t_blocks_allocated;
 	int err;
 
 	BUG_ON(!th->t_trans_id);
@@ -241,7 +264,7 @@
 		return 0;
 	}
 	reiserfs_update_sd(th, inode);
-	err = journal_end(th, s, len);
+	err = journal_end(th);
 	if (!err) {
 		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
 		if (!err)
@@ -250,14 +273,14 @@
 	return err;
 }
 
-// it is called by get_block when create == 0. Returns block number
-// for 'block'-th logical block of file. When it hits direct item it
-// returns 0 (being called from bmap) or read direct item into piece
-// of page (bh_result)
-
-// Please improve the english/clarity in the comment above, as it is
-// hard to understand.
-
+/*
+ * it is called by get_block when create == 0. Returns block number
+ * for 'block'-th logical block of file. When it hits direct item it
+ * returns 0 (being called from bmap) or read direct item into piece
+ * of page (bh_result)
+ * Please improve the english/clarity in the comment above, as it is
+ * hard to understand.
+ */
 static int _get_block_create_0(struct inode *inode, sector_t block,
 			       struct buffer_head *bh_result, int args)
 {
@@ -273,7 +296,7 @@
 	int done = 0;
 	unsigned long offset;
 
-	// prepare the key to look for the 'block'-th block of file
+	/* prepare the key to look for the 'block'-th block of file */
 	make_cpu_key(&key, inode,
 		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
 		     3);
@@ -285,23 +308,28 @@
 			kunmap(bh_result->b_page);
 		if (result == IO_ERROR)
 			return -EIO;
-		// We do not return -ENOENT if there is a hole but page is uptodate, because it means
-		// That there is some MMAPED data associated with it that is yet to be written to disk.
+		/*
+		 * We do not return -ENOENT if there is a hole but page is
+		 * uptodate, because it means that there is some MMAPED data
+		 * associated with it that is yet to be written to disk.
+		 */
 		if ((args & GET_BLOCK_NO_HOLE)
 		    && !PageUptodate(bh_result->b_page)) {
 			return -ENOENT;
 		}
 		return 0;
 	}
-	//
-	bh = get_last_bh(&path);
-	ih = get_ih(&path);
-	if (is_indirect_le_ih(ih)) {
-		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
 
-		/* FIXME: here we could cache indirect item or part of it in
-		   the inode to avoid search_by_key in case of subsequent
-		   access to file */
+	bh = get_last_bh(&path);
+	ih = tp_item_head(&path);
+	if (is_indirect_le_ih(ih)) {
+		__le32 *ind_item = (__le32 *) ih_item_body(bh, ih);
+
+		/*
+		 * FIXME: here we could cache indirect item or part of it in
+		 * the inode to avoid search_by_key in case of subsequent
+		 * access to file
+		 */
 		blocknr = get_block_num(ind_item, path.pos_in_item);
 		ret = 0;
 		if (blocknr) {
@@ -311,8 +339,12 @@
 				set_buffer_boundary(bh_result);
 			}
 		} else
-			// We do not return -ENOENT if there is a hole but page is uptodate, because it means
-			// That there is some MMAPED data associated with it that is yet to  be written to disk.
+			/*
+			 * We do not return -ENOENT if there is a hole but
+			 * page is uptodate, because it means that there is
+			 * some MMAPED data associated with it that is
+			 * yet to be written to disk.
+			 */
 		if ((args & GET_BLOCK_NO_HOLE)
 			    && !PageUptodate(bh_result->b_page)) {
 			ret = -ENOENT;
@@ -323,41 +355,45 @@
 			kunmap(bh_result->b_page);
 		return ret;
 	}
-	// requested data are in direct item(s)
+	/* requested data are in direct item(s) */
 	if (!(args & GET_BLOCK_READ_DIRECT)) {
-		// we are called by bmap. FIXME: we can not map block of file
-		// when it is stored in direct item(s)
+		/*
+		 * we are called by bmap. FIXME: we can not map block of file
+		 * when it is stored in direct item(s)
+		 */
 		pathrelse(&path);
 		if (p)
 			kunmap(bh_result->b_page);
 		return -ENOENT;
 	}
 
-	/* if we've got a direct item, and the buffer or page was uptodate,
-	 ** we don't want to pull data off disk again.  skip to the
-	 ** end, where we map the buffer and return
+	/*
+	 * if we've got a direct item, and the buffer or page was uptodate,
+	 * we don't want to pull data off disk again.  skip to the
+	 * end, where we map the buffer and return
 	 */
 	if (buffer_uptodate(bh_result)) {
 		goto finished;
 	} else
 		/*
-		 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
-		 ** pages without any buffers.  If the page is up to date, we don't want
-		 ** read old data off disk.  Set the up to date bit on the buffer instead
-		 ** and jump to the end
+		 * grab_tail_page can trigger calls to reiserfs_get_block on
+		 * up to date pages without any buffers.  If the page is up
+		 * to date, we don't want to read old data off disk.  Set the
+		 * up to date bit on the buffer instead and jump to the end
 		 */
 	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
 		set_buffer_uptodate(bh_result);
 		goto finished;
 	}
-	// read file tail into part of page
+	/* read file tail into part of page */
 	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
 	copy_item_head(&tmp_ih, ih);
 
-	/* we only want to kmap if we are reading the tail into the page.
-	 ** this is not the common case, so we don't kmap until we are
-	 ** sure we need to.  But, this means the item might move if
-	 ** kmap schedules
+	/*
+	 * we only want to kmap if we are reading the tail into the page.
+	 * this is not the common case, so we don't kmap until we are
+	 * sure we need to.  But, this means the item might move if
+	 * kmap schedules
 	 */
 	if (!p)
 		p = (char *)kmap(bh_result->b_page);
@@ -368,10 +404,11 @@
 		if (!is_direct_le_ih(ih)) {
 			BUG();
 		}
-		/* make sure we don't read more bytes than actually exist in
-		 ** the file.  This can happen in odd cases where i_size isn't
-		 ** correct, and when direct item padding results in a few
-		 ** extra bytes at the end of the direct item
+		/*
+		 * make sure we don't read more bytes than actually exist in
+		 * the file.  This can happen in odd cases where i_size isn't
+		 * correct, and when direct item padding results in a few
+		 * extra bytes at the end of the direct item
 		 */
 		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
 			break;
@@ -383,40 +420,43 @@
 		} else {
 			chars = ih_item_len(ih) - path.pos_in_item;
 		}
-		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
+		memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars);
 
 		if (done)
 			break;
 
 		p += chars;
 
+		/*
+		 * we are done if the direct item just read is not the last
+		 * item of the node.  FIXME: we could try to check right
+		 * delimiting key to see whether direct item continues in
+		 * the right neighbor or rely on i_size
+		 */
 		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
-			// we done, if read direct item is not the last item of
-			// node FIXME: we could try to check right delimiting key
-			// to see whether direct item continues in the right
-			// neighbor or rely on i_size
 			break;
 
-		// update key to look for the next piece
+		/* update key to look for the next piece */
 		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
 		result = search_for_position_by_key(inode->i_sb, &key, &path);
 		if (result != POSITION_FOUND)
-			// i/o error most likely
+			/* i/o error most likely */
 			break;
 		bh = get_last_bh(&path);
-		ih = get_ih(&path);
+		ih = tp_item_head(&path);
 	} while (1);
 
 	flush_dcache_page(bh_result->b_page);
 	kunmap(bh_result->b_page);
 
-      finished:
+finished:
 	pathrelse(&path);
 
 	if (result == IO_ERROR)
 		return -EIO;
 
-	/* this buffer has valid data, but isn't valid for io.  mapping it to
+	/*
+	 * this buffer has valid data, but isn't valid for io.  mapping it to
 	 * block #0 tells the rest of reiserfs it just has a tail in it
 	 */
 	map_bh(bh_result, inode->i_sb, 0);
@@ -424,8 +464,10 @@
 	return 0;
 }
 
-// this is called to create file map. So, _get_block_create_0 will not
-// read direct item
+/*
+ * this is called to create file map. So, _get_block_create_0 will not
+ * read direct item
+ */
 static int reiserfs_bmap(struct inode *inode, sector_t block,
 			 struct buffer_head *bh_result, int create)
 {
@@ -439,22 +481,23 @@
 	return 0;
 }
 
-/* special version of get_block that is only used by grab_tail_page right
-** now.  It is sent to __block_write_begin, and when you try to get a
-** block past the end of the file (or a block from a hole) it returns
-** -ENOENT instead of a valid buffer.  __block_write_begin expects to
-** be able to do i/o on the buffers returned, unless an error value
-** is also returned.
-**
-** So, this allows __block_write_begin to be used for reading a single block
-** in a page.  Where it does not produce a valid page for holes, or past the
-** end of the file.  This turns out to be exactly what we need for reading
-** tails for conversion.
-**
-** The point of the wrapper is forcing a certain value for create, even
-** though the VFS layer is calling this function with create==1.  If you
-** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
-** don't use this function.
+/*
+ * special version of get_block that is only used by grab_tail_page right
+ * now.  It is sent to __block_write_begin, and when you try to get a
+ * block past the end of the file (or a block from a hole) it returns
+ * -ENOENT instead of a valid buffer.  __block_write_begin expects to
+ * be able to do i/o on the buffers returned, unless an error value
+ * is also returned.
+ *
+ * So, this allows __block_write_begin to be used for reading a single block
+ * in a page.  Where it does not produce a valid page for holes, or past the
+ * end of the file.  This turns out to be exactly what we need for reading
+ * tails for conversion.
+ *
+ * The point of the wrapper is forcing a certain value for create, even
+ * though the VFS layer is calling this function with create==1.  If you
+ * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
+ * don't use this function.
 */
 static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
 				       struct buffer_head *bh_result,
@@ -463,8 +506,10 @@
 	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
 }
 
-/* This is special helper for reiserfs_get_block in case we are executing
-   direct_IO request. */
+/*
+ * This is special helper for reiserfs_get_block in case we are executing
+ * direct_IO request.
+ */
 static int reiserfs_get_blocks_direct_io(struct inode *inode,
 					 sector_t iblock,
 					 struct buffer_head *bh_result,
@@ -474,9 +519,11 @@
 
 	bh_result->b_page = NULL;
 
-	/* We set the b_size before reiserfs_get_block call since it is
-	   referenced in convert_tail_for_hole() that may be called from
-	   reiserfs_get_block() */
+	/*
+	 * We set the b_size before reiserfs_get_block call since it is
+	 * referenced in convert_tail_for_hole() that may be called from
+	 * reiserfs_get_block()
+	 */
 	bh_result->b_size = (1 << inode->i_blkbits);
 
 	ret = reiserfs_get_block(inode, iblock, bh_result,
@@ -486,14 +533,18 @@
 
 	/* don't allow direct io onto tail pages */
 	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
-		/* make sure future calls to the direct io funcs for this offset
-		 ** in the file fail by unmapping the buffer
+		/*
+		 * make sure future calls to the direct io funcs for this
+		 * offset in the file fail by unmapping the buffer
 		 */
 		clear_buffer_mapped(bh_result);
 		ret = -EINVAL;
 	}
-	/* Possible unpacked tail. Flush the data before pages have
-	   disappeared */
+
+	/*
+	 * Possible unpacked tail. Flush the data before pages have
+	 * disappeared
+	 */
 	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
 		int err;
 
@@ -507,20 +558,20 @@
 		if (err < 0)
 			ret = err;
 	}
-      out:
+out:
 	return ret;
 }
 
 /*
-** helper function for when reiserfs_get_block is called for a hole
-** but the file tail is still in a direct item
-** bh_result is the buffer head for the hole
-** tail_offset is the offset of the start of the tail in the file
-**
-** This calls prepare_write, which will start a new transaction
-** you should not be in a transaction, or have any paths held when you
-** call this.
-*/
+ * helper function for when reiserfs_get_block is called for a hole
+ * but the file tail is still in a direct item
+ * bh_result is the buffer head for the hole
+ * tail_offset is the offset of the start of the tail in the file
+ *
+ * This calls prepare_write, which will start a new transaction
+ * you should not be in a transaction, or have any paths held when you
+ * call this.
+ */
 static int convert_tail_for_hole(struct inode *inode,
 				 struct buffer_head *bh_result,
 				 loff_t tail_offset)
@@ -540,9 +591,10 @@
 	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
 
 	index = tail_offset >> PAGE_CACHE_SHIFT;
-	/* hole_page can be zero in case of direct_io, we are sure
-	   that we cannot get here if we write with O_DIRECT into
-	   tail page */
+	/*
+	 * hole_page can be zero in case of direct_io, we are sure
+	 * that we cannot get here if we write with O_DIRECT into tail page
+	 */
 	if (!hole_page || index != hole_page->index) {
 		tail_page = grab_cache_page(inode->i_mapping, index);
 		retval = -ENOMEM;
@@ -553,14 +605,15 @@
 		tail_page = hole_page;
 	}
 
-	/* we don't have to make sure the conversion did not happen while
-	 ** we were locking the page because anyone that could convert
-	 ** must first take i_mutex.
-	 **
-	 ** We must fix the tail page for writing because it might have buffers
-	 ** that are mapped, but have a block number of 0.  This indicates tail
-	 ** data that has been read directly into the page, and
-	 ** __block_write_begin won't trigger a get_block in this case.
+	/*
+	 * we don't have to make sure the conversion did not happen while
+	 * we were locking the page because anyone that could convert
+	 * must first take i_mutex.
+	 *
+	 * We must fix the tail page for writing because it might have buffers
+	 * that are mapped, but have a block number of 0.  This indicates tail
+	 * data that has been read directly into the page, and
+	 * __block_write_begin won't trigger a get_block in this case.
 	 */
 	fix_tail_page_for_writing(tail_page);
 	retval = __reiserfs_write_begin(tail_page, tail_start,
@@ -573,12 +626,12 @@
 
 	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
 
-      unlock:
+unlock:
 	if (tail_page != hole_page) {
 		unlock_page(tail_page);
 		page_cache_release(tail_page);
 	}
-      out:
+out:
 	return retval;
 }
 
@@ -604,7 +657,8 @@
 		       struct buffer_head *bh_result, int create)
 {
 	int repeat, retval = 0;
-	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
+	/* b_blocknr_t is (unsigned) 32 bit int */
+	b_blocknr_t allocated_block_nr = 0;
 	INITIALIZE_PATH(path);
 	int pos_in_item;
 	struct cpu_key key;
@@ -614,12 +668,14 @@
 	int done;
 	int fs_gen;
 	struct reiserfs_transaction_handle *th = NULL;
-	/* space reserved in transaction batch:
-	   . 3 balancings in direct->indirect conversion
-	   . 1 block involved into reiserfs_update_sd()
-	   XXX in practically impossible worst case direct2indirect()
-	   can incur (much) more than 3 balancings.
-	   quota update for user, group */
+	/*
+	 * space reserved in transaction batch:
+	 * . 3 balancings in direct->indirect conversion
+	 * . 1 block involved into reiserfs_update_sd()
+	 * XXX in practically impossible worst case direct2indirect()
+	 * can incur (much) more than 3 balancings.
+	 * quota update for user, group
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
@@ -636,8 +692,9 @@
 		return -EFBIG;
 	}
 
-	/* if !create, we aren't changing the FS, so we don't need to
-	 ** log anything, so we don't need to start a transaction
+	/*
+	 * if !create, we aren't changing the FS, so we don't need to
+	 * log anything, so we don't need to start a transaction
 	 */
 	if (!(create & GET_BLOCK_CREATE)) {
 		int ret;
@@ -647,6 +704,7 @@
 		reiserfs_write_unlock(inode->i_sb);
 		return ret;
 	}
+
 	/*
 	 * if we're already in a transaction, make sure to close
 	 * any new transactions we start in this func
@@ -655,8 +713,10 @@
 	    reiserfs_transaction_running(inode->i_sb))
 		dangle = 0;
 
-	/* If file is of such a size, that it might have a tail and tails are enabled
-	 ** we should mark it as possibly needing tail packing on close
+	/*
+	 * If file is of such a size, that it might have a tail and
+	 * tails are enabled, we should mark it as possibly needing
+	 * tail packing on close
 	 */
 	if ((have_large_tails(inode->i_sb)
 	     && inode->i_size < i_block_size(inode) * 4)
@@ -667,7 +727,7 @@
 	/* set the key of the first byte in the 'block'-th block of file */
 	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
 	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
-	      start_trans:
+start_trans:
 		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
 		if (!th) {
 			retval = -ENOMEM;
@@ -675,7 +735,7 @@
 		}
 		reiserfs_update_inode_transaction(inode);
 	}
-      research:
+research:
 
 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
 	if (retval == IO_ERROR) {
@@ -684,8 +744,8 @@
 	}
 
 	bh = get_last_bh(&path);
-	ih = get_ih(&path);
-	item = get_item(&path);
+	ih = tp_item_head(&path);
+	item = tp_item_body(&path);
 	pos_in_item = path.pos_in_item;
 
 	fs_gen = get_generation(inode->i_sb);
@@ -703,11 +763,12 @@
 		    _allocate_block(th, block, inode, &allocated_block_nr,
 				    &path, create);
 
+		/*
+		 * restart the transaction to give the journal a chance to free
+		 * some blocks.  releases the path, so we have to go back to
+		 * research if we succeed on the second try
+		 */
 		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
-			/* restart the transaction to give the journal a chance to free
-			 ** some blocks.  releases the path, so we have to go back to
-			 ** research if we succeed on the second try
-			 */
 			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
 			retval = restart_transaction(th, inode, &path);
 			if (retval)
@@ -734,9 +795,11 @@
 
 	if (indirect_item_found(retval, ih)) {
 		b_blocknr_t unfm_ptr;
-		/* 'block'-th block is in the file already (there is
-		   corresponding cell in some indirect item). But it may be
-		   zero unformatted node pointer (hole) */
+		/*
+		 * 'block'-th block is in the file already (there is
+		 * corresponding cell in some indirect item). But it may be
+		 * zero unformatted node pointer (hole)
+		 */
 		unfm_ptr = get_block_num(item, pos_in_item);
 		if (unfm_ptr == 0) {
 			/* use allocated block to plug the hole */
@@ -753,7 +816,7 @@
 				reiserfs_add_ordered_list(inode, bh_result);
 			put_block_num(item, pos_in_item, allocated_block_nr);
 			unfm_ptr = allocated_block_nr;
-			journal_mark_dirty(th, inode->i_sb, bh);
+			journal_mark_dirty(th, bh);
 			reiserfs_update_sd(th, inode);
 		}
 		set_block_dev_mapped(bh_result, unfm_ptr, inode);
@@ -764,9 +827,10 @@
 
 		reiserfs_write_unlock(inode->i_sb);
 
-		/* the item was found, so new blocks were not added to the file
-		 ** there is no need to make sure the inode is updated with this
-		 ** transaction
+		/*
+		 * the item was found, so new blocks were not added to the file
+		 * there is no need to make sure the inode is updated with this
+		 * transaction
 		 */
 		return retval;
 	}
@@ -776,9 +840,11 @@
 		goto start_trans;
 	}
 
-	/* desired position is not found or is in the direct item. We have
-	   to append file with holes up to 'block'-th block converting
-	   direct items to indirect one if necessary */
+	/*
+	 * desired position is not found or is in the direct item. We have
+	 * to append file with holes up to 'block'-th block converting
+	 * direct items to indirect one if necessary
+	 */
 	done = 0;
 	do {
 		if (is_statdata_le_ih(ih)) {
@@ -790,16 +856,18 @@
 					  TYPE_INDIRECT, UNFM_P_SIZE,
 					  0 /* free_space */ );
 
+			/*
+			 * we are going to add 'block'-th block to the file.
+			 * Use allocated block for that
+			 */
 			if (cpu_key_k_offset(&key) == 1) {
-				/* we are going to add 'block'-th block to the file. Use
-				   allocated block for that */
 				unp = cpu_to_le32(allocated_block_nr);
 				set_block_dev_mapped(bh_result,
 						     allocated_block_nr, inode);
 				set_buffer_new(bh_result);
 				done = 1;
 			}
-			tmp_key = key;	// ;)
+			tmp_key = key;	/* ;) */
 			set_cpu_key_k_offset(&tmp_key, 1);
 			PATH_LAST_POSITION(&path)++;
 
@@ -809,9 +877,12 @@
 			if (retval) {
 				reiserfs_free_block(th, inode,
 						    allocated_block_nr, 1);
-				goto failure;	// retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
+				/*
+				 * retval == -ENOSPC, -EDQUOT or -EIO
+				 * or -EEXIST
+				 */
+				goto failure;
 			}
-			//mark_tail_converted (inode);
 		} else if (is_direct_le_ih(ih)) {
 			/* direct item has to be converted */
 			loff_t tail_offset;
@@ -819,18 +890,24 @@
 			tail_offset =
 			    ((le_ih_k_offset(ih) -
 			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
+
+			/*
+			 * direct item we just found fits into block we have
+			 * to map. Convert it into unformatted node: use
+			 * bh_result for the conversion
+			 */
 			if (tail_offset == cpu_key_k_offset(&key)) {
-				/* direct item we just found fits into block we have
-				   to map. Convert it into unformatted node: use
-				   bh_result for the conversion */
 				set_block_dev_mapped(bh_result,
 						     allocated_block_nr, inode);
 				unbh = bh_result;
 				done = 1;
 			} else {
-				/* we have to padd file tail stored in direct item(s)
-				   up to block size and convert it to unformatted
-				   node. FIXME: this should also get into page cache */
+				/*
+				 * we have to pad file tail stored in direct
+				 * item(s) up to block size and convert it
+				 * to unformatted node. FIXME: this should
+				 * also get into page cache
+				 */
 
 				pathrelse(&path);
 				/*
@@ -859,7 +936,10 @@
 							inode->i_ino,
 							retval);
 					if (allocated_block_nr) {
-						/* the bitmap, the super, and the stat data == 3 */
+						/*
+						 * the bitmap, the super,
+						 * and the stat data == 3
+						 */
 						if (!th)
 							th = reiserfs_persistent_transaction(inode->i_sb, 3);
 						if (th)
@@ -881,43 +961,57 @@
 						    allocated_block_nr, 1);
 				goto failure;
 			}
-			/* it is important the set_buffer_uptodate is done after
-			 ** the direct2indirect.  The buffer might contain valid
-			 ** data newer than the data on disk (read by readpage, changed,
-			 ** and then sent here by writepage).  direct2indirect needs
-			 ** to know if unbh was already up to date, so it can decide
-			 ** if the data in unbh needs to be replaced with data from
-			 ** the disk
+			/*
+			 * it is important the set_buffer_uptodate is done
+			 * after the direct2indirect.  The buffer might
+			 * contain valid data newer than the data on disk
+			 * (read by readpage, changed, and then sent here by
+			 * writepage).  direct2indirect needs to know if unbh
+			 * was already up to date, so it can decide if the
+			 * data in unbh needs to be replaced with data from
+			 * the disk
 			 */
 			set_buffer_uptodate(unbh);
 
-			/* unbh->b_page == NULL in case of DIRECT_IO request, this means
-			   buffer will disappear shortly, so it should not be added to
+			/*
+			 * unbh->b_page == NULL in case of DIRECT_IO request,
+			 * this means buffer will disappear shortly, so it
+			 * should not be added to the tail list or marked dirty
 			 */
 			if (unbh->b_page) {
-				/* we've converted the tail, so we must
-				 ** flush unbh before the transaction commits
+				/*
+				 * we've converted the tail, so we must
+				 * flush unbh before the transaction commits
 				 */
 				reiserfs_add_tail_list(inode, unbh);
 
-				/* mark it dirty now to prevent commit_write from adding
-				 ** this buffer to the inode's dirty buffer list
+				/*
+				 * mark it dirty now to prevent commit_write
+				 * from adding this buffer to the inode's
+				 * dirty buffer list
 				 */
 				/*
-				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
-				 * It's still atomic, but it sets the page dirty too,
-				 * which makes it eligible for writeback at any time by the
-				 * VM (which was also the case with __mark_buffer_dirty())
+				 * AKPM: changed __mark_buffer_dirty to
+				 * mark_buffer_dirty().  It's still atomic,
+				 * but it sets the page dirty too, which makes
+				 * it eligible for writeback at any time by the
+				 * VM (which was also the case with
+				 * __mark_buffer_dirty())
 				 */
 				mark_buffer_dirty(unbh);
 			}
 		} else {
-			/* append indirect item with holes if needed, when appending
-			   pointer to 'block'-th block use block, which is already
-			   allocated */
+			/*
+			 * append indirect item with holes if needed, when
+			 * appending pointer to 'block'-th block use block,
+			 * which is already allocated
+			 */
 			struct cpu_key tmp_key;
-			unp_t unf_single = 0;	// We use this in case we need to allocate only
-			// one block which is a fastpath
+			/*
+			 * We use this in case we need to allocate
+			 * only one block which is a fastpath
+			 */
+			unp_t unf_single = 0;
 			unp_t *un;
 			__u64 max_to_insert =
 			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
@@ -926,14 +1020,17 @@
 
 			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
 			       "vs-804: invalid position for append");
-			/* indirect item has to be appended, set up key of that position */
+			/*
+			 * indirect item has to be appended,
+			 * set up key of that position
+			 * (key type is unimportant)
+			 */
 			make_cpu_key(&tmp_key, inode,
 				     le_key_k_offset(version,
-						     &(ih->ih_key)) +
+						     &ih->ih_key) +
 				     op_bytes_number(ih,
 						     inode->i_sb->s_blocksize),
-				     //pos_in_item * inode->i_sb->s_blocksize,
-				     TYPE_INDIRECT, 3);	// key type is unimportant
+				     TYPE_INDIRECT, 3);
 
 			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
 			       "green-805: invalid offset");
@@ -954,8 +1051,10 @@
 				}
 			}
 			if (blocks_needed <= max_to_insert) {
-				/* we are going to add target block to the file. Use allocated
-				   block for that */
+				/*
+				 * we are going to add target block to
+				 * the file. Use allocated block for that
+				 */
 				un[blocks_needed - 1] =
 				    cpu_to_le32(allocated_block_nr);
 				set_block_dev_mapped(bh_result,
@@ -964,8 +1063,11 @@
 				done = 1;
 			} else {
 				/* paste hole to the indirect item */
-				/* If kmalloc failed, max_to_insert becomes zero and it means we
-				   only have space for one block */
+				/*
+				 * If kmalloc failed, max_to_insert becomes
+				 * zero and it means we only have space for
+				 * one block
+				 */
 				blocks_needed =
 				    max_to_insert ? max_to_insert : 1;
 			}
@@ -984,9 +1086,12 @@
 				goto failure;
 			}
 			if (!done) {
-				/* We need to mark new file size in case this function will be
-				   interrupted/aborted later on. And we may do this only for
-				   holes. */
+				/*
+				 * We need to mark new file size in case
+				 * this function will be interrupted/aborted
+				 * later on. And we may do this only for
+				 * holes.
+				 */
 				inode->i_size +=
 				    inode->i_sb->s_blocksize * blocks_needed;
 			}
@@ -995,13 +1100,13 @@
 		if (done == 1)
 			break;
 
-		/* this loop could log more blocks than we had originally asked
-		 ** for.  So, we have to allow the transaction to end if it is
-		 ** too big or too full.  Update the inode so things are
-		 ** consistent if we crash before the function returns
-		 **
-		 ** release the path so that anybody waiting on the path before
-		 ** ending their transaction will be able to continue.
+		/*
+		 * this loop could log more blocks than we had originally
+		 * asked for.  So, we have to allow the transaction to end
+		 * if it is too big or too full.  Update the inode so things
+		 * are consistent if we crash before the function returns.
+		 * Release the path so that anybody waiting on the path before
+		 * ending their transaction will be able to continue.
 		 */
 		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
 			retval = restart_transaction(th, inode, &path);
@@ -1031,14 +1136,14 @@
 			goto failure;
 		}
 		bh = get_last_bh(&path);
-		ih = get_ih(&path);
-		item = get_item(&path);
+		ih = tp_item_head(&path);
+		item = tp_item_body(&path);
 		pos_in_item = path.pos_in_item;
 	} while (1);
 
 	retval = 0;
 
-      failure:
+failure:
 	if (th && (!dangle || (retval && !th->t_trans_id))) {
 		int err;
 		if (th->t_trans_id)
@@ -1060,8 +1165,10 @@
 	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
 }
 
-/* Compute real number of used bytes by file
- * Following three functions can go away when we'll have enough space in stat item
+/*
+ * Compute real number of used bytes by file
+ * Following three functions can go away when we'll have enough space in
+ * stat item
  */
 static int real_space_diff(struct inode *inode, int sd_size)
 {
@@ -1071,13 +1178,14 @@
 	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
 		return sd_size;
 
-	/* End of file is also in full block with indirect reference, so round
-	 ** up to the next block.
-	 **
-	 ** there is just no way to know if the tail is actually packed
-	 ** on the file, so we have to assume it isn't.  When we pack the
-	 ** tail, we add 4 bytes to pretend there really is an unformatted
-	 ** node pointer
+	/*
+	 * End of file is also in full block with indirect reference, so round
+	 * up to the next block.
+	 *
+	 * there is just no way to know if the tail is actually packed
+	 * on the file, so we have to assume it isn't.  When we pack the
+	 * tail, we add 4 bytes to pretend there really is an unformatted
+	 * node pointer
 	 */
 	bytes =
 	    ((inode->i_size +
@@ -1108,36 +1216,36 @@
 		bytes += (loff_t) 511;
 	}
 
-	/* files from before the quota patch might i_blocks such that
-	 ** bytes < real_space.  Deal with that here to prevent it from
-	 ** going negative.
+	/*
+	 * files from before the quota patch might have i_blocks such that
+	 * bytes < real_space.  Deal with that here to prevent it from
+	 * going negative.
 	 */
 	if (bytes < real_space)
 		return 0;
 	return (bytes - real_space) >> 9;
 }
 
-//
-// BAD: new directories have stat data of new type and all other items
-// of old type. Version stored in the inode says about body items, so
-// in update_stat_data we can not rely on inode, but have to check
-// item version directly
-//
+/*
+ * BAD: new directories have stat data of new type and all other items
+ * of old type. Version stored in the inode says about body items, so
+ * in update_stat_data we can not rely on inode, but have to check
+ * item version directly
+ */
 
-// called by read_locked_inode
+/* called by read_locked_inode */
 static void init_inode(struct inode *inode, struct treepath *path)
 {
 	struct buffer_head *bh;
 	struct item_head *ih;
 	__u32 rdev;
-	//int version = ITEM_VERSION_1;
 
 	bh = PATH_PLAST_BUFFER(path);
-	ih = PATH_PITEM_HEAD(path);
+	ih = tp_item_head(path);
 
-	copy_key(INODE_PKEY(inode), &(ih->ih_key));
+	copy_key(INODE_PKEY(inode), &ih->ih_key);
 
-	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
+	INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
 	REISERFS_I(inode)->i_flags = 0;
 	REISERFS_I(inode)->i_prealloc_block = 0;
 	REISERFS_I(inode)->i_prealloc_count = 0;
@@ -1147,7 +1255,7 @@
 
 	if (stat_data_v1(ih)) {
 		struct stat_data_v1 *sd =
-		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
+		    (struct stat_data_v1 *)ih_item_body(bh, ih);
 		unsigned long blocks;
 
 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
@@ -1168,20 +1276,26 @@
 		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
 		blocks = (inode->i_size + 511) >> 9;
 		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
+
+		/*
+		 * there was a bug in <=3.5.23 when i_blocks could take
+		 * negative values. Starting from 3.5.17 this value could
+		 * even be stored in stat data. For such files we set
+		 * i_blocks based on file size. Just 2 notes: this can be
+		 * wrong for sparse files. On-disk value will be only
+		 * updated if file's inode will ever change
+		 */
 		if (inode->i_blocks > blocks) {
-			// there was a bug in <=3.5.23 when i_blocks could take negative
-			// values. Starting from 3.5.17 this value could even be stored in
-			// stat data. For such files we set i_blocks based on file
-			// size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
-			// only updated if file's inode will ever change
 			inode->i_blocks = blocks;
 		}
 
 		rdev = sd_v1_rdev(sd);
 		REISERFS_I(inode)->i_first_direct_byte =
 		    sd_v1_first_direct_byte(sd);
-		/* an early bug in the quota code can give us an odd number for the
-		 ** block count.  This is incorrect, fix it here.
+
+		/*
+		 * an early bug in the quota code can give us an odd
+		 * number for the block count.  This is incorrect, fix it here.
 		 */
 		if (inode->i_blocks & 1) {
 			inode->i_blocks++;
@@ -1189,13 +1303,17 @@
 		inode_set_bytes(inode,
 				to_real_used_space(inode, inode->i_blocks,
 						   SD_V1_SIZE));
-		/* nopack is initially zero for v1 objects. For v2 objects,
-		   nopack is initialised from sd_attrs */
+		/*
+		 * nopack is initially zero for v1 objects. For v2 objects,
+		 * nopack is initialised from sd_attrs
+		 */
 		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
 	} else {
-		// new stat data found, but object may have old items
-		// (directories and symlinks)
-		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
+		/*
+		 * new stat data found, but object may have old items
+		 * (directories and symlinks)
+		 */
+		struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih);
 
 		inode->i_mode = sd_v2_mode(sd);
 		set_nlink(inode, sd_v2_nlink(sd));
@@ -1225,8 +1343,10 @@
 		inode_set_bytes(inode,
 				to_real_used_space(inode, inode->i_blocks,
 						   SD_V2_SIZE));
-		/* read persistent inode attributes from sd and initialise
-		   generic inode flags from them */
+		/*
+		 * read persistent inode attributes from sd and initialise
+		 * generic inode flags from them
+		 */
 		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
 		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
 	}
@@ -1249,7 +1369,7 @@
 	}
 }
 
-// update new stat data with inode fields
+/* update new stat data with inode fields */
 static void inode2sd(void *sd, struct inode *inode, loff_t size)
 {
 	struct stat_data *sd_v2 = (struct stat_data *)sd;
@@ -1273,7 +1393,7 @@
 	set_sd_v2_attrs(sd_v2, flags);
 }
 
-// used to copy inode's fields to old stat data
+/* used to copy inode's fields to old stat data */
 static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
 {
 	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
@@ -1292,14 +1412,15 @@
 	else
 		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
 
-	// Sigh. i_first_direct_byte is back
+	/* Sigh. i_first_direct_byte is back */
 	set_sd_v1_first_direct_byte(sd_v1,
 				    REISERFS_I(inode)->i_first_direct_byte);
 }
 
-/* NOTE, you must prepare the buffer head before sending it here,
-** and then log it after the call
-*/
+/*
+ * NOTE, you must prepare the buffer head before sending it here,
+ * and then log it after the call
+ */
 static void update_stat_data(struct treepath *path, struct inode *inode,
 			     loff_t size)
 {
@@ -1307,17 +1428,17 @@
 	struct item_head *ih;
 
 	bh = PATH_PLAST_BUFFER(path);
-	ih = PATH_PITEM_HEAD(path);
+	ih = tp_item_head(path);
 
 	if (!is_statdata_le_ih(ih))
 		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
 			       INODE_PKEY(inode), ih);
 
+	/* path points to old stat data */
 	if (stat_data_v1(ih)) {
-		// path points to old stat data
-		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
+		inode2sd_v1(ih_item_body(bh, ih), inode, size);
 	} else {
-		inode2sd(B_I_PITEM(bh, ih), inode, size);
+		inode2sd(ih_item_body(bh, ih), inode, size);
 	}
 
 	return;
@@ -1335,7 +1456,8 @@
 
 	BUG_ON(!th->t_trans_id);
 
-	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant
+	/* key type is unimportant */
+	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);
 
 	for (;;) {
 		int pos;
@@ -1363,45 +1485,48 @@
 			return;
 		}
 
-		/* sigh, prepare_for_journal might schedule.  When it schedules the
-		 ** FS might change.  We have to detect that, and loop back to the
-		 ** search if the stat data item has moved
+		/*
+		 * sigh, prepare_for_journal might schedule.  When it
+		 * schedules the FS might change.  We have to detect that,
+		 * and loop back to the search if the stat data item has moved
 		 */
 		bh = get_last_bh(&path);
-		ih = get_ih(&path);
+		ih = tp_item_head(&path);
 		copy_item_head(&tmp_ih, ih);
 		fs_gen = get_generation(inode->i_sb);
 		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
+
+		/* Stat_data item has been moved after scheduling. */
 		if (fs_changed(fs_gen, inode->i_sb)
 		    && item_moved(&tmp_ih, &path)) {
 			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
-			continue;	/* Stat_data item has been moved after scheduling. */
+			continue;
 		}
 		break;
 	}
 	update_stat_data(&path, inode, size);
-	journal_mark_dirty(th, th->t_super, bh);
+	journal_mark_dirty(th, bh);
 	pathrelse(&path);
 	return;
 }
 
-/* reiserfs_read_locked_inode is called to read the inode off disk, and it
-** does a make_bad_inode when things go wrong.  But, we need to make sure
-** and clear the key in the private portion of the inode, otherwise a
-** corresponding iput might try to delete whatever object the inode last
-** represented.
-*/
+/*
+ * reiserfs_read_locked_inode is called to read the inode off disk, and it
+ * does a make_bad_inode when things go wrong.  But, we need to make sure
+ * and clear the key in the private portion of the inode, otherwise a
+ * corresponding iput might try to delete whatever object the inode last
+ * represented.
+ */
 static void reiserfs_make_bad_inode(struct inode *inode)
 {
 	memset(INODE_PKEY(inode), 0, KEY_SIZE);
 	make_bad_inode(inode);
 }
 
-//
-// initially this function was derived from minix or ext2's analog and
-// evolved as the prototype did
-//
-
+/*
+ * initially this function was derived from minix or ext2's analog and
+ * evolved as the prototype did
+ */
 int reiserfs_init_locked_inode(struct inode *inode, void *p)
 {
 	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
@@ -1410,8 +1535,10 @@
 	return 0;
 }
 
-/* looks for stat data in the tree, and fills up the fields of in-core
-   inode stat data fields */
+/*
+ * looks for stat data in the tree, and fills up the fields of in-core
+ * inode stat data fields
+ */
 void reiserfs_read_locked_inode(struct inode *inode,
 				struct reiserfs_iget_args *args)
 {
@@ -1422,8 +1549,10 @@
 
 	dirino = args->dirid;
 
-	/* set version 1, version 2 could be used too, because stat data
-	   key is the same in both versions */
+	/*
+	 * set version 1, version 2 could be used too, because stat data
+	 * key is the same in both versions
+	 */
 	key.version = KEY_FORMAT_3_5;
 	key.on_disk_key.k_dir_id = dirino;
 	key.on_disk_key.k_objectid = inode->i_ino;
@@ -1439,8 +1568,9 @@
 		reiserfs_make_bad_inode(inode);
 		return;
 	}
+
+	/* a stale NFS handle can trigger this without it being an error */
 	if (retval != ITEM_FOUND) {
-		/* a stale NFS handle can trigger this without it being an error */
 		pathrelse(&path_to_sd);
 		reiserfs_make_bad_inode(inode);
 		clear_nlink(inode);
@@ -1449,20 +1579,25 @@
 
 	init_inode(inode, &path_to_sd);
 
-	/* It is possible that knfsd is trying to access inode of a file
-	   that is being removed from the disk by some other thread. As we
-	   update sd on unlink all that is required is to check for nlink
-	   here. This bug was first found by Sizif when debugging
-	   SquidNG/Butterfly, forgotten, and found again after Philippe
-	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
+	/*
+	 * It is possible that knfsd is trying to access inode of a file
+	 * that is being removed from the disk by some other thread. As we
+	 * update sd on unlink all that is required is to check for nlink
+	 * here. This bug was first found by Sizif when debugging
+	 * SquidNG/Butterfly, forgotten, and found again after Philippe
+	 * Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
 
-	   More logical fix would require changes in fs/inode.c:iput() to
-	   remove inode from hash-table _after_ fs cleaned disk stuff up and
-	   in iget() to return NULL if I_FREEING inode is found in
-	   hash-table. */
-	/* Currently there is one place where it's ok to meet inode with
-	   nlink==0: processing of open-unlinked and half-truncated files
-	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
+	 * More logical fix would require changes in fs/inode.c:iput() to
+	 * remove inode from hash-table _after_ fs cleaned disk stuff up and
+	 * in iget() to return NULL if I_FREEING inode is found in
+	 * hash-table.
+	 */
+
+	/*
+	 * Currently there is one place where it's ok to meet inode with
+	 * nlink==0: processing of open-unlinked and half-truncated files
+	 * during mount (fs/reiserfs/super.c:finish_unfinished()).
+	 */
 	if ((inode->i_nlink == 0) &&
 	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
 		reiserfs_warning(inode->i_sb, "vs-13075",
@@ -1472,7 +1607,8 @@
 		reiserfs_make_bad_inode(inode);
 	}
 
-	reiserfs_check_path(&path_to_sd);	/* init inode should be relsing */
+	/* init inode should be releasing */
+	reiserfs_check_path(&path_to_sd);
 
 	/*
 	 * Stat data v1 doesn't support ACLs.
@@ -1481,7 +1617,7 @@
 		cache_no_acl(inode);
 }
 
-/**
+/*
  * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
  *
  * @inode:    inode from hash table to check
@@ -1556,7 +1692,8 @@
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 		int fh_len, int fh_type)
 {
-	/* fhtype happens to reflect the number of u32s encoded.
+	/*
+	 * fhtype happens to reflect the number of u32s encoded.
 	 * due to a bug in earlier code, fhtype might indicate there
 	 * are more u32s then actually fitted.
 	 * so if fhtype seems to be more than len, reduce fhtype.
@@ -1625,13 +1762,16 @@
 	return *lenp;
 }
 
-/* looks for stat data, then copies fields to it, marks the buffer
-   containing stat data as dirty */
-/* reiserfs inodes are never really dirty, since the dirty inode call
-** always logs them.  This call allows the VFS inode marking routines
-** to properly mark inodes for datasync and such, but only actually
-** does something when called for a synchronous update.
-*/
+/*
+ * looks for stat data, then copies fields to it, marks the buffer
+ * containing stat data as dirty
+ */
+/*
+ * reiserfs inodes are never really dirty, since the dirty inode call
+ * always logs them.  This call allows the VFS inode marking routines
+ * to properly mark inodes for datasync and such, but only actually
+ * does something when called for a synchronous update.
+ */
 int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct reiserfs_transaction_handle th;
@@ -1639,24 +1779,28 @@
 
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return -EROFS;
-	/* memory pressure can sometimes initiate write_inode calls with sync == 1,
-	 ** these cases are just when the system needs ram, not when the
-	 ** inode needs to reach disk for safety, and they can safely be
-	 ** ignored because the altered inode has already been logged.
+	/*
+	 * memory pressure can sometimes initiate write_inode calls with
+	 * sync == 1,
+	 * these cases are just when the system needs ram, not when the
+	 * inode needs to reach disk for safety, and they can safely be
+	 * ignored because the altered inode has already been logged.
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
 		reiserfs_write_lock(inode->i_sb);
 		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
 			reiserfs_update_sd(&th, inode);
-			journal_end_sync(&th, inode->i_sb, jbegin_count);
+			journal_end_sync(&th);
 		}
 		reiserfs_write_unlock(inode->i_sb);
 	}
 	return 0;
 }
 
-/* stat data of new object is inserted already, this inserts the item
-   containing "." and ".." entries */
+/*
+ * stat data of new object is inserted already, this inserts the item
+ * containing "." and ".." entries
+ */
 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
 				  struct inode *inode,
 				  struct item_head *ih, struct treepath *path,
@@ -1674,9 +1818,11 @@
 		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
 		      TYPE_DIRENTRY, 3 /*key length */ );
 
-	/* compose item head for new item. Directories consist of items of
-	   old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
-	   is done by reiserfs_new_inode */
+	/*
+	 * compose item head for new item. Directories consist of items of
+	 * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
+	 * is done by reiserfs_new_inode
+	 */
 	if (old_format_only(sb)) {
 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
@@ -1714,9 +1860,12 @@
 	return reiserfs_insert_item(th, path, &key, ih, inode, body);
 }
 
-/* stat data of object has been inserted, this inserts the item
-   containing the body of symlink */
-static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
+/*
+ * stat data of object has been inserted, this inserts the item
+ * containing the body of symlink
+ */
+static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th,
+				struct inode *inode,
 				struct item_head *ih,
 				struct treepath *path, const char *symname,
 				int item_len)
@@ -1754,15 +1903,26 @@
 	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
 }
 
-/* inserts the stat data into the tree, and then calls
-   reiserfs_new_directory (to insert ".", ".." item if new object is
-   directory) or reiserfs_new_symlink (to insert symlink body if new
-   object is symlink) or nothing (if new object is regular file)
+/*
+ * inserts the stat data into the tree, and then calls
+ * reiserfs_new_directory (to insert ".", ".." item if new object is
+ * directory) or reiserfs_new_symlink (to insert symlink body if new
+ * object is symlink) or nothing (if new object is regular file)
 
-   NOTE! uid and gid must already be set in the inode.  If we return
-   non-zero due to an error, we have to drop the quota previously allocated
-   for the fresh inode.  This can only be done outside a transaction, so
-   if we return non-zero, we also end the transaction.  */
+ * NOTE! uid and gid must already be set in the inode.  If we return
+ * non-zero due to an error, we have to drop the quota previously allocated
+ * for the fresh inode.  This can only be done outside a transaction, so
+ * if we return non-zero, we also end the transaction.
+ *
+ * @th: active transaction handle
+ * @dir: parent directory for new inode
+ * @mode: mode of new inode
+ * @symname: symlink contents if inode is symlink
+ * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for
+ *         symlinks
+ * @inode: inode to be filled
+ * @security: optional security context to associate with this inode
+ */
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		       struct inode *dir, umode_t mode, const char *symname,
 		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
@@ -1807,7 +1967,7 @@
 	else
 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
 				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
-	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
+	memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE);
 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
 
 	depth = reiserfs_write_unlock_nested(inode->i_sb);
@@ -1820,10 +1980,11 @@
 	}
 
 	if (old_format_only(sb))
-		/* not a perfect generation count, as object ids can be reused, but
-		 ** this is as good as reiserfs can do right now.
-		 ** note that the private part of inode isn't filled in yet, we have
-		 ** to use the directory.
+		/*
+		 * not a perfect generation count, as object ids can be reused,
+		 * but this is as good as reiserfs can do right now.
+		 * note that the private part of inode isn't filled in yet,
+		 * we have to use the directory.
 		 */
 		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
 	else
@@ -1850,7 +2011,7 @@
 	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
 	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
 
-	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
+	INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
 	REISERFS_I(inode)->i_flags = 0;
 	REISERFS_I(inode)->i_prealloc_block = 0;
 	REISERFS_I(inode)->i_prealloc_count = 0;
@@ -1878,9 +2039,9 @@
 		goto out_bad_inode;
 	}
 	if (old_format_only(sb)) {
+		/* i_uid or i_gid is too big to be stored in stat data v3.5 */
 		if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
 			pathrelse(&path_to_key);
-			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
 			err = -EINVAL;
 			goto out_bad_inode;
 		}
@@ -1888,9 +2049,11 @@
 	} else {
 		inode2sd(&sd, inode, inode->i_size);
 	}
-	// store in in-core inode the key of stat data and version all
-	// object items will have (directory items will have old offset
-	// format, other new objects will consist of new items)
+	/*
+	 * store in in-core inode the key of stat data and version all
+	 * object items will have (directory items will have old offset
+	 * format, other new objects will consist of new items)
+	 */
 	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
 	else
@@ -1934,7 +2097,7 @@
 	if (retval) {
 		err = retval;
 		reiserfs_check_path(&path_to_key);
-		journal_end(th, th->t_super, th->t_blocks_allocated);
+		journal_end(th);
 		goto out_inserted_sd;
 	}
 
@@ -1945,7 +2108,7 @@
 		if (retval) {
 			err = retval;
 			reiserfs_check_path(&path_to_key);
-			journal_end(th, th->t_super, th->t_blocks_allocated);
+			journal_end(th);
 			goto out_inserted_sd;
 		}
 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
@@ -1962,8 +2125,7 @@
 		if (retval) {
 			err = retval;
 			reiserfs_check_path(&path_to_key);
-			retval = journal_end(th, th->t_super,
-					     th->t_blocks_allocated);
+			retval = journal_end(th);
 			if (retval)
 				err = retval;
 			goto out_inserted_sd;
@@ -1975,11 +2137,7 @@
 
 	return 0;
 
-/* it looks like you can easily compress these two goto targets into
- * one.  Keeping it like this doesn't actually hurt anything, and they
- * are place holders for what the quota code actually needs.
- */
-      out_bad_inode:
+out_bad_inode:
 	/* Invalidate the object, nothing was inserted yet */
 	INODE_PKEY(inode)->k_objectid = 0;
 
@@ -1988,16 +2146,19 @@
 	dquot_free_inode(inode);
 	reiserfs_write_lock_nested(inode->i_sb, depth);
 
-      out_end_trans:
-	journal_end(th, th->t_super, th->t_blocks_allocated);
-	/* Drop can be outside and it needs more credits so it's better to have it outside */
+out_end_trans:
+	journal_end(th);
+	/*
+	 * Drop can be outside and it needs more credits so it's better
+	 * to have it outside
+	 */
 	depth = reiserfs_write_unlock_nested(inode->i_sb);
 	dquot_drop(inode);
 	reiserfs_write_lock_nested(inode->i_sb, depth);
 	inode->i_flags |= S_NOQUOTA;
 	make_bad_inode(inode);
 
-      out_inserted_sd:
+out_inserted_sd:
 	clear_nlink(inode);
 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
@@ -2006,25 +2167,26 @@
 }
 
 /*
-** finds the tail page in the page cache,
-** reads the last block in.
-**
-** On success, page_result is set to a locked, pinned page, and bh_result
-** is set to an up to date buffer for the last block in the file.  returns 0.
-**
-** tail conversion is not done, so bh_result might not be valid for writing
-** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
-** trying to write the block.
-**
-** on failure, nonzero is returned, page_result and bh_result are untouched.
-*/
+ * finds the tail page in the page cache,
+ * reads the last block in.
+ *
+ * On success, page_result is set to a locked, pinned page, and bh_result
+ * is set to an up to date buffer for the last block in the file.  returns 0.
+ *
+ * tail conversion is not done, so bh_result might not be valid for writing
+ * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
+ * trying to write the block.
+ *
+ * on failure, nonzero is returned, page_result and bh_result are untouched.
+ */
 static int grab_tail_page(struct inode *inode,
 			  struct page **page_result,
 			  struct buffer_head **bh_result)
 {
 
-	/* we want the page with the last byte in the file,
-	 ** not the page that will hold the next byte for appending
+	/*
+	 * we want the page with the last byte in the file,
+	 * not the page that will hold the next byte for appending
 	 */
 	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
 	unsigned long pos = 0;
@@ -2036,10 +2198,11 @@
 	struct page *page;
 	int error;
 
-	/* we know that we are only called with inode->i_size > 0.
-	 ** we also know that a file tail can never be as big as a block
-	 ** If i_size % blocksize == 0, our file is currently block aligned
-	 ** and it won't need converting or zeroing after a truncate.
+	/*
+	 * we know that we are only called with inode->i_size > 0.
+	 * we also know that a file tail can never be as big as a block
+	 * If i_size % blocksize == 0, our file is currently block aligned
+	 * and it won't need converting or zeroing after a truncate.
 	 */
 	if ((offset & (blocksize - 1)) == 0) {
 		return -ENOENT;
@@ -2068,10 +2231,11 @@
 	} while (bh != head);
 
 	if (!buffer_uptodate(bh)) {
-		/* note, this should never happen, prepare_write should
-		 ** be taking care of this for us.  If the buffer isn't up to date,
-		 ** I've screwed up the code to find the buffer, or the code to
-		 ** call prepare_write
+		/*
+		 * note, this should never happen, prepare_write should be
+		 * taking care of this for us.  If the buffer isn't up to
+		 * date, I've screwed up the code to find the buffer, or the
+		 * code to call prepare_write
 		 */
 		reiserfs_error(inode->i_sb, "clm-6000",
 			       "error reading block %lu", bh->b_blocknr);
@@ -2081,21 +2245,21 @@
 	*bh_result = bh;
 	*page_result = page;
 
-      out:
+out:
 	return error;
 
-      unlock:
+unlock:
 	unlock_page(page);
 	page_cache_release(page);
 	return error;
 }
 
 /*
-** vfs version of truncate file.  Must NOT be called with
-** a transaction already started.
-**
-** some code taken from block_truncate_page
-*/
+ * vfs version of truncate file.  Must NOT be called with
+ * a transaction already started.
+ *
+ * some code taken from block_truncate_page
+ */
 int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
 {
 	struct reiserfs_transaction_handle th;
@@ -2113,9 +2277,11 @@
 	if (inode->i_size > 0) {
 		error = grab_tail_page(inode, &page, &bh);
 		if (error) {
-			// -ENOENT means we truncated past the end of the file,
-			// and get_block_create_0 could not find a block to read in,
-			// which is ok.
+			/*
+			 * -ENOENT means we truncated past the end of the
+			 * file, and get_block_create_0 could not find a
+			 * block to read in, which is ok.
+			 */
 			if (error != -ENOENT)
 				reiserfs_error(inode->i_sb, "clm-6001",
 					       "grab_tail_page failed %d",
@@ -2125,29 +2291,33 @@
 		}
 	}
 
-	/* so, if page != NULL, we have a buffer head for the offset at
-	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
-	 ** then we have an unformatted node.  Otherwise, we have a direct item,
-	 ** and no zeroing is required on disk.  We zero after the truncate,
-	 ** because the truncate might pack the item anyway
-	 ** (it will unmap bh if it packs).
+	/*
+	 * so, if page != NULL, we have a buffer head for the offset at
+	 * the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
+	 * then we have an unformatted node.  Otherwise, we have a direct item,
+	 * and no zeroing is required on disk.  We zero after the truncate,
+	 * because the truncate might pack the item anyway
+	 * (it will unmap bh if it packs).
+	 *
+	 * it is enough to reserve space in transaction for 2 balancings:
+	 * one for "save" link adding and another for the first
+	 * cut_from_item. 1 is for update_sd
 	 */
-	/* it is enough to reserve space in transaction for 2 balancings:
-	   one for "save" link adding and another for the first
-	   cut_from_item. 1 is for update_sd */
 	error = journal_begin(&th, inode->i_sb,
 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
 	if (error)
 		goto out;
 	reiserfs_update_inode_transaction(inode);
 	if (update_timestamps)
-		/* we are doing real truncate: if the system crashes before the last
-		   transaction of truncating gets committed - on reboot the file
-		   either appears truncated properly or not truncated at all */
+		/*
+		 * we are doing real truncate: if the system crashes
+		 * before the last transaction of truncating gets committed
+		 * - on reboot the file either appears truncated properly
+		 * or not truncated at all
+		 */
 		add_save_link(&th, inode, 1);
 	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
-	error =
-	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
+	error = journal_end(&th);
 	if (error)
 		goto out;
 
@@ -2180,7 +2350,7 @@
 	reiserfs_write_unlock(inode->i_sb);
 
 	return 0;
-      out:
+out:
 	if (page) {
 		unlock_page(page);
 		page_cache_release(page);
@@ -2212,7 +2382,10 @@
 	int copy_size;
 	int trans_running = 0;
 
-	/* catch places below that try to log something without starting a trans */
+	/*
+	 * catch places below that try to log something without
+	 * starting a trans
+	 */
 	th.t_trans_id = 0;
 
 	if (!buffer_uptodate(bh_result)) {
@@ -2220,11 +2393,11 @@
 	}
 
 	kmap(bh_result->b_page);
-      start_over:
+start_over:
 	reiserfs_write_lock(inode->i_sb);
 	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
 
-      research:
+research:
 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
 	if (retval != POSITION_FOUND) {
 		use_get_block = 1;
@@ -2232,8 +2405,8 @@
 	}
 
 	bh = get_last_bh(&path);
-	ih = get_ih(&path);
-	item = get_item(&path);
+	ih = tp_item_head(&path);
+	item = tp_item_body(&path);
 	pos_in_item = path.pos_in_item;
 
 	/* we've found an unformatted node */
@@ -2281,10 +2454,10 @@
 			goto research;
 		}
 
-		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
+		memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied,
 		       copy_size);
 
-		journal_mark_dirty(&th, inode->i_sb, bh);
+		journal_mark_dirty(&th, bh);
 		bytes_copied += copy_size;
 		set_block_dev_mapped(bh_result, 0, inode);
 
@@ -2304,10 +2477,10 @@
 	}
 	retval = 0;
 
-      out:
+out:
 	pathrelse(&path);
 	if (trans_running) {
-		int err = journal_end(&th, inode->i_sb, jbegin_count);
+		int err = journal_end(&th);
 		if (err)
 			retval = err;
 		trans_running = 0;
@@ -2331,7 +2504,8 @@
 	kunmap(bh_result->b_page);
 
 	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
-		/* we've copied data from the page into the direct item, so the
+		/*
+		 * we've copied data from the page into the direct item, so the
 		 * buffer in the page is now clean, mark it to reflect that.
 		 */
 		lock_buffer(bh_result);
@@ -2370,7 +2544,8 @@
 		return 0;
 	}
 
-	/* The page dirty bit is cleared before writepage is called, which
+	/*
+	 * The page dirty bit is cleared before writepage is called, which
 	 * means we have to tell create_empty_buffers to make dirty buffers
 	 * The page really should be up to date at this point, so tossing
 	 * in the BH_Uptodate is just a sanity check.
@@ -2381,8 +2556,9 @@
 	}
 	head = page_buffers(page);
 
-	/* last page in the file, zero out any contents past the
-	 ** last byte in the file
+	/*
+	 * last page in the file, zero out any contents past the
+	 * last byte in the file
 	 */
 	if (page->index >= end_index) {
 		unsigned last_offset;
@@ -2412,7 +2588,8 @@
 		           (!buffer_mapped(bh) || (buffer_mapped(bh)
 						       && bh->b_blocknr ==
 						       0))) {
-			/* not mapped yet, or it points to a direct item, search
+			/*
+			 * not mapped yet, or it points to a direct item, search
 			 * the btree for the mapping info, and log any direct
 			 * items found
 			 */
@@ -2450,10 +2627,11 @@
 
 		if (checked) {
 			reiserfs_prepare_for_journal(s, bh, 1);
-			journal_mark_dirty(&th, s, bh);
+			journal_mark_dirty(&th, bh);
 			continue;
 		}
-		/* from this point on, we know the buffer is mapped to a
+		/*
+		 * from this point on, we know the buffer is mapped to a
 		 * real block and not a direct item
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE) {
@@ -2472,7 +2650,7 @@
 	} while ((bh = bh->b_this_page) != head);
 
 	if (checked) {
-		error = journal_end(&th, s, bh_per_page + 1);
+		error = journal_end(&th);
 		reiserfs_write_unlock(s);
 		if (error)
 			goto fail;
@@ -2497,7 +2675,7 @@
 	} while (bh != head);
 
 	error = 0;
-      done:
+done:
 	if (nr == 0) {
 		/*
 		 * if this page only had a direct item, it is very possible for
@@ -2519,8 +2697,9 @@
 	}
 	return error;
 
-      fail:
-	/* catches various errors, we need to make sure any valid dirty blocks
+fail:
+	/*
+	 * catches various errors, we need to make sure any valid dirty blocks
 	 * get to the media.  The page is currently locked and not marked for
 	 * writeback
 	 */
@@ -2533,8 +2712,8 @@
 			mark_buffer_async_write(bh);
 		} else {
 			/*
-			 * clear any dirty bits that might have come from getting
-			 * attached to a dirty page
+			 * clear any dirty bits that might have come from
+			 * getting attached to a dirty page
 			 */
 			clear_buffer_dirty(bh);
 		}
@@ -2614,15 +2793,18 @@
 	ret = __block_write_begin(page, pos, len, reiserfs_get_block);
 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th = current->journal_info;
-		/* this gets a little ugly.  If reiserfs_get_block returned an
-		 * error and left a transacstion running, we've got to close it,
-		 * and we've got to free handle if it was a persistent transaction.
+		/*
+		 * this gets a little ugly.  If reiserfs_get_block returned an
+		 * error and left a transaction running, we've got to close
+		 * it, and we've got to free handle if it was a persistent
+		 * transaction.
 		 *
 		 * But, if we had nested into an existing transaction, we need
 		 * to just drop the ref count on the handle.
 		 *
 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
-		 * and it was a persistent trans.  Otherwise, it was nested above.
+		 * and it was a persistent trans.  Otherwise, it was nested
+		 * above.
 		 */
 		if (th->t_refcount > old_ref) {
 			if (old_ref)
@@ -2671,15 +2853,18 @@
 	ret = __block_write_begin(page, from, len, reiserfs_get_block);
 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th = current->journal_info;
-		/* this gets a little ugly.  If reiserfs_get_block returned an
-		 * error and left a transacstion running, we've got to close it,
-		 * and we've got to free handle if it was a persistent transaction.
+		/*
+		 * this gets a little ugly.  If reiserfs_get_block returned an
+		 * error and left a transaction running, we've got to close
+		 * it, and we've got to free handle if it was a persistent
+		 * transaction.
 		 *
 		 * But, if we had nested into an existing transaction, we need
 		 * to just drop the ref count on the handle.
 		 *
 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
-		 * and it was a persistent trans.  Otherwise, it was nested above.
+		 * and it was a persistent trans.  Otherwise, it was nested
+		 * above.
 		 */
 		if (th->t_refcount > old_ref) {
 			if (old_ref)
@@ -2734,17 +2919,20 @@
 
 	reiserfs_commit_page(inode, page, start, start + copied);
 
-	/* generic_commit_write does this for us, but does not update the
-	 ** transaction tracking stuff when the size changes.  So, we have
-	 ** to do the i_size updates here.
+	/*
+	 * generic_commit_write does this for us, but does not update the
+	 * transaction tracking stuff when the size changes.  So, we have
+	 * to do the i_size updates here.
 	 */
 	if (pos + copied > inode->i_size) {
 		struct reiserfs_transaction_handle myth;
 		reiserfs_write_lock(inode->i_sb);
 		locked = true;
-		/* If the file have grown beyond the border where it
-		   can have a tail, unmark it as needing a tail
-		   packing */
+		/*
+		 * If the file has grown beyond the border where it
+		 * can have a tail, unmark it as needing a tail
+		 * packing
+		 */
 		if ((have_large_tails(inode->i_sb)
 		     && inode->i_size > i_block_size(inode) * 4)
 		    || (have_small_tails(inode->i_sb)
@@ -2759,13 +2947,13 @@
 		inode->i_size = pos + copied;
 		/*
 		 * this will just nest into our transaction.  It's important
-		 * to use mark_inode_dirty so the inode gets pushed around on the
-		 * dirty lists, and so that O_SYNC works as expected
+		 * to use mark_inode_dirty so the inode gets pushed around on
+		 * the dirty lists, and so that O_SYNC works as expected
 		 */
 		mark_inode_dirty(inode);
 		reiserfs_update_sd(&myth, inode);
 		update_sd = 1;
-		ret = journal_end(&myth, inode->i_sb, 1);
+		ret = journal_end(&myth);
 		if (ret)
 			goto journal_error;
 	}
@@ -2781,7 +2969,7 @@
 			goto out;
 	}
 
-      out:
+out:
 	if (locked)
 		reiserfs_write_unlock(inode->i_sb);
 	unlock_page(page);
@@ -2792,7 +2980,7 @@
 
 	return ret == 0 ? copied : ret;
 
-      journal_error:
+journal_error:
 	reiserfs_write_unlock(inode->i_sb);
 	locked = false;
 	if (th) {
@@ -2822,15 +3010,18 @@
 	}
 	reiserfs_commit_page(inode, page, from, to);
 
-	/* generic_commit_write does this for us, but does not update the
-	 ** transaction tracking stuff when the size changes.  So, we have
-	 ** to do the i_size updates here.
+	/*
+	 * generic_commit_write does this for us, but does not update the
+	 * transaction tracking stuff when the size changes.  So, we have
+	 * to do the i_size updates here.
 	 */
 	if (pos > inode->i_size) {
 		struct reiserfs_transaction_handle myth;
-		/* If the file have grown beyond the border where it
-		   can have a tail, unmark it as needing a tail
-		   packing */
+		/*
+		 * If the file has grown beyond the border where it
+		 * can have a tail, unmark it as needing a tail
+		 * packing
+		 */
 		if ((have_large_tails(inode->i_sb)
 		     && inode->i_size > i_block_size(inode) * 4)
 		    || (have_small_tails(inode->i_sb)
@@ -2845,13 +3036,13 @@
 		inode->i_size = pos;
 		/*
 		 * this will just nest into our transaction.  It's important
-		 * to use mark_inode_dirty so the inode gets pushed around on the
-		 * dirty lists, and so that O_SYNC works as expected
+		 * to use mark_inode_dirty so the inode gets pushed around
+		 * on the dirty lists, and so that O_SYNC works as expected
 		 */
 		mark_inode_dirty(inode);
 		reiserfs_update_sd(&myth, inode);
 		update_sd = 1;
-		ret = journal_end(&myth, inode->i_sb, 1);
+		ret = journal_end(&myth);
 		if (ret)
 			goto journal_error;
 	}
@@ -2863,10 +3054,10 @@
 			goto out;
 	}
 
-      out:
+out:
 	return ret;
 
-      journal_error:
+journal_error:
 	if (th) {
 		if (!update_sd)
 			reiserfs_update_sd(th, inode);
@@ -2924,9 +3115,10 @@
 	}
 }
 
-/* decide if this buffer needs to stay around for data logging or ordered
-** write purposes
-*/
+/*
+ * decide if this buffer needs to stay around for data logging or ordered
+ * write purposes
+ */
 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
 {
 	int ret = 1;
@@ -2937,7 +3129,8 @@
 	if (!buffer_mapped(bh)) {
 		goto free_jh;
 	}
-	/* the page is locked, and the only places that log a data buffer
+	/*
+	 * the page is locked, and the only places that log a data buffer
 	 * also lock the page.
 	 */
 	if (reiserfs_file_data_log(inode)) {
@@ -2952,7 +3145,8 @@
 		struct reiserfs_journal_list *jl;
 		struct reiserfs_jh *jh = bh->b_private;
 
-		/* why is this safe?
+		/*
+		 * why is this safe?
 		 * reiserfs_setattr updates i_size in the on disk
 		 * stat data before allowing vmtruncate to be called.
 		 *
@@ -2969,7 +3163,7 @@
 		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
 			ret = 0;
 	}
-      free_jh:
+free_jh:
 	if (ret && bh->b_private) {
 		reiserfs_free_jh(bh);
 	}
@@ -3028,7 +3222,7 @@
 		ret = try_to_release_page(page, 0);
 		/* maybe should BUG_ON(!ret); - neilb */
 	}
-      out:
+out:
 	return;
 }
 
@@ -3080,18 +3274,20 @@
 	return ret;
 }
 
-/* We thank Mingming Cao for helping us understand in great detail what
-   to do in this section of the code. */
+/*
+ * We thank Mingming Cao for helping us understand in great detail what
+ * to do in this section of the code.
+ */
 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
-				  const struct iovec *iov, loff_t offset,
-				  unsigned long nr_segs)
+				  struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  reiserfs_get_blocks_direct_io);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				 reiserfs_get_blocks_direct_io);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -3099,7 +3295,7 @@
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
 			truncate_setsize(inode, isize);
@@ -3127,8 +3323,9 @@
 		dquot_initialize(inode);
 	reiserfs_write_lock(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
-		/* version 2 items will be caught by the s_maxbytes check
-		 ** done for us in vmtruncate
+		/*
+		 * version 2 items will be caught by the s_maxbytes check
+		 * done for us in vmtruncate
 		 */
 		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
 		    attr->ia_size > MAX_NON_LFS) {
@@ -3149,7 +3346,7 @@
 				err = journal_begin(&th, inode->i_sb, 4);
 				if (!err) {
 					reiserfs_discard_prealloc(&th, inode);
-					err = journal_end(&th, inode->i_sb, 4);
+					err = journal_end(&th);
 				}
 				if (err)
 					error = err;
@@ -3189,7 +3386,10 @@
 		if (error)
 			return error;
 
-		/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
+		/*
+		 * (user+group)*(old+new) structure - we count quota
+		 * info and inode write (sb, inode)
+		 */
 		reiserfs_write_lock(inode->i_sb);
 		error = journal_begin(&th, inode->i_sb, jbegin_count);
 		reiserfs_write_unlock(inode->i_sb);
@@ -3198,19 +3398,21 @@
 		error = dquot_transfer(inode, attr);
 		reiserfs_write_lock(inode->i_sb);
 		if (error) {
-			journal_end(&th, inode->i_sb, jbegin_count);
+			journal_end(&th);
 			reiserfs_write_unlock(inode->i_sb);
 			goto out;
 		}
 
-		/* Update corresponding info in inode so that everything is in
-		 * one transaction */
+		/*
+		 * Update corresponding info in inode so that everything
+		 * is in one transaction
+		 */
 		if (attr->ia_valid & ATTR_UID)
 			inode->i_uid = attr->ia_uid;
 		if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		mark_inode_dirty(inode);
-		error = journal_end(&th, inode->i_sb, jbegin_count);
+		error = journal_end(&th);
 		reiserfs_write_unlock(inode->i_sb);
 		if (error)
 			goto out;
@@ -3220,8 +3422,14 @@
 	    attr->ia_size != i_size_read(inode)) {
 		error = inode_newsize_ok(inode, attr->ia_size);
 		if (!error) {
+			/*
+			 * Could race against reiserfs_file_release
+			 * if called from NFS, so take tailpack mutex.
+			 */
+			mutex_lock(&REISERFS_I(inode)->tailpack);
 			truncate_setsize(inode, attr->ia_size);
-			reiserfs_vfs_truncate_file(inode);
+			reiserfs_truncate_file(inode, 1);
+			mutex_unlock(&REISERFS_I(inode)->tailpack);
 		}
 	}
 

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 946ccbf..501ed68 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c

@@ -15,7 +15,8 @@
  * reiserfs_ioctl - handler for ioctl for inode
  * supported commands:
  *  1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
- *                           and prevent packing file (argument arg has to be non-zero)
+ *                           and prevent packing file (argument arg has to
+ *			      be non-zero)
  *  2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
  *  3) That's all for a while ...
  */
@@ -132,7 +133,10 @@
 long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
-	/* These are just misnamed, they actually get/put from/to user an int */
+	/*
+	 * These are just misnamed, they actually
+	 * get/put from/to user an int
+	 */
 	switch (cmd) {
 	case REISERFS_IOC32_UNPACK:
 		cmd = REISERFS_IOC_UNPACK;
@@ -160,10 +164,10 @@
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
 /*
-** reiserfs_unpack
-** Function try to convert tail from direct item into indirect.
-** It set up nopack attribute in the REISERFS_I(inode)->nopack
-*/
+ * reiserfs_unpack
+ * Tries to convert the tail from a direct item into an indirect one.
+ * Sets the nopack attribute in REISERFS_I(inode)->i_flags.
+ */
 int reiserfs_unpack(struct inode *inode, struct file *filp)
 {
 	int retval = 0;
@@ -194,9 +198,10 @@
 		goto out;
 	}
 
-	/* we unpack by finding the page with the tail, and calling
-	 ** __reiserfs_write_begin on that page.  This will force a
-	 ** reiserfs_get_block to unpack the tail for us.
+	/*
+	 * we unpack by finding the page with the tail, and calling
+	 * __reiserfs_write_begin on that page.  This will force a
+	 * reiserfs_get_block to unpack the tail for us.
 	 */
 	index = inode->i_size >> PAGE_CACHE_SHIFT;
 	mapping = inode->i_mapping;
@@ -214,11 +219,11 @@
 	retval = reiserfs_commit_write(NULL, page, write_from, write_from);
 	REISERFS_I(inode)->i_flags |= i_nopack_mask;
 
-      out_unlock:
+out_unlock:
 	unlock_page(page);
 	page_cache_release(page);
 
-      out:
+out:
 	mutex_unlock(&inode->i_mutex);
 	reiserfs_write_unlock(inode->i_sb);
 	return retval;

diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index ee382ef..cfaee91 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c

@@ -5,15 +5,17 @@
 #include <linux/time.h>
 #include "reiserfs.h"
 
-// this contains item handlers for old item types: sd, direct,
-// indirect, directory
+/*
+ * this contains item handlers for old item types: sd, direct,
+ * indirect, directory
+ */
 
-/* and where are the comments? how about saying where we can find an
-   explanation of each item handler method? -Hans */
+/*
+ * and where are the comments? how about saying where we can find an
+ * explanation of each item handler method? -Hans
+ */
 
-//////////////////////////////////////////////////////////////////////////////
-// stat data functions
-//
+/* stat data functions */
 static int sd_bytes_number(struct item_head *ih, int block_size)
 {
 	return 0;
@@ -60,7 +62,7 @@
 
 static void sd_check_item(struct item_head *ih, char *item)
 {
-	// FIXME: type something here!
+	/* unused */
 }
 
 static int sd_create_vi(struct virtual_node *vn,
@@ -68,7 +70,6 @@
 			int is_affected, int insert_size)
 {
 	vi->vi_index = TYPE_STAT_DATA;
-	//vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
 	return 0;
 }
 
@@ -117,15 +118,13 @@
 	.print_vi = sd_print_vi
 };
 
-//////////////////////////////////////////////////////////////////////////////
-// direct item functions
-//
+/* direct item functions */
 static int direct_bytes_number(struct item_head *ih, int block_size)
 {
 	return ih_item_len(ih);
 }
 
-// FIXME: this should probably switch to indirect as well
+/* FIXME: this should probably switch to indirect as well */
 static void direct_decrement_key(struct cpu_key *key)
 {
 	cpu_key_k_offset_dec(key);
@@ -144,7 +143,7 @@
 {
 	int j = 0;
 
-//    return;
+/*    return; */
 	printk("\"");
 	while (j < ih_item_len(ih))
 		printk("%c", item[j++]);
@@ -153,7 +152,7 @@
 
 static void direct_check_item(struct item_head *ih, char *item)
 {
-	// FIXME: type something here!
+	/* unused */
 }
 
 static int direct_create_vi(struct virtual_node *vn,
@@ -161,7 +160,6 @@
 			    int is_affected, int insert_size)
 {
 	vi->vi_index = TYPE_DIRECT;
-	//vi->vi_type |= VI_TYPE_DIRECT;
 	return 0;
 }
 
@@ -211,16 +209,13 @@
 	.print_vi = direct_print_vi
 };
 
-//////////////////////////////////////////////////////////////////////////////
-// indirect item functions
-//
-
+/* indirect item functions */
 static int indirect_bytes_number(struct item_head *ih, int block_size)
 {
-	return ih_item_len(ih) / UNFM_P_SIZE * block_size;	//- get_ih_free_space (ih);
+	return ih_item_len(ih) / UNFM_P_SIZE * block_size;
 }
 
-// decrease offset, if it becomes 0, change type to stat data
+/* decrease offset, if it becomes 0, change type to stat data */
 static void indirect_decrement_key(struct cpu_key *key)
 {
 	cpu_key_k_offset_dec(key);
@@ -228,7 +223,7 @@
 		set_cpu_key_k_type(key, TYPE_STAT_DATA);
 }
 
-// if it is not first item of the body, then it is mergeable
+/* if it is not first item of the body, then it is mergeable */
 static int indirect_is_left_mergeable(struct reiserfs_key *key,
 				      unsigned long bsize)
 {
@@ -236,7 +231,7 @@
 	return (le_key_k_offset(version, key) != 1);
 }
 
-// printing of indirect item
+/* printing of indirect item */
 static void start_new_sequence(__u32 * start, int *len, __u32 new)
 {
 	*start = new;
@@ -295,7 +290,7 @@
 
 static void indirect_check_item(struct item_head *ih, char *item)
 {
-	// FIXME: type something here!
+	/* unused */
 }
 
 static int indirect_create_vi(struct virtual_node *vn,
@@ -303,7 +298,6 @@
 			      int is_affected, int insert_size)
 {
 	vi->vi_index = TYPE_INDIRECT;
-	//vi->vi_type |= VI_TYPE_INDIRECT;
 	return 0;
 }
 
@@ -321,16 +315,19 @@
 	return indirect_check_left(vi, free, 0, 0);
 }
 
-// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right)
+/*
+ * return size in bytes of 'units' units. If first == 0 - calculate
+ * from the head (left), otherwise - from tail (right)
+ */
 static int indirect_part_size(struct virtual_item *vi, int first, int units)
 {
-	// unit of indirect item is byte (yet)
+	/* unit of indirect item is byte (yet) */
 	return units;
 }
 
 static int indirect_unit_num(struct virtual_item *vi)
 {
-	// unit of indirect item is byte (yet)
+	/* unit of indirect item is byte (yet) */
 	return vi->vi_item_len - IH_SIZE;
 }
 
@@ -356,10 +353,7 @@
 	.print_vi = indirect_print_vi
 };
 
-//////////////////////////////////////////////////////////////////////////////
-// direntry functions
-//
-
+/* direntry functions */
 static int direntry_bytes_number(struct item_head *ih, int block_size)
 {
 	reiserfs_warning(NULL, "vs-16090",
@@ -396,7 +390,7 @@
 
 	deh = (struct reiserfs_de_head *)item;
 
-	for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
+	for (i = 0; i < ih_entry_count(ih); i++, deh++) {
 		namelen =
 		    (i ? (deh_location(deh - 1)) : ih_item_len(ih)) -
 		    deh_location(deh);
@@ -428,9 +422,9 @@
 	int i;
 	struct reiserfs_de_head *deh;
 
-	// FIXME: type something here!
+	/* unused */
 	deh = (struct reiserfs_de_head *)item;
-	for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
+	for (i = 0; i < ih_entry_count(ih); i++, deh++) {
 		;
 	}
 }
@@ -439,7 +433,8 @@
 
 /*
  * function returns old entry number in directory item in real node
- * using new entry number in virtual item in virtual node */
+ * using new entry number in virtual item in virtual node
+ */
 static inline int old_entry_num(int is_affected, int virtual_entry_num,
 				int pos_in_item, int mode)
 {
@@ -463,9 +458,11 @@
 	return virtual_entry_num - 1;
 }
 
-/* Create an array of sizes of directory entries for virtual
-   item. Return space used by an item. FIXME: no control over
-   consuming of space used by this item handler */
+/*
+ * Create an array of sizes of directory entries for virtual
+ * item. Return space used by an item. FIXME: no control over
+ * consuming of space used by this item handler
+ */
 static int direntry_create_vi(struct virtual_node *vn,
 			      struct virtual_item *vi,
 			      int is_affected, int insert_size)
@@ -494,8 +491,8 @@
 		j = old_entry_num(is_affected, i, vn->vn_pos_in_item,
 				  vn->vn_mode);
 		dir_u->entry_sizes[i] =
-		    (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) -
-		    deh_location(&(deh[j])) + DEH_SIZE;
+		    (j ? deh_location(&deh[j - 1]) : ih_item_len(vi->vi_ih)) -
+		    deh_location(&deh[j]) + DEH_SIZE;
 	}
 
 	size += (dir_u->entry_count * sizeof(short));
@@ -529,10 +526,10 @@
 
 }
 
-//
-// return number of entries which may fit into specified amount of
-// free space, or -1 if free space is not enough even for 1 entry
-//
+/*
+ * return number of entries which may fit into specified amount of
+ * free space, or -1 if free space is not enough even for 1 entry
+ */
 static int direntry_check_left(struct virtual_item *vi, int free,
 			       int start_skip, int end_skip)
 {
@@ -541,8 +538,8 @@
 	struct direntry_uarea *dir_u = vi->vi_uarea;
 
 	for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
+		/* i-th entry doesn't fit into the remaining free space */
 		if (dir_u->entry_sizes[i] > free)
-			/* i-th entry doesn't fit into the remaining free space */
 			break;
 
 		free -= dir_u->entry_sizes[i];
@@ -570,8 +567,8 @@
 	struct direntry_uarea *dir_u = vi->vi_uarea;
 
 	for (i = dir_u->entry_count - 1; i >= 0; i--) {
+		/* i-th entry doesn't fit into the remaining free space */
 		if (dir_u->entry_sizes[i] > free)
-			/* i-th entry doesn't fit into the remaining free space */
 			break;
 
 		free -= dir_u->entry_sizes[i];
@@ -643,9 +640,7 @@
 	.print_vi = direntry_print_vi
 };
 
-//////////////////////////////////////////////////////////////////////////////
-// Error catching functions to catch errors caused by incorrect item types.
-//
+/* Error catching functions to catch errors caused by incorrect item types. */
 static int errcatch_bytes_number(struct item_head *ih, int block_size)
 {
 	reiserfs_warning(NULL, "green-16001",
@@ -685,8 +680,12 @@
 {
 	reiserfs_warning(NULL, "green-16006",
 			 "Invalid item type observed, run fsck ASAP");
-	return 0;		// We might return -1 here as well, but it won't help as create_virtual_node() from where
-	// this operation is called from is of return type void.
+	/*
+	 * We might return -1 here as well, but it won't help as
+	 * create_virtual_node() from where this operation is called
+	 * from is of return type void.
+	 */
+	return 0;
 }
 
 static int errcatch_check_left(struct virtual_item *vi, int free,
@@ -739,9 +738,6 @@
 	errcatch_print_vi
 };
 
-//////////////////////////////////////////////////////////////////////////////
-//
-//
 #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
 #error Item types must use disk-format assigned values.
 #endif

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index fd77703..e8870de 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c

@@ -1,38 +1,38 @@
 /*
-** Write ahead logging implementation copyright Chris Mason 2000
-**
-** The background commits make this code very interrelated, and
-** overly complex.  I need to rethink things a bit....The major players:
-**
-** journal_begin -- call with the number of blocks you expect to log.
-**                  If the current transaction is too
-** 		    old, it will block until the current transaction is
-** 		    finished, and then start a new one.
-**		    Usually, your transaction will get joined in with
-**                  previous ones for speed.
-**
-** journal_join  -- same as journal_begin, but won't block on the current
-**                  transaction regardless of age.  Don't ever call
-**                  this.  Ever.  There are only two places it should be
-**                  called from, and they are both inside this file.
-**
-** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
-**                       that might make them get sent to disk
-**                       and then marks them BH_JDirty.  Puts the buffer head
-**                       into the current transaction hash.
-**
-** journal_end -- if the current transaction is batchable, it does nothing
-**                   otherwise, it could do an async/synchronous commit, or
-**                   a full flush of all log and real blocks in the
-**                   transaction.
-**
-** flush_old_commits -- if the current transaction is too old, it is ended and
-**                      commit blocks are sent to disk.  Forces commit blocks
-**                      to disk for all backgrounded commits that have been
-**                      around too long.
-**		     -- Note, if you call this as an immediate flush from
-**		        from within kupdate, it will ignore the immediate flag
-*/
+ * Write ahead logging implementation copyright Chris Mason 2000
+ *
+ * The background commits make this code very interrelated, and
+ * overly complex.  I need to rethink things a bit....The major players:
+ *
+ * journal_begin -- call with the number of blocks you expect to log.
+ *                  If the current transaction is too
+ *		    old, it will block until the current transaction is
+ *		    finished, and then start a new one.
+ *		    Usually, your transaction will get joined in with
+ *                  previous ones for speed.
+ *
+ * journal_join  -- same as journal_begin, but won't block on the current
+ *                  transaction regardless of age.  Don't ever call
+ *                  this.  Ever.  There are only two places it should be
+ *                  called from, and they are both inside this file.
+ *
+ * journal_mark_dirty -- adds blocks into this transaction.  clears any flags
+ *                       that might make them get sent to disk
+ *                       and then marks them BH_JDirty.  Puts the buffer head
+ *                       into the current transaction hash.
+ *
+ * journal_end -- if the current transaction is batchable, it does nothing
+ *                   otherwise, it could do an async/synchronous commit, or
+ *                   a full flush of all log and real blocks in the
+ *                   transaction.
+ *
+ * flush_old_commits -- if the current transaction is too old, it is ended and
+ *                      commit blocks are sent to disk.  Forces commit blocks
+ *                      to disk for all backgrounded commits that have been
+ *                      around too long.
+ *		     -- Note, if you call this as an immediate flush from
+ *		        within kupdate, it will ignore the immediate flag
+ */
 
 #include <linux/time.h>
 #include <linux/semaphore.h>
@@ -58,23 +58,19 @@
 #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                                j_working_list))
 
-/* the number of mounted filesystems.  This is used to decide when to
-** start and kill the commit workqueue
-*/
-static int reiserfs_mounted_fs_count;
-
-static struct workqueue_struct *commit_wq;
-
-#define JOURNAL_TRANS_HALF 1018	/* must be correct to keep the desc and commit
-				   structs at 4k */
+/* must be correct to keep the desc and commit structs at 4k */
+#define JOURNAL_TRANS_HALF 1018
 #define BUFNR 64		/*read ahead */
 
 /* cnode stat bits.  Move these into reiserfs_fs.h */
 
-#define BLOCK_FREED 2		/* this block was freed, and can't be written.  */
-#define BLOCK_FREED_HOLDER 3	/* this block was freed during this transaction, and can't be written */
+/* this block was freed, and can't be written.  */
+#define BLOCK_FREED 2
+/* this block was freed during this transaction, and can't be written */
+#define BLOCK_FREED_HOLDER 3
 
-#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
+/* used in flush_journal_list */
+#define BLOCK_NEEDS_FLUSH 4
 #define BLOCK_DIRTIED 5
 
 /* journal list state bits */
@@ -87,16 +83,14 @@
 #define COMMIT_NOW  2		/* end and commit this transaction */
 #define WAIT        4		/* wait for the log blocks to hit the disk */
 
-static int do_journal_end(struct reiserfs_transaction_handle *,
-			  struct super_block *, unsigned long nblocks,
-			  int flags);
+static int do_journal_end(struct reiserfs_transaction_handle *, int flags);
 static int flush_journal_list(struct super_block *s,
 			      struct reiserfs_journal_list *jl, int flushall);
 static int flush_commit_list(struct super_block *s,
 			     struct reiserfs_journal_list *jl, int flushall);
 static int can_dirty(struct reiserfs_journal_cnode *cn);
 static int journal_join(struct reiserfs_transaction_handle *th,
-			struct super_block *sb, unsigned long nblocks);
+			struct super_block *sb);
 static void release_journal_dev(struct super_block *super,
 			       struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
@@ -107,8 +101,10 @@
 /* values for join in do_journal_begin_r */
 enum {
 	JBEGIN_REG = 0,		/* regular journal begin */
-	JBEGIN_JOIN = 1,	/* join the running transaction if at all possible */
-	JBEGIN_ABORT = 2,	/* called from cleanup code, ignores aborted flag */
+	/* join the running transaction if at all possible */
+	JBEGIN_JOIN = 1,
+	/* called from cleanup code, ignores aborted flag */
+	JBEGIN_ABORT = 2,
 };
 
 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
@@ -123,10 +119,11 @@
 }
 
 /*
-** clears BH_Dirty and sticks the buffer on the clean list.  Called because I can't allow refile_buffer to
-** make schedule happen after I've freed a block.  Look at remove_from_transaction and journal_mark_freed for
-** more details.
-*/
+ * clears BH_Dirty and sticks the buffer on the clean list.  Called because
+ * I can't allow refile_buffer to make schedule happen after I've freed a
+ * block.  Look at remove_from_transaction and journal_mark_freed for
+ * more details.
+ */
 static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
 {
 	if (bh) {
@@ -163,7 +160,7 @@
 	struct list_head *entry = journal->j_bitmap_nodes.next;
 
 	journal->j_used_bitmap_nodes++;
-      repeat:
+repeat:
 
 	if (entry != &journal->j_bitmap_nodes) {
 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
@@ -204,7 +201,8 @@
 			list_add(&bn->list, &journal->j_bitmap_nodes);
 			journal->j_free_bitmap_nodes++;
 		} else {
-			break;	/* this is ok, we'll try again when more are needed */
+			/* this is ok, we'll try again when more are needed */
+			break;
 		}
 	}
 }
@@ -239,8 +237,8 @@
 }
 
 /*
-** only call this on FS unmount.
-*/
+ * only call this on FS unmount.
+ */
 static int free_list_bitmaps(struct super_block *sb,
 			     struct reiserfs_list_bitmap *jb_array)
 {
@@ -275,9 +273,9 @@
 }
 
 /*
-** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
-** jb_array is the array to be filled in.
-*/
+ * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
+ * jb_array is the array to be filled in.
+ */
 int reiserfs_allocate_list_bitmaps(struct super_block *sb,
 				   struct reiserfs_list_bitmap *jb_array,
 				   unsigned int bmap_nr)
@@ -306,9 +304,9 @@
 }
 
 /*
-** find an available list bitmap.  If you can't find one, flush a commit list
-** and try again
-*/
+ * find an available list bitmap.  If you can't find one, flush a commit list
+ * and try again
+ */
 static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
 						    struct reiserfs_journal_list
 						    *jl)
@@ -332,18 +330,18 @@
 			break;
 		}
 	}
-	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
+	/* double check to make sure it flushed correctly */
+	if (jb->journal_list)
 		return NULL;
-	}
 	jb->journal_list = jl;
 	return jb;
 }
 
 /*
-** allocates a new chunk of X nodes, and links them all together as a list.
-** Uses the cnode->next and cnode->prev pointers
-** returns NULL on failure
-*/
+ * allocates a new chunk of X nodes, and links them all together as a list.
+ * Uses the cnode->next and cnode->prev pointers
+ * returns NULL on failure
+ */
 static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
 {
 	struct reiserfs_journal_cnode *head;
@@ -365,9 +363,7 @@
 	return head;
 }
 
-/*
-** pulls a cnode off the free list, or returns NULL on failure
-*/
+/* pulls a cnode off the free list, or returns NULL on failure */
 static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
 {
 	struct reiserfs_journal_cnode *cn;
@@ -393,8 +389,8 @@
 }
 
 /*
-** returns a cnode to the free list
-*/
+ * returns a cnode to the free list
+ */
 static void free_cnode(struct super_block *sb,
 		       struct reiserfs_journal_cnode *cn)
 {
@@ -419,7 +415,10 @@
 	clear_buffer_journal_restore_dirty(bh);
 }
 
-/* return a cnode with same dev, block number and size in table, or null if not found */
+/*
+ * return a cnode with same dev, block number and size in table,
+ * or null if not found
+ */
 static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 								  super_block
 								  *sb,
@@ -439,23 +438,24 @@
 }
 
 /*
-** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
-** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
-** being overwritten by a replay after crashing.
-**
-** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
-** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
-** sure you never write the block without logging it.
-**
-** next_zero_bit is a suggestion about the next block to try for find_forward.
-** when bl is rejected because it is set in a journal list bitmap, we search
-** for the next zero bit in the bitmap that rejected bl.  Then, we return that
-** through next_zero_bit for find_forward to try.
-**
-** Just because we return something in next_zero_bit does not mean we won't
-** reject it on the next call to reiserfs_in_journal
-**
-*/
+ * this actually means 'can this block be reallocated yet?'.  If you set
+ * search_all, a block can only be allocated if it is not in the current
+ * transaction, was not freed by the current transaction, and has no chance
+ * of ever being overwritten by a replay after crashing.
+ *
+ * If you don't set search_all, a block can only be allocated if it is not
+ * in the current transaction.  Since deleting a block removes it from the
+ * current transaction, this case should never happen.  If you don't set
+ * search_all, make sure you never write the block without logging it.
+ *
+ * next_zero_bit is a suggestion about the next block to try for find_forward.
+ * when bl is rejected because it is set in a journal list bitmap, we search
+ * for the next zero bit in the bitmap that rejected bl.  Then, we return
+ * that through next_zero_bit for find_forward to try.
+ *
+ * Just because we return something in next_zero_bit does not mean we won't
+ * reject it on the next call to reiserfs_in_journal
+ */
 int reiserfs_in_journal(struct super_block *sb,
 			unsigned int bmap_nr, int bit_nr, int search_all,
 			b_blocknr_t * next_zero_bit)
@@ -469,9 +469,11 @@
 	*next_zero_bit = 0;	/* always start this at zero. */
 
 	PROC_INFO_INC(sb, journal.in_journal);
-	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
-	 ** if we crash before the transaction that freed it commits,  this transaction won't
-	 ** have committed either, and the block will never be written
+	/*
+	 * If we aren't doing a search_all, this is a metablock, and it
+	 * will be logged before use.  if we crash before the transaction
+	 * that freed it commits,  this transaction won't have committed
+	 * either, and the block will never be written
 	 */
 	if (search_all) {
 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
@@ -511,8 +513,7 @@
 	return 0;
 }
 
-/* insert cn into table
-*/
+/* insert cn into table */
 static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 				       struct reiserfs_journal_cnode *cn)
 {
@@ -558,10 +559,10 @@
 }
 
 /*
-** this used to be much more involved, and I'm keeping it just in case things get ugly again.
-** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
-** transaction.
-*/
+ * this used to be much more involved, and I'm keeping it just in case
+ * things get ugly again.  it gets called by flush_commit_list, and
+ * cleans up any data stored about blocks freed during a transaction.
+ */
 static void cleanup_freed_for_journal_list(struct super_block *sb,
 					   struct reiserfs_journal_list *jl)
 {
@@ -756,11 +757,12 @@
 		jh = bh->b_private;
 		list_del_init(&jh->list);
 	} else {
-	      no_jh:
+no_jh:
 		get_bh(bh);
 		jh = alloc_jh();
 		spin_lock(&j->j_dirty_buffers_lock);
-		/* buffer must be locked for __add_jh, should be able to have
+		/*
+		 * buffer must be locked for __add_jh, should be able to have
 		 * two adds at the same time
 		 */
 		BUG_ON(bh->b_private);
@@ -818,7 +820,8 @@
 			spin_lock(lock);
 			goto loop_next;
 		}
-		/* in theory, dirty non-uptodate buffers should never get here,
+		/*
+		 * in theory, dirty non-uptodate buffers should never get here,
 		 * but the upper layer io error paths still have a few quirks.
 		 * Handle them here as gracefully as we can
 		 */
@@ -833,7 +836,7 @@
 			reiserfs_free_jh(bh);
 			unlock_buffer(bh);
 		}
-	      loop_next:
+loop_next:
 		put_bh(bh);
 		cond_resched_lock(lock);
 	}
@@ -856,13 +859,14 @@
 		if (!buffer_uptodate(bh)) {
 			ret = -EIO;
 		}
-		/* ugly interaction with invalidatepage here.
-		 * reiserfs_invalidate_page will pin any buffer that has a valid
-		 * journal head from an older transaction.  If someone else sets
-		 * our buffer dirty after we write it in the first loop, and
-		 * then someone truncates the page away, nobody will ever write
-		 * the buffer. We're safe if we write the page one last time
-		 * after freeing the journal header.
+		/*
+		 * ugly interaction with invalidatepage here.
+		 * reiserfs_invalidate_page will pin any buffer that has a
+		 * valid journal head from an older transaction.  If someone
+		 * else sets our buffer dirty after we write it in the first
+		 * loop, and then someone truncates the page away, nobody
+		 * will ever write the buffer. We're safe if we write the
+		 * page one last time after freeing the journal header.
 		 */
 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
 			spin_unlock(lock);
@@ -887,7 +891,7 @@
 	unsigned int other_trans_id;
 	unsigned int first_trans_id;
 
-      find_first:
+find_first:
 	/*
 	 * first we walk backwards to find the oldest uncommitted transation
 	 */
@@ -923,9 +927,11 @@
 				if (!journal_list_still_alive(s, trans_id))
 					return 1;
 
-				/* the one we just flushed is gone, this means all
-				 * older lists are also gone, so first_jl is no longer
-				 * valid either.  Go back to the beginning.
+				/*
+				 * the one we just flushed is gone, this means
+				 * all older lists are also gone, so first_jl
+				 * is no longer valid either.  Go back to the
+				 * beginning.
 				 */
 				if (!journal_list_still_alive
 				    (s, other_trans_id)) {
@@ -958,12 +964,12 @@
 }
 
 /*
-** if this journal list still has commit blocks unflushed, send them to disk.
-**
-** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
-** Before the commit block can by written, every other log block must be safely on disk
-**
-*/
+ * if this journal list still has commit blocks unflushed, send them to disk.
+ *
+ * log areas must be flushed in order (transaction 2 can't commit before
+ * transaction 1).  Before the commit block can be written, every other log
+ * block must be safely on disk
+ */
 static int flush_commit_list(struct super_block *s,
 			     struct reiserfs_journal_list *jl, int flushall)
 {
@@ -982,8 +988,9 @@
 		return 0;
 	}
 
-	/* before we can put our commit blocks on disk, we have to make sure everyone older than
-	 ** us is on disk too
+	/*
+	 * before we can put our commit blocks on disk, we have to make
+	 * sure everyone older than us is on disk too
 	 */
 	BUG_ON(jl->j_len <= 0);
 	BUG_ON(trans_id == journal->j_trans_id);
@@ -991,7 +998,10 @@
 	get_journal_list(jl);
 	if (flushall) {
 		if (flush_older_commits(s, jl) == 1) {
-			/* list disappeared during flush_older_commits.  return */
+			/*
+			 * list disappeared during flush_older_commits.
+			 * return
+			 */
 			goto put_jl;
 		}
 	}
@@ -1006,9 +1016,9 @@
 	BUG_ON(jl->j_trans_id == 0);
 
 	/* this commit is done, exit */
-	if (atomic_read(&(jl->j_commit_left)) <= 0) {
+	if (atomic_read(&jl->j_commit_left) <= 0) {
 		if (flushall) {
-			atomic_set(&(jl->j_older_commits_done), 1);
+			atomic_set(&jl->j_older_commits_done, 1);
 		}
 		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
@@ -1063,9 +1073,10 @@
 		depth = reiserfs_write_unlock_nested(s);
 		__wait_on_buffer(tbh);
 		reiserfs_write_lock_nested(s, depth);
-		// since we're using ll_rw_blk above, it might have skipped over
-		// a locked buffer.  Double check here
-		//
+		/*
+		 * since we're using ll_rw_blk above, it might have skipped
+		 * over a locked buffer.  Double check here
+		 */
 		/* redundant, sync_dirty_buffer() checks */
 		if (buffer_dirty(tbh)) {
 			depth = reiserfs_write_unlock_nested(s);
@@ -1079,17 +1090,21 @@
 #endif
 			retval = -EIO;
 		}
-		put_bh(tbh);	/* once for journal_find_get_block */
-		put_bh(tbh);	/* once due to original getblk in do_journal_end */
-		atomic_dec(&(jl->j_commit_left));
+		/* once for journal_find_get_block */
+		put_bh(tbh);
+		/* once due to original getblk in do_journal_end */
+		put_bh(tbh);
+		atomic_dec(&jl->j_commit_left);
 	}
 
-	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
+	BUG_ON(atomic_read(&jl->j_commit_left) != 1);
 
-	/* If there was a write error in the journal - we can't commit
+	/*
+	 * If there was a write error in the journal - we can't commit
 	 * this transaction - it will be invalid and, if successful,
 	 * will just end up propagating the write error out to
-	 * the file system. */
+	 * the file system.
+	 */
 	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
 		if (buffer_dirty(jl->j_commit_bh))
 			BUG();
@@ -1102,9 +1117,11 @@
 		reiserfs_write_lock_nested(s, depth);
 	}
 
-	/* If there was a write error in the journal - we can't commit this
+	/*
+	 * If there was a write error in the journal - we can't commit this
 	 * transaction - it will be invalid and, if successful, will just end
-	 * up propagating the write error out to the filesystem. */
+	 * up propagating the write error out to the filesystem.
+	 */
 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
 #ifdef CONFIG_REISERFS_CHECK
 		reiserfs_warning(s, "journal-615", "buffer write failed");
@@ -1119,7 +1136,10 @@
 	}
 	journal->j_last_commit_id = jl->j_trans_id;
 
-	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
+	/*
+	 * now, every commit block is on the disk.  It is safe to allow
+	 * blocks freed during this transaction to be reallocated
+	 */
 	cleanup_freed_for_journal_list(s, jl);
 
 	retval = retval ? retval : journal->j_errno;
@@ -1127,13 +1147,13 @@
 	/* mark the metadata dirty */
 	if (!retval)
 		dirty_one_transaction(s, jl);
-	atomic_dec(&(jl->j_commit_left));
+	atomic_dec(&jl->j_commit_left);
 
 	if (flushall) {
-		atomic_set(&(jl->j_older_commits_done), 1);
+		atomic_set(&jl->j_older_commits_done, 1);
 	}
 	mutex_unlock(&jl->j_commit_mutex);
-      put_jl:
+put_jl:
 	put_journal_list(s, jl);
 
 	if (retval)
@@ -1143,9 +1163,9 @@
 }
 
 /*
-** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
-** returns NULL if it can't find anything
-*/
+ * flush_journal_list frequently needs to find a newer transaction for a
+ * given block.  This does that, or returns NULL if it can't find anything
+ */
 static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
 							  reiserfs_journal_cnode
 							  *cn)
@@ -1169,10 +1189,11 @@
 				int);
 
 /*
-** once all the real blocks have been flushed, it is safe to remove them from the
-** journal list for this transaction.  Aside from freeing the cnode, this also allows the
-** block to be reallocated for data blocks if it had been deleted.
-*/
+ * once all the real blocks have been flushed, it is safe to remove them
+ * from the journal list for this transaction.  Aside from freeing the
+ * cnode, this also allows the block to be reallocated for data blocks
+ * if it had been deleted.
+ */
 static void remove_all_from_journal_list(struct super_block *sb,
 					 struct reiserfs_journal_list *jl,
 					 int debug)
@@ -1181,8 +1202,9 @@
 	struct reiserfs_journal_cnode *cn, *last;
 	cn = jl->j_realblock;
 
-	/* which is better, to lock once around the whole loop, or
-	 ** to lock for each call to remove_journal_hash?
+	/*
+	 * which is better, to lock once around the whole loop, or
+	 * to lock for each call to remove_journal_hash?
 	 */
 	while (cn) {
 		if (cn->blocknr != 0) {
@@ -1204,12 +1226,13 @@
 }
 
 /*
-** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
-** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
-** releasing blocks in this transaction for reuse as data blocks.
-** called by flush_journal_list, before it calls remove_all_from_journal_list
-**
-*/
+ * if this timestamp is greater than the timestamp we wrote last to the
+ * header block, write it to the header block.  once this is done, I can
+ * safely say the log area for this transaction won't ever be replayed,
+ * and I can start releasing blocks in this transaction for reuse as data
+ * blocks.  called by flush_journal_list, before it calls
+ * remove_all_from_journal_list
+ */
 static int _update_journal_header_block(struct super_block *sb,
 					unsigned long offset,
 					unsigned int trans_id)
@@ -1279,10 +1302,11 @@
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned int trans_id = jl->j_trans_id;
 
-	/* we know we are the only ones flushing things, no extra race
+	/*
+	 * we know we are the only ones flushing things, no extra race
 	 * protection is required.
 	 */
-      restart:
+restart:
 	entry = journal->j_journal_list.next;
 	/* Did we wrap? */
 	if (entry == &journal->j_journal_list)
@@ -1309,15 +1333,16 @@
 	}
 }
 
-/* flush a journal list, both commit and real blocks
-**
-** always set flushall to 1, unless you are calling from inside
-** flush_journal_list
-**
-** IMPORTANT.  This can only be called while there are no journal writers,
-** and the journal is locked.  That means it can only be called from
-** do_journal_end, or by journal_release
-*/
+/*
+ * flush a journal list, both commit and real blocks
+ *
+ * always set flushall to 1, unless you are calling from inside
+ * flush_journal_list
+ *
+ * IMPORTANT.  This can only be called while there are no journal writers,
+ * and the journal is locked.  That means it can only be called from
+ * do_journal_end, or by journal_release
+ */
 static int flush_journal_list(struct super_block *s,
 			      struct reiserfs_journal_list *jl, int flushall)
 {
@@ -1354,13 +1379,14 @@
 	}
 
 	/* if all the work is already done, get out of here */
-	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
-	    atomic_read(&(jl->j_commit_left)) <= 0) {
+	if (atomic_read(&jl->j_nonzerolen) <= 0 &&
+	    atomic_read(&jl->j_commit_left) <= 0) {
 		goto flush_older_and_return;
 	}
 
-	/* start by putting the commit list on disk.  This will also flush
-	 ** the commit lists of any olders transactions
+	/*
+	 * start by putting the commit list on disk.  This will also flush
+	 * the commit lists of any older transactions
 	 */
 	flush_commit_list(s, jl, 1);
 
@@ -1369,15 +1395,16 @@
 		BUG();
 
 	/* are we done now? */
-	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
-	    atomic_read(&(jl->j_commit_left)) <= 0) {
+	if (atomic_read(&jl->j_nonzerolen) <= 0 &&
+	    atomic_read(&jl->j_commit_left) <= 0) {
 		goto flush_older_and_return;
 	}
 
-	/* loop through each cnode, see if we need to write it,
-	 ** or wait on a more recent transaction, or just ignore it
+	/*
+	 * loop through each cnode, see if we need to write it,
+	 * or wait on a more recent transaction, or just ignore it
 	 */
-	if (atomic_read(&(journal->j_wcount)) != 0) {
+	if (atomic_read(&journal->j_wcount) != 0) {
 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
 			       "wcount is not 0");
 	}
@@ -1391,20 +1418,25 @@
 			goto free_cnode;
 		}
 
-		/* This transaction failed commit. Don't write out to the disk */
+		/*
+		 * This transaction failed commit.
+		 * Don't write out to the disk
+		 */
 		if (!(jl->j_state & LIST_DIRTY))
 			goto free_cnode;
 
 		pjl = find_newer_jl_for_cn(cn);
-		/* the order is important here.  We check pjl to make sure we
-		 ** don't clear BH_JDirty_wait if we aren't the one writing this
-		 ** block to disk
+		/*
+		 * the order is important here.  We check pjl to make sure we
+		 * don't clear BH_JDirty_wait if we aren't the one writing this
+		 * block to disk
 		 */
 		if (!pjl && cn->bh) {
 			saved_bh = cn->bh;
 
-			/* we do this to make sure nobody releases the buffer while
-			 ** we are working with it
+			/*
+			 * we do this to make sure nobody releases the
+			 * buffer while we are working with it
 			 */
 			get_bh(saved_bh);
 
@@ -1413,13 +1445,17 @@
 				was_jwait = 1;
 				was_dirty = 1;
 			} else if (can_dirty(cn)) {
-				/* everything with !pjl && jwait should be writable */
+				/*
+				 * everything with !pjl && jwait
+				 * should be writable
+				 */
 				BUG();
 			}
 		}
 
-		/* if someone has this block in a newer transaction, just make
-		 ** sure they are committed, and don't try writing it to disk
+		/*
+		 * if someone has this block in a newer transaction, just make
+		 * sure they are committed, and don't try writing it to disk
 		 */
 		if (pjl) {
 			if (atomic_read(&pjl->j_commit_left))
@@ -1427,16 +1463,18 @@
 			goto free_cnode;
 		}
 
-		/* bh == NULL when the block got to disk on its own, OR,
-		 ** the block got freed in a future transaction
+		/*
+		 * bh == NULL when the block got to disk on its own, OR,
+		 * the block got freed in a future transaction
 		 */
 		if (saved_bh == NULL) {
 			goto free_cnode;
 		}
 
-		/* this should never happen.  kupdate_one_transaction has this list
-		 ** locked while it works, so we should never see a buffer here that
-		 ** is not marked JDirty_wait
+		/*
+		 * this should never happen.  kupdate_one_transaction has
+		 * this list locked while it works, so we should never see a
+		 * buffer here that is not marked JDirty_wait
 		 */
 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
 			reiserfs_warning(s, "journal-813",
@@ -1447,7 +1485,10 @@
 					 was_jwait ? ' ' : '!');
 		}
 		if (was_dirty) {
-			/* we inc again because saved_bh gets decremented at free_cnode */
+			/*
+			 * we inc again because saved_bh gets decremented
+			 * at free_cnode
+			 */
 			get_bh(saved_bh);
 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
 			lock_buffer(saved_bh);
@@ -1463,13 +1504,16 @@
 					 (unsigned long long)saved_bh->
 					 b_blocknr, __func__);
 		}
-	      free_cnode:
+free_cnode:
 		last = cn;
 		cn = cn->next;
 		if (saved_bh) {
-			/* we incremented this to keep others from taking the buffer head away */
+			/*
+			 * we incremented this to keep others from
+			 * taking the buffer head away
+			 */
 			put_bh(saved_bh);
-			if (atomic_read(&(saved_bh->b_count)) < 0) {
+			if (atomic_read(&saved_bh->b_count) < 0) {
 				reiserfs_warning(s, "journal-945",
 						 "saved_bh->b_count < 0");
 			}
@@ -1499,8 +1543,10 @@
 #endif
 					err = -EIO;
 				}
-				/* note, we must clear the JDirty_wait bit after the up to date
-				 ** check, otherwise we race against our flushpage routine
+				/*
+				 * note, we must clear the JDirty_wait bit
+				 * after the up to date check, otherwise we
+				 * race against our flushpage routine
 				 */
 				BUG_ON(!test_clear_buffer_journal_dirty
 				       (cn->bh));
@@ -1518,25 +1564,27 @@
 		reiserfs_abort(s, -EIO,
 			       "Write error while pushing transaction to disk in %s",
 			       __func__);
-      flush_older_and_return:
+flush_older_and_return:
 
-	/* before we can update the journal header block, we _must_ flush all
-	 ** real blocks from all older transactions to disk.  This is because
-	 ** once the header block is updated, this transaction will not be
-	 ** replayed after a crash
+	/*
+	 * before we can update the journal header block, we _must_ flush all
+	 * real blocks from all older transactions to disk.  This is because
+	 * once the header block is updated, this transaction will not be
+	 * replayed after a crash
 	 */
 	if (flushall) {
 		flush_older_journal_lists(s, jl);
 	}
 
 	err = journal->j_errno;
-	/* before we can remove everything from the hash tables for this
-	 ** transaction, we must make sure it can never be replayed
-	 **
-	 ** since we are only called from do_journal_end, we know for sure there
-	 ** are no allocations going on while we are flushing journal lists.  So,
-	 ** we only need to update the journal header block for the last list
-	 ** being flushed
+	/*
+	 * before we can remove everything from the hash tables for this
+	 * transaction, we must make sure it can never be replayed
+	 *
+	 * since we are only called from do_journal_end, we know for sure there
+	 * are no allocations going on while we are flushing journal lists.  So,
+	 * we only need to update the journal header block for the last list
+	 * being flushed
 	 */
 	if (!err && flushall) {
 		err =
@@ -1561,11 +1609,12 @@
 	}
 	journal->j_last_flush_id = jl->j_trans_id;
 
-	/* not strictly required since we are freeing the list, but it should
+	/*
+	 * not strictly required since we are freeing the list, but it should
 	 * help find code using dead lists later on
 	 */
 	jl->j_len = 0;
-	atomic_set(&(jl->j_nonzerolen), 0);
+	atomic_set(&jl->j_nonzerolen, 0);
 	jl->j_start = 0;
 	jl->j_realblock = NULL;
 	jl->j_commit_bh = NULL;
@@ -1592,15 +1641,17 @@
 
 	cn = jl->j_realblock;
 	while (cn) {
-		/* if the blocknr == 0, this has been cleared from the hash,
-		 ** skip it
+		/*
+		 * if the blocknr == 0, this has been cleared from the hash,
+		 * skip it
 		 */
 		if (cn->blocknr == 0) {
 			goto next;
 		}
 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
 			struct buffer_head *tmp_bh;
-			/* we can race against journal_mark_freed when we try
+			/*
+			 * we can race against journal_mark_freed when we try
 			 * to lock_buffer(cn->bh), so we have to inc the buffer
 			 * count, and recheck things after locking
 			 */
@@ -1619,7 +1670,7 @@
 			}
 			put_bh(tmp_bh);
 		}
-	      next:
+next:
 		cn = cn->next;
 		cond_resched();
 	}
@@ -1637,15 +1688,17 @@
 	jl->j_state |= LIST_DIRTY;
 	cn = jl->j_realblock;
 	while (cn) {
-		/* look for a more recent transaction that logged this
-		 ** buffer.  Only the most recent transaction with a buffer in
-		 ** it is allowed to send that buffer to disk
+		/*
+		 * look for a more recent transaction that logged this
+		 * buffer.  Only the most recent transaction with a buffer in
+		 * it is allowed to send that buffer to disk
 		 */
 		pjl = find_newer_jl_for_cn(cn);
 		if (!pjl && cn->blocknr && cn->bh
 		    && buffer_journal_dirty(cn->bh)) {
 			BUG_ON(!can_dirty(cn));
-			/* if the buffer is prepared, it will either be logged
+			/*
+			 * if the buffer is prepared, it will either be logged
 			 * or restored.  If restored, we need to make sure
 			 * it actually gets marked dirty
 			 */
@@ -1682,7 +1735,8 @@
 		goto done;
 	}
 
-	/* we've got j_flush_mutex held, nobody is going to delete any
+	/*
+	 * we've got j_flush_mutex held, nobody is going to delete any
 	 * of these lists out from underneath us
 	 */
 	while ((num_trans && transactions_flushed < num_trans) ||
@@ -1716,20 +1770,21 @@
 		write_chunk(&chunk);
 	}
 
-      done:
+done:
 	mutex_unlock(&journal->j_flush_mutex);
 	return ret;
 }
 
-/* for o_sync and fsync heavy applications, they tend to use
-** all the journa list slots with tiny transactions.  These
-** trigger lots and lots of calls to update the header block, which
-** adds seeks and slows things down.
-**
-** This function tries to clear out a large chunk of the journal lists
-** at once, which makes everything faster since only the newest journal
-** list updates the header block
-*/
+/*
+ * for o_sync and fsync heavy applications, they tend to use
+ * all the journal list slots with tiny transactions.  These
+ * trigger lots and lots of calls to update the header block, which
+ * adds seeks and slows things down.
+ *
+ * This function tries to clear out a large chunk of the journal lists
+ * at once, which makes everything faster since only the newest journal
+ * list updates the header block
+ */
 static int flush_used_journal_lists(struct super_block *s,
 				    struct reiserfs_journal_list *jl)
 {
@@ -1766,9 +1821,11 @@
 	}
 	get_journal_list(jl);
 	get_journal_list(flush_jl);
-	/* try to find a group of blocks we can flush across all the
-	 ** transactions, but only bother if we've actually spanned
-	 ** across multiple lists
+
+	/*
+	 * try to find a group of blocks we can flush across all the
+	 * transactions, but only bother if we've actually spanned
+	 * across multiple lists
 	 */
 	if (flush_jl != jl) {
 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
@@ -1780,9 +1837,9 @@
 }
 
 /*
-** removes any nodes in table with name block and dev as bh.
-** only touchs the hnext and hprev pointers.
-*/
+ * removes any nodes in table with name block and dev as bh.
+ * only touches the hnext and hprev pointers.
+ */
 void remove_journal_hash(struct super_block *sb,
 			 struct reiserfs_journal_cnode **table,
 			 struct reiserfs_journal_list *jl,
@@ -1811,8 +1868,12 @@
 			cur->blocknr = 0;
 			cur->sb = NULL;
 			cur->state = 0;
-			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
-				atomic_dec(&(cur->jlist->j_nonzerolen));
+			/*
+			 * anybody who clears the cur->bh will also
+			 * dec the nonzerolen
+			 */
+			if (cur->bh && cur->jlist)
+				atomic_dec(&cur->jlist->j_nonzerolen);
 			cur->bh = NULL;
 			cur->jlist = NULL;
 		}
@@ -1832,17 +1893,18 @@
 	if (journal->j_header_bh) {
 		brelse(journal->j_header_bh);
 	}
-	/* j_header_bh is on the journal dev, make sure not to release the journal
-	 * dev until we brelse j_header_bh
+	/*
+	 * j_header_bh is on the journal dev, make sure
+	 * not to release the journal dev until we brelse j_header_bh
 	 */
 	release_journal_dev(sb, journal);
 	vfree(journal);
 }
 
 /*
-** call on unmount.  Only set error to 1 if you haven't made your way out
-** of read_super() yet.  Any other caller must keep error at 0.
-*/
+ * call on unmount.  Only set error to 1 if you haven't made your way out
+ * of read_super() yet.  Any other caller must keep error at 0.
+ */
 static int do_journal_release(struct reiserfs_transaction_handle *th,
 			      struct super_block *sb, int error)
 {
@@ -1850,21 +1912,25 @@
 	int flushed = 0;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
-	/* we only want to flush out transactions if we were called with error == 0
+	/*
+	 * we only want to flush out transactions if we were
+	 * called with error == 0
 	 */
 	if (!error && !(sb->s_flags & MS_RDONLY)) {
 		/* end the current trans */
 		BUG_ON(!th->t_trans_id);
-		do_journal_end(th, sb, 10, FLUSH_ALL);
+		do_journal_end(th, FLUSH_ALL);
 
-		/* make sure something gets logged to force our way into the flush code */
-		if (!journal_join(&myth, sb, 1)) {
+		/*
+		 * make sure something gets logged to force
+		 * our way into the flush code
+		 */
+		if (!journal_join(&myth, sb)) {
 			reiserfs_prepare_for_journal(sb,
 						     SB_BUFFER_WITH_SB(sb),
 						     1);
-			journal_mark_dirty(&myth, sb,
-					   SB_BUFFER_WITH_SB(sb));
-			do_journal_end(&myth, sb, 1, FLUSH_ALL);
+			journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
+			do_journal_end(&myth, FLUSH_ALL);
 			flushed = 1;
 		}
 	}
@@ -1872,17 +1938,15 @@
 	/* this also catches errors during the do_journal_end above */
 	if (!error && reiserfs_is_journal_aborted(journal)) {
 		memset(&myth, 0, sizeof(myth));
-		if (!journal_join_abort(&myth, sb, 1)) {
+		if (!journal_join_abort(&myth, sb)) {
 			reiserfs_prepare_for_journal(sb,
 						     SB_BUFFER_WITH_SB(sb),
 						     1);
-			journal_mark_dirty(&myth, sb,
-					   SB_BUFFER_WITH_SB(sb));
-			do_journal_end(&myth, sb, 1, FLUSH_ALL);
+			journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
+			do_journal_end(&myth, FLUSH_ALL);
 		}
 	}
 
-	reiserfs_mounted_fs_count--;
 	/* wait for all commits to finish */
 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
 
@@ -1893,12 +1957,7 @@
 	reiserfs_write_unlock(sb);
 
 	cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
-	flush_workqueue(commit_wq);
-
-	if (!reiserfs_mounted_fs_count) {
-		destroy_workqueue(commit_wq);
-		commit_wq = NULL;
-	}
+	flush_workqueue(REISERFS_SB(sb)->commit_wq);
 
 	free_journal_ram(sb);
 
@@ -1907,25 +1966,24 @@
 	return 0;
 }
 
-/*
-** call on unmount.  flush all journal trans, release all alloc'd ram
-*/
+/* call on unmount.  flush all journal trans, release all alloc'd ram */
 int journal_release(struct reiserfs_transaction_handle *th,
 		    struct super_block *sb)
 {
 	return do_journal_release(th, sb, 0);
 }
 
-/*
-** only call from an error condition inside reiserfs_read_super!
-*/
+/* only call from an error condition inside reiserfs_read_super!  */
 int journal_release_error(struct reiserfs_transaction_handle *th,
 			  struct super_block *sb)
 {
 	return do_journal_release(th, sb, 1);
 }
 
-/* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
+/*
+ * compares description block with commit block.
+ * returns 1 if they differ, 0 if they are the same
+ */
 static int journal_compare_desc_commit(struct super_block *sb,
 				       struct reiserfs_journal_desc *desc,
 				       struct reiserfs_journal_commit *commit)
@@ -1939,11 +1997,12 @@
 	return 0;
 }
 
-/* returns 0 if it did not find a description block
-** returns -1 if it found a corrupt commit block
-** returns 1 if both desc and commit were valid
-** NOTE: only called during fs mount
-*/
+/*
+ * returns 0 if it did not find a description block
+ * returns -1 if it found a corrupt commit block
+ * returns 1 if both desc and commit were valid
+ * NOTE: only called during fs mount
+ */
 static int journal_transaction_is_valid(struct super_block *sb,
 					struct buffer_head *d_bh,
 					unsigned int *oldest_invalid_trans_id,
@@ -1989,7 +2048,10 @@
 		}
 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
 
-		/* ok, we have a journal description block, lets see if the transaction was valid */
+		/*
+		 * ok, we have a journal description block,
+		 * let's see if the transaction was valid
+		 */
 		c_bh =
 		    journal_bread(sb,
 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
@@ -2041,11 +2103,11 @@
 }
 
 /*
-** given the start, and values for the oldest acceptable transactions,
-** this either reads in a replays a transaction, or returns because the
-** transaction is invalid, or too old.
-** NOTE: only called during fs mount
-*/
+ * given the start, and values for the oldest acceptable transactions,
+ * this either reads in and replays a transaction, or returns because the
+ * transaction is invalid, or too old.
+ * NOTE: only called during fs mount
+ */
 static int journal_read_transaction(struct super_block *sb,
 				    unsigned long cur_dblock,
 				    unsigned long oldest_start,
@@ -2119,7 +2181,10 @@
 	}
 
 	trans_id = get_desc_trans_id(desc);
-	/* now we know we've got a good transaction, and it was inside the valid time ranges */
+	/*
+	 * now we know we've got a good transaction, and it was
+	 * inside the valid time ranges
+	 */
 	log_blocks = kmalloc(get_desc_trans_len(desc) *
 			     sizeof(struct buffer_head *), GFP_NOFS);
 	real_blocks = kmalloc(get_desc_trans_len(desc) *
@@ -2164,7 +2229,7 @@
 			reiserfs_warning(sb, "journal-1204",
 					 "REPLAY FAILURE fsck required! "
 					 "Trying to replay onto a log block");
-		      abort_replay:
+abort_replay:
 			brelse_array(log_blocks, i);
 			brelse_array(real_blocks, i);
 			brelse(c_bh);
@@ -2226,7 +2291,10 @@
 		       "journal-1095: setting journal " "start to offset %ld",
 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
 
-	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
+	/*
+	 * init starting values for the first transaction, in case
+	 * this is the last transaction to be replayed.
+	 */
 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
 	journal->j_last_flush_trans_id = trans_id;
 	journal->j_trans_id = trans_id + 1;
@@ -2240,12 +2308,14 @@
 	return 0;
 }
 
-/* This function reads blocks starting from block and to max_block of bufsize
-   size (but no more than BUFNR blocks at a time). This proved to improve
-   mounting speed on self-rebuilding raid5 arrays at least.
-   Right now it is only used from journal code. But later we might use it
-   from other places.
-   Note: Do not use journal_getblk/sb_getblk functions here! */
+/*
+ * This function reads blocks starting from block and to max_block of bufsize
+ * size (but no more than BUFNR blocks at a time). This proved to improve
+ * mounting speed on self-rebuilding raid5 arrays at least.
+ * Right now it is only used from journal code. But later we might use it
+ * from other places.
+ * Note: Do not use journal_getblk/sb_getblk functions here!
+ */
 static struct buffer_head *reiserfs_breada(struct block_device *dev,
 					   b_blocknr_t block, int bufsize,
 					   b_blocknr_t max_block)
@@ -2284,15 +2354,17 @@
 }
 
 /*
-** read and replay the log
-** on a clean unmount, the journal header's next unflushed pointer will
-** be to an invalid transaction.  This tests that before finding all the
-** transactions in the log, which makes normal mount times fast.
-** After a crash, this starts with the next unflushed transaction, and
-** replays until it finds one too old, or invalid.
-** On exit, it sets things up so the first transaction will work correctly.
-** NOTE: only called during fs mount
-*/
+ * read and replay the log
+ * on a clean unmount, the journal header's next unflushed pointer will be
+ * to an invalid transaction.  This tests that before finding all the
+ * transactions in the log, which makes normal mount times fast.
+ *
+ * After a crash, this starts with the next unflushed transaction, and
+ * replays until it finds one too old, or invalid.
+ *
+ * On exit, it sets things up so the first transaction will work correctly.
+ * NOTE: only called during fs mount
+ */
 static int journal_read(struct super_block *sb)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
@@ -2316,9 +2388,10 @@
 		      bdevname(journal->j_dev_bd, b));
 	start = get_seconds();
 
-	/* step 1, read in the journal header block.  Check the transaction it says
-	 ** is the first unflushed, and if that transaction is not valid,
-	 ** replay is done
+	/*
+	 * step 1, read in the journal header block.  Check the transaction
+	 * it says is the first unflushed, and if that transaction is not
+	 * valid, replay is done
 	 */
 	journal->j_header_bh = journal_bread(sb,
 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
@@ -2342,9 +2415,10 @@
 			       le32_to_cpu(jh->j_last_flush_trans_id));
 		valid_journal_header = 1;
 
-		/* now, we try to read the first unflushed offset.  If it is not valid,
-		 ** there is nothing more we can do, and it makes no sense to read
-		 ** through the whole log.
+		/*
+		 * now, we try to read the first unflushed offset.  If it
+		 * is not valid, there is nothing more we can do, and it
+		 * makes no sense to read through the whole log.
 		 */
 		d_bh =
 		    journal_bread(sb,
@@ -2358,15 +2432,19 @@
 		goto start_log_replay;
 	}
 
-	/* ok, there are transactions that need to be replayed.  start with the first log block, find
-	 ** all the valid transactions, and pick out the oldest.
+	/*
+	 * ok, there are transactions that need to be replayed.  start
+	 * with the first log block, find all the valid transactions, and
+	 * pick out the oldest.
 	 */
 	while (continue_replay
 	       && cur_dblock <
 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
 		SB_ONDISK_JOURNAL_SIZE(sb))) {
-		/* Note that it is required for blocksize of primary fs device and journal
-		   device to be the same */
+		/*
+		 * Note that it is required for blocksize of primary fs
+		 * device and journal device to be the same
+		 */
 		d_bh =
 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
 				    sb->s_blocksize,
@@ -2413,7 +2491,7 @@
 		brelse(d_bh);
 	}
 
-      start_log_replay:
+start_log_replay:
 	cur_dblock = oldest_start;
 	if (oldest_trans_id) {
 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
@@ -2444,9 +2522,11 @@
 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
 			       "journal-1225: No valid " "transactions found");
 	}
-	/* j_start does not get set correctly if we don't replay any transactions.
-	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
-	 ** copy the trans_id from the header
+	/*
+	 * j_start does not get set correctly if we don't replay any
+	 * transactions.  if we had a valid journal_header, set j_start
+	 * to the first unflushed transaction value, copy the trans_id
+	 * from the header
 	 */
 	if (valid_journal_header && replay_count == 0) {
 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
@@ -2475,8 +2555,9 @@
 	    _update_journal_header_block(sb, journal->j_start,
 					 journal->j_last_flush_trans_id)) {
 		reiserfs_write_unlock(sb);
-		/* replay failed, caller must call free_journal_ram and abort
-		 ** the mount
+		/*
+		 * replay failed, caller must call free_journal_ram and abort
+		 * the mount
 		 */
 		return -1;
 	}
@@ -2569,7 +2650,7 @@
 	return 0;
 }
 
-/**
+/*
  * When creating/tuning a file system user can assign some
  * journal params within boundaries which depend on the ratio
  * blocksize/standard_blocksize.
@@ -2587,8 +2668,7 @@
 				     struct reiserfs_journal *journal)
 {
         if (journal->j_trans_max) {
-	        /* Non-default journal params.
-		   Do sanity check for them. */
+		/* Non-default journal params.  Do sanity check for them. */
 	        int ratio = 1;
 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
@@ -2610,10 +2690,12 @@
 			return 1;
 		}
 	} else {
-		/* Default journal params.
-                   The file system was created by old version
-		   of mkreiserfs, so some fields contain zeros,
-		   and we need to advise proper values for them */
+		/*
+		 * Default journal params.
+		 * The file system was created by old version
+		 * of mkreiserfs, so some fields contain zeros,
+		 * and we need to advise proper values for them
+		 */
 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
 					 sb->s_blocksize);
@@ -2626,9 +2708,7 @@
 	return 0;
 }
 
-/*
-** must be called once on fs mount.  calls journal_read for you
-*/
+/* must be called once on fs mount.  calls journal_read for you */
 int journal_init(struct super_block *sb, const char *j_dev_name,
 		 int old_format, unsigned int commit_max_age)
 {
@@ -2667,8 +2747,10 @@
 						 REISERFS_DISK_OFFSET_IN_BYTES /
 						 sb->s_blocksize + 2);
 
-	/* Sanity check to see is the standard journal fitting within first bitmap
-	   (actual for small blocksizes) */
+	/*
+	 * Sanity check to see if the standard journal fits within
+	 * the first bitmap (relevant for small blocksizes)
+	 */
 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
@@ -2754,20 +2836,20 @@
 	journal->j_start = 0;
 	journal->j_len = 0;
 	journal->j_len_alloc = 0;
-	atomic_set(&(journal->j_wcount), 0);
-	atomic_set(&(journal->j_async_throttle), 0);
+	atomic_set(&journal->j_wcount, 0);
+	atomic_set(&journal->j_async_throttle, 0);
 	journal->j_bcount = 0;
 	journal->j_trans_start_time = 0;
 	journal->j_last = NULL;
 	journal->j_first = NULL;
-	init_waitqueue_head(&(journal->j_join_wait));
+	init_waitqueue_head(&journal->j_join_wait);
 	mutex_init(&journal->j_mutex);
 	mutex_init(&journal->j_flush_mutex);
 
 	journal->j_trans_id = 10;
 	journal->j_mount_id = 10;
 	journal->j_state = 0;
-	atomic_set(&(journal->j_jlock), 0);
+	atomic_set(&journal->j_jlock, 0);
 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
@@ -2807,23 +2889,19 @@
 		goto free_and_return;
 	}
 
-	reiserfs_mounted_fs_count++;
-	if (reiserfs_mounted_fs_count <= 1)
-		commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
-
 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
 	journal->j_work_sb = sb;
 	return 0;
-      free_and_return:
+free_and_return:
 	free_journal_ram(sb);
 	return 1;
 }
 
 /*
-** test for a polite end of the current transaction.  Used by file_write, and should
-** be used by delete to make sure they don't write more than can fit inside a single
-** transaction
-*/
+ * test for a polite end of the current transaction.  Used by file_write,
+ * and should be used by delete to make sure they don't write more than
+ * can fit inside a single transaction
+ */
 int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
 				   int new_alloc)
 {
@@ -2835,7 +2913,7 @@
 		return 0;
 	if (journal->j_must_wait > 0 ||
 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
-	    atomic_read(&(journal->j_jlock)) ||
+	    atomic_read(&journal->j_jlock) ||
 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
 		return 1;
@@ -2846,8 +2924,7 @@
 	return 0;
 }
 
-/* this must be called inside a transaction
-*/
+/* this must be called inside a transaction */
 void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
@@ -2857,8 +2934,7 @@
 	return;
 }
 
-/* this must be called without a transaction started
-*/
+/* this must be called without a transaction started */
 void reiserfs_allow_writes(struct super_block *s)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2866,8 +2942,7 @@
 	wake_up(&journal->j_join_wait);
 }
 
-/* this must be called without a transaction started
-*/
+/* this must be called without a transaction started */
 void reiserfs_wait_on_write_block(struct super_block *s)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2929,11 +3004,12 @@
 	}
 }
 
-/* join == true if you must join an existing transaction.
-** join == false if you can deal with waiting for others to finish
-**
-** this will block until the transaction is joinable.  send the number of blocks you
-** expect to use in nblocks.
+/*
+ * join == true if you must join an existing transaction.
+ * join == false if you can deal with waiting for others to finish
+ *
+ * this will block until the transaction is joinable.  send the number of
+ * blocks you expect to use in nblocks.
 */
 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
 			      struct super_block *sb, unsigned long nblocks,
@@ -2955,7 +3031,7 @@
 	th->t_refcount = 1;
 	th->t_super = sb;
 
-      relock:
+relock:
 	lock_journal(sb);
 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
 		unlock_journal(sb);
@@ -2974,9 +3050,11 @@
 	}
 	now = get_seconds();
 
-	/* if there is no room in the journal OR
-	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
-	 ** we don't sleep if there aren't other writers
+	/*
+	 * if there is no room in the journal OR
+	 * if this transaction is too old, and we weren't called joinable,
+	 * wait for it to finish before beginning.  we don't sleep if
+	 * there aren't other writers
 	 */
 
 	if ((!join && journal->j_must_wait > 0) ||
@@ -2990,7 +3068,8 @@
 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
 
 		old_trans_id = journal->j_trans_id;
-		unlock_journal(sb);	/* allow others to finish this transaction */
+		/* allow others to finish this transaction */
+		unlock_journal(sb);
 
 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
 		    journal->j_max_batch &&
@@ -3002,8 +3081,9 @@
 				goto relock;
 			}
 		}
-		/* don't mess with joining the transaction if all we have to do is
-		 * wait for someone else to do a commit
+		/*
+		 * don't mess with joining the transaction if all we
+		 * have to do is wait for someone else to do a commit
 		 */
 		if (atomic_read(&journal->j_jlock)) {
 			while (journal->j_trans_id == old_trans_id &&
@@ -3012,15 +3092,15 @@
 			}
 			goto relock;
 		}
-		retval = journal_join(&myth, sb, 1);
+		retval = journal_join(&myth, sb);
 		if (retval)
 			goto out_fail;
 
 		/* someone might have ended the transaction while we joined */
 		if (old_trans_id != journal->j_trans_id) {
-			retval = do_journal_end(&myth, sb, 1, 0);
+			retval = do_journal_end(&myth, 0);
 		} else {
-			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
+			retval = do_journal_end(&myth, COMMIT_NOW);
 		}
 
 		if (retval)
@@ -3033,7 +3113,7 @@
 	if (journal->j_trans_start_time == 0) {
 		journal->j_trans_start_time = get_seconds();
 	}
-	atomic_inc(&(journal->j_wcount));
+	atomic_inc(&journal->j_wcount);
 	journal->j_len_alloc += nblocks;
 	th->t_blocks_logged = 0;
 	th->t_blocks_allocated = nblocks;
@@ -3042,11 +3122,13 @@
 	INIT_LIST_HEAD(&th->t_list);
 	return 0;
 
-      out_fail:
+out_fail:
 	memset(th, 0, sizeof(*th));
-	/* Re-set th->t_super, so we can properly keep track of how many
+	/*
+	 * Re-set th->t_super, so we can properly keep track of how many
 	 * persistent transactions there are. We need to do this so if this
-	 * call is part of a failed restart_transaction, we can free it later */
+	 * call is part of a failed restart_transaction, we can free it later
+	 */
 	th->t_super = sb;
 	return retval;
 }
@@ -3059,14 +3141,15 @@
 	int ret;
 	struct reiserfs_transaction_handle *th;
 
-	/* if we're nesting into an existing transaction.  It will be
-	 ** persistent on its own
+	/*
+	 * if we're nesting into an existing transaction.  It will be
+	 * persistent on its own
 	 */
 	if (reiserfs_transaction_running(s)) {
 		th = current->journal_info;
 		th->t_refcount++;
 		BUG_ON(th->t_refcount < 2);
-		
+
 		return th;
 	}
 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
@@ -3087,7 +3170,7 @@
 	struct super_block *s = th->t_super;
 	int ret = 0;
 	if (th->t_trans_id)
-		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
+		ret = journal_end(th);
 	else
 		ret = -EIO;
 	if (th->t_refcount == 0) {
@@ -3098,29 +3181,31 @@
 }
 
 static int journal_join(struct reiserfs_transaction_handle *th,
-			struct super_block *sb, unsigned long nblocks)
+			struct super_block *sb)
 {
 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
 
-	/* this keeps do_journal_end from NULLing out the current->journal_info
-	 ** pointer
+	/*
+	 * this keeps do_journal_end from NULLing out the
+	 * current->journal_info pointer
 	 */
 	th->t_handle_save = cur_th;
 	BUG_ON(cur_th && cur_th->t_refcount > 1);
-	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
+	return do_journal_begin_r(th, sb, 1, JBEGIN_JOIN);
 }
 
 int journal_join_abort(struct reiserfs_transaction_handle *th,
-		       struct super_block *sb, unsigned long nblocks)
+		       struct super_block *sb)
 {
 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
 
-	/* this keeps do_journal_end from NULLing out the current->journal_info
-	 ** pointer
+	/*
+	 * this keeps do_journal_end from NULLing out the
+	 * current->journal_info pointer
 	 */
 	th->t_handle_save = cur_th;
 	BUG_ON(cur_th && cur_th->t_refcount > 1);
-	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
+	return do_journal_begin_r(th, sb, 1, JBEGIN_ABORT);
 }
 
 int journal_begin(struct reiserfs_transaction_handle *th,
@@ -3142,9 +3227,10 @@
 						 "journal_info != 0");
 			return 0;
 		} else {
-			/* we've ended up with a handle from a different filesystem.
-			 ** save it and restore on journal_end.  This should never
-			 ** really happen...
+			/*
+			 * we've ended up with a handle from a different
+			 * filesystem.  save it and restore on journal_end.
+			 * This should never really happen...
 			 */
 			reiserfs_warning(sb, "clm-2100",
 					 "nesting info a different FS");
@@ -3157,9 +3243,10 @@
 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
 	BUG_ON(current->journal_info != th);
 
-	/* I guess this boils down to being the reciprocal of clm-2100 above.
-	 * If do_journal_begin_r fails, we need to put it back, since journal_end
-	 * won't be called to do it. */
+	/*
+	 * I guess this boils down to being the reciprocal of clm-2100 above.
+	 * If do_journal_begin_r fails, we need to put it back, since
+	 * journal_end won't be called to do it. */
 	if (ret)
 		current->journal_info = th->t_handle_save;
 	else
@@ -3169,17 +3256,19 @@
 }
 
 /*
-** puts bh into the current transaction.  If it was already there, reorders removes the
-** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
-**
-** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
-** transaction is committed.
-**
-** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
-*/
+ * puts bh into the current transaction.  If it was already there, reorders
+ * it: removes the old pointers from the hash, and puts new ones in (to
+ * make sure replay happens in the right order).
+ *
+ * if it was dirty, cleans and files onto the clean list.  I can't let it
+ * be dirty again until the transaction is committed.
+ *
+ * if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
+ */
 int journal_mark_dirty(struct reiserfs_transaction_handle *th,
-		       struct super_block *sb, struct buffer_head *bh)
+		       struct buffer_head *bh)
 {
+	struct super_block *sb = th->t_super;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_journal_cnode *cn = NULL;
 	int count_already_incd = 0;
@@ -3201,9 +3290,10 @@
 		return 0;
 	}
 
-	/* this must be turned into a panic instead of a warning.  We can't allow
-	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
-	 ** could get to disk too early.  NOT GOOD.
+	/*
+	 * this must be turned into a panic instead of a warning.  We can't
+	 * allow a dirty or journal_dirty or locked buffer to be logged, as
+	 * some changes could get to disk too early.  NOT GOOD.
 	 */
 	if (!prepared || buffer_dirty(bh)) {
 		reiserfs_warning(sb, "journal-1777",
@@ -3216,14 +3306,16 @@
 				 buffer_journal_dirty(bh) ? ' ' : '!');
 	}
 
-	if (atomic_read(&(journal->j_wcount)) <= 0) {
+	if (atomic_read(&journal->j_wcount) <= 0) {
 		reiserfs_warning(sb, "journal-1409",
 				 "returning because j_wcount was %d",
-				 atomic_read(&(journal->j_wcount)));
+				 atomic_read(&journal->j_wcount));
 		return 1;
 	}
-	/* this error means I've screwed up, and we've overflowed the transaction.
-	 ** Nothing can be done here, except make the FS readonly or panic.
+	/*
+	 * this error means I've screwed up, and we've overflowed
+	 * the transaction.  Nothing can be done here, except make the
+	 * FS readonly or panic.
 	 */
 	if (journal->j_len >= journal->j_trans_max) {
 		reiserfs_panic(th->t_super, "journal-1413",
@@ -3280,9 +3372,9 @@
 	return 0;
 }
 
-int journal_end(struct reiserfs_transaction_handle *th,
-		struct super_block *sb, unsigned long nblocks)
+int journal_end(struct reiserfs_transaction_handle *th)
 {
+	struct super_block *sb = th->t_super;
 	if (!current->journal_info && th->t_refcount > 1)
 		reiserfs_warning(sb, "REISER-NESTING",
 				 "th NULL, refcount %d", th->t_refcount);
@@ -3297,8 +3389,9 @@
 		struct reiserfs_transaction_handle *cur_th =
 		    current->journal_info;
 
-		/* we aren't allowed to close a nested transaction on a different
-		 ** filesystem from the one in the task struct
+		/*
+		 * we aren't allowed to close a nested transaction on a
+		 * different filesystem from the one in the task struct
 		 */
 		BUG_ON(cur_th->t_super != th->t_super);
 
@@ -3308,17 +3401,18 @@
 		}
 		return 0;
 	} else {
-		return do_journal_end(th, sb, nblocks, 0);
+		return do_journal_end(th, 0);
 	}
 }
 
-/* removes from the current transaction, relsing and descrementing any counters.
-** also files the removed buffer directly onto the clean list
-**
-** called by journal_mark_freed when a block has been deleted
-**
-** returns 1 if it cleaned and relsed the buffer. 0 otherwise
-*/
+/*
+ * removes from the current transaction, releasing and decrementing any
+ * counters.  also files the removed buffer directly onto the clean list
+ *
+ * called by journal_mark_freed when a block has been deleted
+ *
+ * returns 1 if it cleaned and released the buffer. 0 otherwise
+ */
 static int remove_from_transaction(struct super_block *sb,
 				   b_blocknr_t blocknr, int already_cleaned)
 {
@@ -3354,7 +3448,7 @@
 		clear_buffer_dirty(bh);
 		clear_buffer_journal_test(bh);
 		put_bh(bh);
-		if (atomic_read(&(bh->b_count)) < 0) {
+		if (atomic_read(&bh->b_count) < 0) {
 			reiserfs_warning(sb, "journal-1752",
 					 "b_count < 0");
 		}
@@ -3367,15 +3461,16 @@
 }
 
 /*
-** for any cnode in a journal list, it can only be dirtied of all the
-** transactions that include it are committed to disk.
-** this checks through each transaction, and returns 1 if you are allowed to dirty,
-** and 0 if you aren't
-**
-** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
-** blocks for a given transaction on disk
-**
-*/
+ * for any cnode in a journal list, it can only be dirtied if all the
+ * transactions that include it are committed to disk.
+ * this checks through each transaction, and returns 1 if you are allowed
+ * to dirty, and 0 if you aren't
+ *
+ * it is called by dirty_journal_list, which is called after
+ * flush_commit_list has gotten all the log blocks for a given
+ * transaction on disk
+ *
+ */
 static int can_dirty(struct reiserfs_journal_cnode *cn)
 {
 	struct super_block *sb = cn->sb;
@@ -3383,9 +3478,10 @@
 	struct reiserfs_journal_cnode *cur = cn->hprev;
 	int can_dirty = 1;
 
-	/* first test hprev.  These are all newer than cn, so any node here
-	 ** with the same block number and dev means this node can't be sent
-	 ** to disk right now.
+	/*
+	 * first test hprev.  These are all newer than cn, so any node here
+	 * with the same block number and dev means this node can't be sent
+	 * to disk right now.
 	 */
 	while (cur && can_dirty) {
 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
@@ -3394,13 +3490,14 @@
 		}
 		cur = cur->hprev;
 	}
-	/* then test hnext.  These are all older than cn.  As long as they
-	 ** are committed to the log, it is safe to write cn to disk
+	/*
+	 * then test hnext.  These are all older than cn.  As long as they
+	 * are committed to the log, it is safe to write cn to disk
 	 */
 	cur = cn->hnext;
 	while (cur && can_dirty) {
 		if (cur->jlist && cur->jlist->j_len > 0 &&
-		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
+		    atomic_read(&cur->jlist->j_commit_left) > 0 && cur->bh &&
 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
 			can_dirty = 0;
 		}
@@ -3409,12 +3506,13 @@
 	return can_dirty;
 }
 
-/* syncs the commit blocks, but does not force the real buffers to disk
-** will wait until the current transaction is done/committed before returning
-*/
-int journal_end_sync(struct reiserfs_transaction_handle *th,
-		     struct super_block *sb, unsigned long nblocks)
+/*
+ * syncs the commit blocks, but does not force the real buffers to disk
+ * will wait until the current transaction is done/committed before returning
+ */
+int journal_end_sync(struct reiserfs_transaction_handle *th)
 {
+	struct super_block *sb = th->t_super;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
 	BUG_ON(!th->t_trans_id);
@@ -3423,14 +3521,12 @@
 	if (journal->j_len == 0) {
 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
 					     1);
-		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
+		journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb));
 	}
-	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
+	return do_journal_end(th, COMMIT_NOW | WAIT);
 }
 
-/*
-** writeback the pending async commits to disk
-*/
+/* writeback the pending async commits to disk */
 static void flush_async_commits(struct work_struct *work)
 {
 	struct reiserfs_journal *journal =
@@ -3450,9 +3546,9 @@
 }
 
 /*
-** flushes any old transactions to disk
-** ends the current transaction if it is too old
-*/
+ * flushes any old transactions to disk
+ * ends the current transaction if it is too old
+ */
 void reiserfs_flush_old_commits(struct super_block *sb)
 {
 	time_t now;
@@ -3460,48 +3556,53 @@
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
 	now = get_seconds();
-	/* safety check so we don't flush while we are replaying the log during
+	/*
+	 * safety check so we don't flush while we are replaying the log during
 	 * mount
 	 */
 	if (list_empty(&journal->j_journal_list))
 		return;
 
-	/* check the current transaction.  If there are no writers, and it is
+	/*
+	 * check the current transaction.  If there are no writers, and it is
 	 * too old, finish it, and force the commit blocks to disk
 	 */
 	if (atomic_read(&journal->j_wcount) <= 0 &&
 	    journal->j_trans_start_time > 0 &&
 	    journal->j_len > 0 &&
 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
-		if (!journal_join(&th, sb, 1)) {
+		if (!journal_join(&th, sb)) {
 			reiserfs_prepare_for_journal(sb,
 						     SB_BUFFER_WITH_SB(sb),
 						     1);
-			journal_mark_dirty(&th, sb,
-					   SB_BUFFER_WITH_SB(sb));
+			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb));
 
-			/* we're only being called from kreiserfsd, it makes no sense to do
-			 ** an async commit so that kreiserfsd can do it later
+			/*
+			 * we're only being called from kreiserfsd, it makes
+			 * no sense to do an async commit so that kreiserfsd
+			 * can do it later
 			 */
-			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
+			do_journal_end(&th, COMMIT_NOW | WAIT);
 		}
 	}
 }
 
 /*
-** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
-**
-** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
-** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
-** flushes the commit list and returns 0.
-**
-** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
-**
-** Note, we can't allow the journal_end to proceed while there are still writers in the log.
-*/
-static int check_journal_end(struct reiserfs_transaction_handle *th,
-			     struct super_block *sb, unsigned long nblocks,
-			     int flags)
+ * returns 0 if do_journal_end should return right away, returns 1 if
+ * do_journal_end should finish the commit
+ *
+ * if the current transaction is too old, but still has writers, this will
+ * wait on j_join_wait until all the writers are done.  By the time it
+ * wakes up, the transaction it was called for has already ended, so it just
+ * flushes the commit list and returns 0.
+ *
+ * Won't batch when flush or commit_now is set.  Also won't batch when
+ * others are waiting on j_join_wait.
+ *
+ * Note, we can't allow the journal_end to proceed while there are still
+ * writers in the log.
+ */
+static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
 {
 
 	time_t now;
@@ -3509,6 +3610,7 @@
 	int commit_now = flags & COMMIT_NOW;
 	int wait_on_commit = flags & WAIT;
 	struct reiserfs_journal_list *jl;
+	struct super_block *sb = th->t_super;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
 	BUG_ON(!th->t_trans_id);
@@ -3520,23 +3622,27 @@
 	}
 
 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
-	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
-		atomic_dec(&(journal->j_wcount));
-	}
+	/* <= 0 is allowed.  unmounting might not call begin */
+	if (atomic_read(&journal->j_wcount) > 0)
+		atomic_dec(&journal->j_wcount);
 
-	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
-	 ** will be dealt with by next transaction that actually writes something, but should be taken
-	 ** care of in this trans
+	/*
+	 * BUG, deal with case where j_len is 0, but people previously
+	 * freed blocks need to be released.  will be dealt with by next
+	 * transaction that actually writes something, but should be taken
+	 * care of in this trans
 	 */
 	BUG_ON(journal->j_len == 0);
 
-	/* if wcount > 0, and we are called to with flush or commit_now,
-	 ** we wait on j_join_wait.  We will wake up when the last writer has
-	 ** finished the transaction, and started it on its way to the disk.
-	 ** Then, we flush the commit or journal list, and just return 0
-	 ** because the rest of journal end was already done for this transaction.
+	/*
+	 * if wcount > 0, and we are called with flush or commit_now,
+	 * we wait on j_join_wait.  We will wake up when the last writer has
+	 * finished the transaction, and started it on its way to the disk.
+	 * Then, we flush the commit or journal list, and just return 0
+	 * because the rest of journal end was already done for this
+	 * transaction.
 	 */
-	if (atomic_read(&(journal->j_wcount)) > 0) {
+	if (atomic_read(&journal->j_wcount) > 0) {
 		if (flush || commit_now) {
 			unsigned trans_id;
 
@@ -3544,27 +3650,30 @@
 			trans_id = jl->j_trans_id;
 			if (wait_on_commit)
 				jl->j_state |= LIST_COMMIT_PENDING;
-			atomic_set(&(journal->j_jlock), 1);
+			atomic_set(&journal->j_jlock, 1);
 			if (flush) {
 				journal->j_next_full_flush = 1;
 			}
 			unlock_journal(sb);
 
-			/* sleep while the current transaction is still j_jlocked */
+			/*
+			 * sleep while the current transaction is
+			 * still j_jlocked
+			 */
 			while (journal->j_trans_id == trans_id) {
 				if (atomic_read(&journal->j_jlock)) {
 					queue_log_writer(sb);
 				} else {
 					lock_journal(sb);
 					if (journal->j_trans_id == trans_id) {
-						atomic_set(&(journal->j_jlock),
+						atomic_set(&journal->j_jlock,
 							   1);
 					}
 					unlock_journal(sb);
 				}
 			}
 			BUG_ON(journal->j_trans_id == trans_id);
-			
+
 			if (commit_now
 			    && journal_list_still_alive(sb, trans_id)
 			    && wait_on_commit) {
@@ -3584,7 +3693,7 @@
 	}
 	/* don't batch when someone is waiting on j_join_wait */
 	/* don't batch when syncing the commit or flushing the whole trans */
-	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
+	if (!(journal->j_must_wait > 0) && !(atomic_read(&journal->j_jlock))
 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
 	    && journal->j_len_alloc < journal->j_max_batch
 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
@@ -3602,19 +3711,22 @@
 }
 
 /*
-** Does all the work that makes deleting blocks safe.
-** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
-**
-** otherwise:
-** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
-** before this transaction has finished.
-**
-** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
-** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
-** the block can't be reallocated yet.
-**
-** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
-*/
+ * Does all the work that makes deleting blocks safe.
+ * when deleting a block mark BH_JNew, just remove it from the current
+ * transaction, clean its buffer_head and move on.
+ *
+ * otherwise:
+ * set a bit for the block in the journal bitmap.  That will prevent it from
+ * being allocated for unformatted nodes before this transaction has finished.
+ *
+ * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.
+ * That will prevent any old transactions with this block from trying to flush
+ * to the real location.  Since we aren't removing the cnode from the
+ * journal_list_hash, the block can't be reallocated yet.
+ *
+ * Then remove it from the current transaction, decrementing any counters and
+ * filing it on the clean list.
+ */
 int journal_mark_freed(struct reiserfs_transaction_handle *th,
 		       struct super_block *sb, b_blocknr_t blocknr)
 {
@@ -3637,7 +3749,10 @@
 		reiserfs_clean_and_file_buffer(bh);
 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
 	} else {
-		/* set the bit for this block in the journal bitmap for this transaction */
+		/*
+		 * set the bit for this block in the journal bitmap
+		 * for this transaction
+		 */
 		jb = journal->j_current_jl->j_list_bitmap;
 		if (!jb) {
 			reiserfs_panic(sb, "journal-1702",
@@ -3653,17 +3768,22 @@
 		}
 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
 
-		/* find all older transactions with this block, make sure they don't try to write it out */
+		/*
+		 * find all older transactions with this block,
+		 * make sure they don't try to write it out
+		 */
 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
 					  blocknr);
 		while (cn) {
 			if (sb == cn->sb && blocknr == cn->blocknr) {
 				set_bit(BLOCK_FREED, &cn->state);
 				if (cn->bh) {
+					/*
+					 * remove_from_transaction will brelse
+					 * the buffer if it was in the current
+					 * trans
+					 */
 					if (!cleaned) {
-						/* remove_from_transaction will brelse the buffer if it was 
-						 ** in the current trans
-						 */
 						clear_buffer_journal_dirty(cn->
 									   bh);
 						clear_buffer_dirty(cn->bh);
@@ -3672,16 +3792,19 @@
 						cleaned = 1;
 						put_bh(cn->bh);
 						if (atomic_read
-						    (&(cn->bh->b_count)) < 0) {
+						    (&cn->bh->b_count) < 0) {
 							reiserfs_warning(sb,
 								 "journal-2138",
 								 "cn->bh->b_count < 0");
 						}
 					}
-					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
-						atomic_dec(&
-							   (cn->jlist->
-							    j_nonzerolen));
+					/*
+					 * since we are clearing the bh,
+					 * we MUST dec nonzerolen
+					 */
+					if (cn->jlist) {
+						atomic_dec(&cn->jlist->
+							   j_nonzerolen);
 					}
 					cn->bh = NULL;
 				}
@@ -3714,10 +3837,16 @@
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	int ret = 0;
 
-	/* is it from the current transaction, or from an unknown transaction? */
+	/*
+	 * is it from the current transaction,
+	 * or from an unknown transaction?
+	 */
 	if (id == journal->j_trans_id) {
 		jl = journal->j_current_jl;
-		/* try to let other writers come in and grow this transaction */
+		/*
+		 * try to let other writers come in and
+		 * grow this transaction
+		 */
 		let_transaction_grow(sb, id);
 		if (journal->j_trans_id != id) {
 			goto flush_commit_only;
@@ -3731,21 +3860,22 @@
 		if (journal->j_trans_id != id) {
 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
 						     1);
-			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
-			ret = journal_end(&th, sb, 1);
+			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb));
+			ret = journal_end(&th);
 			goto flush_commit_only;
 		}
 
-		ret = journal_end_sync(&th, sb, 1);
+		ret = journal_end_sync(&th);
 		if (!ret)
 			ret = 1;
 
 	} else {
-		/* this gets tricky, we have to make sure the journal list in
+		/*
+		 * this gets tricky, we have to make sure the journal list in
 		 * the inode still exists.  We know the list is still around
 		 * if we've got a larger transaction id than the oldest list
 		 */
-	      flush_commit_only:
+flush_commit_only:
 		if (journal_list_still_alive(inode->i_sb, id)) {
 			/*
 			 * we only set ret to 1 when we know for sure
@@ -3768,7 +3898,8 @@
 	unsigned int id = REISERFS_I(inode)->i_trans_id;
 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
 
-	/* for the whole inode, assume unset id means it was
+	/*
+	 * for the whole inode, assume unset id means it was
 	 * changed in the current transaction.  More conservative
 	 */
 	if (!id || !jl) {
@@ -3806,12 +3937,11 @@
 
 extern struct tree_balance *cur_tb;
 /*
-** before we can change a metadata block, we have to make sure it won't
-** be written to disk while we are altering it.  So, we must:
-** clean it
-** wait on it.
-**
-*/
+ * before we can change a metadata block, we have to make sure it won't
+ * be written to disk while we are altering it.  So, we must:
+ * clean it
+ * wait on it.
+ */
 int reiserfs_prepare_for_journal(struct super_block *sb,
 				 struct buffer_head *bh, int wait)
 {
@@ -3832,19 +3962,18 @@
 }
 
 /*
-** long and ugly.  If flush, will not return until all commit
-** blocks and all real buffers in the trans are on disk.
-** If no_async, won't return until all commit blocks are on disk.
-**
-** keep reading, there are comments as you go along
-**
-** If the journal is aborted, we just clean up. Things like flushing
-** journal lists, etc just won't happen.
-*/
-static int do_journal_end(struct reiserfs_transaction_handle *th,
-			  struct super_block *sb, unsigned long nblocks,
-			  int flags)
+ * long and ugly.  If flush, will not return until all commit
+ * blocks and all real buffers in the trans are on disk.
+ * If no_async, won't return until all commit blocks are on disk.
+ *
+ * keep reading, there are comments as you go along
+ *
+ * If the journal is aborted, we just clean up. Things like flushing
+ * journal lists, etc just won't happen.
+ */
+static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
 {
+	struct super_block *sb = th->t_super;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
 	struct reiserfs_journal_cnode *last_cn = NULL;
@@ -3866,9 +3995,12 @@
 
 	BUG_ON(th->t_refcount > 1);
 	BUG_ON(!th->t_trans_id);
+	BUG_ON(!th->t_super);
 
-	/* protect flush_older_commits from doing mistakes if the
-           transaction ID counter gets overflowed.  */
+	/*
+	 * protect flush_older_commits from doing mistakes if the
+	 * transaction ID counter gets overflowed.
+	 */
 	if (th->t_trans_id == ~0U)
 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
 	flush = flags & FLUSH_ALL;
@@ -3879,7 +4011,7 @@
 	if (journal->j_len == 0) {
 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
 					     1);
-		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
+		journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb));
 	}
 
 	lock_journal(sb);
@@ -3892,10 +4024,12 @@
 		wait_on_commit = 1;
 	}
 
-	/* check_journal_end locks the journal, and unlocks if it does not return 1
-	 ** it tells us if we should continue with the journal_end, or just return
+	/*
+	 * check_journal_end locks the journal, and unlocks if it does
+	 * not return 1.  It tells us if we should continue with the
+	 * journal_end, or just return
 	 */
-	if (!check_journal_end(th, sb, nblocks, flags)) {
+	if (!check_journal_end(th, flags)) {
 		reiserfs_schedule_old_flush(sb);
 		wake_queued_writers(sb);
 		reiserfs_async_progress_wait(sb);
@@ -3908,19 +4042,23 @@
 	}
 
 	/*
-	 ** j must wait means we have to flush the log blocks, and the real blocks for
-	 ** this transaction
+	 * j must wait means we have to flush the log blocks, and the
+	 * real blocks for this transaction
 	 */
 	if (journal->j_must_wait > 0) {
 		flush = 1;
 	}
 #ifdef REISERFS_PREALLOCATE
-	/* quota ops might need to nest, setup the journal_info pointer for them
-	 * and raise the refcount so that it is > 0. */
+	/*
+	 * quota ops might need to nest, setup the journal_info pointer
+	 * for them and raise the refcount so that it is > 0.
+	 */
 	current->journal_info = th;
 	th->t_refcount++;
-	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
-						 * the transaction */
+
+	/* it should not involve new blocks into the transaction */
+	reiserfs_discard_all_prealloc(th);
+
 	th->t_refcount--;
 	current->journal_info = th->t_handle_save;
 #endif
@@ -3936,7 +4074,10 @@
 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
 	set_desc_trans_id(desc, journal->j_trans_id);
 
-	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
+	/*
+	 * setup commit block.  Don't write (keep it clean too) this one
+	 * until after everyone else is written
+	 */
 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
 			      ((journal->j_start + journal->j_len +
 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
@@ -3948,7 +4089,8 @@
 	/* init this journal list */
 	jl = journal->j_current_jl;
 
-	/* we lock the commit before doing anything because
+	/*
+	 * we lock the commit before doing anything because
 	 * we want to make sure nobody tries to run flush_commit_list until
 	 * the new transaction is fully setup, and we've already flushed the
 	 * ordered bh list
@@ -3968,9 +4110,10 @@
 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
 	jl->j_realblock = NULL;
 
-	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
-	 **  for each real block, add it to the journal list hash,
-	 ** copy into real block index array in the commit or desc block
+	/*
+	 * The ENTIRE FOR LOOP MUST not cause schedule to occur.
+	 * for each real block, add it to the journal list hash,
+	 * copy into real block index array in the commit or desc block
 	 */
 	trans_half = journal_trans_half(sb->s_blocksize);
 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
@@ -3989,9 +4132,10 @@
 				last_cn->next = jl_cn;
 			}
 			last_cn = jl_cn;
-			/* make sure the block we are trying to log is not a block
-			   of journal or reserved area */
-
+			/*
+			 * make sure the block we are trying to log
+			 * is not a block of journal or reserved area
+			 */
 			if (is_block_in_log_or_reserved_area
 			    (sb, cn->bh->b_blocknr)) {
 				reiserfs_panic(sb, "journal-2332",
@@ -4021,19 +4165,26 @@
 	set_desc_trans_id(desc, journal->j_trans_id);
 	set_commit_trans_len(commit, journal->j_len);
 
-	/* special check in case all buffers in the journal were marked for not logging */
+	/*
+	 * special check in case all buffers in the journal
+	 * were marked for not logging
+	 */
 	BUG_ON(journal->j_len == 0);
 
-	/* we're about to dirty all the log blocks, mark the description block
+	/*
+	 * we're about to dirty all the log blocks, mark the description block
 	 * dirty now too.  Don't mark the commit block dirty until all the
 	 * others are on disk
 	 */
 	mark_buffer_dirty(d_bh);
 
-	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
+	/*
+	 * first data block is j_start + 1, so add one to
+	 * cur_write_start wherever you use it
+	 */
 	cur_write_start = journal->j_start;
 	cn = journal->j_first;
-	jindex = 1;		/* start at one so we don't get the desc again */
+	jindex = 1;	/* start at one so we don't get the desc again */
 	while (cn) {
 		clear_buffer_journal_new(cn->bh);
 		/* copy all the real blocks into log area.  dirty log blocks */
@@ -4059,7 +4210,10 @@
 			set_buffer_journal_dirty(cn->bh);
 			clear_buffer_journaled(cn->bh);
 		} else {
-			/* JDirty cleared sometime during transaction.  don't log this one */
+			/*
+			 * JDirty cleared sometime during transaction.
+			 * don't log this one
+			 */
 			reiserfs_warning(sb, "journal-2048",
 					 "BAD, buffer in journal hash, "
 					 "but not JDirty!");
@@ -4071,9 +4225,10 @@
 		reiserfs_cond_resched(sb);
 	}
 
-	/* we are done  with both the c_bh and d_bh, but
-	 ** c_bh must be written after all other commit blocks,
-	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
+	/*
+	 * we are done with both the c_bh and d_bh, but
+	 * c_bh must be written after all other commit blocks,
+	 * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
 	 */
 
 	journal->j_current_jl = alloc_journal_list(sb);
@@ -4088,7 +4243,7 @@
 	journal->j_start =
 	    (journal->j_start + journal->j_len +
 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
-	atomic_set(&(journal->j_wcount), 0);
+	atomic_set(&journal->j_wcount, 0);
 	journal->j_bcount = 0;
 	journal->j_last = NULL;
 	journal->j_first = NULL;
@@ -4104,15 +4259,18 @@
 	journal->j_next_async_flush = 0;
 	init_journal_hash(sb);
 
-	// make sure reiserfs_add_jh sees the new current_jl before we
-	// write out the tails
+	/*
+	 * make sure reiserfs_add_jh sees the new current_jl before we
+	 * write out the tails
+	 */
 	smp_mb();
 
-	/* tail conversion targets have to hit the disk before we end the
+	/*
+	 * tail conversion targets have to hit the disk before we end the
 	 * transaction.  Otherwise a later transaction might repack the tail
-	 * before this transaction commits, leaving the data block unflushed and
-	 * clean, if we crash before the later transaction commits, the data block
-	 * is lost.
+	 * before this transaction commits, leaving the data block unflushed
+	 * and clean, if we crash before the later transaction commits, the
+	 * data block is lost.
 	 */
 	if (!list_empty(&jl->j_tail_bh_list)) {
 		depth = reiserfs_write_unlock_nested(sb);
@@ -4123,24 +4281,27 @@
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
 	mutex_unlock(&jl->j_commit_mutex);
 
-	/* honor the flush wishes from the caller, simple commits can
-	 ** be done outside the journal lock, they are done below
-	 **
-	 ** if we don't flush the commit list right now, we put it into
-	 ** the work queue so the people waiting on the async progress work
-	 ** queue don't wait for this proc to flush journal lists and such.
+	/*
+	 * honor the flush wishes from the caller, simple commits can
+	 * be done outside the journal lock, they are done below
+	 *
+	 * if we don't flush the commit list right now, we put it into
+	 * the work queue so the people waiting on the async progress work
+	 * queue don't wait for this proc to flush journal lists and such.
 	 */
 	if (flush) {
 		flush_commit_list(sb, jl, 1);
 		flush_journal_list(sb, jl, 1);
 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
-		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
+		queue_delayed_work(REISERFS_SB(sb)->commit_wq,
+				   &journal->j_work, HZ / 10);
 
-	/* if the next transaction has any chance of wrapping, flush
-	 ** transactions that might get overwritten.  If any journal lists are very
-	 ** old flush them as well.
+	/*
+	 * if the next transaction has any chance of wrapping, flush
+	 * transactions that might get overwritten.  If any journal lists
+	 * are very old flush them as well.
 	 */
-      first_jl:
+first_jl:
 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
 		temp_jl = JOURNAL_LIST_ENTRY(entry);
 		if (journal->j_start <= temp_jl->j_start) {
@@ -4151,8 +4312,10 @@
 			} else if ((journal->j_start +
 				    journal->j_trans_max + 1) <
 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
-				/* if we don't cross into the next transaction and we don't
-				 * wrap, there is no way we can overlap any later transactions
+				/*
+				 * if we don't cross into the next
+				 * transaction and we don't wrap, there is
+				 * no way we can overlap any later transactions
 				 * break now
 				 */
 				break;
@@ -4166,10 +4329,12 @@
 				flush_used_journal_lists(sb, temp_jl);
 				goto first_jl;
 			} else {
-				/* we don't overlap anything from out start to the end of the
-				 * log, and our wrapped portion doesn't overlap anything at
-				 * the start of the log.  We can break
-				 */
+				/*
+				* we don't overlap anything from our start
+				* to the end of the log, and our wrapped
+				* portion doesn't overlap anything at
+				* the start of the log.  We can break
+				*/
 				break;
 			}
 		}
@@ -4183,23 +4348,25 @@
 			       "could not get a list bitmap");
 	}
 
-	atomic_set(&(journal->j_jlock), 0);
+	atomic_set(&journal->j_jlock, 0);
 	unlock_journal(sb);
 	/* wake up any body waiting to join. */
 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
-	wake_up(&(journal->j_join_wait));
+	wake_up(&journal->j_join_wait);
 
 	if (!flush && wait_on_commit &&
 	    journal_list_still_alive(sb, commit_trans_id)) {
 		flush_commit_list(sb, jl, 1);
 	}
-      out:
+out:
 	reiserfs_check_lock_depth(sb, "journal end2");
 
 	memset(th, 0, sizeof(*th));
-	/* Re-set th->t_super, so we can properly keep track of how many
+	/*
+	 * Re-set th->t_super, so we can properly keep track of how many
 	 * persistent transactions there are. We need to do this so if this
-	 * call is part of a failed restart_transaction, we can free it later */
+	 * call is part of a failed restart_transaction, we can free it later
+	 */
 	th->t_super = sb;
 
 	return journal->j_errno;

diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 79e5a8b..d6744c8 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c

@@ -8,46 +8,42 @@
 #include "reiserfs.h"
 #include <linux/buffer_head.h>
 
-/* these are used in do_balance.c */
-
-/* leaf_move_items
-   leaf_shift_left
-   leaf_shift_right
-   leaf_delete_items
-   leaf_insert_into_buf
-   leaf_paste_in_buffer
-   leaf_cut_from_buffer
-   leaf_paste_entries
-   */
-
-/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
+/*
+ * copy copy_count entries from source directory item to dest buffer
+ * (creating new item if needed)
+ */
 static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
 				  struct buffer_head *source, int last_first,
 				  int item_num, int from, int copy_count)
 {
 	struct buffer_head *dest = dest_bi->bi_bh;
-	int item_num_in_dest;	/* either the number of target item,
-				   or if we must create a new item,
-				   the number of the item we will
-				   create it next to */
+	/*
+	 * either the number of target item, or if we must create a
+	 * new item, the number of the item we will create it next to
+	 */
+	int item_num_in_dest;
+
 	struct item_head *ih;
 	struct reiserfs_de_head *deh;
 	int copy_records_len;	/* length of all records in item to be copied */
 	char *records;
 
-	ih = B_N_PITEM_HEAD(source, item_num);
+	ih = item_head(source, item_num);
 
 	RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
 
-	/* length of all record to be copied and first byte of the last of them */
+	/*
+	 * length of all records to be copied and first byte of
+	 * the last of them
+	 */
 	deh = B_I_DEH(source, ih);
 	if (copy_count) {
-		copy_records_len = (from ? deh_location(&(deh[from - 1])) :
+		copy_records_len = (from ? deh_location(&deh[from - 1]) :
 				    ih_item_len(ih)) -
-		    deh_location(&(deh[from + copy_count - 1]));
+		    deh_location(&deh[from + copy_count - 1]);
 		records =
 		    source->b_data + ih_location(ih) +
-		    deh_location(&(deh[from + copy_count - 1]));
+		    deh_location(&deh[from + copy_count - 1]);
 	} else {
 		copy_records_len = 0;
 		records = NULL;
@@ -59,12 +55,15 @@
 	     LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest)
 							       - 1);
 
-	/* if there are no items in dest or the first/last item in dest is not item of the same directory */
+	/*
+	 * if there are no items in dest or the first/last item in
+	 * dest is not item of the same directory
+	 */
 	if ((item_num_in_dest == -1) ||
 	    (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
 	    (last_first == LAST_TO_FIRST
 	     && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key,
-							 B_N_PKEY(dest,
+							 leaf_key(dest,
 								  item_num_in_dest))))
 	{
 		/* create new item in dest */
@@ -80,16 +79,22 @@
 
 		if (last_first == LAST_TO_FIRST) {
 			/* form key by the following way */
-			if (from < I_ENTRY_COUNT(ih)) {
+			if (from < ih_entry_count(ih)) {
 				set_le_ih_k_offset(&new_ih,
-						   deh_offset(&(deh[from])));
-				/*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
+						   deh_offset(&deh[from]));
 			} else {
-				/* no entries will be copied to this item in this function */
+				/*
+				 * no entries will be copied to this
+				 * item in this function
+				 */
 				set_le_ih_k_offset(&new_ih, U32_MAX);
-				/* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */
+				/*
+				 * this item is not yet valid, but we
+				 * want I_IS_DIRECTORY_ITEM to return 1
+				 * for it, so we -1
+				 */
 			}
-			set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key),
+			set_le_key_k_type(KEY_FORMAT_3_5, &new_ih.ih_key,
 					  TYPE_DIRENTRY);
 		}
 
@@ -113,36 +118,44 @@
 
 	leaf_paste_entries(dest_bi, item_num_in_dest,
 			   (last_first ==
-			    FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
+			    FIRST_TO_LAST) ? ih_entry_count(item_head(dest,
 									  item_num_in_dest))
 			   : 0, copy_count, deh + from, records,
 			   DEH_SIZE * copy_count + copy_records_len);
 }
 
-/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
-   part of it or nothing (see the return 0 below) from SOURCE to the end
-   (if last_first) or beginning (!last_first) of the DEST */
+/*
+ * Copy the first (if last_first == FIRST_TO_LAST) or last
+ * (last_first == LAST_TO_FIRST) item or part of it or nothing
+ * (see the return 0 below) from SOURCE to the end (if last_first)
+ * or beginning (!last_first) of the DEST
+ */
 /* returns 1 if anything was copied, else 0 */
 static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
 				   struct buffer_head *src, int last_first,
 				   int bytes_or_entries)
 {
 	struct buffer_head *dest = dest_bi->bi_bh;
-	int dest_nr_item, src_nr_item;	/* number of items in the source and destination buffers */
+	/* number of items in the source and destination buffers */
+	int dest_nr_item, src_nr_item;
 	struct item_head *ih;
 	struct item_head *dih;
 
 	dest_nr_item = B_NR_ITEMS(dest);
 
+	/*
+	 * if ( DEST is empty or first item of SOURCE and last item of
+	 * DEST are the items of different objects or of different types )
+	 * then there is no need to treat this item differently from the
+	 * other items that we copy, so we return
+	 */
 	if (last_first == FIRST_TO_LAST) {
-		/* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects
-		   or of different types ) then there is no need to treat this item differently from the other items
-		   that we copy, so we return */
-		ih = B_N_PITEM_HEAD(src, 0);
-		dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1);
+		ih = item_head(src, 0);
+		dih = item_head(dest, dest_nr_item - 1);
+
+		/* there is nothing to merge */
 		if (!dest_nr_item
-		    || (!op_is_left_mergeable(&(ih->ih_key), src->b_size)))
-			/* there is nothing to merge */
+		    || (!op_is_left_mergeable(&ih->ih_key, src->b_size)))
 			return 0;
 
 		RFALSE(!ih_item_len(ih),
@@ -157,8 +170,11 @@
 			return 1;
 		}
 
-		/* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST
-		   part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header
+		/*
+		 * copy part of the body of the first item of SOURCE
+		 * to the end of the body of the last item of the DEST
+		 * part defined by 'bytes_or_entries'; if bytes_or_entries
+		 * == -1 copy whole body; don't create new item header
 		 */
 		if (bytes_or_entries == -1)
 			bytes_or_entries = ih_item_len(ih);
@@ -176,11 +192,13 @@
 		}
 #endif
 
-		/* merge first item (or its part) of src buffer with the last
-		   item of dest buffer. Both are of the same file */
+		/*
+		 * merge first item (or its part) of src buffer with the last
+		 * item of dest buffer. Both are of the same file
+		 */
 		leaf_paste_in_buffer(dest_bi,
 				     dest_nr_item - 1, ih_item_len(dih),
-				     bytes_or_entries, B_I_PITEM(src, ih), 0);
+				     bytes_or_entries, ih_item_body(src, ih), 0);
 
 		if (is_indirect_le_ih(dih)) {
 			RFALSE(get_ih_free_space(dih),
@@ -195,19 +213,23 @@
 
 	/* copy boundary item to right (last_first == LAST_TO_FIRST) */
 
-	/* ( DEST is empty or last item of SOURCE and first item of DEST
-	   are the items of different object or of different types )
+	/*
+	 * (DEST is empty or last item of SOURCE and first item of DEST
+	 * are the items of different object or of different types)
 	 */
 	src_nr_item = B_NR_ITEMS(src);
-	ih = B_N_PITEM_HEAD(src, src_nr_item - 1);
-	dih = B_N_PITEM_HEAD(dest, 0);
+	ih = item_head(src, src_nr_item - 1);
+	dih = item_head(dest, 0);
 
-	if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size))
+	if (!dest_nr_item || !op_is_left_mergeable(&dih->ih_key, src->b_size))
 		return 0;
 
 	if (is_direntry_le_ih(ih)) {
+		/*
+		 * bytes_or_entries = entries number in last
+		 * item body of SOURCE
+		 */
 		if (bytes_or_entries == -1)
-			/* bytes_or_entries = entries number in last item body of SOURCE */
 			bytes_or_entries = ih_entry_count(ih);
 
 		leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
@@ -217,9 +239,11 @@
 		return 1;
 	}
 
-	/* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST;
-	   part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST;
-	   don't create new item header
+	/*
+	 * copy part of the body of the last item of SOURCE to the
+	 * begin of the body of the first item of the DEST; part defined
+	 * by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body;
+	 * change first item key of the DEST; don't create new item header
 	 */
 
 	RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
@@ -270,15 +294,18 @@
 	}
 
 	leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries,
-			     B_I_PITEM(src,
+			     ih_item_body(src,
 				       ih) + ih_item_len(ih) - bytes_or_entries,
 			     0);
 	return 1;
 }
 
-/* copy cpy_mun items from buffer src to buffer dest
- * last_first == FIRST_TO_LAST means, that we copy cpy_num  items beginning from first-th item in src to tail of dest
- * last_first == LAST_TO_FIRST means, that we copy cpy_num  items beginning from first-th item in src to head of dest
+/*
+ * copy cpy_num items from buffer src to buffer dest
+ * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning
+ *                             from first-th item in src to tail of dest
+ * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning
+ *                             from first-th item in src to head of dest
  */
 static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
 				     struct buffer_head *src, int last_first,
@@ -311,11 +338,14 @@
 	nr = blkh_nr_item(blkh);
 	free_space = blkh_free_space(blkh);
 
-	/* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */
+	/*
+	 * we will insert items before 0-th or nr-th item in dest buffer.
+	 * It depends on the last_first parameter
+	 */
 	dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
 
 	/* location of head of first new item */
-	ih = B_N_PITEM_HEAD(dest, dest_before);
+	ih = item_head(dest, dest_before);
 
 	RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
 	       "vs-10140: not enough free space for headers %d (needed %d)",
@@ -325,7 +355,7 @@
 	memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);
 
 	/* copy item headers */
-	memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE);
+	memcpy(ih, item_head(src, first), cpy_num * IH_SIZE);
 
 	free_space -= (IH_SIZE * cpy_num);
 	set_blkh_free_space(blkh, free_space);
@@ -338,8 +368,8 @@
 	}
 
 	/* prepare space for items */
-	last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before]));
-	last_inserted_loc = ih_location(&(ih[cpy_num - 1]));
+	last_loc = ih_location(&ih[nr + cpy_num - 1 - dest_before]);
+	last_inserted_loc = ih_location(&ih[cpy_num - 1]);
 
 	/* check free space */
 	RFALSE(free_space < j - last_inserted_loc,
@@ -352,7 +382,8 @@
 
 	/* copy items */
 	memcpy(dest->b_data + last_inserted_loc,
-	       B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc);
+	       item_body(src, (first + cpy_num - 1)),
+	       j - last_inserted_loc);
 
 	/* sizes, item number */
 	set_blkh_nr_item(blkh, nr + cpy_num);
@@ -376,8 +407,10 @@
 	}
 }
 
-/* This function splits the (liquid) item into two items (useful when
-   shifting part of an item into another node.) */
+/*
+ * This function splits the (liquid) item into two items (useful when
+ * shifting part of an item into another node.)
+ */
 static void leaf_item_bottle(struct buffer_info *dest_bi,
 			     struct buffer_head *src, int last_first,
 			     int item_num, int cpy_bytes)
@@ -389,17 +422,22 @@
 	       "vs-10170: bytes == - 1 means: do not split item");
 
 	if (last_first == FIRST_TO_LAST) {
-		/* if ( if item in position item_num in buffer SOURCE is directory item ) */
-		ih = B_N_PITEM_HEAD(src, item_num);
+		/*
+		 * if ( if item in position item_num in buffer SOURCE
+		 * is directory item )
+		 */
+		ih = item_head(src, item_num);
 		if (is_direntry_le_ih(ih))
 			leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
 					      item_num, 0, cpy_bytes);
 		else {
 			struct item_head n_ih;
 
-			/* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
-			   part defined by 'cpy_bytes'; create new item header; change old item_header (????);
-			   n_ih = new item_header;
+			/*
+			 * copy part of the body of the item number 'item_num'
+			 * of SOURCE to the end of the DEST part defined by
+			 * 'cpy_bytes'; create new item header; change old
+			 * item_header (????); n_ih = new item_header;
 			 */
 			memcpy(&n_ih, ih, IH_SIZE);
 			put_ih_item_len(&n_ih, cpy_bytes);
@@ -411,30 +449,36 @@
 				set_ih_free_space(&n_ih, 0);
 			}
 
-			RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size),
+			RFALSE(op_is_left_mergeable(&ih->ih_key, src->b_size),
 			       "vs-10190: bad mergeability of item %h", ih);
 			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
 			leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih,
-					     B_N_PITEM(src, item_num), 0);
+					     item_body(src, item_num), 0);
 		}
 	} else {
-		/*  if ( if item in position item_num in buffer SOURCE is directory item ) */
-		ih = B_N_PITEM_HEAD(src, item_num);
+		/*
+		 * if ( if item in position item_num in buffer
+		 * SOURCE is directory item )
+		 */
+		ih = item_head(src, item_num);
 		if (is_direntry_le_ih(ih))
 			leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
 					      item_num,
-					      I_ENTRY_COUNT(ih) - cpy_bytes,
+					      ih_entry_count(ih) - cpy_bytes,
 					      cpy_bytes);
 		else {
 			struct item_head n_ih;
 
-			/* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
-			   part defined by 'cpy_bytes'; create new item header;
-			   n_ih = new item_header;
+			/*
+			 * copy part of the body of the item number 'item_num'
+			 * of SOURCE to the begin of the DEST part defined by
+			 * 'cpy_bytes'; create new item header;
+			 * n_ih = new item_header;
 			 */
 			memcpy(&n_ih, ih, SHORT_KEY_SIZE);
 
-			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
+			/* Endian safe, both le */
+			n_ih.ih_version = ih->ih_version;
 
 			if (is_direct_le_ih(ih)) {
 				set_le_ih_k_offset(&n_ih,
@@ -458,20 +502,22 @@
 			/* set item length */
 			put_ih_item_len(&n_ih, cpy_bytes);
 
-			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
+			/* Endian safe, both le */
+			n_ih.ih_version = ih->ih_version;
 
 			leaf_insert_into_buf(dest_bi, 0, &n_ih,
-					     B_N_PITEM(src,
-						       item_num) +
-					     ih_item_len(ih) - cpy_bytes, 0);
+					     item_body(src, item_num) +
+						ih_item_len(ih) - cpy_bytes, 0);
 		}
 	}
 }
 
-/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST.
-   If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST.
-   From last item copy cpy_num bytes for regular item and cpy_num directory entries for
-   directory item. */
+/*
+ * If cpy_bytes equals minus one then copy cpy_num whole items from SOURCE
+ * to DEST.  If cpy_bytes is not equal to minus one then copy cpy_num-1
+ * whole items from SOURCE to DEST.  From the last item copy cpy_bytes bytes
+ * for a regular item and cpy_bytes directory entries for a directory item.
+ */
 static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
 			   int last_first, int cpy_num, int cpy_bytes)
 {
@@ -498,22 +544,34 @@
 		else
 			bytes = -1;
 
-		/* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */
+		/*
+		 * copy the first item or its part or nothing to the end of
+		 * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes))
+		 */
 		i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
 		cpy_num -= i;
 		if (cpy_num == 0)
 			return i;
 		pos += i;
 		if (cpy_bytes == -1)
-			/* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */
+			/*
+			 * copy first cpy_num items starting from position
+			 * 'pos' of SOURCE to end of DEST
+			 */
 			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
 						 pos, cpy_num);
 		else {
-			/* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */
+			/*
+			 * copy first cpy_num-1 items starting from position
+			 * 'pos-1' of the SOURCE to the end of the DEST
+			 */
 			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
 						 pos, cpy_num - 1);
 
-			/* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */
+			/*
+			 * copy part of the item which number is
+			 * cpy_num+pos-1 to the end of the DEST
+			 */
 			leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
 					 cpy_num + pos - 1, cpy_bytes);
 		}
@@ -525,7 +583,11 @@
 		else
 			bytes = -1;
 
-		/* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */
+		/*
+		 * copy the last item or its part or nothing to the
+		 * begin of the DEST
+		 * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes));
+		 */
 		i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);
 
 		cpy_num -= i;
@@ -534,15 +596,24 @@
 
 		pos = src_nr_item - cpy_num - i;
 		if (cpy_bytes == -1) {
-			/* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */
+			/*
+			 * starting from position 'pos' copy last cpy_num
+			 * items of SOURCE to begin of DEST
+			 */
 			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
 						 pos, cpy_num);
 		} else {
-			/* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */
+			/*
+			 * copy last cpy_num-1 items starting from position
+			 * 'pos+1' of the SOURCE to the begin of the DEST;
+			 */
 			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
 						 pos + 1, cpy_num - 1);
 
-			/* copy part of the item which number is pos to the begin of the DEST */
+			/*
+			 * copy part of the item which number is pos to
+			 * the begin of the DEST
+			 */
 			leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
 					 cpy_bytes);
 		}
@@ -550,9 +621,11 @@
 	return i;
 }
 
-/* there are types of coping: from S[0] to L[0], from S[0] to R[0],
-   from R[0] to L[0]. for each of these we have to define parent and
-   positions of destination and source buffers */
+/*
+ * there are types of coping: from S[0] to L[0], from S[0] to R[0],
+ * from R[0] to L[0]. for each of these we have to define parent and
+ * positions of destination and source buffers
+ */
 static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
 				       struct buffer_info *dest_bi,
 				       struct buffer_info *src_bi,
@@ -568,7 +641,9 @@
 		src_bi->tb = tb;
 		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
 		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
-		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);	/* src->b_item_order */
+
+		/* src->b_item_order */
+		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
 		dest_bi->tb = tb;
 		dest_bi->bi_bh = tb->L[0];
 		dest_bi->bi_parent = tb->FL[0];
@@ -633,8 +708,10 @@
 	       shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
 }
 
-/* copy mov_num items and mov_bytes of the (mov_num-1)th item to
-   neighbor. Delete them from source */
+/*
+ * copy mov_num items and mov_bytes of the (mov_num-1)th item to
+ * neighbor. Delete them from source
+ */
 int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
 		    int mov_bytes, struct buffer_head *Snew)
 {
@@ -657,18 +734,24 @@
 	return ret_value;
 }
 
-/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1)
-   from S[0] to L[0] and replace the delimiting key */
+/*
+ * Shift shift_num items (and shift_bytes of last shifted item if
+ * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key
+ */
 int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
 {
 	struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
 	int i;
 
-	/* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */
+	/*
+	 * move shift_num (and shift_bytes bytes) items from S[0]
+	 * to left neighbor L[0]
+	 */
 	i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
 
 	if (shift_num) {
-		if (B_NR_ITEMS(S0) == 0) {	/* number of items in S[0] == 0 */
+		/* number of items in S[0] == 0 */
+		if (B_NR_ITEMS(S0) == 0) {
 
 			RFALSE(shift_bytes != -1,
 			       "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
@@ -691,10 +774,10 @@
 			replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0);
 
 			RFALSE((shift_bytes != -1 &&
-				!(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0))
-				  && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) &&
+				!(is_direntry_le_ih(item_head(S0, 0))
+				  && !ih_entry_count(item_head(S0, 0)))) &&
 			       (!op_is_left_mergeable
-				(B_N_PKEY(S0, 0), S0->b_size)),
+				(leaf_key(S0, 0), S0->b_size)),
 			       "vs-10280: item must be mergeable");
 		}
 	}
@@ -704,13 +787,18 @@
 
 /* CLEANING STOPPED HERE */
 
-/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */
+/*
+ * Shift shift_num (shift_bytes) items from S[0] to the right neighbor,
+ * and replace the delimiting key
+ */
 int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
 {
-	//  struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
 	int ret_value;
 
-	/* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */
+	/*
+	 * move shift_num (and shift_bytes) items from S[0] to
+	 * right neighbor R[0]
+	 */
 	ret_value =
 	    leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);
 
@@ -725,12 +813,16 @@
 
 static void leaf_delete_items_entirely(struct buffer_info *bi,
 				       int first, int del_num);
-/*  If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR.
-    If not.
-    If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of
-    the first item. Part defined by del_bytes. Don't delete first item header
-    If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of
-    the last item . Part defined by del_bytes. Don't delete last item header.
+/*
+ * If del_bytes == -1, starting from position 'first' delete del_num
+ * items in whole in buffer CUR.
+ *   If not.
+ *   If last_first == 0. Starting from position 'first' delete del_num-1
+ *   items in whole. Delete part of body of the first item. Part defined by
+ *   del_bytes. Don't delete first item header
+ *   If last_first == 1. Starting from position 'first+1' delete del_num-1
+ *   items in whole. Delete part of body of the last item . Part defined by
+ *   del_bytes. Don't delete last item header.
 */
 void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
 		       int first, int del_num, int del_bytes)
@@ -761,32 +853,43 @@
 		leaf_delete_items_entirely(cur_bi, first, del_num);
 	else {
 		if (last_first == FIRST_TO_LAST) {
-			/* delete del_num-1 items beginning from item in position first  */
+			/*
+			 * delete del_num-1 items beginning from
+			 * item in position first
+			 */
 			leaf_delete_items_entirely(cur_bi, first, del_num - 1);
 
-			/* delete the part of the first item of the bh
-			   do not delete item header
+			/*
+			 * delete the part of the first item of the bh
+			 * do not delete item header
 			 */
 			leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
 		} else {
 			struct item_head *ih;
 			int len;
 
-			/* delete del_num-1 items beginning from item in position first+1  */
+			/*
+			 * delete del_num-1 items beginning from
+			 * item in position first+1
+			 */
 			leaf_delete_items_entirely(cur_bi, first + 1,
 						   del_num - 1);
 
-			ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
+			ih = item_head(bh, B_NR_ITEMS(bh) - 1);
 			if (is_direntry_le_ih(ih))
 				/* the last item is directory  */
-				/* len = numbers of directory entries in this item */
+				/*
+				 * len = numbers of directory entries
+				 * in this item
+				 */
 				len = ih_entry_count(ih);
 			else
 				/* len = body len of item */
 				len = ih_item_len(ih);
 
-			/* delete the part of the last item of the bh
-			   do not delete item header
+			/*
+			 * delete the part of the last item of the bh
+			 * do not delete item header
 			 */
 			leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
 					     len - del_bytes, del_bytes);
@@ -820,10 +923,10 @@
 	       zeros_number, ih_item_len(inserted_item_ih));
 
 	/* get item new item must be inserted before */
-	ih = B_N_PITEM_HEAD(bh, before);
+	ih = item_head(bh, before);
 
 	/* prepare space for the body of new item */
-	last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size;
+	last_loc = nr ? ih_location(&ih[nr - before - 1]) : bh->b_size;
 	unmoved_loc = before ? ih_location(ih - 1) : bh->b_size;
 
 	memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih),
@@ -846,8 +949,8 @@
 
 	/* change locations */
 	for (i = before; i < nr + 1; i++) {
-		unmoved_loc -= ih_item_len(&(ih[i - before]));
-		put_ih_location(&(ih[i - before]), unmoved_loc);
+		unmoved_loc -= ih_item_len(&ih[i - before]);
+		put_ih_location(&ih[i - before], unmoved_loc);
 	}
 
 	/* sizes, free space, item number */
@@ -867,8 +970,10 @@
 	}
 }
 
-/* paste paste_size bytes to affected_item_num-th item.
-   When item is a directory, this only prepare space for new entries */
+/*
+ * paste paste_size bytes to affected_item_num-th item.
+ * When item is a directory, this only prepare space for new entries
+ */
 void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
 			  int pos_in_item, int paste_size,
 			  const char *body, int zeros_number)
@@ -902,9 +1007,9 @@
 #endif				/* CONFIG_REISERFS_CHECK */
 
 	/* item to be appended */
-	ih = B_N_PITEM_HEAD(bh, affected_item_num);
+	ih = item_head(bh, affected_item_num);
 
-	last_loc = ih_location(&(ih[nr - affected_item_num - 1]));
+	last_loc = ih_location(&ih[nr - affected_item_num - 1]);
 	unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size;
 
 	/* prepare space */
@@ -913,8 +1018,8 @@
 
 	/* change locations */
 	for (i = affected_item_num; i < nr; i++)
-		put_ih_location(&(ih[i - affected_item_num]),
-				ih_location(&(ih[i - affected_item_num])) -
+		put_ih_location(&ih[i - affected_item_num],
+				ih_location(&ih[i - affected_item_num]) -
 				paste_size);
 
 	if (body) {
@@ -957,10 +1062,12 @@
 	}
 }
 
-/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
-   does not have free space, so it moves DEHs and remaining records as
-   necessary. Return value is size of removed part of directory item
-   in bytes. */
+/*
+ * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
+ * does not have free space, so it moves DEHs and remaining records as
+ * necessary. Return value is size of removed part of directory item
+ * in bytes.
+ */
 static int leaf_cut_entries(struct buffer_head *bh,
 			    struct item_head *ih, int from, int del_count)
 {
@@ -971,12 +1078,14 @@
 	int cut_records_len;	/* length of all removed records */
 	int i;
 
-	/* make sure, that item is directory and there are enough entries to
-	   remove */
+	/*
+	 * make sure that item is directory and there are enough entries to
+	 * remove
+	 */
 	RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
-	RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
+	RFALSE(ih_entry_count(ih) < from + del_count,
 	       "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
-	       I_ENTRY_COUNT(ih), from, del_count);
+	       ih_entry_count(ih), from, del_count);
 
 	if (del_count == 0)
 		return 0;
@@ -987,22 +1096,24 @@
 	/* entry head array */
 	deh = B_I_DEH(bh, ih);
 
-	/* first byte of remaining entries, those are BEFORE cut entries
-	   (prev_record) and length of all removed records (cut_records_len) */
+	/*
+	 * first byte of remaining entries, those are BEFORE cut entries
+	 * (prev_record) and length of all removed records (cut_records_len)
+	 */
 	prev_record_offset =
-	    (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih));
+	    (from ? deh_location(&deh[from - 1]) : ih_item_len(ih));
 	cut_records_len = prev_record_offset /*from_record */  -
-	    deh_location(&(deh[from + del_count - 1]));
+	    deh_location(&deh[from + del_count - 1]);
 	prev_record = item + prev_record_offset;
 
 	/* adjust locations of remaining entries */
-	for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--)
-		put_deh_location(&(deh[i]),
+	for (i = ih_entry_count(ih) - 1; i > from + del_count - 1; i--)
+		put_deh_location(&deh[i],
 				 deh_location(&deh[i]) -
 				 (DEH_SIZE * del_count));
 
 	for (i = 0; i < from; i++)
-		put_deh_location(&(deh[i]),
+		put_deh_location(&deh[i],
 				 deh_location(&deh[i]) - (DEH_SIZE * del_count +
 							  cut_records_len));
 
@@ -1021,14 +1132,15 @@
 	return DEH_SIZE * del_count + cut_records_len;
 }
 
-/*  when cut item is part of regular file
-        pos_in_item - first byte that must be cut
-        cut_size - number of bytes to be cut beginning from pos_in_item
-
-   when cut item is part of directory
-        pos_in_item - number of first deleted entry
-        cut_size - count of deleted entries
-    */
+/*
+ * when cut item is part of regular file
+ *      pos_in_item - first byte that must be cut
+ *      cut_size - number of bytes to be cut beginning from pos_in_item
+ *
+ * when cut item is part of directory
+ *      pos_in_item - number of first deleted entry
+ *      cut_size - count of deleted entries
+ */
 void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
 			  int pos_in_item, int cut_size)
 {
@@ -1043,7 +1155,7 @@
 	nr = blkh_nr_item(blkh);
 
 	/* item head of truncated item */
-	ih = B_N_PITEM_HEAD(bh, cut_item_num);
+	ih = item_head(bh, cut_item_num);
 
 	if (is_direntry_le_ih(ih)) {
 		/* first cut entry () */
@@ -1055,7 +1167,6 @@
 			       cut_item_num);
 			/* change item key by key of first entry in the item */
 			set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
-			/*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
 		}
 	} else {
 		/* item is direct or indirect */
@@ -1089,7 +1200,7 @@
 	}
 
 	/* location of the last item */
-	last_loc = ih_location(&(ih[nr - cut_item_num - 1]));
+	last_loc = ih_location(&ih[nr - cut_item_num - 1]);
 
 	/* location of the item, which is remaining at the same place */
 	unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size;
@@ -1108,7 +1219,7 @@
 
 	/* change locations */
 	for (i = cut_item_num; i < nr; i++)
-		put_ih_location(&(ih[i - cut_item_num]),
+		put_ih_location(&ih[i - cut_item_num],
 				ih_location(&ih[i - cut_item_num]) + cut_size);
 
 	/* size, free space */
@@ -1156,14 +1267,14 @@
 		return;
 	}
 
-	ih = B_N_PITEM_HEAD(bh, first);
+	ih = item_head(bh, first);
 
 	/* location of unmovable item */
 	j = (first == 0) ? bh->b_size : ih_location(ih - 1);
 
 	/* delete items */
-	last_loc = ih_location(&(ih[nr - 1 - first]));
-	last_removed_loc = ih_location(&(ih[del_num - 1]));
+	last_loc = ih_location(&ih[nr - 1 - first]);
+	last_removed_loc = ih_location(&ih[del_num - 1]);
 
 	memmove(bh->b_data + last_loc + j - last_removed_loc,
 		bh->b_data + last_loc, last_removed_loc - last_loc);
@@ -1173,8 +1284,8 @@
 
 	/* change item location */
 	for (i = first; i < nr - del_num; i++)
-		put_ih_location(&(ih[i - first]),
-				ih_location(&(ih[i - first])) + (j -
+		put_ih_location(&ih[i - first],
+				ih_location(&ih[i - first]) + (j -
 								 last_removed_loc));
 
 	/* sizes, item number */
@@ -1195,7 +1306,10 @@
 	}
 }
 
-/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */
+/*
+ * paste new_entry_count entries (new_dehs, records) into position
+ * before to item_num-th item
+ */
 void leaf_paste_entries(struct buffer_info *bi,
 			int item_num,
 			int before,
@@ -1213,13 +1327,16 @@
 	if (new_entry_count == 0)
 		return;
 
-	ih = B_N_PITEM_HEAD(bh, item_num);
+	ih = item_head(bh, item_num);
 
-	/* make sure, that item is directory, and there are enough records in it */
+	/*
+	 * make sure, that item is directory, and there are enough
+	 * records in it
+	 */
 	RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
-	RFALSE(I_ENTRY_COUNT(ih) < before,
+	RFALSE(ih_entry_count(ih) < before,
 	       "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
-	       I_ENTRY_COUNT(ih), before);
+	       ih_entry_count(ih), before);
 
 	/* first byte of dest item */
 	item = bh->b_data + ih_location(ih);
@@ -1230,21 +1347,21 @@
 	/* new records will be pasted at this point */
 	insert_point =
 	    item +
-	    (before ? deh_location(&(deh[before - 1]))
+	    (before ? deh_location(&deh[before - 1])
 	     : (ih_item_len(ih) - paste_size));
 
 	/* adjust locations of records that will be AFTER new records */
-	for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--)
-		put_deh_location(&(deh[i]),
-				 deh_location(&(deh[i])) +
+	for (i = ih_entry_count(ih) - 1; i >= before; i--)
+		put_deh_location(&deh[i],
+				 deh_location(&deh[i]) +
 				 (DEH_SIZE * new_entry_count));
 
 	/* adjust locations of records that will be BEFORE new records */
 	for (i = 0; i < before; i++)
-		put_deh_location(&(deh[i]),
-				 deh_location(&(deh[i])) + paste_size);
+		put_deh_location(&deh[i],
+				 deh_location(&deh[i]) + paste_size);
 
-	old_entry_num = I_ENTRY_COUNT(ih);
+	old_entry_num = ih_entry_count(ih);
 	put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
 
 	/* prepare space for pasted records */
@@ -1266,10 +1383,10 @@
 
 	/* set locations of new records */
 	for (i = 0; i < new_entry_count; i++) {
-		put_deh_location(&(deh[i]),
-				 deh_location(&(deh[i])) +
+		put_deh_location(&deh[i],
+				 deh_location(&deh[i]) +
 				 (-deh_location
-				  (&(new_dehs[new_entry_count - 1])) +
+				  (&new_dehs[new_entry_count - 1]) +
 				  insert_point + DEH_SIZE * new_entry_count -
 				  item));
 	}
@@ -1277,28 +1394,26 @@
 	/* change item key if necessary (when we paste before 0-th entry */
 	if (!before) {
 		set_le_ih_k_offset(ih, deh_offset(new_dehs));
-/*      memcpy (&ih->ih_key.k_offset,
-		       &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
 	}
 #ifdef CONFIG_REISERFS_CHECK
 	{
 		int prev, next;
 		/* check record locations */
 		deh = B_I_DEH(bh, ih);
-		for (i = 0; i < I_ENTRY_COUNT(ih); i++) {
+		for (i = 0; i < ih_entry_count(ih); i++) {
 			next =
 			    (i <
-			     I_ENTRY_COUNT(ih) -
-			     1) ? deh_location(&(deh[i + 1])) : 0;
-			prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
+			     ih_entry_count(ih) -
+			     1) ? deh_location(&deh[i + 1]) : 0;
+			prev = (i != 0) ? deh_location(&deh[i - 1]) : 0;
 
-			if (prev && prev <= deh_location(&(deh[i])))
+			if (prev && prev <= deh_location(&deh[i]))
 				reiserfs_error(sb_from_bi(bi), "vs-10240",
 					       "directory item (%h) "
 					       "corrupted (prev %a, "
 					       "cur(%d) %a)",
 					       ih, deh + i - 1, i, deh + i);
-			if (next && next >= deh_location(&(deh[i])))
+			if (next && next >= deh_location(&deh[i]))
 				reiserfs_error(sb_from_bi(bi), "vs-10250",
 					       "directory item (%h) "
 					       "corrupted (cur(%d) %a, "

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e825f8b..cd11358 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c

@@ -22,8 +22,10 @@
 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
 
-// directory item contains array of entry headers. This performs
-// binary search through that array
+/*
+ * directory item contains array of entry headers. This performs
+ * binary search through that array
+ */
 static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
 {
 	struct item_head *ih = de->de_ih;
@@ -31,7 +33,7 @@
 	int rbound, lbound, j;
 
 	lbound = 0;
-	rbound = I_ENTRY_COUNT(ih) - 1;
+	rbound = ih_entry_count(ih) - 1;
 
 	for (j = (rbound + lbound) / 2; lbound <= rbound;
 	     j = (rbound + lbound) / 2) {
@@ -43,7 +45,7 @@
 			lbound = j + 1;
 			continue;
 		}
-		// this is not name found, but matched third key component
+		/* this is not name found, but matched third key component */
 		de->de_entry_num = j;
 		return NAME_FOUND;
 	}
@@ -52,17 +54,21 @@
 	return NAME_NOT_FOUND;
 }
 
-// comment?  maybe something like set de to point to what the path points to?
+/*
+ * comment?  maybe something like set de to point to what the path points to?
+ */
 static inline void set_de_item_location(struct reiserfs_dir_entry *de,
 					struct treepath *path)
 {
 	de->de_bh = get_last_bh(path);
-	de->de_ih = get_ih(path);
+	de->de_ih = tp_item_head(path);
 	de->de_deh = B_I_DEH(de->de_bh, de->de_ih);
 	de->de_item_num = PATH_LAST_POSITION(path);
 }
 
-// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
+/*
+ * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
+ */
 inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
 {
 	struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
@@ -71,17 +77,17 @@
 
 	de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
 	de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
-	de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh);
+	de->de_name = ih_item_body(de->de_bh, de->de_ih) + deh_location(deh);
 	if (de->de_name[de->de_namelen - 1] == 0)
 		de->de_namelen = strlen(de->de_name);
 }
 
-// what entry points to
+/* what entry points to */
 static inline void set_de_object_key(struct reiserfs_dir_entry *de)
 {
 	BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
-	de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num]));
-	de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num]));
+	de->de_dir_id = deh_dir_id(&de->de_deh[de->de_entry_num]);
+	de->de_objectid = deh_objectid(&de->de_deh[de->de_entry_num]);
 }
 
 static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
@@ -96,21 +102,20 @@
 	    le32_to_cpu(de->de_ih->ih_key.k_dir_id);
 	de->de_entry_key.on_disk_key.k_objectid =
 	    le32_to_cpu(de->de_ih->ih_key.k_objectid);
-	set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh));
-	set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
+	set_cpu_key_k_offset(&de->de_entry_key, deh_offset(deh));
+	set_cpu_key_k_type(&de->de_entry_key, TYPE_DIRENTRY);
 }
 
-/* We assign a key to each directory item, and place multiple entries
-in a single directory item.  A directory item has a key equal to the
-key of the first directory entry in it.
+/*
+ * We assign a key to each directory item, and place multiple entries in a
+ * single directory item.  A directory item has a key equal to the key of
+ * the first directory entry in it.
 
-This function first calls search_by_key, then, if item whose first
-entry matches is not found it looks for the entry inside directory
-item found by search_by_key. Fills the path to the entry, and to the
-entry position in the item
-
-*/
-
+ * This function first calls search_by_key, then, if item whose first entry
+ * matches is not found it looks for the entry inside directory item found
+ * by search_by_key. Fills the path to the entry, and to the entry position
+ * in the item
+ */
 /* The function is NOT SCHEDULE-SAFE! */
 int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
 			struct treepath *path, struct reiserfs_dir_entry *de)
@@ -144,7 +149,7 @@
 
 #ifdef CONFIG_REISERFS_CHECK
 	if (!is_direntry_le_ih(de->de_ih) ||
-	    COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
+	    COMP_SHORT_KEYS(&de->de_ih->ih_key, key)) {
 		print_block(de->de_bh, 0, -1, -1);
 		reiserfs_panic(sb, "vs-7005", "found item %h is not directory "
 			       "item or does not belong to the same directory "
@@ -152,12 +157,17 @@
 	}
 #endif				/* CONFIG_REISERFS_CHECK */
 
-	/* binary search in directory item by third componen t of the
-	   key. sets de->de_entry_num of de */
+	/*
+	 * binary search in directory item by third component of the
+	 * key. sets de->de_entry_num of de
+	 */
 	retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
 	path->pos_in_item = de->de_entry_num;
 	if (retval != NAME_NOT_FOUND) {
-		// ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
+		/*
+		 * ugly, but rename needs de_bh, de_deh, de_name,
+		 * de_namelen, de_objectid set
+		 */
 		set_de_name_and_namelen(de);
 		set_de_object_key(de);
 	}
@@ -166,11 +176,12 @@
 
 /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
 
-/* The third component is hashed, and you can choose from more than
-   one hash function.  Per directory hashes are not yet implemented
-   but are thought about. This function should be moved to hashes.c
-   Jedi, please do so.  -Hans */
-
+/*
+ * The third component is hashed, and you can choose from more than
+ * one hash function.  Per directory hashes are not yet implemented
+ * but are thought about. This function should be moved to hashes.c
+ * Jedi, please do so.  -Hans
+ */
 static __u32 get_third_component(struct super_block *s,
 				 const char *name, int len)
 {
@@ -183,11 +194,13 @@
 
 	res = REISERFS_SB(s)->s_hash_function(name, len);
 
-	// take bits from 7-th to 30-th including both bounds
+	/* take bits from 7-th to 30-th including both bounds */
 	res = GET_HASH_VALUE(res);
 	if (res == 0)
-		// needed to have no names before "." and ".." those have hash
-		// value == 0 and generation conters 1 and 2 accordingly
+		/*
+		 * needed to have no names before "." and ".." those have hash
+		 * value == 0 and generation conters 1 and 2 accordingly
+		 */
 		res = 128;
 	return res + MAX_GENERATION_NUMBER;
 }
@@ -208,7 +221,7 @@
 
 /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
 
-				/* used when hash collisions exist */
+/* used when hash collisions exist */
 
 static int linear_search_in_dir_item(struct cpu_key *key,
 				     struct reiserfs_dir_entry *de,
@@ -220,7 +233,7 @@
 
 	i = de->de_entry_num;
 
-	if (i == I_ENTRY_COUNT(de->de_ih) ||
+	if (i == ih_entry_count(de->de_ih) ||
 	    GET_HASH_VALUE(deh_offset(deh + i)) !=
 	    GET_HASH_VALUE(cpu_key_k_offset(key))) {
 		i--;
@@ -232,43 +245,50 @@
 	deh += i;
 
 	for (; i >= 0; i--, deh--) {
+		/* hash value does not match, no need to check whole name */
 		if (GET_HASH_VALUE(deh_offset(deh)) !=
 		    GET_HASH_VALUE(cpu_key_k_offset(key))) {
-			// hash value does not match, no need to check whole name
 			return NAME_NOT_FOUND;
 		}
 
-		/* mark, that this generation number is used */
+		/* mark that this generation number is used */
 		if (de->de_gen_number_bit_string)
 			set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
 				de->de_gen_number_bit_string);
 
-		// calculate pointer to name and namelen
+		/* calculate pointer to name and namelen */
 		de->de_entry_num = i;
 		set_de_name_and_namelen(de);
 
+		/*
+		 * de's de_name, de_namelen, de_recordlen are set.
+		 * Fill the rest.
+		 */
 		if ((retval =
 		     reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
-			// de's de_name, de_namelen, de_recordlen are set. Fill the rest:
 
-			// key of pointed object
+			/* key of pointed object */
 			set_de_object_key(de);
 
 			store_de_entry_key(de);
 
-			// retval can be NAME_FOUND or NAME_FOUND_INVISIBLE
+			/* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */
 			return retval;
 		}
 	}
 
 	if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
-		/* we have reached left most entry in the node. In common we
-		   have to go to the left neighbor, but if generation counter
-		   is 0 already, we know for sure, that there is no name with
-		   the same hash value */
-		// FIXME: this work correctly only because hash value can not
-		// be 0. Btw, in case of Yura's hash it is probably possible,
-		// so, this is a bug
+		/*
+		 * we have reached left most entry in the node. In common we
+		 * have to go to the left neighbor, but if generation counter
+		 * is 0 already, we know for sure, that there is no name with
+		 * the same hash value
+		 */
+		/*
+		 * FIXME: this work correctly only because hash value can not
+		 *  be 0. Btw, in case of Yura's hash it is probably possible,
+		 * so, this is a bug
+		 */
 		return NAME_NOT_FOUND;
 
 	RFALSE(de->de_item_num,
@@ -277,8 +297,10 @@
 	return GOTO_PREVIOUS_ITEM;
 }
 
-// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
-// FIXME: should add something like IOERROR
+/*
+ * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
+ * FIXME: should add something like IOERROR
+ */
 static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
 			       struct treepath *path_to_entry,
 			       struct reiserfs_dir_entry *de)
@@ -307,13 +329,19 @@
 		retval =
 		    linear_search_in_dir_item(&key_to_search, de, name,
 					      namelen);
+		/*
+		 * there is no need to scan directory anymore.
+		 * Given entry found or does not exist
+		 */
 		if (retval != GOTO_PREVIOUS_ITEM) {
-			/* there is no need to scan directory anymore. Given entry found or does not exist */
 			path_to_entry->pos_in_item = de->de_entry_num;
 			return retval;
 		}
 
-		/* there is left neighboring item of this directory and given entry can be there */
+		/*
+		 * there is left neighboring item of this directory
+		 * and given entry can be there
+		 */
 		set_cpu_key_k_offset(&key_to_search,
 				     le_ih_k_offset(de->de_ih) - 1);
 		pathrelse(path_to_entry);
@@ -341,14 +369,16 @@
 	pathrelse(&path_to_entry);
 	if (retval == NAME_FOUND) {
 		inode = reiserfs_iget(dir->i_sb,
-				      (struct cpu_key *)&(de.de_dir_id));
+				      (struct cpu_key *)&de.de_dir_id);
 		if (!inode || IS_ERR(inode)) {
 			reiserfs_write_unlock(dir->i_sb);
 			return ERR_PTR(-EACCES);
 		}
 
-		/* Propagate the private flag so we know we're
-		 * in the priv tree */
+		/*
+		 * Propagate the private flag so we know we're
+		 * in the priv tree
+		 */
 		if (IS_PRIVATE(dir))
 			inode->i_flags |= S_PRIVATE;
 	}
@@ -361,9 +391,9 @@
 }
 
 /*
-** looks up the dentry of the parent directory for child.
-** taken from ext2_get_parent
-*/
+ * looks up the dentry of the parent directory for child.
+ * taken from ext2_get_parent
+ */
 struct dentry *reiserfs_get_parent(struct dentry *child)
 {
 	int retval;
@@ -384,7 +414,7 @@
 		reiserfs_write_unlock(dir->i_sb);
 		return ERR_PTR(-ENOENT);
 	}
-	inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
+	inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&de.de_dir_id);
 	reiserfs_write_unlock(dir->i_sb);
 
 	return d_obtain_alias(inode);
@@ -406,8 +436,13 @@
 	struct reiserfs_dir_entry de;
 	DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1);
 	int gen_number;
-	char small_buf[32 + DEH_SIZE];	/* 48 bytes now and we avoid kmalloc
-					   if we create file with short name */
+
+	/*
+	 * 48 bytes now and we avoid kmalloc if we
+	 * create file with short name
+	 */
+	char small_buf[32 + DEH_SIZE];
+
 	char *buffer;
 	int buflen, paste_size;
 	int retval;
@@ -439,21 +474,30 @@
 	    (get_inode_sd_version(dir) ==
 	     STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
 
-	/* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */
+	/*
+	 * fill buffer : directory entry head, name[, dir objectid | ,
+	 * stat data | ,stat data, dir objectid ]
+	 */
 	deh = (struct reiserfs_de_head *)buffer;
 	deh->deh_location = 0;	/* JDM Endian safe if 0 */
 	put_deh_offset(deh, cpu_key_k_offset(&entry_key));
 	deh->deh_state = 0;	/* JDM Endian safe if 0 */
 	/* put key (ino analog) to de */
-	deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id;	/* safe: k_dir_id is le */
-	deh->deh_objectid = INODE_PKEY(inode)->k_objectid;	/* safe: k_objectid is le */
+
+	/* safe: k_dir_id is le */
+	deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id;
+	/* safe: k_objectid is le */
+	deh->deh_objectid = INODE_PKEY(inode)->k_objectid;
 
 	/* copy name */
 	memcpy((char *)(deh + 1), name, namelen);
 	/* padd by 0s to the 4 byte boundary */
 	padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
 
-	/* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */
+	/*
+	 * entry is ready to be pasted into tree, set 'visibility'
+	 * and 'stat data in entry' attributes
+	 */
 	mark_de_without_sd(deh);
 	visible ? mark_de_visible(deh) : mark_de_hidden(deh);
 
@@ -499,7 +543,8 @@
 	/* update max-hash-collisions counter in reiserfs_sb_info */
 	PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
 
-	if (gen_number != 0) {	/* we need to re-search for the insertion point */
+	/* we need to re-search for the insertion point */
+	if (gen_number != 0) {
 		if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
 		    NAME_NOT_FOUND) {
 			reiserfs_warning(dir->i_sb, "vs-7032",
@@ -527,18 +572,19 @@
 	dir->i_size += paste_size;
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	if (!S_ISDIR(inode->i_mode) && visible)
-		// reiserfs_mkdir or reiserfs_rename will do that by itself
+		/* reiserfs_mkdir or reiserfs_rename will do that by itself */
 		reiserfs_update_sd(th, dir);
 
 	reiserfs_check_path(&path);
 	return 0;
 }
 
-/* quota utility function, call if you've had to abort after calling
-** new_inode_init, and have not called reiserfs_new_inode yet.
-** This should only be called on inodes that do not have stat data
-** inserted into the tree yet.
-*/
+/*
+ * quota utility function, call if you've had to abort after calling
+ * new_inode_init, and have not called reiserfs_new_inode yet.
+ * This should only be called on inodes that do not have stat data
+ * inserted into the tree yet.
+ */
 static int drop_new_inode(struct inode *inode)
 {
 	dquot_drop(inode);
@@ -548,18 +594,23 @@
 	return 0;
 }
 
-/* utility function that does setup for reiserfs_new_inode.
-** dquot_initialize needs lots of credits so it's better to have it
-** outside of a transaction, so we had to pull some bits of
-** reiserfs_new_inode out into this func.
-*/
+/*
+ * utility function that does setup for reiserfs_new_inode.
+ * dquot_initialize needs lots of credits so it's better to have it
+ * outside of a transaction, so we had to pull some bits of
+ * reiserfs_new_inode out into this func.
+ */
 static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
 {
-	/* Make inode invalid - just in case we are going to drop it before
-	 * the initialization happens */
+	/*
+	 * Make inode invalid - just in case we are going to drop it before
+	 * the initialization happens
+	 */
 	INODE_PKEY(inode)->k_objectid = 0;
-	/* the quota init calls have to know who to charge the quota to, so
-	 ** we have to set uid and gid here
+
+	/*
+	 * the quota init calls have to know who to charge the quota to, so
+	 * we have to set uid and gid here
 	 */
 	inode_init_owner(inode, dir, mode);
 	dquot_initialize(inode);
@@ -571,7 +622,10 @@
 {
 	int retval;
 	struct inode *inode;
-	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	/*
+	 * We need blocks for transaction + (user+group)*(quotas
+	 * for new inode + update of quota for directory owner)
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 2 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -618,7 +672,7 @@
 		int err;
 		drop_nlink(inode);
 		reiserfs_update_sd(&th, inode);
-		err = journal_end(&th, dir->i_sb, jbegin_count);
+		err = journal_end(&th);
 		if (err)
 			retval = err;
 		unlock_new_inode(inode);
@@ -630,9 +684,9 @@
 
 	unlock_new_inode(inode);
 	d_instantiate(dentry, inode);
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 
-      out_failed:
+out_failed:
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
@@ -644,7 +698,10 @@
 	struct inode *inode;
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
-	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	/*
+	 * We need blocks for transaction + (user+group)*(quotas
+	 * for new inode + update of quota for directory owner)
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -685,7 +742,7 @@
 	inode->i_op = &reiserfs_special_inode_operations;
 	init_special_inode(inode, inode->i_mode, rdev);
 
-	//FIXME: needed for block and char devices only
+	/* FIXME: needed for block and char devices only */
 	reiserfs_update_sd(&th, inode);
 
 	reiserfs_update_inode_transaction(inode);
@@ -698,7 +755,7 @@
 		int err;
 		drop_nlink(inode);
 		reiserfs_update_sd(&th, inode);
-		err = journal_end(&th, dir->i_sb, jbegin_count);
+		err = journal_end(&th);
 		if (err)
 			retval = err;
 		unlock_new_inode(inode);
@@ -708,9 +765,9 @@
 
 	unlock_new_inode(inode);
 	d_instantiate(dentry, inode);
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 
-      out_failed:
+out_failed:
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
@@ -721,7 +778,10 @@
 	struct inode *inode;
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
-	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	/*
+	 * We need blocks for transaction + (user+group)*(quotas
+	 * for new inode + update of quota for directory owner)
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -730,7 +790,10 @@
 	dquot_initialize(dir);
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-	/* set flag that new packing locality created and new blocks for the content     * of that directory are not displaced yet */
+	/*
+	 * set flag that new packing locality created and new blocks
+	 * for the content of that directory are not displaced yet
+	 */
 	REISERFS_I(dir)->new_packing_locality = 1;
 #endif
 	mode = S_IFDIR | mode;
@@ -754,8 +817,9 @@
 		goto out_failed;
 	}
 
-	/* inc the link count now, so another writer doesn't overflow it while
-	 ** we sleep later on.
+	/*
+	 * inc the link count now, so another writer doesn't overflow
+	 * it while we sleep later on.
 	 */
 	INC_DIR_INODE_NLINK(dir)
 
@@ -774,7 +838,7 @@
 	inode->i_op = &reiserfs_dir_inode_operations;
 	inode->i_fop = &reiserfs_dir_operations;
 
-	// note, _this_ add_entry will not update dir's stat data
+	/* note, _this_ add_entry will not update dir's stat data */
 	retval =
 	    reiserfs_add_entry(&th, dir, dentry->d_name.name,
 			       dentry->d_name.len, inode, 1 /*visible */ );
@@ -783,19 +847,19 @@
 		clear_nlink(inode);
 		DEC_DIR_INODE_NLINK(dir);
 		reiserfs_update_sd(&th, inode);
-		err = journal_end(&th, dir->i_sb, jbegin_count);
+		err = journal_end(&th);
 		if (err)
 			retval = err;
 		unlock_new_inode(inode);
 		iput(inode);
 		goto out_failed;
 	}
-	// the above add_entry did not update dir's stat data
+	/* the above add_entry did not update dir's stat data */
 	reiserfs_update_sd(&th, dir);
 
 	unlock_new_inode(inode);
 	d_instantiate(dentry, inode);
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 out_failed:
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
@@ -803,10 +867,11 @@
 
 static inline int reiserfs_empty_dir(struct inode *inode)
 {
-	/* we can cheat because an old format dir cannot have
-	 ** EMPTY_DIR_SIZE, and a new format dir cannot have
-	 ** EMPTY_DIR_SIZE_V1.  So, if the inode is either size,
-	 ** regardless of disk format version, the directory is empty.
+	/*
+	 * we can cheat because an old format dir cannot have
+	 * EMPTY_DIR_SIZE, and a new format dir cannot have
+	 * EMPTY_DIR_SIZE_V1.  So, if the inode is either size,
+	 * regardless of disk format version, the directory is empty.
 	 */
 	if (inode->i_size != EMPTY_DIR_SIZE &&
 	    inode->i_size != EMPTY_DIR_SIZE_V1) {
@@ -824,10 +889,12 @@
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
 
-	/* we will be doing 2 balancings and update 2 stat data, we change quotas
-	 * of the owner of the directory and of the owner of the parent directory.
-	 * The quota structure is possibly deleted only on last iput => outside
-	 * of this transaction */
+	/*
+	 * we will be doing 2 balancings and update 2 stat data, we
+	 * change quotas of the owner of the directory and of the owner
+	 * of the parent directory.  The quota structure is possibly
+	 * deleted only on last iput => outside of this transaction
+	 */
 	jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -856,8 +923,9 @@
 	reiserfs_update_inode_transaction(dir);
 
 	if (de.de_objectid != inode->i_ino) {
-		// FIXME: compare key of an object and a key found in the
-		// entry
+		/*
+		 * FIXME: compare key of an object and a key found in the entry
+		 */
 		retval = -EIO;
 		goto end_rmdir;
 	}
@@ -867,7 +935,8 @@
 	}
 
 	/* cut entry from dir directory */
-	retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,	/* page */
+	retval = reiserfs_cut_from_item(&th, &path, &de.de_entry_key,
+					dir, NULL,	/* page */
 					0 /*new file size - not used here */ );
 	if (retval < 0)
 		goto end_rmdir;
@@ -888,18 +957,20 @@
 	/* prevent empty directory from getting lost */
 	add_save_link(&th, inode, 0 /* not truncate */ );
 
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 	reiserfs_check_path(&path);
-      out_rmdir:
+out_rmdir:
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 
-      end_rmdir:
-	/* we must release path, because we did not call
-	   reiserfs_cut_from_item, or reiserfs_cut_from_item does not
-	   release path if operation was not complete */
+end_rmdir:
+	/*
+	 * we must release path, because we did not call
+	 * reiserfs_cut_from_item, or reiserfs_cut_from_item does not
+	 * release path if operation was not complete
+	 */
 	pathrelse(&path);
-	err = journal_end(&th, dir->i_sb, jbegin_count);
+	err = journal_end(&th);
 	reiserfs_write_unlock(dir->i_sb);
 	return err ? err : retval;
 }
@@ -918,10 +989,13 @@
 
 	inode = dentry->d_inode;
 
-	/* in this transaction we can be doing at max two balancings and update
-	 * two stat datas, we change quotas of the owner of the directory and of
-	 * the owner of the parent directory. The quota structure is possibly
-	 * deleted only on iput => outside of this transaction */
+	/*
+	 * in this transaction we can be doing at max two balancings and
+	 * update two stat datas, we change quotas of the owner of the
+	 * directory and of the owner of the parent directory. The quota
+	 * structure is possibly deleted only on iput => outside of
+	 * this transaction
+	 */
 	jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -946,8 +1020,9 @@
 	reiserfs_update_inode_transaction(dir);
 
 	if (de.de_objectid != inode->i_ino) {
-		// FIXME: compare key of an object and a key found in the
-		// entry
+		/*
+		 * FIXME: compare key of an object and a key found in the entry
+		 */
 		retval = -EIO;
 		goto end_unlink;
 	}
@@ -968,7 +1043,7 @@
 	savelink = inode->i_nlink;
 
 	retval =
-	    reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,
+	    reiserfs_cut_from_item(&th, &path, &de.de_entry_key, dir, NULL,
 				   0);
 	if (retval < 0) {
 		inc_nlink(inode);
@@ -985,18 +1060,18 @@
 		/* prevent file from getting lost */
 		add_save_link(&th, inode, 0 /* not truncate */ );
 
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 	reiserfs_check_path(&path);
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 
-      end_unlink:
+end_unlink:
 	pathrelse(&path);
-	err = journal_end(&th, dir->i_sb, jbegin_count);
+	err = journal_end(&th);
 	reiserfs_check_path(&path);
 	if (err)
 		retval = err;
-      out_unlink:
+out_unlink:
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
@@ -1011,7 +1086,10 @@
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
 	int mode = S_IFLNK | S_IRWXUGO;
-	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	/*
+	 * We need blocks for transaction + (user+group)*(quotas for
+	 * new inode + update of quota for directory owner)
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
@@ -1070,17 +1148,13 @@
 	inode->i_op = &reiserfs_symlink_inode_operations;
 	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
 
-	// must be sure this inode is written with this transaction
-	//
-	//reiserfs_update_sd (&th, inode, READ_BLOCKS);
-
 	retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
 				    dentry->d_name.len, inode, 1 /*visible */ );
 	if (retval) {
 		int err;
 		drop_nlink(inode);
 		reiserfs_update_sd(&th, inode);
-		err = journal_end(&th, parent_dir->i_sb, jbegin_count);
+		err = journal_end(&th);
 		if (err)
 			retval = err;
 		unlock_new_inode(inode);
@@ -1090,8 +1164,8 @@
 
 	unlock_new_inode(inode);
 	d_instantiate(dentry, inode);
-	retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
-      out_failed:
+	retval = journal_end(&th);
+out_failed:
 	reiserfs_write_unlock(parent_dir->i_sb);
 	return retval;
 }
@@ -1102,7 +1176,10 @@
 	int retval;
 	struct inode *inode = old_dentry->d_inode;
 	struct reiserfs_transaction_handle th;
-	/* We need blocks for transaction + update of quotas for the owners of the directory */
+	/*
+	 * We need blocks for transaction + update of quotas for
+	 * the owners of the directory
+	 */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -1111,7 +1188,7 @@
 
 	reiserfs_write_lock(dir->i_sb);
 	if (inode->i_nlink >= REISERFS_LINK_MAX) {
-		//FIXME: sd_nlink is 32 bit for new files
+		/* FIXME: sd_nlink is 32 bit for new files */
 		reiserfs_write_unlock(dir->i_sb);
 		return -EMLINK;
 	}
@@ -1137,7 +1214,7 @@
 	if (retval) {
 		int err;
 		drop_nlink(inode);
-		err = journal_end(&th, dir->i_sb, jbegin_count);
+		err = journal_end(&th);
 		reiserfs_write_unlock(dir->i_sb);
 		return err ? err : retval;
 	}
@@ -1147,7 +1224,7 @@
 
 	ihold(inode);
 	d_instantiate(dentry, inode);
-	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
@@ -1158,9 +1235,9 @@
 {
 	struct reiserfs_dir_entry tmp = *de;
 
-	// recalculate pointer to name and name length
+	/* recalculate pointer to name and name length */
 	set_de_name_and_namelen(&tmp);
-	// FIXME: could check more
+	/* FIXME: could check more */
 	if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
 		return 0;
 	return 1;
@@ -1217,14 +1294,16 @@
 	unsigned long savelink = 1;
 	struct timespec ctime;
 
-	/* three balancings: (1) old name removal, (2) new name insertion
-	   and (3) maybe "save" link insertion
-	   stat data updates: (1) old directory,
-	   (2) new directory and (3) maybe old object stat data (when it is
-	   directory) and (4) maybe stat data of object to which new entry
-	   pointed initially and (5) maybe block containing ".." of
-	   renamed directory
-	   quota updates: two parent directories */
+	/*
+	 * three balancings: (1) old name removal, (2) new name insertion
+	 * and (3) maybe "save" link insertion
+	 * stat data updates: (1) old directory,
+	 * (2) new directory and (3) maybe old object stat data (when it is
+	 * directory) and (4) maybe stat data of object to which new entry
+	 * pointed initially and (5) maybe block containing ".." of
+	 * renamed directory
+	 * quota updates: two parent directories
+	 */
 	jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 + 5 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
@@ -1235,8 +1314,10 @@
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
 
-	// make sure, that oldname still exists and points to an object we
-	// are going to rename
+	/*
+	 * make sure that oldname still exists and points to an object we
+	 * are going to rename
+	 */
 	old_de.de_gen_number_bit_string = NULL;
 	reiserfs_write_lock(old_dir->i_sb);
 	retval =
@@ -1256,10 +1337,11 @@
 
 	old_inode_mode = old_inode->i_mode;
 	if (S_ISDIR(old_inode_mode)) {
-		// make sure, that directory being renamed has correct ".."
-		// and that its new parent directory has not too many links
-		// already
-
+		/*
+		 * make sure that directory being renamed has correct ".."
+		 * and that its new parent directory has not too many links
+		 * already
+		 */
 		if (new_dentry_inode) {
 			if (!reiserfs_empty_dir(new_dentry_inode)) {
 				reiserfs_write_unlock(old_dir->i_sb);
@@ -1267,8 +1349,9 @@
 			}
 		}
 
-		/* directory is renamed, its parent directory will be changed,
-		 ** so find ".." entry
+		/*
+		 * directory is renamed, its parent directory will be changed,
+		 * so find ".." entry
 		 */
 		dot_dot_de.de_gen_number_bit_string = NULL;
 		retval =
@@ -1303,7 +1386,7 @@
 				       "new entry is found, new inode == 0");
 		}
 	} else if (retval) {
-		int err = journal_end(&th, old_dir->i_sb, jbegin_count);
+		int err = journal_end(&th);
 		reiserfs_write_unlock(old_dir->i_sb);
 		return err ? err : retval;
 	}
@@ -1311,8 +1394,9 @@
 	reiserfs_update_inode_transaction(old_dir);
 	reiserfs_update_inode_transaction(new_dir);
 
-	/* this makes it so an fsync on an open fd for the old name will
-	 ** commit the rename operation
+	/*
+	 * this makes it so an fsync on an open fd for the old name will
+	 * commit the rename operation
 	 */
 	reiserfs_update_inode_transaction(old_inode);
 
@@ -1320,38 +1404,45 @@
 		reiserfs_update_inode_transaction(new_dentry_inode);
 
 	while (1) {
-		// look for old name using corresponding entry key (found by reiserfs_find_entry)
+		/*
+		 * look for old name using corresponding entry key
+		 * (found by reiserfs_find_entry)
+		 */
 		if ((retval =
 		     search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
 					 &old_entry_path,
 					 &old_de)) != NAME_FOUND) {
 			pathrelse(&old_entry_path);
-			journal_end(&th, old_dir->i_sb, jbegin_count);
+			journal_end(&th);
 			reiserfs_write_unlock(old_dir->i_sb);
 			return -EIO;
 		}
 
-		copy_item_head(&old_entry_ih, get_ih(&old_entry_path));
+		copy_item_head(&old_entry_ih, tp_item_head(&old_entry_path));
 
 		reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
 
-		// look for new name by reiserfs_find_entry
+		/* look for new name by reiserfs_find_entry */
 		new_de.de_gen_number_bit_string = NULL;
 		retval =
 		    reiserfs_find_entry(new_dir, new_dentry->d_name.name,
 					new_dentry->d_name.len, &new_entry_path,
 					&new_de);
-		// reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from
-		// reiserfs_add_entry above, and we'll catch any i/o errors before we get here.
+		/*
+		 * reiserfs_find_entry should not return IO_ERROR,
+		 * because it is called with essentially same parameters from
+		 * reiserfs_add_entry above, and we'll catch any i/o errors
+		 * before we get here.
+		 */
 		if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
 			pathrelse(&new_entry_path);
 			pathrelse(&old_entry_path);
-			journal_end(&th, old_dir->i_sb, jbegin_count);
+			journal_end(&th);
 			reiserfs_write_unlock(old_dir->i_sb);
 			return -EIO;
 		}
 
-		copy_item_head(&new_entry_ih, get_ih(&new_entry_path));
+		copy_item_head(&new_entry_ih, tp_item_head(&new_entry_path));
 
 		reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1);
 
@@ -1364,28 +1455,32 @@
 				pathrelse(&dot_dot_entry_path);
 				pathrelse(&new_entry_path);
 				pathrelse(&old_entry_path);
-				journal_end(&th, old_dir->i_sb, jbegin_count);
+				journal_end(&th);
 				reiserfs_write_unlock(old_dir->i_sb);
 				return -EIO;
 			}
 			copy_item_head(&dot_dot_ih,
-				       get_ih(&dot_dot_entry_path));
-			// node containing ".." gets into transaction
+				       tp_item_head(&dot_dot_entry_path));
+			/* node containing ".." gets into transaction */
 			reiserfs_prepare_for_journal(old_inode->i_sb,
 						     dot_dot_de.de_bh, 1);
 		}
-		/* we should check seals here, not do
-		   this stuff, yes? Then, having
-		   gathered everything into RAM we
-		   should lock the buffers, yes?  -Hans */
-		/* probably.  our rename needs to hold more
-		 ** than one path at once.  The seals would
-		 ** have to be written to deal with multi-path
-		 ** issues -chris
+		/*
+		 * we should check seals here, not do
+		 * this stuff, yes? Then, having
+		 * gathered everything into RAM we
+		 * should lock the buffers, yes?  -Hans
 		 */
-		/* sanity checking before doing the rename - avoid races many
-		 ** of the above checks could have scheduled.  We have to be
-		 ** sure our items haven't been shifted by another process.
+		/*
+		 * probably.  our rename needs to hold more
+		 * than one path at once.  The seals would
+		 * have to be written to deal with multi-path
+		 * issues -chris
+		 */
+		/*
+		 * sanity checking before doing the rename - avoid races; many
+		 * of the above checks could have scheduled.  We have to be
+		 * sure our items haven't been shifted by another process.
 		 */
 		if (item_moved(&new_entry_ih, &new_entry_path) ||
 		    !entry_points_to_object(new_dentry->d_name.name,
@@ -1430,24 +1525,28 @@
 		break;
 	}
 
-	/* ok, all the changes can be done in one fell swoop when we
-	   have claimed all the buffers needed. */
+	/*
+	 * ok, all the changes can be done in one fell swoop when we
+	 * have claimed all the buffers needed.
+	 */
 
 	mark_de_visible(new_de.de_deh + new_de.de_entry_num);
 	set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
-	journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh);
+	journal_mark_dirty(&th, new_de.de_bh);
 
 	mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
-	journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh);
+	journal_mark_dirty(&th, old_de.de_bh);
 	ctime = CURRENT_TIME_SEC;
 	old_dir->i_ctime = old_dir->i_mtime = ctime;
 	new_dir->i_ctime = new_dir->i_mtime = ctime;
-	/* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of
-	   renamed object */
+	/*
+	 * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
+	 * which adds ctime update of renamed object
+	 */
 	old_inode->i_ctime = ctime;
 
 	if (new_dentry_inode) {
-		// adjust link number of the victim
+		/* adjust link number of the victim */
 		if (S_ISDIR(new_dentry_inode->i_mode)) {
 			clear_nlink(new_dentry_inode);
 		} else {
@@ -1460,25 +1559,32 @@
 	if (S_ISDIR(old_inode_mode)) {
 		/* adjust ".." of renamed directory */
 		set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
-		journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
+		journal_mark_dirty(&th, dot_dot_de.de_bh);
 
+		/*
+		 * there (in new_dir) was no directory, so it got new link
+		 * (".."  of renamed directory)
+		 */
 		if (!new_dentry_inode)
-			/* there (in new_dir) was no directory, so it got new link
-			   (".."  of renamed directory) */
 			INC_DIR_INODE_NLINK(new_dir);
 
 		/* old directory lost one link - ".. " of renamed directory */
 		DEC_DIR_INODE_NLINK(old_dir);
 	}
-	// looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
+	/*
+	 * looks like in 2.3.99pre3 brelse is atomic.
+	 * so we can use pathrelse
+	 */
 	pathrelse(&new_entry_path);
 	pathrelse(&dot_dot_entry_path);
 
-	// FIXME: this reiserfs_cut_from_item's return value may screw up
-	// anybody, but it will panic if will not be able to find the
-	// entry. This needs one more clean up
+	/*
+	 * FIXME: this reiserfs_cut_from_item's return value may screw up
+	 * anybody, but it will panic if it will not be able to find the
+	 * entry. This needs one more clean up
+	 */
 	if (reiserfs_cut_from_item
-	    (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
+	    (&th, &old_entry_path, &old_de.de_entry_key, old_dir, NULL,
 	     0) < 0)
 		reiserfs_error(old_dir->i_sb, "vs-7060",
 			       "couldn't not cut old name. Fsck later?");
@@ -1496,16 +1602,13 @@
 		reiserfs_update_sd(&th, new_dentry_inode);
 	}
 
-	retval = journal_end(&th, old_dir->i_sb, jbegin_count);
+	retval = journal_end(&th);
 	reiserfs_write_unlock(old_dir->i_sb);
 	return retval;
 }
 
-/*
- * directories can handle most operations...
- */
+/* directories can handle most operations...  */
 const struct inode_operations reiserfs_dir_inode_operations = {
-	//&reiserfs_dir_operations,   /* default_file_ops */
 	.create = reiserfs_create,
 	.lookup = reiserfs_lookup,
 	.link = reiserfs_link,

diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index f732d6a..99a5d5d 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c

@@ -7,7 +7,7 @@
 #include <linux/time.h>
 #include "reiserfs.h"
 
-// find where objectid map starts
+/* find where objectid map starts */
 #define objectid_map(s,rs) (old_format_only (s) ? \
                          (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
 			 (__le32 *)((rs) + 1))
@@ -20,7 +20,7 @@
 		reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
 			       (long unsigned int)le32_to_cpu(map[0]));
 
-	// FIXME: add something else here
+	/* FIXME: add something else here */
 }
 
 #else
@@ -29,19 +29,21 @@
 }
 #endif
 
-/* When we allocate objectids we allocate the first unused objectid.
-   Each sequence of objectids in use (the odd sequences) is followed
-   by a sequence of objectids not in use (the even sequences).  We
-   only need to record the last objectid in each of these sequences
-   (both the odd and even sequences) in order to fully define the
-   boundaries of the sequences.  A consequence of allocating the first
-   objectid not in use is that under most conditions this scheme is
-   extremely compact.  The exception is immediately after a sequence
-   of operations which deletes a large number of objects of
-   non-sequential objectids, and even then it will become compact
-   again as soon as more objects are created.  Note that many
-   interesting optimizations of layout could result from complicating
-   objectid assignment, but we have deferred making them for now. */
+/*
+ * When we allocate objectids we allocate the first unused objectid.
+ * Each sequence of objectids in use (the odd sequences) is followed
+ * by a sequence of objectids not in use (the even sequences).  We
+ * only need to record the last objectid in each of these sequences
+ * (both the odd and even sequences) in order to fully define the
+ * boundaries of the sequences.  A consequence of allocating the first
+ * objectid not in use is that under most conditions this scheme is
+ * extremely compact.  The exception is immediately after a sequence
+ * of operations which deletes a large number of objects of
+ * non-sequential objectids, and even then it will become compact
+ * again as soon as more objects are created.  Note that many
+ * interesting optimizations of layout could result from complicating
+ * objectid assignment, but we have deferred making them for now.
+ */
 
 /* get unique object identifier */
 __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
@@ -64,26 +66,30 @@
 		return 0;
 	}
 
-	/* This incrementation allocates the first unused objectid. That
-	   is to say, the first entry on the objectid map is the first
-	   unused objectid, and by incrementing it we use it.  See below
-	   where we check to see if we eliminated a sequence of unused
-	   objectids.... */
+	/*
+	 * This incrementation allocates the first unused objectid. That
+	 * is to say, the first entry on the objectid map is the first
+	 * unused objectid, and by incrementing it we use it.  See below
+	 * where we check to see if we eliminated a sequence of unused
+	 * objectids....
+	 */
 	map[1] = cpu_to_le32(unused_objectid + 1);
 
-	/* Now we check to see if we eliminated the last remaining member of
-	   the first even sequence (and can eliminate the sequence by
-	   eliminating its last objectid from oids), and can collapse the
-	   first two odd sequences into one sequence.  If so, then the net
-	   result is to eliminate a pair of objectids from oids.  We do this
-	   by shifting the entire map to the left. */
+	/*
+	 * Now we check to see if we eliminated the last remaining member of
+	 * the first even sequence (and can eliminate the sequence by
+	 * eliminating its last objectid from oids), and can collapse the
+	 * first two odd sequences into one sequence.  If so, then the net
+	 * result is to eliminate a pair of objectids from oids.  We do this
+	 * by shifting the entire map to the left.
+	 */
 	if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
 		memmove(map + 1, map + 3,
 			(sb_oid_cursize(rs) - 3) * sizeof(__u32));
 		set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
 	}
 
-	journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+	journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 	return unused_objectid;
 }
 
@@ -97,30 +103,33 @@
 	int i = 0;
 
 	BUG_ON(!th->t_trans_id);
-	//return;
+	/*return; */
 	check_objectid_map(s, map);
 
 	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
-	journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+	journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 
-	/* start at the beginning of the objectid map (i = 0) and go to
-	   the end of it (i = disk_sb->s_oid_cursize).  Linear search is
-	   what we use, though it is possible that binary search would be
-	   more efficient after performing lots of deletions (which is
-	   when oids is large.)  We only check even i's. */
+	/*
+	 * start at the beginning of the objectid map (i = 0) and go to
+	 * the end of it (i = disk_sb->s_oid_cursize).  Linear search is
+	 * what we use, though it is possible that binary search would be
+	 * more efficient after performing lots of deletions (which is
+	 * when oids is large.)  We only check even i's.
+	 */
 	while (i < sb_oid_cursize(rs)) {
 		if (objectid_to_release == le32_to_cpu(map[i])) {
 			/* This incrementation unallocates the objectid. */
-			//map[i]++;
 			le32_add_cpu(&map[i], 1);
 
-			/* Did we unallocate the last member of an odd sequence, and can shrink oids? */
+			/*
+			 * Did we unallocate the last member of an
+			 * odd sequence, and can shrink oids?
+			 */
 			if (map[i] == map[i + 1]) {
 				/* shrink objectid map */
 				memmove(map + i, map + i + 2,
 					(sb_oid_cursize(rs) - i -
 					 2) * sizeof(__u32));
-				//disk_sb->s_oid_cursize -= 2;
 				set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
 
 				RFALSE(sb_oid_cursize(rs) < 2 ||
@@ -135,14 +144,19 @@
 		    objectid_to_release < le32_to_cpu(map[i + 1])) {
 			/* size of objectid map is not changed */
 			if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
-				//objectid_map[i+1]--;
 				le32_add_cpu(&map[i + 1], -1);
 				return;
 			}
 
-			/* JDM comparing two little-endian values for equality -- safe */
+			/*
+			 * JDM comparing two little-endian values for
+			 * equality -- safe
+			 */
+			/*
+			 * objectid map must be expanded, but
+			 * there is no space
+			 */
 			if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
-				/* objectid map must be expanded, but there is no space */
 				PROC_INFO_INC(s, leaked_oid);
 				return;
 			}
@@ -178,8 +192,9 @@
 	new_objectid_map = (__le32 *) (disk_sb + 1);
 
 	if (cur_size > new_size) {
-		/* mark everyone used that was listed as free at the end of the objectid
-		 ** map
+		/*
+		 * mark everyone used that was listed as free at
+		 * the end of the objectid map
 		 */
 		objectid_map[new_size - 1] = objectid_map[cur_size - 1];
 		set_sb_oid_cursize(disk_sb, new_size);

diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 54944d5..c9b47e9 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c

@@ -172,18 +172,19 @@
 	return k;
 }
 
-/* debugging reiserfs we used to print out a lot of different
-   variables, like keys, item headers, buffer heads etc. Values of
-   most fields matter. So it took a long time just to write
-   appropriative printk. With this reiserfs_warning you can use format
-   specification for complex structures like you used to do with
-   printfs for integers, doubles and pointers. For instance, to print
-   out key structure you have to write just:
-   reiserfs_warning ("bad key %k", key);
-   instead of
-   printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
-           key->k_offset, key->k_uniqueness);
-*/
+/*
+ * When debugging reiserfs we used to print out a lot of different
+ * variables, like keys, item headers, buffer heads etc. Values of
+ * most fields matter. So it took a long time just to write an
+ * appropriate printk. With this reiserfs_warning you can use format
+ * specification for complex structures like you used to do with
+ * printfs for integers, doubles and pointers. For instance, to print
+ * out key structure you have to write just:
+ * reiserfs_warning ("bad key %k", key);
+ * instead of
+ * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
+ *         key->k_offset, key->k_uniqueness);
+ */
 static DEFINE_SPINLOCK(error_lock);
 static void prepare_error_buf(const char *fmt, va_list args)
 {
@@ -243,15 +244,16 @@
 
 }
 
-/* in addition to usual conversion specifiers this accepts reiserfs
-   specific conversion specifiers:
-   %k to print little endian key,
-   %K to print cpu key,
-   %h to print item_head,
-   %t to print directory entry
-   %z to print block head (arg must be struct buffer_head *
-   %b to print buffer_head
-*/
+/*
+ * in addition to usual conversion specifiers this accepts reiserfs
+ * specific conversion specifiers:
+ * %k to print little endian key,
+ * %K to print cpu key,
+ * %h to print item_head,
+ * %t to print directory entry
+ * %z to print block head (arg must be struct buffer_head *)
+ * %b to print buffer_head
+ */
 
 #define do_reiserfs_warning(fmt)\
 {\
@@ -304,50 +306,52 @@
 #endif
 }
 
-/* The format:
-
-           maintainer-errorid: [function-name:] message
-
-    where errorid is unique to the maintainer and function-name is
-    optional, is recommended, so that anyone can easily find the bug
-    with a simple grep for the short to type string
-    maintainer-errorid.  Don't bother with reusing errorids, there are
-    lots of numbers out there.
-
-    Example:
-
-    reiserfs_panic(
-	p_sb, "reiser-29: reiserfs_new_blocknrs: "
-	"one of search_start or rn(%d) is equal to MAX_B_NUM,"
-	"which means that we are optimizing location based on the bogus location of a temp buffer (%p).",
-	rn, bh
-    );
-
-    Regular panic()s sometimes clear the screen before the message can
-    be read, thus the need for the while loop.
-
-    Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it
-    pointless complexity):
-
-    panics in reiserfs.h have numbers from 1000 to 1999
-    super.c				        2000 to 2999
-    preserve.c (unused)			    3000 to 3999
-    bitmap.c				    4000 to 4999
-    stree.c				        5000 to 5999
-    prints.c				    6000 to 6999
-    namei.c                     7000 to 7999
-    fix_nodes.c                 8000 to 8999
-    dir.c                       9000 to 9999
-	lbalance.c					10000 to 10999
-	ibalance.c		11000 to 11999 not ready
-	do_balan.c		12000 to 12999
-	inode.c			13000 to 13999
-	file.c			14000 to 14999
-    objectid.c                       15000 - 15999
-    buffer.c                         16000 - 16999
-    symlink.c                        17000 - 17999
-
-   .  */
+/*
+ * The format:
+ *
+ *          maintainer-errorid: [function-name:] message
+ *
+ *   where errorid is unique to the maintainer and function-name is
+ *   optional, is recommended, so that anyone can easily find the bug
+ *   with a simple grep for the short to type string
+ *   maintainer-errorid.  Don't bother with reusing errorids, there are
+ *   lots of numbers out there.
+ *
+ *   Example:
+ *
+ *   reiserfs_panic(
+ *     p_sb, "reiser-29: reiserfs_new_blocknrs: "
+ *     "one of search_start or rn(%d) is equal to MAX_B_NUM,"
+ *     "which means that we are optimizing location based on the "
+ *     "bogus location of a temp buffer (%p).",
+ *     rn, bh
+ *   );
+ *
+ *   Regular panic()s sometimes clear the screen before the message can
+ *   be read, thus the need for the while loop.
+ *
+ *   Numbering scheme for panic used by Vladimir and Anatoly( Hans completely
+ *   ignores this scheme, and considers it pointless complexity):
+ *
+ *   panics in reiserfs_fs.h have numbers from 1000 to 1999
+ *   super.c			2000 to 2999
+ *   preserve.c (unused)	3000 to 3999
+ *   bitmap.c			4000 to 4999
+ *   stree.c			5000 to 5999
+ *   prints.c			6000 to 6999
+ *   namei.c			7000 to 7999
+ *   fix_nodes.c		8000 to 8999
+ *   dir.c			9000 to 9999
+ *   lbalance.c			10000 to 10999
+ *   ibalance.c			11000 to 11999 not ready
+ *   do_balan.c			12000 to 12999
+ *   inode.c			13000 to 13999
+ *   file.c			14000 to 14999
+ *   objectid.c			15000 - 15999
+ *   buffer.c			16000 - 16999
+ *   symlink.c			17000 - 17999
+ *
+ *  .  */
 
 void __reiserfs_panic(struct super_block *sb, const char *id,
 		      const char *function, const char *fmt, ...)
@@ -411,9 +415,11 @@
 	reiserfs_abort_journal(sb, errno);
 }
 
-/* this prints internal nodes (4 keys/items in line) (dc_number,
-   dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
-   dc_size)...*/
+/*
+ * this prints internal nodes (4 keys/items in line) (dc_number,
+ * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
+ * dc_size)...
+ */
 static int print_internal(struct buffer_head *bh, int first, int last)
 {
 	struct reiserfs_key *key;
@@ -439,7 +445,7 @@
 	dc = B_N_CHILD(bh, from);
 	reiserfs_printk("PTR %d: %y ", from, dc);
 
-	for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to;
+	for (i = from, key = internal_key(bh, from), dc++; i < to;
 	     i++, key++, dc++) {
 		reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
 		if (i && i % 4 == 0)
@@ -463,7 +469,7 @@
 	check_leaf(bh);
 
 	blkh = B_BLK_HEAD(bh);
-	ih = B_N_PITEM_HEAD(bh, 0);
+	ih = item_head(bh, 0);
 	nr = blkh_nr_item(blkh);
 
 	printk
@@ -496,7 +502,7 @@
 		    ("-------------------------------------------------------------------------------\n");
 		reiserfs_printk("|%2d| %h |\n", i, ih);
 		if (print_mode & PRINT_LEAF_ITEMS)
-			op_print_item(ih, B_I_PITEM(bh, ih));
+			op_print_item(ih, ih_item_body(bh, ih));
 	}
 
 	printk
@@ -543,9 +549,11 @@
 	printk("Block count %u\n", sb_block_count(rs));
 	printk("Blocksize %d\n", sb_blocksize(rs));
 	printk("Free blocks %u\n", sb_free_blocks(rs));
-	// FIXME: this would be confusing if
-	// someone stores reiserfs super block in some data block ;)
+	/*
+	 * FIXME: this would be confusing if
+	 * someone stores reiserfs super block in some data block ;)
 //    skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
+	 */
 	skipped = bh->b_blocknr;
 	data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
 	    (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
@@ -581,8 +589,8 @@
 
 	return 0;
 }
-
-void print_block(struct buffer_head *bh, ...)	//int print_mode, int first, int last)
+/* ..., int print_mode, int first, int last) */
+void print_block(struct buffer_head *bh, ...)
 {
 	va_list args;
 	int mode, first, last;
@@ -644,11 +652,11 @@
 			"* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
 			h,
 			(tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL),
-			(tbSh) ? atomic_read(&(tbSh->b_count)) : -1,
+			(tbSh) ? atomic_read(&tbSh->b_count) : -1,
 			(tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL),
-			(tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1,
+			(tb->L[h]) ? atomic_read(&tb->L[h]->b_count) : -1,
 			(tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL),
-			(tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1,
+			(tb->R[h]) ? atomic_read(&tb->R[h]->b_count) : -1,
 			(tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL),
 			(tb->FL[h]) ? (long long)(tb->FL[h]->
 						  b_blocknr) : (-1LL),
@@ -665,9 +673,9 @@
 		"* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
 		"* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
 		tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],
-		tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes,
-		tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0],
-		tb->rkey[0]);
+		tb->rbytes, tb->blknum[0], tb->s0num, tb->snum[0],
+		tb->sbytes[0], tb->snum[1], tb->sbytes[1],
+		tb->cur_blknum, tb->lkey[0], tb->rkey[0]);
 
 	/* this prints balance parameters for non-leaf levels */
 	h = 0;
@@ -690,7 +698,7 @@
 			"%p (%llu %d)%s", tb->FEB[i],
 			tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
 			b_blocknr : 0ULL,
-			tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0,
+			tb->FEB[i] ? atomic_read(&tb->FEB[i]->b_count) : 0,
 			(i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", ");
 
 	sprintf(print_tb_buf + strlen(print_tb_buf),
@@ -744,8 +752,8 @@
 	if (!bh)
 		return;
 	check_leaf_block_head(bh);
-	for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
-		op_check_item(ih, B_I_PITEM(bh, ih));
+	for (i = 0, ih = item_head(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
+		op_check_item(ih, ih_item_body(bh, ih));
 }
 
 void check_internal(struct buffer_head *bh)

diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 83d4eac..bf53888 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h

@@ -1,5 +1,6 @@
 /*
- * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
+ * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for
+ * licensing and copyright details
  */
 
 #include <linux/reiserfs_fs.h>
@@ -23,52 +24,73 @@
 
 struct reiserfs_journal_list;
 
-/** bitmasks for i_flags field in reiserfs-specific part of inode */
+/* bitmasks for i_flags field in reiserfs-specific part of inode */
 typedef enum {
-    /** this says what format of key do all items (but stat data) of
-      an object have.  If this is set, that format is 3.6 otherwise
-      - 3.5 */
+	/*
+	 * this says what format of key do all items (but stat data) of
+	 * an object have.  If this is set, that format is 3.6 otherwise - 3.5
+	 */
 	i_item_key_version_mask = 0x0001,
-    /** If this is unset, object has 3.5 stat data, otherwise, it has
-      3.6 stat data with 64bit size, 32bit nlink etc. */
+
+	/*
+	 * If this is unset, object has 3.5 stat data, otherwise,
+	 * it has 3.6 stat data with 64bit size, 32bit nlink etc.
+	 */
 	i_stat_data_version_mask = 0x0002,
-    /** file might need tail packing on close */
+
+	/* file might need tail packing on close */
 	i_pack_on_close_mask = 0x0004,
-    /** don't pack tail of file */
+
+	/* don't pack tail of file */
 	i_nopack_mask = 0x0008,
-    /** If those is set, "safe link" was created for this file during
-      truncate or unlink. Safe link is used to avoid leakage of disk
-      space on crash with some files open, but unlinked. */
+
+	/*
+	 * If either of these are set, "safe link" was created for this
+	 * file during truncate or unlink. Safe link is used to avoid
+	 * leakage of disk space on crash with some files open, but unlinked.
+	 */
 	i_link_saved_unlink_mask = 0x0010,
 	i_link_saved_truncate_mask = 0x0020,
+
 	i_has_xattr_dir = 0x0040,
 	i_data_log = 0x0080,
 } reiserfs_inode_flags;
 
 struct reiserfs_inode_info {
 	__u32 i_key[4];		/* key is still 4 32 bit integers */
-    /** transient inode flags that are never stored on disk. Bitmasks
-      for this field are defined above. */
+
+	/*
+	 * transient inode flags that are never stored on disk. Bitmasks
+	 * for this field are defined above.
+	 */
 	__u32 i_flags;
 
-	__u32 i_first_direct_byte;	// offset of first byte stored in direct item.
+	/* offset of first byte stored in direct item. */
+	__u32 i_first_direct_byte;
 
 	/* copy of persistent inode flags read from sd_attrs. */
 	__u32 i_attrs;
 
-	int i_prealloc_block;	/* first unused block of a sequence of unused blocks */
+	/* first unused block of a sequence of unused blocks */
+	int i_prealloc_block;
 	int i_prealloc_count;	/* length of that sequence */
-	struct list_head i_prealloc_list;	/* per-transaction list of inodes which
-						 * have preallocated blocks */
 
-	unsigned new_packing_locality:1;	/* new_packig_locality is created; new blocks
-						 * for the contents of this directory should be
-						 * displaced */
+	/* per-transaction list of inodes which have preallocated blocks */
+	struct list_head i_prealloc_list;
 
-	/* we use these for fsync or O_SYNC to decide which transaction
-	 ** needs to be committed in order for this inode to be properly
-	 ** flushed */
+	/*
+	 * new_packing_locality is created; new blocks for the contents
+	 * of this directory should be displaced
+	 */
+	unsigned new_packing_locality:1;
+
+	/*
+	 * we use these for fsync or O_SYNC to decide which transaction
+	 * needs to be committed in order for this inode to be properly
+	 * flushed
+	 */
 	unsigned int i_trans_id;
+
 	struct reiserfs_journal_list *i_jl;
 	atomic_t openers;
 	struct mutex tailpack;
@@ -82,9 +104,10 @@
 	reiserfs_attrs_cleared = 0x00000001,
 } reiserfs_super_block_flags;
 
-/* struct reiserfs_super_block accessors/mutators
- * since this is a disk structure, it will always be in
- * little endian format. */
+/*
+ * struct reiserfs_super_block accessors/mutators since this is a disk
+ * structure, it will always be in little endian format.
+ */
 #define sb_block_count(sbp)         (le32_to_cpu((sbp)->s_v1.s_block_count))
 #define set_sb_block_count(sbp,v)   ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
 #define sb_free_blocks(sbp)         (le32_to_cpu((sbp)->s_v1.s_free_blocks))
@@ -152,48 +175,61 @@
 
 /* LOGGING -- */
 
-/* These all interelate for performance.
-**
-** If the journal block count is smaller than n transactions, you lose speed.
-** I don't know what n is yet, I'm guessing 8-16.
-**
-** typical transaction size depends on the application, how often fsync is
-** called, and how many metadata blocks you dirty in a 30 second period.
-** The more small files (<16k) you use, the larger your transactions will
-** be.
-**
-** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
-** to wrap, which slows things down.  If you need high speed meta data updates, the journal should be big enough
-** to prevent wrapping before dirty meta blocks get to disk.
-**
-** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
-** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
-**
-** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
-**
-*/
+/*
+ * These all interrelate for performance.
+ *
+ * If the journal block count is smaller than n transactions, you lose speed.
+ * I don't know what n is yet, I'm guessing 8-16.
+ *
+ * typical transaction size depends on the application, how often fsync is
+ * called, and how many metadata blocks you dirty in a 30 second period.
+ * The more small files (<16k) you use, the larger your transactions will
+ * be.
+ *
+ * If your journal fills faster than dirty buffers get flushed to disk, it
+ * must flush them before allowing the journal to wrap, which slows things
+ * down.  If you need high speed meta data updates, the journal should be
+ * big enough to prevent wrapping before dirty meta blocks get to disk.
+ *
+ * If the batch max is smaller than the transaction max, you'll waste space
+ * at the end of the journal because journal_end sets the next transaction
+ * to start at 0 if the next transaction has any chance of wrapping.
+ *
+ * The larger the batch max age, the better the speed, and the more meta
+ * data changes you'll lose after a crash.
+ */
 
 /* don't mess with these for a while */
-				/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
+/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
 #define JOURNAL_BLOCK_SIZE  4096	/* BUG gotta get rid of this */
 #define JOURNAL_MAX_CNODE   1500	/* max cnodes to allocate. */
 #define JOURNAL_HASH_SIZE 8192
-#define JOURNAL_NUM_BITMAPS 5	/* number of copies of the bitmaps to have floating.  Must be >= 2 */
 
-/* One of these for every block in every transaction
-** Each one is in two hash tables.  First, a hash of the current transaction, and after journal_end, a
-** hash of all the in memory transactions.
-** next and prev are used by the current transaction (journal_hash).
-** hnext and hprev are used by journal_list_hash.  If a block is in more than one transaction, the journal_list_hash
-** links it in multiple times.  This allows flush_journal_list to remove just the cnode belonging
-** to a given transaction.
-*/
+/* number of copies of the bitmaps to have floating.  Must be >= 2 */
+#define JOURNAL_NUM_BITMAPS 5
+
+/*
+ * One of these for every block in every transaction
+ * Each one is in two hash tables.  First, a hash of the current transaction,
+ * and after journal_end, a hash of all the in memory transactions.
+ * next and prev are used by the current transaction (journal_hash).
+ * hnext and hprev are used by journal_list_hash.  If a block is in more
+ * than one transaction, the journal_list_hash links it in multiple times.
+ * This allows flush_journal_list to remove just the cnode belonging to a
+ * given transaction.
+ */
 struct reiserfs_journal_cnode {
 	struct buffer_head *bh;	/* real buffer head */
 	struct super_block *sb;	/* dev of real buffer head */
-	__u32 blocknr;		/* block number of real buffer head, == 0 when buffer on disk */
+
+	/* block number of real buffer head, == 0 when buffer on disk */
+	__u32 blocknr;
+
 	unsigned long state;
-	struct reiserfs_journal_list *jlist;	/* journal list this cnode lives in */
+
+	/* journal list this cnode lives in */
+	struct reiserfs_journal_list *jlist;
+
 	struct reiserfs_journal_cnode *next;	/* next in transaction list */
 	struct reiserfs_journal_cnode *prev;	/* prev in transaction list */
 	struct reiserfs_journal_cnode *hprev;	/* prev in hash list */
@@ -212,18 +248,22 @@
 };
 
 /*
-** one of these for each transaction.  The most important part here is the j_realblock.
-** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
-** real buffer heads dirty once all the commits hit the disk,
-** and to make sure every real block in a transaction is on disk before allowing the log area
-** to be overwritten */
+ * one of these for each transaction.  The most important part here is the
+ * j_realblock.  this list of cnodes is used to hash all the blocks in all
+ * the commits, to mark all the real buffer heads dirty once all the commits
+ * hit the disk, and to make sure every real block in a transaction is on
+ * disk before allowing the log area to be overwritten
+ */
 struct reiserfs_journal_list {
 	unsigned long j_start;
 	unsigned long j_state;
 	unsigned long j_len;
 	atomic_t j_nonzerolen;
 	atomic_t j_commit_left;
-	atomic_t j_older_commits_done;	/* all commits older than this on disk */
+
+	/* all commits older than this on disk */
+	atomic_t j_older_commits_done;
+
 	struct mutex j_commit_mutex;
 	unsigned int j_trans_id;
 	time_t j_timestamp;
@@ -234,11 +274,15 @@
 	/* time ordered list of all active transactions */
 	struct list_head j_list;
 
-	/* time ordered list of all transactions we haven't tried to flush yet */
+	/*
+	 * time ordered list of all transactions we haven't tried
+	 * to flush yet
+	 */
 	struct list_head j_working_list;
 
 	/* list of tail conversion targets in need of flush before commit */
 	struct list_head j_tail_bh_list;
+
 	/* list of data=ordered buffers in need of flush before commit */
 	struct list_head j_bh_list;
 	int j_refcount;
@@ -246,46 +290,83 @@
 
 struct reiserfs_journal {
 	struct buffer_head **j_ap_blocks;	/* journal blocks on disk */
-	struct reiserfs_journal_cnode *j_last;	/* newest journal block */
-	struct reiserfs_journal_cnode *j_first;	/*  oldest journal block.  start here for traverse */
+	/* newest journal block */
+	struct reiserfs_journal_cnode *j_last;
+
+	/* oldest journal block.  start here for traverse */
+	struct reiserfs_journal_cnode *j_first;
 
 	struct block_device *j_dev_bd;
 	fmode_t j_dev_mode;
-	int j_1st_reserved_block;	/* first block on s_dev of reserved area journal */
+
+	/* first block on s_dev of reserved area journal */
+	int j_1st_reserved_block;
 
 	unsigned long j_state;
 	unsigned int j_trans_id;
 	unsigned long j_mount_id;
-	unsigned long j_start;	/* start of current waiting commit (index into j_ap_blocks) */
+
+	/* start of current waiting commit (index into j_ap_blocks) */
+	unsigned long j_start;
 	unsigned long j_len;	/* length of current waiting commit */
-	unsigned long j_len_alloc;	/* number of buffers requested by journal_begin() */
+
+	/* number of buffers requested by journal_begin() */
+	unsigned long j_len_alloc;
+
 	atomic_t j_wcount;	/* count of writers for current commit */
-	unsigned long j_bcount;	/* batch count. allows turning X transactions into 1 */
-	unsigned long j_first_unflushed_offset;	/* first unflushed transactions offset */
-	unsigned j_last_flush_trans_id;	/* last fully flushed journal timestamp */
+
+	/* batch count. allows turning X transactions into 1 */
+	unsigned long j_bcount;
+
+	/* first unflushed transactions offset */
+	unsigned long j_first_unflushed_offset;
+
+	/* last fully flushed journal timestamp */
+	unsigned j_last_flush_trans_id;
+
 	struct buffer_head *j_header_bh;
 
 	time_t j_trans_start_time;	/* time this transaction started */
 	struct mutex j_mutex;
 	struct mutex j_flush_mutex;
-	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
-	atomic_t j_jlock;	/* lock for j_join_wait */
+
+	/* wait for current transaction to finish before starting new one */
+	wait_queue_head_t j_join_wait;
+
+	atomic_t j_jlock;		/* lock for j_join_wait */
 	int j_list_bitmap_index;	/* number of next list bitmap to use */
-	int j_must_wait;	/* no more journal begins allowed. MUST sleep on j_join_wait */
-	int j_next_full_flush;	/* next journal_end will flush all journal list */
-	int j_next_async_flush;	/* next journal_end will flush all async commits */
+
+	/* no more journal begins allowed. MUST sleep on j_join_wait */
+	int j_must_wait;
+
+	/* next journal_end will flush all journal list */
+	int j_next_full_flush;
+
+	/* next journal_end will flush all async commits */
+	int j_next_async_flush;
 
 	int j_cnode_used;	/* number of cnodes on the used list */
 	int j_cnode_free;	/* number of cnodes on the free list */
 
-	unsigned int j_trans_max;	/* max number of blocks in a transaction.  */
-	unsigned int j_max_batch;	/* max number of blocks to batch into a trans */
-	unsigned int j_max_commit_age;	/* in seconds, how old can an async commit be */
-	unsigned int j_max_trans_age;	/* in seconds, how old can a transaction be */
-	unsigned int j_default_max_commit_age;	/* the default for the max commit age */
+	/* max number of blocks in a transaction.  */
+	unsigned int j_trans_max;
+
+	/* max number of blocks to batch into a trans */
+	unsigned int j_max_batch;
+
+	/* in seconds, how old can an async commit be */
+	unsigned int j_max_commit_age;
+
+	/* in seconds, how old can a transaction be */
+	unsigned int j_max_trans_age;
+
+	/* the default for the max commit age */
+	unsigned int j_default_max_commit_age;
 
 	struct reiserfs_journal_cnode *j_cnode_free_list;
-	struct reiserfs_journal_cnode *j_cnode_free_orig;	/* orig pointer returned from vmalloc */
+
+	/* orig pointer returned from vmalloc */
+	struct reiserfs_journal_cnode *j_cnode_free_orig;
 
 	struct reiserfs_journal_list *j_current_jl;
 	int j_free_bitmap_nodes;
@@ -306,14 +387,21 @@
 
 	/* list of all active transactions */
 	struct list_head j_journal_list;
+
 	/* lists that haven't been touched by writeback attempts */
 	struct list_head j_working_list;
 
-	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];	/* array of bitmaps to record the deleted blocks */
-	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];	/* hash table for real buffer heads in current trans */
-	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];	/* hash table for all the real buffer heads in all
-										   the transactions */
-	struct list_head j_prealloc_list;	/* list of inodes which have preallocated blocks */
+	/* hash table for real buffer heads in current trans */
+	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];
+
+	/* hash table for all the real buffer heads in all the transactions */
+	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];
+
+	/* array of bitmaps to record the deleted blocks */
+	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];
+
+	/* list of inodes which have preallocated blocks */
+	struct list_head j_prealloc_list;
 	int j_persistent_trans;
 	unsigned long j_max_trans_size;
 	unsigned long j_max_batch_size;
@@ -328,11 +416,12 @@
 
 enum journal_state_bits {
 	J_WRITERS_BLOCKED = 1,	/* set when new writers not allowed */
-	J_WRITERS_QUEUED,	/* set when log is full due to too many writers */
-	J_ABORTED,		/* set when log is aborted */
+	J_WRITERS_QUEUED,    /* set when log is full due to too many writers */
+	J_ABORTED,           /* set when log is aborted */
 };
 
-#define JOURNAL_DESC_MAGIC "ReIsErLB"	/* ick.  magic string to find desc blocks in the journal */
+/* ick.  magic string to find desc blocks in the journal */
+#define JOURNAL_DESC_MAGIC "ReIsErLB"
 
 typedef __u32(*hashf_t) (const signed char *, int);
 
@@ -364,7 +453,10 @@
 	stat_cnt_t leaked_oid;
 	stat_cnt_t leaves_removable;
 
-	/* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
+	/*
+	 * balances per level.
+	 * Use explicit 5 as MAX_HEIGHT is not visible yet.
+	 */
 	stat_cnt_t balance_at[5];	/* XXX */
 	/* sbk == search_by_key */
 	stat_cnt_t sbk_read_at[5];	/* XXX */
@@ -416,47 +508,75 @@
 
 /* reiserfs union of in-core super block data */
 struct reiserfs_sb_info {
-	struct buffer_head *s_sbh;	/* Buffer containing the super block */
-	/* both the comment and the choice of
-	   name are unclear for s_rs -Hans */
-	struct reiserfs_super_block *s_rs;	/* Pointer to the super block in the buffer */
+	/* Buffer containing the super block */
+	struct buffer_head *s_sbh;
+
+	/* Pointer to the on-disk super block in the buffer */
+	struct reiserfs_super_block *s_rs;
 	struct reiserfs_bitmap_info *s_ap_bitmap;
-	struct reiserfs_journal *s_journal;	/* pointer to journal information */
+
+	/* pointer to journal information */
+	struct reiserfs_journal *s_journal;
+
 	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */
 
 	/* Serialize writers access, replace the old bkl */
 	struct mutex lock;
+
 	/* Owner of the lock (can be recursive) */
 	struct task_struct *lock_owner;
+
 	/* Depth of the lock, start from -1 like the bkl */
 	int lock_depth;
 
+	struct workqueue_struct *commit_wq;
+
 	/* Comment? -Hans */
 	void (*end_io_handler) (struct buffer_head *, int);
-	hashf_t s_hash_function;	/* pointer to function which is used
-					   to sort names in directory. Set on
-					   mount */
-	unsigned long s_mount_opt;	/* reiserfs's mount options are set
-					   here (currently - NOTAIL, NOLOG,
-					   REPLAYONLY) */
 
-	struct {		/* This is a structure that describes block allocator options */
-		unsigned long bits;	/* Bitfield for enable/disable kind of options */
-		unsigned long large_file_size;	/* size started from which we consider file to be a large one(in blocks) */
+	/*
+	 * pointer to function which is used to sort names in directory.
+	 * Set on mount
+	 */
+	hashf_t s_hash_function;
+
+	/* reiserfs's mount options are set here */
+	unsigned long s_mount_opt;
+
+	/* This is a structure that describes block allocator options */
+	struct {
+		/* Bitfield for enable/disable kind of options */
+		unsigned long bits;
+
+		/*
+		 * size started from which we consider file
+		 * to be a large one (in blocks)
+		 */
+		unsigned long large_file_size;
+
 		int border;	/* percentage of disk, border takes */
-		int preallocmin;	/* Minimal file size (in blocks) starting from which we do preallocations */
-		int preallocsize;	/* Number of blocks we try to prealloc when file
-					   reaches preallocmin size (in blocks) or
-					   prealloc_list is empty. */
+
+		/*
+		 * Minimal file size (in blocks) starting
+		 * from which we do preallocations
+		 */
+		int preallocmin;
+
+		/*
+		 * Number of blocks we try to prealloc when file
+		 * reaches preallocmin size (in blocks) or prealloc_list
+		 * is empty.
+		 */
+		int preallocsize;
 	} s_alloc_options;
 
 	/* Comment? -Hans */
 	wait_queue_head_t s_wait;
-	/* To be obsoleted soon by per buffer seals.. -Hans */
-	atomic_t s_generation_counter;	// increased by one every time the
-	// tree gets re-balanced
-	unsigned long s_properties;	/* File system properties. Currently holds
-					   on-disk FS format */
+	/* increased by one every time the tree gets re-balanced */
+	atomic_t s_generation_counter;
+
+	/* File system properties. Currently holds on-disk FS format */
+	unsigned long s_properties;
 
 	/* session statistics */
 	int s_disk_reads;
@@ -469,14 +589,23 @@
 	int s_bmaps_without_search;
 	int s_direct2indirect;
 	int s_indirect2direct;
-	/* set up when it's ok for reiserfs_read_inode2() to read from
-	   disk inode with nlink==0. Currently this is only used during
-	   finish_unfinished() processing at mount time */
+
+	/*
+	 * set up when it's ok for reiserfs_read_inode2() to read from
+	 * disk inode with nlink==0. Currently this is only used during
+	 * finish_unfinished() processing at mount time
+	 */
 	int s_is_unlinked_ok;
+
 	reiserfs_proc_info_data_t s_proc_info_data;
 	struct proc_dir_entry *procdir;
-	int reserved_blocks;	/* amount of blocks reserved for further allocations */
-	spinlock_t bitmap_lock;	/* this lock on now only used to protect reserved_blocks variable */
+
+	/* amount of blocks reserved for further allocations */
+	int reserved_blocks;
+
+
+	/* this lock is now only used to protect reserved_blocks variable */
+	spinlock_t bitmap_lock;
 	struct dentry *priv_root;	/* root of /.reiserfs_priv */
 	struct dentry *xattr_root;	/* root of /.reiserfs_priv/xattrs */
 	int j_errno;
@@ -492,14 +621,13 @@
 	char *s_jdev;		/* Stored jdev for mount option showing */
 #ifdef CONFIG_REISERFS_CHECK
 
-	struct tree_balance *cur_tb;	/*
-					 * Detects whether more than one
-					 * copy of tb exists per superblock
-					 * as a means of checking whether
-					 * do_balance is executing concurrently
-					 * against another tree reader/writer
-					 * on a same mount point.
-					 */
+	/*
+	 * Detects whether more than one copy of tb exists per superblock
+	 * as a means of checking whether do_balance is executing
+	 * concurrently against another tree reader/writer on a same
+	 * mount point.
+	 */
+	struct tree_balance *cur_tb;
 #endif
 };
 
@@ -508,25 +636,36 @@
 #define REISERFS_3_6 1
 #define REISERFS_OLD_FORMAT 2
 
-enum reiserfs_mount_options {
 /* Mount options */
-	REISERFS_LARGETAIL,	/* large tails will be created in a session */
-	REISERFS_SMALLTAIL,	/* small (for files less than block size) tails will be created in a session */
-	REPLAYONLY,		/* replay journal and return 0. Use by fsck */
-	REISERFS_CONVERT,	/* -o conv: causes conversion of old
-				   format super block to the new
-				   format. If not specified - old
-				   partition will be dealt with in a
-				   manner of 3.5.x */
+enum reiserfs_mount_options {
+	/* large tails will be created in a session */
+	REISERFS_LARGETAIL,
+	/*
+	 * small (for files less than block size) tails will
+	 * be created in a session
+	 */
+	REISERFS_SMALLTAIL,
 
-/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
-** reiserfs disks from 3.5.19 or earlier.  99% of the time, this option
-** is not required.  If the normal autodection code can't determine which
-** hash to use (because both hashes had the same value for a file)
-** use this option to force a specific hash.  It won't allow you to override
-** the existing hash on the FS, so if you have a tea hash disk, and mount
-** with -o hash=rupasov, the mount will fail.
-*/
+	/* replay journal and return 0. Used by fsck */
+	REPLAYONLY,
+
+	/*
+	 * -o conv: causes conversion of old format super block to the
+	 * new format. If not specified - old partition will be dealt
+	 * with in a manner of 3.5.x
+	 */
+	REISERFS_CONVERT,
+
+	/*
+	 * -o hash={tea, rupasov, r5, detect} is meant for properly mounting
+	 * reiserfs disks from 3.5.19 or earlier.  99% of the time, this
+	 * option is not required.  If the normal autodection code can't
+	 * determine which hash to use (because both hashes had the same
+	 * value for a file) use this option to force a specific hash.
+	 * It won't allow you to override the existing hash on the FS, so
+	 * if you have a tea hash disk, and mount with -o hash=rupasov,
+	 * the mount will fail.
+	 */
 	FORCE_TEA_HASH,		/* try to force tea hash on mount */
 	FORCE_RUPASOV_HASH,	/* try to force rupasov hash on mount */
 	FORCE_R5_HASH,		/* try to force rupasov hash on mount */
@@ -536,9 +675,11 @@
 	REISERFS_DATA_ORDERED,
 	REISERFS_DATA_WRITEBACK,
 
-/* used for testing experimental features, makes benchmarking new
-   features with and without more convenient, should never be used by
-   users in any code shipped to users (ideally) */
+	/*
+	 * used for testing experimental features, makes benchmarking new
+	 * features with and without more convenient, should never be used by
+	 * users in any code shipped to users (ideally)
+	 */
 
 	REISERFS_NO_BORDER,
 	REISERFS_NO_UNHASHED_RELOCATION,
@@ -705,28 +846,28 @@
 
 struct fid;
 
-/* in reading the #defines, it may help to understand that they employ
-   the following abbreviations:
-
-   B = Buffer
-   I = Item header
-   H = Height within the tree (should be changed to LEV)
-   N = Number of the item in the node
-   STAT = stat data
-   DEH = Directory Entry Header
-   EC = Entry Count
-   E = Entry number
-   UL = Unsigned Long
-   BLKH = BLocK Header
-   UNFM = UNForMatted node
-   DC = Disk Child
-   P = Path
-
-   These #defines are named by concatenating these abbreviations,
-   where first comes the arguments, and last comes the return value,
-   of the macro.
-
-*/
+/*
+ * in reading the #defines, it may help to understand that they employ
+ *  the following abbreviations:
+ *
+ *  B = Buffer
+ *  I = Item header
+ *  H = Height within the tree (should be changed to LEV)
+ *  N = Number of the item in the node
+ *  STAT = stat data
+ *  DEH = Directory Entry Header
+ *  EC = Entry Count
+ *  E = Entry number
+ *  UL = Unsigned Long
+ *  BLKH = BLocK Header
+ *  UNFM = UNForMatted node
+ *  DC = Disk Child
+ *  P = Path
+ *
+ *  These #defines are named by concatenating these abbreviations,
+ *  where first comes the arguments, and last comes the return value,
+ *  of the macro.
+ */
 
 #define USE_INODE_GENERATION_COUNTER
 
@@ -737,14 +878,17 @@
 /* n must be power of 2 */
 #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
 
-// to be ok for alpha and others we have to align structures to 8 byte
-// boundary.
-// FIXME: do not change 4 by anything else: there is code which relies on that
+/*
+ * to be ok for alpha and others we have to align structures to 8 byte
+ * boundary.
+ * FIXME: do not change 4 by anything else: there is code which relies on that
+ */
 #define ROUND_UP(x) _ROUND_UP(x,8LL)
 
-/* debug levels.  Right now, CONFIG_REISERFS_CHECK means print all debug
-** messages.
-*/
+/*
+ * debug levels.  Right now, CONFIG_REISERFS_CHECK means print all debug
+ * messages.
+ */
 #define REISERFS_DEBUG_CODE 5	/* extra messages to help find/debug errors */
 
 void __reiserfs_warning(struct super_block *s, const char *id,
@@ -753,7 +897,7 @@
 	 __reiserfs_warning(s, id, __func__, fmt, ##args)
 /* assertions handling */
 
-/** always check a condition and panic if it's false. */
+/* always check a condition and panic if it's false. */
 #define __RASSERT(cond, scond, format, args...)			\
 do {									\
 	if (!(cond))							\
@@ -776,35 +920,48 @@
  * Disk Data Structures
  */
 
-/***************************************************************************/
-/*                             SUPER BLOCK                                 */
-/***************************************************************************/
+/***************************************************************************
+ *                             SUPER BLOCK                                 *
+ ***************************************************************************/
 
 /*
- * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
- * the version in RAM is part of a larger structure containing fields never written to disk.
+ * Structure of super block on disk, a version of which in RAM is often
+ * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger
+ * structure containing fields never written to disk.
  */
-#define UNSET_HASH 0		// read_super will guess about, what hash names
-		     // in directories were sorted with
+#define UNSET_HASH 0	/* Detect hash on disk */
 #define TEA_HASH  1
 #define YURA_HASH 2
 #define R5_HASH   3
 #define DEFAULT_HASH R5_HASH
 
 struct journal_params {
-	__le32 jp_journal_1st_block;	/* where does journal start from on its
-					 * device */
-	__le32 jp_journal_dev;	/* journal device st_rdev */
-	__le32 jp_journal_size;	/* size of the journal */
-	__le32 jp_journal_trans_max;	/* max number of blocks in a transaction. */
-	__le32 jp_journal_magic;	/* random value made on fs creation (this
-					 * was sb_journal_block_count) */
-	__le32 jp_journal_max_batch;	/* max number of blocks to batch into a
-					 * trans */
-	__le32 jp_journal_max_commit_age;	/* in seconds, how old can an async
-						 * commit be */
-	__le32 jp_journal_max_trans_age;	/* in seconds, how old can a transaction
-						 * be */
+	/* where does journal start from on its device */
+	__le32 jp_journal_1st_block;
+
+	/* journal device st_rdev */
+	__le32 jp_journal_dev;
+
+	/* size of the journal */
+	__le32 jp_journal_size;
+
+	/* max number of blocks in a transaction. */
+	__le32 jp_journal_trans_max;
+
+	/*
+	 * random value made on fs creation
+	 * (this was sb_journal_block_count)
+	 */
+	__le32 jp_journal_magic;
+
+	/* max number of blocks to batch into a trans */
+	__le32 jp_journal_max_batch;
+
+	/* in seconds, how old can an async commit be */
+	__le32 jp_journal_max_commit_age;
+
+	/* in seconds, how old can a transaction be */
+	__le32 jp_journal_max_trans_age;
 };
 
 /* this is the super from 3.5.X, where X >= 10 */
@@ -814,26 +971,48 @@
 	__le32 s_root_block;	/* root block number    */
 	struct journal_params s_journal;
 	__le16 s_blocksize;	/* block size */
-	__le16 s_oid_maxsize;	/* max size of object id array, see
-				 * get_objectid() commentary  */
+
+	/* max size of object id array, see get_objectid() commentary  */
+	__le16 s_oid_maxsize;
 	__le16 s_oid_cursize;	/* current size of object id array */
-	__le16 s_umount_state;	/* this is set to 1 when filesystem was
-				 * umounted, to 2 - when not */
-	char s_magic[10];	/* reiserfs magic string indicates that
-				 * file system is reiserfs:
-				 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
-	__le16 s_fs_state;	/* it is set to used by fsck to mark which
-				 * phase of rebuilding is done */
-	__le32 s_hash_function_code;	/* indicate, what hash function is being use
-					 * to sort names in a directory*/
+
+	/* this is set to 1 when filesystem was umounted, to 2 - when not */
+	__le16 s_umount_state;
+
+	/*
+	 * reiserfs magic string indicates that file system is reiserfs:
+	 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs"
+	 */
+	char s_magic[10];
+
+	/*
+	 * it is used by fsck to mark which
+	 * phase of rebuilding is done
+	 */
+	__le16 s_fs_state;
+	/*
+	 * indicates what hash function is being used
+	 * to sort names in a directory
+	 */
+	__le32 s_hash_function_code;
 	__le16 s_tree_height;	/* height of disk tree */
-	__le16 s_bmap_nr;	/* amount of bitmap blocks needed to address
-				 * each block of file system */
-	__le16 s_version;	/* this field is only reliable on filesystem
-				 * with non-standard journal */
-	__le16 s_reserved_for_journal;	/* size in blocks of journal area on main
-					 * device, we need to keep after
-					 * making fs with non-standard journal */
+
+	/*
+	 * amount of bitmap blocks needed to address
+	 * each block of file system
+	 */
+	__le16 s_bmap_nr;
+
+	/*
+	 * this field is only reliable on filesystem with non-standard journal
+	 */
+	__le16 s_version;
+
+	/*
+	 * size in blocks of journal area on main device, we need to
+	 * keep after making fs with non-standard journal
+	 */
+	__le16 s_reserved_for_journal;
 } __attribute__ ((__packed__));
 
 #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
@@ -842,17 +1021,21 @@
 struct reiserfs_super_block {
 	struct reiserfs_super_block_v1 s_v1;
 	__le32 s_inode_generation;
-	__le32 s_flags;		/* Right now used only by inode-attributes, if enabled */
+
+	/* Right now used only by inode-attributes, if enabled */
+	__le32 s_flags;
+
 	unsigned char s_uuid[16];	/* filesystem unique identifier */
 	unsigned char s_label[16];	/* filesystem volume label */
 	__le16 s_mnt_count;		/* Count of mounts since last fsck */
 	__le16 s_max_mnt_count;		/* Maximum mounts before check */
 	__le32 s_lastcheck;		/* Timestamp of last fsck */
 	__le32 s_check_interval;	/* Interval between checks */
-	char s_unused[76];	/* zero filled by mkreiserfs and
-				 * reiserfs_convert_objectid_map_v1()
-				 * so any additions must be updated
-				 * there as well. */
+
+	/*
+	 * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1()
+	 * so any additions must be updated there as well. */
+	char s_unused[76];
 } __attribute__ ((__packed__));
 
 #define SB_SIZE (sizeof(struct reiserfs_super_block))
@@ -860,7 +1043,7 @@
 #define REISERFS_VERSION_1 0
 #define REISERFS_VERSION_2 2
 
-// on-disk super block fields converted to cpu form
+/* on-disk super block fields converted to cpu form */
 #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
 #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
 #define SB_BLOCKSIZE(s) \
@@ -915,11 +1098,13 @@
 int is_reiserfs_3_6(struct reiserfs_super_block *rs);
 int is_reiserfs_jr(struct reiserfs_super_block *rs);
 
-/* ReiserFS leaves the first 64k unused, so that partition labels have
-   enough space.  If someone wants to write a fancy bootloader that
-   needs more than 64k, let us know, and this will be increased in size.
-   This number must be larger than than the largest block size on any
-   platform, or code will break.  -Hans */
+/*
+ * ReiserFS leaves the first 64k unused, so that partition labels have
+ * enough space.  If someone wants to write a fancy bootloader that
+ * needs more than 64k, let us know, and this will be increased in size.
+ * This number must be larger than the largest block size on any
+ * platform, or code will break.  -Hans
+ */
 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
 #define REISERFS_FIRST_BLOCK unused_define
 #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
@@ -944,8 +1129,7 @@
 	unsigned short unfm_freespace;
 };
 
-/* there are two formats of keys: 3.5 and 3.6
- */
+/* there are two formats of keys: 3.5 and 3.6 */
 #define KEY_FORMAT_3_5 0
 #define KEY_FORMAT_3_6 1
 
@@ -963,8 +1147,10 @@
 	return sb->s_fs_info;
 }
 
-/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
- * which overflows on large file systems. */
+/*
+ * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems.
+ */
 static inline __u32 reiserfs_bmap_count(struct super_block *sb)
 {
 	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
@@ -975,8 +1161,10 @@
 	return bmap_nr > ((1LL << 16) - 1);
 }
 
-/** this says about version of key of all items (but stat data) the
-    object consists of */
+/*
+ * this says about version of key of all items (but stat data) the
+ * object consists of
+ */
 #define get_inode_item_key_version( inode )                                    \
     ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
 
@@ -995,16 +1183,18 @@
             else                                                               \
                 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
 
-/* This is an aggressive tail suppression policy, I am hoping it
-   improves our benchmarks. The principle behind it is that percentage
-   space saving is what matters, not absolute space saving.  This is
-   non-intuitive, but it helps to understand it if you consider that the
-   cost to access 4 blocks is not much more than the cost to access 1
-   block, if you have to do a seek and rotate.  A tail risks a
-   non-linear disk access that is significant as a percentage of total
-   time cost for a 4 block file and saves an amount of space that is
-   less significant as a percentage of space, or so goes the hypothesis.
-   -Hans */
+/*
+ * This is an aggressive tail suppression policy, I am hoping it
+ * improves our benchmarks. The principle behind it is that percentage
+ * space saving is what matters, not absolute space saving.  This is
+ * non-intuitive, but it helps to understand it if you consider that the
+ * cost to access 4 blocks is not much more than the cost to access 1
+ * block, if you have to do a seek and rotate.  A tail risks a
+ * non-linear disk access that is significant as a percentage of total
+ * time cost for a 4 block file and saves an amount of space that is
+ * less significant as a percentage of space, or so goes the hypothesis.
+ * -Hans
+ */
 #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
 (\
   (!(n_tail_size)) || \
@@ -1018,10 +1208,11 @@
      ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
 )
 
-/* Another strategy for tails, this one means only create a tail if all the
-   file would fit into one DIRECT item.
-   Primary intention for this one is to increase performance by decreasing
-   seeking.
+/*
+ * Another strategy for tails, this one means only create a tail if all the
+ * file would fit into one DIRECT item.
+ * Primary intention for this one is to increase performance by decreasing
+ * seeking.
 */
 #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
 (\
@@ -1035,23 +1226,21 @@
 #define REISERFS_VALID_FS    1
 #define REISERFS_ERROR_FS    2
 
-//
-// there are 5 item types currently
-//
+/*
+ * there are 5 item types currently
+ */
 #define TYPE_STAT_DATA 0
 #define TYPE_INDIRECT 1
 #define TYPE_DIRECT 2
 #define TYPE_DIRENTRY 3
 #define TYPE_MAXTYPE 3
-#define TYPE_ANY 15		// FIXME: comment is required
+#define TYPE_ANY 15		/* FIXME: comment is required */
 
-/***************************************************************************/
-/*                       KEY & ITEM HEAD                                   */
-/***************************************************************************/
+/***************************************************************************
+ *                       KEY & ITEM HEAD                                   *
+ ***************************************************************************/
 
-//
-// directories use this key as well as old files
-//
+/* directories use this key as well as old files */
 struct offset_v1 {
 	__le32 k_offset;
 	__le32 k_uniqueness;
@@ -1084,11 +1273,14 @@
 	v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
 }
 
-/* Key of an item determines its location in the S+tree, and
-   is composed of 4 components */
+/*
+ * Key of an item determines its location in the S+tree, and
+ * is composed of 4 components
+ */
 struct reiserfs_key {
-	__le32 k_dir_id;	/* packing locality: by default parent
-				   directory object id */
+	/* packing locality: by default parent directory object id */
+	__le32 k_dir_id;
+
 	__le32 k_objectid;	/* object identifier */
 	union {
 		struct offset_v1 k_offset_v1;
@@ -1097,8 +1289,8 @@
 } __attribute__ ((__packed__));
 
 struct in_core_key {
-	__u32 k_dir_id;		/* packing locality: by default parent
-				   directory object id */
+	/* packing locality: by default parent directory object id */
+	__u32 k_dir_id;
 	__u32 k_objectid;	/* object identifier */
 	__u64 k_offset;
 	__u8 k_type;
@@ -1107,14 +1299,16 @@
 struct cpu_key {
 	struct in_core_key on_disk_key;
 	int version;
-	int key_length;		/* 3 in all cases but direct2indirect and
-				   indirect2direct conversion */
+	/* 3 in all cases but direct2indirect and indirect2direct conversion */
+	int key_length;
 };
 
-/* Our function for comparing keys can compare keys of different
-   lengths.  It takes as a parameter the length of the keys it is to
-   compare.  These defines are used in determining what is to be passed
-   to it as that parameter. */
+/*
+ * Our function for comparing keys can compare keys of different
+ * lengths.  It takes as a parameter the length of the keys it is to
+ * compare.  These defines are used in determining what is to be passed
+ * to it as that parameter.
+ */
 #define REISERFS_FULL_KEY_LEN     4
 #define REISERFS_SHORT_KEY_LEN    2
 
@@ -1143,40 +1337,52 @@
 #define POSITION_FOUND 1
 #define POSITION_NOT_FOUND 0
 
-// return values for reiserfs_find_entry and search_by_entry_key
+/* return values for reiserfs_find_entry and search_by_entry_key */
 #define NAME_FOUND 1
 #define NAME_NOT_FOUND 0
 #define GOTO_PREVIOUS_ITEM 2
 #define NAME_FOUND_INVISIBLE 3
 
-/*  Everything in the filesystem is stored as a set of items.  The
-    item head contains the key of the item, its free space (for
-    indirect items) and specifies the location of the item itself
-    within the block.  */
+/*
+ * Everything in the filesystem is stored as a set of items.  The
+ * item head contains the key of the item, its free space (for
+ * indirect items) and specifies the location of the item itself
+ * within the block.
+ */
 
 struct item_head {
-	/* Everything in the tree is found by searching for it based on
-	 * its key.*/
+	/*
+	 * Everything in the tree is found by searching for it based on
+	 * its key.
+	 */
 	struct reiserfs_key ih_key;
 	union {
-		/* The free space in the last unformatted node of an
-		   indirect item if this is an indirect item.  This
-		   equals 0xFFFF iff this is a direct item or stat data
-		   item. Note that the key, not this field, is used to
-		   determine the item type, and thus which field this
-		   union contains. */
+		/*
+		 * The free space in the last unformatted node of an
+		 * indirect item if this is an indirect item.  This
+		 * equals 0xFFFF iff this is a direct item or stat data
+		 * item. Note that the key, not this field, is used to
+		 * determine the item type, and thus which field this
+		 * union contains.
+		 */
 		__le16 ih_free_space_reserved;
-		/* Iff this is a directory item, this field equals the
-		   number of directory entries in the directory item. */
+
+		/*
+		 * Iff this is a directory item, this field equals the
+		 * number of directory entries in the directory item.
+		 */
 		__le16 ih_entry_count;
 	} __attribute__ ((__packed__)) u;
 	__le16 ih_item_len;	/* total size of the item body */
-	__le16 ih_item_location;	/* an offset to the item body
-					 * within the block */
-	__le16 ih_version;	/* 0 for all old items, 2 for new
-				   ones. Highest bit is set by fsck
-				   temporary, cleaned after all
-				   done */
+
+	/* an offset to the item body within the block */
+	__le16 ih_item_location;
+
+	/*
+	 * 0 for all old items, 2 for new ones. Highest bit is set by fsck
+	 * temporarily, cleaned after all done
+	 */
+	__le16 ih_version;
 } __attribute__ ((__packed__));
 /* size of item header     */
 #define IH_SIZE (sizeof(struct item_head))
@@ -1198,27 +1404,24 @@
 #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
 #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
 
-/* these operate on indirect items, where you've got an array of ints
-** at a possibly unaligned location.  These are a noop on ia32
-** 
-** p is the array of __u32, i is the index into the array, v is the value
-** to store there.
-*/
+/*
+ * these operate on indirect items, where you've got an array of ints
+ * at a possibly unaligned location.  These are a noop on ia32
+ *
+ * p is the array of __u32, i is the index into the array, v is the value
+ * to store there.
+ */
 #define get_block_num(p, i) get_unaligned_le32((p) + (i))
 #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
 
-//
-// in old version uniqueness field shows key type
-//
+/* in old version uniqueness field shows key type */
 #define V1_SD_UNIQUENESS 0
 #define V1_INDIRECT_UNIQUENESS 0xfffffffe
 #define V1_DIRECT_UNIQUENESS 0xffffffff
 #define V1_DIRENTRY_UNIQUENESS 500
-#define V1_ANY_UNIQUENESS 555	// FIXME: comment is required
+#define V1_ANY_UNIQUENESS 555	/* FIXME: comment is required */
 
-//
-// here are conversion routines
-//
+/* here are conversion routines */
 static inline int uniqueness2type(__u32 uniqueness) CONSTF;
 static inline int uniqueness2type(__u32 uniqueness)
 {
@@ -1255,11 +1458,11 @@
 	}
 }
 
-//
-// key is pointer to on disk key which is stored in le, result is cpu,
-// there is no way to get version of object from key, so, provide
-// version to these defines
-//
+/*
+ * key is pointer to on disk key which is stored in le, result is cpu,
+ * there is no way to get version of object from key, so, provide
+ * version to these defines
+ */
 static inline loff_t le_key_k_offset(int version,
 				     const struct reiserfs_key *key)
 {
@@ -1275,9 +1478,11 @@
 
 static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
 {
-	return (version == KEY_FORMAT_3_5) ?
-	    uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
-	    offset_v2_k_type(&(key->u.k_offset_v2));
+	if (version == KEY_FORMAT_3_5) {
+		loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness);
+		return uniqueness2type(val);
+	} else
+		return offset_v2_k_type(&(key->u.k_offset_v2));
 }
 
 static inline loff_t le_ih_k_type(const struct item_head *ih)
@@ -1288,8 +1493,22 @@
 static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
 				       loff_t offset)
 {
-	(version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) :	/* jdm check */
-	    (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset));
+	if (version == KEY_FORMAT_3_5)
+		key->u.k_offset_v1.k_offset = cpu_to_le32(offset);
+	else
+		set_offset_v2_k_offset(&key->u.k_offset_v2, offset);
+}
+
+static inline void add_le_key_k_offset(int version, struct reiserfs_key *key,
+				       loff_t offset)
+{
+	set_le_key_k_offset(version, key,
+			    le_key_k_offset(version, key) + offset);
+}
+
+static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset)
+{
+	add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
 }
 
 static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
@@ -1300,10 +1519,11 @@
 static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
 				     int type)
 {
-	(version == KEY_FORMAT_3_5) ?
-	    (void)(key->u.k_offset_v1.k_uniqueness =
-		   cpu_to_le32(type2uniqueness(type)))
-	    : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
+	if (version == KEY_FORMAT_3_5) {
+		type = type2uniqueness(type);
+		key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type);
+	} else
+	       set_offset_v2_k_type(&key->u.k_offset_v2, type);
 }
 
 static inline void set_le_ih_k_type(struct item_head *ih, int type)
@@ -1331,9 +1551,7 @@
 	return le_key_k_type(version, key) == TYPE_STAT_DATA;
 }
 
-//
-// item header has version.
-//
+/* item header has version. */
 static inline int is_direntry_le_ih(struct item_head *ih)
 {
 	return is_direntry_le_key(ih_version(ih), &ih->ih_key);
@@ -1354,9 +1572,7 @@
 	return is_statdata_le_key(ih_version(ih), &ih->ih_key);
 }
 
-//
-// key is pointer to cpu key, result is cpu
-//
+/* key is pointer to cpu key, result is cpu */
 static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
 {
 	return key->on_disk_key.k_offset;
@@ -1407,7 +1623,7 @@
 
 extern struct reiserfs_key root_key;
 
-/* 
+/*
  * Picture represents a leaf of the S+tree
  *  ______________________________________________________
  * |      |  Array of     |                   |           |
@@ -1416,15 +1632,19 @@
  * |______|_______________|___________________|___________|
  */
 
-/* Header of a disk block.  More precisely, header of a formatted leaf
-   or internal node, and not the header of an unformatted node. */
+/*
+ * Header of a disk block.  More precisely, header of a formatted leaf
+ * or internal node, and not the header of an unformatted node.
+ */
 struct block_head {
 	__le16 blk_level;	/* Level of a block in the tree. */
 	__le16 blk_nr_item;	/* Number of keys/items in a block. */
 	__le16 blk_free_space;	/* Block free space in bytes. */
 	__le16 blk_reserved;
 	/* dump this in v4/planA */
-	struct reiserfs_key blk_right_delim_key;	/* kept only for compatibility */
+
+	/* kept only for compatibility */
+	struct reiserfs_key blk_right_delim_key;
 };
 
 #define BLKH_SIZE                     (sizeof(struct block_head))
@@ -1439,18 +1659,20 @@
 #define blkh_right_delim_key(p_blkh)  ((p_blkh)->blk_right_delim_key)
 #define set_blkh_right_delim_key(p_blkh,val)  ((p_blkh)->blk_right_delim_key = val)
 
-/*
- * values for blk_level field of the struct block_head
- */
+/* values for blk_level field of the struct block_head */
 
-#define FREE_LEVEL 0		/* when node gets removed from the tree its
-				   blk_level is set to FREE_LEVEL. It is then
-				   used to see whether the node is still in the
-				   tree */
+/*
+ * When node gets removed from the tree its blk_level is set to FREE_LEVEL.
+ * It is then used to see whether the node is still in the tree
+ */
+#define FREE_LEVEL 0
 
 #define DISK_LEAF_NODE_LEVEL  1	/* Leaf node level. */
 
-/* Given the buffer head of a formatted node, resolve to the block head of that node. */
+/*
+ * Given the buffer head of a formatted node, resolve to the
+ * block head of that node.
+ */
 #define B_BLK_HEAD(bh)			((struct block_head *)((bh)->b_data))
 /* Number of items that are in buffer. */
 #define B_NR_ITEMS(bh)			(blkh_nr_item(B_BLK_HEAD(bh)))
@@ -1471,14 +1693,14 @@
 #define B_IS_KEYS_LEVEL(bh)      (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
 					    && B_LEVEL(bh) <= MAX_HEIGHT)
 
-/***************************************************************************/
-/*                             STAT DATA                                   */
-/***************************************************************************/
+/***************************************************************************
+ *                             STAT DATA                                   *
+ ***************************************************************************/
 
-//
-// old stat data is 32 bytes long. We are going to distinguish new one by
-// different size
-//
+/*
+ * old stat data is 32 bytes long. We are going to distinguish new one by
+ * different size
+ */
 struct stat_data_v1 {
 	__le16 sd_mode;		/* file type, permissions */
 	__le16 sd_nlink;	/* number of hard links */
@@ -1487,20 +1709,25 @@
 	__le32 sd_size;		/* file size */
 	__le32 sd_atime;	/* time of last access */
 	__le32 sd_mtime;	/* time file was last modified  */
-	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+
+	/*
+	 * time inode (stat data) was last changed
+	 * (except changes to sd_atime and sd_mtime)
+	 */
+	__le32 sd_ctime;
 	union {
 		__le32 sd_rdev;
 		__le32 sd_blocks;	/* number of blocks file uses */
 	} __attribute__ ((__packed__)) u;
-	__le32 sd_first_direct_byte;	/* first byte of file which is stored
-					   in a direct item: except that if it
-					   equals 1 it is a symlink and if it
-					   equals ~(__u32)0 there is no
-					   direct item.  The existence of this
-					   field really grates on me. Let's
-					   replace it with a macro based on
-					   sd_size and our tail suppression
-					   policy.  Someday.  -Hans */
+
+	/*
+	 * first byte of file which is stored in a direct item: except that if
+	 * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no
+	 * direct item.  The existence of this field really grates on me.
+	 * Let's replace it with a macro based on sd_size and our tail
+	 * suppression policy.  Someday.  -Hans
+	 */
+	__le32 sd_first_direct_byte;
 } __attribute__ ((__packed__));
 
 #define SD_V1_SIZE              (sizeof(struct stat_data_v1))
@@ -1532,8 +1759,10 @@
 
 /* inode flags stored in sd_attrs (nee sd_reserved) */
 
-/* we want common flags to have the same values as in ext2,
-   so chattr(1) will work without problems */
+/*
+ * we want common flags to have the same values as in ext2,
+ * so chattr(1) will work without problems
+ */
 #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
 #define REISERFS_APPEND_FL    FS_APPEND_FL
 #define REISERFS_SYNC_FL      FS_SYNC_FL
@@ -1553,8 +1782,10 @@
 				REISERFS_COMPR_FL |	\
 				REISERFS_NOTAIL_FL )
 
-/* Stat Data on disk (reiserfs version of UFS disk inode minus the
-   address blocks) */
+/*
+ * Stat Data on disk (reiserfs version of UFS disk inode minus the
+ * address blocks)
+ */
 struct stat_data {
 	__le16 sd_mode;		/* file type, permissions */
 	__le16 sd_attrs;	/* persistent inode flags */
@@ -1564,25 +1795,20 @@
 	__le32 sd_gid;		/* group */
 	__le32 sd_atime;	/* time of last access */
 	__le32 sd_mtime;	/* time file was last modified  */
-	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+
+	/*
+	 * time inode (stat data) was last changed
+	 * (except changes to sd_atime and sd_mtime)
+	 */
+	__le32 sd_ctime;
 	__le32 sd_blocks;
 	union {
 		__le32 sd_rdev;
 		__le32 sd_generation;
-		//__le32 sd_first_direct_byte;
-		/* first byte of file which is stored in a
-		   direct item: except that if it equals 1
-		   it is a symlink and if it equals
-		   ~(__u32)0 there is no direct item.  The
-		   existence of this field really grates
-		   on me. Let's replace it with a macro
-		   based on sd_size and our tail
-		   suppression policy? */
 	} __attribute__ ((__packed__)) u;
 } __attribute__ ((__packed__));
-//
-// this is 44 bytes long
-//
+
+/* this is 44 bytes long */
 #define SD_SIZE (sizeof(struct stat_data))
 #define SD_V2_SIZE              SD_SIZE
 #define stat_data_v2(ih)        (ih_version (ih) == KEY_FORMAT_3_6)
@@ -1613,48 +1839,61 @@
 #define sd_v2_attrs(sdp)         (le16_to_cpu((sdp)->sd_attrs))
 #define set_sd_v2_attrs(sdp,v)   ((sdp)->sd_attrs = cpu_to_le16(v))
 
-/***************************************************************************/
-/*                      DIRECTORY STRUCTURE                                */
-/***************************************************************************/
-/* 
-   Picture represents the structure of directory items
-   ________________________________________________
-   |  Array of     |   |     |        |       |   |
-   | directory     |N-1| N-2 | ....   |   1st |0th|
-   | entry headers |   |     |        |       |   |
-   |_______________|___|_____|________|_______|___|
-                    <----   directory entries         ------>
-
- First directory item has k_offset component 1. We store "." and ".."
- in one item, always, we never split "." and ".." into differing
- items.  This makes, among other things, the code for removing
- directories simpler. */
+/***************************************************************************
+ *                      DIRECTORY STRUCTURE                                *
+ ***************************************************************************/
+/*
+ * Picture represents the structure of directory items
+ * ________________________________________________
+ * |  Array of     |   |     |        |       |   |
+ * | directory     |N-1| N-2 | ....   |   1st |0th|
+ * | entry headers |   |     |        |       |   |
+ * |_______________|___|_____|________|_______|___|
+ *                  <----   directory entries         ------>
+ *
+ * First directory item has k_offset component 1. We store "." and ".."
+ * in one item, always, we never split "." and ".." into differing
+ * items.  This makes, among other things, the code for removing
+ * directories simpler.
+ */
 #define SD_OFFSET  0
 #define SD_UNIQUENESS 0
 #define DOT_OFFSET 1
 #define DOT_DOT_OFFSET 2
 #define DIRENTRY_UNIQUENESS 500
 
-/* */
 #define FIRST_ITEM_OFFSET 1
 
 /*
-   Q: How to get key of object pointed to by entry from entry?  
+ * Q: How to get key of object pointed to by entry from entry?
+ *
+ * A: Each directory entry has its header. This header has deh_dir_id
+ *    and deh_objectid fields, those are key of object, entry points to
+ */
 
-   A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key
-      of object, entry points to */
-
-/* NOT IMPLEMENTED:   
-   Directory will someday contain stat data of object */
+/*
+ * NOT IMPLEMENTED:
+ * Directory will someday contain stat data of object
+ */
 
 struct reiserfs_de_head {
 	__le32 deh_offset;	/* third component of the directory entry key */
-	__le32 deh_dir_id;	/* objectid of the parent directory of the object, that is referenced
-				   by directory entry */
-	__le32 deh_objectid;	/* objectid of the object, that is referenced by directory entry */
+
+	/*
+	 * objectid of the parent directory of the object, that is referenced
+	 * by directory entry
+	 */
+	__le32 deh_dir_id;
+
+	/* objectid of the object, that is referenced by directory entry */
+	__le32 deh_objectid;
 	__le16 deh_location;	/* offset of name in the whole item */
-	__le16 deh_state;	/* whether 1) entry contains stat data (for future), and 2) whether
-				   entry is hidden (unlinked) */
+
+	/*
+	 * whether 1) entry contains stat data (for future), and
+	 * 2) whether entry is hidden (unlinked)
+	 */
+	__le16 deh_state;
 } __attribute__ ((__packed__));
 #define DEH_SIZE                  sizeof(struct reiserfs_de_head)
 #define deh_offset(p_deh)         (le32_to_cpu((p_deh)->deh_offset))
@@ -1684,9 +1923,11 @@
 #   define ADDR_UNALIGNED_BITS  (3)
 #endif
 
-/* These are only used to manipulate deh_state.
+/*
+ * These are only used to manipulate deh_state.
  * Because of this, we'll use the ext2_ bit routines,
- * since they are little endian */
+ * since they are little endian
+ */
 #ifdef ADDR_UNALIGNED_BITS
 
 #   define aligned_address(addr)           ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
@@ -1721,46 +1962,16 @@
 extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
 				__le32 par_dirid, __le32 par_objid);
 
-/* array of the entry headers */
- /* get item body */
-#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
-#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
-
-/* length of the directory entry in directory item. This define
-   calculates length of i-th directory entry using directory entry
-   locations from dir entry head. When it calculates length of 0-th
-   directory entry, it uses length of whole item in place of entry
-   location of the non-existent following entry in the calculation.
-   See picture above.*/
-/*
-#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
-((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
-*/
-static inline int entry_length(const struct buffer_head *bh,
-			       const struct item_head *ih, int pos_in_item)
-{
-	struct reiserfs_de_head *deh;
-
-	deh = B_I_DEH(bh, ih) + pos_in_item;
-	if (pos_in_item)
-		return deh_location(deh - 1) - deh_location(deh);
-
-	return ih_item_len(ih) - deh_location(deh);
-}
-
-/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
-#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
-
-/* name by bh, ih and entry_num */
-#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
-
-// two entries per block (at least)
+/* two entries per block (at least) */
 #define REISERFS_MAX_NAME(block_size) 255
 
-/* this structure is used for operations on directory entries. It is
-   not a disk structure. */
-/* When reiserfs_find_entry or search_by_entry_key find directory
-   entry, they return filled reiserfs_dir_entry structure */
+/*
+ * this structure is used for operations on directory entries. It is
+ * not a disk structure.
+ *
+ * When reiserfs_find_entry or search_by_entry_key find directory
+ * entry, they return filled reiserfs_dir_entry structure
+ */
 struct reiserfs_dir_entry {
 	struct buffer_head *de_bh;
 	int de_item_num;
@@ -1778,10 +1989,14 @@
 	struct cpu_key de_entry_key;
 };
 
-/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */
+/*
+ * these defines are useful when a particular member of
+ * a reiserfs_dir_entry is needed
+ */
 
 /* pointer to file name, stored in entry */
-#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))
+#define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \
+				(ih_item_body(bh, ih) + deh_location(deh))
 
 /* length of name */
 #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
@@ -1804,11 +2019,13 @@
  * |______|_______________|___________________|___________|
  */
 
-/***************************************************************************/
-/*                      DISK CHILD                                         */
-/***************************************************************************/
-/* Disk child pointer: The pointer from an internal node of the tree
-   to a node that is on disk. */
+/***************************************************************************
+ *                      DISK CHILD                                         *
+ ***************************************************************************/
+/*
+ * Disk child pointer:
+ * The pointer from an internal node of the tree to a node that is on disk.
+ */
 struct disk_child {
 	__le32 dc_block_number;	/* Disk child's block number. */
 	__le16 dc_size;		/* Disk child's used space.   */
@@ -1841,47 +2058,66 @@
 #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
 #define MIN_NR_KEY(bh)    (MAX_NR_KEY(bh)/2)
 
-/***************************************************************************/
-/*                      PATH STRUCTURES AND DEFINES                        */
-/***************************************************************************/
+/***************************************************************************
+ *                      PATH STRUCTURES AND DEFINES                        *
+ ***************************************************************************/
 
-/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the
-   key.  It uses reiserfs_bread to try to find buffers in the cache given their block number.  If it
-   does not find them in the cache it reads them from disk.  For each node search_by_key finds using
-   reiserfs_bread it then uses bin_search to look through that node.  bin_search will find the
-   position of the block_number of the next node if it is looking through an internal node.  If it
-   is looking through a leaf node bin_search will find the position of the item which has key either
-   equal to given key, or which is the maximal key less than the given key. */
+/*
+ * search_by_key fills up the path from the root to the leaf as it descends
+ * the tree looking for the key.  It uses reiserfs_bread to try to find
+ * buffers in the cache given their block number.  If it does not find
+ * them in the cache it reads them from disk.  For each node search_by_key
+ * finds using reiserfs_bread it then uses bin_search to look through that
+ * node.  bin_search will find the position of the block_number of the next
+ * node if it is looking through an internal node.  If it is looking through
+ * a leaf node bin_search will find the position of the item which has key
+ * either equal to given key, or which is the maximal key less than the
+ * given key.
+ */
 
 struct path_element {
-	struct buffer_head *pe_buffer;	/* Pointer to the buffer at the path in the tree. */
-	int pe_position;	/* Position in the tree node which is placed in the */
-	/* buffer above.                                  */
+	/* Pointer to the buffer at the path in the tree. */
+	struct buffer_head *pe_buffer;
+	/* Position in the tree node which is placed in the buffer above. */
+	int pe_position;
 };
 
-#define MAX_HEIGHT 5		/* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */
-#define EXTENDED_MAX_HEIGHT         7	/* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
-#define FIRST_PATH_ELEMENT_OFFSET   2	/* Must be equal to at least 2. */
+/*
+ * maximal height of a tree. don't change this without
+ * changing JOURNAL_PER_BALANCE_CNT
+ */
+#define MAX_HEIGHT 5
 
-#define ILLEGAL_PATH_ELEMENT_OFFSET 1	/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
-#define MAX_FEB_SIZE 6		/* this MUST be MAX_HEIGHT + 1. See about FEB below */
+/* Must be equal to MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
+#define EXTENDED_MAX_HEIGHT         7
 
-/* We need to keep track of who the ancestors of nodes are.  When we
-   perform a search we record which nodes were visited while
-   descending the tree looking for the node we searched for. This list
-   of nodes is called the path.  This information is used while
-   performing balancing.  Note that this path information may become
-   invalid, and this means we must check it when using it to see if it
-   is still valid. You'll need to read search_by_key and the comments
-   in it, especially about decrement_counters_in_path(), to understand
-   this structure.  
+/* Must be equal to at least 2. */
+#define FIRST_PATH_ELEMENT_OFFSET   2
 
-Paths make the code so much harder to work with and debug.... An
-enormous number of bugs are due to them, and trying to write or modify
-code that uses them just makes my head hurt.  They are based on an
-excessive effort to avoid disturbing the precious VFS code.:-( The
-gods only know how we are going to SMP the code that uses them.
-znodes are the way! */
+/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
+#define ILLEGAL_PATH_ELEMENT_OFFSET 1
+
+/* this MUST be MAX_HEIGHT + 1. See about FEB below */
+#define MAX_FEB_SIZE 6
+
+/*
+ * We need to keep track of who the ancestors of nodes are.  When we
+ * perform a search we record which nodes were visited while
+ * descending the tree looking for the node we searched for. This list
+ * of nodes is called the path.  This information is used while
+ * performing balancing.  Note that this path information may become
+ * invalid, and this means we must check it when using it to see if it
+ * is still valid. You'll need to read search_by_key and the comments
+ * in it, especially about decrement_counters_in_path(), to understand
+ * this structure.
+ *
+ * Paths make the code so much harder to work with and debug.... An
+ * enormous number of bugs are due to them, and trying to write or modify
+ * code that uses them just makes my head hurt.  They are based on an
+ * excessive effort to avoid disturbing the precious VFS code.:-( The
+ * gods only know how we are going to SMP the code that uses them.
+ * znodes are the way!
+ */
 
 #define PATH_READA	0x1	/* do read ahead */
 #define PATH_READA_BACK 0x2	/* read backwards */
@@ -1889,7 +2125,8 @@
 struct treepath {
 	int path_length;	/* Length of the array above.   */
 	int reada;
-	struct path_element path_elements[EXTENDED_MAX_HEIGHT];	/* Array of the path elements.  */
+	/* Array of the path elements.  */
+	struct path_element path_elements[EXTENDED_MAX_HEIGHT];
 	int pos_in_item;
 };
 
@@ -1908,41 +2145,124 @@
 #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
 
 #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
-				/* you know, to the person who didn't
-				   write this the macro name does not
-				   at first suggest what it does.
-				   Maybe POSITION_FROM_PATH_END? Or
-				   maybe we should just focus on
-				   dumping paths... -Hans */
+
+/*
+ * you know, to the person who didn't write this the macro name does not
+ * at first suggest what it does.  Maybe POSITION_FROM_PATH_END? Or
+ * maybe we should just focus on dumping paths... -Hans
+ */
 #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
 
-#define PATH_PITEM_HEAD(path)    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))
+/*
+ * in do_balance leaf has h == 0 in contrast with path structure,
+ * where root has level == 0. That is why we need these defines
+ */
 
-/* in do_balance leaf has h == 0 in contrast with path structure,
-   where root has level == 0. That is why we need these defines */
-#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h))	/* tb->S[h] */
-#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1)	/* tb->F[h] or tb->S[0]->b_parent */
-#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
-#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)	/* tb->S[h]->b_item_order */
+/* tb->S[h]: path buffer at balance height h (the leaf has h == 0) */
+#define PATH_H_PBUFFER(path, h) \
+			PATH_OFFSET_PBUFFER(path, (path)->path_length - (h))
+
+/* tb->F[h] or tb->S[0]->b_parent: path buffer one level above height h */
+#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1)
+
+#define PATH_H_POSITION(path, h) \
+			PATH_OFFSET_POSITION(path, (path)->path_length - (h))
+
+/* tb->S[h]->b_item_order: position recorded one level above height h */
+#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, (h) + 1)
 
 #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
 
+static inline void *reiserfs_node_data(const struct buffer_head *bh)
+{
+	return bh->b_data + sizeof(struct block_head);
+}
+
+/* get key from internal node */
+static inline struct reiserfs_key *internal_key(struct buffer_head *bh,
+						int item_num)
+{
+	struct reiserfs_key *key = reiserfs_node_data(bh);
+
+	return &key[item_num];
+}
+
+/* get the item header from leaf node */
+static inline struct item_head *item_head(const struct buffer_head *bh,
+					  int item_num)
+{
+	struct item_head *ih = reiserfs_node_data(bh);
+
+	return &ih[item_num];
+}
+
+/* get the key from leaf node */
+static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh,
+					    int item_num)
+{
+	return &item_head(bh, item_num)->ih_key;
+}
+
+static inline void *ih_item_body(const struct buffer_head *bh,
+				 const struct item_head *ih)
+{
+	return bh->b_data + ih_location(ih);
+}
+
+/* get item body from leaf node */
+static inline void *item_body(const struct buffer_head *bh, int item_num)
+{
+	return ih_item_body(bh, item_head(bh, item_num));
+}
+
+static inline struct item_head *tp_item_head(const struct treepath *path)
+{
+	return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path));
+}
+
+static inline void *tp_item_body(const struct treepath *path)
+{
+	return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path));
+}
+
 #define get_last_bh(path) PATH_PLAST_BUFFER(path)
-#define get_ih(path) PATH_PITEM_HEAD(path)
 #define get_item_pos(path) PATH_LAST_POSITION(path)
-#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
 #define item_moved(ih,path) comp_items(ih, path)
 #define path_changed(ih,path) comp_items (ih, path)
 
-/***************************************************************************/
-/*                       MISC                                              */
-/***************************************************************************/
+/* array of the entry headers: */
+/* the body of a directory item starts with its reiserfs_de_head array */
+#define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih)))
+
+/*
+ * length of the directory entry in directory item. This define
+ * calculates length of i-th directory entry using directory entry
+ * locations from dir entry head. When it calculates length of 0-th
+ * directory entry, it uses length of whole item in place of entry
+ * location of the non-existent following entry in the calculation.
+ * See picture above.
+ */
+static inline int entry_length(const struct buffer_head *bh,
+			       const struct item_head *ih, int pos_in_item)
+{
+	struct reiserfs_de_head *deh;
+
+	deh = B_I_DEH(bh, ih) + pos_in_item;
+	if (pos_in_item)
+		return deh_location(deh - 1) - deh_location(deh);
+
+	return ih_item_len(ih) - deh_location(deh);
+}
+
+/***************************************************************************
+ *                       MISC                                              *
+ ***************************************************************************/
 
 /* Size of pointer to the unformatted node. */
 #define UNFM_P_SIZE (sizeof(unp_t))
 #define UNFM_P_SHIFT 2
 
-// in in-core inode key is stored on le form
+/* in the in-core inode the key is stored in little-endian form */
 #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
 
 #define MAX_UL_INT 0xffffffff
@@ -1958,7 +2278,6 @@
 	return (loff_t) ((~(__u64) 0) >> 4);
 }
 
-/*#define MAX_KEY_UNIQUENESS	MAX_UL_INT*/
 #define MAX_KEY_OBJECTID	MAX_UL_INT
 
 #define MAX_B_NUM  MAX_UL_INT
@@ -1967,9 +2286,12 @@
 /* the purpose is to detect overflow of an unsigned short */
 #define REISERFS_LINK_MAX (MAX_US_INT - 1000)
 
-/* The following defines are used in reiserfs_insert_item and reiserfs_append_item  */
-#define REISERFS_KERNEL_MEM		0	/* reiserfs kernel memory mode  */
-#define REISERFS_USER_MEM		1	/* reiserfs user memory mode            */
+/*
+ * The following defines are used in reiserfs_insert_item
+ * and reiserfs_append_item
+ */
+#define REISERFS_KERNEL_MEM		0	/* kernel memory mode */
+#define REISERFS_USER_MEM		1	/* user memory mode */
 
 #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
 #define get_generation(s) atomic_read (&fs_generation(s))
@@ -1981,46 +2303,65 @@
 	__fs_changed(gen, s);		\
 })
 
-/***************************************************************************/
-/*                  FIXATE NODES                                           */
-/***************************************************************************/
+/***************************************************************************
+ *                  FIXATE NODES                                           *
+ ***************************************************************************/
 
 #define VI_TYPE_LEFT_MERGEABLE 1
 #define VI_TYPE_RIGHT_MERGEABLE 2
 
-/* To make any changes in the tree we always first find node, that
-   contains item to be changed/deleted or place to insert a new
-   item. We call this node S. To do balancing we need to decide what
-   we will shift to left/right neighbor, or to a new node, where new
-   item will be etc. To make this analysis simpler we build virtual
-   node. Virtual node is an array of items, that will replace items of
-   node S. (For instance if we are going to delete an item, virtual
-   node does not contain it). Virtual node keeps information about
-   item sizes and types, mergeability of first and last items, sizes
-   of all entries in directory item. We use this array of items when
-   calculating what we can shift to neighbors and how many nodes we
-   have to have if we do not any shiftings, if we shift to left/right
-   neighbor or to both. */
+/*
+ * To make any changes in the tree we always first find node, that
+ * contains item to be changed/deleted or place to insert a new
+ * item. We call this node S. To do balancing we need to decide what
+ * we will shift to left/right neighbor, or to a new node, where new
+ * item will be etc. To make this analysis simpler we build virtual
+ * node. Virtual node is an array of items, that will replace items of
+ * node S. (For instance if we are going to delete an item, virtual
+ * node does not contain it). Virtual node keeps information about
+ * item sizes and types, mergeability of first and last items, sizes
+ * of all entries in directory item. We use this array of items when
+ * calculating what we can shift to neighbors and how many nodes we
+ * need if we do not shift anything, if we shift to the left or right
+ * neighbor, or to both.
+ */
 struct virtual_item {
-	int vi_index;		// index in the array of item operations
-	unsigned short vi_type;	// left/right mergeability
-	unsigned short vi_item_len;	/* length of item that it will have after balancing */
+	int vi_index;		/* index in the array of item operations */
+	unsigned short vi_type;	/* left/right mergeability */
+
+	/* length of item that it will have after balancing */
+	unsigned short vi_item_len;
+
 	struct item_head *vi_ih;
-	const char *vi_item;	// body of item (old or new)
-	const void *vi_new_data;	// 0 always but paste mode
-	void *vi_uarea;		// item specific area
+	const char *vi_item;	/* body of item (old or new) */
+	const void *vi_new_data;	/* 0 always but paste mode */
+	void *vi_uarea;		/* item specific area */
 };
 
 struct virtual_node {
-	char *vn_free_ptr;	/* this is a pointer to the free space in the buffer */
+	/* this is a pointer to the free space in the buffer */
+	char *vn_free_ptr;
+
 	unsigned short vn_nr_item;	/* number of items in virtual node */
-	short vn_size;		/* size of node , that node would have if it has unlimited size and no balancing is performed */
-	short vn_mode;		/* mode of balancing (paste, insert, delete, cut) */
+
+	/*
+	 * size that the node would have if it had
+	 * unlimited size and no balancing were performed
+	 */
+	short vn_size;
+
+	/* mode of balancing (paste, insert, delete, cut) */
+	short vn_mode;
+
 	short vn_affected_item_num;
 	short vn_pos_in_item;
-	struct item_head *vn_ins_ih;	/* item header of inserted item, 0 for other modes */
+
+	/* item header of inserted item, 0 for other modes */
+	struct item_head *vn_ins_ih;
 	const void *vn_data;
-	struct virtual_item *vn_vi;	/* array of items (including a new one, excluding item to be deleted) */
+
+	/* array of items (including a new one, excluding item to be deleted) */
+	struct virtual_item *vn_vi;
 };
 
 /* used by directory items when creating virtual nodes */
@@ -2030,22 +2371,25 @@
 	__u16 entry_sizes[1];
 } __attribute__ ((__packed__));
 
-/***************************************************************************/
-/*                  TREE BALANCE                                           */
-/***************************************************************************/
+/***************************************************************************
+ *                  TREE BALANCE                                           *
+ ***************************************************************************/
 
-/* This temporary structure is used in tree balance algorithms, and
-   constructed as we go to the extent that its various parts are
-   needed.  It contains arrays of nodes that can potentially be
-   involved in the balancing of node S, and parameters that define how
-   each of the nodes must be balanced.  Note that in these algorithms
-   for balancing the worst case is to need to balance the current node
-   S and the left and right neighbors and all of their parents plus
-   create a new node.  We implement S1 balancing for the leaf nodes
-   and S0 balancing for the internal nodes (S1 and S0 are defined in
-   our papers.)*/
+/*
+ * This temporary structure is used in tree balance algorithms, and
+ * constructed as we go to the extent that its various parts are
+ * needed.  It contains arrays of nodes that can potentially be
+ * involved in the balancing of node S, and parameters that define how
+ * each of the nodes must be balanced.  Note that in these algorithms
+ * for balancing the worst case is to need to balance the current node
+ * S and the left and right neighbors and all of their parents plus
+ * create a new node.  We implement S1 balancing for the leaf nodes
+ * and S0 balancing for the internal nodes (S1 and S0 are defined in
+ * our papers.)
+ */
 
-#define MAX_FREE_BLOCK 7	/* size of the array of buffers to free at end of do_balance */
+/* size of the array of buffers to free at end of do_balance */
+#define MAX_FREE_BLOCK 7
 
 /* maximum number of FEB blocknrs on a single level */
 #define MAX_AMOUNT_NEEDED 2
@@ -2057,64 +2401,144 @@
 	struct super_block *tb_sb;
 	struct reiserfs_transaction_handle *transaction_handle;
 	struct treepath *tb_path;
-	struct buffer_head *L[MAX_HEIGHT];	/* array of left neighbors of nodes in the path */
-	struct buffer_head *R[MAX_HEIGHT];	/* array of right neighbors of nodes in the path */
-	struct buffer_head *FL[MAX_HEIGHT];	/* array of fathers of the left  neighbors      */
-	struct buffer_head *FR[MAX_HEIGHT];	/* array of fathers of the right neighbors      */
-	struct buffer_head *CFL[MAX_HEIGHT];	/* array of common parents of center node and its left neighbor  */
-	struct buffer_head *CFR[MAX_HEIGHT];	/* array of common parents of center node and its right neighbor */
 
-	struct buffer_head *FEB[MAX_FEB_SIZE];	/* array of empty buffers. Number of buffers in array equals
-						   cur_blknum. */
+	/* array of left neighbors of nodes in the path */
+	struct buffer_head *L[MAX_HEIGHT];
+
+	/* array of right neighbors of nodes in the path */
+	struct buffer_head *R[MAX_HEIGHT];
+
+	/* array of fathers of the left neighbors */
+	struct buffer_head *FL[MAX_HEIGHT];
+
+	/* array of fathers of the right neighbors */
+	struct buffer_head *FR[MAX_HEIGHT];
+	/* array of common parents of center node and its left neighbor */
+	struct buffer_head *CFL[MAX_HEIGHT];
+
+	/* array of common parents of center node and its right neighbor */
+	struct buffer_head *CFR[MAX_HEIGHT];
+
+	/*
+	 * array of empty buffers. Number of buffers in array equals
+	 * cur_blknum.
+	 */
+	struct buffer_head *FEB[MAX_FEB_SIZE];
 	struct buffer_head *used[MAX_FEB_SIZE];
 	struct buffer_head *thrown[MAX_FEB_SIZE];
-	int lnum[MAX_HEIGHT];	/* array of number of items which must be
-				   shifted to the left in order to balance the
-				   current node; for leaves includes item that
-				   will be partially shifted; for internal
-				   nodes, it is the number of child pointers
-				   rather than items. It includes the new item
-				   being created. The code sometimes subtracts
-				   one to get the number of wholly shifted
-				   items for other purposes. */
-	int rnum[MAX_HEIGHT];	/* substitute right for left in comment above */
-	int lkey[MAX_HEIGHT];	/* array indexed by height h mapping the key delimiting L[h] and
-				   S[h] to its item number within the node CFL[h] */
-	int rkey[MAX_HEIGHT];	/* substitute r for l in comment above */
-	int insert_size[MAX_HEIGHT];	/* the number of bytes by we are trying to add or remove from
-					   S[h]. A negative value means removing.  */
-	int blknum[MAX_HEIGHT];	/* number of nodes that will replace node S[h] after
-				   balancing on the level h of the tree.  If 0 then S is
-				   being deleted, if 1 then S is remaining and no new nodes
-				   are being created, if 2 or 3 then 1 or 2 new nodes is
-				   being created */
+
+	/*
+	 * array of number of items which must be shifted to the left in
+	 * order to balance the current node; for leaves includes item that
+	 * will be partially shifted; for internal nodes, it is the number
+	 * of child pointers rather than items. It includes the new item
+	 * being created. The code sometimes subtracts one to get the
+	 * number of wholly shifted items for other purposes.
+	 */
+	int lnum[MAX_HEIGHT];
+
+	/* substitute right for left in comment above */
+	int rnum[MAX_HEIGHT];
+
+	/*
+	 * array indexed by height h mapping the key delimiting L[h] and
+	 * S[h] to its item number within the node CFL[h]
+	 */
+	int lkey[MAX_HEIGHT];
+
+	/* substitute r for l in comment above */
+	int rkey[MAX_HEIGHT];
+
+	/*
+	 * the number of bytes we are trying to add to or remove from
+	 * S[h]. A negative value means removing.
+	 */
+	int insert_size[MAX_HEIGHT];
+
+	/*
+	 * number of nodes that will replace node S[h] after balancing
+	 * on the level h of the tree.  If 0 then S is being deleted,
+	 * if 1 then S is remaining and no new nodes are being created,
+	 * if 2 or 3 then 1 or 2 new nodes are being created
+	 */
+	int blknum[MAX_HEIGHT];
 
 	/* fields that are used only for balancing leaves of the tree */
-	int cur_blknum;		/* number of empty blocks having been already allocated                 */
-	int s0num;		/* number of items that fall into left most  node when S[0] splits     */
-	int s1num;		/* number of items that fall into first  new node when S[0] splits     */
-	int s2num;		/* number of items that fall into second new node when S[0] splits     */
-	int lbytes;		/* number of bytes which can flow to the left neighbor from the        left    */
-	/* most liquid item that cannot be shifted from S[0] entirely         */
-	/* if -1 then nothing will be partially shifted */
-	int rbytes;		/* number of bytes which will flow to the right neighbor from the right        */
-	/* most liquid item that cannot be shifted from S[0] entirely         */
-	/* if -1 then nothing will be partially shifted                           */
-	int s1bytes;		/* number of bytes which flow to the first  new node when S[0] splits   */
-	/* note: if S[0] splits into 3 nodes, then items do not need to be cut  */
-	int s2bytes;
-	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];	/* buffers which are to be freed after do_balance finishes by unfix_nodes */
-	char *vn_buf;		/* kmalloced memory. Used to create
-				   virtual node and keep map of
-				   dirtied bitmap blocks */
-	int vn_buf_size;	/* size of the vn_buf */
-	struct virtual_node *tb_vn;	/* VN starts after bitmap of bitmap blocks */
 
-	int fs_gen;		/* saved value of `reiserfs_generation' counter
-				   see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
+	/* number of empty blocks having been already allocated */
+	int cur_blknum;
+
+	/* number of items that fall into left most node when S[0] splits */
+	int s0num;
+
+	/*
+	 * number of bytes which can flow to the left neighbor from the
+	 * leftmost liquid item that cannot be shifted from S[0] entirely;
+	 * if -1 then nothing will be partially shifted
+	 */
+	int lbytes;
+
+	/*
+	 * number of bytes which will flow to the right neighbor from the
+	 * rightmost liquid item that cannot be shifted from S[0] entirely;
+	 * if -1 then nothing will be partially shifted
+	 */
+	int rbytes;
+
+
+	/*
+	 * index into the array of item headers in
+	 * S[0] of the affected item
+	 */
+	int item_pos;
+
+	/* new nodes allocated to hold what could not fit into S */
+	struct buffer_head *S_new[2];
+
+	/*
+	 * number of items that will be placed into nodes in S_new
+	 * when S[0] splits
+	 */
+	int snum[2];
+
+	/*
+	 * number of bytes which flow to nodes in S_new when S[0] splits
+	 * note: if S[0] splits into 3 nodes, then items do not need to be cut
+	 */
+	int sbytes[2];
+
+	int pos_in_item;
+	int zeroes_num;
+
+	/*
+	 * buffers which are to be freed after do_balance finishes
+	 * by unfix_nodes
+	 */
+	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];
+
+	/*
+	 * kmalloced memory. Used to create virtual node and keep
+	 * map of dirtied bitmap blocks
+	 */
+	char *vn_buf;
+
+	int vn_buf_size;	/* size of the vn_buf */
+
+	/* VN starts after bitmap of bitmap blocks */
+	struct virtual_node *tb_vn;
+
+	/*
+	 * saved value of `reiserfs_generation' counter see
+	 * FILESYSTEM_CHANGED() macro in reiserfs_fs.h
+	 */
+	int fs_gen;
+
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-	struct in_core_key key;	/* key pointer, to pass to block allocator or
-				   another low-level subsystem */
+	/*
+	 * key pointer, to pass to block allocator or
+	 * another low-level subsystem
+	 */
+	struct in_core_key key;
 #endif
 };
 
@@ -2122,20 +2546,24 @@
 
 /* When inserting an item. */
 #define M_INSERT	'i'
-/* When inserting into (directories only) or appending onto an already
-   existent item. */
+/*
+ * When inserting into (directories only) or appending onto an already
+ * existent item.
+ */
 #define M_PASTE		'p'
 /* When deleting an item. */
 #define M_DELETE	'd'
 /* When truncating an item or removing an entry from a (directory) item. */
-#define M_CUT 		'c'
+#define M_CUT		'c'
 
 /* used when balancing on leaf level skipped (in reiserfsck) */
 #define M_INTERNAL	'n'
 
-/* When further balancing is not needed, then do_balance does not need
-   to be called. */
-#define M_SKIP_BALANCING 		's'
+/*
+ * When further balancing is not needed, then do_balance does not need
+ * to be called.
+ */
+#define M_SKIP_BALANCING		's'
 #define M_CONVERT	'v'
 
 /* modes of leaf_move_items */
@@ -2148,8 +2576,10 @@
 #define FIRST_TO_LAST 0
 #define LAST_TO_FIRST 1
 
-/* used in do_balance for passing parent of node information that has
-   been gotten from tb struct */
+/*
+ * used in do_balance for passing parent of node information that has
+ * been gotten from tb struct
+ */
 struct buffer_info {
 	struct tree_balance *tb;
 	struct buffer_head *bi_bh;
@@ -2167,20 +2597,24 @@
 	return bi ? sb_from_tb(bi->tb) : NULL;
 }
 
-/* there are 4 types of items: stat data, directory item, indirect, direct.
-+-------------------+------------+--------------+------------+
-|	            |  k_offset  | k_uniqueness | mergeable? |
-+-------------------+------------+--------------+------------+
-|     stat data     |	0        |      0       |   no       |
-+-------------------+------------+--------------+------------+
-| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS|   no       | 
-| non 1st directory | hash value |              |   yes      |
-|     item          |            |              |            |
-+-------------------+------------+--------------+------------+
-| indirect item     | offset + 1 |TYPE_INDIRECT |   if this is not the first indirect item of the object
-+-------------------+------------+--------------+------------+
-| direct item       | offset + 1 |TYPE_DIRECT   | if not this is not the first direct item of the object
-+-------------------+------------+--------------+------------+
+/*
+ * there are 4 types of items: stat data, directory item, indirect, direct.
+ * +-------------------+------------+--------------+------------+
+ * |                   |  k_offset  | k_uniqueness | mergeable? |
+ * +-------------------+------------+--------------+------------+
+ * |     stat data     |     0      |      0       |   no       |
+ * +-------------------+------------+--------------+------------+
+ * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. |   no       |
+ * | non 1st directory | hash value | UNIQUENESS   |   yes      |
+ * |     item          |            |              |            |
+ * +-------------------+------------+--------------+------------+
+ * | indirect item     | offset + 1 |TYPE_INDIRECT |    [1]     |
+ * +-------------------+------------+--------------+------------+
+ * | direct item       | offset + 1 |TYPE_DIRECT   |    [2]     |
+ * +-------------------+------------+--------------+------------+
+ *
+ * [1] if this is not the first indirect item of the object
+ * [2] if this is not the first direct item of the object
 */
 
 struct item_operations {
@@ -2219,49 +2653,43 @@
 /* number of blocks pointed to by the indirect item */
 #define I_UNFM_NUM(ih)	(ih_item_len(ih) / UNFM_P_SIZE)
 
-/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
+/*
+ * the used space within the unformatted node corresponding
+ * to pos within the item pointed to by ih
+ */
 #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
 
-/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
+/*
+ * number of bytes contained by the direct item or the
+ * unformatted nodes the indirect item points to
+ */
 
-/* get the item header */
-#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
-
-/* get key */
-#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
-
-/* get the key */
-#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
-
-/* get item body */
-#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
-
-/* get the stat data by the buffer header and the item order */
-#define B_N_STAT_DATA(bh,nr) \
-( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
-
-    /* following defines use reiserfs buffer header and item header */
+/* following defines use reiserfs buffer header and item header */
 
 /* get stat-data */
 #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
 
-// this is 3976 for size==4096
+/* this is 3976 for size==4096 */
 #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
 
-/* indirect items consist of entries which contain blocknrs, pos
-   indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
-   blocknr contained by the entry pos points to */
-#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
-#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
+/*
+ * indirect items consist of entries which contain blocknrs, pos
+ * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
+ * blocknr contained by the entry pos points to
+ */
+#define B_I_POS_UNFM_POINTER(bh, ih, pos)				\
+	le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos)))
+#define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val)			\
+	(*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val))
 
 struct reiserfs_iget_args {
 	__u32 objectid;
 	__u32 dirid;
 };
 
-/***************************************************************************/
-/*                    FUNCTION DECLARATIONS                                */
-/***************************************************************************/
+/***************************************************************************
+ *                    FUNCTION DECLARATIONS                                *
+ ***************************************************************************/
 
 #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
 
@@ -2273,7 +2701,10 @@
 /* first block written in a commit.  */
 struct reiserfs_journal_desc {
 	__le32 j_trans_id;	/* id of commit */
-	__le32 j_len;		/* length of commit. len +1 is the commit block */
+
+	/* length of commit. len +1 is the commit block */
+	__le32 j_len;
+
 	__le32 j_mount_id;	/* mount id of this trans */
 	__le32 j_realblock[1];	/* real locations for each block */
 };
@@ -2300,22 +2731,35 @@
 #define set_commit_trans_id(c,val)     do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
 #define set_commit_trans_len(c,val)    do { (c)->j_len = cpu_to_le32 (val); } while (0)
 
-/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
-** last fully flushed transaction.  fully flushed means all the log blocks and all the real blocks are on disk,
-** and this transaction does not need to be replayed.
-*/
+/*
+ * this header block gets written whenever a transaction is considered
+ * fully flushed, and is more recent than the last fully flushed transaction.
+ * fully flushed means all the log blocks and all the real blocks are on
+ * disk, and this transaction does not need to be replayed.
+ */
 struct reiserfs_journal_header {
-	__le32 j_last_flush_trans_id;	/* id of last fully flushed transaction */
-	__le32 j_first_unflushed_offset;	/* offset in the log of where to start replay after a crash */
+	/* id of last fully flushed transaction */
+	__le32 j_last_flush_trans_id;
+
+	/* offset in the log of where to start replay after a crash */
+	__le32 j_first_unflushed_offset;
+
 	__le32 j_mount_id;
 	/* 12 */ struct journal_params jh_journal;
 };
 
 /* biggest tunable defines are right here */
 #define JOURNAL_BLOCK_COUNT 8192	/* number of blocks in the journal */
-#define JOURNAL_TRANS_MAX_DEFAULT 1024	/* biggest possible single transaction, don't change for now (8/3/99) */
+
+/* biggest possible single transaction, don't change for now (8/3/99) */
+#define JOURNAL_TRANS_MAX_DEFAULT 1024
 #define JOURNAL_TRANS_MIN_DEFAULT 256
-#define JOURNAL_MAX_BATCH_DEFAULT   900	/* max blocks to batch into one transaction, don't make this any bigger than 900 */
+
+/*
+ * max blocks to batch into one transaction,
+ * don't make this any bigger than 900
+ */
+#define JOURNAL_MAX_BATCH_DEFAULT   900
 #define JOURNAL_MIN_RATIO 2
 #define JOURNAL_MAX_COMMIT_AGE 30
 #define JOURNAL_MAX_TRANS_AGE 30
@@ -2340,16 +2784,18 @@
 #define REISERFS_QUOTA_DEL_BLOCKS(s) 0
 #endif
 
-/* both of these can be as low as 1, or as high as you want.  The min is the
-** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
-** as needed, and released when transactions are committed.  On release, if 
-** the current number of nodes is > max, the node is freed, otherwise, 
-** it is put on a free list for faster use later.
+/*
+ * both of these can be as low as 1, or as high as you want.  The min is the
+ * number of 4k bitmap nodes preallocated on mount. New nodes are allocated
+ * as needed, and released when transactions are committed.  On release, if
+ * the current number of nodes is > max, the node is freed, otherwise,
+ * it is put on a free list for faster use later.
 */
 #define REISERFS_MIN_BITMAP_NODES 10
 #define REISERFS_MAX_BITMAP_NODES 100
 
-#define JBH_HASH_SHIFT 13	/* these are based on journal hash size of 8192 */
+/* these are based on journal hash size of 8192 */
+#define JBH_HASH_SHIFT 13
 #define JBH_HASH_MASK 8191
 
 #define _jhashfn(sb,block)	\
@@ -2357,7 +2803,7 @@
 	 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
 #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
 
-// We need these to make journal.c code more readable
+/* We need these to make journal.c code more readable */
 #define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
 #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
 #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
@@ -2365,12 +2811,14 @@
 enum reiserfs_bh_state_bits {
 	BH_JDirty = BH_PrivateStart,	/* buffer is in current transaction */
 	BH_JDirty_wait,
-	BH_JNew,		/* disk block was taken off free list before
-				 * being in a finished transaction, or
-				 * written to disk. Can be reused immed. */
+	/*
+	 * disk block was taken off free list before being in a
+	 * finished transaction, or written to disk. Can be reused immed.
+	 */
+	BH_JNew,
 	BH_JPrepared,
 	BH_JRestore_dirty,
-	BH_JTest,		// debugging only will go away
+	BH_JTest,		/* debugging only will go away */
 };
 
 BUFFER_FNS(JDirty, journaled);
@@ -2386,27 +2834,36 @@
 BUFFER_FNS(JTest, journal_test);
 TAS_BUFFER_FNS(JTest, journal_test);
 
-/*
-** transaction handle which is passed around for all journal calls
-*/
+/* transaction handle which is passed around for all journal calls */
 struct reiserfs_transaction_handle {
-	struct super_block *t_super;	/* super for this FS when journal_begin was
-					   called. saves calls to reiserfs_get_super
-					   also used by nested transactions to make
-					   sure they are nesting on the right FS
-					   _must_ be first in the handle
-					 */
+	/*
+	 * super for this FS when journal_begin was called. saves calls to
+	 * reiserfs_get_super. also used by nested transactions to make
+	 * sure they are nesting on the right FS. _must_ be first
+	 * in the handle
+	 */
+	struct super_block *t_super;
+
 	int t_refcount;
 	int t_blocks_logged;	/* number of blocks this writer has logged */
 	int t_blocks_allocated;	/* number of blocks this writer allocated */
-	unsigned int t_trans_id;	/* sanity check, equals the current trans id */
+
+	/* sanity check, equals the current trans id */
+	unsigned int t_trans_id;
+
 	void *t_handle_save;	/* save existing current->journal_info */
-	unsigned displace_new_blocks:1;	/* if new block allocation occurres, that block
-					   should be displaced from others */
+
+	/*
+	 * if new block allocation occurs, that block
+	 * should be displaced from others
+	 */
+	unsigned displace_new_blocks:1;
+
 	struct list_head t_list;
 };
 
-/* used to keep track of ordered and tail writes, attached to the buffer
+/*
+ * used to keep track of ordered and tail writes, attached to the buffer
  * head through b_journal_head.
  */
 struct reiserfs_jh {
@@ -2419,7 +2876,7 @@
 int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
 int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
 int journal_mark_dirty(struct reiserfs_transaction_handle *,
-		       struct super_block *, struct buffer_head *bh);
+		       struct buffer_head *bh);
 
 static inline int reiserfs_file_data_log(struct inode *inode)
 {
@@ -2469,10 +2926,8 @@
 int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
 int journal_release_error(struct reiserfs_transaction_handle *,
 			  struct super_block *);
-int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
-		unsigned long);
-int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
-		     unsigned long);
+int journal_end(struct reiserfs_transaction_handle *);
+int journal_end_sync(struct reiserfs_transaction_handle *);
 int journal_mark_freed(struct reiserfs_transaction_handle *,
 		       struct super_block *, b_blocknr_t blocknr);
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
@@ -2481,7 +2936,7 @@
 int journal_begin(struct reiserfs_transaction_handle *,
 		  struct super_block *sb, unsigned long);
 int journal_join_abort(struct reiserfs_transaction_handle *,
-		       struct super_block *sb, unsigned long);
+		       struct super_block *sb);
 void reiserfs_abort_journal(struct super_block *sb, int errno);
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
 int reiserfs_allocate_list_bitmaps(struct super_block *s,
@@ -2503,20 +2958,18 @@
 extern void copy_item_head(struct item_head *to,
 			   const struct item_head *from);
 
-// first key is in cpu form, second - le
+/* first key is in cpu form, second - le */
 extern int comp_short_keys(const struct reiserfs_key *le_key,
 			   const struct cpu_key *cpu_key);
 extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
 
-// both are in le form
+/* both are in le form */
 extern int comp_le_keys(const struct reiserfs_key *,
 			const struct reiserfs_key *);
 extern int comp_short_le_keys(const struct reiserfs_key *,
 			      const struct reiserfs_key *);
 
-//
-// get key version from on disk key - kludge
-//
+/* get key version from on disk key - kludge */
 static inline int le_key_version(const struct reiserfs_key *key)
 {
 	int type;
@@ -2593,12 +3046,12 @@
 
 /* inode.c */
 /* args for the create parameter of reiserfs_get_block */
-#define GET_BLOCK_NO_CREATE 0	/* don't create new blocks or convert tails */
-#define GET_BLOCK_CREATE 1	/* add anything you need to find block */
-#define GET_BLOCK_NO_HOLE 2	/* return -ENOENT for file holes */
-#define GET_BLOCK_READ_DIRECT 4	/* read the tail if indirect item not found */
-#define GET_BLOCK_NO_IMUX     8	/* i_mutex is not held, don't preallocate */
-#define GET_BLOCK_NO_DANGLE   16	/* don't leave any transactions running */
+#define GET_BLOCK_NO_CREATE 0	 /* don't create new blocks or convert tails */
+#define GET_BLOCK_CREATE 1	 /* add anything you need to find block */
+#define GET_BLOCK_NO_HOLE 2	 /* return -ENOENT for file holes */
+#define GET_BLOCK_READ_DIRECT 4	 /* read the tail if indirect item not found */
+#define GET_BLOCK_NO_IMUX     8	 /* i_mutex is not held, don't preallocate */
+#define GET_BLOCK_NO_DANGLE   16 /* don't leave any transactions running */
 
 void reiserfs_read_locked_inode(struct inode *inode,
 				struct reiserfs_iget_args *args);
@@ -2797,25 +3250,49 @@
 
 /* bitmap.c */
 
-/* structure contains hints for block allocator, and it is a container for
- * arguments, such as node, search path, transaction_handle, etc. */
+/*
+ * structure contains hints for block allocator, and it is a container for
+ * arguments, such as node, search path, transaction_handle, etc.
+ */
 struct __reiserfs_blocknr_hint {
-	struct inode *inode;	/* inode passed to allocator, if we allocate unf. nodes */
+	/* inode passed to allocator, if we allocate unf. nodes */
+	struct inode *inode;
+
 	sector_t block;		/* file offset, in blocks */
 	struct in_core_key key;
-	struct treepath *path;	/* search path, used by allocator to deternine search_start by
-				 * various ways */
-	struct reiserfs_transaction_handle *th;	/* transaction handle is needed to log super blocks and
-						 * bitmap blocks changes  */
-	b_blocknr_t beg, end;
-	b_blocknr_t search_start;	/* a field used to transfer search start value (block number)
-					 * between different block allocator procedures
-					 * (determine_search_start() and others) */
-	int prealloc_size;	/* is set in determine_prealloc_size() function, used by underlayed
-				 * function that do actual allocation */
 
-	unsigned formatted_node:1;	/* the allocator uses different polices for getting disk space for
-					 * formatted/unformatted blocks with/without preallocation */
+	/*
+	 * search path, used by allocator to determine search_start by
+	 * various ways
+	 */
+	struct treepath *path;
+
+	/*
+	 * transaction handle is needed to log super blocks
+	 * and bitmap blocks changes
+	 */
+	struct reiserfs_transaction_handle *th;
+
+	b_blocknr_t beg, end;
+
+	/*
+	 * a field used to transfer search start value (block number)
+	 * between different block allocator procedures
+	 * (determine_search_start() and others)
+	 */
+	b_blocknr_t search_start;
+
+	/*
+	 * is set in determine_prealloc_size() function,
+	 * used by underlying functions that do actual allocation
+	 */
+	int prealloc_size;
+
+	/*
+	 * the allocator uses different policies for getting disk
+	 * space for formatted/unformatted blocks with/without preallocation
+	 */
+	unsigned formatted_node:1;
 	unsigned preallocate:1;
 };
 
@@ -2909,13 +3386,15 @@
 #define reiserfs_test_le_bit		test_bit_le
 #define reiserfs_find_next_zero_le_bit	find_next_zero_bit_le
 
-/* sometimes reiserfs_truncate may require to allocate few new blocks
-   to perform indirect2direct conversion. People probably used to
-   think, that truncate should work without problems on a filesystem
-   without free disk space. They may complain that they can not
-   truncate due to lack of free disk space. This spare space allows us
-   to not worry about it. 500 is probably too much, but it should be
-   absolutely safe */
+/*
+ * sometimes reiserfs_truncate may require to allocate few new blocks
+ * to perform indirect2direct conversion. People probably used to
+ * think, that truncate should work without problems on a filesystem
+ * without free disk space. They may complain that they can not
+ * truncate due to lack of free disk space. This spare space allows us
+ * to not worry about it. 500 is probably too much, but it should be
+ * absolutely safe
+ */
 #define SPARE_SPACE 500
 
 /* prototypes from ioctl.c */

diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index a4ef5cd..6052d32 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c

@@ -53,8 +53,10 @@
 	}
 	bforget(bh);
 
-	/* old disk layout detection; those partitions can be mounted, but
-	 * cannot be resized */
+	/*
+	 * old disk layout detection; those partitions can be mounted, but
+	 * cannot be resized
+	 */
 	if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
 	    != REISERFS_DISK_OFFSET_IN_BYTES) {
 		printk
@@ -86,12 +88,14 @@
 			    ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
 			return -ENOMEM;
 		}
-		/* the new journal bitmaps are zero filled, now we copy in the bitmap
-		 ** node pointers from the old journal bitmap structs, and then
-		 ** transfer the new data structures into the journal struct.
-		 **
-		 ** using the copy_size var below allows this code to work for
-		 ** both shrinking and expanding the FS.
+		/*
+		 * the new journal bitmaps are zero filled, now we copy in
+		 * the bitmap node pointers from the old journal bitmap
+		 * structs, and then transfer the new data structures
+		 * into the journal struct.
+		 *
+		 * using the copy_size var below allows this code to work for
+		 * both shrinking and expanding the FS.
 		 */
 		copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
 		copy_size =
@@ -101,36 +105,45 @@
 			jb = SB_JOURNAL(s)->j_list_bitmap + i;
 			memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
 
-			/* just in case vfree schedules on us, copy the new
-			 ** pointer into the journal struct before freeing the
-			 ** old one
+			/*
+			 * just in case vfree schedules on us, copy the new
+			 * pointer into the journal struct before freeing the
+			 * old one
 			 */
 			node_tmp = jb->bitmaps;
 			jb->bitmaps = jbitmap[i].bitmaps;
 			vfree(node_tmp);
 		}
 
-		/* allocate additional bitmap blocks, reallocate array of bitmap
-		 * block pointers */
+		/*
+		 * allocate additional bitmap blocks, reallocate
+		 * array of bitmap block pointers
+		 */
 		bitmap =
 		    vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
 		if (!bitmap) {
-			/* Journal bitmaps are still supersized, but the memory isn't
-			 * leaked, so I guess it's ok */
+			/*
+			 * Journal bitmaps are still supersized, but the
+			 * memory isn't leaked, so I guess it's ok
+			 */
 			printk("reiserfs_resize: unable to allocate memory.\n");
 			return -ENOMEM;
 		}
 		for (i = 0; i < bmap_nr; i++)
 			bitmap[i] = old_bitmap[i];
 
-		/* This doesn't go through the journal, but it doesn't have to.
-		 * The changes are still atomic: We're synced up when the journal
-		 * transaction begins, and the new bitmaps don't matter if the
-		 * transaction fails. */
+		/*
+		 * This doesn't go through the journal, but it doesn't have to.
+		 * The changes are still atomic: We're synced up when the
+		 * journal transaction begins, and the new bitmaps don't
+		 * matter if the transaction fails.
+		 */
 		for (i = bmap_nr; i < bmap_nr_new; i++) {
 			int depth;
-			/* don't use read_bitmap_block since it will cache
-			 * the uninitialized bitmap */
+			/*
+			 * don't use read_bitmap_block since it will cache
+			 * the uninitialized bitmap
+			 */
 			depth = reiserfs_write_unlock_nested(s);
 			bh = sb_bread(s, i * s->s_blocksize * 8);
 			reiserfs_write_lock_nested(s, depth);
@@ -147,7 +160,7 @@
 			depth = reiserfs_write_unlock_nested(s);
 			sync_dirty_buffer(bh);
 			reiserfs_write_lock_nested(s, depth);
-			// update bitmap_info stuff
+			/* update bitmap_info stuff */
 			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 			brelse(bh);
 		}
@@ -156,9 +169,11 @@
 		vfree(old_bitmap);
 	}
 
-	/* begin transaction, if there was an error, it's fine. Yes, we have
+	/*
+	 * begin transaction, if there was an error, it's fine. Yes, we have
 	 * incorrect bitmaps now, but none of it is ever going to touch the
-	 * disk anyway. */
+	 * disk anyway.
+	 */
 	err = journal_begin(&th, s, 10);
 	if (err)
 		return err;
@@ -167,7 +182,7 @@
 	info = SB_AP_BITMAP(s) + bmap_nr - 1;
 	bh = reiserfs_read_bitmap_block(s, bmap_nr - 1);
 	if (!bh) {
-		int jerr = journal_end(&th, s, 10);
+		int jerr = journal_end(&th);
 		if (jerr)
 			return jerr;
 		return -EIO;
@@ -178,14 +193,14 @@
 		reiserfs_clear_le_bit(i, bh->b_data);
 	info->free_count += s->s_blocksize * 8 - block_r;
 
-	journal_mark_dirty(&th, s, bh);
+	journal_mark_dirty(&th, bh);
 	brelse(bh);
 
 	/* Correct new last bitmap block - It may not be full */
 	info = SB_AP_BITMAP(s) + bmap_nr_new - 1;
 	bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1);
 	if (!bh) {
-		int jerr = journal_end(&th, s, 10);
+		int jerr = journal_end(&th);
 		if (jerr)
 			return jerr;
 		return -EIO;
@@ -194,7 +209,7 @@
 	reiserfs_prepare_for_journal(s, bh, 1);
 	for (i = block_r_new; i < s->s_blocksize * 8; i++)
 		reiserfs_set_le_bit(i, bh->b_data);
-	journal_mark_dirty(&th, s, bh);
+	journal_mark_dirty(&th, bh);
 	brelse(bh);
 
 	info->free_count -= s->s_blocksize * 8 - block_r_new;
@@ -207,8 +222,8 @@
 	PUT_SB_BLOCK_COUNT(s, block_count_new);
 	PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
 
-	journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+	journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
 
 	SB_JOURNAL(s)->j_must_wait = 1;
-	return journal_end(&th, s, 10);
+	return journal_end(&th);
 }

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 615cd9a..dd44468 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c

@@ -8,46 +8,6 @@
  *  Pereslavl-Zalessky Russia
  */
 
-/*
- *  This file contains functions dealing with S+tree
- *
- * B_IS_IN_TREE
- * copy_item_head
- * comp_short_keys
- * comp_keys
- * comp_short_le_keys
- * le_key2cpu_key
- * comp_le_keys
- * bin_search
- * get_lkey
- * get_rkey
- * key_in_buffer
- * decrement_bcount
- * reiserfs_check_path
- * pathrelse_and_restore
- * pathrelse
- * search_by_key_reada
- * search_by_key
- * search_for_position_by_key
- * comp_items
- * prepare_for_direct_item
- * prepare_for_direntry_item
- * prepare_for_delete_or_cut
- * calc_deleted_bytes_number
- * init_tb_struct
- * padd_item
- * reiserfs_delete_item
- * reiserfs_delete_solid_item
- * reiserfs_delete_object
- * maybe_indirect_to_direct
- * indirect_to_direct_roll_back
- * reiserfs_cut_from_item
- * truncate_directory
- * reiserfs_do_truncate
- * reiserfs_paste_into_item
- * reiserfs_insert_item
- */
-
 #include <linux/time.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
@@ -65,21 +25,21 @@
 	return (B_LEVEL(bh) != FREE_LEVEL);
 }
 
-//
-// to gets item head in le form
-//
+/* to get item head in le form */
 inline void copy_item_head(struct item_head *to,
 			   const struct item_head *from)
 {
 	memcpy(to, from, IH_SIZE);
 }
 
-/* k1 is pointer to on-disk structure which is stored in little-endian
-   form. k2 is pointer to cpu variable. For key of items of the same
-   object this returns 0.
-   Returns: -1 if key1 < key2
-   0 if key1 == key2
-   1 if key1 > key2 */
+/*
+ * k1 is pointer to on-disk structure which is stored in little-endian
+ * form. k2 is pointer to cpu variable. For key of items of the same
+ * object this returns 0.
+ * Returns: -1 if key1 < key2
+ * 0 if key1 == key2
+ * 1 if key1 > key2
+ */
 inline int comp_short_keys(const struct reiserfs_key *le_key,
 			   const struct cpu_key *cpu_key)
 {
@@ -97,11 +57,13 @@
 	return 0;
 }
 
-/* k1 is pointer to on-disk structure which is stored in little-endian
-   form. k2 is pointer to cpu variable.
-   Compare keys using all 4 key fields.
-   Returns: -1 if key1 < key2 0
-   if key1 = key2 1 if key1 > key2 */
+/*
+ * k1 is pointer to on-disk structure which is stored in little-endian
+ * form. k2 is pointer to cpu variable.
+ * Compare keys using all 4 key fields.
+ * Returns: -1 if key1 < key2,
+ * 0 if key1 = key2, 1 if key1 > key2
+ */
 static inline int comp_keys(const struct reiserfs_key *le_key,
 			    const struct cpu_key *cpu_key)
 {
@@ -155,15 +117,17 @@
 	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
 	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
 
-	// find out version of the key
+	/* find out version of the key */
 	version = le_key_version(from);
 	to->version = version;
 	to->on_disk_key.k_offset = le_key_k_offset(version, from);
 	to->on_disk_key.k_type = le_key_k_type(version, from);
 }
 
-// this does not say which one is bigger, it only returns 1 if keys
-// are not equal, 0 otherwise
+/*
+ * this does not say which one is bigger, it only returns 1 if keys
+ * are not equal, 0 otherwise
+ */
 inline int comp_le_keys(const struct reiserfs_key *k1,
 			const struct reiserfs_key *k2)
 {
@@ -177,24 +141,27 @@
  *        *pos = number of the searched element if found, else the        *
  *        number of the first element that is larger than key.            *
  **************************************************************************/
-/* For those not familiar with binary search: lbound is the leftmost item that it
- could be, rbound the rightmost item that it could be.  We examine the item
- halfway between lbound and rbound, and that tells us either that we can increase
- lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that
- there are no possible items, and we have not found it. With each examination we
- cut the number of possible items it could be by one more than half rounded down,
- or we find it. */
+/*
+ * For those not familiar with binary search: lbound is the leftmost item
+ * that it could be, rbound the rightmost item that it could be.  We examine
+ * the item halfway between lbound and rbound, and that tells us either
+ * that we can increase lbound, or decrease rbound, or that we have found it,
+ * or if lbound > rbound that there are no possible items, and we have not
+ * found it. With each examination we cut the number of possible items it
+ * could be by one more than half rounded down, or we find it.
+ */
 static inline int bin_search(const void *key,	/* Key to search for. */
 			     const void *base,	/* First item in the array. */
 			     int num,	/* Number of items in the array. */
-			     int width,	/* Item size in the array.
-					   searched. Lest the reader be
-					   confused, note that this is crafted
-					   as a general function, and when it
-					   is applied specifically to the array
-					   of item headers in a node, width
-					   is actually the item header size not
-					   the item size. */
+			     /*
+			      * Item size in the array searched. Lest the
+			      * reader be confused, note that this is crafted
+			      * as a general function, and when it is applied
+			      * specifically to the array of item headers in a
+			      * node, width is actually the item header size
+			      * not the item size.
+			      */
+			     int width,
 			     int *pos /* Number of the searched for element. */
     )
 {
@@ -216,8 +183,10 @@
 			return ITEM_FOUND;	/* Key found in the array.  */
 		}
 
-	/* bin_search did not find given key, it returns position of key,
-	   that is minimal and greater than the given one. */
+	/*
+	 * bin_search did not find given key, it returns position of key,
+	 * that is minimal and greater than the given one.
+	 */
 	*pos = lbound;
 	return ITEM_NOT_FOUND;
 }
@@ -234,10 +203,14 @@
 	  cpu_to_le32(0xffffffff)},}
 };
 
-/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
-   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
-   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
-   case we return a special key, either MIN_KEY or MAX_KEY. */
+/*
+ * Get delimiting key of the buffer by looking for it in the buffers in the
+ * path, starting from the bottom of the path, and going upwards.  We must
+ * check the path's validity at each step.  If the key is not in the path,
+ * there is no delimiting key in the tree (buffer is first or last buffer
+ * in tree), and in this case we return a special key, either MIN_KEY or
+ * MAX_KEY.
+ */
 static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
 						  const struct super_block *sb)
 {
@@ -270,9 +243,12 @@
 		    PATH_OFFSET_PBUFFER(chk_path,
 					path_offset + 1)->b_blocknr)
 			return &MAX_KEY;
-		/* Return delimiting key if position in the parent is not equal to zero. */
+		/*
+		 * Return delimiting key if position in the parent
+		 * is not equal to zero.
+		 */
 		if (position)
-			return B_N_PDELIM_KEY(parent, position - 1);
+			return internal_key(parent, position - 1);
 	}
 	/* Return MIN_KEY if we are in the root of the buffer tree. */
 	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
@@ -308,15 +284,23 @@
 					  path_offset)) >
 		    B_NR_ITEMS(parent))
 			return &MIN_KEY;
-		/* Check whether parent at the path really points to the child. */
+		/*
+		 * Check whether parent at the path really points
+		 * to the child.
+		 */
 		if (B_N_CHILD_NUM(parent, position) !=
 		    PATH_OFFSET_PBUFFER(chk_path,
 					path_offset + 1)->b_blocknr)
 			return &MIN_KEY;
-		/* Return delimiting key if position in the parent is not the last one. */
+
+		/*
+		 * Return delimiting key if position in the parent
+		 * is not the last one.
+		 */
 		if (position != B_NR_ITEMS(parent))
-			return B_N_PDELIM_KEY(parent, position);
+			return internal_key(parent, position);
 	}
+
 	/* Return MAX_KEY if we are in the root of the buffer tree. */
 	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
 	    b_blocknr == SB_ROOT_BLOCK(sb))
@@ -324,13 +308,20 @@
 	return &MIN_KEY;
 }
 
-/* Check whether a key is contained in the tree rooted from a buffer at a path. */
-/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
-   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
-   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
-   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
-static inline int key_in_buffer(struct treepath *chk_path,	/* Path which should be checked.  */
-				const struct cpu_key *key,	/* Key which should be checked.   */
+/*
+ * Check whether a key is contained in the tree rooted from a buffer at a path.
+ * This works by looking at the left and right delimiting keys for the buffer
+ * in the last path_element in the path.  These delimiting keys are stored
+ * at least one level above that buffer in the tree. If the buffer is the
+ * first or last node in the tree order then one of the delimiting keys may
+ * be absent, and in this case get_lkey and get_rkey return a special key
+ * which is MIN_KEY or MAX_KEY.
+ */
+static inline int key_in_buffer(
+				/* Path which should be checked. */
+				struct treepath *chk_path,
+				/* Key which should be checked. */
+				const struct cpu_key *key,
 				struct super_block *sb
     )
 {
@@ -359,9 +350,11 @@
 	return 0;
 }
 
-/* Drop the reference to each buffer in a path and restore
+/*
+ * Drop the reference to each buffer in a path and restore
  * dirty bits clean when preparing the buffer for the log.
- * This version should only be called from fix_nodes() */
+ * This version should only be called from fix_nodes()
+ */
 void pathrelse_and_restore(struct super_block *sb,
 			   struct treepath *search_path)
 {
@@ -418,14 +411,17 @@
 	}
 	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
 	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
+
+	/* free space does not match the calculated amount of used space */
 	if (used_space != blocksize - blkh_free_space(blkh)) {
-		/* free space does not match to calculated amount of use space */
 		reiserfs_warning(NULL, "reiserfs-5082",
 				 "free space seems wrong: %z", bh);
 		return 0;
 	}
-	// FIXME: it is_leaf will hit performance too much - we may have
-	// return 1 here
+	/*
+	 * FIXME: it is_leaf will hit performance too much - we may have
+	 * return 1 here
+	 */
 
 	/* check tables of item heads */
 	ih = (struct item_head *)(buf + BLKH_SIZE);
@@ -460,7 +456,7 @@
 		prev_location = ih_location(ih);
 	}
 
-	// one may imagine much more checks
+	/* one may imagine many more checks */
 	return 1;
 }
 
@@ -481,8 +477,8 @@
 	}
 
 	nr = blkh_nr_item(blkh);
+	/* for internal which is not root we might check min number of keys */
 	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
-		/* for internal which is not root we might check min number of keys */
 		reiserfs_warning(NULL, "reiserfs-5088",
 				 "number of key seems wrong: %z", bh);
 		return 0;
@@ -494,12 +490,15 @@
 				 "free space seems wrong: %z", bh);
 		return 0;
 	}
-	// one may imagine much more checks
+
+	/* one may imagine many more checks */
 	return 1;
 }
 
-// make sure that bh contains formatted node of reiserfs tree of
-// 'level'-th level
+/*
+ * make sure that bh contains formatted node of reiserfs tree of
+ * 'level'-th level
+ */
 static int is_tree_node(struct buffer_head *bh, int level)
 {
 	if (B_LEVEL(bh) != level) {
@@ -546,7 +545,8 @@
 	for (j = 0; j < i; j++) {
 		/*
 		 * note, this needs attention if we are getting rid of the BKL
-		 * you have to make sure the prepared bit isn't set on this buffer
+		 * you have to make sure the prepared bit isn't set on this
+		 * buffer
 		 */
 		if (!buffer_uptodate(bh[j])) {
 			if (depth == -1)
@@ -558,39 +558,34 @@
 	return depth;
 }
 
-/**************************************************************************
- * Algorithm   SearchByKey                                                *
- *             look for item in the Disk S+Tree by its key                *
- * Input:  sb   -  super block                                            *
- *         key  - pointer to the key to search                            *
- * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
- *         search_path - path from the root to the needed leaf            *
- **************************************************************************/
-
-/* This function fills up the path from the root to the leaf as it
-   descends the tree looking for the key.  It uses reiserfs_bread to
-   try to find buffers in the cache given their block number.  If it
-   does not find them in the cache it reads them from disk.  For each
-   node search_by_key finds using reiserfs_bread it then uses
-   bin_search to look through that node.  bin_search will find the
-   position of the block_number of the next node if it is looking
-   through an internal node.  If it is looking through a leaf node
-   bin_search will find the position of the item which has key either
-   equal to given key, or which is the maximal key less than the given
-   key.  search_by_key returns a path that must be checked for the
-   correctness of the top of the path but need not be checked for the
-   correctness of the bottom of the path */
-/* The function is NOT SCHEDULE-SAFE! */
-int search_by_key(struct super_block *sb, const struct cpu_key *key,	/* Key to search. */
-		  struct treepath *search_path,/* This structure was
-						   allocated and initialized
-						   by the calling
-						   function. It is filled up
-						   by this function.  */
-		  int stop_level	/* How far down the tree to search. To
-					   stop at leaf level - set to
-					   DISK_LEAF_NODE_LEVEL */
-    )
+/*
+ * This function fills up the path from the root to the leaf as it
+ * descends the tree looking for the key.  It uses reiserfs_bread to
+ * try to find buffers in the cache given their block number.  If it
+ * does not find them in the cache it reads them from disk.  For each
+ * node search_by_key finds using reiserfs_bread it then uses
+ * bin_search to look through that node.  bin_search will find the
+ * position of the block_number of the next node if it is looking
+ * through an internal node.  If it is looking through a leaf node
+ * bin_search will find the position of the item which has key either
+ * equal to given key, or which is the maximal key less than the given
+ * key.  search_by_key returns a path that must be checked for the
+ * correctness of the top of the path but need not be checked for the
+ * correctness of the bottom of the path
+ */
+/*
+ * search_by_key - search for key (and item) in stree
+ * @sb: superblock
+ * @key: pointer to key to search for
+ * @search_path: Allocated and initialized struct treepath; Returned filled
+ *		 on success.
+ * @stop_level: How far down the tree to search. Use DISK_LEAF_NODE_LEVEL to
+ *		stop at leaf level.
+ *
+ * The function is NOT SCHEDULE-SAFE!
+ */
+int search_by_key(struct super_block *sb, const struct cpu_key *key,
+		  struct treepath *search_path, int stop_level)
 {
 	b_blocknr_t block_number;
 	int expected_level;
@@ -609,17 +604,22 @@
 
 	PROC_INFO_INC(sb, search_by_key);
 
-	/* As we add each node to a path we increase its count.  This means that
-	   we must be careful to release all nodes in a path before we either
-	   discard the path struct or re-use the path struct, as we do here. */
+	/*
+	 * As we add each node to a path we increase its count.  This means
+	 * that we must be careful to release all nodes in a path before we
+	 * either discard the path struct or re-use the path struct, as we
+	 * do here.
+	 */
 
 	pathrelse(search_path);
 
 	right_neighbor_of_leaf_node = 0;
 
-	/* With each iteration of this loop we search through the items in the
-	   current node, and calculate the next current node(next path element)
-	   for the next iteration of this loop.. */
+	/*
+	 * With each iteration of this loop we search through the items in the
+	 * current node, and calculate the next current node(next path element)
+	 * for the next iteration of this loop.
+	 */
 	block_number = SB_ROOT_BLOCK(sb);
 	expected_level = -1;
 	while (1) {
@@ -639,8 +639,10 @@
 					 ++search_path->path_length);
 		fs_gen = get_generation(sb);
 
-		/* Read the next tree node, and set the last element in the path to
-		   have a pointer to it. */
+		/*
+		 * Read the next tree node, and set the last element
+		 * in the path to have a pointer to it.
+		 */
 		if ((bh = last_element->pe_buffer =
 		     sb_getblk(sb, block_number))) {
 
@@ -666,7 +668,7 @@
 			if (!buffer_uptodate(bh))
 				goto io_error;
 		} else {
-		      io_error:
+io_error:
 			search_path->path_length--;
 			pathrelse(search_path);
 			return IO_ERROR;
@@ -676,9 +678,12 @@
 			expected_level = SB_TREE_HEIGHT(sb);
 		expected_level--;
 
-		/* It is possible that schedule occurred. We must check whether the key
-		   to search is still in the tree rooted from the current buffer. If
-		   not then repeat search from the root. */
+		/*
+		 * It is possible that schedule occurred. We must check
+		 * whether the key to search is still in the tree rooted
+		 * from the current buffer. If not then repeat search
+		 * from the root.
+		 */
 		if (fs_changed(fs_gen, sb) &&
 		    (!B_IS_IN_TREE(bh) ||
 		     B_LEVEL(bh) != expected_level ||
@@ -689,8 +694,10 @@
 				      sbk_restarted[expected_level - 1]);
 			pathrelse(search_path);
 
-			/* Get the root block number so that we can repeat the search
-			   starting from the root. */
+			/*
+			 * Get the root block number so that we can
+			 * repeat the search starting from the root.
+			 */
 			block_number = SB_ROOT_BLOCK(sb);
 			expected_level = -1;
 			right_neighbor_of_leaf_node = 0;
@@ -699,9 +706,11 @@
 			continue;
 		}
 
-		/* only check that the key is in the buffer if key is not
-		   equal to the MAX_KEY. Latter case is only possible in
-		   "finish_unfinished()" processing during mount. */
+		/*
+		 * only check that the key is in the buffer if key is not
+		 * equal to the MAX_KEY. Latter case is only possible in
+		 * "finish_unfinished()" processing during mount.
+		 */
 		RFALSE(comp_keys(&MAX_KEY, key) &&
 		       !key_in_buffer(search_path, key, sb),
 		       "PAP-5130: key is not in the buffer");
@@ -713,8 +722,10 @@
 		}
 #endif
 
-		// make sure, that the node contents look like a node of
-		// certain level
+		/*
+		 * make sure, that the node contents look like a node of
+		 * certain level
+		 */
 		if (!is_tree_node(bh, expected_level)) {
 			reiserfs_error(sb, "vs-5150",
 				       "invalid format found in block %ld. "
@@ -732,32 +743,42 @@
 		       "vs-5152: tree level (%d) is less than stop level (%d)",
 		       node_level, stop_level);
 
-		retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
+		retval = bin_search(key, item_head(bh, 0),
 				      B_NR_ITEMS(bh),
 				      (node_level ==
 				       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
 				      KEY_SIZE,
-				      &(last_element->pe_position));
+				      &last_element->pe_position);
 		if (node_level == stop_level) {
 			return retval;
 		}
 
 		/* we are not in the stop level */
+		/*
+		 * item has been found, so we choose the pointer which
+		 * is to the right of the found one
+		 */
 		if (retval == ITEM_FOUND)
-			/* item has been found, so we choose the pointer which is to the right of the found one */
 			last_element->pe_position++;
 
-		/* if item was not found we choose the position which is to
-		   the left of the found item. This requires no code,
-		   bin_search did it already. */
+		/*
+		 * if item was not found we choose the position which is to
+		 * the left of the found item. This requires no code,
+		 * bin_search did it already.
+		 */
 
-		/* So we have chosen a position in the current node which is
-		   an internal node.  Now we calculate child block number by
-		   position in the node. */
+		/*
+		 * So we have chosen a position in the current node which is
+		 * an internal node.  Now we calculate child block number by
+		 * position in the node.
+		 */
 		block_number =
 		    B_N_CHILD_NUM(bh, last_element->pe_position);
 
-		/* if we are going to read leaf nodes, try for read ahead as well */
+		/*
+		 * if we are going to read leaf nodes, try for read
+		 * ahead as well
+		 */
 		if ((search_path->reada & PATH_READA) &&
 		    node_level == DISK_LEAF_NODE_LEVEL + 1) {
 			int pos = last_element->pe_position;
@@ -779,7 +800,7 @@
 				/*
 				 * check to make sure we're in the same object
 				 */
-				le_key = B_N_PDELIM_KEY(bh, pos);
+				le_key = internal_key(bh, pos);
 				if (le32_to_cpu(le_key->k_objectid) !=
 				    key->on_disk_key.k_objectid) {
 					break;
@@ -789,26 +810,28 @@
 	}
 }
 
-/* Form the path to an item and position in this item which contains
-   file byte defined by key. If there is no such item
-   corresponding to the key, we point the path to the item with
-   maximal key less than key, and *pos_in_item is set to one
-   past the last entry/byte in the item.  If searching for entry in a
-   directory item, and it is not found, *pos_in_item is set to one
-   entry more than the entry with maximal key which is less than the
-   sought key.
-
-   Note that if there is no entry in this same node which is one more,
-   then we point to an imaginary entry.  for direct items, the
-   position is in units of bytes, for indirect items the position is
-   in units of blocknr entries, for directory items the position is in
-   units of directory entries.  */
-
+/*
+ * Form the path to an item and position in this item which contains
+ * file byte defined by key. If there is no such item
+ * corresponding to the key, we point the path to the item with
+ * maximal key less than key, and *pos_in_item is set to one
+ * past the last entry/byte in the item.  If searching for entry in a
+ * directory item, and it is not found, *pos_in_item is set to one
+ * entry more than the entry with maximal key which is less than the
+ * sought key.
+ *
+ * Note that if there is no entry in this same node which is one more,
+ * then we point to an imaginary entry.  for direct items, the
+ * position is in units of bytes, for indirect items the position is
+ * in units of blocknr entries, for directory items the position is in
+ * units of directory entries.
+ */
 /* The function is NOT SCHEDULE-SAFE! */
-int search_for_position_by_key(struct super_block *sb,	/* Pointer to the super block.          */
-			       const struct cpu_key *p_cpu_key,	/* Key to search (cpu variable)         */
-			       struct treepath *search_path	/* Filled up by this function.          */
-    )
+int search_for_position_by_key(struct super_block *sb,
+			       /* Key to search (cpu variable) */
+			       const struct cpu_key *p_cpu_key,
+			       /* Filled up by this function. */
+			       struct treepath *search_path)
 {
 	struct item_head *p_le_ih;	/* pointer to on-disk structure */
 	int blk_size;
@@ -830,7 +853,7 @@
 	if (retval == ITEM_FOUND) {
 
 		RFALSE(!ih_item_len
-		       (B_N_PITEM_HEAD
+		       (item_head
 			(PATH_PLAST_BUFFER(search_path),
 			 PATH_LAST_POSITION(search_path))),
 		       "PAP-5165: item length equals zero");
@@ -844,14 +867,14 @@
 
 	/* Item is not found. Set path to the previous item. */
 	p_le_ih =
-	    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
+	    item_head(PATH_PLAST_BUFFER(search_path),
 			   --PATH_LAST_POSITION(search_path));
 	blk_size = sb->s_blocksize;
 
-	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
+	if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
 		return FILE_NOT_FOUND;
-	}
-	// FIXME: quite ugly this far
+
+	/* FIXME: quite ugly this far */
 
 	item_offset = le_ih_k_offset(p_le_ih);
 	offset = cpu_key_k_offset(p_cpu_key);
@@ -866,8 +889,10 @@
 		return POSITION_FOUND;
 	}
 
-	/* Needed byte is not contained in the item pointed to by the
-	   path. Set pos_in_item out of the item. */
+	/*
+	 * Needed byte is not contained in the item pointed to by the
+	 * path. Set pos_in_item out of the item.
+	 */
 	if (is_indirect_le_ih(p_le_ih))
 		pos_in_item(search_path) =
 		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
@@ -892,19 +917,17 @@
 		return 1;
 
 	/* we need only to know, whether it is the same item */
-	ih = get_ih(path);
+	ih = tp_item_head(path);
 	return memcmp(stored_ih, ih, IH_SIZE);
 }
 
-/* unformatted nodes are not logged anymore, ever.  This is safe
-** now
-*/
+/* unformatted nodes are not logged anymore, ever.  This is safe now */
 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
 
-// block can not be forgotten as it is in I/O or held by someone
+/* block can not be forgotten as it is in I/O or held by someone */
 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
 
-// prepare for delete or cut of direct item
+/* prepare for delete or cut of direct item */
 static inline int prepare_for_direct_item(struct treepath *path,
 					  struct item_head *le_ih,
 					  struct inode *inode,
@@ -917,9 +940,8 @@
 		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
 		return M_DELETE;
 	}
-	// new file gets truncated
+	/* new file gets truncated */
 	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
-		//
 		round_len = ROUND_UP(new_file_length);
 		/* this was new_file_length < le_ih ... */
 		if (round_len < le_ih_k_offset(le_ih)) {
@@ -933,12 +955,13 @@
 		return M_CUT;	/* Cut from this item. */
 	}
 
-	// old file: items may have any length
+	/* old file: items may have any length */
 
 	if (new_file_length < le_ih_k_offset(le_ih)) {
 		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
 		return M_DELETE;	/* Delete this item. */
 	}
+
 	/* Calculate first position and size for cutting from item. */
 	*cut_size = -(ih_item_len(le_ih) -
 		      (pos_in_item(path) =
@@ -957,12 +980,15 @@
 		RFALSE(ih_entry_count(le_ih) != 2,
 		       "PAP-5220: incorrect empty directory item (%h)", le_ih);
 		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
-		return M_DELETE;	/* Delete the directory item containing "." and ".." entry. */
+		/* Delete the directory item containing "." and ".." entry. */
+		return M_DELETE;
 	}
 
 	if (ih_entry_count(le_ih) == 1) {
-		/* Delete the directory item such as there is one record only
-		   in this item */
+		/*
+		 * Delete the directory item such as there is one record only
+		 * in this item
+		 */
 		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
 		return M_DELETE;
 	}
@@ -976,18 +1002,34 @@
 
 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
 
-/*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
-    If the path points to an indirect item, remove some number of its unformatted nodes.
-    In case of file truncate calculate whether this item must be deleted/truncated or last
-    unformatted node of this item will be converted to a direct item.
-    This function returns a determination of what balance mode the calling function should employ. */
-static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed,	/* Number of unformatted nodes which were removed
-																						   from end of the file. */
-				      int *cut_size, unsigned long long new_file_length	/* MAX_KEY_OFFSET in case of delete. */
+/*
+ * If the path points to a directory or direct item, calculate mode
+ * and the size cut, for balance.
+ * If the path points to an indirect item, remove some number of its
+ * unformatted nodes.
+ * In case of file truncate calculate whether this item must be
+ * deleted/truncated or last unformatted node of this item will be
+ * converted to a direct item.
+ * This function returns a determination of what balance mode the
+ * calling function should employ.
+ */
+static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
+				      struct inode *inode,
+				      struct treepath *path,
+				      const struct cpu_key *item_key,
+				      /*
+				       * Number of unformatted nodes
+				       * which were removed from end
+				       * of the file.
+				       */
+				      int *removed,
+				      int *cut_size,
+				      /* MAX_KEY_OFFSET in case of delete. */
+				      unsigned long long new_file_length
     )
 {
 	struct super_block *sb = inode->i_sb;
-	struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
+	struct item_head *p_le_ih = tp_item_head(path);
 	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
 
 	BUG_ON(!th->t_trans_id);
@@ -1023,8 +1065,10 @@
 	    int pos = 0;
 
 	    if ( new_file_length == max_reiserfs_offset (inode) ) {
-		/* prepare_for_delete_or_cut() is called by
-		 * reiserfs_delete_item() */
+		/*
+		 * prepare_for_delete_or_cut() is called by
+		 * reiserfs_delete_item()
+		 */
 		new_file_length = 0;
 		delete = 1;
 	    }
@@ -1033,27 +1077,30 @@
 		need_re_search = 0;
 		*cut_size = 0;
 		bh = PATH_PLAST_BUFFER(path);
-		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+		copy_item_head(&s_ih, tp_item_head(path));
 		pos = I_UNFM_NUM(&s_ih);
 
 		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
 		    __le32 *unfm;
 		    __u32 block;
 
-		    /* Each unformatted block deletion may involve one additional
-		     * bitmap block into the transaction, thereby the initial
-		     * journal space reservation might not be enough. */
+		    /*
+		     * Each unformatted block deletion may involve
+		     * one additional bitmap block into the transaction,
+		     * thereby the initial journal space reservation
+		     * might not be enough.
+		     */
 		    if (!delete && (*cut_size) != 0 &&
 			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
 			break;
 
-		    unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
+		    unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
 		    block = get_block_num(unfm, 0);
 
 		    if (block != 0) {
 			reiserfs_prepare_for_journal(sb, bh, 1);
 			put_block_num(unfm, 0, 0);
-			journal_mark_dirty(th, sb, bh);
+			journal_mark_dirty(th, bh);
 			reiserfs_free_block(th, inode, block, 1);
 		    }
 
@@ -1074,17 +1121,21 @@
 			break;
 		    }
 		}
-		/* a trick.  If the buffer has been logged, this will do nothing.  If
-		** we've broken the loop without logging it, it will restore the
-		** buffer */
+		/*
+		 * a trick.  If the buffer has been logged, this will
+		 * do nothing.  If we've broken the loop without logging
+		 * it, it will restore the buffer
+		 */
 		reiserfs_restore_prepared_buffer(sb, bh);
 	    } while (need_re_search &&
 		     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
 	    pos_in_item(path) = pos * UNFM_P_SIZE;
 
 	    if (*cut_size == 0) {
-		/* Nothing were cut. maybe convert last unformatted node to the
-		 * direct item? */
+		/*
+		 * Nothing was cut. maybe convert last unformatted node to the
+		 * direct item?
+		 */
 		result = M_CONVERT;
 	    }
 	    return result;
@@ -1095,7 +1146,7 @@
 static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
 {
 	int del_size;
-	struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
+	struct item_head *p_le_ih = tp_item_head(tb->tb_path);
 
 	if (is_statdata_le_ih(p_le_ih))
 		return 0;
@@ -1104,9 +1155,11 @@
 	    (mode ==
 	     M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
 	if (is_direntry_le_ih(p_le_ih)) {
-		/* return EMPTY_DIR_SIZE; We delete emty directoris only.
-		 * we can't use EMPTY_DIR_SIZE, as old format dirs have a different
-		 * empty size.  ick. FIXME, is this right? */
+		/*
+		 * return EMPTY_DIR_SIZE; We delete empty directories only.
+		 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
+		 * different empty size.  ick. FIXME, is this right?
+		 */
 		return del_size;
 	}
 
@@ -1169,7 +1222,8 @@
 }
 #endif
 
-/* Delete object item.
+/*
+ * Delete object item.
  * th       - active transaction handle
  * path     - path to the deleted item
  * item_key - key to search for the deleted item
@@ -1212,7 +1266,7 @@
 
 		RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
 
-		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+		copy_item_head(&s_ih, tp_item_head(path));
 		s_del_balance.insert_size[0] = del_size;
 
 		ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
@@ -1221,7 +1275,7 @@
 
 		PROC_INFO_INC(sb, delete_item_restarted);
 
-		// file system changed, repeat search
+		/* file system changed, repeat search */
 		ret_value =
 		    search_for_position_by_key(sb, item_key, path);
 		if (ret_value == IO_ERROR)
@@ -1238,16 +1292,18 @@
 		unfix_nodes(&s_del_balance);
 		return 0;
 	}
-	// reiserfs_delete_item returns item length when success
+
+	/* reiserfs_delete_item returns item length when success */
 	ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
-	q_ih = get_ih(path);
+	q_ih = tp_item_head(path);
 	quota_cut_bytes = ih_item_len(q_ih);
 
-	/* hack so the quota code doesn't have to guess if the file
-	 ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
-	 ** We test the offset because the tail might have been
-	 ** split into multiple items, and we only want to decrement for
-	 ** the unfm node once
+	/*
+	 * hack so the quota code doesn't have to guess if the file has a
+	 * tail.  On tail insert, we allocate quota for 1 unformatted node.
+	 * We test the offset because the tail might have been
+	 * split into multiple items, and we only want to decrement for
+	 * the unfm node once
 	 */
 	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
 		if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
@@ -1261,33 +1317,38 @@
 		int off;
 		char *data;
 
-		/* We are in direct2indirect conversion, so move tail contents
-		   to the unformatted node */
-		/* note, we do the copy before preparing the buffer because we
-		 ** don't care about the contents of the unformatted node yet.
-		 ** the only thing we really care about is the direct item's data
-		 ** is in the unformatted node.
-		 **
-		 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
-		 ** the unformatted node, which might schedule, meaning we'd have to
-		 ** loop all the way back up to the start of the while loop.
-		 **
-		 ** The unformatted node must be dirtied later on.  We can't be
-		 ** sure here if the entire tail has been deleted yet.
-		 **
-		 ** un_bh is from the page cache (all unformatted nodes are
-		 ** from the page cache) and might be a highmem page.  So, we
-		 ** can't use un_bh->b_data.
-		 ** -clm
+		/*
+		 * We are in direct2indirect conversion, so move tail contents
+		 * to the unformatted node
+		 */
+		/*
+		 * note, we do the copy before preparing the buffer because we
+		 * don't care about the contents of the unformatted node yet.
+		 * the only thing we really care about is the direct item's
+		 * data is in the unformatted node.
+		 *
+		 * Otherwise, we would have to call
+		 * reiserfs_prepare_for_journal on the unformatted node,
+		 * which might schedule, meaning we'd have to loop all the
+		 * way back up to the start of the while loop.
+		 *
+		 * The unformatted node must be dirtied later on.  We can't be
+		 * sure here if the entire tail has been deleted yet.
+		 *
+		 * un_bh is from the page cache (all unformatted nodes are
+		 * from the page cache) and might be a highmem page.  So, we
+		 * can't use un_bh->b_data.
+		 * -clm
 		 */
 
 		data = kmap_atomic(un_bh->b_page);
 		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
 		memcpy(data + off,
-		       B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
+		       ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
 		       ret_value);
 		kunmap_atomic(data);
 	}
+
 	/* Perform balancing after all resources have been collected at once. */
 	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
 
@@ -1304,20 +1365,21 @@
 	return ret_value;
 }
 
-/* Summary Of Mechanisms For Handling Collisions Between Processes:
-
- deletion of the body of the object is performed by iput(), with the
- result that if multiple processes are operating on a file, the
- deletion of the body of the file is deferred until the last process
- that has an open inode performs its iput().
-
- writes and truncates are protected from collisions by use of
- semaphores.
-
- creates, linking, and mknod are protected from collisions with other
- processes by making the reiserfs_add_entry() the last step in the
- creation, and then rolling back all changes if there was a collision.
- - Hans
+/*
+ * Summary Of Mechanisms For Handling Collisions Between Processes:
+ *
+ *  deletion of the body of the object is performed by iput(), with the
+ *  result that if multiple processes are operating on a file, the
+ *  deletion of the body of the file is deferred until the last process
+ *  that has an open inode performs its iput().
+ *
+ *  writes and truncates are protected from collisions by use of
+ *  semaphores.
+ *
+ *  creates, linking, and mknod are protected from collisions with other
+ *  processes by making the reiserfs_add_entry() the last step in the
+ *  creation, and then rolling back all changes if there was a collision.
+ *  - Hans
 */
 
 /* this deletes item which never gets split */
@@ -1347,7 +1409,11 @@
 		}
 		if (retval != ITEM_FOUND) {
 			pathrelse(&path);
-			// No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
+			/*
+			 * No need for a warning, if there is just no free
+			 * space to insert '..' item into the
+			 * newly-created subdir
+			 */
 			if (!
 			    ((unsigned long long)
 			     GET_HASH_VALUE(le_key_k_offset
@@ -1362,11 +1428,11 @@
 		}
 		if (!tb_init) {
 			tb_init = 1;
-			item_len = ih_item_len(PATH_PITEM_HEAD(&path));
+			item_len = ih_item_len(tp_item_head(&path));
 			init_tb_struct(th, &tb, th->t_super, &path,
 				       -(IH_SIZE + item_len));
 		}
-		quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
+		quota_cut_bytes = ih_item_len(tp_item_head(&path));
 
 		retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
 		if (retval == REPEAT_SEARCH) {
@@ -1376,7 +1442,11 @@
 
 		if (retval == CARRY_ON) {
 			do_balance(&tb, NULL, NULL, M_DELETE);
-			if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
+			/*
+			 * Should we count quota for item? (we don't
+			 * count quotas for save-links)
+			 */
+			if (inode) {
 				int depth;
 #ifdef REISERQUOTA_DEBUG
 				reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
@@ -1391,7 +1461,8 @@
 			}
 			break;
 		}
-		// IO_ERROR, NO_DISK_SPACE, etc
+
+		/* IO_ERROR, NO_DISK_SPACE, etc */
 		reiserfs_warning(th->t_super, "vs-5360",
 				 "could not delete %K due to fix_nodes failure",
 				 &cpu_key);
@@ -1447,11 +1518,13 @@
 			do {
 				next = bh->b_this_page;
 
-				/* we want to unmap the buffers that contain the tail, and
-				 ** all the buffers after it (since the tail must be at the
-				 ** end of the file).  We don't want to unmap file data
-				 ** before the tail, since it might be dirty and waiting to
-				 ** reach disk
+				/*
+				 * we want to unmap the buffers that contain
+				 * the tail, and all the buffers after it
+				 * (since the tail must be at the end of the
+				 * file).  We don't want to unmap file data
+				 * before the tail, since it might be dirty
+				 * and waiting to reach disk
 				 */
 				cur_index += bh->b_size;
 				if (cur_index > tail_index) {
@@ -1476,9 +1549,10 @@
 	BUG_ON(!th->t_trans_id);
 	BUG_ON(new_file_size != inode->i_size);
 
-	/* the page being sent in could be NULL if there was an i/o error
-	 ** reading in the last block.  The user will hit problems trying to
-	 ** read the file, but for now we just skip the indirect2direct
+	/*
+	 * the page being sent in could be NULL if there was an i/o error
+	 * reading in the last block.  The user will hit problems trying to
+	 * read the file, but for now we just skip the indirect2direct
 	 */
 	if (atomic_read(&inode->i_count) > 1 ||
 	    !tail_has_to_be_packed(inode) ||
@@ -1490,17 +1564,18 @@
 		pathrelse(path);
 		return cut_bytes;
 	}
+
 	/* Perform the conversion to a direct_item. */
-	/* return indirect_to_direct(inode, path, item_key,
-				  new_file_size, mode); */
 	return indirect2direct(th, inode, page, path, item_key,
 			       new_file_size, mode);
 }
 
-/* we did indirect_to_direct conversion. And we have inserted direct
-   item successesfully, but there were no disk space to cut unfm
-   pointer being converted. Therefore we have to delete inserted
-   direct item(s) */
+/*
+ * we did indirect_to_direct conversion. And we have inserted direct
+ * item successfully, but there was no disk space to cut unfm
+ * pointer being converted. Therefore we have to delete inserted
+ * direct item(s)
+ */
 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
 					 struct inode *inode, struct treepath *path)
 {
@@ -1509,7 +1584,7 @@
 	int removed;
 	BUG_ON(!th->t_trans_id);
 
-	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);	// !!!!
+	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
 	tail_key.key_length = 4;
 
 	tail_len =
@@ -1521,7 +1596,7 @@
 			reiserfs_panic(inode->i_sb, "vs-5615",
 				       "found invalid item");
 		RFALSE(path->pos_in_item !=
-		       ih_item_len(PATH_PITEM_HEAD(path)) - 1,
+		       ih_item_len(tp_item_head(path)) - 1,
 		       "vs-5616: appended bytes found");
 		PATH_LAST_POSITION(path)--;
 
@@ -1539,7 +1614,6 @@
 	reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
 			 "conversion has been rolled back due to "
 			 "lack of disk space");
-	//mark_file_without_tail (inode);
 	mark_inode_dirty(inode);
 }
 
@@ -1551,15 +1625,18 @@
 			   struct page *page, loff_t new_file_size)
 {
 	struct super_block *sb = inode->i_sb;
-	/* Every function which is going to call do_balance must first
-	   create a tree_balance structure.  Then it must fill up this
-	   structure by using the init_tb_struct and fix_nodes functions.
-	   After that we can make tree balancing. */
+	/*
+	 * Every function which is going to call do_balance must first
+	 * create a tree_balance structure.  Then it must fill up this
+	 * structure by using the init_tb_struct and fix_nodes functions.
+	 * After that we can make tree balancing.
+	 */
 	struct tree_balance s_cut_balance;
 	struct item_head *p_le_ih;
-	int cut_size = 0,	/* Amount to be cut. */
-	    ret_value = CARRY_ON, removed = 0,	/* Number of the removed unformatted nodes. */
-	    is_inode_locked = 0;
+	int cut_size = 0;	/* Amount to be cut. */
+	int ret_value = CARRY_ON;
+	int removed = 0;	/* Number of the removed unformatted nodes. */
+	int is_inode_locked = 0;
 	char mode;		/* Mode of the balance. */
 	int retval2 = -1;
 	int quota_cut_bytes;
@@ -1571,21 +1648,27 @@
 	init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
 		       cut_size);
 
-	/* Repeat this loop until we either cut the item without needing
-	   to balance, or we fix_nodes without schedule occurring */
+	/*
+	 * Repeat this loop until we either cut the item without needing
+	 * to balance, or we fix_nodes without schedule occurring
+	 */
 	while (1) {
-		/* Determine the balance mode, position of the first byte to
-		   be cut, and size to be cut.  In case of the indirect item
-		   free unformatted nodes which are pointed to by the cut
-		   pointers. */
+		/*
+		 * Determine the balance mode, position of the first byte to
+		 * be cut, and size to be cut.  In case of the indirect item
+		 * free unformatted nodes which are pointed to by the cut
+		 * pointers.
+		 */
 
 		mode =
 		    prepare_for_delete_or_cut(th, inode, path,
 					      item_key, &removed,
 					      &cut_size, new_file_size);
 		if (mode == M_CONVERT) {
-			/* convert last unformatted node to direct item or leave
-			   tail in the unformatted node */
+			/*
+			 * convert last unformatted node to direct item or
+			 * leave tail in the unformatted node
+			 */
 			RFALSE(ret_value != CARRY_ON,
 			       "PAP-5570: can not convert twice");
 
@@ -1599,15 +1682,20 @@
 
 			is_inode_locked = 1;
 
-			/* removing of last unformatted node will change value we
-			   have to return to truncate. Save it */
+			/*
+			 * removing of last unformatted node will
+			 * change value we have to return to truncate.
+			 * Save it
+			 */
 			retval2 = ret_value;
-			/*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
 
-			/* So, we have performed the first part of the conversion:
-			   inserting the new direct item.  Now we are removing the
-			   last unformatted node pointer. Set key to search for
-			   it. */
+			/*
+			 * So, we have performed the first part of the
+			 * conversion:
+			 * inserting the new direct item.  Now we are
+			 * removing the last unformatted node pointer.
+			 * Set key to search for it.
+			 */
 			set_cpu_key_k_type(item_key, TYPE_INDIRECT);
 			item_key->key_length = 4;
 			new_file_size -=
@@ -1650,11 +1738,13 @@
 		return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
 	}			/* while */
 
-	// check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
+	/* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
 	if (ret_value != CARRY_ON) {
 		if (is_inode_locked) {
-			// FIXME: this seems to be not needed: we are always able
-			// to cut item
+			/*
+			 * FIXME: this seems to be not needed: we are always
+			 * able to cut item
+			 */
 			indirect_to_direct_roll_back(th, inode, path);
 		}
 		if (ret_value == NO_DISK_SPACE)
@@ -1671,22 +1761,23 @@
 	/* Calculate number of bytes that need to be cut from the item. */
 	quota_cut_bytes =
 	    (mode ==
-	     M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
+	     M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
 	    insert_size[0];
 	if (retval2 == -1)
 		ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
 	else
 		ret_value = retval2;
 
-	/* For direct items, we only change the quota when deleting the last
-	 ** item.
+	/*
+	 * For direct items, we only change the quota when deleting the last
+	 * item.
 	 */
-	p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
+	p_le_ih = tp_item_head(s_cut_balance.tb_path);
 	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
 		if (mode == M_DELETE &&
 		    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
 		    1) {
-			// FIXME: this is to keep 3.5 happy
+			/* FIXME: this is to keep 3.5 happy */
 			REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
 			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
 		} else {
@@ -1696,10 +1787,12 @@
 #ifdef CONFIG_REISERFS_CHECK
 	if (is_inode_locked) {
 		struct item_head *le_ih =
-		    PATH_PITEM_HEAD(s_cut_balance.tb_path);
-		/* we are going to complete indirect2direct conversion. Make
-		   sure, that we exactly remove last unformatted node pointer
-		   of the item */
+		    tp_item_head(s_cut_balance.tb_path);
+		/*
+		 * we are going to complete indirect2direct conversion. Make
+		 * sure, that we exactly remove last unformatted node pointer
+		 * of the item
+		 */
 		if (!is_indirect_le_ih(le_ih))
 			reiserfs_panic(sb, "vs-5652",
 				       "item must be indirect %h", le_ih);
@@ -1717,17 +1810,20 @@
 				       "(CUT, insert_size==%d)",
 				       le_ih, s_cut_balance.insert_size[0]);
 		}
-		/* it would be useful to make sure, that right neighboring
-		   item is direct item of this file */
+		/*
+		 * it would be useful to make sure, that right neighboring
+		 * item is direct item of this file
+		 */
 	}
 #endif
 
 	do_balance(&s_cut_balance, NULL, NULL, mode);
 	if (is_inode_locked) {
-		/* we've done an indirect->direct conversion.  when the data block
-		 ** was freed, it was removed from the list of blocks that must
-		 ** be flushed before the transaction commits, make sure to
-		 ** unmap and invalidate it
+		/*
+		 * we've done an indirect->direct conversion.  when the
+		 * data block was freed, it was removed from the list of
+		 * blocks that must be flushed before the transaction
+		 * commits, make sure to unmap and invalidate it
 		 */
 		unmap_buffers(page, tail_pos);
 		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
@@ -1758,20 +1854,25 @@
 	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
 }
 
-/* Truncate file to the new size. Note, this must be called with a transaction
-   already started */
+/*
+ * Truncate file to the new size. Note, this must be called with a
+ * transaction already started
+ */
 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
-			  struct inode *inode,	/* ->i_size contains new size */
+			 struct inode *inode,	/* ->i_size contains new size */
 			 struct page *page,	/* up to date for last block */
-			 int update_timestamps	/* when it is called by
-						   file_release to convert
-						   the tail - no timestamps
-						   should be updated */
+			 /*
+			  * when it is called by file_release to convert
+			  * the tail - no timestamps should be updated
+			  */
+			 int update_timestamps
     )
 {
 	INITIALIZE_PATH(s_search_path);	/* Path to the current object item. */
 	struct item_head *p_le_ih;	/* Pointer to an item header. */
-	struct cpu_key s_item_key;	/* Key to search for a previous file item. */
+
+	/* Key to search for a previous file item. */
+	struct cpu_key s_item_key;
 	loff_t file_size,	/* Old file size. */
 	 new_file_size;	/* New file size. */
 	int deleted;		/* Number of deleted or truncated bytes. */
@@ -1784,8 +1885,8 @@
 	     || S_ISLNK(inode->i_mode)))
 		return 0;
 
+	/* deletion of directory - no need to update timestamps */
 	if (S_ISDIR(inode->i_mode)) {
-		// deletion of directory - no need to update timestamps
 		truncate_directory(th, inode);
 		return 0;
 	}
@@ -1793,7 +1894,7 @@
 	/* Get new file size. */
 	new_file_size = inode->i_size;
 
-	// FIXME: note, that key type is unimportant here
+	/* FIXME: note, that key type is unimportant here */
 	make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
 		     TYPE_DIRECT, 3);
 
@@ -1819,7 +1920,7 @@
 	s_search_path.pos_in_item--;
 
 	/* Get real file size (total length of all file items) */
-	p_le_ih = PATH_PITEM_HEAD(&s_search_path);
+	p_le_ih = tp_item_head(&s_search_path);
 	if (is_statdata_le_ih(p_le_ih))
 		file_size = 0;
 	else {
@@ -1827,9 +1928,11 @@
 		int bytes =
 		    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
 
-		/* this may mismatch with real file size: if last direct item
-		   had no padding zeros and last unformatted node had no free
-		   space, this file would have this file size */
+		/*
+		 * this may mismatch with real file size: if last direct item
+		 * had no padding zeros and last unformatted node had no free
+		 * space, this file would have this file size
+		 */
 		file_size = offset + bytes - 1;
 	}
 	/*
@@ -1867,18 +1970,20 @@
 
 		set_cpu_key_k_offset(&s_item_key, file_size);
 
-		/* While there are bytes to truncate and previous file item is presented in the tree. */
+		/*
+		 * While there are bytes to truncate and previous
+		 * file item is presented in the tree.
+		 */
 
 		/*
-		 ** This loop could take a really long time, and could log
-		 ** many more blocks than a transaction can hold.  So, we do a polite
-		 ** journal end here, and if the transaction needs ending, we make
-		 ** sure the file is consistent before ending the current trans
-		 ** and starting a new one
+		 * This loop could take a really long time, and could log
+		 * many more blocks than a transaction can hold.  So, we do
+		 * a polite journal end here, and if the transaction needs
+		 * ending, we make sure the file is consistent before ending
+		 * the current trans and starting a new one
 		 */
 		if (journal_transaction_should_end(th, 0) ||
 		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
-			int orig_len_alloc = th->t_blocks_allocated;
 			pathrelse(&s_search_path);
 
 			if (update_timestamps) {
@@ -1887,7 +1992,7 @@
 			}
 			reiserfs_update_sd(th, inode);
 
-			err = journal_end(th, inode->i_sb, orig_len_alloc);
+			err = journal_end(th);
 			if (err)
 				goto out;
 			err = journal_begin(th, inode->i_sb,
@@ -1904,25 +2009,25 @@
 	       "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
 	       new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
 
-      update_and_out:
+update_and_out:
 	if (update_timestamps) {
-		// this is truncate, not file closing
+		/* this is truncate, not file closing */
 		inode->i_mtime = CURRENT_TIME_SEC;
 		inode->i_ctime = CURRENT_TIME_SEC;
 	}
 	reiserfs_update_sd(th, inode);
 
-      out:
+out:
 	pathrelse(&s_search_path);
 	return err;
 }
 
 #ifdef CONFIG_REISERFS_CHECK
-// this makes sure, that we __append__, not overwrite or add holes
+/* this makes sure, that we __append__, not overwrite or add holes */
 static void check_research_for_paste(struct treepath *path,
 				     const struct cpu_key *key)
 {
-	struct item_head *found_ih = get_ih(path);
+	struct item_head *found_ih = tp_item_head(path);
 
 	if (is_direct_le_ih(found_ih)) {
 		if (le_ih_k_offset(found_ih) +
@@ -1952,13 +2057,22 @@
 }
 #endif				/* config reiserfs check */
 
-/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
-int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path,	/* Path to the pasted item.	  */
-			     const struct cpu_key *key,	/* Key to search for the needed item. */
-			     struct inode *inode,	/* Inode item belongs to */
-			     const char *body,	/* Pointer to the bytes to paste.    */
+/*
+ * Paste bytes to the existing item.
+ * Returns bytes number pasted into the item.
+ */
+int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
+			     /* Path to the pasted item. */
+			     struct treepath *search_path,
+			     /* Key to search for the needed item. */
+			     const struct cpu_key *key,
+			     /* Inode item belongs to */
+			     struct inode *inode,
+			     /* Pointer to the bytes to paste. */
+			     const char *body,
+			     /* Size of pasted bytes. */
 			     int pasted_size)
-{				/* Size of pasted bytes.             */
+{
 	struct super_block *sb = inode->i_sb;
 	struct tree_balance s_paste_balance;
 	int retval;
@@ -1973,7 +2087,7 @@
 	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
 		       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
 		       pasted_size, inode->i_uid,
-		       key2type(&(key->on_disk_key)));
+		       key2type(&key->on_disk_key));
 #endif
 
 	depth = reiserfs_write_unlock_nested(sb);
@@ -1997,7 +2111,7 @@
 	while ((retval =
 		fix_nodes(M_PASTE, &s_paste_balance, NULL,
 			  body)) == REPEAT_SEARCH) {
-	      search_again:
+search_again:
 		/* file system changed while we were in the fix_nodes */
 		PROC_INFO_INC(th->t_super, paste_into_item_restarted);
 		retval =
@@ -2019,21 +2133,23 @@
 #endif
 	}
 
-	/* Perform balancing after all resources are collected by fix_nodes, and
-	   accessing them will not risk triggering schedule. */
+	/*
+	 * Perform balancing after all resources are collected by fix_nodes,
+	 * and accessing them will not risk triggering schedule.
+	 */
 	if (retval == CARRY_ON) {
 		do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
 		return 0;
 	}
 	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
-      error_out:
+error_out:
 	/* this also releases the path */
 	unfix_nodes(&s_paste_balance);
 #ifdef REISERQUOTA_DEBUG
 	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
 		       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
 		       pasted_size, inode->i_uid,
-		       key2type(&(key->on_disk_key)));
+		       key2type(&key->on_disk_key));
 #endif
 	depth = reiserfs_write_unlock_nested(sb);
 	dquot_free_space_nodirty(inode, pasted_size);
@@ -2041,7 +2157,8 @@
 	return retval;
 }
 
-/* Insert new item into the buffer at the path.
+/*
+ * Insert new item into the buffer at the path.
  * th   - active transaction handle
  * path - path to the inserted item
  * ih   - pointer to the item header to insert
@@ -2064,8 +2181,10 @@
 		fs_gen = get_generation(inode->i_sb);
 		quota_bytes = ih_item_len(ih);
 
-		/* hack so the quota code doesn't have to guess if the file has
-		 ** a tail, links are always tails, so there's no guessing needed
+		/*
+		 * hack so the quota code doesn't have to guess
+		 * if the file has a tail, links are always tails,
+		 * so there's no guessing needed
 		 */
 		if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
 			quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
@@ -2074,8 +2193,10 @@
 			       "reiserquota insert_item(): allocating %u id=%u type=%c",
 			       quota_bytes, inode->i_uid, head2type(ih));
 #endif
-		/* We can't dirty inode here. It would be immediately written but
-		 * appropriate stat item isn't inserted yet... */
+		/*
+		 * We can't dirty inode here. It would be immediately
+		 * written but appropriate stat item isn't inserted yet...
+		 */
 		depth = reiserfs_write_unlock_nested(inode->i_sb);
 		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
 		reiserfs_write_lock_nested(inode->i_sb, depth);
@@ -2089,7 +2210,10 @@
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 	s_ins_balance.key = key->on_disk_key;
 #endif
-	/* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
+	/*
+	 * DQUOT_* can schedule, must check to be sure calling
+	 * fix_nodes is safe
+	 */
 	if (inode && fs_changed(fs_gen, inode->i_sb)) {
 		goto search_again;
 	}
@@ -2097,7 +2221,7 @@
 	while ((retval =
 		fix_nodes(M_INSERT, &s_ins_balance, ih,
 			  body)) == REPEAT_SEARCH) {
-	      search_again:
+search_again:
 		/* file system changed while we were in the fix_nodes */
 		PROC_INFO_INC(th->t_super, insert_item_restarted);
 		retval = search_item(th->t_super, key, path);
@@ -2121,7 +2245,7 @@
 	}
 
 	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
-      error_out:
+error_out:
 	/* also releases the path */
 	unfix_nodes(&s_ins_balance);
 #ifdef REISERQUOTA_DEBUG

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9fb2042..a392cef 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c

@@ -74,7 +74,7 @@
 	dquot_writeback_dquots(s, -1);
 	reiserfs_write_lock(s);
 	if (!journal_begin(&th, s, 1))
-		if (!journal_end_sync(&th, s, 1))
+		if (!journal_end_sync(&th))
 			reiserfs_flush_old_commits(s);
 	reiserfs_write_unlock(s);
 	return 0;
@@ -136,9 +136,9 @@
 		} else {
 			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 						     1);
-			journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
 			reiserfs_block_writes(&th);
-			journal_end_sync(&th, s, 1);
+			journal_end_sync(&th);
 		}
 	}
 	reiserfs_write_unlock(s);
@@ -153,13 +153,15 @@
 
 extern const struct in_core_key MAX_IN_CORE_KEY;
 
-/* this is used to delete "save link" when there are no items of a
-   file it points to. It can either happen if unlink is completed but
-   "save unlink" removal, or if file has both unlink and truncate
-   pending and as unlink completes first (because key of "save link"
-   protecting unlink is bigger that a key lf "save link" which
-   protects truncate), so there left no items to make truncate
-   completion on */
+/*
+ * this is used to delete "save link" when there are no items of a
+ * file it points to. It can either happen if unlink is completed but
+ * "save unlink" removal, or if file has both unlink and truncate
+ * pending and as unlink completes first (because key of "save link"
+ * protecting unlink is bigger than a key of "save link" which
+ * protects truncate), so there left no items to make truncate
+ * completion on
+ */
 static int remove_save_link_only(struct super_block *s,
 				 struct reiserfs_key *key, int oid_free)
 {
@@ -176,7 +178,7 @@
 		/* removals are protected by direct items */
 		reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid));
 
-	return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT);
+	return journal_end(&th);
 }
 
 #ifdef CONFIG_QUOTA
@@ -258,7 +260,7 @@
 			break;
 		}
 		item_pos--;
-		ih = B_N_PITEM_HEAD(bh, item_pos);
+		ih = item_head(bh, item_pos);
 
 		if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID)
 			/* there are no "save" links anymore */
@@ -271,7 +273,7 @@
 			truncate = 0;
 
 		/* reiserfs_iget needs k_dirid and k_objectid only */
-		item = B_I_PITEM(bh, ih);
+		item = ih_item_body(bh, ih);
 		obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item);
 		obj_key.on_disk_key.k_objectid =
 		    le32_to_cpu(ih->ih_key.k_objectid);
@@ -282,8 +284,10 @@
 
 		inode = reiserfs_iget(s, &obj_key);
 		if (!inode) {
-			/* the unlink almost completed, it just did not manage to remove
-			   "save" link and release objectid */
+			/*
+			 * the unlink almost completed, it just did not
+			 * manage to remove "save" link and release objectid
+			 */
 			reiserfs_warning(s, "vs-2180", "iget failed for %K",
 					 &obj_key);
 			retval = remove_save_link_only(s, &save_link_key, 1);
@@ -303,10 +307,13 @@
 		reiserfs_write_lock_nested(inode->i_sb, depth);
 
 		if (truncate && S_ISDIR(inode->i_mode)) {
-			/* We got a truncate request for a dir which is impossible.
-			   The only imaginable way is to execute unfinished truncate request
-			   then boot into old kernel, remove the file and create dir with
-			   the same key. */
+			/*
+			 * We got a truncate request for a dir which
+			 * is impossible.  The only imaginable way is to
+			 * execute unfinished truncate request then boot
+			 * into old kernel, remove the file and create dir
+			 * with the same key.
+			 */
 			reiserfs_warning(s, "green-2101",
 					 "impossible truncate on a "
 					 "directory %k. Please report",
@@ -320,14 +327,16 @@
 		if (truncate) {
 			REISERFS_I(inode)->i_flags |=
 			    i_link_saved_truncate_mask;
-			/* not completed truncate found. New size was committed together
-			   with "save" link */
+			/*
+			 * not completed truncate found. New size was
+			 * committed together with "save" link
+			 */
 			reiserfs_info(s, "Truncating %k to %Ld ..",
 				      INODE_PKEY(inode), inode->i_size);
-			reiserfs_truncate_file(inode,
-					       0
-					       /*don't update modification time */
-					       );
+
+			/* don't update modification time */
+			reiserfs_truncate_file(inode, 0);
+
 			retval = remove_save_link(inode, truncate);
 		} else {
 			REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
@@ -373,10 +382,12 @@
 	return retval;
 }
 
-/* to protect file being unlinked from getting lost we "safe" link files
-   being unlinked. This link will be deleted in the same transaction with last
-   item of file. mounting the filesystem we scan all these links and remove
-   files which almost got lost */
+/*
+ * to protect file being unlinked from getting lost we "safe" link files
+ * being unlinked. This link will be deleted in the same transaction with last
+ * item of file. mounting the filesystem we scan all these links and remove
+ * files which almost got lost
+ */
 void add_save_link(struct reiserfs_transaction_handle *th,
 		   struct inode *inode, int truncate)
 {
@@ -495,7 +506,7 @@
 	} else
 		REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask;
 
-	return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
+	return journal_end(&th);
 }
 
 static void reiserfs_kill_sb(struct super_block *s)
@@ -530,19 +541,23 @@
 
 	reiserfs_write_lock(s);
 
-	/* change file system state to current state if it was mounted with read-write permissions */
+	/*
+	 * change file system state to current state if it was mounted
+	 * with read-write permissions
+	 */
 	if (!(s->s_flags & MS_RDONLY)) {
 		if (!journal_begin(&th, s, 10)) {
 			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 						     1);
 			set_sb_umount_state(SB_DISK_SUPER_BLOCK(s),
 					    REISERFS_SB(s)->s_mount_state);
-			journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
 		}
 	}
 
-	/* note, journal_release checks for readonly mount, and can decide not
-	 ** to do a journal_end
+	/*
+	 * note, journal_release checks for readonly mount, and can
+	 * decide not to do a journal_end
 	 */
 	journal_release(&th, s);
 
@@ -559,6 +574,7 @@
 
 	reiserfs_write_unlock(s);
 	mutex_destroy(&REISERFS_SB(s)->lock);
+	destroy_workqueue(REISERFS_SB(s)->commit_wq);
 	kfree(s->s_fs_info);
 	s->s_fs_info = NULL;
 }
@@ -634,15 +650,16 @@
 	}
 	reiserfs_write_lock(inode->i_sb);
 
-	/* this is really only used for atime updates, so they don't have
-	 ** to be included in O_SYNC or fsync
+	/*
+	 * this is really only used for atime updates, so they don't have
+	 * to be included in O_SYNC or fsync
 	 */
 	err = journal_begin(&th, inode->i_sb, 1);
 	if (err)
 		goto out;
 
 	reiserfs_update_sd(&th, inode);
-	journal_end(&th, inode->i_sb, 1);
+	journal_end(&th);
 
 out:
 	reiserfs_write_unlock(inode->i_sb);
@@ -788,31 +805,53 @@
 	.get_parent = reiserfs_get_parent,
 };
 
-/* this struct is used in reiserfs_getopt () for containing the value for those
-   mount options that have values rather than being toggles. */
+/*
+ * this struct is used in reiserfs_getopt () for containing the value for
+ * those mount options that have values rather than being toggles.
+ */
 typedef struct {
 	char *value;
-	int setmask;		/* bitmask which is to set on mount_options bitmask when this
-				   value is found, 0 is no bits are to be changed. */
-	int clrmask;		/* bitmask which is to clear on mount_options bitmask when  this
-				   value is found, 0 is no bits are to be changed. This is
-				   applied BEFORE setmask */
+	/*
+	 * bitmask which is to set on mount_options bitmask
+	 * when this value is found, 0 is no bits are to be changed.
+	 */
+	int setmask;
+	/*
+	 * bitmask which is to clear on mount_options bitmask
+	 * when this value is found, 0 is no bits are to be changed.
+	 * This is applied BEFORE setmask
+	 */
+	int clrmask;
 } arg_desc_t;
 
 /* Set this bit in arg_required to allow empty arguments */
 #define REISERFS_OPT_ALLOWEMPTY 31
 
-/* this struct is used in reiserfs_getopt() for describing the set of reiserfs
-   mount options */
+/*
+ * this struct is used in reiserfs_getopt() for describing the
+ * set of reiserfs mount options
+ */
 typedef struct {
 	char *option_name;
-	int arg_required;	/* 0 if argument is not required, not 0 otherwise */
-	const arg_desc_t *values;	/* list of values accepted by an option */
-	int setmask;		/* bitmask which is to set on mount_options bitmask when this
-				   value is found, 0 is no bits are to be changed. */
-	int clrmask;		/* bitmask which is to clear on mount_options bitmask when  this
-				   value is found, 0 is no bits are to be changed. This is
-				   applied BEFORE setmask */
+
+	/* 0 if argument is not required, not 0 otherwise */
+	int arg_required;
+
+	/* list of values accepted by an option */
+	const arg_desc_t *values;
+
+	/*
+	 * bitmask which is to set on mount_options bitmask
+	 * when this value is found, 0 is no bits are to be changed.
+	 */
+	int setmask;
+
+	/*
+	 * bitmask which is to clear on mount_options bitmask
+	 * when this value is found, 0 is no bits are to be changed.
+	 * This is applied BEFORE setmask
+	 */
+	int clrmask;
 } opt_desc_t;
 
 /* possible values for -o data= */
@@ -833,8 +872,10 @@
 	{.value = NULL}
 };
 
-/* possible values for "-o block-allocator=" and bits which are to be set in
-   s_mount_opt of reiserfs specific part of in-core super block */
+/*
+ * possible values for "-o block-allocator=" and bits which are to be set in
+ * s_mount_opt of reiserfs specific part of in-core super block
+ */
 static const arg_desc_t balloc[] = {
 	{"noborder", 1 << REISERFS_NO_BORDER, 0},
 	{"border", 0, 1 << REISERFS_NO_BORDER},
@@ -864,21 +905,25 @@
 	{NULL, 0, 0},
 };
 
-/* proceed only one option from a list *cur - string containing of mount options
-   opts - array of options which are accepted
-   opt_arg - if option is found and requires an argument and if it is specifed
-   in the input - pointer to the argument is stored here
-   bit_flags - if option requires to set a certain bit - it is set here
-   return -1 if unknown option is found, opt->arg_required otherwise */
+/*
+ * proceed only one option from a list *cur - string containing of mount
+ * options
+ * opts - array of options which are accepted
+ * opt_arg - if option is found and requires an argument and if it is specified
+ * in the input - pointer to the argument is stored here
+ * bit_flags - if option requires to set a certain bit - it is set here
+ * return -1 if unknown option is found, opt->arg_required otherwise
+ */
 static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
 			   char **opt_arg, unsigned long *bit_flags)
 {
 	char *p;
-	/* foo=bar,
-	   ^   ^  ^
-	   |   |  +-- option_end
-	   |   +-- arg_start
-	   +-- option_start
+	/*
+	 * foo=bar,
+	 * ^   ^  ^
+	 * |   |  +-- option_end
+	 * |   +-- arg_start
+	 * +-- option_start
 	 */
 	const opt_desc_t *opt;
 	const arg_desc_t *arg;
@@ -893,9 +938,12 @@
 	}
 
 	if (!strncmp(p, "alloc=", 6)) {
-		/* Ugly special case, probably we should redo options parser so that
-		   it can understand several arguments for some options, also so that
-		   it can fill several bitfields with option values. */
+		/*
+		 * Ugly special case, probably we should redo options
+		 * parser so that it can understand several arguments for
+		 * some options, also so that it can fill several bitfields
+		 * with option values.
+		 */
 		if (reiserfs_parse_alloc_options(s, p + 6)) {
 			return -1;
 		} else {
@@ -958,7 +1006,10 @@
 		return -1;
 	}
 
-	/* move to the argument, or to next option if argument is not required */
+	/*
+	 * move to the argument, or to next option if argument is not
+	 * required
+	 */
 	p++;
 
 	if (opt->arg_required
@@ -995,12 +1046,20 @@
 }
 
 /* returns 0 if something is wrong in option string, 1 - otherwise */
-static int reiserfs_parse_options(struct super_block *s, char *options,	/* string given via mount's -o */
+static int reiserfs_parse_options(struct super_block *s,
+
+				  /* string given via mount's -o */
+				  char *options,
+
+				  /*
+				   * after the parsing phase, contains the
+				   * collection of bitflags defining what
+				   * mount options were selected.
+				   */
 				  unsigned long *mount_options,
-				  /* after the parsing phase, contains the
-				     collection of bitflags defining what
-				     mount options were selected. */
-				  unsigned long *blocks,	/* strtol-ed from NNN of resize=NNN */
+
+				  /* strtol-ed from NNN of resize=NNN */
+				  unsigned long *blocks,
 				  char **jdev_name,
 				  unsigned int *commit_max_age,
 				  char **qf_names,
@@ -1010,7 +1069,10 @@
 	char *arg = NULL;
 	char *pos;
 	opt_desc_t opts[] = {
-		/* Compatibility stuff, so that -o notail for old setups still work */
+		/*
+		 * Compatibility stuff, so that -o notail for old
+		 * setups still work
+		 */
 		{"tails",.arg_required = 't',.values = tails},
 		{"notail",.clrmask =
 		 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
@@ -1055,8 +1117,10 @@
 
 	*blocks = 0;
 	if (!options || !*options)
-		/* use default configuration: create tails, journaling on, no
-		   conversion to newest format */
+		/*
+		 * use default configuration: create tails, journaling on, no
+		 * conversion to newest format
+		 */
 		return 1;
 
 	for (pos = options; pos;) {
@@ -1109,7 +1173,8 @@
 
 		if (c == 'j') {
 			if (arg && *arg && jdev_name) {
-				if (*jdev_name) {	//Hm, already assigned?
+				/* Hm, already assigned? */
+				if (*jdev_name) {
 					reiserfs_warning(s, "super-6510",
 							 "journal device was "
 							 "already specified to "
@@ -1362,8 +1427,10 @@
 	safe_mask |= 1 << REISERFS_USRQUOTA;
 	safe_mask |= 1 << REISERFS_GRPQUOTA;
 
-	/* Update the bitmask, taking care to keep
-	 * the bits we're not allowed to change here */
+	/*
+	 * Update the bitmask, taking care to keep
+	 * the bits we're not allowed to change here
+	 */
 	REISERFS_SB(s)->s_mount_opt =
 	    (REISERFS_SB(s)->
 	     s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
@@ -1410,7 +1477,7 @@
 		/* Mounting a rw partition read-only. */
 		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
 		set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state);
-		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+		journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
 	} else {
 		/* remount read-write */
 		if (!(s->s_flags & MS_RDONLY)) {
@@ -1427,7 +1494,9 @@
 		handle_data_mode(s, mount_options);
 		handle_barrier_mode(s, mount_options);
 		REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
-		s->s_flags &= ~MS_RDONLY;	/* now it is safe to call journal_begin */
+
+		/* now it is safe to call journal_begin */
+		s->s_flags &= ~MS_RDONLY;
 		err = journal_begin(&th, s, 10);
 		if (err)
 			goto out_err_unlock;
@@ -1440,12 +1509,12 @@
 		if (!old_format_only(s))
 			set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
 		/* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
-		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+		journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
 		REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
 	}
 	/* this will force a full flush of all journal lists */
 	SB_JOURNAL(s)->j_must_wait = 1;
-	err = journal_end(&th, s, 10);
+	err = journal_end(&th);
 	if (err)
 		goto out_err_unlock;
 
@@ -1489,9 +1558,9 @@
 		brelse(bh);
 		return 1;
 	}
-	//
-	// ok, reiserfs signature (old or new) found in at the given offset
-	//
+	/*
+	 * ok, reiserfs signature (old or new) found at the given offset
+	 */
 	fs_blocksize = sb_blocksize(rs);
 	brelse(bh);
 	sb_set_blocksize(s, fs_blocksize);
@@ -1529,9 +1598,11 @@
 	SB_BUFFER_WITH_SB(s) = bh;
 	SB_DISK_SUPER_BLOCK(s) = rs;
 
+	/*
+	 * magic is of non-standard journal filesystem, look at s_version to
+	 * find which format is in use
+	 */
 	if (is_reiserfs_jr(rs)) {
-		/* magic is of non-standard journal filesystem, look at s_version to
-		   find which format is in use */
 		if (sb_version(rs) == REISERFS_VERSION_2)
 			reiserfs_info(s, "found reiserfs format \"3.6\""
 				      " with non-standard journal\n");
@@ -1545,8 +1616,10 @@
 			return 1;
 		}
 	} else
-		/* s_version of standard format may contain incorrect information,
-		   so we just look at the magic string */
+		/*
+		 * s_version of standard format may contain incorrect
+		 * information, so we just look at the magic string
+		 */
 		reiserfs_info(s,
 			      "found reiserfs format \"%s\" with standard journal\n",
 			      is_reiserfs_3_5(rs) ? "3.5" : "3.6");
@@ -1558,8 +1631,9 @@
 	s->dq_op = &reiserfs_quota_operations;
 #endif
 
-	/* new format is limited by the 32 bit wide i_blocks field, want to
-	 ** be one full block below that.
+	/*
+	 * new format is limited by the 32 bit wide i_blocks field, want to
+	 * be one full block below that.
 	 */
 	s->s_maxbytes = (512LL << 32) - s->s_blocksize;
 	return 0;
@@ -1568,7 +1642,7 @@
 /* after journal replay, reread all bitmap and super blocks */
 static int reread_meta_blocks(struct super_block *s)
 {
-	ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
+	ll_rw_block(READ, 1, &SB_BUFFER_WITH_SB(s));
 	wait_on_buffer(SB_BUFFER_WITH_SB(s));
 	if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
 		reiserfs_warning(s, "reiserfs-2504", "error reading the super");
@@ -1578,14 +1652,15 @@
 	return 0;
 }
 
-/////////////////////////////////////////////////////
-// hash detection stuff
+/* hash detection stuff */
 
-// if root directory is empty - we set default - Yura's - hash and
-// warn about it
-// FIXME: we look for only one name in a directory. If tea and yura
-// bith have the same value - we ask user to send report to the
-// mailing list
+/*
+ * if root directory is empty - we set default - Yura's - hash and
+ * warn about it
+ * FIXME: we look for only one name in a directory. If tea and yura
+ * both have the same value - we ask user to send report to the
+ * mailing list
+ */
 static __u32 find_hash_out(struct super_block *s)
 {
 	int retval;
@@ -1593,92 +1668,83 @@
 	struct cpu_key key;
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
+	struct reiserfs_de_head *deh;
 	__u32 hash = DEFAULT_HASH;
+	__u32 deh_hashval, teahash, r5hash, yurahash;
 
 	inode = s->s_root->d_inode;
 
-	do {			// Some serious "goto"-hater was there ;)
-		u32 teahash, r5hash, yurahash;
+	make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
+	retval = search_by_entry_key(s, &key, &path, &de);
+	if (retval == IO_ERROR) {
+		pathrelse(&path);
+		return UNSET_HASH;
+	}
+	if (retval == NAME_NOT_FOUND)
+		de.de_entry_num--;
 
-		make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
-		retval = search_by_entry_key(s, &key, &path, &de);
-		if (retval == IO_ERROR) {
-			pathrelse(&path);
-			return UNSET_HASH;
-		}
-		if (retval == NAME_NOT_FOUND)
-			de.de_entry_num--;
-		set_de_name_and_namelen(&de);
-		if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) {
-			/* allow override in this case */
-			if (reiserfs_rupasov_hash(s)) {
-				hash = YURA_HASH;
-			}
-			reiserfs_info(s, "FS seems to be empty, autodetect "
-					 "is using the default hash\n");
-			break;
-		}
-		r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
-		teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
-		yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
-		if (((teahash == r5hash)
-		     &&
-		     (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])))
-		      == r5hash)) || ((teahash == yurahash)
-				      && (yurahash ==
-					  GET_HASH_VALUE(deh_offset
-							 (&
-							  (de.
-							   de_deh[de.
-								  de_entry_num])))))
-		    || ((r5hash == yurahash)
-			&& (yurahash ==
-			    GET_HASH_VALUE(deh_offset
-					   (&(de.de_deh[de.de_entry_num])))))) {
-			reiserfs_warning(s, "reiserfs-2506", "Unable to "
-					 "automatically detect hash function. "
-					 "Please mount with -o "
-					 "hash={tea,rupasov,r5}");
-			hash = UNSET_HASH;
-			break;
-		}
-		if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) ==
-		    yurahash)
+	set_de_name_and_namelen(&de);
+	deh = de.de_deh + de.de_entry_num;
+
+	if (deh_offset(deh) == DOT_DOT_OFFSET) {
+		/* allow override in this case */
+		if (reiserfs_rupasov_hash(s))
 			hash = YURA_HASH;
-		else if (GET_HASH_VALUE
-			 (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash)
-			hash = TEA_HASH;
-		else if (GET_HASH_VALUE
-			 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)
-			hash = R5_HASH;
-		else {
-			reiserfs_warning(s, "reiserfs-2506",
-					 "Unrecognised hash function");
-			hash = UNSET_HASH;
-		}
-	} while (0);
+		reiserfs_info(s, "FS seems to be empty, autodetect is using the default hash\n");
+		goto out;
+	}
 
+	deh_hashval = GET_HASH_VALUE(deh_offset(deh));
+	r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
+	teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
+	yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
+
+	if ((teahash == r5hash && deh_hashval == r5hash) ||
+	    (teahash == yurahash && deh_hashval == yurahash) ||
+	    (r5hash == yurahash && deh_hashval == yurahash)) {
+		reiserfs_warning(s, "reiserfs-2506",
+				 "Unable to automatically detect hash "
+				 "function. Please mount with -o "
+				 "hash={tea,rupasov,r5}");
+		hash = UNSET_HASH;
+		goto out;
+	}
+
+	if (deh_hashval == yurahash)
+		hash = YURA_HASH;
+	else if (deh_hashval == teahash)
+		hash = TEA_HASH;
+	else if (deh_hashval == r5hash)
+		hash = R5_HASH;
+	else {
+		reiserfs_warning(s, "reiserfs-2506",
+				 "Unrecognised hash function");
+		hash = UNSET_HASH;
+	}
+out:
 	pathrelse(&path);
 	return hash;
 }
 
-// finds out which hash names are sorted with
+/* finds out which hash names are sorted with */
 static int what_hash(struct super_block *s)
 {
 	__u32 code;
 
 	code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
 
-	/* reiserfs_hash_detect() == true if any of the hash mount options
-	 ** were used.  We must check them to make sure the user isn't
-	 ** using a bad hash value
+	/*
+	 * reiserfs_hash_detect() == true if any of the hash mount options
+	 * were used.  We must check them to make sure the user isn't
+	 * using a bad hash value
 	 */
 	if (code == UNSET_HASH || reiserfs_hash_detect(s))
 		code = find_hash_out(s);
 
 	if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
-		/* detection has found the hash, and we must check against the
-		 ** mount options
+		/*
+		 * detection has found the hash, and we must check against the
+		 * mount options
 		 */
 		if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
 			reiserfs_warning(s, "reiserfs-2507",
@@ -1700,7 +1766,10 @@
 			code = UNSET_HASH;
 		}
 	} else {
-		/* find_hash_out was not called or could not determine the hash */
+		/*
+		 * find_hash_out was not called or
+		 * could not determine the hash
+		 */
 		if (reiserfs_rupasov_hash(s)) {
 			code = YURA_HASH;
 		} else if (reiserfs_tea_hash(s)) {
@@ -1710,8 +1779,9 @@
 		}
 	}
 
-	/* if we are mounted RW, and we have a new valid hash code, update
-	 ** the super
+	/*
+	 * if we are mounted RW, and we have a new valid hash code, update
+	 * the super
 	 */
 	if (code != UNSET_HASH &&
 	    !(s->s_flags & MS_RDONLY) &&
@@ -1721,7 +1791,7 @@
 	return code;
 }
 
-// return pointer to appropriate function
+/* return pointer to appropriate function */
 static hashf_t hash_function(struct super_block *s)
 {
 	switch (what_hash(s)) {
@@ -1738,7 +1808,7 @@
 	return NULL;
 }
 
-// this is used to set up correct value for old partitions
+/* this is used to set up correct value for old partitions */
 static int function2code(hashf_t func)
 {
 	if (func == keyed_hash)
@@ -1748,7 +1818,7 @@
 	if (func == r5_hash)
 		return R5_HASH;
 
-	BUG();			// should never happen
+	BUG();			/* should never happen */
 
 	return 0;
 }
@@ -1783,8 +1853,7 @@
 	sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
 	sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO);
 	sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
-	/* no preallocation minimum, be smart in
-	   reiserfs_file_write instead */
+	/* no preallocation minimum, be smart in reiserfs_file_write instead */
 	sbi->s_alloc_options.preallocmin = 0;
 	/* Preallocate by 16 blocks (17-1) at once */
 	sbi->s_alloc_options.preallocsize = 17;
@@ -1796,9 +1865,17 @@
 	mutex_init(&sbi->lock);
 	sbi->lock_depth = -1;
 
+	sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0,
+					 s->s_id);
+	if (!sbi->commit_wq) {
+		SWARN(silent, s, "", "Cannot allocate commit workqueue");
+		errval = -ENOMEM;
+		goto error_unlocked;
+	}
+
 	jdev_name = NULL;
 	if (reiserfs_parse_options
-	    (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
+	    (s, (char *)data, &sbi->s_mount_opt, &blocks, &jdev_name,
 	     &commit_max_age, qf_names, &qfmt) == 0) {
 		goto error_unlocked;
 	}
@@ -1819,10 +1896,17 @@
 		goto error_unlocked;
 	}
 
-	/* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
+	/*
+	 * try old format (undistributed bitmap, super block in 8-th 1k
+	 * block of a device)
+	 */
 	if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
 		old_format = 1;
-	/* try new format (64-th 1k block), which can contain reiserfs super block */
+
+	/*
+	 * try new format (64-th 1k block), which can contain reiserfs
+	 * super block
+	 */
 	else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
 		SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
 		      s->s_id);
@@ -1830,9 +1914,11 @@
 	}
 
 	rs = SB_DISK_SUPER_BLOCK(s);
-	/* Let's do basic sanity check to verify that underlying device is not
-	   smaller than the filesystem. If the check fails then abort and scream,
-	   because bad stuff will happen otherwise. */
+	/*
+	 * Let's do basic sanity check to verify that underlying device is not
+	 * smaller than the filesystem. If the check fails then abort and
+	 * scream, because bad stuff will happen otherwise.
+	 */
 	if (s->s_bdev && s->s_bdev->bd_inode
 	    && i_size_read(s->s_bdev->bd_inode) <
 	    sb_block_count(rs) * sb_blocksize(rs)) {
@@ -1876,15 +1962,16 @@
 		printk("reiserfs: using flush barriers\n");
 	}
 
-	// set_device_ro(s->s_dev, 1) ;
 	if (journal_init(s, jdev_name, old_format, commit_max_age)) {
 		SWARN(silent, s, "sh-2022",
 		      "unable to initialize journal space");
 		goto error_unlocked;
 	} else {
-		jinit_done = 1;	/* once this is set, journal_release must be called
-				 ** if we error out of the mount
-				 */
+		/*
+		 * once this is set, journal_release must be called
+		 * if we error out of the mount
+		 */
+		jinit_done = 1;
 	}
 
 	if (reread_meta_blocks(s)) {
@@ -1905,7 +1992,7 @@
 	args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
 	root_inode =
 	    iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
-			 reiserfs_init_locked_inode, (void *)(&args));
+			 reiserfs_init_locked_inode, (void *)&args);
 	if (!root_inode) {
 		SWARN(silent, s, "jmacd-10", "get root inode failed");
 		goto error_unlocked;
@@ -1929,7 +2016,7 @@
 	s->s_root = d_make_root(root_inode);
 	if (!s->s_root)
 		goto error;
-	// define and initialize hash function
+	/* define and initialize hash function */
 	sbi->s_hash_function = hash_function(s);
 	if (sbi->s_hash_function == NULL) {
 		dput(s->s_root);
@@ -1939,11 +2026,11 @@
 
 	if (is_reiserfs_3_5(rs)
 	    || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1))
-		set_bit(REISERFS_3_5, &(sbi->s_properties));
+		set_bit(REISERFS_3_5, &sbi->s_properties);
 	else if (old_format)
-		set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties));
+		set_bit(REISERFS_OLD_FORMAT, &sbi->s_properties);
 	else
-		set_bit(REISERFS_3_6, &(sbi->s_properties));
+		set_bit(REISERFS_3_6, &sbi->s_properties);
 
 	if (!(s->s_flags & MS_RDONLY)) {
 
@@ -1958,10 +2045,12 @@
 		set_sb_umount_state(rs, REISERFS_ERROR_FS);
 		set_sb_fs_state(rs, 0);
 
-		/* Clear out s_bmap_nr if it would wrap. We can handle this
+		/*
+		 * Clear out s_bmap_nr if it would wrap. We can handle this
 		 * case, but older revisions can't. This will cause the
 		 * file system to fail mount on those older implementations,
-		 * avoiding corruption. -jeffm */
+		 * avoiding corruption. -jeffm
+		 */
 		if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
 		    sb_bmap_nr(rs) != 0) {
 			reiserfs_warning(s, "super-2030", "This file system "
@@ -1974,8 +2063,10 @@
 		}
 
 		if (old_format_only(s)) {
-			/* filesystem of format 3.5 either with standard or non-standard
-			   journal */
+			/*
+			 * filesystem of format 3.5 either with standard
+			 * or non-standard journal
+			 */
 			if (convert_reiserfs(s)) {
 				/* and -o conv is given */
 				if (!silent)
@@ -1983,8 +2074,11 @@
 						      "converting 3.5 filesystem to the 3.6 format");
 
 				if (is_reiserfs_3_5(rs))
-					/* put magic string of 3.6 format. 2.2 will not be able to
-					   mount this filesystem anymore */
+					/*
+					 * put magic string of 3.6 format.
+					 * 2.2 will not be able to
+					 * mount this filesystem anymore
+					 */
 					memcpy(rs->s_v1.s_magic,
 					       reiserfs_3_6_magic_string,
 					       sizeof
@@ -1992,8 +2086,8 @@
 
 				set_sb_version(rs, REISERFS_VERSION_2);
 				reiserfs_convert_objectid_map_v1(s);
-				set_bit(REISERFS_3_6, &(sbi->s_properties));
-				clear_bit(REISERFS_3_5, &(sbi->s_properties));
+				set_bit(REISERFS_3_6, &sbi->s_properties);
+				clear_bit(REISERFS_3_5, &sbi->s_properties);
 			} else if (!silent) {
 				reiserfs_info(s, "using 3.5.x disk format\n");
 			}
@@ -2001,8 +2095,8 @@
 			set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
 
 
-		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
-		errval = journal_end(&th, s, 1);
+		journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
+		errval = journal_end(&th);
 		if (errval) {
 			dput(s->s_root);
 			s->s_root = NULL;
@@ -2018,7 +2112,9 @@
 		}
 		reiserfs_write_lock(s);
 
-		/* look for files which were to be removed in previous session */
+		/*
+		 * look for files which were to be removed in previous session
+		 */
 		finish_unfinished(s);
 	} else {
 		if (old_format_only(s) && !silent) {
@@ -2034,7 +2130,9 @@
 		}
 		reiserfs_write_lock(s);
 	}
-	// mark hash in super block: it could be unset. overwrite should be ok
+	/*
+	 * mark hash in super block: it could be unset. overwrite should be ok
+	 */
 	set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
 
 	handle_attrs(s);
@@ -2111,9 +2209,7 @@
 	depth = reiserfs_write_unlock_nested(dquot->dq_sb);
 	ret = dquot_commit(dquot);
 	reiserfs_write_lock_nested(dquot->dq_sb, depth);
-	err =
-	    journal_end(&th, dquot->dq_sb,
-			REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+	err = journal_end(&th);
 	if (!ret && err)
 		ret = err;
 out:
@@ -2136,9 +2232,7 @@
 	depth = reiserfs_write_unlock_nested(dquot->dq_sb);
 	ret = dquot_acquire(dquot);
 	reiserfs_write_lock_nested(dquot->dq_sb, depth);
-	err =
-	    journal_end(&th, dquot->dq_sb,
-			REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+	err = journal_end(&th);
 	if (!ret && err)
 		ret = err;
 out:
@@ -2163,9 +2257,7 @@
 	}
 	ret = dquot_release(dquot);
 	reiserfs_write_lock(dquot->dq_sb);
-	err =
-	    journal_end(&th, dquot->dq_sb,
-			REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	err = journal_end(&th);
 	if (!ret && err)
 		ret = err;
 	reiserfs_write_unlock(dquot->dq_sb);
@@ -2198,7 +2290,7 @@
 	depth = reiserfs_write_unlock_nested(sb);
 	ret = dquot_commit_info(sb, type);
 	reiserfs_write_lock_nested(sb, depth);
-	err = journal_end(&th, sb, 2);
+	err = journal_end(&th);
 	if (!ret && err)
 		ret = err;
 out:
@@ -2238,7 +2330,10 @@
 		goto out;
 	}
 	inode = path->dentry->d_inode;
-	/* We must not pack tails for quota files on reiserfs for quota IO to work */
+	/*
+	 * We must not pack tails for quota files on reiserfs for quota
+	 * IO to work
+	 */
 	if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
 		err = reiserfs_unpack(inode, NULL);
 		if (err) {
@@ -2268,7 +2363,7 @@
 		err = journal_begin(&th, sb, 1);
 		if (err)
 			goto out;
-		err = journal_end_sync(&th, sb, 1);
+		err = journal_end_sync(&th);
 		if (err)
 			goto out;
 	}
@@ -2279,10 +2374,12 @@
 	return err;
 }
 
-/* Read data from quotafile - avoid pagecache and such because we cannot afford
+/*
+ * Read data from quotafile - avoid pagecache and such because we cannot afford
  * acquiring the locks... As quota files are never truncated and quota code
  * itself serializes the operations (and no one else should touch the files)
- * we don't have to be afraid of races */
+ * we don't have to be afraid of races
+ */
 static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
 				   size_t len, loff_t off)
 {
@@ -2303,7 +2400,10 @@
 		    sb->s_blocksize - offset <
 		    toread ? sb->s_blocksize - offset : toread;
 		tmp_bh.b_state = 0;
-		/* Quota files are without tails so we can safely use this function */
+		/*
+		 * Quota files are without tails so we can safely
+		 * use this function
+		 */
 		reiserfs_write_lock(sb);
 		err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
 		reiserfs_write_unlock(sb);
@@ -2326,8 +2426,10 @@
 	return len;
 }
 
-/* Write to quotafile (we know the transaction is already started and has
- * enough credits) */
+/*
+ * Write to quotafile (we know the transaction is already started and has
+ * enough credits)
+ */
 static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 				    const char *data, size_t len, loff_t off)
 {
@@ -2368,7 +2470,7 @@
 		unlock_buffer(bh);
 		reiserfs_write_lock(sb);
 		reiserfs_prepare_for_journal(sb, bh, 1);
-		journal_mark_dirty(current->journal_info, sb, bh);
+		journal_mark_dirty(current->journal_info, bh);
 		if (!journal_quota)
 			reiserfs_add_ordered_list(inode, bh);
 		reiserfs_write_unlock(sb);
@@ -2402,18 +2504,18 @@
 {
 	int ret;
 
-	if ((ret = init_inodecache())) {
+	ret = init_inodecache();
+	if (ret)
 		return ret;
-	}
 
 	reiserfs_proc_info_global_init();
 
 	ret = register_filesystem(&reiserfs_fs_type);
+	if (ret)
+		goto out;
 
-	if (ret == 0) {
-		return 0;
-	}
-
+	return 0;
+out:
 	reiserfs_proc_info_global_done();
 	destroy_inodecache();
 

diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 5e2624d..f41e19b 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c

@@ -1,5 +1,6 @@
 /*
- * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details
+ * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright
+ * details
  */
 
 #include <linux/time.h>
@@ -7,29 +8,41 @@
 #include <linux/buffer_head.h>
 #include "reiserfs.h"
 
-/* access to tail : when one is going to read tail it must make sure, that is not running.
- direct2indirect and indirect2direct can not run concurrently */
+/*
+ * access to tail: a reader of the tail must make sure that no conversion is
+ * running.  direct2indirect and indirect2direct can not run concurrently
+ */
 
-/* Converts direct items to an unformatted node. Panics if file has no
-   tail. -ENOSPC if no disk space for conversion */
-/* path points to first direct item of the file regarless of how many of
-   them are there */
+/*
+ * Converts direct items to an unformatted node. Panics if file has no
+ * tail. -ENOSPC if no disk space for conversion
+ */
+/*
+ * path points to first direct item of the file regardless of how many of
+ * them are there
+ */
 int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
 		    struct treepath *path, struct buffer_head *unbh,
 		    loff_t tail_offset)
 {
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *up_to_date_bh;
-	struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
+	struct item_head *p_le_ih = tp_item_head(path);
 	unsigned long total_tail = 0;
-	struct cpu_key end_key;	/* Key to search for the last byte of the
-				   converted item. */
-	struct item_head ind_ih;	/* new indirect item to be inserted or
-					   key of unfm pointer to be pasted */
-	int blk_size, retval;	/* returned value for reiserfs_insert_item and clones */
-	unp_t unfm_ptr;		/* Handle on an unformatted node
-				   that will be inserted in the
-				   tree. */
+
+	/* Key to search for the last byte of the converted item. */
+	struct cpu_key end_key;
+
+	/*
+	 * new indirect item to be inserted or key
+	 * of unfm pointer to be pasted
+	 */
+	struct item_head ind_ih;
+	int blk_size;
+	/* returned value for reiserfs_insert_item and clones */
+	int  retval;
+	/* Handle on an unformatted node that will be inserted in the tree. */
+	unp_t unfm_ptr;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -37,8 +50,10 @@
 
 	blk_size = sb->s_blocksize;
 
-	/* and key to search for append or insert pointer to the new
-	   unformatted node. */
+	/*
+	 * and key to search for append or insert pointer to the new
+	 * unformatted node.
+	 */
 	copy_item_head(&ind_ih, p_le_ih);
 	set_le_ih_k_offset(&ind_ih, tail_offset);
 	set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
@@ -55,7 +70,7 @@
 		return -EIO;
 	}
 
-	p_le_ih = PATH_PITEM_HEAD(path);
+	p_le_ih = tp_item_head(path);
 
 	unfm_ptr = cpu_to_le32(unbh->b_blocknr);
 
@@ -76,36 +91,43 @@
 	if (retval) {
 		return retval;
 	}
-	// note: from here there are two keys which have matching first
-	// three key components. They only differ by the fourth one.
+	/*
+	 * note: from here there are two keys which have matching first
+	 * three key components. They only differ by the fourth one.
+	 */
 
 	/* Set the key to search for the direct items of the file */
 	make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
 		     4);
 
-	/* Move bytes from the direct items to the new unformatted node
-	   and delete them. */
+	/*
+	 * Move bytes from the direct items to the new unformatted node
+	 * and delete them.
+	 */
 	while (1) {
 		int tail_size;
 
-		/* end_key.k_offset is set so, that we will always have found
-		   last item of the file */
+		/*
+		 * end_key.k_offset is set so, that we will always have found
+		 * last item of the file
+		 */
 		if (search_for_position_by_key(sb, &end_key, path) ==
 		    POSITION_FOUND)
 			reiserfs_panic(sb, "PAP-14050",
 				       "direct item (%K) not found", &end_key);
-		p_le_ih = PATH_PITEM_HEAD(path);
+		p_le_ih = tp_item_head(path);
 		RFALSE(!is_direct_le_ih(p_le_ih),
 		       "vs-14055: direct item expected(%K), found %h",
 		       &end_key, p_le_ih);
 		tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
 		    + ih_item_len(p_le_ih) - 1;
 
-		/* we only send the unbh pointer if the buffer is not up to date.
-		 ** this avoids overwriting good data from writepage() with old data
-		 ** from the disk or buffer cache
-		 ** Special case: unbh->b_page will be NULL if we are coming through
-		 ** DIRECT_IO handler here.
+		/*
+		 * we only send the unbh pointer if the buffer is not
+		 * up to date.  this avoids overwriting good data from
+		 * writepage() with old data from the disk or buffer cache
+		 * Special case: unbh->b_page will be NULL if we are coming
+		 * through DIRECT_IO handler here.
 		 */
 		if (!unbh->b_page || buffer_uptodate(unbh)
 		    || PageUptodate(unbh->b_page)) {
@@ -117,13 +139,15 @@
 						up_to_date_bh);
 
 		total_tail += retval;
+
+		/* done: file does not have direct items anymore */
 		if (tail_size == retval)
-			// done: file does not have direct items anymore
 			break;
 
 	}
-	/* if we've copied bytes from disk into the page, we need to zero
-	 ** out the unused part of the block (it was not up to date before)
+	/*
+	 * if we've copied bytes from disk into the page, we need to zero
+	 * out the unused part of the block (it was not up to date before)
 	 */
 	if (up_to_date_bh) {
 		unsigned pgoff =
@@ -146,9 +170,11 @@
 		BUG();
 	}
 	clear_buffer_dirty(bh);
-	/* Remove the buffer from whatever list it belongs to. We are mostly
-	   interested in removing it from per-sb j_dirty_buffers list, to avoid
-	   BUG() on attempt to write not mapped buffer */
+	/*
+	 * Remove the buffer from whatever list it belongs to. We are mostly
+	 * interested in removing it from per-sb j_dirty_buffers list, to avoid
+	 * BUG() on attempt to write not mapped buffer
+	 */
 	if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
 		struct inode *inode = bh->b_page->mapping->host;
 		struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
@@ -164,12 +190,14 @@
 	unlock_buffer(bh);
 }
 
-/* this first locks inode (neither reads nor sync are permitted),
-   reads tail through page cache, insert direct item. When direct item
-   inserted successfully inode is left locked. Return value is always
-   what we expect from it (number of cut bytes). But when tail remains
-   in the unformatted node, we set mode to SKIP_BALANCING and unlock
-   inode */
+/*
+ * this first locks inode (neither reads nor sync are permitted),
+ * reads tail through page cache, insert direct item. When direct item
+ * inserted successfully inode is left locked. Return value is always
+ * what we expect from it (number of cut bytes). But when tail remains
+ * in the unformatted node, we set mode to SKIP_BALANCING and unlock
+ * inode
+ */
 int indirect2direct(struct reiserfs_transaction_handle *th,
 		    struct inode *inode, struct page *page,
 		    struct treepath *path,	/* path to the indirect item. */
@@ -194,7 +222,7 @@
 	*mode = M_SKIP_BALANCING;
 
 	/* store item head path points to. */
-	copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+	copy_item_head(&s_ih, tp_item_head(path));
 
 	tail_len = (n_new_file_size & (block_size - 1));
 	if (get_inode_sd_version(inode) == STAT_DATA_V2)
@@ -207,9 +235,11 @@
 					 1) * sb->s_blocksize;
 	pos1 = pos;
 
+	 * we are protected by i_mutex. The tail can not disappear, nor
+	 * can an append be done.
+	 * we are in truncate or packing tail in file_release
+	/*
+	 * we are protected by i_mutex. The tail can not disapper, not
+	 * append can be done either
+	 * we are in truncate or packing tail in file_release
+	 */
 
 	tail = (char *)kmap(page);	/* this can schedule */
 
@@ -220,7 +250,7 @@
 			reiserfs_panic(sb, "PAP-5520",
 				       "item to be converted %K does not exist",
 				       item_key);
-		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+		copy_item_head(&s_ih, tp_item_head(path));
 #ifdef CONFIG_REISERFS_CHECK
 		pos = le_ih_k_offset(&s_ih) - 1 +
 		    (ih_item_len(&s_ih) / UNFM_P_SIZE -
@@ -236,9 +266,10 @@
 			  pos1 + 1, TYPE_DIRECT, round_tail_len,
 			  0xffff /*ih_free_space */ );
 
-	/* we want a pointer to the first byte of the tail in the page.
-	 ** the page was locked and this part of the page was up to date when
-	 ** indirect2direct was called, so we know the bytes are still valid
+	/*
+	 * we want a pointer to the first byte of the tail in the page.
+	 * the page was locked and this part of the page was up to date when
+	 * indirect2direct was called, so we know the bytes are still valid
 	 */
 	tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
 
@@ -250,12 +281,14 @@
 	/* Insert tail as new direct item in the tree */
 	if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
 				 tail ? tail : NULL) < 0) {
-		/* No disk memory. So we can not convert last unformatted node
-		   to the direct item.  In this case we used to adjust
-		   indirect items's ih_free_space. Now ih_free_space is not
-		   used, it would be ideal to write zeros to corresponding
-		   unformatted node. For now i_size is considered as guard for
-		   going out of file size */
+		/*
+		 * No disk memory. So we can not convert last unformatted node
+		 * to the direct item.  In this case we used to adjust
+		 * indirect item's ih_free_space. Now ih_free_space is not
+		 * used, it would be ideal to write zeros to corresponding
+		 * unformatted node. For now i_size is considered as guard for
+		 * going out of file size
+		 */
 		kunmap(page);
 		return block_size - round_tail_len;
 	}
@@ -264,12 +297,16 @@
 	/* make sure to get the i_blocks changes from reiserfs_insert_item */
 	reiserfs_update_sd(th, inode);
 
-	// note: we have now the same as in above direct2indirect
-	// conversion: there are two keys which have matching first three
-	// key components. They only differ by the fouhth one.
+	/*
+	 * note: we have now the same as in above direct2indirect
+	 * conversion: there are two keys which have matching first three
+	 * key components. They only differ by the fourth one.
+	 */
 
-	/* We have inserted new direct item and must remove last
-	   unformatted node. */
+	/*
+	 * We have inserted new direct item and must remove last
+	 * unformatted node.
+	 */
 	*mode = M_CUT;
 
 	/* we store position of first direct item in the in-core inode */

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5cdfbd6..ca416d0 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c

@@ -56,9 +56,11 @@
 #define XAROOT_NAME   "xattrs"
 
 
-/* Helpers for inode ops. We do this so that we don't have all the VFS
+/*
+ * Helpers for inode ops. We do this so that we don't have all the VFS
  * overhead and also for proper i_mutex annotation.
- * dir->i_mutex must be held for all of them. */
+ * dir->i_mutex must be held for all of them.
+ */
 #ifdef CONFIG_REISERFS_FS_XATTR
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -73,10 +75,12 @@
 	return dir->i_op->mkdir(dir, dentry, mode);
 }
 
-/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
+/*
+ * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
  * mutation ops aren't called during rename or splace, which are the
  * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
- * better than allocating another subclass just for this code. */
+ * better than allocating another subclass just for this code.
+ */
 static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
 	int error;
@@ -166,9 +170,11 @@
 	return xadir;
 }
 
-/* The following are side effects of other operations that aren't explicitly
+/*
+ * The following are side effects of other operations that aren't explicitly
  * modifying extended attributes. This includes operations such as permissions
- * or ownership changes, object deletions, etc. */
+ * or ownership changes, object deletions, etc.
+ */
 struct reiserfs_dentry_buf {
 	struct dir_context ctx;
 	struct dentry *xadir;
@@ -267,11 +273,13 @@
 	cleanup_dentry_buf(&buf);
 
 	if (!err) {
-		/* We start a transaction here to avoid a ABBA situation
+		/*
+		 * We start a transaction here to avoid an ABBA situation
 		 * between the xattr root's i_mutex and the journal lock.
 		 * This doesn't incur much additional overhead since the
 		 * new transaction will just nest inside the
-		 * outer transaction. */
+		 * outer transaction.
+		 */
 		int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 			     4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
 		struct reiserfs_transaction_handle th;
@@ -284,7 +292,7 @@
 					  I_MUTEX_XATTR);
 			err = action(dir, data);
 			reiserfs_write_lock(inode->i_sb);
-			jerror = journal_end(&th, inode->i_sb, blocks);
+			jerror = journal_end(&th);
 			reiserfs_write_unlock(inode->i_sb);
 			mutex_unlock(&dir->d_parent->d_inode->i_mutex);
 			err = jerror ?: err;
@@ -349,9 +357,11 @@
 }
 
 #ifdef CONFIG_REISERFS_FS_XATTR
-/* Returns a dentry corresponding to a specific extended attribute file
+/*
+ * Returns a dentry corresponding to a specific extended attribute file
  * for the inode. If flags allow, the file is created. Otherwise, a
- * valid or negative dentry, or an error is returned. */
+ * valid or negative dentry, or an error is returned.
+ */
 static struct dentry *xattr_lookup(struct inode *inode, const char *name,
 				    int flags)
 {
@@ -400,8 +410,10 @@
 {
 	struct address_space *mapping = dir->i_mapping;
 	struct page *page;
-	/* We can deadlock if we try to free dentries,
-	   and an unlink/rmdir has just occurred - GFP_NOFS avoids this */
+	/*
+	 * We can deadlock if we try to free dentries,
+	 * and an unlink/rmdir has just occurred - GFP_NOFS avoids this
+	 */
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
 	if (!IS_ERR(page)) {
@@ -411,7 +423,7 @@
 	}
 	return page;
 
-      fail:
+fail:
 	reiserfs_put_page(page);
 	return ERR_PTR(-EIO);
 }
@@ -589,7 +601,7 @@
 					  buffer, buffer_size, flags);
 
 	reiserfs_write_lock(inode->i_sb);
-	error2 = journal_end(&th, inode->i_sb, jbegin_count);
+	error2 = journal_end(&th);
 	reiserfs_write_unlock(inode->i_sb);
 	if (error == 0)
 		error = error2;
@@ -615,8 +627,10 @@
 	if (name == NULL)
 		return -EINVAL;
 
-	/* We can't have xattrs attached to v1 items since they don't have
-	 * generation numbers */
+	/*
+	 * We can't have xattrs attached to v1 items since they don't have
+	 * generation numbers
+	 */
 	if (get_inode_sd_version(inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
@@ -913,12 +927,16 @@
 
 static int xattr_mount_check(struct super_block *s)
 {
-	/* We need generation numbers to ensure that the oid mapping is correct
-	 * v3.5 filesystems don't have them. */
+	/*
+	 * We need generation numbers to ensure that the oid mapping is correct
+	 * v3.5 filesystems don't have them.
+	 */
 	if (old_format_only(s)) {
 		if (reiserfs_xattrs_optional(s)) {
-			/* Old format filesystem, but optional xattrs have
-			 * been enabled. Error out. */
+			/*
+			 * Old format filesystem, but optional xattrs have
+			 * been enabled. Error out.
+			 */
 			reiserfs_warning(s, "jdm-2005",
 					 "xattrs/ACLs not supported "
 					 "on pre-v3.6 format filesystems. "
@@ -972,9 +990,11 @@
 	return err;
 }
 
-/* We need to take a copy of the mount flags since things like
+/*
+ * We need to take a copy of the mount flags since things like
  * MS_RDONLY don't get set until *after* we're called.
- * mount_flags != mount_options */
+ * mount_flags != mount_options
+ */
 int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 {
 	int err = 0;
@@ -1007,8 +1027,8 @@
 
 error:
 	if (err) {
-		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
-		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
+		clear_bit(REISERFS_XATTRS_USER, &REISERFS_SB(s)->s_mount_opt);
+		clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt);
 	}
 
 	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */

diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index f59626c..857ec7e 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h

@@ -61,7 +61,8 @@
 	return ret;
 }
 
-/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
+/*
+ * We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
  * Let's try to be smart about it.
  * xattr root: We cache it. If it's not cached, we may need to create it.
  * xattr dir: If anything has been loaded for this inode, we can set a flag

diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a6ce532..44503e2 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c

@@ -25,8 +25,10 @@
 	int size = acl ? posix_acl_xattr_size(acl->a_count) : 0;
 
 
-	/* Pessimism: We can't assume that anything from the xattr root up
-	 * has been created. */
+	/*
+	 * Pessimism: We can't assume that anything from the xattr root up
+	 * has been created.
+	 */
 
 	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
 			 reiserfs_xattr_nblocks(inode, size) * 2;
@@ -37,7 +39,7 @@
 	if (error == 0) {
 		error = __reiserfs_set_acl(&th, inode, type, acl);
 		reiserfs_write_lock(inode->i_sb);
-		error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+		error2 = journal_end(&th);
 		reiserfs_write_unlock(inode->i_sb);
 		if (error2)
 			error = error2;
@@ -111,7 +113,7 @@
 		goto fail;
 	return acl;
 
-      fail:
+fail:
 	posix_acl_release(acl);
 	return ERR_PTR(-EINVAL);
 }
@@ -164,7 +166,7 @@
 	}
 	return (char *)ext_acl;
 
-      fail:
+fail:
 	kfree(ext_acl);
 	return ERR_PTR(-EINVAL);
 }
@@ -208,8 +210,10 @@
 
 	retval = reiserfs_xattr_get(inode, name, value, size);
 	if (retval == -ENODATA || retval == -ENOSYS) {
-		/* This shouldn't actually happen as it should have
-		   been caught above.. but just in case */
+		/*
+		 * This shouldn't actually happen as it should have
+		 * been caught above.. but just in case
+		 */
 		acl = NULL;
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
@@ -290,8 +294,10 @@
 	return error;
 }
 
-/* dir->i_mutex: locked,
- * inode is new and not released into the wild yet */
+/*
+ * dir->i_mutex: locked,
+ * inode is new and not released into the wild yet
+ */
 int
 reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 			     struct inode *dir, struct dentry *dentry,
@@ -304,14 +310,18 @@
 	if (S_ISLNK(inode->i_mode))
 		return 0;
 
-	/* ACLs can only be used on "new" objects, so if it's an old object
-	 * there is nothing to inherit from */
+	/*
+	 * ACLs can only be used on "new" objects, so if it's an old object
+	 * there is nothing to inherit from
+	 */
 	if (get_inode_sd_version(dir) == STAT_DATA_V1)
 		goto apply_umask;
 
-	/* Don't apply ACLs to objects in the .reiserfs_priv tree.. This
+	/*
+	 * Don't apply ACLs to objects in the .reiserfs_priv tree.. This
 	 * would be useless since permissions are ignored, and a pain because
-	 * it introduces locking cycles */
+	 * it introduces locking cycles
+	 */
 	if (IS_PRIVATE(dir)) {
 		inode->i_flags |= S_PRIVATE;
 		goto apply_umask;
@@ -335,7 +345,7 @@
 
 	return err;
 
-      apply_umask:
+apply_umask:
 	/* no ACL, apply umask */
 	inode->i_mode &= ~current_umask();
 	return err;

diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index f373bde..ea06c75 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c

@@ -72,8 +72,8 @@
 
 const struct file_operations romfs_ro_fops = {
 	.llseek			= generic_file_llseek,
-	.read			= do_sync_read,
-	.aio_read		= generic_file_aio_read,
+	.read			= new_sync_read,
+	.read_iter		= generic_file_read_iter,
 	.splice_read		= generic_file_splice_read,
 	.mmap			= romfs_mmap,
 	.get_unmapped_area	= romfs_get_unmapped_area,

diff --git a/fs/splice.c b/fs/splice.c
index e246954..f5cb9ba 100644
--- a/fs/splice.c
+++ b/fs/splice.c

@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
+#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -717,63 +718,6 @@
 				    sd->len, &pos, more);
 }
 
-/*
- * This is a little more tricky than the file -> pipe splicing. There are
- * basically three cases:
- *
- *	- Destination page already exists in the address space and there
- *	  are users of it. For that case we have no other option that
- *	  copying the data. Tough luck.
- *	- Destination page already exists in the address space, but there
- *	  are no users of it. Make sure it's uptodate, then drop it. Fall
- *	  through to last case.
- *	- Destination page does not exist, we can add the pipe page to
- *	  the page cache and avoid the copy.
- *
- * If asked to move pages to the output file (SPLICE_F_MOVE is set in
- * sd->flags), we attempt to migrate pages from the pipe to the output
- * file address space page cache. This is possible if no one else has
- * the pipe page referenced outside of the pipe and page cache. If
- * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
- * a new page in the output file page cache and fill/dirty that.
- */
-int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-		 struct splice_desc *sd)
-{
-	struct file *file = sd->u.file;
-	struct address_space *mapping = file->f_mapping;
-	unsigned int offset, this_len;
-	struct page *page;
-	void *fsdata;
-	int ret;
-
-	offset = sd->pos & ~PAGE_CACHE_MASK;
-
-	this_len = sd->len;
-	if (this_len + offset > PAGE_CACHE_SIZE)
-		this_len = PAGE_CACHE_SIZE - offset;
-
-	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-	if (unlikely(ret))
-		goto out;
-
-	if (buf->page != page) {
-		char *src = kmap_atomic(buf->page);
-		char *dst = kmap_atomic(page);
-
-		memcpy(dst + offset, src + buf->offset, this_len);
-		flush_dcache_page(page);
-		kunmap_atomic(dst);
-		kunmap_atomic(src);
-	}
-	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
-				page, fsdata);
-out:
-	return ret;
-}
-EXPORT_SYMBOL(pipe_to_file);
-
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
@@ -802,7 +746,7 @@
  *    locking is required around copying the pipe buffers to the
  *    destination.
  */
-int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 			  splice_actor *actor)
 {
 	int ret;
@@ -849,7 +793,6 @@
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_feed);
 
 /**
  * splice_from_pipe_next - wait for some data to splice from
@@ -861,7 +804,7 @@
  *    value (one) if pipe buffers are available.  It will return zero
  *    or -errno if no more data needs to be spliced.
  */
-int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	while (!pipe->nrbufs) {
 		if (!pipe->writers)
@@ -886,7 +829,6 @@
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_next);
 
 /**
  * splice_from_pipe_begin - start splicing from pipe
@@ -897,12 +839,11 @@
  *    splice_from_pipe_next() and splice_from_pipe_feed() to
  *    initialize the necessary fields of @sd.
  */
-void splice_from_pipe_begin(struct splice_desc *sd)
+static void splice_from_pipe_begin(struct splice_desc *sd)
 {
 	sd->num_spliced = 0;
 	sd->need_wakeup = false;
 }
-EXPORT_SYMBOL(splice_from_pipe_begin);
 
 /**
  * splice_from_pipe_end - finish splicing from pipe
@@ -914,12 +855,11 @@
  *    be called after a loop containing splice_from_pipe_next() and
  *    splice_from_pipe_feed().
  */
-void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	if (sd->need_wakeup)
 		wakeup_pipe_writers(pipe);
 }
-EXPORT_SYMBOL(splice_from_pipe_end);
 
 /**
  * __splice_from_pipe - splice data from a pipe to given actor
@@ -985,7 +925,7 @@
 }
 
 /**
- * generic_file_splice_write - splice data from a pipe to a file
+ * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
  * @out:	file to write to
  * @ppos:	position in @out
@@ -995,40 +935,122 @@
  * Description:
  *    Will either move or copy pages (determined by @flags options) from
  *    the given pipe inode to the given file.
+ *    This one is ->write_iter-based.
  *
  */
 ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			  loff_t *ppos, size_t len, unsigned int flags)
 {
-	struct address_space *mapping = out->f_mapping;
-	struct inode *inode = mapping->host;
 	struct splice_desc sd = {
 		.total_len = len,
 		.flags = flags,
 		.pos = *ppos,
 		.u.file = out,
 	};
+	int nbufs = pipe->buffers;
+	struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
 	ssize_t ret;
 
+	if (unlikely(!array))
+		return -ENOMEM;
+
 	pipe_lock(pipe);
 
 	splice_from_pipe_begin(&sd);
-	do {
+	while (sd.total_len) {
+		struct iov_iter from;
+		struct kiocb kiocb;
+		size_t left;
+		int n, idx;
+
 		ret = splice_from_pipe_next(pipe, &sd);
 		if (ret <= 0)
 			break;
 
-		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-		ret = file_remove_suid(out);
-		if (!ret) {
-			ret = file_update_time(out);
-			if (!ret)
-				ret = splice_from_pipe_feed(pipe, &sd,
-							    pipe_to_file);
+		if (unlikely(nbufs < pipe->buffers)) {
+			kfree(array);
+			nbufs = pipe->buffers;
+			array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
+			if (!array) {
+				ret = -ENOMEM;
+				break;
+			}
 		}
-		mutex_unlock(&inode->i_mutex);
-	} while (ret > 0);
+
+		/* build the vector */
+		left = sd.total_len;
+		for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+			struct pipe_buffer *buf = pipe->bufs + idx;
+			size_t this_len = buf->len;
+
+			if (this_len > left)
+				this_len = left;
+
+			if (idx == pipe->buffers - 1)
+				idx = -1;
+
+			ret = buf->ops->confirm(pipe, buf);
+			if (unlikely(ret)) {
+				if (ret == -ENODATA)
+					ret = 0;
+				goto done;
+			}
+
+			array[n].bv_page = buf->page;
+			array[n].bv_len = this_len;
+			array[n].bv_offset = buf->offset;
+			left -= this_len;
+		}
+
+		/* ... iov_iter */
+		from.type = ITER_BVEC | WRITE;
+		from.bvec = array;
+		from.nr_segs = n;
+		from.count = sd.total_len - left;
+		from.iov_offset = 0;
+
+		/* ... and iocb */
+		init_sync_kiocb(&kiocb, out);
+		kiocb.ki_pos = sd.pos;
+		kiocb.ki_nbytes = sd.total_len - left;
+
+		/* now, send it */
+		ret = out->f_op->write_iter(&kiocb, &from);
+		if (-EIOCBQUEUED == ret)
+			ret = wait_on_sync_kiocb(&kiocb);
+
+		if (ret <= 0)
+			break;
+
+		sd.num_spliced += ret;
+		sd.total_len -= ret;
+		*ppos = sd.pos = kiocb.ki_pos;
+
+		/* dismiss the fully eaten buffers, adjust the partial one */
+		while (ret) {
+			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+			if (ret >= buf->len) {
+				const struct pipe_buf_operations *ops = buf->ops;
+				ret -= buf->len;
+				buf->len = 0;
+				buf->ops = NULL;
+				ops->release(pipe, buf);
+				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+				pipe->nrbufs--;
+				if (pipe->files)
+					sd.need_wakeup = true;
+			} else {
+				buf->offset += ret;
+				buf->len -= ret;
+				ret = 0;
+			}
+		}
+	}
+done:
+	kfree(array);
 	splice_from_pipe_end(pipe, &sd);
 
 	pipe_unlock(pipe);
@@ -1036,21 +1058,10 @@
 	if (sd.num_spliced)
 		ret = sd.num_spliced;
 
-	if (ret > 0) {
-		int err;
-
-		err = generic_write_sync(out, *ppos, ret);
-		if (err)
-			ret = err;
-		else
-			*ppos += ret;
-		balance_dirty_pages_ratelimited(mapping);
-	}
-
 	return ret;
 }
 
-EXPORT_SYMBOL(generic_file_splice_write);
+EXPORT_SYMBOL(iter_file_splice_write);
 
 static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			  struct splice_desc *sd)
@@ -1549,7 +1560,7 @@
 		goto out;
 
 	count = ret;
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, count);
 
 	sd.len = 0;
 	sd.total_len = count;

diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 9d4dc68..b00811c 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c

@@ -21,10 +21,10 @@
  */
 const struct file_operations sysv_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index e8e01d7..eb997e9 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c

@@ -437,7 +437,6 @@
  */
 int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
-	int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
 	int err, idx_growth, data_growth, dd_growth, retried = 0;
 
 	ubifs_assert(req->new_page <= 1);

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 5157b86..177b015 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c

@@ -745,8 +745,10 @@
 
 	for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
 		err = ubifs_read_one_lp(c, lnum, &lp);
-		if (err)
+		if (err) {
 			ubifs_err("cannot read lprops for LEB %d", lnum);
+			continue;
+		}
 
 		ubifs_dump_lprop(c, &lp);
 	}

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4f34dba..b5b593c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c

@@ -903,8 +903,9 @@
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 
 #ifdef UBIFS_DEBUG
+	struct ubifs_inode *ui = ubifs_inode(inode);
 	spin_lock(&ui->ui_lock);
-	ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
+	ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT);
 	spin_unlock(&ui->ui_lock);
 #endif
 
@@ -1363,17 +1364,17 @@
 
 /**
  * update_ctime - update mtime and ctime of an inode.
- * @c: UBIFS file-system description object
  * @inode: inode to update
  *
  * This function updates mtime and ctime of the inode if it is not equivalent to
  * current time. Returns zero in case of success and a negative error code in
  * case of failure.
  */
-static int update_mctime(struct ubifs_info *c, struct inode *inode)
+static int update_mctime(struct inode *inode)
 {
 	struct timespec now = ubifs_current_time(inode);
 	struct ubifs_inode *ui = ubifs_inode(inode);
+	struct ubifs_info *c = inode->i_sb->s_fs_info;
 
 	if (mctime_update_needed(inode, &now)) {
 		int err, release;
@@ -1396,18 +1397,13 @@
 	return 0;
 }
 
-static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos)
+static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	int err;
-	struct inode *inode = iocb->ki_filp->f_mapping->host;
-	struct ubifs_info *c = inode->i_sb->s_fs_info;
-
-	err = update_mctime(c, inode);
+	int err = update_mctime(file_inode(iocb->ki_filp));
 	if (err)
 		return err;
 
-	return generic_file_aio_write(iocb, iov, nr_segs, pos);
+	return generic_file_write_iter(iocb, from);
 }
 
 static int ubifs_set_page_dirty(struct page *page)
@@ -1525,8 +1521,7 @@
 	}
 
 	wait_for_stable_page(page);
-	unlock_page(page);
-	return 0;
+	return VM_FAULT_LOCKED;
 
 out_unlock:
 	unlock_page(page);
@@ -1582,15 +1577,15 @@
 
 const struct file_operations ubifs_file_operations = {
 	.llseek         = generic_file_llseek,
-	.read           = do_sync_read,
-	.write          = do_sync_write,
-	.aio_read       = generic_file_aio_read,
-	.aio_write      = ubifs_aio_write,
+	.read           = new_sync_read,
+	.write          = new_sync_write,
+	.read_iter      = generic_file_read_iter,
+	.write_iter     = ubifs_write_iter,
 	.mmap           = ubifs_file_mmap,
 	.fsync          = ubifs_fsync,
 	.unlocked_ioctl = ubifs_ioctl,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ubifs_compat_ioctl,
 #endif

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index e18b988..2290d58 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c

@@ -988,30 +988,32 @@
 		return err;
 
 	if (type != ch->node_type) {
-		ubifs_err("bad node type (%d but expected %d)",
-			  ch->node_type, type);
+		ubifs_errc(c, "bad node type (%d but expected %d)",
+			   ch->node_type, type);
 		goto out;
 	}
 
 	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
-		ubifs_err("expected node type %d", type);
+		ubifs_errc(c, "expected node type %d", type);
 		return err;
 	}
 
 	l = le32_to_cpu(ch->len);
 	if (l != len) {
-		ubifs_err("bad node length %d, expected %d", l, len);
+		ubifs_errc(c, "bad node length %d, expected %d", l, len);
 		goto out;
 	}
 
 	return 0;
 
 out:
-	ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
-		  ubi_is_mapped(c->ubi, lnum));
-	ubifs_dump_node(c, buf);
-	dump_stack();
+	ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum,
+		   offs, ubi_is_mapped(c->ubi, lnum));
+	if (!c->probing) {
+		ubifs_dump_node(c, buf);
+		dump_stack();
+	}
 	return -EINVAL;
 }
 

diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index f35135e..9a9fb94 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c

@@ -128,7 +128,6 @@
 			freed = ubifs_destroy_tnc_subtree(znode);
 			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
 			atomic_long_sub(freed, &c->clean_zn_cnt);
-			ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
 			total_freed += freed;
 			znode = zprev;
 		}

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a81c7b5..3904c85 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c

@@ -1149,6 +1149,9 @@
 	size_t sz;
 
 	c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
+	/* Suppress error messages while probing if MS_SILENT is set */
+	c->probing = !!(c->vfs_sb->s_flags & MS_SILENT);
+
 	err = init_constants_early(c);
 	if (err)
 		return err;
@@ -1214,6 +1217,8 @@
 	if (err)
 		goto out_free;
 
+	c->probing = 0;
+
 	/*
 	 * Make sure the compressor which is set as default in the superblock
 	 * or overridden by mount options is actually compiled in.

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 9083bc7..8a40cf9 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c

@@ -2859,10 +2859,11 @@
 {
 	tnc_destroy_cnext(c);
 	if (c->zroot.znode) {
-		long n;
+		long n, freed;
 
-		ubifs_destroy_tnc_subtree(c->zroot.znode);
 		n = atomic_long_read(&c->clean_zn_cnt);
+		freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
+		ubifs_assert(freed == n);
 		atomic_long_sub(n, &ubifs_clean_zn_cnt);
 	}
 	kfree(c->gap_lebs);

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e8c8cfe..c1f71fe 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h

@@ -51,6 +51,15 @@
 #define ubifs_warn(fmt, ...)                                        \
 	pr_warn("UBIFS warning (pid %d): %s: " fmt "\n",            \
 		current->pid, __func__, ##__VA_ARGS__)
+/*
+ * A variant of 'ubifs_err()' which takes the UBIFS file-system description
+ * object as an argument.
+ */
+#define ubifs_errc(c, fmt, ...)                                     \
+	do {                                                        \
+		if (!(c)->probing)                                  \
+			ubifs_err(fmt, ##__VA_ARGS__);              \
+	} while (0)
 
 /* UBIFS file system VFS magic number */
 #define UBIFS_SUPER_MAGIC 0x24051905
@@ -1209,6 +1218,7 @@
  * @need_recovery: %1 if the file-system needs recovery
  * @replaying: %1 during journal replay
  * @mounting: %1 while mounting
+ * @probing: %1 while attempting to mount if MS_SILENT mount flag is set
  * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
  * @replay_list: temporary list used during journal replay
  * @replay_buds: list of buds to replay
@@ -1441,6 +1451,7 @@
 	unsigned int replaying:1;
 	unsigned int mounting:1;
 	unsigned int remounting_rw:1;
+	unsigned int probing:1;
 	struct list_head replay_list;
 	struct list_head replay_buds;
 	unsigned long long cs_sqnum;

diff --git a/fs/udf/file.c b/fs/udf/file.c
index d2c170f..d80738f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c

@@ -119,8 +119,8 @@
 }
 
 static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
-				     const struct iovec *iov,
-				     loff_t offset, unsigned long nr_segs)
+				     struct iov_iter *iter,
+				     loff_t offset)
 {
 	/* Fallback to buffered I/O. */
 	return 0;
@@ -134,8 +134,7 @@
 	.direct_IO	= udf_adinicb_direct_IO,
 };
 
-static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				  unsigned long nr_segs, loff_t ppos)
+static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	ssize_t retval;
 	struct file *file = iocb->ki_filp;
@@ -150,7 +149,7 @@
 		if (file->f_flags & O_APPEND)
 			pos = inode->i_size;
 		else
-			pos = ppos;
+			pos = iocb->ki_pos;
 
 		if (inode->i_sb->s_blocksize <
 				(udf_file_entry_alloc_offset(inode) +
@@ -171,7 +170,7 @@
 	} else
 		up_write(&iinfo->i_data_sem);
 
-	retval = __generic_file_aio_write(iocb, iov, nr_segs);
+	retval = __generic_file_write_iter(iocb, from);
 	mutex_unlock(&inode->i_mutex);
 
 	if (retval > 0) {
@@ -252,13 +251,13 @@
 }
 
 const struct file_operations udf_file_operations = {
-	.read			= do_sync_read,
-	.aio_read		= generic_file_aio_read,
+	.read			= new_sync_read,
+	.read_iter		= generic_file_read_iter,
 	.unlocked_ioctl		= udf_ioctl,
 	.open			= generic_file_open,
 	.mmap			= generic_file_mmap,
-	.write			= do_sync_write,
-	.aio_write		= udf_file_aio_write,
+	.write			= new_sync_write,
+	.write_iter		= udf_file_write_iter,
 	.release		= udf_release_file,
 	.fsync			= generic_file_fsync,
 	.splice_read		= generic_file_splice_read,

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5d64370..236cd48 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c

@@ -217,18 +217,18 @@
 }
 
 static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
+	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  udf_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
 	if (unlikely(ret < 0 && (rw & WRITE)))
-		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		udf_write_failed(mapping, offset + count);
 	return ret;
 }
 

diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 33afa20..c84ec01 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c

@@ -35,10 +35,10 @@
  
 const struct file_operations ufs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
 	.fsync		= generic_file_fsync,

diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 0fdd410..6e247a9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h

@@ -160,30 +160,38 @@
 	 * still being referenced.
 	 */
 	__be32		agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
-
+	/*
+	 * This marks the end of logging region 1 and start of logging region 2.
+	 */
 	uuid_t		agi_uuid;	/* uuid of filesystem */
 	__be32		agi_crc;	/* crc of agi sector */
 	__be32		agi_pad32;
 	__be64		agi_lsn;	/* last write sequence */
 
+	__be32		agi_free_root; /* root of the free inode btree */
+	__be32		agi_free_level;/* levels in free inode btree */
+
 	/* structure must be padded to 64 bit alignment */
 } xfs_agi_t;
 
 #define XFS_AGI_CRC_OFF		offsetof(struct xfs_agi, agi_crc)
 
-#define	XFS_AGI_MAGICNUM	0x00000001
-#define	XFS_AGI_VERSIONNUM	0x00000002
-#define	XFS_AGI_SEQNO		0x00000004
-#define	XFS_AGI_LENGTH		0x00000008
-#define	XFS_AGI_COUNT		0x00000010
-#define	XFS_AGI_ROOT		0x00000020
-#define	XFS_AGI_LEVEL		0x00000040
-#define	XFS_AGI_FREECOUNT	0x00000080
-#define	XFS_AGI_NEWINO		0x00000100
-#define	XFS_AGI_DIRINO		0x00000200
-#define	XFS_AGI_UNLINKED	0x00000400
-#define	XFS_AGI_NUM_BITS	11
-#define	XFS_AGI_ALL_BITS	((1 << XFS_AGI_NUM_BITS) - 1)
+#define	XFS_AGI_MAGICNUM	(1 << 0)
+#define	XFS_AGI_VERSIONNUM	(1 << 1)
+#define	XFS_AGI_SEQNO		(1 << 2)
+#define	XFS_AGI_LENGTH		(1 << 3)
+#define	XFS_AGI_COUNT		(1 << 4)
+#define	XFS_AGI_ROOT		(1 << 5)
+#define	XFS_AGI_LEVEL		(1 << 6)
+#define	XFS_AGI_FREECOUNT	(1 << 7)
+#define	XFS_AGI_NEWINO		(1 << 8)
+#define	XFS_AGI_DIRINO		(1 << 9)
+#define	XFS_AGI_UNLINKED	(1 << 10)
+#define	XFS_AGI_NUM_BITS_R1	11	/* end of the 1st agi logging region */
+#define	XFS_AGI_ALL_BITS_R1	((1 << XFS_AGI_NUM_BITS_R1) - 1)
+#define	XFS_AGI_FREE_ROOT	(1 << 11)
+#define	XFS_AGI_FREE_LEVEL	(1 << 12)
+#define	XFS_AGI_NUM_BITS_R2	13
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGI_DADDR(mp)	((xfs_daddr_t)(2 << (mp)->m_sectbb_log))

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index c1cf6a3..d438132 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c

@@ -257,16 +257,14 @@
 	k = rlen % args->prod;
 	if (k == args->mod)
 		return;
-	if (k > args->mod) {
-		if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
-			return;
-	} else {
-		if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
-		    (int)args->minlen)
-			return;
-	}
-	ASSERT(rlen >= args->minlen);
-	ASSERT(rlen <= args->maxlen);
+	if (k > args->mod)
+		rlen = rlen - (k - args->mod);
+	else
+		rlen = rlen - args->prod + (args->mod - k);
+	if ((int)rlen < (int)args->minlen)
+		return;
+	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
+	ASSERT(rlen % args->prod == args->mod);
 	args->len = rlen;
 }
 
@@ -541,7 +539,6 @@
 			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 	if (error)
 		return error;
-	ASSERT(!xfs_buf_geterror(bp));
 	xfs_buf_set_ref(bp, XFS_AGFL_REF);
 	*bpp = bp;
 	return 0;

diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index cc1eadc..8358f1d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c

@@ -70,7 +70,6 @@
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	int			error;

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32..faaf716 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c

@@ -975,14 +975,39 @@
 	 * Given that we do not allow direct reclaim to call us, we should
 	 * never be called while in a filesystem transaction.
 	 */
-	if (WARN_ON(current->flags & PF_FSTRANS))
+	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
 		goto redirty;
 
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
 	end_index = offset >> PAGE_CACHE_SHIFT;
 	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index >= end_index) {
+
+	/*
+	 * The page index is less than the end_index, adjust the end_offset
+	 * to the highest offset that this page should represent.
+	 * -----------------------------------------------------
+	 * |			file mapping	       | <EOF> |
+	 * -----------------------------------------------------
+	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
+	 * ^--------------------------------^----------|--------
+	 * |     desired writeback range    |      see else    |
+	 * ---------------------------------^------------------|
+	 */
+	if (page->index < end_index)
+		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+	else {
+		/*
+		 * Check whether the page to write out is beyond or straddles
+		 * i_size or not.
+		 * -------------------------------------------------------
+		 * |		file mapping		        | <EOF>  |
+		 * -------------------------------------------------------
+		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
+		 * ^--------------------------------^-----------|---------
+		 * |				    |      Straddles     |
+		 * ---------------------------------^-----------|--------|
+		 */
 		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
 
 		/*
@@ -990,24 +1015,36 @@
 		 * truncate operation that is in progress. We must redirty the
 		 * page so that reclaim stops reclaiming it. Otherwise
 		 * xfs_vm_releasepage() is called on it and gets confused.
+		 *
+		 * Note that end_index is an unsigned long.  If the given
+		 * offset is greater than 16TB on a 32-bit system, checking
+		 * whether the page is fully outside i_size via
+		 * "if (page->index >= end_index + 1)" would overflow, because
+		 * "end_index + 1" evaluates to 0.  The page would then be
+		 * redirtied and written out repeatedly, resulting in an
+		 * infinite loop, and the user program that performs this
+		 * operation would hang.  Instead, we detect this situation by
+		 * checking whether the page to write is entirely beyond i_size
+		 * or whether its offset is exactly equal to EOF.
 		 */
-		if (page->index >= end_index + 1 || offset_into_page == 0)
+		if (page->index > end_index ||
+		    (page->index == end_index && offset_into_page == 0))
 			goto redirty;
 
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
 		 * and every writepage invocation because it may be mmapped.
 		 * "A file is mapped in multiples of the page size.  For a file
-		 * that is not a multiple of the  page size, the remaining
+		 * that is not a multiple of the page size, the remaining
 		 * memory is zeroed when mapped, and writes to that region are
 		 * not written out to the file."
 		 */
 		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+		/* Adjust the end_offset to the end of file */
+		end_offset = offset;
 	}
 
-	end_offset = min_t(unsigned long long,
-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-			offset);
 	len = 1 << inode->i_blkbits;
 
 	bh = head = page_buffers(page);
@@ -1188,9 +1225,9 @@
 
 	xfs_count_page_state(page, &delalloc, &unwritten);
 
-	if (WARN_ON(delalloc))
+	if (WARN_ON_ONCE(delalloc))
 		return 0;
-	if (WARN_ON(unwritten))
+	if (WARN_ON_ONCE(unwritten))
 		return 0;
 
 	return try_to_free_buffers(page);
@@ -1449,9 +1486,8 @@
 xfs_vm_direct_IO(
 	int			rw,
 	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	loff_t			offset,
-	unsigned long		nr_segs)
+	struct iov_iter		*iter,
+	loff_t			offset)
 {
 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
@@ -1459,7 +1495,7 @@
 	ssize_t			ret;
 
 	if (rw & WRITE) {
-		size_t size = iov_length(iov, nr_segs);
+		size_t size = iov_iter_count(iter);
 
 		/*
 		 * We cannot preallocate a size update transaction here as we
@@ -1471,17 +1507,15 @@
 		if (offset + size > XFS_I(inode)->i_d.di_size)
 			ioend->io_isdirect = 1;
 
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    xfs_end_io_direct_write, NULL,
 					    DIO_ASYNC_EXTEND);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			goto out_destroy_ioend;
 	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    NULL, NULL, 0);
 	}
 

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index abda112..bfe36fc 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c

@@ -77,17 +77,27 @@
 
 
 STATIC int
-xfs_attr_name_to_xname(
-	struct xfs_name	*xname,
-	const unsigned char *aname)
+xfs_attr_args_init(
+	struct xfs_da_args	*args,
+	struct xfs_inode	*dp,
+	const unsigned char	*name,
+	int			flags)
 {
-	if (!aname)
+
+	if (!name)
 		return EINVAL;
-	xname->name = aname;
-	xname->len = strlen((char *)aname);
-	if (xname->len >= MAXNAMELEN)
+
+	memset(args, 0, sizeof(*args));
+	args->geo = dp->i_mount->m_attr_geo;
+	args->whichfork = XFS_ATTR_FORK;
+	args->dp = dp;
+	args->flags = flags;
+	args->name = name;
+	args->namelen = strlen((const char *)name);
+	if (args->namelen >= MAXNAMELEN)
 		return EFAULT;		/* match IRIX behaviour */
 
+	args->hashval = xfs_da_hashname(args->name, args->namelen);
 	return 0;
 }
 
@@ -106,79 +116,46 @@
  * Overall external interface routines.
  *========================================================================*/
 
-STATIC int
-xfs_attr_get_int(
+int
+xfs_attr_get(
 	struct xfs_inode	*ip,
-	struct xfs_name		*name,
+	const unsigned char	*name,
 	unsigned char		*value,
 	int			*valuelenp,
 	int			flags)
 {
-	xfs_da_args_t   args;
-	int             error;
-
-	if (!xfs_inode_hasattr(ip))
-		return ENOATTR;
-
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	memset((char *)&args, 0, sizeof(args));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.value = value;
-	args.valuelen = *valuelenp;
-	args.flags = flags;
-	args.hashval = xfs_da_hashname(args.name, args.namelen);
-	args.dp = ip;
-	args.whichfork = XFS_ATTR_FORK;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = xfs_attr_shortform_getvalue(&args);
-	} else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
-		error = xfs_attr_leaf_get(&args);
-	} else {
-		error = xfs_attr_node_get(&args);
-	}
-
-	/*
-	 * Return the number of bytes in the value to the caller.
-	 */
-	*valuelenp = args.valuelen;
-
-	if (error == EEXIST)
-		error = 0;
-	return(error);
-}
-
-int
-xfs_attr_get(
-	xfs_inode_t	*ip,
-	const unsigned char *name,
-	unsigned char	*value,
-	int		*valuelenp,
-	int		flags)
-{
-	int		error;
-	struct xfs_name	xname;
-	uint		lock_mode;
+	struct xfs_da_args	args;
+	uint			lock_mode;
+	int			error;
 
 	XFS_STATS_INC(xs_attr_get);
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return(EIO);
+		return EIO;
 
-	error = xfs_attr_name_to_xname(&xname, name);
+	if (!xfs_inode_hasattr(ip))
+		return ENOATTR;
+
+	error = xfs_attr_args_init(&args, ip, name, flags);
 	if (error)
 		return error;
 
+	args.value = value;
+	args.valuelen = *valuelenp;
+
 	lock_mode = xfs_ilock_attr_map_shared(ip);
-	error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
+	if (!xfs_inode_hasattr(ip))
+		error = ENOATTR;
+	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+		error = xfs_attr_shortform_getvalue(&args);
+	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+		error = xfs_attr_leaf_get(&args);
+	else
+		error = xfs_attr_node_get(&args);
 	xfs_iunlock(ip, lock_mode);
-	return(error);
+
+	*valuelenp = args.valuelen;
+	return error == EEXIST ? 0 : error;
 }
 
 /*
@@ -186,12 +163,10 @@
  */
 STATIC int
 xfs_attr_calc_size(
-	struct xfs_inode 	*ip,
-	int			namelen,
-	int			valuelen,
+	struct xfs_da_args	*args,
 	int			*local)
 {
-	struct xfs_mount 	*mp = ip->i_mount;
+	struct xfs_mount	*mp = args->dp->i_mount;
 	int			size;
 	int			nblks;
 
@@ -199,12 +174,10 @@
 	 * Determine space new attribute will use, and if it would be
 	 * "local" or "remote" (note: local != inline).
 	 */
-	size = xfs_attr_leaf_newentsize(namelen, valuelen,
-					mp->m_sb.sb_blocksize, local);
-
+	size = xfs_attr_leaf_newentsize(args, local);
 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
 	if (*local) {
-		if (size > (mp->m_sb.sb_blocksize >> 1)) {
+		if (size > (args->geo->blksize / 2)) {
 			/* Double split possible */
 			nblks *= 2;
 		}
@@ -213,7 +186,7 @@
 		 * Out of line attribute, cannot double split, but
 		 * make room for the attribute value itself.
 		 */
-		uint	dblocks = xfs_attr3_rmt_blocks(mp, valuelen);
+		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
 		nblks += dblocks;
 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
 	}
@@ -221,26 +194,38 @@
 	return nblks;
 }
 
-STATIC int
-xfs_attr_set_int(
-	struct xfs_inode *dp,
-	struct xfs_name	*name,
-	unsigned char	*value,
-	int		valuelen,
-	int		flags)
+int
+xfs_attr_set(
+	struct xfs_inode	*dp,
+	const unsigned char	*name,
+	unsigned char		*value,
+	int			valuelen,
+	int			flags)
 {
-	xfs_da_args_t		args;
-	xfs_fsblock_t		firstblock;
-	xfs_bmap_free_t		flist;
-	int			error, err2, committed;
 	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_da_args	args;
+	struct xfs_bmap_free	flist;
 	struct xfs_trans_res	tres;
+	xfs_fsblock_t		firstblock;
 	int			rsvd = (flags & ATTR_ROOT) != 0;
-	int			local;
+	int			error, err2, committed, local;
 
-	/*
-	 * Attach the dquots to the inode.
-	 */
+	XFS_STATS_INC(xs_attr_set);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return EIO;
+
+	error = xfs_attr_args_init(&args, dp, name, flags);
+	if (error)
+		return error;
+
+	args.value = value;
+	args.valuelen = valuelen;
+	args.firstblock = &firstblock;
+	args.flist = &flist;
+	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+	args.total = xfs_attr_calc_size(&args, &local);
+
 	error = xfs_qm_dqattach(dp, 0);
 	if (error)
 		return error;
@@ -251,32 +236,14 @@
 	 */
 	if (XFS_IFORK_Q(dp) == 0) {
 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
-			      XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
+			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
 
-		if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
-			return(error);
+		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
+		if (error)
+			return error;
 	}
 
 	/*
-	 * Fill in the arg structure for this request.
-	 */
-	memset((char *)&args, 0, sizeof(args));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.value = value;
-	args.valuelen = valuelen;
-	args.flags = flags;
-	args.hashval = xfs_da_hashname(args.name, args.namelen);
-	args.dp = dp;
-	args.firstblock = &firstblock;
-	args.flist = &flist;
-	args.whichfork = XFS_ATTR_FORK;
-	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
-	/* Size is now blocks for attribute data */
-	args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);
-
-	/*
 	 * Start our first transaction of the day.
 	 *
 	 * All future transactions during this code must be "chained" off
@@ -303,7 +270,7 @@
 	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
 	if (error) {
 		xfs_trans_cancel(args.trans, 0);
-		return(error);
+		return error;
 	}
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 
@@ -313,7 +280,7 @@
 	if (error) {
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 		xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
-		return (error);
+		return error;
 	}
 
 	xfs_trans_ijoin(args.trans, dp, 0);
@@ -322,9 +289,9 @@
 	 * If the attribute list is non-existent or a shortform list,
 	 * upgrade it to a single-leaf-block attribute list.
 	 */
-	if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
-	    ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
-	     (dp->i_d.di_anextents == 0))) {
+	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
+	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+	     dp->i_d.di_anextents == 0)) {
 
 		/*
 		 * Build initial attribute list (if required).
@@ -349,9 +316,8 @@
 			 * the transaction goes to disk before returning
 			 * to the user.
 			 */
-			if (mp->m_flags & XFS_MOUNT_WSYNC) {
+			if (mp->m_flags & XFS_MOUNT_WSYNC)
 				xfs_trans_set_sync(args.trans);
-			}
 
 			if (!error && (flags & ATTR_KERNOTIME) == 0) {
 				xfs_trans_ichgtime(args.trans, dp,
@@ -361,7 +327,7 @@
 						 XFS_TRANS_RELEASE_LOG_RES);
 			xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-			return(error == 0 ? err2 : error);
+			return error ? error : err2;
 		}
 
 		/*
@@ -399,22 +365,19 @@
 
 	}
 
-	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
 		error = xfs_attr_leaf_addname(&args);
-	} else {
+	else
 		error = xfs_attr_node_addname(&args);
-	}
-	if (error) {
+	if (error)
 		goto out;
-	}
 
 	/*
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC) {
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
 		xfs_trans_set_sync(args.trans);
-	}
 
 	if ((flags & ATTR_KERNOTIME) == 0)
 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
@@ -426,65 +389,47 @@
 	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-	return(error);
+	return error;
 
 out:
-	if (args.trans)
+	if (args.trans) {
 		xfs_trans_cancel(args.trans,
 			XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+	}
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-	return(error);
-}
-
-int
-xfs_attr_set(
-	xfs_inode_t	*dp,
-	const unsigned char *name,
-	unsigned char	*value,
-	int		valuelen,
-	int		flags)
-{
-	int             error;
-	struct xfs_name	xname;
-
-	XFS_STATS_INC(xs_attr_set);
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return (EIO);
-
-	error = xfs_attr_name_to_xname(&xname, name);
-	if (error)
-		return error;
-
-	return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
+	return error;
 }
 
 /*
  * Generic handler routine to remove a name from an attribute list.
  * Transitions attribute list from Btree to shortform as necessary.
  */
-STATIC int
-xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
+int
+xfs_attr_remove(
+	struct xfs_inode	*dp,
+	const unsigned char	*name,
+	int			flags)
 {
-	xfs_da_args_t	args;
-	xfs_fsblock_t	firstblock;
-	xfs_bmap_free_t	flist;
-	int		error;
-	xfs_mount_t	*mp = dp->i_mount;
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_da_args	args;
+	struct xfs_bmap_free	flist;
+	xfs_fsblock_t		firstblock;
+	int			error;
 
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	memset((char *)&args, 0, sizeof(args));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.flags = flags;
-	args.hashval = xfs_da_hashname(args.name, args.namelen);
-	args.dp = dp;
+	XFS_STATS_INC(xs_attr_remove);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return EIO;
+
+	if (!xfs_inode_hasattr(dp))
+		return ENOATTR;
+
+	error = xfs_attr_args_init(&args, dp, name, flags);
+	if (error)
+		return error;
+
 	args.firstblock = &firstblock;
 	args.flist = &flist;
-	args.total = 0;
-	args.whichfork = XFS_ATTR_FORK;
 
 	/*
 	 * we have no control over the attribute names that userspace passes us
@@ -493,9 +438,6 @@
 	 */
 	args.op_flags = XFS_DA_OP_OKNOENT;
 
-	/*
-	 * Attach the dquots to the inode.
-	 */
 	error = xfs_qm_dqattach(dp, 0);
 	if (error)
 		return error;
@@ -524,7 +466,7 @@
 				  XFS_ATTRRM_SPACE_RES(mp), 0);
 	if (error) {
 		xfs_trans_cancel(args.trans, 0);
-		return(error);
+		return error;
 	}
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -534,35 +476,26 @@
 	 */
 	xfs_trans_ijoin(args.trans, dp, 0);
 
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
 	if (!xfs_inode_hasattr(dp)) {
 		error = XFS_ERROR(ENOATTR);
-		goto out;
-	}
-	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
 		error = xfs_attr_shortform_remove(&args);
-		if (error) {
-			goto out;
-		}
 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
 		error = xfs_attr_leaf_removename(&args);
 	} else {
 		error = xfs_attr_node_removename(&args);
 	}
-	if (error) {
+
+	if (error)
 		goto out;
-	}
 
 	/*
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC) {
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
 		xfs_trans_set_sync(args.trans);
-	}
 
 	if ((flags & ATTR_KERNOTIME) == 0)
 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
@@ -574,45 +507,17 @@
 	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-	return(error);
+	return error;
 
 out:
-	if (args.trans)
+	if (args.trans) {
 		xfs_trans_cancel(args.trans,
 			XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-	return(error);
-}
-
-int
-xfs_attr_remove(
-	xfs_inode_t	*dp,
-	const unsigned char *name,
-	int		flags)
-{
-	int		error;
-	struct xfs_name	xname;
-
-	XFS_STATS_INC(xs_attr_remove);
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return (EIO);
-
-	error = xfs_attr_name_to_xname(&xname, name);
-	if (error)
-		return error;
-
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!xfs_inode_hasattr(dp)) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return XFS_ERROR(ENOATTR);
 	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
-	return xfs_attr_remove_int(dp, &xname, flags);
+	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+	return error;
 }
 
-
 /*========================================================================
  * External routines when attribute list is inside the inode
  *========================================================================*/
@@ -958,7 +863,7 @@
 }
 
 /*========================================================================
- * External routines when attribute list size > XFS_LBSIZE(mp).
+ * External routines when attribute list size > geo->blksize
  *========================================================================*/
 
 /*
@@ -991,8 +896,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = mp;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_attr_node_ents;
 
 	/*
 	 * Search to see if name already exists, and get back a pointer
@@ -1170,8 +1073,6 @@
 		state = xfs_da_state_alloc();
 		state->args = args;
 		state->mp = mp;
-		state->blocksize = state->mp->m_sb.sb_blocksize;
-		state->node_ents = state->mp->m_attr_node_ents;
 		state->inleaf = 0;
 		error = xfs_da3_node_lookup_int(state, &retval);
 		if (error)
@@ -1262,8 +1163,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_attr_node_ents;
 
 	/*
 	 * Search to see if name exists, and get back a pointer to it.
@@ -1525,8 +1424,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_attr_node_ents;
 
 	/*
 	 * Search to see if name exists, and get back a pointer to it.

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 511c283..28712d2 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c

@@ -80,11 +80,12 @@
 /*
  * Utility routines.
  */
-STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf,
+STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
+			struct xfs_attr_leafblock *src_leaf,
 			struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
 			struct xfs_attr_leafblock *dst_leaf,
 			struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
-			int move_count, struct xfs_mount *mp);
+			int move_count);
 STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
 
 void
@@ -711,6 +712,7 @@
 
 	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
+	nargs.geo = args->geo;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
 	nargs.total = args->total;
@@ -805,18 +807,18 @@
 
 	trace_xfs_attr_leaf_to_sf(args);
 
-	tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
+	tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
 	if (!tmpbuffer)
 		return ENOMEM;
 
-	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
+	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
 	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
 	entry = xfs_attr3_leaf_entryp(leaf);
 
 	/* XXX (dgc): buffer is about to be marked stale - why zero it? */
-	memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
+	memset(bp->b_addr, 0, args->geo->blksize);
 
 	/*
 	 * Clean out the prior contents of the attribute list.
@@ -838,6 +840,7 @@
 	 * Copy the attributes
 	 */
 	memset((char *)&nargs, 0, sizeof(nargs));
+	nargs.geo = args->geo;
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
 	nargs.flist = args->flist;
@@ -904,12 +907,12 @@
 	/* copy leaf to new buffer, update identifiers */
 	xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
 	bp2->b_ops = bp1->b_ops;
-	memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp));
+	memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
 		hdr3->blkno = cpu_to_be64(bp2->b_bn);
 	}
-	xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1);
+	xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
 
 	/*
 	 * Set up the new root node.
@@ -930,7 +933,7 @@
 	btree[0].before = cpu_to_be32(blkno);
 	icnodehdr.count = 1;
 	dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
-	xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1);
+	xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);
 	error = 0;
 out:
 	return error;
@@ -966,10 +969,10 @@
 	bp->b_ops = &xfs_attr3_leaf_buf_ops;
 	xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
 	leaf = bp->b_addr;
-	memset(leaf, 0, XFS_LBSIZE(mp));
+	memset(leaf, 0, args->geo->blksize);
 
 	memset(&ichdr, 0, sizeof(ichdr));
-	ichdr.firstused = XFS_LBSIZE(mp);
+	ichdr.firstused = args->geo->blksize;
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
@@ -988,7 +991,7 @@
 	ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
 
 	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
-	xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1);
+	xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
 
 	*bpp = bp;
 	return 0;
@@ -1074,8 +1077,7 @@
 	leaf = bp->b_addr;
 	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
 	ASSERT(args->index >= 0 && args->index <= ichdr.count);
-	entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
-			   args->trans->t_mountp->m_sb.sb_blocksize, NULL);
+	entsize = xfs_attr_leaf_newentsize(args, NULL);
 
 	/*
 	 * Search through freemap for first-fit on new name length.
@@ -1174,17 +1176,14 @@
 	 * Allocate space for the new string (at the end of the run).
 	 */
 	mp = args->trans->t_mountp;
-	ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp));
+	ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);
 	ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
 	ASSERT(ichdr->freemap[mapindex].size >=
-		xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
-					 mp->m_sb.sb_blocksize, NULL));
-	ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp));
+		xfs_attr_leaf_newentsize(args, NULL));
+	ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);
 	ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
 
-	ichdr->freemap[mapindex].size -=
-			xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
-						 mp->m_sb.sb_blocksize, &tmp);
+	ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp);
 
 	entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
 				     ichdr->freemap[mapindex].size);
@@ -1269,14 +1268,13 @@
 	struct xfs_attr_leafblock *leaf_dst;
 	struct xfs_attr3_icleaf_hdr ichdr_src;
 	struct xfs_trans	*trans = args->trans;
-	struct xfs_mount	*mp = trans->t_mountp;
 	char			*tmpbuffer;
 
 	trace_xfs_attr_leaf_compact(args);
 
-	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
-	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
-	memset(bp->b_addr, 0, XFS_LBSIZE(mp));
+	tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
+	memset(bp->b_addr, 0, args->geo->blksize);
 	leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
 	leaf_dst = bp->b_addr;
 
@@ -1289,7 +1287,7 @@
 
 	/* Initialise the incore headers */
 	ichdr_src = *ichdr_dst;	/* struct copy */
-	ichdr_dst->firstused = XFS_LBSIZE(mp);
+	ichdr_dst->firstused = args->geo->blksize;
 	ichdr_dst->usedbytes = 0;
 	ichdr_dst->count = 0;
 	ichdr_dst->holes = 0;
@@ -1304,13 +1302,13 @@
 	 * Copy all entry's in the same (sorted) order,
 	 * but allocate name/value pairs packed and in sequence.
 	 */
-	xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
-				ichdr_src.count, mp);
+	xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0,
+				leaf_dst, ichdr_dst, 0, ichdr_src.count);
 	/*
 	 * this logs the entire buffer, but the caller must write the header
 	 * back to the buffer when it is finished modifying it.
 	 */
-	xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+	xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
 
 	kmem_free(tmpbuffer);
 }
@@ -1461,8 +1459,8 @@
 		/*
 		 * Move high entries from leaf1 to low end of leaf2.
 		 */
-		xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count,
-				leaf2, &ichdr2, 0, count, state->mp);
+		xfs_attr3_leaf_moveents(args, leaf1, &ichdr1,
+				ichdr1.count - count, leaf2, &ichdr2, 0, count);
 
 	} else if (count > ichdr1.count) {
 		/*
@@ -1490,14 +1488,14 @@
 		/*
 		 * Move low entries from leaf2 to high end of leaf1.
 		 */
-		xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1,
-					ichdr1.count, count, state->mp);
+		xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1,
+					ichdr1.count, count);
 	}
 
 	xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
 	xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
-	xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
-	xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+	xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
+	xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
 
 	/*
 	 * Copy out last hashval in each block for B-tree code.
@@ -1592,11 +1590,9 @@
 	max = ichdr1->count + ichdr2->count;
 	half = (max + 1) * sizeof(*entry);
 	half += ichdr1->usedbytes + ichdr2->usedbytes +
-			xfs_attr_leaf_newentsize(state->args->namelen,
-						 state->args->valuelen,
-						 state->blocksize, NULL);
+			xfs_attr_leaf_newentsize(state->args, NULL);
 	half /= 2;
-	lastdelta = state->blocksize;
+	lastdelta = state->args->geo->blksize;
 	entry = xfs_attr3_leaf_entryp(leaf1);
 	for (count = index = 0; count < max; entry++, index++, count++) {
 
@@ -1606,10 +1602,7 @@
 		 */
 		if (count == blk1->index) {
 			tmp = totallen + sizeof(*entry) +
-				xfs_attr_leaf_newentsize(
-						state->args->namelen,
-						state->args->valuelen,
-						state->blocksize, NULL);
+				xfs_attr_leaf_newentsize(state->args, NULL);
 			if (XFS_ATTR_ABS(half - tmp) > lastdelta)
 				break;
 			lastdelta = XFS_ATTR_ABS(half - tmp);
@@ -1645,10 +1638,7 @@
 	totallen -= count * sizeof(*entry);
 	if (foundit) {
 		totallen -= sizeof(*entry) +
-				xfs_attr_leaf_newentsize(
-						state->args->namelen,
-						state->args->valuelen,
-						state->blocksize, NULL);
+				xfs_attr_leaf_newentsize(state->args, NULL);
 	}
 
 	*countarg = count;
@@ -1700,7 +1690,7 @@
 	bytes = xfs_attr3_leaf_hdr_size(leaf) +
 		ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
 		ichdr.usedbytes;
-	if (bytes > (state->blocksize >> 1)) {
+	if (bytes > (state->args->geo->blksize >> 1)) {
 		*action = 0;	/* blk over 50%, don't try to join */
 		return(0);
 	}
@@ -1754,7 +1744,8 @@
 
 		xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
 
-		bytes = state->blocksize - (state->blocksize >> 2) -
+		bytes = state->args->geo->blksize -
+			(state->args->geo->blksize >> 2) -
 			ichdr.usedbytes - ichdr2.usedbytes -
 			((ichdr.count + ichdr2.count) *
 					sizeof(xfs_attr_leaf_entry_t)) -
@@ -1805,7 +1796,6 @@
 	struct xfs_attr_leafblock *leaf;
 	struct xfs_attr3_icleaf_hdr ichdr;
 	struct xfs_attr_leaf_entry *entry;
-	struct xfs_mount	*mp = args->trans->t_mountp;
 	int			before;
 	int			after;
 	int			smallest;
@@ -1819,7 +1809,7 @@
 	leaf = bp->b_addr;
 	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
 
-	ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8);
+	ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index >= 0 && args->index < ichdr.count);
 	ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
 					xfs_attr3_leaf_hdr_size(leaf));
@@ -1827,7 +1817,7 @@
 	entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
 
 	ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
-	ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
+	ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
 
 	/*
 	 * Scan through free region table:
@@ -1842,8 +1832,8 @@
 	smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
 	entsize = xfs_attr_leaf_entsize(leaf, args->index);
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-		ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp));
-		ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp));
+		ASSERT(ichdr.freemap[i].base < args->geo->blksize);
+		ASSERT(ichdr.freemap[i].size < args->geo->blksize);
 		if (ichdr.freemap[i].base == tablesize) {
 			ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
 			ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
@@ -1920,11 +1910,11 @@
 	 * removing the name.
 	 */
 	if (smallest) {
-		tmp = XFS_LBSIZE(mp);
+		tmp = args->geo->blksize;
 		entry = xfs_attr3_leaf_entryp(leaf);
 		for (i = ichdr.count - 1; i >= 0; entry++, i--) {
 			ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
-			ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
+			ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
 
 			if (be16_to_cpu(entry->nameidx) < tmp)
 				tmp = be16_to_cpu(entry->nameidx);
@@ -1947,7 +1937,7 @@
 	tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
 	      ichdr.count * sizeof(xfs_attr_leaf_entry_t);
 
-	return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */
+	return tmp < args->geo->magicpct; /* leaf is < 37% full */
 }
 
 /*
@@ -1964,7 +1954,6 @@
 	struct xfs_attr3_icleaf_hdr drophdr;
 	struct xfs_attr3_icleaf_hdr savehdr;
 	struct xfs_attr_leaf_entry *entry;
-	struct xfs_mount	*mp = state->mp;
 
 	trace_xfs_attr_leaf_unbalance(state->args);
 
@@ -1991,13 +1980,15 @@
 		 */
 		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
 					 drop_blk->bp, &drophdr)) {
-			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+			xfs_attr3_leaf_moveents(state->args,
+						drop_leaf, &drophdr, 0,
 						save_leaf, &savehdr, 0,
-						drophdr.count, mp);
+						drophdr.count);
 		} else {
-			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+			xfs_attr3_leaf_moveents(state->args,
+						drop_leaf, &drophdr, 0,
 						save_leaf, &savehdr,
-						savehdr.count, drophdr.count, mp);
+						savehdr.count, drophdr.count);
 		}
 	} else {
 		/*
@@ -2007,7 +1998,7 @@
 		struct xfs_attr_leafblock *tmp_leaf;
 		struct xfs_attr3_icleaf_hdr tmphdr;
 
-		tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
+		tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);
 
 		/*
 		 * Copy the header into the temp leaf so that all the stuff
@@ -2020,35 +2011,39 @@
 		tmphdr.magic = savehdr.magic;
 		tmphdr.forw = savehdr.forw;
 		tmphdr.back = savehdr.back;
-		tmphdr.firstused = state->blocksize;
+		tmphdr.firstused = state->args->geo->blksize;
 
 		/* write the header to the temp buffer to initialise it */
 		xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
 
 		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
 					 drop_blk->bp, &drophdr)) {
-			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+			xfs_attr3_leaf_moveents(state->args,
+						drop_leaf, &drophdr, 0,
 						tmp_leaf, &tmphdr, 0,
-						drophdr.count, mp);
-			xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
+						drophdr.count);
+			xfs_attr3_leaf_moveents(state->args,
+						save_leaf, &savehdr, 0,
 						tmp_leaf, &tmphdr, tmphdr.count,
-						savehdr.count, mp);
+						savehdr.count);
 		} else {
-			xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
+			xfs_attr3_leaf_moveents(state->args,
+						save_leaf, &savehdr, 0,
 						tmp_leaf, &tmphdr, 0,
-						savehdr.count, mp);
-			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+						savehdr.count);
+			xfs_attr3_leaf_moveents(state->args,
+						drop_leaf, &drophdr, 0,
 						tmp_leaf, &tmphdr, tmphdr.count,
-						drophdr.count, mp);
+						drophdr.count);
 		}
-		memcpy(save_leaf, tmp_leaf, state->blocksize);
+		memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
 		savehdr = tmphdr; /* struct copy */
 		kmem_free(tmp_leaf);
 	}
 
 	xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
 	xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
-					   state->blocksize - 1);
+					   state->args->geo->blksize - 1);
 
 	/*
 	 * Copy out last hashval in each block for B-tree code.
@@ -2094,7 +2089,7 @@
 	leaf = bp->b_addr;
 	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
 	entries = xfs_attr3_leaf_entryp(leaf);
-	ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
+	ASSERT(ichdr.count < args->geo->blksize / 8);
 
 	/*
 	 * Binary search.  (note: small blocks will skip this loop)
@@ -2198,7 +2193,7 @@
 
 	leaf = bp->b_addr;
 	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-	ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
+	ASSERT(ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index < ichdr.count);
 
 	entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2249,14 +2244,14 @@
 /*ARGSUSED*/
 STATIC void
 xfs_attr3_leaf_moveents(
+	struct xfs_da_args		*args,
 	struct xfs_attr_leafblock	*leaf_s,
 	struct xfs_attr3_icleaf_hdr	*ichdr_s,
 	int				start_s,
 	struct xfs_attr_leafblock	*leaf_d,
 	struct xfs_attr3_icleaf_hdr	*ichdr_d,
 	int				start_d,
-	int				count,
-	struct xfs_mount		*mp)
+	int				count)
 {
 	struct xfs_attr_leaf_entry	*entry_s;
 	struct xfs_attr_leaf_entry	*entry_d;
@@ -2276,10 +2271,10 @@
 	ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
 	       ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
 	ASSERT(ichdr_s->magic == ichdr_d->magic);
-	ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8);
+	ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
 	ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
 					+ xfs_attr3_leaf_hdr_size(leaf_s));
-	ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8);
+	ASSERT(ichdr_d->count < args->geo->blksize / 8);
 	ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
 					+ xfs_attr3_leaf_hdr_size(leaf_d));
 
@@ -2331,11 +2326,11 @@
 			entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
 			entry_d->flags = entry_s->flags;
 			ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
-							<= XFS_LBSIZE(mp));
+							<= args->geo->blksize);
 			memmove(xfs_attr3_leaf_name(leaf_d, desti),
 				xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
 			ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
-							<= XFS_LBSIZE(mp));
+							<= args->geo->blksize);
 			memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
 			ichdr_s->usedbytes -= tmp;
 			ichdr_d->usedbytes += tmp;
@@ -2356,7 +2351,7 @@
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
 		ASSERT(((char *)entry_s + tmp) <=
-		       ((char *)leaf_s + XFS_LBSIZE(mp)));
+		       ((char *)leaf_s + args->geo->blksize));
 		memset(entry_s, 0, tmp);
 	} else {
 		/*
@@ -2371,7 +2366,7 @@
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
 		ASSERT(((char *)entry_s + tmp) <=
-		       ((char *)leaf_s + XFS_LBSIZE(mp)));
+		       ((char *)leaf_s + args->geo->blksize));
 		memset(entry_s, 0, tmp);
 	}
 
@@ -2439,22 +2434,21 @@
  * a "local" or a "remote" attribute.
  */
 int
-xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
+xfs_attr_leaf_newentsize(
+	struct xfs_da_args	*args,
+	int			*local)
 {
-	int size;
+	int			size;
 
-	size = xfs_attr_leaf_entsize_local(namelen, valuelen);
-	if (size < xfs_attr_leaf_entsize_local_max(blocksize)) {
-		if (local) {
+	size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen);
+	if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) {
+		if (local)
 			*local = 1;
-		}
-	} else {
-		size = xfs_attr_leaf_entsize_remote(namelen);
-		if (local) {
-			*local = 0;
-		}
+		return size;
 	}
-	return size;
+	if (local)
+		*local = 0;
+	return xfs_attr_leaf_entsize_remote(args->namelen);
 }
 
 

diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 3ec5ec0..e2929da 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h

@@ -96,8 +96,7 @@
 xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
 int	xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
 				   struct xfs_buf *leaf2_bp);
-int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
-					int *local);
+int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
 int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
 			xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			struct xfs_buf **bpp);

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 833fe5d..90e2eeb 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c

@@ -444,6 +444,7 @@
 				xfs_da_args_t args;
 
 				memset((char *)&args, 0, sizeof(args));
+				args.geo = context->dp->i_mount->m_attr_geo;
 				args.dp = context->dp;
 				args.whichfork = XFS_ATTR_FORK;
 				args.valuelen = valuelen;

diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index d2e6e94..b5adfec 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c

@@ -68,7 +68,6 @@
  */
 static bool
 xfs_attr3_rmt_hdr_ok(
-	struct xfs_mount	*mp,
 	void			*ptr,
 	xfs_ino_t		ino,
 	uint32_t		offset,
@@ -126,6 +125,7 @@
 	char		*ptr;
 	int		len;
 	xfs_daddr_t	bno;
+	int		blksize = mp->m_attr_geo->blksize;
 
 	/* no verification of non-crc buffers */
 	if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -134,21 +134,20 @@
 	ptr = bp->b_addr;
 	bno = bp->b_bn;
 	len = BBTOB(bp->b_length);
-	ASSERT(len >= XFS_LBSIZE(mp));
+	ASSERT(len >= blksize);
 
 	while (len > 0) {
-		if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
-				      XFS_ATTR3_RMT_CRC_OFF)) {
+		if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
 			xfs_buf_ioerror(bp, EFSBADCRC);
 			break;
 		}
-		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
 			xfs_buf_ioerror(bp, EFSCORRUPTED);
 			break;
 		}
-		len -= XFS_LBSIZE(mp);
-		ptr += XFS_LBSIZE(mp);
-		bno += mp->m_bsize;
+		len -= blksize;
+		ptr += blksize;
+		bno += BTOBB(blksize);
 	}
 
 	if (bp->b_error)
@@ -166,6 +165,7 @@
 	char		*ptr;
 	int		len;
 	xfs_daddr_t	bno;
+	int		blksize = mp->m_attr_geo->blksize;
 
 	/* no verification of non-crc buffers */
 	if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -174,10 +174,10 @@
 	ptr = bp->b_addr;
 	bno = bp->b_bn;
 	len = BBTOB(bp->b_length);
-	ASSERT(len >= XFS_LBSIZE(mp));
+	ASSERT(len >= blksize);
 
 	while (len > 0) {
-		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
 			xfs_buf_ioerror(bp, EFSCORRUPTED);
 			xfs_verifier_error(bp);
 			return;
@@ -188,11 +188,11 @@
 			rmt = (struct xfs_attr3_rmt_hdr *)ptr;
 			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 		}
-		xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
+		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
 
-		len -= XFS_LBSIZE(mp);
-		ptr += XFS_LBSIZE(mp);
-		bno += mp->m_bsize;
+		len -= blksize;
+		ptr += blksize;
+		bno += BTOBB(blksize);
 	}
 	ASSERT(len == 0);
 }
@@ -241,17 +241,18 @@
 	char		*src = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
 	int		len = BBTOB(bp->b_length);
+	int		blksize = mp->m_attr_geo->blksize;
 
-	ASSERT(len >= XFS_LBSIZE(mp));
+	ASSERT(len >= blksize);
 
 	while (len > 0 && *valuelen > 0) {
 		int hdr_size = 0;
-		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
 
 		byte_cnt = min(*valuelen, byte_cnt);
 
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+			if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
 						  byte_cnt, bno)) {
 				xfs_alert(mp,
 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
@@ -264,9 +265,9 @@
 		memcpy(*dst, src + hdr_size, byte_cnt);
 
 		/* roll buffer forwards */
-		len -= XFS_LBSIZE(mp);
-		src += XFS_LBSIZE(mp);
-		bno += mp->m_bsize;
+		len -= blksize;
+		src += blksize;
+		bno += BTOBB(blksize);
 
 		/* roll attribute data forwards */
 		*valuelen -= byte_cnt;
@@ -288,12 +289,13 @@
 	char		*dst = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
 	int		len = BBTOB(bp->b_length);
+	int		blksize = mp->m_attr_geo->blksize;
 
-	ASSERT(len >= XFS_LBSIZE(mp));
+	ASSERT(len >= blksize);
 
 	while (len > 0 && *valuelen > 0) {
 		int hdr_size;
-		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
 
 		byte_cnt = min(*valuelen, byte_cnt);
 		hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
@@ -305,17 +307,17 @@
 		 * If this is the last block, zero the remainder of it.
 		 * Check that we are actually the last block, too.
 		 */
-		if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
+		if (byte_cnt + hdr_size < blksize) {
 			ASSERT(*valuelen - byte_cnt == 0);
-			ASSERT(len == XFS_LBSIZE(mp));
+			ASSERT(len == blksize);
 			memset(dst + hdr_size + byte_cnt, 0,
-					XFS_LBSIZE(mp) - hdr_size - byte_cnt);
+					blksize - hdr_size - byte_cnt);
 		}
 
 		/* roll buffer forwards */
-		len -= XFS_LBSIZE(mp);
-		dst += XFS_LBSIZE(mp);
-		bno += mp->m_bsize;
+		len -= blksize;
+		dst += blksize;
+		bno += BTOBB(blksize);
 
 		/* roll attribute data forwards */
 		*valuelen -= byte_cnt;

diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index f1e3c90..e1649c0 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h

@@ -66,8 +66,11 @@
 		n = ffs(w);
 	} else {	/* upper bits */
 		w = (__uint32_t)(v >> 32);
-		if (w && (n = ffs(w)))
-		n += 32;
+		if (w) {
+			n = ffs(w);
+			if (n)
+				n += 32;
+		}
 	}
 	return n - 1;
 }

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index f0efc7e..96175df 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c

@@ -94,7 +94,7 @@
 		maxleafents = MAXAEXTNUM;
 		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
 	}
-	maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
+	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
 	minleafrecs = mp->m_bmap_dmnr[0];
 	minnoderecs = mp->m_bmap_dmnr[1];
 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -233,7 +233,6 @@
  */
 STATIC void
 xfs_bmap_forkoff_reset(
-	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
 	int		whichfork)
 {
@@ -905,7 +904,7 @@
 	ASSERT(ifp->if_bytes == 0);
 	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
 
-	xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
+	xfs_bmap_forkoff_reset(ip, whichfork);
 	ifp->if_flags &= ~XFS_IFINLINE;
 	ifp->if_flags |= XFS_IFEXTENTS;
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
@@ -1099,10 +1098,11 @@
 
 	if (S_ISDIR(ip->i_d.di_mode)) {
 		memset(&dargs, 0, sizeof(dargs));
+		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
 		dargs.flist = flist;
-		dargs.total = ip->i_mount->m_dirblkfsbs;
+		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
 		return xfs_dir2_sf_to_block(&dargs);
@@ -1675,7 +1675,6 @@
  */
 int
 xfs_bmap_last_offset(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		*last_block,
 	int			whichfork)
@@ -3517,6 +3516,67 @@
 #undef ISVALID
 }
 
+static int
+xfs_bmap_longest_free_extent(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		ag,
+	xfs_extlen_t		*blen,
+	int			*notinit)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	xfs_extlen_t		longest;
+	int			error = 0;
+
+	pag = xfs_perag_get(mp, ag);
+	if (!pag->pagf_init) {
+		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
+		if (error)
+			goto out;
+
+		if (!pag->pagf_init) {
+			*notinit = 1;
+			goto out;
+		}
+	}
+
+	longest = xfs_alloc_longest_free_extent(mp, pag);
+	if (*blen < longest)
+		*blen = longest;
+
+out:
+	xfs_perag_put(pag);
+	return error;
+}
+
+static void
+xfs_bmap_select_minlen(
+	struct xfs_bmalloca	*ap,
+	struct xfs_alloc_arg	*args,
+	xfs_extlen_t		*blen,
+	int			notinit)
+{
+	if (notinit || *blen < ap->minlen) {
+		/*
+		 * Since we did a BUF_TRYLOCK above, it is possible that
+		 * there is space for this request.
+		 */
+		args->minlen = ap->minlen;
+	} else if (*blen < args->maxlen) {
+		/*
+		 * If the best seen length is less than the request length,
+		 * use the best as the minimum.
+		 */
+		args->minlen = *blen;
+	} else {
+		/*
+		 * Otherwise we've seen an extent as big as maxlen, use that
+		 * as the minimum.
+		 */
+		args->minlen = args->maxlen;
+	}
+}
+
 STATIC int
 xfs_bmap_btalloc_nullfb(
 	struct xfs_bmalloca	*ap,
@@ -3524,111 +3584,74 @@
 	xfs_extlen_t		*blen)
 {
 	struct xfs_mount	*mp = ap->ip->i_mount;
-	struct xfs_perag	*pag;
 	xfs_agnumber_t		ag, startag;
 	int			notinit = 0;
 	int			error;
 
-	if (ap->userdata && xfs_inode_is_filestream(ap->ip))
-		args->type = XFS_ALLOCTYPE_NEAR_BNO;
-	else
-		args->type = XFS_ALLOCTYPE_START_BNO;
+	args->type = XFS_ALLOCTYPE_START_BNO;
 	args->total = ap->total;
 
-	/*
-	 * Search for an allocation group with a single extent large enough
-	 * for the request.  If one isn't found, then adjust the minimum
-	 * allocation size to the largest space found.
-	 */
 	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
 	if (startag == NULLAGNUMBER)
 		startag = ag = 0;
 
-	pag = xfs_perag_get(mp, ag);
 	while (*blen < args->maxlen) {
-		if (!pag->pagf_init) {
-			error = xfs_alloc_pagf_init(mp, args->tp, ag,
-						    XFS_ALLOC_FLAG_TRYLOCK);
-			if (error) {
-				xfs_perag_put(pag);
-				return error;
-			}
-		}
+		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+						     &notinit);
+		if (error)
+			return error;
 
-		/*
-		 * See xfs_alloc_fix_freelist...
-		 */
-		if (pag->pagf_init) {
-			xfs_extlen_t	longest;
-			longest = xfs_alloc_longest_free_extent(mp, pag);
-			if (*blen < longest)
-				*blen = longest;
-		} else
-			notinit = 1;
-
-		if (xfs_inode_is_filestream(ap->ip)) {
-			if (*blen >= args->maxlen)
-				break;
-
-			if (ap->userdata) {
-				/*
-				 * If startag is an invalid AG, we've
-				 * come here once before and
-				 * xfs_filestream_new_ag picked the
-				 * best currently available.
-				 *
-				 * Don't continue looping, since we
-				 * could loop forever.
-				 */
-				if (startag == NULLAGNUMBER)
-					break;
-
-				error = xfs_filestream_new_ag(ap, &ag);
-				xfs_perag_put(pag);
-				if (error)
-					return error;
-
-				/* loop again to set 'blen'*/
-				startag = NULLAGNUMBER;
-				pag = xfs_perag_get(mp, ag);
-				continue;
-			}
-		}
 		if (++ag == mp->m_sb.sb_agcount)
 			ag = 0;
 		if (ag == startag)
 			break;
-		xfs_perag_put(pag);
-		pag = xfs_perag_get(mp, ag);
 	}
-	xfs_perag_put(pag);
+
+	xfs_bmap_select_minlen(ap, args, blen, notinit);
+	return 0;
+}
+
+STATIC int
+xfs_bmap_btalloc_filestreams(
+	struct xfs_bmalloca	*ap,
+	struct xfs_alloc_arg	*args,
+	xfs_extlen_t		*blen)
+{
+	struct xfs_mount	*mp = ap->ip->i_mount;
+	xfs_agnumber_t		ag;
+	int			notinit = 0;
+	int			error;
+
+	args->type = XFS_ALLOCTYPE_NEAR_BNO;
+	args->total = ap->total;
+
+	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
+	if (ag == NULLAGNUMBER)
+		ag = 0;
+
+	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
+	if (error)
+		return error;
+
+	if (*blen < args->maxlen) {
+		error = xfs_filestream_new_ag(ap, &ag);
+		if (error)
+			return error;
+
+		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+						     &notinit);
+		if (error)
+			return error;
+
+	}
+
+	xfs_bmap_select_minlen(ap, args, blen, notinit);
 
 	/*
-	 * Since the above loop did a BUF_TRYLOCK, it is
-	 * possible that there is space for this request.
+	 * Set the failure fallback case to look in the selected AG as stream
+	 * may have moved.
 	 */
-	if (notinit || *blen < ap->minlen)
-		args->minlen = ap->minlen;
-	/*
-	 * If the best seen length is less than the request
-	 * length, use the best as the minimum.
-	 */
-	else if (*blen < args->maxlen)
-		args->minlen = *blen;
-	/*
-	 * Otherwise we've seen an extent as big as maxlen,
-	 * use that as the minimum.
-	 */
-	else
-		args->minlen = args->maxlen;
-
-	/*
-	 * set the failure fallback case to look in the selected
-	 * AG as the stream may have moved.
-	 */
-	if (xfs_inode_is_filestream(ap->ip))
-		ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
-
+	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
 	return 0;
 }
 
@@ -3708,7 +3731,15 @@
 	args.firstblock = *ap->firstblock;
 	blen = 0;
 	if (nullfb) {
-		error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
+		/*
+		 * Search for an allocation group with a single extent large
+		 * enough for the request.  If one isn't found, then adjust
+		 * the minimum allocation size to the largest space found.
+		 */
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
+		else
+			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
 		if (error)
 			return error;
 	} else if (ap->flist->xbf_low) {

diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f84bd7a..38ba36e 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h

@@ -156,8 +156,8 @@
 		xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
 int	xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *last_block, int whichfork);
-int	xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
-		xfs_fileoff_t *unused, int whichfork);
+int	xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
+		int whichfork);
 int	xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
 int	xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		int whichfork);

diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 818d546..948836c 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c

@@ -84,7 +84,7 @@
 	rblock->bb_level = dblock->bb_level;
 	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
 	rblock->bb_numrecs = dblock->bb_numrecs;
-	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
 	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
 	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
 	fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
@@ -443,7 +443,7 @@
 	ASSERT(rblock->bb_level != 0);
 	dblock->bb_level = rblock->bb_level;
 	dblock->bb_numrecs = rblock->bb_numrecs;
-	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
 	fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
 	tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
 	fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
@@ -519,7 +519,6 @@
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
@@ -672,8 +671,7 @@
 {
 	if (level != cur->bc_nlevels - 1)
 		return cur->bc_mp->m_bmap_dmxr[level != 0];
-	return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
-				level == 0);
+	return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
 }
 
 STATIC void
@@ -914,7 +912,6 @@
  */
 int
 xfs_bmdr_maxrecs(
-	struct xfs_mount	*mp,
 	int			blocklen,
 	int			leaf)
 {

diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6e42e1e..819a8a4 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h

@@ -130,7 +130,7 @@
 			xfs_bmdr_block_t *, int);
 
 extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
-extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
 extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
 
 extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 296160b..703b3ec 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c

@@ -258,14 +258,23 @@
 	struct xfs_bmalloca	*args = container_of(work,
 						struct xfs_bmalloca, work);
 	unsigned long		pflags;
+	unsigned long		new_pflags = PF_FSTRANS;
 
-	/* we are in a transaction context here */
-	current_set_flags_nested(&pflags, PF_FSTRANS);
+	/*
+	 * we are in a transaction context here, but may also be doing work
+	 * in kswapd context, and hence we may need to inherit that state
+	 * temporarily to ensure that we don't block waiting for memory reclaim
+	 * in any way.
+	 */
+	if (args->kswapd)
+		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+	current_set_flags_nested(&pflags, new_pflags);
 
 	args->result = __xfs_bmapi_allocate(args);
 	complete(args->done);
 
-	current_restore_flags_nested(&pflags, PF_FSTRANS);
+	current_restore_flags_nested(&pflags, new_pflags);
 }
 
 /*
@@ -284,6 +293,7 @@
 
 
 	args->done = &done;
+	args->kswapd = current_is_kswapd();
 	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
 	queue_work(xfs_alloc_wq, &args->work);
 	wait_for_completion(&done);
@@ -1519,7 +1529,6 @@
 
 	while (!error && !done) {
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		tp->t_flags |= XFS_TRANS_RESERVE;
 		/*
 		 * We would need to reserve permanent block for transaction.
 		 * This will come into picture when after shifting extent into
@@ -1529,7 +1538,6 @@
 		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
 				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
 		if (error) {
-			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
 			xfs_trans_cancel(tp, 0);
 			break;
 		}

diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 935ed2b..075f722 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h

@@ -50,12 +50,13 @@
 	xfs_extlen_t		total;	/* total blocks needed for xaction */
 	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
 	xfs_extlen_t		minleft; /* amount must be left after alloc */
-	char			eof;	/* set if allocating past last extent */
-	char			wasdel;	/* replacing a delayed allocation */
-	char			userdata;/* set if is user data */
-	char			aeof;	/* allocated space at eof */
-	char			conv;	/* overwriting unwritten extents */
-	char			stack_switch;
+	bool			eof;	/* set if allocating past last extent */
+	bool			wasdel;	/* replacing a delayed allocation */
+	bool			userdata;/* set if is user data */
+	bool			aeof;	/* allocated space at eof */
+	bool			conv;	/* overwriting unwritten extents */
+	bool			stack_switch;
+	bool			kswapd;	/* allocation in kswapd context */
 	int			flags;
 	struct completion	*done;
 	struct work_struct	work;

diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e80d59f..bf810c6 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c

@@ -43,9 +43,10 @@
  * Btree magic numbers.
  */
 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
-	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
+	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+	  XFS_FIBT_MAGIC },
 	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
-	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
+	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
 };
 #define xfs_btree_magic(cur) \
 	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -552,14 +553,11 @@
 	xfs_fsblock_t	fsbno,		/* file system block number */
 	uint		lock)		/* lock flags for get_buf */
 {
-	xfs_buf_t	*bp;		/* buffer pointer (return value) */
 	xfs_daddr_t		d;		/* real disk block address */
 
 	ASSERT(fsbno != NULLFSBLOCK);
 	d = XFS_FSB_TO_DADDR(mp, fsbno);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(!xfs_buf_geterror(bp));
-	return bp;
+	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 }
 
 /*
@@ -574,15 +572,12 @@
 	xfs_agblock_t	agbno,		/* allocation group block number */
 	uint		lock)		/* lock flags for get_buf */
 {
-	xfs_buf_t	*bp;		/* buffer pointer (return value) */
 	xfs_daddr_t		d;		/* real disk block address */
 
 	ASSERT(agno != NULLAGNUMBER);
 	ASSERT(agbno != NULLAGBLOCK);
 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(!xfs_buf_geterror(bp));
-	return bp;
+	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 }
 
 /*
@@ -722,7 +717,6 @@
 				   mp->m_bsize, lock, &bp, ops);
 	if (error)
 		return error;
-	ASSERT(!xfs_buf_geterror(bp));
 	if (bp)
 		xfs_buf_set_ref(bp, refval);
 	*bpp = bp;
@@ -1115,6 +1109,7 @@
 		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
 		break;
 	case XFS_BTNUM_INO:
+	case XFS_BTNUM_FINO:
 		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
 		break;
 	case XFS_BTNUM_BMAP:
@@ -1159,7 +1154,6 @@
 xfs_btree_read_buf_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*ptr,
-	int			level,
 	int			flags,
 	struct xfs_btree_block	**block,
 	struct xfs_buf		**bpp)
@@ -1178,7 +1172,6 @@
 	if (error)
 		return error;
 
-	ASSERT(!xfs_buf_geterror(*bpp));
 	xfs_btree_set_refs(cur, *bpp);
 	*block = XFS_BUF_TO_BLOCK(*bpp);
 	return 0;
@@ -1517,8 +1510,8 @@
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 
@@ -1616,8 +1609,8 @@
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 		xfs_btree_setbuf(cur, lev, bp);
@@ -1667,7 +1660,7 @@
 		return 0;
 	}
 
-	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
 	if (error)
 		return error;
 
@@ -2018,7 +2011,7 @@
 		goto out0;
 
 	/* Set up the left neighbor as "left". */
-	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 	if (error)
 		goto error0;
 
@@ -2202,7 +2195,7 @@
 		goto out0;
 
 	/* Set up the right neighbor as "right". */
-	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 	if (error)
 		goto error0;
 
@@ -2372,7 +2365,7 @@
 	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2470,7 +2463,7 @@
 	 * point back to right instead of to left.
 	 */
 	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
-		error = xfs_btree_read_buf_block(cur, &rrptr, level,
+		error = xfs_btree_read_buf_block(cur, &rrptr,
 							0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
@@ -2545,7 +2538,7 @@
 	pp = xfs_btree_ptr_addr(cur, 1, block);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0) {
@@ -2649,7 +2642,7 @@
 	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2684,8 +2677,7 @@
 		lbp = bp;
 		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 		left = block;
-		error = xfs_btree_read_buf_block(cur, &rptr,
-					cur->bc_nlevels - 1, 0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 		bp = rbp;
@@ -2696,8 +2688,7 @@
 		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
 		right = block;
 		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-		error = xfs_btree_read_buf_block(cur, &lptr,
-					cur->bc_nlevels - 1, 0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 		bp = lbp;
@@ -3649,8 +3640,7 @@
 		rptr = cptr;
 		right = block;
 		rbp = bp;
-		error = xfs_btree_read_buf_block(cur, &lptr, level,
-							0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 
@@ -3667,8 +3657,7 @@
 		lptr = cptr;
 		left = block;
 		lbp = bp;
-		error = xfs_btree_read_buf_block(cur, &rptr, level,
-							0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 
@@ -3740,8 +3729,7 @@
 	/* If there is a right sibling, point it to the remaining block. */
 	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
 	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
-		error = xfs_btree_read_buf_block(cur, &cptr, level,
-							0, &rrblock, &rrbp);
+		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
 		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);

diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 91e34f2..a04b694 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h

@@ -62,6 +62,7 @@
 #define	XFS_BTNUM_CNT	((xfs_btnum_t)XFS_BTNUM_CNTi)
 #define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi)
 #define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi)
+#define	XFS_BTNUM_FINO	((xfs_btnum_t)XFS_BTNUM_FINOi)
 
 /*
  * For logging record fields.
@@ -92,6 +93,7 @@
 	case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break;	\
 	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break;	\
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break;	\
+	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break;	\
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
 	}       \
 } while (0)
@@ -105,6 +107,7 @@
 	case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
 	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
+	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
 	}       \
 } while (0)
@@ -129,7 +132,7 @@
 	int	(*alloc_block)(struct xfs_btree_cur *cur,
 			       union xfs_btree_ptr *start_bno,
 			       union xfs_btree_ptr *new_bno,
-			       int length, int *stat);
+			       int *stat);
 	int	(*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
 
 	/* update last record information */

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cb10a0a..7a34a1a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c

@@ -216,8 +216,7 @@
 STATIC int
 _xfs_buf_get_pages(
 	xfs_buf_t		*bp,
-	int			page_count,
-	xfs_buf_flags_t		flags)
+	int			page_count)
 {
 	/* Make sure that we have a page list */
 	if (bp->b_pages == NULL) {
@@ -330,7 +329,7 @@
 	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
 								>> PAGE_SHIFT;
 	page_count = end - start;
-	error = _xfs_buf_get_pages(bp, page_count, flags);
+	error = _xfs_buf_get_pages(bp, page_count);
 	if (unlikely(error))
 		return error;
 
@@ -778,7 +777,7 @@
 	bp->b_pages = NULL;
 	bp->b_addr = mem;
 
-	rval = _xfs_buf_get_pages(bp, page_count, 0);
+	rval = _xfs_buf_get_pages(bp, page_count);
 	if (rval)
 		return rval;
 
@@ -811,7 +810,7 @@
 		goto fail;
 
 	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
-	error = _xfs_buf_get_pages(bp, page_count, 0);
+	error = _xfs_buf_get_pages(bp, page_count);
 	if (error)
 		goto fail_free_buf;
 
@@ -1615,7 +1614,6 @@
 int
 xfs_setsize_buftarg(
 	xfs_buftarg_t		*btp,
-	unsigned int		blocksize,
 	unsigned int		sectorsize)
 {
 	/* Set up metadata sector size info */
@@ -1650,16 +1648,13 @@
 	xfs_buftarg_t		*btp,
 	struct block_device	*bdev)
 {
-	return xfs_setsize_buftarg(btp, PAGE_SIZE,
-				   bdev_logical_block_size(bdev));
+	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
 }
 
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
-	struct block_device	*bdev,
-	int			external,
-	const char		*fsname)
+	struct block_device	*bdev)
 {
 	xfs_buftarg_t		*btp;
 

diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b8a3abf..3a7a552 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h

@@ -298,11 +298,6 @@
 
 extern int xfs_bioerror_relse(struct xfs_buf *);
 
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
-	return bp ? bp->b_error : ENOMEM;
-}
-
 /* Buffer Utility Routines */
 extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
 
@@ -387,10 +382,10 @@
  *	Handling of buftargs.
  */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-			struct block_device *, int, const char *);
+			struct block_device *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
 
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8752821..4654338 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c

@@ -812,7 +812,6 @@
  */
 static void
 xfs_buf_item_log_segment(
-	struct xfs_buf_log_item	*bip,
 	uint			first,
 	uint			last,
 	uint			*map)
@@ -920,7 +919,7 @@
 		if (end > last)
 			end = last;
 
-		xfs_buf_item_log_segment(bip, first, end,
+		xfs_buf_item_log_segment(first, end,
 					 &bip->bli_formats[i].blf_data_map[0]);
 
 		start += bp->b_maps[i].bm_len;
@@ -1053,7 +1052,7 @@
 	static ulong		lasttime;
 	static xfs_buftarg_t	*lasttarg;
 
-	if (likely(!xfs_buf_geterror(bp)))
+	if (likely(!bp->b_error))
 		goto do_callbacks;
 
 	/*

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6cc5f67..a514ab6 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c

@@ -167,8 +167,8 @@
 	 * we don't know if the node is for and attribute or directory tree,
 	 * so only fail if the count is outside both bounds
 	 */
-	if (ichdr.count > mp->m_dir_node_ents &&
-	    ichdr.count > mp->m_attr_node_ents)
+	if (ichdr.count > mp->m_dir_geo->node_ents &&
+	    ichdr.count > mp->m_attr_geo->node_ents)
 		return false;
 
 	/* XXX: hash order check? */
@@ -598,7 +598,7 @@
 	 * Set up the new root node.
 	 */
 	error = xfs_da3_node_create(args,
-		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
+		(args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
 		level + 1, &bp, args->whichfork);
 	if (error)
 		return error;
@@ -616,10 +616,10 @@
 #ifdef DEBUG
 	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
 	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
-		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
-		       blk1->blkno < mp->m_dirfreeblk);
-		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
-		       blk2->blkno < mp->m_dirfreeblk);
+		ASSERT(blk1->blkno >= args->geo->leafblk &&
+		       blk1->blkno < args->geo->freeblk);
+		ASSERT(blk2->blkno >= args->geo->leafblk &&
+		       blk2->blkno < args->geo->freeblk);
 	}
 #endif
 
@@ -663,7 +663,7 @@
 	/*
 	 * Do we have to split the node?
 	 */
-	if (nodehdr.count + newcount > state->node_ents) {
+	if (nodehdr.count + newcount > state->args->geo->node_ents) {
 		/*
 		 * Allocate a new node, add to the doubly linked chain of
 		 * nodes, then move some of our excess entries into it.
@@ -894,8 +894,8 @@
 	ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
 	ASSERT(newblk->blkno != 0);
 	if (state->args->whichfork == XFS_DATA_FORK)
-		ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
-		       newblk->blkno < state->mp->m_dirfreeblk);
+		ASSERT(newblk->blkno >= state->args->geo->leafblk &&
+		       newblk->blkno < state->args->geo->freeblk);
 
 	/*
 	 * We may need to make some room before we insert the new node.
@@ -1089,14 +1089,15 @@
 	 * that could occur. For dir3 blocks we also need to update the block
 	 * number in the buffer header.
 	 */
-	memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
+	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
 	root_blk->bp->b_ops = bp->b_ops;
 	xfs_trans_buf_copy_type(root_blk->bp, bp);
 	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
 		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
 		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
 	}
-	xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
+			  args->geo->blksize - 1);
 	error = xfs_da_shrink_inode(args, child, bp);
 	return(error);
 }
@@ -1139,7 +1140,7 @@
 	info = blk->bp->b_addr;
 	node = (xfs_da_intnode_t *)info;
 	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-	if (nodehdr.count > (state->node_ents >> 1)) {
+	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
 		*action = 0;	/* blk over 50%, don't try to join */
 		return(0);	/* blk over 50%, don't try to join */
 	}
@@ -1176,8 +1177,8 @@
 	 * We prefer coalescing with the lower numbered sibling so as
 	 * to shrink a directory over time.
 	 */
-	count  = state->node_ents;
-	count -= state->node_ents >> 2;
+	count  = state->args->geo->node_ents;
+	count -= state->args->geo->node_ents >> 2;
 	count -= nodehdr.count;
 
 	/* start with smaller blk num */
@@ -1472,7 +1473,7 @@
 	 * Descend thru the B-tree searching each level for the right
 	 * node to use, until the right hashval is found.
 	 */
-	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
+	blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
 	for (blk = &state->path.blk[0], state->path.active = 1;
 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
 			 blk++, state->path.active++) {
@@ -2090,20 +2091,12 @@
 	xfs_dablk_t		*new_blkno)
 {
 	xfs_fileoff_t		bno;
-	int			count;
 	int			error;
 
 	trace_xfs_da_grow_inode(args);
 
-	if (args->whichfork == XFS_DATA_FORK) {
-		bno = args->dp->i_mount->m_dirleafblk;
-		count = args->dp->i_mount->m_dirblkfsbs;
-	} else {
-		bno = 0;
-		count = 1;
-	}
-
-	error = xfs_da_grow_inode_int(args, &bno, count);
+	bno = args->geo->leafblk;
+	error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
 	if (!error)
 		*new_blkno = (xfs_dablk_t)bno;
 	return error;
@@ -2158,7 +2151,7 @@
 	w = args->whichfork;
 	ASSERT(w == XFS_DATA_FORK);
 	mp = dp->i_mount;
-	lastoff = mp->m_dirfreeblk;
+	lastoff = args->geo->freeblk;
 	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
 	if (error)
 		return error;
@@ -2170,15 +2163,15 @@
 	/*
 	 * Read the last block in the btree space.
 	 */
-	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
+	last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
 	error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
 	if (error)
 		return error;
 	/*
 	 * Copy the last block into the dead buffer and log it.
 	 */
-	memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
-	xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
+	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
 	dead_info = dead_buf->b_addr;
 	/*
 	 * Get values from the moved block.
@@ -2247,7 +2240,7 @@
 					sizeof(sib_info->back)));
 		sib_buf = NULL;
 	}
-	par_blkno = mp->m_dirleafblk;
+	par_blkno = args->geo->leafblk;
 	level = -1;
 	/*
 	 * Walk down the tree looking for the parent of the moved block.
@@ -2357,10 +2350,7 @@
 	w = args->whichfork;
 	tp = args->trans;
 	mp = dp->i_mount;
-	if (w == XFS_DATA_FORK)
-		count = mp->m_dirblkfsbs;
-	else
-		count = 1;
+	count = args->geo->fsbcount;
 	for (;;) {
 		/*
 		 * Remove extents.  If we get ENOSPC for a dir we have to move
@@ -2462,7 +2452,6 @@
  */
 static int
 xfs_dabuf_map(
-	struct xfs_trans	*trans,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
@@ -2480,7 +2469,10 @@
 	ASSERT(map && *map);
 	ASSERT(*nmaps == 1);
 
-	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
+	if (whichfork == XFS_DATA_FORK)
+		nfsb = mp->m_dir_geo->fsbcount;
+	else
+		nfsb = mp->m_attr_geo->fsbcount;
 
 	/*
 	 * Caller doesn't have a mapping.  -2 means don't complain
@@ -2558,7 +2550,7 @@
 	*bpp = NULL;
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */
@@ -2606,7 +2598,7 @@
 	*bpp = NULL;
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */
@@ -2625,47 +2617,6 @@
 		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
 	else
 		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-
-	/*
-	 * This verification code will be moved to a CRC verification callback
-	 * function so just leave it here unchanged until then.
-	 */
-	{
-		xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
-		xfs_dir2_free_t		*free = bp->b_addr;
-		xfs_da_blkinfo_t	*info = bp->b_addr;
-		uint			magic, magic1;
-		struct xfs_mount	*mp = dp->i_mount;
-
-		magic = be16_to_cpu(info->magic);
-		magic1 = be32_to_cpu(hdr->magic);
-		if (unlikely(
-		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
-				   (magic != XFS_DA3_NODE_MAGIC) &&
-				   (magic != XFS_ATTR_LEAF_MAGIC) &&
-				   (magic != XFS_ATTR3_LEAF_MAGIC) &&
-				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
-				   (magic != XFS_DIR3_LEAF1_MAGIC) &&
-				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
-				   (magic != XFS_DIR3_LEAFN_MAGIC) &&
-				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
-				   (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
-				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
-				   (magic1 != XFS_DIR3_DATA_MAGIC) &&
-				   (free->hdr.magic !=
-					cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
-				   (free->hdr.magic !=
-					cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
-				mp, XFS_ERRTAG_DA_READ_BUF,
-				XFS_RANDOM_DA_READ_BUF))) {
-			trace_xfs_da_btree_corrupt(bp, _RET_IP_);
-			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
-					     XFS_ERRLEVEL_LOW, mp, info);
-			error = XFS_ERROR(EFSCORRUPTED);
-			xfs_trans_brelse(trans, bp);
-			goto out_free;
-		}
-	}
 	*bpp = bp;
 out_free:
 	if (mapp != &map)
@@ -2679,7 +2630,6 @@
  */
 xfs_daddr_t
 xfs_da_reada_buf(
-	struct xfs_trans	*trans,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mappedbno,
@@ -2693,7 +2643,7 @@
 
 	mapp = &map;
 	nmap = 1;
-	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
 				&mapp, &nmap);
 	if (error) {
 		/* mapping a hole is not an error, but we don't continue */

diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 201c609..6e153e3 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h

@@ -25,6 +25,23 @@
 struct zone;
 struct xfs_dir_ops;
 
+/*
+ * Directory/attribute geometry information. There will be one of these for each
+ * data fork type, and it will be passed around via the xfs_da_args. Global
+ * structures will be attached to the xfs_mount.
+ */
+struct xfs_da_geometry {
+	int		blksize;	/* da block size in bytes */
+	int		fsbcount;	/* da block size in filesystem blocks */
+	uint8_t		fsblog;		/* log2 of _filesystem_ block size */
+	uint8_t		blklog;		/* log2 of da block size */
+	uint		node_ents;	/* # of entries in a danode */
+	int		magicpct;	/* 37% of block size in bytes */
+	xfs_dablk_t	datablk;	/* blockno of dir data v2 */
+	xfs_dablk_t	leafblk;	/* blockno of leaf data v2 */
+	xfs_dablk_t	freeblk;	/* blockno of free data v2 */
+};
+
 /*========================================================================
  * Btree searching and modification structure definitions.
  *========================================================================*/
@@ -42,6 +59,7 @@
  * Structure to ease passing around component names.
  */
 typedef struct xfs_da_args {
+	struct xfs_da_geometry *geo;	/* da block geometry */
 	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
 	__uint8_t	filetype;	/* filetype of inode for directories */
@@ -110,8 +128,6 @@
 typedef struct xfs_da_state {
 	xfs_da_args_t		*args;		/* filename arguments */
 	struct xfs_mount	*mp;		/* filesystem mount point */
-	unsigned int		blocksize;	/* logical block size */
-	unsigned int		node_ents;	/* how many entries in danode */
 	xfs_da_state_path_t	path;		/* search/split paths */
 	xfs_da_state_path_t	altpath;	/* alternate path for join */
 	unsigned char		inleaf;		/* insert into 1->lf, 0->splf */
@@ -185,9 +201,9 @@
 			       xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			       struct xfs_buf **bpp, int whichfork,
 			       const struct xfs_buf_ops *ops);
-xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
-				xfs_dablk_t bno, xfs_daddr_t mapped_bno,
-				int whichfork, const struct xfs_buf_ops *ops);
+xfs_daddr_t	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+				xfs_daddr_t mapped_bno, int whichfork,
+				const struct xfs_buf_ops *ops);
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  struct xfs_buf *dead_buf);
 

diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c
index e6c83e1..c9aee52 100644
--- a/fs/xfs/xfs_da_format.c
+++ b/fs/xfs/xfs_da_format.c

@@ -26,8 +26,10 @@
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
 
 /*
  * Shortform directory ops
@@ -425,9 +427,9 @@
  * Directory Leaf block operations
  */
 static int
-xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
 {
-	return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) /
+	return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
 		(uint)sizeof(struct xfs_dir2_leaf_entry);
 }
 
@@ -438,9 +440,9 @@
 }
 
 static int
-xfs_dir3_max_leaf_ents(struct xfs_mount *mp)
+xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
 {
-	return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) /
+	return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
 		(uint)sizeof(struct xfs_dir2_leaf_entry);
 }
 
@@ -591,9 +593,9 @@
  * Directory free space block operations
  */
 static int
-xfs_dir2_free_max_bests(struct xfs_mount *mp)
+xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
 {
-	return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
+	return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
 		sizeof(xfs_dir2_data_off_t);
 }
 
@@ -607,24 +609,25 @@
  * Convert data space db to the corresponding free db.
  */
 static xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
 {
-	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
+	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+			(db / xfs_dir2_free_max_bests(geo));
 }
 
 /*
  * Convert data space db to the corresponding index in a free db.
  */
 static int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
 {
-	return db % xfs_dir2_free_max_bests(mp);
+	return db % xfs_dir2_free_max_bests(geo);
 }
 
 static int
-xfs_dir3_free_max_bests(struct xfs_mount *mp)
+xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
 {
-	return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) /
+	return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
 		sizeof(xfs_dir2_data_off_t);
 }
 
@@ -638,18 +641,19 @@
  * Convert data space db to the corresponding free db.
  */
 static xfs_dir2_db_t
-xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
 {
-	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
+	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+			(db / xfs_dir3_free_max_bests(geo));
 }
 
 /*
  * Convert data space db to the corresponding index in a free db.
  */
 static int
-xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
 {
-	return db % xfs_dir3_free_max_bests(mp);
+	return db % xfs_dir3_free_max_bests(geo);
 }
 
 static void

diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_da_format.h
index a19d3f8..0a49b02 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/xfs_da_format.h

@@ -19,10 +19,6 @@
 #ifndef __XFS_DA_FORMAT_H__
 #define __XFS_DA_FORMAT_H__
 
-/*========================================================================
- * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
- *========================================================================*/
-
 /*
  * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
  *
@@ -122,8 +118,6 @@
 	__uint16_t	level;
 };
 
-#define	XFS_LBSIZE(mp)	(mp)->m_sb.sb_blocksize
-
 /*
  * Directory version 2.
  *
@@ -330,8 +324,6 @@
 #define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
 #define	XFS_DIR2_DATA_SPACE	0
 #define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_DATA_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
 
 /*
  * Describe a free area in the data block.
@@ -456,8 +448,6 @@
  */
 #define	XFS_DIR2_LEAF_SPACE	1
 #define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
 
 /*
  * Leaf block header.
@@ -514,17 +504,6 @@
 #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
 
 /*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline struct xfs_dir2_leaf_tail *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
-{
-	return (struct xfs_dir2_leaf_tail *)
-		((char *)lp + mp->m_dirblksize -
-		  sizeof(struct xfs_dir2_leaf_tail));
-}
-
-/*
  * Get address of the bests array in the single-leaf block.
  */
 static inline __be16 *
@@ -534,123 +513,6 @@
 }
 
 /*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr.  It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_db_t)
-		(by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_data_aoff_t)(by &
-		((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
-			xfs_dir2_data_aoff_t o)
-{
-	return ((xfs_dir2_off_t)db <<
-		(mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-	return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
-			   xfs_dir2_data_aoff_t o)
-{
-	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
-	return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
-	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
  * Free space block defintions for the node format.
  */
 
@@ -659,8 +521,6 @@
  */
 #define	XFS_DIR2_FREE_SPACE	2
 #define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_FREE_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
 
 typedef	struct xfs_dir2_free_hdr {
 	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
@@ -736,16 +596,6 @@
 } xfs_dir2_block_tail_t;
 
 /*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_block_tail *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
-{
-	return ((struct xfs_dir2_block_tail *)
-		((char *)hdr + mp->m_dirblksize)) - 1;
-}
-
-/*
  * Pointer to the leaf entries embedded in a data block (1-block format)
  */
 static inline struct xfs_dir2_leaf_entry *
@@ -764,10 +614,6 @@
  * of an attribute name may not be unique, we may have duplicate keys.  The
  * internal links in the Btree are logical block offsets into the file.
  *
- *========================================================================
- * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
- *========================================================================
- *
  * Struct leaf_entry's are packed from the top.  Name/values grow from the
  * bottom but are not packed.  The freemap contains run-length-encoded entries
  * for the free bytes after the leaf_entry's, but only the N largest such,

diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index fda4625..79670cd 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c

@@ -85,38 +85,74 @@
 	.compname	= xfs_ascii_ci_compname,
 };
 
-void
-xfs_dir_mount(
-	xfs_mount_t	*mp)
+int
+xfs_da_mount(
+	struct xfs_mount	*mp)
 {
-	int	nodehdr_size;
+	struct xfs_da_geometry	*dageo;
+	int			nodehdr_size;
 
 
-	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
+	ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
 	       XFS_MAX_BLOCKSIZE);
 
 	mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
 	mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
 
-	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
-	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
-	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
-	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
-	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
-
 	nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
-	mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) /
-				(uint)sizeof(xfs_da_node_entry_t);
-	mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) /
-				(uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+				    KM_SLEEP | KM_MAYFAIL);
+	mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+				     KM_SLEEP | KM_MAYFAIL);
+	if (!mp->m_dir_geo || !mp->m_attr_geo) {
+		kmem_free(mp->m_dir_geo);
+		kmem_free(mp->m_attr_geo);
+		return ENOMEM;
+	}
 
-	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+	/* set up directory geometry */
+	dageo = mp->m_dir_geo;
+	dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
+	dageo->fsblog = mp->m_sb.sb_blocklog;
+	dageo->blksize = 1 << dageo->blklog;
+	dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
+
+	/*
+	 * Now we've set up the block conversion variables, we can calculate the
+	 * segment block constants using the geometry structure.
+	 */
+	dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
+	dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
+	dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
+	dageo->node_ents = (dageo->blksize - nodehdr_size) /
+				(uint)sizeof(xfs_da_node_entry_t);
+	dageo->magicpct = (dageo->blksize * 37) / 100;
+
+	/* set up attribute geometry - single fsb only */
+	dageo = mp->m_attr_geo;
+	dageo->blklog = mp->m_sb.sb_blocklog;
+	dageo->fsblog = mp->m_sb.sb_blocklog;
+	dageo->blksize = 1 << dageo->blklog;
+	dageo->fsbcount = 1;
+	dageo->node_ents = (dageo->blksize - nodehdr_size) /
+				(uint)sizeof(xfs_da_node_entry_t);
+	dageo->magicpct = (dageo->blksize * 37) / 100;
+
 	if (xfs_sb_version_hasasciici(&mp->m_sb))
 		mp->m_dirnameops = &xfs_ascii_ci_nameops;
 	else
 		mp->m_dirnameops = &xfs_default_nameops;
 
+	return 0;
+}
+
+void
+xfs_da_unmount(
+	struct xfs_mount	*mp)
+{
+	kmem_free(mp->m_dir_geo);
+	kmem_free(mp->m_attr_geo);
 }
 
 /*
@@ -192,6 +228,7 @@
 	if (!args)
 		return ENOMEM;
 
+	args->geo = dp->i_mount->m_dir_geo;
 	args->dp = dp;
 	args->trans = tp;
 	error = xfs_dir2_sf_create(args, pdp->i_ino);
@@ -226,6 +263,7 @@
 	if (!args)
 		return ENOMEM;
 
+	args->geo = dp->i_mount->m_dir_geo;
 	args->name = name->name;
 	args->namelen = name->len;
 	args->filetype = name->type;
@@ -244,7 +282,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(args, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -252,7 +290,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(args, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -320,6 +358,7 @@
 	 * annotations into the reclaim path for the ilock.
 	 */
 	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	args->geo = dp->i_mount->m_dir_geo;
 	args->name = name->name;
 	args->namelen = name->len;
 	args->filetype = name->type;
@@ -336,7 +375,7 @@
 		goto out_check_rval;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(args, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -344,7 +383,7 @@
 		goto out_check_rval;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(args, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -391,6 +430,7 @@
 	if (!args)
 		return ENOMEM;
 
+	args->geo = dp->i_mount->m_dir_geo;
 	args->name = name->name;
 	args->namelen = name->len;
 	args->filetype = name->type;
@@ -408,7 +448,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(args, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -416,7 +456,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(args, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -455,6 +495,7 @@
 	if (!args)
 		return ENOMEM;
 
+	args->geo = dp->i_mount->m_dir_geo;
 	args->name = name->name;
 	args->namelen = name->len;
 	args->filetype = name->type;
@@ -472,7 +513,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(args, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -480,7 +521,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(args, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -516,6 +557,7 @@
 	if (!args)
 		return ENOMEM;
 
+	args->geo = dp->i_mount->m_dir_geo;
 	args->name = name->name;
 	args->namelen = name->len;
 	args->filetype = name->type;
@@ -531,7 +573,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isblock(tp, dp, &v);
+	rval = xfs_dir2_isblock(args, &v);
 	if (rval)
 		goto out_free;
 	if (v) {
@@ -539,7 +581,7 @@
 		goto out_free;
 	}
 
-	rval = xfs_dir2_isleaf(tp, dp, &v);
+	rval = xfs_dir2_isleaf(args, &v);
 	if (rval)
 		goto out_free;
 	if (v)
@@ -579,13 +621,13 @@
 	 * Set lowest possible block in the space requested.
 	 */
 	bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
-	count = mp->m_dirblkfsbs;
+	count = args->geo->fsbcount;
 
 	error = xfs_da_grow_inode_int(args, &bno, count);
 	if (error)
 		return error;
 
-	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
+	*dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);
 
 	/*
 	 * Update file's size if this is the data space and it grew.
@@ -607,19 +649,16 @@
  */
 int
 xfs_dir2_isblock(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*dp,
-	int		*vp)		/* out: 1 is block, 0 is not block */
+	struct xfs_da_args	*args,
+	int			*vp)	/* out: 1 is block, 0 is not block */
 {
-	xfs_fileoff_t	last;		/* last file offset */
-	xfs_mount_t	*mp;
-	int		rval;
+	xfs_fileoff_t		last;	/* last file offset */
+	int			rval;
 
-	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+	if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
 		return rval;
-	rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
-	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
+	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
+	ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
 	*vp = rval;
 	return 0;
 }
@@ -629,18 +668,15 @@
  */
 int
 xfs_dir2_isleaf(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*dp,
-	int		*vp)		/* out: 1 is leaf, 0 is not leaf */
+	struct xfs_da_args	*args,
+	int			*vp)	/* out: 1 is leaf, 0 is not leaf */
 {
-	xfs_fileoff_t	last;		/* last file offset */
-	xfs_mount_t	*mp;
-	int		rval;
+	xfs_fileoff_t		last;	/* last file offset */
+	int			rval;
 
-	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+	if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
 		return rval;
-	*vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
+	*vp = last == args->geo->leafblk + args->geo->fsbcount;
 	return 0;
 }
 
@@ -668,11 +704,11 @@
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	da = xfs_dir2_db_to_da(mp, db);
+	da = xfs_dir2_db_to_da(args->geo, db);
 	/*
 	 * Unmap the fsblock(s).
 	 */
-	if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
+	if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
 			XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
 			&done))) {
 		/*
@@ -699,12 +735,12 @@
 	/*
 	 * If it's not a data block, we're done.
 	 */
-	if (db >= XFS_DIR2_LEAF_FIRSTDB(mp))
+	if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))
 		return 0;
 	/*
 	 * If the block isn't the last one in the directory, we're done.
 	 */
-	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
+	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))
 		return 0;
 	bno = da;
 	if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
@@ -713,7 +749,7 @@
 		 */
 		return error;
 	}
-	if (db == mp->m_dirdatablk)
+	if (db == args->geo->datablk)
 		ASSERT(bno == 0);
 	else
 		ASSERT(bno > 0);

diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index cec70e0..c8e86b0 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h

@@ -80,7 +80,7 @@
 				    struct xfs_dir3_icleaf_hdr *from);
 	void	(*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to,
 				      struct xfs_dir2_leaf *from);
-	int	(*leaf_max_ents)(struct xfs_mount *mp);
+	int	(*leaf_max_ents)(struct xfs_da_geometry *geo);
 	struct xfs_dir2_leaf_entry *
 		(*leaf_ents_p)(struct xfs_dir2_leaf *lp);
 
@@ -97,10 +97,12 @@
 				    struct xfs_dir3_icfree_hdr *from);
 	void	(*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to,
 				      struct xfs_dir2_free *from);
-	int	(*free_max_bests)(struct xfs_mount *mp);
+	int	(*free_max_bests)(struct xfs_da_geometry *geo);
 	__be16 * (*free_bests_p)(struct xfs_dir2_free *free);
-	xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db);
-	int	(*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db);
+	xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo,
+				   xfs_dir2_db_t db);
+	int	(*db_to_fdindex)(struct xfs_da_geometry *geo,
+				 xfs_dir2_db_t db);
 };
 
 extern const struct xfs_dir_ops *
@@ -112,7 +114,9 @@
  * Generic directory interface routines
  */
 extern void xfs_dir_startup(void);
-extern void xfs_dir_mount(struct xfs_mount *mp);
+extern int xfs_da_mount(struct xfs_mount *mp);
+extern void xfs_da_unmount(struct xfs_mount *mp);
+
 extern int xfs_dir_isempty(struct xfs_inode *dp);
 extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_inode *pdp);
@@ -142,23 +146,23 @@
 /*
  * Interface routines used by userspace utilities
  */
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r);
+extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
 				struct xfs_buf *bp);
 
 extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
 		struct xfs_dir2_data_hdr *hdr, int *loghead);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
 		struct xfs_buf *bp, struct xfs_dir2_data_entry *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_header(struct xfs_da_args *args,
 		struct xfs_buf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
-		struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
+		struct xfs_buf *bp, struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
 		struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
 		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
 		struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
 		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
 		int *needlogp, int *needscanp);

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 4f6a38c..c7cd315 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c

@@ -136,7 +136,7 @@
 	struct xfs_mount	*mp = dp->i_mount;
 	int			err;
 
-	err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
+	err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
 				XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
 	if (!err && tp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
@@ -281,8 +281,7 @@
  */
 static void
 xfs_dir2_block_compact(
-	struct xfs_trans		*tp,
-	struct xfs_inode		*dp,
+	struct xfs_da_args		*args,
 	struct xfs_buf			*bp,
 	struct xfs_dir2_data_hdr	*hdr,
 	struct xfs_dir2_block_tail	*btp,
@@ -315,18 +314,17 @@
 	*lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
 	*lfloghigh -= be32_to_cpu(btp->stale) - 1;
 	be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
-	xfs_dir2_data_make_free(tp, dp, bp,
+	xfs_dir2_data_make_free(args, bp,
 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
 		(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
 		needlog, &needscan);
-	blp += be32_to_cpu(btp->stale) - 1;
 	btp->stale = cpu_to_be32(1);
 	/*
 	 * If we now need to rebuild the bestfree map, do so.
 	 * This needs to happen before the next call to use_free.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(dp, hdr, needlog);
+		xfs_dir2_data_freescan(args->dp, hdr, needlog);
 }
 
 /*
@@ -378,7 +376,7 @@
 	 * Set up pointers to parts of the block.
 	 */
 	hdr = bp->b_addr;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 
 	/*
@@ -421,7 +419,7 @@
 	 * If need to compact the leaf entries, do it now.
 	 */
 	if (compact) {
-		xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog,
+		xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,
 				      &lfloghigh, &lfloglow);
 		/* recalculate blp post-compaction */
 		blp = xfs_dir2_block_leaf_p(btp);
@@ -456,7 +454,7 @@
 		/*
 		 * Mark the space needed for the new leaf entry, now in use.
 		 */
-		xfs_dir2_data_use_free(tp, dp, bp, enddup,
+		xfs_dir2_data_use_free(args, bp, enddup,
 			(xfs_dir2_data_aoff_t)
 			((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
 			 sizeof(*blp)),
@@ -537,13 +535,13 @@
 	 * Fill in the leaf entry.
 	 */
 	blp[mid].hashval = cpu_to_be32(args->hashval);
-	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
 	 * Mark space for the data entry used.
 	 */
-	xfs_dir2_data_use_free(tp, dp, bp, dup,
+	xfs_dir2_data_use_free(args, bp, dup,
 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
 		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
 	/*
@@ -561,9 +559,9 @@
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, bp);
+		xfs_dir2_data_log_header(args, bp);
 	xfs_dir2_block_log_tail(tp, bp);
-	xfs_dir2_data_log_entry(tp, dp, bp, dep);
+	xfs_dir2_data_log_entry(args, bp, dep);
 	xfs_dir3_data_check(dp, bp);
 	return 0;
 }
@@ -582,7 +580,7 @@
 	xfs_dir2_leaf_entry_t	*blp;
 	xfs_dir2_block_tail_t	*btp;
 
-	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+	btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
 		(uint)((char *)&blp[last + 1] - (char *)hdr - 1));
@@ -599,7 +597,7 @@
 	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
 	xfs_dir2_block_tail_t	*btp;
 
-	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+	btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
 	xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
 		(uint)((char *)(btp + 1) - (char *)hdr - 1));
 }
@@ -634,13 +632,14 @@
 	mp = dp->i_mount;
 	hdr = bp->b_addr;
 	xfs_dir3_data_check(dp, bp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
 	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-		xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+			xfs_dir2_dataptr_to_off(args->geo,
+						be32_to_cpu(blp[ent].address)));
 	/*
 	 * Fill in inode number, CI name if appropriate, release the block.
 	 */
@@ -686,7 +685,7 @@
 
 	hdr = bp->b_addr;
 	xfs_dir3_data_check(dp, bp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Loop doing a binary search for our hash value.
@@ -724,7 +723,7 @@
 		 * Get pointer to the entry from the leaf.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-			((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+			((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));
 		/*
 		 * Compare name and if it's an exact match, return the index
 		 * and buffer. If it's the first case-insensitive match, store
@@ -791,18 +790,19 @@
 	tp = args->trans;
 	mp = dp->i_mount;
 	hdr = bp->b_addr;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry using the leaf entry.
 	 */
-	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+			xfs_dir2_dataptr_to_off(args->geo,
+						be32_to_cpu(blp[ent].address)));
 	/*
 	 * Mark the data entry's space free.
 	 */
 	needlog = needscan = 0;
-	xfs_dir2_data_make_free(tp, dp, bp,
+	xfs_dir2_data_make_free(args, bp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
 		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
 	/*
@@ -821,7 +821,7 @@
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, bp);
+		xfs_dir2_data_log_header(args, bp);
 	xfs_dir3_data_check(dp, bp);
 	/*
 	 * See if the size as a shortform is good enough.
@@ -866,20 +866,21 @@
 	dp = args->dp;
 	mp = dp->i_mount;
 	hdr = bp->b_addr;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry we need to change.
 	 */
-	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+			xfs_dir2_dataptr_to_off(args->geo,
+						be32_to_cpu(blp[ent].address)));
 	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
 	 */
 	dep->inumber = cpu_to_be64(args->inumber);
 	dp->d_ops->data_put_ftype(dep, args->filetype);
-	xfs_dir2_data_log_entry(args->trans, dp, bp, dep);
+	xfs_dir2_data_log_entry(args, bp, dep);
 	xfs_dir3_data_check(dp, bp);
 	return 0;
 }
@@ -939,7 +940,7 @@
 	leaf = lbp->b_addr;
 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
 	ents = dp->d_ops->leaf_ents_p(leaf);
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 
 	ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
 	       leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
@@ -949,13 +950,13 @@
 	 * been left behind during no-space-reservation operations.
 	 * These will show up in the leaf bests table.
 	 */
-	while (dp->i_d.di_size > mp->m_dirblksize) {
+	while (dp->i_d.di_size > args->geo->blksize) {
 		int hdrsz;
 
 		hdrsz = dp->d_ops->data_entry_offset;
 		bestsp = xfs_dir2_leaf_bests_p(ltp);
 		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
-					    mp->m_dirblksize - hdrsz) {
+					    args->geo->blksize - hdrsz) {
 			if ((error =
 			    xfs_dir2_leaf_trim_data(args, lbp,
 				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -967,7 +968,7 @@
 	 * Read the data block if we don't already have it, give up if it fails.
 	 */
 	if (!dbp) {
-		error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
+		error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);
 		if (error)
 			return error;
 	}
@@ -983,7 +984,7 @@
 	/*
 	 * Look at the last data entry.
 	 */
-	tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
+	tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1;
 	dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
 	/*
 	 * If it's not free or is too short we can't do it.
@@ -1002,12 +1003,12 @@
 	/*
 	 * Use up the space at the end of the block (blp/btp).
 	 */
-	xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size,
+	xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
 		&needlog, &needscan);
 	/*
 	 * Initialize the block tail.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
@@ -1028,11 +1029,11 @@
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
+		xfs_dir2_data_log_header(args, dbp);
 	/*
 	 * Pitch the old leaf block.
 	 */
-	error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
+	error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp);
 	if (error)
 		return error;
 
@@ -1141,13 +1142,13 @@
 	 */
 	dup = dp->d_ops->data_unused_p(hdr);
 	needlog = needscan = 0;
-	xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog,
-		&needscan);
+	xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
+			       i, &needlog, &needscan);
 	ASSERT(needscan == 0);
 	/*
 	 * Fill in the tail.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	btp->count = cpu_to_be32(sfp->count + 2);	/* ., .. */
 	btp->stale = 0;
 	blp = xfs_dir2_block_leaf_p(btp);
@@ -1155,7 +1156,7 @@
 	/*
 	 * Remove the freespace, we'll manage it.
 	 */
-	xfs_dir2_data_use_free(tp, dp, bp, dup,
+	xfs_dir2_data_use_free(args, bp, dup,
 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
 		be16_to_cpu(dup->length), &needlog, &needscan);
 	/*
@@ -1168,9 +1169,9 @@
 	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
 	tagp = dp->d_ops->data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
-	xfs_dir2_data_log_entry(tp, dp, bp, dep);
+	xfs_dir2_data_log_entry(args, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	/*
 	 * Create entry for ..
@@ -1182,9 +1183,9 @@
 	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
 	tagp = dp->d_ops->data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
-	xfs_dir2_data_log_entry(tp, dp, bp, dep);
+	xfs_dir2_data_log_entry(args, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 				(char *)dep - (char *)hdr));
 	offset = dp->d_ops->data_first_offset;
 	/*
@@ -1216,7 +1217,7 @@
 			dup->length = cpu_to_be16(newoffset - offset);
 			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
 				((char *)dup - (char *)hdr));
-			xfs_dir2_data_log_unused(tp, bp, dup);
+			xfs_dir2_data_log_unused(args, bp, dup);
 			xfs_dir2_data_freeinsert(hdr,
 						 dp->d_ops->data_bestfree_p(hdr),
 						 dup, &dummy);
@@ -1233,12 +1234,12 @@
 		memcpy(dep->name, sfep->name, dep->namelen);
 		tagp = dp->d_ops->data_entry_tag_p(dep);
 		*tagp = cpu_to_be16((char *)dep - (char *)hdr);
-		xfs_dir2_data_log_entry(tp, dp, bp, dep);
+		xfs_dir2_data_log_entry(args, bp, dep);
 		name.name = sfep->name;
 		name.len = sfep->namelen;
 		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
 							hashname(&name));
-		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
 						 (char *)dep - (char *)hdr));
 		offset = (int)((char *)(tagp + 1) - (char *)hdr);
 		if (++i == sfp->count)

diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index afa4ad5..8c2f642 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c

@@ -63,8 +63,10 @@
 	int			stale;		/* count of stale leaves */
 	struct xfs_name		name;
 	const struct xfs_dir_ops *ops;
+	struct xfs_da_geometry	*geo;
 
 	mp = bp->b_target->bt_mount;
+	geo = mp->m_dir_geo;
 
 	/*
 	 * We can be passed a null dp here from a verifier, so we need to go the
@@ -78,7 +80,7 @@
 	switch (hdr->magic) {
 	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
 	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
-		btp = xfs_dir2_block_tail_p(mp, hdr);
+		btp = xfs_dir2_block_tail_p(geo, hdr);
 		lep = xfs_dir2_block_leaf_p(btp);
 		endp = (char *)lep;
 
@@ -94,7 +96,7 @@
 		break;
 	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
-		endp = (char *)hdr + mp->m_dirblksize;
+		endp = (char *)hdr + geo->blksize;
 		break;
 	default:
 		XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
@@ -172,9 +174,9 @@
 		lastfree = 0;
 		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
 		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-				(xfs_dir2_data_aoff_t)
-				((char *)dep - (char *)hdr));
+			addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+						(xfs_dir2_data_aoff_t)
+						((char *)dep - (char *)hdr));
 			name.name = dep->name;
 			name.len = dep->namelen;
 			hash = mp->m_dirnameops->hashname(&name);
@@ -329,12 +331,11 @@
 
 int
 xfs_dir3_data_readahead(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
 	xfs_dablk_t		bno,
 	xfs_daddr_t		mapped_bno)
 {
-	return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
+	return xfs_da_reada_buf(dp, bno, mapped_bno,
 				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
 }
 
@@ -510,6 +511,7 @@
 	struct xfs_dir2_data_free *bf;
 	char			*endp;		/* end of block's data */
 	char			*p;		/* current entry pointer */
+	struct xfs_da_geometry	*geo = dp->i_mount->m_dir_geo;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -528,10 +530,10 @@
 	p = (char *)dp->d_ops->data_entry_p(hdr);
 	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
 	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-		btp = xfs_dir2_block_tail_p(dp->i_mount, hdr);
+		btp = xfs_dir2_block_tail_p(geo, hdr);
 		endp = (char *)xfs_dir2_block_leaf_p(btp);
 	} else
-		endp = (char *)hdr + dp->i_mount->m_dirblksize;
+		endp = (char *)hdr + geo->blksize;
 	/*
 	 * Loop over the block's entries.
 	 */
@@ -585,8 +587,8 @@
 	/*
 	 * Get the buffer set up for the block.
 	 */
-	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
-		XFS_DATA_FORK);
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
+			       -1, &bp, XFS_DATA_FORK);
 	if (error)
 		return error;
 	bp->b_ops = &xfs_dir3_data_buf_ops;
@@ -621,15 +623,15 @@
 	dup = dp->d_ops->data_unused_p(hdr);
 	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 
-	t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset;
+	t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
 	bf[0].length = cpu_to_be16(t);
 	dup->length = cpu_to_be16(t);
 	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
 	/*
 	 * Log it and return it.
 	 */
-	xfs_dir2_data_log_header(tp, dp, bp);
-	xfs_dir2_data_log_unused(tp, bp, dup);
+	xfs_dir2_data_log_header(args, bp);
+	xfs_dir2_data_log_unused(args, bp, dup);
 	*bpp = bp;
 	return 0;
 }
@@ -639,8 +641,7 @@
  */
 void
 xfs_dir2_data_log_entry(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
 {
@@ -651,8 +652,8 @@
 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
 
-	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
-		(uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) -
+	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
+		(uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
 		       (char *)hdr - 1));
 }
 
@@ -661,8 +662,7 @@
  */
 void
 xfs_dir2_data_log_header(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp)
 {
 #ifdef DEBUG
@@ -674,7 +674,8 @@
 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
 #endif
 
-	xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1);
+	xfs_trans_log_buf(args->trans, bp, 0,
+			  args->dp->d_ops->data_entry_offset - 1);
 }
 
 /*
@@ -682,7 +683,7 @@
  */
 void
 xfs_dir2_data_log_unused(
-	struct xfs_trans	*tp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
 {
@@ -696,13 +697,13 @@
 	/*
 	 * Log the first part of the unused entry.
 	 */
-	xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
+	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
 		(uint)((char *)&dup->length + sizeof(dup->length) -
 		       1 - (char *)hdr));
 	/*
 	 * Log the end (tag) of the unused entry.
 	 */
-	xfs_trans_log_buf(tp, bp,
+	xfs_trans_log_buf(args->trans, bp,
 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
 		       sizeof(xfs_dir2_data_off_t) - 1));
@@ -714,8 +715,7 @@
  */
 void
 xfs_dir2_data_make_free(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
 	xfs_dir2_data_aoff_t	len,		/* length in bytes */
@@ -725,14 +725,12 @@
 	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */
 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
 	char			*endptr;	/* end of data area */
-	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			needscan;	/* need to regen bestfree */
 	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
 	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
 	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
 	struct xfs_dir2_data_free *bf;
 
-	mp = tp->t_mountp;
 	hdr = bp->b_addr;
 
 	/*
@@ -740,20 +738,20 @@
 	 */
 	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	    hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
-		endptr = (char *)hdr + mp->m_dirblksize;
+		endptr = (char *)hdr + args->geo->blksize;
 	else {
 		xfs_dir2_block_tail_t	*btp;	/* block tail */
 
 		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
 			hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-		btp = xfs_dir2_block_tail_p(mp, hdr);
+		btp = xfs_dir2_block_tail_p(args->geo, hdr);
 		endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	}
 	/*
 	 * If this isn't the start of the block, then back up to
 	 * the previous entry and see if it's free.
 	 */
-	if (offset > dp->d_ops->data_entry_offset) {
+	if (offset > args->dp->d_ops->data_entry_offset) {
 		__be16			*tagp;	/* tag just before us */
 
 		tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -779,7 +777,7 @@
 	 * Previous and following entries are both free,
 	 * merge everything into a single free entry.
 	 */
-	bf = dp->d_ops->data_bestfree_p(hdr);
+	bf = args->dp->d_ops->data_bestfree_p(hdr);
 	if (prevdup && postdup) {
 		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */
 
@@ -801,7 +799,7 @@
 		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
 		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, prevdup);
+		xfs_dir2_data_log_unused(args, bp, prevdup);
 		if (!needscan) {
 			/*
 			 * Has to be the case that entries 0 and 1 are
@@ -836,7 +834,7 @@
 		be16_add_cpu(&prevdup->length, len);
 		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, prevdup);
+		xfs_dir2_data_log_unused(args, bp, prevdup);
 		/*
 		 * If the previous entry was in the table, the new entry
 		 * is longer, so it will be in the table too.  Remove
@@ -864,7 +862,7 @@
 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
 		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup);
+		xfs_dir2_data_log_unused(args, bp, newdup);
 		/*
 		 * If the following entry was in the table, the new entry
 		 * is longer, so it will be in the table too.  Remove
@@ -891,7 +889,7 @@
 		newdup->length = cpu_to_be16(len);
 		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup);
+		xfs_dir2_data_log_unused(args, bp, newdup);
 		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
 	}
 	*needscanp = needscan;
@@ -902,8 +900,7 @@
  */
 void
 xfs_dir2_data_use_free(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	xfs_dir2_data_unused_t	*dup,		/* unused entry */
 	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
@@ -934,7 +931,7 @@
 	 * Look up the entry in the bestfree table.
 	 */
 	oldlen = be16_to_cpu(dup->length);
-	bf = dp->d_ops->data_bestfree_p(hdr);
+	bf = args->dp->d_ops->data_bestfree_p(hdr);
 	dfp = xfs_dir2_data_freefind(hdr, bf, dup);
 	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
 	/*
@@ -966,7 +963,7 @@
 		newdup->length = cpu_to_be16(oldlen - len);
 		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup);
+		xfs_dir2_data_log_unused(args, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
 		 */
@@ -994,7 +991,7 @@
 		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
 		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup);
+		xfs_dir2_data_log_unused(args, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
 		 */
@@ -1022,13 +1019,13 @@
 		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
 		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup);
+		xfs_dir2_data_log_unused(args, bp, newdup);
 		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
 		*xfs_dir2_data_unused_tag_p(newdup2) =
 			cpu_to_be16((char *)newdup2 - (char *)hdr);
-		xfs_dir2_data_log_unused(tp, bp, newdup2);
+		xfs_dir2_data_log_unused(args, bp, newdup2);
 		/*
 		 * If the old entry was in the table, we need to scan
 		 * if the 3rd entry was valid, since these entries

diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index d36e97d..fb0aad4 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c

@@ -41,9 +41,10 @@
  */
 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
 				    int *indexp, struct xfs_buf **dbpp);
-static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
-				    int first, int last);
-static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
+static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
+				    struct xfs_buf *bp, int first, int last);
+static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
+				   struct xfs_buf *bp);
 
 /*
  * Check the internal consistency of a leaf1 block.
@@ -92,6 +93,7 @@
 	int			i;
 	const struct xfs_dir_ops *ops;
 	struct xfs_dir3_icleaf_hdr leafhdr;
+	struct xfs_da_geometry	*geo = mp->m_dir_geo;
 
 	/*
 	 * we can be passed a null dp here from a verifier, so we need to go the
@@ -105,14 +107,14 @@
 	}
 
 	ents = ops->leaf_ents_p(leaf);
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
 
 	/*
 	 * XXX (dgc): This value is not restrictive enough.
 	 * Should factor in the size of the bests table as well.
 	 * We can deduce a value for that from di_size.
 	 */
-	if (hdr->count > ops->leaf_max_ents(mp))
+	if (hdr->count > ops->leaf_max_ents(geo))
 		return false;
 
 	/* Leaves and bests don't overlap in leaf format. */
@@ -323,7 +325,7 @@
 	if (type == XFS_DIR2_LEAF1_MAGIC) {
 		struct xfs_dir2_leaf_tail *ltp;
 
-		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+		ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
 		ltp->bestcount = 0;
 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
@@ -347,18 +349,18 @@
 	int			error;
 
 	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
-	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
+	ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
+	       bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
 
-	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
-			       XFS_DATA_FORK);
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
+			       -1, &bp, XFS_DATA_FORK);
 	if (error)
 		return error;
 
 	xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
-	xfs_dir3_leaf_log_header(tp, dp, bp);
+	xfs_dir3_leaf_log_header(args, bp);
 	if (magic == XFS_DIR2_LEAF1_MAGIC)
-		xfs_dir3_leaf_log_tail(tp, bp);
+		xfs_dir3_leaf_log_tail(args, bp);
 	*bpp = bp;
 	return 0;
 }
@@ -403,8 +405,8 @@
 	if ((error = xfs_da_grow_inode(args, &blkno))) {
 		return error;
 	}
-	ldb = xfs_dir2_da_to_db(mp, blkno);
-	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
+	ldb = xfs_dir2_da_to_db(args->geo, blkno);
+	ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
 	/*
 	 * Initialize the leaf block, get a buffer for it.
 	 */
@@ -415,7 +417,7 @@
 	leaf = lbp->b_addr;
 	hdr = dbp->b_addr;
 	xfs_dir3_data_check(dp, dbp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 	bf = dp->d_ops->data_bestfree_p(hdr);
 	ents = dp->d_ops->leaf_ents_p(leaf);
@@ -427,23 +429,23 @@
 	leafhdr.count = be32_to_cpu(btp->count);
 	leafhdr.stale = be32_to_cpu(btp->stale);
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, lbp);
+	xfs_dir3_leaf_log_header(args, lbp);
 
 	/*
 	 * Could compact these but I think we always do the conversion
 	 * after squeezing out stale entries.
 	 */
 	memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
-	xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1);
+	xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
 	needscan = 0;
 	needlog = 1;
 	/*
 	 * Make the space formerly occupied by the leaf entries and block
 	 * tail be free.
 	 */
-	xfs_dir2_data_make_free(tp, dp, dbp,
+	xfs_dir2_data_make_free(args, dbp,
 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
-		(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
+		(xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
 				       (char *)blp),
 		&needlog, &needscan);
 	/*
@@ -461,7 +463,7 @@
 	/*
 	 * Set up leaf tail and bests table.
 	 */
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	ltp->bestcount = cpu_to_be32(1);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	bestsp[0] =  bf[0].length;
@@ -469,10 +471,10 @@
 	 * Log the data header and leaf bests table.
 	 */
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
+		xfs_dir2_data_log_header(args, dbp);
 	xfs_dir3_leaf_check(dp, lbp);
 	xfs_dir3_data_check(dp, dbp);
-	xfs_dir3_leaf_log_bests(tp, lbp, 0, 0);
+	xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
 	return 0;
 }
 
@@ -641,7 +643,7 @@
 	tp = args->trans;
 	mp = dp->i_mount;
 
-	error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
+	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
 	if (error)
 		return error;
 
@@ -653,7 +655,7 @@
 	 */
 	index = xfs_dir2_leaf_search_hash(args, lbp);
 	leaf = lbp->b_addr;
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	ents = dp->d_ops->leaf_ents_p(leaf);
 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
@@ -670,7 +672,7 @@
 	     index++, lep++) {
 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
-		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+		i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
 		ASSERT(i < be32_to_cpu(ltp->bestcount));
 		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
 		if (be16_to_cpu(bestsp[i]) >= length) {
@@ -810,14 +812,15 @@
 			memmove(&bestsp[0], &bestsp[1],
 				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
 			be32_add_cpu(&ltp->bestcount, 1);
-			xfs_dir3_leaf_log_tail(tp, lbp);
-			xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+			xfs_dir3_leaf_log_tail(args, lbp);
+			xfs_dir3_leaf_log_bests(args, lbp, 0,
+						be32_to_cpu(ltp->bestcount) - 1);
 		}
 		/*
 		 * If we're filling in a previously empty block just log it.
 		 */
 		else
-			xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
+			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
 		hdr = dbp->b_addr;
 		bf = dp->d_ops->data_bestfree_p(hdr);
 		bestsp[use_block] = bf[0].length;
@@ -828,8 +831,8 @@
 		 * Just read that one in.
 		 */
 		error = xfs_dir3_data_read(tp, dp,
-					   xfs_dir2_db_to_da(mp, use_block),
-					   -1, &dbp);
+				   xfs_dir2_db_to_da(args->geo, use_block),
+				   -1, &dbp);
 		if (error) {
 			xfs_trans_brelse(tp, lbp);
 			return error;
@@ -848,7 +851,7 @@
 	/*
 	 * Mark the initial part of our freespace in use for the new entry.
 	 */
-	xfs_dir2_data_use_free(tp, dp, dbp, dup,
+	xfs_dir2_data_use_free(args, dbp, dup,
 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
 		&needlog, &needscan);
 	/*
@@ -870,8 +873,8 @@
 	 * Need to log the data block's header.
 	 */
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
-	xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+		xfs_dir2_data_log_header(args, dbp);
+	xfs_dir2_data_log_entry(args, dbp, dep);
 	/*
 	 * If the bests table needs to be changed, do it.
 	 * Log the change unless we've already done that.
@@ -879,7 +882,7 @@
 	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
 		bestsp[use_block] = bf[0].length;
 		if (!grown)
-			xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
+			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
 	}
 
 	lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
@@ -889,14 +892,15 @@
 	 * Fill in the new leaf entry.
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
+	lep->address = cpu_to_be32(
+				xfs_dir2_db_off_to_dataptr(args->geo, use_block,
 				be16_to_cpu(*tagp)));
 	/*
 	 * Log the leaf fields and give up the buffers.
 	 */
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, lbp);
-	xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh);
+	xfs_dir3_leaf_log_header(args, lbp);
+	xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
 	xfs_dir3_leaf_check(dp, lbp);
 	xfs_dir3_data_check(dp, dbp);
 	return 0;
@@ -948,9 +952,9 @@
 	leafhdr->stale = 0;
 
 	dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
-	xfs_dir3_leaf_log_header(args->trans, dp, bp);
+	xfs_dir3_leaf_log_header(args, bp);
 	if (loglow != -1)
-		xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1);
+		xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
 }
 
 /*
@@ -1052,7 +1056,7 @@
  */
 static void
 xfs_dir3_leaf_log_bests(
-	xfs_trans_t		*tp,		/* transaction pointer */
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,		/* leaf buffer */
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
@@ -1065,10 +1069,11 @@
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
 
-	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
 	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
-	xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
+	xfs_trans_log_buf(args->trans, bp,
+		(uint)((char *)firstb - (char *)leaf),
 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
 }
 
@@ -1077,8 +1082,7 @@
  */
 void
 xfs_dir3_leaf_log_ents(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	int			first,
 	int			last)
@@ -1093,10 +1097,11 @@
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
 
-	ents = dp->d_ops->leaf_ents_p(leaf);
+	ents = args->dp->d_ops->leaf_ents_p(leaf);
 	firstlep = &ents[first];
 	lastlep = &ents[last];
-	xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
+	xfs_trans_log_buf(args->trans, bp,
+		(uint)((char *)firstlep - (char *)leaf),
 		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
 }
 
@@ -1105,8 +1110,7 @@
  */
 void
 xfs_dir3_leaf_log_header(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp)
 {
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
@@ -1116,8 +1120,9 @@
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
 
-	xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
-			  dp->d_ops->leaf_hdr_size - 1);
+	xfs_trans_log_buf(args->trans, bp,
+			  (uint)((char *)&leaf->hdr - (char *)leaf),
+			  args->dp->d_ops->leaf_hdr_size - 1);
 }
 
 /*
@@ -1125,21 +1130,20 @@
  */
 STATIC void
 xfs_dir3_leaf_log_tail(
-	struct xfs_trans	*tp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp)
 {
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
-	struct xfs_mount	*mp = tp->t_mountp;
 
 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
 
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-	xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
-		(uint)(mp->m_dirblksize - 1));
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+	xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
+		(uint)(args->geo->blksize - 1));
 }
 
 /*
@@ -1185,7 +1189,7 @@
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->b_addr +
-	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
 	/*
 	 * Return the found inode number & CI name if appropriate
 	 */
@@ -1231,7 +1235,7 @@
 	tp = args->trans;
 	mp = dp->i_mount;
 
-	error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
+	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
 	if (error)
 		return error;
 
@@ -1260,7 +1264,8 @@
 		/*
 		 * Get the new data block number.
 		 */
-		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(args->geo,
+					       be32_to_cpu(lep->address));
 		/*
 		 * If it's not the same as the old data block number,
 		 * need to pitch the old one and read the new one.
@@ -1269,8 +1274,8 @@
 			if (dbp)
 				xfs_trans_brelse(tp, dbp);
 			error = xfs_dir3_data_read(tp, dp,
-						   xfs_dir2_db_to_da(mp, newdb),
-						   -1, &dbp);
+					   xfs_dir2_db_to_da(args->geo, newdb),
+					   -1, &dbp);
 			if (error) {
 				xfs_trans_brelse(tp, lbp);
 				return error;
@@ -1281,7 +1286,8 @@
 		 * Point to the data entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
-			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+			xfs_dir2_dataptr_to_off(args->geo,
+						be32_to_cpu(lep->address)));
 		/*
 		 * Compare name and if it's an exact match, return the index
 		 * and buffer. If it's the first case-insensitive match, store
@@ -1310,8 +1316,8 @@
 		if (cidb != curdb) {
 			xfs_trans_brelse(tp, dbp);
 			error = xfs_dir3_data_read(tp, dp,
-						   xfs_dir2_db_to_da(mp, cidb),
-						   -1, &dbp);
+					   xfs_dir2_db_to_da(args->geo, cidb),
+					   -1, &dbp);
 			if (error) {
 				xfs_trans_brelse(tp, lbp);
 				return error;
@@ -1380,18 +1386,18 @@
 	 * Point to the leaf entry, use that to point to the data entry.
 	 */
 	lep = &ents[index];
-	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
-	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+	db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+		xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
 	needscan = needlog = 0;
 	oldbest = be16_to_cpu(bf[0].length);
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
 	/*
 	 * Mark the former data entry unused.
 	 */
-	xfs_dir2_data_make_free(tp, dp, dbp,
+	xfs_dir2_data_make_free(args, dbp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
 		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
 	/*
@@ -1399,10 +1405,10 @@
 	 */
 	leafhdr.stale++;
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, lbp);
+	xfs_dir3_leaf_log_header(args, lbp);
 
 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-	xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index);
+	xfs_dir3_leaf_log_ents(args, lbp, index, index);
 
 	/*
 	 * Scan the freespace in the data block again if necessary,
@@ -1411,22 +1417,22 @@
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
+		xfs_dir2_data_log_header(args, dbp);
 	/*
 	 * If the longest freespace in the data block has changed,
 	 * put the new value in the bests table and log that.
 	 */
 	if (be16_to_cpu(bf[0].length) != oldbest) {
 		bestsp[db] = bf[0].length;
-		xfs_dir3_leaf_log_bests(tp, lbp, db, db);
+		xfs_dir3_leaf_log_bests(args, lbp, db, db);
 	}
 	xfs_dir3_data_check(dp, dbp);
 	/*
 	 * If the data block is now empty then get rid of the data block.
 	 */
 	if (be16_to_cpu(bf[0].length) ==
-			mp->m_dirblksize - dp->d_ops->data_entry_offset) {
-		ASSERT(db != mp->m_dirdatablk);
+			args->geo->blksize - dp->d_ops->data_entry_offset) {
+		ASSERT(db != args->geo->datablk);
 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
 			/*
 			 * Nope, can't get rid of it because it caused
@@ -1459,15 +1465,16 @@
 			memmove(&bestsp[db - i], bestsp,
 				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
 			be32_add_cpu(&ltp->bestcount, -(db - i));
-			xfs_dir3_leaf_log_tail(tp, lbp);
-			xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+			xfs_dir3_leaf_log_tail(args, lbp);
+			xfs_dir3_leaf_log_bests(args, lbp, 0,
+						be32_to_cpu(ltp->bestcount) - 1);
 		} else
 			bestsp[db] = cpu_to_be16(NULLDATAOFF);
 	}
 	/*
 	 * If the data block was not the first one, drop it.
 	 */
-	else if (db != mp->m_dirdatablk)
+	else if (db != args->geo->datablk)
 		dbp = NULL;
 
 	xfs_dir3_leaf_check(dp, lbp);
@@ -1515,7 +1522,7 @@
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->b_addr +
-	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
 	/*
 	 * Put the new inode number in, log it.
@@ -1523,7 +1530,7 @@
 	dep->inumber = cpu_to_be64(args->inumber);
 	dp->d_ops->data_put_ftype(dep, args->filetype);
 	tp = args->trans;
-	xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+	xfs_dir2_data_log_entry(args, dbp, dep);
 	xfs_dir3_leaf_check(dp, lbp);
 	xfs_trans_brelse(tp, lbp);
 	return 0;
@@ -1609,12 +1616,13 @@
 	/*
 	 * Read the offending data block.  We need its buffer.
 	 */
-	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
+	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
+				   -1, &dbp);
 	if (error)
 		return error;
 
 	leaf = lbp->b_addr;
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 
 #ifdef DEBUG
 {
@@ -1624,7 +1632,7 @@
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
 	ASSERT(be16_to_cpu(bf[0].length) ==
-	       mp->m_dirblksize - dp->d_ops->data_entry_offset);
+	       args->geo->blksize - dp->d_ops->data_entry_offset);
 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
 }
 #endif
@@ -1643,8 +1651,8 @@
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	be32_add_cpu(&ltp->bestcount, -1);
 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
-	xfs_dir3_leaf_log_tail(tp, lbp);
-	xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+	xfs_dir3_leaf_log_tail(args, lbp);
+	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	return 0;
 }
 
@@ -1708,22 +1716,22 @@
 	/*
 	 * Get the last offset in the file.
 	 */
-	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
+	if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
 		return error;
 	}
-	fo -= mp->m_dirblkfsbs;
+	fo -= args->geo->fsbcount;
 	/*
 	 * If there are freespace blocks other than the first one,
 	 * take this opportunity to remove trailing empty freespace blocks
 	 * that may have been left behind during no-space-reservation
 	 * operations.
 	 */
-	while (fo > mp->m_dirfreeblk) {
+	while (fo > args->geo->freeblk) {
 		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
 			return error;
 		}
 		if (rval)
-			fo -= mp->m_dirblkfsbs;
+			fo -= args->geo->fsbcount;
 		else
 			return 0;
 	}
@@ -1736,7 +1744,7 @@
 	/*
 	 * If it's not the single leaf block, give up.
 	 */
-	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
+	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
 		return 0;
 	lbp = state->path.blk[0].bp;
 	leaf = lbp->b_addr;
@@ -1748,7 +1756,7 @@
 	/*
 	 * Read the freespace block.
 	 */
-	error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp);
+	error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);
 	if (error)
 		return error;
 	free = fbp->b_addr;
@@ -1760,7 +1768,7 @@
 	 * Now see if the leafn and free data will fit in a leaf1.
 	 * If not, release the buffer and give up.
 	 */
-	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) {
+	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
 		xfs_trans_brelse(tp, fbp);
 		return 0;
 	}
@@ -1780,7 +1788,7 @@
 	/*
 	 * Set up the leaf tail from the freespace block.
 	 */
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	ltp->bestcount = cpu_to_be32(freehdr.nvalid);
 
 	/*
@@ -1790,15 +1798,17 @@
 		freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
 
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, lbp);
-	xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
-	xfs_dir3_leaf_log_tail(tp, lbp);
+	xfs_dir3_leaf_log_header(args, lbp);
+	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+	xfs_dir3_leaf_log_tail(args, lbp);
 	xfs_dir3_leaf_check(dp, lbp);
 
 	/*
 	 * Get rid of the freespace block.
 	 */
-	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
+	error = xfs_dir2_shrink_inode(args,
+			xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
+			fbp);
 	if (error) {
 		/*
 		 * This can't fail here because it can only happen when

diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index cb434d7..da43d30 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c

@@ -195,17 +195,18 @@
 
 static int
 xfs_dir3_free_get_buf(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	xfs_da_args_t		*args,
 	xfs_dir2_db_t		fbno,
 	struct xfs_buf		**bpp)
 {
+	struct xfs_trans	*tp = args->trans;
+	struct xfs_inode	*dp = args->dp;
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_buf		*bp;
 	int			error;
 	struct xfs_dir3_icfree_hdr hdr;
 
-	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno),
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),
 				   -1, &bp, XFS_DATA_FORK);
 	if (error)
 		return error;
@@ -240,8 +241,7 @@
  */
 STATIC void
 xfs_dir2_free_log_bests(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
@@ -250,10 +250,10 @@
 	__be16			*bests;
 
 	free = bp->b_addr;
-	bests = dp->d_ops->free_bests_p(free);
+	bests = args->dp->d_ops->free_bests_p(free);
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
 	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
-	xfs_trans_log_buf(tp, bp,
+	xfs_trans_log_buf(args->trans, bp,
 		(uint)((char *)&bests[first] - (char *)free),
 		(uint)((char *)&bests[last] - (char *)free +
 		       sizeof(bests[0]) - 1));
@@ -264,8 +264,7 @@
  */
 static void
 xfs_dir2_free_log_header(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	struct xfs_buf		*bp)
 {
 #ifdef DEBUG
@@ -275,7 +274,8 @@
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
 	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
 #endif
-	xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1);
+	xfs_trans_log_buf(args->trans, bp, 0,
+			  args->dp->d_ops->free_hdr_size - 1);
 }
 
 /*
@@ -315,20 +315,20 @@
 	if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {
 		return error;
 	}
-	ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
+	ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
 	/*
 	 * Get the buffer for the new freespace block.
 	 */
-	error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp);
+	error = xfs_dir3_free_get_buf(args, fdb, &fbp);
 	if (error)
 		return error;
 
 	free = fbp->b_addr;
 	dp->d_ops->free_hdr_from_disk(&freehdr, free);
 	leaf = lbp->b_addr;
-	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	ASSERT(be32_to_cpu(ltp->bestcount) <=
-				(uint)dp->i_d.di_size / mp->m_dirblksize);
+				(uint)dp->i_d.di_size / args->geo->blksize);
 
 	/*
 	 * Copy freespace entries from the leaf block to the new block.
@@ -349,8 +349,8 @@
 	freehdr.nvalid = be32_to_cpu(ltp->bestcount);
 
 	dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
-	xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1);
-	xfs_dir2_free_log_header(tp, dp, fbp);
+	xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1);
+	xfs_dir2_free_log_header(args, fbp);
 
 	/*
 	 * Converting the leaf to a leafnode is just a matter of changing the
@@ -364,7 +364,7 @@
 		leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
 	lbp->b_ops = &xfs_dir3_leafn_buf_ops;
 	xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
-	xfs_dir3_leaf_log_header(tp, dp, lbp);
+	xfs_dir3_leaf_log_header(args, lbp);
 	xfs_dir3_leaf_check(dp, lbp);
 	return 0;
 }
@@ -415,7 +415,7 @@
 	 * a compact.
 	 */
 
-	if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) {
+	if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
 		if (!leafhdr.stale)
 			return XFS_ERROR(ENOSPC);
 		compact = leafhdr.stale > 1;
@@ -450,12 +450,12 @@
 				       highstale, &lfloglow, &lfloghigh);
 
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
+	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,
 				args->blkno, args->index));
 
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, bp);
-	xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh);
+	xfs_dir3_leaf_log_header(args, bp);
+	xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh);
 	xfs_dir3_leaf_check(dp, bp);
 	return 0;
 }
@@ -471,7 +471,8 @@
 
 	dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
 
-	ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0);
+	ASSERT((hdr.firstdb %
+		dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);
 	ASSERT(hdr.firstdb <= db);
 	ASSERT(db < hdr.firstdb + hdr.nvalid);
 }
@@ -576,7 +577,8 @@
 		/*
 		 * Pull the data block number from the entry.
 		 */
-		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(args->geo,
+					       be32_to_cpu(lep->address));
 		/*
 		 * For addname, we're looking for a place to put the new entry.
 		 * We want to use a data block with an entry of equal
@@ -593,7 +595,7 @@
 			 * Convert the data block to the free block
 			 * holding its freespace information.
 			 */
-			newfdb = dp->d_ops->db_to_fdb(mp, newdb);
+			newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);
 			/*
 			 * If it's not the one we have in hand, read it in.
 			 */
@@ -605,7 +607,8 @@
 					xfs_trans_brelse(tp, curbp);
 
 				error = xfs_dir2_free_read(tp, dp,
-						xfs_dir2_db_to_da(mp, newfdb),
+						xfs_dir2_db_to_da(args->geo,
+								  newfdb),
 						&curbp);
 				if (error)
 					return error;
@@ -616,7 +619,7 @@
 			/*
 			 * Get the index for our entry.
 			 */
-			fi = dp->d_ops->db_to_fdindex(mp, curdb);
+			fi = dp->d_ops->db_to_fdindex(args->geo, curdb);
 			/*
 			 * If it has room, return it.
 			 */
@@ -721,7 +724,8 @@
 		/*
 		 * Pull the data block number from the entry.
 		 */
-		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(args->geo,
+					       be32_to_cpu(lep->address));
 		/*
 		 * Not adding a new entry, so we really want to find
 		 * the name given to us.
@@ -746,7 +750,8 @@
 				curbp = state->extrablk.bp;
 			} else {
 				error = xfs_dir3_data_read(tp, dp,
-						xfs_dir2_db_to_da(mp, newdb),
+						xfs_dir2_db_to_da(args->geo,
+								  newdb),
 						-1, &curbp);
 				if (error)
 					return error;
@@ -758,7 +763,8 @@
 		 * Point to the data entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
-			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+			xfs_dir2_dataptr_to_off(args->geo,
+						be32_to_cpu(lep->address)));
 		/*
 		 * Compare the entry and if it's an exact match, return
 		 * EEXIST immediately. If it's the first case-insensitive
@@ -844,7 +850,6 @@
 	int				start_d,/* destination leaf index */
 	int				count)	/* count of leaves to copy */
 {
-	struct xfs_trans		*tp = args->trans;
 	int				stale;	/* count stale leaves copied */
 
 	trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
@@ -863,7 +868,7 @@
 	if (start_d < dhdr->count) {
 		memmove(&dents[start_d + count], &dents[start_d],
 			(dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
-		xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count,
+		xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,
 				       count + dhdr->count - 1);
 	}
 	/*
@@ -885,8 +890,7 @@
 	 */
 	memcpy(&dents[start_d], &sents[start_s],
 		count * sizeof(xfs_dir2_leaf_entry_t));
-	xfs_dir3_leaf_log_ents(tp, args->dp, bp_d,
-			       start_d, start_d + count - 1);
+	xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);
 
 	/*
 	 * If there are source entries after the ones we copied,
@@ -895,8 +899,7 @@
 	if (start_s + count < shdr->count) {
 		memmove(&sents[start_s], &sents[start_s + count],
 			count * sizeof(xfs_dir2_leaf_entry_t));
-		xfs_dir3_leaf_log_ents(tp, args->dp, bp_s,
-				       start_s, start_s + count - 1);
+		xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);
 	}
 
 	/*
@@ -1032,8 +1035,8 @@
 	/* log the changes made when moving the entries */
 	dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
 	dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
-	xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp);
-	xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp);
+	xfs_dir3_leaf_log_header(args, blk1->bp);
+	xfs_dir3_leaf_log_header(args, blk2->bp);
 
 	xfs_dir3_leaf_check(dp, blk1->bp);
 	xfs_dir3_leaf_check(dp, blk2->bp);
@@ -1076,7 +1079,6 @@
 	struct xfs_buf		*fbp,
 	int			longest)
 {
-	struct xfs_trans	*tp = args->trans;
 	int			logfree = 0;
 	__be16			*bests;
 	struct xfs_dir3_icfree_hdr freehdr;
@@ -1090,7 +1092,7 @@
 		 * value.
 		 */
 		bests[findex] = cpu_to_be16(longest);
-		xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+		xfs_dir2_free_log_bests(args, fbp, findex, findex);
 		return 0;
 	}
 
@@ -1118,7 +1120,7 @@
 	}
 
 	dp->d_ops->free_hdr_to_disk(free, &freehdr);
-	xfs_dir2_free_log_header(tp, dp, fbp);
+	xfs_dir2_free_log_header(args, fbp);
 
 	/*
 	 * If there are no useful entries left in the block, get rid of the
@@ -1142,7 +1144,7 @@
 
 	/* Log the free entry that changed, unless we got rid of it.  */
 	if (logfree)
-		xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+		xfs_dir2_free_log_bests(args, fbp, findex, findex);
 	return 0;
 }
 
@@ -1193,9 +1195,9 @@
 	/*
 	 * Extract the data block and offset from the entry.
 	 */
-	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+	db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
 	ASSERT(dblk->blkno == db);
-	off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
+	off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));
 	ASSERT(dblk->index == off);
 
 	/*
@@ -1204,10 +1206,10 @@
 	 */
 	leafhdr.stale++;
 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-	xfs_dir3_leaf_log_header(tp, dp, bp);
+	xfs_dir3_leaf_log_header(args, bp);
 
 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-	xfs_dir3_leaf_log_ents(tp, dp, bp, index, index);
+	xfs_dir3_leaf_log_ents(args, bp, index, index);
 
 	/*
 	 * Make the data entry free.  Keep track of the longest freespace
@@ -1219,7 +1221,7 @@
 	bf = dp->d_ops->data_bestfree_p(hdr);
 	longest = be16_to_cpu(bf[0].length);
 	needlog = needscan = 0;
-	xfs_dir2_data_make_free(tp, dp, dbp, off,
+	xfs_dir2_data_make_free(args, dbp, off,
 		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Rescan the data block freespaces for bestfree.
@@ -1228,7 +1230,7 @@
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
+		xfs_dir2_data_log_header(args, dbp);
 	xfs_dir3_data_check(dp, dbp);
 	/*
 	 * If the longest data block freespace changes, need to update
@@ -1245,8 +1247,9 @@
 		 * Convert the data block number to a free block,
 		 * read in the free block.
 		 */
-		fdb = dp->d_ops->db_to_fdb(mp, db);
-		error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb),
+		fdb = dp->d_ops->db_to_fdb(args->geo, db);
+		error = xfs_dir2_free_read(tp, dp,
+					   xfs_dir2_db_to_da(args->geo, fdb),
 					   &fbp);
 		if (error)
 			return error;
@@ -1255,20 +1258,21 @@
 	{
 		struct xfs_dir3_icfree_hdr freehdr;
 		dp->d_ops->free_hdr_from_disk(&freehdr, free);
-		ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) *
-					  (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
+		ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) *
+			(fdb - xfs_dir2_byte_to_db(args->geo,
+						   XFS_DIR2_FREE_OFFSET)));
 	}
 #endif
 		/*
 		 * Calculate which entry we need to fix.
 		 */
-		findex = dp->d_ops->db_to_fdindex(mp, db);
+		findex = dp->d_ops->db_to_fdindex(args->geo, db);
 		longest = be16_to_cpu(bf[0].length);
 		/*
 		 * If the data block is now empty we can get rid of it
 		 * (usually).
 		 */
-		if (longest == mp->m_dirblksize -
+		if (longest == args->geo->blksize -
 			       dp->d_ops->data_entry_offset) {
 			/*
 			 * Try to punch out the data block.
@@ -1303,7 +1307,7 @@
 	 */
 	*rval = (dp->d_ops->leaf_hdr_size +
 		 (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
-		mp->m_dir_magicpct;
+		args->geo->magicpct;
 	return 0;
 }
 
@@ -1336,7 +1340,7 @@
 	/*
 	 * Initialize the new leaf block.
 	 */
-	error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno),
+	error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno),
 				      &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
 	if (error)
 		return error;
@@ -1410,7 +1414,7 @@
 
 	count = leafhdr.count - leafhdr.stale;
 	bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
-	if (bytes > (state->blocksize >> 1)) {
+	if (bytes > (state->args->geo->blksize >> 1)) {
 		/*
 		 * Blk over 50%, don't try to join.
 		 */
@@ -1463,7 +1467,8 @@
 		 * Count bytes in the two blocks combined.
 		 */
 		count = leafhdr.count - leafhdr.stale;
-		bytes = state->blocksize - (state->blocksize >> 2);
+		bytes = state->args->geo->blksize -
+			(state->args->geo->blksize >> 2);
 
 		leaf = bp->b_addr;
 		dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
@@ -1560,8 +1565,8 @@
 	/* log the changes made when moving the entries */
 	dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
 	dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
-	xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp);
-	xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp);
+	xfs_dir3_leaf_log_header(args, save_blk->bp);
+	xfs_dir3_leaf_log_header(args, drop_blk->bp);
 
 	xfs_dir3_leaf_check(dp, save_blk->bp);
 	xfs_dir3_leaf_check(dp, drop_blk->bp);
@@ -1587,8 +1592,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_dirblksize;
-	state->node_ents = state->mp->m_dir_node_ents;
 	/*
 	 * Look up the name.  We're not supposed to find it, but
 	 * this gives us the insertion point.
@@ -1727,9 +1730,9 @@
 	if (dbno == -1) {
 		xfs_fileoff_t	fo;		/* freespace block number */
 
-		if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
+		if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
 			return error;
-		lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
+		lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
 		fbno = ifbno;
 	}
 	/*
@@ -1747,7 +1750,8 @@
 			 * us a freespace block to start with.
 			 */
 			if (++fbno == 0)
-				fbno = XFS_DIR2_FREE_FIRSTDB(mp);
+				fbno = xfs_dir2_byte_to_db(args->geo,
+							XFS_DIR2_FREE_OFFSET);
 			/*
 			 * If it's ifbno we already looked at it.
 			 */
@@ -1765,8 +1769,8 @@
 			 * to avoid it.
 			 */
 			error = xfs_dir2_free_try_read(tp, dp,
-						xfs_dir2_db_to_da(mp, fbno),
-						&fbp);
+					xfs_dir2_db_to_da(args->geo, fbno),
+					&fbp);
 			if (error)
 				return error;
 			if (!fbp)
@@ -1834,10 +1838,10 @@
 		 * Get the freespace block corresponding to the data block
 		 * that was just allocated.
 		 */
-		fbno = dp->d_ops->db_to_fdb(mp, dbno);
+		fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
 		error = xfs_dir2_free_try_read(tp, dp,
-					       xfs_dir2_db_to_da(mp, fbno),
-					       &fbp);
+				       xfs_dir2_db_to_da(args->geo, fbno),
+				       &fbp);
 		if (error)
 			return error;
 
@@ -1851,12 +1855,13 @@
 			if (error)
 				return error;
 
-			if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) {
+			if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
 				xfs_alert(mp,
 			"%s: dir ino %llu needed freesp block %lld for\n"
 			"  data block %lld, got %lld ifbno %llu lastfbno %d",
 					__func__, (unsigned long long)dp->i_ino,
-					(long long)dp->d_ops->db_to_fdb(mp, dbno),
+					(long long)dp->d_ops->db_to_fdb(
+								args->geo, dbno),
 					(long long)dbno, (long long)fbno,
 					(unsigned long long)ifbno, lastfbno);
 				if (fblk) {
@@ -1877,7 +1882,7 @@
 			/*
 			 * Get a buffer for the new block.
 			 */
-			error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp);
+			error = xfs_dir3_free_get_buf(args, fbno, &fbp);
 			if (error)
 				return error;
 			free = fbp->b_addr;
@@ -1887,8 +1892,10 @@
 			/*
 			 * Remember the first slot as our empty slot.
 			 */
-			freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
-					dp->d_ops->free_max_bests(mp);
+			freehdr.firstdb =
+				(fbno - xfs_dir2_byte_to_db(args->geo,
+							XFS_DIR2_FREE_OFFSET)) *
+					dp->d_ops->free_max_bests(args->geo);
 		} else {
 			free = fbp->b_addr;
 			bests = dp->d_ops->free_bests_p(free);
@@ -1898,13 +1905,13 @@
 		/*
 		 * Set the freespace block index from the data block number.
 		 */
-		findex = dp->d_ops->db_to_fdindex(mp, dbno);
+		findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
 		/*
 		 * If it's after the end of the current entries in the
 		 * freespace block, extend that table.
 		 */
 		if (findex >= freehdr.nvalid) {
-			ASSERT(findex < dp->d_ops->free_max_bests(mp));
+			ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
 			freehdr.nvalid = findex + 1;
 			/*
 			 * Tag new entry so nused will go up.
@@ -1918,7 +1925,7 @@
 		if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
 			freehdr.nused++;
 			dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
-			xfs_dir2_free_log_header(tp, dp, fbp);
+			xfs_dir2_free_log_header(args, fbp);
 		}
 		/*
 		 * Update the real value in the table.
@@ -1943,7 +1950,8 @@
 		/*
 		 * Read the data block in.
 		 */
-		error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
+		error = xfs_dir3_data_read(tp, dp,
+					   xfs_dir2_db_to_da(args->geo, dbno),
 					   -1, &dbp);
 		if (error)
 			return error;
@@ -1961,7 +1969,7 @@
 	/*
 	 * Mark the first part of the unused space, inuse for us.
 	 */
-	xfs_dir2_data_use_free(tp, dp, dbp, dup,
+	xfs_dir2_data_use_free(args, dbp, dup,
 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
 		&needlog, &needscan);
 	/*
@@ -1974,7 +1982,7 @@
 	dp->d_ops->data_put_ftype(dep, args->filetype);
 	tagp = dp->d_ops->data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
-	xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+	xfs_dir2_data_log_entry(args, dbp, dep);
 	/*
 	 * Rescan the block for bestfree if needed.
 	 */
@@ -1984,7 +1992,7 @@
 	 * Log the data block header if needed.
 	 */
 	if (needlog)
-		xfs_dir2_data_log_header(tp, dp, dbp);
+		xfs_dir2_data_log_header(args, dbp);
 	/*
 	 * If the freespace entry is now wrong, update it.
 	 */
@@ -1997,7 +2005,7 @@
 	 * Log the freespace entry if needed.
 	 */
 	if (logfree)
-		xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+		xfs_dir2_free_log_bests(args, fbp, findex, findex);
 	/*
 	 * Return the data block and offset in args, then drop the data block.
 	 */
@@ -2028,8 +2036,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_dirblksize;
-	state->node_ents = state->mp->m_dir_node_ents;
 	/*
 	 * Fill in the path to the entry in the cursor.
 	 */
@@ -2083,8 +2089,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_dirblksize;
-	state->node_ents = state->mp->m_dir_node_ents;
 
 	/* Look up the entry we're deleting, set up the cursor. */
 	error = xfs_da3_node_lookup_int(state, &rval);
@@ -2153,8 +2157,6 @@
 	state = xfs_da_state_alloc();
 	state->args = args;
 	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_dirblksize;
-	state->node_ents = state->mp->m_dir_node_ents;
 	inum = args->inumber;
 	/*
 	 * Lookup the entry to change in the btree.
@@ -2186,15 +2188,15 @@
 		       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)hdr +
-		       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
+		       xfs_dir2_dataptr_to_off(args->geo,
+					       be32_to_cpu(lep->address)));
 		ASSERT(inum != be64_to_cpu(dep->inumber));
 		/*
 		 * Fill in the new inode number and log the entry.
 		 */
 		dep->inumber = cpu_to_be64(inum);
 		args->dp->d_ops->data_put_ftype(dep, args->filetype);
-		xfs_dir2_data_log_entry(args->trans, args->dp,
-					state->extrablk.bp, dep);
+		xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
 		rval = 0;
 	}
 	/*
@@ -2262,9 +2264,9 @@
 	/*
 	 * Blow the block away.
 	 */
-	if ((error =
-	    xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
-		    bp))) {
+	error = xfs_dir2_shrink_inode(args,
+			xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp);
+	if (error) {
 		/*
 		 * Can't fail with ENOSPC since that only happens with no
 		 * space reservation, when breaking up an extent into two

diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 8b9d228..27ce079 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h

@@ -20,6 +20,140 @@
 
 struct dir_context;
 
+/*
+ * Directory offset/block conversion functions.
+ *
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
+
+/*
+ * Convert dataptr to byte in file space; cast to xfs_dir2_off_t, then shift.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
+{
+	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/*
+ * Convert byte in file space to dataptr.  Any unaligned low bits are lost.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
+{
+	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to directory (DB) block number: by >> geo->blklog.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_db_t)(by >> geo->blklog);
+}
+
+/*
+ * Convert dataptr to a directory (DB) block number, via the byte in space.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+	return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert byte in space to offset in a block, masking with blksize - 1.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1));
+}
+
+/*
+ * Convert dataptr to a byte offset in a block, via the byte in space.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+	return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert (DB) block and offset to byte in space: (db << blklog) + o.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+			xfs_dir2_data_aoff_t o)
+{
+	return ((xfs_dir2_off_t)db << geo->blklog) + o;
+}
+
+/*
+ * Convert block (DB) to block (dablk): shift left by blklog - fsblog bits.
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+	return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert byte in space to (dablk) block at the start of its (DB) block.
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+	return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by));
+}
+
+/*
+ * Convert (DB) block and offset to dataptr, via the byte in space.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+			   xfs_dir2_data_aoff_t o)
+{
+	return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o));
+}
+
+/*
+ * Convert block (dablk) to block (DB): shift right by blklog - fsblog bits.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+	return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert block (dablk) to byte offset in space of its (DB) block's start.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+	return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0);
+}
+
+/*
+ * Directory tail pointer accessors.  Each tail ends at hdr/lp + geo->blksize.
+ */
+static inline struct xfs_dir2_block_tail *
+xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr)
+{
+	return ((struct xfs_dir2_block_tail *)
+		((char *)hdr + geo->blksize)) - 1;
+}
+
+static inline struct xfs_dir2_leaf_tail *
+xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
+{
+	return (struct xfs_dir2_leaf_tail *)
+		((char *)lp + geo->blksize -
+		  sizeof(struct xfs_dir2_leaf_tail));
+}
+
 /* xfs_dir2.c */
 extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
@@ -54,8 +188,8 @@
 extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
-extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
-		xfs_dablk_t bno, xfs_daddr_t mapped_bno);
+extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
+		xfs_daddr_t mapped_bno);
 
 extern struct xfs_dir2_data_free *
 xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
@@ -77,9 +211,9 @@
 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
 extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
 		struct xfs_buf **bpp, __uint16_t magic);
-extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
 		struct xfs_buf *bp, int first, int last);
-extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
 		struct xfs_buf *bp);
 extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);

diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index aead369..48e99af 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c

@@ -76,26 +76,25 @@
 
 STATIC int
 xfs_dir2_sf_getdents(
-	xfs_inode_t		*dp,		/* incore directory inode */
+	struct xfs_da_args	*args,
 	struct dir_context	*ctx)
 {
 	int			i;		/* shortform entry number */
-	xfs_mount_t		*mp;		/* filesystem mount point */
+	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
 	xfs_dir2_dataptr_t	off;		/* current entry's offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
 	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
 	xfs_dir2_dataptr_t	dot_offset;
 	xfs_dir2_dataptr_t	dotdot_offset;
 	xfs_ino_t		ino;
-
-	mp = dp->i_mount;
+	struct xfs_da_geometry	*geo = args->geo;
 
 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
 	/*
 	 * Give up if the directory is way too short.
 	 */
 	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
 		return XFS_ERROR(EIO);
 	}
 
@@ -109,18 +108,18 @@
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
 	/*
 	 * Precalculate offsets for . and .. as we will always need them.
 	 *
 	 * XXX(hch): the second argument is sometimes 0 and sometimes
-	 * mp->m_dirdatablk.
+	 * geo->datablk
 	 */
-	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+	dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 						dp->d_ops->data_dot_offset);
-	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+	dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 						dp->d_ops->data_dotdot_offset);
 
 	/*
@@ -149,7 +148,7 @@
 	for (i = 0; i < sfp->count; i++) {
 		__uint8_t filetype;
 
-		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+		off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 				xfs_dir2_sf_get_offset(sfep));
 
 		if (ctx->pos > off) {
@@ -161,13 +160,13 @@
 		filetype = dp->d_ops->sf_get_ftype(sfep);
 		ctx->pos = off & 0x7fffffff;
 		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
-			    xfs_dir3_get_dtype(mp, filetype)))
+			    xfs_dir3_get_dtype(dp->i_mount, filetype)))
 			return 0;
 		sfep = dp->d_ops->sf_nextentry(sfp, sfep);
 	}
 
-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
-			0x7fffffff;
+	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
+								0x7fffffff;
 	return 0;
 }
 
@@ -176,9 +175,10 @@
  */
 STATIC int
 xfs_dir2_block_getdents(
-	xfs_inode_t		*dp,		/* incore inode */
+	struct xfs_da_args	*args,
 	struct dir_context	*ctx)
 {
+	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
 	struct xfs_buf		*bp;		/* buffer for block */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -186,16 +186,15 @@
 	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
 	char			*endptr;	/* end of the data entries */
 	int			error;		/* error return value */
-	xfs_mount_t		*mp;		/* filesystem mount point */
 	char			*ptr;		/* current data entry */
 	int			wantoff;	/* starting block offset */
 	xfs_off_t		cook;
+	struct xfs_da_geometry	*geo = args->geo;
 
-	mp = dp->i_mount;
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
 	error = xfs_dir3_block_read(NULL, dp, &bp);
@@ -206,13 +205,13 @@
 	 * Extract the byte offset we start at from the seek pointer.
 	 * We'll skip entries before this.
 	 */
-	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
+	wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos);
 	hdr = bp->b_addr;
 	xfs_dir3_data_check(dp, bp);
 	/*
 	 * Set up values for the loop.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(geo, hdr);
 	ptr = (char *)dp->d_ops->data_entry_p(hdr);
 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 
@@ -244,7 +243,7 @@
 		if ((char *)dep - (char *)hdr < wantoff)
 			continue;
 
-		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+		cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 					    (char *)dep - (char *)hdr);
 
 		ctx->pos = cook & 0x7fffffff;
@@ -254,7 +253,7 @@
 		 */
 		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
 			    be64_to_cpu(dep->inumber),
-			    xfs_dir3_get_dtype(mp, filetype))) {
+			    xfs_dir3_get_dtype(dp->i_mount, filetype))) {
 			xfs_trans_brelse(NULL, bp);
 			return 0;
 		}
@@ -264,8 +263,8 @@
 	 * Reached the end of the block.
 	 * Set the offset to a non-existent block 1 and return.
 	 */
-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
-			0x7fffffff;
+	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
+								0x7fffffff;
 	xfs_trans_brelse(NULL, bp);
 	return 0;
 }
@@ -286,13 +285,13 @@
 
 STATIC int
 xfs_dir2_leaf_readbuf(
-	struct xfs_inode	*dp,
+	struct xfs_da_args	*args,
 	size_t			bufsize,
 	struct xfs_dir2_leaf_map_info *mip,
 	xfs_dir2_off_t		*curoff,
 	struct xfs_buf		**bpp)
 {
-	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_inode	*dp = args->dp;
 	struct xfs_buf		*bp = *bpp;
 	struct xfs_bmbt_irec	*map = mip->map;
 	struct blk_plug		plug;
@@ -300,6 +299,7 @@
 	int			length;
 	int			i;
 	int			j;
+	struct xfs_da_geometry	*geo = args->geo;
 
 	/*
 	 * If we have a buffer, we need to release it and
@@ -309,12 +309,12 @@
 	if (bp) {
 		xfs_trans_brelse(NULL, bp);
 		bp = NULL;
-		mip->map_blocks -= mp->m_dirblkfsbs;
+		mip->map_blocks -= geo->fsbcount;
 		/*
 		 * Loop to get rid of the extents for the
 		 * directory block.
 		 */
-		for (i = mp->m_dirblkfsbs; i > 0; ) {
+		for (i = geo->fsbcount; i > 0; ) {
 			j = min_t(int, map->br_blockcount, i);
 			map->br_blockcount -= j;
 			map->br_startblock += j;
@@ -333,8 +333,7 @@
 	/*
 	 * Recalculate the readahead blocks wanted.
 	 */
-	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
-			       mp->m_sb.sb_blocksize) - 1;
+	mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
 	ASSERT(mip->ra_want >= 0);
 
 	/*
@@ -342,14 +341,14 @@
 	 * run out of data blocks, get some more mappings.
 	 */
 	if (1 + mip->ra_want > mip->map_blocks &&
-	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
+	    mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
 		/*
 		 * Get more bmaps, fill in after the ones
 		 * we already have in the table.
 		 */
 		mip->nmap = mip->map_size - mip->map_valid;
 		error = xfs_bmapi_read(dp, mip->map_off,
-				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
+				xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
 								mip->map_off,
 				&map[mip->map_valid], &mip->nmap, 0);
 
@@ -370,7 +369,7 @@
 			i = mip->map_valid + mip->nmap - 1;
 			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
 		} else
-			mip->map_off = xfs_dir2_byte_to_da(mp,
+			mip->map_off = xfs_dir2_byte_to_da(geo,
 							XFS_DIR2_LEAF_OFFSET);
 
 		/*
@@ -396,18 +395,18 @@
 	 * No valid mappings, so no more data blocks.
 	 */
 	if (!mip->map_valid) {
-		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
+		*curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
 		goto out;
 	}
 
 	/*
 	 * Read the directory block starting at the first mapping.
 	 */
-	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
+	mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
 	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
-			map->br_blockcount >= mp->m_dirblkfsbs ?
-			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
-
+			map->br_blockcount >= geo->fsbcount ?
+			    XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
+			    -1, &bp);
 	/*
 	 * Should just skip over the data block instead of giving up.
 	 */
@@ -419,7 +418,7 @@
 	 * was previously ra.
 	 */
 	if (mip->ra_current)
-		mip->ra_current -= mp->m_dirblkfsbs;
+		mip->ra_current -= geo->fsbcount;
 
 	/*
 	 * Do we need more readahead?
@@ -427,16 +426,16 @@
 	blk_start_plug(&plug);
 	for (mip->ra_index = mip->ra_offset = i = 0;
 	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
-	     i += mp->m_dirblkfsbs) {
+	     i += geo->fsbcount) {
 		ASSERT(mip->ra_index < mip->map_valid);
 		/*
 		 * Read-ahead a contiguous directory block.
 		 */
 		if (i > mip->ra_current &&
-		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
-			xfs_dir3_data_readahead(NULL, dp,
+		    map[mip->ra_index].br_blockcount >= geo->fsbcount) {
+			xfs_dir3_data_readahead(dp,
 				map[mip->ra_index].br_startoff + mip->ra_offset,
-				XFS_FSB_TO_DADDR(mp,
+				XFS_FSB_TO_DADDR(dp->i_mount,
 					map[mip->ra_index].br_startblock +
 							mip->ra_offset));
 			mip->ra_current = i;
@@ -447,7 +446,7 @@
 		 * use our mapping, but this is a very rare case.
 		 */
 		else if (i > mip->ra_current) {
-			xfs_dir3_data_readahead(NULL, dp,
+			xfs_dir3_data_readahead(dp,
 					map[mip->ra_index].br_startoff +
 							mip->ra_offset, -1);
 			mip->ra_current = i;
@@ -456,15 +455,14 @@
 		/*
 		 * Advance offset through the mapping table.
 		 */
-		for (j = 0; j < mp->m_dirblkfsbs; j++) {
+		for (j = 0; j < geo->fsbcount; j += length ) {
 			/*
 			 * The rest of this extent but not more than a dir
 			 * block.
 			 */
-			length = min_t(int, mp->m_dirblkfsbs,
+			length = min_t(int, geo->fsbcount,
 					map[mip->ra_index].br_blockcount -
 							mip->ra_offset);
-			j += length;
 			mip->ra_offset += length;
 
 			/*
@@ -489,22 +487,23 @@
  */
 STATIC int
 xfs_dir2_leaf_getdents(
-	xfs_inode_t		*dp,		/* incore directory inode */
+	struct xfs_da_args	*args,
 	struct dir_context	*ctx,
 	size_t			bufsize)
 {
+	struct xfs_inode	*dp = args->dp;
 	struct xfs_buf		*bp = NULL;	/* data block buffer */
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
 	int			error = 0;	/* error return value */
 	int			length;		/* temporary length value */
-	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			byteoff;	/* offset in current block */
 	xfs_dir2_off_t		curoff;		/* current overall offset */
 	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
 	char			*ptr = NULL;	/* pointer to current data */
 	struct xfs_dir2_leaf_map_info *map_info;
+	struct xfs_da_geometry	*geo = args->geo;
 
 	/*
 	 * If the offset is at or past the largest allowed value,
@@ -513,15 +512,12 @@
 	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
 		return 0;
 
-	mp = dp->i_mount;
-
 	/*
 	 * Set up to bmap a number of blocks based on the caller's
 	 * buffer size, the directory block size, and the filesystem
 	 * block size.
 	 */
-	length = howmany(bufsize + mp->m_dirblksize,
-				     mp->m_sb.sb_blocksize);
+	length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
 	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
 				(length * sizeof(struct xfs_bmbt_irec)),
 			       KM_SLEEP | KM_NOFS);
@@ -531,14 +527,14 @@
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
-	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
+	curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
 
 	/*
 	 * Force this conversion through db so we truncate the offset
 	 * down to get the start of the data block.
 	 */
-	map_info->map_off = xfs_dir2_db_to_da(mp,
-					      xfs_dir2_byte_to_db(mp, curoff));
+	map_info->map_off = xfs_dir2_db_to_da(geo,
+					      xfs_dir2_byte_to_db(geo, curoff));
 
 	/*
 	 * Loop over directory entries until we reach the end offset.
@@ -551,9 +547,9 @@
 		 * If we have no buffer, or we're off the end of the
 		 * current buffer, need to get another one.
 		 */
-		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
+		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
 
-			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
+			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
 						      &curoff, &bp);
 			if (error || !map_info->map_valid)
 				break;
@@ -561,7 +557,8 @@
 			/*
 			 * Having done a read, we need to set a new offset.
 			 */
-			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
+			newoff = xfs_dir2_db_off_to_byte(geo,
+							 map_info->curdb, 0);
 			/*
 			 * Start of the current block.
 			 */
@@ -571,7 +568,7 @@
 			 * Make sure we're in the right block.
 			 */
 			else if (curoff > newoff)
-				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
+				ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
 				       map_info->curdb);
 			hdr = bp->b_addr;
 			xfs_dir3_data_check(dp, bp);
@@ -579,7 +576,7 @@
 			 * Find our position in the block.
 			 */
 			ptr = (char *)dp->d_ops->data_entry_p(hdr);
-			byteoff = xfs_dir2_byte_to_off(mp, curoff);
+			byteoff = xfs_dir2_byte_to_off(geo, curoff);
 			/*
 			 * Skip past the header.
 			 */
@@ -608,10 +605,10 @@
 				 * Now set our real offset.
 				 */
 				curoff =
-					xfs_dir2_db_off_to_byte(mp,
-					    xfs_dir2_byte_to_db(mp, curoff),
+					xfs_dir2_db_off_to_byte(geo,
+					    xfs_dir2_byte_to_db(geo, curoff),
 					    (char *)ptr - (char *)hdr);
-				if (ptr >= (char *)hdr + mp->m_dirblksize) {
+				if (ptr >= (char *)hdr + geo->blksize) {
 					continue;
 				}
 			}
@@ -635,10 +632,10 @@
 		length = dp->d_ops->data_entsize(dep->namelen);
 		filetype = dp->d_ops->data_get_ftype(dep);
 
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
 			    be64_to_cpu(dep->inumber),
-			    xfs_dir3_get_dtype(mp, filetype)))
+			    xfs_dir3_get_dtype(dp->i_mount, filetype)))
 			break;
 
 		/*
@@ -653,10 +650,10 @@
 	/*
 	 * All done.  Set output offset value to current offset.
 	 */
-	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
+	if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR))
 		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
 	else
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 	kmem_free(map_info);
 	if (bp)
 		xfs_trans_brelse(NULL, bp);
@@ -668,13 +665,14 @@
  */
 int
 xfs_readdir(
-	xfs_inode_t	*dp,
-	struct dir_context *ctx,
-	size_t		bufsize)
+	struct xfs_inode	*dp,
+	struct dir_context	*ctx,
+	size_t			bufsize)
 {
-	int		rval;		/* return value */
-	int		v;		/* type-checking value */
-	uint		lock_mode;
+	struct xfs_da_args	args = { NULL };
+	int			rval;
+	int			v;
+	uint			lock_mode;
 
 	trace_xfs_readdir(dp);
 
@@ -684,15 +682,18 @@
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_getdents);
 
+	args.dp = dp;
+	args.geo = dp->i_mount->m_dir_geo;
+
 	lock_mode = xfs_ilock_data_map_shared(dp);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_getdents(dp, ctx);
-	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
+		rval = xfs_dir2_sf_getdents(&args, ctx);
+	else if ((rval = xfs_dir2_isblock(&args, &v)))
 		;
 	else if (v)
-		rval = xfs_dir2_block_getdents(dp, ctx);
+		rval = xfs_dir2_block_getdents(&args, ctx);
 	else
-		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
+		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
 	xfs_iunlock(dp, lock_mode);
 
 	return rval;

diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 3725fb1..53c3be6 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c

@@ -82,8 +82,10 @@
 	xfs_ino_t		parent = 0;	/* parent inode number */
 	int			size=0;		/* total computed size */
 	int			has_ftype;
+	struct xfs_da_geometry	*geo;
 
 	mp = dp->i_mount;
+	geo = mp->m_dir_geo;
 
 	/*
 	 * if there is a filetype field, add the extra byte to the namelen
@@ -92,7 +94,7 @@
 	has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
 
 	count = i8count = namelen = 0;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(geo, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
 
 	/*
@@ -104,8 +106,8 @@
 		/*
 		 * Calculate the pointer to the entry at hand.
 		 */
-		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+		dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+				xfs_dir2_dataptr_to_off(geo, addr));
 		/*
 		 * Detect . and .., so we can special-case them.
 		 * . is not included in sf directories.
@@ -195,7 +197,7 @@
 	/*
 	 * Set up to loop over the block's entries.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(args->geo, hdr);
 	ptr = (char *)dp->d_ops->data_entry_p(hdr);
 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	sfep = xfs_dir2_sf_firstentry(sfp);
@@ -247,7 +249,7 @@
 
 	/* now we are done with the block, we can shrink the inode */
 	logflags = XFS_ILOG_CORE;
-	error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp);
+	error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
 	if (error) {
 		ASSERT(error != ENOSPC);
 		goto out;
@@ -285,14 +287,12 @@
 xfs_dir2_sf_addname(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	int			add_entsize;	/* size of the new entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
 	int			incr_isize;	/* total change in size */
 	int			new_isize;	/* di_size after adding name */
 	int			objchange;	/* changing to 8-byte inodes */
 	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
-	int			old_isize;	/* di_size before adding name */
 	int			pick;		/* which algorithm to use */
 	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
 	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
@@ -316,8 +316,7 @@
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
-	incr_isize = add_entsize;
+	incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
 	objchange = 0;
 #if XFS_BIG_INUMS
 	/*
@@ -325,11 +324,8 @@
 	 */
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
 		/*
-		 * Yes, adjust the entry size and the total size.
+		 * Yes, adjust the inode size.  old count + (parent + new)
 		 */
-		add_entsize +=
-			(uint)sizeof(xfs_dir2_ino8_t) -
-			(uint)sizeof(xfs_dir2_ino4_t);
 		incr_isize +=
 			(sfp->count + 2) *
 			((uint)sizeof(xfs_dir2_ino8_t) -
@@ -337,8 +333,7 @@
 		objchange = 1;
 	}
 #endif
-	old_isize = (int)dp->i_d.di_size;
-	new_isize = old_isize + incr_isize;
+	new_isize = (int)dp->i_d.di_size + incr_isize;
 	/*
 	 * Won't fit as shortform any more (due to size),
 	 * or the pick routine says it won't (due to offset values).
@@ -593,7 +588,7 @@
 	 * we'll go back, convert to block, then try the insert and convert
 	 * to leaf.
 	 */
-	if (used + (holefit ? 0 : size) > mp->m_dirblksize)
+	if (used + (holefit ? 0 : size) > args->geo->blksize)
 		return 0;
 	/*
 	 * If changing the inode number size, do it the hard way.
@@ -608,7 +603,7 @@
 	/*
 	 * If it won't fit at the end then do it the hard way (use the hole).
 	 */
-	if (used + size > mp->m_dirblksize)
+	if (used + size > args->geo->blksize)
 		return 2;
 	/*
 	 * Do it the easy way.
@@ -659,7 +654,7 @@
 	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
 	ASSERT(offset +
 	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize);
+	       (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize);
 }
 #endif	/* DEBUG */
 
@@ -1110,9 +1105,9 @@
 }
 
 /*
- * Convert from 4-byte inode numbers to 8-byte inode numbers.
- * The new 8-byte inode number is not there yet, we leave with the
- * count 1 but no corresponding entry.
+ * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
+ * The new entry w/ an 8-byte inode number is not there yet; we leave with
+ * i8count set to 1, but no corresponding 8-byte entry.
  */
 static void
 xfs_dir2_sf_toino8(
@@ -1145,7 +1140,7 @@
 	ASSERT(oldsfp->i8count == 0);
 	memcpy(buf, oldsfp, oldsize);
 	/*
-	 * Compute the new inode size.
+	 * Compute the new inode size (nb: entry count + 1 for parent)
 	 */
 	newsize =
 		oldsize +

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 868b19f..3ee0cd4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c

@@ -353,10 +353,10 @@
 			       dqp->q_blkno,
 			       mp->m_quotainfo->qi_dqchunklen,
 			       0);
-
-	error = xfs_buf_geterror(bp);
-	if (error)
+	if (!bp) {
+		error = ENOMEM;
 		goto error1;
+	}
 	bp->b_ops = &xfs_dquot_buf_ops;
 
 	/*
@@ -832,47 +832,6 @@
 	return (0);
 }
 
-
-STATIC void
-xfs_qm_dqput_final(
-	struct xfs_dquot	*dqp)
-{
-	struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
-	struct xfs_dquot	*gdqp;
-	struct xfs_dquot	*pdqp;
-
-	trace_xfs_dqput_free(dqp);
-
-	if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
-		XFS_STATS_INC(xs_qm_dquot_unused);
-
-	/*
-	 * If we just added a udquot to the freelist, then we want to release
-	 * the gdquot/pdquot reference that it (probably) has. Otherwise it'll
-	 * keep the gdquot/pdquot from getting reclaimed.
-	 */
-	gdqp = dqp->q_gdquot;
-	if (gdqp) {
-		xfs_dqlock(gdqp);
-		dqp->q_gdquot = NULL;
-	}
-
-	pdqp = dqp->q_pdquot;
-	if (pdqp) {
-		xfs_dqlock(pdqp);
-		dqp->q_pdquot = NULL;
-	}
-	xfs_dqunlock(dqp);
-
-	/*
-	 * If we had a group/project quota hint, release it now.
-	 */
-	if (gdqp)
-		xfs_qm_dqput(gdqp);
-	if (pdqp)
-		xfs_qm_dqput(pdqp);
-}
-
 /*
  * Release a reference to the dquot (decrement ref-count) and unlock it.
  *
@@ -888,10 +847,14 @@
 
 	trace_xfs_dqput(dqp);
 
-	if (--dqp->q_nrefs > 0)
-		xfs_dqunlock(dqp);
-	else
-		xfs_qm_dqput_final(dqp);
+	if (--dqp->q_nrefs == 0) {
+		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
+		trace_xfs_dqput_free(dqp);
+
+		if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
+			XFS_STATS_INC(xs_qm_dquot_unused);
+	}
+	xfs_dqunlock(dqp);
 }
 
 /*

diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index d22ed00..68a68f7 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h

@@ -52,8 +52,6 @@
 	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
 	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
 
-	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */
-	struct xfs_dquot*q_pdquot;	/* project dquot, hint only */
 	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
 	xfs_dq_logitem_t q_logitem;	/* dquot log item */
 	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */

diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index 610da81..c2ac0c6 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c

@@ -35,7 +35,6 @@
 
 int
 xfs_calc_dquots_per_chunk(
-	struct xfs_mount	*mp,
 	unsigned int		nbblks)	/* basic block units */
 {
 	unsigned int	ndquots;
@@ -194,7 +193,7 @@
 	if (mp->m_quotainfo)
 		ndquots = mp->m_quotainfo->qi_dqperchunk;
 	else
-		ndquots = xfs_calc_dquots_per_chunk(mp,
+		ndquots = xfs_calc_dquots_per_chunk(
 					XFS_BB_TO_FSB(mp, bp->b_length));
 
 	for (i = 0; i < ndquots; i++, d++) {
@@ -225,7 +224,7 @@
 	if (mp->m_quotainfo)
 		ndquots = mp->m_quotainfo->qi_dqperchunk;
 	else
-		ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length);
+		ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
 
 	/*
 	 * On the first read of the buffer, verify that each dquot is valid.

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 830c1c9..1f66779 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c

@@ -229,34 +229,27 @@
 }
 
 STATIC ssize_t
-xfs_file_aio_read(
+xfs_file_read_iter(
 	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos)
+	struct iov_iter		*to)
 {
 	struct file		*file = iocb->ki_filp;
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	size_t			size = 0;
+	size_t			size = iov_iter_count(to);
 	ssize_t			ret = 0;
 	int			ioflags = 0;
 	xfs_fsize_t		n;
+	loff_t			pos = iocb->ki_pos;
 
 	XFS_STATS_INC(xs_read_calls);
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
 	if (file->f_mode & FMODE_NOCMTIME)
 		ioflags |= IO_INVIS;
 
-	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
-	if (ret < 0)
-		return ret;
-
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		xfs_buftarg_t	*target =
 			XFS_IS_REALTIME_INODE(ip) ?
@@ -309,7 +302,7 @@
 
 	trace_xfs_file_read(ip, size, pos, ioflags);
 
-	ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
+	ret = generic_file_read_iter(iocb, to);
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
@@ -350,47 +343,6 @@
 }
 
 /*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-	struct pipe_inode_info	*pipe,
-	struct file		*outfilp,
-	loff_t			*ppos,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct inode		*inode = outfilp->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			ioflags = 0;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_write_calls);
-
-	if (outfilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_write_bytes, ret);
-
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return ret;
-}
-
-/*
  * This routine is called to handle zeroing any space in the last block of the
  * file that is beyond the EOF.  We do this since the size is being increased
  * without writing anything to that block and we don't want to read the
@@ -625,10 +577,7 @@
 STATIC ssize_t
 xfs_file_dio_aio_write(
 	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos,
-	size_t			ocount)
+	struct iov_iter		*from)
 {
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
@@ -636,9 +585,10 @@
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	size_t			count = ocount;
 	int			unaligned_io = 0;
 	int			iolock;
+	size_t			count = iov_iter_count(from);
+	loff_t			pos = iocb->ki_pos;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -677,6 +627,7 @@
 	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
 	if (ret)
 		goto out;
+	iov_iter_truncate(from, count);
 
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -698,8 +649,7 @@
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_direct_write(iocb, iovp,
-			&nr_segs, pos, count, ocount);
+	ret = generic_file_direct_write(iocb, from, pos);
 
 out:
 	xfs_rw_iunlock(ip, iolock);
@@ -712,10 +662,7 @@
 STATIC ssize_t
 xfs_file_buffered_aio_write(
 	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos,
-	size_t			count)
+	struct iov_iter		*from)
 {
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
@@ -724,7 +671,8 @@
 	ssize_t			ret;
 	int			enospc = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
-	struct iov_iter		from;
+	loff_t			pos = iocb->ki_pos;
+	size_t			count = iov_iter_count(from);
 
 	xfs_rw_ilock(ip, iolock);
 
@@ -732,13 +680,13 @@
 	if (ret)
 		goto out;
 
-	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	iov_iter_truncate(from, count);
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
 write_retry:
 	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_perform_write(file, &from, pos);
+	ret = generic_perform_write(file, from, pos);
 	if (likely(ret >= 0))
 		iocb->ki_pos = pos + ret;
 	/*
@@ -759,40 +707,29 @@
 }
 
 STATIC ssize_t
-xfs_file_aio_write(
+xfs_file_write_iter(
 	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos)
+	struct iov_iter		*from)
 {
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	ssize_t			ret;
-	size_t			ocount = 0;
+	size_t			ocount = iov_iter_count(from);
 
 	XFS_STATS_INC(xs_write_calls);
 
-	BUG_ON(iocb->ki_pos != pos);
-
-	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-	if (ret)
-		return ret;
-
 	if (ocount == 0)
 		return 0;
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		ret = -EIO;
-		goto out;
-	}
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
 
 	if (unlikely(file->f_flags & O_DIRECT))
-		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
+		ret = xfs_file_dio_aio_write(iocb, from);
 	else
-		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-						  ocount);
+		ret = xfs_file_buffered_aio_write(iocb, from);
 
 	if (ret > 0) {
 		ssize_t err;
@@ -804,8 +741,6 @@
 		if (err < 0)
 			ret = err;
 	}
-
-out:
 	return ret;
 }
 
@@ -944,7 +879,7 @@
 	 */
 	mode = xfs_ilock_data_map_shared(ip);
 	if (ip->i_d.di_nextents > 0)
-		xfs_dir3_data_readahead(NULL, ip, 0, -1);
+		xfs_dir3_data_readahead(ip, 0, -1);
 	xfs_iunlock(ip, mode);
 	return 0;
 }
@@ -1461,12 +1396,12 @@
 
 const struct file_operations xfs_file_operations = {
 	.llseek		= xfs_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= xfs_file_aio_read,
-	.aio_write	= xfs_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= xfs_file_read_iter,
+	.write_iter	= xfs_file_write_iter,
 	.splice_read	= xfs_file_splice_read,
-	.splice_write	= xfs_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,

diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 12b6e77..8ec81be 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * Copyright (c) 2014 Christoph Hellwig.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -32,100 +33,20 @@
 #include "xfs_filestream.h"
 #include "xfs_trace.h"
 
-#ifdef XFS_FILESTREAMS_TRACE
+struct xfs_fstrm_item {
+	struct xfs_mru_cache_elem	mru;
+	struct xfs_inode		*ip;
+	xfs_agnumber_t			ag; /* AG in use for this directory */
+};
 
-ktrace_t *xfs_filestreams_trace_buf;
-
-STATIC void
-xfs_filestreams_trace(
-	xfs_mount_t	*mp,	/* mount point */
-	int		type,	/* type of trace */
-	const char	*func,	/* source function */
-	int		line,	/* source line number */
-	__psunsigned_t	arg0,
-	__psunsigned_t	arg1,
-	__psunsigned_t	arg2,
-	__psunsigned_t	arg3,
-	__psunsigned_t	arg4,
-	__psunsigned_t	arg5)
-{
-	ktrace_enter(xfs_filestreams_trace_buf,
-		(void *)(__psint_t)(type | (line << 16)),
-		(void *)func,
-		(void *)(__psunsigned_t)current_pid(),
-		(void *)mp,
-		(void *)(__psunsigned_t)arg0,
-		(void *)(__psunsigned_t)arg1,
-		(void *)(__psunsigned_t)arg2,
-		(void *)(__psunsigned_t)arg3,
-		(void *)(__psunsigned_t)arg4,
-		(void *)(__psunsigned_t)arg5,
-		NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-#define TRACE0(mp,t)			TRACE6(mp,t,0,0,0,0,0,0)
-#define TRACE1(mp,t,a0)			TRACE6(mp,t,a0,0,0,0,0,0)
-#define TRACE2(mp,t,a0,a1)		TRACE6(mp,t,a0,a1,0,0,0,0)
-#define TRACE3(mp,t,a0,a1,a2)		TRACE6(mp,t,a0,a1,a2,0,0,0)
-#define TRACE4(mp,t,a0,a1,a2,a3)	TRACE6(mp,t,a0,a1,a2,a3,0,0)
-#define TRACE5(mp,t,a0,a1,a2,a3,a4)	TRACE6(mp,t,a0,a1,a2,a3,a4,0)
-#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
-	xfs_filestreams_trace(mp, t, __func__, __LINE__, \
-				(__psunsigned_t)a0, (__psunsigned_t)a1, \
-				(__psunsigned_t)a2, (__psunsigned_t)a3, \
-				(__psunsigned_t)a4, (__psunsigned_t)a5)
-
-#define TRACE_AG_SCAN(mp, ag, ag2) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
-#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
-		TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
-			 cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
-		TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
-		TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
-		TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag) \
-		TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
-
-
-#else
-#define TRACE_AG_SCAN(mp, ag, ag2)
-#define TRACE_AG_PICK1(mp, max_ag, maxfree)
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag)
-#endif
-
-static kmem_zone_t *item_zone;
-
-/*
- * Structure for associating a file or a directory with an allocation group.
- * The parent directory pointer is only needed for files, but since there will
- * generally be vastly more files than directories in the cache, using the same
- * data structure simplifies the code with very little memory overhead.
- */
-typedef struct fstrm_item
-{
-	xfs_agnumber_t	ag;	/* AG currently in use for the file/directory. */
-	xfs_inode_t	*ip;	/* inode self-pointer. */
-	xfs_inode_t	*pip;	/* Parent directory inode pointer. */
-} fstrm_item_t;
+enum xfs_fstrm_alloc {
+	XFS_PICK_USERDATA = 1,
+	XFS_PICK_LOWSPACE = 2,
+};
 
 /*
  * Allocation group filestream associations are tracked with per-ag atomic
- * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
  * particular AG already has active filestreams associated with it. The mount
  * point's m_peraglock is used to protect these counters from per-ag array
  * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
@@ -160,7 +81,7 @@
  * the cache that reference per-ag array elements that have since been
  * reallocated.
  */
-static int
+int
 xfs_filestream_peek_ag(
 	xfs_mount_t	*mp,
 	xfs_agnumber_t	agno)
@@ -200,23 +121,40 @@
 	xfs_perag_put(pag);
 }
 
+static void
+xfs_fstrm_free_func(
+	struct xfs_mru_cache_elem *mru)
+{
+	struct xfs_fstrm_item	*item =
+		container_of(mru, struct xfs_fstrm_item, mru);
+
+	xfs_filestream_put_ag(item->ip->i_mount, item->ag);
+
+	trace_xfs_filestream_free(item->ip, item->ag);
+
+	kmem_free(item);
+}
+
 /*
  * Scan the AGs starting at startag looking for an AG that isn't in use and has
  * at least minlen blocks free.
  */
 static int
-_xfs_filestream_pick_ag(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	startag,
-	xfs_agnumber_t	*agp,
-	int		flags,
-	xfs_extlen_t	minlen)
+xfs_filestream_pick_ag(
+	struct xfs_inode	*ip,
+	xfs_agnumber_t		startag,
+	xfs_agnumber_t		*agp,
+	int			flags,
+	xfs_extlen_t		minlen)
 {
-	int		streams, max_streams;
-	int		err, trylock, nscan;
-	xfs_extlen_t	longest, free, minfree, maxfree = 0;
-	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER;
-	struct xfs_perag *pag;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_fstrm_item	*item;
+	struct xfs_perag	*pag;
+	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
+	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
+	int			err, trylock, nscan;
+
+	ASSERT(S_ISDIR(ip->i_d.di_mode));
 
 	/* 2% of an AG's blocks must be free for it to be chosen. */
 	minfree = mp->m_sb.sb_agblocks / 50;
@@ -228,8 +166,9 @@
 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
 
 	for (nscan = 0; 1; nscan++) {
+		trace_xfs_filestream_scan(ip, ag);
+
 		pag = xfs_perag_get(mp, ag);
-		TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
 
 		if (!pag->pagf_init) {
 			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
@@ -246,7 +185,6 @@
 		/* Keep track of the AG with the most free blocks. */
 		if (pag->pagf_freeblks > maxfree) {
 			maxfree = pag->pagf_freeblks;
-			max_streams = atomic_read(&pag->pagf_fstrms);
 			max_ag = ag;
 		}
 
@@ -269,7 +207,6 @@
 
 			/* Break out, retaining the reference on the AG. */
 			free = pag->pagf_freeblks;
-			streams = atomic_read(&pag->pagf_fstrms);
 			xfs_perag_put(pag);
 			*agp = ag;
 			break;
@@ -305,317 +242,98 @@
 		 */
 		if (max_ag != NULLAGNUMBER) {
 			xfs_filestream_get_ag(mp, max_ag);
-			TRACE_AG_PICK1(mp, max_ag, maxfree);
-			streams = max_streams;
 			free = maxfree;
 			*agp = max_ag;
 			break;
 		}
 
 		/* take AG 0 if none matched */
-		TRACE_AG_PICK1(mp, max_ag, maxfree);
+		trace_xfs_filestream_pick(ip, *agp, free, nscan);
 		*agp = 0;
 		return 0;
 	}
 
-	TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
+	trace_xfs_filestream_pick(ip, *agp, free, nscan);
 
-	return 0;
-}
-
-/*
- * Set the allocation group number for a file or a directory, updating inode
- * references and per-AG references as appropriate.
- */
-static int
-_xfs_filestream_update_ag(
-	xfs_inode_t	*ip,
-	xfs_inode_t	*pip,
-	xfs_agnumber_t	ag)
-{
-	int		err = 0;
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	old_ag;
-	xfs_inode_t	*old_pip;
-
-	/*
-	 * Either ip is a regular file and pip is a directory, or ip is a
-	 * directory and pip is NULL.
-	 */
-	ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
-	               S_ISDIR(pip->i_d.di_mode)) ||
-	              (S_ISDIR(ip->i_d.di_mode) && !pip)));
-
-	mp = ip->i_mount;
-	cache = mp->m_filestream;
-
-	item = xfs_mru_cache_lookup(cache, ip->i_ino);
-	if (item) {
-		ASSERT(item->ip == ip);
-		old_ag = item->ag;
-		item->ag = ag;
-		old_pip = item->pip;
-		item->pip = pip;
-		xfs_mru_cache_done(cache);
-
-		/*
-		 * If the AG has changed, drop the old ref and take a new one,
-		 * effectively transferring the reference from old to new AG.
-		 */
-		if (ag != old_ag) {
-			xfs_filestream_put_ag(mp, old_ag);
-			xfs_filestream_get_ag(mp, ag);
-		}
-
-		/*
-		 * If ip is a file and its pip has changed, drop the old ref and
-		 * take a new one.
-		 */
-		if (pip && pip != old_pip) {
-			IRELE(old_pip);
-			IHOLD(pip);
-		}
-
-		TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
-				ag, xfs_filestream_peek_ag(mp, ag));
+	if (*agp == NULLAGNUMBER)
 		return 0;
-	}
 
-	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+	err = ENOMEM;
+	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
 	if (!item)
-		return ENOMEM;
+		goto out_put_ag;
 
-	item->ag = ag;
+	item->ag = *agp;
 	item->ip = ip;
-	item->pip = pip;
 
-	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
 	if (err) {
-		kmem_zone_free(item_zone, item);
-		return err;
+		if (err == EEXIST)
+			err = 0;
+		goto out_free_item;
 	}
 
-	/* Take a reference on the AG. */
-	xfs_filestream_get_ag(mp, ag);
-
-	/*
-	 * Take a reference on the inode itself regardless of whether it's a
-	 * regular file or a directory.
-	 */
-	IHOLD(ip);
-
-	/*
-	 * In the case of a regular file, take a reference on the parent inode
-	 * as well to ensure it remains in-core.
-	 */
-	if (pip)
-		IHOLD(pip);
-
-	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
-			ag, xfs_filestream_peek_ag(mp, ag));
-
 	return 0;
-}
 
-/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
-STATIC void
-xfs_fstrm_free_func(
-	unsigned long	ino,
-	void		*data)
-{
-	fstrm_item_t	*item  = (fstrm_item_t *)data;
-	xfs_inode_t	*ip = item->ip;
-
-	ASSERT(ip->i_ino == ino);
-
-	xfs_iflags_clear(ip, XFS_IFILESTREAM);
-
-	/* Drop the reference taken on the AG when the item was added. */
-	xfs_filestream_put_ag(ip->i_mount, item->ag);
-
-	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
-		xfs_filestream_peek_ag(ip->i_mount, item->ag));
-
-	/*
-	 * _xfs_filestream_update_ag() always takes a reference on the inode
-	 * itself, whether it's a file or a directory.  Release it here.
-	 * This can result in the inode being freed and so we must
-	 * not hold any inode locks when freeing filesstreams objects
-	 * otherwise we can deadlock here.
-	 */
-	IRELE(ip);
-
-	/*
-	 * In the case of a regular file, _xfs_filestream_update_ag() also
-	 * takes a ref on the parent inode to keep it in-core.  Release that
-	 * too.
-	 */
-	if (item->pip)
-		IRELE(item->pip);
-
-	/* Finally, free the memory allocated for the item. */
-	kmem_zone_free(item_zone, item);
-}
-
-/*
- * xfs_filestream_init() is called at xfs initialisation time to set up the
- * memory zone that will be used for filestream data structure allocation.
- */
-int
-xfs_filestream_init(void)
-{
-	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
-	if (!item_zone)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/*
- * xfs_filestream_uninit() is called at xfs termination time to destroy the
- * memory zone that was used for filestream data structure allocation.
- */
-void
-xfs_filestream_uninit(void)
-{
-	kmem_zone_destroy(item_zone);
-}
-
-/*
- * xfs_filestream_mount() is called when a file system is mounted with the
- * filestream option.  It is responsible for allocating the data structures
- * needed to track the new file system's file streams.
- */
-int
-xfs_filestream_mount(
-	xfs_mount_t	*mp)
-{
-	int		err;
-	unsigned int	lifetime, grp_count;
-
-	/*
-	 * The filestream timer tunable is currently fixed within the range of
-	 * one second to four minutes, with five seconds being the default.  The
-	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
-	 * timer tunable to within about 10 percent.  This requires at least 10
-	 * groups.
-	 */
-	lifetime  = xfs_fstrm_centisecs * 10;
-	grp_count = 10;
-
-	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
-	                     xfs_fstrm_free_func);
-
+out_free_item:
+	kmem_free(item);
+out_put_ag:
+	xfs_filestream_put_ag(mp, *agp);
 	return err;
 }
 
-/*
- * xfs_filestream_unmount() is called when a file system that was mounted with
- * the filestream option is unmounted.  It drains the data structures created
- * to track the file system's file streams and frees all the memory that was
- * allocated.
- */
-void
-xfs_filestream_unmount(
-	xfs_mount_t	*mp)
+static struct xfs_inode *
+xfs_filestream_get_parent(
+	struct xfs_inode	*ip)
 {
-	xfs_mru_cache_destroy(mp->m_filestream);
+	struct inode		*inode = VFS_I(ip), *dir = NULL;
+	struct dentry		*dentry, *parent;
+
+	dentry = d_find_alias(inode);
+	if (!dentry)
+		goto out;
+
+	parent = dget_parent(dentry);
+	if (!parent)
+		goto out_dput;
+
+	dir = igrab(parent->d_inode);
+	dput(parent);
+
+out_dput:
+	dput(dentry);
+out:
+	return dir ? XFS_I(dir) : NULL;
 }
 
 /*
- * Return the AG of the filestream the file or directory belongs to, or
- * NULLAGNUMBER otherwise.
+ * Find the right allocation group for a file, either by finding an
+ * existing file stream or creating a new one.
+ *
+ * Returns NULLAGNUMBER in case of an error.
  */
 xfs_agnumber_t
 xfs_filestream_lookup_ag(
-	xfs_inode_t	*ip)
+	struct xfs_inode	*ip)
 {
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	ag;
-	int		ref;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_inode	*pip = NULL;
+	xfs_agnumber_t		startag, ag = NULLAGNUMBER;
+	struct xfs_mru_cache_elem *mru;
 
-	if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
-		ASSERT(0);
-		return NULLAGNUMBER;
-	}
-
-	cache = ip->i_mount->m_filestream;
-	item = xfs_mru_cache_lookup(cache, ip->i_ino);
-	if (!item) {
-		TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
-		return NULLAGNUMBER;
-	}
-
-	ASSERT(ip == item->ip);
-	ag = item->ag;
-	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
-	xfs_mru_cache_done(cache);
-
-	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
-	return ag;
-}
-
-/*
- * xfs_filestream_associate() should only be called to associate a regular file
- * with its parent directory.  Calling it with a child directory isn't
- * appropriate because filestreams don't apply to entire directory hierarchies.
- * Creating a file in a child directory of an existing filestream directory
- * starts a new filestream with its own allocation group association.
- *
- * Returns < 0 on error, 0 if successful association occurred, > 0 if
- * we failed to get an association because of locking issues.
- */
-int
-xfs_filestream_associate(
-	xfs_inode_t	*pip,
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	fstrm_item_t	*item;
-	xfs_agnumber_t	ag, rotorstep, startag;
-	int		err = 0;
-
-	ASSERT(S_ISDIR(pip->i_d.di_mode));
 	ASSERT(S_ISREG(ip->i_d.di_mode));
-	if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
-		return -EINVAL;
 
-	mp = pip->i_mount;
-	cache = mp->m_filestream;
+	pip = xfs_filestream_get_parent(ip);
+	if (!pip)
+		return NULLAGNUMBER;	/* no parent ref held: skip IRELE at out: */
 
-	/*
-	 * We have a problem, Houston.
-	 *
-	 * Taking the iolock here violates inode locking order - we already
-	 * hold the ilock. Hence if we block getting this lock we may never
-	 * wake. Unfortunately, that means if we can't get the lock, we're
-	 * screwed in terms of getting a stream association - we can't spin
-	 * waiting for the lock because someone else is waiting on the lock we
-	 * hold and we cannot drop that as we are in a transaction here.
-	 *
-	 * Lucky for us, this inversion is not a problem because it's a
-	 * directory inode that we are trying to lock here.
-	 *
-	 * So, if we can't get the iolock without sleeping then just give up
-	 */
-	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
-		return 1;
+	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
+	if (mru) {
+		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
+		xfs_mru_cache_done(mp->m_filestream);
 
-	/* If the parent directory is already in the cache, use its AG. */
-	item = xfs_mru_cache_lookup(cache, pip->i_ino);
-	if (item) {
-		ASSERT(item->ip == pip);
-		ag = item->ag;
-		xfs_mru_cache_done(cache);
-
-		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
-		err = _xfs_filestream_update_ag(ip, pip, ag);
-
-		goto exit;
+		trace_xfs_filestream_lookup(ip, ag);
+		goto out;
 	}
 
 	/*
@@ -623,202 +341,94 @@
 	 * use the directory inode's AG.
 	 */
 	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		rotorstep = xfs_rotorstep;
+		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
 		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
 		                 (mp->m_sb.sb_agcount * rotorstep);
 	} else
 		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
 
-	/* Pick a new AG for the parent inode starting at startag. */
-	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
-	if (err || ag == NULLAGNUMBER)
-		goto exit_did_pick;
-
-	/* Associate the parent inode with the AG. */
-	err = _xfs_filestream_update_ag(pip, NULL, ag);
-	if (err)
-		goto exit_did_pick;
-
-	/* Associate the file inode with the AG. */
-	err = _xfs_filestream_update_ag(ip, pip, ag);
-	if (err)
-		goto exit_did_pick;
-
-	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
-
-exit_did_pick:
-	/*
-	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
-	 * reference it took on it, since the file and directory will have taken
-	 * their own now if they were successfully cached.
-	 */
-	if (ag != NULLAGNUMBER)
-		xfs_filestream_put_ag(mp, ag);
-
-exit:
-	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
-	return -err;
+	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
+		ag = NULLAGNUMBER;
+out:
+	IRELE(pip);
+	return ag;
 }
 
 /*
- * Pick a new allocation group for the current file and its file stream.  This
- * function is called by xfs_bmap_filestreams() with the mount point's per-ag
- * lock held.
+ * Pick a new allocation group for the current file and its file stream.
+ *
+ * This is called when the allocator can't find a suitable extent in the
+ * current AG, and we have to move the stream into a new AG with more space.
  */
 int
 xfs_filestream_new_ag(
 	struct xfs_bmalloca	*ap,
 	xfs_agnumber_t		*agp)
 {
-	int		flags, err;
-	xfs_inode_t	*ip, *pip = NULL;
-	xfs_mount_t	*mp;
-	xfs_mru_cache_t	*cache;
-	xfs_extlen_t	minlen;
-	fstrm_item_t	*dir, *file;
-	xfs_agnumber_t	ag = NULLAGNUMBER;
+	struct xfs_inode	*ip = ap->ip, *pip;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_extlen_t		minlen = ap->length;
+	xfs_agnumber_t		startag = 0;
+	int			flags, err = 0;
+	struct xfs_mru_cache_elem *mru;
 
-	ip = ap->ip;
-	mp = ip->i_mount;
-	cache = mp->m_filestream;
-	minlen = ap->length;
 	*agp = NULLAGNUMBER;
 
-	/*
-	 * Look for the file in the cache, removing it if it's found.  Doing
-	 * this allows it to be held across the dir lookup that follows.
-	 */
-	file = xfs_mru_cache_remove(cache, ip->i_ino);
-	if (file) {
-		ASSERT(ip == file->ip);
+	pip = xfs_filestream_get_parent(ip);
+	if (!pip)
+		goto exit;
 
-		/* Save the file's parent inode and old AG number for later. */
-		pip = file->pip;
-		ag = file->ag;
-
-		/* Look for the file's directory in the cache. */
-		dir = xfs_mru_cache_lookup(cache, pip->i_ino);
-		if (dir) {
-			ASSERT(pip == dir->ip);
-
-			/*
-			 * If the directory has already moved on to a new AG,
-			 * use that AG as the new AG for the file. Don't
-			 * forget to twiddle the AG refcounts to match the
-			 * movement.
-			 */
-			if (dir->ag != file->ag) {
-				xfs_filestream_put_ag(mp, file->ag);
-				xfs_filestream_get_ag(mp, dir->ag);
-				*agp = file->ag = dir->ag;
-			}
-
-			xfs_mru_cache_done(cache);
-		}
-
-		/*
-		 * Put the file back in the cache.  If this fails, the free
-		 * function needs to be called to tidy up in the same way as if
-		 * the item had simply expired from the cache.
-		 */
-		err = xfs_mru_cache_insert(cache, ip->i_ino, file);
-		if (err) {
-			xfs_fstrm_free_func(ip->i_ino, file);
-			return err;
-		}
-
-		/*
-		 * If the file's AG was moved to the directory's new AG, there's
-		 * nothing more to be done.
-		 */
-		if (*agp != NULLAGNUMBER) {
-			TRACE_MOVEAG(mp, ip, pip,
-					ag, xfs_filestream_peek_ag(mp, ag),
-					*agp, xfs_filestream_peek_ag(mp, *agp));
-			return 0;
-		}
+	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
+	if (mru) {
+		struct xfs_fstrm_item *item =
+			container_of(mru, struct xfs_fstrm_item, mru);
+		startag = (item->ag + 1) % mp->m_sb.sb_agcount;
 	}
 
-	/*
-	 * If the file's parent directory is known, take its iolock in exclusive
-	 * mode to prevent two sibling files from racing each other to migrate
-	 * themselves and their parent to different AGs.
-	 *
-	 * Note that we lock the parent directory iolock inside the child
-	 * iolock here.  That's fine as we never hold both parent and child
-	 * iolock in any other place.  This is different from the ilock,
-	 * which requires locking of the child after the parent for namespace
-	 * operations.
-	 */
-	if (pip)
-		xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
-
-	/*
-	 * A new AG needs to be found for the file.  If the file's parent
-	 * directory is also known, it will be moved to the new AG as well to
-	 * ensure that files created inside it in future use the new AG.
-	 */
-	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
 	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
 	        (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
 
-	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
-	if (err || *agp == NULLAGNUMBER)
-		goto exit;
+	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
 
 	/*
-	 * If the file wasn't found in the file cache, then its parent directory
-	 * inode isn't known.  For this to have happened, the file must either
-	 * be pre-existing, or it was created long enough ago that its cache
-	 * entry has expired.  This isn't the sort of usage that the filestreams
-	 * allocator is trying to optimise, so there's no point trying to track
-	 * its new AG somehow in the filestream data structures.
+	 * Only free the item here so we skip over the old AG earlier.
 	 */
-	if (!pip) {
-		TRACE_ORPHAN(mp, ip, *agp);
-		goto exit;
-	}
+	if (mru)
+		xfs_fstrm_free_func(mru);
 
-	/* Associate the parent inode with the AG. */
-	err = _xfs_filestream_update_ag(pip, NULL, *agp);
-	if (err)
-		goto exit;
-
-	/* Associate the file inode with the AG. */
-	err = _xfs_filestream_update_ag(ip, pip, *agp);
-	if (err)
-		goto exit;
-
-	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
-			*agp, xfs_filestream_peek_ag(mp, *agp));
-
+	IRELE(pip);
 exit:
-	/*
-	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
-	 * reference it took on it, since the file and directory will have taken
-	 * their own now if they were successfully cached.
-	 */
-	if (*agp != NULLAGNUMBER)
-		xfs_filestream_put_ag(mp, *agp);
-	else
+	if (*agp == NULLAGNUMBER)
 		*agp = 0;
-
-	if (pip)
-		xfs_iunlock(pip, XFS_IOLOCK_EXCL);
-
 	return err;
 }
 
-/*
- * Remove an association between an inode and a filestream object.
- * Typically this is done on last close of an unlinked file.
- */
 void
 xfs_filestream_deassociate(
-	xfs_inode_t	*ip)
+	struct xfs_inode	*ip)
 {
-	xfs_mru_cache_t	*cache = ip->i_mount->m_filestream;
+	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
+}
 
-	xfs_mru_cache_delete(cache, ip->i_ino);
+int
+xfs_filestream_mount(
+	xfs_mount_t	*mp)
+{
+	/*
+	 * The filestream timer tunable is currently fixed within the range of
+	 * one second to four minutes, with five seconds being the default.  The
+	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
+	 * timer tunable to within about 10 percent.  This requires at least 10
+	 * groups.
+	 */
+	return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
+				    10, xfs_fstrm_free_func);
+}
+
+void
+xfs_filestream_unmount(
+	xfs_mount_t	*mp)
+{
+	xfs_mru_cache_destroy(mp->m_filestream);
 }

diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 6d61dbe..2ef4340 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h

@@ -20,50 +20,20 @@
 
 struct xfs_mount;
 struct xfs_inode;
-struct xfs_perag;
 struct xfs_bmalloca;
 
-#ifdef XFS_FILESTREAMS_TRACE
-#define XFS_FSTRM_KTRACE_INFO		1
-#define XFS_FSTRM_KTRACE_AGSCAN		2
-#define XFS_FSTRM_KTRACE_AGPICK1	3
-#define XFS_FSTRM_KTRACE_AGPICK2	4
-#define XFS_FSTRM_KTRACE_UPDATE		5
-#define XFS_FSTRM_KTRACE_FREE		6
-#define	XFS_FSTRM_KTRACE_ITEM_LOOKUP	7
-#define	XFS_FSTRM_KTRACE_ASSOCIATE	8
-#define	XFS_FSTRM_KTRACE_MOVEAG		9
-#define	XFS_FSTRM_KTRACE_ORPHAN		10
-
-#define XFS_FSTRM_KTRACE_SIZE	16384
-extern ktrace_t *xfs_filestreams_trace_buf;
-
-#endif
-
-/* allocation selection flags */
-typedef enum xfs_fstrm_alloc {
-	XFS_PICK_USERDATA = 1,
-	XFS_PICK_LOWSPACE = 2,
-} xfs_fstrm_alloc_t;
-
-/* prototypes for filestream.c */
-int xfs_filestream_init(void);
-void xfs_filestream_uninit(void);
 int xfs_filestream_mount(struct xfs_mount *mp);
 void xfs_filestream_unmount(struct xfs_mount *mp);
-xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
-int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
 void xfs_filestream_deassociate(struct xfs_inode *ip);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
 int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
 
-
-/* filestreams for the inode? */
 static inline int
 xfs_inode_is_filestream(
 	struct xfs_inode	*ip)
 {
 	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
-		xfs_iflags_test(ip, XFS_IFILESTREAM) ||
 		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
 }
 

diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index 9898f31..34d85ac 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h

@@ -202,6 +202,8 @@
  */
 #define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */
 #define	XFS_IBT_CRC_MAGIC	0x49414233	/* 'IAB3' */
+#define	XFS_FIBT_MAGIC		0x46494254	/* 'FIBT' */
+#define	XFS_FIBT_CRC_MAGIC	0x46494233	/* 'FIB3' */
 
 typedef	__uint64_t	xfs_inofree_t;
 #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
@@ -244,7 +246,17 @@
  * block numbers in the AG.
  */
 #define	XFS_IBT_BLOCK(mp)		((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
-#define	XFS_PREALLOC_BLOCKS(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#define	XFS_FIBT_BLOCK(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+
+/*
+ * The first data block of an AG depends on whether the filesystem was formatted
+ * with the finobt feature. If so, account for the finobt reserved root btree
+ * block. The result is cast to xfs_agblock_t, as the replaced definition was.
+ */
+#define XFS_PREALLOC_BLOCKS(mp) \
+	((xfs_agblock_t)(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
+	 XFS_FIBT_BLOCK(mp) + 1 : \
+	 XFS_IBT_BLOCK(mp) + 1))
 
 
 

diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c5fc116..d34703d 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h

@@ -238,6 +238,7 @@
 #define XFS_FSOP_GEOM_FLAGS_LAZYSB	0x4000	/* lazy superblock counters */
 #define XFS_FSOP_GEOM_FLAGS_V5SB	0x8000	/* version 5 superblock */
 #define XFS_FSOP_GEOM_FLAGS_FTYPE	0x10000	/* inode directory types */
+#define XFS_FSOP_GEOM_FLAGS_FINOBT	0x20000	/* free inode btree */
 
 /*
  * Minimum and maximum sizes need for growth checks.

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 02fb943..d229556 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c

@@ -24,6 +24,8 @@
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
@@ -74,23 +76,18 @@
 	}
 	if (new_version >= 3) {
 		geo->version = XFS_FSOP_GEOM_VERSION;
-		geo->flags =
+		geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
+			     XFS_FSOP_GEOM_FLAGS_DIRV2 |
 			(xfs_sb_version_hasattr(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
-			(xfs_sb_version_hasnlink(&mp->m_sb) ?
-				XFS_FSOP_GEOM_FLAGS_NLINK : 0) |
 			(xfs_sb_version_hasquota(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
 			(xfs_sb_version_hasalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
 			(xfs_sb_version_hasdalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
-			(xfs_sb_version_hasshared(&mp->m_sb) ?
-				XFS_FSOP_GEOM_FLAGS_SHARED : 0) |
 			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
-			(xfs_sb_version_hasdirv2(&mp->m_sb) ?
-				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
 			(xfs_sb_version_hassector(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
 			(xfs_sb_version_hasasciici(&mp->m_sb) ?
@@ -104,11 +101,13 @@
 			(xfs_sb_version_hascrc(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
 			(xfs_sb_version_hasftype(&mp->m_sb) ?
-				XFS_FSOP_GEOM_FLAGS_FTYPE : 0);
+				XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
+			(xfs_sb_version_hasfinobt(&mp->m_sb) ?
+				XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
 		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
 				mp->m_sb.sb_logsectsize : BBSIZE;
 		geo->rtsectsize = mp->m_sb.sb_blocksize;
-		geo->dirblocksize = mp->m_dirblksize;
+		geo->dirblocksize = mp->m_dir_geo->blksize;
 	}
 	if (new_version >= 4) {
 		geo->flags |=
@@ -316,6 +315,10 @@
 		agi->agi_dirino = cpu_to_be32(NULLAGINO);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
 			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
+			agi->agi_free_level = cpu_to_be32(1);
+		}
 		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
 			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
 
@@ -407,6 +410,34 @@
 		xfs_buf_relse(bp);
 		if (error)
 			goto error0;
+
+		/*
+		 * FINO btree root block
+		 */
+		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+			bp = xfs_growfs_get_hdr_buf(mp,
+				XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+				BTOBB(mp->m_sb.sb_blocksize), 0,
+				&xfs_inobt_buf_ops);
+			if (!bp) {
+				error = ENOMEM;
+				goto error0;
+			}
+
+			if (xfs_sb_version_hascrc(&mp->m_sb))
+				xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
+						     0, 0, agno,
+						     XFS_BTREE_CRC_BLOCKS);
+			else
+				xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
+						     0, agno, 0);
+
+			error = xfs_bwrite(bp);
+			xfs_buf_relse(bp);
+			if (error)
+				goto error0;
+		}
+
 	}
 	xfs_trans_agblocks_delta(tp, nfree);
 	/*

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 8f711db..5960e55 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c

@@ -112,6 +112,66 @@
 }
 
 /*
+ * Insert a single inobt record. Cursor must already point to desired location.
+ */
+STATIC int
+xfs_inobt_insert_rec(
+	struct xfs_btree_cur	*cur,		/* cursor already positioned */
+	__int32_t		freecount,	/* ir_freecount of new record */
+	xfs_inofree_t		free,		/* ir_free mask of new record */
+	int			*stat)		/* handed to xfs_btree_insert() */
+{
+	cur->bc_rec.i.ir_freecount = freecount;
+	cur->bc_rec.i.ir_free = free;
+	return xfs_btree_insert(cur, stat);	/* ir_startino is not set here */
+}
+
+/*
+ * Insert records describing a newly allocated inode chunk into the inobt.
+ */
+STATIC int
+xfs_inobt_insert(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,		/* AGI buffer */
+	xfs_agino_t		newino,		/* first inode of new range */
+	xfs_agino_t		newlen,		/* number of new inodes */
+	xfs_btnum_t		btnum)		/* XFS_BTNUM_INO or _FINO */
+{
+	struct xfs_btree_cur	*cur;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t		agno = be32_to_cpu(agi->agi_seqno);
+	xfs_agino_t		thisino;	/* start inode of current chunk */
+	int			i;		/* lookup/insert result */
+	int			error;
+
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+	for (thisino = newino;
+	     thisino < newino + newlen;
+	     thisino += XFS_INODES_PER_CHUNK) {	/* one record per chunk */
+		error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
+		if (error) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 0);		/* no record may exist yet */
+
+		error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+					     XFS_INOBT_ALL_FREE, &i);
+		if (error) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 1);		/* insert is expected to report 1 */
+	}
+
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+	return 0;
+}
+
+/*
  * Verify that the number of free inodes in the AGI is correct.
  */
 #ifdef DEBUG
@@ -220,10 +280,8 @@
 		if (tp)
 			xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
 					mp->m_sb.sb_inodesize, length, gen);
-	} else if (xfs_sb_version_hasnlink(&mp->m_sb))
+	} else
 		version = 2;
-	else
-		version = 1;
 
 	for (j = 0; j < nbufs; j++) {
 		/*
@@ -303,13 +361,10 @@
 {
 	xfs_agi_t	*agi;		/* allocation group header */
 	xfs_alloc_arg_t	args;		/* allocation argument structure */
-	xfs_btree_cur_t	*cur;		/* inode btree cursor */
 	xfs_agnumber_t	agno;
 	int		error;
-	int		i;
 	xfs_agino_t	newino;		/* new first inode's number */
 	xfs_agino_t	newlen;		/* new number of inodes */
-	xfs_agino_t	thisino;	/* current inode number, for loop */
 	int		isaligned = 0;	/* inode allocation at stripe unit */
 					/* boundary */
 	struct xfs_perag *pag;
@@ -459,29 +514,19 @@
 	agi->agi_newino = cpu_to_be32(newino);
 
 	/*
-	 * Insert records describing the new inode chunk into the btree.
+	 * Insert records describing the new inode chunk into the btrees.
 	 */
-	cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
-	for (thisino = newino;
-	     thisino < newino + newlen;
-	     thisino += XFS_INODES_PER_CHUNK) {
-		cur->bc_rec.i.ir_startino = thisino;
-		cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
-		cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
-		error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
-		if (error) {
-			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+				 XFS_BTNUM_INO);
+	if (error)
+		return error;
+
+	if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+		error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+					 XFS_BTNUM_FINO);
+		if (error)
 			return error;
-		}
-		ASSERT(i == 0);
-		error = xfs_btree_insert(cur, &i);
-		if (error) {
-			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-			return error;
-		}
-		ASSERT(i == 1);
 	}
-	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	/*
 	 * Log allocation group header fields
 	 */
@@ -675,13 +720,10 @@
 }
 
 /*
- * Allocate an inode.
- *
- * The caller selected an AG for us, and made sure that free inodes are
- * available.
+ * Allocate an inode using the inobt-only algorithm.
  */
 STATIC int
-xfs_dialloc_ag(
+xfs_dialloc_ag_inobt(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agbp,
 	xfs_ino_t		parent,
@@ -707,7 +749,7 @@
 	ASSERT(pag->pagi_freecount > 0);
 
  restart_pagno:
-	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
 	/*
 	 * If pagino is 0 (this is the root inode allocation) use newino.
 	 * This must work because we've just allocated some.
@@ -940,6 +982,294 @@
 }
 
 /*
+ * Use the free inode btree to allocate an inode based on distance from the
+ * parent. Note that the provided cursor may be deleted and replaced.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_near(
+	xfs_agino_t			pagino,	/* AG-relative target inode */
+	struct xfs_btree_cur		**ocur,	/* in/out: finobt cursor */
+	struct xfs_inobt_rec_incore	*rec)	/* out: selected record */
+{
+	struct xfs_btree_cur		*lcur = *ocur;	/* left search cursor */
+	struct xfs_btree_cur		*rcur;	/* right search cursor */
+	struct xfs_inobt_rec_incore	rrec;	/* record found by rcur */
+	int				error;
+	int				i, j;	/* left (i), right (j) results */
+
+	error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
+	if (error)
+		return error;	/* lcur is left for the caller to delete */
+
+	if (i == 1) {
+		error = xfs_inobt_get_rec(lcur, rec, &i);
+		if (error)
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+		/*
+		 * See if we've landed in the parent inode record. The finobt
+		 * only tracks chunks with at least one free inode, so record
+		 * existence is enough.
+		 */
+		if (pagino >= rec->ir_startino &&
+		    pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
+			return 0;
+	}
+
+	error = xfs_btree_dup_cursor(lcur, &rcur);
+	if (error)
+		return error;
+
+	error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
+	if (error)
+		goto error_rcur;
+	if (j == 1) {
+		error = xfs_inobt_get_rec(rcur, &rrec, &j);
+		if (error)
+			goto error_rcur;
+		XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
+	}
+
+	XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
+	if (i == 1 && j == 1) {
+		/*
+		 * Both the left and right records are valid. Choose the closer
+		 * inode chunk to the target.
+		 */
+		if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
+		    (rrec.ir_startino - pagino)) {
+			*rec = rrec;
+			xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+			*ocur = rcur;	/* caller continues with rcur */
+		} else {
+			xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+		}
+	} else if (j == 1) {
+		/* only the right record is valid */
+		*rec = rrec;
+		xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+		*ocur = rcur;
+	} else if (i == 1) {
+		/* only the left record is valid */
+		xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+	}
+
+	return 0;
+
+error_rcur:	/* only rcur is deleted here; *ocur stays with the caller */
+	xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Use the free inode btree to find a free inode based on the agi_newino hint.
+ * If the hint is unset or not in the finobt, use the first record in the AG.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_newino(
+	struct xfs_agi			*agi,	/* AGI header, big-endian fields */
+	struct xfs_btree_cur		*cur,	/* finobt cursor */
+	struct xfs_inobt_rec_incore	*rec)	/* out: record to allocate from */
+{
+	int error;
+	int i;
+
+	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+					 XFS_LOOKUP_EQ, &i); /* cpu-endian key */
+		if (error)
+			return error;
+		if (i == 1) {
+			error = xfs_inobt_get_rec(cur, rec, &i);
+			if (error)
+				return error;
+			XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+			return 0;
+		}
+	}
+
+	/*
+	 * Find the first inode available in the AG.
+	 */
+	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+	error = xfs_inobt_get_rec(cur, rec, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+	return 0;
+}
+
+/*
+ * Update the inobt based on a modification made to the finobt. Also ensure that
+ * the records from both trees are equivalent post-modification.
+ */
+STATIC int
+xfs_dialloc_ag_update_inobt(
+	struct xfs_btree_cur		*cur,	/* inobt cursor */
+	struct xfs_inobt_rec_incore	*frec,	/* finobt record */
+	int				offset) /* inode offset */
+{
+	struct xfs_inobt_rec_incore	rec;	/* matching inobt record */
+	int				error;
+	int				i;	/* lookup/get_rec result */
+
+	error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+	error = xfs_inobt_get_rec(cur, &rec, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+	ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+
+	rec.ir_free &= ~XFS_INOBT_MASK(offset);	/* clear the inode's free bit */
+	rec.ir_freecount--;
+
+	XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
+				  (rec.ir_freecount == frec->ir_freecount));
+
+	error = xfs_inobt_update(cur, &rec);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/*
+ * Allocate an inode using the free inode btree, if available. Otherwise, fall
+ * back to the inobt search algorithm.
+ *
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
+ */
+STATIC int
+xfs_dialloc_ag(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,		/* AGI buffer */
+	xfs_ino_t		parent,		/* parent inode number */
+	xfs_ino_t		*inop)		/* out: allocated inode number */
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
+	xfs_agnumber_t			pagno = XFS_INO_TO_AGNO(mp, parent);
+	xfs_agino_t			pagino = XFS_INO_TO_AGINO(mp, parent);
+	struct xfs_perag		*pag;
+	struct xfs_btree_cur		*cur;	/* finobt cursor */
+	struct xfs_btree_cur		*icur;	/* inobt cursor */
+	struct xfs_inobt_rec_incore	rec;	/* finobt record allocated from */
+	xfs_ino_t			ino;	/* inode number being allocated */
+	int				error;
+	int				offset;	/* index of the inode within rec */
+	int				i;	/* xfs_btree_delete result, unchecked */
+
+	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+		return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
+
+	pag = xfs_perag_get(mp, agno);	/* put again on every exit below */
+
+	/*
+	 * If pagino is 0 (this is the root inode allocation) use newino.
+	 * This must work because we've just allocated some.
+	 */
+	if (!pagino)
+		pagino = be32_to_cpu(agi->agi_newino);
+
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+	error = xfs_check_agi_freecount(cur, agi);
+	if (error)
+		goto error_cur;
+
+	/*
+	 * The search algorithm depends on whether we're in the same AG as the
+	 * parent. If so, find the closest available inode to the parent. If
+	 * not, consider the agi hint or find the first free inode in the AG.
+	 */
+	if (agno == pagno)
+		error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
+	else
+		error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
+	if (error)
+		goto error_cur;
+
+	offset = xfs_lowbit64(rec.ir_free);
+	ASSERT(offset >= 0);
+	ASSERT(offset < XFS_INODES_PER_CHUNK);
+	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+
+	/*
+	 * Modify or remove the finobt record.
+	 */
+	rec.ir_free &= ~XFS_INOBT_MASK(offset);
+	rec.ir_freecount--;
+	if (rec.ir_freecount)
+		error = xfs_inobt_update(cur, &rec);
+	else
+		error = xfs_btree_delete(cur, &i);	/* no free inodes left */
+	if (error)
+		goto error_cur;
+
+	/*
+	 * The finobt has now been updated appropriately. We haven't updated the
+	 * agi and superblock yet, so we can create an inobt cursor and validate
+	 * the original freecount. If all is well, make the equivalent update to
+	 * the inobt using the finobt record and offset information.
+	 */
+	icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+
+	error = xfs_check_agi_freecount(icur, agi);
+	if (error)
+		goto error_icur;
+
+	error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
+	if (error)
+		goto error_icur;
+
+	/*
+	 * Both trees have now been updated. We must update the perag and
+	 * superblock before we can check the freecount for each btree.
+	 */
+	be32_add_cpu(&agi->agi_freecount, -1);
+	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+	pag->pagi_freecount--;
+
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+
+	error = xfs_check_agi_freecount(icur, agi);
+	if (error)
+		goto error_icur;
+	error = xfs_check_agi_freecount(cur, agi);
+	if (error)
+		goto error_icur;
+
+	xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	xfs_perag_put(pag);
+	*inop = ino;
+	return 0;
+
+error_icur:	/* falls through so the finobt cursor is deleted as well */
+	xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
+error_cur:
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	xfs_perag_put(pag);
+	return error;
+}
+
+/*
  * Allocate an inode on disk.
  *
  * Mode is used to tell whether the new inode will need space, and whether it
@@ -1098,78 +1428,34 @@
 	return XFS_ERROR(error);
 }
 
-/*
- * Free disk inode.  Carefully avoids touching the incore inode, all
- * manipulations incore are the caller's responsibility.
- * The on-disk inode is not changed by this operation, only the
- * btree (free inode mask) is changed.
- */
-int
-xfs_difree(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_ino_t	inode,		/* inode to be freed */
-	xfs_bmap_free_t	*flist,		/* extents to free */
-	int		*delete,	/* set if inode cluster was deleted */
-	xfs_ino_t	*first_ino)	/* first inode in deleted cluster */
+STATIC int
+xfs_difree_inobt(
+	struct xfs_mount		*mp,
+	struct xfs_trans		*tp,
+	struct xfs_buf			*agbp,
+	xfs_agino_t			agino,
+	struct xfs_bmap_free		*flist,
+	int				*deleted,
+	xfs_ino_t			*first_ino,
+	struct xfs_inobt_rec_incore	*orec)
 {
-	/* REFERENCED */
-	xfs_agblock_t	agbno;	/* block number containing inode */
-	xfs_buf_t	*agbp;	/* buffer containing allocation group header */
-	xfs_agino_t	agino;	/* inode number relative to allocation group */
-	xfs_agnumber_t	agno;	/* allocation group number */
-	xfs_agi_t	*agi;	/* allocation group header */
-	xfs_btree_cur_t	*cur;	/* inode btree cursor */
-	int		error;	/* error return value */
-	int		i;	/* result code */
-	int		ilen;	/* inodes in an inode cluster */
-	xfs_mount_t	*mp;	/* mount structure for filesystem */
-	int		off;	/* offset of inode in inode chunk */
-	xfs_inobt_rec_incore_t rec;	/* btree record */
-	struct xfs_perag *pag;
+	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
+	struct xfs_perag		*pag;
+	struct xfs_btree_cur		*cur;
+	struct xfs_inobt_rec_incore	rec;
+	int				ilen;
+	int				error;
+	int				i;
+	int				off;
 
-	mp = tp->t_mountp;
-
-	/*
-	 * Break up inode number into its components.
-	 */
-	agno = XFS_INO_TO_AGNO(mp, inode);
-	if (agno >= mp->m_sb.sb_agcount)  {
-		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
-			__func__, agno, mp->m_sb.sb_agcount);
-		ASSERT(0);
-		return XFS_ERROR(EINVAL);
-	}
-	agino = XFS_INO_TO_AGINO(mp, inode);
-	if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
-		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
-			__func__, (unsigned long long)inode,
-			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
-		ASSERT(0);
-		return XFS_ERROR(EINVAL);
-	}
-	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
-	if (agbno >= mp->m_sb.sb_agblocks)  {
-		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
-			__func__, agbno, mp->m_sb.sb_agblocks);
-		ASSERT(0);
-		return XFS_ERROR(EINVAL);
-	}
-	/*
-	 * Get the allocation group header.
-	 */
-	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-	if (error) {
-		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
-			__func__, error);
-		return error;
-	}
-	agi = XFS_BUF_TO_AGI(agbp);
 	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-	ASSERT(agbno < be32_to_cpu(agi->agi_length));
+	ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
+
 	/*
 	 * Initialize the cursor.
 	 */
-	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
 
 	error = xfs_check_agi_freecount(cur, agi);
 	if (error)
@@ -1209,7 +1495,7 @@
 	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
 	    (rec.ir_freecount == mp->m_ialloc_inos)) {
 
-		*delete = 1;
+		*deleted = 1;
 		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
 
 		/*
@@ -1237,7 +1523,7 @@
 				  XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
 				  mp->m_ialloc_blks, flist, mp);
 	} else {
-		*delete = 0;
+		*deleted = 0;
 
 		error = xfs_inobt_update(cur, &rec);
 		if (error) {
@@ -1261,6 +1547,7 @@
 	if (error)
 		goto error0;
 
+	*orec = rec;
 	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	return 0;
 
@@ -1269,6 +1556,182 @@
 	return error;
 }
 
+/*
+ * Free an inode in the free inode btree.
+ */
+STATIC int
+xfs_difree_finobt(
+	struct xfs_mount		*mp,
+	struct xfs_trans		*tp,
+	struct xfs_buf			*agbp,	/* AGI buffer */
+	xfs_agino_t			agino,	/* AG-relative inode being freed */
+	struct xfs_inobt_rec_incore	*ibtrec) /* inobt record */
+{
+	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
+	struct xfs_btree_cur		*cur;	/* finobt cursor */
+	struct xfs_inobt_rec_incore	rec;	/* existing finobt record */
+	int				offset = agino - ibtrec->ir_startino;
+	int				error;
+	int				i;	/* lookup/insert/delete result */
+
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+	error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
+	if (error)
+		goto error;
+	if (i == 0) {
+		/*
+		 * If the record does not exist in the finobt, we must have just
+		 * freed an inode in a previously fully allocated chunk. If not,
+		 * something is out of sync.
+		 */
+		XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
+
+		error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+					     ibtrec->ir_free, &i);
+		if (error)
+			goto error;
+		ASSERT(i == 1);
+
+		goto out;	/* skip the update path, still check counts */
+	}
+
+	/*
+	 * Read and update the existing record. We could just copy the ibtrec
+	 * across here, but that would defeat the purpose of having redundant
+	 * metadata. By making the modifications independently, we can catch
+	 * corruptions that we wouldn't see if we just copied from one record
+	 * to another.
+	 */
+	error = xfs_inobt_get_rec(cur, &rec, &i);
+	if (error)
+		goto error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error);
+
+	rec.ir_free |= XFS_INOBT_MASK(offset);	/* set the inode's free bit */
+	rec.ir_freecount++;
+
+	XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
+				(rec.ir_freecount == ibtrec->ir_freecount),
+				error);
+
+	/*
+	 * The content of inobt records should always match between the inobt
+	 * and finobt. The lifecycle of records in the finobt is different from
+	 * the inobt in that the finobt only tracks records with at least one
+	 * free inode. Hence, if all of the inodes are free and we aren't
+	 * keeping inode chunks permanently on disk, remove the record.
+	 * Otherwise, update the record with the new information.
+	 */
+	if (rec.ir_freecount == mp->m_ialloc_inos &&
+	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+		error = xfs_btree_delete(cur, &i);
+		if (error)
+			goto error;
+		ASSERT(i == 1);
+	} else {
+		error = xfs_inobt_update(cur, &rec);
+		if (error)
+			goto error;
+	}
+
+out:
+	error = xfs_check_agi_freecount(cur, agi);
+	if (error)
+		goto error;
+
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	return 0;
+
+error:	/* label only; the 'error' variable holds the code to return */
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Free disk inode.  Carefully avoids touching the incore inode, all
+ * manipulations incore are the caller's responsibility.
+ * The on-disk inode is not changed by this operation, only the
+ * btree (free inode mask) is changed.
+ */
+int
+xfs_difree(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	xfs_ino_t		inode,		/* inode to be freed */
+	struct xfs_bmap_free	*flist,		/* extents to free */
+	int			*deleted,/* set if inode cluster was deleted */
+	xfs_ino_t		*first_ino)/* first inode in deleted cluster */
+{
+	/* REFERENCED */
+	xfs_agblock_t		agbno;	/* block number containing inode */
+	struct xfs_buf		*agbp;	/* buffer for allocation group header */
+	xfs_agino_t		agino;	/* allocation group inode number */
+	xfs_agnumber_t		agno;	/* allocation group number */
+	int			error;	/* error return value */
+	struct xfs_mount	*mp;	/* mount structure for filesystem */
+	struct xfs_inobt_rec_incore rec;/* inobt record filled by xfs_difree_inobt */
+
+	mp = tp->t_mountp;
+
+	/*
+	 * Break up inode number into its components.
+	 */
+	agno = XFS_INO_TO_AGNO(mp, inode);
+	if (agno >= mp->m_sb.sb_agcount)  {
+		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
+			__func__, agno, mp->m_sb.sb_agcount);
+		ASSERT(0);
+		return XFS_ERROR(EINVAL);
+	}
+	agino = XFS_INO_TO_AGINO(mp, inode);
+	if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
+		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+			__func__, (unsigned long long)inode,
+			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
+		ASSERT(0);
+		return XFS_ERROR(EINVAL);
+	}
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);	/* used only for this check */
+	if (agbno >= mp->m_sb.sb_agblocks)  {
+		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+			__func__, agbno, mp->m_sb.sb_agblocks);
+		ASSERT(0);
+		return XFS_ERROR(EINVAL);
+	}
+	/*
+	 * Get the allocation group header.
+	 */
+	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+	if (error) {
+		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
+			__func__, error);
+		return error;
+	}
+
+	/*
+	 * Fix up the inode allocation btree.
+	 */
+	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
+				 &rec);
+	if (error)
+		goto error0;
+
+	/*
+	 * Fix up the free inode btree.
+	 */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
+		if (error)
+			goto error0;
+	}
+
+	return 0;
+
+error0:	/* nothing is released here; the error is simply returned */
+	return error;
+}
+
 STATIC int
 xfs_imap_lookup(
 	struct xfs_mount	*mp,
@@ -1300,7 +1763,7 @@
 	 * we have a record, we need to ensure it contains the inode number
 	 * we are looking up.
 	 */
-	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
 	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
 	if (!error) {
 		if (i)
@@ -1488,7 +1951,16 @@
 }
 
 /*
- * Log specified fields for the ag hdr (inode section)
+ * Log specified fields for the ag hdr (inode section). The growth of the agi
+ * structure over time requires that we interpret the buffer as two logical
+ * regions delineated by the end of the unlinked list. This is due to the size
+ * of the hash table and its location in the middle of the agi.
+ *
+ * For example, a request to log a field before agi_unlinked and a field after
+ * agi_unlinked could cause us to log the entire hash table and use an excessive
+ * amount of log space. To avoid this behavior, log the region up through
+ * agi_unlinked in one call and the region after agi_unlinked through the end of
+ * the structure in another.
  */
 void
 xfs_ialloc_log_agi(
@@ -1511,6 +1983,8 @@
 		offsetof(xfs_agi_t, agi_newino),
 		offsetof(xfs_agi_t, agi_dirino),
 		offsetof(xfs_agi_t, agi_unlinked),
+		offsetof(xfs_agi_t, agi_free_root),
+		offsetof(xfs_agi_t, agi_free_level),
 		sizeof(xfs_agi_t)
 	};
 #ifdef DEBUG
@@ -1519,15 +1993,30 @@
 	agi = XFS_BUF_TO_AGI(bp);
 	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
 #endif
-	/*
-	 * Compute byte offsets for the first and last fields.
-	 */
-	xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
-	/*
-	 * Log the allocation group inode header buffer.
-	 */
+
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
-	xfs_trans_log_buf(tp, bp, first, last);
+
+	/*
+	 * Compute byte offsets for the first and last fields in the first
+	 * region and log the agi buffer. This only logs up through
+	 * agi_unlinked.
+	 */
+	if (fields & XFS_AGI_ALL_BITS_R1) {
+		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
+				  &first, &last);
+		xfs_trans_log_buf(tp, bp, first, last);
+	}
+
+	/*
+	 * Mask off the bits in the first region and calculate the first and
+	 * last field offsets for any bits in the second region.
+	 */
+	fields &= ~XFS_AGI_ALL_BITS_R1;
+	if (fields) {
+		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
+				  &first, &last);
+		xfs_trans_log_buf(tp, bp, first, last);
+	}
 }
 
 #ifdef DEBUG
@@ -1640,7 +2129,6 @@
 	if (error)
 		return error;
 
-	ASSERT(!xfs_buf_geterror(*bpp));
 	xfs_buf_set_ref(*bpp, XFS_AGI_REF);
 	return 0;
 }

diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 812365d..95ad1c0 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h

@@ -90,7 +90,7 @@
 	struct xfs_trans *tp,		/* transaction pointer */
 	xfs_ino_t	inode,		/* inode to be freed */
 	struct xfs_bmap_free *flist,	/* extents to free */
-	int		*delete,	/* set if inode cluster was deleted */
+	int		*deleted,	/* set if inode cluster was deleted */
 	xfs_ino_t	*first_ino);	/* first inode in deleted cluster */
 
 /*

diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 7e309b1..726f83a 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c

@@ -49,7 +49,8 @@
 	struct xfs_btree_cur	*cur)
 {
 	return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agbp, cur->bc_private.a.agno);
+			cur->bc_private.a.agbp, cur->bc_private.a.agno,
+			cur->bc_btnum);
 }
 
 STATIC void
@@ -66,12 +67,26 @@
 	xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
 }
 
+STATIC void
+xfs_finobt_set_root(
+	struct xfs_btree_cur	*cur,	/* finobt cursor holding the AGI buffer */
+	union xfs_btree_ptr	*nptr,	/* new root block pointer */
+	int			inc)	/* level change */
+{
+	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
+
+	agi->agi_free_root = nptr->s;
+	be32_add_cpu(&agi->agi_free_level, inc);
+	xfs_ialloc_log_agi(cur->bc_tp, agbp,	/* log both changed fields */
+			   XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
+}
+
 STATIC int
 xfs_inobt_alloc_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*start,
 	union xfs_btree_ptr	*new,
-	int			length,
 	int			*stat)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
@@ -173,6 +188,17 @@
 	ptr->s = agi->agi_root;
 }
 
+STATIC void
+xfs_finobt_init_ptr_from_cur(
+	struct xfs_btree_cur	*cur,	/* finobt cursor holding the AGI buffer */
+	union xfs_btree_ptr	*ptr)	/* out: set to agi_free_root */
+{
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
+
+	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
+	ptr->s = agi->agi_free_root;	/* copied as stored, no byte swap */
+}
+
 STATIC __int64_t
 xfs_inobt_key_diff(
 	struct xfs_btree_cur	*cur,
@@ -203,6 +229,7 @@
 	 */
 	switch (block->bb_magic) {
 	case cpu_to_be32(XFS_IBT_CRC_MAGIC):
+	case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
 		if (!xfs_sb_version_hascrc(&mp->m_sb))
 			return false;
 		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
@@ -214,6 +241,7 @@
 			return false;
 		/* fall through */
 	case cpu_to_be32(XFS_IBT_MAGIC):
+	case cpu_to_be32(XFS_FIBT_MAGIC):
 		break;
 	default:
 		return 0;
@@ -317,6 +345,28 @@
 #endif
 };
 
+static const struct xfs_btree_ops xfs_finobt_ops = {
+	.rec_len		= sizeof(xfs_inobt_rec_t),	/* inobt record type */
+	.key_len		= sizeof(xfs_inobt_key_t),	/* inobt key type */
+
+	.dup_cursor		= xfs_inobt_dup_cursor,
+	.set_root		= xfs_finobt_set_root,	/* finobt-specific */
+	.alloc_block		= xfs_inobt_alloc_block,
+	.free_block		= xfs_inobt_free_block,
+	.get_minrecs		= xfs_inobt_get_minrecs,
+	.get_maxrecs		= xfs_inobt_get_maxrecs,
+	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
+	.init_rec_from_key	= xfs_inobt_init_rec_from_key,
+	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
+	.init_ptr_from_cur	= xfs_finobt_init_ptr_from_cur, /* finobt-specific */
+	.key_diff		= xfs_inobt_key_diff,
+	.buf_ops		= &xfs_inobt_buf_ops,	/* verifier shared with inobt */
+#if defined(DEBUG) || defined(XFS_WARN)
+	.keys_inorder		= xfs_inobt_keys_inorder,
+	.recs_inorder		= xfs_inobt_recs_inorder,
+#endif
+};
+
 /*
  * Allocate a new inode btree cursor.
  */
@@ -325,7 +375,8 @@
 	struct xfs_mount	*mp,		/* file system mount point */
 	struct xfs_trans	*tp,		/* transaction pointer */
 	struct xfs_buf		*agbp,		/* buffer for agi structure */
-	xfs_agnumber_t		agno)		/* allocation group number */
+	xfs_agnumber_t		agno,		/* allocation group number */
+	xfs_btnum_t		btnum)		/* ialloc or free ino btree */
 {
 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
 	struct xfs_btree_cur	*cur;
@@ -334,11 +385,17 @@
 
 	cur->bc_tp = tp;
 	cur->bc_mp = mp;
-	cur->bc_nlevels = be32_to_cpu(agi->agi_level);
-	cur->bc_btnum = XFS_BTNUM_INO;
+	cur->bc_btnum = btnum;
+	if (btnum == XFS_BTNUM_INO) {
+		cur->bc_nlevels = be32_to_cpu(agi->agi_level);
+		cur->bc_ops = &xfs_inobt_ops;
+	} else {
+		cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
+		cur->bc_ops = &xfs_finobt_ops;
+	}
+
 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
 
-	cur->bc_ops = &xfs_inobt_ops;
 	if (xfs_sb_version_hascrc(&mp->m_sb))
 		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 

diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index f38b220..d7ebea7 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h

@@ -58,7 +58,8 @@
 		 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
 
 extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
-		struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
+		struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
+		xfs_btnum_t);
 extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
 
 #endif	/* __XFS_IALLOC_BTREE_H__ */

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 98d3524..c48df5f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c

@@ -507,8 +507,7 @@
 xfs_inode_ag_walk(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args,
@@ -582,7 +581,7 @@
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = execute(batch[i], pag, flags, args);
+			error = execute(batch[i], flags, args);
 			IRELE(batch[i]);
 			if (error == EAGAIN) {
 				skipped++;
@@ -636,8 +635,7 @@
 int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args)
@@ -664,8 +662,7 @@
 int
 xfs_inode_ag_iterator_tag(
 	struct xfs_mount	*mp,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags,
+	int			(*execute)(struct xfs_inode *ip, int flags,
 					   void *args),
 	int			flags,
 	void			*args,
@@ -1209,7 +1206,6 @@
 STATIC int
 xfs_inode_free_eofblocks(
 	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
 	int			flags,
 	void			*args)
 {

diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9ed68bb..9cf017b 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h

@@ -60,12 +60,10 @@
 void xfs_eofblocks_worker(struct work_struct *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
-	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
-		int flags, void *args),
+	int (*execute)(struct xfs_inode *ip, int flags, void *args),
 	int flags, void *args);
 int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
-	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
-		int flags, void *args),
+	int (*execute)(struct xfs_inode *ip, int flags, void *args),
 	int flags, void *args, int tag);
 
 static inline int

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 768087b..a6115fe 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c

@@ -655,7 +655,6 @@
 	uint		flags;
 	int		error;
 	timespec_t	tv;
-	int		filestreams = 0;
 
 	/*
 	 * Call the space management code to pick
@@ -682,6 +681,14 @@
 		return error;
 	ASSERT(ip != NULL);
 
+	/*
+	 * We always convert v1 inodes to v2 now - we only support filesystems
+	 * with >= v2 inode capability, so there is no reason for ever leaving
+	 * an inode in v1 format.
+	 */
+	if (ip->i_d.di_version == 1)
+		ip->i_d.di_version = 2;
+
 	ip->i_d.di_mode = mode;
 	ip->i_d.di_onlink = 0;
 	ip->i_d.di_nlink = nlink;
@@ -691,27 +698,6 @@
 	xfs_set_projid(ip, prid);
 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 
-	/*
-	 * If the superblock version is up to where we support new format
-	 * inodes and this is currently an old format inode, then change
-	 * the inode version number now.  This way we only do the conversion
-	 * here rather than here and in the flush/logging code.
-	 */
-	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
-	    ip->i_d.di_version == 1) {
-		ip->i_d.di_version = 2;
-		/*
-		 * We've already zeroed the old link count, the projid field,
-		 * and the pad field.
-		 */
-	}
-
-	/*
-	 * Project ids won't be stored on disk if we are using a version 1 inode.
-	 */
-	if ((prid != 0) && (ip->i_d.di_version == 1))
-		xfs_bump_ino_vers2(tp, ip);
-
 	if (pip && XFS_INHERIT_GID(pip)) {
 		ip->i_d.di_gid = pip->i_d.di_gid;
 		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
@@ -772,13 +758,6 @@
 		flags |= XFS_ILOG_DEV;
 		break;
 	case S_IFREG:
-		/*
-		 * we can't set up filestreams until after the VFS inode
-		 * is set up properly.
-		 */
-		if (pip && xfs_inode_is_filestream(pip))
-			filestreams = 1;
-		/* fall through */
 	case S_IFDIR:
 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 			uint	di_flags = 0;
@@ -844,15 +823,6 @@
 	/* now that we have an i_mode we can setup inode ops and unlock */
 	xfs_setup_inode(ip);
 
-	/* now we have set up the vfs inode we can associate the filestream */
-	if (filestreams) {
-		error = xfs_filestream_associate(pip, ip);
-		if (error < 0)
-			return -error;
-		if (!error)
-			xfs_iflags_set(ip, XFS_IFILESTREAM);
-	}
-
 	*ipp = ip;
 	return 0;
 }
@@ -1073,40 +1043,6 @@
 }
 
 /*
- * This gets called when the inode's version needs to be changed from 1 to 2.
- * Currently this happens when the nlink field overflows the old 16-bit value
- * or when chproj is called to change the project for the first time.
- * As a side effect the superblock version will also get rev'd
- * to contain the NLINK bit.
- */
-void
-xfs_bump_ino_vers2(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(ip->i_d.di_version == 1);
-
-	ip->i_d.di_version = 2;
-	ip->i_d.di_onlink = 0;
-	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-	mp = tp->t_mountp;
-	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
-		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
-			xfs_sb_version_addnlink(&mp->m_sb);
-			spin_unlock(&mp->m_sb_lock);
-			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
-		} else {
-			spin_unlock(&mp->m_sb_lock);
-		}
-	}
-	/* Caller must log the inode */
-}
-
-/*
  * Increment the link count on an inode & log the change.
  */
 int
@@ -1116,22 +1052,10 @@
 {
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
+	ASSERT(ip->i_d.di_version > 1);
 	ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
 	ip->i_d.di_nlink++;
 	inc_nlink(VFS_I(ip));
-	if ((ip->i_d.di_version == 1) &&
-	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
-		/*
-		 * The inode has increased its number of links beyond
-		 * what can fit in an old format inode.  It now needs
-		 * to be converted to a version 2 inode with a 32 bit
-		 * link count.  If this is the first inode in the file
-		 * system to do this, then we need to bump the superblock
-		 * version number as well.
-		 */
-		xfs_bump_ino_vers2(tp, ip);
-	}
-
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	return 0;
 }
@@ -1699,16 +1623,6 @@
 		int truncated;
 
 		/*
-		 * If we are using filestreams, and we have an unlinked
-		 * file that we are processing the last close on, then nothing
-		 * will be able to reopen and write to this file. Purge this
-		 * inode from the filestreams cache so that it doesn't delay
-		 * teardown of the inode.
-		 */
-		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
-			xfs_filestream_deassociate(ip);
-
-		/*
 		 * If we previously truncated this file and removed old data
 		 * in the process, we want to initiate "early" writeout on
 		 * the last close.  This is an attempt to combat the notorious
@@ -1838,9 +1752,33 @@
 	int			error;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
+
+	/*
+	 * The ifree transaction might need to allocate blocks for record
+	 * insertion to the finobt. We don't want to fail here at ENOSPC, so
+	 * allow ifree to dip into the reserved block pool if necessary.
+	 *
+	 * Freeing large sets of inodes generally means freeing inode chunks,
+	 * directory and file data blocks, so this should be relatively safe.
+	 * Only under severe circumstances should it be possible to free enough
+	 * inodes to exhaust the reserve block pool via finobt expansion while
+	 * at the same time not creating free space in the filesystem.
+	 *
+	 * Send a warning if the reservation does happen to fail, as the inode
+	 * now remains allocated and sits on the unlinked list until the fs is
+	 * repaired.
+	 */
+	tp->t_flags |= XFS_TRANS_RESERVE;
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
+				  XFS_IFREE_SPACE_RES(mp), 0);
 	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		if (error == ENOSPC) {
+			xfs_warn_ratelimited(mp,
+			"Failed to remove inode(s) from unlinked list. "
+			"Please free space, unmount and run xfs_repair.");
+		} else {
+			ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		}
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
 		return error;
 	}
@@ -2664,13 +2602,7 @@
 	if (error)
 		goto std_return;
 
-	/*
-	 * If we are using filestreams, kill the stream association.
-	 * If the file is still open it may get a new one but that
-	 * will get killed on last close in xfs_close() so we don't
-	 * have to worry about that.
-	 */
-	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
+	if (is_dir && xfs_inode_is_filestream(ip))
 		xfs_filestream_deassociate(ip);
 
 	return 0;
@@ -3258,6 +3190,7 @@
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 	ASSERT(iip != NULL && iip->ili_fields != 0);
+	ASSERT(ip->i_d.di_version > 1);
 
 	/* set *dip = inode's place in the buffer */
 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -3318,7 +3251,7 @@
 	}
 
 	/*
-	 * Inode item log recovery for v1/v2 inodes are dependent on the
+	 * Inode item log recovery for v2 inodes is dependent on the
 	 * di_flushiter count for correct sequencing. We bump the flush
 	 * iteration count so we can detect flushes which postdate a log record
 	 * during recovery. This is redundant as we now log every change and
@@ -3341,40 +3274,9 @@
 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
 		ip->i_d.di_flushiter = 0;
 
-	/*
-	 * If this is really an old format inode and the superblock version
-	 * has not been updated to support only new format inodes, then
-	 * convert back to the old inode format.  If the superblock version
-	 * has been updated, then make the conversion permanent.
-	 */
-	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
-	if (ip->i_d.di_version == 1) {
-		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
-			/*
-			 * Convert it back.
-			 */
-			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
-			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
-		} else {
-			/*
-			 * The superblock version has already been bumped,
-			 * so just make the conversion to the new inode
-			 * format permanent.
-			 */
-			ip->i_d.di_version = 2;
-			dip->di_version = 2;
-			ip->i_d.di_onlink = 0;
-			dip->di_onlink = 0;
-			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-			memset(&(dip->di_pad[0]), 0,
-			      sizeof(dip->di_pad));
-			ASSERT(xfs_get_projid(ip) == 0);
-		}
-	}
-
-	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
+	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
 	if (XFS_IFORK_Q(ip))
-		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
+		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 	xfs_inobp_check(mp, bp);
 
 	/*

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f2fcde5..f72bffa 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h

@@ -209,7 +209,6 @@
 #define XFS_ISTALE		(1 << 1) /* inode has been staled */
 #define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
 #define XFS_INEW		(1 << 3) /* inode has just been allocated */
-#define XFS_IFILESTREAM		(1 << 4) /* inode is in a filestream dir. */
 #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
 #define __XFS_IFLOCK_BIT	7	 /* inode is being flushed right now */
@@ -225,8 +224,7 @@
  */
 #define XFS_IRECLAIM_RESET_FLAGS	\
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
-	 XFS_IFILESTREAM);
+	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
 
 /*
  * Synchronize processes attempting to flush the in-core inode back to disk.
@@ -379,7 +377,6 @@
 			       struct xfs_inode **, int *);
 int		xfs_droplink(struct xfs_trans *, struct xfs_inode *);
 int		xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
-void		xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *);
 
 /* from xfs_file.c */
 int		xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);

diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 24e9939..cb35ae4 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c

@@ -437,17 +437,16 @@
 	}
 
 	/*
-	 * The inode format changed when we moved the link count and
-	 * made it 32 bits long.  If this is an old format inode,
-	 * convert it in memory to look like a new one.  If it gets
-	 * flushed to disk we will convert back before flushing or
-	 * logging it.  We zero out the new projid field and the old link
-	 * count field.  We'll handle clearing the pad field (the remains
-	 * of the old uuid field) when we actually convert the inode to
-	 * the new format. We don't change the version number so that we
-	 * can distinguish this from a real new format inode.
+	 * Automatically convert version 1 inode formats in memory to version 2
+	 * inode format. If the inode is modified, it will get logged and
+	 * rewritten as a version 2 inode. We can do this because we set the
+	 * superblock feature bit for v2 inodes unconditionally during mount
+	 * and it means the rest of the code can assume the inode version is 2
+	 * or higher.
 	 */
 	if (ip->i_d.di_version == 1) {
+		ip->i_d.di_version = 2;
+		memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 		ip->i_d.di_nlink = ip->i_d.di_onlink;
 		ip->i_d.di_onlink = 0;
 		xfs_set_projid(ip, 0);

diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
index 73514c0..b031e8d 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/xfs_inode_fork.c

@@ -798,8 +798,7 @@
 	xfs_inode_t		*ip,
 	xfs_dinode_t		*dip,
 	xfs_inode_log_item_t	*iip,
-	int			whichfork,
-	xfs_buf_t		*bp)
+	int			whichfork)
 {
 	char			*cp;
 	xfs_ifork_t		*ifp;

diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h
index eb329a1..7d3b1ed 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/xfs_inode_fork.h

@@ -127,8 +127,7 @@
 
 int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
 void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
-				struct xfs_inode_log_item *, int,
-				struct xfs_buf *);
+				struct xfs_inode_log_item *, int);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
 void		xfs_idata_realloc(struct xfs_inode *, int, int);
 void		xfs_iroot_realloc(struct xfs_inode *, int, int);

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 686889b..a640137 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c

@@ -145,34 +145,6 @@
 		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
 }
 
-/*
- * If this is a v1 format inode, then we need to log it as such.  This means
- * that we have to copy the link count from the new field to the old.  We
- * don't have to worry about the new fields, because nothing trusts them as
- * long as the old inode version number is there.
- */
-STATIC void
-xfs_inode_item_format_v1_inode(
-	struct xfs_inode	*ip)
-{
-	if (!xfs_sb_version_hasnlink(&ip->i_mount->m_sb)) {
-		/*
-		 * Convert it back.
-		 */
-		ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
-		ip->i_d.di_onlink = ip->i_d.di_nlink;
-	} else {
-		/*
-		 * The superblock version has already been bumped,
-		 * so just make the conversion to the new inode
-		 * format permanent.
-		 */
-		ip->i_d.di_version = 2;
-		ip->i_d.di_onlink = 0;
-		memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-	}
-}
-
 STATIC void
 xfs_inode_item_format_data_fork(
 	struct xfs_inode_log_item *iip,
@@ -370,6 +342,8 @@
 	struct xfs_inode_log_format *ilf;
 	struct xfs_log_iovec	*vecp = NULL;
 
+	ASSERT(ip->i_d.di_version > 1);
+
 	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
 	ilf->ilf_type = XFS_LI_INODE;
 	ilf->ilf_ino = ip->i_ino;
@@ -380,8 +354,6 @@
 	ilf->ilf_size = 2; /* format + core */
 	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
 
-	if (ip->i_d.di_version == 1)
-		xfs_inode_item_format_v1_inode(ip);
 	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
 			&ip->i_d,
 			xfs_icdinode_size(ip->i_d.di_version));

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0b18776..8bc1bbc 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c

@@ -543,10 +543,11 @@
 
 	ops = memdup_user(am_hreq.ops, size);
 	if (IS_ERR(ops)) {
-		error = PTR_ERR(ops);
+		error = -PTR_ERR(ops);
 		goto out_dput;
 	}
 
+	error = ENOMEM;
 	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
 	if (!attr_name)
 		goto out_kfree_ops;
@@ -556,7 +557,7 @@
 		ops[i].am_error = strncpy_from_user((char *)attr_name,
 				ops[i].am_attrname, MAXNAMELEN);
 		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-			error = -ERANGE;
+			error = ERANGE;
 		if (ops[i].am_error < 0)
 			break;
 
@@ -1215,7 +1216,7 @@
 		 * cleared upon successful return from chown()
 		 */
 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !inode_capable(VFS_I(ip), CAP_FSETID))
+		    !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
 
 		/*
@@ -1227,15 +1228,8 @@
 				olddquot = xfs_qm_vop_chown(tp, ip,
 							&ip->i_pdquot, pdqp);
 			}
+			ASSERT(ip->i_d.di_version > 1);
 			xfs_set_projid(ip, fa->fsx_projid);
-
-			/*
-			 * We may have to rev the inode as well as
-			 * the superblock version number since projids didn't
-			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-			 */
-			if (ip->i_d.di_version == 1)
-				xfs_bump_ino_vers2(tp, ip);
 		}
 
 	}

diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index a7992f8..944d5ba 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c

@@ -424,10 +424,11 @@
 
 	ops = memdup_user(compat_ptr(am_hreq.ops), size);
 	if (IS_ERR(ops)) {
-		error = PTR_ERR(ops);
+		error = -PTR_ERR(ops);
 		goto out_dput;
 	}
 
+	error = ENOMEM;
 	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
 	if (!attr_name)
 		goto out_kfree_ops;
@@ -438,7 +439,7 @@
 				compat_ptr(ops[i].am_attrname),
 				MAXNAMELEN);
 		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-			error = -ERANGE;
+			error = ERANGE;
 		if (ops[i].am_error < 0)
 			break;
 

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3b80eba..6c5eb4c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c

@@ -730,7 +730,7 @@
 			 */
 			nimaps = 1;
 			end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
-			error = xfs_bmap_last_offset(NULL, ip, &last_block,
+			error = xfs_bmap_last_offset(ip, &last_block,
 							XFS_DATA_FORK);
 			if (error)
 				goto trans_cancel;

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 36d6303..205613a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c

@@ -829,22 +829,34 @@
 	 */
 	inode_dio_wait(inode);
 
+	/*
+	 * Do all the page cache truncate work outside the transaction context
+	 * as the "lock" order is page lock->log space reservation.  i.e.
+	 * locking pages inside the transaction can ABBA deadlock with
+	 * writeback. We have to do the VFS inode size update before we truncate
+	 * the pagecache, however, to avoid racing with page faults beyond the
+	 * new EOF, as they are not serialised against truncate operations
+	 * except by page locks and size updates.
+	 *
+	 * Hence we are in a situation where a truncate can fail with ENOMEM
+	 * from xfs_trans_reserve(), but having already truncated the in-memory
+	 * version of the file (i.e. made user visible changes). There's not
+	 * much we can do about this, except to hope that the caller sees ENOMEM
+	 * and retries the truncate operation.
+	 */
 	error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
 	if (error)
 		return error;
+	truncate_setsize(inode, newsize);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)
 		goto out_trans_cancel;
 
-	truncate_setsize(inode, newsize);
-
 	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
 	lock_flags |= XFS_ILOCK_EXCL;
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
 	xfs_trans_ijoin(tp, ip, 0);
 
 	/*

diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f463382..cb64f22 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c

@@ -270,7 +270,8 @@
 		/*
 		 * Allocate and initialize a btree cursor for ialloc btree.
 		 */
-		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
+					    XFS_BTNUM_INO);
 		irbp = irbuf;
 		irbufend = irbuf + nirbuf;
 		end_of_ag = 0;
@@ -621,7 +622,8 @@
 				agino = 0;
 				continue;
 			}
-			cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+			cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
+						    XFS_BTNUM_INO);
 			error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
 						 &tmp);
 			if (error) {

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a5f8bd9..292308d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c

@@ -1165,7 +1165,7 @@
 	/*
 	 * Race to shutdown the filesystem if we see an error.
 	 */
-	if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
+	if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
 			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
 		xfs_buf_ioerror_alert(bp, __func__);
 		xfs_buf_stale(bp);
@@ -3952,11 +3952,14 @@
 		retval = xlog_state_ioerror(log);
 		spin_unlock(&log->l_icloglock);
 	}
+
 	/*
-	 * Wake up everybody waiting on xfs_log_force.
-	 * Callback all log item committed functions as if the
-	 * log writes were completed.
+	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
+	 * as if the log writes were completed. The abort handling in the log
+	 * item committed callback functions will do this again under lock to
+	 * avoid races.
 	 */
+	wake_up_all(&log->l_cilp->xc_commit_wait);
 	xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
 
 #ifdef XFSERRORDEBUG

diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2c40044..84e0deb 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h

@@ -24,7 +24,8 @@
 	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
 	struct xfs_log_item	*lv_item;	/* owner */
 	char			*lv_buf;	/* formatted buffer */
-	int			lv_buf_len;	/* size of formatted buffer */
+	int			lv_bytes;	/* accounted space in buffer */
+	int			lv_buf_len;	/* aligned size of buffer */
 	int			lv_size;	/* size of allocated lv */
 };
 
@@ -52,15 +53,21 @@
 	return vec->i_addr;
 }
 
+/*
+ * We need to make sure the next buffer is naturally aligned for the biggest
+ * basic data type we put into it.  We already accounted for this padding when
+ * sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we have to
+ * track the space used by the log vectors separately to prevent log space hangs
+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
+ * CIL context ticket.
+ */
 static inline void
 xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 {
-	/*
-	 * We need to make sure the next buffer is naturally aligned for the
-	 * biggest basic data type we put into it.  We already accounted for
-	 * this when sizing the buffer.
-	 */
 	lv->lv_buf_len += round_up(len, sizeof(uint64_t));
+	lv->lv_bytes += len;
 	vec->i_len = len;
 }
 

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7e54553..b3425b3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c

@@ -97,7 +97,7 @@
 {
 	/* Account for the new LV being passed in */
 	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
-		*diff_len += lv->lv_buf_len;
+		*diff_len += lv->lv_bytes;
 		*diff_iovecs += lv->lv_niovecs;
 	}
 
@@ -111,7 +111,7 @@
 	else if (old_lv != lv) {
 		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
 
-		*diff_len -= old_lv->lv_buf_len;
+		*diff_len -= old_lv->lv_bytes;
 		*diff_iovecs -= old_lv->lv_niovecs;
 		kmem_free(old_lv);
 	}
@@ -239,7 +239,7 @@
 			 * that the space reservation accounting is correct.
 			 */
 			*diff_iovecs -= lv->lv_niovecs;
-			*diff_len -= lv->lv_buf_len;
+			*diff_len -= lv->lv_bytes;
 		} else {
 			/* allocate new data chunk */
 			lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
@@ -259,6 +259,7 @@
 
 		/* The allocated data region lies beyond the iovec region */
 		lv->lv_buf_len = 0;
+		lv->lv_bytes = 0;
 		lv->lv_buf = (char *)lv + buf_size - nbytes;
 		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
 
@@ -385,7 +386,15 @@
 	xfs_extent_busy_clear(mp, &ctx->busy_extents,
 			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
+	/*
+	 * If we are aborting the commit, wake up anyone waiting on the
+	 * committing list.  If we don't, then a shutdown can leave processes
+	 * waiting in xlog_cil_force_lsn() waiting on a sequence commit that
+	 * will never happen because we aborted it.
+	 */
 	spin_lock(&ctx->cil->xc_push_lock);
+	if (abort)
+		wake_up_all(&ctx->cil->xc_commit_wait);
 	list_del(&ctx->committing);
 	spin_unlock(&ctx->cil->xc_push_lock);
 
@@ -564,8 +573,18 @@
 	spin_lock(&cil->xc_push_lock);
 	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
 		/*
+		 * Avoid getting stuck in this loop because we were woken by the
+		 * shutdown, but then went back to sleep once already in the
+		 * shutdown state.
+		 */
+		if (XLOG_FORCED_SHUTDOWN(log)) {
+			spin_unlock(&cil->xc_push_lock);
+			goto out_abort_free_ticket;
+		}
+
+		/*
 		 * Higher sequences will wait for this one so skip them.
-		 * Don't wait for own own sequence, either.
+		 * Don't wait for our own sequence, either.
 		 */
 		if (new_ctx->sequence >= ctx->sequence)
 			continue;
@@ -810,6 +829,13 @@
 	 */
 	spin_lock(&cil->xc_push_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
+		/*
+		 * Avoid getting stuck in this loop because we were woken by the
+		 * shutdown, but then went back to sleep once already in the
+		 * shutdown state.
+		 */
+		if (XLOG_FORCED_SHUTDOWN(log))
+			goto out_shutdown;
 		if (ctx->sequence > sequence)
 			continue;
 		if (!ctx->commit_lsn) {
@@ -833,14 +859,12 @@
 	 * push sequence after the above wait loop and the CIL still contains
 	 * dirty objects.
 	 *
-	 * When the push occurs, it will empty the CIL and
-	 * atomically increment the currect sequence past the push sequence and
-	 * move it into the committing list. Of course, if the CIL is clean at
-	 * the time of the push, it won't have pushed the CIL at all, so in that
-	 * case we should try the push for this sequence again from the start
-	 * just in case.
+	 * When the push occurs, it will empty the CIL and atomically increment
+	 * the current sequence past the push sequence and move it into the
+	 * committing list. Of course, if the CIL is clean at the time of the
+	 * push, it won't have pushed the CIL at all, so in that case we should
+	 * try the push for this sequence again from the start just in case.
 	 */
-
 	if (sequence == cil->xc_current_sequence &&
 	    !list_empty(&cil->xc_cil)) {
 		spin_unlock(&cil->xc_push_lock);
@@ -849,6 +873,17 @@
 
 	spin_unlock(&cil->xc_push_lock);
 	return commit_lsn;
+
+	/*
+	 * We detected a shutdown in progress. We need to trigger the log force
+	 * to pass through its iclog state machine error handling, even though
+	 * we are already in a shutdown state. Hence we can't return
+	 * NULLCOMMITLSN here as that has special meaning to log forces (i.e.
+	 * LSN is already stable), so we return a zero LSN instead.
+	 */
+out_shutdown:
+	spin_unlock(&cil->xc_push_lock);
+	return 0;
 }
 
 /*

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index bce53ac..981af0f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c

@@ -2138,7 +2138,9 @@
 			bp->b_ops = &xfs_allocbt_buf_ops;
 			break;
 		case XFS_IBT_CRC_MAGIC:
+		case XFS_FIBT_CRC_MAGIC:
 		case XFS_IBT_MAGIC:
+		case XFS_FIBT_MAGIC:
 			bp->b_ops = &xfs_inobt_buf_ops;
 			break;
 		case XFS_BMAP_CRC_MAGIC:
@@ -3145,7 +3147,7 @@
 		}
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
 	}
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 
 	return 0;
@@ -3520,8 +3522,7 @@
 
 STATIC int
 xlog_recover_unmount_trans(
-	struct xlog		*log,
-	struct xlog_recover	*trans)
+	struct xlog		*log)
 {
 	/* Do nothing now */
 	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
@@ -3595,7 +3596,7 @@
 								trans, pass);
 				break;
 			case XLOG_UNMOUNT_TRANS:
-				error = xlog_recover_unmount_trans(log, trans);
+				error = xlog_recover_unmount_trans(log);
 				break;
 			case XLOG_WAS_CONT_TRANS:
 				error = xlog_recover_add_to_cont_trans(log,
@@ -3757,7 +3758,7 @@
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
 	}
 out:
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 	return error;
 }

diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c
index 2af1a0a..ee7e0e8 100644
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/xfs_log_rlimit.c

@@ -42,7 +42,7 @@
 	int			size;
 	int			nblks;
 
-	size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) -
+	size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -
 	       MAXNAMELEN - 1;
 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
 	nblks += XFS_B_TO_FSB(mp, size);

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 944f3d9..3507cd0 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c

@@ -323,8 +323,19 @@
 	/*
 	 * Initialize the mount structure from the superblock.
 	 */
-	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
-	xfs_sb_quota_from_disk(&mp->m_sb);
+	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+	xfs_sb_quota_from_disk(sbp);
+
+	/*
+	 * If we haven't validated the superblock, do so now before we try
+	 * to check the sector size and reread the superblock appropriately.
+	 */
+	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+		if (loud)
+			xfs_warn(mp, "Invalid superblock magic number");
+		error = EINVAL;
+		goto release_buf;
+	}
 
 	/*
 	 * We must be able to do sector-sized and sector-aligned IO.
@@ -337,11 +348,11 @@
 		goto release_buf;
 	}
 
-	/*
-	 * Re-read the superblock so the buffer is correctly sized,
-	 * and properly verified.
-	 */
 	if (buf_ops == NULL) {
+		/*
+		 * Re-read the superblock so the buffer is correctly sized,
+		 * and properly verified.
+		 */
 		xfs_buf_relse(bp);
 		sector_size = sbp->sb_sectsize;
 		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
@@ -697,6 +708,12 @@
 			mp->m_update_flags |= XFS_SB_VERSIONNUM;
 	}
 
+	/* always use v2 inodes by default now */
+	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
+		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
+		mp->m_update_flags |= XFS_SB_VERSIONNUM;
+	}
+
 	/*
 	 * Check if sb_agblocks is aligned at stripe boundary
 	 * If sb_agblocks is NOT aligned turn off m_dalign since
@@ -774,12 +791,11 @@
 
 	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
 
-	xfs_dir_mount(mp);
-
-	/*
-	 * Initialize the attribute manager's entries.
-	 */
-	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
+	error = xfs_da_mount(mp);
+	if (error) {
+		xfs_warn(mp, "Failed dir/attr init: %d", error);
+		goto out_remove_uuid;
+	}
 
 	/*
 	 * Initialize the precomputed transaction reservations values.
@@ -794,7 +810,7 @@
 	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
 	if (error) {
 		xfs_warn(mp, "Failed per-ag init: %d", error);
-		goto out_remove_uuid;
+		goto out_free_dir;
 	}
 
 	if (!sbp->sb_logblocks) {
@@ -969,6 +985,8 @@
 	xfs_wait_buftarg(mp->m_ddev_targp);
  out_free_perag:
 	xfs_free_perag(mp);
+ out_free_dir:
+	xfs_da_unmount(mp);
  out_remove_uuid:
 	xfs_uuid_unmount(mp);
  out:
@@ -1046,6 +1064,7 @@
 				"Freespace may not be correct on next mount.");
 
 	xfs_log_unmount(mp);
+	xfs_da_unmount(mp);
 	xfs_uuid_unmount(mp);
 
 #if defined(DEBUG)

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a466c5e..7295a0b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h

@@ -27,6 +27,7 @@
 struct xfs_ail;
 struct xfs_quotainfo;
 struct xfs_dir_ops;
+struct xfs_da_geometry;
 
 #ifdef HAVE_PERCPU_SB
 
@@ -96,6 +97,8 @@
 	uint			m_readio_blocks; /* min read size blocks */
 	uint			m_writeio_log;	/* min write size log bytes */
 	uint			m_writeio_blocks; /* min write size blocks */
+	struct xfs_da_geometry	*m_dir_geo;	/* directory block geometry */
+	struct xfs_da_geometry	*m_attr_geo;	/* attribute block geometry */
 	struct xlog		*m_log;		/* log specific stuff */
 	int			m_logbufs;	/* number of log buffers */
 	int			m_logbsize;	/* size of each log buffer */
@@ -131,8 +134,6 @@
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_dmevmask;	/* DMI events for this FS */
 	__uint64_t		m_flags;	/* global mount flags */
-	uint			m_dir_node_ents; /* #entries in a dir danode */
-	uint			m_attr_node_ents; /* #entries in attr danode */
 	int			m_ialloc_inos;	/* inodes in inode allocation */
 	int			m_ialloc_blks;	/* blocks in inode allocation */
 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
@@ -145,17 +146,10 @@
 	int			m_dalign;	/* stripe unit */
 	int			m_swidth;	/* stripe width */
 	int			m_sinoalign;	/* stripe unit inode alignment */
-	int			m_attr_magicpct;/* 37% of the blocksize */
-	int			m_dir_magicpct;	/* 37% of the dir blocksize */
 	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */
 	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */
 	const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
 	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
-	int			m_dirblksize;	/* directory block sz--bytes */
-	int			m_dirblkfsbs;	/* directory block sz--fsbs */
-	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */
-	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */
-	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */
 	uint			m_chsize;	/* size of next field */
 	atomic_t		m_active_trans;	/* number trans frozen */
 #ifdef HAVE_PERCPU_SB

diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4aff563..f99b493 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c

@@ -100,14 +100,20 @@
  * likely result in a loop in one of the lists.  That's a sure-fire recipe for
  * an infinite loop in the code.
  */
-typedef struct xfs_mru_cache_elem
-{
-	struct list_head list_node;
-	unsigned long	key;
-	void		*value;
-} xfs_mru_cache_elem_t;
+struct xfs_mru_cache {
+	struct radix_tree_root	store;     /* Core storage data structure.  */
+	struct list_head	*lists;    /* Array of lists, one per grp.  */
+	struct list_head	reap_list; /* Elements overdue for reaping. */
+	spinlock_t		lock;      /* Lock to protect this struct.  */
+	unsigned int		grp_count; /* Number of discrete groups.    */
+	unsigned int		grp_time;  /* Time period spanned by grps.  */
+	unsigned int		lru_grp;   /* Group containing time zero.   */
+	unsigned long		time_zero; /* Time first element was added. */
+	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
+};
 
-static kmem_zone_t		*xfs_mru_elem_zone;
 static struct workqueue_struct	*xfs_mru_reap_wq;
 
 /*
@@ -129,12 +135,12 @@
  */
 STATIC unsigned long
 _xfs_mru_cache_migrate(
-	xfs_mru_cache_t	*mru,
-	unsigned long	now)
+	struct xfs_mru_cache	*mru,
+	unsigned long		now)
 {
-	unsigned int	grp;
-	unsigned int	migrated = 0;
-	struct list_head *lru_list;
+	unsigned int		grp;
+	unsigned int		migrated = 0;
+	struct list_head	*lru_list;
 
 	/* Nothing to do if the data store is empty. */
 	if (!mru->time_zero)
@@ -193,11 +199,11 @@
  */
 STATIC void
 _xfs_mru_cache_list_insert(
-	xfs_mru_cache_t		*mru,
-	xfs_mru_cache_elem_t	*elem)
+	struct xfs_mru_cache	*mru,
+	struct xfs_mru_cache_elem *elem)
 {
-	unsigned int	grp = 0;
-	unsigned long	now = jiffies;
+	unsigned int		grp = 0;
+	unsigned long		now = jiffies;
 
 	/*
 	 * If the data store is empty, initialise time zero, leave grp set to
@@ -231,10 +237,10 @@
  */
 STATIC void
 _xfs_mru_cache_clear_reap_list(
-	xfs_mru_cache_t		*mru) __releases(mru->lock) __acquires(mru->lock)
-
+	struct xfs_mru_cache	*mru)
+		__releases(mru->lock) __acquires(mru->lock)
 {
-	xfs_mru_cache_elem_t	*elem, *next;
+	struct xfs_mru_cache_elem *elem, *next;
 	struct list_head	tmp;
 
 	INIT_LIST_HEAD(&tmp);
@@ -252,15 +258,8 @@
 	spin_unlock(&mru->lock);
 
 	list_for_each_entry_safe(elem, next, &tmp, list_node) {
-
-		/* Remove the element from the reap list. */
 		list_del_init(&elem->list_node);
-
-		/* Call the client's free function with the key and value pointer. */
-		mru->free_func(elem->key, elem->value);
-
-		/* Free the element structure. */
-		kmem_zone_free(xfs_mru_elem_zone, elem);
+		mru->free_func(elem);
 	}
 
 	spin_lock(&mru->lock);
@@ -277,7 +276,8 @@
 _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
-	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
+	struct xfs_mru_cache	*mru =
+		container_of(work, struct xfs_mru_cache, work.work);
 	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
@@ -304,28 +304,16 @@
 int
 xfs_mru_cache_init(void)
 {
-	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
-	                                 "xfs_mru_cache_elem");
-	if (!xfs_mru_elem_zone)
-		goto out;
-
 	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
 	if (!xfs_mru_reap_wq)
-		goto out_destroy_mru_elem_zone;
-
+		return -ENOMEM;
 	return 0;
-
- out_destroy_mru_elem_zone:
-	kmem_zone_destroy(xfs_mru_elem_zone);
- out:
-	return -ENOMEM;
 }
 
 void
 xfs_mru_cache_uninit(void)
 {
 	destroy_workqueue(xfs_mru_reap_wq);
-	kmem_zone_destroy(xfs_mru_elem_zone);
 }
 
 /*
@@ -336,14 +324,14 @@
  */
 int
 xfs_mru_cache_create(
-	xfs_mru_cache_t		**mrup,
+	struct xfs_mru_cache	**mrup,
 	unsigned int		lifetime_ms,
 	unsigned int		grp_count,
 	xfs_mru_cache_free_func_t free_func)
 {
-	xfs_mru_cache_t	*mru = NULL;
-	int		err = 0, grp;
-	unsigned int	grp_time;
+	struct xfs_mru_cache	*mru = NULL;
+	int			err = 0, grp;
+	unsigned int		grp_time;
 
 	if (mrup)
 		*mrup = NULL;
@@ -400,7 +388,7 @@
  */
 static void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru)
+	struct xfs_mru_cache	*mru)
 {
 	if (!mru || !mru->lists)
 		return;
@@ -420,7 +408,7 @@
 
 void
 xfs_mru_cache_destroy(
-	xfs_mru_cache_t		*mru)
+	struct xfs_mru_cache	*mru)
 {
 	if (!mru || !mru->lists)
 		return;
@@ -438,38 +426,30 @@
  */
 int
 xfs_mru_cache_insert(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key,
-	void		*value)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key,
+	struct xfs_mru_cache_elem *elem)
 {
-	xfs_mru_cache_elem_t *elem;
+	int			error;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return EINVAL;
 
-	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
-	if (!elem)
+	if (radix_tree_preload(GFP_KERNEL))
 		return ENOMEM;
 
-	if (radix_tree_preload(GFP_KERNEL)) {
-		kmem_zone_free(xfs_mru_elem_zone, elem);
-		return ENOMEM;
-	}
-
 	INIT_LIST_HEAD(&elem->list_node);
 	elem->key = key;
-	elem->value = value;
 
 	spin_lock(&mru->lock);
-
-	radix_tree_insert(&mru->store, key, elem);
+	error = -radix_tree_insert(&mru->store, key, elem);
 	radix_tree_preload_end();
-	_xfs_mru_cache_list_insert(mru, elem);
-
+	if (!error)
+		_xfs_mru_cache_list_insert(mru, elem);
 	spin_unlock(&mru->lock);
 
-	return 0;
+	return error;
 }
 
 /*
@@ -478,13 +458,12 @@
  * the client data pointer for the removed element is returned, otherwise this
  * function will return a NULL pointer.
  */
-void *
+struct xfs_mru_cache_elem *
 xfs_mru_cache_remove(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	xfs_mru_cache_elem_t *elem;
-	void		*value = NULL;
+	struct xfs_mru_cache_elem *elem;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
@@ -492,17 +471,11 @@
 
 	spin_lock(&mru->lock);
 	elem = radix_tree_delete(&mru->store, key);
-	if (elem) {
-		value = elem->value;
+	if (elem)
 		list_del(&elem->list_node);
-	}
-
 	spin_unlock(&mru->lock);
 
-	if (elem)
-		kmem_zone_free(xfs_mru_elem_zone, elem);
-
-	return value;
+	return elem;
 }
 
 /*
@@ -511,13 +484,14 @@
  */
 void
 xfs_mru_cache_delete(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	void		*value = xfs_mru_cache_remove(mru, key);
+	struct xfs_mru_cache_elem *elem;
 
-	if (value)
-		mru->free_func(key, value);
+	elem = xfs_mru_cache_remove(mru, key);
+	if (elem)
+		mru->free_func(elem);
 }
 
 /*
@@ -540,12 +514,12 @@
  * status, we need to help it get it right by annotating the path that does
  * not release the lock.
  */
-void *
+struct xfs_mru_cache_elem *
 xfs_mru_cache_lookup(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
+	struct xfs_mru_cache	*mru,
+	unsigned long		key)
 {
-	xfs_mru_cache_elem_t *elem;
+	struct xfs_mru_cache_elem *elem;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
@@ -560,7 +534,7 @@
 	} else
 		spin_unlock(&mru->lock);
 
-	return elem ? elem->value : NULL;
+	return elem;
 }
 
 /*
@@ -570,7 +544,8 @@
  */
 void
 xfs_mru_cache_done(
-	xfs_mru_cache_t	*mru) __releases(mru->lock)
+	struct xfs_mru_cache	*mru)
+		__releases(mru->lock)
 {
 	spin_unlock(&mru->lock);
 }

diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 36dd3ec..fb5245b 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h

@@ -18,24 +18,15 @@
 #ifndef __XFS_MRU_CACHE_H__
 #define __XFS_MRU_CACHE_H__
 
+struct xfs_mru_cache;
+
+struct xfs_mru_cache_elem {
+	struct list_head list_node;	/* entry on a cache list */
+	unsigned long	key;		/* set by xfs_mru_cache_insert() */
+};
 
 /* Function pointer type for callback to free a client's data pointer. */
-typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
-
-typedef struct xfs_mru_cache
-{
-	struct radix_tree_root	store;     /* Core storage data structure.  */
-	struct list_head	*lists;    /* Array of lists, one per grp.  */
-	struct list_head	reap_list; /* Elements overdue for reaping. */
-	spinlock_t		lock;      /* Lock to protect this struct.  */
-	unsigned int		grp_count; /* Number of discrete groups.    */
-	unsigned int		grp_time;  /* Time period spanned by grps.  */
-	unsigned int		lru_grp;   /* Group containing time zero.   */
-	unsigned long		time_zero; /* Time first element was added. */
-	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
-	struct delayed_work	work;      /* Workqueue data for reaping.   */
-	unsigned int		queued;	   /* work has been queued */
-} xfs_mru_cache_t;
+typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
 
 int xfs_mru_cache_init(void);
 void xfs_mru_cache_uninit(void);
@@ -44,10 +35,12 @@
 			     xfs_mru_cache_free_func_t free_func);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
-				void *value);
-void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+		struct xfs_mru_cache_elem *elem);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
-void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_done(struct xfs_mru_cache *mru);
 
 #endif /* __XFS_MRU_CACHE_H__ */

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index dc977b6..6d26759 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c

@@ -193,47 +193,6 @@
 }
 
 /*
- * Release the group or project dquot pointers the user dquots maybe carrying
- * around as a hint, and proceed to purge the user dquot cache if requested.
-*/
-STATIC int
-xfs_qm_dqpurge_hints(
-	struct xfs_dquot	*dqp,
-	void			*data)
-{
-	struct xfs_dquot	*gdqp = NULL;
-	struct xfs_dquot	*pdqp = NULL;
-	uint			flags = *((uint *)data);
-
-	xfs_dqlock(dqp);
-	if (dqp->dq_flags & XFS_DQ_FREEING) {
-		xfs_dqunlock(dqp);
-		return EAGAIN;
-	}
-
-	/* If this quota has a hint attached, prepare for releasing it now */
-	gdqp = dqp->q_gdquot;
-	if (gdqp)
-		dqp->q_gdquot = NULL;
-
-	pdqp = dqp->q_pdquot;
-	if (pdqp)
-		dqp->q_pdquot = NULL;
-
-	xfs_dqunlock(dqp);
-
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
-	if (pdqp)
-		xfs_qm_dqrele(pdqp);
-
-	if (flags & XFS_QMOPT_UQUOTA)
-		return xfs_qm_dqpurge(dqp, NULL);
-
-	return 0;
-}
-
-/*
  * Purge the dquot cache.
  */
 void
@@ -241,18 +200,8 @@
 	struct xfs_mount	*mp,
 	uint			flags)
 {
-	/*
-	 * We have to release group/project dquot hint(s) from the user dquot
-	 * at first if they are there, otherwise we would run into an infinite
-	 * loop while walking through radix tree to purge other type of dquots
-	 * since their refcount is not zero if the user dquot refers to them
-	 * as hint.
-	 *
-	 * Call the special xfs_qm_dqpurge_hints() will end up go through the
-	 * general xfs_qm_dqpurge() against user dquot cache if requested.
-	 */
-	xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
-
+	if (flags & XFS_QMOPT_UQUOTA)
+		xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
 	if (flags & XFS_QMOPT_GQUOTA)
 		xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
 	if (flags & XFS_QMOPT_PQUOTA)
@@ -409,7 +358,6 @@
 	xfs_dqid_t	id,
 	uint		type,
 	uint		doalloc,
-	xfs_dquot_t	*udqhint, /* hint */
 	xfs_dquot_t	**IO_idqpp)
 {
 	xfs_dquot_t	*dqp;
@@ -419,9 +367,9 @@
 	error = 0;
 
 	/*
-	 * See if we already have it in the inode itself. IO_idqpp is
-	 * &i_udquot or &i_gdquot. This made the code look weird, but
-	 * made the logic a lot simpler.
+	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
+	 * or &i_gdquot. This made the code look weird, but made the logic a lot
+	 * simpler.
 	 */
 	dqp = *IO_idqpp;
 	if (dqp) {
@@ -430,49 +378,10 @@
 	}
 
 	/*
-	 * udqhint is the i_udquot field in inode, and is non-NULL only
-	 * when the type arg is group/project. Its purpose is to save a
-	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-	 * the user dquot.
-	 */
-	if (udqhint) {
-		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-		xfs_dqlock(udqhint);
-
-		/*
-		 * No need to take dqlock to look at the id.
-		 *
-		 * The ID can't change until it gets reclaimed, and it won't
-		 * be reclaimed as long as we have a ref from inode and we
-		 * hold the ilock.
-		 */
-		if (type == XFS_DQ_GROUP)
-			dqp = udqhint->q_gdquot;
-		else
-			dqp = udqhint->q_pdquot;
-		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-			ASSERT(*IO_idqpp == NULL);
-
-			*IO_idqpp = xfs_qm_dqhold(dqp);
-			xfs_dqunlock(udqhint);
-			return 0;
-		}
-
-		/*
-		 * We can't hold a dquot lock when we call the dqget code.
-		 * We'll deadlock in no time, because of (not conforming to)
-		 * lock ordering - the inodelock comes before any dquot lock,
-		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
-		 */
-		xfs_dqunlock(udqhint);
-	}
-
-	/*
-	 * Find the dquot from somewhere. This bumps the
-	 * reference count of dquot and returns it locked.
-	 * This can return ENOENT if dquot didn't exist on
-	 * disk and we didn't ask it to allocate;
-	 * ESRCH if quotas got turned off suddenly.
+	 * Find the dquot from somewhere. This bumps the reference count of
+	 * dquot and returns it locked.  This can return ENOENT if dquot didn't
+	 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
+	 * turned off suddenly.
 	 */
 	error = xfs_qm_dqget(ip->i_mount, ip, id, type,
 			     doalloc | XFS_QMOPT_DOWARN, &dqp);
@@ -490,48 +399,6 @@
 	return 0;
 }
 
-
-/*
- * Given a udquot and group/project type, attach the group/project
- * dquot pointer to the udquot as a hint for future lookups.
- */
-STATIC void
-xfs_qm_dqattach_hint(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	struct xfs_dquot **dqhintp;
-	struct xfs_dquot *dqp;
-	struct xfs_dquot *udq = ip->i_udquot;
-
-	ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-
-	xfs_dqlock(udq);
-
-	if (type == XFS_DQ_GROUP) {
-		dqp = ip->i_gdquot;
-		dqhintp = &udq->q_gdquot;
-	} else {
-		dqp = ip->i_pdquot;
-		dqhintp = &udq->q_pdquot;
-	}
-
-	if (*dqhintp) {
-		struct xfs_dquot *tmp;
-
-		if (*dqhintp == dqp)
-			goto done;
-
-		tmp = *dqhintp;
-		*dqhintp = NULL;
-		xfs_qm_dqrele(tmp);
-	}
-
-	*dqhintp = xfs_qm_dqhold(dqp);
-done:
-	xfs_dqunlock(udq);
-}
-
 static bool
 xfs_qm_need_dqattach(
 	struct xfs_inode	*ip)
@@ -562,7 +429,6 @@
 	uint		flags)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	uint		nquotas = 0;
 	int		error = 0;
 
 	if (!xfs_qm_need_dqattach(ip))
@@ -570,77 +436,39 @@
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-	if (XFS_IS_UQUOTA_ON(mp)) {
+	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
 						flags & XFS_QMOPT_DQALLOC,
-						NULL, &ip->i_udquot);
+						&ip->i_udquot);
 		if (error)
 			goto done;
-		nquotas++;
+		ASSERT(ip->i_udquot);
 	}
 
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
 						flags & XFS_QMOPT_DQALLOC,
-						ip->i_udquot, &ip->i_gdquot);
-		/*
-		 * Don't worry about the udquot that we may have
-		 * attached above. It'll get detached, if not already.
-		 */
+						&ip->i_gdquot);
 		if (error)
 			goto done;
-		nquotas++;
+		ASSERT(ip->i_gdquot);
 	}
 
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	if (XFS_IS_PQUOTA_ON(mp)) {
+	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
 		error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
 						flags & XFS_QMOPT_DQALLOC,
-						ip->i_udquot, &ip->i_pdquot);
-		/*
-		 * Don't worry about the udquot that we may have
-		 * attached above. It'll get detached, if not already.
-		 */
+						&ip->i_pdquot);
 		if (error)
 			goto done;
-		nquotas++;
+		ASSERT(ip->i_pdquot);
 	}
 
+done:
 	/*
-	 * Attach this group/project quota to the user quota as a hint.
-	 * This WON'T, in general, result in a thrash.
+	 * Don't worry about the dquots that we may have attached before any
+	 * error - they'll get detached later if it has not already been done.
 	 */
-	if (nquotas > 1 && ip->i_udquot) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-		ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp));
-		ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp));
-
-		/*
-		 * We do not have i_udquot locked at this point, but this check
-		 * is OK since we don't depend on the i_gdquot to be accurate
-		 * 100% all the time. It is just a hint, and this will
-		 * succeed in general.
-		 */
-		if (ip->i_udquot->q_gdquot != ip->i_gdquot)
-			xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP);
-
-		if (ip->i_udquot->q_pdquot != ip->i_pdquot)
-			xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ);
-	}
-
- done:
-#ifdef DEBUG
-	if (!error) {
-		if (XFS_IS_UQUOTA_ON(mp))
-			ASSERT(ip->i_udquot);
-		if (XFS_IS_GQUOTA_ON(mp))
-			ASSERT(ip->i_gdquot);
-		if (XFS_IS_PQUOTA_ON(mp))
-			ASSERT(ip->i_pdquot);
-	}
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
 	return error;
 }
 
@@ -865,8 +693,7 @@
 
 	/* Precalc some constants */
 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp,
-							qinf->qi_dqchunklen);
+	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
 
 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
 

diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3daf5ea..bbc813c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c

@@ -278,9 +278,10 @@
 	xfs_mount_t	*mp,
 	uint		flags)
 {
-	int		error;
+	int		error = EINVAL;
 
-	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
+	    (flags & ~XFS_DQ_ALLTYPES)) {
 		xfs_debug(mp, "%s: flags=%x m_qflags=%x",
 			__func__, flags, mp->m_qflags);
 		return XFS_ERROR(EINVAL);
@@ -959,7 +960,6 @@
 STATIC int
 xfs_dqrele_inode(
 	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
 	int			flags,
 	void			*args)
 {

diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h
index b3b2b10..137e209 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/xfs_quota_defs.h

@@ -156,6 +156,6 @@
 
 extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
 		       xfs_dqid_t id, uint type, uint flags, char *str);
-extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks);
+extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
 
 #endif	/* __XFS_QUOTA_H__ */

diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33caf..2ad1b98 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c

@@ -100,16 +100,36 @@
 		if (!XFS_IS_QUOTA_ON(mp))
 			return -EINVAL;
 		return -xfs_qm_scall_quotaoff(mp, flags);
-	case Q_XQUOTARM:
-		if (XFS_IS_QUOTA_ON(mp))
-			return -EINVAL;
-		return -xfs_qm_scall_trunc_qfiles(mp, flags);
 	}
 
 	return -EINVAL;
 }
 
 STATIC int
+xfs_fs_rm_xquota(
+	struct super_block	*sb,
+	unsigned int		uflags)	/* FS_{USER,GROUP,PROJ}_QUOTA mask */
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	unsigned int		flags = 0;	/* XFS_DQ_* types to truncate */
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (XFS_IS_QUOTA_ON(mp))
+		return -EINVAL;
+
+	if (uflags & FS_USER_QUOTA)
+		flags |= XFS_DQ_USER;
+	if (uflags & FS_GROUP_QUOTA)
+		flags |= XFS_DQ_GROUP;
+	if (uflags & FS_PROJ_QUOTA)
+		flags |= XFS_DQ_PROJ;
+
+	return -xfs_qm_scall_trunc_qfiles(mp, flags);
+}
+
+STATIC int
 xfs_fs_get_dqblk(
 	struct super_block	*sb,
 	struct kqid		qid,
@@ -149,6 +169,7 @@
 	.get_xstatev		= xfs_fs_get_xstatev,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
+	.rm_xquota		= xfs_fs_rm_xquota,
 	.get_dqblk		= xfs_fs_get_dqblk,
 	.set_dqblk		= xfs_fs_set_dqblk,
 };

diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c
index b1f2fe8..f4dd697 100644
--- a/fs/xfs/xfs_rtbitmap.c
+++ b/fs/xfs/xfs_rtbitmap.c

@@ -74,7 +74,6 @@
 				   mp->m_bsize, 0, &bp, NULL);
 	if (error)
 		return error;
-	ASSERT(!xfs_buf_geterror(bp));
 	*bpp = bp;
 	return 0;
 }

diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 8baf61a..c3453b1 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c

@@ -291,7 +291,8 @@
 	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
 	    sbp->sb_dblocks == 0					||
 	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
-	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
+	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp)			||
+	    sbp->sb_shared_vn != 0)) {
 		xfs_notice(mp, "SB sanity check failed");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
@@ -333,15 +334,6 @@
 		xfs_warn(mp, "Offline file system operation in progress!");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-
-	/*
-	 * Version 1 directory format has never worked on Linux.
-	 */
-	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
-		xfs_warn(mp, "file system using version 1 directory format");
-		return XFS_ERROR(ENOSYS);
-	}
-
 	return 0;
 }
 

diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index f7b2fe7..c43c2d6 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h

@@ -36,8 +36,6 @@
 #define	XFS_SB_VERSION_5	5		/* CRC enabled filesystem */
 #define	XFS_SB_VERSION_NUMBITS		0x000f
 #define	XFS_SB_VERSION_ALLFBITS		0xfff0
-#define	XFS_SB_VERSION_SASHFBITS	0xf000
-#define	XFS_SB_VERSION_REALFBITS	0x0ff0
 #define	XFS_SB_VERSION_ATTRBIT		0x0010
 #define	XFS_SB_VERSION_NLINKBIT		0x0020
 #define	XFS_SB_VERSION_QUOTABIT		0x0040
@@ -50,24 +48,15 @@
 #define	XFS_SB_VERSION_DIRV2BIT		0x2000
 #define	XFS_SB_VERSION_BORGBIT		0x4000	/* ASCII only case-insens. */
 #define	XFS_SB_VERSION_MOREBITSBIT	0x8000
-#define	XFS_SB_VERSION_OKSASHFBITS	\
-	(XFS_SB_VERSION_EXTFLGBIT | \
-	 XFS_SB_VERSION_DIRV2BIT | \
-	 XFS_SB_VERSION_BORGBIT)
-#define	XFS_SB_VERSION_OKREALFBITS	\
-	(XFS_SB_VERSION_ATTRBIT | \
-	 XFS_SB_VERSION_NLINKBIT | \
-	 XFS_SB_VERSION_QUOTABIT | \
-	 XFS_SB_VERSION_ALIGNBIT | \
-	 XFS_SB_VERSION_DALIGNBIT | \
-	 XFS_SB_VERSION_SHAREDBIT | \
-	 XFS_SB_VERSION_LOGV2BIT | \
-	 XFS_SB_VERSION_SECTORBIT | \
-	 XFS_SB_VERSION_MOREBITSBIT)
-#define	XFS_SB_VERSION_OKREALBITS	\
-	(XFS_SB_VERSION_NUMBITS | \
-	 XFS_SB_VERSION_OKREALFBITS | \
-	 XFS_SB_VERSION_OKSASHFBITS)
+
+/*
+ * Supported feature bit list is just all bits in the versionnum field because
+ * we've used them all up and understand them all. Except, of course, for the
+ * shared superblock bit, which nobody knows what it does and so is unsupported.
+ */
+#define	XFS_SB_VERSION_OKBITS		\
+	((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \
+		~XFS_SB_VERSION_SHAREDBIT)
 
 /*
  * There are two words to hold XFS "feature" bits: the original
@@ -76,7 +65,6 @@
  *
  * These defines represent bits in sb_features2.
  */
-#define XFS_SB_VERSION2_REALFBITS	0x00ffffff	/* Mask: features */
 #define XFS_SB_VERSION2_RESERVED1BIT	0x00000001
 #define XFS_SB_VERSION2_LAZYSBCOUNTBIT	0x00000002	/* Superblk counters */
 #define XFS_SB_VERSION2_RESERVED4BIT	0x00000004
@@ -86,16 +74,11 @@
 #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
 #define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */
 
-#define	XFS_SB_VERSION2_OKREALFBITS	\
+#define	XFS_SB_VERSION2_OKBITS		\
 	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
 	 XFS_SB_VERSION2_ATTR2BIT	| \
 	 XFS_SB_VERSION2_PROJID32BIT	| \
 	 XFS_SB_VERSION2_FTYPE)
-#define	XFS_SB_VERSION2_OKSASHFBITS	\
-	(0)
-#define XFS_SB_VERSION2_OKREALBITS	\
-	(XFS_SB_VERSION2_OKREALFBITS |	\
-	 XFS_SB_VERSION2_OKSASHFBITS )
 
 /*
  * Superblock - in core version.  Must match the ondisk version below.
@@ -345,214 +328,140 @@
 
 #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
 
-static inline int xfs_sb_good_version(xfs_sb_t *sbp)
+/*
+ * The first XFS version we support is a v4 superblock with V2 directories.
+ */
+static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
 {
-	/* We always support version 1-3 */
-	if (sbp->sb_versionnum >= XFS_SB_VERSION_1 &&
-	    sbp->sb_versionnum <= XFS_SB_VERSION_3)
-		return 1;
+	if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
+		return false;
 
-	/* We support version 4 if all feature bits are supported */
-	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) {
-		if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) ||
-		    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
-		     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
-			return 0;
+	/* check for unknown features in the fs */
+	if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
+	    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
+	     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
+		return false;
 
-		if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
-			return 0;
-		return 1;
-	}
+	return true;
+}
+
+static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
+{
 	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
-		return 1;
-
-	return 0;
+		return true;
+	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
+		return xfs_sb_good_v4_features(sbp);
+	return false;
 }
 
 /*
  * Detect a mismatched features2 field.  Older kernels read/wrote
  * this into the wrong slot, so to be safe we keep them in sync.
  */
-static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp)
+static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
 {
-	return (sbp->sb_bad_features2 != sbp->sb_features2);
+	return sbp->sb_bad_features2 != sbp->sb_features2;
 }
 
-static inline unsigned xfs_sb_version_tonew(unsigned v)
+static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp)
 {
-	if (v == XFS_SB_VERSION_1)
-		return XFS_SB_VERSION_4;
-
-	if (v == XFS_SB_VERSION_2)
-		return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
-
-	return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT |
-		XFS_SB_VERSION_NLINKBIT;
+	return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT);
 }
 
-static inline unsigned xfs_sb_version_toold(unsigned v)
+static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)
 {
-	if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT))
-		return 0;
-	if (v & XFS_SB_VERSION_NLINKBIT)
-		return XFS_SB_VERSION_3;
-	if (v & XFS_SB_VERSION_ATTRBIT)
-		return XFS_SB_VERSION_2;
-	return XFS_SB_VERSION_1;
+	sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
 }
 
-static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp)
 {
-	return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
-		sbp->sb_versionnum == XFS_SB_VERSION_3 ||
-		(XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
+	return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
 }
 
-static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addquota(struct xfs_sb *sbp)
 {
-	if (sbp->sb_versionnum == XFS_SB_VERSION_1)
-		sbp->sb_versionnum = XFS_SB_VERSION_2;
-	else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
-		sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
-	else
-		sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
+	sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
 }
 
-static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp)
 {
-	return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
-		 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		  (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
-}
-
-static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
-{
-	if (sbp->sb_versionnum <= XFS_SB_VERSION_2)
-		sbp->sb_versionnum = XFS_SB_VERSION_3;
-	else
-		sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
-}
-
-static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
-}
-
-static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
-{
-	if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
-		sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
-	else
-		sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
-					XFS_SB_VERSION_QUOTABIT;
-}
-
-static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
 		(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
 }
 
-static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp)
 {
-	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
+	return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
 }
 
-static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
 {
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+	       (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
 }
 
-static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
 {
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT));
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+	       (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
 }
 
-static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
 {
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT));
+	return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
 }
 
-static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp)
 {
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT));
+	return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
 }
 
-static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
 {
-	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
-}
-
-static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
-}
-
-static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
-		(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT));
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+	       (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
 }
 
 /*
  * sb_features2 bit version macros.
- *
- * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
- *
- * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
- *	((xfs_sb_version_hasmorebits(sbp) &&
- *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
-
-static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
 	       (xfs_sb_version_hasmorebits(sbp) &&
 		(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
 }
 
-static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
 	       (xfs_sb_version_hasmorebits(sbp) &&
 		(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
 }
 
-static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)
 {
 	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
 	sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
+	sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT;
 }
 
-static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)
 {
 	sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+	sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
 	if (!sbp->sb_features2)
 		sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
 }
 
-static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
 	       (xfs_sb_version_hasmorebits(sbp) &&
 		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
 }
 
-static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
 {
 	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
 	sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
@@ -587,7 +496,9 @@
 	return (sbp->sb_features_compat & feature) != 0;
 }
 
-#define XFS_SB_FEAT_RO_COMPAT_ALL 0
+#define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_ALL \
+		(XFS_SB_FEAT_RO_COMPAT_FINOBT)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(
@@ -623,12 +534,12 @@
 /*
  * V5 superblock specific feature checks
  */
-static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
+static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
 
-static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp)
+static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
@@ -641,6 +552,12 @@
 		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
 }
 
+/*
+ * Free inode btree support: true only for a v5 superblock that has the
+ * XFS_SB_FEAT_RO_COMPAT_FINOBT read-only compatible feature bit set.
+ *
+ * Takes a struct xfs_sb pointer like the other feature helpers in this
+ * header rather than the xfs_sb_t typedef.
+ */
+static inline int xfs_sb_version_hasfinobt(struct xfs_sb *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
+}
+
 /*
  * end of superblock version macros
  */

diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 4484e51..82404da 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h

@@ -238,7 +238,7 @@
 int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
 int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
 			uint32_t size, struct xfs_buf *bp);
-bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
 			uint32_t size, struct xfs_buf *bp);
 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
 				 struct xfs_inode *ip, struct xfs_ifork *ifp);

diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ce372b7..f224038 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c

@@ -59,6 +59,7 @@
 		{ "abtc2",		XFSSTAT_END_ABTC_V2		},
 		{ "bmbt2",		XFSSTAT_END_BMBT_V2		},
 		{ "ibt2",		XFSSTAT_END_IBT_V2		},
+		{ "fibt2",		XFSSTAT_END_FIBT_V2		},
 		/* we print both series of quota information together */
 		{ "qm",			XFSSTAT_END_QM			},
 	};

diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c03ad38..c8f238b 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h

@@ -183,7 +183,23 @@
 	__uint32_t		xs_ibt_2_alloc;
 	__uint32_t		xs_ibt_2_free;
 	__uint32_t		xs_ibt_2_moves;
-#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_IBT_V2+6)
+#define XFSSTAT_END_FIBT_V2		(XFSSTAT_END_IBT_V2+15)
+	__uint32_t		xs_fibt_2_lookup;
+	__uint32_t		xs_fibt_2_compare;
+	__uint32_t		xs_fibt_2_insrec;
+	__uint32_t		xs_fibt_2_delrec;
+	__uint32_t		xs_fibt_2_newroot;
+	__uint32_t		xs_fibt_2_killroot;
+	__uint32_t		xs_fibt_2_increment;
+	__uint32_t		xs_fibt_2_decrement;
+	__uint32_t		xs_fibt_2_lshift;
+	__uint32_t		xs_fibt_2_rshift;
+	__uint32_t		xs_fibt_2_split;
+	__uint32_t		xs_fibt_2_join;
+	__uint32_t		xs_fibt_2_alloc;
+	__uint32_t		xs_fibt_2_free;
+	__uint32_t		xs_fibt_2_moves;
+#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_FIBT_V2+6)
 	__uint32_t		xs_qm_dqreclaims;
 	__uint32_t		xs_qm_dqreclaim_misses;
 	__uint32_t		xs_qm_dquot_dups;

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3494eff..8f0333b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c

@@ -765,20 +765,18 @@
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-							mp->m_fsname);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-							mp->m_fsname);
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -811,8 +809,7 @@
 {
 	int			error;
 
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-				    mp->m_sb.sb_sectsize);
+	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 	if (error)
 		return error;
 
@@ -822,14 +819,12 @@
 		if (xfs_sb_version_hassector(&mp->m_sb))
 			log_sector_size = mp->m_sb.sb_logsectsize;
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
-					    mp->m_sb.sb_blocksize,
 					    log_sector_size);
 		if (error)
 			return error;
 	}
 	if (mp->m_rtdev_targp) {
 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-					    mp->m_sb.sb_blocksize,
 					    mp->m_sb.sb_sectsize);
 		if (error)
 			return error;
@@ -1754,13 +1749,9 @@
 	if (error)
 		goto out_destroy_wq;
 
-	error = xfs_filestream_init();
-	if (error)
-		goto out_mru_cache_uninit;
-
 	error = xfs_buf_init();
 	if (error)
-		goto out_filestream_uninit;
+		goto out_mru_cache_uninit;
 
 	error = xfs_init_procfs();
 	if (error)
@@ -1787,8 +1778,6 @@
 	xfs_cleanup_procfs();
  out_buf_terminate:
 	xfs_buf_terminate();
- out_filestream_uninit:
-	xfs_filestream_uninit();
  out_mru_cache_uninit:
 	xfs_mru_cache_uninit();
  out_destroy_wq:
@@ -1807,7 +1796,6 @@
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
-	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
 	xfs_destroy_workqueues();
 	xfs_destroy_zones();

diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 52979aa..d69363c 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c

@@ -27,6 +27,7 @@
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_inode.h"
 #include "xfs_ialloc.h"
@@ -92,7 +93,7 @@
 
 		cur_chunk = bp->b_addr;
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
+			if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
 							byte_cnt, bp)) {
 				error = EFSCORRUPTED;
 				xfs_alert(mp,

diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index 9b32052..23c2f25 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c

@@ -80,7 +80,6 @@
  */
 bool
 xfs_symlink_hdr_ok(
-	struct xfs_mount	*mp,
 	xfs_ino_t		ino,
 	uint32_t		offset,
 	uint32_t		size,

diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index dee3279..1e85bcd 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c

@@ -46,6 +46,7 @@
 #include "xfs_log_recover.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_filestream.h"
 
 /*
  * We include this last to have the helpers above available for the trace

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 65d8c79..152f827 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h

@@ -538,6 +538,64 @@
 DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
 
+/*
+ * Common event layout for the filestream tracepoints: records the device and
+ * inode number of @ip, the AG number passed in, and the value returned by
+ * xfs_filestream_peek_ag() for that AG at the time the event fires.
+ */
+DECLARE_EVENT_CLASS(xfs_filestream_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
+	TP_ARGS(ip, agno),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_agnumber_t, agno)
+		__field(int, streams)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->agno = agno;
+		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+	),
+	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->agno,
+		  __entry->streams)
+)
+/* Instantiate one (ip, agno) tracepoint of xfs_filestream_class per name. */
+#define DEFINE_FILESTREAM_EVENT(name) \
+DEFINE_EVENT(xfs_filestream_class, name, \
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
+	TP_ARGS(ip, agno))
+DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
+
+/*
+ * Records the same fields as xfs_filestream_class plus two values supplied by
+ * the caller: @free and @nscan.
+ *
+ * @free is an xfs_extlen_t, an unsigned 32-bit type (see xfs_types.h), so it
+ * is printed with %u just like @agno; %d would show large values as negative.
+ */
+TRACE_EVENT(xfs_filestream_pick,
+	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
+		 xfs_extlen_t free, int nscan),
+	TP_ARGS(ip, agno, free, nscan),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_agnumber_t, agno)
+		__field(int, streams)
+		__field(xfs_extlen_t, free)
+		__field(int, nscan)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->agno = agno;
+		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+		__entry->free = free;
+		__entry->nscan = nscan;
+	),
+	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %u nscan %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->agno,
+		  __entry->streams,
+		  __entry->free,
+		  __entry->nscan)
+);
+
 DECLARE_EVENT_CLASS(xfs_lock_class,
 	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
 		 unsigned long caller_ip),
@@ -1060,7 +1118,6 @@
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 54a5732..d039325 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c

@@ -827,7 +827,7 @@
 		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
 
 	spin_lock(&ailp->xa_lock);
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 }
 

diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a728735..cb0f3a8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c

@@ -173,7 +173,6 @@
  */
 void
 xfs_trans_ail_cursor_done(
-	struct xfs_ail		*ailp,
 	struct xfs_ail_cursor	*cur)
 {
 	cur->item = NULL;
@@ -368,7 +367,7 @@
 		 * If the AIL is empty or our push has reached the end we are
 		 * done now.
 		 */
-		xfs_trans_ail_cursor_done(ailp, &cur);
+		xfs_trans_ail_cursor_done(&cur);
 		spin_unlock(&ailp->xa_lock);
 		goto out_done;
 	}
@@ -453,7 +452,7 @@
 			break;
 		lsn = lip->li_lsn;
 	}
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->xa_lock);
 
 	if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))

diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 12e86af..bd12818 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h

@@ -133,8 +133,7 @@
 					xfs_lsn_t lsn);
 struct xfs_log_item *	xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur);
-void			xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
-					struct xfs_ail_cursor *cur);
+void			xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
 
 #if BITS_PER_LONG != 64
 static inline void

diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index ae36816..f2bda7c 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c

@@ -26,6 +26,7 @@
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_ialloc.h"
@@ -106,6 +107,47 @@
 }
 
 /*
+ * The free inode btree is a conditional feature and the log reservation
+ * requirements differ slightly from that of the traditional inode allocation
+ * btree. The finobt tracks records for inode chunks with at least one free
+ * inode. A record can be removed from the tree for an inode allocation
+ * or free and thus the finobt reservation is unconditional across:
+ *
+ * 	- inode allocation
+ * 	- inode free
+ * 	- inode chunk allocation
+ *
+ * The 'modify' param indicates to include the record modification scenario. The
+ * 'alloc' param indicates to include the reservation for free space btree
+ * modifications on behalf of finobt modifications. This is required only for
+ * transactions that do not already account for free space btree modifications.
+ *
+ * the free inode btree: max depth * block size
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the free inode btree entry: block size
+ */
+STATIC uint
+xfs_calc_finobt_res(
+	struct xfs_mount	*mp,
+	int			alloc,
+	int			modify)
+{
+	/* Filesystems without the finobt need no extra reservation. */
+	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+		return 0;
+
+	/*
+	 * Sum of:
+	 *  - the finobt itself: max depth * block size
+	 *  - if @alloc: the free space btrees touched on the finobt's behalf
+	 *  - if @modify: one more block for the finobt record being changed
+	 */
+	return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+	       (alloc ? xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+					  XFS_FSB_TO_B(mp, 1)) : 0) +
+	       (modify ? (uint)XFS_FSB_TO_B(mp, 1) : 0);
+}
+
+/*
  * Various log reservation values.
  *
  * These are based on the size of the file system block because that is what
@@ -302,6 +344,7 @@
  *    the superblock for the nlink flag: sector size
  *    the directory btree: (max depth + v2) * dir block size
  *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the finobt (record modification and allocation btrees)
  */
 STATIC uint
 xfs_calc_create_resv_modify(
@@ -310,7 +353,8 @@
 	return xfs_calc_inode_res(mp, 2) +
 		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 		(uint)XFS_FSB_TO_B(mp, 1) +
-		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 1, 1);
 }
 
 /*
@@ -348,6 +392,7 @@
  *    the superblock for the nlink flag: sector size
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion)
  */
 STATIC uint
 xfs_calc_icreate_resv_alloc(
@@ -357,7 +402,8 @@
 		mp->m_sb.sb_sectsize +
 		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 0, 0);
 }
 
 STATIC uint
@@ -425,6 +471,7 @@
  *    the on disk inode before ours in the agi hash list: inode cluster size
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion, removal or modification)
  */
 STATIC uint
 xfs_calc_ifree_reservation(
@@ -439,7 +486,8 @@
 		xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				 mp->m_in_maxlevels, 0) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 0, 1);
 }
 
 /*
@@ -562,7 +610,7 @@
 	return XFS_DQUOT_LOGRES(mp) +
 		xfs_calc_inode_res(mp, 1) +
 		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(1, mp->m_dirblksize) +
+		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
 		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
 				 XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),

diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index af5dbe0..bf9c457 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h

@@ -28,7 +28,8 @@
 	(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
 	  XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
 	  XFS_EXTENTADD_SPACE_RES(mp,w))
-#define	XFS_DAENTER_1B(mp,w)	((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
+#define	XFS_DAENTER_1B(mp,w)	\
+	((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
 #define	XFS_DAENTER_DBS(mp,w)	\
 	(XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
 #define	XFS_DAENTER_BLOCKS(mp,w)	\
@@ -47,13 +48,15 @@
 #define	XFS_DIRREMOVE_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
 #define	XFS_IALLOC_SPACE_RES(mp)	\
-	((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1)
+	((mp)->m_ialloc_blks + \
+	 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
+	  ((mp)->m_in_maxlevels - 1)))
 
 /*
  * Space reservation values for various transactions.
  */
 #define	XFS_ADDAFORK_SPACE_RES(mp)	\
-	((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
+	((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
 #define	XFS_ATTRRM_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
 /* This macro is not used - see inline code in xfs_attr_set */
@@ -82,5 +85,8 @@
 	(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
 #define	XFS_SYMLINK_SPACE_RES(mp,nl,b)	\
 	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
+/*
+ * Inode free space reservation: m_in_maxlevels blocks when the free inode
+ * btree is enabled, otherwise none. The macro argument is parenthesized so
+ * that any pointer expression can be passed as @mp.
+ */
+#define XFS_IFREE_SPACE_RES(mp)		\
+	(xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
+
 
 #endif	/* __XFS_TRANS_SPACE_H__ */

diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 82bbc34..65c6e66 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h

@@ -134,7 +134,7 @@
 
 typedef enum {
 	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
-	XFS_BTNUM_MAX
+	XFS_BTNUM_FINOi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 
 struct xfs_name {

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
new file mode 100644
index 0000000..6383d54
--- /dev/null
+++ b/include/asm-generic/qrwlock.h

@@ -0,0 +1,166 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QRWLOCK_H
+#define __ASM_GENERIC_QRWLOCK_H
+
+#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+#include <asm-generic/qrwlock_types.h>
+
+/*
+ * Writer states & reader shift and bias
+ */
+#define	_QW_WAITING	1		/* A writer is waiting	   */
+#define	_QW_LOCKED	0xff		/* A writer holds the lock */
+#define	_QW_WMASK	0xff		/* Writer mask		   */
+#define	_QR_SHIFT	8		/* Reader count shift	   */
+#define _QR_BIAS	(1U << _QR_SHIFT)
+
+/*
+ * External function declarations
+ */
+extern void queue_read_lock_slowpath(struct qrwlock *lock);
+extern void queue_write_lock_slowpath(struct qrwlock *lock);
+
+/**
+ * queue_read_can_lock - would read_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ *
+ * Return: 1 if none of the writer bits (_QW_WMASK) are set, 0 otherwise
+ */
+static inline int queue_read_can_lock(struct qrwlock *lock)
+{
+	return (atomic_read(&lock->cnts) & _QW_WMASK) == 0;
+}
+
+/**
+ * queue_write_can_lock - would write_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ *
+ * Return: 1 if the whole lock word is zero (no readers, no writer), else 0
+ */
+static inline int queue_write_can_lock(struct qrwlock *lock)
+{
+	return atomic_read(&lock->cnts) == 0;
+}
+
+/**
+ * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ *
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_read_trylock(struct qrwlock *lock)
+{
+	u32 cnts = atomic_read(&lock->cnts);
+
+	/* Writer bits already set: fail without touching the reader count. */
+	if (unlikely(cnts & _QW_WMASK))
+		return 0;
+
+	/* Announce ourselves as a reader, then re-check for a writer. */
+	cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+	if (unlikely(cnts & _QW_WMASK)) {
+		/* Writer bits appeared in between: undo our reader bias. */
+		atomic_sub(_QR_BIAS, &lock->cnts);
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ *
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_write_trylock(struct qrwlock *lock)
+{
+	/* Any non-zero lock word means readers or a writer are present. */
+	if (unlikely(atomic_read(&lock->cnts)))
+		return 0;
+
+	/* The lock word was zero: try to swing it from 0 to _QW_LOCKED. */
+	return likely(atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0);
+}
+/**
+ * queue_read_lock - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queue_read_lock(struct qrwlock *lock)
+{
+	u32 cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Writer bits are set, so the fast path failed. The slowpath will
+	 * decrement the reader count, if necessary.
+	 */
+	if (unlikely(cnts & _QW_WMASK))
+		queue_read_lock_slowpath(lock);
+}
+
+/**
+ * queue_write_lock - acquire write lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queue_write_lock(struct qrwlock *lock)
+{
+	/*
+	 * Fast path: the lock word is zero and can be switched straight to
+	 * _QW_LOCKED. Anything else is handled by the slowpath.
+	 */
+	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) != 0)
+		queue_write_lock_slowpath(lock);
+}
+
+/**
+ * queue_read_unlock - release read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queue_read_unlock(struct qrwlock *lock)
+{
+	/*
+	 * Atomically decrement the reader count by one reader bias. The
+	 * barrier is issued first so that the accesses made while holding
+	 * the lock are ordered before the decrement.
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QR_BIAS, &lock->cnts);
+}
+
+#ifndef queue_write_unlock
+/**
+ * queue_write_unlock - release write lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ *
+ * This generic version is only compiled when queue_write_unlock has not
+ * already been defined as a macro before this point.
+ */
+static inline void queue_write_unlock(struct qrwlock *lock)
+{
+	/*
+	 * If the writer field is atomic, it can be cleared directly.
+	 * Otherwise, an atomic subtraction will be used to clear it.
+	 * The generic code takes the second route: it subtracts _QW_LOCKED
+	 * from the lock word after a barrier that orders the critical
+	 * section before the release.
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QW_LOCKED, &lock->cnts);
+}
+#endif
+
+/*
+ * Remapping rwlock architecture specific functions to the corresponding
+ * queue rwlock functions.
+ */
+#define arch_read_can_lock(l)	queue_read_can_lock(l)
+#define arch_write_can_lock(l)	queue_write_can_lock(l)
+#define arch_read_lock(l)	queue_read_lock(l)
+#define arch_write_lock(l)	queue_write_lock(l)
+#define arch_read_trylock(l)	queue_read_trylock(l)
+#define arch_write_trylock(l)	queue_write_trylock(l)
+#define arch_read_unlock(l)	queue_read_unlock(l)
+#define arch_write_unlock(l)	queue_write_unlock(l)
+
+#endif /* __ASM_GENERIC_QRWLOCK_H */

diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
new file mode 100644
index 0000000..4d76f24
--- /dev/null
+++ b/include/asm-generic/qrwlock_types.h

@@ -0,0 +1,21 @@
+#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H
+#define __ASM_GENERIC_QRWLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/spinlock_types.h>
+
+/*
+ * The queue read/write lock data structure
+ *
+ * @cnts: lock word updated with atomic operations by the inline helpers in
+ *	  asm-generic/qrwlock.h
+ * @lock: arch spinlock; not touched by the inline fast paths
+ *	  NOTE(review): presumably taken by the out-of-line slowpaths to
+ *	  queue waiters - confirm against their implementation
+ */
+
+typedef struct qrwlock {
+	atomic_t		cnts;
+	arch_spinlock_t		lock;
+} arch_rwlock_t;
+
+/* Static initializer: zeroed lock word and an unlocked arch spinlock. */
+#define	__ARCH_RW_LOCK_UNLOCKED {		\
+	.cnts = ATOMIC_INIT(0),			\
+	.lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
+}
+
+#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d647637..471ba48 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h

@@ -109,6 +109,15 @@
 #define BRANCH_PROFILE()
 #endif
 
+/*
+ * With CONFIG_KPROBES, collect the _kprobe_blacklist input sections at an
+ * 8-byte aligned address and bracket them with the __start_kprobe_blacklist
+ * and __stop_kprobe_blacklist symbols. Expands to nothing otherwise.
+ */
+#ifdef CONFIG_KPROBES
+#define KPROBE_BLACKLIST()	. = ALIGN(8);				      \
+				VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+				*(_kprobe_blacklist)			      \
+				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+#else
+#define KPROBE_BLACKLIST()
+#endif
+
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
@@ -478,6 +487,7 @@
 	*(.init.rodata)							\
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
+	KPROBE_BLACKLIST()						\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a7c2a86..8af71a8 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h

@@ -143,11 +143,6 @@
 #define DRIVER_PRIME       0x4000
 #define DRIVER_RENDER      0x8000
 
-#define DRIVER_BUS_PCI 0x1
-#define DRIVER_BUS_PLATFORM 0x2
-#define DRIVER_BUS_USB 0x3
-#define DRIVER_BUS_HOST1X 0x4
-
 /***********************************************************************/
 /** \name Begin the DRM... */
 /*@{*/
@@ -239,8 +234,6 @@
 /** \name Internal types and structures */
 /*@{*/
 
-#define DRM_ARRAY_SIZE(x) ARRAY_SIZE(x)
-
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
 /**
@@ -731,13 +724,7 @@
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 
 struct drm_bus {
-	int bus_type;
-	int (*get_irq)(struct drm_device *dev);
-	const char *(*get_name)(struct drm_device *dev);
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
-	int (*set_unique)(struct drm_device *dev, struct drm_master *master,
-			  struct drm_unique *unique);
-	int (*irq_by_busid)(struct drm_device *dev, struct drm_irq_busid *p);
 };
 
 /**
@@ -974,11 +961,6 @@
 	const struct drm_ioctl_desc *ioctls;
 	int num_ioctls;
 	const struct file_operations *fops;
-	union {
-		struct pci_driver *pci;
-		struct platform_device *platform_device;
-		struct usb_driver *usb;
-	} kdriver;
 	struct drm_bus *bus;
 
 	/* List of devices hanging off this driver with stealth attach. */
@@ -1040,14 +1022,17 @@
 };
 
 struct drm_vblank_crtc {
+	struct drm_device *dev;		/* pointer to the drm_device */
 	wait_queue_head_t queue;	/**< VBLANK wait queue */
 	struct timeval time[DRM_VBLANKTIME_RBSIZE];	/**< timestamp of current count */
+	struct timer_list disable_timer;		/* delayed disable timer */
 	atomic_t count;			/**< number of VBLANK interrupts */
 	atomic_t refcount;		/* number of users of vblank interruptsper crtc */
 	u32 last;			/* protected by dev->vbl_lock, used */
 					/* for wraparound handling */
 	u32 last_wait;			/* Last vblank seqno waited per CRTC */
 	unsigned int inmodeset;		/* Display driver is setting mode */
+	int crtc;			/* crtc index */
 	bool enabled;			/* so we don't call enable more than
 					   once per disable */
 };
@@ -1058,7 +1043,6 @@
  */
 struct drm_device {
 	struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */
-	char *devname;			/**< For /proc/interrupts */
 	int if_version;			/**< Highest interface version set */
 
 	/** \name Lifetime Management */
@@ -1072,18 +1056,19 @@
 	struct drm_minor *render;		/**< Render node */
 	atomic_t unplugged;			/**< Flag whether dev is dead */
 	struct inode *anon_inode;		/**< inode for private address-space */
+	char *unique;				/**< unique name of the device */
 	/*@} */
 
 	/** \name Locks */
 	/*@{ */
-	spinlock_t count_lock;		/**< For inuse, drm_device::open_count, drm_device::buf_use */
 	struct mutex struct_mutex;	/**< For others */
 	struct mutex master_mutex;      /**< For drm_minor::master and drm_file::is_master */
 	/*@} */
 
 	/** \name Usage Counters */
 	/*@{ */
-	int open_count;			/**< Outstanding files open */
+	int open_count;			/**< Outstanding files open, protected by drm_global_mutex. */
+	spinlock_t buf_lock;		/**< For drm_device::buf_use and a few other things. */
 	int buf_use;			/**< Buffers in use -- cannot alloc */
 	atomic_t buf_alloc;		/**< Buffer allocation in progress */
 	/*@} */
@@ -1114,6 +1099,8 @@
 	/** \name Context support */
 	/*@{ */
 	bool irq_enabled;		/**< True if irq handler is enabled */
+	int irq;
+
 	__volatile__ long context_flag;	/**< Context swapping flag */
 	int last_context;		/**< Last current context */
 	/*@} */
@@ -1134,7 +1121,6 @@
 
 	spinlock_t vblank_time_lock;    /**< Protects vblank count and time updates during vblank enable/disable */
 	spinlock_t vbl_lock;
-	struct timer_list vblank_disable_timer;
 
 	u32 max_vblank_count;           /**< size of vblank counter register */
 
@@ -1186,11 +1172,6 @@
 	return ((dev->driver->driver_features & feature) ? 1 : 0);
 }
 
-static inline int drm_dev_to_irq(struct drm_device *dev)
-{
-	return dev->driver->bus->get_irq(dev);
-}
-
 static inline void drm_device_set_unplugged(struct drm_device *dev)
 {
 	smp_wmb();
@@ -1204,11 +1185,6 @@
 	return ret;
 }
 
-static inline bool drm_modeset_is_locked(struct drm_device *dev)
-{
-	return mutex_is_locked(&dev->mode_config.mutex);
-}
-
 static inline bool drm_is_render_client(const struct drm_file *file_priv)
 {
 	return file_priv->minor->type == DRM_MINOR_RENDER;
@@ -1310,7 +1286,7 @@
 /* Cache management (drm_cache.c) */
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 void drm_clflush_sg(struct sg_table *st);
-void drm_clflush_virt_range(char *addr, unsigned long length);
+void drm_clflush_virt_range(void *addr, unsigned long length);
 
 				/* Locking IOCTL support (drm_lock.h) */
 extern int drm_lock(struct drm_device *dev, void *data,
@@ -1363,7 +1339,7 @@
 				/* IRQ support (drm_irq.h) */
 extern int drm_control(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
-extern int drm_irq_install(struct drm_device *dev);
+extern int drm_irq_install(struct drm_device *dev, int irq);
 extern int drm_irq_uninstall(struct drm_device *dev);
 
 extern int drm_vblank_init(struct drm_device *dev, int num_crtcs);
@@ -1377,8 +1353,14 @@
 extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
+extern int drm_crtc_vblank_get(struct drm_crtc *crtc);
+extern void drm_crtc_vblank_put(struct drm_crtc *crtc);
 extern void drm_vblank_off(struct drm_device *dev, int crtc);
+extern void drm_vblank_on(struct drm_device *dev, int crtc);
+extern void drm_crtc_vblank_off(struct drm_crtc *crtc);
+extern void drm_crtc_vblank_on(struct drm_crtc *crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
+
 extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 				     struct timeval *tvblank, unsigned flags);
 extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev,
@@ -1522,6 +1504,9 @@
 				       size_t align);
 extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
 extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
+extern int drm_pci_set_unique(struct drm_device *dev,
+			      struct drm_master *master,
+			      struct drm_unique *u);
 
 			       /* sysfs support (drm_sysfs.c) */
 struct drm_sysfs_class;
@@ -1631,6 +1616,7 @@
 void drm_dev_unref(struct drm_device *dev);
 int drm_dev_register(struct drm_device *dev, unsigned long flags);
 void drm_dev_unregister(struct drm_device *dev);
+int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...);
 
 struct drm_minor *drm_minor_acquire(unsigned int minor_id);
 void drm_minor_release(struct drm_minor *minor);

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index e55fccb..251b75e 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h

@@ -33,6 +33,7 @@
 #include <linux/hdmi.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_lock.h>
 
 struct drm_device;
 struct drm_mode_set;
@@ -50,6 +51,7 @@
 #define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
 #define DRM_MODE_OBJECT_PLANE 0xeeeeeeee
 #define DRM_MODE_OBJECT_BRIDGE 0xbdbdbdbd
+#define DRM_MODE_OBJECT_ANY 0
 
 struct drm_mode_object {
 	uint32_t id;
@@ -64,6 +66,15 @@
 	uint64_t values[DRM_OBJECT_MAX_PROPERTY];
 };
 
+/* Reinterpret the bits of an unsigned 64-bit value as a signed one. */
+static inline int64_t U642I64(uint64_t val)
+{
+	union {
+		uint64_t u;
+		int64_t s;
+	} bits = { .u = val };
+
+	return bits.s;
+}
+/* Reinterpret the bits of a signed 64-bit value as an unsigned one. */
+static inline uint64_t I642U64(int64_t val)
+{
+	union {
+		int64_t s;
+		uint64_t u;
+	} bits = { .s = val };
+
+	return bits.u;
+}
+
 enum drm_connector_force {
 	DRM_FORCE_UNSPECIFIED,
 	DRM_FORCE_OFF,
@@ -110,6 +121,9 @@
 	enum subpixel_order subpixel_order;
 	u32 color_formats;
 
+	/* Mask of supported hdmi deep color modes */
+	u8 edid_hdmi_dc_modes;
+
 	u8 cea_rev;
 };
 
@@ -190,10 +204,15 @@
 	char name[DRM_PROP_NAME_LEN];
 	uint32_t num_values;
 	uint64_t *values;
+	struct drm_device *dev;
 
 	struct list_head enum_blob_list;
 };
 
+void drm_modeset_lock_all(struct drm_device *dev);
+void drm_modeset_unlock_all(struct drm_device *dev);
+void drm_warn_on_modeset_not_all_locked(struct drm_device *dev);
+
 struct drm_crtc;
 struct drm_connector;
 struct drm_encoder;
@@ -269,6 +288,7 @@
  * drm_crtc - central CRTC control structure
  * @dev: parent DRM device
  * @head: list management
+ * @mutex: per-CRTC locking
  * @base: base KMS object for ID tracking etc.
  * @primary: primary plane for this CRTC
  * @cursor: cursor plane for this CRTC
@@ -303,7 +323,7 @@
 	 * state, ...) and a write lock for everything which can be update
 	 * without a full modeset (fb, cursor data, ...)
 	 */
-	struct mutex mutex;
+	struct drm_modeset_lock mutex;
 
 	struct drm_mode_object base;
 
@@ -400,6 +420,7 @@
  * @dev: parent DRM device
  * @head: list management
  * @base: base KMS object
+ * @name: encoder name
  * @encoder_type: one of the %DRM_MODE_ENCODER_<foo> types in drm_mode.h
  * @possible_crtcs: bitmask of potential CRTC bindings
  * @possible_clones: bitmask of potential sibling encoders for cloning
@@ -416,6 +437,7 @@
 	struct list_head head;
 
 	struct drm_mode_object base;
+	char *name;
 	int encoder_type;
 	uint32_t possible_crtcs;
 	uint32_t possible_clones;
@@ -444,6 +466,7 @@
  * @attr: sysfs attributes
  * @head: list management
  * @base: base KMS object
+ * @name: connector name
  * @connector_type: one of the %DRM_MODE_CONNECTOR_<foo> types from drm_mode.h
  * @connector_type_id: index into connector type enum
  * @interlace_allowed: can this connector handle interlaced modes?
@@ -482,6 +505,7 @@
 
 	struct drm_mode_object base;
 
+	char *name;
 	int connector_type;
 	int connector_type_id;
 	bool interlace_allowed;
@@ -723,6 +747,8 @@
  */
 struct drm_mode_config {
 	struct mutex mutex; /* protects configuration (mode lists etc.) */
+	struct drm_modeset_lock connection_mutex; /* protects connector->encoder and encoder->crtc links */
+	struct drm_modeset_acquire_ctx *acquire_ctx; /* for legacy _lock_all() / _unlock_all() */
 	struct mutex idr_mutex; /* for IDR management */
 	struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */
 	/* this is limited to one for now */
@@ -823,10 +849,6 @@
 	char *name;
 };
 
-extern void drm_modeset_lock_all(struct drm_device *dev);
-extern void drm_modeset_unlock_all(struct drm_device *dev);
-extern void drm_warn_on_modeset_not_all_locked(struct drm_device *dev);
-
 extern int drm_crtc_init_with_planes(struct drm_device *dev,
 				     struct drm_crtc *crtc,
 				     struct drm_plane *primary,
@@ -905,7 +927,6 @@
 
 extern void drm_encoder_cleanup(struct drm_encoder *encoder);
 
-extern const char *drm_get_connector_name(const struct drm_connector *connector);
 extern const char *drm_get_connector_status_name(enum drm_connector_status status);
 extern const char *drm_get_subpixel_order_name(enum subpixel_order order);
 extern const char *drm_get_dpms_name(int val);
@@ -915,6 +936,7 @@
 extern const char *drm_get_tv_select_name(int val);
 extern void drm_fb_release(struct drm_file *file_priv);
 extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group);
+extern void drm_mode_group_destroy(struct drm_mode_group *group);
 extern bool drm_probe_ddc(struct i2c_adapter *adapter);
 extern struct edid *drm_get_edid(struct drm_connector *connector,
 				 struct i2c_adapter *adapter);
@@ -926,6 +948,23 @@
 
 extern int drm_mode_connector_update_edid_property(struct drm_connector *connector,
 						struct edid *edid);
+
+static inline bool drm_property_type_is(struct drm_property *property,
+		uint32_t type)
+{
+	/* instanceof for props.. handles extended type vs original types: */
+	if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE)
+		return (property->flags & DRM_MODE_PROP_EXTENDED_TYPE) == type; /* exact match on the extended-type field */
+	return property->flags & type; /* original types: any shared flag bit matches */
+}
+
+static inline bool drm_property_type_valid(struct drm_property *property)
+{
+	if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE)
+		return !(property->flags & DRM_MODE_PROP_LEGACY_TYPE); /* extended type set: no legacy type bits allowed */
+	return !!(property->flags & DRM_MODE_PROP_LEGACY_TYPE); /* otherwise a legacy type bit is required */
+}
+
 extern int drm_object_property_set_value(struct drm_mode_object *obj,
 					 struct drm_property *property,
 					 uint64_t val);
@@ -959,6 +998,11 @@
 struct drm_property *drm_property_create_range(struct drm_device *dev, int flags,
 					 const char *name,
 					 uint64_t min, uint64_t max);
+struct drm_property *drm_property_create_signed_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 int64_t min, int64_t max);
+struct drm_property *drm_property_create_object(struct drm_device *dev,
+					 int flags, const char *name, uint32_t type);
 extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
 extern int drm_property_add_enum(struct drm_property *property, int index,
 				 uint64_t value, const char *name);
@@ -967,7 +1011,6 @@
 				     char *formats[]);
 extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
 extern int drm_mode_create_dirty_info_property(struct drm_device *dev);
-extern const char *drm_get_encoder_name(const struct drm_encoder *encoder);
 
 extern int drm_mode_connector_attach_encoder(struct drm_connector *connector,
 					     struct drm_encoder *encoder);
@@ -975,6 +1018,7 @@
 					 int gamma_size);
 extern struct drm_mode_object *drm_mode_object_find(struct drm_device *dev,
 		uint32_t id, uint32_t type);
+
 /* IOCTLs */
 extern int drm_mode_getresources(struct drm_device *dev,
 				 void *data, struct drm_file *file_priv);
@@ -1020,6 +1064,7 @@
 extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 				    void *data, struct drm_file *file_priv);
 extern u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
+extern enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code);
 extern bool drm_detect_hdmi_monitor(struct edid *edid);
 extern bool drm_detect_monitor_audio(struct edid *edid);
 extern bool drm_rgb_quant_range_selectable(struct edid *edid);
@@ -1057,6 +1102,15 @@
 extern const char *drm_get_format_name(uint32_t format);
 
 /* Helpers */
+
+static inline struct drm_plane *drm_plane_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PLANE);
+	return mo ? obj_to_plane(mo) : NULL; /* NULL if the lookup returned nothing */
+}
+
 static inline struct drm_crtc *drm_crtc_find(struct drm_device *dev,
 	uint32_t id)
 {
@@ -1073,6 +1127,30 @@
 	return mo ? obj_to_encoder(mo) : NULL;
 }
 
+static inline struct drm_connector *drm_connector_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_CONNECTOR);
+	return mo ? obj_to_connector(mo) : NULL; /* NULL if the lookup returned nothing */
+}
+
+static inline struct drm_property *drm_property_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PROPERTY);
+	return mo ? obj_to_property(mo) : NULL; /* NULL if the lookup returned nothing */
+}
+
+static inline struct drm_property_blob *
+drm_property_blob_find(struct drm_device *dev, uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_BLOB);
+	return mo ? obj_to_blob(mo) : NULL; /* NULL if the lookup returned nothing */
+}
+
 /* Plane list iterator for legacy (overlay only) planes. */
 #define drm_for_each_legacy_plane(plane, planelist) \
 	list_for_each_entry(plane, planelist, head) \

diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 36a5feb..a3d75fe 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h

@@ -114,7 +114,7 @@
 /**
  * drm_connector_helper_funcs - helper operations for connectors
  * @get_modes: get mode list for this connector
- * @mode_valid: is this mode valid on the given connector?
+ * @mode_valid (optional): is this mode valid on the given connector?
  *
  * The helper operations are called by the mid-layer CRTC helper.
  */
@@ -165,6 +165,10 @@
 extern int drm_helper_probe_single_connector_modes(struct drm_connector
 						   *connector, uint32_t maxX,
 						   uint32_t maxY);
+extern int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector
+							   *connector,
+							   uint32_t maxX,
+							   uint32_t maxY);
 extern void drm_kms_helper_poll_init(struct drm_device *dev);
 extern void drm_kms_helper_poll_fini(struct drm_device *dev);
 extern bool drm_helper_hpd_irq_event(struct drm_device *dev);

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index cfcacec..a21568b 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h

@@ -37,6 +37,7 @@
  * eDP: Embedded DisplayPort version 1
  * DPI: DisplayPort Interoperability Guideline v1.1a
  * 1.2: DisplayPort 1.2
+ * MST: Multistream Transport - part of DP 1.2a
  *
  * 1.2 formally includes both eDP and DPI definitions.
  */
@@ -103,9 +104,14 @@
 #define DP_TRAINING_AUX_RD_INTERVAL         0x00e   /* XXX 1.2? */
 
 /* Multiple stream transport */
+#define DP_FAUX_CAP			    0x020   /* 1.2 */
+# define DP_FAUX_CAP_1			    (1 << 0)
+
 #define DP_MSTM_CAP			    0x021   /* 1.2 */
 # define DP_MST_CAP			    (1 << 0)
 
+#define DP_GUID				    0x030   /* 1.2 */
+
 #define DP_PSR_SUPPORT                      0x070   /* XXX 1.2? */
 # define DP_PSR_IS_SUPPORTED                1
 #define DP_PSR_CAPS                         0x071   /* XXX 1.2? */
@@ -221,6 +227,16 @@
 # define DP_PSR_CRC_VERIFICATION	    (1 << 2)
 # define DP_PSR_FRAME_CAPTURE		    (1 << 3)
 
+#define DP_ADAPTER_CTRL			    0x1a0
+# define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE   (1 << 0)
+
+#define DP_BRANCH_DEVICE_CTRL		    0x1a1
+# define DP_BRANCH_DEVICE_IRQ_HPD	    (1 << 0)
+
+#define DP_PAYLOAD_ALLOCATE_SET		    0x1c0
+#define DP_PAYLOAD_ALLOCATE_START_TIME_SLOT 0x1c1
+#define DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT 0x1c2
+
 #define DP_SINK_COUNT			    0x200
 /* prior to 1.2 bit 7 was reserved mbz */
 # define DP_GET_SINK_COUNT(x)		    ((((x) & 0x80) >> 1) | ((x) & 0x3f))
@@ -230,6 +246,9 @@
 # define DP_REMOTE_CONTROL_COMMAND_PENDING  (1 << 0)
 # define DP_AUTOMATED_TEST_REQUEST	    (1 << 1)
 # define DP_CP_IRQ			    (1 << 2)
+# define DP_MCCS_IRQ			    (1 << 3)
+# define DP_DOWN_REP_MSG_RDY		    (1 << 4) /* 1.2 MST */
+# define DP_UP_REQ_MSG_RDY		    (1 << 5) /* 1.2 MST */
 # define DP_SINK_SPECIFIC_IRQ		    (1 << 6)
 
 #define DP_LANE0_1_STATUS		    0x202
@@ -291,9 +310,18 @@
 # define DP_TEST_NAK			    (1 << 1)
 # define DP_TEST_EDID_CHECKSUM_WRITE	    (1 << 2)
 
+#define DP_TEST_EDID_CHECKSUM		    0x261
+
 #define DP_TEST_SINK			    0x270
 #define DP_TEST_SINK_START	    (1 << 0)
 
+#define DP_PAYLOAD_TABLE_UPDATE_STATUS      0x2c0   /* 1.2 MST */
+# define DP_PAYLOAD_TABLE_UPDATED           (1 << 0)
+# define DP_PAYLOAD_ACT_HANDLED             (1 << 1)
+
+#define DP_VC_PAYLOAD_ID_SLOT_1             0x2c1   /* 1.2 MST */
+/* up to ID_SLOT_63 at 0x2ff */
+
 #define DP_SOURCE_OUI			    0x300
 #define DP_SINK_OUI			    0x400
 #define DP_BRANCH_OUI			    0x500
@@ -303,6 +331,21 @@
 # define DP_SET_POWER_D3                    0x2
 # define DP_SET_POWER_MASK                  0x3
 
+#define DP_SIDEBAND_MSG_DOWN_REQ_BASE	    0x1000   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_UP_REP_BASE	    0x1200   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_DOWN_REP_BASE	    0x1400   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_UP_REQ_BASE	    0x1600   /* 1.2 MST */
+
+#define DP_SINK_COUNT_ESI		    0x2002   /* 1.2 */
+/* 0-5 sink count */
+# define DP_SINK_COUNT_CP_READY             (1 << 6)
+
+#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0   0x2003   /* 1.2 */
+
+#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1   0x2004   /* 1.2 */
+
+#define DP_LINK_SERVICE_IRQ_VECTOR_ESI0     0x2005   /* 1.2 */
+
 #define DP_PSR_ERROR_STATUS                 0x2006  /* XXX 1.2? */
 # define DP_PSR_LINK_CRC_ERROR              (1 << 0)
 # define DP_PSR_RFB_STORAGE_ERROR           (1 << 1)
@@ -319,6 +362,43 @@
 # define DP_PSR_SINK_INTERNAL_ERROR         7
 # define DP_PSR_SINK_STATE_MASK             0x07
 
+/* DP 1.2 Sideband message defines */
+/* peer device type - DP 1.2a Table 2-92 */
+#define DP_PEER_DEVICE_NONE		0x0
+#define DP_PEER_DEVICE_SOURCE_OR_SST	0x1
+#define DP_PEER_DEVICE_MST_BRANCHING	0x2
+#define DP_PEER_DEVICE_SST_SINK		0x3
+#define DP_PEER_DEVICE_DP_LEGACY_CONV	0x4
+
+/* DP 1.2 MST sideband request names DP 1.2a Table 2-80 */
+#define DP_LINK_ADDRESS			0x01
+#define DP_CONNECTION_STATUS_NOTIFY	0x02
+#define DP_ENUM_PATH_RESOURCES		0x10
+#define DP_ALLOCATE_PAYLOAD		0x11
+#define DP_QUERY_PAYLOAD		0x12
+#define DP_RESOURCE_STATUS_NOTIFY	0x13
+#define DP_CLEAR_PAYLOAD_ID_TABLE	0x14
+#define DP_REMOTE_DPCD_READ		0x20
+#define DP_REMOTE_DPCD_WRITE		0x21
+#define DP_REMOTE_I2C_READ		0x22
+#define DP_REMOTE_I2C_WRITE		0x23
+#define DP_POWER_UP_PHY			0x24
+#define DP_POWER_DOWN_PHY		0x25
+#define DP_SINK_EVENT_NOTIFY		0x30
+#define DP_QUERY_STREAM_ENC_STATUS	0x38
+
+/* DP 1.2 MST sideband nak reasons - table 2.84 */
+#define DP_NAK_WRITE_FAILURE		0x01
+#define DP_NAK_INVALID_READ		0x02
+#define DP_NAK_CRC_FAILURE		0x03
+#define DP_NAK_BAD_PARAM		0x04
+#define DP_NAK_DEFER			0x05
+#define DP_NAK_LINK_FAILURE		0x06
+#define DP_NAK_NO_RESOURCES		0x07
+#define DP_NAK_DPCD_FAIL		0x08
+#define DP_NAK_I2C_NAK			0x09
+#define DP_NAK_ALLOCATE_FAIL		0x0a
+
 #define MODE_I2C_START	1
 #define MODE_I2C_WRITE	2
 #define MODE_I2C_READ	4
@@ -431,8 +511,10 @@
 
 /**
  * struct drm_dp_aux - DisplayPort AUX channel
+ * @name: user-visible name of this AUX channel and the I2C-over-AUX adapter
  * @ddc: I2C adapter that can be used for I2C-over-AUX communication
  * @dev: pointer to struct device that is the parent for this AUX channel
+ * @hw_mutex: internal mutex used for locking transfers
  * @transfer: transfers a message representing a single AUX transaction
  *
  * The .dev field should be set to a pointer to the device that implements
@@ -465,7 +547,7 @@
 	const char *name;
 	struct i2c_adapter ddc;
 	struct device *dev;
-
+	struct mutex hw_mutex;
 	ssize_t (*transfer)(struct drm_dp_aux *aux,
 			    struct drm_dp_aux_msg *msg);
 };
@@ -524,7 +606,7 @@
 int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link);
 
-int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux);
-void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux);
+int drm_dp_aux_register(struct drm_dp_aux *aux);
+void drm_dp_aux_unregister(struct drm_dp_aux *aux);
 
 #endif /* _DRM_DP_HELPER_H_ */

diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index a1441c5..b96031d 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h

@@ -202,6 +202,11 @@
 #define DRM_EDID_FEATURE_PM_SUSPEND       (1 << 6)
 #define DRM_EDID_FEATURE_PM_STANDBY       (1 << 7)
 
+#define DRM_EDID_HDMI_DC_48               (1 << 6)
+#define DRM_EDID_HDMI_DC_36               (1 << 5)
+#define DRM_EDID_HDMI_DC_30               (1 << 4)
+#define DRM_EDID_HDMI_DC_Y444             (1 << 3)
+
 struct edid {
 	u8 header[8];
 	/* Vendor & product info */

diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 6e622f7..7997246 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h

@@ -108,7 +108,7 @@
 int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 			    struct fb_info *info);
 
-bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper);
+bool drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper);
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height);
 void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,

diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
index 35c776a..9eed34d 100644
--- a/include/drm/drm_flip_work.h
+++ b/include/drm/drm_flip_work.h

@@ -57,6 +57,7 @@
  * @count: number of committed items
  * @func: callback fxn called for each committed item
  * @worker: worker which calls @func
+ * @fifo: queue of committed items
  */
 struct drm_flip_work {
 	const char *name;

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 7209df1..944f33f 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h

@@ -135,11 +135,13 @@
  * @driver: device driver model driver
  * @probe: callback for device binding
  * @remove: callback for device unbinding
+ * @shutdown: called at shutdown time to quiesce the device
  */
 struct mipi_dsi_driver {
 	struct device_driver driver;
 	int(*probe)(struct mipi_dsi_device *dsi);
 	int(*remove)(struct mipi_dsi_device *dsi);
+	void (*shutdown)(struct mipi_dsi_device *dsi);
 };
 
 #define to_mipi_dsi_driver(d) container_of(d, struct mipi_dsi_driver, driver)

diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 2dbbf99..91d0582 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h

@@ -223,7 +223,7 @@
 void drm_mode_prune_invalid(struct drm_device *dev,
 			    struct list_head *mode_list, bool verbose);
 void drm_mode_sort(struct list_head *mode_list);
-void drm_mode_connector_list_update(struct drm_connector *connector);
+void drm_mode_connector_list_update(struct drm_connector *connector, bool merge_type_bits);
 
 /* parsing cmdline modes */
 bool

diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
new file mode 100644
index 0000000..402aa7a
--- /dev/null
+++ b/include/drm/drm_modeset_lock.h

@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DRM_MODESET_LOCK_H_
+#define DRM_MODESET_LOCK_H_
+
+#include <linux/ww_mutex.h>
+
+struct drm_modeset_lock;
+
+/**
+ * struct drm_modeset_acquire_ctx - locking context (see ww_acquire_ctx)
+ * @ww_ctx: base acquire ctx
+ * @contended: used internally for -EDEADLK handling
+ * @locked: list of held locks
+ *
+ * Each thread competing for a set of locks must use one acquire
+ * ctx.  And if any lock fxn returns -EDEADLK, it must backoff and
+ * retry.
+ */
+struct drm_modeset_acquire_ctx {
+
+	struct ww_acquire_ctx ww_ctx;
+
+	/**
+	 * Contended lock: if a lock is contended you should only call
+	 * drm_modeset_backoff() which drops locks and slow-locks the
+	 * contended lock.
+	 */
+	struct drm_modeset_lock *contended;
+
+	/**
+	 * list of held locks (drm_modeset_lock)
+	 */
+	struct list_head locked;
+};
+
+/**
+ * struct drm_modeset_lock - used for locking modeset resources.
+ * @mutex: resource locking
+ * @head: used to hold its place on state->locked list when
+ *    part of an atomic update
+ *
+ * Used for locking CRTCs and other modeset resources.
+ */
+struct drm_modeset_lock {
+	/**
+	 * modeset lock
+	 */
+	struct ww_mutex mutex;
+
+	/**
+	 * Resources that are locked as part of an atomic update are added
+	 * to a list (so we know what to unlock at the end).
+	 */
+	struct list_head head;
+};
+
+extern struct ww_class crtc_ww_class;
+
+void drm_modeset_acquire_init(struct drm_modeset_acquire_ctx *ctx,
+		uint32_t flags);
+void drm_modeset_acquire_fini(struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_backoff(struct drm_modeset_acquire_ctx *ctx);
+int drm_modeset_backoff_interruptible(struct drm_modeset_acquire_ctx *ctx);
+
+/**
+ * drm_modeset_lock_init - initialize lock
+ * @lock: lock to init
+ */
+static inline void drm_modeset_lock_init(struct drm_modeset_lock *lock)
+{
+	ww_mutex_init(&lock->mutex, &crtc_ww_class); /* locks set up here all use crtc_ww_class */
+	INIT_LIST_HEAD(&lock->head); /* starts out unlinked from any list */
+}
+
+/**
+ * drm_modeset_lock_fini - cleanup lock
+ * @lock: lock to cleanup
+ */
+static inline void drm_modeset_lock_fini(struct drm_modeset_lock *lock)
+{
+	WARN_ON(!list_empty(&lock->head)); /* only a check: warns if @head is still linked into a list */
+}
+
+/**
+ * drm_modeset_is_locked - equivalent to mutex_is_locked()
+ * @lock: lock to check
+ */
+static inline bool drm_modeset_is_locked(struct drm_modeset_lock *lock)
+{
+	return ww_mutex_is_locked(&lock->mutex);
+}
+
+int drm_modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx);
+int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_unlock(struct drm_modeset_lock *lock);
+
+struct drm_device;
+int drm_modeset_lock_all_crtcs(struct drm_device *dev,
+		struct drm_modeset_acquire_ctx *ctx);
+
+#endif /* DRM_MODESET_LOCK_H_ */

diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index 09824be..52e6870 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h

@@ -24,6 +24,17 @@
 #ifndef DRM_PLANE_HELPER_H
 #define DRM_PLANE_HELPER_H
 
+#include <drm/drm_rect.h>
+
+/*
+ * Drivers that don't allow primary plane scaling may pass this macro in place
+ * of the min/max scale parameters of the update checker function.
+ *
+ * Due to src being in 16.16 fixed point and dest being in integer pixels,
+ * 1<<16 represents no scaling.
+ */
+#define DRM_PLANE_HELPER_NO_SCALING (1<<16)
+
 /**
  * DOC: plane helpers
  *
@@ -31,6 +42,17 @@
  * planes.
  */
 
+extern int drm_plane_helper_check_update(struct drm_plane *plane,
+					 struct drm_crtc *crtc,
+					 struct drm_framebuffer *fb,
+					 struct drm_rect *src,
+					 struct drm_rect *dest,
+					 const struct drm_rect *clip,
+					 int min_scale,
+					 int max_scale,
+					 bool can_position,
+					 bool can_update_disabled,
+					 bool *visible);
 extern int drm_primary_helper_update(struct drm_plane *plane,
 				     struct drm_crtc *crtc,
 				     struct drm_framebuffer *fb,
@@ -42,7 +64,7 @@
 extern void drm_primary_helper_destroy(struct drm_plane *plane);
 extern const struct drm_plane_funcs drm_primary_helper_funcs;
 extern struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev,
-							 uint32_t *formats,
+							 const uint32_t *formats,
 							 int num_formats);
 
 

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 012d58f..0572035 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h

@@ -223,14 +223,32 @@
 	_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
 	_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
 	_INTEL_BDW_M_IDS(1, info), \
-	_INTEL_BDW_M_IDS(2, info), \
+	_INTEL_BDW_M_IDS(2, info)
+
+#define INTEL_BDW_GT12D_IDS(info) \
+	_INTEL_BDW_D_IDS(1, info), \
+	_INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
 	_INTEL_BDW_M_IDS(3, info)
 
-#define INTEL_BDW_D_IDS(info) \
-	_INTEL_BDW_D_IDS(1, info), \
-	_INTEL_BDW_D_IDS(2, info), \
+#define INTEL_BDW_GT3D_IDS(info) \
 	_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+	INTEL_BDW_GT12M_IDS(info), \
+	INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+	INTEL_BDW_GT12D_IDS(info), \
+	INTEL_BDW_GT3D_IDS(info)
+
+#define INTEL_CHV_IDS(info) \
+	INTEL_VGA_DEVICE(0x22b0, info), \
+	INTEL_VGA_DEVICE(0x22b1, info), \
+	INTEL_VGA_DEVICE(0x22b2, info), \
+	INTEL_VGA_DEVICE(0x22b3, info)
+
 #endif /* _I915_PCIIDS_H */

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index ee127ec..7526c5b 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h

@@ -485,13 +485,12 @@
 			void (*destroy) (struct ttm_buffer_object *));
 
 /**
- * ttm_bo_synccpu_object_init
+ * ttm_bo_create
  *
  * @bdev: Pointer to a ttm_bo_device struct.
- * @bo: Pointer to a ttm_buffer_object to be initialized.
  * @size: Requested size of buffer object.
  * @type: Requested type of buffer object.
- * @flags: Initial placement flags.
+ * @placement: Initial placement.
  * @page_alignment: Data alignment in pages.
  * @interruptible: If needing to sleep while waiting for GPU resources,
  * sleep interruptible.

diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1f16d50..6dfd51a 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h

@@ -44,6 +44,7 @@
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map);
 

diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h
new file mode 100644
index 0000000..34b98f2
--- /dev/null
+++ b/include/linux/amba/xilinx_dma.h

@@ -0,0 +1,47 @@
+/*
+ * Xilinx DMA Engine drivers support header file
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DMA_XILINX_DMA_H
+#define __DMA_XILINX_DMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct xilinx_vdma_config - VDMA Configuration structure
+ * @frm_dly: Frame delay
+ * @gen_lock: Whether in gen-lock mode
+ * @master: Master that it syncs to
+ * @frm_cnt_en: Frame count enable
+ * @park: Whether wants to park
+ * @park_frm: Frame to park on
+ * @coalesc: Interrupt coalescing threshold
+ * @delay: Delay counter
+ * @reset: Reset Channel
+ * @ext_fsync: External Frame Sync source
+ */
+struct xilinx_vdma_config {
+	int frm_dly;
+	int gen_lock;
+	int master;
+	int frm_cnt_en;
+	int park;
+	int park_frm;
+	int coalesc;
+	int delay;
+	int reset;
+	int ext_fsync;
+};
+
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg);
+
+#endif

diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
index 8598f8e..a495a95 100644
--- a/include/linux/ath9k_platform.h
+++ b/include/linux/ath9k_platform.h

@@ -36,6 +36,8 @@
 
 	int (*get_mac_revision)(void);
 	int (*external_reset)(void);
+
+	bool use_eeprom;
 };
 
 #endif /* _LINUX_ATH9K_PLATFORM_H */

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0feedeb..a002cf1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h

@@ -135,7 +135,7 @@
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 
-	BLK_MQ_MAX_DEPTH	= 2048,
+	BLK_MQ_MAX_DEPTH	= 10240,
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d8e4cea..66c2167 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h

@@ -5,8 +5,6 @@
 #ifndef __LINUX_BLK_TYPES_H
 #define __LINUX_BLK_TYPES_H
 
-#ifdef CONFIG_BLOCK
-
 #include <linux/types.h>
 
 struct bio_set;
@@ -28,6 +26,8 @@
 	unsigned int	bv_offset;
 };
 
+#ifdef CONFIG_BLOCK
+
 struct bvec_iter {
 	sector_t		bi_sector;	/* device address in 512 byte
 						   sectors */

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3cd426e..31e1105 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h

@@ -280,6 +280,7 @@
 	unsigned long		seg_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
 	unsigned int		physical_block_size;
@@ -795,6 +796,7 @@
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
+extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
@@ -910,6 +912,20 @@
 	return q->limits.max_sectors;
 }
 
+/*
+ * Return maximum size of a request at given offset. Only valid for
+ * file system requests. The mask is a modulo only if chunk_sectors is a power of two.
+ */
+static inline unsigned int blk_max_size_offset(struct request_queue *q,
+					       sector_t offset)
+{
+	if (!q->limits.chunk_sectors)
+		return q->limits.max_hw_sectors; /* no chunk size configured */
+
+	return q->limits.chunk_sectors -
+			(offset & (q->limits.chunk_sectors - 1)); /* sectors left before the next chunk boundary */
+}
+
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -917,7 +933,11 @@
 	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		return q->limits.max_hw_sectors;
 
-	return blk_queue_get_max_sectors(q, rq->cmd_flags);
+	if (!q->limits.chunk_sectors)
+		return blk_queue_get_max_sectors(q, rq->cmd_flags);
+
+	return min(blk_max_size_offset(q, blk_rq_pos(rq)),
+			blk_queue_get_max_sectors(q, rq->cmd_flags));
 }
 
 static inline unsigned int blk_rq_count_bios(struct request *rq)
@@ -983,6 +1003,7 @@
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 78c6c52..a087500 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h

@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_CORE_H
-#define CAN_CORE_H
+#ifndef _CAN_CORE_H
+#define _CAN_CORE_H
 
 #include <linux/can.h>
 #include <linux/skbuff.h>
@@ -58,4 +58,4 @@
 extern int can_send(struct sk_buff *skb, int loop);
 extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
-#endif /* CAN_CORE_H */
+#endif /* !_CAN_CORE_H */

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 3ce5e52..6992afc 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h

@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_DEV_H
-#define CAN_DEV_H
+#ifndef _CAN_DEV_H
+#define _CAN_DEV_H
 
 #include <linux/can.h>
 #include <linux/can/netlink.h>
@@ -132,4 +132,4 @@
 struct sk_buff *alloc_can_err_skb(struct net_device *dev,
 				  struct can_frame **cf);
 
-#endif /* CAN_DEV_H */
+#endif /* !_CAN_DEV_H */

diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index 9c1167ba..e0475c5 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h

@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef CAN_LED_H
-#define CAN_LED_H
+#ifndef _CAN_LED_H
+#define _CAN_LED_H
 
 #include <linux/if.h>
 #include <linux/leds.h>
@@ -48,4 +48,4 @@
 
 #endif
 
-#endif
+#endif /* !_CAN_LED_H */

diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h
index 7702641..78b2d44 100644
--- a/include/linux/can/platform/cc770.h
+++ b/include/linux/can/platform/cc770.h

@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_CC770_H_
-#define _CAN_PLATFORM_CC770_H_
+#ifndef _CAN_PLATFORM_CC770_H
+#define _CAN_PLATFORM_CC770_H
 
 /* CPU Interface Register (0x02) */
 #define CPUIF_CEN	0x01	/* Clock Out Enable */
@@ -30,4 +30,4 @@
 	u8 bcr;		/* Bus Configuration Register */
 };
 
-#endif	/* !_CAN_PLATFORM_CC770_H_ */
+#endif	/* !_CAN_PLATFORM_CC770_H */

diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
index dc029db..d44fcae 100644
--- a/include/linux/can/platform/mcp251x.h
+++ b/include/linux/can/platform/mcp251x.h

@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_MCP251X_H__
-#define __CAN_PLATFORM_MCP251X_H__
+#ifndef _CAN_PLATFORM_MCP251X_H
+#define _CAN_PLATFORM_MCP251X_H
 
 /*
  *
@@ -18,4 +18,4 @@
 	unsigned long oscillator_frequency;
 };
 
-#endif /* __CAN_PLATFORM_MCP251X_H__ */
+#endif /* !_CAN_PLATFORM_MCP251X_H */

diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h
new file mode 100644
index 0000000..0f4a2f3
--- /dev/null
+++ b/include/linux/can/platform/rcar_can.h

@@ -0,0 +1,17 @@
+#ifndef _CAN_PLATFORM_RCAR_CAN_H_
+#define _CAN_PLATFORM_RCAR_CAN_H_
+
+#include <linux/types.h>
+
+/* Clock Select Register settings */
+enum CLKR {
+	CLKR_CLKP1 = 0,	/* Peripheral clock (clkp1) */
+	CLKR_CLKP2 = 1,	/* Peripheral clock (clkp2) */
+	CLKR_CLKEXT = 3	/* Externally input clock */
+};
+
+struct rcar_can_platform_data {
+	enum CLKR clock_select;	/* Clock source select */
+};
+
+#endif	/* !_CAN_PLATFORM_RCAR_CAN_H_ */

diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 96f8fcc..93570b6 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h

@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_SJA1000_H_
-#define _CAN_PLATFORM_SJA1000_H_
+#ifndef _CAN_PLATFORM_SJA1000_H
+#define _CAN_PLATFORM_SJA1000_H
 
 /* clock divider register */
 #define CDR_CLKOUT_MASK 0x07
@@ -32,4 +32,4 @@
 	u8 cdr;		/* clock divider register */
 };
 
-#endif	/* !_CAN_PLATFORM_SJA1000_H_ */
+#endif	/* !_CAN_PLATFORM_SJA1000_H */

diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
index af17cb3..a52f47c 100644
--- a/include/linux/can/platform/ti_hecc.h
+++ b/include/linux/can/platform/ti_hecc.h

@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_TI_HECC_H__
-#define __CAN_PLATFORM_TI_HECC_H__
+#ifndef _CAN_PLATFORM_TI_HECC_H
+#define _CAN_PLATFORM_TI_HECC_H
 
 /*
  * TI HECC (High End CAN Controller) driver platform header
@@ -41,4 +41,4 @@
 	u32 version;
 	void (*transceiver_switch) (int);
 };
-#endif
+#endif /* !_CAN_PLATFORM_TI_HECC_H */

diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index f9bbbb4..cc00d15 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h

@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef CAN_SKB_H
-#define CAN_SKB_H
+#ifndef _CAN_SKB_H
+#define _CAN_SKB_H
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
@@ -80,4 +80,4 @@
 	return skb;
 }
 
-#endif /* CAN_SKB_H */
+#endif /* !_CAN_SKB_H */

diff --git a/include/linux/capability.h b/include/linux/capability.h
index a6ee1f9..84b13ad 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h

@@ -210,7 +210,7 @@
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool inode_capable(const struct inode *inode, int cap);
+extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
 /* audit system wants to get cap info from files as well */

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 5f6db18..3c97d5e 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h

@@ -625,6 +625,8 @@
 			   CEPH_CAP_LINK_EXCL |		\
 			   CEPH_CAP_XATTR_EXCL |	\
 			   CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \
+			      CEPH_CAP_FILE_SHARED)
 #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |	\
 			      CEPH_CAP_FILE_EXCL)
 #define CEPH_CAP_ANY_WR   (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2f49aa4..279b0af 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h

@@ -222,8 +222,6 @@
 extern void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
-				    loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
 

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index a486f39..deb47e4 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h

@@ -40,9 +40,9 @@
 };
 
 /*
- * ceph_mon_generic_request is being used for the statfs and poolop requests
- * which are bening done a bit differently because we need to get data back
- * to the caller
+ * ceph_mon_generic_request is being used for the statfs, poolop and
+ * mon_get_version requests which are being done a bit differently
+ * because we need to get data back to the caller
  */
 struct ceph_mon_generic_request {
 	struct kref kref;
@@ -104,10 +104,15 @@
 extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
 
 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+				 unsigned long timeout);
 
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
 
+extern int ceph_monc_do_get_version(struct ceph_mon_client *monc,
+				    const char *what, u64 *newest);
+
 extern int ceph_monc_open_session(struct ceph_mon_client *monc);
 
 extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d60904b..8a111dd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h

@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
+#include <linux/wait.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -47,21 +48,45 @@
 };
 #undef SUBSYS
 
-/* Per-subsystem/per-cgroup state maintained by the system. */
+/*
+ * Per-subsystem/per-cgroup state maintained by the system.  This is the
+ * fundamental structural building block that controllers deal with.
+ *
+ * Fields marked with "PI:" are public and immutable and may be accessed
+ * directly without synchronization.
+ */
 struct cgroup_subsys_state {
-	/* the cgroup that this css is attached to */
+	/* PI: the cgroup that this css is attached to */
 	struct cgroup *cgroup;
 
-	/* the cgroup subsystem that this css is attached to */
+	/* PI: the cgroup subsystem that this css is attached to */
 	struct cgroup_subsys *ss;
 
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
-	/* the parent css */
+	/* PI: the parent css */
 	struct cgroup_subsys_state *parent;
 
-	unsigned long flags;
+	/* siblings list anchored at the parent's ->children */
+	struct list_head sibling;
+	struct list_head children;
+
+	/*
+	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * matching css can be looked up using css_from_id().
+	 */
+	int id;
+
+	unsigned int flags;
+
+	/*
+	 * Monotonically increasing unique serial number which defines a
+	 * uniform order among all csses.  It's guaranteed that all
+	 * ->children lists are in the ascending order of ->serial_nr and
+	 * used to allow interrupting and resuming iterations.
+	 */
+	u64 serial_nr;
 
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
@@ -70,8 +95,9 @@
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
-	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
+	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
+	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 };
 
 /**
@@ -82,8 +108,7 @@
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	/* We don't need to reference count the root state */
-	if (!(css->flags & CSS_ROOT))
+	if (!(css->flags & CSS_NO_REF))
 		percpu_ref_get(&css->refcnt);
 }
 
@@ -91,35 +116,51 @@
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
  *
- * Obtain a reference on @css if it's alive.  The caller naturally needs to
- * ensure that @css is accessible but doesn't have to be holding a
+ * Obtain a reference on @css unless it already has reached zero and is
+ * being released.  This function doesn't care whether @css is on or
+ * offline.  The caller naturally needs to ensure that @css is accessible
+ * but doesn't have to be holding a reference on it - IOW, RCU protected
+ * access is good enough for this function.  Returns %true if a reference
+ * count was successfully obtained; %false otherwise.
+ */
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget(&css->refcnt);
+	return true;
+}
+
+/**
+ * css_tryget_online - try to obtain a reference on the specified css if online
+ * @css: target css
+ *
+ * Obtain a reference on @css if it's online.  The caller naturally needs
+ * to ensure that @css is accessible but doesn't have to be holding a
  * reference on it - IOW, RCU protected access is good enough for this
  * function.  Returns %true if a reference count was successfully obtained;
  * %false otherwise.
  */
-static inline bool css_tryget(struct cgroup_subsys_state *css)
+static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	if (css->flags & CSS_ROOT)
-		return true;
-	return percpu_ref_tryget(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget_live(&css->refcnt);
+	return true;
 }
 
 /**
  * css_put - put a css reference
  * @css: target css
  *
- * Put a reference obtained via css_get() and css_tryget().
+ * Put a reference obtained via css_get(), css_tryget() or css_tryget_online().
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_ROOT))
+	if (!(css->flags & CSS_NO_REF))
 		percpu_ref_put(&css->refcnt);
 }
 
 /* bits in struct cgroup flags field */
 enum {
-	/* Control Group is dead */
-	CGRP_DEAD,
 	/*
 	 * Control Group has previously had a child cgroup or a task,
 	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
@@ -133,48 +174,37 @@
 	 * specified at mount time and thus is implemented here.
 	 */
 	CGRP_CPUSET_CLONE_CHILDREN,
-	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
-	CGRP_SANE_BEHAVIOR,
 };
 
 struct cgroup {
+	/* self css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state self;
+
 	unsigned long flags;		/* "unsigned long" so bitops work */
 
 	/*
 	 * idr allocated in-hierarchy ID.
 	 *
-	 * The ID of the root cgroup is always 0, and a new cgroup
-	 * will be assigned with a smallest available ID.
+	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
+	 * new cgroup will be assigned with a smallest available ID.
 	 *
 	 * Allocating/Removing ID must be protected by cgroup_mutex.
 	 */
 	int id;
 
-	/* the number of attached css's */
-	int nr_css;
-
-	atomic_t refcnt;
-
 	/*
-	 * We link our 'sibling' struct into our parent's 'children'.
-	 * Our children link their 'sibling' into our 'children'.
+	 * If this cgroup contains any tasks, it contributes one to
+	 * populated_cnt.  All children with non-zero populated_cnt of
+	 * their own contribute one.  The count is zero iff there's no task
+	 * in this cgroup or its subtree.
 	 */
-	struct list_head sibling;	/* my parent's children */
-	struct list_head children;	/* my children */
+	int populated_cnt;
 
-	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
+	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
-	/*
-	 * Monotonically increasing unique serial number which defines a
-	 * uniform order among all cgroups.  It's guaranteed that all
-	 * ->children lists are in the ascending order of ->serial_nr.
-	 * It's used to allow interrupting and resuming iterations.
-	 */
-	u64 serial_nr;
-
-	/* The bitmask of subsystems attached to this cgroup */
-	unsigned long subsys_mask;
+	/* the bitmask of subsystems enabled on the child cgroups */
+	unsigned int child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -188,6 +218,15 @@
 	struct list_head cset_links;
 
 	/*
+	 * On the default hierarchy, a css_set for a cgroup with some
+	 * subsys disabled will point to css's which are associated with
+	 * the closest ancestor which has the subsys enabled.  The
+	 * following lists all css_sets which point to this cgroup's css
+	 * for the given subsystem.
+	 */
+	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
+
+	/*
 	 * Linked list running through all cgroups that can
 	 * potentially be reaped by the release agent. Protected by
 	 * release_list_lock
@@ -201,12 +240,8 @@
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
-	/* dummy css with NULL ->ss, points back to this cgroup */
-	struct cgroup_subsys_state dummy_css;
-
-	/* For css percpu_ref killing and RCU-protected deletion */
-	struct rcu_head rcu_head;
-	struct work_struct destroy_work;
+	/* used to wait for offlining of csses */
+	wait_queue_head_t offline_waitq;
 };
 
 #define MAX_CGROUP_ROOT_NAMELEN 64
@@ -250,6 +285,12 @@
 	 *
 	 * - "cgroup.clone_children" is removed.
 	 *
+	 * - "cgroup.subtree_populated" is available.  Its value is 0 if
+	 *   the cgroup and its descendants contain no task; otherwise, 1.
+	 *   The file also generates kernfs notification which can be
+	 *   monitored through poll and [di]notify when the value of the
+	 *   file changes.
+	 *
 	 * - If mount is requested with sane_behavior but without any
 	 *   subsystem, the default unified hierarchy is mounted.
 	 *
@@ -264,6 +305,8 @@
 	 *   the flag is not created.
 	 *
 	 * - blkcg: blk-throttle becomes properly hierarchical.
+	 *
+	 * - debug: disallowed on the default hierarchy.
 	 */
 	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0),
 
@@ -282,6 +325,9 @@
 struct cgroup_root {
 	struct kernfs_root *kf_root;
 
+	/* The bitmask of subsystems attached to this hierarchy */
+	unsigned int subsys_mask;
+
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
 
@@ -295,7 +341,7 @@
 	struct list_head root_list;
 
 	/* Hierarchy-specific flags */
-	unsigned long flags;
+	unsigned int flags;
 
 	/* IDs for cgroups in this hierarchy */
 	struct idr cgroup_idr;
@@ -342,6 +388,9 @@
 	 */
 	struct list_head cgrp_links;
 
+	/* the default cgroup associated with this css_set */
+	struct cgroup *dfl_cgrp;
+
 	/*
 	 * Set of subsystem states, one for each subsystem. This array is
 	 * immutable after creation apart from the init_css_set during
@@ -366,6 +415,15 @@
 	struct cgroup *mg_src_cgrp;
 	struct css_set *mg_dst_cset;
 
+	/*
+	 * On the default hierarchy, ->subsys[ssid] may point to a css
+	 * attached to an ancestor instead of the cgroup this css_set is
+	 * associated with.  The following node is anchored at
+	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+	 * iterate through all css's attached to a given cgroup.
+	 */
+	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
+
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
 };
@@ -405,8 +463,7 @@
 
 	/*
 	 * The maximum length of string, excluding trailing nul, that can
-	 * be passed to write_string.  If < PAGE_SIZE-1, PAGE_SIZE-1 is
-	 * assumed.
+	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
 	 */
 	size_t max_write_len;
 
@@ -453,19 +510,13 @@
 			 s64 val);
 
 	/*
-	 * write_string() is passed a nul-terminated kernelspace
-	 * buffer of maximum length determined by max_write_len.
-	 * Returns 0 or -ve error code.
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.  Use
+	 * of_css/cft() to access the associated css and cft.
 	 */
-	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer);
-	/*
-	 * trigger() callback can be used to get some kick from the
-	 * userspace, when the actual string written is not important
-	 * at all. The private field can be used to determine the
-	 * kick type for multiplexing.
-	 */
-	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
@@ -504,14 +555,24 @@
 		return 0;
 }
 
-static inline struct cftype *seq_cft(struct seq_file *seq)
+/* cft/css accessors for cftype->write() operation */
+static inline struct cftype *of_cft(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
-
 	return of->kn->priv;
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq);
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);
+
+/* cft/css accessors for cftype->seq_*() operations */
+static inline struct cftype *seq_cft(struct seq_file *seq)
+{
+	return of_cft(seq->private);
+}
+
+static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+{
+	return of_css(seq->private);
+}
 
 /*
  * Name / path handling functions.  All are thin wrappers around the kernfs
@@ -612,6 +673,9 @@
 	/* link to parent, protected by cgroup_lock() */
 	struct cgroup_root *root;
 
+	/* idr for css->id */
+	struct idr css_idr;
+
 	/*
 	 * List of cftypes.  Each entry is the first entry of an array
 	 * terminated by zero length name.
@@ -627,19 +691,6 @@
 #undef SUBSYS
 
 /**
- * css_parent - find the parent css
- * @css: the target cgroup_subsys_state
- *
- * Return the parent css of @css.  This function is guaranteed to return
- * non-NULL parent as long as @css isn't the root.
- */
-static inline
-struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
-{
-	return css->parent;
-}
-
-/**
  * task_css_set_check - obtain a task's css_set with extra access conditions
  * @task: the task to obtain css_set for
  * @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -731,14 +782,14 @@
  * @pos: the css * to use as the loop cursor
  * @parent: css whose children to walk
  *
- * Walk @parent's children.  Must be called under rcu_read_lock().  A child
- * css which hasn't finished ->css_online() or already has finished
- * ->css_offline() may show up during traversal and it's each subsystem's
- * responsibility to verify that each @pos is alive.
+ * Walk @parent's children.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
  * It is allowed to temporarily drop RCU read lock during iteration.  The
  * caller is responsible for ensuring that @pos remains accessible until
@@ -761,17 +812,16 @@
  * @root: css whose descendants to walk
  *
  * Walk @root's descendants.  @root is included in the iteration and the
- * first node to be visited.  Must be called under rcu_read_lock().  A
- * descendant css which hasn't finished ->css_online() or already has
- * finished ->css_offline() may show up during traversal and it's each
- * subsystem's responsibility to verify that each @pos is alive.
+ * first node to be visited.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
- * In other words, the following guarantees that a descendant can't escape
+ * For example, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
  * my_online(@css)
@@ -827,18 +877,34 @@
  *
  * Similar to css_for_each_descendant_pre() but performs post-order
  * traversal instead.  @root is included in the iteration and the last
- * node to be visited.  Note that the walk visibility guarantee described
- * in pre-order walk doesn't apply the same to post-order walks.
+ * node to be visited.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * Note that the walk visibility guarantee example described in pre-order
+ * walk doesn't apply the same to post-order walks.
  */
 #define css_for_each_descendant_post(pos, css)				\
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
 	     (pos) = css_next_descendant_post((pos), (css)))
 
+bool css_has_online_children(struct cgroup_subsys_state *css);
+
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
-	struct cgroup_subsys_state	*origin_css;
-	struct list_head		*cset_link;
-	struct list_head		*task;
+	struct cgroup_subsys		*ss;
+
+	struct list_head		*cset_pos;
+	struct list_head		*cset_head;
+
+	struct list_head		*task_pos;
+	struct list_head		*tasks_head;
+	struct list_head		*mg_tasks_head;
 };
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
@@ -849,8 +915,8 @@
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss);
 
 #else /* !CONFIG_CGROUPS */
 

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 768fe44..98c4f9b 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h

@@ -7,10 +7,6 @@
 SUBSYS(cpuset)
 #endif
 
-#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
-SUBSYS(debug)
-#endif
-
 #if IS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu)
 #endif
@@ -50,6 +46,13 @@
 #if IS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
+
+/*
+ * The following subsystems are not supported on the default hierarchy.
+ */
+#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
+SUBSYS(debug)
+#endif
 /*
  * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 64fdfe1..d5ad7b1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h

@@ -383,7 +383,9 @@
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes	__attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline	__always_inline
 #else
 # define __kprobes
+# define nokprobe_inline	inline
 #endif
 #endif /* __LINUX_COMPILER_H */

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 3f45889..ec4112d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h

@@ -75,6 +75,7 @@
 	unsigned int		max;    /* in kHz */
 	unsigned int		cur;    /* in kHz, only needed if cpufreq
 					 * governors are used */
+	unsigned int		restore_freq; /* = policy->cur before transition */
 	unsigned int		suspend_freq; /* freq to set during suspend */
 
 	unsigned int		policy; /* see above */
@@ -221,11 +222,35 @@
 
 	/* define one out of two */
 	int	(*setpolicy)	(struct cpufreq_policy *policy);
+
+	/*
+	 * On failure, should always restore frequency to policy->restore_freq
+	 * (i.e. old freq).
+	 */
 	int	(*target)	(struct cpufreq_policy *policy,	/* Deprecated */
 				 unsigned int target_freq,
 				 unsigned int relation);
 	int	(*target_index)	(struct cpufreq_policy *policy,
 				 unsigned int index);
+	/*
+	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
+	 * unset.
+	 *
+	 * get_intermediate should return a stable intermediate frequency
+	 * platform wants to switch to and target_intermediate() should set CPU
+	 * to that frequency, before jumping to the frequency corresponding
+	 * to 'index'. Core will take care of sending notifications and driver
+	 * doesn't have to handle them in target_intermediate() or
+	 * target_index().
+	 *
+	 * Drivers can return '0' from get_intermediate() in case they don't
+	 * wish to switch to intermediate frequency for some target frequency.
+	 * In that case core will directly call ->target_index().
+	 */
+	unsigned int (*get_intermediate)(struct cpufreq_policy *policy,
+					 unsigned int index);
+	int	(*target_intermediate)(struct cpufreq_policy *policy,
+				       unsigned int index);
 
 	/* should be defined, if possible */
 	unsigned int	(*get)	(unsigned int cpu);

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c51a436..25e0df6 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h

@@ -84,6 +84,7 @@
 };
 
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
 /**
  * cpuidle_get_last_residency - retrieves the last state's residency time

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2..2997af6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h

@@ -142,6 +142,13 @@
 	return 1;
 }
 
+static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	set_bit(0, cpumask_bits(dstp));
+
+	return 0;
+}
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
@@ -192,6 +199,7 @@
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
@@ -600,7 +608,7 @@
 static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
 {
 	char *nl = strchr(buf, '\n');
-	int len = nl ? nl - buf : strlen(buf);
+	unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
 	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }

diff --git a/include/linux/crc7.h b/include/linux/crc7.h
index 1786e77..d590765 100644
--- a/include/linux/crc7.h
+++ b/include/linux/crc7.h

@@ -2,13 +2,13 @@
 #define _LINUX_CRC7_H
 #include <linux/types.h>
 
-extern const u8 crc7_syndrome_table[256];
+extern const u8 crc7_be_syndrome_table[256];
 
-static inline u8 crc7_byte(u8 crc, u8 data)
+static inline u8 crc7_be_byte(u8 crc, u8 data)
 {
-	return crc7_syndrome_table[(crc << 1) ^ data];
+	return crc7_be_syndrome_table[crc ^ data];
 }
 
-extern u8 crc7(u8 crc, const u8 *buffer, size_t len);
+extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len);
 
 #endif

diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h
new file mode 100644
index 0000000..7009b8b
--- /dev/null
+++ b/include/linux/dell-led.h

@@ -0,0 +1,10 @@
+#ifndef __DELL_LED_H__
+#define __DELL_LED_H__
+
+enum {
+	DELL_LED_MICMUTE,
+};
+
+int dell_app_wmi_led_set(int whichled, int on);
+
+#endif

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 63da56e..e1707de 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h

@@ -115,12 +115,6 @@
 
 void dm_error(const char *message);
 
-/*
- * Combine device limits.
- */
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data);
-
 struct dm_dev {
 	struct block_device *bdev;
 	fmode_t mode;
@@ -132,7 +126,7 @@
  * are opened/closed correctly.
  */
 int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
-						 struct dm_dev **result);
+		  struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
 /*
@@ -291,6 +285,7 @@
 	struct dm_io *io;
 	struct dm_target *ti;
 	unsigned target_bio_nr;
+	unsigned *len_ptr;
 	struct bio clone;
 };
 
@@ -401,6 +396,7 @@
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 72cb0dd..d2c5cc7 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h

@@ -292,7 +292,7 @@
 };
 
 /**
- * enum dma_slave_buswidth - defines bus with of the DMA slave
+ * enum dma_slave_buswidth - defines bus width of the DMA slave
  * device, source or target buses
  */
 enum dma_slave_buswidth {

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index df63bd3..4ff262e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h

@@ -144,7 +144,7 @@
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int elv_register(struct elevator_type *);
+extern int __init elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0a114d0..e658229 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h

@@ -154,13 +154,20 @@
  * @reset: Reset (part of) the device, as specified by a bitmask of
  *	flags from &enum ethtool_reset_flags.  Returns a negative
  *	error code or zero.
+ * @get_rxfh_key_size: Get the size of the RX flow hash key.
+ *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
- * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
+ *	key.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
- * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
+ *	hash key.  In case only the indirection table or hash key is to be
+ *	changed, the other argument will be %NULL.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -232,9 +239,11 @@
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
+	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
-	int	(*get_rxfh_indir)(struct net_device *, u32 *);
-	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
+	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
+	int	(*set_rxfh)(struct net_device *, const u32 *indir,
+			    const u8 *key);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index df53e17..6ff0b0b 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h

@@ -19,6 +19,7 @@
 #define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
 #define F2FS_BLKSIZE			4096	/* support only 4KB block */
 #define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+#define F2FS_BLK_ALIGN(x)	(((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
 
 #define NULL_ADDR		((block_t)0)	/* used as block_t addresses */
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
@@ -75,6 +76,7 @@
 	__le16 volume_name[512];	/* volume name */
 	__le32 extension_count;		/* # of extensions below */
 	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+	__le32 cp_payload;
 } __packed;
 
 /*
@@ -146,6 +148,9 @@
 #define ADDRS_PER_BLOCK		1018	/* Address Pointers in a Direct Block */
 #define NIDS_PER_BLOCK		1018	/* Node IDs in an Indirect Block */
 
+#define ADDRS_PER_PAGE(page, fi)	\
+	(IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+
 #define	NODE_DIR1_BLOCK		(DEF_ADDRS_PER_INODE + 1)
 #define	NODE_DIR2_BLOCK		(DEF_ADDRS_PER_INODE + 2)
 #define	NODE_IND1_BLOCK		(DEF_ADDRS_PER_INODE + 3)
@@ -391,6 +396,9 @@
 /* MAX level for dir lookup */
 #define MAX_DIR_HASH_DEPTH	63
 
+/* MAX buckets in one level of dir */
+#define MAX_DIR_BUCKETS		(1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
+
 #define SIZE_OF_DIR_ENTRY	11	/* by byte */
 #define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
 					BITS_PER_BYTE)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 024fd03..a7e3c48 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h

@@ -37,21 +37,270 @@
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
 /* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	11
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+/* ArgX, context and stack frame pointer register positions. Note,
+ * Arg1, Arg2, Arg3, etc are used as argument mappings of function
+ * calls in BPF_CALL instruction.
+ */
+#define BPF_REG_ARG1	BPF_REG_1
+#define BPF_REG_ARG2	BPF_REG_2
+#define BPF_REG_ARG3	BPF_REG_3
+#define BPF_REG_ARG4	BPF_REG_4
+#define BPF_REG_ARG5	BPF_REG_5
+#define BPF_REG_CTX	BPF_REG_6
+#define BPF_REG_FP	BPF_REG_10
+
+/* Additional register mappings for converted user programs. */
+#define BPF_REG_A	BPF_REG_0
+#define BPF_REG_X	BPF_REG_7
+#define BPF_REG_TMP	BPF_REG_8
 
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* Arg1, context and stack frame pointer register positions. */
-#define ARG1_REG	1
-#define CTX_REG		6
-#define FP_REG		10
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, DST, LEN)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = LEN })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_MOV32_REG(DST, SRC)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
+
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)			\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
+
+#define BPF_LD_IND(SIZE, SRC, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
+		.dst_reg = 0,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
+	((struct sock_filter_int) {				\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((FUNC) - __bpf_call_base) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
+	((struct sock_filter_int) {				\
+		.code  = CODE,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()						\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_EXIT,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define bytes_to_bpf_size(bytes)				\
+({								\
+	int bpf_size = -EINVAL;					\
+								\
+	if (bytes == sizeof(u8))				\
+		bpf_size = BPF_B;				\
+	else if (bytes == sizeof(u16))				\
+		bpf_size = BPF_H;				\
+	else if (bytes == sizeof(u32))				\
+		bpf_size = BPF_W;				\
+	else if (bytes == sizeof(u64))				\
+		bpf_size = BPF_DW;				\
+								\
+	bpf_size;						\
+})
+
+/* Macro to invoke filter function. */
+#define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
 struct sock_filter_int {
 	__u8	code;		/* opcode */
-	__u8	a_reg:4;	/* dest register */
-	__u8	x_reg:4;	/* source register */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
 	__s16	off;		/* signed offset */
 	__s32	imm;		/* signed immediate constant */
 };
@@ -97,21 +346,16 @@
 #define sk_filter_proglen(fprog)			\
 		(fprog->len * sizeof(fprog->filter[0]))
 
-#define SK_RUN_FILTER(filter, ctx)			\
-		(*filter->bpf_func)(ctx, filter->insnsi)
-
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni);
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni);
+void sk_filter_select_runtime(struct sk_filter *fp);
+void sk_filter_free(struct sk_filter *fp);
 
 int sk_convert_filter(struct sock_filter *prog, int len,
 		      struct sock_filter_int *new_prog, int *new_len);
 
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog);
+				struct sock_fprog_kern *fprog);
 void sk_unattached_filter_destroy(struct sk_filter *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
@@ -120,11 +364,48 @@
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
+#define BPF_ANC		BIT(15)
+
+static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
+{
+	BUG_ON(ftest->code & BPF_ANC);
+
+	switch (ftest->code) {
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+#define BPF_ANCILLARY(CODE)	case SKF_AD_OFF + SKF_AD_##CODE:	\
+				return BPF_ANC | SKF_AD_##CODE
+		switch (ftest->k) {
+		BPF_ANCILLARY(PROTOCOL);
+		BPF_ANCILLARY(PKTTYPE);
+		BPF_ANCILLARY(IFINDEX);
+		BPF_ANCILLARY(NLATTR);
+		BPF_ANCILLARY(NLATTR_NEST);
+		BPF_ANCILLARY(MARK);
+		BPF_ANCILLARY(QUEUE);
+		BPF_ANCILLARY(HATYPE);
+		BPF_ANCILLARY(RXHASH);
+		BPF_ANCILLARY(CPU);
+		BPF_ANCILLARY(ALU_XOR_X);
+		BPF_ANCILLARY(VLAN_TAG);
+		BPF_ANCILLARY(VLAN_TAG_PRESENT);
+		BPF_ANCILLARY(PAY_OFFSET);
+		BPF_ANCILLARY(RANDOM);
+		}
+		/* Fallthrough. */
+	default:
+		return ftest->code;
+	}
+}
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
@@ -144,85 +425,20 @@
 }
 #else
 #include <linux/slab.h>
+
 static inline void bpf_jit_compile(struct sk_filter *fp)
 {
 }
+
 static inline void bpf_jit_free(struct sk_filter *fp)
 {
 	kfree(fp);
 }
-#endif
+#endif /* CONFIG_BPF_JIT */
 
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
 }
 
-enum {
-	BPF_S_RET_K = 1,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_MOD_K,
-	BPF_S_ALU_MOD_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_XOR_K,
-	BPF_S_ALU_XOR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-	/* Ancillary data */
-	BPF_S_ANC_PROTOCOL,
-	BPF_S_ANC_PKTTYPE,
-	BPF_S_ANC_IFINDEX,
-	BPF_S_ANC_NLATTR,
-	BPF_S_ANC_NLATTR_NEST,
-	BPF_S_ANC_MARK,
-	BPF_S_ANC_QUEUE,
-	BPF_S_ANC_HATYPE,
-	BPF_S_ANC_RXHASH,
-	BPF_S_ANC_CPU,
-	BPF_S_ANC_ALU_XOR_X,
-	BPF_S_ANC_VLAN_TAG,
-	BPF_S_ANC_VLAN_TAG_PRESENT,
-	BPF_S_ANC_PAY_OFFSET,
-};
-
 #endif /* __LINUX_FILTER_H__ */

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3f46e4..338e6f7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -128,6 +128,10 @@
 #define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
 /* Write access to underlying fs */
 #define FMODE_WRITER		((__force fmode_t)0x10000)
+/* Has read method(s) */
+#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
+/* Has write method(s) */
+#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
@@ -343,8 +347,7 @@
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, gfp_t);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
 	/*
@@ -1448,6 +1451,8 @@
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+struct iov_iter;
+
 struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
@@ -1455,6 +1460,8 @@
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -2404,20 +2411,18 @@
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 		unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
-extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-		unsigned long *, loff_t, size_t, size_t);
+extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern int generic_segment_checks(const struct iovec *iov,
-		unsigned long *nr_segs, size_t *count, int access_flags);
+extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
-extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
+extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
 			int datasync);
 extern void block_sync_page(struct page *page);
@@ -2427,7 +2432,7 @@
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t default_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
@@ -2477,16 +2482,16 @@
 void dio_end_io(struct bio *bio, int error);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags);
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
-		struct inode *inode, const struct iovec *iov, loff_t offset,
-		unsigned long nr_segs, get_block_t get_block)
+		struct inode *inode, struct iov_iter *iter, loff_t offset,
+		get_block_t get_block)
 {
-	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				    offset, nr_segs, get_block, NULL, NULL,
+	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter,
+				    offset, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 #endif

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2018751..404a686 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h

@@ -62,9 +62,6 @@
  * set in the flags member.
  *
  * ENABLED - set/unset when ftrace_ops is registered/unregistered
- * GLOBAL  - set manualy by ftrace_ops user to denote the ftrace_ops
- *           is part of the global tracers sharing the same filter
- *           via set_ftrace_* debugfs files.
  * DYNAMIC - set when ftrace_ops is registered to denote dynamically
  *           allocated ftrace_ops which need special care
  * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops
@@ -96,15 +93,14 @@
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
-	FTRACE_OPS_FL_GLOBAL			= 1 << 1,
-	FTRACE_OPS_FL_DYNAMIC			= 1 << 2,
-	FTRACE_OPS_FL_CONTROL			= 1 << 3,
-	FTRACE_OPS_FL_SAVE_REGS			= 1 << 4,
-	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 5,
-	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 6,
-	FTRACE_OPS_FL_STUB			= 1 << 7,
-	FTRACE_OPS_FL_INITIALIZED		= 1 << 8,
-	FTRACE_OPS_FL_DELETED			= 1 << 9,
+	FTRACE_OPS_FL_DYNAMIC			= 1 << 1,
+	FTRACE_OPS_FL_CONTROL			= 1 << 2,
+	FTRACE_OPS_FL_SAVE_REGS			= 1 << 3,
+	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 4,
+	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 5,
+	FTRACE_OPS_FL_STUB			= 1 << 6,
+	FTRACE_OPS_FL_INITIALIZED		= 1 << 7,
+	FTRACE_OPS_FL_DELETED			= 1 << 8,
 };
 
 /*
@@ -366,14 +362,12 @@
  *  IGNORE           - The function is already what we want it to be
  *  MAKE_CALL        - Start tracing the function
  *  MODIFY_CALL      - Stop saving regs for the function
- *  MODIFY_CALL_REGS - Start saving regs for the function
  *  MAKE_NOP         - Stop tracing the function
  */
 enum {
 	FTRACE_UPDATE_IGNORE,
 	FTRACE_UPDATE_MAKE_CALL,
 	FTRACE_UPDATE_MODIFY_CALL,
-	FTRACE_UPDATE_MODIFY_CALL_REGS,
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
@@ -404,6 +398,8 @@
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
 unsigned long ftrace_location(unsigned long ip);
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec);
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec);
 
 extern ftrace_func_t ftrace_trace_function;
 

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d16da3e..cff3106 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h

@@ -38,6 +38,9 @@
 								 *symbol_array);
 #endif
 
+const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+				     unsigned int bitmask_size);
+
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index a7e977f..8b62246 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h

@@ -3,29 +3,53 @@
 
 struct device;
 
+/**
+ * struct gpio_keys_button - configuration parameters
+ * @code:		input event code (KEY_*, SW_*)
+ * @gpio:		%-1 if this key does not support gpio
+ * @active_low:		%true indicates that button is considered
+ *			depressed when gpio is low
+ * @desc:		label that will be attached to button's gpio
+ * @type:		input event type (%EV_KEY, %EV_SW, %EV_ABS)
+ * @wakeup:		configure the button as a wake-up source
+ * @debounce_interval:	debounce ticks interval in msecs
+ * @can_disable:	%true indicates that userspace is allowed to
+ *			disable button via sysfs
+ * @value:		axis value for %EV_ABS
+ * @irq:		Irq number in case of interrupt keys
+ */
 struct gpio_keys_button {
-	/* Configuration parameters */
-	unsigned int code;	/* input event code (KEY_*, SW_*) */
-	int gpio;		/* -1 if this key does not support gpio */
+	unsigned int code;
+	int gpio;
 	int active_low;
 	const char *desc;
-	unsigned int type;	/* input event type (EV_KEY, EV_SW, EV_ABS) */
-	int wakeup;		/* configure the button as a wake-up source */
-	int debounce_interval;	/* debounce ticks interval in msecs */
+	unsigned int type;
+	int wakeup;
+	int debounce_interval;
 	bool can_disable;
-	int value;		/* axis value for EV_ABS */
-	unsigned int irq;	/* Irq number in case of interrupt keys */
+	int value;
+	unsigned int irq;
 };
 
+/**
+ * struct gpio_keys_platform_data - platform data for gpio_keys driver
+ * @buttons:		pointer to array of &gpio_keys_button structures
+ *			describing buttons attached to the device
+ * @nbuttons:		number of elements in @buttons array
+ * @poll_interval:	polling interval in msecs - for polling driver only
+ * @rep:		enable input subsystem auto repeat
+ * @enable:		platform hook for enabling the device
+ * @disable:		platform hook for disabling the device
+ * @name:		input device name
+ */
 struct gpio_keys_platform_data {
 	struct gpio_keys_button *buttons;
 	int nbuttons;
-	unsigned int poll_interval;	/* polling interval in msecs -
-					   for polling driver only */
-	unsigned int rep:1;		/* enable input subsystem auto repeat */
+	unsigned int poll_interval;
+	unsigned int rep:1;
 	int (*enable)(struct device *dev);
 	void (*disable)(struct device *dev);
-	const char *name;		/* input device name */
+	const char *name;
 };
 
 #endif

diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 99e379b..3891dc1 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h

@@ -15,35 +15,14 @@
 
 #include <linux/types.h>
 
-/* For key_map array */
-#define MXT_NUM_GPIO		4
-
-/* Orient */
-#define MXT_NORMAL		0x0
-#define MXT_DIAGONAL		0x1
-#define MXT_HORIZONTAL_FLIP	0x2
-#define MXT_ROTATED_90_COUNTER	0x3
-#define MXT_VERTICAL_FLIP	0x4
-#define MXT_ROTATED_90		0x5
-#define MXT_ROTATED_180		0x6
-#define MXT_DIAGONAL_COUNTER	0x7
-
 /* The platform data for the Atmel maXTouch touchscreen driver */
 struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
-
-	unsigned int x_line;
-	unsigned int y_line;
-	unsigned int x_size;
-	unsigned int y_size;
-	unsigned int blen;
-	unsigned int threshold;
-	unsigned int voltage;
-	unsigned char orient;
+	u32 config_crc;
 	unsigned long irqflags;
-	bool is_tp;
-	const unsigned int key_map[MXT_NUM_GPIO];
+	u8 t19_num_keys;
+	const unsigned int *t19_keymap;
 };
 
 #endif /* __LINUX_ATMEL_MXT_TS_H */

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index d2b1670..8cfb50f 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h

@@ -498,7 +498,10 @@
 #define RES_GRP_ALL		0x7	/* All resource groups */
 
 #define RES_TYPE2_R0		0x0
+#define RES_TYPE2_R1		0x1
+#define RES_TYPE2_R2		0x2
 
+#define RES_TYPE_R0		0x0
 #define RES_TYPE_ALL		0x7
 
 /* Resource states */
@@ -671,6 +674,7 @@
 	struct twl4030_script **scripts;
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
+	struct twl4030_resconfig *board_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
 	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f194ccb..6bff13f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h

@@ -1711,6 +1711,7 @@
 	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
 	WLAN_EID_MULTIPLE_BSSID = 71,
 	WLAN_EID_BSS_COEX_2040 = 72,
+	WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73,
 	WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74,
 	WLAN_EID_RIC_DESCRIPTOR = 75,
 	WLAN_EID_MMIE = 76,

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 1085ffe..fd22789 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h

@@ -16,9 +16,28 @@
 #include <linux/netdevice.h>
 #include <uapi/linux/if_bridge.h>
 
+struct br_ip {
+	union {
+		__be32	ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr ip6;
+#endif
+	} u;
+	__be16		proto;
+	__u16           vid;
+};
+
+struct br_ip_list {
+	struct list_head list;
+	struct br_ip addr;
+};
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list);
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 
 #endif

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index a86784d..119130e 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h

@@ -10,8 +10,9 @@
 	__u8 mac[32];
 	__u32 vlan;
 	__u32 qos;
-	__u32 tx_rate;
 	__u32 spoofchk;
 	__u32 linkstate;
+	__u32 min_tx_rate;
+	__u32 max_tx_rate;
 };
 #endif /* _LINUX_IF_LINK_H */

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a9a53b1..6b2c7cf 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h

@@ -57,6 +57,9 @@
 	netdev_features_t	tap_features;
 	int			minor;
 	int			nest_level;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll		*netpoll;
+#endif
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index b2acc4a..4967916 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h

@@ -106,7 +106,7 @@
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 
-extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
+extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
@@ -206,7 +206,7 @@
 }
 #else
 static inline struct net_device *
-__vlan_find_dev_deep(struct net_device *real_dev,
+__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 		     __be16 vlan_proto, u16 vlan_id)
 {
 	return NULL;

diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h
index ce0b724..2465182 100644
--- a/include/linux/input-polldev.h
+++ b/include/linux/input-polldev.h

@@ -48,9 +48,12 @@
 
 /* private: */
 	struct delayed_work work;
+
+	bool devres_managed;
 };
 
 struct input_polled_dev *input_allocate_polled_device(void);
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev);
 void input_free_polled_device(struct input_polled_dev *dev);
 int input_register_polled_device(struct input_polled_dev *dev);
 void input_unregister_polled_device(struct input_polled_dev *dev);

diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 7163d91..160cf35 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h

@@ -1,10 +1,52 @@
 #ifndef	_PIXCIR_I2C_TS_H
 #define	_PIXCIR_I2C_TS_H
 
+/*
+ * Register map
+ */
+#define PIXCIR_REG_POWER_MODE	51
+#define PIXCIR_REG_INT_MODE	52
+
+/*
+ * Power modes:
+ * active: max scan speed
+ * idle: lower scan speed with automatic transition to active on touch
+ * halt: datasheet says sleep but this is more like halt as the chip
+ *       clocks are cut and it can only be brought out of this mode
+ *	 using the RESET pin.
+ */
+enum pixcir_power_mode {
+	PIXCIR_POWER_ACTIVE,
+	PIXCIR_POWER_IDLE,
+	PIXCIR_POWER_HALT,
+};
+
+#define PIXCIR_POWER_MODE_MASK	0x03
+#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2)
+
+/*
+ * Interrupt modes:
+ * periodical: interrupt is asserted periodically
+ * diff coordinates: interrupt is asserted when coordinates change
+ * level on touch: interrupt level asserted during touch
+ * pulse on touch: interrupt pulse asserted during touch
+ *
+ */
+enum pixcir_int_mode {
+	PIXCIR_INT_PERIODICAL,
+	PIXCIR_INT_DIFF_COORD,
+	PIXCIR_INT_LEVEL_TOUCH,
+	PIXCIR_INT_PULSE_TOUCH,
+};
+
+#define PIXCIR_INT_MODE_MASK	0x03
+#define PIXCIR_INT_ENABLE	(1UL << 3)
+#define PIXCIR_INT_POL_HIGH	(1UL << 2)
+
 struct pixcir_ts_platform_data {
-	int (*attb_read_val)(void);
 	int x_max;
 	int y_max;
+	int gpio_attb;		/* GPIO connected to ATTB line */
 };
 
 #endif

diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h
new file mode 100644
index 0000000..08a5ef6
--- /dev/null
+++ b/include/linux/input/touchscreen.h

@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _TOUCHSCREEN_H
+#define _TOUCHSCREEN_H
+
+#include <linux/input.h>
+
+#ifdef CONFIG_OF
+void touchscreen_parse_of_params(struct input_dev *dev);
+#else
+static inline void touchscreen_parse_of_params(struct input_dev *dev)
+{
+}
+#endif
+
+#endif

diff --git a/include/linux/isdn/capiutil.h b/include/linux/isdn/capiutil.h
index 5a52f2c..44bd604 100644
--- a/include/linux/isdn/capiutil.h
+++ b/include/linux/isdn/capiutil.h

@@ -164,11 +164,6 @@
 			  __u8 _Command, __u8 _Subcommand,
 			  __u16 _Messagenumber, __u32 _Controller);
 
-/*
- * capi_info2str generated a readable string for Capi2.0 reasons.
- */
-char *capi_info2str(__u16 reason);
-
 /*-----------------------------------------------------------------------*/
 
 /*

diff --git a/include/linux/key.h b/include/linux/key.h
index 3ae45f0..017b082 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h

@@ -309,6 +309,17 @@
 
 extern void key_set_timeout(struct key *, unsigned);
 
+/*
+ * The permissions required on a key that we're looking up.
+ */
+#define	KEY_NEED_VIEW	0x01	/* Require permission to view attributes */
+#define	KEY_NEED_READ	0x02	/* Require permission to read content */
+#define	KEY_NEED_WRITE	0x04	/* Require permission to update / modify */
+#define	KEY_NEED_SEARCH	0x08	/* Require permission to search (keyring) or find (key) */
+#define	KEY_NEED_LINK	0x10	/* Require permission to link */
+#define	KEY_NEED_SETATTR 0x20	/* Require permission to change attributes */
+#define	KEY_NEED_ALL	0x3f	/* All the above permissions */
+
 /**
  * key_is_instantiated - Determine if a key has been positively instantiated
  * @key: The key to check.

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 925eaf2..f7296e5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h

@@ -205,10 +205,10 @@
 	void *addr;
 };
 
-struct kprobe_blackpoint {
-	const char *name;
+struct kprobe_blacklist_entry {
+	struct list_head list;
 	unsigned long start_addr;
-	unsigned long range;
+	unsigned long end_addr;
 };
 
 #ifdef CONFIG_KPROBES
@@ -265,6 +265,7 @@
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
@@ -355,7 +356,7 @@
 
 static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
 {
-	return (&__get_cpu_var(kprobe_ctlblk));
+	return this_cpu_ptr(&kprobe_ctlblk);
 }
 
 int register_kprobe(struct kprobe *p);
@@ -476,4 +477,18 @@
 	return enable_kprobe(&jp->kp);
 }
 
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist generating macro. Specify functions which must not be probed
+ * by using this macro.
+ */
+#define __NOKPROBE_SYMBOL(fname)			\
+static unsigned long __used				\
+	__attribute__((section("_kprobe_blacklist")))	\
+	_kbl_addr_##fname = (unsigned long)fname;
+#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)
+#else
+#define NOKPROBE_SYMBOL(fname)
+#endif
+
 #endif /* _LINUX_KPROBES_H */

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 31c0cd1..de9e46e 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h

@@ -304,6 +304,30 @@
 	return 0;
 }
 
+/**
+ * ktime_after - Compare if a ktime_t value is bigger than another one.
+ * @cmp1:	comparable1
+ * @cmp2:	comparable2
+ *
+ * Return: true if cmp1 happened after cmp2.
+ */
+static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) > 0;
+}
+
+/**
+ * ktime_before - Compare if a ktime_t value is smaller than another one.
+ * @cmp1:	comparable1
+ * @cmp2:	comparable2
+ *
+ * Return: true if cmp1 happened before cmp2.
+ */
+static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) < 0;
+}
+
 static inline s64 ktime_to_us(const ktime_t kt)
 {
 	struct timeval tv = ktime_to_timeval(kt);

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 970c681..ec4e3bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h

@@ -586,7 +586,7 @@
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
+int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dcaad79..219d796 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h

@@ -17,13 +17,13 @@
 #include <linux/fs.h>
 #include <linux/kref.h>
 #include <linux/utsname.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
 #ifdef CONFIG_LOCKD_V4
 #include <linux/lockd/xdr4.h>
 #endif
 #include <linux/lockd/debug.h>
+#include <linux/sunrpc/svc.h>
 
 /*
  * Version string

diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 3e02b76..b6401e7 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h

@@ -18,38 +18,38 @@
 #ifndef __LINUX_MFD_SEC_RTC_H
 #define __LINUX_MFD_SEC_RTC_H
 
-enum sec_rtc_reg {
-	SEC_RTC_SEC,
-	SEC_RTC_MIN,
-	SEC_RTC_HOUR,
-	SEC_RTC_WEEKDAY,
-	SEC_RTC_DATE,
-	SEC_RTC_MONTH,
-	SEC_RTC_YEAR1,
-	SEC_RTC_YEAR2,
-	SEC_ALARM0_SEC,
-	SEC_ALARM0_MIN,
-	SEC_ALARM0_HOUR,
-	SEC_ALARM0_WEEKDAY,
-	SEC_ALARM0_DATE,
-	SEC_ALARM0_MONTH,
-	SEC_ALARM0_YEAR1,
-	SEC_ALARM0_YEAR2,
-	SEC_ALARM1_SEC,
-	SEC_ALARM1_MIN,
-	SEC_ALARM1_HOUR,
-	SEC_ALARM1_WEEKDAY,
-	SEC_ALARM1_DATE,
-	SEC_ALARM1_MONTH,
-	SEC_ALARM1_YEAR1,
-	SEC_ALARM1_YEAR2,
-	SEC_ALARM0_CONF,
-	SEC_ALARM1_CONF,
-	SEC_RTC_STATUS,
-	SEC_WTSR_SMPL_CNTL,
-	SEC_RTC_UDR_CON,
+enum s5m_rtc_reg {
+	S5M_RTC_SEC,
+	S5M_RTC_MIN,
+	S5M_RTC_HOUR,
+	S5M_RTC_WEEKDAY,
+	S5M_RTC_DATE,
+	S5M_RTC_MONTH,
+	S5M_RTC_YEAR1,
+	S5M_RTC_YEAR2,
+	S5M_ALARM0_SEC,
+	S5M_ALARM0_MIN,
+	S5M_ALARM0_HOUR,
+	S5M_ALARM0_WEEKDAY,
+	S5M_ALARM0_DATE,
+	S5M_ALARM0_MONTH,
+	S5M_ALARM0_YEAR1,
+	S5M_ALARM0_YEAR2,
+	S5M_ALARM1_SEC,
+	S5M_ALARM1_MIN,
+	S5M_ALARM1_HOUR,
+	S5M_ALARM1_WEEKDAY,
+	S5M_ALARM1_DATE,
+	S5M_ALARM1_MONTH,
+	S5M_ALARM1_YEAR1,
+	S5M_ALARM1_YEAR2,
+	S5M_ALARM0_CONF,
+	S5M_ALARM1_CONF,
+	S5M_RTC_STATUS,
+	S5M_WTSR_SMPL_CNTL,
+	S5M_RTC_UDR_CON,
 
-	SEC_RTC_REG_MAX,
+	S5M_RTC_REG_MAX,
 };
 
 enum s2mps_rtc_reg {
@@ -88,9 +88,9 @@
 #define HOUR_12			(1 << 7)
 #define HOUR_AMPM		(1 << 6)
 #define HOUR_PM			(1 << 5)
-#define ALARM0_STATUS		(1 << 1)
-#define ALARM1_STATUS		(1 << 2)
-#define UPDATE_AD		(1 << 0)
+#define S5M_ALARM0_STATUS	(1 << 1)
+#define S5M_ALARM1_STATUS	(1 << 2)
+#define S5M_UPDATE_AD		(1 << 0)
 
 #define S2MPS_ALARM0_STATUS	(1 << 2)
 #define S2MPS_ALARM1_STATUS	(1 << 1)
@@ -101,16 +101,26 @@
 #define MODEL24_SHIFT		1
 #define MODEL24_MASK		(1 << MODEL24_SHIFT)
 /* RTC Update Register1 */
-#define RTC_UDR_SHIFT		0
-#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+#define S5M_RTC_UDR_SHIFT	0
+#define S5M_RTC_UDR_MASK	(1 << S5M_RTC_UDR_SHIFT)
 #define S2MPS_RTC_WUDR_SHIFT	4
 #define S2MPS_RTC_WUDR_MASK	(1 << S2MPS_RTC_WUDR_SHIFT)
 #define S2MPS_RTC_RUDR_SHIFT	0
 #define S2MPS_RTC_RUDR_MASK	(1 << S2MPS_RTC_RUDR_SHIFT)
 #define RTC_TCON_SHIFT		1
 #define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
-#define RTC_TIME_EN_SHIFT	3
-#define RTC_TIME_EN_MASK	(1 << RTC_TIME_EN_SHIFT)
+#define S5M_RTC_TIME_EN_SHIFT	3
+#define S5M_RTC_TIME_EN_MASK	(1 << S5M_RTC_TIME_EN_SHIFT)
+/*
+ * UDR_T field in S5M_RTC_UDR_CON register determines the time needed
+ * for updating alarm and time registers. Default is 7.32 ms.
+ */
+#define S5M_RTC_UDR_T_SHIFT	6
+#define S5M_RTC_UDR_T_MASK	(0x3 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_7320_US	(0x0 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_1830_US	(0x1 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_3660_US	(0x2 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_450_US	(0x3 << S5M_RTC_UDR_T_SHIFT)
 
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd2..b12f4bb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h

@@ -401,6 +401,7 @@
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
+	u32			*qp0_qkey;
 	u32			*qp0_proxy;
 	u32			*qp1_proxy;
 	u32			*qp0_tunnel;
@@ -449,7 +450,6 @@
 	int                     reserved_qps_base[MLX4_NUM_QP_REGION];
 	int                     log_num_macs;
 	int                     log_num_vlans;
-	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
 	u8                      suggested_type[MLX4_MAX_PORTS + 1];
@@ -577,6 +577,9 @@
 
 	u32			cons_index;
 
+	u16                     irq;
+	bool                    irq_affinity_change;
+
 	__be32		       *set_ci_db;
 	__be32		       *arm_db;
 	int			arm_sn;
@@ -837,7 +840,7 @@
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf);
+		   struct mlx4_buf *buf, gfp_t gfp);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -874,9 +877,10 @@
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf);
+		       struct mlx4_buf *buf, gfp_t gfp);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
+		  gfp_t gfp);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -892,7 +896,8 @@
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
+		  gfp_t gfp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
@@ -1234,4 +1239,8 @@
 int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enable);
 #endif /* MLX4_DEVICE_H */

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 93cef63..2bce4aa 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h

@@ -427,7 +427,6 @@
 	u64			size;
 	u32			key;
 	u32			pd;
-	u32			access;
 };
 
 struct mlx5_core_srq {

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index b730272..d424b9d 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h

@@ -63,12 +63,12 @@
 	unsigned int            power_off_longtime;     /* Units: ms */
 	u8			power_off_notification;	/* state */
 	unsigned int		hs_max_dtr;
+	unsigned int		hs200_max_dtr;
 #define MMC_HIGH_26_MAX_DTR	26000000
 #define MMC_HIGH_52_MAX_DTR	52000000
 #define MMC_HIGH_DDR_MAX_DTR	52000000
 #define MMC_HS200_MAX_DTR	200000000
 	unsigned int		sectors;
-	unsigned int		card_type;
 	unsigned int		hc_erase_size;		/* In sectors */
 	unsigned int		hc_erase_timeout;	/* In milliseconds */
 	unsigned int		sec_trim_mult;	/* Secure trim multiplier  */
@@ -110,6 +110,7 @@
 	u8			raw_pwr_cl_200_360;	/* 237 */
 	u8			raw_pwr_cl_ddr_52_195;	/* 238 */
 	u8			raw_pwr_cl_ddr_52_360;	/* 239 */
+	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
 
@@ -194,6 +195,7 @@
 };
 
 struct mmc_host;
+struct mmc_ios;
 struct sdio_func;
 struct sdio_func_tuple;
 
@@ -250,15 +252,11 @@
 	unsigned int		state;		/* (our) card state */
 #define MMC_STATE_PRESENT	(1<<0)		/* present in sysfs */
 #define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
-#define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
-#define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
-#define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
-#define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
-#define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
-#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
-#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
-#define MMC_STATE_DOING_BKOPS	(1<<10)		/* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED	(1<<11)		/* card is suspended */
+#define MMC_STATE_BLOCKADDR	(1<<2)		/* card uses block-addressing */
+#define MMC_CARD_SDXC		(1<<3)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<4)		/* card has been removed */
+#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing BKOPS */
+#define MMC_STATE_SUSPENDED	(1<<6)		/* card is suspended */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -301,6 +299,7 @@
 	struct sdio_func_tuple	*tuples;	/* unknown common tuples */
 
 	unsigned int		sd_bus_speed;	/* Bus Speed Mode set for the card */
+	unsigned int		mmc_avail_type;	/* supported device type by both host and card */
 
 	struct dentry		*debugfs_root;
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
@@ -353,7 +352,7 @@
 #define CID_OEMID_ANY ((unsigned short) -1)
 #define CID_NAME_ANY (NULL)
 
-#define END_FIXUP { 0 }
+#define END_FIXUP { NULL }
 
 #define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
 		   _cis_vendor, _cis_device,				\
@@ -418,11 +417,7 @@
 
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
-#define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
-#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
-#define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
@@ -430,11 +425,7 @@
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
-#define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
-#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
-#define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 #define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)

diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6ce7d2c..babaea9 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h

@@ -248,20 +248,6 @@
 	/* delay in mS before detecting cards after interrupt */
 	u32 detect_delay_ms;
 
-	int (*init)(u32 slot_id, irq_handler_t , void *);
-	int (*get_ro)(u32 slot_id);
-	int (*get_cd)(u32 slot_id);
-	int (*get_ocr)(u32 slot_id);
-	int (*get_bus_wd)(u32 slot_id);
-	/*
-	 * Enable power to selected slot and set voltage to desired level.
-	 * Voltage levels are specified using MMC_VDD_xxx defines defined
-	 * in linux/mmc/host.h file.
-	 */
-	void (*setpower)(u32 slot_id, u32 volt);
-	void (*exit)(u32 slot_id);
-	void (*select_slot)(u32 slot_id);
-
 	struct dw_mci_dma_ops *dma_ops;
 	struct dma_pdata *data;
 	struct block_settings *blk_settings;

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cb61ea4..7960424 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h

@@ -17,6 +17,7 @@
 #include <linux/fault-inject.h>
 
 #include <linux/mmc/core.h>
+#include <linux/mmc/card.h>
 #include <linux/mmc/pm.h>
 
 struct mmc_ios {
@@ -58,13 +59,9 @@
 #define MMC_TIMING_UHS_SDR50	5
 #define MMC_TIMING_UHS_SDR104	6
 #define MMC_TIMING_UHS_DDR50	7
-#define MMC_TIMING_MMC_HS200	8
-
-#define MMC_SDR_MODE		0
-#define MMC_1_2V_DDR_MODE	1
-#define MMC_1_8V_DDR_MODE	2
-#define MMC_1_2V_SDR_MODE	3
-#define MMC_1_8V_SDR_MODE	4
+#define MMC_TIMING_MMC_DDR52	8
+#define MMC_TIMING_MMC_HS200	9
+#define MMC_TIMING_MMC_HS400	10
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -136,6 +133,9 @@
 
 	/* The tuning command opcode value is different for SD and eMMC cards */
 	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
+
+	/* Prepare HS400 target operating frequency depending on host driver */
+	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
@@ -278,6 +278,11 @@
 #define MMC_CAP2_PACKED_CMD	(MMC_CAP2_PACKED_RD | \
 				 MMC_CAP2_PACKED_WR)
 #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14)	/* Don't power up before scan */
+#define MMC_CAP2_HS400_1_8V	(1 << 15)	/* Can support HS400 1.8V */
+#define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
+#define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
+				 MMC_CAP2_HS400_1_2V)
+#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -318,6 +323,8 @@
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	bool			trigger_card_event; /* card_event necessary */
+
 	struct mmc_card		*card;		/* device attached to this host */
 
 	wait_queue_head_t	wq;
@@ -391,12 +398,13 @@
 	wake_up_process(host->sdio_irq_thread);
 }
 
+void sdio_run_irqs(struct mmc_host *host);
+
 #ifdef CONFIG_REGULATOR
 int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
 			struct regulator *supply,
 			unsigned short vdd_bit);
-int mmc_regulator_get_supply(struct mmc_host *mmc);
 #else
 static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
 {
@@ -409,13 +417,10 @@
 {
 	return 0;
 }
-
-static inline int mmc_regulator_get_supply(struct mmc_host *mmc)
-{
-	return 0;
-}
 #endif
 
+int mmc_regulator_get_supply(struct mmc_host *mmc);
+
 int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *);
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
@@ -475,4 +480,32 @@
 	return host->ios.clock;
 }
 #endif
+
+static inline int mmc_card_hs(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_SD_HS ||
+		card->host->ios.timing == MMC_TIMING_MMC_HS;
+}
+
+static inline int mmc_card_uhs(struct mmc_card *card)
+{
+	return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 &&
+		card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
+}
+
+static inline bool mmc_card_hs200(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS200;
+}
+
+static inline bool mmc_card_ddr52(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
+}
+
+static inline bool mmc_card_hs400(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS400;
+}
+
 #endif /* LINUX_MMC_HOST_H */

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 50bcde3..64ec963 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h

@@ -325,6 +325,7 @@
 #define EXT_CSD_POWER_OFF_LONG_TIME	247	/* RO */
 #define EXT_CSD_GENERIC_CMD6_TIME	248	/* RO */
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
+#define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
 #define EXT_CSD_MAX_PACKED_WRITES	500	/* RO */
@@ -354,18 +355,25 @@
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
 
-#define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
-#define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
-#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_HS_26	(1<<0)	/* Card can run at 26MHz */
+#define EXT_CSD_CARD_TYPE_HS_52	(1<<1)	/* Card can run at 52MHz */
+#define EXT_CSD_CARD_TYPE_HS	(EXT_CSD_CARD_TYPE_HS_26 | \
+				 EXT_CSD_CARD_TYPE_HS_52)
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
-#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
-#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_2V	(1<<5)	/* Card can run at 200MHz */
 						/* SDR mode @1.2V I/O */
+#define EXT_CSD_CARD_TYPE_HS200		(EXT_CSD_CARD_TYPE_HS200_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS200_1_2V)
+#define EXT_CSD_CARD_TYPE_HS400_1_8V	(1<<6)	/* Card can run at 200MHz DDR, 1.8V */
+#define EXT_CSD_CARD_TYPE_HS400_1_2V	(1<<7)	/* Card can run at 200MHz DDR, 1.2V */
+#define EXT_CSD_CARD_TYPE_HS400		(EXT_CSD_CARD_TYPE_HS400_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS400_1_2V)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
@@ -373,6 +381,11 @@
 #define EXT_CSD_DDR_BUS_WIDTH_4	5	/* Card is in 4 bit DDR mode */
 #define EXT_CSD_DDR_BUS_WIDTH_8	6	/* Card is in 8 bit DDR mode */
 
+#define EXT_CSD_TIMING_BC	0	/* Backwards compatibility */
+#define EXT_CSD_TIMING_HS	1	/* High speed */
+#define EXT_CSD_TIMING_HS200	2	/* HS200 */
+#define EXT_CSD_TIMING_HS400	3	/* HS400 */
+
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
 #define EXT_CSD_SEC_GB_CL_EN	BIT(4)

diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7be12b8..08abe99 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h

@@ -57,12 +57,8 @@
 #define SDHCI_QUIRK_BROKEN_CARD_DETECTION		(1<<15)
 /* Controller reports inverted write-protect state */
 #define SDHCI_QUIRK_INVERTED_WRITE_PROTECT		(1<<16)
-/* Controller has nonstandard clock management */
-#define SDHCI_QUIRK_NONSTANDARD_CLOCK			(1<<17)
 /* Controller does not like fast PIO transfers */
 #define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
-/* Controller losing signal/interrupt enable states after reset */
-#define SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET		(1<<19)
 /* Controller has to be forced to use block size of 2048 bytes */
 #define SDHCI_QUIRK_FORCE_BLK_SZ_2048			(1<<20)
 /* Controller cannot do multi-block transfers */
@@ -147,6 +143,7 @@
 
 	bool runtime_suspended;	/* Host is runtime suspended */
 	bool bus_on;		/* Bus power prevents runtime suspend */
+	bool preset_enabled;	/* Preset is enabled */
 
 	struct mmc_request *mrq;	/* Current request */
 	struct mmc_command *cmd;	/* Current command */
@@ -164,8 +161,7 @@
 	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
 	dma_addr_t align_addr;	/* Mapped bounce buffer */
 
-	struct tasklet_struct card_tasklet;	/* Tasklet structures */
-	struct tasklet_struct finish_tasklet;
+	struct tasklet_struct finish_tasklet;	/* Tasklet structures */
 
 	struct timer_list timer;	/* Timer for timeouts */
 
@@ -177,6 +173,13 @@
 	unsigned int            ocr_avail_mmc;
 	u32 ocr_mask;		/* available voltages */
 
+	unsigned		timing;		/* Current timing */
+
+	u32			thread_isr;
+
+	/* cached registers */
+	u32			ier;
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 204a677..b1990c5 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h

@@ -321,7 +321,7 @@
 extern bool parameqn(const char *name1, const char *name2, size_t n);
 
 /* Called on module insert or kernel boot */
-extern int parse_args(const char *name,
+extern char *parse_args(const char *name,
 		      char *args,
 		      const struct kernel_param *params,
 		      unsigned num,

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 450d61e..2f0af28 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h

@@ -176,6 +176,11 @@
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
 /*
+ * This option could be defined by controller drivers to protect against
+ * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers
+ */
+#define NAND_USE_BOUNCE_BUFFER	0x00080000
+/*
  * Autodetect nand buswidth with readid/onfi.
  * This suppose the driver will configure the hardware in 8 bits mode
  * when calling nand_scan_ident, and update its configuration
@@ -552,8 +557,7 @@
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
  * @hwcontrol:		platform-specific hardware control structure
- * @erase_cmd:		[INTERN] erase command write function, selectable due
- *			to AND support.
+ * @erase:		[REPLACEABLE] erase function
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
@@ -637,7 +641,7 @@
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
 			int page_addr);
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
-	void (*erase_cmd)(struct mtd_info *mtd, int page);
+	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);

diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index b730d4f..42ff7ff 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h

@@ -101,9 +101,6 @@
 				unsigned long len, map_word *datum)
 {
 	int bits_per_chip = map_bankwidth(map) * 8;
-	int chipnum;
-	struct lpddr_private *lpddr = map->fldrv_priv;
-	chipnum = adr >> lpddr->chipshift;
 
 	map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE);
 	map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)),

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
new file mode 100644
index 0000000..5324184
--- /dev/null
+++ b/include/linux/mtd/spi-nor.h

@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2014 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_MTD_SPI_NOR_H
+#define __LINUX_MTD_SPI_NOR_H
+
+/*
+ * Note on opcode nomenclature: some opcodes have a format like
+ * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number
+ * of I/O lines used for the opcode, address, and data (respectively). The
+ * FUNCTION has an optional suffix of '4', to represent an opcode which
+ * requires a 4-byte (32-bit) address.
+ */
+
+/* Flash opcodes. */
+#define SPINOR_OP_WREN		0x06	/* Write enable */
+#define SPINOR_OP_RDSR		0x05	/* Read status register */
+#define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
+#define SPINOR_OP_READ		0x03	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ_FAST	0x0b	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
+#define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
+#define SPINOR_OP_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define SPINOR_OP_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define SPINOR_OP_RDID		0x9f	/* Read JEDEC ID */
+#define SPINOR_OP_RDCR		0x35	/* Read configuration register */
+
+/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
+#define SPINOR_OP_READ4		0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ4_FAST	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ4_1_1_2	0x3c	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ4_1_1_4	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+
+/* Used for SST flashes only. */
+#define SPINOR_OP_BP		0x02	/* Byte program */
+#define SPINOR_OP_WRDI		0x04	/* Write disable */
+#define SPINOR_OP_AAI_WP	0xad	/* Auto address increment word program */
+
+/* Used for Macronix and Winbond flashes. */
+#define SPINOR_OP_EN4B		0xb7	/* Enter 4-byte mode */
+#define SPINOR_OP_EX4B		0xe9	/* Exit 4-byte mode */
+
+/* Used for Spansion flashes only. */
+#define SPINOR_OP_BRWR		0x17	/* Bank register write */
+
+/* Status Register bits. */
+#define SR_WIP			1	/* Write in progress */
+#define SR_WEL			2	/* Write enable latch */
+/* meaning of other SR_* bits may differ between vendors */
+#define SR_BP0			4	/* Block protect 0 */
+#define SR_BP1			8	/* Block protect 1 */
+#define SR_BP2			0x10	/* Block protect 2 */
+#define SR_SRWD			0x80	/* SR write protect */
+
+#define SR_QUAD_EN_MX		0x40	/* Macronix Quad I/O */
+
+/* Configuration Register bits. */
+#define CR_QUAD_EN_SPAN		0x2	/* Spansion Quad I/O */
+
+enum read_mode {
+	SPI_NOR_NORMAL = 0,
+	SPI_NOR_FAST,
+	SPI_NOR_DUAL,
+	SPI_NOR_QUAD,
+};
+
+/**
+ * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer
+ * @wren:		command for "Write Enable", or 0x00 for not required
+ * @cmd:		command for operation
+ * @cmd_pins:		number of pins to send @cmd (1, 2, 4)
+ * @addr:		address for operation
+ * @addr_pins:		number of pins to send @addr (1, 2, 4)
+ * @addr_width:		number of address bytes
+ *			(3,4, or 0 for address not required)
+ * @mode:		mode data
+ * @mode_pins:		number of pins to send @mode (1, 2, 4)
+ * @mode_cycles:	number of mode cycles (0 for mode not required)
+ * @dummy_cycles:	number of dummy cycles (0 for dummy not required)
+ */
+struct spi_nor_xfer_cfg {
+	u8		wren;
+	u8		cmd;
+	u8		cmd_pins;
+	u32		addr;
+	u8		addr_pins;
+	u8		addr_width;
+	u8		mode;
+	u8		mode_pins;
+	u8		mode_cycles;
+	u8		dummy_cycles;
+};
+
+#define SPI_NOR_MAX_CMD_SIZE	8
+enum spi_nor_ops {
+	SPI_NOR_OPS_READ = 0,
+	SPI_NOR_OPS_WRITE,
+	SPI_NOR_OPS_ERASE,
+	SPI_NOR_OPS_LOCK,
+	SPI_NOR_OPS_UNLOCK,
+};
+
+/**
+ * struct spi_nor - Structure for defining the SPI NOR layer
+ * @mtd:		point to a mtd_info structure
+ * @lock:		the lock for the read/write/erase/lock/unlock operations
+ * @dev:		point to a spi device, or a spi nor controller device.
+ * @page_size:		the page size of the SPI NOR
+ * @addr_width:		number of address bytes
+ * @erase_opcode:	the opcode for erasing a sector
+ * @read_opcode:	the read opcode
+ * @read_dummy:		the dummy needed by the read operation
+ * @program_opcode:	the program opcode
+ * @flash_read:		the mode of the read
+ * @sst_write_second:	used by the SST write operation
+ * @cfg:		used by the read_xfer/write_xfer
+ * @cmd_buf:		used by the write_reg
+ * @prepare:		[OPTIONAL] do some preparations for the
+ *			read/write/erase/lock/unlock operations
+ * @unprepare:		[OPTIONAL] do some post work after the
+ *			read/write/erase/lock/unlock operations
+ * @read_xfer:		[OPTIONAL] the read fundamental primitive
+ * @write_xfer:		[OPTIONAL] the write fundamental primitive
+ * @read_reg:		[DRIVER-SPECIFIC] read out the register
+ * @write_reg:		[DRIVER-SPECIFIC] write data to the register
+ * @read_id:		[REPLACEABLE] read out the ID data, and find
+ *			the proper spi_device_id
+ * @wait_till_ready:	[REPLACEABLE] wait till the NOR becomes ready
+ * @read:		[DRIVER-SPECIFIC] read data from the SPI NOR
+ * @write:		[DRIVER-SPECIFIC] write data to the SPI NOR
+ * @erase:		[DRIVER-SPECIFIC] erase a sector of the SPI NOR
+ *			at the offset @offs
+ * @priv:		the private data
+ */
+struct spi_nor {
+	struct mtd_info		*mtd;
+	struct mutex		lock;
+	struct device		*dev;
+	u32			page_size;
+	u8			addr_width;
+	u8			erase_opcode;
+	u8			read_opcode;
+	u8			read_dummy;
+	u8			program_opcode;
+	enum read_mode		flash_read;
+	bool			sst_write_second;
+	struct spi_nor_xfer_cfg	cfg;
+	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
+
+	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			 u8 *buf, size_t len);
+	int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			  u8 *buf, size_t len);
+	int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
+	int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
+			int write_enable);
+	const struct spi_device_id *(*read_id)(struct spi_nor *nor);
+	int (*wait_till_ready)(struct spi_nor *nor);
+
+	int (*read)(struct spi_nor *nor, loff_t from,
+			size_t len, size_t *retlen, u_char *read_buf);
+	void (*write)(struct spi_nor *nor, loff_t to,
+			size_t len, size_t *retlen, const u_char *write_buf);
+	int (*erase)(struct spi_nor *nor, loff_t offs);
+
+	void *priv;
+};
+
+/**
+ * spi_nor_scan() - scan the SPI NOR
+ * @nor:	the spi_nor structure
+ * @id:		the spi_device_id provided by the driver
+ * @mode:	the read mode supported by the driver
+ *
+ * The drivers can use this function to scan the SPI NOR.
+ * In the scanning, it will try to get all the necessary information to
+ * fill the mtd_info{} and the spi_nor{}.
+ *
+ * The board may assign a spi_device_id with @id, which is then compared with
+ * the spi_device_id detected by the scanning.
+ *
+ * Return: 0 for success, others for failure.
+ */
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode);
+extern const struct spi_device_id spi_nor_ids[];
+
+/**
+ * spi_nor_match_id() - find the spi_device_id by the name
+ * @name:	the name of the spi_device_id
+ *
+ * The drivers use this function to find the spi_device_id
+ * specified by the @name.
+ *
+ * Return: returns the right spi_device_id pointer on success,
+ *         and returns NULL on failure.
+ */
+const struct spi_device_id *spi_nor_match_id(char *name);
+
+#endif

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index c26d0ec..e5a5894 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h

@@ -42,9 +42,11 @@
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
+	NETIF_F_GSO_GRE_CSUM_BIT,	/* ... GRE with csum with TSO */
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
+	NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
 		NETIF_F_GSO_MPLS_BIT,
@@ -111,6 +113,7 @@
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
+#define NETIF_F_GSO_GRE_CSUM	__NETIF_F(GSO_GRE_CSUM)
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6c1ae9f..abe3de1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h

@@ -56,9 +56,6 @@
 struct phy_device;
 /* 802.11 specific */
 struct wireless_dev;
-					/* source back-compat hooks */
-#define SET_ETHTOOL_OPS(netdev,ops) \
-	( (netdev)->ethtool_ops = (ops) )
 
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
@@ -853,7 +850,8 @@
  *	SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
  * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
- * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
+ * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
+ *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
@@ -1047,8 +1045,9 @@
 						  int queue, u8 *mac);
 	int			(*ndo_set_vf_vlan)(struct net_device *dev,
 						   int queue, u16 vlan, u8 qos);
-	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
-						      int vf, int rate);
+	int			(*ndo_set_vf_rate)(struct net_device *dev,
+						   int vf, int min_tx_rate,
+						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
@@ -2634,6 +2633,7 @@
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq);
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
 
@@ -3003,6 +3003,15 @@
 		   struct netdev_hw_addr_list *from_list, int addr_len);
 void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 		      struct netdev_hw_addr_list *from_list, int addr_len);
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *));
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *));
 void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
@@ -3023,6 +3032,38 @@
 void dev_uc_flush(struct net_device *dev);
 void dev_uc_init(struct net_device *dev);
 
+/**
+ *  __dev_uc_sync - Synchronize device's unicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_uc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_uc_unsync - Remove synchronized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __dev_uc_sync().
+ **/
+static inline void __dev_uc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->uc, dev, unsync);
+}
+
 /* Functions used for multicast addresses handling */
 int dev_mc_add(struct net_device *dev, const unsigned char *addr);
 int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
@@ -3035,6 +3076,38 @@
 void dev_mc_flush(struct net_device *dev);
 void dev_mc_init(struct net_device *dev);
 
+/**
+ *  __dev_mc_sync - Synchronize device's multicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_mc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_mc_unsync - Remove synchronized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __dev_mc_sync().
+ **/
+static inline void __dev_mc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->mc, dev, unsync);
+}
+
 /* Functions used for secondary unicast and multicast support */
 void dev_set_rx_mode(struct net_device *dev);
 void __dev_set_rx_mode(struct net_device *dev);
@@ -3180,6 +3253,20 @@
 
 void linkwatch_run_queue(void);
 
+static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
+							  netdev_features_t f2)
+{
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	if (f2 & NETIF_F_GEN_CSUM)
+		f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	f1 &= f2;
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+
+	return f1;
+}
+
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
 {

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b2e85e5..6ec9757 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h

@@ -3,11 +3,17 @@
 
 #include <uapi/linux/netfilter/nfnetlink_acct.h>
 
+enum {
+	NFACCT_NO_QUOTA		= -1,
+	NFACCT_UNDERQUOTA,
+	NFACCT_OVERQUOTA,
+};
 
 struct nf_acct;
 
 struct nf_acct *nfnl_acct_find_get(const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-
+extern int nfnl_acct_overquota(const struct sk_buff *skb,
+			      struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 034cda7..9e572da 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h

@@ -46,7 +46,8 @@
 	unsigned int	flags;
 	void		(*input)(struct sk_buff *skb);
 	struct mutex	*cb_mutex;
-	void		(*bind)(int group);
+	int		(*bind)(int group);
+	void		(*unbind)(int group);
 	bool		(*compare)(struct net *net, struct sock *sk);
 };
 

diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 3e794c1..610af51 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h

@@ -46,6 +46,9 @@
 enum nfs3_stable_how {
 	NFS_UNSTABLE = 0,
 	NFS_DATA_SYNC = 1,
-	NFS_FILE_SYNC = 2
+	NFS_FILE_SYNC = 2,
+
+	/* used by direct.c to mark verf as invalid */
+	NFS_INVALID_STABLE_HOW = -1
 };
 #endif /* _LINUX_NFS_H */

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 12c2cb9..a1e3064 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h

@@ -399,8 +399,6 @@
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
-#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
-					(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b..e30f605 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h

@@ -459,13 +459,12 @@
 /*
  * linux/fs/nfs/direct.c
  */
-extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
-			unsigned long);
+extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 
 /*
@@ -520,7 +519,6 @@
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -553,7 +551,6 @@
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 92ce578..7d9096d 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h

@@ -22,12 +22,17 @@
  * Valid flags for a dirty buffer
  */
 enum {
-	PG_BUSY = 0,
-	PG_MAPPED,
-	PG_CLEAN,
-	PG_NEED_COMMIT,
-	PG_NEED_RESCHED,
-	PG_COMMIT_TO_DS,
+	PG_BUSY = 0,		/* nfs_{un}lock_request */
+	PG_MAPPED,		/* page private set for buffered io */
+	PG_CLEAN,		/* write succeeded */
+	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
+	PG_INODE_REF,		/* extra ref held by inode (head req only) */
+	PG_HEADLOCK,		/* page group lock of wb_head */
+	PG_TEARDOWN,		/* page group sync for destroy */
+	PG_UNLOCKPAGE,		/* page group sync bit in read path */
+	PG_UPTODATE,		/* page group sync bit in read path */
+	PG_WB_END,		/* page group sync bit in write path */
+	PG_REMOVE,		/* page group sync bit in write path */
 };
 
 struct nfs_inode;
@@ -43,15 +48,29 @@
 	struct kref		wb_kref;	/* reference count */
 	unsigned long		wb_flags;
 	struct nfs_write_verifier	wb_verf;	/* Commit cookie */
+	struct nfs_page		*wb_this_page;  /* list of reqs for this page */
+	struct nfs_page		*wb_head;       /* head pointer for req list */
 };
 
 struct nfs_pageio_descriptor;
 struct nfs_pageio_ops {
 	void	(*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
-	bool	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	size_t	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
+			   struct nfs_page *);
 	int	(*pg_doio)(struct nfs_pageio_descriptor *);
 };
 
+struct nfs_rw_ops {
+	const fmode_t rw_mode;
+	struct nfs_rw_header *(*rw_alloc_header)(void);
+	void (*rw_free_header)(struct nfs_rw_header *);
+	void (*rw_release)(struct nfs_pgio_data *);
+	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
+	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
+	void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *,
+			    struct rpc_task_setup *, int);
+};
+
 struct nfs_pageio_descriptor {
 	struct list_head	pg_list;
 	unsigned long		pg_bytes_written;
@@ -63,6 +82,7 @@
 
 	struct inode		*pg_inode;
 	const struct nfs_pageio_ops *pg_ops;
+	const struct nfs_rw_ops *pg_rw_ops;
 	int 			pg_ioflags;
 	int			pg_error;
 	const struct rpc_call_ops *pg_rpc_callops;
@@ -75,29 +95,33 @@
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
-					    struct inode *inode,
 					    struct page *page,
+					    struct nfs_page *last,
 					    unsigned int offset,
 					    unsigned int count);
-extern	void nfs_release_request(struct nfs_page *req);
+extern	void nfs_release_request(struct nfs_page *);
 
 
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     const struct nfs_pageio_ops *pg_ops,
 			     const struct nfs_pgio_completion_ops *compl_ops,
+			     const struct nfs_rw_ops *rw_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
-extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *prev,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
-extern	void nfs_unlock_and_release_request(struct nfs_page *req);
+extern	void nfs_unlock_and_release_request(struct nfs_page *);
+extern void nfs_page_group_lock(struct nfs_page *);
+extern void nfs_page_group_unlock(struct nfs_page *);
+extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
 
 /*
  * Lock the page of an asynchronous request

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6fb5b23..9a1396e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h

@@ -489,44 +489,8 @@
 };
 
 /*
- * Arguments to the read call.
- */
-struct nfs_readargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	unsigned int		pgbase;
-	struct page **		pages;
-};
-
-struct nfs_readres {
-	struct nfs4_sequence_res	seq_res;
-	struct nfs_fattr *	fattr;
-	__u32			count;
-	int                     eof;
-};
-
-/*
  * Arguments to the write call.
  */
-struct nfs_writeargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	enum nfs3_stable_how	stable;
-	unsigned int		pgbase;
-	struct page **		pages;
-	const u32 *		bitmask;
-};
-
 struct nfs_write_verifier {
 	char			data[8];
 };
@@ -536,12 +500,31 @@
 	enum nfs3_stable_how	committed;
 };
 
-struct nfs_writeres {
+/*
+ * Arguments shared by the read and write call.
+ */
+struct nfs_pgio_args {
+	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh *		fh;
+	struct nfs_open_context *context;
+	struct nfs_lock_context *lock_context;
+	nfs4_stateid		stateid;
+	__u64			offset;
+	__u32			count;
+	unsigned int		pgbase;
+	struct page **		pages;
+	const u32 *		bitmask;	/* used by write */
+	enum nfs3_stable_how	stable;		/* used by write */
+};
+
+struct nfs_pgio_res {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
-	struct nfs_writeverf *	verf;
 	__u32			count;
-	const struct nfs_server *server;
+	int			eof;		/* used by read */
+	struct nfs_writeverf *	verf;		/* used by write */
+	const struct nfs_server *server;	/* used by write */
+
 };
 
 /*
@@ -1129,6 +1112,7 @@
 	struct list_head committing;
 	struct pnfs_layout_segment *wlseg;
 	struct pnfs_layout_segment *clseg;
+	struct nfs_writeverf direct_verf;
 };
 
 struct pnfs_ds_commit_info {
@@ -1264,20 +1248,6 @@
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
-struct nfs_read_data {
-	struct nfs_pgio_header	*header;
-	struct list_head	list;
-	struct rpc_task		task;
-	struct nfs_fattr	fattr;	/* fattr storage */
-	struct nfs_readargs args;
-	struct nfs_readres  res;
-	unsigned long		timestamp;	/* For lease renewal */
-	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
-	__u64			mds_offset;
-	struct nfs_page_array	pages;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-};
-
 /* used as flag bits in nfs_pgio_header */
 enum {
 	NFS_IOHDR_ERROR = 0,
@@ -1287,19 +1257,22 @@
 	NFS_IOHDR_NEED_RESCHED,
 };
 
+struct nfs_pgio_data;
+
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	struct list_head	rpc_list;
+	struct nfs_pgio_data	*data;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
-	struct nfs_writeverf	*verf;
+	struct nfs_writeverf	verf;		/* Used for writes */
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
+	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_direct_req	*dreq;
 	void			*layout_private;
 	spinlock_t		lock;
@@ -1310,30 +1283,24 @@
 	unsigned long		flags;
 };
 
-struct nfs_read_header {
-	struct nfs_pgio_header	header;
-	struct nfs_read_data	rpc_data;
-};
-
-struct nfs_write_data {
+struct nfs_pgio_data {
 	struct nfs_pgio_header	*header;
-	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
-	struct nfs_writeverf	verf;
-	struct nfs_writeargs	args;		/* argument struct */
-	struct nfs_writeres	res;		/* result struct */
+	struct nfs_writeverf	verf;		/* Used for writes */
+	struct nfs_pgio_args	args;		/* argument struct */
+	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
-	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
+	int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_idx;		/* ds index if ds_clp is set */
 };
 
-struct nfs_write_header {
+struct nfs_rw_header {
 	struct nfs_pgio_header	header;
-	struct nfs_write_data	rpc_data;
-	struct nfs_writeverf	verf;
+	struct nfs_pgio_data	rpc_data;
 };
 
 struct nfs_mds_commit_info {
@@ -1465,16 +1432,11 @@
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
-	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
-	void	(*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *,
-				    const struct nfs_pgio_completion_ops *);
-	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
-	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
-	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-	void	(*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int,
-				     const struct nfs_pgio_completion_ops *);
-	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
-	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
+	int	(*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
+	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
 	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);

diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
deleted file mode 100644
index 19ef837..0000000
--- a/include/linux/nfsd/debug.h
+++ /dev/null

@@ -1,19 +0,0 @@
-/*
- * linux/include/linux/nfsd/debug.h
- *
- * Debugging-related stuff for nfsd
- *
- * Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_DEBUG_H
-#define LINUX_NFSD_DEBUG_H
-
-#include <uapi/linux/nfsd/debug.h>
-
-# undef ifdebug
-# ifdef NFSD_DEBUG
-#  define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
-# else
-#  define ifdebug(flag)		if (0)
-# endif
-#endif /* LINUX_NFSD_DEBUG_H */

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
deleted file mode 100644
index a93593f..0000000
--- a/include/linux/nfsd/nfsfh.h
+++ /dev/null

@@ -1,63 +0,0 @@
-/*
- * include/linux/nfsd/nfsfh.h
- *
- * This file describes the layout of the file handles as passed
- * over the wire.
- *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef _LINUX_NFSD_FH_H
-#define _LINUX_NFSD_FH_H
-
-# include <linux/sunrpc/svc.h>
-#include <uapi/linux/nfsd/nfsfh.h>
-
-static inline __u32 ino_t_to_u32(ino_t ino)
-{
-	return (__u32) ino;
-}
-
-static inline ino_t u32_to_ino_t(__u32 uino)
-{
-	return (ino_t) uino;
-}
-
-/*
- * This is the internal representation of an NFS handle used in knfsd.
- * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
- */
-typedef struct svc_fh {
-	struct knfsd_fh		fh_handle;	/* FH data */
-	struct dentry *		fh_dentry;	/* validated dentry */
-	struct svc_export *	fh_export;	/* export pointer */
-	int			fh_maxsize;	/* max size for fh_handle */
-
-	unsigned char		fh_locked;	/* inode locked by us */
-	unsigned char		fh_want_write;	/* remount protection taken */
-
-#ifdef CONFIG_NFSD_V3
-	unsigned char		fh_post_saved;	/* post-op attrs saved */
-	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
-
-	/* Pre-op attributes saved during fh_lock */
-	__u64			fh_pre_size;	/* size before operation */
-	struct timespec		fh_pre_mtime;	/* mtime before oper */
-	struct timespec		fh_pre_ctime;	/* ctime before oper */
-	/*
-	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
-	 *  to find out if it is valid.
-	 */
-	u64			fh_pre_change;
-
-	/* Post-op attributes saved in fh_unlock */
-	struct kstat		fh_post_attr;	/* full attrs after operation */
-	u64			fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
-
-} svc_fh;
-
-#endif /* _LINUX_NFSD_FH_H */

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index c8d7f39..20163b9 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h

@@ -80,6 +80,22 @@
 
 	IEEE802154_ATTR_FRAME_RETRIES,
 
+	IEEE802154_ATTR_LLSEC_ENABLED,
+	IEEE802154_ATTR_LLSEC_SECLEVEL,
+	IEEE802154_ATTR_LLSEC_KEY_MODE,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+	IEEE802154_ATTR_LLSEC_KEY_ID,
+	IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+	IEEE802154_ATTR_LLSEC_KEY_BYTES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+	IEEE802154_ATTR_LLSEC_FRAME_TYPE,
+	IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+	IEEE802154_ATTR_LLSEC_SECLEVELS,
+	IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+	IEEE802154_ATTR_LLSEC_DEV_KEY_MODE,
+
 	__IEEE802154_ATTR_MAX,
 };
 
@@ -134,6 +150,21 @@
 
 	IEEE802154_SET_MACPARAMS,
 
+	IEEE802154_LLSEC_GETPARAMS,
+	IEEE802154_LLSEC_SETPARAMS,
+	IEEE802154_LLSEC_LIST_KEY,
+	IEEE802154_LLSEC_ADD_KEY,
+	IEEE802154_LLSEC_DEL_KEY,
+	IEEE802154_LLSEC_LIST_DEV,
+	IEEE802154_LLSEC_ADD_DEV,
+	IEEE802154_LLSEC_DEL_DEV,
+	IEEE802154_LLSEC_LIST_DEVKEY,
+	IEEE802154_LLSEC_ADD_DEVKEY,
+	IEEE802154_LLSEC_DEL_DEVKEY,
+	IEEE802154_LLSEC_LIST_SECLEVEL,
+	IEEE802154_LLSEC_ADD_SECLEVEL,
+	IEEE802154_LLSEC_DEL_SECLEVEL,
+
 	__IEEE802154_CMD_MAX,
 };
 

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a50173c..2bf4031 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h

@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _LINUX_NVME_H
@@ -66,8 +62,8 @@
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
-extern unsigned char io_timeout;
-#define NVME_IO_TIMEOUT	(io_timeout * HZ)
+extern unsigned char nvme_io_timeout;
+#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
@@ -94,7 +90,7 @@
 	struct miscdevice miscdev;
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
-	struct notifier_block nb;
+	struct work_struct cpu_work;
 	char name[12];
 	char serial[20];
 	char model[40];
@@ -103,6 +99,7 @@
 	u32 stripe_size;
 	u16 oncs;
 	u16 abort_limit;
+	u8 vwc;
 	u8 initialized;
 };
 
@@ -159,7 +156,6 @@
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
 int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
 							u32 *result);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 881a7c3..a70c949 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h

@@ -22,12 +22,12 @@
 struct phy_device *of_phy_attach(struct net_device *dev,
 				 struct device_node *phy_np, u32 flags,
 				 phy_interface_t iface);
-extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					 void (*hndlr)(struct net_device *),
-					 phy_interface_t iface);
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
+extern void of_mdiobus_link_phydev(struct mii_bus *mdio,
+				   struct phy_device *phydev);
+
 #else /* CONFIG_OF */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
@@ -59,17 +59,30 @@
 	return NULL;
 }
 
-static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-							   void (*hndlr)(struct net_device *),
-							   phy_interface_t iface)
-{
-	return NULL;
-}
-
 static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
 }
+
+static inline void of_mdiobus_link_phydev(struct mii_bus *mdio,
+					  struct phy_device *phydev)
+{
+}
 #endif /* CONFIG_OF */
 
+#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
+extern int of_phy_register_fixed_link(struct device_node *np);
+extern bool of_phy_is_fixed_link(struct device_node *np);
+#else
+static inline int of_phy_register_fixed_link(struct device_node *np)
+{
+	return -ENOSYS;
+}
+static inline bool of_phy_is_fixed_link(struct device_node *np)
+{
+	return false;
+}
+#endif
+
+
 #endif /* __LINUX_OF_MDIO_H */

diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index c29a6de..88e6ea4 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h

@@ -1,23 +1,6 @@
-/*
- * OMAP DMA Engine support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
 #ifndef __LINUX_OMAP_DMA_H
 #define __LINUX_OMAP_DMA_H
-
-struct dma_chan;
-
-#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
-bool omap_dma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-{
-	return false;
-}
-#endif
+#include <linux/omap-dmaengine.h>
 
 /*
  *  Legacy OMAP DMA handling defines and functions

diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h
new file mode 100644
index 0000000..8e6906c
--- /dev/null
+++ b/include/linux/omap-dmaengine.h

@@ -0,0 +1,21 @@
+/*
+ * OMAP DMA Engine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_OMAP_DMAENGINE_H
+#define __LINUX_OMAP_DMAENGINE_H
+
+struct dma_chan;
+
+#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
+bool omap_dma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
+{
+	return false;
+}
+#endif
+#endif /* __LINUX_OMAP_DMAENGINE_H */

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 71d9673..466bcd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h

@@ -164,13 +164,17 @@
 	/* INTX_DISABLE in PCI_COMMAND register disables MSI
 	 * generation too.
 	 */
-	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
+	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0),
 	/* Device configuration is irrevocably lost if disabled into D3 */
-	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
+	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1),
 	/* Provide indication device is assigned by a Virtual Machine Manager */
-	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
+	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
-	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) 8,
+	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
+	/* Flag to indicate the device uses dma_alias_devfn */
+	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
+	/* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */
+	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5),
 };
 
 enum pci_irq_reroute_variant {
@@ -268,6 +272,7 @@
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;		/* which interrupt pin this device uses */
 	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
+	u8		dma_alias_devfn;/* devfn of DMA alias, if any */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
@@ -1809,6 +1814,10 @@
 }
 #endif
 
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data);
+
 /**
  * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
  * @pdev: the PCI device

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 0afb48f..5d8920e 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h

@@ -121,6 +121,36 @@
  * percpu_ref_tryget - try to increment a percpu refcount
  * @ref: percpu_ref to try-get
  *
+ * Increment a percpu refcount unless its count already reached zero.
+ * Returns %true on success; %false on failure.
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+	unsigned __percpu *pcpu_count;
+	int ret = false;
+
+	rcu_read_lock_sched();
+
+	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
+		this_cpu_inc(*pcpu_count);
+		ret = true;
+	} else {
+		ret = atomic_inc_not_zero(&ref->count);
+	}
+
+	rcu_read_unlock_sched();
+
+	return ret;
+}
+
+/**
+ * percpu_ref_tryget_live - try to increment a live percpu refcount
+ * @ref: percpu_ref to try-get
+ *
  * Increment a percpu refcount unless it has already been killed.  Returns
  * %true on success; %false on failure.
  *
@@ -128,8 +158,10 @@
  * will fail.  For such guarantee, percpu_ref_kill_and_confirm() should be
  * used.  After the confirm_kill callback is invoked, it's guaranteed that
  * no new reference will be given out by percpu_ref_tryget().
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
  */
-static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned __percpu *pcpu_count;
 	int ret = false;

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 495c654..8419053 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h

@@ -29,7 +29,7 @@
  */
 #define get_cpu_var(var) (*({				\
 	preempt_disable();				\
-	&__get_cpu_var(var); }))
+	this_cpu_ptr(&var); }))
 
 /*
  * The weird & is necessary because sparse considers (void)(var) to be

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a920911..707617a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -167,6 +167,11 @@
 #define PERF_EVENT_TXN 0x1
 
 /**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT		0x01
+
+/**
  * struct pmu - generic performance monitoring unit
  */
 struct pmu {
@@ -178,6 +183,11 @@
 	const char			*name;
 	int				type;
 
+	/*
+	 * various common per-pmu feature flags
+	 */
+	int				capabilities;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
@@ -696,7 +706,8 @@
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_exec(void);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -773,7 +784,7 @@
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
@@ -803,7 +814,8 @@
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
-static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_exec(void)				{ }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4d0221f..864ddaf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h

@@ -198,6 +198,13 @@
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
+static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
+{
+	return devm_mdiobus_alloc_size(dev, 0);
+}
+
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);
 struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
 int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
@@ -666,6 +673,7 @@
 	return phydev->drv->read_status(phydev);
 }
 
+int genphy_config_init(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 509d8f5..ae612ac 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h

@@ -9,15 +9,31 @@
 	int asym_pause;
 };
 
+struct device_node;
+
 #ifdef CONFIG_FIXED_PHY
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
+extern int fixed_phy_register(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct device_node *np);
+extern void fixed_phy_del(int phy_addr);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_register(unsigned int irq,
+				     struct fixed_phy_status *status,
+				     struct device_node *np)
+{
+	return -ENODEV;
+}
+static inline void fixed_phy_del(int phy_addr)
+{
+	/* Stub for !CONFIG_FIXED_PHY; void to match the real prototype. */
+}
 #endif /* CONFIG_FIXED_PHY */
 
 /*

diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
index 4edb406..780d1e9 100644
--- a/include/linux/platform_data/elm.h
+++ b/include/linux/platform_data/elm.h

@@ -21,6 +21,7 @@
 enum bch_ecc {
 	BCH4_ECC = 0,
 	BCH8_ECC,
+	BCH16_ECC,
 };
 
 /* ELM support 8 error syndrome process */
@@ -38,7 +39,7 @@
 	bool error_reported;
 	bool error_uncorrectable;
 	int error_count;
-	int error_loc[ERROR_VECTOR_MAX];
+	int error_loc[16];
 };
 
 void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,

diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/intel-mid_wdt.h
new file mode 100644
index 0000000..b982534
--- /dev/null
+++ b/include/linux/platform_data/intel-mid_wdt.h

@@ -0,0 +1,22 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#ifndef __INTEL_MID_WDT_H__
+#define __INTEL_MID_WDT_H__
+
+#include <linux/platform_device.h>
+
+struct intel_mid_wdt_pdata {
+	int irq;
+	int (*probe)(struct platform_device *pdev);
+};
+
+#endif /*__INTEL_MID_WDT_H__*/

diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h
deleted file mode 100644
index 778e9e4..0000000
--- a/include/linux/platform_data/leds-pca9685.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.h by Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * LED driver for the NXP PCA9685 PWM chip
- *
- */
-
-#ifndef __LINUX_PCA9685_H
-#define __LINUX_PCA9685_H
-
-#include <linux/leds.h>
-
-enum pca9685_outdrv {
-	PCA9685_OPEN_DRAIN,
-	PCA9685_TOTEM_POLE,
-};
-
-enum pca9685_inverted {
-	PCA9685_NOT_INVERTED,
-	PCA9685_INVERTED,
-};
-
-struct pca9685_platform_data {
-	struct led_platform_data leds;
-	enum pca9685_outdrv outdrv;
-	enum pca9685_inverted inverted;
-};
-
-#endif /* __LINUX_PCA9685_H */

diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 3e9dd66..660c029 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h

@@ -31,6 +31,8 @@
 	OMAP_ECC_BCH8_CODE_HW_DETECTION_SW,
 	/* 8-bit  ECC calculation by GPMC, Error detection by ELM */
 	OMAP_ECC_BCH8_CODE_HW,
+	/* 16-bit ECC calculation by GPMC, Error detection by ELM */
+	OMAP_ECC_BCH16_CODE_HW,
 };
 
 struct gpmc_nand_regs {
@@ -50,6 +52,9 @@
 	void __iomem	*gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER];
 };
 
 struct omap_nand_platform_data {

diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h
index a941471..ac4ea2e 100644
--- a/include/linux/platform_data/mtd-nand-pxa3xx.h
+++ b/include/linux/platform_data/mtd-nand-pxa3xx.h

@@ -58,6 +58,9 @@
 	/* use an flash-based bad block table */
 	bool	flash_bbt;
 
+	/* requested ECC strength and ECC step size */
+	int ecc_strength, ecc_step_size;
+
 	const struct mtd_partition		*parts[NUM_CHIP_SELECT];
 	unsigned int				nr_parts[NUM_CHIP_SELECT];
 

diff --git a/include/linux/platform_data/omap4-keypad.h b/include/linux/platform_data/omap4-keypad.h
deleted file mode 100644
index 4eef5fb..0000000
--- a/include/linux/platform_data/omap4-keypad.h
+++ /dev/null

@@ -1,13 +0,0 @@
-#ifndef __LINUX_INPUT_OMAP4_KEYPAD_H
-#define __LINUX_INPUT_OMAP4_KEYPAD_H
-
-#include <linux/input/matrix_keypad.h>
-
-struct omap4_keypad_platform_data {
-	const struct matrix_keymap_data *keymap_data;
-
-	u8 rows;
-	u8 cols;
-};
-
-#endif /* __LINUX_INPUT_OMAP4_KEYPAD_H */

diff --git a/include/linux/platform_data/pwm-renesas-tpu.h b/include/linux/platform_data/pwm-renesas-tpu.h
deleted file mode 100644
index a7220b1..0000000
--- a/include/linux/platform_data/pwm-renesas-tpu.h
+++ /dev/null

@@ -1,16 +0,0 @@
-#ifndef __PWM_RENESAS_TPU_H__
-#define __PWM_RENESAS_TPU_H__
-
-#include <linux/pwm.h>
-
-#define TPU_CHANNEL_MAX		4
-
-struct tpu_pwm_channel_data {
-	enum pwm_polarity polarity;
-};
-
-struct tpu_pwm_platform_data {
-	struct tpu_pwm_channel_data channels[TPU_CHANNEL_MAX];
-};
-
-#endif /* __PWM_RENESAS_TPU_H__ */

diff --git a/include/linux/platform_data/shtc1.h b/include/linux/platform_data/shtc1.h
new file mode 100644
index 0000000..7b8c353
--- /dev/null
+++ b/include/linux/platform_data/shtc1.h

@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SHTC1_H_
+#define __SHTC1_H_
+
+struct shtc1_platform_data {
+	bool blocking_io;
+	bool high_precision;
+};
+#endif /* __SHTC1_H_ */

diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h
new file mode 100644
index 0000000..1730312
--- /dev/null
+++ b/include/linux/platform_data/st21nfca.h

@@ -0,0 +1,32 @@
+/*
+ * Driver include for the ST21NFCA NFC chip.
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ST21NFCA_HCI_H_
+#define _ST21NFCA_HCI_H_
+
+#include <linux/i2c.h>
+
+#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
+
+struct st21nfca_nfc_platform_data {
+	unsigned int gpio_irq;
+	unsigned int gpio_ena;
+	unsigned int irq_polarity;
+};
+
+#endif /* _ST21NFCA_HCI_H_ */

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 4717f54..e90628c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h

@@ -274,14 +274,18 @@
 	unsigned int index;
 	const char *dev_id;
 	const char *con_id;
+	unsigned int period;
+	enum pwm_polarity polarity;
 };
 
-#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id)	\
+#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
 	{						\
 		.provider = _provider,			\
 		.index = _index,			\
 		.dev_id = _dev_id,			\
 		.con_id = _con_id,			\
+		.period = _period,			\
+		.polarity = _polarity			\
 	}
 
 #if IS_ENABLED(CONFIG_PWM)

diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 2de2e27..efdd922 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h

@@ -6,9 +6,6 @@
 
 #include <linux/backlight.h>
 
-/* TODO: convert to gpiod_*() API once it has been merged */
-#define PWM_BACKLIGHT_GPIO_ACTIVE_LOW	(1 << 0)
-
 struct platform_pwm_backlight_data {
 	int pwm_id;
 	unsigned int max_brightness;
@@ -16,8 +13,8 @@
 	unsigned int lth_brightness;
 	unsigned int pwm_period_ns;
 	unsigned int *levels;
+	/* TODO remove once all users are switched to gpiod_* API */
 	int enable_gpio;
-	unsigned long enable_gpio_flags;
 	int (*init)(struct device *dev);
 	int (*notify)(struct device *dev, int brightness);
 	void (*notify_after)(struct device *dev, int brightness);

diff --git a/include/linux/quota.h b/include/linux/quota.h
index cc7494a..0f3c5d3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h

@@ -329,6 +329,7 @@
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xstatev)(struct super_block *, struct fs_quota_statv *);
+	int (*rm_xquota)(struct super_block *, unsigned int);
 };
 
 struct quota_format_type {

diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h
index 4d09f6e..20bcb55 100644
--- a/include/linux/rfkill-gpio.h
+++ b/include/linux/rfkill-gpio.h

@@ -27,21 +27,11 @@
  * struct rfkill_gpio_platform_data - platform data for rfkill gpio device.
  * for unused gpio's, the expected value is -1.
  * @name:		name for the gpio rf kill instance
- * @reset_gpio:		GPIO which is used for reseting rfkill switch
- * @shutdown_gpio:	GPIO which is used for shutdown of rfkill switch
- * @power_clk_name:	[optional] name of clk to turn off while blocked
- * @gpio_runtime_close:	clean up platform specific gpio configuration
- * @gpio_runtime_setup:	set up platform specific gpio configuration
  */
 
 struct rfkill_gpio_platform_data {
 	char			*name;
-	int			reset_gpio;
-	int			shutdown_gpio;
-	const char		*power_clk_name;
 	enum rfkill_type	type;
-	void	(*gpio_runtime_close)(struct platform_device *);
-	int	(*gpio_runtime_setup)(struct platform_device *);
 };
 
 #endif /* __RFKILL_GPIO_H */

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d69cf63..49a4d6f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h

@@ -97,7 +97,7 @@
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05..8d79708 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h

@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 
+struct optimistic_spin_queue;
 struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
-	long			count;
-	raw_spinlock_t		wait_lock;
-	struct list_head	wait_list;
+	long count;
+	raw_spinlock_t wait_lock;
+	struct list_head wait_list;
+#ifdef CONFIG_SMP
+	/*
+	 * Write owner. Used as a speculative check to see
+	 * if the owner is running on the cpu.
+	 */
+	struct task_struct *owner;
+	struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -55,11 +64,21 @@
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#define __RWSEM_INITIALIZER(name)			\
+	{ RWSEM_UNLOCKED_VALUE,				\
+	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
+	  LIST_HEAD_INIT((name).wait_list),		\
+	  NULL, /* owner */				\
+	  NULL /* mcs lock */                           \
+	  __RWSEM_DEP_MAP_INIT(name) }
+#else
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
 	  LIST_HEAD_INIT((name).wait_list)		\
 	  __RWSEM_DEP_MAP_INIT(name) }
+#endif
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea74596..306f4f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -847,10 +847,10 @@
 };
 
 /*
- * Increase resolution of cpu_power calculations
+ * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_POWER_SHIFT	10
-#define SCHED_POWER_SCALE	(1L << SCHED_POWER_SHIFT)
+#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
  * sched-domains (multiprocessor balancing) declarations:
@@ -862,7 +862,7 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -874,7 +874,7 @@
 #ifdef CONFIG_SCHED_SMT
 static inline const int cpu_smt_flags(void)
 {
-	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
@@ -1006,7 +1006,7 @@
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
+	struct sched_group_capacity **__percpu sgc;
 };
 
 struct sched_domain_topology_level {
@@ -2173,7 +2173,7 @@
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-extern bool yield_to(struct task_struct *p, bool preempt);
+extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
 /**
@@ -2421,7 +2421,11 @@
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-extern void set_task_comm(struct task_struct *tsk, const char *from);
+extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+static inline void set_task_comm(struct task_struct *tsk, const char *from)
+{
+	__set_task_comm(tsk, from, false);
+}
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP

diff --git a/include/linux/security.h b/include/linux/security.h
index 6478ce3..9c6b972 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h

@@ -1708,7 +1708,7 @@
 	void (*key_free) (struct key *key);
 	int (*key_permission) (key_ref_t key_ref,
 			       const struct cred *cred,
-			       key_perm_t perm);
+			       unsigned perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
 #endif	/* CONFIG_KEYS */
 
@@ -3034,7 +3034,7 @@
 int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
 void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
-			    const struct cred *cred, key_perm_t perm);
+			    const struct cred *cred, unsigned perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
 
 #else
@@ -3052,7 +3052,7 @@
 
 static inline int security_key_permission(key_ref_t key_ref,
 					  const struct cred *cred,
-					  key_perm_t perm)
+					  unsigned perm)
 {
 	return 0;
 }

diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index f92c0a4..abdf1f2 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h

@@ -54,6 +54,7 @@
 	dma_cookie_t cookie;
 	int chunks;
 	int mark;
+	bool cyclic;			/* used as cyclic transfer */
 };
 
 struct shdma_chan {

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 08074a8..5b5cd31 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h

@@ -345,6 +345,10 @@
 	SKB_GSO_UDP_TUNNEL = 1 << 9,
 
 	SKB_GSO_MPLS = 1 << 10,
+
+	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
+
+	SKB_GSO_GRE_CSUM = 1 << 12,
 };
 
 #if BITS_PER_LONG > 32
@@ -426,7 +430,7 @@
  *	@csum_start: Offset from skb->head where checksumming should start
  *	@csum_offset: Offset from csum_start where checksum should be stored
  *	@priority: Packet queueing priority
- *	@local_df: allow local fragmentation
+ *	@ignore_df: allow local fragmentation
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@ip_summed: Driver fed us an IP checksum
  *	@nohdr: Payload reference only, must not modify header
@@ -514,7 +518,7 @@
 	};
 	__u32			priority;
 	kmemcheck_bitfield_begin(flags1);
-	__u8			local_df:1,
+	__u8			ignore_df:1,
 				cloned:1,
 				ip_summed:2,
 				nohdr:1,
@@ -567,7 +571,10 @@
 	 * headers if needed
 	 */
 	__u8			encapsulation:1;
-	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
+	__u8			encap_hdr_csum:1;
+	__u8			csum_valid:1;
+	__u8			csum_complete_sw:1;
+	/* 3/5 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -739,7 +746,13 @@
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone);
+static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
+					  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, headroom, gfp_mask, false);
+}
 
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
@@ -2233,6 +2246,14 @@
 	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
 }
 
+
+static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
+						  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
+}
+
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
@@ -2716,7 +2737,7 @@
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
-	return skb->ip_summed & CHECKSUM_UNNECESSARY;
+	return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid);
 }
 
 /**
@@ -2741,6 +2762,103 @@
 	       0 : __skb_checksum_complete(skb);
 }
 
+/* Check if we need to perform checksum complete validation.
+ *
+ * Returns true if checksum complete is needed, false otherwise
+ * (either checksum is unnecessary or zero checksum is allowed).
+ */
+static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
+						  bool zero_okay,
+						  __sum16 check)
+{
+	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
+		skb->csum_valid = 1;
+		return false;
+	}
+
+	return true;
+}
+
+/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
+ * in checksum_init.
+ */
+#define CHECKSUM_BREAK 76
+
+/* Validate (init) checksum based on checksum complete.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete. In the latter
+ *	case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo
+ *	checksum is stored in skb->csum for use in __skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ *
+ */
+static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
+						       bool complete,
+						       __wsum psum)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		if (!csum_fold(csum_add(psum, skb->csum))) {
+			skb->csum_valid = 1;
+			return 0;
+		}
+	}
+
+	skb->csum = psum;
+
+	if (complete || skb->len <= CHECKSUM_BREAK) {
+		__sum16 csum;
+
+		csum = __skb_checksum_complete(skb);
+		skb->csum_valid = !csum;
+		return csum;
+	}
+
+	return 0;
+}
+
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return 0;
+}
+
+/* Perform checksum validate (init). Note that this is a macro so that the
+ * pseudo header, which is computed by a caller-supplied function, is only
+ * calculated if necessary. First we try to validate without any computation
+ * (checksum unnecessary) and then calculate based on checksum complete,
+ * calling the function to compute the pseudo header.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ */
+#define __skb_checksum_validate(skb, proto, complete,			\
+				zero_okay, check, compute_pseudo)	\
+({									\
+	__sum16 __ret = 0;						\
+	skb->csum_valid = 0;						\
+	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_checksum_validate_complete(skb,		\
+				complete, compute_pseudo(skb, proto));	\
+	__ret;								\
+})
+
+#define skb_checksum_init(skb, proto, compute_pseudo)			\
+	__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)
+
+#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo)	\
+	__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)
+
+#define skb_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)
+
+#define skb_checksum_validate_zero_check(skb, proto, check,		\
+					 compute_pseudo)		\
+	__skb_checksum_validate(skb, proto, true, true, check, compute_pseudo)
+
+#define skb_checksum_simple_validate(skb)				\
+	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
@@ -2895,6 +3013,7 @@
 struct skb_gso_cb {
 	int	mac_offset;
 	int	encap_level;
+	__u16	csum_start;
 };
 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
 
@@ -2919,6 +3038,28 @@
 	return 0;
 }
 
+/* Compute the checksum for a gso segment. First compute the checksum value
+ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+ * then add in skb->csum (checksum from csum_start to end of packet).
+ * skb->csum and csum_start are then updated to reflect the checksum of the
+ * resultant packet starting from the transport header-- the resultant checksum
+ * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+ * header).
+ */
+static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+{
+	int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
+	    skb_transport_offset(skb);
+	__sum16 csum;	/* folded sum; same type as csum_fold() and the return */
+
+	csum = csum_fold(csum_partial(skb_transport_header(skb),
+				      plen, skb->csum));
+	skb->csum = res;
+	SKB_GSO_CB(skb)->csum_start -= plen;
+
+	return csum;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;

diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
index aa327a8..b2b1afb 100644
--- a/include/linux/spi/at86rf230.h
+++ b/include/linux/spi/at86rf230.h

@@ -26,20 +26,6 @@
 	int rstn;
 	int slp_tr;
 	int dig2;
-
-	/* Setting the irq_type will configure the driver to request
-	 * the platform irq trigger type according to the given value
-	 * and configure the interrupt polarity of the device to the
-	 * corresponding polarity.
-	 *
-	 * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING,
-	 *                     IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW
-	 *
-	 * Setting it to 0, the driver does not touch the trigger type
-	 * configuration of the interrupt and sets the interrupt polarity
-	 * of the device to high active (the default value).
-	 */
-	int irq_type;
 };
 
 #endif

diff --git a/include/linux/splice.h b/include/linux/splice.h
index 0e43906..da2751d 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h

@@ -70,16 +70,6 @@
 				splice_actor *);
 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
 				  struct splice_desc *, splice_actor *);
-extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
-				 splice_actor *);
-extern int splice_from_pipe_next(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern void splice_from_pipe_begin(struct splice_desc *);
-extern void splice_from_pipe_end(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
-			struct splice_desc *);
-
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
 			      struct splice_pipe_desc *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,

diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 07ef9b8..4568a5c 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h

@@ -33,6 +33,7 @@
 	u8 et1phyaddr;		/* MII address for enet1 */
 	u8 et0mdcport;		/* MDIO for enet0 */
 	u8 et1mdcport;		/* MDIO for enet1 */
+	u16 dev_id;		/* Device ID overriding e.g. PCI ID */
 	u16 board_rev;		/* Board revision number from SPROM. */
 	u16 board_num;		/* Board number from SPROM. */
 	u16 board_type;		/* Board type from SPROM. */

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e7632..1bc7cd0 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h

@@ -244,6 +244,7 @@
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
+	struct page *		*rq_page_end;  /* one past the last page */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -254,11 +255,15 @@
 	u32			rq_prot;	/* IP protocol */
 	unsigned short
 				rq_secure  : 1;	/* secure port */
+	unsigned short		rq_local   : 1;	/* local request */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
 	void *			rq_auth_data;	/* flavor-specific data */
-
+	int			rq_auth_slack;	/* extra space xdr code
+						 * should leave in head
+						 * for krb5i, krb5p.
+						 */
 	int			rq_reserved;	/* space on socket outq
 						 * reserved for this request
 						 */
@@ -454,11 +459,7 @@
  */
 static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int added_space = 0;
-
-	if (rqstp->rq_authop->flavour)
-		added_space = RPC_MAX_AUTH_SIZE;
-	svc_reserve(rqstp, space + added_space);
+	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
 }
 
 #endif /* SUNRPC_SVC_H */

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 0b8e3e6..5cf99a0 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h

@@ -115,14 +115,13 @@
 	struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
-	struct svc_rdma_fastreg_mr *frmr;
 	unsigned long count;
 	union {
 		struct kvec sge[RPCSVC_MAXPAGES];
 		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+		unsigned long lkey[RPCSVC_MAXPAGES];
 	};
 };
-#define RDMACTXT_F_FAST_UNREG	1
 #define RDMACTXT_F_LAST_CTXT	2
 
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b05963f..7235040 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h

@@ -24,6 +24,7 @@
 	void		(*xpo_release_rqst)(struct svc_rqst *);
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
+	int		(*xpo_secure_port)(struct svc_rqst *);
 };
 
 struct svc_xprt_class {
@@ -63,6 +64,7 @@
 #define	XPT_DETACHED	10		/* detached from tempsocks list */
 #define XPT_LISTENER	11		/* listening endpoint */
 #define XPT_CACHE_AUTH	12		/* cache auth info */
+#define XPT_LOCAL	13		/* connection from loopback interface */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 15f9204..70c6b92 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h

@@ -215,6 +215,9 @@
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
+extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
+extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3876f0f..fcbfe87 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h

@@ -24,6 +24,12 @@
 #define RPC_MAX_SLOT_TABLE_LIMIT	(65536U)
 #define RPC_MAX_SLOT_TABLE	RPC_MAX_SLOT_TABLE_LIMIT
 
+#define RPC_CWNDSHIFT		(8U)
+#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
+#define RPC_INITCWND		RPC_CWNDSCALE
+#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
+
 /*
  * This describes a timeout strategy
  */

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2399468..a051321 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h

@@ -197,7 +197,8 @@
 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
@@ -209,6 +210,8 @@
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
 	u32	retrans_out;	/* Retransmitted packets out		*/
+	u32	max_packets_out;  /* max packets_out in last window */
+	u32	max_packets_seq;  /* right edge of max_packets_out flight */
 
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
 	u8	ecn_flags;	/* ECN status bits.			*/
@@ -365,11 +368,6 @@
 		tcp_sk(sk)->fastopen_rsk != NULL);
 }
 
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
-	return foc->len != -1;
-}
-
 extern void tcp_sock_destruct(struct sock *sk);
 
 static inline int fastopen_init_queue(struct sock *sk, int backlog)

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index a32d86e..1361169 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h

@@ -46,6 +46,9 @@
 extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
 extern int trace_seq_path(struct trace_seq *s, const struct path *path);
 
+extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+			     int nmaskbits);
+
 #else /* CONFIG_TRACING */
 static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 {
@@ -57,6 +60,13 @@
 	return 0;
 }
 
+static inline int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	return 0;
+}
+
 static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	return 0;

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 9d30ee4..2e2a5f7 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h

@@ -185,6 +185,11 @@
 	static inline void						\
 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
 /*
@@ -230,6 +235,11 @@
 	}								\
 	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return false;						\
 	}
 
 #define DEFINE_TRACE_FN(name, reg, unreg)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 42278bb..247cfdc 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h

@@ -47,7 +47,9 @@
 #define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
 	int		 pending;	/* Any pending frames ? */
 	unsigned int	 corkflag;	/* Cork is required */
-  	__u16		 encap_type;	/* Is this an Encapsulation socket? */
+	__u8		 encap_type;	/* Is this an Encapsulation socket? */
+	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
+			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -76,6 +78,26 @@
 	return (struct udp_sock *)sk;
 }
 
+static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_tx = val;
+}
+
+static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_rx = val;
+}
+
+static inline bool udp_get_no_check6_tx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_tx;
+}
+
+static inline bool udp_get_no_check6_rx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_rx;
+}
+
 #define udp_portaddr_for_each_entry(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
 

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 199bcc3..e2231e4 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h

@@ -19,11 +19,21 @@
 	size_t iov_len;
 };
 
+enum {
+	ITER_IOVEC = 0,
+	ITER_KVEC = 2,
+	ITER_BVEC = 4,
+};
+
 struct iov_iter {
-	const struct iovec *iov;
-	unsigned long nr_segs;
+	int type;
 	size_t iov_offset;
 	size_t count;
+	union {
+		const struct iovec *iov;
+		const struct bio_vec *bvec;
+	};
+	unsigned long nr_segs;
 };
 
 /*
@@ -53,6 +63,7 @@
 }
 
 #define iov_for_each(iov, iter, start)				\
+	if (!((start).type & ITER_BVEC))			\
 	for (iter = (start);					\
 	     (iter).count &&					\
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
@@ -62,32 +73,44 @@
 
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
-
-static inline void iov_iter_init(struct iov_iter *i,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t count, size_t written)
-{
-	i->iov = iov;
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count + written;
-
-	iov_iter_advance(i, written);
-}
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i);
+unsigned long iov_iter_alignment(const struct iov_iter *i);
+void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+			unsigned long nr_segs, size_t count);
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+			size_t maxsize, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+			size_t maxsize, size_t *start);
+int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
 	return i->count;
 }
 
+static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+{
+	if (i->count > count)
+		i->count = count;
+}
+
+/*
+ * reexpand a previously truncated iterator; count must be no more than how much
+ * we had shrunk it.
+ */
+static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
+{
+	i->count = count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
+
 #endif

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c52f827..4f844c6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h

@@ -103,6 +103,7 @@
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -133,6 +134,9 @@
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
+
+#define uprobe_get_trap_addr(regs)	instruction_pointer(regs)
+
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 44b38b9..7c9b484 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h

@@ -52,6 +52,10 @@
 #define	CDC_NCM_NTB_MAX_SIZE_TX			32768	/* bytes */
 #define	CDC_NCM_NTB_MAX_SIZE_RX			32768	/* bytes */
 
+/* Initial NTB length */
+#define	CDC_NCM_NTB_DEF_SIZE_TX			16384	/* bytes */
+#define	CDC_NCM_NTB_DEF_SIZE_RX			16384	/* bytes */
+
 /* Minimum value for MaxDatagramSize, ch. 6.2.9 */
 #define	CDC_NCM_MIN_DATAGRAM_SIZE		1514	/* bytes */
 
@@ -72,16 +76,9 @@
 /* Restart the timer, if amount of datagrams is less than given value */
 #define	CDC_NCM_RESTART_TIMER_DATAGRAM_CNT	3
 #define	CDC_NCM_TIMER_PENDING_CNT		2
-#define CDC_NCM_TIMER_INTERVAL			(400UL * NSEC_PER_USEC)
-
-/* The following macro defines the minimum header space */
-#define	CDC_NCM_MIN_HDR_SIZE \
-	(sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
-	(CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
-#define CDC_NCM_NDP_SIZE \
-	(sizeof(struct usb_cdc_ncm_ndp16) +				\
-	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
+#define CDC_NCM_TIMER_INTERVAL_USEC		400UL
+#define CDC_NCM_TIMER_INTERVAL_MIN		5UL
+#define CDC_NCM_TIMER_INTERVAL_MAX		(U32_MAX / NSEC_PER_USEC)
 
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
@@ -107,6 +104,9 @@
 	spinlock_t mtx;
 	atomic_t stop;
 
+	u32 timer_interval;
+	u32 max_ndp_size;
+
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
 	u32 rx_max;
@@ -118,10 +118,21 @@
 	u16 tx_ndp_modulus;
 	u16 tx_seq;
 	u16 rx_seq;
-	u16 connected;
+	u16 min_tx_pkt;
+
+	/* statistics */
+	u32 tx_curr_frame_payload;
+	u32 tx_reason_ntb_full;
+	u32 tx_reason_ndp_full;
+	u32 tx_reason_timeout;
+	u32 tx_reason_max_datagram;
+	u64 tx_overhead;
+	u64 tx_ntbs;
+	u64 rx_overhead;
+	u64 rx_ntbs;
 };
 
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
 void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index e4abb84..b46671e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h

@@ -106,6 +106,8 @@
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
+void virtio_break_device(struct virtio_device *dev);
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index 4195b97..de429d1 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h

@@ -35,11 +35,23 @@
 	u8 lun[8];		/* Logical Unit Number */
 	u64 tag;		/* Command identifier */
 	u8 task_attr;		/* Task attribute */
-	u8 prio;
+	u8 prio;		/* SAM command priority field */
 	u8 crn;
 	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
 } __packed;
 
+/* SCSI command request, followed by protection information */
+struct virtio_scsi_cmd_req_pi {
+	u8 lun[8];		/* Logical Unit Number */
+	u64 tag;		/* Command identifier */
+	u8 task_attr;		/* Task attribute */
+	u8 prio;		/* SAM command priority field */
+	u8 crn;
+	u32 pi_bytesout;	/* DataOUT PI Number of bytes */
+	u32 pi_bytesin;		/* DataIN PI Number of bytes */
+	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __packed;
+
 /* Response, followed by sense data and data-in */
 struct virtio_scsi_cmd_resp {
 	u32 sense_len;		/* Sense data length */
@@ -97,6 +109,7 @@
 #define VIRTIO_SCSI_F_INOUT                    0
 #define VIRTIO_SCSI_F_HOTPLUG                  1
 #define VIRTIO_SCSI_F_CHANGE                   2
+#define VIRTIO_SCSI_F_T10_PI                   3
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1b22c42..a0cc2e9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h

@@ -56,9 +56,8 @@
 	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
 	WORK_NO_COLOR		= WORK_NR_COLORS,
 
-	/* special cpu IDs */
+	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
-	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
 	 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
@@ -274,13 +273,6 @@
 #define delayed_work_pending(w) \
 	work_pending(&(w)->work)
 
-/**
- * work_clear_pending - for internal use only, mark a work item as not pending
- * @work: The work item in question
- */
-#define work_clear_pending(work) \
-	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
-
 /*
  * Workqueue flags and constants.  For details, please refer to
  * Documentation/workqueue.txt.
@@ -340,6 +332,9 @@
  * short queue flush time.  Don't queue works which can run for too
  * long.
  *
+ * system_highpri_wq is similar to system_wq but for work items which
+ * require WQ_HIGHPRI.
+ *
  * system_long_wq is similar to system_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
@@ -358,26 +353,13 @@
  * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
  */
 extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_highpri_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
-{
-	return system_wq;
-}
-
-static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
-{
-	return system_freezable_wq;
-}
-
-/* equivlalent to system_wq and system_freezable_wq, deprecated */
-#define system_nrt_wq			__system_nrt_wq()
-#define system_nrt_freezable_wq		__system_nrt_freezable_wq()
-
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
@@ -587,18 +569,6 @@
 	return system_wq != NULL;
 }
 
-/* used to be different but now identical to flush_work(), deprecated */
-static inline bool __deprecated flush_work_sync(struct work_struct *work)
-{
-	return flush_work(work);
-}
-
-/* used to be different but now identical to flush_delayed_work(), deprecated */
-static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
-{
-	return flush_delayed_work(dwork);
-}
-
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index bca25dc..8fab6fa 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h

@@ -432,6 +432,7 @@
 void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no);
 
 void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
+void vb2_discard_done(struct vb2_queue *q);
 int vb2_wait_for_all_buffers(struct vb2_queue *q);
 
 int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b);

diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index f7d372b..79b530f 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h

@@ -54,6 +54,7 @@
 #define __6LOWPAN_H__
 
 #include <net/ipv6.h>
+#include <net/net_namespace.h>
 
 #define UIP_802154_SHORTADDR_LEN	2  /* compressed ipv6 address length */
 #define UIP_IPH_LEN			40 /* ipv6 fixed header size */

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 933a9f2..f679877 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h

@@ -306,11 +306,6 @@
 		      htonl(0xFF000000) | addr->s6_addr32[3]);
 }
 
-static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr)
-{
-	return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000);
-}
-
 static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64

diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h
index f79ae2a..085940f 100644
--- a/include/net/af_ieee802154.h
+++ b/include/net/af_ieee802154.h

@@ -57,6 +57,14 @@
 /* get/setsockopt */
 #define SOL_IEEE802154	0
 
-#define WPAN_WANTACK	0
+#define WPAN_WANTACK		0
+#define WPAN_SECURITY		1
+#define WPAN_SECURITY_LEVEL	2
+
+#define WPAN_SECURITY_DEFAULT	0
+#define WPAN_SECURITY_OFF	1
+#define WPAN_SECURITY_ON	2
+
+#define WPAN_SECURITY_LEVEL_DEFAULT	(-1)
 
 #endif

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index be150cf..16587dc 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h

@@ -367,6 +367,7 @@
 #define HCI_ERROR_REMOTE_POWER_OFF	0x15
 #define HCI_ERROR_LOCAL_HOST_TERM	0x16
 #define HCI_ERROR_PAIRING_NOT_ALLOWED	0x18
+#define HCI_ERROR_ADVERTISING_TIMEOUT	0x3c
 
 /* Flow control modes */
 #define HCI_FLOW_CTL_MODE_PACKET_BASED	0x00
@@ -1053,6 +1054,17 @@
 	__le16   window;
 } __packed;
 
+#define HCI_OP_READ_TX_POWER		0x0c2d
+struct hci_cp_read_tx_power {
+	__le16   handle;
+	__u8     type;
+} __packed;
+struct hci_rp_read_tx_power {
+	__u8     status;
+	__le16   handle;
+	__s8     tx_power;
+} __packed;
+
 #define HCI_OP_READ_PAGE_SCAN_TYPE	0x0c46
 struct hci_rp_read_page_scan_type {
 	__u8     status;
@@ -1063,6 +1075,16 @@
 	#define PAGE_SCAN_TYPE_STANDARD		0x00
 	#define PAGE_SCAN_TYPE_INTERLACED	0x01
 
+#define HCI_OP_READ_RSSI		0x1405
+struct hci_cp_read_rssi {
+	__le16   handle;
+} __packed;
+struct hci_rp_read_rssi {
+	__u8     status;
+	__le16   handle;
+	__s8     rssi;
+} __packed;
+
 #define HCI_OP_READ_LOCAL_AMP_INFO	0x1409
 struct hci_rp_read_local_amp_info {
 	__u8     status;

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5f8bc05..b386bf1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h

@@ -68,6 +68,11 @@
 	struct list_head	unknown;	/* Name state not known */
 	struct list_head	resolve;	/* Name needs to be resolved */
 	__u32			timestamp;
+	bdaddr_t		last_adv_addr;
+	u8			last_adv_addr_type;
+	s8			last_adv_rssi;
+	u8			last_adv_data[HCI_MAX_AD_LENGTH];
+	u8			last_adv_data_len;
 };
 
 struct hci_conn_hash {
@@ -140,6 +145,10 @@
 /* Default LE RPA expiry time, 15 minutes */
 #define HCI_DEFAULT_RPA_TIMEOUT		(15 * 60)
 
+/* Default min/max age of connection information (1s/3s) */
+#define DEFAULT_CONN_INFO_MIN_AGE	1000
+#define DEFAULT_CONN_INFO_MAX_AGE	3000
+
 struct amp_assoc {
 	__u16	len;
 	__u16	offset;
@@ -194,6 +203,9 @@
 	__u16		le_scan_window;
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
+	__u16		discov_interleaved_timeout;
+	__u16		conn_info_min_age;
+	__u16		conn_info_max_age;
 	__u8		ssp_debug_mode;
 
 	__u16		devid_source;
@@ -368,8 +380,13 @@
 	__u16		setting;
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
+	__s8		rssi;
+	__s8		tx_power;
+	__s8		max_tx_power;
 	unsigned long	flags;
 
+	unsigned long	conn_info_timestamp;
+
 	__u8		remote_cap;
 	__u8		remote_auth;
 	__u8		remote_id;
@@ -1204,8 +1221,8 @@
  */
 #define DISCOV_LE_SCAN_WIN		0x12
 #define DISCOV_LE_SCAN_INT		0x12
-#define DISCOV_LE_TIMEOUT		msecs_to_jiffies(10240)
-#define DISCOV_INTERLEAVED_TIMEOUT	msecs_to_jiffies(5120)
+#define DISCOV_LE_TIMEOUT		10240	/* msec */
+#define DISCOV_INTERLEAVED_TIMEOUT	5120	/* msec */
 #define DISCOV_INTERLEAVED_INQUIRY_LEN	0x04
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 
@@ -1265,7 +1282,8 @@
 				       u8 *randomizer256, u8 status);
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
-		       u8 ssp, u8 *eir, u16 eir_len);
+		       u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 scan_rsp_len);
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		      u8 addr_type, s8 rssi, u8 *name, u8 name_len);
 void mgmt_discovering(struct hci_dev *hdev, u8 discovering);

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index d4b571c..bcffc9a 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h

@@ -181,6 +181,9 @@
 } __packed;
 #define MGMT_LOAD_LINK_KEYS_SIZE	3
 
+#define MGMT_LTK_UNAUTHENTICATED	0x00
+#define MGMT_LTK_AUTHENTICATED		0x01
+
 struct mgmt_ltk_info {
 	struct mgmt_addr_info addr;
 	__u8	type;
@@ -409,6 +412,18 @@
 } __packed;
 #define MGMT_LOAD_IRKS_SIZE		2
 
+#define MGMT_OP_GET_CONN_INFO		0x0031
+struct mgmt_cp_get_conn_info {
+	struct mgmt_addr_info addr;
+} __packed;
+#define MGMT_GET_CONN_INFO_SIZE		MGMT_ADDR_INFO_SIZE
+struct mgmt_rp_get_conn_info {
+	struct mgmt_addr_info addr;
+	__s8	rssi;
+	__s8	tx_power;
+	__s8	max_tx_power;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 2611cc3..578b831 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h

@@ -173,7 +173,7 @@
 	struct sk_buff_head   tx_queue;
 	struct timer_list     timer;
 
-	spinlock_t    lock;
+	struct mutex  lock;
 	unsigned long state;
 	unsigned long flags;
 	atomic_t      refcnt;
@@ -244,8 +244,8 @@
 void rfcomm_dlc_accept(struct rfcomm_dlc *d);
 struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel);
 
-#define rfcomm_dlc_lock(d)     spin_lock(&d->lock)
-#define rfcomm_dlc_unlock(d)   spin_unlock(&d->lock)
+#define rfcomm_dlc_lock(d)     mutex_lock(&d->lock)
+#define rfcomm_dlc_unlock(d)   mutex_unlock(&d->lock)
 
 static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d)
 {

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f856e5a..e46c437 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h

@@ -109,6 +109,13 @@
  *	channel as the control or any of the secondary channels.
  *	This may be due to the driver or due to regulatory bandwidth
  *	restrictions.
+ * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY
+ * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT
+ * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
+ *	on this channel.
+ *
  */
 enum ieee80211_channel_flags {
 	IEEE80211_CHAN_DISABLED		= 1<<0,
@@ -120,6 +127,10 @@
 	IEEE80211_CHAN_NO_OFDM		= 1<<6,
 	IEEE80211_CHAN_NO_80MHZ		= 1<<7,
 	IEEE80211_CHAN_NO_160MHZ	= 1<<8,
+	IEEE80211_CHAN_INDOOR_ONLY	= 1<<9,
+	IEEE80211_CHAN_GO_CONCURRENT	= 1<<10,
+	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
+	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
@@ -330,8 +341,8 @@
  * @seq_len: length of @seq.
  */
 struct key_params {
-	u8 *key;
-	u8 *seq;
+	const u8 *key;
+	const u8 *seq;
 	int key_len;
 	int seq_len;
 	u32 cipher;
@@ -441,10 +452,13 @@
  * cfg80211_chandef_dfs_required - checks if radar detection is required
  * @wiphy: the wiphy to validate against
  * @chandef: the channel definition to check
- * Return: 1 if radar detection is required, 0 if it is not, < 0 on error
+ * @iftype: the interface type as specified in &enum nl80211_iftype
+ * Returns:
+ *	1 if radar detection is required, 0 if it is not, < 0 on error
  */
 int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
-				  const struct cfg80211_chan_def *chandef);
+				  const struct cfg80211_chan_def *chandef,
+				  enum nl80211_iftype iftype);
 
 /**
  * ieee80211_chandef_rate_flags - returns rate flags for a channel
@@ -654,7 +668,6 @@
  * @p2p_opp_ps: P2P opportunistic PS
  * @acl: ACL configuration used by the drivers which has support for
  *	MAC address based access control
- * @radar_required: set if radar detection is required
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -672,7 +685,6 @@
 	u8 p2p_ctwindow;
 	bool p2p_opp_ps;
 	const struct cfg80211_acl_data *acl;
-	bool radar_required;
 };
 
 /**
@@ -682,8 +694,10 @@
  *
  * @chandef: defines the channel to use after the switch
  * @beacon_csa: beacon data while performing the switch
- * @counter_offset_beacon: offset for the counter within the beacon (tail)
- * @counter_offset_presp: offset for the counter within the probe response
+ * @counter_offsets_beacon: offsets of the counters within the beacon (tail)
+ * @counter_offsets_presp: offsets of the counters within the probe response
+ * @n_counter_offsets_beacon: number of csa counters in the beacon (tail)
+ * @n_counter_offsets_presp: number of csa counters in the probe response
  * @beacon_after: beacon data to be used on the new channel
  * @radar_required: whether radar detection is required on the new channel
  * @block_tx: whether transmissions should be blocked while changing
@@ -692,7 +706,10 @@
 struct cfg80211_csa_settings {
 	struct cfg80211_chan_def chandef;
 	struct cfg80211_beacon_data beacon_csa;
-	u16 counter_offset_beacon, counter_offset_presp;
+	const u16 *counter_offsets_beacon;
+	const u16 *counter_offsets_presp;
+	unsigned int n_counter_offsets_beacon;
+	unsigned int n_counter_offsets_presp;
 	struct cfg80211_beacon_data beacon_after;
 	bool radar_required;
 	bool block_tx;
@@ -856,36 +873,38 @@
  * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled
  * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled
  * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled
+ * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled
  */
 enum station_info_flags {
-	STATION_INFO_INACTIVE_TIME	= 1<<0,
-	STATION_INFO_RX_BYTES		= 1<<1,
-	STATION_INFO_TX_BYTES		= 1<<2,
-	STATION_INFO_LLID		= 1<<3,
-	STATION_INFO_PLID		= 1<<4,
-	STATION_INFO_PLINK_STATE	= 1<<5,
-	STATION_INFO_SIGNAL		= 1<<6,
-	STATION_INFO_TX_BITRATE		= 1<<7,
-	STATION_INFO_RX_PACKETS		= 1<<8,
-	STATION_INFO_TX_PACKETS		= 1<<9,
-	STATION_INFO_TX_RETRIES		= 1<<10,
-	STATION_INFO_TX_FAILED		= 1<<11,
-	STATION_INFO_RX_DROP_MISC	= 1<<12,
-	STATION_INFO_SIGNAL_AVG		= 1<<13,
-	STATION_INFO_RX_BITRATE		= 1<<14,
-	STATION_INFO_BSS_PARAM          = 1<<15,
-	STATION_INFO_CONNECTED_TIME	= 1<<16,
-	STATION_INFO_ASSOC_REQ_IES	= 1<<17,
-	STATION_INFO_STA_FLAGS		= 1<<18,
-	STATION_INFO_BEACON_LOSS_COUNT	= 1<<19,
-	STATION_INFO_T_OFFSET		= 1<<20,
-	STATION_INFO_LOCAL_PM		= 1<<21,
-	STATION_INFO_PEER_PM		= 1<<22,
-	STATION_INFO_NONPEER_PM		= 1<<23,
-	STATION_INFO_RX_BYTES64		= 1<<24,
-	STATION_INFO_TX_BYTES64		= 1<<25,
-	STATION_INFO_CHAIN_SIGNAL	= 1<<26,
-	STATION_INFO_CHAIN_SIGNAL_AVG	= 1<<27,
+	STATION_INFO_INACTIVE_TIME		= BIT(0),
+	STATION_INFO_RX_BYTES			= BIT(1),
+	STATION_INFO_TX_BYTES			= BIT(2),
+	STATION_INFO_LLID			= BIT(3),
+	STATION_INFO_PLID			= BIT(4),
+	STATION_INFO_PLINK_STATE		= BIT(5),
+	STATION_INFO_SIGNAL			= BIT(6),
+	STATION_INFO_TX_BITRATE			= BIT(7),
+	STATION_INFO_RX_PACKETS			= BIT(8),
+	STATION_INFO_TX_PACKETS			= BIT(9),
+	STATION_INFO_TX_RETRIES			= BIT(10),
+	STATION_INFO_TX_FAILED			= BIT(11),
+	STATION_INFO_RX_DROP_MISC		= BIT(12),
+	STATION_INFO_SIGNAL_AVG			= BIT(13),
+	STATION_INFO_RX_BITRATE			= BIT(14),
+	STATION_INFO_BSS_PARAM			= BIT(15),
+	STATION_INFO_CONNECTED_TIME		= BIT(16),
+	STATION_INFO_ASSOC_REQ_IES		= BIT(17),
+	STATION_INFO_STA_FLAGS			= BIT(18),
+	STATION_INFO_BEACON_LOSS_COUNT		= BIT(19),
+	STATION_INFO_T_OFFSET			= BIT(20),
+	STATION_INFO_LOCAL_PM			= BIT(21),
+	STATION_INFO_PEER_PM			= BIT(22),
+	STATION_INFO_NONPEER_PM			= BIT(23),
+	STATION_INFO_RX_BYTES64			= BIT(24),
+	STATION_INFO_TX_BYTES64			= BIT(25),
+	STATION_INFO_CHAIN_SIGNAL		= BIT(26),
+	STATION_INFO_CHAIN_SIGNAL_AVG		= BIT(27),
+	STATION_INFO_EXPECTED_THROUGHPUT	= BIT(28),
 };
 
 /**
@@ -1007,6 +1026,8 @@
  * @local_pm: local mesh STA power save mode
  * @peer_pm: peer mesh STA power save mode
  * @nonpeer_pm: non-peer mesh STA power save mode
+ * @expected_throughput: expected throughput in kbps (including 802.11 headers)
+ *	towards this station.
  */
 struct station_info {
 	u32 filled;
@@ -1045,6 +1066,8 @@
 	enum nl80211_mesh_power_mode peer_pm;
 	enum nl80211_mesh_power_mode nonpeer_pm;
 
+	u32 expected_throughput;
+
 	/*
 	 * Note: Add a new enum station_info_flags value for each new field and
 	 * use it to check which fields are initialized.
@@ -1052,6 +1075,19 @@
 };
 
 /**
+ * cfg80211_get_station - retrieve information about a given station
+ * @dev: the device where the station is supposed to be connected to
+ * @mac_addr: the mac address of the station of interest
+ * @sinfo: pointer to the structure to fill with the information
+ *
+ * Returns 0 on success, in which case sinfo is filled with the available
+ * information; otherwise returns a negative error code and the content of
+ * sinfo has to be considered undefined.
+ */
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+			 struct station_info *sinfo);
+
+/**
  * enum monitor_flags - monitor flags
  *
  * Monitor interface configuration flags. Note that these must be the bits
@@ -1152,7 +1188,7 @@
 	int use_cts_prot;
 	int use_short_preamble;
 	int use_short_slot_time;
-	u8 *basic_rates;
+	const u8 *basic_rates;
 	u8 basic_rates_len;
 	int ap_isolate;
 	int ht_opmode;
@@ -1682,10 +1718,10 @@
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_ibss_params {
-	u8 *ssid;
-	u8 *bssid;
+	const u8 *ssid;
+	const u8 *bssid;
 	struct cfg80211_chan_def chandef;
-	u8 *ie;
+	const u8 *ie;
 	u8 ssid_len, ie_len;
 	u16 beacon_interval;
 	u32 basic_rates;
@@ -1794,8 +1830,8 @@
  * @pmkid: The PMK material itself.
  */
 struct cfg80211_pmksa {
-	u8 *bssid;
-	u8 *pmkid;
+	const u8 *bssid;
+	const u8 *pmkid;
 };
 
 /**
@@ -1810,7 +1846,7 @@
  * memory, free @mask only!
  */
 struct cfg80211_pkt_pattern {
-	u8 *mask, *pattern;
+	const u8 *mask, *pattern;
 	int pattern_len;
 	int pkt_offset;
 };
@@ -1974,6 +2010,8 @@
  * @len: buffer length
  * @no_cck: don't use cck rates for this frame
  * @dont_wait_for_ack: tells the low level not to wait for an ack
+ * @n_csa_offsets: length of csa_offsets array
+ * @csa_offsets: array of all the csa offsets in the frame
  */
 struct cfg80211_mgmt_tx_params {
 	struct ieee80211_channel *chan;
@@ -1983,6 +2021,8 @@
 	size_t len;
 	bool no_cck;
 	bool dont_wait_for_ack;
+	int n_csa_offsets;
+	const u16 *csa_offsets;
 };
 
 /**
@@ -2278,6 +2318,10 @@
  * @channel_switch: initiate channel-switch procedure (with CSA)
  *
  * @set_qos_map: Set QoS mapping information to the driver
+ *
+ * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the
+ *	given interface. This is used e.g. for dynamic HT 20/40 MHz channel width
+ *	changes during the lifetime of the BSS.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2320,28 +2364,29 @@
 
 
 	int	(*add_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac, struct station_parameters *params);
+			       const u8 *mac,
+			       struct station_parameters *params);
 	int	(*del_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac);
+			       const u8 *mac);
 	int	(*change_station)(struct wiphy *wiphy, struct net_device *dev,
-				  u8 *mac, struct station_parameters *params);
+				  const u8 *mac,
+				  struct station_parameters *params);
 	int	(*get_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac, struct station_info *sinfo);
+			       const u8 *mac, struct station_info *sinfo);
 	int	(*dump_station)(struct wiphy *wiphy, struct net_device *dev,
-			       int idx, u8 *mac, struct station_info *sinfo);
+				int idx, u8 *mac, struct station_info *sinfo);
 
 	int	(*add_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst, u8 *next_hop);
+			       const u8 *dst, const u8 *next_hop);
 	int	(*del_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst);
+			       const u8 *dst);
 	int	(*change_mpath)(struct wiphy *wiphy, struct net_device *dev,
-				  u8 *dst, u8 *next_hop);
+				  const u8 *dst, const u8 *next_hop);
 	int	(*get_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst, u8 *next_hop,
-			       struct mpath_info *pinfo);
+			     u8 *dst, u8 *next_hop, struct mpath_info *pinfo);
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       int idx, u8 *dst, u8 *next_hop,
-			       struct mpath_info *pinfo);
+			      int idx, u8 *dst, u8 *next_hop,
+			      struct mpath_info *pinfo);
 	int	(*get_mesh_config)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
@@ -2471,11 +2516,11 @@
 				  struct cfg80211_gtk_rekey_data *data);
 
 	int	(*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *peer, u8 action_code,  u8 dialog_token,
+			     const u8 *peer, u8 action_code,  u8 dialog_token,
 			     u16 status_code, u32 peer_capability,
 			     const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *peer, enum nl80211_tdls_operation oper);
+			     const u8 *peer, enum nl80211_tdls_operation oper);
 
 	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
 				const u8 *peer, u64 *cookie);
@@ -2521,9 +2566,13 @@
 	int	(*channel_switch)(struct wiphy *wiphy,
 				  struct net_device *dev,
 				  struct cfg80211_csa_settings *params);
+
 	int     (*set_qos_map)(struct wiphy *wiphy,
 			       struct net_device *dev,
 			       struct cfg80211_qos_map *qos_map);
+
+	int	(*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
+				    struct cfg80211_chan_def *chandef);
 };
 
 /*
@@ -2618,6 +2667,7 @@
  *	between infrastructure and AP types must match. This is required
  *	only in special cases.
  * @radar_detect_widths: bitmap of channel widths supported for radar detection
+ * @radar_detect_regions: bitmap of regions supported for radar detection
  *
  * With this structure the driver can describe which interface
  * combinations it supports concurrently.
@@ -2675,6 +2725,7 @@
 	u8 n_limits;
 	bool beacon_int_infra_match;
 	u8 radar_detect_widths;
+	u8 radar_detect_regions;
 };
 
 struct ieee80211_txrx_stypes {
@@ -2905,6 +2956,17 @@
  *	(including P2P GO) or 0 to indicate no such limit is advertised. The
  *	driver is allowed to advertise a theoretical limit that it can reach in
  *	some cases, but may not always reach.
+ *
+ * @max_num_csa_counters: Number of supported csa_counters in beacons
+ *	and probe responses.  This value should be set if the driver
+ *	wishes to limit the number of csa counters. Default (0) means
+ *	infinite.
+ * @max_adj_channel_rssi_comp: max offset between the channel on which the
+ *	frame was sent and the channel on which the frame was heard for which
+ *	the reported rssi is still valid. If a driver is able to compensate the
+ *	low rssi when a frame is heard on different channel, then it should set
+ *	this variable to the maximal offset for which it can compensate.
+ *	This value should be set in MHz.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -3022,6 +3084,9 @@
 
 	u16 max_ap_assoc_sta;
 
+	u8 max_num_csa_counters;
+	u8 max_adj_channel_rssi_comp;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -3194,6 +3259,7 @@
  * @ibss_dfs_possible: (private) IBSS may change to a DFS channel
  * @event_list: (private) list for internal event processing
  * @event_lock: (private) lock for event list
+ * @owner_nlportid: (private) owner socket port ID
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -3241,13 +3307,15 @@
 	unsigned long cac_start_time;
 	unsigned int cac_time_ms;
 
+	u32 owner_nlportid;
+
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
 	struct {
 		struct cfg80211_ibss_params ibss;
 		struct cfg80211_connect_params connect;
 		struct cfg80211_cached_keys *keys;
-		u8 *ie;
+		const u8 *ie;
 		size_t ie_len;
 		u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
 		u8 ssid[IEEE80211_MAX_SSID_LEN];
@@ -3488,7 +3556,8 @@
  * Return: 0 on success, or a negative error code.
  */
 int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype, u8 *bssid, bool qos);
+			     enum nl80211_iftype iftype, const u8 *bssid,
+			     bool qos);
 
 /**
  * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
@@ -3600,7 +3669,7 @@
  * default channel settings will be disregarded. If no rule is found for a
  * channel on the regulatory domain the channel will be disabled.
  * Drivers using this for a wiphy should also set the wiphy flag
- * WIPHY_FLAG_CUSTOM_REGULATORY or cfg80211 will set it for the wiphy
+ * REGULATORY_CUSTOM_REG or cfg80211 will set it for the wiphy
  * that called this helper.
  */
 void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
@@ -4289,7 +4358,7 @@
  * and not try to connect to any AP any more.
  */
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
-			   u8 *ie, size_t ie_len, gfp_t gfp);
+			   const u8 *ie, size_t ie_len, gfp_t gfp);
 
 /**
  * cfg80211_ready_on_channel - notification of remain_on_channel start
@@ -4543,12 +4612,14 @@
  * cfg80211_reg_can_beacon - check if beaconing is allowed
  * @wiphy: the wiphy
  * @chandef: the channel definition
+ * @iftype: interface type
  *
  * Return: %true if there is no secondary channel or the secondary channel(s)
  * can be used for beaconing (i.e. is not a radar channel etc.)
  */
 bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef);
+			     struct cfg80211_chan_def *chandef,
+			     enum nl80211_iftype iftype);
 
 /*
  * cfg80211_ch_switch_notify - update wdev channel and notify userspace
@@ -4694,6 +4765,84 @@
  */
 unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy);
 
+/**
+ * cfg80211_check_combinations - check interface combinations
+ *
+ * @wiphy: the wiphy
+ * @num_different_channels: the number of different channels we want
+ *	to use for verification
+ * @radar_detect: a bitmap where each bit corresponds to a channel
+ *	width where radar detection is needed, as in the definition of
+ *	&struct ieee80211_iface_combination.@radar_detect_widths
+ * @iftype_num: array with the numbers of interfaces of each interface
+ *	type.  The index is the interface type as specified in &enum
+ *	nl80211_iftype.
+ *
+ * This function can be called by the driver to check whether a
+ * combination of interfaces and their types are allowed according to
+ * the interface combinations.
+ */
+int cfg80211_check_combinations(struct wiphy *wiphy,
+				const int num_different_channels,
+				const u8 radar_detect,
+				const int iftype_num[NUM_NL80211_IFTYPES]);
+
+/**
+ * cfg80211_iter_combinations - iterate over matching combinations
+ *
+ * @wiphy: the wiphy
+ * @num_different_channels: the number of different channels we want
+ *	to use for verification
+ * @radar_detect: a bitmap where each bit corresponds to a channel
+ *	width where radar detection is needed, as in the definition of
+ *	&struct ieee80211_iface_combination.@radar_detect_widths
+ * @iftype_num: array with the numbers of interfaces of each interface
+ *	type.  The index is the interface type as specified in &enum
+ *	nl80211_iftype.
+ * @iter: function to call for each matching combination
+ * @data: pointer to pass to iter function
+ *
+ * This function can be called by the driver to check what possible
+ * combinations it fits in at a given moment, e.g. for channel switching
+ * purposes.
+ */
+int cfg80211_iter_combinations(struct wiphy *wiphy,
+			       const int num_different_channels,
+			       const u8 radar_detect,
+			       const int iftype_num[NUM_NL80211_IFTYPES],
+			       void (*iter)(const struct ieee80211_iface_combination *c,
+					    void *data),
+			       void *data);
+
+/**
+ * cfg80211_stop_iface - trigger interface disconnection
+ *
+ * @wiphy: the wiphy
+ * @wdev: wireless device
+ * @gfp: context flags
+ *
+ * Trigger interface to be stopped as if AP was stopped, IBSS/mesh left, STA
+ * disconnected.
+ *
+ * Note: This doesn't need any locks and is asynchronous.
+ */
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 gfp_t gfp);
+
+/**
+ * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy
+ * @wiphy: the wiphy to shut down
+ *
+ * This function shuts down all interfaces belonging to this wiphy by
+ * calling dev_close() (and treating non-netdev interfaces as needed).
+ * It shouldn't really be used unless there are some fatal device errors
+ * that really can't be recovered in any other way.
+ *
+ * Callers must hold the RTNL and be able to deal with callbacks into
+ * the driver while the function is running.
+ */
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */

diff --git a/include/net/checksum.h b/include/net/checksum.h
index a28f4e0..87cb190 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h

@@ -57,12 +57,14 @@
 }
 #endif
 
+#ifndef HAVE_ARCH_CSUM_ADD
 static inline __wsum csum_add(__wsum csum, __wsum addend)
 {
 	u32 res = (__force u32)csum;
 	res += (__force u32)addend;
 	return (__force __wsum)(res + (res < (__force u32)addend));
 }
+#endif
 
 static inline __wsum csum_sub(__wsum csum, __wsum addend)
 {

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 7828ebf..6efce38 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h

@@ -181,6 +181,11 @@
 void register_switch_driver(struct dsa_switch_driver *type);
 void unregister_switch_driver(struct dsa_switch_driver *type);
 
+static inline void *ds_to_priv(struct dsa_switch *ds)
+{
+	return (void *)(ds + 1);
+}
+
 /*
  * The original DSA tag format and some other tag formats have no
  * ethertype, which means that we need to add a little hack to the

diff --git a/include/net/gre.h b/include/net/gre.h
index 70046a0..b531820 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h

@@ -37,9 +37,10 @@
 		      int hdr_len);
 
 static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
-						  bool gre_csum)
+						  bool csum)
 {
-	return iptunnel_handle_offloads(skb, gre_csum, SKB_GSO_GRE);
+	return iptunnel_handle_offloads(skb, csum,
+					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
 

diff --git a/include/net/ieee802154.h b/include/net/ieee802154.h
index c7ae0ac..0aa7122 100644
--- a/include/net/ieee802154.h
+++ b/include/net/ieee802154.h

@@ -79,6 +79,15 @@
 #define IEEE802154_SCF_KEY_SHORT_INDEX		2
 #define IEEE802154_SCF_KEY_HW_INDEX		3
 
+#define IEEE802154_SCF_SECLEVEL_NONE		0
+#define IEEE802154_SCF_SECLEVEL_MIC32		1
+#define IEEE802154_SCF_SECLEVEL_MIC64		2
+#define IEEE802154_SCF_SECLEVEL_MIC128		3
+#define IEEE802154_SCF_SECLEVEL_ENC		4
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC32	5
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC64	6
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC128	7
+
 /* MAC footer size */
 #define IEEE802154_MFR_SIZE	2 /* 2 octets */
 

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 5a719ca..3b53c8e 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h

@@ -27,6 +27,7 @@
 #ifndef IEEE802154_NETDEVICE_H
 #define IEEE802154_NETDEVICE_H
 
+#include <net/ieee802154.h>
 #include <net/af_ieee802154.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -114,6 +115,34 @@
 int ieee802154_hdr_peek_addrs(const struct sk_buff *skb,
 			      struct ieee802154_hdr *hdr);
 
+/* parses the full 802.15.4 header of a given skb and stores it into hdr,
+ * performing pan id decompression and length checks to be suitable for use in
+ * header_ops.parse
+ */
+int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr);
+
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr);
+
+static inline int
+ieee802154_sechdr_authtag_len(const struct ieee802154_sechdr *sec)
+{
+	switch (sec->level) {
+	case IEEE802154_SCF_SECLEVEL_MIC32:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC32:
+		return 4;
+	case IEEE802154_SCF_SECLEVEL_MIC64:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC64:
+		return 8;
+	case IEEE802154_SCF_SECLEVEL_MIC128:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC128:
+		return 16;
+	case IEEE802154_SCF_SECLEVEL_NONE:
+	case IEEE802154_SCF_SECLEVEL_ENC:
+	default:
+		return 0;
+	}
+}
+
 static inline int ieee802154_hdr_length(struct sk_buff *skb)
 {
 	struct ieee802154_hdr hdr;
@@ -193,8 +222,12 @@
  */
 struct ieee802154_mac_cb {
 	u8 lqi;
-	u8 flags;
-	u8 seq;
+	u8 type;
+	bool ackreq;
+	bool secen;
+	bool secen_override;
+	u8 seclevel;
+	bool seclevel_override;
 	struct ieee802154_addr source;
 	struct ieee802154_addr dest;
 };
@@ -204,25 +237,96 @@
 	return (struct ieee802154_mac_cb *)skb->cb;
 }
 
-#define MAC_CB_FLAG_TYPEMASK		((1 << 3) - 1)
-
-#define MAC_CB_FLAG_ACKREQ		(1 << 3)
-#define MAC_CB_FLAG_SECEN		(1 << 4)
-
-static inline bool mac_cb_is_ackreq(struct sk_buff *skb)
+static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb)
 {
-	return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ;
+	BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb));
+
+	memset(skb->cb, 0, sizeof(struct ieee802154_mac_cb));
+	return mac_cb(skb);
 }
 
-static inline bool mac_cb_is_secen(struct sk_buff *skb)
-{
-	return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN;
-}
+#define IEEE802154_LLSEC_KEY_SIZE 16
 
-static inline int mac_cb_type(struct sk_buff *skb)
-{
-	return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK;
-}
+struct ieee802154_llsec_key_id {
+	u8 mode;
+	u8 id;
+	union {
+		struct ieee802154_addr device_addr;
+		__le32 short_source;
+		__le64 extended_source;
+	};
+};
+
+struct ieee802154_llsec_key {
+	u8 frame_types;
+	u32 cmd_frame_ids;
+	u8 key[IEEE802154_LLSEC_KEY_SIZE];
+};
+
+struct ieee802154_llsec_key_entry {
+	struct list_head list;
+
+	struct ieee802154_llsec_key_id id;
+	struct ieee802154_llsec_key *key;
+};
+
+struct ieee802154_llsec_device_key {
+	struct list_head list;
+
+	struct ieee802154_llsec_key_id key_id;
+	u32 frame_counter;
+};
+
+enum {
+	IEEE802154_LLSEC_DEVKEY_IGNORE,
+	IEEE802154_LLSEC_DEVKEY_RESTRICT,
+	IEEE802154_LLSEC_DEVKEY_RECORD,
+
+	__IEEE802154_LLSEC_DEVKEY_MAX,
+};
+
+struct ieee802154_llsec_device {
+	struct list_head list;
+
+	__le16 pan_id;
+	__le16 short_addr;
+	__le64 hwaddr;
+	u32 frame_counter;
+	bool seclevel_exempt;
+
+	u8 key_mode;
+	struct list_head keys;
+};
+
+struct ieee802154_llsec_seclevel {
+	struct list_head list;
+
+	u8 frame_type;
+	u8 cmd_frame_id;
+	bool device_override;
+	u32 sec_levels;
+};
+
+struct ieee802154_llsec_params {
+	bool enabled;
+
+	__be32 frame_counter;
+	u8 out_level;
+	struct ieee802154_llsec_key_id out_key;
+
+	__le64 default_key_source;
+
+	__le16 pan_id;
+	__le64 hwaddr;
+	__le64 coord_hwaddr;
+	__le16 coord_shortaddr;
+};
+
+struct ieee802154_llsec_table {
+	struct list_head keys;
+	struct list_head devices;
+	struct list_head security_levels;
+};
 
 #define IEEE802154_MAC_SCAN_ED		0
 #define IEEE802154_MAC_SCAN_ACTIVE	1
@@ -242,6 +346,53 @@
 };
 
 struct wpan_phy;
+
+enum {
+	IEEE802154_LLSEC_PARAM_ENABLED = 1 << 0,
+	IEEE802154_LLSEC_PARAM_FRAME_COUNTER = 1 << 1,
+	IEEE802154_LLSEC_PARAM_OUT_LEVEL = 1 << 2,
+	IEEE802154_LLSEC_PARAM_OUT_KEY = 1 << 3,
+	IEEE802154_LLSEC_PARAM_KEY_SOURCE = 1 << 4,
+	IEEE802154_LLSEC_PARAM_PAN_ID = 1 << 5,
+	IEEE802154_LLSEC_PARAM_HWADDR = 1 << 6,
+	IEEE802154_LLSEC_PARAM_COORD_HWADDR = 1 << 7,
+	IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8,
+};
+
+struct ieee802154_llsec_ops {
+	int (*get_params)(struct net_device *dev,
+			  struct ieee802154_llsec_params *params);
+	int (*set_params)(struct net_device *dev,
+			  const struct ieee802154_llsec_params *params,
+			  int changed);
+
+	int (*add_key)(struct net_device *dev,
+		       const struct ieee802154_llsec_key_id *id,
+		       const struct ieee802154_llsec_key *key);
+	int (*del_key)(struct net_device *dev,
+		       const struct ieee802154_llsec_key_id *id);
+
+	int (*add_dev)(struct net_device *dev,
+		       const struct ieee802154_llsec_device *llsec_dev);
+	int (*del_dev)(struct net_device *dev, __le64 dev_addr);
+
+	int (*add_devkey)(struct net_device *dev,
+			  __le64 device_addr,
+			  const struct ieee802154_llsec_device_key *key);
+	int (*del_devkey)(struct net_device *dev,
+			  __le64 device_addr,
+			  const struct ieee802154_llsec_device_key *key);
+
+	int (*add_seclevel)(struct net_device *dev,
+			    const struct ieee802154_llsec_seclevel *sl);
+	int (*del_seclevel)(struct net_device *dev,
+			    const struct ieee802154_llsec_seclevel *sl);
+
+	void (*lock_table)(struct net_device *dev);
+	void (*get_table)(struct net_device *dev,
+			  struct ieee802154_llsec_table **t);
+	void (*unlock_table)(struct net_device *dev);
+};
 /*
  * This should be located at net_device->ml_priv
  *
@@ -272,6 +423,8 @@
 	void (*get_mac_params)(struct net_device *dev,
 			       struct ieee802154_mac_params *params);
 
+	struct ieee802154_llsec_ops *llsec;
+
 	/* The fields below are required. */
 
 	struct wpan_phy *(*get_phy)(const struct net_device *dev);

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 3bd2279..84b2083 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h

@@ -150,7 +150,7 @@
 }
 
 /*
- * RFC 6080 4.2
+ * RFC 6040 4.2
  *  To decapsulate the inner header at the tunnel egress, a compliant
  *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
  *  intersection of the appropriate arriving inner header (row) and outer

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 1bdb477..dd1950a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h

@@ -292,12 +292,12 @@
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __addrpair __name = (__force __addrpair) ( \
 				   (((__force __u64)(__be32)(__saddr)) << 32) | \
-				   ((__force __u64)(__be32)(__daddr)));
+				   ((__force __u64)(__be32)(__daddr)))
 #else /* __LITTLE_ENDIAN */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __addrpair __name = (__force __addrpair) ( \
 				   (((__force __u64)(__be32)(__daddr)) << 32) | \
-				   ((__force __u64)(__be32)(__saddr)));
+				   ((__force __u64)(__be32)(__saddr)))
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_portpair == (__ports))			&&	\
@@ -306,7 +306,9 @@
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
 	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
-#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+	const int __name __deprecated __attribute__((unused))
+
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
 	(((__sk)->sk_portpair == (__ports))		&&		\
 	 ((__sk)->sk_daddr	== (__saddr))		&&		\

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1833c3f..b1edf17 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h

@@ -90,6 +90,7 @@
 	kmemcheck_bitfield_end(flags);
 	struct ip_options_rcu	*opt;
 	struct sk_buff		*pktopts;
+	u32                     ir_mark;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -97,6 +98,15 @@
 	return (struct inet_request_sock *)sk;
 }
 
+static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+{
+	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
+		return skb->mark;
+	} else {
+		return sk->sk_mark;
+	}
+}
+
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 058271b..01d590e 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h

@@ -41,14 +41,13 @@
 		struct rcu_head     gc_rcu;
 	};
 	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count
+	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * is not available: rid
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t			rid;		/* Frag reception counter */
-			atomic_t			ip_id_count;	/* IP ID for the next packet */
 		};
 		struct rcu_head         rcu;
 		struct inet_peer	*gc_next;
@@ -165,21 +164,11 @@
 void inetpeer_invalidate_tree(struct inet_peer_base *);
 
 /*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
  */
 static inline void inet_peer_refcheck(const struct inet_peer *p)
 {
 	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 }
-
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
-	more++;
-	inet_peer_refcheck(p);
-	return atomic_add_return(more, &p->ip_id_count) - more;
-}
-
 #endif /* _NET_INETPEER_H */

diff --git a/include/net/ip.h b/include/net/ip.h
index 3ec2b0f..0e795df 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h

@@ -196,35 +196,31 @@
 #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt);
+unsigned long snmp_fold_field(void __percpu *mib, int offt);
 #if BITS_PER_LONG==32
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off);
 #else
-static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
+static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off)
 {
 	return snmp_fold_field(mib, offt);
 }
 #endif
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
-
-static inline void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
-{
-	int i;
-
-	BUG_ON(ptr == NULL);
-	for (i = 0; i < SNMP_ARRAY_SZ; i++) {
-		free_percpu(ptr[i]);
-		ptr[i] = NULL;
-	}
-}
 
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
-extern unsigned long *sysctl_local_reserved_ports;
-static inline int inet_is_reserved_local_port(int port)
+#ifdef CONFIG_SYSCTL
+static inline int inet_is_local_reserved_port(struct net *net, int port)
 {
-	return test_bit(port, sysctl_local_reserved_ports);
+	if (!net->ipv4.sysctl_local_reserved_ports)
+		return 0;
+	return test_bit(port, net->ipv4.sysctl_local_reserved_ports);
 }
+#else
+static inline int inet_is_local_reserved_port(struct net *net, int port)
+{
+	return 0;
+}
+#endif
 
 extern int sysctl_ip_nonlocal_bind;
 
@@ -243,6 +239,9 @@
 
 void ip_static_sysctl_init(void);
 
+#define IP4_REPLY_MARK(net, mark) \
+	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+
 static inline bool ip_is_fragment(const struct iphdr *iph)
 {
 	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
@@ -281,7 +280,7 @@
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
 }
 
-static inline bool ip_sk_local_df(const struct sock *sk)
+static inline bool ip_sk_ignore_df(const struct sock *sk)
 {
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
 	       inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
@@ -310,36 +309,48 @@
 	}
 }
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
 
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
+	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		/* This is only to work around buggy Windows95/2000
 		 * VJ compression implementations.  If the ID field
 		 * does not change, they drop every other packet in
 		 * a TCP stream using header compression.
 		 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-					htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
 			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
+}
+
+static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+				  skb->len, proto, 0);
 }
 
 /*

diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 9e3c540..55236cb 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h

@@ -41,6 +41,13 @@
 			__wsum csum);
 #endif
 
+static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len, proto, 0));
+}
+
 static __inline__ __sum16 tcp_v6_check(int len,
 				   const struct in6_addr *saddr,
 				   const struct in6_addr *daddr,
@@ -75,5 +82,17 @@
 }
 #endif
 
+static inline __sum16 udp_v6_check(int len,
+				   const struct in6_addr *saddr,
+				   const struct in6_addr *daddr,
+				   __wsum base)
+{
+	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len);
+
 int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
 #endif

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 216cecc..1d09b46 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h

@@ -186,7 +186,7 @@
 	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
 }
 
-static inline bool ip6_sk_local_df(const struct sock *sk)
+static inline bool ip6_sk_ignore_df(const struct sock *sk)
 {
 	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
 	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d640925..574337f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h

@@ -113,6 +113,9 @@
 #define	IP6_MF		0x0001
 #define	IP6_OFFSET	0xFFF8
 
+#define IP6_REPLY_MARK(net, mark) \
+	((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
+
 #include <net/sock.h>
 
 /* sysctls */
@@ -583,6 +586,11 @@
 	return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010);
 }
 
+static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr)
+{
+	return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000);
+}
+
 static inline void ipv6_addr_set_v4mapped(const __be32 addr,
 					  struct in6_addr *v4mapped)
 {
@@ -660,10 +668,22 @@
 	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 }
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
 int ip6_dst_hoplimit(struct dst_entry *dst);
 
+static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
+				      struct dst_entry *dst)
+{
+	int hlimit;
+
+	if (ipv6_addr_is_multicast(&fl6->daddr))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = ip6_dst_hoplimit(dst);
+	return hlimit;
+}
+
 /*
  *	Header manipulation
  */

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8248e39..421b6ec 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h

@@ -189,6 +189,43 @@
 };
 
 /**
+ * enum ieee80211_chanctx_switch_mode - channel context switch mode
+ * @CHANCTX_SWMODE_REASSIGN_VIF: Both old and new contexts already
+ *	exist (and will continue to exist), but the virtual interface
+ *	needs to be switched from one to the other.
+ * @CHANCTX_SWMODE_SWAP_CONTEXTS: The old context exists but will stop
+ *      to exist with this call, the new context doesn't exist but
+ *      will be active after this call, the virtual interface switches
+ *      from the old to the new (note that the driver may of course
+ *      implement this as an on-the-fly chandef switch of the existing
+ *      hardware context, but the mac80211 pointer for the old context
+ *      will cease to exist and only the new one will later be used
+ *      for changes/removal.)
+ */
+enum ieee80211_chanctx_switch_mode {
+	CHANCTX_SWMODE_REASSIGN_VIF,
+	CHANCTX_SWMODE_SWAP_CONTEXTS,
+};
+
+/**
+ * struct ieee80211_vif_chanctx_switch - vif chanctx switch information
+ *
+ * This structure is used to pass information about a vif that
+ * needs to switch from one chanctx to another.  The
+ * &ieee80211_chanctx_switch_mode defines how the switch should be
+ * done.
+ *
+ * @vif: the vif that should be switched from old_ctx to new_ctx
+ * @old_ctx: the old context to which the vif was assigned
+ * @new_ctx: the new context to which the vif must be assigned
+ */
+struct ieee80211_vif_chanctx_switch {
+	struct ieee80211_vif *vif;
+	struct ieee80211_chanctx_conf *old_ctx;
+	struct ieee80211_chanctx_conf *new_ctx;
+};
+
+/**
  * enum ieee80211_bss_change - BSS change notification flags
  *
  * These flags are used with the bss_info_changed() callback
@@ -1113,7 +1150,9 @@
  * @addr: address of this interface
  * @p2p: indicates whether this AP or STA interface is a p2p
  *	interface, i.e. a GO or p2p-sta respectively
- * @csa_active: marks whether a channel switch is going on
+ * @csa_active: marks whether a channel switch is going on. Internally it is
+ *	write-protected by sdata_lock and local->mtx so holding either is fine
+ *	for read access.
  * @driver_flags: flags/capabilities the driver has for this interface,
  *	these need to be set (or cleared) when the interface is added
  *	or, if supported by the driver, the interface type is changed
@@ -1202,14 +1241,18 @@
  *	fall back to software crypto. Note that this flag deals only with
  *	RX, if your crypto engine can't deal with TX you can also set the
  *	%IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW.
+ * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the
+ *	driver for a CCMP key to indicate that it requires IV generation
+ *	only for management frames (MFP).
  */
 enum ieee80211_key_flags {
-	IEEE80211_KEY_FLAG_GENERATE_IV	= 1<<1,
-	IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2,
-	IEEE80211_KEY_FLAG_PAIRWISE	= 1<<3,
-	IEEE80211_KEY_FLAG_SW_MGMT_TX	= 1<<4,
-	IEEE80211_KEY_FLAG_PUT_IV_SPACE = 1<<5,
-	IEEE80211_KEY_FLAG_RX_MGMT	= 1<<6,
+	IEEE80211_KEY_FLAG_GENERATE_IV_MGMT	= BIT(0),
+	IEEE80211_KEY_FLAG_GENERATE_IV		= BIT(1),
+	IEEE80211_KEY_FLAG_GENERATE_MMIC	= BIT(2),
+	IEEE80211_KEY_FLAG_PAIRWISE		= BIT(3),
+	IEEE80211_KEY_FLAG_SW_MGMT_TX		= BIT(4),
+	IEEE80211_KEY_FLAG_PUT_IV_SPACE		= BIT(5),
+	IEEE80211_KEY_FLAG_RX_MGMT		= BIT(6),
 };
 
 /**
@@ -1370,6 +1413,7 @@
  *	the station moves to associated state.
  * @smps_mode: current SMPS mode (off, static or dynamic)
  * @rates: rate control selection table
+ * @tdls: indicates whether the STA is a TDLS peer
  */
 struct ieee80211_sta {
 	u32 supp_rates[IEEE80211_NUM_BANDS];
@@ -1384,6 +1428,7 @@
 	enum ieee80211_sta_rx_bandwidth bandwidth;
 	enum ieee80211_smps_mode smps_mode;
 	struct ieee80211_sta_rates __rcu *rates;
+	bool tdls;
 
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
@@ -1555,6 +1600,12 @@
  *	for a single active channel while using channel contexts. When support
  *	is not enabled the default action is to disconnect when getting the
  *	CSA frame.
+ *
+ * @IEEE80211_HW_CHANGE_RUNNING_CHANCTX: The hardware can change a
+ *	channel context on-the-fly.  This is needed for channel switch
+ *	on single-channel hardware.  It can also be used as an
+ *	optimization in certain channel switch cases with
+ *	multi-channel.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -1586,6 +1637,7 @@
 	IEEE80211_HW_TIMING_BEACON_ONLY			= 1<<26,
 	IEEE80211_HW_SUPPORTS_HT_CCK_RATES		= 1<<27,
 	IEEE80211_HW_CHANCTX_STA_CSA			= 1<<28,
+	IEEE80211_HW_CHANGE_RUNNING_CHANCTX		= 1<<29,
 };
 
 /**
@@ -2609,6 +2661,7 @@
  *	of queues to flush, which is useful if different virtual interfaces
  *	use different hardware queues; it may also indicate all queues.
  *	If the parameter @drop is set to %true, pending frames may be dropped.
+ *	Note that vif can be NULL.
  *	The callback can sleep.
  *
  * @channel_switch: Drivers that need (or want) to offload the channel
@@ -2720,6 +2773,11 @@
  *	to vif. Possible use is for hw queue remapping.
  * @unassign_vif_chanctx: Notifies device driver about channel context being
  *	unbound from vif.
+ * @switch_vif_chanctx: switch a number of vifs from one chanctx to
+ *	another, as specified in the list of
+ *	&struct ieee80211_vif_chanctx_switch passed to the driver, according
+ *	to the mode defined in &ieee80211_chanctx_switch_mode.
+ *
  * @start_ap: Start operation on the AP interface, this is called after all the
  *	information in bss_conf is set and beacon can be retrieved. A channel
  *	context is bound before this is called. Note that if the driver uses
@@ -2753,6 +2811,10 @@
  *	information in bss_conf is set up and the beacon can be retrieved. A
  *	channel context is bound before this is called.
  * @leave_ibss: Leave the IBSS again.
+ *
+ * @get_expected_throughput: extract the expected throughput towards the
+ *	specified station. The returned value is expressed in Kbps. It returns 0
+ *	if the RC algorithm does not have proper data to provide.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -2871,7 +2933,8 @@
 			     struct netlink_callback *cb,
 			     void *data, int len);
 #endif
-	void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop);
+	void (*flush)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		      u32 queues, bool drop);
 	void (*channel_switch)(struct ieee80211_hw *hw,
 			       struct ieee80211_channel_switch *ch_switch);
 	int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
@@ -2931,6 +2994,10 @@
 	void (*unassign_vif_chanctx)(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif,
 				     struct ieee80211_chanctx_conf *ctx);
+	int (*switch_vif_chanctx)(struct ieee80211_hw *hw,
+				  struct ieee80211_vif_chanctx_switch *vifs,
+				  int n_vifs,
+				  enum ieee80211_chanctx_switch_mode mode);
 
 	void (*restart_complete)(struct ieee80211_hw *hw);
 
@@ -2945,6 +3012,7 @@
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
+	u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
 };
 
 /**
@@ -3394,6 +3462,47 @@
  */
 void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
 
+#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+
+/**
+ * struct ieee80211_mutable_offsets - mutable beacon offsets
+ * @tim_offset: position of TIM element
+ * @tim_length: size of TIM element
+ * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
+ *	to CSA counters.  This array can contain zero values which
+ *	should be ignored.
+ */
+struct ieee80211_mutable_offsets {
+	u16 tim_offset;
+	u16 tim_length;
+
+	u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
+};
+
+/**
+ * ieee80211_beacon_get_template - beacon template generation function
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @offs: &struct ieee80211_mutable_offsets pointer to struct that will
+ *	receive the offsets that may be updated by the driver.
+ *
+ * If the driver implements beaconing modes, it must use this function to
+ * obtain the beacon template.
+ *
+ * This function should be used if the beacon frames are generated by the
+ * device; the driver must then use the returned beacon as the template.
+ * The driver or the device is responsible for updating the DTIM and, when
+ * applicable, the CSA count.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: The beacon template. %NULL on error.
+ */
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      struct ieee80211_mutable_offsets *offs);
+
 /**
  * ieee80211_beacon_get_tim - beacon generation function
  * @hw: pointer obtained from ieee80211_alloc_hw().
@@ -3405,16 +3514,12 @@
  *	Set to 0 if invalid (in non-AP modes).
  *
  * If the driver implements beaconing modes, it must use this function to
- * obtain the beacon frame/template.
+ * obtain the beacon frame.
  *
  * If the beacon frames are generated by the host system (i.e., not in
  * hardware/firmware), the driver uses this function to get each beacon
- * frame from mac80211 -- it is responsible for calling this function
- * before the beacon is needed (e.g. based on hardware interrupt).
- *
- * If the beacon frames are generated by the device, then the driver
- * must use the returned beacon as the template and change the TIM IE
- * according to the current DTIM parameters/TIM bitmap.
+ * frame from mac80211 -- it is responsible for calling this function exactly
+ * once before the beacon is needed (e.g. based on hardware interrupt).
  *
  * The driver is responsible for freeing the returned skb.
  *
@@ -3440,6 +3545,20 @@
 }
 
 /**
+ * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The csa counter should be updated after each beacon transmission.
+ * This function is called implicitly when
+ * ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
+ * beacon frames are generated by the device, the driver should call this
+ * function after each beacon transmission to sync mac80211's csa counters.
+ *
+ * Return: new csa counter value
+ */
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+
+/**
  * ieee80211_csa_finish - notify mac80211 about channel switch
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  *
@@ -4467,6 +4586,8 @@
 	void (*add_sta_debugfs)(void *priv, void *priv_sta,
 				struct dentry *dir);
 	void (*remove_sta_debugfs)(void *priv, void *priv_sta);
+
+	u32 (*get_expected_throughput)(void *priv_sta);
 };
 
 static inline int rate_supported(struct ieee80211_sta *sta,
@@ -4576,7 +4697,9 @@
 static inline bool
 conf_is_ht(struct ieee80211_conf *conf)
 {
-	return conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT;
+	return (conf->chandef.width != NL80211_CHAN_WIDTH_5) &&
+		(conf->chandef.width != NL80211_CHAN_WIDTH_10) &&
+		(conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT);
 }
 
 static inline enum nl80211_iftype

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 5f9eb26..361d260 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h

@@ -373,6 +373,14 @@
 }
 #endif
 
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+static inline struct netns_ieee802154_lowpan *
+net_ieee802154_lowpan(struct net *net)
+{
+	return &net->ieee802154_lowpan;
+}
+#endif
+
 /* For callers who don't really care about whether it's IPv4 or IPv6 */
 static inline void rt_genid_bump_all(struct net *net)
 {

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 07eaaf6..a71dd33 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h

@@ -48,6 +48,8 @@
 extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
 					      unsigned int hooknum);
 
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct);
+
 /* Is this tuple already taken? (not by us)*/
 int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conn *ignored_conntrack);

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bc14d..7ee6ce6 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h

@@ -72,21 +72,23 @@
  *	struct nft_ctx - nf_tables rule/set context
  *
  *	@net: net namespace
- * 	@skb: netlink skb
- * 	@nlh: netlink message header
  * 	@afi: address family info
  * 	@table: the table the chain is contained in
  * 	@chain: the chain the rule is contained in
  *	@nla: netlink attributes
+ *	@portid: netlink portID of the original message
+ *	@seq: netlink sequence number
+ *	@report: notify via unicast netlink message
  */
 struct nft_ctx {
 	struct net			*net;
-	const struct sk_buff		*skb;
-	const struct nlmsghdr		*nlh;
-	const struct nft_af_info	*afi;
-	const struct nft_table		*table;
-	const struct nft_chain		*chain;
+	struct nft_af_info		*afi;
+	struct nft_table		*table;
+	struct nft_chain		*chain;
 	const struct nlattr * const 	*nla;
+	u32				portid;
+	u32				seq;
+	bool				report;
 };
 
 struct nft_data_desc {
@@ -146,6 +148,44 @@
 };
 
 /**
+ *	struct nft_set_desc - description of set elements
+ *
+ *	@klen: key length
+ *	@dlen: data length
+ *	@size: number of set elements
+ */
+struct nft_set_desc {
+	unsigned int		klen;
+	unsigned int		dlen;
+	unsigned int		size;
+};
+
+/**
+ *	enum nft_set_class - performance class
+ *
+ *	@NFT_SET_CLASS_O_1: constant, O(1)
+ *	@NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N)
+ *	@NFT_SET_CLASS_O_N: linear, O(N)
+ */
+enum nft_set_class {
+	NFT_SET_CLASS_O_1,
+	NFT_SET_CLASS_O_LOG_N,
+	NFT_SET_CLASS_O_N,
+};
+
+/**
+ *	struct nft_set_estimate - estimation of memory and performance
+ *				  characteristics
+ *
+ *	@size: required memory
+ *	@class: lookup performance class
+ */
+struct nft_set_estimate {
+	unsigned int		size;
+	enum nft_set_class	class;
+};
+
+/**
  *	struct nft_set_ops - nf_tables set operations
  *
  *	@lookup: look up an element within the set
@@ -174,7 +214,11 @@
 						struct nft_set_iter *iter);
 
 	unsigned int			(*privsize)(const struct nlattr * const nla[]);
+	bool				(*estimate)(const struct nft_set_desc *desc,
+						    u32 features,
+						    struct nft_set_estimate *est);
 	int				(*init)(const struct nft_set *set,
+						const struct nft_set_desc *desc,
 						const struct nlattr * const nla[]);
 	void				(*destroy)(const struct nft_set *set);
 
@@ -194,6 +238,8 @@
  * 	@name: name of the set
  * 	@ktype: key type (numeric type defined by userspace, not used in the kernel)
  * 	@dtype: data type (verdict or numeric type defined by userspace)
+ * 	@size: maximum set size
+ * 	@nelems: number of elements
  * 	@ops: set ops
  * 	@flags: set flags
  * 	@klen: key length
@@ -206,6 +252,8 @@
 	char				name[IFNAMSIZ];
 	u32				ktype;
 	u32				dtype;
+	u32				size;
+	u32				nelems;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
 	u16				flags;
@@ -222,6 +270,8 @@
 
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 				     const struct nlattr *nla);
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+					  const struct nlattr *nla);
 
 /**
  *	struct nft_set_binding - nf_tables set binding
@@ -341,18 +391,75 @@
 };
 
 /**
- *	struct nft_rule_trans - nf_tables rule update in transaction
+ *	struct nft_trans - nf_tables object update in transaction
  *
+ *	@rcu_head: rcu head to defer release of transaction data
  *	@list: used internally
- *	@ctx: rule context
- *	@rule: rule that needs to be updated
+ *	@msg_type: message type
+ *	@ctx: transaction context
+ *	@data: internal information related to the transaction
  */
-struct nft_rule_trans {
+struct nft_trans {
+	struct rcu_head			rcu_head;
 	struct list_head		list;
+	int				msg_type;
 	struct nft_ctx			ctx;
+	char				data[0];
+};
+
+struct nft_trans_rule {
 	struct nft_rule			*rule;
 };
 
+#define nft_trans_rule(trans)	\
+	(((struct nft_trans_rule *)trans->data)->rule)
+
+struct nft_trans_set {
+	struct nft_set	*set;
+	u32		set_id;
+};
+
+#define nft_trans_set(trans)	\
+	(((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans)	\
+	(((struct nft_trans_set *)trans->data)->set_id)
+
+struct nft_trans_chain {
+	bool		update;
+	char		name[NFT_CHAIN_MAXNAMELEN];
+	struct nft_stats __percpu *stats;
+	u8		policy;
+};
+
+#define nft_trans_chain_update(trans)	\
+	(((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans)	\
+	(((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans)	\
+	(((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans)	\
+	(((struct nft_trans_chain *)trans->data)->policy)
+
+struct nft_trans_table {
+	bool		update;
+	bool		enable;
+};
+
+#define nft_trans_table_update(trans)	\
+	(((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans)	\
+	(((struct nft_trans_table *)trans->data)->enable)
+
+struct nft_trans_elem {
+	struct nft_set		*set;
+	struct nft_set_elem	elem;
+};
+
+#define nft_trans_elem_set(trans)	\
+	(((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans)	\
+	(((struct nft_trans_elem *)trans->data)->elem)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
@@ -385,6 +492,7 @@
 
 enum nft_chain_flags {
 	NFT_BASE_CHAIN			= 0x1,
+	NFT_CHAIN_INACTIVE		= 0x2,
 };
 
 /**

diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
new file mode 100644
index 0000000..0ee47c3
--- /dev/null
+++ b/include/net/netfilter/nft_meta.h

@@ -0,0 +1,36 @@
+#ifndef _NFT_META_H_
+#define _NFT_META_H_
+
+struct nft_meta {
+	enum nft_meta_keys	key:8;
+	union {
+		enum nft_registers	dreg:8;
+		enum nft_registers	sreg:8;
+	};
+};
+
+extern const struct nla_policy nft_meta_policy[];
+
+int nft_meta_get_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[]);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[]);
+
+int nft_meta_get_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr);
+
+int nft_meta_set_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr);
+
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt);
+
+void nft_meta_set_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt);
+
+#endif

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b2704fd0..aec5e12 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h

@@ -77,10 +77,17 @@
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
 
+	int sysctl_fwmark_reflect;
+	int sysctl_tcp_fwmark_accept;
+
 	struct ping_group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
 
+#ifdef CONFIG_SYSCTL
+	unsigned long *sysctl_local_reserved_ports;
+#endif
+
 #ifdef CONFIG_IP_MROUTE
 #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 	struct mr_table		*mrt;

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21edaf1..19d3446 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h

@@ -30,6 +30,7 @@
 	int flowlabel_consistency;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
+	int fwmark_reflect;
 };
 
 struct netns_ipv6 {

diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h
index 7655cfe..bdf55c3 100644
--- a/include/net/nfc/digital.h
+++ b/include/net/nfc/digital.h

@@ -36,6 +36,7 @@
 	NFC_DIGITAL_RF_TECH_212F,
 	NFC_DIGITAL_RF_TECH_424F,
 	NFC_DIGITAL_RF_TECH_ISO15693,
+	NFC_DIGITAL_RF_TECH_106B,
 
 	NFC_DIGITAL_RF_TECH_LAST,
 };
@@ -62,6 +63,9 @@
 	NFC_DIGITAL_FRAMING_ISO15693_INVENTORY,
 	NFC_DIGITAL_FRAMING_ISO15693_T5T,
 
+	NFC_DIGITAL_FRAMING_NFCB,
+	NFC_DIGITAL_FRAMING_NFCB_T4T,
+
 	NFC_DIGITAL_FRAMING_LAST,
 };
 

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 03c4650..61286db 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h

@@ -27,6 +27,7 @@
 struct nfc_hci_ops {
 	int (*open) (struct nfc_hci_dev *hdev);
 	void (*close) (struct nfc_hci_dev *hdev);
+	int (*load_session) (struct nfc_hci_dev *hdev);
 	int (*hci_ready) (struct nfc_hci_dev *hdev);
 	/*
 	 * xmit must always send the complete buffer before

diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 2e8b40c..6c583e2 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h

@@ -264,4 +264,7 @@
 int nfc_remove_se(struct nfc_dev *dev, u32 se_idx);
 struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx);
 
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+			  u8 payload_type, u8 direction);
+
 #endif /* __NET_NFC_H */

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2441fb..6da46dc 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h

@@ -136,7 +136,7 @@
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
-		      struct tcf_exts *exts);
+		      struct tcf_exts *exts, bool ovr);
 void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src);

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 891d80d..ec030cd 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h

@@ -96,7 +96,7 @@
 					struct nlattr *tab);
 void qdisc_put_rtab(struct qdisc_rate_table *tab);
 void qdisc_put_stab(struct qdisc_size_table *tab);
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
 		    spinlock_t *root_lock);

diff --git a/include/net/protocol.h b/include/net/protocol.h
index a7e986b..d6fcc1f 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h

@@ -86,7 +86,6 @@
 	struct proto	 *prot;
 	const struct proto_ops *ops;
   
-	char             no_check;   /* checksum on rcv/xmit/none? */
 	unsigned char	 flags;      /* See INET_PROTOSW_* below.  */
 };
 #define INET_PROTOSW_REUSE 0x01	     /* Are ports automatically reusable? */

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 75fc1f5..2599924 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h

@@ -131,6 +131,11 @@
  * 	all country IE information processed by the regulatory core. This will
  * 	override %REGULATORY_COUNTRY_IE_FOLLOW_POWER as all country IEs will
  * 	be ignored.
+ * @REGULATORY_ENABLE_RELAX_NO_IR: for devices that wish to allow the
+ *      NO_IR relaxation, which enables transmissions on channels on which
+ *      otherwise initiating radiation is not allowed. This will enable the
+ *      relaxations enabled under the CFG80211_REG_RELAX_NO_IR configuration
+ *      option
  */
 enum ieee80211_regulatory_flags {
 	REGULATORY_CUSTOM_REG			= BIT(0),
@@ -138,6 +143,7 @@
 	REGULATORY_DISABLE_BEACON_HINTS		= BIT(2),
 	REGULATORY_COUNTRY_IE_FOLLOW_POWER	= BIT(3),
 	REGULATORY_COUNTRY_IE_IGNORE		= BIT(4),
+	REGULATORY_ENABLE_RELAX_NO_IR           = BIT(5),
 };
 
 struct ieee80211_freq_range {

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d062f81..624f985 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h

@@ -199,7 +199,7 @@
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
-					unsigned long *);
+					unsigned long *, bool);
 	int			(*delete)(struct tcf_proto*, unsigned long);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
 

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0dfcc92..f38588b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h

@@ -838,10 +838,10 @@
 	unsigned long sackdelay;
 	__u32 sackfreq;
 
-	/* When was the last time (in jiffies) that we heard from this
-	 * transport?  We use this to pick new active and retran paths.
+	/* When was the last time that we heard from this transport? We use
+	 * this to pick new active and retran paths.
 	 */
-	unsigned long last_time_heard;
+	ktime_t last_time_heard;
 
 	/* Last time(in jiffies) when cwnd is reduced due to the congestion
 	 * indication based on ECNE chunk.

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index f257486..3f36d45 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h

@@ -3,8 +3,6 @@
 
 #include <linux/types.h>
 
-__u32 secure_ip_id(__be32 daddr);
-__u32 secure_ipv6_id(const __be32 daddr[4]);
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);

diff --git a/include/net/snmp.h b/include/net/snmp.h
index 7159626..f1f27fd 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h

@@ -116,51 +116,49 @@
 	unsigned long	mibs[LINUX_MIB_XFRMMAX];
 };
 
-#define SNMP_ARRAY_SZ 1
-
 #define DEFINE_SNMP_STAT(type, name)	\
-	__typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
+	__typeof__(type) __percpu *name
 #define DEFINE_SNMP_STAT_ATOMIC(type, name)	\
 	__typeof__(type) *name
 #define DECLARE_SNMP_STAT(type, name)	\
-	extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
+	extern __typeof__(type) __percpu *name
 
 #define SNMP_INC_STATS_BH(mib, field)	\
-			__this_cpu_inc(mib[0]->mibs[field])
+			__this_cpu_inc(mib->mibs[field])
 
 #define SNMP_INC_STATS_USER(mib, field)	\
-			this_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib->mibs[field])
 
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field)	\
 			atomic_long_inc(&mib->mibs[field])
 
 #define SNMP_INC_STATS(mib, field)	\
-			this_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib->mibs[field])
 
 #define SNMP_DEC_STATS(mib, field)	\
-			this_cpu_dec(mib[0]->mibs[field])
+			this_cpu_dec(mib->mibs[field])
 
 #define SNMP_ADD_STATS_BH(mib, field, addend)	\
-			__this_cpu_add(mib[0]->mibs[field], addend)
+			__this_cpu_add(mib->mibs[field], addend)
 
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
-			this_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib->mibs[field], addend)
 
 #define SNMP_ADD_STATS(mib, field, addend)	\
-			this_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib->mibs[field], addend)
 /*
- * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
+ * Use "__typeof__(*mib) *ptr" instead of "__typeof__(mib) ptr"
  * to make @ptr a non-percpu pointer.
  */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
 		this_cpu_inc(ptr[basefield##PKTS]);		\
 		this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
 		__this_cpu_inc(ptr[basefield##PKTS]);		\
 		__this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
@@ -170,7 +168,7 @@
 
 #define SNMP_ADD_STATS64_BH(mib, field, addend) 			\
 	do {								\
-		__typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]);	\
+		__typeof__(*mib) *ptr = __this_cpu_ptr(mib);		\
 		u64_stats_update_begin(&ptr->syncp);			\
 		ptr->mibs[field] += addend;				\
 		u64_stats_update_end(&ptr->syncp);			\
@@ -191,8 +189,8 @@
 #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
 #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
 	do {								\
-		__typeof__(*mib[0]) *ptr;				\
-		ptr = __this_cpu_ptr((mib)[0]);				\
+		__typeof__(*mib) *ptr;					\
+		ptr = __this_cpu_ptr(mib);				\
 		u64_stats_update_begin(&ptr->syncp);			\
 		ptr->mibs[basefield##PKTS]++;				\
 		ptr->mibs[basefield##OCTETS] += addend;			\

diff --git a/include/net/sock.h b/include/net/sock.h
index 21569cf..07b7fcd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -243,7 +243,8 @@
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
   *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
-  *	@sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets
+  *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
+  *	@sk_no_check_rx: allow zero checksum in RX packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
   *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -371,7 +372,8 @@
 	struct sk_buff_head	sk_write_queue;
 	kmemcheck_bitfield_begin(flags);
 	unsigned int		sk_shutdown  : 2,
-				sk_no_check  : 2,
+				sk_no_check_tx : 1,
+				sk_no_check_rx : 1,
 				sk_userlocks : 4,
 				sk_protocol  : 8,
 				sk_type      : 16;

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 87d8774..7286db8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h

@@ -220,8 +220,6 @@
 #define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
-/* Process SYN data but skip cookie validation */
-#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
 /* Accept SYN data w/o any cookie option */
 #define	TFO_SERVER_COOKIE_NOT_REQD	0x200
 
@@ -230,10 +228,6 @@
  */
 #define	TFO_SERVER_WO_SOCKOPT1	0x400
 #define	TFO_SERVER_WO_SOCKOPT2	0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define	TFO_SERVER_ALWAYS	0x1000
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -541,7 +535,7 @@
 void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
+int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
@@ -558,7 +552,6 @@
 bool tcp_schedule_loss_probe(struct sock *sk);
 
 /* tcp_input.c */
-void tcp_cwnd_application_limited(struct sock *sk);
 void tcp_resume_early_retransmit(struct sock *sk);
 void tcp_rearm_rto(struct sock *sk);
 void tcp_reset(struct sock *sk);
@@ -797,7 +790,7 @@
 	/* return slow start threshold (required) */
 	u32 (*ssthresh)(struct sock *sk);
 	/* do new cwnd calculation (required) */
-	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
 	/* call before changing ca_state (optional) */
 	void (*set_state)(struct sock *sk, u8 new_state);
 	/* call when cwnd event occurs (optional) */
@@ -829,7 +822,7 @@
 
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 u32 tcp_reno_ssthresh(struct sock *sk);
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
 static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
@@ -975,7 +968,30 @@
 {
 	return tp->snd_una + tp->snd_wnd;
 }
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+
+/* We follow the spirit of RFC2861 to validate cwnd but implement a more
+ * flexible approach. The RFC suggests cwnd should not be raised unless
+ * it was fully used previously. And that's exactly what we do in
+ * congestion avoidance mode. But in slow start we allow cwnd to grow
+ * as long as the application has used half the cwnd.
+ * Example :
+ *    cwnd is 10 (IW10), but application sends 9 frames.
+ *    We allow cwnd to reach 18 when all frames are ACKed.
+ * This check is safe because it's as aggressive as slow start which already
+ * risks 100% overshoot. The advantage is that we discourage applications from
+ * sending filler packets or data just to artificially blow up cwnd usage,
+ * and allow application-limited processes to probe bw more aggressively.
+ */
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+	return tp->is_cwnd_limited;
+}
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
@@ -1103,6 +1119,9 @@
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 }
 
+extern void tcp_openreq_init_rwin(struct request_sock *req,
+				  struct sock *sk, struct dst_entry *dst);
+
 void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
@@ -1312,8 +1331,10 @@
 
 extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc);
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 

diff --git a/include/net/tso.h b/include/net/tso.h
new file mode 100644
index 0000000..47e5444
--- /dev/null
+++ b/include/net/tso.h

@@ -0,0 +1,20 @@
+#ifndef _TSO_H
+#define _TSO_H
+
+#include <net/ip.h>
+
+struct tso_t {
+	int next_frag_idx;
+	void *data;
+	size_t size;
+	u16 ip_id;
+	u32 tcp_seq;
+};
+
+int tso_count_descs(struct sk_buff *skb);
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+		   int size, bool is_last);
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size);
+void tso_start(struct sk_buff *skb, struct tso_t *tso);
+
+#endif	/* _TSO_H */

diff --git a/include/net/udp.h b/include/net/udp.h
index a24f0f3..2ecfc6e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h

@@ -95,15 +95,6 @@
 	return &table->hash2[hash & table->mask];
 }
 
-/* Note: this must match 'valbool' in sock_setsockopt */
-#define UDP_CSUM_NOXMIT		1
-
-/* Used by SunRPC/xprt layer. */
-#define UDP_CSUM_NORCV		2
-
-/* Default, as per the RFC, is to always do csums. */
-#define UDP_CSUM_DEFAULT	0
-
 extern struct proto udp_prot;
 
 extern atomic_long_t udp_memory_allocated;
@@ -156,6 +147,15 @@
 	return csum;
 }
 
+static inline __sum16 udp_v4_check(int len, __be32 saddr,
+				   __be32 daddr, __wsum base)
+{
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+		  __be32 saddr, __be32 daddr, int len);
+
 /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
 static inline void udp_lib_hash(struct sock *sk)
 {

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 5deef1a..12196ce6 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h

@@ -24,16 +24,26 @@
 	struct udp_offload udp_offloads;
 };
 
+#define VXLAN_F_LEARN			0x01
+#define VXLAN_F_PROXY			0x02
+#define VXLAN_F_RSC			0x04
+#define VXLAN_F_L2MISS			0x08
+#define VXLAN_F_L3MISS			0x10
+#define VXLAN_F_IPV6			0x20
+#define VXLAN_F_UDP_CSUM		0x40
+#define VXLAN_F_UDP_ZERO_CSUM6_TX	0x80
+#define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
+
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
-				  bool no_share, bool ipv6);
+				  bool no_share, u32 flags);
 
 void vxlan_sock_release(struct vxlan_sock *vs);
 
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni);
+		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
 
 __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
 

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 116e9c7..721e9c3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h

@@ -691,13 +691,6 @@
 
 #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0]))
 
-/* Audit Information */
-struct xfrm_audit {
-	u32	secid;
-	kuid_t	loginuid;
-	unsigned int sessionid;
-};
-
 #ifdef CONFIG_AUDITSYSCALL
 static inline struct audit_buffer *xfrm_audit_start(const char *op)
 {
@@ -713,30 +706,24 @@
 	return audit_buf;
 }
 
-static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, u32 secid,
+static inline void xfrm_audit_helper_usrinfo(bool task_valid,
 					     struct audit_buffer *audit_buf)
 {
-	char *secctx;
-	u32 secctx_len;
+	const unsigned int auid = from_kuid(&init_user_ns, task_valid ?
+					    audit_get_loginuid(current) :
+					    INVALID_UID);
+	const unsigned int ses = task_valid ? audit_get_sessionid(current) :
+		(unsigned int) -1;
 
-	audit_log_format(audit_buf, " auid=%u ses=%u",
-			 from_kuid(&init_user_ns, auid), ses);
-	if (secid != 0 &&
-	    security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) {
-		audit_log_format(audit_buf, " subj=%s", secctx);
-		security_release_secctx(secctx, secctx_len);
-	} else
-		audit_log_task_context(audit_buf);
+	audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses);
+	audit_log_task_context(audit_buf);
 }
 
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, kuid_t auid,
-			   unsigned int ses, u32 secid);
-void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, kuid_t auid,
-			      unsigned int ses, u32 secid);
-void xfrm_audit_state_add(struct xfrm_state *x, int result, kuid_t auid,
-			  unsigned int ses, u32 secid);
-void xfrm_audit_state_delete(struct xfrm_state *x, int result, kuid_t auid,
-			     unsigned int ses, u32 secid);
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid);
+void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
+			      bool task_valid);
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid);
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid);
 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
 				      struct sk_buff *skb);
 void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb,
@@ -749,22 +736,22 @@
 #else
 
 static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses, u32 secid)
+					 bool task_valid)
 {
 }
 
 static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses, u32 secid)
+					    bool task_valid)
 {
 }
 
 static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
-				 kuid_t auid, unsigned int ses, u32 secid)
+					bool task_valid)
 {
 }
 
 static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-				    kuid_t auid, unsigned int ses, u32 secid)
+					   bool task_valid)
 {
 }
 
@@ -1508,7 +1495,7 @@
 
 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
 int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
@@ -1603,7 +1590,7 @@
 					  int *err);
 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
 				     u32 id, int delete, int *err);
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info);
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index acd8251..7ccef34 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h

@@ -80,8 +80,8 @@
 	RDMA_TRANSPORT_USNIC_UDP
 };
 
-enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+__attribute_const__ enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type);
 
 enum rdma_link_layer {
 	IB_LINK_LAYER_UNSPECIFIED,
@@ -466,14 +466,14 @@
  * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
  * @rate: rate to convert.
  */
-int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
 
 /**
  * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
  * For example, IB_RATE_2_5_GBPS will be converted to 2500.
  * @rate: rate to convert.
  */
-int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
 
 enum ib_mr_create_flags {
 	IB_MR_SIGNATURE_EN = 1,
@@ -604,7 +604,7 @@
  * enum.
  * @mult: multiple to convert.
  */
-enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
 
 struct ib_ah_attr {
 	struct ib_global_route	grh;
@@ -783,6 +783,7 @@
 	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
 	IB_QP_CREATE_NETIF_QP			= 1 << 5,
 	IB_QP_CREATE_SIGNATURE_EN		= 1 << 6,
+	IB_QP_CREATE_USE_GFP_NOIO		= 1 << 7,
 	/* reserve bits 26-31 for low level drivers' internal use */
 	IB_QP_CREATE_RESERVED_START		= 1 << 26,
 	IB_QP_CREATE_RESERVED_END		= 1 << 31,

diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644
index 0000000..928b277
--- /dev/null
+++ b/include/rdma/iw_portmap.h

@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#define IWPM_ULIBNAME_SIZE	32
+#define IWPM_DEVNAME_SIZE	32
+#define IWPM_IFNAME_SIZE	16
+#define IWPM_IPADDR_SIZE	16
+
+enum {
+	IWPM_INVALID_NLMSG_ERR = 10,
+	IWPM_CREATE_MAPPING_ERR,
+	IWPM_DUPLICATE_MAPPING_ERR,
+	IWPM_UNKNOWN_MAPPING_ERR,
+	IWPM_CLIENT_DEV_INFO_ERR,
+	IWPM_USER_LIB_INFO_ERR,
+	IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+	char dev_name[IWPM_DEVNAME_SIZE];
+	char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+	struct sockaddr_storage loc_addr;
+	struct sockaddr_storage mapped_loc_addr;
+	struct sockaddr_storage rem_addr;
+	struct sockaddr_storage mapped_rem_addr;
+};
+
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes up.
+ */
+int iwpm_init(u8);
+
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes down.
+ */
+int iwpm_exit(u8);
+
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
+int iwpm_valid_pid(void);
+
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ *                     to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ *                    the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
+ *				 to the userspace port mapper
+ * @pm_msg: Contains the local and remote ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores the
+ * port mapper response (mapped local and remote address info)
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ *                       to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ *                        iwpm_register_pid query
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
+ */
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ *                       iwpm_add_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ *                                 iwpm_add_and_query_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ *                        port mapper daemon is started
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
+ */
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ *                            the provided local mapping info records
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ *                       info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr, u8 nl_client);
+
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ *                       info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr);
+
+#endif /* _IW_PORTMAP_H */

diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index e38de79..0790882 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h

@@ -43,7 +43,7 @@
  * Returns the allocated buffer on success and NULL on failure.
  */
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-		   int len, int client, int op);
+		   int len, int client, int op, int flags);
 /**
  * Put a new attribute in a supplied skb.
  * @skb: The netlink skb.
@@ -56,4 +56,25 @@
 int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 		  int len, void *data, int type);
 
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			__u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			unsigned int group, gfp_t flags);
+
 #endif /* _RDMA_NETLINK_H */

diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index 25ac628..a2594af 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h

@@ -263,16 +263,16 @@
  * Ex name = FORMAT_OSD we have OSD_ACT_FORMAT_OSD && OSDv1_ACT_FORMAT_OSD
  */
 #define OSD_ACT___(Name, Num) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), \
-	OSDv1_ACT_##Name = __constant_cpu_to_be16(0x8800 + Num),
+	OSD_ACT_##Name = cpu_to_be16(0x8880 + Num), \
+	OSDv1_ACT_##Name = cpu_to_be16(0x8800 + Num),
 
 /* V2 only actions */
 #define OSD_ACT_V2(Name, Num) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num),
+	OSD_ACT_##Name = cpu_to_be16(0x8880 + Num),
 
 #define OSD_ACT_V1_V2(Name, Num1, Num2) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(Num2), \
-	OSDv1_ACT_##Name = __constant_cpu_to_be16(Num1),
+	OSD_ACT_##Name = cpu_to_be16(Num2), \
+	OSDv1_ACT_##Name = cpu_to_be16(Num1),
 
 enum osd_service_actions {
 	OSD_ACT_V2(OBJECT_STRUCTURE_CHECK,	0x00)

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index dd7c998..42ed789 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h

@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
+#include <scsi/scsi_device.h>
 
 struct Scsi_Host;
 struct scsi_device;
@@ -133,6 +134,15 @@
 	unsigned char tag;	/* SCSI-II queued command tag */
 };
 
+/*
+ * Return the driver private allocation behind the command.
+ * Only works if cmd_size is set in the host template.
+ */
+static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
+{
+	return cmd + 1;
+}
+
 /* make sure not to use it with REQ_TYPE_BLOCK_PC commands */
 static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
 {
@@ -306,4 +316,20 @@
 	cmd->result = (cmd->result & 0x00ffffff) | (status << 24);
 }
 
+static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
+{
+	unsigned int xfer_len = blk_rq_bytes(scmd->request);
+	unsigned int prot_op = scsi_get_prot_op(scmd);
+	unsigned int sector_size = scmd->device->sector_size;
+
+	switch (prot_op) {
+	case SCSI_PROT_NORMAL:
+	case SCSI_PROT_WRITE_STRIP:
+	case SCSI_PROT_READ_INSERT:
+		return xfer_len;
+	}
+
+	return xfer_len + (xfer_len >> ilog2(sector_size)) * 8;
+}
+
 #endif /* _SCSI_SCSI_CMND_H */

diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 20fdfc2..36c4114 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h

@@ -4,17 +4,17 @@
 #include <linux/device.h>
 
 struct module;
+struct request;
 struct scsi_cmnd;
 struct scsi_device;
-struct request;
-struct request_queue;
-
 
 struct scsi_driver {
 	struct module		*owner;
 	struct device_driver	gendrv;
 
 	void (*rescan)(struct device *);
+	int (*init_command)(struct scsi_cmnd *);
+	void (*uninit_command)(struct scsi_cmnd *);
 	int (*done)(struct scsi_cmnd *);
 	int (*eh_action)(struct scsi_cmnd *, int);
 };
@@ -31,8 +31,5 @@
 
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req);
 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req);
-int scsi_prep_state_check(struct scsi_device *sdev, struct request *req);
-int scsi_prep_return(struct request_queue *q, struct request *req, int ret);
-int scsi_prep_fn(struct request_queue *, struct request *);
 
 #endif /* _SCSI_SCSI_DRIVER_H */

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index b4d6697..d854fb3 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h

@@ -932,7 +932,7 @@
 				   struct timespec *tv)
 {
 	if (runtime->tstamp_type == SNDRV_PCM_TSTAMP_TYPE_MONOTONIC)
-		do_posix_clock_monotonic_gettime(tv);
+		ktime_get_ts(tv);
 	else
 		getnstimeofday(tv);
 }

diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index 33b487b..daef9da 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h

@@ -70,7 +70,8 @@
 extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_tm_rsp *);
 extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *,
-				struct iscsi_text_rsp *);
+				struct iscsi_text_rsp *,
+				enum iscsit_transport_type);
 extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_reject *);
 extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *,

diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 3a1c1ee..9adc1bc 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h

@@ -59,6 +59,7 @@
 void	transport_subsystem_release(struct se_subsystem_api *);
 
 void	target_complete_cmd(struct se_cmd *, u8);
+void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 sense_reason_t	spc_parse_cdb(struct se_cmd *cmd, unsigned int *size);
 sense_reason_t	spc_emulate_report_luns(struct se_cmd *cmd);

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 67f38fa..b983990 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h

@@ -659,6 +659,66 @@
 	TP_CONDITION(bio)
 );
 
+TRACE_EVENT(f2fs_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+				unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t,	pos)
+		__field(unsigned int, len)
+		__field(unsigned int, flags)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u",
+		show_dev_ino(__entry),
+		(unsigned long long)__entry->pos,
+		__entry->len,
+		__entry->flags)
+);
+
+TRACE_EVENT(f2fs_write_end,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+				unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t,	pos)
+		__field(unsigned int, len)
+		__field(unsigned int, copied)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u",
+		show_dev_ino(__entry),
+		(unsigned long long)__entry->pos,
+		__entry->len,
+		__entry->copied)
+);
+
 DECLARE_EVENT_CLASS(f2fs__page,
 
 	TP_PROTO(struct page *page, int type),
@@ -672,6 +732,7 @@
 		__field(int, dir)
 		__field(pgoff_t, index)
 		__field(int, dirty)
+		__field(int, uptodate)
 	),
 
 	TP_fast_assign(
@@ -681,14 +742,31 @@
 		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
 		__entry->index	= page->index;
 		__entry->dirty	= PageDirty(page);
+		__entry->uptodate = PageUptodate(page);
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d",
+	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, "
+		"dirty = %d, uptodate = %d",
 		show_dev_ino(__entry),
 		show_block_type(__entry->type),
 		show_file_type(__entry->dir),
 		(unsigned long)__entry->index,
-		__entry->dirty)
+		__entry->dirty,
+		__entry->uptodate)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_writepage,
+
+	TP_PROTO(struct page *page, int type),
+
+	TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_readpage,
+
+	TP_PROTO(struct page *page, int type),
+
+	TP_ARGS(page, type)
 );
 
 DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
@@ -705,6 +783,70 @@
 	TP_ARGS(page, type)
 );
 
+TRACE_EVENT(f2fs_writepages,
+
+	TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
+
+	TP_ARGS(inode, wbc, type),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(int,	type)
+		__field(int,	dir)
+		__field(long,	nr_to_write)
+		__field(long,	pages_skipped)
+		__field(loff_t,	range_start)
+		__field(loff_t,	range_end)
+		__field(pgoff_t, writeback_index)
+		__field(int,	sync_mode)
+		__field(char,	for_kupdate)
+		__field(char,	for_background)
+		__field(char,	tagged_writepages)
+		__field(char,	for_reclaim)
+		__field(char,	range_cyclic)
+		__field(char,	for_sync)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->type		= type;
+		__entry->dir		= S_ISDIR(inode->i_mode);
+		__entry->nr_to_write	= wbc->nr_to_write;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->range_start	= wbc->range_start;
+		__entry->range_end	= wbc->range_end;
+		__entry->writeback_index = inode->i_mapping->writeback_index;
+		__entry->sync_mode	= wbc->sync_mode;
+		__entry->for_kupdate	= wbc->for_kupdate;
+		__entry->for_background	= wbc->for_background;
+		__entry->tagged_writepages	= wbc->tagged_writepages;
+		__entry->for_reclaim	= wbc->for_reclaim;
+		__entry->range_cyclic	= wbc->range_cyclic;
+		__entry->for_sync	= wbc->for_sync;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, "
+		"skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, "
+		"kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u",
+		show_dev_ino(__entry),
+		show_block_type(__entry->type),
+		show_file_type(__entry->dir),
+		__entry->nr_to_write,
+		__entry->pages_skipped,
+		__entry->range_start,
+		__entry->range_end,
+		(unsigned long)__entry->writeback_index,
+		__entry->sync_mode,
+		__entry->for_kupdate,
+		__entry->for_background,
+		__entry->tagged_writepages,
+		__entry->for_reclaim,
+		__entry->range_cyclic,
+		__entry->for_sync)
+);
+
 TRACE_EVENT(f2fs_submit_page_mbio,
 
 	TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),

diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 9a7e08d..d19840b 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h

@@ -7,6 +7,9 @@
 #include <linux/ktime.h>
 #include <linux/pm_qos.h>
 #include <linux/tracepoint.h>
+#include <linux/ftrace_event.h>
+
+#define TPS(x)  tracepoint_string(x)
 
 DECLARE_EVENT_CLASS(cpu,
 
@@ -90,6 +93,17 @@
 #define PWR_EVENT_EXIT -1
 #endif
 
+#define pm_verb_symbolic(event) \
+	__print_symbolic(event, \
+		{ PM_EVENT_SUSPEND, "suspend" }, \
+		{ PM_EVENT_RESUME, "resume" }, \
+		{ PM_EVENT_FREEZE, "freeze" }, \
+		{ PM_EVENT_QUIESCE, "quiesce" }, \
+		{ PM_EVENT_HIBERNATE, "hibernate" }, \
+		{ PM_EVENT_THAW, "thaw" }, \
+		{ PM_EVENT_RESTORE, "restore" }, \
+		{ PM_EVENT_RECOVER, "recover" })
+
 DEFINE_EVENT(cpu, cpu_frequency,
 
 	TP_PROTO(unsigned int frequency, unsigned int cpu_id),
@@ -97,58 +111,76 @@
 	TP_ARGS(frequency, cpu_id)
 );
 
-TRACE_EVENT(machine_suspend,
+TRACE_EVENT(device_pm_callback_start,
 
-	TP_PROTO(unsigned int state),
+	TP_PROTO(struct device *dev, const char *pm_ops, int event),
 
-	TP_ARGS(state),
-
-	TP_STRUCT__entry(
-		__field(	u32,		state		)
-	),
-
-	TP_fast_assign(
-		__entry->state = state;
-	),
-
-	TP_printk("state=%lu", (unsigned long)__entry->state)
-);
-
-TRACE_EVENT(device_pm_report_time,
-
-	TP_PROTO(struct device *dev, const char *pm_ops, s64 ops_time,
-		 char *pm_event_str, int error),
-
-	TP_ARGS(dev, pm_ops, ops_time, pm_event_str, error),
+	TP_ARGS(dev, pm_ops, event),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(dev))
 		__string(driver, dev_driver_string(dev))
 		__string(parent, dev->parent ? dev_name(dev->parent) : "none")
 		__string(pm_ops, pm_ops ? pm_ops : "none ")
-		__string(pm_event_str, pm_event_str)
-		__field(s64, ops_time)
+		__field(int, event)
+	),
+
+	TP_fast_assign(
+		__assign_str(device, dev_name(dev));
+		__assign_str(driver, dev_driver_string(dev));
+		__assign_str(parent,
+			dev->parent ? dev_name(dev->parent) : "none");
+		__assign_str(pm_ops, pm_ops ? pm_ops : "none ");
+		__entry->event = event;
+	),
+
+	TP_printk("%s %s, parent: %s, %s[%s]", __get_str(driver),
+		__get_str(device), __get_str(parent), __get_str(pm_ops),
+		pm_verb_symbolic(__entry->event))
+);
+
+TRACE_EVENT(device_pm_callback_end,
+
+	TP_PROTO(struct device *dev, int error),
+
+	TP_ARGS(dev, error),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__string(driver, dev_driver_string(dev))
 		__field(int, error)
 	),
 
 	TP_fast_assign(
-		const char *tmp = dev->parent ? dev_name(dev->parent) : "none";
-		const char *tmp_i = pm_ops ? pm_ops : "none ";
-
 		__assign_str(device, dev_name(dev));
 		__assign_str(driver, dev_driver_string(dev));
-		__assign_str(parent, tmp);
-		__assign_str(pm_ops, tmp_i);
-		__assign_str(pm_event_str, pm_event_str);
-		__entry->ops_time = ops_time;
 		__entry->error = error;
 	),
 
-	/* ops_str has an extra space at the end */
-	TP_printk("%s %s parent=%s state=%s ops=%snsecs=%lld err=%d",
-		__get_str(driver), __get_str(device), __get_str(parent),
-		__get_str(pm_event_str), __get_str(pm_ops),
-		__entry->ops_time, __entry->error)
+	TP_printk("%s %s, err=%d",
+		__get_str(driver), __get_str(device), __entry->error)
+);
+
+TRACE_EVENT(suspend_resume,
+
+	TP_PROTO(const char *action, int val, bool start),
+
+	TP_ARGS(action, val, start),
+
+	TP_STRUCT__entry(
+		__field(const char *, action)
+		__field(int, val)
+		__field(bool, start)
+	),
+
+	TP_fast_assign(
+		__entry->action = action;
+		__entry->val = val;
+		__entry->start = start;
+	),
+
+	TP_printk("%s[%u] %s", __entry->action, (unsigned int)__entry->val,
+		(__entry->start)?"begin":"end")
 );
 
 DECLARE_EVENT_CLASS(wakeup_source,

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 67e1bbf..0a68d5a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h

@@ -530,6 +530,26 @@
 			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
 			__entry->dst_cpu, __entry->dst_nid)
 );
+
+/*
+ * Tracepoint for waking a polling cpu without an IPI.
+ */
+TRACE_EVENT(sched_wake_idle_without_ipi,
+
+	TP_PROTO(int cpu),
+
+	TP_ARGS(cpu),
+
+	TP_STRUCT__entry(
+		__field(	int,	cpu	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+	),
+
+	TP_printk("cpu=%d", __entry->cpu)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0a1a4f7..0fd06fe 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h

@@ -53,6 +53,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -128,6 +131,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
@@ -197,9 +203,22 @@
 #define __get_dynamic_array(field)	\
 		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
 
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field)						\
+	({								\
+		void *__bitmask = __get_dynamic_array(field);		\
+		unsigned int __bitmask_size;				\
+		__bitmask_size = __get_dynamic_array_len(field);	\
+		ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+	})
+
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
 	({								\
@@ -322,6 +341,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int notrace __init						\
@@ -372,6 +394,29 @@
 #define __string(item, src) __dynamic_array(char, item,			\
 		    strlen((src) ? (const char *)(src) : "(null)") + 1)
 
+/*
+ * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
+ * nr_bits bits (e.g. num_possible_cpus()).
+ */
+#define __bitmask_size_in_bytes_raw(nr_bits)	\
+	(((nr_bits) + 7) / 8)
+
+#define __bitmask_size_in_longs(nr_bits)			\
+	((__bitmask_size_in_bytes_raw(nr_bits) +		\
+	  ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8))
+
+/*
+ * __bitmask_size_in_bytes is the number of bytes needed to hold
+ * nr_bits bits, padded out to the nearest long. This is what
+ * is saved in the buffer, just to be consistent.
+ */
+#define __bitmask_size_in_bytes(nr_bits)				\
+	(__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8))
+
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item,	\
+					 __bitmask_size_in_longs(nr_bits))
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline notrace int ftrace_get_offsets_##call(			\
@@ -513,12 +558,22 @@
 	__entry->__data_loc_##item = __data_offsets.item;
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)       	\
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef __assign_str
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __assign_bitmask
+#define __assign_bitmask(dst, src, nr_bits)					\
+	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
@@ -585,7 +640,9 @@
 #undef __print_symbolic
 #undef __print_hex
 #undef __get_dynamic_array
+#undef __get_dynamic_array_len
 #undef __get_str
+#undef __get_bitmask
 
 #undef TP_printk
 #define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)
@@ -648,9 +705,16 @@
 #define __get_dynamic_array(field)	\
 		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
 
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
 #undef __perf_addr
 #define __perf_addr(a)	(__addr = (a))
 

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index f104c26..def54f9 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h

@@ -181,6 +181,7 @@
 #define DRM_MODE_ENCODER_TVDAC	4
 #define DRM_MODE_ENCODER_VIRTUAL 5
 #define DRM_MODE_ENCODER_DSI	6
+#define DRM_MODE_ENCODER_DPMST	7
 
 struct drm_mode_get_encoder {
 	__u32 encoder_id;
@@ -251,6 +252,21 @@
 #define DRM_MODE_PROP_BLOB	(1<<4)
 #define DRM_MODE_PROP_BITMASK	(1<<5) /* bitmask of enumerated types */
 
+/* non-extended types: legacy bitmask, one bit per type: */
+#define DRM_MODE_PROP_LEGACY_TYPE  ( \
+		DRM_MODE_PROP_RANGE | \
+		DRM_MODE_PROP_ENUM | \
+		DRM_MODE_PROP_BLOB | \
+		DRM_MODE_PROP_BITMASK)
+
+/* extended-types: rather than continue to consume a bit per type,
+ * grab a chunk of the bits to use as integer type id.
+ */
+#define DRM_MODE_PROP_EXTENDED_TYPE	0x0000ffc0
+#define DRM_MODE_PROP_TYPE(n)		((n) << 6)
+#define DRM_MODE_PROP_OBJECT		DRM_MODE_PROP_TYPE(1)
+#define DRM_MODE_PROP_SIGNED_RANGE	DRM_MODE_PROP_TYPE(2)
+
 struct drm_mode_property_enum {
 	__u64 value;
 	char name[DRM_PROP_NAME_LEN];

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 126bfaa..ff57f07 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h

@@ -223,6 +223,7 @@
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
+#define DRM_I915_GEM_USERPTR		0x33
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -273,6 +274,7 @@
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -337,6 +339,7 @@
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
+#define I915_PARAM_CMD_PARSER_VERSION	 28
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -1049,4 +1052,18 @@
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */

diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index aefa2f6..1cc0b61 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h

@@ -1007,7 +1007,7 @@
 #define RADEON_INFO_NUM_BYTES_MOVED	0x1d
 #define RADEON_INFO_VRAM_USAGE		0x1e
 #define RADEON_INFO_GTT_USAGE		0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT	0x20
 
 struct drm_radeon_info {
 	uint32_t		request;

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4c31a36..cf67147 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h

@@ -385,6 +385,14 @@
  */
 #define AUDIT_MESSAGE_TEXT_MAX	8560
 
+/* Multicast Netlink socket groups (default up to 32) */
+enum audit_nlgrps {
+	AUDIT_NLGRP_NONE,	/* Group 0 not used */
+	AUDIT_NLGRP_READLOG,	/* "best effort" read only socket */
+	__AUDIT_NLGRP_MAX
+};
+#define AUDIT_NLGRP_MAX                (__AUDIT_NLGRP_MAX - 1)
+
 struct audit_status {
 	__u32		mask;		/* Bit mask for valid entries */
 	__u32		enabled;	/* 1 = enabled, 0 = disabled */

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b4d6909..6f9c38c 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h

@@ -181,7 +181,11 @@
 	__u64 max_id;				/* out */
 	__u64 num_devices;			/* out */
 	__u8 fsid[BTRFS_FSID_SIZE];		/* out */
-	__u64 reserved[124];			/* pad to 1k */
+	__u32 nodesize;				/* out */
+	__u32 sectorsize;			/* out */
+	__u32 clone_alignment;			/* out */
+	__u32 reserved32;
+	__u64 reserved[122];			/* pad to 1k */
 };
 
 struct btrfs_ioctl_feature_flags {
@@ -211,7 +215,8 @@
 
 	__u64 flags;
 
-	__u64 unused[8];
+	__u64 limit;		/* limit number of processed chunks */
+	__u64 unused[7];
 } __attribute__ ((__packed__));
 
 /* report balance progress to userspace */
@@ -301,6 +306,14 @@
 	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
 };
 
+struct btrfs_ioctl_search_args_v2 {
+	struct btrfs_ioctl_search_key key; /* in/out - search parameters */
+	__u64 buf_size;		   /* in - size of buffer
+					    * out - on EOVERFLOW: needed size
+					    *       to store item */
+	__u64 buf[0];                       /* out - found items */
+};
+
 struct btrfs_ioctl_clone_range_args {
   __s64 src_fd;
   __u64 src_offset, src_length;
@@ -553,6 +566,8 @@
 				struct btrfs_ioctl_defrag_range_args)
 #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
 				   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+					   struct btrfs_ioctl_search_args_v2)
 #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
 				   struct btrfs_ioctl_ino_lookup_args)
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 5d9d1d1..41892f7 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h

@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_H
-#define CAN_H
+#ifndef _UAPI_CAN_H
+#define _UAPI_CAN_H
 
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -191,4 +191,4 @@
 
 #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */
 
-#endif /* CAN_H */
+#endif /* !_UAPI_CAN_H */

diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h
index 382251a..89ddb9d 100644
--- a/include/uapi/linux/can/bcm.h
+++ b/include/uapi/linux/can/bcm.h

@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_BCM_H
-#define CAN_BCM_H
+#ifndef _UAPI_CAN_BCM_H
+#define _UAPI_CAN_BCM_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -95,4 +95,4 @@
 #define TX_RESET_MULTI_IDX  0x0200
 #define RX_RTR_FRAME        0x0400
 
-#endif /* CAN_BCM_H */
+#endif /* !_UAPI_CAN_BCM_H */

diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h
index b632045..c247446 100644
--- a/include/uapi/linux/can/error.h
+++ b/include/uapi/linux/can/error.h

@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_ERROR_H
-#define CAN_ERROR_H
+#ifndef _UAPI_CAN_ERROR_H
+#define _UAPI_CAN_ERROR_H
 
 #define CAN_ERR_DLC 8 /* dlc for error message frames */
 
@@ -120,4 +120,4 @@
 
 /* controller specific additional information / data[5..7] */
 
-#endif /* CAN_ERROR_H */
+#endif /* _UAPI_CAN_ERROR_H */

diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 844c896..3e6184c 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h

@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_GW_H
-#define CAN_GW_H
+#ifndef _UAPI_CAN_GW_H
+#define _UAPI_CAN_GW_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -200,4 +200,4 @@
  *         Beware of sending unpacked or aligned structs!
  */
 
-#endif
+#endif /* !_UAPI_CAN_GW_H */

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 7e2e186..813d11f 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h

@@ -15,8 +15,8 @@
  * GNU General Public License for more details.
  */
 
-#ifndef CAN_NETLINK_H
-#define CAN_NETLINK_H
+#ifndef _UAPI_CAN_NETLINK_H
+#define _UAPI_CAN_NETLINK_H
 
 #include <linux/types.h>
 
@@ -130,4 +130,4 @@
 
 #define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
 
-#endif /* CAN_NETLINK_H */
+#endif /* !_UAPI_CAN_NETLINK_H */

diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index c7d8c33..78ec76f 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h

@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_RAW_H
-#define CAN_RAW_H
+#ifndef _UAPI_CAN_RAW_H
+#define _UAPI_CAN_RAW_H
 
 #include <linux/can.h>
 
@@ -59,4 +59,4 @@
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
 };
 
-#endif
+#endif /* !_UAPI_CAN_RAW_H */

diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 154dd6d..12c37a1 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h

@@ -347,7 +347,12 @@
 
 #define CAP_BLOCK_SUSPEND    36
 
-#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
+/* Allow reading the audit log via multicast netlink socket */
+
+#define CAP_AUDIT_READ		37
+
+
+#define CAP_LAST_CAP         CAP_AUDIT_READ
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fd161e9..e3c7a71 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h

@@ -847,6 +847,38 @@
 };
 
 /**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier.
+ * @indir_size: On entry, the array size of the user buffer for the
+ *	indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
+ *	%ETH_RXFH_INDIR_NO_CHANGE.  On return from %ETHTOOL_GRSSH,
+ *	the array size of the hardware indirection table.
+ * @key_size: On entry, the array size of the user buffer for the hash key,
+ *	which may be zero.  On return from %ETHTOOL_GRSSH, the size of the
+ *	hardware hash key.
+ * @rsvd:	Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ *	of @indir_size __u32 elements, followed by hash key of @key_size
+ *	bytes.
+ *
+ * For %ETHTOOL_GRSSH, an @indir_size and @key_size of zero means that only
+ * the size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
+ * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
+ * and a @indir_size of zero means the indir table should be reset to default
+ * values.
+ */
+struct ethtool_rxfh {
+	__u32   cmd;
+	__u32	rss_context;
+	__u32   indir_size;
+	__u32   key_size;
+	__u32	rsvd[2];
+	__u32   rss_config[0];
+};
+#define ETH_RXFH_INDIR_NO_CHANGE	0xffffffff
+
+/**
  * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
  * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
  * @h_u: Flow field values to match (dependent on @flow_type)
@@ -1118,6 +1150,9 @@
 #define ETHTOOL_GEEE		0x00000044 /* Get EEE settings */
 #define ETHTOOL_SEEE		0x00000045 /* Set EEE settings */
 
+#define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET

diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 8eb9cca..253b4d4 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h

@@ -130,7 +130,8 @@
 #define SKF_AD_VLAN_TAG	44
 #define SKF_AD_VLAN_TAG_PRESENT 48
 #define SKF_AD_PAY_OFFSET	52
-#define SKF_AD_MAX	56
+#define SKF_AD_RANDOM	56
+#define SKF_AD_MAX	60
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 

diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h
index 0d36909..1086cd9 100644
--- a/include/uapi/linux/if_fddi.h
+++ b/include/uapi/linux/if_fddi.h

@@ -30,74 +30,76 @@
  *  Define max and min legal sizes.  The frame sizes do not include
  *  4 byte FCS/CRC (frame check sequence).
  */
-#define FDDI_K_ALEN			6		/* Octets in one FDDI address */
-#define FDDI_K_8022_HLEN	16		/* Total octets in 802.2 header */
-#define FDDI_K_SNAP_HLEN	21		/* Total octets in 802.2 SNAP header */
-#define FDDI_K_8022_ZLEN	16		/* Min octets in 802.2 frame sans FCS */
-#define FDDI_K_SNAP_ZLEN	21		/* Min octets in 802.2 SNAP frame sans FCS */
+#define FDDI_K_ALEN		6	/* Octets in one FDDI address */
+#define FDDI_K_8022_HLEN	16	/* Total octets in 802.2 header */
+#define FDDI_K_SNAP_HLEN	21	/* Total octets in 802.2 SNAP header */
+#define FDDI_K_8022_ZLEN	16	/* Min octets in 802.2 frame sans
+					   FCS */
+#define FDDI_K_SNAP_ZLEN	21	/* Min octets in 802.2 SNAP frame sans
+					   FCS */
 #define FDDI_K_8022_DLEN	4475	/* Max octets in 802.2 payload */
 #define FDDI_K_SNAP_DLEN	4470	/* Max octets in 802.2 SNAP payload */
-#define FDDI_K_LLC_ZLEN		13		/* Min octets in LLC frame sans FCS */
+#define FDDI_K_LLC_ZLEN		13	/* Min octets in LLC frame sans FCS */
 #define FDDI_K_LLC_LEN		4491	/* Max octets in LLC frame sans FCS */
+#define FDDI_K_OUI_LEN		3	/* Octets in OUI in 802.2 SNAP
+					   header */
 
 /* Define FDDI Frame Control (FC) Byte values */
-#define FDDI_FC_K_VOID					0x00	
-#define FDDI_FC_K_NON_RESTRICTED_TOKEN	0x80	
-#define FDDI_FC_K_RESTRICTED_TOKEN		0xC0	
-#define FDDI_FC_K_SMT_MIN				0x41
-#define FDDI_FC_K_SMT_MAX		   		0x4F
-#define FDDI_FC_K_MAC_MIN				0xC1
-#define FDDI_FC_K_MAC_MAX		  		0xCF	
-#define FDDI_FC_K_ASYNC_LLC_MIN			0x50
-#define FDDI_FC_K_ASYNC_LLC_DEF			0x54
-#define FDDI_FC_K_ASYNC_LLC_MAX			0x5F
-#define FDDI_FC_K_SYNC_LLC_MIN			0xD0
-#define FDDI_FC_K_SYNC_LLC_MAX			0xD7
-#define FDDI_FC_K_IMPLEMENTOR_MIN		0x60
-#define FDDI_FC_K_IMPLEMENTOR_MAX  		0x6F
-#define FDDI_FC_K_RESERVED_MIN			0x70
-#define FDDI_FC_K_RESERVED_MAX			0x7F
+#define FDDI_FC_K_VOID			0x00
+#define FDDI_FC_K_NON_RESTRICTED_TOKEN	0x80
+#define FDDI_FC_K_RESTRICTED_TOKEN	0xC0
+#define FDDI_FC_K_SMT_MIN		0x41
+#define FDDI_FC_K_SMT_MAX		0x4F
+#define FDDI_FC_K_MAC_MIN		0xC1
+#define FDDI_FC_K_MAC_MAX		0xCF
+#define FDDI_FC_K_ASYNC_LLC_MIN		0x50
+#define FDDI_FC_K_ASYNC_LLC_DEF		0x54
+#define FDDI_FC_K_ASYNC_LLC_MAX		0x5F
+#define FDDI_FC_K_SYNC_LLC_MIN		0xD0
+#define FDDI_FC_K_SYNC_LLC_MAX		0xD7
+#define FDDI_FC_K_IMPLEMENTOR_MIN	0x60
+#define FDDI_FC_K_IMPLEMENTOR_MAX	0x6F
+#define FDDI_FC_K_RESERVED_MIN		0x70
+#define FDDI_FC_K_RESERVED_MAX		0x7F
 
 /* Define LLC and SNAP constants */
-#define FDDI_EXTENDED_SAP	0xAA
+#define FDDI_EXTENDED_SAP		0xAA
 #define FDDI_UI_CMD			0x03
 
 /* Define 802.2 Type 1 header */
 struct fddi_8022_1_hdr {
-	__u8	dsap;					/* destination service access point */
-	__u8	ssap;					/* source service access point */
-	__u8	ctrl;					/* control byte #1 */
+	__u8	dsap;			/* destination service access point */
+	__u8	ssap;			/* source service access point */
+	__u8	ctrl;			/* control byte #1 */
 } __attribute__((packed));
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
-	__u8	dsap;					/* destination service access point */
-	__u8	ssap;					/* source service access point */
-	__u8	ctrl_1;					/* control byte #1 */
-	__u8	ctrl_2;					/* control byte #2 */
+	__u8	dsap;			/* destination service access point */
+	__u8	ssap;			/* source service access point */
+	__u8	ctrl_1;			/* control byte #1 */
+	__u8	ctrl_2;			/* control byte #2 */
 } __attribute__((packed));
 
 /* Define 802.2 SNAP header */
-#define FDDI_K_OUI_LEN	3
 struct fddi_snap_hdr {
-	__u8	dsap;					/* always 0xAA */
-	__u8	ssap;					/* always 0xAA */
-	__u8	ctrl;					/* always 0x03 */
+	__u8	dsap;			/* always 0xAA */
+	__u8	ssap;			/* always 0xAA */
+	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
-	__be16	ethertype;				/* packet type ID field */
+	__be16	ethertype;		/* packet type ID field */
 } __attribute__((packed));
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
-	__u8	fc;						/* frame control */
-	__u8	daddr[FDDI_K_ALEN];		/* destination address */
-	__u8	saddr[FDDI_K_ALEN];		/* source address */
-	union
-		{
-		struct fddi_8022_1_hdr		llc_8022_1;
-		struct fddi_8022_2_hdr		llc_8022_2;
-		struct fddi_snap_hdr		llc_snap;
-		} hdr;
+	__u8	fc;			/* frame control */
+	__u8	daddr[FDDI_K_ALEN];	/* destination address */
+	__u8	saddr[FDDI_K_ALEN];	/* source address */
+	union {
+		struct fddi_8022_1_hdr	llc_8022_1;
+		struct fddi_8022_2_hdr	llc_8022_2;
+		struct fddi_snap_hdr	llc_snap;
+	} hdr;
 } __attribute__((packed));
 
 

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9a7f7ac..b385348 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h

@@ -319,6 +319,9 @@
 	IFLA_VXLAN_PORT,	/* destination port */
 	IFLA_VXLAN_GROUP6,
 	IFLA_VXLAN_LOCAL6,
+	IFLA_VXLAN_UDP_CSUM,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
@@ -399,9 +402,10 @@
 	IFLA_VF_UNSPEC,
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
 	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
+	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
 	__IFLA_VF_MAX,
 };
 
@@ -423,6 +427,12 @@
 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
 };
 
+struct ifla_vf_rate {
+	__u32 vf;
+	__u32 min_tx_rate; /* Min Bandwidth in Mbps */
+	__u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
 struct ifla_vf_spoofchk {
 	__u32 vf;
 	__u32 setting;

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index aee73d0..3bce9e9 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h

@@ -100,7 +100,7 @@
 #define IFLA_GRE_MAX	(__IFLA_GRE_MAX - 1)
 
 /* VTI-mode i_flags */
-#define VTI_ISVTI 0x0001
+#define VTI_ISVTI ((__force __be16)0x0001)
 
 enum {
 	IFLA_VTI_UNSPEC,

diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 2841f86..bf6cd7d 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h

@@ -20,6 +20,9 @@
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
 #define KVM_HC_KICK_CPU			5
+#define KVM_HC_MIPS_GET_CLOCK_FREQ	6
+#define KVM_HC_MIPS_EXIT_VM		7
+#define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 
 /*
  * hypercalls use architecture specific

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 8adb681..21caa26 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h

@@ -124,6 +124,8 @@
 	L2TP_ATTR_STATS,		/* nested */
 	L2TP_ATTR_IP6_SADDR,		/* struct in6_addr */
 	L2TP_ATTR_IP6_DADDR,		/* struct in6_addr */
+	L2TP_ATTR_UDP_ZERO_CSUM6_TX,	/* u8 */
+	L2TP_ATTR_UDP_ZERO_CSUM6_RX,	/* u8 */
 	__L2TP_ATTR_MAX,
 };
 

diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index d3ef583..4a1d7e9 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h

@@ -24,6 +24,7 @@
 	NDA_PORT,
 	NDA_VNI,
 	NDA_IFINDEX,
+	NDA_MASTER,
 	__NDA_MAX
 };
 

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c88ccbf..2a88f64 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h

@@ -212,6 +212,29 @@
 };
 
 /**
+ * enum nft_set_policies - set selection policy
+ *
+ * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use
+ * @NFT_SET_POL_MEMORY: prefer low memory use over high performance
+ */
+enum nft_set_policies {
+	NFT_SET_POL_PERFORMANCE,
+	NFT_SET_POL_MEMORY,
+};
+
+/**
+ * enum nft_set_desc_attributes - set element description
+ *
+ * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
+ */
+enum nft_set_desc_attributes {
+	NFTA_SET_DESC_UNSPEC,
+	NFTA_SET_DESC_SIZE,
+	__NFTA_SET_DESC_MAX
+};
+#define NFTA_SET_DESC_MAX	(__NFTA_SET_DESC_MAX - 1)
+
+/**
  * enum nft_set_attributes - nf_tables set netlink attributes
  *
  * @NFTA_SET_TABLE: table name (NLA_STRING)
@@ -221,6 +244,9 @@
  * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
  * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
  * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ * @NFTA_SET_POLICY: selection policy (NLA_U32)
+ * @NFTA_SET_DESC: set description (NLA_NESTED)
+ * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -231,6 +257,9 @@
 	NFTA_SET_KEY_LEN,
 	NFTA_SET_DATA_TYPE,
 	NFTA_SET_DATA_LEN,
+	NFTA_SET_POLICY,
+	NFTA_SET_DESC,
+	NFTA_SET_ID,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
@@ -266,12 +295,14 @@
  * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_set_elem_list_attributes {
 	NFTA_SET_ELEM_LIST_UNSPEC,
 	NFTA_SET_ELEM_LIST_TABLE,
 	NFTA_SET_ELEM_LIST_SET,
 	NFTA_SET_ELEM_LIST_ELEMENTS,
+	NFTA_SET_ELEM_LIST_SET_ID,
 	__NFTA_SET_ELEM_LIST_MAX
 };
 #define NFTA_SET_ELEM_LIST_MAX	(__NFTA_SET_ELEM_LIST_MAX - 1)
@@ -457,12 +488,14 @@
  * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
  * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
  * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_lookup_attributes {
 	NFTA_LOOKUP_UNSPEC,
 	NFTA_LOOKUP_SET,
 	NFTA_LOOKUP_SREG,
 	NFTA_LOOKUP_DREG,
+	NFTA_LOOKUP_SET_ID,
 	__NFTA_LOOKUP_MAX
 };
 #define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
@@ -536,6 +569,8 @@
  * @NFT_META_SECMARK: packet secmark (skb->secmark)
  * @NFT_META_NFPROTO: netfilter protocol
  * @NFT_META_L4PROTO: layer 4 protocol number
+ * @NFT_META_BRI_IIFNAME: packet input bridge interface name
+ * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -555,6 +590,8 @@
 	NFT_META_SECMARK,
 	NFT_META_NFPROTO,
 	NFT_META_L4PROTO,
+	NFT_META_BRI_IIFNAME,
+	NFT_META_BRI_OIFNAME,
 };
 
 /**

diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 596ddd4..354a7e5 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h

@@ -20,6 +20,8 @@
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
 	NFNLGRP_NFTABLES,
 #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
+	NFNLGRP_ACCT_QUOTA,
+#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)

diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index c7b6269..51404ec 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h

@@ -10,15 +10,24 @@
 	NFNL_MSG_ACCT_GET,
 	NFNL_MSG_ACCT_GET_CTRZERO,
 	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_OVERQUOTA,
 	NFNL_MSG_ACCT_MAX
 };
 
+enum nfnl_acct_flags {
+	NFACCT_F_QUOTA_PKTS	= (1 << 0),
+	NFACCT_F_QUOTA_BYTES	= (1 << 1),
+	NFACCT_F_OVERQUOTA	= (1 << 2), /* can't be set from userspace */
+};
+
 enum nfnl_acct_type {
 	NFACCT_UNSPEC,
 	NFACCT_NAME,
 	NFACCT_PKTS,
 	NFACCT_BYTES,
 	NFACCT_USE,
+	NFACCT_FLAGS,
+	NFACCT_QUOTA,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 9789dc9..9b19b44 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h

@@ -273,11 +273,19 @@
  * First byte is the adapter index
  * Second byte contains flags
  *  - 0x01 - Direction (0=RX, 1=TX)
- *  - 0x02-0x80 - Reserved
+ *  - 0x02-0x04 - Payload type (000=LLCP, 001=NCI, 010=HCI, 011=Digital,
+ *                              100=Proprietary)
+ *  - 0x05-0x80 - Reserved
  **/
-#define NFC_LLCP_RAW_HEADER_SIZE	2
-#define NFC_LLCP_DIRECTION_RX		0x00
-#define NFC_LLCP_DIRECTION_TX		0x01
+#define NFC_RAW_HEADER_SIZE	2
+#define NFC_DIRECTION_RX		0x00
+#define NFC_DIRECTION_TX		0x01
+
+#define RAW_PAYLOAD_LLCP 0
+#define RAW_PAYLOAD_NCI	1
+#define RAW_PAYLOAD_HCI	2
+#define RAW_PAYLOAD_DIGITAL	3
+#define RAW_PAYLOAD_PROPRIETARY	4
 
 /* socket option names */
 #define NFC_LLCP_RW		0

diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
index 616e3b3..2039123 100644
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ b/include/uapi/linux/nfsd/nfsfh.h

@@ -1,13 +1,7 @@
 /*
- * include/linux/nfsd/nfsfh.h
- *
  * This file describes the layout of the file handles as passed
  * over the wire.
  *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
  * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
  */
 
@@ -37,7 +31,7 @@
 };
 
 /*
- * This is the new flexible, extensible style NFSv2/v3 file handle.
+ * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
  *
  * The file handle starts with a sequence of four-byte words.
@@ -47,14 +41,7 @@
  *
  * All four-byte values are in host-byte-order.
  *
- * The auth_type field specifies how the filehandle can be authenticated
- * This might allow a file to be confirmed to be in a writable part of a
- * filetree without checking the path from it up to the root.
- * Current values:
- *     0  - No authentication.  fb_auth is 0 bytes long
- * Possible future values:
- *     1  - 4 bytes taken from MD5 hash of the remainer of the file handle
- *          prefixed by a secret and with the important export flags.
+ * The auth_type field is deprecated and must be set to 0.
  *
  * The fsid_type identifies how the filesystem (or export point) is
  *    encoded.
@@ -71,14 +58,9 @@
  *     7  - 8 byte inode number and 16 byte uuid
  *
  * The fileid_type identified how the file within the filesystem is encoded.
- * This is (will be) passed to, and set by, the underlying filesystem if it supports
- * filehandle operations.  The filesystem must not use the value '0' or '0xff' and may
- * only use the values 1 and 2 as defined below:
- *  Current values:
- *    0   - The root, or export point, of the filesystem.  fb_fileid is 0 bytes.
- *    1   - 32bit inode number, 32 bit generation number.
- *    2   - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number.
- *
+ *   The values for this field are filesystem specific, except that
+ *   filesystems must not use the values '0' or '0xff'. See 'enum fid_type'
+ *   in include/linux/exportfs.h for currently registered values.
  */
 struct nfs_fhbase_new {
 	__u8		fb_version;	/* == 1, even => nfs_fhbase_old */
@@ -114,9 +96,9 @@
 #define	fh_fsid_type		fh_base.fh_new.fb_fsid_type
 #define	fh_auth_type		fh_base.fh_new.fb_auth_type
 #define	fh_fileid_type		fh_base.fh_new.fb_fileid_type
-#define	fh_auth			fh_base.fh_new.fb_auth
 #define	fh_fsid			fh_base.fh_new.fb_auth
 
-
+/* Do not use, provided for userspace compatibility. */
+#define	fh_auth			fh_base.fh_new.fb_auth
 
 #endif /* _UAPI_LINUX_NFSD_FH_H */

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 194c1ea..be9519b 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h

@@ -503,6 +503,9 @@
  *	TX status event pertaining to the TX request.
  *	%NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
  *	management frames at CCK rate or not in 2GHz band.
+ *	%NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA
+ *	counters which will be updated to the current value. This attribute
+ *	is used during CSA period.
  * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
  *	command may be used with the corresponding cookie to cancel the wait
  *	time if it is known that it is no longer necessary.
@@ -1525,10 +1528,10 @@
  *	operation).
  * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
  *	for the time while performing a channel switch.
- * @NL80211_ATTR_CSA_C_OFF_BEACON: Offset of the channel switch counter
- *	field in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
- * @NL80211_ATTR_CSA_C_OFF_PRESP: Offset of the channel switch counter
- *	field in the probe response (%NL80211_ATTR_PROBE_RESP).
+ * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
+ *	switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
+ *	switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
  *
  * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
  *	As specified in the &enum nl80211_rxmgmt_flags.
@@ -1576,9 +1579,18 @@
  *	advertise values that cannot always be met. In such cases, an attempt
  *	to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
  *
+ * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which
+ *	should be updated when the frame is transmitted.
+ * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum
+ *	supported number of csa counters.
+ *
  * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
  *	As specified in the &enum nl80211_tdls_peer_capability.
  *
+ * @NL80211_ATTR_IFACE_SOCKET_OWNER: flag attribute, if set during interface
+ *	creation then the new interface will be owned by the netlink socket
+ *	that created it and will be destroyed when the socket is closed
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1914,6 +1926,11 @@
 
 	NL80211_ATTR_TDLS_PEER_CAPABILITY,
 
+	NL80211_ATTR_IFACE_SOCKET_OWNER,
+
+	NL80211_ATTR_CSA_C_OFFSETS_TX,
+	NL80211_ATTR_MAX_CSA_COUNTERS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2182,6 +2199,8 @@
  *	Contains a nested array of signal strength attributes (u8, dBm)
  * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average
  *	Same format as NL80211_STA_INFO_CHAIN_SIGNAL.
+ * @NL80211_STA_INFO_EXPECTED_THROUGHPUT: expected throughput considering also
+ *	the 802.11 header (u32, kbps)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2213,6 +2232,7 @@
 	NL80211_STA_INFO_TX_BYTES64,
 	NL80211_STA_INFO_CHAIN_SIGNAL,
 	NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
+	NL80211_STA_INFO_EXPECTED_THROUGHPUT,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
@@ -2336,9 +2356,34 @@
  *	using this channel as the primary or any of the secondary channels
  *	isn't possible
  * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this
+ *	channel. A channel that has the INDOOR_ONLY attribute can only be
+ *	used when there is a clear assessment that the device is operating in
+ *	indoor surroundings, i.e., it is connected to AC power (and not
+ *	through portable DC inverters) or is under the control of a master
+ *	that is acting as an AP and is connected to AC power.
+ * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this
+ *	channel if it's connected concurrently to a BSS on the same channel on
+ *	the 2 GHz band or to a channel in the same UNII band (on the 5 GHz
+ *	band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a
+ *	channel that has the GO_CONCURRENT attribute set can be done when there
+ *	is a clear assessment that the device is operating under the guidance of
+ *	an authorized master, i.e., setting up a GO while the device is also
+ *	connected to an AP with DFS and radar detection on the UNII band (it is
+ *	up to user-space, i.e., wpa_supplicant to perform the required
+ *	verifications)
+ * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
+ *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
+ *
+ * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122
+ * for more information on the FCC description of the relaxations allowed
+ * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and
+ * NL80211_FREQUENCY_ATTR_GO_CONCURRENT.
  */
 enum nl80211_frequency_attr {
 	__NL80211_FREQUENCY_ATTR_INVALID,
@@ -2355,6 +2400,10 @@
 	NL80211_FREQUENCY_ATTR_NO_80MHZ,
 	NL80211_FREQUENCY_ATTR_NO_160MHZ,
 	NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+	NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
+	NL80211_FREQUENCY_ATTR_GO_CONCURRENT,
+	NL80211_FREQUENCY_ATTR_NO_20MHZ,
+	NL80211_FREQUENCY_ATTR_NO_10MHZ,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -2573,10 +2622,13 @@
  *	present has been registered with the wireless core that
  *	has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a
  *	supported feature.
+ * @NL80211_USER_REG_HINT_INDOOR: a user sent a hint indicating that the
+ *	platform is operating in an indoor environment.
  */
 enum nl80211_user_reg_hint_type {
 	NL80211_USER_REG_HINT_USER	= 0,
 	NL80211_USER_REG_HINT_CELL_BASE = 1,
+	NL80211_USER_REG_HINT_INDOOR    = 2,
 };
 
 /**
@@ -3650,6 +3702,8 @@
  *	different channels may be used within this group.
  * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap
  *	of supported channel widths for radar detection.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap
+ *	of supported regulatory regions for radar detection.
  * @NUM_NL80211_IFACE_COMB: number of attributes
  * @MAX_NL80211_IFACE_COMB: highest attribute number
  *
@@ -3683,6 +3737,7 @@
 	NL80211_IFACE_COMB_STA_AP_BI_MATCH,
 	NL80211_IFACE_COMB_NUM_CHANNELS,
 	NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+	NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
 
 	/* keep last */
 	NUM_NL80211_IFACE_COMB,
@@ -3893,6 +3948,9 @@
  *	interface. An active monitor interface behaves like a normal monitor
  *	interface, but gets added to the driver. It ensures that incoming
  *	unicast packets directed at the configured interface address get ACKed.
+ * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
+ *	channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
+ *	lifetime of a BSS.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -3913,6 +3971,7 @@
 	NL80211_FEATURE_FULL_AP_CLIENT_STATE		= 1 << 15,
 	NL80211_FEATURE_USERSPACE_MPM			= 1 << 16,
 	NL80211_FEATURE_ACTIVE_MONITOR			= 1 << 17,
+	NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE	= 1 << 18,
 };
 
 /**

diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 096fe1c..29a7d86 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h

@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _UAPI_LINUX_NVME_H
@@ -31,7 +27,12 @@
 	__u8			read_lat;
 	__u8			write_tput;
 	__u8			write_lat;
-	__u8			rsvd16[16];
+	__le16			idle_power;
+	__u8			idle_scale;
+	__u8			rsvd19;
+	__le16			active_power;
+	__u8			active_work_scale;
+	__u8			rsvd23[9];
 };
 
 enum {
@@ -49,7 +50,9 @@
 	__u8			ieee[3];
 	__u8			mic;
 	__u8			mdts;
-	__u8			rsvd78[178];
+	__u16			cntlid;
+	__u32			ver;
+	__u8			rsvd84[172];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -57,7 +60,11 @@
 	__u8			lpa;
 	__u8			elpe;
 	__u8			npss;
-	__u8			rsvd264[248];
+	__u8			avscc;
+	__u8			apsta;
+	__le16			wctemp;
+	__le16			cctemp;
+	__u8			rsvd270[242];
 	__u8			sqes;
 	__u8			cqes;
 	__u8			rsvd514[2];
@@ -68,7 +75,12 @@
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd530[1518];
+	__u8			nvscc;
+	__u8			rsvd531;
+	__le16			acwu;
+	__u8			rsvd534[2];
+	__le32			sgls;
+	__u8			rsvd540[1508];
 	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
@@ -77,6 +89,7 @@
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
 struct nvme_lbaf {
@@ -95,7 +108,15 @@
 	__u8			mc;
 	__u8			dpc;
 	__u8			dps;
-	__u8			rsvd30[98];
+	__u8			nmic;
+	__u8			rescap;
+	__u8			fpi;
+	__u8			rsvd33;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__u8			rsvd40[80];
+	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
 	__u8			rsvd192[192];
 	__u8			vs[3712];
@@ -126,7 +147,10 @@
 	__u8			unsafe_shutdowns[16];
 	__u8			media_errors[16];
 	__u8			num_err_log_entries[16];
-	__u8			rsvd192[320];
+	__le32			warning_temp_time;
+	__le32			critical_comp_time;
+	__le16			temp_sensor[8];
+	__u8			rsvd216[296];
 };
 
 enum {
@@ -282,6 +306,10 @@
 	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
 	NVME_FEAT_ASYNC_EVENT	= 0x0b,
 	NVME_FEAT_SW_PROGRESS	= 0x0c,
+	NVME_LOG_ERROR		= 0x01,
+	NVME_LOG_SMART		= 0x02,
+	NVME_LOG_FW_SLOT	= 0x03,
+	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
 	NVME_FWACT_REPL_ACTV	= (1 << 3),
 	NVME_FWACT_ACTV		= (2 << 3),

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 970553c..0b979ee 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h

@@ -395,7 +395,9 @@
  * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
  * the actions to take for packets that match the key.  Always present in
  * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
- * %OVS_FLOW_CMD_SET requests.
+ * %OVS_FLOW_CMD_SET requests.  An %OVS_FLOW_CMD_SET without
+ * %OVS_FLOW_ATTR_ACTIONS will not modify the actions.  To clear the actions,
+ * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given.
  * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
  * flow.  Present in notifications if the stats would be nonzero.  Ignored in
  * requests.

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e3fc8f0..5312fae 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h

@@ -163,8 +163,9 @@
 	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
 	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
 	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -301,8 +302,8 @@
 				exclude_callchain_kernel : 1, /* exclude kernel callchains */
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
-
-				__reserved_1   : 40;
+				comm_exec      :  1, /* flag comm events that are due to an exec */
+				__reserved_1   : 39;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -501,7 +502,12 @@
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events so can reuse the same bit position.
+ */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 852373d..6f71b9b 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h

@@ -38,6 +38,7 @@
 #define _LINUX_TIPC_H_
 
 #include <linux/types.h>
+#include <linux/sockios.h>
 
 /*
  * TIPC addressing primitives
@@ -87,6 +88,7 @@
 
 #define TIPC_CFG_SRV		0	/* configuration service name type */
 #define TIPC_TOP_SRV		1	/* topology service name type */
+#define TIPC_LINK_STATE		2	/* link state name type */
 #define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
 
 /*
@@ -206,4 +208,25 @@
 #define TIPC_NODE_RECVQ_DEPTH	131	/* Default: none (read only) */
 #define TIPC_SOCK_RECVQ_DEPTH	132	/* Default: none (read only) */
 
+/*
+ * Maximum sizes of TIPC bearer-related names (including terminating NUL)
+ * The string formatting for each name element is:
+ * media: media
+ * interface: media:interface name
+ * link: Z.C.N:interface-Z.C.N:interface
+ *
+ */
+
+#define TIPC_MAX_MEDIA_NAME	16
+#define TIPC_MAX_IF_NAME	16
+#define TIPC_MAX_BEARER_NAME	32
+#define TIPC_MAX_LINK_NAME	60
+
+#define SIOCGETLINKNAME		SIOCPROTOPRIVATE
+
+struct tipc_sioc_ln_req {
+	__u32 peer;
+	__u32 bearer_id;
+	char linkname[TIPC_MAX_LINK_NAME];
+};
 #endif

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 6b0bff0..41a76ac 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h

@@ -39,6 +39,7 @@
 
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/tipc.h>
 #include <asm/byteorder.h>
 
 #ifndef __KERNEL__
@@ -155,15 +156,6 @@
 #define TIPC_TLV_PORT_REF	26	/* 32-bit port reference */
 
 /*
- * Maximum sizes of TIPC bearer-related names (including terminating NUL)
- */
-
-#define TIPC_MAX_MEDIA_NAME	16	/* format = media */
-#define TIPC_MAX_IF_NAME	16	/* format = interface */
-#define TIPC_MAX_BEARER_NAME	32	/* format = media:interface */
-#define TIPC_MAX_LINK_NAME	60	/* format = Z.C.N:interface-Z.C.N:interface */
-
-/*
  * Link priority limits (min, default, max, media default)
  */
 

diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index e2bcfd7..16574ea 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h

@@ -29,6 +29,8 @@
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
+#define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6 */
+#define UDP_NO_CHECK6_RX 102	/* Disable accepting checksum for UDP6 */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */

diff --git a/include/uapi/mtd/mtd-abi.h b/include/uapi/mtd/mtd-abi.h
index e272ea0..763bb69 100644
--- a/include/uapi/mtd/mtd-abi.h
+++ b/include/uapi/mtd/mtd-abi.h

@@ -109,6 +109,7 @@
 #define MTD_CAP_RAM		(MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE)
 #define MTD_CAP_NORFLASH	(MTD_WRITEABLE | MTD_BIT_WRITEABLE)
 #define MTD_CAP_NANDFLASH	(MTD_WRITEABLE)
+#define MTD_CAP_NVRAM		(MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE)
 
 /* Obsolete ECC byte placement modes (used with obsolete MEMGETOOBSEL) */
 #define MTD_NANDECC_OFF		0	// Switch off ECC (Not recommended)

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 8297285..de69170 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h

@@ -4,7 +4,16 @@
 #include <linux/types.h>
 
 enum {
-	RDMA_NL_RDMA_CM = 1
+	RDMA_NL_RDMA_CM = 1,
+	RDMA_NL_NES,
+	RDMA_NL_C4IW,
+	RDMA_NL_NUM_CLIENTS
+};
+
+enum {
+	RDMA_NL_GROUP_CM = 1,
+	RDMA_NL_GROUP_IWPM,
+	RDMA_NL_NUM_GROUPS
 };
 
 #define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
@@ -22,6 +31,18 @@
 	RDMA_NL_RDMA_CM_NUM_ATTR,
 };
 
+/* iwarp port mapper op-codes */
+enum {
+	RDMA_NL_IWPM_REG_PID = 0,
+	RDMA_NL_IWPM_ADD_MAPPING,
+	RDMA_NL_IWPM_QUERY_MAPPING,
+	RDMA_NL_IWPM_REMOVE_MAPPING,
+	RDMA_NL_IWPM_HANDLE_ERR,
+	RDMA_NL_IWPM_MAPINFO,
+	RDMA_NL_IWPM_MAPINFO_NUM,
+	RDMA_NL_IWPM_NUM_OPS
+};
+
 struct rdma_cm_id_stats {
 	__u32	qp_num;
 	__u32	bound_dev_if;
@@ -33,5 +54,78 @@
 	__u8	qp_type;
 };
 
+enum {
+	IWPM_NLA_REG_PID_UNSPEC = 0,
+	IWPM_NLA_REG_PID_SEQ,
+	IWPM_NLA_REG_IF_NAME,
+	IWPM_NLA_REG_IBDEV_NAME,
+	IWPM_NLA_REG_ULIB_NAME,
+	IWPM_NLA_REG_PID_MAX
+};
+
+enum {
+	IWPM_NLA_RREG_PID_UNSPEC = 0,
+	IWPM_NLA_RREG_PID_SEQ,
+	IWPM_NLA_RREG_IBDEV_NAME,
+	IWPM_NLA_RREG_ULIB_NAME,
+	IWPM_NLA_RREG_ULIB_VER,
+	IWPM_NLA_RREG_PID_ERR,
+	IWPM_NLA_RREG_PID_MAX
+
+};
+
+enum {
+	IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
+	IWPM_NLA_MANAGE_MAPPING_SEQ,
+	IWPM_NLA_MANAGE_ADDR,
+	IWPM_NLA_MANAGE_MAPPED_LOC_ADDR,
+	IWPM_NLA_RMANAGE_MAPPING_ERR,
+	IWPM_NLA_RMANAGE_MAPPING_MAX
+};
+
+#define IWPM_NLA_MANAGE_MAPPING_MAX 3
+#define IWPM_NLA_QUERY_MAPPING_MAX  4
+#define IWPM_NLA_MAPINFO_SEND_MAX   3
+
+enum {
+	IWPM_NLA_QUERY_MAPPING_UNSPEC = 0,
+	IWPM_NLA_QUERY_MAPPING_SEQ,
+	IWPM_NLA_QUERY_LOCAL_ADDR,
+	IWPM_NLA_QUERY_REMOTE_ADDR,
+	IWPM_NLA_RQUERY_MAPPED_LOC_ADDR,
+	IWPM_NLA_RQUERY_MAPPED_REM_ADDR,
+	IWPM_NLA_RQUERY_MAPPING_ERR,
+	IWPM_NLA_RQUERY_MAPPING_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_REQ_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_ULIB_NAME,
+	IWPM_NLA_MAPINFO_ULIB_VER,
+	IWPM_NLA_MAPINFO_REQ_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_LOCAL_ADDR,
+	IWPM_NLA_MAPINFO_MAPPED_ADDR,
+	IWPM_NLA_MAPINFO_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_NUM_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_SEQ,
+	IWPM_NLA_MAPINFO_SEND_NUM,
+	IWPM_NLA_MAPINFO_ACK_NUM,
+	IWPM_NLA_MAPINFO_NUM_MAX
+};
+
+enum {
+	IWPM_NLA_ERR_UNSPEC = 0,
+	IWPM_NLA_ERR_SEQ,
+	IWPM_NLA_ERR_CODE,
+	IWPM_NLA_ERR_MAX
+};
+
 
 #endif /* _UAPI_RDMA_NETLINK_H */

diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 5759810..21eed48 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h

@@ -80,7 +80,7 @@
 struct snd_compr_avail {
 	__u64 avail;
 	struct snd_compr_tstamp tstamp;
-};
+} __attribute__((packed));
 
 enum snd_compr_direction {
 	SND_COMPRESS_PLAYBACK = 0,

diff --git a/drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
similarity index 94%
rename from drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h
rename to include/video/imx-ipu-v3.h
index c2c6fab..3e43e22 100644
--- a/drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h

@@ -104,6 +104,7 @@
 
 void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
 		bool doublebuffer);
+int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel);
 void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num);
 
 /*
@@ -165,6 +166,20 @@
 int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha,
 		bool bg_chan);
 
+/*
+ * IPU CMOS Sensor Interface (csi) functions
+ */
+int ipu_csi_enable(struct ipu_soc *ipu, int csi);
+int ipu_csi_disable(struct ipu_soc *ipu, int csi);
+
+/*
+ * IPU Sensor Multiple FIFO Controller (SMFC) functions
+ */
+int ipu_smfc_enable(struct ipu_soc *ipu);
+int ipu_smfc_disable(struct ipu_soc *ipu);
+int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id);
+int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize);
+
 #define IPU_CPMEM_WORD(word, ofs, size) ((((word) * 160 + (ofs)) << 8) | (size))
 
 #define IPU_FIELD_UBO		IPU_CPMEM_WORD(0, 46, 22)
@@ -321,6 +336,7 @@
 };
 
 struct ipu_client_platformdata {
+	int csi;
 	int di;
 	int dc;
 	int dp;

diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index c50061d..70054cc 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h

@@ -51,6 +51,59 @@
  */
 
 /*
+ * Multiple transmit and receive queues:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vif, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues", set to the number they wish to use, which
+ * must be greater than zero, and no more than the value reported by the backend
+ * in "multi-queue-max-queues".
+ *
+ * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using
+ * multiple queues, but is not mandatory.
+ *
+ * Each queue consists of one shared ring pair, i.e. there must be the same
+ * number of tx and rx rings.
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
+ * instead writing those keys under sub-keys having the name "queue-N" where
+ * N is the integer ID of the queue for which those keys belong. Queues
+ * are indexed from zero. For example, a frontend with two queues and split
+ * event channels must write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
+ *
+ * If there is any inconsistency in the XenStore data, the backend may
+ * choose not to connect any queues, instead treating the request as an
+ * error. This includes scenarios where more (or fewer) queues were
+ * requested than the frontend provided details for.
+ *
+ * Mapping of packets to queues is considered to be a function of the
+ * transmitting system (backend or frontend) and is not negotiated
+ * between the two. Guests are free to transmit packets on any queue
+ * they choose, provided it has been set up correctly. Guests must be
+ * prepared to receive packets on any queue they have requested be set up.
+ */
+
+/*
  * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
  * offload off or on. If it is missing then the feature is assumed to be on.
  * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum

diff --git a/init/main.c b/init/main.c
index 0ec2515..e8ae1fe 100644
--- a/init/main.c
+++ b/init/main.c

@@ -253,6 +253,27 @@
 	return 0;
 }
 
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val, const char *unused)
+{
+	unsigned int i;
+
+	if (panic_later)
+		return 0;
+
+	repair_env_string(param, val, unused);
+
+	for (i = 0; argv_init[i]; i++) {
+		if (i == MAX_INIT_ARGS) {
+			panic_later = "init";
+			panic_param = param;
+			return 0;
+		}
+	}
+	argv_init[i] = param;
+	return 0;
+}
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
@@ -479,7 +500,7 @@
 
 asmlinkage __visible void __init start_kernel(void)
 {
-	char * command_line;
+	char * command_line, *after_dashes;
 	extern const struct kernel_param __start___param[], __stop___param[];
 
 	/*
@@ -519,9 +540,13 @@
 
 	pr_notice("Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
-	parse_args("Booting kernel", static_command_line, __start___param,
-		   __stop___param - __start___param,
-		   -1, -1, &unknown_bootoption);
+	after_dashes = parse_args("Booting kernel",
+				  static_command_line, __start___param,
+				  __stop___param - __start___param,
+				  -1, -1, &unknown_bootoption);
+	if (after_dashes)
+		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+			   set_init_arg);
 
 	jump_label_init();
 

diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index d2b32ac..35536d9 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks

@@ -223,3 +223,10 @@
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_RWLOCK
+	bool
+
+config QUEUE_RWLOCK
+	def_bool y if ARCH_USE_QUEUE_RWLOCK
+	depends on SMP

diff --git a/kernel/audit.c b/kernel/audit.c
index f301064..3ef2e0e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c

@@ -424,6 +424,38 @@
 }
 
 /*
+ * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ *
+ * This function doesn't consume an skb as might be expected since it has to
+ * copy it anyways.
+ */
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
+{
+	struct sk_buff		*copy;
+	struct audit_net	*aunet = net_generic(&init_net, audit_net_id);
+	struct sock		*sock = aunet->nlsk;
+
+	if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
+		return;
+
+	/*
+	 * The seemingly wasteful skb_copy() rather than bumping the refcount
+	 * using skb_get() is necessary because non-standard mods are made to
+	 * the skb by the original kaudit unicast socket send routine.  The
+	 * existing auditd daemon assumes this breakage.  Fixing this would
+	 * require co-ordinating a change in the established protocol between
+	 * the kaudit kernel subsystem and the auditd userspace code.  There is
+	 * no reason for new multicast clients to continue with this
+	 * non-compliance.
+	 */
+	copy = skb_copy(skb, GFP_KERNEL);
+	if (!copy)
+		return;
+
+	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
+}
+
+/*
  * flush_hold_queue - empty the hold queue if auditd appears
  *
  * If auditd just started, drain the queue of messages already
@@ -1076,10 +1108,22 @@
 	mutex_unlock(&audit_cmd_mutex);
 }
 
+/* Run custom bind function on netlink socket group connect or bind requests. */
+static int audit_bind(int group)
+{
+	if (!capable(CAP_AUDIT_READ))
+		return -EPERM;
+
+	return 0;
+}
+
 static int __net_init audit_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
 		.input	= audit_receive,
+		.bind	= audit_bind,
+		.flags	= NL_CFG_F_NONROOT_RECV,
+		.groups	= AUDIT_NLGRP_MAX,
 	};
 
 	struct audit_net *aunet = net_generic(net, audit_net_id);
@@ -1901,10 +1945,10 @@
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
  *
- * The netlink_* functions cannot be called inside an irq context, so
- * the audit buffer is placed on a queue and a tasklet is scheduled to
- * remove them from the queue outside the irq context.  May be called in
- * any context.
+ * netlink_unicast() cannot be called inside an irq context because it blocks
+ * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
+ * on a queue and a tasklet is scheduled to remove them from the queue outside
+ * the irq context.  May be called in any context.
  */
 void audit_log_end(struct audit_buffer *ab)
 {
@@ -1914,6 +1958,18 @@
 		audit_log_lost("rate limit exceeded");
 	} else {
 		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+
+		kauditd_send_multicast_skb(ab->skb);
+
+		/*
+		 * The original kaudit unicast socket sends up messages with
+		 * nlmsg_len set to the payload length rather than the entire
+		 * message length.  This breaks the standard set by netlink.
+		 * The existing auditd daemon assumes this breakage.  Fixing
+		 * this would require co-ordinating a change in the established
+		 * protocol between the kaudit kernel subsystem and the auditd
+		 * userspace code.
+		 */
 		nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN;
 
 		if (audit_pid) {

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f251a5e..21eae3c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c

@@ -728,6 +728,22 @@
 	return AUDIT_BUILD_CONTEXT;
 }
 
+static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
+{
+	int word, bit;
+
+	if (val > 0xffffffff)
+		return false;
+
+	word = AUDIT_WORD(val);
+	if (word >= AUDIT_BITMASK_SIZE)
+		return false;
+
+	bit = AUDIT_BIT(val);
+
+	return rule->mask[word] & bit;
+}
+
 /* At syscall entry and exit time, this filter is called if the
  * audit_state is not low enough that auditing cannot take place, but is
  * also not high enough that we already know we have to write an audit
@@ -745,11 +761,8 @@
 
 	rcu_read_lock();
 	if (!list_empty(list)) {
-		int word = AUDIT_WORD(ctx->major);
-		int bit  = AUDIT_BIT(ctx->major);
-
 		list_for_each_entry_rcu(e, list, list) {
-			if ((e->rule.mask[word] & bit) == bit &&
+			if (audit_in_mask(&e->rule, ctx->major) &&
 			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
 					       &state, false)) {
 				rcu_read_unlock();
@@ -769,20 +782,16 @@
 static int audit_filter_inode_name(struct task_struct *tsk,
 				   struct audit_names *n,
 				   struct audit_context *ctx) {
-	int word, bit;
 	int h = audit_hash_ino((u32)n->ino);
 	struct list_head *list = &audit_inode_hash[h];
 	struct audit_entry *e;
 	enum audit_state state;
 
-	word = AUDIT_WORD(ctx->major);
-	bit  = AUDIT_BIT(ctx->major);
-
 	if (list_empty(list))
 		return 0;
 
 	list_for_each_entry_rcu(e, list, list) {
-		if ((e->rule.mask[word] & bit) == bit &&
+		if (audit_in_mask(&e->rule, ctx->major) &&
 		    audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) {
 			ctx->current_state = state;
 			return 1;

diff --git a/kernel/capability.c b/kernel/capability.c
index 84b2bbf..a5cf13c 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c

@@ -424,23 +424,19 @@
 EXPORT_SYMBOL(capable);
 
 /**
- * inode_capable - Check superior capability over inode
+ * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
  * @inode: The inode in question
  * @cap: The capability in question
  *
- * Return true if the current task has the given superior capability
- * targeted at it's own user namespace and that the given inode is owned
- * by the current user namespace or a child namespace.
- *
- * Currently we check to see if an inode is owned by the current
- * user namespace by seeing if the inode's owner maps into the
- * current user namespace.
- *
+ * Return true if the current task has the given capability targeted at
+ * its own user namespace and that the given inode's uid and gid are
+ * mapped into the current user namespace.
  */
-bool inode_capable(const struct inode *inode, int cap)
+bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
 {
 	struct user_namespace *ns = current_user_ns();
 
-	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
+	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+		kgid_has_mapping(ns, inode->i_gid);
 }
-EXPORT_SYMBOL(inode_capable);
+EXPORT_SYMBOL(capable_wrt_inode_uidgid);

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ceee0c5..7868fc3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c

@@ -26,6 +26,8 @@
  *  distribution for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cgroup.h>
 #include <linux/cred.h>
 #include <linux/ctype.h>
@@ -70,15 +72,6 @@
 					 MAX_CFTYPE_NAME + 2)
 
 /*
- * cgroup_tree_mutex nests above cgroup_mutex and protects cftypes, file
- * creation/removal and hierarchy changing operations including cgroup
- * creation, removal, css association and controller rebinding.  This outer
- * lock is needed mainly to resolve the circular dependency between kernfs
- * active ref and cgroup_mutex.  cgroup_tree_mutex nests above both.
- */
-static DEFINE_MUTEX(cgroup_tree_mutex);
-
-/*
  * cgroup_mutex is the master lock.  Any modification to cgroup or its
  * hierarchy must be performed while holding it.
  *
@@ -99,16 +92,21 @@
 #endif
 
 /*
+ * Protects cgroup_idr and css_idr so that IDs can be released without
+ * grabbing cgroup_mutex.
+ */
+static DEFINE_SPINLOCK(cgroup_idr_lock);
+
+/*
  * Protects cgroup_subsys->release_agent_path.  Modifying it also requires
  * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
  */
 static DEFINE_SPINLOCK(release_agent_path_lock);
 
-#define cgroup_assert_mutexes_or_rcu_locked()				\
+#define cgroup_assert_mutex_or_rcu_locked()				\
 	rcu_lockdep_assert(rcu_read_lock_held() ||			\
-			   lockdep_is_held(&cgroup_tree_mutex) ||	\
 			   lockdep_is_held(&cgroup_mutex),		\
-			   "cgroup_[tree_]mutex or RCU read lock required");
+			   "cgroup_mutex or RCU read lock required");
 
 /*
  * cgroup destruction makes heavy use of work items and there can be a lot
@@ -151,6 +149,13 @@
  */
 static bool cgrp_dfl_root_visible;
 
+/* some controllers are not supported in the default hierarchy */
+static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0
+#ifdef CONFIG_CGROUP_DEBUG
+	| (1 << debug_cgrp_id)
+#endif
+	;
+
 /* The list of hierarchy roots */
 
 static LIST_HEAD(cgroup_roots);
@@ -160,14 +165,13 @@
 static DEFINE_IDR(cgroup_hierarchy_idr);
 
 /*
- * Assign a monotonically increasing serial number to cgroups.  It
- * guarantees cgroups with bigger numbers are newer than those with smaller
- * numbers.  Also, as cgroups are always appended to the parent's
- * ->children list, it guarantees that sibling cgroups are always sorted in
- * the ascending serial number order on the list.  Protected by
- * cgroup_mutex.
+ * Assign a monotonically increasing serial number to csses.  It guarantees
+ * cgroups with bigger numbers are newer than those with smaller numbers.
+ * Also, as csses are always appended to the parent's ->children list, it
+ * guarantees that sibling csses are always sorted in the ascending serial
+ * number order on the list.  Protected by cgroup_mutex.
  */
-static u64 cgroup_serial_nr_next = 1;
+static u64 css_serial_nr_next = 1;
 
 /* This flag indicates whether tasks in the fork and exit paths should
  * check for fork/exit handlers to call. This avoids us having to do
@@ -180,17 +184,59 @@
 
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask);
-static void cgroup_destroy_css_killed(struct cgroup *cgrp);
+			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void css_release(struct percpu_ref *ref);
+static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
 static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
 
+/* IDR wrappers synchronized by cgroup_idr_lock; no sleeping while it's held */
+static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
+			    gfp_t gfp_mask)
+{
+	int ret;
+
+	idr_preload(gfp_mask);
+	spin_lock_bh(&cgroup_idr_lock);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	spin_unlock_bh(&cgroup_idr_lock);
+	idr_preload_end();
+	return ret;
+}
+
+static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
+{
+	void *ret;
+
+	spin_lock_bh(&cgroup_idr_lock);
+	ret = idr_replace(idr, ptr, id);
+	spin_unlock_bh(&cgroup_idr_lock);
+	return ret;
+}
+
+static void cgroup_idr_remove(struct idr *idr, int id)
+{
+	spin_lock_bh(&cgroup_idr_lock);
+	idr_remove(idr, id);
+	spin_unlock_bh(&cgroup_idr_lock);
+}
+
+static struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+	if (parent_css)
+		return container_of(parent_css, struct cgroup, self);
+	return NULL;
+}
+
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
  * function must be called either under cgroup_mutex or rcu_read_lock() and
@@ -203,23 +249,49 @@
 {
 	if (ss)
 		return rcu_dereference_check(cgrp->subsys[ss->id],
-					lockdep_is_held(&cgroup_tree_mutex) ||
 					lockdep_is_held(&cgroup_mutex));
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
+}
+
+/**
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
+ *
+ * Similar to cgroup_css() but returns the effective css, which is defined
+ * as the matching css of the nearest ancestor including self which has @ss
+ * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
+ * function is guaranteed to return non-NULL css.
+ */
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+						struct cgroup_subsys *ss)
+{
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (!ss)
+		return &cgrp->self;
+
+	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
+		return NULL;
+
+	while (cgroup_parent(cgrp) &&
+	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
+		cgrp = cgroup_parent(cgrp);
+
+	return cgroup_css(cgrp, ss);
 }
 
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 {
-	return test_bit(CGRP_DEAD, &cgrp->flags);
+	return !(cgrp->self.flags & CSS_ONLINE);
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
 	struct cgroup *cgrp = of->kn->parent->priv;
-	struct cftype *cft = seq_cft(seq);
+	struct cftype *cft = of_cft(of);
 
 	/*
 	 * This is open and unprotected implementation of cgroup_css().
@@ -232,9 +304,9 @@
 	if (cft->ss)
 		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 }
-EXPORT_SYMBOL_GPL(seq_css);
+EXPORT_SYMBOL_GPL(of_css);
 
 /**
  * cgroup_is_descendant - test ancestry
@@ -250,7 +322,7 @@
 	while (cgrp) {
 		if (cgrp == ancestor)
 			return true;
-		cgrp = cgrp->parent;
+		cgrp = cgroup_parent(cgrp);
 	}
 	return false;
 }
@@ -274,17 +346,30 @@
  * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
  * @cgrp: the target cgroup to iterate css's of
  *
- * Should be called under cgroup_mutex.
+ * Should be called under cgroup_mutex.
  */
 #define for_each_css(css, ssid, cgrp)					\
 	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
 		if (!((css) = rcu_dereference_check(			\
 				(cgrp)->subsys[(ssid)],			\
-				lockdep_is_held(&cgroup_tree_mutex) ||	\
 				lockdep_is_held(&cgroup_mutex)))) { }	\
 		else
 
 /**
+ * for_each_e_css - iterate all effective css's of a cgroup
+ * @css: the iteration cursor
+ * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
+ * @cgrp: the target cgroup to iterate css's of
+ *
+ * Should be called under cgroup_mutex.
+ */
+#define for_each_e_css(css, ssid, cgrp)					\
+	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
+		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+			;						\
+		else
+
+/**
  * for_each_subsys - iterate all enabled cgroup subsystems
  * @ss: the iteration cursor
  * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
@@ -297,22 +382,13 @@
 #define for_each_root(root)						\
 	list_for_each_entry((root), &cgroup_roots, root_list)
 
-/**
- * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
- * @cgrp: the cgroup to be checked for liveness
- *
- * On success, returns true; the mutex should be later unlocked.  On
- * failure returns false with no lock held.
- */
-static bool cgroup_lock_live_group(struct cgroup *cgrp)
-{
-	mutex_lock(&cgroup_mutex);
-	if (cgroup_is_dead(cgrp)) {
-		mutex_unlock(&cgroup_mutex);
-		return false;
-	}
-	return true;
-}
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp)				\
+	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       cgroup_is_dead(child); }))			\
+			;						\
+		else
 
 /* the list of cgroups eligible for automatic release. Protected by
  * release_list_lock */
@@ -360,6 +436,43 @@
 
 static int css_set_count	= 1;	/* 1 for init_css_set */
 
+/**
+ * cgroup_update_populated - update populated count of a cgroup
+ * @cgrp: the target cgroup
+ * @populated: inc or dec populated count
+ *
+ * @cgrp is either getting the first task (css_set) or losing the last.
+ * Update @cgrp->populated_cnt accordingly.  The count is propagated
+ * towards root so that a given cgroup's populated_cnt is zero iff the
+ * cgroup and all its descendants are empty.
+ *
+ * @cgrp's interface file "cgroup.populated" is zero if
+ * @cgrp->populated_cnt is zero and 1 otherwise.  When @cgrp->populated_cnt
+ * changes from or to zero, userland is notified that the content of the
+ * interface file has changed.  This can be used to detect when @cgrp and
+ * its descendants become populated or empty.
+ */
+static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
+{
+	lockdep_assert_held(&css_set_rwsem);
+
+	do {
+		bool trigger;
+
+		if (populated)
+			trigger = !cgrp->populated_cnt++;
+		else
+			trigger = !--cgrp->populated_cnt;
+
+		if (!trigger)
+			break;
+
+		if (cgrp->populated_kn)
+			kernfs_notify(cgrp->populated_kn);
+		cgrp = cgroup_parent(cgrp);
+	} while (cgrp);
+}
+
 /*
  * hash table for cgroup groups. This improves the performance to find
  * an existing css_set. This hash doesn't (currently) take into
@@ -384,6 +497,8 @@
 static void put_css_set_locked(struct css_set *cset, bool taskexit)
 {
 	struct cgrp_cset_link *link, *tmp_link;
+	struct cgroup_subsys *ss;
+	int ssid;
 
 	lockdep_assert_held(&css_set_rwsem);
 
@@ -391,6 +506,8 @@
 		return;
 
 	/* This css_set is dead. unlink it and release cgroup refcounts */
+	for_each_subsys(ss, ssid)
+		list_del(&cset->e_cset_node[ssid]);
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -401,10 +518,13 @@
 		list_del(&link->cgrp_link);
 
 		/* @cgrp can't go away while we're holding css_set_rwsem */
-		if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
+		if (list_empty(&cgrp->cset_links)) {
+			cgroup_update_populated(cgrp, false);
+			if (notify_on_release(cgrp)) {
+				if (taskexit)
+					set_bit(CGRP_RELEASABLE, &cgrp->flags);
+				check_for_release(cgrp);
+			}
 		}
 
 		kfree(link);
@@ -453,20 +573,20 @@
 {
 	struct list_head *l1, *l2;
 
-	if (memcmp(template, cset->subsys, sizeof(cset->subsys))) {
-		/* Not all subsystems matched */
+	/*
+	 * On the default hierarchy, there can be csets which are
+	 * associated with the same set of cgroups but different csses.
+	 * Let's first ensure that csses match.
+	 */
+	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
 		return false;
-	}
 
 	/*
 	 * Compare cgroup pointers in order to distinguish between
-	 * different cgroups in heirarchies with no subsystems. We
-	 * could get by with just this check alone (and skip the
-	 * memcmp above) but on most setups the memcmp check will
-	 * avoid the need for this more expensive check on almost all
-	 * candidates.
+	 * different cgroups in hierarchies.  As different cgroups may
+	 * share the same effective css, this comparison is always
+	 * necessary.
 	 */
-
 	l1 = &cset->cgrp_links;
 	l2 = &old_cset->cgrp_links;
 	while (1) {
@@ -530,14 +650,17 @@
 	 * won't change, so no need for locking.
 	 */
 	for_each_subsys(ss, i) {
-		if (root->cgrp.subsys_mask & (1UL << i)) {
-			/* Subsystem is in this hierarchy. So we want
-			 * the subsystem state from the new
-			 * cgroup */
-			template[i] = cgroup_css(cgrp, ss);
+		if (root->subsys_mask & (1UL << i)) {
+			/*
+			 * @ss is in this hierarchy, so we want the
+			 * effective css from @cgrp.
+			 */
+			template[i] = cgroup_e_css(cgrp, ss);
 		} else {
-			/* Subsystem is not in this hierarchy, so we
-			 * don't want to change the subsystem state */
+			/*
+			 * @ss is not in this hierarchy, so we don't want
+			 * to change the css.
+			 */
 			template[i] = old_cset->subsys[i];
 		}
 	}
@@ -603,10 +726,18 @@
 	struct cgrp_cset_link *link;
 
 	BUG_ON(list_empty(tmp_links));
+
+	if (cgroup_on_dfl(cgrp))
+		cset->dfl_cgrp = cgrp;
+
 	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
 	link->cset = cset;
 	link->cgrp = cgrp;
+
+	if (list_empty(&cgrp->cset_links))
+		cgroup_update_populated(cgrp, true);
 	list_move(&link->cset_link, &cgrp->cset_links);
+
 	/*
 	 * Always add links to the tail of the list so that the list
 	 * is sorted by order of hierarchy creation
@@ -629,7 +760,9 @@
 	struct css_set *cset;
 	struct list_head tmp_links;
 	struct cgrp_cset_link *link;
+	struct cgroup_subsys *ss;
 	unsigned long key;
+	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -680,10 +813,14 @@
 
 	css_set_count++;
 
-	/* Add this cgroup group to the hash table */
+	/* Add @cset to the hash table */
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
+	for_each_subsys(ss, ssid)
+		list_add_tail(&cset->e_cset_node[ssid],
+			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+
 	up_write(&css_set_rwsem);
 
 	return cset;
@@ -736,14 +873,13 @@
 	struct cgroup *cgrp = &root->cgrp;
 	struct cgrp_cset_link *link, *tmp_link;
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
 	BUG_ON(atomic_read(&root->nr_cgrps));
-	BUG_ON(!list_empty(&cgrp->children));
+	BUG_ON(!list_empty(&cgrp->self.children));
 
 	/* Rebind all subsystems back to the default hierarchy */
-	rebind_subsystems(&cgrp_dfl_root, cgrp->subsys_mask);
+	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
 
 	/*
 	 * Release all the links from cset_links to this hierarchy's
@@ -766,7 +902,6 @@
 	cgroup_exit_root_id(root);
 
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 
 	kernfs_destroy_root(root->kf_root);
 	cgroup_free_root(root);
@@ -849,7 +984,7 @@
  * update of a tasks cgroup pointer by cgroup_attach_task()
  */
 
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask);
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -884,79 +1019,95 @@
 	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write_u64 || cft->write_s64 || cft->write_string ||
-	    cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write)
 		mode |= S_IWUSR;
 
 	return mode;
 }
 
-static void cgroup_free_fn(struct work_struct *work)
-{
-	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-
-	atomic_dec(&cgrp->root->nr_cgrps);
-	cgroup_pidlist_destroy_all(cgrp);
-
-	if (cgrp->parent) {
-		/*
-		 * We get a ref to the parent, and put the ref when this
-		 * cgroup is being freed, so it's guaranteed that the
-		 * parent won't be destroyed before its children.
-		 */
-		cgroup_put(cgrp->parent);
-		kernfs_put(cgrp->kn);
-		kfree(cgrp);
-	} else {
-		/*
-		 * This is root cgroup's refcnt reaching zero, which
-		 * indicates that the root should be released.
-		 */
-		cgroup_destroy_root(cgrp->root);
-	}
-}
-
-static void cgroup_free_rcu(struct rcu_head *head)
-{
-	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
-
-	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
-}
-
 static void cgroup_get(struct cgroup *cgrp)
 {
 	WARN_ON_ONCE(cgroup_is_dead(cgrp));
-	WARN_ON_ONCE(atomic_read(&cgrp->refcnt) <= 0);
-	atomic_inc(&cgrp->refcnt);
+	css_get(&cgrp->self);
 }
 
 static void cgroup_put(struct cgroup *cgrp)
 {
-	if (!atomic_dec_and_test(&cgrp->refcnt))
-		return;
-	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
-		return;
+	css_put(&cgrp->self);
+}
+
+/**
+ * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
+ * @kn: the kernfs_node being serviced
+ *
+ * This helper undoes cgroup_kn_lock_live() and should be invoked before
+ * the method finishes if locking succeeded.  Note that once this function
+ * returns the cgroup returned by cgroup_kn_lock_live() may become
+ * inaccessible at any time.  If the caller intends to continue to access the
+ * cgroup, it should pin it before invoking this function.
+ */
+static void cgroup_kn_unlock(struct kernfs_node *kn)
+{
+	struct cgroup *cgrp;
+
+	if (kernfs_type(kn) == KERNFS_DIR)
+		cgrp = kn->priv;
+	else
+		cgrp = kn->parent->priv;
+
+	mutex_unlock(&cgroup_mutex);
+
+	kernfs_unbreak_active_protection(kn);
+	cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
+ * @kn: the kernfs_node being serviced
+ *
+ * This helper is to be used by a cgroup kernfs method currently servicing
+ * @kn.  It breaks the active protection, performs cgroup locking and
+ * verifies that the associated cgroup is alive.  Returns the cgroup if
+ * alive; otherwise, %NULL.  A successful return should be undone by a
+ * matching cgroup_kn_unlock() invocation.
+ *
+ * Any cgroup kernfs method implementation which requires locking the
+ * associated cgroup should use this helper.  It avoids nesting cgroup
+ * locking under kernfs active protection and allows all kernfs operations
+ * including self-removal.
+ */
+static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
+{
+	struct cgroup *cgrp;
+
+	if (kernfs_type(kn) == KERNFS_DIR)
+		cgrp = kn->priv;
+	else
+		cgrp = kn->parent->priv;
 
 	/*
-	 * XXX: cgrp->id is only used to look up css's.  As cgroup and
-	 * css's lifetimes will be decoupled, it should be made
-	 * per-subsystem and moved to css->id so that lookups are
-	 * successful until the target css is released.
+	 * We're gonna grab cgroup_mutex which nests outside kernfs
+	 * active_ref.  cgroup liveliness check alone provides enough
+	 * protection against removal.  Ensure @cgrp stays accessible and
+	 * break the active_ref protection.
 	 */
-	mutex_lock(&cgroup_mutex);
-	idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-	mutex_unlock(&cgroup_mutex);
-	cgrp->id = -1;
+	cgroup_get(cgrp);
+	kernfs_break_active_protection(kn);
 
-	call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
+	mutex_lock(&cgroup_mutex);
+
+	if (!cgroup_is_dead(cgrp))
+		return cgrp;
+
+	cgroup_kn_unlock(kn);
+	return NULL;
 }
 
 static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 {
 	char name[CGROUP_FILE_NAME_MAX];
 
-	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
 }
 
@@ -965,7 +1116,7 @@
  * @cgrp: target cgroup
  * @subsys_mask: mask of the subsystem ids whose files should be removed
  */
-static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i;
@@ -973,40 +1124,40 @@
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 		list_for_each_entry(cfts, &ss->cfts, node)
 			cgroup_addrm_files(cgrp, cfts, false);
 	}
 }
 
-static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask)
+static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 {
 	struct cgroup_subsys *ss;
-	int ssid, ret;
+	unsigned int tmp_ss_mask;
+	int ssid, i, ret;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
 	for_each_subsys(ss, ssid) {
 		if (!(ss_mask & (1 << ssid)))
 			continue;
 
-		/* if @ss is on the dummy_root, we can always move it */
-		if (ss->root == &cgrp_dfl_root)
-			continue;
-
-		/* if @ss has non-root cgroups attached to it, can't move */
-		if (!list_empty(&ss->root->cgrp.children))
+		/* if @ss has non-root csses attached to it, can't move */
+		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)))
 			return -EBUSY;
 
 		/* can't move between two non-dummy roots either */
-		if (dst_root != &cgrp_dfl_root)
+		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
 			return -EBUSY;
 	}
 
-	ret = cgroup_populate_dir(&dst_root->cgrp, ss_mask);
+	/* skip creating root files on dfl_root for inhibited subsystems */
+	tmp_ss_mask = ss_mask;
+	if (dst_root == &cgrp_dfl_root)
+		tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask;
+
+	ret = cgroup_populate_dir(&dst_root->cgrp, tmp_ss_mask);
 	if (ret) {
 		if (dst_root != &cgrp_dfl_root)
 			return ret;
@@ -1018,9 +1169,9 @@
 		 * Just warn about it and continue.
 		 */
 		if (cgrp_dfl_root_visible) {
-			pr_warning("cgroup: failed to create files (%d) while rebinding 0x%lx to default root\n",
-				   ret, ss_mask);
-			pr_warning("cgroup: you may retry by moving them to a different hierarchy and unbinding\n");
+			pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n",
+				ret, ss_mask);
+			pr_warn("you may retry by moving them to a different hierarchy and unbinding\n");
 		}
 	}
 
@@ -1028,15 +1179,14 @@
 	 * Nothing can fail from this point on.  Remove files for the
 	 * removed subsystems and rebind each subsystem.
 	 */
-	mutex_unlock(&cgroup_mutex);
 	for_each_subsys(ss, ssid)
 		if (ss_mask & (1 << ssid))
 			cgroup_clear_dir(&ss->root->cgrp, 1 << ssid);
-	mutex_lock(&cgroup_mutex);
 
 	for_each_subsys(ss, ssid) {
 		struct cgroup_root *src_root;
 		struct cgroup_subsys_state *css;
+		struct css_set *cset;
 
 		if (!(ss_mask & (1 << ssid)))
 			continue;
@@ -1051,8 +1201,19 @@
 		ss->root = dst_root;
 		css->cgroup = &dst_root->cgrp;
 
-		src_root->cgrp.subsys_mask &= ~(1 << ssid);
-		dst_root->cgrp.subsys_mask |= 1 << ssid;
+		down_write(&css_set_rwsem);
+		hash_for_each(css_set_table, i, cset, hlist)
+			list_move_tail(&cset->e_cset_node[ss->id],
+				       &dst_root->cgrp.e_csets[ss->id]);
+		up_write(&css_set_rwsem);
+
+		src_root->subsys_mask &= ~(1 << ssid);
+		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+
+		/* default hierarchy doesn't enable controllers by default */
+		dst_root->subsys_mask |= 1 << ssid;
+		if (dst_root != &cgrp_dfl_root)
+			dst_root->cgrp.child_subsys_mask |= 1 << ssid;
 
 		if (ss->bind)
 			ss->bind(css);
@@ -1070,7 +1231,7 @@
 	int ssid;
 
 	for_each_subsys(ss, ssid)
-		if (root->cgrp.subsys_mask & (1 << ssid))
+		if (root->subsys_mask & (1 << ssid))
 			seq_printf(seq, ",%s", ss->name);
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
 		seq_puts(seq, ",sane_behavior");
@@ -1092,8 +1253,8 @@
 }
 
 struct cgroup_sb_opts {
-	unsigned long subsys_mask;
-	unsigned long flags;
+	unsigned int subsys_mask;
+	unsigned int flags;
 	char *release_agent;
 	bool cpuset_clone_children;
 	char *name;
@@ -1101,24 +1262,16 @@
 	bool none;
 };
 
-/*
- * Convert a hierarchy specifier into a bitmask of subsystems and
- * flags. Call with cgroup_mutex held to protect the cgroup_subsys[]
- * array. This function takes refcounts on subsystems to be used, unless it
- * returns error, in which case no refcounts are taken.
- */
 static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
-	unsigned long mask = (unsigned long)-1;
+	unsigned int mask = -1U;
 	struct cgroup_subsys *ss;
 	int i;
 
-	BUG_ON(!mutex_is_locked(&cgroup_mutex));
-
 #ifdef CONFIG_CPUSETS
-	mask = ~(1UL << cpuset_cgrp_id);
+	mask = ~(1U << cpuset_cgrp_id);
 #endif
 
 	memset(opts, 0, sizeof(*opts));
@@ -1199,7 +1352,7 @@
 			/* Mutually exclusive option 'all' + subsystem name */
 			if (all_ss)
 				return -EINVAL;
-			set_bit(i, &opts->subsys_mask);
+			opts->subsys_mask |= (1 << i);
 			one_ss = true;
 
 			break;
@@ -1211,12 +1364,12 @@
 	/* Consistency checks */
 
 	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
-		pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
+		pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
 
 		if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
 		    opts->cpuset_clone_children || opts->release_agent ||
 		    opts->name) {
-			pr_err("cgroup: sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
+			pr_err("sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
 			return -EINVAL;
 		}
 	} else {
@@ -1228,7 +1381,7 @@
 		if (all_ss || (!one_ss && !opts->none && !opts->name))
 			for_each_subsys(ss, i)
 				if (!ss->disabled)
-					set_bit(i, &opts->subsys_mask);
+					opts->subsys_mask |= (1 << i);
 
 		/*
 		 * We either have to specify by name or by subsystems. (So
@@ -1259,14 +1412,13 @@
 	int ret = 0;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_sb_opts opts;
-	unsigned long added_mask, removed_mask;
+	unsigned int added_mask, removed_mask;
 
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
-		pr_err("cgroup: sane_behavior: remount is not allowed\n");
+		pr_err("sane_behavior: remount is not allowed\n");
 		return -EINVAL;
 	}
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
 	/* See what subsystems are wanted */
@@ -1274,17 +1426,17 @@
 	if (ret)
 		goto out_unlock;
 
-	if (opts.subsys_mask != root->cgrp.subsys_mask || opts.release_agent)
-		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
-			   task_tgid_nr(current), current->comm);
+	if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
+		pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
+			task_tgid_nr(current), current->comm);
 
-	added_mask = opts.subsys_mask & ~root->cgrp.subsys_mask;
-	removed_mask = root->cgrp.subsys_mask & ~opts.subsys_mask;
+	added_mask = opts.subsys_mask & ~root->subsys_mask;
+	removed_mask = root->subsys_mask & ~opts.subsys_mask;
 
 	/* Don't allow flags or name to change at remount */
 	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
 	    (opts.name && strcmp(opts.name, root->name))) {
-		pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
+		pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
 		       opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
 		       root->flags & CGRP_ROOT_OPTION_MASK, root->name);
 		ret = -EINVAL;
@@ -1292,7 +1444,7 @@
 	}
 
 	/* remounting is not allowed for populated hierarchies */
-	if (!list_empty(&root->cgrp.children)) {
+	if (!list_empty(&root->cgrp.self.children)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -1312,7 +1464,6 @@
 	kfree(opts.release_agent);
 	kfree(opts.name);
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 	return ret;
 }
 
@@ -1370,14 +1521,22 @@
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
 {
-	atomic_set(&cgrp->refcnt, 1);
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
+	struct cgroup_subsys *ss;
+	int ssid;
+
+	INIT_LIST_HEAD(&cgrp->self.sibling);
+	INIT_LIST_HEAD(&cgrp->self.children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
-	cgrp->dummy_css.cgroup = cgrp;
+	cgrp->self.cgroup = cgrp;
+	cgrp->self.flags |= CSS_ONLINE;
+
+	for_each_subsys(ss, ssid)
+		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
+
+	init_waitqueue_head(&cgrp->offline_waitq);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -1400,21 +1559,24 @@
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
+static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
 	struct css_set *cset;
 	int i, ret;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
-	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
+	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_NOWAIT);
 	if (ret < 0)
 		goto out;
 	root_cgrp->id = ret;
 
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out;
+
 	/*
 	 * We're accessing css_set_count without locking css_set_rwsem here,
 	 * but that's OK - it can only be increased by someone holding
@@ -1423,11 +1585,11 @@
 	 */
 	ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	ret = cgroup_init_root_id(root);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
 					   KERNFS_ROOT_CREATE_DEACTIVATED,
@@ -1463,7 +1625,7 @@
 		link_css_set(&tmp_links, cset, root_cgrp);
 	up_write(&css_set_rwsem);
 
-	BUG_ON(!list_empty(&root_cgrp->children));
+	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
 
 	kernfs_activate(root_cgrp->kn);
@@ -1475,6 +1637,8 @@
 	root->kf_root = NULL;
 exit_root_id:
 	cgroup_exit_root_id(root);
+cancel_ref:
+	percpu_ref_cancel_init(&root_cgrp->self.refcnt);
 out:
 	free_cgrp_cset_links(&tmp_links);
 	return ret;
@@ -1497,14 +1661,13 @@
 	if (!use_task_css_set_links)
 		cgroup_enable_task_cg_lists();
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
 	/* First find the desired set of subsystems */
 	ret = parse_cgroupfs_options(data, &opts);
 	if (ret)
 		goto out_unlock;
-retry:
+
 	/* look for a matching existing root */
 	if (!opts.subsys_mask && !opts.none && !opts.name) {
 		cgrp_dfl_root_visible = true;
@@ -1536,7 +1699,7 @@
 		 * subsystems) then they must match.
 		 */
 		if ((opts.subsys_mask || opts.none) &&
-		    (opts.subsys_mask != root->cgrp.subsys_mask)) {
+		    (opts.subsys_mask != root->subsys_mask)) {
 			if (!name_match)
 				continue;
 			ret = -EBUSY;
@@ -1545,28 +1708,27 @@
 
 		if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
 			if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
-				pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+				pr_err("sane_behavior: new mount options should match the existing superblock\n");
 				ret = -EINVAL;
 				goto out_unlock;
 			} else {
-				pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
+				pr_warn("new mount options do not match the existing superblock, will be ignored\n");
 			}
 		}
 
 		/*
-		 * A root's lifetime is governed by its root cgroup.  Zero
-		 * ref indicate that the root is being destroyed.  Wait for
-		 * destruction to complete so that the subsystems are free.
-		 * We can use wait_queue for the wait but this path is
-		 * super cold.  Let's just sleep for a bit and retry.
+		 * A root's lifetime is governed by its root cgroup.
+		 * tryget_live failure indicates that the root is being
+		 * destroyed.  Wait for destruction to complete so that the
+		 * subsystems are free.  We can use wait_queue for the wait
+		 * but this path is super cold.  Let's just sleep for a bit
+		 * and retry.
 		 */
-		if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
+		if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
-			mutex_unlock(&cgroup_tree_mutex);
 			msleep(10);
-			mutex_lock(&cgroup_tree_mutex);
-			mutex_lock(&cgroup_mutex);
-			goto retry;
+			ret = restart_syscall();
+			goto out_free;
 		}
 
 		ret = 0;
@@ -1597,8 +1759,7 @@
 
 out_unlock:
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
-
+out_free:
 	kfree(opts.release_agent);
 	kfree(opts.name);
 
@@ -1617,7 +1778,19 @@
 	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 
-	cgroup_put(&root->cgrp);
+	/*
+	 * If @root doesn't have any mounts or children, start killing it.
+	 * This prevents new mounts by disabling percpu_ref_tryget_live().
+	 * cgroup_mount() may wait for @root's release.
+	 *
+	 * And don't kill the default root.
+	 */
+	if (css_has_online_children(&root->cgrp.self) ||
+	    root == &cgrp_dfl_root)
+		cgroup_put(&root->cgrp);
+	else
+		percpu_ref_kill(&root->cgrp.self.refcnt);
+
 	kernfs_kill_sb(sb);
 }
 
@@ -1739,7 +1912,7 @@
 
 /**
  * cgroup_task_migrate - move a task from one cgroup to another.
- * @old_cgrp; the cgroup @tsk is being migrated from
+ * @old_cgrp: the cgroup @tsk is being migrated from
  * @tsk: the task being migrated
  * @new_cset: the new css_set @tsk is being attached to
  *
@@ -1831,10 +2004,6 @@
 
 	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
 
-	/* nothing to do if this cset already belongs to the cgroup */
-	if (src_cgrp == dst_cgrp)
-		return;
-
 	if (!list_empty(&src_cset->mg_preload_node))
 		return;
 
@@ -1849,13 +2018,14 @@
 
 /**
  * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
- * @dst_cgrp: the destination cgroup
+ * @dst_cgrp: the destination cgroup (may be %NULL)
  * @preloaded_csets: list of preloaded source css_sets
  *
  * Tasks are about to be moved to @dst_cgrp and all the source css_sets
  * have been preloaded to @preloaded_csets.  This function looks up and
- * pins all destination css_sets, links each to its source, and put them on
- * @preloaded_csets.
+ * pins all destination css_sets, links each to its source, and append them
+ * to @preloaded_csets.  If @dst_cgrp is %NULL, the destination of each
+ * source css_set is assumed to be its cgroup on the default hierarchy.
  *
  * This function must be called after cgroup_migrate_add_src() has been
  * called on each migration source css_set.  After migration is performed
@@ -1866,19 +2036,42 @@
 				      struct list_head *preloaded_csets)
 {
 	LIST_HEAD(csets);
-	struct css_set *src_cset;
+	struct css_set *src_cset, *tmp_cset;
 
 	lockdep_assert_held(&cgroup_mutex);
 
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
+	    dst_cgrp->child_subsys_mask)
+		return -EBUSY;
+
 	/* look up the dst cset for each src cset and link it to src */
-	list_for_each_entry(src_cset, preloaded_csets, mg_preload_node) {
+	list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		struct css_set *dst_cset;
 
-		dst_cset = find_css_set(src_cset, dst_cgrp);
+		dst_cset = find_css_set(src_cset,
+					dst_cgrp ?: src_cset->dfl_cgrp);
 		if (!dst_cset)
 			goto err;
 
 		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
+
+		/*
+		 * If src cset equals dst, it's noop.  Drop the src.
+		 * cgroup_migrate() will skip the cset too.  Note that we
+		 * can't handle src == dst as some nodes are used by both.
+		 */
+		if (src_cset == dst_cset) {
+			src_cset->mg_src_cgrp = NULL;
+			list_del_init(&src_cset->mg_preload_node);
+			put_css_set(src_cset, false);
+			put_css_set(dst_cset, false);
+			continue;
+		}
+
 		src_cset->mg_dst_cset = dst_cset;
 
 		if (list_empty(&dst_cset->mg_preload_node))
@@ -1887,7 +2080,7 @@
 			put_css_set(dst_cset, false);
 	}
 
-	list_splice(&csets, preloaded_csets);
+	list_splice_tail(&csets, preloaded_csets);
 	return 0;
 err:
 	cgroup_migrate_finish(&csets);
@@ -1968,7 +2161,7 @@
 		return 0;
 
 	/* check that we can legitimately attach to the cgroup */
-	for_each_css(css, i, cgrp) {
+	for_each_e_css(css, i, cgrp) {
 		if (css->ss->can_attach) {
 			ret = css->ss->can_attach(css, &tset);
 			if (ret) {
@@ -1998,7 +2191,7 @@
 	 */
 	tset.csets = &tset.dst_csets;
 
-	for_each_css(css, i, cgrp)
+	for_each_e_css(css, i, cgrp)
 		if (css->ss->attach)
 			css->ss->attach(css, &tset);
 
@@ -2006,7 +2199,7 @@
 	goto out_release_tset;
 
 out_cancel_attach:
-	for_each_css(css, i, cgrp) {
+	for_each_e_css(css, i, cgrp) {
 		if (css == failed_css)
 			break;
 		if (css->ss->cancel_attach)
@@ -2065,13 +2258,20 @@
  * function to attach either it or all tasks in its threadgroup. Will lock
  * cgroup_mutex and threadgroup.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
+static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+				    size_t nbytes, loff_t off, bool threadgroup)
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
+	struct cgroup *cgrp;
+	pid_t pid;
 	int ret;
 
-	if (!cgroup_lock_live_group(cgrp))
+	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+		return -EINVAL;
+
+	cgrp = cgroup_kn_lock_live(of->kn);
+	if (!cgrp)
 		return -ENODEV;
 
 retry_find_task:
@@ -2137,8 +2337,8 @@
 
 	put_task_struct(tsk);
 out_unlock_cgroup:
-	mutex_unlock(&cgroup_mutex);
-	return ret;
+	cgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
 }
 
 /**
@@ -2172,43 +2372,44 @@
 }
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
-static int cgroup_tasks_write(struct cgroup_subsys_state *css,
-			      struct cftype *cft, u64 pid)
+static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
+				  char *buf, size_t nbytes, loff_t off)
 {
-	return attach_task_by_pid(css->cgroup, pid, false);
+	return __cgroup_procs_write(of, buf, nbytes, off, false);
 }
 
-static int cgroup_procs_write(struct cgroup_subsys_state *css,
-			      struct cftype *cft, u64 tgid)
+static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+				  char *buf, size_t nbytes, loff_t off)
 {
-	return attach_task_by_pid(css->cgroup, tgid, true);
+	return __cgroup_procs_write(of, buf, nbytes, off, true);
 }
 
-static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
-				      struct cftype *cft, char *buffer)
+static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes, loff_t off)
 {
-	struct cgroup_root *root = css->cgroup->root;
+	struct cgroup *cgrp;
 
-	BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX);
-	if (!cgroup_lock_live_group(css->cgroup))
+	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
+	cgrp = cgroup_kn_lock_live(of->kn);
+	if (!cgrp)
 		return -ENODEV;
 	spin_lock(&release_agent_path_lock);
-	strlcpy(root->release_agent_path, buffer,
-		sizeof(root->release_agent_path));
+	strlcpy(cgrp->root->release_agent_path, strstrip(buf),
+		sizeof(cgrp->root->release_agent_path));
 	spin_unlock(&release_agent_path_lock);
-	mutex_unlock(&cgroup_mutex);
-	return 0;
+	cgroup_kn_unlock(of->kn);
+	return nbytes;
 }
 
 static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
+	spin_lock(&release_agent_path_lock);
 	seq_puts(seq, cgrp->root->release_agent_path);
+	spin_unlock(&release_agent_path_lock);
 	seq_putc(seq, '\n');
-	mutex_unlock(&cgroup_mutex);
 	return 0;
 }
 
@@ -2220,6 +2421,320 @@
 	return 0;
 }
 
+static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
+{
+	struct cgroup_subsys *subsys;
+	bool first = true;	/* nothing emitted yet */
+	int id;
+
+	for_each_subsys(subsys, id) {
+		if (!(ss_mask & (1 << id)))
+			continue;	/* subsystem not in the mask */
+		if (!first)
+			seq_putc(seq, ' ');	/* separator between names */
+		seq_printf(seq, "%s", subsys->name);
+		first = false;
+	}
+	if (!first)
+		seq_putc(seq, '\n');	/* newline only if a name was printed */
+}
+
+/* show controllers which are currently attached to the default hierarchy */
+static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
+			     ~cgrp_dfl_root_inhibit_ss_mask);
+	return 0;
+}
+
+/* show controllers enabled from the parent (non-root only, so it exists) */
+static int cgroup_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
+	return 0;
+}
+
+/* show controllers which are enabled for a given cgroup's children */
+static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+	return 0;
+}
+
+/**
+ * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
+ * @cgrp: root of the subtree to update csses for
+ *
+ * @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
+ * css associations need to be updated accordingly.  This function looks up
+ * all css_sets which are attached to the subtree, creates the matching
+ * updated css_sets and migrates the tasks to the new ones.
+ */
+static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+{
+	LIST_HEAD(preloaded_csets);
+	struct cgroup_subsys_state *css;
+	struct css_set *src_cset;
+	int ret;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	/* look up all csses currently attached to @cgrp's subtree */
+	down_read(&css_set_rwsem);
+	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
+		struct cgrp_cset_link *link;
+
+		/* self is not affected by child_subsys_mask change */
+		if (css->cgroup == cgrp)
+			continue;
+
+		list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
+			cgroup_migrate_add_src(link->cset, cgrp,
+					       &preloaded_csets);
+	}
+	up_read(&css_set_rwsem);
+
+	/* NULL dst indicates self on default hierarchy */
+	ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
+	if (ret)
+		goto out_finish;
+
+	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
+		struct task_struct *last_task = NULL, *task;
+
+		/* src_csets precede dst_csets, break on the first dst_cset */
+		if (!src_cset->mg_src_cgrp)
+			break;
+
+		/*
+		 * All tasks in src_cset need to be migrated to the
+		 * matching dst_cset.  Empty it process by process.  We
+		 * walk tasks but migrate processes.  The leader might even
+		 * belong to a different cset but such src_cset would also
+		 * be among the target src_csets because the default
+		 * hierarchy enforces per-process membership.
+		 */
+		while (true) {
+			down_read(&css_set_rwsem);
+			task = list_first_entry_or_null(&src_cset->tasks,
+						struct task_struct, cg_list);
+			if (task) {
+				task = task->group_leader;
+				WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
+				get_task_struct(task);
+			}
+			up_read(&css_set_rwsem);
+
+			if (!task)
+				break;
+
+			/* guard against possible infinite loop */
+			if (WARN(last_task == task,
+				 "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
+				goto out_finish;
+			last_task = task;
+
+			threadgroup_lock(task);
+			/* raced against de_thread() from another thread? */
+			if (!thread_group_leader(task)) {
+				threadgroup_unlock(task);
+				put_task_struct(task);
+				continue;
+			}
+
+			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
+
+			threadgroup_unlock(task);
+			put_task_struct(task);
+
+			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
+				goto out_finish;
+		}
+	}
+
+out_finish:
+	cgroup_migrate_finish(&preloaded_csets);
+	return ret;
+}
+
+/* change the enabled child controllers for a cgroup in the default hierarchy */
+static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
+{
+	unsigned int enable = 0, disable = 0;
+	struct cgroup *cgrp, *child;
+	struct cgroup_subsys *ss;
+	char *tok;
+	int ssid, ret;
+
+	/*
+	 * Parse input - space separated list of subsystem names prefixed
+	 * with either + or -.
+	 */
+	buf = strstrip(buf);
+	while ((tok = strsep(&buf, " "))) {
+		if (tok[0] == '\0')
+			continue;
+		for_each_subsys(ss, ssid) {
+			if (ss->disabled || strcmp(tok + 1, ss->name) ||
+			    ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
+				continue;
+
+			if (*tok == '+') {
+				enable |= 1 << ssid;
+				disable &= ~(1 << ssid);
+			} else if (*tok == '-') {
+				disable |= 1 << ssid;
+				enable &= ~(1 << ssid);
+			} else {
+				return -EINVAL;
+			}
+			break;
+		}
+		if (ssid == CGROUP_SUBSYS_COUNT)
+			return -EINVAL;
+	}
+
+	cgrp = cgroup_kn_lock_live(of->kn);
+	if (!cgrp)
+		return -ENODEV;
+
+	for_each_subsys(ss, ssid) {
+		if (enable & (1 << ssid)) {
+			if (cgrp->child_subsys_mask & (1 << ssid)) {
+				enable &= ~(1 << ssid);
+				continue;
+			}
+
+			/*
+			 * Because css offlining is asynchronous, userland
+			 * might try to re-enable the same controller while
+			 * the previous instance is still around.  In such
+			 * cases, wait till it's gone using offline_waitq.
+			 */
+			cgroup_for_each_live_child(child, cgrp) {
+				DEFINE_WAIT(wait);
+
+				if (!cgroup_css(child, ss))
+					continue;
+
+				cgroup_get(child);
+				prepare_to_wait(&child->offline_waitq, &wait,
+						TASK_UNINTERRUPTIBLE);
+				cgroup_kn_unlock(of->kn);
+				schedule();
+				finish_wait(&child->offline_waitq, &wait);
+				cgroup_put(child);
+
+				return restart_syscall();
+			}
+
+			/* unavailable or not enabled on the parent? */
+			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+			    (cgroup_parent(cgrp) &&
+			     !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
+				ret = -ENOENT;
+				goto out_unlock;
+			}
+		} else if (disable & (1 << ssid)) {
+			if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+				disable &= ~(1 << ssid);
+				continue;
+			}
+
+			/* a child has it enabled? */
+			cgroup_for_each_live_child(child, cgrp) {
+				if (child->child_subsys_mask & (1 << ssid)) {
+					ret = -EBUSY;
+					goto out_unlock;
+				}
+			}
+		}
+	}
+
+	if (!enable && !disable) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/*
+	 * Create csses for enables and update child_subsys_mask.  This
+	 * changes cgroup_e_css() results which in turn makes the
+	 * subsequent cgroup_update_dfl_csses() associate all tasks in the
+	 * subtree to the updated csses.
+	 */
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			ret = create_css(child, ss);
+			if (ret)
+				goto err_undo_css;
+		}
+	}
+
+	cgrp->child_subsys_mask |= enable;
+	cgrp->child_subsys_mask &= ~disable;
+
+	ret = cgroup_update_dfl_csses(cgrp);
+	if (ret)
+		goto err_undo_css;
+
+	/* all tasks are now migrated away from the old csses, kill them */
+	for_each_subsys(ss, ssid) {
+		if (!(disable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp)
+			kill_css(cgroup_css(child, ss));
+	}
+
+	kernfs_activate(cgrp->kn);
+	ret = 0;
+out_unlock:
+	cgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
+
+err_undo_css:
+	cgrp->child_subsys_mask &= ~enable;
+	cgrp->child_subsys_mask |= disable;
+
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			struct cgroup_subsys_state *css = cgroup_css(child, ss);
+			if (css)
+				kill_css(css);
+		}
+	}
+	goto out_unlock;
+}
+
+static int cgroup_populated_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
+	return 0;
+}
+
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
@@ -2228,6 +2743,9 @@
 	struct cgroup_subsys_state *css;
 	int ret;
 
+	if (cft->write)
+		return cft->write(of, buf, nbytes, off);
+
 	/*
 	 * kernfs guarantees that a file isn't deleted with operations in
 	 * flight, which means that the matching css is and stays alive and
@@ -2238,9 +2756,7 @@
 	css = cgroup_css(cgrp, cft->ss);
 	rcu_read_unlock();
 
-	if (cft->write_string) {
-		ret = cft->write_string(css, cft, strstrip(buf));
-	} else if (cft->write_u64) {
+	if (cft->write_u64) {
 		unsigned long long v;
 		ret = kstrtoull(buf, 0, &v);
 		if (!ret)
@@ -2250,8 +2766,6 @@
 		ret = kstrtoll(buf, 0, &v);
 		if (!ret)
 			ret = cft->write_s64(css, cft, v);
-	} else if (cft->trigger) {
-		ret = cft->trigger(css, (unsigned int)cft->private);
 	} else {
 		ret = -EINVAL;
 	}
@@ -2328,20 +2842,18 @@
 		return -EPERM;
 
 	/*
-	 * We're gonna grab cgroup_tree_mutex which nests outside kernfs
+	 * We're gonna grab cgroup_mutex which nests outside kernfs
 	 * active_ref.  kernfs_rename() doesn't require active_ref
-	 * protection.  Break them before grabbing cgroup_tree_mutex.
+	 * protection.  Break them before grabbing cgroup_mutex.
 	 */
 	kernfs_break_active_protection(new_parent);
 	kernfs_break_active_protection(kn);
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
 	ret = kernfs_rename(kn, new_parent, new_name_str);
 
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 
 	kernfs_unbreak_active_protection(kn);
 	kernfs_unbreak_active_protection(new_parent);
@@ -2379,9 +2891,14 @@
 		return PTR_ERR(kn);
 
 	ret = cgroup_kn_set_ugid(kn);
-	if (ret)
+	if (ret) {
 		kernfs_remove(kn);
-	return ret;
+		return ret;
+	}
+
+	if (cft->seq_show == cgroup_populated_show)
+		cgrp->populated_kn = kn;
+	return 0;
 }
 
 /**
@@ -2401,7 +2918,7 @@
 	struct cftype *cft;
 	int ret;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
 
 	for (cft = cfts; cft->name[0] != '\0'; cft++) {
 		/* does cft->flags tell us to skip this file on @cgrp? */
@@ -2409,16 +2926,16 @@
 			continue;
 		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
+		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
+		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
 			continue;
 
 		if (is_add) {
 			ret = cgroup_add_file(cgrp, cft);
 			if (ret) {
-				pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
-					cft->name, ret);
+				pr_warn("%s: failed to add %s, err=%d\n",
+					__func__, cft->name, ret);
 				return ret;
 			}
 		} else {
@@ -2436,11 +2953,7 @@
 	struct cgroup_subsys_state *css;
 	int ret = 0;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
-
-	/* don't bother if @ss isn't attached */
-	if (ss->root == &cgrp_dfl_root)
-		return 0;
+	lockdep_assert_held(&cgroup_mutex);
 
 	/* add/rm files for all cgroups created before */
 	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
@@ -2508,7 +3021,7 @@
 
 static int cgroup_rm_cftypes_locked(struct cftype *cfts)
 {
-	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
 
 	if (!cfts || !cfts[0].ss)
 		return -ENOENT;
@@ -2534,9 +3047,9 @@
 {
 	int ret;
 
-	mutex_lock(&cgroup_tree_mutex);
+	mutex_lock(&cgroup_mutex);
 	ret = cgroup_rm_cftypes_locked(cfts);
-	mutex_unlock(&cgroup_tree_mutex);
+	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
 
@@ -2558,6 +3071,9 @@
 {
 	int ret;
 
+	if (ss->disabled)
+		return 0;
+
 	if (!cfts || cfts[0].name[0] == '\0')
 		return 0;
 
@@ -2565,14 +3081,14 @@
 	if (ret)
 		return ret;
 
-	mutex_lock(&cgroup_tree_mutex);
+	mutex_lock(&cgroup_mutex);
 
 	list_add_tail(&cfts->node, &ss->cfts);
 	ret = cgroup_apply_cftypes(cfts, true);
 	if (ret)
 		cgroup_rm_cftypes_locked(cfts);
 
-	mutex_unlock(&cgroup_tree_mutex);
+	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
 
@@ -2596,57 +3112,65 @@
 
 /**
  * css_next_child - find the next child of a given css
- * @pos_css: the current position (%NULL to initiate traversal)
- * @parent_css: css whose children to walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @parent: css whose children to walk
  *
- * This function returns the next child of @parent_css and should be called
+ * This function returns the next child of @parent and should be called
  * under either cgroup_mutex or RCU read lock.  The only requirement is
- * that @parent_css and @pos_css are accessible.  The next sibling is
- * guaranteed to be returned regardless of their states.
+ * that @parent and @pos are accessible.  The next sibling is guaranteed to
+ * be returned regardless of their states.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
-struct cgroup_subsys_state *
-css_next_child(struct cgroup_subsys_state *pos_css,
-	       struct cgroup_subsys_state *parent_css)
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent)
 {
-	struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
-	struct cgroup *cgrp = parent_css->cgroup;
-	struct cgroup *next;
+	struct cgroup_subsys_state *next;
 
-	cgroup_assert_mutexes_or_rcu_locked();
+	cgroup_assert_mutex_or_rcu_locked();
 
 	/*
-	 * @pos could already have been removed.  Once a cgroup is removed,
-	 * its ->sibling.next is no longer updated when its next sibling
-	 * changes.  As CGRP_DEAD assertion is serialized and happens
-	 * before the cgroup is taken off the ->sibling list, if we see it
-	 * unasserted, it's guaranteed that the next sibling hasn't
-	 * finished its grace period even if it's already removed, and thus
-	 * safe to dereference from this RCU critical section.  If
-	 * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
-	 * to be visible as %true here.
+	 * @pos could already have been unlinked from the sibling list.
+	 * Once a cgroup is removed, its ->sibling.next is no longer
+	 * updated when its next sibling changes.  CSS_RELEASED is set when
+	 * @pos is taken off list, at which time its next pointer is valid,
+	 * and, as releases are serialized, the one pointed to by the next
+	 * pointer is guaranteed to not have started release yet.  This
+	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
+	 * critical section, the one pointed to by its next pointer is
+	 * guaranteed to not have finished its RCU grace period even if we
+	 * have dropped rcu_read_lock() in between iterations.
 	 *
-	 * If @pos is dead, its next pointer can't be dereferenced;
-	 * however, as each cgroup is given a monotonically increasing
-	 * unique serial number and always appended to the sibling list,
-	 * the next one can be found by walking the parent's children until
-	 * we see a cgroup with higher serial number than @pos's.  While
-	 * this path can be slower, it's taken only when either the current
-	 * cgroup is removed or iteration and removal race.
+	 * If @pos has CSS_RELEASED set, its next pointer can't be
+	 * dereferenced; however, as each css is given a monotonically
+	 * increasing unique serial number and always appended to the
+	 * sibling list, the next one can be found by walking the parent's
+	 * children until the first css with higher serial number than
+	 * @pos's.  While this path can be slower, it happens iff iteration
+	 * races against release and the race window is very small.
 	 */
 	if (!pos) {
-		next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
-	} else if (likely(!cgroup_is_dead(pos))) {
-		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+		next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
+	} else if (likely(!(pos->flags & CSS_RELEASED))) {
+		next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
 	} else {
-		list_for_each_entry_rcu(next, &cgrp->children, sibling)
+		list_for_each_entry_rcu(next, &parent->children, sibling)
 			if (next->serial_nr > pos->serial_nr)
 				break;
 	}
 
-	if (&next->sibling == &cgrp->children)
-		return NULL;
-
-	return cgroup_css(next, parent_css->ss);
+	/*
+	 * @next, if not pointing to the head, can be dereferenced and is
+	 * the next sibling.
+	 */
+	if (&next->sibling != &parent->children)
+		return next;
+	return NULL;
 }
 
 /**
@@ -2662,6 +3186,13 @@
  * doesn't require the whole traversal to be contained in a single critical
  * section.  This function will return the correct next descendant as long
  * as both @pos and @root are accessible and @pos is a descendant of @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_pre(struct cgroup_subsys_state *pos,
@@ -2669,7 +3200,7 @@
 {
 	struct cgroup_subsys_state *next;
 
-	cgroup_assert_mutexes_or_rcu_locked();
+	cgroup_assert_mutex_or_rcu_locked();
 
 	/* if first iteration, visit @root */
 	if (!pos)
@@ -2682,10 +3213,10 @@
 
 	/* no child, visit my or the closest ancestor's next sibling */
 	while (pos != root) {
-		next = css_next_child(pos, css_parent(pos));
+		next = css_next_child(pos, pos->parent);
 		if (next)
 			return next;
-		pos = css_parent(pos);
+		pos = pos->parent;
 	}
 
 	return NULL;
@@ -2709,7 +3240,7 @@
 {
 	struct cgroup_subsys_state *last, *tmp;
 
-	cgroup_assert_mutexes_or_rcu_locked();
+	cgroup_assert_mutex_or_rcu_locked();
 
 	do {
 		last = pos;
@@ -2749,6 +3280,13 @@
  * section.  This function will return the correct next descendant as long
  * as both @pos and @cgroup are accessible and @pos is a descendant of
  * @cgroup.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_post(struct cgroup_subsys_state *pos,
@@ -2756,7 +3294,7 @@
 {
 	struct cgroup_subsys_state *next;
 
-	cgroup_assert_mutexes_or_rcu_locked();
+	cgroup_assert_mutex_or_rcu_locked();
 
 	/* if first iteration, visit leftmost descendant which may be @root */
 	if (!pos)
@@ -2767,12 +3305,36 @@
 		return NULL;
 
 	/* if there's an unvisited sibling, visit its leftmost descendant */
-	next = css_next_child(pos, css_parent(pos));
+	next = css_next_child(pos, pos->parent);
 	if (next)
 		return css_leftmost_descendant(next);
 
 	/* no sibling left, visit parent */
-	return css_parent(pos);
+	return pos->parent;
+}
+
+/**
+ * css_has_online_children - does a css have online children
+ * @css: the parent css whose children are inspected
+ *
+ * Walks @css's children under RCU and tests each child's own CSS_ONLINE
+ * flag (not @css's).  Returns %true if any child is online; otherwise,
+ * %false.  The caller synchronizes against on/offlining as necessary.
+ */
+bool css_has_online_children(struct cgroup_subsys_state *css)
+{
+	struct cgroup_subsys_state *child;
+	bool ret = false;
+
+	rcu_read_lock();
+	css_for_each_child(child, css) {
+		if (child->flags & CSS_ONLINE) {
+			ret = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
 }
 
 /**
@@ -2783,27 +3345,36 @@
  */
 static void css_advance_task_iter(struct css_task_iter *it)
 {
-	struct list_head *l = it->cset_link;
+	struct list_head *l = it->cset_pos;
 	struct cgrp_cset_link *link;
 	struct css_set *cset;
 
 	/* Advance to the next non-empty css_set */
 	do {
 		l = l->next;
-		if (l == &it->origin_css->cgroup->cset_links) {
-			it->cset_link = NULL;
+		if (l == it->cset_head) {
+			it->cset_pos = NULL;
 			return;
 		}
-		link = list_entry(l, struct cgrp_cset_link, cset_link);
-		cset = link->cset;
+
+		if (it->ss) {
+			cset = container_of(l, struct css_set,
+					    e_cset_node[it->ss->id]);
+		} else {
+			link = list_entry(l, struct cgrp_cset_link, cset_link);
+			cset = link->cset;
+		}
 	} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
 
-	it->cset_link = l;
+	it->cset_pos = l;
 
 	if (!list_empty(&cset->tasks))
-		it->task = cset->tasks.next;
+		it->task_pos = cset->tasks.next;
 	else
-		it->task = cset->mg_tasks.next;
+		it->task_pos = cset->mg_tasks.next;
+
+	it->tasks_head = &cset->tasks;
+	it->mg_tasks_head = &cset->mg_tasks;
 }
 
 /**
@@ -2829,8 +3400,14 @@
 
 	down_read(&css_set_rwsem);
 
-	it->origin_css = css;
-	it->cset_link = &css->cgroup->cset_links;
+	it->ss = css->ss;
+
+	if (it->ss)
+		it->cset_pos = &css->cgroup->e_csets[css->ss->id];
+	else
+		it->cset_pos = &css->cgroup->cset_links;
+
+	it->cset_head = it->cset_pos;
 
 	css_advance_task_iter(it);
 }
@@ -2846,12 +3423,10 @@
 struct task_struct *css_task_iter_next(struct css_task_iter *it)
 {
 	struct task_struct *res;
-	struct list_head *l = it->task;
-	struct cgrp_cset_link *link = list_entry(it->cset_link,
-					struct cgrp_cset_link, cset_link);
+	struct list_head *l = it->task_pos;
 
 	/* If the iterator cg is NULL, we have no tasks */
-	if (!it->cset_link)
+	if (!it->cset_pos)
 		return NULL;
 	res = list_entry(l, struct task_struct, cg_list);
 
@@ -2862,13 +3437,13 @@
 	 */
 	l = l->next;
 
-	if (l == &link->cset->tasks)
-		l = link->cset->mg_tasks.next;
+	if (l == it->tasks_head)
+		l = it->mg_tasks_head->next;
 
-	if (l == &link->cset->mg_tasks)
+	if (l == it->mg_tasks_head)
 		css_advance_task_iter(it);
 	else
-		it->task = l;
+		it->task_pos = l;
 
 	return res;
 }
@@ -2921,7 +3496,7 @@
 	 * ->can_attach() fails.
 	 */
 	do {
-		css_task_iter_start(&from->dummy_css, &it);
+		css_task_iter_start(&from->self, &it);
 		task = css_task_iter_next(&it);
 		if (task)
 			get_task_struct(task);
@@ -3186,7 +3761,7 @@
 	if (!array)
 		return -ENOMEM;
 	/* now, populate the array */
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		if (unlikely(n == length))
 			break;
@@ -3248,7 +3823,7 @@
 
 	/*
 	 * We aren't being called from kernfs and there's no guarantee on
-	 * @kn->priv's validity.  For this and css_tryget_from_dir(),
+	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
 	 * @kn->priv is RCU safe.  Let's do the RCU dancing.
 	 */
 	rcu_read_lock();
@@ -3260,7 +3835,7 @@
 	}
 	rcu_read_unlock();
 
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		switch (tsk->state) {
 		case TASK_RUNNING:
@@ -3390,17 +3965,6 @@
 	return seq_printf(s, "%d\n", *(int *)v);
 }
 
-/*
- * seq_operations functions for iterating on pidlists through seq_file -
- * independent of whether it's tasks or procs
- */
-static const struct seq_operations cgroup_pidlist_seq_operations = {
-	.start = cgroup_pidlist_start,
-	.stop = cgroup_pidlist_stop,
-	.next = cgroup_pidlist_next,
-	.show = cgroup_pidlist_show,
-};
-
 static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
 					 struct cftype *cft)
 {
@@ -3442,7 +4006,7 @@
 		.seq_stop = cgroup_pidlist_stop,
 		.seq_show = cgroup_pidlist_show,
 		.private = CGROUP_FILE_PROCS,
-		.write_u64 = cgroup_procs_write,
+		.write = cgroup_procs_write,
 		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
@@ -3456,6 +4020,27 @@
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_sane_behavior_show,
 	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT,
+		.seq_show = cgroup_root_controllers_show,
+	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_controllers_show,
+	},
+	{
+		.name = "cgroup.subtree_control",
+		.flags = CFTYPE_ONLY_ON_DFL,
+		.seq_show = cgroup_subtree_control_show,
+		.write = cgroup_subtree_control_write,
+	},
+	{
+		.name = "cgroup.populated",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_populated_show,
+	},
 
 	/*
 	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
@@ -3470,7 +4055,7 @@
 		.seq_stop = cgroup_pidlist_stop,
 		.seq_show = cgroup_pidlist_show,
 		.private = CGROUP_FILE_TASKS,
-		.write_u64 = cgroup_tasks_write,
+		.write = cgroup_tasks_write,
 		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
@@ -3483,7 +4068,7 @@
 		.name = "release_agent",
 		.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_release_agent_show,
-		.write_string = cgroup_release_agent_write,
+		.write = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX - 1,
 	},
 	{ }	/* terminate */
@@ -3496,7 +4081,7 @@
  *
  * On failure, no file is added.
  */
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i, ret = 0;
@@ -3505,7 +4090,7 @@
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 
 		list_for_each_entry(cfts, &ss->cfts, node) {
@@ -3527,9 +4112,9 @@
  *    Implemented in kill_css().
  *
  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
- *    and thus css_tryget() is guaranteed to fail, the css can be offlined
- *    by invoking offline_css().  After offlining, the base ref is put.
- *    Implemented in css_killed_work_fn().
+ *    and thus css_tryget_online() is guaranteed to fail, the css can be
+ *    offlined by invoking offline_css().  After offlining, the base ref is
+ *    put.  Implemented in css_killed_work_fn().
  *
  * 3. When the percpu_ref reaches zero, the only possible remaining
  *    accessors are inside RCU read sections.  css_release() schedules the
@@ -3548,11 +4133,37 @@
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup *cgrp = css->cgroup;
 
-	if (css->parent)
-		css_put(css->parent);
+	if (css->ss) {
+		/* css free path */
+		if (css->parent)
+			css_put(css->parent);
 
-	css->ss->css_free(css);
-	cgroup_put(cgrp);
+		css->ss->css_free(css);
+		cgroup_put(cgrp);
+	} else {
+		/* cgroup free path */
+		atomic_dec(&cgrp->root->nr_cgrps);
+		cgroup_pidlist_destroy_all(cgrp);
+
+		if (cgroup_parent(cgrp)) {
+			/*
+			 * We get a ref to the parent, and put the ref when
+			 * this cgroup is being freed, so it's guaranteed
+			 * that the parent won't be destroyed before its
+			 * children.
+			 */
+			cgroup_put(cgroup_parent(cgrp));
+			kernfs_put(cgrp->kn);
+			kfree(cgrp);
+		} else {
+			/*
+			 * This is root cgroup's refcnt reaching zero,
+			 * which indicates that the root should be
+			 * released.
+			 */
+			cgroup_destroy_root(cgrp->root);
+		}
+	}
 }
 
 static void css_free_rcu_fn(struct rcu_head *rcu_head)
@@ -3564,26 +4175,59 @@
 	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
+static void css_release_work_fn(struct work_struct *work)
+{
+	struct cgroup_subsys_state *css =
+		container_of(work, struct cgroup_subsys_state, destroy_work);
+	struct cgroup_subsys *ss = css->ss;
+	struct cgroup *cgrp = css->cgroup;
+
+	mutex_lock(&cgroup_mutex);
+
+	css->flags |= CSS_RELEASED;
+	list_del_rcu(&css->sibling);
+
+	if (ss) {
+		/* css release path */
+		cgroup_idr_remove(&ss->css_idr, css->id);
+	} else {
+		/* cgroup release path */
+		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+		cgrp->id = -1;
+	}
+
+	mutex_unlock(&cgroup_mutex);
+
+	call_rcu(&css->rcu_head, css_free_rcu_fn);
+}
+
 static void css_release(struct percpu_ref *ref)
 {
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
-	RCU_INIT_POINTER(css->cgroup->subsys[css->ss->id], NULL);
-	call_rcu(&css->rcu_head, css_free_rcu_fn);
+	INIT_WORK(&css->destroy_work, css_release_work_fn);
+	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
-static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
-		     struct cgroup *cgrp)
+static void init_and_link_css(struct cgroup_subsys_state *css,
+			      struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
+	lockdep_assert_held(&cgroup_mutex);
+
+	cgroup_get(cgrp);
+
+	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
-	css->flags = 0;
+	INIT_LIST_HEAD(&css->sibling);
+	INIT_LIST_HEAD(&css->children);
+	css->serial_nr = css_serial_nr_next++;
 
-	if (cgrp->parent)
-		css->parent = cgroup_css(cgrp->parent, ss);
-	else
-		css->flags |= CSS_ROOT;
+	if (cgroup_parent(cgrp)) {
+		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
+		css_get(css->parent);
+	}
 
 	BUG_ON(cgroup_css(cgrp, ss));
 }
@@ -3594,14 +4238,12 @@
 	struct cgroup_subsys *ss = css->ss;
 	int ret = 0;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (ss->css_online)
 		ret = ss->css_online(css);
 	if (!ret) {
 		css->flags |= CSS_ONLINE;
-		css->cgroup->nr_css++;
 		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
 	}
 	return ret;
@@ -3612,7 +4254,6 @@
 {
 	struct cgroup_subsys *ss = css->ss;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (!(css->flags & CSS_ONLINE))
@@ -3622,8 +4263,9 @@
 		ss->css_offline(css);
 
 	css->flags &= ~CSS_ONLINE;
-	css->cgroup->nr_css--;
-	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], css);
+	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+
+	wake_up_all(&css->cgroup->offline_waitq);
 }
 
 /**
@@ -3637,111 +4279,102 @@
  */
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 {
-	struct cgroup *parent = cgrp->parent;
+	struct cgroup *parent = cgroup_parent(cgrp);
+	struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
 	struct cgroup_subsys_state *css;
 	int err;
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	css = ss->css_alloc(cgroup_css(parent, ss));
+	css = ss->css_alloc(parent_css);
 	if (IS_ERR(css))
 		return PTR_ERR(css);
 
+	init_and_link_css(css, ss, cgrp);
+
 	err = percpu_ref_init(&css->refcnt, css_release);
 	if (err)
 		goto err_free_css;
 
-	init_css(css, ss, cgrp);
+	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT);
+	if (err < 0)
+		goto err_free_percpu_ref;
+	css->id = err;
 
 	err = cgroup_populate_dir(cgrp, 1 << ss->id);
 	if (err)
-		goto err_free_percpu_ref;
+		goto err_free_id;
+
+	/* @css is ready to be brought online now, make it visible */
+	list_add_tail_rcu(&css->sibling, &parent_css->children);
+	cgroup_idr_replace(&ss->css_idr, css, css->id);
 
 	err = online_css(css);
 	if (err)
-		goto err_clear_dir;
-
-	cgroup_get(cgrp);
-	css_get(css->parent);
-
-	cgrp->subsys_mask |= 1 << ss->id;
+		goto err_list_del;
 
 	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-	    parent->parent) {
-		pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
-			   current->comm, current->pid, ss->name);
+	    cgroup_parent(parent)) {
+		pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
+			current->comm, current->pid, ss->name);
 		if (!strcmp(ss->name, "memory"))
-			pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
+			pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
 		ss->warned_broken_hierarchy = true;
 	}
 
 	return 0;
 
-err_clear_dir:
+err_list_del:
+	list_del_rcu(&css->sibling);
 	cgroup_clear_dir(css->cgroup, 1 << css->ss->id);
+err_free_id:
+	cgroup_idr_remove(&ss->css_idr, css->id);
 err_free_percpu_ref:
 	percpu_ref_cancel_init(&css->refcnt);
 err_free_css:
-	ss->css_free(css);
+	call_rcu(&css->rcu_head, css_free_rcu_fn);
 	return err;
 }
 
-/**
- * cgroup_create - create a cgroup
- * @parent: cgroup that will be parent of the new cgroup
- * @name: name of the new cgroup
- * @mode: mode to set on new cgroup
- */
-static long cgroup_create(struct cgroup *parent, const char *name,
-			  umode_t mode)
+static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+			umode_t mode)
 {
-	struct cgroup *cgrp;
-	struct cgroup_root *root = parent->root;
-	int ssid, err;
+	struct cgroup *parent, *cgrp;
+	struct cgroup_root *root;
 	struct cgroup_subsys *ss;
 	struct kernfs_node *kn;
+	int ssid, ret;
 
-	/*
-	 * XXX: The default hierarchy isn't fully implemented yet.  Block
-	 * !root cgroup creation on it for now.
-	 */
-	if (root == &cgrp_dfl_root)
-		return -EINVAL;
+	parent = cgroup_kn_lock_live(parent_kn);
+	if (!parent)
+		return -ENODEV;
+	root = parent->root;
 
 	/* allocate the cgroup and its ID, 0 is reserved for the root */
 	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
-	if (!cgrp)
-		return -ENOMEM;
-
-	mutex_lock(&cgroup_tree_mutex);
-
-	/*
-	 * Only live parents can have children.  Note that the liveliness
-	 * check isn't strictly necessary because cgroup_mkdir() and
-	 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
-	 * anyway so that locking is contained inside cgroup proper and we
-	 * don't get nasty surprises if we ever grow another caller.
-	 */
-	if (!cgroup_lock_live_group(parent)) {
-		err = -ENODEV;
-		goto err_unlock_tree;
+	if (!cgrp) {
+		ret = -ENOMEM;
+		goto out_unlock;
 	}
 
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out_free_cgrp;
+
 	/*
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
 	 */
-	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT);
 	if (cgrp->id < 0) {
-		err = -ENOMEM;
-		goto err_unlock;
+		ret = -ENOMEM;
+		goto out_cancel_ref;
 	}
 
 	init_cgroup_housekeeping(cgrp);
 
-	cgrp->parent = parent;
-	cgrp->dummy_css.parent = &parent->dummy_css;
-	cgrp->root = parent->root;
+	cgrp->self.parent = &parent->self;
+	cgrp->root = root;
 
 	if (notify_on_release(parent))
 		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -3752,8 +4385,8 @@
 	/* create the directory */
 	kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
 	if (IS_ERR(kn)) {
-		err = PTR_ERR(kn);
-		goto err_free_id;
+		ret = PTR_ERR(kn);
+		goto out_free_id;
 	}
 	cgrp->kn = kn;
 
@@ -3763,10 +4396,10 @@
 	 */
 	kernfs_get(kn);
 
-	cgrp->serial_nr = cgroup_serial_nr_next++;
+	cgrp->self.serial_nr = css_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
-	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
+	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
 	atomic_inc(&root->nr_cgrps);
 	cgroup_get(parent);
 
@@ -3774,107 +4407,66 @@
 	 * @cgrp is now fully operational.  If something fails after this
 	 * point, it'll be released via the normal destruction path.
 	 */
-	idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
+	cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
 
-	err = cgroup_kn_set_ugid(kn);
-	if (err)
-		goto err_destroy;
+	ret = cgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
 
-	err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
-	if (err)
-		goto err_destroy;
+	ret = cgroup_addrm_files(cgrp, cgroup_base_files, true);
+	if (ret)
+		goto out_destroy;
 
 	/* let's create and online css's */
 	for_each_subsys(ss, ssid) {
-		if (root->cgrp.subsys_mask & (1 << ssid)) {
-			err = create_css(cgrp, ss);
-			if (err)
-				goto err_destroy;
+		if (parent->child_subsys_mask & (1 << ssid)) {
+			ret = create_css(cgrp, ss);
+			if (ret)
+				goto out_destroy;
 		}
 	}
 
+	/*
+	 * On the default hierarchy, a child doesn't automatically inherit
+	 * child_subsys_mask from the parent.  Each is configured manually.
+	 */
+	if (!cgroup_on_dfl(cgrp))
+		cgrp->child_subsys_mask = parent->child_subsys_mask;
+
 	kernfs_activate(kn);
 
-	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
+	ret = 0;
+	goto out_unlock;
 
-	return 0;
-
-err_free_id:
-	idr_remove(&root->cgroup_idr, cgrp->id);
-err_unlock:
-	mutex_unlock(&cgroup_mutex);
-err_unlock_tree:
-	mutex_unlock(&cgroup_tree_mutex);
+out_free_id:
+	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_cancel_ref:
+	percpu_ref_cancel_init(&cgrp->self.refcnt);
+out_free_cgrp:
 	kfree(cgrp);
-	return err;
-
-err_destroy:
-	cgroup_destroy_locked(cgrp);
-	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
-	return err;
-}
-
-static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
-			umode_t mode)
-{
-	struct cgroup *parent = parent_kn->priv;
-	int ret;
-
-	/*
-	 * cgroup_create() grabs cgroup_tree_mutex which nests outside
-	 * kernfs active_ref and cgroup_create() already synchronizes
-	 * properly against removal through cgroup_lock_live_group().
-	 * Break it before calling cgroup_create().
-	 */
-	cgroup_get(parent);
-	kernfs_break_active_protection(parent_kn);
-
-	ret = cgroup_create(parent, name, mode);
-
-	kernfs_unbreak_active_protection(parent_kn);
-	cgroup_put(parent);
+out_unlock:
+	cgroup_kn_unlock(parent_kn);
 	return ret;
+
+out_destroy:
+	cgroup_destroy_locked(cgrp);
+	goto out_unlock;
 }
 
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
+ * initiate destruction and put the css ref from kill_css().
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
-	struct cgroup *cgrp = css->cgroup;
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
-
-	/*
-	 * css_tryget() is guaranteed to fail now.  Tell subsystems to
-	 * initate destruction.
-	 */
 	offline_css(css);
-
-	/*
-	 * If @cgrp is marked dead, it's waiting for refs of all css's to
-	 * be disabled before proceeding to the second phase of cgroup
-	 * destruction.  If we are the last one, kick it off.
-	 */
-	if (!cgrp->nr_css && cgroup_is_dead(cgrp))
-		cgroup_destroy_css_killed(cgrp);
-
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 
-	/*
-	 * Put the css refs from kill_css().  Each css holds an extra
-	 * reference to the cgroup's dentry and cgroup removal proceeds
-	 * regardless of css refs.  On the last put of each css, whenever
-	 * that may be, the extra dentry ref is put so that dentry
-	 * destruction happens only after all css's are released.
-	 */
 	css_put(css);
 }
 
@@ -3888,9 +4480,18 @@
 	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
-static void __kill_css(struct cgroup_subsys_state *css)
+/**
+ * kill_css - destroy a css
+ * @css: css to destroy
+ *
+ * This function initiates destruction of @css by removing cgroup interface
+ * files and putting its base reference.  ->css_offline() will be invoked
+ * asynchronously once css_tryget_online() is guaranteed to fail and when
+ * the reference count reaches zero, @css will be released.
+ */
+static void kill_css(struct cgroup_subsys_state *css)
 {
-	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
 
 	/*
 	 * This must happen before css is disassociated with its cgroup.
@@ -3907,7 +4508,7 @@
 	/*
 	 * cgroup core guarantees that, by the time ->css_offline() is
 	 * invoked, no new css reference will be given out via
-	 * css_tryget().  We can't simply call percpu_ref_kill() and
+	 * css_tryget_online().  We can't simply call percpu_ref_kill() and
 	 * proceed to offlining css's because percpu_ref_kill() doesn't
 	 * guarantee that the ref is seen as killed on all CPUs on return.
 	 *
@@ -3918,36 +4519,14 @@
 }
 
 /**
- * kill_css - destroy a css
- * @css: css to destroy
- *
- * This function initiates destruction of @css by removing cgroup interface
- * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget() is guaranteed to fail and when the
- * reference count reaches zero, @css will be released.
- */
-static void kill_css(struct cgroup_subsys_state *css)
-{
-	struct cgroup *cgrp = css->cgroup;
-
-	lockdep_assert_held(&cgroup_tree_mutex);
-
-	/* if already killed, noop */
-	if (cgrp->subsys_mask & (1 << css->ss->id)) {
-		cgrp->subsys_mask &= ~(1 << css->ss->id);
-		__kill_css(css);
-	}
-}
-
-/**
  * cgroup_destroy_locked - the first stage of cgroup destruction
  * @cgrp: cgroup to be destroyed
  *
  * css's make use of percpu refcnts whose killing latency shouldn't be
  * exposed to userland and are RCU protected.  Also, cgroup core needs to
- * guarantee that css_tryget() won't succeed by the time ->css_offline() is
- * invoked.  To satisfy all the requirements, destruction is implemented in
- * the following two steps.
+ * guarantee that css_tryget_online() won't succeed by the time
+ * ->css_offline() is invoked.  To satisfy all the requirements,
+ * destruction is implemented in the following two steps.
  *
  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
  *     userland visible parts and start killing the percpu refcnts of
@@ -3966,12 +4545,10 @@
 static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
-	struct cgroup *child;
 	struct cgroup_subsys_state *css;
 	bool empty;
 	int ssid;
 
-	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
 	/*
@@ -3985,127 +4562,68 @@
 		return -EBUSY;
 
 	/*
-	 * Make sure there's no live children.  We can't test ->children
-	 * emptiness as dead children linger on it while being destroyed;
-	 * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
+	 * Make sure there's no live children.  We can't test emptiness of
+	 * ->self.children as dead children linger on it while being
+	 * drained; otherwise, "rmdir parent/child parent" may fail.
 	 */
-	empty = true;
-	rcu_read_lock();
-	list_for_each_entry_rcu(child, &cgrp->children, sibling) {
-		empty = cgroup_is_dead(child);
-		if (!empty)
-			break;
-	}
-	rcu_read_unlock();
-	if (!empty)
+	if (css_has_online_children(&cgrp->self))
 		return -EBUSY;
 
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
-	 * creation by disabling cgroup_lock_live_group().  Note that
-	 * CGRP_DEAD assertion is depended upon by css_next_child() to
-	 * resume iteration after dropping RCU read lock.  See
-	 * css_next_child() for details.
+	 * creation by disabling cgroup_lock_live_group().
 	 */
-	set_bit(CGRP_DEAD, &cgrp->flags);
+	cgrp->self.flags &= ~CSS_ONLINE;
 
-	/*
-	 * Initiate massacre of all css's.  cgroup_destroy_css_killed()
-	 * will be invoked to perform the rest of destruction once the
-	 * percpu refs of all css's are confirmed to be killed.  This
-	 * involves removing the subsystem's files, drop cgroup_mutex.
-	 */
-	mutex_unlock(&cgroup_mutex);
+	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
-	mutex_lock(&cgroup_mutex);
 
-	/* CGRP_DEAD is set, remove from ->release_list for the last time */
+	/* CSS_ONLINE is clear, remove from ->release_list for the last time */
 	raw_spin_lock(&release_list_lock);
 	if (!list_empty(&cgrp->release_list))
 		list_del_init(&cgrp->release_list);
 	raw_spin_unlock(&release_list_lock);
 
 	/*
-	 * If @cgrp has css's attached, the second stage of cgroup
-	 * destruction is kicked off from css_killed_work_fn() after the
-	 * refs of all attached css's are killed.  If @cgrp doesn't have
-	 * any css, we kick it off here.
+	 * Remove @cgrp directory along with the base files.  @cgrp has an
+	 * extra ref on its kn.
 	 */
-	if (!cgrp->nr_css)
-		cgroup_destroy_css_killed(cgrp);
+	kernfs_remove(cgrp->kn);
 
-	/* remove @cgrp directory along with the base files */
-	mutex_unlock(&cgroup_mutex);
+	set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
+	check_for_release(cgroup_parent(cgrp));
 
-	/*
-	 * There are two control paths which try to determine cgroup from
-	 * dentry without going through kernfs - cgroupstats_build() and
-	 * css_tryget_from_dir().  Those are supported by RCU protecting
-	 * clearing of cgrp->kn->priv backpointer, which should happen
-	 * after all files under it have been removed.
-	 */
-	kernfs_remove(cgrp->kn);	/* @cgrp has an extra ref on its kn */
-	RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
-
-	mutex_lock(&cgroup_mutex);
+	/* put the base reference */
+	percpu_ref_kill(&cgrp->self.refcnt);
 
 	return 0;
 };
 
-/**
- * cgroup_destroy_css_killed - the second step of cgroup destruction
- * @work: cgroup->destroy_free_work
- *
- * This function is invoked from a work item for a cgroup which is being
- * destroyed after all css's are offlined and performs the rest of
- * destruction.  This is the second step of destruction described in the
- * comment above cgroup_destroy_locked().
- */
-static void cgroup_destroy_css_killed(struct cgroup *cgrp)
-{
-	struct cgroup *parent = cgrp->parent;
-
-	lockdep_assert_held(&cgroup_tree_mutex);
-	lockdep_assert_held(&cgroup_mutex);
-
-	/* delete this cgroup from parent->children */
-	list_del_rcu(&cgrp->sibling);
-
-	cgroup_put(cgrp);
-
-	set_bit(CGRP_RELEASABLE, &parent->flags);
-	check_for_release(parent);
-}
-
 static int cgroup_rmdir(struct kernfs_node *kn)
 {
-	struct cgroup *cgrp = kn->priv;
+	struct cgroup *cgrp;
 	int ret = 0;
 
-	/*
-	 * This is self-destruction but @kn can't be removed while this
-	 * callback is in progress.  Let's break active protection.  Once
-	 * the protection is broken, @cgrp can be destroyed at any point.
-	 * Pin it so that it stays accessible.
-	 */
-	cgroup_get(cgrp);
-	kernfs_break_active_protection(kn);
+	cgrp = cgroup_kn_lock_live(kn);
+	if (!cgrp)
+		return 0;
+	cgroup_get(cgrp);	/* for @kn->priv clearing */
 
-	mutex_lock(&cgroup_tree_mutex);
-	mutex_lock(&cgroup_mutex);
+	ret = cgroup_destroy_locked(cgrp);
+
+	cgroup_kn_unlock(kn);
 
 	/*
-	 * @cgrp might already have been destroyed while we're trying to
-	 * grab the mutexes.
+	 * There are two control paths which try to determine cgroup from
+	 * dentry without going through kernfs - cgroupstats_build() and
+	 * css_tryget_online_from_dir().  Those are supported by RCU
+	 * protecting clearing of cgrp->kn->priv backpointer, which should
+	 * happen after all files under it have been removed.
 	 */
-	if (!cgroup_is_dead(cgrp))
-		ret = cgroup_destroy_locked(cgrp);
+	if (!ret)
+		RCU_INIT_POINTER(*(void __rcu __force **)&kn->priv, NULL);
 
-	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
-
-	kernfs_unbreak_active_protection(kn);
 	cgroup_put(cgrp);
 	return ret;
 }
@@ -4118,15 +4636,15 @@
 	.rename			= cgroup_rename,
 };
 
-static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
+static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 {
 	struct cgroup_subsys_state *css;
 
 	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
+	idr_init(&ss->css_idr);
 	INIT_LIST_HEAD(&ss->cfts);
 
 	/* Create the root cgroup state for this subsystem */
@@ -4134,7 +4652,21 @@
 	css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
-	init_css(css, ss, &cgrp_dfl_root.cgrp);
+	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+
+	/*
+	 * Root csses are never destroyed and we can't initialize
+	 * percpu_ref during early init.  Disable refcnting.
+	 */
+	css->flags |= CSS_NO_REF;
+
+	if (early) {
+		/* allocation can't be done safely during early init */
+		css->id = 1;
+	} else {
+		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
+		BUG_ON(css->id < 0);
+	}
 
 	/* Update the init_css_set to contain a subsys
 	 * pointer to this state - since the subsystem is
@@ -4151,10 +4683,7 @@
 
 	BUG_ON(online_css(css));
 
-	cgrp_dfl_root.cgrp.subsys_mask |= 1 << ss->id;
-
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 }
 
 /**
@@ -4171,6 +4700,8 @@
 	int i;
 
 	init_cgroup_root(&cgrp_dfl_root, &opts);
+	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
+
 	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
 
 	for_each_subsys(ss, i) {
@@ -4185,7 +4716,7 @@
 		ss->name = cgroup_subsys_name[i];
 
 		if (ss->early_init)
-			cgroup_init_subsys(ss);
+			cgroup_init_subsys(ss, true);
 	}
 	return 0;
 }
@@ -4204,7 +4735,6 @@
 
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
 
-	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
 	/* Add init_css_set to the hash table */
@@ -4214,18 +4744,31 @@
 	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
 
 	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&cgroup_tree_mutex);
 
 	for_each_subsys(ss, ssid) {
-		if (!ss->early_init)
-			cgroup_init_subsys(ss);
+		if (ss->early_init) {
+			struct cgroup_subsys_state *css =
+				init_css_set.subsys[ss->id];
+
+			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
+						   GFP_KERNEL);
+			BUG_ON(css->id < 0);
+		} else {
+			cgroup_init_subsys(ss, false);
+		}
+
+		list_add_tail(&init_css_set.e_cset_node[ssid],
+			      &cgrp_dfl_root.cgrp.e_csets[ssid]);
 
 		/*
-		 * cftype registration needs kmalloc and can't be done
-		 * during early_init.  Register base cftypes separately.
+		 * Setting dfl_root subsys_mask needs to consider the
+		 * disabled flag and cftype registration needs kmalloc,
+		 * both of which aren't available during early_init.
 		 */
-		if (ss->base_cftypes)
+		if (!ss->disabled) {
+			cgrp_dfl_root.subsys_mask |= 1 << ss->id;
 			WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes));
+		}
 	}
 
 	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
@@ -4308,7 +4851,7 @@
 
 		seq_printf(m, "%d:", root->hierarchy_id);
 		for_each_subsys(ss, ssid)
-			if (root->cgrp.subsys_mask & (1 << ssid))
+			if (root->subsys_mask & (1 << ssid))
 				seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
 		if (strlen(root->name))
 			seq_printf(m, "%sname=%s", count ? "," : "",
@@ -4503,8 +5046,8 @@
 
 static void check_for_release(struct cgroup *cgrp)
 {
-	if (cgroup_is_releasable(cgrp) &&
-	    list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) {
+	if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
+	    !css_has_online_children(&cgrp->self)) {
 		/*
 		 * Control Group is currently removeable. If it's not
 		 * already queued for a userspace notification, queue
@@ -4621,7 +5164,7 @@
 __setup("cgroup_disable=", cgroup_disable);
 
 /**
- * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir
+ * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
  * @dentry: directory dentry of interest
  * @ss: subsystem of interest
  *
@@ -4629,8 +5172,8 @@
  * to get the corresponding css and return it.  If such css doesn't exist
  * or can't be pinned, an ERR_PTR value is returned.
  */
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss)
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss)
 {
 	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
 	struct cgroup_subsys_state *css = NULL;
@@ -4646,13 +5189,13 @@
 	/*
 	 * This path doesn't originate from kernfs and @kn could already
 	 * have been or be removed at any point.  @kn->priv is RCU
-	 * protected for this access.  See destroy_locked() for details.
+	 * protected for this access.  See cgroup_rmdir() for details.
 	 */
 	cgrp = rcu_dereference(kn->priv);
 	if (cgrp)
 		css = cgroup_css(cgrp, ss);
 
-	if (!css || !css_tryget(css))
+	if (!css || !css_tryget_online(css))
 		css = ERR_PTR(-ENOENT);
 
 	rcu_read_unlock();
@@ -4669,14 +5212,8 @@
  */
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
 {
-	struct cgroup *cgrp;
-
-	cgroup_assert_mutexes_or_rcu_locked();
-
-	cgrp = idr_find(&ss->root->cgroup_idr, id);
-	if (cgrp)
-		return cgroup_css(cgrp, ss);
-	return NULL;
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return idr_find(&ss->css_idr, id);
 }
 
 #ifdef CONFIG_CGROUP_DEBUG

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 345628c..a79e40f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c

@@ -59,7 +59,7 @@
 
 static struct freezer *parent_freezer(struct freezer *freezer)
 {
-	return css_freezer(css_parent(&freezer->css));
+	return css_freezer(freezer->css.parent);
 }
 
 bool cgroup_freezing(struct task_struct *task)
@@ -73,10 +73,6 @@
 	return ret;
 }
 
-/*
- * cgroups_write_string() limits the size of freezer state strings to
- * CGROUP_LOCAL_BUFFER_SIZE
- */
 static const char *freezer_state_strs(unsigned int state)
 {
 	if (state & CGROUP_FROZEN)
@@ -304,7 +300,7 @@
 
 	/* update states bottom-up */
 	css_for_each_descendant_post(pos, css) {
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
@@ -404,7 +400,7 @@
 		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
@@ -423,20 +419,22 @@
 	mutex_unlock(&freezer_mutex);
 }
 
-static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t freezer_write(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	bool freeze;
 
-	if (strcmp(buffer, freezer_state_strs(0)) == 0)
+	buf = strstrip(buf);
+
+	if (strcmp(buf, freezer_state_strs(0)) == 0)
 		freeze = false;
-	else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
+	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
 		freeze = true;
 	else
 		return -EINVAL;
 
-	freezer_change_state(css_freezer(css), freeze);
-	return 0;
+	freezer_change_state(css_freezer(of_css(of)), freeze);
+	return nbytes;
 }
 
 static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
@@ -460,7 +458,7 @@
 		.name = "state",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = freezer_read,
-		.write_string = freezer_write,
+		.write = freezer_write,
 	},
 	{
 		.name = "self_freezing",

diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf791c..a343bde 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c

@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
+#include <trace/events/power.h>
 
 #include "smpboot.h"
 
@@ -520,7 +521,9 @@
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
 			continue;
+		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 		error = _cpu_down(cpu, 1);
+		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 		if (!error)
 			cpumask_set_cpu(cpu, frozen_cpus);
 		else {
@@ -563,7 +566,9 @@
 	arch_enable_nonboot_cpus_begin();
 
 	for_each_cpu(cpu, frozen_cpus) {
+		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
 		error = _cpu_up(cpu, 1);
+		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 		if (!error) {
 			pr_info("CPU%d is up\n", cpu);
 			continue;

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1300178..f6b33c6 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c

@@ -119,7 +119,7 @@
 
 static inline struct cpuset *parent_cs(struct cpuset *cs)
 {
-	return css_cs(css_parent(&cs->css));
+	return css_cs(cs->css.parent);
 }
 
 #ifdef CONFIG_NUMA
@@ -691,11 +691,8 @@
 		if (nslot == ndoms) {
 			static int warnings = 10;
 			if (warnings) {
-				printk(KERN_WARNING
-				 "rebuild_sched_domains confused:"
-				  " nslot %d, ndoms %d, csn %d, i %d,"
-				  " apn %d\n",
-				  nslot, ndoms, csn, i, apn);
+				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
+					nslot, ndoms, csn, i, apn);
 				warnings--;
 			}
 			continue;
@@ -870,7 +867,7 @@
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -885,6 +882,7 @@
 /**
  * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
  * @cs: the cpuset to consider
+ * @trialcs: trial cpuset
  * @buf: buffer of cpu numbers written to this cpuset
  */
 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
@@ -1105,7 +1103,7 @@
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -1600,13 +1598,15 @@
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
-static int cpuset_write_resmask(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buf)
+static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cpuset *cs = css_cs(of_css(of));
 	struct cpuset *trialcs;
 	int retval = -ENODEV;
 
+	buf = strstrip(buf);
+
 	/*
 	 * CPU or memory hotunplug may leave @cs w/o any execution
 	 * resources, in which case the hotplug code asynchronously updates
@@ -1630,7 +1630,7 @@
 		goto out_unlock;
 	}
 
-	switch (cft->private) {
+	switch (of_cft(of)->private) {
 	case FILE_CPULIST:
 		retval = update_cpumask(cs, trialcs, buf);
 		break;
@@ -1645,7 +1645,7 @@
 	free_trial_cpuset(trialcs);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 /*
@@ -1747,7 +1747,7 @@
 	{
 		.name = "cpus",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
@@ -1755,7 +1755,7 @@
 	{
 		.name = "mems",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
@@ -2011,7 +2011,7 @@
 		parent = parent_cs(parent);
 
 	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
-		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset ");
+		pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
 		pr_cont_cgroup_name(cs->css.cgroup);
 		pr_cont("\n");
 	}
@@ -2149,7 +2149,7 @@
 
 		rcu_read_lock();
 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-			if (cs == &top_cpuset || !css_tryget(&cs->css))
+			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
 				continue;
 			rcu_read_unlock();
 
@@ -2530,7 +2530,7 @@
 
 /**
  * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
- * @task: pointer to task_struct of some task.
+ * @tsk: pointer to task_struct of some task.
  *
- * Description: Prints @task's name, cpuset name, and cached copy of its
+ * Description: Prints @tsk's name, cpuset name, and cached copy of its
  * mems_allowed to the kernel log.
@@ -2548,7 +2548,7 @@
 	cgrp = task_cs(tsk)->css.cgroup;
 	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
 			   tsk->mems_allowed);
-	printk(KERN_INFO "%s cpuset=", tsk->comm);
+	pr_info("%s cpuset=", tsk->comm);
 	pr_cont_cgroup_name(cgrp);
 	pr_cont(" mems_allowed=%s\n", cpuset_nodelist);
 
@@ -2640,10 +2640,10 @@
 /* Display task mems_allowed in /proc/<pid>/status file. */
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
-	seq_printf(m, "Mems_allowed:\t");
+	seq_puts(m, "Mems_allowed:\t");	/* label for the seq_nodemask() output */
 	seq_nodemask(m, &task->mems_allowed);
-	seq_printf(m, "\n");
-	seq_printf(m, "Mems_allowed_list:\t");
+	seq_puts(m, "\n");
+	seq_puts(m, "Mems_allowed_list:\t");	/* label for seq_nodemask_list() */
 	seq_nodemask_list(m, &task->mems_allowed);
-	seq_printf(m, "\n");
+	seq_puts(m, "\n");	/* all four strings are constant: no format args */
 }

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 689237a..5fa58e4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -608,7 +608,8 @@
 	if (!f.file)
 		return -EBADF;
 
-	css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+	css = css_tryget_online_from_dir(f.file->f_dentry,
+					 &perf_event_cgrp_subsys);
 	if (IS_ERR(css)) {
 		ret = PTR_ERR(css);
 		goto out;
@@ -2973,6 +2974,22 @@
 	local_irq_restore(flags);
 }
 
+void perf_event_exec(void)	/* enable-on-exec pass over current's contexts */
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();	/* held across the whole per-context walk */
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];	/* always acts on current */
+		if (!ctx)	/* no context installed in this slot */
+			continue;
+
+		perf_event_enable_on_exec(ctx);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -5074,21 +5091,9 @@
 		       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
 	struct perf_comm_event comm_event;
-	struct perf_event_context *ctx;
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = task->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
-	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
@@ -5100,7 +5105,7 @@
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_COMM,
-				.misc = 0,
+				.misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
 				/* .size */
 			},
 			/* .pid */
@@ -7121,6 +7126,13 @@
 		}
 	}
 
+	if (is_sampling_event(event)) {
+		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+			err = -ENOTSUPP;
+			goto err_alloc;
+		}
+	}
+
 	account_event(event);
 
 	/*
@@ -7432,7 +7444,7 @@
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event;
+	struct perf_event *child_event, *next;
 	struct perf_event_context *child_ctx;
 	unsigned long flags;
 
@@ -7486,7 +7498,7 @@
 	 */
 	mutex_lock(&child_ctx->mutex);
 
-	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
 	mutex_unlock(&child_ctx->mutex);

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index adcd76a..c445e39 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c

@@ -36,6 +36,7 @@
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
+#include <linux/shmem_fs.h>
 
 #include <linux/uprobes.h>
 
@@ -127,7 +128,7 @@
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
 
 	if (is_register)
 		flags |= VM_WRITE;
@@ -279,18 +280,13 @@
  * supported by that architecture then we need to modify is_trap_at_addr and
  * uprobe_write_opcode accordingly. This would never be a problem for archs
  * that have fixed length instructions.
- */
-
-/*
+ *
  * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
  *
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm).
- *
- * For mm @mm, write the opcode at @vaddr.
+ * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
 int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@
 	if (ret <= 0)
 		goto put_old;
 
+	ret = anon_vma_prepare(vma);
+	if (ret)
+		goto put_old;
+
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
 		goto put_old;
 
-	__SetPageUptodate(new_page);
+	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+		goto put_new;
 
+	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
-	ret = anon_vma_prepare(vma);
-	if (ret)
-		goto put_new;
-
 	ret = __replace_page(vma, vaddr, old_page, new_page);
+	if (ret)
+		mem_cgroup_uncharge_page(new_page);
 
 put_new:
 	page_cache_release(new_page);
@@ -537,14 +537,15 @@
 			void *insn, int nbytes, loff_t offset)
 {
 	struct page *page;
-
-	if (!mapping->a_ops->readpage)
-		return -EIO;
 	/*
-	 * Ensure that the page that has the original instruction is
-	 * populated and in page-cache.
+	 * Ensure that the page that has the original instruction is populated
+	 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+	 * see uprobe_register().
 	 */
-	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	if (mapping->a_ops->readpage)
+		page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	else
+		page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
@@ -880,6 +881,9 @@
 	if (!uc->handler && !uc->ret_handler)
 		return -EINVAL;
 
+	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
+	if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+		return -EIO;
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -1361,6 +1365,16 @@
 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{	/* pick the address to report for a trap taken by current */
+	struct uprobe_task *utask = current->utask;	/* may be NULL, checked below */
+
+	if (unlikely(utask && utask->active_uprobe))
+		return utask->vaddr;	/* uprobe active on current: use its vaddr */
+
+	return instruction_pointer(regs);	/* otherwise the IP taken from @regs */
+}
+
 /*
  * Called with no locks held.
  * Called in context of a exiting or a exec-ing thread.

diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index f45b75b..b358a80 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c

@@ -85,6 +85,12 @@
 }
 EXPORT_SYMBOL(__gcov_merge_ior);
 
+void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
+{
+	/* Unused: intentionally empty, both arguments are ignored. */
+}
+EXPORT_SYMBOL(__gcov_merge_time_profile);	/* exported like __gcov_merge_ior */
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *

diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 2c6e463..826ba9f 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c

@@ -18,7 +18,12 @@
 #include <linux/vmalloc.h>
 #include "gcov.h"
 
+#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9	/* gcc 4.9 and later 4.x only */
+#define GCOV_COUNTERS			9
+#else	/* NOTE(review): any other major version lands here too - confirm */
 #define GCOV_COUNTERS			8
+#endif
+
 #define GCOV_TAG_FUNCTION_LENGTH	3
 
 static struct gcov_info *gcov_info_head;

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ceeadfc..3214289 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c

@@ -86,21 +86,8 @@
 	return &(kretprobe_table_locks[hash].lock);
 }
 
-/*
- * Normally, functions that we'd want to prohibit kprobes in, are marked
- * __kprobes. But, there are cases where such functions already belong to
- * a different section (__sched for preempt_schedule)
- *
- * For such cases, we now have a blacklist
- */
-static struct kprobe_blackpoint kprobe_blacklist[] = {
-	{"preempt_schedule",},
-	{"native_get_debugreg",},
-	{"irq_entries_start",},
-	{"common_interrupt",},
-	{"mcount",},	/* mcount can be called from everywhere */
-	{NULL}    /* Terminator */
-};
+/* Blacklist -- list of struct kprobe_blacklist_entry */
+static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
@@ -151,13 +138,13 @@
 	.insn_size = MAX_INSN_SIZE,
 	.nr_garbage = 0,
 };
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
+static int collect_garbage_slots(struct kprobe_insn_cache *c);
 
 /**
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip;
 	kprobe_opcode_t *slot = NULL;
@@ -214,7 +201,7 @@
 }
 
 /* Return 1 if all garbages are collected, otherwise 0. */
-static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
 	kip->slot_used[idx] = SLOT_CLEAN;
 	kip->nused--;
@@ -235,7 +222,7 @@
 	return 0;
 }
 
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip, *next;
 
@@ -257,8 +244,8 @@
 	return 0;
 }
 
-void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
-				kprobe_opcode_t *slot, int dirty)
+void __free_insn_slot(struct kprobe_insn_cache *c,
+		      kprobe_opcode_t *slot, int dirty)
 {
 	struct kprobe_insn_page *kip;
 
@@ -314,7 +301,7 @@
  * 				OR
  * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
  */
-struct kprobe __kprobes *get_kprobe(void *addr)
+struct kprobe *get_kprobe(void *addr)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -327,8 +314,9 @@
 
 	return NULL;
 }
+NOKPROBE_SYMBOL(get_kprobe);
 
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 /* Return true if the kprobe is an aggregator */
 static inline int kprobe_aggrprobe(struct kprobe *p)
@@ -360,7 +348,7 @@
  * Call all pre_handler on the list, but ignores its return value.
  * This must be called from arch-dep optimized caller.
  */
-void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp;
 
@@ -372,9 +360,10 @@
 		reset_kprobe_instance();
 	}
 }
+NOKPROBE_SYMBOL(opt_pre_handler);
 
 /* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -412,7 +401,7 @@
 }
 
 /* Return true(!0) if the probe is queued on (un)optimizing lists */
-static int __kprobes kprobe_queued(struct kprobe *p)
+static int kprobe_queued(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -428,7 +417,7 @@
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
  */
-static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+static struct kprobe *get_optimized_kprobe(unsigned long addr)
 {
 	int i;
 	struct kprobe *p = NULL;
@@ -460,7 +449,7 @@
  * Optimize (replace a breakpoint with a jump) kprobes listed on
  * optimizing_list.
  */
-static __kprobes void do_optimize_kprobes(void)
+static void do_optimize_kprobes(void)
 {
 	/* Optimization never be done when disarmed */
 	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
@@ -488,7 +477,7 @@
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
  * if need) kprobes listed on unoptimizing_list.
  */
-static __kprobes void do_unoptimize_kprobes(void)
+static void do_unoptimize_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -520,7 +509,7 @@
 }
 
 /* Reclaim all kprobes on the free_list */
-static __kprobes void do_free_cleaned_kprobes(void)
+static void do_free_cleaned_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -532,13 +521,13 @@
 }
 
 /* Start optimizer after OPTIMIZE_DELAY passed */
-static __kprobes void kick_kprobe_optimizer(void)
+static void kick_kprobe_optimizer(void)	/* only queues optimizing_work */
 {
 	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
 }
 
 /* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+static void kprobe_optimizer(struct work_struct *work)
 {
 	mutex_lock(&kprobe_mutex);
 	/* Lock modules while optimizing kprobes */
@@ -574,7 +563,7 @@
 }
 
 /* Wait for completing optimization and unoptimization */
-static __kprobes void wait_for_kprobe_optimizer(void)
+static void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
 
@@ -593,7 +582,7 @@
 }
 
 /* Optimize kprobe if p is ready to be optimized */
-static __kprobes void optimize_kprobe(struct kprobe *p)
+static void optimize_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -627,7 +616,7 @@
 }
 
 /* Short cut to direct unoptimizing */
-static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	get_online_cpus();
 	arch_unoptimize_kprobe(op);
@@ -637,7 +626,7 @@
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
+static void unoptimize_kprobe(struct kprobe *p, bool force)
 {
 	struct optimized_kprobe *op;
 
@@ -697,7 +686,7 @@
 }
 
 /* Remove optimized instructions */
-static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+static void kill_optimized_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -723,7 +712,7 @@
 }
 
 /* Try to prepare optimized instructions */
-static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+static void prepare_optimized_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -732,7 +721,7 @@
 }
 
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -747,13 +736,13 @@
 	return &op->kp;
 }
 
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
 
 /*
  * Prepare an optimized_kprobe and optimize it
  * NOTE: p must be a normal registered kprobe
  */
-static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+static void try_to_optimize_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap;
 	struct optimized_kprobe *op;
@@ -787,7 +776,7 @@
 }
 
 #ifdef CONFIG_SYSCTL
-static void __kprobes optimize_all_kprobes(void)
+static void optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -810,7 +799,7 @@
 	mutex_unlock(&kprobe_mutex);
 }
 
-static void __kprobes unoptimize_all_kprobes(void)
+static void unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -861,7 +850,7 @@
 #endif /* CONFIG_SYSCTL */
 
 /* Put a breakpoint for a probe. Must be called with text_mutex locked */
-static void __kprobes __arm_kprobe(struct kprobe *p)
+static void __arm_kprobe(struct kprobe *p)
 {
 	struct kprobe *_p;
 
@@ -876,7 +865,7 @@
 }
 
 /* Remove the breakpoint of a probe. Must be called with text_mutex locked */
-static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
+static void __disarm_kprobe(struct kprobe *p, bool reopt)
 {
 	struct kprobe *_p;
 
@@ -911,13 +900,13 @@
 	BUG_ON(kprobe_unused(ap));
 }
 
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)	/* arch teardown, then kfree */
 {
 	arch_remove_kprobe(p);
 	kfree(p);
 }
 
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)	/* @p is unused */
 {
 	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
 }
@@ -931,7 +920,7 @@
 static int kprobe_ftrace_enabled;
 
 /* Must ensure p->addr is really on ftrace */
-static int __kprobes prepare_kprobe(struct kprobe *p)
+static int prepare_kprobe(struct kprobe *p)
 {
 	if (!kprobe_ftrace(p))
 		return arch_prepare_kprobe(p);
@@ -940,7 +929,7 @@
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
+static void arm_kprobe_ftrace(struct kprobe *p)
 {
 	int ret;
 
@@ -955,7 +944,7 @@
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
+static void disarm_kprobe_ftrace(struct kprobe *p)
 {
 	int ret;
 
@@ -975,7 +964,7 @@
 #endif
 
 /* Arm a kprobe with text_mutex */
-static void __kprobes arm_kprobe(struct kprobe *kp)
+static void arm_kprobe(struct kprobe *kp)
 {
 	if (unlikely(kprobe_ftrace(kp))) {
 		arm_kprobe_ftrace(kp);
@@ -992,7 +981,7 @@
 }
 
 /* Disarm a kprobe with text_mutex */
-static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
+static void disarm_kprobe(struct kprobe *kp, bool reopt)
 {
 	if (unlikely(kprobe_ftrace(kp))) {
 		disarm_kprobe_ftrace(kp);
@@ -1008,7 +997,7 @@
  * Aggregate handlers for multiple kprobes support - these handlers
  * take care of invoking the individual kprobe handlers on p->list
  */
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp;
 
@@ -1022,9 +1011,10 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(aggr_pre_handler);
 
-static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
-					unsigned long flags)
+static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+			      unsigned long flags)
 {
 	struct kprobe *kp;
 
@@ -1036,9 +1026,10 @@
 		}
 	}
 }
+NOKPROBE_SYMBOL(aggr_post_handler);
 
-static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
-					int trapnr)
+static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+			      int trapnr)
 {
 	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 
@@ -1052,8 +1043,9 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(aggr_fault_handler);
 
-static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 	int ret = 0;
@@ -1065,9 +1057,10 @@
 	reset_kprobe_instance();
 	return ret;
 }
+NOKPROBE_SYMBOL(aggr_break_handler);
 
 /* Walks the list and increments nmissed count for multiprobe case */
-void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
+void kprobes_inc_nmissed_count(struct kprobe *p)
 {
 	struct kprobe *kp;
 	if (!kprobe_aggrprobe(p)) {
@@ -1078,9 +1071,10 @@
 	}
 	return;
 }
+NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
-				struct hlist_head *head)
+void recycle_rp_inst(struct kretprobe_instance *ri,
+		     struct hlist_head *head)
 {
 	struct kretprobe *rp = ri->rp;
 
@@ -1095,8 +1089,9 @@
 		/* Unregistering */
 		hlist_add_head(&ri->hlist, head);
 }
+NOKPROBE_SYMBOL(recycle_rp_inst);
 
-void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
+void kretprobe_hash_lock(struct task_struct *tsk,
 			 struct hlist_head **head, unsigned long *flags)
 __acquires(hlist_lock)
 {
@@ -1107,17 +1102,19 @@
 	hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
-static void __kprobes kretprobe_table_lock(unsigned long hash,
-	unsigned long *flags)
+static void kretprobe_table_lock(unsigned long hash,
+				 unsigned long *flags)	/* out: saved IRQ flags */
 __acquires(hlist_lock)
 {
 	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_lock);	/* added as __kprobes is dropped */
 
-void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
-	unsigned long *flags)
+void kretprobe_hash_unlock(struct task_struct *tsk,
+			   unsigned long *flags)
 __releases(hlist_lock)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -1126,14 +1123,16 @@
 	hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_unlock);
 
-static void __kprobes kretprobe_table_unlock(unsigned long hash,
-       unsigned long *flags)
+static void kretprobe_table_unlock(unsigned long hash,
+				   unsigned long *flags)	/* in: flags to restore */
 __releases(hlist_lock)
 {
 	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_unlock);	/* added as __kprobes is dropped */
 
 /*
  * This function is called from finish_task_switch when task tk becomes dead,
@@ -1141,7 +1140,7 @@
  * with this task. These left over instances represent probed functions
  * that have been called but will never return.
  */
-void __kprobes kprobe_flush_task(struct task_struct *tk)
+void kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
@@ -1166,6 +1165,7 @@
 		kfree(ri);
 	}
 }
+NOKPROBE_SYMBOL(kprobe_flush_task);
 
 static inline void free_rp_inst(struct kretprobe *rp)
 {
@@ -1178,7 +1178,7 @@
 	}
 }
 
-static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+static void cleanup_rp_inst(struct kretprobe *rp)
 {
 	unsigned long flags, hash;
 	struct kretprobe_instance *ri;
@@ -1197,12 +1197,13 @@
 	}
 	free_rp_inst(rp);
 }
+NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /*
 * Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
-static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
+static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
@@ -1226,7 +1227,7 @@
  * Fill in the required fields of the "manager kprobe". Replace the
  * earlier kprobe in the hlist with the manager kprobe
  */
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	/* Copy p's insn slot to ap */
 	copy_kprobe(p, ap);
@@ -1252,8 +1253,7 @@
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
-					  struct kprobe *p)
+static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 {
 	int ret = 0;
 	struct kprobe *ap = orig_p;
@@ -1324,25 +1324,29 @@
 	return ret;
 }
 
-static int __kprobes in_kprobes_functions(unsigned long addr)
+bool __weak arch_within_kprobe_blacklist(unsigned long addr) /* weak default */
 {
-	struct kprobe_blackpoint *kb;
+	/* The __kprobes marked functions and entry code must not be probed */
+	return addr >= (unsigned long)__kprobes_text_start &&
+	       addr < (unsigned long)__kprobes_text_end;	/* end is exclusive */
+}
 
-	if (addr >= (unsigned long)__kprobes_text_start &&
-	    addr < (unsigned long)__kprobes_text_end)
-		return -EINVAL;
+static bool within_kprobe_blacklist(unsigned long addr)	/* true: reject addr */
+{
+	struct kprobe_blacklist_entry *ent;
+
+	if (arch_within_kprobe_blacklist(addr))	/* section/arch check first */
+		return true;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
 	 * fail any probe registration in the prohibited area
 	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		if (kb->start_addr) {
-			if (addr >= kb->start_addr &&
-			    addr < (kb->start_addr + kb->range))
-				return -EINVAL;
-		}
+	list_for_each_entry(ent, &kprobe_blacklist, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr) /* [start, end) */
+			return true;
 	}
-	return 0;
+
+	return false;	/* not in the text range check nor on the list */
 }
 
 /*
@@ -1351,7 +1355,7 @@
  * This returns encoded errors if it fails to look up symbol or invalid
  * combination of parameters.
  */
-static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
 {
 	kprobe_opcode_t *addr = p->addr;
 
@@ -1374,7 +1378,7 @@
 }
 
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap, *list_p;
 
@@ -1406,8 +1410,8 @@
 	return ret;
 }
 
-static __kprobes int check_kprobe_address_safe(struct kprobe *p,
-					       struct module **probed_mod)
+static int check_kprobe_address_safe(struct kprobe *p,
+				     struct module **probed_mod)
 {
 	int ret = 0;
 	unsigned long ftrace_addr;
@@ -1433,7 +1437,7 @@
 
 	/* Ensure it is not in reserved area nor out of text */
 	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr) ||
+	    within_kprobe_blacklist((unsigned long) p->addr) ||
 	    jump_label_text_reserved(p->addr, p->addr)) {
 		ret = -EINVAL;
 		goto out;
@@ -1469,7 +1473,7 @@
 	return ret;
 }
 
-int __kprobes register_kprobe(struct kprobe *p)
+int register_kprobe(struct kprobe *p)
 {
 	int ret;
 	struct kprobe *old_p;
@@ -1531,7 +1535,7 @@
 EXPORT_SYMBOL_GPL(register_kprobe);
 
 /* Check if all probes on the aggrprobe are disabled */
-static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+static int aggr_kprobe_disabled(struct kprobe *ap)
 {
 	struct kprobe *kp;
 
@@ -1547,7 +1551,7 @@
 }
 
 /* Disable one kprobe: Make sure called under kprobe_mutex is locked */
-static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
 	struct kprobe *orig_p;
 
@@ -1574,7 +1578,7 @@
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+static int __unregister_kprobe_top(struct kprobe *p)
 {
 	struct kprobe *ap, *list_p;
 
@@ -1631,7 +1635,7 @@
 	return 0;
 }
 
-static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+static void __unregister_kprobe_bottom(struct kprobe *p)
 {
 	struct kprobe *ap;
 
@@ -1647,7 +1651,7 @@
 	/* Otherwise, do nothing. */
 }
 
-int __kprobes register_kprobes(struct kprobe **kps, int num)
+int register_kprobes(struct kprobe **kps, int num)
 {
 	int i, ret = 0;
 
@@ -1665,13 +1669,13 @@
 }
 EXPORT_SYMBOL_GPL(register_kprobes);
 
-void __kprobes unregister_kprobe(struct kprobe *p)
+void unregister_kprobe(struct kprobe *p)	/* one-element unregister_kprobes() */
 {
 	unregister_kprobes(&p, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kprobe);
 
-void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+void unregister_kprobes(struct kprobe **kps, int num)
 {
 	int i;
 
@@ -1700,7 +1704,7 @@
 	return (unsigned long)entry;
 }
 
-int __kprobes register_jprobes(struct jprobe **jps, int num)
+int register_jprobes(struct jprobe **jps, int num)
 {
 	struct jprobe *jp;
 	int ret = 0, i;
@@ -1731,19 +1735,19 @@
 }
 EXPORT_SYMBOL_GPL(register_jprobes);
 
-int __kprobes register_jprobe(struct jprobe *jp)
+int register_jprobe(struct jprobe *jp)	/* returns register_jprobes(&jp, 1) */
 {
 	return register_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(register_jprobe);
 
-void __kprobes unregister_jprobe(struct jprobe *jp)
+void unregister_jprobe(struct jprobe *jp)	/* one-element unregister_jprobes() */
 {
 	unregister_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_jprobe);
 
-void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+void unregister_jprobes(struct jprobe **jps, int num)
 {
 	int i;
 
@@ -1768,8 +1772,7 @@
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
  */
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
 	unsigned long hash, flags = 0;
@@ -1807,8 +1810,9 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -1861,7 +1865,7 @@
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num)
 {
 	int ret = 0, i;
 
@@ -1879,13 +1883,13 @@
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)	/* one-element batch call */
 {
 	unregister_kretprobes(&rp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num)
 {
 	int i;
 
@@ -1908,38 +1912,38 @@
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
 #else /* CONFIG_KRETPROBES */
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)	/* stub: always -ENOSYS */
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num) /* stub: -ENOSYS */
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)	/* stub: empty body */
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num) /* stub: empty */
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);	/* stub variant: always returns 0 */
 
 #endif /* CONFIG_KRETPROBES */
 
 /* Set the kprobe gone and remove its instruction buffer. */
-static void __kprobes kill_kprobe(struct kprobe *p)
+static void kill_kprobe(struct kprobe *p)
 {
 	struct kprobe *kp;
 
@@ -1963,7 +1967,7 @@
 }
 
 /* Disable one kprobe */
-int __kprobes disable_kprobe(struct kprobe *kp)
+int disable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
 
@@ -1979,7 +1983,7 @@
 EXPORT_SYMBOL_GPL(disable_kprobe);
 
 /* Enable one kprobe */
-int __kprobes enable_kprobe(struct kprobe *kp)
+int enable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
 	struct kprobe *p;
@@ -2012,16 +2016,49 @@
 }
 EXPORT_SYMBOL_GPL(enable_kprobe);
 
-void __kprobes dump_kprobe(struct kprobe *kp)
+void dump_kprobe(struct kprobe *kp)	/* log @kp's fields at KERN_WARNING */
 {
 	printk(KERN_WARNING "Dumping kprobe:\n");
 	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
 	       kp->symbol_name, kp->addr, kp->offset);
 }
+NOKPROBE_SYMBOL(dump_kprobe);	/* added as the __kprobes tag is dropped */
+
+/*
+ * Look up and populate the kprobe_blacklist from the addresses in
+ * [@start, @end). Unlike the kretprobe blacklist, we need to determine the
+ * range of addresses that belongs to each function, since a kprobe need not
+ * be at the beginning of a function. Addresses kallsyms cannot resolve are
+ * logged and skipped. Returns 0, or -ENOMEM when an entry cannot be
+ * allocated; entries added before the failure stay on the list.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+					     unsigned long *end)
+{
+	unsigned long *iter;
+	struct kprobe_blacklist_entry *ent;
+	unsigned long offset = 0, size = 0;
+
+	for (iter = start; iter < end; iter++) {
+		if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
+			pr_err("Failed to find blacklist %p\n", (void *)*iter);
+			continue;	/* skip this address, keep going */
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;	/* earlier entries are not freed here */
+		ent->start_addr = *iter;
+		ent->end_addr = *iter + size;	/* exclusive end; offset is not used */
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_blacklist);
+	}
+	return 0;
+}
 
 /* Module notifier call back, checking kprobes on the module */
-static int __kprobes kprobes_module_callback(struct notifier_block *nb,
-					     unsigned long val, void *data)
+static int kprobes_module_callback(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct module *mod = data;
 	struct hlist_head *head;
@@ -2062,14 +2099,13 @@
 	.priority = 0
 };
 
+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
-	unsigned long offset = 0, size = 0;
-	char *modname, namebuf[KSYM_NAME_LEN];
-	const char *symbol_name;
-	void *addr;
-	struct kprobe_blackpoint *kb;
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
@@ -2079,26 +2115,11 @@
 		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
-	/*
-	 * Lookup and populate the kprobe_blacklist.
-	 *
-	 * Unlike the kretprobe blacklist, we'll need to determine
-	 * the range of addresses that belong to the said functions,
-	 * since a kprobe need not necessarily be at the beginning
-	 * of a function.
-	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		kprobe_lookup_name(kb->name, addr);
-		if (!addr)
-			continue;
-
-		kb->start_addr = (unsigned long)addr;
-		symbol_name = kallsyms_lookup(kb->start_addr,
-				&size, &offset, &modname, namebuf);
-		if (!symbol_name)
-			kb->range = 0;
-		else
-			kb->range = size;
+	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+					__stop_kprobe_blacklist);
+	if (err) {
+		pr_err("kprobes: failed to populate blacklist: %d\n", err);
+		pr_err("Please take care of using kprobes.\n");
 	}
 
 	if (kretprobe_blacklist_size) {
@@ -2138,7 +2159,7 @@
 }
 
 #ifdef CONFIG_DEBUG_FS
-static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
+static void report_probe(struct seq_file *pi, struct kprobe *p,
 		const char *sym, int offset, char *modname, struct kprobe *pp)
 {
 	char *kprobe_type;
@@ -2167,12 +2188,12 @@
 		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
 }
 
-static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
+static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
 {
 	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
 }
 
-static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
+static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
 	if (*pos >= KPROBE_TABLE_SIZE)
@@ -2180,12 +2201,12 @@
 	return pos;
 }
 
-static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
+static void kprobe_seq_stop(struct seq_file *f, void *v)
 {
 	/* Nothing to do */
 }
 
-static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
+static int show_kprobe_addr(struct seq_file *pi, void *v)
 {
 	struct hlist_head *head;
 	struct kprobe *p, *kp;
@@ -2216,7 +2237,7 @@
 	.show  = show_kprobe_addr
 };
 
-static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
+static int kprobes_open(struct inode *inode, struct file *filp)
 {
 	return seq_open(filp, &kprobes_seq_ops);
 }
@@ -2228,7 +2249,47 @@
 	.release        = seq_release,
 };
 
-static void __kprobes arm_all_kprobes(void)
+/*
+ * kprobes/blacklist -- shows which functions can not be probed.
+ *
+ * seq_file ->start callback: positions the iterator at element *pos of
+ * the kprobe_blacklist list by delegating to seq_list_start().
+ */
+static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
+{
+	return seq_list_start(&kprobe_blacklist, *pos);
+}
+
+static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &kprobe_blacklist, pos);
+}
+
+static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
+{
+	struct kprobe_blacklist_entry *ent =
+		list_entry(v, struct kprobe_blacklist_entry, list);
+
+	seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
+		   (void *)ent->end_addr, (void *)ent->start_addr);
+	return 0;
+}
+
+static const struct seq_operations kprobe_blacklist_seq_ops = {
+	.start = kprobe_blacklist_seq_start,
+	.next  = kprobe_blacklist_seq_next,
+	.stop  = kprobe_seq_stop,	/* Reuse void function */
+	.show  = kprobe_blacklist_seq_show,
+};
+
+static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &kprobe_blacklist_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_blacklist_ops = {
+	.open           = kprobe_blacklist_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static void arm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -2256,7 +2317,7 @@
 	return;
 }
 
-static void __kprobes disarm_all_kprobes(void)
+static void disarm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -2340,7 +2401,7 @@
 	.llseek =	default_llseek,
 };
 
-static int __kprobes debugfs_kprobe_init(void)
+static int __init debugfs_kprobe_init(void)
 {
 	struct dentry *dir, *file;
 	unsigned int value = 1;
@@ -2351,19 +2412,24 @@
 
 	file = debugfs_create_file("list", 0444, dir, NULL,
 				&debugfs_kprobes_operations);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;
 
 	file = debugfs_create_file("enabled", 0600, dir,
 					&value, &fops_kp);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;
+
+	file = debugfs_create_file("blacklist", 0444, dir, NULL,
+				&debugfs_kprobe_blacklist_ops);
+	if (!file)
+		goto error;
 
 	return 0;
+
+error:
+	debugfs_remove(dir);
+	return -ENOMEM;
 }
 
 late_initcall(debugfs_kprobe_init);

diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4..8541bfd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile

@@ -24,4 +24,5 @@
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
+obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 0000000..fb5b8ac
--- /dev/null
+++ b/kernel/locking/qrwlock.c

@@ -0,0 +1,133 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/qrwlock.h>
+
+/**
+ * rspin_until_writer_unlock - spin until the writer is gone
+ * @lock  : Pointer to queue rwlock structure
+ * @cnts  : Value of @lock->cnts last observed by the caller
+ *
+ * In interrupt context or at the head of the queue, the reader will just
+ * increment the reader count & wait until the writer releases the lock.
+ * This helper implements only the waiting part: while the writer bits
+ * (@cnts & _QW_WMASK) equal _QW_LOCKED it relaxes the CPU and re-reads
+ * @lock->cnts with acquire semantics. It never modifies @lock->cnts.
+ */
+static __always_inline void
+rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
+{
+	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
+		arch_mutex_cpu_relax();
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+	}
+}
+
+/**
+ * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ *
+ * Readers in interrupt context do not queue: they only spin until the
+ * writer bits of @lock->cnts stop reading _QW_LOCKED. Every other reader
+ * subtracts _QR_BIAS from @lock->cnts, queues on @lock->lock, waits for
+ * the writer bits to clear, adds _QR_BIAS back, waits out any writer that
+ * took the lock in between and finally releases @lock->lock.
+ *
+ * NOTE(review): neither path performs the initial _QR_BIAS increment
+ * here; presumably the inline fast path did so before calling in --
+ * confirm against the inline lock code in the qrwlock header.
+ */
+void queue_read_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/*
+	 * Readers come here when they cannot get the lock without waiting
+	 */
+	if (unlikely(in_interrupt())) {
+		/*
+		 * Readers in interrupt context will spin until the lock is
+		 * available without waiting in the queue.
+		 */
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		rspin_until_writer_unlock(lock, cnts);
+		return;
+	}
+	atomic_sub(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Put the reader into the wait queue. The _QR_BIAS subtracted just
+	 * above is added back once this reader is at the head of the queue.
+	 */
+	arch_spin_lock(&lock->lock);
+
+	/*
+	 * At the head of the wait queue now, wait until the writer state
+	 * goes to 0 and then try to increment the reader count and get
+	 * the lock. It is possible that an incoming writer may steal the
+	 * lock in the interim, so it is necessary to check the writer byte
+	 * to make sure that the write lock isn't taken.
+	 */
+	while (atomic_read(&lock->cnts) & _QW_WMASK)
+		arch_mutex_cpu_relax();
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	rspin_until_writer_unlock(lock, cnts);
+
+	/*
+	 * Signal the next one in queue to become queue head
+	 */
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_read_lock_slowpath);
+
+/**
+ * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ *
+ * The writer first queues on @lock->lock. If @lock->cnts is 0 it tries a
+ * direct 0 -> _QW_LOCKED cmpxchg. Otherwise it proceeds in two phases:
+ * set _QW_WAITING once no writer bits are set, then spin until
+ * @lock->cnts is exactly _QW_WAITING and convert that to _QW_LOCKED.
+ * @lock->lock is released before returning on both paths.
+ */
+void queue_write_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&lock->lock);
+
+	/* Try to acquire the lock directly if no reader is present */
+	if (!atomic_read(&lock->cnts) &&
+	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+		goto unlock;
+
+	/*
+	 * Set the waiting flag to notify readers that a writer is pending,
+	 * or wait for a previous writer to go away.
+	 */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if (!(cnts & _QW_WMASK) &&
+		    (atomic_cmpxchg(&lock->cnts, cnts,
+				    cnts | _QW_WAITING) == cnts))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+
+	/* When no more readers, set the locked flag */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if ((cnts == _QW_WAITING) &&
+		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
+				    _QW_LOCKED) == _QW_WAITING))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+unlock:
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_write_lock_slowpath);

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff..dacc321 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c

@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@
 	return sem;
 }
 
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return 0;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the rwsem owner may have
+	 * just acquired it and not set the owner yet or the rwsem
+	 * has been released. In both cases on_cpu keeps its initial
+	 * value of true, i.e. the caller is told it may spin. With an
+	 * owner set, the result is that task's on_cpu flag as sampled
+	 * under rcu_read_lock() above.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu dereference _after_ checking
+	 * that sem->owner still matches owner. If that check fails, owner
+	 * might point to free()d memory; if it still matches, the
+	 * rcu_read_lock() ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We leave the loop above on need_resched(), or when
+	 * owner_running() fails because sem->owner changed or the owner
+	 * is no longer on a CPU. A changed owner is a sign of heavy
+	 * contention, so return success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/*
+	 * sem->wait_lock should not be held when doing optimistic spinning.
+	 * Give up (returning false) when spinning on the owner is not
+	 * worthwhile or when osq_lock() fails.
+	 */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/*
+		 * Try to take the write lock without queueing. On success
+		 * we own the rwsem and return true; sem->wait_lock is never
+		 * taken anywhere in this function.
+		 */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task that will live-lock because we won't let
+		 * the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are
+		 * no active writers, the lock must be read owned; so we try to
+		 * wake any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff143..42f806d 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c

@@ -12,6 +12,27 @@
 
 #include <linux/atomic.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
+
 /*
  * lock for reading
  */
@@ -48,6 +69,7 @@
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
 
+	rwsem_clear_owner(sem);
 	__up_write(sem);
 }
 
@@ -99,6 +125,7 @@
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
+	rwsem_clear_owner(sem);
 	__downgrade_write(sem);
 }
 
@@ -122,6 +149,7 @@
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write_nested);

diff --git a/kernel/module.c b/kernel/module.c
index 079c461..81e727c 100644
--- a/kernel/module.c
+++ b/kernel/module.c

@@ -3020,21 +3020,6 @@
 	 */
 	current->flags &= ~PF_USED_ASYNC;
 
-	blocking_notifier_call_chain(&module_notify_list,
-			MODULE_STATE_COMING, mod);
-
-	/* Set RO and NX regions for core */
-	set_section_ro_nx(mod->module_core,
-				mod->core_text_size,
-				mod->core_ro_size,
-				mod->core_size);
-
-	/* Set RO and NX regions for init */
-	set_section_ro_nx(mod->module_init,
-				mod->init_text_size,
-				mod->init_ro_size,
-				mod->init_size);
-
 	do_mod_ctors(mod);
 	/* Start the module */
 	if (mod->init != NULL)
@@ -3165,9 +3150,26 @@
 	/* This relies on module_mutex for list integrity. */
 	module_bug_finalize(info->hdr, info->sechdrs, mod);
 
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+				mod->core_text_size,
+				mod->core_ro_size,
+				mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+				mod->init_text_size,
+				mod->init_ro_size,
+				mod->init_size);
+
 	/* Mark state as coming so strong_try_module_get() ignores us,
 	 * but kallsyms etc. can see us. */
 	mod->state = MODULE_STATE_COMING;
+	mutex_unlock(&module_mutex);
+
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_COMING, mod);
+	return 0;
 
 out:
 	mutex_unlock(&module_mutex);
@@ -3190,6 +3192,7 @@
 {
 	struct module *mod;
 	long err;
+	char *after_dashes;
 
 	err = module_sig_check(info);
 	if (err)
@@ -3277,10 +3280,15 @@
 		goto ddebug_cleanup;
 
 	/* Module is ready to execute: parsing args may do that. */
-	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-			 -32768, 32767, unknown_module_param_cb);
-	if (err < 0)
+	after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+				  -32768, 32767, unknown_module_param_cb);
+	if (IS_ERR(after_dashes)) {
+		err = PTR_ERR(after_dashes);
 		goto bug_cleanup;
+	} else if (after_dashes) {
+		pr_warn("%s: parameters '%s' after `--' ignored\n",
+		       mod->name, after_dashes);
+	}
 
 	/* Link in to syfs. */
 	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);

diff --git a/kernel/notifier.c b/kernel/notifier.c
index db4c8b0..4803da6 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c

@@ -71,9 +71,9 @@
  *	@returns:	notifier_call_chain returns the value returned by the
  *			last notifier function called.
  */
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-					unsigned long val, void *v,
-					int nr_to_call,	int *nr_calls)
+static int notifier_call_chain(struct notifier_block **nl,
+			       unsigned long val, void *v,
+			       int nr_to_call, int *nr_calls)
 {
 	int ret = NOTIFY_DONE;
 	struct notifier_block *nb, *next_nb;
@@ -102,6 +102,7 @@
 	}
 	return ret;
 }
+NOKPROBE_SYMBOL(notifier_call_chain);
 
 /*
  *	Atomic notifier chain routines.  Registration and unregistration
@@ -172,9 +173,9 @@
  *	Otherwise the return value is the return value
  *	of the last notifier function called.
  */
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-					unsigned long val, void *v,
-					int nr_to_call, int *nr_calls)
+int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+				 unsigned long val, void *v,
+				 int nr_to_call, int *nr_calls)
 {
 	int ret;
 
@@ -184,13 +185,15 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(__atomic_notifier_call_chain);
 
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-		unsigned long val, void *v)
+int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+			       unsigned long val, void *v)
 {
 	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
 }
 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(atomic_notifier_call_chain);
 
 /*
  *	Blocking notifier chain routines.  All access to the chain is
@@ -527,7 +530,7 @@
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notrace __kprobes notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
@@ -540,6 +543,7 @@
 	};
 	return atomic_notifier_call_chain(&die_chain, val, &args);
 }
+NOKPROBE_SYMBOL(notify_die);
 
 int register_die_notifier(struct notifier_block *nb)
 {

diff --git a/kernel/params.c b/kernel/params.c
index b00142e..1e52ca2 100644
--- a/kernel/params.c
+++ b/kernel/params.c

@@ -177,13 +177,13 @@
 }
 
 /* Args looks like "foo=bar,bar2 baz=fuz wiz". */
-int parse_args(const char *doing,
-	       char *args,
-	       const struct kernel_param *params,
-	       unsigned num,
-	       s16 min_level,
-	       s16 max_level,
-	       int (*unknown)(char *param, char *val, const char *doing))
+char *parse_args(const char *doing,
+		 char *args,
+		 const struct kernel_param *params,
+		 unsigned num,
+		 s16 min_level,
+		 s16 max_level,
+		 int (*unknown)(char *param, char *val, const char *doing))
 {
 	char *param, *val;
 
@@ -198,6 +198,9 @@
 		int irq_was_disabled;
 
 		args = next_arg(args, &param, &val);
+		/* Stop at -- */
+		if (!val && strcmp(param, "--") == 0)
+			return args;
 		irq_was_disabled = irqs_disabled();
 		ret = parse_one(param, val, doing, params, num,
 				min_level, max_level, unknown);
@@ -208,22 +211,22 @@
 		switch (ret) {
 		case -ENOENT:
 			pr_err("%s: Unknown parameter `%s'\n", doing, param);
-			return ret;
+			return ERR_PTR(ret);
 		case -ENOSPC:
 			pr_err("%s: `%s' too large for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		case 0:
 			break;
 		default:
 			pr_err("%s: `%s' invalid for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		}
 	}
 
 	/* All parsed OK. */
-	return 0;
+	return NULL;
 }
 
 /* Lazy bastard, eh? */

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index df88d55..49e0a20 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c

@@ -28,6 +28,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <trace/events/power.h>
 
 #include "power.h"
 
@@ -292,7 +293,9 @@
 
 	in_suspend = 1;
 	save_processor_state();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
 	error = swsusp_arch_suspend();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
 	if (error)
 		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
 			error);

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 06ec886..0ca8d83 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c

@@ -17,6 +17,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/kmod.h>
+#include <trace/events/power.h>
 
 /* 
  * Timeout for stopping processes
@@ -175,6 +176,7 @@
 	struct task_struct *g, *p;
 	struct task_struct *curr = current;
 
+	trace_suspend_resume(TPS("thaw_processes"), 0, true);
 	if (pm_freezing)
 		atomic_dec(&system_freezing_cnt);
 	pm_freezing = false;
@@ -201,6 +203,7 @@
 
 	schedule();
 	printk("done.\n");
+	trace_suspend_resume(TPS("thaw_processes"), 0, false);
 }
 
 void thaw_kernel_threads(void)

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 963e6d0..4dd8822 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c

@@ -177,7 +177,9 @@
 	if (error)
 		goto Finish;
 
+	trace_suspend_resume(TPS("freeze_processes"), 0, true);
 	error = suspend_freeze_processes();
+	trace_suspend_resume(TPS("freeze_processes"), 0, false);
 	if (!error)
 		return 0;
 
@@ -240,7 +242,9 @@
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
 		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
 		goto Platform_wake;
 	}
 
@@ -256,7 +260,11 @@
 	if (!error) {
 		*wakeup = pm_wakeup_pending();
 		if (!(suspend_test(TEST_CORE) || *wakeup)) {
+			trace_suspend_resume(TPS("machine_suspend"),
+				state, true);
 			error = suspend_ops->enter(state);
+			trace_suspend_resume(TPS("machine_suspend"),
+				state, false);
 			events_check_enabled = false;
 		}
 		syscore_resume();
@@ -294,7 +302,6 @@
 	if (need_suspend_ops(state) && !suspend_ops)
 		return -ENOSYS;
 
-	trace_machine_suspend(state);
 	if (need_suspend_ops(state) && suspend_ops->begin) {
 		error = suspend_ops->begin(state);
 		if (error)
@@ -331,7 +338,6 @@
 	else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
 		freeze_ops->end();
 
-	trace_machine_suspend(PWR_EVENT_EXIT);
 	return error;
 
  Recover_platform:
@@ -365,6 +371,7 @@
 {
 	int error;
 
+	trace_suspend_resume(TPS("suspend_enter"), state, true);
 	if (state == PM_SUSPEND_FREEZE) {
 #ifdef CONFIG_PM_DEBUG
 		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
@@ -382,9 +389,11 @@
 	if (state == PM_SUSPEND_FREEZE)
 		freeze_begin();
 
+	trace_suspend_resume(TPS("sync_filesystems"), 0, true);
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
 	printk("done.\n");
+	trace_suspend_resume(TPS("sync_filesystems"), 0, false);
 
 	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label);
 	error = suspend_prepare(state);
@@ -394,6 +403,7 @@
 	if (suspend_test(TEST_FREEZER))
 		goto Finish;
 
+	trace_suspend_resume(TPS("suspend_enter"), state, false);
 	pr_debug("PM: Entering %s sleep\n", pm_states[state].label);
 	pm_restrict_gfp_mask();
 	error = suspend_devices_and_enter(state);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 48e78b6..3bdf01b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c

@@ -535,7 +535,7 @@
  	__old;								\
 })
 
-#ifdef TIF_POLLING_NRFLAG
+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
  * this avoids any races wrt polling state changes and thereby avoids
@@ -546,12 +546,44 @@
 	struct thread_info *ti = task_thread_info(p);
 	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
 }
+
+/*
+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set.
+ *
+ * If this returns true, then the idle task promises to call
+ * sched_ttwu_pending() and reschedule soon.
+ *
+ * Returns false, leaving the flags untouched, when @p is not polling.
+ * Returns true when _TIF_NEED_RESCHED was already set, or once the
+ * cmpxchg() below has set it; the loop retries with the freshly read
+ * flags whenever ti->flags changed underneath it.
+ */
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+
+	for (;;) {
+		if (!(val & _TIF_POLLING_NRFLAG))
+			return false;
+		if (val & _TIF_NEED_RESCHED)
+			return true;
+		old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
+		if (old == val)
+			break;
+		val = old;
+	}
+	return true;
+}
+
 #else
 static bool set_nr_and_not_polling(struct task_struct *p)
 {
 	set_tsk_need_resched(p);
 	return true;
 }
+
+#ifdef CONFIG_SMP
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	return false;
+}
+#endif
 #endif
 
 /*
@@ -580,6 +612,8 @@
 
 	if (set_nr_and_not_polling(p))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 void resched_cpu(int cpu)
@@ -642,27 +676,10 @@
 	if (cpu == smp_processor_id())
 		return;
 
-	/*
-	 * This is safe, as this function is called with the timer
-	 * wheel base lock of (cpu) held. When the CPU is on the way
-	 * to idle and has not yet set rq->curr to idle then it will
-	 * be serialized on the timer wheel base lock and take the new
-	 * timer into account automatically.
-	 */
-	if (rq->curr != rq->idle)
-		return;
-
-	/*
-	 * We can set TIF_RESCHED on the idle task of the other CPU
-	 * lockless. The worst case is that the other CPU runs the
-	 * idle task through an additional NOOP schedule()
-	 */
-	set_tsk_need_resched(rq->idle);
-
-	/* NEED_RESCHED must be visible before we test polling */
-	smp_mb();
-	if (!tsk_is_polling(rq->idle))
+	if (set_nr_and_not_polling(rq->idle))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 static bool wake_up_full_nohz_cpu(int cpu)
@@ -888,7 +905,7 @@
 	rq->clock_task += delta;
 
 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		sched_rt_avg_update(rq, irq_delta + steal);
 #endif
 }
@@ -1521,13 +1538,17 @@
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+void sched_ttwu_pending(void)
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
 	struct task_struct *p;
+	unsigned long flags;
 
-	raw_spin_lock(&rq->lock);
+	if (!llist)
+		return;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	while (llist) {
 		p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1535,7 +1556,7 @@
 		ttwu_do_activate(rq, p, 0);
 	}
 
-	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 void scheduler_ipi(void)
@@ -1581,8 +1602,14 @@
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
 {
-	if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
-		smp_send_reschedule(cpu);
+	struct rq *rq = cpu_rq(cpu);
+
+	if (llist_add(&p->wake_entry, &rq->wake_list)) {
+		if (!set_nr_if_polling(rq->idle))
+			smp_send_reschedule(cpu);
+		else
+			trace_sched_wake_idle_without_ipi(cpu);
+	}
 }
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
@@ -2527,7 +2554,7 @@
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
-void __kprobes preempt_count_add(int val)
+void preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2553,8 +2580,9 @@
 	}
 }
 EXPORT_SYMBOL(preempt_count_add);
+NOKPROBE_SYMBOL(preempt_count_add);
 
-void __kprobes preempt_count_sub(int val)
+void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2575,6 +2603,7 @@
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
+NOKPROBE_SYMBOL(preempt_count_sub);
 
 #endif
 
@@ -2857,6 +2886,7 @@
 		barrier();
 	} while (need_resched());
 }
+NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif /* CONFIG_PREEMPT */
 
@@ -4216,7 +4246,7 @@
  *	false (0) if we failed to boost the target.
  *	-ESRCH if there's no task to yield to.
  */
-bool __sched yield_to(struct task_struct *p, bool preempt)
+int __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
@@ -5242,14 +5272,13 @@
 		}
 
 		/*
-		 * Even though we initialize ->power to something semi-sane,
-		 * we leave power_orig unset. This allows us to detect if
+		 * Even though we initialize ->capacity to something semi-sane,
+		 * we leave capacity_orig unset. This allows us to detect if
 		 * domain iteration is still funny without causing /0 traps.
 		 */
-		if (!group->sgp->power_orig) {
+		if (!group->sgc->capacity_orig) {
 			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_power not "
-					"set\n");
+			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 			break;
 		}
 
@@ -5271,9 +5300,9 @@
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgp->power != SCHED_POWER_SCALE) {
-			printk(KERN_CONT " (cpu_power = %d)",
-				group->sgp->power);
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
+			printk(KERN_CONT " (cpu_capacity = %d)",
+				group->sgc->capacity);
 		}
 
 		group = group->next;
@@ -5331,7 +5360,7 @@
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
-			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
 			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
@@ -5362,7 +5391,7 @@
 				SD_BALANCE_NEWIDLE |
 				SD_BALANCE_FORK |
 				SD_BALANCE_EXEC |
-				SD_SHARE_CPUPOWER |
+				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
 				SD_SHARE_POWERDOMAIN);
@@ -5487,7 +5516,7 @@
 	return rd;
 }
 
-static void free_sched_groups(struct sched_group *sg, int free_sgp)
+static void free_sched_groups(struct sched_group *sg, int free_sgc)
 {
 	struct sched_group *tmp, *first;
 
@@ -5498,8 +5527,8 @@
 	do {
 		tmp = sg->next;
 
-		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
-			kfree(sg->sgp);
+		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
+			kfree(sg->sgc);
 
 		kfree(sg);
 		sg = tmp;
@@ -5517,7 +5546,7 @@
 	if (sd->flags & SD_OVERLAP) {
 		free_sched_groups(sd->groups, 1);
 	} else if (atomic_dec_and_test(&sd->groups->ref)) {
-		kfree(sd->groups->sgp);
+		kfree(sd->groups->sgc);
 		kfree(sd->groups);
 	}
 	kfree(sd);
@@ -5728,17 +5757,17 @@
 
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		if (atomic_inc_return(&sg->sgp->ref) == 1)
+		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
+		if (atomic_inc_return(&sg->sgc->ref) == 1)
 			build_group_mask(sd, sg);
 
 		/*
-		 * Initialize sgp->power such that even if we mess up the
+		 * Initialize sgc->capacity such that even if we mess up the
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
-		sg->sgp->power_orig = sg->sgp->power;
+		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -5776,8 +5805,8 @@
 
 	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
-		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+		atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
 	}
 
 	return cpu;
@@ -5786,7 +5815,7 @@
 /*
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
+ * and ->cpu_capacity to 0.
  *
  * Assumes the sched_domain tree is fully constructed
  */
@@ -5840,16 +5869,16 @@
 }
 
 /*
- * Initialize sched groups cpu_power.
+ * Initialize sched groups cpu_capacity.
  *
- * cpu_power indicates the capacity of sched group, which is used while
+ * cpu_capacity indicates the capacity of sched group, which is used while
  * distributing the load between different sched groups in a sched domain.
- * Typically cpu_power for all the groups in a sched domain will be same unless
- * there are asymmetries in the topology. If there are asymmetries, group
- * having more cpu_power will pickup more load compared to the group having
- * less cpu_power.
+ * Typically cpu_capacity for all the groups in a sched domain will be same
+ * unless there are asymmetries in the topology. If there are asymmetries,
+ * group having more cpu_capacity will pickup more load compared to the
+ * group having less cpu_capacity.
  */
-static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
 
@@ -5863,8 +5892,8 @@
 	if (cpu != group_balance_cpu(sg))
 		return;
 
-	update_group_power(sd, cpu);
-	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
+	update_group_capacity(sd, cpu);
+	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -5955,8 +5984,8 @@
 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
-	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
-		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
+		*per_cpu_ptr(sdd->sgc, cpu) = NULL;
 }
 
 #ifdef CONFIG_NUMA
@@ -5969,7 +5998,7 @@
 /*
  * SD_flags allowed in topology descriptions.
  *
- * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_CPUCAPACITY   - describes SMT topologies
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
  * SD_SHARE_POWERDOMAIN   - describes shared power domain
@@ -5978,7 +6007,7 @@
  * SD_ASYM_PACKING        - describes SMT quirks
  */
 #define TOPOLOGY_SD_FLAGS		\
-	(SD_SHARE_CPUPOWER |		\
+	(SD_SHARE_CPUCAPACITY |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
@@ -6024,7 +6053,7 @@
 					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
 					| 1*SD_WAKE_AFFINE
-					| 0*SD_SHARE_CPUPOWER
+					| 0*SD_SHARE_CPUCAPACITY
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6046,7 +6075,7 @@
 	 * Convert topological properties into behaviour.
 	 */
 
-	if (sd->flags & SD_SHARE_CPUPOWER) {
+	if (sd->flags & SD_SHARE_CPUCAPACITY) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
@@ -6358,14 +6387,14 @@
 		if (!sdd->sg)
 			return -ENOMEM;
 
-		sdd->sgp = alloc_percpu(struct sched_group_power *);
-		if (!sdd->sgp)
+		sdd->sgc = alloc_percpu(struct sched_group_capacity *);
+		if (!sdd->sgc)
 			return -ENOMEM;
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -6383,12 +6412,12 @@
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
+			sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
-			if (!sgp)
+			if (!sgc)
 				return -ENOMEM;
 
-			*per_cpu_ptr(sdd->sgp, j) = sgp;
+			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
 
@@ -6415,15 +6444,15 @@
 
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
-			if (sdd->sgp)
-				kfree(*per_cpu_ptr(sdd->sgp, j));
+			if (sdd->sgc)
+				kfree(*per_cpu_ptr(sdd->sgc, j));
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
-		free_percpu(sdd->sgp);
-		sdd->sgp = NULL;
+		free_percpu(sdd->sgc);
+		sdd->sgc = NULL;
 	}
 }
 
@@ -6493,14 +6522,14 @@
 		}
 	}
 
-	/* Calculate CPU power for physical packages and nodes */
+	/* Calculate CPU capacity for physical packages and nodes */
 	for (i = nr_cpumask_bits-1; i >= 0; i--) {
 		if (!cpumask_test_cpu(i, cpu_map))
 			continue;
 
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			claim_allocations(i, sd);
-			init_sched_groups_power(i, sd);
+			init_sched_groups_capacity(i, sd);
 		}
 	}
 
@@ -6943,7 +6972,7 @@
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_power = SCHED_POWER_SCALE;
+		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
@@ -7669,7 +7698,7 @@
 static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css_parent(css));
+	struct task_group *parent = css_tg(css->parent);
 
 	if (parent)
 		sched_online_group(tg, parent);

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index c143ee3..9cf350c 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c

@@ -46,7 +46,7 @@
 
 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 {
-	return css_ca(css_parent(&ca->css));
+	return css_ca(ca->css.parent);
 }
 
 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2b8cbf0..fc4f98b1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c

@@ -57,8 +57,6 @@
 	dl_b->dl_runtime = runtime;
 }
 
-extern unsigned long to_ratio(u64 period, u64 runtime);
-
 void init_dl_bw(struct dl_bw *dl_b)
 {
 	raw_spin_lock_init(&dl_b->lock);

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9855e87..fea7d33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -1017,7 +1017,7 @@
 static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long power_of(int cpu);
+static unsigned long capacity_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
 /* Cached statistics for all CPUs within a node */
@@ -1026,11 +1026,11 @@
 	unsigned long load;
 
 	/* Total compute capacity of CPUs on a node */
-	unsigned long power;
+	unsigned long compute_capacity;
 
 	/* Approximate capacity in terms of runnable tasks on a node */
-	unsigned long capacity;
-	int has_capacity;
+	unsigned long task_capacity;
+	int has_free_capacity;
 };
 
 /*
@@ -1046,7 +1046,7 @@
 
 		ns->nr_running += rq->nr_running;
 		ns->load += weighted_cpuload(cpu);
-		ns->power += power_of(cpu);
+		ns->compute_capacity += capacity_of(cpu);
 
 		cpus++;
 	}
@@ -1056,15 +1056,16 @@
 	 * the @ns structure is NULL'ed and task_numa_compare() will
 	 * not find this node attractive.
 	 *
-	 * We'll either bail at !has_capacity, or we'll detect a huge imbalance
-	 * and bail there.
+	 * We'll either bail at !has_free_capacity, or we'll detect a huge
+	 * imbalance and bail there.
 	 */
 	if (!cpus)
 		return;
 
-	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
-	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
-	ns->has_capacity = (ns->nr_running < ns->capacity);
+	ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
+	ns->task_capacity =
+		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
+	ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
 }
 
 struct task_numa_env {
@@ -1195,8 +1196,8 @@
 
 	if (!cur) {
 		/* Is there capacity at our destination? */
-		if (env->src_stats.has_capacity &&
-		    !env->dst_stats.has_capacity)
+		if (env->src_stats.has_free_capacity &&
+		    !env->dst_stats.has_free_capacity)
 			goto unlock;
 
 		goto balance;
@@ -1213,7 +1214,7 @@
 	orig_dst_load = env->dst_stats.load;
 	orig_src_load = env->src_stats.load;
 
-	/* XXX missing power terms */
+	/* XXX missing capacity terms */
 	load = task_h_load(env->p);
 	dst_load = orig_dst_load + load;
 	src_load = orig_src_load - load;
@@ -1301,8 +1302,8 @@
 	groupimp = group_weight(p, env.dst_nid) - groupweight;
 	update_numa_stats(&env.dst_stats, env.dst_nid);
 
-	/* If the preferred nid has capacity, try to use it. */
-	if (env.dst_stats.has_capacity)
+	/* If the preferred nid has free capacity, try to use it. */
+	if (env.dst_stats.has_free_capacity)
 		task_numa_find_cpu(&env, taskimp, groupimp);
 
 	/* No space available on the preferred nid. Look elsewhere. */
@@ -3225,10 +3226,12 @@
 	 * has not truly expired.
 	 *
 	 * Fortunately we can check determine whether this the case by checking
-	 * whether the global deadline has advanced.
+	 * whether the global deadline has advanced. It is valid to compare
+	 * cfs_b->runtime_expires without any locks since we only care about
+	 * exact equality, so a partial write will still work.
 	 */
 
-	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+	if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
 		/* extend local deadline, drift is bounded above by 2 ticks */
 		cfs_rq->runtime_expires += TICK_NSEC;
 	} else {
@@ -3457,21 +3460,21 @@
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
 	u64 runtime, runtime_expires;
-	int idle = 1, throttled;
+	int throttled;
 
-	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
-		goto out_unlock;
+		goto out_deactivate;
 
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-	/* idle depends on !throttled (for the case of a large deficit) */
-	idle = cfs_b->idle && !throttled;
 	cfs_b->nr_periods += overrun;
 
-	/* if we're going inactive then everything else can be deferred */
-	if (idle)
-		goto out_unlock;
+	/*
+	 * idle depends on !throttled (for the case of a large deficit), and if
+	 * we're going inactive then everything else can be deferred
+	 */
+	if (cfs_b->idle && !throttled)
+		goto out_deactivate;
 
 	/*
 	 * if we have relooped after returning idle once, we need to update our
@@ -3485,7 +3488,7 @@
 	if (!throttled) {
 		/* mark as potentially idle for the upcoming period */
 		cfs_b->idle = 1;
-		goto out_unlock;
+		return 0;
 	}
 
 	/* account preceding periods in which throttling occurred */
@@ -3525,12 +3528,12 @@
 	 * timer to remain active while there are any throttled entities.)
 	 */
 	cfs_b->idle = 0;
-out_unlock:
-	if (idle)
-		cfs_b->timer_active = 0;
-	raw_spin_unlock(&cfs_b->lock);
 
-	return idle;
+	return 0;
+
+out_deactivate:
+	cfs_b->timer_active = 0;
+	return 1;
 }
 
 /* a cfs_rq won't donate quota below this amount */
@@ -3707,6 +3710,7 @@
 	int overrun;
 	int idle = 0;
 
+	raw_spin_lock(&cfs_b->lock);
 	for (;;) {
 		now = hrtimer_cb_get_time(timer);
 		overrun = hrtimer_forward(timer, now, cfs_b->period);
@@ -3716,6 +3720,7 @@
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
@@ -3775,8 +3780,6 @@
 	struct cfs_rq *cfs_rq;
 
 	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -3784,7 +3787,7 @@
 		 * clock_task is not advancing so we just need to make sure
 		 * there's some valid quota amount
 		 */
-		cfs_rq->runtime_remaining = cfs_b->quota;
+		cfs_rq->runtime_remaining = 1;
 		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 	}
@@ -4041,9 +4044,9 @@
 	return max(rq->cpu_load[type-1], total);
 }
 
-static unsigned long power_of(int cpu)
+static unsigned long capacity_of(int cpu)
 {
-	return cpu_rq(cpu)->cpu_power;
+	return cpu_rq(cpu)->cpu_capacity;
 }
 
 static unsigned long cpu_avg_load_per_task(int cpu)
@@ -4065,7 +4068,7 @@
 	 * about the boundary, really active task won't care
 	 * about the loss.
 	 */
-	if (jiffies > current->wakee_flip_decay_ts + HZ) {
+	if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
 		current->wakee_flips >>= 1;
 		current->wakee_flip_decay_ts = jiffies;
 	}
@@ -4286,12 +4289,12 @@
 		s64 this_eff_load, prev_eff_load;
 
 		this_eff_load = 100;
-		this_eff_load *= power_of(prev_cpu);
+		this_eff_load *= capacity_of(prev_cpu);
 		this_eff_load *= this_load +
 			effective_load(tg, this_cpu, weight, weight);
 
 		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-		prev_eff_load *= power_of(this_cpu);
+		prev_eff_load *= capacity_of(this_cpu);
 		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
 
 		balanced = this_eff_load <= prev_eff_load;
@@ -4367,8 +4370,8 @@
 			avg_load += load;
 		}
 
-		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
+		/* Adjust by relative CPU capacity of the group */
+		avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -4948,14 +4951,14 @@
  *
  *   W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0               (3)
  *
- * P_i is the cpu power (or compute capacity) of cpu i, typically it is the
+ * C_i is the compute capacity of cpu i, typically it is the
  * fraction of 'recent' time available for SCHED_OTHER task execution. But it
  * can also include other factors [XXX].
  *
  * To achieve this balance we define a measure of imbalance which follows
  * directly from (1):
  *
- *   imb_i,j = max{ avg(W/P), W_i/P_i } - min{ avg(W/P), W_j/P_j }    (4)
+ *   imb_i,j = max{ avg(W/C), W_i/C_i } - min{ avg(W/C), W_j/C_j }    (4)
  *
  * We them move tasks around to minimize the imbalance. In the continuous
  * function space it is obvious this converges, in the discrete case we get
@@ -5530,13 +5533,13 @@
 	unsigned long group_load; /* Total load over the CPUs of the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
-	unsigned long group_power;
+	unsigned long group_capacity;
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
-	unsigned int group_capacity;
+	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
 	unsigned int group_weight;
 	int group_imb; /* Is there an imbalance in the group ? */
-	int group_has_capacity; /* Is there extra capacity in the group? */
+	int group_has_free_capacity;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -5551,7 +5554,7 @@
 	struct sched_group *busiest;	/* Busiest group in this sd */
 	struct sched_group *local;	/* Local group in this sd */
 	unsigned long total_load;	/* Total load of all groups in sd */
-	unsigned long total_pwr;	/* Total power of all groups in sd */
+	unsigned long total_capacity;	/* Total capacity of all groups in sd */
 	unsigned long avg_load;	/* Average load across all groups in sd */
 
 	struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -5570,7 +5573,7 @@
 		.busiest = NULL,
 		.local = NULL,
 		.total_load = 0UL,
-		.total_pwr = 0UL,
+		.total_capacity = 0UL,
 		.busiest_stat = {
 			.avg_load = 0UL,
 		},
@@ -5605,17 +5608,17 @@
 	return load_idx;
 }
 
-static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
 {
-	return SCHED_POWER_SCALE;
+	return SCHED_CAPACITY_SCALE;
 }
 
-unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
-	return default_scale_freq_power(sd, cpu);
+	return default_scale_capacity(sd, cpu);
 }
 
-static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
 	unsigned long smt_gain = sd->smt_gain;
@@ -5625,12 +5628,12 @@
 	return smt_gain;
 }
 
-unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
-	return default_scale_smt_power(sd, cpu);
+	return default_scale_smt_capacity(sd, cpu);
 }
 
-static unsigned long scale_rt_power(int cpu)
+static unsigned long scale_rt_capacity(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	u64 total, available, age_stamp, avg;
@@ -5650,71 +5653,71 @@
 	total = sched_avg_period() + delta;
 
 	if (unlikely(total < avg)) {
-		/* Ensures that power won't end up being negative */
+		/* Ensures that capacity won't end up being negative */
 		available = 0;
 	} else {
 		available = total - avg;
 	}
 
-	if (unlikely((s64)total < SCHED_POWER_SCALE))
-		total = SCHED_POWER_SCALE;
+	if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
+		total = SCHED_CAPACITY_SCALE;
 
-	total >>= SCHED_POWER_SHIFT;
+	total >>= SCHED_CAPACITY_SHIFT;
 
 	return div_u64(available, total);
 }
 
-static void update_cpu_power(struct sched_domain *sd, int cpu)
+static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
-	unsigned long power = SCHED_POWER_SCALE;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		if (sched_feat(ARCH_POWER))
-			power *= arch_scale_smt_power(sd, cpu);
+	if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) {
+		if (sched_feat(ARCH_CAPACITY))
+			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
-			power *= default_scale_smt_power(sd, cpu);
+			capacity *= default_scale_smt_capacity(sd, cpu);
 
-		power >>= SCHED_POWER_SHIFT;
+		capacity >>= SCHED_CAPACITY_SHIFT;
 	}
 
-	sdg->sgp->power_orig = power;
+	sdg->sgc->capacity_orig = capacity;
 
-	if (sched_feat(ARCH_POWER))
-		power *= arch_scale_freq_power(sd, cpu);
+	if (sched_feat(ARCH_CAPACITY))
+		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
-		power *= default_scale_freq_power(sd, cpu);
+		capacity *= default_scale_capacity(sd, cpu);
 
-	power >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
-	power *= scale_rt_power(cpu);
-	power >>= SCHED_POWER_SHIFT;
+	capacity *= scale_rt_capacity(cpu);
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
-	if (!power)
-		power = 1;
+	if (!capacity)
+		capacity = 1;
 
-	cpu_rq(cpu)->cpu_power = power;
-	sdg->sgp->power = power;
+	cpu_rq(cpu)->cpu_capacity = capacity;
+	sdg->sgc->capacity = capacity;
 }
 
-void update_group_power(struct sched_domain *sd, int cpu)
+void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long power, power_orig;
+	unsigned long capacity, capacity_orig;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
 	interval = clamp(interval, 1UL, max_load_balance_interval);
-	sdg->sgp->next_update = jiffies + interval;
+	sdg->sgc->next_update = jiffies + interval;
 
 	if (!child) {
-		update_cpu_power(sd, cpu);
+		update_cpu_capacity(sd, cpu);
 		return;
 	}
 
-	power_orig = power = 0;
+	capacity_orig = capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -5723,31 +5726,31 @@
 		 */
 
 		for_each_cpu(cpu, sched_group_cpus(sdg)) {
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
 			/*
-			 * build_sched_domains() -> init_sched_groups_power()
+			 * build_sched_domains() -> init_sched_groups_capacity()
 			 * gets here before we've attached the domains to the
 			 * runqueues.
 			 *
-			 * Use power_of(), which is set irrespective of domains
-			 * in update_cpu_power().
+			 * Use capacity_of(), which is set irrespective of domains
+			 * in update_cpu_capacity().
 			 *
-			 * This avoids power/power_orig from being 0 and
+			 * This avoids capacity/capacity_orig from being 0 and
 			 * causing divide-by-zero issues on boot.
 			 *
-			 * Runtime updates will correct power_orig.
+			 * Runtime updates will correct capacity_orig.
 			 */
 			if (unlikely(!rq->sd)) {
-				power_orig += power_of(cpu);
-				power += power_of(cpu);
+				capacity_orig += capacity_of(cpu);
+				capacity += capacity_of(cpu);
 				continue;
 			}
 
-			sgp = rq->sd->groups->sgp;
-			power_orig += sgp->power_orig;
-			power += sgp->power;
+			sgc = rq->sd->groups->sgc;
+			capacity_orig += sgc->capacity_orig;
+			capacity += sgc->capacity;
 		}
 	} else  {
 		/*
@@ -5757,14 +5760,14 @@
 
 		group = child->groups;
 		do {
-			power_orig += group->sgp->power_orig;
-			power += group->sgp->power;
+			capacity_orig += group->sgc->capacity_orig;
+			capacity += group->sgc->capacity;
 			group = group->next;
 		} while (group != child->groups);
 	}
 
-	sdg->sgp->power_orig = power_orig;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity_orig = capacity_orig;
+	sdg->sgc->capacity = capacity;
 }
 
 /*
@@ -5778,15 +5781,15 @@
 fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 {
 	/*
-	 * Only siblings can have significantly less than SCHED_POWER_SCALE
+	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
-	if (!(sd->flags & SD_SHARE_CPUPOWER))
+	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
 		return 0;
 
 	/*
-	 * If ~90% of the cpu_power is still there, we're good.
+	 * If ~90% of the cpu_capacity is still there, we're good.
 	 */
-	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
+	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
 		return 1;
 
 	return 0;
@@ -5823,34 +5826,35 @@
 
 static inline int sg_imbalanced(struct sched_group *group)
 {
-	return group->sgp->imbalance;
+	return group->sgc->imbalance;
 }
 
 /*
- * Compute the group capacity.
+ * Compute the group capacity factor.
  *
- * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by
+ * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
  * first dividing out the smt factor and computing the actual number of cores
- * and limit power unit capacity with that.
+ * and limit unit capacity with that.
  */
-static inline int sg_capacity(struct lb_env *env, struct sched_group *group)
+static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
 {
-	unsigned int capacity, smt, cpus;
-	unsigned int power, power_orig;
+	unsigned int capacity_factor, smt, cpus;
+	unsigned int capacity, capacity_orig;
 
-	power = group->sgp->power;
-	power_orig = group->sgp->power_orig;
+	capacity = group->sgc->capacity;
+	capacity_orig = group->sgc->capacity_orig;
 	cpus = group->group_weight;
 
-	/* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig);
-	capacity = cpus / smt; /* cores */
+	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
+	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
+	capacity_factor = cpus / smt; /* cores */
 
-	capacity = min_t(unsigned, capacity, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE));
-	if (!capacity)
-		capacity = fix_small_capacity(env->sd, group);
+	capacity_factor = min_t(unsigned,
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
+	if (!capacity_factor)
+		capacity_factor = fix_small_capacity(env->sd, group);
 
-	return capacity;
+	return capacity_factor;
 }
 
 /**
@@ -5890,9 +5894,9 @@
 			sgs->idle_cpus++;
 	}
 
-	/* Adjust by relative CPU power of the group */
-	sgs->group_power = group->sgp->power;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power;
+	/* Adjust by relative CPU capacity of the group */
+	sgs->group_capacity = group->sgc->capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -5900,10 +5904,10 @@
 	sgs->group_weight = group->group_weight;
 
 	sgs->group_imb = sg_imbalanced(group);
-	sgs->group_capacity = sg_capacity(env, group);
+	sgs->group_capacity_factor = sg_capacity_factor(env, group);
 
-	if (sgs->group_capacity > sgs->sum_nr_running)
-		sgs->group_has_capacity = 1;
+	if (sgs->group_capacity_factor > sgs->sum_nr_running)
+		sgs->group_has_free_capacity = 1;
 }
 
 /**
@@ -5927,7 +5931,7 @@
 	if (sgs->avg_load <= sds->busiest_stat.avg_load)
 		return false;
 
-	if (sgs->sum_nr_running > sgs->group_capacity)
+	if (sgs->sum_nr_running > sgs->group_capacity_factor)
 		return true;
 
 	if (sgs->group_imb)
@@ -6007,8 +6011,8 @@
 			sgs = &sds->local_stat;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
-			    time_after_eq(jiffies, sg->sgp->next_update))
-				update_group_power(env->sd, env->dst_cpu);
+			    time_after_eq(jiffies, sg->sgc->next_update))
+				update_group_capacity(env->sd, env->dst_cpu);
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
@@ -6018,17 +6022,17 @@
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
-		 * first, lower the sg capacity to one so that we'll try
+		 * first, lower the sg capacity factor to one so that we'll try
 		 * and move all the excess tasks away. We lower the capacity
 		 * of a group only if the local group has the capacity to fit
-		 * these excess tasks, i.e. nr_running < group_capacity. The
+		 * these excess tasks, i.e. nr_running < group_capacity_factor. The
 		 * extra check prevents the case where you always pull from the
 		 * heaviest group when it is already under-utilized (possible
 		 * with a large weight task outweighs the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    sds->local_stat.group_has_capacity)
-			sgs->group_capacity = min(sgs->group_capacity, 1U);
+		    sds->local_stat.group_has_free_capacity)
+			sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
 
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
@@ -6038,7 +6042,7 @@
 next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
-		sds->total_pwr += sgs->group_power;
+		sds->total_capacity += sgs->group_capacity;
 
 		sg = sg->next;
 	} while (sg != env->sd->groups);
@@ -6085,8 +6089,8 @@
 		return 0;
 
 	env->imbalance = DIV_ROUND_CLOSEST(
-		sds->busiest_stat.avg_load * sds->busiest_stat.group_power,
-		SCHED_POWER_SCALE);
+		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
+		SCHED_CAPACITY_SCALE);
 
 	return 1;
 }
@@ -6101,7 +6105,7 @@
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	unsigned long tmp, pwr_now = 0, pwr_move = 0;
+	unsigned long tmp, capa_now = 0, capa_move = 0;
 	unsigned int imbn = 2;
 	unsigned long scaled_busy_load_per_task;
 	struct sg_lb_stats *local, *busiest;
@@ -6115,8 +6119,8 @@
 		imbn = 1;
 
 	scaled_busy_load_per_task =
-		(busiest->load_per_task * SCHED_POWER_SCALE) /
-		busiest->group_power;
+		(busiest->load_per_task * SCHED_CAPACITY_SCALE) /
+		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
 	    local->avg_load + (scaled_busy_load_per_task * imbn)) {
@@ -6126,38 +6130,38 @@
 
 	/*
 	 * OK, we don't have enough imbalance to justify moving tasks,
-	 * however we may be able to increase total CPU power used by
+	 * however we may be able to increase total CPU capacity used by
 	 * moving them.
 	 */
 
-	pwr_now += busiest->group_power *
+	capa_now += busiest->group_capacity *
 			min(busiest->load_per_task, busiest->avg_load);
-	pwr_now += local->group_power *
+	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	pwr_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_CAPACITY_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
-		pwr_move += busiest->group_power *
+		capa_move += busiest->group_capacity *
 			    min(busiest->load_per_task,
 				busiest->avg_load - scaled_busy_load_per_task);
 	}
 
 	/* Amount of load we'd add */
-	if (busiest->avg_load * busiest->group_power <
-	    busiest->load_per_task * SCHED_POWER_SCALE) {
-		tmp = (busiest->avg_load * busiest->group_power) /
-		      local->group_power;
+	if (busiest->avg_load * busiest->group_capacity <
+	    busiest->load_per_task * SCHED_CAPACITY_SCALE) {
+		tmp = (busiest->avg_load * busiest->group_capacity) /
+		      local->group_capacity;
 	} else {
-		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
-		      local->group_power;
+		tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
+		      local->group_capacity;
 	}
-	pwr_move += local->group_power *
+	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	pwr_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_CAPACITY_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move > pwr_now)
+	if (capa_move > capa_now)
 		env->imbalance = busiest->load_per_task;
 }
 
@@ -6187,7 +6191,7 @@
 	/*
 	 * In the presence of smp nice balancing, certain scenarios can have
 	 * max load less than avg load(as we skip the groups at or below
-	 * its cpu_power, while calculating max_load..)
+	 * its cpu_capacity, while calculating max_load..)
 	 */
 	if (busiest->avg_load <= sds->avg_load ||
 	    local->avg_load >= sds->avg_load) {
@@ -6202,10 +6206,10 @@
 		 * have to drop below capacity to reach cpu-load equilibrium.
 		 */
 		load_above_capacity =
-			(busiest->sum_nr_running - busiest->group_capacity);
+			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
-		load_above_capacity /= busiest->group_power;
+		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
+		load_above_capacity /= busiest->group_capacity;
 	}
 
 	/*
@@ -6220,9 +6224,9 @@
 
 	/* How much load to actually move to equalise the imbalance */
 	env->imbalance = min(
-		max_pull * busiest->group_power,
-		(sds->avg_load - local->avg_load) * local->group_power
-	) / SCHED_POWER_SCALE;
+		max_pull * busiest->group_capacity,
+		(sds->avg_load - local->avg_load) * local->group_capacity
+	) / SCHED_CAPACITY_SCALE;
 
 	/*
 	 * if *imbalance is less than the average load per runnable task
@@ -6276,7 +6280,8 @@
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
+	sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
+						/ sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6287,8 +6292,8 @@
 		goto force_balance;
 
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-	if (env->idle == CPU_NEWLY_IDLE && local->group_has_capacity &&
-	    !busiest->group_has_capacity)
+	if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
+	    !busiest->group_has_free_capacity)
 		goto force_balance;
 
 	/*
@@ -6342,11 +6347,11 @@
 				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
-	unsigned long busiest_load = 0, busiest_power = 1;
+	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-		unsigned long power, capacity, wl;
+		unsigned long capacity, capacity_factor, wl;
 		enum fbq_type rt;
 
 		rq = cpu_rq(i);
@@ -6374,34 +6379,34 @@
 		if (rt > env->fbq_type)
 			continue;
 
-		power = power_of(i);
-		capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-		if (!capacity)
-			capacity = fix_small_capacity(env->sd, group);
+		capacity = capacity_of(i);
+		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
+		if (!capacity_factor)
+			capacity_factor = fix_small_capacity(env->sd, group);
 
 		wl = weighted_cpuload(i);
 
 		/*
 		 * When comparing with imbalance, use weighted_cpuload()
-		 * which is not scaled with the cpu power.
+		 * which is not scaled with the cpu capacity.
 		 */
-		if (capacity && rq->nr_running == 1 && wl > env->imbalance)
+		if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
 			continue;
 
 		/*
 		 * For the load comparisons with the other cpu's, consider
-		 * the weighted_cpuload() scaled with the cpu power, so that
-		 * the load can be moved away from the cpu that is potentially
-		 * running at a lower capacity.
+		 * the weighted_cpuload() scaled with the cpu capacity, so
+		 * that the load can be moved away from the cpu that is
+		 * potentially running at a lower capacity.
 		 *
-		 * Thus we're looking for max(wl_i / power_i), crosswise
+		 * Thus we're looking for max(wl_i / capacity_i), crosswise
 		 * multiplication to rid ourselves of the division works out
-		 * to: wl_i * power_j > wl_j * power_i;  where j is our
-		 * previous maximum.
+		 * to: wl_i * capacity_j > wl_j * capacity_i;  where j is
+		 * our previous maximum.
 		 */
-		if (wl * busiest_power > busiest_load * power) {
+		if (wl * busiest_capacity > busiest_load * capacity) {
 			busiest_load = wl;
-			busiest_power = power;
+			busiest_capacity = capacity;
 			busiest = rq;
 		}
 	}
@@ -6609,7 +6614,7 @@
 		 * We failed to reach balance because of affinity.
 		 */
 		if (sd_parent) {
-			int *group_imbalance = &sd_parent->groups->sgp->imbalance;
+			int *group_imbalance = &sd_parent->groups->sgc->imbalance;
 
 			if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
 				*group_imbalance = 1;
@@ -6996,7 +7001,7 @@
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7013,7 +7018,7 @@
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7192,12 +7197,17 @@
 
 		rq = cpu_rq(balance_cpu);
 
-		raw_spin_lock_irq(&rq->lock);
-		update_rq_clock(rq);
-		update_idle_cpu_load(rq);
-		raw_spin_unlock_irq(&rq->lock);
-
-		rebalance_domains(rq, CPU_IDLE);
+		/*
+		 * If time for next balance is due,
+		 * do the balance.
+		 */
+		if (time_after_eq(jiffies, rq->next_balance)) {
+			raw_spin_lock_irq(&rq->lock);
+			update_rq_clock(rq);
+			update_idle_cpu_load(rq);
+			raw_spin_unlock_irq(&rq->lock);
+			rebalance_domains(rq, CPU_IDLE);
+		}
 
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
@@ -7212,7 +7222,7 @@
  * of an idle cpu is the system.
  *   - This rq has more than one task.
  *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's power.
+ *     busy cpu's exceeding the group's capacity.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
@@ -7220,7 +7230,7 @@
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
 
 	if (unlikely(rq->idle_balance))
@@ -7250,8 +7260,8 @@
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (sd) {
-		sgp = sd->groups->sgp;
-		nr_busy = atomic_read(&sgp->nr_busy_cpus);
+		sgc = sd->groups->sgc;
+		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
 		if (nr_busy > 1)
 			goto need_kick_unlock;

diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5716929..90284d1 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h

@@ -37,18 +37,18 @@
 SCHED_FEAT(WAKEUP_PREEMPTION, true)
 
 /*
- * Use arch dependent cpu power functions
+ * Use arch dependent cpu capacity functions
  */
-SCHED_FEAT(ARCH_POWER, true)
+SCHED_FEAT(ARCH_CAPACITY, true)
 
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Decrement CPU power based on time not spent running tasks
+ * Decrement CPU capacity based on time not spent running tasks
  */
-SCHED_FEAT(NONTASK_POWER, true)
+SCHED_FEAT(NONTASK_CAPACITY, true)
 
 /*
  * Queue remote wakeups on the target CPU and process them

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 25b9423..cf009fb 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c

@@ -12,6 +12,8 @@
 
 #include <trace/events/power.h>
 
+#include "sched.h"
+
 static int __read_mostly cpu_idle_force_poll;
 
 void cpu_idle_poll_ctrl(bool enable)
@@ -67,6 +69,10 @@
  * cpuidle_idle_call - the main idle function
  *
  * NOTE: no locks or semaphores should be used here
+ *
+ * On archs that support TIF_POLLING_NRFLAG, this is called with polling
+ * set, and it returns with polling set.  If it ever stops polling, it
+ * must clear the polling bit.
  */
 static void cpuidle_idle_call(void)
 {
@@ -175,10 +181,22 @@
 
 /*
  * Generic idle loop implementation
+ *
+ * Called with polling cleared.
  */
 static void cpu_idle_loop(void)
 {
 	while (1) {
+		/*
+		 * If the arch has a polling bit, we maintain an invariant:
+		 *
+		 * Our polling bit is clear if we're not scheduled (i.e. if
+		 * rq->curr != rq->idle).  This means that, if rq->idle has
+		 * the polling bit set, then setting need_resched is
+		 * guaranteed to cause the cpu to reschedule.
+		 */
+
+		__current_set_polling();
 		tick_nohz_idle_enter();
 
 		while (!need_resched()) {
@@ -218,6 +236,17 @@
 		 */
 		preempt_set_need_resched();
 		tick_nohz_idle_exit();
+		__current_clr_polling();
+
+		/*
+		 * We promise to call sched_ttwu_pending and reschedule
+		 * if need_resched is set while polling is set.  That
+		 * means that clearing polling needs to be visible
+		 * before doing these things.
+		 */
+		smp_mb__after_atomic();
+
+		sched_ttwu_pending();
 		schedule_preempt_disabled();
 	}
 }
@@ -239,7 +268,6 @@
 	 */
 	boot_init_stack_canary();
 #endif
-	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b3512f1..a490831 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c

@@ -918,7 +918,6 @@
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_rt_entity *rt_se = &curr->rt;
-	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	u64 delta_exec;
 
 	if (curr->sched_class != &rt_sched_class)
@@ -943,7 +942,7 @@
 		return;
 
 	for_each_sched_rt_entity(rt_se) {
-		rt_rq = rt_rq_of_se(rt_se);
+		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 			raw_spin_lock(&rt_rq->rt_runtime_lock);

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e47679b..31cc02e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h

@@ -567,7 +567,7 @@
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
-	unsigned long cpu_power;
+	unsigned long cpu_capacity;
 
 	unsigned char idle_balance;
 	/* For active balancing */
@@ -670,6 +670,8 @@
 
 #ifdef CONFIG_SMP
 
+extern void sched_ttwu_pending(void);
+
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
 			      lockdep_is_held(&sched_domains_mutex))
@@ -728,15 +730,15 @@
 DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
-struct sched_group_power {
+struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-	 * single CPU.
+	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
+	 * for a single CPU.
 	 */
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 	unsigned long next_update;
-	int imbalance; /* XXX unrelated to power but shared group state */
+	int imbalance; /* XXX unrelated to capacity but shared group state */
 	/*
 	 * Number of busy cpus in this group.
 	 */
@@ -750,7 +752,7 @@
 	atomic_t ref;
 
 	unsigned int group_weight;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 
 	/*
 	 * The CPUs this group covers.
@@ -773,7 +775,7 @@
  */
 static inline struct cpumask *sched_group_mask(struct sched_group *sg)
 {
-	return to_cpumask(sg->sgp->cpumask);
+	return to_cpumask(sg->sgc->cpumask);
 }
 
 /**
@@ -787,6 +789,10 @@
 
 extern int group_balance_cpu(struct sched_group *sg);
 
+#else
+
+static inline void sched_ttwu_pending(void) { }
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
@@ -1167,7 +1173,7 @@
 
 #ifdef CONFIG_SMP
 
-extern void update_group_power(struct sched_domain *sd, int cpu);
+extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
 

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index f6d76be..301bbc2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c

@@ -54,8 +54,7 @@
 struct seccomp_filter {
 	atomic_t usage;
 	struct seccomp_filter *prev;
-	unsigned short len;  /* Instruction count */
-	struct sock_filter_int insnsi[];
+	struct sk_filter *prog;
 };
 
 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -104,60 +103,59 @@
 		u32 k = ftest->k;
 
 		switch (code) {
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
 			/* 32-bit aligned and not out of bounds. */
 			if (k >= sizeof(struct seccomp_data) || k & 3)
 				return -EINVAL;
 			continue;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ftest->code = BPF_LD | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ftest->code = BPF_LDX | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
 		/* Explicitly include allowed calls. */
-		case BPF_S_RET_K:
-		case BPF_S_RET_A:
-		case BPF_S_ALU_ADD_K:
-		case BPF_S_ALU_ADD_X:
-		case BPF_S_ALU_SUB_K:
-		case BPF_S_ALU_SUB_X:
-		case BPF_S_ALU_MUL_K:
-		case BPF_S_ALU_MUL_X:
-		case BPF_S_ALU_DIV_X:
-		case BPF_S_ALU_AND_K:
-		case BPF_S_ALU_AND_X:
-		case BPF_S_ALU_OR_K:
-		case BPF_S_ALU_OR_X:
-		case BPF_S_ALU_XOR_K:
-		case BPF_S_ALU_XOR_X:
-		case BPF_S_ALU_LSH_K:
-		case BPF_S_ALU_LSH_X:
-		case BPF_S_ALU_RSH_K:
-		case BPF_S_ALU_RSH_X:
-		case BPF_S_ALU_NEG:
-		case BPF_S_LD_IMM:
-		case BPF_S_LDX_IMM:
-		case BPF_S_MISC_TAX:
-		case BPF_S_MISC_TXA:
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-		case BPF_S_JMP_JA:
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
-			sk_decode_filter(ftest, ftest);
+		case BPF_RET | BPF_K:
+		case BPF_RET | BPF_A:
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_LSH | BPF_X:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_X:
+		case BPF_ALU | BPF_NEG:
+		case BPF_LD | BPF_IMM:
+		case BPF_LDX | BPF_IMM:
+		case BPF_MISC | BPF_TAX:
+		case BPF_MISC | BPF_TXA:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+		case BPF_JMP | BPF_JA:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			continue;
 		default:
 			return -EINVAL;
@@ -189,7 +187,8 @@
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
@@ -215,7 +214,7 @@
 		return -EINVAL;
 
 	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4;  /* include a 4 instr penalty */
+		total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
 
@@ -256,19 +255,25 @@
 
 	/* Allocate a new seccomp_filter */
 	ret = -ENOMEM;
-	filter = kzalloc(sizeof(struct seccomp_filter) +
-			 sizeof(struct sock_filter_int) * new_len,
+	filter = kzalloc(sizeof(struct seccomp_filter),
 			 GFP_KERNEL|__GFP_NOWARN);
 	if (!filter)
 		goto free_prog;
 
-	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-	if (ret)
+	filter->prog = kzalloc(sk_filter_size(new_len),
+			       GFP_KERNEL|__GFP_NOWARN);
+	if (!filter->prog)
 		goto free_filter;
+
+	ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+	if (ret)
+		goto free_filter_prog;
 	kfree(fp);
 
 	atomic_set(&filter->usage, 1);
-	filter->len = new_len;
+	filter->prog->len = new_len;
+
+	sk_filter_select_runtime(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +283,8 @@
 	current->seccomp.filter = filter;
 	return 0;
 
+free_filter_prog:
+	kfree(filter->prog);
 free_filter:
 	kfree(filter);
 free_prog:
@@ -330,6 +337,7 @@
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
+		sk_filter_free(freeme->prog);
 		kfree(freeme);
 	}
 }

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index db19e3e..ba9ed45 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c

@@ -2568,11 +2568,11 @@
 	bool first = 1;
 	size_t left = *lenp;
 	unsigned long bitmap_len = table->maxlen;
-	unsigned long *bitmap = (unsigned long *) table->data;
+	unsigned long *bitmap = *(unsigned long **) table->data;
 	unsigned long *tmp_bitmap = NULL;
 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
 
-	if (!bitmap_len || !left || (*ppos && !write)) {
+	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
 	}

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8639819..d440935 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig

@@ -535,6 +535,36 @@
 
 	  Say N, unless you absolutely know what you are doing.
 
+config TRACEPOINT_BENCHMARK
+        bool "Add tracepoint that benchmarks tracepoints"
+	help
+	 This option creates the tracepoint "benchmark:benchmark_event".
+	 When the tracepoint is enabled, it kicks off a kernel thread that
+	 goes into an infinite loop (calling cond_resched() to let other tasks
+	 run), and calls the tracepoint. Each iteration will record the time
+	 it took to write to the tracepoint and the next iteration that
+	 data will be passed to the tracepoint itself. That is, the tracepoint
+	 will report the time it took to do the previous tracepoint.
+	 The string written to the tracepoint is a static string of 128 bytes
+	 to keep the time the same. The initial string is simply a write of
+	 "START". The second string records the cold cache time of the first
+	 write which is not added to the rest of the calculations.
+
+	 As it is a tight loop, it benchmarks as hot cache. That's fine because
+	 we care most about hot paths that are probably in cache already.
+
+	 An example of the output:
+
+	      START
+	      first=3672 [COLD CACHED]
+	      last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712
+	      last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337
+	      last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064
+	      last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411
+	      last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389
+	      last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666
+
+
 config RING_BUFFER_BENCHMARK
 	tristate "Ring buffer benchmark stress tester"
 	depends on RING_BUFFER

diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 1378e84..2611613 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile

@@ -17,6 +17,7 @@
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+CFLAGS_trace_benchmark.o := -I$(src)
 CFLAGS_trace_events_filter.o := -I$(src)
 
 obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
@@ -62,4 +63,6 @@
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
 
+obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
+
 libftrace-y := ftrace.o

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a54a25..5b372e3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c

@@ -62,7 +62,7 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
-#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_REGEX_LOCK(opsname)	\
@@ -103,7 +103,6 @@
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
@@ -171,23 +170,6 @@
 	return cnt;
 }
 
-static void
-ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
-			struct ftrace_ops *op, struct pt_regs *regs)
-{
-	int bit;
-
-	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
-	if (bit < 0)
-		return;
-
-	do_for_each_ftrace_op(op, ftrace_global_list) {
-		op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
-
-	trace_clear_recursion(bit);
-}
-
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
@@ -237,43 +219,6 @@
 	return 0;
 }
 
-static void update_global_ops(void)
-{
-	ftrace_func_t func = ftrace_global_list_func;
-	void *private = NULL;
-
-	/* The list has its own recursion protection. */
-	global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
-
-	/*
-	 * If there's only one function registered, then call that
-	 * function directly. Otherwise, we need to iterate over the
-	 * registered callers.
-	 */
-	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end) {
-		func = ftrace_global_list->func;
-		private = ftrace_global_list->private;
-		/*
-		 * As we are calling the function directly.
-		 * If it does not have recursion protection,
-		 * the function_trace_op needs to be updated
-		 * accordingly.
-		 */
-		if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE))
-			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
-	}
-
-	/* If we filter on pids, update to use the pid function */
-	if (!list_empty(&ftrace_pids)) {
-		set_ftrace_pid_function(func);
-		func = ftrace_pid_func;
-	}
-
-	global_ops.func = func;
-	global_ops.private = private;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -301,8 +246,6 @@
 {
 	ftrace_func_t func;
 
-	update_global_ops();
-
 	/*
 	 * If we are at the end of the list and this ops is
 	 * recursion safe and not dynamic and the arch supports passing ops,
@@ -314,10 +257,7 @@
 	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
 	     !FTRACE_FORCE_LIST_FUNC)) {
 		/* Set the ftrace_ops that the arch callback uses */
-		if (ftrace_ops_list == &global_ops)
-			set_function_trace_op = ftrace_global_list;
-		else
-			set_function_trace_op = ftrace_ops_list;
+		set_function_trace_op = ftrace_ops_list;
 		func = ftrace_ops_list->func;
 	} else {
 		/* Just use the default ftrace_ops */
@@ -373,6 +313,11 @@
 	ftrace_trace_function = func;
 }
 
+int using_ftrace_ops_list_func(void)
+{
+	return ftrace_trace_function == ftrace_ops_list_func;
+}
+
 static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 {
 	ops->next = *list;
@@ -434,16 +379,9 @@
 	if (ops->flags & FTRACE_OPS_FL_DELETED)
 		return -EINVAL;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
 	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return -EBUSY;
 
-	/* We don't support both control and global flags set. */
-	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
-		return -EINVAL;
-
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
@@ -461,10 +399,7 @@
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
-		ops->flags |= FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		if (control_ops_alloc(ops))
 			return -ENOMEM;
 		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
@@ -484,15 +419,7 @@
 	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
 		return -EBUSY;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ret = remove_ftrace_list_ops(&ftrace_global_list,
-					     &global_ops, ops);
-		if (!ret)
-			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		ret = remove_ftrace_list_ops(&ftrace_control_list,
 					     &control_ops, ops);
 	} else
@@ -895,7 +822,7 @@
 
 	local_irq_save(flags);
 
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
@@ -926,7 +853,7 @@
 	unsigned long flags;
 
 	local_irq_save(flags);
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
@@ -1178,7 +1105,7 @@
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static bool ftrace_hash_empty(struct ftrace_hash *hash)
+static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
 {
 	return !hash || !hash->count;
 }
@@ -1625,7 +1552,14 @@
 			in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
 
 			/*
+			 * If filter_hash is set, we want to match all functions
+			 * that are in the hash but not in the other hash.
 			 *
+			 * If filter_hash is not set, then we are decrementing.
+			 * That means we match anything that is in the hash
+			 * and also in the other_hash. That is, we need to turn
+			 * off functions in the other hash because they are disabled
+			 * by this hash.
 			 */
 			if (filter_hash && in_hash && !in_other_hash)
 				match = 1;
@@ -1767,19 +1701,15 @@
 		/*
 		 * If this record is being updated from a nop, then
 		 *   return UPDATE_MAKE_CALL.
-		 * Otherwise, if the EN flag is set, then return
-		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert
-		 *   from the non-save regs, to a save regs function.
 		 * Otherwise,
 		 *   return UPDATE_MODIFY_CALL to tell the caller to convert
-		 *   from the save regs, to a non-save regs function.
+		 *   from the save regs, to a non-save regs function or
+		 *   vice versa.
 		 */
 		if (flag & FTRACE_FL_ENABLED)
 			return FTRACE_UPDATE_MAKE_CALL;
-		else if (rec->flags & FTRACE_FL_REGS_EN)
-			return FTRACE_UPDATE_MODIFY_CALL_REGS;
-		else
-			return FTRACE_UPDATE_MODIFY_CALL;
+
+		return FTRACE_UPDATE_MODIFY_CALL;
 	}
 
 	if (update) {
@@ -1821,6 +1751,42 @@
 	return ftrace_check_record(rec, enable, 0);
 }
 
+/**
+ * ftrace_get_addr_new - Get the call address to set to
+ * @rec:  The ftrace record descriptor
+ *
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not set, then it wants to convert to the normal callback.
+ *
+ * Returns the address of the trampoline to set to
+ */
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
+/**
+ * ftrace_get_addr_curr - Get the call address that is already there
+ * @rec:  The ftrace record descriptor
+ *
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ *
+ * Returns the address of the trampoline that is currently being called
+ */
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS_EN)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
@@ -1828,12 +1794,12 @@
 	unsigned long ftrace_addr;
 	int ret;
 
-	ret = ftrace_update_record(rec, enable);
+	ftrace_addr = ftrace_get_addr_new(rec);
 
-	if (rec->flags & FTRACE_FL_REGS)
-		ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
-	else
-		ftrace_addr = (unsigned long)FTRACE_ADDR;
+	/* This needs to be done before we call ftrace_update_record */
+	ftrace_old_addr = ftrace_get_addr_curr(rec);
+
+	ret = ftrace_update_record(rec, enable);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -1845,13 +1811,7 @@
 	case FTRACE_UPDATE_MAKE_NOP:
 		return ftrace_make_nop(NULL, rec, ftrace_addr);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
-		if (rec->flags & FTRACE_FL_REGS)
-			ftrace_old_addr = (unsigned long)FTRACE_ADDR;
-		else
-			ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
-
 		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
 
@@ -2115,7 +2075,6 @@
 
 static int ftrace_startup(struct ftrace_ops *ops, int command)
 {
-	bool hash_enable = true;
 	int ret;
 
 	if (unlikely(ftrace_disabled))
@@ -2128,18 +2087,9 @@
 	ftrace_start_up++;
 	command |= FTRACE_UPDATE_CALLS;
 
-	/* ops marked global share the filter hashes */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		/* Don't update hash if global is already set */
-		if (global_start_up)
-			hash_enable = false;
-		global_start_up++;
-	}
-
 	ops->flags |= FTRACE_OPS_FL_ENABLED;
-	if (hash_enable)
-		ftrace_hash_rec_enable(ops, 1);
+
+	ftrace_hash_rec_enable(ops, 1);
 
 	ftrace_startup_enable(command);
 
@@ -2148,7 +2098,6 @@
 
 static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 {
-	bool hash_disable = true;
 	int ret;
 
 	if (unlikely(ftrace_disabled))
@@ -2166,21 +2115,9 @@
 	 */
 	WARN_ON_ONCE(ftrace_start_up < 0);
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		global_start_up--;
-		WARN_ON_ONCE(global_start_up < 0);
-		/* Don't update hash if global still has users */
-		if (global_start_up) {
-			WARN_ON_ONCE(!ftrace_start_up);
-			hash_disable = false;
-		}
-	}
+	ftrace_hash_rec_disable(ops, 1);
 
-	if (hash_disable)
-		ftrace_hash_rec_disable(ops, 1);
-
-	if (ops != &global_ops || !global_start_up)
+	if (!global_start_up)
 		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
 	command |= FTRACE_UPDATE_CALLS;
@@ -3524,10 +3461,6 @@
 	struct ftrace_hash *hash;
 	int ret;
 
-	/* All global ops uses the global ops filters */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL)
-		ops = &global_ops;
-
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
@@ -3639,8 +3572,7 @@
 }
 EXPORT_SYMBOL_GPL(ftrace_set_notrace);
 /**
- * ftrace_set_filter - set a function to filter on in ftrace
- * @ops - the ops to set the filter with
+ * ftrace_set_global_filter - set a function to filter on with global tracers
  * @buf - the string that holds the function filter text.
  * @len - the length of the string.
  * @reset - non zero to reset all filters before applying this filter.
@@ -3655,8 +3587,7 @@
 EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
 
 /**
- * ftrace_set_notrace - set a function to not trace in ftrace
- * @ops - the ops to set the notrace filter with
+ * ftrace_set_global_notrace - set a function to not trace with global tracers
  * @buf - the string that holds the function notrace text.
  * @len - the length of the string.
  * @reset - non zero to reset all filters before applying this filter.
@@ -4443,6 +4374,34 @@
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+__init void ftrace_init_global_array_ops(struct trace_array *tr)
+{
+	tr->ops = &global_ops;
+	tr->ops->private = tr;
+}
+
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
+{
+	/* If we filter on pids, update to use the pid function */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+		if (WARN_ON(tr->ops->func != ftrace_stub))
+			printk("ftrace ops had %pS for function\n",
+			       tr->ops->func);
+		/* Only the top level instance does pid tracing */
+		if (!list_empty(&ftrace_pids)) {
+			set_ftrace_pid_function(func);
+			func = ftrace_pid_func;
+		}
+	}
+	tr->ops->func = func;
+	tr->ops->private = tr;
+}
+
+void ftrace_reset_array_ops(struct trace_array *tr)
+{
+	tr->ops->func = ftrace_stub;
+}
+
 static void
 ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
@@ -4501,9 +4460,16 @@
 	 */
 	preempt_disable_notrace();
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip, regs))
+		if (ftrace_ops_test(op, ip, regs)) {
+			if (WARN_ON(!op->func)) {
+				function_trace_stop = 1;
+				printk("op=%p %pS\n", op, op);
+				goto out;
+			}
 			op->func(ip, parent_ip, op, regs);
+		}
 	} while_for_each_ftrace_op(op);
+out:
 	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
@@ -4908,7 +4874,6 @@
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 static int ftrace_graph_active;
-static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
@@ -5054,13 +5019,6 @@
 	return NOTIFY_DONE;
 }
 
-/* Just a place holder for function graph */
-static struct ftrace_ops fgraph_ops __read_mostly = {
-	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
-				FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
 {
 	if (!ftrace_ops_test(&global_ops, trace->func, NULL))
@@ -5085,6 +5043,10 @@
 		ftrace_graph_entry = ftrace_graph_entry_test;
 }
 
+static struct notifier_block ftrace_suspend_notifier = {
+	.notifier_call = ftrace_suspend_notifier_call,
+};
+
 int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 			trace_func_graph_ent_t entryfunc)
 {
@@ -5098,7 +5060,6 @@
 		goto out;
 	}
 
-	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
 	register_pm_notifier(&ftrace_suspend_notifier);
 
 	ftrace_graph_active++;
@@ -5120,7 +5081,10 @@
 	ftrace_graph_entry = ftrace_graph_entry_test;
 	update_function_graph_func();
 
-	ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET);
+	/* Function graph doesn't use the .func field of global_ops */
+	global_ops.flags |= FTRACE_OPS_FL_STUB;
+
+	ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
 
 out:
 	mutex_unlock(&ftrace_lock);
@@ -5138,7 +5102,8 @@
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
 	__ftrace_graph_entry = ftrace_graph_entry_stub;
-	ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET);
+	ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
+	global_ops.flags &= ~FTRACE_OPS_FL_STUB;
 	unregister_pm_notifier(&ftrace_suspend_notifier);
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
 

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c634868..7c56c3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c

@@ -543,7 +543,7 @@
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -ENODEV;
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
@@ -591,6 +593,7 @@
 		schedule();
 
 	finish_wait(&work->waiters, &wait);
+	return 0;
 }
 
 /**

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 737b0ef..384ede3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c

@@ -275,7 +275,7 @@
 }
 EXPORT_SYMBOL_GPL(call_filter_check_discard);
 
-cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
+static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
 
@@ -599,7 +599,7 @@
 	return 0;
 }
 
-void free_snapshot(struct trace_array *tr)
+static void free_snapshot(struct trace_array *tr)
 {
 	/*
 	 * We don't free the ring buffer. instead, resize it because
@@ -963,27 +963,9 @@
 	return cnt;
 }
 
-/*
- * ftrace_max_lock is used to protect the swapping of buffers
- * when taking a max snapshot. The buffers themselves are
- * protected by per_cpu spinlocks. But the action of the swap
- * needs its own lock.
- *
- * This is defined as a arch_spinlock_t in order to help
- * with performance when lockdep debugging is enabled.
- *
- * It is also used in other places outside the update_max_tr
- * so it needs to be defined outside of the
- * CONFIG_TRACER_MAX_TRACE.
- */
-static arch_spinlock_t ftrace_max_lock =
-	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-
 unsigned long __read_mostly	tracing_thresh;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-unsigned long __read_mostly	tracing_max_latency;
-
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
@@ -1000,7 +982,7 @@
 	max_buf->cpu = cpu;
 	max_buf->time_start = data->preempt_timestamp;
 
-	max_data->saved_latency = tracing_max_latency;
+	max_data->saved_latency = tr->max_latency;
 	max_data->critical_start = data->critical_start;
 	max_data->critical_end = data->critical_end;
 
@@ -1048,14 +1030,14 @@
 		return;
 	}
 
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&tr->max_lock);
 
 	buf = tr->trace_buffer.buffer;
 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
 	tr->max_buffer.buffer = buf;
 
 	__update_max_tr(tr, tsk, cpu);
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&tr->max_lock);
 }
 
 /**
@@ -1081,7 +1063,7 @@
 		return;
 	}
 
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&tr->max_lock);
 
 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
 
@@ -1099,17 +1081,17 @@
 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
 
 	__update_max_tr(tr, tsk, cpu);
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&tr->max_lock);
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static void default_wait_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
-		return;
+		return 0;
 
-	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1220,8 +1202,6 @@
 	else
 		if (!type->flags->opts)
 			type->flags->opts = dummy_tracer_opt;
-	if (!type->wait_pipe)
-		type->wait_pipe = default_wait_pipe;
 
 	ret = run_tracer_selftest(type);
 	if (ret < 0)
@@ -1305,22 +1285,71 @@
 	}
 }
 
-#define SAVED_CMDLINES 128
+#define SAVED_CMDLINES_DEFAULT 128
 #define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
-static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
-static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+struct saved_cmdlines_buffer {
+	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+	unsigned *map_cmdline_to_pid;
+	unsigned cmdline_num;
+	int cmdline_idx;
+	char *saved_cmdlines;
+};
+static struct saved_cmdlines_buffer *savedcmd;
 
 /* temporary disable recording */
 static atomic_t trace_record_cmdline_disabled __read_mostly;
 
-static void trace_init_cmdlines(void)
+static inline char *get_saved_cmdlines(int idx)
 {
-	memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
-	memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
-	cmdline_idx = 0;
+	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
+}
+
+static inline void set_cmdline(int idx, const char *cmdline)
+{
+	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
+}
+
+static int allocate_cmdlines_buffer(unsigned int val,
+				    struct saved_cmdlines_buffer *s)
+{
+	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
+					GFP_KERNEL);
+	if (!s->map_cmdline_to_pid)
+		return -ENOMEM;
+
+	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
+	if (!s->saved_cmdlines) {
+		kfree(s->map_cmdline_to_pid);
+		return -ENOMEM;
+	}
+
+	s->cmdline_idx = 0;
+	s->cmdline_num = val;
+	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
+	       sizeof(s->map_pid_to_cmdline));
+	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
+	       val * sizeof(*s->map_cmdline_to_pid));
+
+	return 0;
+}
+
+static int trace_create_savedcmd(void)
+{
+	int ret;
+
+	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
+	if (!savedcmd)
+		return -ENOMEM;
+
+	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
+	if (ret < 0) {
+		kfree(savedcmd);
+		savedcmd = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 int is_tracing_stopped(void)
@@ -1353,7 +1382,7 @@
 	}
 
 	/* Prevent the buffers from switching */
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&global_trace.max_lock);
 
 	buffer = global_trace.trace_buffer.buffer;
 	if (buffer)
@@ -1365,7 +1394,7 @@
 		ring_buffer_record_enable(buffer);
 #endif
 
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&global_trace.max_lock);
 
 	ftrace_start();
  out:
@@ -1420,7 +1449,7 @@
 		goto out;
 
 	/* Prevent the buffers from switching */
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&global_trace.max_lock);
 
 	buffer = global_trace.trace_buffer.buffer;
 	if (buffer)
@@ -1432,7 +1461,7 @@
 		ring_buffer_record_disable(buffer);
 #endif
 
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&global_trace.max_lock);
 
  out:
 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
@@ -1461,12 +1490,12 @@
 
 void trace_stop_cmdline_recording(void);
 
-static void trace_save_cmdline(struct task_struct *tsk)
+static int trace_save_cmdline(struct task_struct *tsk)
 {
 	unsigned pid, idx;
 
 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
-		return;
+		return 0;
 
 	/*
 	 * It's not the end of the world if we don't get
@@ -1475,11 +1504,11 @@
 	 * so if we miss here, then better luck next time.
 	 */
 	if (!arch_spin_trylock(&trace_cmdline_lock))
-		return;
+		return 0;
 
-	idx = map_pid_to_cmdline[tsk->pid];
+	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
 	if (idx == NO_CMDLINE_MAP) {
-		idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
 
 		/*
 		 * Check whether the cmdline buffer at idx has a pid
@@ -1487,22 +1516,24 @@
 		 * need to clear the map_pid_to_cmdline. Otherwise we
 		 * would read the new comm for the old pid.
 		 */
-		pid = map_cmdline_to_pid[idx];
+		pid = savedcmd->map_cmdline_to_pid[idx];
 		if (pid != NO_CMDLINE_MAP)
-			map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
 
-		map_cmdline_to_pid[idx] = tsk->pid;
-		map_pid_to_cmdline[tsk->pid] = idx;
+		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
 
-		cmdline_idx = idx;
+		savedcmd->cmdline_idx = idx;
 	}
 
-	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+	set_cmdline(idx, tsk->comm);
 
 	arch_spin_unlock(&trace_cmdline_lock);
+
+	return 1;
 }
 
-void trace_find_cmdline(int pid, char comm[])
+static void __trace_find_cmdline(int pid, char comm[])
 {
 	unsigned map;
 
@@ -1521,13 +1552,19 @@
 		return;
 	}
 
-	preempt_disable();
-	arch_spin_lock(&trace_cmdline_lock);
-	map = map_pid_to_cmdline[pid];
+	map = savedcmd->map_pid_to_cmdline[pid];
 	if (map != NO_CMDLINE_MAP)
-		strcpy(comm, saved_cmdlines[map]);
+		strcpy(comm, get_saved_cmdlines(map));
 	else
 		strcpy(comm, "<...>");
+}
+
+void trace_find_cmdline(int pid, char comm[])
+{
+	preempt_disable();
+	arch_spin_lock(&trace_cmdline_lock);
+
+	__trace_find_cmdline(pid, comm);
 
 	arch_spin_unlock(&trace_cmdline_lock);
 	preempt_enable();
@@ -1541,9 +1578,8 @@
 	if (!__this_cpu_read(trace_cmdline_save))
 		return;
 
-	__this_cpu_write(trace_cmdline_save, false);
-
-	trace_save_cmdline(tsk);
+	if (trace_save_cmdline(tsk))
+		__this_cpu_write(trace_cmdline_save, false);
 }
 
 void
@@ -1746,7 +1782,7 @@
 	 */
 	barrier();
 	if (use_stack == 1) {
-		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
+		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
 
 		if (regs)
@@ -1995,7 +2031,21 @@
 	if (alloc_percpu_trace_buffer())
 		return;
 
-	pr_info("ftrace: Allocated trace_printk buffers\n");
+	/* trace_printk() is for debug use only. Don't use it in production. */
+
+	pr_warning("\n**********************************************************\n");
+	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+	pr_warning("**                                                      **\n");
+	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
+	pr_warning("**                                                      **\n");
+	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
+	pr_warning("** unsafe for produciton use.                           **\n");
+	pr_warning("**                                                      **\n");
+	pr_warning("** If you see this message and you are not debugging    **\n");
+	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
+	pr_warning("**                                                      **\n");
+	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+	pr_warning("**********************************************************\n");
 
 	/* Expand the buffers to set size */
 	tracing_update_buffers();
@@ -3333,7 +3383,7 @@
 	mutex_lock(&tracing_cpumask_update_lock);
 
 	local_irq_disable();
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&tr->max_lock);
 	for_each_tracing_cpu(cpu) {
 		/*
 		 * Increase/decrease the disabled counter if we are
@@ -3350,7 +3400,7 @@
 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
 		}
 	}
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&tr->max_lock);
 	local_irq_enable();
 
 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
@@ -3592,6 +3642,7 @@
 	"  trace_options\t\t- Set format or modify how tracing happens\n"
 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
 	"\t\t\t  option name\n"
+	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
 #ifdef CONFIG_DYNAMIC_FTRACE
 	"\n  available_filter_functions - list of functions that can be filtered on\n"
 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
@@ -3705,55 +3756,153 @@
 	.llseek		= generic_file_llseek,
 };
 
-static ssize_t
-tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
-				size_t cnt, loff_t *ppos)
+static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	char *buf_comm;
-	char *file_buf;
-	char *buf;
-	int len = 0;
-	int pid;
-	int i;
+	unsigned int *ptr = v;
 
-	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
-	if (!file_buf)
-		return -ENOMEM;
+	if (*pos || m->count)
+		ptr++;
 
-	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
-	if (!buf_comm) {
-		kfree(file_buf);
-		return -ENOMEM;
-	}
+	(*pos)++;
 
-	buf = file_buf;
-
-	for (i = 0; i < SAVED_CMDLINES; i++) {
-		int r;
-
-		pid = map_cmdline_to_pid[i];
-		if (pid == -1 || pid == NO_CMDLINE_MAP)
+	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
+	     ptr++) {
+		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
 			continue;
 
-		trace_find_cmdline(pid, buf_comm);
-		r = sprintf(buf, "%d %s\n", pid, buf_comm);
-		buf += r;
-		len += r;
+		return ptr;
 	}
 
-	len = simple_read_from_buffer(ubuf, cnt, ppos,
-				      file_buf, len);
+	return NULL;
+}
 
-	kfree(file_buf);
-	kfree(buf_comm);
+static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
+{
+	void *v;
+	loff_t l = 0;
 
-	return len;
+	preempt_disable();
+	arch_spin_lock(&trace_cmdline_lock);
+
+	v = &savedcmd->map_cmdline_to_pid[0];
+	while (l <= *pos) {
+		v = saved_cmdlines_next(m, v, &l);
+		if (!v)
+			return NULL;
+	}
+
+	return v;
+}
+
+static void saved_cmdlines_stop(struct seq_file *m, void *v)
+{
+	arch_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
+}
+
+static int saved_cmdlines_show(struct seq_file *m, void *v)
+{
+	char buf[TASK_COMM_LEN];
+	unsigned int *pid = v;
+
+	__trace_find_cmdline(*pid, buf);
+	seq_printf(m, "%d %s\n", *pid, buf);
+	return 0;
+}
+
+static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
+	.start		= saved_cmdlines_start,
+	.next		= saved_cmdlines_next,
+	.stop		= saved_cmdlines_stop,
+	.show		= saved_cmdlines_show,
+};
+
+static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+
+	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
 }
 
 static const struct file_operations tracing_saved_cmdlines_fops = {
-    .open       = tracing_open_generic,
-    .read       = tracing_saved_cmdlines_read,
-    .llseek	= generic_file_llseek,
+	.open		= tracing_saved_cmdlines_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static ssize_t
+tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
+				 size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	arch_spin_lock(&trace_cmdline_lock);
+	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
+	arch_spin_unlock(&trace_cmdline_lock);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+	kfree(s->saved_cmdlines);
+	kfree(s->map_cmdline_to_pid);
+	kfree(s);
+}
+
+static int tracing_resize_saved_cmdlines(unsigned int val)
+{
+	struct saved_cmdlines_buffer *s, *savedcmd_temp;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	if (allocate_cmdlines_buffer(val, s) < 0) {
+		kfree(s);
+		return -ENOMEM;
+	}
+
+	arch_spin_lock(&trace_cmdline_lock);
+	savedcmd_temp = savedcmd;
+	savedcmd = s;
+	arch_spin_unlock(&trace_cmdline_lock);
+	free_saved_cmdlines_buffer(savedcmd_temp);
+
+	return 0;
+}
+
+static ssize_t
+tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
+				  size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
+	if (!val || val > PID_MAX_DEFAULT)
+		return -EINVAL;
+
+	ret = tracing_resize_saved_cmdlines((unsigned int)val);
+	if (ret < 0)
+		return ret;
+
+	*ppos += cnt;
+
+	return cnt;
+}
+
+static const struct file_operations tracing_saved_cmdlines_size_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_saved_cmdlines_size_read,
+	.write		= tracing_saved_cmdlines_size_write,
 };
 
 static ssize_t
@@ -4225,29 +4374,11 @@
 	return trace_poll(iter, filp, poll_table);
 }
 
-/*
- * This is a make-shift waitqueue.
- * A tracer might use this callback on some rare cases:
- *
- *  1) the current tracer might hold the runqueue lock when it wakes up
- *     a reader, hence a deadlock (sched, function, and function graph tracers)
- *  2) the function tracers, trace all functions, we don't want
- *     the overhead of calling wake_up and friends
- *     (and tracing them too)
- *
- *     Anyway, this is really very primitive wakeup.
- */
-void poll_wait_pipe(struct trace_iterator *iter)
-{
-	set_current_state(TASK_INTERRUPTIBLE);
-	/* sleep for 100 msecs, and try again. */
-	schedule_timeout(HZ / 10);
-}
-
 /* Must be called with trace_types_lock mutex held. */
 static int tracing_wait_pipe(struct file *filp)
 {
 	struct trace_iterator *iter = filp->private_data;
+	int ret;
 
 	while (trace_empty(iter)) {
 
@@ -4255,15 +4386,6 @@
 			return -EAGAIN;
 		}
 
-		mutex_unlock(&iter->mutex);
-
-		iter->trace->wait_pipe(iter);
-
-		mutex_lock(&iter->mutex);
-
-		if (signal_pending(current))
-			return -EINTR;
-
 		/*
 		 * We block until we read something and tracing is disabled.
 		 * We still block if tracing is disabled, but we have never
@@ -4275,6 +4397,18 @@
 		 */
 		if (!tracing_is_on() && iter->pos)
 			break;
+
+		mutex_unlock(&iter->mutex);
+
+		ret = wait_on_pipe(iter);
+
+		mutex_lock(&iter->mutex);
+
+		if (ret)
+			return ret;
+
+		if (signal_pending(current))
+			return -EINTR;
 	}
 
 	return 1;
@@ -5197,8 +5331,12 @@
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			iter->trace->wait_pipe(iter);
+			ret = wait_on_pipe(iter);
 			mutex_lock(&trace_types_lock);
+			if (ret) {
+				size = ret;
+				goto out_unlock;
+			}
 			if (signal_pending(current)) {
 				size = -EINTR;
 				goto out_unlock;
@@ -5408,8 +5546,10 @@
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		iter->trace->wait_pipe(iter);
+		ret = wait_on_pipe(iter);
 		mutex_lock(&trace_types_lock);
+		if (ret)
+			goto out;
 		if (signal_pending(current)) {
 			ret = -EINTR;
 			goto out;
@@ -6102,6 +6242,28 @@
 	return 0;
 }
 
+static void free_trace_buffer(struct trace_buffer *buf)
+{
+	if (buf->buffer) {
+		ring_buffer_free(buf->buffer);
+		buf->buffer = NULL;
+		free_percpu(buf->data);
+		buf->data = NULL;
+	}
+}
+
+static void free_trace_buffers(struct trace_array *tr)
+{
+	if (!tr)
+		return;
+
+	free_trace_buffer(&tr->trace_buffer);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+	free_trace_buffer(&tr->max_buffer);
+#endif
+}
+
 static int new_instance_create(const char *name)
 {
 	struct trace_array *tr;
@@ -6131,6 +6293,8 @@
 
 	raw_spin_lock_init(&tr->start_lock);
 
+	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+
 	tr->current_trace = &nop_trace;
 
 	INIT_LIST_HEAD(&tr->systems);
@@ -6158,8 +6322,7 @@
 	return 0;
 
  out_free_tr:
-	if (tr->trace_buffer.buffer)
-		ring_buffer_free(tr->trace_buffer.buffer);
+	free_trace_buffers(tr);
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
@@ -6199,8 +6362,7 @@
 	event_trace_del_tracer(tr);
 	ftrace_destroy_function_files(tr);
 	debugfs_remove_recursive(tr->dir);
-	free_percpu(tr->trace_buffer.data);
-	ring_buffer_free(tr->trace_buffer.buffer);
+	free_trace_buffers(tr);
 
 	kfree(tr->name);
 	kfree(tr);
@@ -6328,6 +6490,11 @@
 	trace_create_file("tracing_on", 0644, d_tracer,
 			  tr, &rb_simple_fops);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+	trace_create_file("tracing_max_latency", 0644, d_tracer,
+			&tr->max_latency, &tracing_max_lat_fops);
+#endif
+
 	if (ftrace_create_function_files(tr, d_tracer))
 		WARN(1, "Could not allocate function filter files");
 
@@ -6353,11 +6520,6 @@
 
 	init_tracer_debugfs(&global_trace, d_tracer);
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-	trace_create_file("tracing_max_latency", 0644, d_tracer,
-			&tracing_max_latency, &tracing_max_lat_fops);
-#endif
-
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&tracing_thresh, &tracing_max_lat_fops);
 
@@ -6367,6 +6529,9 @@
 	trace_create_file("saved_cmdlines", 0444, d_tracer,
 			NULL, &tracing_saved_cmdlines_fops);
 
+	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
+			  NULL, &tracing_saved_cmdlines_size_fops);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -6603,18 +6768,19 @@
 	if (!temp_buffer)
 		goto out_free_cpumask;
 
+	if (trace_create_savedcmd() < 0)
+		goto out_free_temp_buffer;
+
 	/* TODO: make the number of buffers hot pluggable with CPUS */
 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
 		WARN_ON(1);
-		goto out_free_temp_buffer;
+		goto out_free_savedcmd;
 	}
 
 	if (global_trace.buffer_disabled)
 		tracing_off();
 
-	trace_init_cmdlines();
-
 	if (trace_boot_clock) {
 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
 		if (ret < 0)
@@ -6629,6 +6795,10 @@
 	 */
 	global_trace.current_trace = &nop_trace;
 
+	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+
+	ftrace_init_global_array_ops(&global_trace);
+
 	register_tracer(&nop_trace);
 
 	/* All seems OK, enable tracing */
@@ -6656,13 +6826,11 @@
 
 	return 0;
 
+out_free_savedcmd:
+	free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
 	ring_buffer_free(temp_buffer);
 out_free_cpumask:
-	free_percpu(global_trace.trace_buffer.data);
-#ifdef CONFIG_TRACER_MAX_TRACE
-	free_percpu(global_trace.max_buffer.data);
-#endif
 	free_cpumask_var(global_trace.tracing_cpumask);
 out_free_buffer_mask:
 	free_cpumask_var(tracing_buffer_mask);

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7b..9258f5a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h

@@ -190,7 +190,22 @@
 	 */
 	struct trace_buffer	max_buffer;
 	bool			allocated_snapshot;
+	unsigned long		max_latency;
 #endif
+	/*
+	 * max_lock is used to protect the swapping of buffers
+	 * when taking a max snapshot. The buffers themselves are
+	 * protected by per_cpu spinlocks. But the action of the swap
+	 * needs its own lock.
+	 *
+	 * This is defined as an arch_spinlock_t in order to help
+	 * with performance when lockdep debugging is enabled.
+	 *
+	 * It is also used in other places outside the update_max_tr
+	 * so it needs to be defined outside of the
+	 * CONFIG_TRACER_MAX_TRACE.
+	 */
+	arch_spinlock_t		max_lock;
 	int			buffer_disabled;
 #ifdef CONFIG_FTRACE_SYSCALLS
 	int			sys_refcount_enter;
@@ -237,6 +252,9 @@
 {
 	struct trace_array *tr;
 
+	if (list_empty(&ftrace_trace_arrays))
+		return NULL;
+
 	tr = list_entry(ftrace_trace_arrays.prev,
 			typeof(*tr), list);
 	WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
@@ -323,7 +341,6 @@
  * @stop: called when tracing is paused (echo 0 > tracing_enabled)
  * @open: called when the trace file is opened
  * @pipe_open: called when the trace_pipe file is opened
- * @wait_pipe: override how the user waits for traces on trace_pipe
  * @close: called when the trace file is released
  * @pipe_close: called when the trace_pipe file is released
  * @read: override the default read callback on trace_pipe
@@ -342,7 +359,6 @@
 	void			(*stop)(struct trace_array *tr);
 	void			(*open)(struct trace_iterator *iter);
 	void			(*pipe_open)(struct trace_iterator *iter);
-	void			(*wait_pipe)(struct trace_iterator *iter);
 	void			(*close)(struct trace_iterator *iter);
 	void			(*pipe_close)(struct trace_iterator *iter);
 	ssize_t			(*read)(struct trace_iterator *iter,
@@ -416,13 +432,7 @@
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
 
-	/* GLOBAL_BITs must be greater than FTRACE_BITs */
-	TRACE_GLOBAL_BIT,
-	TRACE_GLOBAL_NMI_BIT,
-	TRACE_GLOBAL_IRQ_BIT,
-	TRACE_GLOBAL_SIRQ_BIT,
-
-	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
+	/* INTERNAL_BITs must be greater than FTRACE_BITs */
 	TRACE_INTERNAL_BIT,
 	TRACE_INTERNAL_NMI_BIT,
 	TRACE_INTERNAL_IRQ_BIT,
@@ -449,9 +459,6 @@
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
 #define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
 
-#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
-#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
-
 #define TRACE_LIST_START	TRACE_INTERNAL_BIT
 #define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
 
@@ -560,8 +567,6 @@
 
 void tracing_iter_reset(struct trace_iterator *iter, int cpu);
 
-void poll_wait_pipe(struct trace_iterator *iter);
-
 void tracing_sched_switch_trace(struct trace_array *tr,
 				struct task_struct *prev,
 				struct task_struct *next,
@@ -608,8 +613,6 @@
 extern unsigned long tracing_thresh;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-extern unsigned long tracing_max_latency;
-
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
@@ -724,6 +727,8 @@
 #define TRACE_GRAPH_PRINT_PROC          0x8
 #define TRACE_GRAPH_PRINT_DURATION      0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME      0x20
+#define TRACE_GRAPH_PRINT_IRQS          0x40
+#define TRACE_GRAPH_PRINT_TAIL          0x80
 #define TRACE_GRAPH_PRINT_FILL_SHIFT	28
 #define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
@@ -823,6 +828,10 @@
 int ftrace_create_function_files(struct trace_array *tr,
 				 struct dentry *parent);
 void ftrace_destroy_function_files(struct trace_array *tr);
+void ftrace_init_global_array_ops(struct trace_array *tr);
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
+void ftrace_reset_array_ops(struct trace_array *tr);
+int using_ftrace_ops_list_func(void);
 #else
 static inline int ftrace_trace_task(struct task_struct *task)
 {
@@ -836,6 +845,11 @@
 	return 0;
 }
 static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
+static inline __init void
+ftrace_init_global_array_ops(struct trace_array *tr) { }
+static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+/* ftrace_func_t type is not defined, use macro instead of static inline */
+#define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)

diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
new file mode 100644
index 0000000..40a14cb
--- /dev/null
+++ b/kernel/trace/trace_benchmark.c

@@ -0,0 +1,198 @@
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/trace_clock.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace_benchmark.h"
+
+static struct task_struct *bm_event_thread;
+
+static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";
+
+static u64 bm_total;
+static u64 bm_totalsq;
+static u64 bm_last;
+static u64 bm_max;
+static u64 bm_min;
+static u64 bm_first;
+static u64 bm_cnt;
+static u64 bm_stddev;
+static unsigned int bm_avg;
+static unsigned int bm_std;
+
+/*
+ * This gets called in a loop recording the time it took to write
+ * the tracepoint. What it writes is the time statistics of the last
+ * tracepoint write. As there is nothing to write the first time
+ * it simply writes "START". As the first write is cold cache and
+ * the rest is hot, we save off that time in bm_first and it is
+ * reported as "first", which is shown in the second write to the
+ * tracepoint. The "first" field is written within the statistics from
+ * then on but never changes.
+ */
+static void trace_do_benchmark(void)
+{
+	u64 start;
+	u64 stop;
+	u64 delta;
+	u64 stddev;
+	u64 seed;
+	u64 last_seed;
+	unsigned int avg;
+	unsigned int std = 0;
+
+	/* Only run if the tracepoint is actually active */
+	if (!trace_benchmark_event_enabled())
+		return;
+
+	local_irq_disable();
+	start = trace_clock_local();
+	trace_benchmark_event(bm_str);
+	stop = trace_clock_local();
+	local_irq_enable();
+
+	bm_cnt++;
+
+	delta = stop - start;
+
+	/*
+	 * The first read is cold cached, keep it separate from the
+	 * other calculations.
+	 */
+	if (bm_cnt == 1) {
+		bm_first = delta;
+		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+			  "first=%llu [COLD CACHED]", bm_first);
+		return;
+	}
+
+	bm_last = delta;
+
+	if (delta > bm_max)
+		bm_max = delta;
+	if (!bm_min || delta < bm_min)
+		bm_min = delta;
+
+	/*
+	 * When bm_cnt is greater than UINT_MAX, it breaks the statistics
+	 * accounting. Freeze the statistics when that happens.
+	 * We should have enough data for the avg and stddev anyway.
+	 */
+	if (bm_cnt > UINT_MAX) {
+		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+		    "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
+			  bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
+		return;
+	}
+
+	bm_total += delta;
+	bm_totalsq += delta * delta;
+
+
+	if (bm_cnt > 1) {
+		/*
+		 * Apply Welford's method to calculate standard deviation:
+		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
+		 */
+		stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
+		do_div(stddev, (u32)bm_cnt);
+		do_div(stddev, (u32)bm_cnt - 1);
+	} else
+		stddev = 0;
+
+	delta = bm_total;
+	do_div(delta, bm_cnt);
+	avg = delta;
+
+	if (stddev > 0) {
+		int i = 0;
+		/*
+		 * stddev is the square of standard deviation but
+		 * we want the actual number. Use the average
+		 * as our seed to find the std.
+		 *
+		 * The next try is:
+		 *  x = (x + N/x) / 2
+		 *
+		 * Where N is the squared number to find the square
+		 * root of.
+		 */
+		seed = avg;
+		do {
+			last_seed = seed;
+			seed = stddev;
+			if (!last_seed)
+				break;
+			do_div(seed, last_seed);
+			seed += last_seed;
+			do_div(seed, 2);
+		} while (i++ < 10 && last_seed != seed);
+
+		std = seed;
+	}
+
+	scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+		  "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
+		  bm_last, bm_first, bm_max, bm_min, avg, std, stddev);
+
+	bm_std = std;
+	bm_avg = avg;
+	bm_stddev = stddev;
+}
+
+static int benchmark_event_kthread(void *arg)
+{
+	/* sleep a bit to make sure the tracepoint gets activated */
+	msleep(100);
+
+	while (!kthread_should_stop()) {
+
+		trace_do_benchmark();
+
+		/*
+		 * We don't go to sleep, but let others
+		 * run as well.
+		 */
+		cond_resched();
+	}
+
+	return 0;
+}
+
+/*
+ * When the benchmark tracepoint is enabled, start the thread that fires it.
+ */
+void trace_benchmark_reg(void)
+{
+	bm_event_thread = kthread_run(benchmark_event_kthread,
+				      NULL, "event_benchmark");
+	if (WARN_ON(IS_ERR(bm_event_thread)))
+		bm_event_thread = NULL;	/* kthread_run() gives ERR_PTR */
+}
+
+/*
+ * When the benchmark tracepoint is disabled, stop the thread that
+ * fires it, forget its pointer and reset all the numbers.
+ */
+void trace_benchmark_unreg(void)
+{
+	if (IS_ERR_OR_NULL(bm_event_thread))
+		return;
+
+	kthread_stop(bm_event_thread);
+	bm_event_thread = NULL;	/* never stop the same task twice */
+
+	strcpy(bm_str, "START");
+	bm_total = 0;
+	bm_totalsq = 0;
+	bm_last = 0;
+	bm_max = 0;
+	bm_min = 0;
+	bm_cnt = 0;
+	/* These don't need to be reset but reset them anyway */
+	bm_first = 0;
+	bm_std = 0;
+	bm_avg = 0;
+	bm_stddev = 0;
+}

diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h
new file mode 100644
index 0000000..3c1df1df
--- /dev/null
+++ b/kernel/trace/trace_benchmark.h

@@ -0,0 +1,41 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM benchmark
+
+#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BENCHMARK_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_benchmark_reg(void);
+extern void trace_benchmark_unreg(void);
+
+#define BENCHMARK_EVENT_STRLEN		128
+
+TRACE_EVENT_FN(benchmark_event,
+
+	TP_PROTO(const char *str),
+
+	TP_ARGS(str),
+
+	TP_STRUCT__entry(
+		__array(	char,	str,	BENCHMARK_EVENT_STRLEN	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
+	),
+
+	TP_printk("%s", __entry->str),
+
+	trace_benchmark_reg, trace_benchmark_unreg
+);
+
+#endif /* _TRACE_BENCHMARK_H */
+
+#undef TRACE_INCLUDE_FILE
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_benchmark
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index c894614..5d12bb4 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c

@@ -248,8 +248,8 @@
 	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
-__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
-				       struct pt_regs *regs, int *rctxp)
+void *perf_trace_buf_prepare(int size, unsigned short type,
+			     struct pt_regs *regs, int *rctxp)
 {
 	struct trace_entry *entry;
 	unsigned long flags;
@@ -281,6 +281,7 @@
 	return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare);
 
 #ifdef CONFIG_FUNCTION_TRACER
 static void

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3ddfd8f..f99e0b3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c

@@ -574,6 +574,9 @@
 {
 	struct trace_array *tr = top_trace_array();
 
+	if (!tr)
+		return -ENODEV;
+
 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
 }
 EXPORT_SYMBOL_GPL(trace_set_clr_event);
@@ -2065,6 +2068,9 @@
 	bool enable;
 	int ret;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* hash funcs only work with set_ftrace_filter */
 	if (!enabled || !param)
 		return -EINVAL;
@@ -2396,6 +2402,9 @@
 	char *token;
 	int ret;
 
+	if (!tr)
+		return -ENODEV;
+
 	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
 
 		call = *iter;
@@ -2442,6 +2451,8 @@
 	int ret;
 
 	tr = top_trace_array();
+	if (!tr)
+		return -ENODEV;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -2535,6 +2546,8 @@
 	int ret;
 
 	tr = top_trace_array();
+	if (!tr)
+		return;
 
 	pr_info("Running tests on trace events:\n");
 

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index ffd5635..57f0ec9 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c

@@ -26,8 +26,6 @@
 static void
 function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct pt_regs *pt_regs);
-static struct ftrace_ops trace_ops;
-static struct ftrace_ops trace_stack_ops;
 static struct tracer_flags func_flags;
 
 /* Our option */
@@ -83,28 +81,24 @@
 
 static int function_trace_init(struct trace_array *tr)
 {
-	struct ftrace_ops *ops;
+	ftrace_func_t func;
 
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		/* There's only one global tr */
-		if (!trace_ops.private) {
-			trace_ops.private = tr;
-			trace_stack_ops.private = tr;
-		}
-
-		if (func_flags.val & TRACE_FUNC_OPT_STACK)
-			ops = &trace_stack_ops;
-		else
-			ops = &trace_ops;
-		tr->ops = ops;
-	} else if (!tr->ops) {
-		/*
-		 * Instance trace_arrays get their ops allocated
-		 * at instance creation. Unless it failed
-		 * the allocation.
-		 */
+	/*
+	 * Instance trace_arrays get their ops allocated
+	 * at instance creation. Unless it failed
+	 * the allocation.
+	 */
+	if (!tr->ops)
 		return -ENOMEM;
-	}
+
+	/* Currently only the global instance can do stack tracing */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+	    func_flags.val & TRACE_FUNC_OPT_STACK)
+		func = function_stack_trace_call;
+	else
+		func = function_trace_call;
+
+	ftrace_init_array_ops(tr, func);
 
 	tr->trace_buffer.cpu = get_cpu();
 	put_cpu();
@@ -118,6 +112,7 @@
 {
 	tracing_stop_function_trace(tr);
 	tracing_stop_cmdline_record();
+	ftrace_reset_array_ops(tr);
 }
 
 static void function_trace_start(struct trace_array *tr)
@@ -199,18 +194,6 @@
 	local_irq_restore(flags);
 }
 
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = function_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
-static struct ftrace_ops trace_stack_ops __read_mostly =
-{
-	.func = function_stack_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static struct tracer_opt func_opts[] = {
 #ifdef CONFIG_STACKTRACE
 	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
@@ -248,10 +231,10 @@
 		unregister_ftrace_function(tr->ops);
 
 		if (set) {
-			tr->ops = &trace_stack_ops;
+			tr->ops->func = function_stack_trace_call;
 			register_ftrace_function(tr->ops);
 		} else {
-			tr->ops = &trace_ops;
+			tr->ops->func = function_trace_call;
 			register_ftrace_function(tr->ops);
 		}
 
@@ -269,7 +252,6 @@
 	.init		= function_trace_init,
 	.reset		= function_trace_reset,
 	.start		= function_trace_start,
-	.wait_pipe	= poll_wait_pipe,
 	.flags		= &func_flags,
 	.set_flag	= func_set_flag,
 	.allow_instances = true,

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index deff112..4de3e57 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c

@@ -38,15 +38,6 @@
 
 #define TRACE_GRAPH_INDENT	2
 
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN	0x1
-#define TRACE_GRAPH_PRINT_CPU		0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD	0x4
-#define TRACE_GRAPH_PRINT_PROC		0x8
-#define TRACE_GRAPH_PRINT_DURATION	0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME	0x20
-#define TRACE_GRAPH_PRINT_IRQS		0x40
-
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {
@@ -64,11 +55,13 @@
 	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
 	/* Display interrupts */
 	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+	/* Display function name after trailing } */
+	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
 	{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-	/* Don't display overruns and proc by default */
+	/* Don't display overruns, proc, or tail by default */
 	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
 	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
 	.opts = trace_opts
@@ -1176,9 +1169,10 @@
 	 * If the return function does not have a matching entry,
 	 * then the entry was lost. Instead of just printing
 	 * the '}' and letting the user guess what function this
-	 * belongs to, write out the function name.
+	 * belongs to, write out the function name. Always do
+	 * that if the funcgraph-tail option is enabled.
 	 */
-	if (func_match) {
+	if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
 		ret = trace_seq_puts(s, "}\n");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
@@ -1505,7 +1499,6 @@
 	.pipe_open	= graph_trace_open,
 	.close		= graph_trace_close,
 	.pipe_close	= graph_trace_close,
-	.wait_pipe	= poll_wait_pipe,
 	.init		= graph_trace_init,
 	.reset		= graph_trace_reset,
 	.print_line	= print_graph_function,

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 8ff02cb..9bb104f 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c

@@ -151,12 +151,6 @@
 
 	atomic_dec(&data->disabled);
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = irqsoff_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -176,7 +170,7 @@
 	for_each_possible_cpu(cpu)
 		per_cpu(tracing_cpu, cpu) = 0;
 
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
 
 	return start_irqsoff_tracer(irqsoff_trace, set);
@@ -303,13 +297,13 @@
 /*
  * Should this new latency be reported/recorded?
  */
-static int report_latency(cycle_t delta)
+static int report_latency(struct trace_array *tr, cycle_t delta)
 {
 	if (tracing_thresh) {
 		if (delta < tracing_thresh)
 			return 0;
 	} else {
-		if (delta <= tracing_max_latency)
+		if (delta <= tr->max_latency)
 			return 0;
 	}
 	return 1;
@@ -333,13 +327,13 @@
 
 	pc = preempt_count();
 
-	if (!report_latency(delta))
+	if (!report_latency(tr, delta))
 		goto out;
 
 	raw_spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
-	if (!report_latency(delta))
+	if (!report_latency(tr, delta))
 		goto out_unlock;
 
 	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
@@ -352,7 +346,7 @@
 	data->critical_end = parent_ip;
 
 	if (likely(!is_tracing_stopped())) {
-		tracing_max_latency = delta;
+		tr->max_latency = delta;
 		update_max_tr_single(tr, current, cpu);
 	}
 
@@ -531,7 +525,7 @@
 }
 #endif /* CONFIG_PREEMPT_TRACER */
 
-static int register_irqsoff_function(int graph, int set)
+static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -543,7 +537,7 @@
 		ret = register_ftrace_graph(&irqsoff_graph_return,
 					    &irqsoff_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -551,7 +545,7 @@
 	return ret;
 }
 
-static void unregister_irqsoff_function(int graph)
+static void unregister_irqsoff_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -559,17 +553,17 @@
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void irqsoff_function_set(int set)
+static void irqsoff_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_irqsoff_function(is_graph(), 1);
+		register_irqsoff_function(tr, is_graph(), 1);
 	else
-		unregister_irqsoff_function(is_graph());
+		unregister_irqsoff_function(tr, is_graph());
 }
 
 static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -577,7 +571,7 @@
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		irqsoff_function_set(set);
+		irqsoff_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
@@ -586,7 +580,7 @@
 {
 	int ret;
 
-	ret = register_irqsoff_function(graph, 0);
+	ret = register_irqsoff_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -600,25 +594,37 @@
 {
 	tracer_enabled = 0;
 
-	unregister_irqsoff_function(graph);
+	unregister_irqsoff_function(tr, graph);
 }
 
-static void __irqsoff_tracer_init(struct trace_array *tr)
+static bool irqsoff_busy;
+
+static int __irqsoff_tracer_init(struct trace_array *tr)
 {
+	if (irqsoff_busy)
+		return -EBUSY;
+
 	save_flags = trace_flags;
 
 	/* non overwrite screws up the latency tracers */
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
 
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	irqsoff_trace = tr;
 	/* make sure that the tracer is visible */
 	smp_wmb();
 	tracing_reset_online_cpus(&tr->trace_buffer);
 
-	if (start_irqsoff_tracer(tr, is_graph()))
+	ftrace_init_array_ops(tr, irqsoff_tracer_call);
+
+	/* Only toplevel instance supports graph tracing */
+	if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+				      is_graph())))
 		printk(KERN_ERR "failed to start irqsoff tracer\n");
+
+	irqsoff_busy = true;
+	return 0;
 }
 
 static void irqsoff_tracer_reset(struct trace_array *tr)
@@ -630,6 +636,9 @@
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
+
+	irqsoff_busy = false;
 }
 
 static void irqsoff_tracer_start(struct trace_array *tr)
@@ -647,8 +656,7 @@
 {
 	trace_type = TRACER_IRQS_OFF;
 
-	__irqsoff_tracer_init(tr);
-	return 0;
+	return __irqsoff_tracer_init(tr);
 }
 static struct tracer irqsoff_tracer __read_mostly =
 {
@@ -668,6 +676,7 @@
 #endif
 	.open           = irqsoff_trace_open,
 	.close          = irqsoff_trace_close,
+	.allow_instances = true,
 	.use_max_tr	= true,
 };
 # define register_irqsoff(trace) register_tracer(&trace)
@@ -680,8 +689,7 @@
 {
 	trace_type = TRACER_PREEMPT_OFF;
 
-	__irqsoff_tracer_init(tr);
-	return 0;
+	return __irqsoff_tracer_init(tr);
 }
 
 static struct tracer preemptoff_tracer __read_mostly =
@@ -702,6 +710,7 @@
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
+	.allow_instances = true,
 	.use_max_tr	= true,
 };
 # define register_preemptoff(trace) register_tracer(&trace)
@@ -716,8 +725,7 @@
 {
 	trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
 
-	__irqsoff_tracer_init(tr);
-	return 0;
+	return __irqsoff_tracer_init(tr);
 }
 
 static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -738,6 +746,7 @@
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
+	.allow_instances = true,
 	.use_max_tr	= true,
 };
 

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 903ae28..282f6e4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c

@@ -40,27 +40,27 @@
 	(sizeof(struct probe_arg) * (n)))
 
 
-static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
 	return tk->rp.handler != NULL;
 }
 
-static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk)
+static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
 {
 	return tk->symbol ? tk->symbol : "unknown";
 }
 
-static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
+static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
 {
 	return tk->rp.kp.offset;
 }
 
-static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 {
 	return !!(kprobe_gone(&tk->rp.kp));
 }
 
-static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
+static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
 						 struct module *mod)
 {
 	int len = strlen(mod->name);
@@ -68,7 +68,7 @@
 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
 
-static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
 {
 	return !!strchr(trace_kprobe_symbol(tk), ':');
 }
@@ -132,19 +132,21 @@
  * Kprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)					\
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
 					  void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
 				(unsigned int)((unsigned long)offset));	\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
+
 DEFINE_BASIC_FETCH_FUNCS(stack)
 /* No string on the stack entry */
 #define fetch_stack_string	NULL
 #define fetch_stack_string_size	NULL
 
 #define DEFINE_FETCH_memory(type)					\
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
 					  void *addr, void *dest)	\
 {									\
 	type retval;							\
@@ -152,14 +154,16 @@
 		*(type *)dest = 0;					\
 	else								\
 		*(type *)dest = retval;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
+
 DEFINE_BASIC_FETCH_FUNCS(memory)
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+					    void *addr, void *dest)
 {
 	long ret;
 	int maxlen = get_rloc_len(*(u32 *)dest);
@@ -193,10 +197,11 @@
 					      get_rloc_offs(*(u32 *)dest));
 	}
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
 
 /* Return the length of string -- including null terminal byte */
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-							void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+						 void *addr, void *dest)
 {
 	mm_segment_t old_fs;
 	int ret, len = 0;
@@ -219,17 +224,19 @@
 	else
 		*(u32 *)dest = len;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
 
 #define DEFINE_FETCH_symbol(type)					\
-__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,	\
-					  void *data, void *dest)	\
+void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
 {									\
 	struct symbol_cache *sc = data;					\
 	if (sc->addr)							\
 		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
 	else								\
 		*(type *)dest = 0;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
+
 DEFINE_BASIC_FETCH_FUNCS(symbol)
 DEFINE_FETCH_symbol(string)
 DEFINE_FETCH_symbol(string_size)
@@ -907,7 +914,7 @@
 };
 
 /* Kprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
 		    struct ftrace_event_file *ftrace_file)
 {
@@ -943,7 +950,7 @@
 					 entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct event_file_link *link;
@@ -951,9 +958,10 @@
 	list_for_each_entry_rcu(link, &tk->tp.files, list)
 		__kprobe_trace_func(tk, regs, link->file);
 }
+NOKPROBE_SYMBOL(kprobe_trace_func);
 
 /* Kretprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		       struct pt_regs *regs,
 		       struct ftrace_event_file *ftrace_file)
@@ -991,7 +999,7 @@
 					 entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		     struct pt_regs *regs)
 {
@@ -1000,6 +1008,7 @@
 	list_for_each_entry_rcu(link, &tk->tp.files, list)
 		__kretprobe_trace_func(tk, ri, regs, link->file);
 }
+NOKPROBE_SYMBOL(kretprobe_trace_func);
 
 /* Event entry printers */
 static enum print_line_t
@@ -1131,7 +1140,7 @@
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes void
+static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
@@ -1158,9 +1167,10 @@
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kprobe_perf_func);
 
 /* Kretprobe profile handler */
-static __kprobes void
+static void
 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		    struct pt_regs *regs)
 {
@@ -1188,6 +1198,7 @@
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kretprobe_perf_func);
 #endif	/* CONFIG_PERF_EVENTS */
 
 /*
@@ -1196,9 +1207,8 @@
  * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
  * lockless, but we can't race with this __init function.
  */
-static __kprobes
-int kprobe_register(struct ftrace_event_call *event,
-		    enum trace_reg type, void *data)
+static int kprobe_register(struct ftrace_event_call *event,
+			   enum trace_reg type, void *data)
 {
 	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
 	struct ftrace_event_file *file = data;
@@ -1224,8 +1234,7 @@
 	return 0;
 }
 
-static __kprobes
-int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
@@ -1239,9 +1248,10 @@
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kprobe_dispatcher);
 
-static __kprobes
-int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+static int
+kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
@@ -1255,6 +1265,7 @@
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kretprobe_dispatcher);
 
 static struct trace_event_functions kretprobe_funcs = {
 	.trace		= print_kretprobe_event
@@ -1377,6 +1388,9 @@
 	struct trace_kprobe *tk;
 	struct ftrace_event_file *file;
 
+	if (tracing_is_disabled())
+		return -ENODEV;
+
 	target = kprobe_trace_selftest_target;
 
 	pr_info("Testing kprobe tracing: ");

diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 69a5cc9..fcf0a9e 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c

@@ -91,7 +91,6 @@
 	.name		= "nop",
 	.init		= nop_trace_init,
 	.reset		= nop_trace_reset,
-	.wait_pipe	= poll_wait_pipe,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest	= trace_selftest_startup_nop,
 #endif

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a436de1..f3dad80 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c

@@ -126,6 +126,34 @@
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
 /**
+ * trace_seq_bitmask - put a list of longs as a bitmask print output
+ * @s:		trace sequence descriptor
+ * @maskp:	points to an array of unsigned longs that represent a bitmask
+ * @nmaskbits:	The number of bits that are valid in @maskp
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * Writes an ASCII representation of a bitmask string into @s.
+ */
+int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (s->full || !len)
+		return 0;
+	/* Append after what is already in the buffer, do not overwrite it */
+	ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits);
+	s->len += ret;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_bitmask);
+
+/**
  * trace_seq_vprintf - sequence printing of trace information
  * @s: trace sequence descriptor
  * @fmt: printf format string
@@ -399,6 +427,19 @@
 #endif
 
 const char *
+ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+			 unsigned int bitmask_size)
+{
+	const char *ret = p->buffer + p->len;
+
+	trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+
+const char *
 ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 {
 	int i;

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 8364a42..d4b9fc2 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c

@@ -37,13 +37,13 @@
 
 /* Printing  in basic type function template */
 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt)				\
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\
-						const char *name,	\
-						void *data, void *ent)	\
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\
+				void *data, void *ent)			\
 {									\
 	return trace_seq_printf(s, " %s=" fmt, name, *(type *)data);	\
 }									\
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
+const char PRINT_TYPE_FMT_NAME(type)[] = fmt;				\
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
 
 DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
@@ -55,9 +55,8 @@
 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
 
 /* Print type function for string type */
-__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
-						  const char *name,
-						  void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
+				 void *data, void *ent)
 {
 	int len = *(u32 *)data >> 16;
 
@@ -67,6 +66,7 @@
 		return trace_seq_printf(s, " %s=\"%s\"", name,
 					(const char *)get_loc_data(data, ent));
 }
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
 
 const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
 
@@ -81,23 +81,24 @@
 
 /* Data fetch function templates */
 #define DEFINE_FETCH_reg(type)						\
-__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,		\
-					void *offset, void *dest)	\
+void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)regs_get_register(regs,			\
 				(unsigned int)((unsigned long)offset));	\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
 DEFINE_BASIC_FETCH_FUNCS(reg)
 /* No string on the register */
 #define fetch_reg_string	NULL
 #define fetch_reg_string_size	NULL
 
 #define DEFINE_FETCH_retval(type)					\
-__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,	\
-					  void *dummy, void *dest)	\
+void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,		\
+				   void *dummy, void *dest)		\
 {									\
 	*(type *)dest = (type)regs_return_value(regs);			\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
 DEFINE_BASIC_FETCH_FUNCS(retval)
 /* No string on the retval */
 #define fetch_retval_string		NULL
@@ -112,8 +113,8 @@
 };
 
 #define DEFINE_FETCH_deref(type)					\
-__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,	\
-					    void *data, void *dest)	\
+void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,			\
+				  void *data, void *dest)		\
 {									\
 	struct deref_fetch_param *dprm = data;				\
 	unsigned long addr;						\
@@ -123,12 +124,13 @@
 		dprm->fetch(regs, (void *)addr, dest);			\
 	} else								\
 		*(type *)dest = 0;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
 DEFINE_BASIC_FETCH_FUNCS(deref)
 DEFINE_FETCH_deref(string)
 
-__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
-						   void *data, void *dest)
+void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
+					 void *data, void *dest)
 {
 	struct deref_fetch_param *dprm = data;
 	unsigned long addr;
@@ -140,16 +142,18 @@
 	} else
 		*(string_size *)dest = 0;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
 
-static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+static void update_deref_fetch_param(struct deref_fetch_param *data)
 {
 	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
 		update_deref_fetch_param(data->orig.data);
 	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
 		update_symbol_cache(data->orig.data);
 }
+NOKPROBE_SYMBOL(update_deref_fetch_param);
 
-static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
+static void free_deref_fetch_param(struct deref_fetch_param *data)
 {
 	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
 		free_deref_fetch_param(data->orig.data);
@@ -157,6 +161,7 @@
 		free_symbol_cache(data->orig.data);
 	kfree(data);
 }
+NOKPROBE_SYMBOL(free_deref_fetch_param);
 
 /* Bitfield fetch function */
 struct bitfield_fetch_param {
@@ -166,8 +171,8 @@
 };
 
 #define DEFINE_FETCH_bitfield(type)					\
-__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,	\
-					    void *data, void *dest)	\
+void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,		\
+				     void *data, void *dest)		\
 {									\
 	struct bitfield_fetch_param *bprm = data;			\
 	type buf = 0;							\
@@ -177,13 +182,13 @@
 		buf >>= bprm->low_shift;				\
 	}								\
 	*(type *)dest = buf;						\
-}
-
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
 DEFINE_BASIC_FETCH_FUNCS(bitfield)
 #define fetch_bitfield_string		NULL
 #define fetch_bitfield_string_size	NULL
 
-static __kprobes void
+static void
 update_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
 	/*
@@ -196,7 +201,7 @@
 		update_symbol_cache(data->orig.data);
 }
 
-static __kprobes void
+static void
 free_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
 	/*
@@ -255,17 +260,17 @@
 }
 
 /* Special function : only accept unsigned long */
-static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs,
-						 void *dummy, void *dest)
+static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
 	*(unsigned long *)dest = kernel_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_kernel_stack_address);
 
-static __kprobes void fetch_user_stack_address(struct pt_regs *regs,
-					       void *dummy, void *dest)
+static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
 	*(unsigned long *)dest = user_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_user_stack_address);
 
 static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
 					    fetch_func_t orig_fn,

diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb1ab5d..4f815fb 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h

@@ -81,13 +81,13 @@
  */
 #define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs))
 
-static inline void *get_rloc_data(u32 *dl)
+static nokprobe_inline void *get_rloc_data(u32 *dl)
 {
 	return (u8 *)dl + get_rloc_offs(*dl);
 }
 
 /* For data_loc conversion */
-static inline void *get_loc_data(u32 *dl, void *ent)
+static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
 {
 	return (u8 *)ent + get_rloc_offs(*dl);
 }
@@ -136,9 +136,8 @@
 
 /* Printing  in basic type function template */
 #define DECLARE_BASIC_PRINT_TYPE_FUNC(type)				\
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,		\
-					 const char *name,		\
-					 void *data, void *ent);	\
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\
+				void *data, void *ent);			\
 extern const char PRINT_TYPE_FMT_NAME(type)[]
 
 DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -303,7 +302,7 @@
 	return !!(tp->flags & TP_FLAG_REGISTERED);
 }
 
-static inline __kprobes void call_fetch(struct fetch_param *fprm,
+static nokprobe_inline void call_fetch(struct fetch_param *fprm,
 				 struct pt_regs *regs, void *dest)
 {
 	return fprm->fn(regs, fprm->data, dest);
@@ -351,7 +350,7 @@
 extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
 
 /* Sum up total data length for dynamic arraies (strings) */
-static inline __kprobes int
+static nokprobe_inline int
 __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 {
 	int i, ret = 0;
@@ -367,7 +366,7 @@
 }
 
 /* Store the value of each argument */
-static inline __kprobes void
+static nokprobe_inline void
 store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 		 u8 *data, int maxlen)
 {

diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e14da5e..19bd892 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c

@@ -130,15 +130,9 @@
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = wakeup_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
-static int register_wakeup_function(int graph, int set)
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -150,7 +144,7 @@
 		ret = register_ftrace_graph(&wakeup_graph_return,
 					    &wakeup_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -158,7 +152,7 @@
 	return ret;
 }
 
-static void unregister_wakeup_function(int graph)
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -166,17 +160,17 @@
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void wakeup_function_set(int set)
+static void wakeup_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_wakeup_function(is_graph(), 1);
+		register_wakeup_function(tr, is_graph(), 1);
 	else
-		unregister_wakeup_function(is_graph());
+		unregister_wakeup_function(tr, is_graph());
 }
 
 static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -184,16 +178,16 @@
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		wakeup_function_set(set);
+		wakeup_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
 
-static int start_func_tracer(int graph)
+static int start_func_tracer(struct trace_array *tr, int graph)
 {
 	int ret;
 
-	ret = register_wakeup_function(graph, 0);
+	ret = register_wakeup_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -203,11 +197,11 @@
 	return ret;
 }
 
-static void stop_func_tracer(int graph)
+static void stop_func_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
 
-	unregister_wakeup_function(graph);
+	unregister_wakeup_function(tr, graph);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -221,12 +215,12 @@
 	if (!(is_graph() ^ set))
 		return 0;
 
-	stop_func_tracer(!set);
+	stop_func_tracer(tr, !set);
 
 	wakeup_reset(wakeup_trace);
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 
-	return start_func_tracer(set);
+	return start_func_tracer(tr, set);
 }
 
 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
@@ -350,13 +344,13 @@
 /*
  * Should this new latency be reported/recorded?
  */
-static int report_latency(cycle_t delta)
+static int report_latency(struct trace_array *tr, cycle_t delta)
 {
 	if (tracing_thresh) {
 		if (delta < tracing_thresh)
 			return 0;
 	} else {
-		if (delta <= tracing_max_latency)
+		if (delta <= tr->max_latency)
 			return 0;
 	}
 	return 1;
@@ -424,11 +418,11 @@
 	T1 = ftrace_now(cpu);
 	delta = T1-T0;
 
-	if (!report_latency(delta))
+	if (!report_latency(wakeup_trace, delta))
 		goto out_unlock;
 
 	if (likely(!is_tracing_stopped())) {
-		tracing_max_latency = delta;
+		wakeup_trace->max_latency = delta;
 		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 	}
 
@@ -587,7 +581,7 @@
 	 */
 	smp_wmb();
 
-	if (start_func_tracer(is_graph()))
+	if (start_func_tracer(tr, is_graph()))
 		printk(KERN_ERR "failed to start wakeup tracer\n");
 
 	return;
@@ -600,13 +594,15 @@
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
-	stop_func_tracer(is_graph());
+	stop_func_tracer(tr, is_graph());
 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
 	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
 }
 
+static bool wakeup_busy;
+
 static int __wakeup_tracer_init(struct trace_array *tr)
 {
 	save_flags = trace_flags;
@@ -615,14 +611,20 @@
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
 
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	wakeup_trace = tr;
+	ftrace_init_array_ops(tr, wakeup_tracer_call);
 	start_wakeup_tracer(tr);
+
+	wakeup_busy = true;
 	return 0;
 }
 
 static int wakeup_tracer_init(struct trace_array *tr)
 {
+	if (wakeup_busy)
+		return -EBUSY;
+
 	wakeup_dl = 0;
 	wakeup_rt = 0;
 	return __wakeup_tracer_init(tr);
@@ -630,6 +632,9 @@
 
 static int wakeup_rt_tracer_init(struct trace_array *tr)
 {
+	if (wakeup_busy)
+		return -EBUSY;
+
 	wakeup_dl = 0;
 	wakeup_rt = 1;
 	return __wakeup_tracer_init(tr);
@@ -637,6 +642,9 @@
 
 static int wakeup_dl_tracer_init(struct trace_array *tr)
 {
+	if (wakeup_busy)
+		return -EBUSY;
+
 	wakeup_dl = 1;
 	wakeup_rt = 0;
 	return __wakeup_tracer_init(tr);
@@ -653,6 +661,8 @@
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
+	wakeup_busy = false;
 }
 
 static void wakeup_tracer_start(struct trace_array *tr)
@@ -684,6 +694,7 @@
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
+	.allow_instances = true,
 	.use_max_tr	= true,
 };
 
@@ -694,7 +705,6 @@
 	.reset		= wakeup_tracer_reset,
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
-	.wait_pipe	= poll_wait_pipe,
 	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,
@@ -706,6 +716,7 @@
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
+	.allow_instances = true,
 	.use_max_tr	= true,
 };
 
@@ -716,7 +727,6 @@
 	.reset		= wakeup_tracer_reset,
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
-	.wait_pipe	= poll_wait_pipe,
 	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index e98fca6..5ef6049 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c

@@ -65,7 +65,7 @@
 
 	/* Don't allow flipping of max traces now */
 	local_irq_save(flags);
-	arch_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&buf->tr->max_lock);
 
 	cnt = ring_buffer_entries(buf->buffer);
 
@@ -83,7 +83,7 @@
 			break;
 	}
 	tracing_on();
-	arch_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&buf->tr->max_lock);
 	local_irq_restore(flags);
 
 	if (count)
@@ -161,11 +161,6 @@
 	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,
 };
 
-static struct ftrace_ops test_global = {
-	.func		= trace_selftest_test_global_func,
-	.flags		= FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static void print_counts(void)
 {
 	printk("(%d %d %d %d %d) ",
@@ -185,7 +180,7 @@
 	trace_selftest_test_dyn_cnt = 0;
 }
 
-static int trace_selftest_ops(int cnt)
+static int trace_selftest_ops(struct trace_array *tr, int cnt)
 {
 	int save_ftrace_enabled = ftrace_enabled;
 	struct ftrace_ops *dyn_ops;
@@ -220,7 +215,11 @@
 	register_ftrace_function(&test_probe1);
 	register_ftrace_function(&test_probe2);
 	register_ftrace_function(&test_probe3);
-	register_ftrace_function(&test_global);
+	/* First pass (cnt == 1) runs with the main function tracer already on */
+	if (cnt > 1) {
+		ftrace_init_array_ops(tr, trace_selftest_test_global_func);
+		register_ftrace_function(tr->ops);
+	}
 
 	DYN_FTRACE_TEST_NAME();
 
@@ -232,8 +231,10 @@
 		goto out;
 	if (trace_selftest_test_probe3_cnt != 1)
 		goto out;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out;
+	}
 
 	DYN_FTRACE_TEST_NAME2();
 
@@ -269,8 +270,10 @@
 		goto out_free;
 	if (trace_selftest_test_probe3_cnt != 3)
 		goto out_free;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out_free;
+	}
 	if (trace_selftest_test_dyn_cnt == 0)
 		goto out_free;
 
@@ -295,7 +298,9 @@
 	unregister_ftrace_function(&test_probe1);
 	unregister_ftrace_function(&test_probe2);
 	unregister_ftrace_function(&test_probe3);
-	unregister_ftrace_function(&test_global);
+	if (cnt > 1)
+		unregister_ftrace_function(tr->ops);
+	ftrace_reset_array_ops(tr);
 
 	/* Make sure everything is off */
 	reset_counts();
@@ -315,9 +320,9 @@
 }
 
 /* Test dynamic code modification and ftrace filters */
-int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
-					   struct trace_array *tr,
-					   int (*func)(void))
+static int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
+						  struct trace_array *tr,
+						  int (*func)(void))
 {
 	int save_ftrace_enabled = ftrace_enabled;
 	unsigned long count;
@@ -388,7 +393,7 @@
 	}
 
 	/* Test the ops with global tracing running */
-	ret = trace_selftest_ops(1);
+	ret = trace_selftest_ops(tr, 1);
 	trace->reset(tr);
 
  out:
@@ -399,7 +404,7 @@
 
 	/* Test the ops with global tracing off */
 	if (!ret)
-		ret = trace_selftest_ops(2);
+		ret = trace_selftest_ops(tr, 2);
 
 	return ret;
 }
@@ -802,7 +807,7 @@
 int
 trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long save_max = tracing_max_latency;
+	unsigned long save_max = tr->max_latency;
 	unsigned long count;
 	int ret;
 
@@ -814,7 +819,7 @@
 	}
 
 	/* reset the max latency */
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	/* disable interrupts for a bit */
 	local_irq_disable();
 	udelay(100);
@@ -841,7 +846,7 @@
 		ret = -1;
 	}
 
-	tracing_max_latency = save_max;
+	tr->max_latency = save_max;
 
 	return ret;
 }
@@ -851,7 +856,7 @@
 int
 trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long save_max = tracing_max_latency;
+	unsigned long save_max = tr->max_latency;
 	unsigned long count;
 	int ret;
 
@@ -876,7 +881,7 @@
 	}
 
 	/* reset the max latency */
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	/* disable preemption for a bit */
 	preempt_disable();
 	udelay(100);
@@ -903,7 +908,7 @@
 		ret = -1;
 	}
 
-	tracing_max_latency = save_max;
+	tr->max_latency = save_max;
 
 	return ret;
 }
@@ -913,7 +918,7 @@
 int
 trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long save_max = tracing_max_latency;
+	unsigned long save_max = tr->max_latency;
 	unsigned long count;
 	int ret;
 
@@ -938,7 +943,7 @@
 	}
 
 	/* reset the max latency */
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 
 	/* disable preemption and interrupts for a bit */
 	preempt_disable();
@@ -973,7 +978,7 @@
 	}
 
 	/* do the test by disabling interrupts first this time */
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 	tracing_start();
 	trace->start(tr);
 
@@ -1004,7 +1009,7 @@
 	tracing_start();
 out_no_start:
 	trace->reset(tr);
-	tracing_max_latency = save_max;
+	tr->max_latency = save_max;
 
 	return ret;
 }
@@ -1057,7 +1062,7 @@
 int
 trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long save_max = tracing_max_latency;
+	unsigned long save_max = tr->max_latency;
 	struct task_struct *p;
 	struct completion is_ready;
 	unsigned long count;
@@ -1083,7 +1088,7 @@
 	}
 
 	/* reset the max latency */
-	tracing_max_latency = 0;
+	tr->max_latency = 0;
 
 	while (p->on_rq) {
 		/*
@@ -1113,7 +1118,7 @@
 	trace->reset(tr);
 	tracing_start();
 
-	tracing_max_latency = save_max;
+	tr->max_latency = save_max;
 
 	/* kill the thread */
 	kthread_stop(p);

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 21b320e..8a4e5cb 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c

@@ -51,11 +51,33 @@
 int stack_tracer_enabled;
 static int last_stack_tracer_enabled;
 
+static inline void print_max_stack(void)
+{
+	long i;
+	int size;
+
+	pr_emerg("        Depth    Size   Location    (%d entries)\n"
+			   "        -----    ----   --------\n",
+			   max_stack_trace.nr_entries - 1);
+
+	for (i = 0; i < max_stack_trace.nr_entries; i++) {
+		if (stack_dump_trace[i] == ULONG_MAX)
+			break;
+		if (i+1 == max_stack_trace.nr_entries ||
+				stack_dump_trace[i+1] == ULONG_MAX)
+			size = stack_dump_index[i];
+		else
+			size = stack_dump_index[i] - stack_dump_index[i+1];
+
+		pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_dump_index[i],
+				size, (void *)stack_dump_trace[i]);
+	}
+}
+
 static inline void
 check_stack(unsigned long ip, unsigned long *stack)
 {
-	unsigned long this_size, flags;
-	unsigned long *p, *top, *start;
+	unsigned long this_size, flags; unsigned long *p, *top, *start;
 	static int tracer_frame;
 	int frame_size = ACCESS_ONCE(tracer_frame);
 	int i;
@@ -85,8 +107,12 @@
 
 	max_stack_size = this_size;
 
-	max_stack_trace.nr_entries	= 0;
-	max_stack_trace.skip		= 3;
+	max_stack_trace.nr_entries = 0;
+
+	if (using_ftrace_ops_list_func())
+		max_stack_trace.skip = 4;
+	else
+		max_stack_trace.skip = 3;
 
 	save_stack_trace(&max_stack_trace);
 
@@ -145,8 +171,12 @@
 			i++;
 	}
 
-	BUG_ON(current != &init_task &&
-		*(end_of_stack(current)) != STACK_END_MAGIC);
+	if ((current != &init_task &&
+		*(end_of_stack(current)) != STACK_END_MAGIC)) {
+		print_max_stack();
+		BUG();
+	}
+
  out:
 	arch_spin_unlock(&max_stack_lock);
 	local_irq_restore(flags);

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c082a74..04fdb5d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c

@@ -108,8 +108,8 @@
  * Uprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)					\
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
-					  void *offset, void *dest)	\
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
+					 void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)get_user_stack_nth(regs,			\
 					      ((unsigned long)offset)); \
@@ -120,8 +120,8 @@
 #define fetch_stack_string_size	NULL
 
 #define DEFINE_FETCH_memory(type)					\
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
-						void *addr, void *dest) \
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
+					  void *addr, void *dest)	\
 {									\
 	type retval;							\
 	void __user *vaddr = (void __force __user *) addr;		\
@@ -136,8 +136,8 @@
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+					    void *addr, void *dest)
 {
 	long ret;
 	u32 rloc = *(u32 *)dest;
@@ -158,8 +158,8 @@
 	}
 }
 
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+						 void *addr, void *dest)
 {
 	int len;
 	void __user *vaddr = (void __force __user *) addr;
@@ -184,8 +184,8 @@
 }
 
 #define DEFINE_FETCH_file_offset(type)					\
-static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\
-					void *offset, void *dest) 	\
+static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,	\
+					       void *offset, void *dest)\
 {									\
 	void *vaddr = (void *)translate_user_vaddr(offset);		\
 									\
@@ -1009,9 +1009,32 @@
 	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
 }
 
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		list_del(&event->hw.tp_list);
+		done = tu->filter.nr_systemwide ||
+			(event->hw.tp_target->flags & PF_EXITING) ||
+			uprobe_filter_event(tu, event);
+	} else {
+		tu->filter.nr_systemwide--;
+		done = tu->filter.nr_systemwide;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
 {
 	bool done;
+	int err;
 
 	write_lock(&tu->filter.rwlock);
 	if (event->hw.tp_target) {
@@ -1033,32 +1056,13 @@
 	}
 	write_unlock(&tu->filter.rwlock);
 
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
-
-	return 0;
-}
-
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
-{
-	bool done;
-
-	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
-		list_del(&event->hw.tp_list);
-		done = tu->filter.nr_systemwide ||
-			(event->hw.tp_target->flags & PF_EXITING) ||
-			uprobe_filter_event(tu, event);
-	} else {
-		tu->filter.nr_systemwide--;
-		done = tu->filter.nr_systemwide;
+	err = 0;
+	if (!done) {
+		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+		if (err)
+			uprobe_perf_close(tu, event);
 	}
-	write_unlock(&tu->filter.rwlock);
-
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
-	return 0;
+	return err;
 }
 
 static bool uprobe_perf_filter(struct uprobe_consumer *uc,

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a4bab46..6203d29 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c

@@ -65,15 +65,12 @@
 	 * be executing on any CPU.  The pool behaves as an unbound one.
 	 *
 	 * Note that DISASSOCIATED should be flipped only while holding
-	 * manager_mutex to avoid changing binding state while
-	 * create_worker() is in progress.
+	 * attach_mutex to avoid changing binding state while
+	 * worker_attach_to_pool() is in progress.
 	 */
-	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
-	POOL_FREEZING		= 1 << 3,	/* freeze in progress */
 
 	/* worker flags */
-	WORKER_STARTED		= 1 << 0,	/* started */
 	WORKER_DIE		= 1 << 1,	/* die die die */
 	WORKER_IDLE		= 1 << 2,	/* is idle */
 	WORKER_PREP		= 1 << 3,	/* preparing to run works */
@@ -124,8 +121,7 @@
  *    cpu or grabbing pool->lock is enough for read access.  If
  *    POOL_DISASSOCIATED is set, it's identical to L.
  *
- * MG: pool->manager_mutex and pool->lock protected.  Writes require both
- *     locks.  Reads can happen under either lock.
+ * A: pool->attach_mutex protected.
  *
  * PL: wq_pool_mutex protected.
  *
@@ -163,8 +159,11 @@
 
 	/* see manage_workers() for details on the two manager mutexes */
 	struct mutex		manager_arb;	/* manager arbitration */
-	struct mutex		manager_mutex;	/* manager exclusion */
-	struct idr		worker_idr;	/* MG: worker IDs and iteration */
+	struct mutex		attach_mutex;	/* attach/detach exclusion */
+	struct list_head	workers;	/* A: attached workers */
+	struct completion	*detach_completion; /* all workers detached */
+
+	struct ida		worker_ida;	/* worker IDs for task name */
 
 	struct workqueue_attrs	*attrs;		/* I: worker attributes */
 	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
@@ -340,16 +339,6 @@
 			   lockdep_is_held(&wq->mutex),			\
 			   "sched RCU or wq->mutex should be held")
 
-#ifdef CONFIG_LOCKDEP
-#define assert_manager_or_pool_lock(pool)				\
-	WARN_ONCE(debug_locks &&					\
-		  !lockdep_is_held(&(pool)->manager_mutex) &&		\
-		  !lockdep_is_held(&(pool)->lock),			\
-		  "pool->manager_mutex or ->lock should be held")
-#else
-#define assert_manager_or_pool_lock(pool)	do { } while (0)
-#endif
-
 #define for_each_cpu_worker_pool(pool, cpu)				\
 	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
 	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -375,17 +364,16 @@
 /**
  * for_each_pool_worker - iterate through all workers of a worker_pool
  * @worker: iteration cursor
- * @wi: integer used for iteration
  * @pool: worker_pool to iterate workers of
  *
- * This must be called with either @pool->manager_mutex or ->lock held.
+ * This must be called with @pool->attach_mutex held.
  *
  * The if/else clause exists only for the lockdep assertion and can be
  * ignored.
  */
-#define for_each_pool_worker(worker, wi, pool)				\
-	idr_for_each_entry(&(pool)->worker_idr, (worker), (wi))		\
-		if (({ assert_manager_or_pool_lock((pool)); false; })) { } \
+#define for_each_pool_worker(worker, pool)				\
+	list_for_each_entry((worker), &(pool)->workers, node)		\
+		if (({ lockdep_assert_held(&(pool)->attach_mutex); false; })) { } \
 		else
 
 /**
@@ -763,13 +751,6 @@
 	return need_more_worker(pool) && !may_start_working(pool);
 }
 
-/* Do I need to be the manager? */
-static bool need_to_manage_workers(struct worker_pool *pool)
-{
-	return need_to_create_worker(pool) ||
-		(pool->flags & POOL_MANAGE_WORKERS);
-}
-
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
@@ -791,8 +772,8 @@
  * Wake up functions.
  */
 
-/* Return the first worker.  Safe with preemption disabled */
-static struct worker *first_worker(struct worker_pool *pool)
+/* Return the first idle worker.  Safe with preemption disabled */
+static struct worker *first_idle_worker(struct worker_pool *pool)
 {
 	if (unlikely(list_empty(&pool->idle_list)))
 		return NULL;
@@ -811,7 +792,7 @@
  */
 static void wake_up_worker(struct worker_pool *pool)
 {
-	struct worker *worker = first_worker(pool);
+	struct worker *worker = first_idle_worker(pool);
 
 	if (likely(worker))
 		wake_up_process(worker->task);
@@ -885,7 +866,7 @@
 	 */
 	if (atomic_dec_and_test(&pool->nr_running) &&
 	    !list_empty(&pool->worklist))
-		to_wakeup = first_worker(pool);
+		to_wakeup = first_idle_worker(pool);
 	return to_wakeup ? to_wakeup->task : NULL;
 }
 
@@ -1621,70 +1602,6 @@
 	list_del_init(&worker->entry);
 }
 
-/**
- * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
- * @pool: target worker_pool
- *
- * Bind %current to the cpu of @pool if it is associated and lock @pool.
- *
- * Works which are scheduled while the cpu is online must at least be
- * scheduled to a worker which is bound to the cpu so that if they are
- * flushed from cpu callbacks while cpu is going down, they are
- * guaranteed to execute on the cpu.
- *
- * This function is to be used by unbound workers and rescuers to bind
- * themselves to the target cpu and may race with cpu going down or
- * coming online.  kthread_bind() can't be used because it may put the
- * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
- * verbatim as it's best effort and blocking and pool may be
- * [dis]associated in the meantime.
- *
- * This function tries set_cpus_allowed() and locks pool and verifies the
- * binding against %POOL_DISASSOCIATED which is set during
- * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
- * enters idle state or fetches works without dropping lock, it can
- * guarantee the scheduling requirement described in the first paragraph.
- *
- * CONTEXT:
- * Might sleep.  Called without any lock but returns with pool->lock
- * held.
- *
- * Return:
- * %true if the associated pool is online (@worker is successfully
- * bound), %false if offline.
- */
-static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
-__acquires(&pool->lock)
-{
-	while (true) {
-		/*
-		 * The following call may fail, succeed or succeed
-		 * without actually migrating the task to the cpu if
-		 * it races with cpu hotunplug operation.  Verify
-		 * against POOL_DISASSOCIATED.
-		 */
-		if (!(pool->flags & POOL_DISASSOCIATED))
-			set_cpus_allowed_ptr(current, pool->attrs->cpumask);
-
-		spin_lock_irq(&pool->lock);
-		if (pool->flags & POOL_DISASSOCIATED)
-			return false;
-		if (task_cpu(current) == pool->cpu &&
-		    cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
-			return true;
-		spin_unlock_irq(&pool->lock);
-
-		/*
-		 * We've raced with CPU hot[un]plug.  Give it a breather
-		 * and retry migration.  cond_resched() is required here;
-		 * otherwise, we might deadlock against cpu_stop trying to
-		 * bring down the CPU on non-preemptive kernel.
-		 */
-		cpu_relax();
-		cond_resched();
-	}
-}
-
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -1693,6 +1610,7 @@
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
+		INIT_LIST_HEAD(&worker->node);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -1700,12 +1618,68 @@
 }
 
 /**
+ * worker_attach_to_pool() - attach a worker to a pool
+ * @worker: worker to be attached
+ * @pool: the target pool
+ *
+ * Attach @worker to @pool.  Once attached, the %WORKER_UNBOUND flag and
+ * cpu-binding of @worker are kept coordinated with the pool across
+ * cpu-[un]hotplugs.
+ */
+static void worker_attach_to_pool(struct worker *worker,
+				   struct worker_pool *pool)
+{
+	mutex_lock(&pool->attach_mutex);
+
+	/*
+	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
+	 * online CPUs.  It'll be re-applied when any of the CPUs come up.
+	 */
+	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
+
+	/*
+	 * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
+	 * stable across this function.  See the comments above the
+	 * flag definition for details.
+	 */
+	if (pool->flags & POOL_DISASSOCIATED)
+		worker->flags |= WORKER_UNBOUND;
+
+	list_add_tail(&worker->node, &pool->workers);
+
+	mutex_unlock(&pool->attach_mutex);
+}
+
+/**
+ * worker_detach_from_pool() - detach a worker from its pool
+ * @worker: worker which is attached to its pool
+ * @pool: the pool @worker is attached to
+ *
+ * Undo the attaching which had been done in worker_attach_to_pool().  The
+ * caller worker shouldn't access the pool after being detached unless it
+ * holds another reference to the pool.
+ */
+static void worker_detach_from_pool(struct worker *worker,
+				    struct worker_pool *pool)
+{
+	struct completion *detach_completion = NULL;
+
+	mutex_lock(&pool->attach_mutex);
+	list_del(&worker->node);
+	if (list_empty(&pool->workers))
+		detach_completion = pool->detach_completion;
+	mutex_unlock(&pool->attach_mutex);
+
+	if (detach_completion)
+		complete(detach_completion);
+}
+
+/**
  * create_worker - create a new workqueue worker
  * @pool: pool the new worker will belong to
  *
- * Create a new worker which is bound to @pool.  The returned worker
- * can be started by calling start_worker() or destroyed using
- * destroy_worker().
+ * Create a new worker which is attached to @pool.  The new worker must be
+ * started by start_worker().
  *
  * CONTEXT:
  * Might sleep.  Does GFP_KERNEL allocations.
@@ -1719,19 +1693,8 @@
 	int id = -1;
 	char id_buf[16];
 
-	lockdep_assert_held(&pool->manager_mutex);
-
-	/*
-	 * ID is needed to determine kthread name.  Allocate ID first
-	 * without installing the pointer.
-	 */
-	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&pool->lock);
-
-	id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
-
-	spin_unlock_irq(&pool->lock);
-	idr_preload_end();
+	/* ID is needed to determine kthread name */
+	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
 	if (id < 0)
 		goto fail;
 
@@ -1758,33 +1721,14 @@
 	/* prevent userland from meddling with cpumask of workqueue workers */
 	worker->task->flags |= PF_NO_SETAFFINITY;
 
-	/*
-	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
-	 * online CPUs.  It'll be re-applied when any of the CPUs come up.
-	 */
-	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
-
-	/*
-	 * The caller is responsible for ensuring %POOL_DISASSOCIATED
-	 * remains stable across this function.  See the comments above the
-	 * flag definition for details.
-	 */
-	if (pool->flags & POOL_DISASSOCIATED)
-		worker->flags |= WORKER_UNBOUND;
-
-	/* successful, commit the pointer to idr */
-	spin_lock_irq(&pool->lock);
-	idr_replace(&pool->worker_idr, worker, worker->id);
-	spin_unlock_irq(&pool->lock);
+	/* successful, attach the worker to the pool */
+	worker_attach_to_pool(worker, pool);
 
 	return worker;
 
 fail:
-	if (id >= 0) {
-		spin_lock_irq(&pool->lock);
-		idr_remove(&pool->worker_idr, id);
-		spin_unlock_irq(&pool->lock);
-	}
+	if (id >= 0)
+		ida_simple_remove(&pool->worker_ida, id);
 	kfree(worker);
 	return NULL;
 }
@@ -1800,7 +1744,6 @@
  */
 static void start_worker(struct worker *worker)
 {
-	worker->flags |= WORKER_STARTED;
 	worker->pool->nr_workers++;
 	worker_enter_idle(worker);
 	wake_up_process(worker->task);
@@ -1818,8 +1761,6 @@
 {
 	struct worker *worker;
 
-	mutex_lock(&pool->manager_mutex);
-
 	worker = create_worker(pool);
 	if (worker) {
 		spin_lock_irq(&pool->lock);
@@ -1827,8 +1768,6 @@
 		spin_unlock_irq(&pool->lock);
 	}
 
-	mutex_unlock(&pool->manager_mutex);
-
 	return worker ? 0 : -ENOMEM;
 }
 
@@ -1836,46 +1775,30 @@
  * destroy_worker - destroy a workqueue worker
  * @worker: worker to be destroyed
  *
- * Destroy @worker and adjust @pool stats accordingly.
+ * Destroy @worker and adjust @pool stats accordingly.  The worker should
+ * be idle.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock) which is released and regrabbed.
+ * spin_lock_irq(pool->lock).
  */
 static void destroy_worker(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
 
-	lockdep_assert_held(&pool->manager_mutex);
 	lockdep_assert_held(&pool->lock);
 
 	/* sanity check frenzy */
 	if (WARN_ON(worker->current_work) ||
-	    WARN_ON(!list_empty(&worker->scheduled)))
+	    WARN_ON(!list_empty(&worker->scheduled)) ||
+	    WARN_ON(!(worker->flags & WORKER_IDLE)))
 		return;
 
-	if (worker->flags & WORKER_STARTED)
-		pool->nr_workers--;
-	if (worker->flags & WORKER_IDLE)
-		pool->nr_idle--;
-
-	/*
-	 * Once WORKER_DIE is set, the kworker may destroy itself at any
-	 * point.  Pin to ensure the task stays until we're done with it.
-	 */
-	get_task_struct(worker->task);
+	pool->nr_workers--;
+	pool->nr_idle--;
 
 	list_del_init(&worker->entry);
 	worker->flags |= WORKER_DIE;
-
-	idr_remove(&pool->worker_idr, worker->id);
-
-	spin_unlock_irq(&pool->lock);
-
-	kthread_stop(worker->task);
-	put_task_struct(worker->task);
-	kfree(worker);
-
-	spin_lock_irq(&pool->lock);
+	wake_up_process(worker->task);
 }
 
 static void idle_worker_timeout(unsigned long __pool)
@@ -1884,7 +1807,7 @@
 
 	spin_lock_irq(&pool->lock);
 
-	if (too_many_workers(pool)) {
+	while (too_many_workers(pool)) {
 		struct worker *worker;
 		unsigned long expires;
 
@@ -1892,13 +1815,12 @@
 		worker = list_entry(pool->idle_list.prev, struct worker, entry);
 		expires = worker->last_active + IDLE_WORKER_TIMEOUT;
 
-		if (time_before(jiffies, expires))
+		if (time_before(jiffies, expires)) {
 			mod_timer(&pool->idle_timer, expires);
-		else {
-			/* it's been idle for too long, wake up manager */
-			pool->flags |= POOL_MANAGE_WORKERS;
-			wake_up_worker(pool);
+			break;
 		}
+
+		destroy_worker(worker);
 	}
 
 	spin_unlock_irq(&pool->lock);
@@ -2017,44 +1939,6 @@
 }
 
 /**
- * maybe_destroy_worker - destroy workers which have been idle for a while
- * @pool: pool to destroy workers for
- *
- * Destroy @pool workers which have been idle for longer than
- * IDLE_WORKER_TIMEOUT.
- *
- * LOCKING:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times.  Called only from manager.
- *
- * Return:
- * %false if no action was taken and pool->lock stayed locked, %true
- * otherwise.
- */
-static bool maybe_destroy_workers(struct worker_pool *pool)
-{
-	bool ret = false;
-
-	while (too_many_workers(pool)) {
-		struct worker *worker;
-		unsigned long expires;
-
-		worker = list_entry(pool->idle_list.prev, struct worker, entry);
-		expires = worker->last_active + IDLE_WORKER_TIMEOUT;
-
-		if (time_before(jiffies, expires)) {
-			mod_timer(&pool->idle_timer, expires);
-			break;
-		}
-
-		destroy_worker(worker);
-		ret = true;
-	}
-
-	return ret;
-}
-
-/**
  * manage_workers - manage worker pool
  * @worker: self
  *
@@ -2083,8 +1967,6 @@
 	bool ret = false;
 
 	/*
-	 * Managership is governed by two mutexes - manager_arb and
-	 * manager_mutex.  manager_arb handles arbitration of manager role.
 	 * Anyone who successfully grabs manager_arb wins the arbitration
 	 * and becomes the manager.  mutex_trylock() on pool->manager_arb
 	 * failure while holding pool->lock reliably indicates that someone
@@ -2093,40 +1975,12 @@
 	 * grabbing manager_arb is responsible for actually performing
 	 * manager duties.  If manager_arb is grabbed and released without
 	 * actual management, the pool may stall indefinitely.
-	 *
-	 * manager_mutex is used for exclusion of actual management
-	 * operations.  The holder of manager_mutex can be sure that none
-	 * of management operations, including creation and destruction of
-	 * workers, won't take place until the mutex is released.  Because
-	 * manager_mutex doesn't interfere with manager role arbitration,
-	 * it is guaranteed that the pool's management, while may be
-	 * delayed, won't be disturbed by someone else grabbing
-	 * manager_mutex.
 	 */
 	if (!mutex_trylock(&pool->manager_arb))
 		return ret;
 
-	/*
-	 * With manager arbitration won, manager_mutex would be free in
-	 * most cases.  trylock first without dropping @pool->lock.
-	 */
-	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
-		spin_unlock_irq(&pool->lock);
-		mutex_lock(&pool->manager_mutex);
-		spin_lock_irq(&pool->lock);
-		ret = true;
-	}
-
-	pool->flags &= ~POOL_MANAGE_WORKERS;
-
-	/*
-	 * Destroy and then create so that may_start_working() is true
-	 * on return.
-	 */
-	ret |= maybe_destroy_workers(pool);
 	ret |= maybe_create_worker(pool);
 
-	mutex_unlock(&pool->manager_mutex);
 	mutex_unlock(&pool->manager_arb);
 	return ret;
 }
@@ -2314,6 +2168,11 @@
 		spin_unlock_irq(&pool->lock);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
 		worker->task->flags &= ~PF_WQ_WORKER;
+
+		set_task_comm(worker->task, "kworker/dying");
+		ida_simple_remove(&pool->worker_ida, worker->id);
+		worker_detach_from_pool(worker, pool);
+		kfree(worker);
 		return 0;
 	}
 
@@ -2361,9 +2220,6 @@
 
 	worker_set_flags(worker, WORKER_PREP, false);
 sleep:
-	if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
-		goto recheck;
-
 	/*
 	 * pool->lock is held and there's no work to process and no need to
 	 * manage, sleep.  Workers are woken up only while holding
@@ -2440,8 +2296,9 @@
 
 		spin_unlock_irq(&wq_mayday_lock);
 
-		/* migrate to the target cpu if possible */
-		worker_maybe_bind_and_lock(pool);
+		worker_attach_to_pool(rescuer, pool);
+
+		spin_lock_irq(&pool->lock);
 		rescuer->pool = pool;
 
 		/*
@@ -2454,6 +2311,11 @@
 				move_linked_works(work, scheduled, &n);
 
 		process_scheduled_works(rescuer);
+		spin_unlock_irq(&pool->lock);
+
+		worker_detach_from_pool(rescuer, pool);
+
+		spin_lock_irq(&pool->lock);
 
 		/*
 		 * Put the reference grabbed by send_mayday().  @pool won't
@@ -3550,9 +3412,10 @@
 		    (unsigned long)pool);
 
 	mutex_init(&pool->manager_arb);
-	mutex_init(&pool->manager_mutex);
-	idr_init(&pool->worker_idr);
+	mutex_init(&pool->attach_mutex);
+	INIT_LIST_HEAD(&pool->workers);
 
+	ida_init(&pool->worker_ida);
 	INIT_HLIST_NODE(&pool->hash_node);
 	pool->refcnt = 1;
 
@@ -3567,7 +3430,7 @@
 {
 	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
 
-	idr_destroy(&pool->worker_idr);
+	ida_destroy(&pool->worker_ida);
 	free_workqueue_attrs(pool->attrs);
 	kfree(pool);
 }
@@ -3585,6 +3448,7 @@
  */
 static void put_unbound_pool(struct worker_pool *pool)
 {
+	DECLARE_COMPLETION_ONSTACK(detach_completion);
 	struct worker *worker;
 
 	lockdep_assert_held(&wq_pool_mutex);
@@ -3605,18 +3469,24 @@
 	/*
 	 * Become the manager and destroy all workers.  Grabbing
 	 * manager_arb prevents @pool's workers from blocking on
-	 * manager_mutex.
+	 * attach_mutex.
 	 */
 	mutex_lock(&pool->manager_arb);
-	mutex_lock(&pool->manager_mutex);
-	spin_lock_irq(&pool->lock);
 
-	while ((worker = first_worker(pool)))
+	spin_lock_irq(&pool->lock);
+	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
-
 	spin_unlock_irq(&pool->lock);
-	mutex_unlock(&pool->manager_mutex);
+
+	mutex_lock(&pool->attach_mutex);
+	if (!list_empty(&pool->workers))
+		pool->detach_completion = &detach_completion;
+	mutex_unlock(&pool->attach_mutex);
+
+	if (pool->detach_completion)
+		wait_for_completion(pool->detach_completion);
+
 	mutex_unlock(&pool->manager_arb);
 
 	/* shut down the timers */
@@ -3662,9 +3532,6 @@
 	if (!pool || init_worker_pool(pool) < 0)
 		goto fail;
 
-	if (workqueue_freezing)
-		pool->flags |= POOL_FREEZING;
-
 	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
 	copy_workqueue_attrs(pool->attrs, attrs);
 
@@ -3771,7 +3638,12 @@
 
 	spin_lock_irq(&pwq->pool->lock);
 
-	if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) {
+	/*
+	 * During [un]freezing, the caller is responsible for ensuring that
+	 * this function is called at least once after @workqueue_freezing
+	 * is updated and visible.
+	 */
+	if (!freezable || !workqueue_freezing) {
 		pwq->max_active = wq->saved_max_active;
 
 		while (!list_empty(&pwq->delayed_works) &&
@@ -4103,17 +3975,13 @@
 	 * Let's determine what needs to be done.  If the target cpumask is
 	 * different from wq's, we need to compare it to @pwq's and create
 	 * a new one if they don't match.  If the target cpumask equals
-	 * wq's, the default pwq should be used.  If @pwq is already the
-	 * default one, nothing to do; otherwise, install the default one.
+	 * wq's, the default pwq should be used.
 	 */
 	if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
 		if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
 			goto out_unlock;
 	} else {
-		if (pwq == wq->dfl_pwq)
-			goto out_unlock;
-		else
-			goto use_dfl_pwq;
+		goto use_dfl_pwq;
 	}
 
 	mutex_unlock(&wq->mutex);
@@ -4121,8 +3989,8 @@
 	/* create a new pwq */
 	pwq = alloc_unbound_pwq(wq, target_attrs);
 	if (!pwq) {
-		pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
-			   wq->name);
+		pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
+			wq->name);
 		mutex_lock(&wq->mutex);
 		goto use_dfl_pwq;
 	}
@@ -4599,28 +4467,27 @@
 	int cpu = smp_processor_id();
 	struct worker_pool *pool;
 	struct worker *worker;
-	int wi;
 
 	for_each_cpu_worker_pool(pool, cpu) {
 		WARN_ON_ONCE(cpu != smp_processor_id());
 
-		mutex_lock(&pool->manager_mutex);
+		mutex_lock(&pool->attach_mutex);
 		spin_lock_irq(&pool->lock);
 
 		/*
-		 * We've blocked all manager operations.  Make all workers
+		 * We've blocked all attach/detach operations. Make all workers
 		 * unbound and set DISASSOCIATED.  Before this, all workers
 		 * except for the ones which are still executing works from
 		 * before the last CPU down must be on the cpu.  After
 		 * this, they may become diasporas.
 		 */
-		for_each_pool_worker(worker, wi, pool)
+		for_each_pool_worker(worker, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
 
 		spin_unlock_irq(&pool->lock);
-		mutex_unlock(&pool->manager_mutex);
+		mutex_unlock(&pool->attach_mutex);
 
 		/*
 		 * Call schedule() so that we cross rq->lock and thus can
@@ -4660,9 +4527,8 @@
 static void rebind_workers(struct worker_pool *pool)
 {
 	struct worker *worker;
-	int wi;
 
-	lockdep_assert_held(&pool->manager_mutex);
+	lockdep_assert_held(&pool->attach_mutex);
 
 	/*
 	 * Restore CPU affinity of all workers.  As all idle workers should
@@ -4671,13 +4537,13 @@
 	 * of all workers first and then clear UNBOUND.  As we're called
 	 * from CPU_ONLINE, the following shouldn't fail.
 	 */
-	for_each_pool_worker(worker, wi, pool)
+	for_each_pool_worker(worker, pool)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 						  pool->attrs->cpumask) < 0);
 
 	spin_lock_irq(&pool->lock);
 
-	for_each_pool_worker(worker, wi, pool) {
+	for_each_pool_worker(worker, pool) {
 		unsigned int worker_flags = worker->flags;
 
 		/*
@@ -4729,9 +4595,8 @@
 {
 	static cpumask_t cpumask;
 	struct worker *worker;
-	int wi;
 
-	lockdep_assert_held(&pool->manager_mutex);
+	lockdep_assert_held(&pool->attach_mutex);
 
 	/* is @cpu allowed for @pool? */
 	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4743,7 +4608,7 @@
 		return;
 
 	/* as we're called from CPU_ONLINE, the following shouldn't fail */
-	for_each_pool_worker(worker, wi, pool)
+	for_each_pool_worker(worker, pool)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 						  pool->attrs->cpumask) < 0);
 }
@@ -4776,7 +4641,7 @@
 		mutex_lock(&wq_pool_mutex);
 
 		for_each_pool(pool, pi) {
-			mutex_lock(&pool->manager_mutex);
+			mutex_lock(&pool->attach_mutex);
 
 			if (pool->cpu == cpu) {
 				spin_lock_irq(&pool->lock);
@@ -4788,7 +4653,7 @@
 				restore_unbound_workers_cpumask(pool, cpu);
 			}
 
-			mutex_unlock(&pool->manager_mutex);
+			mutex_unlock(&pool->attach_mutex);
 		}
 
 		/* update NUMA affinity of unbound workqueues */
@@ -4887,24 +4752,14 @@
  */
 void freeze_workqueues_begin(void)
 {
-	struct worker_pool *pool;
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
-	int pi;
 
 	mutex_lock(&wq_pool_mutex);
 
 	WARN_ON_ONCE(workqueue_freezing);
 	workqueue_freezing = true;
 
-	/* set FREEZING */
-	for_each_pool(pool, pi) {
-		spin_lock_irq(&pool->lock);
-		WARN_ON_ONCE(pool->flags & POOL_FREEZING);
-		pool->flags |= POOL_FREEZING;
-		spin_unlock_irq(&pool->lock);
-	}
-
 	list_for_each_entry(wq, &workqueues, list) {
 		mutex_lock(&wq->mutex);
 		for_each_pwq(pwq, wq)
@@ -4974,21 +4829,13 @@
 {
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
-	struct worker_pool *pool;
-	int pi;
 
 	mutex_lock(&wq_pool_mutex);
 
 	if (!workqueue_freezing)
 		goto out_unlock;
 
-	/* clear FREEZING */
-	for_each_pool(pool, pi) {
-		spin_lock_irq(&pool->lock);
-		WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
-		pool->flags &= ~POOL_FREEZING;
-		spin_unlock_irq(&pool->lock);
-	}
+	workqueue_freezing = false;
 
 	/* restore max_active and repopulate worklist */
 	list_for_each_entry(wq, &workqueues, list) {
@@ -4998,7 +4845,6 @@
 		mutex_unlock(&wq->mutex);
 	}
 
-	workqueue_freezing = false;
 out_unlock:
 	mutex_unlock(&wq_pool_mutex);
 }

diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 7e2204d..4521587 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h

@@ -37,6 +37,8 @@
 	struct task_struct	*task;		/* I: worker task */
 	struct worker_pool	*pool;		/* I: the associated pool */
 						/* L: for rescuers */
+	struct list_head	node;		/* A: anchored at pool->workers */
+						/* A: runs through worker->node */
 
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */

diff --git a/lib/Kconfig b/lib/Kconfig
index 4771fb3..334f772 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig

@@ -331,6 +331,20 @@
 config BTREE
 	boolean
 
+config INTERVAL_TREE
+	boolean
+	help
+	  Simple, embeddable, interval-tree. Can find the start of an
+	  overlapping range in log(n) time and then iterate over all
+	  overlapping nodes. The algorithm is implemented as an
+	  augmented rbtree.
+
+	  See:
+
+		Documentation/rbtree.txt
+
+	  for more information.
+
 config ASSOCIATIVE_ARRAY
 	bool
 	help

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ccca322..7cfcc1b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -1511,6 +1511,7 @@
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
 	depends on m && DEBUG_KERNEL
+	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
 
@@ -1635,6 +1636,19 @@
 
 	  If unsure, say N.
 
+config TEST_BPF
+	tristate "Test BPF filter functionality"
+	default n
+	depends on m && NET
+	help
+	  This builds the "test_bpf" module that runs various test vectors
+	  against the BPF interpreter or BPF JIT compiler depending on the
+	  current setting. This is in particular useful for BPF JIT compiler
+	  development, but also to run regression tests against changes in
+	  the interpreter code.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"

diff --git a/lib/Makefile b/lib/Makefile
index 74a32dc..ba967a1 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -33,6 +33,7 @@
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_MODULE) += test_module.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
+obj-$(CONFIG_TEST_BPF) += test_bpf.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -50,6 +51,7 @@
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -157,8 +159,6 @@
 obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
 obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
-interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
-
 obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
 
 obj-$(CONFIG_ASN1) += asn1_decoder.o

diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b75..c101230 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c

@@ -164,3 +164,66 @@
 	memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpus first
+ *
+ * @i: index number
+ * @numa_node: local numa_node, or -1 when the numa node is not defined
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (numa_node == -1 || !cpumask_of_node(numa_node)) {
+		/* Node undefined (-1) or has no mask: use all online cpus */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);

diff --git a/lib/crc7.c b/lib/crc7.c
index f1c3a14..bf6255e 100644
--- a/lib/crc7.c
+++ b/lib/crc7.c

@@ -10,42 +10,47 @@
 #include <linux/crc7.h>
 
 
-/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */
-const u8 crc7_syndrome_table[256] = {
-	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
-	0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
-	0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26,
-	0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
-	0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
-	0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
-	0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14,
-	0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
-	0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
-	0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
-	0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42,
-	0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
-	0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
-	0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
-	0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70,
-	0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
-	0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e,
-	0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
-	0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67,
-	0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
-	0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
-	0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
-	0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55,
-	0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
-	0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a,
-	0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
-	0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03,
-	0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
-	0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28,
-	0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
-	0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31,
-	0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79
+/*
+ * Table for CRC-7 (polynomial x^7 + x^3 + 1).
+ * This is a big-endian CRC (msbit is highest power of x),
+ * aligned so the msbit of the byte is the x^6 coefficient
+ * and the lsbit is not used.
+ */
+const u8 crc7_be_syndrome_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
+	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
+	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
+	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
+	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
+	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
+	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
+	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
+	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
+	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
+	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
+	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
+	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
-EXPORT_SYMBOL(crc7_syndrome_table);
+EXPORT_SYMBOL(crc7_be_syndrome_table);
 
 /**
  * crc7 - update the CRC7 for the data buffer
@@ -55,14 +60,17 @@
  * Context: any
  *
  * Returns the updated CRC7 value.
+ * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that
+ * makes the computation easier, and all callers want it in that form.
+ *
  */
-u8 crc7(u8 crc, const u8 *buffer, size_t len)
+u8 crc7_be(u8 crc, const u8 *buffer, size_t len)
 {
 	while (len--)
-		crc = crc7_byte(crc, *buffer++);
+		crc = crc7_be_byte(crc, *buffer++);
 	return crc;
 }
-EXPORT_SYMBOL(crc7);
+EXPORT_SYMBOL(crc7_be);
 
 MODULE_DESCRIPTION("CRC7 calculations");
 MODULE_LICENSE("GPL");

diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index e6eb406..f367f9a 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c

@@ -1,6 +1,7 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/module.h>
 
 #define START(node) ((node)->start)
 #define LAST(node)  ((node)->last)
@@ -8,3 +9,8 @@
 INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
 		     unsigned long, __subtree_last,
 		     START, LAST,, interval_tree)
+
+EXPORT_SYMBOL_GPL(interval_tree_insert);
+EXPORT_SYMBOL_GPL(interval_tree_remove);
+EXPORT_SYMBOL_GPL(interval_tree_iter_first);
+EXPORT_SYMBOL_GPL(interval_tree_iter_next);

diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test.c
similarity index 100%
rename from lib/interval_tree_test_main.c
rename to lib/interval_tree_test.c


diff --git a/lib/test_bpf.c b/lib/test_bpf.c
new file mode 100644
index 0000000..c579e0f
--- /dev/null
+++ b/lib/test_bpf.c

@@ -0,0 +1,1929 @@
+/*
+ * Testsuite for BPF interpreter and BPF JIT compiler
+ *
+ * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/filter.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+
+/* General test specific settings */
+#define MAX_SUBTESTS	3
+#define MAX_TESTRUNS	10000
+#define MAX_DATA	128
+#define MAX_INSNS	512
+#define MAX_K		0xffffFFFF
+
+/* Few constants used to init test 'skb' */
+#define SKB_TYPE	3
+#define SKB_MARK	0x1234aaaa
+#define SKB_HASH	0x1234aaab
+#define SKB_QUEUE_MAP	123
+#define SKB_VLAN_TCI	0xffff
+#define SKB_DEV_IFINDEX	577
+#define SKB_DEV_TYPE	588
+
+/* Redefine REGs to make tests less verbose */
+#define R0		BPF_REG_0
+#define R1		BPF_REG_1
+#define R2		BPF_REG_2
+#define R3		BPF_REG_3
+#define R4		BPF_REG_4
+#define R5		BPF_REG_5
+#define R6		BPF_REG_6
+#define R7		BPF_REG_7
+#define R8		BPF_REG_8
+#define R9		BPF_REG_9
+#define R10		BPF_REG_10
+
+/* Flags that can be passed to test cases */
+#define FLAG_NO_DATA		BIT(0)
+#define FLAG_EXPECTED_FAIL	BIT(1)
+
+enum {
+	CLASSIC  = BIT(6),	/* Old BPF instructions only. */
+	INTERNAL = BIT(7),	/* Extended instruction set.  */
+};
+
+#define TEST_TYPE_MASK		(CLASSIC | INTERNAL)
+
+struct bpf_test {
+	const char *descr;	/* label for the test case, e.g. "TAX" */
+	union {			/* program; presumably picked by type bit in aux */
+		struct sock_filter insns[MAX_INSNS];
+		struct sock_filter_int insns_int[MAX_INSNS];
+	} u;
+	__u8 aux;		/* CLASSIC or INTERNAL, OR'ed with FLAG_* bits */
+	__u8 data[MAX_DATA];	/* input bytes; presumably the test skb payload */
+	struct {
+		int data_size;	/* input length in bytes for this sub-run */
+		__u32 result;	/* value the program is expected to return */
+	} test[MAX_SUBTESTS];
+};
+
+static struct bpf_test tests[] = {
+	{
+		"TAX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_ALU | BPF_NEG, 0), /* A == -3 */
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0), /* X == len - 3 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 1),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 10, 20, 30, 40, 50 },
+		{ { 2, 10 }, { 3, 20 }, { 4, 30 } },
+	},
+	{
+		"TXA",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0) /* A == len * 2 */
+		},
+		CLASSIC,
+		{ 10, 20, 30, 40, 50 },
+		{ { 1, 2 }, { 3, 6 }, { 4, 8 } },
+	},
+	{
+		"ADD_SUB_MUL_K",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_MUL | BPF_K, 3),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0xfffffffd } }
+	},
+	{
+		"DIV_KX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 8),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x40000001 } }
+	},
+	{
+		"AND_OR_LSH_K",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xff),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 27),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xf),
+			BPF_STMT(BPF_ALU | BPF_OR | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x800000ff }, { 1, 0x800000ff } },
+	},
+	{
+		"LD_IMM_0",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0), /* ld #0 */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 1 } },
+	},
+	{
+		"LD_IND",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, MAX_K),
+			BPF_STMT(BPF_RET | BPF_K, 1)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 }, { 60, 0 } },
+	},
+	{
+		"LD_ABS",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 1000),
+			BPF_STMT(BPF_RET | BPF_K, 1)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 }, { 60, 0 } },
+	},
+	{
+		"LD_ABS_LL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF + 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 1, 2, 3 },
+		{ { 1, 0 }, { 2, 3 } },
+	},
+	{
+		"LD_IND_LL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, SKF_LL_OFF - 1),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 1, 2, 3, 0xff },
+		{ { 1, 1 }, { 3, 3 }, { 4, 0xff } },
+	},
+	{
+		"LD_ABS_NET",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 },
+		{ { 15, 0 }, { 16, 3 } },
+	},
+	{
+		"LD_IND_NET",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, SKF_NET_OFF - 15),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 },
+		{ { 14, 0 }, { 15, 1 }, { 17, 3 } },
+	},
+	{
+		"LD_PKTTYPE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 3 }, { 10, 3 } },
+	},
+	{
+		"LD_MARK",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_MARK),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_MARK}, { 10, SKB_MARK} },
+	},
+	{
+		"LD_RXHASH",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_RXHASH),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_HASH}, { 10, SKB_HASH} },
+	},
+	{
+		"LD_QUEUE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_QUEUE),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_QUEUE_MAP }, { 10, SKB_QUEUE_MAP } },
+	},
+	{
+		"LD_PROTOCOL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 1),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PROTOCOL),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 30, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 10, 20, 30 },
+		{ { 10, ETH_P_IP }, { 100, ETH_P_IP } },
+	},
+	{
+		"LD_VLAN_TAG",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_VLAN_TAG),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{
+			{ 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT },
+			{ 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }
+		},
+	},
+	{
+		"LD_VLAN_TAG_PRESENT",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{
+			{ 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
+			{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
+		},
+	},
+	{
+		"LD_IFINDEX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_IFINDEX),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_DEV_IFINDEX }, { 10, SKB_DEV_IFINDEX } },
+	},
+	{
+		"LD_HATYPE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_HATYPE),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_DEV_TYPE }, { 10, SKB_DEV_TYPE } },
+	},
+	{
+		"LD_CPU",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_CPU),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_CPU),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 } },
+	},
+	{
+		"LD_NLATTR",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 2),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+#ifdef __BIG_ENDIAN
+		{ 0xff, 0xff, 0, 4, 0, 2, 0, 4, 0, 3 },
+#else
+		{ 0xff, 0xff, 4, 0, 2, 0, 4, 0, 3, 0 },
+#endif
+		{ { 4, 0 }, { 20, 6 } },
+	},
+	{
+		"LD_NLATTR_NEST",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+#ifdef __BIG_ENDIAN
+		{ 0xff, 0xff, 0, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3 },
+#else
+		{ 0xff, 0xff, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3, 0 },
+#endif
+		{ { 4, 0 }, { 20, 10 } },
+	},
+	{
+		"LD_PAYLOAD_OFF",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		/* 00:00:00:00:00:00 > 00:00:00:00:00:00, ethtype IPv4 (0x0800),
+		 * length 98: 127.0.0.1 > 127.0.0.1: ICMP echo request,
+		 * id 9737, seq 1, length 64
+		 */
+		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x08, 0x00,
+		  0x45, 0x00, 0x00, 0x54, 0xac, 0x8b, 0x40, 0x00, 0x40,
+		  0x01, 0x90, 0x1b, 0x7f, 0x00, 0x00, 0x01 },
+		{ { 30, 0 }, { 100, 42 } },
+	},
+	{
+		"LD_ANC_XOR",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 10),
+			BPF_STMT(BPF_LDX | BPF_IMM, 300),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_ALU_XOR_X),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 4, 10 ^ 300 }, { 20, 10 ^ 300 } },
+	},
+	{
+		"SPILL_FILL",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_ALU | BPF_RSH, 1),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_ST, 1), /* M1 = 1 ^ len */
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_K, 0x80000000),
+			BPF_STMT(BPF_ST, 2), /* M2 = 1 ^ len ^ 0x80000000 */
+			BPF_STMT(BPF_STX, 15), /* M3 = len */
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_LD | BPF_MEM, 2),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0x80000001 }, { 2, 0x80000002 }, { 60, 0x80000000 ^ 60 } }
+	},
+	{
+		"JEQ",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 3, 3, 3, 3, 3 },
+		{ { 1, 0 }, { 3, 1 }, { 4, MAX_K } },
+	},
+	{
+		"JGT",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 4, 4, 4, 3, 3 },
+		{ { 2, 0 }, { 3, 1 }, { 4, MAX_K } },
+	},
+	{
+		"JGE",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, MAX_K),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 1, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 10),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 2, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 20),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 3, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 4, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 40),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 1, 2, 3, 4, 5 },
+		{ { 1, 20 }, { 3, 40 }, { 5, MAX_K } },
+	},
+	{
+		"JSET",
+		.u.insns = {
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_JUMP(BPF_JMP | BPF_JA, 1, 1, 1),
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, 4),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_IND, 0),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 1, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 10),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x80000000, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 0, 0xAA, 0x55, 1 },
+		{ { 4, 10 }, { 5, 20 }, { 6, MAX_K } },
+	},
+	{
+		"tcpdump port 22",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 0, 8), /* IPv6 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 17),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 54),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 14, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 56),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 12, 13),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 12), /* IPv4 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 8),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 6, 0),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 0xffff),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+		},
+		CLASSIC,
+		/* 3c:07:54:43:e5:76 > 10:bf:48:d6:43:d6, ethertype IPv4(0x0800)
+		 * length 114: 10.1.1.149.49700 > 10.1.2.10.22: Flags [P.],
+		 * seq 1305692979:1305693027, ack 3650467037, win 65535,
+		 * options [nop,nop,TS val 2502645400 ecr 3971138], length 48
+		 */
+		{ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		  0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76,
+		  0x08, 0x00,
+		  0x45, 0x10, 0x00, 0x64, 0x75, 0xb5,
+		  0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */
+		  0x0a, 0x01, 0x01, 0x95, /* ip src */
+		  0x0a, 0x01, 0x02, 0x0a, /* ip dst */
+		  0xc2, 0x24,
+		  0x00, 0x16 /* dst port */ },
+		{ { 10, 0 }, { 30, 0 }, { 100, 65535 } },
+	},
+	{
+		"tcpdump complex",
+		.u.insns = {
+			/* tcpdump -nei eth0 'tcp port 22 and (((ip[2:2] -
+			 * ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0) and
+			 * (len > 115 or len < 30000000000)' -d
+			 */
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 30, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x800, 0, 29),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 0, 27),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 25, 0),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 20),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 16),
+			BPF_STMT(BPF_ST, 1),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 14),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf),
+			BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0x5), /* libpcap emits K on TAX */
+			BPF_STMT(BPF_LD | BPF_MEM, 1),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_ST, 5),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 26),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_RSH | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0x9), /* libpcap emits K on TAX */
+			BPF_STMT(BPF_LD | BPF_MEM, 5),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 4, 0),
+			BPF_STMT(BPF_LD | BPF_LEN, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, 0x73, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 0xfc23ac00, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0xffff),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+		},
+		CLASSIC,
+		{ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		  0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76,
+		  0x08, 0x00,
+		  0x45, 0x10, 0x00, 0x64, 0x75, 0xb5,
+		  0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */
+		  0x0a, 0x01, 0x01, 0x95, /* ip src */
+		  0x0a, 0x01, 0x02, 0x0a, /* ip dst */
+		  0xc2, 0x24,
+		  0x00, 0x16 /* dst port */ },
+		{ { 10, 0 }, { 30, 0 }, { 100, 65535 } },
+	},
+	{
+		"RET_A",
+		.u.insns = {
+			/* check that uninitialized X and A contain zeros */
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ {1, 0}, {2, 0} },
+	},
+	{
+		"INT: ADD trivial",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_ALU64_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_SUB, R1, R2),
+			BPF_ALU64_IMM(BPF_ADD, R1, -1),
+			BPF_ALU64_IMM(BPF_MUL, R1, 3),
+			BPF_ALU64_REG(BPF_MOV, R0, R1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xfffffffd } }
+	},
+	{
+		"INT: MUL_X",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xfffffffd, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		"INT: MUL_X2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, -1),
+			BPF_ALU32_IMM(BPF_MOV, R1, -1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_IMM(BPF_RSH, R1, 8),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0x2ffffff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		"INT: MUL32_X",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 3),
+			BPF_ALU32_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_IMM(BPF_RSH, R1, 8),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xffffff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		/* Have to test all register combinations, since
+		 * JITing of different registers will produce
+		 * different asm code.
+		 */
+		"INT: ADD 64-bit",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_IMM(BPF_ADD, R0, 20),
+			BPF_ALU64_IMM(BPF_ADD, R1, 20),
+			BPF_ALU64_IMM(BPF_ADD, R2, 20),
+			BPF_ALU64_IMM(BPF_ADD, R3, 20),
+			BPF_ALU64_IMM(BPF_ADD, R4, 20),
+			BPF_ALU64_IMM(BPF_ADD, R5, 20),
+			BPF_ALU64_IMM(BPF_ADD, R6, 20),
+			BPF_ALU64_IMM(BPF_ADD, R7, 20),
+			BPF_ALU64_IMM(BPF_ADD, R8, 20),
+			BPF_ALU64_IMM(BPF_ADD, R9, 20),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_ALU64_IMM(BPF_SUB, R1, 10),
+			BPF_ALU64_IMM(BPF_SUB, R2, 10),
+			BPF_ALU64_IMM(BPF_SUB, R3, 10),
+			BPF_ALU64_IMM(BPF_SUB, R4, 10),
+			BPF_ALU64_IMM(BPF_SUB, R5, 10),
+			BPF_ALU64_IMM(BPF_SUB, R6, 10),
+			BPF_ALU64_IMM(BPF_SUB, R7, 10),
+			BPF_ALU64_IMM(BPF_SUB, R8, 10),
+			BPF_ALU64_IMM(BPF_SUB, R9, 10),
+			BPF_ALU64_REG(BPF_ADD, R0, R0),
+			BPF_ALU64_REG(BPF_ADD, R0, R1),
+			BPF_ALU64_REG(BPF_ADD, R0, R2),
+			BPF_ALU64_REG(BPF_ADD, R0, R3),
+			BPF_ALU64_REG(BPF_ADD, R0, R4),
+			BPF_ALU64_REG(BPF_ADD, R0, R5),
+			BPF_ALU64_REG(BPF_ADD, R0, R6),
+			BPF_ALU64_REG(BPF_ADD, R0, R7),
+			BPF_ALU64_REG(BPF_ADD, R0, R8),
+			BPF_ALU64_REG(BPF_ADD, R0, R9), /* R0 == 155 */
+			BPF_JMP_IMM(BPF_JEQ, R0, 155, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R1, R0),
+			BPF_ALU64_REG(BPF_ADD, R1, R1),
+			BPF_ALU64_REG(BPF_ADD, R1, R2),
+			BPF_ALU64_REG(BPF_ADD, R1, R3),
+			BPF_ALU64_REG(BPF_ADD, R1, R4),
+			BPF_ALU64_REG(BPF_ADD, R1, R5),
+			BPF_ALU64_REG(BPF_ADD, R1, R6),
+			BPF_ALU64_REG(BPF_ADD, R1, R7),
+			BPF_ALU64_REG(BPF_ADD, R1, R8),
+			BPF_ALU64_REG(BPF_ADD, R1, R9), /* R1 == 456 */
+			BPF_JMP_IMM(BPF_JEQ, R1, 456, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R2, R0),
+			BPF_ALU64_REG(BPF_ADD, R2, R1),
+			BPF_ALU64_REG(BPF_ADD, R2, R2),
+			BPF_ALU64_REG(BPF_ADD, R2, R3),
+			BPF_ALU64_REG(BPF_ADD, R2, R4),
+			BPF_ALU64_REG(BPF_ADD, R2, R5),
+			BPF_ALU64_REG(BPF_ADD, R2, R6),
+			BPF_ALU64_REG(BPF_ADD, R2, R7),
+			BPF_ALU64_REG(BPF_ADD, R2, R8),
+			BPF_ALU64_REG(BPF_ADD, R2, R9), /* R2 == 1358 */
+			BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R3, R0),
+			BPF_ALU64_REG(BPF_ADD, R3, R1),
+			BPF_ALU64_REG(BPF_ADD, R3, R2),
+			BPF_ALU64_REG(BPF_ADD, R3, R3),
+			BPF_ALU64_REG(BPF_ADD, R3, R4),
+			BPF_ALU64_REG(BPF_ADD, R3, R5),
+			BPF_ALU64_REG(BPF_ADD, R3, R6),
+			BPF_ALU64_REG(BPF_ADD, R3, R7),
+			BPF_ALU64_REG(BPF_ADD, R3, R8),
+			BPF_ALU64_REG(BPF_ADD, R3, R9), /* R3 == 4063 */
+			BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R4, R0),
+			BPF_ALU64_REG(BPF_ADD, R4, R1),
+			BPF_ALU64_REG(BPF_ADD, R4, R2),
+			BPF_ALU64_REG(BPF_ADD, R4, R3),
+			BPF_ALU64_REG(BPF_ADD, R4, R4),
+			BPF_ALU64_REG(BPF_ADD, R4, R5),
+			BPF_ALU64_REG(BPF_ADD, R4, R6),
+			BPF_ALU64_REG(BPF_ADD, R4, R7),
+			BPF_ALU64_REG(BPF_ADD, R4, R8),
+			BPF_ALU64_REG(BPF_ADD, R4, R9), /* R4 == 12177 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R5, R0),
+			BPF_ALU64_REG(BPF_ADD, R5, R1),
+			BPF_ALU64_REG(BPF_ADD, R5, R2),
+			BPF_ALU64_REG(BPF_ADD, R5, R3),
+			BPF_ALU64_REG(BPF_ADD, R5, R4),
+			BPF_ALU64_REG(BPF_ADD, R5, R5),
+			BPF_ALU64_REG(BPF_ADD, R5, R6),
+			BPF_ALU64_REG(BPF_ADD, R5, R7),
+			BPF_ALU64_REG(BPF_ADD, R5, R8),
+			BPF_ALU64_REG(BPF_ADD, R5, R9), /* R5 == 36518 */
+			BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R6, R0),
+			BPF_ALU64_REG(BPF_ADD, R6, R1),
+			BPF_ALU64_REG(BPF_ADD, R6, R2),
+			BPF_ALU64_REG(BPF_ADD, R6, R3),
+			BPF_ALU64_REG(BPF_ADD, R6, R4),
+			BPF_ALU64_REG(BPF_ADD, R6, R5),
+			BPF_ALU64_REG(BPF_ADD, R6, R6),
+			BPF_ALU64_REG(BPF_ADD, R6, R7),
+			BPF_ALU64_REG(BPF_ADD, R6, R8),
+			BPF_ALU64_REG(BPF_ADD, R6, R9), /* R6 == 109540 */
+			BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R7, R0),
+			BPF_ALU64_REG(BPF_ADD, R7, R1),
+			BPF_ALU64_REG(BPF_ADD, R7, R2),
+			BPF_ALU64_REG(BPF_ADD, R7, R3),
+			BPF_ALU64_REG(BPF_ADD, R7, R4),
+			BPF_ALU64_REG(BPF_ADD, R7, R5),
+			BPF_ALU64_REG(BPF_ADD, R7, R6),
+			BPF_ALU64_REG(BPF_ADD, R7, R7),
+			BPF_ALU64_REG(BPF_ADD, R7, R8),
+			BPF_ALU64_REG(BPF_ADD, R7, R9), /* R7 == 328605 */
+			BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R8, R0),
+			BPF_ALU64_REG(BPF_ADD, R8, R1),
+			BPF_ALU64_REG(BPF_ADD, R8, R2),
+			BPF_ALU64_REG(BPF_ADD, R8, R3),
+			BPF_ALU64_REG(BPF_ADD, R8, R4),
+			BPF_ALU64_REG(BPF_ADD, R8, R5),
+			BPF_ALU64_REG(BPF_ADD, R8, R6),
+			BPF_ALU64_REG(BPF_ADD, R8, R7),
+			BPF_ALU64_REG(BPF_ADD, R8, R8),
+			BPF_ALU64_REG(BPF_ADD, R8, R9), /* R8 == 985799 */
+			BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R9, R0),
+			BPF_ALU64_REG(BPF_ADD, R9, R1),
+			BPF_ALU64_REG(BPF_ADD, R9, R2),
+			BPF_ALU64_REG(BPF_ADD, R9, R3),
+			BPF_ALU64_REG(BPF_ADD, R9, R4),
+			BPF_ALU64_REG(BPF_ADD, R9, R5),
+			BPF_ALU64_REG(BPF_ADD, R9, R6),
+			BPF_ALU64_REG(BPF_ADD, R9, R7),
+			BPF_ALU64_REG(BPF_ADD, R9, R8),
+			BPF_ALU64_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */
+			BPF_ALU64_REG(BPF_MOV, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2957380 } }
+	},
+	{
+		"INT: ADD 32-bit",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 20),
+			BPF_ALU32_IMM(BPF_MOV, R1, 1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 2),
+			BPF_ALU32_IMM(BPF_MOV, R3, 3),
+			BPF_ALU32_IMM(BPF_MOV, R4, 4),
+			BPF_ALU32_IMM(BPF_MOV, R5, 5),
+			BPF_ALU32_IMM(BPF_MOV, R6, 6),
+			BPF_ALU32_IMM(BPF_MOV, R7, 7),
+			BPF_ALU32_IMM(BPF_MOV, R8, 8),
+			BPF_ALU32_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_IMM(BPF_ADD, R1, 10),
+			BPF_ALU64_IMM(BPF_ADD, R2, 10),
+			BPF_ALU64_IMM(BPF_ADD, R3, 10),
+			BPF_ALU64_IMM(BPF_ADD, R4, 10),
+			BPF_ALU64_IMM(BPF_ADD, R5, 10),
+			BPF_ALU64_IMM(BPF_ADD, R6, 10),
+			BPF_ALU64_IMM(BPF_ADD, R7, 10),
+			BPF_ALU64_IMM(BPF_ADD, R8, 10),
+			BPF_ALU64_IMM(BPF_ADD, R9, 10),
+			BPF_ALU32_REG(BPF_ADD, R0, R1),
+			BPF_ALU32_REG(BPF_ADD, R0, R2),
+			BPF_ALU32_REG(BPF_ADD, R0, R3),
+			BPF_ALU32_REG(BPF_ADD, R0, R4),
+			BPF_ALU32_REG(BPF_ADD, R0, R5),
+			BPF_ALU32_REG(BPF_ADD, R0, R6),
+			BPF_ALU32_REG(BPF_ADD, R0, R7),
+			BPF_ALU32_REG(BPF_ADD, R0, R8),
+			BPF_ALU32_REG(BPF_ADD, R0, R9), /* R0 == 155 */
+			BPF_JMP_IMM(BPF_JEQ, R0, 155, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R1, R0),
+			BPF_ALU32_REG(BPF_ADD, R1, R1),
+			BPF_ALU32_REG(BPF_ADD, R1, R2),
+			BPF_ALU32_REG(BPF_ADD, R1, R3),
+			BPF_ALU32_REG(BPF_ADD, R1, R4),
+			BPF_ALU32_REG(BPF_ADD, R1, R5),
+			BPF_ALU32_REG(BPF_ADD, R1, R6),
+			BPF_ALU32_REG(BPF_ADD, R1, R7),
+			BPF_ALU32_REG(BPF_ADD, R1, R8),
+			BPF_ALU32_REG(BPF_ADD, R1, R9), /* R1 == 456 */
+			BPF_JMP_IMM(BPF_JEQ, R1, 456, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R2, R0),
+			BPF_ALU32_REG(BPF_ADD, R2, R1),
+			BPF_ALU32_REG(BPF_ADD, R2, R2),
+			BPF_ALU32_REG(BPF_ADD, R2, R3),
+			BPF_ALU32_REG(BPF_ADD, R2, R4),
+			BPF_ALU32_REG(BPF_ADD, R2, R5),
+			BPF_ALU32_REG(BPF_ADD, R2, R6),
+			BPF_ALU32_REG(BPF_ADD, R2, R7),
+			BPF_ALU32_REG(BPF_ADD, R2, R8),
+			BPF_ALU32_REG(BPF_ADD, R2, R9), /* R2 == 1358 */
+			BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R3, R0),
+			BPF_ALU32_REG(BPF_ADD, R3, R1),
+			BPF_ALU32_REG(BPF_ADD, R3, R2),
+			BPF_ALU32_REG(BPF_ADD, R3, R3),
+			BPF_ALU32_REG(BPF_ADD, R3, R4),
+			BPF_ALU32_REG(BPF_ADD, R3, R5),
+			BPF_ALU32_REG(BPF_ADD, R3, R6),
+			BPF_ALU32_REG(BPF_ADD, R3, R7),
+			BPF_ALU32_REG(BPF_ADD, R3, R8),
+			BPF_ALU32_REG(BPF_ADD, R3, R9), /* R3 == 4063 */
+			BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R4, R0),
+			BPF_ALU32_REG(BPF_ADD, R4, R1),
+			BPF_ALU32_REG(BPF_ADD, R4, R2),
+			BPF_ALU32_REG(BPF_ADD, R4, R3),
+			BPF_ALU32_REG(BPF_ADD, R4, R4),
+			BPF_ALU32_REG(BPF_ADD, R4, R5),
+			BPF_ALU32_REG(BPF_ADD, R4, R6),
+			BPF_ALU32_REG(BPF_ADD, R4, R7),
+			BPF_ALU32_REG(BPF_ADD, R4, R8),
+			BPF_ALU32_REG(BPF_ADD, R4, R9), /* R4 == 12177 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R5, R0),
+			BPF_ALU32_REG(BPF_ADD, R5, R1),
+			BPF_ALU32_REG(BPF_ADD, R5, R2),
+			BPF_ALU32_REG(BPF_ADD, R5, R3),
+			BPF_ALU32_REG(BPF_ADD, R5, R4),
+			BPF_ALU32_REG(BPF_ADD, R5, R5),
+			BPF_ALU32_REG(BPF_ADD, R5, R6),
+			BPF_ALU32_REG(BPF_ADD, R5, R7),
+			BPF_ALU32_REG(BPF_ADD, R5, R8),
+			BPF_ALU32_REG(BPF_ADD, R5, R9), /* R5 == 36518 */
+			BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R6, R0),
+			BPF_ALU32_REG(BPF_ADD, R6, R1),
+			BPF_ALU32_REG(BPF_ADD, R6, R2),
+			BPF_ALU32_REG(BPF_ADD, R6, R3),
+			BPF_ALU32_REG(BPF_ADD, R6, R4),
+			BPF_ALU32_REG(BPF_ADD, R6, R5),
+			BPF_ALU32_REG(BPF_ADD, R6, R6),
+			BPF_ALU32_REG(BPF_ADD, R6, R7),
+			BPF_ALU32_REG(BPF_ADD, R6, R8),
+			BPF_ALU32_REG(BPF_ADD, R6, R9), /* R6 == 109540 */
+			BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R7, R0),
+			BPF_ALU32_REG(BPF_ADD, R7, R1),
+			BPF_ALU32_REG(BPF_ADD, R7, R2),
+			BPF_ALU32_REG(BPF_ADD, R7, R3),
+			BPF_ALU32_REG(BPF_ADD, R7, R4),
+			BPF_ALU32_REG(BPF_ADD, R7, R5),
+			BPF_ALU32_REG(BPF_ADD, R7, R6),
+			BPF_ALU32_REG(BPF_ADD, R7, R7),
+			BPF_ALU32_REG(BPF_ADD, R7, R8),
+			BPF_ALU32_REG(BPF_ADD, R7, R9), /* R7 == 328605 */
+			BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R8, R0),
+			BPF_ALU32_REG(BPF_ADD, R8, R1),
+			BPF_ALU32_REG(BPF_ADD, R8, R2),
+			BPF_ALU32_REG(BPF_ADD, R8, R3),
+			BPF_ALU32_REG(BPF_ADD, R8, R4),
+			BPF_ALU32_REG(BPF_ADD, R8, R5),
+			BPF_ALU32_REG(BPF_ADD, R8, R6),
+			BPF_ALU32_REG(BPF_ADD, R8, R7),
+			BPF_ALU32_REG(BPF_ADD, R8, R8),
+			BPF_ALU32_REG(BPF_ADD, R8, R9), /* R8 == 985799 */
+			BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R9, R0),
+			BPF_ALU32_REG(BPF_ADD, R9, R1),
+			BPF_ALU32_REG(BPF_ADD, R9, R2),
+			BPF_ALU32_REG(BPF_ADD, R9, R3),
+			BPF_ALU32_REG(BPF_ADD, R9, R4),
+			BPF_ALU32_REG(BPF_ADD, R9, R5),
+			BPF_ALU32_REG(BPF_ADD, R9, R6),
+			BPF_ALU32_REG(BPF_ADD, R9, R7),
+			BPF_ALU32_REG(BPF_ADD, R9, R8),
+			BPF_ALU32_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */
+			BPF_ALU32_REG(BPF_MOV, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2957380 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: SUB",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_REG(BPF_SUB, R0, R0),
+			BPF_ALU64_REG(BPF_SUB, R0, R1),
+			BPF_ALU64_REG(BPF_SUB, R0, R2),
+			BPF_ALU64_REG(BPF_SUB, R0, R3),
+			BPF_ALU64_REG(BPF_SUB, R0, R4),
+			BPF_ALU64_REG(BPF_SUB, R0, R5),
+			BPF_ALU64_REG(BPF_SUB, R0, R6),
+			BPF_ALU64_REG(BPF_SUB, R0, R7),
+			BPF_ALU64_REG(BPF_SUB, R0, R8),
+			BPF_ALU64_REG(BPF_SUB, R0, R9),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_JMP_IMM(BPF_JEQ, R0, -55, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R1, R0),
+			BPF_ALU64_REG(BPF_SUB, R1, R2),
+			BPF_ALU64_REG(BPF_SUB, R1, R3),
+			BPF_ALU64_REG(BPF_SUB, R1, R4),
+			BPF_ALU64_REG(BPF_SUB, R1, R5),
+			BPF_ALU64_REG(BPF_SUB, R1, R6),
+			BPF_ALU64_REG(BPF_SUB, R1, R7),
+			BPF_ALU64_REG(BPF_SUB, R1, R8),
+			BPF_ALU64_REG(BPF_SUB, R1, R9),
+			BPF_ALU64_IMM(BPF_SUB, R1, 10),
+			BPF_ALU64_REG(BPF_SUB, R2, R0),
+			BPF_ALU64_REG(BPF_SUB, R2, R1),
+			BPF_ALU64_REG(BPF_SUB, R2, R3),
+			BPF_ALU64_REG(BPF_SUB, R2, R4),
+			BPF_ALU64_REG(BPF_SUB, R2, R5),
+			BPF_ALU64_REG(BPF_SUB, R2, R6),
+			BPF_ALU64_REG(BPF_SUB, R2, R7),
+			BPF_ALU64_REG(BPF_SUB, R2, R8),
+			BPF_ALU64_REG(BPF_SUB, R2, R9),
+			BPF_ALU64_IMM(BPF_SUB, R2, 10),
+			BPF_ALU64_REG(BPF_SUB, R3, R0),
+			BPF_ALU64_REG(BPF_SUB, R3, R1),
+			BPF_ALU64_REG(BPF_SUB, R3, R2),
+			BPF_ALU64_REG(BPF_SUB, R3, R4),
+			BPF_ALU64_REG(BPF_SUB, R3, R5),
+			BPF_ALU64_REG(BPF_SUB, R3, R6),
+			BPF_ALU64_REG(BPF_SUB, R3, R7),
+			BPF_ALU64_REG(BPF_SUB, R3, R8),
+			BPF_ALU64_REG(BPF_SUB, R3, R9),
+			BPF_ALU64_IMM(BPF_SUB, R3, 10),
+			BPF_ALU64_REG(BPF_SUB, R4, R0),
+			BPF_ALU64_REG(BPF_SUB, R4, R1),
+			BPF_ALU64_REG(BPF_SUB, R4, R2),
+			BPF_ALU64_REG(BPF_SUB, R4, R3),
+			BPF_ALU64_REG(BPF_SUB, R4, R5),
+			BPF_ALU64_REG(BPF_SUB, R4, R6),
+			BPF_ALU64_REG(BPF_SUB, R4, R7),
+			BPF_ALU64_REG(BPF_SUB, R4, R8),
+			BPF_ALU64_REG(BPF_SUB, R4, R9),
+			BPF_ALU64_IMM(BPF_SUB, R4, 10),
+			BPF_ALU64_REG(BPF_SUB, R5, R0),
+			BPF_ALU64_REG(BPF_SUB, R5, R1),
+			BPF_ALU64_REG(BPF_SUB, R5, R2),
+			BPF_ALU64_REG(BPF_SUB, R5, R3),
+			BPF_ALU64_REG(BPF_SUB, R5, R4),
+			BPF_ALU64_REG(BPF_SUB, R5, R6),
+			BPF_ALU64_REG(BPF_SUB, R5, R7),
+			BPF_ALU64_REG(BPF_SUB, R5, R8),
+			BPF_ALU64_REG(BPF_SUB, R5, R9),
+			BPF_ALU64_IMM(BPF_SUB, R5, 10),
+			BPF_ALU64_REG(BPF_SUB, R6, R0),
+			BPF_ALU64_REG(BPF_SUB, R6, R1),
+			BPF_ALU64_REG(BPF_SUB, R6, R2),
+			BPF_ALU64_REG(BPF_SUB, R6, R3),
+			BPF_ALU64_REG(BPF_SUB, R6, R4),
+			BPF_ALU64_REG(BPF_SUB, R6, R5),
+			BPF_ALU64_REG(BPF_SUB, R6, R7),
+			BPF_ALU64_REG(BPF_SUB, R6, R8),
+			BPF_ALU64_REG(BPF_SUB, R6, R9),
+			BPF_ALU64_IMM(BPF_SUB, R6, 10),
+			BPF_ALU64_REG(BPF_SUB, R7, R0),
+			BPF_ALU64_REG(BPF_SUB, R7, R1),
+			BPF_ALU64_REG(BPF_SUB, R7, R2),
+			BPF_ALU64_REG(BPF_SUB, R7, R3),
+			BPF_ALU64_REG(BPF_SUB, R7, R4),
+			BPF_ALU64_REG(BPF_SUB, R7, R5),
+			BPF_ALU64_REG(BPF_SUB, R7, R6),
+			BPF_ALU64_REG(BPF_SUB, R7, R8),
+			BPF_ALU64_REG(BPF_SUB, R7, R9),
+			BPF_ALU64_IMM(BPF_SUB, R7, 10),
+			BPF_ALU64_REG(BPF_SUB, R8, R0),
+			BPF_ALU64_REG(BPF_SUB, R8, R1),
+			BPF_ALU64_REG(BPF_SUB, R8, R2),
+			BPF_ALU64_REG(BPF_SUB, R8, R3),
+			BPF_ALU64_REG(BPF_SUB, R8, R4),
+			BPF_ALU64_REG(BPF_SUB, R8, R5),
+			BPF_ALU64_REG(BPF_SUB, R8, R6),
+			BPF_ALU64_REG(BPF_SUB, R8, R7),
+			BPF_ALU64_REG(BPF_SUB, R8, R9),
+			BPF_ALU64_IMM(BPF_SUB, R8, 10),
+			BPF_ALU64_REG(BPF_SUB, R9, R0),
+			BPF_ALU64_REG(BPF_SUB, R9, R1),
+			BPF_ALU64_REG(BPF_SUB, R9, R2),
+			BPF_ALU64_REG(BPF_SUB, R9, R3),
+			BPF_ALU64_REG(BPF_SUB, R9, R4),
+			BPF_ALU64_REG(BPF_SUB, R9, R5),
+			BPF_ALU64_REG(BPF_SUB, R9, R6),
+			BPF_ALU64_REG(BPF_SUB, R9, R7),
+			BPF_ALU64_REG(BPF_SUB, R9, R8),
+			BPF_ALU64_IMM(BPF_SUB, R9, 10),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_ALU64_IMM(BPF_NEG, R0, 0),
+			BPF_ALU64_REG(BPF_SUB, R0, R1),
+			BPF_ALU64_REG(BPF_SUB, R0, R2),
+			BPF_ALU64_REG(BPF_SUB, R0, R3),
+			BPF_ALU64_REG(BPF_SUB, R0, R4),
+			BPF_ALU64_REG(BPF_SUB, R0, R5),
+			BPF_ALU64_REG(BPF_SUB, R0, R6),
+			BPF_ALU64_REG(BPF_SUB, R0, R7),
+			BPF_ALU64_REG(BPF_SUB, R0, R8),
+			BPF_ALU64_REG(BPF_SUB, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 11 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: XOR",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_SUB, R0, R0),
+			BPF_ALU64_REG(BPF_XOR, R1, R1),
+			BPF_JMP_REG(BPF_JEQ, R0, R1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 10),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU64_REG(BPF_SUB, R1, R1),
+			BPF_ALU64_REG(BPF_XOR, R2, R2),
+			BPF_JMP_REG(BPF_JEQ, R1, R2, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R2, R2),
+			BPF_ALU64_REG(BPF_XOR, R3, R3),
+			BPF_ALU64_IMM(BPF_MOV, R0, 10),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_JMP_REG(BPF_JEQ, R2, R3, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R3, R3),
+			BPF_ALU64_REG(BPF_XOR, R4, R4),
+			BPF_ALU64_IMM(BPF_MOV, R2, 1),
+			BPF_ALU64_IMM(BPF_MOV, R5, -1),
+			BPF_JMP_REG(BPF_JEQ, R3, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R4, R4),
+			BPF_ALU64_REG(BPF_XOR, R5, R5),
+			BPF_ALU64_IMM(BPF_MOV, R3, 1),
+			BPF_ALU64_IMM(BPF_MOV, R7, -1),
+			BPF_JMP_REG(BPF_JEQ, R5, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R5, 1),
+			BPF_ALU64_REG(BPF_SUB, R5, R5),
+			BPF_ALU64_REG(BPF_XOR, R6, R6),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R8, -1),
+			BPF_JMP_REG(BPF_JEQ, R5, R6, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R6, R6),
+			BPF_ALU64_REG(BPF_XOR, R7, R7),
+			BPF_JMP_REG(BPF_JEQ, R7, R6, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R7, R7),
+			BPF_ALU64_REG(BPF_XOR, R8, R8),
+			BPF_JMP_REG(BPF_JEQ, R7, R8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R8, R8),
+			BPF_ALU64_REG(BPF_XOR, R9, R9),
+			BPF_JMP_REG(BPF_JEQ, R9, R8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R9, R9),
+			BPF_ALU64_REG(BPF_XOR, R0, R0),
+			BPF_JMP_REG(BPF_JEQ, R9, R0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R1, R1),
+			BPF_ALU64_REG(BPF_XOR, R0, R0),
+			BPF_JMP_REG(BPF_JEQ, R9, R0, 2),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: MUL",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 11),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_REG(BPF_MUL, R0, R0),
+			BPF_ALU64_REG(BPF_MUL, R0, R1),
+			BPF_ALU64_REG(BPF_MUL, R0, R2),
+			BPF_ALU64_REG(BPF_MUL, R0, R3),
+			BPF_ALU64_REG(BPF_MUL, R0, R4),
+			BPF_ALU64_REG(BPF_MUL, R0, R5),
+			BPF_ALU64_REG(BPF_MUL, R0, R6),
+			BPF_ALU64_REG(BPF_MUL, R0, R7),
+			BPF_ALU64_REG(BPF_MUL, R0, R8),
+			BPF_ALU64_REG(BPF_MUL, R0, R9),
+			BPF_ALU64_IMM(BPF_MUL, R0, 10),
+			BPF_JMP_IMM(BPF_JEQ, R0, 439084800, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_MUL, R1, R0),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_REG(BPF_MUL, R1, R3),
+			BPF_ALU64_REG(BPF_MUL, R1, R4),
+			BPF_ALU64_REG(BPF_MUL, R1, R5),
+			BPF_ALU64_REG(BPF_MUL, R1, R6),
+			BPF_ALU64_REG(BPF_MUL, R1, R7),
+			BPF_ALU64_REG(BPF_MUL, R1, R8),
+			BPF_ALU64_REG(BPF_MUL, R1, R9),
+			BPF_ALU64_IMM(BPF_MUL, R1, 10),
+			BPF_ALU64_REG(BPF_MOV, R2, R1),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_JMP_IMM(BPF_JEQ, R2, 0x5a924, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_LSH, R1, 32),
+			BPF_ALU64_IMM(BPF_ARSH, R1, 32),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xebb90000, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_MUL, R2, R0),
+			BPF_ALU64_REG(BPF_MUL, R2, R1),
+			BPF_ALU64_REG(BPF_MUL, R2, R3),
+			BPF_ALU64_REG(BPF_MUL, R2, R4),
+			BPF_ALU64_REG(BPF_MUL, R2, R5),
+			BPF_ALU64_REG(BPF_MUL, R2, R6),
+			BPF_ALU64_REG(BPF_MUL, R2, R7),
+			BPF_ALU64_REG(BPF_MUL, R2, R8),
+			BPF_ALU64_REG(BPF_MUL, R2, R9),
+			BPF_ALU64_IMM(BPF_MUL, R2, 10),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_ALU64_REG(BPF_MOV, R0, R2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x35d97ef2 } }
+	},
+	{
+		"INT: ALU MIX",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 11),
+			BPF_ALU64_IMM(BPF_ADD, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_XOR, R2, 3),
+			BPF_ALU64_REG(BPF_DIV, R0, R2),
+			BPF_JMP_IMM(BPF_JEQ, R0, 10, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOD, R0, 3),
+			BPF_JMP_IMM(BPF_JEQ, R0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, -1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } }
+	},
+	{
+		"INT: DIV + ABS",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_MOV, R6, R1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU32_REG(BPF_DIV, R0, R2),
+			BPF_ALU64_REG(BPF_MOV, R8, R0),
+			BPF_LD_ABS(BPF_B, 4),
+			BPF_ALU64_REG(BPF_ADD, R8, R0),
+			BPF_LD_IND(BPF_B, R8, -70),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ 10, 20, 30, 40, 50 },
+		{ { 4, 0 }, { 5, 10 } }
+	},
+	{
+		"INT: DIV by zero",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_MOV, R6, R1),
+			BPF_ALU64_IMM(BPF_MOV, R7, 0),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU32_REG(BPF_DIV, R0, R7),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ 10, 20, 30, 40, 50 },
+		{ { 3, 0 }, { 4, 0 } }
+	},
+	{
+		"check: missing ret",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: div_k_0",
+		.u.insns = {
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: unknown insn",
+		.u.insns = {
+			/* seccomp insn, rejected in socket filter */
+			BPF_STMT(BPF_LDX | BPF_W | BPF_ABS, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: out of range spill/fill",
+		.u.insns = {
+			BPF_STMT(BPF_STX, 16),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"JUMPS + HOLES",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 15),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 3, 4),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 2, 3),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 2, 3),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC,
+		{ 0x00, 0x1b, 0x21, 0x3c, 0x9d, 0xf8,
+		  0x90, 0xe2, 0xba, 0x0a, 0x56, 0xb4,
+		  0x08, 0x00,
+		  0x45, 0x00, 0x00, 0x28, 0x00, 0x00,
+		  0x20, 0x00, 0x40, 0x11, 0x00, 0x00, /* IP header */
+		  0xc0, 0xa8, 0x33, 0x01,
+		  0xc0, 0xa8, 0x33, 0x02,
+		  0xbb, 0xb6,
+		  0xa9, 0xfa,
+		  0x00, 0x14, 0x00, 0x00,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc },
+		{ { 88, 0x001b } }
+	},
+	{
+		"check: RET X",
+		.u.insns = {
+			BPF_STMT(BPF_RET | BPF_X, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{
+		"check: LDX + RET X",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 42),
+			BPF_STMT(BPF_RET | BPF_X, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{	/* Mainly checking JIT here. */
+		"M[]: alt STX + LDX",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 100),
+			BPF_STMT(BPF_STX, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 1),
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 2),
+			BPF_STMT(BPF_LDX | BPF_MEM, 2),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 3),
+			BPF_STMT(BPF_LDX | BPF_MEM, 3),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 4),
+			BPF_STMT(BPF_LDX | BPF_MEM, 4),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 5),
+			BPF_STMT(BPF_LDX | BPF_MEM, 5),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 6),
+			BPF_STMT(BPF_LDX | BPF_MEM, 6),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 7),
+			BPF_STMT(BPF_LDX | BPF_MEM, 7),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 8),
+			BPF_STMT(BPF_LDX | BPF_MEM, 8),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 9),
+			BPF_STMT(BPF_LDX | BPF_MEM, 9),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 10),
+			BPF_STMT(BPF_LDX | BPF_MEM, 10),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 11),
+			BPF_STMT(BPF_LDX | BPF_MEM, 11),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 12),
+			BPF_STMT(BPF_LDX | BPF_MEM, 12),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 13),
+			BPF_STMT(BPF_LDX | BPF_MEM, 13),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 14),
+			BPF_STMT(BPF_LDX | BPF_MEM, 14),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 15),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 116 } },
+	},
+	{	/* Mainly checking JIT here. */
+		"M[]: full STX + full LDX",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbadfeedb),
+			BPF_STMT(BPF_STX, 0),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xecabedae),
+			BPF_STMT(BPF_STX, 1),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xafccfeaf),
+			BPF_STMT(BPF_STX, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbffdcedc),
+			BPF_STMT(BPF_STX, 3),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfbbbdccb),
+			BPF_STMT(BPF_STX, 4),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfbabcbda),
+			BPF_STMT(BPF_STX, 5),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaedecbdb),
+			BPF_STMT(BPF_STX, 6),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xadebbade),
+			BPF_STMT(BPF_STX, 7),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfcfcfaec),
+			BPF_STMT(BPF_STX, 8),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbcdddbdc),
+			BPF_STMT(BPF_STX, 9),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfeefdfac),
+			BPF_STMT(BPF_STX, 10),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xcddcdeea),
+			BPF_STMT(BPF_STX, 11),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaccfaebb),
+			BPF_STMT(BPF_STX, 12),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbdcccdcf),
+			BPF_STMT(BPF_STX, 13),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaaedecde),
+			BPF_STMT(BPF_STX, 14),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfaeacdad),
+			BPF_STMT(BPF_STX, 15),
+			BPF_STMT(BPF_LDX | BPF_MEM, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 2),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 3),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 4),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 5),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 6),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 7),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 8),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 9),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 10),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 11),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 12),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 13),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 14),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x2a5a5e5 } },
+	},
+	{
+		"check: SKF_AD_MAX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_MAX),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{	/* Passes checker but fails during runtime. */
+		"LD [SKF_AD_OFF-1]",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF - 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 } },
+	},
+};
+
+/* Fake device that every generated test skb points at. */
+static struct net_device dev;
+
+/*
+ * populate_skb - build a fake packet for a filter run
+ * @buf:  bytes to place at the start of the packet
+ * @size: number of bytes to take from @buf; must be below MAX_DATA
+ *
+ * Allocates an skb with MAX_DATA bytes of room, copies @size bytes of
+ * @buf into it and fills the metadata fields with the SKB_* test
+ * pattern.  Returns NULL when @size is too large or the allocation
+ * fails; otherwise the caller owns the returned skb.
+ */
+static struct sk_buff *populate_skb(char *buf, int size)
+{
+	struct sk_buff *skb = NULL;
+	void *payload;
+
+	if (size < MAX_DATA)
+		skb = alloc_skb(MAX_DATA, GFP_KERNEL);
+	if (skb == NULL)
+		return NULL;
+
+	payload = __skb_put(skb, size);
+	memcpy(payload, buf, size);
+
+	/* Packet layout: MAC header at the start of the data. */
+	skb_reset_mac_header(skb);
+
+	/* Metadata test pattern. */
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = SKB_TYPE;
+	skb->mark = SKB_MARK;
+	skb->hash = SKB_HASH;
+	skb->queue_mapping = SKB_QUEUE_MAP;
+	skb->vlan_tci = SKB_VLAN_TCI;
+
+	/* Attach the shared fake device and give it known identifiers. */
+	dev.ifindex = SKB_DEV_IFINDEX;
+	dev.type = SKB_DEV_TYPE;
+	skb->dev = &dev;
+
+	/* Network header follows the Ethernet header, if there is room. */
+	skb_set_network_header(skb, min(size, ETH_HLEN));
+
+	return skb;
+}
+
+/*
+ * generate_test_data - produce the input for subtest @sub of @test
+ *
+ * Tests flagged FLAG_NO_DATA run without any input and get NULL.  All
+ * other tests get an skb built from the test's data, sized according
+ * to the subtest, so one data array can serve several packet lengths.
+ * Whatever populate_skb() returns is passed through unchanged.
+ */
+static void *generate_test_data(struct bpf_test *test, int sub)
+{
+	int size;
+
+	if (test->aux & FLAG_NO_DATA)
+		return NULL;
+
+	size = test->test[sub].data_size;
+
+	return populate_skb(test->data, size);
+}
+
+/*
+ * release_test_data - free what generate_test_data() handed out
+ *
+ * Only tests without FLAG_NO_DATA were given an skb, so only those
+ * have something to free.
+ */
+static void release_test_data(const struct bpf_test *test, void *data)
+{
+	if (!(test->aux & FLAG_NO_DATA))
+		kfree_skb(data);
+}
+
+/*
+ * probe_filter_length - find the number of instructions in a test program
+ * @fp: instruction array with MAX_INSNS slots
+ *
+ * Scans backwards from the last slot for the first instruction whose
+ * code or k field is non-zero and returns its index plus one.  Slot 0
+ * is never inspected, so the result is always at least 1.
+ */
+static int probe_filter_length(struct sock_filter *fp)
+{
+	int last = MAX_INSNS - 1;
+
+	while (last > 0 && fp[last].code == 0 && fp[last].k == 0)
+		last--;
+
+	return last + 1;
+}
+
+/*
+ * generate_filter - build the filter for test case @which
+ * @which: index into tests[]
+ * @err:   out parameter, see below
+ *
+ * CLASSIC programs are handed to sk_unattached_filter_create().
+ * INTERNAL programs are copied as-is into a freshly allocated
+ * sk_filter and passed to sk_filter_select_runtime().
+ *
+ * Returns the filter to run with *@err == 0.  Returns NULL when there
+ * is nothing to run: *@err == 0 means the program was rejected as the
+ * test expected (already reported as PASS), a negative *@err is a
+ * hard failure.
+ */
+static struct sk_filter *generate_filter(int which, int *err)
+{
+	struct sk_filter *fp = NULL;
+	struct sock_fprog_kern fprog;
+	unsigned int flen = probe_filter_length(tests[which].u.insns);
+	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
+
+	switch (test_type) {
+	case CLASSIC:
+		fprog.filter = tests[which].u.insns;
+		fprog.len = flen;
+
+		*err = sk_unattached_filter_create(&fp, &fprog);
+		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
+			if (*err == -EINVAL) {
+				pr_cont("PASS\n");
+				/* Verifier rejected filter as expected. */
+				*err = 0;
+				return NULL;
+			}
+
+			pr_cont("UNEXPECTED_PASS\n");
+			/* Verifier didn't reject the test that's
+			 * bad enough, just return!  If the filter was
+			 * actually created we own it, so release it
+			 * first instead of leaking it.
+			 */
+			if (*err == 0)
+				sk_unattached_filter_destroy(fp);
+			*err = -EINVAL;
+			return NULL;
+		}
+		/* We don't expect to fail. */
+		if (*err) {
+			pr_cont("FAIL to attach err=%d len=%d\n",
+				*err, fprog.len);
+			return NULL;
+		}
+		break;
+
+	case INTERNAL:
+		fp = kzalloc(sk_filter_size(flen), GFP_KERNEL);
+		if (fp == NULL) {
+			pr_cont("UNEXPECTED_FAIL no memory left\n");
+			*err = -ENOMEM;
+			return NULL;
+		}
+
+		fp->len = flen;
+		memcpy(fp->insnsi, tests[which].u.insns_int,
+		       fp->len * sizeof(struct sock_filter_int));
+
+		sk_filter_select_runtime(fp);
+		break;
+
+	default:
+		/* Neither CLASSIC nor INTERNAL: there is no filter to
+		 * build, so do not hand back an indeterminate pointer.
+		 */
+		pr_cont("UNEXPECTED_FAIL unknown test type\n");
+		*err = -EINVAL;
+		return NULL;
+	}
+
+	*err = 0;
+	return fp;
+}
+
+/*
+ * release_filter - free a filter built by generate_filter()
+ *
+ * The release routine depends on the test type: CLASSIC filters are
+ * destroyed with sk_unattached_filter_destroy(), INTERNAL ones with
+ * sk_filter_free().  Any other type is left untouched.
+ */
+static void release_filter(struct sk_filter *fp, int which)
+{
+	const __u8 kind = tests[which].aux & TEST_TYPE_MASK;
+
+	if (kind == CLASSIC)
+		sk_unattached_filter_destroy(fp);
+	else if (kind == INTERNAL)
+		sk_filter_free(fp);
+}
+
+/*
+ * __run_one - run a filter repeatedly and time it
+ * @fp:       filter to run
+ * @data:     input handed to SK_RUN_FILTER()
+ * @runs:     number of back-to-back executions
+ * @duration: out parameter; elapsed microseconds scaled by 1000 and
+ *            divided by @runs
+ *
+ * Returns the result of the last execution.  With @runs <= 0 nothing
+ * is executed: *@duration is set to 0 and 0 is returned, rather than
+ * dividing by zero and returning an uninitialized value.
+ */
+static int __run_one(const struct sk_filter *fp, const void *data,
+		     int runs, u64 *duration)
+{
+	u64 start, finish;
+	int ret = 0, i;
+
+	if (runs <= 0) {
+		*duration = 0;
+		return 0;
+	}
+
+	start = ktime_to_us(ktime_get());
+
+	for (i = 0; i < runs; i++)
+		ret = SK_RUN_FILTER(fp, data);
+
+	finish = ktime_to_us(ktime_get());
+
+	*duration = (finish - start) * 1000ULL;
+	do_div(*duration, runs);
+
+	return ret;
+}
+
+/*
+ * run_one - execute every subtest of @test against filter @fp
+ *
+ * Subtests are processed in order until the first entry whose
+ * data_size and result are both zero, or until MAX_SUBTESTS.  For each
+ * one the input is generated, the filter is run MAX_TESTRUNS times and
+ * the final return value is compared with the expected result.  The
+ * per-run duration is printed on a match, the mismatch otherwise.
+ *
+ * Returns the number of failed subtests.
+ */
+static int run_one(const struct sk_filter *fp, struct bpf_test *test)
+{
+	int err_cnt = 0, i, runs = MAX_TESTRUNS;
+
+	for (i = 0; i < MAX_SUBTESTS; i++) {
+		void *data;
+		u64 duration;
+		u32 ret;
+
+		if (test->test[i].data_size == 0 &&
+		    test->test[i].result == 0)
+			break;
+
+		data = generate_test_data(test, i);
+		if (!data && !(test->aux & FLAG_NO_DATA)) {
+			/* The test needs an skb but none could be built
+			 * (allocation failure or oversized subtest).  Do
+			 * not run the filter on a NULL skb.
+			 */
+			pr_cont("data generation failed ");
+			err_cnt++;
+			break;
+		}
+		ret = __run_one(fp, data, runs, &duration);
+		release_test_data(test, data);
+
+		if (ret == test->test[i].result) {
+			pr_cont("%lld ", duration);
+		} else {
+			pr_cont("ret %d != %d ", ret,
+				test->test[i].result);
+			err_cnt++;
+		}
+	}
+
+	return err_cnt;
+}
+
+/*
+ * test_bpf - walk tests[] and report the outcome of every entry
+ *
+ * A test with no filter to run counts as passed when generate_filter()
+ * reports no error; an error from generate_filter() aborts the whole
+ * run and is returned immediately.  Otherwise the filter is run and
+ * released, and the test is counted as passed or failed.
+ *
+ * Returns 0 when nothing failed, -EINVAL when at least one test
+ * failed, or the error from generate_filter().
+ */
+static __init int test_bpf(void)
+{
+	int passed = 0, failed = 0;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
+		struct sk_filter *filter;
+		int rc;
+
+		pr_info("#%d %s ", idx, tests[idx].descr);
+
+		filter = generate_filter(idx, &rc);
+		if (!filter) {
+			/* Nothing to run: either an expected rejection
+			 * or a hard error.
+			 */
+			if (rc)
+				return rc;
+
+			passed++;
+			continue;
+		}
+
+		rc = run_one(filter, &tests[idx]);
+		release_filter(filter, idx);
+
+		if (rc == 0) {
+			pr_cont("PASS\n");
+			passed++;
+			continue;
+		}
+
+		pr_cont("FAIL (%d times)\n", rc);
+		failed++;
+	}
+
+	pr_info("Summary: %d PASSED, %d FAILED\n", passed, failed);
+	if (failed)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Module entry point: run the whole suite and return its result. */
+static int __init test_bpf_init(void)
+{
+	int rc = test_bpf();
+
+	return rc;
+}
+
+/* Module exit point: intentionally empty. */
+static void __exit test_bpf_exit(void)
+{
+}
+
+module_init(test_bpf_init);
+module_exit(test_bpf_exit);
+
+MODULE_LICENSE("GPL");

diff --git a/mm/filemap.c b/mm/filemap.c
index 7fadf1c..dafb06f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -1665,96 +1665,42 @@
 	return written ? written : error;
 }
 
-/*
- * Performs necessary checks before doing a write
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @count:	number of bytes to write
- * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
- *
- * Adjust number of segments and amount of bytes to write (nr_segs should be
- * properly initialized first). Returns appropriate error code that caller
- * should return or zero in case that write should be allowed.
- */
-int generic_segment_checks(const struct iovec *iov,
-			unsigned long *nr_segs, size_t *count, int access_flags)
-{
-	unsigned long   seg;
-	size_t cnt = 0;
-	for (seg = 0; seg < *nr_segs; seg++) {
-		const struct iovec *iv = &iov[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		cnt += iv->iov_len;
-		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
-			return -EINVAL;
-		if (access_ok(access_flags, iv->iov_base, iv->iov_len))
-			continue;
-		if (seg == 0)
-			return -EFAULT;
-		*nr_segs = seg;
-		cnt -= iv->iov_len;	/* This segment is no good */
-		break;
-	}
-	*count = cnt;
-	return 0;
-}
-EXPORT_SYMBOL(generic_segment_checks);
-
 /**
- * generic_file_aio_read - generic filesystem read routine
+ * generic_file_read_iter - generic filesystem read routine
  * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
+ * @iter:	destination for the data read
  *
- * This is the "read()" routine for all filesystems
+ * This is the "read_iter()" routine for all filesystems
  * that can use the page cache directly.
  */
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-	struct file *filp = iocb->ki_filp;
-	ssize_t retval;
-	size_t count;
+	struct file *file = iocb->ki_filp;
+	ssize_t retval = 0;
 	loff_t *ppos = &iocb->ki_pos;
-	struct iov_iter i;
-
-	count = 0;
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	loff_t pos = *ppos;
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
-	if (filp->f_flags & O_DIRECT) {
+	if (file->f_flags & O_DIRECT) {
+		struct address_space *mapping = file->f_mapping;
+		struct inode *inode = mapping->host;
+		size_t count = iov_iter_count(iter);
 		loff_t size;
-		struct address_space *mapping;
-		struct inode *inode;
 
-		mapping = filp->f_mapping;
-		inode = mapping->host;
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
-					pos + iov_length(iov, nr_segs) - 1);
+					pos + count - 1);
 		if (!retval) {
-			retval = mapping->a_ops->direct_IO(READ, iocb,
-							   iov, pos, nr_segs);
+			struct iov_iter data = *iter;
+			retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
 		}
+
 		if (retval > 0) {
 			*ppos = pos + retval;
-			count -= retval;
-			/*
-			 * If we did a short DIO read we need to skip the
-			 * section of the iov that we've already read data into.
-			 */
-			iov_iter_advance(&i, retval);
+			iov_iter_advance(iter, retval);
 		}
 
 		/*
@@ -1765,17 +1711,17 @@
 		 * and return.  Otherwise fallthrough to buffered io for
 		 * the rest of the read.
 		 */
-		if (retval < 0 || !count || *ppos >= size) {
-			file_accessed(filp);
+		if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+			file_accessed(file);
 			goto out;
 		}
 	}
 
-	retval = do_generic_file_read(filp, ppos, &i, retval);
+	retval = do_generic_file_read(file, ppos, iter, retval);
 out:
 	return retval;
 }
-EXPORT_SYMBOL(generic_file_aio_read);
+EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
 /**
@@ -2386,9 +2332,7 @@
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long *nr_segs, loff_t pos,
-		size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 {
 	struct file	*file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -2396,11 +2340,9 @@
 	ssize_t		written;
 	size_t		write_len;
 	pgoff_t		end;
+	struct iov_iter data;
 
-	if (count != ocount)
-		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
-
-	write_len = iov_length(iov, *nr_segs);
+	write_len = iov_iter_count(from);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2427,7 +2369,8 @@
 		}
 	}
 
-	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+	data = *from;
+	written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
 
 	/*
 	 * Finally, try again to invalidate clean pages which might have been
@@ -2444,6 +2387,7 @@
 
 	if (written > 0) {
 		pos += written;
+		iov_iter_advance(from, written);
 		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
 			i_size_write(inode, pos);
 			mark_inode_dirty(inode);
@@ -2568,10 +2512,9 @@
 EXPORT_SYMBOL(generic_perform_write);
 
 /**
- * __generic_file_aio_write - write data to a file
+ * __generic_file_write_iter - write data to a file
  * @iocb:	IO state structure (file, offset, etc.)
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
+ * @from:	iov_iter with data to write
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -2585,26 +2528,16 @@
  * A caller has to handle it. This is mainly due to the fact that we want to
  * avoid syncing under i_mutex.
  */
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs)
+ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	size_t ocount;		/* original count */
-	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
 	loff_t		pos = iocb->ki_pos;
 	ssize_t		written = 0;
 	ssize_t		err;
 	ssize_t		status;
-	struct iov_iter from;
-
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
+	size_t		count = iov_iter_count(from);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2615,6 +2548,8 @@
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(from, count);
+
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -2623,17 +2558,13 @@
 	if (err)
 		goto out;
 
-	iov_iter_init(&from, iov, nr_segs, count, 0);
-
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
-							count, ocount);
+		written = generic_file_direct_write(iocb, from, pos);
 		if (written < 0 || written == count)
 			goto out;
-		iov_iter_advance(&from, written);
 
 		/*
 		 * direct-io write to a hole: fall through to buffered I/O
@@ -2642,7 +2573,7 @@
 		pos += written;
 		count -= written;
 
-		status = generic_perform_write(file, &from, pos);
+		status = generic_perform_write(file, from, pos);
 		/*
 		 * If generic_perform_write() returned a synchronous error
 		 * then we want to return the number of bytes which were
@@ -2674,7 +2605,7 @@
 			 */
 		}
 	} else {
-		written = generic_perform_write(file, &from, pos);
+		written = generic_perform_write(file, from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
 	}
@@ -2682,30 +2613,25 @@
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
-EXPORT_SYMBOL(__generic_file_aio_write);
+EXPORT_SYMBOL(__generic_file_write_iter);
 
 /**
- * generic_file_aio_write - write data to a file
+ * generic_file_write_iter - write data to a file
  * @iocb:	IO state structure
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
- * @pos:	position in file where to write
+ * @from:	iov_iter with data to write
  *
- * This is a wrapper around __generic_file_aio_write() to be used by most
+ * This is a wrapper around __generic_file_write_iter() to be used by most
  * filesystems. It takes care of syncing the file in case of O_SYNC file
  * and acquires i_mutex as needed.
  */
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {
@@ -2717,7 +2643,7 @@
 	}
 	return ret;
 }
-EXPORT_SYMBOL(generic_file_aio_write);
+EXPORT_SYMBOL(generic_file_write_iter);
 
 /**
  * try_to_release_page() - release old fs-specific metadata on a page

diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 595d7fd..493f758 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c

@@ -52,7 +52,7 @@
 static inline struct hugetlb_cgroup *
 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
 {
-	return hugetlb_cgroup_from_css(css_parent(&h_cg->css));
+	return hugetlb_cgroup_from_css(h_cg->css.parent);
 }
 
 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
@@ -181,7 +181,7 @@
 again:
 	rcu_read_lock();
 	h_cg = hugetlb_cgroup_from_task(current);
-	if (!css_tryget(&h_cg->css)) {
+	if (!css_tryget_online(&h_cg->css)) {
 		rcu_read_unlock();
 		goto again;
 	}
@@ -253,15 +253,16 @@
 	return res_counter_read_u64(&h_cg->hugepage[idx], name);
 }
 
-static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buffer)
+static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret;
 	unsigned long long val;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -271,7 +272,7 @@
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
@@ -280,17 +281,17 @@
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
-static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
-				unsigned int event)
+static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret = 0;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(event);
-	name = MEMFILE_ATTR(event);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -303,7 +304,7 @@
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static char *mem_fmt(char *buf, int size, unsigned long hsize)
@@ -331,7 +332,7 @@
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 	cft->read_u64 = hugetlb_cgroup_read_u64;
-	cft->write_string = hugetlb_cgroup_write;
+	cft->write = hugetlb_cgroup_write;
 
 	/* Add the usage file */
 	cft = &h->cgroup_files[1];
@@ -343,14 +344,14 @@
 	cft = &h->cgroup_files[2];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
-	cft->trigger = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* Add the failcntfile */
 	cft = &h->cgroup_files[3];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 	cft->private  = MEMFILE_PRIVATE(idx, RES_FAILCNT);
-	cft->trigger  = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* NULL terminate the last cft */

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 10e46cd..7b5dbd1 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c

@@ -1,8 +1,10 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -72,13 +74,97 @@
 	}
 	kunmap(page);
 done:
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
 	i->count -= wanted - bytes;
 	i->nr_segs -= iov - i->iov;
 	i->iov = iov;
 	i->iov_offset = skip;
 	return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_to_iter);
+
+/*
+ * Copy up to @bytes bytes of user data described by the iovec iterator
+ * @i into @page, starting at @offset within the page.
+ *
+ * The request is clamped to i->count.  The first segment is pre-faulted
+ * with fault_in_pages_readable(); if that succeeds the copy is tried
+ * under kmap_atomic() with __copy_from_user_inatomic().  If pre-faulting
+ * fails, or the atomic copy stops short, the remainder is copied under
+ * kmap() with __copy_from_user().
+ *
+ * On return the iterator fields (count, nr_segs, iov, iov_offset) have
+ * been moved past the bytes that were copied.  Returns the number of
+ * bytes copied, which may be less than requested.
+ */
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	/* from here on "bytes" counts down what is still left to copy */
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	if (!fault_in_pages_readable(buf, copy)) {
+		kaddr = kmap_atomic(page);
+		to = kaddr + offset;
+
+		/* first chunk, usually the only one */
+		left = __copy_from_user_inatomic(to, buf, copy);
+		copy -= left;
+		skip += copy;
+		to += copy;
+		bytes -= copy;
+
+		/* keep going through further segments until a copy comes up short */
+		while (unlikely(!left && bytes)) {
+			iov++;
+			buf = iov->iov_base;
+			copy = min(bytes, iov->iov_len);
+			left = __copy_from_user_inatomic(to, buf, copy);
+			copy -= left;
+			skip = copy;
+			to += copy;
+			bytes -= copy;
+		}
+		if (likely(!bytes)) {
+			kunmap_atomic(kaddr);
+			goto done;
+		}
+		/* short atomic copy: record where it stopped for the kmap() retry */
+		offset = to - kaddr;
+		buf += copy;
+		kunmap_atomic(kaddr);
+		copy = min(bytes, iov->iov_len - skip);
+	}
+	/* Too bad - revert to non-atomic kmap */
+	kaddr = kmap(page);
+	to = kaddr + offset;
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap(page);
+done:
+	/* current segment fully consumed: point at the start of the next one */
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	/* wanted - bytes is the amount actually copied */
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
@@ -107,7 +193,7 @@
  * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t copy_from_user_atomic_iovec(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
 	char *kaddr;
@@ -127,36 +213,8 @@
 
 	return copied;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap(page);
-	return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
-
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
+static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
 
@@ -191,7 +249,6 @@
 		i->nr_segs = nr_segs;
 	}
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
@@ -204,21 +261,483 @@
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-	char __user *buf = i->iov->iov_base + i->iov_offset;
-	bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-	return fault_in_pages_readable(buf, bytes);
+	if (!(i->type & ITER_BVEC)) {
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+		return fault_in_pages_readable(buf, bytes);
+	}
+	return 0;
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
+static unsigned long alignment_iovec(const struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = (unsigned long)iov->iov_base + i->iov_offset;
+	n = iov->iov_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++iov)->iov_len) {
+		res |= (unsigned long)iov->iov_base | iov->iov_len;
+		size -= iov->iov_len;
+	}
+	res |= (unsigned long)iov->iov_base | size;
+	return res;
+}
+
+void iov_iter_init(struct iov_iter *i, int direction,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	/* It will get better.  Eventually... */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		direction |= ITER_KVEC;
+	i->type = direction;
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_init);
+
+static ssize_t get_pages_iovec(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+	if (unlikely(res < 0))
+		return res;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+
+static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	void *p;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	
+	p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	if (!p)
+		return -ENOMEM;
+
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+	if (unlikely(res < 0)) {
+		kvfree(p);
+		return res;
+	}
+	*pages = p;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct iovec *iov = i->iov;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, iov++) {
+		unsigned long addr = (unsigned long)iov->iov_base + offset;
+		size_t len = iov->iov_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE
+			  - addr / PAGE_SIZE;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+	char *from = kmap_atomic(page);
+	memcpy(to, from + offset, len);
+	kunmap_atomic(from);
+}
+
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+	char *to = kmap_atomic(page);
+	memcpy(to + offset, from, len);
+	kunmap_atomic(to);
+}
+
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *from;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+	copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+	kaddr = kmap_atomic(page);
+	from = kaddr + offset;
+	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+
+	to = kaddr + offset;
+
+	copy = min(bytes, bvec->bv_len - skip);
+
+	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
+
+	to += copy;
+	skip += copy;
+	bytes -= copy;
+
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted;
+}
+
+static size_t copy_from_user_bvec(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t left;
+	const struct bio_vec *bvec;
+	size_t base = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+	for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
+		size_t copy = min(left, bvec->bv_len - base);
+		if (!bvec->bv_len)
+			continue;
+		memcpy_from_page(kaddr + offset, bvec->bv_page,
+				 bvec->bv_offset + base, copy);
+		offset += copy;
+		left -= copy;
+	}
+	kunmap_atomic(kaddr);
+	return bytes;
+}
+
+static void advance_bvec(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+		i->count -= bytes;
+	} else {
+		const struct bio_vec *bvec = i->bvec;
+		size_t base = i->iov_offset;
+		unsigned long nr_segs = i->nr_segs;
+
+		/*
+		 * The !bvec->bv_len check ensures we skip over unlikely
+		 * zero-length segments (without overrunning the bvec array).
+		 */
+		while (bytes || unlikely(i->count && !bvec->bv_len)) {
+			int copy;
+
+			copy = min(bytes, bvec->bv_len - base);
+			BUG_ON(!i->count || i->count < copy);
+			i->count -= copy;
+			bytes -= copy;
+			base += copy;
+			if (bvec->bv_len == base) {
+				bvec++;
+				nr_segs--;
+				base = 0;
+			}
+		}
+		i->bvec = bvec;
+		i->iov_offset = base;
+		i->nr_segs = nr_segs;
+	}
+}
+
+static unsigned long alignment_bvec(const struct iov_iter *i)
+{
+	const struct bio_vec *bvec = i->bvec;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = bvec->bv_offset + i->iov_offset;
+	n = bvec->bv_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++bvec)->bv_len) {
+		res |= bvec->bv_offset | bvec->bv_len;
+		size -= bvec->bv_len;
+	}
+	res |= bvec->bv_offset | size;
+	return res;
+}
+
+static ssize_t get_pages_bvec(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	get_page(*pages = bvec->bv_page);
+
+	return len;
+}
+
+static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	*pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!*pages)
+		return -ENOMEM;
+
+	get_page(**pages = bvec->bv_page);
+
+	return len;
+}
+
+static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct bio_vec *bvec = i->bvec;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, bvec++) {
+		size_t len = bvec->bv_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages++;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_to_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_to_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_from_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_from_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	if (i->type & ITER_BVEC)
+		return copy_from_user_bvec(page, i, offset, bytes);
+	else
+		return copy_from_user_atomic_iovec(page, i, offset, bytes);
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+	if (i->type & ITER_BVEC)
+		advance_bvec(i, size);
+	else
+		advance_iovec(i, size);
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
 /*
  * Return the count of just the current iov_iter segment.
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
-	const struct iovec *iov = i->iov;
 	if (i->nr_segs == 1)
 		return i->count;
+	else if (i->type & ITER_BVEC)	/* segments are bio_vecs, not iovecs */
+		return min(i->count, i->bvec->bv_len - i->iov_offset);
 	else
-		return min(i->count, iov->iov_len - i->iov_offset);
+		return min(i->count, i->iov->iov_len - i->iov_offset);
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return alignment_bvec(i);
+	else
+		return alignment_iovec(i);
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_alloc_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_alloc_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	if (i->type & ITER_BVEC)
+		return iov_iter_npages_bvec(i, maxpages);
+	else
+		return iov_iter_npages_iovec(i, maxpages);
+}
+EXPORT_SYMBOL(iov_iter_npages);

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a9559b9..a2c7bcb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c

@@ -526,18 +526,14 @@
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-	/*
-	 * The ID of the root cgroup is 0, but memcg treat 0 as an
-	 * invalid ID, so we return (cgroup_id + 1).
-	 */
-	return memcg->css.cgroup->id + 1;
+	return memcg->css.id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 {
 	struct cgroup_subsys_state *css;
 
-	css = css_from_id(id - 1, &memory_cgrp_subsys);
+	css = css_from_id(id, &memory_cgrp_subsys);
 	return mem_cgroup_from_css(css);
 }
 
@@ -570,7 +566,8 @@
 		memcg = mem_cgroup_from_task(current);
 		cg_proto = sk->sk_prot->proto_cgroup(memcg);
 		if (!mem_cgroup_is_root(memcg) &&
-		    memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) {
+		    memcg_proto_active(cg_proto) &&
+		    css_tryget_online(&memcg->css)) {
 			sk->sk_cgrp = cg_proto;
 		}
 		rcu_read_unlock();
@@ -831,7 +828,7 @@
 	 */
 	__mem_cgroup_remove_exceeded(mz, mctz);
 	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-		!css_tryget(&mz->memcg->css))
+	    !css_tryget_online(&mz->memcg->css))
 		goto retry;
 done:
 	return mz;
@@ -1073,7 +1070,7 @@
 			if (unlikely(!memcg))
 				memcg = root_mem_cgroup;
 		}
-	} while (!css_tryget(&memcg->css));
+	} while (!css_tryget_online(&memcg->css));
 	rcu_read_unlock();
 	return memcg;
 }
@@ -1110,7 +1107,8 @@
 	 */
 	if (next_css) {
 		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
+		    ((next_css->flags & CSS_ONLINE) &&
+		     css_tryget_online(next_css)))
 			return mem_cgroup_from_css(next_css);
 
 		prev_css = next_css;
@@ -1156,7 +1154,7 @@
 		 * would be returned all the time.
 		 */
 		if (position && position != root &&
-				!css_tryget(&position->css))
+		    !css_tryget_online(&position->css))
 			position = NULL;
 	}
 	return position;
@@ -1533,7 +1531,7 @@
 int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
 	/* root ? */
-	if (mem_cgroup_disabled() || !css_parent(&memcg->css))
+	if (mem_cgroup_disabled() || !memcg->css.parent)
 		return vm_swappiness;
 
 	return memcg->swappiness;
@@ -2769,9 +2767,9 @@
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock().  The caller is responsible for calling css_tryget if
- * the mem_cgroup is used for charging. (dropping refcnt from swap can be
- * called against removed memcg.)
+ * rcu_read_lock().  The caller is responsible for calling
+ * css_tryget_online() if the mem_cgroup is used for charging. (dropping
+ * refcnt from swap can be called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
@@ -2794,14 +2792,14 @@
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		memcg = pc->mem_cgroup;
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
 		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 		rcu_read_unlock();
 	}
@@ -3365,7 +3363,7 @@
 	}
 
 	/* The corresponding put will be done in the workqueue. */
-	if (!css_tryget(&memcg->css))
+	if (!css_tryget_online(&memcg->css))
 		goto out;
 	rcu_read_unlock();
 
@@ -4125,8 +4123,8 @@
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
 		/*
-		 * We uncharge this because swap is freed.
-		 * This memcg can be obsolete one. We avoid calling css_tryget
+		 * We uncharge this because swap is freed.  This memcg can
+		 * be an obsolete one. We avoid calling css_tryget_online().
 		 */
 		if (!mem_cgroup_is_root(memcg))
 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
@@ -4711,18 +4709,28 @@
 	} while (usage > 0);
 }
 
+/*
+ * Test whether @memcg has children, dead or alive.  Note that this
+ * function doesn't care whether @memcg has use_hierarchy enabled and
+ * returns %true if there are child csses according to the cgroup
+ * hierarchy.  Testing use_hierarchy is the caller's responsibility.
+ */
 static inline bool memcg_has_children(struct mem_cgroup *memcg)
 {
-	lockdep_assert_held(&memcg_create_mutex);
+	bool ret;
+
 	/*
-	 * The lock does not prevent addition or deletion to the list
-	 * of children, but it prevents a new child from being
-	 * initialized based on this parent in css_online(), so it's
-	 * enough to decide whether hierarchically inherited
-	 * attributes can still be changed or not.
+	 * The lock does not prevent addition or deletion of children, but
+	 * it prevents a new child from being initialized based on this
+	 * parent in css_online(), so it's enough to decide whether
+	 * hierarchically inherited attributes can still be changed or not.
 	 */
-	return memcg->use_hierarchy &&
-		!list_empty(&memcg->css.cgroup->children);
+	lockdep_assert_held(&memcg_create_mutex);
+
+	rcu_read_lock();
+	ret = css_next_child(NULL, &memcg->css);
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
@@ -4734,11 +4742,6 @@
 static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 {
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct cgroup *cgrp = memcg->css.cgroup;
-
-	/* returns EBUSY if there is a task or if we come here twice. */
-	if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children))
-		return -EBUSY;
 
 	/* we call try-to-free pages for make this cgroup empty */
 	lru_add_drain_all();
@@ -4758,20 +4761,19 @@
 		}
 
 	}
-	lru_add_drain();
-	mem_cgroup_reparent_charges(memcg);
 
 	return 0;
 }
 
-static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
-					unsigned int event)
+static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 
 	if (mem_cgroup_is_root(memcg))
 		return -EINVAL;
-	return mem_cgroup_force_empty(memcg);
+	return mem_cgroup_force_empty(memcg) ?: nbytes;
 }
 
 static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
@@ -4785,7 +4787,7 @@
 {
 	int retval = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
 
 	mutex_lock(&memcg_create_mutex);
 
@@ -4802,7 +4804,7 @@
 	 */
 	if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
 				(val == 1 || val == 0)) {
-		if (list_empty(&memcg->css.cgroup->children))
+		if (!memcg_has_children(memcg))
 			memcg->use_hierarchy = val;
 		else
 			retval = -EBUSY;
@@ -4919,7 +4921,8 @@
 	 * of course permitted.
 	 */
 	mutex_lock(&memcg_create_mutex);
-	if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg))
+	if (cgroup_has_tasks(memcg->css.cgroup) ||
+	    (memcg->use_hierarchy && memcg_has_children(memcg)))
 		err = -EBUSY;
 	mutex_unlock(&memcg_create_mutex);
 	if (err)
@@ -5021,17 +5024,18 @@
  * The user of this function is...
  * RES_LIMIT.
  */
-static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	enum res_type type;
 	int name;
 	unsigned long long val;
 	int ret;
 
-	type = MEMFILE_TYPE(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -5040,7 +5044,7 @@
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		if (type == _MEM)
@@ -5053,7 +5057,7 @@
 			return -EINVAL;
 		break;
 	case RES_SOFT_LIMIT:
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		/*
@@ -5070,7 +5074,7 @@
 		ret = -EINVAL; /* should be BUG() ? */
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
@@ -5083,8 +5087,8 @@
 	if (!memcg->use_hierarchy)
 		goto out;
 
-	while (css_parent(&memcg->css)) {
-		memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	while (memcg->css.parent) {
+		memcg = mem_cgroup_from_css(memcg->css.parent);
 		if (!memcg->use_hierarchy)
 			break;
 		tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5097,14 +5101,15 @@
 	*memsw_limit = min_memsw_limit;
 }
 
-static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	int name;
 	enum res_type type;
 
-	type = MEMFILE_TYPE(event);
-	name = MEMFILE_ATTR(event);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -5129,7 +5134,7 @@
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
@@ -5322,7 +5327,7 @@
 	if (val > 100)
 		return -EINVAL;
 
-	if (css_parent(css))
+	if (css->parent)
 		memcg->swappiness = val;
 	else
 		vm_swappiness = val;
@@ -5659,7 +5664,7 @@
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	/* cannot set to root cgroup and only 0 and 1 are allowed */
-	if (!css_parent(css) || !((val == 0) || (val == 1)))
+	if (!css->parent || !((val == 0) || (val == 1)))
 		return -EINVAL;
 
 	memcg->oom_kill_disable = val;
@@ -5705,10 +5710,10 @@
 	 * which is then paired with css_put during uncharge resp. here.
 	 *
 	 * Although this might sound strange as this path is called from
-	 * css_offline() when the referencemight have dropped down to 0
-	 * and shouldn't be incremented anymore (css_tryget would fail)
-	 * we do not have other options because of the kmem allocations
-	 * lifetime.
+	 * css_offline() when the reference might have dropped down to 0 and
+	 * shouldn't be incremented anymore (css_tryget_online() would
+	 * fail) we do not have other options because of the kmem
+	 * allocations lifetime.
 	 */
 	css_get(&memcg->css);
 
@@ -5827,9 +5832,10 @@
  * Input must be in format '<event_fd> <control_fd> <args>'.
  * Interpretation of args is defined by control file implementation.
  */
-static int memcg_write_event_control(struct cgroup_subsys_state *css,
-				     struct cftype *cft, char *buffer)
+static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+					 char *buf, size_t nbytes, loff_t off)
 {
+	struct cgroup_subsys_state *css = of_css(of);
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup_event *event;
 	struct cgroup_subsys_state *cfile_css;
@@ -5840,15 +5846,17 @@
 	char *endp;
 	int ret;
 
-	efd = simple_strtoul(buffer, &endp, 10);
+	buf = strstrip(buf);
+
+	efd = simple_strtoul(buf, &endp, 10);
 	if (*endp != ' ')
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
-	cfd = simple_strtoul(buffer, &endp, 10);
+	cfd = simple_strtoul(buf, &endp, 10);
 	if ((*endp != ' ') && (*endp != '\0'))
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (!event)
@@ -5916,8 +5924,8 @@
 	 * automatically removed on cgroup destruction but the removal is
 	 * asynchronous, so take an extra ref on @css.
 	 */
-	cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
-					&memory_cgrp_subsys);
+	cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+					       &memory_cgrp_subsys);
 	ret = -EINVAL;
 	if (IS_ERR(cfile_css))
 		goto out_put_cfile;
@@ -5926,7 +5934,7 @@
 		goto out_put_cfile;
 	}
 
-	ret = event->register_event(memcg, event->eventfd, buffer);
+	ret = event->register_event(memcg, event->eventfd, buf);
 	if (ret)
 		goto out_put_css;
 
@@ -5939,7 +5947,7 @@
 	fdput(cfile);
 	fdput(efile);
 
-	return 0;
+	return nbytes;
 
 out_put_css:
 	css_put(css);
@@ -5964,25 +5972,25 @@
 	{
 		.name = "max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "soft_limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "failcnt",
 		.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -5991,7 +5999,7 @@
 	},
 	{
 		.name = "force_empty",
-		.trigger = mem_cgroup_force_empty_write,
+		.write = mem_cgroup_force_empty_write,
 	},
 	{
 		.name = "use_hierarchy",
@@ -6001,7 +6009,7 @@
 	},
 	{
 		.name = "cgroup.event_control",		/* XXX: for compat */
-		.write_string = memcg_write_event_control,
+		.write = memcg_write_event_control,
 		.flags = CFTYPE_NO_PREFIX,
 		.mode = S_IWUGO,
 	},
@@ -6034,7 +6042,7 @@
 	{
 		.name = "kmem.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6045,13 +6053,13 @@
 	{
 		.name = "kmem.failcnt",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "kmem.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 #ifdef CONFIG_SLABINFO
@@ -6074,19 +6082,19 @@
 	{
 		.name = "memsw.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "memsw.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "memsw.failcnt",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{ },	/* terminate */
@@ -6264,9 +6272,9 @@
 mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
 
-	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
 
 	if (!parent)
@@ -6361,7 +6369,7 @@
 	/*
 	 * XXX: css_offline() would be where we should reparent all
 	 * memory to prepare the cgroup for destruction.  However,
-	 * memcg does not do css_tryget() and res_counter charging
+	 * memcg does not do css_tryget_online() and res_counter charging
 	 * under the same RCU lock region, which means that charging
 	 * could race with offlining.  Offlining only happens to
 	 * cgroups with no tasks in them but charges can show up
@@ -6375,9 +6383,9 @@
 	 *                           lookup_swap_cgroup_id()
 	 *                           rcu_read_lock()
 	 *                           mem_cgroup_lookup()
-	 *                           css_tryget()
+	 *                           css_tryget_online()
 	 *                           rcu_read_unlock()
-	 * disable css_tryget()
+	 * disable css_tryget_online()
 	 * call_rcu()
 	 *   offline_css()
 	 *     reparent_charges()

diff --git a/mm/page_io.c b/mm/page_io.c
index 58b50d2..955db8b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c

@@ -264,10 +264,18 @@
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
-		struct iovec iov = {
-			.iov_base = kmap(page),
-			.iov_len  = PAGE_SIZE,
+		struct bio_vec bv = {
+			.bv_page = page,
+			.bv_len  = PAGE_SIZE,
+			.bv_offset = 0
 		};
+		struct iov_iter from = {
+			.type = ITER_BVEC | WRITE,
+			.count = PAGE_SIZE,
+			.iov_offset = 0,
+			.nr_segs = 1,
+		};
+		from.bvec = &bv;	/* older gcc versions are broken */
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
@@ -275,10 +283,9 @@
 
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
-						&kiocb, &iov,
-						kiocb.ki_pos, 1);
-		kunmap(page);
+		ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE,
+						&kiocb, &from,
+						kiocb.ki_pos);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
 			ret = 0;

diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 8505c92..5077afc 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c

@@ -46,11 +46,7 @@
 			copy = len;
 
 		if (vm_write) {
-			if (copy > iov_iter_count(iter))
-				copy = iov_iter_count(iter);
-			copied = iov_iter_copy_from_user(page, iter,
-					offset, copy);
-			iov_iter_advance(iter, copied);
+			copied = copy_page_from_iter(page, offset, copy, iter);
 			set_page_dirty_lock(page);
 		} else {
 			copied = copy_page_to_iter(page, offset, copy, iter);
@@ -278,7 +274,7 @@
 	if (rc <= 0)
 		goto free_iovecs;
 
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 
 	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
 				   iovstack_r, &iov_r);
@@ -341,7 +337,7 @@
 						  &iov_l);
 	if (rc <= 0)
 		goto free_iovecs;
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
 					  &iov_r);

diff --git a/mm/shmem.c b/mm/shmem.c
index 5402481..f484c27 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c

@@ -1406,8 +1406,7 @@
 	return copied;
 }
 
-static ssize_t shmem_file_aio_read(struct kiocb *iocb,
-		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
+static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -1416,15 +1415,8 @@
 	unsigned long offset;
 	enum sgp_type sgp = SGP_READ;
 	int error = 0;
-	ssize_t retval;
-	size_t count;
+	ssize_t retval = 0;
 	loff_t *ppos = &iocb->ki_pos;
-	struct iov_iter iter;
-
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
 
 	/*
 	 * Might this read be for a stacking filesystem?  Then when reading
@@ -1500,14 +1492,14 @@
 		 * Ok, we have the page, and it's up-to-date, so
 		 * now we can copy it to user space...
 		 */
-		ret = copy_page_to_iter(page, offset, nr, &iter);
+		ret = copy_page_to_iter(page, offset, nr, to);
 		retval += ret;
 		offset += ret;
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
 
 		page_cache_release(page);
-		if (!iov_iter_count(&iter))
+		if (!iov_iter_count(to))
 			break;
 		if (ret < nr) {
 			error = -EFAULT;
@@ -2629,13 +2621,13 @@
 	.mmap		= shmem_mmap,
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= shmem_file_aio_read,
-	.aio_write	= generic_file_aio_write,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= shmem_file_read_iter,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= shmem_fallocate,
 #endif
 };

diff --git a/mm/vmscan.c b/mm/vmscan.c
index e01ded3..0f16ffe 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -464,7 +464,7 @@
 	 * stalls if we need to run get_block().  We could test
 	 * PagePrivate for that.
 	 *
-	 * If this process is currently in __generic_file_aio_write() against
+	 * If this process is currently in __generic_file_write_iter() against
 	 * this page's queue, we can perform writeback even if that
 	 * will block.
 	 *

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 3c32bd2..9012b1c 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c

@@ -63,7 +63,7 @@
 }
 
 /* Must be invoked with rcu_read_lock. */
-struct net_device *__vlan_find_dev_deep(struct net_device *dev,
+struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
 					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
@@ -81,13 +81,13 @@
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev,
+			return __vlan_find_dev_deep_rcu(upper_dev,
 						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(__vlan_find_dev_deep);
+EXPORT_SYMBOL(__vlan_find_dev_deep_rcu);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 019efb7..ad2ac3c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c

@@ -643,9 +643,9 @@
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	netdev_features_t old_features = features;
 
-	features &= real_dev->vlan_features;
+	features = netdev_intersect_features(features, real_dev->vlan_features);
 	features |= NETIF_F_RXCSUM;
-	features &= real_dev->features;
+	features = netdev_intersect_features(features, real_dev->features);
 
 	features |= old_features & NETIF_F_SOFT_FEATURES;
 	features |= NETIF_F_LLTX;
@@ -671,38 +671,36 @@
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
+	struct vlan_pcpu_stats *p;
+	u32 rx_errors = 0, tx_dropped = 0;
+	int i;
 
-	if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
-		struct vlan_pcpu_stats *p;
-		u32 rx_errors = 0, tx_dropped = 0;
-		int i;
+	for_each_possible_cpu(i) {
+		u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
+		unsigned int start;
 
-		for_each_possible_cpu(i) {
-			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
-			unsigned int start;
+		p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&p->syncp);
+			rxpackets	= p->rx_packets;
+			rxbytes		= p->rx_bytes;
+			rxmulticast	= p->rx_multicast;
+			txpackets	= p->tx_packets;
+			txbytes		= p->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&p->syncp, start));
 
-			p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
-			do {
-				start = u64_stats_fetch_begin_irq(&p->syncp);
-				rxpackets	= p->rx_packets;
-				rxbytes		= p->rx_bytes;
-				rxmulticast	= p->rx_multicast;
-				txpackets	= p->tx_packets;
-				txbytes		= p->tx_bytes;
-			} while (u64_stats_fetch_retry_irq(&p->syncp, start));
-
-			stats->rx_packets	+= rxpackets;
-			stats->rx_bytes		+= rxbytes;
-			stats->multicast	+= rxmulticast;
-			stats->tx_packets	+= txpackets;
-			stats->tx_bytes		+= txbytes;
-			/* rx_errors & tx_dropped are u32 */
-			rx_errors	+= p->rx_errors;
-			tx_dropped	+= p->tx_dropped;
-		}
-		stats->rx_errors  = rx_errors;
-		stats->tx_dropped = tx_dropped;
+		stats->rx_packets	+= rxpackets;
+		stats->rx_bytes		+= rxbytes;
+		stats->multicast	+= rxmulticast;
+		stats->tx_packets	+= txpackets;
+		stats->tx_bytes		+= txbytes;
+		/* rx_errors & tx_dropped are u32 */
+		rx_errors	+= p->rx_errors;
+		tx_dropped	+= p->tx_dropped;
 	}
+	stats->rx_errors  = rx_errors;
+	stats->tx_dropped = tx_dropped;
+
 	return stats;
 }
 

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 786ee2f..01a1082 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c

@@ -1669,7 +1669,7 @@
 		goto out;
 	}
 
-	if (sk->sk_no_check == 1)
+	if (sk->sk_no_check_tx)
 		ddp->deh_sum = 0;
 	else
 		ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));

diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1281049..d8e5d0c2 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c

@@ -263,17 +263,11 @@
 			goto out;
 		}
 	}
-/*
- * Not supported yet
- *
- * #ifndef CONFIG_SINGLE_SIGITF
- */
+
 	vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp);
 	vcc->qos.txtp.pcr = 0;
 	vcc->qos.txtp.min_pcr = 0;
-/*
- * #endif
- */
+
 	error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci);
 	if (!error)
 		sock->state = SS_CONNECTED;

diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index b758881..a12e25e 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c

@@ -245,6 +245,7 @@
 static int batadv_originators_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_orig_seq_print_text, net_dev);
 }
 
@@ -258,18 +259,21 @@
 					  struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_orig_hardif_seq_print_text, net_dev);
 }
 
 static int batadv_gateways_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_gw_client_seq_print_text, net_dev);
 }
 
 static int batadv_transtable_global_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_tt_global_seq_print_text, net_dev);
 }
 
@@ -277,6 +281,7 @@
 static int batadv_bla_claim_table_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_bla_claim_table_seq_print_text,
 			   net_dev);
 }
@@ -285,6 +290,7 @@
 					  struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_bla_backbone_table_seq_print_text,
 			   net_dev);
 }
@@ -300,6 +306,7 @@
 static int batadv_dat_cache_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
 }
 #endif
@@ -307,6 +314,7 @@
 static int batadv_transtable_local_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_tt_local_seq_print_text, net_dev);
 }
 
@@ -319,6 +327,7 @@
 static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
 	return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
 }
 #endif
@@ -333,7 +342,7 @@
 		  .llseek = seq_lseek,			\
 		  .release = single_release,		\
 		}					\
-};
+}
 
 /* the following attributes are general and therefore they will be directly
  * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
@@ -395,7 +404,7 @@
 		.llseek = seq_lseek,				\
 		.release = single_release,			\
 	},							\
-};
+}
 static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO,
 			       batadv_originators_hardif_open);
 

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index aa5d494..f2c066b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c

@@ -594,7 +594,7 @@
 		if (!neigh_node)
 			goto free_orig;
 
-		tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+		tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 		if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
 							   cand[i].orig_node,
 							   packet_subtype)) {
@@ -662,6 +662,7 @@
 void batadv_dat_status_update(struct net_device *net_dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
 	batadv_dat_tvlv_container_update(bat_priv);
 }
 

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 770dc89..118b990 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h

@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2014.2.0"
+#define BATADV_SOURCE_VERSION "2014.3.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */

diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index a9546fe..8d04d17 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c

@@ -86,6 +86,7 @@
 void batadv_nc_status_update(struct net_device *net_dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
 	batadv_nc_tvlv_container_update(bat_priv);
 }
 
@@ -1343,7 +1344,7 @@
 	struct ethhdr *ethhdr;
 
 	/* Copy skb header to change the mac header */
-	skb = pskb_copy(skb, GFP_ATOMIC);
+	skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 	if (!skb)
 		return;
 

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 744a59b..e7ee65d 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c

@@ -884,7 +884,7 @@
 	/* generate random address */
 	eth_hw_addr_random(dev);
 
-	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+	dev->ethtool_ops = &batadv_ethtool_ops;
 
 	memset(priv, 0, sizeof(*priv));
 }

diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 1ebb0d9..fc47baa 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c

@@ -29,12 +29,14 @@
 static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
 {
 	struct device *dev = container_of(obj->parent, struct device, kobj);
+
 	return to_net_dev(dev);
 }
 
 static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
 {
 	struct net_device *net_dev = batadv_kobj_to_netdev(obj);
+
 	return netdev_priv(net_dev);
 }
 
@@ -106,7 +108,7 @@
 		 .mode = _mode },			\
 	.show   = _show,				\
 	.store  = _store,				\
-};
+}
 
 /* Use this, if you have customized show and store functions */
 #define BATADV_ATTR(_name, _mode, _show, _store)	\
@@ -115,7 +117,7 @@
 		 .mode = _mode },			\
 	.show   = _show,				\
 	.store  = _store,				\
-};
+}
 
 #define BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func)			\
 ssize_t batadv_store_##_name(struct kobject *kobj,			\
@@ -124,6 +126,7 @@
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);		\
+									\
 	return __batadv_store_bool_attr(buff, count, _post_func, attr,	\
 					&bat_priv->_name, net_dev);	\
 }
@@ -133,6 +136,7 @@
 			    struct attribute *attr, char *buff)		\
 {									\
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);	\
+									\
 	return sprintf(buff, "%s\n",					\
 		       atomic_read(&bat_priv->_name) == 0 ?		\
 		       "disabled" : "enabled");				\
@@ -155,6 +159,7 @@
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);		\
+									\
 	return __batadv_store_uint_attr(buff, count, _min, _max,	\
 					_post_func, attr,		\
 					&bat_priv->_name, net_dev);	\
@@ -165,6 +170,7 @@
 			    struct attribute *attr, char *buff)		\
 {									\
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);	\
+									\
 	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name));	\
 }									\
 
@@ -188,6 +194,7 @@
 	size_t res = __batadv_store_bool_attr(buff, count, _post_func,	\
 					      attr, &vlan->_name,	\
 					      bat_priv->soft_iface);	\
+									\
 	batadv_softif_vlan_free_ref(vlan);				\
 	return res;							\
 }
@@ -202,6 +209,7 @@
 	size_t res = sprintf(buff, "%s\n",				\
 			     atomic_read(&vlan->_name) == 0 ?		\
 			     "disabled" : "enabled");			\
+									\
 	batadv_softif_vlan_free_ref(vlan);				\
 	return res;							\
 }
@@ -324,12 +332,14 @@
 				    struct attribute *attr, char *buff)
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
 	return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name);
 }
 
 static void batadv_post_gw_reselect(struct net_device *net_dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
 	batadv_gw_reselect(bat_priv);
 }
 

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 73492b9..8796ffa 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c

@@ -420,12 +420,18 @@
 	return 0;
 }
 
-static void get_dest_bdaddr(struct in6_addr *ip6_daddr,
-			    bdaddr_t *addr, u8 *addr_type)
+static u8 get_addr_type_from_eui64(u8 byte)
 {
-	u8 *eui64;
+	/* Test the universal(0)/local(1) bit */
+	if (byte & 0x02)
+		return ADDR_LE_DEV_RANDOM;
 
-	eui64 = ip6_daddr->s6_addr + 8;
+	return ADDR_LE_DEV_PUBLIC;
+}
+
+static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
+{
+	u8 *eui64 = ip6_daddr->s6_addr + 8;
 
 	addr->b[0] = eui64[7];
 	addr->b[1] = eui64[6];
@@ -433,16 +439,19 @@
 	addr->b[3] = eui64[2];
 	addr->b[4] = eui64[1];
 	addr->b[5] = eui64[0];
+}
 
-	addr->b[5] ^= 2;
+static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
+				bdaddr_t *addr, u8 *addr_type)
+{
+	copy_to_bdaddr(ip6_daddr, addr);
 
-	/* Set universal/local bit to 0 */
-	if (addr->b[5] & 1) {
-		addr->b[5] &= ~1;
-		*addr_type = ADDR_LE_DEV_PUBLIC;
-	} else {
-		*addr_type = ADDR_LE_DEV_RANDOM;
-	}
+	/* We need to toggle the U/L bit that we got from IPv6 address
+	 * so that we get the proper address and type of the BD address.
+	 */
+	addr->b[5] ^= 0x02;
+
+	*addr_type = get_addr_type_from_eui64(addr->b[5]);
 }
 
 static int header_create(struct sk_buff *skb, struct net_device *netdev,
@@ -473,9 +482,11 @@
 		/* Get destination BT device from skb.
 		 * If there is no such peer then discard the packet.
 		 */
-		get_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
+		convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
 
-		BT_DBG("dest addr %pMR type %d", &addr, addr_type);
+		BT_DBG("dest addr %pMR type %s IP %pI6c", &addr,
+		       addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+		       &hdr->daddr);
 
 		read_lock_irqsave(&devices_lock, flags);
 		peer = peer_lookup_ba(dev, &addr, addr_type);
@@ -556,7 +567,7 @@
 	} else {
 		unsigned long flags;
 
-		get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
+		convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
 		eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8;
 		dev = lowpan_dev(netdev);
 
@@ -564,8 +575,10 @@
 		peer = peer_lookup_ba(dev, &addr, addr_type);
 		read_unlock_irqrestore(&devices_lock, flags);
 
-		BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name,
-		       &addr, &lowpan_cb(skb)->addr, peer);
+		BT_DBG("xmit %s to %pMR type %s IP %pI6c peer %p",
+		       netdev->name, &addr,
+		       addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+		       &lowpan_cb(skb)->addr, peer);
 
 		if (peer && peer->conn)
 			err = send_pkt(peer->conn, netdev->dev_addr,
@@ -620,13 +633,13 @@
 	eui[6] = addr[1];
 	eui[7] = addr[0];
 
-	eui[0] ^= 2;
-
-	/* Universal/local bit set, RFC 4291 */
+	/* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
 	if (addr_type == ADDR_LE_DEV_PUBLIC)
-		eui[0] |= 1;
+		eui[0] &= ~0x02;
 	else
-		eui[0] &= ~1;
+		eui[0] |= 0x02;
+
+	BT_DBG("type %d addr %*phC", addr_type, 8, eui);
 }
 
 static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
@@ -634,7 +647,6 @@
 {
 	netdev->addr_assign_type = NET_ADDR_PERM;
 	set_addr(netdev->dev_addr, addr->b, addr_type);
-	netdev->dev_addr[0] ^= 2;
 }
 
 static void ifup(struct net_device *netdev)
@@ -684,13 +696,6 @@
 
 	memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
 	       EUI64_ADDR_LEN);
-	peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local)
-				   * is done according RFC2464
-				   */
-
-	raw_dump_inline(__func__, "peer IPv6 address",
-			(unsigned char *)&peer->peer_addr, 16);
-	raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8);
 
 	write_lock_irqsave(&devices_lock, flags);
 	INIT_LIST_HEAD(&peer->list);

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 521fd4f..8671bc7 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c

@@ -28,6 +28,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
 
 #include "smp.h"
 #include "a2mp.h"
@@ -367,9 +368,23 @@
 {
 	struct hci_conn *conn = container_of(work, struct hci_conn,
 					     le_conn_timeout.work);
+	struct hci_dev *hdev = conn->hdev;
 
 	BT_DBG("");
 
+	/* We could end up here due to having done directed advertising,
+	 * so clean up the state if necessary. This should however only
+	 * happen with broken hardware or if low duty cycle was used
+	 * (which doesn't have a timeout of its own).
+	 */
+	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+		u8 enable = 0x00;
+		hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable),
+			     &enable);
+		hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+		return;
+	}
+
 	hci_le_create_connection_cancel(conn);
 }
 
@@ -393,6 +408,8 @@
 	conn->io_capability = hdev->io_capability;
 	conn->remote_auth = 0xff;
 	conn->key_type = 0xff;
+	conn->tx_power = HCI_TX_POWER_INVALID;
+	conn->max_tx_power = HCI_TX_POWER_INVALID;
 
 	set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
 	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -401,6 +418,10 @@
 	case ACL_LINK:
 		conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
 		break;
+	case LE_LINK:
+		/* conn->src should reflect the local identity address */
+		hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+		break;
 	case SCO_LINK:
 		if (lmp_esco_capable(hdev))
 			conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
@@ -545,6 +566,11 @@
 	 * favor of connection establishment, we should restart it.
 	 */
 	hci_update_background_scan(hdev);
+
+	/* Re-enable advertising in case this was a failed connection
+	 * attempt as a peripheral.
+	 */
+	mgmt_reenable_advertising(hdev);
 }
 
 static void create_le_conn_complete(struct hci_dev *hdev, u8 status)
@@ -605,6 +631,45 @@
 	conn->state = BT_CONNECT;
 }
 
+static void hci_req_directed_advertising(struct hci_request *req,
+					 struct hci_conn *conn)
+{
+	struct hci_dev *hdev = req->hdev;
+	struct hci_cp_le_set_adv_param cp;
+	u8 own_addr_type;
+	u8 enable;
+
+	enable = 0x00;
+	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+	/* Clear the HCI_ADVERTISING bit temporarily so that the
+	 * hci_update_random_address knows that it's safe to go ahead
+	 * and write a new random address. The flag will be set back on
+	 * as soon as the SET_ADV_ENABLE HCI command completes.
+	 */
+	clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+
+	/* Set require_privacy to false so that the remote device has a
+	 * chance of identifying us.
+	 */
+	if (hci_update_random_address(req, false, &own_addr_type) < 0)
+		return;
+
+	memset(&cp, 0, sizeof(cp));
+	cp.type = LE_ADV_DIRECT_IND;
+	cp.own_address_type = own_addr_type;
+	cp.direct_addr_type = conn->dst_type;
+	bacpy(&cp.direct_addr, &conn->dst);
+	cp.channel_map = hdev->le_adv_channel_map;
+
+	hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
+
+	enable = 0x01;
+	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+	conn->state = BT_CONNECT;
+}
+
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, u8 sec_level, u8 auth_type)
 {
@@ -614,9 +679,6 @@
 	struct hci_request req;
 	int err;
 
-	if (test_bit(HCI_ADVERTISING, &hdev->flags))
-		return ERR_PTR(-ENOTSUPP);
-
 	/* Some devices send ATT messages as soon as the physical link is
 	 * established. To be able to handle these ATT messages, the user-
 	 * space first establishes the connection and then starts the pairing
@@ -664,13 +726,20 @@
 		return ERR_PTR(-ENOMEM);
 
 	conn->dst_type = dst_type;
-
-	conn->out = true;
-	conn->link_mode |= HCI_LM_MASTER;
 	conn->sec_level = BT_SECURITY_LOW;
 	conn->pending_sec_level = sec_level;
 	conn->auth_type = auth_type;
 
+	hci_req_init(&req, hdev);
+
+	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+		hci_req_directed_advertising(&req, conn);
+		goto create_conn;
+	}
+
+	conn->out = true;
+	conn->link_mode |= HCI_LM_MASTER;
+
 	params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
 	if (params) {
 		conn->le_conn_min_interval = params->conn_min_interval;
@@ -680,8 +749,6 @@
 		conn->le_conn_max_interval = hdev->le_conn_max_interval;
 	}
 
-	hci_req_init(&req, hdev);
-
 	/* If controller is scanning, we stop it since some controllers are
 	 * not able to scan and connect at the same time. Also set the
 	 * HCI_LE_SCAN_INTERRUPTED flag so that the command complete
@@ -695,6 +762,7 @@
 
 	hci_req_add_le_create_conn(&req, conn);
 
+create_conn:
 	err = hci_req_run(&req, create_le_conn_complete);
 	if (err) {
 		hci_conn_del(conn);

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1c6ffaa..0a43cce 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c

@@ -34,6 +34,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
 
 #include "smp.h"
 
@@ -579,6 +580,62 @@
 DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
 			sniff_max_interval_set, "%llu\n");
 
+static int conn_info_min_age_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val == 0 || val > hdev->conn_info_max_age)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->conn_info_min_age = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int conn_info_min_age_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->conn_info_min_age;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
+			conn_info_min_age_set, "%llu\n");
+
+static int conn_info_max_age_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val == 0 || val < hdev->conn_info_min_age)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->conn_info_max_age = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int conn_info_max_age_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->conn_info_max_age;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
+			conn_info_max_age_set, "%llu\n");
+
 static int identity_show(struct seq_file *f, void *p)
 {
 	struct hci_dev *hdev = f->private;
@@ -955,14 +1012,9 @@
 	if (count < 3)
 		return -EINVAL;
 
-	buf = kzalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (copy_from_user(buf, data, count)) {
-		err = -EFAULT;
-		goto done;
-	}
+	buf = memdup_user(data, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
 
 	if (memcmp(buf, "add", 3) == 0) {
 		n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu %hhu",
@@ -1759,6 +1811,11 @@
 			    &blacklist_fops);
 	debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
 
+	debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev,
+			    &conn_info_min_age_fops);
+	debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev,
+			    &conn_info_max_age_fops);
+
 	if (lmp_bredr_capable(hdev)) {
 		debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
 				    hdev, &inquiry_cache_fops);
@@ -1828,6 +1885,9 @@
 				    &lowpan_debugfs_fops);
 		debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev,
 				    &le_auto_conn_fops);
+		debugfs_create_u16("discov_interleaved_timeout", 0644,
+				   hdev->debugfs,
+				   &hdev->discov_interleaved_timeout);
 	}
 
 	return 0;
@@ -2033,12 +2093,11 @@
 
 	hci_remove_remote_oob_data(hdev, &data->bdaddr);
 
-	if (ssp)
-		*ssp = data->ssp_mode;
+	*ssp = data->ssp_mode;
 
 	ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr);
 	if (ie) {
-		if (ie->data.ssp_mode && ssp)
+		if (ie->data.ssp_mode)
 			*ssp = true;
 
 		if (ie->name_state == NAME_NEEDED &&
@@ -3791,6 +3850,9 @@
 	hdev->le_conn_max_interval = 0x0038;
 
 	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
+	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
+	hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
+	hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
 
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 682f33a..21e5913 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c

@@ -991,10 +991,25 @@
 	if (!sent)
 		return;
 
+	if (status)
+		return;
+
 	hci_dev_lock(hdev);
 
-	if (!status)
-		mgmt_advertising(hdev, *sent);
+	/* If we're doing connection initiation as peripheral, set a
+	 * timeout in case something goes wrong.
+	 */
+	if (*sent) {
+		struct hci_conn *conn;
+
+		conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+		if (conn)
+			queue_delayed_work(hdev->workqueue,
+					   &conn->le_conn_timeout,
+					   HCI_LE_CONN_TIMEOUT);
+	}
+
+	mgmt_advertising(hdev, *sent);
 
 	hci_dev_unlock(hdev);
 }
@@ -1018,6 +1033,33 @@
 	hci_dev_unlock(hdev);
 }
 
+static bool has_pending_adv_report(struct hci_dev *hdev)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	return bacmp(&d->last_adv_addr, BDADDR_ANY);
+}
+
+static void clear_pending_adv_report(struct hci_dev *hdev)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	bacpy(&d->last_adv_addr, BDADDR_ANY);
+	d->last_adv_data_len = 0;
+}
+
+static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
+				     u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	bacpy(&d->last_adv_addr, bdaddr);
+	d->last_adv_addr_type = bdaddr_type;
+	d->last_adv_rssi = rssi;
+	memcpy(d->last_adv_data, data, len);
+	d->last_adv_data_len = len;
+}
+
 static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 				      struct sk_buff *skb)
 {
@@ -1036,9 +1078,25 @@
 	switch (cp->enable) {
 	case LE_SCAN_ENABLE:
 		set_bit(HCI_LE_SCAN, &hdev->dev_flags);
+		if (hdev->le_scan_type == LE_SCAN_ACTIVE)
+			clear_pending_adv_report(hdev);
 		break;
 
 	case LE_SCAN_DISABLE:
+		/* We do this here instead of when setting DISCOVERY_STOPPED
+		 * since the latter would potentially require waiting for
+		 * inquiry to stop too.
+		 */
+		if (has_pending_adv_report(hdev)) {
+			struct discovery_state *d = &hdev->discovery;
+
+			mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+					  d->last_adv_addr_type, NULL,
+					  d->last_adv_rssi, 0, 1,
+					  d->last_adv_data,
+					  d->last_adv_data_len, NULL, 0);
+		}
+
 		/* Cancel this timer so that we don't try to disable scanning
 		 * when it's already disabled.
 		 */
@@ -1187,6 +1245,59 @@
 	amp_write_rem_assoc_continue(hdev, rp->phy_handle);
 }
 
+static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_read_rssi *rp = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn)
+		conn->rssi = rp->rssi;
+
+	hci_dev_unlock(hdev);
+}
+
+static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_cp_read_tx_power *sent;
+	struct hci_rp_read_tx_power *rp = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+	if (!sent)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (!conn)
+		goto unlock;
+
+	switch (sent->type) {
+	case 0x00:
+		conn->tx_power = rp->tx_power;
+		break;
+	case 0x01:
+		conn->max_tx_power = rp->tx_power;
+		break;
+	}
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1342,6 +1453,7 @@
 	 * is requested.
 	 */
 	if (!hci_conn_ssp_enabled(conn) && !(conn->auth_type & 0x01) &&
+	    conn->pending_sec_level != BT_SECURITY_FIPS &&
 	    conn->pending_sec_level != BT_SECURITY_HIGH &&
 	    conn->pending_sec_level != BT_SECURITY_MEDIUM)
 		return 0;
@@ -1827,7 +1939,7 @@
 		name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp);
 		mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 				  info->dev_class, 0, !name_known, ssp, NULL,
-				  0);
+				  0, NULL, 0);
 	}
 
 	hci_dev_unlock(hdev);
@@ -2579,6 +2691,14 @@
 		hci_cc_write_remote_amp_assoc(hdev, skb);
 		break;
 
+	case HCI_OP_READ_RSSI:
+		hci_cc_read_rssi(hdev, skb);
+		break;
+
+	case HCI_OP_READ_TX_POWER:
+		hci_cc_read_tx_power(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
 		break;
@@ -2957,7 +3077,8 @@
 		}
 
 		if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 &&
-		    conn->pending_sec_level == BT_SECURITY_HIGH) {
+		    (conn->pending_sec_level == BT_SECURITY_HIGH ||
+		     conn->pending_sec_level == BT_SECURITY_FIPS)) {
 			BT_DBG("%s ignoring key unauthenticated for high security",
 			       hdev->name);
 			goto not_found;
@@ -3102,7 +3223,7 @@
 							      false, &ssp);
 			mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 					  info->dev_class, info->rssi,
-					  !name_known, ssp, NULL, 0);
+					  !name_known, ssp, NULL, 0, NULL, 0);
 		}
 	} else {
 		struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
@@ -3120,7 +3241,7 @@
 							      false, &ssp);
 			mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 					  info->dev_class, info->rssi,
-					  !name_known, ssp, NULL, 0);
+					  !name_known, ssp, NULL, 0, NULL, 0);
 		}
 	}
 
@@ -3309,7 +3430,7 @@
 		eir_len = eir_get_length(info->data, sizeof(info->data));
 		mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 				  info->dev_class, info->rssi, !name_known,
-				  ssp, info->data, eir_len);
+				  ssp, info->data, eir_len, NULL, 0);
 	}
 
 	hci_dev_unlock(hdev);
@@ -3367,24 +3488,20 @@
 
 static u8 hci_get_auth_req(struct hci_conn *conn)
 {
-	/* If remote requests dedicated bonding follow that lead */
-	if (conn->remote_auth == HCI_AT_DEDICATED_BONDING ||
-	    conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) {
-		/* If both remote and local IO capabilities allow MITM
-		 * protection then require it, otherwise don't */
-		if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT ||
-		    conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)
-			return HCI_AT_DEDICATED_BONDING;
-		else
-			return HCI_AT_DEDICATED_BONDING_MITM;
-	}
-
 	/* If remote requests no-bonding follow that lead */
 	if (conn->remote_auth == HCI_AT_NO_BONDING ||
 	    conn->remote_auth == HCI_AT_NO_BONDING_MITM)
 		return conn->remote_auth | (conn->auth_type & 0x01);
 
-	return conn->auth_type;
+	/* If both remote and local have enough IO capabilities, require
+	 * MITM protection
+	 */
+	if (conn->remote_cap != HCI_IO_NO_INPUT_OUTPUT &&
+	    conn->io_capability != HCI_IO_NO_INPUT_OUTPUT)
+		return conn->remote_auth | 0x01;
+
+	/* No MITM protection possible so ignore remote requirement */
+	return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01);
 }
 
 static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -3414,8 +3531,21 @@
 		 * to DisplayYesNo as it is not supported by BT spec. */
 		cp.capability = (conn->io_capability == 0x04) ?
 				HCI_IO_DISPLAY_YESNO : conn->io_capability;
-		conn->auth_type = hci_get_auth_req(conn);
-		cp.authentication = conn->auth_type;
+
+		/* If we are initiators, there is no remote information yet */
+		if (conn->remote_auth == 0xff) {
+			cp.authentication = conn->auth_type;
+
+			/* Request MITM protection if our IO caps allow it
+			 * except for the no-bonding case
+			 */
+			if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
+			    cp.authentication != HCI_AT_NO_BONDING)
+				cp.authentication |= 0x01;
+		} else {
+			conn->auth_type = hci_get_auth_req(conn);
+			cp.authentication = conn->auth_type;
+		}
 
 		if (hci_find_remote_oob_data(hdev, &conn->dst) &&
 		    (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)))
@@ -3483,12 +3613,9 @@
 	rem_mitm = (conn->remote_auth & 0x01);
 
 	/* If we require MITM but the remote device can't provide that
-	 * (it has NoInputNoOutput) then reject the confirmation
-	 * request. The only exception is when we're dedicated bonding
-	 * initiators (connect_cfm_cb set) since then we always have the MITM
-	 * bit set. */
-	if (!conn->connect_cfm_cb && loc_mitm &&
-	    conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
+	 * (it has NoInputNoOutput) then reject the confirmation request
+	 */
+	if (loc_mitm && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
 		BT_DBG("Rejecting request: remote device can't provide MITM");
 		hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
 			     sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3846,17 +3973,6 @@
 
 		conn->dst_type = ev->bdaddr_type;
 
-		/* The advertising parameters for own address type
-		 * define which source address and source address
-		 * type this connections has.
-		 */
-		if (bacmp(&conn->src, BDADDR_ANY)) {
-			conn->src_type = ADDR_LE_DEV_PUBLIC;
-		} else {
-			bacpy(&conn->src, &hdev->static_addr);
-			conn->src_type = ADDR_LE_DEV_RANDOM;
-		}
-
 		if (ev->role == LE_CONN_ROLE_MASTER) {
 			conn->out = true;
 			conn->link_mode |= HCI_LM_MASTER;
@@ -3881,27 +3997,24 @@
 							  &conn->init_addr,
 							  &conn->init_addr_type);
 			}
-		} else {
-			/* Set the responder (our side) address type based on
-			 * the advertising address type.
-			 */
-			conn->resp_addr_type = hdev->adv_addr_type;
-			if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM)
-				bacpy(&conn->resp_addr, &hdev->random_addr);
-			else
-				bacpy(&conn->resp_addr, &hdev->bdaddr);
-
-			conn->init_addr_type = ev->bdaddr_type;
-			bacpy(&conn->init_addr, &ev->bdaddr);
 		}
 	} else {
 		cancel_delayed_work(&conn->le_conn_timeout);
 	}
 
-	/* Ensure that the hci_conn contains the identity address type
-	 * regardless of which address the connection was made with.
-	 */
-	hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+	if (!conn->out) {
+		/* Set the responder (our side) address type based on
+		 * the advertising address type.
+		 */
+		conn->resp_addr_type = hdev->adv_addr_type;
+		if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM)
+			bacpy(&conn->resp_addr, &hdev->random_addr);
+		else
+			bacpy(&conn->resp_addr, &hdev->bdaddr);
+
+		conn->init_addr_type = ev->bdaddr_type;
+		bacpy(&conn->init_addr, &ev->bdaddr);
+	}
 
 	/* Lookup the identity address from the stored connection
 	 * address and address type.
@@ -3981,25 +4094,97 @@
 	}
 }
 
+static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
+			       u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
+{
+	struct discovery_state *d = &hdev->discovery;
+	bool match;
+
+	/* Passive scanning shouldn't trigger any device found events */
+	if (hdev->le_scan_type == LE_SCAN_PASSIVE) {
+		if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND)
+			check_pending_le_conn(hdev, bdaddr, bdaddr_type);
+		return;
+	}
+
+	/* If there's nothing pending either store the data from this
+	 * event or send an immediate device found event if the data
+	 * should not be stored for later.
+	 */
+	if (!has_pending_adv_report(hdev)) {
+		/* If the report will trigger a SCAN_REQ store it for
+		 * later merging.
+		 */
+		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+						 rssi, data, len);
+			return;
+		}
+
+		mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+				  rssi, 0, 1, data, len, NULL, 0);
+		return;
+	}
+
+	/* Check if the pending report is for the same device as the new one */
+	match = (!bacmp(bdaddr, &d->last_adv_addr) &&
+		 bdaddr_type == d->last_adv_addr_type);
+
+	/* If the pending data doesn't match this report or this isn't a
+	 * scan response (e.g. we got a duplicate ADV_IND) then force
+	 * sending of the pending data.
+	 */
+	if (type != LE_ADV_SCAN_RSP || !match) {
+		/* Send out whatever is in the cache, but skip duplicates */
+		if (!match)
+			mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+					  d->last_adv_addr_type, NULL,
+					  d->last_adv_rssi, 0, 1,
+					  d->last_adv_data,
+					  d->last_adv_data_len, NULL, 0);
+
+		/* If the new report will trigger a SCAN_REQ store it for
+		 * later merging.
+		 */
+		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+						 rssi, data, len);
+			return;
+		}
+
+		/* The advertising reports cannot be merged, so clear
+		 * the pending report and send out a device found event.
+		 */
+		clear_pending_adv_report(hdev);
+		mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+				  rssi, 0, 1, data, len, NULL, 0);
+		return;
+	}
+
+	/* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and
+	 * the new event is a SCAN_RSP. We can therefore proceed with
+	 * sending a merged device found event.
+	 */
+	mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+			  d->last_adv_addr_type, NULL, rssi, 0, 1, data, len,
+			  d->last_adv_data, d->last_adv_data_len);
+	clear_pending_adv_report(hdev);
+}
+
 static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	u8 num_reports = skb->data[0];
 	void *ptr = &skb->data[1];
-	s8 rssi;
 
 	hci_dev_lock(hdev);
 
 	while (num_reports--) {
 		struct hci_ev_le_advertising_info *ev = ptr;
-
-		if (ev->evt_type == LE_ADV_IND ||
-		    ev->evt_type == LE_ADV_DIRECT_IND)
-			check_pending_le_conn(hdev, &ev->bdaddr,
-					      ev->bdaddr_type);
+		s8 rssi;
 
 		rssi = ev->data[ev->length];
-		mgmt_device_found(hdev, &ev->bdaddr, LE_LINK, ev->bdaddr_type,
-				  NULL, rssi, 0, 1, ev->data, ev->length);
+		process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
+				   ev->bdaddr_type, rssi, ev->data, ev->length);
 
 		ptr += sizeof(*ev) + ev->length + 1;
 	}

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b9a418e..80d25c1 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c

@@ -143,7 +143,7 @@
 
 		if (!skb_copy) {
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
@@ -247,8 +247,8 @@
 			struct hci_mon_hdr *hdr;
 
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE,
+						      GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
@@ -524,16 +524,7 @@
 	case HCISETRAW:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-
-		if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
-			return -EPERM;
-
-		if (arg)
-			set_bit(HCI_RAW, &hdev->flags);
-		else
-			clear_bit(HCI_RAW, &hdev->flags);
-
-		return 0;
+		return -EOPNOTSUPP;
 
 	case HCIGETCONNINFO:
 		return hci_get_conn_info(hdev, (void __user *) arg);

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index dc4d301..6eabbe0 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c

@@ -471,8 +471,14 @@
 	chan->max_tx = L2CAP_DEFAULT_MAX_TX;
 	chan->tx_win = L2CAP_DEFAULT_TX_WINDOW;
 	chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW;
+	chan->remote_max_tx = chan->max_tx;
+	chan->remote_tx_win = chan->tx_win;
 	chan->ack_win = L2CAP_DEFAULT_TX_WINDOW;
 	chan->sec_level = BT_SECURITY_LOW;
+	chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+	chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
+	chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+	chan->conf_state = 0;
 
 	set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
 }

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ef5e5b0..ade3fb4 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c

@@ -1180,13 +1180,16 @@
 	/* Check for backlog size */
 	if (sk_acceptq_is_full(parent)) {
 		BT_DBG("backlog full %d", parent->sk_ack_backlog);
+		release_sock(parent);
 		return NULL;
 	}
 
 	sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
 			      GFP_ATOMIC);
-	if (!sk)
+	if (!sk) {
+		release_sock(parent);
 		return NULL;
+        }
 
 	bt_sock_reclassify_lock(sk, BTPROTO_L2CAP);
 

diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index b3fbc73..941ad75 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c

@@ -58,6 +58,7 @@
 		return EIO;
 
 	case 0x04:
+	case 0x3c:
 		return EHOSTDOWN;
 
 	case 0x05:

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d2d4e0d..0fce544 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c

@@ -29,12 +29,13 @@
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
 #include <net/bluetooth/mgmt.h>
 
 #include "smp.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	5
+#define MGMT_REVISION	6
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
@@ -83,6 +84,7 @@
 	MGMT_OP_SET_DEBUG_KEYS,
 	MGMT_OP_SET_PRIVACY,
 	MGMT_OP_LOAD_IRKS,
+	MGMT_OP_GET_CONN_INFO,
 };
 
 static const u16 mgmt_events[] = {
@@ -2850,10 +2852,7 @@
 	}
 
 	sec_level = BT_SECURITY_MEDIUM;
-	if (cp->io_cap == 0x03)
-		auth_type = HCI_AT_DEDICATED_BONDING;
-	else
-		auth_type = HCI_AT_DEDICATED_BONDING_MITM;
+	auth_type = HCI_AT_DEDICATED_BONDING;
 
 	if (cp->addr.type == BDADDR_BREDR) {
 		conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
@@ -3351,6 +3350,8 @@
 
 static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 {
+	unsigned long timeout = 0;
+
 	BT_DBG("status %d", status);
 
 	if (status) {
@@ -3366,13 +3367,11 @@
 
 	switch (hdev->discovery.type) {
 	case DISCOV_TYPE_LE:
-		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
-				   DISCOV_LE_TIMEOUT);
+		timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
 		break;
 
 	case DISCOV_TYPE_INTERLEAVED:
-		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
-				   DISCOV_INTERLEAVED_TIMEOUT);
+		timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
 		break;
 
 	case DISCOV_TYPE_BREDR:
@@ -3381,6 +3380,11 @@
 	default:
 		BT_ERR("Invalid discovery type %d", hdev->discovery.type);
 	}
+
+	if (!timeout)
+		return;
+
+	queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout);
 }
 
 static int start_discovery(struct sock *sk, struct hci_dev *hdev,
@@ -4530,7 +4534,7 @@
 
 	for (i = 0; i < key_count; i++) {
 		struct mgmt_ltk_info *key = &cp->keys[i];
-		u8 type, addr_type;
+		u8 type, addr_type, authenticated;
 
 		if (key->addr.type == BDADDR_LE_PUBLIC)
 			addr_type = ADDR_LE_DEV_PUBLIC;
@@ -4542,8 +4546,19 @@
 		else
 			type = HCI_SMP_LTK_SLAVE;
 
+		switch (key->type) {
+		case MGMT_LTK_UNAUTHENTICATED:
+			authenticated = 0x00;
+			break;
+		case MGMT_LTK_AUTHENTICATED:
+			authenticated = 0x01;
+			break;
+		default:
+			continue;
+		}
+
 		hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type,
-			    key->type, key->val, key->enc_size, key->ediv,
+			    authenticated, key->val, key->enc_size, key->ediv,
 			    key->rand);
 	}
 
@@ -4555,6 +4570,218 @@
 	return err;
 }
 
+struct cmd_conn_lookup {
+	struct hci_conn *conn;
+	bool valid_tx_power;
+	u8 mgmt_status;
+};
+
+static void get_conn_info_complete(struct pending_cmd *cmd, void *data)
+{
+	struct cmd_conn_lookup *match = data;
+	struct mgmt_cp_get_conn_info *cp;
+	struct mgmt_rp_get_conn_info rp;
+	struct hci_conn *conn = cmd->user_data;
+
+	if (conn != match->conn)
+		return;
+
+	cp = (struct mgmt_cp_get_conn_info *) cmd->param;
+
+	memset(&rp, 0, sizeof(rp));
+	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+	rp.addr.type = cp->addr.type;
+
+	if (!match->mgmt_status) {
+		rp.rssi = conn->rssi;
+
+		if (match->valid_tx_power) {
+			rp.tx_power = conn->tx_power;
+			rp.max_tx_power = conn->max_tx_power;
+		} else {
+			rp.tx_power = HCI_TX_POWER_INVALID;
+			rp.max_tx_power = HCI_TX_POWER_INVALID;
+		}
+	}
+
+	cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO,
+		     match->mgmt_status, &rp, sizeof(rp));
+
+	hci_conn_drop(conn);
+
+	mgmt_pending_remove(cmd);
+}
+
+static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status)
+{
+	struct hci_cp_read_rssi *cp;
+	struct hci_conn *conn;
+	struct cmd_conn_lookup match;
+	u16 handle;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
+
+	/* TX power data is valid in case request completed successfully,
+	 * otherwise we assume it's not valid. At the moment we assume that
+	 * either both or none of current and max values are valid to keep code
+	 * simple.
+	 */
+	match.valid_tx_power = !status;
+
+	/* Commands sent in request are either Read RSSI or Read Transmit Power
+	 * Level so we check which one was last sent to retrieve connection
+	 * handle.  Both commands have handle as first parameter so it's safe to
+	 * cast data on the same command struct.
+	 *
+	 * First command sent is always Read RSSI and we fail only if it fails.
+	 * In other case we simply override error to indicate success as we
+	 * already remembered if TX power value is actually valid.
+	 */
+	cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI);
+	if (!cp) {
+		cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+		status = 0;
+	}
+
+	if (!cp) {
+		BT_ERR("invalid sent_cmd in response");
+		goto unlock;
+	}
+
+	handle = __le16_to_cpu(cp->handle);
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (!conn) {
+		BT_ERR("unknown handle (%d) in response", handle);
+		goto unlock;
+	}
+
+	match.conn = conn;
+	match.mgmt_status = mgmt_status(status);
+
+	/* Cache refresh is complete, now reply for mgmt request for given
+	 * connection only.
+	 */
+	mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev,
+			     get_conn_info_complete, &match);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
+			 u16 len)
+{
+	struct mgmt_cp_get_conn_info *cp = data;
+	struct mgmt_rp_get_conn_info rp;
+	struct hci_conn *conn;
+	unsigned long conn_info_age;
+	int err = 0;
+
+	BT_DBG("%s", hdev->name);
+
+	memset(&rp, 0, sizeof(rp));
+	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+	rp.addr.type = cp->addr.type;
+
+	if (!bdaddr_type_is_valid(cp->addr.type))
+		return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				    MGMT_STATUS_INVALID_PARAMS,
+				    &rp, sizeof(rp));
+
+	hci_dev_lock(hdev);
+
+	if (!hdev_is_powered(hdev)) {
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp));
+		goto unlock;
+	}
+
+	if (cp->addr.type == BDADDR_BREDR)
+		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+					       &cp->addr.bdaddr);
+	else
+		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
+
+	if (!conn || conn->state != BT_CONNECTED) {
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp));
+		goto unlock;
+	}
+
+	/* To avoid client trying to guess when to poll again for information we
+	 * calculate conn info age as random value between min/max set in hdev.
+	 */
+	conn_info_age = hdev->conn_info_min_age +
+			prandom_u32_max(hdev->conn_info_max_age -
+					hdev->conn_info_min_age);
+
+	/* Query controller to refresh cached values if they are too old or were
+	 * never read.
+	 */
+	if (time_after(jiffies, conn->conn_info_timestamp +
+		       msecs_to_jiffies(conn_info_age)) ||
+	    !conn->conn_info_timestamp) {
+		struct hci_request req;
+		struct hci_cp_read_tx_power req_txp_cp;
+		struct hci_cp_read_rssi req_rssi_cp;
+		struct pending_cmd *cmd;
+
+		hci_req_init(&req, hdev);
+		req_rssi_cp.handle = cpu_to_le16(conn->handle);
+		hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp),
+			    &req_rssi_cp);
+
+		/* For LE links TX power does not change thus we don't need to
+		 * query for it once value is known.
+		 */
+		if (!bdaddr_type_is_le(cp->addr.type) ||
+		    conn->tx_power == HCI_TX_POWER_INVALID) {
+			req_txp_cp.handle = cpu_to_le16(conn->handle);
+			req_txp_cp.type = 0x00;
+			hci_req_add(&req, HCI_OP_READ_TX_POWER,
+				    sizeof(req_txp_cp), &req_txp_cp);
+		}
+
+		/* Max TX power needs to be read only once per connection */
+		if (conn->max_tx_power == HCI_TX_POWER_INVALID) {
+			req_txp_cp.handle = cpu_to_le16(conn->handle);
+			req_txp_cp.type = 0x01;
+			hci_req_add(&req, HCI_OP_READ_TX_POWER,
+				    sizeof(req_txp_cp), &req_txp_cp);
+		}
+
+		err = hci_req_run(&req, conn_info_refresh_complete);
+		if (err < 0)
+			goto unlock;
+
+		cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev,
+				       data, len);
+		if (!cmd) {
+			err = -ENOMEM;
+			goto unlock;
+		}
+
+		hci_conn_hold(conn);
+		cmd->user_data = conn;
+
+		conn->conn_info_timestamp = jiffies;
+	} else {
+		/* Cache is valid, just reply with values cached in hci_conn */
+		rp.rssi = conn->rssi;
+		rp.tx_power = conn->tx_power;
+		rp.max_tx_power = conn->max_tx_power;
+
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+	}
+
+unlock:
+	hci_dev_unlock(hdev);
+	return err;
+}
+
 static const struct mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
 		     u16 data_len);
@@ -4610,6 +4837,7 @@
 	{ set_debug_keys,         false, MGMT_SETTING_SIZE },
 	{ set_privacy,            false, MGMT_SET_PRIVACY_SIZE },
 	{ load_irks,              true,  MGMT_LOAD_IRKS_SIZE },
+	{ get_conn_info,          false, MGMT_GET_CONN_INFO_SIZE },
 };
 
 
@@ -5005,6 +5233,14 @@
 	mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL);
 }
 
+static u8 mgmt_ltk_type(struct smp_ltk *ltk)
+{
+	if (ltk->authenticated)
+		return MGMT_LTK_AUTHENTICATED;
+
+	return MGMT_LTK_UNAUTHENTICATED;
+}
+
 void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 {
 	struct mgmt_ev_new_long_term_key ev;
@@ -5030,7 +5266,7 @@
 
 	bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
 	ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
-	ev.key.type = key->authenticated;
+	ev.key.type = mgmt_ltk_type(key);
 	ev.key.enc_size = key->enc_size;
 	ev.key.ediv = key->ediv;
 	ev.key.rand = key->rand;
@@ -5668,8 +5904,9 @@
 }
 
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
-		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
-		       ssp, u8 *eir, u16 eir_len)
+		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
+		       u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 scan_rsp_len)
 {
 	char buf[512];
 	struct mgmt_ev_device_found *ev = (void *) buf;
@@ -5679,8 +5916,10 @@
 	if (!hci_discovery_active(hdev))
 		return;
 
-	/* Leave 5 bytes for a potential CoD field */
-	if (sizeof(*ev) + eir_len + 5 > sizeof(buf))
+	/* Make sure that the buffer is big enough. The 5 extra bytes
+	 * are for the potential CoD field.
+	 */
+	if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf))
 		return;
 
 	memset(buf, 0, sizeof(buf));
@@ -5707,8 +5946,11 @@
 		eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV,
 					  dev_class, 3);
 
-	ev->eir_len = cpu_to_le16(eir_len);
-	ev_size = sizeof(*ev) + eir_len;
+	if (scan_rsp_len > 0)
+		memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len);
+
+	ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
+	ev_size = sizeof(*ev) + eir_len + scan_rsp_len;
 
 	mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL);
 }

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index cf62026..754b6fe 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c

@@ -307,7 +307,7 @@
 	setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
 
 	skb_queue_head_init(&d->tx_queue);
-	spin_lock_init(&d->lock);
+	mutex_init(&d->lock);
 	atomic_set(&d->refcnt, 1);
 
 	rfcomm_dlc_clear_state(d);

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 403ec09..8e385a0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c

@@ -70,7 +70,7 @@
 };
 
 static LIST_HEAD(rfcomm_dev_list);
-static DEFINE_SPINLOCK(rfcomm_dev_lock);
+static DEFINE_MUTEX(rfcomm_dev_lock);
 
 static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
@@ -96,9 +96,9 @@
 	if (dev->tty_dev)
 		tty_unregister_device(rfcomm_tty_driver, dev->id);
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 	list_del(&dev->list);
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	kfree(dev);
 
@@ -161,14 +161,14 @@
 {
 	struct rfcomm_dev *dev;
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	dev = __rfcomm_dev_lookup(id);
 
 	if (dev && !tty_port_get(&dev->port))
 		dev = NULL;
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	return dev;
 }
@@ -224,7 +224,7 @@
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	if (req->dev_id < 0) {
 		dev->id = 0;
@@ -305,11 +305,11 @@
 	   holds reference to this module. */
 	__module_get(THIS_MODULE);
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 	return dev;
 
 out:
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 	kfree(dev);
 	return ERR_PTR(err);
 }
@@ -524,7 +524,7 @@
 
 	di = dl->dev_info;
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	list_for_each_entry(dev, &rfcomm_dev_list, list) {
 		if (!tty_port_get(&dev->port))
@@ -540,7 +540,7 @@
 			break;
 	}
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	dl->dev_num = n;
 	size = sizeof(*dl) + n * sizeof(*di);

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index dfb4e11..3d1cc16 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c

@@ -35,6 +35,33 @@
 
 #define AUTH_REQ_MASK   0x07
 
+#define SMP_FLAG_TK_VALID	1
+#define SMP_FLAG_CFM_PENDING	2
+#define SMP_FLAG_MITM_AUTH	3
+#define SMP_FLAG_COMPLETE	4
+#define SMP_FLAG_INITIATOR	5
+
+struct smp_chan {
+	struct l2cap_conn *conn;
+	u8		preq[7]; /* SMP Pairing Request */
+	u8		prsp[7]; /* SMP Pairing Response */
+	u8		prnd[16]; /* SMP Pairing Random (local) */
+	u8		rrnd[16]; /* SMP Pairing Random (remote) */
+	u8		pcnf[16]; /* SMP Pairing Confirm */
+	u8		tk[16]; /* SMP Temporary Key */
+	u8		enc_key_size;
+	u8		remote_key_dist;
+	bdaddr_t	id_addr;
+	u8		id_addr_type;
+	u8		irk[16];
+	struct smp_csrk	*csrk;
+	struct smp_csrk	*slave_csrk;
+	struct smp_ltk	*ltk;
+	struct smp_ltk	*slave_ltk;
+	struct smp_irk	*remote_irk;
+	unsigned long	flags;
+};
+
 static inline void swap128(const u8 src[16], u8 dst[16])
 {
 	int i;
@@ -369,7 +396,7 @@
 
 	/* Initialize key for JUST WORKS */
 	memset(smp->tk, 0, sizeof(smp->tk));
-	clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+	clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
 
 	BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
 
@@ -388,19 +415,18 @@
 		method = JUST_WORKS;
 
 	/* Don't confirm locally initiated pairing attempts */
-	if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR,
-					   &smp->smp_flags))
+	if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags))
 		method = JUST_WORKS;
 
 	/* If Just Works, Continue with Zero TK */
 	if (method == JUST_WORKS) {
-		set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
 		return 0;
 	}
 
 	/* Not Just Works/Confirm results in MITM Authentication */
 	if (method != JUST_CFM)
-		set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags);
+		set_bit(SMP_FLAG_MITM_AUTH, &smp->flags);
 
 	/* If both devices have Keyoard-Display I/O, the master
 	 * Confirms and the slave Enters the passkey.
@@ -419,7 +445,7 @@
 		passkey %= 1000000;
 		put_unaligned_le32(passkey, smp->tk);
 		BT_DBG("PassKey: %d", passkey);
-		set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
 	}
 
 	hci_dev_lock(hcon->hdev);
@@ -441,15 +467,13 @@
 	return ret;
 }
 
-static void confirm_work(struct work_struct *work)
+static u8 smp_confirm(struct smp_chan *smp)
 {
-	struct smp_chan *smp = container_of(work, struct smp_chan, confirm);
 	struct l2cap_conn *conn = smp->conn;
 	struct hci_dev *hdev = conn->hcon->hdev;
 	struct crypto_blkcipher *tfm = hdev->tfm_aes;
 	struct smp_cmd_pairing_confirm cp;
 	int ret;
-	u8 reason;
 
 	BT_DBG("conn %p", conn);
 
@@ -463,35 +487,27 @@
 
 	hci_dev_unlock(hdev);
 
-	if (ret) {
-		reason = SMP_UNSPECIFIED;
-		goto error;
-	}
+	if (ret)
+		return SMP_UNSPECIFIED;
 
-	clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+	clear_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
 
 	smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
 
-	return;
-
-error:
-	smp_failure(conn, reason);
+	return 0;
 }
 
-static void random_work(struct work_struct *work)
+static u8 smp_random(struct smp_chan *smp)
 {
-	struct smp_chan *smp = container_of(work, struct smp_chan, random);
 	struct l2cap_conn *conn = smp->conn;
 	struct hci_conn *hcon = conn->hcon;
 	struct hci_dev *hdev = hcon->hdev;
 	struct crypto_blkcipher *tfm = hdev->tfm_aes;
-	u8 reason, confirm[16];
+	u8 confirm[16];
 	int ret;
 
-	if (IS_ERR_OR_NULL(tfm)) {
-		reason = SMP_UNSPECIFIED;
-		goto error;
-	}
+	if (IS_ERR_OR_NULL(tfm))
+		return SMP_UNSPECIFIED;
 
 	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
 
@@ -504,15 +520,12 @@
 
 	hci_dev_unlock(hdev);
 
-	if (ret) {
-		reason = SMP_UNSPECIFIED;
-		goto error;
-	}
+	if (ret)
+		return SMP_UNSPECIFIED;
 
 	if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) {
 		BT_ERR("Pairing failed (confirmation values mismatch)");
-		reason = SMP_CONFIRM_FAILED;
-		goto error;
+		return SMP_CONFIRM_FAILED;
 	}
 
 	if (hcon->out) {
@@ -525,10 +538,8 @@
 		memset(stk + smp->enc_key_size, 0,
 		       SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
 
-		if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) {
-			reason = SMP_UNSPECIFIED;
-			goto error;
-		}
+		if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
+			return SMP_UNSPECIFIED;
 
 		hci_le_start_enc(hcon, ediv, rand, stk);
 		hcon->enc_key_size = smp->enc_key_size;
@@ -550,10 +561,7 @@
 			    ediv, rand);
 	}
 
-	return;
-
-error:
-	smp_failure(conn, reason);
+	return 0;
 }
 
 static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -564,9 +572,6 @@
 	if (!smp)
 		return NULL;
 
-	INIT_WORK(&smp->confirm, confirm_work);
-	INIT_WORK(&smp->random, random_work);
-
 	smp->conn = conn;
 	conn->smp_chan = smp;
 	conn->hcon->smp_conn = conn;
@@ -583,7 +588,7 @@
 
 	BUG_ON(!smp);
 
-	complete = test_bit(SMP_FLAG_COMPLETE, &smp->smp_flags);
+	complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags);
 	mgmt_smp_complete(conn->hcon, complete);
 
 	kfree(smp->csrk);
@@ -634,7 +639,7 @@
 		put_unaligned_le32(value, smp->tk);
 		/* Fall Through */
 	case MGMT_OP_USER_CONFIRM_REPLY:
-		set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
 		break;
 	case MGMT_OP_USER_PASSKEY_NEG_REPLY:
 	case MGMT_OP_USER_CONFIRM_NEG_REPLY:
@@ -646,8 +651,11 @@
 	}
 
 	/* If it is our turn to send Pairing Confirm, do so now */
-	if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags))
-		queue_work(hcon->hdev->workqueue, &smp->confirm);
+	if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) {
+		u8 rsp = smp_confirm(smp);
+		if (rsp)
+			smp_failure(conn, rsp);
+	}
 
 	return 0;
 }
@@ -656,14 +664,13 @@
 {
 	struct smp_cmd_pairing rsp, *req = (void *) skb->data;
 	struct smp_chan *smp;
-	u8 key_size;
-	u8 auth = SMP_AUTH_NONE;
+	u8 key_size, auth;
 	int ret;
 
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*req))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	if (conn->hcon->link_mode & HCI_LM_MASTER)
 		return SMP_CMD_NOTSUPP;
@@ -681,8 +688,7 @@
 	skb_pull(skb, sizeof(*req));
 
 	/* We didn't start the pairing, so match remote */
-	if (req->auth_req & SMP_AUTH_BONDING)
-		auth = req->auth_req;
+	auth = req->auth_req;
 
 	conn->hcon->pending_sec_level = authreq_to_seclevel(auth);
 
@@ -704,7 +710,7 @@
 	if (ret)
 		return SMP_UNSPECIFIED;
 
-	clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
+	clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
 
 	return 0;
 }
@@ -713,14 +719,13 @@
 {
 	struct smp_cmd_pairing *req, *rsp = (void *) skb->data;
 	struct smp_chan *smp = conn->smp_chan;
-	struct hci_dev *hdev = conn->hcon->hdev;
 	u8 key_size, auth = SMP_AUTH_NONE;
 	int ret;
 
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*rsp))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	if (!(conn->hcon->link_mode & HCI_LM_MASTER))
 		return SMP_CMD_NOTSUPP;
@@ -753,11 +758,11 @@
 	if (ret)
 		return SMP_UNSPECIFIED;
 
-	set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+	set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
 
 	/* Can't compose response until we have been confirmed */
-	if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags))
-		queue_work(hdev->workqueue, &smp->confirm);
+	if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+		return smp_confirm(smp);
 
 	return 0;
 }
@@ -765,12 +770,11 @@
 static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
 {
 	struct smp_chan *smp = conn->smp_chan;
-	struct hci_dev *hdev = conn->hcon->hdev;
 
 	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
 
 	if (skb->len < sizeof(smp->pcnf))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf));
 	skb_pull(skb, sizeof(smp->pcnf));
@@ -778,10 +782,10 @@
 	if (conn->hcon->out)
 		smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
 			     smp->prnd);
-	else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags))
-		queue_work(hdev->workqueue, &smp->confirm);
+	else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+		return smp_confirm(smp);
 	else
-		set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+		set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
 
 	return 0;
 }
@@ -789,19 +793,16 @@
 static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
 {
 	struct smp_chan *smp = conn->smp_chan;
-	struct hci_dev *hdev = conn->hcon->hdev;
 
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(smp->rrnd))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd));
 	skb_pull(skb, sizeof(smp->rrnd));
 
-	queue_work(hdev->workqueue, &smp->random);
-
-	return 0;
+	return smp_random(smp);
 }
 
 static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
@@ -836,7 +837,7 @@
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	if (!(conn->hcon->link_mode & HCI_LM_MASTER))
 		return SMP_CMD_NOTSUPP;
@@ -861,7 +862,7 @@
 
 	smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
 
-	clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
+	clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
 
 	return 0;
 }
@@ -908,10 +909,11 @@
 
 	authreq = seclevel_to_authreq(sec_level);
 
-	/* hcon->auth_type is set by pair_device in mgmt.c. If the MITM
-	 * flag is set we should also set it for the SMP request.
+	/* Require MITM if IO Capability allows or the security level
+	 * requires it.
 	 */
-	if ((hcon->auth_type & 0x01))
+	if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
+	    sec_level > BT_SECURITY_MEDIUM)
 		authreq |= SMP_AUTH_MITM;
 
 	if (hcon->link_mode & HCI_LM_MASTER) {
@@ -928,7 +930,7 @@
 		smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
 	}
 
-	set_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
+	set_bit(SMP_FLAG_INITIATOR, &smp->flags);
 
 done:
 	hcon->pending_sec_level = sec_level;
@@ -944,7 +946,7 @@
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	/* Ignore this PDU if it wasn't requested */
 	if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
@@ -969,7 +971,7 @@
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	/* Ignore this PDU if it wasn't requested */
 	if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
@@ -1001,7 +1003,7 @@
 	BT_DBG("");
 
 	if (skb->len < sizeof(*info))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	/* Ignore this PDU if it wasn't requested */
 	if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
@@ -1025,7 +1027,7 @@
 	BT_DBG("");
 
 	if (skb->len < sizeof(*info))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	/* Ignore this PDU if it wasn't requested */
 	if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
@@ -1075,7 +1077,7 @@
 	BT_DBG("conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
-		return SMP_UNSPECIFIED;
+		return SMP_INVALID_PARAMS;
 
 	/* Ignore this PDU if it wasn't requested */
 	if (!(smp->remote_key_dist & SMP_DIST_SIGN))
@@ -1358,7 +1360,7 @@
 
 	clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags);
 	cancel_delayed_work_sync(&conn->security_timer);
-	set_bit(SMP_FLAG_COMPLETE, &smp->smp_flags);
+	set_bit(SMP_FLAG_COMPLETE, &smp->flags);
 	smp_notify_keys(conn);
 
 	smp_chan_destroy(conn);

diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 1277147..5a8dc36 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h

@@ -111,39 +111,11 @@
 #define SMP_CMD_NOTSUPP			0x07
 #define SMP_UNSPECIFIED			0x08
 #define SMP_REPEATED_ATTEMPTS		0x09
+#define SMP_INVALID_PARAMS		0x0a
 
 #define SMP_MIN_ENC_KEY_SIZE		7
 #define SMP_MAX_ENC_KEY_SIZE		16
 
-#define SMP_FLAG_TK_VALID	1
-#define SMP_FLAG_CFM_PENDING	2
-#define SMP_FLAG_MITM_AUTH	3
-#define SMP_FLAG_COMPLETE	4
-#define SMP_FLAG_INITIATOR	5
-
-struct smp_chan {
-	struct l2cap_conn *conn;
-	u8		preq[7]; /* SMP Pairing Request */
-	u8		prsp[7]; /* SMP Pairing Response */
-	u8		prnd[16]; /* SMP Pairing Random (local) */
-	u8		rrnd[16]; /* SMP Pairing Random (remote) */
-	u8		pcnf[16]; /* SMP Pairing Confirm */
-	u8		tk[16]; /* SMP Temporary Key */
-	u8		enc_key_size;
-	u8		remote_key_dist;
-	bdaddr_t	id_addr;
-	u8		id_addr_type;
-	u8		irk[16];
-	struct smp_csrk	*csrk;
-	struct smp_csrk	*slave_csrk;
-	struct smp_ltk	*ltk;
-	struct smp_ltk	*slave_ltk;
-	struct smp_irk	*remote_irk;
-	unsigned long	smp_flags;
-	struct work_struct confirm;
-	struct work_struct random;
-};
-
 /* SMP Commands */
 bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level);
 int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);

diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f..8590b94 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile

@@ -5,7 +5,7 @@
 obj-$(CONFIG_BRIDGE) += bridge.o
 
 bridge-y	:= br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
-			br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+			br_ioctl.o br_stp.o br_stp_bpdu.o \
 			br_stp_if.o br_stp_timer.o br_netlink.o
 
 bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
@@ -16,4 +16,4 @@
 
 bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
 
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_NETFILTER) += netfilter/

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 19311aa..1a755a1 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c

@@ -22,6 +22,104 @@
 
 #include "br_private.h"
 
+/*
+ * Handle changes in state of network devices enslaved to a bridge.
+ *
+ * Note: don't care about up/down if bridge itself is down, because
+ *     port state is checked when bridge is brought up.
+ */
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net_bridge_port *p;
+	struct net_bridge *br;
+	bool changed_addr;
+	int err;
+
+	/* registration of the bridge completed, add sysfs entries */
+	if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
+		br_sysfs_addbr(dev);
+		return NOTIFY_DONE;
+	}
+
+	/* not a port of a bridge */
+	p = br_port_get_rtnl(dev);
+	if (!p)
+		return NOTIFY_DONE;
+
+	br = p->br;
+
+	switch (event) {
+	case NETDEV_CHANGEMTU:
+		dev_set_mtu(br->dev, br_min_mtu(br));
+		break;
+
+	case NETDEV_CHANGEADDR:
+		spin_lock_bh(&br->lock);
+		br_fdb_changeaddr(p, dev->dev_addr);
+		changed_addr = br_stp_recalculate_bridge_id(br);
+		spin_unlock_bh(&br->lock);
+
+		if (changed_addr)
+			call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
+		break;
+
+	case NETDEV_CHANGE:
+		br_port_carrier_check(p);
+		break;
+
+	case NETDEV_FEAT_CHANGE:
+		netdev_update_features(br->dev);
+		break;
+
+	case NETDEV_DOWN:
+		spin_lock_bh(&br->lock);
+		if (br->dev->flags & IFF_UP)
+			br_stp_disable_port(p);
+		spin_unlock_bh(&br->lock);
+		break;
+
+	case NETDEV_UP:
+		if (netif_running(br->dev) && netif_oper_up(dev)) {
+			spin_lock_bh(&br->lock);
+			br_stp_enable_port(p);
+			spin_unlock_bh(&br->lock);
+		}
+		break;
+
+	case NETDEV_UNREGISTER:
+		br_del_if(br, dev);
+		break;
+
+	case NETDEV_CHANGENAME:
+		err = br_sysfs_renameif(p);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid the underlying device to change its type. */
+		return NOTIFY_BAD;
+
+	case NETDEV_RESEND_IGMP:
+		/* Propagate to master device */
+		call_netdevice_notifiers(event, br->dev);
+		break;
+	}
+
+	/* Events that may cause spanning tree to refresh */
+	if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+	    event == NETDEV_CHANGE || event == NETDEV_DOWN)
+		br_ifinfo_notify(RTM_NEWLINK, p);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block br_device_notifier = {
+	.notifier_call = br_device_event
+};
+
 static void __net_exit br_net_exit(struct net *net)
 {
 	struct net_device *dev;

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3e2da2c..568cccd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c

@@ -112,6 +112,12 @@
 {
 }
 
+static void br_dev_change_rx_flags(struct net_device *dev, int change)
+{
+	if (change & IFF_PROMISC)
+		br_manage_promisc(netdev_priv(dev));
+}
+
 static int br_dev_stop(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -309,6 +315,7 @@
 	.ndo_get_stats64	 = br_get_stats64,
 	.ndo_set_mac_address	 = br_set_mac_address,
 	.ndo_set_rx_mode	 = br_dev_set_multicast_list,
+	.ndo_change_rx_flags	 = br_dev_change_rx_flags,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -348,14 +355,15 @@
 
 	dev->netdev_ops = &br_netdev_ops;
 	dev->destructor = br_dev_free;
-	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
+	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->tx_queue_len = 0;
 	dev->priv_flags = IFF_EBRIDGE;
 
 	dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
-			NETIF_F_HW_VLAN_CTAG_TX;
-	dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
+			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+	dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+			   NETIF_F_HW_VLAN_STAG_TX;
 	dev->vlan_features = COMMON_FEATURES;
 
 	br->dev = dev;
@@ -370,6 +378,7 @@
 
 	br->stp_enabled = BR_NO_STP;
 	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
+	br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
 
 	br->designated_root = br->bridge_id;
 	br->bridge_max_age = br->max_age = 20 * HZ;
@@ -380,4 +389,5 @@
 	br_netfilter_rtable_init(br);
 	br_stp_timer_init(br);
 	br_multicast_init(br);
+	br_vlan_init(br);
 }

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 474d36f..b524c36 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c

@@ -85,8 +85,58 @@
 	kmem_cache_free(br_fdb_cache, ent);
 }
 
+/* When a static FDB entry is added, the mac address from the entry is
+ * added to the bridge private HW address list and all required ports
+ * are then updated with the new information.
+ * Called under RTNL.
+ */
+static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
+{
+	int err;
+	struct net_bridge_port *p, *tmp;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (!br_promisc_port(p)) {
+			err = dev_uc_add(p->dev, addr);
+			if (err)
+				goto undo;
+		}
+	}
+
+	return;
+undo:
+	list_for_each_entry(tmp, &br->port_list, list) {
+		if (tmp == p)
+			break;
+		if (!br_promisc_port(tmp))
+			dev_uc_del(tmp->dev, addr);
+	}
+}
+
+/* When a static FDB entry is deleted, the HW address from that entry is
+ * also removed from the bridge private HW address list and all required
+ * ports are then updated with the new information.
+ * Called under RTNL.
+ */
+static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
+{
+	struct net_bridge_port *p;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (!br_promisc_port(p))
+			dev_uc_del(p->dev, addr);
+	}
+}
+
 static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 {
+	if (f->is_static)
+		fdb_del_hw(br, f->addr.addr);
+
 	hlist_del_rcu(&f->hlist);
 	fdb_notify(br, f, RTM_DELNEIGH);
 	call_rcu(&f->rcu, fdb_rcu_free);
@@ -466,6 +516,7 @@
 		return -ENOMEM;
 
 	fdb->is_local = fdb->is_static = 1;
+	fdb_add_hw(br, addr);
 	fdb_notify(br, fdb, RTM_NEWNEIGH);
 	return 0;
 }
@@ -571,6 +622,8 @@
 
 	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+		goto nla_put_failure;
 	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
 	ci.ndm_confirmed = 0;
 	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
@@ -592,6 +645,7 @@
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
 		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
@@ -684,13 +738,25 @@
 	}
 
 	if (fdb_to_nud(fdb) != state) {
-		if (state & NUD_PERMANENT)
-			fdb->is_local = fdb->is_static = 1;
-		else if (state & NUD_NOARP) {
+		if (state & NUD_PERMANENT) {
+			fdb->is_local = 1;
+			if (!fdb->is_static) {
+				fdb->is_static = 1;
+				fdb_add_hw(br, addr);
+			}
+		} else if (state & NUD_NOARP) {
 			fdb->is_local = 0;
-			fdb->is_static = 1;
-		} else
-			fdb->is_local = fdb->is_static = 0;
+			if (!fdb->is_static) {
+				fdb->is_static = 1;
+				fdb_add_hw(br, addr);
+			}
+		} else {
+			fdb->is_local = 0;
+			if (fdb->is_static) {
+				fdb->is_static = 0;
+				fdb_del_hw(br, addr);
+			}
+		}
 
 		modified = true;
 	}
@@ -880,3 +946,69 @@
 out:
 	return err;
 }
+
+/* Program every static FDB address of @br into the unicast filter of
+ * port @p via dev_uc_add().
+ *
+ * Returns 0 on success.  If dev_uc_add() fails, the addresses added so
+ * far are removed again with dev_uc_del() and that error is returned.
+ * Called under RTNL.
+ */
+int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
+{
+	struct net_bridge_fdb_entry *fdb, *tmp;
+	int i;
+	int err;
+
+	ASSERT_RTNL();
+
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		hlist_for_each_entry(fdb, &br->hash[i], hlist) {
+			/* We only care for static entries */
+			if (!fdb->is_static)
+				continue;
+
+			err = dev_uc_add(p->dev, fdb->addr.addr);
+			if (err)
+				goto rollback;
+		}
+	}
+	return 0;
+
+rollback:
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		hlist_for_each_entry(tmp, &br->hash[i], hlist) {
+			/* Entries from the failed one onwards (including
+			 * every later bucket) were never added, so stop
+			 * here instead of only leaving this bucket.
+			 */
+			if (tmp == fdb)
+				return err;
+
+			/* We only care for static entries */
+			if (!tmp->is_static)
+				continue;
+
+			dev_uc_del(p->dev, tmp->addr.addr);
+		}
+	}
+	return err;
+}
+
+void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
+{
+	struct net_bridge_fdb_entry *fdb;
+	int i;
+
+	ASSERT_RTNL();
+
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) {
+			/* We only care for static entries */
+			if (!fdb->is_static)
+				continue;
+
+			dev_uc_del(p->dev, fdb->addr.addr);
+		}
+	}
+}

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 5262b86..3eca3fd 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c

@@ -85,6 +85,111 @@
 	spin_unlock_bh(&br->lock);
 }
 
+static void br_port_set_promisc(struct net_bridge_port *p)
+{
+	int err = 0;
+
+	if (br_promisc_port(p))
+		return;
+
+	err = dev_set_promiscuity(p->dev, 1);
+	if (err)
+		return;
+
+	br_fdb_unsync_static(p->br, p);
+	p->flags |= BR_PROMISC;
+}
+
+static void br_port_clear_promisc(struct net_bridge_port *p)
+{
+	int err;
+
+	/* Check if the port is already non-promisc or if it doesn't
+	 * support UNICAST filtering.  Without unicast filtering support
+	 * we'll end up re-enabling promisc mode anyway, so just check for
+	 * it here.
+	 */
+	if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
+		return;
+
+	/* Since we'll be clearing the promisc mode, program the port
+	 * first so that we don't have interruption in traffic.
+	 */
+	err = br_fdb_sync_static(p->br, p);
+	if (err)
+		return;
+
+	dev_set_promiscuity(p->dev, -1);
+	p->flags &= ~BR_PROMISC;
+}
+
+/* When a port is added or removed or when certain port flags
+ * change, this function is called to automatically manage
+ * promiscuity setting of all the bridge ports.  We are always called
+ * under RTNL so can skip using rcu primitives.
+ */
+void br_manage_promisc(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	bool set_all = false;
+
+	/* If vlan filtering is disabled or bridge interface is placed
+	 * into promiscuous mode, place all ports in promiscuous mode.
+	 */
+	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
+		set_all = true;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (set_all) {
+			br_port_set_promisc(p);
+		} else {
+			/* If the number of auto-ports is <= 1, then all other
+			 * ports will have their output configuration
+			 * statically specified through fdbs.  Since ingress
+			 * on the auto-port becomes forwarding/egress to other
+			 * ports and egress configuration is statically known,
+			 * we can say that ingress configuration of the
+			 * auto-port is also statically known.
+			 * This lets us disable promiscuous mode and write
+			 * this config to hw.
+			 */
+			if (br->auto_cnt == 0 ||
+			    (br->auto_cnt == 1 && br_auto_port(p)))
+				br_port_clear_promisc(p);
+			else
+				br_port_set_promisc(p);
+		}
+	}
+}
+
+static void nbp_update_port_count(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	u32 cnt = 0;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (br_auto_port(p))
+			cnt++;
+	}
+	if (br->auto_cnt != cnt) {
+		br->auto_cnt = cnt;
+		br_manage_promisc(br);
+	}
+}
+
+static void nbp_delete_promisc(struct net_bridge_port *p)
+{
+	/* If port is currently promiscuous, unset promiscuity.
+	 * Otherwise, it is a static port so remove all addresses
+	 * from it.
+	 */
+	dev_set_allmulti(p->dev, -1);
+	if (br_promisc_port(p))
+		dev_set_promiscuity(p->dev, -1);
+	else
+		br_fdb_unsync_static(p->br, p);
+}
+
 static void release_nbp(struct kobject *kobj)
 {
 	struct net_bridge_port *p
@@ -133,7 +238,7 @@
 
 	sysfs_remove_link(br->ifobj, p->dev->name);
 
-	dev_set_promiscuity(dev, -1);
+	nbp_delete_promisc(p);
 
 	spin_lock_bh(&br->lock);
 	br_stp_disable_port(p);
@@ -141,10 +246,11 @@
 
 	br_ifinfo_notify(RTM_DELLINK, p);
 
+	list_del_rcu(&p->list);
+
 	nbp_vlan_flush(p);
 	br_fdb_delete_by_port(br, p, 1);
-
-	list_del_rcu(&p->list);
+	nbp_update_port_count(br);
 
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 
@@ -353,7 +459,7 @@
 
 	call_netdevice_notifiers(NETDEV_JOIN, dev);
 
-	err = dev_set_promiscuity(dev, 1);
+	err = dev_set_allmulti(dev, 1);
 	if (err)
 		goto put_back;
 
@@ -384,6 +490,8 @@
 
 	list_add_rcu(&p->list, &br->port_list);
 
+	nbp_update_port_count(br);
+
 	netdev_update_features(br->dev);
 
 	if (br->dev->needed_headroom < dev->needed_headroom)
@@ -421,7 +529,7 @@
 	kobject_put(&p->kobj);
 	p = NULL; /* kobject_put frees */
 err1:
-	dev_set_promiscuity(dev, -1);
+	dev_set_allmulti(dev, -1);
 put_back:
 	dev_put(dev);
 	kfree(p);
@@ -455,3 +563,11 @@
 
 	return 0;
 }
+
+void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
+{
+	struct net_bridge *br = p->br;
+
+	if (mask & BR_AUTO_MASK)
+		nbp_update_port_count(br);
+}

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 04d6348..366c436 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c

@@ -177,6 +177,8 @@
 	p = br_port_get_rcu(skb->dev);
 
 	if (unlikely(is_link_local_ether_addr(dest))) {
+		u16 fwd_mask = p->br->group_fwd_mask_required;
+
 		/*
 		 * See IEEE 802.1D Table 7-10 Reserved addresses
 		 *
@@ -194,7 +196,8 @@
 		case 0x00:	/* Bridge Group Address */
 			/* If STP is turned off,
 			   then must forward to keep loop detection */
-			if (p->br->stp_enabled == BR_NO_STP)
+			if (p->br->stp_enabled == BR_NO_STP ||
+			    fwd_mask & (1u << dest[5]))
 				goto forward;
 			break;
 
@@ -203,7 +206,8 @@
 
 		default:
 			/* Allow selective forwarding for most other protocols */
-			if (p->br->group_fwd_mask & (1u << dest[5]))
+			fwd_mask |= p->br->group_fwd_mask;
+			if (fwd_mask & (1u << dest[5]))
 				goto forward;
 		}
 

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b7b1914..5df0526 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c

@@ -418,13 +418,13 @@
 
 	ip.proto = entry->addr.proto;
 	if (ip.proto == htons(ETH_P_IP)) {
-		if (timer_pending(&br->ip4_querier.timer))
+		if (timer_pending(&br->ip4_other_query.timer))
 			return -EBUSY;
 
 		ip.u.ip4 = entry->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		if (timer_pending(&br->ip6_querier.timer))
+		if (timer_pending(&br->ip6_other_query.timer))
 			return -EBUSY;
 
 		ip.u.ip6 = entry->addr.u.ip6;

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 7b757b5..abfa0b65 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c

@@ -11,6 +11,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/export.h>
 #include <linux/if_ether.h>
 #include <linux/igmp.h>
 #include <linux/jhash.h>
@@ -35,7 +36,7 @@
 #include "br_private.h"
 
 static void br_multicast_start_querier(struct net_bridge *br,
-				       struct bridge_mcast_query *query);
+				       struct bridge_mcast_own_query *query);
 unsigned int br_mdb_rehash_seq;
 
 static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -761,7 +762,7 @@
 }
 
 static void br_multicast_querier_expired(struct net_bridge *br,
-					 struct bridge_mcast_query *query)
+					 struct bridge_mcast_own_query *query)
 {
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) || br->multicast_disabled)
@@ -777,7 +778,7 @@
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_querier_expired(br, &br->ip4_query);
+	br_multicast_querier_expired(br, &br->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -785,10 +786,22 @@
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_querier_expired(br, &br->ip6_query);
+	br_multicast_querier_expired(br, &br->ip6_own_query);
 }
 #endif
 
+static void br_multicast_select_own_querier(struct net_bridge *br,
+					    struct br_ip *ip,
+					    struct sk_buff *skb)
+{
+	if (ip->proto == htons(ETH_P_IP))
+		br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
+#endif
+}
+
 static void __br_multicast_send_query(struct net_bridge *br,
 				      struct net_bridge_port *port,
 				      struct br_ip *ip)
@@ -804,17 +817,19 @@
 		skb->dev = port->dev;
 		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			dev_queue_xmit);
-	} else
+	} else {
+		br_multicast_select_own_querier(br, ip, skb);
 		netif_rx(skb);
+	}
 }
 
 static void br_multicast_send_query(struct net_bridge *br,
 				    struct net_bridge_port *port,
-				    struct bridge_mcast_query *query)
+				    struct bridge_mcast_own_query *own_query)
 {
 	unsigned long time;
 	struct br_ip br_group;
-	struct bridge_mcast_querier *querier = NULL;
+	struct bridge_mcast_other_query *other_query = NULL;
 
 	if (!netif_running(br->dev) || br->multicast_disabled ||
 	    !br->multicast_querier)
@@ -822,31 +837,32 @@
 
 	memset(&br_group.u, 0, sizeof(br_group.u));
 
-	if (port ? (query == &port->ip4_query) :
-		   (query == &br->ip4_query)) {
-		querier = &br->ip4_querier;
+	if (port ? (own_query == &port->ip4_own_query) :
+		   (own_query == &br->ip4_own_query)) {
+		other_query = &br->ip4_other_query;
 		br_group.proto = htons(ETH_P_IP);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		querier = &br->ip6_querier;
+		other_query = &br->ip6_other_query;
 		br_group.proto = htons(ETH_P_IPV6);
 #endif
 	}
 
-	if (!querier || timer_pending(&querier->timer))
+	if (!other_query || timer_pending(&other_query->timer))
 		return;
 
 	__br_multicast_send_query(br, port, &br_group);
 
 	time = jiffies;
-	time += query->startup_sent < br->multicast_startup_query_count ?
+	time += own_query->startup_sent < br->multicast_startup_query_count ?
 		br->multicast_startup_query_interval :
 		br->multicast_query_interval;
-	mod_timer(&query->timer, time);
+	mod_timer(&own_query->timer, time);
 }
 
-static void br_multicast_port_query_expired(struct net_bridge_port *port,
-					    struct bridge_mcast_query *query)
+static void
+br_multicast_port_query_expired(struct net_bridge_port *port,
+				struct bridge_mcast_own_query *query)
 {
 	struct net_bridge *br = port->br;
 
@@ -868,7 +884,7 @@
 {
 	struct net_bridge_port *port = (void *)data;
 
-	br_multicast_port_query_expired(port, &port->ip4_query);
+	br_multicast_port_query_expired(port, &port->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -876,7 +892,7 @@
 {
 	struct net_bridge_port *port = (void *)data;
 
-	br_multicast_port_query_expired(port, &port->ip6_query);
+	br_multicast_port_query_expired(port, &port->ip6_own_query);
 }
 #endif
 
@@ -886,11 +902,11 @@
 
 	setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
 		    (unsigned long)port);
-	setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired,
-		    (unsigned long)port);
+	setup_timer(&port->ip4_own_query.timer,
+		    br_ip4_multicast_port_query_expired, (unsigned long)port);
 #if IS_ENABLED(CONFIG_IPV6)
-	setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired,
-		    (unsigned long)port);
+	setup_timer(&port->ip6_own_query.timer,
+		    br_ip6_multicast_port_query_expired, (unsigned long)port);
 #endif
 }
 
@@ -899,7 +915,7 @@
 	del_timer_sync(&port->multicast_router_timer);
 }
 
-static void br_multicast_enable(struct bridge_mcast_query *query)
+static void br_multicast_enable(struct bridge_mcast_own_query *query)
 {
 	query->startup_sent = 0;
 
@@ -916,9 +932,9 @@
 	if (br->multicast_disabled || !netif_running(br->dev))
 		goto out;
 
-	br_multicast_enable(&port->ip4_query);
+	br_multicast_enable(&port->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
-	br_multicast_enable(&port->ip6_query);
+	br_multicast_enable(&port->ip6_own_query);
 #endif
 
 out:
@@ -938,9 +954,9 @@
 	if (!hlist_unhashed(&port->rlist))
 		hlist_del_init_rcu(&port->rlist);
 	del_timer(&port->multicast_router_timer);
-	del_timer(&port->ip4_query.timer);
+	del_timer(&port->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
-	del_timer(&port->ip6_query.timer);
+	del_timer(&port->ip6_own_query.timer);
 #endif
 	spin_unlock(&br->multicast_lock);
 }
@@ -1064,15 +1080,80 @@
 }
 #endif
 
-static void
-br_multicast_update_querier_timer(struct net_bridge *br,
-				  struct bridge_mcast_querier *querier,
-				  unsigned long max_delay)
+static bool br_ip4_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
+					    __be32 saddr)
 {
-	if (!timer_pending(&querier->timer))
-		querier->delay_time = jiffies + max_delay;
+	if (!timer_pending(&br->ip4_own_query.timer) &&
+	    !timer_pending(&br->ip4_other_query.timer))
+		goto update;
 
-	mod_timer(&querier->timer, jiffies + br->multicast_querier_interval);
+	if (!br->ip4_querier.addr.u.ip4)
+		goto update;
+
+	if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
+		goto update;
+
+	return false;
+
+update:
+	br->ip4_querier.addr.u.ip4 = saddr;
+
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip4_querier.port, port);
+
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static bool br_ip6_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
+					    struct in6_addr *saddr)
+{
+	if (!timer_pending(&br->ip6_own_query.timer) &&
+	    !timer_pending(&br->ip6_other_query.timer))
+		goto update;
+
+	if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
+		goto update;
+
+	return false;
+
+update:
+	br->ip6_querier.addr.u.ip6 = *saddr;
+
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip6_querier.port, port);
+
+	return true;
+}
+#endif
+
+static bool br_multicast_select_querier(struct net_bridge *br,
+					struct net_bridge_port *port,
+					struct br_ip *saddr)
+{
+	switch (saddr->proto) {
+	case htons(ETH_P_IP):
+		return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
+#endif
+	}
+
+	return false;
+}
+
+static void
+br_multicast_update_query_timer(struct net_bridge *br,
+				struct bridge_mcast_other_query *query,
+				unsigned long max_delay)
+{
+	if (!timer_pending(&query->timer))
+		query->delay_time = jiffies + max_delay;
+
+	mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
 }
 
 /*
@@ -1125,16 +1206,14 @@
 
 static void br_multicast_query_received(struct net_bridge *br,
 					struct net_bridge_port *port,
-					struct bridge_mcast_querier *querier,
-					int saddr,
-					bool is_general_query,
+					struct bridge_mcast_other_query *query,
+					struct br_ip *saddr,
 					unsigned long max_delay)
 {
-	if (saddr && is_general_query)
-		br_multicast_update_querier_timer(br, querier, max_delay);
-	else if (timer_pending(&querier->timer))
+	if (!br_multicast_select_querier(br, port, saddr))
 		return;
 
+	br_multicast_update_query_timer(br, query, max_delay);
 	br_multicast_mark_router(br, port);
 }
 
@@ -1149,6 +1228,7 @@
 	struct igmpv3_query *ih3;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
+	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	__be32 group;
@@ -1190,11 +1270,14 @@
 		goto out;
 	}
 
-	br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr,
-				    !group, max_delay);
+	if (!group) {
+		saddr.proto = htons(ETH_P_IP);
+		saddr.u.ip4 = iph->saddr;
 
-	if (!group)
+		br_multicast_query_received(br, port, &br->ip4_other_query,
+					    &saddr, max_delay);
 		goto out;
+	}
 
 	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
@@ -1234,6 +1317,7 @@
 	struct mld2_query *mld2q;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
+	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	const struct in6_addr *group = NULL;
@@ -1282,12 +1366,16 @@
 		goto out;
 	}
 
-	br_multicast_query_received(br, port, &br->ip6_querier,
-				    !ipv6_addr_any(&ip6h->saddr),
-				    is_general_query, max_delay);
+	if (is_general_query) {
+		saddr.proto = htons(ETH_P_IPV6);
+		saddr.u.ip6 = ip6h->saddr;
 
-	if (!group)
+		br_multicast_query_received(br, port, &br->ip6_other_query,
+					    &saddr, max_delay);
 		goto out;
+	} else if (!group) {
+		goto out;
+	}
 
 	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
@@ -1315,11 +1403,12 @@
 }
 #endif
 
-static void br_multicast_leave_group(struct net_bridge *br,
-				     struct net_bridge_port *port,
-				     struct br_ip *group,
-				     struct bridge_mcast_querier *querier,
-				     struct bridge_mcast_query *query)
+static void
+br_multicast_leave_group(struct net_bridge *br,
+			 struct net_bridge_port *port,
+			 struct br_ip *group,
+			 struct bridge_mcast_other_query *other_query,
+			 struct bridge_mcast_own_query *own_query)
 {
 	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
@@ -1330,7 +1419,7 @@
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
 	    (port && port->state == BR_STATE_DISABLED) ||
-	    timer_pending(&querier->timer))
+	    timer_pending(&other_query->timer))
 		goto out;
 
 	mdb = mlock_dereference(br->mdb, br);
@@ -1344,7 +1433,7 @@
 		time = jiffies + br->multicast_last_member_count *
 				 br->multicast_last_member_interval;
 
-		mod_timer(&query->timer, time);
+		mod_timer(&own_query->timer, time);
 
 		for (p = mlock_dereference(mp->ports, br);
 		     p != NULL;
@@ -1425,17 +1514,19 @@
 					 __u16 vid)
 {
 	struct br_ip br_group;
-	struct bridge_mcast_query *query = port ? &port->ip4_query :
-						  &br->ip4_query;
+	struct bridge_mcast_own_query *own_query;
 
 	if (ipv4_is_local_multicast(group))
 		return;
 
+	own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
 	br_group.vid = vid;
 
-	br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query);
+	br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+				 own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1445,18 +1536,19 @@
 					 __u16 vid)
 {
 	struct br_ip br_group;
-	struct bridge_mcast_query *query = port ? &port->ip6_query :
-						  &br->ip6_query;
-
+	struct bridge_mcast_own_query *own_query;
 
 	if (ipv6_addr_is_ll_all_nodes(group))
 		return;
 
+	own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 	br_group.vid = vid;
 
-	br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query);
+	br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+				 own_query);
 }
 #endif
 
@@ -1723,12 +1815,14 @@
 }
 
 static void br_multicast_query_expired(struct net_bridge *br,
-				       struct bridge_mcast_query *query)
+				       struct bridge_mcast_own_query *query,
+				       struct bridge_mcast_querier *querier)
 {
 	spin_lock(&br->multicast_lock);
 	if (query->startup_sent < br->multicast_startup_query_count)
 		query->startup_sent++;
 
+	rcu_assign_pointer(querier->port, NULL); /* forget querier port */
 	br_multicast_send_query(br, NULL, query);
 	spin_unlock(&br->multicast_lock);
 }
@@ -1737,7 +1831,7 @@
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip4_query);
+	br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1745,7 +1839,7 @@
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip6_query);
+	br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
 }
 #endif
 
@@ -1767,28 +1861,30 @@
 	br->multicast_querier_interval = 255 * HZ;
 	br->multicast_membership_interval = 260 * HZ;
 
-	br->ip4_querier.delay_time = 0;
+	br->ip4_other_query.delay_time = 0;
+	br->ip4_querier.port = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
-	br->ip6_querier.delay_time = 0;
+	br->ip6_other_query.delay_time = 0;
+	br->ip6_querier.port = NULL;
 #endif
 
 	spin_lock_init(&br->multicast_lock);
 	setup_timer(&br->multicast_router_timer,
 		    br_multicast_local_router_expired, 0);
-	setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired,
-		    (unsigned long)br);
-	setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired,
+	setup_timer(&br->ip4_other_query.timer,
+		    br_ip4_multicast_querier_expired, (unsigned long)br);
+	setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
 		    (unsigned long)br);
 #if IS_ENABLED(CONFIG_IPV6)
-	setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired,
-		    (unsigned long)br);
-	setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired,
+	setup_timer(&br->ip6_other_query.timer,
+		    br_ip6_multicast_querier_expired, (unsigned long)br);
+	setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
 		    (unsigned long)br);
 #endif
 }
 
 static void __br_multicast_open(struct net_bridge *br,
-				struct bridge_mcast_query *query)
+				struct bridge_mcast_own_query *query)
 {
 	query->startup_sent = 0;
 
@@ -1800,9 +1896,9 @@
 
 void br_multicast_open(struct net_bridge *br)
 {
-	__br_multicast_open(br, &br->ip4_query);
+	__br_multicast_open(br, &br->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
-	__br_multicast_open(br, &br->ip6_query);
+	__br_multicast_open(br, &br->ip6_own_query);
 #endif
 }
 
@@ -1815,11 +1911,11 @@
 	int i;
 
 	del_timer_sync(&br->multicast_router_timer);
-	del_timer_sync(&br->ip4_querier.timer);
-	del_timer_sync(&br->ip4_query.timer);
+	del_timer_sync(&br->ip4_other_query.timer);
+	del_timer_sync(&br->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
-	del_timer_sync(&br->ip6_querier.timer);
-	del_timer_sync(&br->ip6_query.timer);
+	del_timer_sync(&br->ip6_other_query.timer);
+	del_timer_sync(&br->ip6_own_query.timer);
 #endif
 
 	spin_lock_bh(&br->multicast_lock);
@@ -1923,7 +2019,7 @@
 }
 
 static void br_multicast_start_querier(struct net_bridge *br,
-				       struct bridge_mcast_query *query)
+				       struct bridge_mcast_own_query *query)
 {
 	struct net_bridge_port *port;
 
@@ -1934,11 +2030,11 @@
 		    port->state == BR_STATE_BLOCKING)
 			continue;
 
-		if (query == &br->ip4_query)
-			br_multicast_enable(&port->ip4_query);
+		if (query == &br->ip4_own_query)
+			br_multicast_enable(&port->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
 		else
-			br_multicast_enable(&port->ip6_query);
+			br_multicast_enable(&port->ip6_own_query);
 #endif
 	}
 }
@@ -1974,9 +2070,9 @@
 			goto rollback;
 	}
 
-	br_multicast_start_querier(br, &br->ip4_query);
+	br_multicast_start_querier(br, &br->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
-	br_multicast_start_querier(br, &br->ip6_query);
+	br_multicast_start_querier(br, &br->ip6_own_query);
 #endif
 
 unlock:
@@ -2001,16 +2097,16 @@
 
 	max_delay = br->multicast_query_response_interval;
 
-	if (!timer_pending(&br->ip4_querier.timer))
-		br->ip4_querier.delay_time = jiffies + max_delay;
+	if (!timer_pending(&br->ip4_other_query.timer))
+		br->ip4_other_query.delay_time = jiffies + max_delay;
 
-	br_multicast_start_querier(br, &br->ip4_query);
+	br_multicast_start_querier(br, &br->ip4_own_query);
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (!timer_pending(&br->ip6_querier.timer))
-		br->ip6_querier.delay_time = jiffies + max_delay;
+	if (!timer_pending(&br->ip6_other_query.timer))
+		br->ip6_other_query.delay_time = jiffies + max_delay;
 
-	br_multicast_start_querier(br, &br->ip6_query);
+	br_multicast_start_querier(br, &br->ip6_own_query);
 #endif
 
 unlock:
@@ -2061,3 +2157,109 @@
 
 	return err;
 }
+
+/**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev:	The bridge port adjacent to which to retrieve addresses
+ * @br_ip_list:	The list to store found, snooped multicast IP addresses in
+ *
+ * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
+ * snooping feature on all bridge ports of dev's bridge device, excluding
+ * the addresses from dev itself.
+ *
+ * Returns the number of items added to br_ip_list; this is 0 if dev is not a
+ * bridge port and a partial count if an allocation fails part-way through.
+ *
+ * Notes:
+ * - br_ip_list needs to be initialized by caller
+ * - br_ip_list might contain duplicates (needs to be taken care of by caller)
+ * - br_ip_list needs to be freed by caller
+ */
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	struct net_bridge_port_group *group;
+	struct br_ip_list *entry;
+	int count = 0;
+
+	rcu_read_lock();
+	if (!br_ip_list || !br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	list_for_each_entry_rcu(port, &br->port_list, list) {
+		if (!port->dev || port->dev == dev)
+			continue;
+
+		hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
+			entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+			if (!entry)
+				goto unlock;
+
+			entry->addr = group->addr;
+			list_add(&entry->list, br_ip_list);
+			count++;
+		}
+	}
+
+unlock:
+	rcu_read_unlock();
+	return count;
+}
+EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
+
+/**
+ * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a querier
+ * @proto: Host-order ethertype to check: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a selected querier is behind one of the other ports of this
+ * bridge. Otherwise, or for any other @proto value, returns false.
+ */
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	bool ret = false;
+
+	rcu_read_lock();
+	if (!br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	switch (proto) {
+	case ETH_P_IP:
+		if (!timer_pending(&br->ip4_other_query.timer) ||
+		    rcu_dereference(br->ip4_querier.port) == port)
+			goto unlock;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case ETH_P_IPV6:
+		if (!timer_pending(&br->ip6_other_query.timer) ||
+		    rcu_dereference(br->ip6_querier.port) == port)
+			goto unlock;
+		break;
+#endif
+	default:
+		goto unlock;
+	}
+
+	ret = true;
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2acf7fa..a615264 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c

@@ -535,7 +535,7 @@
 	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
 		return br;
 
-	vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
+	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
 				    vlan_tx_tag_get(skb) & VLAN_VID_MASK);
 
 	return vlan ? vlan : br;

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e8844d9..26edb51 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c

@@ -328,6 +328,7 @@
 static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 {
 	int err;
+	unsigned long old_flags = p->flags;
 
 	br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
 	br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
@@ -353,6 +354,8 @@
 		if (err)
 			return err;
 	}
+
+	br_port_flags_change(p, old_flags ^ p->flags);
 	return 0;
 }
 

diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
deleted file mode 100644
index 2998dd1..0000000
--- a/net/bridge/br_notify.c
+++ /dev/null

@@ -1,118 +0,0 @@
-/*
- *	Device event handling
- *	Linux ethernet bridge
- *
- *	Authors:
- *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/net_namespace.h>
-
-#include "br_private.h"
-
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr);
-
-struct notifier_block br_device_notifier = {
-	.notifier_call = br_device_event
-};
-
-/*
- * Handle changes in state of network devices enslaved to a bridge.
- *
- * Note: don't care about up/down if bridge itself is down, because
- *     port state is checked when bridge is brought up.
- */
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct net_bridge_port *p;
-	struct net_bridge *br;
-	bool changed_addr;
-	int err;
-
-	/* register of bridge completed, add sysfs entries */
-	if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
-		br_sysfs_addbr(dev);
-		return NOTIFY_DONE;
-	}
-
-	/* not a port of a bridge */
-	p = br_port_get_rtnl(dev);
-	if (!p)
-		return NOTIFY_DONE;
-
-	br = p->br;
-
-	switch (event) {
-	case NETDEV_CHANGEMTU:
-		dev_set_mtu(br->dev, br_min_mtu(br));
-		break;
-
-	case NETDEV_CHANGEADDR:
-		spin_lock_bh(&br->lock);
-		br_fdb_changeaddr(p, dev->dev_addr);
-		changed_addr = br_stp_recalculate_bridge_id(br);
-		spin_unlock_bh(&br->lock);
-
-		if (changed_addr)
-			call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
-
-		break;
-
-	case NETDEV_CHANGE:
-		br_port_carrier_check(p);
-		break;
-
-	case NETDEV_FEAT_CHANGE:
-		netdev_update_features(br->dev);
-		break;
-
-	case NETDEV_DOWN:
-		spin_lock_bh(&br->lock);
-		if (br->dev->flags & IFF_UP)
-			br_stp_disable_port(p);
-		spin_unlock_bh(&br->lock);
-		break;
-
-	case NETDEV_UP:
-		if (netif_running(br->dev) && netif_oper_up(dev)) {
-			spin_lock_bh(&br->lock);
-			br_stp_enable_port(p);
-			spin_unlock_bh(&br->lock);
-		}
-		break;
-
-	case NETDEV_UNREGISTER:
-		br_del_if(br, dev);
-		break;
-
-	case NETDEV_CHANGENAME:
-		err = br_sysfs_renameif(p);
-		if (err)
-			return notifier_from_errno(err);
-		break;
-
-	case NETDEV_PRE_TYPE_CHANGE:
-		/* Forbid underlaying device to change its type. */
-		return NOTIFY_BAD;
-
-	case NETDEV_RESEND_IGMP:
-		/* Propagate to master device */
-		call_netdevice_notifiers(event, br->dev);
-		break;
-	}
-
-	/* Events that may cause spanning tree to refresh */
-	if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
-	    event == NETDEV_CHANGE || event == NETDEV_DOWN)
-		br_ifinfo_notify(RTM_NEWLINK, p);
-
-	return NOTIFY_DONE;
-}

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 59d3a85..23caf5b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h

@@ -35,6 +35,8 @@
 #define BR_GROUPFWD_DEFAULT	0
 /* Don't allow forwarding control protocols like STP and LLDP */
 #define BR_GROUPFWD_RESTRICTED	0x4007u
+/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
+#define BR_GROUPFWD_8021AD	0xB801u
 
 /* Path to usermode spanning tree program */
 #define BR_STP_PROG	"/sbin/bridge-stp"
@@ -54,30 +56,24 @@
 	unsigned char	addr[ETH_ALEN];
 };
 
-struct br_ip
-{
-	union {
-		__be32	ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct in6_addr ip6;
-#endif
-	} u;
-	__be16		proto;
-	__u16		vid;
-};
-
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 /* our own querier */
-struct bridge_mcast_query {
+struct bridge_mcast_own_query {
 	struct timer_list	timer;
 	u32			startup_sent;
 };
 
 /* other querier */
-struct bridge_mcast_querier {
+struct bridge_mcast_other_query {
 	struct timer_list		timer;
 	unsigned long			delay_time;
 };
+
+/* selected querier */
+struct bridge_mcast_querier {
+	struct br_ip addr;
+	struct net_bridge_port __rcu	*port;
+};
 #endif
 
 struct net_port_vlans {
@@ -174,11 +170,13 @@
 #define BR_ADMIN_COST		0x00000010
 #define BR_LEARNING		0x00000020
 #define BR_FLOOD		0x00000040
+#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
+#define BR_PROMISC		0x00000080
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	struct bridge_mcast_query	ip4_query;
+	struct bridge_mcast_own_query	ip4_own_query;
 #if IS_ENABLED(CONFIG_IPV6)
-	struct bridge_mcast_query	ip6_query;
+	struct bridge_mcast_own_query	ip6_own_query;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 	unsigned char			multicast_router;
 	struct timer_list		multicast_router_timer;
@@ -198,6 +196,9 @@
 #endif
 };
 
+#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
+#define br_promisc_port(p) ((p)->flags & BR_PROMISC)
+
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
 
 static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
@@ -227,6 +228,7 @@
 	bool				nf_call_arptables;
 #endif
 	u16				group_fwd_mask;
+	u16				group_fwd_mask_required;
 
 	/* STP */
 	bridge_id			designated_root;
@@ -241,6 +243,7 @@
 	unsigned long			bridge_forward_delay;
 
 	u8				group_addr[ETH_ALEN];
+	bool				group_addr_set;
 	u16				root_port;
 
 	enum {
@@ -277,11 +280,13 @@
 	struct hlist_head		router_list;
 
 	struct timer_list		multicast_router_timer;
+	struct bridge_mcast_other_query	ip4_other_query;
+	struct bridge_mcast_own_query	ip4_own_query;
 	struct bridge_mcast_querier	ip4_querier;
-	struct bridge_mcast_query	ip4_query;
 #if IS_ENABLED(CONFIG_IPV6)
+	struct bridge_mcast_other_query	ip6_other_query;
+	struct bridge_mcast_own_query	ip6_own_query;
 	struct bridge_mcast_querier	ip6_querier;
-	struct bridge_mcast_query	ip6_query;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif
 
@@ -290,8 +295,10 @@
 	struct timer_list		topology_change_timer;
 	struct timer_list		gc_timer;
 	struct kobject			*ifobj;
+	u32				auto_cnt;
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	u8				vlan_enabled;
+	__be16				vlan_proto;
 	struct net_port_vlans __rcu	*vlan_info;
 #endif
 };
@@ -327,8 +334,6 @@
 #define br_debug(br, format, args...)			\
 	pr_debug("%s: " format,  (br)->dev->name, ##args)
 
-extern struct notifier_block br_device_notifier;
-
 /* called under bridge lock */
 static inline int br_is_root_bridge(const struct net_bridge *br)
 {
@@ -395,6 +400,8 @@
 	       const unsigned char *addr, u16 nlh_flags);
 int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		struct net_device *dev, int idx);
+int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
+void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 
 /* br_forward.c */
 void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
@@ -415,6 +422,8 @@
 int br_min_mtu(const struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
+void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
+void br_manage_promisc(struct net_bridge *br);
 
 /* br_input.c */
 int br_handle_frame_finish(struct sk_buff *skb);
@@ -485,7 +494,7 @@
 
 static inline bool
 __br_multicast_querier_exists(struct net_bridge *br,
-			      struct bridge_mcast_querier *querier)
+			      struct bridge_mcast_other_query *querier)
 {
 	return time_is_before_jiffies(querier->delay_time) &&
 	       (br->multicast_querier || timer_pending(&querier->timer));
@@ -496,10 +505,10 @@
 {
 	switch (eth->h_proto) {
 	case (htons(ETH_P_IP)):
-		return __br_multicast_querier_exists(br, &br->ip4_querier);
+		return __br_multicast_querier_exists(br, &br->ip4_other_query);
 #if IS_ENABLED(CONFIG_IPV6)
 	case (htons(ETH_P_IPV6)):
-		return __br_multicast_querier_exists(br, &br->ip6_querier);
+		return __br_multicast_querier_exists(br, &br->ip6_other_query);
 #endif
 	default:
 		return false;
@@ -589,7 +598,10 @@
 int br_vlan_delete(struct net_bridge *br, u16 vid);
 void br_vlan_flush(struct net_bridge *br);
 bool br_vlan_find(struct net_bridge *br, u16 vid);
+void br_recalculate_fwd_mask(struct net_bridge *br);
 int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
+void br_vlan_init(struct net_bridge *br);
 int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
 int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
 void nbp_vlan_flush(struct net_bridge_port *port);
@@ -633,6 +645,10 @@
 	return v->pvid ?: VLAN_N_VID;
 }
 
+static inline int br_vlan_enabled(struct net_bridge *br)
+{
+	return br->vlan_enabled;
+}
 #else
 static inline bool br_allowed_ingress(struct net_bridge *br,
 				      struct net_port_vlans *v,
@@ -681,6 +697,14 @@
 	return false;
 }
 
+static inline void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+}
+
+static inline void br_vlan_init(struct net_bridge *br)
+{
+}
+
 static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
 {
 	return -EOPNOTSUPP;
@@ -719,6 +743,11 @@
 {
 	return VLAN_N_VID;	/* Returns invalid vid */
 }
+
+static inline int br_vlan_enabled(struct net_bridge *br)
+{
+	return 0;
+}
 #endif
 
 /* br_netfilter.c */

diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8dac6555..c9e2572 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c

@@ -312,10 +312,19 @@
 	    new_addr[5] == 3)		/* 802.1X PAE address */
 		return -EINVAL;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	spin_lock_bh(&br->lock);
 	for (i = 0; i < 6; i++)
 		br->group_addr[i] = new_addr[i];
 	spin_unlock_bh(&br->lock);
+
+	br->group_addr_set = true;
+	br_recalculate_fwd_mask(br);
+
+	rtnl_unlock();
+
 	return len;
 }
 
@@ -700,6 +709,22 @@
 	return store_bridge_parm(d, buf, len, br_vlan_filter_toggle);
 }
 static DEVICE_ATTR_RW(vlan_filtering);
+
+static ssize_t vlan_protocol_show(struct device *d,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto));
+}
+
+static ssize_t vlan_protocol_store(struct device *d,
+				   struct device_attribute *attr,
+				   const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_vlan_set_proto);
+}
+static DEVICE_ATTR_RW(vlan_protocol);
 #endif
 
 static struct attribute *bridge_attrs[] = {
@@ -745,6 +770,7 @@
 #endif
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	&dev_attr_vlan_filtering.attr,
+	&dev_attr_vlan_protocol.attr,
 #endif
 	NULL
 };

diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index dd595bd..e561cd5 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c

@@ -41,20 +41,30 @@
 }								\
 static int store_##_name(struct net_bridge_port *p, unsigned long v) \
 {								\
-	unsigned long flags = p->flags;				\
-	if (v)							\
-		flags |= _mask;					\
-	else							\
-		flags &= ~_mask;				\
-	if (flags != p->flags) {				\
-		p->flags = flags;				\
-		br_ifinfo_notify(RTM_NEWLINK, p);		\
-	}							\
-	return 0;						\
+	return store_flag(p, v, _mask);				\
 }								\
 static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR,			\
 		   show_##_name, store_##_name)
 
+static int store_flag(struct net_bridge_port *p, unsigned long v,
+		      unsigned long mask)
+{
+	unsigned long flags;
+
+	flags = p->flags;
+
+	if (v)
+		flags |= mask;
+	else
+		flags &= ~mask;
+
+	if (flags != p->flags) {
+		p->flags = flags;
+		br_port_flags_change(p, mask);
+		br_ifinfo_notify(RTM_NEWLINK, p);
+	}
+	return 0;
+}
 
 static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
 {

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5fee2fe..2b2774f 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c

@@ -60,7 +60,7 @@
 		 * that ever changes this code will allow tagged
 		 * traffic to enter the bridge.
 		 */
-		err = vlan_vid_add(dev, htons(ETH_P_8021Q), vid);
+		err = vlan_vid_add(dev, br->vlan_proto, vid);
 		if (err)
 			return err;
 	}
@@ -80,7 +80,7 @@
 
 out_filt:
 	if (p)
-		vlan_vid_del(dev, htons(ETH_P_8021Q), vid);
+		vlan_vid_del(dev, br->vlan_proto, vid);
 	return err;
 }
 
@@ -92,8 +92,10 @@
 	__vlan_delete_pvid(v, vid);
 	clear_bit(vid, v->untagged_bitmap);
 
-	if (v->port_idx)
-		vlan_vid_del(v->parent.port->dev, htons(ETH_P_8021Q), vid);
+	if (v->port_idx) {
+		struct net_bridge_port *p = v->parent.port;
+		vlan_vid_del(p->dev, p->br->vlan_proto, vid);
+	}
 
 	clear_bit(vid, v->vlan_bitmap);
 	v->num_vlans--;
@@ -158,7 +160,8 @@
 bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 			struct sk_buff *skb, u16 *vid)
 {
-	int err;
+	bool tagged;
+	__be16 proto;
 
 	/* If VLAN filtering is disabled on the bridge, all packets are
 	 * permitted.
@@ -172,19 +175,41 @@
 	if (!v)
 		goto drop;
 
+	proto = br->vlan_proto;
+
 	/* If vlan tx offload is disabled on bridge device and frame was
 	 * sent from vlan device on the bridge device, it does not have
 	 * HW accelerated vlan tag.
 	 */
 	if (unlikely(!vlan_tx_tag_present(skb) &&
-		     (skb->protocol == htons(ETH_P_8021Q) ||
-		      skb->protocol == htons(ETH_P_8021AD)))) {
+		     skb->protocol == proto)) {
 		skb = vlan_untag(skb);
 		if (unlikely(!skb))
 			return false;
 	}
 
-	err = br_vlan_get_tag(skb, vid);
+	if (!br_vlan_get_tag(skb, vid)) {
+		/* Tagged frame */
+		if (skb->vlan_proto != proto) {
+			/* Protocol-mismatch, empty out vlan_tci for new tag */
+			skb_push(skb, ETH_HLEN);
+			skb = __vlan_put_tag(skb, skb->vlan_proto,
+					     vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				return false;
+
+			skb_pull(skb, ETH_HLEN);
+			skb_reset_mac_len(skb);
+			*vid = 0;
+			tagged = false;
+		} else {
+			tagged = true;
+		}
+	} else {
+		/* Untagged frame */
+		tagged = false;
+	}
+
 	if (!*vid) {
 		u16 pvid = br_get_pvid(v);
 
@@ -199,9 +224,9 @@
 		 * ingress frame is considered to belong to this vlan.
 		 */
 		*vid = pvid;
-		if (likely(err))
+		if (likely(!tagged))
 			/* Untagged Frame. */
-			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+			__vlan_hwaccel_put_tag(skb, proto, pvid);
 		else
 			/* Priority-tagged Frame.
 			 * At this point, We know that skb->vlan_tci had
@@ -254,7 +279,9 @@
 	if (!v)
 		return false;
 
-	br_vlan_get_tag(skb, vid);
+	if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
+		*vid = 0;
+
 	if (!*vid) {
 		*vid = br_get_pvid(v);
 		if (*vid == VLAN_N_VID)
@@ -351,6 +378,33 @@
 	return found;
 }
 
+/* Must be protected by RTNL. Does nothing once br->group_addr_set is true. */
+static void recalculate_group_addr(struct net_bridge *br)
+{
+	if (br->group_addr_set)
+		return;
+
+	spin_lock_bh(&br->lock);
+	if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) {
+		/* Bridge Group Address */
+		br->group_addr[5] = 0x00;
+	} else { /* vlan_enabled && ETH_P_8021AD */
+		/* Provider Bridge Group Address */
+		br->group_addr[5] = 0x08;
+	}
+	spin_unlock_bh(&br->lock);
+}
+
+/* Must be protected by RTNL. Recomputes br->group_fwd_mask_required only. */
+void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+	if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q))
+		br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
+	else /* vlan_enabled && ETH_P_8021AD */
+		br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
+					      ~(1u << br->group_addr[5]);
+}
+
 int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
 {
 	if (!rtnl_trylock())
@@ -360,12 +414,88 @@
 		goto unlock;
 
 	br->vlan_enabled = val;
+	br_manage_promisc(br);
+	recalculate_group_addr(br);
+	br_recalculate_fwd_mask(br);
 
 unlock:
 	rtnl_unlock();
 	return 0;
 }
 
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+{
+	int err = 0;
+	struct net_bridge_port *p;
+	struct net_port_vlans *pv;
+	__be16 proto, oldproto;
+	u16 vid, errvid;
+
+	if (val != ETH_P_8021Q && val != ETH_P_8021AD)
+		return -EPROTONOSUPPORT;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	proto = htons(val);
+	if (br->vlan_proto == proto)
+		goto unlock;
+
+	/* Add VLANs for the new proto to the device filter. */
+	list_for_each_entry(p, &br->port_list, list) {
+		pv = rtnl_dereference(p->vlan_info);
+		if (!pv)
+			continue;
+
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+			err = vlan_vid_add(p->dev, proto, vid);
+			if (err)
+				goto err_filt;
+		}
+	}
+
+	oldproto = br->vlan_proto;
+	br->vlan_proto = proto;
+
+	recalculate_group_addr(br);
+	br_recalculate_fwd_mask(br);
+
+	/* Delete VLANs for the old proto from the device filter. */
+	list_for_each_entry(p, &br->port_list, list) {
+		pv = rtnl_dereference(p->vlan_info);
+		if (!pv)
+			continue;
+
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+			vlan_vid_del(p->dev, oldproto, vid);
+	}
+
+unlock:
+	rtnl_unlock();
+	return err;
+
+err_filt:
+	errvid = vid;
+	for_each_set_bit(vid, pv->vlan_bitmap, errvid)
+		vlan_vid_del(p->dev, proto, vid);
+
+	list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+		pv = rtnl_dereference(p->vlan_info);
+		if (!pv)
+			continue;
+
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+			vlan_vid_del(p->dev, proto, vid);
+	}
+
+	goto unlock;
+}
+
+void br_vlan_init(struct net_bridge *br)
+{
+	br->vlan_proto = htons(ETH_P_8021Q);
+}
+
 /* Must be protected by RTNL.
  * Must be called with vid in range from 1 to 4094 inclusive.
  */
@@ -432,7 +562,7 @@
 		return;
 
 	for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
-		vlan_vid_del(port->dev, htons(ETH_P_8021Q), vid);
+		vlan_vid_del(port->dev, port->br->vlan_proto, vid);
 
 	__vlan_flush(pv);
 }

diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0..629dc77 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig

@@ -2,14 +2,23 @@
 # Bridge netfilter configuration
 #
 #
-config NF_TABLES_BRIDGE
-	depends on NF_TABLES
+menuconfig NF_TABLES_BRIDGE
+	depends on BRIDGE && NETFILTER && NF_TABLES
 	tristate "Ethernet Bridge nf_tables support"
 
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+	tristate "Netfilter nf_table bridge meta support"
+	depends on NFT_META
+	help
+	  Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
-	depends on BRIDGE && NETFILTER
-	select NETFILTER_XTABLES
+	depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
 	help
 	  ebtables is a general, extensible frame/packet identification
 	  framework. Say 'Y' or 'M' here if you want to do Ethernet

diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f..6f2f394 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile

@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META)  += nft_meta_bridge.o
 
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 

diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 0000000..4f02109
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c

@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+				     struct nft_data data[NFT_REG_MAX + 1],
+				     const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct net_device *in = pkt->in, *out = pkt->out;
+	struct nft_data *dest = &data[priv->dreg];
+	const struct net_bridge_port *p;
+
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+		if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+			goto err;
+		break;
+	case NFT_META_BRI_OIFNAME:
+		if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+			goto err;
+		break;
+	default:
+		goto out;	/* any other key: nft_meta_get_eval() */
+	}
+
+	strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+	return;
+out:	/* key is not one of the two bridge keys handled above */
+	return nft_meta_get_eval(expr, data, pkt);
+err:	/* device missing, or br_port_get_rcu() found no port for it */
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr,
+				    const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+	case NFT_META_BRI_OIFNAME:
+		break;
+	default:
+		return nft_meta_get_init(ctx, expr, tb);
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_bridge_get_eval,
+	.init		= nft_meta_bridge_get_init,
+	.dump		= nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_set_eval,
+	.init		= nft_meta_set_init,
+	.dump		= nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+			   const struct nlattr * const tb[])
+{
+	if (tb[NFTA_META_KEY] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG])
+		return &nft_meta_bridge_get_ops;
+
+	if (tb[NFTA_META_SREG])
+		return &nft_meta_bridge_set_ops;
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+	.family         = NFPROTO_BRIDGE,
+	.name           = "meta",
+	.select_ops     = &nft_meta_bridge_select_ops,
+	.policy         = nft_meta_policy,
+	.maxattr        = NFTA_META_MAX,
+	.owner          = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+	return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");

diff --git a/net/can/af_can.c b/net/can/af_can.c
index a27f8aa..ce82337 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c

@@ -338,6 +338,29 @@
 }
 
 /**
+ * effhash - fold a 29 bit CAN identifier into an EFF receive list index
+ * @can_id: 29 bit CAN identifier
+ *
+ * Description:
+ *  Subscriptions for _single_ EFF CAN frames are spread over hash buckets
+ *  rather than one linked list; the identifier is XOR-folded down to 10 bits.
+ *  (see CAN_EFF_RCV_HASH_BITS definition)
+ *
+ * Return:
+ *  Hash value from 0x000 - 0x3FF ( enforced by CAN_EFF_RCV_HASH_BITS mask )
+ */
+static unsigned int effhash(canid_t can_id)
+{
+	const unsigned int mask = (1 << CAN_EFF_RCV_HASH_BITS) - 1;
+	unsigned int hash = can_id;
+
+	hash ^= can_id >> CAN_EFF_RCV_HASH_BITS;
+	hash ^= can_id >> (2 * CAN_EFF_RCV_HASH_BITS);
+
+	return hash & mask;
+}
+
+/**
  * find_rcv_list - determine optimal filterlist inside device filter struct
  * @can_id: pointer to CAN identifier of a given can_filter
  * @mask: pointer to CAN mask of a given can_filter
@@ -400,10 +423,8 @@
 	    !(*can_id & CAN_RTR_FLAG)) {
 
 		if (*can_id & CAN_EFF_FLAG) {
-			if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) {
-				/* RFC: a future use-case for hash-tables? */
-				return &d->rx[RX_EFF];
-			}
+			if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS))
+				return &d->rx_eff[effhash(*can_id)];
 		} else {
 			if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
 				return &d->rx_sff[*can_id];
@@ -632,7 +653,7 @@
 		return matches;
 
 	if (can_id & CAN_EFF_FLAG) {
-		hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) {
+		hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) {
 			if (r->can_id == can_id) {
 				deliver(skb, r);
 				matches++;

diff --git a/net/can/af_can.h b/net/can/af_can.h
index 6de58b4..fca0fe9 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h

@@ -59,12 +59,17 @@
 	char *ident;
 };
 
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX };
+#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
+#define CAN_EFF_RCV_HASH_BITS 10
+#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
+
+enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
 
 /* per device receive filters linked at dev->ml_priv */
 struct dev_rcv_lists {
 	struct hlist_head rx[RX_MAX];
-	struct hlist_head rx_sff[0x800];
+	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
+	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
 	int remove_on_zero_entries;
 	int entries;
 };

diff --git a/net/can/proc.c b/net/can/proc.c
index b543470..1a19b98 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c

@@ -80,7 +80,6 @@
 	[RX_ALL] = "rx_all",
 	[RX_FIL] = "rx_fil",
 	[RX_INV] = "rx_inv",
-	[RX_EFF] = "rx_eff",
 };
 
 /*
@@ -389,25 +388,26 @@
 	.release	= single_release,
 };
 
-static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m,
-						 struct net_device *dev,
-						 struct dev_rcv_lists *d)
+static inline void can_rcvlist_proc_show_array(struct seq_file *m,
+					       struct net_device *dev,
+					       struct hlist_head *rcv_array,
+					       unsigned int rcv_array_sz)
 {
-	int i;
+	unsigned int i;
 	int all_empty = 1;
 
 	/* check whether at least one list is non-empty */
-	for (i = 0; i < 0x800; i++)
-		if (!hlist_empty(&d->rx_sff[i])) {
+	for (i = 0; i < rcv_array_sz; i++)
+		if (!hlist_empty(&rcv_array[i])) {
 			all_empty = 0;
 			break;
 		}
 
 	if (!all_empty) {
 		can_print_recv_banner(m);
-		for (i = 0; i < 0x800; i++) {
-			if (!hlist_empty(&d->rx_sff[i]))
-				can_print_rcvlist(m, &d->rx_sff[i], dev);
+		for (i = 0; i < rcv_array_sz; i++) {
+			if (!hlist_empty(&rcv_array[i]))
+				can_print_rcvlist(m, &rcv_array[i], dev);
 		}
 	} else
 		seq_printf(m, "  (%s: no entry)\n", DNAME(dev));
@@ -425,12 +425,15 @@
 
 	/* sff receive list for 'all' CAN devices (dev == NULL) */
 	d = &can_rx_alldev_list;
-	can_rcvlist_sff_proc_show_one(m, NULL, d);
+	can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
 
 	/* sff receive list for registered CAN devices */
 	for_each_netdev_rcu(&init_net, dev) {
-		if (dev->type == ARPHRD_CAN && dev->ml_priv)
-			can_rcvlist_sff_proc_show_one(m, dev, dev->ml_priv);
+		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+			d = dev->ml_priv;
+			can_rcvlist_proc_show_array(m, dev, d->rx_sff,
+						    ARRAY_SIZE(d->rx_sff));
+		}
 	}
 
 	rcu_read_unlock();
@@ -452,6 +455,49 @@
 	.release	= single_release,
 };
 
+
+static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
+{
+	struct net_device *dev;
+	struct dev_rcv_lists *d;
+
+	/* RX_EFF */
+	seq_puts(m, "\nreceive list 'rx_eff':\n");
+
+	rcu_read_lock();
+
+	/* eff receive list for 'all' CAN devices (dev == NULL) */
+	d = &can_rx_alldev_list;
+	can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
+
+	/* eff receive list for registered CAN devices */
+	for_each_netdev_rcu(&init_net, dev) {
+		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+			d = dev->ml_priv;
+			can_rcvlist_proc_show_array(m, dev, d->rx_eff,
+						    ARRAY_SIZE(d->rx_eff));
+		}
+	}
+
+	rcu_read_unlock();
+
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, can_rcvlist_eff_proc_show, NULL);
+}
+
+static const struct file_operations can_rcvlist_eff_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= can_rcvlist_eff_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /*
  * proc utility functions
  */
@@ -491,8 +537,8 @@
 					   &can_rcvlist_proc_fops, (void *)RX_FIL);
 	pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir,
 					   &can_rcvlist_proc_fops, (void *)RX_INV);
-	pde_rcvlist_eff = proc_create_data(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
-					   &can_rcvlist_proc_fops, (void *)RX_EFF);
+	pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
+				      &can_rcvlist_eff_proc_fops);
 	pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir,
 				      &can_rcvlist_sff_proc_fops);
 }

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721..1675021 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c

@@ -72,6 +72,8 @@
 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
 	case CEPH_MSG_STATFS: return "statfs";
 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";

diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 10421a4..d1a62c6 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c

@@ -126,9 +126,13 @@
 		req = rb_entry(rp, struct ceph_mon_generic_request, node);
 		op = le16_to_cpu(req->request->hdr.type);
 		if (op == CEPH_MSG_STATFS)
-			seq_printf(s, "%lld statfs\n", req->tid);
+			seq_printf(s, "%llu statfs\n", req->tid);
+		else if (op == CEPH_MSG_POOLOP)
+			seq_printf(s, "%llu poolop\n", req->tid);
+		else if (op == CEPH_MSG_MON_GET_VERSION)
+			seq_printf(s, "%llu mon_get_version", req->tid);
 		else
-			seq_printf(s, "%lld unknown\n", req->tid);
+			seq_printf(s, "%llu unknown\n", req->tid);
 	}
 
 	mutex_unlock(&monc->mutex);

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef3..067d3af 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c

@@ -296,6 +296,33 @@
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+			  unsigned long timeout)
+{
+	unsigned long started = jiffies;
+	int ret;
+
+	mutex_lock(&monc->mutex);
+	while (monc->have_osdmap < epoch) {
+		mutex_unlock(&monc->mutex);
+
+		if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+			return -ETIMEDOUT;
+
+		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+					 monc->have_osdmap >= epoch, timeout);
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&monc->mutex);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
 
 /*
  *
@@ -477,14 +504,13 @@
 	return m;
 }
 
-static int do_generic_request(struct ceph_mon_client *monc,
-			      struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+				struct ceph_mon_generic_request *req)
 {
 	int err;
 
 	/* register request */
-	mutex_lock(&monc->mutex);
-	req->tid = ++monc->last_tid;
+	req->tid = tid != 0 ? tid : ++monc->last_tid;
 	req->request->hdr.tid = cpu_to_le64(req->tid);
 	__insert_generic_request(monc, req);
 	monc->num_generic_requests++;
@@ -496,13 +522,24 @@
 	mutex_lock(&monc->mutex);
 	rb_erase(&req->node, &monc->generic_request_tree);
 	monc->num_generic_requests--;
-	mutex_unlock(&monc->mutex);
 
 	if (!err)
 		err = req->result;
 	return err;
 }
 
+static int do_generic_request(struct ceph_mon_client *monc,
+			      struct ceph_mon_generic_request *req)
+{
+	int err;
+
+	mutex_lock(&monc->mutex);
+	err = __do_generic_request(monc, 0, req);
+	mutex_unlock(&monc->mutex);
+
+	return err;
+}
+
 /*
  * statfs
  */
@@ -579,6 +616,96 @@
 }
 EXPORT_SYMBOL(ceph_monc_do_statfs);
 
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+				     struct ceph_msg *msg)
+{
+	struct ceph_mon_generic_request *req;
+	u64 tid = le64_to_cpu(msg->hdr.tid);
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front_alloc_len;
+	u64 handle;
+
+	dout("%s %p tid %llu\n", __func__, msg, tid);
+
+	ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+	handle = ceph_decode_64(&p);
+	if (tid != 0 && tid != handle)
+		goto bad;
+
+	mutex_lock(&monc->mutex);
+	req = __lookup_generic_req(monc, handle);
+	if (req) {
+		*(u64 *)req->buf = ceph_decode_64(&p);
+		req->result = 0;
+		get_generic_request(req);
+	}
+	mutex_unlock(&monc->mutex);
+	if (req) {
+		complete_all(&req->completion);
+		put_generic_request(req);
+	}
+
+	return;
+bad:
+	pr_err("corrupt mon_get_version reply\n");
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+			     u64 *newest)
+{
+	struct ceph_mon_generic_request *req;
+	void *p, *end;
+	u64 tid;
+	int err;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
+	kref_init(&req->kref);
+	req->buf = newest;
+	req->buf_len = sizeof(*newest);
+	init_completion(&req->completion);
+
+	req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+				    sizeof(u64) + sizeof(u32) + strlen(what),
+				    GFP_NOFS, true);
+	if (!req->request) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+				  GFP_NOFS, true);
+	if (!req->reply) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	p = req->request->front.iov_base;
+	end = p + req->request->front_alloc_len;
+
+	/* fill out request */
+	mutex_lock(&monc->mutex);
+	tid = ++monc->last_tid;
+	ceph_encode_64(&p, tid); /* handle */
+	ceph_encode_string(&p, end, what, strlen(what));
+
+	err = __do_generic_request(monc, tid, req);
+
+	mutex_unlock(&monc->mutex);
+out:
+	kref_put(&req->kref, release_generic_request);
+	return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
 /*
  * pool ops
  */
@@ -981,6 +1108,10 @@
 		handle_statfs_reply(monc, msg);
 		break;
 
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		handle_get_version_reply(monc, msg);
+		break;
+
 	case CEPH_MSG_POOLOP_REPLY:
 		handle_poolop_reply(monc, msg);
 		break;
@@ -1029,6 +1160,15 @@
 	case CEPH_MSG_AUTH_REPLY:
 		m = ceph_msg_get(monc->m_auth_reply);
 		break;
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		if (le64_to_cpu(hdr->tid) != 0)
+			return get_generic_reply(con, hdr, skip);
+
+		/*
+		 * Older OSDs don't set reply tid even if the original
+		 * request had a non-zero tid.  Work around this weirdness
+		 * by falling through to the allocate case.
+		 */
 	case CEPH_MSG_MON_MAP:
 	case CEPH_MSG_MDS_MAP:
 	case CEPH_MSG_OSD_MAP:

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b0dfce7..05be0c1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c

@@ -2491,7 +2491,7 @@
  * Call all pending notify callbacks - for use after a watch is
  * unregistered, to make sure no more callbacks for it will be invoked
  */
-extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
 {
 	flush_workqueue(osdc->notify_wq);
 }

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a224..5550130 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c

@@ -53,7 +53,10 @@
 			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
 	}
-	kfree(pages);
+	if (is_vmalloc_addr(pages))
+		vfree(pages);
+	else
+		kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
 
@@ -165,36 +168,6 @@
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
 /*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
-					 void __user *data,
-					 loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, left, PAGE_CACHE_SIZE-po);
-		bad = copy_to_user(data, page_address(pages[i]) + po, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		if (po) {
-			po += l - bad;
-			if (po == PAGE_CACHE_SIZE)
-				po = 0;
-		}
-		i++;
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
-/*
  * Zero an extent within a page vector.  Offset is relative to the
  * start of the first page.
  */

diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925..71093d9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile

@@ -9,7 +9,7 @@
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o dev_ioctl.o
+			sock_diag.o dev_ioctl.o tso.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o

diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7b..6b1c04c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c

@@ -739,11 +739,15 @@
 	__sum16 sum;
 
 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
-			netdev_rx_csum_fault(skb->dev);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
+	    !skb->csum_complete_sw)
+		netdev_rx_csum_fault(skb->dev);
+
+	/* Save checksum complete for later use */
+	skb->csum = sum;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb->csum_complete_sw = 1;
+
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);

diff --git a/net/core/dev.c b/net/core/dev.c
index 8908a68..30eedf6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -1661,6 +1661,29 @@
 }
 EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
+
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb_scrub_packet(skb, true);
+	skb->protocol = eth_type_trans(skb, dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1681,24 +1704,7 @@
  */
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
-		atomic_long_inc(&dev->rx_dropped);
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	skb_scrub_packet(skb, true);
-	skb->protocol = eth_type_trans(skb, dev);
-
-	return netif_rx_internal(skb);
+	return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
@@ -2507,13 +2513,39 @@
 	return 0;
 }
 
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+					   netdev_features_t features,
+					   __be16 type)
+{
+	if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+		features &= skb->dev->mpls_features;
+
+	return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+					   netdev_features_t features,
+					   __be16 type)
+{
+	return features;
+}
+#endif
+
 static netdev_features_t harmonize_features(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	int tmp;
+	__be16 type;
+
+	type = skb_network_protocol(skb, &tmp);
+	features = net_mpls_features(skb, features, type);
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
-	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
+	    !can_checksum_protocol(features, type)) {
 		features &= ~NETIF_F_ALL_CSUM;
 	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
@@ -5689,10 +5721,6 @@
 		*/
 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-		if (!dev->rtnl_link_ops ||
-		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
 		/*
 		 *	Flush the unicast and multicast chains
 		 */
@@ -5702,6 +5730,10 @@
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
+		if (!dev->rtnl_link_ops ||
+		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
 		/* Notifier chain MUST detach us all upper devices. */
 		WARN_ON(netdev_has_any_upper_dev(dev));
 
@@ -5927,10 +5959,7 @@
 
 static void netif_free_tx_queues(struct net_device *dev)
 {
-	if (is_vmalloc_addr(dev->_tx))
-		vfree(dev->_tx);
-	else
-		kfree(dev->_tx);
+	kvfree(dev->_tx);
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -6404,10 +6433,7 @@
 {
 	char *addr = (char *)dev - dev->padded;
 
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
+	kvfree(addr);
 }
 
 /**
@@ -6512,11 +6538,6 @@
 
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
-	netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
-	kfree(dev->_rx);
-#endif
-
 free_dev:
 	netdev_freemem(dev);
 	return NULL;
@@ -6613,6 +6634,9 @@
 /**
  *	unregister_netdevice_many - unregister many devices
  *	@head: list of devices
+ *
+ *  Note: As most callers use a stack allocated list_head,
+ *  we force a list_del() to make sure the stack won't be corrupted later.
  */
 void unregister_netdevice_many(struct list_head *head)
 {
@@ -6622,6 +6646,7 @@
 		rollback_registered_many(head);
 		list_for_each_entry(dev, head, unreg_list)
 			net_set_todo(dev);
+		list_del(head);
 	}
 }
 EXPORT_SYMBOL(unregister_netdevice_many);
@@ -7077,7 +7102,6 @@
 		}
 	}
 	unregister_netdevice_many(&dev_kill_list);
-	list_del(&dev_kill_list);
 	rtnl_unlock();
 }
 

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d579..b6b2306 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c

@@ -225,6 +225,91 @@
 }
 EXPORT_SYMBOL(__hw_addr_unsync);
 
+/**
+ *  __hw_addr_sync_dev - Synchronize device's multicast list
+ *  @list: address list to synchronize
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  This function is intended to be called from the ndo_set_rx_mode
+ *  function of devices that require explicit address add/remove
+ *  notifications.  The unsync function may be NULL in which case
+ *  the addresses requiring removal will simply be removed without
+ *  any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+	int err;
+
+	/* first go through and flush out any stale entries */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt || ha->refcount != 1)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+
+	/* go through and sync new entries to the list */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (ha->sync_cnt)
+			continue;
+
+		err = sync(dev, ha->addr);
+		if (err)
+			return err;
+
+		ha->sync_cnt++;
+		ha->refcount++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ *  __hw_addr_unsync_dev - Remove synchronized addresses from device
+ *  @list: address list to remove synchronized addresses from
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ *  This function is intended to be called from the ndo_stop or ndo_open
+ *  functions on devices that require explicit address add/remove
+ *  notifications.  If the unsync function pointer is NULL then this function
+ *  can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e..17cb912 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c

@@ -557,6 +557,23 @@
 	return ret;
 }
 
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+					struct ethtool_rxnfc *rx_rings,
+					u32 size)
+{
+	int i;
+
+	if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+		return -EFAULT;
+
+	/* Validate ring indices */
+	for (i = 0; i < size; i++)
+		if (indir[i] >= rx_rings->data)
+			return -EINVAL;
+
+	return 0;
+}
+
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
@@ -565,7 +582,7 @@
 	int ret;
 
 	if (!dev->ethtool_ops->get_rxfh_indir_size ||
-	    !dev->ethtool_ops->get_rxfh_indir)
+	    !dev->ethtool_ops->get_rxfh)
 		return -EOPNOTSUPP;
 	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
 	if (dev_size == 0)
@@ -591,7 +608,7 @@
 	if (!indir)
 		return -ENOMEM;
 
-	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
 	if (ret)
 		goto out;
 
@@ -613,8 +630,9 @@
 	u32 *indir;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
+	u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
 
-	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+	if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
 	    !ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
@@ -643,31 +661,187 @@
 		for (i = 0; i < dev_size; i++)
 			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
 	} else {
-		if (copy_from_user(indir,
-				  useraddr +
-				  offsetof(struct ethtool_rxfh_indir,
-					   ring_index[0]),
-				  dev_size * sizeof(indir[0]))) {
-			ret = -EFAULT;
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + ringidx_offset,
+						  &rx_rings,
+						  dev_size);
+		if (ret)
 			goto out;
-		}
-
-		/* Validate ring indices */
-		for (i = 0; i < dev_size; i++) {
-			if (indir[i] >= rx_rings.data) {
-				ret = -EINVAL;
-				goto out;
-			}
-		}
 	}
 
-	ret = ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh(dev, indir, NULL);
 
 out:
 	kfree(indir);
 	return ret;
 }
 
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u32 user_indir_size, user_key_size;
+	u32 dev_indir_size = 0, dev_key_size = 0;
+	struct ethtool_rxfh rxfh;
+	u32 total_size;
+	u32 indir_bytes;
+	u32 *indir = NULL;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+
+	if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+	      dev->ethtool_ops->get_rxfh_key_size) ||
+	      !dev->ethtool_ops->get_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+	if (ops->get_rxfh_key_size)
+		dev_key_size = ops->get_rxfh_key_size(dev);
+
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+		return -EFAULT;
+	user_indir_size = rxfh.indir_size;
+	user_key_size = rxfh.key_size;
+
+	/* Check that reserved fields are 0 for now */
+	if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+		return -EINVAL;
+
+	rxfh.indir_size = dev_indir_size;
+	rxfh.key_size = dev_key_size;
+	if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size and key size.  Otherwise, if the User size is
+	 * not equal to device table size or key size it's an error.
+	 */
+	if (!user_indir_size && !user_key_size)
+		return 0;
+
+	if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+	    (user_key_size && (user_key_size != dev_key_size)))
+		return -EINVAL;
+
+	indir_bytes = user_indir_size * sizeof(indir[0]);
+	total_size = indir_bytes + user_key_size;
+	rss_config = kzalloc(total_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	if (user_indir_size)
+		indir = (u32 *)rss_config;
+
+	if (user_key_size)
+		hkey = rss_config + indir_bytes;
+
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+	if (!ret) {
+		if (copy_to_user(useraddr +
+				 offsetof(struct ethtool_rxfh, rss_config[0]),
+				 rss_config, total_size))
+			ret = -EFAULT;
+	}
+
+	kfree(rss_config);
+
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxnfc rx_rings;
+	struct ethtool_rxfh rxfh;
+	u32 dev_indir_size = 0, dev_key_size = 0, i;
+	u32 *indir = NULL, indir_bytes = 0;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+	if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+	    !ops->get_rxnfc || !ops->set_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+	if (ops->get_rxfh_key_size)
+		dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+		return -EFAULT;
+
+	/* Check that reserved fields are 0 for now */
+	if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+		return -EINVAL;
+
+	/* If either indir or hash key is valid, proceed further.
+	 * It is not valid to request that both be unchanged.
+	 */
+	if ((rxfh.indir_size &&
+	     rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
+	     rxfh.indir_size != dev_indir_size) ||
+	    (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
+	    (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+	     rxfh.key_size == 0))
+		return -EINVAL;
+
+	if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+		indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+	rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+
+	/* rxfh.indir_size == 0 means reset the indir table to default.
+	 * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
+	 */
+	if (rxfh.indir_size &&
+	    rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
+		indir = (u32 *)rss_config;
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + rss_cfg_offset,
+						  &rx_rings,
+						  rxfh.indir_size);
+		if (ret)
+			goto out;
+	} else if (rxfh.indir_size == 0) {
+		indir = (u32 *)rss_config;
+		for (i = 0; i < dev_indir_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	}
+
+	if (rxfh.key_size) {
+		hkey = rss_config + indir_bytes;
+		if (copy_from_user(hkey,
+				   useraddr + rss_cfg_offset + indir_bytes,
+				   rxfh.key_size)) {
+			ret = -EFAULT;
+			goto out;
+		}
+	}
+
+	ret = ops->set_rxfh(dev, indir, hkey);
+
+out:
+	kfree(rss_config);
+	return ret;
+}
+
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -1491,6 +1665,7 @@
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
 	case ETHTOOL_GRXFHINDIR:
+	case ETHTOOL_GRSSH:
 	case ETHTOOL_GFEATURES:
 	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1803,12 @@
 	case ETHTOOL_SRXFHINDIR:
 		rc = ethtool_set_rxfh_indir(dev, useraddr);
 		break;
+	case ETHTOOL_GRSSH:
+		rc = ethtool_get_rxfh(dev, useraddr);
+		break;
+	case ETHTOOL_SRSSH:
+		rc = ethtool_set_rxfh(dev, useraddr);
+		break;
 	case ETHTOOL_GFEATURES:
 		rc = ethtool_get_features(dev, useraddr);
 		break;

diff --git a/net/core/filter.c b/net/core/filter.c
index 4aec7b9..735fad8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c

@@ -45,6 +45,27 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 
+/* Registers */
+#define BPF_R0	regs[BPF_REG_0]
+#define BPF_R1	regs[BPF_REG_1]
+#define BPF_R2	regs[BPF_REG_2]
+#define BPF_R3	regs[BPF_REG_3]
+#define BPF_R4	regs[BPF_REG_4]
+#define BPF_R5	regs[BPF_REG_5]
+#define BPF_R6	regs[BPF_REG_6]
+#define BPF_R7	regs[BPF_REG_7]
+#define BPF_R8	regs[BPF_REG_8]
+#define BPF_R9	regs[BPF_REG_9]
+#define BPF_R10	regs[BPF_REG_10]
+
+/* Named registers */
+#define DST	regs[insn->dst_reg]
+#define SRC	regs[insn->src_reg]
+#define FP	regs[BPF_REG_FP]
+#define ARG1	regs[BPF_REG_ARG1]
+#define CTX	regs[BPF_REG_CTX]
+#define IMM	insn->imm
+
 /* No hurry in this branch
  *
  * Exported for the bpf jit load helper.
@@ -57,9 +78,9 @@
 		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
 	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
 		return ptr;
+
 	return NULL;
 }
 
@@ -68,6 +89,7 @@
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
+
 	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
@@ -122,13 +144,6 @@
 	return 0;
 }
 
-/* Register mappings for user programs. */
-#define A_REG		0
-#define X_REG		7
-#define TMP_REG		8
-#define ARG2_REG	2
-#define ARG3_REG	3
-
 /**
  *	__sk_run_filter - run a filter on a given context
  *	@ctx: buffer to run the filter on
@@ -138,447 +153,442 @@
  * keep, 0 for none. @ctx is the data we are operating on, @insn is the
  * array of filter instructions.
  */
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
-	void *ptr;
-	int off;
-
-#define K  insn->imm
-#define A  regs[insn->a_reg]
-#define X  regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT	 ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
-#define DL(A, B, C)	[A|B|C] = &&A##_##B##_##C
-		DL(BPF_ALU, BPF_ADD, BPF_X),
-		DL(BPF_ALU, BPF_ADD, BPF_K),
-		DL(BPF_ALU, BPF_SUB, BPF_X),
-		DL(BPF_ALU, BPF_SUB, BPF_K),
-		DL(BPF_ALU, BPF_AND, BPF_X),
-		DL(BPF_ALU, BPF_AND, BPF_K),
-		DL(BPF_ALU, BPF_OR, BPF_X),
-		DL(BPF_ALU, BPF_OR, BPF_K),
-		DL(BPF_ALU, BPF_LSH, BPF_X),
-		DL(BPF_ALU, BPF_LSH, BPF_K),
-		DL(BPF_ALU, BPF_RSH, BPF_X),
-		DL(BPF_ALU, BPF_RSH, BPF_K),
-		DL(BPF_ALU, BPF_XOR, BPF_X),
-		DL(BPF_ALU, BPF_XOR, BPF_K),
-		DL(BPF_ALU, BPF_MUL, BPF_X),
-		DL(BPF_ALU, BPF_MUL, BPF_K),
-		DL(BPF_ALU, BPF_MOV, BPF_X),
-		DL(BPF_ALU, BPF_MOV, BPF_K),
-		DL(BPF_ALU, BPF_DIV, BPF_X),
-		DL(BPF_ALU, BPF_DIV, BPF_K),
-		DL(BPF_ALU, BPF_MOD, BPF_X),
-		DL(BPF_ALU, BPF_MOD, BPF_K),
-		DL(BPF_ALU, BPF_NEG, 0),
-		DL(BPF_ALU, BPF_END, BPF_TO_BE),
-		DL(BPF_ALU, BPF_END, BPF_TO_LE),
-		DL(BPF_ALU64, BPF_ADD, BPF_X),
-		DL(BPF_ALU64, BPF_ADD, BPF_K),
-		DL(BPF_ALU64, BPF_SUB, BPF_X),
-		DL(BPF_ALU64, BPF_SUB, BPF_K),
-		DL(BPF_ALU64, BPF_AND, BPF_X),
-		DL(BPF_ALU64, BPF_AND, BPF_K),
-		DL(BPF_ALU64, BPF_OR, BPF_X),
-		DL(BPF_ALU64, BPF_OR, BPF_K),
-		DL(BPF_ALU64, BPF_LSH, BPF_X),
-		DL(BPF_ALU64, BPF_LSH, BPF_K),
-		DL(BPF_ALU64, BPF_RSH, BPF_X),
-		DL(BPF_ALU64, BPF_RSH, BPF_K),
-		DL(BPF_ALU64, BPF_XOR, BPF_X),
-		DL(BPF_ALU64, BPF_XOR, BPF_K),
-		DL(BPF_ALU64, BPF_MUL, BPF_X),
-		DL(BPF_ALU64, BPF_MUL, BPF_K),
-		DL(BPF_ALU64, BPF_MOV, BPF_X),
-		DL(BPF_ALU64, BPF_MOV, BPF_K),
-		DL(BPF_ALU64, BPF_ARSH, BPF_X),
-		DL(BPF_ALU64, BPF_ARSH, BPF_K),
-		DL(BPF_ALU64, BPF_DIV, BPF_X),
-		DL(BPF_ALU64, BPF_DIV, BPF_K),
-		DL(BPF_ALU64, BPF_MOD, BPF_X),
-		DL(BPF_ALU64, BPF_MOD, BPF_K),
-		DL(BPF_ALU64, BPF_NEG, 0),
-		DL(BPF_JMP, BPF_CALL, 0),
-		DL(BPF_JMP, BPF_JA, 0),
-		DL(BPF_JMP, BPF_JEQ, BPF_X),
-		DL(BPF_JMP, BPF_JEQ, BPF_K),
-		DL(BPF_JMP, BPF_JNE, BPF_X),
-		DL(BPF_JMP, BPF_JNE, BPF_K),
-		DL(BPF_JMP, BPF_JGT, BPF_X),
-		DL(BPF_JMP, BPF_JGT, BPF_K),
-		DL(BPF_JMP, BPF_JGE, BPF_X),
-		DL(BPF_JMP, BPF_JGE, BPF_K),
-		DL(BPF_JMP, BPF_JSGT, BPF_X),
-		DL(BPF_JMP, BPF_JSGT, BPF_K),
-		DL(BPF_JMP, BPF_JSGE, BPF_X),
-		DL(BPF_JMP, BPF_JSGE, BPF_K),
-		DL(BPF_JMP, BPF_JSET, BPF_X),
-		DL(BPF_JMP, BPF_JSET, BPF_K),
-		DL(BPF_JMP, BPF_EXIT, 0),
-		DL(BPF_STX, BPF_MEM, BPF_B),
-		DL(BPF_STX, BPF_MEM, BPF_H),
-		DL(BPF_STX, BPF_MEM, BPF_W),
-		DL(BPF_STX, BPF_MEM, BPF_DW),
-		DL(BPF_STX, BPF_XADD, BPF_W),
-		DL(BPF_STX, BPF_XADD, BPF_DW),
-		DL(BPF_ST, BPF_MEM, BPF_B),
-		DL(BPF_ST, BPF_MEM, BPF_H),
-		DL(BPF_ST, BPF_MEM, BPF_W),
-		DL(BPF_ST, BPF_MEM, BPF_DW),
-		DL(BPF_LDX, BPF_MEM, BPF_B),
-		DL(BPF_LDX, BPF_MEM, BPF_H),
-		DL(BPF_LDX, BPF_MEM, BPF_W),
-		DL(BPF_LDX, BPF_MEM, BPF_DW),
-		DL(BPF_LD, BPF_ABS, BPF_W),
-		DL(BPF_LD, BPF_ABS, BPF_H),
-		DL(BPF_LD, BPF_ABS, BPF_B),
-		DL(BPF_LD, BPF_IND, BPF_W),
-		DL(BPF_LD, BPF_IND, BPF_H),
-		DL(BPF_LD, BPF_IND, BPF_B),
-#undef DL
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+		[BPF_ALU | BPF_OR | BPF_X]  = &&ALU_OR_X,
+		[BPF_ALU | BPF_OR | BPF_K]  = &&ALU_OR_K,
+		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
+		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+		/* 64 bit ALU operations */
+		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+		/* Call instruction */
+		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+		/* Jumps */
+		[BPF_JMP | BPF_JA] = &&JMP_JA,
+		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+		/* Program return */
+		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+		/* Store instructions */
+		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+		/* Load instructions */
+		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
 	};
+	void *ptr;
+	int off;
 
-	regs[FP_REG]  = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
-	regs[ARG1_REG] = (u64) (unsigned long) ctx;
-	regs[A_REG] = 0;
-	regs[X_REG] = 0;
+#define CONT	 ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
+
+	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+	ARG1 = (u64) (unsigned long) ctx;
+
+	/* Registers used in classic BPF programs need to be reset first. */
+	regs[BPF_REG_A] = 0;
+	regs[BPF_REG_X] = 0;
 
 select_insn:
 	goto *jumptable[insn->code];
 
 	/* ALU */
 #define ALU(OPCODE, OP)			\
-	BPF_ALU64_##OPCODE##_BPF_X:	\
-		A = A OP X;		\
+	ALU64_##OPCODE##_X:		\
+		DST = DST OP SRC;	\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_X:	\
-		A = (u32) A OP (u32) X;	\
+	ALU_##OPCODE##_X:		\
+		DST = (u32) DST OP (u32) SRC;	\
 		CONT;			\
-	BPF_ALU64_##OPCODE##_BPF_K:	\
-		A = A OP K;		\
+	ALU64_##OPCODE##_K:		\
+		DST = DST OP IMM;		\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_K:	\
-		A = (u32) A OP (u32) K;	\
+	ALU_##OPCODE##_K:		\
+		DST = (u32) DST OP (u32) IMM;	\
 		CONT;
 
-	ALU(BPF_ADD,  +)
-	ALU(BPF_SUB,  -)
-	ALU(BPF_AND,  &)
-	ALU(BPF_OR,   |)
-	ALU(BPF_LSH, <<)
-	ALU(BPF_RSH, >>)
-	ALU(BPF_XOR,  ^)
-	ALU(BPF_MUL,  *)
+	ALU(ADD,  +)
+	ALU(SUB,  -)
+	ALU(AND,  &)
+	ALU(OR,   |)
+	ALU(LSH, <<)
+	ALU(RSH, >>)
+	ALU(XOR,  ^)
+	ALU(MUL,  *)
 #undef ALU
-	BPF_ALU_BPF_NEG_0:
-		A = (u32) -A;
+	ALU_NEG:
+		DST = (u32) -DST;
 		CONT;
-	BPF_ALU64_BPF_NEG_0:
-		A = -A;
+	ALU64_NEG:
+		DST = -DST;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_X:
-		A = (u32) X;
+	ALU_MOV_X:
+		DST = (u32) SRC;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_K:
-		A = (u32) K;
+	ALU_MOV_K:
+		DST = (u32) IMM;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_X:
-		A = X;
+	ALU64_MOV_X:
+		DST = SRC;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_K:
-		A = K;
+	ALU64_MOV_K:
+		DST = IMM;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_X:
-		(*(s64 *) &A) >>= X;
+	ALU64_ARSH_X:
+		(*(s64 *) &DST) >>= SRC;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_K:
-		(*(s64 *) &A) >>= K;
+	ALU64_ARSH_K:
+		(*(s64 *) &DST) >>= IMM;
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_X:
-		if (unlikely(X == 0))
+	ALU64_MOD_X:
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = A;
-		A = do_div(tmp, X);
+		tmp = DST;
+		DST = do_div(tmp, SRC);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_X:
-		if (unlikely(X == 0))
+	ALU_MOD_X:
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) X);
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) SRC);
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_K:
-		tmp = A;
-		A = do_div(tmp, K);
+	ALU64_MOD_K:
+		tmp = DST;
+		DST = do_div(tmp, IMM);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_K:
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) K);
+	ALU_MOD_K:
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) IMM);
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_X:
-		if (unlikely(X == 0))
+	ALU64_DIV_X:
+		if (unlikely(SRC == 0))
 			return 0;
-		do_div(A, X);
+		do_div(DST, SRC);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_X:
-		if (unlikely(X == 0))
+	ALU_DIV_X:
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		do_div(tmp, (u32) X);
-		A = (u32) tmp;
+		tmp = (u32) DST;
+		do_div(tmp, (u32) SRC);
+		DST = (u32) tmp;
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_K:
-		do_div(A, K);
+	ALU64_DIV_K:
+		do_div(DST, IMM);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_K:
-		tmp = (u32) A;
-		do_div(tmp, (u32) K);
-		A = (u32) tmp;
+	ALU_DIV_K:
+		tmp = (u32) DST;
+		do_div(tmp, (u32) IMM);
+		DST = (u32) tmp;
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_BE:
-		switch (K) {
+	ALU_END_TO_BE:
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_be16(A);
+			DST = (__force u16) cpu_to_be16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_be32(A);
+			DST = (__force u32) cpu_to_be32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_be64(A);
+			DST = (__force u64) cpu_to_be64(DST);
 			break;
 		}
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_LE:
-		switch (K) {
+	ALU_END_TO_LE:
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_le16(A);
+			DST = (__force u16) cpu_to_le16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_le32(A);
+			DST = (__force u32) cpu_to_le32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_le64(A);
+			DST = (__force u64) cpu_to_le64(DST);
 			break;
 		}
 		CONT;
 
 	/* CALL */
-	BPF_JMP_BPF_CALL_0:
-		/* Function call scratches R1-R5 registers, preserves R6-R9,
-		 * and stores return value into R0.
+	JMP_CALL:
+		/* Function call scratches BPF_R1-BPF_R5 registers,
+		 * preserves BPF_R6-BPF_R9, and stores return value
+		 * into BPF_R0.
 		 */
-		R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
-						   regs[4], regs[5]);
+		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
+						       BPF_R4, BPF_R5);
 		CONT;
 
 	/* JMP */
-	BPF_JMP_BPF_JA_0:
+	JMP_JA:
 		insn += insn->off;
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_X:
-		if (A == X) {
+	JMP_JEQ_X:
+		if (DST == SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_K:
-		if (A == K) {
+	JMP_JEQ_K:
+		if (DST == IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_X:
-		if (A != X) {
+	JMP_JNE_X:
+		if (DST != SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_K:
-		if (A != K) {
+	JMP_JNE_K:
+		if (DST != IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_X:
-		if (A > X) {
+	JMP_JGT_X:
+		if (DST > SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_K:
-		if (A > K) {
+	JMP_JGT_K:
+		if (DST > IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_X:
-		if (A >= X) {
+	JMP_JGE_X:
+		if (DST >= SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_K:
-		if (A >= K) {
+	JMP_JGE_K:
+		if (DST >= IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_X:
-		if (((s64)A) > ((s64)X)) {
+	JMP_JSGT_X:
+		if (((s64) DST) > ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_K:
-		if (((s64)A) > ((s64)K)) {
+	JMP_JSGT_K:
+		if (((s64) DST) > ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_X:
-		if (((s64)A) >= ((s64)X)) {
+	JMP_JSGE_X:
+		if (((s64) DST) >= ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_K:
-		if (((s64)A) >= ((s64)K)) {
+	JMP_JSGE_K:
+		if (((s64) DST) >= ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_X:
-		if (A & X) {
+	JMP_JSET_X:
+		if (DST & SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_K:
-		if (A & K) {
+	JMP_JSET_K:
+		if (DST & IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_EXIT_0:
-		return R0;
+	JMP_EXIT:
+		return BPF_R0;
 
 	/* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE)					\
-	BPF_STX_BPF_MEM_##SIZEOP:				\
-		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
-		CONT;						\
-	BPF_ST_BPF_MEM_##SIZEOP:				\
-		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
-		CONT;						\
-	BPF_LDX_BPF_MEM_##SIZEOP:				\
-		A = *(SIZE *)(unsigned long) (X + insn->off);	\
+#define LDST(SIZEOP, SIZE)						\
+	STX_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
+		CONT;							\
+	ST_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
+		CONT;							\
+	LDX_MEM_##SIZEOP:						\
+		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
 		CONT;
 
-	LDST(BPF_B,   u8)
-	LDST(BPF_H,  u16)
-	LDST(BPF_W,  u32)
-	LDST(BPF_DW, u64)
+	LDST(B,   u8)
+	LDST(H,  u16)
+	LDST(W,  u32)
+	LDST(DW, u64)
 #undef LDST
-	BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
-		atomic_add((u32) X, (atomic_t *)(unsigned long)
-			   (A + insn->off));
+	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+			   (DST + insn->off));
 		CONT;
-	BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
-		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
-			     (A + insn->off));
+	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+			     (DST + insn->off));
 		CONT;
-	BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+		off = IMM;
 load_word:
-		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
-		 * appearing in the programs where ctx == skb. All programs
-		 * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
-		 * saves it in R6, internal BPF verifier will check that
-		 * R6 == ctx.
+		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are
+		 * only appearing in the programs where ctx ==
+		 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
+		 * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
+		 * internal BPF verifier will check that BPF_R6 ==
+		 * ctx.
 		 *
-		 * BPF_ABS and BPF_IND are wrappers of function calls, so
-		 * they scratch R1-R5 registers, preserve R6-R9, and store
-		 * return value into R0.
+		 * BPF_ABS and BPF_IND are wrappers of function calls,
+		 * so they scratch BPF_R1-BPF_R5 registers, preserve
+		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
 		 *
 		 * Implicit input:
-		 *   ctx
+		 *   ctx == skb == BPF_R6 == CTX
 		 *
 		 * Explicit input:
-		 *   X == any register
-		 *   K == 32-bit immediate
+		 *   SRC == any register
+		 *   IMM == 32-bit immediate
 		 *
 		 * Output:
-		 *   R0 - 8/16/32-bit skb data converted to cpu endianness
+		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
 		 */
-		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
+
+		ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
 		if (likely(ptr != NULL)) {
-			R0 = get_unaligned_be32(ptr);
+			BPF_R0 = get_unaligned_be32(ptr);
 			CONT;
 		}
+
 		return 0;
-	BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+		off = IMM;
 load_half:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
+		ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
 		if (likely(ptr != NULL)) {
-			R0 = get_unaligned_be16(ptr);
+			BPF_R0 = get_unaligned_be16(ptr);
 			CONT;
 		}
+
 		return 0;
-	BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
-		off = K;
+	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+		off = IMM;
 load_byte:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
+		ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
 		if (likely(ptr != NULL)) {
-			R0 = *(u8 *)ptr;
+			BPF_R0 = *(u8 *)ptr;
 			CONT;
 		}
+
 		return 0;
-	BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_word;
-	BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_half;
-	BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
-		off = K + X;
+	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+		off = IMM + SRC;
 		goto load_byte;
 
 	default_label:
 		/* If we ever reach this, we have a bug somewhere. */
 		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
 		return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
 }
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
 /* Helper to find the offset of pkt_type in sk_buff structure. We want
  * to make sure its still a 3bit field starting at a byte boundary;
  * taken from arch/x86/net/bpf_jit_comp.c.
  */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX	(7 << 5)
+#else
 #define PKT_TYPE_MAX	7
+#endif
 static unsigned int pkt_type_offset(void)
 {
 	struct sk_buff skb_probe = { .pkt_type = ~0, };
@@ -594,16 +604,14 @@
 	return -1;
 }
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
-	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
-
-	return __skb_get_poff(skb);
+	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
 }
 
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
-	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
 	struct nlattr *nla;
 
 	if (skb_is_nonlinear(skb))
@@ -612,19 +620,19 @@
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
-	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
 	struct nlattr *nla;
 
 	if (skb_is_nonlinear(skb))
@@ -633,25 +641,31 @@
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = (struct nlattr *) &skb->data[A];
-	if (nla->nla_len > skb->len - A)
+	nla = (struct nlattr *) &skb->data[a];
+	if (nla->nla_len > skb->len - a)
 		return 0;
 
-	nla = nla_find_nested(nla, X);
+	nla = nla_find_nested(nla, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return raw_smp_processor_id();
 }
 
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+	return prandom_u32();
+}
+
 static bool convert_bpf_extensions(struct sock_filter *fp,
 				   struct sock_filter_int **insnp)
 {
@@ -661,119 +675,83 @@
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, protocol);
-		insn++;
-
+		/* A = *(u16 *) (CTX + offsetof(protocol)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, protocol));
 		/* A = ntohs(A) [emitting a nop or swap16] */
-		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
-		insn->a_reg = A_REG;
-		insn->imm = 16;
+		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		insn->code = BPF_LDX | BPF_MEM | BPF_B;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = pkt_type_offset();
+		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+				    pkt_type_offset());
 		if (insn->off < 0)
 			return false;
 		insn++;
-
-		insn->code = BPF_ALU | BPF_AND | BPF_K;
-		insn->a_reg = A_REG;
-		insn->imm = PKT_TYPE_MAX;
+		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+		insn++;
+                *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
+#endif
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
 	case SKF_AD_OFF + SKF_AD_HATYPE:
-		if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
-			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
-		else
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = TMP_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, dev);
-		insn++;
-
-		insn->code = BPF_JMP | BPF_JNE | BPF_K;
-		insn->a_reg = TMP_REG;
-		insn->imm = 0;
-		insn->off = 1;
-		insn++;
-
-		insn->code = BPF_JMP | BPF_EXIT;
-		insn++;
-
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
 
-		insn->a_reg = A_REG;
-		insn->x_reg = TMP_REG;
-
-		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->off = offsetof(struct net_device, ifindex);
-		} else {
-			insn->code = BPF_LDX | BPF_MEM | BPF_H;
-			insn->off = offsetof(struct net_device, type);
-		}
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+				      BPF_REG_TMP, BPF_REG_CTX,
+				      offsetof(struct sk_buff, dev));
+		/* if (tmp != 0) goto pc + 1 */
+		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+		*insn++ = BPF_EXIT_INSN();
+		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, ifindex));
+		else
+			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, type));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_MARK:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, mark);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, mark));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_RXHASH:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, hash);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, hash));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_QUEUE:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, queue_mapping);
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, queue_mapping));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
-		insn->off = offsetof(struct sk_buff, vlan_tci);
-		insn++;
-
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
+		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, vlan_tci));
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
-			insn->imm = ~VLAN_TAG_PRESENT;
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+					      ~VLAN_TAG_PRESENT);
 		} else {
-			insn->code = BPF_ALU | BPF_RSH | BPF_K;
-			insn->a_reg = A_REG;
-			insn->imm = 12;
-			insn++;
-
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
-			insn->imm = 1;
+			/* A >>= 12 */
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
+			/* A &= 1 */
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
 		}
 		break;
 
@@ -781,46 +759,36 @@
 	case SKF_AD_OFF + SKF_AD_NLATTR:
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 	case SKF_AD_OFF + SKF_AD_CPU:
-		/* arg1 = ctx */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG1_REG;
-		insn->x_reg = CTX_REG;
-		insn++;
-
+	case SKF_AD_OFF + SKF_AD_RANDOM:
+		/* arg1 = CTX */
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 		/* arg2 = A */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG2_REG;
-		insn->x_reg = A_REG;
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 		/* arg3 = X */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG3_REG;
-		insn->x_reg = X_REG;
-		insn++;
-
-		/* Emit call(ctx, arg2=A, arg3=X) */
-		insn->code = BPF_JMP | BPF_CALL;
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
+		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
-			insn->imm = __skb_get_pay_offset - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR:
-			insn->imm = __skb_get_nlattr - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
-			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
 			break;
 		case SKF_AD_OFF + SKF_AD_CPU:
-			insn->imm = __get_raw_cpu_id - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
+			break;
+		case SKF_AD_OFF + SKF_AD_RANDOM:
+			*insn = BPF_EMIT_CALL(__get_random_u32);
 			break;
 		}
 		break;
 
 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
-		insn->code = BPF_ALU | BPF_XOR | BPF_X;
-		insn->a_reg = A_REG;
-		insn->x_reg = X_REG;
+		/* A ^= X */
+		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
 		break;
 
 	default:
@@ -870,7 +838,7 @@
 	u8 bpf_src;
 
 	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
-	BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
 
 	if (len <= 0 || len >= BPF_MAXINSNS)
 		return -EINVAL;
@@ -885,11 +853,8 @@
 	new_insn = new_prog;
 	fp = prog;
 
-	if (new_insn) {
-		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		new_insn->a_reg = CTX_REG;
-		new_insn->x_reg = ARG1_REG;
-	}
+	if (new_insn)
+		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
 	new_insn++;
 
 	for (i = 0; i < len; fp++, i++) {
@@ -937,17 +902,16 @@
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
-			insn->code = fp->code;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
-			insn->imm = fp->k;
+			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
-		/* Jump opcodes map as-is, but offsets need adjustment. */
-		case BPF_JMP | BPF_JA:
-			target = i + fp->k + 1;
-			insn->code = fp->code;
-#define EMIT_JMP							\
+		/* Jump transformation cannot use BPF block macros
+		 * everywhere as offset calculation and target updates
+		 * require a bit more work than the rest, i.e. jump
+		 * opcodes map as-is, but offsets need adjustment.
+		 */
+
+#define BPF_EMIT_JMP							\
 	do {								\
 		if (target >= len || target < 0)			\
 			goto err;					\
@@ -956,7 +920,10 @@
 		insn->off -= insn - tmp_insns;				\
 	} while (0)
 
-			EMIT_JMP;
+		case BPF_JMP | BPF_JA:
+			target = i + fp->k + 1;
+			insn->code = fp->code;
+			BPF_EMIT_JMP;
 			break;
 
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -972,17 +939,14 @@
 				 * immediate into tmp register and use it
 				 * in compare insn.
 				 */
-				insn->code = BPF_ALU | BPF_MOV | BPF_K;
-				insn->a_reg = TMP_REG;
-				insn->imm = fp->k;
-				insn++;
+				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 
-				insn->a_reg = A_REG;
-				insn->x_reg = TMP_REG;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_TMP;
 				bpf_src = BPF_X;
 			} else {
-				insn->a_reg = A_REG;
-				insn->x_reg = X_REG;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_X;
 				insn->imm = fp->k;
 				bpf_src = BPF_SRC(fp->code);
 			}
@@ -991,7 +955,7 @@
 			if (fp->jf == 0) {
 				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
 				target = i + fp->jt + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
@@ -999,127 +963,94 @@
 			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
 				insn->code = BPF_JMP | BPF_JNE | bpf_src;
 				target = i + fp->jf + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
 			/* Other jumps are mapped into two insns: Jxx and JA. */
 			target = i + fp->jt + 1;
 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			insn++;
 
 			insn->code = BPF_JMP | BPF_JA;
 			target = i + fp->jf + 1;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			break;
 
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = TMP_REG;
-			insn->x_reg = A_REG;
-			insn++;
-
-			insn->code = BPF_LD | BPF_ABS | BPF_B;
-			insn->a_reg = A_REG;
-			insn->imm = fp->k;
-			insn++;
-
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
-			insn->imm = 0xf;
-			insn++;
-
-			insn->code = BPF_ALU | BPF_LSH | BPF_K;
-			insn->a_reg = A_REG;
-			insn->imm = 2;
-			insn++;
-
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
-			insn++;
-
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = TMP_REG;
+			/* tmp = A */
+			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
+			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
+			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
+			/* A &= 0xf */
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
+			/* A <<= 2 */
+			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
+			/* X = A */
+			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
+			/* A = tmp */
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
 		case BPF_RET | BPF_A:
 		case BPF_RET | BPF_K:
-			insn->code = BPF_ALU | BPF_MOV |
-				     (BPF_RVAL(fp->code) == BPF_K ?
-				      BPF_K : BPF_X);
-			insn->a_reg = 0;
-			insn->x_reg = A_REG;
-			insn->imm = fp->k;
-			insn++;
-
-			insn->code = BPF_JMP | BPF_EXIT;
+			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+						BPF_K : BPF_X, BPF_REG_0,
+						BPF_REG_A, fp->k);
+			*insn = BPF_EXIT_INSN();
 			break;
 
 		/* Store to stack. */
 		case BPF_ST:
 		case BPF_STX:
-			insn->code = BPF_STX | BPF_MEM | BPF_W;
-			insn->a_reg = FP_REG;
-			insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+					    BPF_ST ? BPF_REG_A : BPF_REG_X,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* Load from stack. */
 		case BPF_LD | BPF_MEM:
 		case BPF_LDX | BPF_MEM:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = FP_REG;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* A = K or X = K */
 		case BPF_LD | BPF_IMM:
 		case BPF_LDX | BPF_IMM:
-			insn->code = BPF_ALU | BPF_MOV | BPF_K;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->imm = fp->k;
+			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+					      BPF_REG_A : BPF_REG_X, fp->k);
 			break;
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
+			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
 			break;
 
 		/* A = skb->len or X = skb->len */
 		case BPF_LD | BPF_W | BPF_LEN:
 		case BPF_LDX | BPF_W | BPF_LEN:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = CTX_REG;
-			insn->off = offsetof(struct sk_buff, len);
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+					    offsetof(struct sk_buff, len));
 			break;
 
-		/* access seccomp_data fields */
+		/* Access seccomp_data fields. */
 		case BPF_LDX | BPF_ABS | BPF_W:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = A_REG;
-			insn->x_reg = CTX_REG;
-			insn->off = fp->k;
+			/* A = *(u32 *) (ctx + K) */
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
+		/* Unknown instruction. */
 		default:
 			goto err;
 		}
@@ -1128,7 +1059,6 @@
 		if (new_prog)
 			memcpy(new_insn, tmp_insns,
 			       sizeof(*insn) * (insn - tmp_insns));
-
 		new_insn += insn - tmp_insns;
 	}
 
@@ -1143,7 +1073,6 @@
 		new_flen = new_insn - new_prog;
 		if (pass > 2)
 			goto err;
-
 		goto do_pass;
 	}
 
@@ -1167,44 +1096,46 @@
  */
 static int check_load_and_stores(struct sock_filter *filter, int flen)
 {
-	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
 	int pc, ret = 0;
 
 	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+
 	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		return -ENOMEM;
+
 	memset(masks, 0xff, flen * sizeof(*masks));
 
 	for (pc = 0; pc < flen; pc++) {
 		memvalid &= masks[pc];
 
 		switch (filter[pc].code) {
-		case BPF_S_ST:
-		case BPF_S_STX:
+		case BPF_ST:
+		case BPF_STX:
 			memvalid |= (1 << filter[pc].k);
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
 			if (!(memvalid & (1 << filter[pc].k))) {
 				ret = -EINVAL;
 				goto error;
 			}
 			break;
-		case BPF_S_JMP_JA:
-			/* a jump must set masks on target */
+		case BPF_JMP | BPF_JA:
+			/* A jump must set masks on target */
 			masks[pc + 1 + filter[pc].k] &= memvalid;
 			memvalid = ~0;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* a jump must set masks on targets */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* A jump must set masks on targets */
 			masks[pc + 1 + filter[pc].jt] &= memvalid;
 			masks[pc + 1 + filter[pc].jf] &= memvalid;
 			memvalid = ~0;
@@ -1216,6 +1147,72 @@
 	return ret;
 }
 
+static bool chk_code_allowed(u16 code_to_probe)
+{
+	static const bool codes[] = {
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_K] = true,
+		[BPF_ALU | BPF_ADD | BPF_X] = true,
+		[BPF_ALU | BPF_SUB | BPF_K] = true,
+		[BPF_ALU | BPF_SUB | BPF_X] = true,
+		[BPF_ALU | BPF_MUL | BPF_K] = true,
+		[BPF_ALU | BPF_MUL | BPF_X] = true,
+		[BPF_ALU | BPF_DIV | BPF_K] = true,
+		[BPF_ALU | BPF_DIV | BPF_X] = true,
+		[BPF_ALU | BPF_MOD | BPF_K] = true,
+		[BPF_ALU | BPF_MOD | BPF_X] = true,
+		[BPF_ALU | BPF_AND | BPF_K] = true,
+		[BPF_ALU | BPF_AND | BPF_X] = true,
+		[BPF_ALU | BPF_OR | BPF_K] = true,
+		[BPF_ALU | BPF_OR | BPF_X] = true,
+		[BPF_ALU | BPF_XOR | BPF_K] = true,
+		[BPF_ALU | BPF_XOR | BPF_X] = true,
+		[BPF_ALU | BPF_LSH | BPF_K] = true,
+		[BPF_ALU | BPF_LSH | BPF_X] = true,
+		[BPF_ALU | BPF_RSH | BPF_K] = true,
+		[BPF_ALU | BPF_RSH | BPF_X] = true,
+		[BPF_ALU | BPF_NEG] = true,
+		/* Load instructions */
+		[BPF_LD | BPF_W | BPF_ABS] = true,
+		[BPF_LD | BPF_H | BPF_ABS] = true,
+		[BPF_LD | BPF_B | BPF_ABS] = true,
+		[BPF_LD | BPF_W | BPF_LEN] = true,
+		[BPF_LD | BPF_W | BPF_IND] = true,
+		[BPF_LD | BPF_H | BPF_IND] = true,
+		[BPF_LD | BPF_B | BPF_IND] = true,
+		[BPF_LD | BPF_IMM] = true,
+		[BPF_LD | BPF_MEM] = true,
+		[BPF_LDX | BPF_W | BPF_LEN] = true,
+		[BPF_LDX | BPF_B | BPF_MSH] = true,
+		[BPF_LDX | BPF_IMM] = true,
+		[BPF_LDX | BPF_MEM] = true,
+		/* Store instructions */
+		[BPF_ST] = true,
+		[BPF_STX] = true,
+		/* Misc instructions */
+		[BPF_MISC | BPF_TAX] = true,
+		[BPF_MISC | BPF_TXA] = true,
+		/* Return instructions */
+		[BPF_RET | BPF_K] = true,
+		[BPF_RET | BPF_A] = true,
+		/* Jump instructions */
+		[BPF_JMP | BPF_JA] = true,
+		[BPF_JMP | BPF_JEQ | BPF_K] = true,
+		[BPF_JMP | BPF_JEQ | BPF_X] = true,
+		[BPF_JMP | BPF_JGE | BPF_K] = true,
+		[BPF_JMP | BPF_JGE | BPF_X] = true,
+		[BPF_JMP | BPF_JGT | BPF_K] = true,
+		[BPF_JMP | BPF_JGT | BPF_X] = true,
+		[BPF_JMP | BPF_JSET | BPF_K] = true,
+		[BPF_JMP | BPF_JSET | BPF_X] = true,
+	};
+
+	if (code_to_probe >= ARRAY_SIZE(codes))
+		return false;
+
+	return codes[code_to_probe];
+}
+
 /**
  *	sk_chk_filter - verify socket filter code
  *	@filter: filter to verify
@@ -1232,153 +1229,76 @@
  */
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 {
-	/*
-	 * Valid instructions are initialized to non-0.
-	 * Invalid instructions are initialized to 0.
-	 */
-	static const u8 codes[] = {
-		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
-		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
-		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
-		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
-		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
-		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
-		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
-		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
-		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
-		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
-		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
-		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
-		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
-		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
-		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
-		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
-		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
-		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
-		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
-		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
-		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
-		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
-		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
-		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
-		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
-		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
-		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
-		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
-		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
-		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
-		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
-		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
-		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
-		[BPF_RET|BPF_K]          = BPF_S_RET_K,
-		[BPF_RET|BPF_A]          = BPF_S_RET_A,
-		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
-		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
-		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
-		[BPF_ST]                 = BPF_S_ST,
-		[BPF_STX]                = BPF_S_STX,
-		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
-		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
-		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
-		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
-		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
-		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
-		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
-		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
-		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
-	};
-	int pc;
 	bool anc_found;
+	int pc;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
-	/* check the filter code now */
+	/* Check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
 		struct sock_filter *ftest = &filter[pc];
-		u16 code = ftest->code;
 
-		if (code >= ARRAY_SIZE(codes))
+		/* May we actually operate on this code? */
+		if (!chk_code_allowed(ftest->code))
 			return -EINVAL;
-		code = codes[code];
-		if (!code)
-			return -EINVAL;
+
 		/* Some instructions need special checks */
-		switch (code) {
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_ALU_MOD_K:
-			/* check for division by zero */
+		switch (ftest->code) {
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_MOD | BPF_K:
+			/* Check for division by zero */
 			if (ftest->k == 0)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-			/* check for invalid memory addresses */
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			/* Check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JA:
-			/*
-			 * Note, the large ftest->k might cause loops.
+		case BPF_JMP | BPF_JA:
+			/* Note, the large ftest->k might cause loops.
 			 * Compare this with conditional jumps below,
 			 * where offsets are limited. --ANK (981016)
 			 */
-			if (ftest->k >= (unsigned int)(flen-pc-1))
+			if (ftest->k >= (unsigned int)(flen - pc - 1))
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* for conditionals both must be safe */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* Both conditionals must be safe */
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
-				code = BPF_S_ANC_##CODE;	\
-				anc_found = true;		\
-				break
-			switch (ftest->k) {
-			ANCILLARY(PROTOCOL);
-			ANCILLARY(PKTTYPE);
-			ANCILLARY(IFINDEX);
-			ANCILLARY(NLATTR);
-			ANCILLARY(NLATTR_NEST);
-			ANCILLARY(MARK);
-			ANCILLARY(QUEUE);
-			ANCILLARY(HATYPE);
-			ANCILLARY(RXHASH);
-			ANCILLARY(CPU);
-			ANCILLARY(ALU_XOR_X);
-			ANCILLARY(VLAN_TAG);
-			ANCILLARY(VLAN_TAG_PRESENT);
-			ANCILLARY(PAY_OFFSET);
-			}
-
-			/* ancillary operation unknown or unsupported */
+			if (bpf_anc_helper(ftest) & BPF_ANC)
+				anc_found = true;
+			/* Ancillary operation unknown or unsupported */
 			if (anc_found == false && ftest->k >= SKF_AD_OFF)
 				return -EINVAL;
 		}
-		ftest->code = code;
 	}
 
-	/* last instruction must be a RET code */
+	/* Last instruction must be a RET code */
 	switch (filter[flen - 1].code) {
-	case BPF_S_RET_K:
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_K:
+	case BPF_RET | BPF_A:
 		return check_load_and_stores(filter, flen);
 	}
+
 	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
@@ -1423,7 +1343,7 @@
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
 	sk_release_orig_filter(fp);
-	bpf_jit_free(fp);
+	sk_filter_free(fp);
 }
 
 /**
@@ -1461,7 +1381,7 @@
 
 	fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (fp_new) {
-		memcpy(fp_new, fp, sizeof(struct sk_filter));
+		*fp_new = *fp;
 		/* As we're kepping orig_prog in fp_new along,
 		 * we need to make sure we're not evicting it
 		 * from the old fp.
@@ -1478,7 +1398,7 @@
 {
 	struct sock_filter *old_prog;
 	struct sk_filter *old_fp;
-	int i, err, new_len, old_len = fp->len;
+	int err, new_len, old_len = fp->len;
 
 	/* We are free to overwrite insns et al right here as it
 	 * won't be used at this point in time anymore internally
@@ -1488,13 +1408,6 @@
 	BUILD_BUG_ON(sizeof(struct sock_filter) !=
 		     sizeof(struct sock_filter_int));
 
-	/* For now, we need to unfiddle BPF_S_* identifiers in place.
-	 * This can sooner or later on be subject to removal, e.g. when
-	 * JITs have been converted.
-	 */
-	for (i = 0; i < fp->len; i++)
-		sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
 	/* Conversion cannot happen on overlapping memory areas,
 	 * so we need to keep the user BPF around until the 2nd
 	 * pass. At this time, the user BPF is stored in fp->insns.
@@ -1523,7 +1436,6 @@
 		goto out_err_free;
 	}
 
-	fp->bpf_func = sk_run_filter_int_skb;
 	fp->len = new_len;
 
 	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1536,6 +1448,8 @@
 		 */
 		goto out_err_free;
 
+	sk_filter_select_runtime(fp);
+
 	kfree(old_prog);
 	return fp;
 
@@ -1550,6 +1464,33 @@
 	return ERR_PTR(err);
 }
 
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
+/**
+ *	sk_filter_select_runtime - select execution runtime for BPF program
+ *	@fp: sk_filter populated with internal BPF program
+ *
+ * Try to JIT the internal BPF program; if no JIT is available, the interpreter
+ * is selected. The BPF program will be executed via the SK_RUN_FILTER() macro.
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+	fp->bpf_func = (void *) __sk_run_filter;
+
+	/* Probe if internal BPF can be JITed */
+	bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+	bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1592,7 +1533,7 @@
  * a negative errno code is returned. On success the return is zero.
  */
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog)
+				struct sock_fprog_kern *fprog)
 {
 	unsigned int fsize = sk_filter_proglen(fprog);
 	struct sk_filter *fp;
@@ -1713,83 +1654,6 @@
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
 
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
-	static const u16 decodes[] = {
-		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
-		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
-		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
-		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
-		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
-		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
-		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
-		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
-		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
-		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
-		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
-		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
-		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
-		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
-		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
-		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
-		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
-		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
-		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
-		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
-		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
-		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
-		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
-		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
-		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
-		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
-		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
-		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
-		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
-		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
-		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
-		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
-		[BPF_S_RET_K]		= BPF_RET|BPF_K,
-		[BPF_S_RET_A]		= BPF_RET|BPF_A,
-		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
-		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
-		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
-		[BPF_S_ST]		= BPF_ST,
-		[BPF_S_STX]		= BPF_STX,
-		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
-		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
-		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
-		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
-		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
-		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
-		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
-		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
-		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
-	};
-	u16 code;
-
-	code = filt->code;
-
-	to->code = decodes[code];
-	to->jt = filt->jt;
-	to->jf = filt->jf;
-	to->k = filt->k;
-}
-
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 		  unsigned int len)
 {

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7c8ffd9..85b6269 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c

@@ -273,7 +273,7 @@
 {
 	const struct pernet_operations *ops;
 	struct net *net, *tmp;
-	LIST_HEAD(net_kill_list);
+	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
 
 	/* Atomically snapshot the list of namespaces to cleanup */

diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 22931e1..30d903b 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c

@@ -42,7 +42,7 @@
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
 	struct cgroup_cls_state *cs = css_cls_state(css);
-	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+	struct cgroup_cls_state *parent = css_cls_state(css->parent);
 
 	if (parent)
 		cs->classid = parent->classid;

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3825f66..2f385b9 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c

@@ -140,7 +140,7 @@
 
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup_subsys_state *parent_css = css_parent(css);
+	struct cgroup_subsys_state *parent_css = css->parent;
 	struct net_device *dev;
 	int ret = 0;
 
@@ -185,15 +185,15 @@
 	return 0;
 }
 
-static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
 	u32 prio;
 	int ret;
 
-	if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+	if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
 		return -EINVAL;
 
 	dev = dev_get_by_name(&init_net, devname);
@@ -202,11 +202,11 @@
 
 	rtnl_lock();
 
-	ret = netprio_set_prio(css, dev, prio);
+	ret = netprio_set_prio(of_css(of), dev, prio);
 
 	rtnl_unlock();
 	dev_put(dev);
-	return ret;
+	return ret ?: nbytes;
 }
 
 static int update_netprio(const void *v, struct file *file, unsigned n)
@@ -239,7 +239,7 @@
 	{
 		.name = "ifpriomap",
 		.seq_show = read_priomap,
-		.write_string = write_priomap,
+		.write = write_priomap,
 	},
 	{ }	/* terminate */
 };

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0304f98..fc17a9d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c

@@ -573,7 +573,7 @@
 		   is_zero_ether_addr(pkt_dev->src_mac) ?
 			     pkt_dev->odev->dev_addr : pkt_dev->src_mac);
 
-	seq_printf(seq, "dst_mac: ");
+	seq_puts(seq, "dst_mac: ");
 	seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
 
 	seq_printf(seq,
@@ -588,7 +588,7 @@
 
 	if (pkt_dev->nr_labels) {
 		unsigned int i;
-		seq_printf(seq, "     mpls: ");
+		seq_puts(seq, "     mpls: ");
 		for (i = 0; i < pkt_dev->nr_labels; i++)
 			seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
 				   i == pkt_dev->nr_labels-1 ? "\n" : ", ");
@@ -613,67 +613,67 @@
 	if (pkt_dev->node >= 0)
 		seq_printf(seq, "     node: %d\n", pkt_dev->node);
 
-	seq_printf(seq, "     Flags: ");
+	seq_puts(seq, "     Flags: ");
 
 	if (pkt_dev->flags & F_IPV6)
-		seq_printf(seq, "IPV6  ");
+		seq_puts(seq, "IPV6  ");
 
 	if (pkt_dev->flags & F_IPSRC_RND)
-		seq_printf(seq, "IPSRC_RND  ");
+		seq_puts(seq, "IPSRC_RND  ");
 
 	if (pkt_dev->flags & F_IPDST_RND)
-		seq_printf(seq, "IPDST_RND  ");
+		seq_puts(seq, "IPDST_RND  ");
 
 	if (pkt_dev->flags & F_TXSIZE_RND)
-		seq_printf(seq, "TXSIZE_RND  ");
+		seq_puts(seq, "TXSIZE_RND  ");
 
 	if (pkt_dev->flags & F_UDPSRC_RND)
-		seq_printf(seq, "UDPSRC_RND  ");
+		seq_puts(seq, "UDPSRC_RND  ");
 
 	if (pkt_dev->flags & F_UDPDST_RND)
-		seq_printf(seq, "UDPDST_RND  ");
+		seq_puts(seq, "UDPDST_RND  ");
 
 	if (pkt_dev->flags & F_UDPCSUM)
-		seq_printf(seq, "UDPCSUM  ");
+		seq_puts(seq, "UDPCSUM  ");
 
 	if (pkt_dev->flags & F_MPLS_RND)
-		seq_printf(seq,  "MPLS_RND  ");
+		seq_puts(seq,  "MPLS_RND  ");
 
 	if (pkt_dev->flags & F_QUEUE_MAP_RND)
-		seq_printf(seq,  "QUEUE_MAP_RND  ");
+		seq_puts(seq,  "QUEUE_MAP_RND  ");
 
 	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
-		seq_printf(seq,  "QUEUE_MAP_CPU  ");
+		seq_puts(seq,  "QUEUE_MAP_CPU  ");
 
 	if (pkt_dev->cflows) {
 		if (pkt_dev->flags & F_FLOW_SEQ)
-			seq_printf(seq,  "FLOW_SEQ  "); /*in sequence flows*/
+			seq_puts(seq,  "FLOW_SEQ  "); /*in sequence flows*/
 		else
-			seq_printf(seq,  "FLOW_RND  ");
+			seq_puts(seq,  "FLOW_RND  ");
 	}
 
 #ifdef CONFIG_XFRM
 	if (pkt_dev->flags & F_IPSEC_ON) {
-		seq_printf(seq,  "IPSEC  ");
+		seq_puts(seq,  "IPSEC  ");
 		if (pkt_dev->spi)
 			seq_printf(seq, "spi:%u", pkt_dev->spi);
 	}
 #endif
 
 	if (pkt_dev->flags & F_MACSRC_RND)
-		seq_printf(seq, "MACSRC_RND  ");
+		seq_puts(seq, "MACSRC_RND  ");
 
 	if (pkt_dev->flags & F_MACDST_RND)
-		seq_printf(seq, "MACDST_RND  ");
+		seq_puts(seq, "MACDST_RND  ");
 
 	if (pkt_dev->flags & F_VID_RND)
-		seq_printf(seq, "VID_RND  ");
+		seq_puts(seq, "VID_RND  ");
 
 	if (pkt_dev->flags & F_SVID_RND)
-		seq_printf(seq, "SVID_RND  ");
+		seq_puts(seq, "SVID_RND  ");
 
 	if (pkt_dev->flags & F_NODE)
-		seq_printf(seq, "NODE_ALLOC  ");
+		seq_puts(seq, "NODE_ALLOC  ");
 
 	seq_puts(seq, "\n");
 
@@ -716,7 +716,7 @@
 	if (pkt_dev->result[0])
 		seq_printf(seq, "Result: %s\n", pkt_dev->result);
 	else
-		seq_printf(seq, "Result: Idle\n");
+		seq_puts(seq, "Result: Idle\n");
 
 	return 0;
 }
@@ -1735,14 +1735,14 @@
 
 	BUG_ON(!t);
 
-	seq_printf(seq, "Running: ");
+	seq_puts(seq, "Running: ");
 
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (pkt_dev->running)
 			seq_printf(seq, "%s ", pkt_dev->odevname);
 
-	seq_printf(seq, "\nStopped: ");
+	seq_puts(seq, "\nStopped: ");
 
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (!pkt_dev->running)
@@ -1751,7 +1751,7 @@
 	if (t->result[0])
 		seq_printf(seq, "\nResult: %s\n", t->result);
 	else
-		seq_printf(seq, "\nResult: NA\n");
+		seq_puts(seq, "\nResult: NA\n");
 
 	if_unlock(t);
 

diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index eaba0f6..d3027a7 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c

@@ -88,7 +88,7 @@
 
 void __init ptp_classifier_init(void)
 {
-	static struct sock_filter ptp_filter[] = {
+	static struct sock_filter ptp_filter[] __initdata = {
 		{ 0x28,  0,  0, 0x0000000c },
 		{ 0x15,  0, 12, 0x00000800 },
 		{ 0x30,  0,  0, 0x00000017 },
@@ -133,7 +133,7 @@
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 	};
-	struct sock_fprog ptp_prog = {
+	struct sock_fprog_kern ptp_prog = {
 		.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
 	};
 

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d8d8fc..1063996 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -798,8 +798,8 @@
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
-			 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+			 nla_total_size(sizeof(struct ifla_vf_rate)));
 		return size;
 	} else
 		return 0;
@@ -1065,6 +1065,7 @@
 			struct ifla_vf_info ivi;
 			struct ifla_vf_mac vf_mac;
 			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_rate vf_rate;
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
@@ -1085,6 +1086,7 @@
 				break;
 			vf_mac.vf =
 				vf_vlan.vf =
+				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
 				vf_linkstate.vf = ivi.vf;
@@ -1092,7 +1094,9 @@
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
 			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.tx_rate;
+			vf_tx_rate.rate = ivi.max_tx_rate;
+			vf_rate.min_tx_rate = ivi.min_tx_rate;
+			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1102,6 +1106,8 @@
 			}
 			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
 			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+				    &vf_rate) ||
 			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
 				    &vf_tx_rate) ||
 			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1208,6 +1214,10 @@
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 	[IFLA_VF_SPOOFCHK]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_spoofchk) },
+	[IFLA_VF_RATE]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_rate) },
+	[IFLA_VF_LINK_STATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_link_state) },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1234,6 +1244,7 @@
 	struct nlattr *tb[IFLA_MAX+1];
 	u32 ext_filter_mask = 0;
 	int err;
+	int hdrlen;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
@@ -1241,8 +1252,17 @@
 	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
-	if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
-			ifla_policy) >= 0) {
+	/* A hack to preserve kernel<->userspace interface.
+	 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+	 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+	 * what iproute2 < v3.9.0 used.
+	 * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+	 * attribute, its netlink message is shorter than struct ifinfomsg.
+	 */
+	hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+	if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
 
 		if (tb[IFLA_EXT_MASK])
 			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1367,11 +1387,29 @@
 		}
 		case IFLA_VF_TX_RATE: {
 			struct ifla_vf_tx_rate *ivt;
+			struct ifla_vf_info ivf;
 			ivt = nla_data(vf);
 			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_tx_rate)
-				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
-							      ivt->rate);
+			if (ops->ndo_get_vf_config)
+				err = ops->ndo_get_vf_config(dev, ivt->vf,
+							     &ivf);
+			if (err)
+				break;
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivf.min_tx_rate,
+							   ivt->rate);
+			break;
+		}
+		case IFLA_VF_RATE: {
+			struct ifla_vf_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivt->min_tx_rate,
+							   ivt->max_tx_rate);
 			break;
 		}
 		case IFLA_VF_SPOOFCHK: {
@@ -1744,7 +1782,6 @@
 
 	ops->dellink(dev, &list_kill);
 	unregister_netdevice_many(&list_kill);
-	list_del(&list_kill);
 	return 0;
 }
 
@@ -2019,11 +2056,15 @@
 		if (ops->newlink) {
 			err = ops->newlink(net, dev, tb, data);
 			/* Drivers should call free_netdev() in ->destructor
-			 * and unregister it on failure so that device could be
-			 * finally freed in rtnl_unlock.
+			 * and unregister it on failure after registration
+			 * so that device could be finally freed in rtnl_unlock.
 			 */
-			if (err < 0)
+			if (err < 0) {
+				/* If device is not registered at all, free it now */
+				if (dev->reg_state == NETREG_UNINITIALIZED)
+					free_netdev(dev);
 				goto out;
+			}
 		} else {
 			err = register_netdevice(dev);
 			if (err < 0) {
@@ -2095,9 +2136,13 @@
 	struct nlattr *tb[IFLA_MAX+1];
 	u32 ext_filter_mask = 0;
 	u16 min_ifinfo_dump_size = 0;
+	int hdrlen;
 
-	if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
-			ifla_policy) >= 0) {
+	/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+	hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+	if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
 		if (tb[IFLA_EXT_MASK])
 			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 	}

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56..ba71212 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c

@@ -85,31 +85,6 @@
 #endif
 
 #ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
-	u32 hash[MD5_DIGEST_WORDS];
-
-	net_secret_init();
-	hash[0] = (__force __u32) daddr;
-	hash[1] = net_secret[13];
-	hash[2] = net_secret[14];
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
-	__u32 hash[4];
-
-	net_secret_init();
-	memcpy(hash, daddr, 16);
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
 
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8383b2b..bf92824 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c

@@ -694,7 +694,7 @@
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
-	new->local_df		= old->local_df;
+	new->ignore_df		= old->ignore_df;
 	new->pkt_type		= old->pkt_type;
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
@@ -951,10 +951,13 @@
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	__pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
  *	@skb: buffer to copy
  *	@headroom: headroom of new skb
  *	@gfp_mask: allocation priority
+ *	@fclone: if true allocate the copy of the skb from the fclone
+ *	cache instead of the head cache; it is recommended to set this
+ *	to true for the cases where the copy will likely be cloned
  *
  *	Make a copy of both an &sk_buff and part of its data, located
  *	in header. Fragmented data remain shared. This is used when
@@ -964,11 +967,12 @@
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = __alloc_skb(size, gfp_mask,
-					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -1008,7 +1012,7 @@
 out:
 	return n;
 }
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
 
 /**
  *	pskb_expand_head - reallocate header of &sk_buff
@@ -2881,12 +2885,14 @@
 	int pos;
 	int dummy;
 
+	__skb_push(head_skb, doffset);
 	proto = skb_network_protocol(head_skb, &dummy);
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);
 
-	csum = !!can_checksum_protocol(features, proto);
-	__skb_push(head_skb, doffset);
+	csum = !head_skb->encap_hdr_csum &&
+	    !!can_checksum_protocol(features, proto);
+
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);
 
@@ -2983,6 +2989,8 @@
 			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + offset;
 			continue;
 		}
 
@@ -3052,6 +3060,8 @@
 			nskb->csum = skb_checksum(nskb, doffset,
 						  nskb->len - doffset, 0);
 			nskb->ip_summed = CHECKSUM_NONE;
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 		}
 	} while ((offset += len) < head_skb->len);
 
@@ -3913,7 +3923,7 @@
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
-	skb->local_df = 0;
+	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	secpath_reset(skb);

diff --git a/net/core/sock.c b/net/core/sock.c
index 664ee42..026e01f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c

@@ -784,7 +784,7 @@
 		break;
 
 	case SO_NO_CHECK:
-		sk->sk_no_check = valbool;
+		sk->sk_no_check_tx = valbool;
 		break;
 
 	case SO_PRIORITY:
@@ -1064,7 +1064,7 @@
 		break;
 
 	case SO_NO_CHECK:
-		v.val = sk->sk_no_check;
+		v.val = sk->sk_no_check_tx;
 		break;
 
 	case SO_PRIORITY:

diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 0000000..8c3203c
--- /dev/null
+++ b/net/core/tso.c

@@ -0,0 +1,77 @@
+#include <linux/export.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Expected number of TX descriptors: two per GSO segment plus one per frag */
+int tso_count_descs(struct sk_buff *skb)
+{
+	/* The Marvell Way */
+	return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+EXPORT_SYMBOL(tso_count_descs);
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+		   int size, bool is_last)
+{
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); /* through TCP */
+	int mac_hdr_len = skb_network_offset(skb);
+
+	memcpy(hdr, skb->data, hdr_len); /* start from the skb's own headers */
+	iph = (struct iphdr *)(hdr + mac_hdr_len); /* treated as IPv4 here */
+	iph->id = htons(tso->ip_id);
+	iph->tot_len = htons(size + hdr_len - mac_hdr_len); /* size: presumably payload bytes - confirm */
+	tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+	tcph->seq = htonl(tso->tcp_seq);
+	tso->ip_id++; /* the next header built gets the next IP id */
+
+	if (!is_last) {
+		/* Clear all special flags for not last packet */
+		tcph->psh = 0;
+		tcph->fin = 0;
+		tcph->rst = 0;
+	}
+}
+EXPORT_SYMBOL(tso_build_hdr);
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+	tso->tcp_seq += size; /* advance all cursors by the bytes consumed */
+	tso->size -= size;
+	tso->data += size;
+
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* Current area is exhausted: move to the next fragment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
+EXPORT_SYMBOL(tso_build_data);
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); /* through TCP */
+
+	tso->ip_id = ntohs(ip_hdr(skb)->id); /* seed state from the skb headers */
+	tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+	tso->next_frag_idx = 0;
+
+	/* Build first data */
+	tso->size = skb_headlen(skb) - hdr_len; /* linear bytes after the headers */
+	tso->data = skb->data + hdr_len;
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* No linear payload: start at the first fragment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
+EXPORT_SYMBOL(tso_start);

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 22b5d81..6ca645c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c

@@ -1024,7 +1024,6 @@
 	.protocol	= IPPROTO_DCCP,
 	.prot		= &dccp_v4_prot,
 	.ops		= &inet_dccp_ops,
-	.no_check	= 0,
 	.flags		= INET_PROTOSW_ICSK,
 };
 

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index eb892b4..de2c1e7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c

@@ -1084,14 +1084,15 @@
 
 static inline int dccp_mib_init(void)
 {
-	return snmp_mib_init((void __percpu **)dccp_statistics,
-			     sizeof(struct dccp_mib),
-			     __alignof__(struct dccp_mib));
+	dccp_statistics = alloc_percpu(struct dccp_mib);
+	if (!dccp_statistics)
+		return -ENOMEM;
+	return 0;
 }
 
 static inline void dccp_mib_exit(void)
 {
-	snmp_mib_free((void __percpu **)dccp_statistics);
+	free_percpu(dccp_statistics);
 }
 
 static int thash_entries;

diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 607ab71..53731e4 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c

@@ -20,6 +20,7 @@
 
 /* Boundary values */
 static int		zero     = 0,
+			one      = 1,
 			u8_max   = 0xFF;
 static unsigned long	seqw_min = DCCPF_SEQ_WMIN,
 			seqw_max = 0xFFFFFFFF;		/* maximum on 32 bit */
@@ -58,7 +59,7 @@
 		.maxlen		= sizeof(sysctl_dccp_request_retries),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= &one,
 		.extra2		= &u8_max,
 	},
 	{

diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 16f0b22..1cd46a3 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c

@@ -280,7 +280,7 @@
  */
 u32 dccp_timestamp(void)
 {
-	s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+	u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
 
 	do_div(delta, 10);
 	return delta;

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 4c04848..ae011b4 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c

@@ -481,7 +481,7 @@
 
 	sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
 	sk->sk_destruct    = dn_destruct;
-	sk->sk_no_check    = 1;
+	sk->sk_no_check_tx = 1;
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;

diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index e7b6d53..9acec61f 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c

@@ -93,8 +93,8 @@
 	}
 
 	if (!namelen)
-		namelen = strlen(name);
-	if (namelen < 3)
+		namelen = strnlen(name, 256);
+	if (namelen < 3 || namelen > 255)
 		return -EINVAL;
 	desclen += namelen + 1;
 
@@ -149,7 +149,9 @@
 	if (!*_result)
 		goto put;
 
-	memcpy(*_result, upayload->data, len + 1);
+	memcpy(*_result, upayload->data, len);
+	(*_result)[len] = '\0';	/* [] binds tighter than *: index the copy */
+
 	if (_expiry)
 		*_expiry = rkey->expiry;
 

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 02c0e17..64c5af0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c

@@ -346,7 +346,7 @@
 		return slave_dev;
 
 	slave_dev->features = master->vlan_features;
-	SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
+	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
 

diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index 0f5a69e..fe6bd7a 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c

@@ -92,6 +92,7 @@
 	const u8 *saddr = _saddr;
 	const u8 *daddr = _daddr;
 	struct ieee802154_addr sa, da;
+	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 
 	/* TODO:
 	 * if this package isn't ipv6 one, where should it be routed?
@@ -115,8 +116,7 @@
 	 * from MAC subif of the 'dev' and 'real_dev' network devices, but
 	 * this isn't implemented in mainline yet, so currently we assign 0xff
 	 */
-	mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
-	mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+	cb->type = IEEE802154_FC_TYPE_DATA;
 
 	/* prepare wpan address data */
 	sa.mode = IEEE802154_ADDR_LONG;
@@ -135,11 +135,10 @@
 	} else {
 		da.mode = IEEE802154_ADDR_LONG;
 		da.extended_addr = ieee802154_devaddr_from_raw(daddr);
-
-		/* request acknowledgment */
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
 	}
 
+	cb->ackreq = !lowpan_is_addr_broadcast(daddr);
+
 	return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
 			type, (void *)&da, (void *)&sa, 0);
 }
@@ -221,139 +220,149 @@
 	return 0;
 }
 
+static struct sk_buff*
+lowpan_alloc_frag(struct sk_buff *skb, int size,
+		  const struct ieee802154_hdr *master_hdr)
+{
+	struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev;
+	struct sk_buff *frag;
+	int rc;
+
+	frag = alloc_skb(real_dev->hard_header_len +
+			 real_dev->needed_tailroom + size,
+			 GFP_ATOMIC);
+
+	if (likely(frag)) {
+		frag->dev = real_dev;
+		frag->priority = skb->priority;
+		skb_reserve(frag, real_dev->hard_header_len);
+		skb_reset_network_header(frag);
+		*mac_cb(frag) = *mac_cb(skb);
+
+		rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest,
+				     &master_hdr->source, size);
+		if (rc < 0) {
+			kfree_skb(frag);
+			return ERR_PTR(rc); /* rc is already a negative errno */
+		}
+	} else {
+		frag = ERR_PTR(-ENOMEM); /* IS_ERR() only sees negative errnos */
+	}
+
+	return frag;
+}
+
 static int
-lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
-		     int mlen, int plen, int offset, int type)
+lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
+		     u8 *frag_hdr, int frag_hdrlen,
+		     int offset, int len)
 {
 	struct sk_buff *frag;
-	int hlen;
 
-	hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
-			LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
+	raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen);
 
-	raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
+	frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr);
+	if (IS_ERR(frag))
+		return PTR_ERR(frag); /* propagate the negative errno as is */
 
-	frag = netdev_alloc_skb(skb->dev,
-				hlen + mlen + plen + IEEE802154_MFR_SIZE);
-	if (!frag)
-		return -ENOMEM;
+	memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen);
+	memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len);
 
-	frag->priority = skb->priority;
-
-	/* copy header, MFR and payload */
-	skb_put(frag, mlen);
-	skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen);
-
-	skb_put(frag, hlen);
-	skb_copy_to_linear_data_offset(frag, mlen, head, hlen);
-
-	skb_put(frag, plen);
-	skb_copy_to_linear_data_offset(frag, mlen + hlen,
-				       skb_network_header(skb) + offset, plen);
-
-	raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len);
+	raw_dump_table(__func__, " fragment dump", frag->data, frag->len);
 
 	return dev_queue_xmit(frag);
 }
 
 static int
-lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
+lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
+		       const struct ieee802154_hdr *wpan_hdr)
 {
-	int err;
-	u16 dgram_offset, dgram_size, payload_length, header_length,
-	    lowpan_size, frag_plen, offset;
-	__be16 tag;
-	u8 head[5];
+	u16 dgram_size, dgram_offset;
+	__be16 frag_tag;
+	u8 frag_hdr[5];
+	int frag_cap, frag_len, payload_cap, rc;
+	int skb_unprocessed, skb_offset;
 
-	header_length = skb->mac_len;
-	payload_length = skb->len - header_length;
-	tag = lowpan_dev_info(dev)->fragment_tag++;
-	lowpan_size = skb_network_header_len(skb);
 	dgram_size = lowpan_uncompress_size(skb, &dgram_offset) -
-		     header_length;
+		     skb->mac_len;
+	frag_tag = lowpan_dev_info(dev)->fragment_tag++;
 
-	/* first fragment header */
-	head[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x7);
-	head[1] = dgram_size & 0xff;
-	memcpy(head + 2, &tag, sizeof(tag));
+	frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
+	frag_hdr[1] = dgram_size & 0xff;
+	memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag));
 
-	/* calc the nearest payload length(divided to 8) for first fragment
-	 * which fits into a IEEE802154_MTU
-	 */
-	frag_plen = round_down(IEEE802154_MTU - header_length -
-			       LOWPAN_FRAG1_HEAD_SIZE - lowpan_size -
-			       IEEE802154_MFR_SIZE, 8);
+	payload_cap = ieee802154_max_payload(wpan_hdr);
 
-	err = lowpan_fragment_xmit(skb, head, header_length,
-				   frag_plen + lowpan_size, 0,
-				   LOWPAN_DISPATCH_FRAG1);
-	if (err) {
+	frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE -
+			      skb_network_header_len(skb), 8);
+
+	skb_offset = skb_network_header_len(skb);
+	skb_unprocessed = skb->len - skb->mac_len - skb_offset;
+
+	rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
+				  LOWPAN_FRAG1_HEAD_SIZE, 0,
+				  frag_len + skb_network_header_len(skb));
+	if (rc) {
 		pr_debug("%s unable to send FRAG1 packet (tag: %d)",
-			 __func__, tag);
-		goto exit;
+			 __func__, frag_tag);
+		goto err;
 	}
 
-	offset = lowpan_size + frag_plen;
-	dgram_offset += frag_plen;
+	frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1;
+	frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN;
+	frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8);
 
-	/* next fragment header */
-	head[0] &= ~LOWPAN_DISPATCH_FRAG1;
-	head[0] |= LOWPAN_DISPATCH_FRAGN;
+	do {
+		dgram_offset += frag_len;
+		skb_offset += frag_len;
+		skb_unprocessed -= frag_len;
+		frag_len = min(frag_cap, skb_unprocessed);
 
-	frag_plen = round_down(IEEE802154_MTU - header_length -
-			       LOWPAN_FRAGN_HEAD_SIZE - IEEE802154_MFR_SIZE, 8);
+		frag_hdr[4] = dgram_offset >> 3;
 
-	while (payload_length - offset > 0) {
-		int len = frag_plen;
-
-		head[4] = dgram_offset >> 3;
-
-		if (payload_length - offset < len)
-			len = payload_length - offset;
-
-		err = lowpan_fragment_xmit(skb, head, header_length, len,
-					   offset, LOWPAN_DISPATCH_FRAGN);
-		if (err) {
+		rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
+					  LOWPAN_FRAGN_HEAD_SIZE, skb_offset,
+					  frag_len);
+		if (rc) {
 			pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n",
-				 __func__, tag, offset);
-			goto exit;
+				 __func__, frag_tag, skb_offset);
+			goto err;
 		}
+	} while (skb_unprocessed > frag_cap);
 
-		offset += len;
-		dgram_offset += len;
-	}
+	consume_skb(skb);
+	return NET_XMIT_SUCCESS;
 
-exit:
-	return err;
+err:
+	kfree_skb(skb);
+	return rc;
 }
 
 static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	int err = -1;
+	struct ieee802154_hdr wpan_hdr;
+	int max_single;
 
 	pr_debug("package xmit\n");
 
-	skb->dev = lowpan_dev_info(dev)->real_dev;
-	if (skb->dev == NULL) {
-		pr_debug("ERROR: no real wpan device found\n");
-		goto error;
+	if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) {
+		kfree_skb(skb);
+		return NET_XMIT_DROP;
 	}
 
-	/* Send directly if less than the MTU minus the 2 checksum bytes. */
-	if (skb->len <= IEEE802154_MTU - IEEE802154_MFR_SIZE) {
-		err = dev_queue_xmit(skb);
-		goto out;
+	max_single = ieee802154_max_payload(&wpan_hdr);
+
+	if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
+		skb->dev = lowpan_dev_info(dev)->real_dev;
+		return dev_queue_xmit(skb);
+	} else {
+		netdev_tx_t rc;
+
+		pr_debug("frame is too big, fragmentation is needed\n");
+		rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr);
+
+		return rc < 0 ? NET_XMIT_DROP : rc;
 	}
-
-	pr_debug("frame is too big, fragmentation is needed\n");
-	err = lowpan_skb_fragmentation(skb, dev);
-error:
-	dev_kfree_skb(skb);
-out:
-	if (err)
-		pr_debug("ERROR: xmit failed\n");
-
-	return (err < 0) ? NET_XMIT_DROP : err;
 }
 
 static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)

diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 786437b..4f0ed87 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c

@@ -21,6 +21,7 @@
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
  */
 
+#include <linux/capability.h>
 #include <linux/net.h>
 #include <linux/module.h>
 #include <linux/if_arp.h>
@@ -45,7 +46,12 @@
 	struct ieee802154_addr dst_addr;
 
 	unsigned int bound:1;
+	unsigned int connected:1;
 	unsigned int want_ack:1;
+	unsigned int secen:1;
+	unsigned int secen_override:1;
+	unsigned int seclevel:3;
+	unsigned int seclevel_override:1;
 };
 
 static inline struct dgram_sock *dgram_sk(const struct sock *sk)
@@ -73,10 +79,7 @@
 {
 	struct dgram_sock *ro = dgram_sk(sk);
 
-	ro->dst_addr.mode = IEEE802154_ADDR_LONG;
-	ro->dst_addr.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
 	ro->want_ack = 1;
-	memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN);
 	return 0;
 }
 
@@ -183,6 +186,7 @@
 	}
 
 	ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr);
+	ro->connected = 1;
 
 out:
 	release_sock(sk);
@@ -194,10 +198,7 @@
 	struct dgram_sock *ro = dgram_sk(sk);
 
 	lock_sock(sk);
-
-	ro->dst_addr.mode = IEEE802154_ADDR_LONG;
-	memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN);
-
+	ro->connected = 0;
 	release_sock(sk);
 
 	return 0;
@@ -209,7 +210,9 @@
 	struct net_device *dev;
 	unsigned int mtu;
 	struct sk_buff *skb;
+	struct ieee802154_mac_cb *cb;
 	struct dgram_sock *ro = dgram_sk(sk);
+	struct ieee802154_addr dst_addr;
 	int hlen, tlen;
 	int err;
 
@@ -218,6 +221,11 @@
 		return -EOPNOTSUPP;
 	}
 
+	if (!ro->connected && !msg->msg_name)
+		return -EDESTADDRREQ;
+	else if (ro->connected && msg->msg_name)
+		return -EISCONN;
+
 	if (!ro->bound)
 		dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
 	else
@@ -249,18 +257,28 @@
 
 	skb_reset_network_header(skb);
 
-	mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
-	if (ro->want_ack)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+	cb = mac_cb_init(skb);
+	cb->type = IEEE802154_FC_TYPE_DATA;
+	cb->ackreq = ro->want_ack;
 
-	mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
-	err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr,
-			ro->bound ? &ro->src_addr : NULL, size);
+	if (msg->msg_name) {
+		DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
+
+		ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+	} else {
+		dst_addr = ro->dst_addr;
+	}
+
+	cb->secen = ro->secen;
+	cb->secen_override = ro->secen_override;
+	cb->seclevel = ro->seclevel;
+	cb->seclevel_override = ro->seclevel_override;
+
+	err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr,
+			      ro->bound ? &ro->src_addr : NULL, size);
 	if (err < 0)
 		goto out_skb;
 
-	skb_reset_mac_header(skb);
-
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
 		goto out_skb;
@@ -419,6 +437,20 @@
 	case WPAN_WANTACK:
 		val = ro->want_ack;
 		break;
+	case WPAN_SECURITY:
+		if (!ro->secen_override)
+			val = WPAN_SECURITY_DEFAULT;
+		else if (ro->secen)
+			val = WPAN_SECURITY_ON;
+		else
+			val = WPAN_SECURITY_OFF;
+		break;
+	case WPAN_SECURITY_LEVEL:
+		if (!ro->seclevel_override)
+			val = WPAN_SECURITY_LEVEL_DEFAULT;
+		else
+			val = ro->seclevel;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -434,6 +466,7 @@
 		    char __user *optval, unsigned int optlen)
 {
 	struct dgram_sock *ro = dgram_sk(sk);
+	struct net *net = sock_net(sk);
 	int val;
 	int err = 0;
 
@@ -449,6 +482,47 @@
 	case WPAN_WANTACK:
 		ro->want_ack = !!val;
 		break;
+	case WPAN_SECURITY:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+			err = -EPERM;
+			break;
+		}
+
+		switch (val) {
+		case WPAN_SECURITY_DEFAULT:
+			ro->secen_override = 0;
+			break;
+		case WPAN_SECURITY_ON:
+			ro->secen_override = 1;
+			ro->secen = 1;
+			break;
+		case WPAN_SECURITY_OFF:
+			ro->secen_override = 1;
+			ro->secen = 0;
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+		break;
+	case WPAN_SECURITY_LEVEL:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+			err = -EPERM;
+			break;
+		}
+
+		if (val < WPAN_SECURITY_LEVEL_DEFAULT ||
+		    val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) {
+			err = -EINVAL;
+		} else if (val == WPAN_SECURITY_LEVEL_DEFAULT) {
+			ro->seclevel_override = 0;
+		} else {
+			ro->seclevel_override = 1;
+			ro->seclevel = val;
+		}
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;

diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index bed42a4..c09294e 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c

@@ -195,15 +195,16 @@
 	return pos;
 }
 
+static const int ieee802154_sechdr_lengths[4] = { /* indexed by key id mode */
+	[IEEE802154_SCF_KEY_IMPLICIT] = 5,
+	[IEEE802154_SCF_KEY_INDEX] = 6,
+	[IEEE802154_SCF_KEY_SHORT_INDEX] = 10,
+	[IEEE802154_SCF_KEY_HW_INDEX] = 14,
+};
+
 static int ieee802154_hdr_sechdr_len(u8 sc)
 {
-	switch (IEEE802154_SCF_KEY_ID_MODE(sc)) {
-	case IEEE802154_SCF_KEY_IMPLICIT: return 5;
-	case IEEE802154_SCF_KEY_INDEX: return 6;
-	case IEEE802154_SCF_KEY_SHORT_INDEX: return 10;
-	case IEEE802154_SCF_KEY_HW_INDEX: return 14;
-	default: return -EINVAL;
-	}
+	return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)];
 }
 
 static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr)
@@ -285,3 +286,40 @@
 	return pos;
 }
 EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs);
+
+int
+ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+	const u8 *buf = skb_mac_header(skb);
+	int pos;
+
+	pos = ieee802154_hdr_peek_addrs(skb, hdr); /* offset reached so far */
+	if (pos < 0)
+		return -EINVAL;
+
+	if (hdr->fc.security_enabled) {
+		/* NOTE(review): buf[pos] is read before the bounds check - confirm */
+		u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos));
+		int want = pos + ieee802154_sechdr_lengths[key_id_mode];
+
+		if (buf + want > skb_tail_pointer(skb)) /* would overrun skb data */
+			return -EINVAL;
+
+		pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec);
+	}
+
+	return pos; /* offset just past the parsed header fields */
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_peek);
+
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr)
+{
+	int hlen = ieee802154_hdr_minlen(hdr);
+
+	if (hdr->fc.security_enabled) {
+		/* NOTE(review): -1 presumably avoids double counting - confirm */
+		hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1;
+		hlen += ieee802154_sechdr_authtag_len(&hdr->sec);
+	}
+
+	return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE; /* MTU less overhead */
+}
+EXPORT_SYMBOL_GPL(ieee802154_max_payload);

diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index 6693a5c..8b83a23 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h

@@ -68,4 +68,23 @@
 int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb);
 int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info);
 
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_keys(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devs(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb);
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb);
+
 #endif

diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 04b2058..26efcf4 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c

@@ -124,6 +124,26 @@
 	IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface,
 			ieee802154_dump_iface),
 	IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams),
+	IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams),
+	IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL,
+			ieee802154_llsec_dump_keys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL,
+			ieee802154_llsec_dump_devs),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL,
+			ieee802154_llsec_dump_devkeys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL,
+			ieee802154_llsec_dump_seclevels),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL,
+		      ieee802154_llsec_add_seclevel),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL,
+		      ieee802154_llsec_del_seclevel),
 };
 
 static const struct genl_multicast_group ieee802154_mcgrps[] = {

diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 5d28549..a3281b8 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c

@@ -715,3 +715,812 @@
 	dev_put(dev);
 	return rc;
 }
+
+
+
+/*
+ * Parse the key identifier attributes of a request into @desc.
+ *
+ * IEEE802154_ATTR_LLSEC_KEY_MODE is always required. For the implicit key
+ * mode the PAN id plus either a short or a hardware address must be given;
+ * every other mode requires IEEE802154_ATTR_LLSEC_KEY_ID, and the short- and
+ * hw-index modes additionally require their respective key source attribute.
+ *
+ * @desc is zeroed before parsing. Returns 0 on success or -EINVAL when a
+ * required attribute is missing.
+ */
+static int
+ieee802154_llsec_parse_key_id(struct genl_info *info,
+			      struct ieee802154_llsec_key_id *desc)
+{
+	memset(desc, 0, sizeof(*desc));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE])
+		return -EINVAL;
+
+	desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		/* The PAN id and one of the two address attributes are both
+		 * read below, so reject the request unless each is present.
+		 */
+		if (!info->attrs[IEEE802154_ATTR_PAN_ID] ||
+		    !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
+		      info->attrs[IEEE802154_ATTR_HW_ADDR]))
+			return -EINVAL;
+
+		desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+
+		if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) {
+			desc->device_addr.mode = IEEE802154_ADDR_SHORT;
+			desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+		} else {
+			desc->device_addr.mode = IEEE802154_ADDR_LONG;
+			desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+		}
+	}
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED])
+		return -EINVAL;
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT)
+		desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]);
+
+	switch (desc->mode) {
+	case IEEE802154_SCF_KEY_SHORT_INDEX:
+	{
+		u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]);
+		desc->short_source = cpu_to_le32(source);
+		break;
+	}
+	case IEEE802154_SCF_KEY_HW_INDEX:
+		desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Append the netlink attributes describing key identifier @desc to @msg.
+ *
+ * The key mode is always written. The implicit mode is followed by the PAN
+ * id and the short or extended device address; all other modes are followed
+ * by the key id and, for the short-/hw-index modes, the key source.
+ *
+ * Returns 0 on success or -EMSGSIZE if an attribute could not be added.
+ */
+static int
+ieee802154_llsec_fill_key_id(struct sk_buff *msg,
+			     const struct ieee802154_llsec_key_id *desc)
+{
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID,
+				      desc->device_addr.pan_id))
+			return -EMSGSIZE;
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_SHORT) {
+			if (nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+					      desc->device_addr.short_addr))
+				return -EMSGSIZE;
+		}
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_LONG) {
+			if (nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR,
+					   desc->device_addr.extended_addr))
+				return -EMSGSIZE;
+		}
+
+		/* Implicit keys carry neither a key id nor a key source. */
+		return 0;
+	}
+
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX) {
+		if (nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+				le32_to_cpu(desc->short_source)))
+			return -EMSGSIZE;
+	}
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX) {
+		if (nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+				   desc->extended_source))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_GETPARAMS: reply with the device's link-layer
+ * security parameters (enabled flag, outgoing security level, frame counter
+ * and outgoing key id).
+ *
+ * Returns the result of ieee802154_nl_reply() on success, -ENODEV if the
+ * device cannot be resolved, -EOPNOTSUPP if it has no llsec ops, the error
+ * from get_params(), or -ENOBUFS if the reply could not be built.
+ */
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	struct net_device *dev = NULL;
+	int rc = -ENOBUFS;
+	struct ieee802154_mlme_ops *ops;
+	void *hdr;
+	struct ieee802154_llsec_params params;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	ops = ieee802154_mlme_ops(dev);
+	if (!ops->llsec) {
+		rc = -EOPNOTSUPP;
+		goto out_dev;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out_dev;
+
+	hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0,
+		IEEE802154_LLSEC_GETPARAMS);
+	if (!hdr)
+		goto out_free;
+
+	rc = ops->llsec->get_params(dev, &params);
+	if (rc < 0)
+		goto out_free;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			be32_to_cpu(params.frame_counter)) ||
+	    ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+		/* get_params() left rc at 0; without this the caller would
+		 * see success although no reply is ever sent.
+		 */
+		rc = -ENOBUFS;
+		goto out_free;
+	}
+
+	dev_put(dev);
+
+	return ieee802154_nl_reply(msg, info);
+out_free:
+	nlmsg_free(msg);
+out_dev:
+	dev_put(dev);
+	return rc;
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_SETPARAMS: collect the parameters present in
+ * the request and pass them, together with a bitmask of which ones were
+ * supplied, to the device's set_params() op.
+ *
+ * At least one of the ENABLED, KEY_MODE or SECLEVEL attributes must be
+ * present, and a supplied security level must not exceed 7.
+ *
+ * Returns the result of set_params(), -ENODEV if the device cannot be
+ * resolved, -EOPNOTSUPP if it has no llsec ops, or -EINVAL on bad input.
+ */
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+	struct ieee802154_llsec_params params;
+	struct ieee802154_mlme_ops *ops;
+	struct net_device *dev;
+	int changed = 0;
+	int rc = -EINVAL;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (!attrs[IEEE802154_ATTR_LLSEC_ENABLED] &&
+	    !attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] &&
+	    !attrs[IEEE802154_ATTR_LLSEC_SECLEVEL])
+		goto out;
+
+	ops = ieee802154_mlme_ops(dev);
+	if (!ops->llsec) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* Validate the level before anything is copied into params. */
+	if (attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] &&
+	    nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7)
+		goto out;
+
+	if (attrs[IEEE802154_ATTR_LLSEC_ENABLED]) {
+		params.enabled = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_ENABLED]);
+		changed |= IEEE802154_LLSEC_PARAM_ENABLED;
+	}
+
+	if (attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) {
+		if (ieee802154_llsec_parse_key_id(info, &params.out_key))
+			goto out;
+
+		changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
+	}
+
+	if (attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) {
+		params.out_level = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]);
+		changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
+	}
+
+	if (attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) {
+		u32 fc = nla_get_u32(attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+		params.frame_counter = cpu_to_be32(fc);
+		changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
+	}
+
+	rc = ops->llsec->set_params(dev, &params, changed);
+
+out:
+	dev_put(dev);
+	return rc;
+}
+
+
+
+/*
+ * State handed by ieee802154_llsec_dump_table() to a table iterator for one
+ * network device.
+ */
+struct llsec_dump_data {
+	/* dump message the iterator appends entries to */
+	struct sk_buff *skb;
+	/* start indices, loaded from cb->args[1] and cb->args[2] */
+	int s_idx, s_idx2;
+	/* NETLINK_CB(cb->skb).portid of the dump request */
+	int portid;
+	/* cb->nlh->nlmsg_seq of the dump request */
+	int nlmsg_seq;
+	/* device whose table is being walked */
+	struct net_device *dev;
+	/* ieee802154_mlme_ops(dev) */
+	struct ieee802154_mlme_ops *ops;
+	/* table pointer filled in by ops->llsec->get_table() */
+	struct ieee802154_llsec_table *table;
+};
+
+/*
+ * Common driver for the llsec table dumps.
+ *
+ * Walks every ARPHRD_IEEE802154 device in the requesting socket's namespace
+ * that provides llsec ops, locks its llsec table and calls @step to append
+ * the table's entries to @skb.
+ *
+ * Resume state lives in cb->args: [0] is the index of the device to continue
+ * with, [1] and [2] are the per-device entry indices handed to @step as
+ * s_idx / s_idx2. When @step stops early (negative return) its updated
+ * indices are stored so the next invocation continues after the entries
+ * already emitted; when a device has been dumped completely they are reset
+ * for the next device.
+ *
+ * Returns skb->len.
+ */
+static int
+ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
+			    int (*step)(struct llsec_dump_data*))
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct llsec_dump_data data;
+	int idx = 0;
+	int first_dev = cb->args[0];
+	int rc;
+
+	for_each_netdev(net, dev) {
+		if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
+			goto skip;
+
+		data.ops = ieee802154_mlme_ops(dev);
+		if (!data.ops->llsec)
+			goto skip;
+
+		data.skb = skb;
+		data.s_idx = cb->args[1];
+		data.s_idx2 = cb->args[2];
+		data.dev = dev;
+		data.portid = NETLINK_CB(cb->skb).portid;
+		data.nlmsg_seq = cb->nlh->nlmsg_seq;
+
+		data.ops->llsec->lock_table(dev);
+		data.ops->llsec->get_table(data.dev, &data.table);
+		rc = step(&data);
+		data.ops->llsec->unlock_table(dev);
+
+		if (rc < 0) {
+			/* Remember how far this device got; otherwise the
+			 * next call would start it again from entry 0.
+			 */
+			cb->args[1] = data.s_idx;
+			cb->args[2] = data.s_idx2;
+			break;
+		}
+
+		/* Device finished: the next one starts from its beginning. */
+		cb->args[1] = 0;
+		cb->args[2] = 0;
+
+skip:
+		idx++;
+	}
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+/*
+ * Resolve the device named in @info and run @fn on it, provided the device
+ * exposes llsec ops. The device reference is dropped before returning.
+ *
+ * Returns -ENODEV if no device is found, -EOPNOTSUPP if the device has no
+ * llsec ops, otherwise whatever @fn returns.
+ */
+static int
+ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info,
+			   int (*fn)(struct net_device*, struct genl_info*))
+{
+	struct net_device *dev = ieee802154_nl_get_dev(info);
+	int rc;
+
+	if (!dev)
+		return -ENODEV;
+
+	rc = ieee802154_mlme_ops(dev)->llsec ? fn(dev, info) : -EOPNOTSUPP;
+
+	dev_put(dev);
+	return rc;
+}
+
+
+
+/*
+ * Parse the key usage and key bytes attributes of a request into @key.
+ *
+ * The frame type mask and the key bytes are mandatory. If the mask includes
+ * MAC command frames, the command usage bitmap is mandatory as well. A
+ * supplied bitmap is copied into eight 32-bit words; the first seven must be
+ * zero and the last must be below BIT(IEEE802154_CMD_GTS_REQ + 1).
+ *
+ * @key is zeroed before parsing. Returns 0 on success or -EINVAL.
+ */
+static int
+ieee802154_llsec_parse_key(struct genl_info *info,
+			   struct ieee802154_llsec_key *key)
+{
+	struct nlattr **attrs = info->attrs;
+	struct nlattr *usage = attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS];
+	u32 commands[256 / 32];
+	u8 frames;
+	int i;
+
+	memset(key, 0, sizeof(*key));
+
+	if (!attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] ||
+	    !attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES])
+		return -EINVAL;
+
+	frames = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]);
+	if (!usage && (frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)))
+		return -EINVAL;
+
+	if (usage) {
+		nla_memcpy(commands, usage, 256 / 8);
+
+		/* Only the last word may carry command id bits. */
+		for (i = 0; i < 7; i++) {
+			if (commands[i])
+				return -EINVAL;
+		}
+
+		if (commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1))
+			return -EINVAL;
+
+		key->cmd_frame_ids = commands[7];
+	}
+
+	key->frame_types = frames;
+
+	nla_memcpy(key->key, attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES],
+		   IEEE802154_LLSEC_KEY_SIZE);
+
+	return 0;
+}
+
+/*
+ * Parse a key and its identifier from @info and hand both to the device's
+ * add_key() op. Returns -EINVAL if either fails to parse.
+ */
+static int llsec_add_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_llsec_key_id id;
+	struct ieee802154_llsec_key key;
+
+	if (ieee802154_llsec_parse_key(info, &key))
+		return -EINVAL;
+
+	if (ieee802154_llsec_parse_key_id(info, &id))
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->add_key(dev, &id, &key);
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_ADD_KEY. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info)
+{
+	const int required = NLM_F_CREATE | NLM_F_EXCL;
+
+	if ((info->nlhdr->nlmsg_flags & required) != required)
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_key);
+}
+
+/*
+ * Parse a key identifier from @info and pass it to the device's del_key()
+ * op. Returns -EINVAL if the identifier fails to parse.
+ */
+static int llsec_remove_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_llsec_key_id id;
+	int err = ieee802154_llsec_parse_key_id(info, &id);
+
+	if (err)
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->del_key(dev, &id);
+}
+
+/* Handler for IEEE802154_LLSEC_DEL_KEY: runs llsec_remove_key() on the device. */
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_remove_key);
+}
+
+/*
+ * Append one IEEE802154_LLSEC_LIST_KEY message describing @key on @dev to
+ * @msg: device name and index, key id, usable frame types, the command
+ * usage bitmap (only when MAC command frames are allowed) and the key bytes.
+ *
+ * Returns 0 on success. On failure returns -EMSGSIZE; a partially written
+ * message is cancelled first.
+ */
+static int
+ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_key_entry *key,
+		       const struct net_device *dev)
+{
+	u32 commands[256 / 32];
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_KEY);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex))
+		goto cancel;
+	if (ieee802154_llsec_fill_key_id(msg, &key->id))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+		       key->key->frame_types))
+		goto cancel;
+
+	if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) {
+		/* Command ids occupy the last word of the bitmap only. */
+		memset(commands, 0, sizeof(commands));
+		commands[7] = key->key->cmd_frame_ids;
+		if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+			    sizeof(commands), commands))
+			goto cancel;
+	}
+
+	if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES,
+		    IEEE802154_LLSEC_KEY_SIZE, key->key->key))
+		goto cancel;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump step for the key table: skip the first data->s_idx entries, then emit
+ * one message per key, bumping data->s_idx for each one written.
+ *
+ * Returns 0 when the list is exhausted, -EMSGSIZE when a message did not fit.
+ */
+static int llsec_iter_keys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_key_entry *entry;
+	int seen = 0;
+
+	list_for_each_entry(entry, &data->table->keys, list) {
+		if (seen++ >= data->s_idx) {
+			if (ieee802154_nl_fill_key(data->skb, data->portid,
+						   data->nlmsg_seq, entry,
+						   data->dev))
+				return -EMSGSIZE;
+
+			data->s_idx++;
+		}
+	}
+
+	return 0;
+}
+
+/* Dump callback for IEEE802154_LLSEC_LIST_KEY: dumps each device's key list. */
+int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys);
+}
+
+
+
+/*
+ * Parse a device description from @info into @dev.
+ *
+ * Frame counter, hardware address, override flag and key mode are required.
+ * PAN id and short address are optional but must be given together; when
+ * absent the short address is set to IEEE802154_ADDR_UNDEF.
+ *
+ * @dev is zeroed before parsing. Returns 0 on success, -EINVAL on a missing
+ * attribute or a key mode at or above __IEEE802154_LLSEC_DEVKEY_MAX.
+ */
+static int
+llsec_parse_dev(struct genl_info *info,
+		struct ieee802154_llsec_device *dev)
+{
+	struct nlattr **attrs = info->attrs;
+
+	memset(dev, 0, sizeof(*dev));
+
+	if (!attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    !attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] ||
+	    !attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE])
+		return -EINVAL;
+
+	/* PAN id and short address only make sense as a pair. */
+	if (!!attrs[IEEE802154_ATTR_PAN_ID] !=
+	    !!attrs[IEEE802154_ATTR_SHORT_ADDR])
+		return -EINVAL;
+
+	if (attrs[IEEE802154_ATTR_PAN_ID]) {
+		dev->pan_id = nla_get_shortaddr(attrs[IEEE802154_ATTR_PAN_ID]);
+		dev->short_addr = nla_get_shortaddr(attrs[IEEE802154_ATTR_SHORT_ADDR]);
+	} else {
+		dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF);
+	}
+
+	dev->hwaddr = nla_get_hwaddr(attrs[IEEE802154_ATTR_HW_ADDR]);
+	dev->frame_counter = nla_get_u32(attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+	dev->seclevel_exempt = !!nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+	dev->key_mode = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]);
+
+	if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Parse a device description from @info and pass it to the device's
+ * add_dev() op. Returns -EINVAL if parsing fails.
+ */
+static int llsec_add_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_llsec_device desc;
+	int err = llsec_parse_dev(info, &desc);
+
+	if (err)
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->add_dev(dev, &desc);
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_ADD_DEV. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	const int required = NLM_F_CREATE | NLM_F_EXCL;
+
+	if ((info->nlhdr->nlmsg_flags & required) != required)
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_dev);
+}
+
+/*
+ * Pass the hardware address given in @info to the device's del_dev() op.
+ * Returns -EINVAL if the address attribute is missing.
+ */
+static int llsec_del_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct nlattr *hwaddr = info->attrs[IEEE802154_ATTR_HW_ADDR];
+
+	if (!hwaddr)
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->del_dev(dev,
+							nla_get_hwaddr(hwaddr));
+}
+
+/* Handler for IEEE802154_LLSEC_DEL_DEV: runs llsec_del_dev() on the device. */
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_dev);
+}
+
+/*
+ * Append one IEEE802154_LLSEC_LIST_DEV message describing llsec device
+ * @desc of network device @dev to @msg.
+ *
+ * Returns 0 on success. On failure returns -EMSGSIZE; a partially written
+ * message is cancelled first.
+ */
+static int
+ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_device *desc,
+		       const struct net_device *dev)
+{
+	void *hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+				IEEE802154_LLSEC_LIST_DEV);
+
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex))
+		goto cancel;
+	if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id))
+		goto cancel;
+	if (nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+			      desc->short_addr))
+		goto cancel;
+	if (nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			desc->frame_counter))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       desc->seclevel_exempt))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode))
+		goto cancel;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump step for the device table: skip the first data->s_idx entries, then
+ * emit one message per device, bumping data->s_idx for each one written.
+ *
+ * Returns 0 when the list is exhausted, -EMSGSIZE when a message did not fit.
+ */
+static int llsec_iter_devs(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *entry;
+	int seen = 0;
+
+	list_for_each_entry(entry, &data->table->devices, list) {
+		if (seen++ >= data->s_idx) {
+			if (ieee802154_nl_fill_dev(data->skb, data->portid,
+						   data->nlmsg_seq, entry,
+						   data->dev))
+				return -EMSGSIZE;
+
+			data->s_idx++;
+		}
+	}
+
+	return 0;
+}
+
+/* Dump callback for IEEE802154_LLSEC_LIST_DEV: dumps each device's llsec device list. */
+int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs);
+}
+
+
+
+/*
+ * Build a device key (key id plus frame counter) from @info and pass it,
+ * with the target hardware address, to the device's add_devkey() op.
+ *
+ * Returns -EINVAL if the frame counter or hardware address is missing or
+ * the key id fails to parse.
+ */
+static int llsec_add_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+	struct ieee802154_llsec_device_key key;
+
+	if (!attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !attrs[IEEE802154_ATTR_HW_ADDR])
+		return -EINVAL;
+
+	if (ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	key.frame_counter = nla_get_u32(attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+	return ieee802154_mlme_ops(dev)->llsec->add_devkey(dev,
+			nla_get_hwaddr(attrs[IEEE802154_ATTR_HW_ADDR]), &key);
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_ADD_DEVKEY. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	const int required = NLM_F_CREATE | NLM_F_EXCL;
+
+	if ((info->nlhdr->nlmsg_flags & required) != required)
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey);
+}
+
+/*
+ * Parse a key id from @info and pass it, with the target hardware address,
+ * to the device's del_devkey() op. Only the key id of the device key passed
+ * down is filled in here.
+ *
+ * Returns -EINVAL if the hardware address is missing or the key id fails to
+ * parse.
+ */
+static int llsec_del_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct nlattr *hwaddr = info->attrs[IEEE802154_ATTR_HW_ADDR];
+	struct ieee802154_llsec_device_key key;
+
+	if (!hwaddr)
+		return -EINVAL;
+
+	if (ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->del_devkey(dev,
+			nla_get_hwaddr(hwaddr), &key);
+}
+
+/* Handler for IEEE802154_LLSEC_DEL_DEVKEY: runs llsec_del_devkey() on the device. */
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey);
+}
+
+/*
+ * Append one IEEE802154_LLSEC_LIST_DEVKEY message to @msg describing
+ * @devkey of the llsec device with hardware address @devaddr on @dev.
+ *
+ * Returns 0 on success. On failure returns -EMSGSIZE; a partially written
+ * message is cancelled first.
+ */
+static int
+ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq,
+			  __le64 devaddr,
+			  const struct ieee802154_llsec_device_key *devkey,
+			  const struct net_device *dev)
+{
+	void *hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+				IEEE802154_LLSEC_LIST_DEVKEY);
+
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex))
+		goto cancel;
+	if (nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			devkey->frame_counter))
+		goto cancel;
+	if (ieee802154_llsec_fill_key_id(msg, &devkey->key_id))
+		goto cancel;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump step for the per-device key lists.
+ *
+ * data->s_idx counts the llsec devices whose keys have been dumped
+ * completely, data->s_idx2 the keys already emitted for the device that is
+ * currently being dumped. The first data->s_idx devices are skipped, as are
+ * the first data->s_idx2 keys of the device dumping continues with.
+ *
+ * Returns 0 when all devices are exhausted, -EMSGSIZE when a message did
+ * not fit.
+ */
+static int llsec_iter_devkeys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *dpos;
+	struct ieee802154_llsec_device_key *kpos;
+	int idx = 0, idx2;
+
+	list_for_each_entry(dpos, &data->table->devices, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		idx2 = 0;
+
+		list_for_each_entry(kpos, &dpos->keys, list) {
+			if (idx2++ < data->s_idx2)
+				continue;
+
+			if (ieee802154_nl_fill_devkey(data->skb, data->portid,
+						      data->nlmsg_seq,
+						      dpos->hwaddr, kpos,
+						      data->dev))
+				return -EMSGSIZE;
+
+			data->s_idx2++;
+		}
+
+		/* The key offset applies to one device only; without the
+		 * reset the next device would lose its leading keys.
+		 */
+		data->s_idx2 = 0;
+		data->s_idx++;
+	}
+
+	return 0;
+}
+
+/* Dump callback for IEEE802154_LLSEC_LIST_DEVKEY: dumps the keys of each llsec device. */
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys);
+}
+
+
+
+/*
+ * Parse a security level entry from @info into @sl.
+ *
+ * Frame type, allowed security levels and the device override flag are
+ * required; for MAC command frames the command frame id is required too.
+ *
+ * @sl is zeroed before parsing. Returns 0 on success or -EINVAL when a
+ * required attribute is missing.
+ */
+static int
+llsec_parse_seclevel(struct genl_info *info,
+		     struct ieee802154_llsec_seclevel *sl)
+{
+	struct nlattr **attrs = info->attrs;
+
+	memset(sl, 0, sizeof(*sl));
+
+	if (!attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE])
+		return -EINVAL;
+	if (!attrs[IEEE802154_ATTR_LLSEC_SECLEVELS])
+		return -EINVAL;
+	if (!attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE])
+		return -EINVAL;
+
+	sl->frame_type = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]);
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) {
+		struct nlattr *cmd = attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID];
+
+		if (!cmd)
+			return -EINVAL;
+
+		sl->cmd_frame_id = nla_get_u8(cmd);
+	}
+
+	sl->sec_levels = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]);
+	sl->device_override = nla_get_u8(attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+
+	return 0;
+}
+
+/*
+ * Parse a security level entry from @info and pass it to the device's
+ * add_seclevel() op. Returns -EINVAL if parsing fails.
+ */
+static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_llsec_seclevel sl;
+	int err = llsec_parse_seclevel(info, &sl);
+
+	if (err)
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->add_seclevel(dev, &sl);
+}
+
+/*
+ * Handler for IEEE802154_LLSEC_ADD_SECLEVEL. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	const int required = NLM_F_CREATE | NLM_F_EXCL;
+
+	if ((info->nlhdr->nlmsg_flags & required) != required)
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel);
+}
+
+/*
+ * Parse a security level entry from @info and pass it to the device's
+ * del_seclevel() op. Returns -EINVAL if parsing fails.
+ */
+static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_llsec_seclevel sl;
+	int err = llsec_parse_seclevel(info, &sl);
+
+	if (err)
+		return -EINVAL;
+
+	return ieee802154_mlme_ops(dev)->llsec->del_seclevel(dev, &sl);
+}
+
+/* Handler for IEEE802154_LLSEC_DEL_SECLEVEL: runs llsec_del_seclevel() on the device. */
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel);
+}
+
+/*
+ * Append one IEEE802154_LLSEC_LIST_SECLEVEL message describing @sl on @dev
+ * to @msg. The command frame id is included only for MAC command frames.
+ *
+ * Returns 0 on success. On failure returns -EMSGSIZE; a partially written
+ * message is cancelled first.
+ */
+static int
+ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq,
+			    const struct ieee802154_llsec_seclevel *sl,
+			    const struct net_device *dev)
+{
+	void *hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+				IEEE802154_LLSEC_LIST_SECLEVEL);
+
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+		goto cancel;
+	if (nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels))
+		goto cancel;
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       sl->device_override))
+		goto cancel;
+
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) {
+		if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+			       sl->cmd_frame_id))
+			goto cancel;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump step for the security level table: skip the first data->s_idx
+ * entries, then emit one message per entry, bumping data->s_idx for each
+ * one written.
+ *
+ * Returns 0 when the list is exhausted, -EMSGSIZE when a message did not fit.
+ */
+static int llsec_iter_seclevels(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_seclevel *entry;
+	int seen = 0;
+
+	list_for_each_entry(entry, &data->table->security_levels, list) {
+		if (seen++ >= data->s_idx) {
+			if (ieee802154_nl_fill_seclevel(data->skb, data->portid,
+							data->nlmsg_seq, entry,
+							data->dev))
+				return -EMSGSIZE;
+
+			data->s_idx++;
+		}
+	}
+
+	return 0;
+}
+
+/* Dump callback for IEEE802154_LLSEC_LIST_SECLEVEL: dumps each device's security level list. */
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels);
+}

diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index fd7be5e..3a703ab8 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c

@@ -62,5 +62,21 @@
 	[IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, },
 
 	[IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, },
+
+	[IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 },
+	[IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 },
+	[IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, },
 };
 

diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index ef2d543..6f1428c 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c

@@ -36,7 +36,7 @@
 	u8 d_offset;
 };
 
-struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
+static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
 {
 	return (struct lowpan_frag_info *)skb->cb;
 }
@@ -120,6 +120,8 @@
 	struct inet_frag_queue *q;
 	struct lowpan_create_arg arg;
 	unsigned int hash;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
 	arg.tag = frag_info->d_tag;
 	arg.d_size = frag_info->d_size;
@@ -129,7 +131,7 @@
 	read_lock(&lowpan_frags.lock);
 	hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
 
-	q = inet_frag_find(&net->ieee802154_lowpan.frags,
+	q = inet_frag_find(&ieee802154_lowpan->frags,
 			   &lowpan_frags, &arg, hash);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
@@ -357,6 +359,8 @@
 	struct net *net = dev_net(skb->dev);
 	struct lowpan_frag_info *frag_info = lowpan_cb(skb);
 	struct ieee802154_addr source, dest;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 	int err;
 
 	source = mac_cb(skb)->source;
@@ -366,10 +370,10 @@
 	if (err < 0)
 		goto err;
 
-	if (frag_info->d_size > net->ieee802154_lowpan.max_dsize)
+	if (frag_info->d_size > ieee802154_lowpan->max_dsize)
 		goto err;
 
-	inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false);
+	inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false);
 
 	fq = fq_find(net, frag_info, &source, &dest);
 	if (fq != NULL) {
@@ -436,6 +440,8 @@
 {
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
 	table = lowpan_frags_ns_ctl_table;
 	if (!net_eq(net, &init_net)) {
@@ -444,10 +450,10 @@
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].data = &net->ieee802154_lowpan.frags.high_thresh;
-		table[1].data = &net->ieee802154_lowpan.frags.low_thresh;
-		table[2].data = &net->ieee802154_lowpan.frags.timeout;
-		table[3].data = &net->ieee802154_lowpan.max_dsize;
+		table[0].data = &ieee802154_lowpan->frags.high_thresh;
+		table[1].data = &ieee802154_lowpan->frags.low_thresh;
+		table[2].data = &ieee802154_lowpan->frags.timeout;
+		table[3].data = &ieee802154_lowpan->max_dsize;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
@@ -458,7 +464,7 @@
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->ieee802154_lowpan.sysctl.frags_hdr = hdr;
+	ieee802154_lowpan->sysctl.frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -471,9 +477,11 @@
 static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
-	table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr);
+	table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }
@@ -514,20 +522,26 @@
 
 static int __net_init lowpan_frags_init_net(struct net *net)
 {
-	net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ieee802154_lowpan.max_dsize = 0xFFFF;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
-	inet_frags_init_net(&net->ieee802154_lowpan.frags);
+	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+	ieee802154_lowpan->max_dsize = 0xFFFF;
+
+	inet_frags_init_net(&ieee802154_lowpan->frags);
 
 	return lowpan_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit lowpan_frags_exit_net(struct net *net)
 {
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
+
 	lowpan_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags);
+	inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
 }
 
 static struct pernet_operations lowpan_frags_ops = {

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6d6dd34..d5e6836 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c

@@ -254,7 +254,6 @@
 	struct inet_sock *inet;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
-	char answer_no_check;
 	int try_loading_module = 0;
 	int err;
 
@@ -312,7 +311,6 @@
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
-	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
@@ -324,7 +322,6 @@
 		goto out;
 
 	err = 0;
-	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
@@ -1002,7 +999,6 @@
 		.protocol =   IPPROTO_TCP,
 		.prot =       &tcp_prot,
 		.ops =        &inet_stream_ops,
-		.no_check =   0,
 		.flags =      INET_PROTOSW_PERMANENT |
 			      INET_PROTOSW_ICSK,
 	},
@@ -1012,7 +1008,6 @@
 		.protocol =   IPPROTO_UDP,
 		.prot =       &udp_prot,
 		.ops =        &inet_dgram_ops,
-		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_PERMANENT,
        },
 
@@ -1021,7 +1016,6 @@
 		.protocol =   IPPROTO_ICMP,
 		.prot =       &ping_prot,
 		.ops =        &inet_dgram_ops,
-		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_REUSE,
        },
 
@@ -1030,7 +1024,6 @@
 	       .protocol =   IPPROTO_IP,	/* wild card */
 	       .prot =       &raw_prot,
 	       .ops =        &inet_sockraw_ops,
-	       .no_check =   UDP_CSUM_DEFAULT,
 	       .flags =      INET_PROTOSW_REUSE,
        }
 };
@@ -1261,10 +1254,12 @@
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       0)))
 		goto out;
@@ -1476,22 +1471,20 @@
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
 	unsigned long res = 0;
-	int i, j;
+	int i;
 
-	for_each_possible_cpu(i) {
-		for (j = 0; j < SNMP_ARRAY_SZ; j++)
-			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
-	}
+	for_each_possible_cpu(i)
+		res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
 	return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 #if BITS_PER_LONG==32
 
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 {
 	u64 res = 0;
 	int cpu;
@@ -1502,7 +1495,7 @@
 		u64 v;
 		unsigned int start;
 
-		bhptr = per_cpu_ptr(mib[0], cpu);
+		bhptr = per_cpu_ptr(mib, cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_irq(syncp);
@@ -1516,25 +1509,6 @@
 EXPORT_SYMBOL_GPL(snmp_fold_field64);
 #endif
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
-{
-	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, align);
-	if (!ptr[0])
-		return -ENOMEM;
-
-#if SNMP_ARRAY_SZ == 2
-	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1]) {
-		free_percpu(ptr[0]);
-		ptr[0] = NULL;
-		return -ENOMEM;
-	}
-#endif
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snmp_mib_init);
-
 #ifdef CONFIG_IP_MULTICAST
 static const struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
@@ -1570,40 +1544,30 @@
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib),
-			  __alignof__(struct tcp_mib)) < 0)
+	net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
+	if (!net->mib.tcp_statistics)
 		goto err_tcp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ip_statistics)
 		goto err_ip_mib;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *af_inet_stats;
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
+		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
 		u64_stats_init(&af_inet_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
-		u64_stats_init(&af_inet_stats->syncp);
-#endif
 	}
 
-	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib),
-			  __alignof__(struct linux_mib)) < 0)
+	net->mib.net_statistics = alloc_percpu(struct linux_mib);
+	if (!net->mib.net_statistics)
 		goto err_net_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udp_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_statistics)
 		goto err_udp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_statistics)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib),
-			  __alignof__(struct icmp_mib)) < 0)
+	net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
+	if (!net->mib.icmp_statistics)
 		goto err_icmp_mib;
 	net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
 					      GFP_KERNEL);
@@ -1614,17 +1578,17 @@
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+	free_percpu(net->mib.icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+	free_percpu(net->mib.udplite_statistics);
 err_udplite_mib:
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+	free_percpu(net->mib.udp_statistics);
 err_udp_mib:
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
+	free_percpu(net->mib.net_statistics);
 err_net_mib:
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+	free_percpu(net->mib.ip_statistics);
 err_ip_mib:
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.tcp_statistics);
 err_tcp_mib:
 	return -ENOMEM;
 }
@@ -1632,12 +1596,12 @@
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
 	kfree(net->mib.icmpmsg_statistics);
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.icmp_statistics);
+	free_percpu(net->mib.udplite_statistics);
+	free_percpu(net->mib.udp_statistics);
+	free_percpu(net->mib.net_statistics);
+	free_percpu(net->mib.ip_statistics);
+	free_percpu(net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1736,13 +1700,9 @@
 
 	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
 
-	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
-	if (!sysctl_local_reserved_ports)
-		goto out;
-
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
-		goto out_free_reserved_ports;
+		goto out;
 
 	rc = proto_register(&udp_prot, 1);
 	if (rc)
@@ -1852,8 +1812,6 @@
 	proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
 	proto_unregister(&tcp_prot);
-out_free_reserved_ports:
-	kfree(sysctl_local_reserved_ports);
 	goto out;
 }
 

diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 8b5134c..a3095fd 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c

@@ -86,18 +86,26 @@
 }
 EXPORT_SYMBOL(ip4_datagram_connect);
 
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
 void ip4_datagram_release_cb(struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct ip_options_rcu *inet_opt;
 	__be32 daddr = inet->inet_daddr;
+	struct dst_entry *dst;
 	struct flowi4 fl4;
 	struct rtable *rt;
 
-	if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
-		return;
-
 	rcu_read_lock();
+
+	dst = __sk_dst_get(sk);
+	if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+		rcu_read_unlock();
+		return;
+	}
 	inet_opt = rcu_dereference(inet->inet_opt);
 	if (inet_opt && inet_opt->opt.srr)
 		daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@
 				   inet->inet_saddr, inet->inet_dport,
 				   inet->inet_sport, sk->sk_protocol,
 				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
-	if (!IS_ERR(rt))
-		__sk_dst_set(sk, &rt->dst);
+
+	dst = !IS_ERR(rt) ? &rt->dst : NULL;
+	sk_dst_set(sk, dst);
+
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bdbf68b..e944937 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c

@@ -106,7 +106,6 @@
 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
 
 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-static DEFINE_SPINLOCK(inet_addr_hash_lock);
 
 static u32 inet_addr_hash(struct net *net, __be32 addr)
 {
@@ -119,16 +118,14 @@
 {
 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
 
-	spin_lock(&inet_addr_hash_lock);
+	ASSERT_RTNL();
 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
-	spin_unlock(&inet_addr_hash_lock);
 }
 
 static void inet_hash_remove(struct in_ifaddr *ifa)
 {
-	spin_lock(&inet_addr_hash_lock);
+	ASSERT_RTNL();
 	hlist_del_init_rcu(&ifa->hash);
-	spin_unlock(&inet_addr_hash_lock);
 }
 
 /**
@@ -830,7 +827,7 @@
 	ifa_existing = find_matching_ifa(ifa);
 	if (!ifa_existing) {
 		/* It would be best to check for !NLM_F_CREATE here but
-		 * userspace alreay relies on not having to provide this.
+		 * userspace already relies on not having to provide this.
 		 */
 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);

diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 250be74..4e9619b 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c

@@ -84,7 +84,8 @@
 			ptr--;
 		}
 		if (tpi->flags&TUNNEL_CSUM &&
-		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
 			*ptr = 0;
 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
 								 skb->len, 0));
@@ -93,28 +94,6 @@
 }
 EXPORT_SYMBOL_GPL(gre_build_header);
 
-static __sum16 check_checksum(struct sk_buff *skb)
-{
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		csum = csum_fold(skb->csum);
-
-		if (!csum)
-			break;
-		/* Fall through. */
-
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		csum = __skb_checksum_complete(skb);
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		break;
-	}
-
-	return csum;
-}
-
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			    bool *csum_err)
 {
@@ -141,7 +120,7 @@
 
 	options = (__be32 *)(greh + 1);
 	if (greh->flags & GRE_CSUM) {
-		if (check_checksum(skb)) {
+		if (skb_checksum_simple_validate(skb)) {
 			*csum_err = true;
 			return -EINVAL;
 		}

diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d3228..eb92deb 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c

@@ -42,6 +42,7 @@
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP)))
 		goto out;
 
@@ -55,6 +56,8 @@
 		goto out;
 
 	csum = !!(greh->flags & GRE_CSUM);
+	if (csum)
+		skb->encap_hdr_csum = 1;
 
 	if (unlikely(!pskb_may_pull(skb, ghl)))
 		goto out;
@@ -94,10 +97,13 @@
 				}
 			}
 
-			greh = (struct gre_base_hdr *)(skb->data);
+			skb_reset_transport_header(skb);
+
+			greh = (struct gre_base_hdr *)
+			    skb_transport_header(skb);
 			pcsum = (__be32 *)(greh + 1);
 			*pcsum = 0;
-			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+			*(__sum16 *)pcsum = gso_make_checksum(skb, 0);
 		}
 		__skb_push(skb, tnl_hlen - ghl);
 
@@ -125,10 +131,12 @@
 		csum_partial(skb->data, skb_gro_offset(skb), 0));
 	sum = csum_fold(NAPI_GRO_CB(skb)->csum);
 	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
-		if (unlikely(!sum))
+		if (unlikely(!sum) && !skb->csum_complete_sw)
 			netdev_rx_csum_fault(skb->dev);
-	} else
+	} else {
 		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_complete_sw = 1;
+	}
 
 	return sum;
 }

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0134663..79c3d94 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c

@@ -337,6 +337,7 @@
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr, saddr;
+	u32 mark = IP4_REPLY_MARK(net, skb->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 		return;
@@ -349,6 +350,7 @@
 	icmp_param->data.icmph.checksum = 0;
 
 	inet->tos = ip_hdr(skb)->tos;
+	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
 	ipc.opt = NULL;
@@ -364,6 +366,7 @@
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
+	fl4.flowi4_mark = mark;
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@
 					struct flowi4 *fl4,
 					struct sk_buff *skb_in,
 					const struct iphdr *iph,
-					__be32 saddr, u8 tos,
+					__be32 saddr, u8 tos, u32 mark,
 					int type, int code,
 					struct icmp_bxm *param)
 {
@@ -394,6 +397,7 @@
 	fl4->daddr = (param->replyopts.opt.opt.srr ?
 		      param->replyopts.opt.opt.faddr : iph->saddr);
 	fl4->saddr = saddr;
+	fl4->flowi4_mark = mark;
 	fl4->flowi4_tos = RT_TOS(tos);
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@
 	struct flowi4 fl4;
 	__be32 saddr;
 	u8  tos;
+	u32 mark;
 	struct net *net;
 	struct sock *sk;
 
@@ -592,6 +597,7 @@
 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 					   IPTOS_PREC_INTERNETCONTROL) :
 					  iph->tos;
+	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
 		goto out_unlock;
@@ -608,13 +614,14 @@
 	icmp_param->skb	  = skb_in;
 	icmp_param->offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
+	sk->sk_mark = mark;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param->replyopts.opt;
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
 
-	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, icmp_param);
 	if (IS_ERR(rt))
 		goto out_unlock;
@@ -908,16 +915,8 @@
 
 	ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
 
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!csum_fold(skb->csum))
-			break;
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		if (__skb_checksum_complete(skb))
-			goto csum_error;
-	}
+	if (skb_checksum_simple_validate(skb))
+		goto csum_error;
 
 	if (!pskb_pull(skb, sizeof(*icmph)))
 		goto error;

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 97e4d16..6748d42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c

@@ -369,7 +369,7 @@
 	pip->saddr    = fl4.saddr;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&pip[1])[0] = IPOPT_RA;
 	((u8 *)&pip[1])[1] = 4;
 	((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@
 	iph->daddr    = dst;
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
 	((u8 *)&iph[1])[1] = 4;
 	((u8 *)&iph[1])[2] = 0;
@@ -988,16 +988,8 @@
 	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
 		goto drop;
 
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!csum_fold(skb->csum))
-			break;
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		if (__skb_checksum_complete(skb))
-			goto drop;
-	}
+	if (skb_checksum_simple_validate(skb))
+		goto drop;
 
 	ih = igmp_hdr(skb);
 	switch (ih->type) {

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a56b8e6..14d02ea 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c

@@ -29,9 +29,6 @@
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
 #endif
 
-unsigned long *sysctl_local_reserved_ports;
-EXPORT_SYMBOL(sysctl_local_reserved_ports);
-
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
@@ -113,7 +110,7 @@
 
 		smallest_size = -1;
 		do {
-			if (inet_is_reserved_local_port(rover))
+			if (inet_is_local_reserved_port(net, rover))
 				goto next_nolock;
 			head = &hashinfo->bhash[inet_bhashfn(net, rover,
 					hashinfo->bhash_size)];
@@ -408,7 +405,7 @@
 	struct net *net = sock_net(sk);
 	int flags = inet_sk_flowi_flags(sk);
 
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
 			   flags,
@@ -445,7 +442,7 @@
 
 	rcu_read_lock();
 	opt = rcu_dereference(newinet->inet_opt);
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +677,8 @@
 		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
 		newsk->sk_write_space = sk_stream_write_space;
 
+		newsk->sk_mark = inet_rsk(req)->ir_mark;
+
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
 		newicsk->icsk_probes_out  = 0;

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8b9cf27..43116e8 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c

@@ -274,7 +274,7 @@
 				  const __be32 daddr, const u16 hnum,
 				  const int dif)
 {
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
 	const struct hlist_nulls_node *node;
@@ -327,7 +327,7 @@
 	__be32 daddr = inet->inet_rcv_saddr;
 	__be32 saddr = inet->inet_daddr;
 	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport,
@@ -500,7 +500,7 @@
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
-			if (inet_is_reserved_local_port(port))
+			if (inet_is_local_reserved_port(net, port))
 				continue;
 			head = &hinfo->bhash[inet_bhashfn(net, port,
 					hinfo->bhash_size)];

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 56cd458..bd5f592 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c

@@ -26,20 +26,7 @@
  *  Theory of operations.
  *  We keep one entry for each peer IP address.  The nodes contains long-living
  *  information about the peer which doesn't depend on routes.
- *  At this moment this information consists only of ID field for the next
- *  outgoing IP packet.  This field is incremented with each packet as encoded
- *  in inet_getid() function (include/net/inetpeer.h).
- *  At the moment of writing this notes identifier of IP packets is generated
- *  to be unpredictable using this code only for packets subjected
- *  (actually or potentially) to defragmentation.  I.e. DF packets less than
- *  PMTU in size when local fragmentation is disabled use a constant ID and do
- *  not use this code (see ip_select_ident() in include/net/ip.h).
  *
- *  Route cache entries hold references to our nodes.
- *  New cache entries get references via lookup by destination IP address in
- *  the avl tree.  The reference is grabbed only when it's needed i.e. only
- *  when we try to output IP packet which needs an unpredictable ID (see
- *  __ip_select_ident() in net/ipv4/route.c).
  *  Nodes are removed only when reference counter goes to 0.
  *  When it's happened the node may be removed when a sufficient amount of
  *  time has been passed since its last use.  The less-recently-used entry can
@@ -62,7 +49,6 @@
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
- *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -120,7 +106,7 @@
 static void inetpeer_gc_worker(struct work_struct *work)
 {
 	struct inet_peer *p, *n, *c;
-	LIST_HEAD(list);
+	struct list_head list;
 
 	spin_lock_bh(&gc_lock);
 	list_replace_init(&gc_list, &list);
@@ -497,10 +483,6 @@
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count,
-				(daddr->family == AF_INET) ?
-					secure_ip_id(daddr->addr.a4) :
-					secure_ipv6_id(daddr->addr.a6));
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		/* 60*HZ is arbitrary, but chosen enough high so that the first

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 6f111e4..3a83ce5 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c

@@ -42,7 +42,7 @@
 static bool ip_may_fragment(const struct sk_buff *skb)
 {
 	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
-		skb->local_df;
+		skb->ignore_df;
 }
 
 static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 94213c8..9b84254 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c

@@ -410,7 +410,7 @@
 		struct flowi4 fl4;
 		struct rtable *rt;
 
-		rt = ip_route_output_gre(dev_net(dev), &fl4,
+		rt = ip_route_output_gre(t->net, &fl4,
 					 t->parms.iph.daddr,
 					 t->parms.iph.saddr,
 					 t->parms.o_key,
@@ -434,7 +434,7 @@
 
 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 		struct in_device *in_dev;
-		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
+		in_dev = inetdev_by_index(t->net, t->mlink);
 		if (in_dev)
 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 	}
@@ -478,7 +478,7 @@
 	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 
-	dev->features		|= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
+	dev->features		|= GRE_FEATURES;
 	dev->hw_features	|= GRE_FEATURES;
 
 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
@@ -649,6 +649,7 @@
 {
 	ether_setup(dev);
 	dev->netdev_ops		= &gre_tap_netdev_ops;
+	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f4ab72e..5e7aece 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c

@@ -364,7 +364,7 @@
 			}
 			if (optptr[2] <= optlen) {
 				unsigned char *timeptr = NULL;
-				if (optptr[2]+3 > optptr[1]) {
+				if (optptr[2]+3 > optlen) {
 					pp_ptr = optptr + 2;
 					goto error;
 				}
@@ -376,7 +376,7 @@
 					optptr[2] += 4;
 					break;
 				case IPOPT_TS_TSANDADDR:
-					if (optptr[2]+7 > optptr[1]) {
+					if (optptr[2]+7 > optlen) {
 						pp_ptr = optptr + 2;
 						goto error;
 					}
@@ -390,7 +390,7 @@
 					optptr[2] += 8;
 					break;
 				case IPOPT_TS_PRESPEC:
-					if (optptr[2]+7 > optptr[1]) {
+					if (optptr[2]+7 > optlen) {
 						pp_ptr = optptr + 2;
 						goto error;
 					}

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a52f501..8d3b6b0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c

@@ -148,7 +148,7 @@
 	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr    = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -415,7 +415,7 @@
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
@@ -430,8 +430,7 @@
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(skb, &rt->dst, sk,
-			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 
 	/* TODO : should we use skb->sk here instead of sk ? */
 	skb->priority = sk->sk_priority;
@@ -501,7 +500,7 @@
 	iph = ip_hdr(skb);
 
 	mtu = ip_skb_dst_mtu(skb);
-	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
 		     (IPCB(skb)->frag_max_size &&
 		      IPCB(skb)->frag_max_size > mtu))) {
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -866,7 +865,7 @@
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + length > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1189,7 +1188,7 @@
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + size > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1350,10 +1349,10 @@
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	skb->local_df = ip_sk_local_df(sk);
+	skb->ignore_df = ip_sk_ignore_df(sk);
 
 	/* DF bit is set when we want to see DF on outgoing frames.
-	 * If local_df is set too, we still allow to fragment this frame
+	 * If ignore_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc == IP_PMTUDISC_DO ||
 	    inet->pmtudisc == IP_PMTUDISC_PROBE ||
@@ -1379,7 +1378,7 @@
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
@@ -1546,7 +1545,8 @@
 			daddr = replyopts.opt.opt.faddr;
 	}
 
-	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+	flowi4_init_output(&fl4, arg->bound_dev_if,
+			   IP4_REPLY_MARK(net, skb->mark),
 			   RT_TOS(arg->tos),
 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 			   ip_reply_arg_flowi_flags(arg),

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2acc233..097b3e7 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c

@@ -268,6 +268,7 @@
 	__be32 remote = parms->iph.daddr;
 	__be32 local = parms->iph.saddr;
 	__be32 key = parms->i_key;
+	__be16 flags = parms->i_flags;
 	int link = parms->link;
 	struct ip_tunnel *t = NULL;
 	struct hlist_head *head = ip_bucket(itn, parms);
@@ -275,9 +276,9 @@
 	hlist_for_each_entry_rcu(t, head, hash_node) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
-		    key == t->parms.i_key &&
 		    link == t->parms.link &&
-		    type == t->dev->type)
+		    type == t->dev->type &&
+		    ip_tunnel_key_match(&t->parms, flags, key))
 			break;
 	}
 	return t;
@@ -395,11 +396,10 @@
 					  struct ip_tunnel_net *itn,
 					  struct ip_tunnel_parm *parms)
 {
-	struct ip_tunnel *nt, *fbt;
+	struct ip_tunnel *nt;
 	struct net_device *dev;
 
 	BUG_ON(!itn->fb_tunnel_dev);
-	fbt = netdev_priv(itn->fb_tunnel_dev);
 	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
 	if (IS_ERR(dev))
 		return ERR_CAST(dev);
@@ -668,6 +668,7 @@
 		dev->needed_headroom = max_headroom;
 
 	if (skb_cow_head(skb, dev->needed_headroom)) {
+		ip_rt_put(rt);
 		dev->stats.tx_dropped++;
 		kfree_skb(skb);
 		return;
@@ -747,19 +748,19 @@
 			goto done;
 		if (p->iph.ttl)
 			p->iph.frag_off |= htons(IP_DF);
-		if (!(p->i_flags&TUNNEL_KEY))
-			p->i_key = 0;
-		if (!(p->o_flags&TUNNEL_KEY))
-			p->o_key = 0;
+		if (!(p->i_flags & VTI_ISVTI)) {
+			if (!(p->i_flags & TUNNEL_KEY))
+				p->i_key = 0;
+			if (!(p->o_flags & TUNNEL_KEY))
+				p->o_key = 0;
+		}
 
 		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 
 		if (!t && (cmd == SIOCADDTUNNEL)) {
 			t = ip_tunnel_create(net, itn, p);
-			if (IS_ERR(t)) {
-				err = PTR_ERR(t);
-				break;
-			}
+			err = PTR_ERR_OR_ZERO(t);
+			break;
 		}
 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index bcf206c..f4c987b 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c

@@ -74,7 +74,7 @@
 	iph->daddr	=	dst;
 	iph->saddr	=	src;
 	iph->ttl	=	ttl;
-	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	err = ip_local_out_sk(sk, skb);
 	if (unlikely(net_xmit_eval(err)))
@@ -135,6 +135,14 @@
 		return skb;
 	}
 
+	/* If packet is not gso and we are resolving any partial checksum,
+	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+	 * on the outer header without confusing devices that implement
+	 * NETIF_F_IP_CSUM with encapsulation.
+	 */
+	if (csum_help)
+		skb->encapsulation = 0;
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
 		err = skb_checksum_help(skb);
 		if (unlikely(err))

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 13ef00f..b8960f3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c

@@ -313,7 +313,13 @@
 			return -EINVAL;
 	}
 
-	p.i_flags |= VTI_ISVTI;
+	if (!(p.i_flags & GRE_KEY))
+		p.i_key = 0;
+	if (!(p.o_flags & GRE_KEY))
+		p.o_key = 0;
+
+	p.i_flags = VTI_ISVTI;
+
 	err = ip_tunnel_ioctl(dev, &p, cmd);
 	if (err)
 		return err;

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b183..62eaa00 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c

@@ -149,13 +149,13 @@
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+				 t->parms.link, 0, IPPROTO_IPIP, 0);
 		err = 0;
 		goto out;
 	}
 
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 			      IPPROTO_IPIP, 0);
 		err = 0;
 		goto out;
@@ -486,4 +486,5 @@
 module_init(ipip_init);
 module_exit(ipip_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
 MODULE_ALIAS_NETDEV("tunl0");

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d84dc8d..65bcaa7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c

@@ -484,7 +484,7 @@
 	dev->type		= ARPHRD_PIMREG;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
-	dev->netdev_ops		= &reg_vif_netdev_ops,
+	dev->netdev_ops		= &reg_vif_netdev_ops;
 	dev->destructor		= free_netdev;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
@@ -1663,7 +1663,7 @@
 	iph->protocol	=	IPPROTO_IPIP;
 	iph->ihl	=	5;
 	iph->tot_len	=	htons(skb->len);
-	ip_select_ident(skb, skb_dst(skb), NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ee28861..f1787c0 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c

@@ -91,17 +91,9 @@
 	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (!nat) {
-		/* NAT module was loaded late. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f40f321..b8f6381 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c

@@ -34,7 +34,7 @@
 
 	if (!err) {
 		ip_send_check(ip_hdr(skb));
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 	}
 
 	return err;

diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5b256d..3964157 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c

@@ -48,15 +48,9 @@
 
 	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
-	nat = nfct_nat(ct);
-	if (nat == NULL) {
-		/* Conntrack module was loaded late, can't add extension. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL)
-			return NF_ACCEPT;
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ad737fa..ae0af93 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c

@@ -345,15 +345,15 @@
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
@@ -379,7 +379,7 @@
 	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 
@@ -395,11 +395,11 @@
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 	}
 
@@ -410,7 +410,7 @@
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udp_statistics,
+			   snmp_fold_field(net->mib.udp_statistics,
 					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
@@ -421,7 +421,7 @@
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
+			   snmp_fold_field(net->mib.udplite_statistics,
 					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -458,7 +458,7 @@
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.net_statistics,
+			   snmp_fold_field(net->mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
@@ -468,7 +468,7 @@
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipextstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a9dbe58..2c65160 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c

@@ -389,7 +389,7 @@
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(skb, &rt->dst, NULL);
+			ip_select_ident(skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5e676be..082239f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c

@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -456,39 +457,19 @@
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions.  However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
- */
-static void ip_select_fb_ident(struct iphdr *iph)
+atomic_t *ip_idents __read_mostly;
+EXPORT_SYMBOL(ip_idents);
+
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-	static DEFINE_SPINLOCK(ip_fb_id_lock);
-	static u32 ip_fallback_id;
-	u32 salt;
+	static u32 ip_idents_hashrnd __read_mostly;
+	u32 hash, id;
 
-	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-	iph->id = htons(salt & 0xFFFF);
-	ip_fallback_id = salt;
-	spin_unlock_bh(&ip_fb_id_lock);
-}
+	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
-{
-	struct net *net = dev_net(dst->dev);
-	struct inet_peer *peer;
-
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		iph->id = htons(inet_getid(peer, more));
-		inet_putpeer(peer);
-		return;
-	}
-
-	ip_select_fb_ident(iph);
+	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+	id = ip_idents_reserve(hash, segs);
+	iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
@@ -993,6 +974,9 @@
 	struct flowi4 fl4;
 	struct rtable *rt;
 
+	if (!mark)
+		mark = IP4_REPLY_MARK(net, skb->mark);
+
 	__build_flow_key(&fl4, NULL, iph, oif,
 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
@@ -1010,6 +994,10 @@
 	struct rtable *rt;
 
 	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+	if (!fl4.flowi4_mark)
+		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
 	rt = __ip_route_output_key(sock_net(sk), &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -2704,6 +2692,12 @@
 {
 	int rc = 0;
 
+	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+	if (!ip_idents)
+		panic("IP: failed to allocate ip_idents\n");
+
+	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c..c86624b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c

@@ -303,6 +303,7 @@
 	ireq->ir_rmt_port	= th->source;
 	ireq->ir_loc_addr	= ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr	= ip_hdr(skb)->saddr;
+	ireq->ir_mark		= inet_request_mark(sk, skb);
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -339,7 +340,7 @@
 	 * hasn't changed since we received the original syn, but I see
 	 * no easy way to do this.
 	 */
-	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
 			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5cde8f2..79a007c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c

@@ -437,13 +437,6 @@
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "ip_local_reserved_ports",
-		.data		= NULL, /* initialized in sysctl_ipv4_init */
-		.maxlen		= 65536,
-		.mode		= 0644,
-		.proc_handler	= proc_do_large_bitmap,
-	},
-	{
 		.procname	= "igmp_max_memberships",
 		.data		= &sysctl_igmp_max_memberships,
 		.maxlen		= sizeof(int),
@@ -825,6 +818,13 @@
 		.proc_handler	= ipv4_local_port_range,
 	},
 	{
+		.procname	= "ip_local_reserved_ports",
+		.data		= &init_net.ipv4.sysctl_local_reserved_ports,
+		.maxlen		= 65536,
+		.mode		= 0644,
+		.proc_handler	= proc_do_large_bitmap,
+	},
+	{
 		.procname	= "ip_no_pmtu_disc",
 		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
 		.maxlen		= sizeof(int),
@@ -838,6 +838,20 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_fwmark_accept",
+		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
@@ -862,8 +876,14 @@
 	if (net->ipv4.ipv4_hdr == NULL)
 		goto err_reg;
 
+	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+	if (!net->ipv4.sysctl_local_reserved_ports)
+		goto err_ports;
+
 	return 0;
 
+err_ports:
+	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
 err_reg:
 	if (!net_eq(net, &init_net))
 		kfree(table);
@@ -875,6 +895,7 @@
 {
 	struct ctl_table *table;
 
+	kfree(net->ipv4.sysctl_local_reserved_ports);
 	table = net->ipv4.ipv4_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
 	kfree(table);
@@ -888,16 +909,6 @@
 static __init int sysctl_ipv4_init(void)
 {
 	struct ctl_table_header *hdr;
-	struct ctl_table *i;
-
-	for (i = ipv4_table; i->procname; i++) {
-		if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
-			i->data = sysctl_local_reserved_ports;
-			break;
-		}
-	}
-	if (!i->procname)
-		return -EINVAL;
 
 	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
 	if (hdr == NULL)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bd6d52..eb1dde3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c

@@ -2916,6 +2916,14 @@
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
+
+	case TCP_FASTOPEN:
+		if (icsk->icsk_accept_queue.fastopenq != NULL)
+			val = icsk->icsk_accept_queue.fastopenq->max_qlen;
+		else
+			val = 0;
+		break;
+
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp + tp->tsoffset;
 		break;

diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846f..d5de69b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c

@@ -140,13 +140,12 @@
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c..7b09d8b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c

@@ -276,26 +276,6 @@
 	return err;
 }
 
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 left;
-
-	if (in_flight >= tp->snd_cwnd)
-		return true;
-
-	left = tp->snd_cwnd - in_flight;
-	if (sk_can_gso(sk) &&
-	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left < tp->xmit_size_goal_segs)
-		return true;
-	return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
 /* Slow start is used when congestion window is no greater than the slow start
  * threshold. We base on RFC2581 and also handle stretch ACKs properly.
  * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
@@ -337,11 +317,11 @@
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In "safe" area, increase. */

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b4f1b29..a9bd8a4 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c

@@ -304,13 +304,12 @@
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh) {

diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d93..62e48cf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c

@@ -72,25 +72,224 @@
 	return err;
 }
 
-/* Computes the fastopen cookie for the IP path.
- * The path is a 128 bits long (pad with zeros for IPv4).
- *
- * The caller must check foc->len to determine if a valid cookie
- * has been generated successfully.
-*/
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen(const void *path,
+				      struct tcp_fastopen_cookie *foc)
 {
-	__be32 path[4] = { src, dst, 0, 0 };
 	struct tcp_fastopen_context *ctx;
+	bool ok = false;
 
 	tcp_fastopen_init_key_once(true);
 
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
+		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		ok = true;
 	}
 	rcu_read_unlock();
+	return ok;
 }
+
+/* Generate the fastopen cookie by doing aes128 encryption on both
+ * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
+ * addresses. For the longer IPv6 addresses use CBC-MAC.
+ *
+ * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
+ */
+static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+				    struct sk_buff *syn,
+				    struct tcp_fastopen_cookie *foc)
+{
+	if (req->rsk_ops->family == AF_INET) {
+		const struct iphdr *iph = ip_hdr(syn);
+
+		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
+		return __tcp_fastopen_cookie_gen(path, foc);
+	}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (req->rsk_ops->family == AF_INET6) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+		struct tcp_fastopen_cookie tmp;
+
+		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+			struct in6_addr *buf = (struct in6_addr *) tmp.val;
+			int i = 4;
+
+			for (i = 0; i < 4; i++)
+				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+			return __tcp_fastopen_cookie_gen(buf, foc);
+		}
+	}
+#endif
+	return false;
+}
+
+static bool tcp_fastopen_create_child(struct sock *sk,
+				      struct sk_buff *skb,
+				      struct dst_entry *dst,
+				      struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	struct sock *child;
+
+	req->num_retrans = 0;
+	req->num_timeout = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL)
+		return false;
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listener sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_metrics(child);
+	tcp_init_buffer_space(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now,
+	 * (any reason not to?) but no need to queue the skb since
+	 * there is no data. How about SYN+FIN?
+	 */
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->syn_data_acked = 1;
+	}
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	sk->sk_data_ready(sk);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return true;
+}
+EXPORT_SYMBOL(tcp_fastopen_create_child);
+
+static bool tcp_fastopen_queue_check(struct sock *sk)
+{
+	struct fastopen_queue *fastopenq;
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validate the cookie in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	if (fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	return true;
+}
+
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and returned to the client in the SYN-ACK later, e.g. for
+ * a Fast Open cookie request (foc->len == 0).
+ */
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst)
+{
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+
+	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	      (syn_data || foc->len >= 0) &&
+	      tcp_fastopen_queue_check(sk))) {
+		foc->len = -1;
+		return false;
+	}
+
+	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+		goto fastopen;
+
+	if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    foc->len == valid_foc.len &&
+	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+		/* Cookie is valid. Create a (full) child socket to accept
+		 * the data in SYN before returning a SYN-ACK to ack the
+		 * data. If we fail to create the socket, fall back and
+		 * ack the ISN only but include the same cookie.
+		 *
+		 * Note: Data-less SYN with valid cookie is allowed to send
+		 * data in SYN_RECV state.
+		 */
+fastopen:
+		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+			foc->len = -1;
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVE);
+			return true;
+		}
+	}
+
+	NET_INC_STATS_BH(sock_net(sk), foc->len ?
+			 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+			 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	*foc = valid_foc;
+	return false;
+}
+EXPORT_SYMBOL(tcp_try_fastopen);

diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b9e7ba..1c49082 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c

@@ -109,12 +109,12 @@
 	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
 }
 
-static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)

diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194ac..0313613 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c

@@ -227,12 +227,12 @@
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
 
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index a15a799..d8f8f05 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c

@@ -87,8 +87,7 @@
  *     o Give cwnd a new value based on the model proposed
  *     o remember increments <1
  */
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			     u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hybla *ca = inet_csk_ca(sk);
@@ -101,11 +100,11 @@
 		ca->minrtt_us = tp->srtt_us;
 	}
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (!ca->hybla_en) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 

diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 863d105..5999b39 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c

@@ -255,8 +255,7 @@
 /*
  * Increase window in response to successful acknowledgment.
  */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@
 		update_params(sk);
 
 	/* RFC2861 only increase cwnd if fully utilized */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In slow start */

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a26b3b..40661fc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -1167,7 +1167,7 @@
 			}
 			pkt_len = new_len;
 		}
-		err = tcp_fragment(sk, skb, pkt_len, mss);
+		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
@@ -2241,7 +2241,8 @@
 				break;
 
 			mss = skb_shinfo(skb)->gso_size;
-			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
+					   mss, GFP_ATOMIC);
 			if (err < 0)
 				break;
 			cnt = packets;
@@ -2937,10 +2938,11 @@
 		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
+
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -3363,7 +3365,6 @@
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3396,7 +3397,6 @@
 		flag |= FLAG_SND_UNA_ADVANCED;
 
 	prior_fackets = tp->fackets_out;
-	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
 	 * is in window.
@@ -3451,7 +3451,7 @@
 
 	/* Advance cwnd if state allows */
 	if (tcp_may_raise_cwnd(sk, flag))
-		tcp_cong_avoid(sk, ack, acked, prior_in_flight);
+		tcp_cong_avoid(sk, ack, acked);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -4702,28 +4702,6 @@
 	return -1;
 }
 
-/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
- * As additional protections, we do not touch cwnd in retransmission phases,
- * and if application hit its sndbuf limit recently.
- */
-void tcp_cwnd_application_limited(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
-	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		/* Limited by application or receiver window. */
-		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
-		u32 win_used = max(tp->snd_cwnd_used, init_win);
-		if (win_used < tp->snd_cwnd) {
-			tp->snd_ssthresh = tcp_current_ssthresh(sk);
-			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
-		}
-		tp->snd_cwnd_used = 0;
-	}
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-}
-
 static bool tcp_should_expand_sndbuf(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 438f3b9..77cccda 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c

@@ -336,8 +336,8 @@
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
-	struct request_sock *req;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	__u32 remaining;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt) &&
-	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
-		/* For a Fast Open socket, allow seq to be snt_isn. */
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -426,11 +426,9 @@
 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 			break;
 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff)
+		    !icsk->icsk_backoff || fastopen)
 			break;
 
-		/* XXX (TFO) - revisit the following logic for TFO */
-
 		if (sock_owned_by_user(sk))
 			break;
 
@@ -462,14 +460,6 @@
 		goto out;
 	}
 
-	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
-	 * than following the TCP_SYN_RECV case and closing the socket,
-	 * we ignore the ICMP error and keep trying like a fully established
-	 * socket. Is this the right thing to do?
-	 */
-	if (req && req->sk == NULL)
-		goto out;
-
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -502,10 +492,13 @@
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed,
-			       or Fast Open.
-			     */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
@@ -822,7 +815,8 @@
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +827,7 @@
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1260,187 +1254,6 @@
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-			    LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-	    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
@@ -1451,12 +1264,10 @@
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1507,6 +1318,7 @@
 	ireq->ir_rmt_addr = saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
@@ -1555,52 +1367,24 @@
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-		     ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	}
 
 	return 0;
 
@@ -1744,28 +1528,6 @@
 	return sk;
 }
 
-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->len, iph->saddr,
-				  iph->daddr, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-	}
-
-	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-				       skb->len, IPPROTO_TCP, 0);
-
-	if (skb->len <= 76) {
-		return __skb_checksum_complete(skb);
-	}
-	return 0;
-}
-
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1960,7 +1722,8 @@
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
+
+	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
 		goto csum_error;
 
 	th = tcp_hdr(skb);

diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c9aecae..1e70fa8 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c

@@ -115,13 +115,12 @@
  * Will only call newReno CA when away from inference.
  * From TCP-LP's paper, this will be handled in additive increasement.
  */
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct lp *lp = inet_csk_ca(sk);
 
 	if (!(lp->flag & LP_WITHIN_INF))
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 }
 
 /**

diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d4f015a..f7a2ec3 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c

@@ -102,17 +102,19 @@
 	return 0;
 }
 
-static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned long long val;
 	int ret = 0;
 
-	switch (cft->private) {
+	buf = strstrip(buf);
+
+	switch (of_cft(of)->private) {
 	case RES_LIMIT:
 		/* see memcontrol.c */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = tcp_update_limit(memcg, val);
@@ -121,7 +123,7 @@
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
@@ -168,17 +170,18 @@
 	return val;
 }
 
-static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg;
 	struct cg_proto *cg_proto;
 
-	memcg = mem_cgroup_from_css(css);
+	memcg = mem_cgroup_from_css(of_css(of));
 	cg_proto = tcp_prot.proto_cgroup(memcg);
 	if (!cg_proto)
-		return 0;
+		return nbytes;
 
-	switch (event) {
+	switch (of_cft(of)->private) {
 	case RES_MAX_USAGE:
 		res_counter_reset_max(&cg_proto->memory_allocated);
 		break;
@@ -187,13 +190,13 @@
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static struct cftype tcp_files[] = {
 	{
 		.name = "kmem.tcp.limit_in_bytes",
-		.write_string = tcp_cgroup_write,
+		.write = tcp_cgroup_write,
 		.read_u64 = tcp_cgroup_read,
 		.private = RES_LIMIT,
 	},
@@ -205,13 +208,13 @@
 	{
 		.name = "kmem.tcp.failcnt",
 		.private = RES_FAILCNT,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{
 		.name = "kmem.tcp.max_usage_in_bytes",
 		.private = RES_MAX_USAGE,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{ }	/* terminate */

diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index dcaf72f..4fe0418 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c

@@ -1159,10 +1159,7 @@
 			tm = next;
 		}
 	}
-	if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
-		vfree(net->ipv4.tcp_metrics_hash);
-	else
-		kfree(net->ipv4.tcp_metrics_hash);
+	kvfree(net->ipv4.tcp_metrics_hash);
 }
 
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b15..e68e0d4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c

@@ -362,6 +362,37 @@
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_openreq_init_rwin(struct request_sock *req,
+			   struct sock *sk, struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+	int mss = dst_metric_advmss(dst);
+
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
+	/* Set this up on the first call only */
+	req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+		req->window_clamp = tcp_full_space(sk);
+
+	/* tcp_full_space because it is guaranteed to be the first packet */
+	tcp_select_initial_window(tcp_full_space(sk),
+		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+		&req->rcv_wnd,
+		&req->window_clamp,
+		ireq->wscale_ok,
+		&rcv_wscale,
+		dst_metric(dst, RTAX_INITRWND));
+	ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct request_sock *req)
 {

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b92b817..4e86c59 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c

@@ -57,10 +57,12 @@
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
+			       SKB_GSO_GRE_CSUM |
 			       SKB_GSO_IPIP |
 			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
+			       SKB_GSO_UDP_TUNNEL_CSUM |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
@@ -97,9 +99,7 @@
 		th->check = newcheck;
 
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			th->check =
-			     csum_fold(csum_partial(skb_transport_header(skb),
-						    thlen, skb->csum));
+			th->check = gso_make_checksum(skb, ~th->check);
 
 		seq += mss;
 		if (copy_destructor) {
@@ -133,8 +133,7 @@
 	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				(__force u32)delta));
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		th->check = csum_fold(csum_partial(skb_transport_header(skb),
-						   thlen, skb->csum));
+		th->check = gso_make_checksum(skb, ~th->check);
 out:
 	return segs;
 }

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2d340bd..d92bce0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c

@@ -627,7 +627,7 @@
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-	if (foc != NULL) {
+	if (foc != NULL && foc->len >= 0) {
 		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
@@ -878,15 +878,8 @@
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
 	if (clone_it) {
-		const struct sk_buff *fclone = skb + 1;
-
 		skb_mstamp_get(&skb->skb_mstamp);
 
-		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
@@ -1081,7 +1074,7 @@
  * Remember, these are still headerless SKBs at this point.
  */
 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
-		 unsigned int mss_now)
+		 unsigned int mss_now, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -1096,11 +1089,11 @@
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_unclone(skb, GFP_ATOMIC))
+	if (skb_unclone(skb, gfp))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, nsize, gfp);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
 
@@ -1387,12 +1380,43 @@
 	return mss_now;
 }
 
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+static void tcp_cwnd_application_limited(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		/* Limited by application or receiver window. */
+		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+		u32 win_used = max(tp->snd_cwnd_used, init_win);
+		if (win_used < tp->snd_cwnd) {
+			tp->snd_ssthresh = tcp_current_ssthresh(sk);
+			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+		}
+		tp->snd_cwnd_used = 0;
+	}
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Track the maximum number of outstanding packets in each
+	 * window, and remember whether we were cwnd-limited then.
+	 */
+	if (!before(tp->snd_una, tp->max_packets_seq) ||
+	    tp->packets_out > tp->max_packets_out) {
+		tp->max_packets_out = tp->packets_out;
+		tp->max_packets_seq = tp->snd_nxt;
+		tp->is_cwnd_limited = is_cwnd_limited;
+	}
+
+	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1601,7 +1625,7 @@
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now);
+		return tcp_fragment(sk, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp);
 	if (unlikely(buff == NULL))
@@ -1644,7 +1668,8 @@
  *
  * This algorithm is from John Heffner.
  */
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+				 bool *is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1708,6 +1733,9 @@
 	if (!tp->tso_deferred)
 		tp->tso_deferred = 1 | (jiffies << 1);
 
+	if (cong_win < send_win && cong_win < skb->len)
+		*is_cwnd_limited = true;
+
 	return true;
 
 send_now:
@@ -1868,6 +1896,7 @@
 	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
+	bool is_cwnd_limited = false;
 
 	sent_pkts = 0;
 
@@ -1892,6 +1921,7 @@
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
+			is_cwnd_limited = true;
 			if (push_one == 2)
 				/* Force out a loss probe pkt. */
 				cwnd_quota = 1;
@@ -1908,7 +1938,8 @@
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (!push_one && tcp_tso_should_defer(sk, skb))
+			if (!push_one &&
+			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
 				break;
 		}
 
@@ -1973,7 +2004,7 @@
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2037,6 +2068,25 @@
 	return true;
 }
 
+/* Thanks to skb fast clones, we can detect if a prior transmit of
+ * a packet is still in a qdisc or driver queue.
+ * In this case, there is very little point doing a retransmit !
+ * Note: This is called from BH context only.
+ */
+static bool skb_still_in_host_queue(const struct sock *sk,
+				    const struct sk_buff *skb)
+{
+	const struct sk_buff *fclone = skb + 1;
+
+	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+		     fclone->fclone == SKB_FCLONE_CLONE)) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+		return true;
+	}
+	return false;
+}
+
 /* When probe timeout (PTO) fires, send a new segment if one exists, else
  * retransmit the last segment.
  */
@@ -2062,12 +2112,16 @@
 	if (WARN_ON(!skb))
 		goto rearm_timer;
 
+	if (skb_still_in_host_queue(sk, skb))
+		goto rearm_timer;
+
 	pcount = tcp_skb_pcount(skb);
 	if (WARN_ON(!pcount))
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+					  GFP_ATOMIC)))
 			goto rearm_timer;
 		skb = tcp_write_queue_tail(sk);
 	}
@@ -2075,9 +2129,7 @@
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
 		goto rearm_timer;
 
-	/* Probe with zero data doesn't trigger fast recovery. */
-	if (skb->len > 0)
-		err = __tcp_retransmit_skb(sk, skb);
+	err = __tcp_retransmit_skb(sk, skb);
 
 	/* Record snd_nxt for loss detection. */
 	if (likely(!err))
@@ -2383,6 +2435,9 @@
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
 		return -EAGAIN;
 
+	if (skb_still_in_host_queue(sk, skb))
+		return -EBUSY;
+
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
@@ -2405,7 +2460,7 @@
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
+		if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		int oldpcount = tcp_skb_pcount(skb);
@@ -2476,7 +2531,7 @@
 		 * see tcp_input.c tcp_sacktag_write_queue().
 		 */
 		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
-	} else {
+	} else if (err != -EBUSY) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 	return err;
@@ -2754,27 +2809,6 @@
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-		/* limit the window selection if the user enforce a smaller rx buffer */
-		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
-			req->window_clamp = tcp_full_space(sk);
-
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale,
-			dst_metric(dst, RTAX_INITRWND));
-		ireq->rcv_wscale = rcv_wscale;
-	}
-
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
@@ -3207,7 +3241,7 @@
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss))
+			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);

diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 0ac5083..8250949 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c

@@ -15,12 +15,11 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 48539ff..9a5e05f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c

@@ -163,14 +163,13 @@
 	return  min(tp->snd_ssthresh, tp->snd_cwnd-1);
 }
 
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				 u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	if (!vegas->doing_vegas_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
@@ -195,7 +194,7 @@
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+			tcp_reno_cong_avoid(sk, ack, acked);
 		} else {
 			u32 rtt, diff;
 			u64 target_cwnd;

diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 1b8e28f..27b9825 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c

@@ -114,19 +114,18 @@
 		tcp_veno_init(sk);
 }
 
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
 	if (!veno->doing_veno_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
 	/* limited by applications */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@
 		/* We don't have enough rtt samples to do the Veno
 		 * calculation, so we'll behave like Reno.
 		 */
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 	} else {
 		u64 target_cwnd;
 		u32 rtt;

diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 5ede0e7..599b79b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c

@@ -69,13 +69,12 @@
 	tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4468e1a..185ed3e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c

@@ -246,7 +246,7 @@
 			do {
 				if (low <= snum && snum <= high &&
 				    !test_bit(snum >> udptable->log, bitmap) &&
-				    !inet_is_reserved_local_port(snum))
+				    !inet_is_local_reserved_port(net, snum))
 					goto found;
 				snum += rand;
 			} while (snum != first);
@@ -727,13 +727,12 @@
 void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
 {
 	struct udphdr *uh = udp_hdr(skb);
-	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
 	int offset = skb_transport_offset(skb);
 	int len = skb->len - offset;
 	int hlen = len;
 	__wsum csum = 0;
 
-	if (!frags) {
+	if (!skb_has_frag_list(skb)) {
 		/*
 		 * Only one fragment on the socket.
 		 */
@@ -742,15 +741,17 @@
 		uh->check = ~csum_tcpudp_magic(src, dst, len,
 					       IPPROTO_UDP, 0);
 	} else {
+		struct sk_buff *frags;
+
 		/*
 		 * HW-checksum won't work as there are two or more
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together
 		 */
-		do {
+		skb_walk_frags(skb, frags) {
 			csum = csum_add(csum, frags->csum);
 			hlen -= frags->len;
-		} while ((frags = frags->next));
+		}
 
 		csum = skb_checksum(skb, offset, hlen, csum);
 		skb->ip_summed = CHECKSUM_NONE;
@@ -762,6 +763,43 @@
 }
 EXPORT_SYMBOL_GPL(udp4_hwcsum);
 
+/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+		  __be32 saddr, __be32 daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v4_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+EXPORT_SYMBOL(udp_set_csum);
+
 static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 {
 	struct sock *sk = skb->sk;
@@ -785,7 +823,7 @@
 	if (is_udplite)  				 /*     UDP-Lite      */
 		csum = udplite_csum(skb);
 
-	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
+	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
 
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
@@ -1495,6 +1533,10 @@
 		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
 			int ret;
 
+			/* Verify checksum before giving to encap */
+			if (udp_lib_checksum_complete(skb))
+				goto csum_error;
+
 			ret = encap_rcv(sk, skb);
 			if (ret <= 0) {
 				UDP_INC_STATS_BH(sock_net(sk),
@@ -1672,7 +1714,6 @@
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				 int proto)
 {
-	const struct iphdr *iph;
 	int err;
 
 	UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1684,22 +1725,8 @@
 			return err;
 	}
 
-	iph = ip_hdr(skb);
-	if (uh->check == 0) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-				      proto, skb->csum))
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-					       skb->len, proto, 0);
-	/* Probably, we should checksum udp header (it should be in cache
-	 * in any case) and data in tiny packets (< rx copybreak).
-	 */
-
-	return 0;
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					    inet_compute_pseudo);
 }
 
 /*
@@ -1886,7 +1913,7 @@
 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
 	unsigned int slot2 = hash2 & udp_table.mask;
 	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
-	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr)
+	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
 	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
 
 	rcu_read_lock();
@@ -1979,7 +2006,7 @@
 		       int (*push_pending_frames)(struct sock *))
 {
 	struct udp_sock *up = udp_sk(sk);
-	int val;
+	int val, valbool;
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -1989,6 +2016,8 @@
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
+	valbool = val ? 1 : 0;
+
 	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
@@ -2018,6 +2047,14 @@
 		}
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		up->no_check6_tx = valbool;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		up->no_check6_rx = valbool;
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
@@ -2100,6 +2137,14 @@
 		val = up->encap_type;
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		val = up->no_check6_tx;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		val = up->no_check6_rx;
+		break;
+
 	/* The following two cannot be changed on UDP sockets, the return is
 	 * always 0 (which corresponds to the full checksum coverage of UDP). */
 	case UDPLITE_SEND_CSCOV:
@@ -2484,7 +2529,11 @@
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
 	__be16 protocol = skb->protocol;
 	netdev_features_t enc_features;
-	int outer_hlen;
+	int udp_offset, outer_hlen;
+	unsigned int oldlen;
+	bool need_csum;
+
+	oldlen = (u16)~skb->len;
 
 	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
 		goto out;
@@ -2496,6 +2545,10 @@
 	skb->mac_len = skb_inner_network_offset(skb);
 	skb->protocol = htons(ETH_P_TEB);
 
+	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+	if (need_csum)
+		skb->encap_hdr_csum = 1;
+
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
@@ -2506,10 +2559,11 @@
 	}
 
 	outer_hlen = skb_tnl_header_len(skb);
+	udp_offset = outer_hlen - tnl_hlen;
 	skb = segs;
 	do {
 		struct udphdr *uh;
-		int udp_offset = outer_hlen - tnl_hlen;
+		int len;
 
 		skb_reset_inner_headers(skb);
 		skb->encapsulation = 1;
@@ -2520,31 +2574,20 @@
 		skb_reset_mac_header(skb);
 		skb_set_network_header(skb, mac_len);
 		skb_set_transport_header(skb, udp_offset);
+		len = skb->len - udp_offset;
 		uh = udp_hdr(skb);
-		uh->len = htons(skb->len - udp_offset);
+		uh->len = htons(len);
 
-		/* csum segment if tunnel sets skb with csum. */
-		if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
-			struct iphdr *iph = ip_hdr(skb);
+		if (need_csum) {
+			__be32 delta = htonl(oldlen + len);
 
-			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						       skb->len - udp_offset,
-						       IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset,
-							   skb->len - udp_offset, 0));
+			uh->check = ~csum_fold((__force __wsum)
+					       ((__force u32)uh->check +
+						(__force u32)delta));
+			uh->check = gso_make_checksum(skb, ~uh->check);
+
 			if (uh->check == 0)
 				uh->check = CSUM_MANGLED_0;
-
-		} else if (protocol == htons(ETH_P_IPV6)) {
-			struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-			u32 len = skb->len - udp_offset;
-
-			uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-						     len, IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
-			if (uh->check == 0)
-				uh->check = CSUM_MANGLED_0;
-			skb->ip_summed = CHECKSUM_NONE;
 		}
 
 		skb->protocol = protocol;

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023..546d2d4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c

@@ -56,7 +56,8 @@
 	__wsum csum;
 
 	if (skb->encapsulation &&
-	    skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+	    (skb_shinfo(skb)->gso_type &
+	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
 		segs = skb_udp_tunnel_segment(skb, features);
 		goto out;
 	}
@@ -71,8 +72,10 @@
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_IPIP |
-				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
+				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
+				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
@@ -197,6 +200,7 @@
 	}
 
 	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 	pp = uo_priv->offload->callbacks.gro_receive(head, skb);
 
 out_unlock:

diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2c46acd..3b3efbd 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c

@@ -70,7 +70,6 @@
 	.protocol	=  IPPROTO_UDPLITE,
 	.prot		=  &udplite_prot,
 	.ops		=  &inet_dgram_ops,
-	.no_check	=  0,		/* must checksum (RFC 3828) */
 	.flags		=  INET_PROTOSW_PERMANENT,
 };
 

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 05f2b48..91771a7 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c

@@ -58,12 +58,12 @@
 
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-	ip_select_ident(skb, dst->child, NULL);
 
 	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
+	ip_select_ident(skb, NULL);
 
 	return 0;
 }

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 186a8ec..d5f6bd9 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c

@@ -25,7 +25,7 @@
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
-	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
 		goto out;
 
 	mtu = dst_mtu(skb_dst(skb));

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c7fa08..5667b30 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c

@@ -275,19 +275,14 @@
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+	if (!idev->stats.ipv6)
 		goto err_ip;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *addrconf_stats;
-		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
+		addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
 		u64_stats_init(&addrconf_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
-		u64_stats_init(&addrconf_stats->syncp);
-#endif
 	}
 
 
@@ -305,7 +300,7 @@
 err_icmpmsg:
 	kfree(idev->stats.icmpv6dev);
 err_icmp:
-	snmp_mib_free((void __percpu **)idev->stats.ipv6);
+	free_percpu(idev->stats.ipv6);
 err_ip:
 	return -ENOMEM;
 }
@@ -2504,8 +2499,8 @@
 	return PTR_ERR(ifp);
 }
 
-static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
-			  unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+			  const struct in6_addr *pfx, unsigned int plen)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
@@ -2528,7 +2523,12 @@
 			in6_ifa_hold(ifp);
 			read_unlock_bh(&idev->lock);
 
+			if (!(ifp->flags & IFA_F_TEMPORARY) &&
+			    (ifa_flags & IFA_F_MANAGETEMPADDR))
+				manage_tempaddrs(idev, ifp, 0, 0, false,
+						 jiffies);
 			ipv6_del_addr(ifp);
+			addrconf_verify_rtnl();
 			return 0;
 		}
 	}
@@ -2568,7 +2568,7 @@
 		return -EFAULT;
 
 	rtnl_lock();
-	err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+	err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
 			     ireq.ifr6_prefixlen);
 	rtnl_unlock();
 	return err;
@@ -2813,18 +2813,6 @@
 }
 #endif
 
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
-	struct in6_addr lladdr;
-
-	if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
-		addrconf_add_linklocal(idev, &lladdr);
-		return 0;
-	}
-	return -1;
-}
-
 static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
@@ -3743,6 +3731,7 @@
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx, *peer_pfx;
+	u32 ifa_flags;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3754,7 +3743,13 @@
 	if (pfx == NULL)
 		return -EINVAL;
 
-	return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+	/* We ignore other flags so far. */
+	ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+	return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+			      ifm->ifa_prefixlen);
 }
 
 static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
@@ -4363,7 +4358,7 @@
 	memset(&stats[items], 0, pad);
 }
 
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
 				      int items, int bytes, size_t syncpoff)
 {
 	int i;
@@ -4383,7 +4378,7 @@
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+		__snmp6_fill_stats64(stats, idev->stats.ipv6,
 				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:

diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbc..e696045 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c

@@ -123,7 +123,7 @@
 {
 	kfree(idev->stats.icmpv6msgdev);
 	kfree(idev->stats.icmpv6dev);
-	snmp_mib_free((void __percpu **)idev->stats.ipv6);
+	free_percpu(idev->stats.ipv6);
 }
 
 /* Nobody refers to this device, we may destroy it. */

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d935889..7cb4392 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c

@@ -106,7 +106,6 @@
 	struct inet_protosw *answer;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
-	char answer_no_check;
 	int try_loading_module = 0;
 	int err;
 
@@ -162,7 +161,6 @@
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
-	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
@@ -176,7 +174,6 @@
 	sock_init_data(sock, sk);
 
 	err = 0;
-	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
@@ -715,33 +712,25 @@
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_stats_in6)
 		return -ENOMEM;
-	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_stats_in6)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ipv6_statistics)
 		goto err_ip_mib;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *af_inet6_stats;
-		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
+		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
 		u64_stats_init(&af_inet6_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
-		u64_stats_init(&af_inet6_stats->syncp);
-#endif
 	}
 
 
-	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
-			  sizeof(struct icmpv6_mib),
-			  __alignof__(struct icmpv6_mib)) < 0)
+	net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+	if (!net->mib.icmpv6_statistics)
 		goto err_icmp_mib;
 	net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
 						GFP_KERNEL);
@@ -750,22 +739,22 @@
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+	free_percpu(net->mib.icmpv6_statistics);
 err_icmp_mib:
-	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+	free_percpu(net->mib.ipv6_statistics);
 err_ip_mib:
-	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+	free_percpu(net->mib.udplite_stats_in6);
 err_udplite_mib:
-	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+	free_percpu(net->mib.udp_stats_in6);
 	return -ENOMEM;
 }
 
 static void ipv6_cleanup_mibs(struct net *net)
 {
-	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
-	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
-	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
-	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+	free_percpu(net->mib.udp_stats_in6);
+	free_percpu(net->mib.udplite_stats_in6);
+	free_percpu(net->mib.ipv6_statistics);
+	free_percpu(net->mib.icmpv6_statistics);
 	kfree(net->mib.icmpv6msg_statistics);
 }
 

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7b32652..f6c84a6 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c

@@ -400,6 +400,7 @@
 	int len;
 	int hlimit;
 	int err = 0;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	if ((u8 *)hdr < skb->head ||
 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@
 	fl6.daddr = hdr->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
+	fl6.flowi6_mark = mark;
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -493,12 +496,7 @@
 	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	msg.skb = skb;
 	msg.offset = skb_network_offset(skb);
@@ -556,6 +554,7 @@
 	int err = 0;
 	int hlimit;
 	u8 tclass;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -574,11 +573,13 @@
 		fl6.saddr = *saddr;
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	fl6.flowi6_mark = mark;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -593,12 +594,7 @@
 	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	idev = __in6_dev_get(skb->dev);
 
@@ -702,22 +698,11 @@
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = &ipv6_hdr(skb)->daddr;
 
-	/* Perform checksum. */
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
-				     skb->csum))
-			break;
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
-					     IPPROTO_ICMPV6, 0));
-		if (__skb_checksum_complete(skb)) {
-			LIMIT_NETDEBUG(KERN_DEBUG
-				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
-				       saddr, daddr);
-			goto csum_error;
-		}
+	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+		LIMIT_NETDEBUG(KERN_DEBUG
+			       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+			       saddr, daddr);
+		goto csum_error;
 	}
 
 	if (!pskb_pull(skb, sizeof(*hdr)))

diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d4ade34..a245e5d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c

@@ -81,7 +81,7 @@
 	final_p = fl6_update_dst(fl6, np->opt, &final);
 	fl6->saddr = ireq->ir_v6_loc_addr;
 	fl6->flowi6_oif = ireq->ir_iif;
-	fl6->flowi6_mark = sk->sk_mark;
+	fl6->flowi6_mark = ireq->ir_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
 	fl6->fl6_sport = htons(ireq->ir_num);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));

diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index ee7a97f..9a4d732 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c

@@ -75,25 +75,50 @@
 			return err;
 	}
 
-	if (uh->check == 0) {
-		/* RFC 2460 section 8.1 says that we SHOULD log
-		   this error. Well, it is reasonable.
-		 */
-		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
-			       &ipv6_hdr(skb)->saddr, ntohs(uh->source),
-			       &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
-		return 1;
-	}
-	if (skb->ip_summed == CHECKSUM_COMPLETE &&
-	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
-			     skb->len, proto, skb->csum))
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-							 &ipv6_hdr(skb)->daddr,
-							 skb->len, proto, 0));
-
-	return 0;
+	/* To support RFC 6936 (allow zero checksum in UDP/IPv6 for tunnels)
+	 * we accept a checksum of zero here. When we find the socket
+	 * for the UDP packet we'll check if that socket allows zero checksum
+	 * for IPv6 (set by socket option).
+	 */
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					   ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
+
+/* Set the UDP checksum for an IPv6 UDP packet. This is intended for
+ * simple cases, such as setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v6_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+EXPORT_SYMBOL(udp6_set_csum);

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 87891f5..cb4459b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c

@@ -71,8 +71,7 @@
 #define FWS_INIT FWS_L
 #endif
 
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
-			      struct rt6_info *rt);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
 static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
 static int fib6_walk(struct fib6_walker_t *w);
@@ -941,7 +940,7 @@
 	if (!err) {
 		fib6_start_gc(info->nl_net, rt);
 		if (!(rt->rt6i_flags & RTF_CACHE))
-			fib6_prune_clones(info->nl_net, pn, rt);
+			fib6_prune_clones(info->nl_net, pn);
 	}
 
 out:
@@ -1375,7 +1374,7 @@
 			pn = pn->parent;
 		}
 #endif
-		fib6_prune_clones(info->nl_net, pn, rt);
+		fib6_prune_clones(info->nl_net, pn);
 	}
 
 	/*
@@ -1601,10 +1600,9 @@
 	return 0;
 }
 
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
-			      struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
 {
-	fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+	fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
 }
 
 /*

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 0961b5e..4052694 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c

@@ -26,7 +26,6 @@
 #include <net/sock.h>
 
 #include <net/ipv6.h>
-#include <net/addrconf.h>
 #include <net/rawv6.h>
 #include <net/transp_v6.h>
 

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9d92146..3873181 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c

@@ -72,6 +72,7 @@
 };
 
 static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
 static int ip6gre_tunnel_init(struct net_device *dev);
 static void ip6gre_tunnel_setup(struct net_device *dev);
 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -353,10 +354,10 @@
 
 static void ip6gre_tunnel_uninit(struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
-	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
 
-	ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+	ip6gre_tunnel_unlink(ign, t);
 	dev_put(dev);
 }
 
@@ -467,17 +468,7 @@
 			goto drop;
 
 		if (flags&GRE_CSUM) {
-			switch (skb->ip_summed) {
-			case CHECKSUM_COMPLETE:
-				csum = csum_fold(skb->csum);
-				if (!csum)
-					break;
-				/* fall through */
-			case CHECKSUM_NONE:
-				skb->csum = 0;
-				csum = __skb_checksum_complete(skb);
-				skb->ip_summed = CHECKSUM_COMPLETE;
-			}
+			csum = skb_checksum_simple_validate(skb);
 			offset += 4;
 		}
 		if (flags&GRE_KEY) {
@@ -611,8 +602,8 @@
 			 int encap_limit,
 			 __u32 *pmtu)
 {
-	struct net *net = dev_net(dev);
 	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct net *net = tunnel->net;
 	struct net_device *tdev;    /* Device to other host */
 	struct ipv6hdr  *ipv6h;     /* Our new IP header */
 	unsigned int max_headroom = 0; /* The extra header space needed */
@@ -979,7 +970,7 @@
 		int strict = (ipv6_addr_type(&p->raddr) &
 			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
 
-		struct rt6_info *rt = rt6_lookup(dev_net(dev),
+		struct rt6_info *rt = rt6_lookup(t->net,
 						 &p->raddr, &p->laddr,
 						 p->link, strict);
 
@@ -1063,13 +1054,12 @@
 	int err = 0;
 	struct ip6_tnl_parm2 p;
 	struct __ip6_tnl_parm p1;
-	struct ip6_tnl *t;
-	struct net *net = dev_net(dev);
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = t->net;
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		t = NULL;
 		if (dev == ign->fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 				err = -EFAULT;
@@ -1077,9 +1067,9 @@
 			}
 			ip6gre_tnl_parm_from_user(&p1, &p);
 			t = ip6gre_tunnel_locate(net, &p1, 0);
+			if (t == NULL)
+				t = netdev_priv(dev);
 		}
-		if (t == NULL)
-			t = netdev_priv(dev);
 		memset(&p, 0, sizeof(p));
 		ip6gre_tnl_parm_to_user(&p, &t->parms);
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
@@ -1242,7 +1232,6 @@
 	dev->flags |= IFF_NOARP;
 	dev->iflink = 0;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 }
 
@@ -1297,11 +1286,17 @@
 	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
-static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
-	struct list_head *head)
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
 {
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	struct net_device *dev, *aux;
 	int prio;
 
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+		    dev->rtnl_link_ops == &ip6gre_tap_ops)
+			unregister_netdevice_queue(dev, head);
+
 	for (prio = 0; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
@@ -1310,7 +1305,12 @@
 			t = rtnl_dereference(ign->tunnels[prio][h]);
 
 			while (t != NULL) {
-				unregister_netdevice_queue(t->dev, head);
+				/* If dev is in the same netns, it has already
+				 * been added to the list by the previous loop.
+				 */
+				if (!net_eq(dev_net(t->dev), net))
+					unregister_netdevice_queue(t->dev,
+								   head);
 				t = rtnl_dereference(t->next);
 			}
 		}
@@ -1329,6 +1329,11 @@
 		goto err_alloc_dev;
 	}
 	dev_net_set(ign->fb_tunnel_dev, net);
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing it to be moved to another netns is clearly unsafe.
+	 */
+	ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
 
 	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
 	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1349,12 +1354,10 @@
 
 static void __net_exit ip6gre_exit_net(struct net *net)
 {
-	struct ip6gre_net *ign;
 	LIST_HEAD(list);
 
-	ign = net_generic(net, ip6gre_net_id);
 	rtnl_lock();
-	ip6gre_destroy_tunnels(ign, &list);
+	ip6gre_destroy_tunnels(net, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
@@ -1531,15 +1534,14 @@
 static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
 			    struct nlattr *data[])
 {
-	struct ip6_tnl *t, *nt;
-	struct net *net = dev_net(dev);
+	struct ip6_tnl *t, *nt = netdev_priv(dev);
+	struct net *net = nt->net;
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 	struct __ip6_tnl_parm p;
 
 	if (dev == ign->fb_tunnel_dev)
 		return -EINVAL;
 
-	nt = netdev_priv(dev);
 	ip6gre_netlink_parms(data, &p);
 
 	t = ip6gre_tunnel_locate(net, &p, 0);

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b2f0915..65eda2a 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c

@@ -97,9 +97,11 @@
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
 		       0)))

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index fbf1156..cb9df0e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c

@@ -219,7 +219,7 @@
 	skb->mark = sk->sk_mark;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -347,11 +347,11 @@
 	if (skb->len <= mtu)
 		return false;
 
-	/* ipv6 conntrack defrag sets max_frag_size + local_df */
+	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 		return true;
 
-	if (skb->local_df)
+	if (skb->ignore_df)
 		return false;
 
 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -537,6 +537,18 @@
 	skb_copy_secmark(to, from);
 }
 
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+	static u32 ip6_idents_hashrnd __read_mostly;
+	u32 hash, id;
+
+	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+	id = ip_idents_reserve(hash, 1);
+	fhdr->identification = htonl(id);
+}
+
 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
@@ -559,7 +571,7 @@
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb it not generated by a local socket.
 	 */
-	if (unlikely(!skb->local_df && skb->len > mtu) ||
+	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
 		     (IP6CB(skb)->frag_max_size &&
 		      IP6CB(skb)->frag_max_size > mtu)) {
 		if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -1234,7 +1246,7 @@
 			      sizeof(struct frag_hdr) : 0) +
 			     rt->rt6i_nfheader_len;
 
-		if (ip6_sk_local_df(sk))
+		if (ip6_sk_ignore_df(sk))
 			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
 		else
 			maxnonfragsize = mtu;
@@ -1544,7 +1556,7 @@
 	}
 
 	/* Allow local fragmentation. */
-	skb->local_df = ip6_sk_local_df(sk);
+	skb->ignore_df = ip6_sk_ignore_df(sk);
 
 	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f6a66bb..afa0824 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c

@@ -61,6 +61,7 @@
 MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
 MODULE_ALIAS_NETDEV("ip6tnl0");
 
 #ifdef IP6_TNL_DEBUG

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 6cc9f93..9aaa6bb 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c

@@ -795,15 +795,12 @@
  **/
 static void vti6_dev_setup(struct net_device *dev)
 {
-	struct ip6_tnl *t;
-
 	dev->netdev_ops = &vti6_netdev_ops;
 	dev->destructor = vti6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
 	dev->mtu = ETH_DATA_LEN;
-	t = netdev_priv(dev);
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
 	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;

diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 84c7f33..387d8b8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c

@@ -90,17 +90,9 @@
 	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (!nat) {
-		/* NAT module was loaded late. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 767ab8d..0d5279f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c

@@ -451,7 +451,7 @@
 	}
 	sub_frag_mem_limit(&fq->q, head->truesize);
 
-	head->local_df = 1;
+	head->ignore_df = 1;
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;

diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 9c3297a..d189fcb 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c

@@ -47,15 +47,9 @@
 	if (ct == NULL || nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (nat == NULL) {
-		/* Conntrack module was loaded late, can't add extension. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL)
-			return NF_ACCEPT;
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:

diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 56596ce..5ec867e 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c

@@ -8,32 +8,6 @@
 #include <net/addrconf.h>
 #include <net/secure_seq.h>
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
-	static atomic_t ipv6_fragmentation_id;
-	struct in6_addr addr;
-	int ident;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	struct inet_peer *peer;
-	struct net *net;
-
-	net = dev_net(rt->dst.dev);
-	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
-	if (peer) {
-		fhdr->identification = htonl(inet_getid(peer, 0));
-		inet_putpeer(peer);
-		return;
-	}
-#endif
-	ident = atomic_inc_return(&ipv6_fragmentation_id);
-
-	addr = rt->rt6i_dst.addr;
-	addr.s6_addr32[0] ^= (__force __be32)ident;
-	fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
-}
-EXPORT_SYMBOL(ipv6_select_ident);
-
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
@@ -104,6 +78,7 @@
 	if (len > IPV6_MAXPLEN)
 		len = 0;
 	ipv6_hdr(skb)->payload_len = htons(len);
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
 		       skb_dst(skb)->dev, dst_output);

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bda7429..5b7a1ed 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c

@@ -51,7 +51,6 @@
 	.protocol =  IPPROTO_ICMPV6,
 	.prot =      &pingv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_REUSE,
 };
 
@@ -168,12 +167,7 @@
 	pfh.wcheck = 0;
 	pfh.family = AF_INET6;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 091d066..3317440 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c

@@ -186,7 +186,7 @@
 /* can be called either with percpu mib (pcpumib != NULL),
  * or shared one (smib != NULL)
  */
-static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
 				atomic_long_t *smib,
 				const struct snmp_mib *itemlist)
 {
@@ -201,7 +201,7 @@
 	}
 }
 
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
 				  const struct snmp_mib *itemlist, size_t syncpoff)
 {
 	int i;
@@ -215,14 +215,14 @@
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
 			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
+	snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
 			    NULL, snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
+	snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
 			    NULL, snmp6_udp6_list);
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
+	snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
 			    NULL, snmp6_udplite6_list);
 	return 0;
 }
@@ -245,7 +245,7 @@
 	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
 
 	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
-	snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+	snmp6_seq_show_item64(seq, idev->stats.ipv6,
 			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
 			    snmp6_icmp6_list);

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f29996..b2dc60b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c

@@ -873,14 +873,8 @@
 		err = PTR_ERR(dst);
 		goto out;
 	}
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
@@ -1328,7 +1322,6 @@
 	.protocol	= IPPROTO_IP,	/* wild card */
 	.prot		= &rawv6_prot,
 	.ops		= &inet6_sockraw_ops,
-	.no_check	= UDP_CSUM_DEFAULT,
 	.flags		= INET_PROTOSW_REUSE,
 };
 

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ebdb7b6..f23fbd2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c

@@ -1176,7 +1176,7 @@
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark;
+	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
@@ -1455,7 +1455,7 @@
 		goto out;
 
 	net->ipv6.ip6_rt_gc_expire++;
-	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
 	entries = dst_entries_get_slow(ops);
 	if (entries < ops->gc_thresh)
 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e5a453c..4f40817 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c

@@ -560,12 +560,12 @@
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+				 t->parms.link, 0, IPPROTO_IPV6, 0);
 		err = 0;
 		goto out;
 	}
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 			      IPPROTO_IPV6, 0);
 		err = 0;
 		goto out;
@@ -1828,4 +1828,5 @@
 module_init(sit_init);
 module_exit(sit_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
 MODULE_ALIAS_NETDEV("sit0");

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e7..a822b88 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c

@@ -216,6 +216,8 @@
 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 		ireq->ir_iif = inet6_iif(skb);
 
+	ireq->ir_mark = inet_request_mark(sk, skb);
+
 	req->expires = 0UL;
 	req->num_retrans = 0;
 	ireq->ecn_ok		= ecn_ok;
@@ -242,7 +244,7 @@
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
-		fl6.flowi6_mark = sk->sk_mark;
+		fl6.flowi6_mark = ireq->ir_mark;
 		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a1..058f3ec 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c

@@ -38,6 +38,13 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e289830..229239a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c

@@ -340,7 +340,8 @@
 	struct sock *sk;
 	int err;
 	struct tcp_sock *tp;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	struct net *net = dev_net(skb->dev);
 
 	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -371,8 +372,11 @@
 
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -436,8 +440,13 @@
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can, it SYNs are crossed. --ANK */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
@@ -463,7 +472,8 @@
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi6 *fl6,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -474,7 +484,7 @@
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -498,7 +508,7 @@
 	struct flowi6 fl6;
 	int res;
 
-	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
@@ -802,6 +812,7 @@
 		fl6.flowi6_oif = inet6_iif(skb);
 	else
 		fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 	fl6.fl6_dport = t1->dest;
 	fl6.fl6_sport = t1->source;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -917,7 +928,12 @@
 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt,
 			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
@@ -969,8 +985,10 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	bool want_cookie = false, fastopen;
 	struct flowi6 fl6;
-	bool want_cookie = false;
+	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1001,7 +1019,7 @@
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1016,6 +1034,7 @@
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
@@ -1074,19 +1093,27 @@
 		isn = tcp_v6_init_sequence(skb);
 	}
 have_isn:
-	tcp_rsk(req)->snt_isn = isn;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_release;
 
-	if (tcp_v6_send_synack(sk, dst, &fl6, req,
-			       skb_get_queue_mapping(skb)) ||
-	    want_cookie)
+	if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
 		goto drop_and_free;
 
+	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_rsk(req)->listener = NULL;
-	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v6_send_synack(sk, dst, &fl6, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	}
 	return 0;
 
 drop_and_release:
@@ -1294,25 +1321,6 @@
 	return NULL;
 }
 
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
-				  &ipv6_hdr(skb)->daddr, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-	}
-
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
-					      &ipv6_hdr(skb)->saddr,
-					      &ipv6_hdr(skb)->daddr, 0));
-
-	if (skb->len <= 76)
-		return __skb_checksum_complete(skb);
-	return 0;
-}
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1486,7 +1494,7 @@
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
 		goto csum_error;
 
 	th = tcp_hdr(skb);
@@ -1779,6 +1787,7 @@
 	const struct inet_sock *inet = inet_sk(sp);
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 
 	dest  = &sp->sk_v6_daddr;
 	src   = &sp->sk_v6_rcv_saddr;
@@ -1821,7 +1830,9 @@
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd,
-		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+		   sp->sk_state == TCP_LISTEN ?
+			(fastopenq ? fastopenq->max_qlen : 0) :
+			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );
 }
 
@@ -1981,7 +1992,6 @@
 	.protocol	=	IPPROTO_TCP,
 	.prot		=	&tcpv6_prot,
 	.ops		=	&inet6_stream_ops,
-	.no_check	=	0,
 	.flags		=	INET_PROTOSW_PERMANENT |
 				INET_PROTOSW_ICSK,
 };

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e586d9..95c8347 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c

@@ -634,6 +634,10 @@
 		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
 			int ret;
 
+			/* Verify checksum before giving to encap */
+			if (udp_lib_checksum_complete(skb))
+				goto csum_error;
+
 			ret = encap_rcv(sk, skb);
 			if (ret <= 0) {
 				UDP_INC_STATS_BH(sock_net(sk),
@@ -701,17 +705,16 @@
 				      int dif)
 {
 	struct hlist_nulls_node *node;
-	struct sock *s = sk;
 	unsigned short num = ntohs(loc_port);
 
-	sk_nulls_for_each_from(s, node) {
-		struct inet_sock *inet = inet_sk(s);
+	sk_nulls_for_each_from(sk, node) {
+		struct inet_sock *inet = inet_sk(sk);
 
-		if (!net_eq(sock_net(s), net))
+		if (!net_eq(sock_net(sk), net))
 			continue;
 
-		if (udp_sk(s)->udp_port_hash == num &&
-		    s->sk_family == PF_INET6) {
+		if (udp_sk(sk)->udp_port_hash == num &&
+		    sk->sk_family == PF_INET6) {
 			if (inet->inet_dport) {
 				if (inet->inet_dport != rmt_port)
 					continue;
@@ -720,16 +723,16 @@
 			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
-			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
 			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
 				if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					continue;
 			}
-			if (!inet6_mc_check(s, loc_addr, rmt_addr))
+			if (!inet6_mc_check(sk, loc_addr, rmt_addr))
 				continue;
-			return s;
+			return sk;
 		}
 	}
 	return NULL;
@@ -760,6 +763,17 @@
 	if (unlikely(skb1))
 		kfree_skb(skb1);
 }
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+	/* Report a UDP-over-IPv6 datagram whose checksum field is zero: RFC 2460
+	 * section 8.1 says we SHOULD log this, so print both address/port pairs.
+	 */
+	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+		       &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+		       &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
 /*
  * Note: called only from the BH handler context,
  * so we don't need to lock the hashes.
@@ -779,7 +793,12 @@
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	while (sk) {
-		stack[count++] = sk;
+		/* If zero checksum and no_check is not on for
+		 * the socket then skip it.
+		 */
+		if (uh->check || udp_sk(sk)->no_check6_rx)
+			stack[count++] = sk;
+
 		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
 				       uh->source, saddr, dif);
 		if (unlikely(count == ARRAY_SIZE(stack))) {
@@ -867,6 +886,12 @@
 	if (sk != NULL) {
 		int ret;
 
+		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+			sock_put(sk);
+			udp6_csum_zero_error(skb);
+			goto csum_error;
+		}
+
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
@@ -879,6 +904,11 @@
 		return 0;
 	}
 
+	if (!uh->check) {
+		udp6_csum_zero_error(skb);
+		goto csum_error;
+	}
+
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard;
 
@@ -1006,7 +1036,10 @@
 
 	if (is_udplite)
 		csum = udplite_csum_outgoing(sk, skb);
-	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+	else if (up->no_check6_tx) {   /* UDP csum disabled */
+		skb->ip_summed = CHECKSUM_NONE;
+		goto send;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
 		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
 				     up->len);
 		goto send;
@@ -1232,14 +1265,8 @@
 		goto out;
 	}
 
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
@@ -1479,7 +1506,6 @@
 	.protocol =  IPPROTO_UDP,
 	.prot =      &udpv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_PERMANENT,
 };
 

diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8..0ae3d98 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c

@@ -63,7 +63,9 @@
 		if (unlikely(type & ~(SKB_GSO_UDP |
 				      SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_GRE |
+				      SKB_GSO_GRE_CSUM |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_SIT |
 				      SKB_GSO_MPLS) ||
@@ -76,7 +78,8 @@
 		goto out;
 	}
 
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
 		segs = skb_udp_tunnel_segment(skb, features);
 	else {
 		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot

diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be..9cf097e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c

@@ -64,7 +64,6 @@
 	.protocol	= IPPROTO_UDPLITE,
 	.prot		= &udplitev6_prot,
 	.ops		= &inet6_dgram_ops,
-	.no_check	= 0,
 	.flags		= INET_PROTOSW_PERMANENT,
 };
 

diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index b930d08..433672d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c

@@ -78,7 +78,7 @@
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (!skb->local_df && skb->len > mtu) {
+	if (!skb->ignore_df && skb->len > mtu) {
 		skb->dev = dst->dev;
 
 		if (xfrm6_local_dontfrag(skb))
@@ -114,7 +114,7 @@
 	if (err)
 		return err;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -153,7 +153,7 @@
 	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
 		xfrm6_local_rxpmtu(skb, mtu);
 		return -EMSGSIZE;
-	} else if (!skb->local_df && skb->len > mtu && skb->sk) {
+	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
 		xfrm_local_error(skb, mtu);
 		return -EMSGSIZE;
 	}

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 41e4e93..91729b8 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c

@@ -1353,7 +1353,7 @@
 
 	sk_refcnt_debug_inc(sk);
 	sock_init_data(sock, sk);
-	sk->sk_no_check = 1;		/* Checksum off by default */
+	sk->sk_no_check_tx = 1;		/* Checksum off by default */
 	sock->ops = &ipx_dgram_ops;
 	rc = 0;
 out:

diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index c1f0318..67e7ad3 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c

@@ -236,7 +236,8 @@
 	}
 
 	/* Apply checksum. Not allowed on 802.3 links. */
-	if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023))
+	if (sk->sk_no_check_tx ||
+	    intrfc->if_dlink_type == htons(IPX_FRAME_8023))
 		ipx->ipx_checksum = htons(0xFFFF);
 	else
 		ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 8c9d730..7a95fa4 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c

@@ -682,6 +682,18 @@
 	return NULL;
 }
 
+static void __iucv_auto_name(struct iucv_sock *iucv)
+{
+	char name[12];
+
+	sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
+	while (__iucv_get_sock_by_name(name)) {
+		sprintf(name, "%08x",
+			atomic_inc_return(&iucv_sk_list.autobind_name));
+	}
+	memcpy(iucv->src_name, name, 8);
+}
+
 /* Bind an unbound socket */
 static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
 			  int addr_len)
@@ -724,8 +736,12 @@
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 		if (!memcmp(dev->perm_addr, uid, 8)) {
-			memcpy(iucv->src_name, sa->siucv_name, 8);
 			memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
+			/* Check for uninitialized siucv_name */
+			if (strncmp(sa->siucv_name, "        ", 8) == 0)
+				__iucv_auto_name(iucv);
+			else
+				memcpy(iucv->src_name, sa->siucv_name, 8);
 			sk->sk_bound_dev_if = dev->ifindex;
 			iucv->hs_dev = dev;
 			dev_hold(dev);
@@ -763,7 +779,6 @@
 static int iucv_sock_autobind(struct sock *sk)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
-	char name[12];
 	int err = 0;
 
 	if (unlikely(!pr_iucv))
@@ -772,17 +787,9 @@
 	memcpy(iucv->src_user_id, iucv_userid, 8);
 
 	write_lock_bh(&iucv_sk_list.lock);
-
-	sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
-	while (__iucv_get_sock_by_name(name)) {
-		sprintf(name, "%08x",
-			atomic_inc_return(&iucv_sk_list.autobind_name));
-	}
-
+	__iucv_auto_name(iucv);
 	write_unlock_bh(&iucv_sk_list.lock);
 
-	memcpy(&iucv->src_name, name, 8);
-
 	if (!iucv->msglimit)
 		iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
 
@@ -1936,11 +1943,10 @@
 	    sk_acceptq_is_full(sk) ||
 	    !nsk) {
 		/* error on server socket - connection refused */
-		if (nsk)
-			sk_free(nsk);
 		afiucv_swap_src_dest(skb);
 		trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
 		err = dev_queue_xmit(skb);
+		iucv_sock_kill(nsk);
 		bh_unlock_sock(sk);
 		goto out;
 	}

diff --git a/net/key/af_key.c b/net/key/af_key.c
index f3c8307..ba2a2f9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c

@@ -1476,9 +1476,7 @@
 	else
 		err = xfrm_state_update(x);
 
-	xfrm_audit_state_add(x, err ? 0 : 1,
-			     audit_get_loginuid(current),
-			     audit_get_sessionid(current), 0);
+	xfrm_audit_state_add(x, err ? 0 : 1, true);
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -1532,9 +1530,7 @@
 	c.event = XFRM_MSG_DELSA;
 	km_state_notify(x, &c);
 out:
-	xfrm_audit_state_delete(x, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 	xfrm_state_put(x);
 
 	return err;
@@ -1726,17 +1722,13 @@
 	struct net *net = sock_net(sk);
 	unsigned int proto;
 	struct km_event c;
-	struct xfrm_audit audit_info;
 	int err, err2;
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
 	if (proto == 0)
 		return -EINVAL;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = 0;
-	err = xfrm_state_flush(net, proto, &audit_info);
+	err = xfrm_state_flush(net, proto, true);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - go quietly */
@@ -2288,9 +2280,7 @@
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
 
-	xfrm_audit_policy_add(xp, err ? 0 : 1,
-			      audit_get_loginuid(current),
-			      audit_get_sessionid(current), 0);
+	xfrm_audit_policy_add(xp, err ? 0 : 1, true);
 
 	if (err)
 		goto out;
@@ -2372,9 +2362,7 @@
 	if (xp == NULL)
 		return -ENOENT;
 
-	xfrm_audit_policy_delete(xp, err ? 0 : 1,
-				 audit_get_loginuid(current),
-				 audit_get_sessionid(current), 0);
+	xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 	if (err)
 		goto out;
@@ -2553,7 +2541,7 @@
 		sel.sport_mask = htons(0xffff);
 
 	/* set destination address info of selector */
-	sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1],
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
 	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
 	sel.prefixlen_d = sa->sadb_address_prefixlen;
 	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2622,9 +2610,7 @@
 		return -ENOENT;
 
 	if (delete) {
-		xfrm_audit_policy_delete(xp, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 		if (err)
 			goto out;
@@ -2733,13 +2719,9 @@
 {
 	struct net *net = sock_net(sk);
 	struct km_event c;
-	struct xfrm_audit audit_info;
 	int err, err2;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = 0;
-	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a4e37d7..bea2590 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c

@@ -495,52 +495,6 @@
 	spin_unlock_bh(&session->reorder_q.lock);
 }
 
-static inline int l2tp_verify_udp_checksum(struct sock *sk,
-					   struct sk_buff *skb)
-{
-	struct udphdr *uh = udp_hdr(skb);
-	u16 ulen = ntohs(uh->len);
-	__wsum psum;
-
-	if (sk->sk_no_check || skb_csum_unnecessary(skb))
-		return 0;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
-		if (!uh->check) {
-			LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
-			return 1;
-		}
-		if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
-		    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-				     &ipv6_hdr(skb)->daddr, ulen,
-				     IPPROTO_UDP, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-							 &ipv6_hdr(skb)->daddr,
-							 skb->len, IPPROTO_UDP,
-							 0));
-	} else
-#endif
-	{
-		struct inet_sock *inet;
-		if (!uh->check)
-			return 0;
-		inet = inet_sk(sk);
-		psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr,
-					  ulen, IPPROTO_UDP, 0);
-
-		if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
-		    !csum_fold(csum_add(psum, skb->csum)))
-			return 0;
-		skb->csum = psum;
-	}
-
-	return __skb_checksum_complete(skb);
-}
-
 static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr)
 {
 	u32 nws;
@@ -895,8 +849,7 @@
 	u16 version;
 	int length;
 
-	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
-		goto discard_bad_csum;
+	/* UDP has already verified the checksum */
 
 	/* UDP always verifies the packet length. */
 	__skb_pull(skb, sizeof(struct udphdr));
@@ -979,14 +932,6 @@
 
 	return 0;
 
-discard_bad_csum:
-	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
-	UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
-	atomic_long_inc(&tunnel->stats.rx_errors);
-	kfree_skb(skb);
-
-	return 0;
-
 error:
 	/* Put UDP header back */
 	__skb_push(skb, sizeof(struct udphdr));
@@ -1128,7 +1073,7 @@
 	}
 
 	/* Queue the packet to IP for output */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(tunnel->sock, skb, NULL);
@@ -1150,31 +1095,6 @@
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
-static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
-				int udp_len)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct udphdr *uh = udp_hdr(skb);
-
-	if (!skb_dst(skb) || !skb_dst(skb)->dev ||
-	    !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
-		__wsum csum = skb_checksum(skb, 0, udp_len, 0);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
-					    IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
-					     udp_len, IPPROTO_UDP, 0);
-	}
-}
-#endif
-
 /* If caller requires the skb to have a ppp header, the header must be
  * inserted in the skb data before calling this function.
  */
@@ -1186,7 +1106,6 @@
 	struct flowi *fl;
 	struct udphdr *uh;
 	struct inet_sock *inet;
-	__wsum csum;
 	int headroom;
 	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	int udp_len;
@@ -1235,33 +1154,17 @@
 		uh->dest = inet->inet_dport;
 		udp_len = uhlen + hdr_len + data_len;
 		uh->len = htons(udp_len);
-		uh->check = 0;
 
 		/* Calculate UDP checksum if configured to do so */
 #if IS_ENABLED(CONFIG_IPV6)
 		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
-			l2tp_xmit_ipv6_csum(sk, skb, udp_len);
+			udp6_set_csum(udp_get_no_check6_tx(sk),
+				      skb, &inet6_sk(sk)->saddr,
+				      &sk->sk_v6_daddr, udp_len);
 		else
 #endif
-		if (sk->sk_no_check == UDP_CSUM_NOXMIT)
-			skb->ip_summed = CHECKSUM_NONE;
-		else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-			 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-			skb->ip_summed = CHECKSUM_COMPLETE;
-			csum = skb_checksum(skb, 0, udp_len, 0);
-			uh->check = csum_tcpudp_magic(inet->inet_saddr,
-						      inet->inet_daddr,
-						      udp_len, IPPROTO_UDP, csum);
-			if (uh->check == 0)
-				uh->check = CSUM_MANGLED_0;
-		} else {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_transport_header(skb) - skb->head;
-			skb->csum_offset = offsetof(struct udphdr, check);
-			uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-						       inet->inet_daddr,
-						       udp_len, IPPROTO_UDP, 0);
-		}
+		udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
+			     inet->inet_daddr, udp_len);
 		break;
 
 	case L2TP_ENCAPTYPE_IP:
@@ -1490,6 +1393,11 @@
 					     sizeof(udp6_addr), 0);
 			if (err < 0)
 				goto out;
+
+			if (cfg->udp6_zero_tx_checksums)
+				udp_set_no_check6_tx(sock->sk, true);
+			if (cfg->udp6_zero_rx_checksums)
+				udp_set_no_check6_rx(sock->sk, true);
 		} else
 #endif
 		{
@@ -1518,7 +1426,7 @@
 		}
 
 		if (!cfg->use_udp_checksums)
-			sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+			sock->sk->sk_no_check_tx = 1;
 
 		break;
 

diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3f93ccd..68aa9ff 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h

@@ -162,7 +162,9 @@
 #endif
 	u16			local_udp_port;
 	u16			peer_udp_port;
-	unsigned int		use_udp_checksums:1;
+	unsigned int		use_udp_checksums:1,
+				udp6_zero_tx_checksums:1,
+				udp6_zero_rx_checksums:1;
 };
 
 struct l2tp_tunnel {

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3397fe6..369a982 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c

@@ -606,7 +606,6 @@
 	.protocol	= IPPROTO_L2TP,
 	.prot		= &l2tp_ip_prot,
 	.ops		= &l2tp_ip_ops,
-	.no_check	= 0,
 };
 
 static struct net_protocol l2tp_ip_protocol __read_mostly = {

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7704ea9..f3f98a1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c

@@ -605,14 +605,8 @@
 		goto out;
 	}
 
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
@@ -761,7 +755,6 @@
 	.protocol	= IPPROTO_L2TP,
 	.prot		= &l2tp_ip6_prot,
 	.ops		= &l2tp_ip6_ops,
-	.no_check	= 0,
 };
 
 static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bd7387a..0ac907a 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c

@@ -161,6 +161,13 @@
 			cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
 		if (info->attrs[L2TP_ATTR_UDP_CSUM])
 			cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
+			cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+		if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
+			cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+#endif
 	}
 
 	if (info->attrs[L2TP_ATTR_DEBUG])
@@ -297,8 +304,7 @@
 	case L2TP_ENCAPTYPE_UDP:
 		if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
 		    nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
-		    nla_put_u8(skb, L2TP_ATTR_UDP_CSUM,
-			       (sk->sk_no_check != UDP_CSUM_NOXMIT)))
+		    nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
 			goto nla_put_failure;
 		/* NOBREAK */
 	case L2TP_ENCAPTYPE_IP:

diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 9d7d840..1e46ffa 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile

@@ -25,7 +25,8 @@
 	wme.o \
 	event.o \
 	chan.o \
-	trace.o mlme.o
+	trace.o mlme.o \
+	tdls.o
 
 mac80211-$(CONFIG_MAC80211_LEDS) += led.o
 mac80211-$(CONFIG_MAC80211_DEBUGFS) += \

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 7c7df47..ec24378 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c

@@ -23,12 +23,13 @@
 			       u8 *data, size_t data_len, u8 *mic)
 {
 	struct scatterlist assoc, pt, ct[2];
-	struct {
-		struct aead_request	req;
-		u8			priv[crypto_aead_reqsize(tfm)];
-	} aead_req;
 
-	memset(&aead_req, 0, sizeof(aead_req));
+	char aead_req_data[sizeof(struct aead_request) +
+			   crypto_aead_reqsize(tfm)]
+		__aligned(__alignof__(struct aead_request));
+	struct aead_request *aead_req = (void *) aead_req_data;
+
+	memset(aead_req, 0, sizeof(aead_req_data));
 
 	sg_init_one(&pt, data, data_len);
 	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -36,23 +37,23 @@
 	sg_set_buf(&ct[0], data, data_len);
 	sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
 
-	aead_request_set_tfm(&aead_req.req, tfm);
-	aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
-	aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0);
+	aead_request_set_tfm(aead_req, tfm);
+	aead_request_set_assoc(aead_req, &assoc, assoc.length);
+	aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0);
 
-	crypto_aead_encrypt(&aead_req.req);
+	crypto_aead_encrypt(aead_req);
 }
 
 int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
 			      u8 *data, size_t data_len, u8 *mic)
 {
 	struct scatterlist assoc, pt, ct[2];
-	struct {
-		struct aead_request	req;
-		u8			priv[crypto_aead_reqsize(tfm)];
-	} aead_req;
+	char aead_req_data[sizeof(struct aead_request) +
+			   crypto_aead_reqsize(tfm)]
+		__aligned(__alignof__(struct aead_request));
+	struct aead_request *aead_req = (void *) aead_req_data;
 
-	memset(&aead_req, 0, sizeof(aead_req));
+	memset(aead_req, 0, sizeof(aead_req_data));
 
 	sg_init_one(&pt, data, data_len);
 	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -60,12 +61,12 @@
 	sg_set_buf(&ct[0], data, data_len);
 	sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
 
-	aead_request_set_tfm(&aead_req.req, tfm);
-	aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
-	aead_request_set_crypt(&aead_req.req, ct, &pt,
+	aead_request_set_tfm(aead_req, tfm);
+	aead_request_set_assoc(aead_req, &assoc, assoc.length);
+	aead_request_set_crypt(aead_req, ct, &pt,
 			       data_len + IEEE80211_CCMP_MIC_LEN, b_0);
 
-	return crypto_aead_decrypt(&aead_req.req);
+	return crypto_aead_decrypt(aead_req);
 }
 
 struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[])

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index aaa59d7..d7513a5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c

@@ -109,6 +109,15 @@
 static int ieee80211_start_p2p_device(struct wiphy *wiphy,
 				      struct wireless_dev *wdev)
 {
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	int ret;
+
+	mutex_lock(&sdata->local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+	mutex_unlock(&sdata->local->chanctx_mtx);
+	if (ret < 0)
+		return ret;
+
 	return ieee80211_do_open(wdev, true);
 }
 
@@ -463,8 +472,10 @@
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
+	struct rate_control_ref *ref = local->rate_ctrl;
 	struct timespec uptime;
 	u64 packets = 0;
+	u32 thr = 0;
 	int i, ac;
 
 	sinfo->generation = sdata->local->sta_generation;
@@ -578,6 +589,17 @@
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+	/* check if the driver has a SW RC implementation */
+	if (ref && ref->ops->get_expected_throughput)
+		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+	else
+		thr = drv_get_expected_throughput(local, &sta->sta);
+
+	if (thr != 0) {
+		sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+		sinfo->expected_throughput = thr;
+	}
 }
 
 static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
@@ -768,7 +790,7 @@
 }
 
 static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
-				 int idx, u8 *mac, struct station_info *sinfo)
+				  int idx, u8 *mac, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -798,7 +820,7 @@
 }
 
 static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_info *sinfo)
+				 const u8 *mac, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -972,13 +994,13 @@
 	sdata->needed_rx_chains = sdata->local->rx_chains;
 
 	mutex_lock(&local->mtx);
-	sdata->radar_required = params->radar_required;
 	err = ieee80211_vif_use_channel(sdata, &params->chandef,
 					IEEE80211_CHANCTX_SHARED);
+	if (!err)
+		ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
 	mutex_unlock(&local->mtx);
 	if (err)
 		return err;
-	ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
 
 	/*
 	 * Apply control port protocol, this allows us to
@@ -1075,6 +1097,31 @@
 	return 0;
 }
 
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->mtx);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+
+		if (!sdata->vif.csa_active)
+			continue;
+
+		if (!sdata->csa_block_tx)
+			continue;
+
+		rcu_read_unlock();
+		return true;
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+
 static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1092,7 +1139,14 @@
 	old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata);
 
 	/* abort any running channel switch */
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
+
 	kfree(sdata->u.ap.next_beacon);
 	sdata->u.ap.next_beacon = NULL;
 
@@ -1131,8 +1185,8 @@
 	local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
 	skb_queue_purge(&sdata->u.ap.ps.bc_buf);
 
-	ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
 	mutex_lock(&local->mtx);
+	ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
 	ieee80211_vif_release_channel(sdata);
 	mutex_unlock(&local->mtx);
 
@@ -1416,7 +1470,8 @@
 }
 
 static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_parameters *params)
+				 const u8 *mac,
+				 struct station_parameters *params)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
@@ -1450,6 +1505,8 @@
 	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
 		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
 		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	} else {
+		sta->sta.tdls = true;
 	}
 
 	err = sta_apply_parameters(local, sta, params);
@@ -1483,7 +1540,7 @@
 }
 
 static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac)
+				 const u8 *mac)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -1497,7 +1554,7 @@
 }
 
 static int ieee80211_change_station(struct wiphy *wiphy,
-				    struct net_device *dev, u8 *mac,
+				    struct net_device *dev, const u8 *mac,
 				    struct station_parameters *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1566,7 +1623,7 @@
 
 		if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
 		    sta->sdata->u.vlan.sta) {
-			rcu_assign_pointer(sta->sdata->u.vlan.sta, NULL);
+			RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
 			prev_4addr = true;
 		}
 
@@ -1622,7 +1679,7 @@
 
 #ifdef CONFIG_MAC80211_MESH
 static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *dst, u8 *next_hop)
+			       const u8 *dst, const u8 *next_hop)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
@@ -1650,7 +1707,7 @@
 }
 
 static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst)
+			       const u8 *dst)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
@@ -1661,9 +1718,8 @@
 	return 0;
 }
 
-static int ieee80211_change_mpath(struct wiphy *wiphy,
-				    struct net_device *dev,
-				    u8 *dst, u8 *next_hop)
+static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev,
+				  const u8 *dst, const u8 *next_hop)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
@@ -1755,8 +1811,8 @@
 }
 
 static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
-				 int idx, u8 *dst, u8 *next_hop,
-				 struct mpath_info *pinfo)
+				int idx, u8 *dst, u8 *next_hop,
+				struct mpath_info *pinfo)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
@@ -2930,7 +2986,6 @@
 	/* whatever, but channel contexts should not complain about that one */
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 	sdata->needed_rx_chains = local->rx_chains;
-	sdata->radar_required = true;
 
 	err = ieee80211_vif_use_channel(sdata, chandef,
 					IEEE80211_CHANCTX_SHARED);
@@ -3011,26 +3066,11 @@
 }
 EXPORT_SYMBOL(ieee80211_csa_finish);
 
-static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
+					  u32 *changed)
 {
-	struct ieee80211_local *local = sdata->local;
-	int err, changed = 0;
+	int err;
 
-	sdata_assert_lock(sdata);
-
-	mutex_lock(&local->mtx);
-	sdata->radar_required = sdata->csa_radar_required;
-	err = ieee80211_vif_change_channel(sdata, &changed);
-	mutex_unlock(&local->mtx);
-	if (WARN_ON(err < 0))
-		return;
-
-	if (!local->use_chanctx) {
-		local->_oper_chandef = sdata->csa_chandef;
-		ieee80211_hw_config(local, 0);
-	}
-
-	sdata->vif.csa_active = false;
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP:
 		err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
@@ -3038,35 +3078,74 @@
 		sdata->u.ap.next_beacon = NULL;
 
 		if (err < 0)
-			return;
-		changed |= err;
+			return err;
+		*changed |= err;
 		break;
 	case NL80211_IFTYPE_ADHOC:
 		err = ieee80211_ibss_finish_csa(sdata);
 		if (err < 0)
-			return;
-		changed |= err;
+			return err;
+		*changed |= err;
 		break;
 #ifdef CONFIG_MAC80211_MESH
 	case NL80211_IFTYPE_MESH_POINT:
 		err = ieee80211_mesh_finish_csa(sdata);
 		if (err < 0)
-			return;
-		changed |= err;
+			return err;
+		*changed |= err;
 		break;
 #endif
 	default:
 		WARN_ON(1);
-		return;
+		return -EINVAL;
 	}
 
-	ieee80211_bss_info_change_notify(sdata, changed);
+	return 0;
+}
 
-	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	u32 changed = 0;
+	int err;
+
+	sdata_assert_lock(sdata);
+	lockdep_assert_held(&local->mtx);
+
+	sdata->radar_required = sdata->csa_radar_required;
+	err = ieee80211_vif_change_channel(sdata, &changed);
+	if (err < 0)
+		return err;
+
+	if (!local->use_chanctx) {
+		local->_oper_chandef = sdata->csa_chandef;
+		ieee80211_hw_config(local, 0);
+	}
+
+	sdata->vif.csa_active = false;
+
+	err = ieee80211_set_after_csa_beacon(sdata, &changed);
+	if (err)
+		return err;
+
+	ieee80211_bss_info_change_notify(sdata, changed);
+	cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
 
-	cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+	return 0;
+}
+
+static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+	if (__ieee80211_csa_finalize(sdata)) {
+		sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
+		cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
+				    GFP_KERNEL);
+	}
 }
 
 void ieee80211_csa_finalize_work(struct work_struct *work)
@@ -3074,8 +3153,11 @@
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
 			     csa_finalize_work);
+	struct ieee80211_local *local = sdata->local;
 
 	sdata_lock(sdata);
+	mutex_lock(&local->mtx);
+
 	/* AP might have been stopped while waiting for the lock. */
 	if (!sdata->vif.csa_active)
 		goto unlock;
@@ -3086,6 +3168,7 @@
 	ieee80211_csa_finalize(sdata);
 
 unlock:
+	mutex_unlock(&local->mtx);
 	sdata_unlock(sdata);
 }
 
@@ -3121,9 +3204,25 @@
 		if (params->count <= 1)
 			break;
 
-		sdata->csa_counter_offset_beacon =
-			params->counter_offset_beacon;
-		sdata->csa_counter_offset_presp = params->counter_offset_presp;
+		if ((params->n_counter_offsets_beacon >
+		     IEEE80211_MAX_CSA_COUNTERS_NUM) ||
+		    (params->n_counter_offsets_presp >
+		     IEEE80211_MAX_CSA_COUNTERS_NUM))
+			return -EINVAL;
+
+		/* make sure we don't have garbage in other counters */
+		memset(sdata->csa_counter_offset_beacon, 0,
+		       sizeof(sdata->csa_counter_offset_beacon));
+		memset(sdata->csa_counter_offset_presp, 0,
+		       sizeof(sdata->csa_counter_offset_presp));
+
+		memcpy(sdata->csa_counter_offset_beacon,
+		       params->counter_offsets_beacon,
+		       params->n_counter_offsets_beacon * sizeof(u16));
+		memcpy(sdata->csa_counter_offset_presp,
+		       params->counter_offsets_presp,
+		       params->n_counter_offsets_presp * sizeof(u16));
+
 		err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
 		if (err < 0) {
 			kfree(sdata->u.ap.next_beacon);
@@ -3212,16 +3311,18 @@
 	return 0;
 }
 
-int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
-			     struct cfg80211_csa_settings *params)
+static int
+__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+			   struct cfg80211_csa_settings *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_chanctx_conf *chanctx_conf;
+	struct ieee80211_chanctx_conf *conf;
 	struct ieee80211_chanctx *chanctx;
 	int err, num_chanctx, changed = 0;
 
 	sdata_assert_lock(sdata);
+	lockdep_assert_held(&local->mtx);
 
 	if (!list_empty(&local->roc_list) || local->scanning)
 		return -EBUSY;
@@ -3233,23 +3334,24 @@
 				       &sdata->vif.bss_conf.chandef))
 		return -EINVAL;
 
-	rcu_read_lock();
-	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-	if (!chanctx_conf) {
-		rcu_read_unlock();
+	mutex_lock(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		mutex_unlock(&local->chanctx_mtx);
 		return -EBUSY;
 	}
 
 	/* don't handle for multi-VIF cases */
-	chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
-	if (chanctx->refcount > 1) {
-		rcu_read_unlock();
+	chanctx = container_of(conf, struct ieee80211_chanctx, conf);
+	if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
+		mutex_unlock(&local->chanctx_mtx);
 		return -EBUSY;
 	}
 	num_chanctx = 0;
 	list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
 		num_chanctx++;
-	rcu_read_unlock();
+	mutex_unlock(&local->chanctx_mtx);
 
 	if (num_chanctx > 1)
 		return -EBUSY;
@@ -3263,15 +3365,16 @@
 		return err;
 
 	sdata->csa_radar_required = params->radar_required;
-
-	if (params->block_tx)
-		ieee80211_stop_queues_by_reason(&local->hw,
-				IEEE80211_MAX_QUEUE_MAP,
-				IEEE80211_QUEUE_STOP_REASON_CSA);
-
 	sdata->csa_chandef = params->chandef;
+	sdata->csa_block_tx = params->block_tx;
+	sdata->csa_current_counter = params->count;
 	sdata->vif.csa_active = true;
 
+	if (sdata->csa_block_tx)
+		ieee80211_stop_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+
 	if (changed) {
 		ieee80211_bss_info_change_notify(sdata, changed);
 		drv_channel_switch_beacon(sdata, &params->chandef);
@@ -3283,6 +3386,20 @@
 	return 0;
 }
 
+int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+			     struct cfg80211_csa_settings *params)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	int err;
+
+	mutex_lock(&local->mtx);
+	err = __ieee80211_channel_switch(wiphy, dev, params);
+	mutex_unlock(&local->mtx);
+
+	return err;
+}
+
 static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 			     struct cfg80211_mgmt_tx_params *params,
 			     u64 *cookie)
@@ -3295,6 +3412,7 @@
 	bool need_offchan = false;
 	u32 flags;
 	int ret;
+	u8 *data;
 
 	if (params->dont_wait_for_ack)
 		flags = IEEE80211_TX_CTL_NO_ACK;
@@ -3388,7 +3506,20 @@
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	memcpy(skb_put(skb, params->len), params->buf, params->len);
+	data = skb_put(skb, params->len);
+	memcpy(data, params->buf, params->len);
+
+	/* Update CSA counters */
+	if (sdata->vif.csa_active &&
+	    (sdata->vif.type == NL80211_IFTYPE_AP ||
+	     sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
+	    params->n_csa_offsets) {
+		int i;
+		u8 c = sdata->csa_current_counter;
+
+		for (i = 0; i < params->n_csa_offsets; i++)
+			data[params->csa_offsets[i]] = c;
+	}
 
 	IEEE80211_SKB_CB(skb)->flags = flags;
 
@@ -3497,320 +3628,6 @@
 	return 0;
 }
 
-static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
-{
-	u8 *pos = (void *)skb_put(skb, 7);
-
-	*pos++ = WLAN_EID_EXT_CAPABILITY;
-	*pos++ = 5; /* len */
-	*pos++ = 0x0;
-	*pos++ = 0x0;
-	*pos++ = 0x0;
-	*pos++ = 0x0;
-	*pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
-}
-
-static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_local *local = sdata->local;
-	u16 capab;
-
-	capab = 0;
-	if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
-		return capab;
-
-	if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
-		capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
-	if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
-		capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
-
-	return capab;
-}
-
-static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr,
-				       u8 *peer, u8 *bssid)
-{
-	struct ieee80211_tdls_lnkie *lnkid;
-
-	lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
-
-	lnkid->ie_type = WLAN_EID_LINK_ID;
-	lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
-
-	memcpy(lnkid->bssid, bssid, ETH_ALEN);
-	memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
-	memcpy(lnkid->resp_sta, peer, ETH_ALEN);
-}
-
-static int
-ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *peer, u8 action_code, u8 dialog_token,
-			       u16 status_code, struct sk_buff *skb)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
-	struct ieee80211_tdls_data *tf;
-
-	tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
-
-	memcpy(tf->da, peer, ETH_ALEN);
-	memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
-	tf->ether_type = cpu_to_be16(ETH_P_TDLS);
-	tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
-
-	switch (action_code) {
-	case WLAN_TDLS_SETUP_REQUEST:
-		tf->category = WLAN_CATEGORY_TDLS;
-		tf->action_code = WLAN_TDLS_SETUP_REQUEST;
-
-		skb_put(skb, sizeof(tf->u.setup_req));
-		tf->u.setup_req.dialog_token = dialog_token;
-		tf->u.setup_req.capability =
-			cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
-		ieee80211_add_srates_ie(sdata, skb, false, band);
-		ieee80211_add_ext_srates_ie(sdata, skb, false, band);
-		ieee80211_tdls_add_ext_capab(skb);
-		break;
-	case WLAN_TDLS_SETUP_RESPONSE:
-		tf->category = WLAN_CATEGORY_TDLS;
-		tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
-
-		skb_put(skb, sizeof(tf->u.setup_resp));
-		tf->u.setup_resp.status_code = cpu_to_le16(status_code);
-		tf->u.setup_resp.dialog_token = dialog_token;
-		tf->u.setup_resp.capability =
-			cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
-		ieee80211_add_srates_ie(sdata, skb, false, band);
-		ieee80211_add_ext_srates_ie(sdata, skb, false, band);
-		ieee80211_tdls_add_ext_capab(skb);
-		break;
-	case WLAN_TDLS_SETUP_CONFIRM:
-		tf->category = WLAN_CATEGORY_TDLS;
-		tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
-
-		skb_put(skb, sizeof(tf->u.setup_cfm));
-		tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
-		tf->u.setup_cfm.dialog_token = dialog_token;
-		break;
-	case WLAN_TDLS_TEARDOWN:
-		tf->category = WLAN_CATEGORY_TDLS;
-		tf->action_code = WLAN_TDLS_TEARDOWN;
-
-		skb_put(skb, sizeof(tf->u.teardown));
-		tf->u.teardown.reason_code = cpu_to_le16(status_code);
-		break;
-	case WLAN_TDLS_DISCOVERY_REQUEST:
-		tf->category = WLAN_CATEGORY_TDLS;
-		tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
-
-		skb_put(skb, sizeof(tf->u.discover_req));
-		tf->u.discover_req.dialog_token = dialog_token;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, u8 action_code, u8 dialog_token,
-			   u16 status_code, struct sk_buff *skb)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
-	struct ieee80211_mgmt *mgmt;
-
-	mgmt = (void *)skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, peer, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
-	memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
-
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_ACTION);
-
-	switch (action_code) {
-	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
-		skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
-		mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
-		mgmt->u.action.u.tdls_discover_resp.action_code =
-			WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
-		mgmt->u.action.u.tdls_discover_resp.dialog_token =
-			dialog_token;
-		mgmt->u.action.u.tdls_discover_resp.capability =
-			cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
-		ieee80211_add_srates_ie(sdata, skb, false, band);
-		ieee80211_add_ext_srates_ie(sdata, skb, false, band);
-		ieee80211_tdls_add_ext_capab(skb);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *peer, u8 action_code, u8 dialog_token,
-			       u16 status_code, u32 peer_capability,
-			       const u8 *extra_ies, size_t extra_ies_len)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb = NULL;
-	bool send_direct;
-	int ret;
-
-	if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
-		return -ENOTSUPP;
-
-	/* make sure we are in managed mode, and associated */
-	if (sdata->vif.type != NL80211_IFTYPE_STATION ||
-	    !sdata->u.mgd.associated)
-		return -EINVAL;
-
-	tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
-		 action_code, peer);
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    max(sizeof(struct ieee80211_mgmt),
-				sizeof(struct ieee80211_tdls_data)) +
-			    50 + /* supported rates */
-			    7 + /* ext capab */
-			    extra_ies_len +
-			    sizeof(struct ieee80211_tdls_lnkie));
-	if (!skb)
-		return -ENOMEM;
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	switch (action_code) {
-	case WLAN_TDLS_SETUP_REQUEST:
-	case WLAN_TDLS_SETUP_RESPONSE:
-	case WLAN_TDLS_SETUP_CONFIRM:
-	case WLAN_TDLS_TEARDOWN:
-	case WLAN_TDLS_DISCOVERY_REQUEST:
-		ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
-						     action_code, dialog_token,
-						     status_code, skb);
-		send_direct = false;
-		break;
-	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
-		ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
-						 dialog_token, status_code,
-						 skb);
-		send_direct = true;
-		break;
-	default:
-		ret = -ENOTSUPP;
-		break;
-	}
-
-	if (ret < 0)
-		goto fail;
-
-	if (extra_ies_len)
-		memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
-
-	/* the TDLS link IE is always added last */
-	switch (action_code) {
-	case WLAN_TDLS_SETUP_REQUEST:
-	case WLAN_TDLS_SETUP_CONFIRM:
-	case WLAN_TDLS_TEARDOWN:
-	case WLAN_TDLS_DISCOVERY_REQUEST:
-		/* we are the initiator */
-		ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
-					   sdata->u.mgd.bssid);
-		break;
-	case WLAN_TDLS_SETUP_RESPONSE:
-	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
-		/* we are the responder */
-		ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
-					   sdata->u.mgd.bssid);
-		break;
-	default:
-		ret = -ENOTSUPP;
-		goto fail;
-	}
-
-	if (send_direct) {
-		ieee80211_tx_skb(sdata, skb);
-		return 0;
-	}
-
-	/*
-	 * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
-	 * we should default to AC_VI.
-	 */
-	switch (action_code) {
-	case WLAN_TDLS_SETUP_REQUEST:
-	case WLAN_TDLS_SETUP_RESPONSE:
-		skb_set_queue_mapping(skb, IEEE80211_AC_BK);
-		skb->priority = 2;
-		break;
-	default:
-		skb_set_queue_mapping(skb, IEEE80211_AC_VI);
-		skb->priority = 5;
-		break;
-	}
-
-	/* disable bottom halves when entering the Tx path */
-	local_bh_disable();
-	ret = ieee80211_subif_start_xmit(skb, dev);
-	local_bh_enable();
-
-	return ret;
-
-fail:
-	dev_kfree_skb(skb);
-	return ret;
-}
-
-static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *peer, enum nl80211_tdls_operation oper)
-{
-	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
-		return -ENOTSUPP;
-
-	if (sdata->vif.type != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
-	tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
-
-	switch (oper) {
-	case NL80211_TDLS_ENABLE_LINK:
-		rcu_read_lock();
-		sta = sta_info_get(sdata, peer);
-		if (!sta) {
-			rcu_read_unlock();
-			return -ENOLINK;
-		}
-
-		set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
-		rcu_read_unlock();
-		break;
-	case NL80211_TDLS_DISABLE_LINK:
-		return sta_info_destroy_addr(sdata, peer);
-	case NL80211_TDLS_TEARDOWN:
-	case NL80211_TDLS_SETUP:
-	case NL80211_TDLS_DISCOVERY_REQ:
-		/* We don't support in-driver setup/teardown/discovery */
-		return -ENOTSUPP;
-	default:
-		return -ENOTSUPP;
-	}
-
-	return 0;
-}
-
 static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
 				  const u8 *peer, u64 *cookie)
 {
@@ -3949,6 +3766,21 @@
 	return 0;
 }
 
+static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
+				      struct net_device *dev,
+				      struct cfg80211_chan_def *chandef)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int ret;
+	u32 changed = 0;
+
+	ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed);
+	if (ret == 0)
+		ieee80211_bss_info_change_notify(sdata, changed);
+
+	return ret;
+}
+
 const struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -4029,4 +3861,5 @@
 	.start_radar_detection = ieee80211_start_radar_detection,
 	.channel_switch = ieee80211_channel_switch,
 	.set_qos_map = ieee80211_set_qos_map,
+	.set_ap_chanwidth = ieee80211_set_ap_chanwidth,
 };

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 75b5dd2..a310e33 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c

@@ -9,6 +9,170 @@
 #include "ieee80211_i.h"
 #include "driver-ops.h"
 
+static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
+					  struct ieee80211_chanctx *ctx)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
+		num++;
+
+	return num;
+}
+
+static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local,
+					  struct ieee80211_chanctx *ctx)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
+		num++;
+
+	return num;
+}
+
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+			       struct ieee80211_chanctx *ctx)
+{
+	return ieee80211_chanctx_num_assigned(local, ctx) +
+	       ieee80211_chanctx_num_reserved(local, ctx);
+}
+
+static int ieee80211_num_chanctx(struct ieee80211_local *local)
+{
+	struct ieee80211_chanctx *ctx;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(ctx, &local->chanctx_list, list)
+		num++;
+
+	return num;
+}
+
+static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+	return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx,
+				   const struct cfg80211_chan_def *compat)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->reserved_vifs,
+			    reserved_chanctx_list) {
+		if (!compat)
+			compat = &sdata->reserved_chandef;
+
+		compat = cfg80211_chandef_compatible(&sdata->reserved_chandef,
+						     compat);
+		if (!compat)
+			break;
+	}
+
+	return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
+				       struct ieee80211_chanctx *ctx,
+				       const struct cfg80211_chan_def *compat)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->assigned_vifs,
+			    assigned_chanctx_list) {
+		if (sdata->reserved_chanctx != NULL)
+			continue;
+
+		if (!compat)
+			compat = &sdata->vif.bss_conf.chandef;
+
+		compat = cfg80211_chandef_compatible(
+				&sdata->vif.bss_conf.chandef, compat);
+		if (!compat)
+			break;
+	}
+
+	return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx,
+				   const struct cfg80211_chan_def *compat)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
+	if (!compat)
+		return NULL;
+
+	compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat);
+	if (!compat)
+		return NULL;
+
+	return compat;
+}
+
+static bool
+ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
+				      struct ieee80211_chanctx *ctx,
+				      const struct cfg80211_chan_def *def)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	if (ieee80211_chanctx_combined_chandef(local, ctx, def))
+		return true;
+
+	if (!list_empty(&ctx->reserved_vifs) &&
+	    ieee80211_chanctx_reserved_chandef(local, ctx, def))
+		return true;
+
+	return false;
+}
+
+static struct ieee80211_chanctx *
+ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
+				   const struct cfg80211_chan_def *chandef,
+				   enum ieee80211_chanctx_mode mode)
+{
+	struct ieee80211_chanctx *ctx;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
+		return NULL;
+
+	list_for_each_entry(ctx, &local->chanctx_list, list) {
+		if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
+			continue;
+
+		if (!ieee80211_chanctx_can_reserve_chandef(local, ctx,
+							   chandef))
+			continue;
+
+		return ctx;
+	}
+
+	return NULL;
+}
+
 static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta)
 {
 	switch (sta->bandwidth) {
@@ -190,6 +354,11 @@
 		if (!compat)
 			continue;
 
+		compat = ieee80211_chanctx_reserved_chandef(local, ctx,
+							    compat);
+		if (!compat)
+			continue;
+
 		ieee80211_change_chanctx(local, ctx, compat);
 
 		return ctx;
@@ -217,62 +386,91 @@
 }
 
 static struct ieee80211_chanctx *
-ieee80211_new_chanctx(struct ieee80211_local *local,
-		      const struct cfg80211_chan_def *chandef,
-		      enum ieee80211_chanctx_mode mode)
+ieee80211_alloc_chanctx(struct ieee80211_local *local,
+			const struct cfg80211_chan_def *chandef,
+			enum ieee80211_chanctx_mode mode)
 {
 	struct ieee80211_chanctx *ctx;
-	u32 changed;
-	int err;
 
 	lockdep_assert_held(&local->chanctx_mtx);
 
 	ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL);
 	if (!ctx)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
+	INIT_LIST_HEAD(&ctx->assigned_vifs);
+	INIT_LIST_HEAD(&ctx->reserved_vifs);
 	ctx->conf.def = *chandef;
 	ctx->conf.rx_chains_static = 1;
 	ctx->conf.rx_chains_dynamic = 1;
 	ctx->mode = mode;
 	ctx->conf.radar_enabled = ieee80211_is_radar_required(local);
 	ieee80211_recalc_chanctx_min_def(local, ctx);
+
+	return ctx;
+}
+
+static int ieee80211_add_chanctx(struct ieee80211_local *local,
+				 struct ieee80211_chanctx *ctx)
+{
+	u32 changed;
+	int err;
+
+	lockdep_assert_held(&local->mtx);
+	lockdep_assert_held(&local->chanctx_mtx);
+
 	if (!local->use_chanctx)
 		local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
 
-	/* we hold the mutex to prevent idle from changing */
-	lockdep_assert_held(&local->mtx);
 	/* turn idle off *before* setting channel -- some drivers need that */
 	changed = ieee80211_idle_off(local);
 	if (changed)
 		ieee80211_hw_config(local, changed);
 
 	if (!local->use_chanctx) {
-		local->_oper_chandef = *chandef;
+		local->_oper_chandef = ctx->conf.def;
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 	} else {
 		err = drv_add_chanctx(local, ctx);
 		if (err) {
-			kfree(ctx);
 			ieee80211_recalc_idle(local);
-			return ERR_PTR(err);
+			return err;
 		}
 	}
 
-	/* and keep the mutex held until the new chanctx is on the list */
-	list_add_rcu(&ctx->list, &local->chanctx_list);
+	return 0;
+}
 
+static struct ieee80211_chanctx *
+ieee80211_new_chanctx(struct ieee80211_local *local,
+		      const struct cfg80211_chan_def *chandef,
+		      enum ieee80211_chanctx_mode mode)
+{
+	struct ieee80211_chanctx *ctx;
+	int err;
+
+	lockdep_assert_held(&local->mtx);
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	err = ieee80211_add_chanctx(local, ctx);
+	if (err) {
+		kfree(ctx);
+		return ERR_PTR(err);
+	}
+
+	list_add_rcu(&ctx->list, &local->chanctx_list);
 	return ctx;
 }
 
-static void ieee80211_free_chanctx(struct ieee80211_local *local,
-				   struct ieee80211_chanctx *ctx)
+static void ieee80211_del_chanctx(struct ieee80211_local *local,
+				  struct ieee80211_chanctx *ctx)
 {
-	bool check_single_channel = false;
 	lockdep_assert_held(&local->chanctx_mtx);
 
-	WARN_ON_ONCE(ctx->refcount != 0);
-
 	if (!local->use_chanctx) {
 		struct cfg80211_chan_def *chandef = &local->_oper_chandef;
 		chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -282,8 +480,9 @@
 		/* NOTE: Disabling radar is only valid here for
 		 * single channel context. To be sure, check it ...
 		 */
-		if (local->hw.conf.radar_enabled)
-			check_single_channel = true;
+		WARN_ON(local->hw.conf.radar_enabled &&
+			!list_empty(&local->chanctx_list));
+
 		local->hw.conf.radar_enabled = false;
 
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
@@ -291,39 +490,19 @@
 		drv_remove_chanctx(local, ctx);
 	}
 
-	list_del_rcu(&ctx->list);
-	kfree_rcu(ctx, rcu_head);
-
-	/* throw a warning if this wasn't the only channel context. */
-	WARN_ON(check_single_channel && !list_empty(&local->chanctx_list));
-
 	ieee80211_recalc_idle(local);
 }
 
-static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_chanctx *ctx)
+static void ieee80211_free_chanctx(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx)
 {
-	struct ieee80211_local *local = sdata->local;
-	int ret;
-
 	lockdep_assert_held(&local->chanctx_mtx);
 
-	ret = drv_assign_vif_chanctx(local, sdata, ctx);
-	if (ret)
-		return ret;
+	WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
 
-	rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf);
-	ctx->refcount++;
-
-	ieee80211_recalc_txpower(sdata);
-	ieee80211_recalc_chanctx_min_def(local, ctx);
-	sdata->vif.bss_conf.idle = false;
-
-	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
-	    sdata->vif.type != NL80211_IFTYPE_MONITOR)
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
-
-	return 0;
+	list_del_rcu(&ctx->list);
+	ieee80211_del_chanctx(local, ctx);
+	kfree_rcu(ctx, rcu_head);
 }
 
 static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
@@ -384,30 +563,58 @@
 	drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
 }
 
-static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
-					   struct ieee80211_chanctx *ctx)
+static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_chanctx *new_ctx)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *curr_ctx = NULL;
+	int ret = 0;
 
-	lockdep_assert_held(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
 
-	ctx->refcount--;
-	rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+	if (conf) {
+		curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
 
-	sdata->vif.bss_conf.idle = true;
+		drv_unassign_vif_chanctx(local, sdata, curr_ctx);
+		conf = NULL;
+		list_del(&sdata->assigned_chanctx_list);
+	}
+
+	if (new_ctx) {
+		ret = drv_assign_vif_chanctx(local, sdata, new_ctx);
+		if (ret)
+			goto out;
+
+		conf = &new_ctx->conf;
+		list_add(&sdata->assigned_chanctx_list,
+			 &new_ctx->assigned_vifs);
+	}
+
+out:
+	rcu_assign_pointer(sdata->vif.chanctx_conf, conf);
+
+	sdata->vif.bss_conf.idle = !conf;
+
+	if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
+		ieee80211_recalc_chanctx_chantype(local, curr_ctx);
+		ieee80211_recalc_smps_chanctx(local, curr_ctx);
+		ieee80211_recalc_radar_chanctx(local, curr_ctx);
+		ieee80211_recalc_chanctx_min_def(local, curr_ctx);
+	}
+
+	if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
+		ieee80211_recalc_txpower(sdata);
+		ieee80211_recalc_chanctx_min_def(local, new_ctx);
+	}
 
 	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
 	    sdata->vif.type != NL80211_IFTYPE_MONITOR)
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_IDLE);
 
-	drv_unassign_vif_chanctx(local, sdata, ctx);
-
-	if (ctx->refcount > 0) {
-		ieee80211_recalc_chanctx_chantype(sdata->local, ctx);
-		ieee80211_recalc_smps_chanctx(local, ctx);
-		ieee80211_recalc_radar_chanctx(local, ctx);
-		ieee80211_recalc_chanctx_min_def(local, ctx);
-	}
+	return ret;
 }
 
 static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
@@ -425,8 +632,11 @@
 
 	ctx = container_of(conf, struct ieee80211_chanctx, conf);
 
-	ieee80211_unassign_vif_chanctx(sdata, ctx);
-	if (ctx->refcount == 0)
+	if (sdata->reserved_chanctx)
+		ieee80211_vif_unreserve_chanctx(sdata);
+
+	ieee80211_assign_vif_chanctx(sdata, NULL);
+	if (ieee80211_chanctx_refcount(local, ctx) == 0)
 		ieee80211_free_chanctx(local, ctx);
 }
 
@@ -526,6 +736,7 @@
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_chanctx *ctx;
+	u8 radar_detect_width = 0;
 	int ret;
 
 	lockdep_assert_held(&local->mtx);
@@ -533,6 +744,22 @@
 	WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
 
 	mutex_lock(&local->chanctx_mtx);
+
+	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+					    chandef,
+					    sdata->wdev.iftype);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		radar_detect_width = BIT(chandef->width);
+
+	sdata->radar_required = ret;
+
+	ret = ieee80211_check_combinations(sdata, chandef, mode,
+					   radar_detect_width);
+	if (ret < 0)
+		goto out;
+
 	__ieee80211_vif_release_channel(sdata);
 
 	ctx = ieee80211_find_chanctx(local, chandef, mode);
@@ -548,7 +775,7 @@
 	ret = ieee80211_assign_vif_chanctx(sdata, ctx);
 	if (ret) {
 		/* if assign fails refcount stays the same */
-		if (ctx->refcount == 0)
+		if (ieee80211_chanctx_refcount(local, ctx) == 0)
 			ieee80211_free_chanctx(local, ctx);
 		goto out;
 	}
@@ -560,39 +787,20 @@
 	return ret;
 }
 
-int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
-				 u32 *changed)
+static int __ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+					  struct ieee80211_chanctx *ctx,
+					  u32 *changed)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_chanctx_conf *conf;
-	struct ieee80211_chanctx *ctx;
 	const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
-	int ret;
 	u32 chanctx_changed = 0;
 
-	lockdep_assert_held(&local->mtx);
-
-	/* should never be called if not performing a channel switch. */
-	if (WARN_ON(!sdata->vif.csa_active))
-		return -EINVAL;
-
 	if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
 				     IEEE80211_CHAN_DISABLED))
 		return -EINVAL;
 
-	mutex_lock(&local->chanctx_mtx);
-	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-					 lockdep_is_held(&local->chanctx_mtx));
-	if (!conf) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ctx = container_of(conf, struct ieee80211_chanctx, conf);
-	if (ctx->refcount != 1) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (ieee80211_chanctx_refcount(local, ctx) != 1)
+		return -EINVAL;
 
 	if (sdata->vif.bss_conf.chandef.width != chandef->width) {
 		chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
@@ -610,12 +818,224 @@
 	ieee80211_recalc_radar_chanctx(local, ctx);
 	ieee80211_recalc_chanctx_min_def(local, ctx);
 
-	ret = 0;
+	return 0;
+}
+
+int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+				 u32 *changed)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *ctx;
+	int ret;
+
+	lockdep_assert_held(&local->mtx);
+
+	/* should never be called if not performing a channel switch. */
+	if (WARN_ON(!sdata->vif.csa_active))
+		return -EINVAL;
+
+	mutex_lock(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+	ret = __ieee80211_vif_change_channel(sdata, ctx, changed);
  out:
 	mutex_unlock(&local->chanctx_mtx);
 	return ret;
 }
 
+static void
+__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+				      bool clear)
+{
+	struct ieee80211_local *local __maybe_unused = sdata->local;
+	struct ieee80211_sub_if_data *vlan;
+	struct ieee80211_chanctx_conf *conf;
+
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
+		return;
+
+	lockdep_assert_held(&local->mtx);
+
+	/* Check that conf exists, even when clearing this function
+	 * must be called with the AP's channel context still there
+	 * as it would otherwise cause VLANs to have an invalid
+	 * channel context pointer for a while, possibly pointing
+	 * to a channel context that has already been freed.
+	 */
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	WARN_ON(!conf);
+
+	if (clear)
+		conf = NULL;
+
+	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+		rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
+}
+
+void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+					 bool clear)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	mutex_lock(&local->chanctx_mtx);
+
+	__ieee80211_vif_copy_chanctx_to_vlans(sdata, clear);
+
+	mutex_unlock(&local->chanctx_mtx);
+}
+
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_chanctx *ctx = sdata->reserved_chanctx;
+
+	lockdep_assert_held(&sdata->local->chanctx_mtx);
+
+	if (WARN_ON(!ctx))
+		return -EINVAL;
+
+	list_del(&sdata->reserved_chanctx_list);
+	sdata->reserved_chanctx = NULL;
+
+	if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0)
+		ieee80211_free_chanctx(sdata->local, ctx);
+
+	return 0;
+}
+
+int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+				  const struct cfg80211_chan_def *chandef,
+				  enum ieee80211_chanctx_mode mode,
+				  bool radar_required)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *new_ctx, *curr_ctx;
+	int ret = 0;
+
+	mutex_lock(&local->chanctx_mtx);
+
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+	new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
+	if (!new_ctx) {
+		if (ieee80211_chanctx_refcount(local, curr_ctx) == 1 &&
+		    (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) {
+			/* if we're the only users of the chanctx and
+			 * the driver supports changing a running
+			 * context, reserve our current context
+			 */
+			new_ctx = curr_ctx;
+		} else if (ieee80211_can_create_new_chanctx(local)) {
+			/* create a new context and reserve it */
+			new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+			if (IS_ERR(new_ctx)) {
+				ret = PTR_ERR(new_ctx);
+				goto out;
+			}
+		} else {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs);
+	sdata->reserved_chanctx = new_ctx;
+	sdata->reserved_chandef = *chandef;
+	sdata->reserved_radar_required = radar_required;
+out:
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
+}
+
+int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+				       u32 *changed)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx *ctx;
+	struct ieee80211_chanctx *old_ctx;
+	struct ieee80211_chanctx_conf *conf;
+	int ret;
+	u32 tmp_changed = *changed;
+
+	/* TODO: need to recheck if the chandef is usable etc.? */
+
+	lockdep_assert_held(&local->mtx);
+
+	mutex_lock(&local->chanctx_mtx);
+
+	ctx = sdata->reserved_chanctx;
+	if (WARN_ON(!ctx)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	old_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+	if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+		tmp_changed |= BSS_CHANGED_BANDWIDTH;
+
+	sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
+
+	/* unref our reservation */
+	sdata->reserved_chanctx = NULL;
+	sdata->radar_required = sdata->reserved_radar_required;
+	list_del(&sdata->reserved_chanctx_list);
+
+	if (old_ctx == ctx) {
+		/* This is our own context, just change it */
+		ret = __ieee80211_vif_change_channel(sdata, old_ctx,
+						     &tmp_changed);
+		if (ret)
+			goto out;
+	} else {
+		ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+		if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
+			ieee80211_free_chanctx(local, old_ctx);
+		if (ret) {
+			/* if assign fails refcount stays the same */
+			if (ieee80211_chanctx_refcount(local, ctx) == 0)
+				ieee80211_free_chanctx(local, ctx);
+			goto out;
+		}
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP)
+			__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+	}
+
+	*changed = tmp_changed;
+
+	ieee80211_recalc_chanctx_chantype(local, ctx);
+	ieee80211_recalc_smps_chanctx(local, ctx);
+	ieee80211_recalc_radar_chanctx(local, ctx);
+	ieee80211_recalc_chanctx_min_def(local, ctx);
+out:
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
+}
+
 int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
 				   const struct cfg80211_chan_def *chandef,
 				   u32 *changed)
@@ -695,40 +1115,6 @@
 	mutex_unlock(&local->chanctx_mtx);
 }
 
-void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
-					 bool clear)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sub_if_data *vlan;
-	struct ieee80211_chanctx_conf *conf;
-
-	ASSERT_RTNL();
-
-	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
-		return;
-
-	mutex_lock(&local->chanctx_mtx);
-
-	/*
-	 * Check that conf exists, even when clearing this function
-	 * must be called with the AP's channel context still there
-	 * as it would otherwise cause VLANs to have an invalid
-	 * channel context pointer for a while, possibly pointing
-	 * to a channel context that has already been freed.
-	 */
-	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-				lockdep_is_held(&local->chanctx_mtx));
-	WARN_ON(!conf);
-
-	if (clear)
-		conf = NULL;
-
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
-		rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
-
-	mutex_unlock(&local->chanctx_mtx);
-}
-
 void ieee80211_iter_chan_contexts_atomic(
 	struct ieee80211_hw *hw,
 	void (*iter)(struct ieee80211_hw *hw,

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index fa16e54..0e963bc 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c

@@ -128,7 +128,7 @@
 	if (!strcmp(buf, TX_LATENCY_DISABLED)) {
 		if (!tx_latency)
 			goto unlock;
-		rcu_assign_pointer(local->tx_latency, NULL);
+		RCU_INIT_POINTER(local->tx_latency, NULL);
 		synchronize_rcu();
 		kfree(tx_latency);
 		goto unlock;

diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 214ed4e..60c35af 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h

@@ -1,6 +1,8 @@
 #ifndef __MAC80211_DEBUGFS_H
 #define __MAC80211_DEBUGFS_H
 
+#include "ieee80211_i.h"
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 void debugfs_hw_add(struct ieee80211_local *local);
 int __printf(4, 5) mac80211_format_buffer(char __user *userbuf, size_t count,

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 40a6489..e205eba 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c

@@ -34,8 +34,7 @@
 	ssize_t ret = -EINVAL;
 
 	read_lock(&dev_base_lock);
-	if (sdata->dev->reg_state == NETREG_REGISTERED)
-		ret = (*format)(sdata, buf, sizeof(buf));
+	ret = (*format)(sdata, buf, sizeof(buf));
 	read_unlock(&dev_base_lock);
 
 	if (ret >= 0)
@@ -62,8 +61,7 @@
 
 	ret = -ENODEV;
 	rtnl_lock();
-	if (sdata->dev->reg_state == NETREG_REGISTERED)
-		ret = (*write)(sdata, buf, count);
+	ret = (*write)(sdata, buf, count);
 	rtnl_unlock();
 
 	return ret;

diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 79025e7..9f5501a 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h

@@ -3,6 +3,8 @@
 #ifndef __IEEE80211_DEBUGFS_NETDEV_H
 #define __IEEE80211_DEBUGFS_NETDEV_H
 
+#include "ieee80211_i.h"
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);

diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index fc689f5..bd782dc 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h

@@ -5,11 +5,11 @@
 #include "ieee80211_i.h"
 #include "trace.h"
 
-static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
+static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
 {
-	WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
-	     "%s:  Failed check-sdata-in-driver check, flags: 0x%x\n",
-	     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
+	return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
+		     "%s:  Failed check-sdata-in-driver check, flags: 0x%x\n",
+		     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
 }
 
 static inline struct ieee80211_sub_if_data *
@@ -168,7 +168,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_change_interface(local, sdata, type, p2p);
 	ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
@@ -181,7 +182,8 @@
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_remove_interface(local, sdata);
 	local->ops->remove_interface(&local->hw, &sdata->vif);
@@ -219,7 +221,8 @@
 			 sdata->vif.type == NL80211_IFTYPE_MONITOR))
 		return;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_bss_info_changed(local, sdata, info, changed);
 	if (local->ops->bss_info_changed)
@@ -278,7 +281,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_key(local, cmd, sdata, sta, key);
 	ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
@@ -298,7 +302,8 @@
 		ista = &sta->sta;
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
 	if (local->ops->update_tkip_key)
@@ -315,7 +320,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_hw_scan(local, sdata);
 	ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
@@ -328,7 +334,8 @@
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_cancel_hw_scan(local, sdata);
 	local->ops->cancel_hw_scan(&local->hw, &sdata->vif);
@@ -345,7 +352,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_start(local, sdata);
 	ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
@@ -361,7 +369,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_stop(local, sdata);
 	ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif);
@@ -462,7 +471,8 @@
 				  struct ieee80211_sta *sta)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_notify(local, sdata, cmd, sta);
 	if (local->ops->sta_notify)
@@ -479,7 +489,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_add(local, sdata, sta);
 	if (local->ops->sta_add)
@@ -497,7 +508,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_remove(local, sdata, sta);
 	if (local->ops->sta_remove)
@@ -515,7 +527,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	if (local->ops->sta_add_debugfs)
 		local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
@@ -545,7 +558,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta);
 	if (local->ops->sta_pre_rcu_remove)
@@ -566,7 +580,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
 	if (local->ops->sta_state) {
@@ -590,7 +605,8 @@
 				     struct ieee80211_sta *sta, u32 changed)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
 		(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -612,7 +628,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_conf_tx(local, sdata, ac, params);
 	if (local->ops->conf_tx)
@@ -629,7 +646,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return ret;
 
 	trace_drv_get_tsf(local, sdata);
 	if (local->ops->get_tsf)
@@ -644,7 +662,8 @@
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_tsf(local, sdata, tsf);
 	if (local->ops->set_tsf)
@@ -657,7 +676,8 @@
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_reset_tsf(local, sdata);
 	if (local->ops->reset_tsf)
@@ -689,7 +709,8 @@
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
@@ -726,13 +747,19 @@
 }
 
 static inline void drv_flush(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *sdata,
 			     u32 queues, bool drop)
 {
+	struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+
 	might_sleep();
 
+	if (sdata && !check_sdata_in_driver(sdata))
+		return;
+
 	trace_drv_flush(local, queues, drop);
 	if (local->ops->flush)
-		local->ops->flush(&local->hw, queues, drop);
+		local->ops->flush(&local->hw, vif, queues, drop);
 	trace_drv_return_void(local);
 }
 
@@ -848,7 +875,8 @@
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_bitrate_mask(local, sdata, mask);
 	if (local->ops->set_bitrate_mask)
@@ -863,7 +891,8 @@
 				      struct ieee80211_sub_if_data *sdata,
 				      struct cfg80211_gtk_rekey_data *data)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_rekey_data(local, sdata, data);
 	if (local->ops->set_rekey_data)
@@ -931,7 +960,8 @@
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 	WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
 
 	trace_drv_mgd_prepare_tx(local, sdata);
@@ -958,6 +988,9 @@
 static inline void drv_remove_chanctx(struct ieee80211_local *local,
 				      struct ieee80211_chanctx *ctx)
 {
+	if (WARN_ON(!ctx->driver_present))
+		return;
+
 	trace_drv_remove_chanctx(local, ctx);
 	if (local->ops->remove_chanctx)
 		local->ops->remove_chanctx(&local->hw, &ctx->conf);
@@ -983,7 +1016,8 @@
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_assign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->assign_vif_chanctx) {
@@ -1001,7 +1035,8 @@
 					    struct ieee80211_sub_if_data *sdata,
 					    struct ieee80211_chanctx *ctx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_unassign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->unassign_vif_chanctx) {
@@ -1013,12 +1048,66 @@
 	trace_drv_return_void(local);
 }
 
+static inline int
+drv_switch_vif_chanctx(struct ieee80211_local *local,
+		       struct ieee80211_vif_chanctx_switch *vifs,
+		       int n_vifs,
+		       enum ieee80211_chanctx_switch_mode mode)
+{
+	int ret = 0;
+	int i;
+
+	if (!local->ops->switch_vif_chanctx)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < n_vifs; i++) {
+		struct ieee80211_chanctx *new_ctx =
+			container_of(vifs[i].new_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+		struct ieee80211_chanctx *old_ctx =
+			container_of(vifs[i].old_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+
+		WARN_ON_ONCE(!old_ctx->driver_present);
+		WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+			      new_ctx->driver_present) ||
+			     (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+			      !new_ctx->driver_present));
+	}
+
+	trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+	ret = local->ops->switch_vif_chanctx(&local->hw,
+					     vifs, n_vifs, mode);
+	trace_drv_return_int(local, ret);
+
+	if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+		for (i = 0; i < n_vifs; i++) {
+			struct ieee80211_chanctx *new_ctx =
+				container_of(vifs[i].new_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+			struct ieee80211_chanctx *old_ctx =
+				container_of(vifs[i].old_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+
+			new_ctx->driver_present = true;
+			old_ctx->driver_present = false;
+		}
+	}
+
+	return ret;
+}
+
 static inline int drv_start_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->start_ap)
@@ -1030,7 +1119,8 @@
 static inline void drv_stop_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_stop_ap(local, sdata);
 	if (local->ops->stop_ap)
@@ -1053,7 +1143,8 @@
 			    struct ieee80211_sub_if_data *sdata,
 			    int key_idx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON_ONCE(key_idx < -1 || key_idx > 3);
 
@@ -1095,7 +1186,8 @@
 	int ret = 0;
 
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->join_ibss)
@@ -1108,7 +1200,8 @@
 				  struct ieee80211_sub_if_data *sdata)
 {
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_leave_ibss(local, sdata);
 	if (local->ops->leave_ibss)
@@ -1116,4 +1209,17 @@
 	trace_drv_return_void(local);
 }
 
+static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
+					      struct ieee80211_sta *sta)
+{
+	u32 ret = 0;
+
+	trace_drv_get_expected_throughput(sta);
+	if (local->ops->get_expected_throughput)
+		ret = local->ops->get_expected_throughput(sta);
+	trace_drv_return_u32(local, ret);
+
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c150b68..15702ff 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c

@@ -31,6 +31,18 @@
 	}
 }
 
+static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa,
+				  struct ieee80211_ht_cap *ht_capa_mask,
+				  struct ieee80211_sta_ht_cap *ht_cap,
+				  u16 flag)
+{
+	__le16 le_flag = cpu_to_le16(flag);
+
+	if ((ht_capa_mask->cap_info & le_flag) &&
+	    (ht_capa->cap_info & le_flag))
+		ht_cap->cap |= flag;
+}
+
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap)
 {
@@ -59,7 +71,7 @@
 	smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
 
 	/* NOTE:  If you add more over-rides here, update register_hw
-	 * ht_capa_mod_msk logic in main.c as well.
+	 * ht_capa_mod_mask logic in main.c as well.
 	 * And, if this method can ever change ht_cap.ht_supported, fix
 	 * the check in ieee80211_add_ht_ie.
 	 */
@@ -86,6 +98,14 @@
 	__check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
 			      IEEE80211_HT_CAP_MAX_AMSDU);
 
+	/* Allow user to disable LDPC */
+	__check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+			      IEEE80211_HT_CAP_LDPC_CODING);
+
+	/* Allow user to enable 40 MHz intolerant bit. */
+	__check_htcap_enable(ht_capa, ht_capa_mask, ht_cap,
+			     IEEE80211_HT_CAP_40MHZ_INTOLERANT);
+
 	/* Allow user to decrease AMPDU factor */
 	if (ht_capa_mask->ampdu_params_info &
 	    IEEE80211_HT_AMPDU_PARM_FACTOR) {

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 06d2878..18ee0a2 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c

@@ -143,7 +143,7 @@
 		*pos++ = csa_settings->block_tx ? 1 : 0;
 		*pos++ = ieee80211_frequency_to_channel(
 				csa_settings->chandef.chan->center_freq);
-		sdata->csa_counter_offset_beacon = (pos - presp->head);
+		sdata->csa_counter_offset_beacon[0] = (pos - presp->head);
 		*pos++ = csa_settings->count;
 	}
 
@@ -228,7 +228,7 @@
 	struct beacon_data *presp;
 	enum nl80211_bss_scan_width scan_width;
 	bool have_higher_than_11mbit;
-	bool radar_required = false;
+	bool radar_required;
 	int err;
 
 	sdata_assert_lock(sdata);
@@ -253,7 +253,7 @@
 
 	presp = rcu_dereference_protected(ifibss->presp,
 					  lockdep_is_held(&sdata->wdev.mtx));
-	rcu_assign_pointer(ifibss->presp, NULL);
+	RCU_INIT_POINTER(ifibss->presp, NULL);
 	if (presp)
 		kfree_rcu(presp, rcu_head);
 
@@ -262,7 +262,8 @@
 	/* make a copy of the chandef, it could be modified below. */
 	chandef = *req_chandef;
 	chan = chandef.chan;
-	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		if (chandef.width == NL80211_CHAN_WIDTH_5 ||
 		    chandef.width == NL80211_CHAN_WIDTH_10 ||
 		    chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
@@ -274,7 +275,8 @@
 		chandef.width = NL80211_CHAN_WIDTH_20;
 		chandef.center_freq1 = chan->center_freq;
 		/* check again for downgraded chandef */
-		if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+		if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+					     NL80211_IFTYPE_ADHOC)) {
 			sdata_info(sdata,
 				   "Failed to join IBSS, beacons forbidden\n");
 			return;
@@ -282,21 +284,20 @@
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &chandef);
+					    &chandef, NL80211_IFTYPE_ADHOC);
 	if (err < 0) {
 		sdata_info(sdata,
 			   "Failed to join IBSS, invalid chandef\n");
 		return;
 	}
-	if (err > 0) {
-		if (!ifibss->userspace_handles_dfs) {
-			sdata_info(sdata,
-				   "Failed to join IBSS, DFS channel without control program\n");
-			return;
-		}
-		radar_required = true;
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
+		sdata_info(sdata,
+			   "Failed to join IBSS, DFS channel without control program\n");
+		return;
 	}
 
+	radar_required = err;
+
 	mutex_lock(&local->mtx);
 	if (ieee80211_vif_use_channel(sdata, &chandef,
 				      ifibss->fixed_channel ?
@@ -775,7 +776,8 @@
 	 * unavailable.
 	 */
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &ifibss->chandef);
+					    &ifibss->chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err > 0)
 		cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef,
 				     GFP_ATOMIC);
@@ -861,7 +863,8 @@
 		goto disconnect;
 	}
 
-	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef)) {
+	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		sdata_info(sdata,
 			   "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
 			   ifibss->bssid,
@@ -873,17 +876,17 @@
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &params.chandef);
+					    &params.chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err < 0)
 		goto disconnect;
-	if (err) {
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
 		/* IBSS-DFS only allowed with a control program */
-		if (!ifibss->userspace_handles_dfs)
-			goto disconnect;
-
-		params.radar_required = true;
+		goto disconnect;
 	}
 
+	params.radar_required = err;
+
 	if (cfg80211_chandef_identical(&params.chandef,
 				       &sdata->vif.bss_conf.chandef)) {
 		ibss_dbg(sdata,
@@ -1636,7 +1639,33 @@
 	u32 changed = 0;
 	u32 rate_flags;
 	struct ieee80211_supported_band *sband;
+	enum ieee80211_chanctx_mode chanmode;
+	struct ieee80211_local *local = sdata->local;
+	int radar_detect_width = 0;
 	int i;
+	int ret;
+
+	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+					    &params->chandef,
+					    sdata->wdev.iftype);
+	if (ret < 0)
+		return ret;
+
+	if (ret > 0) {
+		if (!params->userspace_handles_dfs)
+			return -EINVAL;
+		radar_detect_width = BIT(params->chandef.width);
+	}
+
+	chanmode = (params->channel_fixed && !ret) ?
+		IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE;
+
+	mutex_lock(&local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode,
+					   radar_detect_width);
+	mutex_unlock(&local->chanctx_mtx);
+	if (ret < 0)
+		return ret;
 
 	if (params->bssid) {
 		memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1648,10 +1677,11 @@
 	sdata->u.ibss.control_port = params->control_port;
 	sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs;
 	sdata->u.ibss.basic_rates = params->basic_rates;
+	sdata->u.ibss.last_scan_completed = jiffies;
 
 	/* fix basic_rates if channel does not support these rates */
 	rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
-	sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
+	sband = local->hw.wiphy->bands[params->chandef.chan->band];
 	for (i = 0; i < sband->n_bitrates; i++) {
 		if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
 			sdata->u.ibss.basic_rates &= ~BIT(i);
@@ -1700,9 +1730,9 @@
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
-	sdata->needed_rx_chains = sdata->local->rx_chains;
+	sdata->needed_rx_chains = local->rx_chains;
 
-	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 
 	return 0;
 }

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f169b6e..ac9836e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h

@@ -260,7 +260,7 @@
 
 	/* to be used after channel switch. */
 	struct cfg80211_beacon_data *next_beacon;
-	struct list_head vlans;
+	struct list_head vlans; /* write-protected with RTNL and local->mtx */
 
 	struct ps_data ps;
 	atomic_t num_mcast_sta; /* number of stations receiving multicast */
@@ -276,7 +276,7 @@
 };
 
 struct ieee80211_if_vlan {
-	struct list_head list;
+	struct list_head list; /* write-protected with RTNL and local->mtx */
 
 	/* used for all tx if the VLAN is configured to 4-addr mode */
 	struct sta_info __rcu *sta;
@@ -692,8 +692,10 @@
 	struct list_head list;
 	struct rcu_head rcu_head;
 
+	struct list_head assigned_vifs;
+	struct list_head reserved_vifs;
+
 	enum ieee80211_chanctx_mode mode;
-	int refcount;
 	bool driver_present;
 
 	struct ieee80211_chanctx_conf conf;
@@ -752,11 +754,21 @@
 	struct mac80211_qos_map __rcu *qos_map;
 
 	struct work_struct csa_finalize_work;
-	int csa_counter_offset_beacon;
-	int csa_counter_offset_presp;
+	u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM];
+	u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM];
 	bool csa_radar_required;
+	bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
 	struct cfg80211_chan_def csa_chandef;
 
+	struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
+	struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
+
+	/* context reservation -- protected with chanctx_mtx */
+	struct ieee80211_chanctx *reserved_chanctx;
+	struct cfg80211_chan_def reserved_chandef;
+	bool reserved_radar_required;
+	u8 csa_current_counter;
+
 	/* used to reconfigure hardware SM PS */
 	struct work_struct recalc_smps;
 
@@ -1449,6 +1461,7 @@
 int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 				       struct cfg80211_sched_scan_request *req);
 int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sched_scan_end(struct ieee80211_local *local);
 void ieee80211_sched_scan_stopped_work(struct work_struct *work);
 
 /* off-channel helpers */
@@ -1463,6 +1476,7 @@
 void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
 
 /* channel switch handling */
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
 void ieee80211_csa_finalize_work(struct work_struct *work);
 int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 			     struct cfg80211_csa_settings *params);
@@ -1772,6 +1786,16 @@
 			  const struct cfg80211_chan_def *chandef,
 			  enum ieee80211_chanctx_mode mode);
 int __must_check
+ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+			      const struct cfg80211_chan_def *chandef,
+			      enum ieee80211_chanctx_mode mode,
+			      bool radar_required);
+int __must_check
+ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+				   u32 *changed);
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata);
+
+int __must_check
 ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
 			       const struct cfg80211_chan_def *chandef,
 			       u32 *changed);
@@ -1783,6 +1807,8 @@
 void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
 void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
 					 bool clear);
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+			       struct ieee80211_chanctx *ctx);
 
 void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
 				   struct ieee80211_chanctx *chanctx);
@@ -1806,6 +1832,20 @@
 			  enum nl80211_iftype iftype);
 void ieee80211_recalc_dtim(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *sdata);
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 enum ieee80211_chanctx_mode chanmode,
+				 u8 radar_detect);
+int ieee80211_max_num_channels(struct ieee80211_local *local);
+
+/* TDLS */
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, u8 action_code, u8 dialog_token,
+			u16 status_code, u32 peer_capability,
+			const u8 *extra_ies, size_t extra_ies_len);
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, enum nl80211_tdls_operation oper);
+
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b8d331e..388b863 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c

@@ -250,6 +250,7 @@
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sub_if_data *nsdata;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -300,7 +301,10 @@
 		}
 	}
 
-	return 0;
+	mutex_lock(&local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
 }
 
 static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
@@ -395,6 +399,7 @@
 	sdata->vif.type = NL80211_IFTYPE_MONITOR;
 	snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
 		 wiphy_name(local->hw.wiphy));
+	sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
 
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
 
@@ -423,7 +428,7 @@
 	mutex_unlock(&local->mtx);
 	if (ret) {
 		mutex_lock(&local->iflist_mtx);
-		rcu_assign_pointer(local->monitor_sdata, NULL);
+		RCU_INIT_POINTER(local->monitor_sdata, NULL);
 		mutex_unlock(&local->iflist_mtx);
 		synchronize_net();
 		drv_remove_interface(local, sdata);
@@ -452,7 +457,7 @@
 		return;
 	}
 
-	rcu_assign_pointer(local->monitor_sdata, NULL);
+	RCU_INIT_POINTER(local->monitor_sdata, NULL);
 	mutex_unlock(&local->iflist_mtx);
 
 	synchronize_net();
@@ -492,7 +497,9 @@
 		if (!sdata->bss)
 			return -ENOLINK;
 
+		mutex_lock(&local->mtx);
 		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
+		mutex_unlock(&local->mtx);
 
 		master = container_of(sdata->bss,
 				      struct ieee80211_sub_if_data, u.ap);
@@ -722,8 +729,11 @@
 		drv_stop(local);
  err_del_bss:
 	sdata->bss = NULL;
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
+		mutex_unlock(&local->mtx);
+	}
 	/* might already be clear but that doesn't matter */
 	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
 	return res;
@@ -829,8 +839,15 @@
 
 	cancel_work_sync(&sdata->recalc_smps);
 	sdata_lock(sdata);
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 	sdata_unlock(sdata);
+
 	cancel_work_sync(&sdata->csa_finalize_work);
 
 	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -875,8 +892,10 @@
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
+		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
-		rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+		mutex_unlock(&local->mtx);
+		RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL);
 		/* no need to tell driver */
 		break;
 	case NL80211_IFTYPE_MONITOR:
@@ -895,7 +914,7 @@
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/* relies on synchronize_rcu() below */
-		rcu_assign_pointer(local->p2p_sdata, NULL);
+		RCU_INIT_POINTER(local->p2p_sdata, NULL);
 		/* fall through */
 	default:
 		cancel_work_sync(&sdata->work);
@@ -1267,6 +1286,7 @@
 	sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
 	sdata->control_port_no_encrypt = false;
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+	sdata->vif.bss_conf.idle = true;
 
 	sdata->noack_map = 0;
 
@@ -1280,6 +1300,8 @@
 	INIT_WORK(&sdata->work, ieee80211_iface_work);
 	INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
 	INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+	INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
+	INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
 
 	switch (type) {
 	case NL80211_IFTYPE_P2P_GO:
@@ -1758,7 +1780,6 @@
 	}
 	mutex_unlock(&local->iflist_mtx);
 	unregister_netdevice_many(&unreg_list);
-	list_del(&unreg_list);
 
 	list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
 		list_del(&sdata->list);
@@ -1774,20 +1795,19 @@
 	struct ieee80211_sub_if_data *sdata;
 
 	if (state != NETDEV_CHANGENAME)
-		return 0;
+		return NOTIFY_DONE;
 
 	if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy)
-		return 0;
+		return NOTIFY_DONE;
 
 	if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
-		return 0;
+		return NOTIFY_DONE;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	memcpy(sdata->name, dev->name, IFNAMSIZ);
-
 	ieee80211_debugfs_rename_netdev(sdata);
-	return 0;
+
+	return NOTIFY_OK;
 }
 
 static struct notifier_block mac80211_netdev_notifier = {

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 6ff65a1..16d97f0 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c

@@ -325,7 +325,8 @@
 	struct ieee80211_key *key;
 	int i, j, err;
 
-	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
+	if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS))
+		return ERR_PTR(-EINVAL);
 
 	key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
 	if (!key)
@@ -481,8 +482,8 @@
 	int idx, ret;
 	bool pairwise;
 
-	BUG_ON(!sdata);
-	BUG_ON(!key);
+	if (WARN_ON(!sdata || !key))
+		return -EINVAL;
 
 	pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
 	idx = key->conf.keyidx;

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4c1bf61..d17c26d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c

@@ -340,7 +340,7 @@
 
 	sdata_unlock(sdata);
 
-	return NOTIFY_DONE;
+	return NOTIFY_OK;
 }
 #endif
 
@@ -371,7 +371,7 @@
 
 	drv_ipv6_addr_change(local, sdata, idev);
 
-	return NOTIFY_DONE;
+	return NOTIFY_OK;
 }
 #endif
 
@@ -446,7 +446,9 @@
 	.cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
 				IEEE80211_HT_CAP_MAX_AMSDU |
 				IEEE80211_HT_CAP_SGI_20 |
-				IEEE80211_HT_CAP_SGI_40),
+				IEEE80211_HT_CAP_SGI_40 |
+				IEEE80211_HT_CAP_LDPC_CODING |
+				IEEE80211_HT_CAP_40MHZ_INTOLERANT),
 	.mcs = {
 		.rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff,
 			     0xff, 0xff, 0xff, 0xff, 0xff, },
@@ -954,6 +956,8 @@
 	if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
 		local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
 
+	local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		goto fail_wiphy_register;

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f70e9cd..6495a3f 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c

@@ -366,20 +366,15 @@
 		return 0;
 
 	/* find RSN IE */
-	data = ifmsh->ie;
-	while (data < ifmsh->ie + ifmsh->ie_len) {
-		if (*data == WLAN_EID_RSN) {
-			len = data[1] + 2;
-			break;
-		}
-		data++;
-	}
+	data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len);
+	if (!data)
+		return 0;
 
-	if (len) {
-		if (skb_tailroom(skb) < len)
-			return -ENOMEM;
-		memcpy(skb_put(skb, len), data, len);
-	}
+	len = data[1] + 2;
+
+	if (skb_tailroom(skb) < len)
+		return -ENOMEM;
+	memcpy(skb_put(skb, len), data, len);
 
 	return 0;
 }
@@ -684,7 +679,7 @@
 		*pos++ = 0x0;
 		*pos++ = ieee80211_frequency_to_channel(
 				csa->settings.chandef.chan->center_freq);
-		sdata->csa_counter_offset_beacon = hdr_len + 6;
+		sdata->csa_counter_offset_beacon[0] = hdr_len + 6;
 		*pos++ = csa->settings.count;
 		*pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
 		*pos++ = 6;
@@ -829,7 +824,7 @@
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
 	bcn = rcu_dereference_protected(ifmsh->beacon,
 					lockdep_is_held(&sdata->wdev.mtx));
-	rcu_assign_pointer(ifmsh->beacon, NULL);
+	RCU_INIT_POINTER(ifmsh->beacon, NULL);
 	kfree_rcu(bcn, rcu_head);
 
 	/* flush STAs and mpaths on this iface */
@@ -903,14 +898,15 @@
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &params.chandef);
+					    &params.chandef,
+					    NL80211_IFTYPE_MESH_POINT);
 	if (err < 0)
 		return false;
-	if (err) {
-		params.radar_required = true;
+	if (err > 0)
 		/* TODO: DFS not (yet) supported */
 		return false;
-	}
+
+	params.radar_required = err;
 
 	if (cfg80211_chandef_identical(&params.chandef,
 				       &sdata->vif.bss_conf.chandef)) {
@@ -1068,7 +1064,7 @@
 
 	/* Remove the CSA and MCSP elements from the beacon */
 	tmp_csa_settings = rcu_dereference(ifmsh->csa);
-	rcu_assign_pointer(ifmsh->csa, NULL);
+	RCU_INIT_POINTER(ifmsh->csa, NULL);
 	if (tmp_csa_settings)
 		kfree_rcu(tmp_csa_settings, rcu_head);
 	ret = ieee80211_mesh_rebuild_beacon(sdata);
@@ -1102,7 +1098,7 @@
 	ret = ieee80211_mesh_rebuild_beacon(sdata);
 	if (ret) {
 		tmp_csa_settings = rcu_dereference(ifmsh->csa);
-		rcu_assign_pointer(ifmsh->csa, NULL);
+		RCU_INIT_POINTER(ifmsh->csa, NULL);
 		kfree_rcu(tmp_csa_settings, rcu_head);
 		return ret;
 	}

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index f951468..94758b9 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c

@@ -37,7 +37,7 @@
 	return get_unaligned_le32(preq_elem + offset);
 }
 
-static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae)
+static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
@@ -544,9 +544,10 @@
 		if (time_after(jiffies, ifmsh->last_sn_update +
 					net_traversal_jiffies(sdata)) ||
 		    time_before(jiffies, ifmsh->last_sn_update)) {
-			target_sn = ++ifmsh->sn;
+			++ifmsh->sn;
 			ifmsh->last_sn_update = jiffies;
 		}
+		target_sn = ifmsh->sn;
 	} else if (is_broadcast_ether_addr(target_addr) &&
 		   (target_flags & IEEE80211_PREQ_TO_FLAG)) {
 		rcu_read_lock();

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 7d050ed..cf032a8 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c

@@ -287,8 +287,10 @@
 	struct sk_buff_head failq;
 	unsigned long flags;
 
-	BUG_ON(gate_mpath == from_mpath);
-	BUG_ON(!gate_mpath->next_hop);
+	if (WARN_ON(gate_mpath == from_mpath))
+		return;
+	if (WARN_ON(!gate_mpath->next_hop))
+		return;
 
 	__skb_queue_head_init(&failq);
 

diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 2bc5dc2..09625d6 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c

@@ -171,7 +171,7 @@
 	u8 cap;
 
 	WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET);
-	BUG_ON(!rcu_read_lock_held());
+	WARN_ON(!rcu_read_lock_held());
 	cap = beacon->meshconf->meshconf_cap;
 
 	spin_lock_bh(&ifmsh->sync_offset_lock);

diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 3b848da..0e4886f 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h

@@ -11,6 +11,7 @@
 #define MICHAEL_H
 
 #include <linux/types.h>
+#include <linux/ieee80211.h>
 
 #define MICHAEL_MIC_LEN 8
 

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 27600a9..3345401 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c

@@ -975,16 +975,23 @@
 	/* XXX: shouldn't really modify cfg80211-owned data! */
 	ifmgd->associated->channel = sdata->csa_chandef.chan;
 
-	/* XXX: wait for a beacon first? */
-	ieee80211_wake_queues_by_reason(&local->hw,
-					IEEE80211_MAX_QUEUE_MAP,
-					IEEE80211_QUEUE_STOP_REASON_CSA);
-
 	ieee80211_bss_info_change_notify(sdata, changed);
 
- out:
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	/* XXX: wait for a beacon first? */
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
+
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+	ieee80211_sta_reset_beacon_monitor(sdata);
+	ieee80211_sta_reset_conn_monitor(sdata);
+
+out:
 	sdata_unlock(sdata);
 }
 
@@ -1089,7 +1096,7 @@
 	}
 	chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf),
 			       struct ieee80211_chanctx, conf);
-	if (chanctx->refcount > 1) {
+	if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
 		sdata_info(sdata,
 			   "channel switch with multiple interfaces on the same channel, disconnecting\n");
 		ieee80211_queue_work(&local->hw,
@@ -1100,12 +1107,16 @@
 	mutex_unlock(&local->chanctx_mtx);
 
 	sdata->csa_chandef = csa_ie.chandef;
-	sdata->vif.csa_active = true;
 
-	if (csa_ie.mode)
+	mutex_lock(&local->mtx);
+	sdata->vif.csa_active = true;
+	sdata->csa_block_tx = csa_ie.mode;
+
+	if (sdata->csa_block_tx)
 		ieee80211_stop_queues_by_reason(&local->hw,
-				IEEE80211_MAX_QUEUE_MAP,
-				IEEE80211_QUEUE_STOP_REASON_CSA);
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 
 	if (local->ops->channel_switch) {
 		/* use driver's channel switch callback */
@@ -1817,6 +1828,12 @@
 	ifmgd->flags = 0;
 	mutex_lock(&local->mtx);
 	ieee80211_vif_release_channel(sdata);
+
+	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
 	mutex_unlock(&local->mtx);
 
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -2045,6 +2062,7 @@
 
 static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 {
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
@@ -2058,10 +2076,14 @@
 			       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
 			       true, frame_buf);
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
-	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 
 	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
 			      IEEE80211_DEAUTH_FRAME_LEN);
@@ -3546,6 +3568,9 @@
 	if (local->quiescing)
 		return;
 
+	if (sdata->vif.csa_active)
+		return;
+
 	sdata->u.mgd.connection_loss = false;
 	ieee80211_queue_work(&sdata->local->hw,
 			     &sdata->u.mgd.beacon_connection_loss_work);
@@ -3561,6 +3586,9 @@
 	if (local->quiescing)
 		return;
 
+	if (sdata->vif.csa_active)
+		return;
+
 	ieee80211_queue_work(&local->hw, &ifmgd->monitor_work);
 }
 
@@ -3707,7 +3735,7 @@
 	ieee80211_recalc_ps(local, latency_usec);
 	mutex_unlock(&local->iflist_mtx);
 
-	return 0;
+	return NOTIFY_OK;
 }
 
 static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 26fd94f..1c1469c 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c

@@ -657,6 +657,17 @@
 	kfree(priv);
 }
 
+static u32 minstrel_get_expected_throughput(void *priv_sta)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+	int best = mi->max_tp_rate[0];
+
+	/* cur_tp is in packets per second; scale it to kbps (1200 is the
+	 * average packet size used when cur_tp is computed)
+	 */
+	return MINSTREL_TRUNC(mi->r[best].cur_tp) * 1200 * 8 / 1024;
+}
+
 const struct rate_control_ops mac80211_minstrel = {
 	.name = "minstrel",
 	.tx_status = minstrel_tx_status,
@@ -670,6 +681,7 @@
 	.add_sta_debugfs = minstrel_add_sta_debugfs,
 	.remove_sta_debugfs = minstrel_remove_sta_debugfs,
 #endif
+	.get_expected_throughput = minstrel_get_expected_throughput,
 };
 
 int __init

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index bccaf85..85c1e74 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c

@@ -22,7 +22,7 @@
 #define MCS_NBITS (AVG_PKT_SIZE << 3)
 
 /* Number of symbols for a packet with (bps) bits per symbol */
-#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
+#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps))
 
 /* Transmission time (nanoseconds) for a packet containing (syms) symbols */
 #define MCS_SYMBOL_TIME(sgi, syms)					\
@@ -226,8 +226,9 @@
 		nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
 
 	nsecs += minstrel_mcs_groups[group].duration[rate];
-	tp = 1000000 * ((prob * 1000) / nsecs);
 
+	/* prob is scaled - see MINSTREL_FRAC above */
+	tp = 1000000 * ((prob * 1000) / nsecs);
 	mr->cur_tp = MINSTREL_TRUNC(tp);
 }
 
@@ -1031,6 +1032,22 @@
 	mac80211_minstrel.free(priv);
 }
 
+/* Convert the cur_tp (pkt per second) of the max_tp_rate entry to kbps. */
+static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
+{
+	struct minstrel_ht_sta_priv *msp = priv_sta;
+	struct minstrel_ht_sta *mi = &msp->ht;
+	int grp, idx;
+
+	/* without HT, defer to the legacy minstrel callback */
+	if (!msp->is_ht)
+		return mac80211_minstrel.get_expected_throughput(priv_sta);
+
+	grp = mi->max_tp_rate / MCS_GROUP_RATES;
+	idx = mi->max_tp_rate % MCS_GROUP_RATES;
+	return mi->groups[grp].rates[idx].cur_tp * AVG_PKT_SIZE * 8 / 1024;
+}
+
 static const struct rate_control_ops mac80211_minstrel_ht = {
 	.name = "minstrel_ht",
 	.tx_status = minstrel_ht_tx_status,
@@ -1045,6 +1062,7 @@
 	.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
 	.remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
 #endif
+	.get_expected_throughput = minstrel_ht_get_expected_throughput,
 };
 
 

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2b608b2..394e201 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c

@@ -54,24 +54,25 @@
 	return skb;
 }
 
-static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len)
+static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len)
 {
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-	struct ieee80211_hdr *hdr;
-
-	hdr = (void *)(skb->data);
+	struct ieee80211_hdr *hdr = (void *)skb->data;
 
 	if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
 			    RX_FLAG_FAILED_PLCP_CRC |
 			    RX_FLAG_AMPDU_IS_ZEROLEN))
-		return 1;
+		return true;
+
 	if (unlikely(skb->len < 16 + present_fcs_len))
-		return 1;
+		return true;
+
 	if (ieee80211_is_ctl(hdr->frame_control) &&
 	    !ieee80211_is_pspoll(hdr->frame_control) &&
 	    !ieee80211_is_back_req(hdr->frame_control))
-		return 1;
-	return 0;
+		return true;
+
+	return false;
 }
 
 static int
@@ -3191,7 +3192,7 @@
 }
 
 /*
- * This is the actual Rx frames handler. as it blongs to Rx path it must
+ * This is the actual Rx frames handler. as it belongs to Rx path it must
  * be called with rcu_read_lock protection.
  */
 static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 3ce7f2c..f40661e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c

@@ -309,7 +309,7 @@
 	if (local->scan_req != local->int_scan_req)
 		cfg80211_scan_done(local->scan_req, aborted);
 	local->scan_req = NULL;
-	rcu_assign_pointer(local->scan_sdata, NULL);
+	RCU_INIT_POINTER(local->scan_sdata, NULL);
 
 	local->scanning = 0;
 	local->scan_chandef.chan = NULL;
@@ -559,7 +559,7 @@
 		ieee80211_recalc_idle(local);
 
 		local->scan_req = NULL;
-		rcu_assign_pointer(local->scan_sdata, NULL);
+		RCU_INIT_POINTER(local->scan_sdata, NULL);
 	}
 
 	return rc;
@@ -773,7 +773,7 @@
 		int rc;
 
 		local->scan_req = NULL;
-		rcu_assign_pointer(local->scan_sdata, NULL);
+		RCU_INIT_POINTER(local->scan_sdata, NULL);
 
 		rc = __ieee80211_start_scan(sdata, req);
 		if (rc) {
@@ -1014,7 +1014,7 @@
 
 	if (ret) {
 		/* Clean in case of failure after HW restart or upon resume. */
-		rcu_assign_pointer(local->sched_scan_sdata, NULL);
+		RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
 		local->sched_scan_req = NULL;
 	}
 
@@ -1076,12 +1076,8 @@
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_results);
 
-void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+void ieee80211_sched_scan_end(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local =
-		container_of(work, struct ieee80211_local,
-			     sched_scan_stopped_work);
-
 	mutex_lock(&local->mtx);
 
 	if (!rcu_access_pointer(local->sched_scan_sdata)) {
@@ -1089,7 +1085,7 @@
 		return;
 	}
 
-	rcu_assign_pointer(local->sched_scan_sdata, NULL);
+	RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
 
 	/* If sched scan was aborted by the driver. */
 	local->sched_scan_req = NULL;
@@ -1099,6 +1095,15 @@
 	cfg80211_sched_scan_stopped(local->hw.wiphy);
 }
 
+/* Work item wrapper: recover the ieee80211_local that embeds @work and
+ * pass it to ieee80211_sched_scan_end().
+ */
+void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+{
+	ieee80211_sched_scan_end(container_of(work, struct ieee80211_local,
+					      sched_scan_stopped_work));
+}
+
 void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 847d92f..a9b46d8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c

@@ -240,6 +240,7 @@
 
 	sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
 
+	kfree(rcu_dereference_raw(sta->sta.rates));
 	kfree(sta);
 }
 
@@ -552,7 +553,7 @@
 int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 {
 	struct ieee80211_local *local = sta->local;
-	int err = 0;
+	int err;
 
 	might_sleep();
 
@@ -570,7 +571,6 @@
 
 	return 0;
  out_free:
-	BUG_ON(!err);
 	sta_info_free(local, sta);
 	return err;
 }

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 60cb7a6..ba29ebc 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c

@@ -541,6 +541,23 @@
  */
 #define STA_LOST_PKT_THRESHOLD	50
 
+/* Count one lost frame for @sta and report to cfg80211 at the threshold. */
+static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	bool aggregated = info->flags & IEEE80211_TX_CTL_AMPDU;
+	bool has_status = info->flags & IEEE80211_TX_STAT_AMPDU;
+
+	/* an aggregated frame carrying no status info is not counted */
+	if (aggregated && !has_status)
+		return;
+	if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
+		cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
+					    sta->lost_packets, GFP_ATOMIC);
+		sta->lost_packets = 0;
+	}
+}
+
 void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct sk_buff *skb2;
@@ -680,12 +697,8 @@
 			if (info->flags & IEEE80211_TX_STAT_ACK) {
 				if (sta->lost_packets)
 					sta->lost_packets = 0;
-			} else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
-				cfg80211_cqm_pktloss_notify(sta->sdata->dev,
-							    sta->sta.addr,
-							    sta->lost_packets,
-							    GFP_ATOMIC);
-				sta->lost_packets = 0;
+			} else {
+				ieee80211_lost_packet(sta, skb);
 			}
 		}
 

diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
new file mode 100644
index 0000000..652813b
--- /dev/null
+++ b/net/mac80211/tdls.c

@@ -0,0 +1,325 @@
+/*
+ * mac80211 TDLS handling code
+ *
+ * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2014, Intel Corporation
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/ieee80211.h>
+#include "ieee80211_i.h"
+
+/* Append a 7-byte Extended Capabilities IE to @skb: four zero octets
+ * followed by WLAN_EXT_CAPA5_TDLS_ENABLED.
+ */
+static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
+{
+	u8 *ie = (void *)skb_put(skb, 7);
+
+	ie[0] = WLAN_EID_EXT_CAPABILITY;
+	ie[1] = 5; /* len */
+	memset(ie + 2, 0, 4);
+	ie[6] = WLAN_EXT_CAPA5_TDLS_ENABLED;
+}
+
+/* TDLS capability field: short slot/preamble bits on the 2GHZ band, or 0. */
+static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	u16 capab = 0;
+
+	/* neither bit is advertised on any other band */
+	if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
+		return 0;
+
+	if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
+		capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+	if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
+		capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+	return capab;
+}
+
+/* Append a Link Identifier IE; @src_addr fills init_sta, @peer resp_sta. */
+static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+				       const u8 *peer, const u8 *bssid)
+{
+	struct ieee80211_tdls_lnkie *ie;
+
+	ie = (void *)skb_put(skb, sizeof(*ie));
+
+	ie->ie_type = WLAN_EID_LINK_ID;
+	ie->ie_len = sizeof(*ie) - 2;
+	memcpy(ie->bssid, bssid, ETH_ALEN);
+	memcpy(ie->init_sta, src_addr, ETH_ALEN);
+	memcpy(ie->resp_sta, peer, ETH_ALEN);
+}
+
+/* Fill @skb with a data-encapsulated TDLS frame: the common header, then
+ * the fixed fields (and, for setup request/response, the rate and ext capab
+ * IEs) selected by @action_code. Returns 0, or -EINVAL for other codes.
+ */
+static int
+ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
+			       const u8 *peer, u8 action_code, u8 dialog_token,
+			       u16 status_code, struct sk_buff *skb)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+	struct ieee80211_tdls_data *tf;
+
+	/* everything up to the action-specific union is common */
+	tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+
+	memcpy(tf->da, peer, ETH_ALEN);
+	memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
+	tf->ether_type = cpu_to_be16(ETH_P_TDLS);
+	tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
+
+	switch (action_code) {
+	case WLAN_TDLS_SETUP_REQUEST:
+		tf->category = WLAN_CATEGORY_TDLS;
+		tf->action_code = WLAN_TDLS_SETUP_REQUEST;
+		skb_put(skb, sizeof(tf->u.setup_req));
+		tf->u.setup_req.dialog_token = dialog_token;
+		tf->u.setup_req.capability =
+			cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+		break;
+	case WLAN_TDLS_SETUP_RESPONSE:
+		tf->category = WLAN_CATEGORY_TDLS;
+		tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
+		skb_put(skb, sizeof(tf->u.setup_resp));
+		tf->u.setup_resp.status_code = cpu_to_le16(status_code);
+		tf->u.setup_resp.dialog_token = dialog_token;
+		tf->u.setup_resp.capability =
+			cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+		break;
+	case WLAN_TDLS_SETUP_CONFIRM:
+		tf->category = WLAN_CATEGORY_TDLS;
+		tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
+		skb_put(skb, sizeof(tf->u.setup_cfm));
+		tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
+		tf->u.setup_cfm.dialog_token = dialog_token;
+		break;
+	case WLAN_TDLS_TEARDOWN:
+		tf->category = WLAN_CATEGORY_TDLS;
+		tf->action_code = WLAN_TDLS_TEARDOWN;
+		skb_put(skb, sizeof(tf->u.teardown));
+		tf->u.teardown.reason_code = cpu_to_le16(status_code);
+		break;
+	case WLAN_TDLS_DISCOVERY_REQUEST:
+		tf->category = WLAN_CATEGORY_TDLS;
+		tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
+		skb_put(skb, sizeof(tf->u.discover_req));
+		tf->u.discover_req.dialog_token = dialog_token;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* both setup request and response are followed by these IEs */
+	if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+	    action_code == WLAN_TDLS_SETUP_RESPONSE) {
+		ieee80211_add_srates_ie(sdata, skb, false, band);
+		ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+		ieee80211_tdls_add_ext_capab(skb);
+	}
+
+	return 0;
+}
+
+/* Build a directly sent TDLS action frame in @skb. The 24-byte management
+ * header is always written; only the discovery response body is supported
+ * and any other @action_code returns -EINVAL.
+ */
+static int
+ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
+			   const u8 *peer, u8 action_code, u8 dialog_token,
+			   u16 status_code, struct sk_buff *skb)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+	struct ieee80211_mgmt *mgmt;
+
+	/* 24-byte header: zeroed, then DA/SA/BSSID and frame control set */
+	mgmt = (void *)skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, peer, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+	memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+
+	if (action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES)
+		return -EINVAL;
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
+	mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
+	mgmt->u.action.u.tdls_discover_resp.action_code =
+		WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
+	mgmt->u.action.u.tdls_discover_resp.dialog_token = dialog_token;
+	mgmt->u.action.u.tdls_discover_resp.capability =
+		cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+	ieee80211_add_srates_ie(sdata, skb, false, band);
+	ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+	ieee80211_tdls_add_ext_capab(skb);
+
+	return 0;
+}
+
+/* Build the TDLS frame selected by @action_code for @peer and transmit it;
+ * @extra_ies are copied in ahead of the link IE. Returns a negative errno
+ * on a failed check or build, else 0 or the subif xmit return value.
+ */
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, u8 action_code, u8 dialog_token,
+			u16 status_code, u32 peer_capability,
+			const u8 *extra_ies, size_t extra_ies_len)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb = NULL;
+	bool send_direct;
+	int ret;
+
+	if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+		return -ENOTSUPP;
+
+	/* make sure we are in managed mode, and associated */
+	if (sdata->vif.type != NL80211_IFTYPE_STATION ||
+	    !sdata->u.mgd.associated)
+		return -EINVAL;
+
+	tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", action_code, peer);
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    max(sizeof(struct ieee80211_mgmt),
+				sizeof(struct ieee80211_tdls_data)) +
+			    50 + /* supported rates */
+			    7 + /* ext capab */
+			    extra_ies_len +
+			    sizeof(struct ieee80211_tdls_lnkie));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	switch (action_code) {
+	case WLAN_TDLS_SETUP_REQUEST:
+	case WLAN_TDLS_SETUP_RESPONSE:
+	case WLAN_TDLS_SETUP_CONFIRM:
+	case WLAN_TDLS_TEARDOWN:
+	case WLAN_TDLS_DISCOVERY_REQUEST:
+		ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
+						     action_code, dialog_token,
+						     status_code, skb);
+		send_direct = false;
+		break;
+	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+		ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
+						 dialog_token, status_code,
+						 skb);
+		send_direct = true;
+		break;
+	default:
+		ret = -ENOTSUPP;
+		break;
+	}
+
+	if (ret < 0)
+		goto fail;
+
+	if (extra_ies_len)
+		memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+
+	/* the TDLS link IE is always added last */
+	switch (action_code) {
+	case WLAN_TDLS_SETUP_REQUEST:
+	case WLAN_TDLS_SETUP_CONFIRM:
+	case WLAN_TDLS_TEARDOWN:
+	case WLAN_TDLS_DISCOVERY_REQUEST:
+		/* we are the initiator */
+		ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
+					   sdata->u.mgd.bssid);
+		break;
+	case WLAN_TDLS_SETUP_RESPONSE:
+	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+		/* we are the responder */
+		ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
+					   sdata->u.mgd.bssid);
+		break;
+	default:
+		ret = -ENOTSUPP;
+		goto fail;
+	}
+
+	if (send_direct) {
+		ieee80211_tx_skb(sdata, skb);
+		return 0;
+	}
+
+	/*
+	 * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
+	 * we should default to AC_VI.
+	 */
+	if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+	    action_code == WLAN_TDLS_SETUP_RESPONSE) {
+		skb_set_queue_mapping(skb, IEEE80211_AC_BK);
+		skb->priority = 2;
+	} else {
+		skb_set_queue_mapping(skb, IEEE80211_AC_VI);
+		skb->priority = 5;
+	}
+
+	/* disable bottom halves when entering the Tx path */
+	local_bh_disable();
+	ret = ieee80211_subif_start_xmit(skb, dev);
+	local_bh_enable();
+
+	return ret;
+
+fail:
+	dev_kfree_skb(skb);
+	return ret;
+}
+
+/* Enable or disable the TDLS link to @peer; other @oper values fail. */
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, enum nl80211_tdls_operation oper)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct sta_info *sta;
+	int ret = 0;
+
+	if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+		return -ENOTSUPP;
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EINVAL;
+
+	tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
+
+	switch (oper) {
+	case NL80211_TDLS_ENABLE_LINK:
+		/* look up and flag the peer's station entry under RCU */
+		rcu_read_lock();
+		sta = sta_info_get(sdata, peer);
+		if (sta)
+			set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
+		else
+			ret = -ENOLINK;
+		rcu_read_unlock();
+		break;
+	case NL80211_TDLS_DISABLE_LINK:
+		return sta_info_destroy_addr(sdata, peer);
+	case NL80211_TDLS_TEARDOWN:
+	case NL80211_TDLS_SETUP:
+	case NL80211_TDLS_DISCOVERY_REQ:
+		/* We don't support in-driver setup/teardown/discovery */
+	default:
+		return -ENOTSUPP;
+	}
+
+	return ret;
+}

diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index cec5b60..cfe1a06 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h

@@ -184,6 +184,20 @@
 		  "true" : "false")
 );
 
+/*
+ * Trace event that stores a u32 value @ret next to the LOCAL_ENTRY fields
+ * and prints it as an unsigned decimal.
+ */
+TRACE_EVENT(drv_return_u32,
+	TP_PROTO(struct ieee80211_local *local, u32 ret),
+	TP_ARGS(local, ret),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, ret)
+	),
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->ret = ret;
+	),
+	TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret)
+);
+
 TRACE_EVENT(drv_return_u64,
 	TP_PROTO(struct ieee80211_local *local, u64 ret),
 	TP_ARGS(local, ret),
@@ -1375,6 +1389,91 @@
 	)
 );
 
+/*
+ * Packed record types used as the element layout of the "vifs" dynamic
+ * array of the drv_switch_vif_chanctx trace event.  The __TRACE_VIF_ENTRY
+ * guard keeps them from being defined a second time.
+ */
+#if !defined(__TRACE_VIF_ENTRY)
+#define __TRACE_VIF_ENTRY
+/* interface type, p2p flag and interface name of one vif */
+struct trace_vif_entry {
+	enum nl80211_iftype vif_type;
+	bool p2p;
+	char vif_name[IFNAMSIZ];
+} __packed;
+
+/* the four values copied out of a channel definition */
+struct trace_chandef_entry {
+	u32 control_freq;
+	u32 chan_width;
+	u32 center_freq1;
+	u32 center_freq2;
+} __packed;
+
+/* one vif together with its old and new channel definition */
+struct trace_switch_entry {
+	struct trace_vif_entry vif;
+	struct trace_chandef_entry old_chandef;
+	struct trace_chandef_entry new_chandef;
+} __packed;
+
+/* relies on "local_vifs", "vifs" and "i" being in scope at the use site */
+#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from
+#endif
+
+/*
+ * Trace event for a channel context switch of @n_vifs interfaces.
+ *
+ * Besides the count and the switch mode, one struct trace_switch_entry per
+ * vif is copied into a dynamic byte array (vif type/p2p/name plus old and
+ * new channel definition).  Only n_vifs and mode are printed by TP_printk.
+ */
+TRACE_EVENT(drv_switch_vif_chanctx,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_vif_chanctx_switch *vifs,
+		 int n_vifs, enum ieee80211_chanctx_switch_mode mode),
+	    TP_ARGS(local, vifs, n_vifs, mode),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(int, n_vifs)
+		__field(u32, mode)
+		__dynamic_array(u8, vifs,
+				sizeof(struct trace_switch_entry) * n_vifs)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->n_vifs = n_vifs;
+		__entry->mode = mode;
+		{
+			/* view the raw byte array as an array of records */
+			struct trace_switch_entry *local_vifs =
+				__get_dynamic_array(vifs);
+			int i;
+
+			for (i = 0; i < n_vifs; i++) {
+				struct ieee80211_sub_if_data *sdata;
+
+				/* sdata is only needed for the interface name */
+				sdata = container_of(vifs[i].vif,
+						struct ieee80211_sub_if_data,
+						vif);
+
+				SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type);
+				SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p);
+				/* NOTE(review): strncpy() leaves vif_name
+				 * unterminated if sdata->name fills the whole
+				 * buffer - confirm the name is always shorter.
+				 */
+				strncpy(local_vifs[i].vif.vif_name,
+					sdata->name,
+					sizeof(local_vifs[i].vif.vif_name));
+				SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
+						old_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
+						    old_ctx->def.width);
+				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
+						    old_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
+						    old_ctx->def.center_freq2);
+				SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
+						new_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
+						    new_ctx->def.width);
+				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
+						    new_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
+						    new_ctx->def.center_freq2);
+			}
+		}
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " n_vifs:%d mode:%d",
+		LOCAL_PR_ARG, __entry->n_vifs, __entry->mode
+	)
+);
+
 DECLARE_EVENT_CLASS(local_sdata_chanctx,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
@@ -1499,6 +1598,24 @@
 	TP_ARGS(local, sdata)
 );
 
+/*
+ * Trace event taking only a station; it records and prints the STA_ENTRY
+ * fields and nothing else.
+ */
+TRACE_EVENT(drv_get_expected_throughput,
+	TP_PROTO(struct ieee80211_sta *sta),
+
+	TP_ARGS(sta),
+
+	TP_STRUCT__entry(
+		STA_ENTRY
+	),
+
+	TP_fast_assign(
+		STA_ASSIGN;
+	),
+
+	TP_printk(
+		STA_PR_FMT, STA_PR_ARG
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 19d36d4..5214686 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c

@@ -2328,7 +2328,8 @@
 /* functions for drivers to get certain frames */
 
 static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
-				       struct ps_data *ps, struct sk_buff *skb)
+				       struct ps_data *ps, struct sk_buff *skb,
+				       bool is_template)
 {
 	u8 *pos, *tim;
 	int aid0 = 0;
@@ -2341,11 +2342,12 @@
 		 * checking byte-for-byte */
 		have_bits = !bitmap_empty((unsigned long *)ps->tim,
 					  IEEE80211_MAX_AID+1);
-
-	if (ps->dtim_count == 0)
-		ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
-	else
-		ps->dtim_count--;
+	if (!is_template) {
+		if (ps->dtim_count == 0)
+			ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
+		else
+			ps->dtim_count--;
+	}
 
 	tim = pos = (u8 *) skb_put(skb, 6);
 	*pos++ = WLAN_EID_TIM;
@@ -2391,7 +2393,8 @@
 }
 
 static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
-				    struct ps_data *ps, struct sk_buff *skb)
+				    struct ps_data *ps, struct sk_buff *skb,
+				    bool is_template)
 {
 	struct ieee80211_local *local = sdata->local;
 
@@ -2403,24 +2406,24 @@
 	 * of the tim bitmap in mac80211 and the driver.
 	 */
 	if (local->tim_in_locked_section) {
-		__ieee80211_beacon_add_tim(sdata, ps, skb);
+		__ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
 	} else {
 		spin_lock_bh(&local->tim_lock);
-		__ieee80211_beacon_add_tim(sdata, ps, skb);
+		__ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
 		spin_unlock_bh(&local->tim_lock);
 	}
 
 	return 0;
 }
 
-static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
-				 struct beacon_data *beacon)
+static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
+			      struct beacon_data *beacon)
 {
 	struct probe_resp *resp;
-	int counter_offset_beacon = sdata->csa_counter_offset_beacon;
-	int counter_offset_presp = sdata->csa_counter_offset_presp;
 	u8 *beacon_data;
 	size_t beacon_data_len;
+	int i;
+	u8 count = sdata->csa_current_counter;
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP:
@@ -2438,40 +2441,57 @@
 	default:
 		return;
 	}
-	if (WARN_ON(counter_offset_beacon >= beacon_data_len))
-		return;
 
-	/* Warn if the driver did not check for/react to csa
-	 * completeness.  A beacon with CSA counter set to 0 should
-	 * never occur, because a counter of 1 means switch just
-	 * before the next beacon.
-	 */
-	if (WARN_ON(beacon_data[counter_offset_beacon] == 1))
-		return;
+	for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) {
+		u16 counter_offset_beacon =
+			sdata->csa_counter_offset_beacon[i];
+		u16 counter_offset_presp = sdata->csa_counter_offset_presp[i];
 
-	beacon_data[counter_offset_beacon]--;
+		if (counter_offset_beacon) {
+			if (WARN_ON(counter_offset_beacon >= beacon_data_len))
+				return;
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) {
-		rcu_read_lock();
-		resp = rcu_dereference(sdata->u.ap.probe_resp);
-
-		/* if nl80211 accepted the offset, this should not happen. */
-		if (WARN_ON(!resp)) {
-			rcu_read_unlock();
-			return;
+			beacon_data[counter_offset_beacon] = count;
 		}
-		resp->data[counter_offset_presp]--;
-		rcu_read_unlock();
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP &&
+		    counter_offset_presp) {
+			rcu_read_lock();
+			resp = rcu_dereference(sdata->u.ap.probe_resp);
+
+			/* If nl80211 accepted the offset, this should
+			 * not happen.
+			 */
+			if (WARN_ON(!resp)) {
+				rcu_read_unlock();
+				return;
+			}
+			resp->data[counter_offset_presp] = count;
+			rcu_read_unlock();
+		}
 	}
 }
 
+/*
+ * Decrement the interface's current CSA counter by one and return the new
+ * value.  A WARN is raised if the decrement produces 0; the value is
+ * returned regardless.
+ */
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+	sdata->csa_current_counter--;
+
+	/* the counter should never reach 0 */
+	WARN_ON(!sdata->csa_current_counter);
+
+	return sdata->csa_current_counter;
+}
+EXPORT_SYMBOL(ieee80211_csa_update_counter);
+
 bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct beacon_data *beacon = NULL;
 	u8 *beacon_data;
 	size_t beacon_data_len;
-	int counter_beacon = sdata->csa_counter_offset_beacon;
+	int counter_beacon = sdata->csa_counter_offset_beacon[0];
 	int ret = false;
 
 	if (!ieee80211_sdata_running(sdata))
@@ -2521,9 +2541,11 @@
 }
 EXPORT_SYMBOL(ieee80211_csa_is_complete);
 
-struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
-					 struct ieee80211_vif *vif,
-					 u16 *tim_offset, u16 *tim_length)
+static struct sk_buff *
+__ieee80211_beacon_get(struct ieee80211_hw *hw,
+		       struct ieee80211_vif *vif,
+		       struct ieee80211_mutable_offsets *offs,
+		       bool is_template)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb = NULL;
@@ -2532,6 +2554,7 @@
 	enum ieee80211_band band;
 	struct ieee80211_tx_rate_control txrc;
 	struct ieee80211_chanctx_conf *chanctx_conf;
+	int csa_off_base = 0;
 
 	rcu_read_lock();
 
@@ -2541,18 +2564,20 @@
 	if (!ieee80211_sdata_running(sdata) || !chanctx_conf)
 		goto out;
 
-	if (tim_offset)
-		*tim_offset = 0;
-	if (tim_length)
-		*tim_length = 0;
+	if (offs)
+		memset(offs, 0, sizeof(*offs));
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		struct ieee80211_if_ap *ap = &sdata->u.ap;
 		struct beacon_data *beacon = rcu_dereference(ap->beacon);
 
 		if (beacon) {
-			if (sdata->vif.csa_active)
-				ieee80211_update_csa(sdata, beacon);
+			if (sdata->vif.csa_active) {
+				if (!is_template)
+					ieee80211_csa_update_counter(vif);
+
+				ieee80211_set_csa(sdata, beacon);
+			}
 
 			/*
 			 * headroom, head length,
@@ -2569,12 +2594,16 @@
 			memcpy(skb_put(skb, beacon->head_len), beacon->head,
 			       beacon->head_len);
 
-			ieee80211_beacon_add_tim(sdata, &ap->ps, skb);
+			ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
+						 is_template);
 
-			if (tim_offset)
-				*tim_offset = beacon->head_len;
-			if (tim_length)
-				*tim_length = skb->len - beacon->head_len;
+			if (offs) {
+				offs->tim_offset = beacon->head_len;
+				offs->tim_length = skb->len - beacon->head_len;
+
+				/* for AP the csa offsets are from tail */
+				csa_off_base = skb->len;
+			}
 
 			if (beacon->tail)
 				memcpy(skb_put(skb, beacon->tail_len),
@@ -2589,9 +2618,12 @@
 		if (!presp)
 			goto out;
 
-		if (sdata->vif.csa_active)
-			ieee80211_update_csa(sdata, presp);
+		if (sdata->vif.csa_active) {
+			if (!is_template)
+				ieee80211_csa_update_counter(vif);
 
+			ieee80211_set_csa(sdata, presp);
+		}
 
 		skb = dev_alloc_skb(local->tx_headroom + presp->head_len +
 				    local->hw.extra_beacon_tailroom);
@@ -2611,8 +2643,17 @@
 		if (!bcn)
 			goto out;
 
-		if (sdata->vif.csa_active)
-			ieee80211_update_csa(sdata, bcn);
+		if (sdata->vif.csa_active) {
+			if (!is_template)
+				/* TODO: For mesh csa_counter is in TU, so
+				 * decrementing it by one isn't correct, but
+				 * for now we leave it consistent with overall
+				 * mac80211's behavior.
+				 */
+				ieee80211_csa_update_counter(vif);
+
+			ieee80211_set_csa(sdata, bcn);
+		}
 
 		if (ifmsh->sync_ops)
 			ifmsh->sync_ops->adjust_tbtt(sdata, bcn);
@@ -2626,13 +2667,33 @@
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
 		memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len);
-		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb);
+		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
+
+		if (offs) {
+			offs->tim_offset = bcn->head_len;
+			offs->tim_length = skb->len - bcn->head_len;
+		}
+
 		memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len);
 	} else {
 		WARN_ON(1);
 		goto out;
 	}
 
+	/* CSA offsets */
+	if (offs) {
+		int i;
+
+		for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
+			u16 csa_off = sdata->csa_counter_offset_beacon[i];
+
+			if (!csa_off)
+				continue;
+
+			offs->csa_counter_offs[i] = csa_off_base + csa_off;
+		}
+	}
+
 	band = chanctx_conf->def.chan->band;
 
 	info = IEEE80211_SKB_CB(skb);
@@ -2663,6 +2724,32 @@
  out:
 	rcu_read_unlock();
 	return skb;
+
+}
+
+/*
+ * Build a beacon in "template" mode: __ieee80211_beacon_get() is called
+ * with is_template set, and @offs (if non-NULL) receives the mutable
+ * offsets it computes.  Returns whatever skb that helper returns.
+ */
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      struct ieee80211_mutable_offsets *offs)
+{
+	struct sk_buff *skb;
+
+	skb = __ieee80211_beacon_get(hw, vif, offs, true);
+
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_beacon_get_template);
+
+/*
+ * Build a beacon in non-template mode and hand the TIM offset/length back
+ * through the optional @tim_offset / @tim_length pointers.  Both outputs
+ * are 0 when __ieee80211_beacon_get() does not fill them in.
+ */
+struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
+					 struct ieee80211_vif *vif,
+					 u16 *tim_offset, u16 *tim_length)
+{
+	struct ieee80211_mutable_offsets offs = {};
+	struct sk_buff *skb;
+
+	skb = __ieee80211_beacon_get(hw, vif, &offs, false);
+
+	/* either output pointer may be NULL */
+	if (tim_offset)
+		*tim_offset = offs.tim_offset;
+	if (tim_length)
+		*tim_length = offs.tim_length;
+
+	return skb;
 }
 EXPORT_SYMBOL(ieee80211_beacon_get_tim);
 

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3c36583..6886601 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c

@@ -554,7 +554,7 @@
 	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_FLUSH);
 
-	drv_flush(local, queues, false);
+	drv_flush(local, sdata, queues, false);
 
 	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_FLUSH);
@@ -1457,6 +1457,44 @@
 	drv_stop(local);
 }
 
+/*
+ * Put @local into a stopped state after a failed reconfiguration: clear
+ * the resuming/suspended/started flags, end any scheduled scan, mark all
+ * interfaces and channel contexts as no longer present in the driver and
+ * finally ask cfg80211 to shut down all interfaces.
+ */
+static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_chanctx *ctx;
+
+	/*
+	 * We get here if during resume the device can't be restarted properly.
+	 * We might also get here if this happens during HW reset, which is a
+	 * slightly different situation and we need to drop all connections in
+	 * the latter case.
+	 *
+	 * Ask cfg80211 to turn off all interfaces, this will result in more
+	 * warnings but at least we'll then get into a clean stopped state.
+	 */
+
+	local->resuming = false;
+	local->suspended = false;
+	local->started = false;
+
+	/* scheduled scan clearly can't be running any more, but tell
+	 * cfg80211 and clear local state
+	 */
+	ieee80211_sched_scan_end(local);
+
+	/* the interfaces are no longer known to the driver */
+	list_for_each_entry(sdata, &local->interfaces, list)
+		sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+
+	/* Mark channel contexts as not being in the driver any more to avoid
+	 * removing them from the driver during the shutdown process...
+	 */
+	mutex_lock(&local->chanctx_mtx);
+	list_for_each_entry(ctx, &local->chanctx_list, list)
+		ctx->driver_present = false;
+	mutex_unlock(&local->chanctx_mtx);
+
+	cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+}
+
 static void ieee80211_assign_chanctx(struct ieee80211_local *local,
 				     struct ieee80211_sub_if_data *sdata)
 {
@@ -1520,9 +1558,11 @@
 	 */
 	res = drv_start(local);
 	if (res) {
-		WARN(local->suspended, "Hardware became unavailable "
-		     "upon resume. This could be a software issue "
-		     "prior to suspend or a hardware issue.\n");
+		if (local->suspended)
+			WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
+		else
+			WARN(1, "Hardware became unavailable during restart.\n");
+		ieee80211_handle_reconfig_failure(local);
 		return res;
 	}
 
@@ -1546,7 +1586,7 @@
 		WARN_ON(local->resuming);
 		res = drv_add_interface(local, sdata);
 		if (WARN_ON(res)) {
-			rcu_assign_pointer(local->monitor_sdata, NULL);
+			RCU_INIT_POINTER(local->monitor_sdata, NULL);
 			synchronize_net();
 			kfree(sdata);
 		}
@@ -1565,17 +1605,17 @@
 		list_for_each_entry(ctx, &local->chanctx_list, list)
 			WARN_ON(drv_add_chanctx(local, ctx));
 		mutex_unlock(&local->chanctx_mtx);
-	}
 
-	list_for_each_entry(sdata, &local->interfaces, list) {
-		if (!ieee80211_sdata_running(sdata))
-			continue;
-		ieee80211_assign_chanctx(local, sdata);
-	}
+		list_for_each_entry(sdata, &local->interfaces, list) {
+			if (!ieee80211_sdata_running(sdata))
+				continue;
+			ieee80211_assign_chanctx(local, sdata);
+		}
 
-	sdata = rtnl_dereference(local->monitor_sdata);
-	if (sdata && ieee80211_sdata_running(sdata))
-		ieee80211_assign_chanctx(local, sdata);
+		sdata = rtnl_dereference(local->monitor_sdata);
+		if (sdata && ieee80211_sdata_running(sdata))
+			ieee80211_assign_chanctx(local, sdata);
+	}
 
 	/* add STAs back */
 	mutex_lock(&local->sta_mtx);
@@ -1671,13 +1711,10 @@
 			}
 			break;
 		case NL80211_IFTYPE_WDS:
-			break;
 		case NL80211_IFTYPE_AP_VLAN:
 		case NL80211_IFTYPE_MONITOR:
-			/* ignore virtual */
-			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
-			changed = BSS_CHANGED_IDLE;
+			/* nothing to do */
 			break;
 		case NL80211_IFTYPE_UNSPECIFIED:
 		case NUM_NL80211_IFTYPES:
@@ -2797,3 +2834,121 @@
 
 	ps->dtim_count = dtim_count;
 }
+
+/*
+ * Check whether @sdata may use @chandef in mode @chanmode, with the given
+ * @radar_detect width bitmap, alongside the existing channel contexts and
+ * interfaces.
+ *
+ * Must be called with local->chanctx_mtx held.  Returns 0 when the
+ * combination is accepted, -EINVAL on invalid arguments, otherwise the
+ * result of cfg80211_check_combinations().
+ */
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 enum ieee80211_chanctx_mode chanmode,
+				 u8 radar_detect)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sub_if_data *sdata_iter;
+	enum nl80211_iftype iftype = sdata->wdev.iftype;
+	int num[NUM_NL80211_IFTYPES];
+	struct ieee80211_chanctx *ctx;
+	int num_different_channels = 0;
+	int total = 1;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	/* the caller may request radar detection for at most one width */
+	if (WARN_ON(hweight32(radar_detect) > 1))
+		return -EINVAL;
+
+	if (WARN_ON(chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
+		    !chandef->chan))
+		return -EINVAL;
+
+	/* a requested chandef counts as one channel to start with */
+	if (chandef)
+		num_different_channels = 1;
+
+	if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
+		return -EINVAL;
+
+	/* Always allow software iftypes */
+	if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
+		if (radar_detect)
+			return -EINVAL;
+		return 0;
+	}
+
+	memset(num, 0, sizeof(num));
+
+	if (iftype != NL80211_IFTYPE_UNSPECIFIED)
+		num[iftype] = 1;
+
+	/* Count channels: exclusive contexts always count, other contexts
+	 * are skipped only when a shared chandef is compatible with them.
+	 * Widths of radar-enabled contexts are merged into radar_detect.
+	 */
+	list_for_each_entry(ctx, &local->chanctx_list, list) {
+		if (ctx->conf.radar_enabled)
+			radar_detect |= BIT(ctx->conf.def.width);
+		if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
+			num_different_channels++;
+			continue;
+		}
+		if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
+		    cfg80211_chandef_compatible(chandef,
+						&ctx->conf.def))
+			continue;
+		num_different_channels++;
+	}
+
+	/* Count the other interfaces that have a channel context assigned
+	 * and are not software iftypes.
+	 */
+	list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
+		struct wireless_dev *wdev_iter;
+
+		wdev_iter = &sdata_iter->wdev;
+
+		if (sdata_iter == sdata ||
+		    rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+			continue;
+
+		num[wdev_iter->iftype]++;
+		total++;
+	}
+
+	/* a single interface without radar detection needs no check */
+	if (total == 1 && !radar_detect)
+		return 0;
+
+	return cfg80211_check_combinations(local->hw.wiphy,
+					   num_different_channels,
+					   radar_detect, num);
+}
+
+/*
+ * Combination iterator callback: @data points to a u32 running maximum,
+ * which is raised to @c->num_different_channels when that is larger.
+ */
+static void
+ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
+			 void *data)
+{
+	u32 *running_max = data;
+
+	if (c->num_different_channels > *running_max)
+		*running_max = c->num_different_channels;
+}
+
+/*
+ * Return the largest num_different_channels among the interface
+ * combinations that cfg80211_iter_combinations() visits for the current
+ * set of channel contexts and interfaces (at least 1), or a negative
+ * error from that iteration.
+ *
+ * Must be called with local->chanctx_mtx held.
+ */
+int ieee80211_max_num_channels(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int num[NUM_NL80211_IFTYPES] = {};
+	struct ieee80211_chanctx *ctx;
+	int num_different_channels = 0;
+	u8 radar_detect = 0;
+	u32 max_num_different_channels = 1;
+	int err;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	/* every existing context counts as one channel in use */
+	list_for_each_entry(ctx, &local->chanctx_list, list) {
+		num_different_channels++;
+
+		if (ctx->conf.radar_enabled)
+			radar_detect |= BIT(ctx->conf.def.width);
+	}
+
+	/* per-iftype interface counts */
+	list_for_each_entry_rcu(sdata, &local->interfaces, list)
+		num[sdata->wdev.iftype]++;
+
+	err = cfg80211_iter_combinations(local->hw.wiphy,
+					 num_different_channels, radar_detect,
+					 num, ieee80211_iter_max_chans,
+					 &max_num_different_channels);
+	if (err < 0)
+		return err;
+
+	return max_num_different_channels;
+}

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b8600e3..9b3dcc2 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c

@@ -406,7 +406,10 @@
 
 	if (info->control.hw_key &&
 	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
-	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) {
+	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
+	    !((info->control.hw_key->flags &
+	       IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) &&
+	      ieee80211_is_mgmt(hdr->frame_control))) {
 		/*
 		 * hwaccel has no need for preallocated room for CCMP
 		 * header or MIC fields

diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index b33dd76..1818a99 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig

@@ -2,6 +2,10 @@
 	tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)"
 	depends on IEEE802154
 	select CRC_CCITT
+	select CRYPTO_AUTHENC
+	select CRYPTO_CCM
+	select CRYPTO_CTR
+	select CRYPTO_AES
 	---help---
 	  This option enables the hardware independent IEEE 802.15.4
 	  networking stack for SoftMAC devices (the ones implementing

diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 15d62df5..9723d6f 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile

@@ -1,4 +1,5 @@
 obj-$(CONFIG_MAC802154)	+= mac802154.o
-mac802154-objs		:= ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o
+mac802154-objs		:= ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o \
+			   monitor.o wpan.o llsec.o
 
 ccflags-y += -D__CHECK_ENDIAN__

diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
new file mode 100644
index 0000000..1456f73
--- /dev/null
+++ b/net/mac802154/llsec.c

@@ -0,0 +1,1070 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/completion.h>
+#include <net/ieee802154.h>
+#include <crypto/algapi.h>
+
+#include "mac802154.h"
+#include "llsec.h"
+
+static void llsec_key_put(struct mac802154_llsec_key *key);
+static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
+			       const struct ieee802154_llsec_key_id *b);
+
+static void llsec_dev_free(struct mac802154_llsec_device *dev);
+
+/*
+ * Bring @sec into its initial state: everything zeroed, the default key
+ * source set to all 0xFF bytes, and the lock, lists and hash tables
+ * initialised empty.
+ */
+void mac802154_llsec_init(struct mac802154_llsec *sec)
+{
+	memset(sec, 0, sizeof(*sec));
+
+	rwlock_init(&sec->lock);
+
+	/* lookup tables start out empty */
+	hash_init(sec->devices_hw);
+	hash_init(sec->devices_short);
+	INIT_LIST_HEAD(&sec->table.keys);
+	INIT_LIST_HEAD(&sec->table.devices);
+	INIT_LIST_HEAD(&sec->table.security_levels);
+
+	/* the only parameter with a non-zero default */
+	memset(&sec->params.default_key_source, 0xFF, IEEE802154_ADDR_LEN);
+}
+
+/*
+ * Free every security level, device and key entry still linked into
+ * @sec's tables.
+ *
+ * NOTE(review): entries are unlinked with plain list_del() and freed
+ * immediately, without an RCU grace period - presumably this is only
+ * called once no reader can reach @sec any more; confirm at call sites.
+ */
+void mac802154_llsec_destroy(struct mac802154_llsec *sec)
+{
+	struct ieee802154_llsec_seclevel *sl, *sn;
+	struct ieee802154_llsec_device *dev, *dn;
+	struct ieee802154_llsec_key_entry *key, *kn;
+
+	list_for_each_entry_safe(sl, sn, &sec->table.security_levels, list) {
+		struct mac802154_llsec_seclevel *msl;
+
+		msl = container_of(sl, struct mac802154_llsec_seclevel, level);
+		list_del(&sl->list);
+		kfree(msl);
+	}
+
+	list_for_each_entry_safe(dev, dn, &sec->table.devices, list) {
+		struct mac802154_llsec_device *mdev;
+
+		mdev = container_of(dev, struct mac802154_llsec_device, dev);
+		list_del(&dev->list);
+		/* also frees the device's per-key entries */
+		llsec_dev_free(mdev);
+	}
+
+	list_for_each_entry_safe(key, kn, &sec->table.keys, list) {
+		struct mac802154_llsec_key *mkey;
+
+		mkey = container_of(key->key, struct mac802154_llsec_key, key);
+		list_del(&key->list);
+		/* drop this entry's reference, then free the entry itself */
+		llsec_key_put(mkey);
+		kfree(key);
+	}
+}
+
+
+
+/*
+ * Copy the current parameter set of @sec into @params while holding the
+ * read lock.  Always returns 0.
+ */
+int mac802154_llsec_get_params(struct mac802154_llsec *sec,
+			       struct ieee802154_llsec_params *params)
+{
+	read_lock_bh(&sec->lock);
+	*params = sec->params;
+	read_unlock_bh(&sec->lock);
+
+	return 0;
+}
+
+/*
+ * Update selected parameters of @sec from @params.
+ *
+ * @changed is a bitmask of IEEE802154_LLSEC_PARAM_* flags; only fields
+ * whose flag is set are copied.  All copies happen under the write lock.
+ * Always returns 0.
+ */
+int mac802154_llsec_set_params(struct mac802154_llsec *sec,
+			       const struct ieee802154_llsec_params *params,
+			       int changed)
+{
+	write_lock_bh(&sec->lock);
+
+	if (changed & IEEE802154_LLSEC_PARAM_ENABLED)
+		sec->params.enabled = params->enabled;
+	if (changed & IEEE802154_LLSEC_PARAM_FRAME_COUNTER)
+		sec->params.frame_counter = params->frame_counter;
+	if (changed & IEEE802154_LLSEC_PARAM_OUT_LEVEL)
+		sec->params.out_level = params->out_level;
+	if (changed & IEEE802154_LLSEC_PARAM_OUT_KEY)
+		sec->params.out_key = params->out_key;
+	if (changed & IEEE802154_LLSEC_PARAM_KEY_SOURCE)
+		sec->params.default_key_source = params->default_key_source;
+	if (changed & IEEE802154_LLSEC_PARAM_PAN_ID)
+		sec->params.pan_id = params->pan_id;
+	if (changed & IEEE802154_LLSEC_PARAM_HWADDR)
+		sec->params.hwaddr = params->hwaddr;
+	if (changed & IEEE802154_LLSEC_PARAM_COORD_HWADDR)
+		sec->params.coord_hwaddr = params->coord_hwaddr;
+	if (changed & IEEE802154_LLSEC_PARAM_COORD_SHORTADDR)
+		sec->params.coord_shortaddr = params->coord_shortaddr;
+
+	write_unlock_bh(&sec->lock);
+
+	return 0;
+}
+
+
+
+/*
+ * Allocate a reference-counted key object initialised from @template.
+ *
+ * One "ccm(aes)" AEAD transform is set up per entry of authsizes[]
+ * (4, 8 and 16 bytes), plus one "ctr(aes)" block cipher in tfm0; all are
+ * keyed with template->key.
+ *
+ * Returns the new key with a reference count of one, or NULL if any
+ * allocation or crypto setup step fails.
+ */
+static struct mac802154_llsec_key*
+llsec_key_alloc(const struct ieee802154_llsec_key *template)
+{
+	const int authsizes[3] = { 4, 8, 16 };
+	struct mac802154_llsec_key *key;
+	int i;
+
+	/* zeroed, so not-yet-allocated tfm slots are NULL during cleanup */
+	key = kzalloc(sizeof(*key), GFP_KERNEL);
+	if (!key)
+		return NULL;
+
+	kref_init(&key->ref);
+	key->key = *template;
+
+	BUILD_BUG_ON(ARRAY_SIZE(authsizes) != ARRAY_SIZE(key->tfm));
+
+	for (i = 0; i < ARRAY_SIZE(key->tfm); i++) {
+		key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0,
+						CRYPTO_ALG_ASYNC);
+		/* failure is reported as ERR_PTR(), never as NULL */
+		if (IS_ERR(key->tfm[i]))
+			goto err_tfm;
+		if (crypto_aead_setkey(key->tfm[i], template->key,
+				       IEEE802154_LLSEC_KEY_SIZE))
+			goto err_tfm;
+		if (crypto_aead_setauthsize(key->tfm[i], authsizes[i]))
+			goto err_tfm;
+	}
+
+	key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(key->tfm0))
+		goto err_tfm;
+
+	if (crypto_blkcipher_setkey(key->tfm0, template->key,
+				    IEEE802154_LLSEC_KEY_SIZE))
+		goto err_tfm0;
+
+	return key;
+
+err_tfm0:
+	crypto_free_blkcipher(key->tfm0);
+err_tfm:
+	/* skip slots that were never allocated or hold an error pointer */
+	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
+		if (!IS_ERR_OR_NULL(key->tfm[i]))
+			crypto_free_aead(key->tfm[i]);
+
+	kfree(key);
+	return NULL;
+}
+
+/*
+ * kref release callback: free all crypto transforms of the key that
+ * embeds @ref, then the key itself.
+ */
+static void llsec_key_release(struct kref *ref)
+{
+	struct mac802154_llsec_key *key;
+	int i;
+
+	key = container_of(ref, struct mac802154_llsec_key, ref);
+
+	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
+		crypto_free_aead(key->tfm[i]);
+
+	crypto_free_blkcipher(key->tfm0);
+	kfree(key);
+}
+
+/* Take an additional reference on @key and return it. */
+static struct mac802154_llsec_key*
+llsec_key_get(struct mac802154_llsec_key *key)
+{
+	kref_get(&key->ref);
+	return key;
+}
+
+/* Drop one reference on @key; llsec_key_release() runs on the last put. */
+static void llsec_key_put(struct mac802154_llsec_key *key)
+{
+	kref_put(&key->ref, llsec_key_release);
+}
+
+/*
+ * Compare two key identifiers.
+ *
+ * Identifiers with different modes never match.  Implicit-mode ids are
+ * compared by device address only; every other known mode requires the
+ * same key id and, for the short/hw index modes, the same key source.
+ * Unknown modes compare unequal.
+ */
+static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
+			       const struct ieee802154_llsec_key_id *b)
+{
+	if (a->mode != b->mode)
+		return false;
+
+	switch (a->mode) {
+	case IEEE802154_SCF_KEY_IMPLICIT:
+		return ieee802154_addr_equal(&a->device_addr, &b->device_addr);
+	case IEEE802154_SCF_KEY_INDEX:
+		return a->id == b->id;
+	case IEEE802154_SCF_KEY_SHORT_INDEX:
+		return a->id == b->id &&
+		       a->short_source == b->short_source;
+	case IEEE802154_SCF_KEY_HW_INDEX:
+		return a->id == b->id &&
+		       a->extended_source == b->extended_source;
+	}
+
+	return false;
+}
+
+/*
+ * Add a key table entry mapping @id to @key.
+ *
+ * If an existing entry already carries the same key bytes, its key object
+ * is shared (one more reference) instead of allocating a new one.
+ *
+ * Returns 0 on success, -EINVAL if @key lists command frame ids without
+ * allowing MAC command frames, -EEXIST if @id is already present or the
+ * same key bytes exist with different frame type/command id settings, and
+ * -ENOMEM if an allocation or key setup fails.
+ */
+int mac802154_llsec_key_add(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_key_id *id,
+			    const struct ieee802154_llsec_key *key)
+{
+	struct mac802154_llsec_key *mkey = NULL;
+	struct ieee802154_llsec_key_entry *pos, *new;
+
+	if (!(key->frame_types & (1 << IEEE802154_FC_TYPE_MAC_CMD)) &&
+	    key->cmd_frame_ids)
+		return -EINVAL;
+
+	list_for_each_entry(pos, &sec->table.keys, list) {
+		if (llsec_key_id_equal(&pos->id, id))
+			return -EEXIST;
+
+		/* different key material: keep looking */
+		if (memcmp(pos->key->key, key->key,
+			   IEEE802154_LLSEC_KEY_SIZE))
+			continue;
+
+		mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+		/* Don't allow multiple instances of the same AES key to have
+		 * different allowed frame types/command frame ids, as this is
+		 * not possible in the 802.15.4 PIB.
+		 */
+		if (pos->key->frame_types != key->frame_types ||
+		    pos->key->cmd_frame_ids != key->cmd_frame_ids)
+			return -EEXIST;
+
+		break;
+	}
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* share the matching key object found above, else create one */
+	if (!mkey)
+		mkey = llsec_key_alloc(key);
+	else
+		mkey = llsec_key_get(mkey);
+
+	if (!mkey)
+		goto fail;
+
+	new->id = *id;
+	new->key = &mkey->key;
+
+	list_add_rcu(&new->list, &sec->table.keys);
+
+	return 0;
+
+fail:
+	kfree(new);
+	return -ENOMEM;
+}
+
+/*
+ * Remove the key table entry whose id equals @key and drop its reference
+ * on the underlying key object.  Returns 0 on success, -ENOENT if no entry
+ * matches.
+ *
+ * NOTE(review): the unlinked entry "pos" itself is never freed here, which
+ * looks like a leak; since it is removed with list_del_rcu() it would need
+ * an RCU-deferred free (the entry type is declared outside this file, so
+ * whether it has an rcu_head is not visible here).  The key reference is
+ * also dropped without waiting for a grace period - confirm that readers
+ * cannot still be using it.
+ */
+int mac802154_llsec_key_del(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_key_id *key)
+{
+	struct ieee802154_llsec_key_entry *pos;
+
+	list_for_each_entry(pos, &sec->table.keys, list) {
+		struct mac802154_llsec_key *mkey;
+
+		mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+		if (llsec_key_id_equal(&pos->id, key)) {
+			list_del_rcu(&pos->list);
+			llsec_key_put(mkey);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+
+
+/*
+ * A short address is usable for lookups unless it is one of the two
+ * placeholder values IEEE802154_ADDR_UNDEF or 0xffff.
+ */
+static bool llsec_dev_use_shortaddr(__le16 short_addr)
+{
+	if (short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF))
+		return false;
+
+	return short_addr != cpu_to_le16(0xffff);
+}
+
+/*
+ * Build the 32-bit hash key for the short-address table: the raw short
+ * address in the upper 16 bits, the raw PAN id in the lower 16 bits.
+ */
+static u32 llsec_dev_hash_short(__le16 short_addr, __le16 pan_id)
+{
+	/* Widen to u32 before shifting: a u16 operand is promoted to
+	 * signed int, and shifting a value with bit 15 set left by 16
+	 * overflows int.
+	 */
+	return ((u32)(__force u16) short_addr) << 16 | (__force u16) pan_id;
+}
+
+/* Hash key for the hardware-address table: the raw 64-bit address value. */
+static u64 llsec_dev_hash_long(__le64 hwaddr)
+{
+	return (__force u64) hwaddr;
+}
+
+/*
+ * Look up a device by (@short_addr, @pan_id) in the short-address hash
+ * table.  Returns the device or NULL if there is no exact match.
+ */
+static struct mac802154_llsec_device*
+llsec_dev_find_short(struct mac802154_llsec *sec, __le16 short_addr,
+		     __le16 pan_id)
+{
+	struct mac802154_llsec_device *dev;
+	u32 key = llsec_dev_hash_short(short_addr, pan_id);
+
+	/* a bucket may hold other devices, so compare the fields again */
+	hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) {
+		if (dev->dev.short_addr == short_addr &&
+		    dev->dev.pan_id == pan_id)
+			return dev;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up a device by hardware address in the hw-address hash table.
+ * Returns the device or NULL if there is no exact match.
+ */
+static struct mac802154_llsec_device*
+llsec_dev_find_long(struct mac802154_llsec *sec, __le64 hwaddr)
+{
+	struct mac802154_llsec_device *dev;
+	u64 key = llsec_dev_hash_long(hwaddr);
+
+	hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) {
+		if (dev->dev.hwaddr == hwaddr)
+			return dev;
+	}
+
+	return NULL;
+}
+
+/*
+ * Free @dev together with every per-device key entry on its key list.
+ * The device is not unlinked from any table here.
+ */
+static void llsec_dev_free(struct mac802154_llsec_device *dev)
+{
+	struct ieee802154_llsec_device_key *pos, *pn;
+	struct mac802154_llsec_device_key *devkey;
+
+	list_for_each_entry_safe(pos, pn, &dev->dev.keys, list) {
+		devkey = container_of(pos, struct mac802154_llsec_device_key,
+				      devkey);
+
+		list_del(&pos->list);
+		kfree(devkey);
+	}
+
+	kfree(dev);
+}
+
+/*
+ * Add a copy of @dev to the device table.
+ *
+ * The device is always indexed by hardware address and, if its short
+ * address is usable, also by (short address, PAN id).
+ *
+ * Returns 0 on success, -EEXIST if a device with the same hardware address
+ * or the same usable short address/PAN id exists, -ENOMEM on allocation
+ * failure.
+ */
+int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_device *dev)
+{
+	struct mac802154_llsec_device *entry;
+	u32 skey = llsec_dev_hash_short(dev->short_addr, dev->pan_id);
+	u64 hwkey = llsec_dev_hash_long(dev->hwaddr);
+
+	BUILD_BUG_ON(sizeof(hwkey) != IEEE802154_ADDR_LEN);
+
+	if ((llsec_dev_use_shortaddr(dev->short_addr) &&
+	     llsec_dev_find_short(sec, dev->short_addr, dev->pan_id)) ||
+	     llsec_dev_find_long(sec, dev->hwaddr))
+		return -EEXIST;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->dev = *dev;
+	spin_lock_init(&entry->lock);
+	/* the copy starts with its own, empty key list */
+	INIT_LIST_HEAD(&entry->dev.keys);
+
+	/* keep bucket_s initialised even when it is not hashed */
+	if (llsec_dev_use_shortaddr(dev->short_addr))
+		hash_add_rcu(sec->devices_short, &entry->bucket_s, skey);
+	else
+		INIT_HLIST_NODE(&entry->bucket_s);
+
+	hash_add_rcu(sec->devices_hw, &entry->bucket_hw, hwkey);
+	list_add_tail_rcu(&entry->dev.list, &sec->table.devices);
+
+	return 0;
+}
+
+/* RCU callback: free the device that embeds @rcu, including its keys. */
+static void llsec_dev_free_rcu(struct rcu_head *rcu)
+{
+	struct mac802154_llsec_device *dev;
+
+	dev = container_of(rcu, struct mac802154_llsec_device, rcu);
+	llsec_dev_free(dev);
+}
+
+/*
+ * Remove the device with hardware address @device_addr from all lookup
+ * structures and free it after an RCU grace period.
+ *
+ * Returns 0 on success, -ENOENT if no such device exists.
+ */
+int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr)
+{
+	struct mac802154_llsec_device *pos;
+
+	pos = llsec_dev_find_long(sec, device_addr);
+	if (!pos)
+		return -ENOENT;
+
+	hash_del_rcu(&pos->bucket_s);
+	hash_del_rcu(&pos->bucket_hw);
+	/* The device is also linked into sec->table.devices; without this
+	 * the list would keep pointing at memory freed by the callback
+	 * below.
+	 */
+	list_del_rcu(&pos->dev.list);
+	call_rcu(&pos->rcu, llsec_dev_free_rcu);
+
+	return 0;
+}
+
+
+
+/*
+ * Find the per-device key entry of @dev whose key id equals @key.
+ * Returns the entry or NULL if the device has none with that id.
+ */
+static struct mac802154_llsec_device_key*
+llsec_devkey_find(struct mac802154_llsec_device *dev,
+		  const struct ieee802154_llsec_key_id *key)
+{
+	struct ieee802154_llsec_device_key *cur;
+
+	list_for_each_entry_rcu(cur, &dev->dev.keys, list) {
+		if (llsec_key_id_equal(key, &cur->key_id))
+			return container_of(cur,
+					    struct mac802154_llsec_device_key,
+					    devkey);
+	}
+
+	return NULL;
+}
+
+/*
+ * Attach a copy of @key to the device with hardware address @dev_addr.
+ *
+ * Returns 0 on success, -ENOENT if the device is unknown, -EEXIST if the
+ * device already has an entry with the same key id, -ENOMEM on allocation
+ * failure.
+ */
+int mac802154_llsec_devkey_add(struct mac802154_llsec *sec,
+			       __le64 dev_addr,
+			       const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_llsec_device *dev;
+	struct mac802154_llsec_device_key *devkey;
+
+	dev = llsec_dev_find_long(sec, dev_addr);
+
+	if (!dev)
+		return -ENOENT;
+
+	if (llsec_devkey_find(dev, &key->key_id))
+		return -EEXIST;
+
+	devkey = kmalloc(sizeof(*devkey), GFP_KERNEL);
+	if (!devkey)
+		return -ENOMEM;
+
+	devkey->devkey = *key;
+	list_add_tail_rcu(&devkey->devkey.list, &dev->dev.keys);
+	return 0;
+}
+
+/* Detach the device key identified by key->key_id from the device with
+ * extended address @dev_addr and free it after an RCU grace period.
+ *
+ * Returns 0 on success, -ENOENT if either the device or the key entry
+ * does not exist.
+ */
+int mac802154_llsec_devkey_del(struct mac802154_llsec *sec,
+			       __le64 dev_addr,
+			       const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_llsec_device *owner;
+	struct mac802154_llsec_device_key *victim = NULL;
+
+	owner = llsec_dev_find_long(sec, dev_addr);
+	if (owner)
+		victim = llsec_devkey_find(owner, &key->key_id);
+
+	/* unknown device and unknown key are reported identically */
+	if (!victim)
+		return -ENOENT;
+
+	list_del_rcu(&victim->devkey.list);
+	kfree_rcu(victim, rcu);
+
+	return 0;
+}
+
+
+
+/* Find the security-level entry in @sec that is identical to @sl.
+ *
+ * Two entries are identical when frame type, device_override and sec_levels
+ * agree; for MAC command frames the command frame id must agree as well.
+ * Returns the containing entry or NULL.
+ */
+static struct mac802154_llsec_seclevel*
+llsec_find_seclevel(const struct mac802154_llsec *sec,
+		    const struct ieee802154_llsec_seclevel *sl)
+{
+	struct ieee802154_llsec_seclevel *cur;
+
+	list_for_each_entry(cur, &sec->table.security_levels, list) {
+		if (cur->frame_type != sl->frame_type)
+			continue;
+
+		/* the command id only matters for MAC command frames */
+		if (cur->frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+		    cur->cmd_frame_id != sl->cmd_frame_id)
+			continue;
+
+		if (cur->device_override != sl->device_override)
+			continue;
+
+		if (cur->sec_levels != sl->sec_levels)
+			continue;
+
+		return container_of(cur, struct mac802154_llsec_seclevel,
+				    level);
+	}
+
+	return NULL;
+}
+
+/* Append a copy of security-level descriptor @sl to the table of @sec.
+ *
+ * Returns 0 on success, -EEXIST if an identical entry is already present
+ * and -ENOMEM on allocation failure.
+ */
+int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec,
+				 const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_llsec_seclevel *fresh;
+
+	if (llsec_find_seclevel(sec, sl) != NULL)
+		return -EEXIST;
+
+	fresh = kmalloc(sizeof(*fresh), GFP_KERNEL);
+	if (fresh == NULL)
+		return -ENOMEM;
+
+	fresh->level = *sl;
+	list_add_tail_rcu(&fresh->level.list, &sec->table.security_levels);
+
+	return 0;
+}
+
+/* Remove the security-level entry identical to @sl from @sec and free it
+ * after an RCU grace period.
+ *
+ * Returns 0 on success or -ENOENT if no such entry exists.
+ */
+int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec,
+				 const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_llsec_seclevel *victim = llsec_find_seclevel(sec, sl);
+
+	if (victim == NULL)
+		return -ENOENT;
+
+	list_del_rcu(&victim->level.list);
+	kfree_rcu(victim, rcu);
+
+	return 0;
+}
+
+
+
+/* Fill @addr with the coordinator address stored in sec->params.
+ *
+ * The PAN id is always written. If the coordinator short address is the
+ * broadcast address nothing else is written and -EINVAL is returned; if it
+ * is IEEE802154_ADDR_UNDEF the coordinator extended address is used;
+ * otherwise the short address is used. Returns 0 in the latter two cases.
+ */
+static int llsec_recover_addr(struct mac802154_llsec *sec,
+			      struct ieee802154_addr *addr)
+{
+	const __le16 coord = sec->params.coord_shortaddr;
+
+	addr->pan_id = sec->params.pan_id;
+
+	if (coord == cpu_to_le16(IEEE802154_ADDR_BROADCAST))
+		return -EINVAL;
+
+	if (coord == cpu_to_le16(IEEE802154_ADDR_UNDEF)) {
+		addr->extended_addr = sec->params.coord_hwaddr;
+		addr->mode = IEEE802154_ADDR_LONG;
+		return 0;
+	}
+
+	addr->short_addr = sec->params.coord_shortaddr;
+	addr->mode = IEEE802154_ADDR_SHORT;
+
+	return 0;
+}
+
+/* Find the key that the security header of @hdr refers to.
+ *
+ * @addr is the frame address used for implicit key lookup (callers in this
+ * file pass the destination when encrypting and the source when decrypting).
+ * If @key_id is non-NULL it receives a copy of the id of the matched entry.
+ *
+ * Returns the result of llsec_key_get() on the matched key, or NULL if no
+ * entry of sec->table.keys matches or the implicit address cannot be
+ * recovered. The list is walked with list_for_each_entry_rcu().
+ */
+static struct mac802154_llsec_key*
+llsec_lookup_key(struct mac802154_llsec *sec,
+		 const struct ieee802154_hdr *hdr,
+		 const struct ieee802154_addr *addr,
+		 struct ieee802154_llsec_key_id *key_id)
+{
+	struct ieee802154_addr devaddr = *addr;
+	u8 key_id_mode = hdr->sec.key_id_mode;
+	struct ieee802154_llsec_key_entry *key_entry;
+	struct mac802154_llsec_key *key;
+
+	/* implicit key mode with no address in the frame: beacons fall back
+	 * to the coordinator extended address, everything else to whatever
+	 * llsec_recover_addr() derives from the stored parameters.
+	 */
+	if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT &&
+	    devaddr.mode == IEEE802154_ADDR_NONE) {
+		if (hdr->fc.type == IEEE802154_FC_TYPE_BEACON) {
+			devaddr.extended_addr = sec->params.coord_hwaddr;
+			devaddr.mode = IEEE802154_ADDR_LONG;
+		} else if (llsec_recover_addr(sec, &devaddr) < 0) {
+			return NULL;
+		}
+	}
+
+	list_for_each_entry_rcu(key_entry, &sec->table.keys, list) {
+		const struct ieee802154_llsec_key_id *id = &key_entry->id;
+
+		/* key must be enabled for this frame type */
+		if (!(key_entry->key->frame_types & BIT(hdr->fc.type)))
+			continue;
+
+		if (id->mode != key_id_mode)
+			continue;
+
+		if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT) {
+			/* implicit keys are identified by device address */
+			if (ieee802154_addr_equal(&devaddr, &id->device_addr))
+				goto found;
+		} else {
+			/* explicit keys: index must match, plus the key
+			 * source for the short/extended source modes
+			 */
+			if (id->id != hdr->sec.key_id)
+				continue;
+
+			if ((key_id_mode == IEEE802154_SCF_KEY_INDEX) ||
+			    (key_id_mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+			     id->short_source == hdr->sec.short_src) ||
+			    (key_id_mode == IEEE802154_SCF_KEY_HW_INDEX &&
+			     id->extended_source == hdr->sec.extended_src))
+				goto found;
+		}
+	}
+
+	return NULL;
+
+found:
+	key = container_of(key_entry->key, struct mac802154_llsec_key, key);
+	if (key_id)
+		*key_id = key_entry->id;
+	return llsec_key_get(key);
+}
+
+
+/* Build the 16-byte IV handed to the cipher for one frame.
+ *
+ * Layout: iv[0] = 1, iv[1..8] = byte-swapped @addr, iv[9..12] = byte-swapped
+ * frame counter from @sec, iv[13] = security level, iv[14] = 0, iv[15] = 1.
+ */
+static void llsec_geniv(u8 iv[16], __le64 addr,
+			const struct ieee802154_sechdr *sec)
+{
+	__be32 ctr_be = (__force __be32) swab32((__force u32) sec->frame_counter);
+	__be64 addr_be = (__force __be64) swab64((__force u64) addr);
+
+	iv[0] = 1; /* L' = L - 1 = 1 */
+	memcpy(&iv[1], &addr_be, sizeof(addr_be));
+	memcpy(&iv[9], &ctr_be, sizeof(ctr_be));
+	iv[13] = sec->level;
+	iv[14] = 0;
+	iv[15] = 1;
+}
+
+/* Encrypt the payload of @skb in place for the encryption-only security
+ * level, using the block cipher key->tfm0 and an IV derived from our own
+ * extended address and the frame's security header.
+ *
+ * Only the bytes following the MAC header are encrypted: the caller has
+ * already pushed the header and set skb->mac_len, and
+ * llsec_do_decrypt_unauth() likewise starts at
+ * skb_mac_header(skb) + skb->mac_len. Encrypting from skb->data would
+ * scramble the header itself.
+ *
+ * Returns the result of crypto_blkcipher_encrypt_iv().
+ */
+static int
+llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+			const struct ieee802154_hdr *hdr,
+			struct mac802154_llsec_key *key)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int datalen;
+	struct scatterlist src;
+	struct blkcipher_desc req = {
+		.tfm = key->tfm0,
+		.info = iv,
+		.flags = 0,
+	};
+
+	llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
+
+	/* skip the MAC header, it must stay readable on the wire */
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	sg_init_one(&src, data, datalen);
+	return crypto_blkcipher_encrypt_iv(&req, &src, &src, datalen);
+}
+
+/* Return the AEAD transform of @key whose configured authentication tag
+ * size equals @authlen. Hits BUG() if none of key->tfm[] matches.
+ */
+static struct crypto_aead*
+llsec_tfm_by_len(struct mac802154_llsec_key *key, int authlen)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(key->tfm); idx++) {
+		struct crypto_aead *tfm = key->tfm[idx];
+
+		if (crypto_aead_authsize(tfm) == authlen)
+			return tfm;
+	}
+
+	BUG();
+}
+
+/* Apply an authenticating security level to @skb in place.
+ *
+ * The MAC header is always associated (authenticated-only) data. If the
+ * level includes encryption the payload is encrypted and authenticated;
+ * otherwise the payload is added to the associated data and nothing is
+ * encrypted. The authentication tag is appended to the frame with
+ * skb_put().
+ *
+ * Returns -ENOMEM if the request cannot be allocated, otherwise the result
+ * of crypto_aead_encrypt().
+ */
+static int
+llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		      const struct ieee802154_hdr *hdr,
+		      struct mac802154_llsec_key *key)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int authlen, assoclen, datalen, rc;
+	struct scatterlist src, assoc[2], dst[2];
+	struct aead_request *req;
+
+	authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
+	llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
+
+	req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
+	sg_init_table(assoc, 2);
+	sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
+	assoclen = skb->mac_len;
+
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
+		/* payload is encrypted, so it is not associated data */
+		sg_set_buf(&assoc[1], data, 0);
+	} else {
+		/* authentication only: the whole payload is associated data
+		 * and there is nothing left to encrypt
+		 */
+		sg_set_buf(&assoc[1], data, datalen);
+		assoclen += datalen;
+		datalen = 0;
+	}
+
+	sg_init_one(&src, data, datalen);
+
+	/* output: (possibly empty) ciphertext followed by the tag */
+	sg_init_table(dst, 2);
+	sg_set_buf(&dst[0], data, datalen);
+	sg_set_buf(&dst[1], skb_put(skb, authlen), authlen);
+
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_assoc(req, assoc, assoclen);
+	aead_request_set_crypt(req, &src, dst, datalen, iv);
+
+	rc = crypto_aead_encrypt(req);
+
+	/* release through the crypto API rather than a bare kfree() */
+	aead_request_free(req);
+
+	return rc;
+}
+
+/* Dispatch to the encryption-only or the authenticating transform,
+ * depending on the security level carried in @hdr.
+ */
+static int llsec_do_encrypt(struct sk_buff *skb,
+			    const struct mac802154_llsec *sec,
+			    const struct ieee802154_hdr *hdr,
+			    struct mac802154_llsec_key *key)
+{
+	if (hdr->sec.level != IEEE802154_SCF_SECLEVEL_ENC)
+		return llsec_do_encrypt_auth(skb, sec, hdr, key);
+
+	return llsec_do_encrypt_unauth(skb, sec, hdr, key);
+}
+
+/* Secure an outgoing frame in place.
+ *
+ * The MAC header is pulled from @skb and parsed. Frames that do not request
+ * security (security_enabled clear or level 0) get their header pushed back
+ * unchanged and 0 is returned. Otherwise the key is looked up by destination
+ * address, the outgoing frame counter is taken from sec->params and
+ * incremented, the header is re-pushed with that counter and the payload is
+ * transformed by llsec_do_encrypt().
+ *
+ * Returns 0 on success, -EINVAL for unparsable or non-data frames or when
+ * security is disabled, -EMSGSIZE if header, payload, tag and MFR would
+ * exceed IEEE802154_MTU, -ENOKEY if no key matches, -EOVERFLOW once the
+ * frame counter has reached 0xFFFFFFFF, or the error of the cipher.
+ *
+ * Note: on the error returns after ieee802154_hdr_pull() the header is not
+ * pushed back onto @skb.
+ */
+int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
+{
+	struct ieee802154_hdr hdr;
+	int rc, authlen, hlen;
+	struct mac802154_llsec_key *key;
+	u32 frame_ctr;
+
+	hlen = ieee802154_hdr_pull(skb, &hdr);
+
+	if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
+		return -EINVAL;
+
+	/* nothing to do for unsecured frames: restore the header */
+	if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+		skb_push(skb, hlen);
+		return 0;
+	}
+
+	authlen = ieee802154_sechdr_authtag_len(&hdr.sec);
+
+	/* the secured frame still has to fit into one PHY frame */
+	if (skb->len + hlen + authlen + IEEE802154_MFR_SIZE > IEEE802154_MTU)
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+
+	/* params are protected by sec->lock, the key table by RCU */
+	read_lock_bh(&sec->lock);
+
+	if (!sec->params.enabled) {
+		rc = -EINVAL;
+		goto fail_read;
+	}
+
+	key = llsec_lookup_key(sec, &hdr, &hdr.dest, NULL);
+	if (!key) {
+		rc = -ENOKEY;
+		goto fail_read;
+	}
+
+	read_unlock_bh(&sec->lock);
+
+	/* switch to the write lock to consume one frame counter value */
+	write_lock_bh(&sec->lock);
+
+	frame_ctr = be32_to_cpu(sec->params.frame_counter);
+	hdr.sec.frame_counter = cpu_to_le32(frame_ctr);
+	/* counter exhausted: refuse to send and drop the key reference */
+	if (frame_ctr == 0xFFFFFFFF) {
+		write_unlock_bh(&sec->lock);
+		llsec_key_put(key);
+		rc = -EOVERFLOW;
+		goto fail;
+	}
+
+	sec->params.frame_counter = cpu_to_be32(frame_ctr + 1);
+
+	write_unlock_bh(&sec->lock);
+
+	rcu_read_unlock();
+
+	/* re-push the header, now carrying the frame counter, and record its
+	 * position and length for the cipher helpers
+	 */
+	skb->mac_len = ieee802154_hdr_push(skb, &hdr);
+	skb_reset_mac_header(skb);
+
+	rc = llsec_do_encrypt(skb, sec, &hdr, key);
+	llsec_key_put(key);
+
+	return rc;
+
+fail_read:
+	read_unlock_bh(&sec->lock);
+fail:
+	rcu_read_unlock();
+	return rc;
+}
+
+
+
+/* Look up the device entry for frame address @addr.
+ *
+ * An absent address (IEEE802154_ADDR_NONE) is first replaced by the result
+ * of llsec_recover_addr(). Short addresses are resolved through the
+ * short-address hash table (matching PAN id and short address), everything
+ * else through the extended-address table. Returns NULL if the address
+ * cannot be recovered or no entry matches.
+ */
+static struct mac802154_llsec_device*
+llsec_lookup_dev(struct mac802154_llsec *sec,
+		 const struct ieee802154_addr *addr)
+{
+	struct ieee802154_addr peer = *addr;
+	struct mac802154_llsec_device *cand = NULL;
+
+	if (peer.mode == IEEE802154_ADDR_NONE &&
+	    llsec_recover_addr(sec, &peer) < 0)
+		return NULL;
+
+	if (peer.mode != IEEE802154_ADDR_SHORT) {
+		u64 hash = llsec_dev_hash_long(peer.extended_addr);
+
+		hash_for_each_possible_rcu(sec->devices_hw, cand,
+					   bucket_hw, hash) {
+			if (cand->dev.hwaddr == peer.extended_addr)
+				return cand;
+		}
+	} else {
+		u32 hash = llsec_dev_hash_short(peer.short_addr,
+						peer.pan_id);
+
+		hash_for_each_possible_rcu(sec->devices_short, cand,
+					   bucket_s, hash) {
+			if (cand->dev.pan_id == peer.pan_id &&
+			    cand->dev.short_addr == peer.short_addr)
+				return cand;
+		}
+	}
+
+	return NULL;
+}
+
+/* Copy the first security-level entry that applies to @frame_type into
+ * @rlevel. For MAC command frames the entry must also carry @cmd_frame_id.
+ *
+ * Returns 0 if an entry was found, -EINVAL otherwise (in which case
+ * @rlevel is left untouched).
+ */
+static int
+llsec_lookup_seclevel(const struct mac802154_llsec *sec,
+		      u8 frame_type, u8 cmd_frame_id,
+		      struct ieee802154_llsec_seclevel *rlevel)
+{
+	struct ieee802154_llsec_seclevel *cur;
+
+	list_for_each_entry_rcu(cur, &sec->table.security_levels, list) {
+		if (cur->frame_type != frame_type)
+			continue;
+
+		if (frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+		    cur->cmd_frame_id != cmd_frame_id)
+			continue;
+
+		*rlevel = *cur;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Decrypt, in place, everything that follows the MAC header of @skb for
+ * the encryption-only security level. The IV is derived from the sender's
+ * extended address @dev_addr and the frame's security header.
+ *
+ * Returns the result of crypto_blkcipher_decrypt_iv().
+ */
+static int
+llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+			const struct ieee802154_hdr *hdr,
+			struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+	u8 iv[16];
+	struct blkcipher_desc desc = {
+		.tfm = key->tfm0,
+		.info = iv,
+		.flags = 0,
+	};
+	unsigned char *payload = skb_mac_header(skb) + skb->mac_len;
+	int payload_len = skb_tail_pointer(skb) - payload;
+	struct scatterlist sg;
+
+	llsec_geniv(iv, dev_addr, &hdr->sec);
+	sg_init_one(&sg, payload, payload_len);
+
+	return crypto_blkcipher_decrypt_iv(&desc, &sg, &sg, payload_len);
+}
+
+/* Verify (and, if the level includes encryption, decrypt) a received
+ * frame in place.
+ *
+ * The MAC header is always associated data. With encryption the payload
+ * plus trailing tag is handed to the AEAD as ciphertext; without
+ * encryption the payload is associated data and only the tag is processed.
+ * On return the tag has been trimmed off @skb.
+ *
+ * Returns -EINVAL if the frame carries fewer bytes after the MAC header
+ * than the authentication tag requires, -ENOMEM if the request cannot be
+ * allocated, otherwise the result of crypto_aead_decrypt().
+ */
+static int
+llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		      const struct ieee802154_hdr *hdr,
+		      struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int authlen, datalen, assoclen, rc;
+	struct scatterlist src, assoc[2];
+	struct aead_request *req;
+
+	authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
+
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	/* received data is untrusted: a frame too short to hold its tag
+	 * would yield negative lengths below
+	 */
+	if (datalen < authlen)
+		return -EINVAL;
+
+	llsec_geniv(iv, dev_addr, &hdr->sec);
+
+	req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
+	sg_init_table(assoc, 2);
+	sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
+	assoclen = skb->mac_len;
+
+	if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
+		/* payload is ciphertext, not associated data */
+		sg_set_buf(&assoc[1], data, 0);
+	} else {
+		/* authentication only: payload is associated data and the
+		 * "ciphertext" consists of the tag alone
+		 */
+		sg_set_buf(&assoc[1], data, datalen - authlen);
+		assoclen += datalen - authlen;
+		data += datalen - authlen;
+		datalen = authlen;
+	}
+
+	sg_init_one(&src, data, datalen);
+
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_assoc(req, assoc, assoclen);
+	aead_request_set_crypt(req, &src, &src, datalen, iv);
+
+	rc = crypto_aead_decrypt(req);
+
+	/* release through the crypto API rather than a bare kfree() */
+	aead_request_free(req);
+	skb_trim(skb, skb->len - authlen);
+
+	return rc;
+}
+
+/* Dispatch to the encryption-only or the authenticating receive path,
+ * depending on the security level carried in @hdr.
+ */
+static int
+llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		 const struct ieee802154_hdr *hdr,
+		 struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+	if (hdr->sec.level != IEEE802154_SCF_SECLEVEL_ENC)
+		return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr);
+
+	return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr);
+}
+
+/* Make sure @dev has a device-key entry for @in_key, creating a zeroed
+ * one if necessary.
+ *
+ * The new entry is allocated before dev->lock is taken; the lookup is
+ * repeated under the lock and the allocation is discarded if an entry has
+ * appeared in the meantime. Returns 0 on success, -ENOMEM on allocation
+ * failure.
+ */
+static int
+llsec_update_devkey_record(struct mac802154_llsec_device *dev,
+			   const struct ieee802154_llsec_key_id *in_key)
+{
+	struct mac802154_llsec_device_key *fresh;
+
+	if (llsec_devkey_find(dev, in_key))
+		return 0;
+
+	fresh = kzalloc(sizeof(*fresh), GFP_ATOMIC);
+	if (!fresh)
+		return -ENOMEM;
+
+	fresh->devkey.key_id = *in_key;
+
+	spin_lock_bh(&dev->lock);
+
+	/* re-check now that the list cannot change under us */
+	if (llsec_devkey_find(dev, in_key))
+		kfree(fresh);
+	else
+		list_add_rcu(&fresh->devkey.list, &dev->dev.keys);
+
+	spin_unlock_bh(&dev->lock);
+
+	return 0;
+}
+
+/* Validate and advance the incoming frame counter for @dev.
+ *
+ * In RESTRICT key mode the device must already have an entry for @in_key
+ * (-ENOENT otherwise) and that entry's counter is used. In RECORD key mode
+ * an entry is created on demand, but the per-device counter is used. In
+ * every other mode the per-device counter is used.
+ *
+ * Returns -EINVAL if @frame_counter is below the stored counter; otherwise
+ * the stored counter becomes @frame_counter + 1 and 0 is returned.
+ */
+static int
+llsec_update_devkey_info(struct mac802154_llsec_device *dev,
+			 const struct ieee802154_llsec_key_id *in_key,
+			 u32 frame_counter)
+{
+	struct mac802154_llsec_device_key *per_key = NULL;
+	int rc;
+
+	if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RESTRICT) {
+		per_key = llsec_devkey_find(dev, in_key);
+		if (!per_key)
+			return -ENOENT;
+	}
+
+	if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) {
+		rc = llsec_update_devkey_record(dev, in_key);
+		if (rc < 0)
+			return rc;
+	}
+
+	spin_lock_bh(&dev->lock);
+
+	if (per_key) {
+		if (frame_counter < per_key->devkey.frame_counter)
+			goto stale;
+		per_key->devkey.frame_counter = frame_counter + 1;
+	} else {
+		if (frame_counter < dev->dev.frame_counter)
+			goto stale;
+		dev->dev.frame_counter = frame_counter + 1;
+	}
+
+	spin_unlock_bh(&dev->lock);
+
+	return 0;
+
+stale:
+	spin_unlock_bh(&dev->lock);
+	return -EINVAL;
+}
+
+/* Check and unsecure a received frame in place.
+ *
+ * Frames without the security_enabled bit are accepted unchanged (0).
+ * Otherwise the key is looked up by source address, the sending device and
+ * the security-level policy for the frame type are resolved, the frame
+ * counter is validated and advanced, and the payload is passed to
+ * llsec_do_decrypt() using the sender's extended address.
+ *
+ * Returns 0 on success, -EINVAL for unparsable frames, frame version 0,
+ * disabled security, unknown device, missing or violated security-level
+ * policy, or a replayed frame counter; -ENOKEY if no key matches;
+ * -EOVERFLOW if the frame counter is 0xffffffff; or the error of
+ * llsec_update_devkey_info() / the cipher.
+ */
+int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
+{
+	struct ieee802154_hdr hdr;
+	struct mac802154_llsec_key *key;
+	struct ieee802154_llsec_key_id key_id;
+	struct mac802154_llsec_device *dev;
+	struct ieee802154_llsec_seclevel seclevel;
+	int err;
+	__le64 dev_addr;
+	u32 frame_ctr;
+
+	if (ieee802154_hdr_peek(skb, &hdr) < 0)
+		return -EINVAL;
+	if (!hdr.fc.security_enabled)
+		return 0;
+	if (hdr.fc.version == 0)
+		return -EINVAL;
+
+	/* params are protected by sec->lock */
+	read_lock_bh(&sec->lock);
+	if (!sec->params.enabled) {
+		read_unlock_bh(&sec->lock);
+		return -EINVAL;
+	}
+	read_unlock_bh(&sec->lock);
+
+	rcu_read_lock();
+
+	/* on success a key reference is held until llsec_key_put() */
+	key = llsec_lookup_key(sec, &hdr, &hdr.source, &key_id);
+	if (!key) {
+		err = -ENOKEY;
+		goto fail;
+	}
+
+	dev = llsec_lookup_dev(sec, &hdr.source);
+	if (!dev) {
+		err = -EINVAL;
+		goto fail_dev;
+	}
+
+	if (llsec_lookup_seclevel(sec, hdr.fc.type, 0, &seclevel) < 0) {
+		err = -EINVAL;
+		goto fail_dev;
+	}
+
+	/* NOTE(review): as written, the frame is rejected only when its
+	 * level is not in the allowed set AND the level is 0 AND
+	 * device_override is set AND the device is not exempt. A disallowed
+	 * non-zero level therefore passes this check - confirm against the
+	 * incoming security level checking procedure of IEEE 802.15.4.
+	 */
+	if (!(seclevel.sec_levels & BIT(hdr.sec.level)) &&
+	    (hdr.sec.level == 0 && seclevel.device_override &&
+	     !dev->dev.seclevel_exempt)) {
+		err = -EINVAL;
+		goto fail_dev;
+	}
+
+	frame_ctr = le32_to_cpu(hdr.sec.frame_counter);
+
+	if (frame_ctr == 0xffffffff) {
+		err = -EOVERFLOW;
+		goto fail_dev;
+	}
+
+	err = llsec_update_devkey_info(dev, &key_id, frame_ctr);
+	if (err)
+		goto fail_dev;
+
+	/* copy the address out: dev is not used after rcu_read_unlock() */
+	dev_addr = dev->dev.hwaddr;
+
+	rcu_read_unlock();
+
+	err = llsec_do_decrypt(skb, sec, &hdr, key, dev_addr);
+	llsec_key_put(key);
+	return err;
+
+fail_dev:
+	llsec_key_put(key);
+fail:
+	rcu_read_unlock();
+	return err;
+}

diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
new file mode 100644
index 0000000..950578e
--- /dev/null
+++ b/net/mac802154/llsec.h

@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#ifndef MAC802154_LLSEC_H
+#define MAC802154_LLSEC_H
+
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <linux/crypto.h>
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+#include <net/af_ieee802154.h>
+#include <net/ieee802154_netdev.h>
+
+/* mac802154-private wrapper around a link-layer security key: the generic
+ * key descriptor plus the crypto transforms and reference count used by
+ * llsec.c.
+ */
+struct mac802154_llsec_key {
+	/* generic key descriptor; must stay embedded so container_of() works */
+	struct ieee802154_llsec_key key;
+
+	/* one tfm for each authsize (4/8/16) */
+	struct crypto_aead *tfm[3];
+	/* block cipher used for the encryption-only security level */
+	struct crypto_blkcipher *tfm0;
+
+	/* reference count; presumably managed by llsec_key_get()/_put() */
+	struct kref ref;
+};
+
+/* Per-device key entry: the generic device-key descriptor plus the RCU head
+ * used to free the entry with kfree_rcu().
+ */
+struct mac802154_llsec_device_key {
+	struct ieee802154_llsec_device_key devkey;
+
+	/* for deferred freeing after removal from the device's key list */
+	struct rcu_head rcu;
+};
+
+/* Peer device entry: the generic device descriptor plus the linkage and
+ * locking needed by llsec.c.
+ */
+struct mac802154_llsec_device {
+	struct ieee802154_llsec_device dev;
+
+	/* node in mac802154_llsec.devices_short (short address / PAN id) */
+	struct hlist_node bucket_s;
+	/* node in mac802154_llsec.devices_hw (extended address) */
+	struct hlist_node bucket_hw;
+
+	/* protects dev.frame_counter and the elements of dev.keys */
+	spinlock_t lock;
+
+	/* for deferred freeing via call_rcu() */
+	struct rcu_head rcu;
+};
+
+/* Security-level table entry: the generic descriptor plus the RCU head used
+ * to free the entry with kfree_rcu().
+ */
+struct mac802154_llsec_seclevel {
+	struct ieee802154_llsec_seclevel level;
+
+	/* for deferred freeing after removal from the security-level list */
+	struct rcu_head rcu;
+};
+
+/* Complete link-layer security state of one interface. */
+struct mac802154_llsec {
+	/* global parameters (enabled flag, frame counter, addresses, ...) */
+	struct ieee802154_llsec_params params;
+	/* key, device and security-level lists */
+	struct ieee802154_llsec_table table;
+
+	/* device lookup by short address / PAN id, 2^6 buckets */
+	DECLARE_HASHTABLE(devices_short, 6);
+	/* device lookup by extended address, 2^6 buckets */
+	DECLARE_HASHTABLE(devices_hw, 6);
+
+	/* protects params, all other fields are fine with RCU */
+	rwlock_t lock;
+};
+
+void mac802154_llsec_init(struct mac802154_llsec *sec);
+void mac802154_llsec_destroy(struct mac802154_llsec *sec);
+
+int mac802154_llsec_get_params(struct mac802154_llsec *sec,
+			       struct ieee802154_llsec_params *params);
+int mac802154_llsec_set_params(struct mac802154_llsec *sec,
+			       const struct ieee802154_llsec_params *params,
+			       int changed);
+
+int mac802154_llsec_key_add(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_key_id *id,
+			    const struct ieee802154_llsec_key *key);
+int mac802154_llsec_key_del(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_key_id *key);
+
+int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
+			    const struct ieee802154_llsec_device *dev);
+int mac802154_llsec_dev_del(struct mac802154_llsec *sec,
+			    __le64 device_addr);
+
+int mac802154_llsec_devkey_add(struct mac802154_llsec *sec,
+			       __le64 dev_addr,
+			       const struct ieee802154_llsec_device_key *key);
+int mac802154_llsec_devkey_del(struct mac802154_llsec *sec,
+			       __le64 dev_addr,
+			       const struct ieee802154_llsec_device_key *key);
+
+int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec,
+				 const struct ieee802154_llsec_seclevel *sl);
+int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec,
+				 const struct ieee802154_llsec_seclevel *sl);
+
+int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb);
+int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb);
+
+#endif /* MAC802154_LLSEC_H */

diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 28ef59c..762a6f8 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h

@@ -23,8 +23,12 @@
 #ifndef MAC802154_H
 #define MAC802154_H
 
+#include <linux/mutex.h>
+#include <net/mac802154.h>
 #include <net/ieee802154_netdev.h>
 
+#include "llsec.h"
+
 /* mac802154 device private data */
 struct mac802154_priv {
 	struct ieee802154_dev hw;
@@ -90,6 +94,13 @@
 	u8 bsn;
 	/* MAC DSN field */
 	u8 dsn;
+
+	/* protects sec from concurrent access by netlink. access by
+	 * encrypt/decrypt/header_create is safe without additional protection.
+	 */
+	struct mutex sec_mtx;
+
+	struct mac802154_llsec sec;
 };
 
 #define mac802154_to_priv(_hw)	container_of(_hw, struct mac802154_priv, hw)
@@ -125,4 +136,37 @@
 void mac802154_get_mac_params(struct net_device *dev,
 			      struct ieee802154_mac_params *params);
 
+int mac802154_get_params(struct net_device *dev,
+			 struct ieee802154_llsec_params *params);
+int mac802154_set_params(struct net_device *dev,
+			 const struct ieee802154_llsec_params *params,
+			 int changed);
+
+int mac802154_add_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id,
+		      const struct ieee802154_llsec_key *key);
+int mac802154_del_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id);
+
+int mac802154_add_dev(struct net_device *dev,
+		      const struct ieee802154_llsec_device *llsec_dev);
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr);
+
+int mac802154_add_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key);
+int mac802154_del_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key);
+
+int mac802154_add_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl);
+int mac802154_del_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl);
+
+void mac802154_lock_table(struct net_device *dev);
+void mac802154_get_table(struct net_device *dev,
+			 struct ieee802154_llsec_table **t);
+void mac802154_unlock_table(struct net_device *dev);
+
 #endif /* MAC802154_H */

diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index d40c092..bf80913 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c

@@ -40,6 +40,9 @@
 				    u8 pan_coord, u8 blx,
 				    u8 coord_realign)
 {
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	int rc = 0;
+
 	BUG_ON(addr->mode != IEEE802154_ADDR_SHORT);
 
 	mac802154_dev_set_pan_id(dev, addr->pan_id);
@@ -47,12 +50,31 @@
 	mac802154_dev_set_ieee_addr(dev);
 	mac802154_dev_set_page_channel(dev, page, channel);
 
+	if (ops->llsec) {
+		struct ieee802154_llsec_params params;
+		int changed = 0;
+
+		params.coord_shortaddr = addr->short_addr;
+		changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR;
+
+		params.pan_id = addr->pan_id;
+		changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
+
+		params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr);
+		changed |= IEEE802154_LLSEC_PARAM_HWADDR;
+
+		params.coord_hwaddr = params.hwaddr;
+		changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
+
+		rc = ops->llsec->set_params(dev, &params, changed);
+	}
+
 	/* FIXME: add validation for unused parameters to be sane
 	 * for SoftMAC
 	 */
 	ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS);
 
-	return 0;
+	return rc;
 }
 
 static struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
@@ -64,6 +86,22 @@
 	return to_phy(get_device(&priv->hw->phy->dev));
 }
 
+/* Link-layer security operations of the SoftMAC implementation. Every hook
+ * is bound to the mac802154_* function of the same purpose declared in
+ * mac802154.h.
+ */
+static struct ieee802154_llsec_ops mac802154_llsec_ops = {
+	/* global security parameters */
+	.get_params = mac802154_get_params,
+	.set_params = mac802154_set_params,
+	/* key table */
+	.add_key = mac802154_add_key,
+	.del_key = mac802154_del_key,
+	/* device table */
+	.add_dev = mac802154_add_dev,
+	.del_dev = mac802154_del_dev,
+	/* per-device key lists */
+	.add_devkey = mac802154_add_devkey,
+	.del_devkey = mac802154_del_devkey,
+	/* security-level table */
+	.add_seclevel = mac802154_add_seclevel,
+	.del_seclevel = mac802154_del_seclevel,
+	/* direct table access, bracketed by lock/unlock */
+	.lock_table = mac802154_lock_table,
+	.get_table = mac802154_get_table,
+	.unlock_table = mac802154_unlock_table,
+};
+
 struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
 	.get_phy = mac802154_get_phy,
 };
@@ -75,6 +113,8 @@
 	.get_short_addr = mac802154_dev_get_short_addr,
 	.get_dsn = mac802154_dev_get_dsn,
 
+	.llsec = &mac802154_llsec_ops,
+
 	.set_mac_params = mac802154_set_mac_params,
 	.get_mac_params = mac802154_get_mac_params,
 };

diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f0991f2..15aa2f2 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c

@@ -213,3 +213,190 @@
 	} else
 		mutex_unlock(&priv->hw->phy->pib_lock);
 }
+
+
+/* Read the llsec parameters of @dev into @params while holding sec_mtx.
+ * Returns the result of mac802154_llsec_get_params().
+ */
+int mac802154_get_params(struct net_device *dev,
+			 struct ieee802154_llsec_params *params)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_get_params(&sdata->sec, params);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/* Update the llsec parameters of @dev selected by the @changed bitmask
+ * from @params while holding sec_mtx.
+ * Returns the result of mac802154_llsec_set_params().
+ */
+int mac802154_set_params(struct net_device *dev,
+			 const struct ieee802154_llsec_params *params,
+			 int changed)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_set_params(&sdata->sec, params, changed);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/* Add key @key under identifier @id to the key table of @dev while
+ * holding sec_mtx. Returns the result of mac802154_llsec_key_add().
+ */
+int mac802154_add_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id,
+		      const struct ieee802154_llsec_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_key_add(&sdata->sec, id, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/* Remove the key identified by @id from the key table of @dev while
+ * holding sec_mtx. Returns the result of mac802154_llsec_key_del().
+ */
+int mac802154_del_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_key_del(&sdata->sec, id);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/* Add peer device @llsec_dev to the device table of @dev while holding
+ * sec_mtx. Returns the result of mac802154_llsec_dev_add().
+ */
+int mac802154_add_dev(struct net_device *dev,
+		      const struct ieee802154_llsec_device *llsec_dev)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_dev_add(&sdata->sec, llsec_dev);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/* Remove the peer device with extended address @dev_addr from the device
+ * table of @dev while holding sec_mtx.
+ * Returns the result of mac802154_llsec_dev_del().
+ */
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_dev_del(&sdata->sec, dev_addr);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/* Attach device key @key to the peer with extended address @device_addr
+ * while holding sec_mtx.
+ * Returns the result of mac802154_llsec_devkey_add().
+ */
+int mac802154_add_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_devkey_add(&sdata->sec, device_addr, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/* Detach device key @key from the peer with extended address @device_addr
+ * while holding sec_mtx.
+ * Returns the result of mac802154_llsec_devkey_del().
+ */
+int mac802154_del_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_devkey_del(&sdata->sec, device_addr, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/* Add security-level entry @sl to the table of @dev while holding sec_mtx.
+ * Returns the result of mac802154_llsec_seclevel_add().
+ */
+int mac802154_add_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_seclevel_add(&sdata->sec, sl);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/* Remove security-level entry @sl from the table of @dev while holding
+ * sec_mtx. Returns the result of mac802154_llsec_seclevel_del().
+ */
+int mac802154_del_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_seclevel_del(&sdata->sec, sl);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/* Take sec_mtx of @dev; must be paired with mac802154_unlock_table(). */
+void mac802154_lock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	mutex_lock(&sdata->sec_mtx);
+}
+
+/* Store a pointer to the llsec table of @dev in *@t. No lock is taken
+ * here.
+ */
+void mac802154_get_table(struct net_device *dev,
+			 struct ieee802154_llsec_table **t)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	*t = &sdata->sec.table;
+}
+
+/* Release sec_mtx of @dev taken by mac802154_lock_table(). */
+void mac802154_unlock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	mutex_unlock(&sdata->sec_mtx);
+}

diff --git a/net/mac802154/monitor.c b/net/mac802154/monitor.c
index 434a26f..a68230e 100644
--- a/net/mac802154/monitor.c
+++ b/net/mac802154/monitor.c

@@ -70,7 +70,8 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &priv->slaves, list) {
-		if (sdata->type != IEEE802154_DEV_MONITOR)
+		if (sdata->type != IEEE802154_DEV_MONITOR ||
+		    !netif_running(sdata->dev))
 			continue;
 
 		skb2 = skb_clone(skb, GFP_ATOMIC);

diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 03855b0..7f820a1 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c

@@ -59,27 +59,28 @@
 	skb->protocol = htons(ETH_P_IEEE802154);
 	skb_reset_mac_header(skb);
 
-	BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb));
-
 	if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
 		u16 crc;
 
 		if (skb->len < 2) {
 			pr_debug("got invalid frame\n");
-			goto out;
+			goto fail;
 		}
 		crc = crc_ccitt(0, skb->data, skb->len);
 		if (crc) {
 			pr_debug("CRC mismatch\n");
-			goto out;
+			goto fail;
 		}
 		skb_trim(skb, skb->len - 2); /* CRC */
 	}
 
 	mac802154_monitors_rx(priv, skb);
 	mac802154_wpans_rx(priv, skb);
-out:
-	dev_kfree_skb(skb);
+
+	return;
+
+fail:
+	kfree_skb(skb);
 }
 
 static void mac802154_rx_worker(struct work_struct *work)

diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 1df7a6a..3c3069f 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c

@@ -35,6 +35,28 @@
 
 #include "mac802154.h"
 
+/* Propagate the interface's current PAN id and extended address into the
+ * llsec parameters, if the MLME ops provide llsec hooks.
+ *
+ * Returns 0 when there are no llsec hooks, otherwise the result of the
+ * set_params hook. Only the two fields flagged in the change mask are
+ * initialised in the parameter block.
+ */
+static int mac802154_wpan_update_llsec(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_params params;
+	int changed;
+
+	if (!ops->llsec)
+		return 0;
+
+	params.pan_id = priv->pan_id;
+	params.hwaddr = priv->extended_addr;
+	changed = IEEE802154_LLSEC_PARAM_PAN_ID |
+		  IEEE802154_LLSEC_PARAM_HWADDR;
+
+	return ops->llsec->set_params(dev, &params, changed);
+}
+
 static int
 mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -81,7 +103,7 @@
 		priv->pan_id = cpu_to_le16(sa->addr.pan_id);
 		priv->short_addr = cpu_to_le16(sa->addr.short_addr);
 
-		err = 0;
+		err = mac802154_wpan_update_llsec(dev);
 		break;
 	}
 
@@ -99,7 +121,7 @@
 	/* FIXME: validate addr */
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	mac802154_dev_set_ieee_addr(dev);
-	return 0;
+	return mac802154_wpan_update_llsec(dev);
 }
 
 int mac802154_set_mac_params(struct net_device *dev,
@@ -124,7 +146,7 @@
 	mutex_unlock(&priv->hw->slaves_mtx);
 }
 
-int mac802154_wpan_open(struct net_device *dev)
+static int mac802154_wpan_open(struct net_device *dev)
 {
 	int rc;
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -183,6 +205,38 @@
 	return rc;
 }
 
+/* Decide whether an outgoing frame is to be secured and, if so, fill in
+ * the security fields of @hdr from the llsec parameters and the per-packet
+ * overrides in @cb.
+ *
+ * Returns -EINVAL if the packet demands security while llsec is disabled,
+ * or overrides the security level to 0 while security applies. Returns 0
+ * otherwise; @hdr is left untouched when llsec is disabled, the packet
+ * opts out of security, or the default outgoing level is 0.
+ */
+static int mac802154_set_header_security(struct mac802154_sub_if_data *priv,
+					 struct ieee802154_hdr *hdr,
+					 const struct ieee802154_mac_cb *cb)
+{
+	struct ieee802154_llsec_params params;
+
+	mac802154_llsec_get_params(&priv->sec, &params);
+
+	if (!params.enabled)
+		return (cb->secen_override && cb->secen) ? -EINVAL : 0;
+
+	/* packet explicitly opts out, or nothing is secured by default */
+	if (cb->secen_override && !cb->secen)
+		return 0;
+	if (!params.out_level)
+		return 0;
+
+	/* overriding the level to "none" contradicts sending secured */
+	if (cb->seclevel_override && !cb->seclevel)
+		return -EINVAL;
+
+	hdr->fc.security_enabled = 1;
+
+	if (cb->seclevel_override)
+		hdr->sec.level = cb->seclevel;
+	else
+		hdr->sec.level = params.out_level;
+
+	hdr->sec.key_id_mode = params.out_key.mode;
+	hdr->sec.key_id = params.out_key.id;
+
+	/* only the two "source + index" modes carry a key source */
+	if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX)
+		hdr->sec.extended_src = params.out_key.extended_source;
+	else if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX)
+		hdr->sec.short_src = params.out_key.short_source;
+
+	return 0;
+}
+
 static int mac802154_header_create(struct sk_buff *skb,
 				   struct net_device *dev,
 				   unsigned short type,
@@ -192,15 +246,20 @@
 {
 	struct ieee802154_hdr hdr;
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	struct ieee802154_mac_cb *cb = mac_cb(skb);
 	int hlen;
 
 	if (!daddr)
 		return -EINVAL;
 
 	memset(&hdr.fc, 0, sizeof(hdr.fc));
-	hdr.fc.type = mac_cb_type(skb);
-	hdr.fc.security_enabled = mac_cb_is_secen(skb);
-	hdr.fc.ack_request = mac_cb_is_ackreq(skb);
+	hdr.fc.type = cb->type;
+	hdr.fc.security_enabled = cb->secen;
+	hdr.fc.ack_request = cb->ackreq;
+	hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+
+	if (mac802154_set_header_security(priv, &hdr, cb) < 0)
+		return -EINVAL;
 
 	if (!saddr) {
 		spin_lock_bh(&priv->mib_lock);
@@ -231,7 +290,7 @@
 	skb_reset_mac_header(skb);
 	skb->mac_len = hlen;
 
-	if (hlen + len + 2 > dev->mtu)
+	if (len > ieee802154_max_payload(&hdr))
 		return -EMSGSIZE;
 
 	return hlen;
@@ -257,6 +316,7 @@
 {
 	struct mac802154_sub_if_data *priv;
 	u8 chan, page;
+	int rc;
 
 	priv = netdev_priv(dev);
 
@@ -272,6 +332,13 @@
 		return NETDEV_TX_OK;
 	}
 
+	rc = mac802154_llsec_encrypt(&priv->sec, skb);
+	if (rc) {
+		pr_warn("encryption failed: %i\n", rc);
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
 	skb->skb_iif = dev->ifindex;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
@@ -292,6 +359,15 @@
 	.ndo_set_mac_address	= mac802154_wpan_mac_addr,
 };
 
+static void mac802154_wpan_free(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	mac802154_llsec_destroy(&priv->sec);
+	/* priv was obtained from dev, so dev is released only after the above */
+	free_netdev(dev);
+}
+
 void mac802154_wpan_setup(struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv;
@@ -301,14 +377,14 @@
 
 	dev->hard_header_len	= MAC802154_FRAME_HARD_HEADER_LEN;
 	dev->header_ops		= &mac802154_header_ops;
-	dev->needed_tailroom	= 2; /* FCS */
+	dev->needed_tailroom	= 2 + 16; /* FCS + MIC */
 	dev->mtu		= IEEE802154_MTU;
 	dev->tx_queue_len	= 300;
 	dev->type		= ARPHRD_IEEE802154;
 	dev->flags		= IFF_NOARP | IFF_BROADCAST;
 	dev->watchdog_timeo	= 0;
 
-	dev->destructor		= free_netdev;
+	dev->destructor		= mac802154_wpan_free;
 	dev->netdev_ops		= &mac802154_wpan_ops;
 	dev->ml_priv		= &mac802154_mlme_wpan;
 
@@ -319,6 +395,7 @@
 	priv->page = 0;
 
 	spin_lock_init(&priv->mib_lock);
+	mutex_init(&priv->sec_mtx);
 
 	get_random_bytes(&priv->bsn, 1);
 	get_random_bytes(&priv->dsn, 1);
@@ -331,6 +408,8 @@
 
 	priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
 	priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+
+	mac802154_llsec_init(&priv->sec);
 }
 
 static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
@@ -339,9 +418,11 @@
 }
 
 static int
-mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
+mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
+		      const struct ieee802154_hdr *hdr)
 {
 	__le16 span, sshort;
+	int rc;
 
 	pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
 
@@ -388,15 +469,22 @@
 
 	skb->dev = sdata->dev;
 
+	rc = mac802154_llsec_decrypt(&sdata->sec, skb);
+	if (rc) {
+		pr_debug("decryption failed: %i\n", rc);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
 	sdata->dev->stats.rx_packets++;
 	sdata->dev->stats.rx_bytes += skb->len;
 
-	switch (mac_cb_type(skb)) {
+	switch (mac_cb(skb)->type) {
 	case IEEE802154_FC_TYPE_DATA:
 		return mac802154_process_data(sdata->dev, skb);
 	default:
 		pr_warn("ieee802154: bad frame received (type = %d)\n",
-			mac_cb_type(skb));
+			mac_cb(skb)->type);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -419,62 +507,58 @@
 	}
 }
 
-static int mac802154_parse_frame_start(struct sk_buff *skb)
+static int mac802154_parse_frame_start(struct sk_buff *skb,
+				       struct ieee802154_hdr *hdr)
 {
 	int hlen;
-	struct ieee802154_hdr hdr;
+	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 
-	hlen = ieee802154_hdr_pull(skb, &hdr);
+	hlen = ieee802154_hdr_pull(skb, hdr);
 	if (hlen < 0)
 		return -EINVAL;
 
 	skb->mac_len = hlen;
 
-	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc),
-		 hdr.seq);
+	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc),
+		 hdr->seq);
 
-	mac_cb(skb)->flags = hdr.fc.type;
+	cb->type = hdr->fc.type;
+	cb->ackreq = hdr->fc.ack_request;
+	cb->secen = hdr->fc.security_enabled;
 
-	if (hdr.fc.ack_request)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
-	if (hdr.fc.security_enabled)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN;
+	mac802154_print_addr("destination", &hdr->dest);
+	mac802154_print_addr("source", &hdr->source);
 
-	mac802154_print_addr("destination", &hdr.dest);
-	mac802154_print_addr("source", &hdr.source);
+	cb->source = hdr->source;
+	cb->dest = hdr->dest;
 
-	mac_cb(skb)->source = hdr.source;
-	mac_cb(skb)->dest = hdr.dest;
-
-	if (hdr.fc.security_enabled) {
+	if (hdr->fc.security_enabled) {
 		u64 key;
 
-		pr_debug("seclevel %i\n", hdr.sec.level);
+		pr_debug("seclevel %i\n", hdr->sec.level);
 
-		switch (hdr.sec.key_id_mode) {
+		switch (hdr->sec.key_id_mode) {
 		case IEEE802154_SCF_KEY_IMPLICIT:
 			pr_debug("implicit key\n");
 			break;
 
 		case IEEE802154_SCF_KEY_INDEX:
-			pr_debug("key %02x\n", hdr.sec.key_id);
+			pr_debug("key %02x\n", hdr->sec.key_id);
 			break;
 
 		case IEEE802154_SCF_KEY_SHORT_INDEX:
 			pr_debug("key %04x:%04x %02x\n",
-				 le32_to_cpu(hdr.sec.short_src) >> 16,
-				 le32_to_cpu(hdr.sec.short_src) & 0xffff,
-				 hdr.sec.key_id);
+				 le32_to_cpu(hdr->sec.short_src) >> 16,
+				 le32_to_cpu(hdr->sec.short_src) & 0xffff,
+				 hdr->sec.key_id);
 			break;
 
 		case IEEE802154_SCF_KEY_HW_INDEX:
-			key = swab64((__force u64) hdr.sec.extended_src);
+			key = swab64((__force u64) hdr->sec.extended_src);
 			pr_debug("key source %8phC %02x\n", &key,
-				 hdr.sec.key_id);
+				 hdr->sec.key_id);
 			break;
 		}
-
-		return -EINVAL;
 	}
 
 	return 0;
@@ -483,10 +567,10 @@
 void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
 {
 	int ret;
-	struct sk_buff *sskb;
 	struct mac802154_sub_if_data *sdata;
+	struct ieee802154_hdr hdr;
 
-	ret = mac802154_parse_frame_start(skb);
+	ret = mac802154_parse_frame_start(skb, &hdr);
 	if (ret) {
 		pr_debug("got invalid frame\n");
 		return;
@@ -494,12 +578,16 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &priv->slaves, list) {
-		if (sdata->type != IEEE802154_DEV_WPAN)
+		if (sdata->type != IEEE802154_DEV_WPAN ||
+		    !netif_running(sdata->dev))
 			continue;
 
-		sskb = skb_clone(skb, GFP_ATOMIC);
-		if (sskb)
-			mac802154_subif_frame(sdata, sskb);
+		mac802154_subif_frame(sdata, skb, &hdr);
+		skb = NULL;
+		break;
 	}
 	rcu_read_unlock();
+
+	if (skb)
+		kfree_skb(skb);
 }

diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 851cd88..6b38d08 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c

@@ -33,6 +33,7 @@
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP |
 				  SKB_GSO_MPLS)))
 		goto out;

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1172083..ec8114f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c

@@ -271,10 +271,7 @@
 {
 	pr_debug("%p: free with %s\n", members,
 		 is_vmalloc_addr(members) ? "vfree" : "kfree");
-	if (is_vmalloc_addr(members))
-		vfree(members);
-	else
-		kfree(members);
+	kvfree(members);
 }
 EXPORT_SYMBOL_GPL(ip_set_free);
 

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3d2d2c8..e683675 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c

@@ -97,7 +97,7 @@
 		return "ICMPv6";
 #endif
 	default:
-		sprintf(buf, "IP_%d", proto);
+		sprintf(buf, "IP_%u", proto);
 		return buf;
 	}
 }

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e..73ba1cc 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c

@@ -562,7 +562,7 @@
 	ip_send_check(iph);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -590,7 +590,7 @@
 		goto tx_error;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -684,7 +684,7 @@
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -774,7 +774,7 @@
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();
@@ -883,10 +883,10 @@
 	iph->daddr		=	cp->daddr.ip;
 	iph->saddr		=	saddr;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -974,7 +974,7 @@
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -1023,7 +1023,7 @@
 	ip_send_check(ip_hdr(skb));
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -1060,7 +1060,7 @@
 	}
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -1157,7 +1157,7 @@
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -1249,7 +1249,7 @@
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 52ca952..09096a6 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c

@@ -358,6 +358,19 @@
 	rcu_read_unlock();
 }
 
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	if (nat)
+		return nat;
+	/* no NAT extension yet: add one only while ct is unconfirmed */
+	if (!nf_ct_is_confirmed(ct))
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+	/* NULL here: ct is confirmed or nf_ct_ext_add() returned NULL */
+	return nat;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
@@ -368,14 +381,9 @@
 	struct nf_conn_nat *nat;
 
 	/* nat helper or nfctnetlink also setup binding */
-	nat = nfct_nat(ct);
-	if (!nat) {
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159d..624e083 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c

@@ -88,6 +88,45 @@
 	return ERR_PTR(-EAFNOSUPPORT);
 }
 
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct sk_buff *skb,
+			 const struct nlmsghdr *nlh,
+			 struct nft_af_info *afi,
+			 struct nft_table *table,
+			 struct nft_chain *chain,
+			 const struct nlattr * const *nla)
+{
+	ctx->net	= sock_net(skb->sk);
+	ctx->afi	= afi;
+	ctx->table	= table;
+	ctx->chain	= chain;
+	ctx->nla   	= nla;
+	ctx->portid	= NETLINK_CB(skb).portid;
+	ctx->report	= nlmsg_report(nlh);
+	ctx->seq	= nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+					 u32 size)
+{
+	struct nft_trans *trans;
+
+	trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+	if (trans == NULL)
+		return NULL;
+	/* self-linked, so nft_trans_destroy() may run before it is queued */
+	INIT_LIST_HEAD(&trans->list);
+	trans->msg_type = msg_type;
+	trans->ctx	= *ctx;
+	return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+	list_del(&trans->list);	/* fine for queued and unqueued entries */
+	kfree(trans);
+}
+
 /*
  * Tables
  */
@@ -197,20 +236,13 @@
 	return -1;
 }
 
-static int nf_tables_table_notify(const struct sk_buff *oskb,
-				  const struct nlmsghdr *nlh,
-				  const struct nft_table *table,
-				  int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
-	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-	u32 seq = nlh ? nlh->nlmsg_seq : 0;
-	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
-	bool report;
 	int err;
 
-	report = nlh ? nlmsg_report(nlh) : false;
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -218,18 +250,20 @@
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
-					family, table);
+	err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+					ctx->afi->family, ctx->table);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -269,6 +303,9 @@
 	return skb->len;
 }
 
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE	(1 << 15)
+
 static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -295,6 +332,8 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -343,7 +382,7 @@
 	return err;
 }
 
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
 				   struct nft_table *table)
 {
 	struct nft_chain *chain;
@@ -353,45 +392,63 @@
 			nf_unregister_hooks(nft_base_chain(chain)->ops,
 					    afi->nops);
 	}
-
-	return 0;
 }
 
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
-			      const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	int family = nfmsg->nfgen_family, ret = 0;
+	struct nft_trans *trans;
+	u32 flags;
+	int ret = 0;
 
-	if (nla[NFTA_TABLE_FLAGS]) {
-		u32 flags;
+	if (!ctx->nla[NFTA_TABLE_FLAGS])
+		return 0;
 
-		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
-		if (flags & ~NFT_TABLE_F_DORMANT)
-			return -EINVAL;
+	flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+	if (flags & ~NFT_TABLE_F_DORMANT)
+		return -EINVAL;
 
-		if ((flags & NFT_TABLE_F_DORMANT) &&
-		    !(table->flags & NFT_TABLE_F_DORMANT)) {
-			ret = nf_tables_table_disable(afi, table);
-			if (ret >= 0)
-				table->flags |= NFT_TABLE_F_DORMANT;
-		} else if (!(flags & NFT_TABLE_F_DORMANT) &&
-			   table->flags & NFT_TABLE_F_DORMANT) {
-			ret = nf_tables_table_enable(afi, table);
-			if (ret >= 0)
-				table->flags &= ~NFT_TABLE_F_DORMANT;
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+				sizeof(struct nft_trans_table));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if ((flags & NFT_TABLE_F_DORMANT) &&
+	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+		nft_trans_table_enable(trans) = false;
+	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
+		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
+		ret = nf_tables_table_enable(ctx->afi, ctx->table);
+		if (ret >= 0) {
+			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+			nft_trans_table_enable(trans) = true;
 		}
-		if (ret < 0)
-			goto err;
 	}
+	if (ret < 0)
+		goto err;
 
-	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	nft_trans_table_update(trans) = true;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return 0;
 err:
+	nft_trans_destroy(trans);
 	return ret;
 }
 
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+	if (trans == NULL)
+		return -ENOMEM;
+	/* handlers in this file answer -ENOENT for NFT_TABLE_INACTIVE tables */
+	if (msg_type == NFT_MSG_NEWTABLE)
+		ctx->table->flags |= NFT_TABLE_INACTIVE;
+	/* queue the request on the commit list of ctx->net */
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return 0;
+}
+
 static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -403,6 +460,8 @@
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 	u32 flags = 0;
+	struct nft_ctx ctx;
+	int err;
 
 	afi = nf_tables_afinfo_lookup(net, family, true);
 	if (IS_ERR(afi))
@@ -417,11 +476,15 @@
 	}
 
 	if (table != NULL) {
+		if (table->flags & NFT_TABLE_INACTIVE)
+			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
-		return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+		nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+		return nf_tables_updtable(&ctx);
 	}
 
 	if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +507,14 @@
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+	if (err < 0) {
+		kfree(table);
+		module_put(afi->owner);
+		return err;
+	}
 	list_add_tail(&table->list, &afi->tables);
-	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
 	return 0;
 }
 
@@ -457,7 +526,8 @@
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
-	int family = nfmsg->nfgen_family;
+	int family = nfmsg->nfgen_family, err;
+	struct nft_ctx ctx;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -466,17 +536,28 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
-
-	if (!list_empty(&table->chains) || !list_empty(&table->sets))
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
+	if (table->use > 0)
 		return -EBUSY;
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+	err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+	if (err < 0)
+		return err;
+
 	list_del(&table->list);
-	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
-	kfree(table);
-	module_put(afi->owner);
 	return 0;
 }
 
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+	BUG_ON(ctx->table->use > 0);
+	/* free the table, then drop the reference on the family's owner module */
+	kfree(ctx->table);
+	module_put(ctx->afi->owner);
+}
+
 int nft_register_chain_type(const struct nf_chain_type *ctype)
 {
 	int err = 0;
@@ -541,7 +622,7 @@
 				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
 	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
 	[NFTA_CHAIN_POLICY]	= { .type = NLA_U32 },
-	[NFTA_CHAIN_TYPE]	= { .type = NLA_NUL_STRING },
+	[NFTA_CHAIN_TYPE]	= { .type = NLA_STRING },
 	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
 };
 
@@ -637,21 +718,13 @@
 	return -1;
 }
 
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
-				  const struct nlmsghdr *nlh,
-				  const struct nft_table *table,
-				  const struct nft_chain *chain,
-				  int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
-	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
-	u32 seq = nlh ? nlh->nlmsg_seq : 0;
-	bool report;
 	int err;
 
-	report = nlh ? nlmsg_report(nlh) : false;
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -659,18 +732,21 @@
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
-					table, chain);
+	err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+					ctx->afi->family, ctx->table,
+					ctx->chain);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -740,10 +816,14 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -767,8 +847,7 @@
 	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
 };
 
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
 {
 	struct nlattr *tb[NFTA_COUNTER_MAX+1];
 	struct nft_stats __percpu *newstats;
@@ -777,14 +856,14 @@
 
 	err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
 	if (err < 0)
-		return err;
+		return ERR_PTR(err);
 
 	if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	newstats = alloc_percpu(struct nft_stats);
 	if (newstats == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* Restore old counters on this cpu, no problem. Per-cpu statistics
 	 * are not exposed to userspace.
@@ -793,6 +872,12 @@
 	stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
 	stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
 
+	return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+				    struct nft_stats __percpu *newstats)
+{
 	if (chain->stats) {
 		struct nft_stats __percpu *oldstats =
 				nft_dereference(chain->stats);
@@ -802,17 +887,43 @@
 		free_percpu(oldstats);
 	} else
 		rcu_assign_pointer(chain->stats, newstats);
+}
 
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+	if (trans == NULL)
+		return -ENOMEM;
+	/* handlers in this file answer -ENOENT for NFT_CHAIN_INACTIVE chains */
+	if (msg_type == NFT_MSG_NEWCHAIN)
+		ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+	/* queue the request on the commit list of ctx->net */
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
 }
 
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+	BUG_ON(chain->use > 0);
+	/* base chains also drop their type's module ref and per-cpu stats */
+	if (chain->flags & NFT_BASE_CHAIN) {
+		module_put(nft_base_chain(chain)->type->owner);
+		free_percpu(nft_base_chain(chain)->stats);
+		kfree(nft_base_chain(chain));
+	} else {
+		kfree(chain);
+	}
+}
+
 static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nlattr * uninitialized_var(name);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_base_chain *basechain = NULL;
@@ -822,8 +933,10 @@
 	u8 policy = NF_ACCEPT;
 	u64 handle = 0;
 	unsigned int i;
+	struct nft_stats __percpu *stats;
 	int err;
 	bool create;
+	struct nft_ctx ctx;
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
@@ -869,6 +982,11 @@
 	}
 
 	if (chain != NULL) {
+		struct nft_stats __percpu *stats = NULL; /* per-cpu counters */
+		struct nft_trans *trans;
+
+		if (chain->flags & NFT_CHAIN_INACTIVE)
+			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -882,19 +1000,31 @@
 			if (!(chain->flags & NFT_BASE_CHAIN))
 				return -EOPNOTSUPP;
 
-			err = nf_tables_counters(nft_base_chain(chain),
-						 nla[NFTA_CHAIN_COUNTERS]);
-			if (err < 0)
-				return err;
+			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+			if (IS_ERR(stats))
+				return PTR_ERR(stats);
 		}
 
+		nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+		trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+					sizeof(struct nft_trans_chain));
+		if (trans == NULL) {
+			free_percpu(stats); /* no-op when stats is NULL */
+			return -ENOMEM;
+		}
+		nft_trans_chain_stats(trans) = stats;
+		nft_trans_chain_update(trans) = true;
 		if (nla[NFTA_CHAIN_POLICY])
-			nft_base_chain(chain)->policy = policy;
+			nft_trans_chain_policy(trans) = policy;
+		else
+			nft_trans_chain_policy(trans) = -1;
 
-		if (nla[NFTA_CHAIN_HANDLE] && name)
-			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
-
-		goto notify;
+		if (nla[NFTA_CHAIN_HANDLE] && name) {
+			nla_strlcpy(nft_trans_chain_name(trans), name,
+				    NFT_CHAIN_MAXNAMELEN);
+		}
+		list_add_tail(&trans->list, &net->nft.commit_list);
+		return 0;
 	}
 
 	if (table->use == UINT_MAX)
@@ -939,23 +1069,21 @@
 			return -ENOMEM;
 
 		if (nla[NFTA_CHAIN_COUNTERS]) {
-			err = nf_tables_counters(basechain,
-						 nla[NFTA_CHAIN_COUNTERS]);
-			if (err < 0) {
+			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+			if (IS_ERR(stats)) {
 				module_put(type->owner);
 				kfree(basechain);
-				return err;
+				return PTR_ERR(stats);
 			}
+			basechain->stats = stats;
 		} else {
-			struct nft_stats __percpu *newstats;
-
-			newstats = alloc_percpu(struct nft_stats);
-			if (newstats == NULL) {
+			stats = alloc_percpu(struct nft_stats);
+			if (stats == NULL) { /* NULL, never an ERR_PTR */
 				module_put(type->owner);
 				kfree(basechain);
 				return -ENOMEM;
 			}
-			rcu_assign_pointer(basechain->stats, newstats);
+			rcu_assign_pointer(basechain->stats, stats);
 		}
 
 		basechain->type = type;
@@ -992,31 +1120,27 @@
 	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
 	    chain->flags & NFT_BASE_CHAIN) {
 		err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
-		if (err < 0) {
-			module_put(basechain->type->owner);
-			free_percpu(basechain->stats);
-			kfree(basechain);
-			return err;
-		}
+		if (err < 0)
+			goto err1;
 	}
-	list_add_tail(&chain->list, &table->chains);
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+	err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+	if (err < 0)
+		goto err2;
+
 	table->use++;
-notify:
-	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
-			       family);
+	list_add_tail(&chain->list, &table->chains);
 	return 0;
-}
-
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
-	BUG_ON(chain->use > 0);
-
-	if (chain->flags & NFT_BASE_CHAIN) {
-		module_put(nft_base_chain(chain)->type->owner);
-		free_percpu(nft_base_chain(chain)->stats);
-		kfree(nft_base_chain(chain));
-	} else
-		kfree(chain);
+err2:
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN) {
+		nf_unregister_hooks(nft_base_chain(chain)->ops,
+				    afi->nops);
+	}
+err1:
+	nf_tables_chain_destroy(chain);
+	return err;
 }
 
 static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1024,11 +1148,13 @@
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct nft_ctx ctx;
+	int err;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1037,48 +1163,27 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
-
-	if (!list_empty(&chain->rules) || chain->use > 0)
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
+	if (chain->use > 0)
 		return -EBUSY;
 
-	list_del(&chain->list);
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+	err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+	if (err < 0)
+		return err;
+
 	table->use--;
-
-	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
-	    chain->flags & NFT_BASE_CHAIN)
-		nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
-	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
-			       family);
-
-	/* Make sure all rule references are gone before this is released */
-	synchronize_rcu();
-
-	nf_tables_chain_destroy(chain);
+	list_del(&chain->list);
 	return 0;
 }
 
-static void nft_ctx_init(struct nft_ctx *ctx,
-			 const struct sk_buff *skb,
-			 const struct nlmsghdr *nlh,
-			 const struct nft_af_info *afi,
-			 const struct nft_table *table,
-			 const struct nft_chain *chain,
-			 const struct nlattr * const *nla)
-{
-	ctx->net   = sock_net(skb->sk);
-	ctx->skb   = skb;
-	ctx->nlh   = nlh;
-	ctx->afi   = afi;
-	ctx->table = table;
-	ctx->chain = chain;
-	ctx->nla   = nla;
-}
-
 /*
  * Expressions
  */
@@ -1093,7 +1198,10 @@
 int nft_register_expr(struct nft_expr_type *type)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_add_tail(&type->list, &nf_tables_expressions);
+	if (type->family == NFPROTO_UNSPEC)
+		list_add_tail(&type->list, &nf_tables_expressions);
+	else
+		list_add(&type->list, &nf_tables_expressions);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	return 0;
 }
@@ -1361,22 +1469,15 @@
 	return -1;
 }
 
-static int nf_tables_rule_notify(const struct sk_buff *oskb,
-				 const struct nlmsghdr *nlh,
-				 const struct nft_table *table,
-				 const struct nft_chain *chain,
+static int nf_tables_rule_notify(const struct nft_ctx *ctx,
 				 const struct nft_rule *rule,
-				 int event, u32 flags, int family)
+				 int event)
 {
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(oskb).portid;
-	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
-	u32 seq = nlh->nlmsg_seq;
-	bool report;
 	int err;
 
-	report = nlmsg_report(nlh);
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -1384,18 +1485,21 @@
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
-				       family, table, chain, rule);
+	err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
+				       ctx->afi->family, ctx->table,
+				       ctx->chain, rule);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -1511,10 +1615,14 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
 
 	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 	if (IS_ERR(rule))
@@ -1554,37 +1662,36 @@
 	kfree(rule);
 }
 
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+					    struct nft_rule *rule)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+	if (trans == NULL)
+		return NULL;
+
+	nft_trans_rule(trans) = rule;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return trans;
+}
+
 #define NFT_RULE_MAXEXPRS	128
 
 static struct nft_expr_info *info;
 
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
-{
-	struct nft_rule_trans *rupd;
-
-	rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
-	if (rupd == NULL)
-	       return NULL;
-
-	rupd->ctx = *ctx;
-	rupd->rule = rule;
-	list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
-
-	return rupd;
-}
-
 static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
-	struct nft_rule_trans *repl = NULL;
+	struct nft_trans *trans = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
@@ -1682,8 +1789,9 @@
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
 		if (nft_rule_is_active_next(net, old_rule)) {
-			repl = nf_tables_trans_add(&ctx, old_rule);
-			if (repl == NULL) {
+			trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
+						   old_rule);
+			if (trans == NULL) {
 				err = -ENOMEM;
 				goto err2;
 			}
@@ -1705,19 +1813,19 @@
 			list_add_rcu(&rule->list, &chain->rules);
 	}
 
-	if (nf_tables_trans_add(&ctx, rule) == NULL) {
+	if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
 		err = -ENOMEM;
 		goto err3;
 	}
+	chain->use++;
 	return 0;
 
 err3:
 	list_del_rcu(&rule->list);
-	if (repl) {
-		list_del_rcu(&repl->rule->list);
-		list_del(&repl->list);
-		nft_rule_clear(net, repl->rule);
-		kfree(repl);
+	if (trans) {
+		list_del_rcu(&nft_trans_rule(trans)->list);
+		nft_rule_clear(net, nft_trans_rule(trans));
+		nft_trans_destroy(trans);
 	}
 err2:
 	nf_tables_rule_destroy(&ctx, rule);
@@ -1734,9 +1842,10 @@
 {
 	/* You cannot delete the same rule twice */
 	if (nft_rule_is_active_next(ctx->net, rule)) {
-		if (nf_tables_trans_add(ctx, rule) == NULL)
+		if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
 			return -ENOMEM;
 		nft_rule_disactivate_next(ctx->net, rule);
+		ctx->chain->use--;
 		return 0;
 	}
 	return -ENOENT;
@@ -1760,9 +1869,9 @@
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
-	const struct nft_table *table;
+	struct nft_table *table;
 	struct nft_chain *chain = NULL;
 	struct nft_rule *rule;
 	int family = nfmsg->nfgen_family, err = 0;
@@ -1775,6 +1884,8 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	if (nla[NFTA_RULE_CHAIN]) {
 		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1807,88 +1918,6 @@
 	return err;
 }
 
-static int nf_tables_commit(struct sk_buff *skb)
-{
-	struct net *net = sock_net(skb->sk);
-	struct nft_rule_trans *rupd, *tmp;
-
-	/* Bump generation counter, invalidate any dump in progress */
-	net->nft.genctr++;
-
-	/* A new generation has just started */
-	net->nft.gencursor = gencursor_next(net);
-
-	/* Make sure all packets have left the previous generation before
-	 * purging old rules.
-	 */
-	synchronize_rcu();
-
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		/* This rule was inactive in the past and just became active.
-		 * Clear the next bit of the genmask since its meaning has
-		 * changed, now it is the future.
-		 */
-		if (nft_rule_is_active(net, rupd->rule)) {
-			nft_rule_clear(net, rupd->rule);
-			nf_tables_rule_notify(skb, rupd->ctx.nlh,
-					      rupd->ctx.table, rupd->ctx.chain,
-					      rupd->rule, NFT_MSG_NEWRULE, 0,
-					      rupd->ctx.afi->family);
-			list_del(&rupd->list);
-			kfree(rupd);
-			continue;
-		}
-
-		/* This rule is in the past, get rid of it */
-		list_del_rcu(&rupd->rule->list);
-		nf_tables_rule_notify(skb, rupd->ctx.nlh,
-				      rupd->ctx.table, rupd->ctx.chain,
-				      rupd->rule, NFT_MSG_DELRULE, 0,
-				      rupd->ctx.afi->family);
-	}
-
-	/* Make sure we don't see any packet traversing old rules */
-	synchronize_rcu();
-
-	/* Now we can safely release unused old rules */
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
-		list_del(&rupd->list);
-		kfree(rupd);
-	}
-
-	return 0;
-}
-
-static int nf_tables_abort(struct sk_buff *skb)
-{
-	struct net *net = sock_net(skb->sk);
-	struct nft_rule_trans *rupd, *tmp;
-
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		if (!nft_rule_is_active_next(net, rupd->rule)) {
-			nft_rule_clear(net, rupd->rule);
-			list_del(&rupd->list);
-			kfree(rupd);
-			continue;
-		}
-
-		/* This rule is inactive, get rid of it */
-		list_del_rcu(&rupd->rule->list);
-	}
-
-	/* Make sure we don't see any packet accessing aborted rules */
-	synchronize_rcu();
-
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
-		list_del(&rupd->list);
-		kfree(rupd);
-	}
-
-	return 0;
-}
-
 /*
  * Sets
  */
@@ -1912,9 +1941,18 @@
 }
 EXPORT_SYMBOL_GPL(nft_unregister_set);
 
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given, in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+		   const struct nft_set_desc *desc,
+		   enum nft_set_policies policy)
 {
-	const struct nft_set_ops *ops;
+	const struct nft_set_ops *ops, *bops;
+	struct nft_set_estimate est, best;
 	u32 features;
 
 #ifdef CONFIG_MODULES
@@ -1932,15 +1970,45 @@
 		features &= NFT_SET_INTERVAL | NFT_SET_MAP;
 	}
 
-	// FIXME: implement selection properly
+	bops	   = NULL;
+	best.size  = ~0;
+	best.class = ~0;
+
 	list_for_each_entry(ops, &nf_tables_set_ops, list) {
 		if ((ops->features & features) != features)
 			continue;
+		if (!ops->estimate(desc, features, &est))
+			continue;
+
+		switch (policy) {
+		case NFT_SET_POL_PERFORMANCE:
+			if (est.class < best.class)
+				break;
+			if (est.class == best.class && est.size < best.size)
+				break;
+			continue;
+		case NFT_SET_POL_MEMORY:
+			if (est.size < best.size)
+				break;
+			if (est.size == best.size && est.class < best.class)
+				break;
+			continue;
+		default:
+			break;
+		}
+
 		if (!try_module_get(ops->owner))
 			continue;
-		return ops;
+		if (bops != NULL)
+			module_put(bops->owner);
+
+		bops = ops;
+		best = est;
 	}
 
+	if (bops != NULL)
+		return bops;
+
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
@@ -1953,6 +2021,13 @@
 	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
+	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
+	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
+	[NFTA_SET_ID]			= { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+	[NFTA_SET_DESC_SIZE]		= { .type = NLA_U32 },
 };
 
 static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1962,8 +2037,8 @@
 {
 	struct net *net = sock_net(skb->sk);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi = NULL;
-	const struct nft_table *table = NULL;
+	struct nft_af_info *afi = NULL;
+	struct nft_table *table = NULL;
 
 	if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
 		afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1978,6 +2053,8 @@
 		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
+		if (table->flags & NFT_TABLE_INACTIVE)
+			return -ENOENT;
 	}
 
 	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1999,13 +2076,27 @@
 	return ERR_PTR(-ENOENT);
 }
 
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+					  const struct nlattr *nla)
+{
+	struct nft_trans *trans;
+	u32 id = ntohl(nla_get_be32(nla));
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		if (trans->msg_type == NFT_MSG_NEWSET &&
+		    id == nft_trans_set_id(trans))
+			return nft_trans_set(trans);
+	}
+	return ERR_PTR(-ENOENT);
+}
+
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 				    const char *name)
 {
 	const struct nft_set *i;
 	const char *p;
 	unsigned long *inuse;
-	unsigned int n = 0;
+	unsigned int n = 0, min = 0;
 
 	p = strnchr(name, IFNAMSIZ, '%');
 	if (p != NULL) {
@@ -2015,23 +2106,28 @@
 		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 		if (inuse == NULL)
 			return -ENOMEM;
-
+cont:
 		list_for_each_entry(i, &ctx->table->sets, list) {
 			int tmp;
 
 			if (!sscanf(i->name, name, &tmp))
 				continue;
-			if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+			if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
 				continue;
 
-			set_bit(tmp, inuse);
+			set_bit(tmp - min, inuse);
 		}
 
 		n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+		if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+			min += BITS_PER_BYTE * PAGE_SIZE;
+			memset(inuse, 0, PAGE_SIZE);
+			goto cont;
+		}
 		free_page((unsigned long)inuse);
 	}
 
-	snprintf(set->name, sizeof(set->name), name, n);
+	snprintf(set->name, sizeof(set->name), name, min + n);
 	list_for_each_entry(i, &ctx->table->sets, list) {
 		if (!strcmp(set->name, i->name))
 			return -ENFILE;
@@ -2044,8 +2140,9 @@
 {
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	u32 seq = ctx->nlh->nlmsg_seq;
+	struct nlattr *desc;
+	u32 portid = ctx->portid;
+	u32 seq = ctx->seq;
 
 	event |= NFNL_SUBSYS_NFTABLES << 8;
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2077,6 +2174,14 @@
 			goto nla_put_failure;
 	}
 
+	desc = nla_nest_start(skb, NFTA_SET_DESC);
+	if (desc == NULL)
+		goto nla_put_failure;
+	if (set->size &&
+	    nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+		goto nla_put_failure;
+	nla_nest_end(skb, desc);
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -2086,19 +2191,18 @@
 
 static int nf_tables_set_notify(const struct nft_ctx *ctx,
 				const struct nft_set *set,
-				int event)
+				int event, gfp_t gfp_flags)
 {
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	bool report;
+	u32 portid = ctx->portid;
 	int err;
 
-	report = nlmsg_report(ctx->nlh);
-	if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
 	if (skb == NULL)
 		goto err;
 
@@ -2108,8 +2212,8 @@
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+			     ctx->report, gfp_flags);
 err:
 	if (err < 0)
 		nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2183,7 +2287,7 @@
 {
 	const struct nft_set *set;
 	unsigned int idx, s_idx = cb->args[0];
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
 	struct net *net = sock_net(skb->sk);
 	int cur_family = cb->args[3];
@@ -2260,6 +2364,8 @@
 	return ret;
 }
 
+#define NFT_SET_INACTIVE	(1 << 15)	/* Internal set flag */
+
 static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2289,6 +2395,8 @@
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb2 == NULL)
@@ -2305,13 +2413,50 @@
 	return err;
 }
 
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+				    struct nft_set_desc *desc,
+				    const struct nlattr *nla)
+{
+	struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+	if (err < 0)
+		return err;
+
+	if (da[NFTA_SET_DESC_SIZE] != NULL)
+		desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+	return 0;
+}
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+			     struct nft_set *set)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+		nft_trans_set_id(trans) =
+			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+		set->flags |= NFT_SET_INACTIVE;
+	}
+	nft_trans_set(trans) = set;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+}
+
 static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_set_ops *ops;
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
 	struct nft_table *table;
 	struct nft_set *set;
@@ -2319,14 +2464,18 @@
 	char name[IFNAMSIZ];
 	unsigned int size;
 	bool create;
-	u32 ktype, klen, dlen, dtype, flags;
+	u32 ktype, dtype, flags, policy;
+	struct nft_set_desc desc;
 	int err;
 
 	if (nla[NFTA_SET_TABLE] == NULL ||
 	    nla[NFTA_SET_NAME] == NULL ||
-	    nla[NFTA_SET_KEY_LEN] == NULL)
+	    nla[NFTA_SET_KEY_LEN] == NULL ||
+	    nla[NFTA_SET_ID] == NULL)
 		return -EINVAL;
 
+	memset(&desc, 0, sizeof(desc));
+
 	ktype = NFT_DATA_VALUE;
 	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
 		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2334,8 +2483,8 @@
 			return -EINVAL;
 	}
 
-	klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
-	if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+	desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+	if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
 		return -EINVAL;
 
 	flags = 0;
@@ -2347,7 +2496,6 @@
 	}
 
 	dtype = 0;
-	dlen  = 0;
 	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
 		if (!(flags & NFT_SET_MAP))
 			return -EINVAL;
@@ -2360,15 +2508,25 @@
 		if (dtype != NFT_DATA_VERDICT) {
 			if (nla[NFTA_SET_DATA_LEN] == NULL)
 				return -EINVAL;
-			dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
-			if (dlen == 0 ||
-			    dlen > FIELD_SIZEOF(struct nft_data, data))
+			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+			if (desc.dlen == 0 ||
+			    desc.dlen > FIELD_SIZEOF(struct nft_data, data))
 				return -EINVAL;
 		} else
-			dlen = sizeof(struct nft_data);
+			desc.dlen = sizeof(struct nft_data);
 	} else if (flags & NFT_SET_MAP)
 		return -EINVAL;
 
+	policy = NFT_SET_POL_PERFORMANCE;
+	if (nla[NFTA_SET_POLICY] != NULL)
+		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+	if (nla[NFTA_SET_DESC] != NULL) {
+		err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+		if (err < 0)
+			return err;
+	}
+
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2399,7 +2557,7 @@
 	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 
-	ops = nft_select_set_ops(nla);
+	ops = nft_select_set_ops(nla, &desc, policy);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 
@@ -2420,17 +2578,22 @@
 	INIT_LIST_HEAD(&set->bindings);
 	set->ops   = ops;
 	set->ktype = ktype;
-	set->klen  = klen;
+	set->klen  = desc.klen;
 	set->dtype = dtype;
-	set->dlen  = dlen;
+	set->dlen  = desc.dlen;
 	set->flags = flags;
+	set->size  = desc.size;
 
-	err = ops->init(set, nla);
+	err = ops->init(set, &desc, nla);
+	if (err < 0)
+		goto err2;
+
+	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
 	if (err < 0)
 		goto err2;
 
 	list_add_tail(&set->list, &table->sets);
-	nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+	table->use++;
 	return 0;
 
 err2:
@@ -2440,16 +2603,20 @@
 	return err;
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
 {
-	list_del(&set->list);
-	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
 	set->ops->destroy(set);
 	module_put(set->ops->owner);
 	kfree(set);
 }
 
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	list_del(&set->list);
+	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+	nft_set_destroy(set);
+}
+
 static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2471,10 +2638,17 @@
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 	if (!list_empty(&set->bindings))
 		return -EBUSY;
 
-	nf_tables_set_destroy(&ctx, set);
+	err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+	if (err < 0)
+		return err;
+
+	list_del(&set->list);
+	ctx.table->use--;
 	return 0;
 }
 
@@ -2534,7 +2708,8 @@
 {
 	list_del(&binding->list);
 
-	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+	    !(set->flags & NFT_SET_INACTIVE))
 		nf_tables_set_destroy(ctx, set);
 }
 
@@ -2552,16 +2727,18 @@
 	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U32 },
 };
 
 static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
 				      const struct sk_buff *skb,
 				      const struct nlmsghdr *nlh,
-				      const struct nlattr * const nla[])
+				      const struct nlattr * const nla[],
+				      bool trans)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
-	const struct nft_table *table;
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
 
 	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2571,6 +2748,8 @@
 	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+		return -ENOENT;
 
 	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
 	return 0;
@@ -2644,13 +2823,16 @@
 	if (err < 0)
 		return err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+					 false);
 	if (err < 0)
 		return err;
 
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	event  = NFT_MSG_NEWSETELEM;
 	event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2707,13 +2889,15 @@
 	struct nft_ctx ctx;
 	int err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
 	if (err < 0)
 		return err;
 
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -2724,7 +2908,98 @@
 	return -EOPNOTSUPP;
 }
 
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+				       const struct nft_ctx *ctx, u32 seq,
+				       u32 portid, int event, u16 flags,
+				       const struct nft_set *set,
+				       const struct nft_set_elem *elem)
+{
+	struct nfgenmsg *nfmsg;
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	int err;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= ctx->afi->family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	err = nf_tables_fill_setelem(skb, set, elem);
+	if (err < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
+				    const struct nft_set *set,
+				    const struct nft_set_elem *elem,
+				    int event, u16 flags)
+{
+	struct net *net = ctx->net;
+	u32 portid = ctx->portid;
+	struct sk_buff *skb;
+	int err;
+
+	if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+					  set, elem);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+					      int msg_type,
+					      struct nft_set *set)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+	if (trans == NULL)
+		return NULL;
+
+	nft_trans_elem_set(trans) = set;
+	return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2732,8 +3007,12 @@
 	struct nft_set_elem elem;
 	struct nft_set_binding *binding;
 	enum nft_registers dreg;
+	struct nft_trans *trans;
 	int err;
 
+	if (set->size && set->nelems == set->size)
+		return -ENFILE;
+
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
 			       nft_set_elem_policy);
 	if (err < 0)
@@ -2786,7 +3065,7 @@
 			struct nft_ctx bind_ctx = {
 				.afi	= ctx->afi,
 				.table	= ctx->table,
-				.chain	= binding->chain,
+				.chain	= (struct nft_chain *)binding->chain,
 			};
 
 			err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2796,12 +3075,20 @@
 		}
 	}
 
-	err = set->ops->insert(set, &elem);
-	if (err < 0)
+	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+	if (trans == NULL)
 		goto err3;
 
+	err = set->ops->insert(set, &elem);
+	if (err < 0)
+		goto err4;
+
+	nft_trans_elem(trans) = elem;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
 
+err4:
+	kfree(trans);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
 		nft_data_uninit(&elem.data, d2.type);
@@ -2815,35 +3102,46 @@
 				const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	struct net *net = sock_net(skb->sk);
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	int rem, err;
+	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
 	if (err < 0)
 		return err;
 
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (IS_ERR(set)) {
+		if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+			set = nf_tables_set_lookup_byid(net,
+					nla[NFTA_SET_ELEM_LIST_SET_ID]);
+		}
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
+
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;
 
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_add_set_elem(&ctx, set, attr);
 		if (err < 0)
-			return err;
+			break;
+
+		set->nelems++;
 	}
-	return 0;
+	return err;
 }
 
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
 	struct nft_data_desc desc;
 	struct nft_set_elem elem;
+	struct nft_trans *trans;
 	int err;
 
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2867,7 +3165,12 @@
 	if (err < 0)
 		goto err2;
 
-	set->ops->remove(set, &elem);
+	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+	if (trans == NULL)
+		goto err2;
+
+	nft_trans_elem(trans) = elem;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 
 	nft_data_uninit(&elem.key, NFT_DATA_VALUE);
 	if (set->flags & NFT_SET_MAP)
@@ -2886,9 +3189,9 @@
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	int rem, err;
+	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
 	if (err < 0)
 		return err;
 
@@ -2901,14 +3204,16 @@
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_del_setelem(&ctx, set, attr);
 		if (err < 0)
-			return err;
+			break;
+
+		set->nelems--;
 	}
-	return 0;
+	return err;
 }
 
 static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 	[NFT_MSG_NEWTABLE] = {
-		.call		= nf_tables_newtable,
+		.call_batch	= nf_tables_newtable,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
@@ -2918,12 +3223,12 @@
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_DELTABLE] = {
-		.call		= nf_tables_deltable,
+		.call_batch	= nf_tables_deltable,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_NEWCHAIN] = {
-		.call		= nf_tables_newchain,
+		.call_batch	= nf_tables_newchain,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
@@ -2933,7 +3238,7 @@
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_DELCHAIN] = {
-		.call		= nf_tables_delchain,
+		.call_batch	= nf_tables_delchain,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
@@ -2953,7 +3258,7 @@
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_NEWSET] = {
-		.call		= nf_tables_newset,
+		.call_batch	= nf_tables_newset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
@@ -2963,12 +3268,12 @@
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_DELSET] = {
-		.call		= nf_tables_delset,
+		.call_batch	= nf_tables_delset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_NEWSETELEM] = {
-		.call		= nf_tables_newsetelem,
+		.call_batch	= nf_tables_newsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
@@ -2978,12 +3283,282 @@
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_DELSETELEM] = {
-		.call		= nf_tables_delsetelem,
+		.call_batch	= nf_tables_delsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
 };
 
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+	struct nft_base_chain *basechain;
+
+	if (nft_trans_chain_name(trans)[0])
+		strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+	if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+		return;
+
+	basechain = nft_base_chain(trans->ctx.chain);
+	nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+	switch (nft_trans_chain_policy(trans)) {
+	case NF_DROP:
+	case NF_ACCEPT:
+		basechain->policy = nft_trans_chain_policy(trans);
+		break;
+	}
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * removed rules.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+	struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+	switch (trans->msg_type) {
+	case NFT_MSG_DELTABLE:
+		nf_tables_table_destroy(&trans->ctx);
+		break;
+	case NFT_MSG_DELCHAIN:
+		nf_tables_chain_destroy(trans->ctx.chain);
+		break;
+	case NFT_MSG_DELRULE:
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		break;
+	case NFT_MSG_DELSET:
+		nft_set_destroy(nft_trans_set(trans));
+		break;
+	}
+	kfree(trans);
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_trans *trans, *next;
+	struct nft_set *set;
+
+	/* Bump generation counter, invalidate any dump in progress */
+	net->nft.genctr++;
+
+	/* A new generation has just started */
+	net->nft.gencursor = gencursor_next(net);
+
+	/* Make sure all packets have left the previous generation before
+	 * purging old rules.
+	 */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWTABLE:
+			if (nft_trans_table_update(trans)) {
+				if (!nft_trans_table_enable(trans)) {
+					nf_tables_table_disable(trans->ctx.afi,
+								trans->ctx.table);
+					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+				}
+			} else {
+				trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+			}
+			nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELTABLE:
+			nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+			break;
+		case NFT_MSG_NEWCHAIN:
+			if (nft_trans_chain_update(trans))
+				nft_chain_commit_update(trans);
+			else
+				trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+
+			nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELCHAIN:
+			nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+			if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+			    trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+				nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+						    trans->ctx.afi->nops);
+			}
+			break;
+		case NFT_MSG_NEWRULE:
+			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+			nf_tables_rule_notify(&trans->ctx,
+					      nft_trans_rule(trans),
+					      NFT_MSG_NEWRULE);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELRULE:
+			list_del_rcu(&nft_trans_rule(trans)->list);
+			nf_tables_rule_notify(&trans->ctx,
+					      nft_trans_rule(trans),
+					      NFT_MSG_DELRULE);
+			break;
+		case NFT_MSG_NEWSET:
+			nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+			/* This avoids hitting -EBUSY when deleting the table
+			 * from the transaction.
+			 */
+			if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+			    !list_empty(&nft_trans_set(trans)->bindings))
+				trans->ctx.table->use--;
+
+			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+					     NFT_MSG_NEWSET, GFP_KERNEL);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSET:
+			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+					     NFT_MSG_DELSET, GFP_KERNEL);
+			break;
+		case NFT_MSG_NEWSETELEM:
+			nf_tables_setelem_notify(&trans->ctx,
+						 nft_trans_elem_set(trans),
+						 &nft_trans_elem(trans),
+						 NFT_MSG_NEWSETELEM, 0);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSETELEM:
+			nf_tables_setelem_notify(&trans->ctx,
+						 nft_trans_elem_set(trans),
+						 &nft_trans_elem(trans),
+						 NFT_MSG_DELSETELEM, 0);
+			set = nft_trans_elem_set(trans);
+			set->ops->get(set, &nft_trans_elem(trans));
+			set->ops->remove(set, &nft_trans_elem(trans));
+			nft_trans_destroy(trans);
+			break;
+		}
+	}
+
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		list_del(&trans->list);
+		trans->ctx.nla = NULL;
+		call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
+	}
+
+	return 0;
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * aborted rules.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+	struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+	switch (trans->msg_type) {
+	case NFT_MSG_NEWTABLE:
+		nf_tables_table_destroy(&trans->ctx);
+		break;
+	case NFT_MSG_NEWCHAIN:
+		nf_tables_chain_destroy(trans->ctx.chain);
+		break;
+	case NFT_MSG_NEWRULE:
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		break;
+	case NFT_MSG_NEWSET:
+		nft_set_destroy(nft_trans_set(trans));
+		break;
+	}
+	kfree(trans);
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_trans *trans, *next;
+	struct nft_set *set;
+
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWTABLE:
+			if (nft_trans_table_update(trans)) {
+				if (nft_trans_table_enable(trans)) {
+					nf_tables_table_disable(trans->ctx.afi,
+								trans->ctx.table);
+					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+				}
+				nft_trans_destroy(trans);
+			} else {
+				list_del(&trans->ctx.table->list);
+			}
+			break;
+		case NFT_MSG_DELTABLE:
+			list_add_tail(&trans->ctx.table->list,
+				      &trans->ctx.afi->tables);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_NEWCHAIN:
+			if (nft_trans_chain_update(trans)) {
+				if (nft_trans_chain_stats(trans))
+					free_percpu(nft_trans_chain_stats(trans));
+
+				nft_trans_destroy(trans);
+			} else {
+				trans->ctx.table->use--;
+				list_del(&trans->ctx.chain->list);
+				if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+				    trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+					nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+							    trans->ctx.afi->nops);
+				}
+			}
+			break;
+		case NFT_MSG_DELCHAIN:
+			trans->ctx.table->use++;
+			list_add_tail(&trans->ctx.chain->list,
+				      &trans->ctx.table->chains);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_NEWRULE:
+			trans->ctx.chain->use--;
+			list_del_rcu(&nft_trans_rule(trans)->list);
+			break;
+		case NFT_MSG_DELRULE:
+			trans->ctx.chain->use++;
+			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_NEWSET:
+			trans->ctx.table->use--;
+			list_del(&nft_trans_set(trans)->list);
+			break;
+		case NFT_MSG_DELSET:
+			trans->ctx.table->use++;
+			list_add_tail(&nft_trans_set(trans)->list,
+				      &trans->ctx.table->sets);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_NEWSETELEM:
+			nft_trans_elem_set(trans)->nelems--;
+			set = nft_trans_elem_set(trans);
+			set->ops->get(set, &nft_trans_elem(trans));
+			set->ops->remove(set, &nft_trans_elem(trans));
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSETELEM:
+			nft_trans_elem_set(trans)->nelems++;
+			nft_trans_destroy(trans);
+			break;
+		}
+	}
+
+	list_for_each_entry_safe_reverse(trans, next,
+					 &net->nft.commit_list, list) {
+		list_del(&trans->list);
+		trans->ctx.nla = NULL;
+		call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
+	}
+
+	return 0;
+}
+
 static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.name		= "nf_tables",
 	.subsys_id	= NFNL_SUBSYS_NFTABLES,

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 23ef77c..c138b8f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c

@@ -399,19 +399,17 @@
 }
 
 #ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
 {
 	const struct nfnetlink_subsystem *ss;
 	int type = nfnl_group2type[group];
 
 	rcu_read_lock();
 	ss = nfnetlink_get_subsys(type);
-	if (!ss) {
-		rcu_read_unlock();
-		request_module("nfnetlink-subsys-%d", type);
-		return;
-	}
 	rcu_read_unlock();
+	if (!ss)	/* lookup found nothing for this type: try its module */
+		request_module("nfnetlink-subsys-%d", type);
+	return 0;	/* always succeeds; request_module()'s result is ignored */
 }
 #endif
 

diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d46..2baa125 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c

@@ -32,18 +32,24 @@
 struct nf_acct {
 	atomic64_t		pkts;
 	atomic64_t		bytes;
+	unsigned long		flags;		/* NFACCT_F_* bits */
 	struct list_head	head;
 	atomic_t		refcnt;
 	char			name[NFACCT_NAME_MAX];
 	struct rcu_head		rcu_head;
+	char			data[0];	/* u64 quota when one is configured */
 };
 
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
 static int
 nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
 	struct nf_acct *nfacct, *matching = NULL;
 	char *acct_name;
+	unsigned int size = 0;
+	u32 flags = 0;
 
 	if (!tb[NFACCT_NAME])
 		return -EINVAL;
@@ -68,15 +74,38 @@
 			/* reset counters if you request a replacement. */
 			atomic64_set(&matching->pkts, 0);
 			atomic64_set(&matching->bytes, 0);
+			smp_mb__before_atomic();
+			/* reset overquota flag if quota is enabled. */
+			if ((matching->flags & NFACCT_F_QUOTA))
+				clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
 			return 0;
 		}
 		return -EBUSY;
 	}
 
-	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+	if (tb[NFACCT_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+		if (flags & ~NFACCT_F_QUOTA)
+			return -EOPNOTSUPP;
+		if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+			return -EINVAL;
+		if (flags & NFACCT_F_OVERQUOTA)
+			return -EINVAL;
+
+		size += sizeof(u64);
+	}
+
+	nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
 	if (nfacct == NULL)
 		return -ENOMEM;
 
+	if (flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)nfacct->data;
+
+		*quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+		nfacct->flags = flags;
+	}
+
 	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
 
 	if (tb[NFACCT_BYTES]) {
@@ -117,6 +146,9 @@
 	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
 		pkts = atomic64_xchg(&acct->pkts, 0);
 		bytes = atomic64_xchg(&acct->bytes, 0);
+		smp_mb__before_atomic();
+		if (acct->flags & NFACCT_F_QUOTA)
+			clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
 	} else {
 		pkts = atomic64_read(&acct->pkts);
 		bytes = atomic64_read(&acct->bytes);
@@ -125,7 +157,13 @@
 	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
 	    nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
 		goto nla_put_failure;
+	if (acct->flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)acct->data;
 
+		if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+		    nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+			goto nla_put_failure;
+	}
 	nlmsg_end(skb, nlh);
 	return skb->len;
 
@@ -270,6 +308,8 @@
 	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
 	[NFACCT_BYTES] = { .type = NLA_U64 },
 	[NFACCT_PKTS] = { .type = NLA_U64 },
+	[NFACCT_FLAGS] = { .type = NLA_U32 },
+	[NFACCT_QUOTA] = { .type = NLA_U64 },
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +376,50 @@
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
+/* Broadcast an NFNL_MSG_ACCT_OVERQUOTA event for @nfacct; best effort. */
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+	struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+
+	if (!msg)
+		return;
+
+	/* anything but a positive length means the fill did not succeed */
+	if (nfnl_acct_fill_info(msg, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+				nfacct) <= 0) {
+		kfree_skb(msg);
+		return;
+	}
+
+	netlink_broadcast(init_net.nfnl, msg, 0, NFNLGRP_ACCT_QUOTA,
+			  GFP_ATOMIC);
+}
+
+/* Compare the selected counter of @nfacct with its quota, if it has one. */
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+	const u64 *limit = (u64 *)nfacct->data;
+	u64 used;
+
+	/* objects without a quota flag have nothing to compare against */
+	if (!(nfacct->flags & NFACCT_F_QUOTA))
+		return NFACCT_NO_QUOTA;
+
+	if (nfacct->flags & NFACCT_F_QUOTA_PKTS)
+		used = atomic64_read(&nfacct->pkts);
+	else
+		used = atomic64_read(&nfacct->bytes);
+
+	/* report only if this call is the one that sets the overquota bit */
+	if (used >= *limit &&
+	    !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags))
+		nfnl_overquota_report(nfacct);
+
+	/* non-zero only when strictly above the quota */
+	return used > *limit;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
 static int __init nfnl_acct_init(void)
 {
 	int ret;

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0d41e..cc56030 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c

@@ -215,22 +215,14 @@
 		nf_ct_l3proto_module_put(family);
 }
 
-static int nft_ct_init_validate_get(const struct nft_expr *expr,
-				    const struct nlattr * const tb[])
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
 {
 	struct nft_ct *priv = nft_expr_priv(expr);
+	int err;
 
-	if (tb[NFTA_CT_DIRECTION] != NULL) {
-		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
-		switch (priv->dir) {
-		case IP_CT_DIR_ORIGINAL:
-		case IP_CT_DIR_REPLY:
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
+	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
 	switch (priv->key) {
 	case NFT_CT_STATE:
 	case NFT_CT_DIRECTION:
@@ -262,55 +254,55 @@
 		return -EOPNOTSUPP;
 	}
 
-	return 0;
-}
-
-static int nft_ct_init_validate_set(uint32_t key)
-{
-	switch (key) {
-	case NFT_CT_MARK:
-		break;
-	default:
-		return -EOPNOTSUPP;
+	if (tb[NFTA_CT_DIRECTION] != NULL) {
+		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+		switch (priv->dir) {
+		case IP_CT_DIR_ORIGINAL:
+		case IP_CT_DIR_REPLY:
+			break;
+		default:
+			return -EINVAL;
+		}
 	}
 
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+	if (err < 0)
+		return err;
+
 	return 0;
 }
 
-static int nft_ct_init(const struct nft_ctx *ctx,
-		       const struct nft_expr *expr,
-		       const struct nlattr * const tb[])
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
 {
 	struct nft_ct *priv = nft_expr_priv(expr);
 	int err;
 
 	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
-
-	if (tb[NFTA_CT_DREG]) {
-		err = nft_ct_init_validate_get(expr, tb);
-		if (err < 0)
-			return err;
-
-		priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
-		err = nft_validate_output_register(priv->dreg);
-		if (err < 0)
-			return err;
-
-		err = nft_validate_data_load(ctx, priv->dreg, NULL,
-					     NFT_DATA_VALUE);
-		if (err < 0)
-			return err;
-	} else {
-		err = nft_ct_init_validate_set(priv->key);
-		if (err < 0)
-			return err;
-
-		priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
-		err = nft_validate_input_register(priv->sreg);
-		if (err < 0)
-			return err;
+	switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+		break;
+#endif
+	default:
+		return -EOPNOTSUPP;
 	}
 
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
 	err = nft_ct_l3proto_try_module_get(ctx->afi->family);
 	if (err < 0)
 		return err;
@@ -370,7 +362,7 @@
 	.type		= &nft_ct_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
 	.eval		= nft_ct_get_eval,
-	.init		= nft_ct_init,
+	.init		= nft_ct_get_init,
 	.destroy	= nft_ct_destroy,
 	.dump		= nft_ct_get_dump,
 };
@@ -379,7 +371,7 @@
 	.type		= &nft_ct_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
 	.eval		= nft_ct_set_eval,
-	.init		= nft_ct_init,
+	.init		= nft_ct_set_init,
 	.destroy	= nft_ct_destroy,
 	.dump		= nft_ct_set_dump,
 };

diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad87..4080ed6 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c

@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <linux/jhash.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-#define NFT_HASH_MIN_SIZE	4
+#define NFT_HASH_MIN_SIZE	4UL
 
 struct nft_hash {
 	struct nft_hash_table __rcu	*tbl;
@@ -27,7 +28,6 @@
 
 struct nft_hash_table {
 	unsigned int			size;
-	unsigned int			elements;
 	struct nft_hash_elem __rcu	*buckets[];
 };
 
@@ -76,10 +76,12 @@
 
 static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
 {
-	if (is_vmalloc_addr(tbl))
-		vfree(tbl);
-	else
-		kfree(tbl);
+	kvfree(tbl);	/* one call for both kmalloc'ed and vmalloc'ed tables */
+}
+
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+	return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
 }
 
 static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
@@ -161,7 +163,6 @@
 			break;
 		}
 	}
-	ntbl->elements = tbl->elements;
 
 	/* Publish new table */
 	rcu_assign_pointer(priv->tbl, ntbl);
@@ -201,7 +202,6 @@
 			;
 		RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
 	}
-	ntbl->elements = tbl->elements;
 
 	/* Publish new table */
 	rcu_assign_pointer(priv->tbl, ntbl);
@@ -237,10 +237,9 @@
 	h = nft_hash_data(&he->key, tbl->size, set->klen);
 	RCU_INIT_POINTER(he->next, tbl->buckets[h]);
 	rcu_assign_pointer(tbl->buckets[h], he);
-	tbl->elements++;
 
 	/* Expand table when exceeding 75% load */
-	if (tbl->elements > tbl->size / 4 * 3)
+	if (set->nelems + 1 > tbl->size / 4 * 3)
 		nft_hash_tbl_expand(set, priv);
 
 	return 0;
@@ -268,10 +267,9 @@
 	RCU_INIT_POINTER(*pprev, he->next);
 	synchronize_rcu();
 	kfree(he);
-	tbl->elements--;
 
 	/* Shrink table beneath 30% load */
-	if (tbl->elements < tbl->size * 3 / 10 &&
+	if (set->nelems - 1 < tbl->size * 3 / 10 &&
 	    tbl->size > NFT_HASH_MIN_SIZE)
 		nft_hash_tbl_shrink(set, priv);
 }
@@ -335,17 +333,23 @@
 }
 
 static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
 			 const struct nlattr * const tb[])
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	struct nft_hash_table *tbl;
+	unsigned int size;
 
 	if (unlikely(!nft_hash_rnd_initted)) {
 		get_random_bytes(&nft_hash_rnd, 4);
 		nft_hash_rnd_initted = true;
 	}
 
-	tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+	size = NFT_HASH_MIN_SIZE;
+	if (desc->size)
+		size = nft_hash_tbl_size(desc->size);
+
+	tbl = nft_hash_tbl_alloc(size);
 	if (tbl == NULL)
 		return -ENOMEM;
 	RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +373,37 @@
 	kfree(tbl);
 }
 
+/* Estimate memory use (whole set if desc->size is known, else per element)
+ * and lookup class of a hash set. Always returns true.
+ */
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	unsigned int elem_bytes = sizeof(struct nft_hash_elem);
+
+	/* maps need room for data[0] in every element as well */
+	if (features & NFT_SET_MAP)
+		elem_bytes += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+	est->class = NFT_SET_CLASS_O_1;
+	if (!desc->size) {
+		/* Resize thresholds are 30% and 75% load; the 52.5% average
+		 * is approximated by 50%, i.e. two buckets per element.
+		 */
+		est->size = elem_bytes + 2 * sizeof(struct nft_hash_elem *);
+		return true;
+	}
+
+	est->size = sizeof(struct nft_hash) +
+		    nft_hash_tbl_size(desc->size) *
+		    sizeof(struct nft_hash_elem *) +
+		    desc->size * elem_bytes;
+	return true;
+}
+
 static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.privsize       = nft_hash_privsize,
+	.estimate	= nft_hash_estimate,
 	.init		= nft_hash_init,
 	.destroy	= nft_hash_destroy,
 	.get		= nft_hash_get,

diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea..6404a72 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c

@@ -56,8 +56,14 @@
 		return -EINVAL;
 
 	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (IS_ERR(set)) {
+		if (tb[NFTA_LOOKUP_SET_ID]) {
+			set = nf_tables_set_lookup_byid(ctx->net,
+							tb[NFTA_LOOKUP_SET_ID]);
+		}
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
 
 	priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
 	err = nft_validate_input_register(priv->sreg);

diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39..852b178 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c

@@ -18,18 +18,11 @@
 #include <net/sock.h>
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
 
-struct nft_meta {
-	enum nft_meta_keys	key:8;
-	union {
-		enum nft_registers	dreg:8;
-		enum nft_registers	sreg:8;
-	};
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
-			      struct nft_data data[NFT_REG_MAX + 1],
-			      const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@
 err:
 	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
 
-static void nft_meta_set_eval(const struct nft_expr *expr,
-			      struct nft_data data[NFT_REG_MAX + 1],
-			      const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@
 		WARN_ON(1);
 	}
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
 
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
 	[NFTA_META_DREG]	= { .type = NLA_U32 },
 	[NFTA_META_KEY]		= { .type = NLA_U32 },
 	[NFTA_META_SREG]	= { .type = NLA_U32 },
 };
+EXPORT_SYMBOL_GPL(nft_meta_policy);
 
-static int nft_meta_init_validate_set(uint32_t key)
+int nft_meta_get_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
-	switch (key) {
-	case NFT_META_MARK:
-	case NFT_META_PRIORITY:
-	case NFT_META_NFTRACE:
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
 
-static int nft_meta_init_validate_get(uint32_t key)
-{
-	switch (key) {
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
 	case NFT_META_LEN:
 	case NFT_META_PROTOCOL:
 	case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@
 #ifdef CONFIG_NETWORK_SECMARK
 	case NFT_META_SECMARK:
 #endif
-		return 0;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
-}
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
 
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-			 const struct nlattr * const tb[])
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	int err;
 
 	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-
-	if (tb[NFTA_META_DREG]) {
-		err = nft_meta_init_validate_get(priv->key);
-		if (err < 0)
-			return err;
-
-		priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
-		err = nft_validate_output_register(priv->dreg);
-		if (err < 0)
-			return err;
-
-		return nft_validate_data_load(ctx, priv->dreg, NULL,
-					      NFT_DATA_VALUE);
+	switch (priv->key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
-	err = nft_meta_init_validate_set(priv->key);
-	if (err < 0)
-		return err;
-
 	priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
 	err = nft_validate_input_register(priv->sreg);
 	if (err < 0)
@@ -245,9 +237,10 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
 
-static int nft_meta_get_dump(struct sk_buff *skb,
-			     const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -260,9 +253,10 @@
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
 
-static int nft_meta_set_dump(struct sk_buff *skb,
-			     const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -276,13 +270,14 @@
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
 
 static struct nft_expr_type nft_meta_type;
 static const struct nft_expr_ops nft_meta_get_ops = {
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
 	.eval		= nft_meta_get_eval,
-	.init		= nft_meta_init,
+	.init		= nft_meta_get_init,
 	.dump		= nft_meta_get_dump,
 };
 
@@ -290,7 +285,7 @@
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
 	.eval		= nft_meta_set_eval,
-	.init		= nft_meta_init,
+	.init		= nft_meta_set_init,
 	.dump		= nft_meta_set_dump,
 };
 

diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d..e1836ff 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c

@@ -18,6 +18,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
 struct nft_rbtree {
 	struct rb_root		root;
 };
@@ -38,6 +40,7 @@
 	const struct rb_node *parent = priv->root.rb_node;
 	int d;
 
+	spin_lock_bh(&nft_rbtree_lock);
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
@@ -53,6 +56,8 @@
 				goto out;
 			if (set->flags & NFT_SET_MAP)
 				nft_data_copy(data, rbe->data);
+
+			spin_unlock_bh(&nft_rbtree_lock);
 			return true;
 		}
 	}
@@ -62,6 +67,7 @@
 		goto found;
 	}
 out:
+	spin_unlock_bh(&nft_rbtree_lock);
 	return false;
 }
 
@@ -124,9 +130,12 @@
 	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 		nft_data_copy(rbe->data, &elem->data);
 
+	spin_lock_bh(&nft_rbtree_lock);
 	err = __nft_rbtree_insert(set, rbe);
 	if (err < 0)
 		kfree(rbe);
+
+	spin_unlock_bh(&nft_rbtree_lock);
 	return err;
 }
 
@@ -136,7 +145,9 @@
 	struct nft_rbtree *priv = nft_set_priv(set);
 	struct nft_rbtree_elem *rbe = elem->cookie;
 
+	spin_lock_bh(&nft_rbtree_lock);
 	rb_erase(&rbe->node, &priv->root);
+	spin_unlock_bh(&nft_rbtree_lock);
 	kfree(rbe);
 }
 
@@ -147,6 +158,7 @@
 	struct nft_rbtree_elem *rbe;
 	int d;
 
+	spin_lock_bh(&nft_rbtree_lock);
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
@@ -161,9 +173,11 @@
 			    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 				nft_data_copy(&elem->data, rbe->data);
 			elem->flags = rbe->flags;
+			spin_unlock_bh(&nft_rbtree_lock);
 			return 0;
 		}
 	}
+	spin_unlock_bh(&nft_rbtree_lock);
 	return -ENOENT;
 }
 
@@ -176,6 +190,7 @@
 	struct nft_set_elem elem;
 	struct rb_node *node;
 
+	spin_lock_bh(&nft_rbtree_lock);
 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 		if (iter->count < iter->skip)
 			goto cont;
@@ -188,11 +203,14 @@
 		elem.flags = rbe->flags;
 
 		iter->err = iter->fn(ctx, set, iter, &elem);
-		if (iter->err < 0)
+		if (iter->err < 0) {
+			spin_unlock_bh(&nft_rbtree_lock);
 			return;
+		}
 cont:
 		iter->count++;
 	}
+	spin_unlock_bh(&nft_rbtree_lock);
 }
 
 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -201,6 +219,7 @@
 }
 
 static int nft_rbtree_init(const struct nft_set *set,
+			   const struct nft_set_desc *desc,
 			   const struct nlattr * const nla[])
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
@@ -215,15 +234,37 @@
 	struct nft_rbtree_elem *rbe;
 	struct rb_node *node;
 
+	spin_lock_bh(&nft_rbtree_lock);
 	while ((node = priv->root.rb_node) != NULL) {
 		rb_erase(node, &priv->root);
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
 		nft_rbtree_elem_destroy(set, rbe);
 	}
+	spin_unlock_bh(&nft_rbtree_lock);
+}
+
+/* Size and lookup-class estimate for rbtree sets; always returns true. */
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+				struct nft_set_estimate *est)
+{
+	unsigned int nbytes = sizeof(struct nft_rbtree_elem);
+
+	/* map elements also store data[0] */
+	if (features & NFT_SET_MAP)
+		nbytes += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+	/* whole-set total when the size is known, per-element cost if not */
+	est->size = nbytes;
+	if (desc->size)
+		est->size = sizeof(struct nft_rbtree) + desc->size * nbytes;
+
+	est->class = NFT_SET_CLASS_O_LOG_N;
+	return true;
 }
 
 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.privsize	= nft_rbtree_privsize,
+	.estimate	= nft_rbtree_estimate,
 	.init		= nft_rbtree_init,
 	.destroy	= nft_rbtree_destroy,
 	.insert		= nft_rbtree_insert,

diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8..bbffdbd 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c

@@ -23,10 +23,11 @@
 static int bpf_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_bpf_info *info = par->matchinfo;
-	struct sock_fprog program;
+	struct sock_fprog_kern program;
 
 	program.len = info->bpf_program_num_elem;
-	program.filter = (struct sock_filter __user *) info->bpf_program;
+	program.filter = info->bpf_program;
+
 	if (sk_unattached_filter_create(&info->filter, &program)) {
 		pr_info("bpf: check failed: parse error\n");
 		return -EINVAL;

diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef..8c646ed 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c

@@ -21,11 +21,14 @@
 
 static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
+	int overquota;
 	const struct xt_nfacct_match_info *info = par->targinfo;
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	return true;
+	overquota = nfnl_acct_overquota(skb, info->nfacct);
+	/* anything but NFACCT_UNDERQUOTA is reported as a match */
+	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
 
 static int

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1e657cf..a9faae8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c

@@ -313,10 +313,7 @@
 
 static void recent_table_free(void *addr)
 {
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
+	kvfree(addr);	/* one call for both kmalloc'ed and vmalloc'ed memory */
 }
 
 static int recent_mt_check(const struct xt_mtchk_param *par,

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f22757a..15c731f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c

@@ -1206,7 +1206,8 @@
 	struct module *module = NULL;
 	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
-	void (*bind)(int group);
+	int (*bind)(int group);
+	void (*unbind)(int group);
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -1232,6 +1233,7 @@
 		err = -EPROTONOSUPPORT;
 	cb_mutex = nl_table[protocol].cb_mutex;
 	bind = nl_table[protocol].bind;
+	unbind = nl_table[protocol].unbind;
 	netlink_unlock_table();
 
 	if (err < 0)
@@ -1248,6 +1250,7 @@
 	nlk = nlk_sk(sock->sk);
 	nlk->module = module;
 	nlk->netlink_bind = bind;
+	nlk->netlink_unbind = unbind;
 out:
 	return err;
 
@@ -1301,6 +1304,7 @@
 			kfree_rcu(old, rcu);
 			nl_table[sk->sk_protocol].module = NULL;
 			nl_table[sk->sk_protocol].bind = NULL;
+			nl_table[sk->sk_protocol].unbind = NULL;
 			nl_table[sk->sk_protocol].flags = 0;
 			nl_table[sk->sk_protocol].registered = 0;
 		}
@@ -1478,6 +1482,19 @@
 	return err;
 }
 
+/* Unbind every group below @group that is set in @groups (bind rollback). */
+static void netlink_unbind(int group, long unsigned int groups,
+			   struct netlink_sock *nlk)
+{
+	int undo;
+
+	if (!nlk->netlink_unbind)
+		return;
+	for (undo = 0; undo < group; undo++)
+		if (test_bit(undo, &groups))	/* test the candidate bit, not @group */
+			nlk->netlink_unbind(undo);
+}
+
 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			int addr_len)
 {
@@ -1486,6 +1503,7 @@
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err;
+	long unsigned int groups = nladdr->nl_groups;
 
 	if (addr_len < sizeof(struct sockaddr_nl))
 		return -EINVAL;
@@ -1494,7 +1512,7 @@
 		return -EINVAL;
 
 	/* Only superuser is allowed to listen multicasts */
-	if (nladdr->nl_groups) {
+	if (groups) {
 		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
@@ -1502,37 +1520,45 @@
 			return err;
 	}
 
-	if (nlk->portid) {
+	if (nlk->portid)
 		if (nladdr->nl_pid != nlk->portid)
 			return -EINVAL;
-	} else {
+
+	if (nlk->netlink_bind && groups) {
+		int group;
+
+		for (group = 0; group < nlk->ngroups; group++) {
+			if (!test_bit(group, &groups))
+				continue;
+			err = nlk->netlink_bind(group);
+			if (!err)
+				continue;
+			netlink_unbind(group, groups, nlk);
+			return err;
+		}
+	}
+
+	if (!nlk->portid) {
 		err = nladdr->nl_pid ?
 			netlink_insert(sk, net, nladdr->nl_pid) :
 			netlink_autobind(sock);
-		if (err)
+		if (err) {
+			netlink_unbind(nlk->ngroups - 1, groups, nlk);
 			return err;
+		}
 	}
 
-	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
+	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
 		return 0;
 
 	netlink_table_grab();
 	netlink_update_subscriptions(sk, nlk->subscriptions +
-					 hweight32(nladdr->nl_groups) -
+					 hweight32(groups) -
 					 hweight32(nlk->groups[0]));
-	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
+	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
 	netlink_update_listeners(sk);
 	netlink_table_ungrab();
 
-	if (nlk->netlink_bind && nlk->groups[0]) {
-		int i;
-
-		for (i = 0; i < nlk->ngroups; i++) {
-			if (test_bit(i, nlk->groups))
-				nlk->netlink_bind(i);
-		}
-	}
-
 	return 0;
 }
 
@@ -2170,13 +2196,17 @@
 			return err;
 		if (!val || val - 1 >= nlk->ngroups)
 			return -EINVAL;
+		if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
+			err = nlk->netlink_bind(val);
+			if (err)
+				return err;
+		}
 		netlink_table_grab();
 		netlink_update_socket_mc(nlk, val,
 					 optname == NETLINK_ADD_MEMBERSHIP);
 		netlink_table_ungrab();
-
-		if (nlk->netlink_bind)
-			nlk->netlink_bind(val);
+		if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
+			nlk->netlink_unbind(val);
 
 		err = 0;
 		break;

diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed13a79..0b59d44 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h

@@ -38,7 +38,8 @@
 	struct mutex		*cb_mutex;
 	struct mutex		cb_def_mutex;
 	void			(*netlink_rcv)(struct sk_buff *skb);
-	void			(*netlink_bind)(int group);
+	int			(*netlink_bind)(int group);
+	void			(*netlink_unbind)(int group);
 	struct module		*module;
 #ifdef CONFIG_NETLINK_MMAP
 	struct mutex		pg_vec_lock;
@@ -74,7 +75,8 @@
 	unsigned int		groups;
 	struct mutex		*cb_mutex;
 	struct module		*module;
-	void			(*bind)(int group);
+	int			(*bind)(int group);
+	void			(*unbind)(int group);
 	bool			(*compare)(struct net *net, struct sock *sock);
 	int			registered;
 };

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a3ba3ca..76393f2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c

@@ -317,7 +317,7 @@
 	}
 }
 
-static int genl_validate_ops(struct genl_family *family)
+static int genl_validate_ops(const struct genl_family *family)
 {
 	const struct genl_ops *ops = family->ops;
 	unsigned int n_ops = family->n_ops;
@@ -337,10 +337,6 @@
 				return -EINVAL;
 	}
 
-	/* family is not registered yet, so no locking needed */
-	family->ops = ops;
-	family->n_ops = n_ops;
-
 	return 0;
 }
 

diff --git a/net/nfc/digital.h b/net/nfc/digital.h
index 3759add..71ad7ee 100644
--- a/net/nfc/digital.h
+++ b/net/nfc/digital.h

@@ -71,6 +71,7 @@
 void digital_poll_next_tech(struct nfc_digital_dev *ddev);
 
 int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech);
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech);
 int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech);
 int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech);
 

diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index e01e15d..a6ce3c6 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c

@@ -22,6 +22,8 @@
 #define DIGITAL_PROTO_NFCA_RF_TECH \
 	(NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK)
 
+#define DIGITAL_PROTO_NFCB_RF_TECH	NFC_PROTO_ISO14443_B_MASK
+
 #define DIGITAL_PROTO_NFCF_RF_TECH \
 	(NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK)
 
@@ -345,6 +347,12 @@
 		add_crc = digital_skb_add_crc_a;
 		break;
 
+	case NFC_PROTO_ISO14443_B:
+		framing = NFC_DIGITAL_FRAMING_NFCB_T4T;
+		check_crc = digital_skb_check_crc_b;
+		add_crc = digital_skb_add_crc_b;
+		break;
+
 	default:
 		pr_err("Invalid protocol %d\n", protocol);
 		return -EINVAL;
@@ -378,6 +386,8 @@
 
 void digital_poll_next_tech(struct nfc_digital_dev *ddev)
 {
+	u8 rand_mod;
+
 	digital_switch_rf(ddev, 0);
 
 	mutex_lock(&ddev->poll_lock);
@@ -387,8 +397,8 @@
 		return;
 	}
 
-	ddev->poll_tech_index = (ddev->poll_tech_index + 1) %
-				ddev->poll_tech_count;
+	get_random_bytes(&rand_mod, sizeof(rand_mod));
+	ddev->poll_tech_index = rand_mod % ddev->poll_tech_count;
 
 	mutex_unlock(&ddev->poll_lock);
 
@@ -475,6 +485,10 @@
 		digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A,
 				      digital_in_send_sens_req);
 
+	if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH)
+		digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B,
+				      digital_in_send_sensb_req);
+
 	if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) {
 		digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F,
 				      digital_in_send_sensf_req);
@@ -635,7 +649,8 @@
 		goto done;
 	}
 
-	if (ddev->curr_protocol == NFC_PROTO_ISO14443) {
+	if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+	    (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
 		rc = digital_in_iso_dep_pull_sod(ddev, resp);
 		if (rc)
 			goto done;
@@ -676,7 +691,8 @@
 		goto exit;
 	}
 
-	if (ddev->curr_protocol == NFC_PROTO_ISO14443) {
+	if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+	    (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
 		rc = digital_in_iso_dep_push_sod(ddev, skb);
 		if (rc)
 			goto exit;
@@ -747,6 +763,8 @@
 		ddev->protocols |= NFC_PROTO_ISO15693_MASK;
 	if (supported_protocols & NFC_PROTO_ISO14443_MASK)
 		ddev->protocols |= NFC_PROTO_ISO14443_MASK;
+	if (supported_protocols & NFC_PROTO_ISO14443_B_MASK)
+		ddev->protocols |= NFC_PROTO_ISO14443_B_MASK;
 
 	ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN;
 	ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN;

diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index d4ed25f..171cb99 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c

@@ -224,9 +224,8 @@
 
 	ddev->skb_add_crc(skb);
 
-	digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, target);
-
-	return 0;
+	return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res,
+				   target);
 }
 
 static int digital_in_send_rtox(struct nfc_digital_dev *ddev,

diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 278c3fe..c2c1c01 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c

@@ -41,6 +41,24 @@
 #define DIGITAL_MIFARE_READ_RES_LEN 16
 #define DIGITAL_MIFARE_ACK_RES	0x0A
 
+#define DIGITAL_CMD_SENSB_REQ			0x05
+#define DIGITAL_SENSB_ADVANCED			BIT(5)
+#define DIGITAL_SENSB_EXTENDED			BIT(4)
+#define DIGITAL_SENSB_ALLB_REQ			BIT(3)
+#define DIGITAL_SENSB_N(n)			((n) & 0x7)
+
+#define DIGITAL_CMD_SENSB_RES			0x50
+
+#define DIGITAL_CMD_ATTRIB_REQ			0x1D
+#define DIGITAL_ATTRIB_P1_TR0_DEFAULT		(0x0 << 6)
+#define DIGITAL_ATTRIB_P1_TR1_DEFAULT		(0x0 << 4)
+#define DIGITAL_ATTRIB_P1_SUPRESS_EOS		BIT(3)
+#define DIGITAL_ATTRIB_P1_SUPRESS_SOS		BIT(2)
+#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1		(0x0 << 6)
+#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1		(0x0 << 4)
+#define DIGITAL_ATTRIB_P2_MAX_FRAME_256		0x8
+#define DIGITAL_ATTRIB_P4_DID(n)		((n) & 0xf)
+
 #define DIGITAL_CMD_SENSF_REQ	0x00
 #define DIGITAL_CMD_SENSF_RES	0x01
 
@@ -75,6 +93,7 @@
 };
 
 #define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F)
+#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4)
 #define DIGITAL_ATS_MAX_FSC  256
 
 #define DIGITAL_RATS_BYTE1 0xE0
@@ -92,6 +111,32 @@
 	u8 bcc;
 } __packed;
 
+struct digital_sensb_req {
+	u8 cmd;
+	u8 afi;
+	u8 param;
+} __packed;
+
+struct digital_sensb_res {
+	u8 cmd;
+	u8 nfcid0[4];
+	u8 app_data[4];
+	u8 proto_info[3];
+} __packed;
+
+struct digital_attrib_req {
+	u8 cmd;
+	u8 nfcid0[4];
+	u8 param1;
+	u8 param2;
+	u8 param3;
+	u8 param4;
+} __packed;
+
+struct digital_attrib_res {
+	u8 mbli_did;
+} __packed;
+
 struct digital_sensf_req {
 	u8 cmd;
 	u8 sc1;
@@ -531,6 +576,175 @@
 	return -EIO;
 }
 
+static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg,
+				       struct sk_buff *resp)
+{
+	struct nfc_target *target = arg;
+	struct digital_attrib_res *attrib_res;
+	int rc;
+
+	if (IS_ERR(resp)) {
+		rc = PTR_ERR(resp);
+		resp = NULL;
+		goto exit;
+	}
+
+	if (resp->len < sizeof(*attrib_res)) {
+		PROTOCOL_ERR("12.6.2");
+		rc = -EIO;
+		goto exit;
+	}
+
+	attrib_res = (struct digital_attrib_res *)resp->data;
+
+	if (attrib_res->mbli_did & 0x0f) {
+		PROTOCOL_ERR("12.6.2.1");
+		rc = -EIO;
+		goto exit;
+	}
+
+	rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B);
+
+exit:
+	dev_kfree_skb(resp);
+	kfree(target);
+
+	if (rc)
+		digital_poll_next_tech(ddev);
+}
+
+static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev,
+			       struct nfc_target *target,
+			       struct digital_sensb_res *sensb_res)
+{
+	struct digital_attrib_req *attrib_req;
+	struct sk_buff *skb;
+	int rc;
+
+	skb = digital_skb_alloc(ddev, sizeof(*attrib_req));
+	if (!skb)
+		return -ENOMEM;
+
+	attrib_req = (struct digital_attrib_req *)skb_put(skb,
+							  sizeof(*attrib_req));
+
+	attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ;
+	memcpy(attrib_req->nfcid0, sensb_res->nfcid0,
+	       sizeof(attrib_req->nfcid0));
+	attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT |
+			     DIGITAL_ATTRIB_P1_TR1_DEFAULT;
+	attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 |
+			     DIGITAL_ATTRIB_P2_POLL_LISTEN_1 |
+			     DIGITAL_ATTRIB_P2_MAX_FRAME_256;
+	attrib_req->param3 = sensb_res->proto_info[1] & 0x07;
+	attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0);
+
+	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res,
+				 target);
+	if (rc)
+		kfree_skb(skb);
+
+	return rc;
+}
+
+static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg,
+				      struct sk_buff *resp)
+{
+	struct nfc_target *target = NULL;
+	struct digital_sensb_res *sensb_res;
+	u8 fsci;
+	int rc;
+
+	if (IS_ERR(resp)) {
+		rc = PTR_ERR(resp);
+		resp = NULL;
+		goto exit;
+	}
+
+	if (resp->len != sizeof(*sensb_res)) {
+		PROTOCOL_ERR("5.6.2.1");
+		rc = -EIO;
+		goto exit;
+	}
+
+	sensb_res = (struct digital_sensb_res *)resp->data;
+
+	if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) {
+		PROTOCOL_ERR("5.6.2");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (!(sensb_res->proto_info[1] & BIT(0))) {
+		PROTOCOL_ERR("5.6.2.12");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (sensb_res->proto_info[1] & BIT(3)) {
+		PROTOCOL_ERR("5.6.2.16");
+		rc = -EIO;
+		goto exit;
+	}
+
+	fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]);
+	if (fsci >= 8)
+		ddev->target_fsc = DIGITAL_ATS_MAX_FSC;
+	else
+		ddev->target_fsc = digital_ats_fsc[fsci];
+
+	target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL);
+	if (!target) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+
+	rc = digital_in_send_attrib_req(ddev, target, sensb_res);
+
+exit:
+	dev_kfree_skb(resp);
+
+	if (rc) {
+		kfree(target);
+		digital_poll_next_tech(ddev);
+	}
+}
+
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+	struct digital_sensb_req *sensb_req;
+	struct sk_buff *skb;
+	int rc;
+
+	rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+				     NFC_DIGITAL_RF_TECH_106B);
+	if (rc)
+		return rc;
+
+	rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+				     NFC_DIGITAL_FRAMING_NFCB);
+	if (rc)
+		return rc;
+
+	skb = digital_skb_alloc(ddev, sizeof(*sensb_req));
+	if (!skb)
+		return -ENOMEM;
+
+	sensb_req = (struct digital_sensb_req *)skb_put(skb,
+							sizeof(*sensb_req));
+
+	sensb_req->cmd = DIGITAL_CMD_SENSB_REQ;
+	sensb_req->afi = 0x00; /* All families and sub-families */
+	sensb_req->param = DIGITAL_SENSB_N(0);
+
+	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res,
+				 NULL);
+	if (rc)
+		kfree_skb(skb);
+
+	return rc;
+}
+
 static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg,
 				   struct sk_buff *resp)
 {
@@ -877,6 +1091,18 @@
 	dev_kfree_skb(resp);
 }
 
+static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev,
+		void *arg, struct sk_buff *resp)
+{
+	if (!IS_ERR(resp) && (resp->len >= 2) &&
+			(resp->data[1] == DIGITAL_CMD_SENSF_REQ))
+		digital_tg_recv_sensf_req(ddev, arg, resp);
+	else
+		digital_tg_recv_atr_req(ddev, arg, resp);
+
+	return;
+}
+
 static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
 			      struct digital_sensf_req *sensf_req)
 {
@@ -887,7 +1113,7 @@
 
 	size = sizeof(struct digital_sensf_res);
 
-	if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE)
+	if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE)
 		size -= sizeof(sensf_res->rd);
 
 	skb = digital_skb_alloc(ddev, size);
@@ -922,7 +1148,7 @@
 		digital_skb_add_crc_f(skb);
 
 	rc = digital_tg_send_cmd(ddev, skb, 300,
-				 digital_tg_recv_atr_req, NULL);
+				 digital_tg_recv_atr_or_sensf_req, NULL);
 	if (rc)
 		kfree_skb(skb);
 

diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index a9f4d2e..677d24b 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c

@@ -26,6 +26,8 @@
 
 #include "hci.h"
 
+#define MAX_FWI 4949
+
 static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
 			       const u8 *param, size_t param_len,
 			       data_exchange_cb_t cb, void *cb_context)
@@ -37,7 +39,7 @@
 	 * for all commands?
 	 */
 	return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd,
-				      param, param_len, cb, cb_context, 3000);
+				      param, param_len, cb, cb_context, MAX_FWI);
 }
 
 /*
@@ -82,7 +84,7 @@
 						    NFC_HCI_HCP_COMMAND, cmd,
 						    param, param_len,
 						    nfc_hci_execute_cb, &hcp_ew,
-						    3000);
+						    MAX_FWI);
 	if (hcp_ew.exec_result < 0)
 		return hcp_ew.exec_result;
 

diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index d45b638..4740370 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c

@@ -225,7 +225,7 @@
 			goto exit;
 		}
 
-		targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data);
+		targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data);
 		targets->sel_res = sak_skb->data[0];
 
 		r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE,
@@ -380,34 +380,31 @@
 	if (r < 0)
 		goto disconnect_all;
 
-	if (skb->len && skb->len == strlen(hdev->init_data.session_id))
-		if (memcmp(hdev->init_data.session_id, skb->data,
-			   skb->len) == 0) {
-			/* TODO ELa: restore gate<->pipe table from
-			 * some TBD location.
-			 * note: it doesn't seem possible to get the chip
-			 * currently open gate/pipe table.
-			 * It is only possible to obtain the supported
-			 * gate list.
-			 */
+	if (skb->len && skb->len == strlen(hdev->init_data.session_id) &&
+		(memcmp(hdev->init_data.session_id, skb->data,
+			   skb->len) == 0) && hdev->ops->load_session) {
+		/* Restore gate<->pipe table from some proprietary location. */
 
-			/* goto exit
-			 * For now, always do a full initialization */
-		}
+		r = hdev->ops->load_session(hdev);
 
-	r = nfc_hci_disconnect_all_gates(hdev);
-	if (r < 0)
-		goto exit;
+		if (r < 0)
+			goto disconnect_all;
+	} else {
 
-	r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
-				  hdev->init_data.gates);
-	if (r < 0)
-		goto disconnect_all;
+		r = nfc_hci_disconnect_all_gates(hdev);
+		if (r < 0)
+			goto exit;
 
-	r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
-			      NFC_HCI_ADMIN_SESSION_IDENTITY,
-			      hdev->init_data.session_id,
-			      strlen(hdev->init_data.session_id));
+		r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
+					  hdev->init_data.gates);
+		if (r < 0)
+			goto disconnect_all;
+
+		r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+				NFC_HCI_ADMIN_SESSION_IDENTITY,
+				hdev->init_data.session_id,
+				strlen(hdev->init_data.session_id));
+	}
 	if (r == 0)
 		goto exit;
 

diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index bec6ed1..a3ad69a 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c

@@ -387,7 +387,7 @@
 
 	__net_timestamp(skb);
 
-	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX);
 
 	return nfc_data_exchange(dev, local->target_idx, skb,
 				 nfc_llcp_recv, local);

diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b467195..51e7887 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c

@@ -680,16 +680,17 @@
 			continue;
 
 		if (skb_copy == NULL) {
-			skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
 
 			if (skb_copy == NULL)
 				continue;
 
-			data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+			data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
 
 			data[0] = local->dev ? local->dev->idx : 0xFF;
-			data[1] = direction;
+			data[1] = direction & 0x01;
+			data[1] |= (RAW_PAYLOAD_LLCP << 1);
 		}
 
 		nskb = skb_clone(skb_copy, GFP_ATOMIC);
@@ -747,7 +748,7 @@
 			__net_timestamp(skb);
 
 			nfc_llcp_send_to_raw_sock(local, skb,
-						  NFC_LLCP_DIRECTION_TX);
+						  NFC_DIRECTION_TX);
 
 			ret = nfc_data_exchange(local->dev, local->target_idx,
 						skb, nfc_llcp_recv, local);
@@ -1476,7 +1477,7 @@
 
 	__net_timestamp(skb);
 
-	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX);
 
 	nfc_llcp_rx_skb(local, skb);
 

diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6c34ac97..2b400e1 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c

@@ -861,6 +861,10 @@
 	/* Get rid of skb owner, prior to sending to the driver. */
 	skb_orphan(skb);
 
+	/* Send copy to sniffer */
+	nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+			     RAW_PAYLOAD_NCI, NFC_DIRECTION_TX);
+
 	return ndev->ops->send(ndev, skb);
 }
 
@@ -935,6 +939,11 @@
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&ndev->rx_q))) {
+
+		/* Send copy to sniffer */
+		nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+				     RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+
 		/* Process frame */
 		switch (nci_mt(skb->data)) {
 		case NCI_MT_RSP_PKT:

diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 1e90509..f8f6af2 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c

@@ -366,7 +366,6 @@
 			struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
 {
 	struct activation_params_poll_nfc_dep *poll;
-	int i;
 
 	switch (ntf->activation_rf_tech_and_mode) {
 	case NCI_NFC_A_PASSIVE_POLL_MODE:
@@ -374,10 +373,8 @@
 		poll = &ntf->activation_params.poll_nfc_dep;
 		poll->atr_res_len = min_t(__u8, *data++, 63);
 		pr_debug("atr_res_len %d\n", poll->atr_res_len);
-		if (poll->atr_res_len > 0) {
-			for (i = 0; i < poll->atr_res_len; i++)
-				poll->atr_res[poll->atr_res_len-1-i] = data[i];
-		}
+		if (poll->atr_res_len > 0)
+			memcpy(poll->atr_res, data, poll->atr_res_len);
 		break;
 
 	default:

diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 9d6e74f..88d6006 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h

@@ -40,6 +40,12 @@
 	struct work_struct tx_work;
 	bool tx_work_scheduled;
 };
+
+struct nfc_sock_list {
+	struct hlist_head head;
+	rwlock_t          lock;
+};
+
 #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk)
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))

diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index c27a6e8..11c3544 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c

@@ -27,6 +27,24 @@
 
 #include "nfc.h"
 
+static struct nfc_sock_list raw_sk_list = {
+	.lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock)
+};
+
+static void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk)
+{
+	write_lock(&l->lock);
+	sk_add_node(sk, &l->head);
+	write_unlock(&l->lock);
+}
+
+static void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk)
+{
+	write_lock(&l->lock);
+	sk_del_node_init(sk);
+	write_unlock(&l->lock);
+}
+
 static void rawsock_write_queue_purge(struct sock *sk)
 {
 	pr_debug("sk=%p\n", sk);
@@ -57,6 +75,9 @@
 	if (!sk)
 		return 0;
 
+	if (sock->type == SOCK_RAW)
+		nfc_sock_unlink(&raw_sk_list, sk);
+
 	sock_orphan(sk);
 	sock_put(sk);
 
@@ -275,6 +296,26 @@
 	.mmap           = sock_no_mmap,
 };
 
+static const struct proto_ops rawsock_raw_ops = {
+	.family         = PF_NFC,
+	.owner          = THIS_MODULE,
+	.release        = rawsock_release,
+	.bind           = sock_no_bind,
+	.connect        = sock_no_connect,
+	.socketpair     = sock_no_socketpair,
+	.accept         = sock_no_accept,
+	.getname        = sock_no_getname,
+	.poll           = datagram_poll,
+	.ioctl          = sock_no_ioctl,
+	.listen         = sock_no_listen,
+	.shutdown       = sock_no_shutdown,
+	.setsockopt     = sock_no_setsockopt,
+	.getsockopt     = sock_no_getsockopt,
+	.sendmsg        = sock_no_sendmsg,
+	.recvmsg        = rawsock_recvmsg,
+	.mmap           = sock_no_mmap,
+};
+
 static void rawsock_destruct(struct sock *sk)
 {
 	pr_debug("sk=%p\n", sk);
@@ -300,10 +341,13 @@
 
 	pr_debug("sock=%p\n", sock);
 
-	if (sock->type != SOCK_SEQPACKET)
+	if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
 		return -ESOCKTNOSUPPORT;
 
-	sock->ops = &rawsock_ops;
+	if (sock->type == SOCK_RAW)
+		sock->ops = &rawsock_raw_ops;
+	else
+		sock->ops = &rawsock_ops;
 
 	sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
 	if (!sk)
@@ -313,13 +357,53 @@
 	sk->sk_protocol = nfc_proto->id;
 	sk->sk_destruct = rawsock_destruct;
 	sock->state = SS_UNCONNECTED;
-
-	INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
-	nfc_rawsock(sk)->tx_work_scheduled = false;
+	if (sock->type == SOCK_RAW)
+		nfc_sock_link(&raw_sk_list, sk);
+	else {
+		INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
+		nfc_rawsock(sk)->tx_work_scheduled = false;
+	}
 
 	return 0;
 }
 
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+			  u8 payload_type, u8 direction)
+{
+	struct sk_buff *skb_copy = NULL, *nskb;
+	struct sock *sk;
+	u8 *data;
+
+	read_lock(&raw_sk_list.lock);
+
+	sk_for_each(sk, &raw_sk_list.head) {
+		if (!skb_copy) {
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
+			if (!skb_copy)
+				continue;
+
+			data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
+
+			data[0] = dev ? dev->idx : 0xFF;
+			data[1] = direction & 0x01;
+			data[1] |= (payload_type << 1);
+		}
+
+		nskb = skb_clone(skb_copy, GFP_ATOMIC);
+		if (!nskb)
+			continue;
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+
+	read_unlock(&raw_sk_list.lock);
+
+	kfree_skb(skb_copy);
+}
+EXPORT_SYMBOL(nfc_send_to_raw_sock);
+
 static struct proto rawsock_proto = {
 	.name     = "NFC_RAW",
 	.owner    = THIS_MODULE,

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2c77e7b..c36856a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c

@@ -134,8 +134,8 @@
 
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
-	memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
+	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
 
 	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a3276e3..0d407bc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c

@@ -44,11 +44,11 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
-#include <linux/lockdep.h>
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
-#include <linux/workqueue.h>
+#include <linux/genetlink.h>
+#include <net/genetlink.h>
 #include <net/genetlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -62,6 +62,31 @@
 
 int ovs_net_id __read_mostly;
 
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP
+};
+
+/* Check if need to build a reply message.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_info *info,
+			    const struct genl_multicast_group *grp)
+{
+	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+		netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+}
+
 static void ovs_notify(struct genl_family *family,
 		       struct sk_buff *skb, struct genl_info *info)
 {
@@ -173,6 +198,7 @@
 	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
 }
 
+/* Called with ovs_mutex or RCU read lock. */
 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 {
 	struct vport *vport;
@@ -262,16 +288,6 @@
 	u64_stats_update_end(&stats->syncp);
 }
 
-static struct genl_family dp_packet_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_PACKET_FAMILY,
-	.version = OVS_PACKET_VERSION,
-	.maxattr = OVS_PACKET_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 		  const struct dp_upcall_info *upcall_info)
 {
@@ -524,7 +540,7 @@
 		packet->protocol = htons(ETH_P_802_2);
 
 	/* Build an sw_flow for sending this packet. */
-	flow = ovs_flow_alloc(false);
+	flow = ovs_flow_alloc();
 	err = PTR_ERR(flow);
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
@@ -590,6 +606,18 @@
 	}
 };
 
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_packet_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+};
+
 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
 			 struct ovs_dp_megaflow_stats *mega_stats)
 {
@@ -621,26 +649,6 @@
 	}
 }
 
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
-	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
-};
-
-static struct genl_family dp_flow_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_FLOW_FAMILY,
-	.version = OVS_FLOW_VERSION,
-	.maxattr = OVS_FLOW_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
-	.name = OVS_FLOW_MCGROUP
-};
-
 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 {
 	return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -652,8 +660,8 @@
 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
 }
 
-/* Called with ovs_mutex. */
-static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
 				  u32 seq, u32 flags, u8 cmd)
 {
@@ -670,7 +678,7 @@
 	if (!ovs_header)
 		return -EMSGSIZE;
 
-	ovs_header->dp_ifindex = get_dpifindex(dp);
+	ovs_header->dp_ifindex = dp_ifindex;
 
 	/* Fill flow key. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
@@ -693,6 +701,7 @@
 	nla_nest_end(skb, nla);
 
 	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
+
 	if (used &&
 	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
 		goto nla_put_failure;
@@ -720,9 +729,9 @@
 		const struct sw_flow_actions *sf_acts;
 
 		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
-
 		err = ovs_nla_put_actions(sf_acts->actions,
 					  sf_acts->actions_len, skb);
+
 		if (!err)
 			nla_nest_end(skb, start);
 		else {
@@ -743,46 +752,190 @@
 	return err;
 }
 
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
-					       struct genl_info *info)
+/* May not be called with RCU read lock. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+					       struct genl_info *info,
+					       bool always)
 {
-	size_t len;
+	struct sk_buff *skb;
 
-	len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
+	if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+		return NULL;
 
-	return genlmsg_new_unicast(len, info, GFP_KERNEL);
+	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	return skb;
 }
 
-static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
-					       struct datapath *dp,
-					       struct genl_info *info,
-					       u8 cmd)
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+					       int dp_ifindex,
+					       struct genl_info *info, u8 cmd,
+					       bool always)
 {
 	struct sk_buff *skb;
 	int retval;
 
-	skb = ovs_flow_cmd_alloc_info(flow, info);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
+				      always);
+	if (!skb || IS_ERR(skb))
+		return skb;
 
-	retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
-					info->snd_seq, 0, cmd);
+	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+					info->snd_portid, info->snd_seq, 0,
+					cmd);
 	BUG_ON(retval < 0);
 	return skb;
 }
 
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow *flow, *new_flow;
+	struct sw_flow_mask mask;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct sw_flow_actions *acts;
+	struct sw_flow_match match;
+	int error;
+
+	/* Must have key and actions. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+	if (!a[OVS_FLOW_ATTR_ACTIONS])
+		goto error;
+
+	/* Most of the time we need to allocate a new flow, do it before
+	 * locking.
+	 */
+	new_flow = ovs_flow_alloc();
+	if (IS_ERR(new_flow)) {
+		error = PTR_ERR(new_flow);
+		goto error;
+	}
+
+	/* Extract key. */
+	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+	error = ovs_nla_get_match(&match,
+				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error)
+		goto err_kfree_flow;
+
+	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+
+	/* Validate actions. */
+	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+	error = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_kfree_flow;
+
+	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
+				     0, &acts);
+	if (error) {
+		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+		goto err_kfree_acts;
+	}
+
+	reply = ovs_flow_cmd_alloc_info(acts, info, false);
+	if (IS_ERR(reply)) {
+		error = PTR_ERR(reply);
+		goto err_kfree_acts;
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+	/* Check if this is a duplicate flow */
+	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+	if (likely(!flow)) {
+		rcu_assign_pointer(new_flow->sf_acts, acts);
+
+		/* Put flow in bucket. */
+		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+		if (unlikely(error)) {
+			acts = NULL;
+			goto err_unlock_ovs;
+		}
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(new_flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
+	} else {
+		struct sw_flow_actions *old_acts;
+
+		/* Bail out if we're not allowed to modify an existing flow.
+		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+		 * because Generic Netlink treats the latter as a dump
+		 * request.  We also accept NLM_F_EXCL in case that bug ever
+		 * gets fixed.
+		 */
+		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+							 | NLM_F_EXCL))) {
+			error = -EEXIST;
+			goto err_unlock_ovs;
+		}
+		/* The unmasked key has to be the same for flow updates. */
+		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+			error = -EEXIST;
+			goto err_unlock_ovs;
+		}
+		/* Update actions. */
+		old_acts = ovsl_dereference(flow->sf_acts);
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
+
+		ovs_nla_free_flow_actions(old_acts);
+		ovs_flow_free(new_flow, false);
+	}
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	return 0;
+
+err_unlock_ovs:
+	ovs_unlock();
+	kfree_skb(reply);
+err_kfree_acts:
+	kfree(acts);
+err_kfree_flow:
+	ovs_flow_free(new_flow, false);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key, masked_key;
-	struct sw_flow *flow = NULL;
+	struct sw_flow *flow;
 	struct sw_flow_mask mask;
-	struct sk_buff *reply;
+	struct sk_buff *reply = NULL;
 	struct datapath *dp;
-	struct sw_flow_actions *acts = NULL;
+	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
 	struct sw_flow_match match;
-	bool exact_5tuple;
 	int error;
 
 	/* Extract key. */
@@ -791,7 +944,7 @@
 		goto error;
 
 	ovs_match_init(&match, &key, &mask);
-	error = ovs_nla_get_match(&match, &exact_5tuple,
+	error = ovs_nla_get_match(&match,
 				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
 	if (error)
 		goto error;
@@ -808,90 +961,75 @@
 					     &masked_key, 0, &acts);
 		if (error) {
 			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-			goto err_kfree;
+			goto err_kfree_acts;
 		}
-	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
-		error = -EINVAL;
-		goto error;
+	}
+
+	/* Can allocate before locking if have acts. */
+	if (acts) {
+		reply = ovs_flow_cmd_alloc_info(acts, info, false);
+		if (IS_ERR(reply)) {
+			error = PTR_ERR(reply);
+			goto err_kfree_acts;
+		}
 	}
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	error = -ENODEV;
-	if (!dp)
+	if (unlikely(!dp)) {
+		error = -ENODEV;
 		goto err_unlock_ovs;
-
-	/* Check if this is a duplicate flow */
+	}
+	/* Check that the flow exists. */
 	flow = ovs_flow_tbl_lookup(&dp->table, &key);
-	if (!flow) {
-		/* Bail out if we're not allowed to create a new flow. */
+	if (unlikely(!flow)) {
 		error = -ENOENT;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
-			goto err_unlock_ovs;
-
-		/* Allocate flow. */
-		flow = ovs_flow_alloc(!exact_5tuple);
-		if (IS_ERR(flow)) {
-			error = PTR_ERR(flow);
-			goto err_unlock_ovs;
-		}
-
-		flow->key = masked_key;
-		flow->unmasked_key = key;
-		rcu_assign_pointer(flow->sf_acts, acts);
-
-		/* Put flow in bucket. */
-		error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
-		if (error) {
-			acts = NULL;
-			goto err_flow_free;
-		}
-
-		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
-	} else {
-		/* We found a matching flow. */
-		struct sw_flow_actions *old_acts;
-
-		/* Bail out if we're not allowed to modify an existing flow.
-		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
-		 * because Generic Netlink treats the latter as a dump
-		 * request.  We also accept NLM_F_EXCL in case that bug ever
-		 * gets fixed.
-		 */
+		goto err_unlock_ovs;
+	}
+	/* The unmasked key has to be the same for flow updates. */
+	if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
 		error = -EEXIST;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
-		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
-			goto err_unlock_ovs;
-
-		/* The unmasked key has to be the same for flow updates. */
-		if (!ovs_flow_cmp_unmasked_key(flow, &match))
-			goto err_unlock_ovs;
-
-		/* Update actions. */
+		goto err_unlock_ovs;
+	}
+	/* Update actions, if present. */
+	if (likely(acts)) {
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+	} else {
+		/* Could not alloc without acts before locking. */
+		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+						info, OVS_FLOW_CMD_NEW, false);
+		if (unlikely(IS_ERR(reply))) {
+			error = PTR_ERR(reply);
+			goto err_unlock_ovs;
+		}
+	}
+
+	/* Clear stats. */
+	if (a[OVS_FLOW_ATTR_CLEAR])
+		ovs_flow_stats_clear(flow);
+	ovs_unlock();
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	if (old_acts)
 		ovs_nla_free_flow_actions(old_acts);
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
-
-		/* Clear stats. */
-		if (a[OVS_FLOW_ATTR_CLEAR])
-			ovs_flow_stats_clear(flow);
-	}
-	ovs_unlock();
-
-	if (!IS_ERR(reply))
-		ovs_notify(&dp_flow_genl_family, reply, info);
-	else
-		genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
-			     0, PTR_ERR(reply));
 	return 0;
 
-err_flow_free:
-	ovs_flow_free(flow, false);
 err_unlock_ovs:
 	ovs_unlock();
-err_kfree:
+	kfree_skb(reply);
+err_kfree_acts:
 	kfree(acts);
 error:
 	return error;
@@ -914,7 +1052,7 @@
 	}
 
 	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
+	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 	if (err)
 		return err;
 
@@ -931,7 +1069,8 @@
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+					OVS_FLOW_CMD_NEW, true);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -955,45 +1094,53 @@
 	struct sw_flow_match match;
 	int err;
 
+	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		if (unlikely(err))
+			return err;
+	}
+
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp) {
+	if (unlikely(!dp)) {
 		err = -ENODEV;
 		goto unlock;
 	}
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
+	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
 
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
-	if (err)
-		goto unlock;
-
 	flow = ovs_flow_tbl_lookup(&dp->table, &key);
-	if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+	if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_alloc_info(flow, info);
-	if (!reply) {
-		err = -ENOMEM;
-		goto unlock;
-	}
-
 	ovs_flow_tbl_remove(&dp->table, flow);
-
-	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
-				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
-	BUG_ON(err < 0);
-
-	ovs_flow_free(flow, true);
 	ovs_unlock();
 
-	ovs_notify(&dp_flow_genl_family, reply, info);
+	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+					info, false);
+	if (likely(reply)) {
+		if (likely(!IS_ERR(reply))) {
+			rcu_read_lock();	/*To keep RCU checker happy. */
+			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+						     reply, info->snd_portid,
+						     info->snd_seq, 0,
+						     OVS_FLOW_CMD_DEL);
+			rcu_read_unlock();
+			BUG_ON(err < 0);
+
+			ovs_notify(&dp_flow_genl_family, reply, info);
+		} else {
+			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+		}
+	}
+
+	ovs_flow_free(flow, true);
 	return 0;
 unlock:
 	ovs_unlock();
@@ -1024,7 +1171,7 @@
 		if (!flow)
 			break;
 
-		if (ovs_flow_cmd_fill_info(flow, dp, skb,
+		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   OVS_FLOW_CMD_NEW) < 0)
@@ -1037,11 +1184,17 @@
 	return skb->len;
 }
 
-static const struct genl_ops dp_flow_genl_ops[] = {
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set
+	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
@@ -1057,28 +1210,22 @@
 	{ .cmd = OVS_FLOW_CMD_SET,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set,
+	  .doit = ovs_flow_cmd_set,
 	},
 };
 
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
-	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
-};
-
-static struct genl_family dp_datapath_genl_family = {
+static struct genl_family dp_flow_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_DATAPATH_FAMILY,
-	.version = OVS_DATAPATH_VERSION,
-	.maxattr = OVS_DP_ATTR_MAX,
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
-	.name = OVS_DATAPATH_MCGROUP
+	.ops = dp_flow_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+	.mcgrps = &ovs_dp_flow_multicast_group,
+	.n_mcgrps = 1,
 };
 
 static size_t ovs_dp_cmd_msg_size(void)
@@ -1093,6 +1240,7 @@
 	return msgsize;
 }
 
+/* Called with ovs_mutex or RCU read lock. */
 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 				u32 portid, u32 seq, u32 flags, u8 cmd)
 {
@@ -1108,9 +1256,7 @@
 
 	ovs_header->dp_ifindex = get_dpifindex(dp);
 
-	rcu_read_lock();
 	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
-	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
 
@@ -1135,25 +1281,12 @@
 	return -EMSGSIZE;
 }
 
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
-					     struct genl_info *info, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
 {
-	struct sk_buff *skb;
-	int retval;
-
-	skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
-	if (retval < 0) {
-		kfree_skb(skb);
-		return ERR_PTR(retval);
-	}
-	return skb;
+	return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
 }
 
-/* Called with ovs_mutex. */
+/* Called with rcu_read_lock or ovs_mutex. */
 static struct datapath *lookup_datapath(struct net *net,
 					struct ovs_header *ovs_header,
 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1165,10 +1298,8 @@
 	else {
 		struct vport *vport;
 
-		rcu_read_lock();
 		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
 		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
-		rcu_read_unlock();
 	}
 	return dp ? dp : ERR_PTR(-ENODEV);
 }
@@ -1205,12 +1336,14 @@
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
 		goto err;
 
-	ovs_lock();
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
 
 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_unlock_ovs;
+		goto err_free_reply;
 
 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
@@ -1245,6 +1378,9 @@
 
 	ovs_dp_change(dp, a);
 
+	/* So far only local changes have been made, now need the lock. */
+	ovs_lock();
+
 	vport = new_vport(&parms);
 	if (IS_ERR(vport)) {
 		err = PTR_ERR(vport);
@@ -1263,10 +1399,9 @@
 		goto err_destroy_ports_array;
 	}
 
-	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto err_destroy_local_port;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
 
 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
 	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
@@ -1276,9 +1411,8 @@
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_local_port:
-	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 err_destroy_ports_array:
+	ovs_unlock();
 	kfree(dp->ports);
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
@@ -1287,8 +1421,8 @@
 err_free_dp:
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
-err_unlock_ovs:
-	ovs_unlock();
+err_free_reply:
+	kfree_skb(reply);
 err:
 	return err;
 }
@@ -1326,16 +1460,19 @@
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
-	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto unlock;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_DEL);
+	BUG_ON(err < 0);
 
 	__dp_destroy(dp);
 	ovs_unlock();
@@ -1343,8 +1480,10 @@
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1354,29 +1493,30 @@
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
 	ovs_dp_change(dp, info->attrs);
 
-	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
-			     0, err);
-		err = 0;
-		goto unlock;
-	}
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
 
 	ovs_unlock();
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1386,24 +1526,26 @@
 	struct datapath *dp;
 	int err;
 
-	ovs_lock();
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	rcu_read_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	if (IS_ERR(dp)) {
 		err = PTR_ERR(dp);
-		goto unlock;
+		goto err_unlock_free;
 	}
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
+	rcu_read_unlock();
 
-	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		goto unlock;
-	}
-
-	ovs_unlock();
 	return genlmsg_reply(reply, info);
 
-unlock:
-	ovs_unlock();
+err_unlock_free:
+	rcu_read_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1430,7 +1572,13 @@
 	return skb->len;
 }
 
-static const struct genl_ops dp_datapath_genl_ops[] = {
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = datapath_policy,
@@ -1454,27 +1602,18 @@
 	},
 };
 
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
-	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
-	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-};
-
-struct genl_family dp_vport_genl_family = {
+static struct genl_family dp_datapath_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_VPORT_FAMILY,
-	.version = OVS_VPORT_VERSION,
-	.maxattr = OVS_VPORT_ATTR_MAX,
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_vport_multicast_group = {
-	.name = OVS_VPORT_MCGROUP
+	.ops = dp_datapath_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+	.mcgrps = &ovs_dp_datapath_multicast_group,
+	.n_mcgrps = 1,
 };
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -1516,7 +1655,12 @@
 	return err;
 }
 
-/* Called with ovs_mutex or RCU read lock. */
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 					 u32 seq, u8 cmd)
 {
@@ -1578,33 +1722,35 @@
 	u32 port_no;
 	int err;
 
-	err = -EINVAL;
 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
-		goto exit;
+		return -EINVAL;
+
+	port_no = a[OVS_VPORT_ATTR_PORT_NO]
+		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+	if (port_no >= DP_MAX_PORTS)
+		return -EFBIG;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
-		goto exit_unlock;
+		goto exit_unlock_free;
 
-	if (a[OVS_VPORT_ATTR_PORT_NO]) {
-		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-
-		err = -EFBIG;
-		if (port_no >= DP_MAX_PORTS)
-			goto exit_unlock;
-
+	if (port_no) {
 		vport = ovs_vport_ovsl(dp, port_no);
 		err = -EBUSY;
 		if (vport)
-			goto exit_unlock;
+			goto exit_unlock_free;
 	} else {
 		for (port_no = 1; ; port_no++) {
 			if (port_no >= DP_MAX_PORTS) {
 				err = -EFBIG;
-				goto exit_unlock;
+				goto exit_unlock_free;
 			}
 			vport = ovs_vport_ovsl(dp, port_no);
 			if (!vport)
@@ -1622,22 +1768,19 @@
 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
+		goto exit_unlock_free;
 
-	err = 0;
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
-					 OVS_VPORT_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		ovs_dp_detach_port(vport);
-		goto exit_unlock;
-	}
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
+	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
 
-exit_unlock:
+exit_unlock_free:
 	ovs_unlock();
-exit:
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1648,28 +1791,26 @@
 	struct vport *vport;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
+		goto exit_unlock_free;
 
 	if (a[OVS_VPORT_ATTR_TYPE] &&
 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
 		err = -EINVAL;
-		goto exit_unlock;
-	}
-
-	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!reply) {
-		err = -ENOMEM;
-		goto exit_unlock;
+		goto exit_unlock_free;
 	}
 
 	if (a[OVS_VPORT_ATTR_OPTIONS]) {
 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
 		if (err)
-			goto exit_free;
+			goto exit_unlock_free;
 	}
 
 	if (a[OVS_VPORT_ATTR_UPCALL_PID])
@@ -1683,10 +1824,9 @@
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_free:
-	kfree_skb(reply);
-exit_unlock:
+exit_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1697,30 +1837,33 @@
 	struct vport *vport;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
+		goto exit_unlock_free;
 
 	if (vport->port_no == OVSP_LOCAL) {
 		err = -EINVAL;
-		goto exit_unlock;
+		goto exit_unlock_free;
 	}
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
-					 info->snd_seq, OVS_VPORT_CMD_DEL);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto exit_unlock;
-
-	err = 0;
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+	BUG_ON(err < 0);
 	ovs_dp_detach_port(vport);
+	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
 
-exit_unlock:
+exit_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1732,24 +1875,25 @@
 	struct vport *vport;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	rcu_read_lock();
 	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
-
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
-					 info->snd_seq, OVS_VPORT_CMD_NEW);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto exit_unlock;
-
+		goto exit_unlock_free;
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
 	rcu_read_unlock();
 
 	return genlmsg_reply(reply, info);
 
-exit_unlock:
+exit_unlock_free:
 	rcu_read_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1792,7 +1936,16 @@
 	return skb->len;
 }
 
-static const struct genl_ops dp_vport_genl_ops[] = {
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_vport_genl_ops[] = {
 	{ .cmd = OVS_VPORT_CMD_NEW,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = vport_policy,
@@ -1816,26 +1969,25 @@
 	},
 };
 
-struct genl_family_and_ops {
-	struct genl_family *family;
-	const struct genl_ops *ops;
-	int n_ops;
-	const struct genl_multicast_group *group;
+struct genl_family dp_vport_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_VPORT_FAMILY,
+	.version = OVS_VPORT_VERSION,
+	.maxattr = OVS_VPORT_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_vport_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+	.mcgrps = &ovs_dp_vport_multicast_group,
+	.n_mcgrps = 1,
 };
 
-static const struct genl_family_and_ops dp_genl_families[] = {
-	{ &dp_datapath_genl_family,
-	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
-	  &ovs_dp_datapath_multicast_group },
-	{ &dp_vport_genl_family,
-	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
-	  &ovs_dp_vport_multicast_group },
-	{ &dp_flow_genl_family,
-	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
-	  &ovs_dp_flow_multicast_group },
-	{ &dp_packet_genl_family,
-	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
-	  NULL },
+static struct genl_family * const dp_genl_families[] = {
+	&dp_datapath_genl_family,
+	&dp_vport_genl_family,
+	&dp_flow_genl_family,
+	&dp_packet_genl_family,
 };
 
 static void dp_unregister_genl(int n_families)
@@ -1843,33 +1995,25 @@
 	int i;
 
 	for (i = 0; i < n_families; i++)
-		genl_unregister_family(dp_genl_families[i].family);
+		genl_unregister_family(dp_genl_families[i]);
 }
 
 static int dp_register_genl(void)
 {
-	int n_registered;
 	int err;
 	int i;
 
-	n_registered = 0;
 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
-		const struct genl_family_and_ops *f = &dp_genl_families[i];
 
-		f->family->ops = f->ops;
-		f->family->n_ops = f->n_ops;
-		f->family->mcgrps = f->group;
-		f->family->n_mcgrps = f->group ? 1 : 0;
-		err = genl_register_family(f->family);
+		err = genl_register_family(dp_genl_families[i]);
 		if (err)
 			goto error;
-		n_registered++;
 	}
 
 	return 0;
 
 error:
-	dp_unregister_genl(n_registered);
+	dp_unregister_genl(i);
 	return err;
 }
 

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0531738..7ede507 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h

@@ -194,7 +194,9 @@
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
 void ovs_dp_notify_wq(struct work_struct *work);
 
-#define OVS_NLERR(fmt, ...) \
-	pr_info_once("netlink: " fmt, ##__VA_ARGS__)
-
+#define OVS_NLERR(fmt, ...)					\
+do {								\
+	if (net_ratelimit())					\
+		pr_info("netlink: " fmt, ##__VA_ARGS__);	\
+} while (0)
 #endif /* datapath.h */

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2998989..334751c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c

@@ -64,88 +64,110 @@
 void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
 {
 	struct flow_stats *stats;
-	__be16 tcp_flags = 0;
+	__be16 tcp_flags = flow->key.tp.flags;
+	int node = numa_node_id();
 
-	if (!flow->stats.is_percpu)
-		stats = flow->stats.stat;
-	else
-		stats = this_cpu_ptr(flow->stats.cpu_stats);
+	stats = rcu_dereference(flow->stats[node]);
 
-	if ((flow->key.eth.type == htons(ETH_P_IP) ||
-	     flow->key.eth.type == htons(ETH_P_IPV6)) &&
-	    flow->key.ip.frag != OVS_FRAG_TYPE_LATER &&
-	    flow->key.ip.proto == IPPROTO_TCP &&
-	    likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
-		tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
+	/* Check if already have node-specific stats. */
+	if (likely(stats)) {
+		spin_lock(&stats->lock);
+		/* Mark if we write on the pre-allocated stats. */
+		if (node == 0 && unlikely(flow->stats_last_writer != node))
+			flow->stats_last_writer = node;
+	} else {
+		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+		spin_lock(&stats->lock);
+
+		/* If the current NUMA-node is the only writer on the
+		 * pre-allocated stats keep using them.
+		 */
+		if (unlikely(flow->stats_last_writer != node)) {
+			/* A previous locker may have already allocated the
+			 * stats, so we need to check again.  If node-specific
+			 * stats were already allocated, we update the pre-
+			 * allocated stats as we have already locked them.
+			 */
+			if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+			    && likely(!rcu_dereference(flow->stats[node]))) {
+				/* Try to allocate node-specific stats. */
+				struct flow_stats *new_stats;
+
+				new_stats =
+					kmem_cache_alloc_node(flow_stats_cache,
+							      GFP_THISNODE |
+							      __GFP_NOMEMALLOC,
+							      node);
+				if (likely(new_stats)) {
+					new_stats->used = jiffies;
+					new_stats->packet_count = 1;
+					new_stats->byte_count = skb->len;
+					new_stats->tcp_flags = tcp_flags;
+					spin_lock_init(&new_stats->lock);
+
+					rcu_assign_pointer(flow->stats[node],
+							   new_stats);
+					goto unlock;
+				}
+			}
+			flow->stats_last_writer = node;
+		}
 	}
 
-	spin_lock(&stats->lock);
 	stats->used = jiffies;
 	stats->packet_count++;
 	stats->byte_count += skb->len;
 	stats->tcp_flags |= tcp_flags;
+unlock:
 	spin_unlock(&stats->lock);
 }
 
-static void stats_read(struct flow_stats *stats,
-		       struct ovs_flow_stats *ovs_stats,
-		       unsigned long *used, __be16 *tcp_flags)
-{
-	spin_lock(&stats->lock);
-	if (!*used || time_after(stats->used, *used))
-		*used = stats->used;
-	*tcp_flags |= stats->tcp_flags;
-	ovs_stats->n_packets += stats->packet_count;
-	ovs_stats->n_bytes += stats->byte_count;
-	spin_unlock(&stats->lock);
-}
-
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+			struct ovs_flow_stats *ovs_stats,
 			unsigned long *used, __be16 *tcp_flags)
 {
-	int cpu;
+	int node;
 
 	*used = 0;
 	*tcp_flags = 0;
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-	local_bh_disable();
-	if (!flow->stats.is_percpu) {
-		stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
-	} else {
-		for_each_possible_cpu(cpu) {
-			struct flow_stats *stats;
+	for_each_node(node) {
+		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
 
-			stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
-			stats_read(stats, ovs_stats, used, tcp_flags);
+		if (stats) {
+			/* Local CPU may write on non-local stats, so we must
+			 * block bottom-halves here.
+			 */
+			spin_lock_bh(&stats->lock);
+			if (!*used || time_after(stats->used, *used))
+				*used = stats->used;
+			*tcp_flags |= stats->tcp_flags;
+			ovs_stats->n_packets += stats->packet_count;
+			ovs_stats->n_bytes += stats->byte_count;
+			spin_unlock_bh(&stats->lock);
 		}
 	}
-	local_bh_enable();
 }
 
-static void stats_reset(struct flow_stats *stats)
-{
-	spin_lock(&stats->lock);
-	stats->used = 0;
-	stats->packet_count = 0;
-	stats->byte_count = 0;
-	stats->tcp_flags = 0;
-	spin_unlock(&stats->lock);
-}
-
+/* Called with ovs_mutex. */
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-	int cpu;
+	int node;
 
-	local_bh_disable();
-	if (!flow->stats.is_percpu) {
-		stats_reset(flow->stats.stat);
-	} else {
-		for_each_possible_cpu(cpu) {
-			stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));
+	for_each_node(node) {
+		struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+
+		if (stats) {
+			spin_lock_bh(&stats->lock);
+			stats->used = 0;
+			stats->packet_count = 0;
+			stats->byte_count = 0;
+			stats->tcp_flags = 0;
+			spin_unlock_bh(&stats->lock);
 		}
 	}
-	local_bh_enable();
 }
 
 static int check_header(struct sk_buff *skb, int len)
@@ -332,8 +354,8 @@
 	/* The ICMPv6 type and code fields use the 16-bit transport port
 	 * fields, so we need to store them in 16-bit network byte order.
 	 */
-	key->ipv6.tp.src = htons(icmp->icmp6_type);
-	key->ipv6.tp.dst = htons(icmp->icmp6_code);
+	key->tp.src = htons(icmp->icmp6_type);
+	key->tp.dst = htons(icmp->icmp6_code);
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -372,14 +394,14 @@
 			    && opt_len == 8) {
 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
 					goto invalid;
-				memcpy(key->ipv6.nd.sll,
-				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				ether_addr_copy(key->ipv6.nd.sll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 				   && opt_len == 8) {
 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
 					goto invalid;
-				memcpy(key->ipv6.nd.tll,
-				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				ether_addr_copy(key->ipv6.nd.tll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
 			}
 
 			icmp_len -= opt_len;
@@ -439,8 +461,8 @@
 	 * header in the linear data area.
 	 */
 	eth = eth_hdr(skb);
-	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
-	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+	ether_addr_copy(key->eth.src, eth->h_source);
+	ether_addr_copy(key->eth.dst, eth->h_dest);
 
 	__skb_pull(skb, 2 * ETH_ALEN);
 	/* We are going to push all headers that we pull, so no need to
@@ -495,21 +517,21 @@
 		if (key->ip.proto == IPPROTO_TCP) {
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
-				key->ipv4.tp.src = tcp->source;
-				key->ipv4.tp.dst = tcp->dest;
-				key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
 			}
 		} else if (key->ip.proto == IPPROTO_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
-				key->ipv4.tp.src = udp->source;
-				key->ipv4.tp.dst = udp->dest;
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
 			}
 		} else if (key->ip.proto == IPPROTO_SCTP) {
 			if (sctphdr_ok(skb)) {
 				struct sctphdr *sctp = sctp_hdr(skb);
-				key->ipv4.tp.src = sctp->source;
-				key->ipv4.tp.dst = sctp->dest;
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
 			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
 			if (icmphdr_ok(skb)) {
@@ -517,8 +539,8 @@
 				/* The ICMP type and code fields use the 16-bit
 				 * transport port fields, so we need to store
 				 * them in 16-bit network byte order. */
-				key->ipv4.tp.src = htons(icmp->type);
-				key->ipv4.tp.dst = htons(icmp->code);
+				key->tp.src = htons(icmp->type);
+				key->tp.dst = htons(icmp->code);
 			}
 		}
 
@@ -538,8 +560,8 @@
 				key->ip.proto = ntohs(arp->ar_op);
 			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
 			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
-			memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
-			memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
 		}
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
@@ -564,21 +586,21 @@
 		if (key->ip.proto == NEXTHDR_TCP) {
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
-				key->ipv6.tp.src = tcp->source;
-				key->ipv6.tp.dst = tcp->dest;
-				key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
 			}
 		} else if (key->ip.proto == NEXTHDR_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
-				key->ipv6.tp.src = udp->source;
-				key->ipv6.tp.dst = udp->dest;
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
 			}
 		} else if (key->ip.proto == NEXTHDR_SCTP) {
 			if (sctphdr_ok(skb)) {
 				struct sctphdr *sctp = sctp_hdr(skb);
-				key->ipv6.tp.src = sctp->source;
-				key->ipv6.tp.dst = sctp->dest;
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
 			}
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
 			if (icmp6hdr_ok(skb)) {

diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 2d770e2..ac395d2 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h

@@ -47,7 +47,7 @@
 	__be16 tun_flags;
 	u8   ipv4_tos;
 	u8   ipv4_ttl;
-};
+} __packed __aligned(4); /* Minimize padding. */
 
 static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 					 const struct iphdr *iph, __be64 tun_id,
@@ -71,7 +71,7 @@
 		u32	priority;	/* Packet QoS priority. */
 		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
-	} phy;
+	} __packed phy; /* Safe when right after 'tun_key'. */
 	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
@@ -84,23 +84,21 @@
 		u8     ttl;		/* IP TTL/hop limit. */
 		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
 	} ip;
+	struct {
+		__be16 src;		/* TCP/UDP/SCTP source port. */
+		__be16 dst;		/* TCP/UDP/SCTP destination port. */
+		__be16 flags;		/* TCP flags. */
+	} tp;
 	union {
 		struct {
 			struct {
 				__be32 src;	/* IP source address. */
 				__be32 dst;	/* IP destination address. */
 			} addr;
-			union {
-				struct {
-					__be16 src;		/* TCP/UDP/SCTP source port. */
-					__be16 dst;		/* TCP/UDP/SCTP destination port. */
-					__be16 flags;		/* TCP flags. */
-				} tp;
-				struct {
-					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
-					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
-				} arp;
-			};
+			struct {
+				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+			} arp;
 		} ipv4;
 		struct {
 			struct {
@@ -109,11 +107,6 @@
 			} addr;
 			__be32 label;			/* IPv6 flow label. */
 			struct {
-				__be16 src;		/* TCP/UDP/SCTP source port. */
-				__be16 dst;		/* TCP/UDP/SCTP destination port. */
-				__be16 flags;		/* TCP flags. */
-			} tp;
-			struct {
 				struct in6_addr target;	/* ND target address. */
 				u8 sll[ETH_ALEN];	/* ND source link layer address. */
 				u8 tll[ETH_ALEN];	/* ND target link layer address. */
@@ -155,24 +148,22 @@
 	__be16 tcp_flags;		/* Union of seen TCP flags. */
 };
 
-struct sw_flow_stats {
-	bool is_percpu;
-	union {
-		struct flow_stats *stat;
-		struct flow_stats __percpu *cpu_stats;
-	};
-};
-
 struct sw_flow {
 	struct rcu_head rcu;
 	struct hlist_node hash_node[2];
 	u32 hash;
-
+	int stats_last_writer;		/* NUMA-node id of the last writer on
+					 * 'stats[0]'.
+					 */
 	struct sw_flow_key key;
 	struct sw_flow_key unmasked_key;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-	struct sw_flow_stats stats;
+	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
+					   * is allocated at flow creation time,
+					   * the rest are allocated on demand
+					   * while holding the 'stats[0].lock'.
+					   */
 };
 
 struct arp_eth_header {
@@ -189,10 +180,10 @@
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
+void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
 			unsigned long *used, __be16 *tcp_flags);
-void ovs_flow_stats_clear(struct sw_flow *flow);
+void ovs_flow_stats_clear(struct sw_flow *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 4d000ac..d757848 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c

@@ -16,6 +16,8 @@
  * 02110-1301, USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include "flow.h"
 #include "datapath.h"
 #include <linux/uaccess.h>
@@ -202,11 +204,11 @@
 				if (match->mask && (match->mask->key.ip.proto == 0xff))
 					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
 
-				if (match->key->ipv6.tp.src ==
+				if (match->key->tp.src ==
 						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-				    match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 					key_expected |= 1 << OVS_KEY_ATTR_ND;
-					if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+					if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
 						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
 				}
 			}
@@ -216,14 +218,14 @@
 	if ((key_attrs & key_expected) != key_expected) {
 		/* Key attributes check failed. */
 		OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
-				key_attrs, key_expected);
+				(unsigned long long)key_attrs, (unsigned long long)key_expected);
 		return false;
 	}
 
 	if ((mask_attrs & mask_allowed) != mask_attrs) {
 		/* Mask attributes check failed. */
 		OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
-				mask_attrs, mask_allowed);
+				(unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
 		return false;
 	}
 
@@ -266,20 +268,6 @@
 	return true;
 }
 
-static bool is_all_set(const u8 *fp, size_t size)
-{
-	int i;
-
-	if (!fp)
-		return false;
-
-	for (i = 0; i < size; i++)
-		if (fp[i] != 0xff)
-			return false;
-
-	return true;
-}
-
 static int __parse_flow_nlattrs(const struct nlattr *attr,
 				const struct nlattr *a[],
 				u64 *attrsp, bool nz)
@@ -501,9 +489,8 @@
 	return 0;
 }
 
-static int ovs_key_from_nlattrs(struct sw_flow_match *match,  bool *exact_5tuple,
-				u64 attrs, const struct nlattr **a,
-				bool is_mask)
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+				const struct nlattr **a, bool is_mask)
 {
 	int err;
 	u64 orig_attrs = attrs;
@@ -560,11 +547,6 @@
 		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
 	}
 
-	if (is_mask && exact_5tuple) {
-		if (match->mask->key.eth.type != htons(0xffff))
-			*exact_5tuple = false;
-	}
-
 	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
 		const struct ovs_key_ipv4 *ipv4_key;
 
@@ -587,13 +569,6 @@
 		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
 				ipv4_key->ipv4_dst, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
-		if (is_mask && exact_5tuple && *exact_5tuple) {
-			if (ipv4_key->ipv4_proto != 0xff ||
-			    ipv4_key->ipv4_src != htonl(0xffffffff) ||
-			    ipv4_key->ipv4_dst != htonl(0xffffffff))
-				*exact_5tuple = false;
-		}
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
@@ -625,13 +600,6 @@
 				is_mask);
 
 		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
-
-		if (is_mask && exact_5tuple && *exact_5tuple) {
-			if (ipv6_key->ipv6_proto != 0xff ||
-			    !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) ||
-			    !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst)))
-				*exact_5tuple = false;
-		}
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
@@ -662,32 +630,18 @@
 		const struct ovs_key_tcp *tcp_key;
 
 		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
-			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-					tcp_key->tcp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-					tcp_key->tcp_dst, is_mask);
-		} else {
-			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-					tcp_key->tcp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-					tcp_key->tcp_dst, is_mask);
-		}
+		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
-		if (is_mask && exact_5tuple && *exact_5tuple &&
-		    (tcp_key->tcp_src != htons(0xffff) ||
-		     tcp_key->tcp_dst != htons(0xffff)))
-			*exact_5tuple = false;
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
 		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
-			SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
+			SW_FLOW_KEY_PUT(match, tp.flags,
 					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
 					is_mask);
 		} else {
-			SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
+			SW_FLOW_KEY_PUT(match, tp.flags,
 					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
 					is_mask);
 		}
@@ -698,40 +652,17 @@
 		const struct ovs_key_udp *udp_key;
 
 		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
-			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-					udp_key->udp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-					udp_key->udp_dst, is_mask);
-		} else {
-			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-					udp_key->udp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-					udp_key->udp_dst, is_mask);
-		}
+		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
-		if (is_mask && exact_5tuple && *exact_5tuple &&
-		    (udp_key->udp_src != htons(0xffff) ||
-		     udp_key->udp_dst != htons(0xffff)))
-			*exact_5tuple = false;
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
 		const struct ovs_key_sctp *sctp_key;
 
 		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
-		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
-			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-					sctp_key->sctp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-					sctp_key->sctp_dst, is_mask);
-		} else {
-			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-					sctp_key->sctp_src, is_mask);
-			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-					sctp_key->sctp_dst, is_mask);
-		}
+		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
 	}
 
@@ -739,9 +670,9 @@
 		const struct ovs_key_icmp *icmp_key;
 
 		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
-		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+		SW_FLOW_KEY_PUT(match, tp.src,
 				htons(icmp_key->icmp_type), is_mask);
-		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+		SW_FLOW_KEY_PUT(match, tp.dst,
 				htons(icmp_key->icmp_code), is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
 	}
@@ -750,9 +681,9 @@
 		const struct ovs_key_icmpv6 *icmpv6_key;
 
 		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
-		SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+		SW_FLOW_KEY_PUT(match, tp.src,
 				htons(icmpv6_key->icmpv6_type), is_mask);
-		SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+		SW_FLOW_KEY_PUT(match, tp.dst,
 				htons(icmpv6_key->icmpv6_code), is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
 	}
@@ -800,7 +731,6 @@
  * attribute specifies the mask field of the wildcarded flow.
  */
 int ovs_nla_get_match(struct sw_flow_match *match,
-		      bool *exact_5tuple,
 		      const struct nlattr *key,
 		      const struct nlattr *mask)
 {
@@ -848,13 +778,10 @@
 		}
 	}
 
-	err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);
+	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
 	if (err)
 		return err;
 
-	if (exact_5tuple)
-		*exact_5tuple = true;
-
 	if (mask) {
 		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
 		if (err)
@@ -892,7 +819,7 @@
 			}
 		}
 
-		err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);
+		err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
 		if (err)
 			return err;
 	} else {
@@ -982,8 +909,8 @@
 		goto nla_put_failure;
 
 	eth_key = nla_data(nla);
-	memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
-	memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
+	ether_addr_copy(eth_key->eth_src, output->eth.src);
+	ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 
 	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
 		__be16 eth_type;
@@ -1055,8 +982,8 @@
 		arp_key->arp_sip = output->ipv4.addr.src;
 		arp_key->arp_tip = output->ipv4.addr.dst;
 		arp_key->arp_op = htons(output->ip.proto);
-		memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
-		memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
+		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1070,19 +997,11 @@
 			if (!nla)
 				goto nla_put_failure;
 			tcp_key = nla_data(nla);
-			if (swkey->eth.type == htons(ETH_P_IP)) {
-				tcp_key->tcp_src = output->ipv4.tp.src;
-				tcp_key->tcp_dst = output->ipv4.tp.dst;
-				if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
-						 output->ipv4.tp.flags))
-					goto nla_put_failure;
-			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				tcp_key->tcp_src = output->ipv6.tp.src;
-				tcp_key->tcp_dst = output->ipv6.tp.dst;
-				if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
-						 output->ipv6.tp.flags))
-					goto nla_put_failure;
-			}
+			tcp_key->tcp_src = output->tp.src;
+			tcp_key->tcp_dst = output->tp.dst;
+			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+					 output->tp.flags))
+				goto nla_put_failure;
 		} else if (swkey->ip.proto == IPPROTO_UDP) {
 			struct ovs_key_udp *udp_key;
 
@@ -1090,13 +1009,8 @@
 			if (!nla)
 				goto nla_put_failure;
 			udp_key = nla_data(nla);
-			if (swkey->eth.type == htons(ETH_P_IP)) {
-				udp_key->udp_src = output->ipv4.tp.src;
-				udp_key->udp_dst = output->ipv4.tp.dst;
-			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				udp_key->udp_src = output->ipv6.tp.src;
-				udp_key->udp_dst = output->ipv6.tp.dst;
-			}
+			udp_key->udp_src = output->tp.src;
+			udp_key->udp_dst = output->tp.dst;
 		} else if (swkey->ip.proto == IPPROTO_SCTP) {
 			struct ovs_key_sctp *sctp_key;
 
@@ -1104,13 +1018,8 @@
 			if (!nla)
 				goto nla_put_failure;
 			sctp_key = nla_data(nla);
-			if (swkey->eth.type == htons(ETH_P_IP)) {
-				sctp_key->sctp_src = swkey->ipv4.tp.src;
-				sctp_key->sctp_dst = swkey->ipv4.tp.dst;
-			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				sctp_key->sctp_src = swkey->ipv6.tp.src;
-				sctp_key->sctp_dst = swkey->ipv6.tp.dst;
-			}
+			sctp_key->sctp_src = output->tp.src;
+			sctp_key->sctp_dst = output->tp.dst;
 		} else if (swkey->eth.type == htons(ETH_P_IP) &&
 			   swkey->ip.proto == IPPROTO_ICMP) {
 			struct ovs_key_icmp *icmp_key;
@@ -1119,8 +1028,8 @@
 			if (!nla)
 				goto nla_put_failure;
 			icmp_key = nla_data(nla);
-			icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
-			icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+			icmp_key->icmp_type = ntohs(output->tp.src);
+			icmp_key->icmp_code = ntohs(output->tp.dst);
 		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
 			   swkey->ip.proto == IPPROTO_ICMPV6) {
 			struct ovs_key_icmpv6 *icmpv6_key;
@@ -1130,8 +1039,8 @@
 			if (!nla)
 				goto nla_put_failure;
 			icmpv6_key = nla_data(nla);
-			icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
-			icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
 
 			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
 			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1143,8 +1052,8 @@
 				nd_key = nla_data(nla);
 				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
 							sizeof(nd_key->nd_target));
-				memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
-				memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
+				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
+				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
 			}
 		}
 	}
@@ -1309,13 +1218,10 @@
 
 static int validate_tp_port(const struct sw_flow_key *flow_key)
 {
-	if (flow_key->eth.type == htons(ETH_P_IP)) {
-		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
-			return 0;
-	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
-		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
-			return 0;
-	}
+	if ((flow_key->eth.type == htons(ETH_P_IP) ||
+	     flow_key->eth.type == htons(ETH_P_IPV6)) &&
+	    (flow_key->tp.src || flow_key->tp.dst))
+		return 0;
 
 	return -EINVAL;
 }

diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index b31fbe2..4401510 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h

@@ -45,7 +45,6 @@
 int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 			      const struct nlattr *attr);
 int ovs_nla_get_match(struct sw_flow_match *match,
-		      bool *exact_5tuple,
 		      const struct nlattr *,
 		      const struct nlattr *);
 

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3c268b3..574c3ab 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c

@@ -48,6 +48,7 @@
 #define REHASH_INTERVAL		(10 * 60 * HZ)
 
 static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
 
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
@@ -57,8 +58,10 @@
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       const struct sw_flow_mask *mask)
 {
-	const long *m = (long *)((u8 *)&mask->key + mask->range.start);
-	const long *s = (long *)((u8 *)src + mask->range.start);
+	const long *m = (const long *)((const u8 *)&mask->key +
+				mask->range.start);
+	const long *s = (const long *)((const u8 *)src +
+				mask->range.start);
 	long *d = (long *)((u8 *)dst + mask->range.start);
 	int i;
 
@@ -70,10 +73,11 @@
 		*d++ = *s++ & *m++;
 }
 
-struct sw_flow *ovs_flow_alloc(bool percpu_stats)
+struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
-	int cpu;
+	struct flow_stats *stats;
+	int node;
 
 	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
 	if (!flow)
@@ -81,27 +85,22 @@
 
 	flow->sf_acts = NULL;
 	flow->mask = NULL;
+	flow->stats_last_writer = NUMA_NO_NODE;
 
-	flow->stats.is_percpu = percpu_stats;
+	/* Initialize the default stat node. */
+	stats = kmem_cache_alloc_node(flow_stats_cache,
+				      GFP_KERNEL | __GFP_ZERO, 0);
+	if (!stats)
+		goto err;
 
-	if (!percpu_stats) {
-		flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
-		if (!flow->stats.stat)
-			goto err;
+	spin_lock_init(&stats->lock);
 
-		spin_lock_init(&flow->stats.stat->lock);
-	} else {
-		flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
-		if (!flow->stats.cpu_stats)
-			goto err;
+	RCU_INIT_POINTER(flow->stats[0], stats);
 
-		for_each_possible_cpu(cpu) {
-			struct flow_stats *cpu_stats;
+	for_each_node(node)
+		if (node != 0)
+			RCU_INIT_POINTER(flow->stats[node], NULL);
 
-			cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
-			spin_lock_init(&cpu_stats->lock);
-		}
-	}
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -138,11 +137,13 @@
 
 static void flow_free(struct sw_flow *flow)
 {
-	kfree((struct sf_flow_acts __force *)flow->sf_acts);
-	if (flow->stats.is_percpu)
-		free_percpu(flow->stats.cpu_stats);
-	else
-		kfree(flow->stats.stat);
+	int node;
+
+	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+	for_each_node(node)
+		if (flow->stats[node])
+			kmem_cache_free(flow_stats_cache,
+					(struct flow_stats __force *)flow->stats[node]);
 	kmem_cache_free(flow_cache, flow);
 }
 
@@ -158,25 +159,6 @@
 	if (!flow)
 		return;
 
-	if (flow->mask) {
-		struct sw_flow_mask *mask = flow->mask;
-
-		/* ovs-lock is required to protect mask-refcount and
-		 * mask list.
-		 */
-		ASSERT_OVSL();
-		BUG_ON(!mask->ref_count);
-		mask->ref_count--;
-
-		if (!mask->ref_count) {
-			list_del_rcu(&mask->list);
-			if (deferred)
-				kfree_rcu(mask, rcu);
-			else
-				kfree(mask);
-		}
-	}
-
 	if (deferred)
 		call_rcu(&flow->rcu, rcu_free_flow_callback);
 	else
@@ -375,7 +357,7 @@
 static u32 flow_hash(const struct sw_flow_key *key, int key_start,
 		     int key_end)
 {
-	u32 *hash_key = (u32 *)((u8 *)key + key_start);
+	const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
 	int hash_u32s = (key_end - key_start) >> 2;
 
 	/* Make sure number of hash bytes are multiple of u32. */
@@ -397,8 +379,8 @@
 		    const struct sw_flow_key *key2,
 		    int key_start, int key_end)
 {
-	const long *cp1 = (long *)((u8 *)key1 + key_start);
-	const long *cp2 = (long *)((u8 *)key2 + key_start);
+	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
 	long diffs = 0;
 	int i;
 
@@ -490,6 +472,25 @@
 	return table_instance_rehash(ti, ti->n_buckets * 2);
 }
 
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+	if (mask) {
+		/* ovs-lock is required to protect mask-refcount and
+		 * mask list.
+		 */
+		ASSERT_OVSL();
+		BUG_ON(!mask->ref_count);
+		mask->ref_count--;
+
+		if (!mask->ref_count) {
+			list_del_rcu(&mask->list);
+			kfree_rcu(mask, rcu);
+		}
+	}
+}
+
+/* Must be called with OVS mutex held. */
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
@@ -497,6 +498,11 @@
 	BUG_ON(table->count == 0);
 	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
 	table->count--;
+
+	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
+	 * accessible as long as the RCU read lock is held.
+	 */
+	flow_mask_remove(table, flow->mask);
 }
 
 static struct sw_flow_mask *mask_alloc(void)
@@ -513,8 +519,8 @@
 static bool mask_equal(const struct sw_flow_mask *a,
 		       const struct sw_flow_mask *b)
 {
-	u8 *a_ = (u8 *)&a->key + a->range.start;
-	u8 *b_ = (u8 *)&b->key + b->range.start;
+	const u8 *a_ = (const u8 *)&a->key + a->range.start;
+	const u8 *b_ = (const u8 *)&b->key + b->range.start;
 
 	return  (a->range.end == b->range.end)
 		&& (a->range.start == b->range.start)
@@ -559,6 +565,7 @@
 	return 0;
 }
 
+/* Must be called with OVS mutex held. */
 int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			struct sw_flow_mask *mask)
 {
@@ -597,16 +604,28 @@
 	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
-	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
-					0, NULL);
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+				       + (num_possible_nodes()
+					  * sizeof(struct flow_stats *)),
+				       0, 0, NULL);
 	if (flow_cache == NULL)
 		return -ENOMEM;
 
+	flow_stats_cache
+		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+				    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (flow_stats_cache == NULL) {
+		kmem_cache_destroy(flow_cache);
+		flow_cache = NULL;
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 /* Uninitializes the flow module. */
 void ovs_flow_exit(void)
 {
+	kmem_cache_destroy(flow_stats_cache);
 	kmem_cache_destroy(flow_cache);
 }

diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index baaeb10..ca8a582 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h

@@ -52,10 +52,12 @@
 	unsigned int count;
 };
 
+extern struct kmem_cache *flow_stats_cache;
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
-struct sw_flow *ovs_flow_alloc(bool percpu_stats);
+struct sw_flow *ovs_flow_alloc(void);
 void ovs_flow_free(struct sw_flow *, bool deferred);
 
 int ovs_flow_tbl_init(struct flow_table *);

diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index ebb6e24..35ec4fe 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c

@@ -172,7 +172,7 @@
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
 			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
@@ -256,7 +256,7 @@
 
 	ovs_net = net_generic(net, ovs_net_id);
 
-	rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
+	RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
 	ovs_vport_deferred_free(vport);
 	gre_exit();
 }

diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 729c687..789af92 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c

@@ -130,7 +130,7 @@
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	netdev->destructor = internal_dev_destructor;
-	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->tx_queue_len = 0;
 
 	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |

diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index e797a50..0edbd95 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c

@@ -122,7 +122,7 @@
 	vxlan_port = vxlan_vport(vport);
 	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 
-	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
+	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
 	if (IS_ERR(vs)) {
 		ovs_vport_free(vport);
 		return (void *)vs;
@@ -170,7 +170,7 @@
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	inet_get_local_port_range(net, &port_min, &port_max);
 	src_port = vxlan_src_port(port_min, port_max, skb);
@@ -180,7 +180,8 @@
 			     OVS_CB(skb)->tun_key->ipv4_tos,
 			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
 			     src_port, dst_port,
-			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+			     false);
 	if (err < 0)
 		ip_rt_put(rt);
 error:

diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index d7e50a1..8d721e6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h

@@ -172,7 +172,7 @@
  */
 static inline void *vport_priv(const struct vport *vport)
 {
-	return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+	return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
 }
 
 /**
@@ -185,9 +185,9 @@
  * the result of a hash table lookup.  @priv must point to the start of the
  * private data area.
  */
-static inline struct vport *vport_from_priv(const void *priv)
+static inline struct vport *vport_from_priv(void *priv)
 {
-	return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+	return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
 }
 
 void ovs_vport_receive(struct vport *, struct sk_buff *,

diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 37be6e2..1dde91e 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c

@@ -298,7 +298,7 @@
 		rds_ib_stats_inc(s_ib_tx_cq_event);
 
 		if (wc.wr_id == RDS_IB_ACK_WR_ID) {
-			if (ic->i_ack_queued + HZ/2 < jiffies)
+			if (time_after(jiffies, ic->i_ack_queued + HZ/2))
 				rds_ib_stats_inc(s_ib_tx_stalled);
 			rds_ib_ack_send_complete(ic);
 			continue;
@@ -315,7 +315,7 @@
 
 			rm = rds_ib_send_unmap_op(ic, send, wc.status);
 
-			if (send->s_queued + HZ/2 < jiffies)
+			if (time_after(jiffies, send->s_queued + HZ/2))
 				rds_ib_stats_inc(s_ib_tx_stalled);
 
 			if (send->s_op) {

diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index e40c3c5..9105ea0 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c

@@ -232,7 +232,7 @@
 		}
 
 		if (wc.wr_id == RDS_IW_ACK_WR_ID) {
-			if (ic->i_ack_queued + HZ/2 < jiffies)
+			if (time_after(jiffies, ic->i_ack_queued + HZ/2))
 				rds_iw_stats_inc(s_iw_tx_stalled);
 			rds_iw_ack_send_complete(ic);
 			continue;
@@ -267,7 +267,7 @@
 
 			send->s_wr.opcode = 0xdead;
 			send->s_wr.num_sge = 1;
-			if (send->s_queued + HZ/2 < jiffies)
+			if (time_after(jiffies, send->s_queued + HZ/2))
 				rds_iw_stats_inc(s_iw_tx_stalled);
 
 			/* If a RDMA operation produced an error, signal this right

diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 89c9151..139239d 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c

@@ -111,8 +111,7 @@
 
 void rds_iw_sysctl_exit(void)
 {
-	if (rds_iw_sysctl_hdr)
-		unregister_net_sysctl_table(rds_iw_sysctl_hdr);
+	unregister_net_sysctl_table(rds_iw_sysctl_hdr);
 }
 
 int rds_iw_sysctl_init(void)

diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index c2be901..6cd9d1d 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c

@@ -168,7 +168,7 @@
 		return ret;
 	}
 
-	sin.sin_family = AF_INET,
+	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
 	sin.sin_port = (__force u16)htons(RDS_PORT);
 

diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index b5cb2aa..c3b0cd4 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c

@@ -94,8 +94,7 @@
 
 void rds_sysctl_exit(void)
 {
-	if (rds_sysctl_reg_table)
-		unregister_net_sysctl_table(rds_sysctl_reg_table);
+	unregister_net_sysctl_table(rds_sysctl_reg_table);
 }
 
 int rds_sysctl_init(void)

diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 4e638f8..23ab4dcd 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c

@@ -153,7 +153,7 @@
 	sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
-	sin.sin_family = PF_INET,
+	sin.sin_family = PF_INET;
 	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
 	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
 

diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index bd2a5b9..14c98e4 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c

@@ -36,8 +36,6 @@
 	struct gpio_desc	*shutdown_gpio;
 
 	struct rfkill		*rfkill_dev;
-	char			*reset_name;
-	char			*shutdown_name;
 	struct clk		*clk;
 
 	bool			clk_enabled;
@@ -47,17 +45,14 @@
 {
 	struct rfkill_gpio_data *rfkill = data;
 
-	if (blocked) {
-		gpiod_set_value(rfkill->shutdown_gpio, 0);
-		gpiod_set_value(rfkill->reset_gpio, 0);
-		if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled)
-			clk_disable(rfkill->clk);
-	} else {
-		if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
-			clk_enable(rfkill->clk);
-		gpiod_set_value(rfkill->reset_gpio, 1);
-		gpiod_set_value(rfkill->shutdown_gpio, 1);
-	}
+	if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
+		clk_enable(rfkill->clk);
+
+	gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked);
+	gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked);
+
+	if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled)
+		clk_disable(rfkill->clk);
 
 	rfkill->clk_enabled = blocked;
 
@@ -87,10 +82,8 @@
 {
 	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 	struct rfkill_gpio_data *rfkill;
-	const char *clk_name = NULL;
 	struct gpio_desc *gpio;
 	int ret;
-	int len;
 
 	rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
 	if (!rfkill)
@@ -101,28 +94,15 @@
 		if (ret)
 			return ret;
 	} else if (pdata) {
-		clk_name = pdata->power_clk_name;
 		rfkill->name = pdata->name;
 		rfkill->type = pdata->type;
 	} else {
 		return -ENODEV;
 	}
 
-	len = strlen(rfkill->name);
-	rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL);
-	if (!rfkill->reset_name)
-		return -ENOMEM;
+	rfkill->clk = devm_clk_get(&pdev->dev, NULL);
 
-	rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL);
-	if (!rfkill->shutdown_name)
-		return -ENOMEM;
-
-	snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name);
-	snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name);
-
-	rfkill->clk = devm_clk_get(&pdev->dev, clk_name);
-
-	gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0);
+	gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0);
 	if (!IS_ERR(gpio)) {
 		ret = gpiod_direction_output(gpio, 0);
 		if (ret)
@@ -130,7 +110,7 @@
 		rfkill->reset_gpio = gpio;
 	}
 
-	gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1);
+	gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1);
 	if (!IS_ERR(gpio)) {
 		ret = gpiod_direction_output(gpio, 0);
 		if (ret)
@@ -146,14 +126,6 @@
 		return -EINVAL;
 	}
 
-	if (pdata && pdata->gpio_runtime_setup) {
-		ret = pdata->gpio_runtime_setup(pdev);
-		if (ret) {
-			dev_err(&pdev->dev, "can't set up gpio\n");
-			return ret;
-		}
-	}
-
 	rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
 					  rfkill->type, &rfkill_gpio_ops,
 					  rfkill);
@@ -174,20 +146,23 @@
 static int rfkill_gpio_remove(struct platform_device *pdev)
 {
 	struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
-	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdata && pdata->gpio_runtime_close)
-		pdata->gpio_runtime_close(pdev);
 	rfkill_unregister(rfkill->rfkill_dev);
 	rfkill_destroy(rfkill->rfkill_dev);
 
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
 static const struct acpi_device_id rfkill_acpi_match[] = {
+	{ "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
+	{ "BCM2E39", RFKILL_TYPE_BLUETOOTH },
+	{ "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
 	{ "BCM4752", RFKILL_TYPE_GPS },
+	{ "LNV4752", RFKILL_TYPE_GPS },
 	{ },
 };
+#endif
 
 static struct platform_driver rfkill_gpio_driver = {
 	.probe = rfkill_gpio_probe,

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bdbdb1a..45527e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c

@@ -134,7 +134,8 @@
 	int err;
 	int tp_created = 0;
 
-	if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETTFILTER) &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 replay:
@@ -317,7 +318,8 @@
 		}
 	}
 
-	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
+	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
+			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
 	if (err == 0) {
 		if (tp_created) {
 			spin_lock_bh(root_lock);
@@ -504,7 +506,7 @@
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-		  struct nlattr *rate_tlv, struct tcf_exts *exts)
+		  struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	{
@@ -513,7 +515,7 @@
 		INIT_LIST_HEAD(&exts->actions);
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
-						"police", TCA_ACT_NOREPLACE,
+						"police", ovr,
 						TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
@@ -523,7 +525,7 @@
 		} else if (exts->action && tb[exts->action]) {
 			int err;
 			err = tcf_action_init(net, tb[exts->action], rate_tlv,
-					      NULL, TCA_ACT_NOREPLACE,
+					      NULL, ovr,
 					      TCA_ACT_BIND, &exts->actions);
 			if (err)
 				return err;
@@ -543,14 +545,12 @@
 		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (!list_empty(&src->actions)) {
-		LIST_HEAD(tmp);
-		tcf_tree_lock(tp);
-		list_splice_init(&dst->actions, &tmp);
-		list_splice(&src->actions, &dst->actions);
-		tcf_tree_unlock(tp);
-		tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
-	}
+	LIST_HEAD(tmp);
+	tcf_tree_lock(tp);
+	list_splice_init(&dst->actions, &tmp);
+	list_splice(&src->actions, &dst->actions);
+	tcf_tree_unlock(tp);
+	tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index e98ca99..0ae1813 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c

@@ -130,14 +130,14 @@
 static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 			   struct basic_filter *f, unsigned long base,
 			   struct nlattr **tb,
-			   struct nlattr *est)
+			   struct nlattr *est, bool ovr)
 {
 	int err;
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
 	tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -161,7 +161,7 @@
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
-			struct nlattr **tca, unsigned long *arg)
+			struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	int err;
 	struct basic_head *head = tp->root;
@@ -179,7 +179,7 @@
 	if (f != NULL) {
 		if (handle && f->handle != handle)
 			return -EINVAL;
-		return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+		return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
 	}
 
 	err = -ENOBUFS;
@@ -206,7 +206,7 @@
 		f->handle = head->hgenerator;
 	}
 
-	err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+	err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
 	if (err < 0)
 		goto errout;
 

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8e3cf49..13f64df 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c

@@ -156,11 +156,11 @@
 static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 				   struct cls_bpf_prog *prog,
 				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est)
+				   struct nlattr *est, bool ovr)
 {
 	struct sock_filter *bpf_ops, *bpf_old;
 	struct tcf_exts exts;
-	struct sock_fprog tmp;
+	struct sock_fprog_kern tmp;
 	struct sk_filter *fp, *fp_old;
 	u16 bpf_size, bpf_len;
 	u32 classid;
@@ -170,7 +170,7 @@
 		return -EINVAL;
 
 	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-	ret = tcf_exts_validate(net, tp, tb, est, &exts);
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
 	if (ret < 0)
 		return ret;
 
@@ -191,7 +191,7 @@
 	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
 
 	tmp.len = bpf_len;
-	tmp.filter = (struct sock_filter __user *) bpf_ops;
+	tmp.filter = bpf_ops;
 
 	ret = sk_unattached_filter_create(&fp, &tmp);
 	if (ret)
@@ -242,7 +242,7 @@
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  unsigned long *arg)
+			  unsigned long *arg, bool ovr)
 {
 	struct cls_bpf_head *head = tp->root;
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
@@ -260,7 +260,7 @@
 		if (handle && prog->handle != handle)
 			return -EINVAL;
 		return cls_bpf_modify_existing(net, tp, prog, base, tb,
-					       tca[TCA_RATE]);
+					       tca[TCA_RATE], ovr);
 	}
 
 	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
@@ -277,7 +277,7 @@
 		goto errout;
 	}
 
-	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
+	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
 	if (ret < 0)
 		goto errout;
 

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 8e2158a..cacf01b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c

@@ -83,7 +83,7 @@
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     unsigned long *arg)
+			     unsigned long *arg, bool ovr)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = tp->root;
@@ -119,7 +119,7 @@
 		return err;
 
 	tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 257029c..35be16f 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c

@@ -349,7 +349,7 @@
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct flow_head *head = tp->root;
 	struct flow_filter *f;
@@ -393,7 +393,7 @@
 	}
 
 	tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 

diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 63a3ce7..861b03c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c

@@ -169,7 +169,7 @@
 
 static int
 fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
-	struct nlattr **tb, struct nlattr **tca, unsigned long base)
+	struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
 {
 	struct fw_head *head = tp->root;
 	struct tcf_exts e;
@@ -177,7 +177,7 @@
 	int err;
 
 	tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -218,7 +218,7 @@
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle,
 		     struct nlattr **tca,
-		     unsigned long *arg)
+		     unsigned long *arg, bool ovr)
 {
 	struct fw_head *head = tp->root;
 	struct fw_filter *f = (struct fw_filter *) *arg;
@@ -236,7 +236,7 @@
 	if (f != NULL) {
 		if (f->id != handle && handle)
 			return -EINVAL;
-		return fw_change_attrs(net, tp, f, tb, tca, base);
+		return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
 	}
 
 	if (!handle)
@@ -264,7 +264,7 @@
 	tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
 	f->id = handle;
 
-	err = fw_change_attrs(net, tp, f, tb, tca, base);
+	err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
 	if (err < 0)
 		goto errout;
 

diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1ad3068..dd9fc25 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c

@@ -333,7 +333,8 @@
 static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 			    unsigned long base, struct route4_filter *f,
 			    u32 handle, struct route4_head *head,
-			    struct nlattr **tb, struct nlattr *est, int new)
+			    struct nlattr **tb, struct nlattr *est, int new,
+			    bool ovr)
 {
 	int err;
 	u32 id = 0, to = 0, nhandle = 0x8000;
@@ -343,7 +344,7 @@
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -428,7 +429,7 @@
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct route4_head *head = tp->root;
 	struct route4_filter *f, *f1, **fp;
@@ -455,7 +456,7 @@
 			old_handle = f->handle;
 
 		err = route4_set_parms(net, tp, base, f, handle, head, tb,
-			tca[TCA_RATE], 0);
+			tca[TCA_RATE], 0, ovr);
 		if (err < 0)
 			return err;
 
@@ -479,7 +480,7 @@
 
 	tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	err = route4_set_parms(net, tp, base, f, handle, head, tb,
-		tca[TCA_RATE], 1);
+		tca[TCA_RATE], 1, ovr);
 	if (err < 0)
 		goto errout;
 

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 19f8e5d..1020e23 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h

@@ -415,7 +415,7 @@
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct rsvp_head *data = tp->root;
 	struct rsvp_filter *f, **fp;
@@ -436,7 +436,7 @@
 		return err;
 
 	tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index f435a88..c721cd4 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c

@@ -198,7 +198,7 @@
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		  u32 handle, struct tcindex_data *p,
 		  struct tcindex_filter_result *r, struct nlattr **tb,
-		 struct nlattr *est)
+		  struct nlattr *est, bool ovr)
 {
 	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -208,7 +208,7 @@
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -341,7 +341,7 @@
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, unsigned long *arg)
+	       struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -361,7 +361,7 @@
 		return err;
 
 	return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-				 tca[TCA_RATE]);
+				 tca[TCA_RATE], ovr);
 }
 
 

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 84c28da..c39b583 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c

@@ -486,13 +486,13 @@
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 			 unsigned long base, struct tc_u_hnode *ht,
 			 struct tc_u_knode *n, struct nlattr **tb,
-			 struct nlattr *est)
+			 struct nlattr *est, bool ovr)
 {
 	int err;
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -545,7 +545,7 @@
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
 		      struct nlattr **tca,
-		      unsigned long *arg)
+		      unsigned long *arg, bool ovr)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -569,7 +569,7 @@
 			return -EINVAL;
 
 		return u32_set_parms(net, tp, base, n->ht_up, n, tb,
-				     tca[TCA_RATE]);
+				     tca[TCA_RATE], ovr);
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
@@ -656,7 +656,7 @@
 	}
 #endif
 
-	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]);
+	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
 	if (err == 0) {
 		struct tc_u_knode **ins;
 		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4007690..58bed75 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c

@@ -563,7 +563,7 @@
 }
 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
 {
 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
@@ -1084,7 +1084,8 @@
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETQDISC) &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1151,7 +1152,7 @@
 	struct Qdisc *q, *p;
 	int err;
 
-	if (!netlink_capable(skb, CAP_NET_ADMIN))
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 replay:
@@ -1490,7 +1491,8 @@
 	u32 qid;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETTCLASS) &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2aee028..ed30e43 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c

@@ -391,12 +391,7 @@
 
 static void choke_free(void *addr)
 {
-	if (addr) {
-		if (is_vmalloc_addr(addr))
-			vfree(addr);
-		else
-			kfree(addr);
-	}
+	kvfree(addr);
 }
 
 static int choke_change(struct Qdisc *sch, struct nlattr *opt)

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8302717..7bbbfe1 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c

@@ -391,8 +391,10 @@
 	while (1) {
 		cl = list_first_entry(&q->active, struct drr_class, alist);
 		skb = cl->qdisc->ops->peek(cl->qdisc);
-		if (skb == NULL)
+		if (skb == NULL) {
+			qdisc_warn_nonwc(__func__, cl->qdisc);
 			goto out;
+		}
 
 		len = qdisc_pkt_len(skb);
 		if (len <= cl->deficit) {

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 23c682b..ba32c2b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c

@@ -591,10 +591,7 @@
 
 static void fq_free(void *addr)
 {
-	if (addr && is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
+	kvfree(addr);
 }
 
 static int fq_resize(struct Qdisc *sch, u32 log)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 0bf432c..063b726 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c

@@ -365,12 +365,7 @@
 
 static void fq_codel_free(void *addr)
 {
-	if (addr) {
-		if (is_vmalloc_addr(addr))
-			vfree(addr);
-		else
-			kfree(addr);
-	}
+	kvfree(addr);
 }
 
 static void fq_codel_destroy(struct Qdisc *sch)

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 6e957c3..d85b681 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c

@@ -414,7 +414,7 @@
 		}
 		bucket->deficit = weight * q->quantum;
 	}
-	if (++sch->q.qlen < sch->limit)
+	if (++sch->q.qlen <= sch->limit)
 		return NET_XMIT_SUCCESS;
 
 	q->drop_overlimit++;
@@ -494,12 +494,7 @@
 
 static void hhf_free(void *addr)
 {
-	if (addr) {
-		if (is_vmalloc_addr(addr))
-			vfree(addr);
-		else
-			kfree(addr);
-	}
+	kvfree(addr);
 }
 
 static void hhf_destroy(struct Qdisc *sch)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f1669a00..111d70f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c

@@ -648,12 +648,7 @@
 
 static void dist_free(struct disttable *d)
 {
-	if (d) {
-		if (is_vmalloc_addr(d))
-			vfree(d);
-		else
-			kfree(d);
-	}
+	kvfree(d);
 }
 
 /*

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 87317ff..1af2f73 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c

@@ -716,12 +716,7 @@
 
 static void sfq_free(void *addr)
 {
-	if (addr) {
-		if (is_vmalloc_addr(addr))
-			vfree(addr);
-		else
-			kfree(addr);
-	}
+	kvfree(addr);
 }
 
 static void sfq_destroy(struct Qdisc *sch)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 39579c3..9de23a2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c

@@ -55,6 +55,7 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal functions. */
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc);
 static void sctp_assoc_bh_rcv(struct work_struct *work);
 static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
 static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
@@ -330,7 +331,7 @@
 	/* Only real associations count against the endpoint, so
 	 * don't bother for if this is a temporary association.
 	 */
-	if (!asoc->temp) {
+	if (!list_empty(&asoc->asocs)) {
 		list_del(&asoc->asocs);
 
 		/* Decrement the backlog value for a TCP-style listening
@@ -774,9 +775,6 @@
 				  sctp_transport_cmd_t command,
 				  sctp_sn_error_t error)
 {
-	struct sctp_transport *t = NULL;
-	struct sctp_transport *first;
-	struct sctp_transport *second;
 	struct sctp_ulpevent *event;
 	struct sockaddr_storage addr;
 	int spc_state = 0;
@@ -829,13 +827,14 @@
 		return;
 	}
 
-	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
-	 * user.
+	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification
+	 * to the user.
 	 */
 	if (ulp_notify) {
 		memset(&addr, 0, sizeof(struct sockaddr_storage));
 		memcpy(&addr, &transport->ipaddr,
 		       transport->af_specific->sockaddr_len);
+
 		event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
 					0, spc_state, error, GFP_ATOMIC);
 		if (event)
@@ -843,60 +842,7 @@
 	}
 
 	/* Select new active and retran paths. */
-
-	/* Look for the two most recently used active transports.
-	 *
-	 * This code produces the wrong ordering whenever jiffies
-	 * rolls over, but we still get usable transports, so we don't
-	 * worry about it.
-	 */
-	first = NULL; second = NULL;
-
-	list_for_each_entry(t, &asoc->peer.transport_addr_list,
-			transports) {
-
-		if ((t->state == SCTP_INACTIVE) ||
-		    (t->state == SCTP_UNCONFIRMED) ||
-		    (t->state == SCTP_PF))
-			continue;
-		if (!first || t->last_time_heard > first->last_time_heard) {
-			second = first;
-			first = t;
-		} else if (!second ||
-			   t->last_time_heard > second->last_time_heard)
-			second = t;
-	}
-
-	/* RFC 2960 6.4 Multi-Homed SCTP Endpoints
-	 *
-	 * By default, an endpoint should always transmit to the
-	 * primary path, unless the SCTP user explicitly specifies the
-	 * destination transport address (and possibly source
-	 * transport address) to use.
-	 *
-	 * [If the primary is active but not most recent, bump the most
-	 * recently used transport.]
-	 */
-	if (((asoc->peer.primary_path->state == SCTP_ACTIVE) ||
-	     (asoc->peer.primary_path->state == SCTP_UNKNOWN)) &&
-	    first != asoc->peer.primary_path) {
-		second = first;
-		first = asoc->peer.primary_path;
-	}
-
-	if (!second)
-		second = first;
-	/* If we failed to find a usable transport, just camp on the
-	 * primary, even if it is inactive.
-	 */
-	if (!first) {
-		first = asoc->peer.primary_path;
-		second = asoc->peer.primary_path;
-	}
-
-	/* Set the active and retran transports.  */
-	asoc->peer.active_path = first;
-	asoc->peer.retran_path = second;
+	sctp_select_active_and_retran_path(asoc);
 }
 
 /* Hold a reference to an association. */
@@ -1090,7 +1036,7 @@
 		}
 
 		if (chunk->transport)
-			chunk->transport->last_time_heard = jiffies;
+			chunk->transport->last_time_heard = ktime_get();
 
 		/* Run through the state machine. */
 		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
@@ -1278,13 +1224,41 @@
 	return sctp_trans_state_to_prio_map[trans->state];
 }
 
+static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1,
+						   struct sctp_transport *trans2)
+{
+	if (trans1->error_count > trans2->error_count) {
+		return trans2;
+	} else if (trans1->error_count == trans2->error_count &&
+		   ktime_after(trans2->last_time_heard,
+			       trans1->last_time_heard)) {
+		return trans2;
+	} else {
+		return trans1;
+	}
+}
+
 static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
 						    struct sctp_transport *best)
 {
+	u8 score_curr, score_best;
+
 	if (best == NULL)
 		return curr;
 
-	return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best;
+	score_curr = sctp_trans_score(curr);
+	score_best = sctp_trans_score(best);
+
+	/* First, try a score-based selection if both transport states
+	 * differ. If we're in a tie, let's try to make a more clever
+	 * decision here based on error counts and last time heard.
+	 */
+	if (score_curr > score_best)
+		return curr;
+	else if (score_curr == score_best)
+		return sctp_trans_elect_tie(curr, best);
+	else
+		return best;
 }
 
 void sctp_assoc_update_retran_path(struct sctp_association *asoc)
@@ -1325,6 +1299,76 @@
 		 __func__, asoc, &asoc->peer.retran_path->ipaddr.sa);
 }
 
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
+{
+	struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL;
+	struct sctp_transport *trans_pf = NULL;
+
+	/* Look for the two most recently used active transports. */
+	list_for_each_entry(trans, &asoc->peer.transport_addr_list,
+			    transports) {
+		/* Skip uninteresting transports. */
+		if (trans->state == SCTP_INACTIVE ||
+		    trans->state == SCTP_UNCONFIRMED)
+			continue;
+		/* Keep track of the best PF transport from our
+		 * list in case we don't find an active one.
+		 */
+		if (trans->state == SCTP_PF) {
+			trans_pf = sctp_trans_elect_best(trans, trans_pf);
+			continue;
+		}
+		/* For active transports, pick the most recent ones. */
+		if (trans_pri == NULL ||
+		    ktime_after(trans->last_time_heard,
+				trans_pri->last_time_heard)) {
+			trans_sec = trans_pri;
+			trans_pri = trans;
+		} else if (trans_sec == NULL ||
+			   ktime_after(trans->last_time_heard,
+				       trans_sec->last_time_heard)) {
+			trans_sec = trans;
+		}
+	}
+
+	/* RFC 2960 6.4 Multi-Homed SCTP Endpoints
+	 *
+	 * By default, an endpoint should always transmit to the primary
+	 * path, unless the SCTP user explicitly specifies the
+	 * destination transport address (and possibly source transport
+	 * address) to use. [If the primary is active but not most recent,
+	 * bump the most recently used transport.]
+	 */
+	if ((asoc->peer.primary_path->state == SCTP_ACTIVE ||
+	     asoc->peer.primary_path->state == SCTP_UNKNOWN) &&
+	     asoc->peer.primary_path != trans_pri) {
+		trans_sec = trans_pri;
+		trans_pri = asoc->peer.primary_path;
+	}
+
+	/* We did not find anything useful for a possible retransmission
+	 * path; either primary path that we found is the same as
+	 * the current one, or we didn't generally find an active one.
+	 */
+	if (trans_sec == NULL)
+		trans_sec = trans_pri;
+
+	/* If we failed to find a usable transport, just camp on the
+	 * primary or retran, even if they are inactive; if possible,
+	 * pick a PF iff it's the better choice.
+	 */
+	if (trans_pri == NULL) {
+		trans_pri = sctp_trans_elect_best(asoc->peer.primary_path,
+						  asoc->peer.retran_path);
+		trans_pri = sctp_trans_elect_best(trans_pri, trans_pf);
+		trans_sec = asoc->peer.primary_path;
+	}
+
+	/* Set the active and retran transports. */
+	asoc->peer.active_path = trans_pri;
+	asoc->peer.retran_path = trans_sec;
+}
+
 struct sctp_transport *
 sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
 				  struct sctp_transport *last_sent_to)
@@ -1547,7 +1591,7 @@
 /* Set an association id for a given association */
 int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 {
-	bool preload = gfp & __GFP_WAIT;
+	bool preload = !!(gfp & __GFP_WAIT);
 	int ret;
 
 	/* If the id is already assigned, keep it. */

diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3d9f429..9da76ba 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c

@@ -481,7 +481,7 @@
 		}
 
 		if (chunk->transport)
-			chunk->transport->last_time_heard = jiffies;
+			chunk->transport->last_time_heard = ktime_get();
 
 		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
 				   ep, asoc, chunk, GFP_ATOMIC);

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2b1738e..1999592 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c

@@ -216,7 +216,7 @@
 	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 
 	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
@@ -943,7 +943,6 @@
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG
 };
 static struct inet_protosw sctpv6_stream_protosw = {
@@ -951,7 +950,6 @@
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0f4d15f..01ab8e0 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c

@@ -591,7 +591,7 @@
 
 	pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
 
-	nskb->local_df = packet->ipfragok;
+	nskb->ignore_df = packet->ipfragok;
 	tp->af_specific->sctp_xmit(nskb, tp);
 
 out:

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 0947f1e..34229ee 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c

@@ -78,7 +78,7 @@
 
 	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
 		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
-			   snmp_fold_field((void __percpu **)net->sctp.sctp_statistics,
+			   snmp_fold_field(net->sctp.sctp_statistics,
 				      sctp_snmp_list[i].entry));
 
 	return 0;

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 44cbb54..6789d785 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c

@@ -1017,7 +1017,6 @@
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
 static struct inet_protosw sctp_stream_protosw = {
@@ -1025,7 +1024,6 @@
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
 
@@ -1105,14 +1103,15 @@
 
 static inline int init_sctp_mibs(struct net *net)
 {
-	return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
-			     sizeof(struct sctp_mib),
-			     __alignof__(struct sctp_mib));
+	net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib);
+	if (!net->sctp.sctp_statistics)
+		return -ENOMEM;
+	return 0;
 }
 
 static inline void cleanup_sctp_mibs(struct net *net)
 {
-	snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
+	free_percpu(net->sctp.sctp_statistics);
 }
 
 static void sctp_v4_pf_init(void)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fee5552..ae0e616 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c

@@ -1782,7 +1782,7 @@
 	else
 		kt = ktime_get();
 
-	if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
+	if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
 		/*
 		 * Section 3.3.10.3 Stale Cookie Error (3)
 		 *

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fee06b9..4298996 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c

@@ -71,6 +71,7 @@
 #include <net/route.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
+#include <net/busy_poll.h>
 
 #include <linux/socket.h> /* for sa_family_t */
 #include <linux/export.h>
@@ -5945,8 +5946,9 @@
 		/* Search for an available port. */
 		int low, high, remaining, index;
 		unsigned int rover;
+		struct net *net = sock_net(sk);
 
-		inet_get_local_port_range(sock_net(sk), &low, &high);
+		inet_get_local_port_range(net, &low, &high);
 		remaining = (high - low) + 1;
 		rover = prandom_u32() % remaining + low;
 
@@ -5954,7 +5956,7 @@
 			rover++;
 			if ((rover < low) || (rover > high))
 				rover = low;
-			if (inet_is_reserved_local_port(rover))
+			if (inet_is_local_reserved_port(net, rover))
 				continue;
 			index = sctp_phashfn(sock_net(sk), rover);
 			head = &sctp_port_hashtable[index];
@@ -6557,6 +6559,10 @@
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			break;
 
+		if (sk_can_busy_loop(sk) &&
+		    sk_busy_loop(sk, noblock))
+			continue;
+
 		/* User doesn't want to wait.  */
 		error = -EAGAIN;
 		if (!timeo)
@@ -6940,7 +6946,8 @@
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
 	newsk->sk_flags = sk->sk_flags;
-	newsk->sk_no_check = sk->sk_no_check;
+	newsk->sk_no_check_tx = sk->sk_no_check_tx;
+	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
 
 	newsk->sk_shutdown = sk->sk_shutdown;

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index c82fdc1..7e5eb75 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c

@@ -436,20 +436,21 @@
 
 int sctp_sysctl_net_register(struct net *net)
 {
-	struct ctl_table *table = sctp_net_table;
+	struct ctl_table *table;
+	int i;
 
-	if (!net_eq(net, &init_net)) {
-		int i;
+	table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
 
-		table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
-		if (!table)
-			return -ENOMEM;
-
-		for (i = 0; table[i].data; i++)
-			table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
-	}
+	for (i = 0; table[i].data; i++)
+		table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
 
 	net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+	if (net->sctp.sysctl_header == NULL) {
+		kfree(table);
+		return -ENOMEM;
+	}
 	return 0;
 }
 

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1d348d1..7dd672f 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c

@@ -72,7 +72,7 @@
 	 */
 	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
 
-	peer->last_time_heard = jiffies;
+	peer->last_time_heard = ktime_get();
 	peer->last_time_ecne_reduced = jiffies;
 
 	peer->param_flags = SPP_HB_DISABLE |

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 7144eb6..d49dc2e 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c

@@ -38,6 +38,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/busy_poll.h>
 #include <net/sctp/structs.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
@@ -204,6 +205,9 @@
 	if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
 		goto out_free;
 
+	if (!sctp_ulpevent_is_notification(event))
+		sk_mark_napi_id(sk, skb);
+
 	/* Check if the user wishes to receive this event.  */
 	if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
 		goto out_free;

diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 27ce262..92d5ab9 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c

@@ -218,10 +218,8 @@
 
 	spin_lock(&registered_mechs_lock);
 	list_for_each_entry(pos, &registered_mechs, gm_list) {
-		if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
-			module_put(pos->gm_owner);
+		if (!mech_supports_pseudoflavor(pos, pseudoflavor))
 			continue;
-		}
 		if (try_module_get(pos->gm_owner))
 			gm = pos;
 		break;

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f45..4ce5ecce 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c

@@ -1503,6 +1503,7 @@
 			if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
 			break;
 		case RPC_GSS_SVC_PRIVACY:
 			/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@
 			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
 			break;
 		default:
 			goto auth_err;

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ae333c1..0663621 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c

@@ -374,7 +374,7 @@
 	}
 	return;
 out:
-	printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
+	printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
 }
 EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
 

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 25578af..c0365c1 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c

@@ -832,7 +832,8 @@
  * @size: requested byte size
  *
  * To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL if the request cannot be serviced immediately.
+ * returning NULL and suppressing warning if the request cannot be serviced
+ * immediately.
  * The caller can arrange to sleep in a way that is safe for rpciod.
  *
  * Most requests are 'small' (under 2KiB) and can be serviced from a
@@ -845,7 +846,7 @@
 void *rpc_malloc(struct rpc_task *task, size_t size)
 {
 	struct rpc_buffer *buf;
-	gfp_t gfp = GFP_NOWAIT;
+	gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
 
 	if (RPC_IS_SWAPPER(task))
 		gfp |= __GFP_MEMALLOC;

diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0a648c5..2df87f7 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c

@@ -173,7 +173,8 @@
 		return -1;
 	if (csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+	    !skb->csum_complete_sw)
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:

diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d..f2b7cb5 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h

@@ -43,6 +43,19 @@
 		(task->tk_msg.rpc_proc->p_decode != NULL);
 }
 
+static inline int sock_is_loopback(struct sock *sk)
+{
+	struct dst_entry *dst;
+	int loopback = 0;
+	rcu_read_lock();
+	dst = rcu_dereference(sk->sk_dst_cache);
+	if (dst && dst->dev &&
+	    (dst->dev->features & NETIF_F_LOOPBACK))
+		loopback = 1;
+	rcu_read_unlock();
+	return loopback;
+}
+
 int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
 		    struct page *headpage, unsigned long headoffset,
 		    struct page *tailpage, unsigned long tailoffset);

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0..b4737fb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c

@@ -597,6 +597,7 @@
 			}
 			rqstp->rq_pages[i] = p;
 		}
+	rqstp->rq_page_end = &rqstp->rq_pages[i];
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
 
 	/* Make arg->head point to first page and arg->pages point to rest */
@@ -730,6 +731,8 @@
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
 		if (newxpt)
 			svc_add_new_temp_xprt(serv, newxpt);
+		else
+			module_put(xprt->xpt_class->xcl_owner);
 	} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -793,7 +796,7 @@
 
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
-	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+	rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
 	rqstp->rq_chandle.defer = svc_defer;
 
 	if (serv->sv_stats)

diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0c..79c0f34 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c

@@ -54,6 +54,8 @@
 	}
 	spin_unlock(&authtab_lock);
 
+	rqstp->rq_auth_slack = 0;
+
 	rqstp->rq_authop = aops;
 	return aops->accept(rqstp, authp);
 }

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 43bcb46..b507cd3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c

@@ -400,6 +400,12 @@
 	release_sock(sock->sk);
 #endif
 }
+
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+	return svc_port_is_privileged(svc_addr(rqstp));	/* stored in rq_secure */
+}
+
 /*
  * INET callback when data has been received on the socket.
  */
@@ -678,6 +684,7 @@
 	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 	.xpo_has_wspace = svc_udp_has_wspace,
 	.xpo_accept = svc_udp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -842,8 +849,7 @@
 	 * tell us anything.  For now just warn about unpriv connections.
 	 */
 	if (!svc_port_is_privileged(sin)) {
-		dprintk(KERN_WARNING
-			"%s: connect from unprivileged port: %s\n",
+		dprintk("%s: connect from unprivileged port: %s\n",
 			serv->sv_name,
 			__svc_print_addr(sin, buf, sizeof(buf)));
 	}
@@ -867,6 +873,10 @@
 	}
 	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 
+	if (sock_is_loopback(newsock->sk))
+		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+	else
+		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -1112,6 +1122,7 @@
 
 	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
+	rqstp->rq_local	      = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
 
 	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	calldir = p[1];
@@ -1234,6 +1245,7 @@
 	.xpo_detach = svc_bc_tcp_sock_detach,
 	.xpo_free = svc_bc_sock_free,
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1284,7 @@
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_class = {

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index dd97ba3..23fb4e7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c

@@ -462,6 +462,7 @@
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
 
+	xdr_set_scratch_buffer(xdr, NULL, 0);
 	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->iov = iov;
@@ -482,6 +483,73 @@
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
 /**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it.  But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+	int shift = xdr->scratch.iov_len;	/* bytes owed to the previous buffer */
+	void *page;
+
+	if (shift == 0)	/* nothing parked in scratch: data is already in place */
+		return;
+	page = page_address(*xdr->page_ptr);
+	memcpy(xdr->scratch.iov_base, page, shift);	/* fill the unused tail first */
+	memmove(page, page + shift, (void *)xdr->p - page);	/* then slide the rest down */
+	xdr->scratch.iov_len = 0;	/* makes a repeated call a no-op */
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+	__be32 *p;	/* automatic: a static here would be shared by all callers */
+	int space_left;
+	int frag1bytes, frag2bytes;	/* left in current buffer / spill into next page */
+
+	if (nbytes > PAGE_SIZE)
+		return NULL; /* Bigger buffers require special handling */
+	if (xdr->buf->len + nbytes > xdr->buf->buflen)
+		return NULL; /* Sorry, we're totally out of space */
+	frag1bytes = (xdr->end - xdr->p) << 2;	/* __be32 words -> bytes */
+	frag2bytes = nbytes - frag1bytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += frag1bytes;
+	else
+		xdr->buf->page_len += frag1bytes;
+	xdr->page_ptr++;
+	xdr->iov = NULL;	/* from here on we are encoding into pages */
+	/*
+	 * If the last encode didn't end exactly on a page boundary, the
+	 * next one will straddle boundaries.  Encode into the next
+	 * page, then copy it back later in xdr_commit_encode.  We use
+	 * the "scratch" iov to track any temporarily unused fragment of
+	 * space at the end of the previous buffer:
+	 */
+	xdr->scratch.iov_base = xdr->p;
+	xdr->scratch.iov_len = frag1bytes;
+	p = page_address(*xdr->page_ptr);
+	/*
+	 * Note this is where the next encode will start after we've
+	 * shifted this one back:
+	 */
+	xdr->p = (void *)p + frag2bytes;
+	space_left = xdr->buf->buflen - xdr->buf->len;
+	xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+	xdr->buf->page_len += frag2bytes;
+	xdr->buf->len += nbytes;
+	return p;	/* caller writes nbytes here; commit moves them into place */
+}
+
+/**
  * xdr_reserve_space - Reserve buffer space for sending
  * @xdr: pointer to xdr_stream
  * @nbytes: number of bytes to reserve
@@ -495,20 +563,122 @@
 	__be32 *p = xdr->p;
 	__be32 *q;
 
+	xdr_commit_encode(xdr);
 	/* align nbytes on the next 32-bit boundary */
 	nbytes += 3;
 	nbytes &= ~3;
 	q = p + (nbytes >> 2);
 	if (unlikely(q > xdr->end || q < p))
-		return NULL;
+		return xdr_get_next_encode_buffer(xdr, nbytes);
 	xdr->p = q;
-	xdr->iov->iov_len += nbytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += nbytes;
+	else
+		xdr->buf->page_len += nbytes;
 	xdr->buf->len += nbytes;
 	return p;
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);
 
 /**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+	struct xdr_buf *buf = xdr->buf;
+	struct kvec *head = buf->head;
+	struct kvec *tail = buf->tail;
+	int fraglen;	/* bytes trimmed from the region currently being cut */
+	int new, old;	/* page-data end offset after / before the trim */
+
+	if (len > buf->len) {	/* can only shrink, never grow */
+		WARN_ON_ONCE(1);
+		return;
+	}
+	xdr_commit_encode(xdr);	/* settle any pending scratch copy first */
+
+	fraglen = min_t(int, buf->len - len, tail->iov_len);	/* 1: trim the tail */
+	tail->iov_len -= fraglen;
+	buf->len -= fraglen;
+	if (tail->iov_len && buf->len == len) {	/* new end is inside the tail */
+		xdr->p = tail->iov_base + tail->iov_len;
+		/* xdr->end, xdr->iov should be set already */
+		return;
+	}
+	WARN_ON_ONCE(fraglen);	/* tail was cut yet the new end is not inside it */
+	fraglen = min_t(int, buf->len - len, buf->page_len);	/* 2: trim page data */
+	buf->page_len -= fraglen;
+	buf->len -= fraglen;
+
+	new = buf->page_base + buf->page_len;
+	old = new + fraglen;
+	xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);	/* pages backed over */
+
+	if (buf->page_len && buf->len == len) {	/* new end is inside the pages */
+		xdr->p = page_address(*xdr->page_ptr);
+		xdr->end = (void *)xdr->p + PAGE_SIZE;
+		xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+		/* xdr->iov should already be NULL */
+		return;
+	}
+	if (fraglen) {	/* page data was cut away entirely: back into the head */
+		xdr->end = head->iov_base + head->iov_len;
+		xdr->page_ptr--;
+	}
+	/* (otherwise assume xdr->end is already set) */
+	head->iov_len = len;	/* 3: whatever remains lives in the head */
+	buf->len = len;
+	xdr->p = head->iov_base + head->iov_len;
+	xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing.  Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+	struct xdr_buf *buf = xdr->buf;
+	int room = (void *)xdr->end - (void *)xdr->p;	/* left in this fragment */
+	int reach = buf->len + room;	/* stream offset that xdr->end stands for */
+
+	if (newbuflen < 0 || newbuflen < buf->len)
+		return -1;	/* already encoded past the requested limit */
+	if (newbuflen <= buf->buflen) {	/* a larger limit changes nothing */
+		if (reach > newbuflen)	/* pull xdr->end back to the new limit */
+			xdr->end = (void *)xdr->end + newbuflen - reach;
+		buf->buflen = newbuflen;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
+/**
  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
  * @xdr: pointer to xdr_stream
  * @pages: list of pages

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 89d051d..c3b2b33 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c

@@ -71,24 +71,6 @@
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- *	-	a reply is received and
- *	-	a full number of requests are outstanding and
- *	-	the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT		(8U)
-#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND		RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
 /**
  * xprt_register_transport - register a transport implementation
  * @transport: transport to register
@@ -446,7 +428,15 @@
  * @task: recently completed RPC request used to adjust window
  * @result: result code of completed RPC request
  *
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ *	-	a reply is received and
+ *	-	a full number of requests are outstanding and
+ *	-	the congestion window hasn't been updated recently.
  */
 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
 {

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..693966d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c

@@ -78,8 +78,7 @@
  * elements. Segments are then coalesced when registered, if possible
  * within the selected memreg mode.
  *
- * Note, this routine is never called if the connection's memory
- * registration strategy is 0 (bounce buffers).
+ * Returns positive number of segments converted, or a negative errno.
  */
 
 static int
@@ -102,10 +101,17 @@
 	page_base = xdrbuf->page_base & ~PAGE_MASK;
 	p = 0;
 	while (len && n < nsegs) {
+		if (!ppages[p]) {
+			/* alloc the pagelist for receiving buffer */
+			ppages[p] = alloc_page(GFP_ATOMIC);
+			if (!ppages[p])
+				return -ENOMEM;
+		}
 		seg[n].mr_page = ppages[p];
 		seg[n].mr_offset = (void *)(unsigned long) page_base;
 		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
-		BUG_ON(seg[n].mr_len > PAGE_SIZE);
+		if (seg[n].mr_len > PAGE_SIZE)
+			return -EIO;
 		len -= seg[n].mr_len;
 		++n;
 		++p;
@@ -114,7 +120,7 @@
 
 	/* Message overflows the seg array */
 	if (len && n == nsegs)
-		return 0;
+		return -EIO;
 
 	if (xdrbuf->tail[0].iov_len) {
 		/* the rpcrdma protocol allows us to omit any trailing
@@ -123,7 +129,7 @@
 			return n;
 		if (n == nsegs)
 			/* Tail remains, but we're out of segments */
-			return 0;
+			return -EIO;
 		seg[n].mr_page = NULL;
 		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
 		seg[n].mr_len = xdrbuf->tail[0].iov_len;
@@ -164,15 +170,17 @@
  *  Reply chunk (a counted array):
  *   N elements:
  *    1 - N - HLOO - HLOO - ... - HLOO
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
  */
 
-static unsigned int
+static ssize_t
 rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 		struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
 {
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
-	int nsegs, nchunks = 0;
+	int n, nsegs, nchunks = 0;
 	unsigned int pos;
 	struct rpcrdma_mr_seg *seg = req->rl_segments;
 	struct rpcrdma_read_chunk *cur_rchunk = NULL;
@@ -198,12 +206,11 @@
 		pos = target->head[0].iov_len;
 
 	nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
-	if (nsegs == 0)
-		return 0;
+	if (nsegs < 0)
+		return nsegs;
 
 	do {
-		/* bind/register the memory, then build chunk from result. */
-		int n = rpcrdma_register_external(seg, nsegs,
+		n = rpcrdma_register_external(seg, nsegs,
 						cur_wchunk != NULL, r_xprt);
 		if (n <= 0)
 			goto out;
@@ -248,10 +255,6 @@
 	/* success. all failures return above */
 	req->rl_nchunks = nchunks;
 
-	BUG_ON(nchunks == 0);
-	BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
-	       && (nchunks > 3));
-
 	/*
 	 * finish off header. If write, marshal discrim and nchunks.
 	 */
@@ -278,8 +281,8 @@
 out:
 	for (pos = 0; nchunks--;)
 		pos += rpcrdma_deregister_external(
-				&req->rl_segments[pos], r_xprt, NULL);
-	return 0;
+				&req->rl_segments[pos], r_xprt);
+	return n;
 }
 
 /*
@@ -361,6 +364,8 @@
  *  [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
  *  [2] -- optional padding.
  *  [3] -- if padded, header only in [1] and data here.
+ *
+ * Returns zero on success, otherwise a negative errno.
  */
 
 int
@@ -370,7 +375,8 @@
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	char *base;
-	size_t hdrlen, rpclen, padlen;
+	size_t rpclen, padlen;
+	ssize_t hdrlen;
 	enum rpcrdma_chunktype rtype, wtype;
 	struct rpcrdma_msg *headerp;
 
@@ -441,14 +447,10 @@
 	/* The following simplification is not true forever */
 	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
 		wtype = rpcrdma_noch;
-	BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
-
-	if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
-	    (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
-		/* forced to "pure inline"? */
-		dprintk("RPC:       %s: too much data (%d/%d) for inline\n",
-			__func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
-		return -1;
+	if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+		dprintk("RPC:       %s: cannot marshal multiple chunk lists\n",
+			__func__);
+		return -EIO;
 	}
 
 	hdrlen = 28; /*sizeof *headerp;*/
@@ -474,8 +476,11 @@
 			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
 			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
 			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
-			BUG_ON(wtype != rpcrdma_noch);
-
+			if (wtype != rpcrdma_noch) {
+				dprintk("RPC:       %s: invalid chunk list\n",
+					__func__);
+				return -EIO;
+			}
 		} else {
 			headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
 			headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
@@ -492,8 +497,7 @@
 			 * on receive. Therefore, we request a reply chunk
 			 * for non-writes wherever feasible and efficient.
 			 */
-			if (wtype == rpcrdma_noch &&
-			    r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+			if (wtype == rpcrdma_noch)
 				wtype = rpcrdma_replych;
 		}
 	}
@@ -511,9 +515,8 @@
 		hdrlen = rpcrdma_create_chunks(rqst,
 					&rqst->rq_rcv_buf, headerp, wtype);
 	}
-
-	if (hdrlen == 0)
-		return -1;
+	if (hdrlen < 0)
+		return hdrlen;
 
 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
 		" headerp 0x%p base 0x%p lkey 0x%x\n",
@@ -680,15 +683,11 @@
 	rqst->rq_private_buf = rqst->rq_rcv_buf;
 }
 
-/*
- * This function is called when an async event is posted to
- * the connection which changes the connection state. All it
- * does at this point is mark the connection up/down, the rpc
- * timers do the rest.
- */
 void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
+rpcrdma_connect_worker(struct work_struct *work)
 {
+	struct rpcrdma_ep *ep =
+		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
 	struct rpc_xprt *xprt = ep->rep_xprt;
 
 	spin_lock_bh(&xprt->transport_lock);
@@ -705,13 +704,15 @@
 }
 
 /*
- * This function is called when memory window unbind which we are waiting
- * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
  */
-static void
-rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
 {
-	wake_up(&rep->rr_unbind);
+	schedule_delayed_work(&ep->rep_connect_worker, 0);	/* delay 0: queue now */
 }
 
 /*
@@ -728,7 +729,8 @@
 	struct rpc_xprt *xprt = rep->rr_xprt;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	__be32 *iptr;
-	int i, rdmalen, status;
+	int rdmalen, status;
+	unsigned long cwnd;
 
 	/* Check status. If bad, signal disconnect and return rep to pool */
 	if (rep->rr_len == ~0U) {
@@ -783,6 +785,7 @@
 
 	/* from here on, the reply is no longer an orphan */
 	req->rl_reply = rep;
+	xprt->reestablish_timeout = 0;
 
 	/* check for expected message types */
 	/* The order of some of these tests is important. */
@@ -857,26 +860,10 @@
 		break;
 	}
 
-	/* If using mw bind, start the deregister process now. */
-	/* (Note: if mr_free(), cannot perform it here, in tasklet context) */
-	if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
-	case RPCRDMA_MEMWINDOWS:
-		for (i = 0; req->rl_nchunks-- > 1;)
-			i += rpcrdma_deregister_external(
-				&req->rl_segments[i], r_xprt, NULL);
-		/* Optionally wait (not here) for unbinds to complete */
-		rep->rr_func = rpcrdma_unbind_func;
-		(void) rpcrdma_deregister_external(&req->rl_segments[i],
-						   r_xprt, rep);
-		break;
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-		for (i = 0; req->rl_nchunks--;)
-			i += rpcrdma_deregister_external(&req->rl_segments[i],
-							 r_xprt, NULL);
-		break;
-	default:
-		break;
-	}
+	cwnd = xprt->cwnd;
+	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+	if (xprt->cwnd > cwnd)
+		xprt_release_rqst_cong(rqst->rq_task);
 
 	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
 			__func__, xprt, rqst, status);

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8d904e4..8f92a61 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@
 
 	/* Set up the XDR head */
 	rqstp->rq_arg.head[0].iov_base = page_address(page);
-	rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+	rqstp->rq_arg.head[0].iov_len =
+		min_t(size_t, byte_count, ctxt->sge[0].length);
 	rqstp->rq_arg.len = byte_count;
 	rqstp->rq_arg.buflen = byte_count;
 
@@ -85,7 +87,7 @@
 		page = ctxt->pages[sge_no];
 		put_page(rqstp->rq_pages[sge_no]);
 		rqstp->rq_pages[sge_no] = page;
-		bc -= min(bc, ctxt->sge[sge_no].length);
+		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
 		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
 		sge_no++;
 	}
@@ -113,291 +115,265 @@
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
-			   struct svc_rqst *rqstp,
-			   struct svc_rdma_op_ctxt *head,
-			   struct rpcrdma_msg *rmsgp,
-			   struct svc_rdma_req_map *rpl_map,
-			   struct svc_rdma_req_map *chl_map,
-			   int ch_count,
-			   int byte_count)
-{
-	int sge_no;
-	int sge_bytes;
-	int page_off;
-	int page_no;
-	int ch_bytes;
-	int ch_no;
-	struct rpcrdma_read_chunk *ch;
-
-	sge_no = 0;
-	page_no = 0;
-	page_off = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	ch_no = 0;
-	ch_bytes = ntohl(ch->rc_target.rs_length);
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = ch_bytes;
-	head->arg.len = rqstp->rq_arg.len + ch_bytes;
-	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
-	head->count++;
-	chl_map->ch[0].start = 0;
-	while (byte_count) {
-		rpl_map->sge[sge_no].iov_base =
-			page_address(rqstp->rq_arg.pages[page_no]) + page_off;
-		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
-		rpl_map->sge[sge_no].iov_len = sge_bytes;
-		/*
-		 * Don't bump head->count here because the same page
-		 * may be used by multiple SGE.
-		 */
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-		rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-		byte_count -= sge_bytes;
-		ch_bytes -= sge_bytes;
-		sge_no++;
-		/*
-		 * If all bytes for this chunk have been mapped to an
-		 * SGE, move to the next SGE
-		 */
-		if (ch_bytes == 0) {
-			chl_map->ch[ch_no].count =
-				sge_no - chl_map->ch[ch_no].start;
-			ch_no++;
-			ch++;
-			chl_map->ch[ch_no].start = sge_no;
-			ch_bytes = ntohl(ch->rc_target.rs_length);
-			/* If bytes remaining account for next chunk */
-			if (byte_count) {
-				head->arg.page_len += ch_bytes;
-				head->arg.len += ch_bytes;
-				head->arg.buflen += ch_bytes;
-			}
-		}
-		/*
-		 * If this SGE consumed all of the page, move to the
-		 * next page
-		 */
-		if ((sge_bytes + page_off) == PAGE_SIZE) {
-			page_no++;
-			page_off = 0;
-			/*
-			 * If there are still bytes left to map, bump
-			 * the page count
-			 */
-			if (byte_count)
-				head->count++;
-		} else
-			page_off += sge_bytes;
-	}
-	BUG_ON(byte_count != 0);
-	return sge_no;
-}
-
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0]	position points to pages[0] at an offset of 0
- * - pages[]	will be made physically contiguous by creating a one-off memory
- *		region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count	is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
-				struct svc_rqst *rqstp,
-				struct svc_rdma_op_ctxt *head,
-				struct rpcrdma_msg *rmsgp,
-				struct svc_rdma_req_map *rpl_map,
-				struct svc_rdma_req_map *chl_map,
-				int ch_count,
-				int byte_count)
-{
-	int page_no;
-	int ch_no;
-	u32 offset;
-	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_fastreg_mr *frmr;
-	int ret = 0;
-
-	frmr = svc_rdma_get_frmr(xprt);
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-
-	head->frmr = frmr;
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = byte_count;
-	head->arg.len = rqstp->rq_arg.len + byte_count;
-	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
-
-	/* Fast register the page list */
-	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
-	frmr->direction = DMA_FROM_DEVICE;
-	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->map_len = byte_count;
-	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
-	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-		frmr->page_list->page_list[page_no] =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					rqstp->rq_arg.pages[page_no], 0,
-					PAGE_SIZE, DMA_FROM_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-		atomic_inc(&xprt->sc_dma_used);
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-	}
-	head->count += page_no;
-
-	/* rq_respages points one past arg pages */
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-	/* Create the reply and chunk maps */
-	offset = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	for (ch_no = 0; ch_no < ch_count; ch_no++) {
-		int len = ntohl(ch->rc_target.rs_length);
-		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
-		rpl_map->sge[ch_no].iov_len = len;
-		chl_map->ch[ch_no].count = 1;
-		chl_map->ch[ch_no].start = ch_no;
-		offset += len;
-		ch++;
-	}
-
-	ret = svc_rdma_fastreg(xprt, frmr);
-	if (ret)
-		goto fatal_err;
-
-	return ch_no;
-
- fatal_err:
-	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			     struct svc_rdma_op_ctxt *ctxt,
-			     struct svc_rdma_fastreg_mr *frmr,
-			     struct kvec *vec,
-			     u64 *sgl_offset,
-			     int count)
-{
-	int i;
-	unsigned long off;
-
-	ctxt->count = count;
-	ctxt->direction = DMA_FROM_DEVICE;
-	for (i = 0; i < count; i++) {
-		ctxt->sge[i].length = 0; /* in case map fails */
-		if (!frmr) {
-			BUG_ON(!virt_to_page(vec[i].iov_base));
-			off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
-			ctxt->sge[i].addr =
-				ib_dma_map_page(xprt->sc_cm_id->device,
-						virt_to_page(vec[i].iov_base),
-						off,
-						vec[i].iov_len,
-						DMA_FROM_DEVICE);
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 ctxt->sge[i].addr))
-				return -EINVAL;
-			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
-			atomic_inc(&xprt->sc_dma_used);
-		} else {
-			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
-			ctxt->sge[i].lkey = frmr->mr->lkey;
-		}
-		ctxt->sge[i].length = vec[i].iov_len;
-		*sgl_offset = *sgl_offset + vec[i].iov_len;
-	}
-	return 0;
-}
-
 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
 {
-	if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
-	     RDMA_TRANSPORT_IWARP) &&
-	    sge_count > 1)
+	if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+	     RDMA_TRANSPORT_IWARP)	/* iWARP: one sge, whatever sge_count is */
 		return 1;
 	else
 		return min_t(int, sge_count, xprt->sc_max_sge);
 }
 
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim      - '1', This isn't used for data placement
- * position     - The xdr stream offset (the same for every chunk)
- * handle       - RMR for client memory region
- * length       - data transfer length
- * offset       - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
-			 struct rpcrdma_msg *rmsgp,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *hdr_ctxt)
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+			      struct svc_rqst *rqstp,
+			      struct svc_rdma_op_ctxt *head,	/* collects the sink pages */
+			      int *page_no,	/* in/out: sink page index */
+			      u32 *page_offset,	/* in/out: offset in that page */
+			      u32 rs_handle,	/* becomes the read's rkey */
+			      u32 rs_length,	/* bytes still wanted from the segment */
+			      u64 rs_offset,	/* becomes the read's remote_addr */
+			      int last);	/* nonzero: final ctxt may be flagged */
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+			       struct svc_rqst *rqstp,
+			       struct svc_rdma_op_ctxt *head,
+			       int *page_no,
+			       u32 *page_offset,
+			       u32 rs_handle,
+			       u32 rs_length,
+			       u64 rs_offset,
+			       int last)
+{
+	struct ib_send_wr read_wr;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+	int ret, read, pno;	/* read: bytes the SGEs of this WR really cover */
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
+
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->read_hdr = head;
+	pages_needed =	/* clamp to the SGEs one read WR may carry */
+		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, rs_length);
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)	/* count a page only when first entered */
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		ctxt->sge[pno].addr =
+			ib_dma_map_page(xprt->sc_cm_id->device,
+					head->arg.pages[pg_no], pg_off,
+					PAGE_SIZE - pg_off,
+					DMA_FROM_DEVICE);
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   ctxt->sge[pno].addr);
+		if (ret)	/* NOTE(review): confirm this status is negative */
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
+
+		/* The lkey here is either a local dma lkey or a dma_mr lkey */
+		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+		ctxt->sge[pno].length = len;
+		ctxt->count++;
+
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
+		}
+		rs_length -= len;
+	}
+
+	if (last && rs_length == 0)	/* nothing left of the final segment */
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.wr_id = (unsigned long)ctxt;
+	read_wr.opcode = IB_WR_RDMA_READ;
+	ctxt->wr_op = read_wr.opcode;
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = pages_needed;
+
+	ret = svc_rdma_send(xprt, &read_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
+	}
+
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;	/* success: bytes covered by the posted read */
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	return ret;	/* failure: mapping or post status */
+}
+
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
+				struct svc_rqst *rqstp,
+				struct svc_rdma_op_ctxt *head,
+				int *page_no,
+				u32 *page_offset,
+				u32 rs_handle,
+				u32 rs_length,
+				u64 rs_offset,
+				int last)
 {
 	struct ib_send_wr read_wr;
 	struct ib_send_wr inv_wr;
-	int err = 0;
-	int ch_no;
-	int ch_count;
-	int byte_count;
-	int sge_count;
-	u64 sgl_offset;
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt;
+	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+	int ret, read, pno;	/* read: bytes of the mapping this WR really uses */
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
+
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+	ctxt = svc_rdma_get_context(xprt);	/* after the frmr check: no leak */
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->frmr = frmr;
+	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, rs_length);
+
+	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
+	frmr->direction = DMA_FROM_DEVICE;
+	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+	frmr->map_len = pages_needed << PAGE_SHIFT;
+	frmr->page_list_len = pages_needed;
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)	/* count a page only when first entered */
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		frmr->page_list->page_list[pno] =
+			ib_dma_map_page(xprt->sc_cm_id->device,
+					head->arg.pages[pg_no], 0,
+					PAGE_SIZE, DMA_FROM_DEVICE);
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   frmr->page_list->page_list[pno]);
+		if (ret)	/* NOTE(review): confirm this status is negative */
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
+
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
+		}
+		rs_length -= len;
+	}
+
+	if (last && rs_length == 0)	/* nothing left of the final segment */
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+	ctxt->sge[0].lkey = frmr->mr->lkey;
+	ctxt->sge[0].length = read;	/* stays within map_len - *page_offset */
+	ctxt->count = 1;
+	ctxt->read_hdr = head;
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	fastreg_wr.next = &read_wr;	/* chain: fastreg -> read [-> invalidate] */
+
+	/* Prepare RDMA_READ */
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = 1;
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+		read_wr.wr_id = (unsigned long)ctxt;
+		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+	} else {
+		read_wr.opcode = IB_WR_RDMA_READ;
+		read_wr.next = &inv_wr;
+		/* Prepare invalidate */
+		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.wr_id = (unsigned long)ctxt;
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
+		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+	}
+	ctxt->wr_op = read_wr.opcode;
+
+	/* Post the chain */
+	ret = svc_rdma_send(xprt, &fastreg_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
+	}
+
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;	/* success: bytes covered by the posted read */
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	svc_rdma_put_frmr(xprt, frmr);
+	return ret;	/* failure: mapping or post status */
+}
+
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+			    struct rpcrdma_msg *rmsgp,
+			    struct svc_rqst *rqstp,
+			    struct svc_rdma_op_ctxt *head)
+{
+	int page_no, ch_count, ret;
 	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_req_map *rpl_map;
-	struct svc_rdma_req_map *chl_map;
+	u32 page_offset, byte_count;
+	u64 rs_offset;
+	rdma_reader_fn reader;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
@@ -408,122 +384,55 @@
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
 
-	/* Allocate temporary reply and chunk maps */
-	rpl_map = svc_rdma_get_req_map();
-	chl_map = svc_rdma_get_req_map();
+	/* The request is completed when the RDMA_READs complete. The
+	 * head context keeps all the pages that comprise the
+	 * request.
+	 */
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
 
-	if (!xprt->sc_frmr_pg_list_len)
-		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-					    rpl_map, chl_map, ch_count,
-					    byte_count);
+	/* Use FRMR if supported */
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+		reader = rdma_read_chunk_frmr;
 	else
-		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-						 rpl_map, chl_map, ch_count,
-						 byte_count);
-	if (sge_count < 0) {
-		err = -EIO;
-		goto out;
-	}
+		reader = rdma_read_chunk_lcl;
 
-	sgl_offset = 0;
-	ch_no = 0;
-
+	page_no = 0; page_offset = 0;
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	     ch->rc_discrim != 0; ch++, ch_no++) {
-		u64 rs_offset;
-next_sge:
-		ctxt = svc_rdma_get_context(xprt);
-		ctxt->direction = DMA_FROM_DEVICE;
-		ctxt->frmr = hdr_ctxt->frmr;
-		ctxt->read_hdr = NULL;
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	     ch->rc_discrim != 0; ch++) {
 
-		/* Prepare READ WR */
-		memset(&read_wr, 0, sizeof read_wr);
-		read_wr.wr_id = (unsigned long)ctxt;
-		read_wr.opcode = IB_WR_RDMA_READ;
-		ctxt->wr_op = read_wr.opcode;
-		read_wr.send_flags = IB_SEND_SIGNALED;
-		read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
 		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
 				 &rs_offset);
-		read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
-		read_wr.sg_list = ctxt->sge;
-		read_wr.num_sge =
-			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
-					&rpl_map->sge[chl_map->ch[ch_no].start],
-					&sgl_offset,
-					read_wr.num_sge);
-		if (err) {
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
-		}
-		if (((ch+1)->rc_discrim == 0) &&
-		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
-			/*
-			 * Mark the last RDMA_READ with a bit to
-			 * indicate all RPC data has been fetched from
-			 * the client and the RPC needs to be enqueued.
-			 */
-			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			if (hdr_ctxt->frmr) {
-				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-				/*
-				 * Invalidate the local MR used to map the data
-				 * sink.
-				 */
-				if (xprt->sc_dev_caps &
-				    SVCRDMA_DEVCAP_READ_W_INV) {
-					read_wr.opcode =
-						IB_WR_RDMA_READ_WITH_INV;
-					ctxt->wr_op = read_wr.opcode;
-					read_wr.ex.invalidate_rkey =
-						ctxt->frmr->mr->lkey;
-				} else {
-					/* Prepare INVALIDATE WR */
-					memset(&inv_wr, 0, sizeof inv_wr);
-					inv_wr.opcode = IB_WR_LOCAL_INV;
-					inv_wr.send_flags = IB_SEND_SIGNALED;
-					inv_wr.ex.invalidate_rkey =
-						hdr_ctxt->frmr->mr->lkey;
-					read_wr.next = &inv_wr;
-				}
-			}
-			ctxt->read_hdr = hdr_ctxt;
-		}
-		/* Post the read */
-		err = svc_rdma_send(xprt, &read_wr);
-		if (err) {
-			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
-			       err);
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
-		}
-		atomic_inc(&rdma_stat_read);
+		byte_count = ntohl(ch->rc_target.rs_length);
 
-		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
-			chl_map->ch[ch_no].count -= read_wr.num_sge;
-			chl_map->ch[ch_no].start += read_wr.num_sge;
-			goto next_sge;
+		while (byte_count > 0) {
+			ret = reader(xprt, rqstp, head,
+				     &page_no, &page_offset,
+				     ntohl(ch->rc_target.rs_handle),
+				     byte_count, rs_offset,
+				     ((ch+1)->rc_discrim == 0) /* last */
+				     );
+			if (ret < 0)
+				goto err;
+			byte_count -= ret;
+			rs_offset += ret;
+			head->arg.buflen += ret;
 		}
-		sgl_offset = 0;
-		err = 1;
 	}
-
- out:
-	svc_rdma_put_req_map(rpl_map);
-	svc_rdma_put_req_map(chl_map);
-
+	ret = 1;
+ err:
 	/* Detach arg pages. svc_recv will replenish them */
-	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
-		rqstp->rq_pages[ch_no] = NULL;
+	for (page_no = 0;
+	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+		rqstp->rq_pages[page_no] = NULL;
 
-	return err;
+	return ret;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -595,13 +504,9 @@
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
 		list_del_init(&ctxt->dto_q);
-	}
-	if (ctxt) {
 		spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
 		return rdma_read_complete(rqstp, ctxt);
-	}
-
-	if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+	} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
 		ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
@@ -621,7 +526,6 @@
 		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
 			goto close_out;
 
-		BUG_ON(ret);
 		goto out;
 	}
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -644,12 +548,11 @@
 	}
 
 	/* Read read-list data. */
-	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
 	if (ret > 0) {
 		/* read-list posted, defer until data received from client. */
 		goto defer;
-	}
-	if (ret < 0) {
+	} else if (ret < 0) {
 		/* Post of read-list failed, free context. */
 		svc_rdma_put_context(ctxt, 1);
 		return 0;

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7e024a5..49fd21a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * SGE[0]              reserved for RCPRDMA header
- * SGE[1]              data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1]    data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
-			struct xdr_buf *xdr,
-			struct svc_rdma_req_map *vec)
-{
-	int sge_no;
-	u32 sge_bytes;
-	u32 page_bytes;
-	u32 page_off;
-	int page_no = 0;
-	u8 *frva;
-	struct svc_rdma_fastreg_mr *frmr;
-
-	frmr = svc_rdma_get_frmr(xprt);
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-	vec->frmr = frmr;
-
-	/* Skip the RPCRDMA header */
-	sge_no = 1;
-
-	/* Map the head. */
-	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
-	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
-	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
-	vec->count = 2;
-	sge_no++;
-
-	/* Map the XDR head */
-	frmr->kva = frva;
-	frmr->direction = DMA_TO_DEVICE;
-	frmr->access_flags = 0;
-	frmr->map_len = PAGE_SIZE;
-	frmr->page_list_len = 1;
-	page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
-	frmr->page_list->page_list[page_no] =
-		ib_dma_map_page(xprt->sc_cm_id->device,
-				virt_to_page(xdr->head[0].iov_base),
-				page_off,
-				PAGE_SIZE - page_off,
-				DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-				 frmr->page_list->page_list[page_no]))
-		goto fatal_err;
-	atomic_inc(&xprt->sc_dma_used);
-
-	/* Map the XDR page list */
-	page_off = xdr->page_base;
-	page_bytes = xdr->page_len + page_off;
-	if (!page_bytes)
-		goto encode_tail;
-
-	/* Map the pages */
-	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-	vec->sge[sge_no].iov_len = page_bytes;
-	sge_no++;
-	while (page_bytes) {
-		struct page *page;
-
-		page = xdr->pages[page_no++];
-		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
-		page_bytes -= sge_bytes;
-
-		frmr->page_list->page_list[page_no] =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					page, page_off,
-					sge_bytes, DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-
-		atomic_inc(&xprt->sc_dma_used);
-		page_off = 0; /* reset for next time through loop */
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-	vec->count++;
-
- encode_tail:
-	/* Map tail */
-	if (0 == xdr->tail[0].iov_len)
-		goto done;
-
-	vec->count++;
-	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
-	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
-	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
-		/*
-		 * If head and tail use the same page, we don't need
-		 * to map it again.
-		 */
-		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
-	} else {
-		void *va;
-
-		/* Map another page for the tail */
-		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
-		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
-		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
-		frmr->page_list->page_list[page_no] =
-		    ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
-				    page_off,
-				    PAGE_SIZE,
-				    DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-		atomic_inc(&xprt->sc_dma_used);
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-
- done:
-	if (svc_rdma_fastreg(xprt, frmr))
-		goto fatal_err;
-
-	return 0;
-
- fatal_err:
-	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
-	vec->frmr = NULL;
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
 static int map_xdr(struct svcxprt_rdma *xprt,
 		   struct xdr_buf *xdr,
 		   struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
 
-	if (xprt->sc_frmr_pg_list_len)
-		return fast_reg_xdr(xprt, xdr, vec);
-
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
@@ -282,8 +134,6 @@
 }
 
 /* Assumptions:
- * - We are using FRMR
- *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -327,23 +177,16 @@
 		sge_bytes = min_t(size_t,
 			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		if (!vec->frmr) {
-			sge[sge_no].addr =
-				dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 sge[sge_no].addr))
-				goto err;
-			atomic_inc(&xprt->sc_dma_used);
-			sge[sge_no].lkey = xprt->sc_dma_lkey;
-		} else {
-			sge[sge_no].addr = (unsigned long)
-				vec->sge[xdr_sge_no].iov_base + sge_off;
-			sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		sge[sge_no].addr =
+			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 sge[sge_no].addr))
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
+		sge[sge_no].lkey = xprt->sc_dma_lkey;
 		ctxt->count++;
-		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
 		xdr_sge_no++;
@@ -369,7 +212,6 @@
 	return 0;
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, vec->frmr);
 	svc_rdma_put_context(ctxt, 0);
 	/* Fatal error, close transport */
 	return -EIO;
@@ -397,10 +239,7 @@
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -472,10 +311,7 @@
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* xdr offset starts at RPC message */
 	nchunks = ntohl(arg_ary->wc_nchunks);
@@ -545,7 +381,6 @@
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
-	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -559,7 +394,6 @@
 		       "svcrdma: could not post a receive buffer, err=%d."
 		       "Closing transport %p.\n", ret, rdma);
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		svc_rdma_put_frmr(rdma, vec->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -567,11 +401,6 @@
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
-	ctxt->frmr = vec->frmr;
-	if (vec->frmr)
-		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
 	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -590,21 +419,15 @@
 		int xdr_off = 0;
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		if (!vec->frmr) {
-			ctxt->sge[sge_no].addr =
-				dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
-						 ctxt->sge[sge_no].addr))
-				goto err;
-			atomic_inc(&rdma->sc_dma_used);
-			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
-		} else {
-			ctxt->sge[sge_no].addr = (unsigned long)
-				vec->sge[sge_no].iov_base;
-			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		ctxt->sge[sge_no].addr =
+			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+					 ctxt->sge[sge_no].addr))
+			goto err;
+		atomic_inc(&rdma->sc_dma_used);
+		ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
 		ctxt->sge[sge_no].length = sge_bytes;
 	}
 	BUG_ON(byte_count != 0);
@@ -627,6 +450,7 @@
 			ctxt->sge[page_no+1].length = 0;
 	}
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
 	BUG_ON(sge_no > rdma->sc_max_sge);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
@@ -635,15 +459,6 @@
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
-	if (vec->frmr) {
-		/* Prepare INVALIDATE WR */
-		memset(&inv_wr, 0, sizeof inv_wr);
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED;
-		inv_wr.ex.invalidate_rkey =
-			vec->frmr->mr->lkey;
-		send_wr.next = &inv_wr;
-	}
 
 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
@@ -653,7 +468,6 @@
 
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(rdma, vec->frmr);
 	svc_rdma_put_context(ctxt, 1);
 	return -EIO;
 }

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 25688fa..e7323fb 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -65,6 +66,7 @@
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
 static void rq_cq_reap(struct svcxprt_rdma *xprt);
 static void sq_cq_reap(struct svcxprt_rdma *xprt);
 
@@ -82,6 +84,7 @@
 	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
 	.xpo_has_wspace = svc_rdma_has_wspace,
 	.xpo_accept = svc_rdma_accept,
+	.xpo_secure_port = svc_rdma_secure_port,
 };
 
 struct svc_xprt_class svc_rdma_class = {
@@ -160,7 +163,6 @@
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
-	map->frmr = NULL;
 	return map;
 }
 
@@ -336,22 +338,21 @@
 
 	switch (ctxt->wr_op) {
 	case IB_WR_SEND:
-		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-			svc_rdma_put_frmr(xprt, ctxt->frmr);
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 1);
 		break;
 
 	case IB_WR_RDMA_WRITE:
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		break;
 
 	case IB_WR_RDMA_READ:
 	case IB_WR_RDMA_READ_WITH_INV:
+		svc_rdma_put_frmr(xprt, ctxt->frmr);
 		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
 			BUG_ON(!read_hdr);
-			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-				svc_rdma_put_frmr(xprt, ctxt->frmr);
 			spin_lock_bh(&xprt->sc_rq_dto_lock);
 			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 			list_add_tail(&read_hdr->dto_q,
@@ -363,6 +364,7 @@
 		break;
 
 	default:
+		BUG_ON(1);
 		printk(KERN_ERR "svcrdma: unexpected completion type, "
 		       "opcode=%d\n",
 		       ctxt->wr_op);
@@ -378,29 +380,42 @@
 static void sq_cq_reap(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct ib_wc wc;
+	struct ib_wc wc_a[6];
+	struct ib_wc *wc;
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
+	memset(wc_a, 0, sizeof(wc_a));
+
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;
 
 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
-	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		if (wc.status != IB_WC_SUCCESS)
-			/* Close the transport */
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+		int i;
 
-		/* Decrement used SQ WR count */
-		atomic_dec(&xprt->sc_sq_count);
-		wake_up(&xprt->sc_send_wait);
+		for (i = 0; i < ret; i++) {
+			wc = &wc_a[i];
+			if (wc->status != IB_WC_SUCCESS) {
+				dprintk("svcrdma: sq wc err status %d\n",
+					wc->status);
 
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		if (ctxt)
-			process_context(xprt, ctxt);
+				/* Close the transport */
+				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			}
 
-		svc_xprt_put(&xprt->sc_xprt);
+			/* Decrement used SQ WR count */
+			atomic_dec(&xprt->sc_sq_count);
+			wake_up(&xprt->sc_send_wait);
+
+			ctxt = (struct svc_rdma_op_ctxt *)
+				(unsigned long)wc->wr_id;
+			if (ctxt)
+				process_context(xprt, ctxt);
+
+			svc_xprt_put(&xprt->sc_xprt);
+		}
 	}
 
 	if (ctxt)
@@ -993,7 +1008,11 @@
 			need_dma_mr = 0;
 		break;
 	case RDMA_TRANSPORT_IB:
-		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else if (!(devattr.device_cap_flags &
+			     IB_DEVICE_LOCAL_DMA_LKEY)) {
 			need_dma_mr = 1;
 			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
 		} else
@@ -1190,14 +1209,7 @@
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
 	/*
-	 * If there are fewer SQ WR available than required to send a
-	 * simple response, return false.
-	 */
-	if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
-		return 0;
-
-	/*
-	 * ...or there are already waiters on the SQ,
+	 * If there are already waiters on the SQ,
 	 * return false.
 	 */
 	if (waitqueue_active(&rdma->sc_send_wait))
@@ -1207,6 +1219,11 @@
 	return 1;
 }
 
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+	return 1;
+}
+
 /*
  * Attempt to register the kvec representing the RPC memory with the
  * device.

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1eb9c46..66f91f0 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c

@@ -149,6 +149,11 @@
 
 #endif
 
+#define RPCRDMA_BIND_TO		(60U * HZ)
+#define RPCRDMA_INIT_REEST_TO	(5U * HZ)
+#define RPCRDMA_MAX_REEST_TO	(30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO	(5U * 60 * HZ)
+
 static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
 
 static void
@@ -229,7 +234,6 @@
 xprt_rdma_destroy(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	int rc;
 
 	dprintk("RPC:       %s: called\n", __func__);
 
@@ -238,10 +242,7 @@
 	xprt_clear_connected(xprt);
 
 	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
-	rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
-	if (rc)
-		dprintk("RPC:       %s: rpcrdma_ep_destroy returned %i\n",
-			__func__, rc);
+	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
 	rpcrdma_ia_close(&r_xprt->rx_ia);
 
 	xprt_rdma_free_addresses(xprt);
@@ -289,9 +290,9 @@
 
 	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
-	xprt->bind_timeout = (60U * HZ);
-	xprt->reestablish_timeout = (5U * HZ);
-	xprt->idle_timeout = (5U * 60 * HZ);
+	xprt->bind_timeout = RPCRDMA_BIND_TO;
+	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->resvport = 0;		/* privileged port not needed */
 	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
@@ -391,7 +392,7 @@
 	xprt_rdma_free_addresses(xprt);
 	rc = -EINVAL;
 out3:
-	(void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
 out2:
 	rpcrdma_ia_close(&new_xprt->rx_ia);
 out1:
@@ -436,10 +437,10 @@
 		schedule_delayed_work(&r_xprt->rdma_connect,
 			xprt->reestablish_timeout);
 		xprt->reestablish_timeout <<= 1;
-		if (xprt->reestablish_timeout > (30 * HZ))
-			xprt->reestablish_timeout = (30 * HZ);
-		else if (xprt->reestablish_timeout < (5 * HZ))
-			xprt->reestablish_timeout = (5 * HZ);
+		if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
+			xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
+		else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+			xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
 	} else {
 		schedule_delayed_work(&r_xprt->rdma_connect, 0);
 		if (!RPC_IS_ASYNC(task))
@@ -447,23 +448,6 @@
 	}
 }
 
-static int
-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
-	/* == RPC_CWNDSCALE @ init, but *after* setup */
-	if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
-		r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
-		dprintk("RPC:       %s: cwndscale %lu\n", __func__,
-			r_xprt->rx_buf.rb_cwndscale);
-		BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
-	}
-	xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
-	return xprt_reserve_xprt_cong(xprt, task);
-}
-
 /*
  * The RDMA allocate/free functions need the task structure as a place
  * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
@@ -479,7 +463,8 @@
 	struct rpcrdma_req *req, *nreq;
 
 	req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
-	BUG_ON(NULL == req);
+	if (req == NULL)
+		return NULL;
 
 	if (size > req->rl_size) {
 		dprintk("RPC:       %s: size %zd too large for buffer[%zd]: "
@@ -503,18 +488,6 @@
 		 * If the allocation or registration fails, the RPC framework
 		 * will (doggedly) retry.
 		 */
-		if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
-				RPCRDMA_BOUNCEBUFFERS) {
-			/* forced to "pure inline" */
-			dprintk("RPC:       %s: too much data (%zd) for inline "
-					"(r/w max %d/%d)\n", __func__, size,
-					rpcx_to_rdmad(xprt).inline_rsize,
-					rpcx_to_rdmad(xprt).inline_wsize);
-			size = req->rl_size;
-			rpc_exit(task, -EIO);		/* fail the operation */
-			rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
-			goto out;
-		}
 		if (task->tk_flags & RPC_TASK_SWAPPER)
 			nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
 		else
@@ -543,7 +516,6 @@
 		req = nreq;
 	}
 	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
-out:
 	req->rl_connect_cookie = 0;	/* our reserved value */
 	return req->rl_xdr_buf;
 
@@ -579,9 +551,7 @@
 		__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
 
 	/*
-	 * Finish the deregistration. When using mw bind, this was
-	 * begun in rpcrdma_reply_handler(). In all other modes, we
-	 * do it here, in thread context. The process is considered
+	 * Finish the deregistration.  The process is considered
 	 * complete when the rr_func vector becomes NULL - this
 	 * was put in place during rpcrdma_reply_handler() - the wait
 	 * call below will not block if the dereg is "done". If
@@ -590,12 +560,7 @@
 	for (i = 0; req->rl_nchunks;) {
 		--req->rl_nchunks;
 		i += rpcrdma_deregister_external(
-			&req->rl_segments[i], r_xprt, NULL);
-	}
-
-	if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
-		rep->rr_func = NULL;	/* abandon the callback */
-		req->rl_reply = NULL;
+			&req->rl_segments[i], r_xprt);
 	}
 
 	if (req->rl_iov.length == 0) {	/* see allocate above */
@@ -630,13 +595,12 @@
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int rc;
 
-	/* marshal the send itself */
-	if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
-		r_xprt->rx_stats.failed_marshal_count++;
-		dprintk("RPC:       %s: rpcrdma_marshal_req failed\n",
-			__func__);
-		return -EIO;
+	if (req->rl_niovs == 0) {
+		rc = rpcrdma_marshal_req(rqst);
+		if (rc < 0)
+			goto failed_marshal;
 	}
 
 	if (req->rl_reply == NULL) 		/* e.g. reconnection */
@@ -660,6 +624,12 @@
 	rqst->rq_bytes_sent = 0;
 	return 0;
 
+failed_marshal:
+	r_xprt->rx_stats.failed_marshal_count++;
+	dprintk("RPC:       %s: rpcrdma_marshal_req failed, status %i\n",
+		__func__, rc);
+	if (rc == -EIO)
+		return -EIO;
 drop_connection:
 	xprt_disconnect_done(xprt);
 	return -ENOTCONN;	/* implies disconnect */
@@ -705,7 +675,7 @@
  */
 
 static struct rpc_xprt_ops xprt_rdma_procs = {
-	.reserve_xprt		= xprt_rdma_reserve_xprt,
+	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
 	.alloc_slot		= xprt_alloc_slot,
 	.release_request	= xprt_release_rqst_cong,       /* ditto */

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..13dbd1c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c

@@ -48,8 +48,8 @@
  */
 
 #include <linux/interrupt.h>
-#include <linux/pci.h>	/* for Tavor hack below */
 #include <linux/slab.h>
+#include <asm/bitops.h>
 
 #include "xprt_rdma.h"
 
@@ -142,98 +142,139 @@
 	}
 }
 
-static inline
-void rpcrdma_event_process(struct ib_wc *wc)
+static void
+rpcrdma_sendcq_process_wc(struct ib_wc *wc)
 {
-	struct rpcrdma_mw *frmr;
-	struct rpcrdma_rep *rep =
-			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
 
-	dprintk("RPC:       %s: event rep %p status %X opcode %X length %u\n",
-		__func__, rep, wc->status, wc->opcode, wc->byte_len);
+	dprintk("RPC:       %s: frmr %p status %X opcode %d\n",
+		__func__, frmr, wc->status, wc->opcode);
 
-	if (!rep) /* send or bind completion that we don't care about */
+	if (wc->wr_id == 0ULL)
+		return;
+	if (wc->status != IB_WC_SUCCESS)
 		return;
 
-	if (IB_WC_SUCCESS != wc->status) {
-		dprintk("RPC:       %s: WC opcode %d status %X, connection lost\n",
-			__func__, wc->opcode, wc->status);
-		rep->rr_len = ~0U;
-		if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
-			rpcrdma_schedule_tasklet(rep);
-		return;
-	}
-
-	switch (wc->opcode) {
-	case IB_WC_FAST_REG_MR:
-		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+	if (wc->opcode == IB_WC_FAST_REG_MR)
 		frmr->r.frmr.state = FRMR_IS_VALID;
-		break;
-	case IB_WC_LOCAL_INV:
-		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+	else if (wc->opcode == IB_WC_LOCAL_INV)
 		frmr->r.frmr.state = FRMR_IS_INVALID;
-		break;
-	case IB_WC_RECV:
-		rep->rr_len = wc->byte_len;
-		ib_dma_sync_single_for_cpu(
-			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
-			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
-		/* Keep (only) the most recent credits, after check validity */
-		if (rep->rr_len >= 16) {
-			struct rpcrdma_msg *p =
-					(struct rpcrdma_msg *) rep->rr_base;
-			unsigned int credits = ntohl(p->rm_credit);
-			if (credits == 0) {
-				dprintk("RPC:       %s: server"
-					" dropped credits to 0!\n", __func__);
-				/* don't deadlock */
-				credits = 1;
-			} else if (credits > rep->rr_buffer->rb_max_requests) {
-				dprintk("RPC:       %s: server"
-					" over-crediting: %d (%d)\n",
-					__func__, credits,
-					rep->rr_buffer->rb_max_requests);
-				credits = rep->rr_buffer->rb_max_requests;
-			}
-			atomic_set(&rep->rr_buffer->rb_credits, credits);
-		}
-		/* fall through */
-	case IB_WC_BIND_MW:
-		rpcrdma_schedule_tasklet(rep);
-		break;
-	default:
-		dprintk("RPC:       %s: unexpected WC event %X\n",
-			__func__, wc->opcode);
-		break;
-	}
 }
 
-static inline int
-rpcrdma_cq_poll(struct ib_cq *cq)
+static int
+rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 {
-	struct ib_wc wc;
-	int rc;
+	struct ib_wc *wcs;
+	int budget, count, rc;
 
-	for (;;) {
-		rc = ib_poll_cq(cq, 1, &wc);
-		if (rc < 0) {
-			dprintk("RPC:       %s: ib_poll_cq failed %i\n",
-				__func__, rc);
+	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+	do {
+		wcs = ep->rep_send_wcs;
+
+		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+		if (rc <= 0)
 			return rc;
-		}
-		if (rc == 0)
-			break;
 
-		rpcrdma_event_process(&wc);
-	}
-
+		count = rc;
+		while (count-- > 0)
+			rpcrdma_sendcq_process_wc(wcs++);
+	} while (rc == RPCRDMA_POLLSIZE && --budget);
 	return 0;
 }
 
 /*
- * rpcrdma_cq_event_upcall
+ * Handle send, fast_reg_mr, and local_inv completions.
  *
- * This upcall handles recv, send, bind and unbind events.
+ * Send events are typically suppressed and thus do not result
+ * in an upcall. Occasionally one is signaled, however. This
+ * prevents the provider's completion queue from wrapping and
+ * losing a completion.
+ */
+static void
+rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
+{
+	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
+	int rc;
+
+	rc = rpcrdma_sendcq_poll(cq, ep);
+	if (rc) {
+		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
+			__func__, rc);
+		return;
+	}
+
+	rc = ib_req_notify_cq(cq,
+			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+	if (rc == 0)
+		return;
+	if (rc < 0) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
+			__func__, rc);
+		return;
+	}
+
+	rpcrdma_sendcq_poll(cq, ep);
+}
+
+static void
+rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+{
+	struct rpcrdma_rep *rep =
+			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
+
+	dprintk("RPC:       %s: rep %p status %X opcode %X length %u\n",
+		__func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+	if (wc->status != IB_WC_SUCCESS) {
+		rep->rr_len = ~0U;
+		goto out_schedule;
+	}
+	if (wc->opcode != IB_WC_RECV)
+		return;
+
+	rep->rr_len = wc->byte_len;
+	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+
+	if (rep->rr_len >= 16) {
+		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
+		unsigned int credits = ntohl(p->rm_credit);
+
+		if (credits == 0)
+			credits = 1;	/* don't deadlock */
+		else if (credits > rep->rr_buffer->rb_max_requests)
+			credits = rep->rr_buffer->rb_max_requests;
+		atomic_set(&rep->rr_buffer->rb_credits, credits);
+	}
+
+out_schedule:
+	rpcrdma_schedule_tasklet(rep);
+}
+
+static int
+rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+{
+	struct ib_wc *wcs;
+	int budget, count, rc;
+
+	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+	do {
+		wcs = ep->rep_recv_wcs;
+
+		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+		if (rc <= 0)
+			return rc;
+
+		count = rc;
+		while (count-- > 0)
+			rpcrdma_recvcq_process_wc(wcs++);
+	} while (rc == RPCRDMA_POLLSIZE && --budget);
+	return 0;
+}
+
+/*
+ * Handle receive completions.
+ *
  * It is reentrant but processes single events in order to maintain
  * ordering of receives to keep server credits.
  *
@@ -242,26 +283,31 @@
  * connection shutdown. That is, the structures required for
  * the completion of the reply handler must remain intact until
  * all memory has been reclaimed.
- *
- * Note that send events are suppressed and do not result in an upcall.
  */
 static void
-rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 {
+	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
 	int rc;
 
-	rc = rpcrdma_cq_poll(cq);
-	if (rc)
-		return;
-
-	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	rc = rpcrdma_recvcq_poll(cq, ep);
 	if (rc) {
-		dprintk("RPC:       %s: ib_req_notify_cq failed %i\n",
+		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
 			__func__, rc);
 		return;
 	}
 
-	rpcrdma_cq_poll(cq);
+	rc = ib_req_notify_cq(cq,
+			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+	if (rc == 0)
+		return;
+	if (rc < 0) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
+			__func__, rc);
+		return;
+	}
+
+	rpcrdma_recvcq_poll(cq, ep);
 }
 
 #ifdef RPC_DEBUG
@@ -493,54 +539,32 @@
 		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
 	}
 
-	switch (memreg) {
-	case RPCRDMA_MEMWINDOWS:
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-		if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
-			dprintk("RPC:       %s: MEMWINDOWS registration "
-				"specified but not supported by adapter, "
-				"using slower RPCRDMA_REGISTER\n",
-				__func__);
-			memreg = RPCRDMA_REGISTER;
-		}
-		break;
-	case RPCRDMA_MTHCAFMR:
-		if (!ia->ri_id->device->alloc_fmr) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
-			dprintk("RPC:       %s: MTHCAFMR registration "
-				"specified but not supported by adapter, "
-				"using riskier RPCRDMA_ALLPHYSICAL\n",
-				__func__);
-			memreg = RPCRDMA_ALLPHYSICAL;
-#else
-			dprintk("RPC:       %s: MTHCAFMR registration "
-				"specified but not supported by adapter, "
-				"using slower RPCRDMA_REGISTER\n",
-				__func__);
-			memreg = RPCRDMA_REGISTER;
-#endif
-		}
-		break;
-	case RPCRDMA_FRMR:
+	if (memreg == RPCRDMA_FRMR) {
 		/* Requires both frmr reg and local dma lkey */
 		if ((devattr.device_cap_flags &
 		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
 		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
 			dprintk("RPC:       %s: FRMR registration "
-				"specified but not supported by adapter, "
-				"using riskier RPCRDMA_ALLPHYSICAL\n",
-				__func__);
+				"not supported by HCA\n", __func__);
+			memreg = RPCRDMA_MTHCAFMR;
+		} else {
+			/* Mind the ia limit on FRMR page list depth */
+			ia->ri_max_frmr_depth = min_t(unsigned int,
+				RPCRDMA_MAX_DATA_SEGS,
+				devattr.max_fast_reg_page_list_len);
+		}
+	}
+	if (memreg == RPCRDMA_MTHCAFMR) {
+		if (!ia->ri_id->device->alloc_fmr) {
+			dprintk("RPC:       %s: MTHCAFMR registration "
+				"not supported by HCA\n", __func__);
+#if RPCRDMA_PERSISTENT_REGISTRATION
 			memreg = RPCRDMA_ALLPHYSICAL;
 #else
-			dprintk("RPC:       %s: FRMR registration "
-				"specified but not supported by adapter, "
-				"using slower RPCRDMA_REGISTER\n",
-				__func__);
-			memreg = RPCRDMA_REGISTER;
+			rc = -ENOMEM;
+			goto out2;
 #endif
 		}
-		break;
 	}
 
 	/*
@@ -552,8 +576,6 @@
 	 * adapter.
 	 */
 	switch (memreg) {
-	case RPCRDMA_BOUNCEBUFFERS:
-	case RPCRDMA_REGISTER:
 	case RPCRDMA_FRMR:
 		break;
 #if RPCRDMA_PERSISTENT_REGISTRATION
@@ -563,30 +585,26 @@
 				IB_ACCESS_REMOTE_READ;
 		goto register_setup;
 #endif
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		mem_priv = IB_ACCESS_LOCAL_WRITE |
-				IB_ACCESS_MW_BIND;
-		goto register_setup;
 	case RPCRDMA_MTHCAFMR:
 		if (ia->ri_have_dma_lkey)
 			break;
 		mem_priv = IB_ACCESS_LOCAL_WRITE;
+#if RPCRDMA_PERSISTENT_REGISTRATION
 	register_setup:
+#endif
 		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
 		if (IS_ERR(ia->ri_bind_mem)) {
 			printk(KERN_ALERT "%s: ib_get_dma_mr for "
-				"phys register failed with %lX\n\t"
-				"Will continue with degraded performance\n",
+				"phys register failed with %lX\n",
 				__func__, PTR_ERR(ia->ri_bind_mem));
-			memreg = RPCRDMA_REGISTER;
-			ia->ri_bind_mem = NULL;
+			rc = -ENOMEM;
+			goto out2;
 		}
 		break;
 	default:
-		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
-				__func__, memreg);
-		rc = -EINVAL;
+		printk(KERN_ERR "RPC: Unsupported memory "
+				"registration mode: %d\n", memreg);
+		rc = -ENOMEM;
 		goto out2;
 	}
 	dprintk("RPC:       %s: memory registration strategy is %d\n",
@@ -640,6 +658,7 @@
 				struct rpcrdma_create_data_internal *cdata)
 {
 	struct ib_device_attr devattr;
+	struct ib_cq *sendcq, *recvcq;
 	int rc, err;
 
 	rc = ib_query_device(ia->ri_id->device, &devattr);
@@ -659,32 +678,42 @@
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
 	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
+	case RPCRDMA_FRMR: {
+		int depth = 7;
+
 		/* Add room for frmr register and invalidate WRs.
 		 * 1. FRMR reg WR for head
 		 * 2. FRMR invalidate WR for head
-		 * 3. FRMR reg WR for pagelist
-		 * 4. FRMR invalidate WR for pagelist
+		 * 3. N FRMR reg WRs for pagelist
+		 * 4. N FRMR invalidate WRs for pagelist
 		 * 5. FRMR reg WR for tail
 		 * 6. FRMR invalidate WR for tail
 		 * 7. The RDMA_SEND WR
 		 */
-		ep->rep_attr.cap.max_send_wr *= 7;
+
+		/* Calculate N if the device max FRMR depth is smaller than
+		 * RPCRDMA_MAX_DATA_SEGS.
+		 */
+		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+			int delta = RPCRDMA_MAX_DATA_SEGS -
+				    ia->ri_max_frmr_depth;
+
+			do {
+				depth += 2; /* FRMR reg + invalidate */
+				delta -= ia->ri_max_frmr_depth;
+			} while (delta > 0);
+
+		}
+		ep->rep_attr.cap.max_send_wr *= depth;
 		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
-			cdata->max_requests = devattr.max_qp_wr / 7;
+			cdata->max_requests = devattr.max_qp_wr / depth;
 			if (!cdata->max_requests)
 				return -EINVAL;
-			ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+						       depth;
 		}
 		break;
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		/* Add room for mw_binds+unbinds - overkill! */
-		ep->rep_attr.cap.max_send_wr++;
-		ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
-		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
-			return -EINVAL;
-		break;
+	}
 	default:
 		break;
 	}
@@ -705,46 +734,51 @@
 		ep->rep_attr.cap.max_recv_sge);
 
 	/* set trigger for requesting send completion */
-	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /*  - 1*/;
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
-		break;
-	default:
-		break;
-	}
+	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
 	if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
 	init_waitqueue_head(&ep->rep_connect_wait);
+	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
-	/*
-	 * Create a single cq for receive dto and mw_bind (only ever
-	 * care about unbind, really). Send completions are suppressed.
-	 * Use single threaded tasklet upcalls to maintain ordering.
-	 */
-	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
-				  rpcrdma_cq_async_error_upcall, NULL,
-				  ep->rep_attr.cap.max_recv_wr +
+	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
+				  rpcrdma_cq_async_error_upcall, ep,
 				  ep->rep_attr.cap.max_send_wr + 1, 0);
-	if (IS_ERR(ep->rep_cq)) {
-		rc = PTR_ERR(ep->rep_cq);
-		dprintk("RPC:       %s: ib_create_cq failed: %i\n",
+	if (IS_ERR(sendcq)) {
+		rc = PTR_ERR(sendcq);
+		dprintk("RPC:       %s: failed to create send CQ: %i\n",
 			__func__, rc);
 		goto out1;
 	}
 
-	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
 	if (rc) {
 		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
 			__func__, rc);
 		goto out2;
 	}
 
-	ep->rep_attr.send_cq = ep->rep_cq;
-	ep->rep_attr.recv_cq = ep->rep_cq;
+	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
+				  rpcrdma_cq_async_error_upcall, ep,
+				  ep->rep_attr.cap.max_recv_wr + 1, 0);
+	if (IS_ERR(recvcq)) {
+		rc = PTR_ERR(recvcq);
+		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
+	if (rc) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
+			__func__, rc);
+		ib_destroy_cq(recvcq);
+		goto out2;
+	}
+
+	ep->rep_attr.send_cq = sendcq;
+	ep->rep_attr.recv_cq = recvcq;
 
 	/* Initialize cma parameters */
 
@@ -754,9 +788,7 @@
 
 	/* Client offers RDMA Read but does not initiate */
 	ep->rep_remote_cma.initiator_depth = 0;
-	if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
-		ep->rep_remote_cma.responder_resources = 0;
-	else if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
+	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
 		ep->rep_remote_cma.responder_resources = 32;
 	else
 		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
@@ -768,7 +800,7 @@
 	return 0;
 
 out2:
-	err = ib_destroy_cq(ep->rep_cq);
+	err = ib_destroy_cq(sendcq);
 	if (err)
 		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
 			__func__, err);
@@ -782,11 +814,8 @@
  * Disconnect and destroy endpoint. After this, the only
  * valid operations on the ep are to free it (if dynamically
  * allocated) or re-create it.
- *
- * The caller's error handling must be sure to not leak the endpoint
- * if this function fails.
  */
-int
+void
 rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
 	int rc;
@@ -794,6 +823,8 @@
 	dprintk("RPC:       %s: entering, connected is %d\n",
 		__func__, ep->rep_connected);
 
+	cancel_delayed_work_sync(&ep->rep_connect_worker);
+
 	if (ia->ri_id->qp) {
 		rc = rpcrdma_ep_disconnect(ep, ia);
 		if (rc)
@@ -809,13 +840,17 @@
 		ep->rep_pad_mr = NULL;
 	}
 
-	rpcrdma_clean_cq(ep->rep_cq);
-	rc = ib_destroy_cq(ep->rep_cq);
+	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
 	if (rc)
 		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
 			__func__, rc);
 
-	return rc;
+	rpcrdma_clean_cq(ep->rep_attr.send_cq);
+	rc = ib_destroy_cq(ep->rep_attr.send_cq);
+	if (rc)
+		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
+			__func__, rc);
 }
 
 /*
@@ -831,17 +866,20 @@
 	if (ep->rep_connected != 0) {
 		struct rpcrdma_xprt *xprt;
 retry:
+		dprintk("RPC:       %s: reconnecting...\n", __func__);
 		rc = rpcrdma_ep_disconnect(ep, ia);
 		if (rc && rc != -ENOTCONN)
 			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
 				" status %i\n", __func__, rc);
-		rpcrdma_clean_cq(ep->rep_cq);
+
+		rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+		rpcrdma_clean_cq(ep->rep_attr.send_cq);
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
 				(struct sockaddr *)&xprt->rx_data.addr);
 		if (IS_ERR(id)) {
-			rc = PTR_ERR(id);
+			rc = -EHOSTUNREACH;
 			goto out;
 		}
 		/* TEMP TEMP TEMP - fail if new device:
@@ -855,35 +893,32 @@
 			printk("RPC:       %s: can't reconnect on "
 				"different device!\n", __func__);
 			rdma_destroy_id(id);
-			rc = -ENETDOWN;
+			rc = -ENETUNREACH;
 			goto out;
 		}
 		/* END TEMP */
+		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+		if (rc) {
+			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
+				__func__, rc);
+			rdma_destroy_id(id);
+			rc = -ENETUNREACH;
+			goto out;
+		}
 		rdma_destroy_qp(ia->ri_id);
 		rdma_destroy_id(ia->ri_id);
 		ia->ri_id = id;
+	} else {
+		dprintk("RPC:       %s: connecting...\n", __func__);
+		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+		if (rc) {
+			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
+				__func__, rc);
+			/* do not update ep->rep_connected */
+			return -ENETUNREACH;
+		}
 	}
 
-	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
-	if (rc) {
-		dprintk("RPC:       %s: rdma_create_qp failed %i\n",
-			__func__, rc);
-		goto out;
-	}
-
-/* XXX Tavor device performs badly with 2K MTU! */
-if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
-	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
-	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
-	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
-	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
-		struct ib_qp_attr attr = {
-			.path_mtu = IB_MTU_1024
-		};
-		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
-	}
-}
-
 	ep->rep_connected = 0;
 
 	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -944,7 +979,8 @@
 {
 	int rc;
 
-	rpcrdma_clean_cq(ep->rep_cq);
+	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+	rpcrdma_clean_cq(ep->rep_attr.send_cq);
 	rc = rdma_disconnect(ia->ri_id);
 	if (!rc) {
 		/* returns without wait if not connected */
@@ -967,7 +1003,7 @@
 	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
 {
 	char *p;
-	size_t len;
+	size_t len, rlen, wlen;
 	int i, rc;
 	struct rpcrdma_mw *r;
 
@@ -997,11 +1033,6 @@
 		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
 				sizeof(struct rpcrdma_mw);
 		break;
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
-				sizeof(struct rpcrdma_mw);
-		break;
 	default:
 		break;
 	}
@@ -1032,32 +1063,29 @@
 	}
 	p += cdata->padding;
 
-	/*
-	 * Allocate the fmr's, or mw's for mw_bind chunk registration.
-	 * We "cycle" the mw's in order to minimize rkey reuse,
-	 * and also reduce unbind-to-bind collision.
-	 */
 	INIT_LIST_HEAD(&buf->rb_mws);
 	r = (struct rpcrdma_mw *)p;
 	switch (ia->ri_memreg_strategy) {
 	case RPCRDMA_FRMR:
 		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
 			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-							 RPCRDMA_MAX_SEGS);
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_mr)) {
 				rc = PTR_ERR(r->r.frmr.fr_mr);
 				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
 					" failed %i\n", __func__, rc);
 				goto out;
 			}
-			r->r.frmr.fr_pgl =
-				ib_alloc_fast_reg_page_list(ia->ri_id->device,
-							    RPCRDMA_MAX_SEGS);
+			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+						ia->ri_id->device,
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_pgl)) {
 				rc = PTR_ERR(r->r.frmr.fr_pgl);
 				dprintk("RPC:       %s: "
 					"ib_alloc_fast_reg_page_list "
 					"failed %i\n", __func__, rc);
+
+				ib_dereg_mr(r->r.frmr.fr_mr);
 				goto out;
 			}
 			list_add(&r->mw_list, &buf->rb_mws);
@@ -1082,21 +1110,6 @@
 			++r;
 		}
 		break;
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		/* Allocate one extra request's worth, for full cycling */
-		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
-			r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
-			if (IS_ERR(r->r.mw)) {
-				rc = PTR_ERR(r->r.mw);
-				dprintk("RPC:       %s: ib_alloc_mw"
-					" failed %i\n", __func__, rc);
-				goto out;
-			}
-			list_add(&r->mw_list, &buf->rb_mws);
-			++r;
-		}
-		break;
 	default:
 		break;
 	}
@@ -1105,16 +1118,16 @@
 	 * Allocate/init the request/reply buffers. Doing this
 	 * using kmalloc for now -- one for each buf.
 	 */
+	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
+	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
+	dprintk("RPC:       %s: wlen = %zu, rlen = %zu\n",
+		__func__, wlen, rlen);
+
 	for (i = 0; i < buf->rb_max_requests; i++) {
 		struct rpcrdma_req *req;
 		struct rpcrdma_rep *rep;
 
-		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
-		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
-		/* Typical ~2400b, so rounding up saves work later */
-		if (len < 4096)
-			len = 4096;
-		req = kmalloc(len, GFP_KERNEL);
+		req = kmalloc(wlen, GFP_KERNEL);
 		if (req == NULL) {
 			dprintk("RPC:       %s: request buffer %d alloc"
 				" failed\n", __func__, i);
@@ -1126,16 +1139,16 @@
 		buf->rb_send_bufs[i]->rl_buffer = buf;
 
 		rc = rpcrdma_register_internal(ia, req->rl_base,
-				len - offsetof(struct rpcrdma_req, rl_base),
+				wlen - offsetof(struct rpcrdma_req, rl_base),
 				&buf->rb_send_bufs[i]->rl_handle,
 				&buf->rb_send_bufs[i]->rl_iov);
 		if (rc)
 			goto out;
 
-		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+		buf->rb_send_bufs[i]->rl_size = wlen -
+						sizeof(struct rpcrdma_req);
 
-		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
-		rep = kmalloc(len, GFP_KERNEL);
+		rep = kmalloc(rlen, GFP_KERNEL);
 		if (rep == NULL) {
 			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
 				__func__, i);
@@ -1145,10 +1158,9 @@
 		memset(rep, 0, sizeof(struct rpcrdma_rep));
 		buf->rb_recv_bufs[i] = rep;
 		buf->rb_recv_bufs[i]->rr_buffer = buf;
-		init_waitqueue_head(&rep->rr_unbind);
 
 		rc = rpcrdma_register_internal(ia, rep->rr_base,
-				len - offsetof(struct rpcrdma_rep, rr_base),
+				rlen - offsetof(struct rpcrdma_rep, rr_base),
 				&buf->rb_recv_bufs[i]->rr_handle,
 				&buf->rb_recv_bufs[i]->rr_iov);
 		if (rc)
@@ -1179,7 +1191,6 @@
 
 	/* clean up in reverse order from create
 	 *   1.  recv mr memory (mr free, then kfree)
-	 *   1a. bind mw memory
 	 *   2.  send mr memory (mr free, then kfree)
 	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
 	 *   4.  arrays
@@ -1194,41 +1205,6 @@
 			kfree(buf->rb_recv_bufs[i]);
 		}
 		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
-			while (!list_empty(&buf->rb_mws)) {
-				r = list_entry(buf->rb_mws.next,
-					struct rpcrdma_mw, mw_list);
-				list_del(&r->mw_list);
-				switch (ia->ri_memreg_strategy) {
-				case RPCRDMA_FRMR:
-					rc = ib_dereg_mr(r->r.frmr.fr_mr);
-					if (rc)
-						dprintk("RPC:       %s:"
-							" ib_dereg_mr"
-							" failed %i\n",
-							__func__, rc);
-					ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-					break;
-				case RPCRDMA_MTHCAFMR:
-					rc = ib_dealloc_fmr(r->r.fmr);
-					if (rc)
-						dprintk("RPC:       %s:"
-							" ib_dealloc_fmr"
-							" failed %i\n",
-							__func__, rc);
-					break;
-				case RPCRDMA_MEMWINDOWS_ASYNC:
-				case RPCRDMA_MEMWINDOWS:
-					rc = ib_dealloc_mw(r->r.mw);
-					if (rc)
-						dprintk("RPC:       %s:"
-							" ib_dealloc_mw"
-							" failed %i\n",
-							__func__, rc);
-					break;
-				default:
-					break;
-				}
-			}
 			rpcrdma_deregister_internal(ia,
 					buf->rb_send_bufs[i]->rl_handle,
 					&buf->rb_send_bufs[i]->rl_iov);
@@ -1236,6 +1212,33 @@
 		}
 	}
 
+	while (!list_empty(&buf->rb_mws)) {
+		r = list_entry(buf->rb_mws.next,
+			struct rpcrdma_mw, mw_list);
+		list_del(&r->mw_list);
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
+			rc = ib_dereg_mr(r->r.frmr.fr_mr);
+			if (rc)
+				dprintk("RPC:       %s:"
+					" ib_dereg_mr"
+					" failed %i\n",
+					__func__, rc);
+			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rc = ib_dealloc_fmr(r->r.fmr);
+			if (rc)
+				dprintk("RPC:       %s:"
+					" ib_dealloc_fmr"
+					" failed %i\n",
+					__func__, rc);
+			break;
+		default:
+			break;
+		}
+	}
+
 	kfree(buf->rb_pool);
 }
 
@@ -1299,21 +1302,17 @@
 	int i;
 	unsigned long flags;
 
-	BUG_ON(req->rl_nchunks != 0);
 	spin_lock_irqsave(&buffers->rb_lock, flags);
 	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
 	req->rl_niovs = 0;
 	if (req->rl_reply) {
 		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
-		init_waitqueue_head(&req->rl_reply->rr_unbind);
 		req->rl_reply->rr_func = NULL;
 		req->rl_reply = NULL;
 	}
 	switch (ia->ri_memreg_strategy) {
 	case RPCRDMA_FRMR:
 	case RPCRDMA_MTHCAFMR:
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
 		/*
 		 * Cycle mw's back in reverse order, and "spin" them.
 		 * This delays and scrambles reuse as much as possible.
@@ -1358,8 +1357,7 @@
 
 /*
  * Put reply buffers back into pool when not attached to
- * request. This happens in error conditions, and when
- * aborting unbinds. Pre-decrement counter/array index.
+ * request. This happens in error conditions.
  */
 void
 rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
@@ -1498,8 +1496,8 @@
 	seg1->mr_offset -= pageoff;	/* start of page */
 	seg1->mr_len += pageoff;
 	len = -pageoff;
-	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	if (*nsegs > ia->ri_max_frmr_depth)
+		*nsegs = ia->ri_max_frmr_depth;
 	for (page_no = i = 0; i < *nsegs;) {
 		rpcrdma_map_one(ia, seg, writing);
 		pa = seg->mr_dma;
@@ -1536,10 +1534,6 @@
 	} else
 		post_wr = &frmr_wr;
 
-	/* Bump the key */
-	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
-	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
-
 	/* Prepare FRMR WR */
 	memset(&frmr_wr, 0, sizeof frmr_wr);
 	frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
@@ -1550,7 +1544,16 @@
 	frmr_wr.wr.fast_reg.page_list_len = page_no;
 	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
 	frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
-	BUG_ON(frmr_wr.wr.fast_reg.length < len);
+	if (frmr_wr.wr.fast_reg.length < len) {
+		while (seg1->mr_nsegs--)
+			rpcrdma_unmap_one(ia, seg++);
+		return -EIO;
+	}
+
+	/* Bump the key */
+	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
 	frmr_wr.wr.fast_reg.access_flags = (writing ?
 				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 				IB_ACCESS_REMOTE_READ);
@@ -1661,135 +1664,6 @@
 	return rc;
 }
 
-static int
-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
-			int *nsegs, int writing, struct rpcrdma_ia *ia,
-			struct rpcrdma_xprt *r_xprt)
-{
-	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
-				  IB_ACCESS_REMOTE_READ);
-	struct ib_mw_bind param;
-	int rc;
-
-	*nsegs = 1;
-	rpcrdma_map_one(ia, seg, writing);
-	param.bind_info.mr = ia->ri_bind_mem;
-	param.wr_id = 0ULL;	/* no send cookie */
-	param.bind_info.addr = seg->mr_dma;
-	param.bind_info.length = seg->mr_len;
-	param.send_flags = 0;
-	param.bind_info.mw_access_flags = mem_priv;
-
-	DECR_CQCOUNT(&r_xprt->rx_ep);
-	rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
-	if (rc) {
-		dprintk("RPC:       %s: failed ib_bind_mw "
-			"%u@0x%llx status %i\n",
-			__func__, seg->mr_len,
-			(unsigned long long)seg->mr_dma, rc);
-		rpcrdma_unmap_one(ia, seg);
-	} else {
-		seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
-		seg->mr_base = param.bind_info.addr;
-		seg->mr_nsegs = 1;
-	}
-	return rc;
-}
-
-static int
-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia,
-			struct rpcrdma_xprt *r_xprt, void **r)
-{
-	struct ib_mw_bind param;
-	LIST_HEAD(l);
-	int rc;
-
-	BUG_ON(seg->mr_nsegs != 1);
-	param.bind_info.mr = ia->ri_bind_mem;
-	param.bind_info.addr = 0ULL;	/* unbind */
-	param.bind_info.length = 0;
-	param.bind_info.mw_access_flags = 0;
-	if (*r) {
-		param.wr_id = (u64) (unsigned long) *r;
-		param.send_flags = IB_SEND_SIGNALED;
-		INIT_CQCOUNT(&r_xprt->rx_ep);
-	} else {
-		param.wr_id = 0ULL;
-		param.send_flags = 0;
-		DECR_CQCOUNT(&r_xprt->rx_ep);
-	}
-	rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
-	rpcrdma_unmap_one(ia, seg);
-	if (rc)
-		dprintk("RPC:       %s: failed ib_(un)bind_mw,"
-			" status %i\n", __func__, rc);
-	else
-		*r = NULL;	/* will upcall on completion */
-	return rc;
-}
-
-static int
-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
-			int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
-	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
-				  IB_ACCESS_REMOTE_READ);
-	struct rpcrdma_mr_seg *seg1 = seg;
-	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
-	int len, i, rc = 0;
-
-	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-		*nsegs = RPCRDMA_MAX_DATA_SEGS;
-	for (len = 0, i = 0; i < *nsegs;) {
-		rpcrdma_map_one(ia, seg, writing);
-		ipb[i].addr = seg->mr_dma;
-		ipb[i].size = seg->mr_len;
-		len += seg->mr_len;
-		++seg;
-		++i;
-		/* Check for holes */
-		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-		    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
-			break;
-	}
-	seg1->mr_base = seg1->mr_dma;
-	seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
-				ipb, i, mem_priv, &seg1->mr_base);
-	if (IS_ERR(seg1->mr_chunk.rl_mr)) {
-		rc = PTR_ERR(seg1->mr_chunk.rl_mr);
-		dprintk("RPC:       %s: failed ib_reg_phys_mr "
-			"%u@0x%llx (%d)... status %i\n",
-			__func__, len,
-			(unsigned long long)seg1->mr_dma, i, rc);
-		while (i--)
-			rpcrdma_unmap_one(ia, --seg);
-	} else {
-		seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
-		seg1->mr_nsegs = i;
-		seg1->mr_len = len;
-	}
-	*nsegs = i;
-	return rc;
-}
-
-static int
-rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	int rc;
-
-	rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
-	seg1->mr_chunk.rl_mr = NULL;
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
-	if (rc)
-		dprintk("RPC:       %s: failed ib_dereg_mr,"
-			" status %i\n", __func__, rc);
-	return rc;
-}
-
 int
 rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
@@ -1819,16 +1693,8 @@
 		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
 		break;
 
-	/* Registration using memory windows */
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
-		break;
-
-	/* Default registration each time */
 	default:
-		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
-		break;
+		return -1;
 	}
 	if (rc)
 		return -1;
@@ -1838,7 +1704,7 @@
 
 int
 rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
-		struct rpcrdma_xprt *r_xprt, void *r)
+		struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	int nsegs = seg->mr_nsegs, rc;
@@ -1847,9 +1713,7 @@
 
 #if RPCRDMA_PERSISTENT_REGISTRATION
 	case RPCRDMA_ALLPHYSICAL:
-		BUG_ON(nsegs != 1);
 		rpcrdma_unmap_one(ia, seg);
-		rc = 0;
 		break;
 #endif
 
@@ -1861,21 +1725,9 @@
 		rc = rpcrdma_deregister_fmr_external(seg, ia);
 		break;
 
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-	case RPCRDMA_MEMWINDOWS:
-		rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
-		break;
-
 	default:
-		rc = rpcrdma_deregister_default_external(seg, ia);
 		break;
 	}
-	if (r) {
-		struct rpcrdma_rep *rep = r;
-		void (*func)(struct rpcrdma_rep *) = rep->rr_func;
-		rep->rr_func = NULL;
-		func(rep);	/* dereg done, callback now */
-	}
 	return nsegs;
 }
 
@@ -1950,7 +1802,6 @@
 	ib_dma_sync_single_for_cpu(ia->ri_id->device,
 		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
 
-	DECR_CQCOUNT(ep);
 	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
 
 	if (rc)

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..89e7cd4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h

@@ -43,6 +43,7 @@
 #include <linux/wait.h> 		/* wait_queue_head_t, etc */
 #include <linux/spinlock.h> 		/* spinlock_t, etc */
 #include <linux/atomic.h>			/* atomic_t, etc */
+#include <linux/workqueue.h>		/* struct work_struct */
 
 #include <rdma/rdma_cm.h>		/* RDMA connection api */
 #include <rdma/ib_verbs.h>		/* RDMA verbs api */
@@ -66,18 +67,21 @@
 	struct completion	ri_done;
 	int			ri_async_rc;
 	enum rpcrdma_memreg	ri_memreg_strategy;
+	unsigned int		ri_max_frmr_depth;
 };
 
 /*
  * RDMA Endpoint -- one per transport instance
  */
 
+#define RPCRDMA_WC_BUDGET	(128)
+#define RPCRDMA_POLLSIZE	(16)
+
 struct rpcrdma_ep {
 	atomic_t		rep_cqcount;
 	int			rep_cqinit;
 	int			rep_connected;
 	struct rpcrdma_ia	*rep_ia;
-	struct ib_cq		*rep_cq;
 	struct ib_qp_init_attr	rep_attr;
 	wait_queue_head_t 	rep_connect_wait;
 	struct ib_sge		rep_pad;	/* holds zeroed pad */
@@ -86,6 +90,9 @@
 	struct rpc_xprt		*rep_xprt;	/* for rep_func */
 	struct rdma_conn_param	rep_remote_cma;
 	struct sockaddr_storage	rep_remote_addr;
+	struct delayed_work	rep_connect_worker;
+	struct ib_wc		rep_send_wcs[RPCRDMA_POLLSIZE];
+	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
@@ -124,7 +131,6 @@
 	struct rpc_xprt	*rr_xprt;	/* needed for request/reply matching */
 	void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
 	struct list_head rr_list;	/* tasklet list */
-	wait_queue_head_t rr_unbind;	/* optional unbind wait */
 	struct ib_sge	rr_iov;		/* for posting */
 	struct ib_mr	*rr_handle;	/* handle for mem in rr_iov */
 	char	rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
@@ -159,7 +165,6 @@
 		struct ib_mr	*rl_mr;		/* if registered directly */
 		struct rpcrdma_mw {		/* if registered from region */
 			union {
-				struct ib_mw	*mw;
 				struct ib_fmr	*fmr;
 				struct {
 					struct ib_fast_reg_page_list *fr_pgl;
@@ -207,7 +212,6 @@
 struct rpcrdma_buffer {
 	spinlock_t	rb_lock;	/* protects indexes */
 	atomic_t	rb_credits;	/* most recent server credits */
-	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
 	int		rb_max_requests;/* client max requests */
 	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
 	int		rb_send_index;
@@ -300,7 +304,7 @@
  */
 int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
 				struct rpcrdma_create_data_internal *);
-int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
 int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 
@@ -330,11 +334,12 @@
 int rpcrdma_register_external(struct rpcrdma_mr_seg *,
 				int, int, struct rpcrdma_xprt *);
 int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
-				struct rpcrdma_xprt *, void *);
+				struct rpcrdma_xprt *);
 
 /*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
  */
+void rpcrdma_connect_worker(struct work_struct *);
 void rpcrdma_conn_func(struct rpcrdma_ep *);
 void rpcrdma_reply_handler(struct rpcrdma_rep *);
 

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 402a7e9..be8bbd5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c

@@ -866,8 +866,6 @@
 	xs_restore_old_callbacks(transport, sk);
 	write_unlock_bh(&sk->sk_callback_lock);
 
-	sk->sk_no_check = 0;
-
 	trace_rpc_socket_close(&transport->xprt, sock);
 	sock_release(sock);
 }
@@ -2046,7 +2044,6 @@
 		sk->sk_user_data = xprt;
 		sk->sk_data_ready = xs_udp_data_ready;
 		sk->sk_write_space = xs_udp_write_space;
-		sk->sk_no_check = UDP_CSUM_NORCV;
 		sk->sk_allocation = GFP_ATOMIC;
 
 		xprt_set_connected(xprt);

diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index b282f71..a080c66 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile

@@ -5,7 +5,7 @@
 obj-$(CONFIG_TIPC) := tipc.o
 
 tipc-y	+= addr.o bcast.o bearer.o config.o \
-	   core.o handler.o link.o discover.o msg.o  \
+	   core.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
 	   socket.o log.o eth_media.o server.o

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 95ab5ef..2663167 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c

@@ -71,7 +71,7 @@
  * Note: The fields labelled "temporary" are incorporated into the bearer
  * to avoid consuming potentially limited stack space through the use of
  * large local variables within multicast routines.  Concurrent access is
- * prevented through use of the spinlock "bc_lock".
+ * prevented through use of the spinlock "bclink_lock".
  */
 struct tipc_bcbearer {
 	struct tipc_bearer bearer;
@@ -84,34 +84,64 @@
 
 /**
  * struct tipc_bclink - link used for broadcast messages
+ * @lock: spinlock governing access to structure
  * @link: (non-standard) broadcast link structure
  * @node: (non-standard) node structure representing b'cast link's peer node
+ * @flags: bitmask of deferred bclink actions (e.g. TIPC_BCLINK_RESET)
  * @bcast_nodes: map of broadcast-capable nodes
  * @retransmit_to: node that most recently requested a retransmit
  *
  * Handles sequence numbering, fragmentation, bundling, etc.
  */
 struct tipc_bclink {
+	spinlock_t lock;
 	struct tipc_link link;
 	struct tipc_node node;
+	unsigned int flags;
 	struct tipc_node_map bcast_nodes;
 	struct tipc_node *retransmit_to;
 };
 
-static struct tipc_bcbearer bcast_bearer;
-static struct tipc_bclink bcast_link;
-
-static struct tipc_bcbearer *bcbearer = &bcast_bearer;
-static struct tipc_bclink *bclink = &bcast_link;
-static struct tipc_link *bcl = &bcast_link.link;
-
-static DEFINE_SPINLOCK(bc_lock);
+static struct tipc_bcbearer *bcbearer;
+static struct tipc_bclink *bclink;
+static struct tipc_link *bcl;
 
 const char tipc_bclink_name[] = "broadcast-link";
 
 static void tipc_nmap_diff(struct tipc_node_map *nm_a,
 			   struct tipc_node_map *nm_b,
 			   struct tipc_node_map *nm_diff);
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
+
+static void tipc_bclink_lock(void)
+{
+	spin_lock_bh(&bclink->lock);
+}
+
+static void tipc_bclink_unlock(void)
+{
+	struct tipc_node *node = NULL;
+
+	if (likely(!bclink->flags)) {
+		spin_unlock_bh(&bclink->lock);
+		return;
+	}
+
+	if (bclink->flags & TIPC_BCLINK_RESET) {
+		bclink->flags &= ~TIPC_BCLINK_RESET;
+		node = tipc_bclink_retransmit_to();
+	}
+	spin_unlock_bh(&bclink->lock);
+
+	if (node)
+		tipc_link_reset_all(node);
+}
+
+void tipc_bclink_set_flags(unsigned int flags)
+{
+	bclink->flags |= flags;
+}
 
 static u32 bcbuf_acks(struct sk_buff *buf)
 {
@@ -130,16 +160,16 @@
 
 void tipc_bclink_add_node(u32 addr)
 {
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 	tipc_nmap_add(&bclink->bcast_nodes, addr);
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 }
 
 void tipc_bclink_remove_node(u32 addr)
 {
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 	tipc_nmap_remove(&bclink->bcast_nodes, addr);
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 }
 
 static void bclink_set_last_sent(void)
@@ -165,7 +195,7 @@
 /**
  * tipc_bclink_retransmit_to - get most recent node to request retransmission
  *
- * Called with bc_lock locked
+ * Called with bclink_lock locked
  */
 struct tipc_node *tipc_bclink_retransmit_to(void)
 {
@@ -177,7 +207,7 @@
  * @after: sequence number of last packet to *not* retransmit
  * @to: sequence number of last packet to retransmit
  *
- * Called with bc_lock locked
+ * Called with bclink_lock locked
  */
 static void bclink_retransmit_pkt(u32 after, u32 to)
 {
@@ -194,7 +224,7 @@
  * @n_ptr: node that sent acknowledgement info
  * @acked: broadcast sequence # that has been acknowledged
  *
- * Node is locked, bc_lock unlocked.
+ * Node is locked, bclink_lock unlocked.
  */
 void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
 {
@@ -202,8 +232,7 @@
 	struct sk_buff *next;
 	unsigned int released = 0;
 
-	spin_lock_bh(&bc_lock);
-
+	tipc_bclink_lock();
 	/* Bail out if tx queue is empty (no clean up is required) */
 	crs = bcl->first_out;
 	if (!crs)
@@ -267,13 +296,13 @@
 	if (unlikely(released && !list_empty(&bcl->waiting_ports)))
 		tipc_link_wakeup_ports(bcl, 0);
 exit:
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 }
 
 /**
  * tipc_bclink_update_link_state - update broadcast link state
  *
- * tipc_net_lock and node lock set
+ * RCU and node lock set
  */
 void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
 {
@@ -320,10 +349,10 @@
 				 ? buf_seqno(n_ptr->bclink.deferred_head) - 1
 				 : n_ptr->bclink.last_sent);
 
-		spin_lock_bh(&bc_lock);
-		tipc_bearer_send(&bcbearer->bearer, buf, NULL);
+		tipc_bclink_lock();
+		tipc_bearer_send(MAX_BEARERS, buf, NULL);
 		bcl->stats.sent_nacks++;
-		spin_unlock_bh(&bc_lock);
+		tipc_bclink_unlock();
 		kfree_skb(buf);
 
 		n_ptr->bclink.oos_state++;
@@ -335,8 +364,6 @@
  *
  * Delay any upcoming NACK by this node if another node has already
  * requested the first message this node is going to ask for.
- *
- * Only tipc_net_lock set.
  */
 static void bclink_peek_nack(struct tipc_msg *msg)
 {
@@ -362,7 +389,7 @@
 {
 	int res;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 
 	if (!bclink->bcast_nodes.count) {
 		res = msg_data_sz(buf_msg(buf));
@@ -377,14 +404,14 @@
 		bcl->stats.accu_queue_sz += bcl->out_queue_size;
 	}
 exit:
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 	return res;
 }
 
 /**
  * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet
  *
- * Called with both sending node's lock and bc_lock taken.
+ * Called with both sending node's lock and bclink_lock taken.
  */
 static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
 {
@@ -408,7 +435,7 @@
 /**
  * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
  *
- * tipc_net_lock is read_locked, no other locks set
+ * RCU is locked, no other locks set
  */
 void tipc_bclink_rcv(struct sk_buff *buf)
 {
@@ -439,12 +466,12 @@
 		if (msg_destnode(msg) == tipc_own_addr) {
 			tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
 			tipc_node_unlock(node);
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bcl->stats.recv_nacks++;
 			bclink->retransmit_to = node;
 			bclink_retransmit_pkt(msg_bcgap_after(msg),
 					      msg_bcgap_to(msg));
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 		} else {
 			tipc_node_unlock(node);
 			bclink_peek_nack(msg);
@@ -462,51 +489,47 @@
 		/* Deliver message to destination */
 
 		if (likely(msg_isdata(msg))) {
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bclink_accept_pkt(node, seqno);
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 			tipc_node_unlock(node);
 			if (likely(msg_mcast(msg)))
 				tipc_port_mcast_rcv(buf, NULL);
 			else
 				kfree_skb(buf);
 		} else if (msg_user(msg) == MSG_BUNDLER) {
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bclink_accept_pkt(node, seqno);
 			bcl->stats.recv_bundles++;
 			bcl->stats.recv_bundled += msg_msgcnt(msg);
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 			tipc_node_unlock(node);
 			tipc_link_bundle_rcv(buf);
 		} else if (msg_user(msg) == MSG_FRAGMENTER) {
-			int ret;
-			ret = tipc_link_frag_rcv(&node->bclink.reasm_head,
-						 &node->bclink.reasm_tail,
-						 &buf);
-			if (ret == LINK_REASM_ERROR)
+			tipc_buf_append(&node->bclink.reasm_buf, &buf);
+			if (unlikely(!buf && !node->bclink.reasm_buf))
 				goto unlock;
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bclink_accept_pkt(node, seqno);
 			bcl->stats.recv_fragments++;
-			if (ret == LINK_REASM_COMPLETE) {
+			if (buf) {
 				bcl->stats.recv_fragmented++;
-				/* Point msg to inner header */
 				msg = buf_msg(buf);
-				spin_unlock_bh(&bc_lock);
+				tipc_bclink_unlock();
 				goto receive;
 			}
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 			tipc_node_unlock(node);
 		} else if (msg_user(msg) == NAME_DISTRIBUTOR) {
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bclink_accept_pkt(node, seqno);
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 			tipc_node_unlock(node);
 			tipc_named_rcv(buf);
 		} else {
-			spin_lock_bh(&bc_lock);
+			tipc_bclink_lock();
 			bclink_accept_pkt(node, seqno);
-			spin_unlock_bh(&bc_lock);
+			tipc_bclink_unlock();
 			tipc_node_unlock(node);
 			kfree_skb(buf);
 		}
@@ -552,14 +575,14 @@
 	} else
 		deferred = 0;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 
 	if (deferred)
 		bcl->stats.deferred_recv++;
 	else
 		bcl->stats.duplicates++;
 
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 
 unlock:
 	tipc_node_unlock(node);
@@ -627,13 +650,13 @@
 
 		if (bp_index == 0) {
 			/* Use original buffer for first bearer */
-			tipc_bearer_send(b, buf, &b->bcast_addr);
+			tipc_bearer_send(b->identity, buf, &b->bcast_addr);
 		} else {
 			/* Avoid concurrent buffer access */
-			tbuf = pskb_copy(buf, GFP_ATOMIC);
+			tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
 			if (!tbuf)
 				break;
-			tipc_bearer_send(b, tbuf, &b->bcast_addr);
+			tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
 			kfree_skb(tbuf); /* Bearer keeps a clone */
 		}
 
@@ -655,20 +678,27 @@
 /**
  * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer
  */
-void tipc_bcbearer_sort(void)
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action)
 {
 	struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
 	struct tipc_bcbearer_pair *bp_curr;
+	struct tipc_bearer *b;
 	int b_index;
 	int pri;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
+
+	if (action)
+		tipc_nmap_add(nm_ptr, node);
+	else
+		tipc_nmap_remove(nm_ptr, node);
 
 	/* Group bearers by priority (can assume max of two per priority) */
 	memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
 
+	rcu_read_lock();
 	for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
-		struct tipc_bearer *b = bearer_list[b_index];
+		b = rcu_dereference_rtnl(bearer_list[b_index]);
 		if (!b || !b->nodes.count)
 			continue;
 
@@ -677,6 +707,7 @@
 		else
 			bp_temp[b->priority].secondary = b;
 	}
+	rcu_read_unlock();
 
 	/* Create array of bearer pairs for broadcasting */
 	bp_curr = bcbearer->bpairs;
@@ -702,7 +733,7 @@
 		bp_curr++;
 	}
 
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 }
 
 
@@ -714,7 +745,7 @@
 	if (!bcl)
 		return 0;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 
 	s = &bcl->stats;
 
@@ -743,7 +774,7 @@
 			     s->queue_sz_counts ?
 			     (s->accu_queue_sz / s->queue_sz_counts) : 0);
 
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 	return ret;
 }
 
@@ -752,9 +783,9 @@
 	if (!bcl)
 		return -ENOPROTOOPT;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 	memset(&bcl->stats, 0, sizeof(bcl->stats));
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 	return 0;
 }
 
@@ -765,46 +796,59 @@
 	if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
 		return -EINVAL;
 
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 	tipc_link_set_queue_limits(bcl, limit);
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 	return 0;
 }
 
-void tipc_bclink_init(void)
+int tipc_bclink_init(void)
 {
+	bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
+	if (!bcbearer)
+		return -ENOMEM;
+
+	bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
+	if (!bclink) {
+		kfree(bcbearer);
+		return -ENOMEM;
+	}
+
+	bcl = &bclink->link;
 	bcbearer->bearer.media = &bcbearer->media;
 	bcbearer->media.send_msg = tipc_bcbearer_send;
 	sprintf(bcbearer->media.name, "tipc-broadcast");
 
+	spin_lock_init(&bclink->lock);
 	INIT_LIST_HEAD(&bcl->waiting_ports);
 	bcl->next_out_no = 1;
 	spin_lock_init(&bclink->node.lock);
 	bcl->owner = &bclink->node;
 	bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
 	tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
-	bcl->b_ptr = &bcbearer->bearer;
-	bearer_list[BCBEARER] = &bcbearer->bearer;
+	bcl->bearer_id = MAX_BEARERS;
+	rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer);
 	bcl->state = WORKING_WORKING;
 	strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
+	return 0;
 }
 
 void tipc_bclink_stop(void)
 {
-	spin_lock_bh(&bc_lock);
+	tipc_bclink_lock();
 	tipc_link_purge_queues(bcl);
-	spin_unlock_bh(&bc_lock);
+	tipc_bclink_unlock();
 
-	bearer_list[BCBEARER] = NULL;
-	memset(bclink, 0, sizeof(*bclink));
-	memset(bcbearer, 0, sizeof(*bcbearer));
+	RCU_INIT_POINTER(bearer_list[BCBEARER], NULL);
+	synchronize_net();
+	kfree(bcbearer);
+	kfree(bclink);
 }
 
-
 /**
  * tipc_nmap_add - add a node to a node map
  */
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
 {
 	int n = tipc_node(node);
 	int w = n / WSIZE;
@@ -819,7 +863,7 @@
 /**
  * tipc_nmap_remove - remove a node from a node map
  */
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
 {
 	int n = tipc_node(node);
 	int w = n / WSIZE;

diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index a80ef54..00330c4 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h

@@ -39,6 +39,7 @@
 
 #define MAX_NODES 4096
 #define WSIZE 32
+#define TIPC_BCLINK_RESET 1
 
 /**
  * struct tipc_node_map - set of node identifiers
@@ -69,9 +70,6 @@
 
 extern const char tipc_bclink_name[];
 
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
-
 /**
  * tipc_nmap_equal - test for equality of node maps
  */
@@ -84,8 +82,9 @@
 void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
 void tipc_port_list_free(struct tipc_port_list *pl_ptr);
 
-void tipc_bclink_init(void);
+int tipc_bclink_init(void);
 void tipc_bclink_stop(void);
+void tipc_bclink_set_flags(unsigned int flags);
 void tipc_bclink_add_node(u32 addr);
 void tipc_bclink_remove_node(u32 addr);
 struct tipc_node *tipc_bclink_retransmit_to(void);
@@ -98,6 +97,6 @@
 int  tipc_bclink_stats(char *stats_buf, const u32 buf_size);
 int  tipc_bclink_reset_stats(void);
 int  tipc_bclink_set_queue_limits(u32 limit);
-void tipc_bcbearer_sort(void);
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action);
 
 #endif

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 3fef7eb..2644743 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c

@@ -49,7 +49,7 @@
 	NULL
 };
 
-struct tipc_bearer *bearer_list[MAX_BEARERS + 1];
+struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
 
 static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down);
 
@@ -178,7 +178,7 @@
 	u32 i;
 
 	for (i = 0; i < MAX_BEARERS; i++) {
-		b_ptr = bearer_list[i];
+		b_ptr = rtnl_dereference(bearer_list[i]);
 		if (b_ptr && (!strcmp(b_ptr->name, name)))
 			return b_ptr;
 	}
@@ -198,10 +198,9 @@
 	if (!buf)
 		return NULL;
 
-	read_lock_bh(&tipc_net_lock);
 	for (i = 0; media_info_array[i] != NULL; i++) {
 		for (j = 0; j < MAX_BEARERS; j++) {
-			b = bearer_list[j];
+			b = rtnl_dereference(bearer_list[j]);
 			if (!b)
 				continue;
 			if (b->media == media_info_array[i]) {
@@ -211,22 +210,33 @@
 			}
 		}
 	}
-	read_unlock_bh(&tipc_net_lock);
 	return buf;
 }
 
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest)
 {
-	tipc_nmap_add(&b_ptr->nodes, dest);
-	tipc_bcbearer_sort();
-	tipc_disc_add_dest(b_ptr->link_req);
+	struct tipc_bearer *b_ptr;
+
+	rcu_read_lock();
+	b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+	if (b_ptr) {
+		tipc_bcbearer_sort(&b_ptr->nodes, dest, true);
+		tipc_disc_add_dest(b_ptr->link_req);
+	}
+	rcu_read_unlock();
 }
 
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest)
 {
-	tipc_nmap_remove(&b_ptr->nodes, dest);
-	tipc_bcbearer_sort();
-	tipc_disc_remove_dest(b_ptr->link_req);
+	struct tipc_bearer *b_ptr;
+
+	rcu_read_lock();
+	b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+	if (b_ptr) {
+		tipc_bcbearer_sort(&b_ptr->nodes, dest, false);
+		tipc_disc_remove_dest(b_ptr->link_req);
+	}
+	rcu_read_unlock();
 }
 
 /**
@@ -271,13 +281,11 @@
 		return -EINVAL;
 	}
 
-	write_lock_bh(&tipc_net_lock);
-
 	m_ptr = tipc_media_find(b_names.media_name);
 	if (!m_ptr) {
 		pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
 			name, b_names.media_name);
-		goto exit;
+		return -EINVAL;
 	}
 
 	if (priority == TIPC_MEDIA_LINK_PRI)
@@ -287,7 +295,7 @@
 	bearer_id = MAX_BEARERS;
 	with_this_prio = 1;
 	for (i = MAX_BEARERS; i-- != 0; ) {
-		b_ptr = bearer_list[i];
+		b_ptr = rtnl_dereference(bearer_list[i]);
 		if (!b_ptr) {
 			bearer_id = i;
 			continue;
@@ -295,14 +303,14 @@
 		if (!strcmp(name, b_ptr->name)) {
 			pr_warn("Bearer <%s> rejected, already enabled\n",
 				name);
-			goto exit;
+			return -EINVAL;
 		}
 		if ((b_ptr->priority == priority) &&
 		    (++with_this_prio > 2)) {
 			if (priority-- == 0) {
 				pr_warn("Bearer <%s> rejected, duplicate priority\n",
 					name);
-				goto exit;
+				return -EINVAL;
 			}
 			pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
 				name, priority + 1, priority);
@@ -312,21 +320,20 @@
 	if (bearer_id >= MAX_BEARERS) {
 		pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
 			name, MAX_BEARERS);
-		goto exit;
+		return -EINVAL;
 	}
 
 	b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC);
-	if (!b_ptr) {
-		res = -ENOMEM;
-		goto exit;
-	}
+	if (!b_ptr)
+		return -ENOMEM;
+
 	strcpy(b_ptr->name, name);
 	b_ptr->media = m_ptr;
 	res = m_ptr->enable_media(b_ptr);
 	if (res) {
 		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
 			name, -res);
-		goto exit;
+		return -EINVAL;
 	}
 
 	b_ptr->identity = bearer_id;
@@ -341,16 +348,14 @@
 		bearer_disable(b_ptr, false);
 		pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
 			name);
-		goto exit;
+		return -EINVAL;
 	}
 
-	bearer_list[bearer_id] = b_ptr;
+	rcu_assign_pointer(bearer_list[bearer_id], b_ptr);
 
 	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
 		name,
 		tipc_addr_string_fill(addr_string, disc_domain), priority);
-exit:
-	write_unlock_bh(&tipc_net_lock);
 	return res;
 }
 
@@ -359,19 +364,16 @@
  */
 static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
 {
-	read_lock_bh(&tipc_net_lock);
 	pr_info("Resetting bearer <%s>\n", b_ptr->name);
-	tipc_disc_delete(b_ptr->link_req);
 	tipc_link_reset_list(b_ptr->identity);
-	tipc_disc_create(b_ptr, &b_ptr->bcast_addr);
-	read_unlock_bh(&tipc_net_lock);
+	tipc_disc_reset(b_ptr);
 	return 0;
 }
 
 /**
  * bearer_disable
  *
- * Note: This routine assumes caller holds tipc_net_lock.
+ * Note: This routine assumes caller holds RTNL lock.
  */
 static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
 {
@@ -385,12 +387,12 @@
 		tipc_disc_delete(b_ptr->link_req);
 
 	for (i = 0; i < MAX_BEARERS; i++) {
-		if (b_ptr == bearer_list[i]) {
-			bearer_list[i] = NULL;
+		if (b_ptr == rtnl_dereference(bearer_list[i])) {
+			RCU_INIT_POINTER(bearer_list[i], NULL);
 			break;
 		}
 	}
-	kfree(b_ptr);
+	kfree_rcu(b_ptr, rcu);
 }
 
 int tipc_disable_bearer(const char *name)
@@ -398,7 +400,6 @@
 	struct tipc_bearer *b_ptr;
 	int res;
 
-	write_lock_bh(&tipc_net_lock);
 	b_ptr = tipc_bearer_find(name);
 	if (b_ptr == NULL) {
 		pr_warn("Attempt to disable unknown bearer <%s>\n", name);
@@ -407,32 +408,9 @@
 		bearer_disable(b_ptr, false);
 		res = 0;
 	}
-	write_unlock_bh(&tipc_net_lock);
 	return res;
 }
 
-
-/* tipc_l2_media_addr_set - initialize Ethernet media address structure
- *
- * Media-dependent "value" field stores MAC address in first 6 bytes
- * and zeroes out the remaining bytes.
- */
-void tipc_l2_media_addr_set(const struct tipc_bearer *b,
-			    struct tipc_media_addr *a, char *mac)
-{
-	int len = b->media->hwaddr_len;
-
-	if (unlikely(sizeof(a->value) < len)) {
-		WARN_ONCE(1, "Media length invalid\n");
-		return;
-	}
-
-	memcpy(a->value, mac, len);
-	memset(a->value + len, 0, sizeof(a->value) - len);
-	a->media_id = b->media->type_id;
-	a->broadcast = !memcmp(mac, b->bcast_addr.value, len);
-}
-
 int tipc_enable_l2_media(struct tipc_bearer *b)
 {
 	struct net_device *dev;
@@ -443,33 +421,37 @@
 	if (!dev)
 		return -ENODEV;
 
-	/* Associate TIPC bearer with Ethernet bearer */
-	b->media_ptr = dev;
-	memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value));
+	/* Associate TIPC bearer with L2 bearer */
+	rcu_assign_pointer(b->media_ptr, dev);
+	memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
 	memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
 	b->bcast_addr.media_id = b->media->type_id;
 	b->bcast_addr.broadcast = 1;
 	b->mtu = dev->mtu;
-	tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr);
+	b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr);
 	rcu_assign_pointer(dev->tipc_ptr, b);
 	return 0;
 }
 
-/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface
+/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
  *
- * Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
+ * Mark L2 bearer as inactive so that incoming buffers are thrown away,
  * then get worker thread to complete bearer cleanup.  (Can't do cleanup
  * here because cleanup code needs to sleep and caller holds spinlocks.)
  */
 void tipc_disable_l2_media(struct tipc_bearer *b)
 {
-	struct net_device *dev = (struct net_device *)b->media_ptr;
+	struct net_device *dev;
+
+	dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+	RCU_INIT_POINTER(b->media_ptr, NULL);
 	RCU_INIT_POINTER(dev->tipc_ptr, NULL);
+	synchronize_net();
 	dev_put(dev);
 }
 
 /**
- * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface
+ * tipc_l2_send_msg - send a TIPC packet out over an L2 interface
  * @buf: the packet to be sent
  * @b_ptr: the bearer through which the packet is to be sent
  * @dest: peer destination address
@@ -478,8 +460,12 @@
 		     struct tipc_media_addr *dest)
 {
 	struct sk_buff *clone;
+	struct net_device *dev;
 	int delta;
-	struct net_device *dev = (struct net_device *)b->media_ptr;
+
+	dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
+	if (!dev)
+		return 0;
 
 	clone = skb_clone(buf, GFP_ATOMIC);
 	if (!clone)
@@ -507,10 +493,16 @@
  * The media send routine must not alter the buffer being passed in
  * as it may be needed for later retransmission!
  */
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
 		      struct tipc_media_addr *dest)
 {
-	b->media->send_msg(buf, b, dest);
+	struct tipc_bearer *b_ptr;
+
+	rcu_read_lock();
+	b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+	if (likely(b_ptr))
+		b_ptr->media->send_msg(buf, b_ptr, dest);
+	rcu_read_unlock();
 }
 
 /**
@@ -535,7 +527,7 @@
 	}
 
 	rcu_read_lock();
-	b_ptr = rcu_dereference(dev->tipc_ptr);
+	b_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
 	if (likely(b_ptr)) {
 		if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
 			buf->next = NULL;
@@ -568,12 +560,9 @@
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
-	rcu_read_lock();
-	b_ptr = rcu_dereference(dev->tipc_ptr);
-	if (!b_ptr) {
-		rcu_read_unlock();
+	b_ptr = rtnl_dereference(dev->tipc_ptr);
+	if (!b_ptr)
 		return NOTIFY_DONE;
-	}
 
 	b_ptr->mtu = dev->mtu;
 
@@ -586,17 +575,15 @@
 		tipc_reset_bearer(b_ptr);
 		break;
 	case NETDEV_CHANGEADDR:
-		tipc_l2_media_addr_set(b_ptr, &b_ptr->addr,
+		b_ptr->media->raw2addr(b_ptr, &b_ptr->addr,
 				       (char *)dev->dev_addr);
 		tipc_reset_bearer(b_ptr);
 		break;
 	case NETDEV_UNREGISTER:
 	case NETDEV_CHANGENAME:
-		tipc_disable_bearer(b_ptr->name);
+		bearer_disable(b_ptr, false);
 		break;
 	}
-	rcu_read_unlock();
-
 	return NOTIFY_OK;
 }
 
@@ -633,7 +620,7 @@
 	u32 i;
 
 	for (i = 0; i < MAX_BEARERS; i++) {
-		b_ptr = bearer_list[i];
+		b_ptr = rtnl_dereference(bearer_list[i]);
 		if (b_ptr) {
 			bearer_disable(b_ptr, true);
 			bearer_list[i] = NULL;

diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ba48145..78fccc4 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h

@@ -42,14 +42,12 @@
 #define MAX_BEARERS	2
 #define MAX_MEDIA	2
 
-/*
- * Identifiers associated with TIPC message header media address info
- *
- * - address info field is 20 bytes long
- * - media type identifier located at offset 3
- * - remaining bytes vary according to media type
+/* Identifiers associated with TIPC message header media address info
+ * - address info field is 32 bytes long
+ * - the field's actual content and length are defined per media
+ * - remaining unused bytes in the field are set to zero
  */
-#define TIPC_MEDIA_ADDR_SIZE	20
+#define TIPC_MEDIA_ADDR_SIZE	32
 #define TIPC_MEDIA_TYPE_OFFSET	3
 
 /*
@@ -77,9 +75,10 @@
  * @send_msg: routine which handles buffer transmission
  * @enable_media: routine which enables a media
  * @disable_media: routine which disables a media
- * @addr2str: routine which converts media address to string
- * @addr2msg: routine which converts media address to protocol message area
- * @msg2addr: routine which converts media address from protocol message area
+ * @addr2str: convert media address format to string
+ * @addr2msg: convert from media addr format to discovery msg addr format
+ * @msg2addr: convert from discovery msg addr format to media addr format
+ * @raw2addr: convert from raw addr format to media addr format
  * @priority: default link (and bearer) priority
  * @tolerance: default time (in ms) before declaring link failure
  * @window: default window (in packets) before declaring link congestion
@@ -93,10 +92,16 @@
 			struct tipc_media_addr *dest);
 	int (*enable_media)(struct tipc_bearer *b_ptr);
 	void (*disable_media)(struct tipc_bearer *b_ptr);
-	int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
-	int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
-	int (*msg2addr)(const struct tipc_bearer *b_ptr,
-			struct tipc_media_addr *a, char *msg_area);
+	int (*addr2str)(struct tipc_media_addr *addr,
+			char *strbuf,
+			int bufsz);
+	int (*addr2msg)(char *msg, struct tipc_media_addr *addr);
+	int (*msg2addr)(struct tipc_bearer *b,
+			struct tipc_media_addr *addr,
+			char *msg);
+	int (*raw2addr)(struct tipc_bearer *b,
+			struct tipc_media_addr *addr,
+			char *raw);
 	u32 priority;
 	u32 tolerance;
 	u32 window;
@@ -113,6 +118,7 @@
  * @name: bearer name (format = media:interface)
  * @media: ptr to media structure associated with bearer
  * @bcast_addr: media address used in broadcasting
+ * @rcu: rcu struct for tipc_bearer
  * @priority: default link priority for bearer
  * @window: default window size for bearer
  * @tolerance: default link tolerance for bearer
@@ -127,12 +133,13 @@
  * care of initializing all other fields.
  */
 struct tipc_bearer {
-	void *media_ptr;			/* initalized by media */
+	void __rcu *media_ptr;			/* initialized by media */
 	u32 mtu;				/* initalized by media */
 	struct tipc_media_addr addr;		/* initalized by media */
 	char name[TIPC_MAX_BEARER_NAME];
 	struct tipc_media *media;
 	struct tipc_media_addr bcast_addr;
+	struct rcu_head rcu;
 	u32 priority;
 	u32 window;
 	u32 tolerance;
@@ -150,7 +157,7 @@
 
 struct tipc_link;
 
-extern struct tipc_bearer *bearer_list[];
+extern struct tipc_bearer __rcu *bearer_list[];
 
 /*
  * TIPC routines available to supported media types
@@ -173,22 +180,20 @@
 int tipc_media_set_window(const char *name, u32 new_value);
 void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
 struct sk_buff *tipc_media_get_names(void);
-void tipc_l2_media_addr_set(const struct tipc_bearer *b,
-			    struct tipc_media_addr *a, char *mac);
 int tipc_enable_l2_media(struct tipc_bearer *b);
 void tipc_disable_l2_media(struct tipc_bearer *b);
 int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
 		     struct tipc_media_addr *dest);
 
 struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest);
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest);
 struct tipc_bearer *tipc_bearer_find(const char *name);
 struct tipc_media *tipc_media_find(const char *name);
 int tipc_bearer_setup(void);
 void tipc_bearer_cleanup(void);
 void tipc_bearer_stop(void);
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
 		      struct tipc_media_addr *dest);
 
 #endif	/* _TIPC_BEARER_H */

diff --git a/net/tipc/config.c b/net/tipc/config.c
index 4b981c0..2b42403 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c

@@ -42,8 +42,6 @@
 
 #define REPLY_TRUNCATED "<truncated>\n"
 
-static DEFINE_MUTEX(config_mutex);
-
 static const void *req_tlv_area;	/* request message TLV area */
 static int req_tlv_space;		/* request message TLV area size */
 static int rep_headroom;		/* reply message headroom to use */
@@ -179,8 +177,10 @@
 	if (tipc_own_addr)
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 						   " (cannot change node address once assigned)");
-	tipc_net_start(addr);
-	return tipc_cfg_reply_none();
+	if (!tipc_net_start(addr))
+		return tipc_cfg_reply_none();
+
+	return tipc_cfg_reply_error_string("cannot change to network mode");
 }
 
 static struct sk_buff *cfg_set_max_ports(void)
@@ -223,7 +223,7 @@
 {
 	struct sk_buff *rep_tlv_buf;
 
-	mutex_lock(&config_mutex);
+	rtnl_lock();
 
 	/* Save request and reply details in a well-known location */
 	req_tlv_area = request_area;
@@ -337,6 +337,6 @@
 
 	/* Return reply buffer */
 exit:
-	mutex_unlock(&config_mutex);
+	rtnl_unlock();
 	return rep_tlv_buf;
 }

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 50d5742..676d180 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c

@@ -80,7 +80,6 @@
  */
 static void tipc_core_stop(void)
 {
-	tipc_handler_stop();
 	tipc_net_stop();
 	tipc_bearer_cleanup();
 	tipc_netlink_stop();
@@ -100,10 +99,6 @@
 
 	get_random_bytes(&tipc_random, sizeof(tipc_random));
 
-	err = tipc_handler_start();
-	if (err)
-		goto out_handler;
-
 	err = tipc_ref_table_init(tipc_max_ports, tipc_random);
 	if (err)
 		goto out_reftbl;
@@ -146,8 +141,6 @@
 out_nametbl:
 	tipc_ref_table_stop();
 out_reftbl:
-	tipc_handler_stop();
-out_handler:
 	return err;
 }
 
@@ -161,10 +154,11 @@
 	tipc_max_ports = CONFIG_TIPC_PORTS;
 	tipc_net_id = 4711;
 
-	sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
-	sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
+	sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
+			      TIPC_LOW_IMPORTANCE;
+	sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
 			      TIPC_CRITICAL_IMPORTANCE;
-	sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
+	sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
 
 	res = tipc_core_start();
 	if (res)

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8985bbc..bb26ed1 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h

@@ -56,7 +56,8 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
 
 #define TIPC_MOD_VER "2.0.0"
 
@@ -89,8 +90,6 @@
 /*
  * Routines available to privileged subsystems
  */
-int tipc_handler_start(void);
-void tipc_handler_stop(void);
 int tipc_netlink_start(void);
 void tipc_netlink_stop(void);
 int tipc_socket_init(void);
@@ -109,12 +108,10 @@
 #endif
 
 /*
- * TIPC timer and signal code
+ * TIPC timer code
  */
 typedef void (*Handler) (unsigned long);
 
-u32 tipc_k_signal(Handler routine, unsigned long argument);
-
 /**
  * k_init_timer - initialize a timer
  * @timer: pointer to timer structure
@@ -191,6 +188,7 @@
 struct tipc_skb_cb {
 	void *handle;
 	bool deferred;
+	struct sk_buff *tail;
 };
 
 #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 542fe34..aa722a4 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c

@@ -1,7 +1,7 @@
 /*
  * net/tipc/discover.c
  *
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2014, Ericsson AB
  * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -46,8 +46,9 @@
 
 /**
  * struct tipc_link_req - information about an ongoing link setup request
- * @bearer: bearer issuing requests
+ * @bearer_id: identity of bearer issuing requests
  * @dest: destination address for request messages
+ * @domain: network domain to which links can be established
  * @num_nodes: number of nodes currently discovered (i.e. with an active link)
  * @lock: spinlock for controlling access to requests
  * @buf: request message to be (repeatedly) sent
@@ -55,8 +56,9 @@
  * @timer_intv: current interval between requests (in ms)
  */
 struct tipc_link_req {
-	struct tipc_bearer *bearer;
+	u32 bearer_id;
 	struct tipc_media_addr dest;
+	u32 domain;
 	int num_nodes;
 	spinlock_t lock;
 	struct sk_buff *buf;
@@ -69,22 +71,19 @@
  * @type: message type (request or response)
  * @b_ptr: ptr to bearer issuing message
  */
-static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr)
+static void tipc_disc_init_msg(struct sk_buff *buf, u32 type,
+			       struct tipc_bearer *b_ptr)
 {
-	struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
 	struct tipc_msg *msg;
 	u32 dest_domain = b_ptr->domain;
 
-	if (buf) {
-		msg = buf_msg(buf);
-		tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
-		msg_set_non_seq(msg, 1);
-		msg_set_node_sig(msg, tipc_random);
-		msg_set_dest_domain(msg, dest_domain);
-		msg_set_bc_netid(msg, tipc_net_id);
-		b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
-	}
-	return buf;
+	msg = buf_msg(buf);
+	tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
+	msg_set_non_seq(msg, 1);
+	msg_set_node_sig(msg, tipc_random);
+	msg_set_dest_domain(msg, dest_domain);
+	msg_set_bc_netid(msg, tipc_net_id);
+	b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
 }
 
 /**
@@ -107,146 +106,150 @@
 }
 
 /**
- * tipc_disc_rcv - handle incoming link setup message (request or response)
+ * tipc_disc_rcv - handle incoming discovery message (request or response)
  * @buf: buffer containing message
- * @b_ptr: bearer that message arrived on
+ * @bearer: bearer that message arrived on
  */
-void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr)
+void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer)
 {
-	struct tipc_node *n_ptr;
+	struct tipc_node *node;
 	struct tipc_link *link;
-	struct tipc_media_addr media_addr;
+	struct tipc_media_addr maddr;
 	struct sk_buff *rbuf;
 	struct tipc_msg *msg = buf_msg(buf);
-	u32 dest = msg_dest_domain(msg);
-	u32 orig = msg_prevnode(msg);
+	u32 ddom = msg_dest_domain(msg);
+	u32 onode = msg_prevnode(msg);
 	u32 net_id = msg_bc_netid(msg);
-	u32 type = msg_type(msg);
+	u32 mtyp = msg_type(msg);
 	u32 signature = msg_node_sig(msg);
-	int addr_mismatch;
-	int link_fully_up;
+	bool addr_match = false;
+	bool sign_match = false;
+	bool link_up = false;
+	bool accept_addr = false;
+	bool accept_sign = false;
+	bool respond = false;
 
-	media_addr.broadcast = 1;
-	b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
+	bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg));
 	kfree_skb(buf);
 
 	/* Ensure message from node is valid and communication is permitted */
 	if (net_id != tipc_net_id)
 		return;
-	if (media_addr.broadcast)
+	if (maddr.broadcast)
 		return;
-	if (!tipc_addr_domain_valid(dest))
+	if (!tipc_addr_domain_valid(ddom))
 		return;
-	if (!tipc_addr_node_valid(orig))
-		return;
-	if (orig == tipc_own_addr) {
-		if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
-			disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
-		return;
-	}
-	if (!tipc_in_scope(dest, tipc_own_addr))
-		return;
-	if (!tipc_in_scope(b_ptr->domain, orig))
+	if (!tipc_addr_node_valid(onode))
 		return;
 
-	/* Locate structure corresponding to requesting node */
-	n_ptr = tipc_node_find(orig);
-	if (!n_ptr) {
-		n_ptr = tipc_node_create(orig);
-		if (!n_ptr)
-			return;
+	if (in_own_node(onode)) {
+		if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
+			disc_dupl_alert(bearer, tipc_own_addr, &maddr);
+		return;
 	}
-	tipc_node_lock(n_ptr);
+	if (!tipc_in_scope(ddom, tipc_own_addr))
+		return;
+	if (!tipc_in_scope(bearer->domain, onode))
+		return;
+
+	/* Locate, or if necessary, create, node: */
+	node = tipc_node_find(onode);
+	if (!node)
+		node = tipc_node_create(onode);
+	if (!node)
+		return;
+
+	tipc_node_lock(node);
+	link = node->links[bearer->identity];
 
 	/* Prepare to validate requesting node's signature and media address */
-	link = n_ptr->links[b_ptr->identity];
-	addr_mismatch = (link != NULL) &&
-		memcmp(&link->media_addr, &media_addr, sizeof(media_addr));
+	sign_match = (signature == node->signature);
+	addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr));
+	link_up = link && tipc_link_is_up(link);
 
-	/*
-	 * Ensure discovery message's signature is correct
-	 *
-	 * If signature is incorrect and there is no working link to the node,
-	 * accept the new signature but invalidate all existing links to the
-	 * node so they won't re-activate without a new discovery message.
-	 *
-	 * If signature is incorrect and the requested link to the node is
-	 * working, accept the new signature. (This is an instance of delayed
-	 * rediscovery, where a link endpoint was able to re-establish contact
-	 * with its peer endpoint on a node that rebooted before receiving a
-	 * discovery message from that node.)
-	 *
-	 * If signature is incorrect and there is a working link to the node
-	 * that is not the requested link, reject the request (must be from
-	 * a duplicate node).
-	 */
-	if (signature != n_ptr->signature) {
-		if (n_ptr->working_links == 0) {
-			struct tipc_link *curr_link;
-			int i;
 
-			for (i = 0; i < MAX_BEARERS; i++) {
-				curr_link = n_ptr->links[i];
-				if (curr_link) {
-					memset(&curr_link->media_addr, 0,
-					       sizeof(media_addr));
-					tipc_link_reset(curr_link);
-				}
-			}
-			addr_mismatch = (link != NULL);
-		} else if (tipc_link_is_up(link) && !addr_mismatch) {
-			/* delayed rediscovery */
-		} else {
-			disc_dupl_alert(b_ptr, orig, &media_addr);
-			tipc_node_unlock(n_ptr);
-			return;
-		}
-		n_ptr->signature = signature;
+	/* These three flags give us eight permutations: */
+
+	if (sign_match && addr_match && link_up) {
+		/* All is fine. Do nothing. */
+	} else if (sign_match && addr_match && !link_up) {
+		/* Respond. The link will come up in due time */
+		respond = true;
+	} else if (sign_match && !addr_match && link_up) {
+		/* Peer has changed i/f address without rebooting.
+		 * If so, the link will reset soon, and the next
+		 * discovery will be accepted. So we can ignore it.
+		 * It may also be a cloned or malicious peer having
+		 * chosen the same node address and signature as an
+		 * existing one.
+		 * Ignore requests until the link goes down, if ever.
+		 */
+		disc_dupl_alert(bearer, onode, &maddr);
+	} else if (sign_match && !addr_match && !link_up) {
+		/* Peer link has changed i/f address without rebooting.
+		 * It may also be a cloned or malicious peer; we can't
+		 * distinguish between the two.
+		 * The signature is correct, so we must accept.
+		 */
+		accept_addr = true;
+		respond = true;
+	} else if (!sign_match && addr_match && link_up) {
+		/* Peer node rebooted. Two possibilities:
+		 *  - Delayed re-discovery; this link endpoint has already
+		 *    reset and re-established contact with the peer, before
+		 *    receiving a discovery message from that node.
+		 *    (The peer happened to receive one from this node first).
+		 *  - The peer came back so fast that our side has not
+		 *    discovered it yet. Probing from this side will soon
+		 *    reset the link, since there can be no working link
+		 *    endpoint at the peer end, and the link will re-establish.
+		 *  Accept the signature, since it comes from a known peer.
+		 */
+		accept_sign = true;
+	} else if (!sign_match && addr_match && !link_up) {
+		/*  The peer node has rebooted.
+		 *  Accept signature, since it is a known peer.
+		 */
+		accept_sign = true;
+		respond = true;
+	} else if (!sign_match && !addr_match && link_up) {
+		/* Peer rebooted with new address, or a new/duplicate peer.
+		 * Ignore until the link goes down, if ever.
+		 */
+		disc_dupl_alert(bearer, onode, &maddr);
+	} else if (!sign_match && !addr_match && !link_up) {
+		/* Peer rebooted with new address, or it is a new peer.
+		 * Accept signature and address.
+		*/
+		accept_sign = true;
+		accept_addr = true;
+		respond = true;
 	}
 
-	/*
-	 * Ensure requesting node's media address is correct
-	 *
-	 * If media address doesn't match and the link is working, reject the
-	 * request (must be from a duplicate node).
-	 *
-	 * If media address doesn't match and the link is not working, accept
-	 * the new media address and reset the link to ensure it starts up
-	 * cleanly.
-	 */
-	if (addr_mismatch) {
-		if (tipc_link_is_up(link)) {
-			disc_dupl_alert(b_ptr, orig, &media_addr);
-			tipc_node_unlock(n_ptr);
-			return;
-		} else {
-			memcpy(&link->media_addr, &media_addr,
-			       sizeof(media_addr));
+	if (accept_sign)
+		node->signature = signature;
+
+	if (accept_addr) {
+		if (!link)
+			link = tipc_link_create(node, bearer, &maddr);
+		if (link) {
+			memcpy(&link->media_addr, &maddr, sizeof(maddr));
 			tipc_link_reset(link);
+		} else {
+			respond = false;
 		}
 	}
 
-	/* Create a link endpoint for this bearer, if necessary */
-	if (!link) {
-		link = tipc_link_create(n_ptr, b_ptr, &media_addr);
-		if (!link) {
-			tipc_node_unlock(n_ptr);
-			return;
-		}
-	}
-
-	/* Accept discovery message & send response, if necessary */
-	link_fully_up = link_working_working(link);
-
-	if ((type == DSC_REQ_MSG) && !link_fully_up) {
-		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr);
+	/* Send response, if necessary */
+	if (respond && (mtyp == DSC_REQ_MSG)) {
+		rbuf = tipc_buf_acquire(INT_H_SIZE);
 		if (rbuf) {
-			tipc_bearer_send(b_ptr, rbuf, &media_addr);
+			tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer);
+			tipc_bearer_send(bearer->identity, rbuf, &maddr);
 			kfree_skb(rbuf);
 		}
 	}
-
-	tipc_node_unlock(n_ptr);
+	tipc_node_unlock(node);
 }
 
 /**
@@ -303,7 +306,7 @@
 	spin_lock_bh(&req->lock);
 
 	/* Stop searching if only desired node has been found */
-	if (tipc_node(req->bearer->domain) && req->num_nodes) {
+	if (tipc_node(req->domain) && req->num_nodes) {
 		req->timer_intv = TIPC_LINK_REQ_INACTIVE;
 		goto exit;
 	}
@@ -315,7 +318,7 @@
 	 * hold at fast polling rate if don't have any associated nodes,
 	 * otherwise hold at slow polling rate
 	 */
-	tipc_bearer_send(req->bearer, req->buf, &req->dest);
+	tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
 
 
 	req->timer_intv *= 2;
@@ -347,21 +350,23 @@
 	if (!req)
 		return -ENOMEM;
 
-	req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr);
+	req->buf = tipc_buf_acquire(INT_H_SIZE);
 	if (!req->buf) {
 		kfree(req);
-		return -ENOMSG;
+		return -ENOMEM;
 	}
 
+	tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
 	memcpy(&req->dest, dest, sizeof(*dest));
-	req->bearer = b_ptr;
+	req->bearer_id = b_ptr->identity;
+	req->domain = b_ptr->domain;
 	req->num_nodes = 0;
 	req->timer_intv = TIPC_LINK_REQ_INIT;
 	spin_lock_init(&req->lock);
 	k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req);
 	k_start_timer(&req->timer, req->timer_intv);
 	b_ptr->link_req = req;
-	tipc_bearer_send(req->bearer, req->buf, &req->dest);
+	tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
 	return 0;
 }
 
@@ -376,3 +381,23 @@
 	kfree_skb(req->buf);
 	kfree(req);
 }
+
+/**
+ * tipc_disc_reset - reset object to send periodic link setup requests
+ * @b_ptr: ptr to bearer issuing requests; its current identity and
+ *         domain are copied into the request object
+ */
+void tipc_disc_reset(struct tipc_bearer *b_ptr)
+{
+	struct tipc_link_req *req = b_ptr->link_req;
+
+	spin_lock_bh(&req->lock);
+	tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
+	req->bearer_id = b_ptr->identity;
+	req->domain = b_ptr->domain;
+	req->num_nodes = 0;
+	req->timer_intv = TIPC_LINK_REQ_INIT;
+	k_start_timer(&req->timer, req->timer_intv);
+	tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
+	spin_unlock_bh(&req->lock);
+}

diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 07f3472..515b573 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h

@@ -41,6 +41,7 @@
 
 int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest);
 void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_reset(struct tipc_bearer *b_ptr);
 void tipc_disc_add_dest(struct tipc_link_req *req);
 void tipc_disc_remove_dest(struct tipc_link_req *req);
 void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr);

diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 67cf3f9..5e1426f 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c

@@ -1,7 +1,7 @@
 /*
  * net/tipc/eth_media.c: Ethernet bearer support for TIPC
  *
- * Copyright (c) 2001-2007, 2013, Ericsson AB
+ * Copyright (c) 2001-2007, 2013-2014, Ericsson AB
  * Copyright (c) 2005-2008, 2011-2013, Wind River Systems
  * All rights reserved.
  *
@@ -37,39 +37,52 @@
 #include "core.h"
 #include "bearer.h"
 
-#define ETH_ADDR_OFFSET	4	/* message header offset of MAC address */
+#define ETH_ADDR_OFFSET  4  /* MAC addr position inside address field */
 
-/* convert Ethernet address to string */
-static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf,
-			     int str_size)
+/* Convert Ethernet address (media address format) to string */
+static int tipc_eth_addr2str(struct tipc_media_addr *addr,
+			     char *strbuf, int bufsz)
 {
-	if (str_size < 18)	/* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */
+	if (bufsz < 18)	/* 18 = sizeof("aa:bb:cc:dd:ee:ff") */
 		return 1;
 
-	sprintf(str_buf, "%pM", a->value);
+	sprintf(strbuf, "%pM", addr->value);
 	return 0;
 }
 
-/* convert Ethernet address format to message header format */
-static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert from media address format to discovery message addr format */
+static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
 {
-	memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
-	msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
-	memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN);
+	memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+	msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
+	memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN);
 	return 0;
 }
 
-/* convert message header address format to Ethernet format */
-static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr,
-			     struct tipc_media_addr *a, char *msg_area)
+/* Convert raw mac address format to media addr format */
+static int tipc_eth_raw2addr(struct tipc_bearer *b,
+			     struct tipc_media_addr *addr,
+			     char *msg)
 {
-	if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
-		return 1;
+	char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
-	tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
+	memset(addr, 0, sizeof(*addr));
+	ether_addr_copy(addr->value, msg);
+	addr->media_id = TIPC_MEDIA_TYPE_ETH;
+	addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN);
 	return 0;
 }
 
+/* Convert discovery msg addr format to Ethernet media addr format */
+static int tipc_eth_msg2addr(struct tipc_bearer *b,
+			     struct tipc_media_addr *addr,
+			     char *msg)
+{
+	/* Skip past preamble: */
+	msg += ETH_ADDR_OFFSET;
+	return tipc_eth_raw2addr(b, addr, msg);
+}
+
 /* Ethernet media registration info */
 struct tipc_media eth_media_info = {
 	.send_msg	= tipc_l2_send_msg,
@@ -78,6 +91,7 @@
 	.addr2str	= tipc_eth_addr2str,
 	.addr2msg	= tipc_eth_addr2msg,
 	.msg2addr	= tipc_eth_msg2addr,
+	.raw2addr	= tipc_eth_raw2addr,
 	.priority	= TIPC_DEF_LINK_PRI,
 	.tolerance	= TIPC_DEF_LINK_TOL,
 	.window		= TIPC_DEF_LINK_WIN,
@@ -85,4 +99,3 @@
 	.hwaddr_len	= ETH_ALEN,
 	.name		= "eth"
 };
-

diff --git a/net/tipc/handler.c b/net/tipc/handler.c
deleted file mode 100644
index 1fabf16..0000000
--- a/net/tipc/handler.c
+++ /dev/null

@@ -1,134 +0,0 @@
-/*
- * net/tipc/handler.c: TIPC signal handling
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-
-struct queue_item {
-	struct list_head next_signal;
-	void (*handler) (unsigned long);
-	unsigned long data;
-};
-
-static struct kmem_cache *tipc_queue_item_cache;
-static struct list_head signal_queue_head;
-static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled __read_mostly;
-
-static void process_signal_queue(unsigned long dummy);
-
-static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0);
-
-
-unsigned int tipc_k_signal(Handler routine, unsigned long argument)
-{
-	struct queue_item *item;
-
-	spin_lock_bh(&qitem_lock);
-	if (!handler_enabled) {
-		spin_unlock_bh(&qitem_lock);
-		return -ENOPROTOOPT;
-	}
-
-	item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC);
-	if (!item) {
-		pr_err("Signal queue out of memory\n");
-		spin_unlock_bh(&qitem_lock);
-		return -ENOMEM;
-	}
-	item->handler = routine;
-	item->data = argument;
-	list_add_tail(&item->next_signal, &signal_queue_head);
-	spin_unlock_bh(&qitem_lock);
-	tasklet_schedule(&tipc_tasklet);
-	return 0;
-}
-
-static void process_signal_queue(unsigned long dummy)
-{
-	struct queue_item *__volatile__ item;
-	struct list_head *l, *n;
-
-	spin_lock_bh(&qitem_lock);
-	list_for_each_safe(l, n, &signal_queue_head) {
-		item = list_entry(l, struct queue_item, next_signal);
-		list_del(&item->next_signal);
-		spin_unlock_bh(&qitem_lock);
-		item->handler(item->data);
-		spin_lock_bh(&qitem_lock);
-		kmem_cache_free(tipc_queue_item_cache, item);
-	}
-	spin_unlock_bh(&qitem_lock);
-}
-
-int tipc_handler_start(void)
-{
-	tipc_queue_item_cache =
-		kmem_cache_create("tipc_queue_items", sizeof(struct queue_item),
-				  0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!tipc_queue_item_cache)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&signal_queue_head);
-	tasklet_enable(&tipc_tasklet);
-	handler_enabled = 1;
-	return 0;
-}
-
-void tipc_handler_stop(void)
-{
-	struct list_head *l, *n;
-	struct queue_item *item;
-
-	spin_lock_bh(&qitem_lock);
-	if (!handler_enabled) {
-		spin_unlock_bh(&qitem_lock);
-		return;
-	}
-	handler_enabled = 0;
-	spin_unlock_bh(&qitem_lock);
-
-	tasklet_kill(&tipc_tasklet);
-
-	spin_lock_bh(&qitem_lock);
-	list_for_each_safe(l, n, &signal_queue_head) {
-		item = list_entry(l, struct queue_item, next_signal);
-		list_del(&item->next_signal);
-		kmem_cache_free(tipc_queue_item_cache, item);
-	}
-	spin_unlock_bh(&qitem_lock);
-
-	kmem_cache_destroy(tipc_queue_item_cache);
-}

diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 844a77e..8522eef 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c

@@ -42,7 +42,7 @@
 #include "core.h"
 #include "bearer.h"
 
-/* convert InfiniBand address to string */
+/* convert InfiniBand address (media address format) to string */
 static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
 			    int str_size)
 {
@@ -54,23 +54,35 @@
 	return 0;
 }
 
-/* convert InfiniBand address format to message header format */
-static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert from media address format to discovery message addr format */
+static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
 {
-	memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
-	msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
-	memcpy(msg_area, a->value, INFINIBAND_ALEN);
+	memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+	memcpy(msg, addr->value, INFINIBAND_ALEN);
 	return 0;
 }
 
-/* convert message header address format to InfiniBand format */
-static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr,
-			    struct tipc_media_addr *a, char *msg_area)
+/* Convert raw InfiniBand address format to media addr format */
+static int tipc_ib_raw2addr(struct tipc_bearer *b,
+			    struct tipc_media_addr *addr,
+			    char *msg)
 {
-	tipc_l2_media_addr_set(tb_ptr, a, msg_area);
+	memset(addr, 0, sizeof(*addr));
+	memcpy(addr->value, msg, INFINIBAND_ALEN);
+	addr->media_id = TIPC_MEDIA_TYPE_IB;
+	addr->broadcast = !memcmp(msg, b->bcast_addr.value,
+				  INFINIBAND_ALEN);
 	return 0;
 }
 
+/* Convert discovery msg addr format to InfiniBand media addr format */
+static int tipc_ib_msg2addr(struct tipc_bearer *b,
+			    struct tipc_media_addr *addr,
+			    char *msg)
+{
+	return tipc_ib_raw2addr(b, addr, msg);
+}
+
 /* InfiniBand media registration info */
 struct tipc_media ib_media_info = {
 	.send_msg	= tipc_l2_send_msg,
@@ -79,6 +91,7 @@
 	.addr2str	= tipc_ib_addr2str,
 	.addr2msg	= tipc_ib_addr2msg,
 	.msg2addr	= tipc_ib_msg2addr,
+	.raw2addr	= tipc_ib_raw2addr,
 	.priority	= TIPC_DEF_LINK_PRI,
 	.tolerance	= TIPC_DEF_LINK_TOL,
 	.window		= TIPC_DEF_LINK_WIN,
@@ -86,4 +99,3 @@
 	.hwaddr_len	= INFINIBAND_ALEN,
 	.name		= "ib"
 };
-

diff --git a/net/tipc/link.c b/net/tipc/link.c
index c5190ab..ad2c57f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c

@@ -37,6 +37,7 @@
 #include "core.h"
 #include "link.h"
 #include "port.h"
+#include "socket.h"
 #include "name_distr.h"
 #include "discover.h"
 #include "config.h"
@@ -101,9 +102,18 @@
 
 static void link_init_max_pkt(struct tipc_link *l_ptr)
 {
+	struct tipc_bearer *b_ptr;
 	u32 max_pkt;
 
-	max_pkt = (l_ptr->b_ptr->mtu & ~3);
+	rcu_read_lock();
+	b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+	if (!b_ptr) {
+		rcu_read_unlock();
+		return;
+	}
+	max_pkt = (b_ptr->mtu & ~3);
+	rcu_read_unlock();
+
 	if (max_pkt > MAX_MSG_SIZE)
 		max_pkt = MAX_MSG_SIZE;
 
@@ -248,7 +258,7 @@
 	l_ptr->owner = n_ptr;
 	l_ptr->checkpoint = 1;
 	l_ptr->peer_session = INVALID_SESSION;
-	l_ptr->b_ptr = b_ptr;
+	l_ptr->bearer_id = b_ptr->identity;
 	link_set_supervision_props(l_ptr, b_ptr->tolerance);
 	l_ptr->state = RESET_UNKNOWN;
 
@@ -263,6 +273,7 @@
 	l_ptr->priority = b_ptr->priority;
 	tipc_link_set_queue_limits(l_ptr, b_ptr->window);
 
+	l_ptr->net_plane = b_ptr->net_plane;
 	link_init_max_pkt(l_ptr);
 
 	l_ptr->next_out_no = 1;
@@ -287,14 +298,14 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
-		spin_lock_bh(&n_ptr->lock);
+		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->links[bearer_id];
 		if (l_ptr) {
 			tipc_link_reset(l_ptr);
 			if (shutting_down || !tipc_node_is_up(n_ptr)) {
 				tipc_node_detach_link(l_ptr->owner, l_ptr);
 				tipc_link_reset_fragments(l_ptr);
-				spin_unlock_bh(&n_ptr->lock);
+				tipc_node_unlock(n_ptr);
 
 				/* Nobody else can access this link now: */
 				del_timer_sync(&l_ptr->timer);
@@ -302,12 +313,12 @@
 			} else {
 				/* Detach/delete when failover is finished: */
 				l_ptr->flags |= LINK_STOPPED;
-				spin_unlock_bh(&n_ptr->lock);
+				tipc_node_unlock(n_ptr);
 				del_timer_sync(&l_ptr->timer);
 			}
 			continue;
 		}
-		spin_unlock_bh(&n_ptr->lock);
+		tipc_node_unlock(n_ptr);
 	}
 	rcu_read_unlock();
 }
@@ -388,9 +399,8 @@
  */
 void tipc_link_reset_fragments(struct tipc_link *l_ptr)
 {
-	kfree_skb(l_ptr->reasm_head);
-	l_ptr->reasm_head = NULL;
-	l_ptr->reasm_tail = NULL;
+	kfree_skb(l_ptr->reasm_buf);
+	l_ptr->reasm_buf = NULL;
 }
 
 /**
@@ -426,7 +436,7 @@
 		return;
 
 	tipc_node_link_down(l_ptr->owner, l_ptr);
-	tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
+	tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr);
 
 	if (was_active_link && tipc_node_active_links(l_ptr->owner)) {
 		l_ptr->reset_checkpoint = checkpoint;
@@ -464,11 +474,11 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
-		spin_lock_bh(&n_ptr->lock);
+		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->links[bearer_id];
 		if (l_ptr)
 			tipc_link_reset(l_ptr);
-		spin_unlock_bh(&n_ptr->lock);
+		tipc_node_unlock(n_ptr);
 	}
 	rcu_read_unlock();
 }
@@ -477,7 +487,7 @@
 {
 	l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
 	tipc_node_link_up(l_ptr->owner, l_ptr);
-	tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
+	tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr);
 }
 
 /**
@@ -777,7 +787,7 @@
 	if (likely(!link_congested(l_ptr))) {
 		link_add_to_outqueue(l_ptr, buf, msg);
 
-		tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+		tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
 		l_ptr->unacked_window = 0;
 		return dsz;
 	}
@@ -825,7 +835,6 @@
 	struct tipc_node *n_ptr;
 	int res = -ELINKCONG;
 
-	read_lock_bh(&tipc_net_lock);
 	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
@@ -838,7 +847,6 @@
 	} else {
 		kfree_skb(buf);
 	}
-	read_unlock_bh(&tipc_net_lock);
 	return res;
 }
 
@@ -902,7 +910,6 @@
 	if (list_empty(message_list))
 		return;
 
-	read_lock_bh(&tipc_net_lock);
 	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
@@ -917,7 +924,6 @@
 		}
 		tipc_node_unlock(n_ptr);
 	}
-	read_unlock_bh(&tipc_net_lock);
 
 	/* discard the messages if they couldn't be sent */
 	list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
@@ -941,7 +947,7 @@
 	if (likely(!link_congested(l_ptr))) {
 		if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
 			link_add_to_outqueue(l_ptr, buf, msg);
-			tipc_bearer_send(l_ptr->b_ptr, buf,
+			tipc_bearer_send(l_ptr->bearer_id, buf,
 					 &l_ptr->media_addr);
 			l_ptr->unacked_window = 0;
 			return res;
@@ -979,7 +985,6 @@
 	if (unlikely(res < 0))
 		return res;
 
-	read_lock_bh(&tipc_net_lock);
 	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
@@ -990,7 +995,6 @@
 							  &sender->max_pkt);
 exit:
 				tipc_node_unlock(node);
-				read_unlock_bh(&tipc_net_lock);
 				return res;
 			}
 
@@ -1007,7 +1011,6 @@
 			 */
 			sender->max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
-			read_unlock_bh(&tipc_net_lock);
 
 
 			if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
@@ -1018,7 +1021,6 @@
 		}
 		tipc_node_unlock(node);
 	}
-	read_unlock_bh(&tipc_net_lock);
 
 	/* Couldn't find a link to the destination node */
 	kfree_skb(buf);
@@ -1204,7 +1206,7 @@
 	if (r_q_size && buf) {
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
-		tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+		tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
 		l_ptr->retransm_queue_head = mod(++r_q_head);
 		l_ptr->retransm_queue_size = --r_q_size;
 		l_ptr->stats.retransmitted++;
@@ -1216,7 +1218,7 @@
 	if (buf) {
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
-		tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+		tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
 		l_ptr->unacked_window = 0;
 		kfree_skb(buf);
 		l_ptr->proto_msg_queue = NULL;
@@ -1233,7 +1235,8 @@
 		if (mod(next - first) < l_ptr->queue_limit[0]) {
 			msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
 			msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
-			tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+			tipc_bearer_send(l_ptr->bearer_id, buf,
+					 &l_ptr->media_addr);
 			if (msg_user(msg) == MSG_BUNDLER)
 				msg_set_type(msg, CLOSED_MSG);
 			l_ptr->next_out = buf->next;
@@ -1256,33 +1259,24 @@
 	} while (!res);
 }
 
-static void link_reset_all(unsigned long addr)
+void tipc_link_reset_all(struct tipc_node *node)
 {
-	struct tipc_node *n_ptr;
 	char addr_string[16];
 	u32 i;
 
-	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_find((u32)addr);
-	if (!n_ptr) {
-		read_unlock_bh(&tipc_net_lock);
-		return;	/* node no longer exists */
-	}
-
-	tipc_node_lock(n_ptr);
+	tipc_node_lock(node);
 
 	pr_warn("Resetting all links to %s\n",
-		tipc_addr_string_fill(addr_string, n_ptr->addr));
+		tipc_addr_string_fill(addr_string, node->addr));
 
 	for (i = 0; i < MAX_BEARERS; i++) {
-		if (n_ptr->links[i]) {
-			link_print(n_ptr->links[i], "Resetting link\n");
-			tipc_link_reset(n_ptr->links[i]);
+		if (node->links[i]) {
+			link_print(node->links[i], "Resetting link\n");
+			tipc_link_reset(node->links[i]);
 		}
 	}
 
-	tipc_node_unlock(n_ptr);
-	read_unlock_bh(&tipc_net_lock);
+	tipc_node_unlock(node);
 }
 
 static void link_retransmit_failure(struct tipc_link *l_ptr,
@@ -1319,10 +1313,9 @@
 			n_ptr->bclink.oos_state,
 			n_ptr->bclink.last_sent);
 
-		tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
-
 		tipc_node_unlock(n_ptr);
 
+		tipc_bclink_set_flags(TIPC_BCLINK_RESET);
 		l_ptr->stale_count = 0;
 	}
 }
@@ -1352,7 +1345,7 @@
 		msg = buf_msg(buf);
 		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
-		tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+		tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
 		buf = buf->next;
 		retransmits--;
 		l_ptr->stats.retransmitted++;
@@ -1440,14 +1433,13 @@
 /**
  * tipc_rcv - process TIPC packets/messages arriving from off-node
  * @head: pointer to message buffer chain
- * @tb_ptr: pointer to bearer message arrived on
+ * @b_ptr: pointer to bearer message arrived on
  *
  * Invoked with no locks held.  Bearer pointer must point to a valid bearer
  * structure (i.e. cannot be NULL), but bearer can be inactive.
  */
 void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
 {
-	read_lock_bh(&tipc_net_lock);
 	while (head) {
 		struct tipc_node *n_ptr;
 		struct tipc_link *l_ptr;
@@ -1497,14 +1489,14 @@
 			goto unlock_discard;
 
 		/* Verify that communication with node is currently allowed */
-		if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
-			msg_user(msg) == LINK_PROTOCOL &&
-			(msg_type(msg) == RESET_MSG ||
-			 msg_type(msg) == ACTIVATE_MSG) &&
-			!msg_redundant_link(msg))
-			n_ptr->block_setup &= ~WAIT_PEER_DOWN;
+		if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) &&
+		    msg_user(msg) == LINK_PROTOCOL &&
+		    (msg_type(msg) == RESET_MSG ||
+		    msg_type(msg) == ACTIVATE_MSG) &&
+		    !msg_redundant_link(msg))
+			n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
 
-		if (n_ptr->block_setup)
+		if (tipc_node_blocked(n_ptr))
 			goto unlock_discard;
 
 		/* Validate message sequence number info */
@@ -1581,17 +1573,12 @@
 			}
 			msg = buf_msg(buf);
 		} else if (msg_user(msg) == MSG_FRAGMENTER) {
-			int rc;
-
 			l_ptr->stats.recv_fragments++;
-			rc = tipc_link_frag_rcv(&l_ptr->reasm_head,
-						&l_ptr->reasm_tail,
-						&buf);
-			if (rc == LINK_REASM_COMPLETE) {
+			if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) {
 				l_ptr->stats.recv_fragmented++;
 				msg = buf_msg(buf);
 			} else {
-				if (rc == LINK_REASM_ERROR)
+				if (!l_ptr->reasm_buf)
 					tipc_link_reset(l_ptr);
 				tipc_node_unlock(n_ptr);
 				continue;
@@ -1604,7 +1591,7 @@
 		case TIPC_HIGH_IMPORTANCE:
 		case TIPC_CRITICAL_IMPORTANCE:
 			tipc_node_unlock(n_ptr);
-			tipc_port_rcv(buf);
+			tipc_sk_rcv(buf);
 			continue;
 		case MSG_BUNDLER:
 			l_ptr->stats.recv_bundles++;
@@ -1635,7 +1622,6 @@
 discard:
 		kfree_skb(buf);
 	}
-	read_unlock_bh(&tipc_net_lock);
 }
 
 /**
@@ -1747,12 +1733,12 @@
 		return;
 
 	/* Abort non-RESET send if communication with node is prohibited */
-	if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
+	if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG))
 		return;
 
 	/* Create protocol message with "out-of-sequence" sequence number */
 	msg_set_type(msg, msg_typ);
-	msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
+	msg_set_net_plane(msg, l_ptr->net_plane);
 	msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
 	msg_set_last_bcast(msg, tipc_bclink_get_last_sent());
 
@@ -1818,7 +1804,7 @@
 	skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 	buf->priority = TC_PRIO_CONTROL;
 
-	tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+	tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
 	l_ptr->unacked_window = 0;
 	kfree_skb(buf);
 }
@@ -1840,12 +1826,9 @@
 	if (l_ptr->exp_msg_count)
 		goto exit;
 
-	/* record unnumbered packet arrival (force mismatch on next timeout) */
-	l_ptr->checkpoint--;
-
-	if (l_ptr->b_ptr->net_plane != msg_net_plane(msg))
+	if (l_ptr->net_plane != msg_net_plane(msg))
 		if (tipc_own_addr > msg_prevnode(msg))
-			l_ptr->b_ptr->net_plane = msg_net_plane(msg);
+			l_ptr->net_plane = msg_net_plane(msg);
 
 	switch (msg_type(msg)) {
 
@@ -1862,7 +1845,7 @@
 			 * peer has lost contact -- don't allow peer's links
 			 * to reactivate before we recognize loss & clean up
 			 */
-			l_ptr->owner->block_setup = WAIT_NODE_DOWN;
+			l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
 		}
 
 		link_state_event(l_ptr, RESET_MSG);
@@ -1918,6 +1901,10 @@
 			tipc_link_reset(l_ptr); /* Enforce change to take effect */
 			break;
 		}
+
+		/* Record reception; force mismatch at next timeout: */
+		l_ptr->checkpoint--;
+
 		link_state_event(l_ptr, TRAFFIC_MSG_EVT);
 		l_ptr->stats.recv_states++;
 		if (link_reset_unknown(l_ptr))
@@ -2177,9 +2164,7 @@
 		}
 		if (msg_user(msg) == MSG_FRAGMENTER) {
 			l_ptr->stats.recv_fragments++;
-			tipc_link_frag_rcv(&l_ptr->reasm_head,
-					   &l_ptr->reasm_tail,
-					   &buf);
+			tipc_buf_append(&l_ptr->reasm_buf, &buf);
 		}
 	}
 exit:
@@ -2317,53 +2302,6 @@
 	return dsz;
 }
 
-/* tipc_link_frag_rcv(): Called with node lock on. Returns
- * the reassembled buffer if message is complete.
- */
-int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail,
-		       struct sk_buff **fbuf)
-{
-	struct sk_buff *frag = *fbuf;
-	struct tipc_msg *msg = buf_msg(frag);
-	u32 fragid = msg_type(msg);
-	bool headstolen;
-	int delta;
-
-	skb_pull(frag, msg_hdr_sz(msg));
-	if (fragid == FIRST_FRAGMENT) {
-		if (*head || skb_unclone(frag, GFP_ATOMIC))
-			goto out_free;
-		*head = frag;
-		skb_frag_list_init(*head);
-		*fbuf = NULL;
-		return 0;
-	} else if (*head &&
-		   skb_try_coalesce(*head, frag, &headstolen, &delta)) {
-		kfree_skb_partial(frag, headstolen);
-	} else {
-		if (!*head)
-			goto out_free;
-		if (!skb_has_frag_list(*head))
-			skb_shinfo(*head)->frag_list = frag;
-		else
-			(*tail)->next = frag;
-		*tail = frag;
-		(*head)->truesize += frag->truesize;
-	}
-	if (fragid == LAST_FRAGMENT) {
-		*fbuf = *head;
-		*tail = *head = NULL;
-		return LINK_REASM_COMPLETE;
-	}
-	*fbuf = NULL;
-	return 0;
-out_free:
-	pr_warn_ratelimited("Link unable to reassemble fragmented message\n");
-	kfree_skb(*fbuf);
-	*fbuf = NULL;
-	return LINK_REASM_ERROR;
-}
-
 static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
 {
 	if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
@@ -2397,8 +2335,6 @@
 /* tipc_link_find_owner - locate owner node of link by link's name
  * @name: pointer to link name string
  * @bearer_id: pointer to index in 'node->links' array where the link was found.
- * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
- * this also prevents link deletion.
  *
  * Returns pointer to node owning the link, or 0 if no matching link is found.
  */
@@ -2460,7 +2396,7 @@
  * @new_value: new value of link, bearer, or media setting
  * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
  *
- * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
+ * Caller must hold RTNL lock to ensure link/bearer/media is not deleted.
  *
  * Returns 0 if value updated and negative value on error.
  */
@@ -2566,9 +2502,7 @@
 						   " (cannot change setting on broadcast link)");
 	}
 
-	read_lock_bh(&tipc_net_lock);
 	res = link_cmd_set_value(args->name, new_value, cmd);
-	read_unlock_bh(&tipc_net_lock);
 	if (res)
 		return tipc_cfg_reply_error_string("cannot change link setting");
 
@@ -2602,22 +2536,18 @@
 			return tipc_cfg_reply_error_string("link not found");
 		return tipc_cfg_reply_none();
 	}
-	read_lock_bh(&tipc_net_lock);
 	node = tipc_link_find_owner(link_name, &bearer_id);
-	if (!node) {
-		read_unlock_bh(&tipc_net_lock);
+	if (!node)
 		return tipc_cfg_reply_error_string("link not found");
-	}
+
 	tipc_node_lock(node);
 	l_ptr = node->links[bearer_id];
 	if (!l_ptr) {
 		tipc_node_unlock(node);
-		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string("link not found");
 	}
 	link_reset_statistics(l_ptr);
 	tipc_node_unlock(node);
-	read_unlock_bh(&tipc_net_lock);
 	return tipc_cfg_reply_none();
 }
 
@@ -2650,18 +2580,15 @@
 	if (!strcmp(name, tipc_bclink_name))
 		return tipc_bclink_stats(buf, buf_size);
 
-	read_lock_bh(&tipc_net_lock);
 	node = tipc_link_find_owner(name, &bearer_id);
-	if (!node) {
-		read_unlock_bh(&tipc_net_lock);
+	if (!node)
 		return 0;
-	}
+
 	tipc_node_lock(node);
 
 	l = node->links[bearer_id];
 	if (!l) {
 		tipc_node_unlock(node);
-		read_unlock_bh(&tipc_net_lock);
 		return 0;
 	}
 
@@ -2727,7 +2654,6 @@
 			     (s->accu_queue_sz / s->queue_sz_counts) : 0);
 
 	tipc_node_unlock(node);
-	read_unlock_bh(&tipc_net_lock);
 	return ret;
 }
 
@@ -2778,7 +2704,6 @@
 	if (dest == tipc_own_addr)
 		return MAX_MSG_SIZE;
 
-	read_lock_bh(&tipc_net_lock);
 	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
@@ -2787,13 +2712,18 @@
 			res = l_ptr->max_pkt;
 		tipc_node_unlock(n_ptr);
 	}
-	read_unlock_bh(&tipc_net_lock);
 	return res;
 }
 
 static void link_print(struct tipc_link *l_ptr, const char *str)
 {
-	pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name);
+	struct tipc_bearer *b_ptr;
+
+	rcu_read_lock();
+	b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+	if (b_ptr)
+		pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
+	rcu_read_unlock();
 
 	if (link_working_unknown(l_ptr))
 		pr_cont(":WU\n");

diff --git a/net/tipc/link.h b/net/tipc/link.h
index 8c0b49b..200d518 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h

@@ -40,11 +40,6 @@
 #include "msg.h"
 #include "node.h"
 
-/* Link reassembly status codes
- */
-#define LINK_REASM_ERROR	-1
-#define LINK_REASM_COMPLETE	1
-
 /* Out-of-range value for link sequence numbers
  */
 #define INVALID_LINK_SEQ 0x10000
@@ -107,7 +102,7 @@
  * @checkpoint: reference point for triggering link continuity checking
  * @peer_session: link session # being used by peer end of link
  * @peer_bearer_id: bearer id used by link's peer endpoint
- * @b_ptr: pointer to bearer used by link
+ * @bearer_id: local bearer id used by link
  * @tolerance: minimum link continuity loss needed to reset link [in ms]
  * @continuity_interval: link continuity testing interval [in ms]
  * @abort_limit: # of unacknowledged continuity probes needed to reset link
@@ -116,6 +111,7 @@
  * @proto_msg: template for control messages generated by link
  * @pmsg: convenience pointer to "proto_msg" field
  * @priority: current link priority
+ * @net_plane: current link network plane ('A' through 'H')
  * @queue_limit: outbound message queue congestion thresholds (indexed by user)
  * @exp_msg_count: # of tunnelled messages expected during link changeover
  * @reset_checkpoint: seq # of last acknowledged message at time of link reset
@@ -139,8 +135,7 @@
  * @next_out: ptr to first unsent outbound message in queue
  * @waiting_ports: linked list of ports waiting for link congestion to abate
  * @long_msg_seq_no: next identifier to use for outbound fragmented messages
- * @reasm_head: list head of partially reassembled inbound message fragments
- * @reasm_tail: last fragment received
+ * @reasm_buf: head of partially reassembled inbound message fragments
  * @stats: collects statistics regarding link activity
  */
 struct tipc_link {
@@ -155,7 +150,7 @@
 	u32 checkpoint;
 	u32 peer_session;
 	u32 peer_bearer_id;
-	struct tipc_bearer *b_ptr;
+	u32 bearer_id;
 	u32 tolerance;
 	u32 continuity_interval;
 	u32 abort_limit;
@@ -167,6 +162,7 @@
 	} proto_msg;
 	struct tipc_msg *pmsg;
 	u32 priority;
+	char net_plane;
 	u32 queue_limit[15];	/* queue_limit[0]==window limit */
 
 	/* Changeover */
@@ -202,8 +198,7 @@
 
 	/* Fragmentation/reassembly */
 	u32 long_msg_seq_no;
-	struct sk_buff *reasm_head;
-	struct sk_buff *reasm_tail;
+	struct sk_buff *reasm_buf;
 
 	/* Statistics */
 	struct tipc_stats stats;
@@ -228,6 +223,7 @@
 					 int req_tlv_space);
 struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area,
 					  int req_tlv_space);
+void tipc_link_reset_all(struct tipc_node *node);
 void tipc_link_reset(struct tipc_link *l_ptr);
 void tipc_link_reset_list(unsigned int bearer_id);
 int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector);
@@ -239,9 +235,6 @@
 			      struct iovec const *msg_sect,
 			      unsigned int len, u32 destnode);
 void tipc_link_bundle_rcv(struct sk_buff *buf);
-int tipc_link_frag_rcv(struct sk_buff **reasm_head,
-		       struct sk_buff **reasm_tail,
-		       struct sk_buff **fbuf);
 void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
 			  u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
 void tipc_link_push_queue(struct tipc_link *l_ptr);

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e525f8c..8be6e94 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c

@@ -1,7 +1,7 @@
 /*
  * net/tipc/msg.c: TIPC message header routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014, Ericsson AB
  * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -99,3 +99,69 @@
 	}
 	return dsz;
 }
+
+/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
+ * @headbuf: in/out pointer to the head buffer of the chain being built
+ * @buf: in/out pointer to the fragment to append
+ *
+ * The first fragment becomes the head buffer; later fragments are either
+ * coalesced into the head or linked onto its fragment list.
+ *
+ * Returns 1 and sets *buf to the head buffer if the chain is complete,
+ * otherwise returns 0 and sets *buf to NULL.
+ * On failure the fragment and any partially built chain are freed, and
+ * *headbuf is left at NULL.
+ */
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
+{
+	struct sk_buff *head = *headbuf;
+	struct sk_buff *frag = *buf;
+	struct sk_buff *tail;
+	struct tipc_msg *msg = buf_msg(frag);
+	u32 fragid = msg_type(msg);
+	bool headstolen;
+	int delta;
+
+	skb_pull(frag, msg_hdr_sz(msg));
+
+	if (fragid == FIRST_FRAGMENT) {
+		/* A first fragment must not arrive while a chain is pending */
+		if (head || skb_unclone(frag, GFP_ATOMIC))
+			goto out_free;
+		head = *headbuf = frag;
+		skb_frag_list_init(head);
+		/* The fragment is now owned by the chain, not by the caller */
+		*buf = NULL;
+		return 0;
+	}
+	if (!head)
+		goto out_free;
+	tail = TIPC_SKB_CB(head)->tail;
+	if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
+		kfree_skb_partial(frag, headstolen);
+	} else {
+		if (!skb_has_frag_list(head))
+			skb_shinfo(head)->frag_list = frag;
+		else
+			tail->next = frag;
+		head->truesize += frag->truesize;
+		head->data_len += frag->len;
+		head->len += frag->len;
+		TIPC_SKB_CB(head)->tail = frag;
+	}
+	if (fragid == LAST_FRAGMENT) {
+		*buf = head;
+		TIPC_SKB_CB(head)->tail = NULL;
+		*headbuf = NULL;
+		return 1;
+	}
+	*buf = NULL;
+	return 0;
+out_free:
+	pr_warn_ratelimited("Unable to build fragment list\n");
+	/* Drop the fragment and any partial chain; leave no stale pointers */
+	kfree_skb(*buf);
+	kfree_skb(*headbuf);
+	*buf = *headbuf = NULL;
+	return 0;
+}

diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 76d1269..5035119 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h

@@ -1,7 +1,7 @@
 /*
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
- * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2000-2007, 2014, Ericsson AB
  * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -711,4 +711,7 @@
 		   u32 destnode);
 int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
 		   unsigned int len, int max_size, struct sk_buff **buf);
+
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
+
 #endif

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index aff8041..8ce7309 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c

@@ -38,34 +38,6 @@
 #include "link.h"
 #include "name_distr.h"
 
-#define ITEM_SIZE sizeof(struct distr_item)
-
-/**
- * struct distr_item - publication info distributed to other nodes
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @ref: publishing port reference
- * @key: publication key
- *
- * ===> All fields are stored in network byte order. <===
- *
- * First 3 fields identify (name or) name sequence being published.
- * Reference field uniquely identifies port that published name sequence.
- * Key field uniquely identifies publication, in the event a port has
- * multiple publications of the same name sequence.
- *
- * Note: There is no field that identifies the publishing node because it is
- * the same for all items contained within a publication message.
- */
-struct distr_item {
-	__be32 type;
-	__be32 lower;
-	__be32 upper;
-	__be32 ref;
-	__be32 key;
-};
-
 /**
  * struct publ_list - list of publications made by this node
  * @list: circular list of publications
@@ -127,7 +99,7 @@
 	return buf;
 }
 
-static void named_cluster_distribute(struct sk_buff *buf)
+void named_cluster_distribute(struct sk_buff *buf)
 {
 	struct sk_buff *buf_copy;
 	struct tipc_node *n_ptr;
@@ -135,18 +107,18 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
-		spin_lock_bh(&n_ptr->lock);
+		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[n_ptr->addr & 1];
 		if (l_ptr) {
 			buf_copy = skb_copy(buf, GFP_ATOMIC);
 			if (!buf_copy) {
-				spin_unlock_bh(&n_ptr->lock);
+				tipc_node_unlock(n_ptr);
 				break;
 			}
 			msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
 			__tipc_link_xmit(l_ptr, buf_copy);
 		}
-		spin_unlock_bh(&n_ptr->lock);
+		tipc_node_unlock(n_ptr);
 	}
 	rcu_read_unlock();
 
@@ -156,7 +128,7 @@
 /**
  * tipc_named_publish - tell other nodes about a new publication by this node
  */
-void tipc_named_publish(struct publication *publ)
+struct sk_buff *tipc_named_publish(struct publication *publ)
 {
 	struct sk_buff *buf;
 	struct distr_item *item;
@@ -165,23 +137,23 @@
 	publ_lists[publ->scope]->size++;
 
 	if (publ->scope == TIPC_NODE_SCOPE)
-		return;
+		return NULL;
 
 	buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
 	if (!buf) {
 		pr_warn("Publication distribution failure\n");
-		return;
+		return NULL;
 	}
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	named_cluster_distribute(buf);
+	return buf;
 }
 
 /**
  * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
  */
-void tipc_named_withdraw(struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct publication *publ)
 {
 	struct sk_buff *buf;
 	struct distr_item *item;
@@ -190,17 +162,17 @@
 	publ_lists[publ->scope]->size--;
 
 	if (publ->scope == TIPC_NODE_SCOPE)
-		return;
+		return NULL;
 
 	buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
 	if (!buf) {
 		pr_warn("Withdrawal distribution failure\n");
-		return;
+		return NULL;
 	}
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	named_cluster_distribute(buf);
+	return buf;
 }
 
 /*
@@ -239,31 +211,9 @@
 /**
  * tipc_named_node_up - tell specified node about all publications by this node
  */
-void tipc_named_node_up(unsigned long nodearg)
+void tipc_named_node_up(u32 max_item_buf, u32 node)
 {
-	struct tipc_node *n_ptr;
-	struct tipc_link *l_ptr;
-	struct list_head message_list;
-	u32 node = (u32)nodearg;
-	u32 max_item_buf = 0;
-
-	/* compute maximum amount of publication data to send per message */
-	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_find(node);
-	if (n_ptr) {
-		tipc_node_lock(n_ptr);
-		l_ptr = n_ptr->active_links[0];
-		if (l_ptr)
-			max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) /
-				ITEM_SIZE) * ITEM_SIZE;
-		tipc_node_unlock(n_ptr);
-	}
-	read_unlock_bh(&tipc_net_lock);
-	if (!max_item_buf)
-		return;
-
-	/* create list of publication messages, then send them as a unit */
-	INIT_LIST_HEAD(&message_list);
+	LIST_HEAD(message_list);
 
 	read_lock_bh(&tipc_nametbl_lock);
 	named_distribute(&message_list, node, &publ_cluster, max_item_buf);

diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 9b312cc..b2eed4e 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h

@@ -39,9 +39,38 @@
 
 #include "name_table.h"
 
-void tipc_named_publish(struct publication *publ);
-void tipc_named_withdraw(struct publication *publ);
-void tipc_named_node_up(unsigned long node);
+#define ITEM_SIZE sizeof(struct distr_item)
+
+/**
+ * struct distr_item - publication info distributed to other nodes
+ * @type: name sequence type
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @ref: publishing port reference
+ * @key: publication key
+ *
+ * ===> All fields are stored in network byte order. <===
+ *
+ * First 3 fields identify (name or) name sequence being published.
+ * Reference field uniquely identifies port that published name sequence.
+ * Key field uniquely identifies publication, in the event a port has
+ * multiple publications of the same name sequence.
+ *
+ * Note: There is no field that identifies the publishing node because it is
+ * the same for all items contained within a publication message.
+ */
+struct distr_item {
+	__be32 type;
+	__be32 lower;
+	__be32 upper;
+	__be32 ref;
+	__be32 key;
+};
+
+struct sk_buff *tipc_named_publish(struct publication *publ);
+struct sk_buff *tipc_named_withdraw(struct publication *publ);
+void named_cluster_distribute(struct sk_buff *buf);
+void tipc_named_node_up(u32 max_item_buf, u32 node);
 void tipc_named_rcv(struct sk_buff *buf);
 void tipc_named_reinit(void);
 

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 042e8e3..9d7d37d 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c

@@ -664,6 +664,7 @@
 					 u32 scope, u32 port_ref, u32 key)
 {
 	struct publication *publ;
+	struct sk_buff *buf = NULL;
 
 	if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
 		pr_warn("Publication failed, local publication limit reached (%u)\n",
@@ -676,9 +677,12 @@
 				   tipc_own_addr, port_ref, key);
 	if (likely(publ)) {
 		table.local_publ_count++;
-		tipc_named_publish(publ);
+		buf = tipc_named_publish(publ);
 	}
 	write_unlock_bh(&tipc_nametbl_lock);
+
+	if (buf)
+		named_cluster_distribute(buf);
 	return publ;
 }
 
@@ -688,15 +692,19 @@
 int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 {
 	struct publication *publ;
+	struct sk_buff *buf;
 
 	write_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
 	if (likely(publ)) {
 		table.local_publ_count--;
-		tipc_named_withdraw(publ);
+		buf = tipc_named_withdraw(publ);
 		write_unlock_bh(&tipc_nametbl_lock);
 		list_del_init(&publ->pport_list);
 		kfree(publ);
+
+		if (buf)
+			named_cluster_distribute(buf);
 		return 1;
 	}
 	write_unlock_bh(&tipc_nametbl_lock);
@@ -961,6 +969,7 @@
 	list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
 		tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
 					 publ->ref, publ->key);
+		kfree(publ);
 	}
 }
 
@@ -982,7 +991,6 @@
 		hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
 			tipc_purge_publications(seq);
 		}
-		continue;
 	}
 	kfree(table.types);
 	table.types = NULL;

diff --git a/net/tipc/net.c b/net/tipc/net.c
index 4c564eb..f64375e 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c

@@ -39,45 +39,41 @@
 #include "name_distr.h"
 #include "subscr.h"
 #include "port.h"
+#include "socket.h"
 #include "node.h"
 #include "config.h"
 
 /*
  * The TIPC locking policy is designed to ensure a very fine locking
  * granularity, permitting complete parallel access to individual
- * port and node/link instances. The code consists of three major
+ * port and node/link instances. The code consists of four major
  * locking domains, each protected with their own disjunct set of locks.
  *
- * 1: The routing hierarchy.
- *    Comprises the structures 'zone', 'cluster', 'node', 'link'
- *    and 'bearer'. The whole hierarchy is protected by a big
- *    read/write lock, tipc_net_lock, to enssure that nothing is added
- *    or removed while code is accessing any of these structures.
- *    This layer must not be called from the two others while they
- *    hold any of their own locks.
- *    Neither must it itself do any upcalls to the other two before
- *    it has released tipc_net_lock and other protective locks.
+ * 1: The bearer level.
+ *    The RTNL lock serializes bearer configuration on the update
+ *    side, while RCU is used on the read side to keep the bearer
+ *    instance valid on both the message transmission and reception
+ *    paths.
  *
- *   Within the tipc_net_lock domain there are two sub-domains;'node' and
- *   'bearer', where local write operations are permitted,
- *   provided that those are protected by individual spin_locks
- *   per instance. Code holding tipc_net_lock(read) and a node spin_lock
- *   is permitted to poke around in both the node itself and its
- *   subordinate links. I.e, it can update link counters and queues,
- *   change link state, send protocol messages, and alter the
- *   "active_links" array in the node; but it can _not_ remove a link
- *   or a node from the overall structure.
- *   Correspondingly, individual bearers may change status within a
- *   tipc_net_lock(read), protected by an individual spin_lock ber bearer
- *   instance, but it needs tipc_net_lock(write) to remove/add any bearers.
+ * 2: The node and link level.
+ *    All node instances are kept in two lists, tipc_node_list and
+ *    node_htable. Both lists are protected by node_list_lock on the
+ *    write side and by RCU on the read side. Node instances are
+ *    destroyed only when the TIPC module is removed, at which point
+ *    no user can still be accessing them. Therefore, apart from
+ *    iterating the two lists, which must be done under RCU
+ *    protection, there is no need to hold the RCU read lock when
+ *    accessing a node instance elsewhere.
+ *
+ *    In addition, all members of the node structure, including its
+ *    link instances, are protected by the node spin lock.
  *
- *  2: The transport level of the protocol.
- *     This consists of the structures port, (and its user level
- *     representations, such as user_port and tipc_sock), reference and
- *     tipc_user (port.c, reg.c, socket.c).
+ * 3: The transport level of the protocol.
+ *    This consists of the structures port, (and its user level
+ *    representations, such as user_port and tipc_sock), reference and
+ *    tipc_user (port.c, reg.c, socket.c).
  *
- *     This layer has four different locks:
+ *    This layer has four different locks:
  *     - The tipc_port spin_lock. This is protecting each port instance
  *       from parallel data access and removal. Since we can not place
  *       this lock in the port itself, it has been placed in the
@@ -96,7 +92,7 @@
  *       There are two such lists; 'port_list', which is used for management,
  *       and 'wait_list', which is used to queue ports during congestion.
  *
- *  3: The name table (name_table.c, name_distr.c, subscription.c)
+ *  4: The name table (name_table.c, name_distr.c, subscription.c)
  *     - There is one big read/write-lock (tipc_nametbl_lock) protecting the
  *       overall name table structure. Nothing must be added/removed to
  *       this structure without holding write access to it.
@@ -108,8 +104,6 @@
  *     - A local spin_lock protecting the queue of subscriber events.
 */
 
-DEFINE_RWLOCK(tipc_net_lock);
-
 static void net_route_named_msg(struct sk_buff *buf)
 {
 	struct tipc_msg *msg = buf_msg(buf);
@@ -148,7 +142,7 @@
 			if (msg_mcast(msg))
 				tipc_port_mcast_rcv(buf, NULL);
 			else if (msg_destport(msg))
-				tipc_port_rcv(buf);
+				tipc_sk_rcv(buf);
 			else
 				net_route_named_msg(buf);
 			return;
@@ -171,22 +165,25 @@
 	tipc_link_xmit(buf, dnode, msg_link_selector(msg));
 }
 
-void tipc_net_start(u32 addr)
+int tipc_net_start(u32 addr)
 {
 	char addr_string[16];
+	int res;
 
-	write_lock_bh(&tipc_net_lock);
 	tipc_own_addr = addr;
 	tipc_named_reinit();
 	tipc_port_reinit();
-	tipc_bclink_init();
-	write_unlock_bh(&tipc_net_lock);
+	res = tipc_bclink_init();
+	if (res)
+		return res;
 
 	tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr,
 			     TIPC_ZONE_SCOPE, 0, tipc_own_addr);
+
 	pr_info("Started in network mode\n");
 	pr_info("Own node address %s, network identity %u\n",
 		tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+	return 0;
 }
 
 void tipc_net_stop(void)
@@ -195,11 +192,11 @@
 		return;
 
 	tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr);
-	write_lock_bh(&tipc_net_lock);
+	rtnl_lock();
 	tipc_bearer_stop();
 	tipc_bclink_stop();
 	tipc_node_stop();
-	write_unlock_bh(&tipc_net_lock);
+	rtnl_unlock();
 
 	pr_info("Left network mode\n");
 }

diff --git a/net/tipc/net.h b/net/tipc/net.h
index 079daad..c6c2b46 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h

@@ -37,11 +37,9 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-extern rwlock_t tipc_net_lock;
-
 void tipc_net_route_msg(struct sk_buff *buf);
 
-void tipc_net_start(u32 addr);
+int tipc_net_start(u32 addr);
 void tipc_net_stop(void);
 
 #endif

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 1d3a499..5b44c30 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c

@@ -108,7 +108,7 @@
 			break;
 	}
 	list_add_tail_rcu(&n_ptr->list, &temp_node->list);
-	n_ptr->block_setup = WAIT_PEER_DOWN;
+	n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
 	n_ptr->signature = INVALID_NODE_SIG;
 
 	tipc_num_nodes++;
@@ -144,11 +144,13 @@
 void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
 {
 	struct tipc_link **active = &n_ptr->active_links[0];
+	u32 addr = n_ptr->addr;
 
 	n_ptr->working_links++;
-
+	tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE,
+			     l_ptr->bearer_id, addr);
 	pr_info("Established link <%s> on network plane %c\n",
-		l_ptr->name, l_ptr->b_ptr->net_plane);
+		l_ptr->name, l_ptr->net_plane);
 
 	if (!active[0]) {
 		active[0] = active[1] = l_ptr;
@@ -203,16 +205,18 @@
 void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
 {
 	struct tipc_link **active;
+	u32 addr = n_ptr->addr;
 
 	n_ptr->working_links--;
+	tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr);
 
 	if (!tipc_link_is_active(l_ptr)) {
 		pr_info("Lost standby link <%s> on network plane %c\n",
-			l_ptr->name, l_ptr->b_ptr->net_plane);
+			l_ptr->name, l_ptr->net_plane);
 		return;
 	}
 	pr_info("Lost link <%s> on network plane %c\n",
-		l_ptr->name, l_ptr->b_ptr->net_plane);
+		l_ptr->name, l_ptr->net_plane);
 
 	active = &n_ptr->active_links[0];
 	if (active[0] == l_ptr)
@@ -239,7 +243,7 @@
 
 void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
 {
-	n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
+	n_ptr->links[l_ptr->bearer_id] = l_ptr;
 	spin_lock_bh(&node_list_lock);
 	tipc_num_links++;
 	spin_unlock_bh(&node_list_lock);
@@ -263,26 +267,12 @@
 
 static void node_established_contact(struct tipc_node *n_ptr)
 {
-	tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
+	n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
 	n_ptr->bclink.oos_state = 0;
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 	tipc_bclink_add_node(n_ptr->addr);
 }
 
-static void node_name_purge_complete(unsigned long node_addr)
-{
-	struct tipc_node *n_ptr;
-
-	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_find(node_addr);
-	if (n_ptr) {
-		tipc_node_lock(n_ptr);
-		n_ptr->block_setup &= ~WAIT_NAMES_GONE;
-		tipc_node_unlock(n_ptr);
-	}
-	read_unlock_bh(&tipc_net_lock);
-}
-
 static void node_lost_contact(struct tipc_node *n_ptr)
 {
 	char addr_string[16];
@@ -296,10 +286,9 @@
 		kfree_skb_list(n_ptr->bclink.deferred_head);
 		n_ptr->bclink.deferred_size = 0;
 
-		if (n_ptr->bclink.reasm_head) {
-			kfree_skb(n_ptr->bclink.reasm_head);
-			n_ptr->bclink.reasm_head = NULL;
-			n_ptr->bclink.reasm_tail = NULL;
+		if (n_ptr->bclink.reasm_buf) {
+			kfree_skb(n_ptr->bclink.reasm_buf);
+			n_ptr->bclink.reasm_buf = NULL;
 		}
 
 		tipc_bclink_remove_node(n_ptr->addr);
@@ -318,12 +307,13 @@
 		tipc_link_reset_fragments(l_ptr);
 	}
 
-	/* Notify subscribers */
-	tipc_nodesub_notify(n_ptr);
+	n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
 
-	/* Prevent re-contact with node until cleanup is done */
-	n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
-	tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
+	/* Notify subscribers and prevent re-contact with node until
+	 * cleanup is done.
+	 */
+	n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN |
+			       TIPC_NOTIFY_NODE_DOWN;
 }
 
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
@@ -436,3 +426,82 @@
 	rcu_read_unlock();
 	return buf;
 }
+
+/**
+ * tipc_node_get_linkname - get the name of a link
+ *
+ * @bearer_id: id of the bearer
+ * @addr: peer node address
+ * @linkname: link name output buffer
+ * @len: size of the @linkname buffer in bytes
+ *
+ * The name is copied under the node lock and is always NUL-terminated,
+ * being truncated if it does not fit in @len bytes.
+ *
+ * Returns 0 on success, -EINVAL if @bearer_id is out of range or if no
+ * matching node or link exists.
+ */
+int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len)
+{
+	struct tipc_link *link;
+	struct tipc_node *node = tipc_node_find(addr);
+
+	if ((bearer_id >= MAX_BEARERS) || !node)
+		return -EINVAL;
+	tipc_node_lock(node);
+	link = node->links[bearer_id];
+	if (link) {
+		strncpy(linkname, link->name, len);
+		/* strncpy() does not terminate the string on truncation */
+		if (len)
+			linkname[len - 1] = '\0';
+		tipc_node_unlock(node);
+		return 0;
+	}
+	tipc_node_unlock(node);
+	return -EINVAL;
+}
+
+/**
+ * tipc_node_unlock - release the node lock and run deferred notifications
+ * @node: node whose lock is held by the caller
+ *
+ * If no action flags are set the lock is simply dropped. Otherwise the
+ * pending TIPC_NOTIFY_NODE_DOWN/TIPC_NOTIFY_NODE_UP requests are cleared
+ * and their inputs captured while the lock is still held, and the
+ * notifications themselves are issued only after the lock is released.
+ */
+void tipc_node_unlock(struct tipc_node *node)
+{
+	LIST_HEAD(nsub_list);
+	struct tipc_link *link;
+	int pkt_sz = 0;
+	u32 addr = 0;
+
+	if (likely(!node->action_flags)) {
+		spin_unlock_bh(&node->lock);
+		return;
+	}
+
+	if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) {
+		/* Detach the subscriber list so it can be walked unlocked */
+		list_replace_init(&node->nsub, &nsub_list);
+		node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN;
+	}
+	if (node->action_flags & TIPC_NOTIFY_NODE_UP) {
+		link = node->active_links[0];
+		node->action_flags &= ~TIPC_NOTIFY_NODE_UP;
+		if (link) {
+			/* Payload space rounded down to whole items */
+			pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) *
+				  ITEM_SIZE;
+			addr = node->addr;
+		}
+	}
+	spin_unlock_bh(&node->lock);
+
+	if (!list_empty(&nsub_list))
+		tipc_nodesub_notify(&nsub_list);
+	if (pkt_sz)
+		tipc_named_node_up(pkt_sz, addr);
+}

diff --git a/net/tipc/node.h b/net/tipc/node.h
index 7cbb8ce..9087063 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h

@@ -47,62 +47,73 @@
  */
 #define INVALID_NODE_SIG 0x10000
 
-/* Flags used to block (re)establishment of contact with a neighboring node */
-#define WAIT_PEER_DOWN	0x0001	/* wait to see that peer's links are down */
-#define WAIT_NAMES_GONE	0x0002	/* wait for peer's publications to be purged */
-#define WAIT_NODE_DOWN	0x0004	/* wait until peer node is declared down */
+/* Flags used to take different actions according to flag type
+ * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
+ * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
+ * TIPC_NOTIFY_NODE_DOWN: notify node is down
+ * TIPC_NOTIFY_NODE_UP: notify node is up
+ */
+enum {
+	TIPC_WAIT_PEER_LINKS_DOWN	= (1 << 1),
+	TIPC_WAIT_OWN_LINKS_DOWN	= (1 << 2),
+	TIPC_NOTIFY_NODE_DOWN		= (1 << 3),
+	TIPC_NOTIFY_NODE_UP		= (1 << 4)
+};
+
+/**
+ * struct tipc_node_bclink - TIPC node bclink structure
+ * @acked: sequence # of last outbound b'cast message acknowledged by node
+ * @last_in: sequence # of last in-sequence b'cast message received from node
+ * @last_sent: sequence # of last b'cast message sent by node
+ * @oos_state: state tracker for handling OOS b'cast messages
+ * @deferred_size: number of OOS b'cast messages in deferred queue
+ * @deferred_head: oldest OOS b'cast message received from node
+ * @deferred_tail: newest OOS b'cast message received from node
+ * @reasm_buf: broadcast reassembly queue head from node
+ * @recv_permitted: true if node is allowed to receive b'cast messages
+ */
+struct tipc_node_bclink {
+	u32 acked;
+	u32 last_in;
+	u32 last_sent;
+	u32 oos_state;
+	u32 deferred_size;
+	struct sk_buff *deferred_head;
+	struct sk_buff *deferred_tail;
+	struct sk_buff *reasm_buf;
+	bool recv_permitted;
+};
 
 /**
  * struct tipc_node - TIPC node structure
  * @addr: network address of node
  * @lock: spinlock governing access to structure
  * @hash: links to adjacent nodes in unsorted hash chain
- * @list: links to adjacent nodes in sorted list of cluster's nodes
- * @nsub: list of "node down" subscriptions monitoring node
  * @active_links: pointers to active links to node
  * @links: pointers to all links to node
+ * @action_flags: bit mask of different types of node actions
+ * @bclink: broadcast-related info
+ * @list: links to adjacent nodes in sorted list of cluster's nodes
  * @working_links: number of working links to node (both active and standby)
- * @block_setup: bit mask of conditions preventing link establishment to node
  * @link_cnt: number of links to node
  * @signature: node instance identifier
- * @bclink: broadcast-related info
+ * @nsub: list of "node down" subscriptions monitoring node
  * @rcu: rcu struct for tipc_node
- *    @acked: sequence # of last outbound b'cast message acknowledged by node
- *    @last_in: sequence # of last in-sequence b'cast message received from node
- *    @last_sent: sequence # of last b'cast message sent by node
- *    @oos_state: state tracker for handling OOS b'cast messages
- *    @deferred_size: number of OOS b'cast messages in deferred queue
- *    @deferred_head: oldest OOS b'cast message received from node
- *    @deferred_tail: newest OOS b'cast message received from node
- *    @reasm_head: broadcast reassembly queue head from node
- *    @reasm_tail: last broadcast fragment received from node
- *    @recv_permitted: true if node is allowed to receive b'cast messages
  */
 struct tipc_node {
 	u32 addr;
 	spinlock_t lock;
 	struct hlist_node hash;
-	struct list_head list;
-	struct list_head nsub;
 	struct tipc_link *active_links[2];
 	struct tipc_link *links[MAX_BEARERS];
+	unsigned int action_flags;
+	struct tipc_node_bclink bclink;
+	struct list_head list;
 	int link_cnt;
 	int working_links;
-	int block_setup;
 	u32 signature;
+	struct list_head nsub;
 	struct rcu_head rcu;
-	struct {
-		u32 acked;
-		u32 last_in;
-		u32 last_sent;
-		u32 oos_state;
-		u32 deferred_size;
-		struct sk_buff *deferred_head;
-		struct sk_buff *deferred_tail;
-		struct sk_buff *reasm_head;
-		struct sk_buff *reasm_tail;
-		bool recv_permitted;
-	} bclink;
 };
 
 extern struct list_head tipc_node_list;
@@ -118,15 +129,18 @@
 int tipc_node_is_up(struct tipc_node *n_ptr);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
+int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len);
+void tipc_node_unlock(struct tipc_node *node);
 
-static inline void tipc_node_lock(struct tipc_node *n_ptr)
+static inline void tipc_node_lock(struct tipc_node *node)
 {
-	spin_lock_bh(&n_ptr->lock);
+	spin_lock_bh(&node->lock);
 }
 
-static inline void tipc_node_unlock(struct tipc_node *n_ptr)
+static inline bool tipc_node_blocked(struct tipc_node *node)
 {
-	spin_unlock_bh(&n_ptr->lock);
+	return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
+		TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
 }
 
 #endif

diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 8a7384c..7c59ab1 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c

@@ -81,14 +81,13 @@
  *
  * Note: node is locked by caller
  */
-void tipc_nodesub_notify(struct tipc_node *node)
+void tipc_nodesub_notify(struct list_head *nsub_list)
 {
-	struct tipc_node_subscr *ns;
+	struct tipc_node_subscr *ns, *safe;
 
-	list_for_each_entry(ns, &node->nsub, nodesub_list) {
+	list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) {
 		if (ns->handle_node_down) {
-			tipc_k_signal((Handler)ns->handle_node_down,
-				      (unsigned long)ns->usr_handle);
+			ns->handle_node_down(ns->usr_handle);
 			ns->handle_node_down = NULL;
 		}
 	}

diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
index c95d207..d91b8cc 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/node_subscr.h

@@ -58,6 +58,6 @@
 void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
 			    void *usr_handle, net_ev_handler handle_down);
 void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
-void tipc_nodesub_notify(struct tipc_node *node);
+void tipc_nodesub_notify(struct list_head *nsub_list);
 
 #endif

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5c14c78..5fd7acc 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c

@@ -165,7 +165,7 @@
 		msg_set_destnode(msg, tipc_own_addr);
 		if (dp->count == 1) {
 			msg_set_destport(msg, dp->ports[0]);
-			tipc_port_rcv(buf);
+			tipc_sk_rcv(buf);
 			tipc_port_list_free(dp);
 			return;
 		}
@@ -180,7 +180,7 @@
 			if ((index == 0) && (cnt != 0))
 				item = item->next;
 			msg_set_destport(buf_msg(b), item->ports[index]);
-			tipc_port_rcv(b);
+			tipc_sk_rcv(b);
 		}
 	}
 exit:
@@ -343,7 +343,7 @@
 	/* send returned message & dispose of rejected message */
 	src_node = msg_prevnode(msg);
 	if (in_own_node(src_node))
-		tipc_port_rcv(rbuf);
+		tipc_sk_rcv(rbuf);
 	else
 		tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg));
 exit:
@@ -754,37 +754,6 @@
 	return tipc_port_disconnect(ref);
 }
 
-/**
- * tipc_port_rcv - receive message from lower layer and deliver to port user
- */
-int tipc_port_rcv(struct sk_buff *buf)
-{
-	struct tipc_port *p_ptr;
-	struct tipc_msg *msg = buf_msg(buf);
-	u32 destport = msg_destport(msg);
-	u32 dsz = msg_data_sz(msg);
-	u32 err;
-
-	/* forward unresolved named message */
-	if (unlikely(!destport)) {
-		tipc_net_route_msg(buf);
-		return dsz;
-	}
-
-	/* validate destination & pass to port, otherwise reject message */
-	p_ptr = tipc_port_lock(destport);
-	if (likely(p_ptr)) {
-		err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf);
-		tipc_port_unlock(p_ptr);
-		if (likely(!err))
-			return dsz;
-	} else {
-		err = TIPC_ERR_NO_PORT;
-	}
-
-	return tipc_reject_msg(buf, err);
-}
-
 /*
  *  tipc_port_iovec_rcv: Concatenate and deliver sectioned
  *                       message for this node.
@@ -798,7 +767,7 @@
 
 	res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf);
 	if (likely(buf))
-		tipc_port_rcv(buf);
+		tipc_sk_rcv(buf);
 	return res;
 }
 

diff --git a/net/tipc/port.h b/net/tipc/port.h
index a003973..cf4ca5b 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h

@@ -42,9 +42,10 @@
 #include "msg.h"
 #include "node_subscr.h"
 
-#define TIPC_FLOW_CONTROL_WIN 512
-#define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
-				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
+#define TIPC_CONNACK_INTV         256
+#define TIPC_FLOWCTRL_WIN        (TIPC_CONNACK_INTV * 2)
+#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
+				  SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
 
 /**
  * struct tipc_port - TIPC port structure
@@ -134,7 +135,6 @@
 /*
  * TIPC messaging routines
  */
-int tipc_port_rcv(struct sk_buff *buf);
 
 int tipc_send(struct tipc_port *port,
 	      struct iovec const *msg_sect,
@@ -187,7 +187,7 @@
 
 static inline int tipc_port_congested(struct tipc_port *p_ptr)
 {
-	return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
+	return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN);
 }
 
 

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3c02569..ef04755 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c

@@ -36,6 +36,7 @@
 
 #include "core.h"
 #include "port.h"
+#include "node.h"
 
 #include <linux/export.h>
 
@@ -44,7 +45,7 @@
 
 #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
 
-static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
 static int tipc_release(struct socket *sock);
@@ -195,11 +196,12 @@
 	sock->state = state;
 
 	sock_init_data(sock, sk);
-	sk->sk_backlog_rcv = backlog_rcv;
+	sk->sk_backlog_rcv = tipc_backlog_rcv;
 	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
-	tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
+	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+	atomic_set(&tsk->dupl_rcvcnt, 0);
 	tipc_port_unlock(port);
 
 	if (sock->state == SS_READY) {
@@ -983,10 +985,11 @@
 	return 0;
 }
 
-static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
+static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
 {
 	struct sock *sk = sock->sk;
 	DEFINE_WAIT(wait);
+	long timeo = *timeop;
 	int err;
 
 	for (;;) {
@@ -1011,6 +1014,7 @@
 			break;
 	}
 	finish_wait(sk_sleep(sk), &wait);
+	*timeop = timeo;
 	return err;
 }
 
@@ -1054,7 +1058,7 @@
 restart:
 
 	/* Look for a message in receive queue; wait if necessary */
-	res = tipc_wait_for_rcvmsg(sock, timeo);
+	res = tipc_wait_for_rcvmsg(sock, &timeo);
 	if (res)
 		goto exit;
 
@@ -1100,7 +1104,7 @@
 	/* Consume received message (optional) */
 	if (likely(!(flags & MSG_PEEK))) {
 		if ((sock->state != SS_READY) &&
-		    (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+		    (++port->conn_unacked >= TIPC_CONNACK_INTV))
 			tipc_acknowledge(port->ref, port->conn_unacked);
 		advance_rx_queue(sk);
 	}
@@ -1152,7 +1156,7 @@
 
 restart:
 	/* Look for a message in receive queue; wait if necessary */
-	res = tipc_wait_for_rcvmsg(sock, timeo);
+	res = tipc_wait_for_rcvmsg(sock, &timeo);
 	if (res)
 		goto exit;
 
@@ -1209,7 +1213,7 @@
 
 	/* Consume received message (optional) */
 	if (likely(!(flags & MSG_PEEK))) {
-		if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+		if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV))
 			tipc_acknowledge(port->ref, port->conn_unacked);
 		advance_rx_queue(sk);
 	}
@@ -1415,7 +1419,7 @@
 }
 
 /**
- * backlog_rcv - handle incoming message from backlog queue
+ * tipc_backlog_rcv - handle incoming message from backlog queue
  * @sk: socket
  * @buf: message
  *
@@ -1423,47 +1427,74 @@
  *
  * Returns 0
  */
-static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
+static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
 {
 	u32 res;
+	struct tipc_sock *tsk = tipc_sk(sk);
+	uint truesize = buf->truesize;
 
 	res = filter_rcv(sk, buf);
-	if (res)
+	if (unlikely(res))
 		tipc_reject_msg(buf, res);
+
+	if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
+		atomic_add(truesize, &tsk->dupl_rcvcnt);
+
 	return 0;
 }
 
 /**
  * tipc_sk_rcv - handle incoming message
- * @sk:  socket receiving message
- * @buf: message
- *
- * Called with port lock already taken.
- *
- * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ * @buf: buffer containing arriving message
+ * Consumes buffer
+ * Returns 0 if success, or errno: -EHOSTUNREACH
  */
-u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf)
+int tipc_sk_rcv(struct sk_buff *buf)
 {
-	u32 res;
+	struct tipc_sock *tsk;
+	struct tipc_port *port;
+	struct sock *sk;
+	u32 dport = msg_destport(buf_msg(buf));
+	int err = TIPC_OK;
+	uint limit;
 
-	/*
-	 * Process message if socket is unlocked; otherwise add to backlog queue
-	 *
-	 * This code is based on sk_receive_skb(), but must be distinct from it
-	 * since a TIPC-specific filter/reject mechanism is utilized
-	 */
-	bh_lock_sock(sk);
-	if (!sock_owned_by_user(sk)) {
-		res = filter_rcv(sk, buf);
-	} else {
-		if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
-			res = TIPC_ERR_OVERLOAD;
-		else
-			res = TIPC_OK;
+	/* Forward unresolved named message */
+	if (unlikely(!dport)) {
+		tipc_net_route_msg(buf);
+		return 0;
 	}
-	bh_unlock_sock(sk);
 
-	return res;
+	/* Validate destination */
+	port = tipc_port_lock(dport);
+	if (unlikely(!port)) {
+		err = TIPC_ERR_NO_PORT;
+		goto exit;
+	}
+
+	tsk = tipc_port_to_sock(port);
+	sk = &tsk->sk;
+
+	/* Queue message */
+	bh_lock_sock(sk);
+
+	if (!sock_owned_by_user(sk)) {
+		err = filter_rcv(sk, buf);
+	} else {
+		if (sk->sk_backlog.len == 0)
+			atomic_set(&tsk->dupl_rcvcnt, 0);
+		limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt);
+		if (sk_add_backlog(sk, buf, limit))
+			err = TIPC_ERR_OVERLOAD;
+	}
+
+	bh_unlock_sock(sk);
+	tipc_port_unlock(port);
+
+	if (likely(!err))
+		return 0;
+exit:
+	tipc_reject_msg(buf, err);
+	return -EHOSTUNREACH;
 }
 
 static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
@@ -1905,6 +1936,28 @@
 	return put_user(sizeof(value), ol);
 }
 
+int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
+{
+	struct tipc_sioc_ln_req lnr;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case SIOCGETLINKNAME:
+		if (copy_from_user(&lnr, argp, sizeof(lnr)))
+			return -EFAULT;
+		if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer,
+					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
+			if (copy_to_user(argp, &lnr, sizeof(lnr)))
+				return -EFAULT;
+			return 0;
+		}
+		return -EADDRNOTAVAIL;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
 /* Protocol switches for the various types of TIPC sockets */
 
 static const struct proto_ops msg_ops = {
@@ -1917,7 +1970,7 @@
 	.accept		= sock_no_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
@@ -1938,7 +1991,7 @@
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
@@ -1959,7 +2012,7 @@
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,

diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 74e5c7f..3afcd2a 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h

@@ -44,12 +44,14 @@
  * @port: port - interacts with 'sk' and with the rest of the TIPC stack
  * @peer_name: the peer of the connection, if any
  * @conn_timeout: the time we can wait for an unresponded setup request
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
  */
 
 struct tipc_sock {
 	struct sock sk;
 	struct tipc_port port;
 	unsigned int conn_timeout;
+	atomic_t dupl_rcvcnt;
 };
 
 static inline struct tipc_sock *tipc_sk(const struct sock *sk)
@@ -67,6 +69,6 @@
 	tsk->sk.sk_write_space(&tsk->sk);
 }
 
-u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf);
+int tipc_sk_rcv(struct sk_buff *buf);
 
 #endif

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 749f80c..e968843 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c

@@ -1492,10 +1492,14 @@
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 
-	if (len > SKB_MAX_ALLOC)
+	if (len > SKB_MAX_ALLOC) {
 		data_len = min_t(size_t,
 				 len - SKB_MAX_ALLOC,
 				 MAX_SKB_FRAGS * PAGE_SIZE);
+		data_len = PAGE_ALIGN(data_len);
+
+		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
+	}
 
 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
 				   msg->msg_flags & MSG_DONTWAIT, &err,
@@ -1670,6 +1674,8 @@
 
 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
 
+		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+
 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
 					   msg->msg_flags & MSG_DONTWAIT, &err,
 					   get_order(UNIX_SKB_FRAGS_SZ));

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 16d08b3..405f3c4 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig

@@ -95,6 +95,43 @@
 	  you are a wireless researcher and are working in a controlled
 	  and approved environment by your local regulatory agency.
 
+config CFG80211_REG_CELLULAR_HINTS
+	bool "cfg80211 regulatory support for cellular base station hints"
+	depends on CFG80211_CERTIFICATION_ONUS
+	---help---
+	  This option enables support for parsing regulatory hints
+	  from cellular base stations. If enabled and at least one driver
+	  claims support for parsing cellular base station hints the
+	  regulatory core will allow and parse these regulatory hints.
+	  The regulatory core will only apply these regulatory hints on
+	  drivers that support this feature. You should only enable this
+	  feature if you have tested and validated this feature on your
+	  systems.
+
+config CFG80211_REG_RELAX_NO_IR
+	bool "cfg80211 support for NO_IR relaxation"
+	depends on CFG80211_CERTIFICATION_ONUS
+	---help---
+	 This option enables support for relaxation of the NO_IR flag for
+	 situations that certain regulatory bodies have provided clarifications
+	 on how relaxation can occur. This feature has an inherent dependency on
+	 userspace features which must have been properly tested and as such is
+	 not enabled by default.
+
+	 A relaxation feature example is allowing the operation of a P2P group
+	 owner (GO) on channels marked with NO_IR if there is an additional BSS
+	 interface which is associated to an AP which userspace assumes or confirms
+	 to be an authorized master, i.e., with radar detection support and DFS
+	 capabilities. However, note that in order to not create daisy chain
+	 scenarios, this relaxation is not allowed in cases that the BSS client
+	 is associated to P2P GO and in addition the P2P GO instantiated on
+	 a channel due to this relaxation should not allow connection from
+	 non P2P clients.
+
+	 The regulatory core will apply these relaxations only for drivers that
+	 support this feature by declaring the appropriate channel flags and
+	 capabilities in their registration flow.
+
 config CFG80211_DEFAULT_PS
 	bool "enable powersave by default"
 	depends on CFG80211

diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 3e02ade..bdad1f9 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c

@@ -6,8 +6,8 @@
 #include "rdev-ops.h"
 
 
-static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
-			      struct net_device *dev, bool notify)
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev, bool notify)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 9c9501a..992b340 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c

@@ -326,28 +326,57 @@
 
 
 int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
-				  const struct cfg80211_chan_def *chandef)
+				  const struct cfg80211_chan_def *chandef,
+				  enum nl80211_iftype iftype)
 {
 	int width;
-	int r;
+	int ret;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
 		return -EINVAL;
 
-	width = cfg80211_chandef_get_width(chandef);
-	if (width < 0)
-		return -EINVAL;
+	switch (iftype) {
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_MESH_POINT:
+		width = cfg80211_chandef_get_width(chandef);
+		if (width < 0)
+			return -EINVAL;
 
-	r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1,
-					    width);
-	if (r)
-		return r;
+		ret = cfg80211_get_chans_dfs_required(wiphy,
+						      chandef->center_freq1,
+						      width);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			return BIT(chandef->width);
 
-	if (!chandef->center_freq2)
-		return 0;
+		if (!chandef->center_freq2)
+			return 0;
 
-	return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2,
-					       width);
+		ret = cfg80211_get_chans_dfs_required(wiphy,
+						      chandef->center_freq2,
+						      width);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			return BIT(chandef->width);
+
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_P2P_DEVICE:
+		break;
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case NUM_NL80211_IFTYPES:
+		WARN_ON(1);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(cfg80211_chandef_dfs_required);
 
@@ -587,12 +616,14 @@
 		width = 5;
 		break;
 	case NL80211_CHAN_WIDTH_10:
+		prohibited_flags |= IEEE80211_CHAN_NO_10MHZ;
 		width = 10;
 		break;
 	case NL80211_CHAN_WIDTH_20:
 		if (!ht_cap->ht_supported)
 			return false;
 	case NL80211_CHAN_WIDTH_20_NOHT:
+		prohibited_flags |= IEEE80211_CHAN_NO_20MHZ;
 		width = 20;
 		break;
 	case NL80211_CHAN_WIDTH_40:
@@ -661,17 +692,111 @@
 }
 EXPORT_SYMBOL(cfg80211_chandef_usable);
 
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef)
+/*
+ * For GO only, check if the channel can be used under permissive conditions
+ * mandated by some regulatory bodies, i.e., the channel is marked with
+ * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface
+ * associated to an AP on the same channel or on the same UNII band
+ * (assuming that the AP is an authorized master).
+ * In addition allow the GO to operate on a channel on which indoor operation is
+ * allowed, iff we are currently operating in an indoor environment.
+ */
+static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
+					struct ieee80211_channel *chan)
 {
+	struct wireless_dev *wdev_iter;
+	struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx);
+
+	ASSERT_RTNL();
+
+	if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) ||
+	    !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
+		return false;
+
+	if (regulatory_indoor_allowed() &&
+	    (chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
+		return true;
+
+	if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT))
+		return false;
+
+	/*
+	 * Generally, it is possible to rely on another device/driver to allow
+	 * the GO concurrent relaxation, however, since the device can further
+	 * enforce the relaxation (by doing similar verifications to this),
+	 * and thus fail the GO instantiation, consider only the interfaces of
+	 * the current registered device.
+	 */
+	list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
+		struct ieee80211_channel *other_chan = NULL;
+		int r1, r2;
+
+		if (wdev_iter->iftype != NL80211_IFTYPE_STATION ||
+		    !netif_running(wdev_iter->netdev))
+			continue;
+
+		wdev_lock(wdev_iter);
+		if (wdev_iter->current_bss)
+			other_chan = wdev_iter->current_bss->pub.channel;
+		wdev_unlock(wdev_iter);
+
+		if (!other_chan)
+			continue;
+
+		if (chan == other_chan)
+			return true;
+
+		if (chan->band != IEEE80211_BAND_5GHZ)
+			continue;
+
+		r1 = cfg80211_get_unii(chan->center_freq);
+		r2 = cfg80211_get_unii(other_chan->center_freq);
+
+		if (r1 != -EINVAL && r1 == r2) {
+			/*
+			 * At some locations channels 149-165 are considered a
+			 * bundle, but at other locations, e.g., Indonesia,
+			 * channels 149-161 are considered a bundle while
+			 * channel 165 is left out and considered to be in a
+			 * different bundle. Thus, in case that there is a
+			 * station interface connected to an AP on channel 165,
+			 * it is assumed that channels 149-161 are allowed for
+			 * GO operations. However, having a station interface
+			 * connected to an AP on channels 149-161, does not
+			 * allow GO operation on channel 165.
+			 */
+			if (chan->center_freq == 5825 &&
+			    other_chan->center_freq != 5825)
+				continue;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
+			     struct cfg80211_chan_def *chandef,
+			     enum nl80211_iftype iftype)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	bool res;
 	u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
-			       IEEE80211_CHAN_NO_IR |
 			       IEEE80211_CHAN_RADAR;
 
-	trace_cfg80211_reg_can_beacon(wiphy, chandef);
+	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
 
-	if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 &&
+	/*
+	 * Under certain conditions suggested by some regulatory bodies
+	 * a GO can operate on channels marked with IEEE80211_CHAN_NO_IR
+	 * so set this flag only if such relaxations are not enabled and
+	 * the conditions are not met.
+	 */
+	if (iftype != NL80211_IFTYPE_P2P_GO ||
+	    !cfg80211_go_permissive_chan(rdev, chandef->chan))
+		prohibited_flags |= IEEE80211_CHAN_NO_IR;
+
+	if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
 	    cfg80211_chandef_dfs_available(wiphy, chandef)) {
 		/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
 		prohibited_flags = IEEE80211_CHAN_DISABLED;
@@ -701,6 +826,8 @@
 		        enum cfg80211_chan_mode *chanmode,
 		        u8 *radar_detect)
 {
+	int ret;
+
 	*chan = NULL;
 	*chanmode = CHAN_MODE_UNDEFINED;
 
@@ -743,8 +870,11 @@
 			*chan = wdev->chandef.chan;
 			*chanmode = CHAN_MODE_SHARED;
 
-			if (cfg80211_chandef_dfs_required(wdev->wiphy,
-							  &wdev->chandef))
+			ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+							    &wdev->chandef,
+							    wdev->iftype);
+			WARN_ON(ret < 0);
+			if (ret > 0)
 				*radar_detect |= BIT(wdev->chandef.width);
 		}
 		return;
@@ -753,8 +883,11 @@
 			*chan = wdev->chandef.chan;
 			*chanmode = CHAN_MODE_SHARED;
 
-			if (cfg80211_chandef_dfs_required(wdev->wiphy,
-							  &wdev->chandef))
+			ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+							    &wdev->chandef,
+							    wdev->iftype);
+			WARN_ON(ret < 0);
+			if (ret > 0)
 				*radar_detect |= BIT(wdev->chandef.width);
 		}
 		return;

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 086cddd..a1c4065 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c

@@ -69,7 +69,7 @@
 
 int get_wiphy_idx(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	return rdev->wiphy_idx;
 }
@@ -130,7 +130,7 @@
 			    newname))
 		pr_err("failed to rename debugfs dir to %s!\n", newname);
 
-	nl80211_notify_dev_rename(rdev);
+	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
 
 	return 0;
 }
@@ -210,15 +210,12 @@
 	}
 }
 
-static int cfg80211_rfkill_set_block(void *data, bool blocked)
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = data;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct wireless_dev *wdev;
 
-	if (!blocked)
-		return 0;
-
-	rtnl_lock();
+	ASSERT_RTNL();
 
 	list_for_each_entry(wdev, &rdev->wdev_list, list) {
 		if (wdev->netdev) {
@@ -234,7 +231,18 @@
 			break;
 		}
 	}
+}
+EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces);
 
+static int cfg80211_rfkill_set_block(void *data, bool blocked)
+{
+	struct cfg80211_registered_device *rdev = data;
+
+	if (!blocked)
+		return 0;
+
+	rtnl_lock();
+	cfg80211_shutdown_all_interfaces(&rdev->wiphy);
 	rtnl_unlock();
 
 	return 0;
@@ -260,6 +268,45 @@
 	rtnl_unlock();
 }
 
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_iface_destroy *item;
+
+	ASSERT_RTNL();
+
+	spin_lock_irq(&rdev->destroy_list_lock);
+	while ((item = list_first_entry_or_null(&rdev->destroy_list,
+						struct cfg80211_iface_destroy,
+						list))) {
+		struct wireless_dev *wdev, *tmp;
+		u32 nlportid = item->nlportid;
+
+		list_del(&item->list);
+		kfree(item);
+		spin_unlock_irq(&rdev->destroy_list_lock);
+
+		list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) {
+			if (nlportid == wdev->owner_nlportid)
+				rdev_del_virtual_intf(rdev, wdev);
+		}
+
+		spin_lock_irq(&rdev->destroy_list_lock);
+	}
+	spin_unlock_irq(&rdev->destroy_list_lock);
+}
+
+static void cfg80211_destroy_iface_wk(struct work_struct *work)
+{
+	struct cfg80211_registered_device *rdev;
+
+	rdev = container_of(work, struct cfg80211_registered_device,
+			    destroy_work);
+
+	rtnl_lock();
+	cfg80211_destroy_ifaces(rdev);
+	rtnl_unlock();
+}
+
 /* exported functions */
 
 struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
@@ -318,6 +365,10 @@
 	rdev->wiphy.dev.class = &ieee80211_class;
 	rdev->wiphy.dev.platform_data = rdev;
 
+	INIT_LIST_HEAD(&rdev->destroy_list);
+	spin_lock_init(&rdev->destroy_list_lock);
+	INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
+
 #ifdef CONFIG_CFG80211_DEFAULT_PS
 	rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
 #endif
@@ -351,6 +402,8 @@
 	rdev->wiphy.rts_threshold = (u32) -1;
 	rdev->wiphy.coverage_class = 0;
 
+	rdev->wiphy.max_num_csa_counters = 1;
+
 	return &rdev->wiphy;
 }
 EXPORT_SYMBOL(wiphy_new);
@@ -396,10 +449,7 @@
 		for (j = 0; j < c->n_limits; j++) {
 			u16 types = c->limits[j].types;
 
-			/*
-			 * interface types shouldn't overlap, this is
-			 * used in cfg80211_can_change_interface()
-			 */
+			/* interface types shouldn't overlap */
 			if (WARN_ON(types & all_iftypes))
 				return -EINVAL;
 			all_iftypes |= types;
@@ -435,7 +485,7 @@
 
 int wiphy_register(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	int res;
 	enum ieee80211_band band;
 	struct ieee80211_supported_band *sband;
@@ -610,13 +660,15 @@
 		return res;
 	}
 
+	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+
 	return 0;
 }
 EXPORT_SYMBOL(wiphy_register);
 
 void wiphy_rfkill_start_polling(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	if (!rdev->ops->rfkill_poll)
 		return;
@@ -627,7 +679,7 @@
 
 void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	rfkill_pause_polling(rdev->rfkill);
 }
@@ -635,7 +687,7 @@
 
 void wiphy_unregister(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	wait_event(rdev->dev_wait, ({
 		int __count;
@@ -648,9 +700,10 @@
 		rfkill_unregister(rdev->rfkill);
 
 	rtnl_lock();
+	nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
 	rdev->wiphy.registered = false;
 
-	BUG_ON(!list_empty(&rdev->wdev_list));
+	WARN_ON(!list_empty(&rdev->wdev_list));
 
 	/*
 	 * First remove the hardware from everywhere, this makes
@@ -675,6 +728,7 @@
 	cancel_work_sync(&rdev->conn_work);
 	flush_work(&rdev->event_work);
 	cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
+	flush_work(&rdev->destroy_work);
 
 #ifdef CONFIG_PM
 	if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -707,7 +761,7 @@
 
 void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	if (rfkill_set_hw_state(rdev->rfkill, blocked))
 		schedule_work(&rdev->rfkill_sync);
@@ -716,7 +770,7 @@
 
 void cfg80211_unregister_wdev(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	ASSERT_RTNL();
 
@@ -751,23 +805,23 @@
 		rdev->num_running_monitor_ifaces += num;
 }
 
-void cfg80211_leave(struct cfg80211_registered_device *rdev,
-		    struct wireless_dev *wdev)
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev)
 {
 	struct net_device *dev = wdev->netdev;
 
 	ASSERT_RTNL();
+	ASSERT_WDEV_LOCK(wdev);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_ADHOC:
-		cfg80211_leave_ibss(rdev, dev, true);
+		__cfg80211_leave_ibss(rdev, dev, true);
 		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
 		if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev)
 			__cfg80211_stop_sched_scan(rdev, false);
 
-		wdev_lock(wdev);
 #ifdef CONFIG_CFG80211_WEXT
 		kfree(wdev->wext.ie);
 		wdev->wext.ie = NULL;
@@ -776,32 +830,60 @@
 #endif
 		cfg80211_disconnect(rdev, dev,
 				    WLAN_REASON_DEAUTH_LEAVING, true);
-		wdev_unlock(wdev);
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		cfg80211_leave_mesh(rdev, dev);
+		__cfg80211_leave_mesh(rdev, dev);
 		break;
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		cfg80211_stop_ap(rdev, dev, true);
+		__cfg80211_stop_ap(rdev, dev, true);
 		break;
 	default:
 		break;
 	}
 }
 
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+		    struct wireless_dev *wdev)
+{
+	wdev_lock(wdev);
+	__cfg80211_leave(rdev, wdev);
+	wdev_unlock(wdev);
+}
+
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct cfg80211_event *ev;
+	unsigned long flags;
+
+	trace_cfg80211_stop_iface(wiphy, wdev);
+
+	ev = kzalloc(sizeof(*ev), gfp);
+	if (!ev)
+		return;
+
+	ev->type = EVENT_STOPPED;
+
+	spin_lock_irqsave(&wdev->event_lock, flags);
+	list_add_tail(&ev->list, &wdev->event_list);
+	spin_unlock_irqrestore(&wdev->event_lock, flags);
+	queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_stop_iface);
+
 static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 					 unsigned long state, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev;
-	int ret;
 
 	if (!wdev)
 		return NOTIFY_DONE;
 
-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);
 
 	WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED);
 
@@ -959,13 +1041,14 @@
 	case NETDEV_PRE_UP:
 		if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
 			return notifier_from_errno(-EOPNOTSUPP);
-		ret = cfg80211_can_add_interface(rdev, wdev->iftype);
-		if (ret)
-			return notifier_from_errno(ret);
+		if (rfkill_blocked(rdev->rfkill))
+			return notifier_from_errno(-ERFKILL);
 		break;
+	default:
+		return NOTIFY_DONE;
 	}
 
-	return NOTIFY_DONE;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block cfg80211_netdev_notifier = {

diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5b1fdca..e9afbf1 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h

@@ -80,13 +80,17 @@
 
 	struct cfg80211_coalesce *coalesce;
 
+	spinlock_t destroy_list_lock;
+	struct list_head destroy_list;
+	struct work_struct destroy_work;
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __aligned(NETDEV_ALIGN);
 };
 
 static inline
-struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy)
 {
 	BUG_ON(!wiphy);
 	return container_of(wiphy, struct cfg80211_registered_device, wiphy);
@@ -181,6 +185,7 @@
 	EVENT_ROAMED,
 	EVENT_DISCONNECTED,
 	EVENT_IBSS_JOINED,
+	EVENT_STOPPED,
 };
 
 struct cfg80211_event {
@@ -232,6 +237,13 @@
 	u32 nlportid;
 };
 
+struct cfg80211_iface_destroy {
+	struct list_head list;
+	u32 nlportid;
+};
+
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
+
 /* free object */
 void cfg80211_dev_free(struct cfg80211_registered_device *rdev);
 
@@ -240,8 +252,8 @@
 
 void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
 
-void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
-void cfg80211_bss_age(struct cfg80211_registered_device *dev,
+void cfg80211_bss_expire(struct cfg80211_registered_device *rdev);
+void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
                       unsigned long age_secs);
 
 /* IBSS */
@@ -270,6 +282,8 @@
 		       struct net_device *dev,
 		       struct mesh_setup *setup,
 		       const struct mesh_config *conf);
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			  struct net_device *dev);
 int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
 int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
@@ -277,6 +291,8 @@
 			      struct cfg80211_chan_def *chandef);
 
 /* AP */
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev, bool notify);
 int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
 		     struct net_device *dev, bool notify);
 
@@ -401,35 +417,6 @@
 cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
 			      const struct cfg80211_chan_def *chandef);
 
-static inline int
-cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
-			      struct wireless_dev *wdev,
-			      enum nl80211_iftype iftype)
-{
-	return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL,
-					    CHAN_MODE_UNDEFINED, 0);
-}
-
-static inline int
-cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
-			   enum nl80211_iftype iftype)
-{
-	if (rfkill_blocked(rdev->rfkill))
-		return -ERFKILL;
-
-	return cfg80211_can_change_interface(rdev, NULL, iftype);
-}
-
-static inline int
-cfg80211_can_use_chan(struct cfg80211_registered_device *rdev,
-		      struct wireless_dev *wdev,
-		      struct ieee80211_channel *chan,
-		      enum cfg80211_chan_mode chanmode)
-{
-	return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					    chan, chanmode, 0);
-}
-
 static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
 {
 	unsigned long end = jiffies;
@@ -459,6 +446,8 @@
 void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
 			       enum nl80211_iftype iftype, int num);
 
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev);
 void cfg80211_leave(struct cfg80211_registered_device *rdev,
 		    struct wireless_dev *wdev);
 

diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e37862f..d4860bf 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c

@@ -43,7 +43,7 @@
 				   struct ethtool_ringparam *rp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	memset(rp, 0, sizeof(*rp));
 
@@ -56,7 +56,7 @@
 				  struct ethtool_ringparam *rp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
 		return -EINVAL;
@@ -70,7 +70,7 @@
 static int cfg80211_get_sset_count(struct net_device *dev, int sset)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	if (rdev->ops->get_et_sset_count)
 		return rdev_get_et_sset_count(rdev, dev, sset);
 	return -EOPNOTSUPP;
@@ -80,7 +80,7 @@
 			       struct ethtool_stats *stats, u64 *data)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	if (rdev->ops->get_et_stats)
 		rdev_get_et_stats(rdev, dev, stats, data);
 }
@@ -88,7 +88,7 @@
 static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	if (rdev->ops->get_et_strings)
 		rdev_get_et_strings(rdev, dev, sset, data);
 }

diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index b35da8d..40c37fc 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk

@@ -68,17 +68,7 @@
 	sub(/,/, "", units)
 	dfs_cac = $9
 	if (units == "mW") {
-		if (power == 100) {
-			power = 20
-		} else if (power == 200) {
-			power = 23
-		} else if (power == 500) {
-			power = 27
-		} else if (power == 1000) {
-			power = 30
-		} else {
-			print "Unknown power value in database!"
-		}
+		power = 10 * log(power)/log(10)
 	} else {
 		dfs_cac = $8
 	}
@@ -117,7 +107,7 @@
 
 	}
 	flags = flags "0"
-	printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
+	printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
 	rules++
 }
 

diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index a6b5bda..8f345da 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c

@@ -45,7 +45,7 @@
 
 	cfg80211_upload_connect_keys(wdev);
 
-	nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid,
+	nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
 				GFP_KERNEL);
 #ifdef CONFIG_CFG80211_WEXT
 	memset(&wrqu, 0, sizeof(wrqu));
@@ -58,7 +58,7 @@
 			  struct ieee80211_channel *channel, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_event *ev;
 	unsigned long flags;
 
@@ -88,8 +88,6 @@
 				struct cfg80211_cached_keys *connkeys)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *check_chan;
-	u8 radar_detect_width = 0;
 	int err;
 
 	ASSERT_WDEV_LOCK(wdev);
@@ -126,28 +124,6 @@
 #ifdef CONFIG_CFG80211_WEXT
 	wdev->wext.ibss.chandef = params->chandef;
 #endif
-	check_chan = params->chandef.chan;
-	if (params->userspace_handles_dfs) {
-		/* Check for radar even if the current channel is not
-		 * a radar channel - it might decide to change to DFS
-		 * channel later.
-		 */
-		radar_detect_width = BIT(params->chandef.width);
-	}
-
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   check_chan,
-					   (params->channel_fixed &&
-					    !radar_detect_width)
-					   ? CHAN_MODE_SHARED
-					   : CHAN_MODE_EXCLUSIVE,
-					   radar_detect_width);
-
-	if (err) {
-		wdev->connect_keys = NULL;
-		return err;
-	}
-
 	err = rdev_join_ibss(rdev, dev, params);
 	if (err) {
 		wdev->connect_keys = NULL;
@@ -180,7 +156,7 @@
 static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int i;
 
 	ASSERT_WDEV_LOCK(wdev);
@@ -335,7 +311,7 @@
 			       struct iw_freq *wextfreq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_channel *chan = NULL;
 	int err, freq;
 
@@ -346,7 +322,7 @@
 	if (!rdev->ops->join_ibss)
 		return -EOPNOTSUPP;
 
-	freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+	freq = cfg80211_wext_freq(wextfreq);
 	if (freq < 0)
 		return freq;
 
@@ -420,7 +396,7 @@
 				struct iw_point *data, char *ssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	size_t len = data->length;
 	int err;
 
@@ -444,8 +420,8 @@
 	if (len > 0 && ssid[len - 1] == '\0')
 		len--;
 
+	memcpy(wdev->ssid, ssid, len);
 	wdev->wext.ibss.ssid = wdev->ssid;
-	memcpy(wdev->wext.ibss.ssid, ssid, len);
 	wdev->wext.ibss.ssid_len = len;
 
 	wdev_lock(wdev);
@@ -487,7 +463,7 @@
 			     struct sockaddr *ap_addr, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u8 *bssid = ap_addr->sa_data;
 	int err;
 
@@ -505,6 +481,9 @@
 	if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid))
 		bssid = NULL;
 
+	if (bssid && !is_valid_ether_addr(bssid))
+		return -EINVAL;
+
 	/* both automatic */
 	if (!bssid && !wdev->wext.ibss.bssid)
 		return 0;

diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 5af5cc6..092300b 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c

@@ -99,7 +99,6 @@
 			 const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	u8 radar_detect_width = 0;
 	int err;
 
 	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
@@ -175,22 +174,10 @@
 							       scan_width);
 	}
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef,
+				     NL80211_IFTYPE_MESH_POINT))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef);
-	if (err < 0)
-		return err;
-	if (err)
-		radar_detect_width = BIT(setup->chandef.width);
-
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   setup->chandef.chan,
-					   CHAN_MODE_SHARED,
-					   radar_detect_width);
-	if (err)
-		return err;
-
 	err = rdev_join_mesh(rdev, dev, conf, setup);
 	if (!err) {
 		memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
@@ -236,17 +223,6 @@
 		if (!netif_running(wdev->netdev))
 			return -ENETDOWN;
 
-		/* cfg80211_can_use_chan() calls
-		 * cfg80211_can_use_iftype_chan() with no radar
-		 * detection, so if we're trying to use a radar
-		 * channel here, something is wrong.
-		 */
-		WARN_ON_ONCE(chandef->chan->flags & IEEE80211_CHAN_RADAR);
-		err = cfg80211_can_use_chan(rdev, wdev, chandef->chan,
-					    CHAN_MODE_SHARED);
-		if (err)
-			return err;
-
 		err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev,
 						     chandef->chan);
 		if (!err)
@@ -262,8 +238,8 @@
 	return 0;
 }
 
-static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
-				 struct net_device *dev)
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			  struct net_device *dev)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;

diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index c52ff59..266766b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c

@@ -23,7 +23,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
 	u8 *ie = mgmt->u.assoc_resp.variable;
 	int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
@@ -54,7 +54,7 @@
 static void cfg80211_process_auth(struct wireless_dev *wdev,
 				  const u8 *buf, size_t len)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL);
 	cfg80211_sme_rx_auth(wdev, buf, len);
@@ -63,7 +63,7 @@
 static void cfg80211_process_deauth(struct wireless_dev *wdev,
 				    const u8 *buf, size_t len)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
 	const u8 *bssid = mgmt->bssid;
 	u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
@@ -82,7 +82,7 @@
 static void cfg80211_process_disassoc(struct wireless_dev *wdev,
 				      const u8 *buf, size_t len)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
 	const u8 *bssid = mgmt->bssid;
 	u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
@@ -123,7 +123,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_send_auth_timeout(dev, addr);
 
@@ -136,7 +136,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_send_assoc_timeout(dev, bss->bssid);
 
@@ -172,7 +172,7 @@
 				  const u8 *tsc, gfp_t gfp)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 #ifdef CONFIG_CFG80211_WEXT
 	union iwreq_data wrqu;
 	char *buf = kmalloc(128, gfp);
@@ -233,14 +233,8 @@
 	if (!req.bss)
 		return -ENOENT;
 
-	err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
-				    CHAN_MODE_SHARED);
-	if (err)
-		goto out;
-
 	err = rdev_auth(rdev, dev, &req);
 
-out:
 	cfg80211_put_bss(&rdev->wiphy, req.bss);
 	return err;
 }
@@ -306,16 +300,10 @@
 	if (!req->bss)
 		return -ENOENT;
 
-	err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
-	if (err)
-		goto out;
-
 	err = rdev_assoc(rdev, dev, req);
 	if (!err)
 		cfg80211_hold_bss(bss_from_pub(req->bss));
-
-out:
-	if (err)
+	else
 		cfg80211_put_bss(&rdev->wiphy, req->bss);
 
 	return err;
@@ -414,7 +402,7 @@
 				int match_len)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_mgmt_registration *reg, *nreg;
 	int err = 0;
 	u16 mgmt_type;
@@ -473,7 +461,7 @@
 void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_mgmt_registration *reg, *tmp;
 
 	spin_lock_bh(&wdev->mgmt_registrations_lock);
@@ -620,7 +608,7 @@
 		      const u8 *buf, size_t len, u32 flags, gfp_t gfp)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_mgmt_registration *reg;
 	const struct ieee80211_txrx_stypes *stypes =
 		&wiphy->mgmt_stypes[wdev->iftype];
@@ -739,7 +727,7 @@
 			  struct cfg80211_chan_def *chandef,
 			  gfp_t gfp)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	unsigned long timeout;
 
 	trace_cfg80211_radar_event(wiphy, chandef);
@@ -764,7 +752,7 @@
 {
 	struct wireless_dev *wdev = netdev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	unsigned long timeout;
 
 	trace_cfg80211_cac_event(netdev, event);

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 052c1bf..ba4f172 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c

@@ -168,8 +168,8 @@
 		netdev = __dev_get_by_index(netns, ifindex);
 		if (netdev) {
 			if (netdev->ieee80211_ptr)
-				tmp = wiphy_to_dev(
-						netdev->ieee80211_ptr->wiphy);
+				tmp = wiphy_to_rdev(
+					netdev->ieee80211_ptr->wiphy);
 			else
 				tmp = NULL;
 
@@ -371,8 +371,8 @@
 	[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
 	[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
-	[NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
-	[NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
+	[NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
+	[NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
 	[NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
 	[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
 	[NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG },
@@ -385,6 +385,8 @@
 	[NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
+	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
+	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
 };
 
 /* policy for the key attributes */
@@ -484,7 +486,7 @@
 			err = PTR_ERR(*wdev);
 			goto out_unlock;
 		}
-		*rdev = wiphy_to_dev((*wdev)->wiphy);
+		*rdev = wiphy_to_rdev((*wdev)->wiphy);
 		/* 0 is the first index - add 1 to parse only once */
 		cb->args[0] = (*rdev)->wiphy_idx + 1;
 		cb->args[1] = (*wdev)->identifier;
@@ -497,7 +499,7 @@
 			err = -ENODEV;
 			goto out_unlock;
 		}
-		*rdev = wiphy_to_dev(wiphy);
+		*rdev = wiphy_to_rdev(wiphy);
 		*wdev = NULL;
 
 		list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
@@ -566,6 +568,13 @@
 				   struct ieee80211_channel *chan,
 				   bool large)
 {
+	/* Some channels must be completely excluded from the
+	 * list to protect old user-space tools from breaking
+	 */
+	if (!large && chan->flags &
+	    (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ))
+		return 0;
+
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
 			chan->center_freq))
 		goto nla_put_failure;
@@ -613,6 +622,18 @@
 		if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
 			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ))
+			goto nla_put_failure;
 	}
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -950,8 +971,10 @@
 				c->max_interfaces))
 			goto nla_put_failure;
 		if (large &&
-		    nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
-				c->radar_detect_widths))
+		    (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+				c->radar_detect_widths) ||
+		     nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+				c->radar_detect_regions)))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_combi);
@@ -1006,42 +1029,42 @@
 }
 
 static int nl80211_send_wowlan(struct sk_buff *msg,
-			       struct cfg80211_registered_device *dev,
+			       struct cfg80211_registered_device *rdev,
 			       bool large)
 {
 	struct nlattr *nl_wowlan;
 
-	if (!dev->wiphy.wowlan)
+	if (!rdev->wiphy.wowlan)
 		return 0;
 
 	nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
 	if (!nl_wowlan)
 		return -ENOBUFS;
 
-	if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) &&
+	if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
-	    ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
+	    ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
 	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
 		return -ENOBUFS;
 
-	if (dev->wiphy.wowlan->n_patterns) {
+	if (rdev->wiphy.wowlan->n_patterns) {
 		struct nl80211_pattern_support pat = {
-			.max_patterns = dev->wiphy.wowlan->n_patterns,
-			.min_pattern_len = dev->wiphy.wowlan->pattern_min_len,
-			.max_pattern_len = dev->wiphy.wowlan->pattern_max_len,
-			.max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset,
+			.max_patterns = rdev->wiphy.wowlan->n_patterns,
+			.min_pattern_len = rdev->wiphy.wowlan->pattern_min_len,
+			.max_pattern_len = rdev->wiphy.wowlan->pattern_max_len,
+			.max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset,
 		};
 
 		if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
@@ -1049,7 +1072,7 @@
 			return -ENOBUFS;
 	}
 
-	if (large && nl80211_send_wowlan_tcp_caps(dev, msg))
+	if (large && nl80211_send_wowlan_tcp_caps(rdev, msg))
 		return -ENOBUFS;
 
 	nla_nest_end(msg, nl_wowlan);
@@ -1059,19 +1082,19 @@
 #endif
 
 static int nl80211_send_coalesce(struct sk_buff *msg,
-				 struct cfg80211_registered_device *dev)
+				 struct cfg80211_registered_device *rdev)
 {
 	struct nl80211_coalesce_rule_support rule;
 
-	if (!dev->wiphy.coalesce)
+	if (!rdev->wiphy.coalesce)
 		return 0;
 
-	rule.max_rules = dev->wiphy.coalesce->n_rules;
-	rule.max_delay = dev->wiphy.coalesce->max_delay;
-	rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns;
-	rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len;
-	rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len;
-	rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset;
+	rule.max_rules = rdev->wiphy.coalesce->n_rules;
+	rule.max_delay = rdev->wiphy.coalesce->max_delay;
+	rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns;
+	rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len;
+	rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len;
+	rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset;
 
 	if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule))
 		return -ENOBUFS;
@@ -1202,7 +1225,8 @@
 	bool split;
 };
 
-static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
+static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
+			      enum nl80211_commands cmd,
 			      struct sk_buff *msg, u32 portid, u32 seq,
 			      int flags, struct nl80211_dump_wiphy_state *state)
 {
@@ -1214,63 +1238,66 @@
 	struct ieee80211_channel *chan;
 	int i;
 	const struct ieee80211_txrx_stypes *mgmt_stypes =
-				dev->wiphy.mgmt_stypes;
+				rdev->wiphy.mgmt_stypes;
 	u32 features;
 
-	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
 	if (!hdr)
 		return -ENOBUFS;
 
 	if (WARN_ON(!state))
 		return -EINVAL;
 
-	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
-			   wiphy_name(&dev->wiphy)) ||
+			   wiphy_name(&rdev->wiphy)) ||
 	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
 			cfg80211_rdev_list_generation))
 		goto nla_put_failure;
 
+	if (cmd != NL80211_CMD_NEW_WIPHY)
+		goto finish;
+
 	switch (state->split_start) {
 	case 0:
 		if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
-			       dev->wiphy.retry_short) ||
+			       rdev->wiphy.retry_short) ||
 		    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
-			       dev->wiphy.retry_long) ||
+			       rdev->wiphy.retry_long) ||
 		    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
-				dev->wiphy.frag_threshold) ||
+				rdev->wiphy.frag_threshold) ||
 		    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
-				dev->wiphy.rts_threshold) ||
+				rdev->wiphy.rts_threshold) ||
 		    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
-			       dev->wiphy.coverage_class) ||
+			       rdev->wiphy.coverage_class) ||
 		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
-			       dev->wiphy.max_scan_ssids) ||
+			       rdev->wiphy.max_scan_ssids) ||
 		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
-			       dev->wiphy.max_sched_scan_ssids) ||
+			       rdev->wiphy.max_sched_scan_ssids) ||
 		    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
-				dev->wiphy.max_scan_ie_len) ||
+				rdev->wiphy.max_scan_ie_len) ||
 		    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
-				dev->wiphy.max_sched_scan_ie_len) ||
+				rdev->wiphy.max_sched_scan_ie_len) ||
 		    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
-			       dev->wiphy.max_match_sets))
+			       rdev->wiphy.max_match_sets))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
 		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
 			goto nla_put_failure;
-		if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
 		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
 			goto nla_put_failure;
-		if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
 		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
 			goto nla_put_failure;
-		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
 		    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
 			goto nla_put_failure;
-		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
 		    nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
 			goto nla_put_failure;
-		if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
 		    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
 			goto nla_put_failure;
 		state->split_start++;
@@ -1278,35 +1305,35 @@
 			break;
 	case 1:
 		if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
-			    sizeof(u32) * dev->wiphy.n_cipher_suites,
-			    dev->wiphy.cipher_suites))
+			    sizeof(u32) * rdev->wiphy.n_cipher_suites,
+			    rdev->wiphy.cipher_suites))
 			goto nla_put_failure;
 
 		if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
-			       dev->wiphy.max_num_pmkids))
+			       rdev->wiphy.max_num_pmkids))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
 		    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
 			goto nla_put_failure;
 
 		if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
-				dev->wiphy.available_antennas_tx) ||
+				rdev->wiphy.available_antennas_tx) ||
 		    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
-				dev->wiphy.available_antennas_rx))
+				rdev->wiphy.available_antennas_rx))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
 		    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
-				dev->wiphy.probe_resp_offload))
+				rdev->wiphy.probe_resp_offload))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.available_antennas_tx ||
-		     dev->wiphy.available_antennas_rx) &&
-		    dev->ops->get_antenna) {
+		if ((rdev->wiphy.available_antennas_tx ||
+		     rdev->wiphy.available_antennas_rx) &&
+		    rdev->ops->get_antenna) {
 			u32 tx_ant = 0, rx_ant = 0;
 			int res;
-			res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
+			res = rdev_get_antenna(rdev, &tx_ant, &rx_ant);
 			if (!res) {
 				if (nla_put_u32(msg,
 						NL80211_ATTR_WIPHY_ANTENNA_TX,
@@ -1323,7 +1350,7 @@
 			break;
 	case 2:
 		if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
-					dev->wiphy.interface_modes))
+					rdev->wiphy.interface_modes))
 				goto nla_put_failure;
 		state->split_start++;
 		if (state->split)
@@ -1337,7 +1364,7 @@
 		     band < IEEE80211_NUM_BANDS; band++) {
 			struct ieee80211_supported_band *sband;
 
-			sband = dev->wiphy.bands[band];
+			sband = rdev->wiphy.bands[band];
 
 			if (!sband)
 				continue;
@@ -1414,7 +1441,7 @@
 		i = 0;
 #define CMD(op, n)							\
 		 do {							\
-			if (dev->ops->op) {				\
+			if (rdev->ops->op) {				\
 				i++;					\
 				if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
 					goto nla_put_failure;		\
@@ -1438,32 +1465,32 @@
 		CMD(set_pmksa, SET_PMKSA);
 		CMD(del_pmksa, DEL_PMKSA);
 		CMD(flush_pmksa, FLUSH_PMKSA);
-		if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+		if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
 			CMD(remain_on_channel, REMAIN_ON_CHANNEL);
 		CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
 		CMD(mgmt_tx, FRAME);
 		CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
-		if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+		if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 			i++;
 			if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
 				goto nla_put_failure;
 		}
-		if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
-		    dev->ops->join_mesh) {
+		if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
+		    rdev->ops->join_mesh) {
 			i++;
 			if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
 				goto nla_put_failure;
 		}
 		CMD(set_wds_peer, SET_WDS_PEER);
-		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+		if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
 			CMD(tdls_mgmt, TDLS_MGMT);
 			CMD(tdls_oper, TDLS_OPER);
 		}
-		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+		if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 			CMD(sched_scan_start, START_SCHED_SCAN);
 		CMD(probe_client, PROBE_CLIENT);
 		CMD(set_noack_map, SET_NOACK_MAP);
-		if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+		if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
 			i++;
 			if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
 				goto nla_put_failure;
@@ -1473,7 +1500,7 @@
 		if (state->split) {
 			CMD(crit_proto_start, CRIT_PROTOCOL_START);
 			CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
-			if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
+			if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
 				CMD(channel_switch, CHANNEL_SWITCH);
 		}
 		CMD(set_qos_map, SET_QOS_MAP);
@@ -1484,13 +1511,13 @@
 
 #undef CMD
 
-		if (dev->ops->connect || dev->ops->auth) {
+		if (rdev->ops->connect || rdev->ops->auth) {
 			i++;
 			if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
 				goto nla_put_failure;
 		}
 
-		if (dev->ops->disconnect || dev->ops->deauth) {
+		if (rdev->ops->disconnect || rdev->ops->deauth) {
 			i++;
 			if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
 				goto nla_put_failure;
@@ -1501,14 +1528,14 @@
 		if (state->split)
 			break;
 	case 5:
-		if (dev->ops->remain_on_channel &&
-		    (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
+		if (rdev->ops->remain_on_channel &&
+		    (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
 		    nla_put_u32(msg,
 				NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
-				dev->wiphy.max_remain_on_channel_duration))
+				rdev->wiphy.max_remain_on_channel_duration))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
 		    nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
 			goto nla_put_failure;
 
@@ -1519,7 +1546,7 @@
 			break;
 	case 6:
 #ifdef CONFIG_PM
-		if (nl80211_send_wowlan(msg, dev, state->split))
+		if (nl80211_send_wowlan(msg, rdev, state->split))
 			goto nla_put_failure;
 		state->split_start++;
 		if (state->split)
@@ -1529,10 +1556,10 @@
 #endif
 	case 7:
 		if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
-					dev->wiphy.software_iftypes))
+					rdev->wiphy.software_iftypes))
 			goto nla_put_failure;
 
-		if (nl80211_put_iface_combinations(&dev->wiphy, msg,
+		if (nl80211_put_iface_combinations(&rdev->wiphy, msg,
 						   state->split))
 			goto nla_put_failure;
 
@@ -1540,12 +1567,12 @@
 		if (state->split)
 			break;
 	case 8:
-		if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
 		    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
-				dev->wiphy.ap_sme_capa))
+				rdev->wiphy.ap_sme_capa))
 			goto nla_put_failure;
 
-		features = dev->wiphy.features;
+		features = rdev->wiphy.features;
 		/*
 		 * We can only add the per-channel limit information if the
 		 * dump is split, otherwise it makes it too big. Therefore
@@ -1556,16 +1583,16 @@
 		if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
 			goto nla_put_failure;
 
-		if (dev->wiphy.ht_capa_mod_mask &&
+		if (rdev->wiphy.ht_capa_mod_mask &&
 		    nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
-			    sizeof(*dev->wiphy.ht_capa_mod_mask),
-			    dev->wiphy.ht_capa_mod_mask))
+			    sizeof(*rdev->wiphy.ht_capa_mod_mask),
+			    rdev->wiphy.ht_capa_mod_mask))
 			goto nla_put_failure;
 
-		if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
-		    dev->wiphy.max_acl_mac_addrs &&
+		if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
+		    rdev->wiphy.max_acl_mac_addrs &&
 		    nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
-				dev->wiphy.max_acl_mac_addrs))
+				rdev->wiphy.max_acl_mac_addrs))
 			goto nla_put_failure;
 
 		/*
@@ -1581,41 +1608,41 @@
 		state->split_start++;
 		break;
 	case 9:
-		if (dev->wiphy.extended_capabilities &&
+		if (rdev->wiphy.extended_capabilities &&
 		    (nla_put(msg, NL80211_ATTR_EXT_CAPA,
-			     dev->wiphy.extended_capabilities_len,
-			     dev->wiphy.extended_capabilities) ||
+			     rdev->wiphy.extended_capabilities_len,
+			     rdev->wiphy.extended_capabilities) ||
 		     nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
-			     dev->wiphy.extended_capabilities_len,
-			     dev->wiphy.extended_capabilities_mask)))
+			     rdev->wiphy.extended_capabilities_len,
+			     rdev->wiphy.extended_capabilities_mask)))
 			goto nla_put_failure;
 
-		if (dev->wiphy.vht_capa_mod_mask &&
+		if (rdev->wiphy.vht_capa_mod_mask &&
 		    nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
-			    sizeof(*dev->wiphy.vht_capa_mod_mask),
-			    dev->wiphy.vht_capa_mod_mask))
+			    sizeof(*rdev->wiphy.vht_capa_mod_mask),
+			    rdev->wiphy.vht_capa_mod_mask))
 			goto nla_put_failure;
 
 		state->split_start++;
 		break;
 	case 10:
-		if (nl80211_send_coalesce(msg, dev))
+		if (nl80211_send_coalesce(msg, rdev))
 			goto nla_put_failure;
 
-		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
+		if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
 		    (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) ||
 		     nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ)))
 			goto nla_put_failure;
 
-		if (dev->wiphy.max_ap_assoc_sta &&
+		if (rdev->wiphy.max_ap_assoc_sta &&
 		    nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA,
-				dev->wiphy.max_ap_assoc_sta))
+				rdev->wiphy.max_ap_assoc_sta))
 			goto nla_put_failure;
 
 		state->split_start++;
 		break;
 	case 11:
-		if (dev->wiphy.n_vendor_commands) {
+		if (rdev->wiphy.n_vendor_commands) {
 			const struct nl80211_vendor_cmd_info *info;
 			struct nlattr *nested;
 
@@ -1623,15 +1650,15 @@
 			if (!nested)
 				goto nla_put_failure;
 
-			for (i = 0; i < dev->wiphy.n_vendor_commands; i++) {
-				info = &dev->wiphy.vendor_commands[i].info;
+			for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) {
+				info = &rdev->wiphy.vendor_commands[i].info;
 				if (nla_put(msg, i + 1, sizeof(*info), info))
 					goto nla_put_failure;
 			}
 			nla_nest_end(msg, nested);
 		}
 
-		if (dev->wiphy.n_vendor_events) {
+		if (rdev->wiphy.n_vendor_events) {
 			const struct nl80211_vendor_cmd_info *info;
 			struct nlattr *nested;
 
@@ -1640,18 +1667,26 @@
 			if (!nested)
 				goto nla_put_failure;
 
-			for (i = 0; i < dev->wiphy.n_vendor_events; i++) {
-				info = &dev->wiphy.vendor_events[i];
+			for (i = 0; i < rdev->wiphy.n_vendor_events; i++) {
+				info = &rdev->wiphy.vendor_events[i];
 				if (nla_put(msg, i + 1, sizeof(*info), info))
 					goto nla_put_failure;
 			}
 			nla_nest_end(msg, nested);
 		}
+		state->split_start++;
+		break;
+	case 12:
+		if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH &&
+		    nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS,
+			       rdev->wiphy.max_num_csa_counters))
+			goto nla_put_failure;
 
 		/* done */
 		state->split_start = 0;
 		break;
 	}
+ finish:
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
@@ -1684,7 +1719,7 @@
 		if (!netdev)
 			return -ENODEV;
 		if (netdev->ieee80211_ptr) {
-			rdev = wiphy_to_dev(
+			rdev = wiphy_to_rdev(
 				netdev->ieee80211_ptr->wiphy);
 			state->filter_wiphy = rdev->wiphy_idx;
 		}
@@ -1697,7 +1732,7 @@
 {
 	int idx = 0, ret;
 	struct nl80211_dump_wiphy_state *state = (void *)cb->args[0];
-	struct cfg80211_registered_device *dev;
+	struct cfg80211_registered_device *rdev;
 
 	rtnl_lock();
 	if (!state) {
@@ -1716,17 +1751,18 @@
 		cb->args[0] = (long)state;
 	}
 
-	list_for_each_entry(dev, &cfg80211_rdev_list, list) {
-		if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
+	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+		if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
 			continue;
 		if (++idx <= state->start)
 			continue;
 		if (state->filter_wiphy != -1 &&
-		    state->filter_wiphy != dev->wiphy_idx)
+		    state->filter_wiphy != rdev->wiphy_idx)
 			continue;
 		/* attempt to fit multiple wiphy data chunks into the skb */
 		do {
-			ret = nl80211_send_wiphy(dev, skb,
+			ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY,
+						 skb,
 						 NETLINK_CB(cb->skb).portid,
 						 cb->nlh->nlmsg_seq,
 						 NLM_F_MULTI, state);
@@ -1774,14 +1810,15 @@
 static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *msg;
-	struct cfg80211_registered_device *dev = info->user_ptr[0];
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct nl80211_dump_wiphy_state state = {};
 
 	msg = nlmsg_new(4096, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
+	if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg,
+			       info->snd_portid, info->snd_seq, 0,
 			       &state) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
@@ -1908,18 +1945,20 @@
 }
 
 static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
-				 struct wireless_dev *wdev,
+				 struct net_device *dev,
 				 struct genl_info *info)
 {
 	struct cfg80211_chan_def chandef;
 	int result;
 	enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR;
+	struct wireless_dev *wdev = NULL;
 
-	if (wdev)
-		iftype = wdev->iftype;
-
+	if (dev)
+		wdev = dev->ieee80211_ptr;
 	if (!nl80211_can_set_dev_channel(wdev))
 		return -EOPNOTSUPP;
+	if (wdev)
+		iftype = wdev->iftype;
 
 	result = nl80211_parse_chandef(rdev, info, &chandef);
 	if (result)
@@ -1928,14 +1967,27 @@
 	switch (iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		if (wdev->beacon_interval) {
-			result = -EBUSY;
-			break;
-		}
-		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) {
+		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
 			result = -EINVAL;
 			break;
 		}
+		if (wdev->beacon_interval) {
+			if (!dev || !rdev->ops->set_ap_chanwidth ||
+			    !(rdev->wiphy.features &
+			      NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) {
+				result = -EBUSY;
+				break;
+			}
+
+			/* Only allow dynamic channel width changes */
+			if (chandef.chan != wdev->preset_chandef.chan) {
+				result = -EBUSY;
+				break;
+			}
+			result = rdev_set_ap_chanwidth(rdev, dev, &chandef);
+			if (result)
+				break;
+		}
 		wdev->preset_chandef = chandef;
 		result = 0;
 		break;
@@ -1957,7 +2009,7 @@
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *netdev = info->user_ptr[1];
 
-	return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
+	return __nl80211_set_channel(rdev, netdev, info);
 }
 
 static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
@@ -2013,7 +2065,7 @@
 
 		netdev = __dev_get_by_index(genl_info_net(info), ifindex);
 		if (netdev && netdev->ieee80211_ptr)
-			rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
+			rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy);
 		else
 			netdev = NULL;
 	}
@@ -2079,9 +2131,10 @@
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		result = __nl80211_set_channel(rdev,
-				nl80211_can_set_dev_channel(wdev) ? wdev : NULL,
-				info);
+		result = __nl80211_set_channel(
+			rdev,
+			nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
+			info);
 		if (result)
 			return result;
 	}
@@ -2229,7 +2282,7 @@
 static inline u64 wdev_id(struct wireless_dev *wdev)
 {
 	return (u64)wdev->identifier |
-	       ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32);
+	       ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
 }
 
 static int nl80211_send_chandef(struct sk_buff *msg,
@@ -2355,7 +2408,7 @@
 static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *msg;
-	struct cfg80211_registered_device *dev = info->user_ptr[0];
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev = info->user_ptr[1];
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -2363,7 +2416,7 @@
 		return -ENOMEM;
 
 	if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
-			       dev, wdev) < 0) {
+			       rdev, wdev) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
@@ -2514,6 +2567,9 @@
 	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
 	u32 flags;
 
+	/* to avoid failing a new interface creation due to pending removal */
+	cfg80211_destroy_ifaces(rdev);
+
 	memset(&params, 0, sizeof(params));
 
 	if (!info->attrs[NL80211_ATTR_IFNAME])
@@ -2563,6 +2619,9 @@
 		return PTR_ERR(wdev);
 	}
 
+	if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER])
+		wdev->owner_nlportid = info->snd_portid;
+
 	switch (type) {
 	case NL80211_IFTYPE_MESH_POINT:
 		if (!info->attrs[NL80211_ATTR_MESH_ID])
@@ -3142,7 +3201,6 @@
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_ap_settings params;
 	int err;
-	u8 radar_detect_width = 0;
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
@@ -3258,24 +3316,10 @@
 	} else if (!nl80211_get_ap_channel(rdev, &params))
 		return -EINVAL;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+				     wdev->iftype))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
-	if (err < 0)
-		return err;
-	if (err) {
-		radar_detect_width = BIT(params.chandef.width);
-		params.radar_required = true;
-	}
-
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   params.chandef.chan,
-					   CHAN_MODE_SHARED,
-					   radar_detect_width);
-	if (err)
-		return err;
-
 	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
 		params.acl = parse_acl_data(&rdev->wiphy, info);
 		if (IS_ERR(params.acl))
@@ -3613,6 +3657,10 @@
 	    nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED,
 			sinfo->tx_failed))
 		goto nla_put_failure;
+	if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) &&
+	    nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+			sinfo->expected_throughput))
+		goto nla_put_failure;
 	if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS,
 			sinfo->beacon_loss_count))
@@ -3675,13 +3723,13 @@
 				struct netlink_callback *cb)
 {
 	struct station_info sinfo;
-	struct cfg80211_registered_device *dev;
+	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
 	u8 mac_addr[ETH_ALEN];
 	int sta_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (err)
 		return err;
 
@@ -3690,14 +3738,14 @@
 		goto out_err;
 	}
 
-	if (!dev->ops->dump_station) {
+	if (!rdev->ops->dump_station) {
 		err = -EOPNOTSUPP;
 		goto out_err;
 	}
 
 	while (1) {
 		memset(&sinfo, 0, sizeof(sinfo));
-		err = rdev_dump_station(dev, wdev->netdev, sta_idx,
+		err = rdev_dump_station(rdev, wdev->netdev, sta_idx,
 					mac_addr, &sinfo);
 		if (err == -ENOENT)
 			break;
@@ -3707,7 +3755,7 @@
 		if (nl80211_send_station(skb,
 				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				dev, wdev->netdev, mac_addr,
+				rdev, wdev->netdev, mac_addr,
 				&sinfo) < 0)
 			goto out;
 
@@ -3719,7 +3767,7 @@
 	cb->args[2] = sta_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(dev);
+	nl80211_finish_wdev_dump(rdev);
 
 	return err;
 }
@@ -4380,18 +4428,18 @@
 			      struct netlink_callback *cb)
 {
 	struct mpath_info pinfo;
-	struct cfg80211_registered_device *dev;
+	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
 	u8 dst[ETH_ALEN];
 	u8 next_hop[ETH_ALEN];
 	int path_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (err)
 		return err;
 
-	if (!dev->ops->dump_mpath) {
+	if (!rdev->ops->dump_mpath) {
 		err = -EOPNOTSUPP;
 		goto out_err;
 	}
@@ -4402,7 +4450,7 @@
 	}
 
 	while (1) {
-		err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst,
+		err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst,
 				      next_hop, &pinfo);
 		if (err == -ENOENT)
 			break;
@@ -4423,7 +4471,7 @@
 	cb->args[2] = path_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(dev);
+	nl80211_finish_wdev_dump(rdev);
 	return err;
 }
 
@@ -4663,7 +4711,6 @@
 
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
-	int r;
 	char *data = NULL;
 	enum nl80211_user_reg_hint_type user_reg_hint_type;
 
@@ -4676,11 +4723,6 @@
 	if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
 		return -EINPROGRESS;
 
-	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
-		return -EINVAL;
-
-	data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
-
 	if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE])
 		user_reg_hint_type =
 		  nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]);
@@ -4690,14 +4732,16 @@
 	switch (user_reg_hint_type) {
 	case NL80211_USER_REG_HINT_USER:
 	case NL80211_USER_REG_HINT_CELL_BASE:
-		break;
+		if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
+			return -EINVAL;
+
+		data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
+		return regulatory_hint_user(data, user_reg_hint_type);
+	case NL80211_USER_REG_HINT_INDOOR:
+		return regulatory_hint_indoor_user();
 	default:
 		return -EINVAL;
 	}
-
-	r = regulatory_hint_user(data, user_reg_hint_type);
-
-	return r;
 }
 
 static int nl80211_get_mesh_config(struct sk_buff *skb,
@@ -5796,7 +5840,8 @@
 	if (wdev->cac_started)
 		return -EBUSY;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef);
+	err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
+					    wdev->iftype);
 	if (err < 0)
 		return err;
 
@@ -5809,12 +5854,6 @@
 	if (!rdev->ops->start_radar_detection)
 		return -EOPNOTSUPP;
 
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   chandef.chan, CHAN_MODE_SHARED,
-					   BIT(chandef.width));
-	if (err)
-		return err;
-
 	cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef);
 	if (WARN_ON(!cac_time_ms))
 		cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
@@ -5843,6 +5882,7 @@
 	u8 radar_detect_width = 0;
 	int err;
 	bool need_new_beacon = false;
+	int len, i;
 
 	if (!rdev->ops->channel_switch ||
 	    !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
@@ -5901,26 +5941,55 @@
 	if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
 		return -EINVAL;
 
-	params.counter_offset_beacon =
-		nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
-	if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
+	len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+	if (!len || (len % sizeof(u16)))
 		return -EINVAL;
 
-	/* sanity check - counters should be the same */
-	if (params.beacon_csa.tail[params.counter_offset_beacon] !=
-	    params.count)
+	params.n_counter_offsets_beacon = len / sizeof(u16);
+	if (rdev->wiphy.max_num_csa_counters &&
+	    (params.n_counter_offsets_beacon >
+	     rdev->wiphy.max_num_csa_counters))
 		return -EINVAL;
 
+	params.counter_offsets_beacon =
+		nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+
+	/* sanity checks - counters should fit and be the same */
+	for (i = 0; i < params.n_counter_offsets_beacon; i++) {
+		u16 offset = params.counter_offsets_beacon[i];
+
+		if (offset >= params.beacon_csa.tail_len)
+			return -EINVAL;
+
+		if (params.beacon_csa.tail[offset] != params.count)
+			return -EINVAL;
+	}
+
 	if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
-		params.counter_offset_presp =
-			nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
-		if (params.counter_offset_presp >=
-		    params.beacon_csa.probe_resp_len)
+		len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+		if (!len || (len % sizeof(u16)))
 			return -EINVAL;
 
-		if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
-		    params.count)
+		params.n_counter_offsets_presp = len / sizeof(u16);
+		if (rdev->wiphy.max_num_csa_counters &&
+		    (params.n_counter_offsets_beacon >
+		     rdev->wiphy.max_num_csa_counters))
 			return -EINVAL;
+
+		params.counter_offsets_presp =
+			nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+
+		/* sanity checks - counters should fit and be the same */
+		for (i = 0; i < params.n_counter_offsets_presp; i++) {
+			u16 offset = params.counter_offsets_presp[i];
+
+			if (offset >= params.beacon_csa.probe_resp_len)
+				return -EINVAL;
+
+			if (params.beacon_csa.probe_resp[offset] !=
+			    params.count)
+				return -EINVAL;
+		}
 	}
 
 skip_beacons:
@@ -5928,27 +5997,25 @@
 	if (err)
 		return err;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+				     wdev->iftype))
 		return -EINVAL;
 
-	switch (dev->ieee80211_ptr->iftype) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_P2P_GO:
-	case NL80211_IFTYPE_ADHOC:
-	case NL80211_IFTYPE_MESH_POINT:
-		err = cfg80211_chandef_dfs_required(wdev->wiphy,
-						    &params.chandef);
-		if (err < 0)
-			return err;
-		if (err) {
-			radar_detect_width = BIT(params.chandef.width);
-			params.radar_required = true;
-		}
-		break;
-	default:
-		break;
+	err = cfg80211_chandef_dfs_required(wdev->wiphy,
+					    &params.chandef,
+					    wdev->iftype);
+	if (err < 0)
+		return err;
+
+	if (err > 0) {
+		radar_detect_width = BIT(params.chandef.width);
+		params.radar_required = true;
 	}
 
+	/* TODO: I left this here for now.  With channel switch, the
+	 * verification is a bit more complicated, because we only do
+	 * it later when the channel switch really happens.
+	 */
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
 					   params.chandef.chan,
 					   CHAN_MODE_SHARED,
@@ -6175,12 +6242,12 @@
 			struct netlink_callback *cb)
 {
 	struct survey_info survey;
-	struct cfg80211_registered_device *dev;
+	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
 	int survey_idx = cb->args[2];
 	int res;
 
-	res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+	res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (res)
 		return res;
 
@@ -6189,7 +6256,7 @@
 		goto out_err;
 	}
 
-	if (!dev->ops->dump_survey) {
+	if (!rdev->ops->dump_survey) {
 		res = -EOPNOTSUPP;
 		goto out_err;
 	}
@@ -6197,7 +6264,7 @@
 	while (1) {
 		struct ieee80211_channel *chan;
 
-		res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey);
+		res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
 		if (res == -ENOENT)
 			break;
 		if (res)
@@ -6209,7 +6276,7 @@
 			goto out;
 		}
 
-		chan = ieee80211_get_channel(&dev->wiphy,
+		chan = ieee80211_get_channel(&rdev->wiphy,
 					     survey.channel->center_freq);
 		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) {
 			survey_idx++;
@@ -6228,7 +6295,7 @@
 	cb->args[2] = survey_idx;
 	res = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(dev);
+	nl80211_finish_wdev_dump(rdev);
 	return res;
 }
 
@@ -6704,7 +6771,8 @@
 	if (err)
 		return err;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef,
+				     NL80211_IFTYPE_ADHOC))
 		return -EINVAL;
 
 	switch (ibss.chandef.width) {
@@ -6879,7 +6947,7 @@
 					   int vendor_event_idx,
 					   int approxlen, gfp_t gfp)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	const struct nl80211_vendor_cmd_info *info;
 
 	switch (cmd) {
@@ -7767,6 +7835,27 @@
 	if (!chandef.chan && params.offchan)
 		return -EINVAL;
 
+	params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+	params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+
+	if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
+		int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+		int i;
+
+		if (len % sizeof(u16))
+			return -EINVAL;
+
+		params.n_csa_offsets = len / sizeof(u16);
+		params.csa_offsets =
+			nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+
+		/* check that all the offsets fit the frame */
+		for (i = 0; i < params.n_csa_offsets; i++) {
+			if (params.csa_offsets[i] >= params.len)
+				return -EINVAL;
+		}
+	}
+
 	if (!params.dont_wait_for_ack) {
 		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 		if (!msg)
@@ -7780,8 +7869,6 @@
 		}
 	}
 
-	params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
-	params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
 	params.chan = chandef.chan;
 	err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie);
 	if (err)
@@ -8478,6 +8565,8 @@
 
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem) {
+			u8 *mask_pat;
+
 			nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
 				  nla_len(pat), NULL);
 			err = -EINVAL;
@@ -8501,19 +8590,18 @@
 				goto error;
 			new_triggers.patterns[i].pkt_offset = pkt_offset;
 
-			new_triggers.patterns[i].mask =
-				kmalloc(mask_len + pat_len, GFP_KERNEL);
-			if (!new_triggers.patterns[i].mask) {
+			mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+			if (!mask_pat) {
 				err = -ENOMEM;
 				goto error;
 			}
-			new_triggers.patterns[i].pattern =
-				new_triggers.patterns[i].mask + mask_len;
-			memcpy(new_triggers.patterns[i].mask,
-			       nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+			new_triggers.patterns[i].mask = mask_pat;
+			memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
 			       mask_len);
+			mask_pat += mask_len;
+			new_triggers.patterns[i].pattern = mask_pat;
 			new_triggers.patterns[i].pattern_len = pat_len;
-			memcpy(new_triggers.patterns[i].pattern,
+			memcpy(mask_pat,
 			       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
 			       pat_len);
 			i++;
@@ -8705,6 +8793,8 @@
 
 	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
 			    rem) {
+		u8 *mask_pat;
+
 		nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
 			  nla_len(pat), NULL);
 		if (!pat_tb[NL80211_PKTPAT_MASK] ||
@@ -8726,17 +8816,19 @@
 			return -EINVAL;
 		new_rule->patterns[i].pkt_offset = pkt_offset;
 
-		new_rule->patterns[i].mask =
-			kmalloc(mask_len + pat_len, GFP_KERNEL);
-		if (!new_rule->patterns[i].mask)
+		mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+		if (!mask_pat)
 			return -ENOMEM;
-		new_rule->patterns[i].pattern =
-			new_rule->patterns[i].mask + mask_len;
-		memcpy(new_rule->patterns[i].mask,
-		       nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+
+		new_rule->patterns[i].mask = mask_pat;
+		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+		       mask_len);
+
+		mask_pat += mask_len;
+		new_rule->patterns[i].pattern = mask_pat;
 		new_rule->patterns[i].pattern_len = pat_len;
-		memcpy(new_rule->patterns[i].pattern,
-		       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
+		       pat_len);
 		i++;
 	}
 
@@ -8981,9 +9073,8 @@
 	if (wdev->p2p_started)
 		return 0;
 
-	err = cfg80211_can_add_interface(rdev, wdev->iftype);
-	if (err)
-		return err;
+	if (rfkill_blocked(rdev->rfkill))
+		return -ERFKILL;
 
 	err = rdev_start_p2p_device(rdev, wdev);
 	if (err)
@@ -9192,7 +9283,7 @@
 					   enum nl80211_attrs attr,
 					   int approxlen)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	if (WARN_ON(!rdev->cur_cmd_info))
 		return NULL;
@@ -9316,7 +9407,7 @@
 		}
 
 		dev = wdev->netdev;
-		rdev = wiphy_to_dev(wdev->wiphy);
+		rdev = wiphy_to_rdev(wdev->wiphy);
 
 		if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
 			if (!dev) {
@@ -10017,16 +10108,20 @@
 
 /* notification functions */
 
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+			  enum nl80211_commands cmd)
 {
 	struct sk_buff *msg;
 	struct nl80211_dump_wiphy_state state = {};
 
+	WARN_ON(cmd != NL80211_CMD_NEW_WIPHY &&
+		cmd != NL80211_CMD_DEL_WIPHY);
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) {
+	if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) {
 		nlmsg_free(msg);
 		return;
 	}
@@ -10345,7 +10440,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	const struct ieee80211_mgmt *mgmt = (void *)buf;
 	u32 cmd;
 
@@ -10567,7 +10662,7 @@
 					const u8* ie, u8 ie_len, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -10747,7 +10842,7 @@
 			       unsigned int duration, gfp_t gfp)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
 	nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
@@ -10761,7 +10856,7 @@
 					gfp_t gfp)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
 	nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
@@ -10773,7 +10868,7 @@
 		      struct station_info *sinfo, gfp_t gfp)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 
 	trace_cfg80211_new_sta(dev, mac_addr, sinfo);
@@ -10796,7 +10891,7 @@
 void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -10833,7 +10928,7 @@
 			  gfp_t gfp)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -10868,7 +10963,7 @@
 				       const u8 *addr, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 	u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
@@ -10988,7 +11083,7 @@
 			     const u8 *buf, size_t len, bool ack, gfp_t gfp)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct net_device *netdev = wdev->netdev;
 	struct sk_buff *msg;
 	void *hdr;
@@ -11032,7 +11127,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	struct nlattr *pinfoattr;
 	void *hdr;
@@ -11124,7 +11219,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_gtk_rekey_notify(dev, bssid);
 	nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
@@ -11182,7 +11277,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
 	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
@@ -11229,7 +11324,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	ASSERT_WDEV_LOCK(wdev);
 
@@ -11253,7 +11348,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	struct nlattr *pinfoattr;
 	void *hdr;
@@ -11353,7 +11448,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	struct nlattr *pinfoattr;
 	void *hdr;
@@ -11400,7 +11495,7 @@
 			   u64 cookie, bool acked, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -11440,7 +11535,7 @@
 				 const u8 *frame, size_t len,
 				 int freq, int sig_dbm)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 	struct cfg80211_beacon_registration *reg;
@@ -11487,7 +11582,7 @@
 				   struct cfg80211_wowlan_wakeup *wakeup,
 				   gfp_t gfp)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 	int size = 200;
@@ -11597,7 +11692,7 @@
 				u16 reason_code, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -11649,9 +11744,15 @@
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
-		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list)
+		bool schedule_destroy_work = false;
+
+		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) {
 			cfg80211_mlme_unregister_socket(wdev, notify->portid);
 
+			if (wdev->owner_nlportid == notify->portid)
+				schedule_destroy_work = true;
+		}
+
 		spin_lock_bh(&rdev->beacon_registrations_lock);
 		list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations,
 					 list) {
@@ -11662,11 +11763,24 @@
 			}
 		}
 		spin_unlock_bh(&rdev->beacon_registrations_lock);
+
+		if (schedule_destroy_work) {
+			struct cfg80211_iface_destroy *destroy;
+
+			destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC);
+			if (destroy) {
+				destroy->nlportid = notify->portid;
+				spin_lock(&rdev->destroy_list_lock);
+				list_add(&destroy->list, &rdev->destroy_list);
+				spin_unlock(&rdev->destroy_list_lock);
+				schedule_work(&rdev->destroy_work);
+			}
+		}
 	}
 
 	rcu_read_unlock();
 
-	return NOTIFY_DONE;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block nl80211_netlink_notifier = {
@@ -11677,7 +11791,7 @@
 		       struct cfg80211_ft_event_params *ft_event)
 {
 	struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -11724,7 +11838,7 @@
 	void *hdr;
 	u32 nlportid;
 
-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);
 	if (!rdev->crit_proto_nlportid)
 		return;
 
@@ -11759,7 +11873,7 @@
 void nl80211_send_ap_stopped(struct wireless_dev *wdev)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
 

diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 1e6df96..49c9a48 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h

@@ -5,7 +5,8 @@
 
 int nl80211_init(void);
 void nl80211_exit(void);
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+			  enum nl80211_commands cmd);
 void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 			     struct wireless_dev *wdev);
 struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,

diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 74d97d3..d95bbe3 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h

@@ -199,7 +199,7 @@
 }
 
 static inline int rdev_get_station(struct cfg80211_registered_device *rdev,
-				   struct net_device *dev, u8 *mac,
+				   struct net_device *dev, const u8 *mac,
 				   struct station_info *sinfo)
 {
 	int ret;
@@ -950,4 +950,17 @@
 	return ret;
 }
 
+static inline int
+rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
+		      struct net_device *dev, struct cfg80211_chan_def *chandef)
+{
+	int ret;
+
+	trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+	ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f59aaac..558b0e3 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c

@@ -65,11 +65,26 @@
 #define REG_DBG_PRINT(args...)
 #endif
 
+/**
+ * enum reg_request_treatment - regulatory request treatment
+ *
+ * @REG_REQ_OK: continue processing the regulatory request
+ * @REG_REQ_IGNORE: ignore the regulatory request
+ * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should
+ *	be intersected with the current one.
+ * @REG_REQ_ALREADY_SET: the regulatory request will not change the current
+ *	regulatory settings, and no further processing is required.
+ * @REG_REQ_USER_HINT_HANDLED: a non-alpha2 user hint was handled and no
+ *	further processing is required, i.e., no need to update last_request
+ *	etc. This should be used for user hints that do not provide an alpha2
+ *	but some other type of regulatory hint, e.g., indoor operation.
+ */
 enum reg_request_treatment {
 	REG_REQ_OK,
 	REG_REQ_IGNORE,
 	REG_REQ_INTERSECT,
 	REG_REQ_ALREADY_SET,
+	REG_REQ_USER_HINT_HANDLED,
 };
 
 static struct regulatory_request core_request_world = {
@@ -106,6 +121,14 @@
  */
 static int reg_num_devs_support_basehint;
 
+/*
+ * State variable indicating whether the platform to which the devices
+ * are attached is operating in an indoor environment. The state variable
+ * is relevant for all registered devices.
+ * (protected by RTNL)
+ */
+static bool reg_is_indoor;
+
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
 	return rtnl_dereference(cfg80211_regdomain);
@@ -240,8 +263,16 @@
 module_param(ieee80211_regdom, charp, 0444);
 MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
 
-static void reg_free_request(struct regulatory_request *lr)
+static void reg_free_request(struct regulatory_request *request)
 {
+	if (request != get_last_request())
+		kfree(request);
+}
+
+static void reg_free_last_request(void)
+{
+	struct regulatory_request *lr = get_last_request();
+
 	if (lr != &core_request_world && lr)
 		kfree_rcu(lr, rcu_head);
 }
@@ -254,7 +285,7 @@
 	if (lr == request)
 		return;
 
-	reg_free_request(lr);
+	reg_free_last_request();
 	rcu_assign_pointer(last_request, request);
 }
 
@@ -873,6 +904,8 @@
 		channel_flags |= IEEE80211_CHAN_RADAR;
 	if (rd_flags & NL80211_RRF_NO_OFDM)
 		channel_flags |= IEEE80211_CHAN_NO_OFDM;
+	if (rd_flags & NL80211_RRF_NO_OUTDOOR)
+		channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
 	return channel_flags;
 }
 
@@ -902,7 +935,7 @@
 		if (!band_rule_found)
 			band_rule_found = freq_in_rule_band(fr, center_freq);
 
-		bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20));
+		bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5));
 
 		if (band_rule_found && bw_fits)
 			return rr;
@@ -986,10 +1019,10 @@
 }
 #endif
 
-/*
- * Note that right now we assume the desired channel bandwidth
- * is always 20 MHz for each individual channel (HT40 uses 20 MHz
- * per channel, the primary and the extension channel).
+/* Find an ieee80211_reg_rule such that a 5 MHz channel with frequency
+ * chan->center_freq fits there.
+ * If there is no such reg_rule, disable the channel; otherwise set the
+ * flags corresponding to the bandwidths allowed in the particular reg_rule.
  */
 static void handle_channel(struct wiphy *wiphy,
 			   enum nl80211_reg_initiator initiator,
@@ -1050,8 +1083,12 @@
 	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
 		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
 
+	if (max_bandwidth_khz < MHZ_TO_KHZ(10))
+		bw_flags = IEEE80211_CHAN_NO_10MHZ;
+	if (max_bandwidth_khz < MHZ_TO_KHZ(20))
+		bw_flags |= IEEE80211_CHAN_NO_20MHZ;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
-		bw_flags = IEEE80211_CHAN_NO_HT40;
+		bw_flags |= IEEE80211_CHAN_NO_HT40;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(80))
 		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1071,6 +1108,13 @@
 			(int) MBI_TO_DBI(power_rule->max_antenna_gain);
 		chan->max_reg_power = chan->max_power = chan->orig_mpwr =
 			(int) MBM_TO_DBM(power_rule->max_eirp);
+
+		if (chan->flags & IEEE80211_CHAN_RADAR) {
+			chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+			if (reg_rule->dfs_cac_ms)
+				chan->dfs_cac_ms = reg_rule->dfs_cac_ms;
+		}
+
 		return;
 	}
 
@@ -1126,12 +1170,19 @@
 	return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE;
 }
 
+static bool reg_request_indoor(struct regulatory_request *request)
+{
+	if (request->initiator != NL80211_REGDOM_SET_BY_USER)
+		return false;
+	return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR;
+}
+
 bool reg_last_request_cell_base(void)
 {
 	return reg_request_cell_base(get_last_request());
 }
 
-#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS
+#ifdef CONFIG_CFG80211_REG_CELLULAR_HINTS
 /* Core specific check */
 static enum reg_request_treatment
 reg_ignore_cell_hint(struct regulatory_request *pending_request)
@@ -1471,8 +1522,12 @@
 	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
 		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
 
+	if (max_bandwidth_khz < MHZ_TO_KHZ(10))
+		bw_flags = IEEE80211_CHAN_NO_10MHZ;
+	if (max_bandwidth_khz < MHZ_TO_KHZ(20))
+		bw_flags |= IEEE80211_CHAN_NO_20MHZ;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
-		bw_flags = IEEE80211_CHAN_NO_HT40;
+		bw_flags |= IEEE80211_CHAN_NO_HT40;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(80))
 		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
 	if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1568,6 +1623,11 @@
 {
 	struct regulatory_request *lr = get_last_request();
 
+	if (reg_request_indoor(user_request)) {
+		reg_is_indoor = true;
+		return REG_REQ_USER_HINT_HANDLED;
+	}
+
 	if (reg_request_cell_base(user_request))
 		return reg_ignore_cell_hint(user_request);
 
@@ -1615,8 +1675,9 @@
 
 	treatment = __reg_process_hint_user(user_request);
 	if (treatment == REG_REQ_IGNORE ||
-	    treatment == REG_REQ_ALREADY_SET) {
-		kfree(user_request);
+	    treatment == REG_REQ_ALREADY_SET ||
+	    treatment == REG_REQ_USER_HINT_HANDLED) {
+		reg_free_request(user_request);
 		return treatment;
 	}
 
@@ -1676,14 +1737,15 @@
 	case REG_REQ_OK:
 		break;
 	case REG_REQ_IGNORE:
-		kfree(driver_request);
+	case REG_REQ_USER_HINT_HANDLED:
+		reg_free_request(driver_request);
 		return treatment;
 	case REG_REQ_INTERSECT:
 		/* fall through */
 	case REG_REQ_ALREADY_SET:
 		regd = reg_copy_regd(get_cfg80211_regdom());
 		if (IS_ERR(regd)) {
-			kfree(driver_request);
+			reg_free_request(driver_request);
 			return REG_REQ_IGNORE;
 		}
 		rcu_assign_pointer(wiphy->regd, regd);
@@ -1775,12 +1837,13 @@
 	case REG_REQ_OK:
 		break;
 	case REG_REQ_IGNORE:
+	case REG_REQ_USER_HINT_HANDLED:
 		/* fall through */
 	case REG_REQ_ALREADY_SET:
-		kfree(country_ie_request);
+		reg_free_request(country_ie_request);
 		return treatment;
 	case REG_REQ_INTERSECT:
-		kfree(country_ie_request);
+		reg_free_request(country_ie_request);
 		/*
 		 * This doesn't happen yet, not sure we
 		 * ever want to support it for this case.
@@ -1813,7 +1876,8 @@
 	case NL80211_REGDOM_SET_BY_USER:
 		treatment = reg_process_hint_user(reg_request);
 		if (treatment == REG_REQ_IGNORE ||
-		    treatment == REG_REQ_ALREADY_SET)
+		    treatment == REG_REQ_ALREADY_SET ||
+		    treatment == REG_REQ_USER_HINT_HANDLED)
 			return;
 		queue_delayed_work(system_power_efficient_wq,
 				   &reg_timeout, msecs_to_jiffies(3142));
@@ -1841,7 +1905,7 @@
 	return;
 
 out_free:
-	kfree(reg_request);
+	reg_free_request(reg_request);
 }
 
 /*
@@ -1857,7 +1921,7 @@
 
 	/* When last_request->processed becomes true this will be rescheduled */
 	if (lr && !lr->processed) {
-		REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n");
+		reg_process_hint(lr);
 		return;
 	}
 
@@ -1967,6 +2031,22 @@
 	return 0;
 }
 
+int regulatory_hint_indoor_user(void)
+{
+	struct regulatory_request *request;
+
+	request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
+
+	request->wiphy_idx = WIPHY_IDX_INVALID;
+	request->initiator = NL80211_REGDOM_SET_BY_USER;
+	request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR;
+	queue_regulatory_request(request);
+
+	return 0;
+}
+
 /* Driver hints */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 {
@@ -2134,6 +2214,8 @@
 
 	ASSERT_RTNL();
 
+	reg_is_indoor = false;
+
 	reset_regdomains(true, &world_regdom);
 	restore_alpha2(alpha2, reset_user);
 
@@ -2594,7 +2676,7 @@
 		reg_num_devs_support_basehint--;
 
 	rcu_free_regdom(get_wiphy_regdom(wiphy));
-	rcu_assign_pointer(wiphy->regd, NULL);
+	RCU_INIT_POINTER(wiphy->regd, NULL);
 
 	if (lr)
 		request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
@@ -2614,6 +2696,40 @@
 	rtnl_unlock();
 }
 
+/*
+ * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii for
+ * the U-NII band definitions.
+ */
+int cfg80211_get_unii(int freq)
+{
+	/* UNII-1 */
+	if (freq >= 5150 && freq <= 5250)
+		return 0;
+
+	/* UNII-2A */
+	if (freq > 5250 && freq <= 5350)
+		return 1;
+
+	/* UNII-2B */
+	if (freq > 5350 && freq <= 5470)
+		return 2;
+
+	/* UNII-2C */
+	if (freq > 5470 && freq <= 5725)
+		return 3;
+
+	/* UNII-3 */
+	if (freq > 5725 && freq <= 5825)
+		return 4;
+
+	return -EINVAL;
+}
+
+bool regulatory_indoor_allowed(void)
+{
+	return reg_is_indoor;
+}
+
 int __init regulatory_init(void)
 {
 	int err = 0;

diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 37c180d..5e48031 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h

@@ -25,6 +25,7 @@
 
 int regulatory_hint_user(const char *alpha2,
 			 enum nl80211_user_reg_hint_type user_reg_hint_type);
+int regulatory_hint_indoor_user(void);
 
 void wiphy_regulatory_register(struct wiphy *wiphy);
 void wiphy_regulatory_deregister(struct wiphy *wiphy);
@@ -104,4 +105,21 @@
  */
 void regulatory_hint_disconnect(void);
 
+/**
+ * cfg80211_get_unii - get the U-NII band for the frequency
+ * @freq: the frequency for which we want to get the U-NII band.
+ *
+ * Get a value specifying the U-NII band the frequency belongs to.
+ * U-NII bands are defined by the FCC in 47 C.F.R. Part 15.
+ *
+ * Returns -EINVAL if freq is not within any U-NII band, 0 for UNII-1,
+ * 1 for UNII-2A, 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3.
+ */
+int cfg80211_get_unii(int freq);
+
+/**
+ * regulatory_indoor_allowed - is indoor operation allowed
+ */
+bool regulatory_indoor_allowed(void);
+
 #endif  /* __NET_WIRELESS_REG_H */

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 88f108e..0798c62 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c

@@ -81,10 +81,10 @@
 	kfree(bss);
 }
 
-static inline void bss_ref_get(struct cfg80211_registered_device *dev,
+static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
 			       struct cfg80211_internal_bss *bss)
 {
-	lockdep_assert_held(&dev->bss_lock);
+	lockdep_assert_held(&rdev->bss_lock);
 
 	bss->refcount++;
 	if (bss->pub.hidden_beacon_bss) {
@@ -95,10 +95,10 @@
 	}
 }
 
-static inline void bss_ref_put(struct cfg80211_registered_device *dev,
+static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
 			       struct cfg80211_internal_bss *bss)
 {
-	lockdep_assert_held(&dev->bss_lock);
+	lockdep_assert_held(&rdev->bss_lock);
 
 	if (bss->pub.hidden_beacon_bss) {
 		struct cfg80211_internal_bss *hbss;
@@ -114,10 +114,10 @@
 		bss_free(bss);
 }
 
-static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
+static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 				  struct cfg80211_internal_bss *bss)
 {
-	lockdep_assert_held(&dev->bss_lock);
+	lockdep_assert_held(&rdev->bss_lock);
 
 	if (!list_empty(&bss->hidden_list)) {
 		/*
@@ -134,31 +134,31 @@
 	}
 
 	list_del_init(&bss->list);
-	rb_erase(&bss->rbn, &dev->bss_tree);
-	bss_ref_put(dev, bss);
+	rb_erase(&bss->rbn, &rdev->bss_tree);
+	bss_ref_put(rdev, bss);
 	return true;
 }
 
-static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev,
+static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
 				  unsigned long expire_time)
 {
 	struct cfg80211_internal_bss *bss, *tmp;
 	bool expired = false;
 
-	lockdep_assert_held(&dev->bss_lock);
+	lockdep_assert_held(&rdev->bss_lock);
 
-	list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) {
+	list_for_each_entry_safe(bss, tmp, &rdev->bss_list, list) {
 		if (atomic_read(&bss->hold))
 			continue;
 		if (!time_after(expire_time, bss->ts))
 			continue;
 
-		if (__cfg80211_unlink_bss(dev, bss))
+		if (__cfg80211_unlink_bss(rdev, bss))
 			expired = true;
 	}
 
 	if (expired)
-		dev->bss_generation++;
+		rdev->bss_generation++;
 }
 
 void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
@@ -238,11 +238,11 @@
 void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
 {
 	trace_cfg80211_scan_done(request, aborted);
-	WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
+	WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req);
 
 	request->aborted = aborted;
 	request->notified = true;
-	queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk);
+	queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
 }
 EXPORT_SYMBOL(cfg80211_scan_done);
 
@@ -278,15 +278,15 @@
 {
 	trace_cfg80211_sched_scan_results(wiphy);
 	/* ignore if we're not scanning */
-	if (wiphy_to_dev(wiphy)->sched_scan_req)
+	if (wiphy_to_rdev(wiphy)->sched_scan_req)
 		queue_work(cfg80211_wq,
-			   &wiphy_to_dev(wiphy)->sched_scan_results_wk);
+			   &wiphy_to_rdev(wiphy)->sched_scan_results_wk);
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_results);
 
 void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	ASSERT_RTNL();
 
@@ -330,21 +330,21 @@
 	return 0;
 }
 
-void cfg80211_bss_age(struct cfg80211_registered_device *dev,
+void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
                       unsigned long age_secs)
 {
 	struct cfg80211_internal_bss *bss;
 	unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC);
 
-	spin_lock_bh(&dev->bss_lock);
-	list_for_each_entry(bss, &dev->bss_list, list)
+	spin_lock_bh(&rdev->bss_lock);
+	list_for_each_entry(bss, &rdev->bss_list, list)
 		bss->ts -= age_jiffies;
-	spin_unlock_bh(&dev->bss_lock);
+	spin_unlock_bh(&rdev->bss_lock);
 }
 
-void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
+void cfg80211_bss_expire(struct cfg80211_registered_device *rdev)
 {
-	__cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
+	__cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
 }
 
 const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
@@ -534,32 +534,34 @@
 				      const u8 *ssid, size_t ssid_len,
 				      u16 capa_mask, u16 capa_val)
 {
-	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_internal_bss *bss, *res = NULL;
 	unsigned long now = jiffies;
 
 	trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask,
 			       capa_val);
 
-	spin_lock_bh(&dev->bss_lock);
+	spin_lock_bh(&rdev->bss_lock);
 
-	list_for_each_entry(bss, &dev->bss_list, list) {
+	list_for_each_entry(bss, &rdev->bss_list, list) {
 		if ((bss->pub.capability & capa_mask) != capa_val)
 			continue;
 		if (channel && bss->pub.channel != channel)
 			continue;
+		if (!is_valid_ether_addr(bss->pub.bssid))
+			continue;
 		/* Don't get expired BSS structs */
 		if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) &&
 		    !atomic_read(&bss->hold))
 			continue;
 		if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
 			res = bss;
-			bss_ref_get(dev, res);
+			bss_ref_get(rdev, res);
 			break;
 		}
 	}
 
-	spin_unlock_bh(&dev->bss_lock);
+	spin_unlock_bh(&rdev->bss_lock);
 	if (!res)
 		return NULL;
 	trace_cfg80211_return_bss(&res->pub);
@@ -567,10 +569,10 @@
 }
 EXPORT_SYMBOL(cfg80211_get_bss);
 
-static void rb_insert_bss(struct cfg80211_registered_device *dev,
+static void rb_insert_bss(struct cfg80211_registered_device *rdev,
 			  struct cfg80211_internal_bss *bss)
 {
-	struct rb_node **p = &dev->bss_tree.rb_node;
+	struct rb_node **p = &rdev->bss_tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct cfg80211_internal_bss *tbss;
 	int cmp;
@@ -593,15 +595,15 @@
 	}
 
 	rb_link_node(&bss->rbn, parent, p);
-	rb_insert_color(&bss->rbn, &dev->bss_tree);
+	rb_insert_color(&bss->rbn, &rdev->bss_tree);
 }
 
 static struct cfg80211_internal_bss *
-rb_find_bss(struct cfg80211_registered_device *dev,
+rb_find_bss(struct cfg80211_registered_device *rdev,
 	    struct cfg80211_internal_bss *res,
 	    enum bss_compare_mode mode)
 {
-	struct rb_node *n = dev->bss_tree.rb_node;
+	struct rb_node *n = rdev->bss_tree.rb_node;
 	struct cfg80211_internal_bss *bss;
 	int r;
 
@@ -620,7 +622,7 @@
 	return NULL;
 }
 
-static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
+static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 				   struct cfg80211_internal_bss *new)
 {
 	const struct cfg80211_bss_ies *ies;
@@ -650,7 +652,7 @@
 
 	/* This is the bad part ... */
 
-	list_for_each_entry(bss, &dev->bss_list, list) {
+	list_for_each_entry(bss, &rdev->bss_list, list) {
 		if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
 			continue;
 		if (bss->pub.channel != new->pub.channel)
@@ -684,7 +686,7 @@
 
 /* Returned bss is reference counted and must be cleaned up appropriately. */
 static struct cfg80211_internal_bss *
-cfg80211_bss_update(struct cfg80211_registered_device *dev,
+cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		    struct cfg80211_internal_bss *tmp,
 		    bool signal_valid)
 {
@@ -695,14 +697,14 @@
 
 	tmp->ts = jiffies;
 
-	spin_lock_bh(&dev->bss_lock);
+	spin_lock_bh(&rdev->bss_lock);
 
 	if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) {
-		spin_unlock_bh(&dev->bss_lock);
+		spin_unlock_bh(&rdev->bss_lock);
 		return NULL;
 	}
 
-	found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR);
+	found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
 
 	if (found) {
 		/* Update IEs */
@@ -789,7 +791,7 @@
 		 * is allocated on the stack since it's not needed in the
 		 * more common case of an update
 		 */
-		new = kzalloc(sizeof(*new) + dev->wiphy.bss_priv_size,
+		new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size,
 			      GFP_ATOMIC);
 		if (!new) {
 			ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
@@ -805,9 +807,9 @@
 		INIT_LIST_HEAD(&new->hidden_list);
 
 		if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
-			hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN);
+			hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
 			if (!hidden)
-				hidden = rb_find_bss(dev, tmp,
+				hidden = rb_find_bss(rdev, tmp,
 						     BSS_CMP_HIDE_NUL);
 			if (hidden) {
 				new->pub.hidden_beacon_bss = &hidden->pub;
@@ -824,24 +826,24 @@
 			 * expensive search for any probe responses that should
 			 * be grouped with this beacon for updates ...
 			 */
-			if (!cfg80211_combine_bsses(dev, new)) {
+			if (!cfg80211_combine_bsses(rdev, new)) {
 				kfree(new);
 				goto drop;
 			}
 		}
 
-		list_add_tail(&new->list, &dev->bss_list);
-		rb_insert_bss(dev, new);
+		list_add_tail(&new->list, &rdev->bss_list);
+		rb_insert_bss(rdev, new);
 		found = new;
 	}
 
-	dev->bss_generation++;
-	bss_ref_get(dev, found);
-	spin_unlock_bh(&dev->bss_lock);
+	rdev->bss_generation++;
+	bss_ref_get(rdev, found);
+	spin_unlock_bh(&rdev->bss_lock);
 
 	return found;
  drop:
-	spin_unlock_bh(&dev->bss_lock);
+	spin_unlock_bh(&rdev->bss_lock);
 	return NULL;
 }
 
@@ -889,6 +891,7 @@
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
 	struct cfg80211_internal_bss tmp = {}, *res;
+	bool signal_valid;
 
 	if (WARN_ON(!wiphy))
 		return NULL;
@@ -925,8 +928,9 @@
 	rcu_assign_pointer(tmp.pub.beacon_ies, ies);
 	rcu_assign_pointer(tmp.pub.ies, ies);
 
-	res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp,
-				  rx_channel == channel);
+	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+		wiphy->max_adj_channel_rssi_comp;
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
@@ -950,6 +954,7 @@
 	struct cfg80211_internal_bss tmp = {}, *res;
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
+	bool signal_valid;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
 
@@ -997,8 +1002,9 @@
 	tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
 	tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
 
-	res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp,
-				  rx_channel == channel);
+	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+		wiphy->max_adj_channel_rssi_comp;
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
@@ -1013,7 +1019,7 @@
 
 void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
-	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_internal_bss *bss;
 
 	if (!pub)
@@ -1021,15 +1027,15 @@
 
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
-	spin_lock_bh(&dev->bss_lock);
-	bss_ref_get(dev, bss);
-	spin_unlock_bh(&dev->bss_lock);
+	spin_lock_bh(&rdev->bss_lock);
+	bss_ref_get(rdev, bss);
+	spin_unlock_bh(&rdev->bss_lock);
 }
 EXPORT_SYMBOL(cfg80211_ref_bss);
 
 void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
-	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_internal_bss *bss;
 
 	if (!pub)
@@ -1037,15 +1043,15 @@
 
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
-	spin_lock_bh(&dev->bss_lock);
-	bss_ref_put(dev, bss);
-	spin_unlock_bh(&dev->bss_lock);
+	spin_lock_bh(&rdev->bss_lock);
+	bss_ref_put(rdev, bss);
+	spin_unlock_bh(&rdev->bss_lock);
 }
 EXPORT_SYMBOL(cfg80211_put_bss);
 
 void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
-	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_internal_bss *bss;
 
 	if (WARN_ON(!pub))
@@ -1053,12 +1059,12 @@
 
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
-	spin_lock_bh(&dev->bss_lock);
+	spin_lock_bh(&rdev->bss_lock);
 	if (!list_empty(&bss->list)) {
-		if (__cfg80211_unlink_bss(dev, bss))
-			dev->bss_generation++;
+		if (__cfg80211_unlink_bss(rdev, bss))
+			rdev->bss_generation++;
 	}
-	spin_unlock_bh(&dev->bss_lock);
+	spin_unlock_bh(&rdev->bss_lock);
 }
 EXPORT_SYMBOL(cfg80211_unlink_bss);
 
@@ -1075,7 +1081,7 @@
 	if (!dev)
 		return ERR_PTR(-ENODEV);
 	if (dev->ieee80211_ptr)
-		rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+		rdev = wiphy_to_rdev(dev->ieee80211_ptr->wiphy);
 	else
 		rdev = ERR_PTR(-ENODEV);
 	dev_put(dev);
@@ -1155,7 +1161,11 @@
 				int k;
 				int wiphy_freq = wiphy->bands[band]->channels[j].center_freq;
 				for (k = 0; k < wreq->num_channels; k++) {
-					int wext_freq = cfg80211_wext_freq(wiphy, &wreq->channel_list[k]);
+					struct iw_freq *freq =
+						&wreq->channel_list[k];
+					int wext_freq =
+						cfg80211_wext_freq(freq);
+
 					if (wext_freq == wiphy_freq)
 						goto wext_freq_found;
 				}
@@ -1467,7 +1477,7 @@
 }
 
 
-static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
+static int ieee80211_scan_results(struct cfg80211_registered_device *rdev,
 				  struct iw_request_info *info,
 				  char *buf, size_t len)
 {
@@ -1475,18 +1485,18 @@
 	char *end_buf = buf + len;
 	struct cfg80211_internal_bss *bss;
 
-	spin_lock_bh(&dev->bss_lock);
-	cfg80211_bss_expire(dev);
+	spin_lock_bh(&rdev->bss_lock);
+	cfg80211_bss_expire(rdev);
 
-	list_for_each_entry(bss, &dev->bss_list, list) {
+	list_for_each_entry(bss, &rdev->bss_list, list) {
 		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
-			spin_unlock_bh(&dev->bss_lock);
+			spin_unlock_bh(&rdev->bss_lock);
 			return -E2BIG;
 		}
-		current_ev = ieee80211_bss(&dev->wiphy, info, bss,
+		current_ev = ieee80211_bss(&rdev->wiphy, info, bss,
 					   current_ev, end_buf);
 	}
-	spin_unlock_bh(&dev->bss_lock);
+	spin_unlock_bh(&rdev->bss_lock);
 	return current_ev - buf;
 }
 

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3546a77..8bbeeb3 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c

@@ -59,7 +59,7 @@
 
 static int cfg80211_conn_scan(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_scan_request *request;
 	int n_channels, err;
 
@@ -130,7 +130,7 @@
 
 static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_connect_params *params;
 	struct cfg80211_assoc_request req = {};
 	int err;
@@ -149,7 +149,8 @@
 	case CFG80211_CONN_SCAN_AGAIN:
 		return cfg80211_conn_scan(wdev);
 	case CFG80211_CONN_AUTHENTICATE_NEXT:
-		BUG_ON(!rdev->ops->auth);
+		if (WARN_ON(!rdev->ops->auth))
+			return -EOPNOTSUPP;
 		wdev->conn->state = CFG80211_CONN_AUTHENTICATING;
 		return cfg80211_mlme_auth(rdev, wdev->netdev,
 					  params->channel, params->auth_type,
@@ -161,7 +162,8 @@
 	case CFG80211_CONN_AUTH_FAILED:
 		return -ENOTCONN;
 	case CFG80211_CONN_ASSOCIATE_NEXT:
-		BUG_ON(!rdev->ops->assoc);
+		if (WARN_ON(!rdev->ops->assoc))
+			return -EOPNOTSUPP;
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
 		if (wdev->conn->prev_bssid_valid)
 			req.prev_bssid = wdev->conn->prev_bssid;
@@ -244,7 +246,7 @@
 /* Returned bss is reference counted and must be cleaned up appropriately. */
 static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_bss *bss;
 	u16 capa = WLAN_CAPABILITY_ESS;
 
@@ -274,7 +276,7 @@
 static void __cfg80211_sme_scan_done(struct net_device *dev)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_bss *bss;
 
 	ASSERT_WDEV_LOCK(wdev);
@@ -305,7 +307,7 @@
 void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
 {
 	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
 	u16 status_code = le16_to_cpu(mgmt->u.auth.status_code);
 
@@ -351,7 +353,7 @@
 
 bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (!wdev->conn)
 		return false;
@@ -385,7 +387,7 @@
 
 void cfg80211_sme_auth_timeout(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (!wdev->conn)
 		return;
@@ -396,7 +398,7 @@
 
 void cfg80211_sme_disassoc(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (!wdev->conn)
 		return;
@@ -407,7 +409,7 @@
 
 void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (!wdev->conn)
 		return;
@@ -420,7 +422,7 @@
 				struct cfg80211_connect_params *connect,
 				const u8 *prev_bssid)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_bss *bss;
 	int err;
 
@@ -467,7 +469,7 @@
 	}
 
 	wdev->conn->params.ssid = wdev->ssid;
-	wdev->conn->params.ssid_len = connect->ssid_len;
+	wdev->conn->params.ssid_len = wdev->ssid_len;
 
 	/* see if we have the bss already */
 	bss = cfg80211_get_conn_bss(wdev);
@@ -479,7 +481,6 @@
 
 	/* we're good if we have a matching bss struct */
 	if (bss) {
-		wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
 		err = cfg80211_conn_do_work(wdev);
 		cfg80211_put_bss(wdev->wiphy, bss);
 	} else {
@@ -505,7 +506,7 @@
 
 static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int err;
 
 	if (!wdev->conn)
@@ -593,7 +594,7 @@
 		return;
 	}
 
-	nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev,
+	nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev,
 				    bssid, req_ie, req_ie_len,
 				    resp_ie, resp_ie_len,
 				    status, GFP_KERNEL);
@@ -624,7 +625,7 @@
 #endif
 
 	if (!bss && (status == WLAN_STATUS_SUCCESS)) {
-		WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect);
+		WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
 		bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
 				       wdev->ssid, wdev->ssid_len,
 				       WLAN_CAPABILITY_ESS,
@@ -687,7 +688,7 @@
 			     u16 status, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_event *ev;
 	unsigned long flags;
 
@@ -742,7 +743,8 @@
 	cfg80211_hold_bss(bss_from_pub(bss));
 	wdev->current_bss = bss_from_pub(bss);
 
-	nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid,
+	nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
+			    wdev->netdev, bss->bssid,
 			    req_ie, req_ie_len, resp_ie, resp_ie_len,
 			    GFP_KERNEL);
 
@@ -801,7 +803,7 @@
 			 size_t resp_ie_len, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_event *ev;
 	unsigned long flags;
 
@@ -834,7 +836,7 @@
 			     size_t ie_len, u16 reason, bool from_ap)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int i;
 #ifdef CONFIG_CFG80211_WEXT
 	union iwreq_data wrqu;
@@ -877,10 +879,10 @@
 }
 
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
-			   u8 *ie, size_t ie_len, gfp_t gfp)
+			   const u8 *ie, size_t ie_len, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_event *ev;
 	unsigned long flags;
 

diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index aabccf1..560ed77 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h

@@ -1876,29 +1876,33 @@
 		WIPHY_ENTRY
 		NETDEV_ENTRY
 		CHAN_DEF_ENTRY
-		__field(u16, counter_offset_beacon)
-		__field(u16, counter_offset_presp)
 		__field(bool, radar_required)
 		__field(bool, block_tx)
 		__field(u8, count)
+		__dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
+		__dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		NETDEV_ASSIGN;
 		CHAN_DEF_ASSIGN(&params->chandef);
-		__entry->counter_offset_beacon = params->counter_offset_beacon;
-		__entry->counter_offset_presp = params->counter_offset_presp;
 		__entry->radar_required = params->radar_required;
 		__entry->block_tx = params->block_tx;
 		__entry->count = params->count;
+		memcpy(__get_dynamic_array(bcn_ofs),
+		       params->counter_offsets_beacon,
+		       params->n_counter_offsets_beacon * sizeof(u16));
+
+		/* probe response offsets are optional */
+		if (params->n_counter_offsets_presp)
+			memcpy(__get_dynamic_array(pres_ofs),
+			       params->counter_offsets_presp,
+			       params->n_counter_offsets_presp * sizeof(u16));
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
-		  ", block_tx: %d, count: %u, radar_required: %d"
-		  ", counter offsets (beacon/presp): %u/%u",
+		  ", block_tx: %d, count: %u, radar_required: %d",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
-		  __entry->block_tx, __entry->count, __entry->radar_required,
-		  __entry->counter_offset_beacon,
-		  __entry->counter_offset_presp)
+		  __entry->block_tx, __entry->count, __entry->radar_required)
 );
 
 TRACE_EVENT(rdev_set_qos_map,
@@ -1919,6 +1923,24 @@
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des)
 );
 
+TRACE_EVENT(rdev_set_ap_chanwidth,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_chan_def *chandef),
+	TP_ARGS(wiphy, netdev, chandef),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		CHAN_DEF_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		CHAN_DEF_ASSIGN(chandef);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
@@ -2193,18 +2215,21 @@
 );
 
 TRACE_EVENT(cfg80211_reg_can_beacon,
-	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
-	TP_ARGS(wiphy, chandef),
+	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
+		 enum nl80211_iftype iftype),
+	TP_ARGS(wiphy, chandef, iftype),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		CHAN_DEF_ENTRY
+		__field(enum nl80211_iftype, iftype)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		CHAN_DEF_ASSIGN(chandef);
+		__entry->iftype = iftype;
 	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
-		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d",
+		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype)
 );
 
 TRACE_EVENT(cfg80211_chandef_dfs_required,
@@ -2615,6 +2640,21 @@
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
 );
 
+TRACE_EVENT(cfg80211_stop_iface,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH

diff --git a/net/wireless/util.c b/net/wireless/util.c
index e5872ff..728f1c0 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c

@@ -476,7 +476,8 @@
 EXPORT_SYMBOL(ieee80211_data_to_8023);
 
 int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype, u8 *bssid, bool qos)
+			     enum nl80211_iftype iftype,
+			     const u8 *bssid, bool qos)
 {
 	struct ieee80211_hdr hdr;
 	u16 hdrlen, ethertype;
@@ -770,7 +771,7 @@
 
 void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct net_device *dev = wdev->netdev;
 	int i;
 
@@ -839,6 +840,9 @@
 			__cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
 					       ev->ij.channel);
 			break;
+		case EVENT_STOPPED:
+			__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
+			break;
 		}
 		wdev_unlock(wdev);
 
@@ -888,11 +892,6 @@
 		return -EBUSY;
 
 	if (ntype != otype && netif_running(dev)) {
-		err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
-						    ntype);
-		if (err)
-			return err;
-
 		dev->ieee80211_ptr->use_4addr = false;
 		dev->ieee80211_ptr->mesh_id_up_len = 0;
 		wdev_lock(dev->ieee80211_ptr);
@@ -1268,6 +1267,120 @@
 	return res;
 }
 
+int cfg80211_iter_combinations(struct wiphy *wiphy,
+			       const int num_different_channels,
+			       const u8 radar_detect,
+			       const int iftype_num[NUM_NL80211_IFTYPES],
+			       void (*iter)(const struct ieee80211_iface_combination *c,
+					    void *data),
+			       void *data)
+{
+	const struct ieee80211_regdomain *regdom;
+	enum nl80211_dfs_regions region = 0;
+	int i, j, iftype;
+	int num_interfaces = 0;
+	u32 used_iftypes = 0;
+
+	if (radar_detect) {
+		rcu_read_lock();
+		regdom = rcu_dereference(cfg80211_regdomain);
+		if (regdom)
+			region = regdom->dfs_region;
+		rcu_read_unlock();
+	}
+
+	for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+		num_interfaces += iftype_num[iftype];
+		if (iftype_num[iftype] > 0 &&
+		    !(wiphy->software_iftypes & BIT(iftype)))
+			used_iftypes |= BIT(iftype);
+	}
+
+	for (i = 0; i < wiphy->n_iface_combinations; i++) {
+		const struct ieee80211_iface_combination *c;
+		struct ieee80211_iface_limit *limits;
+		u32 all_iftypes = 0;
+
+		c = &wiphy->iface_combinations[i];
+
+		if (num_interfaces > c->max_interfaces)
+			continue;
+		if (num_different_channels > c->num_different_channels)
+			continue;
+
+		limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
+				 GFP_KERNEL);
+		if (!limits)
+			return -ENOMEM;
+
+		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+			if (wiphy->software_iftypes & BIT(iftype))
+				continue;
+			for (j = 0; j < c->n_limits; j++) {
+				all_iftypes |= limits[j].types;
+				if (!(limits[j].types & BIT(iftype)))
+					continue;
+				if (limits[j].max < iftype_num[iftype])
+					goto cont;
+				limits[j].max -= iftype_num[iftype];
+			}
+		}
+
+		if (radar_detect != (c->radar_detect_widths & radar_detect))
+			goto cont;
+
+		if (radar_detect && c->radar_detect_regions &&
+		    !(c->radar_detect_regions & BIT(region)))
+			goto cont;
+
+		/* Finally check that all iftypes that we're currently
+		 * using are actually part of this combination. If they
+		 * aren't then we can't use this combination and have
+		 * to continue to the next.
+		 */
+		if ((all_iftypes & used_iftypes) != used_iftypes)
+			goto cont;
+
+		/* This combination covered all interface types and
+		 * supported the requested numbers, so we're good.
+		 */
+
+		(*iter)(c, data);
+ cont:
+		kfree(limits);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_iter_combinations);
+
+static void
+cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
+			  void *data)
+{
+	int *num = data;
+	(*num)++;
+}
+
+int cfg80211_check_combinations(struct wiphy *wiphy,
+				const int num_different_channels,
+				const u8 radar_detect,
+				const int iftype_num[NUM_NL80211_IFTYPES])
+{
+	int err, num = 0;
+
+	err = cfg80211_iter_combinations(wiphy, num_different_channels,
+					 radar_detect, iftype_num,
+					 cfg80211_iter_sum_ifcombs, &num);
+	if (err)
+		return err;
+	if (num == 0)
+		return -EBUSY;
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_check_combinations);
+
 int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 				 struct wireless_dev *wdev,
 				 enum nl80211_iftype iftype,
@@ -1276,7 +1389,6 @@
 				 u8 radar_detect)
 {
 	struct wireless_dev *wdev_iter;
-	u32 used_iftypes = BIT(iftype);
 	int num[NUM_NL80211_IFTYPES];
 	struct ieee80211_channel
 			*used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS];
@@ -1284,7 +1396,7 @@
 	enum cfg80211_chan_mode chmode;
 	int num_different_channels = 0;
 	int total = 1;
-	int i, j;
+	int i;
 
 	ASSERT_RTNL();
 
@@ -1306,6 +1418,11 @@
 
 	num[iftype] = 1;
 
+	/* TODO: We'll probably not need this anymore, since this
+	 * should only be called with CHAN_MODE_UNDEFINED. There are
+	 * still a couple of pending calls where other chanmodes are
+	 * used, but we should get rid of them.
+	 */
 	switch (chanmode) {
 	case CHAN_MODE_UNDEFINED:
 		break;
@@ -1369,65 +1486,13 @@
 
 		num[wdev_iter->iftype]++;
 		total++;
-		used_iftypes |= BIT(wdev_iter->iftype);
 	}
 
 	if (total == 1 && !radar_detect)
 		return 0;
 
-	for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
-		const struct ieee80211_iface_combination *c;
-		struct ieee80211_iface_limit *limits;
-		u32 all_iftypes = 0;
-
-		c = &rdev->wiphy.iface_combinations[i];
-
-		if (total > c->max_interfaces)
-			continue;
-		if (num_different_channels > c->num_different_channels)
-			continue;
-
-		limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
-				 GFP_KERNEL);
-		if (!limits)
-			return -ENOMEM;
-
-		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-			if (rdev->wiphy.software_iftypes & BIT(iftype))
-				continue;
-			for (j = 0; j < c->n_limits; j++) {
-				all_iftypes |= limits[j].types;
-				if (!(limits[j].types & BIT(iftype)))
-					continue;
-				if (limits[j].max < num[iftype])
-					goto cont;
-				limits[j].max -= num[iftype];
-			}
-		}
-
-		if (radar_detect && !(c->radar_detect_widths & radar_detect))
-			goto cont;
-
-		/*
-		 * Finally check that all iftypes that we're currently
-		 * using are actually part of this combination. If they
-		 * aren't then we can't use this combination and have
-		 * to continue to the next.
-		 */
-		if ((all_iftypes & used_iftypes) != used_iftypes)
-			goto cont;
-
-		/*
-		 * This combination covered all interface types and
-		 * supported the requested numbers, so we're good.
-		 */
-		kfree(limits);
-		return 0;
- cont:
-		kfree(limits);
-	}
-
-	return -EBUSY;
+	return cfg80211_check_combinations(&rdev->wiphy, num_different_channels,
+					   radar_detect, num);
 }
 
 int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
@@ -1481,6 +1546,24 @@
 }
 EXPORT_SYMBOL(ieee80211_get_num_supported_channels);
 
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+			 struct station_info *sinfo)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+
+	wdev = dev->ieee80211_ptr;
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	rdev = wiphy_to_rdev(wdev->wiphy);
+	if (!rdev->ops->get_station)
+		return -EOPNOTSUPP;
+
+	return rdev_get_station(rdev, dev, mac_addr, sinfo);
+}
+EXPORT_SYMBOL(cfg80211_get_station);
+
 /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
 /* Ethernet-II snap header (RFC1042 for most EtherTypes) */
 const unsigned char rfc1042_header[] __aligned(2) =

diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5661a54..11120bb 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c

@@ -73,7 +73,7 @@
 	struct vif_params vifparams;
 	enum nl80211_iftype type;
 
-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);
 
 	switch (*mode) {
 	case IW_MODE_INFRA:
@@ -253,12 +253,12 @@
 
 /**
  * cfg80211_wext_freq - get wext frequency for non-"auto"
- * @wiphy: the wiphy
+ * @dev: the net device
  * @freq: the wext freq encoding
  *
  * Returns a frequency, or a negative error code, or 0 for auto.
  */
-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
+int cfg80211_wext_freq(struct iw_freq *freq)
 {
 	/*
 	 * Parse frequency - return 0 for auto and
@@ -286,7 +286,7 @@
 			 struct iw_param *rts, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u32 orts = wdev->wiphy->rts_threshold;
 	int err;
 
@@ -324,7 +324,7 @@
 			  struct iw_param *frag, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u32 ofrag = wdev->wiphy->frag_threshold;
 	int err;
 
@@ -364,7 +364,7 @@
 				  struct iw_param *retry, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u32 changed = 0;
 	u8 olong = wdev->wiphy->retry_long;
 	u8 oshort = wdev->wiphy->retry_short;
@@ -587,7 +587,7 @@
 				   struct iw_point *erq, char *keybuf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int idx, err;
 	bool remove = false;
 	struct key_params params;
@@ -647,7 +647,7 @@
 				      struct iw_point *erq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
 	const u8 *addr;
 	int idx;
@@ -775,7 +775,7 @@
 				 struct iw_freq *wextfreq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_chan_def chandef = {
 		.width = NL80211_CHAN_WIDTH_20_NOHT,
 	};
@@ -787,7 +787,7 @@
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
 	case NL80211_IFTYPE_MONITOR:
-		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+		freq = cfg80211_wext_freq(wextfreq);
 		if (freq < 0)
 			return freq;
 		if (freq == 0)
@@ -798,7 +798,7 @@
 			return -EINVAL;
 		return cfg80211_set_monitor_channel(rdev, &chandef);
 	case NL80211_IFTYPE_MESH_POINT:
-		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+		freq = cfg80211_wext_freq(wextfreq);
 		if (freq < 0)
 			return freq;
 		if (freq == 0)
@@ -818,7 +818,7 @@
 				 struct iw_freq *freq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_chan_def chandef;
 	int ret;
 
@@ -847,7 +847,7 @@
 				    union iwreq_data *data, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	enum nl80211_tx_power_setting type;
 	int dbm = 0;
 
@@ -899,7 +899,7 @@
 				    union iwreq_data *data, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int err, val;
 
 	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -1119,7 +1119,7 @@
 				  struct iw_param *wrq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	bool ps = wdev->ps;
 	int timeout = wdev->ps_timeout;
 	int err;
@@ -1177,7 +1177,7 @@
 				   struct sockaddr *addr, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	int err;
 
 	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS))
@@ -1221,7 +1221,7 @@
 				 struct iw_param *rate, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_bitrate_mask mask;
 	u32 fixed, maxrate;
 	struct ieee80211_supported_band *sband;
@@ -1272,7 +1272,7 @@
 				 struct iw_param *rate, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	/* we are under RTNL - globally locked - so can use a static struct */
 	static struct station_info sinfo;
 	u8 addr[ETH_ALEN];
@@ -1310,7 +1310,7 @@
 static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	/* we are under RTNL - globally locked - so can use static structs */
 	static struct iw_statistics wstats;
 	static struct station_info sinfo;
@@ -1449,7 +1449,7 @@
 				  struct iw_point *data, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_pmksa cfg_pmksa;
 	struct iw_pmksa *pmksa = (struct iw_pmksa *)extra;
 

diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index 5d766b0..ebcacca 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h

@@ -50,7 +50,7 @@
 			   struct iw_point *data, char *extra);
 
 
-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq);
+int cfg80211_wext_freq(struct iw_freq *freq);
 
 
 extern const struct iw_handler_def cfg80211_wext_handler;

diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 86c331a..c7e5c8e 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c

@@ -67,7 +67,7 @@
 			      struct iw_freq *wextfreq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_channel *chan = NULL;
 	int err, freq;
 
@@ -75,7 +75,7 @@
 	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
 		return -EINVAL;
 
-	freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+	freq = cfg80211_wext_freq(wextfreq);
 	if (freq < 0)
 		return freq;
 
@@ -169,7 +169,7 @@
 			       struct iw_point *data, char *ssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	size_t len = data->length;
 	int err;
 
@@ -260,7 +260,7 @@
 			    struct sockaddr *ap_addr, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u8 *bssid = ap_addr->sa_data;
 	int err;
 
@@ -333,7 +333,7 @@
 			   struct iw_point *data, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	u8 *ie = extra;
 	int ie_len = data->length, err;
 
@@ -390,7 +390,7 @@
 	if (!wdev)
 		return -EOPNOTSUPP;
 
-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);
 
 	if (wdev->iftype != NL80211_IFTYPE_STATION)
 		return -EINVAL;

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3bb2cdc..c51e8f7b 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c

@@ -199,6 +199,7 @@
 
 	return xfrm_output2(skb);
 }
+EXPORT_SYMBOL_GPL(xfrm_output);
 
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
@@ -213,6 +214,7 @@
 		return -EAFNOSUPPORT;
 	return inner_mode->afinfo->extract_output(x, skb);
 }
+EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
 
 void xfrm_local_error(struct sk_buff *skb, int mtu)
 {
@@ -233,7 +235,4 @@
 	afinfo->local_error(skb, mtu);
 	xfrm_state_put_afinfo(afinfo);
 }
-
-EXPORT_SYMBOL_GPL(xfrm_output);
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
 EXPORT_SYMBOL_GPL(xfrm_local_error);

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c08fbd1..a8ef510 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c

@@ -769,7 +769,7 @@
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0;
 
@@ -783,10 +783,7 @@
 				continue;
 			err = security_xfrm_policy_delete(pol->security);
 			if (err) {
-				xfrm_audit_policy_delete(pol, 0,
-							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+				xfrm_audit_policy_delete(pol, 0, task_valid);
 				return err;
 			}
 		}
@@ -800,9 +797,7 @@
 								pol->security);
 				if (err) {
 					xfrm_audit_policy_delete(pol, 0,
-							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+								 task_valid);
 					return err;
 				}
 			}
@@ -812,19 +807,19 @@
 }
 #else
 static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 {
 	return 0;
 }
 #endif
 
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0, cnt = 0;
 
 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
 
-	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
+	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
 	if (err)
 		goto out;
 
@@ -841,9 +836,7 @@
 			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 			cnt++;
 
-			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
-						 audit_info->sessionid,
-						 audit_info->secid);
+			xfrm_audit_policy_delete(pol, 1, task_valid);
 
 			xfrm_policy_kill(pol);
 
@@ -862,10 +855,7 @@
 				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 				cnt++;
 
-				xfrm_audit_policy_delete(pol, 1,
-							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+				xfrm_audit_policy_delete(pol, 1, task_valid);
 				xfrm_policy_kill(pol);
 
 				write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -2783,21 +2773,19 @@
 static int __net_init xfrm_statistics_init(struct net *net)
 {
 	int rv;
-
-	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib),
-			  __alignof__(struct linux_xfrm_mib)) < 0)
+	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
+	if (!net->mib.xfrm_statistics)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
-		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+		free_percpu(net->mib.xfrm_statistics);
 	return rv;
 }
 
 static void xfrm_statistics_fini(struct net *net)
 {
 	xfrm_proc_fini(net);
-	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+	free_percpu(net->mib.xfrm_statistics);
 }
 #else
 static int __net_init xfrm_statistics_init(struct net *net)
@@ -2862,21 +2850,14 @@
 
 static void xfrm_policy_fini(struct net *net)
 {
-	struct xfrm_audit audit_info;
 	unsigned int sz;
 	int dir;
 
 	flush_work(&net->xfrm.policy_hash_work);
 #ifdef CONFIG_XFRM_SUB_POLICY
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
 #endif
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
@@ -2991,15 +2972,14 @@
 	}
 }
 
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-			   kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
@@ -3007,14 +2987,14 @@
 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
 
 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-			      kuid_t auid, unsigned int sessionid, u32 secid)
+			      bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);

diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fc5abd0..9c4fbd8 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c

@@ -54,8 +54,7 @@
 	int i;
 	for (i = 0; xfrm_mib_list[i].name; i++)
 		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   snmp_fold_field((void __percpu **)
-					   net->mib.xfrm_statistics,
+			   snmp_fold_field(net->mib.xfrm_statistics,
 					   xfrm_mib_list[i].entry));
 	return 0;
 }

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8e9c781..0ab5413 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c

@@ -463,9 +463,7 @@
 	if (!err)
 		km_state_expired(x, 1, 0);
 
-	xfrm_audit_state_delete(x, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 
 out:
 	spin_unlock(&x->lock);
@@ -562,7 +560,7 @@
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 {
 	int i, err = 0;
 
@@ -572,10 +570,7 @@
 		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto) &&
 			   (err = security_xfrm_state_delete(x)) != 0) {
-				xfrm_audit_state_delete(x, 0,
-							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+				xfrm_audit_state_delete(x, 0, task_valid);
 				return err;
 			}
 		}
@@ -585,18 +580,18 @@
 }
 #else
 static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 {
 	return 0;
 }
 #endif
 
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
 {
 	int i, err = 0, cnt = 0;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	err = xfrm_state_flush_secctx_check(net, proto, audit_info);
+	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
 	if (err)
 		goto out;
 
@@ -612,9 +607,7 @@
 
 				err = xfrm_state_delete(x);
 				xfrm_audit_state_delete(x, err ? 0 : 1,
-							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+							task_valid);
 				xfrm_state_put(x);
 				if (!err)
 					cnt++;
@@ -2128,14 +2121,10 @@
 
 void xfrm_state_fini(struct net *net)
 {
-	struct xfrm_audit audit_info;
 	unsigned int sz;
 
 	flush_work(&net->xfrm.state_hash_work);
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
+	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	flush_work(&net->xfrm.state_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
@@ -2198,30 +2187,28 @@
 	}
 }
 
-void xfrm_audit_state_add(struct xfrm_state *x, int result,
-			  kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
 
-void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-			     kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 51398ae..412d9dc 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c

@@ -597,9 +597,6 @@
 	struct xfrm_state *x;
 	int err;
 	struct km_event c;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -615,8 +612,7 @@
 	else
 		err = xfrm_state_update(x);
 
-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_add(x, err ? 0 : 1, true);
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -676,9 +672,6 @@
 	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -703,8 +696,7 @@
 	km_state_notify(x, &c);
 
 out:
-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 	xfrm_state_put(x);
 	return err;
 }
@@ -955,6 +947,20 @@
 	return skb;
 }
 
+/* A wrapper for nlmsg_multicast() checking that nlsk is still available.
+ * Must be called with RCU read lock.
+ */
+static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
+				       u32 pid, unsigned int group)
+{
+	struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+	if (nlsk)
+		return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
+	else
+		return -1;
+}
+
 static inline size_t xfrm_spdinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
@@ -1414,9 +1420,6 @@
 	struct km_event c;
 	int err;
 	int excl;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1435,8 +1438,7 @@
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
-	security_task_getsecid(current, &sid);
-	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_policy_add(xp, err ? 0 : 1, true);
 
 	if (err) {
 		security_xfrm_policy_free(xp->security);
@@ -1673,13 +1675,7 @@
 					    NETLINK_CB(skb).portid);
 		}
 	} else {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
-		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
-					 sid);
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 		if (err != 0)
 			goto out;
@@ -1704,13 +1700,9 @@
 	struct net *net = sock_net(skb->sk);
 	struct km_event c;
 	struct xfrm_usersa_flush *p = nlmsg_data(nlh);
-	struct xfrm_audit audit_info;
 	int err;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
-	err = xfrm_state_flush(net, p->proto, &audit_info);
+	err = xfrm_state_flush(net, p->proto, true);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
@@ -1894,16 +1886,12 @@
 	struct km_event c;
 	u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
-	struct xfrm_audit audit_info;
 
 	err = copy_from_user_policy_type(&type, attrs);
 	if (err)
 		return err;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
-	err = xfrm_policy_flush(net, type, &audit_info);
+	err = xfrm_policy_flush(net, type, true);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
@@ -1969,14 +1957,8 @@
 
 	err = 0;
 	if (up->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
 		xfrm_policy_delete(xp, p->dir);
-		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
-
+		xfrm_audit_policy_delete(xp, 1, true);
 	} else {
 		// reset the timers here?
 		WARN(1, "Dont know what to do with soft policy expire\n");
@@ -2012,13 +1994,8 @@
 	km_state_expired(x, ue->hard, nlh->nlmsg_pid);
 
 	if (ue->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
 		__xfrm_state_delete(x);
-		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
+		xfrm_audit_state_delete(x, 1, true);
 	}
 	err = 0;
 out:
@@ -2265,7 +2242,7 @@
 	if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
 }
 #else
 static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@@ -2456,7 +2433,7 @@
 		return -EMSGSIZE;
 	}
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
 }
 
 static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
@@ -2471,7 +2448,7 @@
 	if (build_aevent(skb, x, c) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
 }
 
 static int xfrm_notify_sa_flush(const struct km_event *c)
@@ -2497,7 +2474,7 @@
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
 }
 
 static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2584,7 +2561,7 @@
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
 
 out_free_skb:
 	kfree_skb(skb);
@@ -2675,7 +2652,7 @@
 	if (build_acquire(skb, x, xt, xp) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
 }
 
 /* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2789,7 +2766,7 @@
 	if (build_polexpire(skb, xp, dir, c) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
 }
 
 static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
@@ -2851,7 +2828,7 @@
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
 
 out_free_skb:
 	kfree_skb(skb);
@@ -2879,7 +2856,7 @@
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
 
 out_free_skb:
 	kfree_skb(skb);
@@ -2948,7 +2925,7 @@
 	if (build_report(skb, proto, sel, addr) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
 }
 
 static inline size_t xfrm_mapping_msgsize(void)
@@ -3000,7 +2977,7 @@
 	if (build_mapping(skb, x, ipaddr, sport) < 0)
 		BUG();
 
-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
 }
 
 static bool xfrm_is_alive(const struct km_event *c)

diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 86ea0c3..01562e0 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c

@@ -40,8 +40,9 @@
 	return count;
 }
 
+/* Sysfs attributes cannot be world-writable. */
 static struct kobj_attribute foo_attribute =
-	__ATTR(foo, 0666, foo_show, foo_store);
+	__ATTR(foo, 0664, foo_show, foo_store);
 
 /*
  * More complex function where we determine which variable is being accessed by
@@ -73,9 +74,9 @@
 }
 
 static struct kobj_attribute baz_attribute =
-	__ATTR(baz, 0666, b_show, b_store);
+	__ATTR(baz, 0664, b_show, b_store);
 static struct kobj_attribute bar_attribute =
-	__ATTR(bar, 0666, b_show, b_store);
+	__ATTR(bar, 0664, b_show, b_store);
 
 
 /*

diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 5dce351..ab5e447 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c

@@ -124,8 +124,9 @@
 	return count;
 }
 
+/* Sysfs attributes cannot be world-writable. */
 static struct foo_attribute foo_attribute =
-	__ATTR(foo, 0666, foo_show, foo_store);
+	__ATTR(foo, 0664, foo_show, foo_store);
 
 /*
  * More complex function where we determine which variable is being accessed by
@@ -157,9 +158,9 @@
 }
 
 static struct foo_attribute baz_attribute =
-	__ATTR(baz, 0666, b_show, b_store);
+	__ATTR(baz, 0664, b_show, b_store);
 static struct foo_attribute bar_attribute =
-	__ATTR(bar, 0666, b_show, b_store);
+	__ATTR(bar, 0664, b_show, b_store);
 
 /*
  * Create a group of attributes so that we can create and destroy them all

diff --git a/scripts/Makefile b/scripts/Makefile
index 1d07860..890df5c 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile

@@ -39,4 +39,4 @@
 subdir-$(CONFIG_DTC)         += dtc
 
 # Let clean descend into subdirs
-subdir-	+= basic kconfig package selinux
+subdir-	+= basic kconfig package

diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index d17e0ea..045e0098 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic

@@ -21,4 +21,3 @@
 
 $(obj)/%.h:
 	$(call cmd,wrap)
-

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 003bc26..bf3e677 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build

@@ -50,67 +50,6 @@
         endif
 endif
 
-#
-# make W=... settings
-#
-# W=1 - warnings that may be relevant and does not occur too often
-# W=2 - warnings that occur quite often but may still be relevant
-# W=3 - the more obscure warnings, can most likely be ignored
-#
-# $(call cc-option, -W...) handles gcc -W.. options which
-# are not supported by all versions of the compiler
-ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
-warning-  := $(empty)
-
-warning-1 := -Wextra -Wunused -Wno-unused-parameter
-warning-1 += -Wmissing-declarations
-warning-1 += -Wmissing-format-attribute
-warning-1 += $(call cc-option, -Wmissing-prototypes)
-warning-1 += -Wold-style-definition
-warning-1 += $(call cc-option, -Wmissing-include-dirs)
-warning-1 += $(call cc-option, -Wunused-but-set-variable)
-warning-1 += $(call cc-disable-warning, missing-field-initializers)
-
-# Clang
-warning-1 += $(call cc-disable-warning, initializer-overrides)
-warning-1 += $(call cc-disable-warning, unused-value)
-warning-1 += $(call cc-disable-warning, format)
-warning-1 += $(call cc-disable-warning, unknown-warning-option)
-warning-1 += $(call cc-disable-warning, sign-compare)
-warning-1 += $(call cc-disable-warning, format-zero-length)
-warning-1 += $(call cc-disable-warning, uninitialized)
-warning-1 += $(call cc-option, -fcatch-undefined-behavior)
-
-warning-2 := -Waggregate-return
-warning-2 += -Wcast-align
-warning-2 += -Wdisabled-optimization
-warning-2 += -Wnested-externs
-warning-2 += -Wshadow
-warning-2 += $(call cc-option, -Wlogical-op)
-warning-2 += $(call cc-option, -Wmissing-field-initializers)
-
-warning-3 := -Wbad-function-cast
-warning-3 += -Wcast-qual
-warning-3 += -Wconversion
-warning-3 += -Wpacked
-warning-3 += -Wpadded
-warning-3 += -Wpointer-arith
-warning-3 += -Wredundant-decls
-warning-3 += -Wswitch-default
-warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
-warning-3 += $(call cc-option, -Wvla)
-
-warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-
-ifeq ("$(strip $(warning))","")
-        $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
-endif
-
-KBUILD_CFLAGS += $(warning)
-endif
-
 include scripts/Makefile.lib
 
 ifdef host-progs
@@ -342,7 +281,7 @@
 $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
 
 quiet_cmd_as_s_S = CPP $(quiet_modtag) $@
-cmd_as_s_S       = $(CPP) $(a_flags)   -o $@ $< 
+cmd_as_s_S       = $(CPP) $(a_flags)   -o $@ $<
 
 $(obj)/%.s: $(src)/%.S FORCE
 	$(call if_changed_dep,as_s_S)
@@ -436,7 +375,7 @@
 $(filter $(addprefix $(obj)/,         \
 $($(subst $(obj)/,,$(@:.o=-objs)))    \
 $($(subst $(obj)/,,$(@:.o=-y)))), $^)
- 
+
 quiet_cmd_link_multi-y = LD      $@
 cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
 

diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
new file mode 100644
index 0000000..6564350
--- /dev/null
+++ b/scripts/Makefile.extrawarn

@@ -0,0 +1,67 @@
+# ==========================================================================
+#
+# make W=... settings
+#
+# W=1 - warnings that may be relevant and does not occur too often
+# W=2 - warnings that occur quite often but may still be relevant
+# W=3 - the more obscure warnings, can most likely be ignored
+#
+# $(call cc-option, -W...) handles gcc -W.. options which
+# are not supported by all versions of the compiler
+# ==========================================================================
+
+ifeq ("$(origin W)", "command line")
+  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
+endif
+
+ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
+warning-  := $(empty)
+
+warning-1 := -Wextra -Wunused -Wno-unused-parameter
+warning-1 += -Wmissing-declarations
+warning-1 += -Wmissing-format-attribute
+warning-1 += $(call cc-option, -Wmissing-prototypes)
+warning-1 += -Wold-style-definition
+warning-1 += $(call cc-option, -Wmissing-include-dirs)
+warning-1 += $(call cc-option, -Wunused-but-set-variable)
+warning-1 += $(call cc-disable-warning, missing-field-initializers)
+
+# Clang
+warning-1 += $(call cc-disable-warning, initializer-overrides)
+warning-1 += $(call cc-disable-warning, unused-value)
+warning-1 += $(call cc-disable-warning, format)
+warning-1 += $(call cc-disable-warning, unknown-warning-option)
+warning-1 += $(call cc-disable-warning, sign-compare)
+warning-1 += $(call cc-disable-warning, format-zero-length)
+warning-1 += $(call cc-disable-warning, uninitialized)
+warning-1 += $(call cc-option, -fcatch-undefined-behavior)
+
+warning-2 := -Waggregate-return
+warning-2 += -Wcast-align
+warning-2 += -Wdisabled-optimization
+warning-2 += -Wnested-externs
+warning-2 += -Wshadow
+warning-2 += $(call cc-option, -Wlogical-op)
+warning-2 += $(call cc-option, -Wmissing-field-initializers)
+
+warning-3 := -Wbad-function-cast
+warning-3 += -Wcast-qual
+warning-3 += -Wconversion
+warning-3 += -Wpacked
+warning-3 += -Wpadded
+warning-3 += -Wpointer-arith
+warning-3 += -Wredundant-decls
+warning-3 += -Wswitch-default
+warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
+warning-3 += $(call cc-option, -Wvla)
+
+warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+
+ifeq ("$(strip $(warning))","")
+        $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
+endif
+
+KBUILD_CFLAGS += $(warning)
+endif

diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index 4d908d1..d8e335e 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst

@@ -18,31 +18,29 @@
 include scripts/Makefile.host
 
 mod-fw := $(fw-shipped-m)
-# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the 
+# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the
 # firmware for in-kernel drivers too.
 ifndef CONFIG_FIRMWARE_IN_KERNEL
 mod-fw += $(fw-shipped-y)
 endif
 
+ifneq ($(KBUILD_SRC),)
+# Create output directory if not already present
+_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj))
+
+firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all))))
+# Create directories for firmware in subdirectories
+_dummy := $(foreach d,$(firmware-dirs), $(shell [ -d $(d) ] || mkdir -p $(d)))
+endif
+
 installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
 
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
-installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./
-
-# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
-PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
-$(INSTALL_FW_PATH)/$$(%): install-all-dirs
-	@true
-install-all-dirs: $(installed-fw-dirs)
-	@true
 
 quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
-      cmd_install = $(INSTALL) -m0644 $< $@
+      cmd_install = mkdir -p $(@D); $(INSTALL) -m0644 $< $@
 
-$(installed-fw-dirs):
-	$(call cmd,mkdir)
-
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/%
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE

diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 1ac414f..6689364 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host

@@ -166,5 +166,4 @@
 	$(call if_changed,host-cshlib)
 
 targets += $(host-csingle)  $(host-cmulti) $(host-cobjs)\
-	   $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs) 
-
+	   $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs)

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6a5b0de..260bf8a 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib

@@ -27,7 +27,7 @@
 # ---------------------------------------------------------------------------
 # o if we encounter foo/ in $(obj-y), replace it by foo/built-in.o
 #   and add the directory to the list of dirs to descend into: $(subdir-y)
-# o if we encounter foo/ in $(obj-m), remove it from $(obj-m) 
+# o if we encounter foo/ in $(obj-m), remove it from $(obj-m)
 #   and add the directory to the list of dirs to descend into: $(subdir-m)
 
 # Determine modorder.
@@ -46,7 +46,7 @@
 
 subdir-ym	:= $(sort $(subdir-y) $(subdir-m))
 
-# if $(foo-objs) exists, foo.o is a composite object 
+# if $(foo-objs) exists, foo.o is a composite object
 multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
 multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
 multi-used   := $(multi-used-y) $(multi-used-m)
@@ -91,7 +91,7 @@
 
 # These flags are needed for modversions and compiling, so we define them here
 # already
-# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will 
+# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will
 # end up in (or would, if it gets compiled in)
 # Note: Files that end up in two or more modules are compiled without the
 #       KBUILD_MODNAME definition. The reason is that any made-up name would
@@ -212,7 +212,7 @@
 
 # Commands useful for building a boot image
 # ===========================================================================
-# 
+#
 #	Use as following:
 #
 #	target: source(s) FORCE
@@ -226,7 +226,7 @@
 
 quiet_cmd_ld = LD      $@
 cmd_ld = $(LD) $(LDFLAGS) $(ldflags-y) $(LDFLAGS_$(@F)) \
-	       $(filter-out FORCE,$^) -o $@ 
+	       $(filter-out FORCE,$^) -o $@
 
 # Objcopy
 # ---------------------------------------------------------------------------

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 078fe1d..b304068 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c

@@ -409,10 +409,10 @@
 		exit(2);
 	}
 	if (fstat(fd, &st) < 0) {
-                fprintf(stderr, "fixdep: error fstat'ing depfile: ");
-                perror(depfile);
-                exit(2);
-        }
+		fprintf(stderr, "fixdep: error fstat'ing depfile: ");
+		perror(depfile);
+		exit(2);
+	}
 	if (st.st_size == 0) {
 		fprintf(stderr,"fixdep: %s is empty\n",depfile);
 		close(fd);

diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 544aa56..c05d586 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl

@@ -173,4 +173,3 @@
 
 # Sort output by size (last field)
 print sort { ($b =~ /:\t*(\d+)$/)[0] <=> ($a =~ /:\t*(\d+)$/)[0] } @stack;
-

diff --git a/scripts/coccinelle/misc/of_table.cocci b/scripts/coccinelle/misc/of_table.cocci
new file mode 100644
index 0000000..3c93404
--- /dev/null
+++ b/scripts/coccinelle/misc/of_table.cocci

@@ -0,0 +1,62 @@
+/// Make sure of_device_id tables are NULL terminated
+//
+// Keywords: of_table
+// Confidence: Medium
+// Options: --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@depends on context@
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+*	}
+};
+
+@depends on patch@
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+-	}
++	},
++	{ }
+};
+
+@r depends on org || report@
+position p1;
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+	}
+	@p1
+};
+
+@script:python depends on org@
+p1 << r.p1;
+arr << r.arr;
+@@
+
+cocci.print_main(arr,p1)
+
+@script:python depends on report@
+p1 << r.p1;
+arr << r.arr;
+@@
+
+msg = "%s is not NULL terminated at line %s" % (arr, p1[0].line)
+coccilib.report.print_report(p1[0],msg)

diff --git a/scripts/coccinelle/misc/returnvar.cocci b/scripts/coccinelle/misc/returnvar.cocci
new file mode 100644
index 0000000..605955a
--- /dev/null
+++ b/scripts/coccinelle/misc/returnvar.cocci

@@ -0,0 +1,66 @@
+///
+/// Removes unneeded variable used to store return value.
+///
+// Confidence: Moderate
+// Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6.  GPLv2.
+// URL: http://coccinelle.lip6.fr/
+// Comments: Comments on code can be deleted if near code that is removed.
+//           "when strict" can be removed to get more hits, but adds false
+//           positives
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual report
+virtual context
+virtual org
+
+@depends on patch@
+type T;
+constant C;
+identifier ret;
+@@
+- T ret = C;
+... when != ret
+    when strict
+return
+- ret
++ C
+;
+
+@depends on context@
+type T;
+constant C;
+identifier ret;
+@@
+* T ret = C;
+... when != ret
+    when strict
+* return ret;
+
+@r1 depends on report || org@
+type T;
+constant C;
+identifier ret;
+position p1, p2;
+@@
+T ret@p1 = C;
+... when != ret
+    when strict
+return ret@p2;
+
+@script:python depends on report@
+p1 << r1.p1;
+p2 << r1.p2;
+C << r1.C;
+ret << r1.ret;
+@@
+coccilib.report.print_report(p1[0], "Unneeded variable: \"" + ret + "\". Return \"" + C + "\" on line " + p2[0].line)
+
+@script:python depends on org@
+p1 << r1.p1;
+p2 << r1.p2;
+C << r1.C;
+ret << r1.ret;
+@@
+cocci.print_main("unneeded \"" + ret + "\" variable", p1)
+cocci.print_sec("return " + C + " here", p2)

diff --git a/scripts/config b/scripts/config
index 6804179..026aeb4 100755
--- a/scripts/config
+++ b/scripts/config

@@ -223,4 +223,3 @@
 		;;
 	esac
 done
-

diff --git a/scripts/conmakehash.c b/scripts/conmakehash.c
index 263a44d..61bbda5 100644
--- a/scripts/conmakehash.c
+++ b/scripts/conmakehash.c

@@ -104,7 +104,7 @@
 	}
     }
 
-  /* For now we assume the default font is always 256 characters. */    
+  /* For now we assume the default font is always 256 characters. */
   fontlen = 256;
 
   /* Initialize table */
@@ -236,15 +236,15 @@
     }
 
   /* Okay, we hit EOF, now output hash table */
-  
+
   fclose(ctbl);
-  
+
 
   /* Compute total size of Unicode list */
   nuni = 0;
   for ( i = 0 ; i < fontlen ; i++ )
     nuni += unicount[i];
-  
+
   printf("\
 /*\n\
  * Do not edit this file; it was automatically generated by\n\
@@ -268,9 +268,9 @@
       else
         printf(", ");
     }
-  
+
   printf("\nu16 dfont_unitable[%d] = \n{\n\t", nuni);
-  
+
   fp0 = 0;
   nent = 0;
   for ( i = 0 ; i < nuni ; i++ )

diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
new file mode 100755
index 0000000..515c4c0
--- /dev/null
+++ b/scripts/decode_stacktrace.sh

@@ -0,0 +1,126 @@
+#!/bin/bash
+# (c) 2014, Sasha Levin <sasha.levin@oracle.com>
+#set -x
+
+if [[ $# != 2 ]]; then
+	echo "Usage:"
+	echo "	$0 [vmlinux] [base path]"
+	exit 1
+fi
+
+vmlinux=$1
+basepath=$2
+declare -A cache
+
+parse_symbol() {
+	# The structure of symbol at this point is:
+	#   [name]+[offset]/[total length]
+	#
+	# For example:
+	#   do_basic_setup+0x9c/0xbf
+
+
+	# Strip the symbol name so that we could look it up
+	local name=${symbol%+*}
+
+	# Use 'nm vmlinux' to figure out the base address of said symbol.
+	# It's actually faster to call it every time than to load it
+	# all into bash.
+	if [[ "${cache[$name]+isset}" == "isset" ]]; then
+		local base_addr=${cache[$name]}
+	else
+		local base_addr=$(nm "$vmlinux" | grep -i ' t ' | awk "/ $name\$/ {print \$1}" | head -n1)
+		cache["$name"]="$base_addr"
+	fi
+	# Let's start doing the math to get the exact address into the
+	# symbol. First, strip out the symbol total length.
+	local expr=${symbol%/*}
+
+	# Now, replace the symbol name with the base address we found
+	# before.
+	expr=${expr/$name/0x$base_addr}
+
+	# Evaluate it to find the actual address
+	expr=$((expr))
+	local address=$(printf "%x\n" "$expr")
+
+	# Pass it to addr2line to get filename and line number
+        # Could get more than one result
+	if [[ "${cache[$address]+isset}" == "isset" ]]; then
+		local code=${cache[$address]}
+	else
+		local code=$(addr2line -i -e "$vmlinux" "$address")
+		cache[$address]=$code
+	fi
+
+	# addr2line doesn't return a proper error code if it fails, so
+	# we detect it using the value it prints so that we could preserve
+	# the offset/size into the function and bail out
+	if [[ $code == "??:0" ]]; then
+		return
+	fi
+
+	# Strip out the base of the path
+	code=${code//$basepath/""}
+
+	# In the case of inlines, move everything to same line
+	code=${code//$'\n'/' '}
+
+	# Replace old address with pretty line numbers
+	symbol="$name ($code)"
+}
+
+decode_code() {
+	local scripts=`dirname "${BASH_SOURCE[0]}"`
+
+	echo "$1" | $scripts/decodecode
+}
+
+handle_line() {
+	local words
+
+	# Tokenize
+	read -a words <<<"$1"
+
+	# Remove hex numbers. Do it ourselves until it happens in the
+	# kernel
+
+	# We need to know the index of the last element before we
+	# remove elements because arrays are sparse
+	local last=$(( ${#words[@]} - 1 ))
+
+	for i in "${!words[@]}"; do
+		# Remove the address
+		if [[ ${words[$i]} =~ \[\<([^]]+)\>\] ]]; then
+			unset words[$i]
+		fi
+
+		# Format timestamps with tabs
+		if [[ ${words[$i]} == \[ && ${words[$i+1]} == *\] ]]; then
+			unset words[$i]
+			words[$i+1]=$(printf "[%13s\n" "${words[$i+1]}")
+		fi
+	done
+
+	# The symbol is the last element, process it
+	symbol=${words[$last]}
+	unset words[$last]
+	parse_symbol # modifies $symbol
+
+	# Add up the line number to the symbol
+	echo "${words[@]}" "$symbol"
+}
+
+while read line; do
+	# Let's see if we have an address in the line
+	if [[ $line =~ \[\<([^]]+)\>\]  ]]; then
+		# Translate address to line numbers
+		handle_line "$line"
+	# Is it a code line?
+	elif [[ $line == *Code:* ]]; then
+                decode_code "$line"
+        else
+		# Nothing special in this line, show it as is
+		echo "$line"
+	fi
+done

diff --git a/scripts/docproc.c b/scripts/docproc.c
index 2b69eaf..e267e621 100644
--- a/scripts/docproc.c
+++ b/scripts/docproc.c

@@ -154,7 +154,7 @@
 static void add_new_symbol(struct symfile *sym, char * symname)
 {
 	sym->symbollist =
-          realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
+	  realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
 	sym->symbollist[sym->symbolcnt++].name = strdup(symname);
 }
 
@@ -215,7 +215,7 @@
 			char *p;
 			char *e;
 			if (((p = strstr(line, "EXPORT_SYMBOL_GPL")) != NULL) ||
-                            ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) {
+			    ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) {
 				/* Skip EXPORT_SYMBOL{_GPL} */
 				while (isalnum(*p) || *p == '_')
 					p++;
@@ -291,28 +291,28 @@
 static void singfunc(char * filename, char * line)
 {
 	char *vec[200]; /* Enough for specific functions */
-        int i, idx = 0;
-        int startofsym = 1;
+	int i, idx = 0;
+	int startofsym = 1;
 	vec[idx++] = KERNELDOC;
 	vec[idx++] = DOCBOOK;
 	vec[idx++] = SHOWNOTFOUND;
 
-        /* Split line up in individual parameters preceded by FUNCTION */
-        for (i=0; line[i]; i++) {
-                if (isspace(line[i])) {
-                        line[i] = '\0';
-                        startofsym = 1;
-                        continue;
-                }
-                if (startofsym) {
-                        startofsym = 0;
-                        vec[idx++] = FUNCTION;
-                        vec[idx++] = &line[i];
-                }
-        }
+	/* Split line up in individual parameters preceded by FUNCTION */
+	for (i=0; line[i]; i++) {
+		if (isspace(line[i])) {
+			line[i] = '\0';
+			startofsym = 1;
+			continue;
+		}
+		if (startofsym) {
+			startofsym = 0;
+			vec[idx++] = FUNCTION;
+			vec[idx++] = &line[i];
+		}
+	}
 	for (i = 0; i < idx; i++) {
-        	if (strcmp(vec[i], FUNCTION))
-        		continue;
+		if (strcmp(vec[i], FUNCTION))
+			continue;
 		consume_symbol(vec[i + 1]);
 	}
 	vec[idx++] = filename;
@@ -460,14 +460,14 @@
 					break;
 				case 'D':
 					while (*s && !isspace(*s)) s++;
-                                        *s = '\0';
-                                        symbolsonly(line+2);
-                                        break;
+					*s = '\0';
+					symbolsonly(line+2);
+					break;
 				case 'F':
 					/* filename */
 					while (*s && !isspace(*s)) s++;
 					*s++ = '\0';
-                                        /* function names */
+					/* function names */
 					while (isspace(*s))
 						s++;
 					singlefunctions(line +2, s);
@@ -515,11 +515,11 @@
 	}
 	/* Open file, exit on error */
 	infile = fopen(argv[2], "r");
-        if (infile == NULL) {
-                fprintf(stderr, "docproc: ");
-                perror(argv[2]);
-                exit(2);
-        }
+	if (infile == NULL) {
+		fprintf(stderr, "docproc: ");
+		perror(argv[2]);
+		exit(2);
+	}
 
 	if (strcmp("doc", argv[1]) == 0) {
 		/* Need to do this in two passes.

diff --git a/scripts/dtc/.gitignore b/scripts/dtc/.gitignore
index 095acb4..cdabdc9 100644
--- a/scripts/dtc/.gitignore
+++ b/scripts/dtc/.gitignore

@@ -2,4 +2,3 @@
 dtc-lexer.lex.c
 dtc-parser.tab.c
 dtc-parser.tab.h
-

diff --git a/scripts/dtc/fstree.c b/scripts/dtc/fstree.c
index f377453..e464727 100644
--- a/scripts/dtc/fstree.c
+++ b/scripts/dtc/fstree.c

@@ -88,4 +88,3 @@
 
 	return build_boot_info(NULL, tree, guess_boot_cpuid(tree));
 }
-

diff --git a/scripts/dtc/libfdt/fdt_empty_tree.c b/scripts/dtc/libfdt/fdt_empty_tree.c
index f72d13b..f2ae9b7 100644
--- a/scripts/dtc/libfdt/fdt_empty_tree.c
+++ b/scripts/dtc/libfdt/fdt_empty_tree.c

@@ -81,4 +81,3 @@
 
 	return fdt_open_into(buf, buf, bufsize);
 }
-

diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c
index 33eeba5..5740e69 100644
--- a/scripts/dtc/treesource.c
+++ b/scripts/dtc/treesource.c

@@ -281,4 +281,3 @@
 
 	write_tree_source_node(f, bi->dt, 0);
 }
-

diff --git a/scripts/headers.sh b/scripts/headers.sh
index 978b42b..95ece06 100755
--- a/scripts/headers.sh
+++ b/scripts/headers.sh

@@ -28,5 +28,3 @@
 		;;
 	esac
 done
-
-

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 1237dd7..dc7aa45 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c

@@ -123,7 +123,7 @@
 	}
 	if (strlen(str) > KSYM_NAME_LEN) {
 		fprintf(stderr, "Symbol %s too long for kallsyms (%zu vs %d).\n"
-                                "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
+				"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
 			str, strlen(str), KSYM_NAME_LEN);
 		return -1;
 	}

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 844bc9d..9c4d241 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile

@@ -33,11 +33,11 @@
 	$< --$@ $(Kconfig)
 
 silentoldconfig: $(obj)/conf
-	$(Q)mkdir -p include/generated
+	$(Q)mkdir -p include/config include/generated
 	$< --$@ $(Kconfig)
 
 localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf
-	$(Q)mkdir -p include/generated
+	$(Q)mkdir -p include/config include/generated
 	$(Q)perl $< --$@ $(srctree) $(Kconfig) > .tmp.config
 	$(Q)if [ -f .config ]; then 					\
 			cmp -s .tmp.config .config ||			\
@@ -319,4 +319,3 @@
 $(obj)/gconf.glade.h: $(obj)/gconf.glade
 	$(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \
 	$(obj)/gconf.glade
-

diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh
index 854d9c7..55b79ba 100755
--- a/scripts/kconfig/check.sh
+++ b/scripts/kconfig/check.sh

@@ -11,4 +11,3 @@
 if [ ! "$?" -eq "0"  ]; then
 	echo -DKBUILD_NO_NLS;
 fi
-

diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index d19944f..fef75fc 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c

@@ -696,7 +696,7 @@
 	} else if (input_mode == savedefconfig) {
 		if (conf_write_defconfig(defconfig_file)) {
 			fprintf(stderr, _("n*** Error while saving defconfig to: %s\n\n"),
-			        defconfig_file);
+				defconfig_file);
 			return 1;
 		}
 	} else if (input_mode != listnewconfig) {

diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index f2bee70..d0a35b2 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c

@@ -1404,7 +1404,7 @@
 		    && (tree == tree2))
 			continue;
 /*
-                if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT))
+		if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT))
 		    || (view_mode == FULL_VIEW)
 		    || (view_mode == SPLIT_VIEW))*/
 

diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index 3b15c08..8d016fa 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c

@@ -168,13 +168,13 @@
 
 	/* create new window for the list */
 	list = subwin(dialog, list_height, list_width, y + box_y + 1,
-	              x + box_x + 1);
+		      x + box_x + 1);
 
 	keypad(list, TRUE);
 
 	/* draw a box around the list items */
 	draw_box(dialog, box_y, box_x, list_height + 2, list_width + 2,
-	         dlg.menubox_border.atr, dlg.menubox.atr);
+		 dlg.menubox_border.atr, dlg.menubox.atr);
 
 	/* Find length of longest item in order to center checklist */
 	check_x = 0;

diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index 447a582..d58de1d 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c

@@ -42,7 +42,7 @@
  * Display a dialog box for inputing a string
  */
 int dialog_inputbox(const char *title, const char *prompt, int height, int width,
-                    const char *init)
+		    const char *init)
 {
 	int i, x, y, box_y, box_x, box_width;
 	int input_x = 0, key = 0, button = -1;

diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c
index c93de0b..11ae9ad7 100644
--- a/scripts/kconfig/lxdialog/menubox.c
+++ b/scripts/kconfig/lxdialog/menubox.c

@@ -64,7 +64,7 @@
  * Print menu item
  */
 static void do_print_item(WINDOW * win, const char *item, int line_y,
-                          int selected, int hotkey)
+			  int selected, int hotkey)
 {
 	int j;
 	char *menu_item = malloc(menu_width + 1);
@@ -182,7 +182,7 @@
  * Display a menu for choosing among a number of options
  */
 int dialog_menu(const char *title, const char *prompt,
-                const void *selected, int *s_scroll)
+		const void *selected, int *s_scroll)
 {
 	int i, j, x, y, box_x, box_y;
 	int height, width, menu_height;

diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 58a8289..f7abdeb 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c

@@ -623,7 +623,7 @@
 void item_add_str(const char *fmt, ...)
 {
 	va_list ap;
-        size_t avail;
+	size_t avail;
 
 	avail = sizeof(item_cur->node.str) - strlen(item_cur->node.str);
 

diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 59184bb..14cea74 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c

@@ -299,7 +299,7 @@
 	int size;
 
 	size = snprintf(menu_backtitle, sizeof(menu_backtitle),
-	                "%s - %s", config_filename, rootmenu.prompt->text);
+			"%s - %s", config_filename, rootmenu.prompt->text);
 	if (size >= sizeof(menu_backtitle))
 		menu_backtitle[sizeof(menu_backtitle)-1] = '\0';
 	set_dialog_backtitle(menu_backtitle);
@@ -1034,4 +1034,3 @@
 
 	return res;
 }
-

diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 3ac2c9c..a26cc5d 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c

@@ -258,8 +258,8 @@
 				    "config symbol '%s' uses select, but is "
 				    "not boolean or tristate", sym->name);
 			else if (sym2->type != S_UNKNOWN &&
-			         sym2->type != S_BOOLEAN &&
-			         sym2->type != S_TRISTATE)
+				 sym2->type != S_BOOLEAN &&
+				 sym2->type != S_TRISTATE)
 				prop_warn(prop,
 				    "'%s' has wrong type. 'select' only "
 				    "accept arguments of boolean and "
@@ -268,7 +268,7 @@
 		case P_RANGE:
 			if (sym->type != S_INT && sym->type != S_HEX)
 				prop_warn(prop, "range is only allowed "
-				                "for int or hex symbols");
+						"for int or hex symbols");
 			if (!menu_validate_number(sym, prop->expr->left.sym) ||
 			    !menu_validate_number(sym, prop->expr->right.sym))
 				prop_warn(prop, "range is invalid");

diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index 4fbecd2..984489e 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c

@@ -1554,4 +1554,3 @@
 	endwin();
 	return 0;
 }
-

diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index 3133172..9cb8522 100644
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl

@@ -589,7 +589,7 @@
 
     # Now we need to see if we have to check selects;
     loop_select;
-}	    
+}
 
 my %setconfigs;
 

diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index 6e7fbf1..94f9c83 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c

@@ -155,5 +155,3 @@
 	fprintf(stderr, "Out of memory.\n");
 	exit(1);
 }
-
-

diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
index 1a9f53e..6c62d93 100644
--- a/scripts/kconfig/zconf.l
+++ b/scripts/kconfig/zconf.l

@@ -27,8 +27,8 @@
 static int text_size, text_asize;
 
 struct buffer {
-        struct buffer *parent;
-        YY_BUFFER_STATE state;
+	struct buffer *parent;
+	YY_BUFFER_STATE state;
 };
 
 struct buffer *current_buf;

diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped
index a0521aa..349a7f2 100644
--- a/scripts/kconfig/zconf.lex.c_shipped
+++ b/scripts/kconfig/zconf.lex.c_shipped

@@ -789,8 +789,8 @@
 static int text_size, text_asize;
 
 struct buffer {
-        struct buffer *parent;
-        YY_BUFFER_STATE state;
+	struct buffer *parent;
+	YY_BUFFER_STATE state;
 };
 
 struct buffer *current_buf;

diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped
index 25ae16a..de5e84e 100644
--- a/scripts/kconfig/zconf.tab.c_shipped
+++ b/scripts/kconfig/zconf.tab.c_shipped

@@ -2314,7 +2314,7 @@
 	for_all_symbols(i, sym) {
 		if (sym_check_deps(sym))
 			zconfnerrs++;
-        }
+	}
 	if (zconfnerrs)
 		exit(1);
 	sym_set_change_count(1);

diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
index 0653886..0f683cf 100644
--- a/scripts/kconfig/zconf.y
+++ b/scripts/kconfig/zconf.y

@@ -510,7 +510,7 @@
 	for_all_symbols(i, sym) {
 		if (sym_check_deps(sym))
 			zconfnerrs++;
-        }
+	}
 	if (zconfnerrs)
 		exit(1);
 	sym_set_change_count(1);

diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
index 827896f..c21d163 100644
--- a/scripts/markup_oops.pl
+++ b/scripts/markup_oops.pl

@@ -367,4 +367,3 @@
 EOT
 	exit;
 }
-

diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index cfb8440..6fdc97e 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h

@@ -68,7 +68,7 @@
 
 ( echo /\* This file is auto generated, version $VERSION \*/
   if [ -n "$CONFIG_FLAGS" ] ; then echo "/* $CONFIG_FLAGS */"; fi
-  
+
   echo \#define UTS_MACHINE \"$ARCH\"
 
   echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\"
@@ -84,7 +84,7 @@
 # recompilations.
 # We don't consider the file changed if only the date/time changed.
 # A kernel config change will increase the generation number, thus
-# causing compile.h to be updated (including date/time) due to the 
+# causing compile.h to be updated (including date/time) due to the
 # changed comment in the
 # first line.
 

diff --git a/scripts/mkmakefile b/scripts/mkmakefile
index 0cc0442..84af27b 100644
--- a/scripts/mkmakefile
+++ b/scripts/mkmakefile

@@ -42,18 +42,11 @@
 
 MAKEFLAGS += --no-print-directory
 
-.PHONY: all \$(MAKECMDGOALS)
+.PHONY: __sub-make \$(MAKECMDGOALS)
 
-all	:= \$(filter-out all Makefile,\$(MAKECMDGOALS))
+__sub-make:
+	\$(Q)\$(MAKE) \$(MAKEARGS) \$(MAKECMDGOALS)
 
-all:
-	\$(Q)\$(MAKE) \$(MAKEARGS) \$(all)
-
-Makefile:;
-
-\$(all): all
-	@:
-
-%/: all
+\$(filter-out __sub-make, \$(MAKECMDGOALS)): __sub-make
 	@:
 EOF

diff --git a/scripts/mksysmap b/scripts/mksysmap
index c1b6191..7ada35a 100644
--- a/scripts/mksysmap
+++ b/scripts/mksysmap

@@ -42,4 +42,3 @@
 # (At least sparc64 has __crc_ in the middle).
 
 $NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)' > $2
-

diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore
index 33bae0d..3bd11b6 100644
--- a/scripts/mod/.gitignore
+++ b/scripts/mod/.gitignore

@@ -2,4 +2,3 @@
 mk_elfconfig
 modpost
 devicetable-offsets.h
-

diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 1924990..e614ef6 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c

@@ -644,28 +644,26 @@
 
 static int do_of_entry (const char *filename, void *symval, char *alias)
 {
-    int len;
-    char *tmp;
-    DEF_FIELD_ADDR(symval, of_device_id, name);
-    DEF_FIELD_ADDR(symval, of_device_id, type);
-    DEF_FIELD_ADDR(symval, of_device_id, compatible);
+	int len;
+	char *tmp;
+	DEF_FIELD_ADDR(symval, of_device_id, name);
+	DEF_FIELD_ADDR(symval, of_device_id, type);
+	DEF_FIELD_ADDR(symval, of_device_id, compatible);
 
-    len = sprintf (alias, "of:N%sT%s",
-                    (*name)[0] ? *name : "*",
-                    (*type)[0] ? *type : "*");
+	len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*",
+		      (*type)[0] ? *type : "*");
 
-    if (compatible[0])
-        sprintf (&alias[len], "%sC%s",
-                     (*type)[0] ? "*" : "",
-                     *compatible);
+	if ((*compatible)[0])	/* test first char, as done for name/type */
+		sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "",
+			*compatible);
 
-    /* Replace all whitespace with underscores */
-    for (tmp = alias; tmp && *tmp; tmp++)
-        if (isspace (*tmp))
-            *tmp = '_';
+	/* Replace all whitespace with underscores */
+	for (tmp = alias; tmp && *tmp; tmp++)
+		if (isspace (*tmp))
+			*tmp = '_';
 
-    add_wildcard(alias);
-    return 1;
+	add_wildcard(alias);
+	return 1;
 }
 ADD_TO_DEVTABLE("of", of_device_id, do_of_entry);
 

diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index 639bca7..a4fd71d 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c

@@ -54,4 +54,3 @@
 
 	return 0;
 }
-

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index ea7f953..9d9c5b9 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c

@@ -862,7 +862,7 @@
  * without "ax" / "aw".
  */
 static void check_section(const char *modname, struct elf_info *elf,
-                          Elf_Shdr *sechdr)
+			  Elf_Shdr *sechdr)
 {
 	const char *sec = sech_name(elf, sechdr);
 
@@ -1296,12 +1296,12 @@
  */
 static void report_sec_mismatch(const char *modname,
 				const struct sectioncheck *mismatch,
-                                const char *fromsec,
-                                unsigned long long fromaddr,
-                                const char *fromsym,
-                                int from_is_func,
-                                const char *tosec, const char *tosym,
-                                int to_is_func)
+				const char *fromsec,
+				unsigned long long fromaddr,
+				const char *fromsym,
+				int from_is_func,
+				const char *tosec, const char *tosym,
+				int to_is_func)
 {
 	const char *from, *from_p;
 	const char *to, *to_p;
@@ -1441,7 +1441,7 @@
 }
 
 static void check_section_mismatch(const char *modname, struct elf_info *elf,
-                                   Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+				   Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
 {
 	const char *tosec;
 	const struct sectioncheck *mismatch;
@@ -1528,7 +1528,7 @@
 	case R_ARM_ABS32:
 		/* From ARM ABI: (S + A) | T */
 		r->r_addend = (int)(long)
-		              (elf->symtab_start + ELF_R_SYM(r->r_info));
+			      (elf->symtab_start + ELF_R_SYM(r->r_info));
 		break;
 	case R_ARM_PC24:
 	case R_ARM_CALL:
@@ -1538,8 +1538,8 @@
 	case R_ARM_THM_JUMP19:
 		/* From ARM ABI: ((S + A) | T) - P */
 		r->r_addend = (int)(long)(elf->hdr +
-		              sechdr->sh_offset +
-		              (r->r_offset - sechdr->sh_addr));
+			      sechdr->sh_offset +
+			      (r->r_offset - sechdr->sh_addr));
 		break;
 	default:
 		return 1;
@@ -1571,7 +1571,7 @@
 }
 
 static void section_rela(const char *modname, struct elf_info *elf,
-                         Elf_Shdr *sechdr)
+			 Elf_Shdr *sechdr)
 {
 	Elf_Sym  *sym;
 	Elf_Rela *rela;
@@ -1615,7 +1615,7 @@
 }
 
 static void section_rel(const char *modname, struct elf_info *elf,
-                        Elf_Shdr *sechdr)
+			Elf_Shdr *sechdr)
 {
 	Elf_Sym *sym;
 	Elf_Rel *rel;
@@ -1685,7 +1685,7 @@
  * be discarded and warns about it.
  **/
 static void check_sec_ref(struct module *mod, const char *modname,
-                          struct elf_info *elf)
+			  struct elf_info *elf)
 {
 	int i;
 	Elf_Shdr *sechdrs = elf->sechdrs;
@@ -1945,7 +1945,7 @@
 					     s->name, mod->name);
 				} else {
 					merror("\"%s\" [%s.ko] undefined!\n",
-					          s->name, mod->name);
+					       s->name, mod->name);
 					err = 1;
 				}
 			}
@@ -2113,8 +2113,10 @@
 		s->preloaded = 1;
 		sym_update_crc(symname, mod, crc, export_no(export));
 	}
+	release_file(file, size);
 	return;
 fail:
+	release_file(file, size);
 	fatal("parse error in symbol dump file\n");
 }
 

diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index deb2994..944418d 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c

@@ -214,7 +214,7 @@
 	mctx->block[14] = mctx->byte_count << 3;
 	mctx->block[15] = mctx->byte_count >> 29;
 	le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
-	                  sizeof(uint64_t)) / sizeof(uint32_t));
+			  sizeof(uint64_t)) / sizeof(uint32_t));
 	md4_transform(mctx->hash, mctx->block);
 	cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t));
 
@@ -367,7 +367,7 @@
 			break;
 		/* Terminate line at first space, to get rid of final ' \' */
 		while (*p) {
-                       if (isspace(*p)) {
+			if (isspace(*p)) {
 				*p = '\0';
 				break;
 			}

diff --git a/scripts/objdiff b/scripts/objdiff
index b3e4f10..62e51da 100755
--- a/scripts/objdiff
+++ b/scripts/objdiff

@@ -25,25 +25,47 @@
 #
 # Note: 'make mrproper' will also remove .tmp_objdiff
 
-GIT_DIR="`git rev-parse --git-dir`"
+SRCTREE=$(top=$(git rev-parse --show-toplevel 2>/dev/null) && cd "$top" && pwd)	# empty outside a git tree
 
-if [ -d "$GIT_DIR" ]; then
-	TMPD="${GIT_DIR%git}tmp_objdiff"
-
-	[ -d "$TMPD" ] || mkdir "$TMPD"
-else
-	echo "ERROR: git directory not found."
+if [ -z "$SRCTREE" ]; then
+	echo >&2 "ERROR: Not a git repository."
 	exit 1
 fi
 
+TMPD=$SRCTREE/.tmp_objdiff
+
 usage() {
-	echo "Usage: $0 <command> <args>"
-	echo "  record    <list of object files>"
-	echo "  diff      <commitA> <commitB>"
-	echo "  clean     all | <commit>"
+	echo >&2 "Usage: $0 <command> <args>"
+	echo >&2 "  record    <list of object files or directories>"
+	echo >&2 "  diff      <commitA> <commitB>"
+	echo >&2 "  clean     all | <commit>"
 	exit 1
 }
 
+get_output_dir() {
+	dir=${1%/*}	# directory part of the path given in $1
+
+	if [ "$dir" = "$1" ]; then
+		dir=.	# no slash in $1: file is in the current directory
+	fi
+
+	dir=$(cd "${dir:-/}" && pwd)	# absolute form; empty means $1 was "/file"
+
+	echo "$TMPD/$CMT${dir#"$SRCTREE"}"
+}
+
+do_objdump() {
+	dir=$(get_output_dir "$1")
+	base=${1##*/}
+	dis=$dir/${base%.o}.dis	# output file: <dir>/<name without .o>.dis
+
+	mkdir -p "$dir"	# -p succeeds when the directory already exists
+
+	# remove addresses for a cleaner diff
+	# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
+	$OBJDUMP -D "$1" | sed "s/^[[:space:]]\+[0-9a-f]\+//" > "$dis"
+}
+
 dorecord() {
 	[ $# -eq 0 ] && usage
 
@@ -52,20 +74,16 @@
 	CMT="`git rev-parse --short HEAD`"
 
 	OBJDUMP="${CROSS_COMPILE}objdump"
-	OBJDIFFD="$TMPD/$CMT"
 
-	[ ! -d "$OBJDIFFD" ] && mkdir -p "$OBJDIFFD"
-
-	for f in $FILES; do
-		dn="${f%/*}"
-		bn="${f##*/}"
-
-		[ ! -d "$OBJDIFFD/$dn" ] && mkdir -p "$OBJDIFFD/$dn"
-
-		# remove addresses for a more clear diff
-		# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
-		$OBJDUMP -D "$f" | sed "s/^[[:space:]]\+[0-9a-f]\+//" \
-			>"$OBJDIFFD/$dn/$bn"
+	for d in $FILES; do
+		if [ -d "$d" ]; then
+			for f in $(find $d -name '*.o')
+			do
+				do_objdump $f
+			done
+		else
+			do_objdump $d
+		fi
 	done
 }
 
@@ -90,12 +108,12 @@
 	DSTD="$TMPD/$DST"
 
 	if [ ! -d "$SRCD" ]; then
-		echo "ERROR: $SRCD doesn't exist"
+		echo >&2 "ERROR: $SRCD doesn't exist"
 		exit 1
 	fi
 
 	if [ ! -d "$DSTD" ]; then
-		echo "ERROR: $DSTD doesn't exist"
+		echo >&2 "ERROR: $DSTD doesn't exist"
 		exit 1
 	fi
 
@@ -114,7 +132,7 @@
 		if [ -d "$TMPD/$CMT" ]; then
 			rm -rf $TMPD/$CMT
 		else
-			echo "$CMT not found"
+			echo >&2 "$CMT not found"
 		fi
 	fi
 }
@@ -135,7 +153,7 @@
 		doclean $*
 		;;
 	*)
-		echo "Unrecognized command '$1'"
+		echo >&2 "Unrecognized command '$1'"
 		exit 1
 		;;
 esac

diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index c5d4733..99ca6e7 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile

@@ -143,4 +143,3 @@
 	@echo '  perf-targz-src-pkg  - Build $(perf-tar).tar.gz source tarball'
 	@echo '  perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball'
 	@echo '  perf-tarxz-src-pkg  - Build $(perf-tar).tar.xz source tarball'
-

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index f46e4dd..b5f08f7 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb

@@ -35,13 +35,15 @@
 	sparc*)
 		debarch=sparc ;;
 	s390*)
-		debarch=s390 ;;
+		debarch=s390$(grep -q CONFIG_64BIT=y $KCONFIG_CONFIG && echo x || true) ;;
 	ppc*)
 		debarch=powerpc ;;
 	parisc*)
 		debarch=hppa ;;
 	mips*)
 		debarch=mips$(grep -q CPU_LITTLE_ENDIAN=y $KCONFIG_CONFIG && echo el || true) ;;
+	arm64)
+		debarch=arm64 ;;
 	arm*)
 		debarch=arm$(grep -q CONFIG_AEABI=y $KCONFIG_CONFIG && echo el || true) ;;
 	*)
@@ -130,7 +132,7 @@
 	cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map"
 	cp $KCONFIG_CONFIG "$tmpdir/usr/share/doc/$packagename/config"
 	gzip "$tmpdir/usr/share/doc/$packagename/config"
-else 
+else
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
 fi
@@ -155,11 +157,11 @@
 			for module in $(find lib/modules/ -name *.ko); do
 				mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module)
 				# only keep debug symbols in the debug file
-				objcopy --only-keep-debug $module $dbg_dir/usr/lib/debug/$module
+				$OBJCOPY --only-keep-debug $module $dbg_dir/usr/lib/debug/$module
 				# strip original module from debug symbols
-				objcopy --strip-debug $module
+				$OBJCOPY --strip-debug $module
 				# then add a link to those
-				objcopy --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module
+				$OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module
 			done
 		)
 	fi

diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index aa22f94..995c1ea 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar

@@ -136,4 +136,3 @@
 echo "Tarball successfully created in ${tarball}${file_ext}"
 
 exit 0
-

diff --git a/scripts/patch-kernel b/scripts/patch-kernel
index d000ea3..49b4241 100755
--- a/scripts/patch-kernel
+++ b/scripts/patch-kernel

@@ -27,7 +27,7 @@
 #       Nick Holloway <Nick.Holloway@alfie.demon.co.uk>, 2nd January 1995.
 #
 # Added support for handling multiple types of compression. What includes
-# gzip, bzip, bzip2, zip, compress, and plaintext. 
+# gzip, bzip, bzip2, zip, compress, and plaintext.
 #
 #       Adam Sulmicki <adam@cfar.umd.edu>, 1st January 1997.
 #
@@ -159,7 +159,7 @@
   fi
   # Remove backup files
   find $sourcedir/ '(' -name '*.orig' -o -name '.*.orig' ')' -exec rm -f {} \;
- 
+
   return 0;
 }
 

diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c
index 68bb4ef..4718d78 100644
--- a/scripts/pnmtologo.c
+++ b/scripts/pnmtologo.c

@@ -512,4 +512,3 @@
     }
     exit(0);
 }
-

diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index e11aa4a..650ecc8 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c

@@ -487,5 +487,3 @@
 	}
 	return !!n_error;
 }
-
-

diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh
index 43098af..6b5c83b 100644
--- a/scripts/rt-tester/check-all.sh
+++ b/scripts/rt-tester/check-all.sh

@@ -19,4 +19,3 @@
 testit t4-l2-pi-deboost.tst
 testit t5-l4-pi-boost-deboost.tst
 testit t5-l4-pi-boost-deboost-setsched.tst
-

diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 34186ca..6d916c2 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py

@@ -216,5 +216,3 @@
 # Normal exit pass
 print "Pass"
 sys.exit(0)
-
-

diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh
index 7b9ccf6..f6a0ce7 100644
--- a/scripts/selinux/install_policy.sh
+++ b/scripts/selinux/install_policy.sh

@@ -66,4 +66,3 @@
 	$SF file_contexts /dev
 	mount --move /mnt /dev
 fi
-

diff --git a/scripts/show_delta b/scripts/show_delta
index e25732b..5b36500 100755
--- a/scripts/show_delta
+++ b/scripts/show_delta

@@ -126,4 +126,3 @@
 		print (convert_line(line, base_time),)
 
 main()
-

diff --git a/scripts/tags.sh b/scripts/tags.sh
index f2c5b00..e6b011f 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh

@@ -25,6 +25,9 @@
 	tree=${srctree}/
 fi
 
+# ignore userspace tools
+ignore="$ignore ( -path ${tree}tools ) -prune -o"
+
 # Find all available archs
 find_all_archs()
 {
@@ -47,7 +50,8 @@
 	for i in $archincludedir; do
 		prune="$prune -wholename $i -prune -o"
 	done
-	find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" -print;
+	find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" \
+		-not -type l -print;
 }
 
 # find sources in arch/$1/include
@@ -57,14 +61,15 @@
 					-name include -type d -print);
 	if [ -n "$include" ]; then
 		archincludedir="$archincludedir $include"
-		find $include $ignore -name "$2" -print;
+		find $include $ignore -name "$2" -not -type l -print;
 	fi
 }
 
 # find sources in include/
 find_include_sources()
 {
-	find ${tree}include $ignore -name config -prune -o -name "$1" -print;
+	find ${tree}include $ignore -name config -prune -o -name "$1" \
+		-not -type l -print;
 }
 
 # find sources in rest of tree
@@ -73,7 +78,7 @@
 {
 	find ${tree}* $ignore \
 	     \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \
-	       -name "$1" -print;
+	       -name "$1" -not -type l -print;
 }
 
 find_sources()
@@ -187,6 +192,10 @@
 	--regex-c++='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \
 	--regex-c++='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \
 	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'		\
+	--regex-c++='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/'		\
+	--regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'		\
+	--regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
+	--regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
 	--regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
 	--regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
 	--regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/'	\
@@ -201,7 +210,8 @@
 	--regex-c='/DECLARE_(TASKLET|WORK|DELAYED_WORK)\((\w*)/\2/v/'	\
 	--regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/'		\
 	--regex-c='/(^\s)OFFSET\((\w*)/\2/v/'				\
-	--regex-c='/(^\s)DEFINE\((\w*)/\2/v/'
+	--regex-c='/(^\s)DEFINE\((\w*)/\2/v/'				\
+	--regex-c='/DEFINE_HASHTABLE\((\w*)/\1/v/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--langdef=kconfig --language-force=kconfig              \
@@ -244,9 +254,14 @@
 	--regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \
 	--regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \
 	--regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \
+	--regex='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/'	\
+	--regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'	\
+	--regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
+	--regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
 	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'		\
 	--regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
-	--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'
+	--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\
+	--regex='/DEFINE_HASHTABLE\((\w*)/\1/v/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/'
@@ -266,7 +281,7 @@
 		emacs $1
 	else
 		all_target_sources | xargs $1 -a
-        fi
+	fi
 }
 
 # Support um (which uses SUBARCH)

diff --git a/security/capability.c b/security/capability.c
index ad0d4de..e76373d 100644
--- a/security/capability.c
+++ b/security/capability.c

@@ -879,7 +879,7 @@
 }
 
 static int cap_key_permission(key_ref_t key_ref, const struct cred *cred,
-			      key_perm_t perm)
+			      unsigned perm)
 {
 	return 0;
 }

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9134dbf..d9d69e6 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c

@@ -182,7 +182,7 @@
 static int devcgroup_online(struct cgroup_subsys_state *css)
 {
 	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
-	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css));
+	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
 	int ret = 0;
 
 	mutex_lock(&devcgroup_mutex);
@@ -455,7 +455,7 @@
 static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return 1;
@@ -476,7 +476,7 @@
 static bool parent_allows_removal(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return true;
@@ -587,13 +587,6 @@
 	return rc;
 }
 
-static inline bool has_children(struct dev_cgroup *devcgroup)
-{
-	struct cgroup *cgrp = devcgroup->css.cgroup;
-
-	return !list_empty(&cgrp->children);
-}
-
 /*
  * Modify the exception list using allow/deny rules.
  * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
@@ -614,7 +607,7 @@
 	char temp[12];		/* 11 + 1 characters needed for a u32 */
 	int count, rc = 0;
 	struct dev_exception_item ex;
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css));
+	struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -626,7 +619,7 @@
 	case 'a':
 		switch (filetype) {
 		case DEVCG_ALLOW:
-			if (has_children(devcgroup))
+			if (css_has_online_children(&devcgroup->css))
 				return -EINVAL;
 
 			if (!may_allow_all(parent))
@@ -642,7 +635,7 @@
 				return rc;
 			break;
 		case DEVCG_DENY:
-			if (has_children(devcgroup))
+			if (css_has_online_children(&devcgroup->css))
 				return -EINVAL;
 
 			dev_exception_clean(devcgroup);
@@ -767,27 +760,27 @@
 	return rc;
 }
 
-static int devcgroup_access_write(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buffer)
+static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
 	int retval;
 
 	mutex_lock(&devcgroup_mutex);
-	retval = devcgroup_update_access(css_to_devcgroup(css),
-					 cft->private, buffer);
+	retval = devcgroup_update_access(css_to_devcgroup(of_css(of)),
+					 of_cft(of)->private, strstrip(buf));
 	mutex_unlock(&devcgroup_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write_string  = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write_string = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{

diff --git a/security/integrity/evm/Kconfig b/security/integrity/evm/Kconfig
index d35b491..d606f3d 100644
--- a/security/integrity/evm/Kconfig
+++ b/security/integrity/evm/Kconfig

@@ -12,15 +12,41 @@
 
 	  If you are unsure how to answer this question, answer N.
 
-config EVM_HMAC_VERSION
-	int "EVM HMAC version"
-	depends on EVM
-	default 2
-	help
-	  This options adds EVM HMAC version support.
-	  1 - original version
-	  2 - add per filesystem unique identifier (UUID) (default)
+if EVM
 
-	  WARNING: changing the HMAC calculation method or adding 
+menu "EVM options"
+
+config EVM_ATTR_FSUUID
+	bool "FSUUID (version 2)"
+	default y
+	depends on EVM
+	help
+	  Include filesystem UUID for HMAC calculation.
+
+	  The default is 'selected', which corresponds to the former HMAC
+	  version 2.  If 'not selected', it corresponds to former version 1.
+
+	  WARNING: changing the HMAC calculation method or adding
 	  additional info to the calculation, requires existing EVM
-	  labeled file systems to be relabeled.  
+	  labeled file systems to be relabeled.
+
+config EVM_EXTRA_SMACK_XATTRS
+	bool "Additional SMACK xattrs"
+	depends on EVM && SECURITY_SMACK
+	default n
+	help
+	  Include additional SMACK xattrs for HMAC calculation.
+
+	  In addition to the original security xattrs (eg. security.selinux,
+	  security.SMACK64, security.capability, and security.ima) included
+	  in the HMAC calculation, enabling this option includes newly defined
+	  Smack xattrs: security.SMACK64EXEC, security.SMACK64TRANSMUTE and
+	  security.SMACK64MMAP.
+
+	  WARNING: changing the HMAC calculation method or adding
+	  additional info to the calculation, requires existing EVM
+	  labeled file systems to be relabeled.
+
+endmenu
+
+endif

diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h
index 37c88dd..88bfe77 100644
--- a/security/integrity/evm/evm.h
+++ b/security/integrity/evm/evm.h

@@ -24,7 +24,10 @@
 extern int evm_initialized;
 extern char *evm_hmac;
 extern char *evm_hash;
-extern int evm_hmac_version;
+
+#define EVM_ATTR_FSUUID		0x0001
+
+extern int evm_hmac_attrs;
 
 extern struct crypto_shash *hmac_tfm;
 extern struct crypto_shash *hash_tfm;

diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 6b540f1..5e9687f 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c

@@ -112,7 +112,7 @@
 	hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
-	if (evm_hmac_version > 1)
+	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
 		crypto_shash_update(desc, inode->i_sb->s_uuid,
 				    sizeof(inode->i_sb->s_uuid));
 	crypto_shash_final(desc, digest);

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 6e0bd93..3bcb80d 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c

@@ -32,7 +32,7 @@
 };
 char *evm_hmac = "hmac(sha1)";
 char *evm_hash = "sha1";
-int evm_hmac_version = CONFIG_EVM_HMAC_VERSION;
+int evm_hmac_attrs;
 
 char *evm_config_xattrnames[] = {
 #ifdef CONFIG_SECURITY_SELINUX
@@ -40,6 +40,11 @@
 #endif
 #ifdef CONFIG_SECURITY_SMACK
 	XATTR_NAME_SMACK,
+#ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS
+	XATTR_NAME_SMACKEXEC,
+	XATTR_NAME_SMACKTRANSMUTE,
+	XATTR_NAME_SMACKMMAP,
+#endif
 #endif
 #ifdef CONFIG_IMA_APPRAISE
 	XATTR_NAME_IMA,
@@ -57,6 +62,14 @@
 }
 __setup("evm=", evm_set_fixmode);
 
+static void __init evm_init_config(void)
+{
+#ifdef CONFIG_EVM_ATTR_FSUUID
+	evm_hmac_attrs |= EVM_ATTR_FSUUID;
+#endif
+	pr_info("HMAC attrs: 0x%x\n", evm_hmac_attrs);
+}
+
 static int evm_find_protected_xattrs(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
@@ -287,12 +300,20 @@
  * @xattr_value: pointer to the new extended attribute value
  * @xattr_value_len: pointer to the new extended attribute value length
  *
- * Updating 'security.evm' requires CAP_SYS_ADMIN privileges and that
- * the current value is valid.
+ * Before allowing the 'security.evm' protected xattr to be updated,
+ * verify the existing value is valid.  As only the kernel should have
+ * access to the EVM encrypted key needed to calculate the HMAC, prevent
+ * userspace from writing HMAC value.  Writing 'security.evm' requires
+ * CAP_SYS_ADMIN privileges.
  */
 int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
+	const struct evm_ima_xattr_data *xattr_data = xattr_value;
+
+	if ((strcmp(xattr_name, XATTR_NAME_EVM) == 0)
+	    && (xattr_data->type == EVM_XATTR_HMAC))
+		return -EPERM;
 	return evm_protect_xattr(dentry, xattr_name, xattr_value,
 				 xattr_value_len);
 }
@@ -432,6 +453,8 @@
 {
 	int error;
 
+	evm_init_config();
+
 	error = evm_init_secfs();
 	if (error < 0) {
 		pr_info("Error registering secfs\n");

diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index ba9e4d7..d9cd5ce 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c

@@ -199,6 +199,7 @@
 			    struct evm_ima_xattr_data **xattr_value,
 			    int *xattr_len)
 {
+	const char *audit_cause = "failed";
 	struct inode *inode = file_inode(file);
 	const char *filename = file->f_dentry->d_name.name;
 	int result = 0;
@@ -213,6 +214,12 @@
 	if (!(iint->flags & IMA_COLLECTED)) {
 		u64 i_version = file_inode(file)->i_version;
 
+		if (file->f_flags & O_DIRECT) {
+			audit_cause = "failed(directio)";
+			result = -EACCES;
+			goto out;
+		}
+
 		/* use default hash algorithm */
 		hash.hdr.algo = ima_hash_algo;
 
@@ -233,9 +240,10 @@
 				result = -ENOMEM;
 		}
 	}
+out:
 	if (result)
 		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode,
-				    filename, "collect_data", "failed",
+				    filename, "collect_data", audit_cause,
 				    result, 0);
 	return result;
 }

diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 291bf0f..d3113d4 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c

@@ -341,7 +341,7 @@
 	return 0;
 }
 
-static void ima_reset_appraise_flags(struct inode *inode)
+static void ima_reset_appraise_flags(struct inode *inode, int digsig)
 {
 	struct integrity_iint_cache *iint;
 
@@ -353,18 +353,22 @@
 		return;
 
 	iint->flags &= ~IMA_DONE_MASK;
+	if (digsig)
+		iint->flags |= IMA_DIGSIG;
 	return;
 }
 
 int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
+	const struct evm_ima_xattr_data *xvalue = xattr_value;
 	int result;
 
 	result = ima_protect_xattr(dentry, xattr_name, xattr_value,
 				   xattr_value_len);
 	if (result == 1) {
-		ima_reset_appraise_flags(dentry->d_inode);
+		ima_reset_appraise_flags(dentry->d_inode,
+			 (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
 		result = 0;
 	}
 	return result;
@@ -376,7 +380,7 @@
 
 	result = ima_protect_xattr(dentry, xattr_name, NULL, 0);
 	if (result == 1) {
-		ima_reset_appraise_flags(dentry->d_inode);
+		ima_reset_appraise_flags(dentry->d_inode, 0);
 		result = 0;
 	}
 	return result;

diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 1bde8e6..ccd0ac8 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c

@@ -27,6 +27,36 @@
 
 static struct crypto_shash *ima_shash_tfm;
 
+/**
+ * ima_kernel_read - read file content
+ *
+ * This is a function for reading file content instead of kernel_read().
+ * It does not perform locking checks to ensure it cannot be blocked.
+ * It does not perform security checks because it is irrelevant for IMA.
+ *
+ */
+static int ima_kernel_read(struct file *file, loff_t offset,
+			   char *addr, unsigned long count)
+{
+	mm_segment_t old_fs;
+	char __user *buf = addr;
+	ssize_t ret;
+
+	if (!(file->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!file->f_op->read && !file->f_op->aio_read)
+		return -EINVAL;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	if (file->f_op->read)
+		ret = file->f_op->read(file, buf, count, &offset);
+	else
+		ret = do_sync_read(file, buf, count, &offset);
+	set_fs(old_fs);
+	return ret;
+}
+
 int ima_init_crypto(void)
 {
 	long rc;
@@ -104,7 +134,7 @@
 	while (offset < i_size) {
 		int rbuf_len;
 
-		rbuf_len = kernel_read(file, offset, rbuf, PAGE_SIZE);
+		rbuf_len = ima_kernel_read(file, offset, rbuf, PAGE_SIZE);
 		if (rbuf_len < 0) {
 			rc = rbuf_len;
 			break;

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 52ac6cf..09baa33 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c

@@ -81,7 +81,6 @@
 {
 	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
-	int must_measure;
 	bool send_tomtou = false, send_writers = false;
 	char *pathbuf = NULL;
 	const char *pathname;
@@ -92,18 +91,19 @@
 	mutex_lock(&inode->i_mutex);	/* file metadata: permissions, xattr */
 
 	if (mode & FMODE_WRITE) {
-		if (atomic_read(&inode->i_readcount) && IS_IMA(inode))
-			send_tomtou = true;
-		goto out;
+		if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) {
+			struct integrity_iint_cache *iint;
+			iint = integrity_iint_find(inode);
+			/* IMA_MEASURE is set from reader side */
+			if (iint && (iint->flags & IMA_MEASURE))
+				send_tomtou = true;
+		}
+	} else {
+		if ((atomic_read(&inode->i_writecount) > 0) &&
+		    ima_must_measure(inode, MAY_READ, FILE_CHECK))
+			send_writers = true;
 	}
 
-	must_measure = ima_must_measure(inode, MAY_READ, FILE_CHECK);
-	if (!must_measure)
-		goto out;
-
-	if (atomic_read(&inode->i_writecount) > 0)
-		send_writers = true;
-out:
 	mutex_unlock(&inode->i_mutex);
 
 	if (!send_tomtou && !send_writers)
@@ -214,8 +214,11 @@
 		xattr_ptr = &xattr_value;
 
 	rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len);
-	if (rc != 0)
+	if (rc != 0) {
+		if (file->f_flags & O_DIRECT)
+			rc = (iint->flags & IMA_PERMIT_DIRECTIO) ? 0 : -EACCES;
 		goto out_digsig;
+	}
 
 	pathname = filename ?: ima_d_path(&file->f_path, &pathbuf);
 

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 93873a4..40a7488 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c

@@ -353,7 +353,7 @@
 	Opt_obj_user, Opt_obj_role, Opt_obj_type,
 	Opt_subj_user, Opt_subj_role, Opt_subj_type,
 	Opt_func, Opt_mask, Opt_fsmagic, Opt_uid, Opt_fowner,
-	Opt_appraise_type, Opt_fsuuid
+	Opt_appraise_type, Opt_fsuuid, Opt_permit_directio
 };
 
 static match_table_t policy_tokens = {
@@ -375,6 +375,7 @@
 	{Opt_uid, "uid=%s"},
 	{Opt_fowner, "fowner=%s"},
 	{Opt_appraise_type, "appraise_type=%s"},
+	{Opt_permit_directio, "permit_directio"},
 	{Opt_err, NULL}
 };
 
@@ -622,6 +623,9 @@
 			else
 				result = -EINVAL;
 			break;
+		case Opt_permit_directio:
+			entry->flags |= IMA_PERMIT_DIRECTIO;
+			break;
 		case Opt_err:
 			ima_log_string(ab, "UNKNOWN", p);
 			result = -EINVAL;

diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 2fb5e53..33c0a70 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h

@@ -30,6 +30,7 @@
 #define IMA_ACTION_FLAGS	0xff000000
 #define IMA_DIGSIG		0x01000000
 #define IMA_DIGSIG_REQUIRED	0x02000000
+#define IMA_PERMIT_DIRECTIO	0x04000000
 
 #define IMA_DO_MASK		(IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
 				 IMA_APPRAISE_SUBMASK)

diff --git a/security/keys/internal.h b/security/keys/internal.h
index 80b2aac..5f20da0 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h

@@ -176,20 +176,11 @@
 /*
  * Check to see whether permission is granted to use a key in the desired way.
  */
-static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
+static inline int key_permission(const key_ref_t key_ref, unsigned perm)
 {
 	return key_task_permission(key_ref, current_cred(), perm);
 }
 
-/* required permissions */
-#define	KEY_VIEW	0x01	/* require permission to view attributes */
-#define	KEY_READ	0x02	/* require permission to read content */
-#define	KEY_WRITE	0x04	/* require permission to update / modify */
-#define	KEY_SEARCH	0x08	/* require permission to search (keyring) or find (key) */
-#define	KEY_LINK	0x10	/* require permission to link */
-#define	KEY_SETATTR	0x20	/* require permission to change attributes */
-#define	KEY_ALL		0x3f	/* all the above permissions */
-
 /*
  * Authorisation record for request_key().
  */

diff --git a/security/keys/key.c b/security/keys/key.c
index 6e21c11..2048a11 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c

@@ -714,7 +714,7 @@
 	int ret;
 
 	/* need write permission on the key to update it */
-	ret = key_permission(key_ref, KEY_WRITE);
+	ret = key_permission(key_ref, KEY_NEED_WRITE);
 	if (ret < 0)
 		goto error;
 
@@ -838,7 +838,7 @@
 
 	/* if we're going to allocate a new key, we're going to have
 	 * to modify the keyring */
-	ret = key_permission(keyring_ref, KEY_WRITE);
+	ret = key_permission(keyring_ref, KEY_NEED_WRITE);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
 		goto error_link_end;
@@ -928,7 +928,7 @@
 	key_check(key);
 
 	/* the key must be writable */
-	ret = key_permission(key_ref, KEY_WRITE);
+	ret = key_permission(key_ref, KEY_NEED_WRITE);
 	if (ret < 0)
 		goto error;
 

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index cee72ce..cd5bd0c 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c

@@ -111,7 +111,7 @@
 	}
 
 	/* find the target keyring (which must be writable) */
-	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error3;
@@ -195,7 +195,7 @@
 	dest_ref = NULL;
 	if (destringid) {
 		dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
-					   KEY_WRITE);
+					   KEY_NEED_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -253,7 +253,7 @@
 	long ret;
 
 	lflags = create ? KEY_LOOKUP_CREATE : 0;
-	key_ref = lookup_user_key(id, lflags, KEY_SEARCH);
+	key_ref = lookup_user_key(id, lflags, KEY_NEED_SEARCH);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -334,7 +334,7 @@
 	}
 
 	/* find the target key (which must be writable) */
-	key_ref = lookup_user_key(id, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -365,12 +365,12 @@
 	key_ref_t key_ref;
 	long ret;
 
-	key_ref = lookup_user_key(id, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		if (ret != -EACCES)
 			goto error;
-		key_ref = lookup_user_key(id, 0, KEY_SETATTR);
+		key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
 		if (IS_ERR(key_ref)) {
 			ret = PTR_ERR(key_ref);
 			goto error;
@@ -401,7 +401,7 @@
 
 	kenter("%d", id);
 
-	key_ref = lookup_user_key(id, 0, KEY_SEARCH);
+	key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -428,7 +428,7 @@
 	key_ref_t keyring_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 
@@ -470,13 +470,13 @@
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
 	}
 
-	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_LINK);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -505,7 +505,7 @@
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
@@ -548,7 +548,7 @@
 	char *tmpbuf;
 	long ret;
 
-	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW);
 	if (IS_ERR(key_ref)) {
 		/* viewing a key under construction is permitted if we have the
 		 * authorisation token handy */
@@ -639,7 +639,7 @@
 	}
 
 	/* get the keyring at which to begin the search */
-	keyring_ref = lookup_user_key(ringid, 0, KEY_SEARCH);
+	keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_SEARCH);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error2;
@@ -649,7 +649,7 @@
 	dest_ref = NULL;
 	if (destringid) {
 		dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
-					   KEY_WRITE);
+					   KEY_NEED_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -676,7 +676,7 @@
 
 	/* link the resulting key to the destination keyring if we can */
 	if (dest_ref) {
-		ret = key_permission(key_ref, KEY_LINK);
+		ret = key_permission(key_ref, KEY_NEED_LINK);
 		if (ret < 0)
 			goto error6;
 
@@ -727,7 +727,7 @@
 	key = key_ref_to_ptr(key_ref);
 
 	/* see if we can read it directly */
-	ret = key_permission(key_ref, KEY_READ);
+	ret = key_permission(key_ref, KEY_NEED_READ);
 	if (ret == 0)
 		goto can_read_key;
 	if (ret != -EACCES)
@@ -799,7 +799,7 @@
 		goto error;
 
 	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
-				  KEY_SETATTR);
+				  KEY_NEED_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -905,7 +905,7 @@
 		goto error;
 
 	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
-				  KEY_SETATTR);
+				  KEY_NEED_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -947,7 +947,7 @@
 
 	/* if a specific keyring is nominated by ID, then use that */
 	if (ringid > 0) {
-		dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
+		dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
 		if (IS_ERR(dkref))
 			return PTR_ERR(dkref);
 		*_dest_keyring = key_ref_to_ptr(dkref);
@@ -1315,7 +1315,7 @@
 	long ret;
 
 	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
-				  KEY_SETATTR);
+				  KEY_NEED_SETATTR);
 	if (IS_ERR(key_ref)) {
 		/* setting the timeout on a key under construction is permitted
 		 * if we have the authorisation token handy */
@@ -1418,7 +1418,7 @@
 	char *context;
 	long ret;
 
-	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW);
 	if (IS_ERR(key_ref)) {
 		if (PTR_ERR(key_ref) != -EACCES)
 			return PTR_ERR(key_ref);
@@ -1482,7 +1482,7 @@
 	struct cred *cred;
 	int ret;
 
-	keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
+	keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK);
 	if (IS_ERR(keyring_r))
 		return PTR_ERR(keyring_r);
 

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 2fb2576..9cf2575 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c

@@ -541,7 +541,7 @@
 	/* key must have search permissions */
 	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
 	    key_task_permission(make_key_ref(key, ctx->possessed),
-				ctx->cred, KEY_SEARCH) < 0) {
+				ctx->cred, KEY_NEED_SEARCH) < 0) {
 		ctx->result = ERR_PTR(-EACCES);
 		kleave(" = %d [!perm]", ctx->skipped_ret);
 		goto skipped;
@@ -721,7 +721,7 @@
 		/* Search a nested keyring */
 		if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
 		    key_task_permission(make_key_ref(key, ctx->possessed),
-					ctx->cred, KEY_SEARCH) < 0)
+					ctx->cred, KEY_NEED_SEARCH) < 0)
 			continue;
 
 		/* stack the current position */
@@ -843,7 +843,7 @@
 		return ERR_PTR(-ENOTDIR);
 
 	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
-		err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+		err = key_task_permission(keyring_ref, ctx->cred, KEY_NEED_SEARCH);
 		if (err < 0)
 			return ERR_PTR(err);
 	}
@@ -973,7 +973,7 @@
 
 			if (!skip_perm_check &&
 			    key_permission(make_key_ref(keyring, 0),
-					   KEY_SEARCH) < 0)
+					   KEY_NEED_SEARCH) < 0)
 				continue;
 
 			/* we've got a match but we might end up racing with

diff --git a/security/keys/permission.c b/security/keys/permission.c
index efcc0c8..732cc0b 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c

@@ -28,7 +28,7 @@
  * permissions bits or the LSM check.
  */
 int key_task_permission(const key_ref_t key_ref, const struct cred *cred,
-			key_perm_t perm)
+			unsigned perm)
 {
 	struct key *key;
 	key_perm_t kperm;
@@ -68,7 +68,7 @@
 	if (is_key_possessed(key_ref))
 		kperm |= key->perm >> 24;
 
-	kperm = kperm & perm & KEY_ALL;
+	kperm = kperm & perm & KEY_NEED_ALL;
 
 	if (kperm != perm)
 		return -EACCES;

diff --git a/security/keys/persistent.c b/security/keys/persistent.c
index 0ad3ee2..c9fae5e 100644
--- a/security/keys/persistent.c
+++ b/security/keys/persistent.c

@@ -108,7 +108,7 @@
 	return PTR_ERR(persistent_ref);
 
 found:
-	ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+	ret = key_task_permission(persistent_ref, current_cred(), KEY_NEED_LINK);
 	if (ret == 0) {
 		persistent = key_ref_to_ptr(persistent_ref);
 		ret = key_link(key_ref_to_ptr(dest_ref), persistent);
@@ -151,7 +151,7 @@
 	}
 
 	/* There must be a destination keyring */
-	dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+	dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
 	if (IS_ERR(dest_ref))
 		return PTR_ERR(dest_ref);
 	if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {

diff --git a/security/keys/proc.c b/security/keys/proc.c
index 88e9a46..d3f6f2f 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c

@@ -218,7 +218,7 @@
 	 * - the caller holds a spinlock, and thus the RCU read lock, making our
 	 *   access to __current_cred() safe
 	 */
-	rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
+	rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
 	if (rc < 0)
 		return 0;
 

diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index 8c0af08..b68faa1 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c

@@ -15,7 +15,7 @@
 
 static const int zero, one = 1, max = INT_MAX;
 
-ctl_table key_sysctls[] = {
+struct ctl_table key_sysctls[] = {
 	{
 		.procname = "maxkeys",
 		.data = &key_quota_maxkeys,

diff --git a/security/security.c b/security/security.c
index 8b774f3..31614e9 100644
--- a/security/security.c
+++ b/security/security.c

@@ -1425,7 +1425,7 @@
 }
 
 int security_key_permission(key_ref_t key_ref,
-			    const struct cred *cred, key_perm_t perm)
+			    const struct cred *cred, unsigned perm)
 {
 	return security_ops->key_permission(key_ref, cred, perm);
 }

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index fc3e662..a18f1fa 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c

@@ -444,11 +444,15 @@
 	avc_dump_query(ab, ad->selinux_audit_data->ssid,
 			   ad->selinux_audit_data->tsid,
 			   ad->selinux_audit_data->tclass);
+	if (ad->selinux_audit_data->denied) {
+		audit_log_format(ab, " permissive=%u",
+				 ad->selinux_audit_data->result ? 0 : 1);
+	}
 }
 
 /* This is the slow part of avc audit with big stack footprint */
 noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
-		u32 requested, u32 audited, u32 denied,
+		u32 requested, u32 audited, u32 denied, int result,
 		struct common_audit_data *a,
 		unsigned flags)
 {
@@ -477,6 +481,7 @@
 	sad.tsid = tsid;
 	sad.audited = audited;
 	sad.denied = denied;
+	sad.result = result;
 
 	a->selinux_audit_data = &sad;
 

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2c7341d..83d06db 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c

@@ -2123,11 +2123,13 @@
 		new_tsec->exec_sid = 0;
 
 		/*
-		 * Minimize confusion: if no_new_privs and a transition is
-		 * explicitly requested, then fail the exec.
+		 * Minimize confusion: if no_new_privs or nosuid and a
+		 * transition is explicitly requested, then fail the exec.
 		 */
 		if (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)
 			return -EPERM;
+		if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+			return -EACCES;
 	} else {
 		/* Check for a default transition on this program. */
 		rc = security_transition_sid(old_tsec->sid, isec->sid,
@@ -2770,6 +2772,7 @@
 
 static noinline int audit_inode_permission(struct inode *inode,
 					   u32 perms, u32 audited, u32 denied,
+					   int result,
 					   unsigned flags)
 {
 	struct common_audit_data ad;
@@ -2780,7 +2783,7 @@
 	ad.u.inode = inode;
 
 	rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms,
-			    audited, denied, &ad, flags);
+			    audited, denied, result, &ad, flags);
 	if (rc)
 		return rc;
 	return 0;
@@ -2822,7 +2825,7 @@
 	if (likely(!audited))
 		return rc;
 
-	rc2 = audit_inode_permission(inode, perms, audited, denied, flags);
+	rc2 = audit_inode_permission(inode, perms, audited, denied, rc, flags);
 	if (rc2)
 		return rc2;
 	return rc;
@@ -5722,7 +5725,7 @@
 
 static int selinux_key_permission(key_ref_t key_ref,
 				  const struct cred *cred,
-				  key_perm_t perm)
+				  unsigned perm)
 {
 	struct key *key;
 	struct key_security_struct *ksec;

diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index f53ee3c..ddf8eec 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h

@@ -102,7 +102,7 @@
 }
 
 int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
-		   u32 requested, u32 audited, u32 denied,
+		   u32 requested, u32 audited, u32 denied, int result,
 		   struct common_audit_data *a,
 		   unsigned flags);
 
@@ -137,7 +137,7 @@
 	if (likely(!audited))
 		return 0;
 	return slow_avc_audit(ssid, tsid, tclass,
-			      requested, audited, denied,
+			      requested, audited, denied, result,
 			      a, 0);
 }
 

diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 14d04e6..be491a7 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h

@@ -147,7 +147,7 @@
 	{ "peer", { "recv", NULL } },
 	{ "capability2",
 	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend",
-	    NULL } },
+	    "audit_read", NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket",
 	  { COMMON_SOCK_PERMS, "attach_queue", NULL } },

diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index 933e735..2cc4961 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c

@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
+#include <linux/sched.h>
 #include "hashtab.h"
 
 struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void *key),
@@ -40,6 +41,8 @@
 	u32 hvalue;
 	struct hashtab_node *prev, *cur, *newnode;
 
+	cond_resched();
+
 	if (!h || h->nel == HASHTAB_MAX_NODES)
 		return -EINVAL;
 

diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index c85bc1e..d307b37 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c

@@ -492,6 +492,8 @@
 			rc = ebitmap_set_bit(&bitmap, catdatum->value - 1, 1);
 			if (rc)
 				return rc;
+
+			cond_resched();
 		}
 		ebitmap_destroy(&c->range.level[l].cat);
 		c->range.level[l].cat = bitmap;

diff --git a/security/smack/smack.h b/security/smack/smack.h
index d072fd3..020307e 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h

@@ -80,8 +80,8 @@
 
 struct socket_smack {
 	struct smack_known	*smk_out;	/* outbound label */
-	char			*smk_in;	/* inbound label */
-	char			*smk_packet;	/* TCP peer label */
+	struct smack_known	*smk_in;	/* inbound label */
+	struct smack_known	*smk_packet;	/* TCP peer label */
 };
 
 /*
@@ -133,7 +133,7 @@
 	struct list_head	list;
 	struct sock		*smk_sock;	/* socket initialized on */
 	unsigned short		smk_port;	/* the port number */
-	char			*smk_in;	/* incoming label */
+	struct smack_known	*smk_in;	/* inbound label */
 	struct smack_known	*smk_out;	/* outgoing label */
 };
 
@@ -177,6 +177,14 @@
 #define SMACK_CIPSO_MAXCATNUM           184     /* 23 * 8 */
 
 /*
+ * Ptrace rules
+ */
+#define SMACK_PTRACE_DEFAULT	0
+#define SMACK_PTRACE_EXACT	1
+#define SMACK_PTRACE_DRACONIAN	2
+#define SMACK_PTRACE_MAX	SMACK_PTRACE_DRACONIAN
+
+/*
  * Flags for untraditional access modes.
  * It shouldn't be necessary to avoid conflicts with definitions
  * in fs.h, but do so anyway.
@@ -225,6 +233,7 @@
  */
 int smk_access_entry(char *, char *, struct list_head *);
 int smk_access(struct smack_known *, char *, int, struct smk_audit_info *);
+int smk_tskacc(struct task_smack *, char *, u32, struct smk_audit_info *);
 int smk_curacc(char *, u32, struct smk_audit_info *);
 struct smack_known *smack_from_secid(const u32);
 char *smk_parse_smack(const char *string, int len);
@@ -244,6 +253,7 @@
 extern struct smack_known *smack_onlycap;
 extern struct smack_known *smack_syslog_label;
 extern const char *smack_cipso_option;
+extern int smack_ptrace_rule;
 
 extern struct smack_known smack_known_floor;
 extern struct smack_known smack_known_hat;

diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 14293cd..c062e94 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c

@@ -192,20 +192,21 @@
 }
 
 /**
- * smk_curacc - determine if current has a specific access to an object
+ * smk_tskacc - determine if a task has a specific access to an object
+ * @subject: a pointer to the subject task
  * @obj_label: a pointer to the object's Smack label
  * @mode: the access requested, in "MAY" format
  * @a : common audit data
  *
- * This function checks the current subject label/object label pair
+ * This function checks the subject task's label/object label pair
  * in the access rule list and returns 0 if the access is permitted,
- * non zero otherwise. It allows that current may have the capability
+ * non zero otherwise. It allows that the task may have the capability
  * to override the rules.
  */
-int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
+int smk_tskacc(struct task_smack *subject, char *obj_label,
+	       u32 mode, struct smk_audit_info *a)
 {
-	struct task_smack *tsp = current_security();
-	struct smack_known *skp = smk_of_task(tsp);
+	struct smack_known *skp = smk_of_task(subject);
 	int may;
 	int rc;
 
@@ -219,7 +220,7 @@
 		 * it can further restrict access.
 		 */
 		may = smk_access_entry(skp->smk_known, obj_label,
-					&tsp->smk_rules);
+					&subject->smk_rules);
 		if (may < 0)
 			goto out_audit;
 		if ((mode & may) == mode)
@@ -241,6 +242,24 @@
 	return rc;
 }
 
+/**
+ * smk_curacc - determine if current has a specific access to an object
+ * @obj_label: a pointer to the object's Smack label
+ * @mode: the access requested, in "MAY" format
+ * @a: common audit data
+ *
+ * Thin wrapper that passes current's task_smack (from current_security())
+ * to smk_tskacc() and hands back its result unchanged: 0 if the access
+ * is permitted, non zero otherwise. Any capability override of the rules
+ * is handled by smk_tskacc(), not here.
+ */
+int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
+{
+	struct task_smack *tsp = current_security();
+
+	return smk_tskacc(tsp, obj_label, mode, a);
+}
+
 #ifdef CONFIG_AUDIT
 /**
  * smack_str_from_perm : helper to transalate an int to a
@@ -285,7 +304,10 @@
 	audit_log_untrustedstring(ab, sad->subject);
 	audit_log_format(ab, " object=");
 	audit_log_untrustedstring(ab, sad->object);
-	audit_log_format(ab, " requested=%s", sad->request);
+	if (sad->request[0] == '\0')
+		audit_log_format(ab, " labels_differ");
+	else
+		audit_log_format(ab, " requested=%s", sad->request);
 }
 
 /**

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 14f52be..f2c3080 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c

@@ -157,6 +157,74 @@
 	return rc;
 }
 
+/**
+ * smk_ptrace_mode - helper function for converting PTRACE_MODE_* into MAY_*
+ * @mode: input mode in form of PTRACE_MODE_*, possibly with flag bits set
+ *
+ * Returns a converted MAY_* mode usable by smack rules, 0 if neither bit set
+ */
+static inline unsigned int smk_ptrace_mode(unsigned int mode)
+{
+	/*
+	 * @mode may also carry PTRACE_MODE_NOAUDIT, so test bits, not equality.
+	 */
+	if (mode & PTRACE_MODE_ATTACH)
+		return MAY_READWRITE;
+	if (mode & PTRACE_MODE_READ)
+		return MAY_READ;
+	return 0;
+}
+
+/**
+ * smk_ptrace_rule_check - apply the configured smack_ptrace_rule to a tracer
+ * @tracer: tracer process
+ * @tracee_label: label of the process that's about to be traced; it is
+ *                compared by pointer, so it must come from smack structures
+ * @mode: PTRACE_MODE_* value; PTRACE_MODE_NOAUDIT suppresses audit records
+ * @func: name of the function that called us, used for audit
+ *
+ * Returns 0 on access granted, -EACCES or the smk_tskacc() result otherwise
+ */
+static int smk_ptrace_rule_check(struct task_struct *tracer, char *tracee_label,
+				 unsigned int mode, const char *func)
+{
+	int rc;
+	struct smk_audit_info ad, *saip = NULL;
+	struct task_smack *tsp;
+	struct smack_known *skp;
+
+	if ((mode & PTRACE_MODE_NOAUDIT) == 0) {
+		smk_ad_init(&ad, func, LSM_AUDIT_DATA_TASK);
+		smk_ad_setfield_u_tsk(&ad, tracer);
+		saip = &ad;
+	}
+
+	tsp = task_security(tracer);
+	skp = smk_of_task(tsp);
+
+	if ((mode & PTRACE_MODE_ATTACH) &&
+	    (smack_ptrace_rule == SMACK_PTRACE_EXACT ||
+	     smack_ptrace_rule == SMACK_PTRACE_DRACONIAN)) {
+		if (skp->smk_known == tracee_label)
+			rc = 0;
+		else if (smack_ptrace_rule == SMACK_PTRACE_DRACONIAN)
+			rc = -EACCES;
+		else if (capable(CAP_SYS_PTRACE))
+			rc = 0;
+		else
+			rc = -EACCES;
+
+		if (saip)
+			smack_log(skp->smk_known, tracee_label, 0, rc, saip);
+
+		return rc;
+	}
+
+	/* SMACK_PTRACE_DEFAULT rule, or no attach bit in mode: ask smk_tskacc() */
+	rc = smk_tskacc(tsp, tracee_label, smk_ptrace_mode(mode), saip);
+	return rc;
+}
+
 /*
  * LSM hooks.
  * We he, that is fun!
@@ -165,16 +233,15 @@
 /**
  * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH
  * @ctp: child task pointer
- * @mode: ptrace attachment mode
+ * @mode: ptrace attachment mode (PTRACE_MODE_*)
  *
  * Returns 0 if access is OK, an error code otherwise
  *
- * Do the capability checks, and require read and write.
+ * Do the capability checks.
  */
 static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
 {
 	int rc;
-	struct smk_audit_info ad;
 	struct smack_known *skp;
 
 	rc = cap_ptrace_access_check(ctp, mode);
@@ -182,10 +249,8 @@
 		return rc;
 
 	skp = smk_of_task(task_security(ctp));
-	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
-	smk_ad_setfield_u_tsk(&ad, ctp);
 
-	rc = smk_curacc(skp->smk_known, mode, &ad);
+	rc = smk_ptrace_rule_check(current, skp->smk_known, mode, __func__);
 	return rc;
 }
 
@@ -195,23 +260,21 @@
  *
  * Returns 0 if access is OK, an error code otherwise
  *
- * Do the capability checks, and require read and write.
+ * Do the capability checks, and require PTRACE_MODE_ATTACH.
  */
 static int smack_ptrace_traceme(struct task_struct *ptp)
 {
 	int rc;
-	struct smk_audit_info ad;
 	struct smack_known *skp;
 
 	rc = cap_ptrace_traceme(ptp);
 	if (rc != 0)
 		return rc;
 
-	skp = smk_of_task(task_security(ptp));
-	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
-	smk_ad_setfield_u_tsk(&ad, ptp);
+	skp = smk_of_task(current_security());
 
-	rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad);
+	rc = smk_ptrace_rule_check(ptp, skp->smk_known,
+				   PTRACE_MODE_ATTACH, __func__);
 	return rc;
 }
 
@@ -413,9 +476,11 @@
 	 * Initialize the root inode.
 	 */
 	isp = inode->i_security;
-	if (inode->i_security == NULL) {
-		inode->i_security = new_inode_smack(sp->smk_root);
-		isp = inode->i_security;
+	if (isp == NULL) {
+		isp = new_inode_smack(sp->smk_root);
+		if (isp == NULL)
+			return -ENOMEM;
+		inode->i_security = isp;
 	} else
 		isp->smk_inode = sp->smk_root;
 
@@ -453,7 +518,7 @@
  * smack_bprm_set_creds - set creds for exec
  * @bprm: the exec information
  *
- * Returns 0 if it gets a blob, -ENOMEM otherwise
+ * Returns 0 if it gets a blob, -EPERM if exec forbidden and -ENOMEM otherwise
  */
 static int smack_bprm_set_creds(struct linux_binprm *bprm)
 {
@@ -473,7 +538,22 @@
 	if (isp->smk_task == NULL || isp->smk_task == bsp->smk_task)
 		return 0;
 
-	if (bprm->unsafe)
+	if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+		struct task_struct *tracer;
+		rc = 0;
+
+		rcu_read_lock();
+		tracer = ptrace_parent(current);
+		if (likely(tracer != NULL))
+			rc = smk_ptrace_rule_check(tracer,
+						   isp->smk_task->smk_known,
+						   PTRACE_MODE_ATTACH,
+						   __func__);
+		rcu_read_unlock();
+
+		if (rc != 0)
+			return rc;
+	} else if (bprm->unsafe)
 		return -EPERM;
 
 	bsp->smk_task = isp->smk_task;
@@ -880,18 +960,20 @@
 		return;
 	}
 
-	skp = smk_import_entry(value, size);
 	if (strcmp(name, XATTR_NAME_SMACK) == 0) {
+		skp = smk_import_entry(value, size);
 		if (skp != NULL)
 			isp->smk_inode = skp->smk_known;
 		else
 			isp->smk_inode = smack_known_invalid.smk_known;
 	} else if (strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
+		skp = smk_import_entry(value, size);
 		if (skp != NULL)
 			isp->smk_task = skp;
 		else
 			isp->smk_task = &smack_known_invalid;
 	} else if (strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
+		skp = smk_import_entry(value, size);
 		if (skp != NULL)
 			isp->smk_mmap = skp;
 		else
@@ -938,24 +1020,37 @@
 	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0 ||
-	    strcmp(name, XATTR_NAME_SMACKMMAP)) {
+	    strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
 		if (!smack_privileged(CAP_MAC_ADMIN))
 			rc = -EPERM;
 	} else
 		rc = cap_inode_removexattr(dentry, name);
 
+	if (rc != 0)
+		return rc;
+
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
 	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
-	if (rc == 0)
-		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
-	if (rc == 0) {
-		isp = dentry->d_inode->i_security;
+	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
+	if (rc != 0)
+		return rc;
+
+	isp = dentry->d_inode->i_security;
+	/*
+	 * Don't do anything special for these.
+	 *	XATTR_NAME_SMACKIPIN
+	 *	XATTR_NAME_SMACKIPOUT
+	 *	XATTR_NAME_SMACKEXEC
+	 */
+	if (strcmp(name, XATTR_NAME_SMACK) == 0)
 		isp->smk_task = NULL;
+	else if (strcmp(name, XATTR_NAME_SMACKMMAP) == 0)
 		isp->smk_mmap = NULL;
-	}
+	else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0)
+		isp->smk_flags &= ~SMK_INODE_TRANSMUTE;
 
-	return rc;
+	return 0;
 }
 
 /**
@@ -1000,7 +1095,7 @@
 	ssp = sock->sk->sk_security;
 
 	if (strcmp(name, XATTR_SMACK_IPIN) == 0)
-		isp = ssp->smk_in;
+		isp = ssp->smk_in->smk_known;
 	else if (strcmp(name, XATTR_SMACK_IPOUT) == 0)
 		isp = ssp->smk_out->smk_known;
 	else
@@ -1367,19 +1462,35 @@
 /**
  * smack_file_open - Smack dentry open processing
  * @file: the object
- * @cred: unused
+ * @cred: task credential
  *
  * Set the security blob in the file structure.
+ * Allow the open only if the task has read access. There are
+ * many read operations (e.g. fstat) that you can do with an
+ * fd even if you have the file open write-only.
  *
- * Returns 0
+ * Returns 0 if the open is allowed, the smk_access() error otherwise
  */
 static int smack_file_open(struct file *file, const struct cred *cred)
 {
+	struct task_smack *tsp = cred->security;
 	struct inode_smack *isp = file_inode(file)->i_security;
+	struct smk_audit_info ad;
+	int rc;
 
-	file->f_security = isp->smk_inode;
+	/* A privileged open skips the check but must still label the file. */
+	if (smack_privileged(CAP_MAC_OVERRIDE)) {
+		file->f_security = isp->smk_inode;
+		return 0;
+	}
 
-	return 0;
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
+	rc = smk_access(tsp->smk_task, isp->smk_inode, MAY_READ, &ad);
+	if (rc == 0)
+		file->f_security = isp->smk_inode;
+
+	return rc;
 }
 
 /*
@@ -1764,7 +1872,7 @@
 	if (ssp == NULL)
 		return -ENOMEM;
 
-	ssp->smk_in = skp->smk_known;
+	ssp->smk_in = skp;
 	ssp->smk_out = skp;
 	ssp->smk_packet = NULL;
 
@@ -2004,7 +2112,7 @@
 
 	if (act == SMK_RECEIVING) {
 		skp = smack_net_ambient;
-		object = ssp->smk_in;
+		object = ssp->smk_in->smk_known;
 	} else {
 		skp = ssp->smk_out;
 		object = smack_net_ambient->smk_known;
@@ -2034,9 +2142,9 @@
 	list_for_each_entry(spp, &smk_ipv6_port_list, list) {
 		if (spp->smk_port != port)
 			continue;
-		object = spp->smk_in;
+		object = spp->smk_in->smk_known;
 		if (act == SMK_CONNECTING)
-			ssp->smk_packet = spp->smk_out->smk_known;
+			ssp->smk_packet = spp->smk_out;
 		break;
 	}
 
@@ -2076,7 +2184,7 @@
 	int rc = 0;
 
 	if (value == NULL || size > SMK_LONGLABEL || size == 0)
-		return -EACCES;
+		return -EINVAL;
 
 	skp = smk_import_entry(value, size);
 	if (skp == NULL)
@@ -2100,7 +2208,7 @@
 	ssp = sock->sk->sk_security;
 
 	if (strcmp(name, XATTR_SMACK_IPIN) == 0)
-		ssp->smk_in = skp->smk_known;
+		ssp->smk_in = skp;
 	else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) {
 		ssp->smk_out = skp;
 		if (sock->sk->sk_family == PF_INET) {
@@ -2713,6 +2821,15 @@
 	 * of the superblock.
 	 */
 	if (opt_dentry->d_parent == opt_dentry) {
+		if (sbp->s_magic == CGROUP_SUPER_MAGIC) {
+			/*
+			 * The cgroup filesystem is never mounted,
+			 * so there's no opportunity to set the mount
+			 * options.
+			 */
+			sbsp->smk_root = smack_known_star.smk_known;
+			sbsp->smk_default = smack_known_star.smk_known;
+		}
 		isp->smk_inode = sbsp->smk_root;
 		isp->smk_flags |= SMK_INODE_INSTANT;
 		goto unlockandout;
@@ -2726,16 +2843,20 @@
 	 */
 	switch (sbp->s_magic) {
 	case SMACK_MAGIC:
+	case PIPEFS_MAGIC:
+	case SOCKFS_MAGIC:
+	case CGROUP_SUPER_MAGIC:
 		/*
 		 * Casey says that it's a little embarrassing
 		 * that the smack file system doesn't do
 		 * extended attributes.
-		 */
-		final = smack_known_star.smk_known;
-		break;
-	case PIPEFS_MAGIC:
-		/*
+		 *
 		 * Casey says pipes are easy (?)
+		 *
+		 * Socket access is controlled by the socket
+		 * structures associated with the task involved.
+		 *
+		 * Cgroupfs is special
 		 */
 		final = smack_known_star.smk_known;
 		break;
@@ -2747,13 +2868,6 @@
 		 */
 		final = ckp->smk_known;
 		break;
-	case SOCKFS_MAGIC:
-		/*
-		 * Socket access is controlled by the socket
-		 * structures associated with the task involved.
-		 */
-		final = smack_known_star.smk_known;
-		break;
 	case PROC_SUPER_MAGIC:
 		/*
 		 * Casey says procfs appears not to care.
@@ -2959,30 +3073,34 @@
 				     struct sock *other, struct sock *newsk)
 {
 	struct smack_known *skp;
+	struct smack_known *okp;
 	struct socket_smack *ssp = sock->sk_security;
 	struct socket_smack *osp = other->sk_security;
 	struct socket_smack *nsp = newsk->sk_security;
 	struct smk_audit_info ad;
 	int rc = 0;
-
 #ifdef CONFIG_AUDIT
 	struct lsm_network_audit net;
-
-	smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
-	smk_ad_setfield_u_net_sk(&ad, other);
 #endif
 
 	if (!smack_privileged(CAP_MAC_OVERRIDE)) {
 		skp = ssp->smk_out;
-		rc = smk_access(skp, osp->smk_in, MAY_WRITE, &ad);
+		okp = osp->smk_out;
+#ifdef CONFIG_AUDIT
+		smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
+		smk_ad_setfield_u_net_sk(&ad, other);
+#endif
+		rc = smk_access(skp, okp->smk_known, MAY_WRITE, &ad);
+		if (rc == 0)
+			rc = smk_access(okp, okp->smk_known, MAY_WRITE, NULL);
 	}
 
 	/*
 	 * Cross reference the peer labels for SO_PEERSEC.
 	 */
 	if (rc == 0) {
-		nsp->smk_packet = ssp->smk_out->smk_known;
-		ssp->smk_packet = osp->smk_out->smk_known;
+		nsp->smk_packet = ssp->smk_out;
+		ssp->smk_packet = osp->smk_out;
 	}
 
 	return rc;
@@ -3014,7 +3132,7 @@
 		return 0;
 
 	skp = ssp->smk_out;
-	return smk_access(skp, osp->smk_in, MAY_WRITE, &ad);
+	return smk_access(skp, osp->smk_in->smk_known, MAY_WRITE, &ad);
 }
 
 /**
@@ -3109,7 +3227,7 @@
 		if (found)
 			return skp;
 
-		if (ssp != NULL && ssp->smk_in == smack_known_star.smk_known)
+		if (ssp != NULL && ssp->smk_in == &smack_known_star)
 			return &smack_known_web;
 		return &smack_known_star;
 	}
@@ -3228,7 +3346,7 @@
 		 * This is the simplist possible security model
 		 * for networking.
 		 */
-		rc = smk_access(skp, ssp->smk_in, MAY_WRITE, &ad);
+		rc = smk_access(skp, ssp->smk_in->smk_known, MAY_WRITE, &ad);
 		if (rc != 0)
 			netlbl_skbuff_err(skb, rc, 0);
 		break;
@@ -3263,7 +3381,7 @@
 
 	ssp = sock->sk->sk_security;
 	if (ssp->smk_packet != NULL) {
-		rcp = ssp->smk_packet;
+		rcp = ssp->smk_packet->smk_known;
 		slen = strlen(rcp) + 1;
 	}
 
@@ -3348,7 +3466,7 @@
 		return;
 
 	ssp = sk->sk_security;
-	ssp->smk_in = skp->smk_known;
+	ssp->smk_in = skp;
 	ssp->smk_out = skp;
 	/* cssp->smk_packet is already set in smack_inet_csk_clone() */
 }
@@ -3408,7 +3526,7 @@
 	 * Receiving a packet requires that the other end be able to write
 	 * here. Read access is not required.
 	 */
-	rc = smk_access(skp, ssp->smk_in, MAY_WRITE, &ad);
+	rc = smk_access(skp, ssp->smk_in->smk_known, MAY_WRITE, &ad);
 	if (rc != 0)
 		return rc;
 
@@ -3452,7 +3570,7 @@
 
 	if (req->peer_secid != 0) {
 		skp = smack_from_secid(req->peer_secid);
-		ssp->smk_packet = skp->smk_known;
+		ssp->smk_packet = skp;
 	} else
 		ssp->smk_packet = NULL;
 }
@@ -3506,11 +3624,12 @@
  * an error code otherwise
  */
 static int smack_key_permission(key_ref_t key_ref,
-				const struct cred *cred, key_perm_t perm)
+				const struct cred *cred, unsigned perm)
 {
 	struct key *keyp;
 	struct smk_audit_info ad;
 	struct smack_known *tkp = smk_of_task(cred->security);
+	int request = 0;
 
 	keyp = key_ref_to_ptr(key_ref);
 	if (keyp == NULL)
@@ -3531,7 +3650,11 @@
 	ad.a.u.key_struct.key = keyp->serial;
 	ad.a.u.key_struct.key_desc = keyp->description;
 #endif
-	return smk_access(tkp, keyp->security, MAY_READWRITE, &ad);
+	if (perm & KEY_NEED_READ)
+		request = MAY_READ;
+	if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR))
+		request = MAY_WRITE;
+	return smk_access(tkp, keyp->security, request, &ad);
 }
 #endif /* CONFIG_KEYS */
 

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 3198cfe..32b2488 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c

@@ -53,6 +53,7 @@
 	SMK_REVOKE_SUBJ	= 18,	/* set rules with subject label to '-' */
 	SMK_CHANGE_RULE	= 19,	/* change or add rules (long labels) */
 	SMK_SYSLOG	= 20,	/* change syslog label) */
+	SMK_PTRACE	= 21,	/* set ptrace rule */
 };
 
 /*
@@ -101,6 +102,15 @@
 struct smack_known *smack_syslog_label;
 
 /*
+ * Ptrace current rule
+ * SMACK_PTRACE_DEFAULT    regular smack ptrace rules (/proc based)
+ * SMACK_PTRACE_EXACT      labels must match, but can be overridden with
+ *			   CAP_SYS_PTRACE
+ * SMACK_PTRACE_DRACONIAN  labels must match, CAP_SYS_PTRACE has no effect
+ */
+int smack_ptrace_rule = SMACK_PTRACE_DEFAULT;
+
+/*
  * Certain IP addresses may be designated as single label hosts.
  * Packets are sent there unlabeled, but only from tasks that
  * can write to the specified label.
@@ -1183,7 +1193,7 @@
 
 	data[count] = '\0';
 
-	rc = sscanf(data, "%hhd.%hhd.%hhd.%hhd/%d %s",
+	rc = sscanf(data, "%hhd.%hhd.%hhd.%hhd/%u %s",
 		&host[0], &host[1], &host[2], &host[3], &m, smack);
 	if (rc != 6) {
 		rc = sscanf(data, "%hhd.%hhd.%hhd.%hhd %s",
@@ -2244,6 +2254,68 @@
 
 
 /**
+ * smk_read_ptrace - read() for /smack/ptrace
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result (smack_ptrace_rule as decimal text + newline)
+ * @count: maximum to send along
+ * @ppos: where to start; a non-zero offset reads as end of file
+ *
+ * Returns number of bytes read (0 at a non-zero offset) or error code
+ */
+static ssize_t smk_read_ptrace(struct file *filp, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	char temp[32];
+	ssize_t rc;
+
+	if (*ppos != 0)
+		return 0;
+
+	sprintf(temp, "%d\n", smack_ptrace_rule);
+	rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
+	return rc;
+}
+
+/**
+ * smk_write_ptrace - write() for /smack/ptrace
+ * @file: file pointer, not actually used
+ * @buf: data from user space, parsed as a decimal integer
+ * @count: bytes sent, must be 1..31; returned unchanged on success
+ * @ppos: where to start - must be 0
+ */
+static ssize_t smk_write_ptrace(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char temp[32];
+	int i;
+
+	if (!smack_privileged(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (*ppos != 0 || count >= sizeof(temp) || count == 0)
+		return -EINVAL;
+
+	if (copy_from_user(temp, buf, count) != 0)
+		return -EFAULT;
+
+	temp[count] = '\0';
+
+	if (sscanf(temp, "%d", &i) != 1)
+		return -EINVAL;
+	if (i < SMACK_PTRACE_DEFAULT || i > SMACK_PTRACE_MAX)
+		return -EINVAL;
+	smack_ptrace_rule = i;
+
+	return count;
+}
+
+static const struct file_operations smk_ptrace_ops = {
+	.write		= smk_write_ptrace,
+	.read		= smk_read_ptrace,
+	.llseek		= default_llseek,
+};
+
+/**
  * smk_fill_super - fill the smackfs superblock
  * @sb: the empty superblock
  * @data: unused
@@ -2296,6 +2368,8 @@
 			"change-rule", &smk_change_rule_ops, S_IRUGO|S_IWUSR},
 		[SMK_SYSLOG] = {
 			"syslog", &smk_syslog_ops, S_IRUGO|S_IWUSR},
+		[SMK_PTRACE] = {
+			"ptrace", &smk_ptrace_ops, S_IRUGO|S_IWUSR},
 		/* last one */
 			{""}
 	};

diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 9ca5e64..225c7315 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c

@@ -660,7 +660,7 @@
 				  int atomic, int hop)
 {
 	struct snd_seq_subscribers *subs;
-	int err = 0, num_ev = 0;
+	int err, result = 0, num_ev = 0;
 	struct snd_seq_event event_saved;
 	struct snd_seq_client_port *src_port;
 	struct snd_seq_port_subs_info *grp;
@@ -685,8 +685,12 @@
 						  subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL);
 		err = snd_seq_deliver_single_event(client, event,
 						   0, atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev++;
 		/* restore original event record */
 		*event = event_saved;
@@ -697,7 +701,7 @@
 		up_read(&grp->list_mutex);
 	*event = event_saved; /* restore */
 	snd_seq_port_unlock(src_port);
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 
@@ -709,7 +713,7 @@
 				struct snd_seq_event *event,
 				int atomic, int hop)
 {
-	int num_ev = 0, err = 0;
+	int num_ev = 0, err, result = 0;
 	struct snd_seq_client *dest_client;
 	struct snd_seq_client_port *port;
 
@@ -724,14 +728,18 @@
 		err = snd_seq_deliver_single_event(NULL, event,
 						   SNDRV_SEQ_FILTER_BROADCAST,
 						   atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev++;
 	}
 	read_unlock(&dest_client->ports_lock);
 	snd_seq_client_unlock(dest_client);
 	event->dest.port = SNDRV_SEQ_ADDRESS_BROADCAST; /* restore */
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 /*
@@ -741,7 +749,7 @@
 static int broadcast_event(struct snd_seq_client *client,
 			   struct snd_seq_event *event, int atomic, int hop)
 {
-	int err = 0, num_ev = 0;
+	int err, result = 0, num_ev = 0;
 	int dest;
 	struct snd_seq_addr addr;
 
@@ -760,12 +768,16 @@
 			err = snd_seq_deliver_single_event(NULL, event,
 							   SNDRV_SEQ_FILTER_BROADCAST,
 							   atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev += err;
 	}
 	event->dest = addr; /* restore */
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 

diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 5599899..53a403e 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c

@@ -124,7 +124,7 @@
 	snd_use_lock_use(&f->use_lock);
 	err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL); /* always non-blocking */
 	if (err < 0) {
-		if (err == -ENOMEM)
+		if ((err == -ENOMEM) || (err == -EAGAIN))
 			atomic_inc(&f->overflow);
 		snd_use_lock_free(&f->use_lock);
 		return err;

diff --git a/sound/core/timer.c b/sound/core/timer.c
index cfd455a..777a45e 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c

@@ -390,7 +390,7 @@
 	struct timespec tstamp;
 
 	if (timer_tstamp_monotonic)
-		do_posix_clock_monotonic_gettime(&tstamp);
+		ktime_get_ts(&tstamp);
 	else
 		getnstimeofday(&tstamp);
 	if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START ||
@@ -1203,7 +1203,7 @@
 	}
 	if (tu->last_resolution != resolution || ticks > 0) {
 		if (timer_tstamp_monotonic)
-			do_posix_clock_monotonic_gettime(&tstamp);
+			ktime_get_ts(&tstamp);
 		else
 			getnstimeofday(&tstamp);
 	}

diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h
index d1c93a1..e13eef9 100644
--- a/sound/firewire/bebob/bebob.h
+++ b/sound/firewire/bebob/bebob.h

@@ -208,8 +208,6 @@
 int snd_bebob_stream_check_internal_clock(struct snd_bebob *bebob,
 					  bool *internal);
 int snd_bebob_stream_discover(struct snd_bebob *bebob);
-int snd_bebob_stream_map(struct snd_bebob *bebob,
-			 struct amdtp_stream *stream);
 int snd_bebob_stream_init_duplex(struct snd_bebob *bebob);
 int snd_bebob_stream_start_duplex(struct snd_bebob *bebob, unsigned int rate);
 void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob);

diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index bc4f827..ef4d0c9 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c

@@ -655,8 +655,6 @@
 	struct amdtp_stream *master, *slave;
 	atomic_t *master_substreams, *slave_substreams;
 
-	mutex_lock(&bebob->mutex);
-
 	if (bebob->master == &bebob->rx_stream) {
 		slave  = &bebob->tx_stream;
 		master = &bebob->rx_stream;
@@ -669,6 +667,8 @@
 		master_substreams = &bebob->capture_substreams;
 	}
 
+	mutex_lock(&bebob->mutex);
+
 	if (atomic_read(slave_substreams) == 0) {
 		amdtp_stream_pcm_abort(slave);
 		amdtp_stream_stop(slave);

diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c
index 996fdc4..3e2ed8e 100644
--- a/sound/firewire/fireworks/fireworks.c
+++ b/sound/firewire/fireworks/fireworks.c

@@ -346,7 +346,6 @@
 {
 	snd_efw_transaction_unregister();
 	driver_unregister(&efw_driver.driver);
-	mutex_destroy(&devices_mutex);
 }
 
 module_init(snd_efw_init);

diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h
index d2b36be..4f0201a 100644
--- a/sound/firewire/fireworks/fireworks.h
+++ b/sound/firewire/fireworks/fireworks.h

@@ -162,7 +162,6 @@
 	SND_EFW_CH_TYPE_GUITAR			= 7,
 	SND_EFW_CH_TYPE_PIEZO_GUITAR		= 8,
 	SND_EFW_CH_TYPE_GUITAR_STRING		= 9,
-	SND_EFW_CH_TYPE_VIRTUAL			= 0x10000,
 	SND_EFW_CH_TYPE_DUMMY
 };
 struct snd_efw_phys_meters {

diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
index 4f8216f..33df865 100644
--- a/sound/firewire/fireworks/fireworks_hwdep.c
+++ b/sound/firewire/fireworks/fireworks_hwdep.c

@@ -58,7 +58,7 @@
 			efw->pull_ptr += till_end;
 			if (efw->pull_ptr >= efw->resp_buf +
 					     snd_efw_resp_buf_size)
-				efw->pull_ptr = efw->resp_buf;
+				efw->pull_ptr -= snd_efw_resp_buf_size;
 
 			length -= till_end;
 			buf += till_end;

diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c
index 5415690..b985fc5 100644
--- a/sound/firewire/fireworks/fireworks_stream.c
+++ b/sound/firewire/fireworks/fireworks_stream.c

@@ -284,8 +284,6 @@
 	struct amdtp_stream *master, *slave;
 	atomic_t *master_substreams, *slave_substreams;
 
-	mutex_lock(&efw->mutex);
-
 	if (efw->master == &efw->rx_stream) {
 		slave  = &efw->tx_stream;
 		master = &efw->rx_stream;
@@ -298,6 +296,8 @@
 		master_substreams = &efw->capture_substreams;
 	}
 
+	mutex_lock(&efw->mutex);
+
 	if (atomic_read(slave_substreams) == 0) {
 		stop_stream(efw, slave);
 

diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c
index aa56b8a..255dabc 100644
--- a/sound/firewire/fireworks/fireworks_transaction.c
+++ b/sound/firewire/fireworks/fireworks_transaction.c

@@ -8,19 +8,19 @@
 
 /*
  * Fireworks have its own transaction. The transaction can be delivered by AV/C
- * Vendor Specific command. But at least Windows driver and firmware version 5.5
- * or later don't use it.
+ * Vendor Specific command frame or usual asynchronous transaction. At least,
+ * Windows driver and firmware version 5.5 or later don't use AV/C command.
  *
  * Transaction substance:
- *  At first, 6 data exist. Following to the 6 data, parameters for each
- *  commands exists. All of parameters are 32 bit alighed to big endian.
+ *  At first, 6 data exist. Following to the data, parameters for each command
+ *  exist. All of the parameters are 32 bit aligned to big endian.
  *   data[0]:	Length of transaction substance
  *   data[1]:	Transaction version
  *   data[2]:	Sequence number. This is incremented by the device
- *   data[3]:	transaction category
- *   data[4]:	transaction command
- *   data[5]:	return value in response.
- *   data[6-]:	parameters
+ *   data[3]:	Transaction category
+ *   data[4]:	Transaction command
+ *   data[5]:	Return value in response.
+ *   data[6-]:	Parameters
  *
  * Transaction address:
  *  command:	0xecc000000000
@@ -148,7 +148,7 @@
 
 		efw->push_ptr += till_end;
 		if (efw->push_ptr >= efw->resp_buf + snd_efw_resp_buf_size)
-			efw->push_ptr = efw->resp_buf;
+			efw->push_ptr -= snd_efw_resp_buf_size;
 
 		length -= till_end;
 		data += till_end;

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cd77b9b..bb65a124 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c

@@ -237,6 +237,12 @@
 	 AZX_DCAPS_COUNT_LPIB_DELAY | AZX_DCAPS_PM_RUNTIME | \
 	 AZX_DCAPS_I915_POWERWELL)
 
+/* Broadwell HDMI can't use position buffer reliably, force to use LPIB */
+#define AZX_DCAPS_INTEL_BROADWELL \
+	(AZX_DCAPS_SCH_SNOOP | AZX_DCAPS_ALIGN_BUFSIZE | \
+	 AZX_DCAPS_POSFIX_LPIB | AZX_DCAPS_PM_RUNTIME | \
+	 AZX_DCAPS_I915_POWERWELL)
+
 /* quirks for ATI SB / AMD Hudson */
 #define AZX_DCAPS_PRESET_ATI_SB \
 	(AZX_DCAPS_ATI_SNOOP | AZX_DCAPS_NO_TCSEL | \
@@ -1367,12 +1373,6 @@
 	/* initialize streams */
 	azx_init_stream(chip);
 
-	/* workaround for Broadwell HDMI: the first stream is broken,
-	 * so mask it by keeping it as if opened
-	 */
-	if (pci->vendor == 0x8086 && pci->device == 0x160c)
-		chip->azx_dev[0].opened = 1;
-
 	/* initialize chip */
 	azx_init_pci(chip);
 	azx_init_chip(chip, (probe_only[dev] & 2) == 0);
@@ -1769,7 +1769,7 @@
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
 	/* Broadwell */
 	{ PCI_DEVICE(0x8086, 0x160c),
-	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
+	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_BROADWELL },
 	/* 5 Series/3400 */
 	{ PCI_DEVICE(0x8086, 0x3b56),
 	  .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index be0a9ee..3e4417b 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c

@@ -1594,10 +1594,18 @@
 		 * Re-setup pin and infoframe. This is needed e.g. when
 		 * - sink is first plugged-in (infoframe is not set up if !monitor_present)
 		 * - transcoder can change during stream playback on Haswell
+		 *   and this can make HW reset converter selection on a pin.
 		 */
-		if (eld->eld_valid && !old_eld_valid && per_pin->setup)
+		if (eld->eld_valid && !old_eld_valid && per_pin->setup) {
+			if (is_haswell_plus(codec) || is_valleyview(codec)) {
+				intel_verify_pin_cvt_connect(codec, per_pin);
+				intel_not_share_assigned_cvt(codec, pin_nid,
+							per_pin->mux_idx);
+			}
+
 			hdmi_setup_audio_infoframe(codec, per_pin,
 						   per_pin->non_pcm);
+		}
 	}
 
 	if (eld_changed)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 12fb411..af76995 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -929,6 +929,7 @@
 };
 
 static struct alc_codec_rename_table rename_tbl[] = {
+	{ 0x10ec0221, 0xf00f, 0x1003, "ALC231" },
 	{ 0x10ec0269, 0xfff0, 0x3010, "ALC277" },
 	{ 0x10ec0269, 0xf0f0, 0x2010, "ALC259" },
 	{ 0x10ec0269, 0xf0f0, 0x3010, "ALC258" },
@@ -937,6 +938,7 @@
 	{ 0x10ec0269, 0xffff, 0x6023, "ALC281X" },
 	{ 0x10ec0269, 0x00f0, 0x0020, "ALC269VC" },
 	{ 0x10ec0269, 0x00f0, 0x0030, "ALC269VD" },
+	{ 0x10ec0662, 0xffff, 0x4020, "ALC656" },
 	{ 0x10ec0887, 0x00f0, 0x0030, "ALC887-VD" },
 	{ 0x10ec0888, 0x00f0, 0x0030, "ALC888-VD" },
 	{ 0x10ec0888, 0xf0f0, 0x3020, "ALC886" },
@@ -956,6 +958,19 @@
 	{ 0x10ec0293, 0x1028, 0, "ALC3235" },
 	{ 0x10ec0255, 0x1028, 0, "ALC3234" },
 	{ 0x10ec0668, 0x1028, 0, "ALC3661" },
+	{ 0x10ec0275, 0x1028, 0, "ALC3260" },
+	{ 0x10ec0899, 0x1028, 0, "ALC3861" },
+	{ 0x10ec0670, 0x1025, 0, "ALC669X" },
+	{ 0x10ec0676, 0x1025, 0, "ALC679X" },
+	{ 0x10ec0282, 0x1043, 0, "ALC3229" },
+	{ 0x10ec0233, 0x1043, 0, "ALC3236" },
+	{ 0x10ec0280, 0x103c, 0, "ALC3228" },
+	{ 0x10ec0282, 0x103c, 0, "ALC3227" },
+	{ 0x10ec0286, 0x103c, 0, "ALC3242" },
+	{ 0x10ec0290, 0x103c, 0, "ALC3241" },
+	{ 0x10ec0668, 0x103c, 0, "ALC3662" },
+	{ 0x10ec0283, 0x17aa, 0, "ALC3239" },
+	{ 0x10ec0292, 0x17aa, 0, "ALC3232" },
 	{ } /* terminator */
 };
 
@@ -1412,6 +1427,7 @@
 	SND_PCI_QUIRK(0x1043, 0x10c3, "ASUS W5A", ALC880_FIXUP_ASUS_W5A),
 	SND_PCI_QUIRK(0x1043, 0x1964, "ASUS Z71V", ALC880_FIXUP_Z71V),
 	SND_PCI_QUIRK_VENDOR(0x1043, "ASUS", ALC880_FIXUP_GPIO1),
+	SND_PCI_QUIRK(0x147b, 0x1045, "ABit AA8XE", ALC880_FIXUP_6ST_AUTOMUTE),
 	SND_PCI_QUIRK(0x1558, 0x5401, "Clevo GPIO2", ALC880_FIXUP_GPIO2),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", ALC880_FIXUP_EAPD_COEF),
 	SND_PCI_QUIRK(0x1584, 0x9050, "Uniwill", ALC880_FIXUP_UNIWILL_DIG),
@@ -4230,6 +4246,7 @@
 	ALC269_FIXUP_HEADSET_MIC,
 	ALC269_FIXUP_QUANTA_MUTE,
 	ALC269_FIXUP_LIFEBOOK,
+	ALC269_FIXUP_LIFEBOOK_EXTMIC,
 	ALC269_FIXUP_AMIC,
 	ALC269_FIXUP_DMIC,
 	ALC269VB_FIXUP_AMIC,
@@ -4367,6 +4384,13 @@
 		.chained = true,
 		.chain_id = ALC269_FIXUP_QUANTA_MUTE
 	},
+	[ALC269_FIXUP_LIFEBOOK_EXTMIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a1903c }, /* headset mic, with jack detect */
+			{ }
+		},
+	},
 	[ALC269_FIXUP_AMIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -4741,18 +4765,12 @@
 	SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
-	SND_PCI_QUIRK(0x1028, 0x062c, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x063f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x064d, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0668, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0669, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x0674, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x067e, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x067f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x0680, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0684, "Dell", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
@@ -4764,14 +4782,24 @@
 	SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x218b, "HP", ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED),
 	/* ALC282 */
+	SND_PCI_QUIRK(0x103c, 0x220d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x220e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x220f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2210, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2211, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2212, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2213, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2214, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2266, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2267, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2268, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2269, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x226a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x226b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x227a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x227b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x229e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -4811,6 +4839,10 @@
 	SND_PCI_QUIRK(0x103c, 0x22c8, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x22c3, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x22c4, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2334, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -4834,6 +4866,7 @@
 	SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
 	SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
 	SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+	SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -4977,6 +5010,26 @@
 #endif
 		.pins = (const struct hda_pintbl[]) {
 			{0x12, 0x90a60160},
+			{0x14, 0x90170120},
+			{0x17, 0x90170140},
+			{0x18, 0x40000000},
+			{0x19, 0x411111f0},
+			{0x1a, 0x411111f0},
+			{0x1b, 0x411111f0},
+			{0x1d, 0x41163b05},
+			{0x1e, 0x411111f0},
+			{0x21, 0x0321102f},
+		},
+		.value = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+	},
+	{
+		.codec = 0x10ec0255,
+		.subvendor = 0x1028,
+#ifdef CONFIG_SND_DEBUG_VERBOSE
+		.name = "Dell",
+#endif
+		.pins = (const struct hda_pintbl[]) {
+			{0x12, 0x90a60160},
 			{0x14, 0x90170130},
 			{0x17, 0x40000000},
 			{0x18, 0x411111f0},
@@ -5129,7 +5182,7 @@
 			{0x1d, 0x40700001},
 			{0x1e, 0x411111f0},
 		},
-		.value = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+		.value = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
 	},
 	{}
 };
@@ -6014,6 +6067,27 @@
 		.name = "Dell",
 #endif
 		.pins = (const struct hda_pintbl[]) {
+			{0x12, 0x99a30140},
+			{0x14, 0x90170110},
+			{0x15, 0x0321101f},
+			{0x16, 0x03011020},
+			{0x18, 0x40000008},
+			{0x19, 0x411111f0},
+			{0x1a, 0x411111f0},
+			{0x1b, 0x411111f0},
+			{0x1d, 0x41000001},
+			{0x1e, 0x411111f0},
+			{0x1f, 0x411111f0},
+		},
+		.value = ALC668_FIXUP_AUTO_MUTE,
+	},
+	{
+		.codec = 0x10ec0668,
+		.subvendor = 0x1028,
+#ifdef CONFIG_SND_DEBUG_VERBOSE
+		.name = "Dell",
+#endif
+		.pins = (const struct hda_pintbl[]) {
 			{0x12, 0x99a30150},
 			{0x14, 0x90170110},
 			{0x15, 0x0321101f},
@@ -6190,6 +6264,7 @@
 	{ .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 },
 	{ .id = 0x10ec0231, .name = "ALC231", .patch = patch_alc269 },
 	{ .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 },
+	{ .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 },
 	{ .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 },
 	{ .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 },
@@ -6223,10 +6298,12 @@
 	  .patch = patch_alc662 },
 	{ .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
 	{ .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
+	{ .id = 0x10ec0667, .name = "ALC667", .patch = patch_alc662 },
 	{ .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
 	{ .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
 	{ .id = 0x10ec0671, .name = "ALC671", .patch = patch_alc662 },
 	{ .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
+	{ .id = 0x10ec0867, .name = "ALC891", .patch = patch_alc882 },
 	{ .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
 	{ .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 },
 	{ .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc882 },

diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 68340d7..c91860e 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c

@@ -2779,7 +2779,7 @@
 	unsigned long port;
 	unsigned long pos, pos1, t;
 	int civ, timeout = 1000, attempt = 1;
-	struct timespec start_time, stop_time;
+	ktime_t start_time, stop_time;
 
 	if (chip->ac97_bus->clock != 48000)
 		return; /* specified in module option */
@@ -2813,7 +2813,7 @@
 		iputbyte(chip, port + ICH_REG_OFF_CR, ICH_IOCE);
 		iputdword(chip, ICHREG(ALI_DMACR), 1 << ichdev->ali_slot);
 	}
-	do_posix_clock_monotonic_gettime(&start_time);
+	start_time = ktime_get();
 	spin_unlock_irq(&chip->reg_lock);
 	msleep(50);
 	spin_lock_irq(&chip->reg_lock);
@@ -2837,7 +2837,7 @@
 		pos += ichdev->position;
 	}
 	chip->in_measurement = 0;
-	do_posix_clock_monotonic_gettime(&stop_time);
+	stop_time = ktime_get();
 	/* stop */
 	if (chip->device_type == DEVICE_ALI) {
 		iputdword(chip, ICHREG(ALI_DMACR), 1 << (ichdev->ali_slot + 16));
@@ -2865,9 +2865,7 @@
 	}
 
 	pos /= 4;
-	t = stop_time.tv_sec - start_time.tv_sec;
-	t *= 1000000;
-	t += (stop_time.tv_nsec - start_time.tv_nsec) / 1000;
+	t = ktime_us_delta(stop_time, start_time);
 	dev_info(chip->card->dev,
 		 "%s: measured %lu usecs (%lu samples)\n", __func__, t, pos);
 	if (t == 0) {

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5b5eb78..c1b49c3 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c

@@ -1,8 +1,10 @@
 /* TODO merge/factor in debugfs.c here */
 
+#include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/vfs.h>
 
@@ -96,12 +98,51 @@
 	return false;
 }
 
+static void mem_toupper(char *f, size_t len)
+{
+	while (len) {
+		*f = toupper(*f);
+		f++;
+		len--;
+	}
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/sysfs-rules.txt
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+	char *override_path;
+	size_t name_len = strlen(fs->name);
+	/* name + "_PATH" + '\0' */
+	char upper_name[name_len + 5 + 1];
+	memcpy(upper_name, fs->name, name_len);
+	mem_toupper(upper_name, name_len);
+	strcpy(&upper_name[name_len], "_PATH");
+
+	override_path = getenv(upper_name);
+	if (!override_path)
+		return false;
+
+	fs->found = true;
+	strncpy(fs->path, override_path, sizeof(fs->path));
+	return true;
+}
+
 static const char *fs__get_mountpoint(struct fs *fs)
 {
+	if (fs__env_override(fs))
+		return fs->path;
+
 	if (fs__check_mounts(fs))
 		return fs->path;
 
-	return fs__read_mounts(fs) ? fs->path : NULL;
+	if (fs__read_mounts(fs))
+		return fs->path;
+
+	return NULL;
 }
 
 static const char *fs__mountpoint(int idx)

diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index bf7be77..833a966 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l

@@ -92,6 +92,7 @@
 "#"?("cpu")	{ return K_CPU; }
 "#"?("vlan_tci") { return K_VLANT; }
 "#"?("vlan_pr")	{ return K_VLANP; }
+"#"?("rand")	{ return K_RAND; }
 
 ":"		{ return ':'; }
 ","		{ return ','; }

diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index d15efc9..e6306c5 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y

@@ -56,7 +56,7 @@
 %token OP_LDXI
 
 %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
 
 %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
 
@@ -164,6 +164,9 @@
 	| OP_LDB K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDB K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldh
@@ -212,6 +215,9 @@
 	| OP_LDH K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDH K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldi
@@ -265,6 +271,9 @@
 	| OP_LD K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LD K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	| OP_LD 'M' '[' number ']' {
 		bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
 	| OP_LD '[' 'x' '+' number ']' {

diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
index cfe0cdc..c5baf9c 100644
--- a/tools/net/bpf_jit_disasm.c
+++ b/tools/net/bpf_jit_disasm.c

@@ -43,8 +43,7 @@
 	free(path);
 }
 
-static void get_asm_insns(uint8_t *image, size_t len, unsigned long base,
-			  int opcodes)
+static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
 {
 	int count, i, pc = 0;
 	char tpath[256];
@@ -107,13 +106,13 @@
 }
 
 static int get_last_jit_image(char *haystack, size_t hlen,
-			      uint8_t *image, size_t ilen,
-			      unsigned long *base)
+			      uint8_t *image, size_t ilen)
 {
 	char *ptr, *pptr, *tmp;
 	off_t off = 0;
 	int ret, flen, proglen, pass, ulen = 0;
 	regmatch_t pmatch[1];
+	unsigned long base;
 	regex_t regex;
 
 	if (hlen == 0)
@@ -136,7 +135,7 @@
 
 	ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
 	ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
-		     &flen, &proglen, &pass, base);
+		     &flen, &proglen, &pass, &base);
 	if (ret != 4)
 		return 0;
 
@@ -162,7 +161,7 @@
 	assert(ulen == proglen);
 	printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
 	       proglen, pass, flen);
-	printf("%lx + <x>:\n", *base);
+	printf("%lx + <x>:\n", base);
 
 	regfree(&regex);
 	return ulen;
@@ -172,8 +171,7 @@
 {
 	int len, klen, opcodes = 0;
 	char *kbuff;
-	unsigned long base;
-	uint8_t image[4096];
+	static uint8_t image[32768];
 
 	if (argc > 1) {
 		if (!strncmp("-o", argv[argc - 1], 2)) {
@@ -189,9 +187,9 @@
 
 	kbuff = get_klog_buff(&klen);
 
-	len = get_last_jit_image(kbuff, klen, image, sizeof(image), &base);
-	if (len > 0 && base > 0)
-		get_asm_insns(image, len, base, opcodes);
+	len = get_last_jit_image(kbuff, klen, image, sizeof(image));
+	if (len > 0)
+		get_asm_insns(image, len, opcodes);
 
 	put_klog_buff(kbuff);
 

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index c71b0f3..d460049 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt

@@ -184,9 +184,10 @@
 	- in_tx: only when the target is in a hardware transaction
 	- no_tx: only when the target is not in a hardware transaction
 	- abort_tx: only when the target is a hardware transaction abort
+	- cond: conditional branches
 
 +
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
 The privilege levels may be omitted, in which case, the privilege levels of the associated
 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
 levels are subject to permissions.  When sampling on multiple events, branch stack sampling

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a1b5185..cefdf43 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt

@@ -111,7 +111,7 @@
 --fields=::
 	Specify output field - multiple keys can be specified in CSV format.
 	Following fields are available:
-	overhead, overhead_sys, overhead_us, sample and period.
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
 	Also it can contain any sort key(s).
 
 	By default, every sort keys not specified in -F will be appended
@@ -163,6 +163,11 @@
 
 	Default: fractal,0.5,callee,function.
 
+--children::
+	Accumulate callchain of children to parent entry so that they can
+	show up in the output.  The output will have a new "Children" column
+	and will be sorted on the data.  It requires that callchains are recorded.
+
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
 	beyond the specified depth will be ignored. This is a trade-off

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index dcfa54c..180ae02 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt

@@ -119,7 +119,7 @@
 --fields=::
 	Specify output field - multiple keys can be specified in CSV format.
 	Following fields are available:
-	overhead, overhead_sys, overhead_us, sample and period.
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
 	Also it can contain any sort key(s).
 
 	By default, every sort keys not specified in --field will be appended
@@ -161,6 +161,12 @@
 	Setup and enable call-graph (stack chain/backtrace) recording,
 	implies -g.
 
+--children::
+	Accumulate callchain of children to parent entry so that they can
+	show up in the output.  The output will have a new "Children" column
+	and will be sorted on the data.  It requires the -g/--call-graph option
+	to be enabled.
+
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
 	beyond the specified depth will be ignored. This is a trade-off

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 02f0a4d..ae20edf 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf

@@ -400,6 +400,7 @@
 LIB_OBJS += $(OUTPUT)tests/hists_link.o
 LIB_OBJS += $(OUTPUT)tests/hists_filter.o
 LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
 LIB_OBJS += $(OUTPUT)tests/python-use.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
@@ -788,8 +789,8 @@
 	@echo ''
 	@echo 'Perf install targets:'
 	@echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
-	@echo '  HINT: use "make prefix=<path> <install target>" to install to a particular'
-	@echo '        path like make prefix=/usr/local install install-doc'
+	@echo '  HINT: use "prefix" or "DESTDIR" to install to a particular'
+	@echo '        path like "make prefix=/usr/local install install-doc"'
 	@echo '  install	- install compiled binaries'
 	@echo '  install-doc	- install *all* documentation'
 	@echo '  install-man	- install manpage documentation'
@@ -814,17 +815,20 @@
 $(DOC_TARGETS):
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
 
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
 TAGS:
 	$(RM) TAGS
-	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
 
 tags:
 	$(RM) tags
-	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
 
 cscope:
 	$(RM) cscope*
-	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
 
 ### Detect prefix changes
 TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index d30d2c2..1ec429f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c

@@ -65,12 +65,13 @@
 		return 0;
 	}
 
-	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
+	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+				true);
 	if (he == NULL)
 		return -ENOMEM;
 
 	ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+	hists__inc_nr_samples(&evsel->hists, true);
 	return ret;
 }
 

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8bff543..9a5a035 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c

@@ -315,7 +315,7 @@
 			    u64 weight, u64 transaction)
 {
 	if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
-			       transaction) != NULL)
+			       transaction, true) != NULL)
 		return 0;
 	return -ENOMEM;
 }

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e4c85b8..378b85b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c

@@ -454,7 +454,11 @@
 			if (done)
 				break;
 			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
-			if (err < 0 && errno == EINTR)
+			/*
+			 * Propagate error, only if there's any. Ignore positive
+			 * number of returned events and interrupt error.
+			 */
+			if (err > 0 || (err < 0 && errno == EINTR))
 				err = 0;
 			waking++;
 		}
@@ -544,6 +548,7 @@
 	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
 	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
 	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 	BRANCH_END
 };
 

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bc0eec1..21d830b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c

@@ -72,6 +72,10 @@
 		rep->min_percent = strtof(value, NULL);
 		return 0;
 	}
+	if (!strcmp(var, "report.children")) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+		return 0;
+	}
 
 	return perf_default_config(var, value, cb);
 }
@@ -85,156 +89,52 @@
 	 */
 	if (he->stat.nr_events == 1)
 		rep->nr_entries++;
-
-	/*
-	 * Only counts number of samples at this stage as it's more
-	 * natural to do it here and non-sample events are also
-	 * counted in perf_session_deliver_event().  The dump_trace
-	 * requires this info is ready before going to the output tree.
-	 */
-	hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-	if (!he->filtered)
-		he->hists->stats.nr_non_filtered_samples++;
 }
 
-static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
-				      struct perf_sample *sample, struct perf_evsel *evsel)
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al, bool single,
+				      void *arg)
 {
-	struct symbol *parent = NULL;
-	struct hist_entry *he;
-	struct mem_info *mi, *mx;
-	uint64_t cost;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
+	int err = 0;
+	struct report *rep = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+	struct mem_info *mi;
+	struct branch_info *bi;
 
-	if (err)
-		return err;
+	report__inc_stats(rep, he);
 
-	mi = sample__resolve_mem(sample, al);
-	if (!mi)
-		return -ENOMEM;
-
-	if (rep->hide_unresolved && !al->sym)
+	if (!ui__has_annotation())
 		return 0;
 
-	cost = sample->weight;
-	if (!cost)
-		cost = 1;
-
-	/*
-	 * must pass period=weight in order to get the correct
-	 * sorting from hists__collapse_resort() which is solely
-	 * based on periods. We want sorting be done on nr_events * weight
-	 * and this is indirectly achieved by passing period=weight here
-	 * and the he_stat__add_period() function.
-	 */
-	he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
-				cost, cost, 0);
-	if (!he)
-		return -ENOMEM;
-
-	if (ui__has_annotation()) {
-		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+	if (sort__mode == SORT_MODE__BRANCH) {
+		bi = he->branch_info;
+		err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
 		if (err)
 			goto out;
 
-		mx = he->mem_info;
-		err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
+
+	} else if (rep->mem_mode) {
+		mi = he->mem_info;
+		err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
 		if (err)
 			goto out;
-	}
 
-	report__inc_stats(rep, he);
-
-	err = hist_entry__append_callchain(he, sample);
-out:
-	return err;
-}
-
-static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
-					 struct perf_sample *sample, struct perf_evsel *evsel)
-{
-	struct symbol *parent = NULL;
-	unsigned i;
-	struct hist_entry *he;
-	struct branch_info *bi, *bx;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-	if (err)
-		return err;
-
-	bi = sample__resolve_bstack(sample, al);
-	if (!bi)
-		return -ENOMEM;
-
-	for (i = 0; i < sample->branch_stack->nr; i++) {
-		if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
-			continue;
-
-		err = -ENOMEM;
-
-		/* overwrite the 'al' to branch-to info */
-		al->map = bi[i].to.map;
-		al->sym = bi[i].to.sym;
-		al->addr = bi[i].to.addr;
-		/*
-		 * The report shows the percentage of total branches captured
-		 * and not events sampled. Thus we use a pseudo period of 1.
-		 */
-		he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
-					1, 1, 0);
-		if (he) {
-			if (ui__has_annotation()) {
-				bx = he->branch_info;
-				err = addr_map_symbol__inc_samples(&bx->from,
-								   evsel->idx);
-				if (err)
-					goto out;
-
-				err = addr_map_symbol__inc_samples(&bx->to,
-								   evsel->idx);
-				if (err)
-					goto out;
-			}
-			report__inc_stats(rep, he);
-		} else
-			goto out;
-	}
-	err = 0;
-out:
-	free(bi);
-	return err;
-}
-
-static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
-				  struct addr_location *al, struct perf_sample *sample)
-{
-	struct symbol *parent = NULL;
-	struct hist_entry *he;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-	if (err)
-		return err;
-
-	he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
-				sample->period, sample->weight,
-				sample->transaction);
-	if (he == NULL)
-		return -ENOMEM;
-
-	err = hist_entry__append_callchain(he, sample);
-	if (err)
-		goto out;
-
-	if (ui__has_annotation())
 		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
 
-	report__inc_stats(rep, he);
+	} else if (symbol_conf.cumulate_callchain) {
+		if (single)
+			err = hist_entry__inc_addr_samples(he, evsel->idx,
+							   al->addr);
+	} else {
+		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+	}
 
 out:
 	return err;
 }
 
-
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
@@ -243,6 +143,10 @@
 {
 	struct report *rep = container_of(tool, struct report, tool);
 	struct addr_location al;
+	struct hist_entry_iter iter = {
+		.hide_unresolved = rep->hide_unresolved,
+		.add_entry_cb = hist_iter__report_callback,
+	};
 	int ret;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -257,22 +161,23 @@
 	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
 		return 0;
 
-	if (sort__mode == SORT_MODE__BRANCH) {
-		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
-		if (ret < 0)
-			pr_debug("problem adding lbr entry, skipping event\n");
-	} else if (rep->mem_mode == 1) {
-		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
-		if (ret < 0)
-			pr_debug("problem adding mem entry, skipping event\n");
-	} else {
-		if (al.map != NULL)
-			al.map->dso->hit = 1;
+	if (sort__mode == SORT_MODE__BRANCH)
+		iter.ops = &hist_iter_branch;
+	else if (rep->mem_mode)
+		iter.ops = &hist_iter_mem;
+	else if (symbol_conf.cumulate_callchain)
+		iter.ops = &hist_iter_cumulative;
+	else
+		iter.ops = &hist_iter_normal;
 
-		ret = report__add_hist_entry(rep, evsel, &al, sample);
-		if (ret < 0)
-			pr_debug("problem incrementing symbol period, skipping event\n");
-	}
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
+	ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
+				   rep);
+	if (ret < 0)
+		pr_debug("problem adding hist entry, skipping event\n");
+
 	return ret;
 }
 
@@ -329,6 +234,14 @@
 			}
 	}
 
+	if (symbol_conf.cumulate_callchain) {
+		/* Silently ignore if callchain is missing */
+		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+			symbol_conf.cumulate_callchain = false;
+			perf_hpp__cancel_cumulate();
+		}
+	}
+
 	if (sort__mode == SORT_MODE__BRANCH) {
 		if (!is_pipe &&
 		    !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -712,6 +625,8 @@
 	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
 		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
 		     "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
 		    "anything beyond the specified depth will be ignored. "
@@ -804,8 +719,10 @@
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
 
-	if (branch_mode == -1 && has_br_stack)
+	if (branch_mode == -1 && has_br_stack) {
 		sort__mode = SORT_MODE__BRANCH;
+		symbol_conf.cumulate_callchain = false;
+	}
 
 	if (report.mem_mode) {
 		if (sort__mode == SORT_MODE__BRANCH) {
@@ -813,6 +730,7 @@
 			goto error;
 		}
 		sort__mode = SORT_MODE__MEMORY;
+		symbol_conf.cumulate_callchain = false;
 	}
 
 	if (setup_sorting() < 0) {

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d717683..c38d06c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c

@@ -1428,7 +1428,7 @@
 	int err = 0;
 
 	evsel->hists.stats.total_period += sample->period;
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+	hists__inc_nr_samples(&evsel->hists, true);
 
 	if (evsel->handler != NULL) {
 		tracepoint_handler f = evsel->handler;

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5b389ce..377971d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c

@@ -196,6 +196,12 @@
 
 	pthread_mutex_unlock(&notes->lock);
 
+	/*
+	 * This function is now called with he->hists->lock held.
+	 * Release it before going to sleep.
+	 */
+	pthread_mutex_unlock(&he->hists->lock);
+
 	if (err == -ERANGE && !he->ms.map->erange_warned)
 		ui__warn_map_erange(he->ms.map, sym, ip);
 	else if (err == -ENOMEM) {
@@ -203,6 +209,8 @@
 		       sym->name);
 		sleep(1);
 	}
+
+	pthread_mutex_lock(&he->hists->lock);
 }
 
 static void perf_top__show_details(struct perf_top *top)
@@ -238,27 +246,6 @@
 	pthread_mutex_unlock(&notes->lock);
 }
 
-static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
-						     struct addr_location *al,
-						     struct perf_sample *sample)
-{
-	struct hist_entry *he;
-
-	pthread_mutex_lock(&evsel->hists.lock);
-	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
-				sample->period, sample->weight,
-				sample->transaction);
-	pthread_mutex_unlock(&evsel->hists.lock);
-	if (he == NULL)
-		return NULL;
-
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
-	if (!he->filtered)
-		evsel->hists.stats.nr_non_filtered_samples++;
-
-	return he;
-}
-
 static void perf_top__print_sym_table(struct perf_top *top)
 {
 	char bf[160];
@@ -662,6 +649,26 @@
 	return 0;
 }
 
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+				   struct addr_location *al, bool single,
+				   void *arg)
+{
+	struct perf_top *top = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+
+	if (sort__has_sym && single) {
+		u64 ip = al->addr;
+
+		if (al->map)
+			ip = al->map->unmap_ip(al->map, ip);
+
+		perf_top__record_precise_ip(top, he, evsel->idx, ip);
+	}
+
+	return 0;
+}
+
 static void perf_event__process_sample(struct perf_tool *tool,
 				       const union perf_event *event,
 				       struct perf_evsel *evsel,
@@ -669,8 +676,6 @@
 				       struct machine *machine)
 {
 	struct perf_top *top = container_of(tool, struct perf_top, tool);
-	struct symbol *parent = NULL;
-	u64 ip = sample->ip;
 	struct addr_location al;
 	int err;
 
@@ -745,25 +750,23 @@
 	}
 
 	if (al.sym == NULL || !al.sym->ignore) {
-		struct hist_entry *he;
+		struct hist_entry_iter iter = {
+			.add_entry_cb = hist_iter__top_callback,
+		};
 
-		err = sample__resolve_callchain(sample, &parent, evsel, &al,
-						top->max_stack);
-		if (err)
-			return;
+		if (symbol_conf.cumulate_callchain)
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
 
-		he = perf_evsel__add_hist_entry(evsel, &al, sample);
-		if (he == NULL) {
+		pthread_mutex_lock(&evsel->hists.lock);
+
+		err = hist_entry_iter__add(&iter, &al, evsel, sample,
+					   top->max_stack, top);
+		if (err < 0)
 			pr_err("Problem incrementing symbol period, skipping event\n");
-			return;
-		}
 
-		err = hist_entry__append_callchain(he, sample);
-		if (err)
-			return;
-
-		if (sort__has_sym)
-			perf_top__record_precise_ip(top, he, evsel->idx, ip);
+		pthread_mutex_unlock(&evsel->hists.lock);
 	}
 
 	return;
@@ -1001,6 +1004,10 @@
 
 	if (!strcmp(var, "top.call-graph"))
 		return record_parse_callchain(value, &top->record_opts);
+	if (!strcmp(var, "top.children")) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+		return 0;
+	}
 
 	return perf_default_config(var, value, cb);
 }
@@ -1095,6 +1102,8 @@
 	OPT_CALLBACK(0, "call-graph", &top.record_opts,
 		     "mode[,dump_size]", record_callchain_help,
 		     &parse_callchain_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
 	OPT_INTEGER(0, "max-stack", &top.max_stack,
 		    "Set the maximum stack depth when parsing the callchain. "
 		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
@@ -1200,6 +1209,11 @@
 
 	top.sym_evsel = perf_evlist__first(top.evlist);
 
+	if (!symbol_conf.use_callchain) {
+		symbol_conf.cumulate_callchain = false;
+		perf_hpp__cancel_cumulate();
+	}
+
 	symbol_conf.priv_size = sizeof(struct annotation);
 
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 729bbdf..4f100b5 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile

@@ -447,6 +447,7 @@
   ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
+    msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -599,7 +600,7 @@
 
 # Make the path relative to DESTDIR, not to prefix
 ifndef DESTDIR
-prefix = $(HOME)
+prefix ?= $(HOME)
 endif
 bindir_relative = bin
 bindir = $(prefix)/$(bindir_relative)

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 431798a..78f7b92 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c

@@ -481,14 +481,18 @@
 		fprintf(stderr, "cannot handle %s internally", cmd);
 		goto out;
 	}
-#ifdef HAVE_LIBAUDIT_SUPPORT
 	if (!prefixcmp(cmd, "trace")) {
+#ifdef HAVE_LIBAUDIT_SUPPORT
 		set_buildid_dir();
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
-	}
+#else
+		fprintf(stderr,
+			"trace command not available: missing audit-libs devel package at build time.\n");
+		goto out;
 #endif
+	}
 	/* Look for flags.. */
 	argv++;
 	argc--;

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 831f52c..802e3cd 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c

@@ -140,6 +140,10 @@
 		.func = test__hists_output,
 	},
 	{
+		.desc = "Test cumulation of child hist entries",
+		.func = test__hists_cumulate,
+	},
+	{
 		.func = NULL,
 	},
 };

diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index e4e01aad..a62c091 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c

@@ -12,9 +12,9 @@
 	u32 pid;
 	const char *comm;
 } fake_threads[] = {
-	{ 100, "perf" },
-	{ 200, "perf" },
-	{ 300, "bash" },
+	{ FAKE_PID_PERF1, "perf" },
+	{ FAKE_PID_PERF2, "perf" },
+	{ FAKE_PID_BASH,  "bash" },
 };
 
 static struct {
@@ -22,15 +22,15 @@
 	u64 start;
 	const char *filename;
 } fake_mmap_info[] = {
-	{ 100, 0x40000, "perf" },
-	{ 100, 0x50000, "libc" },
-	{ 100, 0xf0000, "[kernel]" },
-	{ 200, 0x40000, "perf" },
-	{ 200, 0x50000, "libc" },
-	{ 200, 0xf0000, "[kernel]" },
-	{ 300, 0x40000, "bash" },
-	{ 300, 0x50000, "libc" },
-	{ 300, 0xf0000, "[kernel]" },
+	{ FAKE_PID_PERF1, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF1, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_PERF2, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF2, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_BASH,  FAKE_MAP_BASH,   "bash" },
+	{ FAKE_PID_BASH,  FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_BASH,  FAKE_MAP_KERNEL, "[kernel]" },
 };
 
 struct fake_sym {
@@ -40,27 +40,30 @@
 };
 
 static struct fake_sym perf_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "run_command" },
-	{ 900, 100, "cmd_record" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
 };
 
 static struct fake_sym bash_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "xmalloc" },
-	{ 900, 100, "xfree" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
 };
 
 static struct fake_sym libc_syms[] = {
 	{ 700, 100, "malloc" },
 	{ 800, 100, "free" },
 	{ 900, 100, "realloc" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
 };
 
 static struct fake_sym kernel_syms[] = {
-	{ 700, 100, "schedule" },
-	{ 800, 100, "page_fault" },
-	{ 900, 100, "sys_perf_event_open" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
 };
 
 static struct {
@@ -102,7 +105,7 @@
 				.pid = fake_mmap_info[i].pid,
 				.tid = fake_mmap_info[i].pid,
 				.start = fake_mmap_info[i].start,
-				.len = 0x1000ULL,
+				.len = FAKE_MAP_LENGTH,
 				.pgoff = 0ULL,
 			},
 		};
@@ -193,10 +196,11 @@
 		he = rb_entry(node, struct hist_entry, rb_node);
 
 		if (!he->filtered) {
-			pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
+			pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
 				i, thread__comm_str(he->thread), he->thread->tid,
 				he->ms.map->dso->short_name,
-				he->ms.sym->name, he->stat.period);
+				he->ms.sym->name, he->stat.period,
+				he->stat_acc ? he->stat_acc->period : 0);
 		}
 
 		i++;

diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h
index 1415ae6..888254e 100644
--- a/tools/perf/tests/hists_common.h
+++ b/tools/perf/tests/hists_common.h

@@ -4,6 +4,34 @@
 struct machine;
 struct machines;
 
+#define FAKE_PID_PERF1  100
+#define FAKE_PID_PERF2  200
+#define FAKE_PID_BASH   300
+
+#define FAKE_MAP_PERF    0x400000
+#define FAKE_MAP_BASH    0x400000
+#define FAKE_MAP_LIBC    0x500000
+#define FAKE_MAP_KERNEL  0xf00000
+#define FAKE_MAP_LENGTH  0x100000
+
+#define FAKE_SYM_OFFSET1  700
+#define FAKE_SYM_OFFSET2  800
+#define FAKE_SYM_OFFSET3  900
+#define FAKE_SYM_LENGTH   100
+
+#define FAKE_IP_PERF_MAIN  FAKE_MAP_PERF + FAKE_SYM_OFFSET1
+#define FAKE_IP_PERF_RUN_COMMAND  FAKE_MAP_PERF + FAKE_SYM_OFFSET2
+#define FAKE_IP_PERF_CMD_RECORD  FAKE_MAP_PERF + FAKE_SYM_OFFSET3
+#define FAKE_IP_BASH_MAIN  FAKE_MAP_BASH + FAKE_SYM_OFFSET1
+#define FAKE_IP_BASH_XMALLOC  FAKE_MAP_BASH + FAKE_SYM_OFFSET2
+#define FAKE_IP_BASH_XFREE  FAKE_MAP_BASH + FAKE_SYM_OFFSET3
+#define FAKE_IP_LIBC_MALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
+#define FAKE_IP_LIBC_FREE  FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
+#define FAKE_IP_LIBC_REALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
+#define FAKE_IP_KERNEL_SCHEDULE  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
+#define FAKE_IP_KERNEL_PAGE_FAULT  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
+
 /*
  * The setup_fake_machine() provides a test environment which consists
  * of 3 processes that have 3 mappings and in turn, have 3 symbols
@@ -13,7 +41,7 @@
  * .............  .............  ...................
  *    perf:  100           perf  main
  *    perf:  100           perf  run_command
- *    perf:  100           perf  comd_record
+ *    perf:  100           perf  cmd_record
  *    perf:  100           libc  malloc
  *    perf:  100           libc  free
  *    perf:  100           libc  realloc
@@ -22,7 +50,7 @@
  *    perf:  100       [kernel]  sys_perf_event_open
  *    perf:  200           perf  main
  *    perf:  200           perf  run_command
- *    perf:  200           perf  comd_record
+ *    perf:  200           perf  cmd_record
  *    perf:  200           libc  malloc
  *    perf:  200           libc  free
  *    perf:  200           libc  realloc

diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644
index 0000000..0ac240d
--- /dev/null
+++ b/tools/perf/tests/hists_cumulate.c

@@ -0,0 +1,726 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+	u32 pid;
+	u64 ip;
+	struct thread *thread;
+	struct map *map;
+	struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+	/* perf [kernel] schedule() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [perf]   cmd_record() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+	/* perf [libc]   malloc() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+	/* perf [libc]   free() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [kernel] page_fault() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+	/* bash [bash]   main() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
+	/* bash [bash]   xmalloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+	/* bash [kernel] page_fault() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be cast to struct ip_callchain which has all 64 bit entries
+ * of nr and ips[].
+ */
+static u64 fake_callchains[][10] = {
+	/*   schedule => run_command => main */
+	{ 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main  */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   cmd_record => run_command => main */
+	{ 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   malloc => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   free => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   page_fault => sys_perf_event_open => run_command => main */
+	{ 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+	     FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_BASH_MAIN, },
+	/*   xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+	{ 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+	     FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+	/*   page_fault => malloc => main */
+	{ 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+	struct addr_location al;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	struct perf_sample sample = { .period = 1000, };
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+		const union perf_event event = {
+			.header = {
+				.misc = PERF_RECORD_MISC_USER,
+			},
+		};
+		struct hist_entry_iter iter = {
+			.hide_unresolved = false,
+		};
+
+		if (symbol_conf.cumulate_callchain)
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
+
+		sample.pid = fake_samples[i].pid;
+		sample.tid = fake_samples[i].pid;
+		sample.ip = fake_samples[i].ip;
+		sample.callchain = (struct ip_callchain *)fake_callchains[i];
+
+		if (perf_event__preprocess_sample(&event, machine, &al,
+						  &sample) < 0)
+			goto out;
+
+		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+					 PERF_MAX_STACK_DEPTH, NULL) < 0)
+			goto out;
+
+		fake_samples[i].thread = al.thread;
+		fake_samples[i].map = al.map;
+		fake_samples[i].sym = al.sym;
+	}
+
+	return TEST_OK;
+
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");
+	return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+	struct hist_entry *he;
+	struct rb_root *root_in;
+	struct rb_root *root_out;
+	struct rb_node *node;
+
+	if (sort__need_collapse)
+		root_in = &hists->entries_collapsed;
+	else
+		root_in = hists->entries_in;
+
+	root_out = &hists->entries;
+
+	while (!RB_EMPTY_ROOT(root_out)) {
+		node = rb_first(root_out);
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		rb_erase(node, root_out);
+		rb_erase(&he->rb_node_in, root_in);
+		hist_entry__free(he);
+	}
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he)  (thread__comm_str(he->thread))
+#define DSO(he)   (he->ms.map->dso->short_name)
+#define SYM(he)   (he->ms.sym->name)
+#define CPU(he)   (he->cpu)
+#define PID(he)   (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl)  (cl->ms.map->dso->short_name)
+#define CSYM(cl)  (cl->ms.sym->name)
+
+struct result {
+	u64 children;
+	u64 self;
+	const char *comm;
+	const char *dso;
+	const char *sym;
+};
+
+struct callchain_result {
+	u64 nr;
+	struct {
+		const char *dso;
+		const char *sym;
+	} node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+		   struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+	char buf[32];
+	size_t i, c;
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+	struct callchain_node *cnode;
+	struct callchain_list *clist;
+
+	/*
+	 * adding and deleting hist entries must be done outside of this
+	 * function since TEST_ASSERT_VAL() returns in case of failure.
+	 */
+	hists__collapse_resort(hists, NULL);
+	hists__output_resort(hists);
+
+	if (verbose > 2) {
+		pr_info("use callchain: %d, cumulate callchain: %d\n",
+			symbol_conf.use_callchain,
+			symbol_conf.cumulate_callchain);
+		print_hists_out(hists);
+	}
+
+	root = &hists->entries;
+	for (node = rb_first(root), i = 0;
+	     node && (he = rb_entry(node, struct hist_entry, rb_node));
+	     node = rb_next(node), i++) {
+		scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+		TEST_ASSERT_VAL("Incorrect number of hist entry",
+				i < nr_expected);
+		TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+				!strcmp(COMM(he), expected[i].comm) &&
+				!strcmp(DSO(he), expected[i].dso) &&
+				!strcmp(SYM(he), expected[i].sym));
+
+		if (symbol_conf.cumulate_callchain)
+			TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+		if (!symbol_conf.use_callchain)
+			continue;
+
+		/* check callchain entries */
+		root = &he->callchain->node.rb_root;
+		cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
+
+		c = 0;
+		list_for_each_entry(clist, &cnode->val, list) {
+			scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+			TEST_ASSERT_VAL("Incorrect number of callchain entry",
+					c < expected_callchain[i].nr);
+			TEST_ASSERT_VAL(buf,
+				!strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+				!strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+			c++;
+		}
+		/* TODO: handle multiple child nodes properly */
+		TEST_ASSERT_VAL("Incorrect number of callchain entry",
+				c <= expected_callchain[i].nr);
+	}
+	TEST_ASSERT_VAL("Incorrect number of hist entry",
+			i == nr_expected);
+	TEST_ASSERT_VAL("Incorrect number of callchain entry",
+			!symbol_conf.use_callchain || nr_expected == nr_callchain);
+	return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%     bash  bash           [.] main
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     perf  libc           [.] free
+	 *   10.00%     perf  libc           [.] malloc
+	 *   10.00%     perf  perf           [.] cmd_record
+	 */
+	struct result expected[] = {
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+
+	symbol_conf.use_callchain = false;
+	symbol_conf.cumulate_callchain = false;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+	struct callchain_result expected_callchain[] = {
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;
+	symbol_conf.cumulate_callchain = false;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 */
+	struct result expected[] = {
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+	};
+
+	symbol_conf.use_callchain = false;
+	symbol_conf.cumulate_callchain = true;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* callchain + children */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *              |
+	 *              --- run_command
+	 *                  main
+	 *
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                 |
+	 *                 |--50.00%-- xmalloc
+	 *                 |           main
+	 *                  --50.00%-- main
+	 *
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 *              |
+	 *              --- sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+	};
+	struct callchain_result expected_callchain[] = {
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			2, {	{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;
+	symbol_conf.cumulate_callchain = true;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+int test__hists_cumulate(void)
+{
+	int err = TEST_FAIL;
+	struct machines machines;
+	struct machine *machine;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new();
+	size_t i;
+	test_fn_t testcases[] = {
+		test1,
+		test2,
+		test3,
+		test4,
+	};
+
+	TEST_ASSERT_VAL("No memory", evlist);
+
+	err = parse_events(evlist, "cpu-clock");
+	if (err)
+		goto out;
+
+	machines__init(&machines);
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	evsel = perf_evlist__first(evlist);
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		err = testcases[i](evsel, machine);
+		if (err < 0)
+			break;
+	}
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	machines__exit(&machines);
+
+	return err;
+}

diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index c5ba924..821f581 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c

@@ -21,33 +21,33 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .pid = 100, .ip = 0xf0000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .pid = 100, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [libc]   malloc() */
-	{ .pid = 100, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 	/* perf [perf]   main() */
-	{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
 	/* perf [perf]   cmd_record() */
-	{ .pid = 200, .ip = 0x40000 + 900, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* perf [kernel] page_fault() */
-	{ .pid = 200, .ip = 0xf0000 + 800, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	/* bash [bash]   main() */
-	{ .pid = 300, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
 	/* bash [bash]   xmalloc() */
-	{ .pid = 300, .ip = 0x40000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [libc]   malloc() */
-	{ .pid = 300, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
 	/* bash [kernel] page_fault() */
-	{ .pid = 300, .ip = 0xf0000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
-static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+static int add_hist_entries(struct perf_evlist *evlist,
+			    struct machine *machine __maybe_unused)
 {
 	struct perf_evsel *evsel;
 	struct addr_location al;
-	struct hist_entry *he;
-	struct perf_sample sample = { .cpu = 0, };
+	struct perf_sample sample = { .period = 100, };
 	size_t i;
 
 	/*
@@ -62,6 +62,10 @@
 					.misc = PERF_RECORD_MISC_USER,
 				},
 			};
+			struct hist_entry_iter iter = {
+				.ops = &hist_iter_normal,
+				.hide_unresolved = false,
+			};
 
 			/* make sure it has no filter at first */
 			evsel->hists.thread_filter = NULL;
@@ -76,18 +80,13 @@
 							  &sample) < 0)
 				goto out;
 
-			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 100, 1, 0);
-			if (he == NULL)
+			if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+						 PERF_MAX_STACK_DEPTH, NULL) < 0)
 				goto out;
 
 			fake_samples[i].thread = al.thread;
 			fake_samples[i].map = al.map;
 			fake_samples[i].sym = al.sym;
-
-			hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-			if (!he->filtered)
-				he->hists->stats.nr_non_filtered_samples++;
 		}
 	}
 

diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 5ffa2c3..d4b34b0 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c

@@ -21,41 +21,41 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_common_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .pid = 100, .ip = 0xf0000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .pid = 200, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [perf]   cmd_record() */
-	{ .pid = 200, .ip = 0x40000 + 900, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* bash [bash]   xmalloc() */
-	{ .pid = 300, .ip = 0x40000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [libc]   malloc() */
-	{ .pid = 300, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
 };
 
 static struct sample fake_samples[][5] = {
 	{
 		/* perf [perf]   run_command() */
-		{ .pid = 100, .ip = 0x40000 + 800, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
 		/* perf [libc]   malloc() */
-		{ .pid = 100, .ip = 0x50000 + 700, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 		/* perf [kernel] page_fault() */
-		{ .pid = 100, .ip = 0xf0000 + 800, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 		/* perf [kernel] sys_perf_event_open() */
-		{ .pid = 200, .ip = 0xf0000 + 900, },
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
 		/* bash [libc]   free() */
-		{ .pid = 300, .ip = 0x50000 + 800, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_FREE, },
 	},
 	{
 		/* perf [libc]   free() */
-		{ .pid = 200, .ip = 0x50000 + 800, },
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
 		/* bash [libc]   malloc() */
-		{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
 		/* bash [bash]   xfee() */
-		{ .pid = 300, .ip = 0x40000 + 900, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XFREE, },
 		/* bash [libc]   realloc() */
-		{ .pid = 300, .ip = 0x50000 + 900, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_REALLOC, },
 		/* bash [kernel] page_fault() */
-		{ .pid = 300, .ip = 0xf0000 + 800, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	},
 };
 
@@ -64,7 +64,7 @@
 	struct perf_evsel *evsel;
 	struct addr_location al;
 	struct hist_entry *he;
-	struct perf_sample sample = { .cpu = 0, };
+	struct perf_sample sample = { .period = 1, };
 	size_t i = 0, k;
 
 	/*
@@ -88,7 +88,7 @@
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 1, 1, 0);
+						NULL, NULL, 1, 1, 0, true);
 			if (he == NULL)
 				goto out;
 
@@ -112,7 +112,7 @@
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 1, 1, 0);
+						NULL, NULL, 1, 1, 0, true);
 			if (he == NULL)
 				goto out;
 

diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index a168505..e3bbd6c 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c

@@ -22,31 +22,31 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, },
+	{ .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .cpu = 1, .pid = 100, .ip = 0x40000 + 700, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [perf]   cmd_record() */
-	{ .cpu = 1, .pid = 100, .ip = 0x40000 + 900, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* perf [libc]   malloc() */
-	{ .cpu = 1, .pid = 100, .ip = 0x50000 + 700, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 	/* perf [libc]   free() */
-	{ .cpu = 2, .pid = 100, .ip = 0x50000 + 800, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
 	/* perf [perf]   main() */
-	{ .cpu = 2, .pid = 200, .ip = 0x40000 + 700, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [kernel] page_fault() */
-	{ .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	/* bash [bash]   main() */
-	{ .cpu = 3, .pid = 300, .ip = 0x40000 + 700, },
+	{ .cpu = 3, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
 	/* bash [bash]   xmalloc() */
-	{ .cpu = 0, .pid = 300, .ip = 0x40000 + 800, },
+	{ .cpu = 0, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [kernel] page_fault() */
-	{ .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, },
+	{ .cpu = 1, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
 static int add_hist_entries(struct hists *hists, struct machine *machine)
 {
 	struct addr_location al;
-	struct hist_entry *he;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
 	struct perf_sample sample = { .period = 100, };
 	size_t i;
 
@@ -56,6 +56,10 @@
 				.misc = PERF_RECORD_MISC_USER,
 			},
 		};
+		struct hist_entry_iter iter = {
+			.ops = &hist_iter_normal,
+			.hide_unresolved = false,
+		};
 
 		sample.cpu = fake_samples[i].cpu;
 		sample.pid = fake_samples[i].pid;
@@ -66,9 +70,8 @@
 						  &sample) < 0)
 			goto out;
 
-		he = __hists__add_entry(hists, &al, NULL, NULL, NULL,
-					sample.period, 1, 0);
-		if (he == NULL)
+		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+					 PERF_MAX_STACK_DEPTH, NULL) < 0)
 			goto out;
 
 		fake_samples[i].thread = al.thread;

diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d76c0e2..022bb68 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h

@@ -45,6 +45,7 @@
 int test__mmap_thread_lookup(void);
 int test__thread_mg_share(void);
 int test__hists_output(void);
+int test__hists_cumulate(void);
 
 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index d11541d..3ccf6e1 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c

@@ -194,7 +194,7 @@
 		ui_helpline__vpush(format, args);
 		va_end(args);
 	} else {
-		while ((key == ui__question_window("Warning!", text,
+		while ((key = ui__question_window("Warning!", text,
 						   "Press any key...",
 						   timeout)) == K_RESIZE)
 			ui_browser__handle_resize(browser);

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 1c331b9..52c03fb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c

@@ -37,7 +37,6 @@
 static void hist_browser__update_nr_entries(struct hist_browser *hb);
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-					     struct hists *hists,
 					     float min_pcnt);
 
 static bool hist_browser__has_filter(struct hist_browser *hb)
@@ -319,7 +318,7 @@
 	struct hists *hists = browser->hists;
 
 	for (nd = rb_first(&hists->entries);
-	     (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL;
+	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
 	     nd = rb_next(nd)) {
 		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
 		hist_entry__set_folding(he, unfold);
@@ -651,13 +650,36 @@
 			  __hpp__slsmg_color_printf, true);		\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)			\
+static u64 __hpp_get_acc_##_field(struct hist_entry *he)		\
+{									\
+	return he->stat_acc->_field;					\
+}									\
+									\
+static int								\
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
+				struct perf_hpp *hpp,			\
+				struct hist_entry *he)			\
+{									\
+	if (!symbol_conf.cumulate_callchain) {				\
+		int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A");	\
+		slsmg_printf("%s", hpp->buf);				\
+									\
+		return ret;						\
+	}								\
+	return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%",	\
+			  __hpp__slsmg_color_printf, true);		\
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
 
 void hist_browser__init_hpp(void)
 {
@@ -671,6 +693,8 @@
 				hist_browser__hpp_color_overhead_guest_sys;
 	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
 				hist_browser__hpp_color_overhead_guest_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+				hist_browser__hpp_color_overhead_acc;
 }
 
 static int hist_browser__show_entry(struct hist_browser *browser,
@@ -783,15 +807,12 @@
 
 	for (nd = browser->top; nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(h->hists);
-		float percent = 0.0;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
-		if (total)
-			percent = h->stat.period * 100.0 / total;
-
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < hb->min_pcnt)
 			continue;
 
@@ -804,16 +825,11 @@
 }
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-					     struct hists *hists,
 					     float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(hists);
-		float percent = 0.0;
-
-		if (total)
-			percent = h->stat.period * 100.0 / total;
+		float percent = hist_entry__get_percent_limit(h);
 
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
@@ -825,16 +841,11 @@
 }
 
 static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
-						  struct hists *hists,
 						  float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(hists);
-		float percent = 0.0;
-
-		if (total)
-			percent = h->stat.period * 100.0 / total;
+		float percent = hist_entry__get_percent_limit(h);
 
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
@@ -863,14 +874,14 @@
 	switch (whence) {
 	case SEEK_SET:
 		nd = hists__filter_entries(rb_first(browser->entries),
-					   hb->hists, hb->min_pcnt);
+					   hb->min_pcnt);
 		break;
 	case SEEK_CUR:
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
 		nd = hists__filter_prev_entries(rb_last(browser->entries),
-						hb->hists, hb->min_pcnt);
+						hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -913,8 +924,7 @@
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd), hb->hists,
-						   hb->min_pcnt);
+			nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -947,7 +957,7 @@
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
+			nd = hists__filter_prev_entries(rb_prev(nd),
 							hb->min_pcnt);
 			if (nd == NULL)
 				break;
@@ -1126,7 +1136,6 @@
 static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 {
 	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
-						   browser->hists,
 						   browser->min_pcnt);
 	int printed = 0;
 
@@ -1134,8 +1143,7 @@
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
 		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd), browser->hists,
-					   browser->min_pcnt);
+		nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
 	}
 
 	return printed;
@@ -1372,8 +1380,7 @@
 		return;
 	}
 
-	while ((nd = hists__filter_entries(nd, hb->hists,
-					   hb->min_pcnt)) != NULL) {
+	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		nr_entries++;
 		nd = rb_next(nd);
 	}
@@ -1699,14 +1706,14 @@
 zoom_out_dso:
 				ui_helpline__pop();
 				browser->hists->dso_filter = NULL;
-				sort_dso.elide = false;
+				perf_hpp__set_elide(HISTC_DSO, false);
 			} else {
 				if (dso == NULL)
 					continue;
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
 						   dso->kernel ? "the Kernel" : dso->short_name);
 				browser->hists->dso_filter = dso;
-				sort_dso.elide = true;
+				perf_hpp__set_elide(HISTC_DSO, true);
 				pstack__push(fstack, &browser->hists->dso_filter);
 			}
 			hists__filter_by_dso(hists);
@@ -1718,13 +1725,13 @@
 zoom_out_thread:
 				ui_helpline__pop();
 				browser->hists->thread_filter = NULL;
-				sort_thread.elide = false;
+				perf_hpp__set_elide(HISTC_THREAD, false);
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
 				browser->hists->thread_filter = thread;
-				sort_thread.elide = true;
+				perf_hpp__set_elide(HISTC_THREAD, true);
 				pstack__push(fstack, &browser->hists->thread_filter);
 			}
 			hists__filter_by_thread(hists);

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 9d90683..6ca60e4 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c

@@ -47,11 +47,26 @@
 			  __percent_color_snprintf, true);			\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+				       struct perf_hpp *hpp,			\
+				       struct hist_entry *he)			\
+{										\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",		\
+			      __percent_color_snprintf, true);			\
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
 
@@ -68,6 +83,8 @@
 				perf_gtk__hpp_color_overhead_guest_sys;
 	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
 				perf_gtk__hpp_color_overhead_guest_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+				perf_gtk__hpp_color_overhead_acc;
 }
 
 static void callchain_list__sym_name(struct callchain_list *cl,
@@ -181,6 +198,13 @@
 		if (perf_hpp__should_skip(fmt))
 			continue;
 
+		/*
+		 * XXX no way to determine where symcol column is..
+		 *     Just use last column for now.
+		 */
+		if (perf_hpp__is_sort_entry(fmt))
+			sym_col = col_idx;
+
 		fmt->header(fmt, &hpp, hists_to_evsel(hists));
 
 		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
@@ -209,14 +233,12 @@
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		GtkTreeIter iter;
 		u64 total = hists__total_period(h->hists);
-		float percent = 0.0;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
-		if (total)
-			percent = h->stat.period * 100.0 / total;
-
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < min_pcnt)
 			continue;
 
@@ -238,7 +260,8 @@
 
 		if (symbol_conf.use_callchain && sort__has_sym) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
-				total = h->stat.period;
+				total = symbol_conf.cumulate_callchain ?
+					h->stat_acc->period : h->stat.period;
 
 			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
 						sym_col, total);

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4484f5b..498adb2 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c

@@ -104,6 +104,18 @@
 	return ret;
 }
 
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+		   hpp_field_fn get_field, const char *fmt,
+		   hpp_snprint_fn print_fn, bool fmt_percent)
+{
+	if (!symbol_conf.cumulate_callchain) {
+		return snprintf(hpp->buf, hpp->size, "%*s",
+				fmt_percent ? 8 : 12, "N/A");
+	}
+
+	return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
+}
+
 static int field_cmp(u64 field_a, u64 field_b)
 {
 	if (field_a > field_b)
@@ -160,6 +172,24 @@
 	return ret;
 }
 
+static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
+			   hpp_field_fn get_field)
+{
+	s64 ret = 0;
+
+	if (symbol_conf.cumulate_callchain) {
+		/*
+		 * Put caller above callee when they have equal period.
+		 */
+		ret = field_cmp(get_field(a), get_field(b));
+		if (ret)
+			return ret;
+
+		ret = b->callchain->max_depth - a->callchain->max_depth;
+	}
+	return ret;
+}
+
 #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) 		\
 static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
 			       struct perf_hpp *hpp,			\
@@ -242,6 +272,34 @@
 	return __hpp__sort(a, b, he_get_##_field);				\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",		\
+			      hpp_color_scnprintf, true);			\
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";		\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt,		\
+			      hpp_entry_scnprintf, true);			\
+}
+
+#define __HPP_SORT_ACC_FN(_type, _field)					\
+static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b)	\
+{										\
+	return __hpp__sort_acc(a, b, he_get_acc_##_field);			\
+}
+
 #define __HPP_ENTRY_RAW_FN(_type, _field)					\
 static u64 he_get_raw_##_field(struct hist_entry *he)				\
 {										\
@@ -270,18 +328,27 @@
 __HPP_ENTRY_PERCENT_FN(_type, _field)					\
 __HPP_SORT_FN(_type, _field)
 
+#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
+__HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+__HPP_SORT_ACC_FN(_type, _field)
+
 #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width)	\
 __HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
 __HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
 __HPP_ENTRY_RAW_FN(_type, _field)					\
 __HPP_SORT_RAW_FN(_type, _field)
 
+__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
 
 HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
 HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
 HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
 HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
 HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
+HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
 
 HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
 HPP_RAW_FNS(period, "Period", period, 12, 12)
@@ -303,6 +370,17 @@
 		.sort	= hpp__sort_ ## _name,		\
 	}
 
+#define HPP__COLOR_ACC_PRINT_FNS(_name)			\
+	{						\
+		.header	= hpp__header_ ## _name,	\
+		.width	= hpp__width_ ## _name,		\
+		.color	= hpp__color_ ## _name,		\
+		.entry	= hpp__entry_ ## _name,		\
+		.cmp	= hpp__nop_cmp,			\
+		.collapse = hpp__nop_cmp,		\
+		.sort	= hpp__sort_ ## _name,		\
+	}
+
 #define HPP__PRINT_FNS(_name)				\
 	{						\
 		.header	= hpp__header_ ## _name,	\
@@ -319,6 +397,7 @@
 	HPP__COLOR_PRINT_FNS(overhead_us),
 	HPP__COLOR_PRINT_FNS(overhead_guest_sys),
 	HPP__COLOR_PRINT_FNS(overhead_guest_us),
+	HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
 	HPP__PRINT_FNS(samples),
 	HPP__PRINT_FNS(period)
 };
@@ -328,16 +407,23 @@
 
 
 #undef HPP__COLOR_PRINT_FNS
+#undef HPP__COLOR_ACC_PRINT_FNS
 #undef HPP__PRINT_FNS
 
 #undef HPP_PERCENT_FNS
+#undef HPP_PERCENT_ACC_FNS
 #undef HPP_RAW_FNS
 
 #undef __HPP_HEADER_FN
 #undef __HPP_WIDTH_FN
 #undef __HPP_COLOR_PERCENT_FN
 #undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+#undef __HPP_ENTRY_ACC_PERCENT_FN
 #undef __HPP_ENTRY_RAW_FN
+#undef __HPP_SORT_FN
+#undef __HPP_SORT_ACC_FN
+#undef __HPP_SORT_RAW_FN
 
 
 void perf_hpp__init(void)
@@ -361,6 +447,13 @@
 	if (field_order)
 		return;
 
+	if (symbol_conf.cumulate_callchain) {
+		perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
+
+		perf_hpp__format[PERF_HPP__OVERHEAD].header =
+						hpp__header_overhead_self;
+	}
+
 	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 
 	if (symbol_conf.show_cpu_utilization) {
@@ -383,6 +476,12 @@
 	list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
 	if (list_empty(list))
 		list_add(list, &perf_hpp__sort_list);
+
+	if (symbol_conf.cumulate_callchain) {
+		list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
+		if (list_empty(list))
+			list_add(list, &perf_hpp__sort_list);
+	}
 }
 
 void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -390,6 +489,11 @@
 	list_add_tail(&format->list, &perf_hpp__list);
 }
 
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+	list_del(&format->list);
+}
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
 {
 	list_add_tail(&format->sort_list, &perf_hpp__sort_list);
@@ -401,6 +505,21 @@
 	perf_hpp__column_register(&perf_hpp__format[col]);
 }
 
+void perf_hpp__column_disable(unsigned col)
+{
+	BUG_ON(col >= PERF_HPP__MAX_INDEX);
+	perf_hpp__column_unregister(&perf_hpp__format[col]);
+}
+
+void perf_hpp__cancel_cumulate(void)
+{
+	if (field_order)
+		return;
+
+	perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
+	perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
+}
+
 void perf_hpp__setup_output_field(void)
 {
 	struct perf_hpp_fmt *fmt;

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 9f57991..90122ab 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c

@@ -271,7 +271,9 @@
 {
 	switch (callchain_param.mode) {
 	case CHAIN_GRAPH_REL:
-		return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period,
+		return callchain__fprintf_graph(fp, &he->sorted_chain,
+						symbol_conf.cumulate_callchain ?
+						he->stat_acc->period : he->stat.period,
 						left_margin);
 		break;
 	case CHAIN_GRAPH_ABS:
@@ -461,12 +463,12 @@
 
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		float percent = h->stat.period * 100.0 /
-					hists->stats.total_period;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < min_pcnt)
 			continue;
 

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9a42382..48b6d3f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c

@@ -616,7 +616,8 @@
 	if (sample->callchain == NULL)
 		return 0;
 
-	if (symbol_conf.use_callchain || sort__has_parent) {
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+	    sort__has_parent) {
 		return machine__resolve_callchain(al->machine, evsel, al->thread,
 						  sample, parent, al, max_stack);
 	}
@@ -629,3 +630,45 @@
 		return 0;
 	return callchain_append(he->callchain, &callchain_cursor, sample->period);
 }
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved)
+{
+	al->map = node->map;
+	al->sym = node->sym;
+	if (node->map)
+		al->addr = node->map->map_ip(node->map, node->ip);
+	else
+		al->addr = node->ip;
+
+	if (al->sym == NULL) {
+		if (hide_unresolved)
+			return 0;
+		if (al->map == NULL)
+			goto out;
+	}
+
+	if (al->map->groups == &al->machine->kmaps) {
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_KERNEL;
+			al->level = 'k';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+			al->level = 'g';
+		}
+	} else {
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_USER;
+			al->level = '.';
+		} else if (perf_guest) {
+			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+			al->level = 'u';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+			al->level = 'H';
+		}
+	}
+
+out:
+	return 1;
+}

diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index bde2b0c..8f84423 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h

@@ -162,7 +162,18 @@
 			      struct perf_evsel *evsel, struct addr_location *al,
 			      int max_stack);
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved);
 
 extern const char record_callchain_help[];
 int parse_callchain_report_opt(const char *arg);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+					     struct callchain_cursor *src)
+{
+	*dest = *src;
+
+	dest->first = src->curr;
+	dest->nr -= src->pos;
+}
 #endif	/* __PERF_CALLCHAIN_H */

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b262b44..5a0a4b2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c

@@ -4,6 +4,7 @@
 #include "session.h"
 #include "sort.h"
 #include "evsel.h"
+#include "annotate.h"
 #include <math.h>
 
 static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -231,6 +232,8 @@
 		return true;
 
 	he_stat__decay(&he->stat);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__decay(he->stat_acc);
 
 	diff = prev_period - he->stat.period;
 
@@ -276,14 +279,31 @@
  * histogram, sorted on item, collects periods
  */
 
-static struct hist_entry *hist_entry__new(struct hist_entry *template)
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+					  bool sample_self)
 {
-	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
-	struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
+	size_t callchain_size = 0;
+	struct hist_entry *he;
+
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
+		callchain_size = sizeof(struct callchain_root);
+
+	he = zalloc(sizeof(*he) + callchain_size);
 
 	if (he != NULL) {
 		*he = *template;
 
+		if (symbol_conf.cumulate_callchain) {
+			he->stat_acc = malloc(sizeof(he->stat));
+			if (he->stat_acc == NULL) {
+				free(he);
+				return NULL;
+			}
+			memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+			if (!sample_self)
+				memset(&he->stat, 0, sizeof(he->stat));
+		}
+
 		if (he->ms.map)
 			he->ms.map->referenced = true;
 
@@ -295,6 +315,7 @@
 			 */
 			he->branch_info = malloc(sizeof(*he->branch_info));
 			if (he->branch_info == NULL) {
+				free(he->stat_acc);
 				free(he);
 				return NULL;
 			}
@@ -333,7 +354,8 @@
 
 static struct hist_entry *add_hist_entry(struct hists *hists,
 					 struct hist_entry *entry,
-					 struct addr_location *al)
+					 struct addr_location *al,
+					 bool sample_self)
 {
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
@@ -357,7 +379,10 @@
 		cmp = hist_entry__cmp(he, entry);
 
 		if (!cmp) {
-			he_stat__add_period(&he->stat, period, weight);
+			if (sample_self)
+				he_stat__add_period(&he->stat, period, weight);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_period(he->stat_acc, period, weight);
 
 			/*
 			 * This mem info was allocated from sample__resolve_mem
@@ -385,14 +410,17 @@
 			p = &(*p)->rb_right;
 	}
 
-	he = hist_entry__new(entry);
+	he = hist_entry__new(entry, sample_self);
 	if (!he)
 		return NULL;
 
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
-	he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (sample_self)
+		he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
 	return he;
 }
 
@@ -401,7 +429,8 @@
 				      struct symbol *sym_parent,
 				      struct branch_info *bi,
 				      struct mem_info *mi,
-				      u64 period, u64 weight, u64 transaction)
+				      u64 period, u64 weight, u64 transaction,
+				      bool sample_self)
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
@@ -426,7 +455,429 @@
 		.transaction = transaction,
 	};
 
-	return add_hist_entry(hists, &entry, al);
+	return add_hist_entry(hists, &entry, al, sample_self);
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+		    struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+			struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_sample *sample = iter->sample;
+	struct mem_info *mi;
+
+	mi = sample__resolve_mem(sample, al);
+	if (mi == NULL)
+		return -ENOMEM;
+
+	iter->priv = mi;
+	return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	u64 cost;
+	struct mem_info *mi = iter->priv;	/* stored by iter_prepare_mem_entry() */
+	struct hist_entry *he;
+
+	if (mi == NULL)
+		return -EINVAL;
+
+	cost = iter->sample->weight;
+	if (!cost)
+		cost = 1;	/* a zero weight is treated as a cost of one */
+
+	/*
+	 * We must pass period=weight in order to get the correct
+	 * sorting from hists__collapse_resort(), which is based solely
+	 * on periods. We want sorting to be done on nr_events * weight,
+	 * and this is achieved indirectly by passing period=weight here
+	 * and through the he_stat__add_period() function.
+	 */
+	he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
+				cost, cost, 0, true);
+	if (!he)
+		return -ENOMEM;
+
+	iter->he = he;	/* consumed later by iter_finish_mem_entry() */
+	return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+		      struct addr_location *al __maybe_unused)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct hist_entry *he = iter->he;
+	int err = -EINVAL;	/* returned when no entry was added */
+
+	if (he == NULL)
+		goto out;
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+	/*
+	 * iter->priv (mem_info) is not freed here: it was either freed in
+	 * add_hist_entry() or handed to the new entry by hist_entry__new().
+	 * NOTE(review): confirm this also holds when adding the entry failed.
+	 */
+	iter->priv = NULL;
+
+	iter->he = NULL;
+	return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return -ENOMEM;
+
+	iter->curr = 0;
+	iter->total = sample->branch_stack->nr;
+
+	iter->priv = bi;
+	return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	/* no entry for the sample itself; a NULL he skips the callback */
+	iter->he = NULL;
+
+	return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi = iter->priv;
+	int i = iter->curr;
+
+	if (bi == NULL)
+		return 0;
+
+	if (iter->curr >= iter->total)
+		return 0;
+
+	al->map = bi[i].to.map;
+	al->sym = bi[i].to.sym;
+	al->addr = bi[i].to.addr;
+	return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_evsel *evsel = iter->evsel;
+	struct hist_entry *he = NULL;
+	int i = iter->curr;	/* index of the branch being added */
+	int err = 0;
+
+	bi = iter->priv;	/* set up by iter_prepare_branch_entry() */
+
+	if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+		goto out;	/* skipped: he stays NULL, curr still advances */
+
+	/*
+	 * The report shows the percentage of total branches captured
+	 * and not events sampled. Thus we use a pseudo period of 1.
+	 */
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
+				1, 1, 0, true);
+	if (he == NULL)
+		return -ENOMEM;	/* iter->curr is left unchanged here */
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+out:
+	iter->he = he;
+	iter->curr++;
+	return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+			  struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he;
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+
+	if (he == NULL)
+		return 0;
+
+	iter->he = NULL;
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+			      struct addr_location *al __maybe_unused)
+{
+	struct hist_entry **he_cache;
+
+	callchain_cursor_commit(&callchain_cursor);
+
+	/*
+	 * Cache of the entries added for this sample. It is used to detect
+	 * cycles or recursion so that each entry is cumulated only once,
+	 * preventing entries with more than 100% overhead.
+	 */
+	he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
+	if (he_cache == NULL)
+		return -ENOMEM;
+
+	iter->priv = he_cache;	/* released by iter_finish_cumulative_entry() */
+	iter->curr = 0;		/* number of cached entries so far */
+
+	return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+				 struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	int err = 0;
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, true);	/* sample_self */
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;	/* remember it for duplicate checks */
+
+	callchain_append(he->callchain, &callchain_cursor, sample->period);
+
+	/*
+	 * We need to re-initialize the cursor since callchain_append()
+	 * advanced the cursor to the end.
+	 */
+	callchain_cursor_commit(&callchain_cursor);
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+			   struct addr_location *al)
+{
+	struct callchain_cursor_node *node;
+
+	node = callchain_cursor_current(&callchain_cursor);
+	if (node == NULL)
+		return 0;
+
+	return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+			       struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	struct hist_entry he_tmp = {	/* comparison key only, never inserted */
+		.cpu = al->cpu,
+		.thread = al->thread,
+		.comm = thread__comm(al->thread),
+		.ip = al->addr,
+		.ms = {
+			.map = al->map,
+			.sym = al->sym,
+		},
+		.parent = iter->parent,
+	};
+	int i;
+	struct callchain_cursor cursor;
+
+	callchain_cursor_snapshot(&cursor, &callchain_cursor);	/* pre-advance copy */
+
+	callchain_cursor_advance(&callchain_cursor);
+
+	/*
+	 * Check whether this entry already appeared earlier in the
+	 * callchain, which can happen with cycles or recursive calls.
+	 */
+	for (i = 0; i < iter->curr; i++) {
+		if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+			/* to avoid calling callback function */
+			iter->he = NULL;
+			return 0;
+		}
+	}
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, false);	/* !sample_self */
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;	/* NOTE(review): index is not bounds-checked */
+
+	callchain_append(he->callchain, &cursor, sample->period);
+	return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+	.prepare_entry 		= iter_prepare_mem_entry,
+	.add_single_entry 	= iter_add_single_mem_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+	.prepare_entry 		= iter_prepare_branch_entry,
+	.add_single_entry 	= iter_add_single_branch_entry,
+	.next_entry 		= iter_next_branch_entry,
+	.add_next_entry 	= iter_add_next_branch_entry,
+	.finish_entry 		= iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+	.prepare_entry 		= iter_prepare_normal_entry,
+	.add_single_entry 	= iter_add_single_normal_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+	.prepare_entry 		= iter_prepare_cumulative_entry,
+	.add_single_entry 	= iter_add_single_cumulative_entry,
+	.next_entry 		= iter_next_cumulative_entry,
+	.add_next_entry 	= iter_add_next_cumulative_entry,
+	.finish_entry 		= iter_finish_cumulative_entry,
+};
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 struct perf_evsel *evsel, struct perf_sample *sample,
+			 int max_stack_depth, void *arg)
+{
+	int err, err2;
+
+	err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
+					max_stack_depth);
+	if (err)
+		return err;	/* nothing prepared yet: finish_entry is not called */
+
+	iter->evsel = evsel;
+	iter->sample = sample;
+
+	err = iter->ops->prepare_entry(iter, al);
+	if (err)
+		goto out;
+
+	err = iter->ops->add_single_entry(iter, al);
+	if (err)
+		goto out;
+
+	if (iter->he && iter->add_entry_cb) {
+		err = iter->add_entry_cb(iter, al, true, arg);	/* single == true */
+		if (err)
+			goto out;
+	}
+
+	while (iter->ops->next_entry(iter, al)) {	/* non-zero: more to add */
+		err = iter->ops->add_next_entry(iter, al);
+		if (err)
+			break;	/* continues at the out: label below */
+
+		if (iter->he && iter->add_entry_cb) {
+			err = iter->add_entry_cb(iter, al, false, arg);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	err2 = iter->ops->finish_entry(iter, al);	/* runs on success and on error */
+	if (!err)
+		err = err2;	/* keep the first error encountered */
+
+	return err;
 }
 
 int64_t
@@ -469,6 +920,7 @@
 {
 	zfree(&he->branch_info);
 	zfree(&he->mem_info);
+	zfree(&he->stat_acc);
 	free_srcline(he->srcline);
 	free(he);
 }
@@ -494,6 +946,8 @@
 
 		if (!cmp) {
 			he_stat__add_stat(&iter->stat, &he->stat);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
 			if (symbol_conf.use_callchain) {
 				callchain_cursor_reset(&callchain_cursor);
@@ -800,6 +1254,13 @@
 	events_stats__inc(&hists->stats, type);
 }
 
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+	events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
+	if (!filtered)
+		hists->stats.nr_non_filtered_samples++;
+}
+
 static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 						 struct hist_entry *pair)
 {
@@ -831,7 +1292,7 @@
 			p = &(*p)->rb_right;
 	}
 
-	he = hist_entry__new(pair);
+	he = hist_entry__new(pair, true);
 	if (he) {
 		memset(&he->stat, 0, sizeof(he->stat));
 		he->hists = hists;

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a8418d1..d2bf035 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h

@@ -96,12 +96,50 @@
 	u16			col_len[HISTC_NR_COLS];
 };
 
+struct hist_entry_iter;
+
+struct hist_iter_ops {
+	int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+	int total;	/* item count; set by the branch ops */
+	int curr;	/* index of the current item */
+
+	bool hide_unresolved;
+
+	struct perf_evsel *evsel;
+	struct perf_sample *sample;
+	struct hist_entry *he;	/* entry added by the last step, or NULL */
+	struct symbol *parent;
+	void *priv;	/* private data owned by the ops */
+
+	const struct hist_iter_ops *ops;
+	/* user-defined callback function (optional) */
+	int (*add_entry_cb)(struct hist_entry_iter *iter,
+			    struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
 struct hist_entry *__hists__add_entry(struct hists *hists,
 				      struct addr_location *al,
 				      struct symbol *parent,
 				      struct branch_info *bi,
 				      struct mem_info *mi, u64 period,
-				      u64 weight, u64 transaction);
+				      u64 weight, u64 transaction,
+				      bool sample_self);
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 struct perf_evsel *evsel, struct perf_sample *sample,
+			 int max_stack_depth, void *arg);
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
@@ -119,6 +157,7 @@
 void hists__reset_stats(struct hists *hists);
 void hists__inc_stats(struct hists *hists, struct hist_entry *h);
 void hists__inc_nr_events(struct hists *hists, u32 type);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
 void events_stats__inc(struct events_stats *stats, u32 type);
 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
@@ -166,6 +205,7 @@
 
 	struct list_head list;
 	struct list_head sort_list;
+	bool elide;
 };
 
 extern struct list_head perf_hpp__list;
@@ -192,6 +232,7 @@
 	PERF_HPP__OVERHEAD_US,
 	PERF_HPP__OVERHEAD_GUEST_SYS,
 	PERF_HPP__OVERHEAD_GUEST_US,
+	PERF_HPP__OVERHEAD_ACC,
 	PERF_HPP__SAMPLES,
 	PERF_HPP__PERIOD,
 
@@ -200,7 +241,11 @@
 
 void perf_hpp__init(void);
 void perf_hpp__column_register(struct perf_hpp_fmt *format);
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
 void perf_hpp__column_enable(unsigned col);
+void perf_hpp__column_disable(unsigned col);
+void perf_hpp__cancel_cumulate(void);
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
 void perf_hpp__setup_output_field(void);
 void perf_hpp__reset_output_field(void);
@@ -208,7 +253,12 @@
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+{
+	return format->elide;
+}
+
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
@@ -218,6 +268,9 @@
 int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
 	       hpp_field_fn get_field, const char *fmt,
 	       hpp_snprint_fn print_fn, bool fmt_percent);
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+		   hpp_field_fn get_field, const char *fmt,
+		   hpp_snprint_fn print_fn, bool fmt_percent);
 
 static inline void advance_hpp(struct perf_hpp *hpp, int inc)
 {

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 901b9be..45512ba 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c

@@ -1061,6 +1061,7 @@
 	DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
 	DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
 	DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+	DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
 	DIM(PERF_HPP__SAMPLES, "sample"),
 	DIM(PERF_HPP__PERIOD, "period"),
 };
@@ -1156,6 +1157,7 @@
 
 	INIT_LIST_HEAD(&hse->hpp.list);
 	INIT_LIST_HEAD(&hse->hpp.sort_list);
+	hse->hpp.elide = false;
 
 	return hse;
 }
@@ -1363,27 +1365,64 @@
 	return ret;
 }
 
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+void perf_hpp__set_elide(int idx, bool elide)
 {
-	if (perf_hpp__is_sort_entry(format)) {
-		struct hpp_sort_entry *hse;
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
 
-		hse = container_of(format, struct hpp_sort_entry, hpp);
-		return hse->se->elide;
+	perf_hpp__for_each_format(fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_width_idx == idx) {
+			fmt->elide = elide;
+			break;
+		}
 	}
-	return false;
 }
 
-static void sort_entry__setup_elide(struct sort_entry *se,
-				    struct strlist *list,
-				    const char *list_name, FILE *fp)
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
 {
 	if (list && strlist__nr_entries(list) == 1) {
 		if (fp != NULL)
 			fprintf(fp, "# %s: %s\n", list_name,
 				strlist__entry(list, 0)->s);
-		se->elide = true;
+		return true;
 	}
+	return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+	switch (idx) {
+	case HISTC_SYMBOL:
+		return __get_elide(symbol_conf.sym_list, "symbol", output);
+	case HISTC_DSO:
+		return __get_elide(symbol_conf.dso_list, "dso", output);
+	case HISTC_COMM:
+		return __get_elide(symbol_conf.comm_list, "comm", output);
+	default:
+		break;
+	}
+
+	if (sort__mode != SORT_MODE__BRANCH)
+		return false;
+
+	switch (idx) {
+	case HISTC_SYMBOL_FROM:
+		return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+	case HISTC_SYMBOL_TO:
+		return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+	case HISTC_DSO_FROM:
+		return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+	case HISTC_DSO_TO:
+		return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+	default:
+		break;
+	}
+
+	return false;
 }
 
 void sort__setup_elide(FILE *output)
@@ -1391,39 +1430,12 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-				"dso", output);
-	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list,
-				"comm", output);
-	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list,
-				"symbol", output);
+	perf_hpp__for_each_format(fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
 
-	if (sort__mode == SORT_MODE__BRANCH) {
-		sort_entry__setup_elide(&sort_dso_from,
-					symbol_conf.dso_from_list,
-					"dso_from", output);
-		sort_entry__setup_elide(&sort_dso_to,
-					symbol_conf.dso_to_list,
-					"dso_to", output);
-		sort_entry__setup_elide(&sort_sym_from,
-					symbol_conf.sym_from_list,
-					"sym_from", output);
-		sort_entry__setup_elide(&sort_sym_to,
-					symbol_conf.sym_to_list,
-					"sym_to", output);
-	} else if (sort__mode == SORT_MODE__MEMORY) {
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"symbol_daddr", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"dso_daddr", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"mem", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"local_weight", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"tlb", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"snoop", output);
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		fmt->elide = get_elide(hse->se->se_width_idx, output);
 	}
 
 	/*
@@ -1434,8 +1446,7 @@
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
-		hse = container_of(fmt, struct hpp_sort_entry, hpp);
-		if (!hse->se->elide)
+		if (!fmt->elide)
 			return;
 	}
 
@@ -1443,8 +1454,7 @@
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
-		hse = container_of(fmt, struct hpp_sort_entry, hpp);
-		hse->se->elide = false;
+		fmt->elide = false;
 	}
 }
 
@@ -1581,6 +1591,9 @@
 	sort__has_sym = 0;
 	sort__has_dso = 0;
 
+	field_order = NULL;
+	sort_order = NULL;
+
 	reset_dimensions();
 	perf_hpp__reset_output_field();
 }

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5f38d92..5bf0098 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h

@@ -20,7 +20,7 @@
 
 #include "parse-options.h"
 #include "parse-events.h"
-
+#include "hist.h"
 #include "thread.h"
 
 extern regex_t parent_regex;
@@ -82,6 +82,7 @@
 		struct list_head head;
 	} pairs;
 	struct he_stat		stat;
+	struct he_stat		*stat_acc;
 	struct map_symbol	ms;
 	struct thread		*thread;
 	struct comm		*comm;
@@ -130,6 +131,21 @@
 	list_add_tail(&pair->pairs.node, &he->pairs.head);
 }
 
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+	u64 period = he->stat.period;	/* self period by default */
+	u64 total_period = hists__total_period(he->hists);
+
+	if (unlikely(total_period == 0))
+		return 0;	/* avoid dividing by zero */
+
+	if (symbol_conf.cumulate_callchain)
+		period = he->stat_acc->period;	/* use the accumulated period */
+
+	return period * 100.0 / total_period;	/* percentage of the total */
+}
+
+
 enum sort_mode {
 	SORT_MODE__NORMAL,
 	SORT_MODE__BRANCH,
@@ -186,7 +202,6 @@
 	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
 	u8	se_width_idx;
-	bool	elide;
 };
 
 extern struct sort_entry sort_thread;
@@ -197,6 +212,7 @@
 void reset_output_field(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
 
 int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
 

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 95e2497..7b9096f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c

@@ -29,11 +29,12 @@
 char **vmlinux_path;
 
 struct symbol_conf symbol_conf = {
-	.use_modules	  = true,
-	.try_vmlinux_path = true,
-	.annotate_src	  = true,
-	.demangle	  = true,
-	.symfs            = "",
+	.use_modules		= true,
+	.try_vmlinux_path	= true,
+	.annotate_src		= true,
+	.demangle		= true,
+	.cumulate_callchain	= true,
+	.symfs			= "",
 };
 
 static enum dso_binary_type binary_type_symtab[] = {

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 33ede53..615c752 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h

@@ -109,6 +109,7 @@
 			show_nr_samples,
 			show_total_period,
 			use_callchain,
+			cumulate_callchain,
 			exclude_other,
 			show_cpu_utilization,
 			initialized,

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 750512b..c7493b8 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile

@@ -14,6 +14,12 @@
 run_tests: all
 	@/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
 	@/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
-
+	@if /sbin/modprobe test_bpf ; then \
+		/sbin/rmmod test_bpf; \
+		echo "test_bpf: ok"; \
+	else \
+		echo "test_bpf: [FAIL]"; \
+		exit 1; \
+	fi
 clean:
 	$(RM) $(NET_PROGS)

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 316194f..54833a7 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile

@@ -13,7 +13,7 @@
 
 export CC CFLAGS
 
-TARGETS = pmu copyloops
+TARGETS = pmu copyloops mm tm
 
 endif
 

diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index ccd9c84..d1dc374 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h

@@ -46,12 +46,15 @@
 #define R20 r20
 #define R21 r21
 #define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
 
 #define STACKFRAMESIZE	256
-#define STK_PARAM(i)	(48 + ((i)-3)*8)
 #define STK_REG(i)	(112 + ((i)-14)*8)
 
 #define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
 
 #define PPC_MTOCRF(A, B)	mtocrf A, B
 

diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index e80c42a..8ebc58a 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c

@@ -30,12 +30,15 @@
 
 	pid = fork();
 	if (pid == 0) {
+		setpgid(0, 0);
 		exit(test_function());
 	} else if (pid == -1) {
 		perror("fork");
 		return 1;
 	}
 
+	setpgid(pid, pid);
+
 	/* Wake us up in timeout seconds */
 	alarm(TIMEOUT);
 	terminated = false;
@@ -50,17 +53,20 @@
 
 		if (terminated) {
 			printf("!! force killing %s\n", name);
-			kill(pid, SIGKILL);
+			kill(-pid, SIGKILL);
 			return 1;
 		} else {
 			printf("!! killing %s\n", name);
-			kill(pid, SIGTERM);
+			kill(-pid, SIGTERM);
 			terminated = true;
 			alarm(KILL_TIMEOUT);
 			goto wait;
 		}
 	}
 
+	/* Kill anything else in the process group that is still running */
+	kill(-pid, SIGTERM);
+
 	if (WIFEXITED(status))
 		status = WEXITSTATUS(status);
 	else {
@@ -99,7 +105,10 @@
 
 	rc = run_test(test_function, name);
 
-	test_finish(name, rc);
+	if (rc == MAGIC_SKIP_RETURN_VALUE)
+		test_skip(name);
+	else
+		test_finish(name, rc);
 
 	return rc;
 }

diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
new file mode 100644
index 0000000..357ccbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/Makefile

@@ -0,0 +1,18 @@
+noarg:
+	$(MAKE) -C ../
+
+PROGS := hugetlb_vs_thp_test
+
+all: $(PROGS)
+
+$(PROGS): ../harness.c
+
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS)
+
+.PHONY: all run_tests clean

diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
new file mode 100644
index 0000000..3d8e5b0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c

@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+/* This must match the huge page & THP size */
+#define SIZE	(16 * 1024 * 1024)
+
+/*
+ * Run one iteration: attempt a MAP_HUGETLB mapping near a hint address,
+ * unmap it if it succeeded, then map normal anonymous memory at the same
+ * hint and write to it.
+ *
+ * Returns 0 on success, or 1 if the second mmap fails.
+ */
+static int test_body(void)
+{
+	void *addr;
+	char *p;
+
+	addr = (void *)0xa0000000;
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p != MAP_FAILED) {
+		/*
+		 * Typically the mmap will fail because no huge pages are
+		 * allocated on the system. But if there are huge pages
+		 * allocated the mmap will succeed. That's fine too, we just
+		 * munmap here before continuing.
+		 *
+		 * addr is only a hint (MAP_FIXED is not used), so unmap
+		 * the address mmap actually returned.
+		 */
+		munmap(p, SIZE);
+	}
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p == MAP_FAILED) {
+		printf("Mapping failed @ %p\n", addr);
+		perror("mmap");
+		return 1;
+	}
+
+	/*
+	 * Either a user or kernel access is sufficient to trigger the bug.
+	 * A kernel access is easier to spot & debug, as it will trigger the
+	 * softlockup or RCU stall detectors, and when the system is kicked
+	 * into xmon we get a backtrace in the kernel.
+	 *
+	 * A good option is:
+	 *  getcwd(p, SIZE);
+	 *
+	 * For the purposes of this testcase it's preferable to spin in
+	 * userspace, so the harness can kill us if we get stuck. That way we
+	 * see a test failure rather than a dead system.
+	 */
+	*p = 0xf;
+
+	/* Unmap the returned address, which may differ from the hint */
+	munmap(p, SIZE);
+
+	return 0;
+}
+
+/* Repeat test_body(), stopping with 1 at the first failing iteration. */
+static int test_main(void)
+{
+	int i;
+
+	/* 10,000 because it's a "bunch", and completes reasonably quickly */
+	for (i = 0; i < 10000; i++)
+		if (test_body())
+			return 1;
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_main, "hugetlb_vs_thp");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 7216f00..b9ff0db 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile

@@ -4,7 +4,7 @@
 PROGS := count_instructions
 EXTRA_SOURCES := ../harness.c event.c
 
-all: $(PROGS)
+all: $(PROGS) sub_all
 
 $(PROGS): $(EXTRA_SOURCES)
 
@@ -12,12 +12,30 @@
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
 	$(CC) $(CFLAGS) -m64 -o $@ $^
 
-run_tests: all
+run_tests: all sub_run_tests
 	@-for PROG in $(PROGS); do \
 		./$$PROG; \
 	done;
 
-clean:
+clean: sub_clean
 	rm -f $(PROGS) loop.o
 
-.PHONY: all run_tests clean
+
+SUB_TARGETS = ebb
+
+sub_all:
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET all; \
+	done;
+
+sub_run_tests: all
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET run_tests; \
+	done;
+
+sub_clean:
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+
+.PHONY: all run_tests clean sub_all sub_run_tests sub_clean

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
new file mode 100644
index 0000000..edbba2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile

@@ -0,0 +1,35 @@
+noarg:
+	$(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+PROGS := reg_access_test event_attributes_test cycles_test	\
+	 cycles_with_freeze_test pmc56_overflow_test		\
+	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
+	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
+	 task_event_pinned_vs_ebb_test multi_ebb_procs_test	\
+	 multi_counter_test pmae_handling_test			\
+	 close_clears_pmcc_test instruction_count_test		\
+	 fork_cleanup_test ebb_on_child_test			\
+	 ebb_on_willing_child_test back_to_back_ebbs_test	\
+	 lost_exception_test no_handler_test
+
+all: $(PROGS)
+
+$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c
+
+instruction_count_test: ../loop.S
+
+lost_exception_test: ../lib.c
+
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS)
+
+# These targets never create a file of the same name
+.PHONY: all run_tests clean

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
new file mode 100644
index 0000000..66ea765
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c

@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS	50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+static void ebb_callee(void)
+{
+	uint64_t siar, val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;	/* BESCR_PMEO clear: count as spurious */
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	/* Resets the PMC */
+	count_pmc(1, sample_period);
+
+out:
+	if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+		/* Enough EBBs taken: reset but leave counters frozen */
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		/* Unfreezes */
+		reset_ebb();
+
+	/* Do some stuff to chew some cycles and pop the counter */
+	siar = mfspr(SPRN_SIAR);
+	trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+	val = mfspr(SPRN_PMC1);
+	trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+int back_to_back_ebbs(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = 5;
+
+	ebb_freeze_pmcs();
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	ebb_global_enable();
+	ebb_unfreeze_pmcs();
+
+	while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
new file mode 100644
index 0000000..0f0423d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c

@@ -0,0 +1,59 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+/*
+ * Test body: take at least one EBB on a cycles event, close the event, then
+ * check that a write to PMC1 raises SIGILL while the EBB registers can still
+ * be read.
+ *
+ * Returns 0 on success.
+ */
+int close_clears_pmcc(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* One EBB is enough to show the event was live before the close */
+	while (ebb_state.stats.ebb_count < 1)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/*
+	 * The real test is here: do we take a SIGILL when writing PMU regs now
+	 * that we have closed the event? We expect that we will.
+	 * catch_sigill() returns 0 only when the SIGILL was taken.
+	 */
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We should still be able to read EBB regs though */
+	mfspr(SPRN_EBBHR);
+	mfspr(SPRN_EBBRR);
+	mfspr(SPRN_BESCR);
+
+	return 0;
+}
+
+/* Run close_clears_pmcc() under the selftest harness and return its result. */
+int main(void)
+{
+	return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
new file mode 100644
index 0000000..d3ed64d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+/*
+ * Initialise, open and enable a pinned PM_RUN_INST_CMPL event on @cpu that
+ * excludes kernel, hypervisor and idle.
+ *
+ * Returns 0 on success. SKIP_IF()/FAIL_IF() presumably return a non-zero
+ * skip/fail code from this function - confirm against their definitions.
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	/* Pinned, so this event is expected to hold on to the PMU */
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	/* NOTE(review): looks like a perf_event_paranoid check - confirm */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Test body: bind to one CPU, fork a child that will try to run an EBB
+ * event, install a pinned per-cpu event first, then let the child go.
+ *
+ * The child is expected to exit with status 2, the code ebb_child() returns
+ * when it cannot read its event, and the cpu event must have counted for
+ * the whole time it was enabled.
+ *
+ * Returns 0 on success, non-zero on failure or skip.
+ */
+int cpu_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/*
+	 * A failed fork() must not go unnoticed: pid would be -1 and would
+	 * reach kill_child_and_wait()/wait_for_child(), which presumably pass
+	 * it on to kill()/waitpid() where -1 does not name a single child.
+	 */
+	FAIL_IF(pid < 0);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The results are already in event.result, so the fd can go now */
+	event_close(&event);
+
+	/* The cpu event should have run */
+	FAIL_IF(event.result.value == 0);
+	FAIL_IF(event.result.enabled != event.result.running);
+
+	return 0;
+}
+
+/* Run cpu_event_pinned_vs_ebb() under the selftest harness. */
+int main(void)
+{
+	return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
new file mode 100644
index 0000000..8b972c2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c

@@ -0,0 +1,89 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+/*
+ * Initialise, open and enable a (non-pinned) PM_RUN_INST_CMPL event on @cpu
+ * that excludes kernel, hypervisor and idle.
+ *
+ * Returns 0 on success. SKIP_IF()/FAIL_IF() presumably return a non-zero
+ * skip/fail code from this function - confirm against their definitions.
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	/* NOTE(review): looks like a perf_event_paranoid check - confirm */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Test body: bind to one CPU, fork a child that will run an EBB event,
+ * install a non-pinned per-cpu event first, then let the child go.
+ *
+ * The child is expected to succeed (exit status 0). Nothing is asserted
+ * about the cpu event's counts; they are only reported.
+ *
+ * Returns 0 on success, non-zero on failure or skip.
+ */
+int cpu_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/*
+	 * A failed fork() must not go unnoticed: pid would be -1 and would
+	 * reach kill_child_and_wait()/wait_for_child(), which presumably pass
+	 * it on to kill()/waitpid() where -1 does not name a single child.
+	 */
+	FAIL_IF(pid < 0);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect the child to succeed */
+	FAIL_IF(wait_for_child(pid));
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run */
+
+	/* Done with the event, release its fd */
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run cpu_event_vs_ebb() under the selftest harness and return its result. */
+int main(void)
+{
+	return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
new file mode 100644
index 0000000..8590fc1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c

@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+int cycles(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	/* Let standard_ebb_callee() account for PMC1 */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Arm PMC1 so it overflows after sample_period counts */
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 10) {
+		FAIL_IF(core_busy_loop());
+		/* Outside the handler MMCR0 must not show FC without PMAO */
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold the partial count left in PMC1 into the statistics */
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	/* Total count must be within +/-100 per EBB of the sample period */
+	FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+	return 0;
+}
+
+/* Run cycles() under the selftest harness and return its result. */
+int main(void)
+{
+	return test_harness(cycles, "cycles");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
new file mode 100644
index 0000000..754b3f2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c

@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+static bool counters_frozen = false;
+static int ebbs_while_frozen = 0;
+
+/*
+ * EBB callee for the freeze test.
+ *
+ * A genuine EBB is counted and traced. If the main loop had already marked
+ * the counters as frozen when the EBB arrived, MMCR0[FC] is left alone on
+ * exit; otherwise both PMAO and FC are cleared. A spurious EBB (BESCR[PMEO]
+ * clear) is only recorded and the PMU is re-armed with the default mask.
+ */
+static void ebb_callee(void)
+{
+	uint64_t clear_bits = MMCR0_PMAO | MMCR0_FC;
+	uint64_t reg;
+
+	reg = mfspr(SPRN_BESCR);
+	if (reg & BESCR_PMEO) {
+		ebb_state.stats.ebb_count++;
+		trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+		reg = mfspr(SPRN_MMCR0);
+		trace_log_reg(ebb_state.trace, SPRN_MMCR0, reg);
+
+		if (counters_frozen) {
+			/* The main loop wants FC to stay set */
+			trace_log_string(ebb_state.trace, "frozen");
+			ebbs_while_frozen++;
+			clear_bits &= ~MMCR0_FC;
+		}
+
+		count_pmc(1, sample_period);
+	} else {
+		ebb_state.stats.spurious++;
+	}
+
+	reset_ebb_with_clear_mask(clear_bits);
+}
+
+/*
+ * Test body: count cycles only around core_busy_loop() by clearing and
+ * setting MMCR0[FC] by hand, and check that FC is still set every time it
+ * is read back after being set.
+ *
+ * Returns 0 on success.
+ */
+int cycles_with_freeze(void)
+{
+	struct event event;
+	uint64_t val;
+	bool fc_cleared;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	fc_cleared = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		/* Publish the flag before touching FC so the handler agrees */
+		counters_frozen = false;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+		FAIL_IF(core_busy_loop());
+
+		counters_frozen = true;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+		/* FC was just set; seeing it clear here is the failure case */
+		val = mfspr(SPRN_MMCR0);
+		if (! (val & MMCR0_FC)) {
+			printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+			fc_cleared = true;
+		}
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold the partial count left in PMC1 into the statistics */
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(fc_cleared);
+
+	return 0;
+}
+
+/* Run cycles_with_freeze() under the selftest harness and return its result. */
+int main(void)
+{
+	return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
new file mode 100644
index 0000000..1b46be9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c

@@ -0,0 +1,727 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "reg.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+/*
+ * C entry point called from the assembly EBB handler: forwards to the callee
+ * registered with setup_ebb_handler(), or does nothing if none is set.
+ */
+void ebb_hook(void)
+{
+	if (!ebb_user_func)
+		return;
+
+	ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+/*
+ * Re-arm the PMU for the next EBB from inside an EBB callee.
+ *
+ * @mmcr0_clear_mask: MMCR0 bits to clear; MMCR0[PMAE] is always set.
+ *
+ * The step numbers continue the sequence started in count_pmc() (0: read
+ * PMC, 1: reset PMC).
+ */
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+	u64 mmcr0;
+
+	/*
+	 * 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this
+	 * 3) set MMCR0[PMAE]   - docs say BESCR[PME] should do this
+	 */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	mmcr0 &= ~mmcr0_clear_mask;
+	mmcr0 |= MMCR0_PMAE;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	/* 4) clear BESCR[PMEO], via the BESCR "reset" alias */
+	mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+	/* 5) set BESCR[PME], via the BESCR "set" alias */
+	mtspr(SPRN_BESCRS, BESCR_PME);
+
+	/* 6) rfebb 1 - done in our caller */
+}
+
+/* Re-arm the PMU for the next EBB, clearing both MMCR0[PMAO] and MMCR0[FC]. */
+void reset_ebb(void)
+{
+	reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
+}
+
+/*
+ * Sanity check of MMCR0, meant to be called outside of the EBB handler.
+ *
+ * FC together with PMAO is fine; FC set on its own is reported.
+ *
+ * Returns 1 if only FC is set, 0 otherwise.
+ */
+int ebb_check_mmcr0(void)
+{
+	u64 mmcr0 = mfspr(SPRN_MMCR0);
+
+	if ((mmcr0 & MMCR0_FC) && !(mmcr0 & MMCR0_PMAO)) {
+		/* It's OK if we see FC & PMAO, but not FC by itself */
+		printf("Outside of loop, only FC set 0x%llx\n", mmcr0);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Check the accumulated count of @pmc against the number of EBBs taken.
+ *
+ * The accepted range is ebb_count * (sample_period - fudge) up to
+ * ebb_count * (sample_period + fudge), both ends included.
+ *
+ * Returns true if the count is in range, false (after printing which limit
+ * was missed and by how much) otherwise.
+ */
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+	const u64 total = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+	const u64 min_ok = ebb_state.stats.ebb_count * (sample_period - fudge);
+	u64 max_ok;
+
+	if (total < min_ok) {
+		printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+			pmc, total, min_ok, min_ok - total);
+		return false;
+	}
+
+	max_ok = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+	if (total > max_ok) {
+		printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+			pmc, total, max_ok, total - max_ok);
+		return false;
+	}
+
+	printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+		pmc, total, min_ok, max_ok, total - min_ok, max_ok - total);
+
+	return true;
+}
+
+/*
+ * Default EBB callee used by most tests.
+ *
+ * For a genuine EBB it bumps ebb_count, traces MMCR0 and runs count_pmc()
+ * on every PMC enabled via ebb_enable_pmc_counting(); if none of them
+ * reports an overflow, no_overflow is incremented. An EBB without
+ * BESCR[PMEO] set is recorded as spurious. Either way the PMU is re-armed.
+ */
+void standard_ebb_callee(void)
+{
+	int overflowed = 0;
+	int pmc;
+	u64 reg;
+
+	reg = mfspr(SPRN_BESCR);
+	if (reg & BESCR_PMEO) {
+		ebb_state.stats.ebb_count++;
+		trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+		reg = mfspr(SPRN_MMCR0);
+		trace_log_reg(ebb_state.trace, SPRN_MMCR0, reg);
+
+		for (pmc = 1; pmc <= 6; pmc++) {
+			if (!ebb_state.pmc_enable[PMC_INDEX(pmc)])
+				continue;
+
+			overflowed += count_pmc(pmc, sample_period);
+		}
+
+		if (overflowed == 0)
+			ebb_state.stats.no_overflow++;
+	} else {
+		ebb_state.stats.spurious++;
+	}
+
+	reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+/*
+ * Register @callee as the function ebb_hook() will call, and point EBBHR at
+ * the assembly ebb_handler.
+ */
+void setup_ebb_handler(void (*callee)(void))
+{
+	u64 entry;
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	/* ELFv2: the function symbol is the code address itself */
+	entry = (u64)ebb_handler;
+#else
+	/*
+	 * Otherwise the symbol is treated as a function descriptor whose
+	 * first doubleword is the actual entry address.
+	 */
+	struct opd
+	{
+	    u64 entry;
+	    u64 toc;
+	} *opd;
+
+	opd = (struct opd *)ebb_handler;
+	entry = opd->entry;
+#endif
+	printf("EBB Handler is at %#llx\n", entry);
+
+	ebb_user_func = callee;
+
+	/* Ensure ebb_user_func is set before we set the handler */
+	mb();
+	mtspr(SPRN_EBBHR, entry);
+
+	/* Make sure the handler is set before we return */
+	mb();
+}
+
+/* Zero every counter in ebb_state.stats; pmc_enable and trace are untouched. */
+void clear_ebb_stats(void)
+{
+	memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
+}
+
+/* Print the software-side EBB statistics collected in ebb_state.stats. */
+void dump_summary_ebb_state(void)
+{
+	printf("ebb_state:\n"
+	       "  ebb_count    = %d\n"
+	       "  spurious     = %d\n"
+	       "  negative     = %d\n"
+	       "  no_overflow  = %d\n"
+	       "  pmc[1] count = 0x%llx\n"
+	       "  pmc[2] count = 0x%llx\n"
+	       "  pmc[3] count = 0x%llx\n"
+	       "  pmc[4] count = 0x%llx\n"
+	       "  pmc[5] count = 0x%llx\n"
+	       "  pmc[6] count = 0x%llx\n",
+	       ebb_state.stats.ebb_count,
+	       ebb_state.stats.spurious,
+	       ebb_state.stats.negative,
+	       ebb_state.stats.no_overflow,
+	       ebb_state.stats.pmc_count[PMC_INDEX(1)],
+	       ebb_state.stats.pmc_count[PMC_INDEX(2)],
+	       ebb_state.stats.pmc_count[PMC_INDEX(3)],
+	       ebb_state.stats.pmc_count[PMC_INDEX(4)],
+	       ebb_state.stats.pmc_count[PMC_INDEX(5)],
+	       ebb_state.stats.pmc_count[PMC_INDEX(6)]);
+}
+
+/*
+ * Render the FC, PMAE and PMAO bits of an MMCR0 value as text.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next call,
+ * so the result must be consumed before calling again. The longest result,
+ * "FC PMAE PMAO ", is 13 characters and fits the 16 byte buffer.
+ */
+static char *decode_mmcr0(u32 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	/* Unsigned constants: "1 << 31" would overflow a signed int */
+	if (value & (1u << 31))
+		strcat(buf, "FC ");
+	if (value & (1u << 26))
+		strcat(buf, "PMAE ");
+	if (value & (1u << 7))
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+/*
+ * Render the GE, PME and PMEO bits of a BESCR value as text.
+ *
+ * Bit 32 and bit 0 are the ones the rest of this file calls BESCR[PME] and
+ * BESCR[PMEO]; PMAE/PMAO are the names of the MMCR0 bits, so they are not
+ * used here.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next call.
+ * The longest result, "GE PME PMEO ", is 12 characters and fits the buffer.
+ */
+static char *decode_bescr(u64 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1ull << 63))
+		strcat(buf, "GE ");
+	if (value & (1ull << 32))
+		strcat(buf, "PME ");
+	if (value & 1)
+		strcat(buf, "PMEO ");
+
+	return buf;
+}
+
+/*
+ * Print the current hardware view of the EBB/PMU state: MMCR0 and BESCR
+ * (with their decoded flag bits), EBBHR, PMC1-6 and SIAR.
+ */
+void dump_ebb_hw_state(void)
+{
+	u32 mmcr0;
+	u64 bescr;
+
+	/* Sample the two decoded registers once, so value and text agree */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	bescr = mfspr(SPRN_BESCR);
+
+	printf("HW state:\n"
+	       "MMCR0 0x%016x %s\n"
+	       "EBBHR 0x%016lx\n"
+	       "BESCR 0x%016llx %s\n"
+	       "PMC1  0x%016lx\n"
+	       "PMC2  0x%016lx\n"
+	       "PMC3  0x%016lx\n"
+	       "PMC4  0x%016lx\n"
+	       "PMC5  0x%016lx\n"
+	       "PMC6  0x%016lx\n"
+	       "SIAR  0x%016lx\n",
+	       mmcr0, decode_mmcr0(mmcr0),
+	       mfspr(SPRN_EBBHR),
+	       bescr, decode_bescr(bescr),
+	       mfspr(SPRN_PMC1),
+	       mfspr(SPRN_PMC2),
+	       mfspr(SPRN_PMC3),
+	       mfspr(SPRN_PMC4),
+	       mfspr(SPRN_PMC5),
+	       mfspr(SPRN_PMC6),
+	       mfspr(SPRN_SIAR));
+}
+
+/* Print the statistics summary, the hardware state and the trace buffer. */
+void dump_ebb_state(void)
+{
+	dump_summary_ebb_state();
+
+	dump_ebb_hw_state();
+
+	trace_buffer_print(ebb_state.trace);
+}
+
+/*
+ * Account for what @pmc has counted since it was last armed, then re-arm it.
+ *
+ * The counter is expected to have started from
+ * pmc_sample_period(@sample_period). The distance from that start value is
+ * added to the PMC's running total; a value below the start value is only
+ * recorded in stats.negative.
+ *
+ * Returns non-zero if the value read had reached COUNTER_OVERFLOW.
+ */
+int count_pmc(int pmc, uint32_t sample_period)
+{
+	/* 0) Read PMC */
+	const uint32_t reload = pmc_sample_period(sample_period);
+	const u64 raw = read_pmc(pmc);
+
+	if (raw >= reload)
+		ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += raw - reload;
+	else
+		ebb_state.stats.negative++;
+
+	/* Trace under the SPR number of this PMC, counted up from PMC1 */
+	trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, raw);
+
+	/* 1) Reset PMC */
+	write_pmc(pmc, reload);
+
+	/* Report if we overflowed */
+	return raw >= COUNTER_OVERFLOW;
+}
+
+/*
+ * Enable the perf event @e and read it back once.
+ *
+ * Returns the ioctl() result if enabling failed, otherwise the result of
+ * event_read().
+ */
+int ebb_event_enable(struct event *e)
+{
+	int rc;
+
+	/* Ensure any SPR writes are ordered vs us */
+	mb();
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+	if (rc == 0) {
+		rc = event_read(e);
+
+		/* Same ordering requirement on the way out */
+		mb();
+	}
+
+	return rc;
+}
+
+/* Set MMCR0[FC] (freeze counters), followed by a barrier. */
+void ebb_freeze_pmcs(void)
+{
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+	mb();
+}
+
+/* Clear MMCR0[FC] so the counters run again, followed by a barrier. */
+void ebb_unfreeze_pmcs(void)
+{
+	/* Unfreeze counters */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	mb();
+}
+
+/*
+ * Write BESCR with only bit 63 and bit 32 set; every other BESCR bit is
+ * written as zero.
+ */
+void ebb_global_enable(void)
+{
+	/* Enable EBBs globally and PMU EBBs */
+	mtspr(SPRN_BESCR, 0x8000000100000000ull);
+	mb();
+}
+
+/*
+ * Clear BESCR[PME] through the BESCR "reset" alias.
+ *
+ * This function does not touch MMCR0, so it does not freeze the counters
+ * itself; callers that want them frozen follow it with ebb_freeze_pmcs().
+ */
+void ebb_global_disable(void)
+{
+	/* Disable PMU EBBs; events are still scheduled */
+	mtspr(SPRN_BESCRR, BESCR_PME);
+	mb();
+}
+
+/* Mark @e as an EBB event by setting bit 63 of its attr.config. */
+void event_ebb_init(struct event *e)
+{
+	e->attr.config |= (1ull << 63);
+}
+
+/*
+ * Set bit 62 of @e's attr.config and OR in @ifm shifted up to bit 60.
+ *
+ * NOTE(review): @ifm is not masked, so values above 3 would spill into bits
+ * 62/63 - presumably callers only pass 0-3; confirm.
+ */
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+	e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
+}
+
+/* Mark @e as an EBB event and additionally request exclusive + pinned. */
+void event_leader_ebb_init(struct event *e)
+{
+	event_ebb_init(e);
+
+	e->attr.exclusive = 1;
+	e->attr.pinned = 1;
+}
+
+/*
+ * Spin for a short while with known values in the GPRs and on the stack,
+ * then verify that nothing changed them behind our back (i.e. that an EBB
+ * taken during the loop saved and restored state correctly).
+ *
+ * r3-r11 and r14-r31 are loaded with recognisable constants; r3-r11 and
+ * r14-r29 are also stored below the stack pointer. After a 100 iteration
+ * loop on r3 the registers are compared against the constants, overwritten
+ * with junk, reloaded from the stack and compared again.
+ *
+ * Returns 0 if every check passed, 1 on the first mismatch.
+ *
+ * r0 (used to build the return value) and cr0 (written by every cmpwi) are
+ * listed as clobbers so the compiler does not keep live values in them
+ * across the asm.
+ *
+ * NOTE(review): the stores go to -96(r1)..-288(r1) without allocating a
+ * frame; this assumes the compiler is not using that part of the area below
+ * the stack pointer for its own saves in this function - confirm.
+ */
+int core_busy_loop(void)
+{
+	int rc;
+
+	asm volatile (
+		"li  3,  0x3030\n"
+		"std 3,  -96(1)\n"
+		"li  4,  0x4040\n"
+		"std 4,  -104(1)\n"
+		"li  5,  0x5050\n"
+		"std 5,  -112(1)\n"
+		"li  6,  0x6060\n"
+		"std 6,  -120(1)\n"
+		"li  7,  0x7070\n"
+		"std 7,  -128(1)\n"
+		"li  8,  0x0808\n"
+		"std 8,  -136(1)\n"
+		"li  9,  0x0909\n"
+		"std 9,  -144(1)\n"
+		"li  10, 0x1010\n"
+		"std 10, -152(1)\n"
+		"li  11, 0x1111\n"
+		"std 11, -160(1)\n"
+		"li  14, 0x1414\n"
+		"std 14, -168(1)\n"
+		"li  15, 0x1515\n"
+		"std 15, -176(1)\n"
+		"li  16, 0x1616\n"
+		"std 16, -184(1)\n"
+		"li  17, 0x1717\n"
+		"std 17, -192(1)\n"
+		"li  18, 0x1818\n"
+		"std 18, -200(1)\n"
+		"li  19, 0x1919\n"
+		"std 19, -208(1)\n"
+		"li  20, 0x2020\n"
+		"std 20, -216(1)\n"
+		"li  21, 0x2121\n"
+		"std 21, -224(1)\n"
+		"li  22, 0x2222\n"
+		"std 22, -232(1)\n"
+		"li  23, 0x2323\n"
+		"std 23, -240(1)\n"
+		"li  24, 0x2424\n"
+		"std 24, -248(1)\n"
+		"li  25, 0x2525\n"
+		"std 25, -256(1)\n"
+		"li  26, 0x2626\n"
+		"std 26, -264(1)\n"
+		"li  27, 0x2727\n"
+		"std 27, -272(1)\n"
+		"li  28, 0x2828\n"
+		"std 28, -280(1)\n"
+		"li  29, 0x2929\n"
+		"std 29, -288(1)\n"
+		"li  30, 0x3030\n"
+		"li  31, 0x3131\n"
+
+		/* The busy loop proper: count r3 from 0 up to 100 */
+		"li    3,  0\n"
+		"0: "
+		"addi  3, 3, 1\n"
+		"cmpwi 3, 100\n"
+		"blt   0b\n"
+
+		/* Return 1 (fail) unless we get through all the checks */
+		"li	0, 1\n"
+
+		/* Check none of our registers have been corrupted */
+		"cmpwi  4,  0x4040\n"
+		"bne	1f\n"
+		"cmpwi  5,  0x5050\n"
+		"bne	1f\n"
+		"cmpwi  6,  0x6060\n"
+		"bne	1f\n"
+		"cmpwi  7,  0x7070\n"
+		"bne	1f\n"
+		"cmpwi  8,  0x0808\n"
+		"bne	1f\n"
+		"cmpwi  9,  0x0909\n"
+		"bne	1f\n"
+		"cmpwi  10, 0x1010\n"
+		"bne	1f\n"
+		"cmpwi  11, 0x1111\n"
+		"bne	1f\n"
+		"cmpwi  14, 0x1414\n"
+		"bne	1f\n"
+		"cmpwi  15, 0x1515\n"
+		"bne	1f\n"
+		"cmpwi  16, 0x1616\n"
+		"bne	1f\n"
+		"cmpwi  17, 0x1717\n"
+		"bne	1f\n"
+		"cmpwi  18, 0x1818\n"
+		"bne	1f\n"
+		"cmpwi  19, 0x1919\n"
+		"bne	1f\n"
+		"cmpwi  20, 0x2020\n"
+		"bne	1f\n"
+		"cmpwi  21, 0x2121\n"
+		"bne	1f\n"
+		"cmpwi  22, 0x2222\n"
+		"bne	1f\n"
+		"cmpwi  23, 0x2323\n"
+		"bne	1f\n"
+		"cmpwi  24, 0x2424\n"
+		"bne	1f\n"
+		"cmpwi  25, 0x2525\n"
+		"bne	1f\n"
+		"cmpwi  26, 0x2626\n"
+		"bne	1f\n"
+		"cmpwi  27, 0x2727\n"
+		"bne	1f\n"
+		"cmpwi  28, 0x2828\n"
+		"bne	1f\n"
+		"cmpwi  29, 0x2929\n"
+		"bne	1f\n"
+		"cmpwi  30, 0x3030\n"
+		"bne	1f\n"
+		"cmpwi  31, 0x3131\n"
+		"bne	1f\n"
+
+		/* Load junk into all our registers before we reload them from the stack. */
+		"li  3,  0xde\n"
+		"li  4,  0xad\n"
+		"li  5,  0xbe\n"
+		"li  6,  0xef\n"
+		"li  7,  0xde\n"
+		"li  8,  0xad\n"
+		"li  9,  0xbe\n"
+		"li  10, 0xef\n"
+		"li  11, 0xde\n"
+		"li  14, 0xad\n"
+		"li  15, 0xbe\n"
+		"li  16, 0xef\n"
+		"li  17, 0xde\n"
+		"li  18, 0xad\n"
+		"li  19, 0xbe\n"
+		"li  20, 0xef\n"
+		"li  21, 0xde\n"
+		"li  22, 0xad\n"
+		"li  23, 0xbe\n"
+		"li  24, 0xef\n"
+		"li  25, 0xde\n"
+		"li  26, 0xad\n"
+		"li  27, 0xbe\n"
+		"li  28, 0xef\n"
+		"li  29, 0xdd\n"
+
+		/* Reload from the stack slots and check those survived too */
+		"ld     3,  -96(1)\n"
+		"cmpwi  3,  0x3030\n"
+		"bne	1f\n"
+		"ld     4,  -104(1)\n"
+		"cmpwi  4,  0x4040\n"
+		"bne	1f\n"
+		"ld     5,  -112(1)\n"
+		"cmpwi  5,  0x5050\n"
+		"bne	1f\n"
+		"ld     6,  -120(1)\n"
+		"cmpwi  6,  0x6060\n"
+		"bne	1f\n"
+		"ld     7,  -128(1)\n"
+		"cmpwi  7,  0x7070\n"
+		"bne	1f\n"
+		"ld     8,  -136(1)\n"
+		"cmpwi  8,  0x0808\n"
+		"bne	1f\n"
+		"ld     9,  -144(1)\n"
+		"cmpwi  9,  0x0909\n"
+		"bne	1f\n"
+		"ld     10, -152(1)\n"
+		"cmpwi  10, 0x1010\n"
+		"bne	1f\n"
+		"ld     11, -160(1)\n"
+		"cmpwi  11, 0x1111\n"
+		"bne	1f\n"
+		"ld     14, -168(1)\n"
+		"cmpwi  14, 0x1414\n"
+		"bne	1f\n"
+		"ld     15, -176(1)\n"
+		"cmpwi  15, 0x1515\n"
+		"bne	1f\n"
+		"ld     16, -184(1)\n"
+		"cmpwi  16, 0x1616\n"
+		"bne	1f\n"
+		"ld     17, -192(1)\n"
+		"cmpwi  17, 0x1717\n"
+		"bne	1f\n"
+		"ld     18, -200(1)\n"
+		"cmpwi  18, 0x1818\n"
+		"bne	1f\n"
+		"ld     19, -208(1)\n"
+		"cmpwi  19, 0x1919\n"
+		"bne	1f\n"
+		"ld     20, -216(1)\n"
+		"cmpwi  20, 0x2020\n"
+		"bne	1f\n"
+		"ld     21, -224(1)\n"
+		"cmpwi  21, 0x2121\n"
+		"bne	1f\n"
+		"ld     22, -232(1)\n"
+		"cmpwi  22, 0x2222\n"
+		"bne	1f\n"
+		"ld     23, -240(1)\n"
+		"cmpwi  23, 0x2323\n"
+		"bne	1f\n"
+		"ld     24, -248(1)\n"
+		"cmpwi  24, 0x2424\n"
+		"bne	1f\n"
+		"ld     25, -256(1)\n"
+		"cmpwi  25, 0x2525\n"
+		"bne	1f\n"
+		"ld     26, -264(1)\n"
+		"cmpwi  26, 0x2626\n"
+		"bne	1f\n"
+		"ld     27, -272(1)\n"
+		"cmpwi  27, 0x2727\n"
+		"bne	1f\n"
+		"ld     28, -280(1)\n"
+		"cmpwi  28, 0x2828\n"
+		"bne	1f\n"
+		"ld     29, -288(1)\n"
+		"cmpwi  29, 0x2929\n"
+		"bne	1f\n"
+
+		/* Load 0 (success) to return */
+		"li	0, 0\n"
+
+		"1: 	mr %0, 0\n"
+
+		: "=r" (rc)
+		: /* no inputs */
+		: "0", "3", "4", "5", "6", "7", "8", "9", "10", "11", "14",
+		  "15", "16", "17", "18", "19", "20", "21", "22", "23",
+		   "24", "25", "26", "27", "28", "29", "30", "31",
+		   "cr0", "memory"
+	);
+
+	return rc;
+}
+
+/*
+ * Run core_busy_loop() with MMCR0[FC] cleared, and set FC again afterwards.
+ *
+ * Returns the result of core_busy_loop().
+ */
+int core_busy_loop_with_freeze(void)
+{
+	int rc;
+
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	rc = core_busy_loop();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+	return rc;
+}
+
+/*
+ * Child side of the "something else vs EBB" tests.
+ *
+ * Waits for the parent, opens and enables an EBB cycles event, then runs
+ * until 20 EBBs have been taken, handshaking with the parent over the two
+ * pipes along the way.
+ *
+ * Returns 0 on success, 2 if the event could not be read after enabling it
+ * (which some callers expect), and whatever FAIL_IF() returns otherwise.
+ */
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+	struct event event;
+	uint64_t val;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(event_enable(&event));
+
+	if (event_read(&event)) {
+		/*
+		 * Some tests expect to fail here, so don't report an error on
+		 * this line, and return a distinguishable error code. Tell the
+		 * parent an error happened.
+		 */
+		notify_parent_of_error(write_pipe);
+		return 2;
+	}
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Handshake: "event installed", wait for "go", acknowledge */
+	FAIL_IF(notify_parent(write_pipe));
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+
+		/* To try and hit SIGILL case */
+		/* (val itself is never used; only the SPR reads matter) */
+		val  = mfspr(SPRN_MMCRA);
+		val |= mfspr(SPRN_MMCR2);
+		val |= mfspr(SPRN_MMCR0);
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold the partial count left in PMC1 into the statistics */
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/*
+ * Jump target for sigill_handler(). A sigjmp_buf is used, with the signal
+ * mask saved, so that jumping out of the handler restores the mask that was
+ * in force before SIGILL was delivered. With plain setjmp()/longjmp() SIGILL
+ * would stay blocked after the first catch.
+ */
+static sigjmp_buf setjmp_env;
+
+/* SIGILL handler: report it and unwind back into catch_sigill(). */
+static void sigill_handler(int signal)
+{
+	printf("Took sigill\n");
+	siglongjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+	.sa_handler = sigill_handler,
+};
+
+/*
+ * Call @func and check that it raises SIGILL.
+ *
+ * Returns 0 if SIGILL was taken while running @func, 1 if @func returned
+ * normally or the signal handler could not be installed. The SIGILL handler
+ * stays installed afterwards.
+ */
+int catch_sigill(void (*func)(void))
+{
+	if (sigaction(SIGILL, &sigill_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	/* Non-zero second argument: save the signal mask for siglongjmp() */
+	if (sigsetjmp(setjmp_env, 1) == 0) {
+		func();
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Write 0 to PMC1; has the void(void) shape catch_sigill() expects. */
+void write_pmc1(void)
+{
+	mtspr(SPRN_PMC1, 0);
+}
+
+/*
+ * Write @value to PMC number @pmc (1-6). Any other @pmc is ignored.
+ *
+ * Each case names its SPR explicitly rather than computing the number.
+ */
+void write_pmc(int pmc, u64 value)
+{
+	switch (pmc) {
+	case 1:
+		mtspr(SPRN_PMC1, value);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, value);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, value);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, value);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, value);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, value);
+		break;
+	default:
+		/* Not a PMC we know about: nothing to write */
+		break;
+	}
+}
+
+/*
+ * Read PMC number @pmc (1-6).
+ *
+ * Returns the counter value, or 0 for any other @pmc.
+ */
+u64 read_pmc(int pmc)
+{
+	u64 value = 0;
+
+	switch (pmc) {
+	case 1:
+		value = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		value = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		value = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		value = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		value = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		value = mfspr(SPRN_PMC6);
+		break;
+	default:
+		/* Not a PMC we know about: report 0 */
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * SIGTERM handler: dump the EBB statistics and hardware state, then abort().
+ * The trace buffer is not printed here.
+ */
+static void term_handler(int signal)
+{
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+	abort();
+}
+
+/* Signal disposition installed for SIGTERM by ebb_init(). */
+struct sigaction term_action = {
+	.sa_handler = term_handler,
+};
+
+/*
+ * Constructor, run before main(): zero the statistics, install the SIGTERM
+ * handler and allocate a 1MB trace buffer.
+ *
+ * NOTE(review): the result of trace_buffer_allocate() is not checked here;
+ * presumably the trace_log_*() helpers cope with a NULL buffer - confirm.
+ */
+static void __attribute__((constructor)) ebb_init(void)
+{
+	clear_ebb_stats();
+
+	/* A failure to install the handler is reported but not fatal */
+	if (sigaction(SIGTERM, &term_action, NULL))
+		perror("sigaction");
+
+	ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024);
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
new file mode 100644
index 0000000..e62bde0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h

@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc)	((pmc)-1)
+
+#define NUM_PMC_VALUES	128
+
+/* State shared between the EBB callees and the test bodies. */
+struct ebb_state
+{
+	/* Counters, zeroed as a unit by clear_ebb_stats() */
+	struct {
+		/* Running total per PMC, indexed by PMC_INDEX(pmc) */
+		u64 pmc_count[6];
+		/* EBBs taken; written by the callee, polled by the tests */
+		volatile int ebb_count;
+		/* Callee entries that found BESCR[PMEO] clear */
+		int spurious;
+		/* PMC reads that were below the value the PMC was armed with */
+		int negative;
+		/* EBBs where no enabled PMC had reached COUNTER_OVERFLOW */
+		int no_overflow;
+	} stats;
+
+	/* PMCs that standard_ebb_callee() counts, indexed by PMC_INDEX(pmc) */
+	bool pmc_enable[6];
+	/* Trace buffer the callees log to */
+	struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+/*
+ * Return the value to load into a PMC so that it reaches COUNTER_OVERFLOW
+ * after counting @value more events.
+ */
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+	return COUNTER_OVERFLOW - value;
+}
+
+/* Flag PMC number @pmc (1-6) as enabled in ebb_state.pmc_enable. */
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+	ebb_state.pmc_enable[PMC_INDEX(pmc)] = true;
+}
+
+/* Public helpers implemented in ebb.c; each one is declared exactly once. */
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int core_busy_loop_with_freeze(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
new file mode 100644
index 0000000..14274ea
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S

@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB   .long 0x4c000924
+
+/* Stack layout:
+ *
+ *                   ^
+ *  User stack       |
+ *  Back chain ------+	<- r1		<-------+
+ *  ...						|
+ *  Red zone / ABI Gap				|
+ *  ...						|
+ *  vr63	<+				|
+ *  vr0		 |				|
+ *  VSCR	 |				|
+ *  FSCR	 |				|
+ *  r31		 | Save area			|
+ *  r0		 |				|
+ *  XER		 |				|
+ *  CTR		 |				|
+ *  LR		 |				|
+ *  CCR		<+				|
+ *  ...		<+				|
+ *  LR		 | Caller frame			|
+ *  CCR		 |				|
+ *  Back chain	<+	<- updated r1	--------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP		512
+#else
+#define ABIGAP		288
+#endif
+
+/* Number of registers of each kind that get a slot in the save area */
+#define NR_GPR		32
+#define NR_SPR		6
+#define NR_VSR		64
+
+/* 8 bytes per GPR/SPR slot, 16 bytes per VSR slot */
+#define SAVE_AREA	((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME	112
+
+/* Total amount r1 is moved down on entry to the handler */
+#define STACK_FRAME	(ABIGAP + SAVE_AREA + CALLER_FRAME)
+
+/*
+ * Offsets from the updated r1. The save area sits directly above the caller
+ * frame and is laid out as CCR, LR, CTR, XER, r0-r31, FSCR, VSCR, VSR0-63.
+ */
+#define CCR_SAVE	(CALLER_FRAME)
+#define LR_SAVE		(CCR_SAVE + 8)
+#define CTR_SAVE	(LR_SAVE  + 8)
+#define XER_SAVE	(CTR_SAVE + 8)
+#define GPR_SAVE(n)	(XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE	(GPR_SAVE(31) + 8)
+#define VSCR_SAVE	(FSCR_SAVE + 8)
+#define VSR_SAVE(n)	(VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n)	std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n)	ld  n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n)	lis n,0xaaaa
+
+#define SAVE_VSR(n, b)	li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b)	li b, VSR_SAVE(n); lxvd2x  n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr)	\
+	lis     reg,(expr)@highest;	\
+	ori     reg,reg,(expr)@higher;	\
+	rldicr  reg,reg,32,31;		\
+	oris    reg,reg,(expr)@h;	\
+	ori     reg,reg,(expr)@l;
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+	.type FUNC_NAME(name),@function; \
+	.globl FUNC_NAME(name); \
+	FUNC_NAME(name):
+
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer using our entry point */	\
+	LOAD_REG_IMMEDIATE(r12, name)				\
+0:	addis	r2,r12,(.TOC.-0b)@ha;				\
+	addi	r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer via our opd entry */	\
+	LOAD_REG_IMMEDIATE(r2, name)			\
+	ld      r2,8(r2);
+#endif
+
+    .text
+
+/*
+ * EBB entry point installed in EBBHR by setup_ebb_handler().
+ *
+ * Saves the interrupted context on the stack, calls the C function
+ * ebb_hook(), restores the context and returns with rfebb.
+ */
+ENTRY_POINT(ebb_handler)
+    /* Step over the ABI gap and make room for the save area and a frame */
+    stdu    r1,-STACK_FRAME(r1)
+    /* r0 is saved first because it is the scratch register for the SPRs */
+    SAVE_GPR(0)
+    mflr    r0
+    std     r0,LR_SAVE(r1)
+    mfcr    r0
+    std     r0,CCR_SAVE(r1)
+    mfctr   r0
+    std     r0,CTR_SAVE(r1)
+    mfxer   r0
+    std     r0,XER_SAVE(r1)
+    SAVE_GPR(2)
+    SAVE_GPR(3)
+    SAVE_GPR(4)
+    SAVE_GPR(5)
+    SAVE_GPR(6)
+    SAVE_GPR(7)
+    SAVE_GPR(8)
+    SAVE_GPR(9)
+    SAVE_GPR(10)
+    SAVE_GPR(11)
+    SAVE_GPR(12)
+    SAVE_GPR(13)
+    SAVE_GPR(14)
+    SAVE_GPR(15)
+    SAVE_GPR(16)
+    SAVE_GPR(17)
+    SAVE_GPR(18)
+    SAVE_GPR(19)
+    SAVE_GPR(20)
+    SAVE_GPR(21)
+    SAVE_GPR(22)
+    SAVE_GPR(23)
+    SAVE_GPR(24)
+    SAVE_GPR(25)
+    SAVE_GPR(26)
+    SAVE_GPR(27)
+    SAVE_GPR(28)
+    SAVE_GPR(29)
+    SAVE_GPR(30)
+    SAVE_GPR(31)
+    /*
+     * r3 is already saved, so SAVE_VSR can use it as its index register.
+     * VSR0 is stored before f0 is used as scratch below.
+     *
+     * NOTE(review): if f0 is just the register number 0, "mfvscr f0" writes
+     * vector register 0 while "stfd f0" stores FP register 0, and this
+     * happens before SAVE_VSR(32). Confirm that the VSCR and VSR32 really
+     * are preserved across the handler.
+     */
+    SAVE_VSR(0, r3)
+    mffs     f0
+    stfd     f0, FSCR_SAVE(r1)
+    mfvscr   f0
+    stfd     f0, VSCR_SAVE(r1)
+    SAVE_VSR(1,  r3)
+    SAVE_VSR(2,  r3)
+    SAVE_VSR(3,  r3)
+    SAVE_VSR(4,  r3)
+    SAVE_VSR(5,  r3)
+    SAVE_VSR(6,  r3)
+    SAVE_VSR(7,  r3)
+    SAVE_VSR(8,  r3)
+    SAVE_VSR(9,  r3)
+    SAVE_VSR(10, r3)
+    SAVE_VSR(11, r3)
+    SAVE_VSR(12, r3)
+    SAVE_VSR(13, r3)
+    SAVE_VSR(14, r3)
+    SAVE_VSR(15, r3)
+    SAVE_VSR(16, r3)
+    SAVE_VSR(17, r3)
+    SAVE_VSR(18, r3)
+    SAVE_VSR(19, r3)
+    SAVE_VSR(20, r3)
+    SAVE_VSR(21, r3)
+    SAVE_VSR(22, r3)
+    SAVE_VSR(23, r3)
+    SAVE_VSR(24, r3)
+    SAVE_VSR(25, r3)
+    SAVE_VSR(26, r3)
+    SAVE_VSR(27, r3)
+    SAVE_VSR(28, r3)
+    SAVE_VSR(29, r3)
+    SAVE_VSR(30, r3)
+    SAVE_VSR(31, r3)
+    SAVE_VSR(32, r3)
+    SAVE_VSR(33, r3)
+    SAVE_VSR(34, r3)
+    SAVE_VSR(35, r3)
+    SAVE_VSR(36, r3)
+    SAVE_VSR(37, r3)
+    SAVE_VSR(38, r3)
+    SAVE_VSR(39, r3)
+    SAVE_VSR(40, r3)
+    SAVE_VSR(41, r3)
+    SAVE_VSR(42, r3)
+    SAVE_VSR(43, r3)
+    SAVE_VSR(44, r3)
+    SAVE_VSR(45, r3)
+    SAVE_VSR(46, r3)
+    SAVE_VSR(47, r3)
+    SAVE_VSR(48, r3)
+    SAVE_VSR(49, r3)
+    SAVE_VSR(50, r3)
+    SAVE_VSR(51, r3)
+    SAVE_VSR(52, r3)
+    SAVE_VSR(53, r3)
+    SAVE_VSR(54, r3)
+    SAVE_VSR(55, r3)
+    SAVE_VSR(56, r3)
+    SAVE_VSR(57, r3)
+    SAVE_VSR(58, r3)
+    SAVE_VSR(59, r3)
+    SAVE_VSR(60, r3)
+    SAVE_VSR(61, r3)
+    SAVE_VSR(62, r3)
+    SAVE_VSR(63, r3)
+
+    /*
+     * Overwrite the saved GPRs with a junk pattern, so a register that is
+     * not restored properly shows up in the interrupted code. r0, r1 and
+     * r13 are not trashed.
+     */
+    TRASH_GPR(2)
+    TRASH_GPR(3)
+    TRASH_GPR(4)
+    TRASH_GPR(5)
+    TRASH_GPR(6)
+    TRASH_GPR(7)
+    TRASH_GPR(8)
+    TRASH_GPR(9)
+    TRASH_GPR(10)
+    TRASH_GPR(11)
+    TRASH_GPR(12)
+    TRASH_GPR(14)
+    TRASH_GPR(15)
+    TRASH_GPR(16)
+    TRASH_GPR(17)
+    TRASH_GPR(18)
+    TRASH_GPR(19)
+    TRASH_GPR(20)
+    TRASH_GPR(21)
+    TRASH_GPR(22)
+    TRASH_GPR(23)
+    TRASH_GPR(24)
+    TRASH_GPR(25)
+    TRASH_GPR(26)
+    TRASH_GPR(27)
+    TRASH_GPR(28)
+    TRASH_GPR(29)
+    TRASH_GPR(30)
+    TRASH_GPR(31)
+
+    /* r2 was just trashed; rebuild the TOC pointer before calling C code */
+    RESTORE_TOC(ebb_handler)
+
+    /*
+     * r13 is our TLS pointer. We leave whatever value was in there when the
+     * EBB fired. That seems to be OK because once set the TLS pointer is not
+     * changed - but presumably that could change in future.
+     */
+
+    bl      ebb_hook
+    nop
+
+    /* r2 may be changed here but we don't care */
+
+    /* Restore in the reverse sense of the save sequence above */
+    lfd      f0, FSCR_SAVE(r1)
+    mtfsf    0xff,f0
+    lfd      f0, VSCR_SAVE(r1)
+    mtvscr   f0
+    LOAD_VSR(0, r3)
+    LOAD_VSR(1,  r3)
+    LOAD_VSR(2,  r3)
+    LOAD_VSR(3,  r3)
+    LOAD_VSR(4,  r3)
+    LOAD_VSR(5,  r3)
+    LOAD_VSR(6,  r3)
+    LOAD_VSR(7,  r3)
+    LOAD_VSR(8,  r3)
+    LOAD_VSR(9,  r3)
+    LOAD_VSR(10, r3)
+    LOAD_VSR(11, r3)
+    LOAD_VSR(12, r3)
+    LOAD_VSR(13, r3)
+    LOAD_VSR(14, r3)
+    LOAD_VSR(15, r3)
+    LOAD_VSR(16, r3)
+    LOAD_VSR(17, r3)
+    LOAD_VSR(18, r3)
+    LOAD_VSR(19, r3)
+    LOAD_VSR(20, r3)
+    LOAD_VSR(21, r3)
+    LOAD_VSR(22, r3)
+    LOAD_VSR(23, r3)
+    LOAD_VSR(24, r3)
+    LOAD_VSR(25, r3)
+    LOAD_VSR(26, r3)
+    LOAD_VSR(27, r3)
+    LOAD_VSR(28, r3)
+    LOAD_VSR(29, r3)
+    LOAD_VSR(30, r3)
+    LOAD_VSR(31, r3)
+    LOAD_VSR(32, r3)
+    LOAD_VSR(33, r3)
+    LOAD_VSR(34, r3)
+    LOAD_VSR(35, r3)
+    LOAD_VSR(36, r3)
+    LOAD_VSR(37, r3)
+    LOAD_VSR(38, r3)
+    LOAD_VSR(39, r3)
+    LOAD_VSR(40, r3)
+    LOAD_VSR(41, r3)
+    LOAD_VSR(42, r3)
+    LOAD_VSR(43, r3)
+    LOAD_VSR(44, r3)
+    LOAD_VSR(45, r3)
+    LOAD_VSR(46, r3)
+    LOAD_VSR(47, r3)
+    LOAD_VSR(48, r3)
+    LOAD_VSR(49, r3)
+    LOAD_VSR(50, r3)
+    LOAD_VSR(51, r3)
+    LOAD_VSR(52, r3)
+    LOAD_VSR(53, r3)
+    LOAD_VSR(54, r3)
+    LOAD_VSR(55, r3)
+    LOAD_VSR(56, r3)
+    LOAD_VSR(57, r3)
+    LOAD_VSR(58, r3)
+    LOAD_VSR(59, r3)
+    LOAD_VSR(60, r3)
+    LOAD_VSR(61, r3)
+    LOAD_VSR(62, r3)
+    LOAD_VSR(63, r3)
+
+    /* SPRs go back through r0, which is itself restored right after */
+    ld      r0,XER_SAVE(r1)
+    mtxer   r0
+    ld      r0,CTR_SAVE(r1)
+    mtctr   r0
+    ld      r0,LR_SAVE(r1)
+    mtlr    r0
+    ld      r0,CCR_SAVE(r1)
+    mtcr    r0
+    REST_GPR(0)
+    REST_GPR(2)
+    REST_GPR(3)
+    REST_GPR(4)
+    REST_GPR(5)
+    REST_GPR(6)
+    REST_GPR(7)
+    REST_GPR(8)
+    REST_GPR(9)
+    REST_GPR(10)
+    REST_GPR(11)
+    REST_GPR(12)
+    REST_GPR(13)
+    REST_GPR(14)
+    REST_GPR(15)
+    REST_GPR(16)
+    REST_GPR(17)
+    REST_GPR(18)
+    REST_GPR(19)
+    REST_GPR(20)
+    REST_GPR(21)
+    REST_GPR(22)
+    REST_GPR(23)
+    REST_GPR(24)
+    REST_GPR(25)
+    REST_GPR(26)
+    REST_GPR(27)
+    REST_GPR(28)
+    REST_GPR(29)
+    REST_GPR(30)
+    REST_GPR(31)
+    /* Pop the frame and return from the EBB */
+    addi    r1,r1,STACK_FRAME
+    RFEBB
+FUNC_END(ebb_handler)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
new file mode 100644
index 0000000..c45f948
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c

@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. Nothing interesting happens, because
+ * even though the event is enabled and running the child hasn't enabled the
+ * actual delivery of the EBBs.
+ */
+
+/*
+ * Child side of the test. Handshakes with the parent over the two pipes, once
+ * before and once after the parent creates the EBB event, writes PMC1 and
+ * then spins for a while with the event enabled.
+ *
+ * Returns 0 when every handshake step succeeds.
+ */
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	/*
+	 * volatile: the delay loop below has no other side effect, so without
+	 * it the compiler is free to delete the loop entirely.
+	 */
+	volatile int i;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Parent creates EBB event */
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Check the EBB is enabled by writing PMC1 */
+	write_pmc1();
+
+	/* EBB event is enabled here, keep running for a while */
+	for (i = 0; i < 1000000; i++)
+		;
+
+	return 0;
+}
+
+/*
+ * Parent side of the test: fork a child, create and enable an EBB event on
+ * it, then check that the child exits cleanly.
+ *
+ * Returns 0 on success.
+ */
+int ebb_on_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Without a child there is nobody to sync with or attach the event to */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child should just exit happily */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run ebb_on_child() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(ebb_on_child, "ebb_on_child");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
new file mode 100644
index 0000000..11acf1d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c

@@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. The child expects this and enables
+ * EBBs, which are then delivered to the child, even though the event is
+ * created by the parent.
+ */
+
+/*
+ * Child half of the test: once the parent says go, install the EBB handler
+ * and enable EBBs, tell the parent we are ready, then stay busy until 20 EBBs
+ * have been counted.
+ */
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	/* Handler and EBB enable go in first, the parent creates the event later */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Spin until the handler has counted 20 EBBs */
+	for (;;) {
+		if (ebb_state.stats.ebb_count >= 20)
+			break;
+
+		FAIL_IF(core_busy_loop());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold in whatever is left in PMC1 before reporting */
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/*
+ * Tests we can setup an EBB on our child - if it's expecting it.
+ *
+ * Forks a child, waits for it to install its EBB handler, then creates and
+ * enables the EBB event on the child's pid and waits for the child to exit.
+ * Returns 0 on success.
+ */
+int ebb_on_willing_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Without a child there is nobody to sync with or attach the event to */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to setup its EBB handler */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Child should now take EBBs and then exit */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run ebb_on_willing_child() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 0000000..be4dd5a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c

@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+/*
+ * Initialise, open and enable a PM_RUN_INST_CMPL event bound to @cpu.
+ * Returns 0 once the event is enabled.
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	/* Restrict the event with all three exclude_* flags */
+	event->attr.exclude_idle = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_kernel = 1;
+
+	/* Only attempt the per-cpu open if the paranoia check passes */
+	SKIP_IF(require_paranoia_below(1));
+
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Parent side of the test: bind to one cpu, fork a child that runs
+ * ebb_child(), let it install its EBB event, then open a cpu event on the
+ * same cpu and check how that event ran once the child has finished.
+ *
+ * Returns 0 on success, or the result of setup_cpu_event() if that fails.
+ */
+int ebb_vs_cpu_event(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Never let a failed fork()'s -1 reach kill_child_and_wait() below */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Now try to install our CPU event */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* .. and wait for it to complete */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run, but we don't expect 100% */
+	FAIL_IF(event.result.enabled >= event.result.running);
+
+	return 0;
+}
+
+/* Run ebb_vs_cpu_event() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
new file mode 100644
index 0000000..7e78153
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c

@@ -0,0 +1,131 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test various attributes of the EBB event are enforced.
+ *
+ * The function is a straight-line script: each stanza initialises an event,
+ * tweaks one attribute and checks that opening it succeeds or fails as the
+ * stanza's comment says. Returns 0 when every expectation holds.
+ */
+int event_attributes(void)
+{
+	struct event event, leader;
+
+	/* Baseline: an unmodified EBB leader event */
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to succeed */
+	FAIL_IF(event_open(&event));
+	event_close(&event);
+
+
+	event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+	event_leader_ebb_init(&event);
+	/* Expected to fail, no PMC specified */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+	event.attr.exclusive = 0;
+	/* Expected to fail, not exclusive */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x3001e);
+	event_leader_ebb_init(&event);
+	event.attr.freq = 1;
+	/* Expected to fail, sets freq */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x4001e);
+	event_leader_ebb_init(&event);
+	event.attr.sample_period = 1;
+	/* Expected to fail, sets sample_period */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.enable_on_exec = 1;
+	/* Expected to fail, sets enable_on_exec */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.inherit = 1;
+	/* Expected to fail, sets inherit */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	/* Group case 1: EBB leader with an EBB sibling */
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to succeed */
+	FAIL_IF(event_open_with_group(&event, leader.fd));
+	event_close(&leader);
+	event_close(&event);
+
+
+	/* Group case 2: EBB leader, sibling left without EBB initialisation */
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+
+	/* Expected to fail, event doesn't request EBB, leader does */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	/* Group case 3: leader with the EBB flag removed, EBB sibling */
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	/* Clear the EBB flag */
+	leader.attr.config &= ~(1ull << 63);
+
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to fail, leader doesn't request EBB */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.exclusive = 0;
+	/* Expected to fail, leader isn't exclusive */
+	FAIL_IF(event_open(&leader) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.pinned = 0;
+	/* Expected to fail, leader isn't pinned */
+	FAIL_IF(event_open(&leader) == 0);
+
+	/* Last case opens the event on cpu 0 instead of on a task */
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to fail, not a task event */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+	return 0;
+}
+
+/* Run event_attributes() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(event_attributes, "event_attributes");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
new file mode 100644
index 0000000..b866a05
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S

@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(thirty_two_instruction_loop)
+	cmpwi	r3,0
+	beqlr
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
new file mode 100644
index 0000000..9e7af6e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c

@@ -0,0 +1,79 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+/* Opened by fork_cleanup() before the fork, then read again from the child */
+static struct event event;
+
+/*
+ * Runs in the forked child: checks that BESCR, EBBHR and EBBRR all read back
+ * as zero, runs write_pmc1 under catch_sigill(), and finally reads the event
+ * that was opened before the fork. Returns 0 when every check passes.
+ */
+static int child(void)
+{
+	/* Even though we have EBE=0 we can still see the EBB regs */
+	FAIL_IF(mfspr(SPRN_BESCR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+	/* NOTE(review): presumably non-zero when no SIGILL is raised - confirm in catch_sigill() */
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We can still read from the event, though it is on our parent */
+	FAIL_IF(event_read(&event));
+
+	return 0;
+}
+
+/*
+ * Tests that fork clears EBB state.
+ *
+ * Sets up an enabled EBB event with a handler in the parent, forks, and lets
+ * the child (see child()) verify that it starts with none of that state.
+ * Returns 0 on success.
+ */
+int fork_cleanup(void)
+{
+	pid_t pid;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Don't need to actually take any EBBs */
+
+	pid = fork();
+	/* No child means nothing was tested, so don't go on to wait for -1 */
+	FAIL_IF(pid == -1);
+	if (pid == 0)
+		exit(child());
+
+	/* Child does the actual testing */
+	FAIL_IF(wait_for_child(pid));
+
+	/* After fork */
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run fork_cleanup() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(fork_cleanup, "fork_cleanup");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
new file mode 100644
index 0000000..f8190fa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c

@@ -0,0 +1,164 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+/*
+ * True whenever do_count_loop() has the counters frozen (MMCR0_FC set). The
+ * EBB handler reads it to choose how to reset the EBB.
+ */
+static bool counters_frozen = true;
+
+/*
+ * Unfreeze the counters, run the calibrated loop for @instructions
+ * instructions (in units of 32), freeze them again and compare the PMC4
+ * total against @instructions + @overhead.
+ *
+ * The measured count is stored in event->result.value. When @report is true
+ * the figures are printed. Returns 0 if the count is within tolerance, -1
+ * otherwise (including when nothing was counted at all).
+ */
+static int do_count_loop(struct event *event, uint64_t instructions,
+			 uint64_t overhead, bool report)
+{
+	int64_t difference, expected;
+	double percentage;
+
+	clear_ebb_stats();
+
+	/* Publish the flag before actually unfreezing */
+	counters_frozen = false;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+	thirty_two_instruction_loop(instructions >> 5);
+
+	counters_frozen = true;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+	/* Fold in whatever is left in PMC4 since the last EBB */
+	count_pmc(4, sample_period);
+
+	event->result.value = ebb_state.stats.pmc_count[4-1];
+	expected = instructions + overhead;
+	difference = event->result.value - expected;
+	percentage = (double)difference / event->result.value * 100;
+
+	if (report) {
+		printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+		printf("Expected %ld\n", expected);
+		printf("Actual   %llu\n", event->result.value);
+		printf("Error    %ld, %f%%\n", difference, percentage);
+		printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+	}
+
+	if (difference < 0)
+		difference = -difference;
+
+	/*
+	 * Tolerate a difference of up to 0.0001 %, ie. fail as soon as
+	 * difference * 1000000 reaches the measured value. This is done as a
+	 * comparison rather than a division so that a measured value of zero
+	 * is reported as a failure instead of dividing by zero.
+	 */
+	difference *= 10000 * 100;
+	if ((uint64_t)difference >= event->result.value)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Measure a zero-length loop 101 times and return the smallest instruction
+ * count seen, ie. the fixed cost of a do_count_loop() call.
+ */
+static uint64_t determine_overhead(struct event *event)
+{
+	uint64_t lowest, sample;
+	int remaining = 100;
+
+	/* The first measurement seeds the minimum */
+	do_count_loop(event, 0, 0, false);
+	lowest = event->result.value;
+
+	while (remaining-- > 0) {
+		do_count_loop(event, 0, 0, false);
+		sample = event->result.value;
+		if (sample >= lowest)
+			continue;
+
+		printf("Replacing overhead %lu with %lu\n", lowest, sample);
+		lowest = sample;
+	}
+
+	return lowest;
+}
+
+/*
+ * EBB handler for the PMC4 event. A genuine EBB (BESCR_PMEO set) is counted
+ * and PMC4 accumulated, anything else is recorded as spurious. Either way the
+ * EBB is then reset, also clearing MMCR0_PMAO when the counters are frozen.
+ */
+static void pmc4_ebb_callee(void)
+{
+	if (mfspr(SPRN_BESCR) & BESCR_PMEO) {
+		ebb_state.stats.ebb_count++;
+		count_pmc(4, sample_period);
+	} else {
+		ebb_state.stats.spurious++;
+	}
+
+	if (!counters_frozen) {
+		reset_ebb();
+		return;
+	}
+
+	reset_ebb_with_clear_mask(MMCR0_PMAO);
+}
+
+/*
+ * Open and enable a PM_RUN_INST_CMPL EBB event, install pmc4_ebb_callee as
+ * the handler, measure the overhead of an empty run and then check the
+ * counted instructions for a series of increasingly long runs.
+ *
+ * Returns 0 if every run is within do_count_loop()'s tolerance.
+ */
+int instruction_count(void)
+{
+	struct event event;
+	uint64_t overhead;
+
+	event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+	event_leader_ebb_init(&event);
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = COUNTER_OVERFLOW;
+
+	setup_ebb_handler(pmc4_ebb_callee);
+	/* Clear the freeze bit so the counters run */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	ebb_global_enable();
+
+	/* Smallest count seen for a zero-length loop, added to each expectation below */
+	overhead = determine_overhead(&event);
+	printf("Overhead of null loop: %lu instructions\n", overhead);
+
+	/* Run for 1M instructions */
+	FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+	/* Run for 10M instructions */
+	FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+	/* Run for 100M instructions */
+	FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+	/* Run for 1G instructions */
+	FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+	/* Run for 16G instructions */
+	FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+	/* Run for 64G instructions */
+	FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+	/* Run for 128G instructions */
+	FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+	ebb_global_disable();
+	event_close(&event);
+
+	printf("Finished OK\n");
+
+	return 0;
+}
+
+/* Run instruction_count() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(instruction_count, "instruction_count");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
new file mode 100644
index 0000000..0c9dd9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c

@@ -0,0 +1,100 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that tries to trigger CPU_FTR_PMAO_BUG, which is a hardware defect
+ * where an exception triggers but we context switch before it is delivered and
+ * lose the exception.
+ */
+
+/*
+ * Body of the test, run via eat_cpu(). Takes EBBs on PMC4 with a short,
+ * slowly varying sample period while repeatedly calling sched_yield(), until
+ * one million EBBs have been counted, then checks the PMC4 totals.
+ *
+ * Returns 0 on success.
+ */
+static int test_body(void)
+{
+	int i, orig_period, max_period;
+	struct event event;
+
+	/* We use PMC4 to make sure the kernel switches all counters correctly */
+	event_init_named(&event, 0x40002, "instructions");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(4);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	/*
+	 * We want a low sample period, but we also want to get out of the EBB
+	 * handler without tripping up again.
+	 *
+	 * This value picked after much experimentation.
+	 */
+	orig_period = max_period = sample_period = 400;
+
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 1000000) {
+		/*
+		 * We are trying to get the EBB exception to race exactly with
+		 * us entering the kernel to do the syscall. We then need the
+		 * kernel to decide our timeslice is up and context switch to
+		 * the other thread. When we come back our EBB will have been
+		 * lost and we'll spin in this while loop forever.
+		 */
+
+		for (i = 0; i < 100000; i++)
+			sched_yield();
+
+		/* Change the sample period slightly to try and hit the race */
+		if (sample_period >= (orig_period + 200))
+			sample_period = orig_period;
+		else
+			sample_period++;
+
+		/* Remember the largest period used, it sizes the fudge factor below */
+		if (sample_period > max_period)
+			max_period = sample_period;
+	}
+
+	ebb_freeze_pmcs();
+	ebb_global_disable();
+
+	count_pmc(4, sample_period);
+	/* Leave a recognisable value behind in PMC4 */
+	mtspr(SPRN_PMC4, 0xdead);
+
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* We vary our sample period so we need extra fudge here */
+	FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period)));
+
+	return 0;
+}
+
+/* Run test_body() via eat_cpu() and hand back its result. */
+static int lost_exception(void)
+{
+	int rc = eat_cpu(test_body);
+
+	return rc;
+}
+
+/* Run lost_exception() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(lost_exception, "lost_exception");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
new file mode 100644
index 0000000..67d78af
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c

@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test counting multiple events using EBBs.
+ *
+ * Opens six events as one group (events[0] is the leader), primes PMC1-6 with
+ * the sample period and runs until 50 EBBs have been counted.
+ * Returns 0 on success.
+ */
+int multi_counter(void)
+{
+	struct event events[6];
+	int i, group_fd;
+
+	event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+	event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+	event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+	event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+	event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+	event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+	/* Only the first event is set up as the EBB leader */
+	event_leader_ebb_init(&events[0]);
+	for (i = 1; i < 6; i++)
+		event_ebb_init(&events[i]);
+
+	/* The leader is opened with group_fd -1, the rest with the leader's fd */
+	group_fd = -1;
+	for (i = 0; i < 6; i++) {
+		events[i].attr.exclude_kernel = 1;
+		events[i].attr.exclude_hv = 1;
+		events[i].attr.exclude_idle = 1;
+
+		FAIL_IF(event_open_with_group(&events[i], group_fd));
+		if (group_fd == -1)
+			group_fd = events[0].fd;
+	}
+
+	ebb_enable_pmc_counting(1);
+	ebb_enable_pmc_counting(2);
+	ebb_enable_pmc_counting(3);
+	ebb_enable_pmc_counting(4);
+	ebb_enable_pmc_counting(5);
+	ebb_enable_pmc_counting(6);
+	setup_ebb_handler(standard_ebb_callee);
+
+	/* Enable the whole group through the leader's fd */
+	FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+	FAIL_IF(event_read(&events[0]));
+
+	ebb_global_enable();
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 50) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold in what is left in each counter before reporting */
+	count_pmc(1, sample_period);
+	count_pmc(2, sample_period);
+	count_pmc(3, sample_period);
+	count_pmc(4, sample_period);
+	count_pmc(5, sample_period);
+	count_pmc(6, sample_period);
+
+	dump_ebb_state();
+
+	for (i = 0; i < 6; i++)
+		event_close(&events[i]);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/* Run multi_counter() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(multi_counter, "multi_counter");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 0000000..b8dc371
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c

@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB-using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
+ */
+
+/*
+ * Set from the SIGINT handler and polled by the child's main loop. It must be
+ * volatile sig_atomic_t so that the store made in the handler is well defined
+ * and the polling loop really re-reads it.
+ */
+static volatile sig_atomic_t child_should_exit;
+
+/* SIGINT handler: just ask the main loop to stop. */
+static void sigint_handler(int signal)
+{
+	child_should_exit = true;
+}
+
+/* sigaction that routes a signal to sigint_handler(), all other fields zero */
+struct sigaction sigint_action = {
+	.sa_handler = sigint_handler,
+};
+
+/*
+ * Body of each forked child: install the SIGINT handler, open and enable a
+ * "cycles" EBB event on PMC1 and take EBBs until SIGINT sets
+ * child_should_exit.
+ *
+ * Returns 0 on success, 1 if sigaction() fails.
+ */
+static int cycles_child(void)
+{
+	struct event event;
+
+	if (sigaction(SIGINT, &sigint_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Run until the SIGINT handler sets child_should_exit */
+	while (!child_should_exit) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold in what is left in PMC1 before reporting */
+	count_pmc(1, sample_period);
+
+	dump_summary_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+#define NR_CHILDREN	4
+
+/*
+ * Bind to one cpu, fork NR_CHILDREN children that each run cycles_child(),
+ * let them run for a while, then SIGINT and reap each one.
+ *
+ * Returns 0 if every child was started and exited successfully, non-zero
+ * otherwise.
+ */
+int multi_ebb_procs(void)
+{
+	pid_t pids[NR_CHILDREN];
+	int cpu, rc, i, nr_started;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	for (i = 0; i < NR_CHILDREN; i++) {
+		pids[i] = fork();
+		if (pids[i] == 0)
+			exit(cycles_child());
+
+		/*
+		 * Never keep a -1 in pids[]: kill(-1, SIGINT) would signal
+		 * every process we are allowed to signal.
+		 */
+		if (pids[i] == -1) {
+			perror("fork");
+			break;
+		}
+	}
+	nr_started = i;
+
+	/* A failed fork() fails the test, but we still stop what did start */
+	rc = (nr_started != NR_CHILDREN);
+
+	/* Have them all run for "a while" */
+	if (rc == 0)
+		sleep(10);
+
+	for (i = 0; i < nr_started; i++) {
+		/* Tell them to stop */
+		kill(pids[i], SIGINT);
+		/* And wait */
+		rc |= wait_for_child(pids[i]);
+	}
+
+	return rc;
+}
+
+/* Run multi_ebb_procs() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(multi_ebb_procs, "multi_ebb_procs");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
new file mode 100644
index 0000000..2f9bf8e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c

@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that things work sanely if we have no handler.
+ *
+ * An EBB event is opened and enabled but no handler is installed and EBBs are
+ * never globally enabled. PMC1 is primed to overflow quickly and MMCR0 is then
+ * checked for the expected "frozen with PMAO set" value.
+ */
+
+static int no_handler_test(void)
+{
+	struct event event;
+	u64 val;
+	int i;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* No handler has been set up, so the handler address must read as zero */
+	val = mfspr(SPRN_EBBHR);
+	FAIL_IF(val != 0);
+
+	/* Make sure it overflows quickly */
+	sample_period = 1000;
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Spin to make sure the event has time to overflow */
+	for (i = 0; i < 1000; i++)
+		mb();
+
+	dump_ebb_state();
+
+	/* We expect to see the PMU frozen & PMAO set */
+	val = mfspr(SPRN_MMCR0);
+	/* 0x80000080 is MMCR0_FC | MMCR0_PMAO, with no other bit set */
+	FAIL_IF(val != 0x0000000080000080);
+
+	event_close(&event);
+
+	dump_ebb_state();
+
+	/* The real test is that we never took an EBB at 0x0 */
+
+	return 0;
+}
+
+/* Run no_handler_test() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(no_handler_test, "no_handler_test");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
new file mode 100644
index 0000000..986500f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c

@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that the kernel properly handles PMAE across context switches.
+ *
+ * We test this by calling into the kernel inside our EBB handler, where PMAE
+ * is clear. A cpu eater companion thread is running on the same CPU as us to
+ * encourage the scheduler to switch us.
+ *
+ * The kernel must make sure that when it context switches us back in, it
+ * honours the fact that we had PMAE clear.
+ *
+ * Observed to hit the failing case on the first EBB with a broken kernel.
+ */
+
+/* Set by the handler once MMCR0 differs across the sched_yield() call */
+static bool mmcr0_mismatch;
+/* MMCR0 as sampled by the handler before and after sched_yield() */
+static uint64_t before, after;
+
+/*
+ * EBB handler that makes a syscall. For a genuine EBB (BESCR_PMEO set) it
+ * counts the EBB, accumulates PMC1 and then samples MMCR0 on either side of a
+ * sched_yield(), flagging any difference. Anything else is counted as
+ * spurious. The EBB is reset in both cases.
+ */
+static void syscall_ebb_callee(void)
+{
+	if (!(mfspr(SPRN_BESCR) & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+	} else {
+		ebb_state.stats.ebb_count++;
+		count_pmc(1, sample_period);
+
+		before = mfspr(SPRN_MMCR0);
+
+		/* Try and get ourselves scheduled, to force a PMU context switch */
+		sched_yield();
+
+		after = mfspr(SPRN_MMCR0);
+		if (before != after)
+			mmcr0_mismatch = true;
+	}
+
+	reset_ebb();
+}
+
+/*
+ * Body of the test, run via eat_cpu(). Takes EBBs on PMC1 with
+ * syscall_ebb_callee as the handler until either 20 EBBs have been counted or
+ * the handler has seen MMCR0 change across its sched_yield().
+ *
+ * Returns 0 if EBBs were taken and no mismatch was seen.
+ */
+static int test_body(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(syscall_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Stop early as soon as the handler reports a mismatch */
+	while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	if (mmcr0_mismatch)
+		printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(mmcr0_mismatch);
+
+	return 0;
+}
+
+/* Run test_body() via eat_cpu() and hand back its result. */
+int pmae_handling(void)
+{
+	int rc = eat_cpu(test_body);
+
+	return rc;
+}
+
+/* Run pmae_handling() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(pmae_handling, "pmae_handling");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
new file mode 100644
index 0000000..a503fa7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being
+ * used. Tests the MMCR0_FC56 logic in the kernel.
+ */
+
+/* Number of times the handler found PMC5 or PMC6 at or above COUNTER_OVERFLOW */
+static int pmc56_overflowed;
+
+/*
+ * EBB handler. For a genuine EBB (BESCR_PMEO set) it counts the EBB,
+ * accumulates PMC2, and checks PMC5 and then PMC6 for overflow before
+ * accumulating each of them. Anything else is counted as spurious. The EBB is
+ * reset in both cases.
+ */
+static void ebb_callee(void)
+{
+	if (!(mfspr(SPRN_BESCR) & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+	} else {
+		ebb_state.stats.ebb_count++;
+		count_pmc(2, sample_period);
+
+		/* Look at each counter before count_pmc() touches it */
+		if (mfspr(SPRN_PMC5) >= COUNTER_OVERFLOW)
+			pmc56_overflowed++;
+
+		count_pmc(5, COUNTER_OVERFLOW);
+
+		if (mfspr(SPRN_PMC6) >= COUNTER_OVERFLOW)
+			pmc56_overflowed++;
+
+		count_pmc(6, COUNTER_OVERFLOW);
+	}
+
+	reset_ebb();
+}
+
+/*
+ * Take ten EBBs from an event on PMC2 and check that the handler never saw
+ * PMC5 or PMC6 at or above COUNTER_OVERFLOW.
+ *
+ * Returns 0 on success.
+ */
+int pmc56_overflow(void)
+{
+	struct event event;
+
+	/* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	/*
+	 * Prime PMC2, the counter the event is on and the one the handler
+	 * accumulates, so that the first EBB arrives after one sample period.
+	 */
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, 0);
+	mtspr(SPRN_PMC6, 0);
+
+	while (ebb_state.stats.ebb_count < 10)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(2, sample_period);
+
+	dump_ebb_state();
+
+	printf("PMC5/6 overflow %d\n", pmc56_overflowed);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0);
+
+	return 0;
+}
+
+/* Run pmc56_overflow() through test_harness() and return what it reports. */
+int main(void)
+{
+	int rc = test_harness(pmc56_overflow, "pmc56_overflow");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/pmu/ebb/reg.h
new file mode 100644
index 0000000..5921b0d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg.h

@@ -0,0 +1,49 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x)        #x
+#define __stringify(x)          __stringify_1(x)
+
+/*
+ * Read / write a special purpose register. rn must expand to the literal SPR
+ * number, because it is pasted into the instruction text.
+ */
+#define mfspr(rn)       ({unsigned long rval; \
+                         asm volatile("mfspr %0," __stringify(rn) \
+                                 : "=r" (rval)); rval; })
+#define mtspr(rn, v)    asm volatile("mtspr " __stringify(rn) ",%0" : \
+                                    : "r" ((unsigned long)(v)) \
+                                    : "memory")
+
+/*
+ * "sync" plus a compiler memory clobber. There is deliberately no trailing
+ * semicolon, so that "mb();" is a single statement and is safe as the body
+ * of an unbraced if/else.
+ */
+#define mb()		asm volatile("sync" : : : "memory")
+
+#define SPRN_MMCR2     769
+#define SPRN_MMCRA     770
+#define SPRN_MMCR0     779
+#define   MMCR0_PMAO   0x00000080
+#define   MMCR0_PMAE   0x04000000
+/*
+ * unsigned long, so that ~MMCR0_FC keeps the upper 32 bits set when it is
+ * ANDed with a 64-bit register value. A plain 0x80000000 is an unsigned int
+ * and its complement would zero-extend.
+ */
+#define   MMCR0_FC     0x80000000ul
+#define SPRN_EBBHR     804
+#define SPRN_EBBRR     805
+#define SPRN_BESCR     806     /* Branch event status & control register */
+#define SPRN_BESCRS    800     /* Branch event status & control set (1 bits set to 1) */
+#define SPRN_BESCRSU   801     /* Branch event status & control set upper */
+#define SPRN_BESCRR    802     /* Branch event status & control REset (1 bits set to 0) */
+#define SPRN_BESCRRU   803     /* Branch event status & control REset upper */
+
+#define BESCR_PMEO     0x1     /* PMU Event-based exception Occurred */
+#define BESCR_PME      (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1      771
+#define SPRN_PMC2      772
+#define SPRN_PMC3      773
+#define SPRN_PMC4      774
+#define SPRN_PMC5      775
+#define SPRN_PMC6      776
+
+#define SPRN_SIAR      780
+#define SPRN_SDAR      781
+#define SPRN_SIER      768
+
+#endif /* _SELFTESTS_POWERPC_REG_H */

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
new file mode 100644
index 0000000..0cae66f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c

@@ -0,0 +1,39 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+#include "reg.h"
+
+
+/*
+ * Test basic access to the EBB regs, they should be user accessible with no
+ * kernel interaction required.
+ */
+/*
+ * Write a known pattern to BESCR, then to EBBHR, reading each register
+ * straight back. The test fails if either read does not return the value
+ * that was just written.
+ */
+int reg_access(void)
+{
+	const uint64_t bescr_pattern = 0x8000000100000000ull;
+	const uint64_t ebbhr_pattern = 0x0000000001000000ull;
+	uint64_t readback;
+
+	mtspr(SPRN_BESCR, bescr_pattern);
+	readback = mfspr(SPRN_BESCR);
+	FAIL_IF(readback != bescr_pattern);
+
+	mtspr(SPRN_EBBHR, ebbhr_pattern);
+	readback = mfspr(SPRN_EBBHR);
+	FAIL_IF(readback != ebbhr_pattern);
+
+	return 0;
+}
+
+/* Run reg_access() under the selftest harness and pass on its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(reg_access, "reg_access");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
new file mode 100644
index 0000000..d56607e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c

@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task
+ * event should prevent the EBB event from being enabled.
+ */
+
+/*
+ * Open and enable a pinned PM_RUN_INST_CMPL event attached to child_pid,
+ * with kernel, hypervisor and idle excluded. Returns 0 on success; FAIL_IF
+ * returns early if opening or enabling the event fails.
+ */
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	/* Leave kernel, hypervisor and idle out of the count */
+	event->attr.exclude_idle = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_kernel = 1;
+
+	/* The pinned attribute is what this test is about */
+	event->attr.pinned = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Parent side of the test: fork a child that runs ebb_child(), attach a
+ * pinned per-task event to it first, then check that the child exits with
+ * status 2 and that the pinned event has non-zero value, enabled and running
+ * counts.
+ */
+int task_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t child;
+	int err;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	child = fork();
+	if (child == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* The task event has to be set up before the child's EBB event */
+	err = setup_child_event(&event, child);
+	if (err) {
+		kill_child_and_wait(child);
+		return err;
+	}
+
+	/*
+	 * The first handshake tells the child to install its EBB event. Only
+	 * if that succeeds do we do the second handshake telling it to run,
+	 * otherwise we go straight on to waiting for it to exit.
+	 */
+	if (!sync_with_child(read_pipe, write_pipe)) {
+		FAIL_IF(sync_with_child(read_pipe, write_pipe));
+	}
+
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(child) != 2);
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	FAIL_IF(event.result.value == 0);
+	/*
+	 * For reasons I don't understand enabled is usually just slightly
+	 * lower than running. Would be good to confirm why.
+	 */
+	FAIL_IF(event.result.enabled == 0);
+	FAIL_IF(event.result.running == 0);
+
+	return 0;
+}
+
+/* Run task_event_pinned_vs_ebb() under the selftest harness. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
new file mode 100644
index 0000000..eba3219
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c

@@ -0,0 +1,83 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a per-task event vs an EBB - in that order. The EBB should push the
+ * per-task event off the PMU.
+ */
+
+/*
+ * Open and enable a (non-pinned) PM_RUN_INST_CMPL event attached to
+ * child_pid, with kernel, hypervisor and idle excluded. Returns 0 on
+ * success; FAIL_IF returns early if opening or enabling the event fails.
+ */
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	/* Leave kernel, hypervisor and idle out of the count */
+	event->attr.exclude_idle = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_kernel = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Parent side of the test: fork a child that runs ebb_child(), attach a
+ * per-task event to it first, then check that the child exits with status 0.
+ * Nothing is asserted about the task event's counts.
+ */
+int task_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t child;
+	int err;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	child = fork();
+	if (child == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* The task event has to be set up before the child's EBB event */
+	err = setup_child_event(&event, child);
+	if (err) {
+		kill_child_and_wait(child);
+		return err;
+	}
+
+	/*
+	 * The first handshake tells the child to install its EBB event. Only
+	 * if that succeeds do we do the second handshake telling it to run,
+	 * otherwise we go straight on to waiting for it to exit.
+	 */
+	if (!sync_with_child(read_pipe, write_pipe)) {
+		FAIL_IF(sync_with_child(read_pipe, write_pipe));
+	}
+
+	/* The EBB event should push the task event off so the child should succeed */
+	FAIL_IF(wait_for_child(child));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The task event may have run, or not so we can't assert anything about it */
+
+	return 0;
+}
+
+/* Run task_event_vs_ebb() under the selftest harness. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(task_event_vs_ebb, "task_event_vs_ebb");
+
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
new file mode 100644
index 0000000..251e66a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c

@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "trace.h"
+
+
+/*
+ * Map an anonymous, private region of size bytes and set it up as an empty
+ * trace buffer. size covers the struct trace_buffer header as well as the
+ * entry data. Returns NULL if size is smaller than the header or if mmap()
+ * fails.
+ */
+struct trace_buffer *trace_buffer_allocate(u64 size)
+{
+	struct trace_buffer *tb;
+	void *mapping;
+
+	if (size < sizeof(struct trace_buffer)) {
+		fprintf(stderr, "Error: trace buffer too small\n");
+		return NULL;
+	}
+
+	mapping = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (mapping == MAP_FAILED) {
+		perror("mmap");
+		return NULL;
+	}
+
+	/* Empty buffer: the tail starts at the first data byte */
+	tb = mapping;
+	tb->overflow = false;
+	tb->tail = tb->data;
+	tb->size = size;
+
+	return tb;
+}
+
+/* Return true if p lies strictly before the end of the tb->size byte buffer. */
+static bool trace_check_bounds(struct trace_buffer *tb, void *p)
+{
+	void *end = (void *)tb + tb->size;
+
+	return p < end;
+}
+
+/*
+ * Decide whether an allocation ending at p may go ahead. Returns false, and
+ * latches tb->overflow, once p falls outside the buffer.
+ */
+static bool trace_check_alloc(struct trace_buffer *tb, void *p)
+{
+	/*
+	 * Overflow is sticky: after the first item has been dropped nothing
+	 * else is accepted, so the log simply stops rather than ending up
+	 * with holes in it (a large item dropped, a later small one logged).
+	 * If you're overflowing, make your buffer bigger.
+	 */
+	if (tb->overflow)
+		return false;
+
+	if (trace_check_bounds(tb, p))
+		return true;
+
+	tb->overflow = true;
+
+	return false;
+}
+
+/*
+ * Carve bytes off the tail of the buffer. Returns a pointer to the start of
+ * the new region, or NULL if trace_check_alloc() refuses the allocation.
+ */
+static void *trace_alloc(struct trace_buffer *tb, int bytes)
+{
+	void *start = tb->tail;
+	void *end = start + bytes;
+
+	if (!trace_check_alloc(tb, end))
+		return NULL;
+
+	tb->tail = end;
+
+	return start;
+}
+
+/*
+ * Reserve room for one entry: the struct trace_entry header followed by
+ * payload_size bytes of payload. On success the entry's length field is
+ * filled in and the entry is returned; the caller sets the type and payload.
+ *
+ * The length field is a u8, so a payload that does not fit in it cannot be
+ * recorded. Storing a truncated length would make trace_buffer_print() step
+ * into the middle of the payload and misparse everything after it. Such a
+ * request is refused and the buffer is marked as overflowed, the same way a
+ * request that runs off the end of the buffer is handled.
+ *
+ * Returns NULL if the entry could not be allocated.
+ */
+static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size)
+{
+	const int max_payload = (u8)~0;	/* largest value e->length can hold */
+	struct trace_entry *e;
+
+	if (payload_size < 0 || payload_size > max_payload) {
+		tb->overflow = true;
+		return NULL;
+	}
+
+	e = trace_alloc(tb, sizeof(*e) + payload_size);
+	if (e)
+		e->length = payload_size;
+
+	return e;
+}
+
+/*
+ * Append a register entry whose payload is the register number followed by
+ * its value. Returns 0 on success or -ENOSPC if the entry could not be
+ * allocated.
+ */
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value)
+{
+	struct trace_entry *e;
+	u64 *payload;
+
+	e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value));
+	if (e == NULL)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_REG;
+
+	payload = (u64 *)e->data;
+	payload[0] = reg;
+	payload[1] = value;
+
+	return 0;
+}
+
+/*
+ * Append a counter entry whose payload is the single u64 value. Returns 0 on
+ * success or -ENOSPC if the entry could not be allocated.
+ */
+int trace_log_counter(struct trace_buffer *tb, u64 value)
+{
+	struct trace_entry *e;
+	u64 *payload;
+
+	e = trace_alloc_entry(tb, sizeof(value));
+	if (e == NULL)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_COUNTER;
+
+	payload = (u64 *)e->data;
+	payload[0] = value;
+
+	return 0;
+}
+
+/*
+ * Append a string entry holding a copy of str, including a terminating NUL
+ * byte. Returns 0 on success or -ENOSPC if the entry could not be allocated.
+ */
+int trace_log_string(struct trace_buffer *tb, char *str)
+{
+	struct trace_entry *e;
+	int len = strlen(str);
+	char *dest;
+
+	/* One extra byte so the stored copy is NUL terminated for printing */
+	e = trace_alloc_entry(tb, len + 1);
+	if (e == NULL)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_STRING;
+
+	dest = (char *)e->data;
+	memcpy(dest, str, len);
+	dest[len] = '\0';
+
+	return 0;
+}
+
+/*
+ * Append a payload-less indent marker. Returns 0 on success or -ENOSPC if
+ * the entry could not be allocated.
+ */
+int trace_log_indent(struct trace_buffer *tb)
+{
+	struct trace_entry *e = trace_alloc_entry(tb, 0);
+
+	if (e == NULL)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_INDENT;
+
+	return 0;
+}
+
+/*
+ * Append a payload-less outdent marker. Returns 0 on success or -ENOSPC if
+ * the entry could not be allocated.
+ */
+int trace_log_outdent(struct trace_buffer *tb)
+{
+	struct trace_entry *e = trace_alloc_entry(tb, 0);
+
+	if (e == NULL)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_OUTDENT;
+
+	return 0;
+}
+
+/* Print prefix columns of padding followed by the "[seq]: " entry label. */
+static void trace_print_header(int seq, int prefix)
+{
+	const char *padding = "";
+
+	/* An empty string printed at width prefix yields prefix spaces */
+	printf("%*s[%d]: ", prefix, padding, seq);
+}
+
+/*
+ * Map an SPR number to its symbolic name for printing. Returns NULL for a
+ * number that is not in the table.
+ */
+static char *trace_decode_reg(int reg)
+{
+	static const struct {
+		int sprn;
+		char *name;
+	} known_sprs[] = {
+		{ 768, "SPRN_SIER" },
+		{ 769, "SPRN_MMCR2" },
+		{ 770, "SPRN_MMCRA" },
+		{ 771, "SPRN_PMC1" },
+		{ 772, "SPRN_PMC2" },
+		{ 773, "SPRN_PMC3" },
+		{ 774, "SPRN_PMC4" },
+		{ 775, "SPRN_PMC5" },
+		{ 776, "SPRN_PMC6" },
+		{ 779, "SPRN_MMCR0" },
+		{ 780, "SPRN_SIAR" },
+		{ 781, "SPRN_SDAR" },
+		{ 800, "SPRN_BESCRS" },
+		{ 801, "SPRN_BESCRSU" },
+		{ 802, "SPRN_BESCRR" },
+		{ 803, "SPRN_BESCRRU" },
+		{ 804, "SPRN_EBBHR" },
+		{ 805, "SPRN_EBBRR" },
+		{ 806, "SPRN_BESCR" },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(known_sprs) / sizeof(known_sprs[0]); i++) {
+		if (known_sprs[i].sprn == reg)
+			return known_sprs[i].name;
+	}
+
+	return NULL;
+}
+
+/*
+ * Print a register entry. The payload is the register number followed by its
+ * value; the register is shown by name when trace_decode_reg() knows it and
+ * by number otherwise.
+ */
+static void trace_print_reg(struct trace_entry *e)
+{
+	u64 *fields = (u64 *)e->data;
+	u64 *reg = &fields[0];
+	u64 *value = &fields[1];
+	char *name = trace_decode_reg(*reg);
+
+	if (!name) {
+		printf("register %lld = 0x%016llx\n", *reg, *value);
+		return;
+	}
+
+	printf("register %-10s = 0x%016llx\n", name, *value);
+}
+
+/* Print a counter entry, whose payload is a single u64 value. */
+static void trace_print_counter(struct trace_entry *e)
+{
+	u64 *counter = (u64 *)e->data;
+
+	printf("counter = %lld\n", *counter);
+}
+
+/* Print a string entry; the payload is passed to puts() as a C string. */
+static void trace_print_string(struct trace_entry *e)
+{
+	char *text = (char *)e->data;
+
+	puts(text);
+}
+
+#define BASE_PREFIX	2
+#define PREFIX_DELTA	8
+
+/*
+ * Print one entry, labelled with its sequence number seq and indented by
+ * *prefix columns. Indent and outdent entries print a brace and move *prefix
+ * by PREFIX_DELTA; *prefix never drops below BASE_PREFIX.
+ */
+static void trace_print_entry(struct trace_entry *e, int seq, int *prefix)
+{
+	/*
+	 * A closing brace lines up with its opening brace, so the outdent is
+	 * applied before the header is printed. Everything else prints its
+	 * header at the current level.
+	 */
+	if (e->type == TRACE_TYPE_OUTDENT) {
+		*prefix -= PREFIX_DELTA;
+		if (*prefix < BASE_PREFIX)
+			*prefix = BASE_PREFIX;
+	}
+
+	trace_print_header(seq, *prefix);
+
+	switch (e->type) {
+	case TRACE_TYPE_REG:
+		trace_print_reg(e);
+		break;
+	case TRACE_TYPE_COUNTER:
+		trace_print_counter(e);
+		break;
+	case TRACE_TYPE_STRING:
+		trace_print_string(e);
+		break;
+	case TRACE_TYPE_INDENT:
+		puts("{");
+		*prefix += PREFIX_DELTA;
+		break;
+	case TRACE_TYPE_OUTDENT:
+		puts("}");
+		break;
+	default:
+		/* Unknown type, just say where it is */
+		printf("entry @ %p type %d\n", e, e->type);
+		break;
+	}
+}
+
+/*
+ * Dump the buffer: a summary of its address, tail, size and overflow state,
+ * followed by every entry between the start of the data and the tail.
+ */
+void trace_buffer_print(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+	int seq, prefix;
+	void *p;
+
+	printf("Trace buffer dump:\n");
+	printf("  address  %p \n", tb);
+	printf("  tail     %p\n", tb->tail);
+	printf("  size     %llu\n", tb->size);
+	printf("  overflow %s\n", tb->overflow ? "TRUE" : "false");
+	printf("  Content:\n");
+
+	prefix = BASE_PREFIX;
+
+	/* Each entry is a header followed by e->length bytes of payload */
+	for (seq = 0, p = tb->data;
+	     trace_check_bounds(tb, p) && p < tb->tail;
+	     seq++, p = (void *)e + sizeof(*e) + e->length) {
+		e = p;
+
+		trace_print_entry(e, seq, &prefix);
+	}
+}
+
+/* Print the size (in hex) and address of the trace buffer on one line. */
+void trace_print_location(struct trace_buffer *tb)
+{
+	void *address = tb;
+
+	printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, address);
+}

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
new file mode 100644
index 0000000..926458e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h

@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+
+#include "utils.h"
+
+#define TRACE_TYPE_REG		1
+#define TRACE_TYPE_COUNTER	2
+#define TRACE_TYPE_STRING	3
+#define TRACE_TYPE_INDENT	4
+#define TRACE_TYPE_OUTDENT	5
+
+/*
+ * Header of one record in the trace buffer. The header is immediately
+ * followed by length bytes of payload, and the next record starts right
+ * after that payload.
+ */
+struct trace_entry
+{
+	u8 type;	/* one of the TRACE_TYPE_* values */
+	u8 length;	/* number of payload bytes in data[] */
+	u8 data[];	/* payload; C99 flexible array member */
+};
+
+/*
+ * Header placed at the start of the trace buffer mapping. The entries are
+ * stored back to back in data[], which runs to the end of the mapping.
+ */
+struct trace_buffer
+{
+	u64  size;	/* size of the whole mapping in bytes, header included */
+	bool overflow;	/* set once an entry has been refused; never cleared */
+	void *tail;	/* where the next entry will be written */
+	u8   data[];	/* entry storage; C99 flexible array member */
+};
+
+struct trace_buffer *trace_buffer_allocate(u64 size);
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value);
+int trace_log_counter(struct trace_buffer *tb, u64 value);
+int trace_log_string(struct trace_buffer *tb, char *str);
+int trace_log_indent(struct trace_buffer *tb);
+int trace_log_outdent(struct trace_buffer *tb);
+void trace_buffer_print(struct trace_buffer *tb);
+void trace_print_location(struct trace_buffer *tb);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */

diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
index 2b2d11d..184b368 100644
--- a/tools/testing/selftests/powerpc/pmu/event.c
+++ b/tools/testing/selftests/powerpc/pmu/event.c

@@ -39,7 +39,13 @@
 	event_init_opts(e, config, PERF_TYPE_RAW, name);
 }
 
+/* Initialise e as a PERF_TYPE_RAW event for config, named "event". */
+void event_init(struct event *e, u64 config)
+{
+	char *default_name = "event";
+
+	event_init_opts(e, config, PERF_TYPE_RAW, default_name);
+}
+
 #define PERF_CURRENT_PID	0
+#define PERF_NO_PID		-1
 #define PERF_NO_CPU		-1
 #define PERF_NO_GROUP		-1
 
@@ -59,6 +65,16 @@
 	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd);
 }
 
+/* Open e against task pid, with no cpu restriction and no group. */
+int event_open_with_pid(struct event *e, pid_t pid)
+{
+	int rc;
+
+	rc = event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP);
+
+	return rc;
+}
+
+/* Open e against cpu, with no particular task (PERF_NO_PID) and no group. */
+int event_open_with_cpu(struct event *e, int cpu)
+{
+	int rc;
+
+	rc = event_open_with_options(e, PERF_NO_PID, cpu, PERF_NO_GROUP);
+
+	return rc;
+}
+
 int event_open(struct event *e)
 {
 	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP);
@@ -69,6 +85,16 @@
 	close(e->fd);
 }
 
+/* Issue PERF_EVENT_IOC_ENABLE on the event's fd; returns the ioctl() result. */
+int event_enable(struct event *e)
+{
+	int rc;
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+
+	return rc;
+}
+
+/* Issue PERF_EVENT_IOC_DISABLE on the event's fd; returns the ioctl() result. */
+int event_disable(struct event *e)
+{
+	int rc;
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_DISABLE);
+
+	return rc;
+}
+
 int event_reset(struct event *e)
 {
 	return ioctl(e->fd, PERF_EVENT_IOC_RESET);

diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index e699319..a0ea6b1 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h

@@ -29,8 +29,12 @@
 void event_init_opts(struct event *e, u64 config, int type, char *name);
 int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
 int event_open_with_group(struct event *e, int group_fd);
+int event_open_with_pid(struct event *e, pid_t pid);
+int event_open_with_cpu(struct event *e, int cpu);
 int event_open(struct event *e);
 void event_close(struct event *e);
+int event_enable(struct event *e);
+int event_disable(struct event *e);
 int event_reset(struct event *e);
 int event_read(struct event *e);
 void event_report_justified(struct event *e, int name_width, int result_width);

diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
new file mode 100644
index 0000000..0f6a473
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.c

@@ -0,0 +1,252 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <errno.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+#include "lib.h"
+
+
+/*
+ * Choose a cpu from the calling task's affinity mask. Returns the cpu
+ * number, or -1 if the mask could not be read or is empty.
+ */
+int pick_online_cpu(void)
+{
+	cpu_set_t allowed;
+	int candidate;
+
+	CPU_ZERO(&allowed);
+
+	if (sched_getaffinity(0, sizeof(allowed), &allowed) != 0) {
+		perror("sched_getaffinity");
+		return -1;
+	}
+
+	/*
+	 * We prefer a primary thread, but skip 0: try the non-zero multiples
+	 * of 8 first, lowest numbered first.
+	 */
+	candidate = 8;
+	while (candidate < CPU_SETSIZE) {
+		if (CPU_ISSET(candidate, &allowed))
+			return candidate;
+		candidate += 8;
+	}
+
+	/* Otherwise take anything, searching from the highest cpu down */
+	candidate = CPU_SETSIZE;
+	while (candidate-- > 0) {
+		if (CPU_ISSET(candidate, &allowed))
+			return candidate;
+	}
+
+	printf("No cpus in affinity mask?!\n");
+	return -1;
+}
+
+/*
+ * Restrict the calling task to the single cpu given. Returns the result of
+ * sched_setaffinity().
+ */
+int bind_to_cpu(int cpu)
+{
+	cpu_set_t only_cpu;
+	int rc;
+
+	printf("Binding to cpu %d\n", cpu);
+
+	CPU_ZERO(&only_cpu);
+	CPU_SET(cpu, &only_cpu);
+
+	rc = sched_setaffinity(0, sizeof(only_cpu), &only_cpu);
+
+	return rc;
+}
+
+#define PARENT_TOKEN	0xAA
+#define CHILD_TOKEN	0x55
+
+/*
+ * Parent half of the handshake: send PARENT_TOKEN down write_pipe, then block
+ * until the child answers on read_pipe. Returns 0 if the child answered with
+ * CHILD_TOKEN and 1 if it sent anything else. FAIL_IF returns early if the
+ * write or read itself fails.
+ */
+int sync_with_child(union pipe read_pipe, union pipe write_pipe)
+{
+	char token = PARENT_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &token, 1) != 1);
+	FAIL_IF(read(read_pipe.read_fd, &token, 1) != 1);
+
+	/* A different token is sometimes expected, so no failure message here */
+	return (token == CHILD_TOKEN) ? 0 : 1;
+}
+
+/*
+ * Child half of the handshake: block until one byte arrives on read_pipe and
+ * check that it is PARENT_TOKEN. Returns 0 on success; FAIL_IF returns early
+ * otherwise.
+ */
+int wait_for_parent(union pipe read_pipe)
+{
+	char token;
+
+	FAIL_IF(read(read_pipe.read_fd, &token, 1) != 1);
+	FAIL_IF(token != PARENT_TOKEN);
+
+	return 0;
+}
+
+/*
+ * Send CHILD_TOKEN down write_pipe. Returns 0 on success; FAIL_IF returns
+ * early if the write fails.
+ */
+int notify_parent(union pipe write_pipe)
+{
+	char token = CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &token, 1) != 1);
+
+	return 0;
+}
+
+/*
+ * Send the bitwise complement of CHILD_TOKEN down write_pipe, i.e. a byte
+ * that sync_with_child() will not accept as CHILD_TOKEN. Returns 0 on
+ * success; FAIL_IF returns early if the write fails.
+ */
+int notify_parent_of_error(union pipe write_pipe)
+{
+	char token = ~CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &token, 1) != 1);
+
+	return 0;
+}
+
+/*
+ * Wait for child_pid to terminate. Returns the child's exit status if it
+ * exited normally, or 1 if waitpid() failed or the child ended any other way.
+ */
+int wait_for_child(pid_t child_pid)
+{
+	int status;
+
+	if (waitpid(child_pid, &status, 0) == -1) {
+		perror("waitpid");
+		return 1;
+	}
+
+	/* Signal or other */
+	if (!WIFEXITED(status))
+		return 1;
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Send SIGTERM to child_pid and then reap it. Returns whatever
+ * wait_for_child() reports for the child.
+ */
+int kill_child_and_wait(pid_t child_pid)
+{
+	int rc;
+
+	kill(child_pid, SIGTERM);
+	rc = wait_for_child(child_pid);
+
+	return rc;
+}
+
+/*
+ * Body of the child forked by eat_cpu(): handshake with the parent, then spin
+ * until killed. Never returns in practice.
+ */
+static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
+{
+	/*
+	 * Unsigned so that the endless increment below wraps around instead
+	 * of overflowing a signed int, which is undefined behaviour.
+	 */
+	volatile unsigned int i = 0;
+
+	/*
+	 * We are just here to eat cpu and die. So make sure we can be killed,
+	 * and also don't do any custom SIGTERM handling.
+	 */
+	signal(SIGTERM, SIG_DFL);
+
+	notify_parent(write_pipe);
+	wait_for_parent(read_pipe);
+
+	/* Soak up cpu forever */
+	while (1) i++;
+
+	return 0;
+}
+
+/*
+ * Run test_function() while a child process spins on the same cpu.
+ *
+ * The caller is bound to a cpu chosen by pick_online_cpu(), a child is forked
+ * to soak up that cpu, and once the two have handshaken test_function() is
+ * called. The child is sent SIGKILL before returning.
+ *
+ * Returns the result of test_function(), or -1 if the pipes, the fork or the
+ * handshake failed. FAIL_IF returns early if no cpu could be picked or bound.
+ */
+pid_t eat_cpu(int (test_function)(void))
+{
+	union pipe read_pipe, write_pipe;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	if (pipe(read_pipe.fds) == -1)
+		return -1;
+
+	rc = -1;
+
+	if (pipe(write_pipe.fds) == -1)
+		goto out_close_read;
+
+	pid = fork();
+	if (pid == -1) {
+		/*
+		 * Must not fall through to the kill() below: kill(-1, ...)
+		 * would signal every process we are allowed to signal.
+		 */
+		perror("fork");
+		goto out_close_write;
+	}
+
+	if (pid == 0)
+		exit(eat_cpu_child(write_pipe, read_pipe));
+
+	if (sync_with_child(read_pipe, write_pipe))
+		goto out_kill;
+
+	printf("main test running as pid %d\n", getpid());
+
+	rc = test_function();
+out_kill:
+	kill(pid, SIGKILL);
+out_close_write:
+	close(write_pipe.read_fd);
+	close(write_pipe.write_fd);
+out_close_read:
+	close(read_pipe.read_fd);
+	close(read_pipe.write_fd);
+
+	return rc;
+}
+
+struct addr_range libc, vdso;
+
+/*
+ * Scan /proc/self/maps and record the address range of the executable
+ * mapping whose name contains "libc" in the global libc, and of the one whose
+ * name contains "[vdso]" in the global vdso. If several mappings match, the
+ * last one scanned wins.
+ *
+ * Returns 0 once scanning stops, or -1 if the file cannot be opened.
+ */
+int parse_proc_maps(void)
+{
+	char execute, name[128];
+	uint64_t start, end;
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	do {
+		/* This skips line with no executable which is what we want */
+		/*
+		 * Of the four permission characters only the third (execute)
+		 * is stored; offset, device and inode are parsed but
+		 * discarded.
+		 * NOTE(review): the loop only stops on rc <= 0, so a partial
+		 * match leaves execute/name holding earlier values - confirm
+		 * that is acceptable.
+		 */
+		rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n",
+			    &start, &end, &execute, name);
+		if (rc <= 0)
+			break;
+
+		if (execute != 'x')
+			continue;
+
+		/* Store an inclusive range: last is the final byte mapped */
+		if (strstr(name, "libc")) {
+			libc.first = start;
+			libc.last = end - 1;
+		} else if (strstr(name, "[vdso]")) {
+			vdso.first = start;
+			vdso.last = end - 1;
+		}
+	} while(1);
+
+	fclose(f);
+
+	return 0;
+}
+
+#define PARANOID_PATH	"/proc/sys/kernel/perf_event_paranoid"
+
+/*
+ * Check that the value in PARANOID_PATH is strictly below level.
+ *
+ * NB the sense of the return value: it is false when the requirement is met,
+ * and true when it is not met or when the value could not be read or parsed.
+ *
+ * The value is parsed as a signed number so that a negative setting is seen
+ * as a small value rather than wrapping to a huge unsigned one.
+ */
+bool require_paranoia_below(int level)
+{
+	long current;
+	char *end, buf[16];
+	FILE *f;
+	int rc;
+
+	rc = -1;
+
+	f = fopen(PARANOID_PATH, "r");
+	if (!f) {
+		perror("fopen");
+		goto out;
+	}
+
+	if (!fgets(buf, sizeof(buf), f)) {
+		printf("Couldn't read " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	current = strtol(buf, &end, 10);
+
+	if (end == buf) {
+		printf("Couldn't parse " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	/* Too paranoid; still go via out_close so the file is not leaked */
+	if (current >= level)
+		goto out_close;
+
+	rc = 0;
+out_close:
+	fclose(f);
+out:
+	return rc;
+}

diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
new file mode 100644
index 0000000..ca5d72a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.h

@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
+#define __SELFTESTS_POWERPC_PMU_LIB_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * A pair of pipe file descriptors. fds[] is the form handed to pipe(); the
+ * anonymous struct overlays it, so read_fd aliases fds[0] and write_fd
+ * aliases fds[1].
+ */
+union pipe {
+	struct {
+		int read_fd;
+		int write_fd;
+	};
+	int fds[2];
+};
+
+extern int pick_online_cpu(void);
+extern int bind_to_cpu(int cpu);
+extern int kill_child_and_wait(pid_t child_pid);
+extern int wait_for_child(pid_t child_pid);
+extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
+extern int wait_for_parent(union pipe read_pipe);
+extern int notify_parent(union pipe write_pipe);
+extern int notify_parent_of_error(union pipe write_pipe);
+extern pid_t eat_cpu(int (test_function)(void));
+extern bool require_paranoia_below(int level);
+
+/* An address range described by its first and last address. */
+struct addr_range {
+	uint64_t first, last;
+};
+
+extern struct addr_range libc, vdso;
+
+int parse_proc_maps(void);
+
+#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */

diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8820e3d..20c1f08 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S

@@ -3,44 +3,41 @@
  * Licensed under GPLv2.
  */
 
+#include <ppc-asm.h>
+
 	.text
 
-	.global thirty_two_instruction_loop
-	.type .thirty_two_instruction_loop,@function
-	.section ".opd","aw",@progbits
-thirty_two_instruction_loop:
-	.quad .thirty_two_instruction_loop, .TOC.@tocbase, 0
-	.previous
-.thirty_two_instruction_loop:
-	cmpwi	%r3,0
+FUNC_START(thirty_two_instruction_loop)
+	cmpdi	r3,0
 	beqlr
-	addi	%r4,%r3,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1	# 28 addi's
-	subi	%r3,%r3,1
-	b	.thirty_two_instruction_loop
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)

diff --git a/tools/testing/selftests/powerpc/subunit.h b/tools/testing/selftests/powerpc/subunit.h
index 98a2292..9c6c4e9 100644
--- a/tools/testing/selftests/powerpc/subunit.h
+++ b/tools/testing/selftests/powerpc/subunit.h

@@ -26,6 +26,11 @@
 	printf("error: %s\n", name);
 }
 
+/* Report that test name was skipped, by printing a "skip: <name>" line. */
+static inline void test_skip(char *name)
+{
+	printf("skip: %s\n", name);
+}
+
 static inline void test_success(char *name)
 {
 	printf("success: %s\n", name);

diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 0000000..51267f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile

@@ -0,0 +1,15 @@
+# Test programs built and run by this Makefile.
+PROGS := tm-resched-dscr
+
+all: $(PROGS)
+
+# No prerequisites or recipe are given here; presumably make's implicit
+# rules build each program from its same-named source file.
+$(PROGS):
+
+# Run every program in turn. The leading '@' stops make echoing the loop and
+# the '-' makes it ignore a non-zero exit status from the loop.
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS) *.o
+
+.PHONY: all run_tests clean

diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
new file mode 100644
index 0000000..ee98e38
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c

@@ -0,0 +1,90 @@
+/* Test context switching to see if the DSCR SPR is correctly preserved
+ * when within a transaction.
+ *
+ * Note: We assume that the DSCR has been left at the default value (0)
+ * for all CPUs.
+ *
+ * Method:
+ *
+ * Set a value into the DSCR.
+ *
+ * Start a transaction, and suspend it (*).
+ *
+ * Hard loop checking to see if the transaction has become doomed.
+ *
+ * Now that we *may* have been preempted, record the DSCR and TEXASR SPRS.
+ *
+ * If the abort was because of a context switch, check the DSCR value.
+ * Otherwise, try again.
+ *
+ * (*) If the transaction is not suspended we can't see the problem because
+ * the transaction abort handler will restore the DSCR to its checkpointed
+ * value before we regain control.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <asm/tm.h>
+
+#define TBEGIN          ".long 0x7C00051D ;"
+#define TEND            ".long 0x7C00055D ;"
+#define TCHECK          ".long 0x7C00059C ;"
+#define TSUSPEND        ".long 0x7C0005DD ;"
+#define TRESUME         ".long 0x7C2005DD ;"
+#define SPRN_TEXASR     0x82
+#define SPRN_DSCR       0x03
+
+/*
+ * Repeat the set-DSCR / suspended-transaction sequence until the transaction
+ * is aborted with cause TM_CAUSE_RESCHED, then exit with success if the DSCR
+ * read back inside the transaction matches the value written, and with
+ * failure if it does not. Any other abort cause prints a '.' and retries.
+ */
+int main(void) {
+	uint64_t rv, dscr1 = 1, dscr2, texasr;
+
+	printf("Check DSCR TM context switch: ");
+	fflush(stdout);
+	for (;;) {
+		/*
+		 * rv is only written by the asm (to 0) when the transaction
+		 * commits. On the abort path it has to keep the 1 set here,
+		 * so it is passed as a read-write ("+r") operand. As a
+		 * write-only ("=r") operand the compiler is free to drop this
+		 * store and rv would be indeterminate after an abort.
+		 */
+		rv = 1;
+		/*
+		 * dscr2 and texasr are only stored once the transaction has
+		 * been suspended. Give them defined values, and pass them as
+		 * read-write ("+m") operands, in case the transaction fails
+		 * before getting that far.
+		 */
+		dscr2 = 0;
+		texasr = 0;
+		asm __volatile__ (
+			/* set a known value into the DSCR */
+			"ld      3, %[dscr1];"
+			"mtspr   %[sprn_dscr], 3;"
+
+			/* start and suspend a transaction */
+			TBEGIN
+			"beq     1f;"
+			TSUSPEND
+
+			/* hard loop until the transaction becomes doomed */
+			"2: ;"
+			TCHECK
+			"bc      4, 0, 2b;"
+
+			/* record DSCR and TEXASR */
+			"mfspr   3, %[sprn_dscr];"
+			"std     3, %[dscr2];"
+			"mfspr   3, %[sprn_texasr];"
+			"std     3, %[texasr];"
+
+			TRESUME
+			TEND
+			"li      %[rv], 0;"
+			"1: ;"
+			: [rv]"+r"(rv), [dscr2]"+m"(dscr2), [texasr]"+m"(texasr)
+			: [dscr1]"m"(dscr1)
+			, [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
+			: "memory", "r3"
+		);
+		assert(rv); /* make sure the transaction aborted */
+		/* The failure cause is held in the top byte of TEXASR */
+		if ((texasr >> 56) != TM_CAUSE_RESCHED) {
+			putchar('.');
+			fflush(stdout);
+			continue;
+		}
+		if (dscr2 != dscr1) {
+			printf(" FAIL\n");
+			exit(EXIT_FAILURE);
+		} else {
+			printf(" OK\n");
+			exit(EXIT_SUCCESS);
+		}
+	}
+}

diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h
index 0de0644..a93777a 100644
--- a/tools/testing/selftests/powerpc/utils.h
+++ b/tools/testing/selftests/powerpc/utils.h

@@ -31,6 +31,18 @@
 	}							\
 } while (0)
 
+/*
+ * The test harness uses this, yes it's gross.
+ *
+ * Value returned by SKIP_IF() to say that a test was skipped.
+ */
+#define MAGIC_SKIP_RETURN_VALUE	99
+
+/*
+ * If x is true, print a "[SKIP]" message with the current line number to
+ * stderr and return MAGIC_SKIP_RETURN_VALUE from the enclosing function.
+ */
+#define SKIP_IF(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[SKIP] Test skipped on line %d\n", __LINE__);	\
+		return MAGIC_SKIP_RETURN_VALUE;			\
+	}							\
+} while (0)
+
 #define _str(s) #s
 #define str(s) _str(s)
 

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c86be0f..4b6c01b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c

@@ -1714,11 +1714,11 @@
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
-	bool ret = false;
+	int ret = 0;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
commit	dd1845af24a47b70cf84c29126698884f740ff9c	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Sun Jun 15 16:02:20 2014 -1000
committer	Linus Torvalds <torvalds@linux-foundation.org>	Sun Jun 15 16:02:20 2014 -1000
tree	fa12809d854d18ba36a568c21d57ceff43617af1
parent	b55b39020289f225bf2455349ce1a67372a0baa9 [diff]
parent	b640a6037c9ecd1f0ad23a8e9b4ca5f5b4112508 [diff]